{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.946082081380579,
  "eval_steps": 500,
  "global_step": 2700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00035040077088169594,
      "grad_norm": 6.5142412185668945,
      "learning_rate": 0.0,
      "loss": 5.324,
      "step": 1
    },
    {
      "epoch": 0.0007008015417633919,
      "grad_norm": 6.758334159851074,
      "learning_rate": 6.993006993006994e-07,
      "loss": 5.3405,
      "step": 2
    },
    {
      "epoch": 0.0014016030835267838,
      "grad_norm": 6.22674036026001,
      "learning_rate": 2.0979020979020983e-06,
      "loss": 5.3286,
      "step": 4
    },
    {
      "epoch": 0.0021024046252901755,
      "grad_norm": 5.438386917114258,
      "learning_rate": 3.496503496503497e-06,
      "loss": 5.25,
      "step": 6
    },
    {
      "epoch": 0.0028032061670535675,
      "grad_norm": 3.365504741668701,
      "learning_rate": 4.895104895104895e-06,
      "loss": 5.2821,
      "step": 8
    },
    {
      "epoch": 0.0035040077088169595,
      "grad_norm": 7.186147212982178,
      "learning_rate": 6.2937062937062944e-06,
      "loss": 5.21,
      "step": 10
    },
    {
      "epoch": 0.004204809250580351,
      "grad_norm": 4.960826396942139,
      "learning_rate": 7.692307692307694e-06,
      "loss": 5.0759,
      "step": 12
    },
    {
      "epoch": 0.004905610792343743,
      "grad_norm": 4.001464366912842,
      "learning_rate": 9.090909090909091e-06,
      "loss": 5.1092,
      "step": 14
    },
    {
      "epoch": 0.005606412334107135,
      "grad_norm": 3.2986342906951904,
      "learning_rate": 1.048951048951049e-05,
      "loss": 4.93,
      "step": 16
    },
    {
      "epoch": 0.006307213875870527,
      "grad_norm": 2.5407276153564453,
      "learning_rate": 1.188811188811189e-05,
      "loss": 4.8535,
      "step": 18
    },
    {
      "epoch": 0.007008015417633919,
      "grad_norm": 2.211754083633423,
      "learning_rate": 1.3286713286713287e-05,
      "loss": 4.74,
      "step": 20
    },
    {
      "epoch": 0.007708816959397311,
      "grad_norm": 1.6710195541381836,
      "learning_rate": 1.4685314685314686e-05,
      "loss": 4.609,
      "step": 22
    },
    {
      "epoch": 0.008409618501160702,
      "grad_norm": 1.280752182006836,
      "learning_rate": 1.6083916083916083e-05,
      "loss": 4.4879,
      "step": 24
    },
    {
      "epoch": 0.009110420042924094,
      "grad_norm": 1.312186598777771,
      "learning_rate": 1.7482517482517483e-05,
      "loss": 4.3995,
      "step": 26
    },
    {
      "epoch": 0.009811221584687486,
      "grad_norm": 1.3315190076828003,
      "learning_rate": 1.888111888111888e-05,
      "loss": 4.3005,
      "step": 28
    },
    {
      "epoch": 0.010512023126450878,
      "grad_norm": 1.3252590894699097,
      "learning_rate": 2.027972027972028e-05,
      "loss": 4.1952,
      "step": 30
    },
    {
      "epoch": 0.01121282466821427,
      "grad_norm": 1.3794758319854736,
      "learning_rate": 2.1678321678321677e-05,
      "loss": 4.1459,
      "step": 32
    },
    {
      "epoch": 0.011913626209977662,
      "grad_norm": 1.1808068752288818,
      "learning_rate": 2.307692307692308e-05,
      "loss": 4.034,
      "step": 34
    },
    {
      "epoch": 0.012614427751741054,
      "grad_norm": 1.31660795211792,
      "learning_rate": 2.4475524475524478e-05,
      "loss": 3.926,
      "step": 36
    },
    {
      "epoch": 0.013315229293504446,
      "grad_norm": 1.0347495079040527,
      "learning_rate": 2.5874125874125877e-05,
      "loss": 3.8812,
      "step": 38
    },
    {
      "epoch": 0.014016030835267838,
      "grad_norm": 1.050775408744812,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 3.7787,
      "step": 40
    },
    {
      "epoch": 0.01471683237703123,
      "grad_norm": 0.9461761713027954,
      "learning_rate": 2.8671328671328672e-05,
      "loss": 3.6738,
      "step": 42
    },
    {
      "epoch": 0.015417633918794622,
      "grad_norm": 1.0460454225540161,
      "learning_rate": 3.0069930069930068e-05,
      "loss": 3.6385,
      "step": 44
    },
    {
      "epoch": 0.016118435460558012,
      "grad_norm": 1.0687191486358643,
      "learning_rate": 3.146853146853147e-05,
      "loss": 3.5701,
      "step": 46
    },
    {
      "epoch": 0.016819237002321404,
      "grad_norm": 1.4722611904144287,
      "learning_rate": 3.2867132867132866e-05,
      "loss": 3.5438,
      "step": 48
    },
    {
      "epoch": 0.017520038544084796,
      "grad_norm": 1.1305724382400513,
      "learning_rate": 3.4265734265734265e-05,
      "loss": 3.4694,
      "step": 50
    },
    {
      "epoch": 0.018220840085848188,
      "grad_norm": 0.9322625994682312,
      "learning_rate": 3.566433566433567e-05,
      "loss": 3.4488,
      "step": 52
    },
    {
      "epoch": 0.01892164162761158,
      "grad_norm": 1.2441555261611938,
      "learning_rate": 3.7062937062937064e-05,
      "loss": 3.4289,
      "step": 54
    },
    {
      "epoch": 0.019622443169374972,
      "grad_norm": 0.9397731423377991,
      "learning_rate": 3.846153846153846e-05,
      "loss": 3.4021,
      "step": 56
    },
    {
      "epoch": 0.020323244711138364,
      "grad_norm": 1.3261164426803589,
      "learning_rate": 3.986013986013986e-05,
      "loss": 3.3575,
      "step": 58
    },
    {
      "epoch": 0.021024046252901756,
      "grad_norm": 1.08541738986969,
      "learning_rate": 4.125874125874126e-05,
      "loss": 3.3403,
      "step": 60
    },
    {
      "epoch": 0.021724847794665148,
      "grad_norm": 0.8626166582107544,
      "learning_rate": 4.265734265734266e-05,
      "loss": 3.3306,
      "step": 62
    },
    {
      "epoch": 0.02242564933642854,
      "grad_norm": 1.0596344470977783,
      "learning_rate": 4.405594405594406e-05,
      "loss": 3.2779,
      "step": 64
    },
    {
      "epoch": 0.023126450878191932,
      "grad_norm": 1.511917233467102,
      "learning_rate": 4.545454545454546e-05,
      "loss": 3.2759,
      "step": 66
    },
    {
      "epoch": 0.023827252419955324,
      "grad_norm": 1.2062046527862549,
      "learning_rate": 4.685314685314686e-05,
      "loss": 3.2545,
      "step": 68
    },
    {
      "epoch": 0.024528053961718716,
      "grad_norm": 1.1399930715560913,
      "learning_rate": 4.825174825174825e-05,
      "loss": 3.2235,
      "step": 70
    },
    {
      "epoch": 0.025228855503482108,
      "grad_norm": 0.8960133790969849,
      "learning_rate": 4.9650349650349656e-05,
      "loss": 3.2025,
      "step": 72
    },
    {
      "epoch": 0.0259296570452455,
      "grad_norm": 1.3042056560516357,
      "learning_rate": 5.1048951048951055e-05,
      "loss": 3.1475,
      "step": 74
    },
    {
      "epoch": 0.026630458587008892,
      "grad_norm": 1.186320424079895,
      "learning_rate": 5.244755244755245e-05,
      "loss": 3.1759,
      "step": 76
    },
    {
      "epoch": 0.027331260128772284,
      "grad_norm": 1.2691158056259155,
      "learning_rate": 5.384615384615385e-05,
      "loss": 3.1296,
      "step": 78
    },
    {
      "epoch": 0.028032061670535676,
      "grad_norm": 0.7816159129142761,
      "learning_rate": 5.524475524475524e-05,
      "loss": 3.1017,
      "step": 80
    },
    {
      "epoch": 0.028732863212299068,
      "grad_norm": 1.1489295959472656,
      "learning_rate": 5.664335664335665e-05,
      "loss": 3.1151,
      "step": 82
    },
    {
      "epoch": 0.02943366475406246,
      "grad_norm": 1.5686062574386597,
      "learning_rate": 5.8041958041958044e-05,
      "loss": 3.114,
      "step": 84
    },
    {
      "epoch": 0.030134466295825852,
      "grad_norm": 1.4421433210372925,
      "learning_rate": 5.944055944055944e-05,
      "loss": 3.0946,
      "step": 86
    },
    {
      "epoch": 0.030835267837589244,
      "grad_norm": 1.335250973701477,
      "learning_rate": 6.083916083916085e-05,
      "loss": 3.084,
      "step": 88
    },
    {
      "epoch": 0.03153606937935263,
      "grad_norm": 0.970507800579071,
      "learning_rate": 6.223776223776224e-05,
      "loss": 3.1163,
      "step": 90
    },
    {
      "epoch": 0.032236870921116025,
      "grad_norm": 1.2849407196044922,
      "learning_rate": 6.363636363636364e-05,
      "loss": 3.063,
      "step": 92
    },
    {
      "epoch": 0.032937672462879417,
      "grad_norm": 1.0378247499465942,
      "learning_rate": 6.503496503496504e-05,
      "loss": 3.0223,
      "step": 94
    },
    {
      "epoch": 0.03363847400464281,
      "grad_norm": 1.3139392137527466,
      "learning_rate": 6.643356643356644e-05,
      "loss": 3.0572,
      "step": 96
    },
    {
      "epoch": 0.0343392755464062,
      "grad_norm": 1.254752278327942,
      "learning_rate": 6.783216783216784e-05,
      "loss": 3.0408,
      "step": 98
    },
    {
      "epoch": 0.03504007708816959,
      "grad_norm": 1.3333168029785156,
      "learning_rate": 6.923076923076924e-05,
      "loss": 3.0185,
      "step": 100
    },
    {
      "epoch": 0.035740878629932984,
      "grad_norm": 1.2795464992523193,
      "learning_rate": 7.062937062937062e-05,
      "loss": 3.0328,
      "step": 102
    },
    {
      "epoch": 0.036441680171696376,
      "grad_norm": 1.2025645971298218,
      "learning_rate": 7.202797202797204e-05,
      "loss": 3.0303,
      "step": 104
    },
    {
      "epoch": 0.03714248171345977,
      "grad_norm": 1.1741266250610352,
      "learning_rate": 7.342657342657343e-05,
      "loss": 3.0252,
      "step": 106
    },
    {
      "epoch": 0.03784328325522316,
      "grad_norm": 1.2022653818130493,
      "learning_rate": 7.482517482517482e-05,
      "loss": 3.0183,
      "step": 108
    },
    {
      "epoch": 0.03854408479698655,
      "grad_norm": 1.1950666904449463,
      "learning_rate": 7.622377622377622e-05,
      "loss": 2.9804,
      "step": 110
    },
    {
      "epoch": 0.039244886338749944,
      "grad_norm": 1.5780822038650513,
      "learning_rate": 7.762237762237763e-05,
      "loss": 2.9804,
      "step": 112
    },
    {
      "epoch": 0.039945687880513336,
      "grad_norm": 1.0478655099868774,
      "learning_rate": 7.902097902097903e-05,
      "loss": 2.9894,
      "step": 114
    },
    {
      "epoch": 0.04064648942227673,
      "grad_norm": 1.1782268285751343,
      "learning_rate": 8.041958041958042e-05,
      "loss": 2.9717,
      "step": 116
    },
    {
      "epoch": 0.04134729096404012,
      "grad_norm": 1.0321820974349976,
      "learning_rate": 8.181818181818183e-05,
      "loss": 2.9776,
      "step": 118
    },
    {
      "epoch": 0.04204809250580351,
      "grad_norm": 0.9697206020355225,
      "learning_rate": 8.321678321678323e-05,
      "loss": 2.9804,
      "step": 120
    },
    {
      "epoch": 0.042748894047566904,
      "grad_norm": 1.1984606981277466,
      "learning_rate": 8.461538461538461e-05,
      "loss": 2.9495,
      "step": 122
    },
    {
      "epoch": 0.043449695589330296,
      "grad_norm": 0.9830178618431091,
      "learning_rate": 8.601398601398601e-05,
      "loss": 2.9656,
      "step": 124
    },
    {
      "epoch": 0.04415049713109369,
      "grad_norm": 1.3105114698410034,
      "learning_rate": 8.741258741258743e-05,
      "loss": 2.9306,
      "step": 126
    },
    {
      "epoch": 0.04485129867285708,
      "grad_norm": 1.3499157428741455,
      "learning_rate": 8.881118881118881e-05,
      "loss": 2.9381,
      "step": 128
    },
    {
      "epoch": 0.04555210021462047,
      "grad_norm": 0.9977575540542603,
      "learning_rate": 9.020979020979021e-05,
      "loss": 2.907,
      "step": 130
    },
    {
      "epoch": 0.046252901756383864,
      "grad_norm": 1.2331498861312866,
      "learning_rate": 9.160839160839161e-05,
      "loss": 2.9224,
      "step": 132
    },
    {
      "epoch": 0.046953703298147256,
      "grad_norm": 1.451253890991211,
      "learning_rate": 9.300699300699301e-05,
      "loss": 2.9202,
      "step": 134
    },
    {
      "epoch": 0.04765450483991065,
      "grad_norm": 1.2146471738815308,
      "learning_rate": 9.440559440559441e-05,
      "loss": 2.9098,
      "step": 136
    },
    {
      "epoch": 0.04835530638167404,
      "grad_norm": 1.0873245000839233,
      "learning_rate": 9.580419580419581e-05,
      "loss": 2.9218,
      "step": 138
    },
    {
      "epoch": 0.04905610792343743,
      "grad_norm": 1.276413083076477,
      "learning_rate": 9.72027972027972e-05,
      "loss": 2.8947,
      "step": 140
    },
    {
      "epoch": 0.049756909465200824,
      "grad_norm": 1.126065731048584,
      "learning_rate": 9.86013986013986e-05,
      "loss": 2.8788,
      "step": 142
    },
    {
      "epoch": 0.050457711006964216,
      "grad_norm": 1.5177017450332642,
      "learning_rate": 0.0001,
      "loss": 2.9043,
      "step": 144
    },
    {
      "epoch": 0.05115851254872761,
      "grad_norm": 1.3744112253189087,
      "learning_rate": 9.99998657109765e-05,
      "loss": 2.888,
      "step": 146
    },
    {
      "epoch": 0.051859314090491,
      "grad_norm": 1.7921055555343628,
      "learning_rate": 9.999946284462733e-05,
      "loss": 2.8631,
      "step": 148
    },
    {
      "epoch": 0.05256011563225439,
      "grad_norm": 1.1755317449569702,
      "learning_rate": 9.999879140311652e-05,
      "loss": 2.8735,
      "step": 150
    },
    {
      "epoch": 0.053260917174017784,
      "grad_norm": 0.846362292766571,
      "learning_rate": 9.999785139005073e-05,
      "loss": 2.8768,
      "step": 152
    },
    {
      "epoch": 0.053961718715781176,
      "grad_norm": 0.9867280721664429,
      "learning_rate": 9.999664281047933e-05,
      "loss": 2.8859,
      "step": 154
    },
    {
      "epoch": 0.05466252025754457,
      "grad_norm": 0.9751666188240051,
      "learning_rate": 9.999516567089429e-05,
      "loss": 2.8497,
      "step": 156
    },
    {
      "epoch": 0.05536332179930796,
      "grad_norm": 1.0603703260421753,
      "learning_rate": 9.999341997923011e-05,
      "loss": 2.8404,
      "step": 158
    },
    {
      "epoch": 0.05606412334107135,
      "grad_norm": 1.0447975397109985,
      "learning_rate": 9.999140574486392e-05,
      "loss": 2.9092,
      "step": 160
    },
    {
      "epoch": 0.056764924882834744,
      "grad_norm": 1.3046443462371826,
      "learning_rate": 9.998912297861527e-05,
      "loss": 2.8971,
      "step": 162
    },
    {
      "epoch": 0.057465726424598136,
      "grad_norm": 1.1029243469238281,
      "learning_rate": 9.998657169274622e-05,
      "loss": 2.8834,
      "step": 164
    },
    {
      "epoch": 0.05816652796636153,
      "grad_norm": 0.8594210743904114,
      "learning_rate": 9.99837519009611e-05,
      "loss": 2.8361,
      "step": 166
    },
    {
      "epoch": 0.05886732950812492,
      "grad_norm": 0.8585363030433655,
      "learning_rate": 9.998066361840665e-05,
      "loss": 2.8782,
      "step": 168
    },
    {
      "epoch": 0.05956813104988831,
      "grad_norm": 0.693467378616333,
      "learning_rate": 9.997730686167173e-05,
      "loss": 2.8537,
      "step": 170
    },
    {
      "epoch": 0.060268932591651704,
      "grad_norm": 0.8418940305709839,
      "learning_rate": 9.997368164878738e-05,
      "loss": 2.8294,
      "step": 172
    },
    {
      "epoch": 0.060969734133415096,
      "grad_norm": 0.9938271045684814,
      "learning_rate": 9.996978799922665e-05,
      "loss": 2.8458,
      "step": 174
    },
    {
      "epoch": 0.06167053567517849,
      "grad_norm": 1.0347217321395874,
      "learning_rate": 9.99656259339045e-05,
      "loss": 2.8081,
      "step": 176
    },
    {
      "epoch": 0.06237133721694188,
      "grad_norm": 0.9216743111610413,
      "learning_rate": 9.996119547517775e-05,
      "loss": 2.8655,
      "step": 178
    },
    {
      "epoch": 0.06307213875870527,
      "grad_norm": 1.0579859018325806,
      "learning_rate": 9.995649664684486e-05,
      "loss": 2.823,
      "step": 180
    },
    {
      "epoch": 0.06377294030046866,
      "grad_norm": 0.9864194393157959,
      "learning_rate": 9.995152947414586e-05,
      "loss": 2.8081,
      "step": 182
    },
    {
      "epoch": 0.06447374184223205,
      "grad_norm": 0.8999143838882446,
      "learning_rate": 9.994629398376226e-05,
      "loss": 2.7947,
      "step": 184
    },
    {
      "epoch": 0.06517454338399545,
      "grad_norm": 0.9121315479278564,
      "learning_rate": 9.994079020381676e-05,
      "loss": 2.8253,
      "step": 186
    },
    {
      "epoch": 0.06587534492575883,
      "grad_norm": 0.8578842282295227,
      "learning_rate": 9.993501816387329e-05,
      "loss": 2.7548,
      "step": 188
    },
    {
      "epoch": 0.06657614646752223,
      "grad_norm": 0.8564820289611816,
      "learning_rate": 9.992897789493672e-05,
      "loss": 2.8361,
      "step": 190
    },
    {
      "epoch": 0.06727694800928562,
      "grad_norm": 0.8013344407081604,
      "learning_rate": 9.992266942945269e-05,
      "loss": 2.8606,
      "step": 192
    },
    {
      "epoch": 0.06797774955104902,
      "grad_norm": 0.7343975901603699,
      "learning_rate": 9.991609280130752e-05,
      "loss": 2.7947,
      "step": 194
    },
    {
      "epoch": 0.0686785510928124,
      "grad_norm": 0.7338536381721497,
      "learning_rate": 9.990924804582797e-05,
      "loss": 2.7492,
      "step": 196
    },
    {
      "epoch": 0.0693793526345758,
      "grad_norm": 0.828781008720398,
      "learning_rate": 9.990213519978109e-05,
      "loss": 2.8013,
      "step": 198
    },
    {
      "epoch": 0.07008015417633918,
      "grad_norm": 0.7156624794006348,
      "learning_rate": 9.989475430137391e-05,
      "loss": 2.7943,
      "step": 200
    },
    {
      "epoch": 0.07078095571810258,
      "grad_norm": 0.6014353632926941,
      "learning_rate": 9.988710539025341e-05,
      "loss": 2.8099,
      "step": 202
    },
    {
      "epoch": 0.07148175725986597,
      "grad_norm": 0.6569661498069763,
      "learning_rate": 9.987918850750619e-05,
      "loss": 2.8125,
      "step": 204
    },
    {
      "epoch": 0.07218255880162937,
      "grad_norm": 0.6558775305747986,
      "learning_rate": 9.987100369565825e-05,
      "loss": 2.7487,
      "step": 206
    },
    {
      "epoch": 0.07288336034339275,
      "grad_norm": 0.6454245448112488,
      "learning_rate": 9.986255099867481e-05,
      "loss": 2.7648,
      "step": 208
    },
    {
      "epoch": 0.07358416188515615,
      "grad_norm": 0.5741921067237854,
      "learning_rate": 9.985383046196004e-05,
      "loss": 2.7743,
      "step": 210
    },
    {
      "epoch": 0.07428496342691954,
      "grad_norm": 0.5875937938690186,
      "learning_rate": 9.984484213235685e-05,
      "loss": 2.7728,
      "step": 212
    },
    {
      "epoch": 0.07498576496868294,
      "grad_norm": 0.6638422012329102,
      "learning_rate": 9.98355860581466e-05,
      "loss": 2.7504,
      "step": 214
    },
    {
      "epoch": 0.07568656651044632,
      "grad_norm": 1.1614341735839844,
      "learning_rate": 9.982606228904884e-05,
      "loss": 2.7923,
      "step": 216
    },
    {
      "epoch": 0.07638736805220972,
      "grad_norm": 1.005254864692688,
      "learning_rate": 9.981627087622108e-05,
      "loss": 2.76,
      "step": 218
    },
    {
      "epoch": 0.0770881695939731,
      "grad_norm": 0.7738555669784546,
      "learning_rate": 9.980621187225852e-05,
      "loss": 2.7866,
      "step": 220
    },
    {
      "epoch": 0.0777889711357365,
      "grad_norm": 0.9469527006149292,
      "learning_rate": 9.979588533119367e-05,
      "loss": 2.8012,
      "step": 222
    },
    {
      "epoch": 0.07848977267749989,
      "grad_norm": 0.9031473398208618,
      "learning_rate": 9.978529130849619e-05,
      "loss": 2.7522,
      "step": 224
    },
    {
      "epoch": 0.07919057421926329,
      "grad_norm": 0.9450514912605286,
      "learning_rate": 9.977442986107252e-05,
      "loss": 2.7791,
      "step": 226
    },
    {
      "epoch": 0.07989137576102667,
      "grad_norm": 0.7259206771850586,
      "learning_rate": 9.97633010472656e-05,
      "loss": 2.7237,
      "step": 228
    },
    {
      "epoch": 0.08059217730279007,
      "grad_norm": 0.6595309972763062,
      "learning_rate": 9.975190492685451e-05,
      "loss": 2.7284,
      "step": 230
    },
    {
      "epoch": 0.08129297884455346,
      "grad_norm": 0.7696382999420166,
      "learning_rate": 9.974024156105422e-05,
      "loss": 2.7631,
      "step": 232
    },
    {
      "epoch": 0.08199378038631686,
      "grad_norm": 0.7305110096931458,
      "learning_rate": 9.972831101251521e-05,
      "loss": 2.7793,
      "step": 234
    },
    {
      "epoch": 0.08269458192808024,
      "grad_norm": 0.6039514541625977,
      "learning_rate": 9.971611334532314e-05,
      "loss": 2.7669,
      "step": 236
    },
    {
      "epoch": 0.08339538346984364,
      "grad_norm": 0.5824711918830872,
      "learning_rate": 9.970364862499852e-05,
      "loss": 2.7476,
      "step": 238
    },
    {
      "epoch": 0.08409618501160702,
      "grad_norm": 0.6831758618354797,
      "learning_rate": 9.969091691849637e-05,
      "loss": 2.7098,
      "step": 240
    },
    {
      "epoch": 0.08479698655337042,
      "grad_norm": 0.6469074487686157,
      "learning_rate": 9.967791829420581e-05,
      "loss": 2.7609,
      "step": 242
    },
    {
      "epoch": 0.08549778809513381,
      "grad_norm": 0.5876832604408264,
      "learning_rate": 9.966465282194976e-05,
      "loss": 2.7306,
      "step": 244
    },
    {
      "epoch": 0.08619858963689721,
      "grad_norm": 0.6310129761695862,
      "learning_rate": 9.965112057298451e-05,
      "loss": 2.7283,
      "step": 246
    },
    {
      "epoch": 0.08689939117866059,
      "grad_norm": 0.6113069653511047,
      "learning_rate": 9.963732161999935e-05,
      "loss": 2.7274,
      "step": 248
    },
    {
      "epoch": 0.08760019272042399,
      "grad_norm": 1.0655111074447632,
      "learning_rate": 9.96232560371162e-05,
      "loss": 2.7022,
      "step": 250
    },
    {
      "epoch": 0.08830099426218738,
      "grad_norm": 0.8412613272666931,
      "learning_rate": 9.960892389988918e-05,
      "loss": 2.7213,
      "step": 252
    },
    {
      "epoch": 0.08900179580395078,
      "grad_norm": 0.7329776883125305,
      "learning_rate": 9.959432528530428e-05,
      "loss": 2.7343,
      "step": 254
    },
    {
      "epoch": 0.08970259734571416,
      "grad_norm": 0.702498197555542,
      "learning_rate": 9.95794602717788e-05,
      "loss": 2.7642,
      "step": 256
    },
    {
      "epoch": 0.09040339888747755,
      "grad_norm": 0.6936408281326294,
      "learning_rate": 9.95643289391611e-05,
      "loss": 2.7081,
      "step": 258
    },
    {
      "epoch": 0.09110420042924094,
      "grad_norm": 0.664743959903717,
      "learning_rate": 9.954893136873005e-05,
      "loss": 2.7054,
      "step": 260
    },
    {
      "epoch": 0.09180500197100433,
      "grad_norm": 0.5716791152954102,
      "learning_rate": 9.953326764319463e-05,
      "loss": 2.6751,
      "step": 262
    },
    {
      "epoch": 0.09250580351276773,
      "grad_norm": 0.6207195520401001,
      "learning_rate": 9.95173378466935e-05,
      "loss": 2.6945,
      "step": 264
    },
    {
      "epoch": 0.09320660505453111,
      "grad_norm": 0.6572092771530151,
      "learning_rate": 9.950114206479453e-05,
      "loss": 2.6989,
      "step": 266
    },
    {
      "epoch": 0.09390740659629451,
      "grad_norm": 0.7676830887794495,
      "learning_rate": 9.948468038449435e-05,
      "loss": 2.7613,
      "step": 268
    },
    {
      "epoch": 0.0946082081380579,
      "grad_norm": 0.5810503959655762,
      "learning_rate": 9.946795289421787e-05,
      "loss": 2.7234,
      "step": 270
    },
    {
      "epoch": 0.0953090096798213,
      "grad_norm": 0.6459682583808899,
      "learning_rate": 9.945095968381784e-05,
      "loss": 2.717,
      "step": 272
    },
    {
      "epoch": 0.09600981122158468,
      "grad_norm": 0.6498464345932007,
      "learning_rate": 9.94337008445743e-05,
      "loss": 2.7389,
      "step": 274
    },
    {
      "epoch": 0.09671061276334808,
      "grad_norm": 0.6287350654602051,
      "learning_rate": 9.941617646919421e-05,
      "loss": 2.681,
      "step": 276
    },
    {
      "epoch": 0.09741141430511147,
      "grad_norm": 0.7516258955001831,
      "learning_rate": 9.939838665181076e-05,
      "loss": 2.6696,
      "step": 278
    },
    {
      "epoch": 0.09811221584687486,
      "grad_norm": 0.6962350606918335,
      "learning_rate": 9.938033148798307e-05,
      "loss": 2.6971,
      "step": 280
    },
    {
      "epoch": 0.09881301738863825,
      "grad_norm": 0.6605144739151001,
      "learning_rate": 9.936201107469555e-05,
      "loss": 2.6999,
      "step": 282
    },
    {
      "epoch": 0.09951381893040165,
      "grad_norm": 0.5991240739822388,
      "learning_rate": 9.93434255103574e-05,
      "loss": 2.6936,
      "step": 284
    },
    {
      "epoch": 0.10021462047216503,
      "grad_norm": 0.5660961866378784,
      "learning_rate": 9.932457489480213e-05,
      "loss": 2.686,
      "step": 286
    },
    {
      "epoch": 0.10091542201392843,
      "grad_norm": 0.690290093421936,
      "learning_rate": 9.930545932928698e-05,
      "loss": 2.6809,
      "step": 288
    },
    {
      "epoch": 0.10161622355569182,
      "grad_norm": 0.7119167447090149,
      "learning_rate": 9.928607891649234e-05,
      "loss": 2.7221,
      "step": 290
    },
    {
      "epoch": 0.10231702509745522,
      "grad_norm": 0.7049365639686584,
      "learning_rate": 9.926643376052131e-05,
      "loss": 2.6569,
      "step": 292
    },
    {
      "epoch": 0.1030178266392186,
      "grad_norm": 0.6691743731498718,
      "learning_rate": 9.924652396689902e-05,
      "loss": 2.6751,
      "step": 294
    },
    {
      "epoch": 0.103718628180982,
      "grad_norm": 0.5533433556556702,
      "learning_rate": 9.922634964257215e-05,
      "loss": 2.7064,
      "step": 296
    },
    {
      "epoch": 0.10441942972274539,
      "grad_norm": 0.6669672727584839,
      "learning_rate": 9.920591089590831e-05,
      "loss": 2.687,
      "step": 298
    },
    {
      "epoch": 0.10512023126450878,
      "grad_norm": 0.8539720773696899,
      "learning_rate": 9.918520783669549e-05,
      "loss": 2.6968,
      "step": 300
    },
    {
      "epoch": 0.10582103280627217,
      "grad_norm": 0.827905535697937,
      "learning_rate": 9.916424057614142e-05,
      "loss": 2.7339,
      "step": 302
    },
    {
      "epoch": 0.10652183434803557,
      "grad_norm": 0.7071542143821716,
      "learning_rate": 9.9143009226873e-05,
      "loss": 2.67,
      "step": 304
    },
    {
      "epoch": 0.10722263588979895,
      "grad_norm": 0.6667853593826294,
      "learning_rate": 9.912151390293575e-05,
      "loss": 2.7113,
      "step": 306
    },
    {
      "epoch": 0.10792343743156235,
      "grad_norm": 0.49210044741630554,
      "learning_rate": 9.90997547197931e-05,
      "loss": 2.7034,
      "step": 308
    },
    {
      "epoch": 0.10862423897332574,
      "grad_norm": 0.5823047757148743,
      "learning_rate": 9.907773179432581e-05,
      "loss": 2.6815,
      "step": 310
    },
    {
      "epoch": 0.10932504051508914,
      "grad_norm": 0.5159279704093933,
      "learning_rate": 9.905544524483138e-05,
      "loss": 2.7055,
      "step": 312
    },
    {
      "epoch": 0.11002584205685252,
      "grad_norm": 0.5294278264045715,
      "learning_rate": 9.903289519102338e-05,
      "loss": 2.6821,
      "step": 314
    },
    {
      "epoch": 0.11072664359861592,
      "grad_norm": 0.5865507125854492,
      "learning_rate": 9.901008175403078e-05,
      "loss": 2.698,
      "step": 316
    },
    {
      "epoch": 0.1114274451403793,
      "grad_norm": 0.7102755904197693,
      "learning_rate": 9.898700505639735e-05,
      "loss": 2.693,
      "step": 318
    },
    {
      "epoch": 0.1121282466821427,
      "grad_norm": 0.8151699900627136,
      "learning_rate": 9.8963665222081e-05,
      "loss": 2.6482,
      "step": 320
    },
    {
      "epoch": 0.11282904822390609,
      "grad_norm": 0.5769193172454834,
      "learning_rate": 9.894006237645304e-05,
      "loss": 2.6893,
      "step": 322
    },
    {
      "epoch": 0.11352984976566949,
      "grad_norm": 0.6606284976005554,
      "learning_rate": 9.891619664629762e-05,
      "loss": 2.6859,
      "step": 324
    },
    {
      "epoch": 0.11423065130743287,
      "grad_norm": 0.5883016586303711,
      "learning_rate": 9.889206815981094e-05,
      "loss": 2.6622,
      "step": 326
    },
    {
      "epoch": 0.11493145284919627,
      "grad_norm": 0.5413339734077454,
      "learning_rate": 9.886767704660067e-05,
      "loss": 2.6718,
      "step": 328
    },
    {
      "epoch": 0.11563225439095966,
      "grad_norm": 0.7391770482063293,
      "learning_rate": 9.884302343768512e-05,
      "loss": 2.6695,
      "step": 330
    },
    {
      "epoch": 0.11633305593272306,
      "grad_norm": 0.7529366612434387,
      "learning_rate": 9.881810746549267e-05,
      "loss": 2.7341,
      "step": 332
    },
    {
      "epoch": 0.11703385747448644,
      "grad_norm": 0.6971571445465088,
      "learning_rate": 9.8792929263861e-05,
      "loss": 2.6444,
      "step": 334
    },
    {
      "epoch": 0.11773465901624984,
      "grad_norm": 0.544129490852356,
      "learning_rate": 9.876748896803633e-05,
      "loss": 2.7351,
      "step": 336
    },
    {
      "epoch": 0.11843546055801323,
      "grad_norm": 0.6561135649681091,
      "learning_rate": 9.874178671467277e-05,
      "loss": 2.6896,
      "step": 338
    },
    {
      "epoch": 0.11913626209977662,
      "grad_norm": 0.6607089042663574,
      "learning_rate": 9.871582264183155e-05,
      "loss": 2.6664,
      "step": 340
    },
    {
      "epoch": 0.11983706364154001,
      "grad_norm": 0.6727411150932312,
      "learning_rate": 9.868959688898023e-05,
      "loss": 2.68,
      "step": 342
    },
    {
      "epoch": 0.12053786518330341,
      "grad_norm": 0.5672718286514282,
      "learning_rate": 9.86631095969921e-05,
      "loss": 2.6639,
      "step": 344
    },
    {
      "epoch": 0.1212386667250668,
      "grad_norm": 0.7188961505889893,
      "learning_rate": 9.86363609081452e-05,
      "loss": 2.6604,
      "step": 346
    },
    {
      "epoch": 0.12193946826683019,
      "grad_norm": 0.9785953760147095,
      "learning_rate": 9.86093509661218e-05,
      "loss": 2.6557,
      "step": 348
    },
    {
      "epoch": 0.12264026980859358,
      "grad_norm": 0.7856999635696411,
      "learning_rate": 9.85820799160074e-05,
      "loss": 2.6418,
      "step": 350
    },
    {
      "epoch": 0.12334107135035698,
      "grad_norm": 0.5956946015357971,
      "learning_rate": 9.855454790429015e-05,
      "loss": 2.658,
      "step": 352
    },
    {
      "epoch": 0.12404187289212036,
      "grad_norm": 0.6523074507713318,
      "learning_rate": 9.852675507885991e-05,
      "loss": 2.6743,
      "step": 354
    },
    {
      "epoch": 0.12474267443388376,
      "grad_norm": 0.71266108751297,
      "learning_rate": 9.849870158900753e-05,
      "loss": 2.6805,
      "step": 356
    },
    {
      "epoch": 0.12544347597564715,
      "grad_norm": 0.5674154162406921,
      "learning_rate": 9.847038758542404e-05,
      "loss": 2.6678,
      "step": 358
    },
    {
      "epoch": 0.12614427751741053,
      "grad_norm": 0.5430511236190796,
      "learning_rate": 9.844181322019983e-05,
      "loss": 2.643,
      "step": 360
    },
    {
      "epoch": 0.12684507905917394,
      "grad_norm": 0.508791983127594,
      "learning_rate": 9.841297864682388e-05,
      "loss": 2.6524,
      "step": 362
    },
    {
      "epoch": 0.12754588060093733,
      "grad_norm": 0.6082713603973389,
      "learning_rate": 9.838388402018282e-05,
      "loss": 2.6892,
      "step": 364
    },
    {
      "epoch": 0.1282466821427007,
      "grad_norm": 0.6065689325332642,
      "learning_rate": 9.835452949656022e-05,
      "loss": 2.6083,
      "step": 366
    },
    {
      "epoch": 0.1289474836844641,
      "grad_norm": 0.5220572352409363,
      "learning_rate": 9.83249152336357e-05,
      "loss": 2.6573,
      "step": 368
    },
    {
      "epoch": 0.1296482852262275,
      "grad_norm": 0.568534791469574,
      "learning_rate": 9.829504139048406e-05,
      "loss": 2.6266,
      "step": 370
    },
    {
      "epoch": 0.1303490867679909,
      "grad_norm": 0.6165401339530945,
      "learning_rate": 9.826490812757452e-05,
      "loss": 2.6928,
      "step": 372
    },
    {
      "epoch": 0.13104988830975428,
      "grad_norm": 0.5951835513114929,
      "learning_rate": 9.823451560676966e-05,
      "loss": 2.6468,
      "step": 374
    },
    {
      "epoch": 0.13175068985151767,
      "grad_norm": 0.4942519962787628,
      "learning_rate": 9.820386399132482e-05,
      "loss": 2.6493,
      "step": 376
    },
    {
      "epoch": 0.13245149139328108,
      "grad_norm": 0.6185161471366882,
      "learning_rate": 9.8172953445887e-05,
      "loss": 2.6741,
      "step": 378
    },
    {
      "epoch": 0.13315229293504446,
      "grad_norm": 0.5588895678520203,
      "learning_rate": 9.814178413649407e-05,
      "loss": 2.6393,
      "step": 380
    },
    {
      "epoch": 0.13385309447680785,
      "grad_norm": 0.6289598941802979,
      "learning_rate": 9.811035623057387e-05,
      "loss": 2.6022,
      "step": 382
    },
    {
      "epoch": 0.13455389601857123,
      "grad_norm": 0.6258370280265808,
      "learning_rate": 9.807866989694334e-05,
      "loss": 2.6033,
      "step": 384
    },
    {
      "epoch": 0.13525469756033462,
      "grad_norm": 0.6390899419784546,
      "learning_rate": 9.804672530580754e-05,
      "loss": 2.6413,
      "step": 386
    },
    {
      "epoch": 0.13595549910209803,
      "grad_norm": 0.6844115257263184,
      "learning_rate": 9.801452262875877e-05,
      "loss": 2.6339,
      "step": 388
    },
    {
      "epoch": 0.13665630064386142,
      "grad_norm": 0.70540452003479,
      "learning_rate": 9.798206203877569e-05,
      "loss": 2.6471,
      "step": 390
    },
    {
      "epoch": 0.1373571021856248,
      "grad_norm": 0.7336652278900146,
      "learning_rate": 9.794934371022233e-05,
      "loss": 2.6348,
      "step": 392
    },
    {
      "epoch": 0.1380579037273882,
      "grad_norm": 0.7155029773712158,
      "learning_rate": 9.79163678188472e-05,
      "loss": 2.6128,
      "step": 394
    },
    {
      "epoch": 0.1387587052691516,
      "grad_norm": 0.6354189515113831,
      "learning_rate": 9.788313454178228e-05,
      "loss": 2.6281,
      "step": 396
    },
    {
      "epoch": 0.13945950681091498,
      "grad_norm": 0.596047043800354,
      "learning_rate": 9.78496440575422e-05,
      "loss": 2.6719,
      "step": 398
    },
    {
      "epoch": 0.14016030835267837,
      "grad_norm": 0.6149719953536987,
      "learning_rate": 9.781589654602306e-05,
      "loss": 2.625,
      "step": 400
    },
    {
      "epoch": 0.14086110989444176,
      "grad_norm": 0.6066911816596985,
      "learning_rate": 9.778189218850174e-05,
      "loss": 2.6193,
      "step": 402
    },
    {
      "epoch": 0.14156191143620517,
      "grad_norm": 0.5690994262695312,
      "learning_rate": 9.774763116763466e-05,
      "loss": 2.6239,
      "step": 404
    },
    {
      "epoch": 0.14226271297796855,
      "grad_norm": 0.532486081123352,
      "learning_rate": 9.771311366745703e-05,
      "loss": 2.6264,
      "step": 406
    },
    {
      "epoch": 0.14296351451973194,
      "grad_norm": 0.5434598326683044,
      "learning_rate": 9.767833987338171e-05,
      "loss": 2.6534,
      "step": 408
    },
    {
      "epoch": 0.14366431606149532,
      "grad_norm": 0.522413432598114,
      "learning_rate": 9.764330997219822e-05,
      "loss": 2.6468,
      "step": 410
    },
    {
      "epoch": 0.14436511760325874,
      "grad_norm": 0.5612457990646362,
      "learning_rate": 9.760802415207181e-05,
      "loss": 2.6307,
      "step": 412
    },
    {
      "epoch": 0.14506591914502212,
      "grad_norm": 0.5850318670272827,
      "learning_rate": 9.757248260254244e-05,
      "loss": 2.6324,
      "step": 414
    },
    {
      "epoch": 0.1457667206867855,
      "grad_norm": 0.688555121421814,
      "learning_rate": 9.753668551452368e-05,
      "loss": 2.6066,
      "step": 416
    },
    {
      "epoch": 0.1464675222285489,
      "grad_norm": 0.6506465077400208,
      "learning_rate": 9.750063308030179e-05,
      "loss": 2.5964,
      "step": 418
    },
    {
      "epoch": 0.1471683237703123,
      "grad_norm": 0.6529019474983215,
      "learning_rate": 9.746432549353462e-05,
      "loss": 2.651,
      "step": 420
    },
    {
      "epoch": 0.1478691253120757,
      "grad_norm": 0.5469995141029358,
      "learning_rate": 9.742776294925058e-05,
      "loss": 2.6129,
      "step": 422
    },
    {
      "epoch": 0.14856992685383907,
      "grad_norm": 0.4992043673992157,
      "learning_rate": 9.739094564384758e-05,
      "loss": 2.6074,
      "step": 424
    },
    {
      "epoch": 0.14927072839560246,
      "grad_norm": 0.5064156651496887,
      "learning_rate": 9.735387377509206e-05,
      "loss": 2.6408,
      "step": 426
    },
    {
      "epoch": 0.14997152993736587,
      "grad_norm": 0.5961376428604126,
      "learning_rate": 9.731654754211781e-05,
      "loss": 2.615,
      "step": 428
    },
    {
      "epoch": 0.15067233147912926,
      "grad_norm": 0.5533669590950012,
      "learning_rate": 9.727896714542494e-05,
      "loss": 2.6225,
      "step": 430
    },
    {
      "epoch": 0.15137313302089264,
      "grad_norm": 0.5527905821800232,
      "learning_rate": 9.724113278687888e-05,
      "loss": 2.5836,
      "step": 432
    },
    {
      "epoch": 0.15207393456265603,
      "grad_norm": 0.4616098701953888,
      "learning_rate": 9.720304466970916e-05,
      "loss": 2.6236,
      "step": 434
    },
    {
      "epoch": 0.15277473610441944,
      "grad_norm": 0.5189539790153503,
      "learning_rate": 9.716470299850844e-05,
      "loss": 2.6364,
      "step": 436
    },
    {
      "epoch": 0.15347553764618282,
      "grad_norm": 0.5303817987442017,
      "learning_rate": 9.712610797923133e-05,
      "loss": 2.6097,
      "step": 438
    },
    {
      "epoch": 0.1541763391879462,
      "grad_norm": 0.5957894921302795,
      "learning_rate": 9.708725981919333e-05,
      "loss": 2.5749,
      "step": 440
    },
    {
      "epoch": 0.1548771407297096,
      "grad_norm": 0.5686895251274109,
      "learning_rate": 9.704815872706972e-05,
      "loss": 2.6319,
      "step": 442
    },
    {
      "epoch": 0.155577942271473,
      "grad_norm": 0.5570897459983826,
      "learning_rate": 9.700880491289438e-05,
      "loss": 2.6287,
      "step": 444
    },
    {
      "epoch": 0.1562787438132364,
      "grad_norm": 0.5330969095230103,
      "learning_rate": 9.696919858805873e-05,
      "loss": 2.6014,
      "step": 446
    },
    {
      "epoch": 0.15697954535499978,
      "grad_norm": 0.4891030192375183,
      "learning_rate": 9.692933996531053e-05,
      "loss": 2.6097,
      "step": 448
    },
    {
      "epoch": 0.15768034689676316,
      "grad_norm": 0.5465073585510254,
      "learning_rate": 9.688922925875285e-05,
      "loss": 2.6162,
      "step": 450
    },
    {
      "epoch": 0.15838114843852658,
      "grad_norm": 0.5483290553092957,
      "learning_rate": 9.684886668384277e-05,
      "loss": 2.5999,
      "step": 452
    },
    {
      "epoch": 0.15908194998028996,
      "grad_norm": 0.6061928868293762,
      "learning_rate": 9.68082524573903e-05,
      "loss": 2.6614,
      "step": 454
    },
    {
      "epoch": 0.15978275152205335,
      "grad_norm": 0.5806353688240051,
      "learning_rate": 9.676738679755726e-05,
      "loss": 2.6039,
      "step": 456
    },
    {
      "epoch": 0.16048355306381673,
      "grad_norm": 0.5722226500511169,
      "learning_rate": 9.672626992385602e-05,
      "loss": 2.6529,
      "step": 458
    },
    {
      "epoch": 0.16118435460558014,
      "grad_norm": 0.5939204096794128,
      "learning_rate": 9.668490205714839e-05,
      "loss": 2.6314,
      "step": 460
    },
    {
      "epoch": 0.16188515614734353,
      "grad_norm": 0.7260386943817139,
      "learning_rate": 9.664328341964436e-05,
      "loss": 2.6211,
      "step": 462
    },
    {
      "epoch": 0.1625859576891069,
      "grad_norm": 0.8503554463386536,
      "learning_rate": 9.6601414234901e-05,
      "loss": 2.6134,
      "step": 464
    },
    {
      "epoch": 0.1632867592308703,
      "grad_norm": 0.5818518996238708,
      "learning_rate": 9.655929472782116e-05,
      "loss": 2.5667,
      "step": 466
    },
    {
      "epoch": 0.1639875607726337,
      "grad_norm": 0.5678598284721375,
      "learning_rate": 9.651692512465239e-05,
      "loss": 2.6153,
      "step": 468
    },
    {
      "epoch": 0.1646883623143971,
      "grad_norm": 0.5939005613327026,
      "learning_rate": 9.647430565298555e-05,
      "loss": 2.6098,
      "step": 470
    },
    {
      "epoch": 0.16538916385616048,
      "grad_norm": 0.5300047993659973,
      "learning_rate": 9.643143654175373e-05,
      "loss": 2.6167,
      "step": 472
    },
    {
      "epoch": 0.16608996539792387,
      "grad_norm": 0.4946250319480896,
      "learning_rate": 9.638831802123101e-05,
      "loss": 2.581,
      "step": 474
    },
    {
      "epoch": 0.16679076693968728,
      "grad_norm": 0.4555206000804901,
      "learning_rate": 9.634495032303111e-05,
      "loss": 2.588,
      "step": 476
    },
    {
      "epoch": 0.16749156848145066,
      "grad_norm": 0.5159677267074585,
      "learning_rate": 9.630133368010628e-05,
      "loss": 2.5868,
      "step": 478
    },
    {
      "epoch": 0.16819237002321405,
      "grad_norm": 0.5565433502197266,
      "learning_rate": 9.625746832674597e-05,
      "loss": 2.6185,
      "step": 480
    },
    {
      "epoch": 0.16889317156497743,
      "grad_norm": 0.4775915741920471,
      "learning_rate": 9.621335449857562e-05,
      "loss": 2.5897,
      "step": 482
    },
    {
      "epoch": 0.16959397310674085,
      "grad_norm": 0.5150102376937866,
      "learning_rate": 9.616899243255532e-05,
      "loss": 2.5478,
      "step": 484
    },
    {
      "epoch": 0.17029477464850423,
      "grad_norm": 0.48455357551574707,
      "learning_rate": 9.612438236697863e-05,
      "loss": 2.5639,
      "step": 486
    },
    {
      "epoch": 0.17099557619026762,
      "grad_norm": 0.5149878859519958,
      "learning_rate": 9.607952454147121e-05,
      "loss": 2.599,
      "step": 488
    },
    {
      "epoch": 0.171696377732031,
      "grad_norm": 0.6969982385635376,
      "learning_rate": 9.603441919698963e-05,
      "loss": 2.5733,
      "step": 490
    },
    {
      "epoch": 0.17239717927379442,
      "grad_norm": 0.57285475730896,
      "learning_rate": 9.598906657582e-05,
      "loss": 2.5791,
      "step": 492
    },
    {
      "epoch": 0.1730979808155578,
      "grad_norm": 0.5704159140586853,
      "learning_rate": 9.594346692157667e-05,
      "loss": 2.5692,
      "step": 494
    },
    {
      "epoch": 0.17379878235732119,
      "grad_norm": 0.681797444820404,
      "learning_rate": 9.589762047920096e-05,
      "loss": 2.5759,
      "step": 496
    },
    {
      "epoch": 0.17449958389908457,
      "grad_norm": 0.49717003107070923,
      "learning_rate": 9.585152749495984e-05,
      "loss": 2.5848,
      "step": 498
    },
    {
      "epoch": 0.17520038544084798,
      "grad_norm": 0.48680582642555237,
      "learning_rate": 9.580518821644457e-05,
      "loss": 2.5682,
      "step": 500
    },
    {
      "epoch": 0.17590118698261137,
      "grad_norm": 0.5525830388069153,
      "learning_rate": 9.575860289256943e-05,
      "loss": 2.5894,
      "step": 502
    },
    {
      "epoch": 0.17660198852437475,
      "grad_norm": 0.5562606453895569,
      "learning_rate": 9.571177177357032e-05,
      "loss": 2.5675,
      "step": 504
    },
    {
      "epoch": 0.17730279006613814,
      "grad_norm": 0.5515877604484558,
      "learning_rate": 9.566469511100345e-05,
      "loss": 2.5877,
      "step": 506
    },
    {
      "epoch": 0.17800359160790155,
      "grad_norm": 0.6816357970237732,
      "learning_rate": 9.561737315774398e-05,
      "loss": 2.596,
      "step": 508
    },
    {
      "epoch": 0.17870439314966494,
      "grad_norm": 0.507437527179718,
      "learning_rate": 9.556980616798463e-05,
      "loss": 2.5721,
      "step": 510
    },
    {
      "epoch": 0.17940519469142832,
      "grad_norm": 0.5275202989578247,
      "learning_rate": 9.552199439723443e-05,
      "loss": 2.568,
      "step": 512
    },
    {
      "epoch": 0.1801059962331917,
      "grad_norm": 0.5467104911804199,
      "learning_rate": 9.547393810231722e-05,
      "loss": 2.5842,
      "step": 514
    },
    {
      "epoch": 0.1808067977749551,
      "grad_norm": 0.5407027006149292,
      "learning_rate": 9.542563754137031e-05,
      "loss": 2.5891,
      "step": 516
    },
    {
      "epoch": 0.1815075993167185,
      "grad_norm": 0.5731847882270813,
      "learning_rate": 9.537709297384308e-05,
      "loss": 2.6143,
      "step": 518
    },
    {
      "epoch": 0.1822084008584819,
      "grad_norm": 0.566457986831665,
      "learning_rate": 9.532830466049565e-05,
      "loss": 2.5522,
      "step": 520
    },
    {
      "epoch": 0.18290920240024527,
      "grad_norm": 0.4899183213710785,
      "learning_rate": 9.527927286339744e-05,
      "loss": 2.5961,
      "step": 522
    },
    {
      "epoch": 0.18361000394200866,
      "grad_norm": 0.4883110523223877,
      "learning_rate": 9.52299978459257e-05,
      "loss": 2.5557,
      "step": 524
    },
    {
      "epoch": 0.18431080548377207,
      "grad_norm": 0.5534235239028931,
      "learning_rate": 9.518047987276421e-05,
      "loss": 2.6452,
      "step": 526
    },
    {
      "epoch": 0.18501160702553546,
      "grad_norm": 0.47292667627334595,
      "learning_rate": 9.513071920990179e-05,
      "loss": 2.5848,
      "step": 528
    },
    {
      "epoch": 0.18571240856729884,
      "grad_norm": 0.5438964366912842,
      "learning_rate": 9.508071612463086e-05,
      "loss": 2.5332,
      "step": 530
    },
    {
      "epoch": 0.18641321010906223,
      "grad_norm": 0.5318060517311096,
      "learning_rate": 9.503047088554601e-05,
      "loss": 2.585,
      "step": 532
    },
    {
      "epoch": 0.18711401165082564,
      "grad_norm": 0.49279502034187317,
      "learning_rate": 9.497998376254267e-05,
      "loss": 2.5948,
      "step": 534
    },
    {
      "epoch": 0.18781481319258903,
      "grad_norm": 0.5161717534065247,
      "learning_rate": 9.492925502681545e-05,
      "loss": 2.5644,
      "step": 536
    },
    {
      "epoch": 0.1885156147343524,
      "grad_norm": 0.4586479663848877,
      "learning_rate": 9.487828495085684e-05,
      "loss": 2.5568,
      "step": 538
    },
    {
      "epoch": 0.1892164162761158,
      "grad_norm": 0.4390322268009186,
      "learning_rate": 9.482707380845573e-05,
      "loss": 2.5938,
      "step": 540
    },
    {
      "epoch": 0.1899172178178792,
      "grad_norm": 0.5253728628158569,
      "learning_rate": 9.47756218746959e-05,
      "loss": 2.5996,
      "step": 542
    },
    {
      "epoch": 0.1906180193596426,
      "grad_norm": 0.4567623436450958,
      "learning_rate": 9.472392942595454e-05,
      "loss": 2.5576,
      "step": 544
    },
    {
      "epoch": 0.19131882090140598,
      "grad_norm": 0.5091727375984192,
      "learning_rate": 9.467199673990077e-05,
      "loss": 2.5873,
      "step": 546
    },
    {
      "epoch": 0.19201962244316936,
      "grad_norm": 0.4959392845630646,
      "learning_rate": 9.46198240954942e-05,
      "loss": 2.5291,
      "step": 548
    },
    {
      "epoch": 0.19272042398493278,
      "grad_norm": 0.5150632262229919,
      "learning_rate": 9.456741177298336e-05,
      "loss": 2.5503,
      "step": 550
    },
    {
      "epoch": 0.19342122552669616,
      "grad_norm": 0.4603368639945984,
      "learning_rate": 9.451476005390422e-05,
      "loss": 2.5785,
      "step": 552
    },
    {
      "epoch": 0.19412202706845955,
      "grad_norm": 0.4441729784011841,
      "learning_rate": 9.446186922107873e-05,
      "loss": 2.5512,
      "step": 554
    },
    {
      "epoch": 0.19482282861022293,
      "grad_norm": 0.5432455539703369,
      "learning_rate": 9.44087395586132e-05,
      "loss": 2.5741,
      "step": 556
    },
    {
      "epoch": 0.19552363015198634,
      "grad_norm": 0.42969366908073425,
      "learning_rate": 9.435537135189687e-05,
      "loss": 2.5677,
      "step": 558
    },
    {
      "epoch": 0.19622443169374973,
      "grad_norm": 0.5706619620323181,
      "learning_rate": 9.430176488760027e-05,
      "loss": 2.556,
      "step": 560
    },
    {
      "epoch": 0.19692523323551311,
      "grad_norm": 0.7202513217926025,
      "learning_rate": 9.424792045367383e-05,
      "loss": 2.5435,
      "step": 562
    },
    {
      "epoch": 0.1976260347772765,
      "grad_norm": 0.5471363663673401,
      "learning_rate": 9.419383833934621e-05,
      "loss": 2.572,
      "step": 564
    },
    {
      "epoch": 0.1983268363190399,
      "grad_norm": 0.654058575630188,
      "learning_rate": 9.413951883512275e-05,
      "loss": 2.5432,
      "step": 566
    },
    {
      "epoch": 0.1990276378608033,
      "grad_norm": 0.6124361157417297,
      "learning_rate": 9.408496223278403e-05,
      "loss": 2.5803,
      "step": 568
    },
    {
      "epoch": 0.19972843940256668,
      "grad_norm": 0.5291132926940918,
      "learning_rate": 9.403016882538408e-05,
      "loss": 2.576,
      "step": 570
    },
    {
      "epoch": 0.20042924094433007,
      "grad_norm": 0.6087374687194824,
      "learning_rate": 9.397513890724911e-05,
      "loss": 2.5171,
      "step": 572
    },
    {
      "epoch": 0.20113004248609348,
      "grad_norm": 0.5776922106742859,
      "learning_rate": 9.391987277397566e-05,
      "loss": 2.6054,
      "step": 574
    },
    {
      "epoch": 0.20183084402785686,
      "grad_norm": 0.544319748878479,
      "learning_rate": 9.38643707224291e-05,
      "loss": 2.548,
      "step": 576
    },
    {
      "epoch": 0.20253164556962025,
      "grad_norm": 0.5210007429122925,
      "learning_rate": 9.38086330507421e-05,
      "loss": 2.6019,
      "step": 578
    },
    {
      "epoch": 0.20323244711138364,
      "grad_norm": 0.5160629153251648,
      "learning_rate": 9.375266005831297e-05,
      "loss": 2.6046,
      "step": 580
    },
    {
      "epoch": 0.20393324865314705,
      "grad_norm": 0.6452796459197998,
      "learning_rate": 9.369645204580403e-05,
      "loss": 2.566,
      "step": 582
    },
    {
      "epoch": 0.20463405019491043,
      "grad_norm": 0.5813329815864563,
      "learning_rate": 9.364000931514008e-05,
      "loss": 2.5661,
      "step": 584
    },
    {
      "epoch": 0.20533485173667382,
      "grad_norm": 0.5450593829154968,
      "learning_rate": 9.358333216950664e-05,
      "loss": 2.5769,
      "step": 586
    },
    {
      "epoch": 0.2060356532784372,
      "grad_norm": 0.5340794324874878,
      "learning_rate": 9.352642091334849e-05,
      "loss": 2.5549,
      "step": 588
    },
    {
      "epoch": 0.20673645482020062,
      "grad_norm": 0.5767348408699036,
      "learning_rate": 9.34692758523679e-05,
      "loss": 2.5604,
      "step": 590
    },
    {
      "epoch": 0.207437256361964,
      "grad_norm": 0.6048093438148499,
      "learning_rate": 9.341189729352302e-05,
      "loss": 2.5929,
      "step": 592
    },
    {
      "epoch": 0.20813805790372739,
      "grad_norm": 0.4430505335330963,
      "learning_rate": 9.33542855450263e-05,
      "loss": 2.5563,
      "step": 594
    },
    {
      "epoch": 0.20883885944549077,
      "grad_norm": 0.49373888969421387,
      "learning_rate": 9.329644091634278e-05,
      "loss": 2.5517,
      "step": 596
    },
    {
      "epoch": 0.20953966098725418,
      "grad_norm": 0.5227393507957458,
      "learning_rate": 9.323836371818837e-05,
      "loss": 2.5286,
      "step": 598
    },
    {
      "epoch": 0.21024046252901757,
      "grad_norm": 0.497405081987381,
      "learning_rate": 9.318005426252832e-05,
      "loss": 2.5638,
      "step": 600
    },
    {
      "epoch": 0.21094126407078095,
      "grad_norm": 0.48721396923065186,
      "learning_rate": 9.312151286257537e-05,
      "loss": 2.5751,
      "step": 602
    },
    {
      "epoch": 0.21164206561254434,
      "grad_norm": 0.4621741771697998,
      "learning_rate": 9.306273983278825e-05,
      "loss": 2.5654,
      "step": 604
    },
    {
      "epoch": 0.21234286715430775,
      "grad_norm": 0.4756307005882263,
      "learning_rate": 9.300373548886987e-05,
      "loss": 2.5989,
      "step": 606
    },
    {
      "epoch": 0.21304366869607114,
      "grad_norm": 0.42497771978378296,
      "learning_rate": 9.294450014776566e-05,
      "loss": 2.564,
      "step": 608
    },
    {
      "epoch": 0.21374447023783452,
      "grad_norm": 0.5173219442367554,
      "learning_rate": 9.288503412766185e-05,
      "loss": 2.5296,
      "step": 610
    },
    {
      "epoch": 0.2144452717795979,
      "grad_norm": 0.4622451066970825,
      "learning_rate": 9.28253377479838e-05,
      "loss": 2.5829,
      "step": 612
    },
    {
      "epoch": 0.21514607332136132,
      "grad_norm": 0.5879294276237488,
      "learning_rate": 9.276541132939428e-05,
      "loss": 2.5462,
      "step": 614
    },
    {
      "epoch": 0.2158468748631247,
      "grad_norm": 0.6237635612487793,
      "learning_rate": 9.270525519379165e-05,
      "loss": 2.6143,
      "step": 616
    },
    {
      "epoch": 0.2165476764048881,
      "grad_norm": 0.5845280289649963,
      "learning_rate": 9.264486966430829e-05,
      "loss": 2.5272,
      "step": 618
    },
    {
      "epoch": 0.21724847794665147,
      "grad_norm": 0.5140432715415955,
      "learning_rate": 9.258425506530872e-05,
      "loss": 2.5716,
      "step": 620
    },
    {
      "epoch": 0.2179492794884149,
      "grad_norm": 0.5868300199508667,
      "learning_rate": 9.2523411722388e-05,
      "loss": 2.5699,
      "step": 622
    },
    {
      "epoch": 0.21865008103017827,
      "grad_norm": 0.587374210357666,
      "learning_rate": 9.246233996236983e-05,
      "loss": 2.5335,
      "step": 624
    },
    {
      "epoch": 0.21935088257194166,
      "grad_norm": 0.5000743865966797,
      "learning_rate": 9.240104011330489e-05,
      "loss": 2.5367,
      "step": 626
    },
    {
      "epoch": 0.22005168411370504,
      "grad_norm": 0.5124289393424988,
      "learning_rate": 9.233951250446902e-05,
      "loss": 2.5598,
      "step": 628
    },
    {
      "epoch": 0.22075248565546846,
      "grad_norm": 0.4815032482147217,
      "learning_rate": 9.227775746636158e-05,
      "loss": 2.5468,
      "step": 630
    },
    {
      "epoch": 0.22145328719723184,
      "grad_norm": 0.5089353919029236,
      "learning_rate": 9.22157753307035e-05,
      "loss": 2.5482,
      "step": 632
    },
    {
      "epoch": 0.22215408873899523,
      "grad_norm": 0.468841552734375,
      "learning_rate": 9.215356643043559e-05,
      "loss": 2.5138,
      "step": 634
    },
    {
      "epoch": 0.2228548902807586,
      "grad_norm": 0.511968731880188,
      "learning_rate": 9.209113109971676e-05,
      "loss": 2.5481,
      "step": 636
    },
    {
      "epoch": 0.223555691822522,
      "grad_norm": 0.6082082390785217,
      "learning_rate": 9.202846967392217e-05,
      "loss": 2.5459,
      "step": 638
    },
    {
      "epoch": 0.2242564933642854,
      "grad_norm": 0.4931623637676239,
      "learning_rate": 9.196558248964151e-05,
      "loss": 2.5785,
      "step": 640
    },
    {
      "epoch": 0.2249572949060488,
      "grad_norm": 0.5754916071891785,
      "learning_rate": 9.190246988467712e-05,
      "loss": 2.5166,
      "step": 642
    },
    {
      "epoch": 0.22565809644781218,
      "grad_norm": 0.5335285067558289,
      "learning_rate": 9.183913219804221e-05,
      "loss": 2.4976,
      "step": 644
    },
    {
      "epoch": 0.22635889798957556,
      "grad_norm": 0.4676333963871002,
      "learning_rate": 9.1775569769959e-05,
      "loss": 2.5361,
      "step": 646
    },
    {
      "epoch": 0.22705969953133898,
      "grad_norm": 0.48826783895492554,
      "learning_rate": 9.171178294185697e-05,
      "loss": 2.5347,
      "step": 648
    },
    {
      "epoch": 0.22776050107310236,
      "grad_norm": 0.509066104888916,
      "learning_rate": 9.164777205637094e-05,
      "loss": 2.5326,
      "step": 650
    },
    {
      "epoch": 0.22846130261486575,
      "grad_norm": 0.5001896619796753,
      "learning_rate": 9.158353745733927e-05,
      "loss": 2.5605,
      "step": 652
    },
    {
      "epoch": 0.22916210415662913,
      "grad_norm": 0.5497420430183411,
      "learning_rate": 9.151907948980206e-05,
      "loss": 2.5295,
      "step": 654
    },
    {
      "epoch": 0.22986290569839254,
      "grad_norm": 0.43462875485420227,
      "learning_rate": 9.145439849999919e-05,
      "loss": 2.5358,
      "step": 656
    },
    {
      "epoch": 0.23056370724015593,
      "grad_norm": 0.5398270487785339,
      "learning_rate": 9.138949483536852e-05,
      "loss": 2.5464,
      "step": 658
    },
    {
      "epoch": 0.23126450878191931,
      "grad_norm": 0.5165109038352966,
      "learning_rate": 9.132436884454408e-05,
      "loss": 2.5043,
      "step": 660
    },
    {
      "epoch": 0.2319653103236827,
      "grad_norm": 0.6717212200164795,
      "learning_rate": 9.125902087735407e-05,
      "loss": 2.547,
      "step": 662
    },
    {
      "epoch": 0.2326661118654461,
      "grad_norm": 0.4584912061691284,
      "learning_rate": 9.119345128481909e-05,
      "loss": 2.5106,
      "step": 664
    },
    {
      "epoch": 0.2333669134072095,
      "grad_norm": 0.5452204942703247,
      "learning_rate": 9.112766041915019e-05,
      "loss": 2.5189,
      "step": 666
    },
    {
      "epoch": 0.23406771494897288,
      "grad_norm": 0.5055968761444092,
      "learning_rate": 9.106164863374702e-05,
      "loss": 2.4957,
      "step": 668
    },
    {
      "epoch": 0.23476851649073627,
      "grad_norm": 0.4905461072921753,
      "learning_rate": 9.099541628319592e-05,
      "loss": 2.5523,
      "step": 670
    },
    {
      "epoch": 0.23546931803249968,
      "grad_norm": 0.44840848445892334,
      "learning_rate": 9.092896372326798e-05,
      "loss": 2.4713,
      "step": 672
    },
    {
      "epoch": 0.23617011957426307,
      "grad_norm": 0.46489134430885315,
      "learning_rate": 9.086229131091717e-05,
      "loss": 2.5071,
      "step": 674
    },
    {
      "epoch": 0.23687092111602645,
      "grad_norm": 0.4460737705230713,
      "learning_rate": 9.079539940427845e-05,
      "loss": 2.5799,
      "step": 676
    },
| { | |
| "epoch": 0.23757172265778984, | |
| "grad_norm": 0.5268511176109314, | |
| "learning_rate": 9.072828836266574e-05, | |
| "loss": 2.5574, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.23827252419955325, | |
| "grad_norm": 0.5001477003097534, | |
| "learning_rate": 9.066095854657011e-05, | |
| "loss": 2.5117, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.23897332574131663, | |
| "grad_norm": 0.5136899352073669, | |
| "learning_rate": 9.059341031765773e-05, | |
| "loss": 2.4855, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.23967412728308002, | |
| "grad_norm": 0.5532418489456177, | |
| "learning_rate": 9.052564403876808e-05, | |
| "loss": 2.5623, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.2403749288248434, | |
| "grad_norm": 0.4908037483692169, | |
| "learning_rate": 9.045766007391185e-05, | |
| "loss": 2.5248, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.24107573036660682, | |
| "grad_norm": 0.45994317531585693, | |
| "learning_rate": 9.038945878826903e-05, | |
| "loss": 2.5007, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.2417765319083702, | |
| "grad_norm": 0.5593565702438354, | |
| "learning_rate": 9.032104054818698e-05, | |
| "loss": 2.5759, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2424773334501336, | |
| "grad_norm": 0.5076695084571838, | |
| "learning_rate": 9.025240572117846e-05, | |
| "loss": 2.5272, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.24317813499189697, | |
| "grad_norm": 0.3996141850948334, | |
| "learning_rate": 9.018355467591962e-05, | |
| "loss": 2.5317, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.24387893653366038, | |
| "grad_norm": 0.49347859621047974, | |
| "learning_rate": 9.011448778224802e-05, | |
| "loss": 2.5186, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.24457973807542377, | |
| "grad_norm": 0.5040503144264221, | |
| "learning_rate": 9.004520541116075e-05, | |
| "loss": 2.5015, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.24528053961718715, | |
| "grad_norm": 0.4658913016319275, | |
| "learning_rate": 8.997570793481223e-05, | |
| "loss": 2.5481, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.24598134115895054, | |
| "grad_norm": 0.47850051522254944, | |
| "learning_rate": 8.990599572651242e-05, | |
| "loss": 2.5505, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.24668214270071395, | |
| "grad_norm": 0.48090964555740356, | |
| "learning_rate": 8.983606916072469e-05, | |
| "loss": 2.5669, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.24738294424247734, | |
| "grad_norm": 0.5716775059700012, | |
| "learning_rate": 8.976592861306384e-05, | |
| "loss": 2.523, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.24808374578424072, | |
| "grad_norm": 0.49985334277153015, | |
| "learning_rate": 8.969557446029409e-05, | |
| "loss": 2.5439, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.2487845473260041, | |
| "grad_norm": 0.6331408023834229, | |
| "learning_rate": 8.962500708032708e-05, | |
| "loss": 2.5601, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.24948534886776752, | |
| "grad_norm": 0.5418590307235718, | |
| "learning_rate": 8.955422685221979e-05, | |
| "loss": 2.5495, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.2501861504095309, | |
| "grad_norm": 0.5396260619163513, | |
| "learning_rate": 8.948323415617253e-05, | |
| "loss": 2.5151, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.2508869519512943, | |
| "grad_norm": 0.5641499161720276, | |
| "learning_rate": 8.941202937352686e-05, | |
| "loss": 2.4895, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.2515877534930577, | |
| "grad_norm": 0.47651517391204834, | |
| "learning_rate": 8.934061288676365e-05, | |
| "loss": 2.5634, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.25228855503482106, | |
| "grad_norm": 0.5351449251174927, | |
| "learning_rate": 8.92689850795009e-05, | |
| "loss": 2.4804, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.25298935657658445, | |
| "grad_norm": 0.5856335759162903, | |
| "learning_rate": 8.919714633649172e-05, | |
| "loss": 2.5304, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.2536901581183479, | |
| "grad_norm": 0.4513723850250244, | |
| "learning_rate": 8.912509704362232e-05, | |
| "loss": 2.5369, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.25439095966011127, | |
| "grad_norm": 0.4676707983016968, | |
| "learning_rate": 8.905283758790985e-05, | |
| "loss": 2.5589, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.25509176120187466, | |
| "grad_norm": 0.5069173574447632, | |
| "learning_rate": 8.89803683575004e-05, | |
| "loss": 2.4958, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.25579256274363804, | |
| "grad_norm": 0.4774676263332367, | |
| "learning_rate": 8.890768974166685e-05, | |
| "loss": 2.5229, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2564933642854014, | |
| "grad_norm": 0.548409104347229, | |
| "learning_rate": 8.883480213080681e-05, | |
| "loss": 2.4815, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.2571941658271648, | |
| "grad_norm": 0.4854792356491089, | |
| "learning_rate": 8.876170591644054e-05, | |
| "loss": 2.5118, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.2578949673689282, | |
| "grad_norm": 0.4988788664340973, | |
| "learning_rate": 8.868840149120876e-05, | |
| "loss": 2.5073, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.2585957689106916, | |
| "grad_norm": 0.4614211618900299, | |
| "learning_rate": 8.861488924887071e-05, | |
| "loss": 2.4866, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.259296570452455, | |
| "grad_norm": 0.4878149926662445, | |
| "learning_rate": 8.854116958430185e-05, | |
| "loss": 2.5315, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2599973719942184, | |
| "grad_norm": 0.47185149788856506, | |
| "learning_rate": 8.846724289349189e-05, | |
| "loss": 2.4766, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.2606981735359818, | |
| "grad_norm": 0.446411669254303, | |
| "learning_rate": 8.839310957354249e-05, | |
| "loss": 2.5278, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.2613989750777452, | |
| "grad_norm": 0.45869573950767517, | |
| "learning_rate": 8.831877002266536e-05, | |
| "loss": 2.5051, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.26209977661950856, | |
| "grad_norm": 0.4578917920589447, | |
| "learning_rate": 8.82442246401799e-05, | |
| "loss": 2.4903, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.26280057816127195, | |
| "grad_norm": 0.4389136731624603, | |
| "learning_rate": 8.816947382651116e-05, | |
| "loss": 2.519, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.26350137970303533, | |
| "grad_norm": 0.4686265289783478, | |
| "learning_rate": 8.80945179831877e-05, | |
| "loss": 2.5537, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.2642021812447987, | |
| "grad_norm": 0.49357905983924866, | |
| "learning_rate": 8.801935751283944e-05, | |
| "loss": 2.4971, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.26490298278656216, | |
| "grad_norm": 0.5659007430076599, | |
| "learning_rate": 8.794399281919537e-05, | |
| "loss": 2.5291, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.26560378432832554, | |
| "grad_norm": 0.5637578964233398, | |
| "learning_rate": 8.786842430708157e-05, | |
| "loss": 2.5335, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.26630458587008893, | |
| "grad_norm": 0.47859886288642883, | |
| "learning_rate": 8.779265238241888e-05, | |
| "loss": 2.5104, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2670053874118523, | |
| "grad_norm": 0.5444939732551575, | |
| "learning_rate": 8.771667745222082e-05, | |
| "loss": 2.4823, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.2677061889536157, | |
| "grad_norm": 0.5456621050834656, | |
| "learning_rate": 8.76404999245914e-05, | |
| "loss": 2.5027, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.2684069904953791, | |
| "grad_norm": 0.5168180465698242, | |
| "learning_rate": 8.75641202087228e-05, | |
| "loss": 2.5562, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.26910779203714247, | |
| "grad_norm": 0.5675712823867798, | |
| "learning_rate": 8.748753871489333e-05, | |
| "loss": 2.5195, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.26980859357890585, | |
| "grad_norm": 0.4084811806678772, | |
| "learning_rate": 8.741075585446514e-05, | |
| "loss": 2.4853, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.27050939512066924, | |
| "grad_norm": 0.4109669327735901, | |
| "learning_rate": 8.733377203988208e-05, | |
| "loss": 2.5186, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.2712101966624327, | |
| "grad_norm": 0.5689636468887329, | |
| "learning_rate": 8.725658768466738e-05, | |
| "loss": 2.5106, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.27191099820419606, | |
| "grad_norm": 0.4750414192676544, | |
| "learning_rate": 8.71792032034215e-05, | |
| "loss": 2.4927, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.27261179974595945, | |
| "grad_norm": 0.4577466547489166, | |
| "learning_rate": 8.710161901181993e-05, | |
| "loss": 2.5005, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.27331260128772283, | |
| "grad_norm": 0.4786745011806488, | |
| "learning_rate": 8.702383552661081e-05, | |
| "loss": 2.5099, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2740134028294862, | |
| "grad_norm": 0.508456289768219, | |
| "learning_rate": 8.694585316561296e-05, | |
| "loss": 2.5377, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.2747142043712496, | |
| "grad_norm": 0.49584171175956726, | |
| "learning_rate": 8.686767234771333e-05, | |
| "loss": 2.5208, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.275415005913013, | |
| "grad_norm": 0.4523308575153351, | |
| "learning_rate": 8.678929349286498e-05, | |
| "loss": 2.5663, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.2761158074547764, | |
| "grad_norm": 0.411276638507843, | |
| "learning_rate": 8.671071702208467e-05, | |
| "loss": 2.5076, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.2768166089965398, | |
| "grad_norm": 0.47366130352020264, | |
| "learning_rate": 8.663194335745071e-05, | |
| "loss": 2.4725, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2775174105383032, | |
| "grad_norm": 0.44845113158226013, | |
| "learning_rate": 8.655297292210067e-05, | |
| "loss": 2.5204, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.2782182120800666, | |
| "grad_norm": 0.4630947709083557, | |
| "learning_rate": 8.647380614022902e-05, | |
| "loss": 2.4848, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.27891901362182997, | |
| "grad_norm": 0.4739050567150116, | |
| "learning_rate": 8.639444343708496e-05, | |
| "loss": 2.4975, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.27961981516359335, | |
| "grad_norm": 0.41872844099998474, | |
| "learning_rate": 8.631488523897011e-05, | |
| "loss": 2.5105, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.28032061670535674, | |
| "grad_norm": 0.5174891948699951, | |
| "learning_rate": 8.623513197323615e-05, | |
| "loss": 2.4428, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2810214182471201, | |
| "grad_norm": 0.4543634057044983, | |
| "learning_rate": 8.615518406828262e-05, | |
| "loss": 2.5248, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.2817222197888835, | |
| "grad_norm": 0.433250367641449, | |
| "learning_rate": 8.607504195355458e-05, | |
| "loss": 2.4887, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.28242302133064695, | |
| "grad_norm": 0.47642698884010315, | |
| "learning_rate": 8.599470605954025e-05, | |
| "loss": 2.5391, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.28312382287241034, | |
| "grad_norm": 0.45496654510498047, | |
| "learning_rate": 8.59141768177688e-05, | |
| "loss": 2.5444, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.2838246244141737, | |
| "grad_norm": 0.4619695544242859, | |
| "learning_rate": 8.583345466080796e-05, | |
| "loss": 2.504, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2845254259559371, | |
| "grad_norm": 0.4610481262207031, | |
| "learning_rate": 8.575254002226173e-05, | |
| "loss": 2.4904, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.2852262274977005, | |
| "grad_norm": 0.4597660005092621, | |
| "learning_rate": 8.5671433336768e-05, | |
| "loss": 2.4923, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.2859270290394639, | |
| "grad_norm": 0.5440905094146729, | |
| "learning_rate": 8.559013503999626e-05, | |
| "loss": 2.4806, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.28662783058122726, | |
| "grad_norm": 0.4667718708515167, | |
| "learning_rate": 8.550864556864529e-05, | |
| "loss": 2.5595, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.28732863212299065, | |
| "grad_norm": 0.47145599126815796, | |
| "learning_rate": 8.542696536044075e-05, | |
| "loss": 2.4813, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2880294336647541, | |
| "grad_norm": 0.4581964313983917, | |
| "learning_rate": 8.534509485413284e-05, | |
| "loss": 2.5467, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.28873023520651747, | |
| "grad_norm": 0.5127134919166565, | |
| "learning_rate": 8.5263034489494e-05, | |
| "loss": 2.5067, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.28943103674828086, | |
| "grad_norm": 0.5416949391365051, | |
| "learning_rate": 8.518078470731644e-05, | |
| "loss": 2.4669, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.29013183829004424, | |
| "grad_norm": 0.442828506231308, | |
| "learning_rate": 8.509834594940991e-05, | |
| "loss": 2.4708, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.2908326398318076, | |
| "grad_norm": 0.4708557426929474, | |
| "learning_rate": 8.501571865859924e-05, | |
| "loss": 2.5192, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.291533441373571, | |
| "grad_norm": 0.4371870458126068, | |
| "learning_rate": 8.49329032787219e-05, | |
| "loss": 2.4778, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.2922342429153344, | |
| "grad_norm": 0.48408806324005127, | |
| "learning_rate": 8.48499002546258e-05, | |
| "loss": 2.4868, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.2929350444570978, | |
| "grad_norm": 0.45126622915267944, | |
| "learning_rate": 8.47667100321667e-05, | |
| "loss": 2.4999, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.2936358459988612, | |
| "grad_norm": 0.4448654353618622, | |
| "learning_rate": 8.468333305820599e-05, | |
| "loss": 2.4848, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.2943366475406246, | |
| "grad_norm": 0.47776126861572266, | |
| "learning_rate": 8.459976978060815e-05, | |
| "loss": 2.5515, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.295037449082388, | |
| "grad_norm": 0.4572128653526306, | |
| "learning_rate": 8.45160206482384e-05, | |
| "loss": 2.5172, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.2957382506241514, | |
| "grad_norm": 0.4419424831867218, | |
| "learning_rate": 8.443208611096036e-05, | |
| "loss": 2.5035, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.29643905216591476, | |
| "grad_norm": 0.42213693261146545, | |
| "learning_rate": 8.434796661963344e-05, | |
| "loss": 2.542, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.29713985370767815, | |
| "grad_norm": 0.446344792842865, | |
| "learning_rate": 8.426366262611067e-05, | |
| "loss": 2.5119, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.29784065524944153, | |
| "grad_norm": 0.44233253598213196, | |
| "learning_rate": 8.417917458323607e-05, | |
| "loss": 2.4985, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2985414567912049, | |
| "grad_norm": 0.492471843957901, | |
| "learning_rate": 8.40945029448423e-05, | |
| "loss": 2.4553, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.29924225833296836, | |
| "grad_norm": 0.4490063488483429, | |
| "learning_rate": 8.400964816574826e-05, | |
| "loss": 2.5389, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.29994305987473174, | |
| "grad_norm": 0.5494585633277893, | |
| "learning_rate": 8.392461070175652e-05, | |
| "loss": 2.5163, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.30064386141649513, | |
| "grad_norm": 0.4822872281074524, | |
| "learning_rate": 8.383939100965103e-05, | |
| "loss": 2.504, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.3013446629582585, | |
| "grad_norm": 0.5434439778327942, | |
| "learning_rate": 8.375398954719456e-05, | |
| "loss": 2.4841, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3020454645000219, | |
| "grad_norm": 0.5055859088897705, | |
| "learning_rate": 8.366840677312626e-05, | |
| "loss": 2.4985, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.3027462660417853, | |
| "grad_norm": 0.44319674372673035, | |
| "learning_rate": 8.358264314715923e-05, | |
| "loss": 2.4661, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.30344706758354867, | |
| "grad_norm": 0.5121539235115051, | |
| "learning_rate": 8.349669912997799e-05, | |
| "loss": 2.4797, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.30414786912531205, | |
| "grad_norm": 0.4748767912387848, | |
| "learning_rate": 8.341057518323607e-05, | |
| "loss": 2.5009, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.3048486706670755, | |
| "grad_norm": 0.4823194742202759, | |
| "learning_rate": 8.332427176955353e-05, | |
| "loss": 2.4798, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3055494722088389, | |
| "grad_norm": 0.4242302477359772, | |
| "learning_rate": 8.323778935251437e-05, | |
| "loss": 2.4764, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.30625027375060226, | |
| "grad_norm": 0.46324998140335083, | |
| "learning_rate": 8.31511283966642e-05, | |
| "loss": 2.509, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.30695107529236565, | |
| "grad_norm": 0.4894976317882538, | |
| "learning_rate": 8.30642893675076e-05, | |
| "loss": 2.498, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.30765187683412903, | |
| "grad_norm": 0.4574197232723236, | |
| "learning_rate": 8.297727273150573e-05, | |
| "loss": 2.48, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.3083526783758924, | |
| "grad_norm": 0.44225645065307617, | |
| "learning_rate": 8.289007895607375e-05, | |
| "loss": 2.502, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3090534799176558, | |
| "grad_norm": 0.47749781608581543, | |
| "learning_rate": 8.28027085095783e-05, | |
| "loss": 2.5043, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.3097542814594192, | |
| "grad_norm": 0.4569682478904724, | |
| "learning_rate": 8.271516186133511e-05, | |
| "loss": 2.4454, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.31045508300118263, | |
| "grad_norm": 0.4561903178691864, | |
| "learning_rate": 8.262743948160632e-05, | |
| "loss": 2.4826, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.311155884542946, | |
| "grad_norm": 0.4749627411365509, | |
| "learning_rate": 8.253954184159803e-05, | |
| "loss": 2.4707, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.3118566860847094, | |
| "grad_norm": 0.4455653131008148, | |
| "learning_rate": 8.245146941345774e-05, | |
| "loss": 2.4647, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3125574876264728, | |
| "grad_norm": 0.4758734405040741, | |
| "learning_rate": 8.236322267027193e-05, | |
| "loss": 2.4885, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.31325828916823617, | |
| "grad_norm": 0.45016252994537354, | |
| "learning_rate": 8.227480208606333e-05, | |
| "loss": 2.4993, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.31395909070999956, | |
| "grad_norm": 0.48177486658096313, | |
| "learning_rate": 8.218620813578847e-05, | |
| "loss": 2.4838, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.31465989225176294, | |
| "grad_norm": 0.4863053858280182, | |
| "learning_rate": 8.209744129533519e-05, | |
| "loss": 2.5381, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.3153606937935263, | |
| "grad_norm": 0.49010857939720154, | |
| "learning_rate": 8.200850204151995e-05, | |
| "loss": 2.5721, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3160614953352897, | |
| "grad_norm": 0.43315884470939636, | |
| "learning_rate": 8.191939085208542e-05, | |
| "loss": 2.4976, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.31676229687705315, | |
| "grad_norm": 0.4580542743206024, | |
| "learning_rate": 8.183010820569775e-05, | |
| "loss": 2.4885, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.31746309841881654, | |
| "grad_norm": 0.40409061312675476, | |
| "learning_rate": 8.17406545819441e-05, | |
| "loss": 2.4872, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.3181638999605799, | |
| "grad_norm": 0.5763331055641174, | |
| "learning_rate": 8.16510304613301e-05, | |
| "loss": 2.4991, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.3188647015023433, | |
| "grad_norm": 0.4705376625061035, | |
| "learning_rate": 8.156123632527714e-05, | |
| "loss": 2.5071, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3195655030441067, | |
| "grad_norm": 0.42091286182403564, | |
| "learning_rate": 8.147127265611991e-05, | |
| "loss": 2.4639, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.3202663045858701, | |
| "grad_norm": 0.4637336730957031, | |
| "learning_rate": 8.138113993710377e-05, | |
| "loss": 2.5368, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.32096710612763346, | |
| "grad_norm": 0.48139557242393494, | |
| "learning_rate": 8.129083865238207e-05, | |
| "loss": 2.4677, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.32166790766939685, | |
| "grad_norm": 0.4305325448513031, | |
| "learning_rate": 8.120036928701367e-05, | |
| "loss": 2.5125, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.3223687092111603, | |
| "grad_norm": 0.4576675295829773, | |
| "learning_rate": 8.110973232696029e-05, | |
| "loss": 2.4559, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.32306951075292367, | |
| "grad_norm": 0.4506300687789917, | |
| "learning_rate": 8.10189282590839e-05, | |
| "loss": 2.46, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.32377031229468706, | |
| "grad_norm": 0.4221961200237274, | |
| "learning_rate": 8.092795757114405e-05, | |
| "loss": 2.5269, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.32447111383645044, | |
| "grad_norm": 0.4183507561683655, | |
| "learning_rate": 8.083682075179535e-05, | |
| "loss": 2.4963, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.3251719153782138, | |
| "grad_norm": 0.40157100558280945, | |
| "learning_rate": 8.07455182905848e-05, | |
| "loss": 2.497, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.3258727169199772, | |
| "grad_norm": 0.41130340099334717, | |
| "learning_rate": 8.06540506779491e-05, | |
| "loss": 2.4894, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3265735184617406, | |
| "grad_norm": 0.45491817593574524, | |
| "learning_rate": 8.056241840521212e-05, | |
| "loss": 2.4991, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.327274320003504, | |
| "grad_norm": 0.5285101532936096, | |
| "learning_rate": 8.047062196458222e-05, | |
| "loss": 2.4956, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.3279751215452674, | |
| "grad_norm": 0.4162616729736328, | |
| "learning_rate": 8.037866184914952e-05, | |
| "loss": 2.4349, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.3286759230870308, | |
| "grad_norm": 0.47431331872940063, | |
| "learning_rate": 8.028653855288342e-05, | |
| "loss": 2.4502, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.3293767246287942, | |
| "grad_norm": 0.516228437423706, | |
| "learning_rate": 8.019425257062983e-05, | |
| "loss": 2.463, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3300775261705576, | |
| "grad_norm": 0.4729955196380615, | |
| "learning_rate": 8.010180439810852e-05, | |
| "loss": 2.4764, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.33077832771232096, | |
| "grad_norm": 0.4590371549129486, | |
| "learning_rate": 8.000919453191046e-05, | |
| "loss": 2.4677, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.33147912925408435, | |
| "grad_norm": 0.4180818498134613, | |
| "learning_rate": 7.99164234694952e-05, | |
| "loss": 2.4478, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.33217993079584773, | |
| "grad_norm": 0.459830641746521, | |
| "learning_rate": 7.982349170918819e-05, | |
| "loss": 2.5018, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.3328807323376111, | |
| "grad_norm": 0.42037877440452576, | |
| "learning_rate": 7.9730399750178e-05, | |
| "loss": 2.5018, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.33358153387937456, | |
| "grad_norm": 0.48284855484962463, | |
| "learning_rate": 7.963714809251375e-05, | |
| "loss": 2.4853, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.33428233542113794, | |
| "grad_norm": 0.4275258779525757, | |
| "learning_rate": 7.954373723710247e-05, | |
| "loss": 2.4716, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.33498313696290133, | |
| "grad_norm": 0.4475047290325165, | |
| "learning_rate": 7.945016768570619e-05, | |
| "loss": 2.5075, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.3356839385046647, | |
| "grad_norm": 0.4026126265525818, | |
| "learning_rate": 7.93564399409395e-05, | |
| "loss": 2.4381, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.3363847400464281, | |
| "grad_norm": 0.4063897132873535, | |
| "learning_rate": 7.926255450626668e-05, | |
| "loss": 2.4607, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.3370855415881915, | |
| "grad_norm": 0.4034193158149719, | |
| "learning_rate": 7.916851188599908e-05, | |
| "loss": 2.4615, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.33778634312995487, | |
| "grad_norm": 0.449735552072525, | |
| "learning_rate": 7.907431258529232e-05, | |
| "loss": 2.447, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.33848714467171825, | |
| "grad_norm": 0.46238863468170166, | |
| "learning_rate": 7.897995711014373e-05, | |
| "loss": 2.5097, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.3391879462134817, | |
| "grad_norm": 0.4304943084716797, | |
| "learning_rate": 7.88854459673895e-05, | |
| "loss": 2.48, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.3398887477552451, | |
| "grad_norm": 0.4926084578037262, | |
| "learning_rate": 7.879077966470194e-05, | |
| "loss": 2.4716, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.34058954929700846, | |
| "grad_norm": 0.4950031638145447, | |
| "learning_rate": 7.86959587105869e-05, | |
| "loss": 2.4648, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.34129035083877185, | |
| "grad_norm": 0.4255363345146179, | |
| "learning_rate": 7.860098361438092e-05, | |
| "loss": 2.4388, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.34199115238053523, | |
| "grad_norm": 0.48036453127861023, | |
| "learning_rate": 7.85058548862485e-05, | |
| "loss": 2.4286, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.3426919539222986, | |
| "grad_norm": 0.43235254287719727, | |
| "learning_rate": 7.84105730371794e-05, | |
| "loss": 2.5079, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.343392755464062, | |
| "grad_norm": 0.4415871500968933, | |
| "learning_rate": 7.831513857898589e-05, | |
| "loss": 2.4995, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3440935570058254, | |
| "grad_norm": 0.4261731207370758, | |
| "learning_rate": 7.821955202429997e-05, | |
| "loss": 2.4732, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.34479435854758883, | |
| "grad_norm": 0.4182513654232025, | |
| "learning_rate": 7.812381388657066e-05, | |
| "loss": 2.4424, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.3454951600893522, | |
| "grad_norm": 0.38975790143013, | |
| "learning_rate": 7.802792468006119e-05, | |
| "loss": 2.4672, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.3461959616311156, | |
| "grad_norm": 0.4010710120201111, | |
| "learning_rate": 7.793188491984626e-05, | |
| "loss": 2.4458, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.346896763172879, | |
| "grad_norm": 0.4334274232387543, | |
| "learning_rate": 7.783569512180933e-05, | |
| "loss": 2.4296, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.34759756471464237, | |
| "grad_norm": 0.42379283905029297, | |
| "learning_rate": 7.77393558026397e-05, | |
| "loss": 2.4737, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.34829836625640576, | |
| "grad_norm": 0.3837527632713318, | |
| "learning_rate": 7.764286747982998e-05, | |
| "loss": 2.5038, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.34899916779816914, | |
| "grad_norm": 0.3951718211174011, | |
| "learning_rate": 7.754623067167301e-05, | |
| "loss": 2.4743, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.3496999693399325, | |
| "grad_norm": 0.42385029792785645, | |
| "learning_rate": 7.744944589725931e-05, | |
| "loss": 2.4839, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.35040077088169597, | |
| "grad_norm": 0.35979923605918884, | |
| "learning_rate": 7.73525136764742e-05, | |
| "loss": 2.466, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.35110157242345935, | |
| "grad_norm": 0.3813255727291107, | |
| "learning_rate": 7.725543452999501e-05, | |
| "loss": 2.4657, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.35180237396522274, | |
| "grad_norm": 0.44004762172698975, | |
| "learning_rate": 7.715820897928831e-05, | |
| "loss": 2.4969, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.3525031755069861, | |
| "grad_norm": 0.41002413630485535, | |
| "learning_rate": 7.706083754660704e-05, | |
| "loss": 2.4762, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.3532039770487495, | |
| "grad_norm": 0.39008840918540955, | |
| "learning_rate": 7.696332075498778e-05, | |
| "loss": 2.4857, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.3539047785905129, | |
| "grad_norm": 0.38840317726135254, | |
| "learning_rate": 7.686565912824797e-05, | |
| "loss": 2.4517, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3546055801322763, | |
| "grad_norm": 0.4124884605407715, | |
| "learning_rate": 7.676785319098292e-05, | |
| "loss": 2.5003, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.35530638167403966, | |
| "grad_norm": 0.39664867520332336, | |
| "learning_rate": 7.666990346856323e-05, | |
| "loss": 2.4567, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.3560071832158031, | |
| "grad_norm": 0.41662028431892395, | |
| "learning_rate": 7.65718104871318e-05, | |
| "loss": 2.4603, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.3567079847575665, | |
| "grad_norm": 0.41218042373657227, | |
| "learning_rate": 7.647357477360103e-05, | |
| "loss": 2.4775, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.3574087862993299, | |
| "grad_norm": 0.4241933226585388, | |
| "learning_rate": 7.637519685565007e-05, | |
| "loss": 2.4581, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.35810958784109326, | |
| "grad_norm": 0.4417102336883545, | |
| "learning_rate": 7.627667726172188e-05, | |
| "loss": 2.4707, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.35881038938285664, | |
| "grad_norm": 0.4153529405593872, | |
| "learning_rate": 7.617801652102047e-05, | |
| "loss": 2.4899, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.35951119092462, | |
| "grad_norm": 0.42329278588294983, | |
| "learning_rate": 7.607921516350805e-05, | |
| "loss": 2.4753, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.3602119924663834, | |
| "grad_norm": 0.4246659576892853, | |
| "learning_rate": 7.598027371990209e-05, | |
| "loss": 2.4887, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.3609127940081468, | |
| "grad_norm": 0.4006962180137634, | |
| "learning_rate": 7.58811927216726e-05, | |
| "loss": 2.4446, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3616135955499102, | |
| "grad_norm": 0.4364110231399536, | |
| "learning_rate": 7.578197270103922e-05, | |
| "loss": 2.4661, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.3623143970916736, | |
| "grad_norm": 0.41410303115844727, | |
| "learning_rate": 7.568261419096829e-05, | |
| "loss": 2.5071, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.363015198633437, | |
| "grad_norm": 0.4187104403972626, | |
| "learning_rate": 7.558311772517015e-05, | |
| "loss": 2.4325, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.3637160001752004, | |
| "grad_norm": 0.3853437304496765, | |
| "learning_rate": 7.548348383809609e-05, | |
| "loss": 2.5366, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.3644168017169638, | |
| "grad_norm": 0.4048047363758087, | |
| "learning_rate": 7.538371306493565e-05, | |
| "loss": 2.5017, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.36511760325872716, | |
| "grad_norm": 0.5083643198013306, | |
| "learning_rate": 7.528380594161357e-05, | |
| "loss": 2.4921, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.36581840480049055, | |
| "grad_norm": 0.48616695404052734, | |
| "learning_rate": 7.51837630047871e-05, | |
| "loss": 2.4939, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.36651920634225393, | |
| "grad_norm": 0.4780706763267517, | |
| "learning_rate": 7.508358479184292e-05, | |
| "loss": 2.4355, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.3672200078840173, | |
| "grad_norm": 0.48671412467956543, | |
| "learning_rate": 7.498327184089444e-05, | |
| "loss": 2.4921, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.36792080942578076, | |
| "grad_norm": 0.4375866651535034, | |
| "learning_rate": 7.488282469077878e-05, | |
| "loss": 2.47, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.36862161096754414, | |
| "grad_norm": 0.40635204315185547, | |
| "learning_rate": 7.478224388105395e-05, | |
| "loss": 2.4903, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.36932241250930753, | |
| "grad_norm": 0.4381403923034668, | |
| "learning_rate": 7.468152995199586e-05, | |
| "loss": 2.4512, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.3700232140510709, | |
| "grad_norm": 0.4581202566623688, | |
| "learning_rate": 7.458068344459556e-05, | |
| "loss": 2.461, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.3707240155928343, | |
| "grad_norm": 0.41336745023727417, | |
| "learning_rate": 7.447970490055615e-05, | |
| "loss": 2.5116, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.3714248171345977, | |
| "grad_norm": 0.44782060384750366, | |
| "learning_rate": 7.437859486229008e-05, | |
| "loss": 2.463, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.37212561867636107, | |
| "grad_norm": 0.39710888266563416, | |
| "learning_rate": 7.42773538729161e-05, | |
| "loss": 2.4415, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.37282642021812445, | |
| "grad_norm": 0.4727705717086792, | |
| "learning_rate": 7.41759824762563e-05, | |
| "loss": 2.5044, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.3735272217598879, | |
| "grad_norm": 0.4554997384548187, | |
| "learning_rate": 7.407448121683334e-05, | |
| "loss": 2.458, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.3742280233016513, | |
| "grad_norm": 0.3889988958835602, | |
| "learning_rate": 7.397285063986743e-05, | |
| "loss": 2.3949, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.37492882484341467, | |
| "grad_norm": 0.4311942160129547, | |
| "learning_rate": 7.387109129127338e-05, | |
| "loss": 2.49, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.37562962638517805, | |
| "grad_norm": 0.5033543705940247, | |
| "learning_rate": 7.376920371765778e-05, | |
| "loss": 2.5222, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.37633042792694144, | |
| "grad_norm": 0.44039902091026306, | |
| "learning_rate": 7.366718846631589e-05, | |
| "loss": 2.4676, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.3770312294687048, | |
| "grad_norm": 0.4856110215187073, | |
| "learning_rate": 7.356504608522886e-05, | |
| "loss": 2.475, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.3777320310104682, | |
| "grad_norm": 0.4543730914592743, | |
| "learning_rate": 7.346277712306074e-05, | |
| "loss": 2.4278, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.3784328325522316, | |
| "grad_norm": 0.44085338711738586, | |
| "learning_rate": 7.336038212915547e-05, | |
| "loss": 2.4556, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.37913363409399503, | |
| "grad_norm": 0.46556994318962097, | |
| "learning_rate": 7.325786165353403e-05, | |
| "loss": 2.5022, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.3798344356357584, | |
| "grad_norm": 0.49558380246162415, | |
| "learning_rate": 7.315521624689135e-05, | |
| "loss": 2.441, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.3805352371775218, | |
| "grad_norm": 0.41412246227264404, | |
| "learning_rate": 7.305244646059353e-05, | |
| "loss": 2.4467, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.3812360387192852, | |
| "grad_norm": 0.40206030011177063, | |
| "learning_rate": 7.294955284667473e-05, | |
| "loss": 2.4546, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.38193684026104857, | |
| "grad_norm": 0.4202801585197449, | |
| "learning_rate": 7.284653595783427e-05, | |
| "loss": 2.4638, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.38263764180281196, | |
| "grad_norm": 0.41482287645339966, | |
| "learning_rate": 7.274339634743364e-05, | |
| "loss": 2.4295, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.38333844334457534, | |
| "grad_norm": 0.4058382511138916, | |
| "learning_rate": 7.264013456949352e-05, | |
| "loss": 2.4885, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.3840392448863387, | |
| "grad_norm": 0.3752575218677521, | |
| "learning_rate": 7.253675117869088e-05, | |
| "loss": 2.4247, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.38474004642810217, | |
| "grad_norm": 0.4093259572982788, | |
| "learning_rate": 7.24332467303559e-05, | |
| "loss": 2.4403, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.38544084796986555, | |
| "grad_norm": 0.4372352063655853, | |
| "learning_rate": 7.232962178046901e-05, | |
| "loss": 2.4606, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.38614164951162894, | |
| "grad_norm": 0.4315595328807831, | |
| "learning_rate": 7.222587688565796e-05, | |
| "loss": 2.4756, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.3868424510533923, | |
| "grad_norm": 0.4413852095603943, | |
| "learning_rate": 7.212201260319477e-05, | |
| "loss": 2.4678, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.3875432525951557, | |
| "grad_norm": 0.41132184863090515, | |
| "learning_rate": 7.201802949099275e-05, | |
| "loss": 2.4659, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.3882440541369191, | |
| "grad_norm": 0.3732544183731079, | |
| "learning_rate": 7.191392810760356e-05, | |
| "loss": 2.4209, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.3889448556786825, | |
| "grad_norm": 0.4892706871032715, | |
| "learning_rate": 7.180970901221408e-05, | |
| "loss": 2.4762, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.38964565722044586, | |
| "grad_norm": 0.42454981803894043, | |
| "learning_rate": 7.170537276464355e-05, | |
| "loss": 2.4781, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.3903464587622093, | |
| "grad_norm": 0.4017874598503113, | |
| "learning_rate": 7.160091992534051e-05, | |
| "loss": 2.4563, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.3910472603039727, | |
| "grad_norm": 0.4679020643234253, | |
| "learning_rate": 7.14963510553797e-05, | |
| "loss": 2.4337, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.3917480618457361, | |
| "grad_norm": 0.4594592750072479, | |
| "learning_rate": 7.139166671645923e-05, | |
| "loss": 2.4431, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.39244886338749946, | |
| "grad_norm": 0.4678356349468231, | |
| "learning_rate": 7.128686747089737e-05, | |
| "loss": 2.4513, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.39314966492926284, | |
| "grad_norm": 0.462960422039032, | |
| "learning_rate": 7.118195388162966e-05, | |
| "loss": 2.4636, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.39385046647102623, | |
| "grad_norm": 0.4102267920970917, | |
| "learning_rate": 7.107692651220585e-05, | |
| "loss": 2.4249, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.3945512680127896, | |
| "grad_norm": 0.4165082573890686, | |
| "learning_rate": 7.097178592678683e-05, | |
| "loss": 2.4793, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.395252069554553, | |
| "grad_norm": 0.438484787940979, | |
| "learning_rate": 7.086653269014171e-05, | |
| "loss": 2.4437, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.39595287109631644, | |
| "grad_norm": 0.4235878884792328, | |
| "learning_rate": 7.076116736764461e-05, | |
| "loss": 2.4276, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3966536726380798, | |
| "grad_norm": 0.4382783770561218, | |
| "learning_rate": 7.065569052527182e-05, | |
| "loss": 2.4572, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.3973544741798432, | |
| "grad_norm": 0.4001896381378174, | |
| "learning_rate": 7.055010272959861e-05, | |
| "loss": 2.4796, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.3980552757216066, | |
| "grad_norm": 0.5236755609512329, | |
| "learning_rate": 7.044440454779625e-05, | |
| "loss": 2.395, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.39875607726337, | |
| "grad_norm": 0.4379066824913025, | |
| "learning_rate": 7.033859654762898e-05, | |
| "loss": 2.4206, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.39945687880513336, | |
| "grad_norm": 0.44132131338119507, | |
| "learning_rate": 7.023267929745092e-05, | |
| "loss": 2.4723, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.40015768034689675, | |
| "grad_norm": 0.4132694602012634, | |
| "learning_rate": 7.012665336620303e-05, | |
| "loss": 2.4371, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.40085848188866013, | |
| "grad_norm": 0.4186249077320099, | |
| "learning_rate": 7.002051932341008e-05, | |
| "loss": 2.4136, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.4015592834304235, | |
| "grad_norm": 0.43945756554603577, | |
| "learning_rate": 6.99142777391775e-05, | |
| "loss": 2.3931, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.40226008497218696, | |
| "grad_norm": 0.402310311794281, | |
| "learning_rate": 6.980792918418849e-05, | |
| "loss": 2.4078, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.40296088651395034, | |
| "grad_norm": 0.4048164486885071, | |
| "learning_rate": 6.970147422970074e-05, | |
| "loss": 2.44, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.40366168805571373, | |
| "grad_norm": 0.4304601550102234, | |
| "learning_rate": 6.959491344754357e-05, | |
| "loss": 2.481, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.4043624895974771, | |
| "grad_norm": 0.41222673654556274, | |
| "learning_rate": 6.94882474101147e-05, | |
| "loss": 2.4581, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.4050632911392405, | |
| "grad_norm": 0.4438692033290863, | |
| "learning_rate": 6.938147669037724e-05, | |
| "loss": 2.4474, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.4057640926810039, | |
| "grad_norm": 0.41356995701789856, | |
| "learning_rate": 6.927460186185663e-05, | |
| "loss": 2.4394, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.40646489422276727, | |
| "grad_norm": 0.43938153982162476, | |
| "learning_rate": 6.916762349863755e-05, | |
| "loss": 2.4529, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.40716569576453066, | |
| "grad_norm": 0.4264746904373169, | |
| "learning_rate": 6.906054217536074e-05, | |
| "loss": 2.4103, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.4078664973062941, | |
| "grad_norm": 0.40205949544906616, | |
| "learning_rate": 6.895335846722007e-05, | |
| "loss": 2.4533, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.4085672988480575, | |
| "grad_norm": 0.3951698839664459, | |
| "learning_rate": 6.88460729499594e-05, | |
| "loss": 2.4059, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.40926810038982087, | |
| "grad_norm": 0.40137380361557007, | |
| "learning_rate": 6.873868619986938e-05, | |
| "loss": 2.4282, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.40996890193158425, | |
| "grad_norm": 0.39662086963653564, | |
| "learning_rate": 6.863119879378451e-05, | |
| "loss": 2.404, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.41066970347334764, | |
| "grad_norm": 0.40667644143104553, | |
| "learning_rate": 6.852361130907992e-05, | |
| "loss": 2.4205, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.411370505015111, | |
| "grad_norm": 0.4023439288139343, | |
| "learning_rate": 6.841592432366835e-05, | |
| "loss": 2.4271, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.4120713065568744, | |
| "grad_norm": 0.4581812620162964, | |
| "learning_rate": 6.830813841599702e-05, | |
| "loss": 2.4589, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.4127721080986378, | |
| "grad_norm": 0.40035149455070496, | |
| "learning_rate": 6.820025416504447e-05, | |
| "loss": 2.4299, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.41347290964040123, | |
| "grad_norm": 0.40674343705177307, | |
| "learning_rate": 6.809227215031757e-05, | |
| "loss": 2.4479, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.4141737111821646, | |
| "grad_norm": 0.3893290162086487, | |
| "learning_rate": 6.798419295184823e-05, | |
| "loss": 2.4555, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.414874512723928, | |
| "grad_norm": 0.4277731776237488, | |
| "learning_rate": 6.787601715019051e-05, | |
| "loss": 2.426, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.4155753142656914, | |
| "grad_norm": 0.3891987204551697, | |
| "learning_rate": 6.776774532641731e-05, | |
| "loss": 2.455, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.41627611580745477, | |
| "grad_norm": 0.42866095900535583, | |
| "learning_rate": 6.765937806211731e-05, | |
| "loss": 2.4278, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.41697691734921816, | |
| "grad_norm": 0.4561346769332886, | |
| "learning_rate": 6.75509159393919e-05, | |
| "loss": 2.4655, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.41767771889098154, | |
| "grad_norm": 0.4195440709590912, | |
| "learning_rate": 6.744235954085193e-05, | |
| "loss": 2.4429, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.4183785204327449, | |
| "grad_norm": 0.43625494837760925, | |
| "learning_rate": 6.733370944961476e-05, | |
| "loss": 2.4733, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.41907932197450837, | |
| "grad_norm": 0.45825034379959106, | |
| "learning_rate": 6.7224966249301e-05, | |
| "loss": 2.4267, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.41978012351627175, | |
| "grad_norm": 0.4522963762283325, | |
| "learning_rate": 6.711613052403129e-05, | |
| "loss": 2.4268, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.42048092505803514, | |
| "grad_norm": 0.43499064445495605, | |
| "learning_rate": 6.700720285842344e-05, | |
| "loss": 2.4429, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4211817265997985, | |
| "grad_norm": 0.39839762449264526, | |
| "learning_rate": 6.689818383758899e-05, | |
| "loss": 2.4226, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.4218825281415619, | |
| "grad_norm": 0.42643973231315613, | |
| "learning_rate": 6.678907404713028e-05, | |
| "loss": 2.4154, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.4225833296833253, | |
| "grad_norm": 0.38087818026542664, | |
| "learning_rate": 6.667987407313721e-05, | |
| "loss": 2.4337, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.4232841312250887, | |
| "grad_norm": 0.3658069968223572, | |
| "learning_rate": 6.657058450218407e-05, | |
| "loss": 2.4608, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.42398493276685206, | |
| "grad_norm": 0.35932332277297974, | |
| "learning_rate": 6.646120592132647e-05, | |
| "loss": 2.3846, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4246857343086155, | |
| "grad_norm": 0.3892819583415985, | |
| "learning_rate": 6.635173891809811e-05, | |
| "loss": 2.4385, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.4253865358503789, | |
| "grad_norm": 0.4014433026313782, | |
| "learning_rate": 6.624218408050768e-05, | |
| "loss": 2.4243, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.4260873373921423, | |
| "grad_norm": 0.38692885637283325, | |
| "learning_rate": 6.613254199703567e-05, | |
| "loss": 2.484, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.42678813893390566, | |
| "grad_norm": 0.3929356336593628, | |
| "learning_rate": 6.602281325663119e-05, | |
| "loss": 2.4391, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.42748894047566904, | |
| "grad_norm": 0.37778228521347046, | |
| "learning_rate": 6.591299844870886e-05, | |
| "loss": 2.4475, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.42818974201743243, | |
| "grad_norm": 0.3806099593639374, | |
| "learning_rate": 6.580309816314566e-05, | |
| "loss": 2.465, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.4288905435591958, | |
| "grad_norm": 0.3700454831123352, | |
| "learning_rate": 6.569311299027758e-05, | |
| "loss": 2.4834, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.4295913451009592, | |
| "grad_norm": 0.4086442291736603, | |
| "learning_rate": 6.558304352089676e-05, | |
| "loss": 2.4605, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.43029214664272264, | |
| "grad_norm": 0.4239005744457245, | |
| "learning_rate": 6.547289034624803e-05, | |
| "loss": 2.4578, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.430992948184486, | |
| "grad_norm": 0.4295092523097992, | |
| "learning_rate": 6.536265405802588e-05, | |
| "loss": 2.4021, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.4316937497262494, | |
| "grad_norm": 0.3939379155635834, | |
| "learning_rate": 6.525233524837126e-05, | |
| "loss": 2.4258, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.4323945512680128, | |
| "grad_norm": 0.40129542350769043, | |
| "learning_rate": 6.514193450986838e-05, | |
| "loss": 2.4434, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.4330953528097762, | |
| "grad_norm": 0.41189342737197876, | |
| "learning_rate": 6.503145243554151e-05, | |
| "loss": 2.4488, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.43379615435153956, | |
| "grad_norm": 0.39507558941841125, | |
| "learning_rate": 6.492088961885189e-05, | |
| "loss": 2.437, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.43449695589330295, | |
| "grad_norm": 0.4083710014820099, | |
| "learning_rate": 6.481024665369437e-05, | |
| "loss": 2.4487, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.43519775743506633, | |
| "grad_norm": 0.39629602432250977, | |
| "learning_rate": 6.469952413439444e-05, | |
| "loss": 2.4023, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.4358985589768298, | |
| "grad_norm": 0.38575878739356995, | |
| "learning_rate": 6.458872265570482e-05, | |
| "loss": 2.4191, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.43659936051859316, | |
| "grad_norm": 0.4187893271446228, | |
| "learning_rate": 6.44778428128024e-05, | |
| "loss": 2.424, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.43730016206035655, | |
| "grad_norm": 0.41006091237068176, | |
| "learning_rate": 6.436688520128505e-05, | |
| "loss": 2.4236, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.43800096360211993, | |
| "grad_norm": 0.3775830566883087, | |
| "learning_rate": 6.42558504171683e-05, | |
| "loss": 2.4487, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4387017651438833, | |
| "grad_norm": 0.40173962712287903, | |
| "learning_rate": 6.41447390568823e-05, | |
| "loss": 2.4602, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.4394025666856467, | |
| "grad_norm": 0.4107741713523865, | |
| "learning_rate": 6.403355171726844e-05, | |
| "loss": 2.4658, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.4401033682274101, | |
| "grad_norm": 0.37253475189208984, | |
| "learning_rate": 6.392228899557635e-05, | |
| "loss": 2.444, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.44080416976917347, | |
| "grad_norm": 0.41569939255714417, | |
| "learning_rate": 6.38109514894605e-05, | |
| "loss": 2.413, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.4415049713109369, | |
| "grad_norm": 0.3902023732662201, | |
| "learning_rate": 6.369953979697707e-05, | |
| "loss": 2.4157, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.4422057728527003, | |
| "grad_norm": 0.3787744641304016, | |
| "learning_rate": 6.358805451658079e-05, | |
| "loss": 2.4063, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.4429065743944637, | |
| "grad_norm": 0.38019701838493347, | |
| "learning_rate": 6.347649624712159e-05, | |
| "loss": 2.4503, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.44360737593622707, | |
| "grad_norm": 0.37601906061172485, | |
| "learning_rate": 6.336486558784154e-05, | |
| "loss": 2.4674, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.44430817747799045, | |
| "grad_norm": 0.3607248067855835, | |
| "learning_rate": 6.325316313837153e-05, | |
| "loss": 2.4162, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.44500897901975384, | |
| "grad_norm": 0.3930775821208954, | |
| "learning_rate": 6.314138949872808e-05, | |
| "loss": 2.4097, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.4457097805615172, | |
| "grad_norm": 0.37233275175094604, | |
| "learning_rate": 6.302954526931009e-05, | |
| "loss": 2.4397, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.4464105821032806, | |
| "grad_norm": 0.36300432682037354, | |
| "learning_rate": 6.291763105089567e-05, | |
| "loss": 2.429, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.447111383645044, | |
| "grad_norm": 0.3873036205768585, | |
| "learning_rate": 6.280564744463886e-05, | |
| "loss": 2.449, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.44781218518680743, | |
| "grad_norm": 0.3959653973579407, | |
| "learning_rate": 6.269359505206641e-05, | |
| "loss": 2.4212, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.4485129867285708, | |
| "grad_norm": 0.38037773966789246, | |
| "learning_rate": 6.25814744750746e-05, | |
| "loss": 2.409, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.4492137882703342, | |
| "grad_norm": 0.42133620381355286, | |
| "learning_rate": 6.246928631592593e-05, | |
| "loss": 2.472, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.4499145898120976, | |
| "grad_norm": 0.3604891002178192, | |
| "learning_rate": 6.235703117724591e-05, | |
| "loss": 2.4107, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.450615391353861, | |
| "grad_norm": 0.40617790818214417, | |
| "learning_rate": 6.224470966201991e-05, | |
| "loss": 2.4255, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.45131619289562436, | |
| "grad_norm": 0.424081414937973, | |
| "learning_rate": 6.213232237358977e-05, | |
| "loss": 2.4598, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.45201699443738774, | |
| "grad_norm": 0.4017449617385864, | |
| "learning_rate": 6.201986991565063e-05, | |
| "loss": 2.4215, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.4527177959791511, | |
| "grad_norm": 0.37244996428489685, | |
| "learning_rate": 6.190735289224775e-05, | |
| "loss": 2.4077, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.45341859752091457, | |
| "grad_norm": 0.3613777458667755, | |
| "learning_rate": 6.179477190777317e-05, | |
| "loss": 2.4602, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.45411939906267795, | |
| "grad_norm": 0.38270074129104614, | |
| "learning_rate": 6.168212756696252e-05, | |
| "loss": 2.398, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.45482020060444134, | |
| "grad_norm": 0.35369983315467834, | |
| "learning_rate": 6.156942047489174e-05, | |
| "loss": 2.4614, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.4555210021462047, | |
| "grad_norm": 0.3624161183834076, | |
| "learning_rate": 6.145665123697383e-05, | |
| "loss": 2.3981, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.4562218036879681, | |
| "grad_norm": 0.35648027062416077, | |
| "learning_rate": 6.134382045895563e-05, | |
| "loss": 2.4268, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.4569226052297315, | |
| "grad_norm": 0.3938058018684387, | |
| "learning_rate": 6.123092874691453e-05, | |
| "loss": 2.4214, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.4576234067714949, | |
| "grad_norm": 0.4438329041004181, | |
| "learning_rate": 6.111797670725527e-05, | |
| "loss": 2.4096, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.45832420831325826, | |
| "grad_norm": 0.47116366028785706, | |
| "learning_rate": 6.100496494670658e-05, | |
| "loss": 2.4272, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.4590250098550217, | |
| "grad_norm": 0.42287012934684753, | |
| "learning_rate": 6.0891894072318056e-05, | |
| "loss": 2.3914, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.4597258113967851, | |
| "grad_norm": 0.4389081299304962, | |
| "learning_rate": 6.077876469145675e-05, | |
| "loss": 2.4542, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.4604266129385485, | |
| "grad_norm": 0.406688392162323, | |
| "learning_rate": 6.0665577411804056e-05, | |
| "loss": 2.4614, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.46112741448031186, | |
| "grad_norm": 0.42373642325401306, | |
| "learning_rate": 6.055233284135231e-05, | |
| "loss": 2.4162, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.46182821602207524, | |
| "grad_norm": 0.42829835414886475, | |
| "learning_rate": 6.043903158840166e-05, | |
| "loss": 2.4324, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.46252901756383863, | |
| "grad_norm": 0.4124324917793274, | |
| "learning_rate": 6.0325674261556686e-05, | |
| "loss": 2.4084, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.463229819105602, | |
| "grad_norm": 0.38950327038764954, | |
| "learning_rate": 6.021226146972315e-05, | |
| "loss": 2.5012, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.4639306206473654, | |
| "grad_norm": 0.36520397663116455, | |
| "learning_rate": 6.0098793822104804e-05, | |
| "loss": 2.3733, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.46463142218912884, | |
| "grad_norm": 0.38024482131004333, | |
| "learning_rate": 5.998527192820001e-05, | |
| "loss": 2.398, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.4653322237308922, | |
| "grad_norm": 0.39080825448036194, | |
| "learning_rate": 5.987169639779856e-05, | |
| "loss": 2.4214, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.4660330252726556, | |
| "grad_norm": 0.35209205746650696, | |
| "learning_rate": 5.9758067840978325e-05, | |
| "loss": 2.4532, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.466733826814419, | |
| "grad_norm": 0.36672139167785645, | |
| "learning_rate": 5.964438686810202e-05, | |
| "loss": 2.4343, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.4674346283561824, | |
| "grad_norm": 0.37624260783195496, | |
| "learning_rate": 5.953065408981392e-05, | |
| "loss": 2.4449, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.46813542989794577, | |
| "grad_norm": 0.3691907525062561, | |
| "learning_rate": 5.941687011703657e-05, | |
| "loss": 2.4148, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.46883623143970915, | |
| "grad_norm": 0.37521377205848694, | |
| "learning_rate": 5.9303035560967546e-05, | |
| "loss": 2.4287, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.46953703298147254, | |
| "grad_norm": 0.4108455777168274, | |
| "learning_rate": 5.918915103307605e-05, | |
| "loss": 2.4007, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.470237834523236, | |
| "grad_norm": 0.4070550799369812, | |
| "learning_rate": 5.9075217145099806e-05, | |
| "loss": 2.4627, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.47093863606499936, | |
| "grad_norm": 0.38162675499916077, | |
| "learning_rate": 5.896123450904162e-05, | |
| "loss": 2.4326, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.47163943760676275, | |
| "grad_norm": 0.4042952358722687, | |
| "learning_rate": 5.884720373716617e-05, | |
| "loss": 2.4683, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.47234023914852613, | |
| "grad_norm": 0.3937741219997406, | |
| "learning_rate": 5.8733125441996696e-05, | |
| "loss": 2.4243, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.4730410406902895, | |
| "grad_norm": 0.394265353679657, | |
| "learning_rate": 5.861900023631172e-05, | |
| "loss": 2.4323, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.4737418422320529, | |
| "grad_norm": 0.44313526153564453, | |
| "learning_rate": 5.8504828733141716e-05, | |
| "loss": 2.4085, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.4744426437738163, | |
| "grad_norm": 0.4558910131454468, | |
| "learning_rate": 5.8390611545765886e-05, | |
| "loss": 2.3757, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.47514344531557967, | |
| "grad_norm": 0.41724249720573425, | |
| "learning_rate": 5.827634928770882e-05, | |
| "loss": 2.4224, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.4758442468573431, | |
| "grad_norm": 0.40503472089767456, | |
| "learning_rate": 5.816204257273719e-05, | |
| "loss": 2.4405, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.4765450483991065, | |
| "grad_norm": 0.36606565117836, | |
| "learning_rate": 5.804769201485648e-05, | |
| "loss": 2.4352, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.4772458499408699, | |
| "grad_norm": 0.3968331515789032, | |
| "learning_rate": 5.79332982283077e-05, | |
| "loss": 2.4547, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.47794665148263327, | |
| "grad_norm": 0.3790927231311798, | |
| "learning_rate": 5.7818861827564006e-05, | |
| "loss": 2.3899, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.47864745302439665, | |
| "grad_norm": 0.3793366253376007, | |
| "learning_rate": 5.770438342732755e-05, | |
| "loss": 2.4267, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.47934825456616004, | |
| "grad_norm": 0.43721577525138855, | |
| "learning_rate": 5.7589863642525984e-05, | |
| "loss": 2.3998, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.4800490561079234, | |
| "grad_norm": 0.4096902012825012, | |
| "learning_rate": 5.7475303088309355e-05, | |
| "loss": 2.4216, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.4807498576496868, | |
| "grad_norm": 0.37640735507011414, | |
| "learning_rate": 5.736070238004663e-05, | |
| "loss": 2.4119, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.48145065919145025, | |
| "grad_norm": 0.37925073504447937, | |
| "learning_rate": 5.724606213332251e-05, | |
| "loss": 2.4375, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.48215146073321363, | |
| "grad_norm": 0.40147697925567627, | |
| "learning_rate": 5.713138296393407e-05, | |
| "loss": 2.3849, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.482852262274977, | |
| "grad_norm": 0.39544767141342163, | |
| "learning_rate": 5.701666548788743e-05, | |
| "loss": 2.3931, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.4835530638167404, | |
| "grad_norm": 0.3495292365550995, | |
| "learning_rate": 5.6901910321394535e-05, | |
| "loss": 2.4072, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.4842538653585038, | |
| "grad_norm": 0.3946402966976166, | |
| "learning_rate": 5.678711808086975e-05, | |
| "loss": 2.4355, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.4849546669002672, | |
| "grad_norm": 0.4135989248752594, | |
| "learning_rate": 5.667228938292658e-05, | |
| "loss": 2.446, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.48565546844203056, | |
| "grad_norm": 0.4330977201461792, | |
| "learning_rate": 5.655742484437438e-05, | |
| "loss": 2.3511, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.48635626998379394, | |
| "grad_norm": 0.4151771664619446, | |
| "learning_rate": 5.6442525082215026e-05, | |
| "loss": 2.4394, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.4870570715255574, | |
| "grad_norm": 0.39435574412345886, | |
| "learning_rate": 5.6327590713639575e-05, | |
| "loss": 2.4012, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.48775787306732077, | |
| "grad_norm": 0.37605562806129456, | |
| "learning_rate": 5.6212622356025015e-05, | |
| "loss": 2.4322, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.48845867460908415, | |
| "grad_norm": 0.39531436562538147, | |
| "learning_rate": 5.609762062693086e-05, | |
| "loss": 2.3881, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.48915947615084754, | |
| "grad_norm": 0.3732660114765167, | |
| "learning_rate": 5.5982586144095913e-05, | |
| "loss": 2.4337, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.4898602776926109, | |
| "grad_norm": 0.3481023907661438, | |
| "learning_rate": 5.586751952543493e-05, | |
| "loss": 2.3736, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.4905610792343743, | |
| "grad_norm": 0.39541634917259216, | |
| "learning_rate": 5.5752421389035235e-05, | |
| "loss": 2.3951, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.4912618807761377, | |
| "grad_norm": 0.41655370593070984, | |
| "learning_rate": 5.56372923531535e-05, | |
| "loss": 2.4328, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.4919626823179011, | |
| "grad_norm": 0.37396180629730225, | |
| "learning_rate": 5.552213303621235e-05, | |
| "loss": 2.4274, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.49266348385966446, | |
| "grad_norm": 0.36494237184524536, | |
| "learning_rate": 5.540694405679707e-05, | |
| "loss": 2.3988, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.4933642854014279, | |
| "grad_norm": 0.4091382920742035, | |
| "learning_rate": 5.52917260336523e-05, | |
| "loss": 2.4052, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.4940650869431913, | |
| "grad_norm": 0.5013120174407959, | |
| "learning_rate": 5.517647958567867e-05, | |
| "loss": 2.414, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.4947658884849547, | |
| "grad_norm": 0.3942740261554718, | |
| "learning_rate": 5.506120533192948e-05, | |
| "loss": 2.3965, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.49546669002671806, | |
| "grad_norm": 0.3768194019794464, | |
| "learning_rate": 5.4945903891607406e-05, | |
| "loss": 2.3793, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.49616749156848144, | |
| "grad_norm": 0.39486873149871826, | |
| "learning_rate": 5.4830575884061184e-05, | |
| "loss": 2.4039, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.49686829311024483, | |
| "grad_norm": 0.3837323784828186, | |
| "learning_rate": 5.471522192878222e-05, | |
| "loss": 2.4353, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.4975690946520082, | |
| "grad_norm": 0.37630417943000793, | |
| "learning_rate": 5.4599842645401335e-05, | |
| "loss": 2.4056, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.4982698961937716, | |
| "grad_norm": 0.39257028698921204, | |
| "learning_rate": 5.4484438653685345e-05, | |
| "loss": 2.4018, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.49897069773553504, | |
| "grad_norm": 0.4382604658603668, | |
| "learning_rate": 5.436901057353385e-05, | |
| "loss": 2.4285, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.4996714992772984, | |
| "grad_norm": 0.43138858675956726, | |
| "learning_rate": 5.4253559024975816e-05, | |
| "loss": 2.3996, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.5003723008190618, | |
| "grad_norm": 0.37432757019996643, | |
| "learning_rate": 5.4138084628166266e-05, | |
| "loss": 2.4102, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.5010731023608251, | |
| "grad_norm": 0.3795533776283264, | |
| "learning_rate": 5.4022588003382955e-05, | |
| "loss": 2.4231, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5017739039025886, | |
| "grad_norm": 0.3747682571411133, | |
| "learning_rate": 5.390706977102304e-05, | |
| "loss": 2.3939, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.502474705444352, | |
| "grad_norm": 0.3895587921142578, | |
| "learning_rate": 5.379153055159978e-05, | |
| "loss": 2.3694, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.5031755069861154, | |
| "grad_norm": 0.3492620289325714, | |
| "learning_rate": 5.3675970965739076e-05, | |
| "loss": 2.3752, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.5038763085278788, | |
| "grad_norm": 0.3917596638202667, | |
| "learning_rate": 5.356039163417633e-05, | |
| "loss": 2.4205, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.5045771100696421, | |
| "grad_norm": 0.3660494089126587, | |
| "learning_rate": 5.344479317775295e-05, | |
| "loss": 2.3649, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5052779116114056, | |
| "grad_norm": 0.3819909393787384, | |
| "learning_rate": 5.332917621741308e-05, | |
| "loss": 2.4108, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.5059787131531689, | |
| "grad_norm": 0.34344562888145447, | |
| "learning_rate": 5.321354137420029e-05, | |
| "loss": 2.4052, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.5066795146949323, | |
| "grad_norm": 0.3695683479309082, | |
| "learning_rate": 5.309788926925418e-05, | |
| "loss": 2.4155, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.5073803162366958, | |
| "grad_norm": 0.35396599769592285, | |
| "learning_rate": 5.2982220523807055e-05, | |
| "loss": 2.4107, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.5080811177784591, | |
| "grad_norm": 0.3450440466403961, | |
| "learning_rate": 5.286653575918066e-05, | |
| "loss": 2.4528, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5087819193202225, | |
| "grad_norm": 0.3628271520137787, | |
| "learning_rate": 5.275083559678275e-05, | |
| "loss": 2.4078, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.5094827208619859, | |
| "grad_norm": 0.40400680899620056, | |
| "learning_rate": 5.263512065810379e-05, | |
| "loss": 2.3694, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.5101835224037493, | |
| "grad_norm": 0.3815267086029053, | |
| "learning_rate": 5.2519391564713626e-05, | |
| "loss": 2.4096, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.5108843239455126, | |
| "grad_norm": 0.35269948840141296, | |
| "learning_rate": 5.2403648938258144e-05, | |
| "loss": 2.4205, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.5115851254872761, | |
| "grad_norm": 0.3467412292957306, | |
| "learning_rate": 5.228789340045591e-05, | |
| "loss": 2.4357, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5122859270290394, | |
| "grad_norm": 0.38257092237472534, | |
| "learning_rate": 5.217212557309485e-05, | |
| "loss": 2.3958, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.5129867285708029, | |
| "grad_norm": 0.42102304100990295, | |
| "learning_rate": 5.20563460780289e-05, | |
| "loss": 2.4122, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.5136875301125663, | |
| "grad_norm": 0.410961389541626, | |
| "learning_rate": 5.194055553717471e-05, | |
| "loss": 2.4256, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.5143883316543296, | |
| "grad_norm": 0.3856711685657501, | |
| "learning_rate": 5.182475457250816e-05, | |
| "loss": 2.3983, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.5150891331960931, | |
| "grad_norm": 0.4079275131225586, | |
| "learning_rate": 5.1708943806061225e-05, | |
| "loss": 2.377, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5157899347378564, | |
| "grad_norm": 0.39763742685317993, | |
| "learning_rate": 5.15931238599185e-05, | |
| "loss": 2.4304, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.5164907362796198, | |
| "grad_norm": 0.44861507415771484, | |
| "learning_rate": 5.147729535621388e-05, | |
| "loss": 2.3918, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.5171915378213832, | |
| "grad_norm": 0.44281211495399475, | |
| "learning_rate": 5.136145891712721e-05, | |
| "loss": 2.4509, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.5178923393631466, | |
| "grad_norm": 0.3945384919643402, | |
| "learning_rate": 5.1245615164881025e-05, | |
| "loss": 2.3719, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.51859314090491, | |
| "grad_norm": 0.4102880358695984, | |
| "learning_rate": 5.112976472173706e-05, | |
| "loss": 2.4198, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5192939424466734, | |
| "grad_norm": 0.3924848735332489, | |
| "learning_rate": 5.1013908209993045e-05, | |
| "loss": 2.3773, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.5199947439884368, | |
| "grad_norm": 0.38885822892189026, | |
| "learning_rate": 5.089804625197929e-05, | |
| "loss": 2.3925, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.5206955455302001, | |
| "grad_norm": 0.38488003611564636, | |
| "learning_rate": 5.078217947005537e-05, | |
| "loss": 2.403, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.5213963470719636, | |
| "grad_norm": 0.43979960680007935, | |
| "learning_rate": 5.066630848660676e-05, | |
| "loss": 2.424, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.5220971486137269, | |
| "grad_norm": 0.40183478593826294, | |
| "learning_rate": 5.055043392404151e-05, | |
| "loss": 2.4164, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5227979501554904, | |
| "grad_norm": 0.36646613478660583, | |
| "learning_rate": 5.0434556404786894e-05, | |
| "loss": 2.4044, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.5234987516972537, | |
| "grad_norm": 0.3749536871910095, | |
| "learning_rate": 5.031867655128606e-05, | |
| "loss": 2.3571, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.5241995532390171, | |
| "grad_norm": 0.36778751015663147, | |
| "learning_rate": 5.0202794985994716e-05, | |
| "loss": 2.4313, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.5249003547807806, | |
| "grad_norm": 0.3651956021785736, | |
| "learning_rate": 5.0086912331377743e-05, | |
| "loss": 2.3786, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.5256011563225439, | |
| "grad_norm": 0.3685001730918884, | |
| "learning_rate": 4.997102920990589e-05, | |
| "loss": 2.3805, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5263019578643073, | |
| "grad_norm": 0.3800550401210785, | |
| "learning_rate": 4.9855146244052393e-05, | |
| "loss": 2.3931, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.5270027594060707, | |
| "grad_norm": 0.3740440905094147, | |
| "learning_rate": 4.97392640562897e-05, | |
| "loss": 2.3742, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.5277035609478341, | |
| "grad_norm": 0.36996012926101685, | |
| "learning_rate": 4.962338326908599e-05, | |
| "loss": 2.3557, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.5284043624895974, | |
| "grad_norm": 0.36498022079467773, | |
| "learning_rate": 4.9507504504902006e-05, | |
| "loss": 2.4455, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.5291051640313609, | |
| "grad_norm": 0.34525755047798157, | |
| "learning_rate": 4.939162838618759e-05, | |
| "loss": 2.4129, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.5298059655731243, | |
| "grad_norm": 0.35908079147338867, | |
| "learning_rate": 4.927575553537837e-05, | |
| "loss": 2.4145, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.5305067671148876, | |
| "grad_norm": 0.35571855306625366, | |
| "learning_rate": 4.9159886574892434e-05, | |
| "loss": 2.3932, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.5312075686566511, | |
| "grad_norm": 0.3751217722892761, | |
| "learning_rate": 4.904402212712695e-05, | |
| "loss": 2.3982, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.5319083701984144, | |
| "grad_norm": 0.38550952076911926, | |
| "learning_rate": 4.892816281445487e-05, | |
| "loss": 2.4541, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.5326091717401779, | |
| "grad_norm": 0.39120054244995117, | |
| "learning_rate": 4.881230925922154e-05, | |
| "loss": 2.3904, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5333099732819412, | |
| "grad_norm": 0.39747923612594604, | |
| "learning_rate": 4.869646208374141e-05, | |
| "loss": 2.3975, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.5340107748237046, | |
| "grad_norm": 0.37797030806541443, | |
| "learning_rate": 4.858062191029466e-05, | |
| "loss": 2.434, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.534711576365468, | |
| "grad_norm": 0.3707491159439087, | |
| "learning_rate": 4.846478936112379e-05, | |
| "loss": 2.3451, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.5354123779072314, | |
| "grad_norm": 0.39037153124809265, | |
| "learning_rate": 4.8348965058430425e-05, | |
| "loss": 2.3882, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.5361131794489948, | |
| "grad_norm": 0.37281128764152527, | |
| "learning_rate": 4.8233149624371856e-05, | |
| "loss": 2.3984, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.5368139809907582, | |
| "grad_norm": 0.39304113388061523, | |
| "learning_rate": 4.811734368105776e-05, | |
| "loss": 2.3877, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.5375147825325216, | |
| "grad_norm": 0.37360695004463196, | |
| "learning_rate": 4.800154785054683e-05, | |
| "loss": 2.4403, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.5382155840742849, | |
| "grad_norm": 0.3492148518562317, | |
| "learning_rate": 4.788576275484338e-05, | |
| "loss": 2.4061, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.5389163856160484, | |
| "grad_norm": 0.33681145310401917, | |
| "learning_rate": 4.776998901589414e-05, | |
| "loss": 2.4487, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.5396171871578117, | |
| "grad_norm": 0.34817156195640564, | |
| "learning_rate": 4.76542272555848e-05, | |
| "loss": 2.3694, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5403179886995751, | |
| "grad_norm": 0.3471536934375763, | |
| "learning_rate": 4.7538478095736704e-05, | |
| "loss": 2.3757, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.5410187902413385, | |
| "grad_norm": 0.3631412386894226, | |
| "learning_rate": 4.742274215810354e-05, | |
| "loss": 2.393, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.5417195917831019, | |
| "grad_norm": 0.3778044581413269, | |
| "learning_rate": 4.730702006436792e-05, | |
| "loss": 2.3574, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.5424203933248654, | |
| "grad_norm": 0.34900644421577454, | |
| "learning_rate": 4.719131243613813e-05, | |
| "loss": 2.4216, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.5431211948666287, | |
| "grad_norm": 0.3680863082408905, | |
| "learning_rate": 4.7075619894944755e-05, | |
| "loss": 2.4101, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5438219964083921, | |
| "grad_norm": 0.37231966853141785, | |
| "learning_rate": 4.695994306223732e-05, | |
| "loss": 2.4376, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.5445227979501555, | |
| "grad_norm": 0.3646167814731598, | |
| "learning_rate": 4.6844282559381004e-05, | |
| "loss": 2.4531, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.5452235994919189, | |
| "grad_norm": 0.3668572008609772, | |
| "learning_rate": 4.672863900765322e-05, | |
| "loss": 2.4066, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.5459244010336822, | |
| "grad_norm": 0.3679658770561218, | |
| "learning_rate": 4.661301302824036e-05, | |
| "loss": 2.3772, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.5466252025754457, | |
| "grad_norm": 0.3740265667438507, | |
| "learning_rate": 4.649740524223443e-05, | |
| "loss": 2.4107, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5473260041172091, | |
| "grad_norm": 0.37704795598983765, | |
| "learning_rate": 4.638181627062968e-05, | |
| "loss": 2.397, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.5480268056589724, | |
| "grad_norm": 0.3817128837108612, | |
| "learning_rate": 4.626624673431933e-05, | |
| "loss": 2.4236, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.5487276072007359, | |
| "grad_norm": 0.36348238587379456, | |
| "learning_rate": 4.61506972540922e-05, | |
| "loss": 2.3804, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.5494284087424992, | |
| "grad_norm": 0.3793163299560547, | |
| "learning_rate": 4.603516845062933e-05, | |
| "loss": 2.359, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.5501292102842626, | |
| "grad_norm": 0.3661409020423889, | |
| "learning_rate": 4.591966094450074e-05, | |
| "loss": 2.3475, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.550830011826026, | |
| "grad_norm": 0.3748769164085388, | |
| "learning_rate": 4.580417535616205e-05, | |
| "loss": 2.4047, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.5515308133677894, | |
| "grad_norm": 0.41463643312454224, | |
| "learning_rate": 4.568871230595112e-05, | |
| "loss": 2.3938, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.5522316149095527, | |
| "grad_norm": 0.45439499616622925, | |
| "learning_rate": 4.55732724140848e-05, | |
| "loss": 2.3943, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.5529324164513162, | |
| "grad_norm": 0.41736286878585815, | |
| "learning_rate": 4.5457856300655446e-05, | |
| "loss": 2.4391, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.5536332179930796, | |
| "grad_norm": 0.37334778904914856, | |
| "learning_rate": 4.5342464585627775e-05, | |
| "loss": 2.3826, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.554334019534843, | |
| "grad_norm": 0.39312267303466797, | |
| "learning_rate": 4.522709788883541e-05, | |
| "loss": 2.3993, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.5550348210766064, | |
| "grad_norm": 0.40094032883644104, | |
| "learning_rate": 4.5111756829977604e-05, | |
| "loss": 2.4042, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.5557356226183697, | |
| "grad_norm": 0.40080416202545166, | |
| "learning_rate": 4.4996442028615887e-05, | |
| "loss": 2.4187, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.5564364241601332, | |
| "grad_norm": 0.38033226132392883, | |
| "learning_rate": 4.4881154104170715e-05, | |
| "loss": 2.3835, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.5571372257018965, | |
| "grad_norm": 0.36874693632125854, | |
| "learning_rate": 4.4765893675918216e-05, | |
| "loss": 2.4102, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.5578380272436599, | |
| "grad_norm": 0.38017359375953674, | |
| "learning_rate": 4.4650661362986804e-05, | |
| "loss": 2.419, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.5585388287854234, | |
| "grad_norm": 0.3930913209915161, | |
| "learning_rate": 4.4535457784353874e-05, | |
| "loss": 2.3678, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.5592396303271867, | |
| "grad_norm": 0.4158961772918701, | |
| "learning_rate": 4.442028355884247e-05, | |
| "loss": 2.379, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.5599404318689502, | |
| "grad_norm": 0.3942618668079376, | |
| "learning_rate": 4.430513930511795e-05, | |
| "loss": 2.436, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.5606412334107135, | |
| "grad_norm": 0.3719567656517029, | |
| "learning_rate": 4.4190025641684694e-05, | |
| "loss": 2.4292, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5613420349524769, | |
| "grad_norm": 0.3342512249946594, | |
| "learning_rate": 4.4074943186882775e-05, | |
| "loss": 2.3999, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.5620428364942402, | |
| "grad_norm": 0.33845871686935425, | |
| "learning_rate": 4.3959892558884594e-05, | |
| "loss": 2.3872, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.5627436380360037, | |
| "grad_norm": 0.35243046283721924, | |
| "learning_rate": 4.384487437569164e-05, | |
| "loss": 2.4129, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.563444439577767, | |
| "grad_norm": 0.36894673109054565, | |
| "learning_rate": 4.372988925513105e-05, | |
| "loss": 2.422, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.5641452411195305, | |
| "grad_norm": 0.37123262882232666, | |
| "learning_rate": 4.361493781485243e-05, | |
| "loss": 2.4417, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.5648460426612939, | |
| "grad_norm": 0.37714582681655884, | |
| "learning_rate": 4.350002067232445e-05, | |
| "loss": 2.3607, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.5655468442030572, | |
| "grad_norm": 0.35975465178489685, | |
| "learning_rate": 4.338513844483153e-05, | |
| "loss": 2.4045, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.5662476457448207, | |
| "grad_norm": 0.3770267963409424, | |
| "learning_rate": 4.327029174947059e-05, | |
| "loss": 2.3977, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.566948447286584, | |
| "grad_norm": 0.34482887387275696, | |
| "learning_rate": 4.31554812031476e-05, | |
| "loss": 2.4076, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.5676492488283474, | |
| "grad_norm": 0.37005364894866943, | |
| "learning_rate": 4.304070742257444e-05, | |
| "loss": 2.413, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.5683500503701108, | |
| "grad_norm": 0.36416202783584595, | |
| "learning_rate": 4.292597102426546e-05, | |
| "loss": 2.3839, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.5690508519118742, | |
| "grad_norm": 0.3722652494907379, | |
| "learning_rate": 4.281127262453421e-05, | |
| "loss": 2.367, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.5697516534536377, | |
| "grad_norm": 0.3665387034416199, | |
| "learning_rate": 4.269661283949014e-05, | |
| "loss": 2.4089, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.570452454995401, | |
| "grad_norm": 0.3720584511756897, | |
| "learning_rate": 4.25819922850353e-05, | |
| "loss": 2.4024, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.5711532565371644, | |
| "grad_norm": 0.3438746929168701, | |
| "learning_rate": 4.2467411576860936e-05, | |
| "loss": 2.4021, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.5718540580789278, | |
| "grad_norm": 0.32350221276283264, | |
| "learning_rate": 4.235287133044432e-05, | |
| "loss": 2.42, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.5725548596206912, | |
| "grad_norm": 0.34177878499031067, | |
| "learning_rate": 4.223837216104537e-05, | |
| "loss": 2.395, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.5732556611624545, | |
| "grad_norm": 0.32010525465011597, | |
| "learning_rate": 4.212391468370335e-05, | |
| "loss": 2.386, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.573956462704218, | |
| "grad_norm": 0.35073044896125793, | |
| "learning_rate": 4.200949951323359e-05, | |
| "loss": 2.4172, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.5746572642459813, | |
| "grad_norm": 0.37043678760528564, | |
| "learning_rate": 4.189512726422412e-05, | |
| "loss": 2.4242, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.5753580657877447, | |
| "grad_norm": 0.32336142659187317, | |
| "learning_rate": 4.1780798551032465e-05, | |
| "loss": 2.4033, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.5760588673295082, | |
| "grad_norm": 0.34827011823654175, | |
| "learning_rate": 4.1666513987782285e-05, | |
| "loss": 2.3715, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.5767596688712715, | |
| "grad_norm": 0.3357372283935547, | |
| "learning_rate": 4.155227418836005e-05, | |
| "loss": 2.4134, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.5774604704130349, | |
| "grad_norm": 0.3466857969760895, | |
| "learning_rate": 4.1438079766411845e-05, | |
| "loss": 2.3879, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.5781612719547983, | |
| "grad_norm": 0.354726105928421, | |
| "learning_rate": 4.132393133533991e-05, | |
| "loss": 2.3771, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5788620734965617, | |
| "grad_norm": 0.3625982701778412, | |
| "learning_rate": 4.120982950829954e-05, | |
| "loss": 2.4021, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.579562875038325, | |
| "grad_norm": 0.3679793179035187, | |
| "learning_rate": 4.109577489819563e-05, | |
| "loss": 2.3891, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.5802636765800885, | |
| "grad_norm": 0.40574145317077637, | |
| "learning_rate": 4.098176811767949e-05, | |
| "loss": 2.4121, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.5809644781218518, | |
| "grad_norm": 0.3955237567424774, | |
| "learning_rate": 4.086780977914549e-05, | |
| "loss": 2.3566, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.5816652796636153, | |
| "grad_norm": 0.3654053807258606, | |
| "learning_rate": 4.075390049472776e-05, | |
| "loss": 2.4196, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5823660812053787, | |
| "grad_norm": 0.3621748685836792, | |
| "learning_rate": 4.064004087629699e-05, | |
| "loss": 2.4013, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.583066882747142, | |
| "grad_norm": 0.3717096447944641, | |
| "learning_rate": 4.0526231535457044e-05, | |
| "loss": 2.4051, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.5837676842889055, | |
| "grad_norm": 0.3727043867111206, | |
| "learning_rate": 4.0412473083541734e-05, | |
| "loss": 2.406, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.5844684858306688, | |
| "grad_norm": 0.3818533718585968, | |
| "learning_rate": 4.0298766131611554e-05, | |
| "loss": 2.3367, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.5851692873724322, | |
| "grad_norm": 0.3708714246749878, | |
| "learning_rate": 4.018511129045026e-05, | |
| "loss": 2.4063, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5858700889141956, | |
| "grad_norm": 0.3678399324417114, | |
| "learning_rate": 4.00715091705618e-05, | |
| "loss": 2.4302, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.586570890455959, | |
| "grad_norm": 0.356626033782959, | |
| "learning_rate": 3.995796038216687e-05, | |
| "loss": 2.4227, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.5872716919977224, | |
| "grad_norm": 0.3606754243373871, | |
| "learning_rate": 3.984446553519971e-05, | |
| "loss": 2.4077, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.5879724935394858, | |
| "grad_norm": 0.3369152247905731, | |
| "learning_rate": 3.973102523930482e-05, | |
| "loss": 2.3857, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.5886732950812492, | |
| "grad_norm": 0.38065439462661743, | |
| "learning_rate": 3.961764010383364e-05, | |
| "loss": 2.3598, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.5893740966230125, | |
| "grad_norm": 0.3832966089248657, | |
| "learning_rate": 3.9504310737841355e-05, | |
| "loss": 2.4021, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.590074898164776, | |
| "grad_norm": 0.3579697608947754, | |
| "learning_rate": 3.939103775008354e-05, | |
| "loss": 2.3775, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.5907756997065393, | |
| "grad_norm": 0.3174007833003998, | |
| "learning_rate": 3.927782174901296e-05, | |
| "loss": 2.369, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.5914765012483028, | |
| "grad_norm": 0.37479647994041443, | |
| "learning_rate": 3.9164663342776283e-05, | |
| "loss": 2.3713, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.5921773027900661, | |
| "grad_norm": 0.3320664167404175, | |
| "learning_rate": 3.905156313921075e-05, | |
| "loss": 2.4052, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.5928781043318295, | |
| "grad_norm": 0.3524352014064789, | |
| "learning_rate": 3.8938521745841015e-05, | |
| "loss": 2.3561, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.593578905873593, | |
| "grad_norm": 0.33558979630470276, | |
| "learning_rate": 3.882553976987582e-05, | |
| "loss": 2.3747, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.5942797074153563, | |
| "grad_norm": 0.3500381410121918, | |
| "learning_rate": 3.8712617818204715e-05, | |
| "loss": 2.394, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.5949805089571197, | |
| "grad_norm": 0.3464096188545227, | |
| "learning_rate": 3.859975649739486e-05, | |
| "loss": 2.3596, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.5956813104988831, | |
| "grad_norm": 0.32962745428085327, | |
| "learning_rate": 3.848695641368773e-05, | |
| "loss": 2.3439, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.5963821120406465, | |
| "grad_norm": 0.32587331533432007, | |
| "learning_rate": 3.837421817299581e-05, | |
| "loss": 2.3904, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.5970829135824098, | |
| "grad_norm": 0.38531258702278137, | |
| "learning_rate": 3.8261542380899455e-05, | |
| "loss": 2.3632, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.5977837151241733, | |
| "grad_norm": 0.37153252959251404, | |
| "learning_rate": 3.814892964264354e-05, | |
| "loss": 2.3881, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.5984845166659367, | |
| "grad_norm": 0.3472519814968109, | |
| "learning_rate": 3.803638056313426e-05, | |
| "loss": 2.3681, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.5991853182077, | |
| "grad_norm": 0.36131608486175537, | |
| "learning_rate": 3.792389574693587e-05, | |
| "loss": 2.3396, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.5998861197494635, | |
| "grad_norm": 0.35826346278190613, | |
| "learning_rate": 3.781147579826737e-05, | |
| "loss": 2.3902, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.6005869212912268, | |
| "grad_norm": 0.3627219796180725, | |
| "learning_rate": 3.7699121320999395e-05, | |
| "loss": 2.3596, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.6012877228329903, | |
| "grad_norm": 0.3495817184448242, | |
| "learning_rate": 3.758683291865086e-05, | |
| "loss": 2.4382, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.6019885243747536, | |
| "grad_norm": 0.36131903529167175, | |
| "learning_rate": 3.7474611194385766e-05, | |
| "loss": 2.3524, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.602689325916517, | |
| "grad_norm": 0.34963107109069824, | |
| "learning_rate": 3.736245675100995e-05, | |
| "loss": 2.3319, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6033901274582804, | |
| "grad_norm": 0.3820604681968689, | |
| "learning_rate": 3.725037019096783e-05, | |
| "loss": 2.4102, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.6040909290000438, | |
| "grad_norm": 0.35276249051094055, | |
| "learning_rate": 3.713835211633918e-05, | |
| "loss": 2.3642, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.6047917305418072, | |
| "grad_norm": 0.34335559606552124, | |
| "learning_rate": 3.702640312883593e-05, | |
| "loss": 2.4387, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.6054925320835706, | |
| "grad_norm": 0.3219720721244812, | |
| "learning_rate": 3.6914523829798886e-05, | |
| "loss": 2.3652, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.606193333625334, | |
| "grad_norm": 0.3399808704853058, | |
| "learning_rate": 3.6802714820194536e-05, | |
| "loss": 2.3715, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6068941351670973, | |
| "grad_norm": 0.3394257426261902, | |
| "learning_rate": 3.669097670061176e-05, | |
| "loss": 2.3597, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.6075949367088608, | |
| "grad_norm": 0.3402734696865082, | |
| "learning_rate": 3.65793100712587e-05, | |
| "loss": 2.413, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.6082957382506241, | |
| "grad_norm": 0.32373809814453125, | |
| "learning_rate": 3.646771553195944e-05, | |
| "loss": 2.3725, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.6089965397923875, | |
| "grad_norm": 0.3503466546535492, | |
| "learning_rate": 3.6356193682150866e-05, | |
| "loss": 2.3952, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.609697341334151, | |
| "grad_norm": 0.346301406621933, | |
| "learning_rate": 3.62447451208794e-05, | |
| "loss": 2.3609, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6103981428759143, | |
| "grad_norm": 0.32363587617874146, | |
| "learning_rate": 3.613337044679774e-05, | |
| "loss": 2.3927, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.6110989444176778, | |
| "grad_norm": 0.32885536551475525, | |
| "learning_rate": 3.602207025816178e-05, | |
| "loss": 2.415, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.6117997459594411, | |
| "grad_norm": 0.3492369055747986, | |
| "learning_rate": 3.5910845152827226e-05, | |
| "loss": 2.3572, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.6125005475012045, | |
| "grad_norm": 0.33846792578697205, | |
| "learning_rate": 3.579969572824653e-05, | |
| "loss": 2.3844, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.6132013490429679, | |
| "grad_norm": 0.34526312351226807, | |
| "learning_rate": 3.568862258146564e-05, | |
| "loss": 2.392, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6139021505847313, | |
| "grad_norm": 0.3390520215034485, | |
| "learning_rate": 3.557762630912065e-05, | |
| "loss": 2.3845, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.6146029521264946, | |
| "grad_norm": 0.3587706983089447, | |
| "learning_rate": 3.546670750743485e-05, | |
| "loss": 2.3938, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.6153037536682581, | |
| "grad_norm": 0.3918447494506836, | |
| "learning_rate": 3.535586677221533e-05, | |
| "loss": 2.3673, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.6160045552100215, | |
| "grad_norm": 0.35069739818573, | |
| "learning_rate": 3.524510469884986e-05, | |
| "loss": 2.4215, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.6167053567517848, | |
| "grad_norm": 0.32987985014915466, | |
| "learning_rate": 3.5134421882303675e-05, | |
| "loss": 2.3984, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.6174061582935483, | |
| "grad_norm": 0.3513684868812561, | |
| "learning_rate": 3.5023818917116275e-05, | |
| "loss": 2.3863, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.6181069598353116, | |
| "grad_norm": 0.3559418320655823, | |
| "learning_rate": 3.491329639739819e-05, | |
| "loss": 2.3977, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.618807761377075, | |
| "grad_norm": 0.336349219083786, | |
| "learning_rate": 3.48028549168279e-05, | |
| "loss": 2.4053, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.6195085629188384, | |
| "grad_norm": 0.34559494256973267, | |
| "learning_rate": 3.469249506864853e-05, | |
| "loss": 2.3263, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.6202093644606018, | |
| "grad_norm": 0.3335482180118561, | |
| "learning_rate": 3.458221744566475e-05, | |
| "loss": 2.4196, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6209101660023653, | |
| "grad_norm": 0.33852940797805786, | |
| "learning_rate": 3.4472022640239535e-05, | |
| "loss": 2.4278, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.6216109675441286, | |
| "grad_norm": 0.3589319884777069, | |
| "learning_rate": 3.436191124429097e-05, | |
| "loss": 2.3749, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.622311769085892, | |
| "grad_norm": 0.35172101855278015, | |
| "learning_rate": 3.425188384928915e-05, | |
| "loss": 2.3647, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.6230125706276554, | |
| "grad_norm": 0.35688769817352295, | |
| "learning_rate": 3.414194104625291e-05, | |
| "loss": 2.3983, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.6237133721694188, | |
| "grad_norm": 0.357171893119812, | |
| "learning_rate": 3.403208342574675e-05, | |
| "loss": 2.3959, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6244141737111821, | |
| "grad_norm": 0.3492053747177124, | |
| "learning_rate": 3.392231157787757e-05, | |
| "loss": 2.4036, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.6251149752529456, | |
| "grad_norm": 0.3516004979610443, | |
| "learning_rate": 3.381262609229151e-05, | |
| "loss": 2.3698, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.6258157767947089, | |
| "grad_norm": 0.369892954826355, | |
| "learning_rate": 3.370302755817085e-05, | |
| "loss": 2.3745, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.6265165783364723, | |
| "grad_norm": 0.35066142678260803, | |
| "learning_rate": 3.359351656423081e-05, | |
| "loss": 2.4181, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.6272173798782358, | |
| "grad_norm": 0.33483070135116577, | |
| "learning_rate": 3.3484093698716354e-05, | |
| "loss": 2.3796, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.6279181814199991, | |
| "grad_norm": 0.33405861258506775, | |
| "learning_rate": 3.33747595493991e-05, | |
| "loss": 2.3701, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.6286189829617626, | |
| "grad_norm": 0.3403043746948242, | |
| "learning_rate": 3.3265514703574054e-05, | |
| "loss": 2.3794, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.6293197845035259, | |
| "grad_norm": 0.3302968442440033, | |
| "learning_rate": 3.315635974805657e-05, | |
| "loss": 2.3611, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.6300205860452893, | |
| "grad_norm": 0.3315591812133789, | |
| "learning_rate": 3.304729526917916e-05, | |
| "loss": 2.4135, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.6307213875870527, | |
| "grad_norm": 0.3364754617214203, | |
| "learning_rate": 3.293832185278831e-05, | |
| "loss": 2.3489, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6314221891288161, | |
| "grad_norm": 0.34280499815940857, | |
| "learning_rate": 3.2829440084241395e-05, | |
| "loss": 2.3161, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.6321229906705794, | |
| "grad_norm": 0.3507125973701477, | |
| "learning_rate": 3.272065054840343e-05, | |
| "loss": 2.3812, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.6328237922123429, | |
| "grad_norm": 0.34269067645072937, | |
| "learning_rate": 3.2611953829644035e-05, | |
| "loss": 2.4026, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.6335245937541063, | |
| "grad_norm": 0.3264462649822235, | |
| "learning_rate": 3.250335051183428e-05, | |
| "loss": 2.3835, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.6342253952958696, | |
| "grad_norm": 0.3625379502773285, | |
| "learning_rate": 3.239484117834351e-05, | |
| "loss": 2.3814, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6349261968376331, | |
| "grad_norm": 0.4048402011394501, | |
| "learning_rate": 3.228642641203621e-05, | |
| "loss": 2.3472, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.6356269983793964, | |
| "grad_norm": 0.37241023778915405, | |
| "learning_rate": 3.2178106795268906e-05, | |
| "loss": 2.421, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.6363277999211598, | |
| "grad_norm": 0.36283889412879944, | |
| "learning_rate": 3.2069882909887014e-05, | |
| "loss": 2.4044, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.6370286014629232, | |
| "grad_norm": 0.342521607875824, | |
| "learning_rate": 3.196175533722173e-05, | |
| "loss": 2.3803, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.6377294030046866, | |
| "grad_norm": 0.36356839537620544, | |
| "learning_rate": 3.1853724658086916e-05, | |
| "loss": 2.3757, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.63843020454645, | |
| "grad_norm": 0.39169037342071533, | |
| "learning_rate": 3.1745791452775945e-05, | |
| "loss": 2.3647, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.6391310060882134, | |
| "grad_norm": 0.3376883864402771, | |
| "learning_rate": 3.16379563010586e-05, | |
| "loss": 2.4062, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.6398318076299768, | |
| "grad_norm": 0.3581342101097107, | |
| "learning_rate": 3.153021978217796e-05, | |
| "loss": 2.3286, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.6405326091717402, | |
| "grad_norm": 0.33370834589004517, | |
| "learning_rate": 3.142258247484732e-05, | |
| "loss": 2.3828, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.6412334107135036, | |
| "grad_norm": 0.33179208636283875, | |
| "learning_rate": 3.131504495724701e-05, | |
| "loss": 2.3454, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.6419342122552669, | |
| "grad_norm": 0.3586229979991913, | |
| "learning_rate": 3.120760780702139e-05, | |
| "loss": 2.3573, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.6426350137970304, | |
| "grad_norm": 0.35560375452041626, | |
| "learning_rate": 3.110027160127567e-05, | |
| "loss": 2.3991, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.6433358153387937, | |
| "grad_norm": 0.3441837430000305, | |
| "learning_rate": 3.0993036916572765e-05, | |
| "loss": 2.3563, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.6440366168805571, | |
| "grad_norm": 0.3472030460834503, | |
| "learning_rate": 3.088590432893036e-05, | |
| "loss": 2.3637, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.6447374184223206, | |
| "grad_norm": 0.35485780239105225, | |
| "learning_rate": 3.077887441381766e-05, | |
| "loss": 2.3822, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.6454382199640839, | |
| "grad_norm": 0.3376321792602539, | |
| "learning_rate": 3.067194774615238e-05, | |
| "loss": 2.4303, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.6461390215058473, | |
| "grad_norm": 0.3651726543903351, | |
| "learning_rate": 3.056512490029767e-05, | |
| "loss": 2.3709, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.6468398230476107, | |
| "grad_norm": 0.3571789264678955, | |
| "learning_rate": 3.0458406450058898e-05, | |
| "loss": 2.3516, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.6475406245893741, | |
| "grad_norm": 0.3585575520992279, | |
| "learning_rate": 3.0351792968680747e-05, | |
| "loss": 2.3929, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.6482414261311374, | |
| "grad_norm": 0.3756263852119446, | |
| "learning_rate": 3.024528502884403e-05, | |
| "loss": 2.364, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.6489422276729009, | |
| "grad_norm": 0.3554941415786743, | |
| "learning_rate": 3.013888320266264e-05, | |
| "loss": 2.384, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.6496430292146643, | |
| "grad_norm": 0.32703763246536255, | |
| "learning_rate": 3.0032588061680473e-05, | |
| "loss": 2.3772, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.6503438307564277, | |
| "grad_norm": 0.33231183886528015, | |
| "learning_rate": 2.9926400176868342e-05, | |
| "loss": 2.3496, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.6510446322981911, | |
| "grad_norm": 0.3634251356124878, | |
| "learning_rate": 2.9820320118620948e-05, | |
| "loss": 2.4144, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.6517454338399544, | |
| "grad_norm": 0.3680966794490814, | |
| "learning_rate": 2.9714348456753798e-05, | |
| "loss": 2.3846, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.6524462353817179, | |
| "grad_norm": 0.34089308977127075, | |
| "learning_rate": 2.9608485760500114e-05, | |
| "loss": 2.3603, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.6531470369234812, | |
| "grad_norm": 0.3336240351200104, | |
| "learning_rate": 2.9502732598507848e-05, | |
| "loss": 2.4076, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.6538478384652446, | |
| "grad_norm": 0.3516181707382202, | |
| "learning_rate": 2.939708953883652e-05, | |
| "loss": 2.3718, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.654548640007008, | |
| "grad_norm": 0.34208613634109497, | |
| "learning_rate": 2.929155714895428e-05, | |
| "loss": 2.3718, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.6552494415487714, | |
| "grad_norm": 0.32070669531822205, | |
| "learning_rate": 2.91861359957348e-05, | |
| "loss": 2.3663, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.6559502430905348, | |
| "grad_norm": 0.3350364863872528, | |
| "learning_rate": 2.9080826645454202e-05, | |
| "loss": 2.3477, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.6566510446322982, | |
| "grad_norm": 0.31771308183670044, | |
| "learning_rate": 2.8975629663788116e-05, | |
| "loss": 2.3984, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.6573518461740616, | |
| "grad_norm": 0.33855804800987244, | |
| "learning_rate": 2.887054561580852e-05, | |
| "loss": 2.3398, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.658052647715825, | |
| "grad_norm": 0.3342411518096924, | |
| "learning_rate": 2.8765575065980756e-05, | |
| "loss": 2.3784, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.6587534492575884, | |
| "grad_norm": 0.3331626057624817, | |
| "learning_rate": 2.8660718578160577e-05, | |
| "loss": 2.354, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.6594542507993517, | |
| "grad_norm": 0.3264644742012024, | |
| "learning_rate": 2.855597671559098e-05, | |
| "loss": 2.3638, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.6601550523411152, | |
| "grad_norm": 0.3431566059589386, | |
| "learning_rate": 2.8451350040899233e-05, | |
| "loss": 2.4266, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.6608558538828786, | |
| "grad_norm": 0.35340288281440735, | |
| "learning_rate": 2.8346839116093937e-05, | |
| "loss": 2.3948, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.6615566554246419, | |
| "grad_norm": 0.32424479722976685, | |
| "learning_rate": 2.8242444502561883e-05, | |
| "loss": 2.3137, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.6622574569664054, | |
| "grad_norm": 0.31753644347190857, | |
| "learning_rate": 2.813816676106507e-05, | |
| "loss": 2.3725, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.6629582585081687, | |
| "grad_norm": 0.32894328236579895, | |
| "learning_rate": 2.803400645173778e-05, | |
| "loss": 2.366, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.6636590600499321, | |
| "grad_norm": 0.31512895226478577, | |
| "learning_rate": 2.7929964134083435e-05, | |
| "loss": 2.3395, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.6643598615916955, | |
| "grad_norm": 0.3142877221107483, | |
| "learning_rate": 2.7826040366971732e-05, | |
| "loss": 2.3626, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.6650606631334589, | |
| "grad_norm": 0.3408631682395935, | |
| "learning_rate": 2.7722235708635447e-05, | |
| "loss": 2.3859, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.6657614646752222, | |
| "grad_norm": 0.34155815839767456, | |
| "learning_rate": 2.7618550716667675e-05, | |
| "loss": 2.4071, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.6664622662169857, | |
| "grad_norm": 0.3305151164531708, | |
| "learning_rate": 2.7514985948018647e-05, | |
| "loss": 2.4022, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.6671630677587491, | |
| "grad_norm": 0.322914183139801, | |
| "learning_rate": 2.7411541958992846e-05, | |
| "loss": 2.3756, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.6678638693005124, | |
| "grad_norm": 0.334212064743042, | |
| "learning_rate": 2.730821930524597e-05, | |
| "loss": 2.3582, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.6685646708422759, | |
| "grad_norm": 0.35008370876312256, | |
| "learning_rate": 2.7205018541781935e-05, | |
| "loss": 2.3606, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.6692654723840392, | |
| "grad_norm": 0.3606286644935608, | |
| "learning_rate": 2.7101940222949905e-05, | |
| "loss": 2.3668, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.6699662739258027, | |
| "grad_norm": 0.3384963274002075, | |
| "learning_rate": 2.699898490244141e-05, | |
| "loss": 2.393, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.670667075467566, | |
| "grad_norm": 0.3387192487716675, | |
| "learning_rate": 2.6896153133287173e-05, | |
| "loss": 2.3758, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.6713678770093294, | |
| "grad_norm": 0.3330741822719574, | |
| "learning_rate": 2.679344546785435e-05, | |
| "loss": 2.3432, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.6720686785510928, | |
| "grad_norm": 0.3369733989238739, | |
| "learning_rate": 2.6690862457843414e-05, | |
| "loss": 2.3836, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.6727694800928562, | |
| "grad_norm": 0.32594871520996094, | |
| "learning_rate": 2.658840465428525e-05, | |
| "loss": 2.3593, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.6734702816346196, | |
| "grad_norm": 0.34917503595352173, | |
| "learning_rate": 2.648607260753816e-05, | |
| "loss": 2.3494, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.674171083176383, | |
| "grad_norm": 0.33990025520324707, | |
| "learning_rate": 2.6383866867285034e-05, | |
| "loss": 2.3964, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.6748718847181464, | |
| "grad_norm": 0.3280743956565857, | |
| "learning_rate": 2.6281787982530193e-05, | |
| "loss": 2.3544, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.6755726862599097, | |
| "grad_norm": 0.3042140305042267, | |
| "learning_rate": 2.6179836501596582e-05, | |
| "loss": 2.3887, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.6762734878016732, | |
| "grad_norm": 0.31472572684288025, | |
| "learning_rate": 2.6078012972122823e-05, | |
| "loss": 2.388, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.6769742893434365, | |
| "grad_norm": 0.31360968947410583, | |
| "learning_rate": 2.597631794106018e-05, | |
| "loss": 2.3672, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.6776750908852, | |
| "grad_norm": 0.30370283126831055, | |
| "learning_rate": 2.587475195466974e-05, | |
| "loss": 2.3699, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.6783758924269634, | |
| "grad_norm": 0.31287258863449097, | |
| "learning_rate": 2.5773315558519374e-05, | |
| "loss": 2.3488, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.6790766939687267, | |
| "grad_norm": 0.3105391561985016, | |
| "learning_rate": 2.567200929748087e-05, | |
| "loss": 2.3786, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.6797774955104902, | |
| "grad_norm": 0.3138721287250519, | |
| "learning_rate": 2.557083371572695e-05, | |
| "loss": 2.3476, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.6804782970522535, | |
| "grad_norm": 0.34871888160705566, | |
| "learning_rate": 2.546978935672846e-05, | |
| "loss": 2.3925, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.6811790985940169, | |
| "grad_norm": 0.32576167583465576, | |
| "learning_rate": 2.5368876763251304e-05, | |
| "loss": 2.3744, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.6818799001357803, | |
| "grad_norm": 0.3523615896701813, | |
| "learning_rate": 2.5268096477353653e-05, | |
| "loss": 2.3429, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.6825807016775437, | |
| "grad_norm": 0.3550513982772827, | |
| "learning_rate": 2.516744904038294e-05, | |
| "loss": 2.3549, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.683281503219307, | |
| "grad_norm": 0.34193849563598633, | |
| "learning_rate": 2.5066934992973013e-05, | |
| "loss": 2.3867, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.6839823047610705, | |
| "grad_norm": 0.33134225010871887, | |
| "learning_rate": 2.496655487504117e-05, | |
| "loss": 2.4244, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.6846831063028339, | |
| "grad_norm": 0.3228800594806671, | |
| "learning_rate": 2.4866309225785384e-05, | |
| "loss": 2.3861, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.6853839078445972, | |
| "grad_norm": 0.3166099786758423, | |
| "learning_rate": 2.476619858368122e-05, | |
| "loss": 2.391, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.6860847093863607, | |
| "grad_norm": 0.3262847661972046, | |
| "learning_rate": 2.4666223486479157e-05, | |
| "loss": 2.3723, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.686785510928124, | |
| "grad_norm": 0.3269117772579193, | |
| "learning_rate": 2.4566384471201442e-05, | |
| "loss": 2.3764, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.6874863124698875, | |
| "grad_norm": 0.3313688039779663, | |
| "learning_rate": 2.4466682074139484e-05, | |
| "loss": 2.3745, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.6881871140116508, | |
| "grad_norm": 0.3263818621635437, | |
| "learning_rate": 2.4367116830850755e-05, | |
| "loss": 2.3769, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.6888879155534142, | |
| "grad_norm": 0.31019410490989685, | |
| "learning_rate": 2.4267689276156063e-05, | |
| "loss": 2.3721, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.6895887170951777, | |
| "grad_norm": 0.3046749234199524, | |
| "learning_rate": 2.4168399944136555e-05, | |
| "loss": 2.3839, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.690289518636941, | |
| "grad_norm": 0.32256773114204407, | |
| "learning_rate": 2.4069249368130907e-05, | |
| "loss": 2.3812, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.6909903201787044, | |
| "grad_norm": 0.35312485694885254, | |
| "learning_rate": 2.397023808073252e-05, | |
| "loss": 2.3781, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.6916911217204678, | |
| "grad_norm": 0.35512855648994446, | |
| "learning_rate": 2.3871366613786528e-05, | |
| "loss": 2.395, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.6923919232622312, | |
| "grad_norm": 0.331129252910614, | |
| "learning_rate": 2.377263549838707e-05, | |
| "loss": 2.354, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.6930927248039945, | |
| "grad_norm": 0.3296273946762085, | |
| "learning_rate": 2.3674045264874328e-05, | |
| "loss": 2.3715, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.693793526345758, | |
| "grad_norm": 0.31631606817245483, | |
| "learning_rate": 2.357559644283176e-05, | |
| "loss": 2.342, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.6944943278875213, | |
| "grad_norm": 0.32511383295059204, | |
| "learning_rate": 2.347728956108319e-05, | |
| "loss": 2.3861, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.6951951294292847, | |
| "grad_norm": 0.3479836881160736, | |
| "learning_rate": 2.337912514769005e-05, | |
| "loss": 2.3903, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.6958959309710482, | |
| "grad_norm": 0.3035725951194763, | |
| "learning_rate": 2.328110372994845e-05, | |
| "loss": 2.3855, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.6965967325128115, | |
| "grad_norm": 0.327636182308197, | |
| "learning_rate": 2.3183225834386458e-05, | |
| "loss": 2.4165, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.697297534054575, | |
| "grad_norm": 0.32202282547950745, | |
| "learning_rate": 2.308549198676107e-05, | |
| "loss": 2.3301, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.6979983355963383, | |
| "grad_norm": 0.314789742231369, | |
| "learning_rate": 2.2987902712055675e-05, | |
| "loss": 2.3704, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.6986991371381017, | |
| "grad_norm": 0.31462863087654114, | |
| "learning_rate": 2.2890458534476965e-05, | |
| "loss": 2.4075, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.699399938679865, | |
| "grad_norm": 0.30073073506355286, | |
| "learning_rate": 2.2793159977452316e-05, | |
| "loss": 2.346, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.7001007402216285, | |
| "grad_norm": 0.3146219253540039, | |
| "learning_rate": 2.2696007563626836e-05, | |
| "loss": 2.3283, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.7008015417633919, | |
| "grad_norm": 0.32649290561676025, | |
| "learning_rate": 2.2599001814860638e-05, | |
| "loss": 2.3676, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7015023433051553, | |
| "grad_norm": 0.3073072135448456, | |
| "learning_rate": 2.2502143252225995e-05, | |
| "loss": 2.3607, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.7022031448469187, | |
| "grad_norm": 0.3149600625038147, | |
| "learning_rate": 2.2405432396004618e-05, | |
| "loss": 2.421, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 0.702903946388682, | |
| "grad_norm": 0.32012826204299927, | |
| "learning_rate": 2.230886976568472e-05, | |
| "loss": 2.3586, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 0.7036047479304455, | |
| "grad_norm": 0.3099457919597626, | |
| "learning_rate": 2.22124558799584e-05, | |
| "loss": 2.3279, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.7043055494722088, | |
| "grad_norm": 0.32523518800735474, | |
| "learning_rate": 2.21161912567187e-05, | |
| "loss": 2.384, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.7050063510139722, | |
| "grad_norm": 0.30514463782310486, | |
| "learning_rate": 2.2020076413056888e-05, | |
| "loss": 2.3864, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.7057071525557356, | |
| "grad_norm": 0.31663551926612854, | |
| "learning_rate": 2.1924111865259744e-05, | |
| "loss": 2.3916, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 0.706407954097499, | |
| "grad_norm": 0.3069145679473877, | |
| "learning_rate": 2.1828298128806647e-05, | |
| "loss": 2.4038, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.7071087556392625, | |
| "grad_norm": 0.31538233160972595, | |
| "learning_rate": 2.1732635718366957e-05, | |
| "loss": 2.3838, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 0.7078095571810258, | |
| "grad_norm": 0.31778815388679504, | |
| "learning_rate": 2.1637125147797127e-05, | |
| "loss": 2.363, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.7085103587227892, | |
| "grad_norm": 0.30828920006752014, | |
| "learning_rate": 2.1541766930138e-05, | |
| "loss": 2.3477, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 0.7092111602645526, | |
| "grad_norm": 0.3219800293445587, | |
| "learning_rate": 2.1446561577612047e-05, | |
| "loss": 2.4047, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.709911961806316, | |
| "grad_norm": 0.3244566023349762, | |
| "learning_rate": 2.135150960162064e-05, | |
| "loss": 2.3579, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 0.7106127633480793, | |
| "grad_norm": 0.3246060013771057, | |
| "learning_rate": 2.1256611512741236e-05, | |
| "loss": 2.4145, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 0.7113135648898428, | |
| "grad_norm": 0.3167009949684143, | |
| "learning_rate": 2.1161867820724762e-05, | |
| "loss": 2.3503, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.7120143664316062, | |
| "grad_norm": 0.32336217164993286, | |
| "learning_rate": 2.106727903449265e-05, | |
| "loss": 2.4206, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.7127151679733695, | |
| "grad_norm": 0.31934854388237, | |
| "learning_rate": 2.0972845662134383e-05, | |
| "loss": 2.3433, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 0.713415969515133, | |
| "grad_norm": 0.31124401092529297, | |
| "learning_rate": 2.087856821090455e-05, | |
| "loss": 2.3847, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.7141167710568963, | |
| "grad_norm": 0.3190089762210846, | |
| "learning_rate": 2.078444718722025e-05, | |
| "loss": 2.3882, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 0.7148175725986597, | |
| "grad_norm": 0.32175031304359436, | |
| "learning_rate": 2.0690483096658285e-05, | |
| "loss": 2.3594, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.7155183741404231, | |
| "grad_norm": 0.3287060260772705, | |
| "learning_rate": 2.0596676443952502e-05, | |
| "loss": 2.3887, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 0.7162191756821865, | |
| "grad_norm": 0.33534112572669983, | |
| "learning_rate": 2.0503027732991014e-05, | |
| "loss": 2.386, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.7169199772239498, | |
| "grad_norm": 0.33609291911125183, | |
| "learning_rate": 2.040953746681364e-05, | |
| "loss": 2.3531, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 0.7176207787657133, | |
| "grad_norm": 0.3085058331489563, | |
| "learning_rate": 2.0316206147608986e-05, | |
| "loss": 2.3343, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.7183215803074767, | |
| "grad_norm": 0.3150891661643982, | |
| "learning_rate": 2.022303427671196e-05, | |
| "loss": 2.4038, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.71902238184924, | |
| "grad_norm": 0.3157922625541687, | |
| "learning_rate": 2.0130022354600937e-05, | |
| "loss": 2.3908, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 0.7197231833910035, | |
| "grad_norm": 0.3120626211166382, | |
| "learning_rate": 2.0037170880895095e-05, | |
| "loss": 2.3935, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 0.7204239849327668, | |
| "grad_norm": 0.31322991847991943, | |
| "learning_rate": 1.9944480354351826e-05, | |
| "loss": 2.3477, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.7211247864745303, | |
| "grad_norm": 0.3245989680290222, | |
| "learning_rate": 1.9851951272863924e-05, | |
| "loss": 2.3643, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.7218255880162936, | |
| "grad_norm": 0.30701684951782227, | |
| "learning_rate": 1.975958413345699e-05, | |
| "loss": 2.3869, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.722526389558057, | |
| "grad_norm": 0.331910103559494, | |
| "learning_rate": 1.9667379432286714e-05, | |
| "loss": 2.3918, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 0.7232271910998204, | |
| "grad_norm": 0.31571826338768005, | |
| "learning_rate": 1.9575337664636318e-05, | |
| "loss": 2.4046, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 0.7239279926415838, | |
| "grad_norm": 0.3003998100757599, | |
| "learning_rate": 1.9483459324913716e-05, | |
| "loss": 2.3751, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 0.7246287941833472, | |
| "grad_norm": 0.31976088881492615, | |
| "learning_rate": 1.9391744906649057e-05, | |
| "loss": 2.353, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.7253295957251106, | |
| "grad_norm": 0.32052701711654663, | |
| "learning_rate": 1.9300194902491886e-05, | |
| "loss": 2.3765, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.726030397266874, | |
| "grad_norm": 0.3127794861793518, | |
| "learning_rate": 1.920880980420868e-05, | |
| "loss": 2.3353, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.7267311988086373, | |
| "grad_norm": 0.3171677887439728, | |
| "learning_rate": 1.911759010268e-05, | |
| "loss": 2.3363, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 0.7274320003504008, | |
| "grad_norm": 0.31756749749183655, | |
| "learning_rate": 1.902653628789808e-05, | |
| "loss": 2.3129, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 0.7281328018921641, | |
| "grad_norm": 0.3366509675979614, | |
| "learning_rate": 1.8935648848963993e-05, | |
| "loss": 2.3455, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 0.7288336034339276, | |
| "grad_norm": 0.32132747769355774, | |
| "learning_rate": 1.8844928274085204e-05, | |
| "loss": 2.3961, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.729534404975691, | |
| "grad_norm": 0.31926000118255615, | |
| "learning_rate": 1.8754375050572793e-05, | |
| "loss": 2.3712, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 0.7302352065174543, | |
| "grad_norm": 0.3073156177997589, | |
| "learning_rate": 1.8663989664838904e-05, | |
| "loss": 2.3746, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.7309360080592178, | |
| "grad_norm": 0.3057498037815094, | |
| "learning_rate": 1.8573772602394137e-05, | |
| "loss": 2.395, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.7316368096009811, | |
| "grad_norm": 0.3112606704235077, | |
| "learning_rate": 1.8483724347844972e-05, | |
| "loss": 2.3476, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 0.7323376111427445, | |
| "grad_norm": 0.30744901299476624, | |
| "learning_rate": 1.8393845384891063e-05, | |
| "loss": 2.3579, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.7330384126845079, | |
| "grad_norm": 0.29776525497436523, | |
| "learning_rate": 1.8304136196322774e-05, | |
| "loss": 2.3737, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.7337392142262713, | |
| "grad_norm": 0.3106415569782257, | |
| "learning_rate": 1.8214597264018457e-05, | |
| "loss": 2.3484, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 0.7344400157680346, | |
| "grad_norm": 0.3004627227783203, | |
| "learning_rate": 1.812522906894194e-05, | |
| "loss": 2.3676, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.7351408173097981, | |
| "grad_norm": 0.3104562759399414, | |
| "learning_rate": 1.803603209113992e-05, | |
| "loss": 2.3755, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 0.7358416188515615, | |
| "grad_norm": 0.3282312750816345, | |
| "learning_rate": 1.794700680973943e-05, | |
| "loss": 2.3844, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7365424203933248, | |
| "grad_norm": 0.3132244646549225, | |
| "learning_rate": 1.7858153702945178e-05, | |
| "loss": 2.3635, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 0.7372432219350883, | |
| "grad_norm": 0.30049052834510803, | |
| "learning_rate": 1.7769473248037023e-05, | |
| "loss": 2.3607, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.7379440234768516, | |
| "grad_norm": 0.31369882822036743, | |
| "learning_rate": 1.7680965921367464e-05, | |
| "loss": 2.3505, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 0.7386448250186151, | |
| "grad_norm": 0.31958824396133423, | |
| "learning_rate": 1.759263219835897e-05, | |
| "loss": 2.3458, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.7393456265603784, | |
| "grad_norm": 0.30758705735206604, | |
| "learning_rate": 1.7504472553501548e-05, | |
| "loss": 2.3489, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.7400464281021418, | |
| "grad_norm": 0.3082713484764099, | |
| "learning_rate": 1.7416487460350094e-05, | |
| "loss": 2.3831, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.7407472296439053, | |
| "grad_norm": 0.31405431032180786, | |
| "learning_rate": 1.732867739152189e-05, | |
| "loss": 2.3671, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.7414480311856686, | |
| "grad_norm": 0.3141951262950897, | |
| "learning_rate": 1.724104281869406e-05, | |
| "loss": 2.3364, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.742148832727432, | |
| "grad_norm": 0.32172268629074097, | |
| "learning_rate": 1.7153584212601076e-05, | |
| "loss": 2.3402, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 0.7428496342691954, | |
| "grad_norm": 0.3076493740081787, | |
| "learning_rate": 1.7066302043032145e-05, | |
| "loss": 2.3494, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.7435504358109588, | |
| "grad_norm": 0.30486494302749634, | |
| "learning_rate": 1.6979196778828808e-05, | |
| "loss": 2.3672, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 0.7442512373527221, | |
| "grad_norm": 0.3125833570957184, | |
| "learning_rate": 1.689226888788224e-05, | |
| "loss": 2.4057, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.7449520388944856, | |
| "grad_norm": 0.3145892024040222, | |
| "learning_rate": 1.6805518837130945e-05, | |
| "loss": 2.3246, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 0.7456528404362489, | |
| "grad_norm": 0.3154381215572357, | |
| "learning_rate": 1.6718947092558074e-05, | |
| "loss": 2.3379, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.7463536419780123, | |
| "grad_norm": 0.31429165601730347, | |
| "learning_rate": 1.663255411918907e-05, | |
| "loss": 2.3551, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.7470544435197758, | |
| "grad_norm": 0.3162185549736023, | |
| "learning_rate": 1.6546340381089014e-05, | |
| "loss": 2.3231, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.7477552450615391, | |
| "grad_norm": 0.3119218349456787, | |
| "learning_rate": 1.6460306341360287e-05, | |
| "loss": 2.393, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 0.7484560466033026, | |
| "grad_norm": 0.2972491979598999, | |
| "learning_rate": 1.6374452462139966e-05, | |
| "loss": 2.379, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 0.7491568481450659, | |
| "grad_norm": 0.2924615144729614, | |
| "learning_rate": 1.6288779204597395e-05, | |
| "loss": 2.3434, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 0.7498576496868293, | |
| "grad_norm": 0.31871649622917175, | |
| "learning_rate": 1.6203287028931673e-05, | |
| "loss": 2.3528, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.7505584512285927, | |
| "grad_norm": 0.30172446370124817, | |
| "learning_rate": 1.6117976394369265e-05, | |
| "loss": 2.3445, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.7512592527703561, | |
| "grad_norm": 0.31651854515075684, | |
| "learning_rate": 1.603284775916144e-05, | |
| "loss": 2.3723, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.7519600543121195, | |
| "grad_norm": 0.3154197931289673, | |
| "learning_rate": 1.594790158058182e-05, | |
| "loss": 2.363, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 0.7526608558538829, | |
| "grad_norm": 0.3029579818248749, | |
| "learning_rate": 1.586313831492402e-05, | |
| "loss": 2.381, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 0.7533616573956463, | |
| "grad_norm": 0.3101804852485657, | |
| "learning_rate": 1.5778558417499047e-05, | |
| "loss": 2.3628, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.7540624589374096, | |
| "grad_norm": 0.30842190980911255, | |
| "learning_rate": 1.569416234263302e-05, | |
| "loss": 2.3497, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 0.7547632604791731, | |
| "grad_norm": 0.300167977809906, | |
| "learning_rate": 1.5609950543664565e-05, | |
| "loss": 2.358, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 0.7554640620209364, | |
| "grad_norm": 0.3156902492046356, | |
| "learning_rate": 1.55259234729425e-05, | |
| "loss": 2.3526, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.7561648635626999, | |
| "grad_norm": 0.3075679540634155, | |
| "learning_rate": 1.544208158182334e-05, | |
| "loss": 2.3826, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 0.7568656651044632, | |
| "grad_norm": 0.3025675415992737, | |
| "learning_rate": 1.535842532066895e-05, | |
| "loss": 2.3589, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.7575664666462266, | |
| "grad_norm": 0.30232781171798706, | |
| "learning_rate": 1.5274955138844e-05, | |
| "loss": 2.3755, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 0.7582672681879901, | |
| "grad_norm": 0.3080657124519348, | |
| "learning_rate": 1.5191671484713699e-05, | |
| "loss": 2.3516, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 0.7589680697297534, | |
| "grad_norm": 0.30511733889579773, | |
| "learning_rate": 1.5108574805641212e-05, | |
| "loss": 2.3538, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 0.7596688712715168, | |
| "grad_norm": 0.32080376148223877, | |
| "learning_rate": 1.5025665547985463e-05, | |
| "loss": 2.362, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 0.7603696728132802, | |
| "grad_norm": 0.31854546070098877, | |
| "learning_rate": 1.4942944157098548e-05, | |
| "loss": 2.3471, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.7610704743550436, | |
| "grad_norm": 0.3112950921058655, | |
| "learning_rate": 1.4860411077323494e-05, | |
| "loss": 2.3557, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 0.7617712758968069, | |
| "grad_norm": 0.3270244002342224, | |
| "learning_rate": 1.4778066751991753e-05, | |
| "loss": 2.4197, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 0.7624720774385704, | |
| "grad_norm": 0.3201599419116974, | |
| "learning_rate": 1.4695911623420882e-05, | |
| "loss": 2.3543, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.7631728789803337, | |
| "grad_norm": 0.32384952902793884, | |
| "learning_rate": 1.4613946132912148e-05, | |
| "loss": 2.3549, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 0.7638736805220971, | |
| "grad_norm": 0.3128424882888794, | |
| "learning_rate": 1.4532170720748206e-05, | |
| "loss": 2.3562, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.7645744820638606, | |
| "grad_norm": 0.3070273995399475, | |
| "learning_rate": 1.4450585826190644e-05, | |
| "loss": 2.349, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 0.7652752836056239, | |
| "grad_norm": 0.3091195821762085, | |
| "learning_rate": 1.4369191887477718e-05, | |
| "loss": 2.3387, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.7659760851473874, | |
| "grad_norm": 0.3153855502605438, | |
| "learning_rate": 1.4287989341821912e-05, | |
| "loss": 2.3883, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 0.7666768866891507, | |
| "grad_norm": 0.29262298345565796, | |
| "learning_rate": 1.420697862540764e-05, | |
| "loss": 2.3984, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 0.7673776882309141, | |
| "grad_norm": 0.3134435713291168, | |
| "learning_rate": 1.4126160173388931e-05, | |
| "loss": 2.3826, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.7680784897726775, | |
| "grad_norm": 0.32292330265045166, | |
| "learning_rate": 1.4045534419886991e-05, | |
| "loss": 2.4054, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 0.7687792913144409, | |
| "grad_norm": 0.31151464581489563, | |
| "learning_rate": 1.3965101797988005e-05, | |
| "loss": 2.382, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 0.7694800928562043, | |
| "grad_norm": 0.29010194540023804, | |
| "learning_rate": 1.3884862739740684e-05, | |
| "loss": 2.3674, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 0.7701808943979677, | |
| "grad_norm": 0.3445095419883728, | |
| "learning_rate": 1.3804817676154013e-05, | |
| "loss": 2.3417, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 0.7708816959397311, | |
| "grad_norm": 0.32138895988464355, | |
| "learning_rate": 1.3724967037194924e-05, | |
| "loss": 2.3349, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.7715824974814944, | |
| "grad_norm": 0.30607300996780396, | |
| "learning_rate": 1.3645311251786018e-05, | |
| "loss": 2.3805, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 0.7722832990232579, | |
| "grad_norm": 0.3357834219932556, | |
| "learning_rate": 1.3565850747803171e-05, | |
| "loss": 2.3941, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 0.7729841005650212, | |
| "grad_norm": 0.3017485439777374, | |
| "learning_rate": 1.3486585952073365e-05, | |
| "loss": 2.372, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 0.7736849021067846, | |
| "grad_norm": 0.30723971128463745, | |
| "learning_rate": 1.3407517290372218e-05, | |
| "loss": 2.3409, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 0.774385703648548, | |
| "grad_norm": 0.30255433917045593, | |
| "learning_rate": 1.3328645187421918e-05, | |
| "loss": 2.4014, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.7750865051903114, | |
| "grad_norm": 0.2989521622657776, | |
| "learning_rate": 1.3249970066888733e-05, | |
| "loss": 2.3645, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 0.7757873067320749, | |
| "grad_norm": 0.3191367983818054, | |
| "learning_rate": 1.3171492351380909e-05, | |
| "loss": 2.3949, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 0.7764881082738382, | |
| "grad_norm": 0.3137199878692627, | |
| "learning_rate": 1.3093212462446247e-05, | |
| "loss": 2.352, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 0.7771889098156016, | |
| "grad_norm": 0.31314703822135925, | |
| "learning_rate": 1.3015130820569955e-05, | |
| "loss": 2.367, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 0.777889711357365, | |
| "grad_norm": 0.32591792941093445, | |
| "learning_rate": 1.2937247845172306e-05, | |
| "loss": 2.379, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.7785905128991284, | |
| "grad_norm": 0.3091329038143158, | |
| "learning_rate": 1.2859563954606486e-05, | |
| "loss": 2.3594, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 0.7792913144408917, | |
| "grad_norm": 0.2992273271083832, | |
| "learning_rate": 1.2782079566156214e-05, | |
| "loss": 2.3752, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 0.7799921159826552, | |
| "grad_norm": 0.3123078942298889, | |
| "learning_rate": 1.2704795096033628e-05, | |
| "loss": 2.3217, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 0.7806929175244186, | |
| "grad_norm": 0.3097928464412689, | |
| "learning_rate": 1.2627710959376965e-05, | |
| "loss": 2.3391, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 0.7813937190661819, | |
| "grad_norm": 0.30402112007141113, | |
| "learning_rate": 1.255082757024834e-05, | |
| "loss": 2.3702, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.7820945206079454, | |
| "grad_norm": 0.32970523834228516, | |
| "learning_rate": 1.24741453416316e-05, | |
| "loss": 2.3548, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 0.7827953221497087, | |
| "grad_norm": 0.29732605814933777, | |
| "learning_rate": 1.2397664685429972e-05, | |
| "loss": 2.3492, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 0.7834961236914721, | |
| "grad_norm": 0.3095628321170807, | |
| "learning_rate": 1.2321386012464009e-05, | |
| "loss": 2.3184, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 0.7841969252332355, | |
| "grad_norm": 0.2890729606151581, | |
| "learning_rate": 1.2245309732469196e-05, | |
| "loss": 2.3688, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 0.7848977267749989, | |
| "grad_norm": 0.29672184586524963, | |
| "learning_rate": 1.216943625409394e-05, | |
| "loss": 2.4295, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.7855985283167622, | |
| "grad_norm": 0.2954619526863098, | |
| "learning_rate": 1.2093765984897238e-05, | |
| "loss": 2.3044, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 0.7862993298585257, | |
| "grad_norm": 0.2894071042537689, | |
| "learning_rate": 1.2018299331346572e-05, | |
| "loss": 2.3387, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 0.7870001314002891, | |
| "grad_norm": 0.3077840507030487, | |
| "learning_rate": 1.1943036698815657e-05, | |
| "loss": 2.338, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 0.7877009329420525, | |
| "grad_norm": 0.288380891084671, | |
| "learning_rate": 1.1867978491582315e-05, | |
| "loss": 2.323, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 0.7884017344838159, | |
| "grad_norm": 0.3052459955215454, | |
| "learning_rate": 1.1793125112826265e-05, | |
| "loss": 2.3496, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.7891025360255792, | |
| "grad_norm": 0.3037883937358856, | |
| "learning_rate": 1.1718476964627018e-05, | |
| "loss": 2.3409, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 0.7898033375673427, | |
| "grad_norm": 0.29088008403778076, | |
| "learning_rate": 1.1644034447961626e-05, | |
| "loss": 2.3891, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 0.790504139109106, | |
| "grad_norm": 0.3015872538089752, | |
| "learning_rate": 1.1569797962702644e-05, | |
| "loss": 2.3521, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 0.7912049406508694, | |
| "grad_norm": 0.3099019229412079, | |
| "learning_rate": 1.1495767907615817e-05, | |
| "loss": 2.3265, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 0.7919057421926329, | |
| "grad_norm": 0.2973034381866455, | |
| "learning_rate": 1.142194468035815e-05, | |
| "loss": 2.3174, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.7926065437343962, | |
| "grad_norm": 0.3025253415107727, | |
| "learning_rate": 1.1348328677475567e-05, | |
| "loss": 2.4097, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 0.7933073452761596, | |
| "grad_norm": 0.31553056836128235, | |
| "learning_rate": 1.1274920294400949e-05, | |
| "loss": 2.3167, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 0.794008146817923, | |
| "grad_norm": 0.2875699996948242, | |
| "learning_rate": 1.1201719925451864e-05, | |
| "loss": 2.3568, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 0.7947089483596864, | |
| "grad_norm": 0.30479565262794495, | |
| "learning_rate": 1.1128727963828589e-05, | |
| "loss": 2.3514, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 0.7954097499014497, | |
| "grad_norm": 0.29742810130119324, | |
| "learning_rate": 1.1055944801611884e-05, | |
| "loss": 2.3243, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.7961105514432132, | |
| "grad_norm": 0.3056187033653259, | |
| "learning_rate": 1.0983370829760914e-05, | |
| "loss": 2.333, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 0.7968113529849765, | |
| "grad_norm": 0.2938902676105499, | |
| "learning_rate": 1.0911006438111232e-05, | |
| "loss": 2.3725, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 0.79751215452674, | |
| "grad_norm": 0.3125405013561249, | |
| "learning_rate": 1.0838852015372553e-05, | |
| "loss": 2.357, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 0.7982129560685034, | |
| "grad_norm": 0.29329320788383484, | |
| "learning_rate": 1.0766907949126765e-05, | |
| "loss": 2.3789, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 0.7989137576102667, | |
| "grad_norm": 0.3034367263317108, | |
| "learning_rate": 1.0695174625825783e-05, | |
| "loss": 2.3901, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.7996145591520302, | |
| "grad_norm": 0.2973790466785431, | |
| "learning_rate": 1.0623652430789561e-05, | |
| "loss": 2.3296, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 0.8003153606937935, | |
| "grad_norm": 0.30673426389694214, | |
| "learning_rate": 1.0552341748203887e-05, | |
| "loss": 2.3801, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 0.8010161622355569, | |
| "grad_norm": 0.30800938606262207, | |
| "learning_rate": 1.0481242961118475e-05, | |
| "loss": 2.3843, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 0.8017169637773203, | |
| "grad_norm": 0.3065621852874756, | |
| "learning_rate": 1.041035645144478e-05, | |
| "loss": 2.4006, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 0.8024177653190837, | |
| "grad_norm": 0.30960944294929504, | |
| "learning_rate": 1.0339682599954009e-05, | |
| "loss": 2.3861, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.803118566860847, | |
| "grad_norm": 0.3208168148994446, | |
| "learning_rate": 1.026922178627504e-05, | |
| "loss": 2.3746, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 0.8038193684026105, | |
| "grad_norm": 0.29509106278419495, | |
| "learning_rate": 1.0198974388892463e-05, | |
| "loss": 2.3109, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 0.8045201699443739, | |
| "grad_norm": 0.30645373463630676, | |
| "learning_rate": 1.0128940785144426e-05, | |
| "loss": 2.3651, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 0.8052209714861372, | |
| "grad_norm": 0.30181217193603516, | |
| "learning_rate": 1.0059121351220735e-05, | |
| "loss": 2.3878, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 0.8059217730279007, | |
| "grad_norm": 0.30057424306869507, | |
| "learning_rate": 9.989516462160687e-06, | |
| "loss": 2.3662, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.806622574569664, | |
| "grad_norm": 0.31955328583717346, | |
| "learning_rate": 9.920126491851217e-06, | |
| "loss": 2.3452, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 0.8073233761114275, | |
| "grad_norm": 0.2932940423488617, | |
| "learning_rate": 9.850951813024761e-06, | |
| "loss": 2.3778, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 0.8080241776531908, | |
| "grad_norm": 0.3064028024673462, | |
| "learning_rate": 9.78199279725734e-06, | |
| "loss": 2.3042, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 0.8087249791949542, | |
| "grad_norm": 0.279764860868454, | |
| "learning_rate": 9.713249814966507e-06, | |
| "loss": 2.3288, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 0.8094257807367177, | |
| "grad_norm": 0.2855755388736725, | |
| "learning_rate": 9.644723235409359e-06, | |
| "loss": 2.2993, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.810126582278481, | |
| "grad_norm": 0.29255473613739014, | |
| "learning_rate": 9.576413426680619e-06, | |
| "loss": 2.3562, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 0.8108273838202444, | |
| "grad_norm": 0.2857861816883087, | |
| "learning_rate": 9.508320755710586e-06, | |
| "loss": 2.3769, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 0.8115281853620078, | |
| "grad_norm": 0.28814780712127686, | |
| "learning_rate": 9.440445588263164e-06, | |
| "loss": 2.3193, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 0.8122289869037712, | |
| "grad_norm": 0.29499754309654236, | |
| "learning_rate": 9.372788288933987e-06, | |
| "loss": 2.3557, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 0.8129297884455345, | |
| "grad_norm": 0.2996094524860382, | |
| "learning_rate": 9.305349221148345e-06, | |
| "loss": 2.345, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.813630589987298, | |
| "grad_norm": 0.284106969833374, | |
| "learning_rate": 9.238128747159291e-06, | |
| "loss": 2.3453, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 0.8143313915290613, | |
| "grad_norm": 0.2909647524356842, | |
| "learning_rate": 9.171127228045718e-06, | |
| "loss": 2.3384, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 0.8150321930708248, | |
| "grad_norm": 0.2747179865837097, | |
| "learning_rate": 9.104345023710343e-06, | |
| "loss": 2.3428, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 0.8157329946125882, | |
| "grad_norm": 0.2932035028934479, | |
| "learning_rate": 9.037782492877878e-06, | |
| "loss": 2.3644, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 0.8164337961543515, | |
| "grad_norm": 0.2991825342178345, | |
| "learning_rate": 8.971439993092995e-06, | |
| "loss": 2.3578, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.817134597696115, | |
| "grad_norm": 0.3002317547798157, | |
| "learning_rate": 8.905317880718478e-06, | |
| "loss": 2.3347, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 0.8178353992378783, | |
| "grad_norm": 0.29123926162719727, | |
| "learning_rate": 8.839416510933268e-06, | |
| "loss": 2.3821, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 0.8185362007796417, | |
| "grad_norm": 0.2993260324001312, | |
| "learning_rate": 8.773736237730617e-06, | |
| "loss": 2.3353, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 0.8192370023214051, | |
| "grad_norm": 0.29723092913627625, | |
| "learning_rate": 8.708277413916088e-06, | |
| "loss": 2.3736, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 0.8199378038631685, | |
| "grad_norm": 0.2963494062423706, | |
| "learning_rate": 8.643040391105784e-06, | |
| "loss": 2.3436, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.8206386054049319, | |
| "grad_norm": 0.28377822041511536, | |
| "learning_rate": 8.578025519724292e-06, | |
| "loss": 2.4163, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 0.8213394069466953, | |
| "grad_norm": 0.29302090406417847, | |
| "learning_rate": 8.51323314900299e-06, | |
| "loss": 2.3031, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 0.8220402084884587, | |
| "grad_norm": 0.28734418749809265, | |
| "learning_rate": 8.448663626978031e-06, | |
| "loss": 2.3552, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 0.822741010030222, | |
| "grad_norm": 0.2865493893623352, | |
| "learning_rate": 8.384317300488565e-06, | |
| "loss": 2.4054, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 0.8234418115719855, | |
| "grad_norm": 0.287177175283432, | |
| "learning_rate": 8.320194515174779e-06, | |
| "loss": 2.3819, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.8241426131137488, | |
| "grad_norm": 0.2869892418384552, | |
| "learning_rate": 8.256295615476129e-06, | |
| "loss": 2.342, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.8248434146555123, | |
| "grad_norm": 0.2897149622440338, | |
| "learning_rate": 8.192620944629437e-06, | |
| "loss": 2.3824, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 0.8255442161972756, | |
| "grad_norm": 0.2985283434391022, | |
| "learning_rate": 8.129170844667106e-06, | |
| "loss": 2.3729, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 0.826245017739039, | |
| "grad_norm": 0.29701536893844604, | |
| "learning_rate": 8.065945656415186e-06, | |
| "loss": 2.3608, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 0.8269458192808025, | |
| "grad_norm": 0.2814193665981293, | |
| "learning_rate": 8.00294571949165e-06, | |
| "loss": 2.3527, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.8276466208225658, | |
| "grad_norm": 0.2885274291038513, | |
| "learning_rate": 7.940171372304489e-06, | |
| "loss": 2.38, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 0.8283474223643292, | |
| "grad_norm": 0.27283158898353577, | |
| "learning_rate": 7.87762295204993e-06, | |
| "loss": 2.3393, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 0.8290482239060926, | |
| "grad_norm": 0.28884851932525635, | |
| "learning_rate": 7.815300794710634e-06, | |
| "loss": 2.3636, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 0.829749025447856, | |
| "grad_norm": 0.27974218130111694, | |
| "learning_rate": 7.753205235053856e-06, | |
| "loss": 2.3276, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 0.8304498269896193, | |
| "grad_norm": 0.3126884996891022, | |
| "learning_rate": 7.691336606629707e-06, | |
| "loss": 2.3752, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.8311506285313828, | |
| "grad_norm": 0.29018256068229675, | |
| "learning_rate": 7.629695241769247e-06, | |
| "loss": 2.346, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 0.8318514300731462, | |
| "grad_norm": 0.3088786005973816, | |
| "learning_rate": 7.5682814715828606e-06, | |
| "loss": 2.3144, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 0.8325522316149095, | |
| "grad_norm": 0.293910950422287, | |
| "learning_rate": 7.507095625958338e-06, | |
| "loss": 2.3485, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 0.833253033156673, | |
| "grad_norm": 0.28551560640335083, | |
| "learning_rate": 7.446138033559197e-06, | |
| "loss": 2.3444, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 0.8339538346984363, | |
| "grad_norm": 0.3061683773994446, | |
| "learning_rate": 7.385409021822848e-06, | |
| "loss": 2.3695, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.8346546362401998, | |
| "grad_norm": 0.2754514813423157, | |
| "learning_rate": 7.324908916958883e-06, | |
| "loss": 2.3637, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 0.8353554377819631, | |
| "grad_norm": 0.2902991473674774, | |
| "learning_rate": 7.264638043947281e-06, | |
| "loss": 2.3318, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 0.8360562393237265, | |
| "grad_norm": 0.2895221412181854, | |
| "learning_rate": 7.204596726536738e-06, | |
| "loss": 2.3439, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 0.8367570408654899, | |
| "grad_norm": 0.30740001797676086, | |
| "learning_rate": 7.144785287242828e-06, | |
| "loss": 2.3461, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 0.8374578424072533, | |
| "grad_norm": 0.2855359613895416, | |
| "learning_rate": 7.0852040473463665e-06, | |
| "loss": 2.3419, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.8381586439490167, | |
| "grad_norm": 0.28643205761909485, | |
| "learning_rate": 7.0258533268915964e-06, | |
| "loss": 2.3418, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 0.8388594454907801, | |
| "grad_norm": 0.2963784337043762, | |
| "learning_rate": 6.966733444684537e-06, | |
| "loss": 2.3695, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 0.8395602470325435, | |
| "grad_norm": 0.2804722189903259, | |
| "learning_rate": 6.9078447182912175e-06, | |
| "loss": 2.3186, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 0.8402610485743068, | |
| "grad_norm": 0.27778077125549316, | |
| "learning_rate": 6.8491874640360555e-06, | |
| "loss": 2.3647, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 0.8409618501160703, | |
| "grad_norm": 0.29211902618408203, | |
| "learning_rate": 6.790761997000033e-06, | |
| "loss": 2.3382, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.8416626516578336, | |
| "grad_norm": 0.2860686182975769, | |
| "learning_rate": 6.732568631019132e-06, | |
| "loss": 2.3561, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 0.842363453199597, | |
| "grad_norm": 0.2880151569843292, | |
| "learning_rate": 6.674607678682554e-06, | |
| "loss": 2.3331, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 0.8430642547413605, | |
| "grad_norm": 0.28042176365852356, | |
| "learning_rate": 6.616879451331082e-06, | |
| "loss": 2.3797, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 0.8437650562831238, | |
| "grad_norm": 0.28274786472320557, | |
| "learning_rate": 6.559384259055418e-06, | |
| "loss": 2.341, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 0.8444658578248873, | |
| "grad_norm": 0.29342707991600037, | |
| "learning_rate": 6.502122410694478e-06, | |
| "loss": 2.3667, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.8451666593666506, | |
| "grad_norm": 0.28903117775917053, | |
| "learning_rate": 6.445094213833797e-06, | |
| "loss": 2.3198, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 0.845867460908414, | |
| "grad_norm": 0.28006690740585327, | |
| "learning_rate": 6.388299974803769e-06, | |
| "loss": 2.3359, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 0.8465682624501774, | |
| "grad_norm": 0.28730151057243347, | |
| "learning_rate": 6.331739998678143e-06, | |
| "loss": 2.3462, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 0.8472690639919408, | |
| "grad_norm": 0.2867075502872467, | |
| "learning_rate": 6.2754145892722495e-06, | |
| "loss": 2.3515, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 0.8479698655337041, | |
| "grad_norm": 0.26967528462409973, | |
| "learning_rate": 6.2193240491414894e-06, | |
| "loss": 2.3315, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.8486706670754676, | |
| "grad_norm": 0.2916017174720764, | |
| "learning_rate": 6.163468679579603e-06, | |
| "loss": 2.3365, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 0.849371468617231, | |
| "grad_norm": 0.2829345762729645, | |
| "learning_rate": 6.107848780617109e-06, | |
| "loss": 2.3679, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 0.8500722701589943, | |
| "grad_norm": 0.2843177616596222, | |
| "learning_rate": 6.052464651019674e-06, | |
| "loss": 2.3818, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 0.8507730717007578, | |
| "grad_norm": 0.27724432945251465, | |
| "learning_rate": 5.997316588286544e-06, | |
| "loss": 2.3463, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 0.8514738732425211, | |
| "grad_norm": 0.2801852822303772, | |
| "learning_rate": 5.942404888648889e-06, | |
| "loss": 2.3602, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.8521746747842845, | |
| "grad_norm": 0.27697739005088806, | |
| "learning_rate": 5.887729847068268e-06, | |
| "loss": 2.3908, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 0.8528754763260479, | |
| "grad_norm": 0.2772228717803955, | |
| "learning_rate": 5.8332917572349595e-06, | |
| "loss": 2.3689, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 0.8535762778678113, | |
| "grad_norm": 0.27209293842315674, | |
| "learning_rate": 5.77909091156652e-06, | |
| "loss": 2.2898, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 0.8542770794095746, | |
| "grad_norm": 0.28523585200309753, | |
| "learning_rate": 5.7251276012060654e-06, | |
| "loss": 2.3309, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 0.8549778809513381, | |
| "grad_norm": 0.2797084152698517, | |
| "learning_rate": 5.6714021160208295e-06, | |
| "loss": 2.3525, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.8556786824931015, | |
| "grad_norm": 0.27046072483062744, | |
| "learning_rate": 5.617914744600522e-06, | |
| "loss": 2.3176, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 0.8563794840348649, | |
| "grad_norm": 0.2719738483428955, | |
| "learning_rate": 5.5646657742558065e-06, | |
| "loss": 2.3782, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 0.8570802855766283, | |
| "grad_norm": 0.2789684236049652, | |
| "learning_rate": 5.511655491016793e-06, | |
| "loss": 2.3633, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 0.8577810871183916, | |
| "grad_norm": 0.281419038772583, | |
| "learning_rate": 5.458884179631424e-06, | |
| "loss": 2.3182, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 0.8584818886601551, | |
| "grad_norm": 0.2749309837818146, | |
| "learning_rate": 5.406352123564041e-06, | |
| "loss": 2.3494, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.8591826902019184, | |
| "grad_norm": 0.2750987410545349, | |
| "learning_rate": 5.354059604993755e-06, | |
| "loss": 2.2786, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 0.8598834917436818, | |
| "grad_norm": 0.28280219435691833, | |
| "learning_rate": 5.302006904813062e-06, | |
| "loss": 2.365, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 0.8605842932854453, | |
| "grad_norm": 0.28348422050476074, | |
| "learning_rate": 5.250194302626155e-06, | |
| "loss": 2.3392, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 0.8612850948272086, | |
| "grad_norm": 0.28861358761787415, | |
| "learning_rate": 5.198622076747628e-06, | |
| "loss": 2.3378, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 0.861985896368972, | |
| "grad_norm": 0.28437864780426025, | |
| "learning_rate": 5.147290504200802e-06, | |
| "loss": 2.356, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.8626866979107354, | |
| "grad_norm": 0.29528146982192993, | |
| "learning_rate": 5.096199860716383e-06, | |
| "loss": 2.3888, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 0.8633874994524988, | |
| "grad_norm": 0.2746603786945343, | |
| "learning_rate": 5.045350420730854e-06, | |
| "loss": 2.3338, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.8640883009942621, | |
| "grad_norm": 0.28209125995635986, | |
| "learning_rate": 4.994742457385087e-06, | |
| "loss": 2.3381, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 0.8647891025360256, | |
| "grad_norm": 0.2793874144554138, | |
| "learning_rate": 4.944376242522825e-06, | |
| "loss": 2.3631, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 0.8654899040777889, | |
| "grad_norm": 0.2787705659866333, | |
| "learning_rate": 4.894252046689285e-06, | |
| "loss": 2.3413, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.8661907056195524, | |
| "grad_norm": 0.2764906585216522, | |
| "learning_rate": 4.844370139129622e-06, | |
| "loss": 2.3401, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 0.8668915071613158, | |
| "grad_norm": 0.2849162518978119, | |
| "learning_rate": 4.794730787787566e-06, | |
| "loss": 2.31, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 0.8675923087030791, | |
| "grad_norm": 0.27567917108535767, | |
| "learning_rate": 4.74533425930388e-06, | |
| "loss": 2.383, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 0.8682931102448426, | |
| "grad_norm": 0.2714690864086151, | |
| "learning_rate": 4.696180819015061e-06, | |
| "loss": 2.299, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.8689939117866059, | |
| "grad_norm": 0.27050715684890747, | |
| "learning_rate": 4.64727073095178e-06, | |
| "loss": 2.3215, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.8696947133283693, | |
| "grad_norm": 0.27261003851890564, | |
| "learning_rate": 4.598604257837585e-06, | |
| "loss": 2.4066, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 0.8703955148701327, | |
| "grad_norm": 0.2710648775100708, | |
| "learning_rate": 4.550181661087388e-06, | |
| "loss": 2.3078, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 0.8710963164118961, | |
| "grad_norm": 0.26733291149139404, | |
| "learning_rate": 4.502003200806109e-06, | |
| "loss": 2.3444, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 0.8717971179536596, | |
| "grad_norm": 0.2685307562351227, | |
| "learning_rate": 4.454069135787303e-06, | |
| "loss": 2.3734, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 0.8724979194954229, | |
| "grad_norm": 0.27340108156204224, | |
| "learning_rate": 4.4063797235117e-06, | |
| "loss": 2.3292, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.8731987210371863, | |
| "grad_norm": 0.27594032883644104, | |
| "learning_rate": 4.358935220145904e-06, | |
| "loss": 2.3748, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 0.8738995225789497, | |
| "grad_norm": 0.2704198360443115, | |
| "learning_rate": 4.311735880540951e-06, | |
| "loss": 2.3344, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 0.8746003241207131, | |
| "grad_norm": 0.2730448246002197, | |
| "learning_rate": 4.264781958230962e-06, | |
| "loss": 2.2874, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 0.8753011256624764, | |
| "grad_norm": 0.2650400698184967, | |
| "learning_rate": 4.218073705431791e-06, | |
| "loss": 2.3454, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 0.8760019272042399, | |
| "grad_norm": 0.27303510904312134, | |
| "learning_rate": 4.17161137303968e-06, | |
| "loss": 2.3498, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.8767027287460032, | |
| "grad_norm": 0.27234286069869995, | |
| "learning_rate": 4.125395210629857e-06, | |
| "loss": 2.3935, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 0.8774035302877666, | |
| "grad_norm": 0.27970659732818604, | |
| "learning_rate": 4.079425466455289e-06, | |
| "loss": 2.3283, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 0.8781043318295301, | |
| "grad_norm": 0.27473318576812744, | |
| "learning_rate": 4.033702387445215e-06, | |
| "loss": 2.3489, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 0.8788051333712934, | |
| "grad_norm": 0.27126818895339966, | |
| "learning_rate": 3.988226219203967e-06, | |
| "loss": 2.3265, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 0.8795059349130568, | |
| "grad_norm": 0.27091753482818604, | |
| "learning_rate": 3.9429972060095465e-06, | |
| "loss": 2.3118, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.8802067364548202, | |
| "grad_norm": 0.28497523069381714, | |
| "learning_rate": 3.8980155908123594e-06, | |
| "loss": 2.3268, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 0.8809075379965836, | |
| "grad_norm": 0.27699658274650574, | |
| "learning_rate": 3.8532816152338835e-06, | |
| "loss": 2.3362, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 0.8816083395383469, | |
| "grad_norm": 0.27109819650650024, | |
| "learning_rate": 3.8087955195654167e-06, | |
| "loss": 2.3108, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 0.8823091410801104, | |
| "grad_norm": 0.2755720019340515, | |
| "learning_rate": 3.7645575427667e-06, | |
| "loss": 2.3616, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 0.8830099426218738, | |
| "grad_norm": 0.27005869150161743, | |
| "learning_rate": 3.720567922464746e-06, | |
| "loss": 2.3469, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.8837107441636372, | |
| "grad_norm": 0.27299705147743225, | |
| "learning_rate": 3.676826894952462e-06, | |
| "loss": 2.3394, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 0.8844115457054006, | |
| "grad_norm": 0.2688041925430298, | |
| "learning_rate": 3.633334695187468e-06, | |
| "loss": 2.3386, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 0.8851123472471639, | |
| "grad_norm": 0.27154210209846497, | |
| "learning_rate": 3.5900915567907536e-06, | |
| "loss": 2.3659, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 0.8858131487889274, | |
| "grad_norm": 0.2803657650947571, | |
| "learning_rate": 3.5470977120454552e-06, | |
| "loss": 2.3201, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 0.8865139503306907, | |
| "grad_norm": 0.26754680275917053, | |
| "learning_rate": 3.5043533918956538e-06, | |
| "loss": 2.3314, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.8872147518724541, | |
| "grad_norm": 0.27161145210266113, | |
| "learning_rate": 3.4618588259450633e-06, | |
| "loss": 2.2919, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 0.8879155534142175, | |
| "grad_norm": 0.2814792990684509, | |
| "learning_rate": 3.4196142424558266e-06, | |
| "loss": 2.3159, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 0.8886163549559809, | |
| "grad_norm": 0.2711291015148163, | |
| "learning_rate": 3.377619868347309e-06, | |
| "loss": 2.3451, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 0.8893171564977443, | |
| "grad_norm": 0.2673906683921814, | |
| "learning_rate": 3.3358759291948425e-06, | |
| "loss": 2.3729, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 0.8900179580395077, | |
| "grad_norm": 0.2697390019893646, | |
| "learning_rate": 3.2943826492285335e-06, | |
| "loss": 2.3524, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.8907187595812711, | |
| "grad_norm": 0.2605597674846649, | |
| "learning_rate": 3.2531402513320876e-06, | |
| "loss": 2.3326, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 0.8914195611230344, | |
| "grad_norm": 0.27018940448760986, | |
| "learning_rate": 3.2121489570415276e-06, | |
| "loss": 2.3634, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 0.8921203626647979, | |
| "grad_norm": 0.27040642499923706, | |
| "learning_rate": 3.1714089865441276e-06, | |
| "loss": 2.3682, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 0.8928211642065612, | |
| "grad_norm": 0.2684932351112366, | |
| "learning_rate": 3.1309205586770806e-06, | |
| "loss": 2.3519, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 0.8935219657483247, | |
| "grad_norm": 0.2710065245628357, | |
| "learning_rate": 3.090683890926471e-06, | |
| "loss": 2.3436, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.894222767290088, | |
| "grad_norm": 0.26228034496307373, | |
| "learning_rate": 3.0506991994259936e-06, | |
| "loss": 2.3479, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 0.8949235688318514, | |
| "grad_norm": 0.2639433443546295, | |
| "learning_rate": 3.0109666989558747e-06, | |
| "loss": 2.3286, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 0.8956243703736149, | |
| "grad_norm": 0.26126542687416077, | |
| "learning_rate": 2.9714866029416535e-06, | |
| "loss": 2.353, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 0.8963251719153782, | |
| "grad_norm": 0.277758926153183, | |
| "learning_rate": 2.932259123453068e-06, | |
| "loss": 2.4018, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 0.8970259734571416, | |
| "grad_norm": 0.2752660810947418, | |
| "learning_rate": 2.893284471202912e-06, | |
| "loss": 2.4104, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.897726774998905, | |
| "grad_norm": 0.27137380838394165, | |
| "learning_rate": 2.8545628555459168e-06, | |
| "loss": 2.4107, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 0.8984275765406684, | |
| "grad_norm": 0.27417030930519104, | |
| "learning_rate": 2.8160944844775884e-06, | |
| "loss": 2.3391, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 0.8991283780824317, | |
| "grad_norm": 0.273007333278656, | |
| "learning_rate": 2.7778795646331348e-06, | |
| "loss": 2.3245, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 0.8998291796241952, | |
| "grad_norm": 0.2673615515232086, | |
| "learning_rate": 2.7399183012863315e-06, | |
| "loss": 2.3225, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 0.9005299811659586, | |
| "grad_norm": 0.2729108929634094, | |
| "learning_rate": 2.7022108983484063e-06, | |
| "loss": 2.3417, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.901230782707722, | |
| "grad_norm": 0.27933502197265625, | |
| "learning_rate": 2.6647575583669705e-06, | |
| "loss": 2.3544, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 0.9019315842494854, | |
| "grad_norm": 0.26635363698005676, | |
| "learning_rate": 2.6275584825249334e-06, | |
| "loss": 2.3434, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 0.9026323857912487, | |
| "grad_norm": 0.26474449038505554, | |
| "learning_rate": 2.5906138706393857e-06, | |
| "loss": 2.3337, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 0.9033331873330122, | |
| "grad_norm": 0.27736666798591614, | |
| "learning_rate": 2.553923921160578e-06, | |
| "loss": 2.3501, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 0.9040339888747755, | |
| "grad_norm": 0.2660214900970459, | |
| "learning_rate": 2.5174888311708002e-06, | |
| "loss": 2.3154, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.9047347904165389, | |
| "grad_norm": 0.2688727080821991, | |
| "learning_rate": 2.481308796383347e-06, | |
| "loss": 2.3094, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 0.9054355919583023, | |
| "grad_norm": 0.2788686752319336, | |
| "learning_rate": 2.445384011141505e-06, | |
| "loss": 2.3465, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 0.9061363935000657, | |
| "grad_norm": 0.2629021406173706, | |
| "learning_rate": 2.409714668417423e-06, | |
| "loss": 2.4043, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 0.9068371950418291, | |
| "grad_norm": 0.2659167945384979, | |
| "learning_rate": 2.3743009598111764e-06, | |
| "loss": 2.3315, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 0.9075379965835925, | |
| "grad_norm": 0.26323938369750977, | |
| "learning_rate": 2.339143075549627e-06, | |
| "loss": 2.3503, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.9082387981253559, | |
| "grad_norm": 0.2721712589263916, | |
| "learning_rate": 2.3042412044855132e-06, | |
| "loss": 2.3358, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 0.9089395996671192, | |
| "grad_norm": 0.2640015780925751, | |
| "learning_rate": 2.2695955340963525e-06, | |
| "loss": 2.3521, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 0.9096404012088827, | |
| "grad_norm": 0.2649107575416565, | |
| "learning_rate": 2.2352062504834904e-06, | |
| "loss": 2.3638, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 0.910341202750646, | |
| "grad_norm": 0.2557833194732666, | |
| "learning_rate": 2.2010735383710457e-06, | |
| "loss": 2.314, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 0.9110420042924094, | |
| "grad_norm": 0.269994854927063, | |
| "learning_rate": 2.167197581104963e-06, | |
| "loss": 2.3653, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9117428058341729, | |
| "grad_norm": 0.2819438576698303, | |
| "learning_rate": 2.1335785606520052e-06, | |
| "loss": 2.3856, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 0.9124436073759362, | |
| "grad_norm": 0.26454123854637146, | |
| "learning_rate": 2.1002166575988082e-06, | |
| "loss": 2.3329, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 0.9131444089176997, | |
| "grad_norm": 0.2635791003704071, | |
| "learning_rate": 2.0671120511508492e-06, | |
| "loss": 2.3384, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 0.913845210459463, | |
| "grad_norm": 0.2686479091644287, | |
| "learning_rate": 2.0342649191315566e-06, | |
| "loss": 2.3115, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 0.9145460120012264, | |
| "grad_norm": 0.2614147663116455, | |
| "learning_rate": 2.0016754379812817e-06, | |
| "loss": 2.3316, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.9152468135429898, | |
| "grad_norm": 0.2683558762073517, | |
| "learning_rate": 1.969343782756422e-06, | |
| "loss": 2.3695, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 0.9159476150847532, | |
| "grad_norm": 0.2628380358219147, | |
| "learning_rate": 1.9372701271284265e-06, | |
| "loss": 2.3399, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 0.9166484166265165, | |
| "grad_norm": 0.25948262214660645, | |
| "learning_rate": 1.905454643382898e-06, | |
| "loss": 2.3317, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 0.91734921816828, | |
| "grad_norm": 0.26860958337783813, | |
| "learning_rate": 1.8738975024186422e-06, | |
| "loss": 2.3769, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 0.9180500197100434, | |
| "grad_norm": 0.2748177647590637, | |
| "learning_rate": 1.8425988737467593e-06, | |
| "loss": 2.365, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.9187508212518067, | |
| "grad_norm": 0.27372971177101135, | |
| "learning_rate": 1.8115589254897535e-06, | |
| "loss": 2.3421, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 0.9194516227935702, | |
| "grad_norm": 0.2683233618736267, | |
| "learning_rate": 1.7807778243805918e-06, | |
| "loss": 2.3433, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 0.9201524243353335, | |
| "grad_norm": 0.25900888442993164, | |
| "learning_rate": 1.75025573576183e-06, | |
| "loss": 2.3705, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 0.920853225877097, | |
| "grad_norm": 0.27348002791404724, | |
| "learning_rate": 1.7199928235847373e-06, | |
| "loss": 2.3506, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 0.9215540274188603, | |
| "grad_norm": 0.26492583751678467, | |
| "learning_rate": 1.6899892504083858e-06, | |
| "loss": 2.3641, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.9222548289606237, | |
| "grad_norm": 0.26733097434043884, | |
| "learning_rate": 1.6602451773987893e-06, | |
| "loss": 2.3329, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 0.9229556305023872, | |
| "grad_norm": 0.26391738653182983, | |
| "learning_rate": 1.6307607643280598e-06, | |
| "loss": 2.3533, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 0.9236564320441505, | |
| "grad_norm": 0.2692205011844635, | |
| "learning_rate": 1.6015361695735142e-06, | |
| "loss": 2.4126, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 0.9243572335859139, | |
| "grad_norm": 0.2715857923030853, | |
| "learning_rate": 1.572571550116847e-06, | |
| "loss": 2.3435, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 0.9250580351276773, | |
| "grad_norm": 0.26217779517173767, | |
| "learning_rate": 1.5438670615432694e-06, | |
| "loss": 2.3401, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.9257588366694407, | |
| "grad_norm": 0.2711847424507141, | |
| "learning_rate": 1.5154228580407048e-06, | |
| "loss": 2.3816, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 0.926459638211204, | |
| "grad_norm": 0.26257359981536865, | |
| "learning_rate": 1.4872390923989066e-06, | |
| "loss": 2.3327, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 0.9271604397529675, | |
| "grad_norm": 0.27749139070510864, | |
| "learning_rate": 1.4593159160087078e-06, | |
| "loss": 2.3226, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 0.9278612412947308, | |
| "grad_norm": 0.26849448680877686, | |
| "learning_rate": 1.4316534788611335e-06, | |
| "loss": 2.3326, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 0.9285620428364942, | |
| "grad_norm": 0.2660931646823883, | |
| "learning_rate": 1.4042519295466794e-06, | |
| "loss": 2.3661, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9292628443782577, | |
| "grad_norm": 0.2676352858543396, | |
| "learning_rate": 1.3771114152544063e-06, | |
| "loss": 2.4243, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 0.929963645920021, | |
| "grad_norm": 0.2659156024456024, | |
| "learning_rate": 1.3502320817712576e-06, | |
| "loss": 2.3589, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 0.9306644474617844, | |
| "grad_norm": 0.26603996753692627, | |
| "learning_rate": 1.3236140734811997e-06, | |
| "loss": 2.3711, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 0.9313652490035478, | |
| "grad_norm": 0.27449002861976624, | |
| "learning_rate": 1.2972575333645044e-06, | |
| "loss": 2.2935, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 0.9320660505453112, | |
| "grad_norm": 0.26013731956481934, | |
| "learning_rate": 1.271162602996917e-06, | |
| "loss": 2.3564, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.9327668520870745, | |
| "grad_norm": 0.26281431317329407, | |
| "learning_rate": 1.245329422548952e-06, | |
| "loss": 2.3292, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 0.933467653628838, | |
| "grad_norm": 0.2613161504268646, | |
| "learning_rate": 1.2197581307851147e-06, | |
| "loss": 2.3321, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 0.9341684551706014, | |
| "grad_norm": 0.2653728425502777, | |
| "learning_rate": 1.1944488650631468e-06, | |
| "loss": 2.3114, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 0.9348692567123648, | |
| "grad_norm": 0.2642427980899811, | |
| "learning_rate": 1.1694017613333275e-06, | |
| "loss": 2.3004, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 0.9355700582541282, | |
| "grad_norm": 0.2627279460430145, | |
| "learning_rate": 1.1446169541376893e-06, | |
| "loss": 2.3692, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.9362708597958915, | |
| "grad_norm": 0.2644914388656616, | |
| "learning_rate": 1.120094576609343e-06, | |
| "loss": 2.3567, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 0.936971661337655, | |
| "grad_norm": 0.2687349319458008, | |
| "learning_rate": 1.0958347604717201e-06, | |
| "loss": 2.3527, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 0.9376724628794183, | |
| "grad_norm": 0.26397764682769775, | |
| "learning_rate": 1.0718376360379145e-06, | |
| "loss": 2.3628, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 0.9383732644211817, | |
| "grad_norm": 0.2732083797454834, | |
| "learning_rate": 1.0481033322099375e-06, | |
| "loss": 2.3258, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 0.9390740659629451, | |
| "grad_norm": 0.2604101896286011, | |
| "learning_rate": 1.0246319764780521e-06, | |
| "loss": 2.3576, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.9397748675047085, | |
| "grad_norm": 0.2678563892841339, | |
| "learning_rate": 1.0014236949200673e-06, | |
| "loss": 2.3665, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 0.940475669046472, | |
| "grad_norm": 0.26518163084983826, | |
| "learning_rate": 9.784786122006896e-07, | |
| "loss": 2.3143, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.26517802476882935, | |
| "learning_rate": 9.557968515708226e-07, | |
| "loss": 2.3853, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 0.9418772721299987, | |
| "grad_norm": 0.27053287625312805, | |
| "learning_rate": 9.333785348669355e-07, | |
| "loss": 2.3127, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.942578073671762, | |
| "grad_norm": 0.2658546268939972, | |
| "learning_rate": 9.112237825103787e-07, | |
| "loss": 2.3175, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.9432788752135255, | |
| "grad_norm": 0.26636263728141785, | |
| "learning_rate": 8.893327135067519e-07, | |
| "loss": 2.3576, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 0.9439796767552888, | |
| "grad_norm": 0.26162707805633545, | |
| "learning_rate": 8.677054454452605e-07, | |
| "loss": 2.3228, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 0.9446804782970523, | |
| "grad_norm": 0.2616811692714691, | |
| "learning_rate": 8.463420944981093e-07, | |
| "loss": 2.3319, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 0.9453812798388156, | |
| "grad_norm": 0.2599705159664154, | |
| "learning_rate": 8.252427754198211e-07, | |
| "loss": 2.3318, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 0.946082081380579, | |
| "grad_norm": 0.2655077576637268, | |
| "learning_rate": 8.044076015466862e-07, | |
| "loss": 2.3656, | |
| "step": 2700 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2854, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.039795302576947e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
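
The object above is the `trainer_state.json` that the Hugging Face `transformers` `Trainer` writes into each checkpoint directory: `log_history` holds one entry per logging event (here every 2 optimizer steps, per `logging_steps`), and the trailing fields record run-level settings such as `max_steps`, `save_steps`, and `train_batch_size`. As a minimal sketch, assuming the JSON above is saved to a file named `trainer_state.json` and that `matplotlib` is installed (neither is stated in the file itself), the loss and learning-rate curves can be recovered directly from `log_history`:

```python
# Minimal sketch: read a Trainer checkpoint's trainer_state.json and plot
# training loss and learning rate against optimizer step. The field names
# ("log_history", "step", "loss", "learning_rate") come from the file above;
# the filename and the use of matplotlib are assumptions for illustration.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a loss value (every logged entry here does,
# but eval entries in other runs may not).
history = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, label="train loss")
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("loss")

# Put the learning rate on a secondary axis, since its scale differs
# from the loss by several orders of magnitude.
ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lrs, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.tight_layout()
plt.show()
```

On this particular state the plot would show the tail of the schedule: between the logged steps 2512 and 2700 the learning rate decays from about 3.9e-6 to about 8.0e-7 while the loss hovers around 2.3, and with `max_steps` at 2854 the run is roughly 95% complete, matching the `epoch` value of ~0.946 in a single-epoch (`num_train_epochs: 1`) run.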