| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.3153606937935263, | |
| "eval_steps": 500, | |
| "global_step": 900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00035040077088169594, | |
| "grad_norm": 6.5142412185668945, | |
| "learning_rate": 0.0, | |
| "loss": 5.324, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0007008015417633919, | |
| "grad_norm": 6.758334159851074, | |
| "learning_rate": 6.993006993006994e-07, | |
| "loss": 5.3405, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0014016030835267838, | |
| "grad_norm": 6.22674036026001, | |
| "learning_rate": 2.0979020979020983e-06, | |
| "loss": 5.3286, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0021024046252901755, | |
| "grad_norm": 5.438386917114258, | |
| "learning_rate": 3.496503496503497e-06, | |
| "loss": 5.25, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0028032061670535675, | |
| "grad_norm": 3.365504741668701, | |
| "learning_rate": 4.895104895104895e-06, | |
| "loss": 5.2821, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0035040077088169595, | |
| "grad_norm": 7.186147212982178, | |
| "learning_rate": 6.2937062937062944e-06, | |
| "loss": 5.21, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.004204809250580351, | |
| "grad_norm": 4.960826396942139, | |
| "learning_rate": 7.692307692307694e-06, | |
| "loss": 5.0759, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.004905610792343743, | |
| "grad_norm": 4.001464366912842, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 5.1092, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.005606412334107135, | |
| "grad_norm": 3.2986342906951904, | |
| "learning_rate": 1.048951048951049e-05, | |
| "loss": 4.93, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.006307213875870527, | |
| "grad_norm": 2.5407276153564453, | |
| "learning_rate": 1.188811188811189e-05, | |
| "loss": 4.8535, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.007008015417633919, | |
| "grad_norm": 2.211754083633423, | |
| "learning_rate": 1.3286713286713287e-05, | |
| "loss": 4.74, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.007708816959397311, | |
| "grad_norm": 1.6710195541381836, | |
| "learning_rate": 1.4685314685314686e-05, | |
| "loss": 4.609, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.008409618501160702, | |
| "grad_norm": 1.280752182006836, | |
| "learning_rate": 1.6083916083916083e-05, | |
| "loss": 4.4879, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.009110420042924094, | |
| "grad_norm": 1.312186598777771, | |
| "learning_rate": 1.7482517482517483e-05, | |
| "loss": 4.3995, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.009811221584687486, | |
| "grad_norm": 1.3315190076828003, | |
| "learning_rate": 1.888111888111888e-05, | |
| "loss": 4.3005, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.010512023126450878, | |
| "grad_norm": 1.3252590894699097, | |
| "learning_rate": 2.027972027972028e-05, | |
| "loss": 4.1952, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01121282466821427, | |
| "grad_norm": 1.3794758319854736, | |
| "learning_rate": 2.1678321678321677e-05, | |
| "loss": 4.1459, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.011913626209977662, | |
| "grad_norm": 1.1808068752288818, | |
| "learning_rate": 2.307692307692308e-05, | |
| "loss": 4.034, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.012614427751741054, | |
| "grad_norm": 1.31660795211792, | |
| "learning_rate": 2.4475524475524478e-05, | |
| "loss": 3.926, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.013315229293504446, | |
| "grad_norm": 1.0347495079040527, | |
| "learning_rate": 2.5874125874125877e-05, | |
| "loss": 3.8812, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.014016030835267838, | |
| "grad_norm": 1.050775408744812, | |
| "learning_rate": 2.7272727272727273e-05, | |
| "loss": 3.7787, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01471683237703123, | |
| "grad_norm": 0.9461761713027954, | |
| "learning_rate": 2.8671328671328672e-05, | |
| "loss": 3.6738, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.015417633918794622, | |
| "grad_norm": 1.0460454225540161, | |
| "learning_rate": 3.0069930069930068e-05, | |
| "loss": 3.6385, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.016118435460558012, | |
| "grad_norm": 1.0687191486358643, | |
| "learning_rate": 3.146853146853147e-05, | |
| "loss": 3.5701, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.016819237002321404, | |
| "grad_norm": 1.4722611904144287, | |
| "learning_rate": 3.2867132867132866e-05, | |
| "loss": 3.5438, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.017520038544084796, | |
| "grad_norm": 1.1305724382400513, | |
| "learning_rate": 3.4265734265734265e-05, | |
| "loss": 3.4694, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.018220840085848188, | |
| "grad_norm": 0.9322625994682312, | |
| "learning_rate": 3.566433566433567e-05, | |
| "loss": 3.4488, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.01892164162761158, | |
| "grad_norm": 1.2441555261611938, | |
| "learning_rate": 3.7062937062937064e-05, | |
| "loss": 3.4289, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.019622443169374972, | |
| "grad_norm": 0.9397731423377991, | |
| "learning_rate": 3.846153846153846e-05, | |
| "loss": 3.4021, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.020323244711138364, | |
| "grad_norm": 1.3261164426803589, | |
| "learning_rate": 3.986013986013986e-05, | |
| "loss": 3.3575, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.021024046252901756, | |
| "grad_norm": 1.08541738986969, | |
| "learning_rate": 4.125874125874126e-05, | |
| "loss": 3.3403, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.021724847794665148, | |
| "grad_norm": 0.8626166582107544, | |
| "learning_rate": 4.265734265734266e-05, | |
| "loss": 3.3306, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.02242564933642854, | |
| "grad_norm": 1.0596344470977783, | |
| "learning_rate": 4.405594405594406e-05, | |
| "loss": 3.2779, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.023126450878191932, | |
| "grad_norm": 1.511917233467102, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 3.2759, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.023827252419955324, | |
| "grad_norm": 1.2062046527862549, | |
| "learning_rate": 4.685314685314686e-05, | |
| "loss": 3.2545, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.024528053961718716, | |
| "grad_norm": 1.1399930715560913, | |
| "learning_rate": 4.825174825174825e-05, | |
| "loss": 3.2235, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.025228855503482108, | |
| "grad_norm": 0.8960133790969849, | |
| "learning_rate": 4.9650349650349656e-05, | |
| "loss": 3.2025, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.0259296570452455, | |
| "grad_norm": 1.3042056560516357, | |
| "learning_rate": 5.1048951048951055e-05, | |
| "loss": 3.1475, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.026630458587008892, | |
| "grad_norm": 1.186320424079895, | |
| "learning_rate": 5.244755244755245e-05, | |
| "loss": 3.1759, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.027331260128772284, | |
| "grad_norm": 1.2691158056259155, | |
| "learning_rate": 5.384615384615385e-05, | |
| "loss": 3.1296, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.028032061670535676, | |
| "grad_norm": 0.7816159129142761, | |
| "learning_rate": 5.524475524475524e-05, | |
| "loss": 3.1017, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.028732863212299068, | |
| "grad_norm": 1.1489295959472656, | |
| "learning_rate": 5.664335664335665e-05, | |
| "loss": 3.1151, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.02943366475406246, | |
| "grad_norm": 1.5686062574386597, | |
| "learning_rate": 5.8041958041958044e-05, | |
| "loss": 3.114, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.030134466295825852, | |
| "grad_norm": 1.4421433210372925, | |
| "learning_rate": 5.944055944055944e-05, | |
| "loss": 3.0946, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.030835267837589244, | |
| "grad_norm": 1.335250973701477, | |
| "learning_rate": 6.083916083916085e-05, | |
| "loss": 3.084, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.03153606937935263, | |
| "grad_norm": 0.970507800579071, | |
| "learning_rate": 6.223776223776224e-05, | |
| "loss": 3.1163, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.032236870921116025, | |
| "grad_norm": 1.2849407196044922, | |
| "learning_rate": 6.363636363636364e-05, | |
| "loss": 3.063, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.032937672462879417, | |
| "grad_norm": 1.0378247499465942, | |
| "learning_rate": 6.503496503496504e-05, | |
| "loss": 3.0223, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.03363847400464281, | |
| "grad_norm": 1.3139392137527466, | |
| "learning_rate": 6.643356643356644e-05, | |
| "loss": 3.0572, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.0343392755464062, | |
| "grad_norm": 1.254752278327942, | |
| "learning_rate": 6.783216783216784e-05, | |
| "loss": 3.0408, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.03504007708816959, | |
| "grad_norm": 1.3333168029785156, | |
| "learning_rate": 6.923076923076924e-05, | |
| "loss": 3.0185, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.035740878629932984, | |
| "grad_norm": 1.2795464992523193, | |
| "learning_rate": 7.062937062937062e-05, | |
| "loss": 3.0328, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.036441680171696376, | |
| "grad_norm": 1.2025645971298218, | |
| "learning_rate": 7.202797202797204e-05, | |
| "loss": 3.0303, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.03714248171345977, | |
| "grad_norm": 1.1741266250610352, | |
| "learning_rate": 7.342657342657343e-05, | |
| "loss": 3.0252, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.03784328325522316, | |
| "grad_norm": 1.2022653818130493, | |
| "learning_rate": 7.482517482517482e-05, | |
| "loss": 3.0183, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.03854408479698655, | |
| "grad_norm": 1.1950666904449463, | |
| "learning_rate": 7.622377622377622e-05, | |
| "loss": 2.9804, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.039244886338749944, | |
| "grad_norm": 1.5780822038650513, | |
| "learning_rate": 7.762237762237763e-05, | |
| "loss": 2.9804, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.039945687880513336, | |
| "grad_norm": 1.0478655099868774, | |
| "learning_rate": 7.902097902097903e-05, | |
| "loss": 2.9894, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.04064648942227673, | |
| "grad_norm": 1.1782268285751343, | |
| "learning_rate": 8.041958041958042e-05, | |
| "loss": 2.9717, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.04134729096404012, | |
| "grad_norm": 1.0321820974349976, | |
| "learning_rate": 8.181818181818183e-05, | |
| "loss": 2.9776, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.04204809250580351, | |
| "grad_norm": 0.9697206020355225, | |
| "learning_rate": 8.321678321678323e-05, | |
| "loss": 2.9804, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.042748894047566904, | |
| "grad_norm": 1.1984606981277466, | |
| "learning_rate": 8.461538461538461e-05, | |
| "loss": 2.9495, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.043449695589330296, | |
| "grad_norm": 0.9830178618431091, | |
| "learning_rate": 8.601398601398601e-05, | |
| "loss": 2.9656, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.04415049713109369, | |
| "grad_norm": 1.3105114698410034, | |
| "learning_rate": 8.741258741258743e-05, | |
| "loss": 2.9306, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.04485129867285708, | |
| "grad_norm": 1.3499157428741455, | |
| "learning_rate": 8.881118881118881e-05, | |
| "loss": 2.9381, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.04555210021462047, | |
| "grad_norm": 0.9977575540542603, | |
| "learning_rate": 9.020979020979021e-05, | |
| "loss": 2.907, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.046252901756383864, | |
| "grad_norm": 1.2331498861312866, | |
| "learning_rate": 9.160839160839161e-05, | |
| "loss": 2.9224, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.046953703298147256, | |
| "grad_norm": 1.451253890991211, | |
| "learning_rate": 9.300699300699301e-05, | |
| "loss": 2.9202, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.04765450483991065, | |
| "grad_norm": 1.2146471738815308, | |
| "learning_rate": 9.440559440559441e-05, | |
| "loss": 2.9098, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.04835530638167404, | |
| "grad_norm": 1.0873245000839233, | |
| "learning_rate": 9.580419580419581e-05, | |
| "loss": 2.9218, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.04905610792343743, | |
| "grad_norm": 1.276413083076477, | |
| "learning_rate": 9.72027972027972e-05, | |
| "loss": 2.8947, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.049756909465200824, | |
| "grad_norm": 1.126065731048584, | |
| "learning_rate": 9.86013986013986e-05, | |
| "loss": 2.8788, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.050457711006964216, | |
| "grad_norm": 1.5177017450332642, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9043, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.05115851254872761, | |
| "grad_norm": 1.3744112253189087, | |
| "learning_rate": 9.99998657109765e-05, | |
| "loss": 2.888, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.051859314090491, | |
| "grad_norm": 1.7921055555343628, | |
| "learning_rate": 9.999946284462733e-05, | |
| "loss": 2.8631, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.05256011563225439, | |
| "grad_norm": 1.1755317449569702, | |
| "learning_rate": 9.999879140311652e-05, | |
| "loss": 2.8735, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.053260917174017784, | |
| "grad_norm": 0.846362292766571, | |
| "learning_rate": 9.999785139005073e-05, | |
| "loss": 2.8768, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.053961718715781176, | |
| "grad_norm": 0.9867280721664429, | |
| "learning_rate": 9.999664281047933e-05, | |
| "loss": 2.8859, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.05466252025754457, | |
| "grad_norm": 0.9751666188240051, | |
| "learning_rate": 9.999516567089429e-05, | |
| "loss": 2.8497, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.05536332179930796, | |
| "grad_norm": 1.0603703260421753, | |
| "learning_rate": 9.999341997923011e-05, | |
| "loss": 2.8404, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.05606412334107135, | |
| "grad_norm": 1.0447975397109985, | |
| "learning_rate": 9.999140574486392e-05, | |
| "loss": 2.9092, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.056764924882834744, | |
| "grad_norm": 1.3046443462371826, | |
| "learning_rate": 9.998912297861527e-05, | |
| "loss": 2.8971, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.057465726424598136, | |
| "grad_norm": 1.1029243469238281, | |
| "learning_rate": 9.998657169274622e-05, | |
| "loss": 2.8834, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.05816652796636153, | |
| "grad_norm": 0.8594210743904114, | |
| "learning_rate": 9.99837519009611e-05, | |
| "loss": 2.8361, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.05886732950812492, | |
| "grad_norm": 0.8585363030433655, | |
| "learning_rate": 9.998066361840665e-05, | |
| "loss": 2.8782, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.05956813104988831, | |
| "grad_norm": 0.693467378616333, | |
| "learning_rate": 9.997730686167173e-05, | |
| "loss": 2.8537, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.060268932591651704, | |
| "grad_norm": 0.8418940305709839, | |
| "learning_rate": 9.997368164878738e-05, | |
| "loss": 2.8294, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.060969734133415096, | |
| "grad_norm": 0.9938271045684814, | |
| "learning_rate": 9.996978799922665e-05, | |
| "loss": 2.8458, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.06167053567517849, | |
| "grad_norm": 1.0347217321395874, | |
| "learning_rate": 9.99656259339045e-05, | |
| "loss": 2.8081, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.06237133721694188, | |
| "grad_norm": 0.9216743111610413, | |
| "learning_rate": 9.996119547517775e-05, | |
| "loss": 2.8655, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.06307213875870527, | |
| "grad_norm": 1.0579859018325806, | |
| "learning_rate": 9.995649664684486e-05, | |
| "loss": 2.823, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06377294030046866, | |
| "grad_norm": 0.9864194393157959, | |
| "learning_rate": 9.995152947414586e-05, | |
| "loss": 2.8081, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.06447374184223205, | |
| "grad_norm": 0.8999143838882446, | |
| "learning_rate": 9.994629398376226e-05, | |
| "loss": 2.7947, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.06517454338399545, | |
| "grad_norm": 0.9121315479278564, | |
| "learning_rate": 9.994079020381676e-05, | |
| "loss": 2.8253, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.06587534492575883, | |
| "grad_norm": 0.8578842282295227, | |
| "learning_rate": 9.993501816387329e-05, | |
| "loss": 2.7548, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.06657614646752223, | |
| "grad_norm": 0.8564820289611816, | |
| "learning_rate": 9.992897789493672e-05, | |
| "loss": 2.8361, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06727694800928562, | |
| "grad_norm": 0.8013344407081604, | |
| "learning_rate": 9.992266942945269e-05, | |
| "loss": 2.8606, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.06797774955104902, | |
| "grad_norm": 0.7343975901603699, | |
| "learning_rate": 9.991609280130752e-05, | |
| "loss": 2.7947, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.0686785510928124, | |
| "grad_norm": 0.7338536381721497, | |
| "learning_rate": 9.990924804582797e-05, | |
| "loss": 2.7492, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.0693793526345758, | |
| "grad_norm": 0.828781008720398, | |
| "learning_rate": 9.990213519978109e-05, | |
| "loss": 2.8013, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.07008015417633918, | |
| "grad_norm": 0.7156624794006348, | |
| "learning_rate": 9.989475430137391e-05, | |
| "loss": 2.7943, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07078095571810258, | |
| "grad_norm": 0.6014353632926941, | |
| "learning_rate": 9.988710539025341e-05, | |
| "loss": 2.8099, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.07148175725986597, | |
| "grad_norm": 0.6569661498069763, | |
| "learning_rate": 9.987918850750619e-05, | |
| "loss": 2.8125, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.07218255880162937, | |
| "grad_norm": 0.6558775305747986, | |
| "learning_rate": 9.987100369565825e-05, | |
| "loss": 2.7487, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.07288336034339275, | |
| "grad_norm": 0.6454245448112488, | |
| "learning_rate": 9.986255099867481e-05, | |
| "loss": 2.7648, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.07358416188515615, | |
| "grad_norm": 0.5741921067237854, | |
| "learning_rate": 9.985383046196004e-05, | |
| "loss": 2.7743, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07428496342691954, | |
| "grad_norm": 0.5875937938690186, | |
| "learning_rate": 9.984484213235685e-05, | |
| "loss": 2.7728, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.07498576496868294, | |
| "grad_norm": 0.6638422012329102, | |
| "learning_rate": 9.98355860581466e-05, | |
| "loss": 2.7504, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.07568656651044632, | |
| "grad_norm": 1.1614341735839844, | |
| "learning_rate": 9.982606228904884e-05, | |
| "loss": 2.7923, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.07638736805220972, | |
| "grad_norm": 1.005254864692688, | |
| "learning_rate": 9.981627087622108e-05, | |
| "loss": 2.76, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.0770881695939731, | |
| "grad_norm": 0.7738555669784546, | |
| "learning_rate": 9.980621187225852e-05, | |
| "loss": 2.7866, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0777889711357365, | |
| "grad_norm": 0.9469527006149292, | |
| "learning_rate": 9.979588533119367e-05, | |
| "loss": 2.8012, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.07848977267749989, | |
| "grad_norm": 0.9031473398208618, | |
| "learning_rate": 9.978529130849619e-05, | |
| "loss": 2.7522, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.07919057421926329, | |
| "grad_norm": 0.9450514912605286, | |
| "learning_rate": 9.977442986107252e-05, | |
| "loss": 2.7791, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.07989137576102667, | |
| "grad_norm": 0.7259206771850586, | |
| "learning_rate": 9.97633010472656e-05, | |
| "loss": 2.7237, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.08059217730279007, | |
| "grad_norm": 0.6595309972763062, | |
| "learning_rate": 9.975190492685451e-05, | |
| "loss": 2.7284, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08129297884455346, | |
| "grad_norm": 0.7696382999420166, | |
| "learning_rate": 9.974024156105422e-05, | |
| "loss": 2.7631, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.08199378038631686, | |
| "grad_norm": 0.7305110096931458, | |
| "learning_rate": 9.972831101251521e-05, | |
| "loss": 2.7793, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.08269458192808024, | |
| "grad_norm": 0.6039514541625977, | |
| "learning_rate": 9.971611334532314e-05, | |
| "loss": 2.7669, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.08339538346984364, | |
| "grad_norm": 0.5824711918830872, | |
| "learning_rate": 9.970364862499852e-05, | |
| "loss": 2.7476, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.08409618501160702, | |
| "grad_norm": 0.6831758618354797, | |
| "learning_rate": 9.969091691849637e-05, | |
| "loss": 2.7098, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08479698655337042, | |
| "grad_norm": 0.6469074487686157, | |
| "learning_rate": 9.967791829420581e-05, | |
| "loss": 2.7609, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.08549778809513381, | |
| "grad_norm": 0.5876832604408264, | |
| "learning_rate": 9.966465282194976e-05, | |
| "loss": 2.7306, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.08619858963689721, | |
| "grad_norm": 0.6310129761695862, | |
| "learning_rate": 9.965112057298451e-05, | |
| "loss": 2.7283, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.08689939117866059, | |
| "grad_norm": 0.6113069653511047, | |
| "learning_rate": 9.963732161999935e-05, | |
| "loss": 2.7274, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.08760019272042399, | |
| "grad_norm": 1.0655111074447632, | |
| "learning_rate": 9.96232560371162e-05, | |
| "loss": 2.7022, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.08830099426218738, | |
| "grad_norm": 0.8412613272666931, | |
| "learning_rate": 9.960892389988918e-05, | |
| "loss": 2.7213, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.08900179580395078, | |
| "grad_norm": 0.7329776883125305, | |
| "learning_rate": 9.959432528530428e-05, | |
| "loss": 2.7343, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.08970259734571416, | |
| "grad_norm": 0.702498197555542, | |
| "learning_rate": 9.95794602717788e-05, | |
| "loss": 2.7642, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.09040339888747755, | |
| "grad_norm": 0.6936408281326294, | |
| "learning_rate": 9.95643289391611e-05, | |
| "loss": 2.7081, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.09110420042924094, | |
| "grad_norm": 0.664743959903717, | |
| "learning_rate": 9.954893136873005e-05, | |
| "loss": 2.7054, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.09180500197100433, | |
| "grad_norm": 0.5716791152954102, | |
| "learning_rate": 9.953326764319463e-05, | |
| "loss": 2.6751, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.09250580351276773, | |
| "grad_norm": 0.6207195520401001, | |
| "learning_rate": 9.95173378466935e-05, | |
| "loss": 2.6945, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.09320660505453111, | |
| "grad_norm": 0.6572092771530151, | |
| "learning_rate": 9.950114206479453e-05, | |
| "loss": 2.6989, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.09390740659629451, | |
| "grad_norm": 0.7676830887794495, | |
| "learning_rate": 9.948468038449435e-05, | |
| "loss": 2.7613, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.0946082081380579, | |
| "grad_norm": 0.5810503959655762, | |
| "learning_rate": 9.946795289421787e-05, | |
| "loss": 2.7234, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.0953090096798213, | |
| "grad_norm": 0.6459682583808899, | |
| "learning_rate": 9.945095968381784e-05, | |
| "loss": 2.717, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.09600981122158468, | |
| "grad_norm": 0.6498464345932007, | |
| "learning_rate": 9.94337008445743e-05, | |
| "loss": 2.7389, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.09671061276334808, | |
| "grad_norm": 0.6287350654602051, | |
| "learning_rate": 9.941617646919421e-05, | |
| "loss": 2.681, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.09741141430511147, | |
| "grad_norm": 0.7516258955001831, | |
| "learning_rate": 9.939838665181076e-05, | |
| "loss": 2.6696, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.09811221584687486, | |
| "grad_norm": 0.6962350606918335, | |
| "learning_rate": 9.938033148798307e-05, | |
| "loss": 2.6971, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.09881301738863825, | |
| "grad_norm": 0.6605144739151001, | |
| "learning_rate": 9.936201107469555e-05, | |
| "loss": 2.6999, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.09951381893040165, | |
| "grad_norm": 0.5991240739822388, | |
| "learning_rate": 9.93434255103574e-05, | |
| "loss": 2.6936, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.10021462047216503, | |
| "grad_norm": 0.5660961866378784, | |
| "learning_rate": 9.932457489480213e-05, | |
| "loss": 2.686, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.10091542201392843, | |
| "grad_norm": 0.690290093421936, | |
| "learning_rate": 9.930545932928698e-05, | |
| "loss": 2.6809, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.10161622355569182, | |
| "grad_norm": 0.7119167447090149, | |
| "learning_rate": 9.928607891649234e-05, | |
| "loss": 2.7221, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.10231702509745522, | |
| "grad_norm": 0.7049365639686584, | |
| "learning_rate": 9.926643376052131e-05, | |
| "loss": 2.6569, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.1030178266392186, | |
| "grad_norm": 0.6691743731498718, | |
| "learning_rate": 9.924652396689902e-05, | |
| "loss": 2.6751, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.103718628180982, | |
| "grad_norm": 0.5533433556556702, | |
| "learning_rate": 9.922634964257215e-05, | |
| "loss": 2.7064, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.10441942972274539, | |
| "grad_norm": 0.6669672727584839, | |
| "learning_rate": 9.920591089590831e-05, | |
| "loss": 2.687, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.10512023126450878, | |
| "grad_norm": 0.8539720773696899, | |
| "learning_rate": 9.918520783669549e-05, | |
| "loss": 2.6968, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.10582103280627217, | |
| "grad_norm": 0.827905535697937, | |
| "learning_rate": 9.916424057614142e-05, | |
| "loss": 2.7339, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.10652183434803557, | |
| "grad_norm": 0.7071542143821716, | |
| "learning_rate": 9.9143009226873e-05, | |
| "loss": 2.67, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.10722263588979895, | |
| "grad_norm": 0.6667853593826294, | |
| "learning_rate": 9.912151390293575e-05, | |
| "loss": 2.7113, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.10792343743156235, | |
| "grad_norm": 0.49210044741630554, | |
| "learning_rate": 9.90997547197931e-05, | |
| "loss": 2.7034, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.10862423897332574, | |
| "grad_norm": 0.5823047757148743, | |
| "learning_rate": 9.907773179432581e-05, | |
| "loss": 2.6815, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.10932504051508914, | |
| "grad_norm": 0.5159279704093933, | |
| "learning_rate": 9.905544524483138e-05, | |
| "loss": 2.7055, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.11002584205685252, | |
| "grad_norm": 0.5294278264045715, | |
| "learning_rate": 9.903289519102338e-05, | |
| "loss": 2.6821, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.11072664359861592, | |
| "grad_norm": 0.5865507125854492, | |
| "learning_rate": 9.901008175403078e-05, | |
| "loss": 2.698, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.1114274451403793, | |
| "grad_norm": 0.7102755904197693, | |
| "learning_rate": 9.898700505639735e-05, | |
| "loss": 2.693, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.1121282466821427, | |
| "grad_norm": 0.8151699900627136, | |
| "learning_rate": 9.8963665222081e-05, | |
| "loss": 2.6482, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.11282904822390609, | |
| "grad_norm": 0.5769193172454834, | |
| "learning_rate": 9.894006237645304e-05, | |
| "loss": 2.6893, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.11352984976566949, | |
| "grad_norm": 0.6606284976005554, | |
| "learning_rate": 9.891619664629762e-05, | |
| "loss": 2.6859, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.11423065130743287, | |
| "grad_norm": 0.5883016586303711, | |
| "learning_rate": 9.889206815981094e-05, | |
| "loss": 2.6622, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.11493145284919627, | |
| "grad_norm": 0.5413339734077454, | |
| "learning_rate": 9.886767704660067e-05, | |
| "loss": 2.6718, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.11563225439095966, | |
| "grad_norm": 0.7391770482063293, | |
| "learning_rate": 9.884302343768512e-05, | |
| "loss": 2.6695, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.11633305593272306, | |
| "grad_norm": 0.7529366612434387, | |
| "learning_rate": 9.881810746549267e-05, | |
| "loss": 2.7341, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.11703385747448644, | |
| "grad_norm": 0.6971571445465088, | |
| "learning_rate": 9.8792929263861e-05, | |
| "loss": 2.6444, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.11773465901624984, | |
| "grad_norm": 0.544129490852356, | |
| "learning_rate": 9.876748896803633e-05, | |
| "loss": 2.7351, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.11843546055801323, | |
| "grad_norm": 0.6561135649681091, | |
| "learning_rate": 9.874178671467277e-05, | |
| "loss": 2.6896, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.11913626209977662, | |
| "grad_norm": 0.6607089042663574, | |
| "learning_rate": 9.871582264183155e-05, | |
| "loss": 2.6664, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.11983706364154001, | |
| "grad_norm": 0.6727411150932312, | |
| "learning_rate": 9.868959688898023e-05, | |
| "loss": 2.68, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.12053786518330341, | |
| "grad_norm": 0.5672718286514282, | |
| "learning_rate": 9.86631095969921e-05, | |
| "loss": 2.6639, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.1212386667250668, | |
| "grad_norm": 0.7188961505889893, | |
| "learning_rate": 9.86363609081452e-05, | |
| "loss": 2.6604, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.12193946826683019, | |
| "grad_norm": 0.9785953760147095, | |
| "learning_rate": 9.86093509661218e-05, | |
| "loss": 2.6557, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.12264026980859358, | |
| "grad_norm": 0.7856999635696411, | |
| "learning_rate": 9.85820799160074e-05, | |
| "loss": 2.6418, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.12334107135035698, | |
| "grad_norm": 0.5956946015357971, | |
| "learning_rate": 9.855454790429015e-05, | |
| "loss": 2.658, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.12404187289212036, | |
| "grad_norm": 0.6523074507713318, | |
| "learning_rate": 9.852675507885991e-05, | |
| "loss": 2.6743, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.12474267443388376, | |
| "grad_norm": 0.71266108751297, | |
| "learning_rate": 9.849870158900753e-05, | |
| "loss": 2.6805, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.12544347597564715, | |
| "grad_norm": 0.5674154162406921, | |
| "learning_rate": 9.847038758542404e-05, | |
| "loss": 2.6678, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.12614427751741053, | |
| "grad_norm": 0.5430511236190796, | |
| "learning_rate": 9.844181322019983e-05, | |
| "loss": 2.643, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.12684507905917394, | |
| "grad_norm": 0.508791983127594, | |
| "learning_rate": 9.841297864682388e-05, | |
| "loss": 2.6524, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.12754588060093733, | |
| "grad_norm": 0.6082713603973389, | |
| "learning_rate": 9.838388402018282e-05, | |
| "loss": 2.6892, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.1282466821427007, | |
| "grad_norm": 0.6065689325332642, | |
| "learning_rate": 9.835452949656022e-05, | |
| "loss": 2.6083, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.1289474836844641, | |
| "grad_norm": 0.5220572352409363, | |
| "learning_rate": 9.83249152336357e-05, | |
| "loss": 2.6573, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.1296482852262275, | |
| "grad_norm": 0.568534791469574, | |
| "learning_rate": 9.829504139048406e-05, | |
| "loss": 2.6266, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1303490867679909, | |
| "grad_norm": 0.6165401339530945, | |
| "learning_rate": 9.826490812757452e-05, | |
| "loss": 2.6928, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.13104988830975428, | |
| "grad_norm": 0.5951835513114929, | |
| "learning_rate": 9.823451560676966e-05, | |
| "loss": 2.6468, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.13175068985151767, | |
| "grad_norm": 0.4942519962787628, | |
| "learning_rate": 9.820386399132482e-05, | |
| "loss": 2.6493, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.13245149139328108, | |
| "grad_norm": 0.6185161471366882, | |
| "learning_rate": 9.8172953445887e-05, | |
| "loss": 2.6741, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.13315229293504446, | |
| "grad_norm": 0.5588895678520203, | |
| "learning_rate": 9.814178413649407e-05, | |
| "loss": 2.6393, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.13385309447680785, | |
| "grad_norm": 0.6289598941802979, | |
| "learning_rate": 9.811035623057387e-05, | |
| "loss": 2.6022, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.13455389601857123, | |
| "grad_norm": 0.6258370280265808, | |
| "learning_rate": 9.807866989694334e-05, | |
| "loss": 2.6033, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.13525469756033462, | |
| "grad_norm": 0.6390899419784546, | |
| "learning_rate": 9.804672530580754e-05, | |
| "loss": 2.6413, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.13595549910209803, | |
| "grad_norm": 0.6844115257263184, | |
| "learning_rate": 9.801452262875877e-05, | |
| "loss": 2.6339, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.13665630064386142, | |
| "grad_norm": 0.70540452003479, | |
| "learning_rate": 9.798206203877569e-05, | |
| "loss": 2.6471, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.1373571021856248, | |
| "grad_norm": 0.7336652278900146, | |
| "learning_rate": 9.794934371022233e-05, | |
| "loss": 2.6348, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.1380579037273882, | |
| "grad_norm": 0.7155029773712158, | |
| "learning_rate": 9.79163678188472e-05, | |
| "loss": 2.6128, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.1387587052691516, | |
| "grad_norm": 0.6354189515113831, | |
| "learning_rate": 9.788313454178228e-05, | |
| "loss": 2.6281, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.13945950681091498, | |
| "grad_norm": 0.596047043800354, | |
| "learning_rate": 9.78496440575422e-05, | |
| "loss": 2.6719, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.14016030835267837, | |
| "grad_norm": 0.6149719953536987, | |
| "learning_rate": 9.781589654602306e-05, | |
| "loss": 2.625, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.14086110989444176, | |
| "grad_norm": 0.6066911816596985, | |
| "learning_rate": 9.778189218850174e-05, | |
| "loss": 2.6193, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.14156191143620517, | |
| "grad_norm": 0.5690994262695312, | |
| "learning_rate": 9.774763116763466e-05, | |
| "loss": 2.6239, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.14226271297796855, | |
| "grad_norm": 0.532486081123352, | |
| "learning_rate": 9.771311366745703e-05, | |
| "loss": 2.6264, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.14296351451973194, | |
| "grad_norm": 0.5434598326683044, | |
| "learning_rate": 9.767833987338171e-05, | |
| "loss": 2.6534, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.14366431606149532, | |
| "grad_norm": 0.522413432598114, | |
| "learning_rate": 9.764330997219822e-05, | |
| "loss": 2.6468, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.14436511760325874, | |
| "grad_norm": 0.5612457990646362, | |
| "learning_rate": 9.760802415207181e-05, | |
| "loss": 2.6307, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.14506591914502212, | |
| "grad_norm": 0.5850318670272827, | |
| "learning_rate": 9.757248260254244e-05, | |
| "loss": 2.6324, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.1457667206867855, | |
| "grad_norm": 0.688555121421814, | |
| "learning_rate": 9.753668551452368e-05, | |
| "loss": 2.6066, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.1464675222285489, | |
| "grad_norm": 0.6506465077400208, | |
| "learning_rate": 9.750063308030179e-05, | |
| "loss": 2.5964, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.1471683237703123, | |
| "grad_norm": 0.6529019474983215, | |
| "learning_rate": 9.746432549353462e-05, | |
| "loss": 2.651, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.1478691253120757, | |
| "grad_norm": 0.5469995141029358, | |
| "learning_rate": 9.742776294925058e-05, | |
| "loss": 2.6129, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.14856992685383907, | |
| "grad_norm": 0.4992043673992157, | |
| "learning_rate": 9.739094564384758e-05, | |
| "loss": 2.6074, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.14927072839560246, | |
| "grad_norm": 0.5064156651496887, | |
| "learning_rate": 9.735387377509206e-05, | |
| "loss": 2.6408, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.14997152993736587, | |
| "grad_norm": 0.5961376428604126, | |
| "learning_rate": 9.731654754211781e-05, | |
| "loss": 2.615, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.15067233147912926, | |
| "grad_norm": 0.5533669590950012, | |
| "learning_rate": 9.727896714542494e-05, | |
| "loss": 2.6225, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.15137313302089264, | |
| "grad_norm": 0.5527905821800232, | |
| "learning_rate": 9.724113278687888e-05, | |
| "loss": 2.5836, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.15207393456265603, | |
| "grad_norm": 0.4616098701953888, | |
| "learning_rate": 9.720304466970916e-05, | |
| "loss": 2.6236, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.15277473610441944, | |
| "grad_norm": 0.5189539790153503, | |
| "learning_rate": 9.716470299850844e-05, | |
| "loss": 2.6364, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.15347553764618282, | |
| "grad_norm": 0.5303817987442017, | |
| "learning_rate": 9.712610797923133e-05, | |
| "loss": 2.6097, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.1541763391879462, | |
| "grad_norm": 0.5957894921302795, | |
| "learning_rate": 9.708725981919333e-05, | |
| "loss": 2.5749, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1548771407297096, | |
| "grad_norm": 0.5686895251274109, | |
| "learning_rate": 9.704815872706972e-05, | |
| "loss": 2.6319, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.155577942271473, | |
| "grad_norm": 0.5570897459983826, | |
| "learning_rate": 9.700880491289438e-05, | |
| "loss": 2.6287, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.1562787438132364, | |
| "grad_norm": 0.5330969095230103, | |
| "learning_rate": 9.696919858805873e-05, | |
| "loss": 2.6014, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.15697954535499978, | |
| "grad_norm": 0.4891030192375183, | |
| "learning_rate": 9.692933996531053e-05, | |
| "loss": 2.6097, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.15768034689676316, | |
| "grad_norm": 0.5465073585510254, | |
| "learning_rate": 9.688922925875285e-05, | |
| "loss": 2.6162, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.15838114843852658, | |
| "grad_norm": 0.5483290553092957, | |
| "learning_rate": 9.684886668384277e-05, | |
| "loss": 2.5999, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.15908194998028996, | |
| "grad_norm": 0.6061928868293762, | |
| "learning_rate": 9.68082524573903e-05, | |
| "loss": 2.6614, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.15978275152205335, | |
| "grad_norm": 0.5806353688240051, | |
| "learning_rate": 9.676738679755726e-05, | |
| "loss": 2.6039, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.16048355306381673, | |
| "grad_norm": 0.5722226500511169, | |
| "learning_rate": 9.672626992385602e-05, | |
| "loss": 2.6529, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.16118435460558014, | |
| "grad_norm": 0.5939204096794128, | |
| "learning_rate": 9.668490205714839e-05, | |
| "loss": 2.6314, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.16188515614734353, | |
| "grad_norm": 0.7260386943817139, | |
| "learning_rate": 9.664328341964436e-05, | |
| "loss": 2.6211, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.1625859576891069, | |
| "grad_norm": 0.8503554463386536, | |
| "learning_rate": 9.6601414234901e-05, | |
| "loss": 2.6134, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.1632867592308703, | |
| "grad_norm": 0.5818518996238708, | |
| "learning_rate": 9.655929472782116e-05, | |
| "loss": 2.5667, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.1639875607726337, | |
| "grad_norm": 0.5678598284721375, | |
| "learning_rate": 9.651692512465239e-05, | |
| "loss": 2.6153, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.1646883623143971, | |
| "grad_norm": 0.5939005613327026, | |
| "learning_rate": 9.647430565298555e-05, | |
| "loss": 2.6098, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.16538916385616048, | |
| "grad_norm": 0.5300047993659973, | |
| "learning_rate": 9.643143654175373e-05, | |
| "loss": 2.6167, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.16608996539792387, | |
| "grad_norm": 0.4946250319480896, | |
| "learning_rate": 9.638831802123101e-05, | |
| "loss": 2.581, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.16679076693968728, | |
| "grad_norm": 0.4555206000804901, | |
| "learning_rate": 9.634495032303111e-05, | |
| "loss": 2.588, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.16749156848145066, | |
| "grad_norm": 0.5159677267074585, | |
| "learning_rate": 9.630133368010628e-05, | |
| "loss": 2.5868, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.16819237002321405, | |
| "grad_norm": 0.5565433502197266, | |
| "learning_rate": 9.625746832674597e-05, | |
| "loss": 2.6185, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.16889317156497743, | |
| "grad_norm": 0.4775915741920471, | |
| "learning_rate": 9.621335449857562e-05, | |
| "loss": 2.5897, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.16959397310674085, | |
| "grad_norm": 0.5150102376937866, | |
| "learning_rate": 9.616899243255532e-05, | |
| "loss": 2.5478, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.17029477464850423, | |
| "grad_norm": 0.48455357551574707, | |
| "learning_rate": 9.612438236697863e-05, | |
| "loss": 2.5639, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.17099557619026762, | |
| "grad_norm": 0.5149878859519958, | |
| "learning_rate": 9.607952454147121e-05, | |
| "loss": 2.599, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.171696377732031, | |
| "grad_norm": 0.6969982385635376, | |
| "learning_rate": 9.603441919698963e-05, | |
| "loss": 2.5733, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.17239717927379442, | |
| "grad_norm": 0.57285475730896, | |
| "learning_rate": 9.598906657582e-05, | |
| "loss": 2.5791, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.1730979808155578, | |
| "grad_norm": 0.5704159140586853, | |
| "learning_rate": 9.594346692157667e-05, | |
| "loss": 2.5692, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.17379878235732119, | |
| "grad_norm": 0.681797444820404, | |
| "learning_rate": 9.589762047920096e-05, | |
| "loss": 2.5759, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.17449958389908457, | |
| "grad_norm": 0.49717003107070923, | |
| "learning_rate": 9.585152749495984e-05, | |
| "loss": 2.5848, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.17520038544084798, | |
| "grad_norm": 0.48680582642555237, | |
| "learning_rate": 9.580518821644457e-05, | |
| "loss": 2.5682, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.17590118698261137, | |
| "grad_norm": 0.5525830388069153, | |
| "learning_rate": 9.575860289256943e-05, | |
| "loss": 2.5894, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.17660198852437475, | |
| "grad_norm": 0.5562606453895569, | |
| "learning_rate": 9.571177177357032e-05, | |
| "loss": 2.5675, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.17730279006613814, | |
| "grad_norm": 0.5515877604484558, | |
| "learning_rate": 9.566469511100345e-05, | |
| "loss": 2.5877, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.17800359160790155, | |
| "grad_norm": 0.6816357970237732, | |
| "learning_rate": 9.561737315774398e-05, | |
| "loss": 2.596, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.17870439314966494, | |
| "grad_norm": 0.507437527179718, | |
| "learning_rate": 9.556980616798463e-05, | |
| "loss": 2.5721, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.17940519469142832, | |
| "grad_norm": 0.5275202989578247, | |
| "learning_rate": 9.552199439723443e-05, | |
| "loss": 2.568, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.1801059962331917, | |
| "grad_norm": 0.5467104911804199, | |
| "learning_rate": 9.547393810231722e-05, | |
| "loss": 2.5842, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.1808067977749551, | |
| "grad_norm": 0.5407027006149292, | |
| "learning_rate": 9.542563754137031e-05, | |
| "loss": 2.5891, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.1815075993167185, | |
| "grad_norm": 0.5731847882270813, | |
| "learning_rate": 9.537709297384308e-05, | |
| "loss": 2.6143, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.1822084008584819, | |
| "grad_norm": 0.566457986831665, | |
| "learning_rate": 9.532830466049565e-05, | |
| "loss": 2.5522, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.18290920240024527, | |
| "grad_norm": 0.4899183213710785, | |
| "learning_rate": 9.527927286339744e-05, | |
| "loss": 2.5961, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.18361000394200866, | |
| "grad_norm": 0.4883110523223877, | |
| "learning_rate": 9.52299978459257e-05, | |
| "loss": 2.5557, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.18431080548377207, | |
| "grad_norm": 0.5534235239028931, | |
| "learning_rate": 9.518047987276421e-05, | |
| "loss": 2.6452, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.18501160702553546, | |
| "grad_norm": 0.47292667627334595, | |
| "learning_rate": 9.513071920990179e-05, | |
| "loss": 2.5848, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.18571240856729884, | |
| "grad_norm": 0.5438964366912842, | |
| "learning_rate": 9.508071612463086e-05, | |
| "loss": 2.5332, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.18641321010906223, | |
| "grad_norm": 0.5318060517311096, | |
| "learning_rate": 9.503047088554601e-05, | |
| "loss": 2.585, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.18711401165082564, | |
| "grad_norm": 0.49279502034187317, | |
| "learning_rate": 9.497998376254267e-05, | |
| "loss": 2.5948, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.18781481319258903, | |
| "grad_norm": 0.5161717534065247, | |
| "learning_rate": 9.492925502681545e-05, | |
| "loss": 2.5644, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.1885156147343524, | |
| "grad_norm": 0.4586479663848877, | |
| "learning_rate": 9.487828495085684e-05, | |
| "loss": 2.5568, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.1892164162761158, | |
| "grad_norm": 0.4390322268009186, | |
| "learning_rate": 9.482707380845573e-05, | |
| "loss": 2.5938, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1899172178178792, | |
| "grad_norm": 0.5253728628158569, | |
| "learning_rate": 9.47756218746959e-05, | |
| "loss": 2.5996, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.1906180193596426, | |
| "grad_norm": 0.4567623436450958, | |
| "learning_rate": 9.472392942595454e-05, | |
| "loss": 2.5576, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.19131882090140598, | |
| "grad_norm": 0.5091727375984192, | |
| "learning_rate": 9.467199673990077e-05, | |
| "loss": 2.5873, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.19201962244316936, | |
| "grad_norm": 0.4959392845630646, | |
| "learning_rate": 9.46198240954942e-05, | |
| "loss": 2.5291, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.19272042398493278, | |
| "grad_norm": 0.5150632262229919, | |
| "learning_rate": 9.456741177298336e-05, | |
| "loss": 2.5503, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.19342122552669616, | |
| "grad_norm": 0.4603368639945984, | |
| "learning_rate": 9.451476005390422e-05, | |
| "loss": 2.5785, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.19412202706845955, | |
| "grad_norm": 0.4441729784011841, | |
| "learning_rate": 9.446186922107873e-05, | |
| "loss": 2.5512, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.19482282861022293, | |
| "grad_norm": 0.5432455539703369, | |
| "learning_rate": 9.44087395586132e-05, | |
| "loss": 2.5741, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.19552363015198634, | |
| "grad_norm": 0.42969366908073425, | |
| "learning_rate": 9.435537135189687e-05, | |
| "loss": 2.5677, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.19622443169374973, | |
| "grad_norm": 0.5706619620323181, | |
| "learning_rate": 9.430176488760027e-05, | |
| "loss": 2.556, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.19692523323551311, | |
| "grad_norm": 0.7202513217926025, | |
| "learning_rate": 9.424792045367383e-05, | |
| "loss": 2.5435, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.1976260347772765, | |
| "grad_norm": 0.5471363663673401, | |
| "learning_rate": 9.419383833934621e-05, | |
| "loss": 2.572, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.1983268363190399, | |
| "grad_norm": 0.654058575630188, | |
| "learning_rate": 9.413951883512275e-05, | |
| "loss": 2.5432, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.1990276378608033, | |
| "grad_norm": 0.6124361157417297, | |
| "learning_rate": 9.408496223278403e-05, | |
| "loss": 2.5803, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.19972843940256668, | |
| "grad_norm": 0.5291132926940918, | |
| "learning_rate": 9.403016882538408e-05, | |
| "loss": 2.576, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.20042924094433007, | |
| "grad_norm": 0.6087374687194824, | |
| "learning_rate": 9.397513890724911e-05, | |
| "loss": 2.5171, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.20113004248609348, | |
| "grad_norm": 0.5776922106742859, | |
| "learning_rate": 9.391987277397566e-05, | |
| "loss": 2.6054, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.20183084402785686, | |
| "grad_norm": 0.544319748878479, | |
| "learning_rate": 9.38643707224291e-05, | |
| "loss": 2.548, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.20253164556962025, | |
| "grad_norm": 0.5210007429122925, | |
| "learning_rate": 9.38086330507421e-05, | |
| "loss": 2.6019, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.20323244711138364, | |
| "grad_norm": 0.5160629153251648, | |
| "learning_rate": 9.375266005831297e-05, | |
| "loss": 2.6046, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.20393324865314705, | |
| "grad_norm": 0.6452796459197998, | |
| "learning_rate": 9.369645204580403e-05, | |
| "loss": 2.566, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.20463405019491043, | |
| "grad_norm": 0.5813329815864563, | |
| "learning_rate": 9.364000931514008e-05, | |
| "loss": 2.5661, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.20533485173667382, | |
| "grad_norm": 0.5450593829154968, | |
| "learning_rate": 9.358333216950664e-05, | |
| "loss": 2.5769, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.2060356532784372, | |
| "grad_norm": 0.5340794324874878, | |
| "learning_rate": 9.352642091334849e-05, | |
| "loss": 2.5549, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.20673645482020062, | |
| "grad_norm": 0.5767348408699036, | |
| "learning_rate": 9.34692758523679e-05, | |
| "loss": 2.5604, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.207437256361964, | |
| "grad_norm": 0.6048093438148499, | |
| "learning_rate": 9.341189729352302e-05, | |
| "loss": 2.5929, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.20813805790372739, | |
| "grad_norm": 0.4430505335330963, | |
| "learning_rate": 9.33542855450263e-05, | |
| "loss": 2.5563, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.20883885944549077, | |
| "grad_norm": 0.49373888969421387, | |
| "learning_rate": 9.329644091634278e-05, | |
| "loss": 2.5517, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.20953966098725418, | |
| "grad_norm": 0.5227393507957458, | |
| "learning_rate": 9.323836371818837e-05, | |
| "loss": 2.5286, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.21024046252901757, | |
| "grad_norm": 0.497405081987381, | |
| "learning_rate": 9.318005426252832e-05, | |
| "loss": 2.5638, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.21094126407078095, | |
| "grad_norm": 0.48721396923065186, | |
| "learning_rate": 9.312151286257537e-05, | |
| "loss": 2.5751, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.21164206561254434, | |
| "grad_norm": 0.4621741771697998, | |
| "learning_rate": 9.306273983278825e-05, | |
| "loss": 2.5654, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.21234286715430775, | |
| "grad_norm": 0.4756307005882263, | |
| "learning_rate": 9.300373548886987e-05, | |
| "loss": 2.5989, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.21304366869607114, | |
| "grad_norm": 0.42497771978378296, | |
| "learning_rate": 9.294450014776566e-05, | |
| "loss": 2.564, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.21374447023783452, | |
| "grad_norm": 0.5173219442367554, | |
| "learning_rate": 9.288503412766185e-05, | |
| "loss": 2.5296, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2144452717795979, | |
| "grad_norm": 0.4622451066970825, | |
| "learning_rate": 9.28253377479838e-05, | |
| "loss": 2.5829, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.21514607332136132, | |
| "grad_norm": 0.5879294276237488, | |
| "learning_rate": 9.276541132939428e-05, | |
| "loss": 2.5462, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.2158468748631247, | |
| "grad_norm": 0.6237635612487793, | |
| "learning_rate": 9.270525519379165e-05, | |
| "loss": 2.6143, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.2165476764048881, | |
| "grad_norm": 0.5845280289649963, | |
| "learning_rate": 9.264486966430829e-05, | |
| "loss": 2.5272, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.21724847794665147, | |
| "grad_norm": 0.5140432715415955, | |
| "learning_rate": 9.258425506530872e-05, | |
| "loss": 2.5716, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2179492794884149, | |
| "grad_norm": 0.5868300199508667, | |
| "learning_rate": 9.2523411722388e-05, | |
| "loss": 2.5699, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.21865008103017827, | |
| "grad_norm": 0.587374210357666, | |
| "learning_rate": 9.246233996236983e-05, | |
| "loss": 2.5335, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.21935088257194166, | |
| "grad_norm": 0.5000743865966797, | |
| "learning_rate": 9.240104011330489e-05, | |
| "loss": 2.5367, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.22005168411370504, | |
| "grad_norm": 0.5124289393424988, | |
| "learning_rate": 9.233951250446902e-05, | |
| "loss": 2.5598, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.22075248565546846, | |
| "grad_norm": 0.4815032482147217, | |
| "learning_rate": 9.227775746636158e-05, | |
| "loss": 2.5468, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.22145328719723184, | |
| "grad_norm": 0.5089353919029236, | |
| "learning_rate": 9.22157753307035e-05, | |
| "loss": 2.5482, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.22215408873899523, | |
| "grad_norm": 0.468841552734375, | |
| "learning_rate": 9.215356643043559e-05, | |
| "loss": 2.5138, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.2228548902807586, | |
| "grad_norm": 0.511968731880188, | |
| "learning_rate": 9.209113109971676e-05, | |
| "loss": 2.5481, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.223555691822522, | |
| "grad_norm": 0.6082082390785217, | |
| "learning_rate": 9.202846967392217e-05, | |
| "loss": 2.5459, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.2242564933642854, | |
| "grad_norm": 0.4931623637676239, | |
| "learning_rate": 9.196558248964151e-05, | |
| "loss": 2.5785, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2249572949060488, | |
| "grad_norm": 0.5754916071891785, | |
| "learning_rate": 9.190246988467712e-05, | |
| "loss": 2.5166, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.22565809644781218, | |
| "grad_norm": 0.5335285067558289, | |
| "learning_rate": 9.183913219804221e-05, | |
| "loss": 2.4976, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.22635889798957556, | |
| "grad_norm": 0.4676333963871002, | |
| "learning_rate": 9.1775569769959e-05, | |
| "loss": 2.5361, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.22705969953133898, | |
| "grad_norm": 0.48826783895492554, | |
| "learning_rate": 9.171178294185697e-05, | |
| "loss": 2.5347, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.22776050107310236, | |
| "grad_norm": 0.509066104888916, | |
| "learning_rate": 9.164777205637094e-05, | |
| "loss": 2.5326, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.22846130261486575, | |
| "grad_norm": 0.5001896619796753, | |
| "learning_rate": 9.158353745733927e-05, | |
| "loss": 2.5605, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.22916210415662913, | |
| "grad_norm": 0.5497420430183411, | |
| "learning_rate": 9.151907948980206e-05, | |
| "loss": 2.5295, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.22986290569839254, | |
| "grad_norm": 0.43462875485420227, | |
| "learning_rate": 9.145439849999919e-05, | |
| "loss": 2.5358, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.23056370724015593, | |
| "grad_norm": 0.5398270487785339, | |
| "learning_rate": 9.138949483536852e-05, | |
| "loss": 2.5464, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.23126450878191931, | |
| "grad_norm": 0.5165109038352966, | |
| "learning_rate": 9.132436884454408e-05, | |
| "loss": 2.5043, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2319653103236827, | |
| "grad_norm": 0.6717212200164795, | |
| "learning_rate": 9.125902087735407e-05, | |
| "loss": 2.547, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.2326661118654461, | |
| "grad_norm": 0.4584912061691284, | |
| "learning_rate": 9.119345128481909e-05, | |
| "loss": 2.5106, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.2333669134072095, | |
| "grad_norm": 0.5452204942703247, | |
| "learning_rate": 9.112766041915019e-05, | |
| "loss": 2.5189, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.23406771494897288, | |
| "grad_norm": 0.5055968761444092, | |
| "learning_rate": 9.106164863374702e-05, | |
| "loss": 2.4957, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.23476851649073627, | |
| "grad_norm": 0.4905461072921753, | |
| "learning_rate": 9.099541628319592e-05, | |
| "loss": 2.5523, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.23546931803249968, | |
| "grad_norm": 0.44840848445892334, | |
| "learning_rate": 9.092896372326798e-05, | |
| "loss": 2.4713, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.23617011957426307, | |
| "grad_norm": 0.46489134430885315, | |
| "learning_rate": 9.086229131091717e-05, | |
| "loss": 2.5071, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.23687092111602645, | |
| "grad_norm": 0.4460737705230713, | |
| "learning_rate": 9.079539940427845e-05, | |
| "loss": 2.5799, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.23757172265778984, | |
| "grad_norm": 0.5268511176109314, | |
| "learning_rate": 9.072828836266574e-05, | |
| "loss": 2.5574, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.23827252419955325, | |
| "grad_norm": 0.5001477003097534, | |
| "learning_rate": 9.066095854657011e-05, | |
| "loss": 2.5117, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.23897332574131663, | |
| "grad_norm": 0.5136899352073669, | |
| "learning_rate": 9.059341031765773e-05, | |
| "loss": 2.4855, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.23967412728308002, | |
| "grad_norm": 0.5532418489456177, | |
| "learning_rate": 9.052564403876808e-05, | |
| "loss": 2.5623, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.2403749288248434, | |
| "grad_norm": 0.4908037483692169, | |
| "learning_rate": 9.045766007391185e-05, | |
| "loss": 2.5248, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.24107573036660682, | |
| "grad_norm": 0.45994317531585693, | |
| "learning_rate": 9.038945878826903e-05, | |
| "loss": 2.5007, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.2417765319083702, | |
| "grad_norm": 0.5593565702438354, | |
| "learning_rate": 9.032104054818698e-05, | |
| "loss": 2.5759, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2424773334501336, | |
| "grad_norm": 0.5076695084571838, | |
| "learning_rate": 9.025240572117846e-05, | |
| "loss": 2.5272, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.24317813499189697, | |
| "grad_norm": 0.3996141850948334, | |
| "learning_rate": 9.018355467591962e-05, | |
| "loss": 2.5317, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.24387893653366038, | |
| "grad_norm": 0.49347859621047974, | |
| "learning_rate": 9.011448778224802e-05, | |
| "loss": 2.5186, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.24457973807542377, | |
| "grad_norm": 0.5040503144264221, | |
| "learning_rate": 9.004520541116075e-05, | |
| "loss": 2.5015, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.24528053961718715, | |
| "grad_norm": 0.4658913016319275, | |
| "learning_rate": 8.997570793481223e-05, | |
| "loss": 2.5481, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.24598134115895054, | |
| "grad_norm": 0.47850051522254944, | |
| "learning_rate": 8.990599572651242e-05, | |
| "loss": 2.5505, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.24668214270071395, | |
| "grad_norm": 0.48090964555740356, | |
| "learning_rate": 8.983606916072469e-05, | |
| "loss": 2.5669, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.24738294424247734, | |
| "grad_norm": 0.5716775059700012, | |
| "learning_rate": 8.976592861306384e-05, | |
| "loss": 2.523, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.24808374578424072, | |
| "grad_norm": 0.49985334277153015, | |
| "learning_rate": 8.969557446029409e-05, | |
| "loss": 2.5439, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.2487845473260041, | |
| "grad_norm": 0.6331408023834229, | |
| "learning_rate": 8.962500708032708e-05, | |
| "loss": 2.5601, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.24948534886776752, | |
| "grad_norm": 0.5418590307235718, | |
| "learning_rate": 8.955422685221979e-05, | |
| "loss": 2.5495, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.2501861504095309, | |
| "grad_norm": 0.5396260619163513, | |
| "learning_rate": 8.948323415617253e-05, | |
| "loss": 2.5151, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.2508869519512943, | |
| "grad_norm": 0.5641499161720276, | |
| "learning_rate": 8.941202937352686e-05, | |
| "loss": 2.4895, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.2515877534930577, | |
| "grad_norm": 0.47651517391204834, | |
| "learning_rate": 8.934061288676365e-05, | |
| "loss": 2.5634, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.25228855503482106, | |
| "grad_norm": 0.5351449251174927, | |
| "learning_rate": 8.92689850795009e-05, | |
| "loss": 2.4804, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.25298935657658445, | |
| "grad_norm": 0.5856335759162903, | |
| "learning_rate": 8.919714633649172e-05, | |
| "loss": 2.5304, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.2536901581183479, | |
| "grad_norm": 0.4513723850250244, | |
| "learning_rate": 8.912509704362232e-05, | |
| "loss": 2.5369, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.25439095966011127, | |
| "grad_norm": 0.4676707983016968, | |
| "learning_rate": 8.905283758790985e-05, | |
| "loss": 2.5589, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.25509176120187466, | |
| "grad_norm": 0.5069173574447632, | |
| "learning_rate": 8.89803683575004e-05, | |
| "loss": 2.4958, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.25579256274363804, | |
| "grad_norm": 0.4774676263332367, | |
| "learning_rate": 8.890768974166685e-05, | |
| "loss": 2.5229, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2564933642854014, | |
| "grad_norm": 0.548409104347229, | |
| "learning_rate": 8.883480213080681e-05, | |
| "loss": 2.4815, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.2571941658271648, | |
| "grad_norm": 0.4854792356491089, | |
| "learning_rate": 8.876170591644054e-05, | |
| "loss": 2.5118, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.2578949673689282, | |
| "grad_norm": 0.4988788664340973, | |
| "learning_rate": 8.868840149120876e-05, | |
| "loss": 2.5073, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.2585957689106916, | |
| "grad_norm": 0.4614211618900299, | |
| "learning_rate": 8.861488924887071e-05, | |
| "loss": 2.4866, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.259296570452455, | |
| "grad_norm": 0.4878149926662445, | |
| "learning_rate": 8.854116958430185e-05, | |
| "loss": 2.5315, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2599973719942184, | |
| "grad_norm": 0.47185149788856506, | |
| "learning_rate": 8.846724289349189e-05, | |
| "loss": 2.4766, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.2606981735359818, | |
| "grad_norm": 0.446411669254303, | |
| "learning_rate": 8.839310957354249e-05, | |
| "loss": 2.5278, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.2613989750777452, | |
| "grad_norm": 0.45869573950767517, | |
| "learning_rate": 8.831877002266536e-05, | |
| "loss": 2.5051, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.26209977661950856, | |
| "grad_norm": 0.4578917920589447, | |
| "learning_rate": 8.82442246401799e-05, | |
| "loss": 2.4903, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.26280057816127195, | |
| "grad_norm": 0.4389136731624603, | |
| "learning_rate": 8.816947382651116e-05, | |
| "loss": 2.519, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.26350137970303533, | |
| "grad_norm": 0.4686265289783478, | |
| "learning_rate": 8.80945179831877e-05, | |
| "loss": 2.5537, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.2642021812447987, | |
| "grad_norm": 0.49357905983924866, | |
| "learning_rate": 8.801935751283944e-05, | |
| "loss": 2.4971, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.26490298278656216, | |
| "grad_norm": 0.5659007430076599, | |
| "learning_rate": 8.794399281919537e-05, | |
| "loss": 2.5291, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.26560378432832554, | |
| "grad_norm": 0.5637578964233398, | |
| "learning_rate": 8.786842430708157e-05, | |
| "loss": 2.5335, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.26630458587008893, | |
| "grad_norm": 0.47859886288642883, | |
| "learning_rate": 8.779265238241888e-05, | |
| "loss": 2.5104, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2670053874118523, | |
| "grad_norm": 0.5444939732551575, | |
| "learning_rate": 8.771667745222082e-05, | |
| "loss": 2.4823, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.2677061889536157, | |
| "grad_norm": 0.5456621050834656, | |
| "learning_rate": 8.76404999245914e-05, | |
| "loss": 2.5027, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.2684069904953791, | |
| "grad_norm": 0.5168180465698242, | |
| "learning_rate": 8.75641202087228e-05, | |
| "loss": 2.5562, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.26910779203714247, | |
| "grad_norm": 0.5675712823867798, | |
| "learning_rate": 8.748753871489333e-05, | |
| "loss": 2.5195, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.26980859357890585, | |
| "grad_norm": 0.4084811806678772, | |
| "learning_rate": 8.741075585446514e-05, | |
| "loss": 2.4853, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.27050939512066924, | |
| "grad_norm": 0.4109669327735901, | |
| "learning_rate": 8.733377203988208e-05, | |
| "loss": 2.5186, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.2712101966624327, | |
| "grad_norm": 0.5689636468887329, | |
| "learning_rate": 8.725658768466738e-05, | |
| "loss": 2.5106, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.27191099820419606, | |
| "grad_norm": 0.4750414192676544, | |
| "learning_rate": 8.71792032034215e-05, | |
| "loss": 2.4927, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.27261179974595945, | |
| "grad_norm": 0.4577466547489166, | |
| "learning_rate": 8.710161901181993e-05, | |
| "loss": 2.5005, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.27331260128772283, | |
| "grad_norm": 0.4786745011806488, | |
| "learning_rate": 8.702383552661081e-05, | |
| "loss": 2.5099, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2740134028294862, | |
| "grad_norm": 0.508456289768219, | |
| "learning_rate": 8.694585316561296e-05, | |
| "loss": 2.5377, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.2747142043712496, | |
| "grad_norm": 0.49584171175956726, | |
| "learning_rate": 8.686767234771333e-05, | |
| "loss": 2.5208, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.275415005913013, | |
| "grad_norm": 0.4523308575153351, | |
| "learning_rate": 8.678929349286498e-05, | |
| "loss": 2.5663, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.2761158074547764, | |
| "grad_norm": 0.411276638507843, | |
| "learning_rate": 8.671071702208467e-05, | |
| "loss": 2.5076, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.2768166089965398, | |
| "grad_norm": 0.47366130352020264, | |
| "learning_rate": 8.663194335745071e-05, | |
| "loss": 2.4725, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2775174105383032, | |
| "grad_norm": 0.44845113158226013, | |
| "learning_rate": 8.655297292210067e-05, | |
| "loss": 2.5204, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.2782182120800666, | |
| "grad_norm": 0.4630947709083557, | |
| "learning_rate": 8.647380614022902e-05, | |
| "loss": 2.4848, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.27891901362182997, | |
| "grad_norm": 0.4739050567150116, | |
| "learning_rate": 8.639444343708496e-05, | |
| "loss": 2.4975, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.27961981516359335, | |
| "grad_norm": 0.41872844099998474, | |
| "learning_rate": 8.631488523897011e-05, | |
| "loss": 2.5105, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.28032061670535674, | |
| "grad_norm": 0.5174891948699951, | |
| "learning_rate": 8.623513197323615e-05, | |
| "loss": 2.4428, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2810214182471201, | |
| "grad_norm": 0.4543634057044983, | |
| "learning_rate": 8.615518406828262e-05, | |
| "loss": 2.5248, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.2817222197888835, | |
| "grad_norm": 0.433250367641449, | |
| "learning_rate": 8.607504195355458e-05, | |
| "loss": 2.4887, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.28242302133064695, | |
| "grad_norm": 0.47642698884010315, | |
| "learning_rate": 8.599470605954025e-05, | |
| "loss": 2.5391, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.28312382287241034, | |
| "grad_norm": 0.45496654510498047, | |
| "learning_rate": 8.59141768177688e-05, | |
| "loss": 2.5444, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.2838246244141737, | |
| "grad_norm": 0.4619695544242859, | |
| "learning_rate": 8.583345466080796e-05, | |
| "loss": 2.504, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2845254259559371, | |
| "grad_norm": 0.4610481262207031, | |
| "learning_rate": 8.575254002226173e-05, | |
| "loss": 2.4904, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.2852262274977005, | |
| "grad_norm": 0.4597660005092621, | |
| "learning_rate": 8.5671433336768e-05, | |
| "loss": 2.4923, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.2859270290394639, | |
| "grad_norm": 0.5440905094146729, | |
| "learning_rate": 8.559013503999626e-05, | |
| "loss": 2.4806, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.28662783058122726, | |
| "grad_norm": 0.4667718708515167, | |
| "learning_rate": 8.550864556864529e-05, | |
| "loss": 2.5595, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.28732863212299065, | |
| "grad_norm": 0.47145599126815796, | |
| "learning_rate": 8.542696536044075e-05, | |
| "loss": 2.4813, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2880294336647541, | |
| "grad_norm": 0.4581964313983917, | |
| "learning_rate": 8.534509485413284e-05, | |
| "loss": 2.5467, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.28873023520651747, | |
| "grad_norm": 0.5127134919166565, | |
| "learning_rate": 8.5263034489494e-05, | |
| "loss": 2.5067, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.28943103674828086, | |
| "grad_norm": 0.5416949391365051, | |
| "learning_rate": 8.518078470731644e-05, | |
| "loss": 2.4669, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.29013183829004424, | |
| "grad_norm": 0.442828506231308, | |
| "learning_rate": 8.509834594940991e-05, | |
| "loss": 2.4708, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.2908326398318076, | |
| "grad_norm": 0.4708557426929474, | |
| "learning_rate": 8.501571865859924e-05, | |
| "loss": 2.5192, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.291533441373571, | |
| "grad_norm": 0.4371870458126068, | |
| "learning_rate": 8.49329032787219e-05, | |
| "loss": 2.4778, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.2922342429153344, | |
| "grad_norm": 0.48408806324005127, | |
| "learning_rate": 8.48499002546258e-05, | |
| "loss": 2.4868, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.2929350444570978, | |
| "grad_norm": 0.45126622915267944, | |
| "learning_rate": 8.47667100321667e-05, | |
| "loss": 2.4999, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.2936358459988612, | |
| "grad_norm": 0.4448654353618622, | |
| "learning_rate": 8.468333305820599e-05, | |
| "loss": 2.4848, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.2943366475406246, | |
| "grad_norm": 0.47776126861572266, | |
| "learning_rate": 8.459976978060815e-05, | |
| "loss": 2.5515, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.295037449082388, | |
| "grad_norm": 0.4572128653526306, | |
| "learning_rate": 8.45160206482384e-05, | |
| "loss": 2.5172, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.2957382506241514, | |
| "grad_norm": 0.4419424831867218, | |
| "learning_rate": 8.443208611096036e-05, | |
| "loss": 2.5035, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.29643905216591476, | |
| "grad_norm": 0.42213693261146545, | |
| "learning_rate": 8.434796661963344e-05, | |
| "loss": 2.542, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.29713985370767815, | |
| "grad_norm": 0.446344792842865, | |
| "learning_rate": 8.426366262611067e-05, | |
| "loss": 2.5119, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.29784065524944153, | |
| "grad_norm": 0.44233253598213196, | |
| "learning_rate": 8.417917458323607e-05, | |
| "loss": 2.4985, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2985414567912049, | |
| "grad_norm": 0.492471843957901, | |
| "learning_rate": 8.40945029448423e-05, | |
| "loss": 2.4553, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.29924225833296836, | |
| "grad_norm": 0.4490063488483429, | |
| "learning_rate": 8.400964816574826e-05, | |
| "loss": 2.5389, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.29994305987473174, | |
| "grad_norm": 0.5494585633277893, | |
| "learning_rate": 8.392461070175652e-05, | |
| "loss": 2.5163, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.30064386141649513, | |
| "grad_norm": 0.4822872281074524, | |
| "learning_rate": 8.383939100965103e-05, | |
| "loss": 2.504, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.3013446629582585, | |
| "grad_norm": 0.5434439778327942, | |
| "learning_rate": 8.375398954719456e-05, | |
| "loss": 2.4841, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3020454645000219, | |
| "grad_norm": 0.5055859088897705, | |
| "learning_rate": 8.366840677312626e-05, | |
| "loss": 2.4985, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.3027462660417853, | |
| "grad_norm": 0.44319674372673035, | |
| "learning_rate": 8.358264314715923e-05, | |
| "loss": 2.4661, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.30344706758354867, | |
| "grad_norm": 0.5121539235115051, | |
| "learning_rate": 8.349669912997799e-05, | |
| "loss": 2.4797, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.30414786912531205, | |
| "grad_norm": 0.4748767912387848, | |
| "learning_rate": 8.341057518323607e-05, | |
| "loss": 2.5009, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.3048486706670755, | |
| "grad_norm": 0.4823194742202759, | |
| "learning_rate": 8.332427176955353e-05, | |
| "loss": 2.4798, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3055494722088389, | |
| "grad_norm": 0.4242302477359772, | |
| "learning_rate": 8.323778935251437e-05, | |
| "loss": 2.4764, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.30625027375060226, | |
| "grad_norm": 0.46324998140335083, | |
| "learning_rate": 8.31511283966642e-05, | |
| "loss": 2.509, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.30695107529236565, | |
| "grad_norm": 0.4894976317882538, | |
| "learning_rate": 8.30642893675076e-05, | |
| "loss": 2.498, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.30765187683412903, | |
| "grad_norm": 0.4574197232723236, | |
| "learning_rate": 8.297727273150573e-05, | |
| "loss": 2.48, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.3083526783758924, | |
| "grad_norm": 0.44225645065307617, | |
| "learning_rate": 8.289007895607375e-05, | |
| "loss": 2.502, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3090534799176558, | |
| "grad_norm": 0.47749781608581543, | |
| "learning_rate": 8.28027085095783e-05, | |
| "loss": 2.5043, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.3097542814594192, | |
| "grad_norm": 0.4569682478904724, | |
| "learning_rate": 8.271516186133511e-05, | |
| "loss": 2.4454, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.31045508300118263, | |
| "grad_norm": 0.4561903178691864, | |
| "learning_rate": 8.262743948160632e-05, | |
| "loss": 2.4826, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.311155884542946, | |
| "grad_norm": 0.4749627411365509, | |
| "learning_rate": 8.253954184159803e-05, | |
| "loss": 2.4707, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.3118566860847094, | |
| "grad_norm": 0.4455653131008148, | |
| "learning_rate": 8.245146941345774e-05, | |
| "loss": 2.4647, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3125574876264728, | |
| "grad_norm": 0.4758734405040741, | |
| "learning_rate": 8.236322267027193e-05, | |
| "loss": 2.4885, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.31325828916823617, | |
| "grad_norm": 0.45016252994537354, | |
| "learning_rate": 8.227480208606333e-05, | |
| "loss": 2.4993, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.31395909070999956, | |
| "grad_norm": 0.48177486658096313, | |
| "learning_rate": 8.218620813578847e-05, | |
| "loss": 2.4838, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.31465989225176294, | |
| "grad_norm": 0.4863053858280182, | |
| "learning_rate": 8.209744129533519e-05, | |
| "loss": 2.5381, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.3153606937935263, | |
| "grad_norm": 0.49010857939720154, | |
| "learning_rate": 8.200850204151995e-05, | |
| "loss": 2.5721, | |
| "step": 900 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2854, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0132651008589824e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |