{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.10512023126450878,
  "eval_steps": 500,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00035040077088169594,
      "grad_norm": 6.5142412185668945,
      "learning_rate": 0.0,
      "loss": 5.324,
      "step": 1
    },
    {
      "epoch": 0.0007008015417633919,
      "grad_norm": 6.758334159851074,
      "learning_rate": 6.993006993006994e-07,
      "loss": 5.3405,
      "step": 2
    },
    {
      "epoch": 0.0014016030835267838,
      "grad_norm": 6.22674036026001,
      "learning_rate": 2.0979020979020983e-06,
      "loss": 5.3286,
      "step": 4
    },
    {
      "epoch": 0.0021024046252901755,
      "grad_norm": 5.438386917114258,
      "learning_rate": 3.496503496503497e-06,
      "loss": 5.25,
      "step": 6
    },
    {
      "epoch": 0.0028032061670535675,
      "grad_norm": 3.365504741668701,
      "learning_rate": 4.895104895104895e-06,
      "loss": 5.2821,
      "step": 8
    },
    {
      "epoch": 0.0035040077088169595,
      "grad_norm": 7.186147212982178,
      "learning_rate": 6.2937062937062944e-06,
      "loss": 5.21,
      "step": 10
    },
    {
      "epoch": 0.004204809250580351,
      "grad_norm": 4.960826396942139,
      "learning_rate": 7.692307692307694e-06,
      "loss": 5.0759,
      "step": 12
    },
    {
      "epoch": 0.004905610792343743,
      "grad_norm": 4.001464366912842,
      "learning_rate": 9.090909090909091e-06,
      "loss": 5.1092,
      "step": 14
    },
    {
      "epoch": 0.005606412334107135,
      "grad_norm": 3.2986342906951904,
      "learning_rate": 1.048951048951049e-05,
      "loss": 4.93,
      "step": 16
    },
    {
      "epoch": 0.006307213875870527,
      "grad_norm": 2.5407276153564453,
      "learning_rate": 1.188811188811189e-05,
      "loss": 4.8535,
      "step": 18
    },
    {
      "epoch": 0.007008015417633919,
      "grad_norm": 2.211754083633423,
      "learning_rate": 1.3286713286713287e-05,
      "loss": 4.74,
      "step": 20
    },
    {
      "epoch": 0.007708816959397311,
      "grad_norm": 1.6710195541381836,
      "learning_rate": 1.4685314685314686e-05,
      "loss": 4.609,
      "step": 22
    },
    {
      "epoch": 0.008409618501160702,
      "grad_norm": 1.280752182006836,
      "learning_rate": 1.6083916083916083e-05,
      "loss": 4.4879,
      "step": 24
    },
    {
      "epoch": 0.009110420042924094,
      "grad_norm": 1.312186598777771,
      "learning_rate": 1.7482517482517483e-05,
      "loss": 4.3995,
      "step": 26
    },
    {
      "epoch": 0.009811221584687486,
      "grad_norm": 1.3315190076828003,
      "learning_rate": 1.888111888111888e-05,
      "loss": 4.3005,
      "step": 28
    },
    {
      "epoch": 0.010512023126450878,
      "grad_norm": 1.3252590894699097,
      "learning_rate": 2.027972027972028e-05,
      "loss": 4.1952,
      "step": 30
    },
    {
      "epoch": 0.01121282466821427,
      "grad_norm": 1.3794758319854736,
      "learning_rate": 2.1678321678321677e-05,
      "loss": 4.1459,
      "step": 32
    },
    {
      "epoch": 0.011913626209977662,
      "grad_norm": 1.1808068752288818,
      "learning_rate": 2.307692307692308e-05,
      "loss": 4.034,
      "step": 34
    },
    {
      "epoch": 0.012614427751741054,
      "grad_norm": 1.31660795211792,
      "learning_rate": 2.4475524475524478e-05,
      "loss": 3.926,
      "step": 36
    },
    {
      "epoch": 0.013315229293504446,
      "grad_norm": 1.0347495079040527,
      "learning_rate": 2.5874125874125877e-05,
      "loss": 3.8812,
      "step": 38
    },
    {
      "epoch": 0.014016030835267838,
      "grad_norm": 1.050775408744812,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 3.7787,
      "step": 40
    },
    {
      "epoch": 0.01471683237703123,
      "grad_norm": 0.9461761713027954,
      "learning_rate": 2.8671328671328672e-05,
      "loss": 3.6738,
      "step": 42
    },
    {
      "epoch": 0.015417633918794622,
      "grad_norm": 1.0460454225540161,
      "learning_rate": 3.0069930069930068e-05,
      "loss": 3.6385,
      "step": 44
    },
    {
      "epoch": 0.016118435460558012,
      "grad_norm": 1.0687191486358643,
      "learning_rate": 3.146853146853147e-05,
      "loss": 3.5701,
      "step": 46
    },
    {
      "epoch": 0.016819237002321404,
      "grad_norm": 1.4722611904144287,
      "learning_rate": 3.2867132867132866e-05,
      "loss": 3.5438,
      "step": 48
    },
    {
      "epoch": 0.017520038544084796,
      "grad_norm": 1.1305724382400513,
      "learning_rate": 3.4265734265734265e-05,
      "loss": 3.4694,
      "step": 50
    },
    {
      "epoch": 0.018220840085848188,
      "grad_norm": 0.9322625994682312,
      "learning_rate": 3.566433566433567e-05,
      "loss": 3.4488,
      "step": 52
    },
    {
      "epoch": 0.01892164162761158,
      "grad_norm": 1.2441555261611938,
      "learning_rate": 3.7062937062937064e-05,
      "loss": 3.4289,
      "step": 54
    },
    {
      "epoch": 0.019622443169374972,
      "grad_norm": 0.9397731423377991,
      "learning_rate": 3.846153846153846e-05,
      "loss": 3.4021,
      "step": 56
    },
    {
      "epoch": 0.020323244711138364,
      "grad_norm": 1.3261164426803589,
      "learning_rate": 3.986013986013986e-05,
      "loss": 3.3575,
      "step": 58
    },
    {
      "epoch": 0.021024046252901756,
      "grad_norm": 1.08541738986969,
      "learning_rate": 4.125874125874126e-05,
      "loss": 3.3403,
      "step": 60
    },
    {
      "epoch": 0.021724847794665148,
      "grad_norm": 0.8626166582107544,
      "learning_rate": 4.265734265734266e-05,
      "loss": 3.3306,
      "step": 62
    },
    {
      "epoch": 0.02242564933642854,
      "grad_norm": 1.0596344470977783,
      "learning_rate": 4.405594405594406e-05,
      "loss": 3.2779,
      "step": 64
    },
    {
      "epoch": 0.023126450878191932,
      "grad_norm": 1.511917233467102,
      "learning_rate": 4.545454545454546e-05,
      "loss": 3.2759,
      "step": 66
    },
    {
      "epoch": 0.023827252419955324,
      "grad_norm": 1.2062046527862549,
      "learning_rate": 4.685314685314686e-05,
      "loss": 3.2545,
      "step": 68
    },
    {
      "epoch": 0.024528053961718716,
      "grad_norm": 1.1399930715560913,
      "learning_rate": 4.825174825174825e-05,
      "loss": 3.2235,
      "step": 70
    },
    {
      "epoch": 0.025228855503482108,
      "grad_norm": 0.8960133790969849,
      "learning_rate": 4.9650349650349656e-05,
      "loss": 3.2025,
      "step": 72
    },
    {
      "epoch": 0.0259296570452455,
      "grad_norm": 1.3042056560516357,
      "learning_rate": 5.1048951048951055e-05,
      "loss": 3.1475,
      "step": 74
    },
    {
      "epoch": 0.026630458587008892,
      "grad_norm": 1.186320424079895,
      "learning_rate": 5.244755244755245e-05,
      "loss": 3.1759,
      "step": 76
    },
    {
      "epoch": 0.027331260128772284,
      "grad_norm": 1.2691158056259155,
      "learning_rate": 5.384615384615385e-05,
      "loss": 3.1296,
      "step": 78
    },
    {
      "epoch": 0.028032061670535676,
      "grad_norm": 0.7816159129142761,
      "learning_rate": 5.524475524475524e-05,
      "loss": 3.1017,
      "step": 80
    },
    {
      "epoch": 0.028732863212299068,
      "grad_norm": 1.1489295959472656,
      "learning_rate": 5.664335664335665e-05,
      "loss": 3.1151,
      "step": 82
    },
    {
      "epoch": 0.02943366475406246,
      "grad_norm": 1.5686062574386597,
      "learning_rate": 5.8041958041958044e-05,
      "loss": 3.114,
      "step": 84
    },
    {
      "epoch": 0.030134466295825852,
      "grad_norm": 1.4421433210372925,
      "learning_rate": 5.944055944055944e-05,
      "loss": 3.0946,
      "step": 86
    },
    {
      "epoch": 0.030835267837589244,
      "grad_norm": 1.335250973701477,
      "learning_rate": 6.083916083916085e-05,
      "loss": 3.084,
      "step": 88
    },
    {
      "epoch": 0.03153606937935263,
      "grad_norm": 0.970507800579071,
      "learning_rate": 6.223776223776224e-05,
      "loss": 3.1163,
      "step": 90
    },
    {
      "epoch": 0.032236870921116025,
      "grad_norm": 1.2849407196044922,
      "learning_rate": 6.363636363636364e-05,
      "loss": 3.063,
      "step": 92
    },
    {
      "epoch": 0.032937672462879417,
      "grad_norm": 1.0378247499465942,
      "learning_rate": 6.503496503496504e-05,
      "loss": 3.0223,
      "step": 94
    },
    {
      "epoch": 0.03363847400464281,
      "grad_norm": 1.3139392137527466,
      "learning_rate": 6.643356643356644e-05,
      "loss": 3.0572,
      "step": 96
    },
    {
      "epoch": 0.0343392755464062,
      "grad_norm": 1.254752278327942,
      "learning_rate": 6.783216783216784e-05,
      "loss": 3.0408,
      "step": 98
    },
    {
      "epoch": 0.03504007708816959,
      "grad_norm": 1.3333168029785156,
      "learning_rate": 6.923076923076924e-05,
      "loss": 3.0185,
      "step": 100
    },
    {
      "epoch": 0.035740878629932984,
      "grad_norm": 1.2795464992523193,
      "learning_rate": 7.062937062937062e-05,
      "loss": 3.0328,
      "step": 102
    },
    {
      "epoch": 0.036441680171696376,
      "grad_norm": 1.2025645971298218,
      "learning_rate": 7.202797202797204e-05,
      "loss": 3.0303,
      "step": 104
    },
    {
      "epoch": 0.03714248171345977,
      "grad_norm": 1.1741266250610352,
      "learning_rate": 7.342657342657343e-05,
      "loss": 3.0252,
      "step": 106
    },
    {
      "epoch": 0.03784328325522316,
      "grad_norm": 1.2022653818130493,
      "learning_rate": 7.482517482517482e-05,
      "loss": 3.0183,
      "step": 108
    },
    {
      "epoch": 0.03854408479698655,
      "grad_norm": 1.1950666904449463,
      "learning_rate": 7.622377622377622e-05,
      "loss": 2.9804,
      "step": 110
    },
    {
      "epoch": 0.039244886338749944,
      "grad_norm": 1.5780822038650513,
      "learning_rate": 7.762237762237763e-05,
      "loss": 2.9804,
      "step": 112
    },
    {
      "epoch": 0.039945687880513336,
      "grad_norm": 1.0478655099868774,
      "learning_rate": 7.902097902097903e-05,
      "loss": 2.9894,
      "step": 114
    },
    {
      "epoch": 0.04064648942227673,
      "grad_norm": 1.1782268285751343,
      "learning_rate": 8.041958041958042e-05,
      "loss": 2.9717,
      "step": 116
    },
    {
      "epoch": 0.04134729096404012,
      "grad_norm": 1.0321820974349976,
      "learning_rate": 8.181818181818183e-05,
      "loss": 2.9776,
      "step": 118
    },
    {
      "epoch": 0.04204809250580351,
      "grad_norm": 0.9697206020355225,
      "learning_rate": 8.321678321678323e-05,
      "loss": 2.9804,
      "step": 120
    },
    {
      "epoch": 0.042748894047566904,
      "grad_norm": 1.1984606981277466,
      "learning_rate": 8.461538461538461e-05,
      "loss": 2.9495,
      "step": 122
    },
    {
      "epoch": 0.043449695589330296,
      "grad_norm": 0.9830178618431091,
      "learning_rate": 8.601398601398601e-05,
      "loss": 2.9656,
      "step": 124
    },
    {
      "epoch": 0.04415049713109369,
      "grad_norm": 1.3105114698410034,
      "learning_rate": 8.741258741258743e-05,
      "loss": 2.9306,
      "step": 126
    },
    {
      "epoch": 0.04485129867285708,
      "grad_norm": 1.3499157428741455,
      "learning_rate": 8.881118881118881e-05,
      "loss": 2.9381,
      "step": 128
    },
    {
      "epoch": 0.04555210021462047,
      "grad_norm": 0.9977575540542603,
      "learning_rate": 9.020979020979021e-05,
      "loss": 2.907,
      "step": 130
    },
    {
      "epoch": 0.046252901756383864,
      "grad_norm": 1.2331498861312866,
      "learning_rate": 9.160839160839161e-05,
      "loss": 2.9224,
      "step": 132
    },
    {
      "epoch": 0.046953703298147256,
      "grad_norm": 1.451253890991211,
      "learning_rate": 9.300699300699301e-05,
      "loss": 2.9202,
      "step": 134
    },
    {
      "epoch": 0.04765450483991065,
      "grad_norm": 1.2146471738815308,
      "learning_rate": 9.440559440559441e-05,
      "loss": 2.9098,
      "step": 136
    },
    {
      "epoch": 0.04835530638167404,
      "grad_norm": 1.0873245000839233,
      "learning_rate": 9.580419580419581e-05,
      "loss": 2.9218,
      "step": 138
    },
    {
      "epoch": 0.04905610792343743,
      "grad_norm": 1.276413083076477,
      "learning_rate": 9.72027972027972e-05,
      "loss": 2.8947,
      "step": 140
    },
    {
      "epoch": 0.049756909465200824,
      "grad_norm": 1.126065731048584,
      "learning_rate": 9.86013986013986e-05,
      "loss": 2.8788,
      "step": 142
    },
    {
      "epoch": 0.050457711006964216,
      "grad_norm": 1.5177017450332642,
      "learning_rate": 0.0001,
      "loss": 2.9043,
      "step": 144
    },
    {
      "epoch": 0.05115851254872761,
      "grad_norm": 1.3744112253189087,
      "learning_rate": 9.99998657109765e-05,
      "loss": 2.888,
      "step": 146
    },
    {
      "epoch": 0.051859314090491,
      "grad_norm": 1.7921055555343628,
      "learning_rate": 9.999946284462733e-05,
      "loss": 2.8631,
      "step": 148
    },
    {
      "epoch": 0.05256011563225439,
      "grad_norm": 1.1755317449569702,
      "learning_rate": 9.999879140311652e-05,
      "loss": 2.8735,
      "step": 150
    },
    {
      "epoch": 0.053260917174017784,
      "grad_norm": 0.846362292766571,
      "learning_rate": 9.999785139005073e-05,
      "loss": 2.8768,
      "step": 152
    },
    {
      "epoch": 0.053961718715781176,
      "grad_norm": 0.9867280721664429,
      "learning_rate": 9.999664281047933e-05,
      "loss": 2.8859,
      "step": 154
    },
    {
      "epoch": 0.05466252025754457,
      "grad_norm": 0.9751666188240051,
      "learning_rate": 9.999516567089429e-05,
      "loss": 2.8497,
      "step": 156
    },
    {
      "epoch": 0.05536332179930796,
      "grad_norm": 1.0603703260421753,
      "learning_rate": 9.999341997923011e-05,
      "loss": 2.8404,
      "step": 158
    },
    {
      "epoch": 0.05606412334107135,
      "grad_norm": 1.0447975397109985,
      "learning_rate": 9.999140574486392e-05,
      "loss": 2.9092,
      "step": 160
    },
    {
      "epoch": 0.056764924882834744,
      "grad_norm": 1.3046443462371826,
      "learning_rate": 9.998912297861527e-05,
      "loss": 2.8971,
      "step": 162
    },
    {
      "epoch": 0.057465726424598136,
      "grad_norm": 1.1029243469238281,
      "learning_rate": 9.998657169274622e-05,
      "loss": 2.8834,
      "step": 164
    },
    {
      "epoch": 0.05816652796636153,
      "grad_norm": 0.8594210743904114,
      "learning_rate": 9.99837519009611e-05,
      "loss": 2.8361,
      "step": 166
    },
    {
      "epoch": 0.05886732950812492,
      "grad_norm": 0.8585363030433655,
      "learning_rate": 9.998066361840665e-05,
      "loss": 2.8782,
      "step": 168
    },
    {
      "epoch": 0.05956813104988831,
      "grad_norm": 0.693467378616333,
      "learning_rate": 9.997730686167173e-05,
      "loss": 2.8537,
      "step": 170
    },
    {
      "epoch": 0.060268932591651704,
      "grad_norm": 0.8418940305709839,
      "learning_rate": 9.997368164878738e-05,
      "loss": 2.8294,
      "step": 172
    },
    {
      "epoch": 0.060969734133415096,
      "grad_norm": 0.9938271045684814,
      "learning_rate": 9.996978799922665e-05,
      "loss": 2.8458,
      "step": 174
    },
    {
      "epoch": 0.06167053567517849,
      "grad_norm": 1.0347217321395874,
      "learning_rate": 9.99656259339045e-05,
      "loss": 2.8081,
      "step": 176
    },
    {
      "epoch": 0.06237133721694188,
      "grad_norm": 0.9216743111610413,
      "learning_rate": 9.996119547517775e-05,
      "loss": 2.8655,
      "step": 178
    },
    {
      "epoch": 0.06307213875870527,
      "grad_norm": 1.0579859018325806,
      "learning_rate": 9.995649664684486e-05,
      "loss": 2.823,
      "step": 180
    },
    {
      "epoch": 0.06377294030046866,
      "grad_norm": 0.9864194393157959,
      "learning_rate": 9.995152947414586e-05,
      "loss": 2.8081,
      "step": 182
    },
    {
      "epoch": 0.06447374184223205,
      "grad_norm": 0.8999143838882446,
      "learning_rate": 9.994629398376226e-05,
      "loss": 2.7947,
      "step": 184
    },
    {
      "epoch": 0.06517454338399545,
      "grad_norm": 0.9121315479278564,
      "learning_rate": 9.994079020381676e-05,
      "loss": 2.8253,
      "step": 186
    },
    {
      "epoch": 0.06587534492575883,
      "grad_norm": 0.8578842282295227,
      "learning_rate": 9.993501816387329e-05,
      "loss": 2.7548,
      "step": 188
    },
    {
      "epoch": 0.06657614646752223,
      "grad_norm": 0.8564820289611816,
      "learning_rate": 9.992897789493672e-05,
      "loss": 2.8361,
      "step": 190
    },
    {
      "epoch": 0.06727694800928562,
      "grad_norm": 0.8013344407081604,
      "learning_rate": 9.992266942945269e-05,
      "loss": 2.8606,
      "step": 192
    },
    {
      "epoch": 0.06797774955104902,
      "grad_norm": 0.7343975901603699,
      "learning_rate": 9.991609280130752e-05,
      "loss": 2.7947,
      "step": 194
    },
    {
      "epoch": 0.0686785510928124,
      "grad_norm": 0.7338536381721497,
      "learning_rate": 9.990924804582797e-05,
      "loss": 2.7492,
      "step": 196
    },
    {
      "epoch": 0.0693793526345758,
      "grad_norm": 0.828781008720398,
      "learning_rate": 9.990213519978109e-05,
      "loss": 2.8013,
      "step": 198
    },
    {
      "epoch": 0.07008015417633918,
      "grad_norm": 0.7156624794006348,
      "learning_rate": 9.989475430137391e-05,
      "loss": 2.7943,
      "step": 200
    },
    {
      "epoch": 0.07078095571810258,
      "grad_norm": 0.6014353632926941,
      "learning_rate": 9.988710539025341e-05,
      "loss": 2.8099,
      "step": 202
    },
    {
      "epoch": 0.07148175725986597,
      "grad_norm": 0.6569661498069763,
      "learning_rate": 9.987918850750619e-05,
      "loss": 2.8125,
      "step": 204
    },
    {
      "epoch": 0.07218255880162937,
      "grad_norm": 0.6558775305747986,
      "learning_rate": 9.987100369565825e-05,
      "loss": 2.7487,
      "step": 206
    },
    {
      "epoch": 0.07288336034339275,
      "grad_norm": 0.6454245448112488,
      "learning_rate": 9.986255099867481e-05,
      "loss": 2.7648,
      "step": 208
    },
    {
      "epoch": 0.07358416188515615,
      "grad_norm": 0.5741921067237854,
      "learning_rate": 9.985383046196004e-05,
      "loss": 2.7743,
      "step": 210
    },
    {
      "epoch": 0.07428496342691954,
      "grad_norm": 0.5875937938690186,
      "learning_rate": 9.984484213235685e-05,
      "loss": 2.7728,
      "step": 212
    },
    {
      "epoch": 0.07498576496868294,
      "grad_norm": 0.6638422012329102,
      "learning_rate": 9.98355860581466e-05,
      "loss": 2.7504,
      "step": 214
    },
    {
      "epoch": 0.07568656651044632,
      "grad_norm": 1.1614341735839844,
      "learning_rate": 9.982606228904884e-05,
      "loss": 2.7923,
      "step": 216
    },
    {
      "epoch": 0.07638736805220972,
      "grad_norm": 1.005254864692688,
      "learning_rate": 9.981627087622108e-05,
      "loss": 2.76,
      "step": 218
    },
    {
      "epoch": 0.0770881695939731,
      "grad_norm": 0.7738555669784546,
      "learning_rate": 9.980621187225852e-05,
      "loss": 2.7866,
      "step": 220
    },
    {
      "epoch": 0.0777889711357365,
      "grad_norm": 0.9469527006149292,
      "learning_rate": 9.979588533119367e-05,
      "loss": 2.8012,
      "step": 222
    },
    {
      "epoch": 0.07848977267749989,
      "grad_norm": 0.9031473398208618,
      "learning_rate": 9.978529130849619e-05,
      "loss": 2.7522,
      "step": 224
    },
    {
      "epoch": 0.07919057421926329,
      "grad_norm": 0.9450514912605286,
      "learning_rate": 9.977442986107252e-05,
      "loss": 2.7791,
      "step": 226
    },
    {
      "epoch": 0.07989137576102667,
      "grad_norm": 0.7259206771850586,
      "learning_rate": 9.97633010472656e-05,
      "loss": 2.7237,
      "step": 228
    },
    {
      "epoch": 0.08059217730279007,
      "grad_norm": 0.6595309972763062,
      "learning_rate": 9.975190492685451e-05,
      "loss": 2.7284,
      "step": 230
    },
    {
      "epoch": 0.08129297884455346,
      "grad_norm": 0.7696382999420166,
      "learning_rate": 9.974024156105422e-05,
      "loss": 2.7631,
      "step": 232
    },
    {
      "epoch": 0.08199378038631686,
      "grad_norm": 0.7305110096931458,
      "learning_rate": 9.972831101251521e-05,
      "loss": 2.7793,
      "step": 234
    },
    {
      "epoch": 0.08269458192808024,
      "grad_norm": 0.6039514541625977,
      "learning_rate": 9.971611334532314e-05,
      "loss": 2.7669,
      "step": 236
    },
    {
      "epoch": 0.08339538346984364,
      "grad_norm": 0.5824711918830872,
      "learning_rate": 9.970364862499852e-05,
      "loss": 2.7476,
      "step": 238
    },
    {
      "epoch": 0.08409618501160702,
      "grad_norm": 0.6831758618354797,
      "learning_rate": 9.969091691849637e-05,
      "loss": 2.7098,
      "step": 240
    },
    {
      "epoch": 0.08479698655337042,
      "grad_norm": 0.6469074487686157,
      "learning_rate": 9.967791829420581e-05,
      "loss": 2.7609,
      "step": 242
    },
    {
      "epoch": 0.08549778809513381,
      "grad_norm": 0.5876832604408264,
      "learning_rate": 9.966465282194976e-05,
      "loss": 2.7306,
      "step": 244
    },
    {
      "epoch": 0.08619858963689721,
      "grad_norm": 0.6310129761695862,
      "learning_rate": 9.965112057298451e-05,
      "loss": 2.7283,
      "step": 246
    },
    {
      "epoch": 0.08689939117866059,
      "grad_norm": 0.6113069653511047,
      "learning_rate": 9.963732161999935e-05,
      "loss": 2.7274,
      "step": 248
    },
    {
      "epoch": 0.08760019272042399,
      "grad_norm": 1.0655111074447632,
      "learning_rate": 9.96232560371162e-05,
      "loss": 2.7022,
      "step": 250
    },
    {
      "epoch": 0.08830099426218738,
      "grad_norm": 0.8412613272666931,
      "learning_rate": 9.960892389988918e-05,
      "loss": 2.7213,
      "step": 252
    },
    {
      "epoch": 0.08900179580395078,
      "grad_norm": 0.7329776883125305,
      "learning_rate": 9.959432528530428e-05,
      "loss": 2.7343,
      "step": 254
    },
    {
      "epoch": 0.08970259734571416,
      "grad_norm": 0.702498197555542,
      "learning_rate": 9.95794602717788e-05,
      "loss": 2.7642,
      "step": 256
    },
    {
      "epoch": 0.09040339888747755,
      "grad_norm": 0.6936408281326294,
      "learning_rate": 9.95643289391611e-05,
      "loss": 2.7081,
      "step": 258
    },
    {
      "epoch": 0.09110420042924094,
      "grad_norm": 0.664743959903717,
      "learning_rate": 9.954893136873005e-05,
      "loss": 2.7054,
      "step": 260
    },
    {
      "epoch": 0.09180500197100433,
      "grad_norm": 0.5716791152954102,
      "learning_rate": 9.953326764319463e-05,
      "loss": 2.6751,
      "step": 262
    },
    {
      "epoch": 0.09250580351276773,
      "grad_norm": 0.6207195520401001,
      "learning_rate": 9.95173378466935e-05,
      "loss": 2.6945,
      "step": 264
    },
    {
      "epoch": 0.09320660505453111,
      "grad_norm": 0.6572092771530151,
      "learning_rate": 9.950114206479453e-05,
      "loss": 2.6989,
      "step": 266
    },
    {
      "epoch": 0.09390740659629451,
      "grad_norm": 0.7676830887794495,
      "learning_rate": 9.948468038449435e-05,
      "loss": 2.7613,
      "step": 268
    },
    {
      "epoch": 0.0946082081380579,
      "grad_norm": 0.5810503959655762,
      "learning_rate": 9.946795289421787e-05,
      "loss": 2.7234,
      "step": 270
    },
    {
      "epoch": 0.0953090096798213,
      "grad_norm": 0.6459682583808899,
      "learning_rate": 9.945095968381784e-05,
      "loss": 2.717,
      "step": 272
    },
    {
      "epoch": 0.09600981122158468,
      "grad_norm": 0.6498464345932007,
      "learning_rate": 9.94337008445743e-05,
      "loss": 2.7389,
      "step": 274
    },
    {
      "epoch": 0.09671061276334808,
      "grad_norm": 0.6287350654602051,
      "learning_rate": 9.941617646919421e-05,
      "loss": 2.681,
      "step": 276
    },
    {
      "epoch": 0.09741141430511147,
      "grad_norm": 0.7516258955001831,
      "learning_rate": 9.939838665181076e-05,
      "loss": 2.6696,
      "step": 278
    },
    {
      "epoch": 0.09811221584687486,
      "grad_norm": 0.6962350606918335,
      "learning_rate": 9.938033148798307e-05,
      "loss": 2.6971,
      "step": 280
    },
    {
      "epoch": 0.09881301738863825,
      "grad_norm": 0.6605144739151001,
      "learning_rate": 9.936201107469555e-05,
      "loss": 2.6999,
      "step": 282
    },
    {
      "epoch": 0.09951381893040165,
      "grad_norm": 0.5991240739822388,
      "learning_rate": 9.93434255103574e-05,
      "loss": 2.6936,
      "step": 284
    },
    {
      "epoch": 0.10021462047216503,
      "grad_norm": 0.5660961866378784,
      "learning_rate": 9.932457489480213e-05,
      "loss": 2.686,
      "step": 286
    },
    {
      "epoch": 0.10091542201392843,
      "grad_norm": 0.690290093421936,
      "learning_rate": 9.930545932928698e-05,
      "loss": 2.6809,
      "step": 288
    },
    {
      "epoch": 0.10161622355569182,
      "grad_norm": 0.7119167447090149,
      "learning_rate": 9.928607891649234e-05,
      "loss": 2.7221,
      "step": 290
    },
    {
      "epoch": 0.10231702509745522,
      "grad_norm": 0.7049365639686584,
      "learning_rate": 9.926643376052131e-05,
      "loss": 2.6569,
      "step": 292
    },
    {
      "epoch": 0.1030178266392186,
      "grad_norm": 0.6691743731498718,
      "learning_rate": 9.924652396689902e-05,
      "loss": 2.6751,
      "step": 294
    },
    {
      "epoch": 0.103718628180982,
      "grad_norm": 0.5533433556556702,
      "learning_rate": 9.922634964257215e-05,
      "loss": 2.7064,
      "step": 296
    },
    {
      "epoch": 0.10441942972274539,
      "grad_norm": 0.6669672727584839,
      "learning_rate": 9.920591089590831e-05,
      "loss": 2.687,
      "step": 298
    },
    {
      "epoch": 0.10512023126450878,
      "grad_norm": 0.8539720773696899,
      "learning_rate": 9.918520783669549e-05,
      "loss": 2.6968,
      "step": 300
    }
  ],
  "logging_steps": 2,
  "max_steps": 2854,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.377550336196608e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}