| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 417, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007203962179198559, | |
| "grad_norm": 6.173737525939941, | |
| "learning_rate": 0.0, | |
| "loss": 1.7414, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.014407924358397118, | |
| "grad_norm": 6.159329891204834, | |
| "learning_rate": 2.3809523809523811e-07, | |
| "loss": 1.7221, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.021611886537595677, | |
| "grad_norm": 6.446763038635254, | |
| "learning_rate": 4.7619047619047623e-07, | |
| "loss": 1.7481, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.028815848716794237, | |
| "grad_norm": 6.09867000579834, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 1.7203, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.03601981089599279, | |
| "grad_norm": 6.325934886932373, | |
| "learning_rate": 9.523809523809525e-07, | |
| "loss": 1.7419, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.04322377307519135, | |
| "grad_norm": 6.029388904571533, | |
| "learning_rate": 1.1904761904761906e-06, | |
| "loss": 1.7401, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.05042773525438991, | |
| "grad_norm": 5.957275867462158, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.7205, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05763169743358847, | |
| "grad_norm": 5.456174373626709, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 1.7044, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.06483565961278703, | |
| "grad_norm": 5.111164093017578, | |
| "learning_rate": 1.904761904761905e-06, | |
| "loss": 1.6829, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.07203962179198559, | |
| "grad_norm": 3.8403587341308594, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 1.6228, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07924358397118415, | |
| "grad_norm": 3.6962971687316895, | |
| "learning_rate": 2.380952380952381e-06, | |
| "loss": 1.6107, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0864475461503827, | |
| "grad_norm": 3.4146342277526855, | |
| "learning_rate": 2.6190476190476192e-06, | |
| "loss": 1.6153, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.09365150832958127, | |
| "grad_norm": 2.1463379859924316, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 1.5442, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.10085547050877983, | |
| "grad_norm": 2.0825576782226562, | |
| "learning_rate": 3.0952380952380957e-06, | |
| "loss": 1.5311, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.1080594326879784, | |
| "grad_norm": 1.9198007583618164, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.5017, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.11526339486717695, | |
| "grad_norm": 1.7519303560256958, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 1.4866, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.12246735704637551, | |
| "grad_norm": 1.6560941934585571, | |
| "learning_rate": 3.80952380952381e-06, | |
| "loss": 1.4904, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.12967131922557407, | |
| "grad_norm": 1.7502397298812866, | |
| "learning_rate": 4.047619047619048e-06, | |
| "loss": 1.447, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.13687528140477262, | |
| "grad_norm": 1.8956769704818726, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 1.4399, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.14407924358397117, | |
| "grad_norm": 1.8007680177688599, | |
| "learning_rate": 4.523809523809524e-06, | |
| "loss": 1.4126, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.15128320576316975, | |
| "grad_norm": 1.6545991897583008, | |
| "learning_rate": 4.761904761904762e-06, | |
| "loss": 1.4053, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.1584871679423683, | |
| "grad_norm": 1.3897682428359985, | |
| "learning_rate": 5e-06, | |
| "loss": 1.3917, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.16569113012156686, | |
| "grad_norm": 1.124558687210083, | |
| "learning_rate": 5.2380952380952384e-06, | |
| "loss": 1.3667, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.1728950923007654, | |
| "grad_norm": 0.8316662907600403, | |
| "learning_rate": 5.476190476190477e-06, | |
| "loss": 1.3764, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.180099054479964, | |
| "grad_norm": 1.0051462650299072, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 1.3569, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.18730301665916255, | |
| "grad_norm": 1.0914835929870605, | |
| "learning_rate": 5.9523809523809525e-06, | |
| "loss": 1.3281, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.1945069788383611, | |
| "grad_norm": 1.0524057149887085, | |
| "learning_rate": 6.1904761904761914e-06, | |
| "loss": 1.3194, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.20171094101755965, | |
| "grad_norm": 0.7890483736991882, | |
| "learning_rate": 6.4285714285714295e-06, | |
| "loss": 1.2971, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.2089149031967582, | |
| "grad_norm": 0.6859455704689026, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.3046, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.2161188653759568, | |
| "grad_norm": 0.6448878645896912, | |
| "learning_rate": 6.9047619047619055e-06, | |
| "loss": 1.2594, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.22332282755515534, | |
| "grad_norm": 0.6465410590171814, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 1.269, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.2305267897343539, | |
| "grad_norm": 0.6410360336303711, | |
| "learning_rate": 7.380952380952382e-06, | |
| "loss": 1.2648, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.23773075191355245, | |
| "grad_norm": 0.6101223230361938, | |
| "learning_rate": 7.61904761904762e-06, | |
| "loss": 1.2401, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.24493471409275103, | |
| "grad_norm": 0.5768052339553833, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 1.2381, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.2521386762719496, | |
| "grad_norm": 0.5501332879066467, | |
| "learning_rate": 8.095238095238097e-06, | |
| "loss": 1.2576, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.25934263845114813, | |
| "grad_norm": 0.5579516291618347, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 1.2269, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.2665466006303467, | |
| "grad_norm": 0.5129104256629944, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 1.215, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.27375056280954524, | |
| "grad_norm": 0.45212557911872864, | |
| "learning_rate": 8.80952380952381e-06, | |
| "loss": 1.2273, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.2809545249887438, | |
| "grad_norm": 0.41491127014160156, | |
| "learning_rate": 9.047619047619049e-06, | |
| "loss": 1.2279, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.28815848716794235, | |
| "grad_norm": 0.4179735481739044, | |
| "learning_rate": 9.285714285714288e-06, | |
| "loss": 1.2341, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.29536244934714095, | |
| "grad_norm": 0.431852251291275, | |
| "learning_rate": 9.523809523809525e-06, | |
| "loss": 1.1875, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.3025664115263395, | |
| "grad_norm": 0.4219491183757782, | |
| "learning_rate": 9.761904761904762e-06, | |
| "loss": 1.2045, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.30977037370553806, | |
| "grad_norm": 0.37112221121788025, | |
| "learning_rate": 1e-05, | |
| "loss": 1.1965, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.3169743358847366, | |
| "grad_norm": 0.34050077199935913, | |
| "learning_rate": 9.999824541392404e-06, | |
| "loss": 1.1774, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.32417829806393517, | |
| "grad_norm": 0.32600072026252747, | |
| "learning_rate": 9.999298177883902e-06, | |
| "loss": 1.1969, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.3313822602431337, | |
| "grad_norm": 0.33435478806495667, | |
| "learning_rate": 9.9984209464165e-06, | |
| "loss": 1.1712, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.3385862224223323, | |
| "grad_norm": 0.32657337188720703, | |
| "learning_rate": 9.997192908557322e-06, | |
| "loss": 1.1872, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.3457901846015308, | |
| "grad_norm": 0.31742358207702637, | |
| "learning_rate": 9.995614150494293e-06, | |
| "loss": 1.1829, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.3529941467807294, | |
| "grad_norm": 0.27555903792381287, | |
| "learning_rate": 9.99368478303009e-06, | |
| "loss": 1.1683, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.360198108959928, | |
| "grad_norm": 0.26494070887565613, | |
| "learning_rate": 9.99140494157436e-06, | |
| "loss": 1.1746, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.36740207113912654, | |
| "grad_norm": 0.28536906838417053, | |
| "learning_rate": 9.988774786134235e-06, | |
| "loss": 1.1668, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.3746060333183251, | |
| "grad_norm": 0.2787037193775177, | |
| "learning_rate": 9.98579450130307e-06, | |
| "loss": 1.1793, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.38180999549752365, | |
| "grad_norm": 0.26992520689964294, | |
| "learning_rate": 9.982464296247523e-06, | |
| "loss": 1.1748, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.3890139576767222, | |
| "grad_norm": 0.2353690266609192, | |
| "learning_rate": 9.978784404692847e-06, | |
| "loss": 1.1559, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.39621791985592075, | |
| "grad_norm": 0.2522650957107544, | |
| "learning_rate": 9.974755084906503e-06, | |
| "loss": 1.1635, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.4034218820351193, | |
| "grad_norm": 0.2653239965438843, | |
| "learning_rate": 9.970376619680024e-06, | |
| "loss": 1.1715, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.41062584421431786, | |
| "grad_norm": 0.24647340178489685, | |
| "learning_rate": 9.965649316309178e-06, | |
| "loss": 1.1556, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.4178298063935164, | |
| "grad_norm": 0.2625945508480072, | |
| "learning_rate": 9.960573506572391e-06, | |
| "loss": 1.1361, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.425033768572715, | |
| "grad_norm": 0.223532035946846, | |
| "learning_rate": 9.955149546707465e-06, | |
| "loss": 1.1367, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.4322377307519136, | |
| "grad_norm": 0.2535472810268402, | |
| "learning_rate": 9.94937781738658e-06, | |
| "loss": 1.1551, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.4394416929311121, | |
| "grad_norm": 0.23278413712978363, | |
| "learning_rate": 9.94325872368957e-06, | |
| "loss": 1.1482, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.4466456551103107, | |
| "grad_norm": 0.2383483499288559, | |
| "learning_rate": 9.936792695075502e-06, | |
| "loss": 1.1389, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.45384961728950923, | |
| "grad_norm": 0.25417017936706543, | |
| "learning_rate": 9.929980185352525e-06, | |
| "loss": 1.145, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.4610535794687078, | |
| "grad_norm": 0.2187829613685608, | |
| "learning_rate": 9.922821672646028e-06, | |
| "loss": 1.1367, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.46825754164790634, | |
| "grad_norm": 0.2265864610671997, | |
| "learning_rate": 9.915317659365078e-06, | |
| "loss": 1.1353, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.4754615038271049, | |
| "grad_norm": 0.2197989523410797, | |
| "learning_rate": 9.907468672167165e-06, | |
| "loss": 1.1486, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.48266546600630345, | |
| "grad_norm": 0.224925234913826, | |
| "learning_rate": 9.899275261921236e-06, | |
| "loss": 1.1587, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.48986942818550205, | |
| "grad_norm": 0.2215765416622162, | |
| "learning_rate": 9.890738003669029e-06, | |
| "loss": 1.1294, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.4970733903647006, | |
| "grad_norm": 0.2000226378440857, | |
| "learning_rate": 9.881857496584726e-06, | |
| "loss": 1.1185, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.5042773525438992, | |
| "grad_norm": 0.21352718770503998, | |
| "learning_rate": 9.872634363932887e-06, | |
| "loss": 1.1372, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5114813147230977, | |
| "grad_norm": 0.21923653781414032, | |
| "learning_rate": 9.863069253024719e-06, | |
| "loss": 1.1246, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.5186852769022963, | |
| "grad_norm": 0.2089078724384308, | |
| "learning_rate": 9.853162835172638e-06, | |
| "loss": 1.1313, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.5258892390814949, | |
| "grad_norm": 0.2270357608795166, | |
| "learning_rate": 9.842915805643156e-06, | |
| "loss": 1.1161, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.5330932012606934, | |
| "grad_norm": 0.2318277657032013, | |
| "learning_rate": 9.832328883608088e-06, | |
| "loss": 1.1108, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.540297163439892, | |
| "grad_norm": 0.19568417966365814, | |
| "learning_rate": 9.821402812094074e-06, | |
| "loss": 1.1373, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.5475011256190905, | |
| "grad_norm": 0.2250116616487503, | |
| "learning_rate": 9.81013835793043e-06, | |
| "loss": 1.1459, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.5547050877982891, | |
| "grad_norm": 0.2082553207874298, | |
| "learning_rate": 9.798536311695334e-06, | |
| "loss": 1.1298, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.5619090499774876, | |
| "grad_norm": 0.22250007092952728, | |
| "learning_rate": 9.786597487660336e-06, | |
| "loss": 1.1365, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.5691130121566862, | |
| "grad_norm": 0.19979779422283173, | |
| "learning_rate": 9.774322723733216e-06, | |
| "loss": 1.1233, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.5763169743358847, | |
| "grad_norm": 0.20838016271591187, | |
| "learning_rate": 9.761712881399164e-06, | |
| "loss": 1.1542, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5835209365150833, | |
| "grad_norm": 0.20710162818431854, | |
| "learning_rate": 9.748768845660335e-06, | |
| "loss": 1.1318, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.5907248986942819, | |
| "grad_norm": 0.21674193441867828, | |
| "learning_rate": 9.735491524973723e-06, | |
| "loss": 1.0994, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.5979288608734804, | |
| "grad_norm": 0.20445245504379272, | |
| "learning_rate": 9.721881851187406e-06, | |
| "loss": 1.1342, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.605132823052679, | |
| "grad_norm": 0.20889800786972046, | |
| "learning_rate": 9.707940779475151e-06, | |
| "loss": 1.0988, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.6123367852318775, | |
| "grad_norm": 0.22214913368225098, | |
| "learning_rate": 9.693669288269371e-06, | |
| "loss": 1.1129, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.6195407474110761, | |
| "grad_norm": 0.22072745859622955, | |
| "learning_rate": 9.679068379192455e-06, | |
| "loss": 1.1, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.6267447095902746, | |
| "grad_norm": 0.22884686291217804, | |
| "learning_rate": 9.664139076986473e-06, | |
| "loss": 1.1257, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.6339486717694732, | |
| "grad_norm": 0.20036116242408752, | |
| "learning_rate": 9.648882429441258e-06, | |
| "loss": 1.1299, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.6411526339486717, | |
| "grad_norm": 0.24411164224147797, | |
| "learning_rate": 9.633299507320862e-06, | |
| "loss": 1.1071, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.6483565961278703, | |
| "grad_norm": 0.2171693742275238, | |
| "learning_rate": 9.617391404288412e-06, | |
| "loss": 1.1328, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6555605583070689, | |
| "grad_norm": 0.2116158902645111, | |
| "learning_rate": 9.601159236829353e-06, | |
| "loss": 1.1227, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.6627645204862674, | |
| "grad_norm": 0.22783496975898743, | |
| "learning_rate": 9.584604144173084e-06, | |
| "loss": 1.0958, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.669968482665466, | |
| "grad_norm": 0.23521655797958374, | |
| "learning_rate": 9.567727288213005e-06, | |
| "loss": 1.1481, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.6771724448446645, | |
| "grad_norm": 0.22170735895633698, | |
| "learning_rate": 9.550529853424979e-06, | |
| "loss": 1.1099, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.6843764070238632, | |
| "grad_norm": 0.23990625143051147, | |
| "learning_rate": 9.53301304678419e-06, | |
| "loss": 1.1115, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.6915803692030617, | |
| "grad_norm": 0.22480110824108124, | |
| "learning_rate": 9.515178097680437e-06, | |
| "loss": 1.0993, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.6987843313822603, | |
| "grad_norm": 0.2508992552757263, | |
| "learning_rate": 9.497026257831856e-06, | |
| "loss": 1.1251, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.7059882935614588, | |
| "grad_norm": 0.25253668427467346, | |
| "learning_rate": 9.478558801197065e-06, | |
| "loss": 1.1121, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.7131922557406574, | |
| "grad_norm": 0.23142355680465698, | |
| "learning_rate": 9.459777023885754e-06, | |
| "loss": 1.136, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.720396217919856, | |
| "grad_norm": 0.2402750551700592, | |
| "learning_rate": 9.440682244067724e-06, | |
| "loss": 1.1049, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7276001800990545, | |
| "grad_norm": 0.23145467042922974, | |
| "learning_rate": 9.421275801880363e-06, | |
| "loss": 1.096, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.7348041422782531, | |
| "grad_norm": 0.24229560792446136, | |
| "learning_rate": 9.401559059334601e-06, | |
| "loss": 1.1077, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.7420081044574516, | |
| "grad_norm": 0.2245631366968155, | |
| "learning_rate": 9.381533400219319e-06, | |
| "loss": 1.1078, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.7492120666366502, | |
| "grad_norm": 0.22122395038604736, | |
| "learning_rate": 9.361200230004219e-06, | |
| "loss": 1.1255, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.7564160288158487, | |
| "grad_norm": 0.2282179296016693, | |
| "learning_rate": 9.340560975741198e-06, | |
| "loss": 1.1343, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.7636199909950473, | |
| "grad_norm": 0.23190978169441223, | |
| "learning_rate": 9.319617085964177e-06, | |
| "loss": 1.1082, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.7708239531742458, | |
| "grad_norm": 0.24386624991893768, | |
| "learning_rate": 9.298370030587456e-06, | |
| "loss": 1.1096, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.7780279153534444, | |
| "grad_norm": 0.22043921053409576, | |
| "learning_rate": 9.276821300802535e-06, | |
| "loss": 1.0869, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.785231877532643, | |
| "grad_norm": 0.2251676470041275, | |
| "learning_rate": 9.25497240897346e-06, | |
| "loss": 1.1036, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.7924358397118415, | |
| "grad_norm": 0.23501946032047272, | |
| "learning_rate": 9.232824888530689e-06, | |
| "loss": 1.1151, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.7996398018910401, | |
| "grad_norm": 0.21661776304244995, | |
| "learning_rate": 9.210380293863462e-06, | |
| "loss": 1.1053, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.8068437640702386, | |
| "grad_norm": 0.22219465672969818, | |
| "learning_rate": 9.18764020021071e-06, | |
| "loss": 1.1176, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.8140477262494372, | |
| "grad_norm": 0.2121913731098175, | |
| "learning_rate": 9.164606203550498e-06, | |
| "loss": 1.1292, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.8212516884286357, | |
| "grad_norm": 0.21977250277996063, | |
| "learning_rate": 9.141279920488021e-06, | |
| "loss": 1.097, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.8284556506078343, | |
| "grad_norm": 0.2105371654033661, | |
| "learning_rate": 9.117662988142138e-06, | |
| "loss": 1.1161, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.8356596127870328, | |
| "grad_norm": 0.25086918473243713, | |
| "learning_rate": 9.093757064030473e-06, | |
| "loss": 1.1275, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.8428635749662314, | |
| "grad_norm": 0.20763848721981049, | |
| "learning_rate": 9.069563825953092e-06, | |
| "loss": 1.0862, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.85006753714543, | |
| "grad_norm": 0.24541738629341125, | |
| "learning_rate": 9.045084971874738e-06, | |
| "loss": 1.0928, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.8572714993246285, | |
| "grad_norm": 0.22416935861110687, | |
| "learning_rate": 9.020322219805674e-06, | |
| "loss": 1.1145, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.8644754615038271, | |
| "grad_norm": 0.2052609622478485, | |
| "learning_rate": 8.9952773076811e-06, | |
| "loss": 1.0615, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.8716794236830256, | |
| "grad_norm": 0.23820039629936218, | |
| "learning_rate": 8.969951993239177e-06, | |
| "loss": 1.1167, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.8788833858622243, | |
| "grad_norm": 0.24450096487998962, | |
| "learning_rate": 8.944348053897672e-06, | |
| "loss": 1.1331, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.8860873480414228, | |
| "grad_norm": 0.24509143829345703, | |
| "learning_rate": 8.9184672866292e-06, | |
| "loss": 1.0708, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.8932913102206214, | |
| "grad_norm": 0.23418590426445007, | |
| "learning_rate": 8.892311507835118e-06, | |
| "loss": 1.094, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.9004952723998199, | |
| "grad_norm": 0.23996609449386597, | |
| "learning_rate": 8.865882553218036e-06, | |
| "loss": 1.1309, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.9076992345790185, | |
| "grad_norm": 0.23122315108776093, | |
| "learning_rate": 8.83918227765299e-06, | |
| "loss": 1.105, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.9149031967582171, | |
| "grad_norm": 0.22732949256896973, | |
| "learning_rate": 8.81221255505724e-06, | |
| "loss": 1.092, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.9221071589374156, | |
| "grad_norm": 0.26016128063201904, | |
| "learning_rate": 8.784975278258783e-06, | |
| "loss": 1.1058, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.9293111211166142, | |
| "grad_norm": 0.2204255759716034, | |
| "learning_rate": 8.757472358863481e-06, | |
| "loss": 1.1294, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.9365150832958127, | |
| "grad_norm": 0.2520386278629303, | |
| "learning_rate": 8.729705727120911e-06, | |
| "loss": 1.0975, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.9437190454750113, | |
| "grad_norm": 0.24364745616912842, | |
| "learning_rate": 8.701677331788891e-06, | |
| "loss": 1.1135, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.9509230076542098, | |
| "grad_norm": 0.26485344767570496, | |
| "learning_rate": 8.673389139996708e-06, | |
| "loss": 1.0937, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.9581269698334084, | |
| "grad_norm": 0.23761332035064697, | |
| "learning_rate": 8.644843137107058e-06, | |
| "loss": 1.0834, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.9653309320126069, | |
| "grad_norm": 0.22497673332691193, | |
| "learning_rate": 8.616041326576711e-06, | |
| "loss": 1.1093, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.9725348941918055, | |
| "grad_norm": 0.23355747759342194, | |
| "learning_rate": 8.586985729815895e-06, | |
| "loss": 1.1207, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.9797388563710041, | |
| "grad_norm": 0.2490537166595459, | |
| "learning_rate": 8.557678386046429e-06, | |
| "loss": 1.079, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.9869428185502026, | |
| "grad_norm": 0.26007816195487976, | |
| "learning_rate": 8.528121352158604e-06, | |
| "loss": 1.1101, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.9941467807294012, | |
| "grad_norm": 0.23331965506076813, | |
| "learning_rate": 8.498316702566828e-06, | |
| "loss": 1.1167, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.23331965506076813, | |
| "learning_rate": 8.468266529064025e-06, | |
| "loss": 1.0964, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 1.0072039621791986, | |
| "grad_norm": 0.33547741174697876, | |
| "learning_rate": 8.437972940674838e-06, | |
| "loss": 1.07, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.0144079243583972, | |
| "grad_norm": 0.24112224578857422, | |
| "learning_rate": 8.4074380635076e-06, | |
| "loss": 1.0695, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 1.0216118865375956, | |
| "grad_norm": 0.27535709738731384, | |
| "learning_rate": 8.376664040605122e-06, | |
| "loss": 1.1001, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.0288158487167942, | |
| "grad_norm": 0.3114432394504547, | |
| "learning_rate": 8.345653031794292e-06, | |
| "loss": 1.0891, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 1.0360198108959928, | |
| "grad_norm": 0.24609720706939697, | |
| "learning_rate": 8.314407213534477e-06, | |
| "loss": 1.0843, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.0432237730751914, | |
| "grad_norm": 0.3140798807144165, | |
| "learning_rate": 8.282928778764783e-06, | |
| "loss": 1.0936, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.0504277352543898, | |
| "grad_norm": 0.2414146065711975, | |
| "learning_rate": 8.251219936750145e-06, | |
| "loss": 1.0705, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.0576316974335884, | |
| "grad_norm": 0.30057480931282043, | |
| "learning_rate": 8.21928291292627e-06, | |
| "loss": 1.092, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 1.064835659612787, | |
| "grad_norm": 0.25547441840171814, | |
| "learning_rate": 8.18711994874345e-06, | |
| "loss": 1.0512, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.0720396217919856, | |
| "grad_norm": 0.26846885681152344, | |
| "learning_rate": 8.154733301509249e-06, | |
| "loss": 1.0865, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 1.0792435839711843, | |
| "grad_norm": 0.30581358075141907, | |
| "learning_rate": 8.12212524423008e-06, | |
| "loss": 1.0674, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.0864475461503826, | |
| "grad_norm": 0.2594411373138428, | |
| "learning_rate": 8.089298065451673e-06, | |
| "loss": 1.0779, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 1.0936515083295812, | |
| "grad_norm": 0.27017942070961, | |
| "learning_rate": 8.05625406909846e-06, | |
| "loss": 1.0922, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.1008554705087799, | |
| "grad_norm": 0.2681860327720642, | |
| "learning_rate": 8.022995574311876e-06, | |
| "loss": 1.0632, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 1.1080594326879785, | |
| "grad_norm": 0.24879907071590424, | |
| "learning_rate": 7.989524915287595e-06, | |
| "loss": 1.0773, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.1152633948671768, | |
| "grad_norm": 0.28538674116134644, | |
| "learning_rate": 7.95584444111171e-06, | |
| "loss": 1.0853, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.1224673570463755, | |
| "grad_norm": 0.2812231779098511, | |
| "learning_rate": 7.921956515595861e-06, | |
| "loss": 1.083, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.129671319225574, | |
| "grad_norm": 0.26302483677864075, | |
| "learning_rate": 7.887863517111337e-06, | |
| "loss": 1.0749, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.1368752814047727, | |
| "grad_norm": 0.26634806394577026, | |
| "learning_rate": 7.85356783842216e-06, | |
| "loss": 1.093, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.1440792435839713, | |
| "grad_norm": 0.27896663546562195, | |
| "learning_rate": 7.819071886517134e-06, | |
| "loss": 1.0736, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.1512832057631697, | |
| "grad_norm": 0.2650260031223297, | |
| "learning_rate": 7.78437808244094e-06, | |
| "loss": 1.0683, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.1584871679423683, | |
| "grad_norm": 0.2808700203895569, | |
| "learning_rate": 7.7494888611242e-06, | |
| "loss": 1.0312, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.1656911301215669, | |
| "grad_norm": 0.2390362024307251, | |
| "learning_rate": 7.714406671212589e-06, | |
| "loss": 1.0757, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.1728950923007655, | |
| "grad_norm": 0.25637757778167725, | |
| "learning_rate": 7.679133974894984e-06, | |
| "loss": 1.0633, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.1800990544799639, | |
| "grad_norm": 0.2605026662349701, | |
| "learning_rate": 7.64367324773066e-06, | |
| "loss": 1.0942, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.1873030166591625, | |
| "grad_norm": 0.23692801594734192, | |
| "learning_rate": 7.6080269784755405e-06, | |
| "loss": 1.0863, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.194506978838361, | |
| "grad_norm": 0.2812054455280304, | |
| "learning_rate": 7.572197668907533e-06, | |
| "loss": 1.102, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.2017109410175597, | |
| "grad_norm": 0.25642886757850647, | |
| "learning_rate": 7.536187833650947e-06, | |
| "loss": 1.076, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.2089149031967583, | |
| "grad_norm": 0.2664526402950287, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.0877, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.2161188653759567, | |
| "grad_norm": 0.25459641218185425, | |
| "learning_rate": 7.463636707741458e-06, | |
| "loss": 1.0798, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.2233228275551553, | |
| "grad_norm": 0.2598586082458496, | |
| "learning_rate": 7.42710050897637e-06, | |
| "loss": 1.0774, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.230526789734354, | |
| "grad_norm": 0.26702508330345154, | |
| "learning_rate": 7.390393967940962e-06, | |
| "loss": 1.092, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.2377307519135525, | |
| "grad_norm": 0.2823182940483093, | |
| "learning_rate": 7.353519660826665e-06, | |
| "loss": 1.0816, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.244934714092751, | |
| "grad_norm": 0.23282551765441895, | |
| "learning_rate": 7.31648017559931e-06, | |
| "loss": 1.0691, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.2521386762719495, | |
| "grad_norm": 0.2649790346622467, | |
| "learning_rate": 7.279278111817502e-06, | |
| "loss": 1.033, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.2593426384511481, | |
| "grad_norm": 0.23375588655471802, | |
| "learning_rate": 7.241916080450163e-06, | |
| "loss": 1.0749, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.2665466006303467, | |
| "grad_norm": 0.2711394727230072, | |
| "learning_rate": 7.2043967036932935e-06, | |
| "loss": 1.0416, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.2737505628095454, | |
| "grad_norm": 0.24989663064479828, | |
| "learning_rate": 7.166722614785937e-06, | |
| "loss": 1.0743, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.2809545249887437, | |
| "grad_norm": 0.23100513219833374, | |
| "learning_rate": 7.128896457825364e-06, | |
| "loss": 1.0769, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.2881584871679423, | |
| "grad_norm": 0.26965799927711487, | |
| "learning_rate": 7.090920887581507e-06, | |
| "loss": 1.0739, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.295362449347141, | |
| "grad_norm": 0.2137940227985382, | |
| "learning_rate": 7.052798569310641e-06, | |
| "loss": 1.0872, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.3025664115263396, | |
| "grad_norm": 0.2575233280658722, | |
| "learning_rate": 7.014532178568314e-06, | |
| "loss": 1.0558, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.309770373705538, | |
| "grad_norm": 0.22105364501476288, | |
| "learning_rate": 6.976124401021583e-06, | |
| "loss": 1.0484, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.3169743358847366, | |
| "grad_norm": 0.2336052805185318, | |
| "learning_rate": 6.9375779322605154e-06, | |
| "loss": 1.0805, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.3241782980639352, | |
| "grad_norm": 0.23742294311523438, | |
| "learning_rate": 6.898895477609007e-06, | |
| "loss": 1.0862, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.3313822602431338, | |
| "grad_norm": 0.258781373500824, | |
| "learning_rate": 6.860079751934908e-06, | |
| "loss": 1.0739, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.3385862224223324, | |
| "grad_norm": 0.2347659021615982, | |
| "learning_rate": 6.821133479459492e-06, | |
| "loss": 1.0877, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.3457901846015308, | |
| "grad_norm": 0.24011020362377167, | |
| "learning_rate": 6.782059393566254e-06, | |
| "loss": 1.0676, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.3529941467807294, | |
| "grad_norm": 0.30535200238227844, | |
| "learning_rate": 6.7428602366090764e-06, | |
| "loss": 1.0809, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.360198108959928, | |
| "grad_norm": 0.23181499540805817, | |
| "learning_rate": 6.70353875971976e-06, | |
| "loss": 1.0809, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.3674020711391266, | |
| "grad_norm": 0.20072412490844727, | |
| "learning_rate": 6.664097722614934e-06, | |
| "loss": 1.0607, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.374606033318325, | |
| "grad_norm": 0.22480317950248718, | |
| "learning_rate": 6.624539893402383e-06, | |
| "loss": 1.0745, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.3818099954975236, | |
| "grad_norm": 0.22832217812538147, | |
| "learning_rate": 6.58486804838676e-06, | |
| "loss": 1.0856, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.3890139576767222, | |
| "grad_norm": 0.24673670530319214, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 1.0905, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.3962179198559208, | |
| "grad_norm": 0.24316424131393433, | |
| "learning_rate": 6.505193455979603e-06, | |
| "loss": 1.0795, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.4034218820351194, | |
| "grad_norm": 0.23344801366329193, | |
| "learning_rate": 6.465196300425287e-06, | |
| "loss": 1.0879, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.4106258442143178, | |
| "grad_norm": 0.2587188482284546, | |
| "learning_rate": 6.425096312349881e-06, | |
| "loss": 1.082, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.4178298063935164, | |
| "grad_norm": 0.23247972130775452, | |
| "learning_rate": 6.384896306108612e-06, | |
| "loss": 1.0677, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.425033768572715, | |
| "grad_norm": 0.2605839669704437, | |
| "learning_rate": 6.344599103076329e-06, | |
| "loss": 1.0615, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.4322377307519136, | |
| "grad_norm": 0.22571925818920135, | |
| "learning_rate": 6.304207531449486e-06, | |
| "loss": 1.0821, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.439441692931112, | |
| "grad_norm": 0.27554214000701904, | |
| "learning_rate": 6.2637244260476474e-06, | |
| "loss": 1.0659, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.4466456551103106, | |
| "grad_norm": 0.24706511199474335, | |
| "learning_rate": 6.223152628114537e-06, | |
| "loss": 1.0664, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.4538496172895092, | |
| "grad_norm": 0.24783776700496674, | |
| "learning_rate": 6.182494985118625e-06, | |
| "loss": 1.0548, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.4610535794687078, | |
| "grad_norm": 0.24173712730407715, | |
| "learning_rate": 6.141754350553279e-06, | |
| "loss": 1.0647, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.4682575416479065, | |
| "grad_norm": 0.2548038363456726, | |
| "learning_rate": 6.100933583736508e-06, | |
| "loss": 1.0656, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.4754615038271048, | |
| "grad_norm": 0.23962704837322235, | |
| "learning_rate": 6.060035549610275e-06, | |
| "loss": 1.0756, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.4826654660063034, | |
| "grad_norm": 0.23094172775745392, | |
| "learning_rate": 6.019063118539425e-06, | |
| "loss": 1.063, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.489869428185502, | |
| "grad_norm": 0.24982015788555145, | |
| "learning_rate": 5.978019166110242e-06, | |
| "loss": 1.0805, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.4970733903647007, | |
| "grad_norm": 0.23258346319198608, | |
| "learning_rate": 5.936906572928625e-06, | |
| "loss": 1.0595, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.504277352543899, | |
| "grad_norm": 0.23061244189739227, | |
| "learning_rate": 5.8957282244179125e-06, | |
| "loss": 1.082, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.5114813147230977, | |
| "grad_norm": 0.23120225965976715, | |
| "learning_rate": 5.854487010616384e-06, | |
| "loss": 1.0831, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.5186852769022963, | |
| "grad_norm": 0.24363379180431366, | |
| "learning_rate": 5.813185825974419e-06, | |
| "loss": 1.1031, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.5258892390814949, | |
| "grad_norm": 0.246430441737175, | |
| "learning_rate": 5.771827569151357e-06, | |
| "loss": 1.0902, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.5330932012606935, | |
| "grad_norm": 0.24487066268920898, | |
| "learning_rate": 5.730415142812059e-06, | |
| "loss": 1.0521, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.540297163439892, | |
| "grad_norm": 0.2253061830997467, | |
| "learning_rate": 5.68895145342319e-06, | |
| "loss": 1.0843, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.5475011256190905, | |
| "grad_norm": 0.22573482990264893, | |
| "learning_rate": 5.647439411049235e-06, | |
| "loss": 1.068, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.554705087798289, | |
| "grad_norm": 0.24459558725357056, | |
| "learning_rate": 5.605881929148254e-06, | |
| "loss": 1.0707, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.5619090499774875, | |
| "grad_norm": 0.2182885855436325, | |
| "learning_rate": 5.5642819243674085e-06, | |
| "loss": 1.0446, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.569113012156686, | |
| "grad_norm": 0.21815907955169678, | |
| "learning_rate": 5.522642316338268e-06, | |
| "loss": 1.0404, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.5763169743358847, | |
| "grad_norm": 0.22507762908935547, | |
| "learning_rate": 5.480966027471889e-06, | |
| "loss": 1.0527, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.5835209365150833, | |
| "grad_norm": 0.2513904869556427, | |
| "learning_rate": 5.439255982753717e-06, | |
| "loss": 1.0503, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.590724898694282, | |
| "grad_norm": 0.20894253253936768, | |
| "learning_rate": 5.3975151095383e-06, | |
| "loss": 1.0765, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.5979288608734805, | |
| "grad_norm": 0.2522652745246887, | |
| "learning_rate": 5.355746337343835e-06, | |
| "loss": 1.0855, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.6051328230526791, | |
| "grad_norm": 0.2368040829896927, | |
| "learning_rate": 5.3139525976465675e-06, | |
| "loss": 1.0654, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.6123367852318775, | |
| "grad_norm": 0.2553313672542572, | |
| "learning_rate": 5.272136823675046e-06, | |
| "loss": 1.073, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.6195407474110761, | |
| "grad_norm": 0.23547260463237762, | |
| "learning_rate": 5.230301950204261e-06, | |
| "loss": 1.0681, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.6267447095902745, | |
| "grad_norm": 0.2587689757347107, | |
| "learning_rate": 5.188450913349674e-06, | |
| "loss": 1.0603, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.6339486717694731, | |
| "grad_norm": 0.2566240429878235, | |
| "learning_rate": 5.146586650361143e-06, | |
| "loss": 1.0576, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.6411526339486717, | |
| "grad_norm": 0.23041875660419464, | |
| "learning_rate": 5.1047120994167855e-06, | |
| "loss": 1.0694, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.6483565961278703, | |
| "grad_norm": 0.24431072175502777, | |
| "learning_rate": 5.062830199416764e-06, | |
| "loss": 1.0616, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.655560558307069, | |
| "grad_norm": 0.21524447202682495, | |
| "learning_rate": 5.0209438897770205e-06, | |
| "loss": 1.065, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.6627645204862675, | |
| "grad_norm": 0.24770976603031158, | |
| "learning_rate": 4.979056110222982e-06, | |
| "loss": 1.0548, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.6699684826654662, | |
| "grad_norm": 0.2506217062473297, | |
| "learning_rate": 4.937169800583237e-06, | |
| "loss": 1.0779, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.6771724448446645, | |
| "grad_norm": 0.246966153383255, | |
| "learning_rate": 4.895287900583216e-06, | |
| "loss": 1.0487, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.6843764070238632, | |
| "grad_norm": 0.23613645136356354, | |
| "learning_rate": 4.853413349638859e-06, | |
| "loss": 1.0649, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.6915803692030615, | |
| "grad_norm": 0.24523551762104034, | |
| "learning_rate": 4.811549086650327e-06, | |
| "loss": 1.0652, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.6987843313822601, | |
| "grad_norm": 0.22116148471832275, | |
| "learning_rate": 4.769698049795739e-06, | |
| "loss": 1.0589, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.7059882935614588, | |
| "grad_norm": 0.2507432997226715, | |
| "learning_rate": 4.727863176324955e-06, | |
| "loss": 1.0257, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.7131922557406574, | |
| "grad_norm": 0.2185191810131073, | |
| "learning_rate": 4.686047402353433e-06, | |
| "loss": 1.0596, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.720396217919856, | |
| "grad_norm": 0.2304830700159073, | |
| "learning_rate": 4.644253662656167e-06, | |
| "loss": 1.0531, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.7276001800990546, | |
| "grad_norm": 0.25681596994400024, | |
| "learning_rate": 4.602484890461702e-06, | |
| "loss": 1.0719, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.7348041422782532, | |
| "grad_norm": 0.23360076546669006, | |
| "learning_rate": 4.560744017246284e-06, | |
| "loss": 1.0593, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.7420081044574516, | |
| "grad_norm": 0.23592416942119598, | |
| "learning_rate": 4.519033972528114e-06, | |
| "loss": 1.0583, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.7492120666366502, | |
| "grad_norm": 0.23926499485969543, | |
| "learning_rate": 4.477357683661734e-06, | |
| "loss": 1.0778, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.7564160288158486, | |
| "grad_norm": 0.23426315188407898, | |
| "learning_rate": 4.4357180756325915e-06, | |
| "loss": 1.0469, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.7636199909950472, | |
| "grad_norm": 0.21399535238742828, | |
| "learning_rate": 4.394118070851749e-06, | |
| "loss": 1.036, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.7708239531742458, | |
| "grad_norm": 0.25305458903312683, | |
| "learning_rate": 4.352560588950766e-06, | |
| "loss": 1.0811, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.7780279153534444, | |
| "grad_norm": 0.23678237199783325, | |
| "learning_rate": 4.31104854657681e-06, | |
| "loss": 1.0563, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.785231877532643, | |
| "grad_norm": 0.22029711306095123, | |
| "learning_rate": 4.269584857187942e-06, | |
| "loss": 1.0634, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.7924358397118416, | |
| "grad_norm": 0.21898190677165985, | |
| "learning_rate": 4.228172430848645e-06, | |
| "loss": 1.0609, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.7996398018910402, | |
| "grad_norm": 0.22045612335205078, | |
| "learning_rate": 4.186814174025582e-06, | |
| "loss": 1.0483, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.8068437640702386, | |
| "grad_norm": 0.2106829732656479, | |
| "learning_rate": 4.145512989383618e-06, | |
| "loss": 1.0443, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.8140477262494372, | |
| "grad_norm": 0.2186656892299652, | |
| "learning_rate": 4.104271775582089e-06, | |
| "loss": 1.0346, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.8212516884286356, | |
| "grad_norm": 0.23272953927516937, | |
| "learning_rate": 4.063093427071376e-06, | |
| "loss": 1.0576, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.8284556506078342, | |
| "grad_norm": 0.23906800150871277, | |
| "learning_rate": 4.02198083388976e-06, | |
| "loss": 1.0428, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.8356596127870328, | |
| "grad_norm": 0.22326606512069702, | |
| "learning_rate": 3.980936881460576e-06, | |
| "loss": 1.0751, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.8428635749662314, | |
| "grad_norm": 0.2465428113937378, | |
| "learning_rate": 3.939964450389728e-06, | |
| "loss": 1.064, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.85006753714543, | |
| "grad_norm": 0.25662410259246826, | |
| "learning_rate": 3.899066416263493e-06, | |
| "loss": 1.0593, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.8572714993246286, | |
| "grad_norm": 0.22539134323596954, | |
| "learning_rate": 3.8582456494467214e-06, | |
| "loss": 1.0588, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.8644754615038273, | |
| "grad_norm": 0.2232930064201355, | |
| "learning_rate": 3.817505014881378e-06, | |
| "loss": 1.0399, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.8716794236830256, | |
| "grad_norm": 0.24547068774700165, | |
| "learning_rate": 3.776847371885464e-06, | |
| "loss": 1.0477, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.8788833858622243, | |
| "grad_norm": 0.2386842668056488, | |
| "learning_rate": 3.736275573952354e-06, | |
| "loss": 1.0538, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.8860873480414226, | |
| "grad_norm": 0.23360906541347504, | |
| "learning_rate": 3.695792468550517e-06, | |
| "loss": 1.0455, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.8932913102206212, | |
| "grad_norm": 0.22610723972320557, | |
| "learning_rate": 3.655400896923672e-06, | |
| "loss": 1.0779, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.9004952723998199, | |
| "grad_norm": 0.2285996526479721, | |
| "learning_rate": 3.6151036938913887e-06, | |
| "loss": 1.0672, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.9076992345790185, | |
| "grad_norm": 0.2220553308725357, | |
| "learning_rate": 3.5749036876501196e-06, | |
| "loss": 1.0876, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.914903196758217, | |
| "grad_norm": 0.2081567645072937, | |
| "learning_rate": 3.5348036995747135e-06, | |
| "loss": 1.0844, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.9221071589374157, | |
| "grad_norm": 0.21750611066818237, | |
| "learning_rate": 3.4948065440203982e-06, | |
| "loss": 1.0582, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.9293111211166143, | |
| "grad_norm": 0.20807136595249176, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 1.0738, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.9365150832958127, | |
| "grad_norm": 0.21424974501132965, | |
| "learning_rate": 3.4151319516132414e-06, | |
| "loss": 1.0293, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.9437190454750113, | |
| "grad_norm": 0.2395254671573639, | |
| "learning_rate": 3.375460106597619e-06, | |
| "loss": 1.0624, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.9509230076542097, | |
| "grad_norm": 0.23250122368335724, | |
| "learning_rate": 3.3359022773850673e-06, | |
| "loss": 1.0406, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.9581269698334083, | |
| "grad_norm": 0.20610974729061127, | |
| "learning_rate": 3.2964612402802422e-06, | |
| "loss": 1.0673, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.965330932012607, | |
| "grad_norm": 0.2716231346130371, | |
| "learning_rate": 3.2571397633909252e-06, | |
| "loss": 1.0312, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.9725348941918055, | |
| "grad_norm": 0.23458805680274963, | |
| "learning_rate": 3.217940606433747e-06, | |
| "loss": 1.0442, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.979738856371004, | |
| "grad_norm": 0.21636377274990082, | |
| "learning_rate": 3.178866520540509e-06, | |
| "loss": 1.0448, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.9869428185502027, | |
| "grad_norm": 0.2635723054409027, | |
| "learning_rate": 3.139920248065095e-06, | |
| "loss": 1.0657, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.9941467807294013, | |
| "grad_norm": 0.20790298283100128, | |
| "learning_rate": 3.1011045223909954e-06, | |
| "loss": 1.0635, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.22311536967754364, | |
| "learning_rate": 3.0624220677394854e-06, | |
| "loss": 1.0679, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 2.0072039621791986, | |
| "grad_norm": 0.2736155092716217, | |
| "learning_rate": 3.023875598978419e-06, | |
| "loss": 1.0501, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 2.014407924358397, | |
| "grad_norm": 0.22263078391551971, | |
| "learning_rate": 2.9854678214316875e-06, | |
| "loss": 1.0339, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.021611886537596, | |
| "grad_norm": 0.21312211453914642, | |
| "learning_rate": 2.9472014306893605e-06, | |
| "loss": 1.0475, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 2.0288158487167944, | |
| "grad_norm": 0.2075333446264267, | |
| "learning_rate": 2.9090791124184934e-06, | |
| "loss": 1.0658, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 2.0360198108959926, | |
| "grad_norm": 0.21481953561306, | |
| "learning_rate": 2.871103542174637e-06, | |
| "loss": 1.0638, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 2.043223773075191, | |
| "grad_norm": 0.21095344424247742, | |
| "learning_rate": 2.8332773852140644e-06, | |
| "loss": 1.0372, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 2.05042773525439, | |
| "grad_norm": 0.23806796967983246, | |
| "learning_rate": 2.795603296306708e-06, | |
| "loss": 1.0547, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 2.0576316974335884, | |
| "grad_norm": 0.22928300499916077, | |
| "learning_rate": 2.7580839195498397e-06, | |
| "loss": 1.044, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 2.064835659612787, | |
| "grad_norm": 0.20792540907859802, | |
| "learning_rate": 2.7207218881825016e-06, | |
| "loss": 1.0486, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 2.0720396217919856, | |
| "grad_norm": 0.21220359206199646, | |
| "learning_rate": 2.683519824400693e-06, | |
| "loss": 1.0735, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 2.0792435839711843, | |
| "grad_norm": 0.21435540914535522, | |
| "learning_rate": 2.646480339173337e-06, | |
| "loss": 1.0422, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 2.086447546150383, | |
| "grad_norm": 0.20643557608127594, | |
| "learning_rate": 2.6096060320590393e-06, | |
| "loss": 1.0268, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.0936515083295815, | |
| "grad_norm": 0.2074732780456543, | |
| "learning_rate": 2.5728994910236304e-06, | |
| "loss": 1.0434, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 2.1008554705087796, | |
| "grad_norm": 0.22091282904148102, | |
| "learning_rate": 2.536363292258543e-06, | |
| "loss": 1.048, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 2.1080594326879782, | |
| "grad_norm": 0.2222498208284378, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 1.0387, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 2.115263394867177, | |
| "grad_norm": 0.2226988971233368, | |
| "learning_rate": 2.4638121663490546e-06, | |
| "loss": 1.0144, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 2.1224673570463755, | |
| "grad_norm": 0.20479106903076172, | |
| "learning_rate": 2.4278023310924676e-06, | |
| "loss": 1.0411, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.129671319225574, | |
| "grad_norm": 0.20795980095863342, | |
| "learning_rate": 2.391973021524461e-06, | |
| "loss": 1.0469, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 2.1368752814047727, | |
| "grad_norm": 0.1892288625240326, | |
| "learning_rate": 2.356326752269342e-06, | |
| "loss": 1.0543, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 2.1440792435839713, | |
| "grad_norm": 0.21468913555145264, | |
| "learning_rate": 2.320866025105016e-06, | |
| "loss": 1.0356, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 2.15128320576317, | |
| "grad_norm": 0.19955170154571533, | |
| "learning_rate": 2.285593328787414e-06, | |
| "loss": 1.0236, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 2.1584871679423685, | |
| "grad_norm": 0.2040010690689087, | |
| "learning_rate": 2.250511138875801e-06, | |
| "loss": 1.0398, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.1656911301215667, | |
| "grad_norm": 0.2120560258626938, | |
| "learning_rate": 2.2156219175590623e-06, | |
| "loss": 1.05, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 2.1728950923007653, | |
| "grad_norm": 0.21219217777252197, | |
| "learning_rate": 2.1809281134828663e-06, | |
| "loss": 1.0505, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 2.180099054479964, | |
| "grad_norm": 0.22002087533473969, | |
| "learning_rate": 2.146432161577842e-06, | |
| "loss": 1.0316, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 2.1873030166591625, | |
| "grad_norm": 0.2064754068851471, | |
| "learning_rate": 2.112136482888663e-06, | |
| "loss": 1.0318, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 2.194506978838361, | |
| "grad_norm": 0.21059390902519226, | |
| "learning_rate": 2.07804348440414e-06, | |
| "loss": 1.046, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.2017109410175597, | |
| "grad_norm": 0.2134746015071869, | |
| "learning_rate": 2.04415555888829e-06, | |
| "loss": 1.0578, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 2.2089149031967583, | |
| "grad_norm": 0.19525548815727234, | |
| "learning_rate": 2.0104750847124075e-06, | |
| "loss": 1.0484, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 2.216118865375957, | |
| "grad_norm": 0.19859297573566437, | |
| "learning_rate": 1.977004425688126e-06, | |
| "loss": 1.0266, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 2.2233228275551555, | |
| "grad_norm": 0.2262914627790451, | |
| "learning_rate": 1.9437459309015426e-06, | |
| "loss": 1.0691, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 2.2305267897343537, | |
| "grad_norm": 0.2108326107263565, | |
| "learning_rate": 1.910701934548329e-06, | |
| "loss": 1.0429, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.2377307519135523, | |
| "grad_norm": 0.21613864600658417, | |
| "learning_rate": 1.8778747557699223e-06, | |
| "loss": 1.0604, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 2.244934714092751, | |
| "grad_norm": 0.19557908177375793, | |
| "learning_rate": 1.8452666984907519e-06, | |
| "loss": 1.0558, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.2521386762719495, | |
| "grad_norm": 0.19418083131313324, | |
| "learning_rate": 1.8128800512565514e-06, | |
| "loss": 1.0507, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 2.259342638451148, | |
| "grad_norm": 0.1997075378894806, | |
| "learning_rate": 1.7807170870737317e-06, | |
| "loss": 1.0338, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.2665466006303467, | |
| "grad_norm": 0.20641541481018066, | |
| "learning_rate": 1.7487800632498547e-06, | |
| "loss": 1.0297, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.2737505628095454, | |
| "grad_norm": 0.18877221643924713, | |
| "learning_rate": 1.7170712212352187e-06, | |
| "loss": 1.0564, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.280954524988744, | |
| "grad_norm": 0.20154546201229095, | |
| "learning_rate": 1.6855927864655241e-06, | |
| "loss": 1.0268, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 2.2881584871679426, | |
| "grad_norm": 0.2065822035074234, | |
| "learning_rate": 1.6543469682057105e-06, | |
| "loss": 1.06, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.2953624493471407, | |
| "grad_norm": 0.19270487129688263, | |
| "learning_rate": 1.6233359593948777e-06, | |
| "loss": 1.0753, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 2.3025664115263393, | |
| "grad_norm": 0.19982437789440155, | |
| "learning_rate": 1.5925619364924016e-06, | |
| "loss": 1.0346, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.309770373705538, | |
| "grad_norm": 0.20062725245952606, | |
| "learning_rate": 1.5620270593251635e-06, | |
| "loss": 1.0228, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 2.3169743358847366, | |
| "grad_norm": 0.18592402338981628, | |
| "learning_rate": 1.531733470935976e-06, | |
| "loss": 1.0677, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.324178298063935, | |
| "grad_norm": 0.20376469194889069, | |
| "learning_rate": 1.5016832974331725e-06, | |
| "loss": 1.0468, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 2.3313822602431338, | |
| "grad_norm": 0.20351487398147583, | |
| "learning_rate": 1.4718786478413983e-06, | |
| "loss": 1.0707, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.3385862224223324, | |
| "grad_norm": 0.21062229573726654, | |
| "learning_rate": 1.4423216139535735e-06, | |
| "loss": 1.0609, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.345790184601531, | |
| "grad_norm": 0.20279563963413239, | |
| "learning_rate": 1.4130142701841076e-06, | |
| "loss": 1.0263, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.3529941467807296, | |
| "grad_norm": 0.1935521960258484, | |
| "learning_rate": 1.3839586734232907e-06, | |
| "loss": 1.0262, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.3601981089599278, | |
| "grad_norm": 0.2124936282634735, | |
| "learning_rate": 1.3551568628929434e-06, | |
| "loss": 1.0125, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.3674020711391264, | |
| "grad_norm": 0.19464442133903503, | |
| "learning_rate": 1.3266108600032928e-06, | |
| "loss": 1.0478, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.374606033318325, | |
| "grad_norm": 0.19089345633983612, | |
| "learning_rate": 1.2983226682111094e-06, | |
| "loss": 1.0116, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.3818099954975236, | |
| "grad_norm": 0.19948884844779968, | |
| "learning_rate": 1.2702942728790897e-06, | |
| "loss": 1.0635, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.389013957676722, | |
| "grad_norm": 0.19687066972255707, | |
| "learning_rate": 1.24252764113652e-06, | |
| "loss": 1.0264, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.396217919855921, | |
| "grad_norm": 0.2047269642353058, | |
| "learning_rate": 1.2150247217412186e-06, | |
| "loss": 1.0241, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.4034218820351194, | |
| "grad_norm": 0.19504639506340027, | |
| "learning_rate": 1.18778744494276e-06, | |
| "loss": 1.0271, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.410625844214318, | |
| "grad_norm": 0.18415555357933044, | |
| "learning_rate": 1.160817722347014e-06, | |
| "loss": 1.0322, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.4178298063935166, | |
| "grad_norm": 0.18974873423576355, | |
| "learning_rate": 1.1341174467819637e-06, | |
| "loss": 1.0191, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.425033768572715, | |
| "grad_norm": 0.18910925090312958, | |
| "learning_rate": 1.1076884921648834e-06, | |
| "loss": 1.0632, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 2.4322377307519134, | |
| "grad_norm": 0.180083766579628, | |
| "learning_rate": 1.0815327133708015e-06, | |
| "loss": 1.0412, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.439441692931112, | |
| "grad_norm": 0.18438957631587982, | |
| "learning_rate": 1.0556519461023301e-06, | |
| "loss": 1.0345, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 2.4466456551103106, | |
| "grad_norm": 0.1945696324110031, | |
| "learning_rate": 1.0300480067608232e-06, | |
| "loss": 1.0305, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.4538496172895092, | |
| "grad_norm": 0.20218323171138763, | |
| "learning_rate": 1.0047226923189024e-06, | |
| "loss": 1.0532, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.461053579468708, | |
| "grad_norm": 0.18660913407802582, | |
| "learning_rate": 9.79677780194327e-07, | |
| "loss": 1.0248, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.4682575416479065, | |
| "grad_norm": 0.20080283284187317, | |
| "learning_rate": 9.549150281252633e-07, | |
| "loss": 1.0266, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.475461503827105, | |
| "grad_norm": 0.17952297627925873, | |
| "learning_rate": 9.304361740469103e-07, | |
| "loss": 1.0285, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.4826654660063037, | |
| "grad_norm": 0.19411954283714294, | |
| "learning_rate": 9.06242935969528e-07, | |
| "loss": 1.0362, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.489869428185502, | |
| "grad_norm": 0.19213198125362396, | |
| "learning_rate": 8.823370118578628e-07, | |
| "loss": 1.0304, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.4970733903647004, | |
| "grad_norm": 0.1971171349287033, | |
| "learning_rate": 8.587200795119793e-07, | |
| "loss": 1.0621, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.504277352543899, | |
| "grad_norm": 0.2063921093940735, | |
| "learning_rate": 8.353937964495029e-07, | |
| "loss": 1.0198, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.5114813147230977, | |
| "grad_norm": 0.18766029179096222, | |
| "learning_rate": 8.123597997892918e-07, | |
| "loss": 1.0231, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.5186852769022963, | |
| "grad_norm": 0.17498192191123962, | |
| "learning_rate": 7.89619706136539e-07, | |
| "loss": 1.0438, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.525889239081495, | |
| "grad_norm": 0.18453362584114075, | |
| "learning_rate": 7.671751114693104e-07, | |
| "loss": 1.0313, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.5330932012606935, | |
| "grad_norm": 0.17940948903560638, | |
| "learning_rate": 7.450275910265415e-07, | |
| "loss": 1.0169, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.540297163439892, | |
| "grad_norm": 0.1889266073703766, | |
| "learning_rate": 7.23178699197467e-07, | |
| "loss": 1.0317, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.5475011256190907, | |
| "grad_norm": 0.17744366824626923, | |
| "learning_rate": 7.01629969412545e-07, | |
| "loss": 1.0466, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.554705087798289, | |
| "grad_norm": 0.1801387220621109, | |
| "learning_rate": 6.803829140358237e-07, | |
| "loss": 1.0414, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.5619090499774875, | |
| "grad_norm": 0.19586600363254547, | |
| "learning_rate": 6.594390242588044e-07, | |
| "loss": 1.0464, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.569113012156686, | |
| "grad_norm": 0.19039765000343323, | |
| "learning_rate": 6.387997699957815e-07, | |
| "loss": 1.0275, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.5763169743358847, | |
| "grad_norm": 0.18481585383415222, | |
| "learning_rate": 6.184665997806832e-07, | |
| "loss": 1.0298, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.5835209365150833, | |
| "grad_norm": 0.19624339044094086, | |
| "learning_rate": 5.98440940665399e-07, | |
| "loss": 1.0538, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.590724898694282, | |
| "grad_norm": 0.18870003521442413, | |
| "learning_rate": 5.787241981196384e-07, | |
| "loss": 1.0454, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.5979288608734805, | |
| "grad_norm": 0.1834286004304886, | |
| "learning_rate": 5.593177559322776e-07, | |
| "loss": 0.9966, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.605132823052679, | |
| "grad_norm": 0.19677117466926575, | |
| "learning_rate": 5.402229761142464e-07, | |
| "loss": 1.0752, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.6123367852318777, | |
| "grad_norm": 0.18339574337005615, | |
| "learning_rate": 5.214411988029355e-07, | |
| "loss": 1.0525, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.619540747411076, | |
| "grad_norm": 0.17459526658058167, | |
| "learning_rate": 5.029737421681446e-07, | |
| "loss": 1.0209, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.6267447095902745, | |
| "grad_norm": 0.18108704686164856, | |
| "learning_rate": 4.848219023195644e-07, | |
| "loss": 1.053, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.633948671769473, | |
| "grad_norm": 0.19006100296974182, | |
| "learning_rate": 4.6698695321581165e-07, | |
| "loss": 1.0343, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.6411526339486717, | |
| "grad_norm": 0.19221577048301697, | |
| "learning_rate": 4.494701465750217e-07, | |
| "loss": 1.0217, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.6483565961278703, | |
| "grad_norm": 0.1743839979171753, | |
| "learning_rate": 4.322727117869951e-07, | |
| "loss": 1.0542, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.655560558307069, | |
| "grad_norm": 0.19367291033267975, | |
| "learning_rate": 4.153958558269189e-07, | |
| "loss": 1.0319, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.6627645204862675, | |
| "grad_norm": 0.1787528395652771, | |
| "learning_rate": 3.9884076317064813e-07, | |
| "loss": 1.0489, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.669968482665466, | |
| "grad_norm": 0.1872800886631012, | |
| "learning_rate": 3.8260859571158883e-07, | |
| "loss": 1.0282, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.6771724448446648, | |
| "grad_norm": 0.188198521733284, | |
| "learning_rate": 3.6670049267913954e-07, | |
| "loss": 1.0347, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.684376407023863, | |
| "grad_norm": 0.17867067456245422, | |
| "learning_rate": 3.511175705587433e-07, | |
| "loss": 1.0329, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.6915803692030615, | |
| "grad_norm": 0.1834060102701187, | |
| "learning_rate": 3.358609230135268e-07, | |
| "loss": 1.0427, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.69878433138226, | |
| "grad_norm": 0.18759381771087646, | |
| "learning_rate": 3.2093162080754634e-07, | |
| "loss": 1.0221, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.7059882935614588, | |
| "grad_norm": 0.1826649308204651, | |
| "learning_rate": 3.0633071173062966e-07, | |
| "loss": 1.0362, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.7131922557406574, | |
| "grad_norm": 0.1856626272201538, | |
| "learning_rate": 2.920592205248496e-07, | |
| "loss": 1.0376, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.720396217919856, | |
| "grad_norm": 0.1898716390132904, | |
| "learning_rate": 2.7811814881259503e-07, | |
| "loss": 1.0424, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.7276001800990546, | |
| "grad_norm": 0.18418952822685242, | |
| "learning_rate": 2.6450847502627883e-07, | |
| "loss": 1.049, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.734804142278253, | |
| "grad_norm": 0.17716997861862183, | |
| "learning_rate": 2.5123115433966615e-07, | |
| "loss": 1.055, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.742008104457452, | |
| "grad_norm": 0.17998386919498444, | |
| "learning_rate": 2.3828711860083676e-07, | |
| "loss": 1.0457, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.74921206663665, | |
| "grad_norm": 0.17547675967216492, | |
| "learning_rate": 2.2567727626678527e-07, | |
| "loss": 1.0483, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.7564160288158486, | |
| "grad_norm": 0.17643079161643982, | |
| "learning_rate": 2.134025123396638e-07, | |
| "loss": 1.0344, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.763619990995047, | |
| "grad_norm": 0.16483494639396667, | |
| "learning_rate": 2.0146368830466668e-07, | |
| "loss": 1.0752, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.770823953174246, | |
| "grad_norm": 0.17367129027843475, | |
| "learning_rate": 1.8986164206957037e-07, | |
| "loss": 1.0487, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.7780279153534444, | |
| "grad_norm": 0.17891699075698853, | |
| "learning_rate": 1.785971879059273e-07, | |
| "loss": 1.0087, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.785231877532643, | |
| "grad_norm": 0.19728310406208038, | |
| "learning_rate": 1.6767111639191202e-07, | |
| "loss": 1.0226, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.7924358397118416, | |
| "grad_norm": 0.17195719480514526, | |
| "learning_rate": 1.5708419435684463e-07, | |
| "loss": 1.0392, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.7996398018910402, | |
| "grad_norm": 0.18537551164627075, | |
| "learning_rate": 1.4683716482736364e-07, | |
| "loss": 1.038, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.806843764070239, | |
| "grad_norm": 0.191681370139122, | |
| "learning_rate": 1.3693074697528231e-07, | |
| "loss": 1.0431, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.814047726249437, | |
| "grad_norm": 0.16845721006393433, | |
| "learning_rate": 1.2736563606711384e-07, | |
| "loss": 1.02, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.8212516884286356, | |
| "grad_norm": 0.176877960562706, | |
| "learning_rate": 1.1814250341527611e-07, | |
| "loss": 1.0547, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.828455650607834, | |
| "grad_norm": 0.1708219051361084, | |
| "learning_rate": 1.0926199633097156e-07, | |
| "loss": 1.0143, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.835659612787033, | |
| "grad_norm": 0.17549683153629303, | |
| "learning_rate": 1.007247380787657e-07, | |
| "loss": 1.0222, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.8428635749662314, | |
| "grad_norm": 0.18442362546920776, | |
| "learning_rate": 9.253132783283548e-08, | |
| "loss": 1.0487, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.85006753714543, | |
| "grad_norm": 0.17564083635807037, | |
| "learning_rate": 8.468234063492287e-08, | |
| "loss": 1.0169, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.8572714993246286, | |
| "grad_norm": 0.17153891921043396, | |
| "learning_rate": 7.717832735397335e-08, | |
| "loss": 1.0532, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.8644754615038273, | |
| "grad_norm": 0.1801021248102188, | |
| "learning_rate": 7.001981464747565e-08, | |
| "loss": 1.0294, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.871679423683026, | |
| "grad_norm": 0.18023864924907684, | |
| "learning_rate": 6.3207304924498e-08, | |
| "loss": 1.0518, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.878883385862224, | |
| "grad_norm": 0.17345793545246124, | |
| "learning_rate": 5.674127631043025e-08, | |
| "loss": 1.0301, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.8860873480414226, | |
| "grad_norm": 0.18773488700389862, | |
| "learning_rate": 5.062218261342122e-08, | |
| "loss": 1.0307, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.8932913102206212, | |
| "grad_norm": 0.1761646866798401, | |
| "learning_rate": 4.485045329253646e-08, | |
| "loss": 1.0221, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.90049527239982, | |
| "grad_norm": 0.1819511353969574, | |
| "learning_rate": 3.9426493427611177e-08, | |
| "loss": 1.0565, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.9076992345790185, | |
| "grad_norm": 0.17573249340057373, | |
| "learning_rate": 3.435068369082306e-08, | |
| "loss": 1.0432, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.914903196758217, | |
| "grad_norm": 0.1772117167711258, | |
| "learning_rate": 2.9623380319976912e-08, | |
| "loss": 1.039, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.9221071589374157, | |
| "grad_norm": 0.17637501657009125, | |
| "learning_rate": 2.5244915093499134e-08, | |
| "loss": 1.0398, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.9293111211166143, | |
| "grad_norm": 0.16873933374881744, | |
| "learning_rate": 2.1215595307154667e-08, | |
| "loss": 1.0196, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.936515083295813, | |
| "grad_norm": 0.1777278184890747, | |
| "learning_rate": 1.753570375247815e-08, | |
| "loss": 1.0373, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.943719045475011, | |
| "grad_norm": 0.1736447662115097, | |
| "learning_rate": 1.4205498696930332e-08, | |
| "loss": 1.0593, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.9509230076542097, | |
| "grad_norm": 0.17402058839797974, | |
| "learning_rate": 1.1225213865767026e-08, | |
| "loss": 1.0174, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.9581269698334083, | |
| "grad_norm": 0.16966642439365387, | |
| "learning_rate": 8.595058425640012e-09, | |
| "loss": 1.0114, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.965330932012607, | |
| "grad_norm": 0.17315024137496948, | |
| "learning_rate": 6.315216969912663e-09, | |
| "loss": 1.0558, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.9725348941918055, | |
| "grad_norm": 0.1718152016401291, | |
| "learning_rate": 4.385849505708084e-09, | |
| "loss": 1.058, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.979738856371004, | |
| "grad_norm": 0.1746918112039566, | |
| "learning_rate": 2.8070914426786555e-09, | |
| "loss": 1.0505, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.9869428185502027, | |
| "grad_norm": 0.18175256252288818, | |
| "learning_rate": 1.5790535835003006e-09, | |
| "loss": 1.0555, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.9941467807294013, | |
| "grad_norm": 0.177822545170784, | |
| "learning_rate": 7.018221160981498e-10, | |
| "loss": 1.0346, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.18819527328014374, | |
| "learning_rate": 1.7545860759693446e-10, | |
| "loss": 0.9961, | |
| "step": 417 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 417, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2226454674800640.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |