{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.996111181945684, "eval_steps": 5000000.0, "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.9922223638913683e-05, "grad_norm": 0.22225640714168549, "learning_rate": 4e-06, "loss": 2.5507, "step": 10 }, { "epoch": 3.9844447277827365e-05, "grad_norm": 0.13248342275619507, "learning_rate": 8e-06, "loss": 2.5542, "step": 20 }, { "epoch": 5.9766670916741044e-05, "grad_norm": 0.13120247423648834, "learning_rate": 1.2e-05, "loss": 2.5435, "step": 30 }, { "epoch": 7.968889455565473e-05, "grad_norm": 0.12027782201766968, "learning_rate": 1.6e-05, "loss": 2.5486, "step": 40 }, { "epoch": 9.961111819456841e-05, "grad_norm": 0.11423682421445847, "learning_rate": 2e-05, "loss": 2.5488, "step": 50 }, { "epoch": 0.00011953334183348209, "grad_norm": 0.10780773311853409, "learning_rate": 2.4e-05, "loss": 2.5298, "step": 60 }, { "epoch": 0.00013945556547239577, "grad_norm": 0.10946134477853775, "learning_rate": 2.8e-05, "loss": 2.5403, "step": 70 }, { "epoch": 0.00015937778911130946, "grad_norm": 0.10962969064712524, "learning_rate": 3.2e-05, "loss": 2.5425, "step": 80 }, { "epoch": 0.00017930001275022313, "grad_norm": 0.10995651036500931, "learning_rate": 3.6e-05, "loss": 2.5407, "step": 90 }, { "epoch": 0.00019922223638913682, "grad_norm": 0.11075466126203537, "learning_rate": 4e-05, "loss": 2.5298, "step": 100 }, { "epoch": 0.00021914446002805048, "grad_norm": 0.11424089968204498, "learning_rate": 4.4e-05, "loss": 2.5405, "step": 110 }, { "epoch": 0.00023906668366696418, "grad_norm": 0.11623690277338028, "learning_rate": 4.8e-05, "loss": 2.5229, "step": 120 }, { "epoch": 0.00025898890730587787, "grad_norm": 0.11054340749979019, "learning_rate": 5.2e-05, "loss": 2.5286, "step": 130 }, { "epoch": 0.00027891113094479154, "grad_norm": 0.1180311068892479, "learning_rate": 5.6e-05, "loss": 2.5312, "step": 140 }, { "epoch": 0.0002988333545837052, "grad_norm": 0.1189795657992363, "learning_rate": 6e-05, "loss": 2.523, "step": 150 }, { "epoch": 0.0003187555782226189, "grad_norm": 0.11599735170602798, "learning_rate": 6.4e-05, "loss": 2.5317, "step": 160 }, { "epoch": 0.0003386778018615326, "grad_norm": 0.1201535239815712, "learning_rate": 6.800000000000001e-05, "loss": 2.5258, "step": 170 }, { "epoch": 0.00035860002550044625, "grad_norm": 0.12472977489233017, "learning_rate": 7.2e-05, "loss": 2.5343, "step": 180 }, { "epoch": 0.0003785222491393599, "grad_norm": 0.12174173444509506, "learning_rate": 7.6e-05, "loss": 2.5147, "step": 190 }, { "epoch": 0.00039844447277827364, "grad_norm": 0.14952948689460754, "learning_rate": 8e-05, "loss": 2.5294, "step": 200 }, { "epoch": 0.0004183666964171873, "grad_norm": 0.12045567482709885, "learning_rate": 8.400000000000001e-05, "loss": 2.5174, "step": 210 }, { "epoch": 0.00043828892005610097, "grad_norm": 0.1230551227927208, "learning_rate": 8.8e-05, "loss": 2.5108, "step": 220 }, { "epoch": 0.0004582111436950147, "grad_norm": 0.1234893724322319, "learning_rate": 9.2e-05, "loss": 2.5037, "step": 230 }, { "epoch": 0.00047813336733392836, "grad_norm": 0.12539304792881012, "learning_rate": 9.6e-05, "loss": 2.5161, "step": 240 }, { "epoch": 0.000498055590972842, "grad_norm": 0.1251053661108017, "learning_rate": 0.0001, "loss": 2.5164, "step": 250 }, { "epoch": 0.0005179778146117557, "grad_norm": 0.1283879578113556, "learning_rate": 0.000104, "loss": 2.5355, "step": 260 }, { "epoch": 0.0005379000382506694, "grad_norm": 0.12668563425540924, "learning_rate": 0.000108, "loss": 2.5127, "step": 270 }, { "epoch": 0.0005578222618895831, "grad_norm": 0.13011544942855835, "learning_rate": 0.000112, "loss": 2.5032, "step": 280 }, { "epoch": 0.0005777444855284968, "grad_norm": 0.13584111630916595, "learning_rate": 0.00011600000000000001, "loss": 2.5085, "step": 290 }, { "epoch": 0.0005976667091674104, "grad_norm": 0.1286136507987976, "learning_rate": 0.00012, "loss": 2.5184, "step": 300 }, { "epoch": 0.0006175889328063241, "grad_norm": 0.12791892886161804, "learning_rate": 0.000124, "loss": 2.5125, "step": 310 }, { "epoch": 0.0006375111564452378, "grad_norm": 0.13116571307182312, "learning_rate": 0.000128, "loss": 2.5055, "step": 320 }, { "epoch": 0.0006574333800841515, "grad_norm": 0.12447402626276016, "learning_rate": 0.000132, "loss": 2.5134, "step": 330 }, { "epoch": 0.0006773556037230652, "grad_norm": 0.13535194098949432, "learning_rate": 0.00013600000000000003, "loss": 2.5098, "step": 340 }, { "epoch": 0.0006972778273619788, "grad_norm": 0.16249141097068787, "learning_rate": 0.00014000000000000001, "loss": 2.5075, "step": 350 }, { "epoch": 0.0007172000510008925, "grad_norm": 0.13414284586906433, "learning_rate": 0.000144, "loss": 2.5101, "step": 360 }, { "epoch": 0.0007371222746398062, "grad_norm": 0.13875740766525269, "learning_rate": 0.000148, "loss": 2.5088, "step": 370 }, { "epoch": 0.0007570444982787198, "grad_norm": 0.13065026700496674, "learning_rate": 0.000152, "loss": 2.5095, "step": 380 }, { "epoch": 0.0007769667219176336, "grad_norm": 0.13997220993041992, "learning_rate": 0.000156, "loss": 2.5274, "step": 390 }, { "epoch": 0.0007968889455565473, "grad_norm": 0.14138536155223846, "learning_rate": 0.00016, "loss": 2.5162, "step": 400 }, { "epoch": 0.0008168111691954609, "grad_norm": 0.13484136760234833, "learning_rate": 0.000164, "loss": 2.5031, "step": 410 }, { "epoch": 0.0008367333928343746, "grad_norm": 0.14089025557041168, "learning_rate": 0.00016800000000000002, "loss": 2.492, "step": 420 }, { "epoch": 0.0008566556164732883, "grad_norm": 0.151044562458992, "learning_rate": 0.00017199999999999998, "loss": 2.5117, "step": 430 }, { "epoch": 0.0008765778401122019, "grad_norm": 0.14644674956798553, "learning_rate": 0.000176, "loss": 2.5137, "step": 440 }, { "epoch": 0.0008965000637511157, "grad_norm": 0.13731703162193298, "learning_rate": 0.00017999999999999998, "loss": 2.5244, "step": 450 }, { "epoch": 0.0009164222873900294, "grad_norm": 0.13919800519943237, "learning_rate": 0.000184, "loss": 2.5096, "step": 460 }, { "epoch": 0.000936344511028943, "grad_norm": 0.143782839179039, "learning_rate": 0.00018800000000000002, "loss": 2.5187, "step": 470 }, { "epoch": 0.0009562667346678567, "grad_norm": 0.1444229930639267, "learning_rate": 0.000192, "loss": 2.5086, "step": 480 }, { "epoch": 0.0009761889583067703, "grad_norm": 0.15487931668758392, "learning_rate": 0.00019600000000000002, "loss": 2.5127, "step": 490 }, { "epoch": 0.000996111181945684, "grad_norm": 0.14538133144378662, "learning_rate": 0.0002, "loss": 2.4943, "step": 500 }, { "epoch": 0.0010160334055845978, "grad_norm": 0.1431230902671814, "learning_rate": 0.000204, "loss": 2.5078, "step": 510 }, { "epoch": 0.0010359556292235115, "grad_norm": 0.14787259697914124, "learning_rate": 0.000208, "loss": 2.512, "step": 520 }, { "epoch": 0.001055877852862425, "grad_norm": 0.14886300265789032, "learning_rate": 0.000212, "loss": 2.5078, "step": 530 }, { "epoch": 0.0010758000765013387, "grad_norm": 0.1512366533279419, "learning_rate": 0.000216, "loss": 2.5131, "step": 540 }, { "epoch": 0.0010957223001402524, "grad_norm": 0.15718625485897064, "learning_rate": 0.00022, "loss": 2.5034, "step": 550 }, { "epoch": 0.0011156445237791661, "grad_norm": 0.1429869681596756, "learning_rate": 0.000224, "loss": 2.5188, "step": 560 }, { "epoch": 0.0011355667474180799, "grad_norm": 0.14706192910671234, "learning_rate": 0.000228, "loss": 2.5042, "step": 570 }, { "epoch": 0.0011554889710569936, "grad_norm": 0.1526515632867813, "learning_rate": 0.00023200000000000003, "loss": 2.5249, "step": 580 }, { "epoch": 0.001175411194695907, "grad_norm": 0.15660381317138672, "learning_rate": 0.000236, "loss": 2.504, "step": 590 }, { "epoch": 0.0011953334183348208, "grad_norm": 0.15409359335899353, "learning_rate": 0.00024, "loss": 2.4969, "step": 600 }, { "epoch": 0.0012152556419737345, "grad_norm": 0.15638570487499237, "learning_rate": 0.000244, "loss": 2.5022, "step": 610 }, { "epoch": 0.0012351778656126482, "grad_norm": 0.15512992441654205, "learning_rate": 0.000248, "loss": 2.5092, "step": 620 }, { "epoch": 0.001255100089251562, "grad_norm": 0.16342400014400482, "learning_rate": 0.000252, "loss": 2.5079, "step": 630 }, { "epoch": 0.0012750223128904757, "grad_norm": 0.15651795268058777, "learning_rate": 0.000256, "loss": 2.5106, "step": 640 }, { "epoch": 0.0012949445365293892, "grad_norm": 0.16322879493236542, "learning_rate": 0.00026000000000000003, "loss": 2.5023, "step": 650 }, { "epoch": 0.001314866760168303, "grad_norm": 0.16122852265834808, "learning_rate": 0.000264, "loss": 2.4956, "step": 660 }, { "epoch": 0.0013347889838072166, "grad_norm": 0.15795326232910156, "learning_rate": 0.000268, "loss": 2.5008, "step": 670 }, { "epoch": 0.0013547112074461303, "grad_norm": 0.16872341930866241, "learning_rate": 0.00027200000000000005, "loss": 2.5076, "step": 680 }, { "epoch": 0.001374633431085044, "grad_norm": 0.16317681968212128, "learning_rate": 0.00027600000000000004, "loss": 2.5108, "step": 690 }, { "epoch": 0.0013945556547239576, "grad_norm": 0.15933899581432343, "learning_rate": 0.00028000000000000003, "loss": 2.525, "step": 700 }, { "epoch": 0.0014144778783628713, "grad_norm": 0.1800127476453781, "learning_rate": 0.00028399999999999996, "loss": 2.5099, "step": 710 }, { "epoch": 0.001434400102001785, "grad_norm": 0.16866375505924225, "learning_rate": 0.000288, "loss": 2.5099, "step": 720 }, { "epoch": 0.0014543223256406987, "grad_norm": 0.16662093997001648, "learning_rate": 0.000292, "loss": 2.5061, "step": 730 }, { "epoch": 0.0014742445492796125, "grad_norm": 0.17133064568042755, "learning_rate": 0.000296, "loss": 2.5259, "step": 740 }, { "epoch": 0.0014941667729185262, "grad_norm": 0.17582514882087708, "learning_rate": 0.0003, "loss": 2.5114, "step": 750 }, { "epoch": 0.0015140889965574397, "grad_norm": 0.1791105717420578, "learning_rate": 0.000304, "loss": 2.511, "step": 760 }, { "epoch": 0.0015340112201963534, "grad_norm": 0.161884605884552, "learning_rate": 0.000308, "loss": 2.5059, "step": 770 }, { "epoch": 0.0015539334438352671, "grad_norm": 0.1598110795021057, "learning_rate": 0.000312, "loss": 2.5143, "step": 780 }, { "epoch": 0.0015738556674741808, "grad_norm": 0.16367986798286438, "learning_rate": 0.000316, "loss": 2.5016, "step": 790 }, { "epoch": 0.0015937778911130946, "grad_norm": 0.17654728889465332, "learning_rate": 0.00032, "loss": 2.5047, "step": 800 }, { "epoch": 0.001613700114752008, "grad_norm": 0.16961124539375305, "learning_rate": 0.000324, "loss": 2.4945, "step": 810 }, { "epoch": 0.0016336223383909218, "grad_norm": 0.17417950928211212, "learning_rate": 0.000328, "loss": 2.5133, "step": 820 }, { "epoch": 0.0016535445620298355, "grad_norm": 0.1763867735862732, "learning_rate": 0.00033200000000000005, "loss": 2.511, "step": 830 }, { "epoch": 0.0016734667856687492, "grad_norm": 0.17779897153377533, "learning_rate": 0.00033600000000000004, "loss": 2.5184, "step": 840 }, { "epoch": 0.001693389009307663, "grad_norm": 0.17711205780506134, "learning_rate": 0.00034, "loss": 2.5141, "step": 850 }, { "epoch": 0.0017133112329465767, "grad_norm": 0.16711321473121643, "learning_rate": 0.00034399999999999996, "loss": 2.4976, "step": 860 }, { "epoch": 0.0017332334565854902, "grad_norm": 0.17754951119422913, "learning_rate": 0.000348, "loss": 2.5099, "step": 870 }, { "epoch": 0.0017531556802244039, "grad_norm": 0.16957689821720123, "learning_rate": 0.000352, "loss": 2.5143, "step": 880 }, { "epoch": 0.0017730779038633176, "grad_norm": 0.1758495420217514, "learning_rate": 0.000356, "loss": 2.5158, "step": 890 }, { "epoch": 0.0017930001275022313, "grad_norm": 0.1774735301733017, "learning_rate": 0.00035999999999999997, "loss": 2.5, "step": 900 }, { "epoch": 0.001812922351141145, "grad_norm": 0.16998068988323212, "learning_rate": 0.000364, "loss": 2.514, "step": 910 }, { "epoch": 0.0018328445747800588, "grad_norm": 0.17667070031166077, "learning_rate": 0.000368, "loss": 2.4984, "step": 920 }, { "epoch": 0.0018527667984189723, "grad_norm": 0.18873026967048645, "learning_rate": 0.000372, "loss": 2.4946, "step": 930 }, { "epoch": 0.001872689022057886, "grad_norm": 0.17981784045696259, "learning_rate": 0.00037600000000000003, "loss": 2.4981, "step": 940 }, { "epoch": 0.0018926112456967997, "grad_norm": 0.1838296800851822, "learning_rate": 0.00038, "loss": 2.5047, "step": 950 }, { "epoch": 0.0019125334693357134, "grad_norm": 0.18285934627056122, "learning_rate": 0.000384, "loss": 2.5082, "step": 960 }, { "epoch": 0.0019324556929746271, "grad_norm": 0.3023233115673065, "learning_rate": 0.000388, "loss": 2.5176, "step": 970 }, { "epoch": 0.0019523779166135406, "grad_norm": 0.17722448706626892, "learning_rate": 0.00039200000000000004, "loss": 2.5161, "step": 980 }, { "epoch": 0.0019723001402524544, "grad_norm": 0.1990327090024948, "learning_rate": 0.00039600000000000003, "loss": 2.5266, "step": 990 }, { "epoch": 0.001992222363891368, "grad_norm": 0.19595959782600403, "learning_rate": 0.0004, "loss": 2.5245, "step": 1000 }, { "epoch": 0.002012144587530282, "grad_norm": 0.17783492803573608, "learning_rate": 0.000404, "loss": 2.5162, "step": 1010 }, { "epoch": 0.0020320668111691955, "grad_norm": 0.1853020340204239, "learning_rate": 0.000408, "loss": 2.5119, "step": 1020 }, { "epoch": 0.0020519890348081092, "grad_norm": 0.19267410039901733, "learning_rate": 0.000412, "loss": 2.5248, "step": 1030 }, { "epoch": 0.002071911258447023, "grad_norm": 0.18308015167713165, "learning_rate": 0.000416, "loss": 2.5205, "step": 1040 }, { "epoch": 0.0020918334820859367, "grad_norm": 0.19002577662467957, "learning_rate": 0.00042, "loss": 2.5161, "step": 1050 }, { "epoch": 0.00211175570572485, "grad_norm": 0.17977763712406158, "learning_rate": 0.000424, "loss": 2.5095, "step": 1060 }, { "epoch": 0.0021316779293637637, "grad_norm": 0.18890805542469025, "learning_rate": 0.000428, "loss": 2.5193, "step": 1070 }, { "epoch": 0.0021516001530026774, "grad_norm": 0.179189994931221, "learning_rate": 0.000432, "loss": 2.5159, "step": 1080 }, { "epoch": 0.002171522376641591, "grad_norm": 0.20613765716552734, "learning_rate": 0.000436, "loss": 2.5226, "step": 1090 }, { "epoch": 0.002191444600280505, "grad_norm": 0.196745827794075, "learning_rate": 0.00044, "loss": 2.5271, "step": 1100 }, { "epoch": 0.0022113668239194186, "grad_norm": 0.19697219133377075, "learning_rate": 0.000444, "loss": 2.5141, "step": 1110 }, { "epoch": 0.0022312890475583323, "grad_norm": 0.18688498437404633, "learning_rate": 0.000448, "loss": 2.5104, "step": 1120 }, { "epoch": 0.002251211271197246, "grad_norm": 0.18862606585025787, "learning_rate": 0.00045200000000000004, "loss": 2.5298, "step": 1130 }, { "epoch": 0.0022711334948361597, "grad_norm": 0.18029338121414185, "learning_rate": 0.000456, "loss": 2.5268, "step": 1140 }, { "epoch": 0.0022910557184750734, "grad_norm": 0.18459296226501465, "learning_rate": 0.00046, "loss": 2.5137, "step": 1150 }, { "epoch": 0.002310977942113987, "grad_norm": 0.20109063386917114, "learning_rate": 0.00046400000000000006, "loss": 2.53, "step": 1160 }, { "epoch": 0.002330900165752901, "grad_norm": 0.19286932051181793, "learning_rate": 0.00046800000000000005, "loss": 2.5177, "step": 1170 }, { "epoch": 0.002350822389391814, "grad_norm": 0.19991391897201538, "learning_rate": 0.000472, "loss": 2.5054, "step": 1180 }, { "epoch": 0.002370744613030728, "grad_norm": 0.18395060300827026, "learning_rate": 0.00047599999999999997, "loss": 2.5297, "step": 1190 }, { "epoch": 0.0023906668366696416, "grad_norm": 0.19021935760974884, "learning_rate": 0.00048, "loss": 2.5334, "step": 1200 }, { "epoch": 0.0024105890603085553, "grad_norm": 0.21491365134716034, "learning_rate": 0.000484, "loss": 2.5217, "step": 1210 }, { "epoch": 0.002430511283947469, "grad_norm": 0.1992131471633911, "learning_rate": 0.000488, "loss": 2.511, "step": 1220 }, { "epoch": 0.0024504335075863828, "grad_norm": 0.1866706907749176, "learning_rate": 0.000492, "loss": 2.5275, "step": 1230 }, { "epoch": 0.0024703557312252965, "grad_norm": 0.2097688913345337, "learning_rate": 0.000496, "loss": 2.5216, "step": 1240 }, { "epoch": 0.00249027795486421, "grad_norm": 0.2049318104982376, "learning_rate": 0.0005, "loss": 2.5407, "step": 1250 }, { "epoch": 0.002510200178503124, "grad_norm": 0.18944574892520905, "learning_rate": 0.000504, "loss": 2.53, "step": 1260 }, { "epoch": 0.0025301224021420377, "grad_norm": 0.19439218938350677, "learning_rate": 0.000508, "loss": 2.5281, "step": 1270 }, { "epoch": 0.0025500446257809514, "grad_norm": 0.2092132419347763, "learning_rate": 0.000512, "loss": 2.5256, "step": 1280 }, { "epoch": 0.0025699668494198647, "grad_norm": 0.20420727133750916, "learning_rate": 0.0005160000000000001, "loss": 2.5353, "step": 1290 }, { "epoch": 0.0025898890730587784, "grad_norm": 0.20064711570739746, "learning_rate": 0.0005200000000000001, "loss": 2.5191, "step": 1300 }, { "epoch": 0.002609811296697692, "grad_norm": 0.18636390566825867, "learning_rate": 0.000524, "loss": 2.5268, "step": 1310 }, { "epoch": 0.002629733520336606, "grad_norm": 0.22001898288726807, "learning_rate": 0.000528, "loss": 2.5083, "step": 1320 }, { "epoch": 0.0026496557439755195, "grad_norm": 0.20122948288917542, "learning_rate": 0.000532, "loss": 2.5241, "step": 1330 }, { "epoch": 0.0026695779676144333, "grad_norm": 0.1916215866804123, "learning_rate": 0.000536, "loss": 2.5241, "step": 1340 }, { "epoch": 0.002689500191253347, "grad_norm": 0.20830222964286804, "learning_rate": 0.00054, "loss": 2.5258, "step": 1350 }, { "epoch": 0.0027094224148922607, "grad_norm": 0.21430468559265137, "learning_rate": 0.0005440000000000001, "loss": 2.525, "step": 1360 }, { "epoch": 0.0027293446385311744, "grad_norm": 0.20717178285121918, "learning_rate": 0.0005480000000000001, "loss": 2.5122, "step": 1370 }, { "epoch": 0.002749266862170088, "grad_norm": 0.22540459036827087, "learning_rate": 0.0005520000000000001, "loss": 2.5252, "step": 1380 }, { "epoch": 0.002769189085809002, "grad_norm": 0.24714042246341705, "learning_rate": 0.0005560000000000001, "loss": 2.5128, "step": 1390 }, { "epoch": 0.002789111309447915, "grad_norm": 0.1944097876548767, "learning_rate": 0.0005600000000000001, "loss": 2.5283, "step": 1400 }, { "epoch": 0.002809033533086829, "grad_norm": 0.21367643773555756, "learning_rate": 0.0005639999999999999, "loss": 2.5304, "step": 1410 }, { "epoch": 0.0028289557567257426, "grad_norm": 0.24416056275367737, "learning_rate": 0.0005679999999999999, "loss": 2.5271, "step": 1420 }, { "epoch": 0.0028488779803646563, "grad_norm": 0.20965443551540375, "learning_rate": 0.0005719999999999999, "loss": 2.5364, "step": 1430 }, { "epoch": 0.00286880020400357, "grad_norm": 0.21871362626552582, "learning_rate": 0.000576, "loss": 2.5491, "step": 1440 }, { "epoch": 0.0028887224276424837, "grad_norm": 0.20491304993629456, "learning_rate": 0.00058, "loss": 2.5321, "step": 1450 }, { "epoch": 0.0029086446512813975, "grad_norm": 0.22155886888504028, "learning_rate": 0.000584, "loss": 2.5312, "step": 1460 }, { "epoch": 0.002928566874920311, "grad_norm": 0.2045210599899292, "learning_rate": 0.000588, "loss": 2.5323, "step": 1470 }, { "epoch": 0.002948489098559225, "grad_norm": 0.2011423110961914, "learning_rate": 0.000592, "loss": 2.5408, "step": 1480 }, { "epoch": 0.0029684113221981386, "grad_norm": 0.2028450071811676, "learning_rate": 0.000596, "loss": 2.5268, "step": 1490 }, { "epoch": 0.0029883335458370523, "grad_norm": 0.21257512271404266, "learning_rate": 0.0006, "loss": 2.5388, "step": 1500 }, { "epoch": 0.0030082557694759656, "grad_norm": 0.21462330222129822, "learning_rate": 0.000604, "loss": 2.5357, "step": 1510 }, { "epoch": 0.0030281779931148793, "grad_norm": 0.23705007135868073, "learning_rate": 0.000608, "loss": 2.5462, "step": 1520 }, { "epoch": 0.003048100216753793, "grad_norm": 0.19787919521331787, "learning_rate": 0.000612, "loss": 2.5172, "step": 1530 }, { "epoch": 0.003068022440392707, "grad_norm": 0.1920015811920166, "learning_rate": 0.000616, "loss": 2.5346, "step": 1540 }, { "epoch": 0.0030879446640316205, "grad_norm": 0.20741654932498932, "learning_rate": 0.00062, "loss": 2.5439, "step": 1550 }, { "epoch": 0.0031078668876705342, "grad_norm": 0.21476690471172333, "learning_rate": 0.000624, "loss": 2.5426, "step": 1560 }, { "epoch": 0.003127789111309448, "grad_norm": 0.2005511373281479, "learning_rate": 0.000628, "loss": 2.5352, "step": 1570 }, { "epoch": 0.0031477113349483617, "grad_norm": 0.2319255769252777, "learning_rate": 0.000632, "loss": 2.5427, "step": 1580 }, { "epoch": 0.0031676335585872754, "grad_norm": 0.20050933957099915, "learning_rate": 0.0006360000000000001, "loss": 2.542, "step": 1590 }, { "epoch": 0.003187555782226189, "grad_norm": 0.20548281073570251, "learning_rate": 0.00064, "loss": 2.5315, "step": 1600 }, { "epoch": 0.003207478005865103, "grad_norm": 0.21950297057628632, "learning_rate": 0.000644, "loss": 2.5462, "step": 1610 }, { "epoch": 0.003227400229504016, "grad_norm": 0.23117676377296448, "learning_rate": 0.000648, "loss": 2.5313, "step": 1620 }, { "epoch": 0.00324732245314293, "grad_norm": 0.20900553464889526, "learning_rate": 0.000652, "loss": 2.5651, "step": 1630 }, { "epoch": 0.0032672446767818436, "grad_norm": 0.2222466617822647, "learning_rate": 0.000656, "loss": 2.5399, "step": 1640 }, { "epoch": 0.0032871669004207573, "grad_norm": 0.19482707977294922, "learning_rate": 0.00066, "loss": 2.5241, "step": 1650 }, { "epoch": 0.003307089124059671, "grad_norm": 0.22494061291217804, "learning_rate": 0.0006640000000000001, "loss": 2.5469, "step": 1660 }, { "epoch": 0.0033270113476985847, "grad_norm": 0.21638897061347961, "learning_rate": 0.0006680000000000001, "loss": 2.5493, "step": 1670 }, { "epoch": 0.0033469335713374984, "grad_norm": 0.22949138283729553, "learning_rate": 0.0006720000000000001, "loss": 2.53, "step": 1680 }, { "epoch": 0.003366855794976412, "grad_norm": 0.22913858294487, "learning_rate": 0.0006760000000000001, "loss": 2.5351, "step": 1690 }, { "epoch": 0.003386778018615326, "grad_norm": 0.22649629414081573, "learning_rate": 0.00068, "loss": 2.5506, "step": 1700 }, { "epoch": 0.0034067002422542396, "grad_norm": 0.21526403725147247, "learning_rate": 0.000684, "loss": 2.5383, "step": 1710 }, { "epoch": 0.0034266224658931533, "grad_norm": 0.24186953902244568, "learning_rate": 0.0006879999999999999, "loss": 2.5266, "step": 1720 }, { "epoch": 0.0034465446895320666, "grad_norm": 0.20638421177864075, "learning_rate": 0.000692, "loss": 2.5599, "step": 1730 }, { "epoch": 0.0034664669131709803, "grad_norm": 0.21967943012714386, "learning_rate": 0.000696, "loss": 2.536, "step": 1740 }, { "epoch": 0.003486389136809894, "grad_norm": 0.2160131335258484, "learning_rate": 0.0007, "loss": 2.5549, "step": 1750 }, { "epoch": 0.0035063113604488078, "grad_norm": 0.20818905532360077, "learning_rate": 0.000704, "loss": 2.542, "step": 1760 }, { "epoch": 0.0035262335840877215, "grad_norm": 0.2235855609178543, "learning_rate": 0.000708, "loss": 2.5307, "step": 1770 }, { "epoch": 0.003546155807726635, "grad_norm": 0.26046672463417053, "learning_rate": 0.000712, "loss": 2.5297, "step": 1780 }, { "epoch": 0.003566078031365549, "grad_norm": 0.2510378360748291, "learning_rate": 0.000716, "loss": 2.5383, "step": 1790 }, { "epoch": 0.0035860002550044626, "grad_norm": 0.2252194881439209, "learning_rate": 0.0007199999999999999, "loss": 2.5455, "step": 1800 }, { "epoch": 0.0036059224786433764, "grad_norm": 0.2500559389591217, "learning_rate": 0.000724, "loss": 2.5427, "step": 1810 }, { "epoch": 0.00362584470228229, "grad_norm": 0.20764319598674774, "learning_rate": 0.000728, "loss": 2.5605, "step": 1820 }, { "epoch": 0.003645766925921204, "grad_norm": 0.2280072569847107, "learning_rate": 0.000732, "loss": 2.5571, "step": 1830 }, { "epoch": 0.0036656891495601175, "grad_norm": 0.23052218556404114, "learning_rate": 0.000736, "loss": 2.5428, "step": 1840 }, { "epoch": 0.003685611373199031, "grad_norm": 0.25743797421455383, "learning_rate": 0.00074, "loss": 2.552, "step": 1850 }, { "epoch": 0.0037055335968379445, "grad_norm": 0.25077056884765625, "learning_rate": 0.000744, "loss": 2.5478, "step": 1860 }, { "epoch": 0.0037254558204768582, "grad_norm": 0.2216740846633911, "learning_rate": 0.000748, "loss": 2.5468, "step": 1870 }, { "epoch": 0.003745378044115772, "grad_norm": 0.2421252429485321, "learning_rate": 0.0007520000000000001, "loss": 2.5517, "step": 1880 }, { "epoch": 0.0037653002677546857, "grad_norm": 0.21068233251571655, "learning_rate": 0.000756, "loss": 2.5548, "step": 1890 }, { "epoch": 0.0037852224913935994, "grad_norm": 0.25830066204071045, "learning_rate": 0.00076, "loss": 2.536, "step": 1900 }, { "epoch": 0.003805144715032513, "grad_norm": 0.29486846923828125, "learning_rate": 0.000764, "loss": 2.5426, "step": 1910 }, { "epoch": 0.003825066938671427, "grad_norm": 0.23505210876464844, "learning_rate": 0.000768, "loss": 2.5486, "step": 1920 }, { "epoch": 0.0038449891623103406, "grad_norm": 0.26091268658638, "learning_rate": 0.000772, "loss": 2.5581, "step": 1930 }, { "epoch": 0.0038649113859492543, "grad_norm": 0.21453934907913208, "learning_rate": 0.000776, "loss": 2.5564, "step": 1940 }, { "epoch": 0.003884833609588168, "grad_norm": 0.2485467940568924, "learning_rate": 0.0007800000000000001, "loss": 2.5367, "step": 1950 }, { "epoch": 0.0039047558332270813, "grad_norm": 0.26867416501045227, "learning_rate": 0.0007840000000000001, "loss": 2.5481, "step": 1960 }, { "epoch": 0.0039246780568659954, "grad_norm": 0.2548444867134094, "learning_rate": 0.0007880000000000001, "loss": 2.5687, "step": 1970 }, { "epoch": 0.003944600280504909, "grad_norm": 0.23629386723041534, "learning_rate": 0.0007920000000000001, "loss": 2.5633, "step": 1980 }, { "epoch": 0.003964522504143823, "grad_norm": 0.2663552761077881, "learning_rate": 0.000796, "loss": 2.5413, "step": 1990 }, { "epoch": 0.003984444727782736, "grad_norm": 0.2327505350112915, "learning_rate": 0.0008, "loss": 2.5498, "step": 2000 }, { "epoch": 0.0040043669514216495, "grad_norm": 0.2415759414434433, "learning_rate": 0.000804, "loss": 2.5603, "step": 2010 }, { "epoch": 0.004024289175060564, "grad_norm": 0.2319788634777069, "learning_rate": 0.000808, "loss": 2.5505, "step": 2020 }, { "epoch": 0.004044211398699477, "grad_norm": 0.2219291776418686, "learning_rate": 0.0008120000000000001, "loss": 2.5598, "step": 2030 }, { "epoch": 0.004064133622338391, "grad_norm": 0.2662579417228699, "learning_rate": 0.000816, "loss": 2.5572, "step": 2040 }, { "epoch": 0.004084055845977304, "grad_norm": 0.21992264688014984, "learning_rate": 0.00082, "loss": 2.5513, "step": 2050 }, { "epoch": 0.0041039780696162185, "grad_norm": 0.2151525765657425, "learning_rate": 0.000824, "loss": 2.5499, "step": 2060 }, { "epoch": 0.004123900293255132, "grad_norm": 0.24063314497470856, "learning_rate": 0.000828, "loss": 2.5749, "step": 2070 }, { "epoch": 0.004143822516894046, "grad_norm": 0.2284354567527771, "learning_rate": 0.000832, "loss": 2.5667, "step": 2080 }, { "epoch": 0.004163744740532959, "grad_norm": 0.22535906732082367, "learning_rate": 0.0008359999999999999, "loss": 2.5623, "step": 2090 }, { "epoch": 0.004183666964171873, "grad_norm": 0.2491723746061325, "learning_rate": 0.00084, "loss": 2.553, "step": 2100 }, { "epoch": 0.004203589187810787, "grad_norm": 0.2715577483177185, "learning_rate": 0.000844, "loss": 2.5434, "step": 2110 }, { "epoch": 0.0042235114114497, "grad_norm": 0.2632870674133301, "learning_rate": 0.000848, "loss": 2.5559, "step": 2120 }, { "epoch": 0.004243433635088614, "grad_norm": 0.2472478300333023, "learning_rate": 0.000852, "loss": 2.5612, "step": 2130 }, { "epoch": 0.004263355858727527, "grad_norm": 0.2154143899679184, "learning_rate": 0.000856, "loss": 2.5398, "step": 2140 }, { "epoch": 0.0042832780823664415, "grad_norm": 0.24163460731506348, "learning_rate": 0.00086, "loss": 2.5402, "step": 2150 }, { "epoch": 0.004303200306005355, "grad_norm": 0.24049802124500275, "learning_rate": 0.000864, "loss": 2.5528, "step": 2160 }, { "epoch": 0.004323122529644269, "grad_norm": 0.308476984500885, "learning_rate": 0.0008680000000000001, "loss": 2.5636, "step": 2170 }, { "epoch": 0.004343044753283182, "grad_norm": 0.2502630054950714, "learning_rate": 0.000872, "loss": 2.56, "step": 2180 }, { "epoch": 0.004362966976922096, "grad_norm": 0.23667123913764954, "learning_rate": 0.000876, "loss": 2.5573, "step": 2190 }, { "epoch": 0.00438288920056101, "grad_norm": 0.27608323097229004, "learning_rate": 0.00088, "loss": 2.5533, "step": 2200 }, { "epoch": 0.004402811424199924, "grad_norm": 0.26815056800842285, "learning_rate": 0.000884, "loss": 2.5745, "step": 2210 }, { "epoch": 0.004422733647838837, "grad_norm": 0.24995353817939758, "learning_rate": 0.000888, "loss": 2.5607, "step": 2220 }, { "epoch": 0.004442655871477751, "grad_norm": 0.2644597589969635, "learning_rate": 0.000892, "loss": 2.5683, "step": 2230 }, { "epoch": 0.004462578095116665, "grad_norm": 0.2791763246059418, "learning_rate": 0.000896, "loss": 2.5792, "step": 2240 }, { "epoch": 0.004482500318755578, "grad_norm": 0.26268434524536133, "learning_rate": 0.0009000000000000001, "loss": 2.574, "step": 2250 }, { "epoch": 0.004502422542394492, "grad_norm": 0.2735709547996521, "learning_rate": 0.0009040000000000001, "loss": 2.5622, "step": 2260 }, { "epoch": 0.004522344766033405, "grad_norm": 0.24114887416362762, "learning_rate": 0.0009080000000000001, "loss": 2.5442, "step": 2270 }, { "epoch": 0.0045422669896723195, "grad_norm": 0.3228271007537842, "learning_rate": 0.000912, "loss": 2.5565, "step": 2280 }, { "epoch": 0.004562189213311233, "grad_norm": 0.2338641881942749, "learning_rate": 0.000916, "loss": 2.5677, "step": 2290 }, { "epoch": 0.004582111436950147, "grad_norm": 0.23789522051811218, "learning_rate": 0.00092, "loss": 2.5838, "step": 2300 }, { "epoch": 0.00460203366058906, "grad_norm": 0.20970459282398224, "learning_rate": 0.000924, "loss": 2.5738, "step": 2310 }, { "epoch": 0.004621955884227974, "grad_norm": 0.24046500027179718, "learning_rate": 0.0009280000000000001, "loss": 2.5637, "step": 2320 }, { "epoch": 0.004641878107866888, "grad_norm": 0.27391934394836426, "learning_rate": 0.0009320000000000001, "loss": 2.5536, "step": 2330 }, { "epoch": 0.004661800331505802, "grad_norm": 0.26901596784591675, "learning_rate": 0.0009360000000000001, "loss": 2.5756, "step": 2340 }, { "epoch": 0.004681722555144715, "grad_norm": 0.28664132952690125, "learning_rate": 0.00094, "loss": 2.5586, "step": 2350 }, { "epoch": 0.004701644778783628, "grad_norm": 0.2764895260334015, "learning_rate": 0.000944, "loss": 2.5641, "step": 2360 }, { "epoch": 0.0047215670024225425, "grad_norm": 0.33589300513267517, "learning_rate": 0.000948, "loss": 2.5669, "step": 2370 }, { "epoch": 0.004741489226061456, "grad_norm": 0.24068614840507507, "learning_rate": 0.0009519999999999999, "loss": 2.5634, "step": 2380 }, { "epoch": 0.00476141144970037, "grad_norm": 0.260348379611969, "learning_rate": 0.0009559999999999999, "loss": 2.5658, "step": 2390 }, { "epoch": 0.004781333673339283, "grad_norm": 0.2319243848323822, "learning_rate": 0.00096, "loss": 2.5773, "step": 2400 }, { "epoch": 0.004801255896978197, "grad_norm": 0.2722843289375305, "learning_rate": 0.000964, "loss": 2.577, "step": 2410 }, { "epoch": 0.004821178120617111, "grad_norm": 0.24729569256305695, "learning_rate": 0.000968, "loss": 2.5614, "step": 2420 }, { "epoch": 0.004841100344256025, "grad_norm": 0.23456113040447235, "learning_rate": 0.000972, "loss": 2.5783, "step": 2430 }, { "epoch": 0.004861022567894938, "grad_norm": 0.2702525556087494, "learning_rate": 0.000976, "loss": 2.5755, "step": 2440 }, { "epoch": 0.004880944791533852, "grad_norm": 0.22950294613838196, "learning_rate": 0.00098, "loss": 2.5594, "step": 2450 }, { "epoch": 0.0049008670151727655, "grad_norm": 0.3355078101158142, "learning_rate": 0.000984, "loss": 2.5747, "step": 2460 }, { "epoch": 0.004920789238811679, "grad_norm": 0.24861250817775726, "learning_rate": 0.000988, "loss": 2.5704, "step": 2470 }, { "epoch": 0.004940711462450593, "grad_norm": 0.29910025000572205, "learning_rate": 0.000992, "loss": 2.5651, "step": 2480 }, { "epoch": 0.004960633686089506, "grad_norm": 0.30367907881736755, "learning_rate": 0.000996, "loss": 2.5749, "step": 2490 }, { "epoch": 0.00498055590972842, "grad_norm": 0.2733355164527893, "learning_rate": 0.001, "loss": 2.5694, "step": 2500 }, { "epoch": 0.005000478133367334, "grad_norm": 0.28956910967826843, "learning_rate": 0.0010040000000000001, "loss": 2.5735, "step": 2510 }, { "epoch": 0.005020400357006248, "grad_norm": 0.2837396562099457, "learning_rate": 0.001008, "loss": 2.5657, "step": 2520 }, { "epoch": 0.005040322580645161, "grad_norm": 0.3412976562976837, "learning_rate": 0.001012, "loss": 2.5704, "step": 2530 }, { "epoch": 0.005060244804284075, "grad_norm": 0.24843519926071167, "learning_rate": 0.001016, "loss": 2.5901, "step": 2540 }, { "epoch": 0.005080167027922989, "grad_norm": 0.2358395755290985, "learning_rate": 0.00102, "loss": 2.5622, "step": 2550 }, { "epoch": 0.005100089251561903, "grad_norm": 0.23315957188606262, "learning_rate": 0.001024, "loss": 2.5837, "step": 2560 }, { "epoch": 0.005120011475200816, "grad_norm": 0.279890239238739, "learning_rate": 0.001028, "loss": 2.5863, "step": 2570 }, { "epoch": 0.005139933698839729, "grad_norm": 0.22231028974056244, "learning_rate": 0.0010320000000000001, "loss": 2.579, "step": 2580 }, { "epoch": 0.0051598559224786435, "grad_norm": 0.22945058345794678, "learning_rate": 0.001036, "loss": 2.5859, "step": 2590 }, { "epoch": 0.005179778146117557, "grad_norm": 0.24692727625370026, "learning_rate": 0.0010400000000000001, "loss": 2.5768, "step": 2600 }, { "epoch": 0.005199700369756471, "grad_norm": 0.25870949029922485, "learning_rate": 0.001044, "loss": 2.5847, "step": 2610 }, { "epoch": 0.005219622593395384, "grad_norm": 0.4632367491722107, "learning_rate": 0.001048, "loss": 2.5836, "step": 2620 }, { "epoch": 0.005239544817034298, "grad_norm": 0.2412486970424652, "learning_rate": 0.001052, "loss": 2.564, "step": 2630 }, { "epoch": 0.005259467040673212, "grad_norm": 0.6172210574150085, "learning_rate": 0.001056, "loss": 2.58, "step": 2640 }, { "epoch": 0.005279389264312126, "grad_norm": 0.2520333230495453, "learning_rate": 0.0010600000000000002, "loss": 2.5947, "step": 2650 }, { "epoch": 0.005299311487951039, "grad_norm": 0.23048943281173706, "learning_rate": 0.001064, "loss": 2.5853, "step": 2660 }, { "epoch": 0.005319233711589953, "grad_norm": 0.34435757994651794, "learning_rate": 0.0010680000000000002, "loss": 2.5849, "step": 2670 }, { "epoch": 0.0053391559352288665, "grad_norm": 0.2797832787036896, "learning_rate": 0.001072, "loss": 2.5793, "step": 2680 }, { "epoch": 0.00535907815886778, "grad_norm": 0.24780000746250153, "learning_rate": 0.0010760000000000001, "loss": 2.5835, "step": 2690 }, { "epoch": 0.005379000382506694, "grad_norm": 0.2505281865596771, "learning_rate": 0.00108, "loss": 2.5948, "step": 2700 }, { "epoch": 0.005398922606145607, "grad_norm": 0.3478362262248993, "learning_rate": 0.0010840000000000001, "loss": 2.5833, "step": 2710 }, { "epoch": 0.005418844829784521, "grad_norm": 0.2648862302303314, "learning_rate": 0.0010880000000000002, "loss": 2.596, "step": 2720 }, { "epoch": 0.005438767053423435, "grad_norm": 0.2331032156944275, "learning_rate": 0.001092, "loss": 2.5817, "step": 2730 }, { "epoch": 0.005458689277062349, "grad_norm": 0.30658334493637085, "learning_rate": 0.0010960000000000002, "loss": 2.5799, "step": 2740 }, { "epoch": 0.005478611500701262, "grad_norm": 0.22962512075901031, "learning_rate": 0.0011, "loss": 2.5769, "step": 2750 }, { "epoch": 0.005498533724340176, "grad_norm": 0.23078025877475739, "learning_rate": 0.0011040000000000002, "loss": 2.5785, "step": 2760 }, { "epoch": 0.0055184559479790896, "grad_norm": 0.2438967525959015, "learning_rate": 0.001108, "loss": 2.5755, "step": 2770 }, { "epoch": 0.005538378171618004, "grad_norm": 0.40837937593460083, "learning_rate": 0.0011120000000000001, "loss": 2.5805, "step": 2780 }, { "epoch": 0.005558300395256917, "grad_norm": 0.2555682063102722, "learning_rate": 0.001116, "loss": 2.5828, "step": 2790 }, { "epoch": 0.00557822261889583, "grad_norm": 0.23931996524333954, "learning_rate": 0.0011200000000000001, "loss": 2.5952, "step": 2800 }, { "epoch": 0.0055981448425347444, "grad_norm": 0.26489880681037903, "learning_rate": 0.0011240000000000002, "loss": 2.562, "step": 2810 }, { "epoch": 0.005618067066173658, "grad_norm": 0.2707553803920746, "learning_rate": 0.0011279999999999999, "loss": 2.5948, "step": 2820 }, { "epoch": 0.005637989289812572, "grad_norm": 0.28092247247695923, "learning_rate": 0.001132, "loss": 2.6003, "step": 2830 }, { "epoch": 0.005657911513451485, "grad_norm": 0.26613515615463257, "learning_rate": 0.0011359999999999999, "loss": 2.5961, "step": 2840 }, { "epoch": 0.005677833737090399, "grad_norm": 0.25995898246765137, "learning_rate": 0.00114, "loss": 2.5838, "step": 2850 }, { "epoch": 0.005697755960729313, "grad_norm": 0.271176815032959, "learning_rate": 0.0011439999999999998, "loss": 2.5873, "step": 2860 }, { "epoch": 0.005717678184368227, "grad_norm": 0.26292479038238525, "learning_rate": 0.001148, "loss": 2.5934, "step": 2870 }, { "epoch": 0.00573760040800714, "grad_norm": 0.2620735466480255, "learning_rate": 0.001152, "loss": 2.5874, "step": 2880 }, { "epoch": 0.005757522631646054, "grad_norm": 0.23686350882053375, "learning_rate": 0.001156, "loss": 2.5898, "step": 2890 }, { "epoch": 0.0057774448552849675, "grad_norm": 0.3200198709964752, "learning_rate": 0.00116, "loss": 2.5973, "step": 2900 }, { "epoch": 0.005797367078923881, "grad_norm": 0.23734453320503235, "learning_rate": 0.0011639999999999999, "loss": 2.5905, "step": 2910 }, { "epoch": 0.005817289302562795, "grad_norm": 0.2879522740840912, "learning_rate": 0.001168, "loss": 2.5963, "step": 2920 }, { "epoch": 0.005837211526201708, "grad_norm": 0.2449742555618286, "learning_rate": 0.0011719999999999999, "loss": 2.5859, "step": 2930 }, { "epoch": 0.005857133749840622, "grad_norm": 0.24374155700206757, "learning_rate": 0.001176, "loss": 2.6111, "step": 2940 }, { "epoch": 0.005877055973479536, "grad_norm": 0.28259655833244324, "learning_rate": 0.00118, "loss": 2.6069, "step": 2950 }, { "epoch": 0.00589697819711845, "grad_norm": 0.27314120531082153, "learning_rate": 0.001184, "loss": 2.5879, "step": 2960 }, { "epoch": 0.005916900420757363, "grad_norm": 0.30148279666900635, "learning_rate": 0.001188, "loss": 2.5902, "step": 2970 }, { "epoch": 0.005936822644396277, "grad_norm": 0.32117053866386414, "learning_rate": 0.001192, "loss": 2.596, "step": 2980 }, { "epoch": 0.0059567448680351905, "grad_norm": 0.25355303287506104, "learning_rate": 0.001196, "loss": 2.601, "step": 2990 }, { "epoch": 0.005976667091674105, "grad_norm": 0.2518067955970764, "learning_rate": 0.0012, "loss": 2.6085, "step": 3000 }, { "epoch": 0.005996589315313018, "grad_norm": 0.26244089007377625, "learning_rate": 0.001204, "loss": 2.6123, "step": 3010 }, { "epoch": 0.006016511538951931, "grad_norm": 0.26593145728111267, "learning_rate": 0.001208, "loss": 2.6089, "step": 3020 }, { "epoch": 0.006036433762590845, "grad_norm": 0.29951128363609314, "learning_rate": 0.001212, "loss": 2.588, "step": 3030 }, { "epoch": 0.006056355986229759, "grad_norm": 0.27152347564697266, "learning_rate": 0.001216, "loss": 2.6029, "step": 3040 }, { "epoch": 0.006076278209868673, "grad_norm": 0.24328401684761047, "learning_rate": 0.00122, "loss": 2.5943, "step": 3050 }, { "epoch": 0.006096200433507586, "grad_norm": 0.2655673325061798, "learning_rate": 0.001224, "loss": 2.5894, "step": 3060 }, { "epoch": 0.0061161226571465, "grad_norm": 0.3621736466884613, "learning_rate": 0.001228, "loss": 2.5887, "step": 3070 }, { "epoch": 0.006136044880785414, "grad_norm": 0.2576375901699066, "learning_rate": 0.001232, "loss": 2.5968, "step": 3080 }, { "epoch": 0.006155967104424328, "grad_norm": 0.29246342182159424, "learning_rate": 0.0012360000000000001, "loss": 2.6036, "step": 3090 }, { "epoch": 0.006175889328063241, "grad_norm": 0.27366045117378235, "learning_rate": 0.00124, "loss": 2.5922, "step": 3100 }, { "epoch": 0.006195811551702155, "grad_norm": 0.280772864818573, "learning_rate": 0.001244, "loss": 2.5967, "step": 3110 }, { "epoch": 0.0062157337753410685, "grad_norm": 0.2966826558113098, "learning_rate": 0.001248, "loss": 2.5925, "step": 3120 }, { "epoch": 0.006235655998979982, "grad_norm": 0.2556586265563965, "learning_rate": 0.001252, "loss": 2.6083, "step": 3130 }, { "epoch": 0.006255578222618896, "grad_norm": 0.23883666098117828, "learning_rate": 0.001256, "loss": 2.5963, "step": 3140 }, { "epoch": 0.006275500446257809, "grad_norm": 0.2539077699184418, "learning_rate": 0.00126, "loss": 2.5996, "step": 3150 }, { "epoch": 0.006295422669896723, "grad_norm": 0.24899086356163025, "learning_rate": 0.001264, "loss": 2.6178, "step": 3160 }, { "epoch": 0.006315344893535637, "grad_norm": 0.22951564192771912, "learning_rate": 0.001268, "loss": 2.606, "step": 3170 }, { "epoch": 0.006335267117174551, "grad_norm": 0.3541465401649475, "learning_rate": 0.0012720000000000001, "loss": 2.6075, "step": 3180 }, { "epoch": 0.006355189340813464, "grad_norm": 0.25518253445625305, "learning_rate": 0.001276, "loss": 2.6086, "step": 3190 }, { "epoch": 0.006375111564452378, "grad_norm": 0.24203000962734222, "learning_rate": 0.00128, "loss": 2.6041, "step": 3200 }, { "epoch": 0.0063950337880912915, "grad_norm": 0.2816888391971588, "learning_rate": 0.001284, "loss": 2.6049, "step": 3210 }, { "epoch": 0.006414956011730206, "grad_norm": 0.300187885761261, "learning_rate": 0.001288, "loss": 2.6195, "step": 3220 }, { "epoch": 0.006434878235369119, "grad_norm": 0.23966571688652039, "learning_rate": 0.001292, "loss": 2.6176, "step": 3230 }, { "epoch": 0.006454800459008032, "grad_norm": 0.33454546332359314, "learning_rate": 0.001296, "loss": 2.5999, "step": 3240 }, { "epoch": 0.006474722682646946, "grad_norm": 0.27680057287216187, "learning_rate": 0.0013000000000000002, "loss": 2.6095, "step": 3250 }, { "epoch": 0.00649464490628586, "grad_norm": 0.29735133051872253, "learning_rate": 0.001304, "loss": 2.6221, "step": 3260 }, { "epoch": 0.006514567129924774, "grad_norm": 0.28589770197868347, "learning_rate": 0.0013080000000000001, "loss": 2.6086, "step": 3270 }, { "epoch": 0.006534489353563687, "grad_norm": 0.3059546649456024, "learning_rate": 0.001312, "loss": 2.6261, "step": 3280 }, { "epoch": 0.006554411577202601, "grad_norm": 0.29460880160331726, "learning_rate": 0.0013160000000000001, "loss": 2.6005, "step": 3290 }, { "epoch": 0.0065743338008415145, "grad_norm": 0.27405038475990295, "learning_rate": 0.00132, "loss": 2.6116, "step": 3300 }, { "epoch": 0.006594256024480429, "grad_norm": 0.25319793820381165, "learning_rate": 0.001324, "loss": 2.6031, "step": 3310 }, { "epoch": 0.006614178248119342, "grad_norm": 0.35302215814590454, "learning_rate": 0.0013280000000000002, "loss": 2.6091, "step": 3320 }, { "epoch": 0.006634100471758256, "grad_norm": 0.2664816677570343, "learning_rate": 0.001332, "loss": 2.5946, "step": 3330 }, { "epoch": 0.006654022695397169, "grad_norm": 0.24505211412906647, "learning_rate": 0.0013360000000000002, "loss": 2.605, "step": 3340 }, { "epoch": 0.006673944919036083, "grad_norm": 0.26685065031051636, "learning_rate": 0.00134, "loss": 2.6119, "step": 3350 }, { "epoch": 0.006693867142674997, "grad_norm": 0.25476929545402527, "learning_rate": 0.0013440000000000001, "loss": 2.6095, "step": 3360 }, { "epoch": 0.00671378936631391, "grad_norm": 0.27251169085502625, "learning_rate": 0.001348, "loss": 2.6146, "step": 3370 }, { "epoch": 0.006733711589952824, "grad_norm": 0.24473616480827332, "learning_rate": 0.0013520000000000001, "loss": 2.602, "step": 3380 }, { "epoch": 0.006753633813591738, "grad_norm": 0.2815043032169342, "learning_rate": 0.0013560000000000002, "loss": 2.5977, "step": 3390 }, { "epoch": 0.006773556037230652, "grad_norm": 0.28628477454185486, "learning_rate": 0.00136, "loss": 2.6042, "step": 3400 }, { "epoch": 0.006793478260869565, "grad_norm": 0.2548704445362091, "learning_rate": 0.0013640000000000002, "loss": 2.6285, "step": 3410 }, { "epoch": 0.006813400484508479, "grad_norm": 0.3524932265281677, "learning_rate": 0.001368, "loss": 2.6259, "step": 3420 }, { "epoch": 0.0068333227081473925, "grad_norm": 0.2637835144996643, "learning_rate": 0.0013720000000000002, "loss": 2.6264, "step": 3430 }, { "epoch": 0.006853244931786307, "grad_norm": 0.3576293885707855, "learning_rate": 0.0013759999999999998, "loss": 2.6006, "step": 3440 }, { "epoch": 0.00687316715542522, "grad_norm": 0.2390197068452835, "learning_rate": 0.00138, "loss": 2.5942, "step": 3450 }, { "epoch": 0.006893089379064133, "grad_norm": 0.33034971356391907, "learning_rate": 0.001384, "loss": 2.6037, "step": 3460 }, { "epoch": 0.006913011602703047, "grad_norm": 0.25614821910858154, "learning_rate": 0.001388, "loss": 2.6275, "step": 3470 }, { "epoch": 0.006932933826341961, "grad_norm": 0.2546170949935913, "learning_rate": 0.001392, "loss": 2.6137, "step": 3480 }, { "epoch": 0.006952856049980875, "grad_norm": 0.3648693561553955, "learning_rate": 0.0013959999999999999, "loss": 2.6114, "step": 3490 }, { "epoch": 0.006972778273619788, "grad_norm": 0.24452868103981018, "learning_rate": 0.0014, "loss": 2.6228, "step": 3500 }, { "epoch": 0.006992700497258702, "grad_norm": 0.23322179913520813, "learning_rate": 0.0014039999999999999, "loss": 2.6195, "step": 3510 }, { "epoch": 0.0070126227208976155, "grad_norm": 0.284115195274353, "learning_rate": 0.001408, "loss": 2.6137, "step": 3520 }, { "epoch": 0.00703254494453653, "grad_norm": 0.26048699021339417, "learning_rate": 0.0014119999999999998, "loss": 2.6021, "step": 3530 }, { "epoch": 0.007052467168175443, "grad_norm": 0.2848442792892456, "learning_rate": 0.001416, "loss": 2.6179, "step": 3540 }, { "epoch": 0.007072389391814357, "grad_norm": 0.3198232650756836, "learning_rate": 0.00142, "loss": 2.608, "step": 3550 }, { "epoch": 0.00709231161545327, "grad_norm": 0.4013734757900238, "learning_rate": 0.001424, "loss": 2.6251, "step": 3560 }, { "epoch": 0.0071122338390921845, "grad_norm": 0.23831209540367126, "learning_rate": 0.001428, "loss": 2.6141, "step": 3570 }, { "epoch": 0.007132156062731098, "grad_norm": 0.28859972953796387, "learning_rate": 0.001432, "loss": 2.6264, "step": 3580 }, { "epoch": 0.007152078286370011, "grad_norm": 0.28992605209350586, "learning_rate": 0.001436, "loss": 2.6125, "step": 3590 }, { "epoch": 0.007172000510008925, "grad_norm": 0.27403560280799866, "learning_rate": 0.0014399999999999999, "loss": 2.6023, "step": 3600 }, { "epoch": 0.0071919227336478386, "grad_norm": 0.27017760276794434, "learning_rate": 0.001444, "loss": 2.6109, "step": 3610 }, { "epoch": 0.007211844957286753, "grad_norm": 0.2375950813293457, "learning_rate": 0.001448, "loss": 2.6151, "step": 3620 }, { "epoch": 0.007231767180925666, "grad_norm": 0.2718348503112793, "learning_rate": 0.001452, "loss": 2.6194, "step": 3630 }, { "epoch": 0.00725168940456458, "grad_norm": 0.3569204807281494, "learning_rate": 0.001456, "loss": 2.6269, "step": 3640 }, { "epoch": 0.0072716116282034934, "grad_norm": 0.2565053105354309, "learning_rate": 0.00146, "loss": 2.6118, "step": 3650 }, { "epoch": 0.007291533851842408, "grad_norm": 0.31034380197525024, "learning_rate": 0.001464, "loss": 2.6196, "step": 3660 }, { "epoch": 0.007311456075481321, "grad_norm": 0.2622755169868469, "learning_rate": 0.001468, "loss": 2.6295, "step": 3670 }, { "epoch": 0.007331378299120235, "grad_norm": 0.2522021234035492, "learning_rate": 0.001472, "loss": 2.6356, "step": 3680 }, { "epoch": 0.007351300522759148, "grad_norm": 0.2655661106109619, "learning_rate": 0.001476, "loss": 2.6279, "step": 3690 }, { "epoch": 0.007371222746398062, "grad_norm": 0.2689841091632843, "learning_rate": 0.00148, "loss": 2.6114, "step": 3700 }, { "epoch": 0.007391144970036976, "grad_norm": 0.24858500063419342, "learning_rate": 0.001484, "loss": 2.6287, "step": 3710 }, { "epoch": 0.007411067193675889, "grad_norm": 0.36599624156951904, "learning_rate": 0.001488, "loss": 2.6364, "step": 3720 }, { "epoch": 0.007430989417314803, "grad_norm": 0.24837727844715118, "learning_rate": 0.001492, "loss": 2.6207, "step": 3730 }, { "epoch": 0.0074509116409537165, "grad_norm": 0.25442805886268616, "learning_rate": 0.001496, "loss": 2.6361, "step": 3740 }, { "epoch": 0.007470833864592631, "grad_norm": 0.25367292761802673, "learning_rate": 0.0015, "loss": 2.6267, "step": 3750 }, { "epoch": 0.007490756088231544, "grad_norm": 0.2959444522857666, "learning_rate": 0.0015040000000000001, "loss": 2.6329, "step": 3760 }, { "epoch": 0.007510678311870458, "grad_norm": 0.22691310942173004, "learning_rate": 0.001508, "loss": 2.6308, "step": 3770 }, { "epoch": 0.007530600535509371, "grad_norm": 0.24800993502140045, "learning_rate": 0.001512, "loss": 2.6186, "step": 3780 }, { "epoch": 0.0075505227591482855, "grad_norm": 0.392956405878067, "learning_rate": 0.001516, "loss": 2.6182, "step": 3790 }, { "epoch": 0.007570444982787199, "grad_norm": 0.28158995509147644, "learning_rate": 0.00152, "loss": 2.6289, "step": 3800 }, { "epoch": 0.007590367206426112, "grad_norm": 0.2748469412326813, "learning_rate": 0.001524, "loss": 2.6315, "step": 3810 }, { "epoch": 0.007610289430065026, "grad_norm": 0.21554911136627197, "learning_rate": 0.001528, "loss": 2.6212, "step": 3820 }, { "epoch": 0.0076302116537039395, "grad_norm": 0.22068694233894348, "learning_rate": 0.0015320000000000002, "loss": 2.6203, "step": 3830 }, { "epoch": 0.007650133877342854, "grad_norm": 0.3972865045070648, "learning_rate": 0.001536, "loss": 2.6346, "step": 3840 }, { "epoch": 0.007670056100981767, "grad_norm": 0.26717135310173035, "learning_rate": 0.0015400000000000001, "loss": 2.64, "step": 3850 }, { "epoch": 0.007689978324620681, "grad_norm": 0.2912931442260742, "learning_rate": 0.001544, "loss": 2.6383, "step": 3860 }, { "epoch": 0.007709900548259594, "grad_norm": 0.35138964653015137, "learning_rate": 0.0015480000000000001, "loss": 2.6221, "step": 3870 }, { "epoch": 0.0077298227718985086, "grad_norm": 0.28958526253700256, "learning_rate": 0.001552, "loss": 2.6373, "step": 3880 }, { "epoch": 0.007749744995537422, "grad_norm": 0.31249693036079407, "learning_rate": 0.001556, "loss": 2.6455, "step": 3890 }, { "epoch": 0.007769667219176336, "grad_norm": 0.2603747844696045, "learning_rate": 0.0015600000000000002, "loss": 2.6417, "step": 3900 }, { "epoch": 0.007789589442815249, "grad_norm": 0.3006944954395294, "learning_rate": 0.001564, "loss": 2.6364, "step": 3910 }, { "epoch": 0.007809511666454163, "grad_norm": 0.2534780502319336, "learning_rate": 0.0015680000000000002, "loss": 2.6185, "step": 3920 }, { "epoch": 0.007829433890093076, "grad_norm": 0.2495919018983841, "learning_rate": 0.001572, "loss": 2.6371, "step": 3930 }, { "epoch": 0.007849356113731991, "grad_norm": 0.2861161231994629, "learning_rate": 0.0015760000000000001, "loss": 2.6454, "step": 3940 }, { "epoch": 0.007869278337370904, "grad_norm": 0.24656371772289276, "learning_rate": 0.00158, "loss": 2.62, "step": 3950 }, { "epoch": 0.007889200561009817, "grad_norm": 0.28920915722846985, "learning_rate": 0.0015840000000000001, "loss": 2.6106, "step": 3960 }, { "epoch": 0.00790912278464873, "grad_norm": 0.3093283772468567, "learning_rate": 0.0015880000000000002, "loss": 2.6364, "step": 3970 }, { "epoch": 0.007929045008287646, "grad_norm": 0.287251353263855, "learning_rate": 0.001592, "loss": 2.6288, "step": 3980 }, { "epoch": 0.007948967231926559, "grad_norm": 0.2677977979183197, "learning_rate": 0.0015960000000000002, "loss": 2.6433, "step": 3990 }, { "epoch": 0.007968889455565472, "grad_norm": 0.23656222224235535, "learning_rate": 0.0016, "loss": 2.6361, "step": 4000 }, { "epoch": 0.007988811679204386, "grad_norm": 0.4056943356990814, "learning_rate": 0.0016040000000000002, "loss": 2.6278, "step": 4010 }, { "epoch": 0.008008733902843299, "grad_norm": 0.307620108127594, "learning_rate": 0.001608, "loss": 2.6516, "step": 4020 }, { "epoch": 0.008028656126482214, "grad_norm": 0.22841262817382812, "learning_rate": 0.0016120000000000002, "loss": 2.6383, "step": 4030 }, { "epoch": 0.008048578350121127, "grad_norm": 0.3290756046772003, "learning_rate": 0.001616, "loss": 2.6307, "step": 4040 }, { "epoch": 0.00806850057376004, "grad_norm": 0.25768473744392395, "learning_rate": 0.0016200000000000001, "loss": 2.6289, "step": 4050 }, { "epoch": 0.008088422797398954, "grad_norm": 0.24225610494613647, "learning_rate": 0.0016240000000000002, "loss": 2.629, "step": 4060 }, { "epoch": 0.008108345021037869, "grad_norm": 0.24170571565628052, "learning_rate": 0.0016279999999999999, "loss": 2.6335, "step": 4070 }, { "epoch": 0.008128267244676782, "grad_norm": 0.4362369179725647, "learning_rate": 0.001632, "loss": 2.6307, "step": 4080 }, { "epoch": 0.008148189468315695, "grad_norm": 0.2639000117778778, "learning_rate": 0.0016359999999999999, "loss": 2.64, "step": 4090 }, { "epoch": 0.008168111691954609, "grad_norm": 0.2621782124042511, "learning_rate": 0.00164, "loss": 2.6376, "step": 4100 }, { "epoch": 0.008188033915593524, "grad_norm": 0.36529791355133057, "learning_rate": 0.0016439999999999998, "loss": 2.6578, "step": 4110 }, { "epoch": 0.008207956139232437, "grad_norm": 0.27161234617233276, "learning_rate": 0.001648, "loss": 2.6261, "step": 4120 }, { "epoch": 0.00822787836287135, "grad_norm": 0.2856981158256531, "learning_rate": 0.001652, "loss": 2.6463, "step": 4130 }, { "epoch": 0.008247800586510264, "grad_norm": 0.27752554416656494, "learning_rate": 0.001656, "loss": 2.6298, "step": 4140 }, { "epoch": 0.008267722810149177, "grad_norm": 0.25993698835372925, "learning_rate": 0.00166, "loss": 2.639, "step": 4150 }, { "epoch": 0.008287645033788092, "grad_norm": 0.3556019961833954, "learning_rate": 0.001664, "loss": 2.6402, "step": 4160 }, { "epoch": 0.008307567257427005, "grad_norm": 0.23478955030441284, "learning_rate": 0.001668, "loss": 2.6339, "step": 4170 }, { "epoch": 0.008327489481065918, "grad_norm": 0.32295477390289307, "learning_rate": 0.0016719999999999999, "loss": 2.632, "step": 4180 }, { "epoch": 0.008347411704704832, "grad_norm": 0.2807144522666931, "learning_rate": 0.001676, "loss": 2.6482, "step": 4190 }, { "epoch": 0.008367333928343747, "grad_norm": 0.2943168580532074, "learning_rate": 0.00168, "loss": 2.646, "step": 4200 }, { "epoch": 0.00838725615198266, "grad_norm": 0.29926857352256775, "learning_rate": 0.001684, "loss": 2.6425, "step": 4210 }, { "epoch": 0.008407178375621573, "grad_norm": 0.2447386085987091, "learning_rate": 0.001688, "loss": 2.6483, "step": 4220 }, { "epoch": 0.008427100599260487, "grad_norm": 0.23517653346061707, "learning_rate": 0.001692, "loss": 2.6487, "step": 4230 }, { "epoch": 0.0084470228228994, "grad_norm": 0.2671336233615875, "learning_rate": 0.001696, "loss": 2.655, "step": 4240 }, { "epoch": 0.008466945046538315, "grad_norm": 0.246806800365448, "learning_rate": 0.0017, "loss": 2.6305, "step": 4250 }, { "epoch": 0.008486867270177228, "grad_norm": 0.2666430175304413, "learning_rate": 0.001704, "loss": 2.6486, "step": 4260 }, { "epoch": 0.008506789493816141, "grad_norm": 0.25025445222854614, "learning_rate": 0.001708, "loss": 2.6618, "step": 4270 }, { "epoch": 0.008526711717455055, "grad_norm": 0.27437132596969604, "learning_rate": 0.001712, "loss": 2.6358, "step": 4280 }, { "epoch": 0.00854663394109397, "grad_norm": 0.26234883069992065, "learning_rate": 0.001716, "loss": 2.6414, "step": 4290 }, { "epoch": 0.008566556164732883, "grad_norm": 0.3261853754520416, "learning_rate": 0.00172, "loss": 2.6311, "step": 4300 }, { "epoch": 0.008586478388371796, "grad_norm": 0.2813408076763153, "learning_rate": 0.001724, "loss": 2.6344, "step": 4310 }, { "epoch": 0.00860640061201071, "grad_norm": 0.2186128795146942, "learning_rate": 0.001728, "loss": 2.6521, "step": 4320 }, { "epoch": 0.008626322835649625, "grad_norm": 0.2446129471063614, "learning_rate": 0.001732, "loss": 2.6472, "step": 4330 }, { "epoch": 0.008646245059288538, "grad_norm": 0.3584851026535034, "learning_rate": 0.0017360000000000001, "loss": 2.6414, "step": 4340 }, { "epoch": 0.008666167282927451, "grad_norm": 0.29514381289482117, "learning_rate": 0.00174, "loss": 2.6503, "step": 4350 }, { "epoch": 0.008686089506566365, "grad_norm": 0.25573790073394775, "learning_rate": 0.001744, "loss": 2.6453, "step": 4360 }, { "epoch": 0.008706011730205278, "grad_norm": 0.2788326144218445, "learning_rate": 0.001748, "loss": 2.6329, "step": 4370 }, { "epoch": 0.008725933953844193, "grad_norm": 0.32184508442878723, "learning_rate": 0.001752, "loss": 2.6333, "step": 4380 }, { "epoch": 0.008745856177483106, "grad_norm": 0.24099747836589813, "learning_rate": 0.001756, "loss": 2.6346, "step": 4390 }, { "epoch": 0.00876577840112202, "grad_norm": 0.23427201807498932, "learning_rate": 0.00176, "loss": 2.6337, "step": 4400 }, { "epoch": 0.008785700624760933, "grad_norm": 0.3614480197429657, "learning_rate": 0.001764, "loss": 2.6484, "step": 4410 }, { "epoch": 0.008805622848399848, "grad_norm": 0.27369412779808044, "learning_rate": 0.001768, "loss": 2.654, "step": 4420 }, { "epoch": 0.008825545072038761, "grad_norm": 0.33976051211357117, "learning_rate": 0.0017720000000000001, "loss": 2.6558, "step": 4430 }, { "epoch": 0.008845467295677674, "grad_norm": 0.2559027373790741, "learning_rate": 0.001776, "loss": 2.6345, "step": 4440 }, { "epoch": 0.008865389519316588, "grad_norm": 0.2715472877025604, "learning_rate": 0.0017800000000000001, "loss": 2.6633, "step": 4450 }, { "epoch": 0.008885311742955503, "grad_norm": 0.2539695203304291, "learning_rate": 0.001784, "loss": 2.6351, "step": 4460 }, { "epoch": 0.008905233966594416, "grad_norm": 0.26313838362693787, "learning_rate": 0.001788, "loss": 2.6327, "step": 4470 }, { "epoch": 0.00892515619023333, "grad_norm": 0.29808932542800903, "learning_rate": 0.001792, "loss": 2.636, "step": 4480 }, { "epoch": 0.008945078413872242, "grad_norm": 0.25095269083976746, "learning_rate": 0.001796, "loss": 2.6418, "step": 4490 }, { "epoch": 0.008965000637511156, "grad_norm": 0.31279709935188293, "learning_rate": 0.0018000000000000002, "loss": 2.6589, "step": 4500 }, { "epoch": 0.00898492286115007, "grad_norm": 0.27606409788131714, "learning_rate": 0.001804, "loss": 2.6393, "step": 4510 }, { "epoch": 0.009004845084788984, "grad_norm": 0.2701137065887451, "learning_rate": 0.0018080000000000001, "loss": 2.6365, "step": 4520 }, { "epoch": 0.009024767308427897, "grad_norm": 0.2722721993923187, "learning_rate": 0.001812, "loss": 2.6362, "step": 4530 }, { "epoch": 0.00904468953206681, "grad_norm": 0.25126731395721436, "learning_rate": 0.0018160000000000001, "loss": 2.6519, "step": 4540 }, { "epoch": 0.009064611755705726, "grad_norm": 0.22555232048034668, "learning_rate": 0.00182, "loss": 2.6585, "step": 4550 }, { "epoch": 0.009084533979344639, "grad_norm": 0.4315498471260071, "learning_rate": 0.001824, "loss": 2.6485, "step": 4560 }, { "epoch": 0.009104456202983552, "grad_norm": 0.2177431285381317, "learning_rate": 0.0018280000000000002, "loss": 2.6427, "step": 4570 }, { "epoch": 0.009124378426622465, "grad_norm": 0.28132423758506775, "learning_rate": 0.001832, "loss": 2.6247, "step": 4580 }, { "epoch": 0.009144300650261379, "grad_norm": 0.3109949231147766, "learning_rate": 0.0018360000000000002, "loss": 2.6374, "step": 4590 }, { "epoch": 0.009164222873900294, "grad_norm": 0.24755898118019104, "learning_rate": 0.00184, "loss": 2.6383, "step": 4600 }, { "epoch": 0.009184145097539207, "grad_norm": 0.30041778087615967, "learning_rate": 0.0018440000000000002, "loss": 2.664, "step": 4610 }, { "epoch": 0.00920406732117812, "grad_norm": 0.23424208164215088, "learning_rate": 0.001848, "loss": 2.6381, "step": 4620 }, { "epoch": 0.009223989544817034, "grad_norm": 0.26328516006469727, "learning_rate": 0.0018520000000000001, "loss": 2.6405, "step": 4630 }, { "epoch": 0.009243911768455949, "grad_norm": 0.2686675488948822, "learning_rate": 0.0018560000000000002, "loss": 2.6541, "step": 4640 }, { "epoch": 0.009263833992094862, "grad_norm": 0.224777489900589, "learning_rate": 0.00186, "loss": 2.6537, "step": 4650 }, { "epoch": 0.009283756215733775, "grad_norm": 0.24811263382434845, "learning_rate": 0.0018640000000000002, "loss": 2.6712, "step": 4660 }, { "epoch": 0.009303678439372689, "grad_norm": 0.30919334292411804, "learning_rate": 0.001868, "loss": 2.6612, "step": 4670 }, { "epoch": 0.009323600663011604, "grad_norm": 0.23683860898017883, "learning_rate": 0.0018720000000000002, "loss": 2.6613, "step": 4680 }, { "epoch": 0.009343522886650517, "grad_norm": 0.3471679985523224, "learning_rate": 0.0018759999999999998, "loss": 2.6522, "step": 4690 }, { "epoch": 0.00936344511028943, "grad_norm": 0.2518165707588196, "learning_rate": 0.00188, "loss": 2.672, "step": 4700 }, { "epoch": 0.009383367333928343, "grad_norm": 0.3638230562210083, "learning_rate": 0.001884, "loss": 2.6488, "step": 4710 }, { "epoch": 0.009403289557567257, "grad_norm": 0.2599489688873291, "learning_rate": 0.001888, "loss": 2.6577, "step": 4720 }, { "epoch": 0.009423211781206172, "grad_norm": 0.2732684910297394, "learning_rate": 0.001892, "loss": 2.6428, "step": 4730 }, { "epoch": 0.009443134004845085, "grad_norm": 0.258099764585495, "learning_rate": 0.001896, "loss": 2.6499, "step": 4740 }, { "epoch": 0.009463056228483998, "grad_norm": 0.4018145203590393, "learning_rate": 0.0019, "loss": 2.6658, "step": 4750 }, { "epoch": 0.009482978452122912, "grad_norm": 0.23909227550029755, "learning_rate": 0.0019039999999999999, "loss": 2.6514, "step": 4760 }, { "epoch": 0.009502900675761827, "grad_norm": 0.2606949508190155, "learning_rate": 0.001908, "loss": 2.6548, "step": 4770 }, { "epoch": 0.00952282289940074, "grad_norm": 0.3217521607875824, "learning_rate": 0.0019119999999999999, "loss": 2.6558, "step": 4780 }, { "epoch": 0.009542745123039653, "grad_norm": 0.24605603516101837, "learning_rate": 0.001916, "loss": 2.6649, "step": 4790 }, { "epoch": 0.009562667346678566, "grad_norm": 0.23503199219703674, "learning_rate": 0.00192, "loss": 2.6461, "step": 4800 }, { "epoch": 0.00958258957031748, "grad_norm": 0.36071571707725525, "learning_rate": 0.001924, "loss": 2.6526, "step": 4810 }, { "epoch": 0.009602511793956395, "grad_norm": 0.24227696657180786, "learning_rate": 0.001928, "loss": 2.6529, "step": 4820 }, { "epoch": 0.009622434017595308, "grad_norm": 0.31274229288101196, "learning_rate": 0.001932, "loss": 2.6549, "step": 4830 }, { "epoch": 0.009642356241234221, "grad_norm": 0.28790974617004395, "learning_rate": 0.001936, "loss": 2.663, "step": 4840 }, { "epoch": 0.009662278464873135, "grad_norm": 0.22570858895778656, "learning_rate": 0.0019399999999999999, "loss": 2.6539, "step": 4850 }, { "epoch": 0.00968220068851205, "grad_norm": 0.2529376745223999, "learning_rate": 0.001944, "loss": 2.6723, "step": 4860 }, { "epoch": 0.009702122912150963, "grad_norm": 0.25963717699050903, "learning_rate": 0.001948, "loss": 2.6542, "step": 4870 }, { "epoch": 0.009722045135789876, "grad_norm": 0.2851378321647644, "learning_rate": 0.001952, "loss": 2.6535, "step": 4880 }, { "epoch": 0.00974196735942879, "grad_norm": 0.2801332473754883, "learning_rate": 0.001956, "loss": 2.6556, "step": 4890 }, { "epoch": 0.009761889583067705, "grad_norm": 0.23545946180820465, "learning_rate": 0.00196, "loss": 2.6461, "step": 4900 }, { "epoch": 0.009781811806706618, "grad_norm": 0.23655717074871063, "learning_rate": 0.001964, "loss": 2.6688, "step": 4910 }, { "epoch": 0.009801734030345531, "grad_norm": 0.31396037340164185, "learning_rate": 0.001968, "loss": 2.6523, "step": 4920 }, { "epoch": 0.009821656253984444, "grad_norm": 0.19960422813892365, "learning_rate": 0.0019720000000000002, "loss": 2.6408, "step": 4930 }, { "epoch": 0.009841578477623358, "grad_norm": 0.3075007200241089, "learning_rate": 0.001976, "loss": 2.6579, "step": 4940 }, { "epoch": 0.009861500701262273, "grad_norm": 0.2974744141101837, "learning_rate": 0.00198, "loss": 2.6543, "step": 4950 }, { "epoch": 0.009881422924901186, "grad_norm": 0.28340721130371094, "learning_rate": 0.001984, "loss": 2.6615, "step": 4960 }, { "epoch": 0.0099013451485401, "grad_norm": 0.25365665555000305, "learning_rate": 0.001988, "loss": 2.6504, "step": 4970 }, { "epoch": 0.009921267372179013, "grad_norm": 0.39314189553260803, "learning_rate": 0.001992, "loss": 2.6629, "step": 4980 }, { "epoch": 0.009941189595817928, "grad_norm": 0.22162103652954102, "learning_rate": 0.001996, "loss": 2.6491, "step": 4990 }, { "epoch": 0.00996111181945684, "grad_norm": 0.27169665694236755, "learning_rate": 0.002, "loss": 2.6646, "step": 5000 }, { "epoch": 0.009981034043095754, "grad_norm": 0.292549729347229, "learning_rate": 0.002, "loss": 2.653, "step": 5010 }, { "epoch": 0.010000956266734667, "grad_norm": 0.3372136652469635, "learning_rate": 0.002, "loss": 2.6524, "step": 5020 }, { "epoch": 0.01002087849037358, "grad_norm": 0.2365424782037735, "learning_rate": 0.002, "loss": 2.6434, "step": 5030 }, { "epoch": 0.010040800714012496, "grad_norm": 0.24282845854759216, "learning_rate": 0.002, "loss": 2.6506, "step": 5040 }, { "epoch": 0.010060722937651409, "grad_norm": 0.3228405714035034, "learning_rate": 0.002, "loss": 2.6628, "step": 5050 }, { "epoch": 0.010080645161290322, "grad_norm": 0.2426726520061493, "learning_rate": 0.002, "loss": 2.6603, "step": 5060 }, { "epoch": 0.010100567384929236, "grad_norm": 0.22327937185764313, "learning_rate": 0.002, "loss": 2.655, "step": 5070 }, { "epoch": 0.01012048960856815, "grad_norm": 0.3108415901660919, "learning_rate": 0.002, "loss": 2.662, "step": 5080 }, { "epoch": 0.010140411832207064, "grad_norm": 0.2372429519891739, "learning_rate": 0.002, "loss": 2.6696, "step": 5090 }, { "epoch": 0.010160334055845977, "grad_norm": 0.2991734445095062, "learning_rate": 0.002, "loss": 2.673, "step": 5100 }, { "epoch": 0.01018025627948489, "grad_norm": 0.20926590263843536, "learning_rate": 0.002, "loss": 2.6602, "step": 5110 }, { "epoch": 0.010200178503123805, "grad_norm": 0.30474039912223816, "learning_rate": 0.002, "loss": 2.6499, "step": 5120 }, { "epoch": 0.010220100726762719, "grad_norm": 0.252201646566391, "learning_rate": 0.002, "loss": 2.6691, "step": 5130 }, { "epoch": 0.010240022950401632, "grad_norm": 0.2533051669597626, "learning_rate": 0.002, "loss": 2.6617, "step": 5140 }, { "epoch": 0.010259945174040545, "grad_norm": 0.24129579961299896, "learning_rate": 0.002, "loss": 2.6544, "step": 5150 }, { "epoch": 0.010279867397679459, "grad_norm": 0.23019099235534668, "learning_rate": 0.002, "loss": 2.6564, "step": 5160 }, { "epoch": 0.010299789621318374, "grad_norm": 0.2621488869190216, "learning_rate": 0.002, "loss": 2.6785, "step": 5170 }, { "epoch": 0.010319711844957287, "grad_norm": 0.2567979693412781, "learning_rate": 0.002, "loss": 2.6566, "step": 5180 }, { "epoch": 0.0103396340685962, "grad_norm": 0.24124987423419952, "learning_rate": 0.002, "loss": 2.6461, "step": 5190 }, { "epoch": 0.010359556292235114, "grad_norm": 0.2868908941745758, "learning_rate": 0.002, "loss": 2.6605, "step": 5200 }, { "epoch": 0.010379478515874029, "grad_norm": 0.2492065280675888, "learning_rate": 0.002, "loss": 2.6697, "step": 5210 }, { "epoch": 0.010399400739512942, "grad_norm": 0.20710085332393646, "learning_rate": 0.002, "loss": 2.6587, "step": 5220 }, { "epoch": 0.010419322963151855, "grad_norm": 0.22695226967334747, "learning_rate": 0.002, "loss": 2.6531, "step": 5230 }, { "epoch": 0.010439245186790768, "grad_norm": 0.2340531051158905, "learning_rate": 0.002, "loss": 2.6699, "step": 5240 }, { "epoch": 0.010459167410429682, "grad_norm": 0.22510793805122375, "learning_rate": 0.002, "loss": 2.6452, "step": 5250 }, { "epoch": 0.010479089634068597, "grad_norm": 0.30028390884399414, "learning_rate": 0.002, "loss": 2.6503, "step": 5260 }, { "epoch": 0.01049901185770751, "grad_norm": 0.21423693001270294, "learning_rate": 0.002, "loss": 2.6613, "step": 5270 }, { "epoch": 0.010518934081346423, "grad_norm": 0.26742202043533325, "learning_rate": 0.002, "loss": 2.6496, "step": 5280 }, { "epoch": 0.010538856304985337, "grad_norm": 0.24487444758415222, "learning_rate": 0.002, "loss": 2.6486, "step": 5290 }, { "epoch": 0.010558778528624252, "grad_norm": 0.1950957477092743, "learning_rate": 0.002, "loss": 2.6409, "step": 5300 }, { "epoch": 0.010578700752263165, "grad_norm": 0.3008197844028473, "learning_rate": 0.002, "loss": 2.6368, "step": 5310 }, { "epoch": 0.010598622975902078, "grad_norm": 0.2170853167772293, "learning_rate": 0.002, "loss": 2.6441, "step": 5320 }, { "epoch": 0.010618545199540991, "grad_norm": 0.27102652192115784, "learning_rate": 0.002, "loss": 2.6486, "step": 5330 }, { "epoch": 0.010638467423179906, "grad_norm": 0.2533071041107178, "learning_rate": 0.002, "loss": 2.653, "step": 5340 }, { "epoch": 0.01065838964681882, "grad_norm": 0.22336485981941223, "learning_rate": 0.002, "loss": 2.6527, "step": 5350 }, { "epoch": 0.010678311870457733, "grad_norm": 0.24454209208488464, "learning_rate": 0.002, "loss": 2.6517, "step": 5360 }, { "epoch": 0.010698234094096646, "grad_norm": 0.24621517956256866, "learning_rate": 0.002, "loss": 2.6443, "step": 5370 }, { "epoch": 0.01071815631773556, "grad_norm": 0.25834566354751587, "learning_rate": 0.002, "loss": 2.6516, "step": 5380 }, { "epoch": 0.010738078541374475, "grad_norm": 0.3320177495479584, "learning_rate": 0.002, "loss": 2.6631, "step": 5390 }, { "epoch": 0.010758000765013388, "grad_norm": 0.2194259911775589, "learning_rate": 0.002, "loss": 2.6455, "step": 5400 }, { "epoch": 0.010777922988652301, "grad_norm": 0.2532918155193329, "learning_rate": 0.002, "loss": 2.658, "step": 5410 }, { "epoch": 0.010797845212291214, "grad_norm": 0.264913946390152, "learning_rate": 0.002, "loss": 2.6528, "step": 5420 }, { "epoch": 0.01081776743593013, "grad_norm": 0.2742248773574829, "learning_rate": 0.002, "loss": 2.6488, "step": 5430 }, { "epoch": 0.010837689659569043, "grad_norm": 0.2052263766527176, "learning_rate": 0.002, "loss": 2.6543, "step": 5440 }, { "epoch": 0.010857611883207956, "grad_norm": 0.2556586265563965, "learning_rate": 0.002, "loss": 2.6477, "step": 5450 }, { "epoch": 0.01087753410684687, "grad_norm": 0.21249651908874512, "learning_rate": 0.002, "loss": 2.6497, "step": 5460 }, { "epoch": 0.010897456330485783, "grad_norm": 0.24264992773532867, "learning_rate": 0.002, "loss": 2.6336, "step": 5470 }, { "epoch": 0.010917378554124698, "grad_norm": 0.2720754146575928, "learning_rate": 0.002, "loss": 2.6383, "step": 5480 }, { "epoch": 0.010937300777763611, "grad_norm": 0.2623012363910675, "learning_rate": 0.002, "loss": 2.6433, "step": 5490 }, { "epoch": 0.010957223001402524, "grad_norm": 0.2493743896484375, "learning_rate": 0.002, "loss": 2.6601, "step": 5500 }, { "epoch": 0.010977145225041438, "grad_norm": 0.26818040013313293, "learning_rate": 0.002, "loss": 2.6532, "step": 5510 }, { "epoch": 0.010997067448680353, "grad_norm": 0.23443378508090973, "learning_rate": 0.002, "loss": 2.6559, "step": 5520 }, { "epoch": 0.011016989672319266, "grad_norm": 0.22406287491321564, "learning_rate": 0.002, "loss": 2.6397, "step": 5530 }, { "epoch": 0.011036911895958179, "grad_norm": 0.22658081352710724, "learning_rate": 0.002, "loss": 2.643, "step": 5540 }, { "epoch": 0.011056834119597092, "grad_norm": 0.2528095245361328, "learning_rate": 0.002, "loss": 2.654, "step": 5550 }, { "epoch": 0.011076756343236007, "grad_norm": 0.2626362442970276, "learning_rate": 0.002, "loss": 2.6523, "step": 5560 }, { "epoch": 0.01109667856687492, "grad_norm": 0.24810366332530975, "learning_rate": 0.002, "loss": 2.6493, "step": 5570 }, { "epoch": 0.011116600790513834, "grad_norm": 0.1813693791627884, "learning_rate": 0.002, "loss": 2.6551, "step": 5580 }, { "epoch": 0.011136523014152747, "grad_norm": 0.26380762457847595, "learning_rate": 0.002, "loss": 2.6421, "step": 5590 }, { "epoch": 0.01115644523779166, "grad_norm": 0.24770748615264893, "learning_rate": 0.002, "loss": 2.6389, "step": 5600 }, { "epoch": 0.011176367461430576, "grad_norm": 0.18157590925693512, "learning_rate": 0.002, "loss": 2.6431, "step": 5610 }, { "epoch": 0.011196289685069489, "grad_norm": 0.22631093859672546, "learning_rate": 0.002, "loss": 2.643, "step": 5620 }, { "epoch": 0.011216211908708402, "grad_norm": 0.24889683723449707, "learning_rate": 0.002, "loss": 2.6342, "step": 5630 }, { "epoch": 0.011236134132347315, "grad_norm": 0.21920843422412872, "learning_rate": 0.002, "loss": 2.6485, "step": 5640 }, { "epoch": 0.01125605635598623, "grad_norm": 0.2161376178264618, "learning_rate": 0.002, "loss": 2.6314, "step": 5650 }, { "epoch": 0.011275978579625144, "grad_norm": 0.29879331588745117, "learning_rate": 0.002, "loss": 2.6429, "step": 5660 }, { "epoch": 0.011295900803264057, "grad_norm": 0.18281014263629913, "learning_rate": 0.002, "loss": 2.6557, "step": 5670 }, { "epoch": 0.01131582302690297, "grad_norm": 0.1970023661851883, "learning_rate": 0.002, "loss": 2.6393, "step": 5680 }, { "epoch": 0.011335745250541884, "grad_norm": 0.2547535002231598, "learning_rate": 0.002, "loss": 2.6357, "step": 5690 }, { "epoch": 0.011355667474180799, "grad_norm": 0.2771916091442108, "learning_rate": 0.002, "loss": 2.6402, "step": 5700 }, { "epoch": 0.011375589697819712, "grad_norm": 0.26350435614585876, "learning_rate": 0.002, "loss": 2.67, "step": 5710 }, { "epoch": 0.011395511921458625, "grad_norm": 0.24207192659378052, "learning_rate": 0.002, "loss": 2.6633, "step": 5720 }, { "epoch": 0.011415434145097538, "grad_norm": 0.25290101766586304, "learning_rate": 0.002, "loss": 2.6502, "step": 5730 }, { "epoch": 0.011435356368736454, "grad_norm": 0.2775394320487976, "learning_rate": 0.002, "loss": 2.6596, "step": 5740 }, { "epoch": 0.011455278592375367, "grad_norm": 0.23551501333713531, "learning_rate": 0.002, "loss": 2.6686, "step": 5750 }, { "epoch": 0.01147520081601428, "grad_norm": 0.251463383436203, "learning_rate": 0.002, "loss": 2.6326, "step": 5760 }, { "epoch": 0.011495123039653193, "grad_norm": 0.25761985778808594, "learning_rate": 0.002, "loss": 2.666, "step": 5770 }, { "epoch": 0.011515045263292108, "grad_norm": 0.2811390161514282, "learning_rate": 0.002, "loss": 2.6319, "step": 5780 }, { "epoch": 0.011534967486931022, "grad_norm": 0.2177535742521286, "learning_rate": 0.002, "loss": 2.6351, "step": 5790 }, { "epoch": 0.011554889710569935, "grad_norm": 0.21137763559818268, "learning_rate": 0.002, "loss": 2.6624, "step": 5800 }, { "epoch": 0.011574811934208848, "grad_norm": 0.1719178408384323, "learning_rate": 0.002, "loss": 2.6334, "step": 5810 }, { "epoch": 0.011594734157847762, "grad_norm": 0.20835991203784943, "learning_rate": 0.002, "loss": 2.6389, "step": 5820 }, { "epoch": 0.011614656381486677, "grad_norm": 0.2670765221118927, "learning_rate": 0.002, "loss": 2.6332, "step": 5830 }, { "epoch": 0.01163457860512559, "grad_norm": 0.20557242631912231, "learning_rate": 0.002, "loss": 2.6372, "step": 5840 }, { "epoch": 0.011654500828764503, "grad_norm": 0.2379782497882843, "learning_rate": 0.002, "loss": 2.641, "step": 5850 }, { "epoch": 0.011674423052403416, "grad_norm": 0.266085684299469, "learning_rate": 0.002, "loss": 2.645, "step": 5860 }, { "epoch": 0.011694345276042331, "grad_norm": 0.2515946924686432, "learning_rate": 0.002, "loss": 2.642, "step": 5870 }, { "epoch": 0.011714267499681245, "grad_norm": 0.20749199390411377, "learning_rate": 0.002, "loss": 2.636, "step": 5880 }, { "epoch": 0.011734189723320158, "grad_norm": 0.26344165205955505, "learning_rate": 0.002, "loss": 2.655, "step": 5890 }, { "epoch": 0.011754111946959071, "grad_norm": 0.2220536321401596, "learning_rate": 0.002, "loss": 2.6408, "step": 5900 }, { "epoch": 0.011774034170597986, "grad_norm": 0.3426555395126343, "learning_rate": 0.002, "loss": 2.6315, "step": 5910 }, { "epoch": 0.0117939563942369, "grad_norm": 0.18472176790237427, "learning_rate": 0.002, "loss": 2.6317, "step": 5920 }, { "epoch": 0.011813878617875813, "grad_norm": 0.24610751867294312, "learning_rate": 0.002, "loss": 2.653, "step": 5930 }, { "epoch": 0.011833800841514726, "grad_norm": 0.2344995141029358, "learning_rate": 0.002, "loss": 2.6418, "step": 5940 }, { "epoch": 0.01185372306515364, "grad_norm": 0.21005327999591827, "learning_rate": 0.002, "loss": 2.6439, "step": 5950 }, { "epoch": 0.011873645288792554, "grad_norm": 0.3029705584049225, "learning_rate": 0.002, "loss": 2.6256, "step": 5960 }, { "epoch": 0.011893567512431468, "grad_norm": 0.22369332611560822, "learning_rate": 0.002, "loss": 2.6469, "step": 5970 }, { "epoch": 0.011913489736070381, "grad_norm": 0.24053986370563507, "learning_rate": 0.002, "loss": 2.6411, "step": 5980 }, { "epoch": 0.011933411959709294, "grad_norm": 0.21900437772274017, "learning_rate": 0.002, "loss": 2.6343, "step": 5990 }, { "epoch": 0.01195333418334821, "grad_norm": 0.23958046734333038, "learning_rate": 0.002, "loss": 2.643, "step": 6000 }, { "epoch": 0.011973256406987123, "grad_norm": 0.25524038076400757, "learning_rate": 0.002, "loss": 2.638, "step": 6010 }, { "epoch": 0.011993178630626036, "grad_norm": 0.23588870465755463, "learning_rate": 0.002, "loss": 2.6433, "step": 6020 }, { "epoch": 0.01201310085426495, "grad_norm": 0.2370671182870865, "learning_rate": 0.002, "loss": 2.6523, "step": 6030 }, { "epoch": 0.012033023077903863, "grad_norm": 0.2594859004020691, "learning_rate": 0.002, "loss": 2.6542, "step": 6040 }, { "epoch": 0.012052945301542778, "grad_norm": 0.2277677208185196, "learning_rate": 0.002, "loss": 2.6495, "step": 6050 }, { "epoch": 0.01207286752518169, "grad_norm": 0.2288864254951477, "learning_rate": 0.002, "loss": 2.6302, "step": 6060 }, { "epoch": 0.012092789748820604, "grad_norm": 0.19973905384540558, "learning_rate": 0.002, "loss": 2.6357, "step": 6070 }, { "epoch": 0.012112711972459517, "grad_norm": 0.20615699887275696, "learning_rate": 0.002, "loss": 2.6282, "step": 6080 }, { "epoch": 0.012132634196098432, "grad_norm": 0.3014143705368042, "learning_rate": 0.002, "loss": 2.6456, "step": 6090 }, { "epoch": 0.012152556419737346, "grad_norm": 0.19326026737689972, "learning_rate": 0.002, "loss": 2.6446, "step": 6100 }, { "epoch": 0.012172478643376259, "grad_norm": 0.19333821535110474, "learning_rate": 0.002, "loss": 2.6421, "step": 6110 }, { "epoch": 0.012192400867015172, "grad_norm": 0.2308701127767563, "learning_rate": 0.002, "loss": 2.6421, "step": 6120 }, { "epoch": 0.012212323090654087, "grad_norm": 0.2107623666524887, "learning_rate": 0.002, "loss": 2.6531, "step": 6130 }, { "epoch": 0.012232245314293, "grad_norm": 0.24213136732578278, "learning_rate": 0.002, "loss": 2.6407, "step": 6140 }, { "epoch": 0.012252167537931914, "grad_norm": 0.2391958236694336, "learning_rate": 0.002, "loss": 2.6349, "step": 6150 }, { "epoch": 0.012272089761570827, "grad_norm": 0.23523858189582825, "learning_rate": 0.002, "loss": 2.6285, "step": 6160 }, { "epoch": 0.01229201198520974, "grad_norm": 0.2609156370162964, "learning_rate": 0.002, "loss": 2.6485, "step": 6170 }, { "epoch": 0.012311934208848655, "grad_norm": 0.20153500139713287, "learning_rate": 0.002, "loss": 2.6308, "step": 6180 }, { "epoch": 0.012331856432487569, "grad_norm": 0.17557115852832794, "learning_rate": 0.002, "loss": 2.6518, "step": 6190 }, { "epoch": 0.012351778656126482, "grad_norm": 0.3081878423690796, "learning_rate": 0.002, "loss": 2.6398, "step": 6200 }, { "epoch": 0.012371700879765395, "grad_norm": 0.18677663803100586, "learning_rate": 0.002, "loss": 2.6485, "step": 6210 }, { "epoch": 0.01239162310340431, "grad_norm": 0.23560264706611633, "learning_rate": 0.002, "loss": 2.6175, "step": 6220 }, { "epoch": 0.012411545327043224, "grad_norm": 0.23332323133945465, "learning_rate": 0.002, "loss": 2.6288, "step": 6230 }, { "epoch": 0.012431467550682137, "grad_norm": 0.1844009906053543, "learning_rate": 0.002, "loss": 2.6135, "step": 6240 }, { "epoch": 0.01245138977432105, "grad_norm": 0.23262456059455872, "learning_rate": 0.002, "loss": 2.6305, "step": 6250 }, { "epoch": 0.012471311997959963, "grad_norm": 0.22835469245910645, "learning_rate": 0.002, "loss": 2.6478, "step": 6260 }, { "epoch": 0.012491234221598879, "grad_norm": 0.18260468542575836, "learning_rate": 0.002, "loss": 2.6369, "step": 6270 }, { "epoch": 0.012511156445237792, "grad_norm": 0.24717769026756287, "learning_rate": 0.002, "loss": 2.6289, "step": 6280 }, { "epoch": 0.012531078668876705, "grad_norm": 0.2879694700241089, "learning_rate": 0.002, "loss": 2.6408, "step": 6290 }, { "epoch": 0.012551000892515618, "grad_norm": 0.18451528251171112, "learning_rate": 0.002, "loss": 2.6419, "step": 6300 }, { "epoch": 0.012570923116154533, "grad_norm": 0.20520569384098053, "learning_rate": 0.002, "loss": 2.654, "step": 6310 }, { "epoch": 0.012590845339793447, "grad_norm": 0.20881463587284088, "learning_rate": 0.002, "loss": 2.6429, "step": 6320 }, { "epoch": 0.01261076756343236, "grad_norm": 0.243753582239151, "learning_rate": 0.002, "loss": 2.638, "step": 6330 }, { "epoch": 0.012630689787071273, "grad_norm": 0.21805480122566223, "learning_rate": 0.002, "loss": 2.6402, "step": 6340 }, { "epoch": 0.012650612010710188, "grad_norm": 0.22613516449928284, "learning_rate": 0.002, "loss": 2.6478, "step": 6350 }, { "epoch": 0.012670534234349102, "grad_norm": 0.1931035965681076, "learning_rate": 0.002, "loss": 2.6247, "step": 6360 }, { "epoch": 0.012690456457988015, "grad_norm": 0.2519269585609436, "learning_rate": 0.002, "loss": 2.6449, "step": 6370 }, { "epoch": 0.012710378681626928, "grad_norm": 0.2336338609457016, "learning_rate": 0.002, "loss": 2.6329, "step": 6380 }, { "epoch": 0.012730300905265841, "grad_norm": 0.2310912311077118, "learning_rate": 0.002, "loss": 2.6287, "step": 6390 }, { "epoch": 0.012750223128904756, "grad_norm": 0.22870244085788727, "learning_rate": 0.002, "loss": 2.6426, "step": 6400 }, { "epoch": 0.01277014535254367, "grad_norm": 0.270793616771698, "learning_rate": 0.002, "loss": 2.64, "step": 6410 }, { "epoch": 0.012790067576182583, "grad_norm": 0.21005769073963165, "learning_rate": 0.002, "loss": 2.6266, "step": 6420 }, { "epoch": 0.012809989799821496, "grad_norm": 0.2652980387210846, "learning_rate": 0.002, "loss": 2.6509, "step": 6430 }, { "epoch": 0.012829912023460411, "grad_norm": 0.19125047326087952, "learning_rate": 0.002, "loss": 2.6251, "step": 6440 }, { "epoch": 0.012849834247099325, "grad_norm": 0.2931942939758301, "learning_rate": 0.002, "loss": 2.6514, "step": 6450 }, { "epoch": 0.012869756470738238, "grad_norm": 0.20146970450878143, "learning_rate": 0.002, "loss": 2.6349, "step": 6460 }, { "epoch": 0.012889678694377151, "grad_norm": 0.1947196125984192, "learning_rate": 0.002, "loss": 2.6253, "step": 6470 }, { "epoch": 0.012909600918016064, "grad_norm": 0.253132164478302, "learning_rate": 0.002, "loss": 2.6393, "step": 6480 }, { "epoch": 0.01292952314165498, "grad_norm": 0.212583988904953, "learning_rate": 0.002, "loss": 2.6274, "step": 6490 }, { "epoch": 0.012949445365293893, "grad_norm": 0.2624717056751251, "learning_rate": 0.002, "loss": 2.6355, "step": 6500 }, { "epoch": 0.012969367588932806, "grad_norm": 0.18376167118549347, "learning_rate": 0.002, "loss": 2.6464, "step": 6510 }, { "epoch": 0.01298928981257172, "grad_norm": 0.22605857253074646, "learning_rate": 0.002, "loss": 2.6409, "step": 6520 }, { "epoch": 0.013009212036210634, "grad_norm": 0.2862149477005005, "learning_rate": 0.002, "loss": 2.6436, "step": 6530 }, { "epoch": 0.013029134259849548, "grad_norm": 0.22679361701011658, "learning_rate": 0.002, "loss": 2.6461, "step": 6540 }, { "epoch": 0.013049056483488461, "grad_norm": 0.22347143292427063, "learning_rate": 0.002, "loss": 2.6482, "step": 6550 }, { "epoch": 0.013068978707127374, "grad_norm": 0.20335768163204193, "learning_rate": 0.002, "loss": 2.6416, "step": 6560 }, { "epoch": 0.01308890093076629, "grad_norm": 0.30125823616981506, "learning_rate": 0.002, "loss": 2.6464, "step": 6570 }, { "epoch": 0.013108823154405203, "grad_norm": 0.185206800699234, "learning_rate": 0.002, "loss": 2.6579, "step": 6580 }, { "epoch": 0.013128745378044116, "grad_norm": 0.2015102505683899, "learning_rate": 0.002, "loss": 2.6236, "step": 6590 }, { "epoch": 0.013148667601683029, "grad_norm": 0.2928968071937561, "learning_rate": 0.002, "loss": 2.6412, "step": 6600 }, { "epoch": 0.013168589825321942, "grad_norm": 0.22095546126365662, "learning_rate": 0.002, "loss": 2.6355, "step": 6610 }, { "epoch": 0.013188512048960857, "grad_norm": 0.17720098793506622, "learning_rate": 0.002, "loss": 2.6447, "step": 6620 }, { "epoch": 0.01320843427259977, "grad_norm": 0.23646119236946106, "learning_rate": 0.002, "loss": 2.6368, "step": 6630 }, { "epoch": 0.013228356496238684, "grad_norm": 0.25978976488113403, "learning_rate": 0.002, "loss": 2.655, "step": 6640 }, { "epoch": 0.013248278719877597, "grad_norm": 0.2158976048231125, "learning_rate": 0.002, "loss": 2.6334, "step": 6650 }, { "epoch": 0.013268200943516512, "grad_norm": 0.22559994459152222, "learning_rate": 0.002, "loss": 2.6479, "step": 6660 }, { "epoch": 0.013288123167155426, "grad_norm": 0.20094847679138184, "learning_rate": 0.002, "loss": 2.6227, "step": 6670 }, { "epoch": 0.013308045390794339, "grad_norm": 0.23317670822143555, "learning_rate": 0.002, "loss": 2.6416, "step": 6680 }, { "epoch": 0.013327967614433252, "grad_norm": 0.21865856647491455, "learning_rate": 0.002, "loss": 2.6372, "step": 6690 }, { "epoch": 0.013347889838072165, "grad_norm": 0.23539967834949493, "learning_rate": 0.002, "loss": 2.6344, "step": 6700 }, { "epoch": 0.01336781206171108, "grad_norm": 0.26446881890296936, "learning_rate": 0.002, "loss": 2.6334, "step": 6710 }, { "epoch": 0.013387734285349994, "grad_norm": 0.25341010093688965, "learning_rate": 0.002, "loss": 2.6316, "step": 6720 }, { "epoch": 0.013407656508988907, "grad_norm": 0.2164037674665451, "learning_rate": 0.002, "loss": 2.636, "step": 6730 }, { "epoch": 0.01342757873262782, "grad_norm": 0.2042967975139618, "learning_rate": 0.002, "loss": 2.6312, "step": 6740 }, { "epoch": 0.013447500956266735, "grad_norm": 0.1951330006122589, "learning_rate": 0.002, "loss": 2.6303, "step": 6750 }, { "epoch": 0.013467423179905649, "grad_norm": 0.25764259696006775, "learning_rate": 0.002, "loss": 2.6413, "step": 6760 }, { "epoch": 0.013487345403544562, "grad_norm": 0.19745519757270813, "learning_rate": 0.002, "loss": 2.6201, "step": 6770 }, { "epoch": 0.013507267627183475, "grad_norm": 0.21118402481079102, "learning_rate": 0.002, "loss": 2.6436, "step": 6780 }, { "epoch": 0.01352718985082239, "grad_norm": 0.17575572431087494, "learning_rate": 0.002, "loss": 2.6495, "step": 6790 }, { "epoch": 0.013547112074461303, "grad_norm": 0.22562982141971588, "learning_rate": 0.002, "loss": 2.6371, "step": 6800 }, { "epoch": 0.013567034298100217, "grad_norm": 0.2030625343322754, "learning_rate": 0.002, "loss": 2.6447, "step": 6810 }, { "epoch": 0.01358695652173913, "grad_norm": 0.19910331070423126, "learning_rate": 0.002, "loss": 2.6408, "step": 6820 }, { "epoch": 0.013606878745378043, "grad_norm": 0.21103836596012115, "learning_rate": 0.002, "loss": 2.6314, "step": 6830 }, { "epoch": 0.013626800969016958, "grad_norm": 0.20587223768234253, "learning_rate": 0.002, "loss": 2.6469, "step": 6840 }, { "epoch": 0.013646723192655872, "grad_norm": 0.2176707535982132, "learning_rate": 0.002, "loss": 2.6316, "step": 6850 }, { "epoch": 0.013666645416294785, "grad_norm": 0.19651858508586884, "learning_rate": 0.002, "loss": 2.6238, "step": 6860 }, { "epoch": 0.013686567639933698, "grad_norm": 0.2879965901374817, "learning_rate": 0.002, "loss": 2.6301, "step": 6870 }, { "epoch": 0.013706489863572613, "grad_norm": 0.20563893020153046, "learning_rate": 0.002, "loss": 2.6381, "step": 6880 }, { "epoch": 0.013726412087211527, "grad_norm": 0.20421193540096283, "learning_rate": 0.002, "loss": 2.6452, "step": 6890 }, { "epoch": 0.01374633431085044, "grad_norm": 0.23565742373466492, "learning_rate": 0.002, "loss": 2.6442, "step": 6900 }, { "epoch": 0.013766256534489353, "grad_norm": 0.24246038496494293, "learning_rate": 0.002, "loss": 2.6339, "step": 6910 }, { "epoch": 0.013786178758128266, "grad_norm": 0.16877904534339905, "learning_rate": 0.002, "loss": 2.6364, "step": 6920 }, { "epoch": 0.013806100981767181, "grad_norm": 0.22356858849525452, "learning_rate": 0.002, "loss": 2.6224, "step": 6930 }, { "epoch": 0.013826023205406095, "grad_norm": 0.2002035230398178, "learning_rate": 0.002, "loss": 2.649, "step": 6940 }, { "epoch": 0.013845945429045008, "grad_norm": 0.2189420759677887, "learning_rate": 0.002, "loss": 2.6258, "step": 6950 }, { "epoch": 0.013865867652683921, "grad_norm": 0.19798795878887177, "learning_rate": 0.002, "loss": 2.6294, "step": 6960 }, { "epoch": 0.013885789876322836, "grad_norm": 0.228176087141037, "learning_rate": 0.002, "loss": 2.6413, "step": 6970 }, { "epoch": 0.01390571209996175, "grad_norm": 0.17356382310390472, "learning_rate": 0.002, "loss": 2.6321, "step": 6980 }, { "epoch": 0.013925634323600663, "grad_norm": 0.26661837100982666, "learning_rate": 0.002, "loss": 2.6307, "step": 6990 }, { "epoch": 0.013945556547239576, "grad_norm": 0.24382898211479187, "learning_rate": 0.002, "loss": 2.6314, "step": 7000 }, { "epoch": 0.013965478770878491, "grad_norm": 0.1888446807861328, "learning_rate": 0.002, "loss": 2.6269, "step": 7010 }, { "epoch": 0.013985400994517404, "grad_norm": 0.20862562954425812, "learning_rate": 0.002, "loss": 2.6334, "step": 7020 }, { "epoch": 0.014005323218156318, "grad_norm": 0.21248072385787964, "learning_rate": 0.002, "loss": 2.6303, "step": 7030 }, { "epoch": 0.014025245441795231, "grad_norm": 0.17312243580818176, "learning_rate": 0.002, "loss": 2.6421, "step": 7040 }, { "epoch": 0.014045167665434144, "grad_norm": 0.2823936641216278, "learning_rate": 0.002, "loss": 2.6412, "step": 7050 }, { "epoch": 0.01406508988907306, "grad_norm": 0.17755654454231262, "learning_rate": 0.002, "loss": 2.6326, "step": 7060 }, { "epoch": 0.014085012112711973, "grad_norm": 0.22349777817726135, "learning_rate": 0.002, "loss": 2.6415, "step": 7070 }, { "epoch": 0.014104934336350886, "grad_norm": 0.2064586728811264, "learning_rate": 0.002, "loss": 2.6258, "step": 7080 }, { "epoch": 0.0141248565599898, "grad_norm": 0.18739019334316254, "learning_rate": 0.002, "loss": 2.6303, "step": 7090 }, { "epoch": 0.014144778783628714, "grad_norm": 0.22709085047245026, "learning_rate": 0.002, "loss": 2.6449, "step": 7100 }, { "epoch": 0.014164701007267627, "grad_norm": 0.2201376110315323, "learning_rate": 0.002, "loss": 2.6332, "step": 7110 }, { "epoch": 0.01418462323090654, "grad_norm": 0.20301729440689087, "learning_rate": 0.002, "loss": 2.6461, "step": 7120 }, { "epoch": 0.014204545454545454, "grad_norm": 0.23440340161323547, "learning_rate": 0.002, "loss": 2.6356, "step": 7130 }, { "epoch": 0.014224467678184369, "grad_norm": 0.24827584624290466, "learning_rate": 0.002, "loss": 2.6347, "step": 7140 }, { "epoch": 0.014244389901823282, "grad_norm": 0.204240620136261, "learning_rate": 0.002, "loss": 2.6435, "step": 7150 }, { "epoch": 0.014264312125462196, "grad_norm": 0.1994403898715973, "learning_rate": 0.002, "loss": 2.6308, "step": 7160 }, { "epoch": 0.014284234349101109, "grad_norm": 0.25609612464904785, "learning_rate": 0.002, "loss": 2.6239, "step": 7170 }, { "epoch": 0.014304156572740022, "grad_norm": 0.224915012717247, "learning_rate": 0.002, "loss": 2.6124, "step": 7180 }, { "epoch": 0.014324078796378937, "grad_norm": 0.2361534833908081, "learning_rate": 0.002, "loss": 2.6457, "step": 7190 }, { "epoch": 0.01434400102001785, "grad_norm": 0.21256154775619507, "learning_rate": 0.002, "loss": 2.6155, "step": 7200 }, { "epoch": 0.014363923243656764, "grad_norm": 0.1719050109386444, "learning_rate": 0.002, "loss": 2.601, "step": 7210 }, { "epoch": 0.014383845467295677, "grad_norm": 0.30747607350349426, "learning_rate": 0.002, "loss": 2.6239, "step": 7220 }, { "epoch": 0.014403767690934592, "grad_norm": 0.17914436757564545, "learning_rate": 0.002, "loss": 2.6097, "step": 7230 }, { "epoch": 0.014423689914573505, "grad_norm": 0.21543174982070923, "learning_rate": 0.002, "loss": 2.6301, "step": 7240 }, { "epoch": 0.014443612138212419, "grad_norm": 0.23945750296115875, "learning_rate": 0.002, "loss": 2.6115, "step": 7250 }, { "epoch": 0.014463534361851332, "grad_norm": 0.19698640704154968, "learning_rate": 0.002, "loss": 2.622, "step": 7260 }, { "epoch": 0.014483456585490245, "grad_norm": 0.23643916845321655, "learning_rate": 0.002, "loss": 2.6239, "step": 7270 }, { "epoch": 0.01450337880912916, "grad_norm": 0.1940932422876358, "learning_rate": 0.002, "loss": 2.635, "step": 7280 }, { "epoch": 0.014523301032768074, "grad_norm": 0.2381376475095749, "learning_rate": 0.002, "loss": 2.6402, "step": 7290 }, { "epoch": 0.014543223256406987, "grad_norm": 0.21240642666816711, "learning_rate": 0.002, "loss": 2.617, "step": 7300 }, { "epoch": 0.0145631454800459, "grad_norm": 0.1762395203113556, "learning_rate": 0.002, "loss": 2.6406, "step": 7310 }, { "epoch": 0.014583067703684815, "grad_norm": 0.17772123217582703, "learning_rate": 0.002, "loss": 2.6303, "step": 7320 }, { "epoch": 0.014602989927323728, "grad_norm": 0.23144158720970154, "learning_rate": 0.002, "loss": 2.6124, "step": 7330 }, { "epoch": 0.014622912150962642, "grad_norm": 0.2095419466495514, "learning_rate": 0.002, "loss": 2.6228, "step": 7340 }, { "epoch": 0.014642834374601555, "grad_norm": 0.2120116502046585, "learning_rate": 0.002, "loss": 2.6208, "step": 7350 }, { "epoch": 0.01466275659824047, "grad_norm": 0.19468773901462555, "learning_rate": 0.002, "loss": 2.6342, "step": 7360 }, { "epoch": 0.014682678821879383, "grad_norm": 0.1765110045671463, "learning_rate": 0.002, "loss": 2.6172, "step": 7370 }, { "epoch": 0.014702601045518297, "grad_norm": 0.20155897736549377, "learning_rate": 0.002, "loss": 2.6224, "step": 7380 }, { "epoch": 0.01472252326915721, "grad_norm": 0.21776604652404785, "learning_rate": 0.002, "loss": 2.6395, "step": 7390 }, { "epoch": 0.014742445492796123, "grad_norm": 0.197760671377182, "learning_rate": 0.002, "loss": 2.6351, "step": 7400 }, { "epoch": 0.014762367716435038, "grad_norm": 0.2081114649772644, "learning_rate": 0.002, "loss": 2.6202, "step": 7410 }, { "epoch": 0.014782289940073952, "grad_norm": 0.23737777769565582, "learning_rate": 0.002, "loss": 2.6327, "step": 7420 }, { "epoch": 0.014802212163712865, "grad_norm": 0.19918616116046906, "learning_rate": 0.002, "loss": 2.6208, "step": 7430 }, { "epoch": 0.014822134387351778, "grad_norm": 0.20668719708919525, "learning_rate": 0.002, "loss": 2.619, "step": 7440 }, { "epoch": 0.014842056610990693, "grad_norm": 0.18955446779727936, "learning_rate": 0.002, "loss": 2.6331, "step": 7450 }, { "epoch": 0.014861978834629606, "grad_norm": 0.18515463173389435, "learning_rate": 0.002, "loss": 2.6371, "step": 7460 }, { "epoch": 0.01488190105826852, "grad_norm": 0.20459194481372833, "learning_rate": 0.002, "loss": 2.6373, "step": 7470 }, { "epoch": 0.014901823281907433, "grad_norm": 0.19902509450912476, "learning_rate": 0.002, "loss": 2.6313, "step": 7480 }, { "epoch": 0.014921745505546346, "grad_norm": 0.21148434281349182, "learning_rate": 0.002, "loss": 2.6254, "step": 7490 }, { "epoch": 0.014941667729185261, "grad_norm": 0.18466147780418396, "learning_rate": 0.002, "loss": 2.6129, "step": 7500 }, { "epoch": 0.014961589952824175, "grad_norm": 0.19415001571178436, "learning_rate": 0.002, "loss": 2.6276, "step": 7510 }, { "epoch": 0.014981512176463088, "grad_norm": 0.22488979995250702, "learning_rate": 0.002, "loss": 2.6253, "step": 7520 }, { "epoch": 0.015001434400102001, "grad_norm": 0.19628497958183289, "learning_rate": 0.002, "loss": 2.6158, "step": 7530 }, { "epoch": 0.015021356623740916, "grad_norm": 0.21710678935050964, "learning_rate": 0.002, "loss": 2.628, "step": 7540 }, { "epoch": 0.01504127884737983, "grad_norm": 0.18917298316955566, "learning_rate": 0.002, "loss": 2.6165, "step": 7550 }, { "epoch": 0.015061201071018743, "grad_norm": 0.271056205034256, "learning_rate": 0.002, "loss": 2.6201, "step": 7560 }, { "epoch": 0.015081123294657656, "grad_norm": 0.1681516319513321, "learning_rate": 0.002, "loss": 2.6297, "step": 7570 }, { "epoch": 0.015101045518296571, "grad_norm": 0.1813138872385025, "learning_rate": 0.002, "loss": 2.6368, "step": 7580 }, { "epoch": 0.015120967741935484, "grad_norm": 0.19040675461292267, "learning_rate": 0.002, "loss": 2.622, "step": 7590 }, { "epoch": 0.015140889965574398, "grad_norm": 0.20127856731414795, "learning_rate": 0.002, "loss": 2.6252, "step": 7600 }, { "epoch": 0.015160812189213311, "grad_norm": 0.21762199699878693, "learning_rate": 0.002, "loss": 2.6227, "step": 7610 }, { "epoch": 0.015180734412852224, "grad_norm": 0.2260235697031021, "learning_rate": 0.002, "loss": 2.6443, "step": 7620 }, { "epoch": 0.01520065663649114, "grad_norm": 0.23314708471298218, "learning_rate": 0.002, "loss": 2.6313, "step": 7630 }, { "epoch": 0.015220578860130052, "grad_norm": 0.22361336648464203, "learning_rate": 0.002, "loss": 2.6418, "step": 7640 }, { "epoch": 0.015240501083768966, "grad_norm": 0.18788130581378937, "learning_rate": 0.002, "loss": 2.6137, "step": 7650 }, { "epoch": 0.015260423307407879, "grad_norm": 0.20453357696533203, "learning_rate": 0.002, "loss": 2.6358, "step": 7660 }, { "epoch": 0.015280345531046794, "grad_norm": 0.21934033930301666, "learning_rate": 0.002, "loss": 2.6112, "step": 7670 }, { "epoch": 0.015300267754685707, "grad_norm": 0.22164125740528107, "learning_rate": 0.002, "loss": 2.632, "step": 7680 }, { "epoch": 0.01532018997832462, "grad_norm": 0.243486225605011, "learning_rate": 0.002, "loss": 2.6204, "step": 7690 }, { "epoch": 0.015340112201963534, "grad_norm": 0.20412059128284454, "learning_rate": 0.002, "loss": 2.6304, "step": 7700 }, { "epoch": 0.015360034425602447, "grad_norm": 0.23793528974056244, "learning_rate": 0.002, "loss": 2.6226, "step": 7710 }, { "epoch": 0.015379956649241362, "grad_norm": 0.20716550946235657, "learning_rate": 0.002, "loss": 2.6376, "step": 7720 }, { "epoch": 0.015399878872880276, "grad_norm": 0.30941158533096313, "learning_rate": 0.002, "loss": 2.6358, "step": 7730 }, { "epoch": 0.015419801096519189, "grad_norm": 0.19951249659061432, "learning_rate": 0.002, "loss": 2.6288, "step": 7740 }, { "epoch": 0.015439723320158102, "grad_norm": 0.21383458375930786, "learning_rate": 0.002, "loss": 2.6317, "step": 7750 }, { "epoch": 0.015459645543797017, "grad_norm": 0.20977994799613953, "learning_rate": 0.002, "loss": 2.6317, "step": 7760 }, { "epoch": 0.01547956776743593, "grad_norm": 0.22788937389850616, "learning_rate": 0.002, "loss": 2.6525, "step": 7770 }, { "epoch": 0.015499489991074844, "grad_norm": 0.2589676082134247, "learning_rate": 0.002, "loss": 2.6179, "step": 7780 }, { "epoch": 0.015519412214713757, "grad_norm": 0.18173816800117493, "learning_rate": 0.002, "loss": 2.6181, "step": 7790 }, { "epoch": 0.015539334438352672, "grad_norm": 0.22317740321159363, "learning_rate": 0.002, "loss": 2.6127, "step": 7800 }, { "epoch": 0.015559256661991585, "grad_norm": 0.1706157773733139, "learning_rate": 0.002, "loss": 2.6302, "step": 7810 }, { "epoch": 0.015579178885630499, "grad_norm": 0.24326510727405548, "learning_rate": 0.002, "loss": 2.6183, "step": 7820 }, { "epoch": 0.015599101109269412, "grad_norm": 0.19466236233711243, "learning_rate": 0.002, "loss": 2.6394, "step": 7830 }, { "epoch": 0.015619023332908325, "grad_norm": 0.22515608370304108, "learning_rate": 0.002, "loss": 2.6238, "step": 7840 }, { "epoch": 0.01563894555654724, "grad_norm": 0.17300431430339813, "learning_rate": 0.002, "loss": 2.6346, "step": 7850 }, { "epoch": 0.01565886778018615, "grad_norm": 0.17736375331878662, "learning_rate": 0.002, "loss": 2.6328, "step": 7860 }, { "epoch": 0.015678790003825067, "grad_norm": 0.2322779893875122, "learning_rate": 0.002, "loss": 2.6281, "step": 7870 }, { "epoch": 0.015698712227463982, "grad_norm": 0.24052216112613678, "learning_rate": 0.002, "loss": 2.619, "step": 7880 }, { "epoch": 0.015718634451102893, "grad_norm": 0.1773671656847, "learning_rate": 0.002, "loss": 2.6129, "step": 7890 }, { "epoch": 0.01573855667474181, "grad_norm": 0.2085932046175003, "learning_rate": 0.002, "loss": 2.6225, "step": 7900 }, { "epoch": 0.015758478898380723, "grad_norm": 0.21418945491313934, "learning_rate": 0.002, "loss": 2.6066, "step": 7910 }, { "epoch": 0.015778401122019635, "grad_norm": 0.23653586208820343, "learning_rate": 0.002, "loss": 2.6158, "step": 7920 }, { "epoch": 0.01579832334565855, "grad_norm": 0.21534159779548645, "learning_rate": 0.002, "loss": 2.6201, "step": 7930 }, { "epoch": 0.01581824556929746, "grad_norm": 0.1714116781949997, "learning_rate": 0.002, "loss": 2.6368, "step": 7940 }, { "epoch": 0.015838167792936376, "grad_norm": 0.2017006278038025, "learning_rate": 0.002, "loss": 2.6194, "step": 7950 }, { "epoch": 0.01585809001657529, "grad_norm": 0.5316706895828247, "learning_rate": 0.002, "loss": 2.6309, "step": 7960 }, { "epoch": 0.015878012240214203, "grad_norm": 0.20379069447517395, "learning_rate": 0.002, "loss": 2.6257, "step": 7970 }, { "epoch": 0.015897934463853118, "grad_norm": 0.16030801832675934, "learning_rate": 0.002, "loss": 2.6085, "step": 7980 }, { "epoch": 0.01591785668749203, "grad_norm": 0.19772140681743622, "learning_rate": 0.002, "loss": 2.6213, "step": 7990 }, { "epoch": 0.015937778911130945, "grad_norm": 0.18883393704891205, "learning_rate": 0.002, "loss": 2.6181, "step": 8000 }, { "epoch": 0.01595770113476986, "grad_norm": 0.19449083507061005, "learning_rate": 0.002, "loss": 2.6137, "step": 8010 }, { "epoch": 0.01597762335840877, "grad_norm": 0.17725993692874908, "learning_rate": 0.002, "loss": 2.6225, "step": 8020 }, { "epoch": 0.015997545582047686, "grad_norm": 0.20772479474544525, "learning_rate": 0.002, "loss": 2.6309, "step": 8030 }, { "epoch": 0.016017467805686598, "grad_norm": 0.21998099982738495, "learning_rate": 0.002, "loss": 2.6137, "step": 8040 }, { "epoch": 0.016037390029325513, "grad_norm": 0.2017204910516739, "learning_rate": 0.002, "loss": 2.636, "step": 8050 }, { "epoch": 0.016057312252964428, "grad_norm": 0.2018091231584549, "learning_rate": 0.002, "loss": 2.6258, "step": 8060 }, { "epoch": 0.01607723447660334, "grad_norm": 0.20748884975910187, "learning_rate": 0.002, "loss": 2.6246, "step": 8070 }, { "epoch": 0.016097156700242254, "grad_norm": 0.19103269279003143, "learning_rate": 0.002, "loss": 2.6336, "step": 8080 }, { "epoch": 0.01611707892388117, "grad_norm": 0.17840303480625153, "learning_rate": 0.002, "loss": 2.6195, "step": 8090 }, { "epoch": 0.01613700114752008, "grad_norm": 0.22755716741085052, "learning_rate": 0.002, "loss": 2.6335, "step": 8100 }, { "epoch": 0.016156923371158996, "grad_norm": 0.1826552003622055, "learning_rate": 0.002, "loss": 2.6278, "step": 8110 }, { "epoch": 0.016176845594797908, "grad_norm": 0.1930696815252304, "learning_rate": 0.002, "loss": 2.6229, "step": 8120 }, { "epoch": 0.016196767818436823, "grad_norm": 0.18572747707366943, "learning_rate": 0.002, "loss": 2.6268, "step": 8130 }, { "epoch": 0.016216690042075738, "grad_norm": 0.19687660038471222, "learning_rate": 0.002, "loss": 2.6345, "step": 8140 }, { "epoch": 0.01623661226571465, "grad_norm": 0.1636424958705902, "learning_rate": 0.002, "loss": 2.631, "step": 8150 }, { "epoch": 0.016256534489353564, "grad_norm": 0.20958632230758667, "learning_rate": 0.002, "loss": 2.6089, "step": 8160 }, { "epoch": 0.016276456712992476, "grad_norm": 0.17690211534500122, "learning_rate": 0.002, "loss": 2.6065, "step": 8170 }, { "epoch": 0.01629637893663139, "grad_norm": 0.1945447325706482, "learning_rate": 0.002, "loss": 2.6172, "step": 8180 }, { "epoch": 0.016316301160270306, "grad_norm": 0.16679024696350098, "learning_rate": 0.002, "loss": 2.6243, "step": 8190 }, { "epoch": 0.016336223383909217, "grad_norm": 0.2409147024154663, "learning_rate": 0.002, "loss": 2.6361, "step": 8200 }, { "epoch": 0.016356145607548132, "grad_norm": 0.20243161916732788, "learning_rate": 0.002, "loss": 2.6148, "step": 8210 }, { "epoch": 0.016376067831187047, "grad_norm": 0.16285614669322968, "learning_rate": 0.002, "loss": 2.6179, "step": 8220 }, { "epoch": 0.01639599005482596, "grad_norm": 0.16587543487548828, "learning_rate": 0.002, "loss": 2.6261, "step": 8230 }, { "epoch": 0.016415912278464874, "grad_norm": 0.2074224054813385, "learning_rate": 0.002, "loss": 2.6289, "step": 8240 }, { "epoch": 0.016435834502103785, "grad_norm": 0.17113614082336426, "learning_rate": 0.002, "loss": 2.6197, "step": 8250 }, { "epoch": 0.0164557567257427, "grad_norm": 0.2455836683511734, "learning_rate": 0.002, "loss": 2.6197, "step": 8260 }, { "epoch": 0.016475678949381616, "grad_norm": 0.1803913563489914, "learning_rate": 0.002, "loss": 2.6159, "step": 8270 }, { "epoch": 0.016495601173020527, "grad_norm": 0.20337826013565063, "learning_rate": 0.002, "loss": 2.6359, "step": 8280 }, { "epoch": 0.016515523396659442, "grad_norm": 0.22313682734966278, "learning_rate": 0.002, "loss": 2.6237, "step": 8290 }, { "epoch": 0.016535445620298354, "grad_norm": 0.18791629374027252, "learning_rate": 0.002, "loss": 2.6207, "step": 8300 }, { "epoch": 0.01655536784393727, "grad_norm": 0.1847572773694992, "learning_rate": 0.002, "loss": 2.6308, "step": 8310 }, { "epoch": 0.016575290067576184, "grad_norm": 0.19051875174045563, "learning_rate": 0.002, "loss": 2.6186, "step": 8320 }, { "epoch": 0.016595212291215095, "grad_norm": 0.20058950781822205, "learning_rate": 0.002, "loss": 2.6259, "step": 8330 }, { "epoch": 0.01661513451485401, "grad_norm": 0.16262570023536682, "learning_rate": 0.002, "loss": 2.6161, "step": 8340 }, { "epoch": 0.016635056738492925, "grad_norm": 0.2185358703136444, "learning_rate": 0.002, "loss": 2.6299, "step": 8350 }, { "epoch": 0.016654978962131837, "grad_norm": 0.22736473381519318, "learning_rate": 0.002, "loss": 2.614, "step": 8360 }, { "epoch": 0.016674901185770752, "grad_norm": 0.1761915683746338, "learning_rate": 0.002, "loss": 2.6419, "step": 8370 }, { "epoch": 0.016694823409409663, "grad_norm": 0.21741671860218048, "learning_rate": 0.002, "loss": 2.6404, "step": 8380 }, { "epoch": 0.01671474563304858, "grad_norm": 0.21235212683677673, "learning_rate": 0.002, "loss": 2.6298, "step": 8390 }, { "epoch": 0.016734667856687493, "grad_norm": 0.18045401573181152, "learning_rate": 0.002, "loss": 2.613, "step": 8400 }, { "epoch": 0.016754590080326405, "grad_norm": 0.18937979638576508, "learning_rate": 0.002, "loss": 2.6118, "step": 8410 }, { "epoch": 0.01677451230396532, "grad_norm": 0.20015771687030792, "learning_rate": 0.002, "loss": 2.6212, "step": 8420 }, { "epoch": 0.01679443452760423, "grad_norm": 0.18350081145763397, "learning_rate": 0.002, "loss": 2.6237, "step": 8430 }, { "epoch": 0.016814356751243147, "grad_norm": 0.32113710045814514, "learning_rate": 0.002, "loss": 2.634, "step": 8440 }, { "epoch": 0.01683427897488206, "grad_norm": 0.17454063892364502, "learning_rate": 0.002, "loss": 2.6077, "step": 8450 }, { "epoch": 0.016854201198520973, "grad_norm": 0.2955682575702667, "learning_rate": 0.002, "loss": 2.6224, "step": 8460 }, { "epoch": 0.016874123422159888, "grad_norm": 0.20464195311069489, "learning_rate": 0.002, "loss": 2.6103, "step": 8470 }, { "epoch": 0.0168940456457988, "grad_norm": 0.20598678290843964, "learning_rate": 0.002, "loss": 2.6188, "step": 8480 }, { "epoch": 0.016913967869437715, "grad_norm": 0.19879212975502014, "learning_rate": 0.002, "loss": 2.6355, "step": 8490 }, { "epoch": 0.01693389009307663, "grad_norm": 0.1698692888021469, "learning_rate": 0.002, "loss": 2.616, "step": 8500 }, { "epoch": 0.01695381231671554, "grad_norm": 0.1937219202518463, "learning_rate": 0.002, "loss": 2.6136, "step": 8510 }, { "epoch": 0.016973734540354456, "grad_norm": 0.18803144991397858, "learning_rate": 0.002, "loss": 2.6211, "step": 8520 }, { "epoch": 0.01699365676399337, "grad_norm": 0.17571832239627838, "learning_rate": 0.002, "loss": 2.6135, "step": 8530 }, { "epoch": 0.017013578987632283, "grad_norm": 0.2141956090927124, "learning_rate": 0.002, "loss": 2.6116, "step": 8540 }, { "epoch": 0.017033501211271198, "grad_norm": 0.17624591290950775, "learning_rate": 0.002, "loss": 2.6214, "step": 8550 }, { "epoch": 0.01705342343491011, "grad_norm": 0.15955126285552979, "learning_rate": 0.002, "loss": 2.614, "step": 8560 }, { "epoch": 0.017073345658549025, "grad_norm": 0.23790714144706726, "learning_rate": 0.002, "loss": 2.5995, "step": 8570 }, { "epoch": 0.01709326788218794, "grad_norm": 0.1944655328989029, "learning_rate": 0.002, "loss": 2.6168, "step": 8580 }, { "epoch": 0.01711319010582685, "grad_norm": 0.2030920684337616, "learning_rate": 0.002, "loss": 2.6236, "step": 8590 }, { "epoch": 0.017133112329465766, "grad_norm": 0.19406576454639435, "learning_rate": 0.002, "loss": 2.6165, "step": 8600 }, { "epoch": 0.017153034553104678, "grad_norm": 0.18993134796619415, "learning_rate": 0.002, "loss": 2.6276, "step": 8610 }, { "epoch": 0.017172956776743593, "grad_norm": 0.1887965351343155, "learning_rate": 0.002, "loss": 2.6036, "step": 8620 }, { "epoch": 0.017192879000382508, "grad_norm": 0.1991686224937439, "learning_rate": 0.002, "loss": 2.6111, "step": 8630 }, { "epoch": 0.01721280122402142, "grad_norm": 0.1588328629732132, "learning_rate": 0.002, "loss": 2.6034, "step": 8640 }, { "epoch": 0.017232723447660334, "grad_norm": 0.18346349895000458, "learning_rate": 0.002, "loss": 2.6124, "step": 8650 }, { "epoch": 0.01725264567129925, "grad_norm": 0.16575075685977936, "learning_rate": 0.002, "loss": 2.6199, "step": 8660 }, { "epoch": 0.01727256789493816, "grad_norm": 0.19264602661132812, "learning_rate": 0.002, "loss": 2.6219, "step": 8670 }, { "epoch": 0.017292490118577076, "grad_norm": 0.16380974650382996, "learning_rate": 0.002, "loss": 2.6137, "step": 8680 }, { "epoch": 0.017312412342215987, "grad_norm": 0.1812136173248291, "learning_rate": 0.002, "loss": 2.6013, "step": 8690 }, { "epoch": 0.017332334565854902, "grad_norm": 0.1655120700597763, "learning_rate": 0.002, "loss": 2.6282, "step": 8700 }, { "epoch": 0.017352256789493817, "grad_norm": 0.1859893649816513, "learning_rate": 0.002, "loss": 2.6158, "step": 8710 }, { "epoch": 0.01737217901313273, "grad_norm": 0.25077536702156067, "learning_rate": 0.002, "loss": 2.6038, "step": 8720 }, { "epoch": 0.017392101236771644, "grad_norm": 0.19442617893218994, "learning_rate": 0.002, "loss": 2.6411, "step": 8730 }, { "epoch": 0.017412023460410556, "grad_norm": 0.2140180766582489, "learning_rate": 0.002, "loss": 2.6043, "step": 8740 }, { "epoch": 0.01743194568404947, "grad_norm": 0.18202249705791473, "learning_rate": 0.002, "loss": 2.6109, "step": 8750 }, { "epoch": 0.017451867907688386, "grad_norm": 0.19334687292575836, "learning_rate": 0.002, "loss": 2.6149, "step": 8760 }, { "epoch": 0.017471790131327297, "grad_norm": 0.1748533993959427, "learning_rate": 0.002, "loss": 2.6346, "step": 8770 }, { "epoch": 0.017491712354966212, "grad_norm": 0.2203064113855362, "learning_rate": 0.002, "loss": 2.6251, "step": 8780 }, { "epoch": 0.017511634578605127, "grad_norm": 0.19065988063812256, "learning_rate": 0.002, "loss": 2.6231, "step": 8790 }, { "epoch": 0.01753155680224404, "grad_norm": 0.22482676804065704, "learning_rate": 0.002, "loss": 2.6207, "step": 8800 }, { "epoch": 0.017551479025882954, "grad_norm": 0.22218763828277588, "learning_rate": 0.002, "loss": 2.6208, "step": 8810 }, { "epoch": 0.017571401249521865, "grad_norm": 0.18003524839878082, "learning_rate": 0.002, "loss": 2.6091, "step": 8820 }, { "epoch": 0.01759132347316078, "grad_norm": 0.22126111388206482, "learning_rate": 0.002, "loss": 2.6129, "step": 8830 }, { "epoch": 0.017611245696799695, "grad_norm": 0.23315219581127167, "learning_rate": 0.002, "loss": 2.6164, "step": 8840 }, { "epoch": 0.017631167920438607, "grad_norm": 0.2319544404745102, "learning_rate": 0.002, "loss": 2.6274, "step": 8850 }, { "epoch": 0.017651090144077522, "grad_norm": 0.16445223987102509, "learning_rate": 0.002, "loss": 2.6107, "step": 8860 }, { "epoch": 0.017671012367716434, "grad_norm": 0.16101600229740143, "learning_rate": 0.002, "loss": 2.6088, "step": 8870 }, { "epoch": 0.01769093459135535, "grad_norm": 0.26777175068855286, "learning_rate": 0.002, "loss": 2.616, "step": 8880 }, { "epoch": 0.017710856814994264, "grad_norm": 0.17660021781921387, "learning_rate": 0.002, "loss": 2.6171, "step": 8890 }, { "epoch": 0.017730779038633175, "grad_norm": 0.17876212298870087, "learning_rate": 0.002, "loss": 2.6066, "step": 8900 }, { "epoch": 0.01775070126227209, "grad_norm": 0.21244195103645325, "learning_rate": 0.002, "loss": 2.6298, "step": 8910 }, { "epoch": 0.017770623485911005, "grad_norm": 0.21999742090702057, "learning_rate": 0.002, "loss": 2.6074, "step": 8920 }, { "epoch": 0.017790545709549917, "grad_norm": 0.1872703582048416, "learning_rate": 0.002, "loss": 2.6096, "step": 8930 }, { "epoch": 0.01781046793318883, "grad_norm": 0.16911981999874115, "learning_rate": 0.002, "loss": 2.6049, "step": 8940 }, { "epoch": 0.017830390156827743, "grad_norm": 0.17684519290924072, "learning_rate": 0.002, "loss": 2.6157, "step": 8950 }, { "epoch": 0.01785031238046666, "grad_norm": 0.20418372750282288, "learning_rate": 0.002, "loss": 2.6142, "step": 8960 }, { "epoch": 0.017870234604105573, "grad_norm": 0.2053007036447525, "learning_rate": 0.002, "loss": 2.612, "step": 8970 }, { "epoch": 0.017890156827744485, "grad_norm": 0.20263604819774628, "learning_rate": 0.002, "loss": 2.6167, "step": 8980 }, { "epoch": 0.0179100790513834, "grad_norm": 0.18487796187400818, "learning_rate": 0.002, "loss": 2.6291, "step": 8990 }, { "epoch": 0.01793000127502231, "grad_norm": 0.2049819678068161, "learning_rate": 0.002, "loss": 2.6216, "step": 9000 }, { "epoch": 0.017949923498661226, "grad_norm": 0.17386750876903534, "learning_rate": 0.002, "loss": 2.6238, "step": 9010 }, { "epoch": 0.01796984572230014, "grad_norm": 0.22398091852664948, "learning_rate": 0.002, "loss": 2.6209, "step": 9020 }, { "epoch": 0.017989767945939053, "grad_norm": 0.17615824937820435, "learning_rate": 0.002, "loss": 2.6137, "step": 9030 }, { "epoch": 0.018009690169577968, "grad_norm": 0.23307599127292633, "learning_rate": 0.002, "loss": 2.611, "step": 9040 }, { "epoch": 0.01802961239321688, "grad_norm": 0.16529418528079987, "learning_rate": 0.002, "loss": 2.6091, "step": 9050 }, { "epoch": 0.018049534616855795, "grad_norm": 0.1936095505952835, "learning_rate": 0.002, "loss": 2.6115, "step": 9060 }, { "epoch": 0.01806945684049471, "grad_norm": 0.20242077112197876, "learning_rate": 0.002, "loss": 2.6296, "step": 9070 }, { "epoch": 0.01808937906413362, "grad_norm": 0.17537419497966766, "learning_rate": 0.002, "loss": 2.6129, "step": 9080 }, { "epoch": 0.018109301287772536, "grad_norm": 0.20675505697727203, "learning_rate": 0.002, "loss": 2.6085, "step": 9090 }, { "epoch": 0.01812922351141145, "grad_norm": 0.2047574520111084, "learning_rate": 0.002, "loss": 2.6171, "step": 9100 }, { "epoch": 0.018149145735050363, "grad_norm": 0.2009614259004593, "learning_rate": 0.002, "loss": 2.5925, "step": 9110 }, { "epoch": 0.018169067958689278, "grad_norm": 0.17228882014751434, "learning_rate": 0.002, "loss": 2.6189, "step": 9120 }, { "epoch": 0.01818899018232819, "grad_norm": 0.19801951944828033, "learning_rate": 0.002, "loss": 2.6192, "step": 9130 }, { "epoch": 0.018208912405967104, "grad_norm": 0.17982779443264008, "learning_rate": 0.002, "loss": 2.6199, "step": 9140 }, { "epoch": 0.01822883462960602, "grad_norm": 0.19412505626678467, "learning_rate": 0.002, "loss": 2.6112, "step": 9150 }, { "epoch": 0.01824875685324493, "grad_norm": 0.19382715225219727, "learning_rate": 0.002, "loss": 2.5996, "step": 9160 }, { "epoch": 0.018268679076883846, "grad_norm": 0.21138308942317963, "learning_rate": 0.002, "loss": 2.6169, "step": 9170 }, { "epoch": 0.018288601300522758, "grad_norm": 0.2091972529888153, "learning_rate": 0.002, "loss": 2.6139, "step": 9180 }, { "epoch": 0.018308523524161673, "grad_norm": 0.16810134053230286, "learning_rate": 0.002, "loss": 2.6248, "step": 9190 }, { "epoch": 0.018328445747800588, "grad_norm": 0.2324506789445877, "learning_rate": 0.002, "loss": 2.6045, "step": 9200 }, { "epoch": 0.0183483679714395, "grad_norm": 0.17355512082576752, "learning_rate": 0.002, "loss": 2.628, "step": 9210 }, { "epoch": 0.018368290195078414, "grad_norm": 0.20283189415931702, "learning_rate": 0.002, "loss": 2.6187, "step": 9220 }, { "epoch": 0.01838821241871733, "grad_norm": 0.2916675806045532, "learning_rate": 0.002, "loss": 2.6188, "step": 9230 }, { "epoch": 0.01840813464235624, "grad_norm": 0.18146595358848572, "learning_rate": 0.002, "loss": 2.6296, "step": 9240 }, { "epoch": 0.018428056865995156, "grad_norm": 0.1933811902999878, "learning_rate": 0.002, "loss": 2.6198, "step": 9250 }, { "epoch": 0.018447979089634067, "grad_norm": 0.18319252133369446, "learning_rate": 0.002, "loss": 2.6142, "step": 9260 }, { "epoch": 0.018467901313272982, "grad_norm": 0.3570939600467682, "learning_rate": 0.002, "loss": 2.601, "step": 9270 }, { "epoch": 0.018487823536911897, "grad_norm": 0.1625196486711502, "learning_rate": 0.002, "loss": 2.6077, "step": 9280 }, { "epoch": 0.01850774576055081, "grad_norm": 0.17595741152763367, "learning_rate": 0.002, "loss": 2.6151, "step": 9290 }, { "epoch": 0.018527667984189724, "grad_norm": 0.21657085418701172, "learning_rate": 0.002, "loss": 2.6295, "step": 9300 }, { "epoch": 0.018547590207828635, "grad_norm": 0.1614822894334793, "learning_rate": 0.002, "loss": 2.6078, "step": 9310 }, { "epoch": 0.01856751243146755, "grad_norm": 0.18274514377117157, "learning_rate": 0.002, "loss": 2.6178, "step": 9320 }, { "epoch": 0.018587434655106466, "grad_norm": 0.22662727534770966, "learning_rate": 0.002, "loss": 2.621, "step": 9330 }, { "epoch": 0.018607356878745377, "grad_norm": 0.18708552420139313, "learning_rate": 0.002, "loss": 2.6201, "step": 9340 }, { "epoch": 0.018627279102384292, "grad_norm": 0.2092534303665161, "learning_rate": 0.002, "loss": 2.6148, "step": 9350 }, { "epoch": 0.018647201326023207, "grad_norm": 0.2605736255645752, "learning_rate": 0.002, "loss": 2.6143, "step": 9360 }, { "epoch": 0.01866712354966212, "grad_norm": 0.16923752427101135, "learning_rate": 0.002, "loss": 2.6085, "step": 9370 }, { "epoch": 0.018687045773301034, "grad_norm": 0.22345584630966187, "learning_rate": 0.002, "loss": 2.6289, "step": 9380 }, { "epoch": 0.018706967996939945, "grad_norm": 0.21248167753219604, "learning_rate": 0.002, "loss": 2.6173, "step": 9390 }, { "epoch": 0.01872689022057886, "grad_norm": 0.19577397406101227, "learning_rate": 0.002, "loss": 2.6165, "step": 9400 }, { "epoch": 0.018746812444217775, "grad_norm": 0.19283853471279144, "learning_rate": 0.002, "loss": 2.5976, "step": 9410 }, { "epoch": 0.018766734667856687, "grad_norm": 0.1893066018819809, "learning_rate": 0.002, "loss": 2.6202, "step": 9420 }, { "epoch": 0.018786656891495602, "grad_norm": 0.2191261500120163, "learning_rate": 0.002, "loss": 2.6087, "step": 9430 }, { "epoch": 0.018806579115134513, "grad_norm": 0.22219529747962952, "learning_rate": 0.002, "loss": 2.6212, "step": 9440 }, { "epoch": 0.01882650133877343, "grad_norm": 0.16821110248565674, "learning_rate": 0.002, "loss": 2.6185, "step": 9450 }, { "epoch": 0.018846423562412343, "grad_norm": 0.21353019773960114, "learning_rate": 0.002, "loss": 2.6195, "step": 9460 }, { "epoch": 0.018866345786051255, "grad_norm": 0.17474517226219177, "learning_rate": 0.002, "loss": 2.6212, "step": 9470 }, { "epoch": 0.01888626800969017, "grad_norm": 0.24098901450634003, "learning_rate": 0.002, "loss": 2.6068, "step": 9480 }, { "epoch": 0.01890619023332908, "grad_norm": 0.17342017590999603, "learning_rate": 0.002, "loss": 2.6105, "step": 9490 }, { "epoch": 0.018926112456967997, "grad_norm": 0.1925184726715088, "learning_rate": 0.002, "loss": 2.6189, "step": 9500 }, { "epoch": 0.01894603468060691, "grad_norm": 0.16476404666900635, "learning_rate": 0.002, "loss": 2.6094, "step": 9510 }, { "epoch": 0.018965956904245823, "grad_norm": 0.2294086068868637, "learning_rate": 0.002, "loss": 2.6107, "step": 9520 }, { "epoch": 0.018985879127884738, "grad_norm": 0.15866467356681824, "learning_rate": 0.002, "loss": 2.6153, "step": 9530 }, { "epoch": 0.019005801351523653, "grad_norm": 0.21496699750423431, "learning_rate": 0.002, "loss": 2.6154, "step": 9540 }, { "epoch": 0.019025723575162565, "grad_norm": 0.20280270278453827, "learning_rate": 0.002, "loss": 2.6132, "step": 9550 }, { "epoch": 0.01904564579880148, "grad_norm": 0.175337553024292, "learning_rate": 0.002, "loss": 2.6003, "step": 9560 }, { "epoch": 0.01906556802244039, "grad_norm": 0.1592695116996765, "learning_rate": 0.002, "loss": 2.611, "step": 9570 }, { "epoch": 0.019085490246079306, "grad_norm": 0.21660907566547394, "learning_rate": 0.002, "loss": 2.6103, "step": 9580 }, { "epoch": 0.01910541246971822, "grad_norm": 0.19173564016819, "learning_rate": 0.002, "loss": 2.6307, "step": 9590 }, { "epoch": 0.019125334693357133, "grad_norm": 0.16595308482646942, "learning_rate": 0.002, "loss": 2.5988, "step": 9600 }, { "epoch": 0.019145256916996048, "grad_norm": 0.22009792923927307, "learning_rate": 0.002, "loss": 2.6147, "step": 9610 }, { "epoch": 0.01916517914063496, "grad_norm": 0.18184790015220642, "learning_rate": 0.002, "loss": 2.6265, "step": 9620 }, { "epoch": 0.019185101364273874, "grad_norm": 0.18866269290447235, "learning_rate": 0.002, "loss": 2.6286, "step": 9630 }, { "epoch": 0.01920502358791279, "grad_norm": 0.2004113495349884, "learning_rate": 0.002, "loss": 2.6114, "step": 9640 }, { "epoch": 0.0192249458115517, "grad_norm": 0.15203161537647247, "learning_rate": 0.002, "loss": 2.6085, "step": 9650 }, { "epoch": 0.019244868035190616, "grad_norm": 0.17618197202682495, "learning_rate": 0.002, "loss": 2.6039, "step": 9660 }, { "epoch": 0.01926479025882953, "grad_norm": 0.24612919986248016, "learning_rate": 0.002, "loss": 2.6179, "step": 9670 }, { "epoch": 0.019284712482468443, "grad_norm": 0.1860174536705017, "learning_rate": 0.002, "loss": 2.6106, "step": 9680 }, { "epoch": 0.019304634706107358, "grad_norm": 0.14464916288852692, "learning_rate": 0.002, "loss": 2.6089, "step": 9690 }, { "epoch": 0.01932455692974627, "grad_norm": 0.19666893780231476, "learning_rate": 0.002, "loss": 2.5981, "step": 9700 }, { "epoch": 0.019344479153385184, "grad_norm": 0.22091060876846313, "learning_rate": 0.002, "loss": 2.6198, "step": 9710 }, { "epoch": 0.0193644013770241, "grad_norm": 0.20566034317016602, "learning_rate": 0.002, "loss": 2.605, "step": 9720 }, { "epoch": 0.01938432360066301, "grad_norm": 0.17463912069797516, "learning_rate": 0.002, "loss": 2.618, "step": 9730 }, { "epoch": 0.019404245824301926, "grad_norm": 0.2035626918077469, "learning_rate": 0.002, "loss": 2.6221, "step": 9740 }, { "epoch": 0.019424168047940837, "grad_norm": 0.1725081503391266, "learning_rate": 0.002, "loss": 2.6073, "step": 9750 }, { "epoch": 0.019444090271579752, "grad_norm": 0.19502349197864532, "learning_rate": 0.002, "loss": 2.6207, "step": 9760 }, { "epoch": 0.019464012495218667, "grad_norm": 0.19989925622940063, "learning_rate": 0.002, "loss": 2.6063, "step": 9770 }, { "epoch": 0.01948393471885758, "grad_norm": 0.2107342630624771, "learning_rate": 0.002, "loss": 2.6203, "step": 9780 }, { "epoch": 0.019503856942496494, "grad_norm": 0.16643905639648438, "learning_rate": 0.002, "loss": 2.6244, "step": 9790 }, { "epoch": 0.01952377916613541, "grad_norm": 0.19579999148845673, "learning_rate": 0.002, "loss": 2.6102, "step": 9800 }, { "epoch": 0.01954370138977432, "grad_norm": 0.1829473227262497, "learning_rate": 0.002, "loss": 2.611, "step": 9810 }, { "epoch": 0.019563623613413236, "grad_norm": 0.16105154156684875, "learning_rate": 0.002, "loss": 2.5989, "step": 9820 }, { "epoch": 0.019583545837052147, "grad_norm": 0.1797274649143219, "learning_rate": 0.002, "loss": 2.6009, "step": 9830 }, { "epoch": 0.019603468060691062, "grad_norm": 0.2563207745552063, "learning_rate": 0.002, "loss": 2.61, "step": 9840 }, { "epoch": 0.019623390284329977, "grad_norm": 0.17592205107212067, "learning_rate": 0.002, "loss": 2.6041, "step": 9850 }, { "epoch": 0.01964331250796889, "grad_norm": 0.21983708441257477, "learning_rate": 0.002, "loss": 2.6154, "step": 9860 }, { "epoch": 0.019663234731607804, "grad_norm": 0.1963844746351242, "learning_rate": 0.002, "loss": 2.6059, "step": 9870 }, { "epoch": 0.019683156955246715, "grad_norm": 0.17453619837760925, "learning_rate": 0.002, "loss": 2.6044, "step": 9880 }, { "epoch": 0.01970307917888563, "grad_norm": 0.19484621286392212, "learning_rate": 0.002, "loss": 2.614, "step": 9890 }, { "epoch": 0.019723001402524545, "grad_norm": 0.1959705352783203, "learning_rate": 0.002, "loss": 2.6149, "step": 9900 }, { "epoch": 0.019742923626163457, "grad_norm": 0.1845661997795105, "learning_rate": 0.002, "loss": 2.6066, "step": 9910 }, { "epoch": 0.019762845849802372, "grad_norm": 0.16691778600215912, "learning_rate": 0.002, "loss": 2.6207, "step": 9920 }, { "epoch": 0.019782768073441283, "grad_norm": 0.1967514008283615, "learning_rate": 0.002, "loss": 2.6191, "step": 9930 }, { "epoch": 0.0198026902970802, "grad_norm": 0.1940336525440216, "learning_rate": 0.002, "loss": 2.6007, "step": 9940 }, { "epoch": 0.019822612520719114, "grad_norm": 0.19405384361743927, "learning_rate": 0.002, "loss": 2.6112, "step": 9950 }, { "epoch": 0.019842534744358025, "grad_norm": 0.21770867705345154, "learning_rate": 0.002, "loss": 2.6145, "step": 9960 }, { "epoch": 0.01986245696799694, "grad_norm": 0.17237602174282074, "learning_rate": 0.002, "loss": 2.605, "step": 9970 }, { "epoch": 0.019882379191635855, "grad_norm": 0.21236231923103333, "learning_rate": 0.002, "loss": 2.6237, "step": 9980 }, { "epoch": 0.019902301415274767, "grad_norm": 0.18340973556041718, "learning_rate": 0.002, "loss": 2.6155, "step": 9990 }, { "epoch": 0.01992222363891368, "grad_norm": 0.16798926889896393, "learning_rate": 0.002, "loss": 2.6167, "step": 10000 }, { "epoch": 0.019942145862552593, "grad_norm": 0.19828446209430695, "learning_rate": 0.002, "loss": 2.6141, "step": 10010 }, { "epoch": 0.01996206808619151, "grad_norm": 0.17206020653247833, "learning_rate": 0.002, "loss": 2.6132, "step": 10020 }, { "epoch": 0.019981990309830423, "grad_norm": 0.16998085379600525, "learning_rate": 0.002, "loss": 2.6251, "step": 10030 }, { "epoch": 0.020001912533469335, "grad_norm": 0.1699577420949936, "learning_rate": 0.002, "loss": 2.625, "step": 10040 }, { "epoch": 0.02002183475710825, "grad_norm": 0.17022216320037842, "learning_rate": 0.002, "loss": 2.6099, "step": 10050 }, { "epoch": 0.02004175698074716, "grad_norm": 0.20220999419689178, "learning_rate": 0.002, "loss": 2.6105, "step": 10060 }, { "epoch": 0.020061679204386076, "grad_norm": 0.15182802081108093, "learning_rate": 0.002, "loss": 2.6022, "step": 10070 }, { "epoch": 0.02008160142802499, "grad_norm": 0.2013459950685501, "learning_rate": 0.002, "loss": 2.6197, "step": 10080 }, { "epoch": 0.020101523651663903, "grad_norm": 0.16106155514717102, "learning_rate": 0.002, "loss": 2.6107, "step": 10090 }, { "epoch": 0.020121445875302818, "grad_norm": 0.19329392910003662, "learning_rate": 0.002, "loss": 2.6268, "step": 10100 }, { "epoch": 0.020141368098941733, "grad_norm": 0.15791982412338257, "learning_rate": 0.002, "loss": 2.6012, "step": 10110 }, { "epoch": 0.020161290322580645, "grad_norm": 0.19443224370479584, "learning_rate": 0.002, "loss": 2.6166, "step": 10120 }, { "epoch": 0.02018121254621956, "grad_norm": 0.19214844703674316, "learning_rate": 0.002, "loss": 2.601, "step": 10130 }, { "epoch": 0.02020113476985847, "grad_norm": 0.17756833136081696, "learning_rate": 0.002, "loss": 2.5931, "step": 10140 }, { "epoch": 0.020221056993497386, "grad_norm": 0.17086805403232574, "learning_rate": 0.002, "loss": 2.6168, "step": 10150 }, { "epoch": 0.0202409792171363, "grad_norm": 0.19013233482837677, "learning_rate": 0.002, "loss": 2.6239, "step": 10160 }, { "epoch": 0.020260901440775213, "grad_norm": 0.20604442059993744, "learning_rate": 0.002, "loss": 2.6003, "step": 10170 }, { "epoch": 0.020280823664414128, "grad_norm": 0.1713206022977829, "learning_rate": 0.002, "loss": 2.6037, "step": 10180 }, { "epoch": 0.02030074588805304, "grad_norm": 0.17732979357242584, "learning_rate": 0.002, "loss": 2.5982, "step": 10190 }, { "epoch": 0.020320668111691954, "grad_norm": 0.16155549883842468, "learning_rate": 0.002, "loss": 2.6175, "step": 10200 }, { "epoch": 0.02034059033533087, "grad_norm": 0.15683257579803467, "learning_rate": 0.002, "loss": 2.6354, "step": 10210 }, { "epoch": 0.02036051255896978, "grad_norm": 0.16059483587741852, "learning_rate": 0.002, "loss": 2.6014, "step": 10220 }, { "epoch": 0.020380434782608696, "grad_norm": 0.17963215708732605, "learning_rate": 0.002, "loss": 2.6057, "step": 10230 }, { "epoch": 0.02040035700624761, "grad_norm": 0.20650804042816162, "learning_rate": 0.002, "loss": 2.6055, "step": 10240 }, { "epoch": 0.020420279229886523, "grad_norm": 0.14350053668022156, "learning_rate": 0.002, "loss": 2.6134, "step": 10250 }, { "epoch": 0.020440201453525438, "grad_norm": 0.17791971564292908, "learning_rate": 0.002, "loss": 2.6154, "step": 10260 }, { "epoch": 0.02046012367716435, "grad_norm": 0.22155652940273285, "learning_rate": 0.002, "loss": 2.5904, "step": 10270 }, { "epoch": 0.020480045900803264, "grad_norm": 0.16138575971126556, "learning_rate": 0.002, "loss": 2.6097, "step": 10280 }, { "epoch": 0.02049996812444218, "grad_norm": 0.22347992658615112, "learning_rate": 0.002, "loss": 2.6154, "step": 10290 }, { "epoch": 0.02051989034808109, "grad_norm": 0.16253246366977692, "learning_rate": 0.002, "loss": 2.6025, "step": 10300 }, { "epoch": 0.020539812571720006, "grad_norm": 0.1813543289899826, "learning_rate": 0.002, "loss": 2.6131, "step": 10310 }, { "epoch": 0.020559734795358917, "grad_norm": 0.18421976268291473, "learning_rate": 0.002, "loss": 2.5951, "step": 10320 }, { "epoch": 0.020579657018997832, "grad_norm": 0.2620803713798523, "learning_rate": 0.002, "loss": 2.6179, "step": 10330 }, { "epoch": 0.020599579242636747, "grad_norm": 0.20085814595222473, "learning_rate": 0.002, "loss": 2.6108, "step": 10340 }, { "epoch": 0.02061950146627566, "grad_norm": 0.2092554122209549, "learning_rate": 0.002, "loss": 2.6146, "step": 10350 }, { "epoch": 0.020639423689914574, "grad_norm": 0.19993910193443298, "learning_rate": 0.002, "loss": 2.6259, "step": 10360 }, { "epoch": 0.02065934591355349, "grad_norm": 0.17654964327812195, "learning_rate": 0.002, "loss": 2.599, "step": 10370 }, { "epoch": 0.0206792681371924, "grad_norm": 0.1681276559829712, "learning_rate": 0.002, "loss": 2.5924, "step": 10380 }, { "epoch": 0.020699190360831315, "grad_norm": 0.21244634687900543, "learning_rate": 0.002, "loss": 2.6057, "step": 10390 }, { "epoch": 0.020719112584470227, "grad_norm": 0.23516586422920227, "learning_rate": 0.002, "loss": 2.6047, "step": 10400 }, { "epoch": 0.020739034808109142, "grad_norm": 0.16137972474098206, "learning_rate": 0.002, "loss": 2.6108, "step": 10410 }, { "epoch": 0.020758957031748057, "grad_norm": 0.22871454060077667, "learning_rate": 0.002, "loss": 2.5946, "step": 10420 }, { "epoch": 0.02077887925538697, "grad_norm": 0.2349330484867096, "learning_rate": 0.002, "loss": 2.5852, "step": 10430 }, { "epoch": 0.020798801479025884, "grad_norm": 0.2065851092338562, "learning_rate": 0.002, "loss": 2.6273, "step": 10440 }, { "epoch": 0.020818723702664795, "grad_norm": 0.16612404584884644, "learning_rate": 0.002, "loss": 2.5957, "step": 10450 }, { "epoch": 0.02083864592630371, "grad_norm": 0.21147668361663818, "learning_rate": 0.002, "loss": 2.6049, "step": 10460 }, { "epoch": 0.020858568149942625, "grad_norm": 0.17237743735313416, "learning_rate": 0.002, "loss": 2.6105, "step": 10470 }, { "epoch": 0.020878490373581537, "grad_norm": 0.17566412687301636, "learning_rate": 0.002, "loss": 2.5943, "step": 10480 }, { "epoch": 0.020898412597220452, "grad_norm": 0.1801772266626358, "learning_rate": 0.002, "loss": 2.6261, "step": 10490 }, { "epoch": 0.020918334820859363, "grad_norm": 0.19210268557071686, "learning_rate": 0.002, "loss": 2.5947, "step": 10500 }, { "epoch": 0.02093825704449828, "grad_norm": 0.1710929572582245, "learning_rate": 0.002, "loss": 2.6044, "step": 10510 }, { "epoch": 0.020958179268137193, "grad_norm": 0.18295209109783173, "learning_rate": 0.002, "loss": 2.6041, "step": 10520 }, { "epoch": 0.020978101491776105, "grad_norm": 0.15579932928085327, "learning_rate": 0.002, "loss": 2.602, "step": 10530 }, { "epoch": 0.02099802371541502, "grad_norm": 0.19662635028362274, "learning_rate": 0.002, "loss": 2.613, "step": 10540 }, { "epoch": 0.021017945939053935, "grad_norm": 0.18155600130558014, "learning_rate": 0.002, "loss": 2.6268, "step": 10550 }, { "epoch": 0.021037868162692847, "grad_norm": 0.16538260877132416, "learning_rate": 0.002, "loss": 2.611, "step": 10560 }, { "epoch": 0.02105779038633176, "grad_norm": 0.19594687223434448, "learning_rate": 0.002, "loss": 2.5896, "step": 10570 }, { "epoch": 0.021077712609970673, "grad_norm": 0.1810852438211441, "learning_rate": 0.002, "loss": 2.5853, "step": 10580 }, { "epoch": 0.021097634833609588, "grad_norm": 0.1984042078256607, "learning_rate": 0.002, "loss": 2.6038, "step": 10590 }, { "epoch": 0.021117557057248503, "grad_norm": 0.18390989303588867, "learning_rate": 0.002, "loss": 2.5907, "step": 10600 }, { "epoch": 0.021137479280887415, "grad_norm": 0.18219220638275146, "learning_rate": 0.002, "loss": 2.6021, "step": 10610 }, { "epoch": 0.02115740150452633, "grad_norm": 0.18501722812652588, "learning_rate": 0.002, "loss": 2.5892, "step": 10620 }, { "epoch": 0.02117732372816524, "grad_norm": 0.17138171195983887, "learning_rate": 0.002, "loss": 2.6127, "step": 10630 }, { "epoch": 0.021197245951804156, "grad_norm": 0.1410994976758957, "learning_rate": 0.002, "loss": 2.6088, "step": 10640 }, { "epoch": 0.02121716817544307, "grad_norm": 0.20464621484279633, "learning_rate": 0.002, "loss": 2.6073, "step": 10650 }, { "epoch": 0.021237090399081983, "grad_norm": 0.16384336352348328, "learning_rate": 0.002, "loss": 2.6167, "step": 10660 }, { "epoch": 0.021257012622720898, "grad_norm": 0.19057029485702515, "learning_rate": 0.002, "loss": 2.5974, "step": 10670 }, { "epoch": 0.021276934846359813, "grad_norm": 0.19878466427326202, "learning_rate": 0.002, "loss": 2.6101, "step": 10680 }, { "epoch": 0.021296857069998724, "grad_norm": 0.17407351732254028, "learning_rate": 0.002, "loss": 2.5861, "step": 10690 }, { "epoch": 0.02131677929363764, "grad_norm": 0.17217950522899628, "learning_rate": 0.002, "loss": 2.6062, "step": 10700 }, { "epoch": 0.02133670151727655, "grad_norm": 0.1985625922679901, "learning_rate": 0.002, "loss": 2.6119, "step": 10710 }, { "epoch": 0.021356623740915466, "grad_norm": 0.18633325397968292, "learning_rate": 0.002, "loss": 2.609, "step": 10720 }, { "epoch": 0.02137654596455438, "grad_norm": 0.2101009637117386, "learning_rate": 0.002, "loss": 2.6102, "step": 10730 }, { "epoch": 0.021396468188193293, "grad_norm": 0.16976609826087952, "learning_rate": 0.002, "loss": 2.6258, "step": 10740 }, { "epoch": 0.021416390411832208, "grad_norm": 0.17643913626670837, "learning_rate": 0.002, "loss": 2.6187, "step": 10750 }, { "epoch": 0.02143631263547112, "grad_norm": 0.21358022093772888, "learning_rate": 0.002, "loss": 2.6092, "step": 10760 }, { "epoch": 0.021456234859110034, "grad_norm": 0.16864432394504547, "learning_rate": 0.002, "loss": 2.5998, "step": 10770 }, { "epoch": 0.02147615708274895, "grad_norm": 0.19009338319301605, "learning_rate": 0.002, "loss": 2.6032, "step": 10780 }, { "epoch": 0.02149607930638786, "grad_norm": 0.2031269371509552, "learning_rate": 0.002, "loss": 2.6187, "step": 10790 }, { "epoch": 0.021516001530026776, "grad_norm": 0.19306088984012604, "learning_rate": 0.002, "loss": 2.5931, "step": 10800 }, { "epoch": 0.02153592375366569, "grad_norm": 0.2077362835407257, "learning_rate": 0.002, "loss": 2.6054, "step": 10810 }, { "epoch": 0.021555845977304602, "grad_norm": 0.17492537200450897, "learning_rate": 0.002, "loss": 2.598, "step": 10820 }, { "epoch": 0.021575768200943517, "grad_norm": 0.1766132116317749, "learning_rate": 0.002, "loss": 2.6046, "step": 10830 }, { "epoch": 0.02159569042458243, "grad_norm": 0.20641185343265533, "learning_rate": 0.002, "loss": 2.6067, "step": 10840 }, { "epoch": 0.021615612648221344, "grad_norm": 0.20590411126613617, "learning_rate": 0.002, "loss": 2.6404, "step": 10850 }, { "epoch": 0.02163553487186026, "grad_norm": 0.16957570612430573, "learning_rate": 0.002, "loss": 2.6219, "step": 10860 }, { "epoch": 0.02165545709549917, "grad_norm": 0.2114560902118683, "learning_rate": 0.002, "loss": 2.6085, "step": 10870 }, { "epoch": 0.021675379319138086, "grad_norm": 0.15997296571731567, "learning_rate": 0.002, "loss": 2.6144, "step": 10880 }, { "epoch": 0.021695301542776997, "grad_norm": 0.18250316381454468, "learning_rate": 0.002, "loss": 2.6008, "step": 10890 }, { "epoch": 0.021715223766415912, "grad_norm": 0.20363295078277588, "learning_rate": 0.002, "loss": 2.5943, "step": 10900 }, { "epoch": 0.021735145990054827, "grad_norm": 0.15829013288021088, "learning_rate": 0.002, "loss": 2.6067, "step": 10910 }, { "epoch": 0.02175506821369374, "grad_norm": 0.17828835546970367, "learning_rate": 0.002, "loss": 2.6007, "step": 10920 }, { "epoch": 0.021774990437332654, "grad_norm": 0.16623954474925995, "learning_rate": 0.002, "loss": 2.6142, "step": 10930 }, { "epoch": 0.021794912660971565, "grad_norm": 0.21726830303668976, "learning_rate": 0.002, "loss": 2.617, "step": 10940 }, { "epoch": 0.02181483488461048, "grad_norm": 0.17279911041259766, "learning_rate": 0.002, "loss": 2.6022, "step": 10950 }, { "epoch": 0.021834757108249395, "grad_norm": 0.18770988285541534, "learning_rate": 0.002, "loss": 2.6005, "step": 10960 }, { "epoch": 0.021854679331888307, "grad_norm": 0.1758040189743042, "learning_rate": 0.002, "loss": 2.6079, "step": 10970 }, { "epoch": 0.021874601555527222, "grad_norm": 0.20659665763378143, "learning_rate": 0.002, "loss": 2.6054, "step": 10980 }, { "epoch": 0.021894523779166137, "grad_norm": 0.20627106726169586, "learning_rate": 0.002, "loss": 2.6138, "step": 10990 }, { "epoch": 0.02191444600280505, "grad_norm": 0.1850523203611374, "learning_rate": 0.002, "loss": 2.6071, "step": 11000 }, { "epoch": 0.021934368226443964, "grad_norm": 0.20891647040843964, "learning_rate": 0.002, "loss": 2.6055, "step": 11010 }, { "epoch": 0.021954290450082875, "grad_norm": 0.2205222249031067, "learning_rate": 0.002, "loss": 2.6062, "step": 11020 }, { "epoch": 0.02197421267372179, "grad_norm": 0.14069287478923798, "learning_rate": 0.002, "loss": 2.6085, "step": 11030 }, { "epoch": 0.021994134897360705, "grad_norm": 0.2120995670557022, "learning_rate": 0.002, "loss": 2.6143, "step": 11040 }, { "epoch": 0.022014057120999617, "grad_norm": 0.17825230956077576, "learning_rate": 0.002, "loss": 2.6125, "step": 11050 }, { "epoch": 0.02203397934463853, "grad_norm": 0.17001478374004364, "learning_rate": 0.002, "loss": 2.6152, "step": 11060 }, { "epoch": 0.022053901568277443, "grad_norm": 0.1938123255968094, "learning_rate": 0.002, "loss": 2.6, "step": 11070 }, { "epoch": 0.022073823791916358, "grad_norm": 0.18587008118629456, "learning_rate": 0.002, "loss": 2.5893, "step": 11080 }, { "epoch": 0.022093746015555273, "grad_norm": 0.180850550532341, "learning_rate": 0.002, "loss": 2.632, "step": 11090 }, { "epoch": 0.022113668239194185, "grad_norm": 0.17537961900234222, "learning_rate": 0.002, "loss": 2.5927, "step": 11100 }, { "epoch": 0.0221335904628331, "grad_norm": 0.1530979871749878, "learning_rate": 0.002, "loss": 2.6123, "step": 11110 }, { "epoch": 0.022153512686472015, "grad_norm": 0.20529136061668396, "learning_rate": 0.002, "loss": 2.6071, "step": 11120 }, { "epoch": 0.022173434910110926, "grad_norm": 0.16874517500400543, "learning_rate": 0.002, "loss": 2.6226, "step": 11130 }, { "epoch": 0.02219335713374984, "grad_norm": 0.17960123717784882, "learning_rate": 0.002, "loss": 2.6193, "step": 11140 }, { "epoch": 0.022213279357388753, "grad_norm": 0.22820501029491425, "learning_rate": 0.002, "loss": 2.6002, "step": 11150 }, { "epoch": 0.022233201581027668, "grad_norm": 0.16822955012321472, "learning_rate": 0.002, "loss": 2.6085, "step": 11160 }, { "epoch": 0.022253123804666583, "grad_norm": 0.16662685573101044, "learning_rate": 0.002, "loss": 2.603, "step": 11170 }, { "epoch": 0.022273046028305495, "grad_norm": 0.17054259777069092, "learning_rate": 0.002, "loss": 2.5857, "step": 11180 }, { "epoch": 0.02229296825194441, "grad_norm": 0.2061425745487213, "learning_rate": 0.002, "loss": 2.6105, "step": 11190 }, { "epoch": 0.02231289047558332, "grad_norm": 0.17005455493927002, "learning_rate": 0.002, "loss": 2.597, "step": 11200 }, { "epoch": 0.022332812699222236, "grad_norm": 0.16486923396587372, "learning_rate": 0.002, "loss": 2.6226, "step": 11210 }, { "epoch": 0.02235273492286115, "grad_norm": 0.1610601246356964, "learning_rate": 0.002, "loss": 2.5978, "step": 11220 }, { "epoch": 0.022372657146500063, "grad_norm": 0.18174275755882263, "learning_rate": 0.002, "loss": 2.6013, "step": 11230 }, { "epoch": 0.022392579370138978, "grad_norm": 0.17066548764705658, "learning_rate": 0.002, "loss": 2.6048, "step": 11240 }, { "epoch": 0.022412501593777893, "grad_norm": 0.166887566447258, "learning_rate": 0.002, "loss": 2.6049, "step": 11250 }, { "epoch": 0.022432423817416804, "grad_norm": 0.18410608172416687, "learning_rate": 0.002, "loss": 2.6153, "step": 11260 }, { "epoch": 0.02245234604105572, "grad_norm": 0.18038861453533173, "learning_rate": 0.002, "loss": 2.6177, "step": 11270 }, { "epoch": 0.02247226826469463, "grad_norm": 0.2367008924484253, "learning_rate": 0.002, "loss": 2.6079, "step": 11280 }, { "epoch": 0.022492190488333546, "grad_norm": 0.15670251846313477, "learning_rate": 0.002, "loss": 2.6009, "step": 11290 }, { "epoch": 0.02251211271197246, "grad_norm": 0.18310406804084778, "learning_rate": 0.002, "loss": 2.6028, "step": 11300 }, { "epoch": 0.022532034935611372, "grad_norm": 0.1896446943283081, "learning_rate": 0.002, "loss": 2.5998, "step": 11310 }, { "epoch": 0.022551957159250288, "grad_norm": 0.1826857030391693, "learning_rate": 0.002, "loss": 2.6136, "step": 11320 }, { "epoch": 0.0225718793828892, "grad_norm": 0.191055566072464, "learning_rate": 0.002, "loss": 2.597, "step": 11330 }, { "epoch": 0.022591801606528114, "grad_norm": 0.23004433512687683, "learning_rate": 0.002, "loss": 2.5805, "step": 11340 }, { "epoch": 0.02261172383016703, "grad_norm": 0.1725514978170395, "learning_rate": 0.002, "loss": 2.6135, "step": 11350 }, { "epoch": 0.02263164605380594, "grad_norm": 0.1612851470708847, "learning_rate": 0.002, "loss": 2.598, "step": 11360 }, { "epoch": 0.022651568277444856, "grad_norm": 0.22550494968891144, "learning_rate": 0.002, "loss": 2.6067, "step": 11370 }, { "epoch": 0.022671490501083767, "grad_norm": 0.17270857095718384, "learning_rate": 0.002, "loss": 2.6024, "step": 11380 }, { "epoch": 0.022691412724722682, "grad_norm": 0.15813913941383362, "learning_rate": 0.002, "loss": 2.6028, "step": 11390 }, { "epoch": 0.022711334948361597, "grad_norm": 0.1825796365737915, "learning_rate": 0.002, "loss": 2.6063, "step": 11400 }, { "epoch": 0.02273125717200051, "grad_norm": 0.1898205280303955, "learning_rate": 0.002, "loss": 2.6088, "step": 11410 }, { "epoch": 0.022751179395639424, "grad_norm": 0.179752379655838, "learning_rate": 0.002, "loss": 2.6116, "step": 11420 }, { "epoch": 0.02277110161927834, "grad_norm": 0.19693145155906677, "learning_rate": 0.002, "loss": 2.6189, "step": 11430 }, { "epoch": 0.02279102384291725, "grad_norm": 0.2513704299926758, "learning_rate": 0.002, "loss": 2.5933, "step": 11440 }, { "epoch": 0.022810946066556165, "grad_norm": 0.170314222574234, "learning_rate": 0.002, "loss": 2.605, "step": 11450 }, { "epoch": 0.022830868290195077, "grad_norm": 0.2502501308917999, "learning_rate": 0.002, "loss": 2.6062, "step": 11460 }, { "epoch": 0.022850790513833992, "grad_norm": 0.1541101485490799, "learning_rate": 0.002, "loss": 2.6041, "step": 11470 }, { "epoch": 0.022870712737472907, "grad_norm": 0.1682330071926117, "learning_rate": 0.002, "loss": 2.6132, "step": 11480 }, { "epoch": 0.02289063496111182, "grad_norm": 0.14790412783622742, "learning_rate": 0.002, "loss": 2.6029, "step": 11490 }, { "epoch": 0.022910557184750734, "grad_norm": 0.17155814170837402, "learning_rate": 0.002, "loss": 2.5985, "step": 11500 }, { "epoch": 0.022930479408389645, "grad_norm": 0.1697099208831787, "learning_rate": 0.002, "loss": 2.6245, "step": 11510 }, { "epoch": 0.02295040163202856, "grad_norm": 0.18130867183208466, "learning_rate": 0.002, "loss": 2.6184, "step": 11520 }, { "epoch": 0.022970323855667475, "grad_norm": 0.1643761843442917, "learning_rate": 0.002, "loss": 2.6018, "step": 11530 }, { "epoch": 0.022990246079306387, "grad_norm": 0.17616581916809082, "learning_rate": 0.002, "loss": 2.6038, "step": 11540 }, { "epoch": 0.023010168302945302, "grad_norm": 0.25958001613616943, "learning_rate": 0.002, "loss": 2.6016, "step": 11550 }, { "epoch": 0.023030090526584217, "grad_norm": 0.1801983118057251, "learning_rate": 0.002, "loss": 2.6095, "step": 11560 }, { "epoch": 0.02305001275022313, "grad_norm": 0.1827424019575119, "learning_rate": 0.002, "loss": 2.6135, "step": 11570 }, { "epoch": 0.023069934973862043, "grad_norm": 0.18556497991085052, "learning_rate": 0.002, "loss": 2.6115, "step": 11580 }, { "epoch": 0.023089857197500955, "grad_norm": 0.1940179020166397, "learning_rate": 0.002, "loss": 2.6008, "step": 11590 }, { "epoch": 0.02310977942113987, "grad_norm": 0.22075878083705902, "learning_rate": 0.002, "loss": 2.5989, "step": 11600 }, { "epoch": 0.023129701644778785, "grad_norm": 0.18059805035591125, "learning_rate": 0.002, "loss": 2.6016, "step": 11610 }, { "epoch": 0.023149623868417697, "grad_norm": 0.1937091052532196, "learning_rate": 0.002, "loss": 2.6112, "step": 11620 }, { "epoch": 0.02316954609205661, "grad_norm": 0.17905142903327942, "learning_rate": 0.002, "loss": 2.5973, "step": 11630 }, { "epoch": 0.023189468315695523, "grad_norm": 0.178644061088562, "learning_rate": 0.002, "loss": 2.5972, "step": 11640 }, { "epoch": 0.023209390539334438, "grad_norm": 0.19864986836910248, "learning_rate": 0.002, "loss": 2.6133, "step": 11650 }, { "epoch": 0.023229312762973353, "grad_norm": 0.16537383198738098, "learning_rate": 0.002, "loss": 2.6034, "step": 11660 }, { "epoch": 0.023249234986612265, "grad_norm": 0.18615898489952087, "learning_rate": 0.002, "loss": 2.6017, "step": 11670 }, { "epoch": 0.02326915721025118, "grad_norm": 0.1697196662425995, "learning_rate": 0.002, "loss": 2.6326, "step": 11680 }, { "epoch": 0.023289079433890095, "grad_norm": 0.177900493144989, "learning_rate": 0.002, "loss": 2.6033, "step": 11690 }, { "epoch": 0.023309001657529006, "grad_norm": 0.2098761796951294, "learning_rate": 0.002, "loss": 2.5873, "step": 11700 }, { "epoch": 0.02332892388116792, "grad_norm": 0.17807313799858093, "learning_rate": 0.002, "loss": 2.6038, "step": 11710 }, { "epoch": 0.023348846104806833, "grad_norm": 0.15632474422454834, "learning_rate": 0.002, "loss": 2.6046, "step": 11720 }, { "epoch": 0.023368768328445748, "grad_norm": 0.20212116837501526, "learning_rate": 0.002, "loss": 2.6001, "step": 11730 }, { "epoch": 0.023388690552084663, "grad_norm": 0.20282936096191406, "learning_rate": 0.002, "loss": 2.6053, "step": 11740 }, { "epoch": 0.023408612775723574, "grad_norm": 0.16725830733776093, "learning_rate": 0.002, "loss": 2.6071, "step": 11750 }, { "epoch": 0.02342853499936249, "grad_norm": 0.19858692586421967, "learning_rate": 0.002, "loss": 2.601, "step": 11760 }, { "epoch": 0.0234484572230014, "grad_norm": 0.1926221400499344, "learning_rate": 0.002, "loss": 2.5883, "step": 11770 }, { "epoch": 0.023468379446640316, "grad_norm": 0.16439788043498993, "learning_rate": 0.002, "loss": 2.6098, "step": 11780 }, { "epoch": 0.02348830167027923, "grad_norm": 0.17976920306682587, "learning_rate": 0.002, "loss": 2.6095, "step": 11790 }, { "epoch": 0.023508223893918143, "grad_norm": 0.18277178704738617, "learning_rate": 0.002, "loss": 2.6123, "step": 11800 }, { "epoch": 0.023528146117557058, "grad_norm": 0.1857341080904007, "learning_rate": 0.002, "loss": 2.6101, "step": 11810 }, { "epoch": 0.023548068341195973, "grad_norm": 0.16805046796798706, "learning_rate": 0.002, "loss": 2.5964, "step": 11820 }, { "epoch": 0.023567990564834884, "grad_norm": 0.18534351885318756, "learning_rate": 0.002, "loss": 2.6115, "step": 11830 }, { "epoch": 0.0235879127884738, "grad_norm": 0.2055422067642212, "learning_rate": 0.002, "loss": 2.6243, "step": 11840 }, { "epoch": 0.02360783501211271, "grad_norm": 0.1612221896648407, "learning_rate": 0.002, "loss": 2.6006, "step": 11850 }, { "epoch": 0.023627757235751626, "grad_norm": 0.21651239693164825, "learning_rate": 0.002, "loss": 2.5997, "step": 11860 }, { "epoch": 0.02364767945939054, "grad_norm": 0.16768963634967804, "learning_rate": 0.002, "loss": 2.603, "step": 11870 }, { "epoch": 0.023667601683029452, "grad_norm": 0.2016330063343048, "learning_rate": 0.002, "loss": 2.5925, "step": 11880 }, { "epoch": 0.023687523906668367, "grad_norm": 0.20292484760284424, "learning_rate": 0.002, "loss": 2.6194, "step": 11890 }, { "epoch": 0.02370744613030728, "grad_norm": 0.23764312267303467, "learning_rate": 0.002, "loss": 2.6119, "step": 11900 }, { "epoch": 0.023727368353946194, "grad_norm": 0.17351999878883362, "learning_rate": 0.002, "loss": 2.616, "step": 11910 }, { "epoch": 0.02374729057758511, "grad_norm": 0.24952173233032227, "learning_rate": 0.002, "loss": 2.6067, "step": 11920 }, { "epoch": 0.02376721280122402, "grad_norm": 0.21868151426315308, "learning_rate": 0.002, "loss": 2.602, "step": 11930 }, { "epoch": 0.023787135024862936, "grad_norm": 0.1485920548439026, "learning_rate": 0.002, "loss": 2.6135, "step": 11940 }, { "epoch": 0.023807057248501847, "grad_norm": 0.1803300976753235, "learning_rate": 0.002, "loss": 2.619, "step": 11950 }, { "epoch": 0.023826979472140762, "grad_norm": 0.19586755335330963, "learning_rate": 0.002, "loss": 2.6185, "step": 11960 }, { "epoch": 0.023846901695779677, "grad_norm": 0.19533789157867432, "learning_rate": 0.002, "loss": 2.6086, "step": 11970 }, { "epoch": 0.02386682391941859, "grad_norm": 0.1952221840620041, "learning_rate": 0.002, "loss": 2.6046, "step": 11980 }, { "epoch": 0.023886746143057504, "grad_norm": 0.1710333377122879, "learning_rate": 0.002, "loss": 2.5948, "step": 11990 }, { "epoch": 0.02390666836669642, "grad_norm": 0.1736249327659607, "learning_rate": 0.002, "loss": 2.6089, "step": 12000 }, { "epoch": 0.02392659059033533, "grad_norm": 0.1635056883096695, "learning_rate": 0.002, "loss": 2.6091, "step": 12010 }, { "epoch": 0.023946512813974245, "grad_norm": 0.20120127499103546, "learning_rate": 0.002, "loss": 2.6067, "step": 12020 }, { "epoch": 0.023966435037613157, "grad_norm": 0.14642533659934998, "learning_rate": 0.002, "loss": 2.5948, "step": 12030 }, { "epoch": 0.023986357261252072, "grad_norm": 0.18439991772174835, "learning_rate": 0.002, "loss": 2.5951, "step": 12040 }, { "epoch": 0.024006279484890987, "grad_norm": 0.2460283637046814, "learning_rate": 0.002, "loss": 2.6, "step": 12050 }, { "epoch": 0.0240262017085299, "grad_norm": 0.18703573942184448, "learning_rate": 0.002, "loss": 2.6, "step": 12060 }, { "epoch": 0.024046123932168813, "grad_norm": 0.1759602278470993, "learning_rate": 0.002, "loss": 2.6092, "step": 12070 }, { "epoch": 0.024066046155807725, "grad_norm": 0.15180550515651703, "learning_rate": 0.002, "loss": 2.5912, "step": 12080 }, { "epoch": 0.02408596837944664, "grad_norm": 0.16867898404598236, "learning_rate": 0.002, "loss": 2.6052, "step": 12090 }, { "epoch": 0.024105890603085555, "grad_norm": 0.20649339258670807, "learning_rate": 0.002, "loss": 2.6072, "step": 12100 }, { "epoch": 0.024125812826724467, "grad_norm": 0.17277538776397705, "learning_rate": 0.002, "loss": 2.5951, "step": 12110 }, { "epoch": 0.02414573505036338, "grad_norm": 0.1533670276403427, "learning_rate": 0.002, "loss": 2.6208, "step": 12120 }, { "epoch": 0.024165657274002297, "grad_norm": 0.17293167114257812, "learning_rate": 0.002, "loss": 2.6061, "step": 12130 }, { "epoch": 0.024185579497641208, "grad_norm": 0.18856783211231232, "learning_rate": 0.002, "loss": 2.6096, "step": 12140 }, { "epoch": 0.024205501721280123, "grad_norm": 0.22097665071487427, "learning_rate": 0.002, "loss": 2.6058, "step": 12150 }, { "epoch": 0.024225423944919035, "grad_norm": 0.14685297012329102, "learning_rate": 0.002, "loss": 2.6164, "step": 12160 }, { "epoch": 0.02424534616855795, "grad_norm": 0.24308060109615326, "learning_rate": 0.002, "loss": 2.599, "step": 12170 }, { "epoch": 0.024265268392196865, "grad_norm": 0.15847823023796082, "learning_rate": 0.002, "loss": 2.604, "step": 12180 }, { "epoch": 0.024285190615835776, "grad_norm": 0.18321964144706726, "learning_rate": 0.002, "loss": 2.6123, "step": 12190 }, { "epoch": 0.02430511283947469, "grad_norm": 0.1552821546792984, "learning_rate": 0.002, "loss": 2.6136, "step": 12200 }, { "epoch": 0.024325035063113603, "grad_norm": 0.187515527009964, "learning_rate": 0.002, "loss": 2.6116, "step": 12210 }, { "epoch": 0.024344957286752518, "grad_norm": 0.15421126782894135, "learning_rate": 0.002, "loss": 2.5968, "step": 12220 }, { "epoch": 0.024364879510391433, "grad_norm": 0.14723989367485046, "learning_rate": 0.002, "loss": 2.6088, "step": 12230 }, { "epoch": 0.024384801734030345, "grad_norm": 0.2126293182373047, "learning_rate": 0.002, "loss": 2.5961, "step": 12240 }, { "epoch": 0.02440472395766926, "grad_norm": 0.16941864788532257, "learning_rate": 0.002, "loss": 2.5973, "step": 12250 }, { "epoch": 0.024424646181308175, "grad_norm": 0.20287808775901794, "learning_rate": 0.002, "loss": 2.6012, "step": 12260 }, { "epoch": 0.024444568404947086, "grad_norm": 0.1999441236257553, "learning_rate": 0.002, "loss": 2.5976, "step": 12270 }, { "epoch": 0.024464490628586, "grad_norm": 0.21117988228797913, "learning_rate": 0.002, "loss": 2.6155, "step": 12280 }, { "epoch": 0.024484412852224913, "grad_norm": 0.13820502161979675, "learning_rate": 0.002, "loss": 2.6065, "step": 12290 }, { "epoch": 0.024504335075863828, "grad_norm": 0.19411610066890717, "learning_rate": 0.002, "loss": 2.5997, "step": 12300 }, { "epoch": 0.024524257299502743, "grad_norm": 0.168791726231575, "learning_rate": 0.002, "loss": 2.5977, "step": 12310 }, { "epoch": 0.024544179523141654, "grad_norm": 0.14449435472488403, "learning_rate": 0.002, "loss": 2.6051, "step": 12320 }, { "epoch": 0.02456410174678057, "grad_norm": 0.16175530850887299, "learning_rate": 0.002, "loss": 2.5937, "step": 12330 }, { "epoch": 0.02458402397041948, "grad_norm": 0.16970467567443848, "learning_rate": 0.002, "loss": 2.6086, "step": 12340 }, { "epoch": 0.024603946194058396, "grad_norm": 0.19018280506134033, "learning_rate": 0.002, "loss": 2.6099, "step": 12350 }, { "epoch": 0.02462386841769731, "grad_norm": 0.21881967782974243, "learning_rate": 0.002, "loss": 2.6073, "step": 12360 }, { "epoch": 0.024643790641336222, "grad_norm": 0.16693216562271118, "learning_rate": 0.002, "loss": 2.5846, "step": 12370 }, { "epoch": 0.024663712864975137, "grad_norm": 0.18363788723945618, "learning_rate": 0.002, "loss": 2.6165, "step": 12380 }, { "epoch": 0.02468363508861405, "grad_norm": 0.17420537769794464, "learning_rate": 0.002, "loss": 2.6138, "step": 12390 }, { "epoch": 0.024703557312252964, "grad_norm": 0.17401094734668732, "learning_rate": 0.002, "loss": 2.6136, "step": 12400 }, { "epoch": 0.02472347953589188, "grad_norm": 0.161782905459404, "learning_rate": 0.002, "loss": 2.6085, "step": 12410 }, { "epoch": 0.02474340175953079, "grad_norm": 0.17182859778404236, "learning_rate": 0.002, "loss": 2.6033, "step": 12420 }, { "epoch": 0.024763323983169706, "grad_norm": 0.2147749364376068, "learning_rate": 0.002, "loss": 2.6117, "step": 12430 }, { "epoch": 0.02478324620680862, "grad_norm": 0.17679260671138763, "learning_rate": 0.002, "loss": 2.6032, "step": 12440 }, { "epoch": 0.024803168430447532, "grad_norm": 0.1785353571176529, "learning_rate": 0.002, "loss": 2.6158, "step": 12450 }, { "epoch": 0.024823090654086447, "grad_norm": 0.19723263382911682, "learning_rate": 0.002, "loss": 2.608, "step": 12460 }, { "epoch": 0.02484301287772536, "grad_norm": 0.15596802532672882, "learning_rate": 0.002, "loss": 2.6002, "step": 12470 }, { "epoch": 0.024862935101364274, "grad_norm": 0.19819381833076477, "learning_rate": 0.002, "loss": 2.6029, "step": 12480 }, { "epoch": 0.02488285732500319, "grad_norm": 0.1792689859867096, "learning_rate": 0.002, "loss": 2.5769, "step": 12490 }, { "epoch": 0.0249027795486421, "grad_norm": 0.18135929107666016, "learning_rate": 0.002, "loss": 2.6141, "step": 12500 }, { "epoch": 0.024922701772281015, "grad_norm": 0.20976515114307404, "learning_rate": 0.002, "loss": 2.6018, "step": 12510 }, { "epoch": 0.024942623995919927, "grad_norm": 0.22110743820667267, "learning_rate": 0.002, "loss": 2.6147, "step": 12520 }, { "epoch": 0.024962546219558842, "grad_norm": 0.16746944189071655, "learning_rate": 0.002, "loss": 2.6112, "step": 12530 }, { "epoch": 0.024982468443197757, "grad_norm": 0.1616380661725998, "learning_rate": 0.002, "loss": 2.6126, "step": 12540 }, { "epoch": 0.02500239066683667, "grad_norm": 0.19358961284160614, "learning_rate": 0.002, "loss": 2.6087, "step": 12550 }, { "epoch": 0.025022312890475584, "grad_norm": 0.16612887382507324, "learning_rate": 0.002, "loss": 2.605, "step": 12560 }, { "epoch": 0.0250422351141145, "grad_norm": 0.1792205274105072, "learning_rate": 0.002, "loss": 2.5823, "step": 12570 }, { "epoch": 0.02506215733775341, "grad_norm": 0.17733633518218994, "learning_rate": 0.002, "loss": 2.5998, "step": 12580 }, { "epoch": 0.025082079561392325, "grad_norm": 0.16254159808158875, "learning_rate": 0.002, "loss": 2.6125, "step": 12590 }, { "epoch": 0.025102001785031237, "grad_norm": 0.17693233489990234, "learning_rate": 0.002, "loss": 2.6063, "step": 12600 }, { "epoch": 0.025121924008670152, "grad_norm": 0.18063803017139435, "learning_rate": 0.002, "loss": 2.6194, "step": 12610 }, { "epoch": 0.025141846232309067, "grad_norm": 0.15344397723674774, "learning_rate": 0.002, "loss": 2.6032, "step": 12620 }, { "epoch": 0.02516176845594798, "grad_norm": 0.1889510303735733, "learning_rate": 0.002, "loss": 2.6021, "step": 12630 }, { "epoch": 0.025181690679586893, "grad_norm": 0.15594446659088135, "learning_rate": 0.002, "loss": 2.593, "step": 12640 }, { "epoch": 0.025201612903225805, "grad_norm": 0.18714657425880432, "learning_rate": 0.002, "loss": 2.5875, "step": 12650 }, { "epoch": 0.02522153512686472, "grad_norm": 0.18683022260665894, "learning_rate": 0.002, "loss": 2.6117, "step": 12660 }, { "epoch": 0.025241457350503635, "grad_norm": 0.17430652678012848, "learning_rate": 0.002, "loss": 2.5845, "step": 12670 }, { "epoch": 0.025261379574142546, "grad_norm": 0.20978660881519318, "learning_rate": 0.002, "loss": 2.6034, "step": 12680 }, { "epoch": 0.02528130179778146, "grad_norm": 0.15255634486675262, "learning_rate": 0.002, "loss": 2.6045, "step": 12690 }, { "epoch": 0.025301224021420377, "grad_norm": 0.18802416324615479, "learning_rate": 0.002, "loss": 2.61, "step": 12700 }, { "epoch": 0.025321146245059288, "grad_norm": 0.17225682735443115, "learning_rate": 0.002, "loss": 2.6031, "step": 12710 }, { "epoch": 0.025341068468698203, "grad_norm": 0.1750660091638565, "learning_rate": 0.002, "loss": 2.6027, "step": 12720 }, { "epoch": 0.025360990692337115, "grad_norm": 0.16923700273036957, "learning_rate": 0.002, "loss": 2.6126, "step": 12730 }, { "epoch": 0.02538091291597603, "grad_norm": 0.16983634233474731, "learning_rate": 0.002, "loss": 2.5852, "step": 12740 }, { "epoch": 0.025400835139614945, "grad_norm": 0.1705174297094345, "learning_rate": 0.002, "loss": 2.6178, "step": 12750 }, { "epoch": 0.025420757363253856, "grad_norm": 0.16462990641593933, "learning_rate": 0.002, "loss": 2.5997, "step": 12760 }, { "epoch": 0.02544067958689277, "grad_norm": 0.18846650421619415, "learning_rate": 0.002, "loss": 2.6076, "step": 12770 }, { "epoch": 0.025460601810531683, "grad_norm": 0.24946388602256775, "learning_rate": 0.002, "loss": 2.6066, "step": 12780 }, { "epoch": 0.025480524034170598, "grad_norm": 0.16048739850521088, "learning_rate": 0.002, "loss": 2.6084, "step": 12790 }, { "epoch": 0.025500446257809513, "grad_norm": 0.1760735660791397, "learning_rate": 0.002, "loss": 2.605, "step": 12800 }, { "epoch": 0.025520368481448424, "grad_norm": 0.16110298037528992, "learning_rate": 0.002, "loss": 2.5997, "step": 12810 }, { "epoch": 0.02554029070508734, "grad_norm": 0.17794246971607208, "learning_rate": 0.002, "loss": 2.6033, "step": 12820 }, { "epoch": 0.02556021292872625, "grad_norm": 0.19817854464054108, "learning_rate": 0.002, "loss": 2.6124, "step": 12830 }, { "epoch": 0.025580135152365166, "grad_norm": 0.18067431449890137, "learning_rate": 0.002, "loss": 2.5951, "step": 12840 }, { "epoch": 0.02560005737600408, "grad_norm": 0.2183968424797058, "learning_rate": 0.002, "loss": 2.6063, "step": 12850 }, { "epoch": 0.025619979599642993, "grad_norm": 0.18110886216163635, "learning_rate": 0.002, "loss": 2.611, "step": 12860 }, { "epoch": 0.025639901823281908, "grad_norm": 0.16916272044181824, "learning_rate": 0.002, "loss": 2.5964, "step": 12870 }, { "epoch": 0.025659824046920823, "grad_norm": 0.17860475182533264, "learning_rate": 0.002, "loss": 2.5957, "step": 12880 }, { "epoch": 0.025679746270559734, "grad_norm": 0.18404677510261536, "learning_rate": 0.002, "loss": 2.5996, "step": 12890 }, { "epoch": 0.02569966849419865, "grad_norm": 0.1888464391231537, "learning_rate": 0.002, "loss": 2.6218, "step": 12900 }, { "epoch": 0.02571959071783756, "grad_norm": 0.20212993025779724, "learning_rate": 0.002, "loss": 2.6056, "step": 12910 }, { "epoch": 0.025739512941476476, "grad_norm": 0.18478699028491974, "learning_rate": 0.002, "loss": 2.6167, "step": 12920 }, { "epoch": 0.02575943516511539, "grad_norm": 0.164030060172081, "learning_rate": 0.002, "loss": 2.6047, "step": 12930 }, { "epoch": 0.025779357388754302, "grad_norm": 0.15573473274707794, "learning_rate": 0.002, "loss": 2.6135, "step": 12940 }, { "epoch": 0.025799279612393217, "grad_norm": 0.1607455164194107, "learning_rate": 0.002, "loss": 2.5983, "step": 12950 }, { "epoch": 0.02581920183603213, "grad_norm": 0.1696045845746994, "learning_rate": 0.002, "loss": 2.5929, "step": 12960 }, { "epoch": 0.025839124059671044, "grad_norm": 0.1568962186574936, "learning_rate": 0.002, "loss": 2.5941, "step": 12970 }, { "epoch": 0.02585904628330996, "grad_norm": 0.17355716228485107, "learning_rate": 0.002, "loss": 2.6013, "step": 12980 }, { "epoch": 0.02587896850694887, "grad_norm": 0.17512869834899902, "learning_rate": 0.002, "loss": 2.6063, "step": 12990 }, { "epoch": 0.025898890730587786, "grad_norm": 0.15978802740573883, "learning_rate": 0.002, "loss": 2.6024, "step": 13000 }, { "epoch": 0.0259188129542267, "grad_norm": 0.14087732136249542, "learning_rate": 0.002, "loss": 2.618, "step": 13010 }, { "epoch": 0.025938735177865612, "grad_norm": 0.2671426832675934, "learning_rate": 0.002, "loss": 2.6056, "step": 13020 }, { "epoch": 0.025958657401504527, "grad_norm": 0.1574532389640808, "learning_rate": 0.002, "loss": 2.5966, "step": 13030 }, { "epoch": 0.02597857962514344, "grad_norm": 0.16531147062778473, "learning_rate": 0.002, "loss": 2.6009, "step": 13040 }, { "epoch": 0.025998501848782354, "grad_norm": 0.18855364620685577, "learning_rate": 0.002, "loss": 2.5849, "step": 13050 }, { "epoch": 0.02601842407242127, "grad_norm": 0.20205658674240112, "learning_rate": 0.002, "loss": 2.6126, "step": 13060 }, { "epoch": 0.02603834629606018, "grad_norm": 0.1765764206647873, "learning_rate": 0.002, "loss": 2.6009, "step": 13070 }, { "epoch": 0.026058268519699095, "grad_norm": 0.2251121550798416, "learning_rate": 0.002, "loss": 2.603, "step": 13080 }, { "epoch": 0.026078190743338007, "grad_norm": 0.18294163048267365, "learning_rate": 0.002, "loss": 2.5955, "step": 13090 }, { "epoch": 0.026098112966976922, "grad_norm": 0.1480778157711029, "learning_rate": 0.002, "loss": 2.5945, "step": 13100 }, { "epoch": 0.026118035190615837, "grad_norm": 0.20369943976402283, "learning_rate": 0.002, "loss": 2.6054, "step": 13110 }, { "epoch": 0.02613795741425475, "grad_norm": 0.18081340193748474, "learning_rate": 0.002, "loss": 2.6002, "step": 13120 }, { "epoch": 0.026157879637893663, "grad_norm": 0.14847080409526825, "learning_rate": 0.002, "loss": 2.5907, "step": 13130 }, { "epoch": 0.02617780186153258, "grad_norm": 0.1761661171913147, "learning_rate": 0.002, "loss": 2.6052, "step": 13140 }, { "epoch": 0.02619772408517149, "grad_norm": 0.18186083436012268, "learning_rate": 0.002, "loss": 2.609, "step": 13150 }, { "epoch": 0.026217646308810405, "grad_norm": 0.16312989592552185, "learning_rate": 0.002, "loss": 2.6033, "step": 13160 }, { "epoch": 0.026237568532449317, "grad_norm": 0.16006635129451752, "learning_rate": 0.002, "loss": 2.6086, "step": 13170 }, { "epoch": 0.02625749075608823, "grad_norm": 0.17358477413654327, "learning_rate": 0.002, "loss": 2.5894, "step": 13180 }, { "epoch": 0.026277412979727147, "grad_norm": 0.15516430139541626, "learning_rate": 0.002, "loss": 2.5994, "step": 13190 }, { "epoch": 0.026297335203366058, "grad_norm": 0.1689622849225998, "learning_rate": 0.002, "loss": 2.5965, "step": 13200 }, { "epoch": 0.026317257427004973, "grad_norm": 0.19964884221553802, "learning_rate": 0.002, "loss": 2.6063, "step": 13210 }, { "epoch": 0.026337179650643885, "grad_norm": 0.17349763214588165, "learning_rate": 0.002, "loss": 2.5972, "step": 13220 }, { "epoch": 0.0263571018742828, "grad_norm": 0.1714962124824524, "learning_rate": 0.002, "loss": 2.5985, "step": 13230 }, { "epoch": 0.026377024097921715, "grad_norm": 0.17830710113048553, "learning_rate": 0.002, "loss": 2.5846, "step": 13240 }, { "epoch": 0.026396946321560626, "grad_norm": 0.17529040575027466, "learning_rate": 0.002, "loss": 2.5935, "step": 13250 }, { "epoch": 0.02641686854519954, "grad_norm": 0.17763210833072662, "learning_rate": 0.002, "loss": 2.5934, "step": 13260 }, { "epoch": 0.026436790768838456, "grad_norm": 0.17745277285575867, "learning_rate": 0.002, "loss": 2.607, "step": 13270 }, { "epoch": 0.026456712992477368, "grad_norm": 0.1554098278284073, "learning_rate": 0.002, "loss": 2.6011, "step": 13280 }, { "epoch": 0.026476635216116283, "grad_norm": 0.24296937882900238, "learning_rate": 0.002, "loss": 2.6026, "step": 13290 }, { "epoch": 0.026496557439755195, "grad_norm": 0.16734112799167633, "learning_rate": 0.002, "loss": 2.596, "step": 13300 }, { "epoch": 0.02651647966339411, "grad_norm": 0.17623130977153778, "learning_rate": 0.002, "loss": 2.6108, "step": 13310 }, { "epoch": 0.026536401887033025, "grad_norm": 0.13174913823604584, "learning_rate": 0.002, "loss": 2.5971, "step": 13320 }, { "epoch": 0.026556324110671936, "grad_norm": 0.17973582446575165, "learning_rate": 0.002, "loss": 2.5919, "step": 13330 }, { "epoch": 0.02657624633431085, "grad_norm": 0.19733548164367676, "learning_rate": 0.002, "loss": 2.6062, "step": 13340 }, { "epoch": 0.026596168557949763, "grad_norm": 0.18435384333133698, "learning_rate": 0.002, "loss": 2.5983, "step": 13350 }, { "epoch": 0.026616090781588678, "grad_norm": 0.15351015329360962, "learning_rate": 0.002, "loss": 2.5992, "step": 13360 }, { "epoch": 0.026636013005227593, "grad_norm": 0.17907430231571198, "learning_rate": 0.002, "loss": 2.5816, "step": 13370 }, { "epoch": 0.026655935228866504, "grad_norm": 0.1592131108045578, "learning_rate": 0.002, "loss": 2.604, "step": 13380 }, { "epoch": 0.02667585745250542, "grad_norm": 0.1836588829755783, "learning_rate": 0.002, "loss": 2.6033, "step": 13390 }, { "epoch": 0.02669577967614433, "grad_norm": 0.17414772510528564, "learning_rate": 0.002, "loss": 2.5843, "step": 13400 }, { "epoch": 0.026715701899783246, "grad_norm": 0.22312377393245697, "learning_rate": 0.002, "loss": 2.5961, "step": 13410 }, { "epoch": 0.02673562412342216, "grad_norm": 0.1706368774175644, "learning_rate": 0.002, "loss": 2.6125, "step": 13420 }, { "epoch": 0.026755546347061072, "grad_norm": 0.14813371002674103, "learning_rate": 0.002, "loss": 2.5944, "step": 13430 }, { "epoch": 0.026775468570699987, "grad_norm": 0.1561143845319748, "learning_rate": 0.002, "loss": 2.5878, "step": 13440 }, { "epoch": 0.026795390794338902, "grad_norm": 0.1616666465997696, "learning_rate": 0.002, "loss": 2.6185, "step": 13450 }, { "epoch": 0.026815313017977814, "grad_norm": 0.16962315142154694, "learning_rate": 0.002, "loss": 2.5882, "step": 13460 }, { "epoch": 0.02683523524161673, "grad_norm": 0.1739758551120758, "learning_rate": 0.002, "loss": 2.6006, "step": 13470 }, { "epoch": 0.02685515746525564, "grad_norm": 0.17802178859710693, "learning_rate": 0.002, "loss": 2.6181, "step": 13480 }, { "epoch": 0.026875079688894556, "grad_norm": 0.18793389201164246, "learning_rate": 0.002, "loss": 2.598, "step": 13490 }, { "epoch": 0.02689500191253347, "grad_norm": 0.1509685218334198, "learning_rate": 0.002, "loss": 2.5993, "step": 13500 }, { "epoch": 0.026914924136172382, "grad_norm": 0.16925743222236633, "learning_rate": 0.002, "loss": 2.6025, "step": 13510 }, { "epoch": 0.026934846359811297, "grad_norm": 0.17623119056224823, "learning_rate": 0.002, "loss": 2.597, "step": 13520 }, { "epoch": 0.02695476858345021, "grad_norm": 0.15726113319396973, "learning_rate": 0.002, "loss": 2.6063, "step": 13530 }, { "epoch": 0.026974690807089124, "grad_norm": 0.19044865667819977, "learning_rate": 0.002, "loss": 2.5971, "step": 13540 }, { "epoch": 0.02699461303072804, "grad_norm": 0.19189657270908356, "learning_rate": 0.002, "loss": 2.5991, "step": 13550 }, { "epoch": 0.02701453525436695, "grad_norm": 0.2060278356075287, "learning_rate": 0.002, "loss": 2.6075, "step": 13560 }, { "epoch": 0.027034457478005865, "grad_norm": 0.18266600370407104, "learning_rate": 0.002, "loss": 2.607, "step": 13570 }, { "epoch": 0.02705437970164478, "grad_norm": 0.1728449910879135, "learning_rate": 0.002, "loss": 2.6139, "step": 13580 }, { "epoch": 0.027074301925283692, "grad_norm": 0.21238328516483307, "learning_rate": 0.002, "loss": 2.6041, "step": 13590 }, { "epoch": 0.027094224148922607, "grad_norm": 0.19100941717624664, "learning_rate": 0.002, "loss": 2.6101, "step": 13600 }, { "epoch": 0.02711414637256152, "grad_norm": 0.16089944541454315, "learning_rate": 0.002, "loss": 2.5926, "step": 13610 }, { "epoch": 0.027134068596200434, "grad_norm": 0.1802932173013687, "learning_rate": 0.002, "loss": 2.6133, "step": 13620 }, { "epoch": 0.02715399081983935, "grad_norm": 0.15389616787433624, "learning_rate": 0.002, "loss": 2.5898, "step": 13630 }, { "epoch": 0.02717391304347826, "grad_norm": 0.1489216536283493, "learning_rate": 0.002, "loss": 2.5894, "step": 13640 }, { "epoch": 0.027193835267117175, "grad_norm": 0.18853439390659332, "learning_rate": 0.002, "loss": 2.5945, "step": 13650 }, { "epoch": 0.027213757490756087, "grad_norm": 0.16988058388233185, "learning_rate": 0.002, "loss": 2.6099, "step": 13660 }, { "epoch": 0.027233679714395, "grad_norm": 0.16382989287376404, "learning_rate": 0.002, "loss": 2.5903, "step": 13670 }, { "epoch": 0.027253601938033917, "grad_norm": 0.15697790682315826, "learning_rate": 0.002, "loss": 2.6085, "step": 13680 }, { "epoch": 0.02727352416167283, "grad_norm": 0.13887034356594086, "learning_rate": 0.002, "loss": 2.5872, "step": 13690 }, { "epoch": 0.027293446385311743, "grad_norm": 0.15399713814258575, "learning_rate": 0.002, "loss": 2.5982, "step": 13700 }, { "epoch": 0.02731336860895066, "grad_norm": 0.21977128088474274, "learning_rate": 0.002, "loss": 2.6, "step": 13710 }, { "epoch": 0.02733329083258957, "grad_norm": 0.21264968812465668, "learning_rate": 0.002, "loss": 2.6095, "step": 13720 }, { "epoch": 0.027353213056228485, "grad_norm": 0.2089831382036209, "learning_rate": 0.002, "loss": 2.5984, "step": 13730 }, { "epoch": 0.027373135279867396, "grad_norm": 0.17749255895614624, "learning_rate": 0.002, "loss": 2.6109, "step": 13740 }, { "epoch": 0.02739305750350631, "grad_norm": 0.15352308750152588, "learning_rate": 0.002, "loss": 2.5921, "step": 13750 }, { "epoch": 0.027412979727145226, "grad_norm": 0.1982085108757019, "learning_rate": 0.002, "loss": 2.6069, "step": 13760 }, { "epoch": 0.027432901950784138, "grad_norm": 0.1885855346918106, "learning_rate": 0.002, "loss": 2.6029, "step": 13770 }, { "epoch": 0.027452824174423053, "grad_norm": 0.1524071991443634, "learning_rate": 0.002, "loss": 2.5998, "step": 13780 }, { "epoch": 0.027472746398061965, "grad_norm": 0.1901773065328598, "learning_rate": 0.002, "loss": 2.6068, "step": 13790 }, { "epoch": 0.02749266862170088, "grad_norm": 0.16203252971172333, "learning_rate": 0.002, "loss": 2.6034, "step": 13800 }, { "epoch": 0.027512590845339795, "grad_norm": 0.1915811151266098, "learning_rate": 0.002, "loss": 2.6162, "step": 13810 }, { "epoch": 0.027532513068978706, "grad_norm": 0.15609599649906158, "learning_rate": 0.002, "loss": 2.5926, "step": 13820 }, { "epoch": 0.02755243529261762, "grad_norm": 0.18425588309764862, "learning_rate": 0.002, "loss": 2.5939, "step": 13830 }, { "epoch": 0.027572357516256533, "grad_norm": 0.18665575981140137, "learning_rate": 0.002, "loss": 2.6056, "step": 13840 }, { "epoch": 0.027592279739895448, "grad_norm": 0.20956066250801086, "learning_rate": 0.002, "loss": 2.5919, "step": 13850 }, { "epoch": 0.027612201963534363, "grad_norm": 0.19401061534881592, "learning_rate": 0.002, "loss": 2.6027, "step": 13860 }, { "epoch": 0.027632124187173274, "grad_norm": 0.15094663202762604, "learning_rate": 0.002, "loss": 2.6084, "step": 13870 }, { "epoch": 0.02765204641081219, "grad_norm": 0.20284578204154968, "learning_rate": 0.002, "loss": 2.5996, "step": 13880 }, { "epoch": 0.027671968634451104, "grad_norm": 0.14482808113098145, "learning_rate": 0.002, "loss": 2.5987, "step": 13890 }, { "epoch": 0.027691890858090016, "grad_norm": 0.16736836731433868, "learning_rate": 0.002, "loss": 2.6072, "step": 13900 }, { "epoch": 0.02771181308172893, "grad_norm": 0.1592842936515808, "learning_rate": 0.002, "loss": 2.6083, "step": 13910 }, { "epoch": 0.027731735305367843, "grad_norm": 0.22687304019927979, "learning_rate": 0.002, "loss": 2.6008, "step": 13920 }, { "epoch": 0.027751657529006758, "grad_norm": 0.18396778404712677, "learning_rate": 0.002, "loss": 2.6175, "step": 13930 }, { "epoch": 0.027771579752645673, "grad_norm": 0.16729512810707092, "learning_rate": 0.002, "loss": 2.5901, "step": 13940 }, { "epoch": 0.027791501976284584, "grad_norm": 0.1387462168931961, "learning_rate": 0.002, "loss": 2.5991, "step": 13950 }, { "epoch": 0.0278114241999235, "grad_norm": 0.2919985055923462, "learning_rate": 0.002, "loss": 2.6073, "step": 13960 }, { "epoch": 0.02783134642356241, "grad_norm": 0.1627107858657837, "learning_rate": 0.002, "loss": 2.6084, "step": 13970 }, { "epoch": 0.027851268647201326, "grad_norm": 0.19427482783794403, "learning_rate": 0.002, "loss": 2.6031, "step": 13980 }, { "epoch": 0.02787119087084024, "grad_norm": 0.19217287003993988, "learning_rate": 0.002, "loss": 2.59, "step": 13990 }, { "epoch": 0.027891113094479152, "grad_norm": 0.18072915077209473, "learning_rate": 0.002, "loss": 2.6103, "step": 14000 }, { "epoch": 0.027911035318118067, "grad_norm": 0.22535622119903564, "learning_rate": 0.002, "loss": 2.6055, "step": 14010 }, { "epoch": 0.027930957541756982, "grad_norm": 0.1738964319229126, "learning_rate": 0.002, "loss": 2.5919, "step": 14020 }, { "epoch": 0.027950879765395894, "grad_norm": 0.20162001252174377, "learning_rate": 0.002, "loss": 2.604, "step": 14030 }, { "epoch": 0.02797080198903481, "grad_norm": 0.1652487963438034, "learning_rate": 0.002, "loss": 2.5941, "step": 14040 }, { "epoch": 0.02799072421267372, "grad_norm": 0.22743552923202515, "learning_rate": 0.002, "loss": 2.5817, "step": 14050 }, { "epoch": 0.028010646436312635, "grad_norm": 0.17712444067001343, "learning_rate": 0.002, "loss": 2.6035, "step": 14060 }, { "epoch": 0.02803056865995155, "grad_norm": 0.17126810550689697, "learning_rate": 0.002, "loss": 2.5979, "step": 14070 }, { "epoch": 0.028050490883590462, "grad_norm": 0.1939006745815277, "learning_rate": 0.002, "loss": 2.5884, "step": 14080 }, { "epoch": 0.028070413107229377, "grad_norm": 0.18248923122882843, "learning_rate": 0.002, "loss": 2.5995, "step": 14090 }, { "epoch": 0.02809033533086829, "grad_norm": 0.1931474655866623, "learning_rate": 0.002, "loss": 2.6134, "step": 14100 }, { "epoch": 0.028110257554507204, "grad_norm": 0.15996548533439636, "learning_rate": 0.002, "loss": 2.6026, "step": 14110 }, { "epoch": 0.02813017977814612, "grad_norm": 0.1737498790025711, "learning_rate": 0.002, "loss": 2.6024, "step": 14120 }, { "epoch": 0.02815010200178503, "grad_norm": 0.16552388668060303, "learning_rate": 0.002, "loss": 2.6085, "step": 14130 }, { "epoch": 0.028170024225423945, "grad_norm": 0.16323819756507874, "learning_rate": 0.002, "loss": 2.5896, "step": 14140 }, { "epoch": 0.02818994644906286, "grad_norm": 0.1721225529909134, "learning_rate": 0.002, "loss": 2.6027, "step": 14150 }, { "epoch": 0.028209868672701772, "grad_norm": 0.17863982915878296, "learning_rate": 0.002, "loss": 2.5967, "step": 14160 }, { "epoch": 0.028229790896340687, "grad_norm": 0.15063686668872833, "learning_rate": 0.002, "loss": 2.5965, "step": 14170 }, { "epoch": 0.0282497131199796, "grad_norm": 0.15525026619434357, "learning_rate": 0.002, "loss": 2.6034, "step": 14180 }, { "epoch": 0.028269635343618513, "grad_norm": 0.210113987326622, "learning_rate": 0.002, "loss": 2.604, "step": 14190 }, { "epoch": 0.02828955756725743, "grad_norm": 0.15237420797348022, "learning_rate": 0.002, "loss": 2.5905, "step": 14200 }, { "epoch": 0.02830947979089634, "grad_norm": 0.1785321831703186, "learning_rate": 0.002, "loss": 2.5955, "step": 14210 }, { "epoch": 0.028329402014535255, "grad_norm": 0.15970128774642944, "learning_rate": 0.002, "loss": 2.6107, "step": 14220 }, { "epoch": 0.028349324238174167, "grad_norm": 0.1553671807050705, "learning_rate": 0.002, "loss": 2.6041, "step": 14230 }, { "epoch": 0.02836924646181308, "grad_norm": 0.18150246143341064, "learning_rate": 0.002, "loss": 2.6061, "step": 14240 }, { "epoch": 0.028389168685451997, "grad_norm": 0.1943359524011612, "learning_rate": 0.002, "loss": 2.5855, "step": 14250 }, { "epoch": 0.028409090909090908, "grad_norm": 0.18384429812431335, "learning_rate": 0.002, "loss": 2.6052, "step": 14260 }, { "epoch": 0.028429013132729823, "grad_norm": 0.1974559873342514, "learning_rate": 0.002, "loss": 2.5934, "step": 14270 }, { "epoch": 0.028448935356368738, "grad_norm": 0.1567339152097702, "learning_rate": 0.002, "loss": 2.6038, "step": 14280 }, { "epoch": 0.02846885758000765, "grad_norm": 0.15618550777435303, "learning_rate": 0.002, "loss": 2.5983, "step": 14290 }, { "epoch": 0.028488779803646565, "grad_norm": 0.17209772765636444, "learning_rate": 0.002, "loss": 2.5863, "step": 14300 }, { "epoch": 0.028508702027285476, "grad_norm": 0.1624661386013031, "learning_rate": 0.002, "loss": 2.5981, "step": 14310 }, { "epoch": 0.02852862425092439, "grad_norm": 0.14480872452259064, "learning_rate": 0.002, "loss": 2.6019, "step": 14320 }, { "epoch": 0.028548546474563306, "grad_norm": 0.19105249643325806, "learning_rate": 0.002, "loss": 2.6065, "step": 14330 }, { "epoch": 0.028568468698202218, "grad_norm": 0.1583808958530426, "learning_rate": 0.002, "loss": 2.6017, "step": 14340 }, { "epoch": 0.028588390921841133, "grad_norm": 0.1505967080593109, "learning_rate": 0.002, "loss": 2.6042, "step": 14350 }, { "epoch": 0.028608313145480044, "grad_norm": 0.15391182899475098, "learning_rate": 0.002, "loss": 2.6008, "step": 14360 }, { "epoch": 0.02862823536911896, "grad_norm": 0.2067326307296753, "learning_rate": 0.002, "loss": 2.5901, "step": 14370 }, { "epoch": 0.028648157592757875, "grad_norm": 0.15579114854335785, "learning_rate": 0.002, "loss": 2.5887, "step": 14380 }, { "epoch": 0.028668079816396786, "grad_norm": 0.16351009905338287, "learning_rate": 0.002, "loss": 2.6028, "step": 14390 }, { "epoch": 0.0286880020400357, "grad_norm": 0.18199361860752106, "learning_rate": 0.002, "loss": 2.5901, "step": 14400 }, { "epoch": 0.028707924263674613, "grad_norm": 0.15752191841602325, "learning_rate": 0.002, "loss": 2.5997, "step": 14410 }, { "epoch": 0.028727846487313528, "grad_norm": 0.1659782975912094, "learning_rate": 0.002, "loss": 2.6073, "step": 14420 }, { "epoch": 0.028747768710952443, "grad_norm": 0.23244626820087433, "learning_rate": 0.002, "loss": 2.6135, "step": 14430 }, { "epoch": 0.028767690934591354, "grad_norm": 0.1931057870388031, "learning_rate": 0.002, "loss": 2.5942, "step": 14440 }, { "epoch": 0.02878761315823027, "grad_norm": 0.17485973238945007, "learning_rate": 0.002, "loss": 2.5939, "step": 14450 }, { "epoch": 0.028807535381869184, "grad_norm": 0.1809871345758438, "learning_rate": 0.002, "loss": 2.5856, "step": 14460 }, { "epoch": 0.028827457605508096, "grad_norm": 0.1441650390625, "learning_rate": 0.002, "loss": 2.5879, "step": 14470 }, { "epoch": 0.02884737982914701, "grad_norm": 0.17204692959785461, "learning_rate": 0.002, "loss": 2.599, "step": 14480 }, { "epoch": 0.028867302052785922, "grad_norm": 0.1694149672985077, "learning_rate": 0.002, "loss": 2.6077, "step": 14490 }, { "epoch": 0.028887224276424837, "grad_norm": 0.2015097439289093, "learning_rate": 0.002, "loss": 2.6025, "step": 14500 }, { "epoch": 0.028907146500063752, "grad_norm": 0.17583343386650085, "learning_rate": 0.002, "loss": 2.599, "step": 14510 }, { "epoch": 0.028927068723702664, "grad_norm": 0.1731332689523697, "learning_rate": 0.002, "loss": 2.5829, "step": 14520 }, { "epoch": 0.02894699094734158, "grad_norm": 0.16744281351566315, "learning_rate": 0.002, "loss": 2.6097, "step": 14530 }, { "epoch": 0.02896691317098049, "grad_norm": 0.1917903572320938, "learning_rate": 0.002, "loss": 2.5909, "step": 14540 }, { "epoch": 0.028986835394619406, "grad_norm": 0.1858767867088318, "learning_rate": 0.002, "loss": 2.6066, "step": 14550 }, { "epoch": 0.02900675761825832, "grad_norm": 0.21143405139446259, "learning_rate": 0.002, "loss": 2.6128, "step": 14560 }, { "epoch": 0.029026679841897232, "grad_norm": 0.180702805519104, "learning_rate": 0.002, "loss": 2.604, "step": 14570 }, { "epoch": 0.029046602065536147, "grad_norm": 0.15101896226406097, "learning_rate": 0.002, "loss": 2.6012, "step": 14580 }, { "epoch": 0.029066524289175062, "grad_norm": 0.16426506638526917, "learning_rate": 0.002, "loss": 2.5975, "step": 14590 }, { "epoch": 0.029086446512813974, "grad_norm": 0.1672566682100296, "learning_rate": 0.002, "loss": 2.5899, "step": 14600 }, { "epoch": 0.02910636873645289, "grad_norm": 0.16014070808887482, "learning_rate": 0.002, "loss": 2.6041, "step": 14610 }, { "epoch": 0.0291262909600918, "grad_norm": 0.1636802703142166, "learning_rate": 0.002, "loss": 2.5922, "step": 14620 }, { "epoch": 0.029146213183730715, "grad_norm": 0.303223580121994, "learning_rate": 0.002, "loss": 2.5975, "step": 14630 }, { "epoch": 0.02916613540736963, "grad_norm": 0.1676090806722641, "learning_rate": 0.002, "loss": 2.6058, "step": 14640 }, { "epoch": 0.029186057631008542, "grad_norm": 0.2378983348608017, "learning_rate": 0.002, "loss": 2.6088, "step": 14650 }, { "epoch": 0.029205979854647457, "grad_norm": 0.1535787135362625, "learning_rate": 0.002, "loss": 2.5985, "step": 14660 }, { "epoch": 0.02922590207828637, "grad_norm": 0.20743191242218018, "learning_rate": 0.002, "loss": 2.5961, "step": 14670 }, { "epoch": 0.029245824301925284, "grad_norm": 0.14681626856327057, "learning_rate": 0.002, "loss": 2.5843, "step": 14680 }, { "epoch": 0.0292657465255642, "grad_norm": 0.17741602659225464, "learning_rate": 0.002, "loss": 2.5867, "step": 14690 }, { "epoch": 0.02928566874920311, "grad_norm": 0.165219247341156, "learning_rate": 0.002, "loss": 2.6116, "step": 14700 }, { "epoch": 0.029305590972842025, "grad_norm": 0.1727760136127472, "learning_rate": 0.002, "loss": 2.5909, "step": 14710 }, { "epoch": 0.02932551319648094, "grad_norm": 0.1589241772890091, "learning_rate": 0.002, "loss": 2.6033, "step": 14720 }, { "epoch": 0.02934543542011985, "grad_norm": 0.19006863236427307, "learning_rate": 0.002, "loss": 2.5961, "step": 14730 }, { "epoch": 0.029365357643758767, "grad_norm": 0.19748544692993164, "learning_rate": 0.002, "loss": 2.6064, "step": 14740 }, { "epoch": 0.029385279867397678, "grad_norm": 0.1956217736005783, "learning_rate": 0.002, "loss": 2.5917, "step": 14750 }, { "epoch": 0.029405202091036593, "grad_norm": 0.17426761984825134, "learning_rate": 0.002, "loss": 2.5948, "step": 14760 }, { "epoch": 0.02942512431467551, "grad_norm": 0.17647835612297058, "learning_rate": 0.002, "loss": 2.5859, "step": 14770 }, { "epoch": 0.02944504653831442, "grad_norm": 0.19157521426677704, "learning_rate": 0.002, "loss": 2.6, "step": 14780 }, { "epoch": 0.029464968761953335, "grad_norm": 0.18149203062057495, "learning_rate": 0.002, "loss": 2.6058, "step": 14790 }, { "epoch": 0.029484890985592246, "grad_norm": 0.15718787908554077, "learning_rate": 0.002, "loss": 2.5928, "step": 14800 }, { "epoch": 0.02950481320923116, "grad_norm": 0.16518628597259521, "learning_rate": 0.002, "loss": 2.5757, "step": 14810 }, { "epoch": 0.029524735432870076, "grad_norm": 0.20701520144939423, "learning_rate": 0.002, "loss": 2.5972, "step": 14820 }, { "epoch": 0.029544657656508988, "grad_norm": 0.17353856563568115, "learning_rate": 0.002, "loss": 2.5961, "step": 14830 }, { "epoch": 0.029564579880147903, "grad_norm": 0.1799556016921997, "learning_rate": 0.002, "loss": 2.5978, "step": 14840 }, { "epoch": 0.029584502103786815, "grad_norm": 0.15911315381526947, "learning_rate": 0.002, "loss": 2.5908, "step": 14850 }, { "epoch": 0.02960442432742573, "grad_norm": 0.15979619324207306, "learning_rate": 0.002, "loss": 2.5896, "step": 14860 }, { "epoch": 0.029624346551064645, "grad_norm": 0.14567692577838898, "learning_rate": 0.002, "loss": 2.5997, "step": 14870 }, { "epoch": 0.029644268774703556, "grad_norm": 0.16024354100227356, "learning_rate": 0.002, "loss": 2.597, "step": 14880 }, { "epoch": 0.02966419099834247, "grad_norm": 0.1495259553194046, "learning_rate": 0.002, "loss": 2.5885, "step": 14890 }, { "epoch": 0.029684113221981386, "grad_norm": 0.20131687819957733, "learning_rate": 0.002, "loss": 2.5986, "step": 14900 }, { "epoch": 0.029704035445620298, "grad_norm": 0.3545282483100891, "learning_rate": 0.002, "loss": 2.5999, "step": 14910 }, { "epoch": 0.029723957669259213, "grad_norm": 0.16680030524730682, "learning_rate": 0.002, "loss": 2.597, "step": 14920 }, { "epoch": 0.029743879892898124, "grad_norm": 0.16965070366859436, "learning_rate": 0.002, "loss": 2.5798, "step": 14930 }, { "epoch": 0.02976380211653704, "grad_norm": 0.20690864324569702, "learning_rate": 0.002, "loss": 2.5962, "step": 14940 }, { "epoch": 0.029783724340175954, "grad_norm": 0.18136247992515564, "learning_rate": 0.002, "loss": 2.5957, "step": 14950 }, { "epoch": 0.029803646563814866, "grad_norm": 0.21694788336753845, "learning_rate": 0.002, "loss": 2.6136, "step": 14960 }, { "epoch": 0.02982356878745378, "grad_norm": 0.14590205252170563, "learning_rate": 0.002, "loss": 2.5986, "step": 14970 }, { "epoch": 0.029843491011092693, "grad_norm": 0.16678588092327118, "learning_rate": 0.002, "loss": 2.6039, "step": 14980 }, { "epoch": 0.029863413234731608, "grad_norm": 0.20425136387348175, "learning_rate": 0.002, "loss": 2.6061, "step": 14990 }, { "epoch": 0.029883335458370523, "grad_norm": 0.1579863578081131, "learning_rate": 0.002, "loss": 2.5916, "step": 15000 }, { "epoch": 0.029903257682009434, "grad_norm": 0.20202305912971497, "learning_rate": 0.002, "loss": 2.5985, "step": 15010 }, { "epoch": 0.02992317990564835, "grad_norm": 0.14356735348701477, "learning_rate": 0.002, "loss": 2.5966, "step": 15020 }, { "epoch": 0.029943102129287264, "grad_norm": 0.15708985924720764, "learning_rate": 0.002, "loss": 2.5911, "step": 15030 }, { "epoch": 0.029963024352926176, "grad_norm": 0.19218319654464722, "learning_rate": 0.002, "loss": 2.6068, "step": 15040 }, { "epoch": 0.02998294657656509, "grad_norm": 0.17553752660751343, "learning_rate": 0.002, "loss": 2.595, "step": 15050 }, { "epoch": 0.030002868800204002, "grad_norm": 0.1487298607826233, "learning_rate": 0.002, "loss": 2.5872, "step": 15060 }, { "epoch": 0.030022791023842917, "grad_norm": 0.18576587736606598, "learning_rate": 0.002, "loss": 2.5908, "step": 15070 }, { "epoch": 0.030042713247481832, "grad_norm": 0.1753302365541458, "learning_rate": 0.002, "loss": 2.6147, "step": 15080 }, { "epoch": 0.030062635471120744, "grad_norm": 0.1492171585559845, "learning_rate": 0.002, "loss": 2.5957, "step": 15090 }, { "epoch": 0.03008255769475966, "grad_norm": 0.19117528200149536, "learning_rate": 0.002, "loss": 2.5957, "step": 15100 }, { "epoch": 0.03010247991839857, "grad_norm": 0.21359169483184814, "learning_rate": 0.002, "loss": 2.5879, "step": 15110 }, { "epoch": 0.030122402142037485, "grad_norm": 0.16792263090610504, "learning_rate": 0.002, "loss": 2.5883, "step": 15120 }, { "epoch": 0.0301423243656764, "grad_norm": 0.1693137288093567, "learning_rate": 0.002, "loss": 2.5889, "step": 15130 }, { "epoch": 0.030162246589315312, "grad_norm": 0.1902046948671341, "learning_rate": 0.002, "loss": 2.6199, "step": 15140 }, { "epoch": 0.030182168812954227, "grad_norm": 0.16755244135856628, "learning_rate": 0.002, "loss": 2.5894, "step": 15150 }, { "epoch": 0.030202091036593142, "grad_norm": 0.18026019632816315, "learning_rate": 0.002, "loss": 2.6059, "step": 15160 }, { "epoch": 0.030222013260232054, "grad_norm": 0.19352537393569946, "learning_rate": 0.002, "loss": 2.5923, "step": 15170 }, { "epoch": 0.03024193548387097, "grad_norm": 0.1521030217409134, "learning_rate": 0.002, "loss": 2.58, "step": 15180 }, { "epoch": 0.03026185770750988, "grad_norm": 0.2014336735010147, "learning_rate": 0.002, "loss": 2.5995, "step": 15190 }, { "epoch": 0.030281779931148795, "grad_norm": 0.1893642693758011, "learning_rate": 0.002, "loss": 2.6027, "step": 15200 }, { "epoch": 0.03030170215478771, "grad_norm": 0.14217312633991241, "learning_rate": 0.002, "loss": 2.6037, "step": 15210 }, { "epoch": 0.030321624378426622, "grad_norm": 0.2020893692970276, "learning_rate": 0.002, "loss": 2.5901, "step": 15220 }, { "epoch": 0.030341546602065537, "grad_norm": 0.18969175219535828, "learning_rate": 0.002, "loss": 2.593, "step": 15230 }, { "epoch": 0.03036146882570445, "grad_norm": 0.16847455501556396, "learning_rate": 0.002, "loss": 2.5801, "step": 15240 }, { "epoch": 0.030381391049343363, "grad_norm": 0.16843585669994354, "learning_rate": 0.002, "loss": 2.6005, "step": 15250 }, { "epoch": 0.03040131327298228, "grad_norm": 0.18455900251865387, "learning_rate": 0.002, "loss": 2.591, "step": 15260 }, { "epoch": 0.03042123549662119, "grad_norm": 0.1681274026632309, "learning_rate": 0.002, "loss": 2.6036, "step": 15270 }, { "epoch": 0.030441157720260105, "grad_norm": 0.15949507057666779, "learning_rate": 0.002, "loss": 2.5926, "step": 15280 }, { "epoch": 0.030461079943899017, "grad_norm": 0.19107216596603394, "learning_rate": 0.002, "loss": 2.5774, "step": 15290 }, { "epoch": 0.03048100216753793, "grad_norm": 0.2197277694940567, "learning_rate": 0.002, "loss": 2.6078, "step": 15300 }, { "epoch": 0.030500924391176847, "grad_norm": 0.15119676291942596, "learning_rate": 0.002, "loss": 2.608, "step": 15310 }, { "epoch": 0.030520846614815758, "grad_norm": 0.19761207699775696, "learning_rate": 0.002, "loss": 2.5851, "step": 15320 }, { "epoch": 0.030540768838454673, "grad_norm": 0.14226780831813812, "learning_rate": 0.002, "loss": 2.5951, "step": 15330 }, { "epoch": 0.030560691062093588, "grad_norm": 0.1625441014766693, "learning_rate": 0.002, "loss": 2.5746, "step": 15340 }, { "epoch": 0.0305806132857325, "grad_norm": 0.21397849917411804, "learning_rate": 0.002, "loss": 2.5942, "step": 15350 }, { "epoch": 0.030600535509371415, "grad_norm": 0.18685199320316315, "learning_rate": 0.002, "loss": 2.6106, "step": 15360 }, { "epoch": 0.030620457733010326, "grad_norm": 0.16781142354011536, "learning_rate": 0.002, "loss": 2.5972, "step": 15370 }, { "epoch": 0.03064037995664924, "grad_norm": 0.1633199155330658, "learning_rate": 0.002, "loss": 2.5949, "step": 15380 }, { "epoch": 0.030660302180288156, "grad_norm": 0.15556229650974274, "learning_rate": 0.002, "loss": 2.5905, "step": 15390 }, { "epoch": 0.030680224403927068, "grad_norm": 0.2506594657897949, "learning_rate": 0.002, "loss": 2.594, "step": 15400 }, { "epoch": 0.030700146627565983, "grad_norm": 0.14672254025936127, "learning_rate": 0.002, "loss": 2.5927, "step": 15410 }, { "epoch": 0.030720068851204894, "grad_norm": 0.1640395075082779, "learning_rate": 0.002, "loss": 2.6025, "step": 15420 }, { "epoch": 0.03073999107484381, "grad_norm": 0.14907172322273254, "learning_rate": 0.002, "loss": 2.6097, "step": 15430 }, { "epoch": 0.030759913298482724, "grad_norm": 0.17620600759983063, "learning_rate": 0.002, "loss": 2.5891, "step": 15440 }, { "epoch": 0.030779835522121636, "grad_norm": 0.18820665776729584, "learning_rate": 0.002, "loss": 2.621, "step": 15450 }, { "epoch": 0.03079975774576055, "grad_norm": 0.17229057848453522, "learning_rate": 0.002, "loss": 2.5763, "step": 15460 }, { "epoch": 0.030819679969399466, "grad_norm": 0.20527024567127228, "learning_rate": 0.002, "loss": 2.5916, "step": 15470 }, { "epoch": 0.030839602193038378, "grad_norm": 0.13723082840442657, "learning_rate": 0.002, "loss": 2.5758, "step": 15480 }, { "epoch": 0.030859524416677293, "grad_norm": 0.21758294105529785, "learning_rate": 0.002, "loss": 2.5873, "step": 15490 }, { "epoch": 0.030879446640316204, "grad_norm": 0.17261186242103577, "learning_rate": 0.002, "loss": 2.601, "step": 15500 }, { "epoch": 0.03089936886395512, "grad_norm": 0.18693223595619202, "learning_rate": 0.002, "loss": 2.5779, "step": 15510 }, { "epoch": 0.030919291087594034, "grad_norm": 0.16453051567077637, "learning_rate": 0.002, "loss": 2.6033, "step": 15520 }, { "epoch": 0.030939213311232946, "grad_norm": 0.1843382567167282, "learning_rate": 0.002, "loss": 2.5947, "step": 15530 }, { "epoch": 0.03095913553487186, "grad_norm": 0.1517864167690277, "learning_rate": 0.002, "loss": 2.6007, "step": 15540 }, { "epoch": 0.030979057758510772, "grad_norm": 0.16629968583583832, "learning_rate": 0.002, "loss": 2.5953, "step": 15550 }, { "epoch": 0.030998979982149687, "grad_norm": 0.15524893999099731, "learning_rate": 0.002, "loss": 2.5735, "step": 15560 }, { "epoch": 0.031018902205788602, "grad_norm": 0.16839997470378876, "learning_rate": 0.002, "loss": 2.6041, "step": 15570 }, { "epoch": 0.031038824429427514, "grad_norm": 0.1759311556816101, "learning_rate": 0.002, "loss": 2.5976, "step": 15580 }, { "epoch": 0.03105874665306643, "grad_norm": 0.18765847384929657, "learning_rate": 0.002, "loss": 2.5997, "step": 15590 }, { "epoch": 0.031078668876705344, "grad_norm": 0.1457558572292328, "learning_rate": 0.002, "loss": 2.5879, "step": 15600 }, { "epoch": 0.031098591100344256, "grad_norm": 0.19509676098823547, "learning_rate": 0.002, "loss": 2.6035, "step": 15610 }, { "epoch": 0.03111851332398317, "grad_norm": 0.1504821479320526, "learning_rate": 0.002, "loss": 2.6066, "step": 15620 }, { "epoch": 0.031138435547622082, "grad_norm": 0.18330249190330505, "learning_rate": 0.002, "loss": 2.6049, "step": 15630 }, { "epoch": 0.031158357771260997, "grad_norm": 0.16188162565231323, "learning_rate": 0.002, "loss": 2.6004, "step": 15640 }, { "epoch": 0.031178279994899912, "grad_norm": 0.25655609369277954, "learning_rate": 0.002, "loss": 2.596, "step": 15650 }, { "epoch": 0.031198202218538824, "grad_norm": 0.1718389242887497, "learning_rate": 0.002, "loss": 2.5991, "step": 15660 }, { "epoch": 0.03121812444217774, "grad_norm": 0.17268699407577515, "learning_rate": 0.002, "loss": 2.6056, "step": 15670 }, { "epoch": 0.03123804666581665, "grad_norm": 0.17181377112865448, "learning_rate": 0.002, "loss": 2.5933, "step": 15680 }, { "epoch": 0.03125796888945557, "grad_norm": 0.15412838757038116, "learning_rate": 0.002, "loss": 2.5846, "step": 15690 }, { "epoch": 0.03127789111309448, "grad_norm": 0.1738101691007614, "learning_rate": 0.002, "loss": 2.5913, "step": 15700 }, { "epoch": 0.03129781333673339, "grad_norm": 0.14728879928588867, "learning_rate": 0.002, "loss": 2.6009, "step": 15710 }, { "epoch": 0.0313177355603723, "grad_norm": 0.1676713228225708, "learning_rate": 0.002, "loss": 2.5952, "step": 15720 }, { "epoch": 0.03133765778401122, "grad_norm": 0.22701027989387512, "learning_rate": 0.002, "loss": 2.6131, "step": 15730 }, { "epoch": 0.031357580007650133, "grad_norm": 0.1606195569038391, "learning_rate": 0.002, "loss": 2.5994, "step": 15740 }, { "epoch": 0.031377502231289045, "grad_norm": 0.19113373756408691, "learning_rate": 0.002, "loss": 2.5947, "step": 15750 }, { "epoch": 0.031397424454927964, "grad_norm": 0.1746813952922821, "learning_rate": 0.002, "loss": 2.594, "step": 15760 }, { "epoch": 0.031417346678566875, "grad_norm": 0.16621103882789612, "learning_rate": 0.002, "loss": 2.5784, "step": 15770 }, { "epoch": 0.03143726890220579, "grad_norm": 0.16461527347564697, "learning_rate": 0.002, "loss": 2.5924, "step": 15780 }, { "epoch": 0.031457191125844705, "grad_norm": 0.17262235283851624, "learning_rate": 0.002, "loss": 2.5995, "step": 15790 }, { "epoch": 0.03147711334948362, "grad_norm": 0.21009698510169983, "learning_rate": 0.002, "loss": 2.5877, "step": 15800 }, { "epoch": 0.03149703557312253, "grad_norm": 0.1565656214952469, "learning_rate": 0.002, "loss": 2.5933, "step": 15810 }, { "epoch": 0.03151695779676145, "grad_norm": 0.1577666848897934, "learning_rate": 0.002, "loss": 2.5856, "step": 15820 }, { "epoch": 0.03153688002040036, "grad_norm": 0.15604491531848907, "learning_rate": 0.002, "loss": 2.5945, "step": 15830 }, { "epoch": 0.03155680224403927, "grad_norm": 0.2282039225101471, "learning_rate": 0.002, "loss": 2.6045, "step": 15840 }, { "epoch": 0.03157672446767818, "grad_norm": 0.25223320722579956, "learning_rate": 0.002, "loss": 2.5885, "step": 15850 }, { "epoch": 0.0315966466913171, "grad_norm": 0.17919881641864777, "learning_rate": 0.002, "loss": 2.6071, "step": 15860 }, { "epoch": 0.03161656891495601, "grad_norm": 0.18784871697425842, "learning_rate": 0.002, "loss": 2.586, "step": 15870 }, { "epoch": 0.03163649113859492, "grad_norm": 0.16271664202213287, "learning_rate": 0.002, "loss": 2.5993, "step": 15880 }, { "epoch": 0.03165641336223384, "grad_norm": 0.17741522192955017, "learning_rate": 0.002, "loss": 2.5842, "step": 15890 }, { "epoch": 0.03167633558587275, "grad_norm": 0.1664050668478012, "learning_rate": 0.002, "loss": 2.603, "step": 15900 }, { "epoch": 0.031696257809511665, "grad_norm": 0.19772818684577942, "learning_rate": 0.002, "loss": 2.5944, "step": 15910 }, { "epoch": 0.03171618003315058, "grad_norm": 0.17111730575561523, "learning_rate": 0.002, "loss": 2.5856, "step": 15920 }, { "epoch": 0.031736102256789495, "grad_norm": 0.15413329005241394, "learning_rate": 0.002, "loss": 2.5899, "step": 15930 }, { "epoch": 0.031756024480428406, "grad_norm": 0.15272431075572968, "learning_rate": 0.002, "loss": 2.6011, "step": 15940 }, { "epoch": 0.031775946704067325, "grad_norm": 0.1726084053516388, "learning_rate": 0.002, "loss": 2.5857, "step": 15950 }, { "epoch": 0.031795868927706236, "grad_norm": 0.16464455425739288, "learning_rate": 0.002, "loss": 2.5985, "step": 15960 }, { "epoch": 0.03181579115134515, "grad_norm": 0.18328149616718292, "learning_rate": 0.002, "loss": 2.5904, "step": 15970 }, { "epoch": 0.03183571337498406, "grad_norm": 0.16842477023601532, "learning_rate": 0.002, "loss": 2.5903, "step": 15980 }, { "epoch": 0.03185563559862298, "grad_norm": 0.16529043018817902, "learning_rate": 0.002, "loss": 2.5956, "step": 15990 }, { "epoch": 0.03187555782226189, "grad_norm": 0.18519751727581024, "learning_rate": 0.002, "loss": 2.6127, "step": 16000 }, { "epoch": 0.0318954800459008, "grad_norm": 0.15607576072216034, "learning_rate": 0.002, "loss": 2.5967, "step": 16010 }, { "epoch": 0.03191540226953972, "grad_norm": 0.20774203538894653, "learning_rate": 0.002, "loss": 2.5981, "step": 16020 }, { "epoch": 0.03193532449317863, "grad_norm": 0.16940298676490784, "learning_rate": 0.002, "loss": 2.5951, "step": 16030 }, { "epoch": 0.03195524671681754, "grad_norm": 0.165387362241745, "learning_rate": 0.002, "loss": 2.5959, "step": 16040 }, { "epoch": 0.03197516894045646, "grad_norm": 0.19123907387256622, "learning_rate": 0.002, "loss": 2.5938, "step": 16050 }, { "epoch": 0.03199509116409537, "grad_norm": 0.15908463299274445, "learning_rate": 0.002, "loss": 2.5971, "step": 16060 }, { "epoch": 0.032015013387734284, "grad_norm": 0.19200719892978668, "learning_rate": 0.002, "loss": 2.6047, "step": 16070 }, { "epoch": 0.032034935611373196, "grad_norm": 0.16918157041072845, "learning_rate": 0.002, "loss": 2.5866, "step": 16080 }, { "epoch": 0.032054857835012114, "grad_norm": 0.16842877864837646, "learning_rate": 0.002, "loss": 2.6132, "step": 16090 }, { "epoch": 0.032074780058651026, "grad_norm": 0.15469032526016235, "learning_rate": 0.002, "loss": 2.6166, "step": 16100 }, { "epoch": 0.03209470228228994, "grad_norm": 0.1606515645980835, "learning_rate": 0.002, "loss": 2.5993, "step": 16110 }, { "epoch": 0.032114624505928856, "grad_norm": 0.1591905653476715, "learning_rate": 0.002, "loss": 2.5812, "step": 16120 }, { "epoch": 0.03213454672956777, "grad_norm": 0.20152366161346436, "learning_rate": 0.002, "loss": 2.6112, "step": 16130 }, { "epoch": 0.03215446895320668, "grad_norm": 0.15638156235218048, "learning_rate": 0.002, "loss": 2.5913, "step": 16140 }, { "epoch": 0.0321743911768456, "grad_norm": 0.18502411246299744, "learning_rate": 0.002, "loss": 2.587, "step": 16150 }, { "epoch": 0.03219431340048451, "grad_norm": 0.1777833253145218, "learning_rate": 0.002, "loss": 2.5928, "step": 16160 }, { "epoch": 0.03221423562412342, "grad_norm": 0.17788422107696533, "learning_rate": 0.002, "loss": 2.5899, "step": 16170 }, { "epoch": 0.03223415784776234, "grad_norm": 0.16628825664520264, "learning_rate": 0.002, "loss": 2.6031, "step": 16180 }, { "epoch": 0.03225408007140125, "grad_norm": 0.17220942676067352, "learning_rate": 0.002, "loss": 2.589, "step": 16190 }, { "epoch": 0.03227400229504016, "grad_norm": 0.16657111048698425, "learning_rate": 0.002, "loss": 2.6104, "step": 16200 }, { "epoch": 0.032293924518679074, "grad_norm": 0.19046594202518463, "learning_rate": 0.002, "loss": 2.5885, "step": 16210 }, { "epoch": 0.03231384674231799, "grad_norm": 0.16964979469776154, "learning_rate": 0.002, "loss": 2.6024, "step": 16220 }, { "epoch": 0.032333768965956904, "grad_norm": 0.19198264181613922, "learning_rate": 0.002, "loss": 2.5856, "step": 16230 }, { "epoch": 0.032353691189595815, "grad_norm": 0.15256455540657043, "learning_rate": 0.002, "loss": 2.5854, "step": 16240 }, { "epoch": 0.032373613413234734, "grad_norm": 0.1740190088748932, "learning_rate": 0.002, "loss": 2.5886, "step": 16250 }, { "epoch": 0.032393535636873645, "grad_norm": 0.21361097693443298, "learning_rate": 0.002, "loss": 2.5911, "step": 16260 }, { "epoch": 0.03241345786051256, "grad_norm": 0.19561073184013367, "learning_rate": 0.002, "loss": 2.6028, "step": 16270 }, { "epoch": 0.032433380084151475, "grad_norm": 0.14819832146167755, "learning_rate": 0.002, "loss": 2.5889, "step": 16280 }, { "epoch": 0.03245330230779039, "grad_norm": 0.204860657453537, "learning_rate": 0.002, "loss": 2.6008, "step": 16290 }, { "epoch": 0.0324732245314293, "grad_norm": 0.16990463435649872, "learning_rate": 0.002, "loss": 2.6058, "step": 16300 }, { "epoch": 0.03249314675506822, "grad_norm": 0.19358187913894653, "learning_rate": 0.002, "loss": 2.5974, "step": 16310 }, { "epoch": 0.03251306897870713, "grad_norm": 0.1626157909631729, "learning_rate": 0.002, "loss": 2.606, "step": 16320 }, { "epoch": 0.03253299120234604, "grad_norm": 0.17844276130199432, "learning_rate": 0.002, "loss": 2.5785, "step": 16330 }, { "epoch": 0.03255291342598495, "grad_norm": 0.14183737337589264, "learning_rate": 0.002, "loss": 2.598, "step": 16340 }, { "epoch": 0.03257283564962387, "grad_norm": 0.17414942383766174, "learning_rate": 0.002, "loss": 2.5999, "step": 16350 }, { "epoch": 0.03259275787326278, "grad_norm": 0.17126767337322235, "learning_rate": 0.002, "loss": 2.5945, "step": 16360 }, { "epoch": 0.03261268009690169, "grad_norm": 0.1647283136844635, "learning_rate": 0.002, "loss": 2.594, "step": 16370 }, { "epoch": 0.03263260232054061, "grad_norm": 0.18386870622634888, "learning_rate": 0.002, "loss": 2.6042, "step": 16380 }, { "epoch": 0.03265252454417952, "grad_norm": 0.1741904318332672, "learning_rate": 0.002, "loss": 2.5955, "step": 16390 }, { "epoch": 0.032672446767818435, "grad_norm": 0.17025955021381378, "learning_rate": 0.002, "loss": 2.5921, "step": 16400 }, { "epoch": 0.03269236899145735, "grad_norm": 0.19851677119731903, "learning_rate": 0.002, "loss": 2.6041, "step": 16410 }, { "epoch": 0.032712291215096265, "grad_norm": 0.19362226128578186, "learning_rate": 0.002, "loss": 2.5914, "step": 16420 }, { "epoch": 0.032732213438735176, "grad_norm": 0.16715137660503387, "learning_rate": 0.002, "loss": 2.588, "step": 16430 }, { "epoch": 0.032752135662374095, "grad_norm": 0.14847859740257263, "learning_rate": 0.002, "loss": 2.5933, "step": 16440 }, { "epoch": 0.032772057886013006, "grad_norm": 0.18118681013584137, "learning_rate": 0.002, "loss": 2.5915, "step": 16450 }, { "epoch": 0.03279198010965192, "grad_norm": 0.20593573153018951, "learning_rate": 0.002, "loss": 2.5844, "step": 16460 }, { "epoch": 0.03281190233329083, "grad_norm": 0.18852272629737854, "learning_rate": 0.002, "loss": 2.5844, "step": 16470 }, { "epoch": 0.03283182455692975, "grad_norm": 0.2187688648700714, "learning_rate": 0.002, "loss": 2.6016, "step": 16480 }, { "epoch": 0.03285174678056866, "grad_norm": 0.19137506186962128, "learning_rate": 0.002, "loss": 2.5836, "step": 16490 }, { "epoch": 0.03287166900420757, "grad_norm": 0.15154044330120087, "learning_rate": 0.002, "loss": 2.5913, "step": 16500 }, { "epoch": 0.03289159122784649, "grad_norm": 0.16244179010391235, "learning_rate": 0.002, "loss": 2.6161, "step": 16510 }, { "epoch": 0.0329115134514854, "grad_norm": 0.1823386400938034, "learning_rate": 0.002, "loss": 2.6028, "step": 16520 }, { "epoch": 0.03293143567512431, "grad_norm": 0.18295873701572418, "learning_rate": 0.002, "loss": 2.6016, "step": 16530 }, { "epoch": 0.03295135789876323, "grad_norm": 0.16870379447937012, "learning_rate": 0.002, "loss": 2.582, "step": 16540 }, { "epoch": 0.03297128012240214, "grad_norm": 0.1912563294172287, "learning_rate": 0.002, "loss": 2.6126, "step": 16550 }, { "epoch": 0.032991202346041054, "grad_norm": 0.14821133017539978, "learning_rate": 0.002, "loss": 2.5878, "step": 16560 }, { "epoch": 0.03301112456967997, "grad_norm": 0.18962927162647247, "learning_rate": 0.002, "loss": 2.5889, "step": 16570 }, { "epoch": 0.033031046793318884, "grad_norm": 0.15526294708251953, "learning_rate": 0.002, "loss": 2.5965, "step": 16580 }, { "epoch": 0.033050969016957796, "grad_norm": 0.18099461495876312, "learning_rate": 0.002, "loss": 2.5904, "step": 16590 }, { "epoch": 0.03307089124059671, "grad_norm": 0.198124498128891, "learning_rate": 0.002, "loss": 2.6071, "step": 16600 }, { "epoch": 0.033090813464235626, "grad_norm": 0.15617576241493225, "learning_rate": 0.002, "loss": 2.5943, "step": 16610 }, { "epoch": 0.03311073568787454, "grad_norm": 0.151605024933815, "learning_rate": 0.002, "loss": 2.596, "step": 16620 }, { "epoch": 0.03313065791151345, "grad_norm": 0.14631621539592743, "learning_rate": 0.002, "loss": 2.5997, "step": 16630 }, { "epoch": 0.03315058013515237, "grad_norm": 0.17086732387542725, "learning_rate": 0.002, "loss": 2.6006, "step": 16640 }, { "epoch": 0.03317050235879128, "grad_norm": 0.188404843211174, "learning_rate": 0.002, "loss": 2.5987, "step": 16650 }, { "epoch": 0.03319042458243019, "grad_norm": 0.15763485431671143, "learning_rate": 0.002, "loss": 2.6017, "step": 16660 }, { "epoch": 0.03321034680606911, "grad_norm": 0.17167873680591583, "learning_rate": 0.002, "loss": 2.5942, "step": 16670 }, { "epoch": 0.03323026902970802, "grad_norm": 0.18536128103733063, "learning_rate": 0.002, "loss": 2.5881, "step": 16680 }, { "epoch": 0.03325019125334693, "grad_norm": 0.19545894861221313, "learning_rate": 0.002, "loss": 2.5963, "step": 16690 }, { "epoch": 0.03327011347698585, "grad_norm": 0.16132111847400665, "learning_rate": 0.002, "loss": 2.5785, "step": 16700 }, { "epoch": 0.03329003570062476, "grad_norm": 0.220100536942482, "learning_rate": 0.002, "loss": 2.5919, "step": 16710 }, { "epoch": 0.033309957924263674, "grad_norm": 0.16424521803855896, "learning_rate": 0.002, "loss": 2.5973, "step": 16720 }, { "epoch": 0.033329880147902585, "grad_norm": 0.16102929413318634, "learning_rate": 0.002, "loss": 2.5929, "step": 16730 }, { "epoch": 0.033349802371541504, "grad_norm": 0.16831812262535095, "learning_rate": 0.002, "loss": 2.5994, "step": 16740 }, { "epoch": 0.033369724595180415, "grad_norm": 0.1614300161600113, "learning_rate": 0.002, "loss": 2.5989, "step": 16750 }, { "epoch": 0.03338964681881933, "grad_norm": 0.17894354462623596, "learning_rate": 0.002, "loss": 2.5893, "step": 16760 }, { "epoch": 0.033409569042458245, "grad_norm": 0.17095281183719635, "learning_rate": 0.002, "loss": 2.6141, "step": 16770 }, { "epoch": 0.03342949126609716, "grad_norm": 0.18713760375976562, "learning_rate": 0.002, "loss": 2.5969, "step": 16780 }, { "epoch": 0.03344941348973607, "grad_norm": 0.2445313185453415, "learning_rate": 0.002, "loss": 2.6043, "step": 16790 }, { "epoch": 0.03346933571337499, "grad_norm": 0.16731876134872437, "learning_rate": 0.002, "loss": 2.5918, "step": 16800 }, { "epoch": 0.0334892579370139, "grad_norm": 0.17391543090343475, "learning_rate": 0.002, "loss": 2.5831, "step": 16810 }, { "epoch": 0.03350918016065281, "grad_norm": 0.1725706160068512, "learning_rate": 0.002, "loss": 2.5913, "step": 16820 }, { "epoch": 0.03352910238429173, "grad_norm": 0.165979266166687, "learning_rate": 0.002, "loss": 2.5958, "step": 16830 }, { "epoch": 0.03354902460793064, "grad_norm": 0.1442907303571701, "learning_rate": 0.002, "loss": 2.5896, "step": 16840 }, { "epoch": 0.03356894683156955, "grad_norm": 0.21812224388122559, "learning_rate": 0.002, "loss": 2.5972, "step": 16850 }, { "epoch": 0.03358886905520846, "grad_norm": 0.18484051525592804, "learning_rate": 0.002, "loss": 2.6081, "step": 16860 }, { "epoch": 0.03360879127884738, "grad_norm": 0.15308777987957, "learning_rate": 0.002, "loss": 2.6019, "step": 16870 }, { "epoch": 0.03362871350248629, "grad_norm": 0.15832233428955078, "learning_rate": 0.002, "loss": 2.5937, "step": 16880 }, { "epoch": 0.033648635726125205, "grad_norm": 0.19223934412002563, "learning_rate": 0.002, "loss": 2.5831, "step": 16890 }, { "epoch": 0.03366855794976412, "grad_norm": 0.16574063897132874, "learning_rate": 0.002, "loss": 2.604, "step": 16900 }, { "epoch": 0.033688480173403035, "grad_norm": 0.18776585161685944, "learning_rate": 0.002, "loss": 2.5951, "step": 16910 }, { "epoch": 0.033708402397041946, "grad_norm": 0.17331500351428986, "learning_rate": 0.002, "loss": 2.5896, "step": 16920 }, { "epoch": 0.033728324620680865, "grad_norm": 0.18346357345581055, "learning_rate": 0.002, "loss": 2.5714, "step": 16930 }, { "epoch": 0.033748246844319776, "grad_norm": 0.16834142804145813, "learning_rate": 0.002, "loss": 2.6046, "step": 16940 }, { "epoch": 0.03376816906795869, "grad_norm": 0.14778897166252136, "learning_rate": 0.002, "loss": 2.5881, "step": 16950 }, { "epoch": 0.0337880912915976, "grad_norm": 0.19941195845603943, "learning_rate": 0.002, "loss": 2.6053, "step": 16960 }, { "epoch": 0.03380801351523652, "grad_norm": 0.16147226095199585, "learning_rate": 0.002, "loss": 2.5871, "step": 16970 }, { "epoch": 0.03382793573887543, "grad_norm": 0.18488207459449768, "learning_rate": 0.002, "loss": 2.5886, "step": 16980 }, { "epoch": 0.03384785796251434, "grad_norm": 0.17432567477226257, "learning_rate": 0.002, "loss": 2.5961, "step": 16990 }, { "epoch": 0.03386778018615326, "grad_norm": 0.17880715429782867, "learning_rate": 0.002, "loss": 2.5881, "step": 17000 }, { "epoch": 0.03388770240979217, "grad_norm": 0.17247635126113892, "learning_rate": 0.002, "loss": 2.5909, "step": 17010 }, { "epoch": 0.03390762463343108, "grad_norm": 0.18197642266750336, "learning_rate": 0.002, "loss": 2.5872, "step": 17020 }, { "epoch": 0.03392754685707, "grad_norm": 0.19222109019756317, "learning_rate": 0.002, "loss": 2.599, "step": 17030 }, { "epoch": 0.03394746908070891, "grad_norm": 0.19708707928657532, "learning_rate": 0.002, "loss": 2.588, "step": 17040 }, { "epoch": 0.033967391304347824, "grad_norm": 0.170964777469635, "learning_rate": 0.002, "loss": 2.5945, "step": 17050 }, { "epoch": 0.03398731352798674, "grad_norm": 0.18148577213287354, "learning_rate": 0.002, "loss": 2.592, "step": 17060 }, { "epoch": 0.034007235751625654, "grad_norm": 0.1552060842514038, "learning_rate": 0.002, "loss": 2.6006, "step": 17070 }, { "epoch": 0.034027157975264566, "grad_norm": 0.18835724890232086, "learning_rate": 0.002, "loss": 2.584, "step": 17080 }, { "epoch": 0.03404708019890348, "grad_norm": 0.17047102749347687, "learning_rate": 0.002, "loss": 2.5851, "step": 17090 }, { "epoch": 0.034067002422542396, "grad_norm": 0.18121661245822906, "learning_rate": 0.002, "loss": 2.6057, "step": 17100 }, { "epoch": 0.03408692464618131, "grad_norm": 0.1735120713710785, "learning_rate": 0.002, "loss": 2.5944, "step": 17110 }, { "epoch": 0.03410684686982022, "grad_norm": 0.14641137421131134, "learning_rate": 0.002, "loss": 2.5996, "step": 17120 }, { "epoch": 0.03412676909345914, "grad_norm": 0.2316562384366989, "learning_rate": 0.002, "loss": 2.6012, "step": 17130 }, { "epoch": 0.03414669131709805, "grad_norm": 0.1835334300994873, "learning_rate": 0.002, "loss": 2.5652, "step": 17140 }, { "epoch": 0.03416661354073696, "grad_norm": 0.14957782626152039, "learning_rate": 0.002, "loss": 2.5904, "step": 17150 }, { "epoch": 0.03418653576437588, "grad_norm": 0.18480093777179718, "learning_rate": 0.002, "loss": 2.5992, "step": 17160 }, { "epoch": 0.03420645798801479, "grad_norm": 0.1685805469751358, "learning_rate": 0.002, "loss": 2.6123, "step": 17170 }, { "epoch": 0.0342263802116537, "grad_norm": 0.1762377917766571, "learning_rate": 0.002, "loss": 2.6097, "step": 17180 }, { "epoch": 0.03424630243529262, "grad_norm": 0.1768418401479721, "learning_rate": 0.002, "loss": 2.5949, "step": 17190 }, { "epoch": 0.03426622465893153, "grad_norm": 0.18612495064735413, "learning_rate": 0.002, "loss": 2.6054, "step": 17200 }, { "epoch": 0.034286146882570444, "grad_norm": 0.16736429929733276, "learning_rate": 0.002, "loss": 2.5969, "step": 17210 }, { "epoch": 0.034306069106209355, "grad_norm": 0.19351547956466675, "learning_rate": 0.002, "loss": 2.6006, "step": 17220 }, { "epoch": 0.034325991329848274, "grad_norm": 0.17012427747249603, "learning_rate": 0.002, "loss": 2.5944, "step": 17230 }, { "epoch": 0.034345913553487185, "grad_norm": 0.15170828998088837, "learning_rate": 0.002, "loss": 2.5829, "step": 17240 }, { "epoch": 0.0343658357771261, "grad_norm": 0.1697607785463333, "learning_rate": 0.002, "loss": 2.603, "step": 17250 }, { "epoch": 0.034385758000765015, "grad_norm": 0.15304921567440033, "learning_rate": 0.002, "loss": 2.5827, "step": 17260 }, { "epoch": 0.03440568022440393, "grad_norm": 0.21091677248477936, "learning_rate": 0.002, "loss": 2.5946, "step": 17270 }, { "epoch": 0.03442560244804284, "grad_norm": 0.1927540898323059, "learning_rate": 0.002, "loss": 2.6093, "step": 17280 }, { "epoch": 0.03444552467168176, "grad_norm": 0.16568702459335327, "learning_rate": 0.002, "loss": 2.6098, "step": 17290 }, { "epoch": 0.03446544689532067, "grad_norm": 0.193705216050148, "learning_rate": 0.002, "loss": 2.5977, "step": 17300 }, { "epoch": 0.03448536911895958, "grad_norm": 0.1751696914434433, "learning_rate": 0.002, "loss": 2.6026, "step": 17310 }, { "epoch": 0.0345052913425985, "grad_norm": 0.1714276373386383, "learning_rate": 0.002, "loss": 2.5836, "step": 17320 }, { "epoch": 0.03452521356623741, "grad_norm": 0.18449655175209045, "learning_rate": 0.002, "loss": 2.5844, "step": 17330 }, { "epoch": 0.03454513578987632, "grad_norm": 0.16923348605632782, "learning_rate": 0.002, "loss": 2.5848, "step": 17340 }, { "epoch": 0.03456505801351523, "grad_norm": 0.21040689945220947, "learning_rate": 0.002, "loss": 2.5883, "step": 17350 }, { "epoch": 0.03458498023715415, "grad_norm": 0.21976429224014282, "learning_rate": 0.002, "loss": 2.6053, "step": 17360 }, { "epoch": 0.03460490246079306, "grad_norm": 0.16983681917190552, "learning_rate": 0.002, "loss": 2.5977, "step": 17370 }, { "epoch": 0.034624824684431975, "grad_norm": 0.17684364318847656, "learning_rate": 0.002, "loss": 2.605, "step": 17380 }, { "epoch": 0.03464474690807089, "grad_norm": 0.16985414922237396, "learning_rate": 0.002, "loss": 2.5898, "step": 17390 }, { "epoch": 0.034664669131709805, "grad_norm": 0.14482152462005615, "learning_rate": 0.002, "loss": 2.591, "step": 17400 }, { "epoch": 0.034684591355348716, "grad_norm": 0.20164291560649872, "learning_rate": 0.002, "loss": 2.5825, "step": 17410 }, { "epoch": 0.034704513578987635, "grad_norm": 0.14456160366535187, "learning_rate": 0.002, "loss": 2.5887, "step": 17420 }, { "epoch": 0.034724435802626546, "grad_norm": 0.1528194397687912, "learning_rate": 0.002, "loss": 2.6054, "step": 17430 }, { "epoch": 0.03474435802626546, "grad_norm": 0.16805389523506165, "learning_rate": 0.002, "loss": 2.6136, "step": 17440 }, { "epoch": 0.03476428024990438, "grad_norm": 0.1707071214914322, "learning_rate": 0.002, "loss": 2.5841, "step": 17450 }, { "epoch": 0.03478420247354329, "grad_norm": 0.15809352695941925, "learning_rate": 0.002, "loss": 2.6156, "step": 17460 }, { "epoch": 0.0348041246971822, "grad_norm": 0.18810974061489105, "learning_rate": 0.002, "loss": 2.5842, "step": 17470 }, { "epoch": 0.03482404692082111, "grad_norm": 0.1729336529970169, "learning_rate": 0.002, "loss": 2.594, "step": 17480 }, { "epoch": 0.03484396914446003, "grad_norm": 0.20026583969593048, "learning_rate": 0.002, "loss": 2.5914, "step": 17490 }, { "epoch": 0.03486389136809894, "grad_norm": 0.1712988317012787, "learning_rate": 0.002, "loss": 2.6039, "step": 17500 }, { "epoch": 0.03488381359173785, "grad_norm": 0.16809438169002533, "learning_rate": 0.002, "loss": 2.5956, "step": 17510 }, { "epoch": 0.03490373581537677, "grad_norm": 0.24435389041900635, "learning_rate": 0.002, "loss": 2.5778, "step": 17520 }, { "epoch": 0.03492365803901568, "grad_norm": 0.17308157682418823, "learning_rate": 0.002, "loss": 2.5726, "step": 17530 }, { "epoch": 0.034943580262654594, "grad_norm": 0.17763067781925201, "learning_rate": 0.002, "loss": 2.5983, "step": 17540 }, { "epoch": 0.03496350248629351, "grad_norm": 0.16075308620929718, "learning_rate": 0.002, "loss": 2.5903, "step": 17550 }, { "epoch": 0.034983424709932424, "grad_norm": 0.15937523543834686, "learning_rate": 0.002, "loss": 2.5937, "step": 17560 }, { "epoch": 0.035003346933571336, "grad_norm": 0.21856112778186798, "learning_rate": 0.002, "loss": 2.5989, "step": 17570 }, { "epoch": 0.035023269157210254, "grad_norm": 0.17703580856323242, "learning_rate": 0.002, "loss": 2.5993, "step": 17580 }, { "epoch": 0.035043191380849166, "grad_norm": 0.20682746171951294, "learning_rate": 0.002, "loss": 2.5891, "step": 17590 }, { "epoch": 0.03506311360448808, "grad_norm": 0.17384028434753418, "learning_rate": 0.002, "loss": 2.5818, "step": 17600 }, { "epoch": 0.03508303582812699, "grad_norm": 0.16914083063602448, "learning_rate": 0.002, "loss": 2.5875, "step": 17610 }, { "epoch": 0.03510295805176591, "grad_norm": 0.20971357822418213, "learning_rate": 0.002, "loss": 2.588, "step": 17620 }, { "epoch": 0.03512288027540482, "grad_norm": 0.16529028117656708, "learning_rate": 0.002, "loss": 2.5973, "step": 17630 }, { "epoch": 0.03514280249904373, "grad_norm": 0.18013766407966614, "learning_rate": 0.002, "loss": 2.599, "step": 17640 }, { "epoch": 0.03516272472268265, "grad_norm": 0.2038879245519638, "learning_rate": 0.002, "loss": 2.5798, "step": 17650 }, { "epoch": 0.03518264694632156, "grad_norm": 0.16345252096652985, "learning_rate": 0.002, "loss": 2.5961, "step": 17660 }, { "epoch": 0.03520256916996047, "grad_norm": 0.1660909801721573, "learning_rate": 0.002, "loss": 2.5931, "step": 17670 }, { "epoch": 0.03522249139359939, "grad_norm": 0.16061225533485413, "learning_rate": 0.002, "loss": 2.6016, "step": 17680 }, { "epoch": 0.0352424136172383, "grad_norm": 0.13988882303237915, "learning_rate": 0.002, "loss": 2.5966, "step": 17690 }, { "epoch": 0.035262335840877214, "grad_norm": 0.1841249018907547, "learning_rate": 0.002, "loss": 2.5763, "step": 17700 }, { "epoch": 0.03528225806451613, "grad_norm": 0.1690298467874527, "learning_rate": 0.002, "loss": 2.5977, "step": 17710 }, { "epoch": 0.035302180288155044, "grad_norm": 0.1770709902048111, "learning_rate": 0.002, "loss": 2.5883, "step": 17720 }, { "epoch": 0.035322102511793955, "grad_norm": 0.20095613598823547, "learning_rate": 0.002, "loss": 2.5857, "step": 17730 }, { "epoch": 0.03534202473543287, "grad_norm": 0.17478308081626892, "learning_rate": 0.002, "loss": 2.5901, "step": 17740 }, { "epoch": 0.035361946959071786, "grad_norm": 0.1591305285692215, "learning_rate": 0.002, "loss": 2.6061, "step": 17750 }, { "epoch": 0.0353818691827107, "grad_norm": 0.15509483218193054, "learning_rate": 0.002, "loss": 2.5857, "step": 17760 }, { "epoch": 0.03540179140634961, "grad_norm": 0.1910688430070877, "learning_rate": 0.002, "loss": 2.5934, "step": 17770 }, { "epoch": 0.03542171362998853, "grad_norm": 0.17488771677017212, "learning_rate": 0.002, "loss": 2.5809, "step": 17780 }, { "epoch": 0.03544163585362744, "grad_norm": 0.16155527532100677, "learning_rate": 0.002, "loss": 2.5866, "step": 17790 }, { "epoch": 0.03546155807726635, "grad_norm": 0.18926723301410675, "learning_rate": 0.002, "loss": 2.5793, "step": 17800 }, { "epoch": 0.03548148030090527, "grad_norm": 0.1706748902797699, "learning_rate": 0.002, "loss": 2.588, "step": 17810 }, { "epoch": 0.03550140252454418, "grad_norm": 0.17661549150943756, "learning_rate": 0.002, "loss": 2.5882, "step": 17820 }, { "epoch": 0.03552132474818309, "grad_norm": 0.19945114850997925, "learning_rate": 0.002, "loss": 2.6116, "step": 17830 }, { "epoch": 0.03554124697182201, "grad_norm": 0.18729406595230103, "learning_rate": 0.002, "loss": 2.6038, "step": 17840 }, { "epoch": 0.03556116919546092, "grad_norm": 0.14103591442108154, "learning_rate": 0.002, "loss": 2.5914, "step": 17850 }, { "epoch": 0.03558109141909983, "grad_norm": 0.1930791437625885, "learning_rate": 0.002, "loss": 2.6022, "step": 17860 }, { "epoch": 0.035601013642738745, "grad_norm": 0.20442454516887665, "learning_rate": 0.002, "loss": 2.5891, "step": 17870 }, { "epoch": 0.03562093586637766, "grad_norm": 0.1685013473033905, "learning_rate": 0.002, "loss": 2.5896, "step": 17880 }, { "epoch": 0.035640858090016575, "grad_norm": 0.15119190514087677, "learning_rate": 0.002, "loss": 2.5933, "step": 17890 }, { "epoch": 0.03566078031365549, "grad_norm": 0.2135598063468933, "learning_rate": 0.002, "loss": 2.5928, "step": 17900 }, { "epoch": 0.035680702537294405, "grad_norm": 0.16726385056972504, "learning_rate": 0.002, "loss": 2.5903, "step": 17910 }, { "epoch": 0.03570062476093332, "grad_norm": 0.1861872375011444, "learning_rate": 0.002, "loss": 2.5926, "step": 17920 }, { "epoch": 0.03572054698457223, "grad_norm": 0.1782727986574173, "learning_rate": 0.002, "loss": 2.6029, "step": 17930 }, { "epoch": 0.03574046920821115, "grad_norm": 0.17058341205120087, "learning_rate": 0.002, "loss": 2.5724, "step": 17940 }, { "epoch": 0.03576039143185006, "grad_norm": 0.1637953370809555, "learning_rate": 0.002, "loss": 2.5843, "step": 17950 }, { "epoch": 0.03578031365548897, "grad_norm": 0.1710672676563263, "learning_rate": 0.002, "loss": 2.598, "step": 17960 }, { "epoch": 0.03580023587912788, "grad_norm": 0.1362610012292862, "learning_rate": 0.002, "loss": 2.5891, "step": 17970 }, { "epoch": 0.0358201581027668, "grad_norm": 0.1821565330028534, "learning_rate": 0.002, "loss": 2.6021, "step": 17980 }, { "epoch": 0.03584008032640571, "grad_norm": 0.16136609017848969, "learning_rate": 0.002, "loss": 2.5923, "step": 17990 }, { "epoch": 0.03586000255004462, "grad_norm": 0.17181065678596497, "learning_rate": 0.002, "loss": 2.6087, "step": 18000 }, { "epoch": 0.03587992477368354, "grad_norm": 0.15580661594867706, "learning_rate": 0.002, "loss": 2.5845, "step": 18010 }, { "epoch": 0.03589984699732245, "grad_norm": 0.16447147727012634, "learning_rate": 0.002, "loss": 2.5989, "step": 18020 }, { "epoch": 0.035919769220961364, "grad_norm": 0.19890831410884857, "learning_rate": 0.002, "loss": 2.5883, "step": 18030 }, { "epoch": 0.03593969144460028, "grad_norm": 0.16495126485824585, "learning_rate": 0.002, "loss": 2.5944, "step": 18040 }, { "epoch": 0.035959613668239195, "grad_norm": 0.15772895514965057, "learning_rate": 0.002, "loss": 2.5851, "step": 18050 }, { "epoch": 0.035979535891878106, "grad_norm": 0.1587798148393631, "learning_rate": 0.002, "loss": 2.5859, "step": 18060 }, { "epoch": 0.035999458115517025, "grad_norm": 0.14990608394145966, "learning_rate": 0.002, "loss": 2.6207, "step": 18070 }, { "epoch": 0.036019380339155936, "grad_norm": 0.1427256017923355, "learning_rate": 0.002, "loss": 2.5978, "step": 18080 }, { "epoch": 0.03603930256279485, "grad_norm": 0.22555844485759735, "learning_rate": 0.002, "loss": 2.5929, "step": 18090 }, { "epoch": 0.03605922478643376, "grad_norm": 0.16299912333488464, "learning_rate": 0.002, "loss": 2.603, "step": 18100 }, { "epoch": 0.03607914701007268, "grad_norm": 0.15448325872421265, "learning_rate": 0.002, "loss": 2.5696, "step": 18110 }, { "epoch": 0.03609906923371159, "grad_norm": 0.17461678385734558, "learning_rate": 0.002, "loss": 2.5881, "step": 18120 }, { "epoch": 0.0361189914573505, "grad_norm": 0.22201475501060486, "learning_rate": 0.002, "loss": 2.5964, "step": 18130 }, { "epoch": 0.03613891368098942, "grad_norm": 0.16447152197360992, "learning_rate": 0.002, "loss": 2.586, "step": 18140 }, { "epoch": 0.03615883590462833, "grad_norm": 0.15339769423007965, "learning_rate": 0.002, "loss": 2.5877, "step": 18150 }, { "epoch": 0.03617875812826724, "grad_norm": 0.20736418664455414, "learning_rate": 0.002, "loss": 2.5904, "step": 18160 }, { "epoch": 0.03619868035190616, "grad_norm": 0.16051174700260162, "learning_rate": 0.002, "loss": 2.5854, "step": 18170 }, { "epoch": 0.03621860257554507, "grad_norm": 0.17121052742004395, "learning_rate": 0.002, "loss": 2.5833, "step": 18180 }, { "epoch": 0.036238524799183984, "grad_norm": 0.18952293694019318, "learning_rate": 0.002, "loss": 2.5965, "step": 18190 }, { "epoch": 0.0362584470228229, "grad_norm": 0.16152521967887878, "learning_rate": 0.002, "loss": 2.5952, "step": 18200 }, { "epoch": 0.036278369246461814, "grad_norm": 0.15876957774162292, "learning_rate": 0.002, "loss": 2.5875, "step": 18210 }, { "epoch": 0.036298291470100726, "grad_norm": 0.18165120482444763, "learning_rate": 0.002, "loss": 2.5941, "step": 18220 }, { "epoch": 0.03631821369373964, "grad_norm": 0.15487006306648254, "learning_rate": 0.002, "loss": 2.5938, "step": 18230 }, { "epoch": 0.036338135917378556, "grad_norm": 0.19487743079662323, "learning_rate": 0.002, "loss": 2.5754, "step": 18240 }, { "epoch": 0.03635805814101747, "grad_norm": 0.1925029158592224, "learning_rate": 0.002, "loss": 2.5834, "step": 18250 }, { "epoch": 0.03637798036465638, "grad_norm": 0.17195390164852142, "learning_rate": 0.002, "loss": 2.6054, "step": 18260 }, { "epoch": 0.0363979025882953, "grad_norm": 0.20549359917640686, "learning_rate": 0.002, "loss": 2.5839, "step": 18270 }, { "epoch": 0.03641782481193421, "grad_norm": 0.16636763513088226, "learning_rate": 0.002, "loss": 2.5784, "step": 18280 }, { "epoch": 0.03643774703557312, "grad_norm": 0.15559421479701996, "learning_rate": 0.002, "loss": 2.5846, "step": 18290 }, { "epoch": 0.03645766925921204, "grad_norm": 0.1795450747013092, "learning_rate": 0.002, "loss": 2.5884, "step": 18300 }, { "epoch": 0.03647759148285095, "grad_norm": 0.1759076565504074, "learning_rate": 0.002, "loss": 2.59, "step": 18310 }, { "epoch": 0.03649751370648986, "grad_norm": 0.1871751844882965, "learning_rate": 0.002, "loss": 2.5981, "step": 18320 }, { "epoch": 0.03651743593012878, "grad_norm": 0.14538684487342834, "learning_rate": 0.002, "loss": 2.5996, "step": 18330 }, { "epoch": 0.03653735815376769, "grad_norm": 0.22825001180171967, "learning_rate": 0.002, "loss": 2.5834, "step": 18340 }, { "epoch": 0.036557280377406604, "grad_norm": 0.19165608286857605, "learning_rate": 0.002, "loss": 2.6015, "step": 18350 }, { "epoch": 0.036577202601045515, "grad_norm": 0.15397511422634125, "learning_rate": 0.002, "loss": 2.5748, "step": 18360 }, { "epoch": 0.036597124824684434, "grad_norm": 0.15582017600536346, "learning_rate": 0.002, "loss": 2.6008, "step": 18370 }, { "epoch": 0.036617047048323345, "grad_norm": 0.22421053051948547, "learning_rate": 0.002, "loss": 2.5948, "step": 18380 }, { "epoch": 0.03663696927196226, "grad_norm": 0.1643954962491989, "learning_rate": 0.002, "loss": 2.5989, "step": 18390 }, { "epoch": 0.036656891495601175, "grad_norm": 0.19826462864875793, "learning_rate": 0.002, "loss": 2.6029, "step": 18400 }, { "epoch": 0.03667681371924009, "grad_norm": 0.16158907115459442, "learning_rate": 0.002, "loss": 2.5936, "step": 18410 }, { "epoch": 0.036696735942879, "grad_norm": 0.15354909002780914, "learning_rate": 0.002, "loss": 2.6052, "step": 18420 }, { "epoch": 0.03671665816651792, "grad_norm": 0.1650199145078659, "learning_rate": 0.002, "loss": 2.5972, "step": 18430 }, { "epoch": 0.03673658039015683, "grad_norm": 0.22590121626853943, "learning_rate": 0.002, "loss": 2.5952, "step": 18440 }, { "epoch": 0.03675650261379574, "grad_norm": 0.20093077421188354, "learning_rate": 0.002, "loss": 2.6133, "step": 18450 }, { "epoch": 0.03677642483743466, "grad_norm": 0.16103215515613556, "learning_rate": 0.002, "loss": 2.5748, "step": 18460 }, { "epoch": 0.03679634706107357, "grad_norm": 0.1689419001340866, "learning_rate": 0.002, "loss": 2.5952, "step": 18470 }, { "epoch": 0.03681626928471248, "grad_norm": 0.18676352500915527, "learning_rate": 0.002, "loss": 2.603, "step": 18480 }, { "epoch": 0.03683619150835139, "grad_norm": 0.19596242904663086, "learning_rate": 0.002, "loss": 2.583, "step": 18490 }, { "epoch": 0.03685611373199031, "grad_norm": 0.19892986118793488, "learning_rate": 0.002, "loss": 2.6083, "step": 18500 }, { "epoch": 0.03687603595562922, "grad_norm": 0.15449456870555878, "learning_rate": 0.002, "loss": 2.5901, "step": 18510 }, { "epoch": 0.036895958179268135, "grad_norm": 0.22846853733062744, "learning_rate": 0.002, "loss": 2.5966, "step": 18520 }, { "epoch": 0.03691588040290705, "grad_norm": 0.14354228973388672, "learning_rate": 0.002, "loss": 2.5921, "step": 18530 }, { "epoch": 0.036935802626545965, "grad_norm": 0.17180068790912628, "learning_rate": 0.002, "loss": 2.5901, "step": 18540 }, { "epoch": 0.036955724850184876, "grad_norm": 0.20418661832809448, "learning_rate": 0.002, "loss": 2.5931, "step": 18550 }, { "epoch": 0.036975647073823795, "grad_norm": 0.26340970396995544, "learning_rate": 0.002, "loss": 2.5979, "step": 18560 }, { "epoch": 0.036995569297462706, "grad_norm": 0.20776113867759705, "learning_rate": 0.002, "loss": 2.5888, "step": 18570 }, { "epoch": 0.03701549152110162, "grad_norm": 0.15014685690402985, "learning_rate": 0.002, "loss": 2.5954, "step": 18580 }, { "epoch": 0.037035413744740536, "grad_norm": 0.15700028836727142, "learning_rate": 0.002, "loss": 2.5795, "step": 18590 }, { "epoch": 0.03705533596837945, "grad_norm": 0.18181212246418, "learning_rate": 0.002, "loss": 2.5755, "step": 18600 }, { "epoch": 0.03707525819201836, "grad_norm": 0.15004025399684906, "learning_rate": 0.002, "loss": 2.5846, "step": 18610 }, { "epoch": 0.03709518041565727, "grad_norm": 0.19790680706501007, "learning_rate": 0.002, "loss": 2.5834, "step": 18620 }, { "epoch": 0.03711510263929619, "grad_norm": 0.1495388150215149, "learning_rate": 0.002, "loss": 2.5732, "step": 18630 }, { "epoch": 0.0371350248629351, "grad_norm": 0.18085427582263947, "learning_rate": 0.002, "loss": 2.5845, "step": 18640 }, { "epoch": 0.03715494708657401, "grad_norm": 0.29285159707069397, "learning_rate": 0.002, "loss": 2.5914, "step": 18650 }, { "epoch": 0.03717486931021293, "grad_norm": 0.1613306701183319, "learning_rate": 0.002, "loss": 2.6023, "step": 18660 }, { "epoch": 0.03719479153385184, "grad_norm": 0.15127554535865784, "learning_rate": 0.002, "loss": 2.5968, "step": 18670 }, { "epoch": 0.037214713757490754, "grad_norm": 0.1806434988975525, "learning_rate": 0.002, "loss": 2.5938, "step": 18680 }, { "epoch": 0.03723463598112967, "grad_norm": 0.13921359181404114, "learning_rate": 0.002, "loss": 2.5805, "step": 18690 }, { "epoch": 0.037254558204768584, "grad_norm": 0.18503038585186005, "learning_rate": 0.002, "loss": 2.5864, "step": 18700 }, { "epoch": 0.037274480428407496, "grad_norm": 0.1892700493335724, "learning_rate": 0.002, "loss": 2.5897, "step": 18710 }, { "epoch": 0.037294402652046414, "grad_norm": 0.15609437227249146, "learning_rate": 0.002, "loss": 2.5888, "step": 18720 }, { "epoch": 0.037314324875685326, "grad_norm": 0.1513087898492813, "learning_rate": 0.002, "loss": 2.5883, "step": 18730 }, { "epoch": 0.03733424709932424, "grad_norm": 0.18246246874332428, "learning_rate": 0.002, "loss": 2.5898, "step": 18740 }, { "epoch": 0.03735416932296315, "grad_norm": 0.21945632994174957, "learning_rate": 0.002, "loss": 2.583, "step": 18750 }, { "epoch": 0.03737409154660207, "grad_norm": 0.1424155831336975, "learning_rate": 0.002, "loss": 2.5917, "step": 18760 }, { "epoch": 0.03739401377024098, "grad_norm": 0.23057612776756287, "learning_rate": 0.002, "loss": 2.5877, "step": 18770 }, { "epoch": 0.03741393599387989, "grad_norm": 0.17476430535316467, "learning_rate": 0.002, "loss": 2.5906, "step": 18780 }, { "epoch": 0.03743385821751881, "grad_norm": 0.16787873208522797, "learning_rate": 0.002, "loss": 2.5845, "step": 18790 }, { "epoch": 0.03745378044115772, "grad_norm": 0.16060347855091095, "learning_rate": 0.002, "loss": 2.6016, "step": 18800 }, { "epoch": 0.03747370266479663, "grad_norm": 0.1831161230802536, "learning_rate": 0.002, "loss": 2.5927, "step": 18810 }, { "epoch": 0.03749362488843555, "grad_norm": 0.22608795762062073, "learning_rate": 0.002, "loss": 2.5967, "step": 18820 }, { "epoch": 0.03751354711207446, "grad_norm": 0.1549803465604782, "learning_rate": 0.002, "loss": 2.5783, "step": 18830 }, { "epoch": 0.037533469335713374, "grad_norm": 0.18306364119052887, "learning_rate": 0.002, "loss": 2.6178, "step": 18840 }, { "epoch": 0.03755339155935229, "grad_norm": 0.15847843885421753, "learning_rate": 0.002, "loss": 2.5833, "step": 18850 }, { "epoch": 0.037573313782991204, "grad_norm": 0.16042549908161163, "learning_rate": 0.002, "loss": 2.6117, "step": 18860 }, { "epoch": 0.037593236006630115, "grad_norm": 0.1755245178937912, "learning_rate": 0.002, "loss": 2.6016, "step": 18870 }, { "epoch": 0.03761315823026903, "grad_norm": 0.1618748903274536, "learning_rate": 0.002, "loss": 2.5846, "step": 18880 }, { "epoch": 0.037633080453907945, "grad_norm": 0.1500900536775589, "learning_rate": 0.002, "loss": 2.5924, "step": 18890 }, { "epoch": 0.03765300267754686, "grad_norm": 0.18327359855175018, "learning_rate": 0.002, "loss": 2.5864, "step": 18900 }, { "epoch": 0.03767292490118577, "grad_norm": 0.16561457514762878, "learning_rate": 0.002, "loss": 2.5904, "step": 18910 }, { "epoch": 0.03769284712482469, "grad_norm": 0.15860654413700104, "learning_rate": 0.002, "loss": 2.5862, "step": 18920 }, { "epoch": 0.0377127693484636, "grad_norm": 0.15797200798988342, "learning_rate": 0.002, "loss": 2.6073, "step": 18930 }, { "epoch": 0.03773269157210251, "grad_norm": 0.20180484652519226, "learning_rate": 0.002, "loss": 2.5747, "step": 18940 }, { "epoch": 0.03775261379574143, "grad_norm": 0.14895591139793396, "learning_rate": 0.002, "loss": 2.5871, "step": 18950 }, { "epoch": 0.03777253601938034, "grad_norm": 0.2219354212284088, "learning_rate": 0.002, "loss": 2.591, "step": 18960 }, { "epoch": 0.03779245824301925, "grad_norm": 0.16537466645240784, "learning_rate": 0.002, "loss": 2.5823, "step": 18970 }, { "epoch": 0.03781238046665816, "grad_norm": 0.18362130224704742, "learning_rate": 0.002, "loss": 2.5818, "step": 18980 }, { "epoch": 0.03783230269029708, "grad_norm": 0.19387193024158478, "learning_rate": 0.002, "loss": 2.5896, "step": 18990 }, { "epoch": 0.03785222491393599, "grad_norm": 0.15313315391540527, "learning_rate": 0.002, "loss": 2.5779, "step": 19000 }, { "epoch": 0.037872147137574905, "grad_norm": 0.17667146027088165, "learning_rate": 0.002, "loss": 2.5907, "step": 19010 }, { "epoch": 0.03789206936121382, "grad_norm": 0.1656194031238556, "learning_rate": 0.002, "loss": 2.5883, "step": 19020 }, { "epoch": 0.037911991584852735, "grad_norm": 0.16264741122722626, "learning_rate": 0.002, "loss": 2.5937, "step": 19030 }, { "epoch": 0.037931913808491646, "grad_norm": 0.19980396330356598, "learning_rate": 0.002, "loss": 2.5857, "step": 19040 }, { "epoch": 0.037951836032130565, "grad_norm": 0.18683376908302307, "learning_rate": 0.002, "loss": 2.5941, "step": 19050 }, { "epoch": 0.037971758255769476, "grad_norm": 0.16678814589977264, "learning_rate": 0.002, "loss": 2.5777, "step": 19060 }, { "epoch": 0.03799168047940839, "grad_norm": 0.17123880982398987, "learning_rate": 0.002, "loss": 2.5901, "step": 19070 }, { "epoch": 0.038011602703047306, "grad_norm": 0.1518276482820511, "learning_rate": 0.002, "loss": 2.5991, "step": 19080 }, { "epoch": 0.03803152492668622, "grad_norm": 0.179530531167984, "learning_rate": 0.002, "loss": 2.6062, "step": 19090 }, { "epoch": 0.03805144715032513, "grad_norm": 0.16272874176502228, "learning_rate": 0.002, "loss": 2.5963, "step": 19100 }, { "epoch": 0.03807136937396404, "grad_norm": 0.17457318305969238, "learning_rate": 0.002, "loss": 2.583, "step": 19110 }, { "epoch": 0.03809129159760296, "grad_norm": 0.180558979511261, "learning_rate": 0.002, "loss": 2.5918, "step": 19120 }, { "epoch": 0.03811121382124187, "grad_norm": 0.15383599698543549, "learning_rate": 0.002, "loss": 2.5813, "step": 19130 }, { "epoch": 0.03813113604488078, "grad_norm": 0.177476167678833, "learning_rate": 0.002, "loss": 2.5802, "step": 19140 }, { "epoch": 0.0381510582685197, "grad_norm": 0.1649060696363449, "learning_rate": 0.002, "loss": 2.588, "step": 19150 }, { "epoch": 0.03817098049215861, "grad_norm": 0.15417365729808807, "learning_rate": 0.002, "loss": 2.5884, "step": 19160 }, { "epoch": 0.038190902715797524, "grad_norm": 0.1863427460193634, "learning_rate": 0.002, "loss": 2.604, "step": 19170 }, { "epoch": 0.03821082493943644, "grad_norm": 0.20226968824863434, "learning_rate": 0.002, "loss": 2.5836, "step": 19180 }, { "epoch": 0.038230747163075354, "grad_norm": 0.17075064778327942, "learning_rate": 0.002, "loss": 2.6003, "step": 19190 }, { "epoch": 0.038250669386714266, "grad_norm": 0.16465531289577484, "learning_rate": 0.002, "loss": 2.5923, "step": 19200 }, { "epoch": 0.038270591610353184, "grad_norm": 0.1766914427280426, "learning_rate": 0.002, "loss": 2.5816, "step": 19210 }, { "epoch": 0.038290513833992096, "grad_norm": 0.16604061424732208, "learning_rate": 0.002, "loss": 2.5743, "step": 19220 }, { "epoch": 0.03831043605763101, "grad_norm": 0.18755577504634857, "learning_rate": 0.002, "loss": 2.6006, "step": 19230 }, { "epoch": 0.03833035828126992, "grad_norm": 0.17711696028709412, "learning_rate": 0.002, "loss": 2.5904, "step": 19240 }, { "epoch": 0.03835028050490884, "grad_norm": 0.16555126011371613, "learning_rate": 0.002, "loss": 2.5895, "step": 19250 }, { "epoch": 0.03837020272854775, "grad_norm": 0.18986506760120392, "learning_rate": 0.002, "loss": 2.5859, "step": 19260 }, { "epoch": 0.03839012495218666, "grad_norm": 0.20091530680656433, "learning_rate": 0.002, "loss": 2.6085, "step": 19270 }, { "epoch": 0.03841004717582558, "grad_norm": 0.16784630715847015, "learning_rate": 0.002, "loss": 2.5902, "step": 19280 }, { "epoch": 0.03842996939946449, "grad_norm": 0.19499671459197998, "learning_rate": 0.002, "loss": 2.5955, "step": 19290 }, { "epoch": 0.0384498916231034, "grad_norm": 0.19497308135032654, "learning_rate": 0.002, "loss": 2.6046, "step": 19300 }, { "epoch": 0.03846981384674232, "grad_norm": 0.1810404658317566, "learning_rate": 0.002, "loss": 2.5917, "step": 19310 }, { "epoch": 0.03848973607038123, "grad_norm": 0.16557082533836365, "learning_rate": 0.002, "loss": 2.6031, "step": 19320 }, { "epoch": 0.038509658294020144, "grad_norm": 0.20696036517620087, "learning_rate": 0.002, "loss": 2.6025, "step": 19330 }, { "epoch": 0.03852958051765906, "grad_norm": 0.1716865450143814, "learning_rate": 0.002, "loss": 2.5956, "step": 19340 }, { "epoch": 0.038549502741297974, "grad_norm": 0.15223722159862518, "learning_rate": 0.002, "loss": 2.5885, "step": 19350 }, { "epoch": 0.038569424964936885, "grad_norm": 0.17496252059936523, "learning_rate": 0.002, "loss": 2.5946, "step": 19360 }, { "epoch": 0.0385893471885758, "grad_norm": 0.20800995826721191, "learning_rate": 0.002, "loss": 2.5874, "step": 19370 }, { "epoch": 0.038609269412214715, "grad_norm": 0.14872685074806213, "learning_rate": 0.002, "loss": 2.5849, "step": 19380 }, { "epoch": 0.03862919163585363, "grad_norm": 0.17782026529312134, "learning_rate": 0.002, "loss": 2.5858, "step": 19390 }, { "epoch": 0.03864911385949254, "grad_norm": 0.16193988919258118, "learning_rate": 0.002, "loss": 2.595, "step": 19400 }, { "epoch": 0.03866903608313146, "grad_norm": 0.1837720274925232, "learning_rate": 0.002, "loss": 2.5934, "step": 19410 }, { "epoch": 0.03868895830677037, "grad_norm": 0.1727377474308014, "learning_rate": 0.002, "loss": 2.5869, "step": 19420 }, { "epoch": 0.03870888053040928, "grad_norm": 0.14658468961715698, "learning_rate": 0.002, "loss": 2.5847, "step": 19430 }, { "epoch": 0.0387288027540482, "grad_norm": 0.15621447563171387, "learning_rate": 0.002, "loss": 2.5829, "step": 19440 }, { "epoch": 0.03874872497768711, "grad_norm": 0.21681709587574005, "learning_rate": 0.002, "loss": 2.5948, "step": 19450 }, { "epoch": 0.03876864720132602, "grad_norm": 0.1780635267496109, "learning_rate": 0.002, "loss": 2.6098, "step": 19460 }, { "epoch": 0.03878856942496494, "grad_norm": 0.17545074224472046, "learning_rate": 0.002, "loss": 2.5921, "step": 19470 }, { "epoch": 0.03880849164860385, "grad_norm": 0.14982739090919495, "learning_rate": 0.002, "loss": 2.5789, "step": 19480 }, { "epoch": 0.03882841387224276, "grad_norm": 0.17288658022880554, "learning_rate": 0.002, "loss": 2.5878, "step": 19490 }, { "epoch": 0.038848336095881675, "grad_norm": 0.20985034108161926, "learning_rate": 0.002, "loss": 2.5964, "step": 19500 }, { "epoch": 0.03886825831952059, "grad_norm": 0.15685713291168213, "learning_rate": 0.002, "loss": 2.5825, "step": 19510 }, { "epoch": 0.038888180543159505, "grad_norm": 0.21286186575889587, "learning_rate": 0.002, "loss": 2.5849, "step": 19520 }, { "epoch": 0.038908102766798416, "grad_norm": 0.159754678606987, "learning_rate": 0.002, "loss": 2.5973, "step": 19530 }, { "epoch": 0.038928024990437335, "grad_norm": 0.1547863930463791, "learning_rate": 0.002, "loss": 2.5876, "step": 19540 }, { "epoch": 0.038947947214076246, "grad_norm": 0.2251441925764084, "learning_rate": 0.002, "loss": 2.5801, "step": 19550 }, { "epoch": 0.03896786943771516, "grad_norm": 0.15600897371768951, "learning_rate": 0.002, "loss": 2.5835, "step": 19560 }, { "epoch": 0.038987791661354076, "grad_norm": 0.1911911964416504, "learning_rate": 0.002, "loss": 2.6058, "step": 19570 }, { "epoch": 0.03900771388499299, "grad_norm": 0.18484772741794586, "learning_rate": 0.002, "loss": 2.577, "step": 19580 }, { "epoch": 0.0390276361086319, "grad_norm": 0.21335437893867493, "learning_rate": 0.002, "loss": 2.5841, "step": 19590 }, { "epoch": 0.03904755833227082, "grad_norm": 0.15553885698318481, "learning_rate": 0.002, "loss": 2.5815, "step": 19600 }, { "epoch": 0.03906748055590973, "grad_norm": 0.15542958676815033, "learning_rate": 0.002, "loss": 2.5883, "step": 19610 }, { "epoch": 0.03908740277954864, "grad_norm": 0.16031914949417114, "learning_rate": 0.002, "loss": 2.583, "step": 19620 }, { "epoch": 0.03910732500318755, "grad_norm": 0.15005317330360413, "learning_rate": 0.002, "loss": 2.5744, "step": 19630 }, { "epoch": 0.03912724722682647, "grad_norm": 0.1888498216867447, "learning_rate": 0.002, "loss": 2.5805, "step": 19640 }, { "epoch": 0.03914716945046538, "grad_norm": 0.16924908757209778, "learning_rate": 0.002, "loss": 2.5808, "step": 19650 }, { "epoch": 0.039167091674104294, "grad_norm": 0.19005204737186432, "learning_rate": 0.002, "loss": 2.5999, "step": 19660 }, { "epoch": 0.03918701389774321, "grad_norm": 0.21278274059295654, "learning_rate": 0.002, "loss": 2.5774, "step": 19670 }, { "epoch": 0.039206936121382124, "grad_norm": 0.14524345099925995, "learning_rate": 0.002, "loss": 2.5859, "step": 19680 }, { "epoch": 0.039226858345021036, "grad_norm": 0.17115844786167145, "learning_rate": 0.002, "loss": 2.5894, "step": 19690 }, { "epoch": 0.039246780568659954, "grad_norm": 0.1599634736776352, "learning_rate": 0.002, "loss": 2.5882, "step": 19700 }, { "epoch": 0.039266702792298866, "grad_norm": 0.15251046419143677, "learning_rate": 0.002, "loss": 2.5936, "step": 19710 }, { "epoch": 0.03928662501593778, "grad_norm": 0.15343736112117767, "learning_rate": 0.002, "loss": 2.5801, "step": 19720 }, { "epoch": 0.039306547239576696, "grad_norm": 0.20250532031059265, "learning_rate": 0.002, "loss": 2.5802, "step": 19730 }, { "epoch": 0.03932646946321561, "grad_norm": 0.15998180210590363, "learning_rate": 0.002, "loss": 2.5928, "step": 19740 }, { "epoch": 0.03934639168685452, "grad_norm": 0.18419991433620453, "learning_rate": 0.002, "loss": 2.5933, "step": 19750 }, { "epoch": 0.03936631391049343, "grad_norm": 0.1497587412595749, "learning_rate": 0.002, "loss": 2.5833, "step": 19760 }, { "epoch": 0.03938623613413235, "grad_norm": 0.1991940140724182, "learning_rate": 0.002, "loss": 2.5945, "step": 19770 }, { "epoch": 0.03940615835777126, "grad_norm": 0.17207680642604828, "learning_rate": 0.002, "loss": 2.5906, "step": 19780 }, { "epoch": 0.03942608058141017, "grad_norm": 0.19131368398666382, "learning_rate": 0.002, "loss": 2.5909, "step": 19790 }, { "epoch": 0.03944600280504909, "grad_norm": 0.1434231549501419, "learning_rate": 0.002, "loss": 2.5717, "step": 19800 }, { "epoch": 0.039465925028688, "grad_norm": 0.15742211043834686, "learning_rate": 0.002, "loss": 2.6071, "step": 19810 }, { "epoch": 0.039485847252326914, "grad_norm": 0.19936054944992065, "learning_rate": 0.002, "loss": 2.5913, "step": 19820 }, { "epoch": 0.03950576947596583, "grad_norm": 0.16151420772075653, "learning_rate": 0.002, "loss": 2.592, "step": 19830 }, { "epoch": 0.039525691699604744, "grad_norm": 0.15998855233192444, "learning_rate": 0.002, "loss": 2.5961, "step": 19840 }, { "epoch": 0.039545613923243655, "grad_norm": 0.15366485714912415, "learning_rate": 0.002, "loss": 2.5904, "step": 19850 }, { "epoch": 0.03956553614688257, "grad_norm": 0.1762085258960724, "learning_rate": 0.002, "loss": 2.5892, "step": 19860 }, { "epoch": 0.039585458370521485, "grad_norm": 0.16085729002952576, "learning_rate": 0.002, "loss": 2.5877, "step": 19870 }, { "epoch": 0.0396053805941604, "grad_norm": 0.1915021687746048, "learning_rate": 0.002, "loss": 2.5963, "step": 19880 }, { "epoch": 0.03962530281779931, "grad_norm": 0.16086824238300323, "learning_rate": 0.002, "loss": 2.591, "step": 19890 }, { "epoch": 0.03964522504143823, "grad_norm": 0.16790014505386353, "learning_rate": 0.002, "loss": 2.5907, "step": 19900 }, { "epoch": 0.03966514726507714, "grad_norm": 0.16268044710159302, "learning_rate": 0.002, "loss": 2.5927, "step": 19910 }, { "epoch": 0.03968506948871605, "grad_norm": 0.15615953505039215, "learning_rate": 0.002, "loss": 2.586, "step": 19920 }, { "epoch": 0.03970499171235497, "grad_norm": 0.15287083387374878, "learning_rate": 0.002, "loss": 2.5849, "step": 19930 }, { "epoch": 0.03972491393599388, "grad_norm": 0.20666325092315674, "learning_rate": 0.002, "loss": 2.6037, "step": 19940 }, { "epoch": 0.03974483615963279, "grad_norm": 0.1728454977273941, "learning_rate": 0.002, "loss": 2.5807, "step": 19950 }, { "epoch": 0.03976475838327171, "grad_norm": 0.14654797315597534, "learning_rate": 0.002, "loss": 2.5849, "step": 19960 }, { "epoch": 0.03978468060691062, "grad_norm": 0.20536507666110992, "learning_rate": 0.002, "loss": 2.5915, "step": 19970 }, { "epoch": 0.03980460283054953, "grad_norm": 0.16752125322818756, "learning_rate": 0.002, "loss": 2.5982, "step": 19980 }, { "epoch": 0.039824525054188445, "grad_norm": 0.18312618136405945, "learning_rate": 0.002, "loss": 2.5823, "step": 19990 }, { "epoch": 0.03984444727782736, "grad_norm": 0.17783497273921967, "learning_rate": 0.002, "loss": 2.595, "step": 20000 }, { "epoch": 0.039864369501466275, "grad_norm": 0.14462098479270935, "learning_rate": 0.002, "loss": 2.6002, "step": 20010 }, { "epoch": 0.039884291725105186, "grad_norm": 0.18995055556297302, "learning_rate": 0.002, "loss": 2.6051, "step": 20020 }, { "epoch": 0.039904213948744105, "grad_norm": 0.17462964355945587, "learning_rate": 0.002, "loss": 2.5946, "step": 20030 }, { "epoch": 0.03992413617238302, "grad_norm": 0.1632544994354248, "learning_rate": 0.002, "loss": 2.6008, "step": 20040 }, { "epoch": 0.03994405839602193, "grad_norm": 0.19744031131267548, "learning_rate": 0.002, "loss": 2.5986, "step": 20050 }, { "epoch": 0.03996398061966085, "grad_norm": 0.13665111362934113, "learning_rate": 0.002, "loss": 2.5897, "step": 20060 }, { "epoch": 0.03998390284329976, "grad_norm": 0.16466331481933594, "learning_rate": 0.002, "loss": 2.5938, "step": 20070 }, { "epoch": 0.04000382506693867, "grad_norm": 0.14680899679660797, "learning_rate": 0.002, "loss": 2.5894, "step": 20080 }, { "epoch": 0.04002374729057759, "grad_norm": 0.20171670615673065, "learning_rate": 0.002, "loss": 2.5917, "step": 20090 }, { "epoch": 0.0400436695142165, "grad_norm": 0.1722014993429184, "learning_rate": 0.002, "loss": 2.5916, "step": 20100 }, { "epoch": 0.04006359173785541, "grad_norm": 0.14521053433418274, "learning_rate": 0.002, "loss": 2.5957, "step": 20110 }, { "epoch": 0.04008351396149432, "grad_norm": 0.1710224151611328, "learning_rate": 0.002, "loss": 2.5909, "step": 20120 }, { "epoch": 0.04010343618513324, "grad_norm": 0.16517460346221924, "learning_rate": 0.002, "loss": 2.6004, "step": 20130 }, { "epoch": 0.04012335840877215, "grad_norm": 0.1719328612089157, "learning_rate": 0.002, "loss": 2.5877, "step": 20140 }, { "epoch": 0.040143280632411064, "grad_norm": 0.15460214018821716, "learning_rate": 0.002, "loss": 2.6011, "step": 20150 }, { "epoch": 0.04016320285604998, "grad_norm": 0.21432745456695557, "learning_rate": 0.002, "loss": 2.594, "step": 20160 }, { "epoch": 0.040183125079688894, "grad_norm": 0.2900466024875641, "learning_rate": 0.002, "loss": 2.5938, "step": 20170 }, { "epoch": 0.040203047303327806, "grad_norm": 0.17593669891357422, "learning_rate": 0.002, "loss": 2.5971, "step": 20180 }, { "epoch": 0.040222969526966725, "grad_norm": 0.16279630362987518, "learning_rate": 0.002, "loss": 2.6022, "step": 20190 }, { "epoch": 0.040242891750605636, "grad_norm": 0.17978501319885254, "learning_rate": 0.002, "loss": 2.609, "step": 20200 }, { "epoch": 0.04026281397424455, "grad_norm": 0.21060647070407867, "learning_rate": 0.002, "loss": 2.5883, "step": 20210 }, { "epoch": 0.040282736197883466, "grad_norm": 0.14441685378551483, "learning_rate": 0.002, "loss": 2.5924, "step": 20220 }, { "epoch": 0.04030265842152238, "grad_norm": 0.14241203665733337, "learning_rate": 0.002, "loss": 2.5933, "step": 20230 }, { "epoch": 0.04032258064516129, "grad_norm": 0.17189879715442657, "learning_rate": 0.002, "loss": 2.6059, "step": 20240 }, { "epoch": 0.0403425028688002, "grad_norm": 0.17244724929332733, "learning_rate": 0.002, "loss": 2.5961, "step": 20250 }, { "epoch": 0.04036242509243912, "grad_norm": 0.17870332300662994, "learning_rate": 0.002, "loss": 2.589, "step": 20260 }, { "epoch": 0.04038234731607803, "grad_norm": 0.17066043615341187, "learning_rate": 0.002, "loss": 2.6015, "step": 20270 }, { "epoch": 0.04040226953971694, "grad_norm": 0.17466798424720764, "learning_rate": 0.002, "loss": 2.584, "step": 20280 }, { "epoch": 0.04042219176335586, "grad_norm": 0.19186867773532867, "learning_rate": 0.002, "loss": 2.5998, "step": 20290 }, { "epoch": 0.04044211398699477, "grad_norm": 0.18571807444095612, "learning_rate": 0.002, "loss": 2.5784, "step": 20300 }, { "epoch": 0.040462036210633684, "grad_norm": 0.1612861007452011, "learning_rate": 0.002, "loss": 2.6029, "step": 20310 }, { "epoch": 0.0404819584342726, "grad_norm": 0.20065948367118835, "learning_rate": 0.002, "loss": 2.5931, "step": 20320 }, { "epoch": 0.040501880657911514, "grad_norm": 0.1780698150396347, "learning_rate": 0.002, "loss": 2.5933, "step": 20330 }, { "epoch": 0.040521802881550426, "grad_norm": 0.15845727920532227, "learning_rate": 0.002, "loss": 2.5833, "step": 20340 }, { "epoch": 0.040541725105189344, "grad_norm": 0.1933976411819458, "learning_rate": 0.002, "loss": 2.5926, "step": 20350 }, { "epoch": 0.040561647328828256, "grad_norm": 0.20418326556682587, "learning_rate": 0.002, "loss": 2.5873, "step": 20360 }, { "epoch": 0.04058156955246717, "grad_norm": 0.17577707767486572, "learning_rate": 0.002, "loss": 2.5901, "step": 20370 }, { "epoch": 0.04060149177610608, "grad_norm": 0.15475083887577057, "learning_rate": 0.002, "loss": 2.5945, "step": 20380 }, { "epoch": 0.040621413999745, "grad_norm": 0.1826654076576233, "learning_rate": 0.002, "loss": 2.5889, "step": 20390 }, { "epoch": 0.04064133622338391, "grad_norm": 0.19316264986991882, "learning_rate": 0.002, "loss": 2.5865, "step": 20400 }, { "epoch": 0.04066125844702282, "grad_norm": 0.1608094573020935, "learning_rate": 0.002, "loss": 2.5923, "step": 20410 }, { "epoch": 0.04068118067066174, "grad_norm": 0.1749347448348999, "learning_rate": 0.002, "loss": 2.582, "step": 20420 }, { "epoch": 0.04070110289430065, "grad_norm": 0.16690313816070557, "learning_rate": 0.002, "loss": 2.5815, "step": 20430 }, { "epoch": 0.04072102511793956, "grad_norm": 0.15870697796344757, "learning_rate": 0.002, "loss": 2.5888, "step": 20440 }, { "epoch": 0.04074094734157848, "grad_norm": 0.15861132740974426, "learning_rate": 0.002, "loss": 2.5875, "step": 20450 }, { "epoch": 0.04076086956521739, "grad_norm": 0.15311278402805328, "learning_rate": 0.002, "loss": 2.5941, "step": 20460 }, { "epoch": 0.0407807917888563, "grad_norm": 0.15291649103164673, "learning_rate": 0.002, "loss": 2.6036, "step": 20470 }, { "epoch": 0.04080071401249522, "grad_norm": 0.16745483875274658, "learning_rate": 0.002, "loss": 2.5984, "step": 20480 }, { "epoch": 0.040820636236134134, "grad_norm": 0.17243003845214844, "learning_rate": 0.002, "loss": 2.5922, "step": 20490 }, { "epoch": 0.040840558459773045, "grad_norm": 0.21324896812438965, "learning_rate": 0.002, "loss": 2.5957, "step": 20500 }, { "epoch": 0.04086048068341196, "grad_norm": 0.1531812995672226, "learning_rate": 0.002, "loss": 2.5974, "step": 20510 }, { "epoch": 0.040880402907050875, "grad_norm": 0.1774512529373169, "learning_rate": 0.002, "loss": 2.6017, "step": 20520 }, { "epoch": 0.04090032513068979, "grad_norm": 0.16525787115097046, "learning_rate": 0.002, "loss": 2.5865, "step": 20530 }, { "epoch": 0.0409202473543287, "grad_norm": 0.16457080841064453, "learning_rate": 0.002, "loss": 2.5922, "step": 20540 }, { "epoch": 0.04094016957796762, "grad_norm": 0.14104540646076202, "learning_rate": 0.002, "loss": 2.585, "step": 20550 }, { "epoch": 0.04096009180160653, "grad_norm": 0.1954895704984665, "learning_rate": 0.002, "loss": 2.5992, "step": 20560 }, { "epoch": 0.04098001402524544, "grad_norm": 0.18948110938072205, "learning_rate": 0.002, "loss": 2.5914, "step": 20570 }, { "epoch": 0.04099993624888436, "grad_norm": 0.16076840460300446, "learning_rate": 0.002, "loss": 2.5851, "step": 20580 }, { "epoch": 0.04101985847252327, "grad_norm": 0.1781962811946869, "learning_rate": 0.002, "loss": 2.5982, "step": 20590 }, { "epoch": 0.04103978069616218, "grad_norm": 0.1818939447402954, "learning_rate": 0.002, "loss": 2.6035, "step": 20600 }, { "epoch": 0.0410597029198011, "grad_norm": 0.14348381757736206, "learning_rate": 0.002, "loss": 2.5976, "step": 20610 }, { "epoch": 0.04107962514344001, "grad_norm": 0.18779441714286804, "learning_rate": 0.002, "loss": 2.5821, "step": 20620 }, { "epoch": 0.04109954736707892, "grad_norm": 0.15073147416114807, "learning_rate": 0.002, "loss": 2.5766, "step": 20630 }, { "epoch": 0.041119469590717835, "grad_norm": 0.19527722895145416, "learning_rate": 0.002, "loss": 2.588, "step": 20640 }, { "epoch": 0.04113939181435675, "grad_norm": 0.19182170927524567, "learning_rate": 0.002, "loss": 2.5772, "step": 20650 }, { "epoch": 0.041159314037995665, "grad_norm": 0.16185228526592255, "learning_rate": 0.002, "loss": 2.5947, "step": 20660 }, { "epoch": 0.041179236261634576, "grad_norm": 0.22933192551136017, "learning_rate": 0.002, "loss": 2.5974, "step": 20670 }, { "epoch": 0.041199158485273495, "grad_norm": 0.1526186466217041, "learning_rate": 0.002, "loss": 2.5966, "step": 20680 }, { "epoch": 0.041219080708912406, "grad_norm": 0.17400678992271423, "learning_rate": 0.002, "loss": 2.6066, "step": 20690 }, { "epoch": 0.04123900293255132, "grad_norm": 0.2028559446334839, "learning_rate": 0.002, "loss": 2.6004, "step": 20700 }, { "epoch": 0.041258925156190236, "grad_norm": 0.15328533947467804, "learning_rate": 0.002, "loss": 2.5926, "step": 20710 }, { "epoch": 0.04127884737982915, "grad_norm": 0.17444726824760437, "learning_rate": 0.002, "loss": 2.5975, "step": 20720 }, { "epoch": 0.04129876960346806, "grad_norm": 0.2013855129480362, "learning_rate": 0.002, "loss": 2.5885, "step": 20730 }, { "epoch": 0.04131869182710698, "grad_norm": 0.16077370941638947, "learning_rate": 0.002, "loss": 2.5815, "step": 20740 }, { "epoch": 0.04133861405074589, "grad_norm": 0.2075040638446808, "learning_rate": 0.002, "loss": 2.5858, "step": 20750 }, { "epoch": 0.0413585362743848, "grad_norm": 0.1740034520626068, "learning_rate": 0.002, "loss": 2.6015, "step": 20760 }, { "epoch": 0.04137845849802371, "grad_norm": 0.18525883555412292, "learning_rate": 0.002, "loss": 2.5819, "step": 20770 }, { "epoch": 0.04139838072166263, "grad_norm": 0.1655520349740982, "learning_rate": 0.002, "loss": 2.5989, "step": 20780 }, { "epoch": 0.04141830294530154, "grad_norm": 0.14338445663452148, "learning_rate": 0.002, "loss": 2.5968, "step": 20790 }, { "epoch": 0.041438225168940454, "grad_norm": 0.18300363421440125, "learning_rate": 0.002, "loss": 2.5894, "step": 20800 }, { "epoch": 0.04145814739257937, "grad_norm": 0.22016815841197968, "learning_rate": 0.002, "loss": 2.5949, "step": 20810 }, { "epoch": 0.041478069616218284, "grad_norm": 0.1752375215291977, "learning_rate": 0.002, "loss": 2.5917, "step": 20820 }, { "epoch": 0.041497991839857196, "grad_norm": 0.18319879472255707, "learning_rate": 0.002, "loss": 2.5778, "step": 20830 }, { "epoch": 0.041517914063496114, "grad_norm": 0.22201286256313324, "learning_rate": 0.002, "loss": 2.5893, "step": 20840 }, { "epoch": 0.041537836287135026, "grad_norm": 0.15855243802070618, "learning_rate": 0.002, "loss": 2.5935, "step": 20850 }, { "epoch": 0.04155775851077394, "grad_norm": 0.18448887765407562, "learning_rate": 0.002, "loss": 2.5884, "step": 20860 }, { "epoch": 0.04157768073441285, "grad_norm": 0.19256645441055298, "learning_rate": 0.002, "loss": 2.5953, "step": 20870 }, { "epoch": 0.04159760295805177, "grad_norm": 0.15901917219161987, "learning_rate": 0.002, "loss": 2.5885, "step": 20880 }, { "epoch": 0.04161752518169068, "grad_norm": 0.17401766777038574, "learning_rate": 0.002, "loss": 2.5936, "step": 20890 }, { "epoch": 0.04163744740532959, "grad_norm": 0.18281641602516174, "learning_rate": 0.002, "loss": 2.6012, "step": 20900 }, { "epoch": 0.04165736962896851, "grad_norm": 0.21530452370643616, "learning_rate": 0.002, "loss": 2.5986, "step": 20910 }, { "epoch": 0.04167729185260742, "grad_norm": 0.19188453257083893, "learning_rate": 0.002, "loss": 2.5874, "step": 20920 }, { "epoch": 0.04169721407624633, "grad_norm": 0.15226691961288452, "learning_rate": 0.002, "loss": 2.5922, "step": 20930 }, { "epoch": 0.04171713629988525, "grad_norm": 0.16789066791534424, "learning_rate": 0.002, "loss": 2.5876, "step": 20940 }, { "epoch": 0.04173705852352416, "grad_norm": 0.20164760947227478, "learning_rate": 0.002, "loss": 2.5936, "step": 20950 }, { "epoch": 0.041756980747163074, "grad_norm": 0.16657480597496033, "learning_rate": 0.002, "loss": 2.5867, "step": 20960 }, { "epoch": 0.04177690297080199, "grad_norm": 0.1939626932144165, "learning_rate": 0.002, "loss": 2.6006, "step": 20970 }, { "epoch": 0.041796825194440904, "grad_norm": 0.14400118589401245, "learning_rate": 0.002, "loss": 2.583, "step": 20980 }, { "epoch": 0.041816747418079815, "grad_norm": 0.2080538272857666, "learning_rate": 0.002, "loss": 2.597, "step": 20990 }, { "epoch": 0.04183666964171873, "grad_norm": 0.19694621860980988, "learning_rate": 0.002, "loss": 2.5832, "step": 21000 }, { "epoch": 0.041856591865357645, "grad_norm": 0.16136275231838226, "learning_rate": 0.002, "loss": 2.6013, "step": 21010 }, { "epoch": 0.04187651408899656, "grad_norm": 0.2134607881307602, "learning_rate": 0.002, "loss": 2.5939, "step": 21020 }, { "epoch": 0.04189643631263547, "grad_norm": 0.17515185475349426, "learning_rate": 0.002, "loss": 2.5859, "step": 21030 }, { "epoch": 0.04191635853627439, "grad_norm": 0.20859172940254211, "learning_rate": 0.002, "loss": 2.589, "step": 21040 }, { "epoch": 0.0419362807599133, "grad_norm": 0.1829386204481125, "learning_rate": 0.002, "loss": 2.5954, "step": 21050 }, { "epoch": 0.04195620298355221, "grad_norm": 0.17364102602005005, "learning_rate": 0.002, "loss": 2.6009, "step": 21060 }, { "epoch": 0.04197612520719113, "grad_norm": 0.15884391963481903, "learning_rate": 0.002, "loss": 2.5924, "step": 21070 }, { "epoch": 0.04199604743083004, "grad_norm": 0.17501866817474365, "learning_rate": 0.002, "loss": 2.5875, "step": 21080 }, { "epoch": 0.04201596965446895, "grad_norm": 0.15534871816635132, "learning_rate": 0.002, "loss": 2.5904, "step": 21090 }, { "epoch": 0.04203589187810787, "grad_norm": 0.2019946426153183, "learning_rate": 0.002, "loss": 2.6013, "step": 21100 }, { "epoch": 0.04205581410174678, "grad_norm": 0.14708568155765533, "learning_rate": 0.002, "loss": 2.5872, "step": 21110 }, { "epoch": 0.04207573632538569, "grad_norm": 0.17505763471126556, "learning_rate": 0.002, "loss": 2.5855, "step": 21120 }, { "epoch": 0.042095658549024605, "grad_norm": 0.19701771438121796, "learning_rate": 0.002, "loss": 2.5977, "step": 21130 }, { "epoch": 0.04211558077266352, "grad_norm": 0.17172235250473022, "learning_rate": 0.002, "loss": 2.5863, "step": 21140 }, { "epoch": 0.042135502996302435, "grad_norm": 0.17594659328460693, "learning_rate": 0.002, "loss": 2.5835, "step": 21150 }, { "epoch": 0.042155425219941346, "grad_norm": 0.15380893647670746, "learning_rate": 0.002, "loss": 2.595, "step": 21160 }, { "epoch": 0.042175347443580265, "grad_norm": 0.1491313874721527, "learning_rate": 0.002, "loss": 2.5746, "step": 21170 }, { "epoch": 0.042195269667219176, "grad_norm": 0.14776435494422913, "learning_rate": 0.002, "loss": 2.594, "step": 21180 }, { "epoch": 0.04221519189085809, "grad_norm": 0.15168730914592743, "learning_rate": 0.002, "loss": 2.5858, "step": 21190 }, { "epoch": 0.042235114114497006, "grad_norm": 0.206496462225914, "learning_rate": 0.002, "loss": 2.586, "step": 21200 }, { "epoch": 0.04225503633813592, "grad_norm": 0.17429189383983612, "learning_rate": 0.002, "loss": 2.5894, "step": 21210 }, { "epoch": 0.04227495856177483, "grad_norm": 0.15113933384418488, "learning_rate": 0.002, "loss": 2.58, "step": 21220 }, { "epoch": 0.04229488078541375, "grad_norm": 0.19356024265289307, "learning_rate": 0.002, "loss": 2.6077, "step": 21230 }, { "epoch": 0.04231480300905266, "grad_norm": 0.20501317083835602, "learning_rate": 0.002, "loss": 2.592, "step": 21240 }, { "epoch": 0.04233472523269157, "grad_norm": 0.153561070561409, "learning_rate": 0.002, "loss": 2.5837, "step": 21250 }, { "epoch": 0.04235464745633048, "grad_norm": 0.20447379350662231, "learning_rate": 0.002, "loss": 2.5869, "step": 21260 }, { "epoch": 0.0423745696799694, "grad_norm": 0.17936836183071136, "learning_rate": 0.002, "loss": 2.5977, "step": 21270 }, { "epoch": 0.04239449190360831, "grad_norm": 0.18789397180080414, "learning_rate": 0.002, "loss": 2.5902, "step": 21280 }, { "epoch": 0.042414414127247224, "grad_norm": 0.17277079820632935, "learning_rate": 0.002, "loss": 2.5962, "step": 21290 }, { "epoch": 0.04243433635088614, "grad_norm": 0.16693609952926636, "learning_rate": 0.002, "loss": 2.5691, "step": 21300 }, { "epoch": 0.042454258574525054, "grad_norm": 0.1887582540512085, "learning_rate": 0.002, "loss": 2.588, "step": 21310 }, { "epoch": 0.042474180798163966, "grad_norm": 0.15497350692749023, "learning_rate": 0.002, "loss": 2.566, "step": 21320 }, { "epoch": 0.042494103021802884, "grad_norm": 0.19050969183444977, "learning_rate": 0.002, "loss": 2.5803, "step": 21330 }, { "epoch": 0.042514025245441796, "grad_norm": 0.20350882411003113, "learning_rate": 0.002, "loss": 2.5781, "step": 21340 }, { "epoch": 0.04253394746908071, "grad_norm": 0.16677244007587433, "learning_rate": 0.002, "loss": 2.5835, "step": 21350 }, { "epoch": 0.042553869692719626, "grad_norm": 0.13458476960659027, "learning_rate": 0.002, "loss": 2.5866, "step": 21360 }, { "epoch": 0.04257379191635854, "grad_norm": 0.1831544190645218, "learning_rate": 0.002, "loss": 2.5838, "step": 21370 }, { "epoch": 0.04259371413999745, "grad_norm": 0.15599897503852844, "learning_rate": 0.002, "loss": 2.5953, "step": 21380 }, { "epoch": 0.04261363636363636, "grad_norm": 0.16155467927455902, "learning_rate": 0.002, "loss": 2.6098, "step": 21390 }, { "epoch": 0.04263355858727528, "grad_norm": 0.20902244746685028, "learning_rate": 0.002, "loss": 2.5942, "step": 21400 }, { "epoch": 0.04265348081091419, "grad_norm": 0.16934730112552643, "learning_rate": 0.002, "loss": 2.5955, "step": 21410 }, { "epoch": 0.0426734030345531, "grad_norm": 0.16600269079208374, "learning_rate": 0.002, "loss": 2.6019, "step": 21420 }, { "epoch": 0.04269332525819202, "grad_norm": 0.15050064027309418, "learning_rate": 0.002, "loss": 2.5959, "step": 21430 }, { "epoch": 0.04271324748183093, "grad_norm": 0.19719524681568146, "learning_rate": 0.002, "loss": 2.5873, "step": 21440 }, { "epoch": 0.042733169705469844, "grad_norm": 0.15831772983074188, "learning_rate": 0.002, "loss": 2.5933, "step": 21450 }, { "epoch": 0.04275309192910876, "grad_norm": 0.20652782917022705, "learning_rate": 0.002, "loss": 2.6009, "step": 21460 }, { "epoch": 0.042773014152747674, "grad_norm": 0.15920287370681763, "learning_rate": 0.002, "loss": 2.5783, "step": 21470 }, { "epoch": 0.042792936376386585, "grad_norm": 0.17043766379356384, "learning_rate": 0.002, "loss": 2.5796, "step": 21480 }, { "epoch": 0.042812858600025504, "grad_norm": 0.1743660867214203, "learning_rate": 0.002, "loss": 2.5833, "step": 21490 }, { "epoch": 0.042832780823664415, "grad_norm": 0.1772574931383133, "learning_rate": 0.002, "loss": 2.5921, "step": 21500 }, { "epoch": 0.04285270304730333, "grad_norm": 0.17575238645076752, "learning_rate": 0.002, "loss": 2.5829, "step": 21510 }, { "epoch": 0.04287262527094224, "grad_norm": 0.15036138892173767, "learning_rate": 0.002, "loss": 2.5893, "step": 21520 }, { "epoch": 0.04289254749458116, "grad_norm": 0.16869094967842102, "learning_rate": 0.002, "loss": 2.6041, "step": 21530 }, { "epoch": 0.04291246971822007, "grad_norm": 0.17458583414554596, "learning_rate": 0.002, "loss": 2.5858, "step": 21540 }, { "epoch": 0.04293239194185898, "grad_norm": 0.2021024078130722, "learning_rate": 0.002, "loss": 2.597, "step": 21550 }, { "epoch": 0.0429523141654979, "grad_norm": 0.19731494784355164, "learning_rate": 0.002, "loss": 2.5807, "step": 21560 }, { "epoch": 0.04297223638913681, "grad_norm": 0.18867863714694977, "learning_rate": 0.002, "loss": 2.5794, "step": 21570 }, { "epoch": 0.04299215861277572, "grad_norm": 0.17086830735206604, "learning_rate": 0.002, "loss": 2.5872, "step": 21580 }, { "epoch": 0.04301208083641464, "grad_norm": 0.19031408429145813, "learning_rate": 0.002, "loss": 2.5911, "step": 21590 }, { "epoch": 0.04303200306005355, "grad_norm": 0.14856000244617462, "learning_rate": 0.002, "loss": 2.5878, "step": 21600 }, { "epoch": 0.04305192528369246, "grad_norm": 0.20844301581382751, "learning_rate": 0.002, "loss": 2.5751, "step": 21610 }, { "epoch": 0.04307184750733138, "grad_norm": 0.1561032086610794, "learning_rate": 0.002, "loss": 2.5828, "step": 21620 }, { "epoch": 0.04309176973097029, "grad_norm": 0.1831149160861969, "learning_rate": 0.002, "loss": 2.5905, "step": 21630 }, { "epoch": 0.043111691954609205, "grad_norm": 0.15260501205921173, "learning_rate": 0.002, "loss": 2.5756, "step": 21640 }, { "epoch": 0.043131614178248116, "grad_norm": 0.18827226758003235, "learning_rate": 0.002, "loss": 2.5863, "step": 21650 }, { "epoch": 0.043151536401887035, "grad_norm": 0.19600874185562134, "learning_rate": 0.002, "loss": 2.5906, "step": 21660 }, { "epoch": 0.043171458625525946, "grad_norm": 0.16163437068462372, "learning_rate": 0.002, "loss": 2.5928, "step": 21670 }, { "epoch": 0.04319138084916486, "grad_norm": 0.15289704501628876, "learning_rate": 0.002, "loss": 2.5873, "step": 21680 }, { "epoch": 0.043211303072803776, "grad_norm": 0.17620514333248138, "learning_rate": 0.002, "loss": 2.5807, "step": 21690 }, { "epoch": 0.04323122529644269, "grad_norm": 0.16252762079238892, "learning_rate": 0.002, "loss": 2.5882, "step": 21700 }, { "epoch": 0.0432511475200816, "grad_norm": 0.17916598916053772, "learning_rate": 0.002, "loss": 2.5891, "step": 21710 }, { "epoch": 0.04327106974372052, "grad_norm": 0.16326899826526642, "learning_rate": 0.002, "loss": 2.5884, "step": 21720 }, { "epoch": 0.04329099196735943, "grad_norm": 0.1828003227710724, "learning_rate": 0.002, "loss": 2.5777, "step": 21730 }, { "epoch": 0.04331091419099834, "grad_norm": 0.15343140065670013, "learning_rate": 0.002, "loss": 2.5966, "step": 21740 }, { "epoch": 0.04333083641463726, "grad_norm": 0.1815957874059677, "learning_rate": 0.002, "loss": 2.5992, "step": 21750 }, { "epoch": 0.04335075863827617, "grad_norm": 0.23507848381996155, "learning_rate": 0.002, "loss": 2.5962, "step": 21760 }, { "epoch": 0.04337068086191508, "grad_norm": 0.14623819291591644, "learning_rate": 0.002, "loss": 2.5918, "step": 21770 }, { "epoch": 0.043390603085553994, "grad_norm": 0.19265654683113098, "learning_rate": 0.002, "loss": 2.5875, "step": 21780 }, { "epoch": 0.04341052530919291, "grad_norm": 0.1697506308555603, "learning_rate": 0.002, "loss": 2.5953, "step": 21790 }, { "epoch": 0.043430447532831824, "grad_norm": 0.1790686547756195, "learning_rate": 0.002, "loss": 2.5714, "step": 21800 }, { "epoch": 0.043450369756470736, "grad_norm": 0.15564881265163422, "learning_rate": 0.002, "loss": 2.5811, "step": 21810 }, { "epoch": 0.043470291980109654, "grad_norm": 0.16486041247844696, "learning_rate": 0.002, "loss": 2.5841, "step": 21820 }, { "epoch": 0.043490214203748566, "grad_norm": 0.14905762672424316, "learning_rate": 0.002, "loss": 2.597, "step": 21830 }, { "epoch": 0.04351013642738748, "grad_norm": 0.17347390949726105, "learning_rate": 0.002, "loss": 2.5869, "step": 21840 }, { "epoch": 0.043530058651026396, "grad_norm": 0.18288305401802063, "learning_rate": 0.002, "loss": 2.596, "step": 21850 }, { "epoch": 0.04354998087466531, "grad_norm": 0.1665126383304596, "learning_rate": 0.002, "loss": 2.5838, "step": 21860 }, { "epoch": 0.04356990309830422, "grad_norm": 0.16630952060222626, "learning_rate": 0.002, "loss": 2.5872, "step": 21870 }, { "epoch": 0.04358982532194313, "grad_norm": 0.1733371764421463, "learning_rate": 0.002, "loss": 2.585, "step": 21880 }, { "epoch": 0.04360974754558205, "grad_norm": 0.19327577948570251, "learning_rate": 0.002, "loss": 2.5899, "step": 21890 }, { "epoch": 0.04362966976922096, "grad_norm": 0.1591615080833435, "learning_rate": 0.002, "loss": 2.6045, "step": 21900 }, { "epoch": 0.04364959199285987, "grad_norm": 0.16197651624679565, "learning_rate": 0.002, "loss": 2.6023, "step": 21910 }, { "epoch": 0.04366951421649879, "grad_norm": 0.1647893786430359, "learning_rate": 0.002, "loss": 2.5811, "step": 21920 }, { "epoch": 0.0436894364401377, "grad_norm": 0.16817472875118256, "learning_rate": 0.002, "loss": 2.5929, "step": 21930 }, { "epoch": 0.043709358663776614, "grad_norm": 0.16562235355377197, "learning_rate": 0.002, "loss": 2.5951, "step": 21940 }, { "epoch": 0.04372928088741553, "grad_norm": 0.19263312220573425, "learning_rate": 0.002, "loss": 2.6004, "step": 21950 }, { "epoch": 0.043749203111054444, "grad_norm": 0.16919739544391632, "learning_rate": 0.002, "loss": 2.5899, "step": 21960 }, { "epoch": 0.043769125334693355, "grad_norm": 0.18296188116073608, "learning_rate": 0.002, "loss": 2.6016, "step": 21970 }, { "epoch": 0.043789047558332274, "grad_norm": 0.16207173466682434, "learning_rate": 0.002, "loss": 2.5808, "step": 21980 }, { "epoch": 0.043808969781971185, "grad_norm": 0.1954079270362854, "learning_rate": 0.002, "loss": 2.5875, "step": 21990 }, { "epoch": 0.0438288920056101, "grad_norm": 0.14473186433315277, "learning_rate": 0.002, "loss": 2.5854, "step": 22000 }, { "epoch": 0.04384881422924901, "grad_norm": 0.1800789088010788, "learning_rate": 0.002, "loss": 2.5904, "step": 22010 }, { "epoch": 0.04386873645288793, "grad_norm": 0.21380016207695007, "learning_rate": 0.002, "loss": 2.5968, "step": 22020 }, { "epoch": 0.04388865867652684, "grad_norm": 0.2110869139432907, "learning_rate": 0.002, "loss": 2.5763, "step": 22030 }, { "epoch": 0.04390858090016575, "grad_norm": 0.15862447023391724, "learning_rate": 0.002, "loss": 2.5731, "step": 22040 }, { "epoch": 0.04392850312380467, "grad_norm": 0.1544109433889389, "learning_rate": 0.002, "loss": 2.5762, "step": 22050 }, { "epoch": 0.04394842534744358, "grad_norm": 0.1764393150806427, "learning_rate": 0.002, "loss": 2.5924, "step": 22060 }, { "epoch": 0.04396834757108249, "grad_norm": 0.14988823235034943, "learning_rate": 0.002, "loss": 2.58, "step": 22070 }, { "epoch": 0.04398826979472141, "grad_norm": 0.2465115785598755, "learning_rate": 0.002, "loss": 2.5708, "step": 22080 }, { "epoch": 0.04400819201836032, "grad_norm": 0.166851207613945, "learning_rate": 0.002, "loss": 2.5835, "step": 22090 }, { "epoch": 0.04402811424199923, "grad_norm": 0.15078343451023102, "learning_rate": 0.002, "loss": 2.5798, "step": 22100 }, { "epoch": 0.04404803646563815, "grad_norm": 0.21102526783943176, "learning_rate": 0.002, "loss": 2.5734, "step": 22110 }, { "epoch": 0.04406795868927706, "grad_norm": 0.16151779890060425, "learning_rate": 0.002, "loss": 2.5775, "step": 22120 }, { "epoch": 0.044087880912915975, "grad_norm": 0.158097043633461, "learning_rate": 0.002, "loss": 2.5791, "step": 22130 }, { "epoch": 0.044107803136554886, "grad_norm": 0.22627748548984528, "learning_rate": 0.002, "loss": 2.5878, "step": 22140 }, { "epoch": 0.044127725360193805, "grad_norm": 0.1600818634033203, "learning_rate": 0.002, "loss": 2.5893, "step": 22150 }, { "epoch": 0.044147647583832716, "grad_norm": 0.2253037989139557, "learning_rate": 0.002, "loss": 2.5811, "step": 22160 }, { "epoch": 0.04416756980747163, "grad_norm": 0.16105619072914124, "learning_rate": 0.002, "loss": 2.5845, "step": 22170 }, { "epoch": 0.044187492031110547, "grad_norm": 0.1647716760635376, "learning_rate": 0.002, "loss": 2.5939, "step": 22180 }, { "epoch": 0.04420741425474946, "grad_norm": 0.15982207655906677, "learning_rate": 0.002, "loss": 2.5788, "step": 22190 }, { "epoch": 0.04422733647838837, "grad_norm": 0.18038326501846313, "learning_rate": 0.002, "loss": 2.5865, "step": 22200 }, { "epoch": 0.04424725870202729, "grad_norm": 0.14934928715229034, "learning_rate": 0.002, "loss": 2.5971, "step": 22210 }, { "epoch": 0.0442671809256662, "grad_norm": 0.17921428382396698, "learning_rate": 0.002, "loss": 2.5944, "step": 22220 }, { "epoch": 0.04428710314930511, "grad_norm": 0.17061784863471985, "learning_rate": 0.002, "loss": 2.5827, "step": 22230 }, { "epoch": 0.04430702537294403, "grad_norm": 0.1596117913722992, "learning_rate": 0.002, "loss": 2.585, "step": 22240 }, { "epoch": 0.04432694759658294, "grad_norm": 0.1836266964673996, "learning_rate": 0.002, "loss": 2.5918, "step": 22250 }, { "epoch": 0.04434686982022185, "grad_norm": 0.17127501964569092, "learning_rate": 0.002, "loss": 2.5891, "step": 22260 }, { "epoch": 0.044366792043860764, "grad_norm": 0.18886570632457733, "learning_rate": 0.002, "loss": 2.5867, "step": 22270 }, { "epoch": 0.04438671426749968, "grad_norm": 0.17190739512443542, "learning_rate": 0.002, "loss": 2.5849, "step": 22280 }, { "epoch": 0.044406636491138594, "grad_norm": 0.17151610553264618, "learning_rate": 0.002, "loss": 2.5781, "step": 22290 }, { "epoch": 0.044426558714777506, "grad_norm": 0.17484824359416962, "learning_rate": 0.002, "loss": 2.5867, "step": 22300 }, { "epoch": 0.044446480938416424, "grad_norm": 0.18365544080734253, "learning_rate": 0.002, "loss": 2.5868, "step": 22310 }, { "epoch": 0.044466403162055336, "grad_norm": 0.14172501862049103, "learning_rate": 0.002, "loss": 2.5705, "step": 22320 }, { "epoch": 0.04448632538569425, "grad_norm": 0.17769351601600647, "learning_rate": 0.002, "loss": 2.5938, "step": 22330 }, { "epoch": 0.044506247609333166, "grad_norm": 0.19947713613510132, "learning_rate": 0.002, "loss": 2.5899, "step": 22340 }, { "epoch": 0.04452616983297208, "grad_norm": 0.16458052396774292, "learning_rate": 0.002, "loss": 2.5706, "step": 22350 }, { "epoch": 0.04454609205661099, "grad_norm": 0.1596670001745224, "learning_rate": 0.002, "loss": 2.5911, "step": 22360 }, { "epoch": 0.04456601428024991, "grad_norm": 0.21562416851520538, "learning_rate": 0.002, "loss": 2.5877, "step": 22370 }, { "epoch": 0.04458593650388882, "grad_norm": 0.15810395777225494, "learning_rate": 0.002, "loss": 2.5869, "step": 22380 }, { "epoch": 0.04460585872752773, "grad_norm": 0.1622173935174942, "learning_rate": 0.002, "loss": 2.5777, "step": 22390 }, { "epoch": 0.04462578095116664, "grad_norm": 0.16858138144016266, "learning_rate": 0.002, "loss": 2.5903, "step": 22400 }, { "epoch": 0.04464570317480556, "grad_norm": 0.17981021106243134, "learning_rate": 0.002, "loss": 2.5791, "step": 22410 }, { "epoch": 0.04466562539844447, "grad_norm": 0.14871074259281158, "learning_rate": 0.002, "loss": 2.5781, "step": 22420 }, { "epoch": 0.044685547622083384, "grad_norm": 0.16809266805648804, "learning_rate": 0.002, "loss": 2.5967, "step": 22430 }, { "epoch": 0.0447054698457223, "grad_norm": 0.18237954378128052, "learning_rate": 0.002, "loss": 2.5775, "step": 22440 }, { "epoch": 0.044725392069361214, "grad_norm": 0.23700949549674988, "learning_rate": 0.002, "loss": 2.5867, "step": 22450 }, { "epoch": 0.044745314293000125, "grad_norm": 0.16998408734798431, "learning_rate": 0.002, "loss": 2.5849, "step": 22460 }, { "epoch": 0.044765236516639044, "grad_norm": 0.21578426659107208, "learning_rate": 0.002, "loss": 2.5954, "step": 22470 }, { "epoch": 0.044785158740277956, "grad_norm": 0.17792992293834686, "learning_rate": 0.002, "loss": 2.5735, "step": 22480 }, { "epoch": 0.04480508096391687, "grad_norm": 0.146487295627594, "learning_rate": 0.002, "loss": 2.5793, "step": 22490 }, { "epoch": 0.044825003187555786, "grad_norm": 0.1799069195985794, "learning_rate": 0.002, "loss": 2.5951, "step": 22500 }, { "epoch": 0.0448449254111947, "grad_norm": 0.16698355972766876, "learning_rate": 0.002, "loss": 2.5858, "step": 22510 }, { "epoch": 0.04486484763483361, "grad_norm": 0.16011904180049896, "learning_rate": 0.002, "loss": 2.6007, "step": 22520 }, { "epoch": 0.04488476985847252, "grad_norm": 0.17984376847743988, "learning_rate": 0.002, "loss": 2.5948, "step": 22530 }, { "epoch": 0.04490469208211144, "grad_norm": 0.15316861867904663, "learning_rate": 0.002, "loss": 2.5984, "step": 22540 }, { "epoch": 0.04492461430575035, "grad_norm": 0.18709073960781097, "learning_rate": 0.002, "loss": 2.5864, "step": 22550 }, { "epoch": 0.04494453652938926, "grad_norm": 0.15466926991939545, "learning_rate": 0.002, "loss": 2.5833, "step": 22560 }, { "epoch": 0.04496445875302818, "grad_norm": 0.1495508998632431, "learning_rate": 0.002, "loss": 2.5899, "step": 22570 }, { "epoch": 0.04498438097666709, "grad_norm": 0.18113520741462708, "learning_rate": 0.002, "loss": 2.5923, "step": 22580 }, { "epoch": 0.045004303200306, "grad_norm": 0.17131534218788147, "learning_rate": 0.002, "loss": 2.5945, "step": 22590 }, { "epoch": 0.04502422542394492, "grad_norm": 0.18245141208171844, "learning_rate": 0.002, "loss": 2.5899, "step": 22600 }, { "epoch": 0.04504414764758383, "grad_norm": 0.17638593912124634, "learning_rate": 0.002, "loss": 2.6131, "step": 22610 }, { "epoch": 0.045064069871222745, "grad_norm": 0.1414194256067276, "learning_rate": 0.002, "loss": 2.5817, "step": 22620 }, { "epoch": 0.045083992094861663, "grad_norm": 0.17713333666324615, "learning_rate": 0.002, "loss": 2.5842, "step": 22630 }, { "epoch": 0.045103914318500575, "grad_norm": 0.16934911906719208, "learning_rate": 0.002, "loss": 2.5814, "step": 22640 }, { "epoch": 0.04512383654213949, "grad_norm": 0.16802264750003815, "learning_rate": 0.002, "loss": 2.5979, "step": 22650 }, { "epoch": 0.0451437587657784, "grad_norm": 0.17785023152828217, "learning_rate": 0.002, "loss": 2.5975, "step": 22660 }, { "epoch": 0.04516368098941732, "grad_norm": 0.16079357266426086, "learning_rate": 0.002, "loss": 2.5903, "step": 22670 }, { "epoch": 0.04518360321305623, "grad_norm": 0.18530601263046265, "learning_rate": 0.002, "loss": 2.5756, "step": 22680 }, { "epoch": 0.04520352543669514, "grad_norm": 0.14290325343608856, "learning_rate": 0.002, "loss": 2.5837, "step": 22690 }, { "epoch": 0.04522344766033406, "grad_norm": 0.18187259137630463, "learning_rate": 0.002, "loss": 2.5675, "step": 22700 }, { "epoch": 0.04524336988397297, "grad_norm": 0.16307643055915833, "learning_rate": 0.002, "loss": 2.598, "step": 22710 }, { "epoch": 0.04526329210761188, "grad_norm": 0.17871591448783875, "learning_rate": 0.002, "loss": 2.5672, "step": 22720 }, { "epoch": 0.0452832143312508, "grad_norm": 0.18768255412578583, "learning_rate": 0.002, "loss": 2.5956, "step": 22730 }, { "epoch": 0.04530313655488971, "grad_norm": 0.1923542320728302, "learning_rate": 0.002, "loss": 2.5981, "step": 22740 }, { "epoch": 0.04532305877852862, "grad_norm": 0.16487298905849457, "learning_rate": 0.002, "loss": 2.5921, "step": 22750 }, { "epoch": 0.045342981002167534, "grad_norm": 0.17095115780830383, "learning_rate": 0.002, "loss": 2.5834, "step": 22760 }, { "epoch": 0.04536290322580645, "grad_norm": 0.1879565715789795, "learning_rate": 0.002, "loss": 2.594, "step": 22770 }, { "epoch": 0.045382825449445365, "grad_norm": 0.14858205616474152, "learning_rate": 0.002, "loss": 2.6038, "step": 22780 }, { "epoch": 0.045402747673084276, "grad_norm": 0.2009568214416504, "learning_rate": 0.002, "loss": 2.6033, "step": 22790 }, { "epoch": 0.045422669896723195, "grad_norm": 0.2015601545572281, "learning_rate": 0.002, "loss": 2.5888, "step": 22800 }, { "epoch": 0.045442592120362106, "grad_norm": 0.16435076296329498, "learning_rate": 0.002, "loss": 2.5806, "step": 22810 }, { "epoch": 0.04546251434400102, "grad_norm": 0.2009073793888092, "learning_rate": 0.002, "loss": 2.5921, "step": 22820 }, { "epoch": 0.045482436567639936, "grad_norm": 0.15833231806755066, "learning_rate": 0.002, "loss": 2.5825, "step": 22830 }, { "epoch": 0.04550235879127885, "grad_norm": 0.16046668589115143, "learning_rate": 0.002, "loss": 2.5896, "step": 22840 }, { "epoch": 0.04552228101491776, "grad_norm": 0.18084785342216492, "learning_rate": 0.002, "loss": 2.6086, "step": 22850 }, { "epoch": 0.04554220323855668, "grad_norm": 0.19030353426933289, "learning_rate": 0.002, "loss": 2.5944, "step": 22860 }, { "epoch": 0.04556212546219559, "grad_norm": 0.19128261506557465, "learning_rate": 0.002, "loss": 2.5939, "step": 22870 }, { "epoch": 0.0455820476858345, "grad_norm": 0.18001702427864075, "learning_rate": 0.002, "loss": 2.5858, "step": 22880 }, { "epoch": 0.04560196990947341, "grad_norm": 0.140046626329422, "learning_rate": 0.002, "loss": 2.5806, "step": 22890 }, { "epoch": 0.04562189213311233, "grad_norm": 0.23005032539367676, "learning_rate": 0.002, "loss": 2.5821, "step": 22900 }, { "epoch": 0.04564181435675124, "grad_norm": 0.1709132045507431, "learning_rate": 0.002, "loss": 2.5977, "step": 22910 }, { "epoch": 0.045661736580390154, "grad_norm": 0.17043043673038483, "learning_rate": 0.002, "loss": 2.6013, "step": 22920 }, { "epoch": 0.04568165880402907, "grad_norm": 0.1749974936246872, "learning_rate": 0.002, "loss": 2.5992, "step": 22930 }, { "epoch": 0.045701581027667984, "grad_norm": 0.19643200933933258, "learning_rate": 0.002, "loss": 2.6043, "step": 22940 }, { "epoch": 0.045721503251306896, "grad_norm": 0.2116202563047409, "learning_rate": 0.002, "loss": 2.5991, "step": 22950 }, { "epoch": 0.045741425474945814, "grad_norm": 0.15337714552879333, "learning_rate": 0.002, "loss": 2.581, "step": 22960 }, { "epoch": 0.045761347698584726, "grad_norm": 0.164792001247406, "learning_rate": 0.002, "loss": 2.5804, "step": 22970 }, { "epoch": 0.04578126992222364, "grad_norm": 0.22242754697799683, "learning_rate": 0.002, "loss": 2.5833, "step": 22980 }, { "epoch": 0.045801192145862556, "grad_norm": 0.16670453548431396, "learning_rate": 0.002, "loss": 2.5963, "step": 22990 }, { "epoch": 0.04582111436950147, "grad_norm": 0.14918039739131927, "learning_rate": 0.002, "loss": 2.5881, "step": 23000 }, { "epoch": 0.04584103659314038, "grad_norm": 0.17159385979175568, "learning_rate": 0.002, "loss": 2.5902, "step": 23010 }, { "epoch": 0.04586095881677929, "grad_norm": 0.1687636524438858, "learning_rate": 0.002, "loss": 2.5866, "step": 23020 }, { "epoch": 0.04588088104041821, "grad_norm": 0.20533613860607147, "learning_rate": 0.002, "loss": 2.5867, "step": 23030 }, { "epoch": 0.04590080326405712, "grad_norm": 0.14790105819702148, "learning_rate": 0.002, "loss": 2.5913, "step": 23040 }, { "epoch": 0.04592072548769603, "grad_norm": 0.16423584520816803, "learning_rate": 0.002, "loss": 2.5933, "step": 23050 }, { "epoch": 0.04594064771133495, "grad_norm": 0.2245778888463974, "learning_rate": 0.002, "loss": 2.6138, "step": 23060 }, { "epoch": 0.04596056993497386, "grad_norm": 0.16662582755088806, "learning_rate": 0.002, "loss": 2.5817, "step": 23070 }, { "epoch": 0.045980492158612774, "grad_norm": 0.16692088544368744, "learning_rate": 0.002, "loss": 2.5913, "step": 23080 }, { "epoch": 0.04600041438225169, "grad_norm": 0.17654788494110107, "learning_rate": 0.002, "loss": 2.5944, "step": 23090 }, { "epoch": 0.046020336605890604, "grad_norm": 0.16472318768501282, "learning_rate": 0.002, "loss": 2.5826, "step": 23100 }, { "epoch": 0.046040258829529515, "grad_norm": 0.16948913037776947, "learning_rate": 0.002, "loss": 2.5744, "step": 23110 }, { "epoch": 0.046060181053168434, "grad_norm": 0.17402106523513794, "learning_rate": 0.002, "loss": 2.5954, "step": 23120 }, { "epoch": 0.046080103276807345, "grad_norm": 0.18039469420909882, "learning_rate": 0.002, "loss": 2.5867, "step": 23130 }, { "epoch": 0.04610002550044626, "grad_norm": 0.13467468321323395, "learning_rate": 0.002, "loss": 2.5869, "step": 23140 }, { "epoch": 0.04611994772408517, "grad_norm": 0.2012787014245987, "learning_rate": 0.002, "loss": 2.5763, "step": 23150 }, { "epoch": 0.04613986994772409, "grad_norm": 0.23501035571098328, "learning_rate": 0.002, "loss": 2.5964, "step": 23160 }, { "epoch": 0.046159792171363, "grad_norm": 0.16958262026309967, "learning_rate": 0.002, "loss": 2.5905, "step": 23170 }, { "epoch": 0.04617971439500191, "grad_norm": 0.16367177665233612, "learning_rate": 0.002, "loss": 2.5929, "step": 23180 }, { "epoch": 0.04619963661864083, "grad_norm": 0.15644851326942444, "learning_rate": 0.002, "loss": 2.5867, "step": 23190 }, { "epoch": 0.04621955884227974, "grad_norm": 0.18556584417819977, "learning_rate": 0.002, "loss": 2.5838, "step": 23200 }, { "epoch": 0.04623948106591865, "grad_norm": 0.19720281660556793, "learning_rate": 0.002, "loss": 2.6013, "step": 23210 }, { "epoch": 0.04625940328955757, "grad_norm": 0.21075133979320526, "learning_rate": 0.002, "loss": 2.5888, "step": 23220 }, { "epoch": 0.04627932551319648, "grad_norm": 0.1867942214012146, "learning_rate": 0.002, "loss": 2.6082, "step": 23230 }, { "epoch": 0.04629924773683539, "grad_norm": 0.16296029090881348, "learning_rate": 0.002, "loss": 2.5915, "step": 23240 }, { "epoch": 0.04631916996047431, "grad_norm": 0.16578209400177002, "learning_rate": 0.002, "loss": 2.592, "step": 23250 }, { "epoch": 0.04633909218411322, "grad_norm": 0.25116094946861267, "learning_rate": 0.002, "loss": 2.5874, "step": 23260 }, { "epoch": 0.046359014407752135, "grad_norm": 0.1456488072872162, "learning_rate": 0.002, "loss": 2.5948, "step": 23270 }, { "epoch": 0.046378936631391046, "grad_norm": 0.1656821370124817, "learning_rate": 0.002, "loss": 2.5823, "step": 23280 }, { "epoch": 0.046398858855029965, "grad_norm": 0.19203460216522217, "learning_rate": 0.002, "loss": 2.5842, "step": 23290 }, { "epoch": 0.046418781078668876, "grad_norm": 0.1547890603542328, "learning_rate": 0.002, "loss": 2.5942, "step": 23300 }, { "epoch": 0.04643870330230779, "grad_norm": 0.17754189670085907, "learning_rate": 0.002, "loss": 2.5838, "step": 23310 }, { "epoch": 0.046458625525946706, "grad_norm": 0.15195921063423157, "learning_rate": 0.002, "loss": 2.5915, "step": 23320 }, { "epoch": 0.04647854774958562, "grad_norm": 0.19813351333141327, "learning_rate": 0.002, "loss": 2.5879, "step": 23330 }, { "epoch": 0.04649846997322453, "grad_norm": 0.2043328732252121, "learning_rate": 0.002, "loss": 2.5883, "step": 23340 }, { "epoch": 0.04651839219686345, "grad_norm": 0.14753848314285278, "learning_rate": 0.002, "loss": 2.5754, "step": 23350 }, { "epoch": 0.04653831442050236, "grad_norm": 0.17521513998508453, "learning_rate": 0.002, "loss": 2.5952, "step": 23360 }, { "epoch": 0.04655823664414127, "grad_norm": 0.1857367902994156, "learning_rate": 0.002, "loss": 2.5901, "step": 23370 }, { "epoch": 0.04657815886778019, "grad_norm": 0.16900210082530975, "learning_rate": 0.002, "loss": 2.5991, "step": 23380 }, { "epoch": 0.0465980810914191, "grad_norm": 0.18594388663768768, "learning_rate": 0.002, "loss": 2.5885, "step": 23390 }, { "epoch": 0.04661800331505801, "grad_norm": 0.20964761078357697, "learning_rate": 0.002, "loss": 2.586, "step": 23400 }, { "epoch": 0.046637925538696924, "grad_norm": 0.18887127935886383, "learning_rate": 0.002, "loss": 2.6033, "step": 23410 }, { "epoch": 0.04665784776233584, "grad_norm": 0.14850765466690063, "learning_rate": 0.002, "loss": 2.5862, "step": 23420 }, { "epoch": 0.046677769985974754, "grad_norm": 0.1706082671880722, "learning_rate": 0.002, "loss": 2.5816, "step": 23430 }, { "epoch": 0.046697692209613666, "grad_norm": 0.16140501201152802, "learning_rate": 0.002, "loss": 2.5892, "step": 23440 }, { "epoch": 0.046717614433252584, "grad_norm": 0.1648332178592682, "learning_rate": 0.002, "loss": 2.6093, "step": 23450 }, { "epoch": 0.046737536656891496, "grad_norm": 0.17086444795131683, "learning_rate": 0.002, "loss": 2.5899, "step": 23460 }, { "epoch": 0.04675745888053041, "grad_norm": 0.15017636120319366, "learning_rate": 0.002, "loss": 2.5969, "step": 23470 }, { "epoch": 0.046777381104169326, "grad_norm": 0.19947631657123566, "learning_rate": 0.002, "loss": 2.5798, "step": 23480 }, { "epoch": 0.04679730332780824, "grad_norm": 0.22184687852859497, "learning_rate": 0.002, "loss": 2.5785, "step": 23490 }, { "epoch": 0.04681722555144715, "grad_norm": 0.16975119709968567, "learning_rate": 0.002, "loss": 2.5843, "step": 23500 }, { "epoch": 0.04683714777508607, "grad_norm": 0.18118302524089813, "learning_rate": 0.002, "loss": 2.5849, "step": 23510 }, { "epoch": 0.04685706999872498, "grad_norm": 0.184641495347023, "learning_rate": 0.002, "loss": 2.5867, "step": 23520 }, { "epoch": 0.04687699222236389, "grad_norm": 0.19495567679405212, "learning_rate": 0.002, "loss": 2.5927, "step": 23530 }, { "epoch": 0.0468969144460028, "grad_norm": 0.1742057502269745, "learning_rate": 0.002, "loss": 2.5786, "step": 23540 }, { "epoch": 0.04691683666964172, "grad_norm": 0.19680161774158478, "learning_rate": 0.002, "loss": 2.5929, "step": 23550 }, { "epoch": 0.04693675889328063, "grad_norm": 0.19863952696323395, "learning_rate": 0.002, "loss": 2.5928, "step": 23560 }, { "epoch": 0.046956681116919544, "grad_norm": 0.16501130163669586, "learning_rate": 0.002, "loss": 2.5803, "step": 23570 }, { "epoch": 0.04697660334055846, "grad_norm": 0.18582412600517273, "learning_rate": 0.002, "loss": 2.5871, "step": 23580 }, { "epoch": 0.046996525564197374, "grad_norm": 0.16675813496112823, "learning_rate": 0.002, "loss": 2.5764, "step": 23590 }, { "epoch": 0.047016447787836285, "grad_norm": 0.15668430924415588, "learning_rate": 0.002, "loss": 2.5924, "step": 23600 }, { "epoch": 0.047036370011475204, "grad_norm": 0.21729713678359985, "learning_rate": 0.002, "loss": 2.5875, "step": 23610 }, { "epoch": 0.047056292235114115, "grad_norm": 0.18428167700767517, "learning_rate": 0.002, "loss": 2.5801, "step": 23620 }, { "epoch": 0.04707621445875303, "grad_norm": 0.14973260462284088, "learning_rate": 0.002, "loss": 2.5812, "step": 23630 }, { "epoch": 0.047096136682391945, "grad_norm": 0.2217007279396057, "learning_rate": 0.002, "loss": 2.5817, "step": 23640 }, { "epoch": 0.04711605890603086, "grad_norm": 0.1556837558746338, "learning_rate": 0.002, "loss": 2.5924, "step": 23650 }, { "epoch": 0.04713598112966977, "grad_norm": 0.17176958918571472, "learning_rate": 0.002, "loss": 2.5668, "step": 23660 }, { "epoch": 0.04715590335330868, "grad_norm": 0.19432519376277924, "learning_rate": 0.002, "loss": 2.5885, "step": 23670 }, { "epoch": 0.0471758255769476, "grad_norm": 0.14124201238155365, "learning_rate": 0.002, "loss": 2.5983, "step": 23680 }, { "epoch": 0.04719574780058651, "grad_norm": 0.1432739943265915, "learning_rate": 0.002, "loss": 2.5954, "step": 23690 }, { "epoch": 0.04721567002422542, "grad_norm": 0.17122313380241394, "learning_rate": 0.002, "loss": 2.5869, "step": 23700 }, { "epoch": 0.04723559224786434, "grad_norm": 0.13406667113304138, "learning_rate": 0.002, "loss": 2.5927, "step": 23710 }, { "epoch": 0.04725551447150325, "grad_norm": 0.15775662660598755, "learning_rate": 0.002, "loss": 2.5942, "step": 23720 }, { "epoch": 0.04727543669514216, "grad_norm": 0.16813744604587555, "learning_rate": 0.002, "loss": 2.5894, "step": 23730 }, { "epoch": 0.04729535891878108, "grad_norm": 0.14824600517749786, "learning_rate": 0.002, "loss": 2.5808, "step": 23740 }, { "epoch": 0.04731528114241999, "grad_norm": 0.1882524937391281, "learning_rate": 0.002, "loss": 2.5875, "step": 23750 }, { "epoch": 0.047335203366058905, "grad_norm": 0.20512530207633972, "learning_rate": 0.002, "loss": 2.5917, "step": 23760 }, { "epoch": 0.047355125589697816, "grad_norm": 0.18540751934051514, "learning_rate": 0.002, "loss": 2.5878, "step": 23770 }, { "epoch": 0.047375047813336735, "grad_norm": 0.17507235705852509, "learning_rate": 0.002, "loss": 2.566, "step": 23780 }, { "epoch": 0.047394970036975646, "grad_norm": 0.19320333003997803, "learning_rate": 0.002, "loss": 2.5879, "step": 23790 }, { "epoch": 0.04741489226061456, "grad_norm": 0.16222864389419556, "learning_rate": 0.002, "loss": 2.5928, "step": 23800 }, { "epoch": 0.047434814484253476, "grad_norm": 0.17594671249389648, "learning_rate": 0.002, "loss": 2.6062, "step": 23810 }, { "epoch": 0.04745473670789239, "grad_norm": 0.2021639496088028, "learning_rate": 0.002, "loss": 2.6015, "step": 23820 }, { "epoch": 0.0474746589315313, "grad_norm": 0.17257048189640045, "learning_rate": 0.002, "loss": 2.5865, "step": 23830 }, { "epoch": 0.04749458115517022, "grad_norm": 0.16045023500919342, "learning_rate": 0.002, "loss": 2.5853, "step": 23840 }, { "epoch": 0.04751450337880913, "grad_norm": 0.19302842020988464, "learning_rate": 0.002, "loss": 2.5707, "step": 23850 }, { "epoch": 0.04753442560244804, "grad_norm": 0.1694880723953247, "learning_rate": 0.002, "loss": 2.5926, "step": 23860 }, { "epoch": 0.04755434782608696, "grad_norm": 0.15626214444637299, "learning_rate": 0.002, "loss": 2.6063, "step": 23870 }, { "epoch": 0.04757427004972587, "grad_norm": 0.17338013648986816, "learning_rate": 0.002, "loss": 2.5992, "step": 23880 }, { "epoch": 0.04759419227336478, "grad_norm": 0.20085877180099487, "learning_rate": 0.002, "loss": 2.5891, "step": 23890 }, { "epoch": 0.047614114497003694, "grad_norm": 0.17261329293251038, "learning_rate": 0.002, "loss": 2.5905, "step": 23900 }, { "epoch": 0.04763403672064261, "grad_norm": 0.15334969758987427, "learning_rate": 0.002, "loss": 2.5881, "step": 23910 }, { "epoch": 0.047653958944281524, "grad_norm": 0.20085696876049042, "learning_rate": 0.002, "loss": 2.5808, "step": 23920 }, { "epoch": 0.047673881167920436, "grad_norm": 0.172287255525589, "learning_rate": 0.002, "loss": 2.5815, "step": 23930 }, { "epoch": 0.047693803391559354, "grad_norm": 0.15924541652202606, "learning_rate": 0.002, "loss": 2.574, "step": 23940 }, { "epoch": 0.047713725615198266, "grad_norm": 0.1641913503408432, "learning_rate": 0.002, "loss": 2.591, "step": 23950 }, { "epoch": 0.04773364783883718, "grad_norm": 0.22362971305847168, "learning_rate": 0.002, "loss": 2.5932, "step": 23960 }, { "epoch": 0.047753570062476096, "grad_norm": 0.17175628244876862, "learning_rate": 0.002, "loss": 2.5777, "step": 23970 }, { "epoch": 0.04777349228611501, "grad_norm": 0.15607525408267975, "learning_rate": 0.002, "loss": 2.5876, "step": 23980 }, { "epoch": 0.04779341450975392, "grad_norm": 0.1925334483385086, "learning_rate": 0.002, "loss": 2.5694, "step": 23990 }, { "epoch": 0.04781333673339284, "grad_norm": 0.17221416532993317, "learning_rate": 0.002, "loss": 2.5936, "step": 24000 }, { "epoch": 0.04783325895703175, "grad_norm": 0.18661834299564362, "learning_rate": 0.002, "loss": 2.5839, "step": 24010 }, { "epoch": 0.04785318118067066, "grad_norm": 0.18292269110679626, "learning_rate": 0.002, "loss": 2.5883, "step": 24020 }, { "epoch": 0.04787310340430957, "grad_norm": 0.16021990776062012, "learning_rate": 0.002, "loss": 2.5986, "step": 24030 }, { "epoch": 0.04789302562794849, "grad_norm": 0.15115080773830414, "learning_rate": 0.002, "loss": 2.597, "step": 24040 }, { "epoch": 0.0479129478515874, "grad_norm": 0.16425853967666626, "learning_rate": 0.002, "loss": 2.58, "step": 24050 }, { "epoch": 0.047932870075226314, "grad_norm": 0.19211558997631073, "learning_rate": 0.002, "loss": 2.5879, "step": 24060 }, { "epoch": 0.04795279229886523, "grad_norm": 0.17620521783828735, "learning_rate": 0.002, "loss": 2.5923, "step": 24070 }, { "epoch": 0.047972714522504144, "grad_norm": 0.16708847880363464, "learning_rate": 0.002, "loss": 2.5861, "step": 24080 }, { "epoch": 0.047992636746143055, "grad_norm": 0.17754966020584106, "learning_rate": 0.002, "loss": 2.5834, "step": 24090 }, { "epoch": 0.048012558969781974, "grad_norm": 0.20542259514331818, "learning_rate": 0.002, "loss": 2.5819, "step": 24100 }, { "epoch": 0.048032481193420885, "grad_norm": 0.14083042740821838, "learning_rate": 0.002, "loss": 2.5995, "step": 24110 }, { "epoch": 0.0480524034170598, "grad_norm": 0.14602506160736084, "learning_rate": 0.002, "loss": 2.5786, "step": 24120 }, { "epoch": 0.048072325640698715, "grad_norm": 0.19246092438697815, "learning_rate": 0.002, "loss": 2.5869, "step": 24130 }, { "epoch": 0.04809224786433763, "grad_norm": 0.15123073756694794, "learning_rate": 0.002, "loss": 2.578, "step": 24140 }, { "epoch": 0.04811217008797654, "grad_norm": 0.16538101434707642, "learning_rate": 0.002, "loss": 2.5886, "step": 24150 }, { "epoch": 0.04813209231161545, "grad_norm": 0.17351007461547852, "learning_rate": 0.002, "loss": 2.591, "step": 24160 }, { "epoch": 0.04815201453525437, "grad_norm": 0.1903926283121109, "learning_rate": 0.002, "loss": 2.562, "step": 24170 }, { "epoch": 0.04817193675889328, "grad_norm": 0.15491928160190582, "learning_rate": 0.002, "loss": 2.5945, "step": 24180 }, { "epoch": 0.04819185898253219, "grad_norm": 0.1724681854248047, "learning_rate": 0.002, "loss": 2.5784, "step": 24190 }, { "epoch": 0.04821178120617111, "grad_norm": 0.17849455773830414, "learning_rate": 0.002, "loss": 2.5848, "step": 24200 }, { "epoch": 0.04823170342981002, "grad_norm": 0.1884990930557251, "learning_rate": 0.002, "loss": 2.5715, "step": 24210 }, { "epoch": 0.04825162565344893, "grad_norm": 0.1795714795589447, "learning_rate": 0.002, "loss": 2.5849, "step": 24220 }, { "epoch": 0.04827154787708785, "grad_norm": 0.17232781648635864, "learning_rate": 0.002, "loss": 2.5951, "step": 24230 }, { "epoch": 0.04829147010072676, "grad_norm": 0.14801354706287384, "learning_rate": 0.002, "loss": 2.5686, "step": 24240 }, { "epoch": 0.048311392324365675, "grad_norm": 0.17751145362854004, "learning_rate": 0.002, "loss": 2.5912, "step": 24250 }, { "epoch": 0.04833131454800459, "grad_norm": 0.1861853152513504, "learning_rate": 0.002, "loss": 2.5792, "step": 24260 }, { "epoch": 0.048351236771643505, "grad_norm": 0.20706623792648315, "learning_rate": 0.002, "loss": 2.5591, "step": 24270 }, { "epoch": 0.048371158995282416, "grad_norm": 0.18830977380275726, "learning_rate": 0.002, "loss": 2.5898, "step": 24280 }, { "epoch": 0.04839108121892133, "grad_norm": 0.1448371261358261, "learning_rate": 0.002, "loss": 2.597, "step": 24290 }, { "epoch": 0.048411003442560246, "grad_norm": 0.14845643937587738, "learning_rate": 0.002, "loss": 2.5818, "step": 24300 }, { "epoch": 0.04843092566619916, "grad_norm": 0.17318470776081085, "learning_rate": 0.002, "loss": 2.6034, "step": 24310 }, { "epoch": 0.04845084788983807, "grad_norm": 0.22270870208740234, "learning_rate": 0.002, "loss": 2.5811, "step": 24320 }, { "epoch": 0.04847077011347699, "grad_norm": 0.20945216715335846, "learning_rate": 0.002, "loss": 2.5955, "step": 24330 }, { "epoch": 0.0484906923371159, "grad_norm": 0.19736014306545258, "learning_rate": 0.002, "loss": 2.5979, "step": 24340 }, { "epoch": 0.04851061456075481, "grad_norm": 0.1695547103881836, "learning_rate": 0.002, "loss": 2.5912, "step": 24350 }, { "epoch": 0.04853053678439373, "grad_norm": 0.2010594755411148, "learning_rate": 0.002, "loss": 2.5787, "step": 24360 }, { "epoch": 0.04855045900803264, "grad_norm": 0.16302277147769928, "learning_rate": 0.002, "loss": 2.574, "step": 24370 }, { "epoch": 0.04857038123167155, "grad_norm": 0.17586398124694824, "learning_rate": 0.002, "loss": 2.5789, "step": 24380 }, { "epoch": 0.04859030345531047, "grad_norm": 0.1825006604194641, "learning_rate": 0.002, "loss": 2.5841, "step": 24390 }, { "epoch": 0.04861022567894938, "grad_norm": 0.16461920738220215, "learning_rate": 0.002, "loss": 2.567, "step": 24400 }, { "epoch": 0.048630147902588294, "grad_norm": 0.17091667652130127, "learning_rate": 0.002, "loss": 2.5767, "step": 24410 }, { "epoch": 0.048650070126227206, "grad_norm": 0.1860252320766449, "learning_rate": 0.002, "loss": 2.6027, "step": 24420 }, { "epoch": 0.048669992349866124, "grad_norm": 0.20054516196250916, "learning_rate": 0.002, "loss": 2.5834, "step": 24430 }, { "epoch": 0.048689914573505036, "grad_norm": 0.1486617773771286, "learning_rate": 0.002, "loss": 2.5842, "step": 24440 }, { "epoch": 0.04870983679714395, "grad_norm": 0.16211552917957306, "learning_rate": 0.002, "loss": 2.5782, "step": 24450 }, { "epoch": 0.048729759020782866, "grad_norm": 0.20996831357479095, "learning_rate": 0.002, "loss": 2.5879, "step": 24460 }, { "epoch": 0.04874968124442178, "grad_norm": 0.14731888473033905, "learning_rate": 0.002, "loss": 2.588, "step": 24470 }, { "epoch": 0.04876960346806069, "grad_norm": 0.2189418375492096, "learning_rate": 0.002, "loss": 2.5815, "step": 24480 }, { "epoch": 0.04878952569169961, "grad_norm": 0.17440913617610931, "learning_rate": 0.002, "loss": 2.5869, "step": 24490 }, { "epoch": 0.04880944791533852, "grad_norm": 0.1475774198770523, "learning_rate": 0.002, "loss": 2.5852, "step": 24500 }, { "epoch": 0.04882937013897743, "grad_norm": 0.2122229039669037, "learning_rate": 0.002, "loss": 2.6016, "step": 24510 }, { "epoch": 0.04884929236261635, "grad_norm": 0.16954180598258972, "learning_rate": 0.002, "loss": 2.5875, "step": 24520 }, { "epoch": 0.04886921458625526, "grad_norm": 0.1958920806646347, "learning_rate": 0.002, "loss": 2.5917, "step": 24530 }, { "epoch": 0.04888913680989417, "grad_norm": 0.17802025377750397, "learning_rate": 0.002, "loss": 2.5801, "step": 24540 }, { "epoch": 0.048909059033533084, "grad_norm": 0.19145911931991577, "learning_rate": 0.002, "loss": 2.5826, "step": 24550 }, { "epoch": 0.048928981257172, "grad_norm": 0.2031901478767395, "learning_rate": 0.002, "loss": 2.5876, "step": 24560 }, { "epoch": 0.048948903480810914, "grad_norm": 0.1920546591281891, "learning_rate": 0.002, "loss": 2.5829, "step": 24570 }, { "epoch": 0.048968825704449825, "grad_norm": 0.17723295092582703, "learning_rate": 0.002, "loss": 2.6004, "step": 24580 }, { "epoch": 0.048988747928088744, "grad_norm": 0.25530630350112915, "learning_rate": 0.002, "loss": 2.5901, "step": 24590 }, { "epoch": 0.049008670151727655, "grad_norm": 0.15902677178382874, "learning_rate": 0.002, "loss": 2.5896, "step": 24600 }, { "epoch": 0.04902859237536657, "grad_norm": 0.17949655652046204, "learning_rate": 0.002, "loss": 2.589, "step": 24610 }, { "epoch": 0.049048514599005485, "grad_norm": 0.18571776151657104, "learning_rate": 0.002, "loss": 2.5667, "step": 24620 }, { "epoch": 0.0490684368226444, "grad_norm": 0.14886543154716492, "learning_rate": 0.002, "loss": 2.5852, "step": 24630 }, { "epoch": 0.04908835904628331, "grad_norm": 0.22212424874305725, "learning_rate": 0.002, "loss": 2.5911, "step": 24640 }, { "epoch": 0.04910828126992223, "grad_norm": 0.15387725830078125, "learning_rate": 0.002, "loss": 2.5902, "step": 24650 }, { "epoch": 0.04912820349356114, "grad_norm": 0.1661953181028366, "learning_rate": 0.002, "loss": 2.5879, "step": 24660 }, { "epoch": 0.04914812571720005, "grad_norm": 0.1815551072359085, "learning_rate": 0.002, "loss": 2.5915, "step": 24670 }, { "epoch": 0.04916804794083896, "grad_norm": 0.14251118898391724, "learning_rate": 0.002, "loss": 2.5916, "step": 24680 }, { "epoch": 0.04918797016447788, "grad_norm": 0.16351920366287231, "learning_rate": 0.002, "loss": 2.5905, "step": 24690 }, { "epoch": 0.04920789238811679, "grad_norm": 0.17610150575637817, "learning_rate": 0.002, "loss": 2.5778, "step": 24700 }, { "epoch": 0.0492278146117557, "grad_norm": 0.17396074533462524, "learning_rate": 0.002, "loss": 2.589, "step": 24710 }, { "epoch": 0.04924773683539462, "grad_norm": 0.16994431614875793, "learning_rate": 0.002, "loss": 2.6108, "step": 24720 }, { "epoch": 0.04926765905903353, "grad_norm": 0.19316338002681732, "learning_rate": 0.002, "loss": 2.5949, "step": 24730 }, { "epoch": 0.049287581282672445, "grad_norm": 0.19385653734207153, "learning_rate": 0.002, "loss": 2.587, "step": 24740 }, { "epoch": 0.04930750350631136, "grad_norm": 0.18339557945728302, "learning_rate": 0.002, "loss": 2.5794, "step": 24750 }, { "epoch": 0.049327425729950275, "grad_norm": 0.16755086183547974, "learning_rate": 0.002, "loss": 2.5882, "step": 24760 }, { "epoch": 0.049347347953589187, "grad_norm": 0.17408478260040283, "learning_rate": 0.002, "loss": 2.5911, "step": 24770 }, { "epoch": 0.0493672701772281, "grad_norm": 0.19835881888866425, "learning_rate": 0.002, "loss": 2.575, "step": 24780 }, { "epoch": 0.04938719240086702, "grad_norm": 0.1670958399772644, "learning_rate": 0.002, "loss": 2.5688, "step": 24790 }, { "epoch": 0.04940711462450593, "grad_norm": 0.1565229594707489, "learning_rate": 0.002, "loss": 2.5631, "step": 24800 }, { "epoch": 0.04942703684814484, "grad_norm": 0.202510803937912, "learning_rate": 0.002, "loss": 2.571, "step": 24810 }, { "epoch": 0.04944695907178376, "grad_norm": 0.18133379518985748, "learning_rate": 0.002, "loss": 2.5755, "step": 24820 }, { "epoch": 0.04946688129542267, "grad_norm": 0.1685137301683426, "learning_rate": 0.002, "loss": 2.5873, "step": 24830 }, { "epoch": 0.04948680351906158, "grad_norm": 0.23017580807209015, "learning_rate": 0.002, "loss": 2.5813, "step": 24840 }, { "epoch": 0.0495067257427005, "grad_norm": 0.15389329195022583, "learning_rate": 0.002, "loss": 2.5707, "step": 24850 }, { "epoch": 0.04952664796633941, "grad_norm": 0.16403500735759735, "learning_rate": 0.002, "loss": 2.5903, "step": 24860 }, { "epoch": 0.04954657018997832, "grad_norm": 0.2254001796245575, "learning_rate": 0.002, "loss": 2.5604, "step": 24870 }, { "epoch": 0.04956649241361724, "grad_norm": 0.17962957918643951, "learning_rate": 0.002, "loss": 2.5864, "step": 24880 }, { "epoch": 0.04958641463725615, "grad_norm": 0.18859483301639557, "learning_rate": 0.002, "loss": 2.5859, "step": 24890 }, { "epoch": 0.049606336860895064, "grad_norm": 0.15022322535514832, "learning_rate": 0.002, "loss": 2.5743, "step": 24900 }, { "epoch": 0.049626259084533976, "grad_norm": 0.18981017172336578, "learning_rate": 0.002, "loss": 2.5753, "step": 24910 }, { "epoch": 0.049646181308172894, "grad_norm": 0.16894127428531647, "learning_rate": 0.002, "loss": 2.576, "step": 24920 }, { "epoch": 0.049666103531811806, "grad_norm": 0.1588817983865738, "learning_rate": 0.002, "loss": 2.5879, "step": 24930 }, { "epoch": 0.04968602575545072, "grad_norm": 0.16947126388549805, "learning_rate": 0.002, "loss": 2.583, "step": 24940 }, { "epoch": 0.049705947979089636, "grad_norm": 0.21782030165195465, "learning_rate": 0.002, "loss": 2.5985, "step": 24950 }, { "epoch": 0.04972587020272855, "grad_norm": 0.150143563747406, "learning_rate": 0.002, "loss": 2.5803, "step": 24960 }, { "epoch": 0.04974579242636746, "grad_norm": 0.19521087408065796, "learning_rate": 0.002, "loss": 2.5781, "step": 24970 }, { "epoch": 0.04976571465000638, "grad_norm": 0.17332391440868378, "learning_rate": 0.002, "loss": 2.5793, "step": 24980 }, { "epoch": 0.04978563687364529, "grad_norm": 0.20413702726364136, "learning_rate": 0.002, "loss": 2.5887, "step": 24990 }, { "epoch": 0.0498055590972842, "grad_norm": 0.18014580011367798, "learning_rate": 0.002, "loss": 2.601, "step": 25000 }, { "epoch": 0.04982548132092312, "grad_norm": 0.13668815791606903, "learning_rate": 0.002, "loss": 2.5792, "step": 25010 }, { "epoch": 0.04984540354456203, "grad_norm": 0.16792595386505127, "learning_rate": 0.002, "loss": 2.582, "step": 25020 }, { "epoch": 0.04986532576820094, "grad_norm": 0.19885192811489105, "learning_rate": 0.002, "loss": 2.5871, "step": 25030 }, { "epoch": 0.049885247991839854, "grad_norm": 0.18150164186954498, "learning_rate": 0.002, "loss": 2.592, "step": 25040 }, { "epoch": 0.04990517021547877, "grad_norm": 0.17435866594314575, "learning_rate": 0.002, "loss": 2.5873, "step": 25050 }, { "epoch": 0.049925092439117684, "grad_norm": 0.1516008973121643, "learning_rate": 0.002, "loss": 2.5683, "step": 25060 }, { "epoch": 0.049945014662756596, "grad_norm": 0.15737250447273254, "learning_rate": 0.002, "loss": 2.6004, "step": 25070 }, { "epoch": 0.049964936886395514, "grad_norm": 0.19594356417655945, "learning_rate": 0.002, "loss": 2.5877, "step": 25080 }, { "epoch": 0.049984859110034426, "grad_norm": 0.20839481055736542, "learning_rate": 0.002, "loss": 2.5882, "step": 25090 }, { "epoch": 0.05000478133367334, "grad_norm": 0.17959342896938324, "learning_rate": 0.002, "loss": 2.5836, "step": 25100 }, { "epoch": 0.050024703557312256, "grad_norm": 0.14394979178905487, "learning_rate": 0.002, "loss": 2.5928, "step": 25110 }, { "epoch": 0.05004462578095117, "grad_norm": 0.1690848469734192, "learning_rate": 0.002, "loss": 2.5877, "step": 25120 }, { "epoch": 0.05006454800459008, "grad_norm": 0.18983981013298035, "learning_rate": 0.002, "loss": 2.5799, "step": 25130 }, { "epoch": 0.050084470228229, "grad_norm": 0.1861996054649353, "learning_rate": 0.002, "loss": 2.5789, "step": 25140 }, { "epoch": 0.05010439245186791, "grad_norm": 0.17483431100845337, "learning_rate": 0.002, "loss": 2.5766, "step": 25150 }, { "epoch": 0.05012431467550682, "grad_norm": 0.1500418484210968, "learning_rate": 0.002, "loss": 2.5911, "step": 25160 }, { "epoch": 0.05014423689914573, "grad_norm": 0.17068277299404144, "learning_rate": 0.002, "loss": 2.5918, "step": 25170 }, { "epoch": 0.05016415912278465, "grad_norm": 0.17070074379444122, "learning_rate": 0.002, "loss": 2.5938, "step": 25180 }, { "epoch": 0.05018408134642356, "grad_norm": 0.18782353401184082, "learning_rate": 0.002, "loss": 2.5901, "step": 25190 }, { "epoch": 0.05020400357006247, "grad_norm": 0.16063429415225983, "learning_rate": 0.002, "loss": 2.5961, "step": 25200 }, { "epoch": 0.05022392579370139, "grad_norm": 0.15217016637325287, "learning_rate": 0.002, "loss": 2.5977, "step": 25210 }, { "epoch": 0.050243848017340303, "grad_norm": 0.19474220275878906, "learning_rate": 0.002, "loss": 2.5927, "step": 25220 }, { "epoch": 0.050263770240979215, "grad_norm": 0.17349663376808167, "learning_rate": 0.002, "loss": 2.5802, "step": 25230 }, { "epoch": 0.050283692464618134, "grad_norm": 0.1680254340171814, "learning_rate": 0.002, "loss": 2.5856, "step": 25240 }, { "epoch": 0.050303614688257045, "grad_norm": 0.1705658733844757, "learning_rate": 0.002, "loss": 2.599, "step": 25250 }, { "epoch": 0.05032353691189596, "grad_norm": 0.18079416453838348, "learning_rate": 0.002, "loss": 2.6034, "step": 25260 }, { "epoch": 0.050343459135534875, "grad_norm": 0.1913960874080658, "learning_rate": 0.002, "loss": 2.5836, "step": 25270 }, { "epoch": 0.05036338135917379, "grad_norm": 0.18809638917446136, "learning_rate": 0.002, "loss": 2.5832, "step": 25280 }, { "epoch": 0.0503833035828127, "grad_norm": 0.16594800353050232, "learning_rate": 0.002, "loss": 2.5873, "step": 25290 }, { "epoch": 0.05040322580645161, "grad_norm": 0.1617114394903183, "learning_rate": 0.002, "loss": 2.5937, "step": 25300 }, { "epoch": 0.05042314803009053, "grad_norm": 0.1891014724969864, "learning_rate": 0.002, "loss": 2.588, "step": 25310 }, { "epoch": 0.05044307025372944, "grad_norm": 0.17116136848926544, "learning_rate": 0.002, "loss": 2.5874, "step": 25320 }, { "epoch": 0.05046299247736835, "grad_norm": 0.16428524255752563, "learning_rate": 0.002, "loss": 2.5898, "step": 25330 }, { "epoch": 0.05048291470100727, "grad_norm": 0.18272866308689117, "learning_rate": 0.002, "loss": 2.5857, "step": 25340 }, { "epoch": 0.05050283692464618, "grad_norm": 0.16551771759986877, "learning_rate": 0.002, "loss": 2.5933, "step": 25350 }, { "epoch": 0.05052275914828509, "grad_norm": 0.15667682886123657, "learning_rate": 0.002, "loss": 2.5724, "step": 25360 }, { "epoch": 0.05054268137192401, "grad_norm": 0.14328935742378235, "learning_rate": 0.002, "loss": 2.5819, "step": 25370 }, { "epoch": 0.05056260359556292, "grad_norm": 0.27022939920425415, "learning_rate": 0.002, "loss": 2.5944, "step": 25380 }, { "epoch": 0.050582525819201835, "grad_norm": 0.17435622215270996, "learning_rate": 0.002, "loss": 2.5938, "step": 25390 }, { "epoch": 0.05060244804284075, "grad_norm": 0.1872229278087616, "learning_rate": 0.002, "loss": 2.593, "step": 25400 }, { "epoch": 0.050622370266479665, "grad_norm": 0.16011635959148407, "learning_rate": 0.002, "loss": 2.5986, "step": 25410 }, { "epoch": 0.050642292490118576, "grad_norm": 0.22536700963974, "learning_rate": 0.002, "loss": 2.5946, "step": 25420 }, { "epoch": 0.05066221471375749, "grad_norm": 0.16946011781692505, "learning_rate": 0.002, "loss": 2.5949, "step": 25430 }, { "epoch": 0.050682136937396406, "grad_norm": 0.18818505108356476, "learning_rate": 0.002, "loss": 2.5794, "step": 25440 }, { "epoch": 0.05070205916103532, "grad_norm": 0.19655200839042664, "learning_rate": 0.002, "loss": 2.5789, "step": 25450 }, { "epoch": 0.05072198138467423, "grad_norm": 0.2117767184972763, "learning_rate": 0.002, "loss": 2.5726, "step": 25460 }, { "epoch": 0.05074190360831315, "grad_norm": 0.185683473944664, "learning_rate": 0.002, "loss": 2.5938, "step": 25470 }, { "epoch": 0.05076182583195206, "grad_norm": 0.15687204897403717, "learning_rate": 0.002, "loss": 2.5839, "step": 25480 }, { "epoch": 0.05078174805559097, "grad_norm": 0.1658901572227478, "learning_rate": 0.002, "loss": 2.5804, "step": 25490 }, { "epoch": 0.05080167027922989, "grad_norm": 0.1895565688610077, "learning_rate": 0.002, "loss": 2.5782, "step": 25500 }, { "epoch": 0.0508215925028688, "grad_norm": 0.18305586278438568, "learning_rate": 0.002, "loss": 2.5839, "step": 25510 }, { "epoch": 0.05084151472650771, "grad_norm": 0.16314877569675446, "learning_rate": 0.002, "loss": 2.5613, "step": 25520 }, { "epoch": 0.05086143695014663, "grad_norm": 0.16315428912639618, "learning_rate": 0.002, "loss": 2.5781, "step": 25530 }, { "epoch": 0.05088135917378554, "grad_norm": 0.19321344792842865, "learning_rate": 0.002, "loss": 2.6013, "step": 25540 }, { "epoch": 0.050901281397424454, "grad_norm": 0.17319591343402863, "learning_rate": 0.002, "loss": 2.5759, "step": 25550 }, { "epoch": 0.050921203621063366, "grad_norm": 0.1536049097776413, "learning_rate": 0.002, "loss": 2.5956, "step": 25560 }, { "epoch": 0.050941125844702284, "grad_norm": 0.2078230232000351, "learning_rate": 0.002, "loss": 2.5869, "step": 25570 }, { "epoch": 0.050961048068341196, "grad_norm": 0.19189807772636414, "learning_rate": 0.002, "loss": 2.5888, "step": 25580 }, { "epoch": 0.05098097029198011, "grad_norm": 0.16677525639533997, "learning_rate": 0.002, "loss": 2.5702, "step": 25590 }, { "epoch": 0.051000892515619026, "grad_norm": 0.21530921757221222, "learning_rate": 0.002, "loss": 2.6011, "step": 25600 }, { "epoch": 0.05102081473925794, "grad_norm": 0.17290465533733368, "learning_rate": 0.002, "loss": 2.5945, "step": 25610 }, { "epoch": 0.05104073696289685, "grad_norm": 0.18910099565982819, "learning_rate": 0.002, "loss": 2.5759, "step": 25620 }, { "epoch": 0.05106065918653577, "grad_norm": 0.2135501205921173, "learning_rate": 0.002, "loss": 2.5961, "step": 25630 }, { "epoch": 0.05108058141017468, "grad_norm": 0.16934143006801605, "learning_rate": 0.002, "loss": 2.5885, "step": 25640 }, { "epoch": 0.05110050363381359, "grad_norm": 0.15042859315872192, "learning_rate": 0.002, "loss": 2.5873, "step": 25650 }, { "epoch": 0.0511204258574525, "grad_norm": 0.1765710413455963, "learning_rate": 0.002, "loss": 2.5813, "step": 25660 }, { "epoch": 0.05114034808109142, "grad_norm": 0.21056270599365234, "learning_rate": 0.002, "loss": 2.579, "step": 25670 }, { "epoch": 0.05116027030473033, "grad_norm": 0.15807652473449707, "learning_rate": 0.002, "loss": 2.5881, "step": 25680 }, { "epoch": 0.051180192528369244, "grad_norm": 0.16475674510002136, "learning_rate": 0.002, "loss": 2.6142, "step": 25690 }, { "epoch": 0.05120011475200816, "grad_norm": 0.19532042741775513, "learning_rate": 0.002, "loss": 2.5915, "step": 25700 }, { "epoch": 0.051220036975647074, "grad_norm": 0.1740802377462387, "learning_rate": 0.002, "loss": 2.5824, "step": 25710 }, { "epoch": 0.051239959199285985, "grad_norm": 0.17941918969154358, "learning_rate": 0.002, "loss": 2.5863, "step": 25720 }, { "epoch": 0.051259881422924904, "grad_norm": 0.1853962391614914, "learning_rate": 0.002, "loss": 2.585, "step": 25730 }, { "epoch": 0.051279803646563815, "grad_norm": 0.1681087613105774, "learning_rate": 0.002, "loss": 2.5787, "step": 25740 }, { "epoch": 0.05129972587020273, "grad_norm": 0.15189911425113678, "learning_rate": 0.002, "loss": 2.5877, "step": 25750 }, { "epoch": 0.051319648093841645, "grad_norm": 0.19549494981765747, "learning_rate": 0.002, "loss": 2.5849, "step": 25760 }, { "epoch": 0.05133957031748056, "grad_norm": 0.18323145806789398, "learning_rate": 0.002, "loss": 2.5971, "step": 25770 }, { "epoch": 0.05135949254111947, "grad_norm": 0.18970713019371033, "learning_rate": 0.002, "loss": 2.5919, "step": 25780 }, { "epoch": 0.05137941476475838, "grad_norm": 0.19826935231685638, "learning_rate": 0.002, "loss": 2.5889, "step": 25790 }, { "epoch": 0.0513993369883973, "grad_norm": 0.16678887605667114, "learning_rate": 0.002, "loss": 2.5943, "step": 25800 }, { "epoch": 0.05141925921203621, "grad_norm": 0.15008628368377686, "learning_rate": 0.002, "loss": 2.5874, "step": 25810 }, { "epoch": 0.05143918143567512, "grad_norm": 0.18459023535251617, "learning_rate": 0.002, "loss": 2.5862, "step": 25820 }, { "epoch": 0.05145910365931404, "grad_norm": 0.23595306277275085, "learning_rate": 0.002, "loss": 2.5892, "step": 25830 }, { "epoch": 0.05147902588295295, "grad_norm": 0.16279961168766022, "learning_rate": 0.002, "loss": 2.593, "step": 25840 }, { "epoch": 0.05149894810659186, "grad_norm": 0.153792604804039, "learning_rate": 0.002, "loss": 2.5873, "step": 25850 }, { "epoch": 0.05151887033023078, "grad_norm": 0.1734774112701416, "learning_rate": 0.002, "loss": 2.584, "step": 25860 }, { "epoch": 0.05153879255386969, "grad_norm": 0.1738571971654892, "learning_rate": 0.002, "loss": 2.602, "step": 25870 }, { "epoch": 0.051558714777508605, "grad_norm": 0.16350623965263367, "learning_rate": 0.002, "loss": 2.5992, "step": 25880 }, { "epoch": 0.05157863700114752, "grad_norm": 0.17955684661865234, "learning_rate": 0.002, "loss": 2.5759, "step": 25890 }, { "epoch": 0.051598559224786435, "grad_norm": 0.1553955078125, "learning_rate": 0.002, "loss": 2.594, "step": 25900 }, { "epoch": 0.051618481448425346, "grad_norm": 0.16716012358665466, "learning_rate": 0.002, "loss": 2.5896, "step": 25910 }, { "epoch": 0.05163840367206426, "grad_norm": 0.19496017694473267, "learning_rate": 0.002, "loss": 2.5836, "step": 25920 }, { "epoch": 0.051658325895703176, "grad_norm": 0.18759676814079285, "learning_rate": 0.002, "loss": 2.5758, "step": 25930 }, { "epoch": 0.05167824811934209, "grad_norm": 0.1561720222234726, "learning_rate": 0.002, "loss": 2.5885, "step": 25940 }, { "epoch": 0.051698170342981, "grad_norm": 0.21548707783222198, "learning_rate": 0.002, "loss": 2.5982, "step": 25950 }, { "epoch": 0.05171809256661992, "grad_norm": 0.14473474025726318, "learning_rate": 0.002, "loss": 2.5879, "step": 25960 }, { "epoch": 0.05173801479025883, "grad_norm": 0.22746531665325165, "learning_rate": 0.002, "loss": 2.5774, "step": 25970 }, { "epoch": 0.05175793701389774, "grad_norm": 0.17993292212486267, "learning_rate": 0.002, "loss": 2.5938, "step": 25980 }, { "epoch": 0.05177785923753666, "grad_norm": 0.19473715126514435, "learning_rate": 0.002, "loss": 2.6024, "step": 25990 }, { "epoch": 0.05179778146117557, "grad_norm": 0.14493641257286072, "learning_rate": 0.002, "loss": 2.5907, "step": 26000 }, { "epoch": 0.05181770368481448, "grad_norm": 0.17448951303958893, "learning_rate": 0.002, "loss": 2.5771, "step": 26010 }, { "epoch": 0.0518376259084534, "grad_norm": 0.1831733137369156, "learning_rate": 0.002, "loss": 2.5663, "step": 26020 }, { "epoch": 0.05185754813209231, "grad_norm": 0.1464269757270813, "learning_rate": 0.002, "loss": 2.5861, "step": 26030 }, { "epoch": 0.051877470355731224, "grad_norm": 0.14436112344264984, "learning_rate": 0.002, "loss": 2.5851, "step": 26040 }, { "epoch": 0.051897392579370136, "grad_norm": 0.19052432477474213, "learning_rate": 0.002, "loss": 2.5987, "step": 26050 }, { "epoch": 0.051917314803009054, "grad_norm": 0.15412719547748566, "learning_rate": 0.002, "loss": 2.5822, "step": 26060 }, { "epoch": 0.051937237026647966, "grad_norm": 0.17996132373809814, "learning_rate": 0.002, "loss": 2.5684, "step": 26070 }, { "epoch": 0.05195715925028688, "grad_norm": 0.20504295825958252, "learning_rate": 0.002, "loss": 2.583, "step": 26080 }, { "epoch": 0.051977081473925796, "grad_norm": 0.16501270234584808, "learning_rate": 0.002, "loss": 2.5932, "step": 26090 }, { "epoch": 0.05199700369756471, "grad_norm": 0.15646688640117645, "learning_rate": 0.002, "loss": 2.5889, "step": 26100 }, { "epoch": 0.05201692592120362, "grad_norm": 0.21287354826927185, "learning_rate": 0.002, "loss": 2.5946, "step": 26110 }, { "epoch": 0.05203684814484254, "grad_norm": 0.14568151533603668, "learning_rate": 0.002, "loss": 2.5864, "step": 26120 }, { "epoch": 0.05205677036848145, "grad_norm": 0.2196943312883377, "learning_rate": 0.002, "loss": 2.5769, "step": 26130 }, { "epoch": 0.05207669259212036, "grad_norm": 0.14329610764980316, "learning_rate": 0.002, "loss": 2.5941, "step": 26140 }, { "epoch": 0.05209661481575928, "grad_norm": 0.2014036774635315, "learning_rate": 0.002, "loss": 2.5924, "step": 26150 }, { "epoch": 0.05211653703939819, "grad_norm": 0.18119095265865326, "learning_rate": 0.002, "loss": 2.5854, "step": 26160 }, { "epoch": 0.0521364592630371, "grad_norm": 0.16072671115398407, "learning_rate": 0.002, "loss": 2.5823, "step": 26170 }, { "epoch": 0.052156381486676014, "grad_norm": 0.17593269050121307, "learning_rate": 0.002, "loss": 2.5875, "step": 26180 }, { "epoch": 0.05217630371031493, "grad_norm": 0.16769422590732574, "learning_rate": 0.002, "loss": 2.6076, "step": 26190 }, { "epoch": 0.052196225933953844, "grad_norm": 0.18625515699386597, "learning_rate": 0.002, "loss": 2.5905, "step": 26200 }, { "epoch": 0.052216148157592755, "grad_norm": 0.17765553295612335, "learning_rate": 0.002, "loss": 2.5915, "step": 26210 }, { "epoch": 0.052236070381231674, "grad_norm": 0.17046505212783813, "learning_rate": 0.002, "loss": 2.5953, "step": 26220 }, { "epoch": 0.052255992604870585, "grad_norm": 0.20628562569618225, "learning_rate": 0.002, "loss": 2.5805, "step": 26230 }, { "epoch": 0.0522759148285095, "grad_norm": 0.2032381296157837, "learning_rate": 0.002, "loss": 2.5698, "step": 26240 }, { "epoch": 0.052295837052148415, "grad_norm": 0.17751455307006836, "learning_rate": 0.002, "loss": 2.5863, "step": 26250 }, { "epoch": 0.05231575927578733, "grad_norm": 0.15800514817237854, "learning_rate": 0.002, "loss": 2.5889, "step": 26260 }, { "epoch": 0.05233568149942624, "grad_norm": 0.1930209845304489, "learning_rate": 0.002, "loss": 2.5901, "step": 26270 }, { "epoch": 0.05235560372306516, "grad_norm": 0.182341068983078, "learning_rate": 0.002, "loss": 2.5631, "step": 26280 }, { "epoch": 0.05237552594670407, "grad_norm": 0.16199621558189392, "learning_rate": 0.002, "loss": 2.6025, "step": 26290 }, { "epoch": 0.05239544817034298, "grad_norm": 0.19584019482135773, "learning_rate": 0.002, "loss": 2.5938, "step": 26300 }, { "epoch": 0.05241537039398189, "grad_norm": 0.15301001071929932, "learning_rate": 0.002, "loss": 2.595, "step": 26310 }, { "epoch": 0.05243529261762081, "grad_norm": 0.17750979959964752, "learning_rate": 0.002, "loss": 2.569, "step": 26320 }, { "epoch": 0.05245521484125972, "grad_norm": 0.20109204947948456, "learning_rate": 0.002, "loss": 2.5964, "step": 26330 }, { "epoch": 0.05247513706489863, "grad_norm": 0.17937342822551727, "learning_rate": 0.002, "loss": 2.5767, "step": 26340 }, { "epoch": 0.05249505928853755, "grad_norm": 0.17924615740776062, "learning_rate": 0.002, "loss": 2.5815, "step": 26350 }, { "epoch": 0.05251498151217646, "grad_norm": 0.18396446108818054, "learning_rate": 0.002, "loss": 2.5928, "step": 26360 }, { "epoch": 0.052534903735815375, "grad_norm": 0.188653364777565, "learning_rate": 0.002, "loss": 2.5824, "step": 26370 }, { "epoch": 0.05255482595945429, "grad_norm": 0.18347379565238953, "learning_rate": 0.002, "loss": 2.5868, "step": 26380 }, { "epoch": 0.052574748183093205, "grad_norm": 0.16903477907180786, "learning_rate": 0.002, "loss": 2.5913, "step": 26390 }, { "epoch": 0.052594670406732116, "grad_norm": 0.16857348382472992, "learning_rate": 0.002, "loss": 2.5835, "step": 26400 }, { "epoch": 0.052614592630371035, "grad_norm": 0.17319035530090332, "learning_rate": 0.002, "loss": 2.5892, "step": 26410 }, { "epoch": 0.052634514854009946, "grad_norm": 0.17610569298267365, "learning_rate": 0.002, "loss": 2.579, "step": 26420 }, { "epoch": 0.05265443707764886, "grad_norm": 0.17314402759075165, "learning_rate": 0.002, "loss": 2.5809, "step": 26430 }, { "epoch": 0.05267435930128777, "grad_norm": 0.18066439032554626, "learning_rate": 0.002, "loss": 2.5888, "step": 26440 }, { "epoch": 0.05269428152492669, "grad_norm": 0.16608786582946777, "learning_rate": 0.002, "loss": 2.5803, "step": 26450 }, { "epoch": 0.0527142037485656, "grad_norm": 0.19349928200244904, "learning_rate": 0.002, "loss": 2.5903, "step": 26460 }, { "epoch": 0.05273412597220451, "grad_norm": 0.1802690029144287, "learning_rate": 0.002, "loss": 2.5891, "step": 26470 }, { "epoch": 0.05275404819584343, "grad_norm": 0.1721189320087433, "learning_rate": 0.002, "loss": 2.5728, "step": 26480 }, { "epoch": 0.05277397041948234, "grad_norm": 0.17412064969539642, "learning_rate": 0.002, "loss": 2.5868, "step": 26490 }, { "epoch": 0.05279389264312125, "grad_norm": 0.16093963384628296, "learning_rate": 0.002, "loss": 2.5773, "step": 26500 }, { "epoch": 0.05281381486676017, "grad_norm": 0.18706223368644714, "learning_rate": 0.002, "loss": 2.5767, "step": 26510 }, { "epoch": 0.05283373709039908, "grad_norm": 0.1728285253047943, "learning_rate": 0.002, "loss": 2.5796, "step": 26520 }, { "epoch": 0.052853659314037994, "grad_norm": 0.16998596489429474, "learning_rate": 0.002, "loss": 2.5801, "step": 26530 }, { "epoch": 0.05287358153767691, "grad_norm": 0.16728751361370087, "learning_rate": 0.002, "loss": 2.5681, "step": 26540 }, { "epoch": 0.052893503761315824, "grad_norm": 0.2203172743320465, "learning_rate": 0.002, "loss": 2.5917, "step": 26550 }, { "epoch": 0.052913425984954736, "grad_norm": 0.1594330072402954, "learning_rate": 0.002, "loss": 2.5942, "step": 26560 }, { "epoch": 0.05293334820859365, "grad_norm": 0.18815694749355316, "learning_rate": 0.002, "loss": 2.5832, "step": 26570 }, { "epoch": 0.052953270432232566, "grad_norm": 0.1746845245361328, "learning_rate": 0.002, "loss": 2.5854, "step": 26580 }, { "epoch": 0.05297319265587148, "grad_norm": 0.1772923320531845, "learning_rate": 0.002, "loss": 2.5859, "step": 26590 }, { "epoch": 0.05299311487951039, "grad_norm": 0.15547777712345123, "learning_rate": 0.002, "loss": 2.5889, "step": 26600 }, { "epoch": 0.05301303710314931, "grad_norm": 0.1790527105331421, "learning_rate": 0.002, "loss": 2.5962, "step": 26610 }, { "epoch": 0.05303295932678822, "grad_norm": 0.1595131754875183, "learning_rate": 0.002, "loss": 2.6015, "step": 26620 }, { "epoch": 0.05305288155042713, "grad_norm": 0.16760863363742828, "learning_rate": 0.002, "loss": 2.5909, "step": 26630 }, { "epoch": 0.05307280377406605, "grad_norm": 0.23167455196380615, "learning_rate": 0.002, "loss": 2.5733, "step": 26640 }, { "epoch": 0.05309272599770496, "grad_norm": 0.15466134250164032, "learning_rate": 0.002, "loss": 2.5997, "step": 26650 }, { "epoch": 0.05311264822134387, "grad_norm": 0.1929693967103958, "learning_rate": 0.002, "loss": 2.5807, "step": 26660 }, { "epoch": 0.053132570444982784, "grad_norm": 0.19239205121994019, "learning_rate": 0.002, "loss": 2.5938, "step": 26670 }, { "epoch": 0.0531524926686217, "grad_norm": 0.16267241537570953, "learning_rate": 0.002, "loss": 2.5997, "step": 26680 }, { "epoch": 0.053172414892260614, "grad_norm": 0.14673656225204468, "learning_rate": 0.002, "loss": 2.5676, "step": 26690 }, { "epoch": 0.053192337115899525, "grad_norm": 0.16087886691093445, "learning_rate": 0.002, "loss": 2.5916, "step": 26700 }, { "epoch": 0.053212259339538444, "grad_norm": 0.15172748267650604, "learning_rate": 0.002, "loss": 2.5884, "step": 26710 }, { "epoch": 0.053232181563177355, "grad_norm": 0.17716602981090546, "learning_rate": 0.002, "loss": 2.5835, "step": 26720 }, { "epoch": 0.05325210378681627, "grad_norm": 0.14532752335071564, "learning_rate": 0.002, "loss": 2.5804, "step": 26730 }, { "epoch": 0.053272026010455185, "grad_norm": 0.17473500967025757, "learning_rate": 0.002, "loss": 2.5933, "step": 26740 }, { "epoch": 0.0532919482340941, "grad_norm": 0.16648724675178528, "learning_rate": 0.002, "loss": 2.5922, "step": 26750 }, { "epoch": 0.05331187045773301, "grad_norm": 0.1530217081308365, "learning_rate": 0.002, "loss": 2.5891, "step": 26760 }, { "epoch": 0.05333179268137193, "grad_norm": 0.19272395968437195, "learning_rate": 0.002, "loss": 2.5936, "step": 26770 }, { "epoch": 0.05335171490501084, "grad_norm": 0.15654005110263824, "learning_rate": 0.002, "loss": 2.5913, "step": 26780 }, { "epoch": 0.05337163712864975, "grad_norm": 0.22502845525741577, "learning_rate": 0.002, "loss": 2.5937, "step": 26790 }, { "epoch": 0.05339155935228866, "grad_norm": 0.15353773534297943, "learning_rate": 0.002, "loss": 2.5681, "step": 26800 }, { "epoch": 0.05341148157592758, "grad_norm": 0.1737641990184784, "learning_rate": 0.002, "loss": 2.6023, "step": 26810 }, { "epoch": 0.05343140379956649, "grad_norm": 0.15039856731891632, "learning_rate": 0.002, "loss": 2.5809, "step": 26820 }, { "epoch": 0.0534513260232054, "grad_norm": 0.1769665628671646, "learning_rate": 0.002, "loss": 2.5649, "step": 26830 }, { "epoch": 0.05347124824684432, "grad_norm": 0.145414799451828, "learning_rate": 0.002, "loss": 2.5824, "step": 26840 }, { "epoch": 0.05349117047048323, "grad_norm": 0.16635282337665558, "learning_rate": 0.002, "loss": 2.5727, "step": 26850 }, { "epoch": 0.053511092694122145, "grad_norm": 0.163896381855011, "learning_rate": 0.002, "loss": 2.598, "step": 26860 }, { "epoch": 0.05353101491776106, "grad_norm": 0.17680923640727997, "learning_rate": 0.002, "loss": 2.5993, "step": 26870 }, { "epoch": 0.053550937141399975, "grad_norm": 0.16445690393447876, "learning_rate": 0.002, "loss": 2.5835, "step": 26880 }, { "epoch": 0.053570859365038886, "grad_norm": 0.16515816748142242, "learning_rate": 0.002, "loss": 2.5788, "step": 26890 }, { "epoch": 0.053590781588677805, "grad_norm": 0.1824319064617157, "learning_rate": 0.002, "loss": 2.5746, "step": 26900 }, { "epoch": 0.053610703812316716, "grad_norm": 0.22251443564891815, "learning_rate": 0.002, "loss": 2.5754, "step": 26910 }, { "epoch": 0.05363062603595563, "grad_norm": 0.1616549789905548, "learning_rate": 0.002, "loss": 2.5967, "step": 26920 }, { "epoch": 0.05365054825959454, "grad_norm": 0.16562369465827942, "learning_rate": 0.002, "loss": 2.5875, "step": 26930 }, { "epoch": 0.05367047048323346, "grad_norm": 0.1797037571668625, "learning_rate": 0.002, "loss": 2.5562, "step": 26940 }, { "epoch": 0.05369039270687237, "grad_norm": 0.18260033428668976, "learning_rate": 0.002, "loss": 2.5857, "step": 26950 }, { "epoch": 0.05371031493051128, "grad_norm": 0.18521186709403992, "learning_rate": 0.002, "loss": 2.5933, "step": 26960 }, { "epoch": 0.0537302371541502, "grad_norm": 0.15061572194099426, "learning_rate": 0.002, "loss": 2.5974, "step": 26970 }, { "epoch": 0.05375015937778911, "grad_norm": 0.20127591490745544, "learning_rate": 0.002, "loss": 2.578, "step": 26980 }, { "epoch": 0.05377008160142802, "grad_norm": 0.1700388491153717, "learning_rate": 0.002, "loss": 2.6134, "step": 26990 }, { "epoch": 0.05379000382506694, "grad_norm": 0.16281862556934357, "learning_rate": 0.002, "loss": 2.5615, "step": 27000 }, { "epoch": 0.05380992604870585, "grad_norm": 0.1503276377916336, "learning_rate": 0.002, "loss": 2.5836, "step": 27010 }, { "epoch": 0.053829848272344764, "grad_norm": 0.1818704456090927, "learning_rate": 0.002, "loss": 2.5736, "step": 27020 }, { "epoch": 0.05384977049598368, "grad_norm": 0.1714886873960495, "learning_rate": 0.002, "loss": 2.5928, "step": 27030 }, { "epoch": 0.053869692719622594, "grad_norm": 0.15380766987800598, "learning_rate": 0.002, "loss": 2.5733, "step": 27040 }, { "epoch": 0.053889614943261506, "grad_norm": 0.17924846708774567, "learning_rate": 0.002, "loss": 2.5761, "step": 27050 }, { "epoch": 0.05390953716690042, "grad_norm": 0.18359507620334625, "learning_rate": 0.002, "loss": 2.5874, "step": 27060 }, { "epoch": 0.053929459390539336, "grad_norm": 0.181449294090271, "learning_rate": 0.002, "loss": 2.5973, "step": 27070 }, { "epoch": 0.05394938161417825, "grad_norm": 0.17731288075447083, "learning_rate": 0.002, "loss": 2.5735, "step": 27080 }, { "epoch": 0.05396930383781716, "grad_norm": 0.1784643530845642, "learning_rate": 0.002, "loss": 2.5906, "step": 27090 }, { "epoch": 0.05398922606145608, "grad_norm": 0.1850927174091339, "learning_rate": 0.002, "loss": 2.5874, "step": 27100 }, { "epoch": 0.05400914828509499, "grad_norm": 0.1454775035381317, "learning_rate": 0.002, "loss": 2.5755, "step": 27110 }, { "epoch": 0.0540290705087339, "grad_norm": 0.1646062433719635, "learning_rate": 0.002, "loss": 2.5807, "step": 27120 }, { "epoch": 0.05404899273237282, "grad_norm": 0.15166673064231873, "learning_rate": 0.002, "loss": 2.5946, "step": 27130 }, { "epoch": 0.05406891495601173, "grad_norm": 0.15450307726860046, "learning_rate": 0.002, "loss": 2.5804, "step": 27140 }, { "epoch": 0.05408883717965064, "grad_norm": 0.16186688840389252, "learning_rate": 0.002, "loss": 2.576, "step": 27150 }, { "epoch": 0.05410875940328956, "grad_norm": 0.17045289278030396, "learning_rate": 0.002, "loss": 2.5981, "step": 27160 }, { "epoch": 0.05412868162692847, "grad_norm": 0.2018124908208847, "learning_rate": 0.002, "loss": 2.5832, "step": 27170 }, { "epoch": 0.054148603850567384, "grad_norm": 0.1885221004486084, "learning_rate": 0.002, "loss": 2.5911, "step": 27180 }, { "epoch": 0.054168526074206295, "grad_norm": 0.1664811074733734, "learning_rate": 0.002, "loss": 2.5899, "step": 27190 }, { "epoch": 0.054188448297845214, "grad_norm": 0.15365439653396606, "learning_rate": 0.002, "loss": 2.5847, "step": 27200 }, { "epoch": 0.054208370521484125, "grad_norm": 0.14780300855636597, "learning_rate": 0.002, "loss": 2.5655, "step": 27210 }, { "epoch": 0.05422829274512304, "grad_norm": 0.20918594300746918, "learning_rate": 0.002, "loss": 2.5963, "step": 27220 }, { "epoch": 0.054248214968761956, "grad_norm": 0.15025271475315094, "learning_rate": 0.002, "loss": 2.5693, "step": 27230 }, { "epoch": 0.05426813719240087, "grad_norm": 0.18210697174072266, "learning_rate": 0.002, "loss": 2.6004, "step": 27240 }, { "epoch": 0.05428805941603978, "grad_norm": 0.15297502279281616, "learning_rate": 0.002, "loss": 2.5953, "step": 27250 }, { "epoch": 0.0543079816396787, "grad_norm": 0.17326349020004272, "learning_rate": 0.002, "loss": 2.5686, "step": 27260 }, { "epoch": 0.05432790386331761, "grad_norm": 0.18129613995552063, "learning_rate": 0.002, "loss": 2.6033, "step": 27270 }, { "epoch": 0.05434782608695652, "grad_norm": 0.14550524950027466, "learning_rate": 0.002, "loss": 2.5834, "step": 27280 }, { "epoch": 0.05436774831059544, "grad_norm": 0.1583324372768402, "learning_rate": 0.002, "loss": 2.583, "step": 27290 }, { "epoch": 0.05438767053423435, "grad_norm": 0.16277208924293518, "learning_rate": 0.002, "loss": 2.575, "step": 27300 }, { "epoch": 0.05440759275787326, "grad_norm": 0.15766336023807526, "learning_rate": 0.002, "loss": 2.5744, "step": 27310 }, { "epoch": 0.05442751498151217, "grad_norm": 0.18788854777812958, "learning_rate": 0.002, "loss": 2.5952, "step": 27320 }, { "epoch": 0.05444743720515109, "grad_norm": 0.15788693726062775, "learning_rate": 0.002, "loss": 2.5772, "step": 27330 }, { "epoch": 0.05446735942879, "grad_norm": 0.16715994477272034, "learning_rate": 0.002, "loss": 2.5614, "step": 27340 }, { "epoch": 0.054487281652428915, "grad_norm": 0.1661289781332016, "learning_rate": 0.002, "loss": 2.6089, "step": 27350 }, { "epoch": 0.05450720387606783, "grad_norm": 0.1696612387895584, "learning_rate": 0.002, "loss": 2.5804, "step": 27360 }, { "epoch": 0.054527126099706745, "grad_norm": 0.16301564872264862, "learning_rate": 0.002, "loss": 2.5646, "step": 27370 }, { "epoch": 0.05454704832334566, "grad_norm": 0.1669325828552246, "learning_rate": 0.002, "loss": 2.5718, "step": 27380 }, { "epoch": 0.054566970546984575, "grad_norm": 0.1679258942604065, "learning_rate": 0.002, "loss": 2.5783, "step": 27390 }, { "epoch": 0.05458689277062349, "grad_norm": 0.19125592708587646, "learning_rate": 0.002, "loss": 2.5911, "step": 27400 }, { "epoch": 0.0546068149942624, "grad_norm": 0.16589480638504028, "learning_rate": 0.002, "loss": 2.5856, "step": 27410 }, { "epoch": 0.05462673721790132, "grad_norm": 0.1532873958349228, "learning_rate": 0.002, "loss": 2.5949, "step": 27420 }, { "epoch": 0.05464665944154023, "grad_norm": 0.18180014193058014, "learning_rate": 0.002, "loss": 2.6012, "step": 27430 }, { "epoch": 0.05466658166517914, "grad_norm": 0.15007148683071136, "learning_rate": 0.002, "loss": 2.5825, "step": 27440 }, { "epoch": 0.05468650388881805, "grad_norm": 0.1819143444299698, "learning_rate": 0.002, "loss": 2.5829, "step": 27450 }, { "epoch": 0.05470642611245697, "grad_norm": 0.20436830818653107, "learning_rate": 0.002, "loss": 2.5957, "step": 27460 }, { "epoch": 0.05472634833609588, "grad_norm": 0.1883927434682846, "learning_rate": 0.002, "loss": 2.5856, "step": 27470 }, { "epoch": 0.05474627055973479, "grad_norm": 0.15913525223731995, "learning_rate": 0.002, "loss": 2.5676, "step": 27480 }, { "epoch": 0.05476619278337371, "grad_norm": 0.15709960460662842, "learning_rate": 0.002, "loss": 2.597, "step": 27490 }, { "epoch": 0.05478611500701262, "grad_norm": 0.18256570398807526, "learning_rate": 0.002, "loss": 2.5851, "step": 27500 }, { "epoch": 0.054806037230651534, "grad_norm": 0.18455015122890472, "learning_rate": 0.002, "loss": 2.5905, "step": 27510 }, { "epoch": 0.05482595945429045, "grad_norm": 0.15777789056301117, "learning_rate": 0.002, "loss": 2.5747, "step": 27520 }, { "epoch": 0.054845881677929365, "grad_norm": 0.14806486666202545, "learning_rate": 0.002, "loss": 2.5882, "step": 27530 }, { "epoch": 0.054865803901568276, "grad_norm": 0.18578501045703888, "learning_rate": 0.002, "loss": 2.5979, "step": 27540 }, { "epoch": 0.054885726125207195, "grad_norm": 0.16928552091121674, "learning_rate": 0.002, "loss": 2.5764, "step": 27550 }, { "epoch": 0.054905648348846106, "grad_norm": 0.19913281500339508, "learning_rate": 0.002, "loss": 2.5767, "step": 27560 }, { "epoch": 0.05492557057248502, "grad_norm": 0.1554754674434662, "learning_rate": 0.002, "loss": 2.58, "step": 27570 }, { "epoch": 0.05494549279612393, "grad_norm": 0.15360485017299652, "learning_rate": 0.002, "loss": 2.5918, "step": 27580 }, { "epoch": 0.05496541501976285, "grad_norm": 0.18467161059379578, "learning_rate": 0.002, "loss": 2.6079, "step": 27590 }, { "epoch": 0.05498533724340176, "grad_norm": 0.19014795124530792, "learning_rate": 0.002, "loss": 2.5811, "step": 27600 }, { "epoch": 0.05500525946704067, "grad_norm": 0.17269372940063477, "learning_rate": 0.002, "loss": 2.5971, "step": 27610 }, { "epoch": 0.05502518169067959, "grad_norm": 0.17917181551456451, "learning_rate": 0.002, "loss": 2.5919, "step": 27620 }, { "epoch": 0.0550451039143185, "grad_norm": 0.19560977816581726, "learning_rate": 0.002, "loss": 2.5929, "step": 27630 }, { "epoch": 0.05506502613795741, "grad_norm": 0.20350381731987, "learning_rate": 0.002, "loss": 2.5805, "step": 27640 }, { "epoch": 0.05508494836159633, "grad_norm": 0.18721017241477966, "learning_rate": 0.002, "loss": 2.5778, "step": 27650 }, { "epoch": 0.05510487058523524, "grad_norm": 0.17965225875377655, "learning_rate": 0.002, "loss": 2.5856, "step": 27660 }, { "epoch": 0.055124792808874154, "grad_norm": 0.18258273601531982, "learning_rate": 0.002, "loss": 2.5892, "step": 27670 }, { "epoch": 0.055144715032513066, "grad_norm": 0.15317121148109436, "learning_rate": 0.002, "loss": 2.5885, "step": 27680 }, { "epoch": 0.055164637256151984, "grad_norm": 0.22759291529655457, "learning_rate": 0.002, "loss": 2.5695, "step": 27690 }, { "epoch": 0.055184559479790896, "grad_norm": 0.16297344863414764, "learning_rate": 0.002, "loss": 2.5824, "step": 27700 }, { "epoch": 0.05520448170342981, "grad_norm": 0.16843479871749878, "learning_rate": 0.002, "loss": 2.5807, "step": 27710 }, { "epoch": 0.055224403927068726, "grad_norm": 0.15926581621170044, "learning_rate": 0.002, "loss": 2.5929, "step": 27720 }, { "epoch": 0.05524432615070764, "grad_norm": 0.18352541327476501, "learning_rate": 0.002, "loss": 2.5859, "step": 27730 }, { "epoch": 0.05526424837434655, "grad_norm": 0.1510915756225586, "learning_rate": 0.002, "loss": 2.5767, "step": 27740 }, { "epoch": 0.05528417059798547, "grad_norm": 0.21391580998897552, "learning_rate": 0.002, "loss": 2.6001, "step": 27750 }, { "epoch": 0.05530409282162438, "grad_norm": 0.17898456752300262, "learning_rate": 0.002, "loss": 2.5691, "step": 27760 }, { "epoch": 0.05532401504526329, "grad_norm": 0.17172278463840485, "learning_rate": 0.002, "loss": 2.572, "step": 27770 }, { "epoch": 0.05534393726890221, "grad_norm": 0.17180094122886658, "learning_rate": 0.002, "loss": 2.5686, "step": 27780 }, { "epoch": 0.05536385949254112, "grad_norm": 0.19669270515441895, "learning_rate": 0.002, "loss": 2.5705, "step": 27790 }, { "epoch": 0.05538378171618003, "grad_norm": 0.16527193784713745, "learning_rate": 0.002, "loss": 2.6009, "step": 27800 }, { "epoch": 0.055403703939818943, "grad_norm": 0.18405845761299133, "learning_rate": 0.002, "loss": 2.5787, "step": 27810 }, { "epoch": 0.05542362616345786, "grad_norm": 0.15838685631752014, "learning_rate": 0.002, "loss": 2.5934, "step": 27820 }, { "epoch": 0.055443548387096774, "grad_norm": 0.184425488114357, "learning_rate": 0.002, "loss": 2.6037, "step": 27830 }, { "epoch": 0.055463470610735685, "grad_norm": 0.17389079928398132, "learning_rate": 0.002, "loss": 2.5826, "step": 27840 }, { "epoch": 0.055483392834374604, "grad_norm": 0.16240055859088898, "learning_rate": 0.002, "loss": 2.5624, "step": 27850 }, { "epoch": 0.055503315058013515, "grad_norm": 0.17253553867340088, "learning_rate": 0.002, "loss": 2.5772, "step": 27860 }, { "epoch": 0.05552323728165243, "grad_norm": 0.16621534526348114, "learning_rate": 0.002, "loss": 2.5854, "step": 27870 }, { "epoch": 0.055543159505291345, "grad_norm": 0.16547423601150513, "learning_rate": 0.002, "loss": 2.606, "step": 27880 }, { "epoch": 0.05556308172893026, "grad_norm": 0.1897682100534439, "learning_rate": 0.002, "loss": 2.5789, "step": 27890 }, { "epoch": 0.05558300395256917, "grad_norm": 0.17004019021987915, "learning_rate": 0.002, "loss": 2.5862, "step": 27900 }, { "epoch": 0.05560292617620809, "grad_norm": 0.17200280725955963, "learning_rate": 0.002, "loss": 2.5785, "step": 27910 }, { "epoch": 0.055622848399847, "grad_norm": 0.17683501541614532, "learning_rate": 0.002, "loss": 2.5893, "step": 27920 }, { "epoch": 0.05564277062348591, "grad_norm": 0.21890965104103088, "learning_rate": 0.002, "loss": 2.5942, "step": 27930 }, { "epoch": 0.05566269284712482, "grad_norm": 0.1777757853269577, "learning_rate": 0.002, "loss": 2.5877, "step": 27940 }, { "epoch": 0.05568261507076374, "grad_norm": 0.16149748861789703, "learning_rate": 0.002, "loss": 2.5938, "step": 27950 }, { "epoch": 0.05570253729440265, "grad_norm": 0.17146363854408264, "learning_rate": 0.002, "loss": 2.5911, "step": 27960 }, { "epoch": 0.05572245951804156, "grad_norm": 0.19210828840732574, "learning_rate": 0.002, "loss": 2.5748, "step": 27970 }, { "epoch": 0.05574238174168048, "grad_norm": 0.15926744043827057, "learning_rate": 0.002, "loss": 2.5821, "step": 27980 }, { "epoch": 0.05576230396531939, "grad_norm": 0.19288615882396698, "learning_rate": 0.002, "loss": 2.5782, "step": 27990 }, { "epoch": 0.055782226188958305, "grad_norm": 0.19194748997688293, "learning_rate": 0.002, "loss": 2.5822, "step": 28000 }, { "epoch": 0.05580214841259722, "grad_norm": 0.1696983128786087, "learning_rate": 0.002, "loss": 2.5881, "step": 28010 }, { "epoch": 0.055822070636236135, "grad_norm": 0.13754869997501373, "learning_rate": 0.002, "loss": 2.6047, "step": 28020 }, { "epoch": 0.055841992859875046, "grad_norm": 0.14982911944389343, "learning_rate": 0.002, "loss": 2.5775, "step": 28030 }, { "epoch": 0.055861915083513965, "grad_norm": 0.17767125368118286, "learning_rate": 0.002, "loss": 2.5871, "step": 28040 }, { "epoch": 0.055881837307152876, "grad_norm": 0.1666271686553955, "learning_rate": 0.002, "loss": 2.5809, "step": 28050 }, { "epoch": 0.05590175953079179, "grad_norm": 0.14024034142494202, "learning_rate": 0.002, "loss": 2.587, "step": 28060 }, { "epoch": 0.0559216817544307, "grad_norm": 0.1780448704957962, "learning_rate": 0.002, "loss": 2.5908, "step": 28070 }, { "epoch": 0.05594160397806962, "grad_norm": 0.17180085182189941, "learning_rate": 0.002, "loss": 2.5985, "step": 28080 }, { "epoch": 0.05596152620170853, "grad_norm": 0.1858317106962204, "learning_rate": 0.002, "loss": 2.5699, "step": 28090 }, { "epoch": 0.05598144842534744, "grad_norm": 0.17404338717460632, "learning_rate": 0.002, "loss": 2.5893, "step": 28100 }, { "epoch": 0.05600137064898636, "grad_norm": 0.20023483037948608, "learning_rate": 0.002, "loss": 2.5727, "step": 28110 }, { "epoch": 0.05602129287262527, "grad_norm": 0.1798105388879776, "learning_rate": 0.002, "loss": 2.5877, "step": 28120 }, { "epoch": 0.05604121509626418, "grad_norm": 0.16659916937351227, "learning_rate": 0.002, "loss": 2.5973, "step": 28130 }, { "epoch": 0.0560611373199031, "grad_norm": 0.1892274022102356, "learning_rate": 0.002, "loss": 2.5822, "step": 28140 }, { "epoch": 0.05608105954354201, "grad_norm": 0.16867491602897644, "learning_rate": 0.002, "loss": 2.5733, "step": 28150 }, { "epoch": 0.056100981767180924, "grad_norm": 0.19844841957092285, "learning_rate": 0.002, "loss": 2.5893, "step": 28160 }, { "epoch": 0.05612090399081984, "grad_norm": 0.1879391223192215, "learning_rate": 0.002, "loss": 2.5946, "step": 28170 }, { "epoch": 0.056140826214458754, "grad_norm": 0.1647326946258545, "learning_rate": 0.002, "loss": 2.5871, "step": 28180 }, { "epoch": 0.056160748438097666, "grad_norm": 0.16392897069454193, "learning_rate": 0.002, "loss": 2.5834, "step": 28190 }, { "epoch": 0.05618067066173658, "grad_norm": 0.18548333644866943, "learning_rate": 0.002, "loss": 2.589, "step": 28200 }, { "epoch": 0.056200592885375496, "grad_norm": 0.15956905484199524, "learning_rate": 0.002, "loss": 2.5934, "step": 28210 }, { "epoch": 0.05622051510901441, "grad_norm": 0.19283238053321838, "learning_rate": 0.002, "loss": 2.589, "step": 28220 }, { "epoch": 0.05624043733265332, "grad_norm": 0.14909076690673828, "learning_rate": 0.002, "loss": 2.5977, "step": 28230 }, { "epoch": 0.05626035955629224, "grad_norm": 0.20598334074020386, "learning_rate": 0.002, "loss": 2.5824, "step": 28240 }, { "epoch": 0.05628028177993115, "grad_norm": 0.16690035164356232, "learning_rate": 0.002, "loss": 2.5928, "step": 28250 }, { "epoch": 0.05630020400357006, "grad_norm": 0.14458602666854858, "learning_rate": 0.002, "loss": 2.575, "step": 28260 }, { "epoch": 0.05632012622720898, "grad_norm": 0.16945256292819977, "learning_rate": 0.002, "loss": 2.5728, "step": 28270 }, { "epoch": 0.05634004845084789, "grad_norm": 0.16350547969341278, "learning_rate": 0.002, "loss": 2.5823, "step": 28280 }, { "epoch": 0.0563599706744868, "grad_norm": 0.18576188385486603, "learning_rate": 0.002, "loss": 2.5859, "step": 28290 }, { "epoch": 0.05637989289812572, "grad_norm": 0.20408794283866882, "learning_rate": 0.002, "loss": 2.5813, "step": 28300 }, { "epoch": 0.05639981512176463, "grad_norm": 0.18046462535858154, "learning_rate": 0.002, "loss": 2.5792, "step": 28310 }, { "epoch": 0.056419737345403544, "grad_norm": 0.18146444857120514, "learning_rate": 0.002, "loss": 2.5834, "step": 28320 }, { "epoch": 0.056439659569042455, "grad_norm": 0.15595179796218872, "learning_rate": 0.002, "loss": 2.584, "step": 28330 }, { "epoch": 0.056459581792681374, "grad_norm": 0.18865157663822174, "learning_rate": 0.002, "loss": 2.5754, "step": 28340 }, { "epoch": 0.056479504016320285, "grad_norm": 0.1992429494857788, "learning_rate": 0.002, "loss": 2.5658, "step": 28350 }, { "epoch": 0.0564994262399592, "grad_norm": 0.16320618987083435, "learning_rate": 0.002, "loss": 2.5897, "step": 28360 }, { "epoch": 0.056519348463598115, "grad_norm": 0.1880398541688919, "learning_rate": 0.002, "loss": 2.5854, "step": 28370 }, { "epoch": 0.05653927068723703, "grad_norm": 0.1815977543592453, "learning_rate": 0.002, "loss": 2.5947, "step": 28380 }, { "epoch": 0.05655919291087594, "grad_norm": 0.16594049334526062, "learning_rate": 0.002, "loss": 2.5746, "step": 28390 }, { "epoch": 0.05657911513451486, "grad_norm": 0.17929862439632416, "learning_rate": 0.002, "loss": 2.5976, "step": 28400 }, { "epoch": 0.05659903735815377, "grad_norm": 0.1784684658050537, "learning_rate": 0.002, "loss": 2.5845, "step": 28410 }, { "epoch": 0.05661895958179268, "grad_norm": 0.16739490628242493, "learning_rate": 0.002, "loss": 2.564, "step": 28420 }, { "epoch": 0.0566388818054316, "grad_norm": 0.2026769071817398, "learning_rate": 0.002, "loss": 2.5808, "step": 28430 }, { "epoch": 0.05665880402907051, "grad_norm": 0.16866351664066315, "learning_rate": 0.002, "loss": 2.6051, "step": 28440 }, { "epoch": 0.05667872625270942, "grad_norm": 0.17724239826202393, "learning_rate": 0.002, "loss": 2.5831, "step": 28450 }, { "epoch": 0.05669864847634833, "grad_norm": 0.16351057589054108, "learning_rate": 0.002, "loss": 2.5901, "step": 28460 }, { "epoch": 0.05671857069998725, "grad_norm": 0.17923149466514587, "learning_rate": 0.002, "loss": 2.5796, "step": 28470 }, { "epoch": 0.05673849292362616, "grad_norm": 0.18277935683727264, "learning_rate": 0.002, "loss": 2.5855, "step": 28480 }, { "epoch": 0.056758415147265075, "grad_norm": 0.19036732614040375, "learning_rate": 0.002, "loss": 2.5934, "step": 28490 }, { "epoch": 0.05677833737090399, "grad_norm": 0.16418153047561646, "learning_rate": 0.002, "loss": 2.5866, "step": 28500 }, { "epoch": 0.056798259594542905, "grad_norm": 0.1844404637813568, "learning_rate": 0.002, "loss": 2.5808, "step": 28510 }, { "epoch": 0.056818181818181816, "grad_norm": 0.16915275156497955, "learning_rate": 0.002, "loss": 2.5669, "step": 28520 }, { "epoch": 0.056838104041820735, "grad_norm": 0.17634230852127075, "learning_rate": 0.002, "loss": 2.5904, "step": 28530 }, { "epoch": 0.056858026265459646, "grad_norm": 0.1925240159034729, "learning_rate": 0.002, "loss": 2.5741, "step": 28540 }, { "epoch": 0.05687794848909856, "grad_norm": 0.1752542108297348, "learning_rate": 0.002, "loss": 2.5788, "step": 28550 }, { "epoch": 0.056897870712737476, "grad_norm": 0.18785052001476288, "learning_rate": 0.002, "loss": 2.5852, "step": 28560 }, { "epoch": 0.05691779293637639, "grad_norm": 0.1611928641796112, "learning_rate": 0.002, "loss": 2.5819, "step": 28570 }, { "epoch": 0.0569377151600153, "grad_norm": 0.24590104818344116, "learning_rate": 0.002, "loss": 2.5919, "step": 28580 }, { "epoch": 0.05695763738365421, "grad_norm": 0.17048245668411255, "learning_rate": 0.002, "loss": 2.5917, "step": 28590 }, { "epoch": 0.05697755960729313, "grad_norm": 0.1652214527130127, "learning_rate": 0.002, "loss": 2.5943, "step": 28600 }, { "epoch": 0.05699748183093204, "grad_norm": 0.16933703422546387, "learning_rate": 0.002, "loss": 2.5805, "step": 28610 }, { "epoch": 0.05701740405457095, "grad_norm": 0.15992242097854614, "learning_rate": 0.002, "loss": 2.5854, "step": 28620 }, { "epoch": 0.05703732627820987, "grad_norm": 0.16829685866832733, "learning_rate": 0.002, "loss": 2.5827, "step": 28630 }, { "epoch": 0.05705724850184878, "grad_norm": 0.18317894637584686, "learning_rate": 0.002, "loss": 2.5737, "step": 28640 }, { "epoch": 0.057077170725487694, "grad_norm": 0.15511886775493622, "learning_rate": 0.002, "loss": 2.5779, "step": 28650 }, { "epoch": 0.05709709294912661, "grad_norm": 0.1629985123872757, "learning_rate": 0.002, "loss": 2.5943, "step": 28660 }, { "epoch": 0.057117015172765524, "grad_norm": 0.2042141854763031, "learning_rate": 0.002, "loss": 2.5908, "step": 28670 }, { "epoch": 0.057136937396404436, "grad_norm": 0.18430161476135254, "learning_rate": 0.002, "loss": 2.5682, "step": 28680 }, { "epoch": 0.05715685962004335, "grad_norm": 0.20710615813732147, "learning_rate": 0.002, "loss": 2.5857, "step": 28690 }, { "epoch": 0.057176781843682266, "grad_norm": 0.2491215020418167, "learning_rate": 0.002, "loss": 2.586, "step": 28700 }, { "epoch": 0.05719670406732118, "grad_norm": 0.14818698167800903, "learning_rate": 0.002, "loss": 2.5788, "step": 28710 }, { "epoch": 0.05721662629096009, "grad_norm": 0.20667928457260132, "learning_rate": 0.002, "loss": 2.5926, "step": 28720 }, { "epoch": 0.05723654851459901, "grad_norm": 0.1579035371541977, "learning_rate": 0.002, "loss": 2.5951, "step": 28730 }, { "epoch": 0.05725647073823792, "grad_norm": 0.16813108325004578, "learning_rate": 0.002, "loss": 2.6057, "step": 28740 }, { "epoch": 0.05727639296187683, "grad_norm": 0.21335382759571075, "learning_rate": 0.002, "loss": 2.5974, "step": 28750 }, { "epoch": 0.05729631518551575, "grad_norm": 0.16189853847026825, "learning_rate": 0.002, "loss": 2.5785, "step": 28760 }, { "epoch": 0.05731623740915466, "grad_norm": 0.18666787445545197, "learning_rate": 0.002, "loss": 2.5876, "step": 28770 }, { "epoch": 0.05733615963279357, "grad_norm": 0.1493554264307022, "learning_rate": 0.002, "loss": 2.5985, "step": 28780 }, { "epoch": 0.05735608185643249, "grad_norm": 0.1802438497543335, "learning_rate": 0.002, "loss": 2.5787, "step": 28790 }, { "epoch": 0.0573760040800714, "grad_norm": 0.1640588790178299, "learning_rate": 0.002, "loss": 2.5822, "step": 28800 }, { "epoch": 0.057395926303710314, "grad_norm": 0.1645812690258026, "learning_rate": 0.002, "loss": 2.5739, "step": 28810 }, { "epoch": 0.057415848527349225, "grad_norm": 0.1780269294977188, "learning_rate": 0.002, "loss": 2.5875, "step": 28820 }, { "epoch": 0.057435770750988144, "grad_norm": 0.16895101964473724, "learning_rate": 0.002, "loss": 2.5909, "step": 28830 }, { "epoch": 0.057455692974627055, "grad_norm": 0.1714593917131424, "learning_rate": 0.002, "loss": 2.5827, "step": 28840 }, { "epoch": 0.05747561519826597, "grad_norm": 0.16620677709579468, "learning_rate": 0.002, "loss": 2.5906, "step": 28850 }, { "epoch": 0.057495537421904885, "grad_norm": 0.1667700856924057, "learning_rate": 0.002, "loss": 2.5974, "step": 28860 }, { "epoch": 0.0575154596455438, "grad_norm": 0.19239212572574615, "learning_rate": 0.002, "loss": 2.5859, "step": 28870 }, { "epoch": 0.05753538186918271, "grad_norm": 0.16862380504608154, "learning_rate": 0.002, "loss": 2.5839, "step": 28880 }, { "epoch": 0.05755530409282163, "grad_norm": 0.16689079999923706, "learning_rate": 0.002, "loss": 2.5968, "step": 28890 }, { "epoch": 0.05757522631646054, "grad_norm": 0.18407005071640015, "learning_rate": 0.002, "loss": 2.5781, "step": 28900 }, { "epoch": 0.05759514854009945, "grad_norm": 0.1591210812330246, "learning_rate": 0.002, "loss": 2.5788, "step": 28910 }, { "epoch": 0.05761507076373837, "grad_norm": 0.186543270945549, "learning_rate": 0.002, "loss": 2.5962, "step": 28920 }, { "epoch": 0.05763499298737728, "grad_norm": 0.16892418265342712, "learning_rate": 0.002, "loss": 2.5967, "step": 28930 }, { "epoch": 0.05765491521101619, "grad_norm": 0.161961629986763, "learning_rate": 0.002, "loss": 2.5845, "step": 28940 }, { "epoch": 0.0576748374346551, "grad_norm": 0.19246694445610046, "learning_rate": 0.002, "loss": 2.5843, "step": 28950 }, { "epoch": 0.05769475965829402, "grad_norm": 0.19728931784629822, "learning_rate": 0.002, "loss": 2.5975, "step": 28960 }, { "epoch": 0.05771468188193293, "grad_norm": 0.20137575268745422, "learning_rate": 0.002, "loss": 2.5967, "step": 28970 }, { "epoch": 0.057734604105571845, "grad_norm": 0.17811213433742523, "learning_rate": 0.002, "loss": 2.5841, "step": 28980 }, { "epoch": 0.05775452632921076, "grad_norm": 0.1911436915397644, "learning_rate": 0.002, "loss": 2.5875, "step": 28990 }, { "epoch": 0.057774448552849675, "grad_norm": 0.14309129118919373, "learning_rate": 0.002, "loss": 2.5817, "step": 29000 }, { "epoch": 0.057794370776488586, "grad_norm": 0.16735726594924927, "learning_rate": 0.002, "loss": 2.5948, "step": 29010 }, { "epoch": 0.057814293000127505, "grad_norm": 0.19721609354019165, "learning_rate": 0.002, "loss": 2.5871, "step": 29020 }, { "epoch": 0.057834215223766416, "grad_norm": 0.16223402321338654, "learning_rate": 0.002, "loss": 2.5917, "step": 29030 }, { "epoch": 0.05785413744740533, "grad_norm": 0.18691575527191162, "learning_rate": 0.002, "loss": 2.5909, "step": 29040 }, { "epoch": 0.057874059671044246, "grad_norm": 0.1651085466146469, "learning_rate": 0.002, "loss": 2.5857, "step": 29050 }, { "epoch": 0.05789398189468316, "grad_norm": 0.18111614882946014, "learning_rate": 0.002, "loss": 2.5778, "step": 29060 }, { "epoch": 0.05791390411832207, "grad_norm": 0.17998510599136353, "learning_rate": 0.002, "loss": 2.5887, "step": 29070 }, { "epoch": 0.05793382634196098, "grad_norm": 0.2089114785194397, "learning_rate": 0.002, "loss": 2.5731, "step": 29080 }, { "epoch": 0.0579537485655999, "grad_norm": 0.2152222990989685, "learning_rate": 0.002, "loss": 2.5887, "step": 29090 }, { "epoch": 0.05797367078923881, "grad_norm": 0.1565570831298828, "learning_rate": 0.002, "loss": 2.5986, "step": 29100 }, { "epoch": 0.05799359301287772, "grad_norm": 0.16139943897724152, "learning_rate": 0.002, "loss": 2.5683, "step": 29110 }, { "epoch": 0.05801351523651664, "grad_norm": 0.2281729131937027, "learning_rate": 0.002, "loss": 2.5774, "step": 29120 }, { "epoch": 0.05803343746015555, "grad_norm": 0.16096097230911255, "learning_rate": 0.002, "loss": 2.5877, "step": 29130 }, { "epoch": 0.058053359683794464, "grad_norm": 0.1876230239868164, "learning_rate": 0.002, "loss": 2.5785, "step": 29140 }, { "epoch": 0.05807328190743338, "grad_norm": 0.1514131873846054, "learning_rate": 0.002, "loss": 2.5904, "step": 29150 }, { "epoch": 0.058093204131072294, "grad_norm": 0.16070963442325592, "learning_rate": 0.002, "loss": 2.5793, "step": 29160 }, { "epoch": 0.058113126354711206, "grad_norm": 0.16197162866592407, "learning_rate": 0.002, "loss": 2.5711, "step": 29170 }, { "epoch": 0.058133048578350124, "grad_norm": 0.17784331738948822, "learning_rate": 0.002, "loss": 2.5843, "step": 29180 }, { "epoch": 0.058152970801989036, "grad_norm": 0.17889240384101868, "learning_rate": 0.002, "loss": 2.6055, "step": 29190 }, { "epoch": 0.05817289302562795, "grad_norm": 0.21953581273555756, "learning_rate": 0.002, "loss": 2.5718, "step": 29200 }, { "epoch": 0.05819281524926686, "grad_norm": 0.1527792364358902, "learning_rate": 0.002, "loss": 2.566, "step": 29210 }, { "epoch": 0.05821273747290578, "grad_norm": 0.1918921023607254, "learning_rate": 0.002, "loss": 2.5777, "step": 29220 }, { "epoch": 0.05823265969654469, "grad_norm": 0.159190371632576, "learning_rate": 0.002, "loss": 2.5931, "step": 29230 }, { "epoch": 0.0582525819201836, "grad_norm": 0.2126617431640625, "learning_rate": 0.002, "loss": 2.5793, "step": 29240 }, { "epoch": 0.05827250414382252, "grad_norm": 0.16797982156276703, "learning_rate": 0.002, "loss": 2.5687, "step": 29250 }, { "epoch": 0.05829242636746143, "grad_norm": 0.16386835277080536, "learning_rate": 0.002, "loss": 2.5988, "step": 29260 }, { "epoch": 0.05831234859110034, "grad_norm": 0.16930952668190002, "learning_rate": 0.002, "loss": 2.5852, "step": 29270 }, { "epoch": 0.05833227081473926, "grad_norm": 0.18531028926372528, "learning_rate": 0.002, "loss": 2.5818, "step": 29280 }, { "epoch": 0.05835219303837817, "grad_norm": 0.17077738046646118, "learning_rate": 0.002, "loss": 2.5562, "step": 29290 }, { "epoch": 0.058372115262017084, "grad_norm": 0.17827104032039642, "learning_rate": 0.002, "loss": 2.5834, "step": 29300 }, { "epoch": 0.058392037485656, "grad_norm": 0.16766424477100372, "learning_rate": 0.002, "loss": 2.6006, "step": 29310 }, { "epoch": 0.058411959709294914, "grad_norm": 0.1717868149280548, "learning_rate": 0.002, "loss": 2.586, "step": 29320 }, { "epoch": 0.058431881932933825, "grad_norm": 0.15772917866706848, "learning_rate": 0.002, "loss": 2.5878, "step": 29330 }, { "epoch": 0.05845180415657274, "grad_norm": 0.16535335779190063, "learning_rate": 0.002, "loss": 2.578, "step": 29340 }, { "epoch": 0.058471726380211655, "grad_norm": 0.2026078701019287, "learning_rate": 0.002, "loss": 2.583, "step": 29350 }, { "epoch": 0.05849164860385057, "grad_norm": 0.14334401488304138, "learning_rate": 0.002, "loss": 2.5889, "step": 29360 }, { "epoch": 0.05851157082748948, "grad_norm": 0.16260460019111633, "learning_rate": 0.002, "loss": 2.5816, "step": 29370 }, { "epoch": 0.0585314930511284, "grad_norm": 0.16421541571617126, "learning_rate": 0.002, "loss": 2.5792, "step": 29380 }, { "epoch": 0.05855141527476731, "grad_norm": 0.15729442238807678, "learning_rate": 0.002, "loss": 2.5732, "step": 29390 }, { "epoch": 0.05857133749840622, "grad_norm": 0.2138790637254715, "learning_rate": 0.002, "loss": 2.5939, "step": 29400 }, { "epoch": 0.05859125972204514, "grad_norm": 0.1627342700958252, "learning_rate": 0.002, "loss": 2.5776, "step": 29410 }, { "epoch": 0.05861118194568405, "grad_norm": 0.2370878905057907, "learning_rate": 0.002, "loss": 2.596, "step": 29420 }, { "epoch": 0.05863110416932296, "grad_norm": 0.18019476532936096, "learning_rate": 0.002, "loss": 2.5925, "step": 29430 }, { "epoch": 0.05865102639296188, "grad_norm": 0.18313224613666534, "learning_rate": 0.002, "loss": 2.5823, "step": 29440 }, { "epoch": 0.05867094861660079, "grad_norm": 0.16549338400363922, "learning_rate": 0.002, "loss": 2.5831, "step": 29450 }, { "epoch": 0.0586908708402397, "grad_norm": 0.17146936058998108, "learning_rate": 0.002, "loss": 2.5946, "step": 29460 }, { "epoch": 0.058710793063878615, "grad_norm": 0.17030246555805206, "learning_rate": 0.002, "loss": 2.5896, "step": 29470 }, { "epoch": 0.05873071528751753, "grad_norm": 0.15094007551670074, "learning_rate": 0.002, "loss": 2.5977, "step": 29480 }, { "epoch": 0.058750637511156445, "grad_norm": 0.18577103316783905, "learning_rate": 0.002, "loss": 2.5951, "step": 29490 }, { "epoch": 0.058770559734795356, "grad_norm": 0.16368368268013, "learning_rate": 0.002, "loss": 2.5821, "step": 29500 }, { "epoch": 0.058790481958434275, "grad_norm": 0.14551159739494324, "learning_rate": 0.002, "loss": 2.5742, "step": 29510 }, { "epoch": 0.05881040418207319, "grad_norm": 0.18360164761543274, "learning_rate": 0.002, "loss": 2.5849, "step": 29520 }, { "epoch": 0.0588303264057121, "grad_norm": 0.15353326499462128, "learning_rate": 0.002, "loss": 2.5765, "step": 29530 }, { "epoch": 0.05885024862935102, "grad_norm": 0.2602309584617615, "learning_rate": 0.002, "loss": 2.5752, "step": 29540 }, { "epoch": 0.05887017085298993, "grad_norm": 0.17455120384693146, "learning_rate": 0.002, "loss": 2.5768, "step": 29550 }, { "epoch": 0.05889009307662884, "grad_norm": 0.15132834017276764, "learning_rate": 0.002, "loss": 2.5704, "step": 29560 }, { "epoch": 0.05891001530026775, "grad_norm": 0.18526078760623932, "learning_rate": 0.002, "loss": 2.5877, "step": 29570 }, { "epoch": 0.05892993752390667, "grad_norm": 0.20389989018440247, "learning_rate": 0.002, "loss": 2.5894, "step": 29580 }, { "epoch": 0.05894985974754558, "grad_norm": 0.16068291664123535, "learning_rate": 0.002, "loss": 2.5841, "step": 29590 }, { "epoch": 0.05896978197118449, "grad_norm": 0.17377574741840363, "learning_rate": 0.002, "loss": 2.6137, "step": 29600 }, { "epoch": 0.05898970419482341, "grad_norm": 0.15615588426589966, "learning_rate": 0.002, "loss": 2.5818, "step": 29610 }, { "epoch": 0.05900962641846232, "grad_norm": 0.16660663485527039, "learning_rate": 0.002, "loss": 2.5757, "step": 29620 }, { "epoch": 0.059029548642101234, "grad_norm": 0.23388531804084778, "learning_rate": 0.002, "loss": 2.5815, "step": 29630 }, { "epoch": 0.05904947086574015, "grad_norm": 0.16445249319076538, "learning_rate": 0.002, "loss": 2.572, "step": 29640 }, { "epoch": 0.059069393089379064, "grad_norm": 0.1600853055715561, "learning_rate": 0.002, "loss": 2.5991, "step": 29650 }, { "epoch": 0.059089315313017976, "grad_norm": 0.14340130984783173, "learning_rate": 0.002, "loss": 2.5965, "step": 29660 }, { "epoch": 0.059109237536656895, "grad_norm": 0.15591023862361908, "learning_rate": 0.002, "loss": 2.5788, "step": 29670 }, { "epoch": 0.059129159760295806, "grad_norm": 0.17412979900836945, "learning_rate": 0.002, "loss": 2.609, "step": 29680 }, { "epoch": 0.05914908198393472, "grad_norm": 0.2015865594148636, "learning_rate": 0.002, "loss": 2.5882, "step": 29690 }, { "epoch": 0.05916900420757363, "grad_norm": 0.18307660520076752, "learning_rate": 0.002, "loss": 2.5861, "step": 29700 }, { "epoch": 0.05918892643121255, "grad_norm": 0.1783522516489029, "learning_rate": 0.002, "loss": 2.5747, "step": 29710 }, { "epoch": 0.05920884865485146, "grad_norm": 0.1961592435836792, "learning_rate": 0.002, "loss": 2.5915, "step": 29720 }, { "epoch": 0.05922877087849037, "grad_norm": 0.17020125687122345, "learning_rate": 0.002, "loss": 2.5776, "step": 29730 }, { "epoch": 0.05924869310212929, "grad_norm": 0.21118459105491638, "learning_rate": 0.002, "loss": 2.5778, "step": 29740 }, { "epoch": 0.0592686153257682, "grad_norm": 0.17240005731582642, "learning_rate": 0.002, "loss": 2.5947, "step": 29750 }, { "epoch": 0.05928853754940711, "grad_norm": 0.18255971372127533, "learning_rate": 0.002, "loss": 2.5734, "step": 29760 }, { "epoch": 0.05930845977304603, "grad_norm": 0.20241010189056396, "learning_rate": 0.002, "loss": 2.5839, "step": 29770 }, { "epoch": 0.05932838199668494, "grad_norm": 0.17796550691127777, "learning_rate": 0.002, "loss": 2.5743, "step": 29780 }, { "epoch": 0.059348304220323854, "grad_norm": 0.19237539172172546, "learning_rate": 0.002, "loss": 2.5871, "step": 29790 }, { "epoch": 0.05936822644396277, "grad_norm": 0.1676863729953766, "learning_rate": 0.002, "loss": 2.5801, "step": 29800 }, { "epoch": 0.059388148667601684, "grad_norm": 0.18125604093074799, "learning_rate": 0.002, "loss": 2.5928, "step": 29810 }, { "epoch": 0.059408070891240596, "grad_norm": 0.21457718312740326, "learning_rate": 0.002, "loss": 2.5946, "step": 29820 }, { "epoch": 0.05942799311487951, "grad_norm": 0.16320578753948212, "learning_rate": 0.002, "loss": 2.5882, "step": 29830 }, { "epoch": 0.059447915338518426, "grad_norm": 0.16669517755508423, "learning_rate": 0.002, "loss": 2.5705, "step": 29840 }, { "epoch": 0.05946783756215734, "grad_norm": 0.22345946729183197, "learning_rate": 0.002, "loss": 2.5888, "step": 29850 }, { "epoch": 0.05948775978579625, "grad_norm": 0.19443874061107635, "learning_rate": 0.002, "loss": 2.5891, "step": 29860 }, { "epoch": 0.05950768200943517, "grad_norm": 0.20394790172576904, "learning_rate": 0.002, "loss": 2.5739, "step": 29870 }, { "epoch": 0.05952760423307408, "grad_norm": 0.16108855605125427, "learning_rate": 0.002, "loss": 2.5639, "step": 29880 }, { "epoch": 0.05954752645671299, "grad_norm": 0.19534088671207428, "learning_rate": 0.002, "loss": 2.5988, "step": 29890 }, { "epoch": 0.05956744868035191, "grad_norm": 0.18022041022777557, "learning_rate": 0.002, "loss": 2.5748, "step": 29900 }, { "epoch": 0.05958737090399082, "grad_norm": 0.1593679040670395, "learning_rate": 0.002, "loss": 2.583, "step": 29910 }, { "epoch": 0.05960729312762973, "grad_norm": 0.13934673368930817, "learning_rate": 0.002, "loss": 2.5705, "step": 29920 }, { "epoch": 0.05962721535126865, "grad_norm": 0.19312343001365662, "learning_rate": 0.002, "loss": 2.5896, "step": 29930 }, { "epoch": 0.05964713757490756, "grad_norm": 0.2258928418159485, "learning_rate": 0.002, "loss": 2.5852, "step": 29940 }, { "epoch": 0.05966705979854647, "grad_norm": 0.19218331575393677, "learning_rate": 0.002, "loss": 2.5769, "step": 29950 }, { "epoch": 0.059686982022185385, "grad_norm": 0.1799752563238144, "learning_rate": 0.002, "loss": 2.5893, "step": 29960 }, { "epoch": 0.059706904245824304, "grad_norm": 0.18108966946601868, "learning_rate": 0.002, "loss": 2.5872, "step": 29970 }, { "epoch": 0.059726826469463215, "grad_norm": 0.19499313831329346, "learning_rate": 0.002, "loss": 2.5848, "step": 29980 }, { "epoch": 0.05974674869310213, "grad_norm": 0.17617401480674744, "learning_rate": 0.002, "loss": 2.5898, "step": 29990 }, { "epoch": 0.059766670916741045, "grad_norm": 0.15463723242282867, "learning_rate": 0.002, "loss": 2.5802, "step": 30000 }, { "epoch": 0.05978659314037996, "grad_norm": 0.1686076819896698, "learning_rate": 0.002, "loss": 2.5995, "step": 30010 }, { "epoch": 0.05980651536401887, "grad_norm": 0.16047817468643188, "learning_rate": 0.002, "loss": 2.5751, "step": 30020 }, { "epoch": 0.05982643758765779, "grad_norm": 0.1909192055463791, "learning_rate": 0.002, "loss": 2.5828, "step": 30030 }, { "epoch": 0.0598463598112967, "grad_norm": 0.1718902736902237, "learning_rate": 0.002, "loss": 2.5959, "step": 30040 }, { "epoch": 0.05986628203493561, "grad_norm": 0.15431691706180573, "learning_rate": 0.002, "loss": 2.58, "step": 30050 }, { "epoch": 0.05988620425857453, "grad_norm": 0.1407969743013382, "learning_rate": 0.002, "loss": 2.5763, "step": 30060 }, { "epoch": 0.05990612648221344, "grad_norm": 0.1987900286912918, "learning_rate": 0.002, "loss": 2.5849, "step": 30070 }, { "epoch": 0.05992604870585235, "grad_norm": 0.1520947366952896, "learning_rate": 0.002, "loss": 2.5784, "step": 30080 }, { "epoch": 0.05994597092949126, "grad_norm": 0.15333053469657898, "learning_rate": 0.002, "loss": 2.5785, "step": 30090 }, { "epoch": 0.05996589315313018, "grad_norm": 0.2704237997531891, "learning_rate": 0.002, "loss": 2.5949, "step": 30100 }, { "epoch": 0.05998581537676909, "grad_norm": 0.16430805623531342, "learning_rate": 0.002, "loss": 2.5943, "step": 30110 }, { "epoch": 0.060005737600408005, "grad_norm": 0.16555257141590118, "learning_rate": 0.002, "loss": 2.5827, "step": 30120 }, { "epoch": 0.06002565982404692, "grad_norm": 0.1665162742137909, "learning_rate": 0.002, "loss": 2.5814, "step": 30130 }, { "epoch": 0.060045582047685835, "grad_norm": 0.1643582284450531, "learning_rate": 0.002, "loss": 2.5705, "step": 30140 }, { "epoch": 0.060065504271324746, "grad_norm": 0.16591201722621918, "learning_rate": 0.002, "loss": 2.5796, "step": 30150 }, { "epoch": 0.060085426494963665, "grad_norm": 0.17151688039302826, "learning_rate": 0.002, "loss": 2.5915, "step": 30160 }, { "epoch": 0.060105348718602576, "grad_norm": 0.1840543895959854, "learning_rate": 0.002, "loss": 2.5939, "step": 30170 }, { "epoch": 0.06012527094224149, "grad_norm": 0.1840987205505371, "learning_rate": 0.002, "loss": 2.5958, "step": 30180 }, { "epoch": 0.060145193165880406, "grad_norm": 0.2256021350622177, "learning_rate": 0.002, "loss": 2.582, "step": 30190 }, { "epoch": 0.06016511538951932, "grad_norm": 0.15005016326904297, "learning_rate": 0.002, "loss": 2.5825, "step": 30200 }, { "epoch": 0.06018503761315823, "grad_norm": 0.19680379331111908, "learning_rate": 0.002, "loss": 2.5732, "step": 30210 }, { "epoch": 0.06020495983679714, "grad_norm": 0.17521856725215912, "learning_rate": 0.002, "loss": 2.6042, "step": 30220 }, { "epoch": 0.06022488206043606, "grad_norm": 0.18544183671474457, "learning_rate": 0.002, "loss": 2.5917, "step": 30230 }, { "epoch": 0.06024480428407497, "grad_norm": 0.15174245834350586, "learning_rate": 0.002, "loss": 2.5662, "step": 30240 }, { "epoch": 0.06026472650771388, "grad_norm": 0.1725042164325714, "learning_rate": 0.002, "loss": 2.6045, "step": 30250 }, { "epoch": 0.0602846487313528, "grad_norm": 0.21266129612922668, "learning_rate": 0.002, "loss": 2.589, "step": 30260 }, { "epoch": 0.06030457095499171, "grad_norm": 0.18737611174583435, "learning_rate": 0.002, "loss": 2.5796, "step": 30270 }, { "epoch": 0.060324493178630624, "grad_norm": 0.16240806877613068, "learning_rate": 0.002, "loss": 2.5778, "step": 30280 }, { "epoch": 0.06034441540226954, "grad_norm": 0.1553172618150711, "learning_rate": 0.002, "loss": 2.5952, "step": 30290 }, { "epoch": 0.060364337625908454, "grad_norm": 0.15888695418834686, "learning_rate": 0.002, "loss": 2.5844, "step": 30300 }, { "epoch": 0.060384259849547366, "grad_norm": 0.14433878660202026, "learning_rate": 0.002, "loss": 2.5805, "step": 30310 }, { "epoch": 0.060404182073186284, "grad_norm": 0.15466488897800446, "learning_rate": 0.002, "loss": 2.5899, "step": 30320 }, { "epoch": 0.060424104296825196, "grad_norm": 0.1818501204252243, "learning_rate": 0.002, "loss": 2.5922, "step": 30330 }, { "epoch": 0.06044402652046411, "grad_norm": 0.1764431893825531, "learning_rate": 0.002, "loss": 2.5925, "step": 30340 }, { "epoch": 0.06046394874410302, "grad_norm": 0.19972528517246246, "learning_rate": 0.002, "loss": 2.5844, "step": 30350 }, { "epoch": 0.06048387096774194, "grad_norm": 0.1430976390838623, "learning_rate": 0.002, "loss": 2.5851, "step": 30360 }, { "epoch": 0.06050379319138085, "grad_norm": 0.17503371834754944, "learning_rate": 0.002, "loss": 2.5747, "step": 30370 }, { "epoch": 0.06052371541501976, "grad_norm": 0.16476552188396454, "learning_rate": 0.002, "loss": 2.5869, "step": 30380 }, { "epoch": 0.06054363763865868, "grad_norm": 0.21440765261650085, "learning_rate": 0.002, "loss": 2.5907, "step": 30390 }, { "epoch": 0.06056355986229759, "grad_norm": 0.16651925444602966, "learning_rate": 0.002, "loss": 2.5866, "step": 30400 }, { "epoch": 0.0605834820859365, "grad_norm": 0.16116440296173096, "learning_rate": 0.002, "loss": 2.5711, "step": 30410 }, { "epoch": 0.06060340430957542, "grad_norm": 0.17028120160102844, "learning_rate": 0.002, "loss": 2.5817, "step": 30420 }, { "epoch": 0.06062332653321433, "grad_norm": 0.2606968283653259, "learning_rate": 0.002, "loss": 2.5882, "step": 30430 }, { "epoch": 0.060643248756853244, "grad_norm": 0.18502067029476166, "learning_rate": 0.002, "loss": 2.5939, "step": 30440 }, { "epoch": 0.06066317098049216, "grad_norm": 0.14649488031864166, "learning_rate": 0.002, "loss": 2.58, "step": 30450 }, { "epoch": 0.060683093204131074, "grad_norm": 0.20059260725975037, "learning_rate": 0.002, "loss": 2.5757, "step": 30460 }, { "epoch": 0.060703015427769985, "grad_norm": 0.18019258975982666, "learning_rate": 0.002, "loss": 2.6031, "step": 30470 }, { "epoch": 0.0607229376514089, "grad_norm": 0.18203498423099518, "learning_rate": 0.002, "loss": 2.605, "step": 30480 }, { "epoch": 0.060742859875047815, "grad_norm": 0.17484213411808014, "learning_rate": 0.002, "loss": 2.5806, "step": 30490 }, { "epoch": 0.06076278209868673, "grad_norm": 0.18278677761554718, "learning_rate": 0.002, "loss": 2.5832, "step": 30500 }, { "epoch": 0.06078270432232564, "grad_norm": 0.16993963718414307, "learning_rate": 0.002, "loss": 2.5818, "step": 30510 }, { "epoch": 0.06080262654596456, "grad_norm": 0.18978554010391235, "learning_rate": 0.002, "loss": 2.5794, "step": 30520 }, { "epoch": 0.06082254876960347, "grad_norm": 0.19172050058841705, "learning_rate": 0.002, "loss": 2.5835, "step": 30530 }, { "epoch": 0.06084247099324238, "grad_norm": 0.17879986763000488, "learning_rate": 0.002, "loss": 2.5982, "step": 30540 }, { "epoch": 0.0608623932168813, "grad_norm": 0.15900668501853943, "learning_rate": 0.002, "loss": 2.5838, "step": 30550 }, { "epoch": 0.06088231544052021, "grad_norm": 0.16507229208946228, "learning_rate": 0.002, "loss": 2.5809, "step": 30560 }, { "epoch": 0.06090223766415912, "grad_norm": 0.19866372644901276, "learning_rate": 0.002, "loss": 2.584, "step": 30570 }, { "epoch": 0.06092215988779803, "grad_norm": 0.15717996656894684, "learning_rate": 0.002, "loss": 2.5818, "step": 30580 }, { "epoch": 0.06094208211143695, "grad_norm": 0.1887792944908142, "learning_rate": 0.002, "loss": 2.582, "step": 30590 }, { "epoch": 0.06096200433507586, "grad_norm": 0.1858900636434555, "learning_rate": 0.002, "loss": 2.5832, "step": 30600 }, { "epoch": 0.060981926558714775, "grad_norm": 0.18737125396728516, "learning_rate": 0.002, "loss": 2.5825, "step": 30610 }, { "epoch": 0.06100184878235369, "grad_norm": 0.19259001314640045, "learning_rate": 0.002, "loss": 2.5819, "step": 30620 }, { "epoch": 0.061021771005992605, "grad_norm": 0.19504068791866302, "learning_rate": 0.002, "loss": 2.5794, "step": 30630 }, { "epoch": 0.061041693229631516, "grad_norm": 0.16887377202510834, "learning_rate": 0.002, "loss": 2.5942, "step": 30640 }, { "epoch": 0.061061615453270435, "grad_norm": 0.14440622925758362, "learning_rate": 0.002, "loss": 2.5594, "step": 30650 }, { "epoch": 0.061081537676909346, "grad_norm": 0.18813873827457428, "learning_rate": 0.002, "loss": 2.5638, "step": 30660 }, { "epoch": 0.06110145990054826, "grad_norm": 0.17258557677268982, "learning_rate": 0.002, "loss": 2.5752, "step": 30670 }, { "epoch": 0.061121382124187176, "grad_norm": 0.13642260432243347, "learning_rate": 0.002, "loss": 2.5819, "step": 30680 }, { "epoch": 0.06114130434782609, "grad_norm": 0.19261540472507477, "learning_rate": 0.002, "loss": 2.5886, "step": 30690 }, { "epoch": 0.061161226571465, "grad_norm": 0.1537981927394867, "learning_rate": 0.002, "loss": 2.5831, "step": 30700 }, { "epoch": 0.06118114879510391, "grad_norm": 0.14566102623939514, "learning_rate": 0.002, "loss": 2.5939, "step": 30710 }, { "epoch": 0.06120107101874283, "grad_norm": 0.16937606036663055, "learning_rate": 0.002, "loss": 2.5837, "step": 30720 }, { "epoch": 0.06122099324238174, "grad_norm": 0.16278743743896484, "learning_rate": 0.002, "loss": 2.5877, "step": 30730 }, { "epoch": 0.06124091546602065, "grad_norm": 0.1631595492362976, "learning_rate": 0.002, "loss": 2.5847, "step": 30740 }, { "epoch": 0.06126083768965957, "grad_norm": 0.2032332867383957, "learning_rate": 0.002, "loss": 2.5997, "step": 30750 }, { "epoch": 0.06128075991329848, "grad_norm": 0.1700579971075058, "learning_rate": 0.002, "loss": 2.5849, "step": 30760 }, { "epoch": 0.061300682136937394, "grad_norm": 0.16751857101917267, "learning_rate": 0.002, "loss": 2.5756, "step": 30770 }, { "epoch": 0.06132060436057631, "grad_norm": 0.16863563656806946, "learning_rate": 0.002, "loss": 2.5605, "step": 30780 }, { "epoch": 0.061340526584215224, "grad_norm": 0.16538286209106445, "learning_rate": 0.002, "loss": 2.5871, "step": 30790 }, { "epoch": 0.061360448807854136, "grad_norm": 0.18583913147449493, "learning_rate": 0.002, "loss": 2.5824, "step": 30800 }, { "epoch": 0.061380371031493054, "grad_norm": 0.15231890976428986, "learning_rate": 0.002, "loss": 2.5891, "step": 30810 }, { "epoch": 0.061400293255131966, "grad_norm": 0.2064877599477768, "learning_rate": 0.002, "loss": 2.5695, "step": 30820 }, { "epoch": 0.06142021547877088, "grad_norm": 0.16523830592632294, "learning_rate": 0.002, "loss": 2.5947, "step": 30830 }, { "epoch": 0.06144013770240979, "grad_norm": 0.17449533939361572, "learning_rate": 0.002, "loss": 2.5886, "step": 30840 }, { "epoch": 0.06146005992604871, "grad_norm": 0.1883176863193512, "learning_rate": 0.002, "loss": 2.595, "step": 30850 }, { "epoch": 0.06147998214968762, "grad_norm": 0.16914376616477966, "learning_rate": 0.002, "loss": 2.5816, "step": 30860 }, { "epoch": 0.06149990437332653, "grad_norm": 0.17218652367591858, "learning_rate": 0.002, "loss": 2.5874, "step": 30870 }, { "epoch": 0.06151982659696545, "grad_norm": 0.20085382461547852, "learning_rate": 0.002, "loss": 2.5868, "step": 30880 }, { "epoch": 0.06153974882060436, "grad_norm": 0.17531715333461761, "learning_rate": 0.002, "loss": 2.5669, "step": 30890 }, { "epoch": 0.06155967104424327, "grad_norm": 0.20383107662200928, "learning_rate": 0.002, "loss": 2.5824, "step": 30900 }, { "epoch": 0.06157959326788219, "grad_norm": 0.16815277934074402, "learning_rate": 0.002, "loss": 2.5734, "step": 30910 }, { "epoch": 0.0615995154915211, "grad_norm": 0.1916276514530182, "learning_rate": 0.002, "loss": 2.5894, "step": 30920 }, { "epoch": 0.061619437715160014, "grad_norm": 0.15526384115219116, "learning_rate": 0.002, "loss": 2.5844, "step": 30930 }, { "epoch": 0.06163935993879893, "grad_norm": 0.18226009607315063, "learning_rate": 0.002, "loss": 2.5862, "step": 30940 }, { "epoch": 0.061659282162437844, "grad_norm": 0.19320829212665558, "learning_rate": 0.002, "loss": 2.575, "step": 30950 }, { "epoch": 0.061679204386076755, "grad_norm": 0.18605507910251617, "learning_rate": 0.002, "loss": 2.5738, "step": 30960 }, { "epoch": 0.06169912660971567, "grad_norm": 0.17842185497283936, "learning_rate": 0.002, "loss": 2.5777, "step": 30970 }, { "epoch": 0.061719048833354585, "grad_norm": 0.16339682042598724, "learning_rate": 0.002, "loss": 2.5926, "step": 30980 }, { "epoch": 0.0617389710569935, "grad_norm": 0.15672920644283295, "learning_rate": 0.002, "loss": 2.5904, "step": 30990 }, { "epoch": 0.06175889328063241, "grad_norm": 0.18017049133777618, "learning_rate": 0.002, "loss": 2.5908, "step": 31000 }, { "epoch": 0.06177881550427133, "grad_norm": 0.1860140562057495, "learning_rate": 0.002, "loss": 2.5966, "step": 31010 }, { "epoch": 0.06179873772791024, "grad_norm": 0.18124397099018097, "learning_rate": 0.002, "loss": 2.5738, "step": 31020 }, { "epoch": 0.06181865995154915, "grad_norm": 0.16957108676433563, "learning_rate": 0.002, "loss": 2.5688, "step": 31030 }, { "epoch": 0.06183858217518807, "grad_norm": 0.1545717418193817, "learning_rate": 0.002, "loss": 2.5914, "step": 31040 }, { "epoch": 0.06185850439882698, "grad_norm": 0.17230084538459778, "learning_rate": 0.002, "loss": 2.5863, "step": 31050 }, { "epoch": 0.06187842662246589, "grad_norm": 0.223469540476799, "learning_rate": 0.002, "loss": 2.5799, "step": 31060 }, { "epoch": 0.06189834884610481, "grad_norm": 0.16540825366973877, "learning_rate": 0.002, "loss": 2.5841, "step": 31070 }, { "epoch": 0.06191827106974372, "grad_norm": 0.20018893480300903, "learning_rate": 0.002, "loss": 2.5867, "step": 31080 }, { "epoch": 0.06193819329338263, "grad_norm": 0.1664251685142517, "learning_rate": 0.002, "loss": 2.5962, "step": 31090 }, { "epoch": 0.061958115517021545, "grad_norm": 0.20332464575767517, "learning_rate": 0.002, "loss": 2.5755, "step": 31100 }, { "epoch": 0.06197803774066046, "grad_norm": 0.1946181207895279, "learning_rate": 0.002, "loss": 2.598, "step": 31110 }, { "epoch": 0.061997959964299375, "grad_norm": 0.16331851482391357, "learning_rate": 0.002, "loss": 2.5935, "step": 31120 }, { "epoch": 0.062017882187938286, "grad_norm": 0.1789269894361496, "learning_rate": 0.002, "loss": 2.5842, "step": 31130 }, { "epoch": 0.062037804411577205, "grad_norm": 0.15503248572349548, "learning_rate": 0.002, "loss": 2.5886, "step": 31140 }, { "epoch": 0.062057726635216116, "grad_norm": 0.1399829238653183, "learning_rate": 0.002, "loss": 2.5698, "step": 31150 }, { "epoch": 0.06207764885885503, "grad_norm": 0.16422350704669952, "learning_rate": 0.002, "loss": 2.5935, "step": 31160 }, { "epoch": 0.062097571082493946, "grad_norm": 0.20167167484760284, "learning_rate": 0.002, "loss": 2.5838, "step": 31170 }, { "epoch": 0.06211749330613286, "grad_norm": 0.14472134411334991, "learning_rate": 0.002, "loss": 2.5851, "step": 31180 }, { "epoch": 0.06213741552977177, "grad_norm": 0.1844523847103119, "learning_rate": 0.002, "loss": 2.5799, "step": 31190 }, { "epoch": 0.06215733775341069, "grad_norm": 0.1699010282754898, "learning_rate": 0.002, "loss": 2.5796, "step": 31200 }, { "epoch": 0.0621772599770496, "grad_norm": 0.15836715698242188, "learning_rate": 0.002, "loss": 2.584, "step": 31210 }, { "epoch": 0.06219718220068851, "grad_norm": 0.1611434370279312, "learning_rate": 0.002, "loss": 2.5764, "step": 31220 }, { "epoch": 0.06221710442432742, "grad_norm": 0.15429040789604187, "learning_rate": 0.002, "loss": 2.5876, "step": 31230 }, { "epoch": 0.06223702664796634, "grad_norm": 0.17350222170352936, "learning_rate": 0.002, "loss": 2.5832, "step": 31240 }, { "epoch": 0.06225694887160525, "grad_norm": 0.1878085732460022, "learning_rate": 0.002, "loss": 2.568, "step": 31250 }, { "epoch": 0.062276871095244164, "grad_norm": 0.17501340806484222, "learning_rate": 0.002, "loss": 2.5818, "step": 31260 }, { "epoch": 0.06229679331888308, "grad_norm": 0.16519202291965485, "learning_rate": 0.002, "loss": 2.5895, "step": 31270 }, { "epoch": 0.062316715542521994, "grad_norm": 0.2087690234184265, "learning_rate": 0.002, "loss": 2.5752, "step": 31280 }, { "epoch": 0.062336637766160906, "grad_norm": 0.16643275320529938, "learning_rate": 0.002, "loss": 2.586, "step": 31290 }, { "epoch": 0.062356559989799824, "grad_norm": 0.16641077399253845, "learning_rate": 0.002, "loss": 2.5875, "step": 31300 }, { "epoch": 0.062376482213438736, "grad_norm": 0.17245522141456604, "learning_rate": 0.002, "loss": 2.5898, "step": 31310 }, { "epoch": 0.06239640443707765, "grad_norm": 0.2265341728925705, "learning_rate": 0.002, "loss": 2.5853, "step": 31320 }, { "epoch": 0.062416326660716566, "grad_norm": 0.1887817680835724, "learning_rate": 0.002, "loss": 2.5901, "step": 31330 }, { "epoch": 0.06243624888435548, "grad_norm": 0.17282405495643616, "learning_rate": 0.002, "loss": 2.5791, "step": 31340 }, { "epoch": 0.06245617110799439, "grad_norm": 0.15984316170215607, "learning_rate": 0.002, "loss": 2.5776, "step": 31350 }, { "epoch": 0.0624760933316333, "grad_norm": 0.19426588714122772, "learning_rate": 0.002, "loss": 2.5979, "step": 31360 }, { "epoch": 0.06249601555527222, "grad_norm": 0.16533391177654266, "learning_rate": 0.002, "loss": 2.5782, "step": 31370 }, { "epoch": 0.06251593777891114, "grad_norm": 0.18612626194953918, "learning_rate": 0.002, "loss": 2.5799, "step": 31380 }, { "epoch": 0.06253586000255004, "grad_norm": 0.19142965972423553, "learning_rate": 0.002, "loss": 2.5755, "step": 31390 }, { "epoch": 0.06255578222618896, "grad_norm": 0.15405456721782684, "learning_rate": 0.002, "loss": 2.5828, "step": 31400 }, { "epoch": 0.06257570444982787, "grad_norm": 0.17681632936000824, "learning_rate": 0.002, "loss": 2.5743, "step": 31410 }, { "epoch": 0.06259562667346678, "grad_norm": 0.14681392908096313, "learning_rate": 0.002, "loss": 2.5728, "step": 31420 }, { "epoch": 0.0626155488971057, "grad_norm": 0.171296164393425, "learning_rate": 0.002, "loss": 2.5853, "step": 31430 }, { "epoch": 0.0626354711207446, "grad_norm": 0.19488544762134552, "learning_rate": 0.002, "loss": 2.5692, "step": 31440 }, { "epoch": 0.06265539334438353, "grad_norm": 0.16731901466846466, "learning_rate": 0.002, "loss": 2.5777, "step": 31450 }, { "epoch": 0.06267531556802244, "grad_norm": 0.19355879724025726, "learning_rate": 0.002, "loss": 2.5763, "step": 31460 }, { "epoch": 0.06269523779166135, "grad_norm": 0.3695874512195587, "learning_rate": 0.002, "loss": 2.5723, "step": 31470 }, { "epoch": 0.06271516001530027, "grad_norm": 0.17087812721729279, "learning_rate": 0.002, "loss": 2.5877, "step": 31480 }, { "epoch": 0.06273508223893919, "grad_norm": 0.19339270889759064, "learning_rate": 0.002, "loss": 2.5921, "step": 31490 }, { "epoch": 0.06275500446257809, "grad_norm": 0.17427800595760345, "learning_rate": 0.002, "loss": 2.5831, "step": 31500 }, { "epoch": 0.06277492668621701, "grad_norm": 0.20112983882427216, "learning_rate": 0.002, "loss": 2.5969, "step": 31510 }, { "epoch": 0.06279484890985593, "grad_norm": 0.15455928444862366, "learning_rate": 0.002, "loss": 2.5817, "step": 31520 }, { "epoch": 0.06281477113349483, "grad_norm": 0.18617303669452667, "learning_rate": 0.002, "loss": 2.5891, "step": 31530 }, { "epoch": 0.06283469335713375, "grad_norm": 0.19217713177204132, "learning_rate": 0.002, "loss": 2.5953, "step": 31540 }, { "epoch": 0.06285461558077267, "grad_norm": 0.21883633732795715, "learning_rate": 0.002, "loss": 2.5728, "step": 31550 }, { "epoch": 0.06287453780441157, "grad_norm": 0.17607156932353973, "learning_rate": 0.002, "loss": 2.5922, "step": 31560 }, { "epoch": 0.06289446002805049, "grad_norm": 0.18628063797950745, "learning_rate": 0.002, "loss": 2.587, "step": 31570 }, { "epoch": 0.06291438225168941, "grad_norm": 0.17001502215862274, "learning_rate": 0.002, "loss": 2.5639, "step": 31580 }, { "epoch": 0.06293430447532831, "grad_norm": 0.16725654900074005, "learning_rate": 0.002, "loss": 2.5859, "step": 31590 }, { "epoch": 0.06295422669896723, "grad_norm": 0.1974397450685501, "learning_rate": 0.002, "loss": 2.572, "step": 31600 }, { "epoch": 0.06297414892260615, "grad_norm": 0.15282602608203888, "learning_rate": 0.002, "loss": 2.5985, "step": 31610 }, { "epoch": 0.06299407114624506, "grad_norm": 0.20064584910869598, "learning_rate": 0.002, "loss": 2.5763, "step": 31620 }, { "epoch": 0.06301399336988397, "grad_norm": 0.1916407346725464, "learning_rate": 0.002, "loss": 2.58, "step": 31630 }, { "epoch": 0.0630339155935229, "grad_norm": 0.1713387817144394, "learning_rate": 0.002, "loss": 2.579, "step": 31640 }, { "epoch": 0.0630538378171618, "grad_norm": 0.2229447364807129, "learning_rate": 0.002, "loss": 2.5911, "step": 31650 }, { "epoch": 0.06307376004080072, "grad_norm": 0.14483389258384705, "learning_rate": 0.002, "loss": 2.5865, "step": 31660 }, { "epoch": 0.06309368226443962, "grad_norm": 0.16790416836738586, "learning_rate": 0.002, "loss": 2.5779, "step": 31670 }, { "epoch": 0.06311360448807854, "grad_norm": 0.16228601336479187, "learning_rate": 0.002, "loss": 2.5893, "step": 31680 }, { "epoch": 0.06313352671171746, "grad_norm": 0.2419762909412384, "learning_rate": 0.002, "loss": 2.5872, "step": 31690 }, { "epoch": 0.06315344893535636, "grad_norm": 0.16939318180084229, "learning_rate": 0.002, "loss": 2.6023, "step": 31700 }, { "epoch": 0.06317337115899528, "grad_norm": 0.1359463334083557, "learning_rate": 0.002, "loss": 2.5822, "step": 31710 }, { "epoch": 0.0631932933826342, "grad_norm": 0.18485499918460846, "learning_rate": 0.002, "loss": 2.5871, "step": 31720 }, { "epoch": 0.0632132156062731, "grad_norm": 0.15197013318538666, "learning_rate": 0.002, "loss": 2.5792, "step": 31730 }, { "epoch": 0.06323313782991202, "grad_norm": 0.17131686210632324, "learning_rate": 0.002, "loss": 2.5925, "step": 31740 }, { "epoch": 0.06325306005355094, "grad_norm": 0.22837790846824646, "learning_rate": 0.002, "loss": 2.5817, "step": 31750 }, { "epoch": 0.06327298227718985, "grad_norm": 0.1956920325756073, "learning_rate": 0.002, "loss": 2.5626, "step": 31760 }, { "epoch": 0.06329290450082876, "grad_norm": 0.14080305397510529, "learning_rate": 0.002, "loss": 2.5853, "step": 31770 }, { "epoch": 0.06331282672446768, "grad_norm": 0.20922662317752838, "learning_rate": 0.002, "loss": 2.5827, "step": 31780 }, { "epoch": 0.06333274894810659, "grad_norm": 0.16303326189517975, "learning_rate": 0.002, "loss": 2.5805, "step": 31790 }, { "epoch": 0.0633526711717455, "grad_norm": 0.19405370950698853, "learning_rate": 0.002, "loss": 2.588, "step": 31800 }, { "epoch": 0.06337259339538442, "grad_norm": 0.15410132706165314, "learning_rate": 0.002, "loss": 2.5783, "step": 31810 }, { "epoch": 0.06339251561902333, "grad_norm": 0.18784888088703156, "learning_rate": 0.002, "loss": 2.5623, "step": 31820 }, { "epoch": 0.06341243784266225, "grad_norm": 0.18228726089000702, "learning_rate": 0.002, "loss": 2.5753, "step": 31830 }, { "epoch": 0.06343236006630117, "grad_norm": 0.16563594341278076, "learning_rate": 0.002, "loss": 2.5919, "step": 31840 }, { "epoch": 0.06345228228994007, "grad_norm": 0.1817663609981537, "learning_rate": 0.002, "loss": 2.5817, "step": 31850 }, { "epoch": 0.06347220451357899, "grad_norm": 0.1495690941810608, "learning_rate": 0.002, "loss": 2.5789, "step": 31860 }, { "epoch": 0.06349212673721791, "grad_norm": 0.18923112750053406, "learning_rate": 0.002, "loss": 2.5992, "step": 31870 }, { "epoch": 0.06351204896085681, "grad_norm": 0.1592911183834076, "learning_rate": 0.002, "loss": 2.5777, "step": 31880 }, { "epoch": 0.06353197118449573, "grad_norm": 0.1432742327451706, "learning_rate": 0.002, "loss": 2.5836, "step": 31890 }, { "epoch": 0.06355189340813465, "grad_norm": 0.1865924447774887, "learning_rate": 0.002, "loss": 2.5977, "step": 31900 }, { "epoch": 0.06357181563177355, "grad_norm": 0.19150027632713318, "learning_rate": 0.002, "loss": 2.5713, "step": 31910 }, { "epoch": 0.06359173785541247, "grad_norm": 0.15790791809558868, "learning_rate": 0.002, "loss": 2.5768, "step": 31920 }, { "epoch": 0.06361166007905138, "grad_norm": 0.17395761609077454, "learning_rate": 0.002, "loss": 2.5794, "step": 31930 }, { "epoch": 0.0636315823026903, "grad_norm": 0.1674225777387619, "learning_rate": 0.002, "loss": 2.5683, "step": 31940 }, { "epoch": 0.06365150452632921, "grad_norm": 0.17298246920108795, "learning_rate": 0.002, "loss": 2.5807, "step": 31950 }, { "epoch": 0.06367142674996812, "grad_norm": 0.17389406263828278, "learning_rate": 0.002, "loss": 2.5765, "step": 31960 }, { "epoch": 0.06369134897360704, "grad_norm": 0.19757701456546783, "learning_rate": 0.002, "loss": 2.5757, "step": 31970 }, { "epoch": 0.06371127119724596, "grad_norm": 0.1534549593925476, "learning_rate": 0.002, "loss": 2.5946, "step": 31980 }, { "epoch": 0.06373119342088486, "grad_norm": 0.18817277252674103, "learning_rate": 0.002, "loss": 2.5967, "step": 31990 }, { "epoch": 0.06375111564452378, "grad_norm": 0.21115680038928986, "learning_rate": 0.002, "loss": 2.5905, "step": 32000 }, { "epoch": 0.0637710378681627, "grad_norm": 0.15540944039821625, "learning_rate": 0.002, "loss": 2.6004, "step": 32010 }, { "epoch": 0.0637909600918016, "grad_norm": 0.167299285531044, "learning_rate": 0.002, "loss": 2.5979, "step": 32020 }, { "epoch": 0.06381088231544052, "grad_norm": 0.16122378408908844, "learning_rate": 0.002, "loss": 2.5685, "step": 32030 }, { "epoch": 0.06383080453907944, "grad_norm": 0.16916298866271973, "learning_rate": 0.002, "loss": 2.5692, "step": 32040 }, { "epoch": 0.06385072676271834, "grad_norm": 0.19407342374324799, "learning_rate": 0.002, "loss": 2.5801, "step": 32050 }, { "epoch": 0.06387064898635726, "grad_norm": 0.1550748646259308, "learning_rate": 0.002, "loss": 2.5774, "step": 32060 }, { "epoch": 0.06389057120999618, "grad_norm": 0.19654935598373413, "learning_rate": 0.002, "loss": 2.5755, "step": 32070 }, { "epoch": 0.06391049343363508, "grad_norm": 0.21432039141654968, "learning_rate": 0.002, "loss": 2.5818, "step": 32080 }, { "epoch": 0.063930415657274, "grad_norm": 0.15922078490257263, "learning_rate": 0.002, "loss": 2.5846, "step": 32090 }, { "epoch": 0.06395033788091292, "grad_norm": 0.20032308995723724, "learning_rate": 0.002, "loss": 2.5697, "step": 32100 }, { "epoch": 0.06397026010455183, "grad_norm": 0.18943867087364197, "learning_rate": 0.002, "loss": 2.5883, "step": 32110 }, { "epoch": 0.06399018232819075, "grad_norm": 0.17765504121780396, "learning_rate": 0.002, "loss": 2.5884, "step": 32120 }, { "epoch": 0.06401010455182966, "grad_norm": 0.16606953740119934, "learning_rate": 0.002, "loss": 2.5704, "step": 32130 }, { "epoch": 0.06403002677546857, "grad_norm": 0.16614261269569397, "learning_rate": 0.002, "loss": 2.5798, "step": 32140 }, { "epoch": 0.06404994899910749, "grad_norm": 0.15565969049930573, "learning_rate": 0.002, "loss": 2.5941, "step": 32150 }, { "epoch": 0.06406987122274639, "grad_norm": 0.1855083703994751, "learning_rate": 0.002, "loss": 2.5972, "step": 32160 }, { "epoch": 0.06408979344638531, "grad_norm": 0.16817328333854675, "learning_rate": 0.002, "loss": 2.5801, "step": 32170 }, { "epoch": 0.06410971567002423, "grad_norm": 0.20447589457035065, "learning_rate": 0.002, "loss": 2.5936, "step": 32180 }, { "epoch": 0.06412963789366313, "grad_norm": 0.15840594470500946, "learning_rate": 0.002, "loss": 2.5742, "step": 32190 }, { "epoch": 0.06414956011730205, "grad_norm": 0.2066657543182373, "learning_rate": 0.002, "loss": 2.5839, "step": 32200 }, { "epoch": 0.06416948234094097, "grad_norm": 0.15521478652954102, "learning_rate": 0.002, "loss": 2.5792, "step": 32210 }, { "epoch": 0.06418940456457987, "grad_norm": 0.178106889128685, "learning_rate": 0.002, "loss": 2.5759, "step": 32220 }, { "epoch": 0.06420932678821879, "grad_norm": 0.18071922659873962, "learning_rate": 0.002, "loss": 2.5692, "step": 32230 }, { "epoch": 0.06422924901185771, "grad_norm": 0.17742495238780975, "learning_rate": 0.002, "loss": 2.5869, "step": 32240 }, { "epoch": 0.06424917123549662, "grad_norm": 0.17711292207241058, "learning_rate": 0.002, "loss": 2.5937, "step": 32250 }, { "epoch": 0.06426909345913553, "grad_norm": 0.2126317173242569, "learning_rate": 0.002, "loss": 2.5774, "step": 32260 }, { "epoch": 0.06428901568277445, "grad_norm": 0.1381249725818634, "learning_rate": 0.002, "loss": 2.5869, "step": 32270 }, { "epoch": 0.06430893790641336, "grad_norm": 0.18849971890449524, "learning_rate": 0.002, "loss": 2.5897, "step": 32280 }, { "epoch": 0.06432886013005228, "grad_norm": 0.1748391091823578, "learning_rate": 0.002, "loss": 2.5773, "step": 32290 }, { "epoch": 0.0643487823536912, "grad_norm": 0.20269261300563812, "learning_rate": 0.002, "loss": 2.5682, "step": 32300 }, { "epoch": 0.0643687045773301, "grad_norm": 0.15886306762695312, "learning_rate": 0.002, "loss": 2.5841, "step": 32310 }, { "epoch": 0.06438862680096902, "grad_norm": 0.14712993800640106, "learning_rate": 0.002, "loss": 2.5807, "step": 32320 }, { "epoch": 0.06440854902460794, "grad_norm": 0.173366516828537, "learning_rate": 0.002, "loss": 2.5817, "step": 32330 }, { "epoch": 0.06442847124824684, "grad_norm": 0.1757202297449112, "learning_rate": 0.002, "loss": 2.576, "step": 32340 }, { "epoch": 0.06444839347188576, "grad_norm": 0.1813686341047287, "learning_rate": 0.002, "loss": 2.5813, "step": 32350 }, { "epoch": 0.06446831569552468, "grad_norm": 0.20240084826946259, "learning_rate": 0.002, "loss": 2.5831, "step": 32360 }, { "epoch": 0.06448823791916358, "grad_norm": 0.17037947475910187, "learning_rate": 0.002, "loss": 2.5752, "step": 32370 }, { "epoch": 0.0645081601428025, "grad_norm": 0.1498997062444687, "learning_rate": 0.002, "loss": 2.5792, "step": 32380 }, { "epoch": 0.06452808236644142, "grad_norm": 0.19087877869606018, "learning_rate": 0.002, "loss": 2.5905, "step": 32390 }, { "epoch": 0.06454800459008032, "grad_norm": 0.17746654152870178, "learning_rate": 0.002, "loss": 2.579, "step": 32400 }, { "epoch": 0.06456792681371924, "grad_norm": 0.16915304958820343, "learning_rate": 0.002, "loss": 2.5651, "step": 32410 }, { "epoch": 0.06458784903735815, "grad_norm": 0.175521582365036, "learning_rate": 0.002, "loss": 2.5717, "step": 32420 }, { "epoch": 0.06460777126099707, "grad_norm": 0.1422380954027176, "learning_rate": 0.002, "loss": 2.5896, "step": 32430 }, { "epoch": 0.06462769348463598, "grad_norm": 0.16255566477775574, "learning_rate": 0.002, "loss": 2.5723, "step": 32440 }, { "epoch": 0.06464761570827489, "grad_norm": 0.18986418843269348, "learning_rate": 0.002, "loss": 2.5781, "step": 32450 }, { "epoch": 0.06466753793191381, "grad_norm": 0.2682022750377655, "learning_rate": 0.002, "loss": 2.5667, "step": 32460 }, { "epoch": 0.06468746015555273, "grad_norm": 0.20933043956756592, "learning_rate": 0.002, "loss": 2.5936, "step": 32470 }, { "epoch": 0.06470738237919163, "grad_norm": 0.15954574942588806, "learning_rate": 0.002, "loss": 2.5931, "step": 32480 }, { "epoch": 0.06472730460283055, "grad_norm": 0.23391097784042358, "learning_rate": 0.002, "loss": 2.574, "step": 32490 }, { "epoch": 0.06474722682646947, "grad_norm": 0.16195744276046753, "learning_rate": 0.002, "loss": 2.5807, "step": 32500 }, { "epoch": 0.06476714905010837, "grad_norm": 0.16043268144130707, "learning_rate": 0.002, "loss": 2.5902, "step": 32510 }, { "epoch": 0.06478707127374729, "grad_norm": 0.16379117965698242, "learning_rate": 0.002, "loss": 2.5742, "step": 32520 }, { "epoch": 0.06480699349738621, "grad_norm": 0.1850557178258896, "learning_rate": 0.002, "loss": 2.5872, "step": 32530 }, { "epoch": 0.06482691572102511, "grad_norm": 0.15875481069087982, "learning_rate": 0.002, "loss": 2.5954, "step": 32540 }, { "epoch": 0.06484683794466403, "grad_norm": 0.18420268595218658, "learning_rate": 0.002, "loss": 2.5895, "step": 32550 }, { "epoch": 0.06486676016830295, "grad_norm": 0.22823286056518555, "learning_rate": 0.002, "loss": 2.5957, "step": 32560 }, { "epoch": 0.06488668239194186, "grad_norm": 0.16493524610996246, "learning_rate": 0.002, "loss": 2.5774, "step": 32570 }, { "epoch": 0.06490660461558077, "grad_norm": 0.13694489002227783, "learning_rate": 0.002, "loss": 2.568, "step": 32580 }, { "epoch": 0.06492652683921969, "grad_norm": 0.18605297803878784, "learning_rate": 0.002, "loss": 2.5657, "step": 32590 }, { "epoch": 0.0649464490628586, "grad_norm": 0.17703884840011597, "learning_rate": 0.002, "loss": 2.5758, "step": 32600 }, { "epoch": 0.06496637128649752, "grad_norm": 0.16139617562294006, "learning_rate": 0.002, "loss": 2.5765, "step": 32610 }, { "epoch": 0.06498629351013643, "grad_norm": 0.2325436770915985, "learning_rate": 0.002, "loss": 2.5824, "step": 32620 }, { "epoch": 0.06500621573377534, "grad_norm": 0.18111523985862732, "learning_rate": 0.002, "loss": 2.5788, "step": 32630 }, { "epoch": 0.06502613795741426, "grad_norm": 0.17147819697856903, "learning_rate": 0.002, "loss": 2.5769, "step": 32640 }, { "epoch": 0.06504606018105318, "grad_norm": 0.15799470245838165, "learning_rate": 0.002, "loss": 2.5915, "step": 32650 }, { "epoch": 0.06506598240469208, "grad_norm": 0.17781876027584076, "learning_rate": 0.002, "loss": 2.5764, "step": 32660 }, { "epoch": 0.065085904628331, "grad_norm": 0.15157683193683624, "learning_rate": 0.002, "loss": 2.5835, "step": 32670 }, { "epoch": 0.0651058268519699, "grad_norm": 0.24172373116016388, "learning_rate": 0.002, "loss": 2.5952, "step": 32680 }, { "epoch": 0.06512574907560882, "grad_norm": 0.16097307205200195, "learning_rate": 0.002, "loss": 2.5965, "step": 32690 }, { "epoch": 0.06514567129924774, "grad_norm": 0.1745757907629013, "learning_rate": 0.002, "loss": 2.5821, "step": 32700 }, { "epoch": 0.06516559352288664, "grad_norm": 0.163258358836174, "learning_rate": 0.002, "loss": 2.5964, "step": 32710 }, { "epoch": 0.06518551574652556, "grad_norm": 0.1639789640903473, "learning_rate": 0.002, "loss": 2.5792, "step": 32720 }, { "epoch": 0.06520543797016448, "grad_norm": 0.2027866691350937, "learning_rate": 0.002, "loss": 2.5794, "step": 32730 }, { "epoch": 0.06522536019380339, "grad_norm": 0.16022631525993347, "learning_rate": 0.002, "loss": 2.5946, "step": 32740 }, { "epoch": 0.0652452824174423, "grad_norm": 0.19897879660129547, "learning_rate": 0.002, "loss": 2.5982, "step": 32750 }, { "epoch": 0.06526520464108122, "grad_norm": 0.1548442244529724, "learning_rate": 0.002, "loss": 2.5889, "step": 32760 }, { "epoch": 0.06528512686472013, "grad_norm": 0.16614030301570892, "learning_rate": 0.002, "loss": 2.5915, "step": 32770 }, { "epoch": 0.06530504908835905, "grad_norm": 0.207230344414711, "learning_rate": 0.002, "loss": 2.5686, "step": 32780 }, { "epoch": 0.06532497131199796, "grad_norm": 0.14142628014087677, "learning_rate": 0.002, "loss": 2.5855, "step": 32790 }, { "epoch": 0.06534489353563687, "grad_norm": 0.17198793590068817, "learning_rate": 0.002, "loss": 2.5884, "step": 32800 }, { "epoch": 0.06536481575927579, "grad_norm": 0.15941043198108673, "learning_rate": 0.002, "loss": 2.5855, "step": 32810 }, { "epoch": 0.0653847379829147, "grad_norm": 0.20955190062522888, "learning_rate": 0.002, "loss": 2.5838, "step": 32820 }, { "epoch": 0.06540466020655361, "grad_norm": 0.1631559580564499, "learning_rate": 0.002, "loss": 2.6002, "step": 32830 }, { "epoch": 0.06542458243019253, "grad_norm": 0.18079371750354767, "learning_rate": 0.002, "loss": 2.591, "step": 32840 }, { "epoch": 0.06544450465383145, "grad_norm": 0.2607443034648895, "learning_rate": 0.002, "loss": 2.5939, "step": 32850 }, { "epoch": 0.06546442687747035, "grad_norm": 0.15411131083965302, "learning_rate": 0.002, "loss": 2.5858, "step": 32860 }, { "epoch": 0.06548434910110927, "grad_norm": 0.1531631499528885, "learning_rate": 0.002, "loss": 2.584, "step": 32870 }, { "epoch": 0.06550427132474819, "grad_norm": 0.16326609253883362, "learning_rate": 0.002, "loss": 2.588, "step": 32880 }, { "epoch": 0.0655241935483871, "grad_norm": 0.17687711119651794, "learning_rate": 0.002, "loss": 2.5734, "step": 32890 }, { "epoch": 0.06554411577202601, "grad_norm": 0.1641993522644043, "learning_rate": 0.002, "loss": 2.5791, "step": 32900 }, { "epoch": 0.06556403799566493, "grad_norm": 0.16506724059581757, "learning_rate": 0.002, "loss": 2.5734, "step": 32910 }, { "epoch": 0.06558396021930384, "grad_norm": 0.16431114077568054, "learning_rate": 0.002, "loss": 2.5776, "step": 32920 }, { "epoch": 0.06560388244294275, "grad_norm": 0.16903994977474213, "learning_rate": 0.002, "loss": 2.5904, "step": 32930 }, { "epoch": 0.06562380466658166, "grad_norm": 0.19745926558971405, "learning_rate": 0.002, "loss": 2.5809, "step": 32940 }, { "epoch": 0.06564372689022058, "grad_norm": 0.15283766388893127, "learning_rate": 0.002, "loss": 2.5856, "step": 32950 }, { "epoch": 0.0656636491138595, "grad_norm": 0.1703520566225052, "learning_rate": 0.002, "loss": 2.5894, "step": 32960 }, { "epoch": 0.0656835713374984, "grad_norm": 0.1924659013748169, "learning_rate": 0.002, "loss": 2.582, "step": 32970 }, { "epoch": 0.06570349356113732, "grad_norm": 0.17063944041728973, "learning_rate": 0.002, "loss": 2.5737, "step": 32980 }, { "epoch": 0.06572341578477624, "grad_norm": 0.18514762818813324, "learning_rate": 0.002, "loss": 2.5855, "step": 32990 }, { "epoch": 0.06574333800841514, "grad_norm": 0.1945972740650177, "learning_rate": 0.002, "loss": 2.5713, "step": 33000 }, { "epoch": 0.06576326023205406, "grad_norm": 0.17349891364574432, "learning_rate": 0.002, "loss": 2.5944, "step": 33010 }, { "epoch": 0.06578318245569298, "grad_norm": 0.16504423320293427, "learning_rate": 0.002, "loss": 2.5836, "step": 33020 }, { "epoch": 0.06580310467933188, "grad_norm": 0.17386780679225922, "learning_rate": 0.002, "loss": 2.6005, "step": 33030 }, { "epoch": 0.0658230269029708, "grad_norm": 0.15593507885932922, "learning_rate": 0.002, "loss": 2.5754, "step": 33040 }, { "epoch": 0.06584294912660972, "grad_norm": 0.16303960978984833, "learning_rate": 0.002, "loss": 2.576, "step": 33050 }, { "epoch": 0.06586287135024863, "grad_norm": 0.19655701518058777, "learning_rate": 0.002, "loss": 2.5788, "step": 33060 }, { "epoch": 0.06588279357388754, "grad_norm": 0.1484663188457489, "learning_rate": 0.002, "loss": 2.5772, "step": 33070 }, { "epoch": 0.06590271579752646, "grad_norm": 0.16879390180110931, "learning_rate": 0.002, "loss": 2.5821, "step": 33080 }, { "epoch": 0.06592263802116537, "grad_norm": 0.1452464461326599, "learning_rate": 0.002, "loss": 2.5776, "step": 33090 }, { "epoch": 0.06594256024480429, "grad_norm": 0.1759609431028366, "learning_rate": 0.002, "loss": 2.5802, "step": 33100 }, { "epoch": 0.0659624824684432, "grad_norm": 0.17897720634937286, "learning_rate": 0.002, "loss": 2.5919, "step": 33110 }, { "epoch": 0.06598240469208211, "grad_norm": 0.154389426112175, "learning_rate": 0.002, "loss": 2.5951, "step": 33120 }, { "epoch": 0.06600232691572103, "grad_norm": 0.1750633716583252, "learning_rate": 0.002, "loss": 2.5867, "step": 33130 }, { "epoch": 0.06602224913935995, "grad_norm": 0.15397508442401886, "learning_rate": 0.002, "loss": 2.5774, "step": 33140 }, { "epoch": 0.06604217136299885, "grad_norm": 0.19683703780174255, "learning_rate": 0.002, "loss": 2.5766, "step": 33150 }, { "epoch": 0.06606209358663777, "grad_norm": 0.13805034756660461, "learning_rate": 0.002, "loss": 2.5992, "step": 33160 }, { "epoch": 0.06608201581027667, "grad_norm": 0.1818941980600357, "learning_rate": 0.002, "loss": 2.5883, "step": 33170 }, { "epoch": 0.06610193803391559, "grad_norm": 0.20678207278251648, "learning_rate": 0.002, "loss": 2.5713, "step": 33180 }, { "epoch": 0.06612186025755451, "grad_norm": 0.16489878296852112, "learning_rate": 0.002, "loss": 2.6049, "step": 33190 }, { "epoch": 0.06614178248119341, "grad_norm": 0.16947530210018158, "learning_rate": 0.002, "loss": 2.5944, "step": 33200 }, { "epoch": 0.06616170470483233, "grad_norm": 0.24719476699829102, "learning_rate": 0.002, "loss": 2.5824, "step": 33210 }, { "epoch": 0.06618162692847125, "grad_norm": 0.15953554213047028, "learning_rate": 0.002, "loss": 2.5766, "step": 33220 }, { "epoch": 0.06620154915211016, "grad_norm": 0.19388650357723236, "learning_rate": 0.002, "loss": 2.5731, "step": 33230 }, { "epoch": 0.06622147137574907, "grad_norm": 0.1598372459411621, "learning_rate": 0.002, "loss": 2.5886, "step": 33240 }, { "epoch": 0.066241393599388, "grad_norm": 0.17749063670635223, "learning_rate": 0.002, "loss": 2.5773, "step": 33250 }, { "epoch": 0.0662613158230269, "grad_norm": 0.2580915689468384, "learning_rate": 0.002, "loss": 2.577, "step": 33260 }, { "epoch": 0.06628123804666582, "grad_norm": 0.15589214861392975, "learning_rate": 0.002, "loss": 2.5786, "step": 33270 }, { "epoch": 0.06630116027030473, "grad_norm": 0.18350915610790253, "learning_rate": 0.002, "loss": 2.5862, "step": 33280 }, { "epoch": 0.06632108249394364, "grad_norm": 0.17827188968658447, "learning_rate": 0.002, "loss": 2.5826, "step": 33290 }, { "epoch": 0.06634100471758256, "grad_norm": 0.17119896411895752, "learning_rate": 0.002, "loss": 2.5898, "step": 33300 }, { "epoch": 0.06636092694122148, "grad_norm": 0.16417433321475983, "learning_rate": 0.002, "loss": 2.5837, "step": 33310 }, { "epoch": 0.06638084916486038, "grad_norm": 0.15528208017349243, "learning_rate": 0.002, "loss": 2.587, "step": 33320 }, { "epoch": 0.0664007713884993, "grad_norm": 0.15843465924263, "learning_rate": 0.002, "loss": 2.5872, "step": 33330 }, { "epoch": 0.06642069361213822, "grad_norm": 0.17762817442417145, "learning_rate": 0.002, "loss": 2.5897, "step": 33340 }, { "epoch": 0.06644061583577712, "grad_norm": 0.2222694754600525, "learning_rate": 0.002, "loss": 2.5668, "step": 33350 }, { "epoch": 0.06646053805941604, "grad_norm": 0.15403437614440918, "learning_rate": 0.002, "loss": 2.5909, "step": 33360 }, { "epoch": 0.06648046028305496, "grad_norm": 0.17389428615570068, "learning_rate": 0.002, "loss": 2.5847, "step": 33370 }, { "epoch": 0.06650038250669386, "grad_norm": 0.20874224603176117, "learning_rate": 0.002, "loss": 2.5833, "step": 33380 }, { "epoch": 0.06652030473033278, "grad_norm": 0.17588289082050323, "learning_rate": 0.002, "loss": 2.5769, "step": 33390 }, { "epoch": 0.0665402269539717, "grad_norm": 0.18303149938583374, "learning_rate": 0.002, "loss": 2.5924, "step": 33400 }, { "epoch": 0.0665601491776106, "grad_norm": 0.16285696625709534, "learning_rate": 0.002, "loss": 2.5852, "step": 33410 }, { "epoch": 0.06658007140124952, "grad_norm": 0.16307130455970764, "learning_rate": 0.002, "loss": 2.5866, "step": 33420 }, { "epoch": 0.06659999362488843, "grad_norm": 0.19675713777542114, "learning_rate": 0.002, "loss": 2.5838, "step": 33430 }, { "epoch": 0.06661991584852735, "grad_norm": 0.1759311556816101, "learning_rate": 0.002, "loss": 2.5703, "step": 33440 }, { "epoch": 0.06663983807216627, "grad_norm": 0.16510732471942902, "learning_rate": 0.002, "loss": 2.5775, "step": 33450 }, { "epoch": 0.06665976029580517, "grad_norm": 0.1842198520898819, "learning_rate": 0.002, "loss": 2.5756, "step": 33460 }, { "epoch": 0.06667968251944409, "grad_norm": 0.20826953649520874, "learning_rate": 0.002, "loss": 2.5658, "step": 33470 }, { "epoch": 0.06669960474308301, "grad_norm": 0.15868842601776123, "learning_rate": 0.002, "loss": 2.5798, "step": 33480 }, { "epoch": 0.06671952696672191, "grad_norm": 0.2073439359664917, "learning_rate": 0.002, "loss": 2.5784, "step": 33490 }, { "epoch": 0.06673944919036083, "grad_norm": 0.1660415530204773, "learning_rate": 0.002, "loss": 2.5801, "step": 33500 }, { "epoch": 0.06675937141399975, "grad_norm": 0.2142777442932129, "learning_rate": 0.002, "loss": 2.5892, "step": 33510 }, { "epoch": 0.06677929363763865, "grad_norm": 0.1799732893705368, "learning_rate": 0.002, "loss": 2.5901, "step": 33520 }, { "epoch": 0.06679921586127757, "grad_norm": 0.18658851087093353, "learning_rate": 0.002, "loss": 2.58, "step": 33530 }, { "epoch": 0.06681913808491649, "grad_norm": 0.17104125022888184, "learning_rate": 0.002, "loss": 2.5899, "step": 33540 }, { "epoch": 0.0668390603085554, "grad_norm": 0.14825239777565002, "learning_rate": 0.002, "loss": 2.5885, "step": 33550 }, { "epoch": 0.06685898253219431, "grad_norm": 0.24577493965625763, "learning_rate": 0.002, "loss": 2.5756, "step": 33560 }, { "epoch": 0.06687890475583323, "grad_norm": 0.1405012309551239, "learning_rate": 0.002, "loss": 2.5886, "step": 33570 }, { "epoch": 0.06689882697947214, "grad_norm": 0.19576779007911682, "learning_rate": 0.002, "loss": 2.5715, "step": 33580 }, { "epoch": 0.06691874920311106, "grad_norm": 0.1890070140361786, "learning_rate": 0.002, "loss": 2.5776, "step": 33590 }, { "epoch": 0.06693867142674997, "grad_norm": 0.17109565436840057, "learning_rate": 0.002, "loss": 2.5793, "step": 33600 }, { "epoch": 0.06695859365038888, "grad_norm": 0.1821363866329193, "learning_rate": 0.002, "loss": 2.5851, "step": 33610 }, { "epoch": 0.0669785158740278, "grad_norm": 0.1486554592847824, "learning_rate": 0.002, "loss": 2.5778, "step": 33620 }, { "epoch": 0.06699843809766672, "grad_norm": 0.21288761496543884, "learning_rate": 0.002, "loss": 2.5705, "step": 33630 }, { "epoch": 0.06701836032130562, "grad_norm": 0.14917375147342682, "learning_rate": 0.002, "loss": 2.5967, "step": 33640 }, { "epoch": 0.06703828254494454, "grad_norm": 0.18232691287994385, "learning_rate": 0.002, "loss": 2.5794, "step": 33650 }, { "epoch": 0.06705820476858346, "grad_norm": 0.18510960042476654, "learning_rate": 0.002, "loss": 2.5732, "step": 33660 }, { "epoch": 0.06707812699222236, "grad_norm": 0.15014144778251648, "learning_rate": 0.002, "loss": 2.5824, "step": 33670 }, { "epoch": 0.06709804921586128, "grad_norm": 0.18240895867347717, "learning_rate": 0.002, "loss": 2.5888, "step": 33680 }, { "epoch": 0.06711797143950018, "grad_norm": 0.1950579583644867, "learning_rate": 0.002, "loss": 2.5919, "step": 33690 }, { "epoch": 0.0671378936631391, "grad_norm": 0.1504124104976654, "learning_rate": 0.002, "loss": 2.5713, "step": 33700 }, { "epoch": 0.06715781588677802, "grad_norm": 0.19220517575740814, "learning_rate": 0.002, "loss": 2.5814, "step": 33710 }, { "epoch": 0.06717773811041693, "grad_norm": 0.1700599044561386, "learning_rate": 0.002, "loss": 2.5872, "step": 33720 }, { "epoch": 0.06719766033405584, "grad_norm": 0.19597461819648743, "learning_rate": 0.002, "loss": 2.5751, "step": 33730 }, { "epoch": 0.06721758255769476, "grad_norm": 0.1924414038658142, "learning_rate": 0.002, "loss": 2.5868, "step": 33740 }, { "epoch": 0.06723750478133367, "grad_norm": 0.21803131699562073, "learning_rate": 0.002, "loss": 2.5748, "step": 33750 }, { "epoch": 0.06725742700497259, "grad_norm": 0.15934030711650848, "learning_rate": 0.002, "loss": 2.5893, "step": 33760 }, { "epoch": 0.0672773492286115, "grad_norm": 0.20012196898460388, "learning_rate": 0.002, "loss": 2.5731, "step": 33770 }, { "epoch": 0.06729727145225041, "grad_norm": 0.16985908150672913, "learning_rate": 0.002, "loss": 2.5786, "step": 33780 }, { "epoch": 0.06731719367588933, "grad_norm": 0.14481763541698456, "learning_rate": 0.002, "loss": 2.5761, "step": 33790 }, { "epoch": 0.06733711589952825, "grad_norm": 0.1498771458864212, "learning_rate": 0.002, "loss": 2.5736, "step": 33800 }, { "epoch": 0.06735703812316715, "grad_norm": 0.15239448845386505, "learning_rate": 0.002, "loss": 2.5582, "step": 33810 }, { "epoch": 0.06737696034680607, "grad_norm": 0.18915778398513794, "learning_rate": 0.002, "loss": 2.5894, "step": 33820 }, { "epoch": 0.06739688257044499, "grad_norm": 0.15658564865589142, "learning_rate": 0.002, "loss": 2.5642, "step": 33830 }, { "epoch": 0.06741680479408389, "grad_norm": 0.17112164199352264, "learning_rate": 0.002, "loss": 2.5721, "step": 33840 }, { "epoch": 0.06743672701772281, "grad_norm": 0.20541666448116302, "learning_rate": 0.002, "loss": 2.5886, "step": 33850 }, { "epoch": 0.06745664924136173, "grad_norm": 0.20513498783111572, "learning_rate": 0.002, "loss": 2.5756, "step": 33860 }, { "epoch": 0.06747657146500063, "grad_norm": 0.1725904941558838, "learning_rate": 0.002, "loss": 2.5758, "step": 33870 }, { "epoch": 0.06749649368863955, "grad_norm": 0.1929531693458557, "learning_rate": 0.002, "loss": 2.5847, "step": 33880 }, { "epoch": 0.06751641591227847, "grad_norm": 0.18456286191940308, "learning_rate": 0.002, "loss": 2.5879, "step": 33890 }, { "epoch": 0.06753633813591738, "grad_norm": 0.19724465906620026, "learning_rate": 0.002, "loss": 2.5758, "step": 33900 }, { "epoch": 0.0675562603595563, "grad_norm": 0.17714452743530273, "learning_rate": 0.002, "loss": 2.5787, "step": 33910 }, { "epoch": 0.0675761825831952, "grad_norm": 0.14518150687217712, "learning_rate": 0.002, "loss": 2.576, "step": 33920 }, { "epoch": 0.06759610480683412, "grad_norm": 0.1663220077753067, "learning_rate": 0.002, "loss": 2.5972, "step": 33930 }, { "epoch": 0.06761602703047304, "grad_norm": 0.16646812856197357, "learning_rate": 0.002, "loss": 2.5885, "step": 33940 }, { "epoch": 0.06763594925411194, "grad_norm": 0.18687978386878967, "learning_rate": 0.002, "loss": 2.5733, "step": 33950 }, { "epoch": 0.06765587147775086, "grad_norm": 0.21907593309879303, "learning_rate": 0.002, "loss": 2.5705, "step": 33960 }, { "epoch": 0.06767579370138978, "grad_norm": 0.21287347376346588, "learning_rate": 0.002, "loss": 2.592, "step": 33970 }, { "epoch": 0.06769571592502868, "grad_norm": 0.15574660897254944, "learning_rate": 0.002, "loss": 2.5793, "step": 33980 }, { "epoch": 0.0677156381486676, "grad_norm": 0.1822294294834137, "learning_rate": 0.002, "loss": 2.567, "step": 33990 }, { "epoch": 0.06773556037230652, "grad_norm": 0.16364020109176636, "learning_rate": 0.002, "loss": 2.5619, "step": 34000 }, { "epoch": 0.06775548259594542, "grad_norm": 0.18340618908405304, "learning_rate": 0.002, "loss": 2.5787, "step": 34010 }, { "epoch": 0.06777540481958434, "grad_norm": 0.16097566485404968, "learning_rate": 0.002, "loss": 2.5685, "step": 34020 }, { "epoch": 0.06779532704322326, "grad_norm": 0.15801578760147095, "learning_rate": 0.002, "loss": 2.5944, "step": 34030 }, { "epoch": 0.06781524926686217, "grad_norm": 0.16879041492938995, "learning_rate": 0.002, "loss": 2.581, "step": 34040 }, { "epoch": 0.06783517149050108, "grad_norm": 0.1796632558107376, "learning_rate": 0.002, "loss": 2.5718, "step": 34050 }, { "epoch": 0.06785509371414, "grad_norm": 0.15312476456165314, "learning_rate": 0.002, "loss": 2.5935, "step": 34060 }, { "epoch": 0.06787501593777891, "grad_norm": 0.21203221380710602, "learning_rate": 0.002, "loss": 2.5936, "step": 34070 }, { "epoch": 0.06789493816141783, "grad_norm": 0.15788309276103973, "learning_rate": 0.002, "loss": 2.5818, "step": 34080 }, { "epoch": 0.06791486038505674, "grad_norm": 0.15068772435188293, "learning_rate": 0.002, "loss": 2.5744, "step": 34090 }, { "epoch": 0.06793478260869565, "grad_norm": 0.17808797955513, "learning_rate": 0.002, "loss": 2.5868, "step": 34100 }, { "epoch": 0.06795470483233457, "grad_norm": 0.17819418013095856, "learning_rate": 0.002, "loss": 2.5931, "step": 34110 }, { "epoch": 0.06797462705597349, "grad_norm": 0.17066246271133423, "learning_rate": 0.002, "loss": 2.5834, "step": 34120 }, { "epoch": 0.06799454927961239, "grad_norm": 0.19092261791229248, "learning_rate": 0.002, "loss": 2.5706, "step": 34130 }, { "epoch": 0.06801447150325131, "grad_norm": 0.17401178181171417, "learning_rate": 0.002, "loss": 2.5783, "step": 34140 }, { "epoch": 0.06803439372689023, "grad_norm": 0.1933360993862152, "learning_rate": 0.002, "loss": 2.589, "step": 34150 }, { "epoch": 0.06805431595052913, "grad_norm": 0.17431239783763885, "learning_rate": 0.002, "loss": 2.5779, "step": 34160 }, { "epoch": 0.06807423817416805, "grad_norm": 0.1672477126121521, "learning_rate": 0.002, "loss": 2.5793, "step": 34170 }, { "epoch": 0.06809416039780695, "grad_norm": 0.1663948893547058, "learning_rate": 0.002, "loss": 2.5799, "step": 34180 }, { "epoch": 0.06811408262144587, "grad_norm": 0.15757335722446442, "learning_rate": 0.002, "loss": 2.5721, "step": 34190 }, { "epoch": 0.06813400484508479, "grad_norm": 0.16072948276996613, "learning_rate": 0.002, "loss": 2.5982, "step": 34200 }, { "epoch": 0.0681539270687237, "grad_norm": 0.2517760396003723, "learning_rate": 0.002, "loss": 2.5797, "step": 34210 }, { "epoch": 0.06817384929236261, "grad_norm": 0.17866335809230804, "learning_rate": 0.002, "loss": 2.5688, "step": 34220 }, { "epoch": 0.06819377151600153, "grad_norm": 0.20321227610111237, "learning_rate": 0.002, "loss": 2.5981, "step": 34230 }, { "epoch": 0.06821369373964044, "grad_norm": 0.20417997241020203, "learning_rate": 0.002, "loss": 2.5788, "step": 34240 }, { "epoch": 0.06823361596327936, "grad_norm": 0.1708742380142212, "learning_rate": 0.002, "loss": 2.5914, "step": 34250 }, { "epoch": 0.06825353818691828, "grad_norm": 0.1672973483800888, "learning_rate": 0.002, "loss": 2.5854, "step": 34260 }, { "epoch": 0.06827346041055718, "grad_norm": 0.14693762362003326, "learning_rate": 0.002, "loss": 2.5891, "step": 34270 }, { "epoch": 0.0682933826341961, "grad_norm": 0.18068799376487732, "learning_rate": 0.002, "loss": 2.5808, "step": 34280 }, { "epoch": 0.06831330485783502, "grad_norm": 0.16714853048324585, "learning_rate": 0.002, "loss": 2.5843, "step": 34290 }, { "epoch": 0.06833322708147392, "grad_norm": 0.16258294880390167, "learning_rate": 0.002, "loss": 2.5798, "step": 34300 }, { "epoch": 0.06835314930511284, "grad_norm": 0.14837586879730225, "learning_rate": 0.002, "loss": 2.5842, "step": 34310 }, { "epoch": 0.06837307152875176, "grad_norm": 0.18992836773395538, "learning_rate": 0.002, "loss": 2.5868, "step": 34320 }, { "epoch": 0.06839299375239066, "grad_norm": 0.19966118037700653, "learning_rate": 0.002, "loss": 2.5893, "step": 34330 }, { "epoch": 0.06841291597602958, "grad_norm": 0.15659870207309723, "learning_rate": 0.002, "loss": 2.587, "step": 34340 }, { "epoch": 0.0684328381996685, "grad_norm": 0.20483213663101196, "learning_rate": 0.002, "loss": 2.5814, "step": 34350 }, { "epoch": 0.0684527604233074, "grad_norm": 0.16108964383602142, "learning_rate": 0.002, "loss": 2.5758, "step": 34360 }, { "epoch": 0.06847268264694632, "grad_norm": 0.183487206697464, "learning_rate": 0.002, "loss": 2.5843, "step": 34370 }, { "epoch": 0.06849260487058524, "grad_norm": 0.16198576986789703, "learning_rate": 0.002, "loss": 2.5839, "step": 34380 }, { "epoch": 0.06851252709422415, "grad_norm": 0.21097521483898163, "learning_rate": 0.002, "loss": 2.5844, "step": 34390 }, { "epoch": 0.06853244931786306, "grad_norm": 0.18204885721206665, "learning_rate": 0.002, "loss": 2.5887, "step": 34400 }, { "epoch": 0.06855237154150198, "grad_norm": 0.15493278205394745, "learning_rate": 0.002, "loss": 2.5812, "step": 34410 }, { "epoch": 0.06857229376514089, "grad_norm": 0.15906955301761627, "learning_rate": 0.002, "loss": 2.5826, "step": 34420 }, { "epoch": 0.0685922159887798, "grad_norm": 0.14683865010738373, "learning_rate": 0.002, "loss": 2.59, "step": 34430 }, { "epoch": 0.06861213821241871, "grad_norm": 0.1824183464050293, "learning_rate": 0.002, "loss": 2.5887, "step": 34440 }, { "epoch": 0.06863206043605763, "grad_norm": 0.16095368564128876, "learning_rate": 0.002, "loss": 2.5815, "step": 34450 }, { "epoch": 0.06865198265969655, "grad_norm": 0.18866105377674103, "learning_rate": 0.002, "loss": 2.5883, "step": 34460 }, { "epoch": 0.06867190488333545, "grad_norm": 0.16550958156585693, "learning_rate": 0.002, "loss": 2.5839, "step": 34470 }, { "epoch": 0.06869182710697437, "grad_norm": 0.15852749347686768, "learning_rate": 0.002, "loss": 2.5806, "step": 34480 }, { "epoch": 0.06871174933061329, "grad_norm": 0.18024930357933044, "learning_rate": 0.002, "loss": 2.579, "step": 34490 }, { "epoch": 0.0687316715542522, "grad_norm": 0.3342995047569275, "learning_rate": 0.002, "loss": 2.5864, "step": 34500 }, { "epoch": 0.06875159377789111, "grad_norm": 0.19142703711986542, "learning_rate": 0.002, "loss": 2.5792, "step": 34510 }, { "epoch": 0.06877151600153003, "grad_norm": 0.2089821994304657, "learning_rate": 0.002, "loss": 2.5768, "step": 34520 }, { "epoch": 0.06879143822516894, "grad_norm": 0.1557404100894928, "learning_rate": 0.002, "loss": 2.582, "step": 34530 }, { "epoch": 0.06881136044880785, "grad_norm": 0.16683173179626465, "learning_rate": 0.002, "loss": 2.5857, "step": 34540 }, { "epoch": 0.06883128267244677, "grad_norm": 0.15943478047847748, "learning_rate": 0.002, "loss": 2.5699, "step": 34550 }, { "epoch": 0.06885120489608568, "grad_norm": 0.15904958546161652, "learning_rate": 0.002, "loss": 2.5904, "step": 34560 }, { "epoch": 0.0688711271197246, "grad_norm": 0.1936812698841095, "learning_rate": 0.002, "loss": 2.5946, "step": 34570 }, { "epoch": 0.06889104934336351, "grad_norm": 0.17053528130054474, "learning_rate": 0.002, "loss": 2.582, "step": 34580 }, { "epoch": 0.06891097156700242, "grad_norm": 0.18628902733325958, "learning_rate": 0.002, "loss": 2.5936, "step": 34590 }, { "epoch": 0.06893089379064134, "grad_norm": 0.18642829358577728, "learning_rate": 0.002, "loss": 2.5866, "step": 34600 }, { "epoch": 0.06895081601428026, "grad_norm": 0.19513356685638428, "learning_rate": 0.002, "loss": 2.5931, "step": 34610 }, { "epoch": 0.06897073823791916, "grad_norm": 0.19522342085838318, "learning_rate": 0.002, "loss": 2.5879, "step": 34620 }, { "epoch": 0.06899066046155808, "grad_norm": 0.1751604676246643, "learning_rate": 0.002, "loss": 2.5922, "step": 34630 }, { "epoch": 0.069010582685197, "grad_norm": 0.16135439276695251, "learning_rate": 0.002, "loss": 2.5693, "step": 34640 }, { "epoch": 0.0690305049088359, "grad_norm": 0.1696232706308365, "learning_rate": 0.002, "loss": 2.5923, "step": 34650 }, { "epoch": 0.06905042713247482, "grad_norm": 0.1660030335187912, "learning_rate": 0.002, "loss": 2.5841, "step": 34660 }, { "epoch": 0.06907034935611374, "grad_norm": 0.1346091777086258, "learning_rate": 0.002, "loss": 2.574, "step": 34670 }, { "epoch": 0.06909027157975264, "grad_norm": 0.19583646953105927, "learning_rate": 0.002, "loss": 2.5746, "step": 34680 }, { "epoch": 0.06911019380339156, "grad_norm": 0.18284469842910767, "learning_rate": 0.002, "loss": 2.5865, "step": 34690 }, { "epoch": 0.06913011602703047, "grad_norm": 0.1741645336151123, "learning_rate": 0.002, "loss": 2.5827, "step": 34700 }, { "epoch": 0.06915003825066939, "grad_norm": 0.1398077756166458, "learning_rate": 0.002, "loss": 2.5732, "step": 34710 }, { "epoch": 0.0691699604743083, "grad_norm": 0.22030124068260193, "learning_rate": 0.002, "loss": 2.5778, "step": 34720 }, { "epoch": 0.06918988269794721, "grad_norm": 0.16854538023471832, "learning_rate": 0.002, "loss": 2.5898, "step": 34730 }, { "epoch": 0.06920980492158613, "grad_norm": 0.15870383381843567, "learning_rate": 0.002, "loss": 2.5876, "step": 34740 }, { "epoch": 0.06922972714522505, "grad_norm": 0.15805019438266754, "learning_rate": 0.002, "loss": 2.5791, "step": 34750 }, { "epoch": 0.06924964936886395, "grad_norm": 0.17397063970565796, "learning_rate": 0.002, "loss": 2.5754, "step": 34760 }, { "epoch": 0.06926957159250287, "grad_norm": 0.17190924286842346, "learning_rate": 0.002, "loss": 2.5861, "step": 34770 }, { "epoch": 0.06928949381614179, "grad_norm": 0.19843004643917084, "learning_rate": 0.002, "loss": 2.5922, "step": 34780 }, { "epoch": 0.06930941603978069, "grad_norm": 0.19873985648155212, "learning_rate": 0.002, "loss": 2.6026, "step": 34790 }, { "epoch": 0.06932933826341961, "grad_norm": 0.15748746693134308, "learning_rate": 0.002, "loss": 2.5801, "step": 34800 }, { "epoch": 0.06934926048705853, "grad_norm": 0.23689953982830048, "learning_rate": 0.002, "loss": 2.5945, "step": 34810 }, { "epoch": 0.06936918271069743, "grad_norm": 0.202627032995224, "learning_rate": 0.002, "loss": 2.5802, "step": 34820 }, { "epoch": 0.06938910493433635, "grad_norm": 0.18404746055603027, "learning_rate": 0.002, "loss": 2.5979, "step": 34830 }, { "epoch": 0.06940902715797527, "grad_norm": 0.21362265944480896, "learning_rate": 0.002, "loss": 2.5671, "step": 34840 }, { "epoch": 0.06942894938161417, "grad_norm": 0.15131528675556183, "learning_rate": 0.002, "loss": 2.5869, "step": 34850 }, { "epoch": 0.06944887160525309, "grad_norm": 0.1488545686006546, "learning_rate": 0.002, "loss": 2.5859, "step": 34860 }, { "epoch": 0.06946879382889201, "grad_norm": 0.2719632089138031, "learning_rate": 0.002, "loss": 2.5781, "step": 34870 }, { "epoch": 0.06948871605253092, "grad_norm": 0.1648089587688446, "learning_rate": 0.002, "loss": 2.5798, "step": 34880 }, { "epoch": 0.06950863827616983, "grad_norm": 0.1824488341808319, "learning_rate": 0.002, "loss": 2.5835, "step": 34890 }, { "epoch": 0.06952856049980875, "grad_norm": 0.15272846817970276, "learning_rate": 0.002, "loss": 2.5874, "step": 34900 }, { "epoch": 0.06954848272344766, "grad_norm": 0.16392427682876587, "learning_rate": 0.002, "loss": 2.5833, "step": 34910 }, { "epoch": 0.06956840494708658, "grad_norm": 0.17944273352622986, "learning_rate": 0.002, "loss": 2.5846, "step": 34920 }, { "epoch": 0.06958832717072548, "grad_norm": 0.18900646269321442, "learning_rate": 0.002, "loss": 2.5795, "step": 34930 }, { "epoch": 0.0696082493943644, "grad_norm": 0.18759004771709442, "learning_rate": 0.002, "loss": 2.5795, "step": 34940 }, { "epoch": 0.06962817161800332, "grad_norm": 0.16237252950668335, "learning_rate": 0.002, "loss": 2.5904, "step": 34950 }, { "epoch": 0.06964809384164222, "grad_norm": 0.15614773333072662, "learning_rate": 0.002, "loss": 2.5722, "step": 34960 }, { "epoch": 0.06966801606528114, "grad_norm": 0.2117154449224472, "learning_rate": 0.002, "loss": 2.5823, "step": 34970 }, { "epoch": 0.06968793828892006, "grad_norm": 0.15801595151424408, "learning_rate": 0.002, "loss": 2.578, "step": 34980 }, { "epoch": 0.06970786051255896, "grad_norm": 0.16815653443336487, "learning_rate": 0.002, "loss": 2.5859, "step": 34990 }, { "epoch": 0.06972778273619788, "grad_norm": 0.1724843978881836, "learning_rate": 0.002, "loss": 2.5759, "step": 35000 }, { "epoch": 0.0697477049598368, "grad_norm": 0.14942674338817596, "learning_rate": 0.002, "loss": 2.5738, "step": 35010 }, { "epoch": 0.0697676271834757, "grad_norm": 0.18396225571632385, "learning_rate": 0.002, "loss": 2.5575, "step": 35020 }, { "epoch": 0.06978754940711462, "grad_norm": 0.17278499901294708, "learning_rate": 0.002, "loss": 2.5788, "step": 35030 }, { "epoch": 0.06980747163075354, "grad_norm": 0.20317524671554565, "learning_rate": 0.002, "loss": 2.5786, "step": 35040 }, { "epoch": 0.06982739385439245, "grad_norm": 0.1770993173122406, "learning_rate": 0.002, "loss": 2.5762, "step": 35050 }, { "epoch": 0.06984731607803137, "grad_norm": 0.2022845596075058, "learning_rate": 0.002, "loss": 2.5944, "step": 35060 }, { "epoch": 0.06986723830167028, "grad_norm": 0.16222509741783142, "learning_rate": 0.002, "loss": 2.5967, "step": 35070 }, { "epoch": 0.06988716052530919, "grad_norm": 0.15521472692489624, "learning_rate": 0.002, "loss": 2.5772, "step": 35080 }, { "epoch": 0.06990708274894811, "grad_norm": 0.15152151882648468, "learning_rate": 0.002, "loss": 2.5805, "step": 35090 }, { "epoch": 0.06992700497258703, "grad_norm": 0.17144599556922913, "learning_rate": 0.002, "loss": 2.5911, "step": 35100 }, { "epoch": 0.06994692719622593, "grad_norm": 0.17172197997570038, "learning_rate": 0.002, "loss": 2.5967, "step": 35110 }, { "epoch": 0.06996684941986485, "grad_norm": 0.17665739357471466, "learning_rate": 0.002, "loss": 2.5843, "step": 35120 }, { "epoch": 0.06998677164350377, "grad_norm": 0.2150290310382843, "learning_rate": 0.002, "loss": 2.5778, "step": 35130 }, { "epoch": 0.07000669386714267, "grad_norm": 0.18136049807071686, "learning_rate": 0.002, "loss": 2.5752, "step": 35140 }, { "epoch": 0.07002661609078159, "grad_norm": 0.1716262549161911, "learning_rate": 0.002, "loss": 2.5894, "step": 35150 }, { "epoch": 0.07004653831442051, "grad_norm": 0.15772059559822083, "learning_rate": 0.002, "loss": 2.5614, "step": 35160 }, { "epoch": 0.07006646053805941, "grad_norm": 0.17426714301109314, "learning_rate": 0.002, "loss": 2.5808, "step": 35170 }, { "epoch": 0.07008638276169833, "grad_norm": 0.16653402149677277, "learning_rate": 0.002, "loss": 2.5688, "step": 35180 }, { "epoch": 0.07010630498533724, "grad_norm": 0.156472310423851, "learning_rate": 0.002, "loss": 2.6004, "step": 35190 }, { "epoch": 0.07012622720897616, "grad_norm": 0.17549030482769012, "learning_rate": 0.002, "loss": 2.566, "step": 35200 }, { "epoch": 0.07014614943261507, "grad_norm": 0.16608349978923798, "learning_rate": 0.002, "loss": 2.5742, "step": 35210 }, { "epoch": 0.07016607165625398, "grad_norm": 0.16870643198490143, "learning_rate": 0.002, "loss": 2.5765, "step": 35220 }, { "epoch": 0.0701859938798929, "grad_norm": 0.15699796378612518, "learning_rate": 0.002, "loss": 2.5741, "step": 35230 }, { "epoch": 0.07020591610353182, "grad_norm": 0.18860995769500732, "learning_rate": 0.002, "loss": 2.5936, "step": 35240 }, { "epoch": 0.07022583832717072, "grad_norm": 0.1775842010974884, "learning_rate": 0.002, "loss": 2.587, "step": 35250 }, { "epoch": 0.07024576055080964, "grad_norm": 0.16093367338180542, "learning_rate": 0.002, "loss": 2.5894, "step": 35260 }, { "epoch": 0.07026568277444856, "grad_norm": 0.17308378219604492, "learning_rate": 0.002, "loss": 2.589, "step": 35270 }, { "epoch": 0.07028560499808746, "grad_norm": 0.17677631974220276, "learning_rate": 0.002, "loss": 2.6004, "step": 35280 }, { "epoch": 0.07030552722172638, "grad_norm": 0.17565123736858368, "learning_rate": 0.002, "loss": 2.5768, "step": 35290 }, { "epoch": 0.0703254494453653, "grad_norm": 0.18760748207569122, "learning_rate": 0.002, "loss": 2.586, "step": 35300 }, { "epoch": 0.0703453716690042, "grad_norm": 0.1719500571489334, "learning_rate": 0.002, "loss": 2.5847, "step": 35310 }, { "epoch": 0.07036529389264312, "grad_norm": 0.16701823472976685, "learning_rate": 0.002, "loss": 2.5749, "step": 35320 }, { "epoch": 0.07038521611628204, "grad_norm": 0.136610209941864, "learning_rate": 0.002, "loss": 2.5694, "step": 35330 }, { "epoch": 0.07040513833992094, "grad_norm": 0.2060316801071167, "learning_rate": 0.002, "loss": 2.5719, "step": 35340 }, { "epoch": 0.07042506056355986, "grad_norm": 0.18741853535175323, "learning_rate": 0.002, "loss": 2.5653, "step": 35350 }, { "epoch": 0.07044498278719878, "grad_norm": 0.1530361920595169, "learning_rate": 0.002, "loss": 2.5823, "step": 35360 }, { "epoch": 0.07046490501083769, "grad_norm": 0.17295511066913605, "learning_rate": 0.002, "loss": 2.5813, "step": 35370 }, { "epoch": 0.0704848272344766, "grad_norm": 0.1623687446117401, "learning_rate": 0.002, "loss": 2.5715, "step": 35380 }, { "epoch": 0.07050474945811552, "grad_norm": 0.18732237815856934, "learning_rate": 0.002, "loss": 2.5869, "step": 35390 }, { "epoch": 0.07052467168175443, "grad_norm": 0.1662697196006775, "learning_rate": 0.002, "loss": 2.5635, "step": 35400 }, { "epoch": 0.07054459390539335, "grad_norm": 0.17209163308143616, "learning_rate": 0.002, "loss": 2.5798, "step": 35410 }, { "epoch": 0.07056451612903226, "grad_norm": 0.16166052222251892, "learning_rate": 0.002, "loss": 2.5853, "step": 35420 }, { "epoch": 0.07058443835267117, "grad_norm": 0.1810404360294342, "learning_rate": 0.002, "loss": 2.6041, "step": 35430 }, { "epoch": 0.07060436057631009, "grad_norm": 0.16344930231571198, "learning_rate": 0.002, "loss": 2.5606, "step": 35440 }, { "epoch": 0.07062428279994899, "grad_norm": 0.19549968838691711, "learning_rate": 0.002, "loss": 2.5756, "step": 35450 }, { "epoch": 0.07064420502358791, "grad_norm": 0.18059806525707245, "learning_rate": 0.002, "loss": 2.6004, "step": 35460 }, { "epoch": 0.07066412724722683, "grad_norm": 0.19367356598377228, "learning_rate": 0.002, "loss": 2.5796, "step": 35470 }, { "epoch": 0.07068404947086573, "grad_norm": 0.1728130578994751, "learning_rate": 0.002, "loss": 2.5859, "step": 35480 }, { "epoch": 0.07070397169450465, "grad_norm": 0.19326092302799225, "learning_rate": 0.002, "loss": 2.5824, "step": 35490 }, { "epoch": 0.07072389391814357, "grad_norm": 0.15535713732242584, "learning_rate": 0.002, "loss": 2.5783, "step": 35500 }, { "epoch": 0.07074381614178248, "grad_norm": 0.18727034330368042, "learning_rate": 0.002, "loss": 2.5849, "step": 35510 }, { "epoch": 0.0707637383654214, "grad_norm": 0.1465715616941452, "learning_rate": 0.002, "loss": 2.5716, "step": 35520 }, { "epoch": 0.07078366058906031, "grad_norm": 0.1751878410577774, "learning_rate": 0.002, "loss": 2.5748, "step": 35530 }, { "epoch": 0.07080358281269922, "grad_norm": 0.19223281741142273, "learning_rate": 0.002, "loss": 2.5709, "step": 35540 }, { "epoch": 0.07082350503633814, "grad_norm": 0.1973821371793747, "learning_rate": 0.002, "loss": 2.5977, "step": 35550 }, { "epoch": 0.07084342725997705, "grad_norm": 0.14666511118412018, "learning_rate": 0.002, "loss": 2.5759, "step": 35560 }, { "epoch": 0.07086334948361596, "grad_norm": 0.1818213015794754, "learning_rate": 0.002, "loss": 2.5846, "step": 35570 }, { "epoch": 0.07088327170725488, "grad_norm": 0.1623172163963318, "learning_rate": 0.002, "loss": 2.5954, "step": 35580 }, { "epoch": 0.0709031939308938, "grad_norm": 0.18052707612514496, "learning_rate": 0.002, "loss": 2.5828, "step": 35590 }, { "epoch": 0.0709231161545327, "grad_norm": 0.17479045689105988, "learning_rate": 0.002, "loss": 2.5755, "step": 35600 }, { "epoch": 0.07094303837817162, "grad_norm": 0.15458999574184418, "learning_rate": 0.002, "loss": 2.5895, "step": 35610 }, { "epoch": 0.07096296060181054, "grad_norm": 0.14900709688663483, "learning_rate": 0.002, "loss": 2.5834, "step": 35620 }, { "epoch": 0.07098288282544944, "grad_norm": 0.153630331158638, "learning_rate": 0.002, "loss": 2.5799, "step": 35630 }, { "epoch": 0.07100280504908836, "grad_norm": 0.1736198514699936, "learning_rate": 0.002, "loss": 2.58, "step": 35640 }, { "epoch": 0.07102272727272728, "grad_norm": 0.14763912558555603, "learning_rate": 0.002, "loss": 2.5962, "step": 35650 }, { "epoch": 0.07104264949636618, "grad_norm": 0.19546179473400116, "learning_rate": 0.002, "loss": 2.5877, "step": 35660 }, { "epoch": 0.0710625717200051, "grad_norm": 0.16904348134994507, "learning_rate": 0.002, "loss": 2.5714, "step": 35670 }, { "epoch": 0.07108249394364402, "grad_norm": 0.18735574185848236, "learning_rate": 0.002, "loss": 2.5852, "step": 35680 }, { "epoch": 0.07110241616728293, "grad_norm": 0.1671905666589737, "learning_rate": 0.002, "loss": 2.5734, "step": 35690 }, { "epoch": 0.07112233839092184, "grad_norm": 0.22225871682167053, "learning_rate": 0.002, "loss": 2.585, "step": 35700 }, { "epoch": 0.07114226061456075, "grad_norm": 0.15216884016990662, "learning_rate": 0.002, "loss": 2.5833, "step": 35710 }, { "epoch": 0.07116218283819967, "grad_norm": 0.19123421609401703, "learning_rate": 0.002, "loss": 2.5735, "step": 35720 }, { "epoch": 0.07118210506183859, "grad_norm": 0.17960253357887268, "learning_rate": 0.002, "loss": 2.5689, "step": 35730 }, { "epoch": 0.07120202728547749, "grad_norm": 0.17321138083934784, "learning_rate": 0.002, "loss": 2.5866, "step": 35740 }, { "epoch": 0.07122194950911641, "grad_norm": 0.15618105232715607, "learning_rate": 0.002, "loss": 2.5841, "step": 35750 }, { "epoch": 0.07124187173275533, "grad_norm": 0.1391860991716385, "learning_rate": 0.002, "loss": 2.5816, "step": 35760 }, { "epoch": 0.07126179395639423, "grad_norm": 0.2295987904071808, "learning_rate": 0.002, "loss": 2.5872, "step": 35770 }, { "epoch": 0.07128171618003315, "grad_norm": 0.1627628356218338, "learning_rate": 0.002, "loss": 2.5858, "step": 35780 }, { "epoch": 0.07130163840367207, "grad_norm": 0.1587877869606018, "learning_rate": 0.002, "loss": 2.5811, "step": 35790 }, { "epoch": 0.07132156062731097, "grad_norm": 0.20674753189086914, "learning_rate": 0.002, "loss": 2.5752, "step": 35800 }, { "epoch": 0.07134148285094989, "grad_norm": 0.16452664136886597, "learning_rate": 0.002, "loss": 2.5866, "step": 35810 }, { "epoch": 0.07136140507458881, "grad_norm": 0.18386614322662354, "learning_rate": 0.002, "loss": 2.5734, "step": 35820 }, { "epoch": 0.07138132729822771, "grad_norm": 0.14576296508312225, "learning_rate": 0.002, "loss": 2.5808, "step": 35830 }, { "epoch": 0.07140124952186663, "grad_norm": 0.1772330105304718, "learning_rate": 0.002, "loss": 2.5907, "step": 35840 }, { "epoch": 0.07142117174550555, "grad_norm": 0.16256560385227203, "learning_rate": 0.002, "loss": 2.5764, "step": 35850 }, { "epoch": 0.07144109396914446, "grad_norm": 0.16280139982700348, "learning_rate": 0.002, "loss": 2.5839, "step": 35860 }, { "epoch": 0.07146101619278337, "grad_norm": 0.16148172318935394, "learning_rate": 0.002, "loss": 2.5615, "step": 35870 }, { "epoch": 0.0714809384164223, "grad_norm": 0.17729461193084717, "learning_rate": 0.002, "loss": 2.5774, "step": 35880 }, { "epoch": 0.0715008606400612, "grad_norm": 0.20294737815856934, "learning_rate": 0.002, "loss": 2.5861, "step": 35890 }, { "epoch": 0.07152078286370012, "grad_norm": 0.17291878163814545, "learning_rate": 0.002, "loss": 2.5768, "step": 35900 }, { "epoch": 0.07154070508733903, "grad_norm": 0.1679038107395172, "learning_rate": 0.002, "loss": 2.5808, "step": 35910 }, { "epoch": 0.07156062731097794, "grad_norm": 0.18564660847187042, "learning_rate": 0.002, "loss": 2.5816, "step": 35920 }, { "epoch": 0.07158054953461686, "grad_norm": 0.16885115206241608, "learning_rate": 0.002, "loss": 2.5932, "step": 35930 }, { "epoch": 0.07160047175825576, "grad_norm": 0.1891031116247177, "learning_rate": 0.002, "loss": 2.5708, "step": 35940 }, { "epoch": 0.07162039398189468, "grad_norm": 0.15816333889961243, "learning_rate": 0.002, "loss": 2.5866, "step": 35950 }, { "epoch": 0.0716403162055336, "grad_norm": 0.18801487982273102, "learning_rate": 0.002, "loss": 2.5778, "step": 35960 }, { "epoch": 0.0716602384291725, "grad_norm": 0.1518321931362152, "learning_rate": 0.002, "loss": 2.5942, "step": 35970 }, { "epoch": 0.07168016065281142, "grad_norm": 0.16533932089805603, "learning_rate": 0.002, "loss": 2.5847, "step": 35980 }, { "epoch": 0.07170008287645034, "grad_norm": 0.2237655520439148, "learning_rate": 0.002, "loss": 2.5856, "step": 35990 }, { "epoch": 0.07172000510008925, "grad_norm": 0.19079531729221344, "learning_rate": 0.002, "loss": 2.5696, "step": 36000 }, { "epoch": 0.07173992732372816, "grad_norm": 0.16666531562805176, "learning_rate": 0.002, "loss": 2.5787, "step": 36010 }, { "epoch": 0.07175984954736708, "grad_norm": 0.17648756504058838, "learning_rate": 0.002, "loss": 2.5883, "step": 36020 }, { "epoch": 0.07177977177100599, "grad_norm": 0.1917579174041748, "learning_rate": 0.002, "loss": 2.5777, "step": 36030 }, { "epoch": 0.0717996939946449, "grad_norm": 0.1475636065006256, "learning_rate": 0.002, "loss": 2.5766, "step": 36040 }, { "epoch": 0.07181961621828382, "grad_norm": 0.18173953890800476, "learning_rate": 0.002, "loss": 2.5787, "step": 36050 }, { "epoch": 0.07183953844192273, "grad_norm": 0.16727109253406525, "learning_rate": 0.002, "loss": 2.5834, "step": 36060 }, { "epoch": 0.07185946066556165, "grad_norm": 0.18074186146259308, "learning_rate": 0.002, "loss": 2.5889, "step": 36070 }, { "epoch": 0.07187938288920057, "grad_norm": 0.1826927363872528, "learning_rate": 0.002, "loss": 2.5786, "step": 36080 }, { "epoch": 0.07189930511283947, "grad_norm": 0.1565757840871811, "learning_rate": 0.002, "loss": 2.5815, "step": 36090 }, { "epoch": 0.07191922733647839, "grad_norm": 0.1671961545944214, "learning_rate": 0.002, "loss": 2.5741, "step": 36100 }, { "epoch": 0.07193914956011731, "grad_norm": 0.15276861190795898, "learning_rate": 0.002, "loss": 2.5793, "step": 36110 }, { "epoch": 0.07195907178375621, "grad_norm": 0.1988084763288498, "learning_rate": 0.002, "loss": 2.5852, "step": 36120 }, { "epoch": 0.07197899400739513, "grad_norm": 0.1566968858242035, "learning_rate": 0.002, "loss": 2.5758, "step": 36130 }, { "epoch": 0.07199891623103405, "grad_norm": 0.20313100516796112, "learning_rate": 0.002, "loss": 2.5872, "step": 36140 }, { "epoch": 0.07201883845467295, "grad_norm": 0.1696740835905075, "learning_rate": 0.002, "loss": 2.592, "step": 36150 }, { "epoch": 0.07203876067831187, "grad_norm": 0.15593214333057404, "learning_rate": 0.002, "loss": 2.5874, "step": 36160 }, { "epoch": 0.07205868290195079, "grad_norm": 0.15695308148860931, "learning_rate": 0.002, "loss": 2.5855, "step": 36170 }, { "epoch": 0.0720786051255897, "grad_norm": 0.14664363861083984, "learning_rate": 0.002, "loss": 2.5891, "step": 36180 }, { "epoch": 0.07209852734922861, "grad_norm": 0.20562683045864105, "learning_rate": 0.002, "loss": 2.5839, "step": 36190 }, { "epoch": 0.07211844957286752, "grad_norm": 0.17213094234466553, "learning_rate": 0.002, "loss": 2.5857, "step": 36200 }, { "epoch": 0.07213837179650644, "grad_norm": 0.19952014088630676, "learning_rate": 0.002, "loss": 2.5756, "step": 36210 }, { "epoch": 0.07215829402014536, "grad_norm": 0.1605312079191208, "learning_rate": 0.002, "loss": 2.5633, "step": 36220 }, { "epoch": 0.07217821624378426, "grad_norm": 0.18237027525901794, "learning_rate": 0.002, "loss": 2.5801, "step": 36230 }, { "epoch": 0.07219813846742318, "grad_norm": 0.15107028186321259, "learning_rate": 0.002, "loss": 2.5718, "step": 36240 }, { "epoch": 0.0722180606910621, "grad_norm": 0.21099914610385895, "learning_rate": 0.002, "loss": 2.5862, "step": 36250 }, { "epoch": 0.072237982914701, "grad_norm": 0.18398374319076538, "learning_rate": 0.002, "loss": 2.5893, "step": 36260 }, { "epoch": 0.07225790513833992, "grad_norm": 0.20160166919231415, "learning_rate": 0.002, "loss": 2.5796, "step": 36270 }, { "epoch": 0.07227782736197884, "grad_norm": 0.17383939027786255, "learning_rate": 0.002, "loss": 2.5655, "step": 36280 }, { "epoch": 0.07229774958561774, "grad_norm": 0.16974622011184692, "learning_rate": 0.002, "loss": 2.5754, "step": 36290 }, { "epoch": 0.07231767180925666, "grad_norm": 0.1610172837972641, "learning_rate": 0.002, "loss": 2.587, "step": 36300 }, { "epoch": 0.07233759403289558, "grad_norm": 0.18287216126918793, "learning_rate": 0.002, "loss": 2.5963, "step": 36310 }, { "epoch": 0.07235751625653448, "grad_norm": 0.15679514408111572, "learning_rate": 0.002, "loss": 2.5762, "step": 36320 }, { "epoch": 0.0723774384801734, "grad_norm": 0.18395297229290009, "learning_rate": 0.002, "loss": 2.5738, "step": 36330 }, { "epoch": 0.07239736070381232, "grad_norm": 0.16547907888889313, "learning_rate": 0.002, "loss": 2.5755, "step": 36340 }, { "epoch": 0.07241728292745123, "grad_norm": 0.157828688621521, "learning_rate": 0.002, "loss": 2.5833, "step": 36350 }, { "epoch": 0.07243720515109014, "grad_norm": 0.1985209882259369, "learning_rate": 0.002, "loss": 2.5797, "step": 36360 }, { "epoch": 0.07245712737472906, "grad_norm": 0.15683217346668243, "learning_rate": 0.002, "loss": 2.589, "step": 36370 }, { "epoch": 0.07247704959836797, "grad_norm": 0.1844377964735031, "learning_rate": 0.002, "loss": 2.5771, "step": 36380 }, { "epoch": 0.07249697182200689, "grad_norm": 0.1925509124994278, "learning_rate": 0.002, "loss": 2.5749, "step": 36390 }, { "epoch": 0.0725168940456458, "grad_norm": 0.16809630393981934, "learning_rate": 0.002, "loss": 2.5843, "step": 36400 }, { "epoch": 0.07253681626928471, "grad_norm": 0.19899877905845642, "learning_rate": 0.002, "loss": 2.5756, "step": 36410 }, { "epoch": 0.07255673849292363, "grad_norm": 0.19830766320228577, "learning_rate": 0.002, "loss": 2.5916, "step": 36420 }, { "epoch": 0.07257666071656255, "grad_norm": 0.18036597967147827, "learning_rate": 0.002, "loss": 2.5828, "step": 36430 }, { "epoch": 0.07259658294020145, "grad_norm": 0.1798490285873413, "learning_rate": 0.002, "loss": 2.5718, "step": 36440 }, { "epoch": 0.07261650516384037, "grad_norm": 0.19120478630065918, "learning_rate": 0.002, "loss": 2.5814, "step": 36450 }, { "epoch": 0.07263642738747927, "grad_norm": 0.18407391011714935, "learning_rate": 0.002, "loss": 2.5899, "step": 36460 }, { "epoch": 0.07265634961111819, "grad_norm": 0.15946157276630402, "learning_rate": 0.002, "loss": 2.5692, "step": 36470 }, { "epoch": 0.07267627183475711, "grad_norm": 0.20291560888290405, "learning_rate": 0.002, "loss": 2.5901, "step": 36480 }, { "epoch": 0.07269619405839602, "grad_norm": 0.19764931499958038, "learning_rate": 0.002, "loss": 2.587, "step": 36490 }, { "epoch": 0.07271611628203493, "grad_norm": 0.17038671672344208, "learning_rate": 0.002, "loss": 2.5745, "step": 36500 }, { "epoch": 0.07273603850567385, "grad_norm": 0.21232368052005768, "learning_rate": 0.002, "loss": 2.5836, "step": 36510 }, { "epoch": 0.07275596072931276, "grad_norm": 0.1483970433473587, "learning_rate": 0.002, "loss": 2.5858, "step": 36520 }, { "epoch": 0.07277588295295168, "grad_norm": 0.1858948916196823, "learning_rate": 0.002, "loss": 2.574, "step": 36530 }, { "epoch": 0.0727958051765906, "grad_norm": 0.15826815366744995, "learning_rate": 0.002, "loss": 2.5755, "step": 36540 }, { "epoch": 0.0728157274002295, "grad_norm": 0.1519828885793686, "learning_rate": 0.002, "loss": 2.5664, "step": 36550 }, { "epoch": 0.07283564962386842, "grad_norm": 0.13864430785179138, "learning_rate": 0.002, "loss": 2.5637, "step": 36560 }, { "epoch": 0.07285557184750734, "grad_norm": 0.1994246393442154, "learning_rate": 0.002, "loss": 2.5622, "step": 36570 }, { "epoch": 0.07287549407114624, "grad_norm": 0.17694614827632904, "learning_rate": 0.002, "loss": 2.5892, "step": 36580 }, { "epoch": 0.07289541629478516, "grad_norm": 0.1643671989440918, "learning_rate": 0.002, "loss": 2.5711, "step": 36590 }, { "epoch": 0.07291533851842408, "grad_norm": 0.2150661200284958, "learning_rate": 0.002, "loss": 2.5833, "step": 36600 }, { "epoch": 0.07293526074206298, "grad_norm": 0.17827261984348297, "learning_rate": 0.002, "loss": 2.5752, "step": 36610 }, { "epoch": 0.0729551829657019, "grad_norm": 0.18329960107803345, "learning_rate": 0.002, "loss": 2.5873, "step": 36620 }, { "epoch": 0.07297510518934082, "grad_norm": 0.1759112924337387, "learning_rate": 0.002, "loss": 2.5741, "step": 36630 }, { "epoch": 0.07299502741297972, "grad_norm": 0.21060100197792053, "learning_rate": 0.002, "loss": 2.5782, "step": 36640 }, { "epoch": 0.07301494963661864, "grad_norm": 0.13656000792980194, "learning_rate": 0.002, "loss": 2.5865, "step": 36650 }, { "epoch": 0.07303487186025756, "grad_norm": 0.19597384333610535, "learning_rate": 0.002, "loss": 2.5756, "step": 36660 }, { "epoch": 0.07305479408389647, "grad_norm": 0.15157732367515564, "learning_rate": 0.002, "loss": 2.5683, "step": 36670 }, { "epoch": 0.07307471630753538, "grad_norm": 0.1847669929265976, "learning_rate": 0.002, "loss": 2.5785, "step": 36680 }, { "epoch": 0.0730946385311743, "grad_norm": 0.20097015798091888, "learning_rate": 0.002, "loss": 2.5952, "step": 36690 }, { "epoch": 0.07311456075481321, "grad_norm": 0.16764600574970245, "learning_rate": 0.002, "loss": 2.5887, "step": 36700 }, { "epoch": 0.07313448297845213, "grad_norm": 0.1600983738899231, "learning_rate": 0.002, "loss": 2.6007, "step": 36710 }, { "epoch": 0.07315440520209103, "grad_norm": 0.18347547948360443, "learning_rate": 0.002, "loss": 2.585, "step": 36720 }, { "epoch": 0.07317432742572995, "grad_norm": 0.18437893688678741, "learning_rate": 0.002, "loss": 2.5746, "step": 36730 }, { "epoch": 0.07319424964936887, "grad_norm": 0.17997461557388306, "learning_rate": 0.002, "loss": 2.5841, "step": 36740 }, { "epoch": 0.07321417187300777, "grad_norm": 0.1686040759086609, "learning_rate": 0.002, "loss": 2.5749, "step": 36750 }, { "epoch": 0.07323409409664669, "grad_norm": 0.22857582569122314, "learning_rate": 0.002, "loss": 2.5807, "step": 36760 }, { "epoch": 0.07325401632028561, "grad_norm": 0.1579320728778839, "learning_rate": 0.002, "loss": 2.5826, "step": 36770 }, { "epoch": 0.07327393854392451, "grad_norm": 0.1726491004228592, "learning_rate": 0.002, "loss": 2.5822, "step": 36780 }, { "epoch": 0.07329386076756343, "grad_norm": 0.14982615411281586, "learning_rate": 0.002, "loss": 2.5815, "step": 36790 }, { "epoch": 0.07331378299120235, "grad_norm": 0.16253963112831116, "learning_rate": 0.002, "loss": 2.5796, "step": 36800 }, { "epoch": 0.07333370521484125, "grad_norm": 0.19601906836032867, "learning_rate": 0.002, "loss": 2.5834, "step": 36810 }, { "epoch": 0.07335362743848017, "grad_norm": 0.16802676022052765, "learning_rate": 0.002, "loss": 2.5891, "step": 36820 }, { "epoch": 0.07337354966211909, "grad_norm": 0.1671302318572998, "learning_rate": 0.002, "loss": 2.5839, "step": 36830 }, { "epoch": 0.073393471885758, "grad_norm": 0.20665761828422546, "learning_rate": 0.002, "loss": 2.5908, "step": 36840 }, { "epoch": 0.07341339410939692, "grad_norm": 0.1502489447593689, "learning_rate": 0.002, "loss": 2.5624, "step": 36850 }, { "epoch": 0.07343331633303583, "grad_norm": 0.16449008882045746, "learning_rate": 0.002, "loss": 2.5907, "step": 36860 }, { "epoch": 0.07345323855667474, "grad_norm": 0.17935562133789062, "learning_rate": 0.002, "loss": 2.584, "step": 36870 }, { "epoch": 0.07347316078031366, "grad_norm": 0.1587989777326584, "learning_rate": 0.002, "loss": 2.5808, "step": 36880 }, { "epoch": 0.07349308300395258, "grad_norm": 0.17849493026733398, "learning_rate": 0.002, "loss": 2.5891, "step": 36890 }, { "epoch": 0.07351300522759148, "grad_norm": 0.19769985973834991, "learning_rate": 0.002, "loss": 2.5791, "step": 36900 }, { "epoch": 0.0735329274512304, "grad_norm": 0.1911238729953766, "learning_rate": 0.002, "loss": 2.5719, "step": 36910 }, { "epoch": 0.07355284967486932, "grad_norm": 0.16011172533035278, "learning_rate": 0.002, "loss": 2.5812, "step": 36920 }, { "epoch": 0.07357277189850822, "grad_norm": 0.19324921071529388, "learning_rate": 0.002, "loss": 2.5822, "step": 36930 }, { "epoch": 0.07359269412214714, "grad_norm": 0.14756421744823456, "learning_rate": 0.002, "loss": 2.5776, "step": 36940 }, { "epoch": 0.07361261634578604, "grad_norm": 0.16636395454406738, "learning_rate": 0.002, "loss": 2.5862, "step": 36950 }, { "epoch": 0.07363253856942496, "grad_norm": 0.16467240452766418, "learning_rate": 0.002, "loss": 2.5694, "step": 36960 }, { "epoch": 0.07365246079306388, "grad_norm": 0.19280433654785156, "learning_rate": 0.002, "loss": 2.5801, "step": 36970 }, { "epoch": 0.07367238301670279, "grad_norm": 0.14997555315494537, "learning_rate": 0.002, "loss": 2.5882, "step": 36980 }, { "epoch": 0.0736923052403417, "grad_norm": 0.20325058698654175, "learning_rate": 0.002, "loss": 2.5831, "step": 36990 }, { "epoch": 0.07371222746398062, "grad_norm": 0.1558198183774948, "learning_rate": 0.002, "loss": 2.5696, "step": 37000 }, { "epoch": 0.07373214968761953, "grad_norm": 0.17686189711093903, "learning_rate": 0.002, "loss": 2.5841, "step": 37010 }, { "epoch": 0.07375207191125845, "grad_norm": 0.15315356850624084, "learning_rate": 0.002, "loss": 2.5852, "step": 37020 }, { "epoch": 0.07377199413489736, "grad_norm": 0.2128005027770996, "learning_rate": 0.002, "loss": 2.5886, "step": 37030 }, { "epoch": 0.07379191635853627, "grad_norm": 0.1852640062570572, "learning_rate": 0.002, "loss": 2.5836, "step": 37040 }, { "epoch": 0.07381183858217519, "grad_norm": 0.16898514330387115, "learning_rate": 0.002, "loss": 2.5723, "step": 37050 }, { "epoch": 0.0738317608058141, "grad_norm": 0.17078182101249695, "learning_rate": 0.002, "loss": 2.5744, "step": 37060 }, { "epoch": 0.07385168302945301, "grad_norm": 0.2165805697441101, "learning_rate": 0.002, "loss": 2.5707, "step": 37070 }, { "epoch": 0.07387160525309193, "grad_norm": 0.15659019351005554, "learning_rate": 0.002, "loss": 2.5879, "step": 37080 }, { "epoch": 0.07389152747673085, "grad_norm": 0.1435033679008484, "learning_rate": 0.002, "loss": 2.5705, "step": 37090 }, { "epoch": 0.07391144970036975, "grad_norm": 0.18922929465770721, "learning_rate": 0.002, "loss": 2.5751, "step": 37100 }, { "epoch": 0.07393137192400867, "grad_norm": 0.17290206253528595, "learning_rate": 0.002, "loss": 2.5822, "step": 37110 }, { "epoch": 0.07395129414764759, "grad_norm": 0.1561581939458847, "learning_rate": 0.002, "loss": 2.585, "step": 37120 }, { "epoch": 0.0739712163712865, "grad_norm": 0.18192562460899353, "learning_rate": 0.002, "loss": 2.5934, "step": 37130 }, { "epoch": 0.07399113859492541, "grad_norm": 0.15770995616912842, "learning_rate": 0.002, "loss": 2.5686, "step": 37140 }, { "epoch": 0.07401106081856433, "grad_norm": 0.18027466535568237, "learning_rate": 0.002, "loss": 2.5642, "step": 37150 }, { "epoch": 0.07403098304220324, "grad_norm": 0.21496133506298065, "learning_rate": 0.002, "loss": 2.5825, "step": 37160 }, { "epoch": 0.07405090526584215, "grad_norm": 0.17681460082530975, "learning_rate": 0.002, "loss": 2.5741, "step": 37170 }, { "epoch": 0.07407082748948107, "grad_norm": 0.18730758130550385, "learning_rate": 0.002, "loss": 2.5693, "step": 37180 }, { "epoch": 0.07409074971311998, "grad_norm": 0.17125722765922546, "learning_rate": 0.002, "loss": 2.5931, "step": 37190 }, { "epoch": 0.0741106719367589, "grad_norm": 0.16088177263736725, "learning_rate": 0.002, "loss": 2.5809, "step": 37200 }, { "epoch": 0.0741305941603978, "grad_norm": 0.18272808194160461, "learning_rate": 0.002, "loss": 2.5894, "step": 37210 }, { "epoch": 0.07415051638403672, "grad_norm": 0.171370267868042, "learning_rate": 0.002, "loss": 2.5712, "step": 37220 }, { "epoch": 0.07417043860767564, "grad_norm": 0.1694844365119934, "learning_rate": 0.002, "loss": 2.5582, "step": 37230 }, { "epoch": 0.07419036083131454, "grad_norm": 0.2245549112558365, "learning_rate": 0.002, "loss": 2.5686, "step": 37240 }, { "epoch": 0.07421028305495346, "grad_norm": 0.1622442901134491, "learning_rate": 0.002, "loss": 2.5881, "step": 37250 }, { "epoch": 0.07423020527859238, "grad_norm": 0.16361109912395477, "learning_rate": 0.002, "loss": 2.5789, "step": 37260 }, { "epoch": 0.07425012750223128, "grad_norm": 0.16934069991111755, "learning_rate": 0.002, "loss": 2.5846, "step": 37270 }, { "epoch": 0.0742700497258702, "grad_norm": 0.1513328105211258, "learning_rate": 0.002, "loss": 2.5883, "step": 37280 }, { "epoch": 0.07428997194950912, "grad_norm": 0.15129141509532928, "learning_rate": 0.002, "loss": 2.5738, "step": 37290 }, { "epoch": 0.07430989417314803, "grad_norm": 0.19261710345745087, "learning_rate": 0.002, "loss": 2.5907, "step": 37300 }, { "epoch": 0.07432981639678694, "grad_norm": 0.1702430099248886, "learning_rate": 0.002, "loss": 2.5657, "step": 37310 }, { "epoch": 0.07434973862042586, "grad_norm": 0.18088151514530182, "learning_rate": 0.002, "loss": 2.5795, "step": 37320 }, { "epoch": 0.07436966084406477, "grad_norm": 0.2155427187681198, "learning_rate": 0.002, "loss": 2.5697, "step": 37330 }, { "epoch": 0.07438958306770369, "grad_norm": 0.15308886766433716, "learning_rate": 0.002, "loss": 2.5718, "step": 37340 }, { "epoch": 0.0744095052913426, "grad_norm": 0.23394057154655457, "learning_rate": 0.002, "loss": 2.5912, "step": 37350 }, { "epoch": 0.07442942751498151, "grad_norm": 0.187154158949852, "learning_rate": 0.002, "loss": 2.5708, "step": 37360 }, { "epoch": 0.07444934973862043, "grad_norm": 0.1607036590576172, "learning_rate": 0.002, "loss": 2.5739, "step": 37370 }, { "epoch": 0.07446927196225935, "grad_norm": 0.18038181960582733, "learning_rate": 0.002, "loss": 2.5795, "step": 37380 }, { "epoch": 0.07448919418589825, "grad_norm": 0.14793461561203003, "learning_rate": 0.002, "loss": 2.5824, "step": 37390 }, { "epoch": 0.07450911640953717, "grad_norm": 0.20466017723083496, "learning_rate": 0.002, "loss": 2.584, "step": 37400 }, { "epoch": 0.07452903863317609, "grad_norm": 0.19190002977848053, "learning_rate": 0.002, "loss": 2.584, "step": 37410 }, { "epoch": 0.07454896085681499, "grad_norm": 0.1489516943693161, "learning_rate": 0.002, "loss": 2.5785, "step": 37420 }, { "epoch": 0.07456888308045391, "grad_norm": 0.13486860692501068, "learning_rate": 0.002, "loss": 2.5908, "step": 37430 }, { "epoch": 0.07458880530409283, "grad_norm": 0.1545078456401825, "learning_rate": 0.002, "loss": 2.5749, "step": 37440 }, { "epoch": 0.07460872752773173, "grad_norm": 0.20059834420681, "learning_rate": 0.002, "loss": 2.5905, "step": 37450 }, { "epoch": 0.07462864975137065, "grad_norm": 0.15899790823459625, "learning_rate": 0.002, "loss": 2.584, "step": 37460 }, { "epoch": 0.07464857197500956, "grad_norm": 0.16951759159564972, "learning_rate": 0.002, "loss": 2.5891, "step": 37470 }, { "epoch": 0.07466849419864847, "grad_norm": 0.1561262607574463, "learning_rate": 0.002, "loss": 2.5802, "step": 37480 }, { "epoch": 0.0746884164222874, "grad_norm": 0.20032520592212677, "learning_rate": 0.002, "loss": 2.5877, "step": 37490 }, { "epoch": 0.0747083386459263, "grad_norm": 0.18809236586093903, "learning_rate": 0.002, "loss": 2.5762, "step": 37500 }, { "epoch": 0.07472826086956522, "grad_norm": 0.18265746533870697, "learning_rate": 0.002, "loss": 2.5831, "step": 37510 }, { "epoch": 0.07474818309320413, "grad_norm": 0.2128915935754776, "learning_rate": 0.002, "loss": 2.5937, "step": 37520 }, { "epoch": 0.07476810531684304, "grad_norm": 0.18596500158309937, "learning_rate": 0.002, "loss": 2.5859, "step": 37530 }, { "epoch": 0.07478802754048196, "grad_norm": 0.17784535884857178, "learning_rate": 0.002, "loss": 2.5936, "step": 37540 }, { "epoch": 0.07480794976412088, "grad_norm": 0.15859712660312653, "learning_rate": 0.002, "loss": 2.5672, "step": 37550 }, { "epoch": 0.07482787198775978, "grad_norm": 0.19881458580493927, "learning_rate": 0.002, "loss": 2.5898, "step": 37560 }, { "epoch": 0.0748477942113987, "grad_norm": 0.16521933674812317, "learning_rate": 0.002, "loss": 2.5947, "step": 37570 }, { "epoch": 0.07486771643503762, "grad_norm": 0.16976164281368256, "learning_rate": 0.002, "loss": 2.5711, "step": 37580 }, { "epoch": 0.07488763865867652, "grad_norm": 0.17106366157531738, "learning_rate": 0.002, "loss": 2.5795, "step": 37590 }, { "epoch": 0.07490756088231544, "grad_norm": 0.1728004664182663, "learning_rate": 0.002, "loss": 2.5881, "step": 37600 }, { "epoch": 0.07492748310595436, "grad_norm": 0.19443494081497192, "learning_rate": 0.002, "loss": 2.5741, "step": 37610 }, { "epoch": 0.07494740532959326, "grad_norm": 0.1669616848230362, "learning_rate": 0.002, "loss": 2.5738, "step": 37620 }, { "epoch": 0.07496732755323218, "grad_norm": 0.19538410007953644, "learning_rate": 0.002, "loss": 2.5875, "step": 37630 }, { "epoch": 0.0749872497768711, "grad_norm": 0.17546606063842773, "learning_rate": 0.002, "loss": 2.5691, "step": 37640 }, { "epoch": 0.07500717200051, "grad_norm": 0.19805335998535156, "learning_rate": 0.002, "loss": 2.5866, "step": 37650 }, { "epoch": 0.07502709422414892, "grad_norm": 0.16562317311763763, "learning_rate": 0.002, "loss": 2.5815, "step": 37660 }, { "epoch": 0.07504701644778784, "grad_norm": 0.15963327884674072, "learning_rate": 0.002, "loss": 2.5693, "step": 37670 }, { "epoch": 0.07506693867142675, "grad_norm": 0.16231350600719452, "learning_rate": 0.002, "loss": 2.5764, "step": 37680 }, { "epoch": 0.07508686089506567, "grad_norm": 0.15755070745944977, "learning_rate": 0.002, "loss": 2.5806, "step": 37690 }, { "epoch": 0.07510678311870458, "grad_norm": 0.1692740023136139, "learning_rate": 0.002, "loss": 2.5825, "step": 37700 }, { "epoch": 0.07512670534234349, "grad_norm": 0.20502746105194092, "learning_rate": 0.002, "loss": 2.5927, "step": 37710 }, { "epoch": 0.07514662756598241, "grad_norm": 0.16655778884887695, "learning_rate": 0.002, "loss": 2.5903, "step": 37720 }, { "epoch": 0.07516654978962131, "grad_norm": 0.17740608751773834, "learning_rate": 0.002, "loss": 2.5931, "step": 37730 }, { "epoch": 0.07518647201326023, "grad_norm": 0.19132472574710846, "learning_rate": 0.002, "loss": 2.5724, "step": 37740 }, { "epoch": 0.07520639423689915, "grad_norm": 0.17167812585830688, "learning_rate": 0.002, "loss": 2.5762, "step": 37750 }, { "epoch": 0.07522631646053805, "grad_norm": 0.19134943187236786, "learning_rate": 0.002, "loss": 2.5804, "step": 37760 }, { "epoch": 0.07524623868417697, "grad_norm": 0.16991755366325378, "learning_rate": 0.002, "loss": 2.5804, "step": 37770 }, { "epoch": 0.07526616090781589, "grad_norm": 0.18383383750915527, "learning_rate": 0.002, "loss": 2.5703, "step": 37780 }, { "epoch": 0.0752860831314548, "grad_norm": 0.1585446000099182, "learning_rate": 0.002, "loss": 2.5786, "step": 37790 }, { "epoch": 0.07530600535509371, "grad_norm": 0.19788554310798645, "learning_rate": 0.002, "loss": 2.5787, "step": 37800 }, { "epoch": 0.07532592757873263, "grad_norm": 0.15945369005203247, "learning_rate": 0.002, "loss": 2.5823, "step": 37810 }, { "epoch": 0.07534584980237154, "grad_norm": 0.1627752035856247, "learning_rate": 0.002, "loss": 2.5691, "step": 37820 }, { "epoch": 0.07536577202601046, "grad_norm": 0.15531368553638458, "learning_rate": 0.002, "loss": 2.5802, "step": 37830 }, { "epoch": 0.07538569424964937, "grad_norm": 0.16470861434936523, "learning_rate": 0.002, "loss": 2.5782, "step": 37840 }, { "epoch": 0.07540561647328828, "grad_norm": 0.1439896672964096, "learning_rate": 0.002, "loss": 2.5759, "step": 37850 }, { "epoch": 0.0754255386969272, "grad_norm": 0.1863107532262802, "learning_rate": 0.002, "loss": 2.5698, "step": 37860 }, { "epoch": 0.07544546092056612, "grad_norm": 0.1576426923274994, "learning_rate": 0.002, "loss": 2.5794, "step": 37870 }, { "epoch": 0.07546538314420502, "grad_norm": 0.24209751188755035, "learning_rate": 0.002, "loss": 2.5865, "step": 37880 }, { "epoch": 0.07548530536784394, "grad_norm": 0.14878486096858978, "learning_rate": 0.002, "loss": 2.5909, "step": 37890 }, { "epoch": 0.07550522759148286, "grad_norm": 0.14707225561141968, "learning_rate": 0.002, "loss": 2.5802, "step": 37900 }, { "epoch": 0.07552514981512176, "grad_norm": 0.22179578244686127, "learning_rate": 0.002, "loss": 2.5787, "step": 37910 }, { "epoch": 0.07554507203876068, "grad_norm": 0.17580446600914001, "learning_rate": 0.002, "loss": 2.5788, "step": 37920 }, { "epoch": 0.0755649942623996, "grad_norm": 0.1744176149368286, "learning_rate": 0.002, "loss": 2.582, "step": 37930 }, { "epoch": 0.0755849164860385, "grad_norm": 0.17823350429534912, "learning_rate": 0.002, "loss": 2.5913, "step": 37940 }, { "epoch": 0.07560483870967742, "grad_norm": 0.1455046832561493, "learning_rate": 0.002, "loss": 2.5825, "step": 37950 }, { "epoch": 0.07562476093331633, "grad_norm": 0.17945502698421478, "learning_rate": 0.002, "loss": 2.584, "step": 37960 }, { "epoch": 0.07564468315695524, "grad_norm": 0.1513303816318512, "learning_rate": 0.002, "loss": 2.5774, "step": 37970 }, { "epoch": 0.07566460538059416, "grad_norm": 0.20159313082695007, "learning_rate": 0.002, "loss": 2.582, "step": 37980 }, { "epoch": 0.07568452760423307, "grad_norm": 0.18568341434001923, "learning_rate": 0.002, "loss": 2.5709, "step": 37990 }, { "epoch": 0.07570444982787199, "grad_norm": 0.15360501408576965, "learning_rate": 0.002, "loss": 2.5689, "step": 38000 }, { "epoch": 0.0757243720515109, "grad_norm": 0.17220436036586761, "learning_rate": 0.002, "loss": 2.5772, "step": 38010 }, { "epoch": 0.07574429427514981, "grad_norm": 0.18719829618930817, "learning_rate": 0.002, "loss": 2.5807, "step": 38020 }, { "epoch": 0.07576421649878873, "grad_norm": 0.1695055365562439, "learning_rate": 0.002, "loss": 2.5722, "step": 38030 }, { "epoch": 0.07578413872242765, "grad_norm": 0.14275537431240082, "learning_rate": 0.002, "loss": 2.5824, "step": 38040 }, { "epoch": 0.07580406094606655, "grad_norm": 0.1894727349281311, "learning_rate": 0.002, "loss": 2.5852, "step": 38050 }, { "epoch": 0.07582398316970547, "grad_norm": 0.18796667456626892, "learning_rate": 0.002, "loss": 2.5663, "step": 38060 }, { "epoch": 0.07584390539334439, "grad_norm": 0.1454664021730423, "learning_rate": 0.002, "loss": 2.5814, "step": 38070 }, { "epoch": 0.07586382761698329, "grad_norm": 0.23347748816013336, "learning_rate": 0.002, "loss": 2.5681, "step": 38080 }, { "epoch": 0.07588374984062221, "grad_norm": 0.15224319696426392, "learning_rate": 0.002, "loss": 2.5801, "step": 38090 }, { "epoch": 0.07590367206426113, "grad_norm": 0.16960088908672333, "learning_rate": 0.002, "loss": 2.5732, "step": 38100 }, { "epoch": 0.07592359428790003, "grad_norm": 0.1553097367286682, "learning_rate": 0.002, "loss": 2.5753, "step": 38110 }, { "epoch": 0.07594351651153895, "grad_norm": 0.2022046595811844, "learning_rate": 0.002, "loss": 2.5825, "step": 38120 }, { "epoch": 0.07596343873517787, "grad_norm": 0.15191245079040527, "learning_rate": 0.002, "loss": 2.5721, "step": 38130 }, { "epoch": 0.07598336095881678, "grad_norm": 0.1577339470386505, "learning_rate": 0.002, "loss": 2.5997, "step": 38140 }, { "epoch": 0.0760032831824557, "grad_norm": 0.2034701257944107, "learning_rate": 0.002, "loss": 2.5879, "step": 38150 }, { "epoch": 0.07602320540609461, "grad_norm": 0.14187410473823547, "learning_rate": 0.002, "loss": 2.5678, "step": 38160 }, { "epoch": 0.07604312762973352, "grad_norm": 0.2061033993959427, "learning_rate": 0.002, "loss": 2.5761, "step": 38170 }, { "epoch": 0.07606304985337244, "grad_norm": 0.14750456809997559, "learning_rate": 0.002, "loss": 2.5832, "step": 38180 }, { "epoch": 0.07608297207701135, "grad_norm": 0.19548849761486053, "learning_rate": 0.002, "loss": 2.5912, "step": 38190 }, { "epoch": 0.07610289430065026, "grad_norm": 0.19220466911792755, "learning_rate": 0.002, "loss": 2.583, "step": 38200 }, { "epoch": 0.07612281652428918, "grad_norm": 0.15991808474063873, "learning_rate": 0.002, "loss": 2.5814, "step": 38210 }, { "epoch": 0.07614273874792808, "grad_norm": 0.17379549145698547, "learning_rate": 0.002, "loss": 2.5908, "step": 38220 }, { "epoch": 0.076162660971567, "grad_norm": 0.16768406331539154, "learning_rate": 0.002, "loss": 2.581, "step": 38230 }, { "epoch": 0.07618258319520592, "grad_norm": 0.17114660143852234, "learning_rate": 0.002, "loss": 2.581, "step": 38240 }, { "epoch": 0.07620250541884482, "grad_norm": 0.18225981295108795, "learning_rate": 0.002, "loss": 2.5791, "step": 38250 }, { "epoch": 0.07622242764248374, "grad_norm": 0.1772771179676056, "learning_rate": 0.002, "loss": 2.5717, "step": 38260 }, { "epoch": 0.07624234986612266, "grad_norm": 0.16706474125385284, "learning_rate": 0.002, "loss": 2.5885, "step": 38270 }, { "epoch": 0.07626227208976157, "grad_norm": 0.21912477910518646, "learning_rate": 0.002, "loss": 2.5812, "step": 38280 }, { "epoch": 0.07628219431340048, "grad_norm": 0.17018386721611023, "learning_rate": 0.002, "loss": 2.5815, "step": 38290 }, { "epoch": 0.0763021165370394, "grad_norm": 0.15063388645648956, "learning_rate": 0.002, "loss": 2.5956, "step": 38300 }, { "epoch": 0.0763220387606783, "grad_norm": 0.15049661695957184, "learning_rate": 0.002, "loss": 2.5723, "step": 38310 }, { "epoch": 0.07634196098431723, "grad_norm": 0.21721909940242767, "learning_rate": 0.002, "loss": 2.5824, "step": 38320 }, { "epoch": 0.07636188320795614, "grad_norm": 0.17794691026210785, "learning_rate": 0.002, "loss": 2.5766, "step": 38330 }, { "epoch": 0.07638180543159505, "grad_norm": 0.17029468715190887, "learning_rate": 0.002, "loss": 2.5663, "step": 38340 }, { "epoch": 0.07640172765523397, "grad_norm": 0.2324613332748413, "learning_rate": 0.002, "loss": 2.5826, "step": 38350 }, { "epoch": 0.07642164987887289, "grad_norm": 0.16166581213474274, "learning_rate": 0.002, "loss": 2.5757, "step": 38360 }, { "epoch": 0.07644157210251179, "grad_norm": 0.17792582511901855, "learning_rate": 0.002, "loss": 2.5739, "step": 38370 }, { "epoch": 0.07646149432615071, "grad_norm": 0.16466525197029114, "learning_rate": 0.002, "loss": 2.5925, "step": 38380 }, { "epoch": 0.07648141654978963, "grad_norm": 0.1625477522611618, "learning_rate": 0.002, "loss": 2.5978, "step": 38390 }, { "epoch": 0.07650133877342853, "grad_norm": 0.18046781420707703, "learning_rate": 0.002, "loss": 2.5684, "step": 38400 }, { "epoch": 0.07652126099706745, "grad_norm": 0.22036442160606384, "learning_rate": 0.002, "loss": 2.5701, "step": 38410 }, { "epoch": 0.07654118322070637, "grad_norm": 0.1875237226486206, "learning_rate": 0.002, "loss": 2.585, "step": 38420 }, { "epoch": 0.07656110544434527, "grad_norm": 0.1631677746772766, "learning_rate": 0.002, "loss": 2.5855, "step": 38430 }, { "epoch": 0.07658102766798419, "grad_norm": 0.14782436192035675, "learning_rate": 0.002, "loss": 2.57, "step": 38440 }, { "epoch": 0.07660094989162311, "grad_norm": 0.1942218691110611, "learning_rate": 0.002, "loss": 2.5539, "step": 38450 }, { "epoch": 0.07662087211526201, "grad_norm": 0.1986299306154251, "learning_rate": 0.002, "loss": 2.599, "step": 38460 }, { "epoch": 0.07664079433890093, "grad_norm": 0.14933523535728455, "learning_rate": 0.002, "loss": 2.573, "step": 38470 }, { "epoch": 0.07666071656253984, "grad_norm": 0.18381205201148987, "learning_rate": 0.002, "loss": 2.5722, "step": 38480 }, { "epoch": 0.07668063878617876, "grad_norm": 0.21820229291915894, "learning_rate": 0.002, "loss": 2.5612, "step": 38490 }, { "epoch": 0.07670056100981767, "grad_norm": 0.19381767511367798, "learning_rate": 0.002, "loss": 2.5728, "step": 38500 }, { "epoch": 0.07672048323345658, "grad_norm": 0.1582702249288559, "learning_rate": 0.002, "loss": 2.5833, "step": 38510 }, { "epoch": 0.0767404054570955, "grad_norm": 0.1899988055229187, "learning_rate": 0.002, "loss": 2.5771, "step": 38520 }, { "epoch": 0.07676032768073442, "grad_norm": 0.15557001531124115, "learning_rate": 0.002, "loss": 2.5765, "step": 38530 }, { "epoch": 0.07678024990437332, "grad_norm": 0.15760542452335358, "learning_rate": 0.002, "loss": 2.5723, "step": 38540 }, { "epoch": 0.07680017212801224, "grad_norm": 0.15874966979026794, "learning_rate": 0.002, "loss": 2.5983, "step": 38550 }, { "epoch": 0.07682009435165116, "grad_norm": 0.16340796649456024, "learning_rate": 0.002, "loss": 2.5811, "step": 38560 }, { "epoch": 0.07684001657529006, "grad_norm": 0.1710335612297058, "learning_rate": 0.002, "loss": 2.5823, "step": 38570 }, { "epoch": 0.07685993879892898, "grad_norm": 0.21769991517066956, "learning_rate": 0.002, "loss": 2.5797, "step": 38580 }, { "epoch": 0.0768798610225679, "grad_norm": 0.15999849140644073, "learning_rate": 0.002, "loss": 2.5704, "step": 38590 }, { "epoch": 0.0768997832462068, "grad_norm": 0.18701346218585968, "learning_rate": 0.002, "loss": 2.5816, "step": 38600 }, { "epoch": 0.07691970546984572, "grad_norm": 0.1976521760225296, "learning_rate": 0.002, "loss": 2.5759, "step": 38610 }, { "epoch": 0.07693962769348464, "grad_norm": 0.17333319783210754, "learning_rate": 0.002, "loss": 2.5749, "step": 38620 }, { "epoch": 0.07695954991712355, "grad_norm": 0.18329501152038574, "learning_rate": 0.002, "loss": 2.5891, "step": 38630 }, { "epoch": 0.07697947214076246, "grad_norm": 0.18261729180812836, "learning_rate": 0.002, "loss": 2.5774, "step": 38640 }, { "epoch": 0.07699939436440138, "grad_norm": 0.17374329268932343, "learning_rate": 0.002, "loss": 2.5892, "step": 38650 }, { "epoch": 0.07701931658804029, "grad_norm": 0.17018023133277893, "learning_rate": 0.002, "loss": 2.5919, "step": 38660 }, { "epoch": 0.0770392388116792, "grad_norm": 0.18050932884216309, "learning_rate": 0.002, "loss": 2.5785, "step": 38670 }, { "epoch": 0.07705916103531812, "grad_norm": 0.17677432298660278, "learning_rate": 0.002, "loss": 2.5803, "step": 38680 }, { "epoch": 0.07707908325895703, "grad_norm": 0.153480663895607, "learning_rate": 0.002, "loss": 2.5987, "step": 38690 }, { "epoch": 0.07709900548259595, "grad_norm": 0.14895488321781158, "learning_rate": 0.002, "loss": 2.5827, "step": 38700 }, { "epoch": 0.07711892770623487, "grad_norm": 0.16423088312149048, "learning_rate": 0.002, "loss": 2.5773, "step": 38710 }, { "epoch": 0.07713884992987377, "grad_norm": 0.16711845993995667, "learning_rate": 0.002, "loss": 2.5631, "step": 38720 }, { "epoch": 0.07715877215351269, "grad_norm": 0.19532230496406555, "learning_rate": 0.002, "loss": 2.5911, "step": 38730 }, { "epoch": 0.0771786943771516, "grad_norm": 0.21041253209114075, "learning_rate": 0.002, "loss": 2.5725, "step": 38740 }, { "epoch": 0.07719861660079051, "grad_norm": 0.17028741538524628, "learning_rate": 0.002, "loss": 2.5711, "step": 38750 }, { "epoch": 0.07721853882442943, "grad_norm": 0.1390230357646942, "learning_rate": 0.002, "loss": 2.5699, "step": 38760 }, { "epoch": 0.07723846104806834, "grad_norm": 0.17881718277931213, "learning_rate": 0.002, "loss": 2.5922, "step": 38770 }, { "epoch": 0.07725838327170725, "grad_norm": 0.16480231285095215, "learning_rate": 0.002, "loss": 2.5818, "step": 38780 }, { "epoch": 0.07727830549534617, "grad_norm": 0.1780325472354889, "learning_rate": 0.002, "loss": 2.5646, "step": 38790 }, { "epoch": 0.07729822771898508, "grad_norm": 0.18106497824192047, "learning_rate": 0.002, "loss": 2.5756, "step": 38800 }, { "epoch": 0.077318149942624, "grad_norm": 0.1824197918176651, "learning_rate": 0.002, "loss": 2.5744, "step": 38810 }, { "epoch": 0.07733807216626291, "grad_norm": 0.17989221215248108, "learning_rate": 0.002, "loss": 2.5827, "step": 38820 }, { "epoch": 0.07735799438990182, "grad_norm": 0.17942078411579132, "learning_rate": 0.002, "loss": 2.5784, "step": 38830 }, { "epoch": 0.07737791661354074, "grad_norm": 0.14341196417808533, "learning_rate": 0.002, "loss": 2.5965, "step": 38840 }, { "epoch": 0.07739783883717966, "grad_norm": 0.1957087367773056, "learning_rate": 0.002, "loss": 2.5763, "step": 38850 }, { "epoch": 0.07741776106081856, "grad_norm": 0.18680380284786224, "learning_rate": 0.002, "loss": 2.5681, "step": 38860 }, { "epoch": 0.07743768328445748, "grad_norm": 0.20050010085105896, "learning_rate": 0.002, "loss": 2.5628, "step": 38870 }, { "epoch": 0.0774576055080964, "grad_norm": 0.15218651294708252, "learning_rate": 0.002, "loss": 2.5797, "step": 38880 }, { "epoch": 0.0774775277317353, "grad_norm": 0.15887874364852905, "learning_rate": 0.002, "loss": 2.5849, "step": 38890 }, { "epoch": 0.07749744995537422, "grad_norm": 0.161271870136261, "learning_rate": 0.002, "loss": 2.5872, "step": 38900 }, { "epoch": 0.07751737217901314, "grad_norm": 0.15626825392246246, "learning_rate": 0.002, "loss": 2.5747, "step": 38910 }, { "epoch": 0.07753729440265204, "grad_norm": 0.1639796644449234, "learning_rate": 0.002, "loss": 2.5655, "step": 38920 }, { "epoch": 0.07755721662629096, "grad_norm": 0.2099258452653885, "learning_rate": 0.002, "loss": 2.5826, "step": 38930 }, { "epoch": 0.07757713884992988, "grad_norm": 0.15161889791488647, "learning_rate": 0.002, "loss": 2.5655, "step": 38940 }, { "epoch": 0.07759706107356878, "grad_norm": 0.16908586025238037, "learning_rate": 0.002, "loss": 2.5734, "step": 38950 }, { "epoch": 0.0776169832972077, "grad_norm": 0.1814139038324356, "learning_rate": 0.002, "loss": 2.5791, "step": 38960 }, { "epoch": 0.07763690552084661, "grad_norm": 0.18151408433914185, "learning_rate": 0.002, "loss": 2.5776, "step": 38970 }, { "epoch": 0.07765682774448553, "grad_norm": 0.21093709766864777, "learning_rate": 0.002, "loss": 2.579, "step": 38980 }, { "epoch": 0.07767674996812444, "grad_norm": 0.18253584206104279, "learning_rate": 0.002, "loss": 2.5784, "step": 38990 }, { "epoch": 0.07769667219176335, "grad_norm": 0.17417976260185242, "learning_rate": 0.002, "loss": 2.5774, "step": 39000 }, { "epoch": 0.07771659441540227, "grad_norm": 0.14624981582164764, "learning_rate": 0.002, "loss": 2.5821, "step": 39010 }, { "epoch": 0.07773651663904119, "grad_norm": 0.15129652619361877, "learning_rate": 0.002, "loss": 2.5734, "step": 39020 }, { "epoch": 0.07775643886268009, "grad_norm": 0.19318869709968567, "learning_rate": 0.002, "loss": 2.5738, "step": 39030 }, { "epoch": 0.07777636108631901, "grad_norm": 0.1836967170238495, "learning_rate": 0.002, "loss": 2.5732, "step": 39040 }, { "epoch": 0.07779628330995793, "grad_norm": 0.201280876994133, "learning_rate": 0.002, "loss": 2.5897, "step": 39050 }, { "epoch": 0.07781620553359683, "grad_norm": 0.14193838834762573, "learning_rate": 0.002, "loss": 2.5881, "step": 39060 }, { "epoch": 0.07783612775723575, "grad_norm": 0.17169925570487976, "learning_rate": 0.002, "loss": 2.5759, "step": 39070 }, { "epoch": 0.07785604998087467, "grad_norm": 0.17010444402694702, "learning_rate": 0.002, "loss": 2.5946, "step": 39080 }, { "epoch": 0.07787597220451357, "grad_norm": 0.15628385543823242, "learning_rate": 0.002, "loss": 2.5846, "step": 39090 }, { "epoch": 0.07789589442815249, "grad_norm": 0.16766059398651123, "learning_rate": 0.002, "loss": 2.5824, "step": 39100 }, { "epoch": 0.07791581665179141, "grad_norm": 0.18525934219360352, "learning_rate": 0.002, "loss": 2.5896, "step": 39110 }, { "epoch": 0.07793573887543032, "grad_norm": 0.16232231259346008, "learning_rate": 0.002, "loss": 2.57, "step": 39120 }, { "epoch": 0.07795566109906923, "grad_norm": 0.21811746060848236, "learning_rate": 0.002, "loss": 2.5786, "step": 39130 }, { "epoch": 0.07797558332270815, "grad_norm": 0.18582162261009216, "learning_rate": 0.002, "loss": 2.5745, "step": 39140 }, { "epoch": 0.07799550554634706, "grad_norm": 0.19804733991622925, "learning_rate": 0.002, "loss": 2.5825, "step": 39150 }, { "epoch": 0.07801542776998598, "grad_norm": 0.15772943198680878, "learning_rate": 0.002, "loss": 2.5786, "step": 39160 }, { "epoch": 0.0780353499936249, "grad_norm": 0.2567611038684845, "learning_rate": 0.002, "loss": 2.5763, "step": 39170 }, { "epoch": 0.0780552722172638, "grad_norm": 0.16197411715984344, "learning_rate": 0.002, "loss": 2.5848, "step": 39180 }, { "epoch": 0.07807519444090272, "grad_norm": 0.17722788453102112, "learning_rate": 0.002, "loss": 2.5752, "step": 39190 }, { "epoch": 0.07809511666454164, "grad_norm": 0.16575506329536438, "learning_rate": 0.002, "loss": 2.5854, "step": 39200 }, { "epoch": 0.07811503888818054, "grad_norm": 0.1830284297466278, "learning_rate": 0.002, "loss": 2.5749, "step": 39210 }, { "epoch": 0.07813496111181946, "grad_norm": 0.1861298829317093, "learning_rate": 0.002, "loss": 2.5768, "step": 39220 }, { "epoch": 0.07815488333545836, "grad_norm": 0.18357114493846893, "learning_rate": 0.002, "loss": 2.5674, "step": 39230 }, { "epoch": 0.07817480555909728, "grad_norm": 0.14983272552490234, "learning_rate": 0.002, "loss": 2.5954, "step": 39240 }, { "epoch": 0.0781947277827362, "grad_norm": 0.16569143533706665, "learning_rate": 0.002, "loss": 2.5876, "step": 39250 }, { "epoch": 0.0782146500063751, "grad_norm": 0.17592217028141022, "learning_rate": 0.002, "loss": 2.5833, "step": 39260 }, { "epoch": 0.07823457223001402, "grad_norm": 0.16973067820072174, "learning_rate": 0.002, "loss": 2.5845, "step": 39270 }, { "epoch": 0.07825449445365294, "grad_norm": 0.18288639187812805, "learning_rate": 0.002, "loss": 2.5599, "step": 39280 }, { "epoch": 0.07827441667729185, "grad_norm": 0.17925752699375153, "learning_rate": 0.002, "loss": 2.5772, "step": 39290 }, { "epoch": 0.07829433890093077, "grad_norm": 0.18685021996498108, "learning_rate": 0.002, "loss": 2.5853, "step": 39300 }, { "epoch": 0.07831426112456968, "grad_norm": 0.16959430277347565, "learning_rate": 0.002, "loss": 2.5636, "step": 39310 }, { "epoch": 0.07833418334820859, "grad_norm": 0.18509480357170105, "learning_rate": 0.002, "loss": 2.5905, "step": 39320 }, { "epoch": 0.07835410557184751, "grad_norm": 0.17000623047351837, "learning_rate": 0.002, "loss": 2.5769, "step": 39330 }, { "epoch": 0.07837402779548643, "grad_norm": 0.2036254107952118, "learning_rate": 0.002, "loss": 2.5823, "step": 39340 }, { "epoch": 0.07839395001912533, "grad_norm": 0.1612783521413803, "learning_rate": 0.002, "loss": 2.5786, "step": 39350 }, { "epoch": 0.07841387224276425, "grad_norm": 0.1793815642595291, "learning_rate": 0.002, "loss": 2.5731, "step": 39360 }, { "epoch": 0.07843379446640317, "grad_norm": 0.18643400073051453, "learning_rate": 0.002, "loss": 2.5833, "step": 39370 }, { "epoch": 0.07845371669004207, "grad_norm": 0.1695050746202469, "learning_rate": 0.002, "loss": 2.58, "step": 39380 }, { "epoch": 0.07847363891368099, "grad_norm": 0.16447390615940094, "learning_rate": 0.002, "loss": 2.5731, "step": 39390 }, { "epoch": 0.07849356113731991, "grad_norm": 0.14978007972240448, "learning_rate": 0.002, "loss": 2.5797, "step": 39400 }, { "epoch": 0.07851348336095881, "grad_norm": 0.2294880598783493, "learning_rate": 0.002, "loss": 2.584, "step": 39410 }, { "epoch": 0.07853340558459773, "grad_norm": 0.17166262865066528, "learning_rate": 0.002, "loss": 2.5617, "step": 39420 }, { "epoch": 0.07855332780823665, "grad_norm": 0.14863264560699463, "learning_rate": 0.002, "loss": 2.56, "step": 39430 }, { "epoch": 0.07857325003187556, "grad_norm": 0.1489374339580536, "learning_rate": 0.002, "loss": 2.5876, "step": 39440 }, { "epoch": 0.07859317225551447, "grad_norm": 0.18151822686195374, "learning_rate": 0.002, "loss": 2.5894, "step": 39450 }, { "epoch": 0.07861309447915339, "grad_norm": 0.18216127157211304, "learning_rate": 0.002, "loss": 2.5734, "step": 39460 }, { "epoch": 0.0786330167027923, "grad_norm": 0.15336667001247406, "learning_rate": 0.002, "loss": 2.5909, "step": 39470 }, { "epoch": 0.07865293892643122, "grad_norm": 0.16591675579547882, "learning_rate": 0.002, "loss": 2.5791, "step": 39480 }, { "epoch": 0.07867286115007012, "grad_norm": 0.19586461782455444, "learning_rate": 0.002, "loss": 2.5729, "step": 39490 }, { "epoch": 0.07869278337370904, "grad_norm": 0.16812700033187866, "learning_rate": 0.002, "loss": 2.5832, "step": 39500 }, { "epoch": 0.07871270559734796, "grad_norm": 0.1840260773897171, "learning_rate": 0.002, "loss": 2.5691, "step": 39510 }, { "epoch": 0.07873262782098686, "grad_norm": 0.1837407648563385, "learning_rate": 0.002, "loss": 2.5809, "step": 39520 }, { "epoch": 0.07875255004462578, "grad_norm": 0.19910672307014465, "learning_rate": 0.002, "loss": 2.5893, "step": 39530 }, { "epoch": 0.0787724722682647, "grad_norm": 0.18282835185527802, "learning_rate": 0.002, "loss": 2.5761, "step": 39540 }, { "epoch": 0.0787923944919036, "grad_norm": 0.1839141696691513, "learning_rate": 0.002, "loss": 2.5738, "step": 39550 }, { "epoch": 0.07881231671554252, "grad_norm": 0.17318758368492126, "learning_rate": 0.002, "loss": 2.5894, "step": 39560 }, { "epoch": 0.07883223893918144, "grad_norm": 0.1652703583240509, "learning_rate": 0.002, "loss": 2.5712, "step": 39570 }, { "epoch": 0.07885216116282034, "grad_norm": 0.21539495885372162, "learning_rate": 0.002, "loss": 2.5795, "step": 39580 }, { "epoch": 0.07887208338645926, "grad_norm": 0.1895311176776886, "learning_rate": 0.002, "loss": 2.5759, "step": 39590 }, { "epoch": 0.07889200561009818, "grad_norm": 0.15909232199192047, "learning_rate": 0.002, "loss": 2.5717, "step": 39600 }, { "epoch": 0.07891192783373709, "grad_norm": 0.16626691818237305, "learning_rate": 0.002, "loss": 2.581, "step": 39610 }, { "epoch": 0.078931850057376, "grad_norm": 0.14912153780460358, "learning_rate": 0.002, "loss": 2.5794, "step": 39620 }, { "epoch": 0.07895177228101492, "grad_norm": 0.1627683788537979, "learning_rate": 0.002, "loss": 2.5748, "step": 39630 }, { "epoch": 0.07897169450465383, "grad_norm": 0.18070518970489502, "learning_rate": 0.002, "loss": 2.5706, "step": 39640 }, { "epoch": 0.07899161672829275, "grad_norm": 0.17068351805210114, "learning_rate": 0.002, "loss": 2.5924, "step": 39650 }, { "epoch": 0.07901153895193166, "grad_norm": 0.19548381865024567, "learning_rate": 0.002, "loss": 2.568, "step": 39660 }, { "epoch": 0.07903146117557057, "grad_norm": 0.15956924855709076, "learning_rate": 0.002, "loss": 2.5642, "step": 39670 }, { "epoch": 0.07905138339920949, "grad_norm": 0.1637202352285385, "learning_rate": 0.002, "loss": 2.5833, "step": 39680 }, { "epoch": 0.0790713056228484, "grad_norm": 0.20306187868118286, "learning_rate": 0.002, "loss": 2.5679, "step": 39690 }, { "epoch": 0.07909122784648731, "grad_norm": 0.17696839570999146, "learning_rate": 0.002, "loss": 2.5868, "step": 39700 }, { "epoch": 0.07911115007012623, "grad_norm": 0.1764955073595047, "learning_rate": 0.002, "loss": 2.5814, "step": 39710 }, { "epoch": 0.07913107229376513, "grad_norm": 0.14654459059238434, "learning_rate": 0.002, "loss": 2.5762, "step": 39720 }, { "epoch": 0.07915099451740405, "grad_norm": 0.17653045058250427, "learning_rate": 0.002, "loss": 2.5834, "step": 39730 }, { "epoch": 0.07917091674104297, "grad_norm": 0.16446785628795624, "learning_rate": 0.002, "loss": 2.5845, "step": 39740 }, { "epoch": 0.07919083896468188, "grad_norm": 0.1575869768857956, "learning_rate": 0.002, "loss": 2.5731, "step": 39750 }, { "epoch": 0.0792107611883208, "grad_norm": 0.17703118920326233, "learning_rate": 0.002, "loss": 2.583, "step": 39760 }, { "epoch": 0.07923068341195971, "grad_norm": 0.20890650153160095, "learning_rate": 0.002, "loss": 2.5917, "step": 39770 }, { "epoch": 0.07925060563559862, "grad_norm": 0.153142049908638, "learning_rate": 0.002, "loss": 2.5829, "step": 39780 }, { "epoch": 0.07927052785923754, "grad_norm": 0.16518287360668182, "learning_rate": 0.002, "loss": 2.5892, "step": 39790 }, { "epoch": 0.07929045008287645, "grad_norm": 0.16993370652198792, "learning_rate": 0.002, "loss": 2.5765, "step": 39800 }, { "epoch": 0.07931037230651536, "grad_norm": 0.17045529186725616, "learning_rate": 0.002, "loss": 2.5911, "step": 39810 }, { "epoch": 0.07933029453015428, "grad_norm": 0.2040574848651886, "learning_rate": 0.002, "loss": 2.5832, "step": 39820 }, { "epoch": 0.0793502167537932, "grad_norm": 0.18239831924438477, "learning_rate": 0.002, "loss": 2.5747, "step": 39830 }, { "epoch": 0.0793701389774321, "grad_norm": 0.4362499713897705, "learning_rate": 0.002, "loss": 2.574, "step": 39840 }, { "epoch": 0.07939006120107102, "grad_norm": 0.1521136611700058, "learning_rate": 0.002, "loss": 2.5965, "step": 39850 }, { "epoch": 0.07940998342470994, "grad_norm": 0.21590498089790344, "learning_rate": 0.002, "loss": 2.5922, "step": 39860 }, { "epoch": 0.07942990564834884, "grad_norm": 0.16794851422309875, "learning_rate": 0.002, "loss": 2.575, "step": 39870 }, { "epoch": 0.07944982787198776, "grad_norm": 0.18079861998558044, "learning_rate": 0.002, "loss": 2.574, "step": 39880 }, { "epoch": 0.07946975009562668, "grad_norm": 0.1674220860004425, "learning_rate": 0.002, "loss": 2.5834, "step": 39890 }, { "epoch": 0.07948967231926558, "grad_norm": 0.15960776805877686, "learning_rate": 0.002, "loss": 2.5761, "step": 39900 }, { "epoch": 0.0795095945429045, "grad_norm": 0.15700489282608032, "learning_rate": 0.002, "loss": 2.5767, "step": 39910 }, { "epoch": 0.07952951676654342, "grad_norm": 0.15294277667999268, "learning_rate": 0.002, "loss": 2.5849, "step": 39920 }, { "epoch": 0.07954943899018233, "grad_norm": 0.20836186408996582, "learning_rate": 0.002, "loss": 2.5748, "step": 39930 }, { "epoch": 0.07956936121382124, "grad_norm": 0.13575370609760284, "learning_rate": 0.002, "loss": 2.573, "step": 39940 }, { "epoch": 0.07958928343746016, "grad_norm": 0.1870923638343811, "learning_rate": 0.002, "loss": 2.5877, "step": 39950 }, { "epoch": 0.07960920566109907, "grad_norm": 0.17062820494174957, "learning_rate": 0.002, "loss": 2.5915, "step": 39960 }, { "epoch": 0.07962912788473799, "grad_norm": 0.1713099330663681, "learning_rate": 0.002, "loss": 2.5633, "step": 39970 }, { "epoch": 0.07964905010837689, "grad_norm": 0.1588384062051773, "learning_rate": 0.002, "loss": 2.5755, "step": 39980 }, { "epoch": 0.07966897233201581, "grad_norm": 0.15501104295253754, "learning_rate": 0.002, "loss": 2.5823, "step": 39990 }, { "epoch": 0.07968889455565473, "grad_norm": 0.1767406314611435, "learning_rate": 0.002, "loss": 2.5736, "step": 40000 }, { "epoch": 0.07970881677929363, "grad_norm": 0.207200288772583, "learning_rate": 0.002, "loss": 2.5707, "step": 40010 }, { "epoch": 0.07972873900293255, "grad_norm": 0.1535424143075943, "learning_rate": 0.002, "loss": 2.5789, "step": 40020 }, { "epoch": 0.07974866122657147, "grad_norm": 0.17771096527576447, "learning_rate": 0.002, "loss": 2.5684, "step": 40030 }, { "epoch": 0.07976858345021037, "grad_norm": 0.17174650728702545, "learning_rate": 0.002, "loss": 2.5816, "step": 40040 }, { "epoch": 0.07978850567384929, "grad_norm": 0.1866033673286438, "learning_rate": 0.002, "loss": 2.5873, "step": 40050 }, { "epoch": 0.07980842789748821, "grad_norm": 0.16258057951927185, "learning_rate": 0.002, "loss": 2.5589, "step": 40060 }, { "epoch": 0.07982835012112711, "grad_norm": 0.17045637965202332, "learning_rate": 0.002, "loss": 2.5867, "step": 40070 }, { "epoch": 0.07984827234476603, "grad_norm": 0.166195347905159, "learning_rate": 0.002, "loss": 2.5752, "step": 40080 }, { "epoch": 0.07986819456840495, "grad_norm": 0.1828908622264862, "learning_rate": 0.002, "loss": 2.5826, "step": 40090 }, { "epoch": 0.07988811679204386, "grad_norm": 0.17328083515167236, "learning_rate": 0.002, "loss": 2.5781, "step": 40100 }, { "epoch": 0.07990803901568277, "grad_norm": 0.17924025654792786, "learning_rate": 0.002, "loss": 2.5756, "step": 40110 }, { "epoch": 0.0799279612393217, "grad_norm": 0.18239173293113708, "learning_rate": 0.002, "loss": 2.5785, "step": 40120 }, { "epoch": 0.0799478834629606, "grad_norm": 0.17805926501750946, "learning_rate": 0.002, "loss": 2.5782, "step": 40130 }, { "epoch": 0.07996780568659952, "grad_norm": 0.1697738617658615, "learning_rate": 0.002, "loss": 2.5652, "step": 40140 }, { "epoch": 0.07998772791023843, "grad_norm": 0.16152827441692352, "learning_rate": 0.002, "loss": 2.5681, "step": 40150 }, { "epoch": 0.08000765013387734, "grad_norm": 0.17580458521842957, "learning_rate": 0.002, "loss": 2.5791, "step": 40160 }, { "epoch": 0.08002757235751626, "grad_norm": 0.15695175528526306, "learning_rate": 0.002, "loss": 2.5739, "step": 40170 }, { "epoch": 0.08004749458115518, "grad_norm": 0.17288975417613983, "learning_rate": 0.002, "loss": 2.5921, "step": 40180 }, { "epoch": 0.08006741680479408, "grad_norm": 0.1724284142255783, "learning_rate": 0.002, "loss": 2.6079, "step": 40190 }, { "epoch": 0.080087339028433, "grad_norm": 0.14565010368824005, "learning_rate": 0.002, "loss": 2.5791, "step": 40200 }, { "epoch": 0.08010726125207192, "grad_norm": 0.18533983826637268, "learning_rate": 0.002, "loss": 2.5621, "step": 40210 }, { "epoch": 0.08012718347571082, "grad_norm": 0.18798255920410156, "learning_rate": 0.002, "loss": 2.5904, "step": 40220 }, { "epoch": 0.08014710569934974, "grad_norm": 0.16062507033348083, "learning_rate": 0.002, "loss": 2.5882, "step": 40230 }, { "epoch": 0.08016702792298865, "grad_norm": 0.1972459852695465, "learning_rate": 0.002, "loss": 2.5695, "step": 40240 }, { "epoch": 0.08018695014662756, "grad_norm": 0.14755363762378693, "learning_rate": 0.002, "loss": 2.5801, "step": 40250 }, { "epoch": 0.08020687237026648, "grad_norm": 0.18703056871891022, "learning_rate": 0.002, "loss": 2.5855, "step": 40260 }, { "epoch": 0.08022679459390539, "grad_norm": 0.18925340473651886, "learning_rate": 0.002, "loss": 2.5951, "step": 40270 }, { "epoch": 0.0802467168175443, "grad_norm": 0.16863705217838287, "learning_rate": 0.002, "loss": 2.5894, "step": 40280 }, { "epoch": 0.08026663904118322, "grad_norm": 0.1579178273677826, "learning_rate": 0.002, "loss": 2.5686, "step": 40290 }, { "epoch": 0.08028656126482213, "grad_norm": 0.1726887971162796, "learning_rate": 0.002, "loss": 2.5826, "step": 40300 }, { "epoch": 0.08030648348846105, "grad_norm": 0.19151534140110016, "learning_rate": 0.002, "loss": 2.5668, "step": 40310 }, { "epoch": 0.08032640571209997, "grad_norm": 0.17195221781730652, "learning_rate": 0.002, "loss": 2.5819, "step": 40320 }, { "epoch": 0.08034632793573887, "grad_norm": 0.17116834223270416, "learning_rate": 0.002, "loss": 2.5841, "step": 40330 }, { "epoch": 0.08036625015937779, "grad_norm": 0.15394844114780426, "learning_rate": 0.002, "loss": 2.5887, "step": 40340 }, { "epoch": 0.08038617238301671, "grad_norm": 0.17755818367004395, "learning_rate": 0.002, "loss": 2.5657, "step": 40350 }, { "epoch": 0.08040609460665561, "grad_norm": 0.14417368173599243, "learning_rate": 0.002, "loss": 2.5831, "step": 40360 }, { "epoch": 0.08042601683029453, "grad_norm": 0.1745554804801941, "learning_rate": 0.002, "loss": 2.5854, "step": 40370 }, { "epoch": 0.08044593905393345, "grad_norm": 0.18164749443531036, "learning_rate": 0.002, "loss": 2.5636, "step": 40380 }, { "epoch": 0.08046586127757235, "grad_norm": 0.16621439158916473, "learning_rate": 0.002, "loss": 2.59, "step": 40390 }, { "epoch": 0.08048578350121127, "grad_norm": 0.14520376920700073, "learning_rate": 0.002, "loss": 2.5704, "step": 40400 }, { "epoch": 0.08050570572485019, "grad_norm": 0.18884477019309998, "learning_rate": 0.002, "loss": 2.5851, "step": 40410 }, { "epoch": 0.0805256279484891, "grad_norm": 0.17985118925571442, "learning_rate": 0.002, "loss": 2.5831, "step": 40420 }, { "epoch": 0.08054555017212801, "grad_norm": 0.19432035088539124, "learning_rate": 0.002, "loss": 2.578, "step": 40430 }, { "epoch": 0.08056547239576693, "grad_norm": 0.1711464673280716, "learning_rate": 0.002, "loss": 2.5787, "step": 40440 }, { "epoch": 0.08058539461940584, "grad_norm": 0.1526443064212799, "learning_rate": 0.002, "loss": 2.5747, "step": 40450 }, { "epoch": 0.08060531684304476, "grad_norm": 0.17627213895320892, "learning_rate": 0.002, "loss": 2.5978, "step": 40460 }, { "epoch": 0.08062523906668367, "grad_norm": 0.1863989531993866, "learning_rate": 0.002, "loss": 2.5708, "step": 40470 }, { "epoch": 0.08064516129032258, "grad_norm": 0.1624610722064972, "learning_rate": 0.002, "loss": 2.5773, "step": 40480 }, { "epoch": 0.0806650835139615, "grad_norm": 0.19058921933174133, "learning_rate": 0.002, "loss": 2.5877, "step": 40490 }, { "epoch": 0.0806850057376004, "grad_norm": 0.16840825974941254, "learning_rate": 0.002, "loss": 2.5851, "step": 40500 }, { "epoch": 0.08070492796123932, "grad_norm": 0.16875998675823212, "learning_rate": 0.002, "loss": 2.5638, "step": 40510 }, { "epoch": 0.08072485018487824, "grad_norm": 0.16610971093177795, "learning_rate": 0.002, "loss": 2.5768, "step": 40520 }, { "epoch": 0.08074477240851714, "grad_norm": 0.19486333429813385, "learning_rate": 0.002, "loss": 2.5697, "step": 40530 }, { "epoch": 0.08076469463215606, "grad_norm": 0.1571742445230484, "learning_rate": 0.002, "loss": 2.5842, "step": 40540 }, { "epoch": 0.08078461685579498, "grad_norm": 0.17510581016540527, "learning_rate": 0.002, "loss": 2.5742, "step": 40550 }, { "epoch": 0.08080453907943388, "grad_norm": 0.19194220006465912, "learning_rate": 0.002, "loss": 2.5742, "step": 40560 }, { "epoch": 0.0808244613030728, "grad_norm": 0.18941904604434967, "learning_rate": 0.002, "loss": 2.5787, "step": 40570 }, { "epoch": 0.08084438352671172, "grad_norm": 0.16640637814998627, "learning_rate": 0.002, "loss": 2.5657, "step": 40580 }, { "epoch": 0.08086430575035063, "grad_norm": 0.17727957665920258, "learning_rate": 0.002, "loss": 2.5751, "step": 40590 }, { "epoch": 0.08088422797398954, "grad_norm": 0.16569332778453827, "learning_rate": 0.002, "loss": 2.5742, "step": 40600 }, { "epoch": 0.08090415019762846, "grad_norm": 0.19277487695217133, "learning_rate": 0.002, "loss": 2.5775, "step": 40610 }, { "epoch": 0.08092407242126737, "grad_norm": 0.1559244692325592, "learning_rate": 0.002, "loss": 2.5781, "step": 40620 }, { "epoch": 0.08094399464490629, "grad_norm": 0.24669764935970306, "learning_rate": 0.002, "loss": 2.5647, "step": 40630 }, { "epoch": 0.0809639168685452, "grad_norm": 0.1585988700389862, "learning_rate": 0.002, "loss": 2.5721, "step": 40640 }, { "epoch": 0.08098383909218411, "grad_norm": 0.16487190127372742, "learning_rate": 0.002, "loss": 2.5835, "step": 40650 }, { "epoch": 0.08100376131582303, "grad_norm": 0.2036055475473404, "learning_rate": 0.002, "loss": 2.5785, "step": 40660 }, { "epoch": 0.08102368353946195, "grad_norm": 0.1907365769147873, "learning_rate": 0.002, "loss": 2.5671, "step": 40670 }, { "epoch": 0.08104360576310085, "grad_norm": 0.15133433043956757, "learning_rate": 0.002, "loss": 2.5724, "step": 40680 }, { "epoch": 0.08106352798673977, "grad_norm": 0.18778710067272186, "learning_rate": 0.002, "loss": 2.5825, "step": 40690 }, { "epoch": 0.08108345021037869, "grad_norm": 0.1704799085855484, "learning_rate": 0.002, "loss": 2.6002, "step": 40700 }, { "epoch": 0.08110337243401759, "grad_norm": 0.14768469333648682, "learning_rate": 0.002, "loss": 2.597, "step": 40710 }, { "epoch": 0.08112329465765651, "grad_norm": 0.17261241376399994, "learning_rate": 0.002, "loss": 2.5859, "step": 40720 }, { "epoch": 0.08114321688129542, "grad_norm": 0.16918981075286865, "learning_rate": 0.002, "loss": 2.5748, "step": 40730 }, { "epoch": 0.08116313910493433, "grad_norm": 0.19891004264354706, "learning_rate": 0.002, "loss": 2.5654, "step": 40740 }, { "epoch": 0.08118306132857325, "grad_norm": 0.20951807498931885, "learning_rate": 0.002, "loss": 2.5944, "step": 40750 }, { "epoch": 0.08120298355221216, "grad_norm": 0.1573321372270584, "learning_rate": 0.002, "loss": 2.5654, "step": 40760 }, { "epoch": 0.08122290577585108, "grad_norm": 0.16638191044330597, "learning_rate": 0.002, "loss": 2.5935, "step": 40770 }, { "epoch": 0.08124282799949, "grad_norm": 0.16728724539279938, "learning_rate": 0.002, "loss": 2.5695, "step": 40780 }, { "epoch": 0.0812627502231289, "grad_norm": 0.19909322261810303, "learning_rate": 0.002, "loss": 2.5676, "step": 40790 }, { "epoch": 0.08128267244676782, "grad_norm": 0.16525670886039734, "learning_rate": 0.002, "loss": 2.5726, "step": 40800 }, { "epoch": 0.08130259467040674, "grad_norm": 0.15577906370162964, "learning_rate": 0.002, "loss": 2.5807, "step": 40810 }, { "epoch": 0.08132251689404564, "grad_norm": 0.19273418188095093, "learning_rate": 0.002, "loss": 2.5729, "step": 40820 }, { "epoch": 0.08134243911768456, "grad_norm": 0.17514750361442566, "learning_rate": 0.002, "loss": 2.5637, "step": 40830 }, { "epoch": 0.08136236134132348, "grad_norm": 0.18939298391342163, "learning_rate": 0.002, "loss": 2.6022, "step": 40840 }, { "epoch": 0.08138228356496238, "grad_norm": 0.16457058489322662, "learning_rate": 0.002, "loss": 2.5737, "step": 40850 }, { "epoch": 0.0814022057886013, "grad_norm": 0.16800916194915771, "learning_rate": 0.002, "loss": 2.5658, "step": 40860 }, { "epoch": 0.08142212801224022, "grad_norm": 0.17347192764282227, "learning_rate": 0.002, "loss": 2.5774, "step": 40870 }, { "epoch": 0.08144205023587912, "grad_norm": 0.18136608600616455, "learning_rate": 0.002, "loss": 2.5688, "step": 40880 }, { "epoch": 0.08146197245951804, "grad_norm": 0.17819291353225708, "learning_rate": 0.002, "loss": 2.5711, "step": 40890 }, { "epoch": 0.08148189468315696, "grad_norm": 0.18732571601867676, "learning_rate": 0.002, "loss": 2.5764, "step": 40900 }, { "epoch": 0.08150181690679587, "grad_norm": 0.15440930426120758, "learning_rate": 0.002, "loss": 2.5909, "step": 40910 }, { "epoch": 0.08152173913043478, "grad_norm": 0.23333266377449036, "learning_rate": 0.002, "loss": 2.5662, "step": 40920 }, { "epoch": 0.0815416613540737, "grad_norm": 0.15823791921138763, "learning_rate": 0.002, "loss": 2.5708, "step": 40930 }, { "epoch": 0.0815615835777126, "grad_norm": 0.15387076139450073, "learning_rate": 0.002, "loss": 2.5869, "step": 40940 }, { "epoch": 0.08158150580135153, "grad_norm": 0.17168912291526794, "learning_rate": 0.002, "loss": 2.5805, "step": 40950 }, { "epoch": 0.08160142802499044, "grad_norm": 0.17236122488975525, "learning_rate": 0.002, "loss": 2.5806, "step": 40960 }, { "epoch": 0.08162135024862935, "grad_norm": 0.18249380588531494, "learning_rate": 0.002, "loss": 2.5743, "step": 40970 }, { "epoch": 0.08164127247226827, "grad_norm": 0.19218102097511292, "learning_rate": 0.002, "loss": 2.5737, "step": 40980 }, { "epoch": 0.08166119469590717, "grad_norm": 0.15995191037654877, "learning_rate": 0.002, "loss": 2.5814, "step": 40990 }, { "epoch": 0.08168111691954609, "grad_norm": 0.22064857184886932, "learning_rate": 0.002, "loss": 2.5729, "step": 41000 }, { "epoch": 0.08170103914318501, "grad_norm": 0.18232618272304535, "learning_rate": 0.002, "loss": 2.5783, "step": 41010 }, { "epoch": 0.08172096136682391, "grad_norm": 0.17221316695213318, "learning_rate": 0.002, "loss": 2.5707, "step": 41020 }, { "epoch": 0.08174088359046283, "grad_norm": 0.1631922870874405, "learning_rate": 0.002, "loss": 2.5842, "step": 41030 }, { "epoch": 0.08176080581410175, "grad_norm": 0.1583796590566635, "learning_rate": 0.002, "loss": 2.5782, "step": 41040 }, { "epoch": 0.08178072803774065, "grad_norm": 0.26074913144111633, "learning_rate": 0.002, "loss": 2.6035, "step": 41050 }, { "epoch": 0.08180065026137957, "grad_norm": 0.1621347814798355, "learning_rate": 0.002, "loss": 2.5704, "step": 41060 }, { "epoch": 0.08182057248501849, "grad_norm": 0.15502291917800903, "learning_rate": 0.002, "loss": 2.5738, "step": 41070 }, { "epoch": 0.0818404947086574, "grad_norm": 0.15457920730113983, "learning_rate": 0.002, "loss": 2.5832, "step": 41080 }, { "epoch": 0.08186041693229631, "grad_norm": 0.1698063611984253, "learning_rate": 0.002, "loss": 2.5812, "step": 41090 }, { "epoch": 0.08188033915593523, "grad_norm": 0.20775610208511353, "learning_rate": 0.002, "loss": 2.5771, "step": 41100 }, { "epoch": 0.08190026137957414, "grad_norm": 0.1599467396736145, "learning_rate": 0.002, "loss": 2.5795, "step": 41110 }, { "epoch": 0.08192018360321306, "grad_norm": 0.17443442344665527, "learning_rate": 0.002, "loss": 2.5751, "step": 41120 }, { "epoch": 0.08194010582685197, "grad_norm": 0.17559434473514557, "learning_rate": 0.002, "loss": 2.5599, "step": 41130 }, { "epoch": 0.08196002805049088, "grad_norm": 0.15971297025680542, "learning_rate": 0.002, "loss": 2.5721, "step": 41140 }, { "epoch": 0.0819799502741298, "grad_norm": 0.14252978563308716, "learning_rate": 0.002, "loss": 2.5644, "step": 41150 }, { "epoch": 0.08199987249776872, "grad_norm": 0.15708264708518982, "learning_rate": 0.002, "loss": 2.576, "step": 41160 }, { "epoch": 0.08201979472140762, "grad_norm": 0.21369029581546783, "learning_rate": 0.002, "loss": 2.5855, "step": 41170 }, { "epoch": 0.08203971694504654, "grad_norm": 0.18380141258239746, "learning_rate": 0.002, "loss": 2.5816, "step": 41180 }, { "epoch": 0.08205963916868546, "grad_norm": 0.1689918488264084, "learning_rate": 0.002, "loss": 2.5827, "step": 41190 }, { "epoch": 0.08207956139232436, "grad_norm": 0.20473451912403107, "learning_rate": 0.002, "loss": 2.5715, "step": 41200 }, { "epoch": 0.08209948361596328, "grad_norm": 0.15572820603847504, "learning_rate": 0.002, "loss": 2.5832, "step": 41210 }, { "epoch": 0.0821194058396022, "grad_norm": 0.17761704325675964, "learning_rate": 0.002, "loss": 2.5883, "step": 41220 }, { "epoch": 0.0821393280632411, "grad_norm": 0.17900516092777252, "learning_rate": 0.002, "loss": 2.58, "step": 41230 }, { "epoch": 0.08215925028688002, "grad_norm": 0.1817556619644165, "learning_rate": 0.002, "loss": 2.5747, "step": 41240 }, { "epoch": 0.08217917251051893, "grad_norm": 0.17906445264816284, "learning_rate": 0.002, "loss": 2.5745, "step": 41250 }, { "epoch": 0.08219909473415785, "grad_norm": 0.16155405342578888, "learning_rate": 0.002, "loss": 2.5988, "step": 41260 }, { "epoch": 0.08221901695779676, "grad_norm": 0.18374618887901306, "learning_rate": 0.002, "loss": 2.581, "step": 41270 }, { "epoch": 0.08223893918143567, "grad_norm": 0.1870214343070984, "learning_rate": 0.002, "loss": 2.5888, "step": 41280 }, { "epoch": 0.08225886140507459, "grad_norm": 0.1730262041091919, "learning_rate": 0.002, "loss": 2.569, "step": 41290 }, { "epoch": 0.0822787836287135, "grad_norm": 0.1862390786409378, "learning_rate": 0.002, "loss": 2.5723, "step": 41300 }, { "epoch": 0.08229870585235241, "grad_norm": 0.18210060894489288, "learning_rate": 0.002, "loss": 2.5809, "step": 41310 }, { "epoch": 0.08231862807599133, "grad_norm": 0.18298307061195374, "learning_rate": 0.002, "loss": 2.5813, "step": 41320 }, { "epoch": 0.08233855029963025, "grad_norm": 0.15168973803520203, "learning_rate": 0.002, "loss": 2.5759, "step": 41330 }, { "epoch": 0.08235847252326915, "grad_norm": 0.16850252449512482, "learning_rate": 0.002, "loss": 2.5818, "step": 41340 }, { "epoch": 0.08237839474690807, "grad_norm": 0.21988199651241302, "learning_rate": 0.002, "loss": 2.5745, "step": 41350 }, { "epoch": 0.08239831697054699, "grad_norm": 0.19914932548999786, "learning_rate": 0.002, "loss": 2.5991, "step": 41360 }, { "epoch": 0.0824182391941859, "grad_norm": 0.1881561577320099, "learning_rate": 0.002, "loss": 2.5796, "step": 41370 }, { "epoch": 0.08243816141782481, "grad_norm": 0.15816068649291992, "learning_rate": 0.002, "loss": 2.5749, "step": 41380 }, { "epoch": 0.08245808364146373, "grad_norm": 0.17191679775714874, "learning_rate": 0.002, "loss": 2.5787, "step": 41390 }, { "epoch": 0.08247800586510264, "grad_norm": 0.1975279599428177, "learning_rate": 0.002, "loss": 2.5725, "step": 41400 }, { "epoch": 0.08249792808874155, "grad_norm": 0.16186001896858215, "learning_rate": 0.002, "loss": 2.5731, "step": 41410 }, { "epoch": 0.08251785031238047, "grad_norm": 0.18780888617038727, "learning_rate": 0.002, "loss": 2.5765, "step": 41420 }, { "epoch": 0.08253777253601938, "grad_norm": 0.15270772576332092, "learning_rate": 0.002, "loss": 2.5822, "step": 41430 }, { "epoch": 0.0825576947596583, "grad_norm": 0.17245274782180786, "learning_rate": 0.002, "loss": 2.5911, "step": 41440 }, { "epoch": 0.08257761698329721, "grad_norm": 0.19512297213077545, "learning_rate": 0.002, "loss": 2.5865, "step": 41450 }, { "epoch": 0.08259753920693612, "grad_norm": 0.1681053638458252, "learning_rate": 0.002, "loss": 2.5781, "step": 41460 }, { "epoch": 0.08261746143057504, "grad_norm": 0.17702198028564453, "learning_rate": 0.002, "loss": 2.5792, "step": 41470 }, { "epoch": 0.08263738365421396, "grad_norm": 0.17706723511219025, "learning_rate": 0.002, "loss": 2.5803, "step": 41480 }, { "epoch": 0.08265730587785286, "grad_norm": 0.1684921830892563, "learning_rate": 0.002, "loss": 2.5757, "step": 41490 }, { "epoch": 0.08267722810149178, "grad_norm": 0.18997570872306824, "learning_rate": 0.002, "loss": 2.5929, "step": 41500 }, { "epoch": 0.08269715032513068, "grad_norm": 0.146104633808136, "learning_rate": 0.002, "loss": 2.5668, "step": 41510 }, { "epoch": 0.0827170725487696, "grad_norm": 0.1896403431892395, "learning_rate": 0.002, "loss": 2.5792, "step": 41520 }, { "epoch": 0.08273699477240852, "grad_norm": 0.1652923971414566, "learning_rate": 0.002, "loss": 2.5708, "step": 41530 }, { "epoch": 0.08275691699604742, "grad_norm": 0.1834820955991745, "learning_rate": 0.002, "loss": 2.5782, "step": 41540 }, { "epoch": 0.08277683921968634, "grad_norm": 0.1960153877735138, "learning_rate": 0.002, "loss": 2.5867, "step": 41550 }, { "epoch": 0.08279676144332526, "grad_norm": 0.17034310102462769, "learning_rate": 0.002, "loss": 2.5733, "step": 41560 }, { "epoch": 0.08281668366696417, "grad_norm": 0.15910650789737701, "learning_rate": 0.002, "loss": 2.568, "step": 41570 }, { "epoch": 0.08283660589060308, "grad_norm": 0.1747513860464096, "learning_rate": 0.002, "loss": 2.5694, "step": 41580 }, { "epoch": 0.082856528114242, "grad_norm": 0.1868322342634201, "learning_rate": 0.002, "loss": 2.5675, "step": 41590 }, { "epoch": 0.08287645033788091, "grad_norm": 0.1556849181652069, "learning_rate": 0.002, "loss": 2.5761, "step": 41600 }, { "epoch": 0.08289637256151983, "grad_norm": 0.19283948838710785, "learning_rate": 0.002, "loss": 2.5765, "step": 41610 }, { "epoch": 0.08291629478515875, "grad_norm": 0.17181354761123657, "learning_rate": 0.002, "loss": 2.5871, "step": 41620 }, { "epoch": 0.08293621700879765, "grad_norm": 0.17375899851322174, "learning_rate": 0.002, "loss": 2.565, "step": 41630 }, { "epoch": 0.08295613923243657, "grad_norm": 0.1875692903995514, "learning_rate": 0.002, "loss": 2.58, "step": 41640 }, { "epoch": 0.08297606145607549, "grad_norm": 0.16901710629463196, "learning_rate": 0.002, "loss": 2.5824, "step": 41650 }, { "epoch": 0.08299598367971439, "grad_norm": 0.15139825642108917, "learning_rate": 0.002, "loss": 2.5778, "step": 41660 }, { "epoch": 0.08301590590335331, "grad_norm": 0.18604117631912231, "learning_rate": 0.002, "loss": 2.5907, "step": 41670 }, { "epoch": 0.08303582812699223, "grad_norm": 0.14193220436573029, "learning_rate": 0.002, "loss": 2.5761, "step": 41680 }, { "epoch": 0.08305575035063113, "grad_norm": 0.1553369164466858, "learning_rate": 0.002, "loss": 2.575, "step": 41690 }, { "epoch": 0.08307567257427005, "grad_norm": 0.16608421504497528, "learning_rate": 0.002, "loss": 2.5769, "step": 41700 }, { "epoch": 0.08309559479790897, "grad_norm": 0.16806794703006744, "learning_rate": 0.002, "loss": 2.5753, "step": 41710 }, { "epoch": 0.08311551702154787, "grad_norm": 0.1520327478647232, "learning_rate": 0.002, "loss": 2.573, "step": 41720 }, { "epoch": 0.08313543924518679, "grad_norm": 0.1877509206533432, "learning_rate": 0.002, "loss": 2.5824, "step": 41730 }, { "epoch": 0.0831553614688257, "grad_norm": 0.1760701835155487, "learning_rate": 0.002, "loss": 2.5833, "step": 41740 }, { "epoch": 0.08317528369246462, "grad_norm": 0.16309762001037598, "learning_rate": 0.002, "loss": 2.569, "step": 41750 }, { "epoch": 0.08319520591610353, "grad_norm": 0.16481786966323853, "learning_rate": 0.002, "loss": 2.5755, "step": 41760 }, { "epoch": 0.08321512813974244, "grad_norm": 0.17795135080814362, "learning_rate": 0.002, "loss": 2.5642, "step": 41770 }, { "epoch": 0.08323505036338136, "grad_norm": 0.1842493712902069, "learning_rate": 0.002, "loss": 2.5618, "step": 41780 }, { "epoch": 0.08325497258702028, "grad_norm": 0.1427624374628067, "learning_rate": 0.002, "loss": 2.562, "step": 41790 }, { "epoch": 0.08327489481065918, "grad_norm": 0.15471352636814117, "learning_rate": 0.002, "loss": 2.5735, "step": 41800 }, { "epoch": 0.0832948170342981, "grad_norm": 0.15752826631069183, "learning_rate": 0.002, "loss": 2.5671, "step": 41810 }, { "epoch": 0.08331473925793702, "grad_norm": 0.18032915890216827, "learning_rate": 0.002, "loss": 2.577, "step": 41820 }, { "epoch": 0.08333466148157592, "grad_norm": 0.16342242062091827, "learning_rate": 0.002, "loss": 2.5784, "step": 41830 }, { "epoch": 0.08335458370521484, "grad_norm": 0.21455229818820953, "learning_rate": 0.002, "loss": 2.5694, "step": 41840 }, { "epoch": 0.08337450592885376, "grad_norm": 0.1821259707212448, "learning_rate": 0.002, "loss": 2.5776, "step": 41850 }, { "epoch": 0.08339442815249266, "grad_norm": 0.15690568089485168, "learning_rate": 0.002, "loss": 2.5883, "step": 41860 }, { "epoch": 0.08341435037613158, "grad_norm": 0.14644423127174377, "learning_rate": 0.002, "loss": 2.5667, "step": 41870 }, { "epoch": 0.0834342725997705, "grad_norm": 0.16151748597621918, "learning_rate": 0.002, "loss": 2.5873, "step": 41880 }, { "epoch": 0.0834541948234094, "grad_norm": 0.16012507677078247, "learning_rate": 0.002, "loss": 2.5767, "step": 41890 }, { "epoch": 0.08347411704704832, "grad_norm": 0.1755189150571823, "learning_rate": 0.002, "loss": 2.5771, "step": 41900 }, { "epoch": 0.08349403927068724, "grad_norm": 0.17789584398269653, "learning_rate": 0.002, "loss": 2.5617, "step": 41910 }, { "epoch": 0.08351396149432615, "grad_norm": 0.1783728450536728, "learning_rate": 0.002, "loss": 2.5779, "step": 41920 }, { "epoch": 0.08353388371796507, "grad_norm": 0.16703586280345917, "learning_rate": 0.002, "loss": 2.574, "step": 41930 }, { "epoch": 0.08355380594160398, "grad_norm": 0.17306968569755554, "learning_rate": 0.002, "loss": 2.5674, "step": 41940 }, { "epoch": 0.08357372816524289, "grad_norm": 0.19015857577323914, "learning_rate": 0.002, "loss": 2.5625, "step": 41950 }, { "epoch": 0.08359365038888181, "grad_norm": 0.20763497054576874, "learning_rate": 0.002, "loss": 2.5723, "step": 41960 }, { "epoch": 0.08361357261252073, "grad_norm": 0.1566556841135025, "learning_rate": 0.002, "loss": 2.5724, "step": 41970 }, { "epoch": 0.08363349483615963, "grad_norm": 0.16928696632385254, "learning_rate": 0.002, "loss": 2.5664, "step": 41980 }, { "epoch": 0.08365341705979855, "grad_norm": 0.20432908833026886, "learning_rate": 0.002, "loss": 2.5647, "step": 41990 }, { "epoch": 0.08367333928343745, "grad_norm": 0.1605004519224167, "learning_rate": 0.002, "loss": 2.5723, "step": 42000 }, { "epoch": 0.08369326150707637, "grad_norm": 0.23284311592578888, "learning_rate": 0.002, "loss": 2.5915, "step": 42010 }, { "epoch": 0.08371318373071529, "grad_norm": 0.19205297529697418, "learning_rate": 0.002, "loss": 2.5831, "step": 42020 }, { "epoch": 0.0837331059543542, "grad_norm": 0.16614706814289093, "learning_rate": 0.002, "loss": 2.5894, "step": 42030 }, { "epoch": 0.08375302817799311, "grad_norm": 0.19370459020137787, "learning_rate": 0.002, "loss": 2.5716, "step": 42040 }, { "epoch": 0.08377295040163203, "grad_norm": 0.1825285106897354, "learning_rate": 0.002, "loss": 2.5791, "step": 42050 }, { "epoch": 0.08379287262527094, "grad_norm": 0.4417503774166107, "learning_rate": 0.002, "loss": 2.5729, "step": 42060 }, { "epoch": 0.08381279484890986, "grad_norm": 0.16940993070602417, "learning_rate": 0.002, "loss": 2.582, "step": 42070 }, { "epoch": 0.08383271707254877, "grad_norm": 0.1613614857196808, "learning_rate": 0.002, "loss": 2.5765, "step": 42080 }, { "epoch": 0.08385263929618768, "grad_norm": 0.15020060539245605, "learning_rate": 0.002, "loss": 2.5843, "step": 42090 }, { "epoch": 0.0838725615198266, "grad_norm": 0.19892945885658264, "learning_rate": 0.002, "loss": 2.5748, "step": 42100 }, { "epoch": 0.08389248374346552, "grad_norm": 0.16421666741371155, "learning_rate": 0.002, "loss": 2.5767, "step": 42110 }, { "epoch": 0.08391240596710442, "grad_norm": 0.2091779112815857, "learning_rate": 0.002, "loss": 2.6005, "step": 42120 }, { "epoch": 0.08393232819074334, "grad_norm": 0.15970446169376373, "learning_rate": 0.002, "loss": 2.5769, "step": 42130 }, { "epoch": 0.08395225041438226, "grad_norm": 0.15973007678985596, "learning_rate": 0.002, "loss": 2.5839, "step": 42140 }, { "epoch": 0.08397217263802116, "grad_norm": 0.18020623922348022, "learning_rate": 0.002, "loss": 2.5759, "step": 42150 }, { "epoch": 0.08399209486166008, "grad_norm": 0.14705577492713928, "learning_rate": 0.002, "loss": 2.5602, "step": 42160 }, { "epoch": 0.084012017085299, "grad_norm": 0.18429142236709595, "learning_rate": 0.002, "loss": 2.5781, "step": 42170 }, { "epoch": 0.0840319393089379, "grad_norm": 0.17203140258789062, "learning_rate": 0.002, "loss": 2.5933, "step": 42180 }, { "epoch": 0.08405186153257682, "grad_norm": 0.168572798371315, "learning_rate": 0.002, "loss": 2.5893, "step": 42190 }, { "epoch": 0.08407178375621574, "grad_norm": 0.18564526736736298, "learning_rate": 0.002, "loss": 2.5848, "step": 42200 }, { "epoch": 0.08409170597985464, "grad_norm": 0.1538056582212448, "learning_rate": 0.002, "loss": 2.5595, "step": 42210 }, { "epoch": 0.08411162820349356, "grad_norm": 0.17515301704406738, "learning_rate": 0.002, "loss": 2.5812, "step": 42220 }, { "epoch": 0.08413155042713248, "grad_norm": 0.19240929186344147, "learning_rate": 0.002, "loss": 2.572, "step": 42230 }, { "epoch": 0.08415147265077139, "grad_norm": 0.16376498341560364, "learning_rate": 0.002, "loss": 2.5846, "step": 42240 }, { "epoch": 0.0841713948744103, "grad_norm": 0.18019671738147736, "learning_rate": 0.002, "loss": 2.5795, "step": 42250 }, { "epoch": 0.08419131709804921, "grad_norm": 0.16746696829795837, "learning_rate": 0.002, "loss": 2.573, "step": 42260 }, { "epoch": 0.08421123932168813, "grad_norm": 0.14729571342468262, "learning_rate": 0.002, "loss": 2.5948, "step": 42270 }, { "epoch": 0.08423116154532705, "grad_norm": 0.144574373960495, "learning_rate": 0.002, "loss": 2.5695, "step": 42280 }, { "epoch": 0.08425108376896595, "grad_norm": 0.19264130294322968, "learning_rate": 0.002, "loss": 2.5738, "step": 42290 }, { "epoch": 0.08427100599260487, "grad_norm": 0.17178137600421906, "learning_rate": 0.002, "loss": 2.5761, "step": 42300 }, { "epoch": 0.08429092821624379, "grad_norm": 0.15108804404735565, "learning_rate": 0.002, "loss": 2.561, "step": 42310 }, { "epoch": 0.08431085043988269, "grad_norm": 0.17616501450538635, "learning_rate": 0.002, "loss": 2.5912, "step": 42320 }, { "epoch": 0.08433077266352161, "grad_norm": 0.17007015645503998, "learning_rate": 0.002, "loss": 2.567, "step": 42330 }, { "epoch": 0.08435069488716053, "grad_norm": 0.175955668091774, "learning_rate": 0.002, "loss": 2.5754, "step": 42340 }, { "epoch": 0.08437061711079943, "grad_norm": 0.16903157532215118, "learning_rate": 0.002, "loss": 2.579, "step": 42350 }, { "epoch": 0.08439053933443835, "grad_norm": 0.1549229621887207, "learning_rate": 0.002, "loss": 2.5855, "step": 42360 }, { "epoch": 0.08441046155807727, "grad_norm": 0.1804344803094864, "learning_rate": 0.002, "loss": 2.5846, "step": 42370 }, { "epoch": 0.08443038378171618, "grad_norm": 0.16456778347492218, "learning_rate": 0.002, "loss": 2.5797, "step": 42380 }, { "epoch": 0.0844503060053551, "grad_norm": 0.14878234267234802, "learning_rate": 0.002, "loss": 2.5805, "step": 42390 }, { "epoch": 0.08447022822899401, "grad_norm": 0.19052770733833313, "learning_rate": 0.002, "loss": 2.5735, "step": 42400 }, { "epoch": 0.08449015045263292, "grad_norm": 0.14763140678405762, "learning_rate": 0.002, "loss": 2.5739, "step": 42410 }, { "epoch": 0.08451007267627184, "grad_norm": 0.17895464599132538, "learning_rate": 0.002, "loss": 2.5652, "step": 42420 }, { "epoch": 0.08452999489991075, "grad_norm": 0.18179889023303986, "learning_rate": 0.002, "loss": 2.5739, "step": 42430 }, { "epoch": 0.08454991712354966, "grad_norm": 0.19689536094665527, "learning_rate": 0.002, "loss": 2.5862, "step": 42440 }, { "epoch": 0.08456983934718858, "grad_norm": 0.15307100117206573, "learning_rate": 0.002, "loss": 2.5607, "step": 42450 }, { "epoch": 0.0845897615708275, "grad_norm": 0.1738438457250595, "learning_rate": 0.002, "loss": 2.5817, "step": 42460 }, { "epoch": 0.0846096837944664, "grad_norm": 0.1721062809228897, "learning_rate": 0.002, "loss": 2.5793, "step": 42470 }, { "epoch": 0.08462960601810532, "grad_norm": 0.193965882062912, "learning_rate": 0.002, "loss": 2.5852, "step": 42480 }, { "epoch": 0.08464952824174424, "grad_norm": 0.19091816246509552, "learning_rate": 0.002, "loss": 2.581, "step": 42490 }, { "epoch": 0.08466945046538314, "grad_norm": 0.1863948553800583, "learning_rate": 0.002, "loss": 2.5863, "step": 42500 }, { "epoch": 0.08468937268902206, "grad_norm": 0.15407592058181763, "learning_rate": 0.002, "loss": 2.5744, "step": 42510 }, { "epoch": 0.08470929491266097, "grad_norm": 0.1786046326160431, "learning_rate": 0.002, "loss": 2.5901, "step": 42520 }, { "epoch": 0.08472921713629988, "grad_norm": 0.15806959569454193, "learning_rate": 0.002, "loss": 2.5752, "step": 42530 }, { "epoch": 0.0847491393599388, "grad_norm": 0.1523788571357727, "learning_rate": 0.002, "loss": 2.5659, "step": 42540 }, { "epoch": 0.0847690615835777, "grad_norm": 0.17562368512153625, "learning_rate": 0.002, "loss": 2.5849, "step": 42550 }, { "epoch": 0.08478898380721663, "grad_norm": 0.2159397453069687, "learning_rate": 0.002, "loss": 2.5666, "step": 42560 }, { "epoch": 0.08480890603085554, "grad_norm": 0.14358648657798767, "learning_rate": 0.002, "loss": 2.5923, "step": 42570 }, { "epoch": 0.08482882825449445, "grad_norm": 0.18746145069599152, "learning_rate": 0.002, "loss": 2.5664, "step": 42580 }, { "epoch": 0.08484875047813337, "grad_norm": 0.1714947372674942, "learning_rate": 0.002, "loss": 2.577, "step": 42590 }, { "epoch": 0.08486867270177229, "grad_norm": 0.1820218414068222, "learning_rate": 0.002, "loss": 2.5752, "step": 42600 }, { "epoch": 0.08488859492541119, "grad_norm": 0.19009561836719513, "learning_rate": 0.002, "loss": 2.5725, "step": 42610 }, { "epoch": 0.08490851714905011, "grad_norm": 0.1641906350851059, "learning_rate": 0.002, "loss": 2.5702, "step": 42620 }, { "epoch": 0.08492843937268903, "grad_norm": 0.15879471600055695, "learning_rate": 0.002, "loss": 2.5719, "step": 42630 }, { "epoch": 0.08494836159632793, "grad_norm": 0.19014720618724823, "learning_rate": 0.002, "loss": 2.577, "step": 42640 }, { "epoch": 0.08496828381996685, "grad_norm": 0.16923880577087402, "learning_rate": 0.002, "loss": 2.5621, "step": 42650 }, { "epoch": 0.08498820604360577, "grad_norm": 0.18698494136333466, "learning_rate": 0.002, "loss": 2.5783, "step": 42660 }, { "epoch": 0.08500812826724467, "grad_norm": 0.1749420017004013, "learning_rate": 0.002, "loss": 2.5765, "step": 42670 }, { "epoch": 0.08502805049088359, "grad_norm": 0.17532093822956085, "learning_rate": 0.002, "loss": 2.5758, "step": 42680 }, { "epoch": 0.08504797271452251, "grad_norm": 0.1596604585647583, "learning_rate": 0.002, "loss": 2.5822, "step": 42690 }, { "epoch": 0.08506789493816141, "grad_norm": 0.1527462750673294, "learning_rate": 0.002, "loss": 2.5868, "step": 42700 }, { "epoch": 0.08508781716180033, "grad_norm": 0.17619654536247253, "learning_rate": 0.002, "loss": 2.59, "step": 42710 }, { "epoch": 0.08510773938543925, "grad_norm": 0.1740257441997528, "learning_rate": 0.002, "loss": 2.5661, "step": 42720 }, { "epoch": 0.08512766160907816, "grad_norm": 0.15925641357898712, "learning_rate": 0.002, "loss": 2.5638, "step": 42730 }, { "epoch": 0.08514758383271707, "grad_norm": 0.17838910222053528, "learning_rate": 0.002, "loss": 2.5774, "step": 42740 }, { "epoch": 0.08516750605635598, "grad_norm": 0.1583150327205658, "learning_rate": 0.002, "loss": 2.5723, "step": 42750 }, { "epoch": 0.0851874282799949, "grad_norm": 0.21564526855945587, "learning_rate": 0.002, "loss": 2.576, "step": 42760 }, { "epoch": 0.08520735050363382, "grad_norm": 0.1630212515592575, "learning_rate": 0.002, "loss": 2.5843, "step": 42770 }, { "epoch": 0.08522727272727272, "grad_norm": 0.17254911363124847, "learning_rate": 0.002, "loss": 2.5913, "step": 42780 }, { "epoch": 0.08524719495091164, "grad_norm": 0.18597069382667542, "learning_rate": 0.002, "loss": 2.57, "step": 42790 }, { "epoch": 0.08526711717455056, "grad_norm": 0.17483165860176086, "learning_rate": 0.002, "loss": 2.5823, "step": 42800 }, { "epoch": 0.08528703939818946, "grad_norm": 0.16459515690803528, "learning_rate": 0.002, "loss": 2.576, "step": 42810 }, { "epoch": 0.08530696162182838, "grad_norm": 0.14924678206443787, "learning_rate": 0.002, "loss": 2.5731, "step": 42820 }, { "epoch": 0.0853268838454673, "grad_norm": 0.15100827813148499, "learning_rate": 0.002, "loss": 2.5748, "step": 42830 }, { "epoch": 0.0853468060691062, "grad_norm": 0.18305757641792297, "learning_rate": 0.002, "loss": 2.5736, "step": 42840 }, { "epoch": 0.08536672829274512, "grad_norm": 0.1681728959083557, "learning_rate": 0.002, "loss": 2.5859, "step": 42850 }, { "epoch": 0.08538665051638404, "grad_norm": 0.18250055611133575, "learning_rate": 0.002, "loss": 2.5713, "step": 42860 }, { "epoch": 0.08540657274002295, "grad_norm": 0.20653879642486572, "learning_rate": 0.002, "loss": 2.5814, "step": 42870 }, { "epoch": 0.08542649496366186, "grad_norm": 0.15297305583953857, "learning_rate": 0.002, "loss": 2.5888, "step": 42880 }, { "epoch": 0.08544641718730078, "grad_norm": 0.17973937094211578, "learning_rate": 0.002, "loss": 2.5646, "step": 42890 }, { "epoch": 0.08546633941093969, "grad_norm": 0.17516006529331207, "learning_rate": 0.002, "loss": 2.5725, "step": 42900 }, { "epoch": 0.0854862616345786, "grad_norm": 0.17235930263996124, "learning_rate": 0.002, "loss": 2.5928, "step": 42910 }, { "epoch": 0.08550618385821752, "grad_norm": 0.15944723784923553, "learning_rate": 0.002, "loss": 2.575, "step": 42920 }, { "epoch": 0.08552610608185643, "grad_norm": 0.19784213602542877, "learning_rate": 0.002, "loss": 2.5826, "step": 42930 }, { "epoch": 0.08554602830549535, "grad_norm": 0.17806920409202576, "learning_rate": 0.002, "loss": 2.5683, "step": 42940 }, { "epoch": 0.08556595052913427, "grad_norm": 0.20488448441028595, "learning_rate": 0.002, "loss": 2.5768, "step": 42950 }, { "epoch": 0.08558587275277317, "grad_norm": 0.15621811151504517, "learning_rate": 0.002, "loss": 2.576, "step": 42960 }, { "epoch": 0.08560579497641209, "grad_norm": 0.1909632384777069, "learning_rate": 0.002, "loss": 2.5688, "step": 42970 }, { "epoch": 0.08562571720005101, "grad_norm": 0.1810688078403473, "learning_rate": 0.002, "loss": 2.5882, "step": 42980 }, { "epoch": 0.08564563942368991, "grad_norm": 0.1744854599237442, "learning_rate": 0.002, "loss": 2.5792, "step": 42990 }, { "epoch": 0.08566556164732883, "grad_norm": 0.19680936634540558, "learning_rate": 0.002, "loss": 2.5792, "step": 43000 }, { "epoch": 0.08568548387096774, "grad_norm": 0.1836700737476349, "learning_rate": 0.002, "loss": 2.5648, "step": 43010 }, { "epoch": 0.08570540609460665, "grad_norm": 0.15522703528404236, "learning_rate": 0.002, "loss": 2.5687, "step": 43020 }, { "epoch": 0.08572532831824557, "grad_norm": 0.18313778936862946, "learning_rate": 0.002, "loss": 2.5736, "step": 43030 }, { "epoch": 0.08574525054188448, "grad_norm": 0.19026120007038116, "learning_rate": 0.002, "loss": 2.59, "step": 43040 }, { "epoch": 0.0857651727655234, "grad_norm": 0.16608978807926178, "learning_rate": 0.002, "loss": 2.5914, "step": 43050 }, { "epoch": 0.08578509498916231, "grad_norm": 0.16401328146457672, "learning_rate": 0.002, "loss": 2.5813, "step": 43060 }, { "epoch": 0.08580501721280122, "grad_norm": 0.18595324456691742, "learning_rate": 0.002, "loss": 2.5883, "step": 43070 }, { "epoch": 0.08582493943644014, "grad_norm": 0.15220053493976593, "learning_rate": 0.002, "loss": 2.5715, "step": 43080 }, { "epoch": 0.08584486166007906, "grad_norm": 0.1790962666273117, "learning_rate": 0.002, "loss": 2.5803, "step": 43090 }, { "epoch": 0.08586478388371796, "grad_norm": 0.21408692002296448, "learning_rate": 0.002, "loss": 2.5833, "step": 43100 }, { "epoch": 0.08588470610735688, "grad_norm": 0.15141065418720245, "learning_rate": 0.002, "loss": 2.5607, "step": 43110 }, { "epoch": 0.0859046283309958, "grad_norm": 0.1775282472372055, "learning_rate": 0.002, "loss": 2.5558, "step": 43120 }, { "epoch": 0.0859245505546347, "grad_norm": 0.16643986105918884, "learning_rate": 0.002, "loss": 2.5833, "step": 43130 }, { "epoch": 0.08594447277827362, "grad_norm": 0.16095073521137238, "learning_rate": 0.002, "loss": 2.5672, "step": 43140 }, { "epoch": 0.08596439500191254, "grad_norm": 0.19378633797168732, "learning_rate": 0.002, "loss": 2.5684, "step": 43150 }, { "epoch": 0.08598431722555144, "grad_norm": 0.18285906314849854, "learning_rate": 0.002, "loss": 2.5698, "step": 43160 }, { "epoch": 0.08600423944919036, "grad_norm": 0.20013853907585144, "learning_rate": 0.002, "loss": 2.5726, "step": 43170 }, { "epoch": 0.08602416167282928, "grad_norm": 0.1585560441017151, "learning_rate": 0.002, "loss": 2.5796, "step": 43180 }, { "epoch": 0.08604408389646818, "grad_norm": 0.19720718264579773, "learning_rate": 0.002, "loss": 2.5678, "step": 43190 }, { "epoch": 0.0860640061201071, "grad_norm": 0.1506858915090561, "learning_rate": 0.002, "loss": 2.5876, "step": 43200 }, { "epoch": 0.08608392834374602, "grad_norm": 0.1682663857936859, "learning_rate": 0.002, "loss": 2.5846, "step": 43210 }, { "epoch": 0.08610385056738493, "grad_norm": 0.15738479793071747, "learning_rate": 0.002, "loss": 2.5978, "step": 43220 }, { "epoch": 0.08612377279102384, "grad_norm": 0.17376798391342163, "learning_rate": 0.002, "loss": 2.5791, "step": 43230 }, { "epoch": 0.08614369501466276, "grad_norm": 0.17977489531040192, "learning_rate": 0.002, "loss": 2.5611, "step": 43240 }, { "epoch": 0.08616361723830167, "grad_norm": 0.18008194863796234, "learning_rate": 0.002, "loss": 2.5826, "step": 43250 }, { "epoch": 0.08618353946194059, "grad_norm": 0.1852930635213852, "learning_rate": 0.002, "loss": 2.5942, "step": 43260 }, { "epoch": 0.08620346168557949, "grad_norm": 0.23120594024658203, "learning_rate": 0.002, "loss": 2.5847, "step": 43270 }, { "epoch": 0.08622338390921841, "grad_norm": 0.1654234379529953, "learning_rate": 0.002, "loss": 2.5819, "step": 43280 }, { "epoch": 0.08624330613285733, "grad_norm": 0.17878827452659607, "learning_rate": 0.002, "loss": 2.5777, "step": 43290 }, { "epoch": 0.08626322835649623, "grad_norm": 0.18044413626194, "learning_rate": 0.002, "loss": 2.5702, "step": 43300 }, { "epoch": 0.08628315058013515, "grad_norm": 0.14692750573158264, "learning_rate": 0.002, "loss": 2.5668, "step": 43310 }, { "epoch": 0.08630307280377407, "grad_norm": 0.16995760798454285, "learning_rate": 0.002, "loss": 2.5739, "step": 43320 }, { "epoch": 0.08632299502741297, "grad_norm": 0.2041388601064682, "learning_rate": 0.002, "loss": 2.586, "step": 43330 }, { "epoch": 0.08634291725105189, "grad_norm": 0.1506902277469635, "learning_rate": 0.002, "loss": 2.598, "step": 43340 }, { "epoch": 0.08636283947469081, "grad_norm": 0.18522921204566956, "learning_rate": 0.002, "loss": 2.5689, "step": 43350 }, { "epoch": 0.08638276169832972, "grad_norm": 0.14556948840618134, "learning_rate": 0.002, "loss": 2.5716, "step": 43360 }, { "epoch": 0.08640268392196863, "grad_norm": 0.2128453105688095, "learning_rate": 0.002, "loss": 2.567, "step": 43370 }, { "epoch": 0.08642260614560755, "grad_norm": 0.16301943361759186, "learning_rate": 0.002, "loss": 2.579, "step": 43380 }, { "epoch": 0.08644252836924646, "grad_norm": 0.18786656856536865, "learning_rate": 0.002, "loss": 2.5711, "step": 43390 }, { "epoch": 0.08646245059288538, "grad_norm": 0.15266460180282593, "learning_rate": 0.002, "loss": 2.5963, "step": 43400 }, { "epoch": 0.0864823728165243, "grad_norm": 0.16996356844902039, "learning_rate": 0.002, "loss": 2.5705, "step": 43410 }, { "epoch": 0.0865022950401632, "grad_norm": 0.1800781786441803, "learning_rate": 0.002, "loss": 2.5918, "step": 43420 }, { "epoch": 0.08652221726380212, "grad_norm": 0.1643417328596115, "learning_rate": 0.002, "loss": 2.5676, "step": 43430 }, { "epoch": 0.08654213948744104, "grad_norm": 0.1614031195640564, "learning_rate": 0.002, "loss": 2.5753, "step": 43440 }, { "epoch": 0.08656206171107994, "grad_norm": 0.19590263068675995, "learning_rate": 0.002, "loss": 2.5912, "step": 43450 }, { "epoch": 0.08658198393471886, "grad_norm": 0.16684475541114807, "learning_rate": 0.002, "loss": 2.5743, "step": 43460 }, { "epoch": 0.08660190615835778, "grad_norm": 0.24353648722171783, "learning_rate": 0.002, "loss": 2.5883, "step": 43470 }, { "epoch": 0.08662182838199668, "grad_norm": 0.1455281674861908, "learning_rate": 0.002, "loss": 2.5687, "step": 43480 }, { "epoch": 0.0866417506056356, "grad_norm": 0.13912884891033173, "learning_rate": 0.002, "loss": 2.5818, "step": 43490 }, { "epoch": 0.08666167282927452, "grad_norm": 0.16923274099826813, "learning_rate": 0.002, "loss": 2.5849, "step": 43500 }, { "epoch": 0.08668159505291342, "grad_norm": 0.18142184615135193, "learning_rate": 0.002, "loss": 2.5772, "step": 43510 }, { "epoch": 0.08670151727655234, "grad_norm": 0.18320435285568237, "learning_rate": 0.002, "loss": 2.571, "step": 43520 }, { "epoch": 0.08672143950019125, "grad_norm": 0.16996924579143524, "learning_rate": 0.002, "loss": 2.5758, "step": 43530 }, { "epoch": 0.08674136172383017, "grad_norm": 0.19260311126708984, "learning_rate": 0.002, "loss": 2.5831, "step": 43540 }, { "epoch": 0.08676128394746908, "grad_norm": 0.1566590517759323, "learning_rate": 0.002, "loss": 2.5714, "step": 43550 }, { "epoch": 0.08678120617110799, "grad_norm": 0.19536881148815155, "learning_rate": 0.002, "loss": 2.5851, "step": 43560 }, { "epoch": 0.08680112839474691, "grad_norm": 0.2025947868824005, "learning_rate": 0.002, "loss": 2.5702, "step": 43570 }, { "epoch": 0.08682105061838583, "grad_norm": 0.2009185254573822, "learning_rate": 0.002, "loss": 2.583, "step": 43580 }, { "epoch": 0.08684097284202473, "grad_norm": 0.17378991842269897, "learning_rate": 0.002, "loss": 2.5673, "step": 43590 }, { "epoch": 0.08686089506566365, "grad_norm": 0.165401428937912, "learning_rate": 0.002, "loss": 2.586, "step": 43600 }, { "epoch": 0.08688081728930257, "grad_norm": 0.1555377095937729, "learning_rate": 0.002, "loss": 2.5777, "step": 43610 }, { "epoch": 0.08690073951294147, "grad_norm": 0.19100189208984375, "learning_rate": 0.002, "loss": 2.5703, "step": 43620 }, { "epoch": 0.08692066173658039, "grad_norm": 0.1609416902065277, "learning_rate": 0.002, "loss": 2.5734, "step": 43630 }, { "epoch": 0.08694058396021931, "grad_norm": 0.17117443680763245, "learning_rate": 0.002, "loss": 2.5748, "step": 43640 }, { "epoch": 0.08696050618385821, "grad_norm": 0.21471920609474182, "learning_rate": 0.002, "loss": 2.585, "step": 43650 }, { "epoch": 0.08698042840749713, "grad_norm": 0.16195712983608246, "learning_rate": 0.002, "loss": 2.5767, "step": 43660 }, { "epoch": 0.08700035063113605, "grad_norm": 0.1948430836200714, "learning_rate": 0.002, "loss": 2.5914, "step": 43670 }, { "epoch": 0.08702027285477495, "grad_norm": 0.16427144408226013, "learning_rate": 0.002, "loss": 2.5809, "step": 43680 }, { "epoch": 0.08704019507841387, "grad_norm": 0.19303889572620392, "learning_rate": 0.002, "loss": 2.584, "step": 43690 }, { "epoch": 0.08706011730205279, "grad_norm": 0.17969726026058197, "learning_rate": 0.002, "loss": 2.5824, "step": 43700 }, { "epoch": 0.0870800395256917, "grad_norm": 0.1908373087644577, "learning_rate": 0.002, "loss": 2.5858, "step": 43710 }, { "epoch": 0.08709996174933061, "grad_norm": 0.1874733418226242, "learning_rate": 0.002, "loss": 2.5761, "step": 43720 }, { "epoch": 0.08711988397296953, "grad_norm": 0.17261409759521484, "learning_rate": 0.002, "loss": 2.5712, "step": 43730 }, { "epoch": 0.08713980619660844, "grad_norm": 0.1964954435825348, "learning_rate": 0.002, "loss": 2.5684, "step": 43740 }, { "epoch": 0.08715972842024736, "grad_norm": 0.15889064967632294, "learning_rate": 0.002, "loss": 2.5764, "step": 43750 }, { "epoch": 0.08717965064388626, "grad_norm": 0.19265460968017578, "learning_rate": 0.002, "loss": 2.5749, "step": 43760 }, { "epoch": 0.08719957286752518, "grad_norm": 0.16910572350025177, "learning_rate": 0.002, "loss": 2.5759, "step": 43770 }, { "epoch": 0.0872194950911641, "grad_norm": 0.18117696046829224, "learning_rate": 0.002, "loss": 2.5643, "step": 43780 }, { "epoch": 0.087239417314803, "grad_norm": 0.15434706211090088, "learning_rate": 0.002, "loss": 2.5737, "step": 43790 }, { "epoch": 0.08725933953844192, "grad_norm": 0.1623864322900772, "learning_rate": 0.002, "loss": 2.586, "step": 43800 }, { "epoch": 0.08727926176208084, "grad_norm": 0.23964394629001617, "learning_rate": 0.002, "loss": 2.5874, "step": 43810 }, { "epoch": 0.08729918398571974, "grad_norm": 0.16633263230323792, "learning_rate": 0.002, "loss": 2.5741, "step": 43820 }, { "epoch": 0.08731910620935866, "grad_norm": 0.21269549429416656, "learning_rate": 0.002, "loss": 2.5811, "step": 43830 }, { "epoch": 0.08733902843299758, "grad_norm": 0.16278031468391418, "learning_rate": 0.002, "loss": 2.597, "step": 43840 }, { "epoch": 0.08735895065663649, "grad_norm": 0.17258605360984802, "learning_rate": 0.002, "loss": 2.5852, "step": 43850 }, { "epoch": 0.0873788728802754, "grad_norm": 0.1778136044740677, "learning_rate": 0.002, "loss": 2.5781, "step": 43860 }, { "epoch": 0.08739879510391432, "grad_norm": 0.18961797654628754, "learning_rate": 0.002, "loss": 2.5793, "step": 43870 }, { "epoch": 0.08741871732755323, "grad_norm": 0.146419957280159, "learning_rate": 0.002, "loss": 2.5739, "step": 43880 }, { "epoch": 0.08743863955119215, "grad_norm": 0.17313969135284424, "learning_rate": 0.002, "loss": 2.5851, "step": 43890 }, { "epoch": 0.08745856177483106, "grad_norm": 0.2235182821750641, "learning_rate": 0.002, "loss": 2.5935, "step": 43900 }, { "epoch": 0.08747848399846997, "grad_norm": 0.17769016325473785, "learning_rate": 0.002, "loss": 2.5782, "step": 43910 }, { "epoch": 0.08749840622210889, "grad_norm": 0.1431354582309723, "learning_rate": 0.002, "loss": 2.5858, "step": 43920 }, { "epoch": 0.0875183284457478, "grad_norm": 0.1702062040567398, "learning_rate": 0.002, "loss": 2.5909, "step": 43930 }, { "epoch": 0.08753825066938671, "grad_norm": 0.16506820917129517, "learning_rate": 0.002, "loss": 2.58, "step": 43940 }, { "epoch": 0.08755817289302563, "grad_norm": 0.14185109734535217, "learning_rate": 0.002, "loss": 2.5783, "step": 43950 }, { "epoch": 0.08757809511666455, "grad_norm": 0.21042877435684204, "learning_rate": 0.002, "loss": 2.5698, "step": 43960 }, { "epoch": 0.08759801734030345, "grad_norm": 0.1655174046754837, "learning_rate": 0.002, "loss": 2.5792, "step": 43970 }, { "epoch": 0.08761793956394237, "grad_norm": 0.1808571070432663, "learning_rate": 0.002, "loss": 2.5832, "step": 43980 }, { "epoch": 0.08763786178758129, "grad_norm": 0.15937113761901855, "learning_rate": 0.002, "loss": 2.5779, "step": 43990 }, { "epoch": 0.0876577840112202, "grad_norm": 0.15229950845241547, "learning_rate": 0.002, "loss": 2.5791, "step": 44000 }, { "epoch": 0.08767770623485911, "grad_norm": 0.18352101743221283, "learning_rate": 0.002, "loss": 2.5896, "step": 44010 }, { "epoch": 0.08769762845849802, "grad_norm": 0.14784398674964905, "learning_rate": 0.002, "loss": 2.581, "step": 44020 }, { "epoch": 0.08771755068213694, "grad_norm": 0.16215384006500244, "learning_rate": 0.002, "loss": 2.59, "step": 44030 }, { "epoch": 0.08773747290577585, "grad_norm": 0.188812717795372, "learning_rate": 0.002, "loss": 2.5888, "step": 44040 }, { "epoch": 0.08775739512941476, "grad_norm": 0.19262129068374634, "learning_rate": 0.002, "loss": 2.5755, "step": 44050 }, { "epoch": 0.08777731735305368, "grad_norm": 0.15997402369976044, "learning_rate": 0.002, "loss": 2.5751, "step": 44060 }, { "epoch": 0.0877972395766926, "grad_norm": 0.15330804884433746, "learning_rate": 0.002, "loss": 2.5669, "step": 44070 }, { "epoch": 0.0878171618003315, "grad_norm": 0.21136219799518585, "learning_rate": 0.002, "loss": 2.5933, "step": 44080 }, { "epoch": 0.08783708402397042, "grad_norm": 0.16936726868152618, "learning_rate": 0.002, "loss": 2.5821, "step": 44090 }, { "epoch": 0.08785700624760934, "grad_norm": 0.23041938245296478, "learning_rate": 0.002, "loss": 2.5816, "step": 44100 }, { "epoch": 0.08787692847124824, "grad_norm": 0.1739884614944458, "learning_rate": 0.002, "loss": 2.5718, "step": 44110 }, { "epoch": 0.08789685069488716, "grad_norm": 0.19089189171791077, "learning_rate": 0.002, "loss": 2.5664, "step": 44120 }, { "epoch": 0.08791677291852608, "grad_norm": 0.1630881279706955, "learning_rate": 0.002, "loss": 2.5703, "step": 44130 }, { "epoch": 0.08793669514216498, "grad_norm": 0.22519239783287048, "learning_rate": 0.002, "loss": 2.5924, "step": 44140 }, { "epoch": 0.0879566173658039, "grad_norm": 0.15205128490924835, "learning_rate": 0.002, "loss": 2.5687, "step": 44150 }, { "epoch": 0.08797653958944282, "grad_norm": 0.15229380130767822, "learning_rate": 0.002, "loss": 2.5685, "step": 44160 }, { "epoch": 0.08799646181308172, "grad_norm": 0.1841769814491272, "learning_rate": 0.002, "loss": 2.5929, "step": 44170 }, { "epoch": 0.08801638403672064, "grad_norm": 0.18008606135845184, "learning_rate": 0.002, "loss": 2.5647, "step": 44180 }, { "epoch": 0.08803630626035956, "grad_norm": 0.15809224545955658, "learning_rate": 0.002, "loss": 2.5888, "step": 44190 }, { "epoch": 0.08805622848399847, "grad_norm": 0.17629599571228027, "learning_rate": 0.002, "loss": 2.5923, "step": 44200 }, { "epoch": 0.08807615070763739, "grad_norm": 0.16197514533996582, "learning_rate": 0.002, "loss": 2.5586, "step": 44210 }, { "epoch": 0.0880960729312763, "grad_norm": 0.17864692211151123, "learning_rate": 0.002, "loss": 2.5852, "step": 44220 }, { "epoch": 0.08811599515491521, "grad_norm": 0.17397525906562805, "learning_rate": 0.002, "loss": 2.5819, "step": 44230 }, { "epoch": 0.08813591737855413, "grad_norm": 0.14810006320476532, "learning_rate": 0.002, "loss": 2.5526, "step": 44240 }, { "epoch": 0.08815583960219305, "grad_norm": 0.17273394763469696, "learning_rate": 0.002, "loss": 2.5844, "step": 44250 }, { "epoch": 0.08817576182583195, "grad_norm": 0.15179014205932617, "learning_rate": 0.002, "loss": 2.573, "step": 44260 }, { "epoch": 0.08819568404947087, "grad_norm": 0.1823364943265915, "learning_rate": 0.002, "loss": 2.5892, "step": 44270 }, { "epoch": 0.08821560627310977, "grad_norm": 0.18980571627616882, "learning_rate": 0.002, "loss": 2.5774, "step": 44280 }, { "epoch": 0.08823552849674869, "grad_norm": 0.16420529782772064, "learning_rate": 0.002, "loss": 2.5708, "step": 44290 }, { "epoch": 0.08825545072038761, "grad_norm": 0.17302216589450836, "learning_rate": 0.002, "loss": 2.5905, "step": 44300 }, { "epoch": 0.08827537294402651, "grad_norm": 0.16158264875411987, "learning_rate": 0.002, "loss": 2.5748, "step": 44310 }, { "epoch": 0.08829529516766543, "grad_norm": 0.14489516615867615, "learning_rate": 0.002, "loss": 2.5666, "step": 44320 }, { "epoch": 0.08831521739130435, "grad_norm": 0.1691456139087677, "learning_rate": 0.002, "loss": 2.5852, "step": 44330 }, { "epoch": 0.08833513961494326, "grad_norm": 0.1814192831516266, "learning_rate": 0.002, "loss": 2.5756, "step": 44340 }, { "epoch": 0.08835506183858217, "grad_norm": 0.17790384590625763, "learning_rate": 0.002, "loss": 2.5753, "step": 44350 }, { "epoch": 0.08837498406222109, "grad_norm": 0.21245290338993073, "learning_rate": 0.002, "loss": 2.5683, "step": 44360 }, { "epoch": 0.08839490628586, "grad_norm": 0.1781073361635208, "learning_rate": 0.002, "loss": 2.5793, "step": 44370 }, { "epoch": 0.08841482850949892, "grad_norm": 0.25543245673179626, "learning_rate": 0.002, "loss": 2.589, "step": 44380 }, { "epoch": 0.08843475073313783, "grad_norm": 0.15219087898731232, "learning_rate": 0.002, "loss": 2.5792, "step": 44390 }, { "epoch": 0.08845467295677674, "grad_norm": 0.16254964470863342, "learning_rate": 0.002, "loss": 2.5807, "step": 44400 }, { "epoch": 0.08847459518041566, "grad_norm": 0.189755380153656, "learning_rate": 0.002, "loss": 2.5872, "step": 44410 }, { "epoch": 0.08849451740405458, "grad_norm": 0.20316004753112793, "learning_rate": 0.002, "loss": 2.5769, "step": 44420 }, { "epoch": 0.08851443962769348, "grad_norm": 0.1696532517671585, "learning_rate": 0.002, "loss": 2.5772, "step": 44430 }, { "epoch": 0.0885343618513324, "grad_norm": 0.15426132082939148, "learning_rate": 0.002, "loss": 2.5828, "step": 44440 }, { "epoch": 0.08855428407497132, "grad_norm": 0.20176248252391815, "learning_rate": 0.002, "loss": 2.5904, "step": 44450 }, { "epoch": 0.08857420629861022, "grad_norm": 0.14545126259326935, "learning_rate": 0.002, "loss": 2.5814, "step": 44460 }, { "epoch": 0.08859412852224914, "grad_norm": 0.15016813576221466, "learning_rate": 0.002, "loss": 2.574, "step": 44470 }, { "epoch": 0.08861405074588806, "grad_norm": 0.19239136576652527, "learning_rate": 0.002, "loss": 2.5639, "step": 44480 }, { "epoch": 0.08863397296952696, "grad_norm": 0.1642952412366867, "learning_rate": 0.002, "loss": 2.5767, "step": 44490 }, { "epoch": 0.08865389519316588, "grad_norm": 0.15782445669174194, "learning_rate": 0.002, "loss": 2.5814, "step": 44500 }, { "epoch": 0.0886738174168048, "grad_norm": 0.19180163741111755, "learning_rate": 0.002, "loss": 2.5705, "step": 44510 }, { "epoch": 0.0886937396404437, "grad_norm": 0.16238734126091003, "learning_rate": 0.002, "loss": 2.5821, "step": 44520 }, { "epoch": 0.08871366186408262, "grad_norm": 0.15378254652023315, "learning_rate": 0.002, "loss": 2.5673, "step": 44530 }, { "epoch": 0.08873358408772153, "grad_norm": 0.20071035623550415, "learning_rate": 0.002, "loss": 2.5811, "step": 44540 }, { "epoch": 0.08875350631136045, "grad_norm": 0.15988102555274963, "learning_rate": 0.002, "loss": 2.5851, "step": 44550 }, { "epoch": 0.08877342853499937, "grad_norm": 0.19766917824745178, "learning_rate": 0.002, "loss": 2.5754, "step": 44560 }, { "epoch": 0.08879335075863827, "grad_norm": 0.17521192133426666, "learning_rate": 0.002, "loss": 2.5803, "step": 44570 }, { "epoch": 0.08881327298227719, "grad_norm": 0.17127719521522522, "learning_rate": 0.002, "loss": 2.5792, "step": 44580 }, { "epoch": 0.08883319520591611, "grad_norm": 0.2024412900209427, "learning_rate": 0.002, "loss": 2.5586, "step": 44590 }, { "epoch": 0.08885311742955501, "grad_norm": 0.17853526771068573, "learning_rate": 0.002, "loss": 2.5705, "step": 44600 }, { "epoch": 0.08887303965319393, "grad_norm": 0.1593686193227768, "learning_rate": 0.002, "loss": 2.5777, "step": 44610 }, { "epoch": 0.08889296187683285, "grad_norm": 0.16812120378017426, "learning_rate": 0.002, "loss": 2.5849, "step": 44620 }, { "epoch": 0.08891288410047175, "grad_norm": 0.1729382425546646, "learning_rate": 0.002, "loss": 2.5786, "step": 44630 }, { "epoch": 0.08893280632411067, "grad_norm": 0.1440718173980713, "learning_rate": 0.002, "loss": 2.5663, "step": 44640 }, { "epoch": 0.08895272854774959, "grad_norm": 0.17182126641273499, "learning_rate": 0.002, "loss": 2.5797, "step": 44650 }, { "epoch": 0.0889726507713885, "grad_norm": 0.16428664326667786, "learning_rate": 0.002, "loss": 2.5812, "step": 44660 }, { "epoch": 0.08899257299502741, "grad_norm": 0.189707413315773, "learning_rate": 0.002, "loss": 2.5698, "step": 44670 }, { "epoch": 0.08901249521866633, "grad_norm": 0.16791832447052002, "learning_rate": 0.002, "loss": 2.5938, "step": 44680 }, { "epoch": 0.08903241744230524, "grad_norm": 0.16589327156543732, "learning_rate": 0.002, "loss": 2.5762, "step": 44690 }, { "epoch": 0.08905233966594416, "grad_norm": 0.21372953057289124, "learning_rate": 0.002, "loss": 2.568, "step": 44700 }, { "epoch": 0.08907226188958307, "grad_norm": 0.20021529495716095, "learning_rate": 0.002, "loss": 2.5866, "step": 44710 }, { "epoch": 0.08909218411322198, "grad_norm": 0.1521144062280655, "learning_rate": 0.002, "loss": 2.5766, "step": 44720 }, { "epoch": 0.0891121063368609, "grad_norm": 0.15721534192562103, "learning_rate": 0.002, "loss": 2.5676, "step": 44730 }, { "epoch": 0.08913202856049982, "grad_norm": 0.1482132226228714, "learning_rate": 0.002, "loss": 2.5657, "step": 44740 }, { "epoch": 0.08915195078413872, "grad_norm": 0.21298162639141083, "learning_rate": 0.002, "loss": 2.5708, "step": 44750 }, { "epoch": 0.08917187300777764, "grad_norm": 0.17246027290821075, "learning_rate": 0.002, "loss": 2.5656, "step": 44760 }, { "epoch": 0.08919179523141654, "grad_norm": 0.17844916880130768, "learning_rate": 0.002, "loss": 2.5777, "step": 44770 }, { "epoch": 0.08921171745505546, "grad_norm": 0.19311346113681793, "learning_rate": 0.002, "loss": 2.5699, "step": 44780 }, { "epoch": 0.08923163967869438, "grad_norm": 0.16149890422821045, "learning_rate": 0.002, "loss": 2.5771, "step": 44790 }, { "epoch": 0.08925156190233328, "grad_norm": 0.182280033826828, "learning_rate": 0.002, "loss": 2.5967, "step": 44800 }, { "epoch": 0.0892714841259722, "grad_norm": 0.16486385464668274, "learning_rate": 0.002, "loss": 2.5748, "step": 44810 }, { "epoch": 0.08929140634961112, "grad_norm": 0.16857601702213287, "learning_rate": 0.002, "loss": 2.5836, "step": 44820 }, { "epoch": 0.08931132857325003, "grad_norm": 0.16172975301742554, "learning_rate": 0.002, "loss": 2.5672, "step": 44830 }, { "epoch": 0.08933125079688894, "grad_norm": 0.18767642974853516, "learning_rate": 0.002, "loss": 2.5834, "step": 44840 }, { "epoch": 0.08935117302052786, "grad_norm": 0.1930336058139801, "learning_rate": 0.002, "loss": 2.5829, "step": 44850 }, { "epoch": 0.08937109524416677, "grad_norm": 0.1758536994457245, "learning_rate": 0.002, "loss": 2.5855, "step": 44860 }, { "epoch": 0.08939101746780569, "grad_norm": 0.14445695281028748, "learning_rate": 0.002, "loss": 2.5737, "step": 44870 }, { "epoch": 0.0894109396914446, "grad_norm": 0.16918794810771942, "learning_rate": 0.002, "loss": 2.5802, "step": 44880 }, { "epoch": 0.08943086191508351, "grad_norm": 0.1712140440940857, "learning_rate": 0.002, "loss": 2.5715, "step": 44890 }, { "epoch": 0.08945078413872243, "grad_norm": 0.19290558993816376, "learning_rate": 0.002, "loss": 2.5668, "step": 44900 }, { "epoch": 0.08947070636236135, "grad_norm": 0.17013874650001526, "learning_rate": 0.002, "loss": 2.57, "step": 44910 }, { "epoch": 0.08949062858600025, "grad_norm": 0.14078092575073242, "learning_rate": 0.002, "loss": 2.5582, "step": 44920 }, { "epoch": 0.08951055080963917, "grad_norm": 0.16397666931152344, "learning_rate": 0.002, "loss": 2.573, "step": 44930 }, { "epoch": 0.08953047303327809, "grad_norm": 0.19976529479026794, "learning_rate": 0.002, "loss": 2.5751, "step": 44940 }, { "epoch": 0.08955039525691699, "grad_norm": 0.16253620386123657, "learning_rate": 0.002, "loss": 2.5695, "step": 44950 }, { "epoch": 0.08957031748055591, "grad_norm": 0.16687104105949402, "learning_rate": 0.002, "loss": 2.5723, "step": 44960 }, { "epoch": 0.08959023970419483, "grad_norm": 0.19146603345870972, "learning_rate": 0.002, "loss": 2.5862, "step": 44970 }, { "epoch": 0.08961016192783373, "grad_norm": 0.1606254279613495, "learning_rate": 0.002, "loss": 2.5773, "step": 44980 }, { "epoch": 0.08963008415147265, "grad_norm": 0.19848112761974335, "learning_rate": 0.002, "loss": 2.5717, "step": 44990 }, { "epoch": 0.08965000637511157, "grad_norm": 0.22735978662967682, "learning_rate": 0.002, "loss": 2.563, "step": 45000 }, { "epoch": 0.08966992859875048, "grad_norm": 0.1563633382320404, "learning_rate": 0.002, "loss": 2.5986, "step": 45010 }, { "epoch": 0.0896898508223894, "grad_norm": 0.1659681349992752, "learning_rate": 0.002, "loss": 2.5922, "step": 45020 }, { "epoch": 0.0897097730460283, "grad_norm": 0.17771652340888977, "learning_rate": 0.002, "loss": 2.5773, "step": 45030 }, { "epoch": 0.08972969526966722, "grad_norm": 0.1869574338197708, "learning_rate": 0.002, "loss": 2.5816, "step": 45040 }, { "epoch": 0.08974961749330614, "grad_norm": 0.16526223719120026, "learning_rate": 0.002, "loss": 2.5665, "step": 45050 }, { "epoch": 0.08976953971694504, "grad_norm": 0.17855101823806763, "learning_rate": 0.002, "loss": 2.5843, "step": 45060 }, { "epoch": 0.08978946194058396, "grad_norm": 0.17844124138355255, "learning_rate": 0.002, "loss": 2.5642, "step": 45070 }, { "epoch": 0.08980938416422288, "grad_norm": 0.1758597195148468, "learning_rate": 0.002, "loss": 2.5648, "step": 45080 }, { "epoch": 0.08982930638786178, "grad_norm": 0.17160539329051971, "learning_rate": 0.002, "loss": 2.5828, "step": 45090 }, { "epoch": 0.0898492286115007, "grad_norm": 0.16499009728431702, "learning_rate": 0.002, "loss": 2.565, "step": 45100 }, { "epoch": 0.08986915083513962, "grad_norm": 0.1744372546672821, "learning_rate": 0.002, "loss": 2.5788, "step": 45110 }, { "epoch": 0.08988907305877852, "grad_norm": 0.16655853390693665, "learning_rate": 0.002, "loss": 2.5786, "step": 45120 }, { "epoch": 0.08990899528241744, "grad_norm": 0.14801780879497528, "learning_rate": 0.002, "loss": 2.5796, "step": 45130 }, { "epoch": 0.08992891750605636, "grad_norm": 0.19707784056663513, "learning_rate": 0.002, "loss": 2.5725, "step": 45140 }, { "epoch": 0.08994883972969527, "grad_norm": 0.14653800427913666, "learning_rate": 0.002, "loss": 2.5782, "step": 45150 }, { "epoch": 0.08996876195333418, "grad_norm": 0.20091809332370758, "learning_rate": 0.002, "loss": 2.5824, "step": 45160 }, { "epoch": 0.0899886841769731, "grad_norm": 0.1548669934272766, "learning_rate": 0.002, "loss": 2.5699, "step": 45170 }, { "epoch": 0.090008606400612, "grad_norm": 0.15077972412109375, "learning_rate": 0.002, "loss": 2.5786, "step": 45180 }, { "epoch": 0.09002852862425093, "grad_norm": 0.198324516415596, "learning_rate": 0.002, "loss": 2.577, "step": 45190 }, { "epoch": 0.09004845084788984, "grad_norm": 0.18436791002750397, "learning_rate": 0.002, "loss": 2.5792, "step": 45200 }, { "epoch": 0.09006837307152875, "grad_norm": 0.1749226599931717, "learning_rate": 0.002, "loss": 2.5866, "step": 45210 }, { "epoch": 0.09008829529516767, "grad_norm": 0.15775209665298462, "learning_rate": 0.002, "loss": 2.5844, "step": 45220 }, { "epoch": 0.09010821751880659, "grad_norm": 0.15810216963291168, "learning_rate": 0.002, "loss": 2.5805, "step": 45230 }, { "epoch": 0.09012813974244549, "grad_norm": 0.19176246225833893, "learning_rate": 0.002, "loss": 2.5785, "step": 45240 }, { "epoch": 0.09014806196608441, "grad_norm": 0.16781917214393616, "learning_rate": 0.002, "loss": 2.5708, "step": 45250 }, { "epoch": 0.09016798418972333, "grad_norm": 0.19797489047050476, "learning_rate": 0.002, "loss": 2.5771, "step": 45260 }, { "epoch": 0.09018790641336223, "grad_norm": 0.15047462284564972, "learning_rate": 0.002, "loss": 2.5711, "step": 45270 }, { "epoch": 0.09020782863700115, "grad_norm": 0.16145436465740204, "learning_rate": 0.002, "loss": 2.5856, "step": 45280 }, { "epoch": 0.09022775086064005, "grad_norm": 0.189473494887352, "learning_rate": 0.002, "loss": 2.5683, "step": 45290 }, { "epoch": 0.09024767308427897, "grad_norm": 0.1630975753068924, "learning_rate": 0.002, "loss": 2.5793, "step": 45300 }, { "epoch": 0.09026759530791789, "grad_norm": 0.1983298510313034, "learning_rate": 0.002, "loss": 2.5963, "step": 45310 }, { "epoch": 0.0902875175315568, "grad_norm": 0.16033001244068146, "learning_rate": 0.002, "loss": 2.568, "step": 45320 }, { "epoch": 0.09030743975519571, "grad_norm": 0.16899815201759338, "learning_rate": 0.002, "loss": 2.5788, "step": 45330 }, { "epoch": 0.09032736197883463, "grad_norm": 0.13631244003772736, "learning_rate": 0.002, "loss": 2.5862, "step": 45340 }, { "epoch": 0.09034728420247354, "grad_norm": 0.18435761332511902, "learning_rate": 0.002, "loss": 2.5879, "step": 45350 }, { "epoch": 0.09036720642611246, "grad_norm": 0.17153476178646088, "learning_rate": 0.002, "loss": 2.578, "step": 45360 }, { "epoch": 0.09038712864975137, "grad_norm": 0.15229150652885437, "learning_rate": 0.002, "loss": 2.5569, "step": 45370 }, { "epoch": 0.09040705087339028, "grad_norm": 0.16966284811496735, "learning_rate": 0.002, "loss": 2.5792, "step": 45380 }, { "epoch": 0.0904269730970292, "grad_norm": 0.1763056069612503, "learning_rate": 0.002, "loss": 2.5876, "step": 45390 }, { "epoch": 0.09044689532066812, "grad_norm": 0.16410677134990692, "learning_rate": 0.002, "loss": 2.5568, "step": 45400 }, { "epoch": 0.09046681754430702, "grad_norm": 0.18643635511398315, "learning_rate": 0.002, "loss": 2.5707, "step": 45410 }, { "epoch": 0.09048673976794594, "grad_norm": 0.17523552477359772, "learning_rate": 0.002, "loss": 2.5777, "step": 45420 }, { "epoch": 0.09050666199158486, "grad_norm": 0.1850629597902298, "learning_rate": 0.002, "loss": 2.5751, "step": 45430 }, { "epoch": 0.09052658421522376, "grad_norm": 0.17490409314632416, "learning_rate": 0.002, "loss": 2.5786, "step": 45440 }, { "epoch": 0.09054650643886268, "grad_norm": 0.16316087543964386, "learning_rate": 0.002, "loss": 2.58, "step": 45450 }, { "epoch": 0.0905664286625016, "grad_norm": 0.1453726887702942, "learning_rate": 0.002, "loss": 2.5759, "step": 45460 }, { "epoch": 0.0905863508861405, "grad_norm": 0.1489792764186859, "learning_rate": 0.002, "loss": 2.5743, "step": 45470 }, { "epoch": 0.09060627310977942, "grad_norm": 0.15810300409793854, "learning_rate": 0.002, "loss": 2.5933, "step": 45480 }, { "epoch": 0.09062619533341834, "grad_norm": 0.1864948868751526, "learning_rate": 0.002, "loss": 2.5856, "step": 45490 }, { "epoch": 0.09064611755705725, "grad_norm": 0.1565379649400711, "learning_rate": 0.002, "loss": 2.5786, "step": 45500 }, { "epoch": 0.09066603978069616, "grad_norm": 0.21834982931613922, "learning_rate": 0.002, "loss": 2.5776, "step": 45510 }, { "epoch": 0.09068596200433507, "grad_norm": 0.20204739272594452, "learning_rate": 0.002, "loss": 2.5917, "step": 45520 }, { "epoch": 0.09070588422797399, "grad_norm": 0.14401182532310486, "learning_rate": 0.002, "loss": 2.5863, "step": 45530 }, { "epoch": 0.0907258064516129, "grad_norm": 0.16469325125217438, "learning_rate": 0.002, "loss": 2.5826, "step": 45540 }, { "epoch": 0.09074572867525181, "grad_norm": 0.1865440309047699, "learning_rate": 0.002, "loss": 2.5704, "step": 45550 }, { "epoch": 0.09076565089889073, "grad_norm": 0.1867426335811615, "learning_rate": 0.002, "loss": 2.577, "step": 45560 }, { "epoch": 0.09078557312252965, "grad_norm": 0.18695604801177979, "learning_rate": 0.002, "loss": 2.5695, "step": 45570 }, { "epoch": 0.09080549534616855, "grad_norm": 0.15976661443710327, "learning_rate": 0.002, "loss": 2.5779, "step": 45580 }, { "epoch": 0.09082541756980747, "grad_norm": 0.19843989610671997, "learning_rate": 0.002, "loss": 2.5916, "step": 45590 }, { "epoch": 0.09084533979344639, "grad_norm": 0.18004050850868225, "learning_rate": 0.002, "loss": 2.5888, "step": 45600 }, { "epoch": 0.0908652620170853, "grad_norm": 0.16324687004089355, "learning_rate": 0.002, "loss": 2.581, "step": 45610 }, { "epoch": 0.09088518424072421, "grad_norm": 0.17420421540737152, "learning_rate": 0.002, "loss": 2.5686, "step": 45620 }, { "epoch": 0.09090510646436313, "grad_norm": 0.1618928462266922, "learning_rate": 0.002, "loss": 2.568, "step": 45630 }, { "epoch": 0.09092502868800204, "grad_norm": 0.17091956734657288, "learning_rate": 0.002, "loss": 2.5785, "step": 45640 }, { "epoch": 0.09094495091164095, "grad_norm": 0.16902446746826172, "learning_rate": 0.002, "loss": 2.5883, "step": 45650 }, { "epoch": 0.09096487313527987, "grad_norm": 0.1551520824432373, "learning_rate": 0.002, "loss": 2.5852, "step": 45660 }, { "epoch": 0.09098479535891878, "grad_norm": 0.18411438167095184, "learning_rate": 0.002, "loss": 2.5833, "step": 45670 }, { "epoch": 0.0910047175825577, "grad_norm": 0.159823939204216, "learning_rate": 0.002, "loss": 2.5752, "step": 45680 }, { "epoch": 0.09102463980619661, "grad_norm": 0.189689502120018, "learning_rate": 0.002, "loss": 2.5851, "step": 45690 }, { "epoch": 0.09104456202983552, "grad_norm": 0.14450007677078247, "learning_rate": 0.002, "loss": 2.5669, "step": 45700 }, { "epoch": 0.09106448425347444, "grad_norm": 0.1647994965314865, "learning_rate": 0.002, "loss": 2.5641, "step": 45710 }, { "epoch": 0.09108440647711336, "grad_norm": 0.15751437842845917, "learning_rate": 0.002, "loss": 2.5616, "step": 45720 }, { "epoch": 0.09110432870075226, "grad_norm": 0.1568155735731125, "learning_rate": 0.002, "loss": 2.5896, "step": 45730 }, { "epoch": 0.09112425092439118, "grad_norm": 0.16425374150276184, "learning_rate": 0.002, "loss": 2.5797, "step": 45740 }, { "epoch": 0.0911441731480301, "grad_norm": 0.18463915586471558, "learning_rate": 0.002, "loss": 2.5606, "step": 45750 }, { "epoch": 0.091164095371669, "grad_norm": 0.18154758214950562, "learning_rate": 0.002, "loss": 2.5873, "step": 45760 }, { "epoch": 0.09118401759530792, "grad_norm": 0.16109494864940643, "learning_rate": 0.002, "loss": 2.5502, "step": 45770 }, { "epoch": 0.09120393981894682, "grad_norm": 0.23490867018699646, "learning_rate": 0.002, "loss": 2.5793, "step": 45780 }, { "epoch": 0.09122386204258574, "grad_norm": 0.15619800984859467, "learning_rate": 0.002, "loss": 2.5648, "step": 45790 }, { "epoch": 0.09124378426622466, "grad_norm": 0.17149876058101654, "learning_rate": 0.002, "loss": 2.5769, "step": 45800 }, { "epoch": 0.09126370648986357, "grad_norm": 0.19361865520477295, "learning_rate": 0.002, "loss": 2.5833, "step": 45810 }, { "epoch": 0.09128362871350248, "grad_norm": 0.14620761573314667, "learning_rate": 0.002, "loss": 2.5832, "step": 45820 }, { "epoch": 0.0913035509371414, "grad_norm": 0.14912310242652893, "learning_rate": 0.002, "loss": 2.5647, "step": 45830 }, { "epoch": 0.09132347316078031, "grad_norm": 0.16890361905097961, "learning_rate": 0.002, "loss": 2.5796, "step": 45840 }, { "epoch": 0.09134339538441923, "grad_norm": 0.15491840243339539, "learning_rate": 0.002, "loss": 2.5765, "step": 45850 }, { "epoch": 0.09136331760805814, "grad_norm": 0.18352490663528442, "learning_rate": 0.002, "loss": 2.5752, "step": 45860 }, { "epoch": 0.09138323983169705, "grad_norm": 0.20314787328243256, "learning_rate": 0.002, "loss": 2.5911, "step": 45870 }, { "epoch": 0.09140316205533597, "grad_norm": 0.15321755409240723, "learning_rate": 0.002, "loss": 2.5772, "step": 45880 }, { "epoch": 0.09142308427897489, "grad_norm": 0.21994969248771667, "learning_rate": 0.002, "loss": 2.5808, "step": 45890 }, { "epoch": 0.09144300650261379, "grad_norm": 0.17511656880378723, "learning_rate": 0.002, "loss": 2.5946, "step": 45900 }, { "epoch": 0.09146292872625271, "grad_norm": 0.17684757709503174, "learning_rate": 0.002, "loss": 2.5724, "step": 45910 }, { "epoch": 0.09148285094989163, "grad_norm": 0.17983828485012054, "learning_rate": 0.002, "loss": 2.5852, "step": 45920 }, { "epoch": 0.09150277317353053, "grad_norm": 0.18226061761379242, "learning_rate": 0.002, "loss": 2.5644, "step": 45930 }, { "epoch": 0.09152269539716945, "grad_norm": 0.16367217898368835, "learning_rate": 0.002, "loss": 2.5896, "step": 45940 }, { "epoch": 0.09154261762080837, "grad_norm": 0.16286812722682953, "learning_rate": 0.002, "loss": 2.5717, "step": 45950 }, { "epoch": 0.09156253984444727, "grad_norm": 0.15652666985988617, "learning_rate": 0.002, "loss": 2.5861, "step": 45960 }, { "epoch": 0.09158246206808619, "grad_norm": 0.2070000320672989, "learning_rate": 0.002, "loss": 2.5774, "step": 45970 }, { "epoch": 0.09160238429172511, "grad_norm": 0.15749157965183258, "learning_rate": 0.002, "loss": 2.5673, "step": 45980 }, { "epoch": 0.09162230651536402, "grad_norm": 0.18443036079406738, "learning_rate": 0.002, "loss": 2.5622, "step": 45990 }, { "epoch": 0.09164222873900293, "grad_norm": 0.18831709027290344, "learning_rate": 0.002, "loss": 2.5833, "step": 46000 }, { "epoch": 0.09166215096264185, "grad_norm": 0.15106694400310516, "learning_rate": 0.002, "loss": 2.5553, "step": 46010 }, { "epoch": 0.09168207318628076, "grad_norm": 0.186127707362175, "learning_rate": 0.002, "loss": 2.5714, "step": 46020 }, { "epoch": 0.09170199540991968, "grad_norm": 0.14672976732254028, "learning_rate": 0.002, "loss": 2.5832, "step": 46030 }, { "epoch": 0.09172191763355858, "grad_norm": 0.20747359097003937, "learning_rate": 0.002, "loss": 2.5832, "step": 46040 }, { "epoch": 0.0917418398571975, "grad_norm": 0.18410389125347137, "learning_rate": 0.002, "loss": 2.5747, "step": 46050 }, { "epoch": 0.09176176208083642, "grad_norm": 0.17274513840675354, "learning_rate": 0.002, "loss": 2.5624, "step": 46060 }, { "epoch": 0.09178168430447532, "grad_norm": 0.1725182682275772, "learning_rate": 0.002, "loss": 2.5745, "step": 46070 }, { "epoch": 0.09180160652811424, "grad_norm": 0.1740109771490097, "learning_rate": 0.002, "loss": 2.5837, "step": 46080 }, { "epoch": 0.09182152875175316, "grad_norm": 0.16667144000530243, "learning_rate": 0.002, "loss": 2.5891, "step": 46090 }, { "epoch": 0.09184145097539206, "grad_norm": 0.1779172569513321, "learning_rate": 0.002, "loss": 2.5732, "step": 46100 }, { "epoch": 0.09186137319903098, "grad_norm": 0.1728372424840927, "learning_rate": 0.002, "loss": 2.5674, "step": 46110 }, { "epoch": 0.0918812954226699, "grad_norm": 0.14963005483150482, "learning_rate": 0.002, "loss": 2.5732, "step": 46120 }, { "epoch": 0.0919012176463088, "grad_norm": 0.20317165553569794, "learning_rate": 0.002, "loss": 2.5748, "step": 46130 }, { "epoch": 0.09192113986994772, "grad_norm": 0.16372118890285492, "learning_rate": 0.002, "loss": 2.5778, "step": 46140 }, { "epoch": 0.09194106209358664, "grad_norm": 0.18889003992080688, "learning_rate": 0.002, "loss": 2.5866, "step": 46150 }, { "epoch": 0.09196098431722555, "grad_norm": 0.1579994410276413, "learning_rate": 0.002, "loss": 2.5642, "step": 46160 }, { "epoch": 0.09198090654086447, "grad_norm": 0.15346238017082214, "learning_rate": 0.002, "loss": 2.5719, "step": 46170 }, { "epoch": 0.09200082876450338, "grad_norm": 0.20403146743774414, "learning_rate": 0.002, "loss": 2.569, "step": 46180 }, { "epoch": 0.09202075098814229, "grad_norm": 0.1772054135799408, "learning_rate": 0.002, "loss": 2.5742, "step": 46190 }, { "epoch": 0.09204067321178121, "grad_norm": 0.17901365458965302, "learning_rate": 0.002, "loss": 2.5668, "step": 46200 }, { "epoch": 0.09206059543542013, "grad_norm": 0.1935337483882904, "learning_rate": 0.002, "loss": 2.5802, "step": 46210 }, { "epoch": 0.09208051765905903, "grad_norm": 0.1890152543783188, "learning_rate": 0.002, "loss": 2.5817, "step": 46220 }, { "epoch": 0.09210043988269795, "grad_norm": 0.1656842827796936, "learning_rate": 0.002, "loss": 2.5703, "step": 46230 }, { "epoch": 0.09212036210633687, "grad_norm": 0.15887591242790222, "learning_rate": 0.002, "loss": 2.5955, "step": 46240 }, { "epoch": 0.09214028432997577, "grad_norm": 0.14461812376976013, "learning_rate": 0.002, "loss": 2.5878, "step": 46250 }, { "epoch": 0.09216020655361469, "grad_norm": 0.17542685568332672, "learning_rate": 0.002, "loss": 2.5746, "step": 46260 }, { "epoch": 0.09218012877725361, "grad_norm": 0.1886063516139984, "learning_rate": 0.002, "loss": 2.583, "step": 46270 }, { "epoch": 0.09220005100089251, "grad_norm": 0.17713423073291779, "learning_rate": 0.002, "loss": 2.5848, "step": 46280 }, { "epoch": 0.09221997322453143, "grad_norm": 0.1639765352010727, "learning_rate": 0.002, "loss": 2.5843, "step": 46290 }, { "epoch": 0.09223989544817034, "grad_norm": 0.18381087481975555, "learning_rate": 0.002, "loss": 2.5747, "step": 46300 }, { "epoch": 0.09225981767180925, "grad_norm": 0.18758997321128845, "learning_rate": 0.002, "loss": 2.5882, "step": 46310 }, { "epoch": 0.09227973989544817, "grad_norm": 0.1564992219209671, "learning_rate": 0.002, "loss": 2.5919, "step": 46320 }, { "epoch": 0.09229966211908708, "grad_norm": 0.19015374779701233, "learning_rate": 0.002, "loss": 2.5785, "step": 46330 }, { "epoch": 0.092319584342726, "grad_norm": 0.18373757600784302, "learning_rate": 0.002, "loss": 2.5653, "step": 46340 }, { "epoch": 0.09233950656636492, "grad_norm": 0.1631132811307907, "learning_rate": 0.002, "loss": 2.5767, "step": 46350 }, { "epoch": 0.09235942879000382, "grad_norm": 0.16372209787368774, "learning_rate": 0.002, "loss": 2.57, "step": 46360 }, { "epoch": 0.09237935101364274, "grad_norm": 0.15250596404075623, "learning_rate": 0.002, "loss": 2.5598, "step": 46370 }, { "epoch": 0.09239927323728166, "grad_norm": 0.1949261873960495, "learning_rate": 0.002, "loss": 2.5668, "step": 46380 }, { "epoch": 0.09241919546092056, "grad_norm": 0.15779943764209747, "learning_rate": 0.002, "loss": 2.574, "step": 46390 }, { "epoch": 0.09243911768455948, "grad_norm": 0.16070346534252167, "learning_rate": 0.002, "loss": 2.5774, "step": 46400 }, { "epoch": 0.0924590399081984, "grad_norm": 0.14925703406333923, "learning_rate": 0.002, "loss": 2.5789, "step": 46410 }, { "epoch": 0.0924789621318373, "grad_norm": 0.17369572818279266, "learning_rate": 0.002, "loss": 2.5806, "step": 46420 }, { "epoch": 0.09249888435547622, "grad_norm": 0.16090019047260284, "learning_rate": 0.002, "loss": 2.5776, "step": 46430 }, { "epoch": 0.09251880657911514, "grad_norm": 0.17023278772830963, "learning_rate": 0.002, "loss": 2.5769, "step": 46440 }, { "epoch": 0.09253872880275404, "grad_norm": 0.16875486075878143, "learning_rate": 0.002, "loss": 2.5828, "step": 46450 }, { "epoch": 0.09255865102639296, "grad_norm": 0.15317250788211823, "learning_rate": 0.002, "loss": 2.5814, "step": 46460 }, { "epoch": 0.09257857325003188, "grad_norm": 0.2344529777765274, "learning_rate": 0.002, "loss": 2.5835, "step": 46470 }, { "epoch": 0.09259849547367079, "grad_norm": 0.17011131346225739, "learning_rate": 0.002, "loss": 2.5864, "step": 46480 }, { "epoch": 0.0926184176973097, "grad_norm": 0.15018145740032196, "learning_rate": 0.002, "loss": 2.5839, "step": 46490 }, { "epoch": 0.09263833992094862, "grad_norm": 0.17320629954338074, "learning_rate": 0.002, "loss": 2.5824, "step": 46500 }, { "epoch": 0.09265826214458753, "grad_norm": 0.17772236466407776, "learning_rate": 0.002, "loss": 2.5655, "step": 46510 }, { "epoch": 0.09267818436822645, "grad_norm": 0.1698092222213745, "learning_rate": 0.002, "loss": 2.5851, "step": 46520 }, { "epoch": 0.09269810659186535, "grad_norm": 0.15309658646583557, "learning_rate": 0.002, "loss": 2.5852, "step": 46530 }, { "epoch": 0.09271802881550427, "grad_norm": 0.21564358472824097, "learning_rate": 0.002, "loss": 2.586, "step": 46540 }, { "epoch": 0.09273795103914319, "grad_norm": 0.16790740191936493, "learning_rate": 0.002, "loss": 2.5929, "step": 46550 }, { "epoch": 0.09275787326278209, "grad_norm": 0.17639338970184326, "learning_rate": 0.002, "loss": 2.5903, "step": 46560 }, { "epoch": 0.09277779548642101, "grad_norm": 0.17764505743980408, "learning_rate": 0.002, "loss": 2.5704, "step": 46570 }, { "epoch": 0.09279771771005993, "grad_norm": 0.15935924649238586, "learning_rate": 0.002, "loss": 2.5963, "step": 46580 }, { "epoch": 0.09281763993369883, "grad_norm": 0.14497311413288116, "learning_rate": 0.002, "loss": 2.5833, "step": 46590 }, { "epoch": 0.09283756215733775, "grad_norm": 0.20690657198429108, "learning_rate": 0.002, "loss": 2.5697, "step": 46600 }, { "epoch": 0.09285748438097667, "grad_norm": 0.16927367448806763, "learning_rate": 0.002, "loss": 2.5696, "step": 46610 }, { "epoch": 0.09287740660461558, "grad_norm": 0.16317415237426758, "learning_rate": 0.002, "loss": 2.5918, "step": 46620 }, { "epoch": 0.0928973288282545, "grad_norm": 0.1821802258491516, "learning_rate": 0.002, "loss": 2.5784, "step": 46630 }, { "epoch": 0.09291725105189341, "grad_norm": 0.15671230852603912, "learning_rate": 0.002, "loss": 2.5834, "step": 46640 }, { "epoch": 0.09293717327553232, "grad_norm": 0.18222303688526154, "learning_rate": 0.002, "loss": 2.5904, "step": 46650 }, { "epoch": 0.09295709549917124, "grad_norm": 0.17765101790428162, "learning_rate": 0.002, "loss": 2.5786, "step": 46660 }, { "epoch": 0.09297701772281015, "grad_norm": 0.1549069732427597, "learning_rate": 0.002, "loss": 2.5644, "step": 46670 }, { "epoch": 0.09299693994644906, "grad_norm": 0.17649619281291962, "learning_rate": 0.002, "loss": 2.5859, "step": 46680 }, { "epoch": 0.09301686217008798, "grad_norm": 0.1841515302658081, "learning_rate": 0.002, "loss": 2.5772, "step": 46690 }, { "epoch": 0.0930367843937269, "grad_norm": 0.17290806770324707, "learning_rate": 0.002, "loss": 2.5773, "step": 46700 }, { "epoch": 0.0930567066173658, "grad_norm": 0.1687006652355194, "learning_rate": 0.002, "loss": 2.5809, "step": 46710 }, { "epoch": 0.09307662884100472, "grad_norm": 0.18681025505065918, "learning_rate": 0.002, "loss": 2.5565, "step": 46720 }, { "epoch": 0.09309655106464364, "grad_norm": 0.16762548685073853, "learning_rate": 0.002, "loss": 2.577, "step": 46730 }, { "epoch": 0.09311647328828254, "grad_norm": 0.16332441568374634, "learning_rate": 0.002, "loss": 2.5762, "step": 46740 }, { "epoch": 0.09313639551192146, "grad_norm": 0.1520930677652359, "learning_rate": 0.002, "loss": 2.5761, "step": 46750 }, { "epoch": 0.09315631773556038, "grad_norm": 0.18309734761714935, "learning_rate": 0.002, "loss": 2.5812, "step": 46760 }, { "epoch": 0.09317623995919928, "grad_norm": 0.14805127680301666, "learning_rate": 0.002, "loss": 2.5667, "step": 46770 }, { "epoch": 0.0931961621828382, "grad_norm": 0.17657533288002014, "learning_rate": 0.002, "loss": 2.5689, "step": 46780 }, { "epoch": 0.0932160844064771, "grad_norm": 0.15026871860027313, "learning_rate": 0.002, "loss": 2.5621, "step": 46790 }, { "epoch": 0.09323600663011603, "grad_norm": 0.17876625061035156, "learning_rate": 0.002, "loss": 2.5702, "step": 46800 }, { "epoch": 0.09325592885375494, "grad_norm": 0.16770145297050476, "learning_rate": 0.002, "loss": 2.5795, "step": 46810 }, { "epoch": 0.09327585107739385, "grad_norm": 0.20920595526695251, "learning_rate": 0.002, "loss": 2.5763, "step": 46820 }, { "epoch": 0.09329577330103277, "grad_norm": 0.180496945977211, "learning_rate": 0.002, "loss": 2.5618, "step": 46830 }, { "epoch": 0.09331569552467169, "grad_norm": 0.16316311061382294, "learning_rate": 0.002, "loss": 2.577, "step": 46840 }, { "epoch": 0.09333561774831059, "grad_norm": 0.1632407009601593, "learning_rate": 0.002, "loss": 2.5667, "step": 46850 }, { "epoch": 0.09335553997194951, "grad_norm": 0.17477673292160034, "learning_rate": 0.002, "loss": 2.575, "step": 46860 }, { "epoch": 0.09337546219558843, "grad_norm": 0.19287186861038208, "learning_rate": 0.002, "loss": 2.5671, "step": 46870 }, { "epoch": 0.09339538441922733, "grad_norm": 0.17575010657310486, "learning_rate": 0.002, "loss": 2.5765, "step": 46880 }, { "epoch": 0.09341530664286625, "grad_norm": 0.16648456454277039, "learning_rate": 0.002, "loss": 2.5798, "step": 46890 }, { "epoch": 0.09343522886650517, "grad_norm": 0.17753218114376068, "learning_rate": 0.002, "loss": 2.5793, "step": 46900 }, { "epoch": 0.09345515109014407, "grad_norm": 0.1645866334438324, "learning_rate": 0.002, "loss": 2.5841, "step": 46910 }, { "epoch": 0.09347507331378299, "grad_norm": 0.15110836923122406, "learning_rate": 0.002, "loss": 2.5765, "step": 46920 }, { "epoch": 0.09349499553742191, "grad_norm": 0.17133328318595886, "learning_rate": 0.002, "loss": 2.5825, "step": 46930 }, { "epoch": 0.09351491776106081, "grad_norm": 0.24272413551807404, "learning_rate": 0.002, "loss": 2.5749, "step": 46940 }, { "epoch": 0.09353483998469973, "grad_norm": 0.16689692437648773, "learning_rate": 0.002, "loss": 2.5892, "step": 46950 }, { "epoch": 0.09355476220833865, "grad_norm": 0.14888426661491394, "learning_rate": 0.002, "loss": 2.5905, "step": 46960 }, { "epoch": 0.09357468443197756, "grad_norm": 0.18632718920707703, "learning_rate": 0.002, "loss": 2.5655, "step": 46970 }, { "epoch": 0.09359460665561647, "grad_norm": 0.16447630524635315, "learning_rate": 0.002, "loss": 2.5637, "step": 46980 }, { "epoch": 0.0936145288792554, "grad_norm": 0.14915305376052856, "learning_rate": 0.002, "loss": 2.5661, "step": 46990 }, { "epoch": 0.0936344511028943, "grad_norm": 0.18988655507564545, "learning_rate": 0.002, "loss": 2.5808, "step": 47000 }, { "epoch": 0.09365437332653322, "grad_norm": 0.15813711285591125, "learning_rate": 0.002, "loss": 2.5744, "step": 47010 }, { "epoch": 0.09367429555017213, "grad_norm": 0.18668226897716522, "learning_rate": 0.002, "loss": 2.573, "step": 47020 }, { "epoch": 0.09369421777381104, "grad_norm": 0.16722257435321808, "learning_rate": 0.002, "loss": 2.5808, "step": 47030 }, { "epoch": 0.09371413999744996, "grad_norm": 0.16992691159248352, "learning_rate": 0.002, "loss": 2.5743, "step": 47040 }, { "epoch": 0.09373406222108886, "grad_norm": 0.16260957717895508, "learning_rate": 0.002, "loss": 2.5809, "step": 47050 }, { "epoch": 0.09375398444472778, "grad_norm": 0.21114078164100647, "learning_rate": 0.002, "loss": 2.5754, "step": 47060 }, { "epoch": 0.0937739066683667, "grad_norm": 0.1795983761548996, "learning_rate": 0.002, "loss": 2.5746, "step": 47070 }, { "epoch": 0.0937938288920056, "grad_norm": 0.15959133207798004, "learning_rate": 0.002, "loss": 2.5761, "step": 47080 }, { "epoch": 0.09381375111564452, "grad_norm": 0.15854398906230927, "learning_rate": 0.002, "loss": 2.5664, "step": 47090 }, { "epoch": 0.09383367333928344, "grad_norm": 0.15255151689052582, "learning_rate": 0.002, "loss": 2.5855, "step": 47100 }, { "epoch": 0.09385359556292235, "grad_norm": 0.15939044952392578, "learning_rate": 0.002, "loss": 2.5775, "step": 47110 }, { "epoch": 0.09387351778656126, "grad_norm": 0.17170186340808868, "learning_rate": 0.002, "loss": 2.5776, "step": 47120 }, { "epoch": 0.09389344001020018, "grad_norm": 0.1500854790210724, "learning_rate": 0.002, "loss": 2.5849, "step": 47130 }, { "epoch": 0.09391336223383909, "grad_norm": 0.16579127311706543, "learning_rate": 0.002, "loss": 2.5833, "step": 47140 }, { "epoch": 0.093933284457478, "grad_norm": 0.15437853336334229, "learning_rate": 0.002, "loss": 2.5692, "step": 47150 }, { "epoch": 0.09395320668111692, "grad_norm": 0.1796380579471588, "learning_rate": 0.002, "loss": 2.5695, "step": 47160 }, { "epoch": 0.09397312890475583, "grad_norm": 0.15488912165164948, "learning_rate": 0.002, "loss": 2.5693, "step": 47170 }, { "epoch": 0.09399305112839475, "grad_norm": 0.15549123287200928, "learning_rate": 0.002, "loss": 2.5675, "step": 47180 }, { "epoch": 0.09401297335203367, "grad_norm": 0.1893153339624405, "learning_rate": 0.002, "loss": 2.5774, "step": 47190 }, { "epoch": 0.09403289557567257, "grad_norm": 0.19416584074497223, "learning_rate": 0.002, "loss": 2.578, "step": 47200 }, { "epoch": 0.09405281779931149, "grad_norm": 0.1638166755437851, "learning_rate": 0.002, "loss": 2.569, "step": 47210 }, { "epoch": 0.09407274002295041, "grad_norm": 0.148771271109581, "learning_rate": 0.002, "loss": 2.5775, "step": 47220 }, { "epoch": 0.09409266224658931, "grad_norm": 0.16093842685222626, "learning_rate": 0.002, "loss": 2.5764, "step": 47230 }, { "epoch": 0.09411258447022823, "grad_norm": 0.1822044551372528, "learning_rate": 0.002, "loss": 2.5567, "step": 47240 }, { "epoch": 0.09413250669386715, "grad_norm": 0.1749604493379593, "learning_rate": 0.002, "loss": 2.5765, "step": 47250 }, { "epoch": 0.09415242891750605, "grad_norm": 0.15332813560962677, "learning_rate": 0.002, "loss": 2.5828, "step": 47260 }, { "epoch": 0.09417235114114497, "grad_norm": 0.15759368240833282, "learning_rate": 0.002, "loss": 2.5715, "step": 47270 }, { "epoch": 0.09419227336478389, "grad_norm": 0.14791595935821533, "learning_rate": 0.002, "loss": 2.5854, "step": 47280 }, { "epoch": 0.0942121955884228, "grad_norm": 0.18116843700408936, "learning_rate": 0.002, "loss": 2.5874, "step": 47290 }, { "epoch": 0.09423211781206171, "grad_norm": 0.17247328162193298, "learning_rate": 0.002, "loss": 2.5908, "step": 47300 }, { "epoch": 0.09425204003570062, "grad_norm": 0.1622123271226883, "learning_rate": 0.002, "loss": 2.5739, "step": 47310 }, { "epoch": 0.09427196225933954, "grad_norm": 0.16595934331417084, "learning_rate": 0.002, "loss": 2.5779, "step": 47320 }, { "epoch": 0.09429188448297846, "grad_norm": 0.19034183025360107, "learning_rate": 0.002, "loss": 2.5678, "step": 47330 }, { "epoch": 0.09431180670661736, "grad_norm": 0.18130433559417725, "learning_rate": 0.002, "loss": 2.5826, "step": 47340 }, { "epoch": 0.09433172893025628, "grad_norm": 0.16332943737506866, "learning_rate": 0.002, "loss": 2.573, "step": 47350 }, { "epoch": 0.0943516511538952, "grad_norm": 0.1619066745042801, "learning_rate": 0.002, "loss": 2.5576, "step": 47360 }, { "epoch": 0.0943715733775341, "grad_norm": 0.20733612775802612, "learning_rate": 0.002, "loss": 2.5824, "step": 47370 }, { "epoch": 0.09439149560117302, "grad_norm": 0.17189528048038483, "learning_rate": 0.002, "loss": 2.5729, "step": 47380 }, { "epoch": 0.09441141782481194, "grad_norm": 0.1664191335439682, "learning_rate": 0.002, "loss": 2.5723, "step": 47390 }, { "epoch": 0.09443134004845084, "grad_norm": 0.15851399302482605, "learning_rate": 0.002, "loss": 2.5745, "step": 47400 }, { "epoch": 0.09445126227208976, "grad_norm": 0.15237891674041748, "learning_rate": 0.002, "loss": 2.5913, "step": 47410 }, { "epoch": 0.09447118449572868, "grad_norm": 0.16606678068637848, "learning_rate": 0.002, "loss": 2.5731, "step": 47420 }, { "epoch": 0.09449110671936758, "grad_norm": 0.17996180057525635, "learning_rate": 0.002, "loss": 2.574, "step": 47430 }, { "epoch": 0.0945110289430065, "grad_norm": 0.18514414131641388, "learning_rate": 0.002, "loss": 2.5653, "step": 47440 }, { "epoch": 0.09453095116664542, "grad_norm": 0.13772006332874298, "learning_rate": 0.002, "loss": 2.5744, "step": 47450 }, { "epoch": 0.09455087339028433, "grad_norm": 0.1856192946434021, "learning_rate": 0.002, "loss": 2.5837, "step": 47460 }, { "epoch": 0.09457079561392324, "grad_norm": 0.17843538522720337, "learning_rate": 0.002, "loss": 2.5866, "step": 47470 }, { "epoch": 0.09459071783756216, "grad_norm": 0.15520113706588745, "learning_rate": 0.002, "loss": 2.56, "step": 47480 }, { "epoch": 0.09461064006120107, "grad_norm": 0.21008574962615967, "learning_rate": 0.002, "loss": 2.5885, "step": 47490 }, { "epoch": 0.09463056228483999, "grad_norm": 0.15720705687999725, "learning_rate": 0.002, "loss": 2.5856, "step": 47500 }, { "epoch": 0.0946504845084789, "grad_norm": 0.15709523856639862, "learning_rate": 0.002, "loss": 2.5737, "step": 47510 }, { "epoch": 0.09467040673211781, "grad_norm": 0.17288275063037872, "learning_rate": 0.002, "loss": 2.5664, "step": 47520 }, { "epoch": 0.09469032895575673, "grad_norm": 0.18205757439136505, "learning_rate": 0.002, "loss": 2.577, "step": 47530 }, { "epoch": 0.09471025117939563, "grad_norm": 0.16236917674541473, "learning_rate": 0.002, "loss": 2.5708, "step": 47540 }, { "epoch": 0.09473017340303455, "grad_norm": 0.16105663776397705, "learning_rate": 0.002, "loss": 2.5756, "step": 47550 }, { "epoch": 0.09475009562667347, "grad_norm": 0.15919040143489838, "learning_rate": 0.002, "loss": 2.5676, "step": 47560 }, { "epoch": 0.09477001785031237, "grad_norm": 0.1644292175769806, "learning_rate": 0.002, "loss": 2.5697, "step": 47570 }, { "epoch": 0.09478994007395129, "grad_norm": 0.1318158060312271, "learning_rate": 0.002, "loss": 2.5593, "step": 47580 }, { "epoch": 0.09480986229759021, "grad_norm": 0.15833275020122528, "learning_rate": 0.002, "loss": 2.577, "step": 47590 }, { "epoch": 0.09482978452122912, "grad_norm": 0.20662693679332733, "learning_rate": 0.002, "loss": 2.5648, "step": 47600 }, { "epoch": 0.09484970674486803, "grad_norm": 0.18006864190101624, "learning_rate": 0.002, "loss": 2.5738, "step": 47610 }, { "epoch": 0.09486962896850695, "grad_norm": 0.19331569969654083, "learning_rate": 0.002, "loss": 2.5739, "step": 47620 }, { "epoch": 0.09488955119214586, "grad_norm": 0.14700107276439667, "learning_rate": 0.002, "loss": 2.5741, "step": 47630 }, { "epoch": 0.09490947341578478, "grad_norm": 0.16993358731269836, "learning_rate": 0.002, "loss": 2.5682, "step": 47640 }, { "epoch": 0.0949293956394237, "grad_norm": 0.14377008378505707, "learning_rate": 0.002, "loss": 2.5776, "step": 47650 }, { "epoch": 0.0949493178630626, "grad_norm": 0.18346911668777466, "learning_rate": 0.002, "loss": 2.5749, "step": 47660 }, { "epoch": 0.09496924008670152, "grad_norm": 0.1717989295721054, "learning_rate": 0.002, "loss": 2.5682, "step": 47670 }, { "epoch": 0.09498916231034044, "grad_norm": 0.16344957053661346, "learning_rate": 0.002, "loss": 2.5864, "step": 47680 }, { "epoch": 0.09500908453397934, "grad_norm": 0.18627454340457916, "learning_rate": 0.002, "loss": 2.5694, "step": 47690 }, { "epoch": 0.09502900675761826, "grad_norm": 0.18526960909366608, "learning_rate": 0.002, "loss": 2.5808, "step": 47700 }, { "epoch": 0.09504892898125718, "grad_norm": 0.16293740272521973, "learning_rate": 0.002, "loss": 2.5775, "step": 47710 }, { "epoch": 0.09506885120489608, "grad_norm": 0.17848126590251923, "learning_rate": 0.002, "loss": 2.5859, "step": 47720 }, { "epoch": 0.095088773428535, "grad_norm": 0.1828361600637436, "learning_rate": 0.002, "loss": 2.5844, "step": 47730 }, { "epoch": 0.09510869565217392, "grad_norm": 0.17063717544078827, "learning_rate": 0.002, "loss": 2.5589, "step": 47740 }, { "epoch": 0.09512861787581282, "grad_norm": 0.1641269326210022, "learning_rate": 0.002, "loss": 2.5669, "step": 47750 }, { "epoch": 0.09514854009945174, "grad_norm": 0.18749092519283295, "learning_rate": 0.002, "loss": 2.5695, "step": 47760 }, { "epoch": 0.09516846232309066, "grad_norm": 0.13973288238048553, "learning_rate": 0.002, "loss": 2.5641, "step": 47770 }, { "epoch": 0.09518838454672957, "grad_norm": 0.15887528657913208, "learning_rate": 0.002, "loss": 2.5638, "step": 47780 }, { "epoch": 0.09520830677036848, "grad_norm": 0.1599070280790329, "learning_rate": 0.002, "loss": 2.5993, "step": 47790 }, { "epoch": 0.09522822899400739, "grad_norm": 0.18581868708133698, "learning_rate": 0.002, "loss": 2.5772, "step": 47800 }, { "epoch": 0.0952481512176463, "grad_norm": 0.14277920126914978, "learning_rate": 0.002, "loss": 2.5826, "step": 47810 }, { "epoch": 0.09526807344128523, "grad_norm": 0.15211613476276398, "learning_rate": 0.002, "loss": 2.5706, "step": 47820 }, { "epoch": 0.09528799566492413, "grad_norm": 0.16729873418807983, "learning_rate": 0.002, "loss": 2.5884, "step": 47830 }, { "epoch": 0.09530791788856305, "grad_norm": 0.1618463546037674, "learning_rate": 0.002, "loss": 2.5687, "step": 47840 }, { "epoch": 0.09532784011220197, "grad_norm": 0.14248764514923096, "learning_rate": 0.002, "loss": 2.5848, "step": 47850 }, { "epoch": 0.09534776233584087, "grad_norm": 0.16162100434303284, "learning_rate": 0.002, "loss": 2.5591, "step": 47860 }, { "epoch": 0.09536768455947979, "grad_norm": 0.1584748476743698, "learning_rate": 0.002, "loss": 2.5815, "step": 47870 }, { "epoch": 0.09538760678311871, "grad_norm": 0.1895523965358734, "learning_rate": 0.002, "loss": 2.5638, "step": 47880 }, { "epoch": 0.09540752900675761, "grad_norm": 0.1395021677017212, "learning_rate": 0.002, "loss": 2.5769, "step": 47890 }, { "epoch": 0.09542745123039653, "grad_norm": 0.16646069288253784, "learning_rate": 0.002, "loss": 2.5883, "step": 47900 }, { "epoch": 0.09544737345403545, "grad_norm": 0.17089243233203888, "learning_rate": 0.002, "loss": 2.5742, "step": 47910 }, { "epoch": 0.09546729567767435, "grad_norm": 0.1525910347700119, "learning_rate": 0.002, "loss": 2.5816, "step": 47920 }, { "epoch": 0.09548721790131327, "grad_norm": 0.17039446532726288, "learning_rate": 0.002, "loss": 2.5816, "step": 47930 }, { "epoch": 0.09550714012495219, "grad_norm": 0.1526171714067459, "learning_rate": 0.002, "loss": 2.5773, "step": 47940 }, { "epoch": 0.0955270623485911, "grad_norm": 0.18520280718803406, "learning_rate": 0.002, "loss": 2.5802, "step": 47950 }, { "epoch": 0.09554698457223001, "grad_norm": 0.17224082350730896, "learning_rate": 0.002, "loss": 2.5619, "step": 47960 }, { "epoch": 0.09556690679586893, "grad_norm": 0.16895344853401184, "learning_rate": 0.002, "loss": 2.5663, "step": 47970 }, { "epoch": 0.09558682901950784, "grad_norm": 0.17457884550094604, "learning_rate": 0.002, "loss": 2.5785, "step": 47980 }, { "epoch": 0.09560675124314676, "grad_norm": 0.19606059789657593, "learning_rate": 0.002, "loss": 2.5768, "step": 47990 }, { "epoch": 0.09562667346678567, "grad_norm": 0.16222256422042847, "learning_rate": 0.002, "loss": 2.593, "step": 48000 }, { "epoch": 0.09564659569042458, "grad_norm": 0.1845824420452118, "learning_rate": 0.002, "loss": 2.5811, "step": 48010 }, { "epoch": 0.0956665179140635, "grad_norm": 0.1818266659975052, "learning_rate": 0.002, "loss": 2.5848, "step": 48020 }, { "epoch": 0.09568644013770242, "grad_norm": 0.15511734783649445, "learning_rate": 0.002, "loss": 2.5765, "step": 48030 }, { "epoch": 0.09570636236134132, "grad_norm": 0.16613756120204926, "learning_rate": 0.002, "loss": 2.5835, "step": 48040 }, { "epoch": 0.09572628458498024, "grad_norm": 0.20588351786136627, "learning_rate": 0.002, "loss": 2.5693, "step": 48050 }, { "epoch": 0.09574620680861914, "grad_norm": 0.1646861433982849, "learning_rate": 0.002, "loss": 2.589, "step": 48060 }, { "epoch": 0.09576612903225806, "grad_norm": 0.1986979991197586, "learning_rate": 0.002, "loss": 2.577, "step": 48070 }, { "epoch": 0.09578605125589698, "grad_norm": 0.14998087286949158, "learning_rate": 0.002, "loss": 2.5775, "step": 48080 }, { "epoch": 0.09580597347953589, "grad_norm": 0.16361115872859955, "learning_rate": 0.002, "loss": 2.5642, "step": 48090 }, { "epoch": 0.0958258957031748, "grad_norm": 0.17946086823940277, "learning_rate": 0.002, "loss": 2.5915, "step": 48100 }, { "epoch": 0.09584581792681372, "grad_norm": 0.15864481031894684, "learning_rate": 0.002, "loss": 2.5771, "step": 48110 }, { "epoch": 0.09586574015045263, "grad_norm": 0.19088439643383026, "learning_rate": 0.002, "loss": 2.5898, "step": 48120 }, { "epoch": 0.09588566237409155, "grad_norm": 0.16904667019844055, "learning_rate": 0.002, "loss": 2.5697, "step": 48130 }, { "epoch": 0.09590558459773046, "grad_norm": 0.19313685595989227, "learning_rate": 0.002, "loss": 2.5702, "step": 48140 }, { "epoch": 0.09592550682136937, "grad_norm": 0.16817070543766022, "learning_rate": 0.002, "loss": 2.5751, "step": 48150 }, { "epoch": 0.09594542904500829, "grad_norm": 0.19320346415042877, "learning_rate": 0.002, "loss": 2.5674, "step": 48160 }, { "epoch": 0.0959653512686472, "grad_norm": 0.18663503229618073, "learning_rate": 0.002, "loss": 2.5829, "step": 48170 }, { "epoch": 0.09598527349228611, "grad_norm": 0.1585860550403595, "learning_rate": 0.002, "loss": 2.584, "step": 48180 }, { "epoch": 0.09600519571592503, "grad_norm": 0.16161860525608063, "learning_rate": 0.002, "loss": 2.589, "step": 48190 }, { "epoch": 0.09602511793956395, "grad_norm": 0.148987278342247, "learning_rate": 0.002, "loss": 2.5747, "step": 48200 }, { "epoch": 0.09604504016320285, "grad_norm": 0.16675806045532227, "learning_rate": 0.002, "loss": 2.5623, "step": 48210 }, { "epoch": 0.09606496238684177, "grad_norm": 0.173954576253891, "learning_rate": 0.002, "loss": 2.5667, "step": 48220 }, { "epoch": 0.09608488461048069, "grad_norm": 0.16964146494865417, "learning_rate": 0.002, "loss": 2.5638, "step": 48230 }, { "epoch": 0.0961048068341196, "grad_norm": 0.14810603857040405, "learning_rate": 0.002, "loss": 2.5735, "step": 48240 }, { "epoch": 0.09612472905775851, "grad_norm": 0.1798299252986908, "learning_rate": 0.002, "loss": 2.565, "step": 48250 }, { "epoch": 0.09614465128139743, "grad_norm": 0.16976164281368256, "learning_rate": 0.002, "loss": 2.5809, "step": 48260 }, { "epoch": 0.09616457350503634, "grad_norm": 0.17991141974925995, "learning_rate": 0.002, "loss": 2.5654, "step": 48270 }, { "epoch": 0.09618449572867525, "grad_norm": 0.15524014830589294, "learning_rate": 0.002, "loss": 2.5807, "step": 48280 }, { "epoch": 0.09620441795231417, "grad_norm": 0.17610618472099304, "learning_rate": 0.002, "loss": 2.5683, "step": 48290 }, { "epoch": 0.09622434017595308, "grad_norm": 0.17665664851665497, "learning_rate": 0.002, "loss": 2.5721, "step": 48300 }, { "epoch": 0.096244262399592, "grad_norm": 0.17230288684368134, "learning_rate": 0.002, "loss": 2.5951, "step": 48310 }, { "epoch": 0.0962641846232309, "grad_norm": 0.1681937426328659, "learning_rate": 0.002, "loss": 2.5842, "step": 48320 }, { "epoch": 0.09628410684686982, "grad_norm": 0.16213679313659668, "learning_rate": 0.002, "loss": 2.5707, "step": 48330 }, { "epoch": 0.09630402907050874, "grad_norm": 0.1819523572921753, "learning_rate": 0.002, "loss": 2.579, "step": 48340 }, { "epoch": 0.09632395129414764, "grad_norm": 0.17716342210769653, "learning_rate": 0.002, "loss": 2.566, "step": 48350 }, { "epoch": 0.09634387351778656, "grad_norm": 0.1598542034626007, "learning_rate": 0.002, "loss": 2.5693, "step": 48360 }, { "epoch": 0.09636379574142548, "grad_norm": 0.1540025770664215, "learning_rate": 0.002, "loss": 2.5735, "step": 48370 }, { "epoch": 0.09638371796506438, "grad_norm": 0.1633872538805008, "learning_rate": 0.002, "loss": 2.5818, "step": 48380 }, { "epoch": 0.0964036401887033, "grad_norm": 0.17181411385536194, "learning_rate": 0.002, "loss": 2.5701, "step": 48390 }, { "epoch": 0.09642356241234222, "grad_norm": 0.1744150072336197, "learning_rate": 0.002, "loss": 2.5708, "step": 48400 }, { "epoch": 0.09644348463598112, "grad_norm": 0.19943715631961823, "learning_rate": 0.002, "loss": 2.5803, "step": 48410 }, { "epoch": 0.09646340685962004, "grad_norm": 0.17211860418319702, "learning_rate": 0.002, "loss": 2.5809, "step": 48420 }, { "epoch": 0.09648332908325896, "grad_norm": 0.12946298718452454, "learning_rate": 0.002, "loss": 2.5846, "step": 48430 }, { "epoch": 0.09650325130689787, "grad_norm": 0.15876896679401398, "learning_rate": 0.002, "loss": 2.5899, "step": 48440 }, { "epoch": 0.09652317353053678, "grad_norm": 0.19516390562057495, "learning_rate": 0.002, "loss": 2.5783, "step": 48450 }, { "epoch": 0.0965430957541757, "grad_norm": 0.15211261808872223, "learning_rate": 0.002, "loss": 2.5727, "step": 48460 }, { "epoch": 0.09656301797781461, "grad_norm": 0.156084805727005, "learning_rate": 0.002, "loss": 2.5656, "step": 48470 }, { "epoch": 0.09658294020145353, "grad_norm": 0.17741915583610535, "learning_rate": 0.002, "loss": 2.5878, "step": 48480 }, { "epoch": 0.09660286242509245, "grad_norm": 0.1739877611398697, "learning_rate": 0.002, "loss": 2.5792, "step": 48490 }, { "epoch": 0.09662278464873135, "grad_norm": 0.16467535495758057, "learning_rate": 0.002, "loss": 2.5797, "step": 48500 }, { "epoch": 0.09664270687237027, "grad_norm": 0.190552219748497, "learning_rate": 0.002, "loss": 2.5748, "step": 48510 }, { "epoch": 0.09666262909600919, "grad_norm": 0.15402010083198547, "learning_rate": 0.002, "loss": 2.5728, "step": 48520 }, { "epoch": 0.09668255131964809, "grad_norm": 0.19613006711006165, "learning_rate": 0.002, "loss": 2.5718, "step": 48530 }, { "epoch": 0.09670247354328701, "grad_norm": 0.1824350208044052, "learning_rate": 0.002, "loss": 2.5808, "step": 48540 }, { "epoch": 0.09672239576692591, "grad_norm": 0.18635840713977814, "learning_rate": 0.002, "loss": 2.5797, "step": 48550 }, { "epoch": 0.09674231799056483, "grad_norm": 0.14500153064727783, "learning_rate": 0.002, "loss": 2.5737, "step": 48560 }, { "epoch": 0.09676224021420375, "grad_norm": 0.21085114777088165, "learning_rate": 0.002, "loss": 2.5889, "step": 48570 }, { "epoch": 0.09678216243784266, "grad_norm": 0.13497351109981537, "learning_rate": 0.002, "loss": 2.5906, "step": 48580 }, { "epoch": 0.09680208466148157, "grad_norm": 0.17292402684688568, "learning_rate": 0.002, "loss": 2.5766, "step": 48590 }, { "epoch": 0.09682200688512049, "grad_norm": 0.17073242366313934, "learning_rate": 0.002, "loss": 2.5853, "step": 48600 }, { "epoch": 0.0968419291087594, "grad_norm": 0.17290687561035156, "learning_rate": 0.002, "loss": 2.5821, "step": 48610 }, { "epoch": 0.09686185133239832, "grad_norm": 0.2014668881893158, "learning_rate": 0.002, "loss": 2.5657, "step": 48620 }, { "epoch": 0.09688177355603723, "grad_norm": 0.17184434831142426, "learning_rate": 0.002, "loss": 2.5695, "step": 48630 }, { "epoch": 0.09690169577967614, "grad_norm": 0.14153993129730225, "learning_rate": 0.002, "loss": 2.5712, "step": 48640 }, { "epoch": 0.09692161800331506, "grad_norm": 0.19665107131004333, "learning_rate": 0.002, "loss": 2.5846, "step": 48650 }, { "epoch": 0.09694154022695398, "grad_norm": 0.16932663321495056, "learning_rate": 0.002, "loss": 2.5916, "step": 48660 }, { "epoch": 0.09696146245059288, "grad_norm": 0.17741043865680695, "learning_rate": 0.002, "loss": 2.5767, "step": 48670 }, { "epoch": 0.0969813846742318, "grad_norm": 0.15795446932315826, "learning_rate": 0.002, "loss": 2.5662, "step": 48680 }, { "epoch": 0.09700130689787072, "grad_norm": 0.18135330080986023, "learning_rate": 0.002, "loss": 2.5778, "step": 48690 }, { "epoch": 0.09702122912150962, "grad_norm": 0.155636727809906, "learning_rate": 0.002, "loss": 2.5846, "step": 48700 }, { "epoch": 0.09704115134514854, "grad_norm": 0.19811001420021057, "learning_rate": 0.002, "loss": 2.5809, "step": 48710 }, { "epoch": 0.09706107356878746, "grad_norm": 0.1580212563276291, "learning_rate": 0.002, "loss": 2.5647, "step": 48720 }, { "epoch": 0.09708099579242636, "grad_norm": 0.18014968931674957, "learning_rate": 0.002, "loss": 2.5711, "step": 48730 }, { "epoch": 0.09710091801606528, "grad_norm": 0.18540650606155396, "learning_rate": 0.002, "loss": 2.5716, "step": 48740 }, { "epoch": 0.0971208402397042, "grad_norm": 0.1742752343416214, "learning_rate": 0.002, "loss": 2.5713, "step": 48750 }, { "epoch": 0.0971407624633431, "grad_norm": 0.17721733450889587, "learning_rate": 0.002, "loss": 2.5705, "step": 48760 }, { "epoch": 0.09716068468698202, "grad_norm": 0.1636677384376526, "learning_rate": 0.002, "loss": 2.5764, "step": 48770 }, { "epoch": 0.09718060691062094, "grad_norm": 0.15244190394878387, "learning_rate": 0.002, "loss": 2.5731, "step": 48780 }, { "epoch": 0.09720052913425985, "grad_norm": 0.1734827607870102, "learning_rate": 0.002, "loss": 2.5658, "step": 48790 }, { "epoch": 0.09722045135789877, "grad_norm": 0.15176349878311157, "learning_rate": 0.002, "loss": 2.568, "step": 48800 }, { "epoch": 0.09724037358153767, "grad_norm": 0.24467423558235168, "learning_rate": 0.002, "loss": 2.5843, "step": 48810 }, { "epoch": 0.09726029580517659, "grad_norm": 0.15900948643684387, "learning_rate": 0.002, "loss": 2.5748, "step": 48820 }, { "epoch": 0.09728021802881551, "grad_norm": 0.1685657501220703, "learning_rate": 0.002, "loss": 2.5722, "step": 48830 }, { "epoch": 0.09730014025245441, "grad_norm": 0.1823887974023819, "learning_rate": 0.002, "loss": 2.5655, "step": 48840 }, { "epoch": 0.09732006247609333, "grad_norm": 0.16836482286453247, "learning_rate": 0.002, "loss": 2.5744, "step": 48850 }, { "epoch": 0.09733998469973225, "grad_norm": 0.20073284208774567, "learning_rate": 0.002, "loss": 2.5754, "step": 48860 }, { "epoch": 0.09735990692337115, "grad_norm": 0.18285006284713745, "learning_rate": 0.002, "loss": 2.5763, "step": 48870 }, { "epoch": 0.09737982914701007, "grad_norm": 0.18893252313137054, "learning_rate": 0.002, "loss": 2.5955, "step": 48880 }, { "epoch": 0.09739975137064899, "grad_norm": 0.16807802021503448, "learning_rate": 0.002, "loss": 2.5746, "step": 48890 }, { "epoch": 0.0974196735942879, "grad_norm": 0.16906946897506714, "learning_rate": 0.002, "loss": 2.5781, "step": 48900 }, { "epoch": 0.09743959581792681, "grad_norm": 0.19686566293239594, "learning_rate": 0.002, "loss": 2.5796, "step": 48910 }, { "epoch": 0.09745951804156573, "grad_norm": 0.16272448003292084, "learning_rate": 0.002, "loss": 2.5758, "step": 48920 }, { "epoch": 0.09747944026520464, "grad_norm": 0.1678842008113861, "learning_rate": 0.002, "loss": 2.5892, "step": 48930 }, { "epoch": 0.09749936248884356, "grad_norm": 0.18382057547569275, "learning_rate": 0.002, "loss": 2.5801, "step": 48940 }, { "epoch": 0.09751928471248247, "grad_norm": 0.14853116869926453, "learning_rate": 0.002, "loss": 2.5691, "step": 48950 }, { "epoch": 0.09753920693612138, "grad_norm": 0.18929272890090942, "learning_rate": 0.002, "loss": 2.5546, "step": 48960 }, { "epoch": 0.0975591291597603, "grad_norm": 0.1576753854751587, "learning_rate": 0.002, "loss": 2.5692, "step": 48970 }, { "epoch": 0.09757905138339922, "grad_norm": 0.2103065550327301, "learning_rate": 0.002, "loss": 2.564, "step": 48980 }, { "epoch": 0.09759897360703812, "grad_norm": 0.15739069879055023, "learning_rate": 0.002, "loss": 2.5752, "step": 48990 }, { "epoch": 0.09761889583067704, "grad_norm": 0.16561901569366455, "learning_rate": 0.002, "loss": 2.5696, "step": 49000 }, { "epoch": 0.09763881805431596, "grad_norm": 0.1736094355583191, "learning_rate": 0.002, "loss": 2.5772, "step": 49010 }, { "epoch": 0.09765874027795486, "grad_norm": 0.19682516157627106, "learning_rate": 0.002, "loss": 2.5711, "step": 49020 }, { "epoch": 0.09767866250159378, "grad_norm": 0.16397592425346375, "learning_rate": 0.002, "loss": 2.5653, "step": 49030 }, { "epoch": 0.0976985847252327, "grad_norm": 0.16381895542144775, "learning_rate": 0.002, "loss": 2.5639, "step": 49040 }, { "epoch": 0.0977185069488716, "grad_norm": 0.16399124264717102, "learning_rate": 0.002, "loss": 2.577, "step": 49050 }, { "epoch": 0.09773842917251052, "grad_norm": 0.19637539982795715, "learning_rate": 0.002, "loss": 2.5753, "step": 49060 }, { "epoch": 0.09775835139614943, "grad_norm": 0.16737991571426392, "learning_rate": 0.002, "loss": 2.5747, "step": 49070 }, { "epoch": 0.09777827361978834, "grad_norm": 0.1562831550836563, "learning_rate": 0.002, "loss": 2.5751, "step": 49080 }, { "epoch": 0.09779819584342726, "grad_norm": 0.1614709049463272, "learning_rate": 0.002, "loss": 2.5755, "step": 49090 }, { "epoch": 0.09781811806706617, "grad_norm": 0.1743582934141159, "learning_rate": 0.002, "loss": 2.5787, "step": 49100 }, { "epoch": 0.09783804029070509, "grad_norm": 0.15174153447151184, "learning_rate": 0.002, "loss": 2.5643, "step": 49110 }, { "epoch": 0.097857962514344, "grad_norm": 0.177457794547081, "learning_rate": 0.002, "loss": 2.5804, "step": 49120 }, { "epoch": 0.09787788473798291, "grad_norm": 0.1562792807817459, "learning_rate": 0.002, "loss": 2.5911, "step": 49130 }, { "epoch": 0.09789780696162183, "grad_norm": 0.22473953664302826, "learning_rate": 0.002, "loss": 2.569, "step": 49140 }, { "epoch": 0.09791772918526075, "grad_norm": 0.13986879587173462, "learning_rate": 0.002, "loss": 2.5605, "step": 49150 }, { "epoch": 0.09793765140889965, "grad_norm": 0.17698051035404205, "learning_rate": 0.002, "loss": 2.5734, "step": 49160 }, { "epoch": 0.09795757363253857, "grad_norm": 0.1575440764427185, "learning_rate": 0.002, "loss": 2.5765, "step": 49170 }, { "epoch": 0.09797749585617749, "grad_norm": 0.18831823766231537, "learning_rate": 0.002, "loss": 2.5814, "step": 49180 }, { "epoch": 0.09799741807981639, "grad_norm": 0.191656231880188, "learning_rate": 0.002, "loss": 2.581, "step": 49190 }, { "epoch": 0.09801734030345531, "grad_norm": 0.1916889101266861, "learning_rate": 0.002, "loss": 2.5602, "step": 49200 }, { "epoch": 0.09803726252709423, "grad_norm": 0.16444770991802216, "learning_rate": 0.002, "loss": 2.5721, "step": 49210 }, { "epoch": 0.09805718475073313, "grad_norm": 0.1525401771068573, "learning_rate": 0.002, "loss": 2.5741, "step": 49220 }, { "epoch": 0.09807710697437205, "grad_norm": 0.16288205981254578, "learning_rate": 0.002, "loss": 2.5942, "step": 49230 }, { "epoch": 0.09809702919801097, "grad_norm": 0.1733528971672058, "learning_rate": 0.002, "loss": 2.5848, "step": 49240 }, { "epoch": 0.09811695142164988, "grad_norm": 0.16583076119422913, "learning_rate": 0.002, "loss": 2.5807, "step": 49250 }, { "epoch": 0.0981368736452888, "grad_norm": 0.15611279010772705, "learning_rate": 0.002, "loss": 2.5911, "step": 49260 }, { "epoch": 0.09815679586892771, "grad_norm": 0.14430005848407745, "learning_rate": 0.002, "loss": 2.5818, "step": 49270 }, { "epoch": 0.09817671809256662, "grad_norm": 0.1735553741455078, "learning_rate": 0.002, "loss": 2.5797, "step": 49280 }, { "epoch": 0.09819664031620554, "grad_norm": 0.18385860323905945, "learning_rate": 0.002, "loss": 2.562, "step": 49290 }, { "epoch": 0.09821656253984445, "grad_norm": 0.18907666206359863, "learning_rate": 0.002, "loss": 2.5638, "step": 49300 }, { "epoch": 0.09823648476348336, "grad_norm": 0.16992907226085663, "learning_rate": 0.002, "loss": 2.5752, "step": 49310 }, { "epoch": 0.09825640698712228, "grad_norm": 0.24392585456371307, "learning_rate": 0.002, "loss": 2.5744, "step": 49320 }, { "epoch": 0.09827632921076118, "grad_norm": 0.17849232256412506, "learning_rate": 0.002, "loss": 2.5766, "step": 49330 }, { "epoch": 0.0982962514344001, "grad_norm": 0.14726732671260834, "learning_rate": 0.002, "loss": 2.5844, "step": 49340 }, { "epoch": 0.09831617365803902, "grad_norm": 0.13725845515727997, "learning_rate": 0.002, "loss": 2.5704, "step": 49350 }, { "epoch": 0.09833609588167792, "grad_norm": 0.1808825582265854, "learning_rate": 0.002, "loss": 2.5839, "step": 49360 }, { "epoch": 0.09835601810531684, "grad_norm": 0.18402722477912903, "learning_rate": 0.002, "loss": 2.5721, "step": 49370 }, { "epoch": 0.09837594032895576, "grad_norm": 0.1339861899614334, "learning_rate": 0.002, "loss": 2.5687, "step": 49380 }, { "epoch": 0.09839586255259467, "grad_norm": 0.1832709014415741, "learning_rate": 0.002, "loss": 2.5678, "step": 49390 }, { "epoch": 0.09841578477623358, "grad_norm": 0.15883192420005798, "learning_rate": 0.002, "loss": 2.5925, "step": 49400 }, { "epoch": 0.0984357069998725, "grad_norm": 0.18749554455280304, "learning_rate": 0.002, "loss": 2.5628, "step": 49410 }, { "epoch": 0.0984556292235114, "grad_norm": 0.14859183132648468, "learning_rate": 0.002, "loss": 2.5769, "step": 49420 }, { "epoch": 0.09847555144715033, "grad_norm": 0.1761532425880432, "learning_rate": 0.002, "loss": 2.58, "step": 49430 }, { "epoch": 0.09849547367078924, "grad_norm": 0.1638520061969757, "learning_rate": 0.002, "loss": 2.5694, "step": 49440 }, { "epoch": 0.09851539589442815, "grad_norm": 0.1481352150440216, "learning_rate": 0.002, "loss": 2.5702, "step": 49450 }, { "epoch": 0.09853531811806707, "grad_norm": 0.16768315434455872, "learning_rate": 0.002, "loss": 2.5954, "step": 49460 }, { "epoch": 0.09855524034170599, "grad_norm": 0.1434771716594696, "learning_rate": 0.002, "loss": 2.5865, "step": 49470 }, { "epoch": 0.09857516256534489, "grad_norm": 0.1718664914369583, "learning_rate": 0.002, "loss": 2.5814, "step": 49480 }, { "epoch": 0.09859508478898381, "grad_norm": 0.16454948484897614, "learning_rate": 0.002, "loss": 2.5634, "step": 49490 }, { "epoch": 0.09861500701262273, "grad_norm": 0.1707855761051178, "learning_rate": 0.002, "loss": 2.5865, "step": 49500 }, { "epoch": 0.09863492923626163, "grad_norm": 0.16854459047317505, "learning_rate": 0.002, "loss": 2.5777, "step": 49510 }, { "epoch": 0.09865485145990055, "grad_norm": 0.15384359657764435, "learning_rate": 0.002, "loss": 2.5814, "step": 49520 }, { "epoch": 0.09867477368353947, "grad_norm": 0.15316013991832733, "learning_rate": 0.002, "loss": 2.5819, "step": 49530 }, { "epoch": 0.09869469590717837, "grad_norm": 0.1776590645313263, "learning_rate": 0.002, "loss": 2.579, "step": 49540 }, { "epoch": 0.09871461813081729, "grad_norm": 0.18385185301303864, "learning_rate": 0.002, "loss": 2.5749, "step": 49550 }, { "epoch": 0.0987345403544562, "grad_norm": 0.1956550031900406, "learning_rate": 0.002, "loss": 2.5758, "step": 49560 }, { "epoch": 0.09875446257809511, "grad_norm": 0.15027262270450592, "learning_rate": 0.002, "loss": 2.5747, "step": 49570 }, { "epoch": 0.09877438480173403, "grad_norm": 0.17314276099205017, "learning_rate": 0.002, "loss": 2.5687, "step": 49580 }, { "epoch": 0.09879430702537294, "grad_norm": 0.15401943027973175, "learning_rate": 0.002, "loss": 2.5743, "step": 49590 }, { "epoch": 0.09881422924901186, "grad_norm": 0.18458174169063568, "learning_rate": 0.002, "loss": 2.5865, "step": 49600 }, { "epoch": 0.09883415147265077, "grad_norm": 0.19050945341587067, "learning_rate": 0.002, "loss": 2.5741, "step": 49610 }, { "epoch": 0.09885407369628968, "grad_norm": 0.1756778359413147, "learning_rate": 0.002, "loss": 2.5823, "step": 49620 }, { "epoch": 0.0988739959199286, "grad_norm": 0.15000031888484955, "learning_rate": 0.002, "loss": 2.5741, "step": 49630 }, { "epoch": 0.09889391814356752, "grad_norm": 0.15444998443126678, "learning_rate": 0.002, "loss": 2.5643, "step": 49640 }, { "epoch": 0.09891384036720642, "grad_norm": 0.18002501130104065, "learning_rate": 0.002, "loss": 2.5767, "step": 49650 }, { "epoch": 0.09893376259084534, "grad_norm": 0.15522997081279755, "learning_rate": 0.002, "loss": 2.567, "step": 49660 }, { "epoch": 0.09895368481448426, "grad_norm": 0.17066475749015808, "learning_rate": 0.002, "loss": 2.5715, "step": 49670 }, { "epoch": 0.09897360703812316, "grad_norm": 0.15158921480178833, "learning_rate": 0.002, "loss": 2.57, "step": 49680 }, { "epoch": 0.09899352926176208, "grad_norm": 0.1605934202671051, "learning_rate": 0.002, "loss": 2.5827, "step": 49690 }, { "epoch": 0.099013451485401, "grad_norm": 0.17644846439361572, "learning_rate": 0.002, "loss": 2.5717, "step": 49700 }, { "epoch": 0.0990333737090399, "grad_norm": 0.18275240063667297, "learning_rate": 0.002, "loss": 2.5778, "step": 49710 }, { "epoch": 0.09905329593267882, "grad_norm": 0.18138191103935242, "learning_rate": 0.002, "loss": 2.5729, "step": 49720 }, { "epoch": 0.09907321815631774, "grad_norm": 0.1738789826631546, "learning_rate": 0.002, "loss": 2.5801, "step": 49730 }, { "epoch": 0.09909314037995665, "grad_norm": 0.17394807934761047, "learning_rate": 0.002, "loss": 2.5833, "step": 49740 }, { "epoch": 0.09911306260359556, "grad_norm": 0.1521793007850647, "learning_rate": 0.002, "loss": 2.5729, "step": 49750 }, { "epoch": 0.09913298482723448, "grad_norm": 0.16917000710964203, "learning_rate": 0.002, "loss": 2.5724, "step": 49760 }, { "epoch": 0.09915290705087339, "grad_norm": 0.17963619530200958, "learning_rate": 0.002, "loss": 2.5683, "step": 49770 }, { "epoch": 0.0991728292745123, "grad_norm": 0.16246648132801056, "learning_rate": 0.002, "loss": 2.5929, "step": 49780 }, { "epoch": 0.09919275149815122, "grad_norm": 0.15404078364372253, "learning_rate": 0.002, "loss": 2.5759, "step": 49790 }, { "epoch": 0.09921267372179013, "grad_norm": 0.1598043143749237, "learning_rate": 0.002, "loss": 2.5899, "step": 49800 }, { "epoch": 0.09923259594542905, "grad_norm": 0.1627986580133438, "learning_rate": 0.002, "loss": 2.5879, "step": 49810 }, { "epoch": 0.09925251816906795, "grad_norm": 0.16906426846981049, "learning_rate": 0.002, "loss": 2.5708, "step": 49820 }, { "epoch": 0.09927244039270687, "grad_norm": 0.1937701255083084, "learning_rate": 0.002, "loss": 2.5762, "step": 49830 }, { "epoch": 0.09929236261634579, "grad_norm": 0.19279888272285461, "learning_rate": 0.002, "loss": 2.5928, "step": 49840 }, { "epoch": 0.0993122848399847, "grad_norm": 0.17219652235507965, "learning_rate": 0.002, "loss": 2.5737, "step": 49850 }, { "epoch": 0.09933220706362361, "grad_norm": 0.17815114557743073, "learning_rate": 0.002, "loss": 2.5725, "step": 49860 }, { "epoch": 0.09935212928726253, "grad_norm": 0.16806498169898987, "learning_rate": 0.002, "loss": 2.5834, "step": 49870 }, { "epoch": 0.09937205151090144, "grad_norm": 0.182827889919281, "learning_rate": 0.002, "loss": 2.5878, "step": 49880 }, { "epoch": 0.09939197373454035, "grad_norm": 0.1717797964811325, "learning_rate": 0.002, "loss": 2.5848, "step": 49890 }, { "epoch": 0.09941189595817927, "grad_norm": 0.14142055809497833, "learning_rate": 0.002, "loss": 2.5657, "step": 49900 }, { "epoch": 0.09943181818181818, "grad_norm": 0.18595638871192932, "learning_rate": 0.002, "loss": 2.5779, "step": 49910 }, { "epoch": 0.0994517404054571, "grad_norm": 0.17084823548793793, "learning_rate": 0.002, "loss": 2.5855, "step": 49920 }, { "epoch": 0.09947166262909601, "grad_norm": 0.22568798065185547, "learning_rate": 0.002, "loss": 2.5909, "step": 49930 }, { "epoch": 0.09949158485273492, "grad_norm": 0.14812390506267548, "learning_rate": 0.002, "loss": 2.5819, "step": 49940 }, { "epoch": 0.09951150707637384, "grad_norm": 0.20784892141819, "learning_rate": 0.002, "loss": 2.5812, "step": 49950 }, { "epoch": 0.09953142930001276, "grad_norm": 0.16647376120090485, "learning_rate": 0.002, "loss": 2.5767, "step": 49960 }, { "epoch": 0.09955135152365166, "grad_norm": 0.16243965923786163, "learning_rate": 0.002, "loss": 2.5783, "step": 49970 }, { "epoch": 0.09957127374729058, "grad_norm": 0.1855640560388565, "learning_rate": 0.002, "loss": 2.5846, "step": 49980 }, { "epoch": 0.0995911959709295, "grad_norm": 0.16464102268218994, "learning_rate": 0.002, "loss": 2.5738, "step": 49990 }, { "epoch": 0.0996111181945684, "grad_norm": 0.18397971987724304, "learning_rate": 0.002, "loss": 2.5776, "step": 50000 }, { "epoch": 0.09963104041820732, "grad_norm": 0.17204803228378296, "learning_rate": 0.002, "loss": 2.5798, "step": 50010 }, { "epoch": 0.09965096264184624, "grad_norm": 0.14540579915046692, "learning_rate": 0.002, "loss": 2.5717, "step": 50020 }, { "epoch": 0.09967088486548514, "grad_norm": 0.20211811363697052, "learning_rate": 0.002, "loss": 2.5721, "step": 50030 }, { "epoch": 0.09969080708912406, "grad_norm": 0.14991439878940582, "learning_rate": 0.002, "loss": 2.5714, "step": 50040 }, { "epoch": 0.09971072931276298, "grad_norm": 0.16380558907985687, "learning_rate": 0.002, "loss": 2.5935, "step": 50050 }, { "epoch": 0.09973065153640188, "grad_norm": 0.16185925900936127, "learning_rate": 0.002, "loss": 2.5649, "step": 50060 }, { "epoch": 0.0997505737600408, "grad_norm": 0.1694953590631485, "learning_rate": 0.002, "loss": 2.5643, "step": 50070 }, { "epoch": 0.09977049598367971, "grad_norm": 0.1435200273990631, "learning_rate": 0.002, "loss": 2.5773, "step": 50080 }, { "epoch": 0.09979041820731863, "grad_norm": 0.1782960742712021, "learning_rate": 0.002, "loss": 2.5678, "step": 50090 }, { "epoch": 0.09981034043095754, "grad_norm": 0.15764467418193817, "learning_rate": 0.002, "loss": 2.5651, "step": 50100 }, { "epoch": 0.09983026265459645, "grad_norm": 0.18005426228046417, "learning_rate": 0.002, "loss": 2.5837, "step": 50110 }, { "epoch": 0.09985018487823537, "grad_norm": 0.16068100929260254, "learning_rate": 0.002, "loss": 2.5702, "step": 50120 }, { "epoch": 0.09987010710187429, "grad_norm": 0.19193948805332184, "learning_rate": 0.002, "loss": 2.5751, "step": 50130 }, { "epoch": 0.09989002932551319, "grad_norm": 0.20582950115203857, "learning_rate": 0.002, "loss": 2.5889, "step": 50140 }, { "epoch": 0.09990995154915211, "grad_norm": 0.1672532856464386, "learning_rate": 0.002, "loss": 2.5755, "step": 50150 }, { "epoch": 0.09992987377279103, "grad_norm": 0.15678748488426208, "learning_rate": 0.002, "loss": 2.5661, "step": 50160 }, { "epoch": 0.09994979599642993, "grad_norm": 0.15059629082679749, "learning_rate": 0.002, "loss": 2.5763, "step": 50170 }, { "epoch": 0.09996971822006885, "grad_norm": 0.1672075092792511, "learning_rate": 0.002, "loss": 2.5749, "step": 50180 }, { "epoch": 0.09998964044370777, "grad_norm": 0.147891566157341, "learning_rate": 0.002, "loss": 2.5817, "step": 50190 }, { "epoch": 0.10000956266734667, "grad_norm": 0.17678490281105042, "learning_rate": 0.002, "loss": 2.5698, "step": 50200 }, { "epoch": 0.10002948489098559, "grad_norm": 0.15186230838298798, "learning_rate": 0.002, "loss": 2.5751, "step": 50210 }, { "epoch": 0.10004940711462451, "grad_norm": 0.17756035923957825, "learning_rate": 0.002, "loss": 2.5926, "step": 50220 }, { "epoch": 0.10006932933826342, "grad_norm": 0.1934552937746048, "learning_rate": 0.002, "loss": 2.594, "step": 50230 }, { "epoch": 0.10008925156190233, "grad_norm": 0.1558610200881958, "learning_rate": 0.002, "loss": 2.5838, "step": 50240 }, { "epoch": 0.10010917378554125, "grad_norm": 0.1584233045578003, "learning_rate": 0.002, "loss": 2.5766, "step": 50250 }, { "epoch": 0.10012909600918016, "grad_norm": 0.1747473180294037, "learning_rate": 0.002, "loss": 2.5831, "step": 50260 }, { "epoch": 0.10014901823281908, "grad_norm": 0.18758288025856018, "learning_rate": 0.002, "loss": 2.5854, "step": 50270 }, { "epoch": 0.100168940456458, "grad_norm": 0.14864280819892883, "learning_rate": 0.002, "loss": 2.5937, "step": 50280 }, { "epoch": 0.1001888626800969, "grad_norm": 0.14780953526496887, "learning_rate": 0.002, "loss": 2.5653, "step": 50290 }, { "epoch": 0.10020878490373582, "grad_norm": 0.19583351910114288, "learning_rate": 0.002, "loss": 2.5839, "step": 50300 }, { "epoch": 0.10022870712737474, "grad_norm": 0.15107563138008118, "learning_rate": 0.002, "loss": 2.5803, "step": 50310 }, { "epoch": 0.10024862935101364, "grad_norm": 0.14401808381080627, "learning_rate": 0.002, "loss": 2.567, "step": 50320 }, { "epoch": 0.10026855157465256, "grad_norm": 0.1915637105703354, "learning_rate": 0.002, "loss": 2.5802, "step": 50330 }, { "epoch": 0.10028847379829146, "grad_norm": 0.1559678018093109, "learning_rate": 0.002, "loss": 2.569, "step": 50340 }, { "epoch": 0.10030839602193038, "grad_norm": 0.17418593168258667, "learning_rate": 0.002, "loss": 2.5575, "step": 50350 }, { "epoch": 0.1003283182455693, "grad_norm": 0.16795895993709564, "learning_rate": 0.002, "loss": 2.5785, "step": 50360 }, { "epoch": 0.1003482404692082, "grad_norm": 0.17487019300460815, "learning_rate": 0.002, "loss": 2.5711, "step": 50370 }, { "epoch": 0.10036816269284712, "grad_norm": 0.14192542433738708, "learning_rate": 0.002, "loss": 2.57, "step": 50380 }, { "epoch": 0.10038808491648604, "grad_norm": 0.16391652822494507, "learning_rate": 0.002, "loss": 2.5646, "step": 50390 }, { "epoch": 0.10040800714012495, "grad_norm": 0.17461061477661133, "learning_rate": 0.002, "loss": 2.5766, "step": 50400 }, { "epoch": 0.10042792936376387, "grad_norm": 0.15572790801525116, "learning_rate": 0.002, "loss": 2.5712, "step": 50410 }, { "epoch": 0.10044785158740278, "grad_norm": 0.18322451412677765, "learning_rate": 0.002, "loss": 2.5653, "step": 50420 }, { "epoch": 0.10046777381104169, "grad_norm": 0.1481543779373169, "learning_rate": 0.002, "loss": 2.5856, "step": 50430 }, { "epoch": 0.10048769603468061, "grad_norm": 0.15612401068210602, "learning_rate": 0.002, "loss": 2.5717, "step": 50440 }, { "epoch": 0.10050761825831953, "grad_norm": 0.1802411526441574, "learning_rate": 0.002, "loss": 2.5675, "step": 50450 }, { "epoch": 0.10052754048195843, "grad_norm": 0.1588233858346939, "learning_rate": 0.002, "loss": 2.567, "step": 50460 }, { "epoch": 0.10054746270559735, "grad_norm": 0.14590679109096527, "learning_rate": 0.002, "loss": 2.5695, "step": 50470 }, { "epoch": 0.10056738492923627, "grad_norm": 0.17138701677322388, "learning_rate": 0.002, "loss": 2.5752, "step": 50480 }, { "epoch": 0.10058730715287517, "grad_norm": 0.1777544915676117, "learning_rate": 0.002, "loss": 2.5686, "step": 50490 }, { "epoch": 0.10060722937651409, "grad_norm": 0.17268699407577515, "learning_rate": 0.002, "loss": 2.5723, "step": 50500 }, { "epoch": 0.10062715160015301, "grad_norm": 0.1509544849395752, "learning_rate": 0.002, "loss": 2.5715, "step": 50510 }, { "epoch": 0.10064707382379191, "grad_norm": 0.16669149696826935, "learning_rate": 0.002, "loss": 2.5728, "step": 50520 }, { "epoch": 0.10066699604743083, "grad_norm": 0.18614332377910614, "learning_rate": 0.002, "loss": 2.5754, "step": 50530 }, { "epoch": 0.10068691827106975, "grad_norm": 0.17105232179164886, "learning_rate": 0.002, "loss": 2.5683, "step": 50540 }, { "epoch": 0.10070684049470865, "grad_norm": 0.19676810503005981, "learning_rate": 0.002, "loss": 2.5839, "step": 50550 }, { "epoch": 0.10072676271834757, "grad_norm": 0.13704565167427063, "learning_rate": 0.002, "loss": 2.5652, "step": 50560 }, { "epoch": 0.10074668494198648, "grad_norm": 0.19619272649288177, "learning_rate": 0.002, "loss": 2.571, "step": 50570 }, { "epoch": 0.1007666071656254, "grad_norm": 0.1517920345067978, "learning_rate": 0.002, "loss": 2.5762, "step": 50580 }, { "epoch": 0.10078652938926431, "grad_norm": 0.15586020052433014, "learning_rate": 0.002, "loss": 2.5598, "step": 50590 }, { "epoch": 0.10080645161290322, "grad_norm": 0.16281360387802124, "learning_rate": 0.002, "loss": 2.5589, "step": 50600 }, { "epoch": 0.10082637383654214, "grad_norm": 0.17525923252105713, "learning_rate": 0.002, "loss": 2.5886, "step": 50610 }, { "epoch": 0.10084629606018106, "grad_norm": 0.13151036202907562, "learning_rate": 0.002, "loss": 2.5656, "step": 50620 }, { "epoch": 0.10086621828381996, "grad_norm": 0.16931754350662231, "learning_rate": 0.002, "loss": 2.5672, "step": 50630 }, { "epoch": 0.10088614050745888, "grad_norm": 0.14832864701747894, "learning_rate": 0.002, "loss": 2.581, "step": 50640 }, { "epoch": 0.1009060627310978, "grad_norm": 0.2220337837934494, "learning_rate": 0.002, "loss": 2.594, "step": 50650 }, { "epoch": 0.1009259849547367, "grad_norm": 0.14416229724884033, "learning_rate": 0.002, "loss": 2.5651, "step": 50660 }, { "epoch": 0.10094590717837562, "grad_norm": 0.1918857991695404, "learning_rate": 0.002, "loss": 2.5876, "step": 50670 }, { "epoch": 0.10096582940201454, "grad_norm": 0.19940535724163055, "learning_rate": 0.002, "loss": 2.5791, "step": 50680 }, { "epoch": 0.10098575162565344, "grad_norm": 0.1709170788526535, "learning_rate": 0.002, "loss": 2.5733, "step": 50690 }, { "epoch": 0.10100567384929236, "grad_norm": 0.18565255403518677, "learning_rate": 0.002, "loss": 2.5712, "step": 50700 }, { "epoch": 0.10102559607293128, "grad_norm": 0.16897699236869812, "learning_rate": 0.002, "loss": 2.5658, "step": 50710 }, { "epoch": 0.10104551829657019, "grad_norm": 0.14820760488510132, "learning_rate": 0.002, "loss": 2.5792, "step": 50720 }, { "epoch": 0.1010654405202091, "grad_norm": 0.19763347506523132, "learning_rate": 0.002, "loss": 2.5898, "step": 50730 }, { "epoch": 0.10108536274384802, "grad_norm": 0.15436311066150665, "learning_rate": 0.002, "loss": 2.5805, "step": 50740 }, { "epoch": 0.10110528496748693, "grad_norm": 0.1559058427810669, "learning_rate": 0.002, "loss": 2.582, "step": 50750 }, { "epoch": 0.10112520719112585, "grad_norm": 0.14835333824157715, "learning_rate": 0.002, "loss": 2.5703, "step": 50760 }, { "epoch": 0.10114512941476476, "grad_norm": 0.1577427238225937, "learning_rate": 0.002, "loss": 2.5663, "step": 50770 }, { "epoch": 0.10116505163840367, "grad_norm": 0.14215071499347687, "learning_rate": 0.002, "loss": 2.5868, "step": 50780 }, { "epoch": 0.10118497386204259, "grad_norm": 0.18071895837783813, "learning_rate": 0.002, "loss": 2.5777, "step": 50790 }, { "epoch": 0.1012048960856815, "grad_norm": 0.16668018698692322, "learning_rate": 0.002, "loss": 2.588, "step": 50800 }, { "epoch": 0.10122481830932041, "grad_norm": 0.1692785918712616, "learning_rate": 0.002, "loss": 2.5888, "step": 50810 }, { "epoch": 0.10124474053295933, "grad_norm": 0.17086820304393768, "learning_rate": 0.002, "loss": 2.5772, "step": 50820 }, { "epoch": 0.10126466275659823, "grad_norm": 0.16471388936042786, "learning_rate": 0.002, "loss": 2.5681, "step": 50830 }, { "epoch": 0.10128458498023715, "grad_norm": 0.19430594146251678, "learning_rate": 0.002, "loss": 2.5859, "step": 50840 }, { "epoch": 0.10130450720387607, "grad_norm": 0.15611869096755981, "learning_rate": 0.002, "loss": 2.5824, "step": 50850 }, { "epoch": 0.10132442942751498, "grad_norm": 0.15061992406845093, "learning_rate": 0.002, "loss": 2.5728, "step": 50860 }, { "epoch": 0.1013443516511539, "grad_norm": 0.17949551343917847, "learning_rate": 0.002, "loss": 2.5764, "step": 50870 }, { "epoch": 0.10136427387479281, "grad_norm": 0.1742057353258133, "learning_rate": 0.002, "loss": 2.5881, "step": 50880 }, { "epoch": 0.10138419609843172, "grad_norm": 0.1816909909248352, "learning_rate": 0.002, "loss": 2.5819, "step": 50890 }, { "epoch": 0.10140411832207064, "grad_norm": 0.14672604203224182, "learning_rate": 0.002, "loss": 2.5851, "step": 50900 }, { "epoch": 0.10142404054570955, "grad_norm": 0.14606685936450958, "learning_rate": 0.002, "loss": 2.5832, "step": 50910 }, { "epoch": 0.10144396276934846, "grad_norm": 0.18658313155174255, "learning_rate": 0.002, "loss": 2.5716, "step": 50920 }, { "epoch": 0.10146388499298738, "grad_norm": 0.18731817603111267, "learning_rate": 0.002, "loss": 2.5908, "step": 50930 }, { "epoch": 0.1014838072166263, "grad_norm": 0.1387503296136856, "learning_rate": 0.002, "loss": 2.5621, "step": 50940 }, { "epoch": 0.1015037294402652, "grad_norm": 0.18448005616664886, "learning_rate": 0.002, "loss": 2.583, "step": 50950 }, { "epoch": 0.10152365166390412, "grad_norm": 0.24890626966953278, "learning_rate": 0.002, "loss": 2.5811, "step": 50960 }, { "epoch": 0.10154357388754304, "grad_norm": 0.16171419620513916, "learning_rate": 0.002, "loss": 2.5546, "step": 50970 }, { "epoch": 0.10156349611118194, "grad_norm": 0.14709654450416565, "learning_rate": 0.002, "loss": 2.5683, "step": 50980 }, { "epoch": 0.10158341833482086, "grad_norm": 0.1943962574005127, "learning_rate": 0.002, "loss": 2.5679, "step": 50990 }, { "epoch": 0.10160334055845978, "grad_norm": 0.15776579082012177, "learning_rate": 0.002, "loss": 2.5791, "step": 51000 }, { "epoch": 0.10162326278209868, "grad_norm": 0.19966895878314972, "learning_rate": 0.002, "loss": 2.5768, "step": 51010 }, { "epoch": 0.1016431850057376, "grad_norm": 0.14556889235973358, "learning_rate": 0.002, "loss": 2.5673, "step": 51020 }, { "epoch": 0.10166310722937652, "grad_norm": 0.16540388762950897, "learning_rate": 0.002, "loss": 2.5655, "step": 51030 }, { "epoch": 0.10168302945301542, "grad_norm": 0.15599186718463898, "learning_rate": 0.002, "loss": 2.572, "step": 51040 }, { "epoch": 0.10170295167665434, "grad_norm": 0.18157798051834106, "learning_rate": 0.002, "loss": 2.597, "step": 51050 }, { "epoch": 0.10172287390029326, "grad_norm": 0.17753876745700836, "learning_rate": 0.002, "loss": 2.5753, "step": 51060 }, { "epoch": 0.10174279612393217, "grad_norm": 0.14624157547950745, "learning_rate": 0.002, "loss": 2.5759, "step": 51070 }, { "epoch": 0.10176271834757109, "grad_norm": 0.17939206957817078, "learning_rate": 0.002, "loss": 2.5868, "step": 51080 }, { "epoch": 0.10178264057120999, "grad_norm": 0.15167517960071564, "learning_rate": 0.002, "loss": 2.5833, "step": 51090 }, { "epoch": 0.10180256279484891, "grad_norm": 0.22793374955654144, "learning_rate": 0.002, "loss": 2.5708, "step": 51100 }, { "epoch": 0.10182248501848783, "grad_norm": 0.1561797559261322, "learning_rate": 0.002, "loss": 2.5805, "step": 51110 }, { "epoch": 0.10184240724212673, "grad_norm": 0.13333629071712494, "learning_rate": 0.002, "loss": 2.5663, "step": 51120 }, { "epoch": 0.10186232946576565, "grad_norm": 0.15865734219551086, "learning_rate": 0.002, "loss": 2.5856, "step": 51130 }, { "epoch": 0.10188225168940457, "grad_norm": 0.14107277989387512, "learning_rate": 0.002, "loss": 2.5747, "step": 51140 }, { "epoch": 0.10190217391304347, "grad_norm": 0.1935759037733078, "learning_rate": 0.002, "loss": 2.5902, "step": 51150 }, { "epoch": 0.10192209613668239, "grad_norm": 0.2592508792877197, "learning_rate": 0.002, "loss": 2.5686, "step": 51160 }, { "epoch": 0.10194201836032131, "grad_norm": 0.1559174507856369, "learning_rate": 0.002, "loss": 2.5677, "step": 51170 }, { "epoch": 0.10196194058396021, "grad_norm": 0.1506897211074829, "learning_rate": 0.002, "loss": 2.5606, "step": 51180 }, { "epoch": 0.10198186280759913, "grad_norm": 0.15052083134651184, "learning_rate": 0.002, "loss": 2.5639, "step": 51190 }, { "epoch": 0.10200178503123805, "grad_norm": 0.18954630196094513, "learning_rate": 0.002, "loss": 2.5583, "step": 51200 }, { "epoch": 0.10202170725487696, "grad_norm": 0.14566555619239807, "learning_rate": 0.002, "loss": 2.5703, "step": 51210 }, { "epoch": 0.10204162947851587, "grad_norm": 0.15737473964691162, "learning_rate": 0.002, "loss": 2.581, "step": 51220 }, { "epoch": 0.10206155170215479, "grad_norm": 0.161048024892807, "learning_rate": 0.002, "loss": 2.5879, "step": 51230 }, { "epoch": 0.1020814739257937, "grad_norm": 0.14903108775615692, "learning_rate": 0.002, "loss": 2.5825, "step": 51240 }, { "epoch": 0.10210139614943262, "grad_norm": 0.15598809719085693, "learning_rate": 0.002, "loss": 2.5692, "step": 51250 }, { "epoch": 0.10212131837307153, "grad_norm": 0.19904936850070953, "learning_rate": 0.002, "loss": 2.5683, "step": 51260 }, { "epoch": 0.10214124059671044, "grad_norm": 0.1485205739736557, "learning_rate": 0.002, "loss": 2.56, "step": 51270 }, { "epoch": 0.10216116282034936, "grad_norm": 0.1914554089307785, "learning_rate": 0.002, "loss": 2.5842, "step": 51280 }, { "epoch": 0.10218108504398828, "grad_norm": 0.1634744256734848, "learning_rate": 0.002, "loss": 2.5745, "step": 51290 }, { "epoch": 0.10220100726762718, "grad_norm": 0.16169121861457825, "learning_rate": 0.002, "loss": 2.5783, "step": 51300 }, { "epoch": 0.1022209294912661, "grad_norm": 0.17246852815151215, "learning_rate": 0.002, "loss": 2.5818, "step": 51310 }, { "epoch": 0.102240851714905, "grad_norm": 0.18248072266578674, "learning_rate": 0.002, "loss": 2.5871, "step": 51320 }, { "epoch": 0.10226077393854392, "grad_norm": 0.14882498979568481, "learning_rate": 0.002, "loss": 2.5794, "step": 51330 }, { "epoch": 0.10228069616218284, "grad_norm": 0.16686463356018066, "learning_rate": 0.002, "loss": 2.5777, "step": 51340 }, { "epoch": 0.10230061838582175, "grad_norm": 0.17874565720558167, "learning_rate": 0.002, "loss": 2.5714, "step": 51350 }, { "epoch": 0.10232054060946066, "grad_norm": 0.1574539691209793, "learning_rate": 0.002, "loss": 2.5796, "step": 51360 }, { "epoch": 0.10234046283309958, "grad_norm": 0.16630049049854279, "learning_rate": 0.002, "loss": 2.5875, "step": 51370 }, { "epoch": 0.10236038505673849, "grad_norm": 0.15300798416137695, "learning_rate": 0.002, "loss": 2.581, "step": 51380 }, { "epoch": 0.1023803072803774, "grad_norm": 0.16436322033405304, "learning_rate": 0.002, "loss": 2.5843, "step": 51390 }, { "epoch": 0.10240022950401632, "grad_norm": 0.16287261247634888, "learning_rate": 0.002, "loss": 2.5772, "step": 51400 }, { "epoch": 0.10242015172765523, "grad_norm": 0.16925972700119019, "learning_rate": 0.002, "loss": 2.5591, "step": 51410 }, { "epoch": 0.10244007395129415, "grad_norm": 0.16666048765182495, "learning_rate": 0.002, "loss": 2.5759, "step": 51420 }, { "epoch": 0.10245999617493307, "grad_norm": 0.18301871418952942, "learning_rate": 0.002, "loss": 2.5752, "step": 51430 }, { "epoch": 0.10247991839857197, "grad_norm": 0.19148863852024078, "learning_rate": 0.002, "loss": 2.5708, "step": 51440 }, { "epoch": 0.10249984062221089, "grad_norm": 0.17126981914043427, "learning_rate": 0.002, "loss": 2.5821, "step": 51450 }, { "epoch": 0.10251976284584981, "grad_norm": 0.15466369688510895, "learning_rate": 0.002, "loss": 2.5746, "step": 51460 }, { "epoch": 0.10253968506948871, "grad_norm": 0.20566751062870026, "learning_rate": 0.002, "loss": 2.5721, "step": 51470 }, { "epoch": 0.10255960729312763, "grad_norm": 0.18592742085456848, "learning_rate": 0.002, "loss": 2.5837, "step": 51480 }, { "epoch": 0.10257952951676655, "grad_norm": 0.1706756055355072, "learning_rate": 0.002, "loss": 2.5759, "step": 51490 }, { "epoch": 0.10259945174040545, "grad_norm": 0.1859961450099945, "learning_rate": 0.002, "loss": 2.5683, "step": 51500 }, { "epoch": 0.10261937396404437, "grad_norm": 0.19180504977703094, "learning_rate": 0.002, "loss": 2.5806, "step": 51510 }, { "epoch": 0.10263929618768329, "grad_norm": 0.17893831431865692, "learning_rate": 0.002, "loss": 2.5707, "step": 51520 }, { "epoch": 0.1026592184113222, "grad_norm": 0.15878313779830933, "learning_rate": 0.002, "loss": 2.5943, "step": 51530 }, { "epoch": 0.10267914063496111, "grad_norm": 0.17192760109901428, "learning_rate": 0.002, "loss": 2.5749, "step": 51540 }, { "epoch": 0.10269906285860003, "grad_norm": 0.14708928763866425, "learning_rate": 0.002, "loss": 2.5806, "step": 51550 }, { "epoch": 0.10271898508223894, "grad_norm": 0.15677998960018158, "learning_rate": 0.002, "loss": 2.5987, "step": 51560 }, { "epoch": 0.10273890730587786, "grad_norm": 0.15666401386260986, "learning_rate": 0.002, "loss": 2.5476, "step": 51570 }, { "epoch": 0.10275882952951676, "grad_norm": 0.19550824165344238, "learning_rate": 0.002, "loss": 2.572, "step": 51580 }, { "epoch": 0.10277875175315568, "grad_norm": 0.16202425956726074, "learning_rate": 0.002, "loss": 2.5622, "step": 51590 }, { "epoch": 0.1027986739767946, "grad_norm": 0.18180951476097107, "learning_rate": 0.002, "loss": 2.5747, "step": 51600 }, { "epoch": 0.1028185962004335, "grad_norm": 0.1368928998708725, "learning_rate": 0.002, "loss": 2.5549, "step": 51610 }, { "epoch": 0.10283851842407242, "grad_norm": 0.25194114446640015, "learning_rate": 0.002, "loss": 2.576, "step": 51620 }, { "epoch": 0.10285844064771134, "grad_norm": 0.13874979317188263, "learning_rate": 0.002, "loss": 2.5803, "step": 51630 }, { "epoch": 0.10287836287135024, "grad_norm": 0.14223098754882812, "learning_rate": 0.002, "loss": 2.5685, "step": 51640 }, { "epoch": 0.10289828509498916, "grad_norm": 0.1532931923866272, "learning_rate": 0.002, "loss": 2.578, "step": 51650 }, { "epoch": 0.10291820731862808, "grad_norm": 0.22402861714363098, "learning_rate": 0.002, "loss": 2.5757, "step": 51660 }, { "epoch": 0.10293812954226698, "grad_norm": 0.13896432518959045, "learning_rate": 0.002, "loss": 2.5765, "step": 51670 }, { "epoch": 0.1029580517659059, "grad_norm": 0.150566965341568, "learning_rate": 0.002, "loss": 2.5789, "step": 51680 }, { "epoch": 0.10297797398954482, "grad_norm": 0.18451160192489624, "learning_rate": 0.002, "loss": 2.5771, "step": 51690 }, { "epoch": 0.10299789621318373, "grad_norm": 0.17098680138587952, "learning_rate": 0.002, "loss": 2.5941, "step": 51700 }, { "epoch": 0.10301781843682264, "grad_norm": 0.14499063789844513, "learning_rate": 0.002, "loss": 2.5828, "step": 51710 }, { "epoch": 0.10303774066046156, "grad_norm": 0.19140951335430145, "learning_rate": 0.002, "loss": 2.5832, "step": 51720 }, { "epoch": 0.10305766288410047, "grad_norm": 0.16502894461154938, "learning_rate": 0.002, "loss": 2.5671, "step": 51730 }, { "epoch": 0.10307758510773939, "grad_norm": 0.1805427074432373, "learning_rate": 0.002, "loss": 2.5795, "step": 51740 }, { "epoch": 0.1030975073313783, "grad_norm": 0.15176457166671753, "learning_rate": 0.002, "loss": 2.574, "step": 51750 }, { "epoch": 0.10311742955501721, "grad_norm": 0.14043974876403809, "learning_rate": 0.002, "loss": 2.5855, "step": 51760 }, { "epoch": 0.10313735177865613, "grad_norm": 0.1414804458618164, "learning_rate": 0.002, "loss": 2.5941, "step": 51770 }, { "epoch": 0.10315727400229505, "grad_norm": 0.18621769547462463, "learning_rate": 0.002, "loss": 2.5799, "step": 51780 }, { "epoch": 0.10317719622593395, "grad_norm": 0.15443609654903412, "learning_rate": 0.002, "loss": 2.5764, "step": 51790 }, { "epoch": 0.10319711844957287, "grad_norm": 0.14722242951393127, "learning_rate": 0.002, "loss": 2.5661, "step": 51800 }, { "epoch": 0.10321704067321179, "grad_norm": 0.1790482997894287, "learning_rate": 0.002, "loss": 2.5689, "step": 51810 }, { "epoch": 0.10323696289685069, "grad_norm": 0.18212194740772247, "learning_rate": 0.002, "loss": 2.5774, "step": 51820 }, { "epoch": 0.10325688512048961, "grad_norm": 0.14104700088500977, "learning_rate": 0.002, "loss": 2.5844, "step": 51830 }, { "epoch": 0.10327680734412852, "grad_norm": 0.15161915123462677, "learning_rate": 0.002, "loss": 2.5598, "step": 51840 }, { "epoch": 0.10329672956776743, "grad_norm": 0.21532447636127472, "learning_rate": 0.002, "loss": 2.5855, "step": 51850 }, { "epoch": 0.10331665179140635, "grad_norm": 0.15379326045513153, "learning_rate": 0.002, "loss": 2.5735, "step": 51860 }, { "epoch": 0.10333657401504526, "grad_norm": 0.1572919487953186, "learning_rate": 0.002, "loss": 2.586, "step": 51870 }, { "epoch": 0.10335649623868418, "grad_norm": 0.18330295383930206, "learning_rate": 0.002, "loss": 2.5587, "step": 51880 }, { "epoch": 0.1033764184623231, "grad_norm": 0.1655866652727127, "learning_rate": 0.002, "loss": 2.5956, "step": 51890 }, { "epoch": 0.103396340685962, "grad_norm": 0.18929095566272736, "learning_rate": 0.002, "loss": 2.5645, "step": 51900 }, { "epoch": 0.10341626290960092, "grad_norm": 0.14369188249111176, "learning_rate": 0.002, "loss": 2.5779, "step": 51910 }, { "epoch": 0.10343618513323984, "grad_norm": 0.1478201001882553, "learning_rate": 0.002, "loss": 2.5744, "step": 51920 }, { "epoch": 0.10345610735687874, "grad_norm": 0.1792897880077362, "learning_rate": 0.002, "loss": 2.5698, "step": 51930 }, { "epoch": 0.10347602958051766, "grad_norm": 0.16867724061012268, "learning_rate": 0.002, "loss": 2.58, "step": 51940 }, { "epoch": 0.10349595180415658, "grad_norm": 0.15028396248817444, "learning_rate": 0.002, "loss": 2.5668, "step": 51950 }, { "epoch": 0.10351587402779548, "grad_norm": 0.16923591494560242, "learning_rate": 0.002, "loss": 2.5818, "step": 51960 }, { "epoch": 0.1035357962514344, "grad_norm": 0.18927228450775146, "learning_rate": 0.002, "loss": 2.5534, "step": 51970 }, { "epoch": 0.10355571847507332, "grad_norm": 0.23954613506793976, "learning_rate": 0.002, "loss": 2.5754, "step": 51980 }, { "epoch": 0.10357564069871222, "grad_norm": 0.18370521068572998, "learning_rate": 0.002, "loss": 2.5711, "step": 51990 }, { "epoch": 0.10359556292235114, "grad_norm": 0.16181817650794983, "learning_rate": 0.002, "loss": 2.5875, "step": 52000 }, { "epoch": 0.10361548514599006, "grad_norm": 0.17266832292079926, "learning_rate": 0.002, "loss": 2.5933, "step": 52010 }, { "epoch": 0.10363540736962897, "grad_norm": 0.16297021508216858, "learning_rate": 0.002, "loss": 2.5792, "step": 52020 }, { "epoch": 0.10365532959326788, "grad_norm": 0.14542138576507568, "learning_rate": 0.002, "loss": 2.5867, "step": 52030 }, { "epoch": 0.1036752518169068, "grad_norm": 0.20756277441978455, "learning_rate": 0.002, "loss": 2.5636, "step": 52040 }, { "epoch": 0.1036951740405457, "grad_norm": 0.17656821012496948, "learning_rate": 0.002, "loss": 2.589, "step": 52050 }, { "epoch": 0.10371509626418463, "grad_norm": 0.15046556293964386, "learning_rate": 0.002, "loss": 2.5714, "step": 52060 }, { "epoch": 0.10373501848782354, "grad_norm": 0.13662956655025482, "learning_rate": 0.002, "loss": 2.5632, "step": 52070 }, { "epoch": 0.10375494071146245, "grad_norm": 0.2138286828994751, "learning_rate": 0.002, "loss": 2.5692, "step": 52080 }, { "epoch": 0.10377486293510137, "grad_norm": 0.15498541295528412, "learning_rate": 0.002, "loss": 2.5681, "step": 52090 }, { "epoch": 0.10379478515874027, "grad_norm": 0.1693347841501236, "learning_rate": 0.002, "loss": 2.5892, "step": 52100 }, { "epoch": 0.10381470738237919, "grad_norm": 0.1982731968164444, "learning_rate": 0.002, "loss": 2.5606, "step": 52110 }, { "epoch": 0.10383462960601811, "grad_norm": 0.2144494354724884, "learning_rate": 0.002, "loss": 2.5706, "step": 52120 }, { "epoch": 0.10385455182965701, "grad_norm": 0.17168928682804108, "learning_rate": 0.002, "loss": 2.5663, "step": 52130 }, { "epoch": 0.10387447405329593, "grad_norm": 0.1503278762102127, "learning_rate": 0.002, "loss": 2.5781, "step": 52140 }, { "epoch": 0.10389439627693485, "grad_norm": 0.16183273494243622, "learning_rate": 0.002, "loss": 2.5802, "step": 52150 }, { "epoch": 0.10391431850057375, "grad_norm": 0.17667703330516815, "learning_rate": 0.002, "loss": 2.5963, "step": 52160 }, { "epoch": 0.10393424072421267, "grad_norm": 0.19285671412944794, "learning_rate": 0.002, "loss": 2.574, "step": 52170 }, { "epoch": 0.10395416294785159, "grad_norm": 0.1473301351070404, "learning_rate": 0.002, "loss": 2.5729, "step": 52180 }, { "epoch": 0.1039740851714905, "grad_norm": 0.15170268714427948, "learning_rate": 0.002, "loss": 2.5615, "step": 52190 }, { "epoch": 0.10399400739512941, "grad_norm": 0.15979309380054474, "learning_rate": 0.002, "loss": 2.5717, "step": 52200 }, { "epoch": 0.10401392961876833, "grad_norm": 0.15202689170837402, "learning_rate": 0.002, "loss": 2.569, "step": 52210 }, { "epoch": 0.10403385184240724, "grad_norm": 0.17734190821647644, "learning_rate": 0.002, "loss": 2.5972, "step": 52220 }, { "epoch": 0.10405377406604616, "grad_norm": 0.13450901210308075, "learning_rate": 0.002, "loss": 2.5868, "step": 52230 }, { "epoch": 0.10407369628968507, "grad_norm": 0.19632044434547424, "learning_rate": 0.002, "loss": 2.5705, "step": 52240 }, { "epoch": 0.10409361851332398, "grad_norm": 0.14834535121917725, "learning_rate": 0.002, "loss": 2.573, "step": 52250 }, { "epoch": 0.1041135407369629, "grad_norm": 0.1480390876531601, "learning_rate": 0.002, "loss": 2.5893, "step": 52260 }, { "epoch": 0.10413346296060182, "grad_norm": 0.17472834885120392, "learning_rate": 0.002, "loss": 2.5803, "step": 52270 }, { "epoch": 0.10415338518424072, "grad_norm": 0.15323105454444885, "learning_rate": 0.002, "loss": 2.5792, "step": 52280 }, { "epoch": 0.10417330740787964, "grad_norm": 0.18388237059116364, "learning_rate": 0.002, "loss": 2.5686, "step": 52290 }, { "epoch": 0.10419322963151856, "grad_norm": 0.13833265006542206, "learning_rate": 0.002, "loss": 2.5727, "step": 52300 }, { "epoch": 0.10421315185515746, "grad_norm": 0.20508968830108643, "learning_rate": 0.002, "loss": 2.5766, "step": 52310 }, { "epoch": 0.10423307407879638, "grad_norm": 0.16256797313690186, "learning_rate": 0.002, "loss": 2.5689, "step": 52320 }, { "epoch": 0.10425299630243529, "grad_norm": 0.15074031054973602, "learning_rate": 0.002, "loss": 2.574, "step": 52330 }, { "epoch": 0.1042729185260742, "grad_norm": 0.15274305641651154, "learning_rate": 0.002, "loss": 2.5783, "step": 52340 }, { "epoch": 0.10429284074971312, "grad_norm": 0.17127250134944916, "learning_rate": 0.002, "loss": 2.58, "step": 52350 }, { "epoch": 0.10431276297335203, "grad_norm": 0.15931348502635956, "learning_rate": 0.002, "loss": 2.5823, "step": 52360 }, { "epoch": 0.10433268519699095, "grad_norm": 0.1784321367740631, "learning_rate": 0.002, "loss": 2.575, "step": 52370 }, { "epoch": 0.10435260742062986, "grad_norm": 0.1871524155139923, "learning_rate": 0.002, "loss": 2.5762, "step": 52380 }, { "epoch": 0.10437252964426877, "grad_norm": 0.1483502835035324, "learning_rate": 0.002, "loss": 2.5797, "step": 52390 }, { "epoch": 0.10439245186790769, "grad_norm": 0.1850375235080719, "learning_rate": 0.002, "loss": 2.5877, "step": 52400 }, { "epoch": 0.1044123740915466, "grad_norm": 0.17017801105976105, "learning_rate": 0.002, "loss": 2.5643, "step": 52410 }, { "epoch": 0.10443229631518551, "grad_norm": 0.17134183645248413, "learning_rate": 0.002, "loss": 2.5707, "step": 52420 }, { "epoch": 0.10445221853882443, "grad_norm": 0.15816353261470795, "learning_rate": 0.002, "loss": 2.5835, "step": 52430 }, { "epoch": 0.10447214076246335, "grad_norm": 0.17531700432300568, "learning_rate": 0.002, "loss": 2.5758, "step": 52440 }, { "epoch": 0.10449206298610225, "grad_norm": 0.1872791200876236, "learning_rate": 0.002, "loss": 2.5705, "step": 52450 }, { "epoch": 0.10451198520974117, "grad_norm": 0.20453713834285736, "learning_rate": 0.002, "loss": 2.569, "step": 52460 }, { "epoch": 0.10453190743338009, "grad_norm": 0.13987106084823608, "learning_rate": 0.002, "loss": 2.5798, "step": 52470 }, { "epoch": 0.104551829657019, "grad_norm": 0.17504587769508362, "learning_rate": 0.002, "loss": 2.5707, "step": 52480 }, { "epoch": 0.10457175188065791, "grad_norm": 0.19054412841796875, "learning_rate": 0.002, "loss": 2.5916, "step": 52490 }, { "epoch": 0.10459167410429683, "grad_norm": 0.14939993619918823, "learning_rate": 0.002, "loss": 2.5683, "step": 52500 }, { "epoch": 0.10461159632793574, "grad_norm": 0.20840269327163696, "learning_rate": 0.002, "loss": 2.5796, "step": 52510 }, { "epoch": 0.10463151855157465, "grad_norm": 0.18078581988811493, "learning_rate": 0.002, "loss": 2.5651, "step": 52520 }, { "epoch": 0.10465144077521357, "grad_norm": 0.18804368376731873, "learning_rate": 0.002, "loss": 2.5777, "step": 52530 }, { "epoch": 0.10467136299885248, "grad_norm": 0.167487233877182, "learning_rate": 0.002, "loss": 2.5865, "step": 52540 }, { "epoch": 0.1046912852224914, "grad_norm": 0.19505071640014648, "learning_rate": 0.002, "loss": 2.5904, "step": 52550 }, { "epoch": 0.10471120744613031, "grad_norm": 0.1634722650051117, "learning_rate": 0.002, "loss": 2.5831, "step": 52560 }, { "epoch": 0.10473112966976922, "grad_norm": 0.18016968667507172, "learning_rate": 0.002, "loss": 2.5934, "step": 52570 }, { "epoch": 0.10475105189340814, "grad_norm": 0.15329523384571075, "learning_rate": 0.002, "loss": 2.5685, "step": 52580 }, { "epoch": 0.10477097411704704, "grad_norm": 0.1578017920255661, "learning_rate": 0.002, "loss": 2.5634, "step": 52590 }, { "epoch": 0.10479089634068596, "grad_norm": 0.16866931319236755, "learning_rate": 0.002, "loss": 2.5648, "step": 52600 }, { "epoch": 0.10481081856432488, "grad_norm": 0.14351704716682434, "learning_rate": 0.002, "loss": 2.5919, "step": 52610 }, { "epoch": 0.10483074078796378, "grad_norm": 0.14871346950531006, "learning_rate": 0.002, "loss": 2.5769, "step": 52620 }, { "epoch": 0.1048506630116027, "grad_norm": 0.18444891273975372, "learning_rate": 0.002, "loss": 2.5882, "step": 52630 }, { "epoch": 0.10487058523524162, "grad_norm": 0.14990422129631042, "learning_rate": 0.002, "loss": 2.5677, "step": 52640 }, { "epoch": 0.10489050745888052, "grad_norm": 0.19132190942764282, "learning_rate": 0.002, "loss": 2.5774, "step": 52650 }, { "epoch": 0.10491042968251944, "grad_norm": 0.212691068649292, "learning_rate": 0.002, "loss": 2.5841, "step": 52660 }, { "epoch": 0.10493035190615836, "grad_norm": 0.20190729200839996, "learning_rate": 0.002, "loss": 2.6024, "step": 52670 }, { "epoch": 0.10495027412979727, "grad_norm": 0.15270838141441345, "learning_rate": 0.002, "loss": 2.5623, "step": 52680 }, { "epoch": 0.10497019635343618, "grad_norm": 0.1574941873550415, "learning_rate": 0.002, "loss": 2.561, "step": 52690 }, { "epoch": 0.1049901185770751, "grad_norm": 0.20290438830852509, "learning_rate": 0.002, "loss": 2.5938, "step": 52700 }, { "epoch": 0.10501004080071401, "grad_norm": 0.16913940012454987, "learning_rate": 0.002, "loss": 2.5723, "step": 52710 }, { "epoch": 0.10502996302435293, "grad_norm": 0.16730019450187683, "learning_rate": 0.002, "loss": 2.5744, "step": 52720 }, { "epoch": 0.10504988524799184, "grad_norm": 0.1701555997133255, "learning_rate": 0.002, "loss": 2.5567, "step": 52730 }, { "epoch": 0.10506980747163075, "grad_norm": 0.1464407593011856, "learning_rate": 0.002, "loss": 2.5871, "step": 52740 }, { "epoch": 0.10508972969526967, "grad_norm": 0.16085457801818848, "learning_rate": 0.002, "loss": 2.572, "step": 52750 }, { "epoch": 0.10510965191890859, "grad_norm": 0.16025619208812714, "learning_rate": 0.002, "loss": 2.5667, "step": 52760 }, { "epoch": 0.10512957414254749, "grad_norm": 0.16828739643096924, "learning_rate": 0.002, "loss": 2.5758, "step": 52770 }, { "epoch": 0.10514949636618641, "grad_norm": 0.1832427680492401, "learning_rate": 0.002, "loss": 2.5846, "step": 52780 }, { "epoch": 0.10516941858982533, "grad_norm": 0.18277552723884583, "learning_rate": 0.002, "loss": 2.574, "step": 52790 }, { "epoch": 0.10518934081346423, "grad_norm": 0.16989487409591675, "learning_rate": 0.002, "loss": 2.5556, "step": 52800 }, { "epoch": 0.10520926303710315, "grad_norm": 0.16103436052799225, "learning_rate": 0.002, "loss": 2.5749, "step": 52810 }, { "epoch": 0.10522918526074207, "grad_norm": 0.19317695498466492, "learning_rate": 0.002, "loss": 2.5818, "step": 52820 }, { "epoch": 0.10524910748438097, "grad_norm": 0.14930257201194763, "learning_rate": 0.002, "loss": 2.5757, "step": 52830 }, { "epoch": 0.10526902970801989, "grad_norm": 0.1682666540145874, "learning_rate": 0.002, "loss": 2.591, "step": 52840 }, { "epoch": 0.1052889519316588, "grad_norm": 0.1606859415769577, "learning_rate": 0.002, "loss": 2.5786, "step": 52850 }, { "epoch": 0.10530887415529772, "grad_norm": 0.17639650404453278, "learning_rate": 0.002, "loss": 2.5724, "step": 52860 }, { "epoch": 0.10532879637893663, "grad_norm": 0.14167457818984985, "learning_rate": 0.002, "loss": 2.5614, "step": 52870 }, { "epoch": 0.10534871860257554, "grad_norm": 0.17389091849327087, "learning_rate": 0.002, "loss": 2.5769, "step": 52880 }, { "epoch": 0.10536864082621446, "grad_norm": 0.17493778467178345, "learning_rate": 0.002, "loss": 2.5829, "step": 52890 }, { "epoch": 0.10538856304985338, "grad_norm": 0.18424925208091736, "learning_rate": 0.002, "loss": 2.5596, "step": 52900 }, { "epoch": 0.10540848527349228, "grad_norm": 0.17943623661994934, "learning_rate": 0.002, "loss": 2.5638, "step": 52910 }, { "epoch": 0.1054284074971312, "grad_norm": 0.1641630232334137, "learning_rate": 0.002, "loss": 2.588, "step": 52920 }, { "epoch": 0.10544832972077012, "grad_norm": 0.14452344179153442, "learning_rate": 0.002, "loss": 2.5731, "step": 52930 }, { "epoch": 0.10546825194440902, "grad_norm": 0.1818189173936844, "learning_rate": 0.002, "loss": 2.5966, "step": 52940 }, { "epoch": 0.10548817416804794, "grad_norm": 0.16518202424049377, "learning_rate": 0.002, "loss": 2.5961, "step": 52950 }, { "epoch": 0.10550809639168686, "grad_norm": 0.16774937510490417, "learning_rate": 0.002, "loss": 2.5702, "step": 52960 }, { "epoch": 0.10552801861532576, "grad_norm": 0.16449005901813507, "learning_rate": 0.002, "loss": 2.5668, "step": 52970 }, { "epoch": 0.10554794083896468, "grad_norm": 0.18152745068073273, "learning_rate": 0.002, "loss": 2.5742, "step": 52980 }, { "epoch": 0.1055678630626036, "grad_norm": 0.18268828094005585, "learning_rate": 0.002, "loss": 2.5759, "step": 52990 }, { "epoch": 0.1055877852862425, "grad_norm": 0.17604956030845642, "learning_rate": 0.002, "loss": 2.576, "step": 53000 }, { "epoch": 0.10560770750988142, "grad_norm": 0.13997232913970947, "learning_rate": 0.002, "loss": 2.5772, "step": 53010 }, { "epoch": 0.10562762973352034, "grad_norm": 0.20166952908039093, "learning_rate": 0.002, "loss": 2.5776, "step": 53020 }, { "epoch": 0.10564755195715925, "grad_norm": 0.1389542520046234, "learning_rate": 0.002, "loss": 2.5698, "step": 53030 }, { "epoch": 0.10566747418079817, "grad_norm": 0.15571464598178864, "learning_rate": 0.002, "loss": 2.5769, "step": 53040 }, { "epoch": 0.10568739640443708, "grad_norm": 0.13278134167194366, "learning_rate": 0.002, "loss": 2.576, "step": 53050 }, { "epoch": 0.10570731862807599, "grad_norm": 0.1494891494512558, "learning_rate": 0.002, "loss": 2.5732, "step": 53060 }, { "epoch": 0.10572724085171491, "grad_norm": 0.16463187336921692, "learning_rate": 0.002, "loss": 2.5676, "step": 53070 }, { "epoch": 0.10574716307535383, "grad_norm": 0.14763236045837402, "learning_rate": 0.002, "loss": 2.5722, "step": 53080 }, { "epoch": 0.10576708529899273, "grad_norm": 0.16572196781635284, "learning_rate": 0.002, "loss": 2.5795, "step": 53090 }, { "epoch": 0.10578700752263165, "grad_norm": 0.16226008534431458, "learning_rate": 0.002, "loss": 2.5804, "step": 53100 }, { "epoch": 0.10580692974627055, "grad_norm": 0.1736498922109604, "learning_rate": 0.002, "loss": 2.5748, "step": 53110 }, { "epoch": 0.10582685196990947, "grad_norm": 0.15901190042495728, "learning_rate": 0.002, "loss": 2.574, "step": 53120 }, { "epoch": 0.10584677419354839, "grad_norm": 0.15612094104290009, "learning_rate": 0.002, "loss": 2.5849, "step": 53130 }, { "epoch": 0.1058666964171873, "grad_norm": 0.1804787516593933, "learning_rate": 0.002, "loss": 2.5821, "step": 53140 }, { "epoch": 0.10588661864082621, "grad_norm": 0.21112528443336487, "learning_rate": 0.002, "loss": 2.5923, "step": 53150 }, { "epoch": 0.10590654086446513, "grad_norm": 0.1601332575082779, "learning_rate": 0.002, "loss": 2.5853, "step": 53160 }, { "epoch": 0.10592646308810404, "grad_norm": 0.17158164083957672, "learning_rate": 0.002, "loss": 2.6119, "step": 53170 }, { "epoch": 0.10594638531174295, "grad_norm": 0.18132387101650238, "learning_rate": 0.002, "loss": 2.5732, "step": 53180 }, { "epoch": 0.10596630753538187, "grad_norm": 0.15091487765312195, "learning_rate": 0.002, "loss": 2.5808, "step": 53190 }, { "epoch": 0.10598622975902078, "grad_norm": 0.13364222645759583, "learning_rate": 0.002, "loss": 2.5761, "step": 53200 }, { "epoch": 0.1060061519826597, "grad_norm": 0.22277627885341644, "learning_rate": 0.002, "loss": 2.5798, "step": 53210 }, { "epoch": 0.10602607420629862, "grad_norm": 0.15431572496891022, "learning_rate": 0.002, "loss": 2.5839, "step": 53220 }, { "epoch": 0.10604599642993752, "grad_norm": 0.14727891981601715, "learning_rate": 0.002, "loss": 2.5913, "step": 53230 }, { "epoch": 0.10606591865357644, "grad_norm": 0.1563219130039215, "learning_rate": 0.002, "loss": 2.5568, "step": 53240 }, { "epoch": 0.10608584087721536, "grad_norm": 0.14914819598197937, "learning_rate": 0.002, "loss": 2.574, "step": 53250 }, { "epoch": 0.10610576310085426, "grad_norm": 0.19004090130329132, "learning_rate": 0.002, "loss": 2.5652, "step": 53260 }, { "epoch": 0.10612568532449318, "grad_norm": 0.14557188749313354, "learning_rate": 0.002, "loss": 2.5756, "step": 53270 }, { "epoch": 0.1061456075481321, "grad_norm": 0.20784732699394226, "learning_rate": 0.002, "loss": 2.5724, "step": 53280 }, { "epoch": 0.106165529771771, "grad_norm": 0.1493583470582962, "learning_rate": 0.002, "loss": 2.5669, "step": 53290 }, { "epoch": 0.10618545199540992, "grad_norm": 0.1958567500114441, "learning_rate": 0.002, "loss": 2.5757, "step": 53300 }, { "epoch": 0.10620537421904884, "grad_norm": 0.1581474095582962, "learning_rate": 0.002, "loss": 2.5672, "step": 53310 }, { "epoch": 0.10622529644268774, "grad_norm": 0.16985438764095306, "learning_rate": 0.002, "loss": 2.5622, "step": 53320 }, { "epoch": 0.10624521866632666, "grad_norm": 0.17322316765785217, "learning_rate": 0.002, "loss": 2.5753, "step": 53330 }, { "epoch": 0.10626514088996557, "grad_norm": 0.15179245173931122, "learning_rate": 0.002, "loss": 2.563, "step": 53340 }, { "epoch": 0.10628506311360449, "grad_norm": 0.15617163479328156, "learning_rate": 0.002, "loss": 2.5856, "step": 53350 }, { "epoch": 0.1063049853372434, "grad_norm": 0.2143586426973343, "learning_rate": 0.002, "loss": 2.5891, "step": 53360 }, { "epoch": 0.10632490756088231, "grad_norm": 0.19730032980442047, "learning_rate": 0.002, "loss": 2.5574, "step": 53370 }, { "epoch": 0.10634482978452123, "grad_norm": 0.1480601280927658, "learning_rate": 0.002, "loss": 2.5659, "step": 53380 }, { "epoch": 0.10636475200816015, "grad_norm": 0.16654281318187714, "learning_rate": 0.002, "loss": 2.5763, "step": 53390 }, { "epoch": 0.10638467423179905, "grad_norm": 0.17769762873649597, "learning_rate": 0.002, "loss": 2.5754, "step": 53400 }, { "epoch": 0.10640459645543797, "grad_norm": 0.17181973159313202, "learning_rate": 0.002, "loss": 2.582, "step": 53410 }, { "epoch": 0.10642451867907689, "grad_norm": 0.17887651920318604, "learning_rate": 0.002, "loss": 2.5777, "step": 53420 }, { "epoch": 0.10644444090271579, "grad_norm": 0.1639140397310257, "learning_rate": 0.002, "loss": 2.5789, "step": 53430 }, { "epoch": 0.10646436312635471, "grad_norm": 0.17442618310451508, "learning_rate": 0.002, "loss": 2.5849, "step": 53440 }, { "epoch": 0.10648428534999363, "grad_norm": 0.17995116114616394, "learning_rate": 0.002, "loss": 2.5707, "step": 53450 }, { "epoch": 0.10650420757363253, "grad_norm": 0.15240736305713654, "learning_rate": 0.002, "loss": 2.5639, "step": 53460 }, { "epoch": 0.10652412979727145, "grad_norm": 0.15497863292694092, "learning_rate": 0.002, "loss": 2.5702, "step": 53470 }, { "epoch": 0.10654405202091037, "grad_norm": 0.16947989165782928, "learning_rate": 0.002, "loss": 2.5717, "step": 53480 }, { "epoch": 0.10656397424454928, "grad_norm": 0.14650389552116394, "learning_rate": 0.002, "loss": 2.5647, "step": 53490 }, { "epoch": 0.1065838964681882, "grad_norm": 0.16097570955753326, "learning_rate": 0.002, "loss": 2.5733, "step": 53500 }, { "epoch": 0.10660381869182711, "grad_norm": 0.18572257459163666, "learning_rate": 0.002, "loss": 2.568, "step": 53510 }, { "epoch": 0.10662374091546602, "grad_norm": 0.15297746658325195, "learning_rate": 0.002, "loss": 2.5811, "step": 53520 }, { "epoch": 0.10664366313910494, "grad_norm": 0.17090243101119995, "learning_rate": 0.002, "loss": 2.573, "step": 53530 }, { "epoch": 0.10666358536274385, "grad_norm": 0.15427421033382416, "learning_rate": 0.002, "loss": 2.5726, "step": 53540 }, { "epoch": 0.10668350758638276, "grad_norm": 0.1801782250404358, "learning_rate": 0.002, "loss": 2.5719, "step": 53550 }, { "epoch": 0.10670342981002168, "grad_norm": 0.13388150930404663, "learning_rate": 0.002, "loss": 2.5689, "step": 53560 }, { "epoch": 0.1067233520336606, "grad_norm": 0.15260739624500275, "learning_rate": 0.002, "loss": 2.5866, "step": 53570 }, { "epoch": 0.1067432742572995, "grad_norm": 0.1772858053445816, "learning_rate": 0.002, "loss": 2.5977, "step": 53580 }, { "epoch": 0.10676319648093842, "grad_norm": 0.1868247538805008, "learning_rate": 0.002, "loss": 2.5849, "step": 53590 }, { "epoch": 0.10678311870457732, "grad_norm": 0.1537046730518341, "learning_rate": 0.002, "loss": 2.578, "step": 53600 }, { "epoch": 0.10680304092821624, "grad_norm": 0.19427438080310822, "learning_rate": 0.002, "loss": 2.575, "step": 53610 }, { "epoch": 0.10682296315185516, "grad_norm": 0.1625223308801651, "learning_rate": 0.002, "loss": 2.581, "step": 53620 }, { "epoch": 0.10684288537549406, "grad_norm": 0.17887529730796814, "learning_rate": 0.002, "loss": 2.5887, "step": 53630 }, { "epoch": 0.10686280759913298, "grad_norm": 0.14366844296455383, "learning_rate": 0.002, "loss": 2.5738, "step": 53640 }, { "epoch": 0.1068827298227719, "grad_norm": 0.1593329906463623, "learning_rate": 0.002, "loss": 2.5712, "step": 53650 }, { "epoch": 0.1069026520464108, "grad_norm": 0.1838676780462265, "learning_rate": 0.002, "loss": 2.5754, "step": 53660 }, { "epoch": 0.10692257427004973, "grad_norm": 0.15554513037204742, "learning_rate": 0.002, "loss": 2.5682, "step": 53670 }, { "epoch": 0.10694249649368864, "grad_norm": 0.1570783406496048, "learning_rate": 0.002, "loss": 2.5763, "step": 53680 }, { "epoch": 0.10696241871732755, "grad_norm": 0.1867537945508957, "learning_rate": 0.002, "loss": 2.5712, "step": 53690 }, { "epoch": 0.10698234094096647, "grad_norm": 0.16741631925106049, "learning_rate": 0.002, "loss": 2.5565, "step": 53700 }, { "epoch": 0.10700226316460539, "grad_norm": 0.18099215626716614, "learning_rate": 0.002, "loss": 2.576, "step": 53710 }, { "epoch": 0.10702218538824429, "grad_norm": 0.1570865958929062, "learning_rate": 0.002, "loss": 2.5657, "step": 53720 }, { "epoch": 0.10704210761188321, "grad_norm": 0.15171921253204346, "learning_rate": 0.002, "loss": 2.5693, "step": 53730 }, { "epoch": 0.10706202983552213, "grad_norm": 0.18736791610717773, "learning_rate": 0.002, "loss": 2.5725, "step": 53740 }, { "epoch": 0.10708195205916103, "grad_norm": 0.15262581408023834, "learning_rate": 0.002, "loss": 2.5706, "step": 53750 }, { "epoch": 0.10710187428279995, "grad_norm": 0.18933913111686707, "learning_rate": 0.002, "loss": 2.5749, "step": 53760 }, { "epoch": 0.10712179650643887, "grad_norm": 0.15372221171855927, "learning_rate": 0.002, "loss": 2.5702, "step": 53770 }, { "epoch": 0.10714171873007777, "grad_norm": 0.15043431520462036, "learning_rate": 0.002, "loss": 2.5721, "step": 53780 }, { "epoch": 0.10716164095371669, "grad_norm": 0.1729842871427536, "learning_rate": 0.002, "loss": 2.5824, "step": 53790 }, { "epoch": 0.10718156317735561, "grad_norm": 0.1882009506225586, "learning_rate": 0.002, "loss": 2.5761, "step": 53800 }, { "epoch": 0.10720148540099451, "grad_norm": 0.14912964403629303, "learning_rate": 0.002, "loss": 2.5673, "step": 53810 }, { "epoch": 0.10722140762463343, "grad_norm": 0.17352353036403656, "learning_rate": 0.002, "loss": 2.5681, "step": 53820 }, { "epoch": 0.10724132984827235, "grad_norm": 0.184627965092659, "learning_rate": 0.002, "loss": 2.5886, "step": 53830 }, { "epoch": 0.10726125207191126, "grad_norm": 0.1541433334350586, "learning_rate": 0.002, "loss": 2.5634, "step": 53840 }, { "epoch": 0.10728117429555017, "grad_norm": 0.1627500355243683, "learning_rate": 0.002, "loss": 2.5662, "step": 53850 }, { "epoch": 0.10730109651918908, "grad_norm": 0.18465912342071533, "learning_rate": 0.002, "loss": 2.5821, "step": 53860 }, { "epoch": 0.107321018742828, "grad_norm": 0.15855570137500763, "learning_rate": 0.002, "loss": 2.5771, "step": 53870 }, { "epoch": 0.10734094096646692, "grad_norm": 0.1718076914548874, "learning_rate": 0.002, "loss": 2.5726, "step": 53880 }, { "epoch": 0.10736086319010582, "grad_norm": 0.17586791515350342, "learning_rate": 0.002, "loss": 2.5733, "step": 53890 }, { "epoch": 0.10738078541374474, "grad_norm": 0.14527453482151031, "learning_rate": 0.002, "loss": 2.5862, "step": 53900 }, { "epoch": 0.10740070763738366, "grad_norm": 0.20468974113464355, "learning_rate": 0.002, "loss": 2.5621, "step": 53910 }, { "epoch": 0.10742062986102256, "grad_norm": 0.16180795431137085, "learning_rate": 0.002, "loss": 2.5783, "step": 53920 }, { "epoch": 0.10744055208466148, "grad_norm": 0.16594411432743073, "learning_rate": 0.002, "loss": 2.5719, "step": 53930 }, { "epoch": 0.1074604743083004, "grad_norm": 0.1673121601343155, "learning_rate": 0.002, "loss": 2.5702, "step": 53940 }, { "epoch": 0.1074803965319393, "grad_norm": 0.15697045624256134, "learning_rate": 0.002, "loss": 2.5862, "step": 53950 }, { "epoch": 0.10750031875557822, "grad_norm": 0.17295962572097778, "learning_rate": 0.002, "loss": 2.5795, "step": 53960 }, { "epoch": 0.10752024097921714, "grad_norm": 0.1734103262424469, "learning_rate": 0.002, "loss": 2.5748, "step": 53970 }, { "epoch": 0.10754016320285605, "grad_norm": 0.156381294131279, "learning_rate": 0.002, "loss": 2.5814, "step": 53980 }, { "epoch": 0.10756008542649496, "grad_norm": 0.17702439427375793, "learning_rate": 0.002, "loss": 2.5686, "step": 53990 }, { "epoch": 0.10758000765013388, "grad_norm": 0.1738879680633545, "learning_rate": 0.002, "loss": 2.5553, "step": 54000 }, { "epoch": 0.10759992987377279, "grad_norm": 0.16494600474834442, "learning_rate": 0.002, "loss": 2.5856, "step": 54010 }, { "epoch": 0.1076198520974117, "grad_norm": 0.1375805288553238, "learning_rate": 0.002, "loss": 2.5761, "step": 54020 }, { "epoch": 0.10763977432105062, "grad_norm": 0.16121870279312134, "learning_rate": 0.002, "loss": 2.5737, "step": 54030 }, { "epoch": 0.10765969654468953, "grad_norm": 0.1483800858259201, "learning_rate": 0.002, "loss": 2.5634, "step": 54040 }, { "epoch": 0.10767961876832845, "grad_norm": 0.16286903619766235, "learning_rate": 0.002, "loss": 2.585, "step": 54050 }, { "epoch": 0.10769954099196737, "grad_norm": 0.1864442378282547, "learning_rate": 0.002, "loss": 2.5841, "step": 54060 }, { "epoch": 0.10771946321560627, "grad_norm": 0.1642676144838333, "learning_rate": 0.002, "loss": 2.5692, "step": 54070 }, { "epoch": 0.10773938543924519, "grad_norm": 0.14933769404888153, "learning_rate": 0.002, "loss": 2.5835, "step": 54080 }, { "epoch": 0.10775930766288411, "grad_norm": 0.1344839632511139, "learning_rate": 0.002, "loss": 2.5799, "step": 54090 }, { "epoch": 0.10777922988652301, "grad_norm": 0.16615156829357147, "learning_rate": 0.002, "loss": 2.5719, "step": 54100 }, { "epoch": 0.10779915211016193, "grad_norm": 0.16947969794273376, "learning_rate": 0.002, "loss": 2.5812, "step": 54110 }, { "epoch": 0.10781907433380084, "grad_norm": 0.1624169796705246, "learning_rate": 0.002, "loss": 2.5861, "step": 54120 }, { "epoch": 0.10783899655743975, "grad_norm": 0.15218976140022278, "learning_rate": 0.002, "loss": 2.5687, "step": 54130 }, { "epoch": 0.10785891878107867, "grad_norm": 0.17815525829792023, "learning_rate": 0.002, "loss": 2.5668, "step": 54140 }, { "epoch": 0.10787884100471758, "grad_norm": 0.17252716422080994, "learning_rate": 0.002, "loss": 2.5811, "step": 54150 }, { "epoch": 0.1078987632283565, "grad_norm": 0.17351260781288147, "learning_rate": 0.002, "loss": 2.5754, "step": 54160 }, { "epoch": 0.10791868545199541, "grad_norm": 0.1813732534646988, "learning_rate": 0.002, "loss": 2.5726, "step": 54170 }, { "epoch": 0.10793860767563432, "grad_norm": 0.17992441356182098, "learning_rate": 0.002, "loss": 2.5768, "step": 54180 }, { "epoch": 0.10795852989927324, "grad_norm": 0.16844600439071655, "learning_rate": 0.002, "loss": 2.59, "step": 54190 }, { "epoch": 0.10797845212291216, "grad_norm": 0.1522957682609558, "learning_rate": 0.002, "loss": 2.5795, "step": 54200 }, { "epoch": 0.10799837434655106, "grad_norm": 0.15333163738250732, "learning_rate": 0.002, "loss": 2.5655, "step": 54210 }, { "epoch": 0.10801829657018998, "grad_norm": 0.21686433255672455, "learning_rate": 0.002, "loss": 2.5732, "step": 54220 }, { "epoch": 0.1080382187938289, "grad_norm": 0.1638038158416748, "learning_rate": 0.002, "loss": 2.5826, "step": 54230 }, { "epoch": 0.1080581410174678, "grad_norm": 0.1533343493938446, "learning_rate": 0.002, "loss": 2.5735, "step": 54240 }, { "epoch": 0.10807806324110672, "grad_norm": 0.15761859714984894, "learning_rate": 0.002, "loss": 2.5872, "step": 54250 }, { "epoch": 0.10809798546474564, "grad_norm": 0.1736682504415512, "learning_rate": 0.002, "loss": 2.5753, "step": 54260 }, { "epoch": 0.10811790768838454, "grad_norm": 0.1512756198644638, "learning_rate": 0.002, "loss": 2.5695, "step": 54270 }, { "epoch": 0.10813782991202346, "grad_norm": 0.1684778928756714, "learning_rate": 0.002, "loss": 2.5902, "step": 54280 }, { "epoch": 0.10815775213566238, "grad_norm": 0.1624356210231781, "learning_rate": 0.002, "loss": 2.567, "step": 54290 }, { "epoch": 0.10817767435930128, "grad_norm": 0.1832011491060257, "learning_rate": 0.002, "loss": 2.5711, "step": 54300 }, { "epoch": 0.1081975965829402, "grad_norm": 0.16620144248008728, "learning_rate": 0.002, "loss": 2.5743, "step": 54310 }, { "epoch": 0.10821751880657912, "grad_norm": 0.15239332616329193, "learning_rate": 0.002, "loss": 2.567, "step": 54320 }, { "epoch": 0.10823744103021803, "grad_norm": 0.18253789842128754, "learning_rate": 0.002, "loss": 2.5793, "step": 54330 }, { "epoch": 0.10825736325385694, "grad_norm": 0.16783639788627625, "learning_rate": 0.002, "loss": 2.5661, "step": 54340 }, { "epoch": 0.10827728547749585, "grad_norm": 0.15817493200302124, "learning_rate": 0.002, "loss": 2.5732, "step": 54350 }, { "epoch": 0.10829720770113477, "grad_norm": 0.15933500230312347, "learning_rate": 0.002, "loss": 2.5633, "step": 54360 }, { "epoch": 0.10831712992477369, "grad_norm": 0.15165942907333374, "learning_rate": 0.002, "loss": 2.5668, "step": 54370 }, { "epoch": 0.10833705214841259, "grad_norm": 0.2191014140844345, "learning_rate": 0.002, "loss": 2.5602, "step": 54380 }, { "epoch": 0.10835697437205151, "grad_norm": 0.16080313920974731, "learning_rate": 0.002, "loss": 2.5784, "step": 54390 }, { "epoch": 0.10837689659569043, "grad_norm": 0.17371170222759247, "learning_rate": 0.002, "loss": 2.5739, "step": 54400 }, { "epoch": 0.10839681881932933, "grad_norm": 0.1766483634710312, "learning_rate": 0.002, "loss": 2.5909, "step": 54410 }, { "epoch": 0.10841674104296825, "grad_norm": 0.15184776484966278, "learning_rate": 0.002, "loss": 2.5748, "step": 54420 }, { "epoch": 0.10843666326660717, "grad_norm": 0.14017952978610992, "learning_rate": 0.002, "loss": 2.5744, "step": 54430 }, { "epoch": 0.10845658549024607, "grad_norm": 0.14030510187149048, "learning_rate": 0.002, "loss": 2.5749, "step": 54440 }, { "epoch": 0.10847650771388499, "grad_norm": 0.17916719615459442, "learning_rate": 0.002, "loss": 2.5677, "step": 54450 }, { "epoch": 0.10849642993752391, "grad_norm": 0.1507183462381363, "learning_rate": 0.002, "loss": 2.5854, "step": 54460 }, { "epoch": 0.10851635216116282, "grad_norm": 0.16273623704910278, "learning_rate": 0.002, "loss": 2.5764, "step": 54470 }, { "epoch": 0.10853627438480173, "grad_norm": 0.1506076604127884, "learning_rate": 0.002, "loss": 2.5621, "step": 54480 }, { "epoch": 0.10855619660844065, "grad_norm": 0.1500120609998703, "learning_rate": 0.002, "loss": 2.567, "step": 54490 }, { "epoch": 0.10857611883207956, "grad_norm": 0.2012033611536026, "learning_rate": 0.002, "loss": 2.5672, "step": 54500 }, { "epoch": 0.10859604105571848, "grad_norm": 0.15304172039031982, "learning_rate": 0.002, "loss": 2.5618, "step": 54510 }, { "epoch": 0.1086159632793574, "grad_norm": 0.18339362740516663, "learning_rate": 0.002, "loss": 2.5751, "step": 54520 }, { "epoch": 0.1086358855029963, "grad_norm": 0.26899829506874084, "learning_rate": 0.002, "loss": 2.5735, "step": 54530 }, { "epoch": 0.10865580772663522, "grad_norm": 0.15440136194229126, "learning_rate": 0.002, "loss": 2.5679, "step": 54540 }, { "epoch": 0.10867572995027414, "grad_norm": 0.18164002895355225, "learning_rate": 0.002, "loss": 2.5665, "step": 54550 }, { "epoch": 0.10869565217391304, "grad_norm": 0.15070606768131256, "learning_rate": 0.002, "loss": 2.5864, "step": 54560 }, { "epoch": 0.10871557439755196, "grad_norm": 0.20053400099277496, "learning_rate": 0.002, "loss": 2.5841, "step": 54570 }, { "epoch": 0.10873549662119088, "grad_norm": 0.16125057637691498, "learning_rate": 0.002, "loss": 2.5731, "step": 54580 }, { "epoch": 0.10875541884482978, "grad_norm": 0.23133055865764618, "learning_rate": 0.002, "loss": 2.5974, "step": 54590 }, { "epoch": 0.1087753410684687, "grad_norm": 0.15343934297561646, "learning_rate": 0.002, "loss": 2.574, "step": 54600 }, { "epoch": 0.1087952632921076, "grad_norm": 0.1522986739873886, "learning_rate": 0.002, "loss": 2.5691, "step": 54610 }, { "epoch": 0.10881518551574652, "grad_norm": 0.20660178363323212, "learning_rate": 0.002, "loss": 2.5741, "step": 54620 }, { "epoch": 0.10883510773938544, "grad_norm": 0.17137882113456726, "learning_rate": 0.002, "loss": 2.5786, "step": 54630 }, { "epoch": 0.10885502996302435, "grad_norm": 0.16056950390338898, "learning_rate": 0.002, "loss": 2.5655, "step": 54640 }, { "epoch": 0.10887495218666327, "grad_norm": 0.15671633183956146, "learning_rate": 0.002, "loss": 2.5794, "step": 54650 }, { "epoch": 0.10889487441030218, "grad_norm": 0.17362482845783234, "learning_rate": 0.002, "loss": 2.5812, "step": 54660 }, { "epoch": 0.10891479663394109, "grad_norm": 0.13231004774570465, "learning_rate": 0.002, "loss": 2.5939, "step": 54670 }, { "epoch": 0.10893471885758, "grad_norm": 0.16758976876735687, "learning_rate": 0.002, "loss": 2.5763, "step": 54680 }, { "epoch": 0.10895464108121893, "grad_norm": 0.1481105387210846, "learning_rate": 0.002, "loss": 2.5795, "step": 54690 }, { "epoch": 0.10897456330485783, "grad_norm": 0.13632671535015106, "learning_rate": 0.002, "loss": 2.5775, "step": 54700 }, { "epoch": 0.10899448552849675, "grad_norm": 0.18107256293296814, "learning_rate": 0.002, "loss": 2.583, "step": 54710 }, { "epoch": 0.10901440775213567, "grad_norm": 0.1667666882276535, "learning_rate": 0.002, "loss": 2.5729, "step": 54720 }, { "epoch": 0.10903432997577457, "grad_norm": 0.15656757354736328, "learning_rate": 0.002, "loss": 2.5672, "step": 54730 }, { "epoch": 0.10905425219941349, "grad_norm": 0.15127958357334137, "learning_rate": 0.002, "loss": 2.5702, "step": 54740 }, { "epoch": 0.10907417442305241, "grad_norm": 0.16704651713371277, "learning_rate": 0.002, "loss": 2.5645, "step": 54750 }, { "epoch": 0.10909409664669131, "grad_norm": 0.16816240549087524, "learning_rate": 0.002, "loss": 2.5794, "step": 54760 }, { "epoch": 0.10911401887033023, "grad_norm": 0.158869668841362, "learning_rate": 0.002, "loss": 2.5692, "step": 54770 }, { "epoch": 0.10913394109396915, "grad_norm": 0.1789892166852951, "learning_rate": 0.002, "loss": 2.5723, "step": 54780 }, { "epoch": 0.10915386331760805, "grad_norm": 0.177705779671669, "learning_rate": 0.002, "loss": 2.5838, "step": 54790 }, { "epoch": 0.10917378554124697, "grad_norm": 0.1841283142566681, "learning_rate": 0.002, "loss": 2.5826, "step": 54800 }, { "epoch": 0.10919370776488589, "grad_norm": 0.15305984020233154, "learning_rate": 0.002, "loss": 2.574, "step": 54810 }, { "epoch": 0.1092136299885248, "grad_norm": 0.15801286697387695, "learning_rate": 0.002, "loss": 2.5732, "step": 54820 }, { "epoch": 0.10923355221216371, "grad_norm": 0.17863290011882782, "learning_rate": 0.002, "loss": 2.578, "step": 54830 }, { "epoch": 0.10925347443580263, "grad_norm": 0.1787438541650772, "learning_rate": 0.002, "loss": 2.5759, "step": 54840 }, { "epoch": 0.10927339665944154, "grad_norm": 0.15538518130779266, "learning_rate": 0.002, "loss": 2.5808, "step": 54850 }, { "epoch": 0.10929331888308046, "grad_norm": 0.18065384030342102, "learning_rate": 0.002, "loss": 2.5818, "step": 54860 }, { "epoch": 0.10931324110671936, "grad_norm": 0.1502930074930191, "learning_rate": 0.002, "loss": 2.5619, "step": 54870 }, { "epoch": 0.10933316333035828, "grad_norm": 0.18379934132099152, "learning_rate": 0.002, "loss": 2.5554, "step": 54880 }, { "epoch": 0.1093530855539972, "grad_norm": 0.15294188261032104, "learning_rate": 0.002, "loss": 2.5672, "step": 54890 }, { "epoch": 0.1093730077776361, "grad_norm": 0.1333295851945877, "learning_rate": 0.002, "loss": 2.5655, "step": 54900 }, { "epoch": 0.10939293000127502, "grad_norm": 0.1695556789636612, "learning_rate": 0.002, "loss": 2.5756, "step": 54910 }, { "epoch": 0.10941285222491394, "grad_norm": 0.15794262290000916, "learning_rate": 0.002, "loss": 2.5663, "step": 54920 }, { "epoch": 0.10943277444855284, "grad_norm": 0.13879171013832092, "learning_rate": 0.002, "loss": 2.5631, "step": 54930 }, { "epoch": 0.10945269667219176, "grad_norm": 0.17803849279880524, "learning_rate": 0.002, "loss": 2.5638, "step": 54940 }, { "epoch": 0.10947261889583068, "grad_norm": 0.14903724193572998, "learning_rate": 0.002, "loss": 2.5739, "step": 54950 }, { "epoch": 0.10949254111946959, "grad_norm": 0.20035478472709656, "learning_rate": 0.002, "loss": 2.5868, "step": 54960 }, { "epoch": 0.1095124633431085, "grad_norm": 0.1704430729150772, "learning_rate": 0.002, "loss": 2.5725, "step": 54970 }, { "epoch": 0.10953238556674742, "grad_norm": 0.14600728452205658, "learning_rate": 0.002, "loss": 2.5731, "step": 54980 }, { "epoch": 0.10955230779038633, "grad_norm": 0.15426984429359436, "learning_rate": 0.002, "loss": 2.5769, "step": 54990 }, { "epoch": 0.10957223001402525, "grad_norm": 0.18793603777885437, "learning_rate": 0.002, "loss": 2.5904, "step": 55000 }, { "epoch": 0.10959215223766416, "grad_norm": 0.14750659465789795, "learning_rate": 0.002, "loss": 2.5692, "step": 55010 }, { "epoch": 0.10961207446130307, "grad_norm": 0.16656240820884705, "learning_rate": 0.002, "loss": 2.5856, "step": 55020 }, { "epoch": 0.10963199668494199, "grad_norm": 0.15964192152023315, "learning_rate": 0.002, "loss": 2.5807, "step": 55030 }, { "epoch": 0.1096519189085809, "grad_norm": 0.18421733379364014, "learning_rate": 0.002, "loss": 2.5652, "step": 55040 }, { "epoch": 0.10967184113221981, "grad_norm": 0.21212545037269592, "learning_rate": 0.002, "loss": 2.5904, "step": 55050 }, { "epoch": 0.10969176335585873, "grad_norm": 0.16346174478530884, "learning_rate": 0.002, "loss": 2.6035, "step": 55060 }, { "epoch": 0.10971168557949765, "grad_norm": 0.1481909155845642, "learning_rate": 0.002, "loss": 2.596, "step": 55070 }, { "epoch": 0.10973160780313655, "grad_norm": 0.18836583197116852, "learning_rate": 0.002, "loss": 2.578, "step": 55080 }, { "epoch": 0.10975153002677547, "grad_norm": 0.15447524189949036, "learning_rate": 0.002, "loss": 2.5744, "step": 55090 }, { "epoch": 0.10977145225041439, "grad_norm": 0.18371862173080444, "learning_rate": 0.002, "loss": 2.5726, "step": 55100 }, { "epoch": 0.1097913744740533, "grad_norm": 0.1547946333885193, "learning_rate": 0.002, "loss": 2.5817, "step": 55110 }, { "epoch": 0.10981129669769221, "grad_norm": 0.1478741317987442, "learning_rate": 0.002, "loss": 2.5621, "step": 55120 }, { "epoch": 0.10983121892133112, "grad_norm": 0.16379697620868683, "learning_rate": 0.002, "loss": 2.5669, "step": 55130 }, { "epoch": 0.10985114114497004, "grad_norm": 0.15486179292201996, "learning_rate": 0.002, "loss": 2.5784, "step": 55140 }, { "epoch": 0.10987106336860895, "grad_norm": 0.16865962743759155, "learning_rate": 0.002, "loss": 2.5699, "step": 55150 }, { "epoch": 0.10989098559224786, "grad_norm": 0.1414819359779358, "learning_rate": 0.002, "loss": 2.5595, "step": 55160 }, { "epoch": 0.10991090781588678, "grad_norm": 0.1621890664100647, "learning_rate": 0.002, "loss": 2.5842, "step": 55170 }, { "epoch": 0.1099308300395257, "grad_norm": 0.14492730796337128, "learning_rate": 0.002, "loss": 2.5734, "step": 55180 }, { "epoch": 0.1099507522631646, "grad_norm": 0.22918541729450226, "learning_rate": 0.002, "loss": 2.5821, "step": 55190 }, { "epoch": 0.10997067448680352, "grad_norm": 0.14911381900310516, "learning_rate": 0.002, "loss": 2.5833, "step": 55200 }, { "epoch": 0.10999059671044244, "grad_norm": 0.16141662001609802, "learning_rate": 0.002, "loss": 2.5772, "step": 55210 }, { "epoch": 0.11001051893408134, "grad_norm": 0.1468731313943863, "learning_rate": 0.002, "loss": 2.5662, "step": 55220 }, { "epoch": 0.11003044115772026, "grad_norm": 0.1578463315963745, "learning_rate": 0.002, "loss": 2.5928, "step": 55230 }, { "epoch": 0.11005036338135918, "grad_norm": 0.18097195029258728, "learning_rate": 0.002, "loss": 2.5621, "step": 55240 }, { "epoch": 0.11007028560499808, "grad_norm": 0.1656266152858734, "learning_rate": 0.002, "loss": 2.5985, "step": 55250 }, { "epoch": 0.110090207828637, "grad_norm": 0.16207429766654968, "learning_rate": 0.002, "loss": 2.5754, "step": 55260 }, { "epoch": 0.11011013005227592, "grad_norm": 0.18752454221248627, "learning_rate": 0.002, "loss": 2.5565, "step": 55270 }, { "epoch": 0.11013005227591482, "grad_norm": 0.1675509363412857, "learning_rate": 0.002, "loss": 2.5694, "step": 55280 }, { "epoch": 0.11014997449955374, "grad_norm": 0.15102185308933258, "learning_rate": 0.002, "loss": 2.5757, "step": 55290 }, { "epoch": 0.11016989672319266, "grad_norm": 0.1643625944852829, "learning_rate": 0.002, "loss": 2.5722, "step": 55300 }, { "epoch": 0.11018981894683157, "grad_norm": 0.1605243682861328, "learning_rate": 0.002, "loss": 2.5807, "step": 55310 }, { "epoch": 0.11020974117047048, "grad_norm": 0.16351105272769928, "learning_rate": 0.002, "loss": 2.579, "step": 55320 }, { "epoch": 0.1102296633941094, "grad_norm": 0.16836588084697723, "learning_rate": 0.002, "loss": 2.5745, "step": 55330 }, { "epoch": 0.11024958561774831, "grad_norm": 0.19601887464523315, "learning_rate": 0.002, "loss": 2.5856, "step": 55340 }, { "epoch": 0.11026950784138723, "grad_norm": 0.1551598161458969, "learning_rate": 0.002, "loss": 2.5781, "step": 55350 }, { "epoch": 0.11028943006502613, "grad_norm": 0.15072135627269745, "learning_rate": 0.002, "loss": 2.5849, "step": 55360 }, { "epoch": 0.11030935228866505, "grad_norm": 0.18053703010082245, "learning_rate": 0.002, "loss": 2.5808, "step": 55370 }, { "epoch": 0.11032927451230397, "grad_norm": 0.1812949925661087, "learning_rate": 0.002, "loss": 2.5773, "step": 55380 }, { "epoch": 0.11034919673594287, "grad_norm": 0.1453130543231964, "learning_rate": 0.002, "loss": 2.5866, "step": 55390 }, { "epoch": 0.11036911895958179, "grad_norm": 0.1728980839252472, "learning_rate": 0.002, "loss": 2.5655, "step": 55400 }, { "epoch": 0.11038904118322071, "grad_norm": 0.1465320736169815, "learning_rate": 0.002, "loss": 2.5764, "step": 55410 }, { "epoch": 0.11040896340685961, "grad_norm": 0.1420043706893921, "learning_rate": 0.002, "loss": 2.5771, "step": 55420 }, { "epoch": 0.11042888563049853, "grad_norm": 0.2074752151966095, "learning_rate": 0.002, "loss": 2.5709, "step": 55430 }, { "epoch": 0.11044880785413745, "grad_norm": 0.15165629982948303, "learning_rate": 0.002, "loss": 2.5788, "step": 55440 }, { "epoch": 0.11046873007777636, "grad_norm": 0.15817593038082123, "learning_rate": 0.002, "loss": 2.5919, "step": 55450 }, { "epoch": 0.11048865230141527, "grad_norm": 0.1503429263830185, "learning_rate": 0.002, "loss": 2.5724, "step": 55460 }, { "epoch": 0.11050857452505419, "grad_norm": 0.14497476816177368, "learning_rate": 0.002, "loss": 2.5734, "step": 55470 }, { "epoch": 0.1105284967486931, "grad_norm": 0.1717585027217865, "learning_rate": 0.002, "loss": 2.5653, "step": 55480 }, { "epoch": 0.11054841897233202, "grad_norm": 0.16129069030284882, "learning_rate": 0.002, "loss": 2.5801, "step": 55490 }, { "epoch": 0.11056834119597093, "grad_norm": 0.16348066926002502, "learning_rate": 0.002, "loss": 2.5834, "step": 55500 }, { "epoch": 0.11058826341960984, "grad_norm": 0.17536437511444092, "learning_rate": 0.002, "loss": 2.5891, "step": 55510 }, { "epoch": 0.11060818564324876, "grad_norm": 0.1613079160451889, "learning_rate": 0.002, "loss": 2.5815, "step": 55520 }, { "epoch": 0.11062810786688768, "grad_norm": 0.16291126608848572, "learning_rate": 0.002, "loss": 2.5655, "step": 55530 }, { "epoch": 0.11064803009052658, "grad_norm": 0.1449456661939621, "learning_rate": 0.002, "loss": 2.5785, "step": 55540 }, { "epoch": 0.1106679523141655, "grad_norm": 0.20725728571414948, "learning_rate": 0.002, "loss": 2.5782, "step": 55550 }, { "epoch": 0.11068787453780442, "grad_norm": 0.1631866842508316, "learning_rate": 0.002, "loss": 2.5771, "step": 55560 }, { "epoch": 0.11070779676144332, "grad_norm": 0.1567074954509735, "learning_rate": 0.002, "loss": 2.5614, "step": 55570 }, { "epoch": 0.11072771898508224, "grad_norm": 0.1616877317428589, "learning_rate": 0.002, "loss": 2.5902, "step": 55580 }, { "epoch": 0.11074764120872116, "grad_norm": 0.1797361522912979, "learning_rate": 0.002, "loss": 2.5765, "step": 55590 }, { "epoch": 0.11076756343236006, "grad_norm": 0.14278073608875275, "learning_rate": 0.002, "loss": 2.5788, "step": 55600 }, { "epoch": 0.11078748565599898, "grad_norm": 0.16108904778957367, "learning_rate": 0.002, "loss": 2.5662, "step": 55610 }, { "epoch": 0.11080740787963789, "grad_norm": 0.14384210109710693, "learning_rate": 0.002, "loss": 2.5834, "step": 55620 }, { "epoch": 0.1108273301032768, "grad_norm": 0.15617577731609344, "learning_rate": 0.002, "loss": 2.5736, "step": 55630 }, { "epoch": 0.11084725232691572, "grad_norm": 0.18518409132957458, "learning_rate": 0.002, "loss": 2.5766, "step": 55640 }, { "epoch": 0.11086717455055463, "grad_norm": 0.14896829426288605, "learning_rate": 0.002, "loss": 2.5622, "step": 55650 }, { "epoch": 0.11088709677419355, "grad_norm": 0.157521590590477, "learning_rate": 0.002, "loss": 2.5748, "step": 55660 }, { "epoch": 0.11090701899783247, "grad_norm": 0.19075095653533936, "learning_rate": 0.002, "loss": 2.5669, "step": 55670 }, { "epoch": 0.11092694122147137, "grad_norm": 0.16961756348609924, "learning_rate": 0.002, "loss": 2.575, "step": 55680 }, { "epoch": 0.11094686344511029, "grad_norm": 0.14213275909423828, "learning_rate": 0.002, "loss": 2.5688, "step": 55690 }, { "epoch": 0.11096678566874921, "grad_norm": 0.16898386180400848, "learning_rate": 0.002, "loss": 2.5898, "step": 55700 }, { "epoch": 0.11098670789238811, "grad_norm": 0.16673065721988678, "learning_rate": 0.002, "loss": 2.5808, "step": 55710 }, { "epoch": 0.11100663011602703, "grad_norm": 0.17064030468463898, "learning_rate": 0.002, "loss": 2.5706, "step": 55720 }, { "epoch": 0.11102655233966595, "grad_norm": 0.16003814339637756, "learning_rate": 0.002, "loss": 2.58, "step": 55730 }, { "epoch": 0.11104647456330485, "grad_norm": 0.16412734985351562, "learning_rate": 0.002, "loss": 2.5667, "step": 55740 }, { "epoch": 0.11106639678694377, "grad_norm": 0.18339525163173676, "learning_rate": 0.002, "loss": 2.5647, "step": 55750 }, { "epoch": 0.11108631901058269, "grad_norm": 0.17967242002487183, "learning_rate": 0.002, "loss": 2.5711, "step": 55760 }, { "epoch": 0.1111062412342216, "grad_norm": 0.1652657389640808, "learning_rate": 0.002, "loss": 2.5728, "step": 55770 }, { "epoch": 0.11112616345786051, "grad_norm": 0.2223317176103592, "learning_rate": 0.002, "loss": 2.5806, "step": 55780 }, { "epoch": 0.11114608568149943, "grad_norm": 0.16805259883403778, "learning_rate": 0.002, "loss": 2.5668, "step": 55790 }, { "epoch": 0.11116600790513834, "grad_norm": 0.14735957980155945, "learning_rate": 0.002, "loss": 2.5708, "step": 55800 }, { "epoch": 0.11118593012877726, "grad_norm": 0.19374850392341614, "learning_rate": 0.002, "loss": 2.5601, "step": 55810 }, { "epoch": 0.11120585235241617, "grad_norm": 0.15546241402626038, "learning_rate": 0.002, "loss": 2.5698, "step": 55820 }, { "epoch": 0.11122577457605508, "grad_norm": 0.1416267454624176, "learning_rate": 0.002, "loss": 2.5775, "step": 55830 }, { "epoch": 0.111245696799694, "grad_norm": 0.18586914241313934, "learning_rate": 0.002, "loss": 2.5809, "step": 55840 }, { "epoch": 0.11126561902333292, "grad_norm": 0.1795649379491806, "learning_rate": 0.002, "loss": 2.5748, "step": 55850 }, { "epoch": 0.11128554124697182, "grad_norm": 0.15548159182071686, "learning_rate": 0.002, "loss": 2.5692, "step": 55860 }, { "epoch": 0.11130546347061074, "grad_norm": 0.1966884434223175, "learning_rate": 0.002, "loss": 2.5804, "step": 55870 }, { "epoch": 0.11132538569424964, "grad_norm": 0.17062509059906006, "learning_rate": 0.002, "loss": 2.5839, "step": 55880 }, { "epoch": 0.11134530791788856, "grad_norm": 0.13429319858551025, "learning_rate": 0.002, "loss": 2.5788, "step": 55890 }, { "epoch": 0.11136523014152748, "grad_norm": 0.18220160901546478, "learning_rate": 0.002, "loss": 2.5748, "step": 55900 }, { "epoch": 0.11138515236516638, "grad_norm": 0.20258423686027527, "learning_rate": 0.002, "loss": 2.5833, "step": 55910 }, { "epoch": 0.1114050745888053, "grad_norm": 0.1568852812051773, "learning_rate": 0.002, "loss": 2.5764, "step": 55920 }, { "epoch": 0.11142499681244422, "grad_norm": 0.1255771964788437, "learning_rate": 0.002, "loss": 2.5764, "step": 55930 }, { "epoch": 0.11144491903608313, "grad_norm": 0.14587287604808807, "learning_rate": 0.002, "loss": 2.5774, "step": 55940 }, { "epoch": 0.11146484125972204, "grad_norm": 0.16237008571624756, "learning_rate": 0.002, "loss": 2.584, "step": 55950 }, { "epoch": 0.11148476348336096, "grad_norm": 0.16900306940078735, "learning_rate": 0.002, "loss": 2.5866, "step": 55960 }, { "epoch": 0.11150468570699987, "grad_norm": 0.15949910879135132, "learning_rate": 0.002, "loss": 2.5732, "step": 55970 }, { "epoch": 0.11152460793063879, "grad_norm": 0.1493978500366211, "learning_rate": 0.002, "loss": 2.5783, "step": 55980 }, { "epoch": 0.1115445301542777, "grad_norm": 0.16600175201892853, "learning_rate": 0.002, "loss": 2.5775, "step": 55990 }, { "epoch": 0.11156445237791661, "grad_norm": 0.1736292541027069, "learning_rate": 0.002, "loss": 2.563, "step": 56000 }, { "epoch": 0.11158437460155553, "grad_norm": 0.17871299386024475, "learning_rate": 0.002, "loss": 2.592, "step": 56010 }, { "epoch": 0.11160429682519445, "grad_norm": 0.17280587553977966, "learning_rate": 0.002, "loss": 2.5628, "step": 56020 }, { "epoch": 0.11162421904883335, "grad_norm": 0.21848857402801514, "learning_rate": 0.002, "loss": 2.5748, "step": 56030 }, { "epoch": 0.11164414127247227, "grad_norm": 0.15628333389759064, "learning_rate": 0.002, "loss": 2.5716, "step": 56040 }, { "epoch": 0.11166406349611119, "grad_norm": 0.20310863852500916, "learning_rate": 0.002, "loss": 2.5761, "step": 56050 }, { "epoch": 0.11168398571975009, "grad_norm": 0.1604943424463272, "learning_rate": 0.002, "loss": 2.5636, "step": 56060 }, { "epoch": 0.11170390794338901, "grad_norm": 0.16900110244750977, "learning_rate": 0.002, "loss": 2.5697, "step": 56070 }, { "epoch": 0.11172383016702793, "grad_norm": 0.15433835983276367, "learning_rate": 0.002, "loss": 2.5814, "step": 56080 }, { "epoch": 0.11174375239066683, "grad_norm": 0.13957196474075317, "learning_rate": 0.002, "loss": 2.5784, "step": 56090 }, { "epoch": 0.11176367461430575, "grad_norm": 0.17918574810028076, "learning_rate": 0.002, "loss": 2.588, "step": 56100 }, { "epoch": 0.11178359683794467, "grad_norm": 0.14243613183498383, "learning_rate": 0.002, "loss": 2.5672, "step": 56110 }, { "epoch": 0.11180351906158358, "grad_norm": 0.15067048370838165, "learning_rate": 0.002, "loss": 2.5648, "step": 56120 }, { "epoch": 0.1118234412852225, "grad_norm": 0.20827730000019073, "learning_rate": 0.002, "loss": 2.5634, "step": 56130 }, { "epoch": 0.1118433635088614, "grad_norm": 0.14000320434570312, "learning_rate": 0.002, "loss": 2.5798, "step": 56140 }, { "epoch": 0.11186328573250032, "grad_norm": 0.1830059289932251, "learning_rate": 0.002, "loss": 2.5663, "step": 56150 }, { "epoch": 0.11188320795613924, "grad_norm": 0.14975765347480774, "learning_rate": 0.002, "loss": 2.584, "step": 56160 }, { "epoch": 0.11190313017977814, "grad_norm": 0.1708276867866516, "learning_rate": 0.002, "loss": 2.5524, "step": 56170 }, { "epoch": 0.11192305240341706, "grad_norm": 0.15937314927577972, "learning_rate": 0.002, "loss": 2.5624, "step": 56180 }, { "epoch": 0.11194297462705598, "grad_norm": 0.1750740110874176, "learning_rate": 0.002, "loss": 2.582, "step": 56190 }, { "epoch": 0.11196289685069488, "grad_norm": 0.17269493639469147, "learning_rate": 0.002, "loss": 2.575, "step": 56200 }, { "epoch": 0.1119828190743338, "grad_norm": 0.20254471898078918, "learning_rate": 0.002, "loss": 2.5577, "step": 56210 }, { "epoch": 0.11200274129797272, "grad_norm": 0.18666931986808777, "learning_rate": 0.002, "loss": 2.5897, "step": 56220 }, { "epoch": 0.11202266352161162, "grad_norm": 0.17361962795257568, "learning_rate": 0.002, "loss": 2.5653, "step": 56230 }, { "epoch": 0.11204258574525054, "grad_norm": 0.1619705855846405, "learning_rate": 0.002, "loss": 2.5805, "step": 56240 }, { "epoch": 0.11206250796888946, "grad_norm": 0.14421096444129944, "learning_rate": 0.002, "loss": 2.5763, "step": 56250 }, { "epoch": 0.11208243019252837, "grad_norm": 0.17133770883083344, "learning_rate": 0.002, "loss": 2.5737, "step": 56260 }, { "epoch": 0.11210235241616728, "grad_norm": 0.18093673884868622, "learning_rate": 0.002, "loss": 2.577, "step": 56270 }, { "epoch": 0.1121222746398062, "grad_norm": 0.17828403413295746, "learning_rate": 0.002, "loss": 2.5795, "step": 56280 }, { "epoch": 0.1121421968634451, "grad_norm": 0.15027420222759247, "learning_rate": 0.002, "loss": 2.5733, "step": 56290 }, { "epoch": 0.11216211908708403, "grad_norm": 0.15831901133060455, "learning_rate": 0.002, "loss": 2.5656, "step": 56300 }, { "epoch": 0.11218204131072294, "grad_norm": 0.15856464207172394, "learning_rate": 0.002, "loss": 2.5649, "step": 56310 }, { "epoch": 0.11220196353436185, "grad_norm": 0.16424888372421265, "learning_rate": 0.002, "loss": 2.5825, "step": 56320 }, { "epoch": 0.11222188575800077, "grad_norm": 0.17677675187587738, "learning_rate": 0.002, "loss": 2.5624, "step": 56330 }, { "epoch": 0.11224180798163969, "grad_norm": 0.18511749804019928, "learning_rate": 0.002, "loss": 2.5596, "step": 56340 }, { "epoch": 0.11226173020527859, "grad_norm": 0.17145194113254547, "learning_rate": 0.002, "loss": 2.5665, "step": 56350 }, { "epoch": 0.11228165242891751, "grad_norm": 0.13818025588989258, "learning_rate": 0.002, "loss": 2.5625, "step": 56360 }, { "epoch": 0.11230157465255641, "grad_norm": 0.1698409616947174, "learning_rate": 0.002, "loss": 2.5893, "step": 56370 }, { "epoch": 0.11232149687619533, "grad_norm": 0.15585434436798096, "learning_rate": 0.002, "loss": 2.5618, "step": 56380 }, { "epoch": 0.11234141909983425, "grad_norm": 0.14770002663135529, "learning_rate": 0.002, "loss": 2.5565, "step": 56390 }, { "epoch": 0.11236134132347315, "grad_norm": 0.17941893637180328, "learning_rate": 0.002, "loss": 2.5865, "step": 56400 }, { "epoch": 0.11238126354711207, "grad_norm": 0.16888384521007538, "learning_rate": 0.002, "loss": 2.5645, "step": 56410 }, { "epoch": 0.11240118577075099, "grad_norm": 0.15097305178642273, "learning_rate": 0.002, "loss": 2.5732, "step": 56420 }, { "epoch": 0.1124211079943899, "grad_norm": 0.16048173606395721, "learning_rate": 0.002, "loss": 2.5743, "step": 56430 }, { "epoch": 0.11244103021802881, "grad_norm": 0.16504868865013123, "learning_rate": 0.002, "loss": 2.5724, "step": 56440 }, { "epoch": 0.11246095244166773, "grad_norm": 0.1550481915473938, "learning_rate": 0.002, "loss": 2.5596, "step": 56450 }, { "epoch": 0.11248087466530664, "grad_norm": 0.17106691002845764, "learning_rate": 0.002, "loss": 2.5662, "step": 56460 }, { "epoch": 0.11250079688894556, "grad_norm": 0.16492052376270294, "learning_rate": 0.002, "loss": 2.5711, "step": 56470 }, { "epoch": 0.11252071911258447, "grad_norm": 0.16045039892196655, "learning_rate": 0.002, "loss": 2.5609, "step": 56480 }, { "epoch": 0.11254064133622338, "grad_norm": 0.155086487531662, "learning_rate": 0.002, "loss": 2.5702, "step": 56490 }, { "epoch": 0.1125605635598623, "grad_norm": 0.16356757283210754, "learning_rate": 0.002, "loss": 2.5829, "step": 56500 }, { "epoch": 0.11258048578350122, "grad_norm": 0.21073469519615173, "learning_rate": 0.002, "loss": 2.5782, "step": 56510 }, { "epoch": 0.11260040800714012, "grad_norm": 0.15579678118228912, "learning_rate": 0.002, "loss": 2.5737, "step": 56520 }, { "epoch": 0.11262033023077904, "grad_norm": 0.1871858537197113, "learning_rate": 0.002, "loss": 2.613, "step": 56530 }, { "epoch": 0.11264025245441796, "grad_norm": 0.16443894803524017, "learning_rate": 0.002, "loss": 2.5546, "step": 56540 }, { "epoch": 0.11266017467805686, "grad_norm": 0.15937557816505432, "learning_rate": 0.002, "loss": 2.5755, "step": 56550 }, { "epoch": 0.11268009690169578, "grad_norm": 0.1835545152425766, "learning_rate": 0.002, "loss": 2.5799, "step": 56560 }, { "epoch": 0.1127000191253347, "grad_norm": 0.1497005820274353, "learning_rate": 0.002, "loss": 2.5815, "step": 56570 }, { "epoch": 0.1127199413489736, "grad_norm": 0.19720299541950226, "learning_rate": 0.002, "loss": 2.5718, "step": 56580 }, { "epoch": 0.11273986357261252, "grad_norm": 0.14169961214065552, "learning_rate": 0.002, "loss": 2.5738, "step": 56590 }, { "epoch": 0.11275978579625144, "grad_norm": 0.14967471361160278, "learning_rate": 0.002, "loss": 2.575, "step": 56600 }, { "epoch": 0.11277970801989035, "grad_norm": 0.16861401498317719, "learning_rate": 0.002, "loss": 2.5721, "step": 56610 }, { "epoch": 0.11279963024352926, "grad_norm": 0.15939170122146606, "learning_rate": 0.002, "loss": 2.572, "step": 56620 }, { "epoch": 0.11281955246716817, "grad_norm": 0.15200160443782806, "learning_rate": 0.002, "loss": 2.5739, "step": 56630 }, { "epoch": 0.11283947469080709, "grad_norm": 0.15361455082893372, "learning_rate": 0.002, "loss": 2.5763, "step": 56640 }, { "epoch": 0.112859396914446, "grad_norm": 0.19822685420513153, "learning_rate": 0.002, "loss": 2.5751, "step": 56650 }, { "epoch": 0.11287931913808491, "grad_norm": 0.16046352684497833, "learning_rate": 0.002, "loss": 2.5657, "step": 56660 }, { "epoch": 0.11289924136172383, "grad_norm": 0.16401734948158264, "learning_rate": 0.002, "loss": 2.5513, "step": 56670 }, { "epoch": 0.11291916358536275, "grad_norm": 0.16806760430335999, "learning_rate": 0.002, "loss": 2.5712, "step": 56680 }, { "epoch": 0.11293908580900165, "grad_norm": 0.15561121702194214, "learning_rate": 0.002, "loss": 2.5493, "step": 56690 }, { "epoch": 0.11295900803264057, "grad_norm": 0.13792727887630463, "learning_rate": 0.002, "loss": 2.583, "step": 56700 }, { "epoch": 0.11297893025627949, "grad_norm": 0.17200349271297455, "learning_rate": 0.002, "loss": 2.5788, "step": 56710 }, { "epoch": 0.1129988524799184, "grad_norm": 0.13469314575195312, "learning_rate": 0.002, "loss": 2.5588, "step": 56720 }, { "epoch": 0.11301877470355731, "grad_norm": 0.14766685664653778, "learning_rate": 0.002, "loss": 2.5685, "step": 56730 }, { "epoch": 0.11303869692719623, "grad_norm": 0.20443420112133026, "learning_rate": 0.002, "loss": 2.5748, "step": 56740 }, { "epoch": 0.11305861915083514, "grad_norm": 0.1525326818227768, "learning_rate": 0.002, "loss": 2.5701, "step": 56750 }, { "epoch": 0.11307854137447405, "grad_norm": 0.1887425184249878, "learning_rate": 0.002, "loss": 2.5743, "step": 56760 }, { "epoch": 0.11309846359811297, "grad_norm": 0.15838472545146942, "learning_rate": 0.002, "loss": 2.5807, "step": 56770 }, { "epoch": 0.11311838582175188, "grad_norm": 0.16052474081516266, "learning_rate": 0.002, "loss": 2.5769, "step": 56780 }, { "epoch": 0.1131383080453908, "grad_norm": 0.16309227049350739, "learning_rate": 0.002, "loss": 2.5865, "step": 56790 }, { "epoch": 0.11315823026902971, "grad_norm": 0.21936534345149994, "learning_rate": 0.002, "loss": 2.5576, "step": 56800 }, { "epoch": 0.11317815249266862, "grad_norm": 0.1711040884256363, "learning_rate": 0.002, "loss": 2.589, "step": 56810 }, { "epoch": 0.11319807471630754, "grad_norm": 0.1601320207118988, "learning_rate": 0.002, "loss": 2.5592, "step": 56820 }, { "epoch": 0.11321799693994646, "grad_norm": 0.16846677660942078, "learning_rate": 0.002, "loss": 2.5764, "step": 56830 }, { "epoch": 0.11323791916358536, "grad_norm": 0.14258866012096405, "learning_rate": 0.002, "loss": 2.5614, "step": 56840 }, { "epoch": 0.11325784138722428, "grad_norm": 0.16725556552410126, "learning_rate": 0.002, "loss": 2.5646, "step": 56850 }, { "epoch": 0.1132777636108632, "grad_norm": 0.19475848972797394, "learning_rate": 0.002, "loss": 2.5603, "step": 56860 }, { "epoch": 0.1132976858345021, "grad_norm": 0.15974123775959015, "learning_rate": 0.002, "loss": 2.5767, "step": 56870 }, { "epoch": 0.11331760805814102, "grad_norm": 0.1527455598115921, "learning_rate": 0.002, "loss": 2.577, "step": 56880 }, { "epoch": 0.11333753028177992, "grad_norm": 0.2978796064853668, "learning_rate": 0.002, "loss": 2.5568, "step": 56890 }, { "epoch": 0.11335745250541884, "grad_norm": 0.15527158975601196, "learning_rate": 0.002, "loss": 2.5777, "step": 56900 }, { "epoch": 0.11337737472905776, "grad_norm": 0.15131784975528717, "learning_rate": 0.002, "loss": 2.5707, "step": 56910 }, { "epoch": 0.11339729695269667, "grad_norm": 0.23072436451911926, "learning_rate": 0.002, "loss": 2.5777, "step": 56920 }, { "epoch": 0.11341721917633558, "grad_norm": 0.15431100130081177, "learning_rate": 0.002, "loss": 2.5735, "step": 56930 }, { "epoch": 0.1134371413999745, "grad_norm": 0.16706156730651855, "learning_rate": 0.002, "loss": 2.5556, "step": 56940 }, { "epoch": 0.11345706362361341, "grad_norm": 0.17012131214141846, "learning_rate": 0.002, "loss": 2.5681, "step": 56950 }, { "epoch": 0.11347698584725233, "grad_norm": 0.1479964554309845, "learning_rate": 0.002, "loss": 2.5784, "step": 56960 }, { "epoch": 0.11349690807089124, "grad_norm": 0.1442147046327591, "learning_rate": 0.002, "loss": 2.5669, "step": 56970 }, { "epoch": 0.11351683029453015, "grad_norm": 0.17817582190036774, "learning_rate": 0.002, "loss": 2.5705, "step": 56980 }, { "epoch": 0.11353675251816907, "grad_norm": 0.15126071870326996, "learning_rate": 0.002, "loss": 2.5728, "step": 56990 }, { "epoch": 0.11355667474180799, "grad_norm": 0.1691775619983673, "learning_rate": 0.002, "loss": 2.575, "step": 57000 }, { "epoch": 0.11357659696544689, "grad_norm": 0.14499631524085999, "learning_rate": 0.002, "loss": 2.5751, "step": 57010 }, { "epoch": 0.11359651918908581, "grad_norm": 0.1860898733139038, "learning_rate": 0.002, "loss": 2.5788, "step": 57020 }, { "epoch": 0.11361644141272473, "grad_norm": 0.16840076446533203, "learning_rate": 0.002, "loss": 2.581, "step": 57030 }, { "epoch": 0.11363636363636363, "grad_norm": 0.15300340950489044, "learning_rate": 0.002, "loss": 2.5876, "step": 57040 }, { "epoch": 0.11365628586000255, "grad_norm": 0.17757797241210938, "learning_rate": 0.002, "loss": 2.5667, "step": 57050 }, { "epoch": 0.11367620808364147, "grad_norm": 0.14890429377555847, "learning_rate": 0.002, "loss": 2.5633, "step": 57060 }, { "epoch": 0.11369613030728037, "grad_norm": 0.1629704236984253, "learning_rate": 0.002, "loss": 2.5843, "step": 57070 }, { "epoch": 0.11371605253091929, "grad_norm": 0.13627788424491882, "learning_rate": 0.002, "loss": 2.5669, "step": 57080 }, { "epoch": 0.11373597475455821, "grad_norm": 0.16207264363765717, "learning_rate": 0.002, "loss": 2.573, "step": 57090 }, { "epoch": 0.11375589697819712, "grad_norm": 0.1637168973684311, "learning_rate": 0.002, "loss": 2.5749, "step": 57100 }, { "epoch": 0.11377581920183603, "grad_norm": 0.153682142496109, "learning_rate": 0.002, "loss": 2.5826, "step": 57110 }, { "epoch": 0.11379574142547495, "grad_norm": 0.16937404870986938, "learning_rate": 0.002, "loss": 2.5592, "step": 57120 }, { "epoch": 0.11381566364911386, "grad_norm": 0.16923798620700836, "learning_rate": 0.002, "loss": 2.571, "step": 57130 }, { "epoch": 0.11383558587275278, "grad_norm": 0.16154485940933228, "learning_rate": 0.002, "loss": 2.5627, "step": 57140 }, { "epoch": 0.11385550809639168, "grad_norm": 0.18778128921985626, "learning_rate": 0.002, "loss": 2.5695, "step": 57150 }, { "epoch": 0.1138754303200306, "grad_norm": 0.16725794970989227, "learning_rate": 0.002, "loss": 2.5922, "step": 57160 }, { "epoch": 0.11389535254366952, "grad_norm": 0.15388721227645874, "learning_rate": 0.002, "loss": 2.5737, "step": 57170 }, { "epoch": 0.11391527476730842, "grad_norm": 0.1806248128414154, "learning_rate": 0.002, "loss": 2.5804, "step": 57180 }, { "epoch": 0.11393519699094734, "grad_norm": 0.14219778776168823, "learning_rate": 0.002, "loss": 2.5531, "step": 57190 }, { "epoch": 0.11395511921458626, "grad_norm": 0.1361485868692398, "learning_rate": 0.002, "loss": 2.5865, "step": 57200 }, { "epoch": 0.11397504143822516, "grad_norm": 0.16914603114128113, "learning_rate": 0.002, "loss": 2.5695, "step": 57210 }, { "epoch": 0.11399496366186408, "grad_norm": 0.1384032517671585, "learning_rate": 0.002, "loss": 2.5758, "step": 57220 }, { "epoch": 0.114014885885503, "grad_norm": 0.16209128499031067, "learning_rate": 0.002, "loss": 2.5819, "step": 57230 }, { "epoch": 0.1140348081091419, "grad_norm": 0.15392006933689117, "learning_rate": 0.002, "loss": 2.5746, "step": 57240 }, { "epoch": 0.11405473033278082, "grad_norm": 0.1636122614145279, "learning_rate": 0.002, "loss": 2.5775, "step": 57250 }, { "epoch": 0.11407465255641974, "grad_norm": 0.1668013036251068, "learning_rate": 0.002, "loss": 2.5724, "step": 57260 }, { "epoch": 0.11409457478005865, "grad_norm": 0.17131583392620087, "learning_rate": 0.002, "loss": 2.5803, "step": 57270 }, { "epoch": 0.11411449700369757, "grad_norm": 0.16915655136108398, "learning_rate": 0.002, "loss": 2.5683, "step": 57280 }, { "epoch": 0.11413441922733648, "grad_norm": 0.14513711631298065, "learning_rate": 0.002, "loss": 2.5575, "step": 57290 }, { "epoch": 0.11415434145097539, "grad_norm": 0.15086299180984497, "learning_rate": 0.002, "loss": 2.568, "step": 57300 }, { "epoch": 0.1141742636746143, "grad_norm": 0.23190639913082123, "learning_rate": 0.002, "loss": 2.5641, "step": 57310 }, { "epoch": 0.11419418589825323, "grad_norm": 0.18143081665039062, "learning_rate": 0.002, "loss": 2.5714, "step": 57320 }, { "epoch": 0.11421410812189213, "grad_norm": 0.16175810992717743, "learning_rate": 0.002, "loss": 2.5562, "step": 57330 }, { "epoch": 0.11423403034553105, "grad_norm": 0.13964857161045074, "learning_rate": 0.002, "loss": 2.5866, "step": 57340 }, { "epoch": 0.11425395256916997, "grad_norm": 0.16515398025512695, "learning_rate": 0.002, "loss": 2.5772, "step": 57350 }, { "epoch": 0.11427387479280887, "grad_norm": 0.13746584951877594, "learning_rate": 0.002, "loss": 2.5645, "step": 57360 }, { "epoch": 0.11429379701644779, "grad_norm": 0.16144467890262604, "learning_rate": 0.002, "loss": 2.573, "step": 57370 }, { "epoch": 0.1143137192400867, "grad_norm": 0.15659582614898682, "learning_rate": 0.002, "loss": 2.5765, "step": 57380 }, { "epoch": 0.11433364146372561, "grad_norm": 0.139644056558609, "learning_rate": 0.002, "loss": 2.578, "step": 57390 }, { "epoch": 0.11435356368736453, "grad_norm": 0.1626472920179367, "learning_rate": 0.002, "loss": 2.5544, "step": 57400 }, { "epoch": 0.11437348591100344, "grad_norm": 0.1712077558040619, "learning_rate": 0.002, "loss": 2.5734, "step": 57410 }, { "epoch": 0.11439340813464235, "grad_norm": 0.12584827840328217, "learning_rate": 0.002, "loss": 2.5652, "step": 57420 }, { "epoch": 0.11441333035828127, "grad_norm": 0.1598069965839386, "learning_rate": 0.002, "loss": 2.5808, "step": 57430 }, { "epoch": 0.11443325258192018, "grad_norm": 0.16140703856945038, "learning_rate": 0.002, "loss": 2.5728, "step": 57440 }, { "epoch": 0.1144531748055591, "grad_norm": 0.14752502739429474, "learning_rate": 0.002, "loss": 2.5794, "step": 57450 }, { "epoch": 0.11447309702919801, "grad_norm": 0.15865492820739746, "learning_rate": 0.002, "loss": 2.5627, "step": 57460 }, { "epoch": 0.11449301925283692, "grad_norm": 0.155950129032135, "learning_rate": 0.002, "loss": 2.5511, "step": 57470 }, { "epoch": 0.11451294147647584, "grad_norm": 0.19799885153770447, "learning_rate": 0.002, "loss": 2.5882, "step": 57480 }, { "epoch": 0.11453286370011476, "grad_norm": 0.19038894772529602, "learning_rate": 0.002, "loss": 2.5774, "step": 57490 }, { "epoch": 0.11455278592375366, "grad_norm": 0.1392751932144165, "learning_rate": 0.002, "loss": 2.587, "step": 57500 }, { "epoch": 0.11457270814739258, "grad_norm": 0.16691547632217407, "learning_rate": 0.002, "loss": 2.5733, "step": 57510 }, { "epoch": 0.1145926303710315, "grad_norm": 0.1725742667913437, "learning_rate": 0.002, "loss": 2.569, "step": 57520 }, { "epoch": 0.1146125525946704, "grad_norm": 0.18660858273506165, "learning_rate": 0.002, "loss": 2.5801, "step": 57530 }, { "epoch": 0.11463247481830932, "grad_norm": 0.15337206423282623, "learning_rate": 0.002, "loss": 2.589, "step": 57540 }, { "epoch": 0.11465239704194824, "grad_norm": 0.16463470458984375, "learning_rate": 0.002, "loss": 2.5732, "step": 57550 }, { "epoch": 0.11467231926558714, "grad_norm": 0.17688514292240143, "learning_rate": 0.002, "loss": 2.5861, "step": 57560 }, { "epoch": 0.11469224148922606, "grad_norm": 0.19219355285167694, "learning_rate": 0.002, "loss": 2.5829, "step": 57570 }, { "epoch": 0.11471216371286498, "grad_norm": 0.14270052313804626, "learning_rate": 0.002, "loss": 2.5704, "step": 57580 }, { "epoch": 0.11473208593650389, "grad_norm": 0.17010900378227234, "learning_rate": 0.002, "loss": 2.5762, "step": 57590 }, { "epoch": 0.1147520081601428, "grad_norm": 0.16342781484127045, "learning_rate": 0.002, "loss": 2.5871, "step": 57600 }, { "epoch": 0.11477193038378172, "grad_norm": 0.16675741970539093, "learning_rate": 0.002, "loss": 2.5717, "step": 57610 }, { "epoch": 0.11479185260742063, "grad_norm": 0.1611311137676239, "learning_rate": 0.002, "loss": 2.591, "step": 57620 }, { "epoch": 0.11481177483105955, "grad_norm": 0.16531483829021454, "learning_rate": 0.002, "loss": 2.5754, "step": 57630 }, { "epoch": 0.11483169705469845, "grad_norm": 0.16122670471668243, "learning_rate": 0.002, "loss": 2.5738, "step": 57640 }, { "epoch": 0.11485161927833737, "grad_norm": 0.17770244181156158, "learning_rate": 0.002, "loss": 2.5852, "step": 57650 }, { "epoch": 0.11487154150197629, "grad_norm": 0.15723717212677002, "learning_rate": 0.002, "loss": 2.579, "step": 57660 }, { "epoch": 0.11489146372561519, "grad_norm": 0.15860691666603088, "learning_rate": 0.002, "loss": 2.5733, "step": 57670 }, { "epoch": 0.11491138594925411, "grad_norm": 0.14748725295066833, "learning_rate": 0.002, "loss": 2.5774, "step": 57680 }, { "epoch": 0.11493130817289303, "grad_norm": 0.16117455065250397, "learning_rate": 0.002, "loss": 2.5852, "step": 57690 }, { "epoch": 0.11495123039653193, "grad_norm": 0.1806642860174179, "learning_rate": 0.002, "loss": 2.576, "step": 57700 }, { "epoch": 0.11497115262017085, "grad_norm": 0.16978996992111206, "learning_rate": 0.002, "loss": 2.5887, "step": 57710 }, { "epoch": 0.11499107484380977, "grad_norm": 0.171907439827919, "learning_rate": 0.002, "loss": 2.5819, "step": 57720 }, { "epoch": 0.11501099706744868, "grad_norm": 0.1343069225549698, "learning_rate": 0.002, "loss": 2.5578, "step": 57730 }, { "epoch": 0.1150309192910876, "grad_norm": 0.18195605278015137, "learning_rate": 0.002, "loss": 2.5822, "step": 57740 }, { "epoch": 0.11505084151472651, "grad_norm": 0.1585054099559784, "learning_rate": 0.002, "loss": 2.5849, "step": 57750 }, { "epoch": 0.11507076373836542, "grad_norm": 0.1656120866537094, "learning_rate": 0.002, "loss": 2.5743, "step": 57760 }, { "epoch": 0.11509068596200434, "grad_norm": 0.18099308013916016, "learning_rate": 0.002, "loss": 2.5778, "step": 57770 }, { "epoch": 0.11511060818564325, "grad_norm": 0.14770276844501495, "learning_rate": 0.002, "loss": 2.5655, "step": 57780 }, { "epoch": 0.11513053040928216, "grad_norm": 0.15960249304771423, "learning_rate": 0.002, "loss": 2.5625, "step": 57790 }, { "epoch": 0.11515045263292108, "grad_norm": 0.18723246455192566, "learning_rate": 0.002, "loss": 2.5729, "step": 57800 }, { "epoch": 0.11517037485656, "grad_norm": 0.14867840707302094, "learning_rate": 0.002, "loss": 2.5766, "step": 57810 }, { "epoch": 0.1151902970801989, "grad_norm": 0.1311473697423935, "learning_rate": 0.002, "loss": 2.5589, "step": 57820 }, { "epoch": 0.11521021930383782, "grad_norm": 0.6471224427223206, "learning_rate": 0.002, "loss": 2.5695, "step": 57830 }, { "epoch": 0.11523014152747674, "grad_norm": 0.17528267204761505, "learning_rate": 0.002, "loss": 2.5687, "step": 57840 }, { "epoch": 0.11525006375111564, "grad_norm": 0.1528485268354416, "learning_rate": 0.002, "loss": 2.5805, "step": 57850 }, { "epoch": 0.11526998597475456, "grad_norm": 0.20090271532535553, "learning_rate": 0.002, "loss": 2.5951, "step": 57860 }, { "epoch": 0.11528990819839348, "grad_norm": 0.16410106420516968, "learning_rate": 0.002, "loss": 2.5632, "step": 57870 }, { "epoch": 0.11530983042203238, "grad_norm": 0.162248894572258, "learning_rate": 0.002, "loss": 2.5607, "step": 57880 }, { "epoch": 0.1153297526456713, "grad_norm": 0.155002161860466, "learning_rate": 0.002, "loss": 2.5689, "step": 57890 }, { "epoch": 0.1153496748693102, "grad_norm": 0.14787819981575012, "learning_rate": 0.002, "loss": 2.5712, "step": 57900 }, { "epoch": 0.11536959709294912, "grad_norm": 0.16512344777584076, "learning_rate": 0.002, "loss": 2.574, "step": 57910 }, { "epoch": 0.11538951931658804, "grad_norm": 0.1806543469429016, "learning_rate": 0.002, "loss": 2.5729, "step": 57920 }, { "epoch": 0.11540944154022695, "grad_norm": 0.147843599319458, "learning_rate": 0.002, "loss": 2.5748, "step": 57930 }, { "epoch": 0.11542936376386587, "grad_norm": 0.1600150614976883, "learning_rate": 0.002, "loss": 2.5693, "step": 57940 }, { "epoch": 0.11544928598750478, "grad_norm": 0.19390349090099335, "learning_rate": 0.002, "loss": 2.5802, "step": 57950 }, { "epoch": 0.11546920821114369, "grad_norm": 0.16348794102668762, "learning_rate": 0.002, "loss": 2.5837, "step": 57960 }, { "epoch": 0.11548913043478261, "grad_norm": 0.15283074975013733, "learning_rate": 0.002, "loss": 2.5703, "step": 57970 }, { "epoch": 0.11550905265842153, "grad_norm": 0.18660520017147064, "learning_rate": 0.002, "loss": 2.5635, "step": 57980 }, { "epoch": 0.11552897488206043, "grad_norm": 0.17042645812034607, "learning_rate": 0.002, "loss": 2.5858, "step": 57990 }, { "epoch": 0.11554889710569935, "grad_norm": 0.1633136123418808, "learning_rate": 0.002, "loss": 2.5712, "step": 58000 }, { "epoch": 0.11556881932933827, "grad_norm": 0.14602898061275482, "learning_rate": 0.002, "loss": 2.5731, "step": 58010 }, { "epoch": 0.11558874155297717, "grad_norm": 0.15901592373847961, "learning_rate": 0.002, "loss": 2.5884, "step": 58020 }, { "epoch": 0.11560866377661609, "grad_norm": 0.16721349954605103, "learning_rate": 0.002, "loss": 2.5757, "step": 58030 }, { "epoch": 0.11562858600025501, "grad_norm": 0.17372824251651764, "learning_rate": 0.002, "loss": 2.5678, "step": 58040 }, { "epoch": 0.11564850822389391, "grad_norm": 0.15663696825504303, "learning_rate": 0.002, "loss": 2.566, "step": 58050 }, { "epoch": 0.11566843044753283, "grad_norm": 0.14035972952842712, "learning_rate": 0.002, "loss": 2.577, "step": 58060 }, { "epoch": 0.11568835267117175, "grad_norm": 0.16121363639831543, "learning_rate": 0.002, "loss": 2.5894, "step": 58070 }, { "epoch": 0.11570827489481066, "grad_norm": 0.156107559800148, "learning_rate": 0.002, "loss": 2.5765, "step": 58080 }, { "epoch": 0.11572819711844957, "grad_norm": 0.17891071736812592, "learning_rate": 0.002, "loss": 2.5739, "step": 58090 }, { "epoch": 0.11574811934208849, "grad_norm": 0.16656066477298737, "learning_rate": 0.002, "loss": 2.5726, "step": 58100 }, { "epoch": 0.1157680415657274, "grad_norm": 0.1829211562871933, "learning_rate": 0.002, "loss": 2.5768, "step": 58110 }, { "epoch": 0.11578796378936632, "grad_norm": 0.1962161511182785, "learning_rate": 0.002, "loss": 2.5806, "step": 58120 }, { "epoch": 0.11580788601300522, "grad_norm": 0.1856348067522049, "learning_rate": 0.002, "loss": 2.5727, "step": 58130 }, { "epoch": 0.11582780823664414, "grad_norm": 0.18386057019233704, "learning_rate": 0.002, "loss": 2.5655, "step": 58140 }, { "epoch": 0.11584773046028306, "grad_norm": 0.15091796219348907, "learning_rate": 0.002, "loss": 2.5685, "step": 58150 }, { "epoch": 0.11586765268392196, "grad_norm": 0.17783606052398682, "learning_rate": 0.002, "loss": 2.5631, "step": 58160 }, { "epoch": 0.11588757490756088, "grad_norm": 0.19574059545993805, "learning_rate": 0.002, "loss": 2.5755, "step": 58170 }, { "epoch": 0.1159074971311998, "grad_norm": 0.14045730233192444, "learning_rate": 0.002, "loss": 2.5593, "step": 58180 }, { "epoch": 0.1159274193548387, "grad_norm": 0.17909428477287292, "learning_rate": 0.002, "loss": 2.5748, "step": 58190 }, { "epoch": 0.11594734157847762, "grad_norm": 0.184925377368927, "learning_rate": 0.002, "loss": 2.5726, "step": 58200 }, { "epoch": 0.11596726380211654, "grad_norm": 0.15282346308231354, "learning_rate": 0.002, "loss": 2.5741, "step": 58210 }, { "epoch": 0.11598718602575545, "grad_norm": 0.14430642127990723, "learning_rate": 0.002, "loss": 2.5755, "step": 58220 }, { "epoch": 0.11600710824939436, "grad_norm": 0.22684825956821442, "learning_rate": 0.002, "loss": 2.5754, "step": 58230 }, { "epoch": 0.11602703047303328, "grad_norm": 0.15399901568889618, "learning_rate": 0.002, "loss": 2.578, "step": 58240 }, { "epoch": 0.11604695269667219, "grad_norm": 0.14677374064922333, "learning_rate": 0.002, "loss": 2.571, "step": 58250 }, { "epoch": 0.1160668749203111, "grad_norm": 0.14678198099136353, "learning_rate": 0.002, "loss": 2.5561, "step": 58260 }, { "epoch": 0.11608679714395002, "grad_norm": 0.1693526953458786, "learning_rate": 0.002, "loss": 2.5728, "step": 58270 }, { "epoch": 0.11610671936758893, "grad_norm": 0.19359196722507477, "learning_rate": 0.002, "loss": 2.5667, "step": 58280 }, { "epoch": 0.11612664159122785, "grad_norm": 0.15987908840179443, "learning_rate": 0.002, "loss": 2.5654, "step": 58290 }, { "epoch": 0.11614656381486677, "grad_norm": 0.15129654109477997, "learning_rate": 0.002, "loss": 2.5645, "step": 58300 }, { "epoch": 0.11616648603850567, "grad_norm": 0.16926482319831848, "learning_rate": 0.002, "loss": 2.5786, "step": 58310 }, { "epoch": 0.11618640826214459, "grad_norm": 0.15994438529014587, "learning_rate": 0.002, "loss": 2.5735, "step": 58320 }, { "epoch": 0.11620633048578351, "grad_norm": 0.16361194849014282, "learning_rate": 0.002, "loss": 2.5614, "step": 58330 }, { "epoch": 0.11622625270942241, "grad_norm": 0.14586201310157776, "learning_rate": 0.002, "loss": 2.5756, "step": 58340 }, { "epoch": 0.11624617493306133, "grad_norm": 0.151519775390625, "learning_rate": 0.002, "loss": 2.5646, "step": 58350 }, { "epoch": 0.11626609715670025, "grad_norm": 0.1607511341571808, "learning_rate": 0.002, "loss": 2.5705, "step": 58360 }, { "epoch": 0.11628601938033915, "grad_norm": 0.17053982615470886, "learning_rate": 0.002, "loss": 2.5573, "step": 58370 }, { "epoch": 0.11630594160397807, "grad_norm": 0.15635846555233002, "learning_rate": 0.002, "loss": 2.5791, "step": 58380 }, { "epoch": 0.11632586382761698, "grad_norm": 0.1292433738708496, "learning_rate": 0.002, "loss": 2.565, "step": 58390 }, { "epoch": 0.1163457860512559, "grad_norm": 0.14051315188407898, "learning_rate": 0.002, "loss": 2.5789, "step": 58400 }, { "epoch": 0.11636570827489481, "grad_norm": 0.145244762301445, "learning_rate": 0.002, "loss": 2.5749, "step": 58410 }, { "epoch": 0.11638563049853372, "grad_norm": 0.19978222250938416, "learning_rate": 0.002, "loss": 2.5824, "step": 58420 }, { "epoch": 0.11640555272217264, "grad_norm": 0.15228423476219177, "learning_rate": 0.002, "loss": 2.5667, "step": 58430 }, { "epoch": 0.11642547494581156, "grad_norm": 0.16670861840248108, "learning_rate": 0.002, "loss": 2.5651, "step": 58440 }, { "epoch": 0.11644539716945046, "grad_norm": 0.15213622152805328, "learning_rate": 0.002, "loss": 2.5711, "step": 58450 }, { "epoch": 0.11646531939308938, "grad_norm": 0.15539731085300446, "learning_rate": 0.002, "loss": 2.571, "step": 58460 }, { "epoch": 0.1164852416167283, "grad_norm": 0.15961535274982452, "learning_rate": 0.002, "loss": 2.5772, "step": 58470 }, { "epoch": 0.1165051638403672, "grad_norm": 0.17744389176368713, "learning_rate": 0.002, "loss": 2.5754, "step": 58480 }, { "epoch": 0.11652508606400612, "grad_norm": 0.17900340259075165, "learning_rate": 0.002, "loss": 2.5585, "step": 58490 }, { "epoch": 0.11654500828764504, "grad_norm": 0.1539612114429474, "learning_rate": 0.002, "loss": 2.5625, "step": 58500 }, { "epoch": 0.11656493051128394, "grad_norm": 0.17469434440135956, "learning_rate": 0.002, "loss": 2.5684, "step": 58510 }, { "epoch": 0.11658485273492286, "grad_norm": 0.15721376240253448, "learning_rate": 0.002, "loss": 2.5579, "step": 58520 }, { "epoch": 0.11660477495856178, "grad_norm": 0.15597586333751678, "learning_rate": 0.002, "loss": 2.5633, "step": 58530 }, { "epoch": 0.11662469718220068, "grad_norm": 0.190299391746521, "learning_rate": 0.002, "loss": 2.5743, "step": 58540 }, { "epoch": 0.1166446194058396, "grad_norm": 0.148018941283226, "learning_rate": 0.002, "loss": 2.5694, "step": 58550 }, { "epoch": 0.11666454162947852, "grad_norm": 0.13527704775333405, "learning_rate": 0.002, "loss": 2.5808, "step": 58560 }, { "epoch": 0.11668446385311743, "grad_norm": 0.17920894920825958, "learning_rate": 0.002, "loss": 2.5832, "step": 58570 }, { "epoch": 0.11670438607675634, "grad_norm": 0.153656005859375, "learning_rate": 0.002, "loss": 2.5652, "step": 58580 }, { "epoch": 0.11672430830039526, "grad_norm": 0.15286371111869812, "learning_rate": 0.002, "loss": 2.5637, "step": 58590 }, { "epoch": 0.11674423052403417, "grad_norm": 0.18754956126213074, "learning_rate": 0.002, "loss": 2.5789, "step": 58600 }, { "epoch": 0.11676415274767309, "grad_norm": 0.16504715383052826, "learning_rate": 0.002, "loss": 2.5709, "step": 58610 }, { "epoch": 0.116784074971312, "grad_norm": 0.15892042219638824, "learning_rate": 0.002, "loss": 2.5726, "step": 58620 }, { "epoch": 0.11680399719495091, "grad_norm": 0.16419829428195953, "learning_rate": 0.002, "loss": 2.5845, "step": 58630 }, { "epoch": 0.11682391941858983, "grad_norm": 0.1807456612586975, "learning_rate": 0.002, "loss": 2.5747, "step": 58640 }, { "epoch": 0.11684384164222873, "grad_norm": 0.17357072234153748, "learning_rate": 0.002, "loss": 2.5752, "step": 58650 }, { "epoch": 0.11686376386586765, "grad_norm": 0.1415058970451355, "learning_rate": 0.002, "loss": 2.5841, "step": 58660 }, { "epoch": 0.11688368608950657, "grad_norm": 0.1603657603263855, "learning_rate": 0.002, "loss": 2.5777, "step": 58670 }, { "epoch": 0.11690360831314547, "grad_norm": 0.16982540488243103, "learning_rate": 0.002, "loss": 2.5752, "step": 58680 }, { "epoch": 0.11692353053678439, "grad_norm": 0.1499328911304474, "learning_rate": 0.002, "loss": 2.5675, "step": 58690 }, { "epoch": 0.11694345276042331, "grad_norm": 0.15601739287376404, "learning_rate": 0.002, "loss": 2.5687, "step": 58700 }, { "epoch": 0.11696337498406222, "grad_norm": 0.16595005989074707, "learning_rate": 0.002, "loss": 2.5759, "step": 58710 }, { "epoch": 0.11698329720770113, "grad_norm": 0.17965483665466309, "learning_rate": 0.002, "loss": 2.5686, "step": 58720 }, { "epoch": 0.11700321943134005, "grad_norm": 0.14535242319107056, "learning_rate": 0.002, "loss": 2.5654, "step": 58730 }, { "epoch": 0.11702314165497896, "grad_norm": 0.1582159847021103, "learning_rate": 0.002, "loss": 2.569, "step": 58740 }, { "epoch": 0.11704306387861788, "grad_norm": 0.20704418420791626, "learning_rate": 0.002, "loss": 2.5805, "step": 58750 }, { "epoch": 0.1170629861022568, "grad_norm": 0.16208118200302124, "learning_rate": 0.002, "loss": 2.5665, "step": 58760 }, { "epoch": 0.1170829083258957, "grad_norm": 0.17130893468856812, "learning_rate": 0.002, "loss": 2.5661, "step": 58770 }, { "epoch": 0.11710283054953462, "grad_norm": 0.14762143790721893, "learning_rate": 0.002, "loss": 2.572, "step": 58780 }, { "epoch": 0.11712275277317354, "grad_norm": 0.1540842205286026, "learning_rate": 0.002, "loss": 2.5749, "step": 58790 }, { "epoch": 0.11714267499681244, "grad_norm": 0.15259821712970734, "learning_rate": 0.002, "loss": 2.5644, "step": 58800 }, { "epoch": 0.11716259722045136, "grad_norm": 0.2026263028383255, "learning_rate": 0.002, "loss": 2.577, "step": 58810 }, { "epoch": 0.11718251944409028, "grad_norm": 0.15408667922019958, "learning_rate": 0.002, "loss": 2.5624, "step": 58820 }, { "epoch": 0.11720244166772918, "grad_norm": 0.1626800149679184, "learning_rate": 0.002, "loss": 2.5743, "step": 58830 }, { "epoch": 0.1172223638913681, "grad_norm": 0.15110370516777039, "learning_rate": 0.002, "loss": 2.5703, "step": 58840 }, { "epoch": 0.11724228611500702, "grad_norm": 0.1943674236536026, "learning_rate": 0.002, "loss": 2.5789, "step": 58850 }, { "epoch": 0.11726220833864592, "grad_norm": 0.14089567959308624, "learning_rate": 0.002, "loss": 2.5839, "step": 58860 }, { "epoch": 0.11728213056228484, "grad_norm": 0.19934965670108795, "learning_rate": 0.002, "loss": 2.5562, "step": 58870 }, { "epoch": 0.11730205278592376, "grad_norm": 0.16302868723869324, "learning_rate": 0.002, "loss": 2.58, "step": 58880 }, { "epoch": 0.11732197500956267, "grad_norm": 0.20092113316059113, "learning_rate": 0.002, "loss": 2.5795, "step": 58890 }, { "epoch": 0.11734189723320158, "grad_norm": 0.1717752069234848, "learning_rate": 0.002, "loss": 2.5622, "step": 58900 }, { "epoch": 0.11736181945684049, "grad_norm": 0.16376109421253204, "learning_rate": 0.002, "loss": 2.5928, "step": 58910 }, { "epoch": 0.1173817416804794, "grad_norm": 0.1601347029209137, "learning_rate": 0.002, "loss": 2.572, "step": 58920 }, { "epoch": 0.11740166390411833, "grad_norm": 0.1534191071987152, "learning_rate": 0.002, "loss": 2.5862, "step": 58930 }, { "epoch": 0.11742158612775723, "grad_norm": 0.17711912095546722, "learning_rate": 0.002, "loss": 2.5652, "step": 58940 }, { "epoch": 0.11744150835139615, "grad_norm": 0.17933641374111176, "learning_rate": 0.002, "loss": 2.588, "step": 58950 }, { "epoch": 0.11746143057503507, "grad_norm": 0.171842560172081, "learning_rate": 0.002, "loss": 2.5757, "step": 58960 }, { "epoch": 0.11748135279867397, "grad_norm": 0.1855241060256958, "learning_rate": 0.002, "loss": 2.585, "step": 58970 }, { "epoch": 0.11750127502231289, "grad_norm": 0.15059101581573486, "learning_rate": 0.002, "loss": 2.5604, "step": 58980 }, { "epoch": 0.11752119724595181, "grad_norm": 0.15254448354244232, "learning_rate": 0.002, "loss": 2.5819, "step": 58990 }, { "epoch": 0.11754111946959071, "grad_norm": 0.13958720862865448, "learning_rate": 0.002, "loss": 2.5863, "step": 59000 }, { "epoch": 0.11756104169322963, "grad_norm": 0.14544858038425446, "learning_rate": 0.002, "loss": 2.5714, "step": 59010 }, { "epoch": 0.11758096391686855, "grad_norm": 0.17466576397418976, "learning_rate": 0.002, "loss": 2.5734, "step": 59020 }, { "epoch": 0.11760088614050745, "grad_norm": 0.18693110346794128, "learning_rate": 0.002, "loss": 2.5828, "step": 59030 }, { "epoch": 0.11762080836414637, "grad_norm": 0.16092102229595184, "learning_rate": 0.002, "loss": 2.5698, "step": 59040 }, { "epoch": 0.11764073058778529, "grad_norm": 0.15613041818141937, "learning_rate": 0.002, "loss": 2.5729, "step": 59050 }, { "epoch": 0.1176606528114242, "grad_norm": 0.15704064071178436, "learning_rate": 0.002, "loss": 2.5946, "step": 59060 }, { "epoch": 0.11768057503506311, "grad_norm": 0.13478481769561768, "learning_rate": 0.002, "loss": 2.578, "step": 59070 }, { "epoch": 0.11770049725870203, "grad_norm": 0.18248699605464935, "learning_rate": 0.002, "loss": 2.5815, "step": 59080 }, { "epoch": 0.11772041948234094, "grad_norm": 0.15670648217201233, "learning_rate": 0.002, "loss": 2.5638, "step": 59090 }, { "epoch": 0.11774034170597986, "grad_norm": 0.14523404836654663, "learning_rate": 0.002, "loss": 2.559, "step": 59100 }, { "epoch": 0.11776026392961877, "grad_norm": 0.14067596197128296, "learning_rate": 0.002, "loss": 2.5749, "step": 59110 }, { "epoch": 0.11778018615325768, "grad_norm": 0.15503032505512238, "learning_rate": 0.002, "loss": 2.5672, "step": 59120 }, { "epoch": 0.1178001083768966, "grad_norm": 0.1619773507118225, "learning_rate": 0.002, "loss": 2.5688, "step": 59130 }, { "epoch": 0.1178200306005355, "grad_norm": 0.16113273799419403, "learning_rate": 0.002, "loss": 2.5676, "step": 59140 }, { "epoch": 0.11783995282417442, "grad_norm": 0.1758374124765396, "learning_rate": 0.002, "loss": 2.58, "step": 59150 }, { "epoch": 0.11785987504781334, "grad_norm": 0.17679499089717865, "learning_rate": 0.002, "loss": 2.5761, "step": 59160 }, { "epoch": 0.11787979727145224, "grad_norm": 0.14766214787960052, "learning_rate": 0.002, "loss": 2.5782, "step": 59170 }, { "epoch": 0.11789971949509116, "grad_norm": 0.1716434806585312, "learning_rate": 0.002, "loss": 2.5701, "step": 59180 }, { "epoch": 0.11791964171873008, "grad_norm": 0.15104736387729645, "learning_rate": 0.002, "loss": 2.5749, "step": 59190 }, { "epoch": 0.11793956394236899, "grad_norm": 0.15683282911777496, "learning_rate": 0.002, "loss": 2.5799, "step": 59200 }, { "epoch": 0.1179594861660079, "grad_norm": 0.14780665934085846, "learning_rate": 0.002, "loss": 2.5779, "step": 59210 }, { "epoch": 0.11797940838964682, "grad_norm": 0.17573139071464539, "learning_rate": 0.002, "loss": 2.5666, "step": 59220 }, { "epoch": 0.11799933061328573, "grad_norm": 0.17481529712677002, "learning_rate": 0.002, "loss": 2.5754, "step": 59230 }, { "epoch": 0.11801925283692465, "grad_norm": 0.1446005403995514, "learning_rate": 0.002, "loss": 2.5775, "step": 59240 }, { "epoch": 0.11803917506056356, "grad_norm": 0.18339073657989502, "learning_rate": 0.002, "loss": 2.5603, "step": 59250 }, { "epoch": 0.11805909728420247, "grad_norm": 0.17302429676055908, "learning_rate": 0.002, "loss": 2.5705, "step": 59260 }, { "epoch": 0.11807901950784139, "grad_norm": 0.16243627667427063, "learning_rate": 0.002, "loss": 2.5763, "step": 59270 }, { "epoch": 0.1180989417314803, "grad_norm": 0.15000610053539276, "learning_rate": 0.002, "loss": 2.5646, "step": 59280 }, { "epoch": 0.11811886395511921, "grad_norm": 0.15880125761032104, "learning_rate": 0.002, "loss": 2.5589, "step": 59290 }, { "epoch": 0.11813878617875813, "grad_norm": 0.15554691851139069, "learning_rate": 0.002, "loss": 2.5641, "step": 59300 }, { "epoch": 0.11815870840239705, "grad_norm": 0.1710810512304306, "learning_rate": 0.002, "loss": 2.5686, "step": 59310 }, { "epoch": 0.11817863062603595, "grad_norm": 0.16175203025341034, "learning_rate": 0.002, "loss": 2.5734, "step": 59320 }, { "epoch": 0.11819855284967487, "grad_norm": 0.14705200493335724, "learning_rate": 0.002, "loss": 2.5727, "step": 59330 }, { "epoch": 0.11821847507331379, "grad_norm": 0.13924066722393036, "learning_rate": 0.002, "loss": 2.5762, "step": 59340 }, { "epoch": 0.1182383972969527, "grad_norm": 0.17760710418224335, "learning_rate": 0.002, "loss": 2.5761, "step": 59350 }, { "epoch": 0.11825831952059161, "grad_norm": 0.16546829044818878, "learning_rate": 0.002, "loss": 2.5728, "step": 59360 }, { "epoch": 0.11827824174423053, "grad_norm": 0.17112132906913757, "learning_rate": 0.002, "loss": 2.5711, "step": 59370 }, { "epoch": 0.11829816396786944, "grad_norm": 0.13831603527069092, "learning_rate": 0.002, "loss": 2.5494, "step": 59380 }, { "epoch": 0.11831808619150835, "grad_norm": 0.17671748995780945, "learning_rate": 0.002, "loss": 2.5803, "step": 59390 }, { "epoch": 0.11833800841514726, "grad_norm": 0.16165335476398468, "learning_rate": 0.002, "loss": 2.5917, "step": 59400 }, { "epoch": 0.11835793063878618, "grad_norm": 0.15433408319950104, "learning_rate": 0.002, "loss": 2.5644, "step": 59410 }, { "epoch": 0.1183778528624251, "grad_norm": 0.15423797070980072, "learning_rate": 0.002, "loss": 2.5769, "step": 59420 }, { "epoch": 0.118397775086064, "grad_norm": 0.15402331948280334, "learning_rate": 0.002, "loss": 2.5686, "step": 59430 }, { "epoch": 0.11841769730970292, "grad_norm": 0.1620771586894989, "learning_rate": 0.002, "loss": 2.5784, "step": 59440 }, { "epoch": 0.11843761953334184, "grad_norm": 0.16822147369384766, "learning_rate": 0.002, "loss": 2.5697, "step": 59450 }, { "epoch": 0.11845754175698074, "grad_norm": 0.16871891915798187, "learning_rate": 0.002, "loss": 2.5703, "step": 59460 }, { "epoch": 0.11847746398061966, "grad_norm": 0.16323310136795044, "learning_rate": 0.002, "loss": 2.5843, "step": 59470 }, { "epoch": 0.11849738620425858, "grad_norm": 0.17204207181930542, "learning_rate": 0.002, "loss": 2.5843, "step": 59480 }, { "epoch": 0.11851730842789748, "grad_norm": 0.1352354884147644, "learning_rate": 0.002, "loss": 2.5504, "step": 59490 }, { "epoch": 0.1185372306515364, "grad_norm": 0.16069065034389496, "learning_rate": 0.002, "loss": 2.5802, "step": 59500 }, { "epoch": 0.11855715287517532, "grad_norm": 0.21411463618278503, "learning_rate": 0.002, "loss": 2.5822, "step": 59510 }, { "epoch": 0.11857707509881422, "grad_norm": 0.137746661901474, "learning_rate": 0.002, "loss": 2.5826, "step": 59520 }, { "epoch": 0.11859699732245314, "grad_norm": 0.16070713102817535, "learning_rate": 0.002, "loss": 2.5519, "step": 59530 }, { "epoch": 0.11861691954609206, "grad_norm": 0.14933794736862183, "learning_rate": 0.002, "loss": 2.6002, "step": 59540 }, { "epoch": 0.11863684176973097, "grad_norm": 0.1580069661140442, "learning_rate": 0.002, "loss": 2.5708, "step": 59550 }, { "epoch": 0.11865676399336988, "grad_norm": 0.15280380845069885, "learning_rate": 0.002, "loss": 2.5819, "step": 59560 }, { "epoch": 0.1186766862170088, "grad_norm": 0.193562850356102, "learning_rate": 0.002, "loss": 2.5712, "step": 59570 }, { "epoch": 0.11869660844064771, "grad_norm": 0.2020612210035324, "learning_rate": 0.002, "loss": 2.5716, "step": 59580 }, { "epoch": 0.11871653066428663, "grad_norm": 0.1466722935438156, "learning_rate": 0.002, "loss": 2.5659, "step": 59590 }, { "epoch": 0.11873645288792554, "grad_norm": 0.1621081382036209, "learning_rate": 0.002, "loss": 2.5777, "step": 59600 }, { "epoch": 0.11875637511156445, "grad_norm": 0.15278704464435577, "learning_rate": 0.002, "loss": 2.5864, "step": 59610 }, { "epoch": 0.11877629733520337, "grad_norm": 0.15521226823329926, "learning_rate": 0.002, "loss": 2.5887, "step": 59620 }, { "epoch": 0.11879621955884229, "grad_norm": 0.14462925493717194, "learning_rate": 0.002, "loss": 2.5604, "step": 59630 }, { "epoch": 0.11881614178248119, "grad_norm": 0.1528872549533844, "learning_rate": 0.002, "loss": 2.5642, "step": 59640 }, { "epoch": 0.11883606400612011, "grad_norm": 0.17497558891773224, "learning_rate": 0.002, "loss": 2.5716, "step": 59650 }, { "epoch": 0.11885598622975901, "grad_norm": 0.170231893658638, "learning_rate": 0.002, "loss": 2.5833, "step": 59660 }, { "epoch": 0.11887590845339793, "grad_norm": 0.17250578105449677, "learning_rate": 0.002, "loss": 2.5626, "step": 59670 }, { "epoch": 0.11889583067703685, "grad_norm": 0.16286812722682953, "learning_rate": 0.002, "loss": 2.5879, "step": 59680 }, { "epoch": 0.11891575290067576, "grad_norm": 0.15150479972362518, "learning_rate": 0.002, "loss": 2.5769, "step": 59690 }, { "epoch": 0.11893567512431467, "grad_norm": 0.19015483558177948, "learning_rate": 0.002, "loss": 2.581, "step": 59700 }, { "epoch": 0.11895559734795359, "grad_norm": 0.14405488967895508, "learning_rate": 0.002, "loss": 2.5699, "step": 59710 }, { "epoch": 0.1189755195715925, "grad_norm": 0.17297200858592987, "learning_rate": 0.002, "loss": 2.5816, "step": 59720 }, { "epoch": 0.11899544179523142, "grad_norm": 0.1471906453371048, "learning_rate": 0.002, "loss": 2.5704, "step": 59730 }, { "epoch": 0.11901536401887033, "grad_norm": 0.16288946568965912, "learning_rate": 0.002, "loss": 2.5714, "step": 59740 }, { "epoch": 0.11903528624250924, "grad_norm": 0.1968420445919037, "learning_rate": 0.002, "loss": 2.5797, "step": 59750 }, { "epoch": 0.11905520846614816, "grad_norm": 0.14018695056438446, "learning_rate": 0.002, "loss": 2.578, "step": 59760 }, { "epoch": 0.11907513068978708, "grad_norm": 0.22308216989040375, "learning_rate": 0.002, "loss": 2.5684, "step": 59770 }, { "epoch": 0.11909505291342598, "grad_norm": 0.15960760414600372, "learning_rate": 0.002, "loss": 2.5813, "step": 59780 }, { "epoch": 0.1191149751370649, "grad_norm": 0.15520890057086945, "learning_rate": 0.002, "loss": 2.5602, "step": 59790 }, { "epoch": 0.11913489736070382, "grad_norm": 0.17743782699108124, "learning_rate": 0.002, "loss": 2.5568, "step": 59800 }, { "epoch": 0.11915481958434272, "grad_norm": 0.13655748963356018, "learning_rate": 0.002, "loss": 2.5757, "step": 59810 }, { "epoch": 0.11917474180798164, "grad_norm": 0.1396893709897995, "learning_rate": 0.002, "loss": 2.5752, "step": 59820 }, { "epoch": 0.11919466403162056, "grad_norm": 0.21221207082271576, "learning_rate": 0.002, "loss": 2.5628, "step": 59830 }, { "epoch": 0.11921458625525946, "grad_norm": 0.14415085315704346, "learning_rate": 0.002, "loss": 2.5691, "step": 59840 }, { "epoch": 0.11923450847889838, "grad_norm": 0.1648951917886734, "learning_rate": 0.002, "loss": 2.5768, "step": 59850 }, { "epoch": 0.1192544307025373, "grad_norm": 0.18297739326953888, "learning_rate": 0.002, "loss": 2.5763, "step": 59860 }, { "epoch": 0.1192743529261762, "grad_norm": 0.15298496186733246, "learning_rate": 0.002, "loss": 2.5831, "step": 59870 }, { "epoch": 0.11929427514981512, "grad_norm": 0.15669390559196472, "learning_rate": 0.002, "loss": 2.5733, "step": 59880 }, { "epoch": 0.11931419737345404, "grad_norm": 0.19520090520381927, "learning_rate": 0.002, "loss": 2.5704, "step": 59890 }, { "epoch": 0.11933411959709295, "grad_norm": 0.1599288284778595, "learning_rate": 0.002, "loss": 2.5597, "step": 59900 }, { "epoch": 0.11935404182073187, "grad_norm": 0.16281625628471375, "learning_rate": 0.002, "loss": 2.5707, "step": 59910 }, { "epoch": 0.11937396404437077, "grad_norm": 0.1509997844696045, "learning_rate": 0.002, "loss": 2.5676, "step": 59920 }, { "epoch": 0.11939388626800969, "grad_norm": 0.16857081651687622, "learning_rate": 0.002, "loss": 2.5653, "step": 59930 }, { "epoch": 0.11941380849164861, "grad_norm": 0.15083210170269012, "learning_rate": 0.002, "loss": 2.561, "step": 59940 }, { "epoch": 0.11943373071528751, "grad_norm": 0.16164086759090424, "learning_rate": 0.002, "loss": 2.5788, "step": 59950 }, { "epoch": 0.11945365293892643, "grad_norm": 0.15826553106307983, "learning_rate": 0.002, "loss": 2.5633, "step": 59960 }, { "epoch": 0.11947357516256535, "grad_norm": 0.1648484617471695, "learning_rate": 0.002, "loss": 2.5608, "step": 59970 }, { "epoch": 0.11949349738620425, "grad_norm": 0.1599259078502655, "learning_rate": 0.002, "loss": 2.5672, "step": 59980 }, { "epoch": 0.11951341960984317, "grad_norm": 0.13896092772483826, "learning_rate": 0.002, "loss": 2.568, "step": 59990 }, { "epoch": 0.11953334183348209, "grad_norm": 0.2378443032503128, "learning_rate": 0.002, "loss": 2.5645, "step": 60000 }, { "epoch": 0.119553264057121, "grad_norm": 0.17368218302726746, "learning_rate": 0.002, "loss": 2.5895, "step": 60010 }, { "epoch": 0.11957318628075991, "grad_norm": 0.16128087043762207, "learning_rate": 0.002, "loss": 2.5779, "step": 60020 }, { "epoch": 0.11959310850439883, "grad_norm": 0.17856064438819885, "learning_rate": 0.002, "loss": 2.5817, "step": 60030 }, { "epoch": 0.11961303072803774, "grad_norm": 0.1643415093421936, "learning_rate": 0.002, "loss": 2.572, "step": 60040 }, { "epoch": 0.11963295295167665, "grad_norm": 0.15225976705551147, "learning_rate": 0.002, "loss": 2.5619, "step": 60050 }, { "epoch": 0.11965287517531557, "grad_norm": 0.16135358810424805, "learning_rate": 0.002, "loss": 2.5729, "step": 60060 }, { "epoch": 0.11967279739895448, "grad_norm": 0.15195152163505554, "learning_rate": 0.002, "loss": 2.5624, "step": 60070 }, { "epoch": 0.1196927196225934, "grad_norm": 0.13233287632465363, "learning_rate": 0.002, "loss": 2.5857, "step": 60080 }, { "epoch": 0.11971264184623231, "grad_norm": 0.19086948037147522, "learning_rate": 0.002, "loss": 2.5778, "step": 60090 }, { "epoch": 0.11973256406987122, "grad_norm": 0.18606577813625336, "learning_rate": 0.002, "loss": 2.5738, "step": 60100 }, { "epoch": 0.11975248629351014, "grad_norm": 0.15821434557437897, "learning_rate": 0.002, "loss": 2.579, "step": 60110 }, { "epoch": 0.11977240851714906, "grad_norm": 0.1609840989112854, "learning_rate": 0.002, "loss": 2.5756, "step": 60120 }, { "epoch": 0.11979233074078796, "grad_norm": 0.18939584493637085, "learning_rate": 0.002, "loss": 2.5833, "step": 60130 }, { "epoch": 0.11981225296442688, "grad_norm": 0.16131629049777985, "learning_rate": 0.002, "loss": 2.5812, "step": 60140 }, { "epoch": 0.11983217518806578, "grad_norm": 0.1719944179058075, "learning_rate": 0.002, "loss": 2.5924, "step": 60150 }, { "epoch": 0.1198520974117047, "grad_norm": 0.20398221909999847, "learning_rate": 0.002, "loss": 2.5967, "step": 60160 }, { "epoch": 0.11987201963534362, "grad_norm": 0.142421156167984, "learning_rate": 0.002, "loss": 2.5673, "step": 60170 }, { "epoch": 0.11989194185898253, "grad_norm": 0.16317367553710938, "learning_rate": 0.002, "loss": 2.5888, "step": 60180 }, { "epoch": 0.11991186408262144, "grad_norm": 0.14327265322208405, "learning_rate": 0.002, "loss": 2.5752, "step": 60190 }, { "epoch": 0.11993178630626036, "grad_norm": 0.16431331634521484, "learning_rate": 0.002, "loss": 2.5671, "step": 60200 }, { "epoch": 0.11995170852989927, "grad_norm": 0.17428164184093475, "learning_rate": 0.002, "loss": 2.5657, "step": 60210 }, { "epoch": 0.11997163075353819, "grad_norm": 0.17117902636528015, "learning_rate": 0.002, "loss": 2.5686, "step": 60220 }, { "epoch": 0.1199915529771771, "grad_norm": 0.15223105251789093, "learning_rate": 0.002, "loss": 2.5885, "step": 60230 }, { "epoch": 0.12001147520081601, "grad_norm": 0.16010861098766327, "learning_rate": 0.002, "loss": 2.5806, "step": 60240 }, { "epoch": 0.12003139742445493, "grad_norm": 0.15640874207019806, "learning_rate": 0.002, "loss": 2.5707, "step": 60250 }, { "epoch": 0.12005131964809385, "grad_norm": 0.16547368466854095, "learning_rate": 0.002, "loss": 2.5578, "step": 60260 }, { "epoch": 0.12007124187173275, "grad_norm": 0.17127031087875366, "learning_rate": 0.002, "loss": 2.5759, "step": 60270 }, { "epoch": 0.12009116409537167, "grad_norm": 0.16679330170154572, "learning_rate": 0.002, "loss": 2.5702, "step": 60280 }, { "epoch": 0.12011108631901059, "grad_norm": 0.1430831402540207, "learning_rate": 0.002, "loss": 2.5636, "step": 60290 }, { "epoch": 0.12013100854264949, "grad_norm": 0.15393632650375366, "learning_rate": 0.002, "loss": 2.5857, "step": 60300 }, { "epoch": 0.12015093076628841, "grad_norm": 0.14540955424308777, "learning_rate": 0.002, "loss": 2.5831, "step": 60310 }, { "epoch": 0.12017085298992733, "grad_norm": 0.16486649215221405, "learning_rate": 0.002, "loss": 2.5607, "step": 60320 }, { "epoch": 0.12019077521356623, "grad_norm": 0.16791445016860962, "learning_rate": 0.002, "loss": 2.566, "step": 60330 }, { "epoch": 0.12021069743720515, "grad_norm": 0.14868195354938507, "learning_rate": 0.002, "loss": 2.5747, "step": 60340 }, { "epoch": 0.12023061966084407, "grad_norm": 0.16190709173679352, "learning_rate": 0.002, "loss": 2.5783, "step": 60350 }, { "epoch": 0.12025054188448298, "grad_norm": 0.15964990854263306, "learning_rate": 0.002, "loss": 2.5742, "step": 60360 }, { "epoch": 0.1202704641081219, "grad_norm": 0.2159082442522049, "learning_rate": 0.002, "loss": 2.5671, "step": 60370 }, { "epoch": 0.12029038633176081, "grad_norm": 0.16359616816043854, "learning_rate": 0.002, "loss": 2.575, "step": 60380 }, { "epoch": 0.12031030855539972, "grad_norm": 0.16258952021598816, "learning_rate": 0.002, "loss": 2.5654, "step": 60390 }, { "epoch": 0.12033023077903864, "grad_norm": 0.18673628568649292, "learning_rate": 0.002, "loss": 2.5574, "step": 60400 }, { "epoch": 0.12035015300267754, "grad_norm": 0.16423162817955017, "learning_rate": 0.002, "loss": 2.5723, "step": 60410 }, { "epoch": 0.12037007522631646, "grad_norm": 0.14592139422893524, "learning_rate": 0.002, "loss": 2.5852, "step": 60420 }, { "epoch": 0.12038999744995538, "grad_norm": 0.1436840146780014, "learning_rate": 0.002, "loss": 2.5787, "step": 60430 }, { "epoch": 0.12040991967359428, "grad_norm": 0.18828268349170685, "learning_rate": 0.002, "loss": 2.5602, "step": 60440 }, { "epoch": 0.1204298418972332, "grad_norm": 0.16561198234558105, "learning_rate": 0.002, "loss": 2.5707, "step": 60450 }, { "epoch": 0.12044976412087212, "grad_norm": 0.1395123451948166, "learning_rate": 0.002, "loss": 2.5647, "step": 60460 }, { "epoch": 0.12046968634451102, "grad_norm": 0.1694621592760086, "learning_rate": 0.002, "loss": 2.5855, "step": 60470 }, { "epoch": 0.12048960856814994, "grad_norm": 0.13814465701580048, "learning_rate": 0.002, "loss": 2.5776, "step": 60480 }, { "epoch": 0.12050953079178886, "grad_norm": 0.18486106395721436, "learning_rate": 0.002, "loss": 2.5718, "step": 60490 }, { "epoch": 0.12052945301542776, "grad_norm": 0.19487036764621735, "learning_rate": 0.002, "loss": 2.5736, "step": 60500 }, { "epoch": 0.12054937523906668, "grad_norm": 0.14716000854969025, "learning_rate": 0.002, "loss": 2.576, "step": 60510 }, { "epoch": 0.1205692974627056, "grad_norm": 0.16593262553215027, "learning_rate": 0.002, "loss": 2.5591, "step": 60520 }, { "epoch": 0.1205892196863445, "grad_norm": 0.17922215163707733, "learning_rate": 0.002, "loss": 2.5704, "step": 60530 }, { "epoch": 0.12060914190998342, "grad_norm": 0.14433977007865906, "learning_rate": 0.002, "loss": 2.5656, "step": 60540 }, { "epoch": 0.12062906413362234, "grad_norm": 0.17013892531394958, "learning_rate": 0.002, "loss": 2.5647, "step": 60550 }, { "epoch": 0.12064898635726125, "grad_norm": 0.135910764336586, "learning_rate": 0.002, "loss": 2.5961, "step": 60560 }, { "epoch": 0.12066890858090017, "grad_norm": 0.17381928861141205, "learning_rate": 0.002, "loss": 2.5735, "step": 60570 }, { "epoch": 0.12068883080453909, "grad_norm": 0.15461887419223785, "learning_rate": 0.002, "loss": 2.5638, "step": 60580 }, { "epoch": 0.12070875302817799, "grad_norm": 0.17459717392921448, "learning_rate": 0.002, "loss": 2.5674, "step": 60590 }, { "epoch": 0.12072867525181691, "grad_norm": 0.17583180963993073, "learning_rate": 0.002, "loss": 2.5633, "step": 60600 }, { "epoch": 0.12074859747545583, "grad_norm": 0.15317294001579285, "learning_rate": 0.002, "loss": 2.5487, "step": 60610 }, { "epoch": 0.12076851969909473, "grad_norm": 0.15613681077957153, "learning_rate": 0.002, "loss": 2.5802, "step": 60620 }, { "epoch": 0.12078844192273365, "grad_norm": 0.2057972252368927, "learning_rate": 0.002, "loss": 2.5741, "step": 60630 }, { "epoch": 0.12080836414637257, "grad_norm": 0.19574964046478271, "learning_rate": 0.002, "loss": 2.5647, "step": 60640 }, { "epoch": 0.12082828637001147, "grad_norm": 0.14570234715938568, "learning_rate": 0.002, "loss": 2.5707, "step": 60650 }, { "epoch": 0.12084820859365039, "grad_norm": 0.15911054611206055, "learning_rate": 0.002, "loss": 2.5742, "step": 60660 }, { "epoch": 0.1208681308172893, "grad_norm": 0.14186254143714905, "learning_rate": 0.002, "loss": 2.5833, "step": 60670 }, { "epoch": 0.12088805304092821, "grad_norm": 0.22183585166931152, "learning_rate": 0.002, "loss": 2.5548, "step": 60680 }, { "epoch": 0.12090797526456713, "grad_norm": 0.14413529634475708, "learning_rate": 0.002, "loss": 2.5649, "step": 60690 }, { "epoch": 0.12092789748820604, "grad_norm": 0.14067837595939636, "learning_rate": 0.002, "loss": 2.5658, "step": 60700 }, { "epoch": 0.12094781971184496, "grad_norm": 0.14508852362632751, "learning_rate": 0.002, "loss": 2.5706, "step": 60710 }, { "epoch": 0.12096774193548387, "grad_norm": 0.16969706118106842, "learning_rate": 0.002, "loss": 2.5663, "step": 60720 }, { "epoch": 0.12098766415912278, "grad_norm": 0.16089844703674316, "learning_rate": 0.002, "loss": 2.5783, "step": 60730 }, { "epoch": 0.1210075863827617, "grad_norm": 0.13926251232624054, "learning_rate": 0.002, "loss": 2.574, "step": 60740 }, { "epoch": 0.12102750860640062, "grad_norm": 0.15989378094673157, "learning_rate": 0.002, "loss": 2.5699, "step": 60750 }, { "epoch": 0.12104743083003952, "grad_norm": 0.17433415353298187, "learning_rate": 0.002, "loss": 2.5798, "step": 60760 }, { "epoch": 0.12106735305367844, "grad_norm": 0.146018385887146, "learning_rate": 0.002, "loss": 2.5746, "step": 60770 }, { "epoch": 0.12108727527731736, "grad_norm": 0.1795864999294281, "learning_rate": 0.002, "loss": 2.5611, "step": 60780 }, { "epoch": 0.12110719750095626, "grad_norm": 0.14449259638786316, "learning_rate": 0.002, "loss": 2.5708, "step": 60790 }, { "epoch": 0.12112711972459518, "grad_norm": 0.16825686395168304, "learning_rate": 0.002, "loss": 2.5769, "step": 60800 }, { "epoch": 0.1211470419482341, "grad_norm": 0.2104288637638092, "learning_rate": 0.002, "loss": 2.5769, "step": 60810 }, { "epoch": 0.121166964171873, "grad_norm": 0.15274247527122498, "learning_rate": 0.002, "loss": 2.562, "step": 60820 }, { "epoch": 0.12118688639551192, "grad_norm": 0.1509944647550583, "learning_rate": 0.002, "loss": 2.5683, "step": 60830 }, { "epoch": 0.12120680861915084, "grad_norm": 0.1427115648984909, "learning_rate": 0.002, "loss": 2.5675, "step": 60840 }, { "epoch": 0.12122673084278975, "grad_norm": 0.17518547177314758, "learning_rate": 0.002, "loss": 2.5653, "step": 60850 }, { "epoch": 0.12124665306642866, "grad_norm": 0.1802491694688797, "learning_rate": 0.002, "loss": 2.5682, "step": 60860 }, { "epoch": 0.12126657529006758, "grad_norm": 0.18679049611091614, "learning_rate": 0.002, "loss": 2.5781, "step": 60870 }, { "epoch": 0.12128649751370649, "grad_norm": 0.15231943130493164, "learning_rate": 0.002, "loss": 2.5651, "step": 60880 }, { "epoch": 0.1213064197373454, "grad_norm": 0.15460164844989777, "learning_rate": 0.002, "loss": 2.5655, "step": 60890 }, { "epoch": 0.12132634196098432, "grad_norm": 0.14694255590438843, "learning_rate": 0.002, "loss": 2.5812, "step": 60900 }, { "epoch": 0.12134626418462323, "grad_norm": 0.19939878582954407, "learning_rate": 0.002, "loss": 2.5712, "step": 60910 }, { "epoch": 0.12136618640826215, "grad_norm": 0.15079088509082794, "learning_rate": 0.002, "loss": 2.5681, "step": 60920 }, { "epoch": 0.12138610863190105, "grad_norm": 0.15664659440517426, "learning_rate": 0.002, "loss": 2.5732, "step": 60930 }, { "epoch": 0.12140603085553997, "grad_norm": 0.20031200349330902, "learning_rate": 0.002, "loss": 2.5682, "step": 60940 }, { "epoch": 0.12142595307917889, "grad_norm": 0.137134850025177, "learning_rate": 0.002, "loss": 2.5862, "step": 60950 }, { "epoch": 0.1214458753028178, "grad_norm": 0.17062078416347504, "learning_rate": 0.002, "loss": 2.5805, "step": 60960 }, { "epoch": 0.12146579752645671, "grad_norm": 0.14389516413211823, "learning_rate": 0.002, "loss": 2.5782, "step": 60970 }, { "epoch": 0.12148571975009563, "grad_norm": 0.16147653758525848, "learning_rate": 0.002, "loss": 2.575, "step": 60980 }, { "epoch": 0.12150564197373454, "grad_norm": 0.16207800805568695, "learning_rate": 0.002, "loss": 2.5773, "step": 60990 }, { "epoch": 0.12152556419737345, "grad_norm": 0.16012465953826904, "learning_rate": 0.002, "loss": 2.5845, "step": 61000 }, { "epoch": 0.12154548642101237, "grad_norm": 0.1556144505739212, "learning_rate": 0.002, "loss": 2.5784, "step": 61010 }, { "epoch": 0.12156540864465128, "grad_norm": 0.14155083894729614, "learning_rate": 0.002, "loss": 2.5693, "step": 61020 }, { "epoch": 0.1215853308682902, "grad_norm": 0.14076265692710876, "learning_rate": 0.002, "loss": 2.5534, "step": 61030 }, { "epoch": 0.12160525309192911, "grad_norm": 0.13870732486248016, "learning_rate": 0.002, "loss": 2.5689, "step": 61040 }, { "epoch": 0.12162517531556802, "grad_norm": 0.14935170114040375, "learning_rate": 0.002, "loss": 2.5805, "step": 61050 }, { "epoch": 0.12164509753920694, "grad_norm": 0.15914605557918549, "learning_rate": 0.002, "loss": 2.5898, "step": 61060 }, { "epoch": 0.12166501976284586, "grad_norm": 0.16324929893016815, "learning_rate": 0.002, "loss": 2.5807, "step": 61070 }, { "epoch": 0.12168494198648476, "grad_norm": 0.14523069560527802, "learning_rate": 0.002, "loss": 2.5724, "step": 61080 }, { "epoch": 0.12170486421012368, "grad_norm": 0.17100368440151215, "learning_rate": 0.002, "loss": 2.5634, "step": 61090 }, { "epoch": 0.1217247864337626, "grad_norm": 0.13868394494056702, "learning_rate": 0.002, "loss": 2.5729, "step": 61100 }, { "epoch": 0.1217447086574015, "grad_norm": 0.17802192270755768, "learning_rate": 0.002, "loss": 2.5687, "step": 61110 }, { "epoch": 0.12176463088104042, "grad_norm": 0.1932566910982132, "learning_rate": 0.002, "loss": 2.5729, "step": 61120 }, { "epoch": 0.12178455310467934, "grad_norm": 0.16766579449176788, "learning_rate": 0.002, "loss": 2.5894, "step": 61130 }, { "epoch": 0.12180447532831824, "grad_norm": 0.1777302771806717, "learning_rate": 0.002, "loss": 2.5907, "step": 61140 }, { "epoch": 0.12182439755195716, "grad_norm": 0.15982931852340698, "learning_rate": 0.002, "loss": 2.5735, "step": 61150 }, { "epoch": 0.12184431977559607, "grad_norm": 0.14948223531246185, "learning_rate": 0.002, "loss": 2.5782, "step": 61160 }, { "epoch": 0.12186424199923498, "grad_norm": 0.16564379632472992, "learning_rate": 0.002, "loss": 2.5863, "step": 61170 }, { "epoch": 0.1218841642228739, "grad_norm": 0.1513909548521042, "learning_rate": 0.002, "loss": 2.5662, "step": 61180 }, { "epoch": 0.12190408644651281, "grad_norm": 0.1700671911239624, "learning_rate": 0.002, "loss": 2.5824, "step": 61190 }, { "epoch": 0.12192400867015173, "grad_norm": 0.1698608100414276, "learning_rate": 0.002, "loss": 2.5721, "step": 61200 }, { "epoch": 0.12194393089379064, "grad_norm": 0.1537874937057495, "learning_rate": 0.002, "loss": 2.5748, "step": 61210 }, { "epoch": 0.12196385311742955, "grad_norm": 0.15499813854694366, "learning_rate": 0.002, "loss": 2.5794, "step": 61220 }, { "epoch": 0.12198377534106847, "grad_norm": 0.1546381115913391, "learning_rate": 0.002, "loss": 2.5714, "step": 61230 }, { "epoch": 0.12200369756470739, "grad_norm": 0.21145905554294586, "learning_rate": 0.002, "loss": 2.5826, "step": 61240 }, { "epoch": 0.12202361978834629, "grad_norm": 0.1503669023513794, "learning_rate": 0.002, "loss": 2.5804, "step": 61250 }, { "epoch": 0.12204354201198521, "grad_norm": 0.16327902674674988, "learning_rate": 0.002, "loss": 2.5778, "step": 61260 }, { "epoch": 0.12206346423562413, "grad_norm": 0.16578884422779083, "learning_rate": 0.002, "loss": 2.5789, "step": 61270 }, { "epoch": 0.12208338645926303, "grad_norm": 0.1636091023683548, "learning_rate": 0.002, "loss": 2.5786, "step": 61280 }, { "epoch": 0.12210330868290195, "grad_norm": 0.12514051795005798, "learning_rate": 0.002, "loss": 2.5585, "step": 61290 }, { "epoch": 0.12212323090654087, "grad_norm": 0.20246319472789764, "learning_rate": 0.002, "loss": 2.5814, "step": 61300 }, { "epoch": 0.12214315313017977, "grad_norm": 0.16818514466285706, "learning_rate": 0.002, "loss": 2.5638, "step": 61310 }, { "epoch": 0.12216307535381869, "grad_norm": 0.17459936439990997, "learning_rate": 0.002, "loss": 2.5606, "step": 61320 }, { "epoch": 0.12218299757745761, "grad_norm": 0.16064757108688354, "learning_rate": 0.002, "loss": 2.5756, "step": 61330 }, { "epoch": 0.12220291980109652, "grad_norm": 0.1510593295097351, "learning_rate": 0.002, "loss": 2.5836, "step": 61340 }, { "epoch": 0.12222284202473543, "grad_norm": 0.17628918588161469, "learning_rate": 0.002, "loss": 2.5666, "step": 61350 }, { "epoch": 0.12224276424837435, "grad_norm": 0.20194610953330994, "learning_rate": 0.002, "loss": 2.5722, "step": 61360 }, { "epoch": 0.12226268647201326, "grad_norm": 0.16444014012813568, "learning_rate": 0.002, "loss": 2.5639, "step": 61370 }, { "epoch": 0.12228260869565218, "grad_norm": 0.14364536106586456, "learning_rate": 0.002, "loss": 2.5795, "step": 61380 }, { "epoch": 0.1223025309192911, "grad_norm": 0.1500132828950882, "learning_rate": 0.002, "loss": 2.5884, "step": 61390 }, { "epoch": 0.12232245314293, "grad_norm": 0.14109258353710175, "learning_rate": 0.002, "loss": 2.5932, "step": 61400 }, { "epoch": 0.12234237536656892, "grad_norm": 0.14097179472446442, "learning_rate": 0.002, "loss": 2.5805, "step": 61410 }, { "epoch": 0.12236229759020782, "grad_norm": 0.16692779958248138, "learning_rate": 0.002, "loss": 2.5802, "step": 61420 }, { "epoch": 0.12238221981384674, "grad_norm": 0.17264948785305023, "learning_rate": 0.002, "loss": 2.5828, "step": 61430 }, { "epoch": 0.12240214203748566, "grad_norm": 0.14784137904644012, "learning_rate": 0.002, "loss": 2.5745, "step": 61440 }, { "epoch": 0.12242206426112456, "grad_norm": 0.1397727131843567, "learning_rate": 0.002, "loss": 2.5812, "step": 61450 }, { "epoch": 0.12244198648476348, "grad_norm": 0.17935000360012054, "learning_rate": 0.002, "loss": 2.56, "step": 61460 }, { "epoch": 0.1224619087084024, "grad_norm": 0.16174836456775665, "learning_rate": 0.002, "loss": 2.5686, "step": 61470 }, { "epoch": 0.1224818309320413, "grad_norm": 0.1711372286081314, "learning_rate": 0.002, "loss": 2.5641, "step": 61480 }, { "epoch": 0.12250175315568022, "grad_norm": 0.15926454961299896, "learning_rate": 0.002, "loss": 2.5983, "step": 61490 }, { "epoch": 0.12252167537931914, "grad_norm": 0.16677981615066528, "learning_rate": 0.002, "loss": 2.583, "step": 61500 }, { "epoch": 0.12254159760295805, "grad_norm": 0.19016383588314056, "learning_rate": 0.002, "loss": 2.5688, "step": 61510 }, { "epoch": 0.12256151982659697, "grad_norm": 0.1674349009990692, "learning_rate": 0.002, "loss": 2.5792, "step": 61520 }, { "epoch": 0.12258144205023588, "grad_norm": 0.15695972740650177, "learning_rate": 0.002, "loss": 2.5781, "step": 61530 }, { "epoch": 0.12260136427387479, "grad_norm": 0.171773299574852, "learning_rate": 0.002, "loss": 2.5739, "step": 61540 }, { "epoch": 0.1226212864975137, "grad_norm": 0.14874325692653656, "learning_rate": 0.002, "loss": 2.5891, "step": 61550 }, { "epoch": 0.12264120872115263, "grad_norm": 0.1616503894329071, "learning_rate": 0.002, "loss": 2.5742, "step": 61560 }, { "epoch": 0.12266113094479153, "grad_norm": 0.1489204615354538, "learning_rate": 0.002, "loss": 2.576, "step": 61570 }, { "epoch": 0.12268105316843045, "grad_norm": 0.1659923493862152, "learning_rate": 0.002, "loss": 2.5778, "step": 61580 }, { "epoch": 0.12270097539206937, "grad_norm": 0.15831945836544037, "learning_rate": 0.002, "loss": 2.5782, "step": 61590 }, { "epoch": 0.12272089761570827, "grad_norm": 0.13968636095523834, "learning_rate": 0.002, "loss": 2.5679, "step": 61600 }, { "epoch": 0.12274081983934719, "grad_norm": 0.14346688985824585, "learning_rate": 0.002, "loss": 2.578, "step": 61610 }, { "epoch": 0.12276074206298611, "grad_norm": 0.21165147423744202, "learning_rate": 0.002, "loss": 2.5826, "step": 61620 }, { "epoch": 0.12278066428662501, "grad_norm": 0.18270768225193024, "learning_rate": 0.002, "loss": 2.58, "step": 61630 }, { "epoch": 0.12280058651026393, "grad_norm": 0.14974471926689148, "learning_rate": 0.002, "loss": 2.5814, "step": 61640 }, { "epoch": 0.12282050873390285, "grad_norm": 0.1896970123052597, "learning_rate": 0.002, "loss": 2.5761, "step": 61650 }, { "epoch": 0.12284043095754175, "grad_norm": 0.14699015021324158, "learning_rate": 0.002, "loss": 2.5655, "step": 61660 }, { "epoch": 0.12286035318118067, "grad_norm": 0.19076195359230042, "learning_rate": 0.002, "loss": 2.5723, "step": 61670 }, { "epoch": 0.12288027540481958, "grad_norm": 0.14995700120925903, "learning_rate": 0.002, "loss": 2.5708, "step": 61680 }, { "epoch": 0.1229001976284585, "grad_norm": 0.16950495541095734, "learning_rate": 0.002, "loss": 2.5615, "step": 61690 }, { "epoch": 0.12292011985209741, "grad_norm": 0.1511918306350708, "learning_rate": 0.002, "loss": 2.5752, "step": 61700 }, { "epoch": 0.12294004207573632, "grad_norm": 0.13770246505737305, "learning_rate": 0.002, "loss": 2.5711, "step": 61710 }, { "epoch": 0.12295996429937524, "grad_norm": 0.1722446233034134, "learning_rate": 0.002, "loss": 2.5583, "step": 61720 }, { "epoch": 0.12297988652301416, "grad_norm": 0.18456251919269562, "learning_rate": 0.002, "loss": 2.5634, "step": 61730 }, { "epoch": 0.12299980874665306, "grad_norm": 0.1529962569475174, "learning_rate": 0.002, "loss": 2.5791, "step": 61740 }, { "epoch": 0.12301973097029198, "grad_norm": 0.1638702005147934, "learning_rate": 0.002, "loss": 2.5806, "step": 61750 }, { "epoch": 0.1230396531939309, "grad_norm": 0.18300919234752655, "learning_rate": 0.002, "loss": 2.5593, "step": 61760 }, { "epoch": 0.1230595754175698, "grad_norm": 0.15808631479740143, "learning_rate": 0.002, "loss": 2.586, "step": 61770 }, { "epoch": 0.12307949764120872, "grad_norm": 0.1633647233247757, "learning_rate": 0.002, "loss": 2.5683, "step": 61780 }, { "epoch": 0.12309941986484764, "grad_norm": 0.15508143603801727, "learning_rate": 0.002, "loss": 2.5893, "step": 61790 }, { "epoch": 0.12311934208848654, "grad_norm": 0.16139836609363556, "learning_rate": 0.002, "loss": 2.5811, "step": 61800 }, { "epoch": 0.12313926431212546, "grad_norm": 0.1520974338054657, "learning_rate": 0.002, "loss": 2.5808, "step": 61810 }, { "epoch": 0.12315918653576438, "grad_norm": 0.18758989870548248, "learning_rate": 0.002, "loss": 2.5678, "step": 61820 }, { "epoch": 0.12317910875940329, "grad_norm": 0.18720616400241852, "learning_rate": 0.002, "loss": 2.576, "step": 61830 }, { "epoch": 0.1231990309830422, "grad_norm": 0.1575760692358017, "learning_rate": 0.002, "loss": 2.5668, "step": 61840 }, { "epoch": 0.12321895320668112, "grad_norm": 0.18694838881492615, "learning_rate": 0.002, "loss": 2.5665, "step": 61850 }, { "epoch": 0.12323887543032003, "grad_norm": 0.13572503626346588, "learning_rate": 0.002, "loss": 2.5647, "step": 61860 }, { "epoch": 0.12325879765395895, "grad_norm": 0.18118588626384735, "learning_rate": 0.002, "loss": 2.5746, "step": 61870 }, { "epoch": 0.12327871987759786, "grad_norm": 0.15307672321796417, "learning_rate": 0.002, "loss": 2.5687, "step": 61880 }, { "epoch": 0.12329864210123677, "grad_norm": 0.13866300880908966, "learning_rate": 0.002, "loss": 2.5742, "step": 61890 }, { "epoch": 0.12331856432487569, "grad_norm": 0.14599791169166565, "learning_rate": 0.002, "loss": 2.5809, "step": 61900 }, { "epoch": 0.1233384865485146, "grad_norm": 0.1674831658601761, "learning_rate": 0.002, "loss": 2.5721, "step": 61910 }, { "epoch": 0.12335840877215351, "grad_norm": 0.156724214553833, "learning_rate": 0.002, "loss": 2.585, "step": 61920 }, { "epoch": 0.12337833099579243, "grad_norm": 0.1573544144630432, "learning_rate": 0.002, "loss": 2.5772, "step": 61930 }, { "epoch": 0.12339825321943133, "grad_norm": 0.17583847045898438, "learning_rate": 0.002, "loss": 2.5956, "step": 61940 }, { "epoch": 0.12341817544307025, "grad_norm": 0.18935485184192657, "learning_rate": 0.002, "loss": 2.5769, "step": 61950 }, { "epoch": 0.12343809766670917, "grad_norm": 0.16622260212898254, "learning_rate": 0.002, "loss": 2.5735, "step": 61960 }, { "epoch": 0.12345801989034808, "grad_norm": 0.1662607192993164, "learning_rate": 0.002, "loss": 2.5705, "step": 61970 }, { "epoch": 0.123477942113987, "grad_norm": 0.1594652682542801, "learning_rate": 0.002, "loss": 2.5813, "step": 61980 }, { "epoch": 0.12349786433762591, "grad_norm": 0.14221957325935364, "learning_rate": 0.002, "loss": 2.5912, "step": 61990 }, { "epoch": 0.12351778656126482, "grad_norm": 0.1593816727399826, "learning_rate": 0.002, "loss": 2.5828, "step": 62000 }, { "epoch": 0.12353770878490374, "grad_norm": 0.17399649322032928, "learning_rate": 0.002, "loss": 2.5684, "step": 62010 }, { "epoch": 0.12355763100854265, "grad_norm": 0.16828866302967072, "learning_rate": 0.002, "loss": 2.5702, "step": 62020 }, { "epoch": 0.12357755323218156, "grad_norm": 0.1328427493572235, "learning_rate": 0.002, "loss": 2.5622, "step": 62030 }, { "epoch": 0.12359747545582048, "grad_norm": 0.1631116271018982, "learning_rate": 0.002, "loss": 2.5726, "step": 62040 }, { "epoch": 0.1236173976794594, "grad_norm": 0.17492163181304932, "learning_rate": 0.002, "loss": 2.5732, "step": 62050 }, { "epoch": 0.1236373199030983, "grad_norm": 0.19945575296878815, "learning_rate": 0.002, "loss": 2.5654, "step": 62060 }, { "epoch": 0.12365724212673722, "grad_norm": 0.1682065725326538, "learning_rate": 0.002, "loss": 2.5583, "step": 62070 }, { "epoch": 0.12367716435037614, "grad_norm": 0.17024774849414825, "learning_rate": 0.002, "loss": 2.5728, "step": 62080 }, { "epoch": 0.12369708657401504, "grad_norm": 0.1750349998474121, "learning_rate": 0.002, "loss": 2.5776, "step": 62090 }, { "epoch": 0.12371700879765396, "grad_norm": 0.16495493054389954, "learning_rate": 0.002, "loss": 2.5828, "step": 62100 }, { "epoch": 0.12373693102129288, "grad_norm": 0.16787400841712952, "learning_rate": 0.002, "loss": 2.581, "step": 62110 }, { "epoch": 0.12375685324493178, "grad_norm": 0.15451325476169586, "learning_rate": 0.002, "loss": 2.5795, "step": 62120 }, { "epoch": 0.1237767754685707, "grad_norm": 0.19254320859909058, "learning_rate": 0.002, "loss": 2.5932, "step": 62130 }, { "epoch": 0.12379669769220962, "grad_norm": 0.15483567118644714, "learning_rate": 0.002, "loss": 2.5685, "step": 62140 }, { "epoch": 0.12381661991584852, "grad_norm": 0.20795248448848724, "learning_rate": 0.002, "loss": 2.5718, "step": 62150 }, { "epoch": 0.12383654213948744, "grad_norm": 0.17767003178596497, "learning_rate": 0.002, "loss": 2.5749, "step": 62160 }, { "epoch": 0.12385646436312635, "grad_norm": 0.1507004201412201, "learning_rate": 0.002, "loss": 2.5748, "step": 62170 }, { "epoch": 0.12387638658676527, "grad_norm": 0.1826348900794983, "learning_rate": 0.002, "loss": 2.5697, "step": 62180 }, { "epoch": 0.12389630881040418, "grad_norm": 0.14405319094657898, "learning_rate": 0.002, "loss": 2.5684, "step": 62190 }, { "epoch": 0.12391623103404309, "grad_norm": 0.14551322162151337, "learning_rate": 0.002, "loss": 2.5845, "step": 62200 }, { "epoch": 0.12393615325768201, "grad_norm": 0.13449305295944214, "learning_rate": 0.002, "loss": 2.5653, "step": 62210 }, { "epoch": 0.12395607548132093, "grad_norm": 0.19457785785198212, "learning_rate": 0.002, "loss": 2.569, "step": 62220 }, { "epoch": 0.12397599770495983, "grad_norm": 0.15182603895664215, "learning_rate": 0.002, "loss": 2.5622, "step": 62230 }, { "epoch": 0.12399591992859875, "grad_norm": 0.15884174406528473, "learning_rate": 0.002, "loss": 2.5514, "step": 62240 }, { "epoch": 0.12401584215223767, "grad_norm": 0.1984759420156479, "learning_rate": 0.002, "loss": 2.5628, "step": 62250 }, { "epoch": 0.12403576437587657, "grad_norm": 0.1478194147348404, "learning_rate": 0.002, "loss": 2.5759, "step": 62260 }, { "epoch": 0.12405568659951549, "grad_norm": 0.15783461928367615, "learning_rate": 0.002, "loss": 2.5717, "step": 62270 }, { "epoch": 0.12407560882315441, "grad_norm": 0.1539667546749115, "learning_rate": 0.002, "loss": 2.5601, "step": 62280 }, { "epoch": 0.12409553104679331, "grad_norm": 0.17834441363811493, "learning_rate": 0.002, "loss": 2.5599, "step": 62290 }, { "epoch": 0.12411545327043223, "grad_norm": 0.169517382979393, "learning_rate": 0.002, "loss": 2.5698, "step": 62300 }, { "epoch": 0.12413537549407115, "grad_norm": 0.13333021104335785, "learning_rate": 0.002, "loss": 2.5651, "step": 62310 }, { "epoch": 0.12415529771771006, "grad_norm": 0.17664329707622528, "learning_rate": 0.002, "loss": 2.5718, "step": 62320 }, { "epoch": 0.12417521994134897, "grad_norm": 0.15790162980556488, "learning_rate": 0.002, "loss": 2.5705, "step": 62330 }, { "epoch": 0.12419514216498789, "grad_norm": 0.13258519768714905, "learning_rate": 0.002, "loss": 2.5825, "step": 62340 }, { "epoch": 0.1242150643886268, "grad_norm": 0.22458516061306, "learning_rate": 0.002, "loss": 2.5706, "step": 62350 }, { "epoch": 0.12423498661226572, "grad_norm": 0.14612118899822235, "learning_rate": 0.002, "loss": 2.5559, "step": 62360 }, { "epoch": 0.12425490883590463, "grad_norm": 0.13834446668624878, "learning_rate": 0.002, "loss": 2.5622, "step": 62370 }, { "epoch": 0.12427483105954354, "grad_norm": 0.16864381730556488, "learning_rate": 0.002, "loss": 2.5772, "step": 62380 }, { "epoch": 0.12429475328318246, "grad_norm": 0.1606111228466034, "learning_rate": 0.002, "loss": 2.5574, "step": 62390 }, { "epoch": 0.12431467550682138, "grad_norm": 0.13919273018836975, "learning_rate": 0.002, "loss": 2.5843, "step": 62400 }, { "epoch": 0.12433459773046028, "grad_norm": 0.16337382793426514, "learning_rate": 0.002, "loss": 2.5722, "step": 62410 }, { "epoch": 0.1243545199540992, "grad_norm": 0.15463939309120178, "learning_rate": 0.002, "loss": 2.5753, "step": 62420 }, { "epoch": 0.1243744421777381, "grad_norm": 0.15064245462417603, "learning_rate": 0.002, "loss": 2.5689, "step": 62430 }, { "epoch": 0.12439436440137702, "grad_norm": 0.2573016583919525, "learning_rate": 0.002, "loss": 2.5837, "step": 62440 }, { "epoch": 0.12441428662501594, "grad_norm": 0.19640196859836578, "learning_rate": 0.002, "loss": 2.5728, "step": 62450 }, { "epoch": 0.12443420884865485, "grad_norm": 0.15173150599002838, "learning_rate": 0.002, "loss": 2.5696, "step": 62460 }, { "epoch": 0.12445413107229376, "grad_norm": 0.17148058116436005, "learning_rate": 0.002, "loss": 2.5788, "step": 62470 }, { "epoch": 0.12447405329593268, "grad_norm": 0.14817625284194946, "learning_rate": 0.002, "loss": 2.5669, "step": 62480 }, { "epoch": 0.12449397551957159, "grad_norm": 0.1537788063287735, "learning_rate": 0.002, "loss": 2.5739, "step": 62490 }, { "epoch": 0.1245138977432105, "grad_norm": 0.2737075686454773, "learning_rate": 0.002, "loss": 2.5667, "step": 62500 }, { "epoch": 0.12453381996684942, "grad_norm": 0.18117083609104156, "learning_rate": 0.002, "loss": 2.5822, "step": 62510 }, { "epoch": 0.12455374219048833, "grad_norm": 0.19523999094963074, "learning_rate": 0.002, "loss": 2.5666, "step": 62520 }, { "epoch": 0.12457366441412725, "grad_norm": 0.16697758436203003, "learning_rate": 0.002, "loss": 2.574, "step": 62530 }, { "epoch": 0.12459358663776617, "grad_norm": 0.1951465904712677, "learning_rate": 0.002, "loss": 2.5602, "step": 62540 }, { "epoch": 0.12461350886140507, "grad_norm": 0.1409863829612732, "learning_rate": 0.002, "loss": 2.5714, "step": 62550 }, { "epoch": 0.12463343108504399, "grad_norm": 0.15425266325473785, "learning_rate": 0.002, "loss": 2.5565, "step": 62560 }, { "epoch": 0.12465335330868291, "grad_norm": 0.16719982028007507, "learning_rate": 0.002, "loss": 2.5806, "step": 62570 }, { "epoch": 0.12467327553232181, "grad_norm": 0.180659219622612, "learning_rate": 0.002, "loss": 2.5709, "step": 62580 }, { "epoch": 0.12469319775596073, "grad_norm": 0.13470560312271118, "learning_rate": 0.002, "loss": 2.5666, "step": 62590 }, { "epoch": 0.12471311997959965, "grad_norm": 0.1651850938796997, "learning_rate": 0.002, "loss": 2.5813, "step": 62600 }, { "epoch": 0.12473304220323855, "grad_norm": 0.16420279443264008, "learning_rate": 0.002, "loss": 2.5782, "step": 62610 }, { "epoch": 0.12475296442687747, "grad_norm": 0.13599424064159393, "learning_rate": 0.002, "loss": 2.5542, "step": 62620 }, { "epoch": 0.12477288665051639, "grad_norm": 0.14958198368549347, "learning_rate": 0.002, "loss": 2.5728, "step": 62630 }, { "epoch": 0.1247928088741553, "grad_norm": 0.1563379168510437, "learning_rate": 0.002, "loss": 2.5838, "step": 62640 }, { "epoch": 0.12481273109779421, "grad_norm": 0.17099829018115997, "learning_rate": 0.002, "loss": 2.5674, "step": 62650 }, { "epoch": 0.12483265332143313, "grad_norm": 0.18489426374435425, "learning_rate": 0.002, "loss": 2.586, "step": 62660 }, { "epoch": 0.12485257554507204, "grad_norm": 0.17505867779254913, "learning_rate": 0.002, "loss": 2.5872, "step": 62670 }, { "epoch": 0.12487249776871095, "grad_norm": 0.17305979132652283, "learning_rate": 0.002, "loss": 2.5822, "step": 62680 }, { "epoch": 0.12489241999234986, "grad_norm": 0.1410745084285736, "learning_rate": 0.002, "loss": 2.5733, "step": 62690 }, { "epoch": 0.12491234221598878, "grad_norm": 0.16857925057411194, "learning_rate": 0.002, "loss": 2.5648, "step": 62700 }, { "epoch": 0.1249322644396277, "grad_norm": 0.16492058336734772, "learning_rate": 0.002, "loss": 2.5732, "step": 62710 }, { "epoch": 0.1249521866632666, "grad_norm": 0.17862869799137115, "learning_rate": 0.002, "loss": 2.5713, "step": 62720 }, { "epoch": 0.12497210888690552, "grad_norm": 0.14848822355270386, "learning_rate": 0.002, "loss": 2.5788, "step": 62730 }, { "epoch": 0.12499203111054444, "grad_norm": 0.1741158664226532, "learning_rate": 0.002, "loss": 2.5726, "step": 62740 }, { "epoch": 0.12501195333418336, "grad_norm": 0.2118474841117859, "learning_rate": 0.002, "loss": 2.5744, "step": 62750 }, { "epoch": 0.12503187555782228, "grad_norm": 0.14133740961551666, "learning_rate": 0.002, "loss": 2.5858, "step": 62760 }, { "epoch": 0.12505179778146117, "grad_norm": 0.1700451374053955, "learning_rate": 0.002, "loss": 2.5744, "step": 62770 }, { "epoch": 0.12507172000510008, "grad_norm": 0.14651551842689514, "learning_rate": 0.002, "loss": 2.5582, "step": 62780 }, { "epoch": 0.125091642228739, "grad_norm": 0.1959046572446823, "learning_rate": 0.002, "loss": 2.5655, "step": 62790 }, { "epoch": 0.12511156445237792, "grad_norm": 0.15260787308216095, "learning_rate": 0.002, "loss": 2.5884, "step": 62800 }, { "epoch": 0.12513148667601684, "grad_norm": 0.14895042777061462, "learning_rate": 0.002, "loss": 2.5906, "step": 62810 }, { "epoch": 0.12515140889965573, "grad_norm": 0.15682914853096008, "learning_rate": 0.002, "loss": 2.5697, "step": 62820 }, { "epoch": 0.12517133112329465, "grad_norm": 0.16481855511665344, "learning_rate": 0.002, "loss": 2.5768, "step": 62830 }, { "epoch": 0.12519125334693357, "grad_norm": 0.1703219711780548, "learning_rate": 0.002, "loss": 2.5684, "step": 62840 }, { "epoch": 0.12521117557057249, "grad_norm": 0.17401504516601562, "learning_rate": 0.002, "loss": 2.5724, "step": 62850 }, { "epoch": 0.1252310977942114, "grad_norm": 0.1512814462184906, "learning_rate": 0.002, "loss": 2.5725, "step": 62860 }, { "epoch": 0.12525102001785032, "grad_norm": 0.15948793292045593, "learning_rate": 0.002, "loss": 2.5714, "step": 62870 }, { "epoch": 0.1252709422414892, "grad_norm": 0.1457166075706482, "learning_rate": 0.002, "loss": 2.5619, "step": 62880 }, { "epoch": 0.12529086446512813, "grad_norm": 0.16001881659030914, "learning_rate": 0.002, "loss": 2.5715, "step": 62890 }, { "epoch": 0.12531078668876705, "grad_norm": 0.17890439927577972, "learning_rate": 0.002, "loss": 2.5724, "step": 62900 }, { "epoch": 0.12533070891240597, "grad_norm": 0.14967937767505646, "learning_rate": 0.002, "loss": 2.5746, "step": 62910 }, { "epoch": 0.1253506311360449, "grad_norm": 0.1407821923494339, "learning_rate": 0.002, "loss": 2.577, "step": 62920 }, { "epoch": 0.1253705533596838, "grad_norm": 0.17332130670547485, "learning_rate": 0.002, "loss": 2.555, "step": 62930 }, { "epoch": 0.1253904755833227, "grad_norm": 0.15012507140636444, "learning_rate": 0.002, "loss": 2.5662, "step": 62940 }, { "epoch": 0.12541039780696162, "grad_norm": 0.17279791831970215, "learning_rate": 0.002, "loss": 2.5868, "step": 62950 }, { "epoch": 0.12543032003060053, "grad_norm": 0.17663653194904327, "learning_rate": 0.002, "loss": 2.5764, "step": 62960 }, { "epoch": 0.12545024225423945, "grad_norm": 0.16838482022285461, "learning_rate": 0.002, "loss": 2.578, "step": 62970 }, { "epoch": 0.12547016447787837, "grad_norm": 0.15350252389907837, "learning_rate": 0.002, "loss": 2.566, "step": 62980 }, { "epoch": 0.1254900867015173, "grad_norm": 0.14521653950214386, "learning_rate": 0.002, "loss": 2.5867, "step": 62990 }, { "epoch": 0.12551000892515618, "grad_norm": 0.17346499860286713, "learning_rate": 0.002, "loss": 2.5618, "step": 63000 }, { "epoch": 0.1255299311487951, "grad_norm": 0.14901231229305267, "learning_rate": 0.002, "loss": 2.5581, "step": 63010 }, { "epoch": 0.12554985337243402, "grad_norm": 0.16516496241092682, "learning_rate": 0.002, "loss": 2.5741, "step": 63020 }, { "epoch": 0.12556977559607294, "grad_norm": 0.1758066862821579, "learning_rate": 0.002, "loss": 2.5664, "step": 63030 }, { "epoch": 0.12558969781971185, "grad_norm": 0.144609734416008, "learning_rate": 0.002, "loss": 2.58, "step": 63040 }, { "epoch": 0.12560962004335074, "grad_norm": 0.14657820761203766, "learning_rate": 0.002, "loss": 2.5698, "step": 63050 }, { "epoch": 0.12562954226698966, "grad_norm": 0.19217920303344727, "learning_rate": 0.002, "loss": 2.5678, "step": 63060 }, { "epoch": 0.12564946449062858, "grad_norm": 0.18369486927986145, "learning_rate": 0.002, "loss": 2.5623, "step": 63070 }, { "epoch": 0.1256693867142675, "grad_norm": 0.14749634265899658, "learning_rate": 0.002, "loss": 2.5564, "step": 63080 }, { "epoch": 0.12568930893790642, "grad_norm": 0.16642151772975922, "learning_rate": 0.002, "loss": 2.5714, "step": 63090 }, { "epoch": 0.12570923116154534, "grad_norm": 0.16332519054412842, "learning_rate": 0.002, "loss": 2.5803, "step": 63100 }, { "epoch": 0.12572915338518423, "grad_norm": 0.17004439234733582, "learning_rate": 0.002, "loss": 2.5543, "step": 63110 }, { "epoch": 0.12574907560882315, "grad_norm": 0.14970272779464722, "learning_rate": 0.002, "loss": 2.561, "step": 63120 }, { "epoch": 0.12576899783246206, "grad_norm": 0.18391819298267365, "learning_rate": 0.002, "loss": 2.5739, "step": 63130 }, { "epoch": 0.12578892005610098, "grad_norm": 0.13092853128910065, "learning_rate": 0.002, "loss": 2.571, "step": 63140 }, { "epoch": 0.1258088422797399, "grad_norm": 0.2185916006565094, "learning_rate": 0.002, "loss": 2.5874, "step": 63150 }, { "epoch": 0.12582876450337882, "grad_norm": 0.16354043781757355, "learning_rate": 0.002, "loss": 2.5736, "step": 63160 }, { "epoch": 0.1258486867270177, "grad_norm": 0.18176595866680145, "learning_rate": 0.002, "loss": 2.57, "step": 63170 }, { "epoch": 0.12586860895065663, "grad_norm": 0.14655247330665588, "learning_rate": 0.002, "loss": 2.5649, "step": 63180 }, { "epoch": 0.12588853117429555, "grad_norm": 0.14851658046245575, "learning_rate": 0.002, "loss": 2.5734, "step": 63190 }, { "epoch": 0.12590845339793447, "grad_norm": 0.14355799555778503, "learning_rate": 0.002, "loss": 2.5809, "step": 63200 }, { "epoch": 0.12592837562157339, "grad_norm": 0.16623219847679138, "learning_rate": 0.002, "loss": 2.5734, "step": 63210 }, { "epoch": 0.1259482978452123, "grad_norm": 0.17069071531295776, "learning_rate": 0.002, "loss": 2.5885, "step": 63220 }, { "epoch": 0.1259682200688512, "grad_norm": 0.19440282881259918, "learning_rate": 0.002, "loss": 2.5771, "step": 63230 }, { "epoch": 0.1259881422924901, "grad_norm": 0.16875576972961426, "learning_rate": 0.002, "loss": 2.5932, "step": 63240 }, { "epoch": 0.12600806451612903, "grad_norm": 0.15912002325057983, "learning_rate": 0.002, "loss": 2.565, "step": 63250 }, { "epoch": 0.12602798673976795, "grad_norm": 0.19905401766300201, "learning_rate": 0.002, "loss": 2.5647, "step": 63260 }, { "epoch": 0.12604790896340687, "grad_norm": 0.18667002022266388, "learning_rate": 0.002, "loss": 2.5798, "step": 63270 }, { "epoch": 0.1260678311870458, "grad_norm": 0.15155626833438873, "learning_rate": 0.002, "loss": 2.5718, "step": 63280 }, { "epoch": 0.12608775341068468, "grad_norm": 0.17036694288253784, "learning_rate": 0.002, "loss": 2.5644, "step": 63290 }, { "epoch": 0.1261076756343236, "grad_norm": 0.14956390857696533, "learning_rate": 0.002, "loss": 2.5697, "step": 63300 }, { "epoch": 0.12612759785796251, "grad_norm": 0.1925261914730072, "learning_rate": 0.002, "loss": 2.5689, "step": 63310 }, { "epoch": 0.12614752008160143, "grad_norm": 0.15118667483329773, "learning_rate": 0.002, "loss": 2.5794, "step": 63320 }, { "epoch": 0.12616744230524035, "grad_norm": 0.1417974978685379, "learning_rate": 0.002, "loss": 2.5667, "step": 63330 }, { "epoch": 0.12618736452887924, "grad_norm": 0.1703980565071106, "learning_rate": 0.002, "loss": 2.5788, "step": 63340 }, { "epoch": 0.12620728675251816, "grad_norm": 0.16965843737125397, "learning_rate": 0.002, "loss": 2.5698, "step": 63350 }, { "epoch": 0.12622720897615708, "grad_norm": 0.18533672392368317, "learning_rate": 0.002, "loss": 2.5786, "step": 63360 }, { "epoch": 0.126247131199796, "grad_norm": 0.1754039227962494, "learning_rate": 0.002, "loss": 2.5708, "step": 63370 }, { "epoch": 0.12626705342343492, "grad_norm": 0.17505396902561188, "learning_rate": 0.002, "loss": 2.5618, "step": 63380 }, { "epoch": 0.12628697564707383, "grad_norm": 0.16053491830825806, "learning_rate": 0.002, "loss": 2.5822, "step": 63390 }, { "epoch": 0.12630689787071273, "grad_norm": 0.15875943005084991, "learning_rate": 0.002, "loss": 2.5667, "step": 63400 }, { "epoch": 0.12632682009435164, "grad_norm": 0.15615268051624298, "learning_rate": 0.002, "loss": 2.5706, "step": 63410 }, { "epoch": 0.12634674231799056, "grad_norm": 0.16118738055229187, "learning_rate": 0.002, "loss": 2.5672, "step": 63420 }, { "epoch": 0.12636666454162948, "grad_norm": 0.1776263415813446, "learning_rate": 0.002, "loss": 2.5728, "step": 63430 }, { "epoch": 0.1263865867652684, "grad_norm": 0.1946462094783783, "learning_rate": 0.002, "loss": 2.5713, "step": 63440 }, { "epoch": 0.12640650898890732, "grad_norm": 0.17537935078144073, "learning_rate": 0.002, "loss": 2.5612, "step": 63450 }, { "epoch": 0.1264264312125462, "grad_norm": 0.17372609674930573, "learning_rate": 0.002, "loss": 2.5694, "step": 63460 }, { "epoch": 0.12644635343618513, "grad_norm": 0.160383939743042, "learning_rate": 0.002, "loss": 2.5612, "step": 63470 }, { "epoch": 0.12646627565982405, "grad_norm": 0.17910252511501312, "learning_rate": 0.002, "loss": 2.5728, "step": 63480 }, { "epoch": 0.12648619788346296, "grad_norm": 0.14536380767822266, "learning_rate": 0.002, "loss": 2.5697, "step": 63490 }, { "epoch": 0.12650612010710188, "grad_norm": 0.18596184253692627, "learning_rate": 0.002, "loss": 2.5704, "step": 63500 }, { "epoch": 0.1265260423307408, "grad_norm": 0.14990532398223877, "learning_rate": 0.002, "loss": 2.5815, "step": 63510 }, { "epoch": 0.1265459645543797, "grad_norm": 0.1693122237920761, "learning_rate": 0.002, "loss": 2.5711, "step": 63520 }, { "epoch": 0.1265658867780186, "grad_norm": 0.1458946019411087, "learning_rate": 0.002, "loss": 2.5769, "step": 63530 }, { "epoch": 0.12658580900165753, "grad_norm": 0.16336403787136078, "learning_rate": 0.002, "loss": 2.5774, "step": 63540 }, { "epoch": 0.12660573122529645, "grad_norm": 0.18883037567138672, "learning_rate": 0.002, "loss": 2.5721, "step": 63550 }, { "epoch": 0.12662565344893537, "grad_norm": 0.14559130370616913, "learning_rate": 0.002, "loss": 2.5844, "step": 63560 }, { "epoch": 0.12664557567257426, "grad_norm": 0.1581253707408905, "learning_rate": 0.002, "loss": 2.5828, "step": 63570 }, { "epoch": 0.12666549789621318, "grad_norm": 0.1755651831626892, "learning_rate": 0.002, "loss": 2.5807, "step": 63580 }, { "epoch": 0.1266854201198521, "grad_norm": 0.18153022229671478, "learning_rate": 0.002, "loss": 2.5644, "step": 63590 }, { "epoch": 0.126705342343491, "grad_norm": 0.16113772988319397, "learning_rate": 0.002, "loss": 2.5797, "step": 63600 }, { "epoch": 0.12672526456712993, "grad_norm": 0.21938049793243408, "learning_rate": 0.002, "loss": 2.5877, "step": 63610 }, { "epoch": 0.12674518679076885, "grad_norm": 0.14171510934829712, "learning_rate": 0.002, "loss": 2.5669, "step": 63620 }, { "epoch": 0.12676510901440774, "grad_norm": 0.16214120388031006, "learning_rate": 0.002, "loss": 2.5778, "step": 63630 }, { "epoch": 0.12678503123804666, "grad_norm": 0.1558782011270523, "learning_rate": 0.002, "loss": 2.5724, "step": 63640 }, { "epoch": 0.12680495346168558, "grad_norm": 0.15937262773513794, "learning_rate": 0.002, "loss": 2.5802, "step": 63650 }, { "epoch": 0.1268248756853245, "grad_norm": 0.19181570410728455, "learning_rate": 0.002, "loss": 2.572, "step": 63660 }, { "epoch": 0.1268447979089634, "grad_norm": 0.15849490463733673, "learning_rate": 0.002, "loss": 2.5543, "step": 63670 }, { "epoch": 0.12686472013260233, "grad_norm": 0.15316464006900787, "learning_rate": 0.002, "loss": 2.5802, "step": 63680 }, { "epoch": 0.12688464235624122, "grad_norm": 0.16095568239688873, "learning_rate": 0.002, "loss": 2.5605, "step": 63690 }, { "epoch": 0.12690456457988014, "grad_norm": 0.16724473237991333, "learning_rate": 0.002, "loss": 2.578, "step": 63700 }, { "epoch": 0.12692448680351906, "grad_norm": 0.14530302584171295, "learning_rate": 0.002, "loss": 2.5655, "step": 63710 }, { "epoch": 0.12694440902715798, "grad_norm": 0.18783904612064362, "learning_rate": 0.002, "loss": 2.5578, "step": 63720 }, { "epoch": 0.1269643312507969, "grad_norm": 0.1703042984008789, "learning_rate": 0.002, "loss": 2.5633, "step": 63730 }, { "epoch": 0.12698425347443582, "grad_norm": 0.13420872390270233, "learning_rate": 0.002, "loss": 2.5728, "step": 63740 }, { "epoch": 0.1270041756980747, "grad_norm": 0.16614769399166107, "learning_rate": 0.002, "loss": 2.5757, "step": 63750 }, { "epoch": 0.12702409792171362, "grad_norm": 0.18149596452713013, "learning_rate": 0.002, "loss": 2.5817, "step": 63760 }, { "epoch": 0.12704402014535254, "grad_norm": 0.1722862720489502, "learning_rate": 0.002, "loss": 2.5714, "step": 63770 }, { "epoch": 0.12706394236899146, "grad_norm": 0.22835904359817505, "learning_rate": 0.002, "loss": 2.5872, "step": 63780 }, { "epoch": 0.12708386459263038, "grad_norm": 0.14530281722545624, "learning_rate": 0.002, "loss": 2.5827, "step": 63790 }, { "epoch": 0.1271037868162693, "grad_norm": 0.14632953703403473, "learning_rate": 0.002, "loss": 2.5696, "step": 63800 }, { "epoch": 0.1271237090399082, "grad_norm": 0.1664658784866333, "learning_rate": 0.002, "loss": 2.5821, "step": 63810 }, { "epoch": 0.1271436312635471, "grad_norm": 0.16886717081069946, "learning_rate": 0.002, "loss": 2.5891, "step": 63820 }, { "epoch": 0.12716355348718603, "grad_norm": 0.163367360830307, "learning_rate": 0.002, "loss": 2.5655, "step": 63830 }, { "epoch": 0.12718347571082494, "grad_norm": 0.16468285024166107, "learning_rate": 0.002, "loss": 2.5684, "step": 63840 }, { "epoch": 0.12720339793446386, "grad_norm": 0.15229232609272003, "learning_rate": 0.002, "loss": 2.5802, "step": 63850 }, { "epoch": 0.12722332015810275, "grad_norm": 0.13542120158672333, "learning_rate": 0.002, "loss": 2.5681, "step": 63860 }, { "epoch": 0.12724324238174167, "grad_norm": 0.17468029260635376, "learning_rate": 0.002, "loss": 2.5772, "step": 63870 }, { "epoch": 0.1272631646053806, "grad_norm": 0.1611625701189041, "learning_rate": 0.002, "loss": 2.5629, "step": 63880 }, { "epoch": 0.1272830868290195, "grad_norm": 0.18713460862636566, "learning_rate": 0.002, "loss": 2.5547, "step": 63890 }, { "epoch": 0.12730300905265843, "grad_norm": 0.16447137296199799, "learning_rate": 0.002, "loss": 2.5781, "step": 63900 }, { "epoch": 0.12732293127629735, "grad_norm": 0.15688468515872955, "learning_rate": 0.002, "loss": 2.5758, "step": 63910 }, { "epoch": 0.12734285349993624, "grad_norm": 0.17690204083919525, "learning_rate": 0.002, "loss": 2.5855, "step": 63920 }, { "epoch": 0.12736277572357516, "grad_norm": 0.16667668521404266, "learning_rate": 0.002, "loss": 2.575, "step": 63930 }, { "epoch": 0.12738269794721407, "grad_norm": 0.13200463354587555, "learning_rate": 0.002, "loss": 2.573, "step": 63940 }, { "epoch": 0.127402620170853, "grad_norm": 0.16158147156238556, "learning_rate": 0.002, "loss": 2.5724, "step": 63950 }, { "epoch": 0.1274225423944919, "grad_norm": 0.15797537565231323, "learning_rate": 0.002, "loss": 2.5767, "step": 63960 }, { "epoch": 0.12744246461813083, "grad_norm": 0.2322416454553604, "learning_rate": 0.002, "loss": 2.5726, "step": 63970 }, { "epoch": 0.12746238684176972, "grad_norm": 0.19535711407661438, "learning_rate": 0.002, "loss": 2.5733, "step": 63980 }, { "epoch": 0.12748230906540864, "grad_norm": 0.13864465057849884, "learning_rate": 0.002, "loss": 2.5853, "step": 63990 }, { "epoch": 0.12750223128904756, "grad_norm": 0.15698131918907166, "learning_rate": 0.002, "loss": 2.5676, "step": 64000 }, { "epoch": 0.12752215351268648, "grad_norm": 0.1529923975467682, "learning_rate": 0.002, "loss": 2.5818, "step": 64010 }, { "epoch": 0.1275420757363254, "grad_norm": 0.149643674492836, "learning_rate": 0.002, "loss": 2.5631, "step": 64020 }, { "epoch": 0.1275619979599643, "grad_norm": 0.15409764647483826, "learning_rate": 0.002, "loss": 2.5733, "step": 64030 }, { "epoch": 0.1275819201836032, "grad_norm": 0.18528594076633453, "learning_rate": 0.002, "loss": 2.5657, "step": 64040 }, { "epoch": 0.12760184240724212, "grad_norm": 0.16995206475257874, "learning_rate": 0.002, "loss": 2.5718, "step": 64050 }, { "epoch": 0.12762176463088104, "grad_norm": 0.17766544222831726, "learning_rate": 0.002, "loss": 2.5731, "step": 64060 }, { "epoch": 0.12764168685451996, "grad_norm": 0.1621304452419281, "learning_rate": 0.002, "loss": 2.5702, "step": 64070 }, { "epoch": 0.12766160907815888, "grad_norm": 0.15111888945102692, "learning_rate": 0.002, "loss": 2.582, "step": 64080 }, { "epoch": 0.12768153130179777, "grad_norm": 0.13683421909809113, "learning_rate": 0.002, "loss": 2.5522, "step": 64090 }, { "epoch": 0.1277014535254367, "grad_norm": 0.20921264588832855, "learning_rate": 0.002, "loss": 2.5789, "step": 64100 }, { "epoch": 0.1277213757490756, "grad_norm": 0.15172088146209717, "learning_rate": 0.002, "loss": 2.5804, "step": 64110 }, { "epoch": 0.12774129797271452, "grad_norm": 0.15784937143325806, "learning_rate": 0.002, "loss": 2.565, "step": 64120 }, { "epoch": 0.12776122019635344, "grad_norm": 0.1950494796037674, "learning_rate": 0.002, "loss": 2.5755, "step": 64130 }, { "epoch": 0.12778114241999236, "grad_norm": 0.14260391891002655, "learning_rate": 0.002, "loss": 2.5825, "step": 64140 }, { "epoch": 0.12780106464363125, "grad_norm": 0.15416832268238068, "learning_rate": 0.002, "loss": 2.5805, "step": 64150 }, { "epoch": 0.12782098686727017, "grad_norm": 0.19600823521614075, "learning_rate": 0.002, "loss": 2.5859, "step": 64160 }, { "epoch": 0.1278409090909091, "grad_norm": 0.14778712391853333, "learning_rate": 0.002, "loss": 2.5728, "step": 64170 }, { "epoch": 0.127860831314548, "grad_norm": 0.15510521829128265, "learning_rate": 0.002, "loss": 2.5849, "step": 64180 }, { "epoch": 0.12788075353818693, "grad_norm": 0.16371795535087585, "learning_rate": 0.002, "loss": 2.58, "step": 64190 }, { "epoch": 0.12790067576182584, "grad_norm": 0.14273622632026672, "learning_rate": 0.002, "loss": 2.5822, "step": 64200 }, { "epoch": 0.12792059798546473, "grad_norm": 0.18710637092590332, "learning_rate": 0.002, "loss": 2.576, "step": 64210 }, { "epoch": 0.12794052020910365, "grad_norm": 0.1800726354122162, "learning_rate": 0.002, "loss": 2.562, "step": 64220 }, { "epoch": 0.12796044243274257, "grad_norm": 0.15788310766220093, "learning_rate": 0.002, "loss": 2.566, "step": 64230 }, { "epoch": 0.1279803646563815, "grad_norm": 0.14346490800380707, "learning_rate": 0.002, "loss": 2.5787, "step": 64240 }, { "epoch": 0.1280002868800204, "grad_norm": 0.15531699359416962, "learning_rate": 0.002, "loss": 2.5699, "step": 64250 }, { "epoch": 0.12802020910365933, "grad_norm": 0.15105007588863373, "learning_rate": 0.002, "loss": 2.5828, "step": 64260 }, { "epoch": 0.12804013132729822, "grad_norm": 0.160893052816391, "learning_rate": 0.002, "loss": 2.5665, "step": 64270 }, { "epoch": 0.12806005355093714, "grad_norm": 0.1582086682319641, "learning_rate": 0.002, "loss": 2.5734, "step": 64280 }, { "epoch": 0.12807997577457605, "grad_norm": 0.17953017354011536, "learning_rate": 0.002, "loss": 2.5666, "step": 64290 }, { "epoch": 0.12809989799821497, "grad_norm": 0.14656861126422882, "learning_rate": 0.002, "loss": 2.582, "step": 64300 }, { "epoch": 0.1281198202218539, "grad_norm": 0.17471100389957428, "learning_rate": 0.002, "loss": 2.5725, "step": 64310 }, { "epoch": 0.12813974244549278, "grad_norm": 0.15493083000183105, "learning_rate": 0.002, "loss": 2.567, "step": 64320 }, { "epoch": 0.1281596646691317, "grad_norm": 0.14772042632102966, "learning_rate": 0.002, "loss": 2.5716, "step": 64330 }, { "epoch": 0.12817958689277062, "grad_norm": 0.1595008820295334, "learning_rate": 0.002, "loss": 2.5725, "step": 64340 }, { "epoch": 0.12819950911640954, "grad_norm": 0.16866029798984528, "learning_rate": 0.002, "loss": 2.5704, "step": 64350 }, { "epoch": 0.12821943134004846, "grad_norm": 0.15708909928798676, "learning_rate": 0.002, "loss": 2.5656, "step": 64360 }, { "epoch": 0.12823935356368737, "grad_norm": 0.18181098997592926, "learning_rate": 0.002, "loss": 2.5651, "step": 64370 }, { "epoch": 0.12825927578732627, "grad_norm": 0.15895406901836395, "learning_rate": 0.002, "loss": 2.5728, "step": 64380 }, { "epoch": 0.12827919801096518, "grad_norm": 0.13478752970695496, "learning_rate": 0.002, "loss": 2.5652, "step": 64390 }, { "epoch": 0.1282991202346041, "grad_norm": 0.17508374154567719, "learning_rate": 0.002, "loss": 2.5747, "step": 64400 }, { "epoch": 0.12831904245824302, "grad_norm": 0.18247708678245544, "learning_rate": 0.002, "loss": 2.579, "step": 64410 }, { "epoch": 0.12833896468188194, "grad_norm": 0.1595645546913147, "learning_rate": 0.002, "loss": 2.5661, "step": 64420 }, { "epoch": 0.12835888690552086, "grad_norm": 0.18514464795589447, "learning_rate": 0.002, "loss": 2.5707, "step": 64430 }, { "epoch": 0.12837880912915975, "grad_norm": 0.15312246978282928, "learning_rate": 0.002, "loss": 2.5788, "step": 64440 }, { "epoch": 0.12839873135279867, "grad_norm": 0.1828213483095169, "learning_rate": 0.002, "loss": 2.5709, "step": 64450 }, { "epoch": 0.12841865357643759, "grad_norm": 0.17784641683101654, "learning_rate": 0.002, "loss": 2.5777, "step": 64460 }, { "epoch": 0.1284385758000765, "grad_norm": 0.1442687213420868, "learning_rate": 0.002, "loss": 2.5641, "step": 64470 }, { "epoch": 0.12845849802371542, "grad_norm": 0.14602641761302948, "learning_rate": 0.002, "loss": 2.5618, "step": 64480 }, { "epoch": 0.12847842024735434, "grad_norm": 0.16834110021591187, "learning_rate": 0.002, "loss": 2.5769, "step": 64490 }, { "epoch": 0.12849834247099323, "grad_norm": 0.17101942002773285, "learning_rate": 0.002, "loss": 2.5748, "step": 64500 }, { "epoch": 0.12851826469463215, "grad_norm": 0.18265138566493988, "learning_rate": 0.002, "loss": 2.5707, "step": 64510 }, { "epoch": 0.12853818691827107, "grad_norm": 0.15986080467700958, "learning_rate": 0.002, "loss": 2.5841, "step": 64520 }, { "epoch": 0.12855810914191, "grad_norm": 0.1443597823381424, "learning_rate": 0.002, "loss": 2.5648, "step": 64530 }, { "epoch": 0.1285780313655489, "grad_norm": 0.18072284758090973, "learning_rate": 0.002, "loss": 2.5735, "step": 64540 }, { "epoch": 0.12859795358918782, "grad_norm": 0.1658676266670227, "learning_rate": 0.002, "loss": 2.5749, "step": 64550 }, { "epoch": 0.12861787581282672, "grad_norm": 0.20085987448692322, "learning_rate": 0.002, "loss": 2.5811, "step": 64560 }, { "epoch": 0.12863779803646563, "grad_norm": 0.13680578768253326, "learning_rate": 0.002, "loss": 2.5884, "step": 64570 }, { "epoch": 0.12865772026010455, "grad_norm": 0.19338271021842957, "learning_rate": 0.002, "loss": 2.5873, "step": 64580 }, { "epoch": 0.12867764248374347, "grad_norm": 0.162871316075325, "learning_rate": 0.002, "loss": 2.5757, "step": 64590 }, { "epoch": 0.1286975647073824, "grad_norm": 0.14233382046222687, "learning_rate": 0.002, "loss": 2.5762, "step": 64600 }, { "epoch": 0.12871748693102128, "grad_norm": 0.18684746325016022, "learning_rate": 0.002, "loss": 2.5671, "step": 64610 }, { "epoch": 0.1287374091546602, "grad_norm": 0.13922496140003204, "learning_rate": 0.002, "loss": 2.5782, "step": 64620 }, { "epoch": 0.12875733137829912, "grad_norm": 0.14288797974586487, "learning_rate": 0.002, "loss": 2.5859, "step": 64630 }, { "epoch": 0.12877725360193804, "grad_norm": 0.17104493081569672, "learning_rate": 0.002, "loss": 2.5571, "step": 64640 }, { "epoch": 0.12879717582557695, "grad_norm": 0.147172749042511, "learning_rate": 0.002, "loss": 2.5532, "step": 64650 }, { "epoch": 0.12881709804921587, "grad_norm": 0.16266241669654846, "learning_rate": 0.002, "loss": 2.5639, "step": 64660 }, { "epoch": 0.12883702027285476, "grad_norm": 0.15653742849826813, "learning_rate": 0.002, "loss": 2.5677, "step": 64670 }, { "epoch": 0.12885694249649368, "grad_norm": 0.15621884167194366, "learning_rate": 0.002, "loss": 2.5718, "step": 64680 }, { "epoch": 0.1288768647201326, "grad_norm": 0.12388361990451813, "learning_rate": 0.002, "loss": 2.5807, "step": 64690 }, { "epoch": 0.12889678694377152, "grad_norm": 0.15571041405200958, "learning_rate": 0.002, "loss": 2.5678, "step": 64700 }, { "epoch": 0.12891670916741044, "grad_norm": 0.14539408683776855, "learning_rate": 0.002, "loss": 2.5732, "step": 64710 }, { "epoch": 0.12893663139104936, "grad_norm": 0.1415540874004364, "learning_rate": 0.002, "loss": 2.5606, "step": 64720 }, { "epoch": 0.12895655361468825, "grad_norm": 0.17267490923404694, "learning_rate": 0.002, "loss": 2.5815, "step": 64730 }, { "epoch": 0.12897647583832716, "grad_norm": 0.1643027663230896, "learning_rate": 0.002, "loss": 2.5839, "step": 64740 }, { "epoch": 0.12899639806196608, "grad_norm": 0.17892025411128998, "learning_rate": 0.002, "loss": 2.5785, "step": 64750 }, { "epoch": 0.129016320285605, "grad_norm": 0.12562713027000427, "learning_rate": 0.002, "loss": 2.5729, "step": 64760 }, { "epoch": 0.12903624250924392, "grad_norm": 0.16695541143417358, "learning_rate": 0.002, "loss": 2.5684, "step": 64770 }, { "epoch": 0.12905616473288284, "grad_norm": 0.1626463532447815, "learning_rate": 0.002, "loss": 2.5623, "step": 64780 }, { "epoch": 0.12907608695652173, "grad_norm": 0.1848723590373993, "learning_rate": 0.002, "loss": 2.5779, "step": 64790 }, { "epoch": 0.12909600918016065, "grad_norm": 0.15543541312217712, "learning_rate": 0.002, "loss": 2.5635, "step": 64800 }, { "epoch": 0.12911593140379957, "grad_norm": 0.15516668558120728, "learning_rate": 0.002, "loss": 2.5731, "step": 64810 }, { "epoch": 0.12913585362743848, "grad_norm": 0.1793934851884842, "learning_rate": 0.002, "loss": 2.5534, "step": 64820 }, { "epoch": 0.1291557758510774, "grad_norm": 0.1587754637002945, "learning_rate": 0.002, "loss": 2.5786, "step": 64830 }, { "epoch": 0.1291756980747163, "grad_norm": 0.14120398461818695, "learning_rate": 0.002, "loss": 2.5739, "step": 64840 }, { "epoch": 0.1291956202983552, "grad_norm": 0.15058189630508423, "learning_rate": 0.002, "loss": 2.5644, "step": 64850 }, { "epoch": 0.12921554252199413, "grad_norm": 0.1790708303451538, "learning_rate": 0.002, "loss": 2.5824, "step": 64860 }, { "epoch": 0.12923546474563305, "grad_norm": 0.14546653628349304, "learning_rate": 0.002, "loss": 2.5824, "step": 64870 }, { "epoch": 0.12925538696927197, "grad_norm": 0.16449645161628723, "learning_rate": 0.002, "loss": 2.565, "step": 64880 }, { "epoch": 0.1292753091929109, "grad_norm": 0.16079232096672058, "learning_rate": 0.002, "loss": 2.5606, "step": 64890 }, { "epoch": 0.12929523141654978, "grad_norm": 0.16502328217029572, "learning_rate": 0.002, "loss": 2.5523, "step": 64900 }, { "epoch": 0.1293151536401887, "grad_norm": 0.1658296138048172, "learning_rate": 0.002, "loss": 2.5863, "step": 64910 }, { "epoch": 0.12933507586382761, "grad_norm": 0.16567885875701904, "learning_rate": 0.002, "loss": 2.5746, "step": 64920 }, { "epoch": 0.12935499808746653, "grad_norm": 0.16669046878814697, "learning_rate": 0.002, "loss": 2.5569, "step": 64930 }, { "epoch": 0.12937492031110545, "grad_norm": 0.15396873652935028, "learning_rate": 0.002, "loss": 2.5699, "step": 64940 }, { "epoch": 0.12939484253474437, "grad_norm": 0.14789390563964844, "learning_rate": 0.002, "loss": 2.5734, "step": 64950 }, { "epoch": 0.12941476475838326, "grad_norm": 0.15003244578838348, "learning_rate": 0.002, "loss": 2.5752, "step": 64960 }, { "epoch": 0.12943468698202218, "grad_norm": 0.15145541727542877, "learning_rate": 0.002, "loss": 2.5799, "step": 64970 }, { "epoch": 0.1294546092056611, "grad_norm": 0.14763692021369934, "learning_rate": 0.002, "loss": 2.5685, "step": 64980 }, { "epoch": 0.12947453142930002, "grad_norm": 0.15004967153072357, "learning_rate": 0.002, "loss": 2.5601, "step": 64990 }, { "epoch": 0.12949445365293893, "grad_norm": 0.15789583325386047, "learning_rate": 0.002, "loss": 2.5755, "step": 65000 }, { "epoch": 0.12951437587657785, "grad_norm": 0.1787686049938202, "learning_rate": 0.002, "loss": 2.5723, "step": 65010 }, { "epoch": 0.12953429810021674, "grad_norm": 0.1492435783147812, "learning_rate": 0.002, "loss": 2.5707, "step": 65020 }, { "epoch": 0.12955422032385566, "grad_norm": 0.16802605986595154, "learning_rate": 0.002, "loss": 2.581, "step": 65030 }, { "epoch": 0.12957414254749458, "grad_norm": 0.16405530273914337, "learning_rate": 0.002, "loss": 2.5872, "step": 65040 }, { "epoch": 0.1295940647711335, "grad_norm": 0.17497758567333221, "learning_rate": 0.002, "loss": 2.5799, "step": 65050 }, { "epoch": 0.12961398699477242, "grad_norm": 0.15658080577850342, "learning_rate": 0.002, "loss": 2.5787, "step": 65060 }, { "epoch": 0.1296339092184113, "grad_norm": 0.15474256873130798, "learning_rate": 0.002, "loss": 2.5803, "step": 65070 }, { "epoch": 0.12965383144205023, "grad_norm": 0.19609281420707703, "learning_rate": 0.002, "loss": 2.5777, "step": 65080 }, { "epoch": 0.12967375366568915, "grad_norm": 0.15641556680202484, "learning_rate": 0.002, "loss": 2.5746, "step": 65090 }, { "epoch": 0.12969367588932806, "grad_norm": 0.2114478051662445, "learning_rate": 0.002, "loss": 2.5631, "step": 65100 }, { "epoch": 0.12971359811296698, "grad_norm": 0.1662994921207428, "learning_rate": 0.002, "loss": 2.5731, "step": 65110 }, { "epoch": 0.1297335203366059, "grad_norm": 0.14654317498207092, "learning_rate": 0.002, "loss": 2.5708, "step": 65120 }, { "epoch": 0.1297534425602448, "grad_norm": 0.14339379966259003, "learning_rate": 0.002, "loss": 2.5721, "step": 65130 }, { "epoch": 0.1297733647838837, "grad_norm": 0.14182403683662415, "learning_rate": 0.002, "loss": 2.5655, "step": 65140 }, { "epoch": 0.12979328700752263, "grad_norm": 0.1725817322731018, "learning_rate": 0.002, "loss": 2.5701, "step": 65150 }, { "epoch": 0.12981320923116155, "grad_norm": 0.17497563362121582, "learning_rate": 0.002, "loss": 2.5748, "step": 65160 }, { "epoch": 0.12983313145480047, "grad_norm": 0.1690622866153717, "learning_rate": 0.002, "loss": 2.5774, "step": 65170 }, { "epoch": 0.12985305367843938, "grad_norm": 0.1716756671667099, "learning_rate": 0.002, "loss": 2.5729, "step": 65180 }, { "epoch": 0.12987297590207827, "grad_norm": 0.17420262098312378, "learning_rate": 0.002, "loss": 2.5768, "step": 65190 }, { "epoch": 0.1298928981257172, "grad_norm": 0.14906851947307587, "learning_rate": 0.002, "loss": 2.5803, "step": 65200 }, { "epoch": 0.1299128203493561, "grad_norm": 0.18783186376094818, "learning_rate": 0.002, "loss": 2.5684, "step": 65210 }, { "epoch": 0.12993274257299503, "grad_norm": 0.15758982300758362, "learning_rate": 0.002, "loss": 2.5819, "step": 65220 }, { "epoch": 0.12995266479663395, "grad_norm": 0.1494109183549881, "learning_rate": 0.002, "loss": 2.5653, "step": 65230 }, { "epoch": 0.12997258702027287, "grad_norm": 0.14892041683197021, "learning_rate": 0.002, "loss": 2.576, "step": 65240 }, { "epoch": 0.12999250924391176, "grad_norm": 0.18468423187732697, "learning_rate": 0.002, "loss": 2.5663, "step": 65250 }, { "epoch": 0.13001243146755068, "grad_norm": 0.17349152266979218, "learning_rate": 0.002, "loss": 2.5681, "step": 65260 }, { "epoch": 0.1300323536911896, "grad_norm": 0.19102022051811218, "learning_rate": 0.002, "loss": 2.5653, "step": 65270 }, { "epoch": 0.1300522759148285, "grad_norm": 0.15130887925624847, "learning_rate": 0.002, "loss": 2.5703, "step": 65280 }, { "epoch": 0.13007219813846743, "grad_norm": 0.1685878187417984, "learning_rate": 0.002, "loss": 2.581, "step": 65290 }, { "epoch": 0.13009212036210635, "grad_norm": 0.15818814933300018, "learning_rate": 0.002, "loss": 2.5647, "step": 65300 }, { "epoch": 0.13011204258574524, "grad_norm": 0.19046637415885925, "learning_rate": 0.002, "loss": 2.5768, "step": 65310 }, { "epoch": 0.13013196480938416, "grad_norm": 0.14939798414707184, "learning_rate": 0.002, "loss": 2.5811, "step": 65320 }, { "epoch": 0.13015188703302308, "grad_norm": 0.17305533587932587, "learning_rate": 0.002, "loss": 2.5794, "step": 65330 }, { "epoch": 0.130171809256662, "grad_norm": 0.13895632326602936, "learning_rate": 0.002, "loss": 2.5629, "step": 65340 }, { "epoch": 0.13019173148030092, "grad_norm": 0.17159900069236755, "learning_rate": 0.002, "loss": 2.5746, "step": 65350 }, { "epoch": 0.1302116537039398, "grad_norm": 0.15524353086948395, "learning_rate": 0.002, "loss": 2.5798, "step": 65360 }, { "epoch": 0.13023157592757872, "grad_norm": 0.14830996096134186, "learning_rate": 0.002, "loss": 2.5712, "step": 65370 }, { "epoch": 0.13025149815121764, "grad_norm": 0.16239498555660248, "learning_rate": 0.002, "loss": 2.5795, "step": 65380 }, { "epoch": 0.13027142037485656, "grad_norm": 0.1509844809770584, "learning_rate": 0.002, "loss": 2.581, "step": 65390 }, { "epoch": 0.13029134259849548, "grad_norm": 0.15821011364459991, "learning_rate": 0.002, "loss": 2.5923, "step": 65400 }, { "epoch": 0.1303112648221344, "grad_norm": 0.16456826031208038, "learning_rate": 0.002, "loss": 2.5698, "step": 65410 }, { "epoch": 0.1303311870457733, "grad_norm": 0.1847943216562271, "learning_rate": 0.002, "loss": 2.5817, "step": 65420 }, { "epoch": 0.1303511092694122, "grad_norm": 0.15403863787651062, "learning_rate": 0.002, "loss": 2.5771, "step": 65430 }, { "epoch": 0.13037103149305113, "grad_norm": 0.16321499645709991, "learning_rate": 0.002, "loss": 2.5774, "step": 65440 }, { "epoch": 0.13039095371669004, "grad_norm": 0.1807841807603836, "learning_rate": 0.002, "loss": 2.5865, "step": 65450 }, { "epoch": 0.13041087594032896, "grad_norm": 0.15111728012561798, "learning_rate": 0.002, "loss": 2.5594, "step": 65460 }, { "epoch": 0.13043079816396788, "grad_norm": 0.15956342220306396, "learning_rate": 0.002, "loss": 2.5819, "step": 65470 }, { "epoch": 0.13045072038760677, "grad_norm": 0.164947971701622, "learning_rate": 0.002, "loss": 2.5714, "step": 65480 }, { "epoch": 0.1304706426112457, "grad_norm": 0.206295907497406, "learning_rate": 0.002, "loss": 2.5706, "step": 65490 }, { "epoch": 0.1304905648348846, "grad_norm": 0.142253577709198, "learning_rate": 0.002, "loss": 2.553, "step": 65500 }, { "epoch": 0.13051048705852353, "grad_norm": 0.17667783796787262, "learning_rate": 0.002, "loss": 2.5743, "step": 65510 }, { "epoch": 0.13053040928216245, "grad_norm": 0.15529392659664154, "learning_rate": 0.002, "loss": 2.5738, "step": 65520 }, { "epoch": 0.13055033150580136, "grad_norm": 0.14933143556118011, "learning_rate": 0.002, "loss": 2.5892, "step": 65530 }, { "epoch": 0.13057025372944026, "grad_norm": 0.1752609759569168, "learning_rate": 0.002, "loss": 2.5676, "step": 65540 }, { "epoch": 0.13059017595307917, "grad_norm": 0.1662846803665161, "learning_rate": 0.002, "loss": 2.5962, "step": 65550 }, { "epoch": 0.1306100981767181, "grad_norm": 0.2059967964887619, "learning_rate": 0.002, "loss": 2.5688, "step": 65560 }, { "epoch": 0.130630020400357, "grad_norm": 0.1708315759897232, "learning_rate": 0.002, "loss": 2.5748, "step": 65570 }, { "epoch": 0.13064994262399593, "grad_norm": 0.14936715364456177, "learning_rate": 0.002, "loss": 2.5755, "step": 65580 }, { "epoch": 0.13066986484763482, "grad_norm": 0.1751689463853836, "learning_rate": 0.002, "loss": 2.595, "step": 65590 }, { "epoch": 0.13068978707127374, "grad_norm": 0.16374801099300385, "learning_rate": 0.002, "loss": 2.5783, "step": 65600 }, { "epoch": 0.13070970929491266, "grad_norm": 0.13189288973808289, "learning_rate": 0.002, "loss": 2.5664, "step": 65610 }, { "epoch": 0.13072963151855158, "grad_norm": 0.17468668520450592, "learning_rate": 0.002, "loss": 2.5662, "step": 65620 }, { "epoch": 0.1307495537421905, "grad_norm": 0.16667041182518005, "learning_rate": 0.002, "loss": 2.5684, "step": 65630 }, { "epoch": 0.1307694759658294, "grad_norm": 0.15423771739006042, "learning_rate": 0.002, "loss": 2.5847, "step": 65640 }, { "epoch": 0.1307893981894683, "grad_norm": 0.17704132199287415, "learning_rate": 0.002, "loss": 2.566, "step": 65650 }, { "epoch": 0.13080932041310722, "grad_norm": 0.13647039234638214, "learning_rate": 0.002, "loss": 2.5708, "step": 65660 }, { "epoch": 0.13082924263674614, "grad_norm": 0.1909256875514984, "learning_rate": 0.002, "loss": 2.5703, "step": 65670 }, { "epoch": 0.13084916486038506, "grad_norm": 0.1710045039653778, "learning_rate": 0.002, "loss": 2.5564, "step": 65680 }, { "epoch": 0.13086908708402398, "grad_norm": 0.1696901172399521, "learning_rate": 0.002, "loss": 2.5782, "step": 65690 }, { "epoch": 0.1308890093076629, "grad_norm": 0.1396632045507431, "learning_rate": 0.002, "loss": 2.5616, "step": 65700 }, { "epoch": 0.1309089315313018, "grad_norm": 0.17789770662784576, "learning_rate": 0.002, "loss": 2.5583, "step": 65710 }, { "epoch": 0.1309288537549407, "grad_norm": 0.14853820204734802, "learning_rate": 0.002, "loss": 2.5677, "step": 65720 }, { "epoch": 0.13094877597857962, "grad_norm": 0.13254040479660034, "learning_rate": 0.002, "loss": 2.5656, "step": 65730 }, { "epoch": 0.13096869820221854, "grad_norm": 0.17818188667297363, "learning_rate": 0.002, "loss": 2.5632, "step": 65740 }, { "epoch": 0.13098862042585746, "grad_norm": 0.16854245960712433, "learning_rate": 0.002, "loss": 2.5665, "step": 65750 }, { "epoch": 0.13100854264949638, "grad_norm": 0.17398454248905182, "learning_rate": 0.002, "loss": 2.5697, "step": 65760 }, { "epoch": 0.13102846487313527, "grad_norm": 0.2022208422422409, "learning_rate": 0.002, "loss": 2.5695, "step": 65770 }, { "epoch": 0.1310483870967742, "grad_norm": 0.17882323265075684, "learning_rate": 0.002, "loss": 2.5783, "step": 65780 }, { "epoch": 0.1310683093204131, "grad_norm": 0.13494843244552612, "learning_rate": 0.002, "loss": 2.5846, "step": 65790 }, { "epoch": 0.13108823154405203, "grad_norm": 0.1501566767692566, "learning_rate": 0.002, "loss": 2.5772, "step": 65800 }, { "epoch": 0.13110815376769094, "grad_norm": 0.13763773441314697, "learning_rate": 0.002, "loss": 2.5759, "step": 65810 }, { "epoch": 0.13112807599132986, "grad_norm": 0.16815820336341858, "learning_rate": 0.002, "loss": 2.5697, "step": 65820 }, { "epoch": 0.13114799821496875, "grad_norm": 0.15430733561515808, "learning_rate": 0.002, "loss": 2.5647, "step": 65830 }, { "epoch": 0.13116792043860767, "grad_norm": 0.16319604218006134, "learning_rate": 0.002, "loss": 2.5683, "step": 65840 }, { "epoch": 0.1311878426622466, "grad_norm": 0.1505453735589981, "learning_rate": 0.002, "loss": 2.5785, "step": 65850 }, { "epoch": 0.1312077648858855, "grad_norm": 0.2167617380619049, "learning_rate": 0.002, "loss": 2.561, "step": 65860 }, { "epoch": 0.13122768710952443, "grad_norm": 0.17089147865772247, "learning_rate": 0.002, "loss": 2.5862, "step": 65870 }, { "epoch": 0.13124760933316332, "grad_norm": 0.17501963675022125, "learning_rate": 0.002, "loss": 2.5794, "step": 65880 }, { "epoch": 0.13126753155680224, "grad_norm": 0.1359715312719345, "learning_rate": 0.002, "loss": 2.5895, "step": 65890 }, { "epoch": 0.13128745378044115, "grad_norm": 0.16226649284362793, "learning_rate": 0.002, "loss": 2.5731, "step": 65900 }, { "epoch": 0.13130737600408007, "grad_norm": 0.16480380296707153, "learning_rate": 0.002, "loss": 2.5791, "step": 65910 }, { "epoch": 0.131327298227719, "grad_norm": 0.15673379600048065, "learning_rate": 0.002, "loss": 2.5774, "step": 65920 }, { "epoch": 0.1313472204513579, "grad_norm": 0.139391228556633, "learning_rate": 0.002, "loss": 2.579, "step": 65930 }, { "epoch": 0.1313671426749968, "grad_norm": 0.19816483557224274, "learning_rate": 0.002, "loss": 2.5611, "step": 65940 }, { "epoch": 0.13138706489863572, "grad_norm": 0.19285477697849274, "learning_rate": 0.002, "loss": 2.5669, "step": 65950 }, { "epoch": 0.13140698712227464, "grad_norm": 0.17280301451683044, "learning_rate": 0.002, "loss": 2.5667, "step": 65960 }, { "epoch": 0.13142690934591356, "grad_norm": 0.14530976116657257, "learning_rate": 0.002, "loss": 2.5601, "step": 65970 }, { "epoch": 0.13144683156955247, "grad_norm": 0.15266260504722595, "learning_rate": 0.002, "loss": 2.5817, "step": 65980 }, { "epoch": 0.1314667537931914, "grad_norm": 0.20903396606445312, "learning_rate": 0.002, "loss": 2.5661, "step": 65990 }, { "epoch": 0.13148667601683028, "grad_norm": 0.13808445632457733, "learning_rate": 0.002, "loss": 2.5563, "step": 66000 }, { "epoch": 0.1315065982404692, "grad_norm": 0.17158837616443634, "learning_rate": 0.002, "loss": 2.5849, "step": 66010 }, { "epoch": 0.13152652046410812, "grad_norm": 0.15515218675136566, "learning_rate": 0.002, "loss": 2.5739, "step": 66020 }, { "epoch": 0.13154644268774704, "grad_norm": 0.2194679230451584, "learning_rate": 0.002, "loss": 2.5878, "step": 66030 }, { "epoch": 0.13156636491138596, "grad_norm": 0.15087829530239105, "learning_rate": 0.002, "loss": 2.5551, "step": 66040 }, { "epoch": 0.13158628713502488, "grad_norm": 0.15077601373195648, "learning_rate": 0.002, "loss": 2.5648, "step": 66050 }, { "epoch": 0.13160620935866377, "grad_norm": 0.170048788189888, "learning_rate": 0.002, "loss": 2.5717, "step": 66060 }, { "epoch": 0.13162613158230269, "grad_norm": 0.19912715256214142, "learning_rate": 0.002, "loss": 2.5862, "step": 66070 }, { "epoch": 0.1316460538059416, "grad_norm": 0.1611431986093521, "learning_rate": 0.002, "loss": 2.563, "step": 66080 }, { "epoch": 0.13166597602958052, "grad_norm": 0.21183615922927856, "learning_rate": 0.002, "loss": 2.5684, "step": 66090 }, { "epoch": 0.13168589825321944, "grad_norm": 0.15366509556770325, "learning_rate": 0.002, "loss": 2.5757, "step": 66100 }, { "epoch": 0.13170582047685833, "grad_norm": 0.15568393468856812, "learning_rate": 0.002, "loss": 2.5705, "step": 66110 }, { "epoch": 0.13172574270049725, "grad_norm": 0.18866188824176788, "learning_rate": 0.002, "loss": 2.5654, "step": 66120 }, { "epoch": 0.13174566492413617, "grad_norm": 0.1508651226758957, "learning_rate": 0.002, "loss": 2.576, "step": 66130 }, { "epoch": 0.1317655871477751, "grad_norm": 0.13768506050109863, "learning_rate": 0.002, "loss": 2.572, "step": 66140 }, { "epoch": 0.131785509371414, "grad_norm": 0.2087404578924179, "learning_rate": 0.002, "loss": 2.5835, "step": 66150 }, { "epoch": 0.13180543159505292, "grad_norm": 0.14586232602596283, "learning_rate": 0.002, "loss": 2.5666, "step": 66160 }, { "epoch": 0.13182535381869182, "grad_norm": 0.1701720803976059, "learning_rate": 0.002, "loss": 2.578, "step": 66170 }, { "epoch": 0.13184527604233073, "grad_norm": 0.1397487372159958, "learning_rate": 0.002, "loss": 2.5758, "step": 66180 }, { "epoch": 0.13186519826596965, "grad_norm": 0.16133004426956177, "learning_rate": 0.002, "loss": 2.5751, "step": 66190 }, { "epoch": 0.13188512048960857, "grad_norm": 0.13579265773296356, "learning_rate": 0.002, "loss": 2.5665, "step": 66200 }, { "epoch": 0.1319050427132475, "grad_norm": 0.17087052762508392, "learning_rate": 0.002, "loss": 2.5778, "step": 66210 }, { "epoch": 0.1319249649368864, "grad_norm": 0.1382725089788437, "learning_rate": 0.002, "loss": 2.5688, "step": 66220 }, { "epoch": 0.1319448871605253, "grad_norm": 0.1804560273885727, "learning_rate": 0.002, "loss": 2.581, "step": 66230 }, { "epoch": 0.13196480938416422, "grad_norm": 0.15457995235919952, "learning_rate": 0.002, "loss": 2.5813, "step": 66240 }, { "epoch": 0.13198473160780314, "grad_norm": 0.16352137923240662, "learning_rate": 0.002, "loss": 2.5858, "step": 66250 }, { "epoch": 0.13200465383144205, "grad_norm": 0.15444722771644592, "learning_rate": 0.002, "loss": 2.5961, "step": 66260 }, { "epoch": 0.13202457605508097, "grad_norm": 0.16140185296535492, "learning_rate": 0.002, "loss": 2.5535, "step": 66270 }, { "epoch": 0.1320444982787199, "grad_norm": 0.20986223220825195, "learning_rate": 0.002, "loss": 2.5639, "step": 66280 }, { "epoch": 0.13206442050235878, "grad_norm": 0.17390652000904083, "learning_rate": 0.002, "loss": 2.5697, "step": 66290 }, { "epoch": 0.1320843427259977, "grad_norm": 0.16534951329231262, "learning_rate": 0.002, "loss": 2.5838, "step": 66300 }, { "epoch": 0.13210426494963662, "grad_norm": 0.1818656623363495, "learning_rate": 0.002, "loss": 2.5717, "step": 66310 }, { "epoch": 0.13212418717327554, "grad_norm": 0.18860891461372375, "learning_rate": 0.002, "loss": 2.5728, "step": 66320 }, { "epoch": 0.13214410939691446, "grad_norm": 0.15110966563224792, "learning_rate": 0.002, "loss": 2.5743, "step": 66330 }, { "epoch": 0.13216403162055335, "grad_norm": 0.1638670563697815, "learning_rate": 0.002, "loss": 2.5594, "step": 66340 }, { "epoch": 0.13218395384419226, "grad_norm": 0.18655091524124146, "learning_rate": 0.002, "loss": 2.5768, "step": 66350 }, { "epoch": 0.13220387606783118, "grad_norm": 0.16373789310455322, "learning_rate": 0.002, "loss": 2.5769, "step": 66360 }, { "epoch": 0.1322237982914701, "grad_norm": 0.16763778030872345, "learning_rate": 0.002, "loss": 2.5494, "step": 66370 }, { "epoch": 0.13224372051510902, "grad_norm": 0.1456666886806488, "learning_rate": 0.002, "loss": 2.5631, "step": 66380 }, { "epoch": 0.13226364273874794, "grad_norm": 0.16030354797840118, "learning_rate": 0.002, "loss": 2.5679, "step": 66390 }, { "epoch": 0.13228356496238683, "grad_norm": 0.14716172218322754, "learning_rate": 0.002, "loss": 2.5633, "step": 66400 }, { "epoch": 0.13230348718602575, "grad_norm": 0.1599043756723404, "learning_rate": 0.002, "loss": 2.5614, "step": 66410 }, { "epoch": 0.13232340940966467, "grad_norm": 0.15537457168102264, "learning_rate": 0.002, "loss": 2.5664, "step": 66420 }, { "epoch": 0.13234333163330358, "grad_norm": 0.14294785261154175, "learning_rate": 0.002, "loss": 2.5875, "step": 66430 }, { "epoch": 0.1323632538569425, "grad_norm": 0.19316896796226501, "learning_rate": 0.002, "loss": 2.5644, "step": 66440 }, { "epoch": 0.13238317608058142, "grad_norm": 0.16565725207328796, "learning_rate": 0.002, "loss": 2.5693, "step": 66450 }, { "epoch": 0.1324030983042203, "grad_norm": 0.15293416380882263, "learning_rate": 0.002, "loss": 2.5566, "step": 66460 }, { "epoch": 0.13242302052785923, "grad_norm": 0.1752716302871704, "learning_rate": 0.002, "loss": 2.5813, "step": 66470 }, { "epoch": 0.13244294275149815, "grad_norm": 0.1589961051940918, "learning_rate": 0.002, "loss": 2.5788, "step": 66480 }, { "epoch": 0.13246286497513707, "grad_norm": 0.16828590631484985, "learning_rate": 0.002, "loss": 2.5797, "step": 66490 }, { "epoch": 0.132482787198776, "grad_norm": 0.152476504445076, "learning_rate": 0.002, "loss": 2.5708, "step": 66500 }, { "epoch": 0.1325027094224149, "grad_norm": 0.14548377692699432, "learning_rate": 0.002, "loss": 2.5797, "step": 66510 }, { "epoch": 0.1325226316460538, "grad_norm": 0.16683146357536316, "learning_rate": 0.002, "loss": 2.5553, "step": 66520 }, { "epoch": 0.13254255386969271, "grad_norm": 0.14073446393013, "learning_rate": 0.002, "loss": 2.5697, "step": 66530 }, { "epoch": 0.13256247609333163, "grad_norm": 0.19764356315135956, "learning_rate": 0.002, "loss": 2.5675, "step": 66540 }, { "epoch": 0.13258239831697055, "grad_norm": 0.1558673083782196, "learning_rate": 0.002, "loss": 2.5616, "step": 66550 }, { "epoch": 0.13260232054060947, "grad_norm": 0.14582712948322296, "learning_rate": 0.002, "loss": 2.5727, "step": 66560 }, { "epoch": 0.1326222427642484, "grad_norm": 0.18938231468200684, "learning_rate": 0.002, "loss": 2.5803, "step": 66570 }, { "epoch": 0.13264216498788728, "grad_norm": 0.16925600171089172, "learning_rate": 0.002, "loss": 2.5733, "step": 66580 }, { "epoch": 0.1326620872115262, "grad_norm": 0.14317354559898376, "learning_rate": 0.002, "loss": 2.5562, "step": 66590 }, { "epoch": 0.13268200943516512, "grad_norm": 0.15972208976745605, "learning_rate": 0.002, "loss": 2.5704, "step": 66600 }, { "epoch": 0.13270193165880403, "grad_norm": 0.1542871743440628, "learning_rate": 0.002, "loss": 2.5574, "step": 66610 }, { "epoch": 0.13272185388244295, "grad_norm": 0.15934886038303375, "learning_rate": 0.002, "loss": 2.577, "step": 66620 }, { "epoch": 0.13274177610608184, "grad_norm": 0.14850173890590668, "learning_rate": 0.002, "loss": 2.5741, "step": 66630 }, { "epoch": 0.13276169832972076, "grad_norm": 0.15723824501037598, "learning_rate": 0.002, "loss": 2.5809, "step": 66640 }, { "epoch": 0.13278162055335968, "grad_norm": 0.16907259821891785, "learning_rate": 0.002, "loss": 2.5754, "step": 66650 }, { "epoch": 0.1328015427769986, "grad_norm": 0.12510140240192413, "learning_rate": 0.002, "loss": 2.5659, "step": 66660 }, { "epoch": 0.13282146500063752, "grad_norm": 0.1593034863471985, "learning_rate": 0.002, "loss": 2.5567, "step": 66670 }, { "epoch": 0.13284138722427644, "grad_norm": 0.16196836531162262, "learning_rate": 0.002, "loss": 2.5641, "step": 66680 }, { "epoch": 0.13286130944791533, "grad_norm": 0.1506481170654297, "learning_rate": 0.002, "loss": 2.565, "step": 66690 }, { "epoch": 0.13288123167155425, "grad_norm": 0.17672277987003326, "learning_rate": 0.002, "loss": 2.5738, "step": 66700 }, { "epoch": 0.13290115389519316, "grad_norm": 0.13355021178722382, "learning_rate": 0.002, "loss": 2.5689, "step": 66710 }, { "epoch": 0.13292107611883208, "grad_norm": 0.13213008642196655, "learning_rate": 0.002, "loss": 2.582, "step": 66720 }, { "epoch": 0.132940998342471, "grad_norm": 0.16283018887043, "learning_rate": 0.002, "loss": 2.5669, "step": 66730 }, { "epoch": 0.13296092056610992, "grad_norm": 0.14481979608535767, "learning_rate": 0.002, "loss": 2.5829, "step": 66740 }, { "epoch": 0.1329808427897488, "grad_norm": 0.1685216724872589, "learning_rate": 0.002, "loss": 2.578, "step": 66750 }, { "epoch": 0.13300076501338773, "grad_norm": 0.17749997973442078, "learning_rate": 0.002, "loss": 2.5632, "step": 66760 }, { "epoch": 0.13302068723702665, "grad_norm": 0.15550392866134644, "learning_rate": 0.002, "loss": 2.5892, "step": 66770 }, { "epoch": 0.13304060946066557, "grad_norm": 0.14768072962760925, "learning_rate": 0.002, "loss": 2.5498, "step": 66780 }, { "epoch": 0.13306053168430448, "grad_norm": 0.16630800068378448, "learning_rate": 0.002, "loss": 2.5626, "step": 66790 }, { "epoch": 0.1330804539079434, "grad_norm": 0.15421253442764282, "learning_rate": 0.002, "loss": 2.5763, "step": 66800 }, { "epoch": 0.1331003761315823, "grad_norm": 0.1492752581834793, "learning_rate": 0.002, "loss": 2.5767, "step": 66810 }, { "epoch": 0.1331202983552212, "grad_norm": 0.17131203413009644, "learning_rate": 0.002, "loss": 2.567, "step": 66820 }, { "epoch": 0.13314022057886013, "grad_norm": 0.15924210846424103, "learning_rate": 0.002, "loss": 2.5773, "step": 66830 }, { "epoch": 0.13316014280249905, "grad_norm": 0.17951196432113647, "learning_rate": 0.002, "loss": 2.5703, "step": 66840 }, { "epoch": 0.13318006502613797, "grad_norm": 0.17443318665027618, "learning_rate": 0.002, "loss": 2.5831, "step": 66850 }, { "epoch": 0.13319998724977686, "grad_norm": 0.150976300239563, "learning_rate": 0.002, "loss": 2.5526, "step": 66860 }, { "epoch": 0.13321990947341578, "grad_norm": 0.13524240255355835, "learning_rate": 0.002, "loss": 2.5685, "step": 66870 }, { "epoch": 0.1332398316970547, "grad_norm": 0.16225072741508484, "learning_rate": 0.002, "loss": 2.5828, "step": 66880 }, { "epoch": 0.1332597539206936, "grad_norm": 0.16250348091125488, "learning_rate": 0.002, "loss": 2.5712, "step": 66890 }, { "epoch": 0.13327967614433253, "grad_norm": 0.1682257056236267, "learning_rate": 0.002, "loss": 2.5724, "step": 66900 }, { "epoch": 0.13329959836797145, "grad_norm": 0.15569578111171722, "learning_rate": 0.002, "loss": 2.5685, "step": 66910 }, { "epoch": 0.13331952059161034, "grad_norm": 0.17196261882781982, "learning_rate": 0.002, "loss": 2.5633, "step": 66920 }, { "epoch": 0.13333944281524926, "grad_norm": 0.1674543023109436, "learning_rate": 0.002, "loss": 2.5515, "step": 66930 }, { "epoch": 0.13335936503888818, "grad_norm": 0.16255782544612885, "learning_rate": 0.002, "loss": 2.5656, "step": 66940 }, { "epoch": 0.1333792872625271, "grad_norm": 0.1546248495578766, "learning_rate": 0.002, "loss": 2.5652, "step": 66950 }, { "epoch": 0.13339920948616601, "grad_norm": 0.1642368584871292, "learning_rate": 0.002, "loss": 2.5764, "step": 66960 }, { "epoch": 0.13341913170980493, "grad_norm": 0.17277103662490845, "learning_rate": 0.002, "loss": 2.5671, "step": 66970 }, { "epoch": 0.13343905393344382, "grad_norm": 0.17219071090221405, "learning_rate": 0.002, "loss": 2.5609, "step": 66980 }, { "epoch": 0.13345897615708274, "grad_norm": 0.13986316323280334, "learning_rate": 0.002, "loss": 2.5692, "step": 66990 }, { "epoch": 0.13347889838072166, "grad_norm": 0.23683643341064453, "learning_rate": 0.002, "loss": 2.5873, "step": 67000 }, { "epoch": 0.13349882060436058, "grad_norm": 0.14078345894813538, "learning_rate": 0.002, "loss": 2.5811, "step": 67010 }, { "epoch": 0.1335187428279995, "grad_norm": 0.16324415802955627, "learning_rate": 0.002, "loss": 2.5761, "step": 67020 }, { "epoch": 0.13353866505163842, "grad_norm": 0.15528924763202667, "learning_rate": 0.002, "loss": 2.577, "step": 67030 }, { "epoch": 0.1335585872752773, "grad_norm": 0.16992607712745667, "learning_rate": 0.002, "loss": 2.5832, "step": 67040 }, { "epoch": 0.13357850949891623, "grad_norm": 0.18870092928409576, "learning_rate": 0.002, "loss": 2.5917, "step": 67050 }, { "epoch": 0.13359843172255514, "grad_norm": 0.1479162722826004, "learning_rate": 0.002, "loss": 2.5791, "step": 67060 }, { "epoch": 0.13361835394619406, "grad_norm": 0.20987382531166077, "learning_rate": 0.002, "loss": 2.5858, "step": 67070 }, { "epoch": 0.13363827616983298, "grad_norm": 0.15907855331897736, "learning_rate": 0.002, "loss": 2.5727, "step": 67080 }, { "epoch": 0.13365819839347187, "grad_norm": 0.1480841189622879, "learning_rate": 0.002, "loss": 2.5657, "step": 67090 }, { "epoch": 0.1336781206171108, "grad_norm": 0.14949250221252441, "learning_rate": 0.002, "loss": 2.5642, "step": 67100 }, { "epoch": 0.1336980428407497, "grad_norm": 0.16702347993850708, "learning_rate": 0.002, "loss": 2.5687, "step": 67110 }, { "epoch": 0.13371796506438863, "grad_norm": 0.1561422348022461, "learning_rate": 0.002, "loss": 2.5614, "step": 67120 }, { "epoch": 0.13373788728802755, "grad_norm": 0.19035695493221283, "learning_rate": 0.002, "loss": 2.5843, "step": 67130 }, { "epoch": 0.13375780951166646, "grad_norm": 0.15297192335128784, "learning_rate": 0.002, "loss": 2.587, "step": 67140 }, { "epoch": 0.13377773173530536, "grad_norm": 0.16997669637203217, "learning_rate": 0.002, "loss": 2.5655, "step": 67150 }, { "epoch": 0.13379765395894427, "grad_norm": 0.15312151610851288, "learning_rate": 0.002, "loss": 2.5711, "step": 67160 }, { "epoch": 0.1338175761825832, "grad_norm": 0.14926542341709137, "learning_rate": 0.002, "loss": 2.5661, "step": 67170 }, { "epoch": 0.1338374984062221, "grad_norm": 0.15271389484405518, "learning_rate": 0.002, "loss": 2.5587, "step": 67180 }, { "epoch": 0.13385742062986103, "grad_norm": 0.1498708873987198, "learning_rate": 0.002, "loss": 2.5771, "step": 67190 }, { "epoch": 0.13387734285349995, "grad_norm": 0.13610732555389404, "learning_rate": 0.002, "loss": 2.5782, "step": 67200 }, { "epoch": 0.13389726507713884, "grad_norm": 0.19373619556427002, "learning_rate": 0.002, "loss": 2.5681, "step": 67210 }, { "epoch": 0.13391718730077776, "grad_norm": 0.15465319156646729, "learning_rate": 0.002, "loss": 2.5667, "step": 67220 }, { "epoch": 0.13393710952441668, "grad_norm": 0.14944703876972198, "learning_rate": 0.002, "loss": 2.5831, "step": 67230 }, { "epoch": 0.1339570317480556, "grad_norm": 0.1480633020401001, "learning_rate": 0.002, "loss": 2.5657, "step": 67240 }, { "epoch": 0.1339769539716945, "grad_norm": 0.17991551756858826, "learning_rate": 0.002, "loss": 2.5666, "step": 67250 }, { "epoch": 0.13399687619533343, "grad_norm": 0.14380711317062378, "learning_rate": 0.002, "loss": 2.5749, "step": 67260 }, { "epoch": 0.13401679841897232, "grad_norm": 0.14520478248596191, "learning_rate": 0.002, "loss": 2.5615, "step": 67270 }, { "epoch": 0.13403672064261124, "grad_norm": 0.15061582624912262, "learning_rate": 0.002, "loss": 2.5866, "step": 67280 }, { "epoch": 0.13405664286625016, "grad_norm": 0.1798543632030487, "learning_rate": 0.002, "loss": 2.5606, "step": 67290 }, { "epoch": 0.13407656508988908, "grad_norm": 0.1750979870557785, "learning_rate": 0.002, "loss": 2.5558, "step": 67300 }, { "epoch": 0.134096487313528, "grad_norm": 0.14041471481323242, "learning_rate": 0.002, "loss": 2.5782, "step": 67310 }, { "epoch": 0.13411640953716691, "grad_norm": 0.1701684445142746, "learning_rate": 0.002, "loss": 2.5723, "step": 67320 }, { "epoch": 0.1341363317608058, "grad_norm": 0.16480940580368042, "learning_rate": 0.002, "loss": 2.562, "step": 67330 }, { "epoch": 0.13415625398444472, "grad_norm": 0.1669369637966156, "learning_rate": 0.002, "loss": 2.5595, "step": 67340 }, { "epoch": 0.13417617620808364, "grad_norm": 0.14545604586601257, "learning_rate": 0.002, "loss": 2.5665, "step": 67350 }, { "epoch": 0.13419609843172256, "grad_norm": 0.1596897691488266, "learning_rate": 0.002, "loss": 2.5666, "step": 67360 }, { "epoch": 0.13421602065536148, "grad_norm": 0.19203922152519226, "learning_rate": 0.002, "loss": 2.5705, "step": 67370 }, { "epoch": 0.13423594287900037, "grad_norm": 0.16494789719581604, "learning_rate": 0.002, "loss": 2.5685, "step": 67380 }, { "epoch": 0.1342558651026393, "grad_norm": 0.1548108458518982, "learning_rate": 0.002, "loss": 2.5751, "step": 67390 }, { "epoch": 0.1342757873262782, "grad_norm": 0.1418614387512207, "learning_rate": 0.002, "loss": 2.5696, "step": 67400 }, { "epoch": 0.13429570954991712, "grad_norm": 0.20781266689300537, "learning_rate": 0.002, "loss": 2.5793, "step": 67410 }, { "epoch": 0.13431563177355604, "grad_norm": 0.18205323815345764, "learning_rate": 0.002, "loss": 2.5811, "step": 67420 }, { "epoch": 0.13433555399719496, "grad_norm": 0.13984115421772003, "learning_rate": 0.002, "loss": 2.5718, "step": 67430 }, { "epoch": 0.13435547622083385, "grad_norm": 0.1430966556072235, "learning_rate": 0.002, "loss": 2.5604, "step": 67440 }, { "epoch": 0.13437539844447277, "grad_norm": 0.19841152429580688, "learning_rate": 0.002, "loss": 2.562, "step": 67450 }, { "epoch": 0.1343953206681117, "grad_norm": 0.13812248408794403, "learning_rate": 0.002, "loss": 2.5761, "step": 67460 }, { "epoch": 0.1344152428917506, "grad_norm": 0.1432044953107834, "learning_rate": 0.002, "loss": 2.5779, "step": 67470 }, { "epoch": 0.13443516511538953, "grad_norm": 0.1676706224679947, "learning_rate": 0.002, "loss": 2.5755, "step": 67480 }, { "epoch": 0.13445508733902845, "grad_norm": 0.18089202046394348, "learning_rate": 0.002, "loss": 2.5616, "step": 67490 }, { "epoch": 0.13447500956266734, "grad_norm": 0.1947411596775055, "learning_rate": 0.002, "loss": 2.5677, "step": 67500 }, { "epoch": 0.13449493178630625, "grad_norm": 0.1663188636302948, "learning_rate": 0.002, "loss": 2.5669, "step": 67510 }, { "epoch": 0.13451485400994517, "grad_norm": 0.1473340541124344, "learning_rate": 0.002, "loss": 2.5825, "step": 67520 }, { "epoch": 0.1345347762335841, "grad_norm": 0.141653373837471, "learning_rate": 0.002, "loss": 2.5704, "step": 67530 }, { "epoch": 0.134554698457223, "grad_norm": 0.15517401695251465, "learning_rate": 0.002, "loss": 2.5731, "step": 67540 }, { "epoch": 0.13457462068086193, "grad_norm": 0.15999959409236908, "learning_rate": 0.002, "loss": 2.5577, "step": 67550 }, { "epoch": 0.13459454290450082, "grad_norm": 0.16060985624790192, "learning_rate": 0.002, "loss": 2.5637, "step": 67560 }, { "epoch": 0.13461446512813974, "grad_norm": 0.17826974391937256, "learning_rate": 0.002, "loss": 2.5665, "step": 67570 }, { "epoch": 0.13463438735177866, "grad_norm": 0.18917188048362732, "learning_rate": 0.002, "loss": 2.5747, "step": 67580 }, { "epoch": 0.13465430957541757, "grad_norm": 0.14927135407924652, "learning_rate": 0.002, "loss": 2.5707, "step": 67590 }, { "epoch": 0.1346742317990565, "grad_norm": 0.15794029831886292, "learning_rate": 0.002, "loss": 2.5685, "step": 67600 }, { "epoch": 0.13469415402269538, "grad_norm": 0.16273503005504608, "learning_rate": 0.002, "loss": 2.573, "step": 67610 }, { "epoch": 0.1347140762463343, "grad_norm": 0.19143559038639069, "learning_rate": 0.002, "loss": 2.5665, "step": 67620 }, { "epoch": 0.13473399846997322, "grad_norm": 0.160466730594635, "learning_rate": 0.002, "loss": 2.5717, "step": 67630 }, { "epoch": 0.13475392069361214, "grad_norm": 0.14910121262073517, "learning_rate": 0.002, "loss": 2.5527, "step": 67640 }, { "epoch": 0.13477384291725106, "grad_norm": 0.15795373916625977, "learning_rate": 0.002, "loss": 2.5704, "step": 67650 }, { "epoch": 0.13479376514088998, "grad_norm": 0.15817633271217346, "learning_rate": 0.002, "loss": 2.573, "step": 67660 }, { "epoch": 0.13481368736452887, "grad_norm": 0.13776306807994843, "learning_rate": 0.002, "loss": 2.5754, "step": 67670 }, { "epoch": 0.13483360958816779, "grad_norm": 0.2000943422317505, "learning_rate": 0.002, "loss": 2.5642, "step": 67680 }, { "epoch": 0.1348535318118067, "grad_norm": 0.1425074338912964, "learning_rate": 0.002, "loss": 2.5626, "step": 67690 }, { "epoch": 0.13487345403544562, "grad_norm": 0.1583099067211151, "learning_rate": 0.002, "loss": 2.561, "step": 67700 }, { "epoch": 0.13489337625908454, "grad_norm": 0.13768495619297028, "learning_rate": 0.002, "loss": 2.587, "step": 67710 }, { "epoch": 0.13491329848272346, "grad_norm": 0.1666397750377655, "learning_rate": 0.002, "loss": 2.5691, "step": 67720 }, { "epoch": 0.13493322070636235, "grad_norm": 0.19125352799892426, "learning_rate": 0.002, "loss": 2.5666, "step": 67730 }, { "epoch": 0.13495314293000127, "grad_norm": 0.1641017347574234, "learning_rate": 0.002, "loss": 2.5772, "step": 67740 }, { "epoch": 0.1349730651536402, "grad_norm": 0.15643319487571716, "learning_rate": 0.002, "loss": 2.5701, "step": 67750 }, { "epoch": 0.1349929873772791, "grad_norm": 0.1497127115726471, "learning_rate": 0.002, "loss": 2.5861, "step": 67760 }, { "epoch": 0.13501290960091802, "grad_norm": 0.17476491630077362, "learning_rate": 0.002, "loss": 2.575, "step": 67770 }, { "epoch": 0.13503283182455694, "grad_norm": 0.17960400879383087, "learning_rate": 0.002, "loss": 2.5626, "step": 67780 }, { "epoch": 0.13505275404819583, "grad_norm": 0.13523927330970764, "learning_rate": 0.002, "loss": 2.5668, "step": 67790 }, { "epoch": 0.13507267627183475, "grad_norm": 0.1840248703956604, "learning_rate": 0.002, "loss": 2.5765, "step": 67800 }, { "epoch": 0.13509259849547367, "grad_norm": 0.16978268325328827, "learning_rate": 0.002, "loss": 2.5678, "step": 67810 }, { "epoch": 0.1351125207191126, "grad_norm": 0.1837778389453888, "learning_rate": 0.002, "loss": 2.5719, "step": 67820 }, { "epoch": 0.1351324429427515, "grad_norm": 0.17635172605514526, "learning_rate": 0.002, "loss": 2.5476, "step": 67830 }, { "epoch": 0.1351523651663904, "grad_norm": 0.1480056494474411, "learning_rate": 0.002, "loss": 2.5633, "step": 67840 }, { "epoch": 0.13517228739002932, "grad_norm": 0.1587955504655838, "learning_rate": 0.002, "loss": 2.568, "step": 67850 }, { "epoch": 0.13519220961366823, "grad_norm": 0.14127160608768463, "learning_rate": 0.002, "loss": 2.5853, "step": 67860 }, { "epoch": 0.13521213183730715, "grad_norm": 0.18438977003097534, "learning_rate": 0.002, "loss": 2.5788, "step": 67870 }, { "epoch": 0.13523205406094607, "grad_norm": 0.15434607863426208, "learning_rate": 0.002, "loss": 2.5859, "step": 67880 }, { "epoch": 0.135251976284585, "grad_norm": 0.14692379534244537, "learning_rate": 0.002, "loss": 2.5644, "step": 67890 }, { "epoch": 0.13527189850822388, "grad_norm": 0.17985251545906067, "learning_rate": 0.002, "loss": 2.5666, "step": 67900 }, { "epoch": 0.1352918207318628, "grad_norm": 0.17868052423000336, "learning_rate": 0.002, "loss": 2.5849, "step": 67910 }, { "epoch": 0.13531174295550172, "grad_norm": 0.1619051992893219, "learning_rate": 0.002, "loss": 2.5761, "step": 67920 }, { "epoch": 0.13533166517914064, "grad_norm": 0.1583954393863678, "learning_rate": 0.002, "loss": 2.5862, "step": 67930 }, { "epoch": 0.13535158740277956, "grad_norm": 0.1375933438539505, "learning_rate": 0.002, "loss": 2.5751, "step": 67940 }, { "epoch": 0.13537150962641847, "grad_norm": 0.16923785209655762, "learning_rate": 0.002, "loss": 2.5835, "step": 67950 }, { "epoch": 0.13539143185005736, "grad_norm": 0.18846473097801208, "learning_rate": 0.002, "loss": 2.5602, "step": 67960 }, { "epoch": 0.13541135407369628, "grad_norm": 0.15300917625427246, "learning_rate": 0.002, "loss": 2.5633, "step": 67970 }, { "epoch": 0.1354312762973352, "grad_norm": 0.15616632997989655, "learning_rate": 0.002, "loss": 2.56, "step": 67980 }, { "epoch": 0.13545119852097412, "grad_norm": 0.16618086397647858, "learning_rate": 0.002, "loss": 2.5702, "step": 67990 }, { "epoch": 0.13547112074461304, "grad_norm": 0.18173198401927948, "learning_rate": 0.002, "loss": 2.574, "step": 68000 }, { "epoch": 0.13549104296825196, "grad_norm": 0.14074517786502838, "learning_rate": 0.002, "loss": 2.56, "step": 68010 }, { "epoch": 0.13551096519189085, "grad_norm": 0.13680511713027954, "learning_rate": 0.002, "loss": 2.5665, "step": 68020 }, { "epoch": 0.13553088741552977, "grad_norm": 0.15944018959999084, "learning_rate": 0.002, "loss": 2.578, "step": 68030 }, { "epoch": 0.13555080963916868, "grad_norm": 0.15947073698043823, "learning_rate": 0.002, "loss": 2.5554, "step": 68040 }, { "epoch": 0.1355707318628076, "grad_norm": 0.17808955907821655, "learning_rate": 0.002, "loss": 2.5735, "step": 68050 }, { "epoch": 0.13559065408644652, "grad_norm": 0.15198037028312683, "learning_rate": 0.002, "loss": 2.5756, "step": 68060 }, { "epoch": 0.13561057631008544, "grad_norm": 0.1774771809577942, "learning_rate": 0.002, "loss": 2.5704, "step": 68070 }, { "epoch": 0.13563049853372433, "grad_norm": 0.1538463681936264, "learning_rate": 0.002, "loss": 2.5783, "step": 68080 }, { "epoch": 0.13565042075736325, "grad_norm": 0.15743406116962433, "learning_rate": 0.002, "loss": 2.5757, "step": 68090 }, { "epoch": 0.13567034298100217, "grad_norm": 0.18522612750530243, "learning_rate": 0.002, "loss": 2.5598, "step": 68100 }, { "epoch": 0.1356902652046411, "grad_norm": 0.16010887920856476, "learning_rate": 0.002, "loss": 2.5741, "step": 68110 }, { "epoch": 0.13571018742828, "grad_norm": 0.16460347175598145, "learning_rate": 0.002, "loss": 2.5647, "step": 68120 }, { "epoch": 0.1357301096519189, "grad_norm": 0.15203867852687836, "learning_rate": 0.002, "loss": 2.5641, "step": 68130 }, { "epoch": 0.13575003187555781, "grad_norm": 0.17847703397274017, "learning_rate": 0.002, "loss": 2.5781, "step": 68140 }, { "epoch": 0.13576995409919673, "grad_norm": 0.1737683266401291, "learning_rate": 0.002, "loss": 2.561, "step": 68150 }, { "epoch": 0.13578987632283565, "grad_norm": 0.14223261177539825, "learning_rate": 0.002, "loss": 2.5647, "step": 68160 }, { "epoch": 0.13580979854647457, "grad_norm": 0.22104725241661072, "learning_rate": 0.002, "loss": 2.5597, "step": 68170 }, { "epoch": 0.1358297207701135, "grad_norm": 0.14441625773906708, "learning_rate": 0.002, "loss": 2.5653, "step": 68180 }, { "epoch": 0.13584964299375238, "grad_norm": 0.18454734981060028, "learning_rate": 0.002, "loss": 2.5702, "step": 68190 }, { "epoch": 0.1358695652173913, "grad_norm": 0.17312565445899963, "learning_rate": 0.002, "loss": 2.5728, "step": 68200 }, { "epoch": 0.13588948744103022, "grad_norm": 0.15685252845287323, "learning_rate": 0.002, "loss": 2.5687, "step": 68210 }, { "epoch": 0.13590940966466913, "grad_norm": 0.15031521022319794, "learning_rate": 0.002, "loss": 2.5727, "step": 68220 }, { "epoch": 0.13592933188830805, "grad_norm": 0.14561909437179565, "learning_rate": 0.002, "loss": 2.5685, "step": 68230 }, { "epoch": 0.13594925411194697, "grad_norm": 0.1493324637413025, "learning_rate": 0.002, "loss": 2.5641, "step": 68240 }, { "epoch": 0.13596917633558586, "grad_norm": 0.16757133603096008, "learning_rate": 0.002, "loss": 2.5613, "step": 68250 }, { "epoch": 0.13598909855922478, "grad_norm": 0.20227567851543427, "learning_rate": 0.002, "loss": 2.5794, "step": 68260 }, { "epoch": 0.1360090207828637, "grad_norm": 0.15056554973125458, "learning_rate": 0.002, "loss": 2.5714, "step": 68270 }, { "epoch": 0.13602894300650262, "grad_norm": 0.14571458101272583, "learning_rate": 0.002, "loss": 2.5702, "step": 68280 }, { "epoch": 0.13604886523014154, "grad_norm": 0.1624085009098053, "learning_rate": 0.002, "loss": 2.5941, "step": 68290 }, { "epoch": 0.13606878745378045, "grad_norm": 0.13934823870658875, "learning_rate": 0.002, "loss": 2.5769, "step": 68300 }, { "epoch": 0.13608870967741934, "grad_norm": 0.1559671014547348, "learning_rate": 0.002, "loss": 2.5683, "step": 68310 }, { "epoch": 0.13610863190105826, "grad_norm": 0.1591733992099762, "learning_rate": 0.002, "loss": 2.5626, "step": 68320 }, { "epoch": 0.13612855412469718, "grad_norm": 0.16537053883075714, "learning_rate": 0.002, "loss": 2.5631, "step": 68330 }, { "epoch": 0.1361484763483361, "grad_norm": 0.12246416509151459, "learning_rate": 0.002, "loss": 2.5595, "step": 68340 }, { "epoch": 0.13616839857197502, "grad_norm": 0.1521185338497162, "learning_rate": 0.002, "loss": 2.576, "step": 68350 }, { "epoch": 0.1361883207956139, "grad_norm": 0.16654862463474274, "learning_rate": 0.002, "loss": 2.5795, "step": 68360 }, { "epoch": 0.13620824301925283, "grad_norm": 0.15672850608825684, "learning_rate": 0.002, "loss": 2.5808, "step": 68370 }, { "epoch": 0.13622816524289175, "grad_norm": 0.16534438729286194, "learning_rate": 0.002, "loss": 2.5723, "step": 68380 }, { "epoch": 0.13624808746653067, "grad_norm": 0.16961202025413513, "learning_rate": 0.002, "loss": 2.566, "step": 68390 }, { "epoch": 0.13626800969016958, "grad_norm": 0.15864035487174988, "learning_rate": 0.002, "loss": 2.5779, "step": 68400 }, { "epoch": 0.1362879319138085, "grad_norm": 0.16456560790538788, "learning_rate": 0.002, "loss": 2.5769, "step": 68410 }, { "epoch": 0.1363078541374474, "grad_norm": 0.16864953935146332, "learning_rate": 0.002, "loss": 2.5533, "step": 68420 }, { "epoch": 0.1363277763610863, "grad_norm": 0.16724322736263275, "learning_rate": 0.002, "loss": 2.5745, "step": 68430 }, { "epoch": 0.13634769858472523, "grad_norm": 0.13829559087753296, "learning_rate": 0.002, "loss": 2.5668, "step": 68440 }, { "epoch": 0.13636762080836415, "grad_norm": 0.16933219134807587, "learning_rate": 0.002, "loss": 2.5601, "step": 68450 }, { "epoch": 0.13638754303200307, "grad_norm": 0.20040619373321533, "learning_rate": 0.002, "loss": 2.5784, "step": 68460 }, { "epoch": 0.13640746525564199, "grad_norm": 0.1432691514492035, "learning_rate": 0.002, "loss": 2.5677, "step": 68470 }, { "epoch": 0.13642738747928088, "grad_norm": 0.19392770528793335, "learning_rate": 0.002, "loss": 2.5627, "step": 68480 }, { "epoch": 0.1364473097029198, "grad_norm": 0.15905840694904327, "learning_rate": 0.002, "loss": 2.5677, "step": 68490 }, { "epoch": 0.1364672319265587, "grad_norm": 0.15368294715881348, "learning_rate": 0.002, "loss": 2.5711, "step": 68500 }, { "epoch": 0.13648715415019763, "grad_norm": 0.16574731469154358, "learning_rate": 0.002, "loss": 2.5739, "step": 68510 }, { "epoch": 0.13650707637383655, "grad_norm": 0.181473970413208, "learning_rate": 0.002, "loss": 2.5667, "step": 68520 }, { "epoch": 0.13652699859747547, "grad_norm": 0.18724077939987183, "learning_rate": 0.002, "loss": 2.565, "step": 68530 }, { "epoch": 0.13654692082111436, "grad_norm": 0.15072597563266754, "learning_rate": 0.002, "loss": 2.5724, "step": 68540 }, { "epoch": 0.13656684304475328, "grad_norm": 0.14113447070121765, "learning_rate": 0.002, "loss": 2.5712, "step": 68550 }, { "epoch": 0.1365867652683922, "grad_norm": 0.16788911819458008, "learning_rate": 0.002, "loss": 2.5565, "step": 68560 }, { "epoch": 0.13660668749203111, "grad_norm": 0.17029283940792084, "learning_rate": 0.002, "loss": 2.5679, "step": 68570 }, { "epoch": 0.13662660971567003, "grad_norm": 0.1399916112422943, "learning_rate": 0.002, "loss": 2.5451, "step": 68580 }, { "epoch": 0.13664653193930895, "grad_norm": 0.2014005184173584, "learning_rate": 0.002, "loss": 2.5735, "step": 68590 }, { "epoch": 0.13666645416294784, "grad_norm": 0.1625974476337433, "learning_rate": 0.002, "loss": 2.5608, "step": 68600 }, { "epoch": 0.13668637638658676, "grad_norm": 0.18971769511699677, "learning_rate": 0.002, "loss": 2.5672, "step": 68610 }, { "epoch": 0.13670629861022568, "grad_norm": 0.18130461871623993, "learning_rate": 0.002, "loss": 2.5636, "step": 68620 }, { "epoch": 0.1367262208338646, "grad_norm": 0.14164723455905914, "learning_rate": 0.002, "loss": 2.586, "step": 68630 }, { "epoch": 0.13674614305750352, "grad_norm": 0.13883580267429352, "learning_rate": 0.002, "loss": 2.5721, "step": 68640 }, { "epoch": 0.1367660652811424, "grad_norm": 0.13487446308135986, "learning_rate": 0.002, "loss": 2.5768, "step": 68650 }, { "epoch": 0.13678598750478133, "grad_norm": 0.18247388303279877, "learning_rate": 0.002, "loss": 2.5604, "step": 68660 }, { "epoch": 0.13680590972842024, "grad_norm": 0.16461524367332458, "learning_rate": 0.002, "loss": 2.5758, "step": 68670 }, { "epoch": 0.13682583195205916, "grad_norm": 0.15689553320407867, "learning_rate": 0.002, "loss": 2.5654, "step": 68680 }, { "epoch": 0.13684575417569808, "grad_norm": 0.18401753902435303, "learning_rate": 0.002, "loss": 2.5544, "step": 68690 }, { "epoch": 0.136865676399337, "grad_norm": 0.14919021725654602, "learning_rate": 0.002, "loss": 2.5882, "step": 68700 }, { "epoch": 0.1368855986229759, "grad_norm": 0.1476687490940094, "learning_rate": 0.002, "loss": 2.5799, "step": 68710 }, { "epoch": 0.1369055208466148, "grad_norm": 0.15803948044776917, "learning_rate": 0.002, "loss": 2.5688, "step": 68720 }, { "epoch": 0.13692544307025373, "grad_norm": 0.14540806412696838, "learning_rate": 0.002, "loss": 2.5725, "step": 68730 }, { "epoch": 0.13694536529389265, "grad_norm": 0.19797082245349884, "learning_rate": 0.002, "loss": 2.5865, "step": 68740 }, { "epoch": 0.13696528751753156, "grad_norm": 0.15723514556884766, "learning_rate": 0.002, "loss": 2.5986, "step": 68750 }, { "epoch": 0.13698520974117048, "grad_norm": 0.13769619166851044, "learning_rate": 0.002, "loss": 2.5681, "step": 68760 }, { "epoch": 0.13700513196480937, "grad_norm": 0.14641666412353516, "learning_rate": 0.002, "loss": 2.5591, "step": 68770 }, { "epoch": 0.1370250541884483, "grad_norm": 0.1955542117357254, "learning_rate": 0.002, "loss": 2.5779, "step": 68780 }, { "epoch": 0.1370449764120872, "grad_norm": 0.16752216219902039, "learning_rate": 0.002, "loss": 2.5817, "step": 68790 }, { "epoch": 0.13706489863572613, "grad_norm": 0.17574365437030792, "learning_rate": 0.002, "loss": 2.5659, "step": 68800 }, { "epoch": 0.13708482085936505, "grad_norm": 0.16473761200904846, "learning_rate": 0.002, "loss": 2.5736, "step": 68810 }, { "epoch": 0.13710474308300397, "grad_norm": 0.14547349512577057, "learning_rate": 0.002, "loss": 2.5891, "step": 68820 }, { "epoch": 0.13712466530664286, "grad_norm": 0.15557195246219635, "learning_rate": 0.002, "loss": 2.5643, "step": 68830 }, { "epoch": 0.13714458753028178, "grad_norm": 0.16033053398132324, "learning_rate": 0.002, "loss": 2.5682, "step": 68840 }, { "epoch": 0.1371645097539207, "grad_norm": 0.1604738086462021, "learning_rate": 0.002, "loss": 2.5679, "step": 68850 }, { "epoch": 0.1371844319775596, "grad_norm": 0.15264171361923218, "learning_rate": 0.002, "loss": 2.577, "step": 68860 }, { "epoch": 0.13720435420119853, "grad_norm": 0.1465560793876648, "learning_rate": 0.002, "loss": 2.5697, "step": 68870 }, { "epoch": 0.13722427642483742, "grad_norm": 0.18071360886096954, "learning_rate": 0.002, "loss": 2.5756, "step": 68880 }, { "epoch": 0.13724419864847634, "grad_norm": 0.17338603734970093, "learning_rate": 0.002, "loss": 2.5657, "step": 68890 }, { "epoch": 0.13726412087211526, "grad_norm": 0.1496754288673401, "learning_rate": 0.002, "loss": 2.5685, "step": 68900 }, { "epoch": 0.13728404309575418, "grad_norm": 0.18890869617462158, "learning_rate": 0.002, "loss": 2.5606, "step": 68910 }, { "epoch": 0.1373039653193931, "grad_norm": 0.14927352964878082, "learning_rate": 0.002, "loss": 2.554, "step": 68920 }, { "epoch": 0.137323887543032, "grad_norm": 0.14114299416542053, "learning_rate": 0.002, "loss": 2.5589, "step": 68930 }, { "epoch": 0.1373438097666709, "grad_norm": 0.19729851186275482, "learning_rate": 0.002, "loss": 2.5507, "step": 68940 }, { "epoch": 0.13736373199030982, "grad_norm": 0.16453590989112854, "learning_rate": 0.002, "loss": 2.5942, "step": 68950 }, { "epoch": 0.13738365421394874, "grad_norm": 0.15192349255084991, "learning_rate": 0.002, "loss": 2.5651, "step": 68960 }, { "epoch": 0.13740357643758766, "grad_norm": 0.17927196621894836, "learning_rate": 0.002, "loss": 2.5634, "step": 68970 }, { "epoch": 0.13742349866122658, "grad_norm": 0.13883492350578308, "learning_rate": 0.002, "loss": 2.587, "step": 68980 }, { "epoch": 0.1374434208848655, "grad_norm": 0.14897306263446808, "learning_rate": 0.002, "loss": 2.5677, "step": 68990 }, { "epoch": 0.1374633431085044, "grad_norm": 0.1407339721918106, "learning_rate": 0.002, "loss": 2.5732, "step": 69000 }, { "epoch": 0.1374832653321433, "grad_norm": 0.1772945374250412, "learning_rate": 0.002, "loss": 2.581, "step": 69010 }, { "epoch": 0.13750318755578222, "grad_norm": 0.1886056661605835, "learning_rate": 0.002, "loss": 2.5672, "step": 69020 }, { "epoch": 0.13752310977942114, "grad_norm": 0.16687193512916565, "learning_rate": 0.002, "loss": 2.5637, "step": 69030 }, { "epoch": 0.13754303200306006, "grad_norm": 0.1584150791168213, "learning_rate": 0.002, "loss": 2.56, "step": 69040 }, { "epoch": 0.13756295422669898, "grad_norm": 0.15917685627937317, "learning_rate": 0.002, "loss": 2.5717, "step": 69050 }, { "epoch": 0.13758287645033787, "grad_norm": 0.1446828991174698, "learning_rate": 0.002, "loss": 2.5731, "step": 69060 }, { "epoch": 0.1376027986739768, "grad_norm": 0.18155670166015625, "learning_rate": 0.002, "loss": 2.5743, "step": 69070 }, { "epoch": 0.1376227208976157, "grad_norm": 0.16279113292694092, "learning_rate": 0.002, "loss": 2.5649, "step": 69080 }, { "epoch": 0.13764264312125463, "grad_norm": 0.18286818265914917, "learning_rate": 0.002, "loss": 2.5529, "step": 69090 }, { "epoch": 0.13766256534489354, "grad_norm": 0.15356391668319702, "learning_rate": 0.002, "loss": 2.5679, "step": 69100 }, { "epoch": 0.13768248756853244, "grad_norm": 0.13790269196033478, "learning_rate": 0.002, "loss": 2.559, "step": 69110 }, { "epoch": 0.13770240979217135, "grad_norm": 0.1545812338590622, "learning_rate": 0.002, "loss": 2.5733, "step": 69120 }, { "epoch": 0.13772233201581027, "grad_norm": 0.1597985178232193, "learning_rate": 0.002, "loss": 2.5747, "step": 69130 }, { "epoch": 0.1377422542394492, "grad_norm": 0.15096351504325867, "learning_rate": 0.002, "loss": 2.5682, "step": 69140 }, { "epoch": 0.1377621764630881, "grad_norm": 0.1730606108903885, "learning_rate": 0.002, "loss": 2.5714, "step": 69150 }, { "epoch": 0.13778209868672703, "grad_norm": 0.16250452399253845, "learning_rate": 0.002, "loss": 2.5653, "step": 69160 }, { "epoch": 0.13780202091036592, "grad_norm": 0.21962396800518036, "learning_rate": 0.002, "loss": 2.5612, "step": 69170 }, { "epoch": 0.13782194313400484, "grad_norm": 0.14857928454875946, "learning_rate": 0.002, "loss": 2.5702, "step": 69180 }, { "epoch": 0.13784186535764376, "grad_norm": 0.1778540015220642, "learning_rate": 0.002, "loss": 2.5649, "step": 69190 }, { "epoch": 0.13786178758128267, "grad_norm": 0.17604032158851624, "learning_rate": 0.002, "loss": 2.5667, "step": 69200 }, { "epoch": 0.1378817098049216, "grad_norm": 0.45126521587371826, "learning_rate": 0.002, "loss": 2.588, "step": 69210 }, { "epoch": 0.1379016320285605, "grad_norm": 0.18333129584789276, "learning_rate": 0.002, "loss": 2.5713, "step": 69220 }, { "epoch": 0.1379215542521994, "grad_norm": 0.2061375230550766, "learning_rate": 0.002, "loss": 2.5882, "step": 69230 }, { "epoch": 0.13794147647583832, "grad_norm": 0.13610950112342834, "learning_rate": 0.002, "loss": 2.563, "step": 69240 }, { "epoch": 0.13796139869947724, "grad_norm": 0.14279919862747192, "learning_rate": 0.002, "loss": 2.5731, "step": 69250 }, { "epoch": 0.13798132092311616, "grad_norm": 0.21045105159282684, "learning_rate": 0.002, "loss": 2.5744, "step": 69260 }, { "epoch": 0.13800124314675508, "grad_norm": 0.12609419226646423, "learning_rate": 0.002, "loss": 2.5668, "step": 69270 }, { "epoch": 0.138021165370394, "grad_norm": 0.1371026635169983, "learning_rate": 0.002, "loss": 2.5766, "step": 69280 }, { "epoch": 0.13804108759403289, "grad_norm": 0.18569989502429962, "learning_rate": 0.002, "loss": 2.5594, "step": 69290 }, { "epoch": 0.1380610098176718, "grad_norm": 0.15352070331573486, "learning_rate": 0.002, "loss": 2.5702, "step": 69300 }, { "epoch": 0.13808093204131072, "grad_norm": 0.14786407351493835, "learning_rate": 0.002, "loss": 2.5848, "step": 69310 }, { "epoch": 0.13810085426494964, "grad_norm": 0.16525501012802124, "learning_rate": 0.002, "loss": 2.5672, "step": 69320 }, { "epoch": 0.13812077648858856, "grad_norm": 0.1949002742767334, "learning_rate": 0.002, "loss": 2.5753, "step": 69330 }, { "epoch": 0.13814069871222748, "grad_norm": 0.14255425333976746, "learning_rate": 0.002, "loss": 2.5651, "step": 69340 }, { "epoch": 0.13816062093586637, "grad_norm": 0.1447933316230774, "learning_rate": 0.002, "loss": 2.5836, "step": 69350 }, { "epoch": 0.1381805431595053, "grad_norm": 0.15261846780776978, "learning_rate": 0.002, "loss": 2.5773, "step": 69360 }, { "epoch": 0.1382004653831442, "grad_norm": 0.15481559932231903, "learning_rate": 0.002, "loss": 2.5659, "step": 69370 }, { "epoch": 0.13822038760678312, "grad_norm": 0.17545703053474426, "learning_rate": 0.002, "loss": 2.5628, "step": 69380 }, { "epoch": 0.13824030983042204, "grad_norm": 0.15251795947551727, "learning_rate": 0.002, "loss": 2.5664, "step": 69390 }, { "epoch": 0.13826023205406093, "grad_norm": 0.1660403311252594, "learning_rate": 0.002, "loss": 2.56, "step": 69400 }, { "epoch": 0.13828015427769985, "grad_norm": 0.1669314205646515, "learning_rate": 0.002, "loss": 2.5535, "step": 69410 }, { "epoch": 0.13830007650133877, "grad_norm": 0.17187045514583588, "learning_rate": 0.002, "loss": 2.5757, "step": 69420 }, { "epoch": 0.1383199987249777, "grad_norm": 0.1602901667356491, "learning_rate": 0.002, "loss": 2.5706, "step": 69430 }, { "epoch": 0.1383399209486166, "grad_norm": 0.15832304954528809, "learning_rate": 0.002, "loss": 2.5811, "step": 69440 }, { "epoch": 0.13835984317225553, "grad_norm": 0.1543005257844925, "learning_rate": 0.002, "loss": 2.57, "step": 69450 }, { "epoch": 0.13837976539589442, "grad_norm": 0.15398818254470825, "learning_rate": 0.002, "loss": 2.5792, "step": 69460 }, { "epoch": 0.13839968761953333, "grad_norm": 0.19122445583343506, "learning_rate": 0.002, "loss": 2.5834, "step": 69470 }, { "epoch": 0.13841960984317225, "grad_norm": 0.14575105905532837, "learning_rate": 0.002, "loss": 2.5613, "step": 69480 }, { "epoch": 0.13843953206681117, "grad_norm": 0.17235979437828064, "learning_rate": 0.002, "loss": 2.5768, "step": 69490 }, { "epoch": 0.1384594542904501, "grad_norm": 0.15947820246219635, "learning_rate": 0.002, "loss": 2.5685, "step": 69500 }, { "epoch": 0.138479376514089, "grad_norm": 0.173174187541008, "learning_rate": 0.002, "loss": 2.5659, "step": 69510 }, { "epoch": 0.1384992987377279, "grad_norm": 0.1451895833015442, "learning_rate": 0.002, "loss": 2.5788, "step": 69520 }, { "epoch": 0.13851922096136682, "grad_norm": 0.1795358657836914, "learning_rate": 0.002, "loss": 2.5746, "step": 69530 }, { "epoch": 0.13853914318500574, "grad_norm": 0.158599391579628, "learning_rate": 0.002, "loss": 2.5745, "step": 69540 }, { "epoch": 0.13855906540864465, "grad_norm": 0.17084382474422455, "learning_rate": 0.002, "loss": 2.5887, "step": 69550 }, { "epoch": 0.13857898763228357, "grad_norm": 0.14949244260787964, "learning_rate": 0.002, "loss": 2.5729, "step": 69560 }, { "epoch": 0.1385989098559225, "grad_norm": 0.14141926169395447, "learning_rate": 0.002, "loss": 2.5612, "step": 69570 }, { "epoch": 0.13861883207956138, "grad_norm": 0.14432525634765625, "learning_rate": 0.002, "loss": 2.5705, "step": 69580 }, { "epoch": 0.1386387543032003, "grad_norm": 0.17737215757369995, "learning_rate": 0.002, "loss": 2.574, "step": 69590 }, { "epoch": 0.13865867652683922, "grad_norm": 0.12393266707658768, "learning_rate": 0.002, "loss": 2.5686, "step": 69600 }, { "epoch": 0.13867859875047814, "grad_norm": 0.1831592470407486, "learning_rate": 0.002, "loss": 2.5712, "step": 69610 }, { "epoch": 0.13869852097411706, "grad_norm": 0.17542089521884918, "learning_rate": 0.002, "loss": 2.5832, "step": 69620 }, { "epoch": 0.13871844319775595, "grad_norm": 0.14874643087387085, "learning_rate": 0.002, "loss": 2.5652, "step": 69630 }, { "epoch": 0.13873836542139487, "grad_norm": 0.19825869798660278, "learning_rate": 0.002, "loss": 2.5656, "step": 69640 }, { "epoch": 0.13875828764503378, "grad_norm": 0.15477363765239716, "learning_rate": 0.002, "loss": 2.559, "step": 69650 }, { "epoch": 0.1387782098686727, "grad_norm": 0.12618185579776764, "learning_rate": 0.002, "loss": 2.5707, "step": 69660 }, { "epoch": 0.13879813209231162, "grad_norm": 0.16748808324337006, "learning_rate": 0.002, "loss": 2.5656, "step": 69670 }, { "epoch": 0.13881805431595054, "grad_norm": 0.14030204713344574, "learning_rate": 0.002, "loss": 2.5713, "step": 69680 }, { "epoch": 0.13883797653958943, "grad_norm": 0.15765443444252014, "learning_rate": 0.002, "loss": 2.5702, "step": 69690 }, { "epoch": 0.13885789876322835, "grad_norm": 0.1523274928331375, "learning_rate": 0.002, "loss": 2.5797, "step": 69700 }, { "epoch": 0.13887782098686727, "grad_norm": 0.15549153089523315, "learning_rate": 0.002, "loss": 2.562, "step": 69710 }, { "epoch": 0.13889774321050619, "grad_norm": 0.1541234254837036, "learning_rate": 0.002, "loss": 2.5757, "step": 69720 }, { "epoch": 0.1389176654341451, "grad_norm": 0.1768193393945694, "learning_rate": 0.002, "loss": 2.5703, "step": 69730 }, { "epoch": 0.13893758765778402, "grad_norm": 0.14768482744693756, "learning_rate": 0.002, "loss": 2.5801, "step": 69740 }, { "epoch": 0.1389575098814229, "grad_norm": 0.17281395196914673, "learning_rate": 0.002, "loss": 2.5696, "step": 69750 }, { "epoch": 0.13897743210506183, "grad_norm": 0.16435590386390686, "learning_rate": 0.002, "loss": 2.5677, "step": 69760 }, { "epoch": 0.13899735432870075, "grad_norm": 0.16004355251789093, "learning_rate": 0.002, "loss": 2.5698, "step": 69770 }, { "epoch": 0.13901727655233967, "grad_norm": 0.13648325204849243, "learning_rate": 0.002, "loss": 2.5606, "step": 69780 }, { "epoch": 0.1390371987759786, "grad_norm": 0.34989815950393677, "learning_rate": 0.002, "loss": 2.5637, "step": 69790 }, { "epoch": 0.1390571209996175, "grad_norm": 0.15288174152374268, "learning_rate": 0.002, "loss": 2.5742, "step": 69800 }, { "epoch": 0.1390770432232564, "grad_norm": 0.1436648666858673, "learning_rate": 0.002, "loss": 2.5802, "step": 69810 }, { "epoch": 0.13909696544689532, "grad_norm": 0.16522617638111115, "learning_rate": 0.002, "loss": 2.5733, "step": 69820 }, { "epoch": 0.13911688767053423, "grad_norm": 0.17524130642414093, "learning_rate": 0.002, "loss": 2.5628, "step": 69830 }, { "epoch": 0.13913680989417315, "grad_norm": 0.14583003520965576, "learning_rate": 0.002, "loss": 2.5659, "step": 69840 }, { "epoch": 0.13915673211781207, "grad_norm": 0.13049708306789398, "learning_rate": 0.002, "loss": 2.5782, "step": 69850 }, { "epoch": 0.13917665434145096, "grad_norm": 0.1855136901140213, "learning_rate": 0.002, "loss": 2.5667, "step": 69860 }, { "epoch": 0.13919657656508988, "grad_norm": 0.1573568731546402, "learning_rate": 0.002, "loss": 2.5653, "step": 69870 }, { "epoch": 0.1392164987887288, "grad_norm": 0.16137196123600006, "learning_rate": 0.002, "loss": 2.5632, "step": 69880 }, { "epoch": 0.13923642101236772, "grad_norm": 0.1434684544801712, "learning_rate": 0.002, "loss": 2.576, "step": 69890 }, { "epoch": 0.13925634323600664, "grad_norm": 0.1808966100215912, "learning_rate": 0.002, "loss": 2.5845, "step": 69900 }, { "epoch": 0.13927626545964555, "grad_norm": 0.14251594245433807, "learning_rate": 0.002, "loss": 2.5866, "step": 69910 }, { "epoch": 0.13929618768328444, "grad_norm": 0.16800475120544434, "learning_rate": 0.002, "loss": 2.5581, "step": 69920 }, { "epoch": 0.13931610990692336, "grad_norm": 0.1703600436449051, "learning_rate": 0.002, "loss": 2.5644, "step": 69930 }, { "epoch": 0.13933603213056228, "grad_norm": 0.16981379687786102, "learning_rate": 0.002, "loss": 2.5773, "step": 69940 }, { "epoch": 0.1393559543542012, "grad_norm": 0.15522192418575287, "learning_rate": 0.002, "loss": 2.5887, "step": 69950 }, { "epoch": 0.13937587657784012, "grad_norm": 0.1441066414117813, "learning_rate": 0.002, "loss": 2.565, "step": 69960 }, { "epoch": 0.13939579880147904, "grad_norm": 0.15745411813259125, "learning_rate": 0.002, "loss": 2.5573, "step": 69970 }, { "epoch": 0.13941572102511793, "grad_norm": 0.1592022180557251, "learning_rate": 0.002, "loss": 2.579, "step": 69980 }, { "epoch": 0.13943564324875685, "grad_norm": 0.15025164186954498, "learning_rate": 0.002, "loss": 2.5801, "step": 69990 }, { "epoch": 0.13945556547239576, "grad_norm": 0.1546677052974701, "learning_rate": 0.002, "loss": 2.5698, "step": 70000 }, { "epoch": 0.13947548769603468, "grad_norm": 0.1655009239912033, "learning_rate": 0.002, "loss": 2.5732, "step": 70010 }, { "epoch": 0.1394954099196736, "grad_norm": 0.15963436663150787, "learning_rate": 0.002, "loss": 2.5938, "step": 70020 }, { "epoch": 0.13951533214331252, "grad_norm": 0.153428316116333, "learning_rate": 0.002, "loss": 2.5882, "step": 70030 }, { "epoch": 0.1395352543669514, "grad_norm": 0.15052305161952972, "learning_rate": 0.002, "loss": 2.5564, "step": 70040 }, { "epoch": 0.13955517659059033, "grad_norm": 0.16633513569831848, "learning_rate": 0.002, "loss": 2.5825, "step": 70050 }, { "epoch": 0.13957509881422925, "grad_norm": 0.15596778690814972, "learning_rate": 0.002, "loss": 2.572, "step": 70060 }, { "epoch": 0.13959502103786817, "grad_norm": 0.16862249374389648, "learning_rate": 0.002, "loss": 2.5467, "step": 70070 }, { "epoch": 0.13961494326150709, "grad_norm": 0.15530188381671906, "learning_rate": 0.002, "loss": 2.5752, "step": 70080 }, { "epoch": 0.139634865485146, "grad_norm": 0.1525282859802246, "learning_rate": 0.002, "loss": 2.5876, "step": 70090 }, { "epoch": 0.1396547877087849, "grad_norm": 0.2691187858581543, "learning_rate": 0.002, "loss": 2.5779, "step": 70100 }, { "epoch": 0.1396747099324238, "grad_norm": 0.15853603184223175, "learning_rate": 0.002, "loss": 2.5569, "step": 70110 }, { "epoch": 0.13969463215606273, "grad_norm": 0.13038311898708344, "learning_rate": 0.002, "loss": 2.5763, "step": 70120 }, { "epoch": 0.13971455437970165, "grad_norm": 0.1443050652742386, "learning_rate": 0.002, "loss": 2.5751, "step": 70130 }, { "epoch": 0.13973447660334057, "grad_norm": 0.2080540508031845, "learning_rate": 0.002, "loss": 2.568, "step": 70140 }, { "epoch": 0.13975439882697946, "grad_norm": 0.15941789746284485, "learning_rate": 0.002, "loss": 2.5655, "step": 70150 }, { "epoch": 0.13977432105061838, "grad_norm": 0.14814616739749908, "learning_rate": 0.002, "loss": 2.562, "step": 70160 }, { "epoch": 0.1397942432742573, "grad_norm": 0.14888960123062134, "learning_rate": 0.002, "loss": 2.5621, "step": 70170 }, { "epoch": 0.13981416549789621, "grad_norm": 0.15820549428462982, "learning_rate": 0.002, "loss": 2.5721, "step": 70180 }, { "epoch": 0.13983408772153513, "grad_norm": 0.13155311346054077, "learning_rate": 0.002, "loss": 2.5673, "step": 70190 }, { "epoch": 0.13985400994517405, "grad_norm": 0.15366297960281372, "learning_rate": 0.002, "loss": 2.5754, "step": 70200 }, { "epoch": 0.13987393216881294, "grad_norm": 0.1663692593574524, "learning_rate": 0.002, "loss": 2.576, "step": 70210 }, { "epoch": 0.13989385439245186, "grad_norm": 0.1604674607515335, "learning_rate": 0.002, "loss": 2.585, "step": 70220 }, { "epoch": 0.13991377661609078, "grad_norm": 0.20942744612693787, "learning_rate": 0.002, "loss": 2.5694, "step": 70230 }, { "epoch": 0.1399336988397297, "grad_norm": 0.17523624002933502, "learning_rate": 0.002, "loss": 2.5702, "step": 70240 }, { "epoch": 0.13995362106336862, "grad_norm": 0.16382981836795807, "learning_rate": 0.002, "loss": 2.5719, "step": 70250 }, { "epoch": 0.13997354328700753, "grad_norm": 0.16245192289352417, "learning_rate": 0.002, "loss": 2.5607, "step": 70260 }, { "epoch": 0.13999346551064643, "grad_norm": 0.16745193302631378, "learning_rate": 0.002, "loss": 2.5804, "step": 70270 }, { "epoch": 0.14001338773428534, "grad_norm": 0.1412881463766098, "learning_rate": 0.002, "loss": 2.5833, "step": 70280 }, { "epoch": 0.14003330995792426, "grad_norm": 0.14424820244312286, "learning_rate": 0.002, "loss": 2.5683, "step": 70290 }, { "epoch": 0.14005323218156318, "grad_norm": 0.1537065953016281, "learning_rate": 0.002, "loss": 2.5778, "step": 70300 }, { "epoch": 0.1400731544052021, "grad_norm": 0.17634111642837524, "learning_rate": 0.002, "loss": 2.5692, "step": 70310 }, { "epoch": 0.14009307662884102, "grad_norm": 0.19324526190757751, "learning_rate": 0.002, "loss": 2.5513, "step": 70320 }, { "epoch": 0.1401129988524799, "grad_norm": 0.14010803401470184, "learning_rate": 0.002, "loss": 2.5761, "step": 70330 }, { "epoch": 0.14013292107611883, "grad_norm": 0.14217551052570343, "learning_rate": 0.002, "loss": 2.5796, "step": 70340 }, { "epoch": 0.14015284329975775, "grad_norm": 0.17257200181484222, "learning_rate": 0.002, "loss": 2.5722, "step": 70350 }, { "epoch": 0.14017276552339666, "grad_norm": 0.1726231575012207, "learning_rate": 0.002, "loss": 2.5687, "step": 70360 }, { "epoch": 0.14019268774703558, "grad_norm": 0.15159764885902405, "learning_rate": 0.002, "loss": 2.5703, "step": 70370 }, { "epoch": 0.14021260997067447, "grad_norm": 0.15291078388690948, "learning_rate": 0.002, "loss": 2.5793, "step": 70380 }, { "epoch": 0.1402325321943134, "grad_norm": 0.18663610517978668, "learning_rate": 0.002, "loss": 2.5804, "step": 70390 }, { "epoch": 0.1402524544179523, "grad_norm": 0.21514928340911865, "learning_rate": 0.002, "loss": 2.5902, "step": 70400 }, { "epoch": 0.14027237664159123, "grad_norm": 0.16833339631557465, "learning_rate": 0.002, "loss": 2.5837, "step": 70410 }, { "epoch": 0.14029229886523015, "grad_norm": 0.14132370054721832, "learning_rate": 0.002, "loss": 2.5597, "step": 70420 }, { "epoch": 0.14031222108886907, "grad_norm": 0.21664489805698395, "learning_rate": 0.002, "loss": 2.5664, "step": 70430 }, { "epoch": 0.14033214331250796, "grad_norm": 0.13562168180942535, "learning_rate": 0.002, "loss": 2.5726, "step": 70440 }, { "epoch": 0.14035206553614687, "grad_norm": 0.1510695368051529, "learning_rate": 0.002, "loss": 2.5657, "step": 70450 }, { "epoch": 0.1403719877597858, "grad_norm": 0.17496386170387268, "learning_rate": 0.002, "loss": 2.5515, "step": 70460 }, { "epoch": 0.1403919099834247, "grad_norm": 0.1861063539981842, "learning_rate": 0.002, "loss": 2.5811, "step": 70470 }, { "epoch": 0.14041183220706363, "grad_norm": 0.14998571574687958, "learning_rate": 0.002, "loss": 2.5601, "step": 70480 }, { "epoch": 0.14043175443070255, "grad_norm": 0.13685111701488495, "learning_rate": 0.002, "loss": 2.5773, "step": 70490 }, { "epoch": 0.14045167665434144, "grad_norm": 0.16309292614459991, "learning_rate": 0.002, "loss": 2.5728, "step": 70500 }, { "epoch": 0.14047159887798036, "grad_norm": 0.16302086412906647, "learning_rate": 0.002, "loss": 2.566, "step": 70510 }, { "epoch": 0.14049152110161928, "grad_norm": 0.171661838889122, "learning_rate": 0.002, "loss": 2.5695, "step": 70520 }, { "epoch": 0.1405114433252582, "grad_norm": 0.1565091907978058, "learning_rate": 0.002, "loss": 2.5791, "step": 70530 }, { "epoch": 0.1405313655488971, "grad_norm": 0.1565815806388855, "learning_rate": 0.002, "loss": 2.5597, "step": 70540 }, { "epoch": 0.14055128777253603, "grad_norm": 0.1815580129623413, "learning_rate": 0.002, "loss": 2.5732, "step": 70550 }, { "epoch": 0.14057120999617492, "grad_norm": 0.14388447999954224, "learning_rate": 0.002, "loss": 2.5805, "step": 70560 }, { "epoch": 0.14059113221981384, "grad_norm": 0.14782147109508514, "learning_rate": 0.002, "loss": 2.5654, "step": 70570 }, { "epoch": 0.14061105444345276, "grad_norm": 0.15473374724388123, "learning_rate": 0.002, "loss": 2.5636, "step": 70580 }, { "epoch": 0.14063097666709168, "grad_norm": 0.17717935144901276, "learning_rate": 0.002, "loss": 2.5856, "step": 70590 }, { "epoch": 0.1406508988907306, "grad_norm": 0.17373871803283691, "learning_rate": 0.002, "loss": 2.5653, "step": 70600 }, { "epoch": 0.14067082111436952, "grad_norm": 0.15910905599594116, "learning_rate": 0.002, "loss": 2.5519, "step": 70610 }, { "epoch": 0.1406907433380084, "grad_norm": 0.1609538346529007, "learning_rate": 0.002, "loss": 2.5787, "step": 70620 }, { "epoch": 0.14071066556164732, "grad_norm": 0.17990165948867798, "learning_rate": 0.002, "loss": 2.5667, "step": 70630 }, { "epoch": 0.14073058778528624, "grad_norm": 0.1799970418214798, "learning_rate": 0.002, "loss": 2.5792, "step": 70640 }, { "epoch": 0.14075051000892516, "grad_norm": 0.1771971434354782, "learning_rate": 0.002, "loss": 2.5765, "step": 70650 }, { "epoch": 0.14077043223256408, "grad_norm": 0.15576963126659393, "learning_rate": 0.002, "loss": 2.5734, "step": 70660 }, { "epoch": 0.14079035445620297, "grad_norm": 0.1836930364370346, "learning_rate": 0.002, "loss": 2.5786, "step": 70670 }, { "epoch": 0.1408102766798419, "grad_norm": 0.17998921871185303, "learning_rate": 0.002, "loss": 2.5844, "step": 70680 }, { "epoch": 0.1408301989034808, "grad_norm": 0.17962677776813507, "learning_rate": 0.002, "loss": 2.5817, "step": 70690 }, { "epoch": 0.14085012112711973, "grad_norm": 0.15056319534778595, "learning_rate": 0.002, "loss": 2.576, "step": 70700 }, { "epoch": 0.14087004335075864, "grad_norm": 0.21420161426067352, "learning_rate": 0.002, "loss": 2.5888, "step": 70710 }, { "epoch": 0.14088996557439756, "grad_norm": 0.16002950072288513, "learning_rate": 0.002, "loss": 2.5616, "step": 70720 }, { "epoch": 0.14090988779803645, "grad_norm": 0.1388445794582367, "learning_rate": 0.002, "loss": 2.5675, "step": 70730 }, { "epoch": 0.14092981002167537, "grad_norm": 0.1406273990869522, "learning_rate": 0.002, "loss": 2.5529, "step": 70740 }, { "epoch": 0.1409497322453143, "grad_norm": 0.22405412793159485, "learning_rate": 0.002, "loss": 2.5848, "step": 70750 }, { "epoch": 0.1409696544689532, "grad_norm": 0.14041826128959656, "learning_rate": 0.002, "loss": 2.5549, "step": 70760 }, { "epoch": 0.14098957669259213, "grad_norm": 0.17160841822624207, "learning_rate": 0.002, "loss": 2.564, "step": 70770 }, { "epoch": 0.14100949891623105, "grad_norm": 0.16193369030952454, "learning_rate": 0.002, "loss": 2.5793, "step": 70780 }, { "epoch": 0.14102942113986994, "grad_norm": 0.14363643527030945, "learning_rate": 0.002, "loss": 2.5725, "step": 70790 }, { "epoch": 0.14104934336350886, "grad_norm": 0.218790203332901, "learning_rate": 0.002, "loss": 2.5663, "step": 70800 }, { "epoch": 0.14106926558714777, "grad_norm": 0.1431330442428589, "learning_rate": 0.002, "loss": 2.5683, "step": 70810 }, { "epoch": 0.1410891878107867, "grad_norm": 0.20665355026721954, "learning_rate": 0.002, "loss": 2.5453, "step": 70820 }, { "epoch": 0.1411091100344256, "grad_norm": 0.15514928102493286, "learning_rate": 0.002, "loss": 2.5687, "step": 70830 }, { "epoch": 0.14112903225806453, "grad_norm": 0.16254456341266632, "learning_rate": 0.002, "loss": 2.5739, "step": 70840 }, { "epoch": 0.14114895448170342, "grad_norm": 0.15157900750637054, "learning_rate": 0.002, "loss": 2.5567, "step": 70850 }, { "epoch": 0.14116887670534234, "grad_norm": 0.1646088808774948, "learning_rate": 0.002, "loss": 2.5898, "step": 70860 }, { "epoch": 0.14118879892898126, "grad_norm": 0.15327176451683044, "learning_rate": 0.002, "loss": 2.5788, "step": 70870 }, { "epoch": 0.14120872115262018, "grad_norm": 0.1768125742673874, "learning_rate": 0.002, "loss": 2.5697, "step": 70880 }, { "epoch": 0.1412286433762591, "grad_norm": 0.17737255990505219, "learning_rate": 0.002, "loss": 2.5768, "step": 70890 }, { "epoch": 0.14124856559989798, "grad_norm": 0.15565526485443115, "learning_rate": 0.002, "loss": 2.5725, "step": 70900 }, { "epoch": 0.1412684878235369, "grad_norm": 0.1547059416770935, "learning_rate": 0.002, "loss": 2.566, "step": 70910 }, { "epoch": 0.14128841004717582, "grad_norm": 0.14380425214767456, "learning_rate": 0.002, "loss": 2.5622, "step": 70920 }, { "epoch": 0.14130833227081474, "grad_norm": 0.19726157188415527, "learning_rate": 0.002, "loss": 2.5583, "step": 70930 }, { "epoch": 0.14132825449445366, "grad_norm": 0.1937198042869568, "learning_rate": 0.002, "loss": 2.5743, "step": 70940 }, { "epoch": 0.14134817671809258, "grad_norm": 0.14715997874736786, "learning_rate": 0.002, "loss": 2.5651, "step": 70950 }, { "epoch": 0.14136809894173147, "grad_norm": 0.14327749609947205, "learning_rate": 0.002, "loss": 2.5766, "step": 70960 }, { "epoch": 0.1413880211653704, "grad_norm": 0.18887567520141602, "learning_rate": 0.002, "loss": 2.566, "step": 70970 }, { "epoch": 0.1414079433890093, "grad_norm": 0.17414864897727966, "learning_rate": 0.002, "loss": 2.569, "step": 70980 }, { "epoch": 0.14142786561264822, "grad_norm": 0.14918218553066254, "learning_rate": 0.002, "loss": 2.5687, "step": 70990 }, { "epoch": 0.14144778783628714, "grad_norm": 0.18619203567504883, "learning_rate": 0.002, "loss": 2.5671, "step": 71000 }, { "epoch": 0.14146771005992606, "grad_norm": 0.16028964519500732, "learning_rate": 0.002, "loss": 2.5659, "step": 71010 }, { "epoch": 0.14148763228356495, "grad_norm": 0.18129847943782806, "learning_rate": 0.002, "loss": 2.5455, "step": 71020 }, { "epoch": 0.14150755450720387, "grad_norm": 0.1509724110364914, "learning_rate": 0.002, "loss": 2.5537, "step": 71030 }, { "epoch": 0.1415274767308428, "grad_norm": 0.16917146742343903, "learning_rate": 0.002, "loss": 2.5706, "step": 71040 }, { "epoch": 0.1415473989544817, "grad_norm": 0.45372194051742554, "learning_rate": 0.002, "loss": 2.5852, "step": 71050 }, { "epoch": 0.14156732117812063, "grad_norm": 0.1619877815246582, "learning_rate": 0.002, "loss": 2.5807, "step": 71060 }, { "epoch": 0.14158724340175954, "grad_norm": 0.15901310741901398, "learning_rate": 0.002, "loss": 2.5767, "step": 71070 }, { "epoch": 0.14160716562539843, "grad_norm": 0.17984646558761597, "learning_rate": 0.002, "loss": 2.5763, "step": 71080 }, { "epoch": 0.14162708784903735, "grad_norm": 0.14440155029296875, "learning_rate": 0.002, "loss": 2.5743, "step": 71090 }, { "epoch": 0.14164701007267627, "grad_norm": 0.16901153326034546, "learning_rate": 0.002, "loss": 2.5865, "step": 71100 }, { "epoch": 0.1416669322963152, "grad_norm": 0.13627532124519348, "learning_rate": 0.002, "loss": 2.5704, "step": 71110 }, { "epoch": 0.1416868545199541, "grad_norm": 0.14069458842277527, "learning_rate": 0.002, "loss": 2.564, "step": 71120 }, { "epoch": 0.141706776743593, "grad_norm": 0.1449994146823883, "learning_rate": 0.002, "loss": 2.5771, "step": 71130 }, { "epoch": 0.14172669896723192, "grad_norm": 0.17080573737621307, "learning_rate": 0.002, "loss": 2.5675, "step": 71140 }, { "epoch": 0.14174662119087084, "grad_norm": 0.1530093401670456, "learning_rate": 0.002, "loss": 2.5752, "step": 71150 }, { "epoch": 0.14176654341450975, "grad_norm": 0.17158035933971405, "learning_rate": 0.002, "loss": 2.5669, "step": 71160 }, { "epoch": 0.14178646563814867, "grad_norm": 0.16227346658706665, "learning_rate": 0.002, "loss": 2.569, "step": 71170 }, { "epoch": 0.1418063878617876, "grad_norm": 0.16056953370571136, "learning_rate": 0.002, "loss": 2.5813, "step": 71180 }, { "epoch": 0.14182631008542648, "grad_norm": 0.15547849237918854, "learning_rate": 0.002, "loss": 2.5546, "step": 71190 }, { "epoch": 0.1418462323090654, "grad_norm": 0.17156226933002472, "learning_rate": 0.002, "loss": 2.5915, "step": 71200 }, { "epoch": 0.14186615453270432, "grad_norm": 0.1632668823003769, "learning_rate": 0.002, "loss": 2.5717, "step": 71210 }, { "epoch": 0.14188607675634324, "grad_norm": 0.15409494936466217, "learning_rate": 0.002, "loss": 2.5775, "step": 71220 }, { "epoch": 0.14190599897998216, "grad_norm": 0.15953193604946136, "learning_rate": 0.002, "loss": 2.5578, "step": 71230 }, { "epoch": 0.14192592120362107, "grad_norm": 0.18329201638698578, "learning_rate": 0.002, "loss": 2.5756, "step": 71240 }, { "epoch": 0.14194584342725997, "grad_norm": 0.1661294847726822, "learning_rate": 0.002, "loss": 2.5809, "step": 71250 }, { "epoch": 0.14196576565089888, "grad_norm": 0.18033206462860107, "learning_rate": 0.002, "loss": 2.5603, "step": 71260 }, { "epoch": 0.1419856878745378, "grad_norm": 0.16991026699543, "learning_rate": 0.002, "loss": 2.588, "step": 71270 }, { "epoch": 0.14200561009817672, "grad_norm": 0.16024576127529144, "learning_rate": 0.002, "loss": 2.574, "step": 71280 }, { "epoch": 0.14202553232181564, "grad_norm": 0.13606593012809753, "learning_rate": 0.002, "loss": 2.5834, "step": 71290 }, { "epoch": 0.14204545454545456, "grad_norm": 0.18716734647750854, "learning_rate": 0.002, "loss": 2.5694, "step": 71300 }, { "epoch": 0.14206537676909345, "grad_norm": 0.15704688429832458, "learning_rate": 0.002, "loss": 2.5687, "step": 71310 }, { "epoch": 0.14208529899273237, "grad_norm": 0.15325850248336792, "learning_rate": 0.002, "loss": 2.5606, "step": 71320 }, { "epoch": 0.14210522121637129, "grad_norm": 0.16420769691467285, "learning_rate": 0.002, "loss": 2.5642, "step": 71330 }, { "epoch": 0.1421251434400102, "grad_norm": 0.1731787621974945, "learning_rate": 0.002, "loss": 2.5737, "step": 71340 }, { "epoch": 0.14214506566364912, "grad_norm": 0.17695434391498566, "learning_rate": 0.002, "loss": 2.5727, "step": 71350 }, { "epoch": 0.14216498788728804, "grad_norm": 0.16197197139263153, "learning_rate": 0.002, "loss": 2.5703, "step": 71360 }, { "epoch": 0.14218491011092693, "grad_norm": 0.14503000676631927, "learning_rate": 0.002, "loss": 2.5668, "step": 71370 }, { "epoch": 0.14220483233456585, "grad_norm": 0.17044687271118164, "learning_rate": 0.002, "loss": 2.5791, "step": 71380 }, { "epoch": 0.14222475455820477, "grad_norm": 0.154036283493042, "learning_rate": 0.002, "loss": 2.5781, "step": 71390 }, { "epoch": 0.1422446767818437, "grad_norm": 0.4964090883731842, "learning_rate": 0.002, "loss": 2.5815, "step": 71400 }, { "epoch": 0.1422645990054826, "grad_norm": 0.1419556438922882, "learning_rate": 0.002, "loss": 2.5793, "step": 71410 }, { "epoch": 0.1422845212291215, "grad_norm": 0.171599343419075, "learning_rate": 0.002, "loss": 2.5687, "step": 71420 }, { "epoch": 0.14230444345276042, "grad_norm": 0.13790418207645416, "learning_rate": 0.002, "loss": 2.5865, "step": 71430 }, { "epoch": 0.14232436567639933, "grad_norm": 0.19451570510864258, "learning_rate": 0.002, "loss": 2.5859, "step": 71440 }, { "epoch": 0.14234428790003825, "grad_norm": 0.18815694749355316, "learning_rate": 0.002, "loss": 2.5772, "step": 71450 }, { "epoch": 0.14236421012367717, "grad_norm": 0.15738359093666077, "learning_rate": 0.002, "loss": 2.5758, "step": 71460 }, { "epoch": 0.1423841323473161, "grad_norm": 0.14142630994319916, "learning_rate": 0.002, "loss": 2.5789, "step": 71470 }, { "epoch": 0.14240405457095498, "grad_norm": 0.21291877329349518, "learning_rate": 0.002, "loss": 2.5702, "step": 71480 }, { "epoch": 0.1424239767945939, "grad_norm": 0.160009503364563, "learning_rate": 0.002, "loss": 2.5729, "step": 71490 }, { "epoch": 0.14244389901823282, "grad_norm": 0.17505495250225067, "learning_rate": 0.002, "loss": 2.5594, "step": 71500 }, { "epoch": 0.14246382124187174, "grad_norm": 0.17821399867534637, "learning_rate": 0.002, "loss": 2.5523, "step": 71510 }, { "epoch": 0.14248374346551065, "grad_norm": 0.1643551141023636, "learning_rate": 0.002, "loss": 2.5583, "step": 71520 }, { "epoch": 0.14250366568914957, "grad_norm": 0.14902979135513306, "learning_rate": 0.002, "loss": 2.5765, "step": 71530 }, { "epoch": 0.14252358791278846, "grad_norm": 0.16920478641986847, "learning_rate": 0.002, "loss": 2.5663, "step": 71540 }, { "epoch": 0.14254351013642738, "grad_norm": 0.16064825654029846, "learning_rate": 0.002, "loss": 2.5681, "step": 71550 }, { "epoch": 0.1425634323600663, "grad_norm": 0.1548851728439331, "learning_rate": 0.002, "loss": 2.5756, "step": 71560 }, { "epoch": 0.14258335458370522, "grad_norm": 0.16294807195663452, "learning_rate": 0.002, "loss": 2.5682, "step": 71570 }, { "epoch": 0.14260327680734414, "grad_norm": 0.15657296776771545, "learning_rate": 0.002, "loss": 2.5633, "step": 71580 }, { "epoch": 0.14262319903098306, "grad_norm": 0.1712602972984314, "learning_rate": 0.002, "loss": 2.5698, "step": 71590 }, { "epoch": 0.14264312125462195, "grad_norm": 0.1687106192111969, "learning_rate": 0.002, "loss": 2.574, "step": 71600 }, { "epoch": 0.14266304347826086, "grad_norm": 0.13817904889583588, "learning_rate": 0.002, "loss": 2.577, "step": 71610 }, { "epoch": 0.14268296570189978, "grad_norm": 0.16885237395763397, "learning_rate": 0.002, "loss": 2.5685, "step": 71620 }, { "epoch": 0.1427028879255387, "grad_norm": 0.166541188955307, "learning_rate": 0.002, "loss": 2.5757, "step": 71630 }, { "epoch": 0.14272281014917762, "grad_norm": 0.17484872043132782, "learning_rate": 0.002, "loss": 2.5735, "step": 71640 }, { "epoch": 0.1427427323728165, "grad_norm": 0.1987346112728119, "learning_rate": 0.002, "loss": 2.5611, "step": 71650 }, { "epoch": 0.14276265459645543, "grad_norm": 0.1454603523015976, "learning_rate": 0.002, "loss": 2.5909, "step": 71660 }, { "epoch": 0.14278257682009435, "grad_norm": 0.15726281702518463, "learning_rate": 0.002, "loss": 2.5687, "step": 71670 }, { "epoch": 0.14280249904373327, "grad_norm": 0.15258219838142395, "learning_rate": 0.002, "loss": 2.5605, "step": 71680 }, { "epoch": 0.14282242126737218, "grad_norm": 0.15142452716827393, "learning_rate": 0.002, "loss": 2.5844, "step": 71690 }, { "epoch": 0.1428423434910111, "grad_norm": 0.15247616171836853, "learning_rate": 0.002, "loss": 2.5677, "step": 71700 }, { "epoch": 0.14286226571465, "grad_norm": 0.1590690016746521, "learning_rate": 0.002, "loss": 2.5671, "step": 71710 }, { "epoch": 0.1428821879382889, "grad_norm": 0.18498198688030243, "learning_rate": 0.002, "loss": 2.5663, "step": 71720 }, { "epoch": 0.14290211016192783, "grad_norm": 0.18120163679122925, "learning_rate": 0.002, "loss": 2.5619, "step": 71730 }, { "epoch": 0.14292203238556675, "grad_norm": 0.13961222767829895, "learning_rate": 0.002, "loss": 2.5726, "step": 71740 }, { "epoch": 0.14294195460920567, "grad_norm": 0.14603304862976074, "learning_rate": 0.002, "loss": 2.5796, "step": 71750 }, { "epoch": 0.1429618768328446, "grad_norm": 0.14339466392993927, "learning_rate": 0.002, "loss": 2.5587, "step": 71760 }, { "epoch": 0.14298179905648348, "grad_norm": 0.2184414565563202, "learning_rate": 0.002, "loss": 2.5686, "step": 71770 }, { "epoch": 0.1430017212801224, "grad_norm": 0.15602897107601166, "learning_rate": 0.002, "loss": 2.5613, "step": 71780 }, { "epoch": 0.14302164350376131, "grad_norm": 0.16756607592105865, "learning_rate": 0.002, "loss": 2.5775, "step": 71790 }, { "epoch": 0.14304156572740023, "grad_norm": 0.16355839371681213, "learning_rate": 0.002, "loss": 2.5656, "step": 71800 }, { "epoch": 0.14306148795103915, "grad_norm": 0.17028672993183136, "learning_rate": 0.002, "loss": 2.5769, "step": 71810 }, { "epoch": 0.14308141017467807, "grad_norm": 0.16939420998096466, "learning_rate": 0.002, "loss": 2.5788, "step": 71820 }, { "epoch": 0.14310133239831696, "grad_norm": 0.13004903495311737, "learning_rate": 0.002, "loss": 2.571, "step": 71830 }, { "epoch": 0.14312125462195588, "grad_norm": 0.1694222390651703, "learning_rate": 0.002, "loss": 2.5622, "step": 71840 }, { "epoch": 0.1431411768455948, "grad_norm": 0.17754589021205902, "learning_rate": 0.002, "loss": 2.5644, "step": 71850 }, { "epoch": 0.14316109906923372, "grad_norm": 0.16471163928508759, "learning_rate": 0.002, "loss": 2.5563, "step": 71860 }, { "epoch": 0.14318102129287263, "grad_norm": 0.16787585616111755, "learning_rate": 0.002, "loss": 2.5839, "step": 71870 }, { "epoch": 0.14320094351651153, "grad_norm": 0.2123035490512848, "learning_rate": 0.002, "loss": 2.5719, "step": 71880 }, { "epoch": 0.14322086574015044, "grad_norm": 0.14073508977890015, "learning_rate": 0.002, "loss": 2.5713, "step": 71890 }, { "epoch": 0.14324078796378936, "grad_norm": 0.15057887136936188, "learning_rate": 0.002, "loss": 2.5666, "step": 71900 }, { "epoch": 0.14326071018742828, "grad_norm": 0.18771418929100037, "learning_rate": 0.002, "loss": 2.5684, "step": 71910 }, { "epoch": 0.1432806324110672, "grad_norm": 0.16558893024921417, "learning_rate": 0.002, "loss": 2.5768, "step": 71920 }, { "epoch": 0.14330055463470612, "grad_norm": 0.18257561326026917, "learning_rate": 0.002, "loss": 2.5719, "step": 71930 }, { "epoch": 0.143320476858345, "grad_norm": 0.16462171077728271, "learning_rate": 0.002, "loss": 2.5701, "step": 71940 }, { "epoch": 0.14334039908198393, "grad_norm": 0.16105039417743683, "learning_rate": 0.002, "loss": 2.5723, "step": 71950 }, { "epoch": 0.14336032130562285, "grad_norm": 0.1755685955286026, "learning_rate": 0.002, "loss": 2.5726, "step": 71960 }, { "epoch": 0.14338024352926176, "grad_norm": 0.14384225010871887, "learning_rate": 0.002, "loss": 2.5683, "step": 71970 }, { "epoch": 0.14340016575290068, "grad_norm": 0.13005049526691437, "learning_rate": 0.002, "loss": 2.5697, "step": 71980 }, { "epoch": 0.1434200879765396, "grad_norm": 0.20026685297489166, "learning_rate": 0.002, "loss": 2.5777, "step": 71990 }, { "epoch": 0.1434400102001785, "grad_norm": 0.2564525604248047, "learning_rate": 0.002, "loss": 2.5785, "step": 72000 }, { "epoch": 0.1434599324238174, "grad_norm": 0.1538124680519104, "learning_rate": 0.002, "loss": 2.5552, "step": 72010 }, { "epoch": 0.14347985464745633, "grad_norm": 0.17168143391609192, "learning_rate": 0.002, "loss": 2.5751, "step": 72020 }, { "epoch": 0.14349977687109525, "grad_norm": 0.14855371415615082, "learning_rate": 0.002, "loss": 2.5697, "step": 72030 }, { "epoch": 0.14351969909473417, "grad_norm": 0.15380693972110748, "learning_rate": 0.002, "loss": 2.5726, "step": 72040 }, { "epoch": 0.14353962131837308, "grad_norm": 0.1896754801273346, "learning_rate": 0.002, "loss": 2.5864, "step": 72050 }, { "epoch": 0.14355954354201197, "grad_norm": 0.1562412977218628, "learning_rate": 0.002, "loss": 2.5772, "step": 72060 }, { "epoch": 0.1435794657656509, "grad_norm": 0.15795078873634338, "learning_rate": 0.002, "loss": 2.558, "step": 72070 }, { "epoch": 0.1435993879892898, "grad_norm": 0.1796104609966278, "learning_rate": 0.002, "loss": 2.552, "step": 72080 }, { "epoch": 0.14361931021292873, "grad_norm": 0.16962970793247223, "learning_rate": 0.002, "loss": 2.5752, "step": 72090 }, { "epoch": 0.14363923243656765, "grad_norm": 0.14266705513000488, "learning_rate": 0.002, "loss": 2.5734, "step": 72100 }, { "epoch": 0.14365915466020657, "grad_norm": 0.15186943113803864, "learning_rate": 0.002, "loss": 2.5759, "step": 72110 }, { "epoch": 0.14367907688384546, "grad_norm": 0.20305892825126648, "learning_rate": 0.002, "loss": 2.598, "step": 72120 }, { "epoch": 0.14369899910748438, "grad_norm": 0.18439273536205292, "learning_rate": 0.002, "loss": 2.5779, "step": 72130 }, { "epoch": 0.1437189213311233, "grad_norm": 0.15781311690807343, "learning_rate": 0.002, "loss": 2.5681, "step": 72140 }, { "epoch": 0.1437388435547622, "grad_norm": 0.17500214278697968, "learning_rate": 0.002, "loss": 2.5589, "step": 72150 }, { "epoch": 0.14375876577840113, "grad_norm": 0.15804897248744965, "learning_rate": 0.002, "loss": 2.5801, "step": 72160 }, { "epoch": 0.14377868800204002, "grad_norm": 0.1654340624809265, "learning_rate": 0.002, "loss": 2.5802, "step": 72170 }, { "epoch": 0.14379861022567894, "grad_norm": 0.1660407930612564, "learning_rate": 0.002, "loss": 2.5621, "step": 72180 }, { "epoch": 0.14381853244931786, "grad_norm": 0.19933049380779266, "learning_rate": 0.002, "loss": 2.5682, "step": 72190 }, { "epoch": 0.14383845467295678, "grad_norm": 0.15763601660728455, "learning_rate": 0.002, "loss": 2.5685, "step": 72200 }, { "epoch": 0.1438583768965957, "grad_norm": 0.13859163224697113, "learning_rate": 0.002, "loss": 2.5775, "step": 72210 }, { "epoch": 0.14387829912023462, "grad_norm": 0.15501008927822113, "learning_rate": 0.002, "loss": 2.5699, "step": 72220 }, { "epoch": 0.1438982213438735, "grad_norm": 0.174190491437912, "learning_rate": 0.002, "loss": 2.5756, "step": 72230 }, { "epoch": 0.14391814356751242, "grad_norm": 0.18153642117977142, "learning_rate": 0.002, "loss": 2.5879, "step": 72240 }, { "epoch": 0.14393806579115134, "grad_norm": 0.16950538754463196, "learning_rate": 0.002, "loss": 2.5716, "step": 72250 }, { "epoch": 0.14395798801479026, "grad_norm": 0.1519431471824646, "learning_rate": 0.002, "loss": 2.5684, "step": 72260 }, { "epoch": 0.14397791023842918, "grad_norm": 0.17861896753311157, "learning_rate": 0.002, "loss": 2.5743, "step": 72270 }, { "epoch": 0.1439978324620681, "grad_norm": 0.16017460823059082, "learning_rate": 0.002, "loss": 2.5611, "step": 72280 }, { "epoch": 0.144017754685707, "grad_norm": 0.13298562169075012, "learning_rate": 0.002, "loss": 2.576, "step": 72290 }, { "epoch": 0.1440376769093459, "grad_norm": 0.18035534024238586, "learning_rate": 0.002, "loss": 2.5759, "step": 72300 }, { "epoch": 0.14405759913298483, "grad_norm": 0.15079155564308167, "learning_rate": 0.002, "loss": 2.5951, "step": 72310 }, { "epoch": 0.14407752135662374, "grad_norm": 0.14604566991329193, "learning_rate": 0.002, "loss": 2.5777, "step": 72320 }, { "epoch": 0.14409744358026266, "grad_norm": 0.19577980041503906, "learning_rate": 0.002, "loss": 2.5796, "step": 72330 }, { "epoch": 0.14411736580390158, "grad_norm": 0.16542334854602814, "learning_rate": 0.002, "loss": 2.5761, "step": 72340 }, { "epoch": 0.14413728802754047, "grad_norm": 0.19631284475326538, "learning_rate": 0.002, "loss": 2.5699, "step": 72350 }, { "epoch": 0.1441572102511794, "grad_norm": 0.1339428722858429, "learning_rate": 0.002, "loss": 2.5747, "step": 72360 }, { "epoch": 0.1441771324748183, "grad_norm": 0.16919559240341187, "learning_rate": 0.002, "loss": 2.5819, "step": 72370 }, { "epoch": 0.14419705469845723, "grad_norm": 0.16126662492752075, "learning_rate": 0.002, "loss": 2.5623, "step": 72380 }, { "epoch": 0.14421697692209615, "grad_norm": 0.15824878215789795, "learning_rate": 0.002, "loss": 2.5769, "step": 72390 }, { "epoch": 0.14423689914573504, "grad_norm": 0.15471401810646057, "learning_rate": 0.002, "loss": 2.5764, "step": 72400 }, { "epoch": 0.14425682136937396, "grad_norm": 0.1818322092294693, "learning_rate": 0.002, "loss": 2.5643, "step": 72410 }, { "epoch": 0.14427674359301287, "grad_norm": 0.13258281350135803, "learning_rate": 0.002, "loss": 2.576, "step": 72420 }, { "epoch": 0.1442966658166518, "grad_norm": 0.1845541149377823, "learning_rate": 0.002, "loss": 2.5737, "step": 72430 }, { "epoch": 0.1443165880402907, "grad_norm": 0.16829481720924377, "learning_rate": 0.002, "loss": 2.5749, "step": 72440 }, { "epoch": 0.14433651026392963, "grad_norm": 0.15854497253894806, "learning_rate": 0.002, "loss": 2.5692, "step": 72450 }, { "epoch": 0.14435643248756852, "grad_norm": 0.13559852540493011, "learning_rate": 0.002, "loss": 2.5717, "step": 72460 }, { "epoch": 0.14437635471120744, "grad_norm": 0.1549326628446579, "learning_rate": 0.002, "loss": 2.5671, "step": 72470 }, { "epoch": 0.14439627693484636, "grad_norm": 0.14924080669879913, "learning_rate": 0.002, "loss": 2.5676, "step": 72480 }, { "epoch": 0.14441619915848528, "grad_norm": 0.17030201852321625, "learning_rate": 0.002, "loss": 2.5673, "step": 72490 }, { "epoch": 0.1444361213821242, "grad_norm": 0.15747523307800293, "learning_rate": 0.002, "loss": 2.5539, "step": 72500 }, { "epoch": 0.1444560436057631, "grad_norm": 0.16696113348007202, "learning_rate": 0.002, "loss": 2.5812, "step": 72510 }, { "epoch": 0.144475965829402, "grad_norm": 0.18150795996189117, "learning_rate": 0.002, "loss": 2.5898, "step": 72520 }, { "epoch": 0.14449588805304092, "grad_norm": 0.33919116854667664, "learning_rate": 0.002, "loss": 2.5769, "step": 72530 }, { "epoch": 0.14451581027667984, "grad_norm": 0.14763113856315613, "learning_rate": 0.002, "loss": 2.5713, "step": 72540 }, { "epoch": 0.14453573250031876, "grad_norm": 0.15798121690750122, "learning_rate": 0.002, "loss": 2.5795, "step": 72550 }, { "epoch": 0.14455565472395768, "grad_norm": 0.13370293378829956, "learning_rate": 0.002, "loss": 2.5732, "step": 72560 }, { "epoch": 0.1445755769475966, "grad_norm": 0.1743467003107071, "learning_rate": 0.002, "loss": 2.5907, "step": 72570 }, { "epoch": 0.1445954991712355, "grad_norm": 0.16260354220867157, "learning_rate": 0.002, "loss": 2.5991, "step": 72580 }, { "epoch": 0.1446154213948744, "grad_norm": 0.15631388127803802, "learning_rate": 0.002, "loss": 2.571, "step": 72590 }, { "epoch": 0.14463534361851332, "grad_norm": 0.16434872150421143, "learning_rate": 0.002, "loss": 2.564, "step": 72600 }, { "epoch": 0.14465526584215224, "grad_norm": 0.17342866957187653, "learning_rate": 0.002, "loss": 2.5651, "step": 72610 }, { "epoch": 0.14467518806579116, "grad_norm": 0.1548866182565689, "learning_rate": 0.002, "loss": 2.5699, "step": 72620 }, { "epoch": 0.14469511028943005, "grad_norm": 0.20836737751960754, "learning_rate": 0.002, "loss": 2.5674, "step": 72630 }, { "epoch": 0.14471503251306897, "grad_norm": 0.1537151038646698, "learning_rate": 0.002, "loss": 2.5605, "step": 72640 }, { "epoch": 0.1447349547367079, "grad_norm": 0.16368764638900757, "learning_rate": 0.002, "loss": 2.5873, "step": 72650 }, { "epoch": 0.1447548769603468, "grad_norm": 0.1541728526353836, "learning_rate": 0.002, "loss": 2.5799, "step": 72660 }, { "epoch": 0.14477479918398573, "grad_norm": 0.14336585998535156, "learning_rate": 0.002, "loss": 2.5566, "step": 72670 }, { "epoch": 0.14479472140762464, "grad_norm": 0.19220183789730072, "learning_rate": 0.002, "loss": 2.5707, "step": 72680 }, { "epoch": 0.14481464363126353, "grad_norm": 0.15362587571144104, "learning_rate": 0.002, "loss": 2.5784, "step": 72690 }, { "epoch": 0.14483456585490245, "grad_norm": 0.1606425642967224, "learning_rate": 0.002, "loss": 2.5856, "step": 72700 }, { "epoch": 0.14485448807854137, "grad_norm": 0.14107368886470795, "learning_rate": 0.002, "loss": 2.5531, "step": 72710 }, { "epoch": 0.1448744103021803, "grad_norm": 0.15776322782039642, "learning_rate": 0.002, "loss": 2.5707, "step": 72720 }, { "epoch": 0.1448943325258192, "grad_norm": 0.15850986540317535, "learning_rate": 0.002, "loss": 2.5766, "step": 72730 }, { "epoch": 0.14491425474945813, "grad_norm": 0.13776253163814545, "learning_rate": 0.002, "loss": 2.5566, "step": 72740 }, { "epoch": 0.14493417697309702, "grad_norm": 0.18935945630073547, "learning_rate": 0.002, "loss": 2.5862, "step": 72750 }, { "epoch": 0.14495409919673594, "grad_norm": 0.15516400337219238, "learning_rate": 0.002, "loss": 2.5862, "step": 72760 }, { "epoch": 0.14497402142037485, "grad_norm": 0.15876322984695435, "learning_rate": 0.002, "loss": 2.563, "step": 72770 }, { "epoch": 0.14499394364401377, "grad_norm": 0.17883576452732086, "learning_rate": 0.002, "loss": 2.5638, "step": 72780 }, { "epoch": 0.1450138658676527, "grad_norm": 0.15410543978214264, "learning_rate": 0.002, "loss": 2.556, "step": 72790 }, { "epoch": 0.1450337880912916, "grad_norm": 0.16018804907798767, "learning_rate": 0.002, "loss": 2.57, "step": 72800 }, { "epoch": 0.1450537103149305, "grad_norm": 0.1866169422864914, "learning_rate": 0.002, "loss": 2.5641, "step": 72810 }, { "epoch": 0.14507363253856942, "grad_norm": 0.16560636460781097, "learning_rate": 0.002, "loss": 2.5649, "step": 72820 }, { "epoch": 0.14509355476220834, "grad_norm": 0.15316210687160492, "learning_rate": 0.002, "loss": 2.5699, "step": 72830 }, { "epoch": 0.14511347698584726, "grad_norm": 0.20739935338497162, "learning_rate": 0.002, "loss": 2.5722, "step": 72840 }, { "epoch": 0.14513339920948617, "grad_norm": 0.16838975250720978, "learning_rate": 0.002, "loss": 2.5872, "step": 72850 }, { "epoch": 0.1451533214331251, "grad_norm": 0.1386212706565857, "learning_rate": 0.002, "loss": 2.5706, "step": 72860 }, { "epoch": 0.14517324365676398, "grad_norm": 0.2059319019317627, "learning_rate": 0.002, "loss": 2.5704, "step": 72870 }, { "epoch": 0.1451931658804029, "grad_norm": 0.14365792274475098, "learning_rate": 0.002, "loss": 2.5502, "step": 72880 }, { "epoch": 0.14521308810404182, "grad_norm": 0.1494522988796234, "learning_rate": 0.002, "loss": 2.576, "step": 72890 }, { "epoch": 0.14523301032768074, "grad_norm": 0.14463384449481964, "learning_rate": 0.002, "loss": 2.5657, "step": 72900 }, { "epoch": 0.14525293255131966, "grad_norm": 0.13890576362609863, "learning_rate": 0.002, "loss": 2.583, "step": 72910 }, { "epoch": 0.14527285477495855, "grad_norm": 0.1909000128507614, "learning_rate": 0.002, "loss": 2.5709, "step": 72920 }, { "epoch": 0.14529277699859747, "grad_norm": 0.1586645096540451, "learning_rate": 0.002, "loss": 2.5589, "step": 72930 }, { "epoch": 0.14531269922223639, "grad_norm": 0.1847989708185196, "learning_rate": 0.002, "loss": 2.5701, "step": 72940 }, { "epoch": 0.1453326214458753, "grad_norm": 0.16052204370498657, "learning_rate": 0.002, "loss": 2.5708, "step": 72950 }, { "epoch": 0.14535254366951422, "grad_norm": 0.15678413212299347, "learning_rate": 0.002, "loss": 2.5814, "step": 72960 }, { "epoch": 0.14537246589315314, "grad_norm": 0.1552082747220993, "learning_rate": 0.002, "loss": 2.5789, "step": 72970 }, { "epoch": 0.14539238811679203, "grad_norm": 0.17410671710968018, "learning_rate": 0.002, "loss": 2.5765, "step": 72980 }, { "epoch": 0.14541231034043095, "grad_norm": 0.1650477796792984, "learning_rate": 0.002, "loss": 2.5685, "step": 72990 }, { "epoch": 0.14543223256406987, "grad_norm": 0.16998495161533356, "learning_rate": 0.002, "loss": 2.5788, "step": 73000 }, { "epoch": 0.1454521547877088, "grad_norm": 0.1935778260231018, "learning_rate": 0.002, "loss": 2.5845, "step": 73010 }, { "epoch": 0.1454720770113477, "grad_norm": 0.14788693189620972, "learning_rate": 0.002, "loss": 2.5717, "step": 73020 }, { "epoch": 0.14549199923498662, "grad_norm": 0.1458928883075714, "learning_rate": 0.002, "loss": 2.5795, "step": 73030 }, { "epoch": 0.14551192145862551, "grad_norm": 0.23927831649780273, "learning_rate": 0.002, "loss": 2.563, "step": 73040 }, { "epoch": 0.14553184368226443, "grad_norm": 0.15333512425422668, "learning_rate": 0.002, "loss": 2.5758, "step": 73050 }, { "epoch": 0.14555176590590335, "grad_norm": 0.14875274896621704, "learning_rate": 0.002, "loss": 2.5829, "step": 73060 }, { "epoch": 0.14557168812954227, "grad_norm": 0.18429169058799744, "learning_rate": 0.002, "loss": 2.574, "step": 73070 }, { "epoch": 0.1455916103531812, "grad_norm": 0.17034657299518585, "learning_rate": 0.002, "loss": 2.5568, "step": 73080 }, { "epoch": 0.1456115325768201, "grad_norm": 0.1603582352399826, "learning_rate": 0.002, "loss": 2.5849, "step": 73090 }, { "epoch": 0.145631454800459, "grad_norm": 0.1504620909690857, "learning_rate": 0.002, "loss": 2.5636, "step": 73100 }, { "epoch": 0.14565137702409792, "grad_norm": 0.17828816175460815, "learning_rate": 0.002, "loss": 2.5786, "step": 73110 }, { "epoch": 0.14567129924773684, "grad_norm": 0.16735072433948517, "learning_rate": 0.002, "loss": 2.5628, "step": 73120 }, { "epoch": 0.14569122147137575, "grad_norm": 0.17312203347682953, "learning_rate": 0.002, "loss": 2.5703, "step": 73130 }, { "epoch": 0.14571114369501467, "grad_norm": 0.14592215418815613, "learning_rate": 0.002, "loss": 2.569, "step": 73140 }, { "epoch": 0.14573106591865356, "grad_norm": 0.1452113538980484, "learning_rate": 0.002, "loss": 2.5677, "step": 73150 }, { "epoch": 0.14575098814229248, "grad_norm": 0.16964450478553772, "learning_rate": 0.002, "loss": 2.5625, "step": 73160 }, { "epoch": 0.1457709103659314, "grad_norm": 0.15055803954601288, "learning_rate": 0.002, "loss": 2.5772, "step": 73170 }, { "epoch": 0.14579083258957032, "grad_norm": 0.21764826774597168, "learning_rate": 0.002, "loss": 2.5669, "step": 73180 }, { "epoch": 0.14581075481320924, "grad_norm": 0.14600048959255219, "learning_rate": 0.002, "loss": 2.5779, "step": 73190 }, { "epoch": 0.14583067703684816, "grad_norm": 0.18829336762428284, "learning_rate": 0.002, "loss": 2.5671, "step": 73200 }, { "epoch": 0.14585059926048705, "grad_norm": 0.16640764474868774, "learning_rate": 0.002, "loss": 2.5722, "step": 73210 }, { "epoch": 0.14587052148412596, "grad_norm": 0.15926992893218994, "learning_rate": 0.002, "loss": 2.5684, "step": 73220 }, { "epoch": 0.14589044370776488, "grad_norm": 0.15076228976249695, "learning_rate": 0.002, "loss": 2.5656, "step": 73230 }, { "epoch": 0.1459103659314038, "grad_norm": 0.15522757172584534, "learning_rate": 0.002, "loss": 2.5627, "step": 73240 }, { "epoch": 0.14593028815504272, "grad_norm": 0.153245747089386, "learning_rate": 0.002, "loss": 2.5714, "step": 73250 }, { "epoch": 0.14595021037868164, "grad_norm": 0.1576579213142395, "learning_rate": 0.002, "loss": 2.5655, "step": 73260 }, { "epoch": 0.14597013260232053, "grad_norm": 0.12977774441242218, "learning_rate": 0.002, "loss": 2.5761, "step": 73270 }, { "epoch": 0.14599005482595945, "grad_norm": 0.22722561657428741, "learning_rate": 0.002, "loss": 2.5715, "step": 73280 }, { "epoch": 0.14600997704959837, "grad_norm": 0.1500042974948883, "learning_rate": 0.002, "loss": 2.5753, "step": 73290 }, { "epoch": 0.14602989927323728, "grad_norm": 0.1746789515018463, "learning_rate": 0.002, "loss": 2.5734, "step": 73300 }, { "epoch": 0.1460498214968762, "grad_norm": 0.15826602280139923, "learning_rate": 0.002, "loss": 2.5703, "step": 73310 }, { "epoch": 0.14606974372051512, "grad_norm": 0.1830594837665558, "learning_rate": 0.002, "loss": 2.5909, "step": 73320 }, { "epoch": 0.146089665944154, "grad_norm": 0.1458112597465515, "learning_rate": 0.002, "loss": 2.5646, "step": 73330 }, { "epoch": 0.14610958816779293, "grad_norm": 0.159394308924675, "learning_rate": 0.002, "loss": 2.563, "step": 73340 }, { "epoch": 0.14612951039143185, "grad_norm": 0.16969257593154907, "learning_rate": 0.002, "loss": 2.5668, "step": 73350 }, { "epoch": 0.14614943261507077, "grad_norm": 0.1373578906059265, "learning_rate": 0.002, "loss": 2.5837, "step": 73360 }, { "epoch": 0.1461693548387097, "grad_norm": 0.18574222922325134, "learning_rate": 0.002, "loss": 2.5771, "step": 73370 }, { "epoch": 0.1461892770623486, "grad_norm": 0.18405477702617645, "learning_rate": 0.002, "loss": 2.5812, "step": 73380 }, { "epoch": 0.1462091992859875, "grad_norm": 0.16400249302387238, "learning_rate": 0.002, "loss": 2.5496, "step": 73390 }, { "epoch": 0.14622912150962641, "grad_norm": 0.14184880256652832, "learning_rate": 0.002, "loss": 2.5611, "step": 73400 }, { "epoch": 0.14624904373326533, "grad_norm": 0.17228059470653534, "learning_rate": 0.002, "loss": 2.5757, "step": 73410 }, { "epoch": 0.14626896595690425, "grad_norm": 0.16178105771541595, "learning_rate": 0.002, "loss": 2.5944, "step": 73420 }, { "epoch": 0.14628888818054317, "grad_norm": 0.15661479532718658, "learning_rate": 0.002, "loss": 2.565, "step": 73430 }, { "epoch": 0.14630881040418206, "grad_norm": 0.1509159952402115, "learning_rate": 0.002, "loss": 2.5682, "step": 73440 }, { "epoch": 0.14632873262782098, "grad_norm": 0.18109160661697388, "learning_rate": 0.002, "loss": 2.585, "step": 73450 }, { "epoch": 0.1463486548514599, "grad_norm": 0.15568143129348755, "learning_rate": 0.002, "loss": 2.5748, "step": 73460 }, { "epoch": 0.14636857707509882, "grad_norm": 0.13907580077648163, "learning_rate": 0.002, "loss": 2.5947, "step": 73470 }, { "epoch": 0.14638849929873773, "grad_norm": 0.14607353508472443, "learning_rate": 0.002, "loss": 2.5663, "step": 73480 }, { "epoch": 0.14640842152237665, "grad_norm": 0.16631627082824707, "learning_rate": 0.002, "loss": 2.5846, "step": 73490 }, { "epoch": 0.14642834374601554, "grad_norm": 0.13940463960170746, "learning_rate": 0.002, "loss": 2.5664, "step": 73500 }, { "epoch": 0.14644826596965446, "grad_norm": 0.20768043398857117, "learning_rate": 0.002, "loss": 2.5569, "step": 73510 }, { "epoch": 0.14646818819329338, "grad_norm": 0.1720007061958313, "learning_rate": 0.002, "loss": 2.5741, "step": 73520 }, { "epoch": 0.1464881104169323, "grad_norm": 0.14940840005874634, "learning_rate": 0.002, "loss": 2.5748, "step": 73530 }, { "epoch": 0.14650803264057122, "grad_norm": 0.18222352862358093, "learning_rate": 0.002, "loss": 2.5697, "step": 73540 }, { "epoch": 0.14652795486421014, "grad_norm": 0.1757742166519165, "learning_rate": 0.002, "loss": 2.5793, "step": 73550 }, { "epoch": 0.14654787708784903, "grad_norm": 0.1463581621646881, "learning_rate": 0.002, "loss": 2.5601, "step": 73560 }, { "epoch": 0.14656779931148795, "grad_norm": 0.1347801834344864, "learning_rate": 0.002, "loss": 2.5806, "step": 73570 }, { "epoch": 0.14658772153512686, "grad_norm": 0.16178977489471436, "learning_rate": 0.002, "loss": 2.5722, "step": 73580 }, { "epoch": 0.14660764375876578, "grad_norm": 0.1757846176624298, "learning_rate": 0.002, "loss": 2.5813, "step": 73590 }, { "epoch": 0.1466275659824047, "grad_norm": 0.13094516098499298, "learning_rate": 0.002, "loss": 2.5818, "step": 73600 }, { "epoch": 0.14664748820604362, "grad_norm": 0.20809867978096008, "learning_rate": 0.002, "loss": 2.5636, "step": 73610 }, { "epoch": 0.1466674104296825, "grad_norm": 0.14995114505290985, "learning_rate": 0.002, "loss": 2.5789, "step": 73620 }, { "epoch": 0.14668733265332143, "grad_norm": 0.1435210108757019, "learning_rate": 0.002, "loss": 2.5483, "step": 73630 }, { "epoch": 0.14670725487696035, "grad_norm": 0.16594433784484863, "learning_rate": 0.002, "loss": 2.5641, "step": 73640 }, { "epoch": 0.14672717710059927, "grad_norm": 0.21031810343265533, "learning_rate": 0.002, "loss": 2.5652, "step": 73650 }, { "epoch": 0.14674709932423818, "grad_norm": 0.18007396161556244, "learning_rate": 0.002, "loss": 2.5684, "step": 73660 }, { "epoch": 0.14676702154787707, "grad_norm": 0.17211034893989563, "learning_rate": 0.002, "loss": 2.5642, "step": 73670 }, { "epoch": 0.146786943771516, "grad_norm": 0.16084451973438263, "learning_rate": 0.002, "loss": 2.5752, "step": 73680 }, { "epoch": 0.1468068659951549, "grad_norm": 0.17919108271598816, "learning_rate": 0.002, "loss": 2.5646, "step": 73690 }, { "epoch": 0.14682678821879383, "grad_norm": 0.14144635200500488, "learning_rate": 0.002, "loss": 2.5523, "step": 73700 }, { "epoch": 0.14684671044243275, "grad_norm": 0.1318414807319641, "learning_rate": 0.002, "loss": 2.5515, "step": 73710 }, { "epoch": 0.14686663266607167, "grad_norm": 0.14941202104091644, "learning_rate": 0.002, "loss": 2.5683, "step": 73720 }, { "epoch": 0.14688655488971056, "grad_norm": 0.15138734877109528, "learning_rate": 0.002, "loss": 2.5693, "step": 73730 }, { "epoch": 0.14690647711334948, "grad_norm": 0.16954845190048218, "learning_rate": 0.002, "loss": 2.5643, "step": 73740 }, { "epoch": 0.1469263993369884, "grad_norm": 0.14746971428394318, "learning_rate": 0.002, "loss": 2.5724, "step": 73750 }, { "epoch": 0.1469463215606273, "grad_norm": 0.18690907955169678, "learning_rate": 0.002, "loss": 2.5797, "step": 73760 }, { "epoch": 0.14696624378426623, "grad_norm": 0.18786020576953888, "learning_rate": 0.002, "loss": 2.5636, "step": 73770 }, { "epoch": 0.14698616600790515, "grad_norm": 0.16923944652080536, "learning_rate": 0.002, "loss": 2.5742, "step": 73780 }, { "epoch": 0.14700608823154404, "grad_norm": 0.12818703055381775, "learning_rate": 0.002, "loss": 2.5654, "step": 73790 }, { "epoch": 0.14702601045518296, "grad_norm": 0.15424363315105438, "learning_rate": 0.002, "loss": 2.5671, "step": 73800 }, { "epoch": 0.14704593267882188, "grad_norm": 0.1341468244791031, "learning_rate": 0.002, "loss": 2.5876, "step": 73810 }, { "epoch": 0.1470658549024608, "grad_norm": 0.19118696451187134, "learning_rate": 0.002, "loss": 2.5773, "step": 73820 }, { "epoch": 0.14708577712609971, "grad_norm": 0.1603936403989792, "learning_rate": 0.002, "loss": 2.5863, "step": 73830 }, { "epoch": 0.14710569934973863, "grad_norm": 0.13801392912864685, "learning_rate": 0.002, "loss": 2.5672, "step": 73840 }, { "epoch": 0.14712562157337752, "grad_norm": 0.17924581468105316, "learning_rate": 0.002, "loss": 2.5788, "step": 73850 }, { "epoch": 0.14714554379701644, "grad_norm": 0.1618177890777588, "learning_rate": 0.002, "loss": 2.5691, "step": 73860 }, { "epoch": 0.14716546602065536, "grad_norm": 0.1317095309495926, "learning_rate": 0.002, "loss": 2.5575, "step": 73870 }, { "epoch": 0.14718538824429428, "grad_norm": 0.1846734881401062, "learning_rate": 0.002, "loss": 2.5692, "step": 73880 }, { "epoch": 0.1472053104679332, "grad_norm": 0.1802176684141159, "learning_rate": 0.002, "loss": 2.5717, "step": 73890 }, { "epoch": 0.1472252326915721, "grad_norm": 0.15747882425785065, "learning_rate": 0.002, "loss": 2.5721, "step": 73900 }, { "epoch": 0.147245154915211, "grad_norm": 0.21211287379264832, "learning_rate": 0.002, "loss": 2.5633, "step": 73910 }, { "epoch": 0.14726507713884993, "grad_norm": 0.15733148157596588, "learning_rate": 0.002, "loss": 2.5873, "step": 73920 }, { "epoch": 0.14728499936248884, "grad_norm": 0.14549443125724792, "learning_rate": 0.002, "loss": 2.5803, "step": 73930 }, { "epoch": 0.14730492158612776, "grad_norm": 0.14196422696113586, "learning_rate": 0.002, "loss": 2.5616, "step": 73940 }, { "epoch": 0.14732484380976668, "grad_norm": 0.15751934051513672, "learning_rate": 0.002, "loss": 2.5729, "step": 73950 }, { "epoch": 0.14734476603340557, "grad_norm": 0.16086609661579132, "learning_rate": 0.002, "loss": 2.5769, "step": 73960 }, { "epoch": 0.1473646882570445, "grad_norm": 0.1820647120475769, "learning_rate": 0.002, "loss": 2.5613, "step": 73970 }, { "epoch": 0.1473846104806834, "grad_norm": 0.16875198483467102, "learning_rate": 0.002, "loss": 2.5727, "step": 73980 }, { "epoch": 0.14740453270432233, "grad_norm": 0.1554860919713974, "learning_rate": 0.002, "loss": 2.5501, "step": 73990 }, { "epoch": 0.14742445492796125, "grad_norm": 0.15655255317687988, "learning_rate": 0.002, "loss": 2.5688, "step": 74000 }, { "epoch": 0.14744437715160016, "grad_norm": 0.16953568160533905, "learning_rate": 0.002, "loss": 2.5693, "step": 74010 }, { "epoch": 0.14746429937523906, "grad_norm": 0.15716680884361267, "learning_rate": 0.002, "loss": 2.569, "step": 74020 }, { "epoch": 0.14748422159887797, "grad_norm": 0.19527944922447205, "learning_rate": 0.002, "loss": 2.5643, "step": 74030 }, { "epoch": 0.1475041438225169, "grad_norm": 0.158710315823555, "learning_rate": 0.002, "loss": 2.5596, "step": 74040 }, { "epoch": 0.1475240660461558, "grad_norm": 0.15494011342525482, "learning_rate": 0.002, "loss": 2.5644, "step": 74050 }, { "epoch": 0.14754398826979473, "grad_norm": 0.15808187425136566, "learning_rate": 0.002, "loss": 2.5776, "step": 74060 }, { "epoch": 0.14756391049343365, "grad_norm": 0.16516894102096558, "learning_rate": 0.002, "loss": 2.5739, "step": 74070 }, { "epoch": 0.14758383271707254, "grad_norm": 0.14812615513801575, "learning_rate": 0.002, "loss": 2.5682, "step": 74080 }, { "epoch": 0.14760375494071146, "grad_norm": 0.2111034244298935, "learning_rate": 0.002, "loss": 2.5794, "step": 74090 }, { "epoch": 0.14762367716435038, "grad_norm": 0.17232045531272888, "learning_rate": 0.002, "loss": 2.566, "step": 74100 }, { "epoch": 0.1476435993879893, "grad_norm": 0.14414356648921967, "learning_rate": 0.002, "loss": 2.5763, "step": 74110 }, { "epoch": 0.1476635216116282, "grad_norm": 0.15661795437335968, "learning_rate": 0.002, "loss": 2.5702, "step": 74120 }, { "epoch": 0.14768344383526713, "grad_norm": 0.16868281364440918, "learning_rate": 0.002, "loss": 2.5895, "step": 74130 }, { "epoch": 0.14770336605890602, "grad_norm": 0.16127659380435944, "learning_rate": 0.002, "loss": 2.5784, "step": 74140 }, { "epoch": 0.14772328828254494, "grad_norm": 0.1628182828426361, "learning_rate": 0.002, "loss": 2.561, "step": 74150 }, { "epoch": 0.14774321050618386, "grad_norm": 0.25608399510383606, "learning_rate": 0.002, "loss": 2.5724, "step": 74160 }, { "epoch": 0.14776313272982278, "grad_norm": 0.14735540747642517, "learning_rate": 0.002, "loss": 2.5734, "step": 74170 }, { "epoch": 0.1477830549534617, "grad_norm": 0.13863666355609894, "learning_rate": 0.002, "loss": 2.571, "step": 74180 }, { "epoch": 0.1478029771771006, "grad_norm": 0.1651427298784256, "learning_rate": 0.002, "loss": 2.5758, "step": 74190 }, { "epoch": 0.1478228994007395, "grad_norm": 0.14892122149467468, "learning_rate": 0.002, "loss": 2.5641, "step": 74200 }, { "epoch": 0.14784282162437842, "grad_norm": 0.1539974957704544, "learning_rate": 0.002, "loss": 2.5851, "step": 74210 }, { "epoch": 0.14786274384801734, "grad_norm": 0.15537747740745544, "learning_rate": 0.002, "loss": 2.5445, "step": 74220 }, { "epoch": 0.14788266607165626, "grad_norm": 0.170095294713974, "learning_rate": 0.002, "loss": 2.5765, "step": 74230 }, { "epoch": 0.14790258829529518, "grad_norm": 0.2300211787223816, "learning_rate": 0.002, "loss": 2.5694, "step": 74240 }, { "epoch": 0.14792251051893407, "grad_norm": 0.13694804906845093, "learning_rate": 0.002, "loss": 2.589, "step": 74250 }, { "epoch": 0.147942432742573, "grad_norm": 0.16314974427223206, "learning_rate": 0.002, "loss": 2.5716, "step": 74260 }, { "epoch": 0.1479623549662119, "grad_norm": 0.15033622086048126, "learning_rate": 0.002, "loss": 2.58, "step": 74270 }, { "epoch": 0.14798227718985082, "grad_norm": 0.16837725043296814, "learning_rate": 0.002, "loss": 2.5797, "step": 74280 }, { "epoch": 0.14800219941348974, "grad_norm": 0.15278765559196472, "learning_rate": 0.002, "loss": 2.5559, "step": 74290 }, { "epoch": 0.14802212163712866, "grad_norm": 0.13382337987422943, "learning_rate": 0.002, "loss": 2.5838, "step": 74300 }, { "epoch": 0.14804204386076755, "grad_norm": 0.13911810517311096, "learning_rate": 0.002, "loss": 2.5768, "step": 74310 }, { "epoch": 0.14806196608440647, "grad_norm": 0.16128885746002197, "learning_rate": 0.002, "loss": 2.5643, "step": 74320 }, { "epoch": 0.1480818883080454, "grad_norm": 0.17241111397743225, "learning_rate": 0.002, "loss": 2.5792, "step": 74330 }, { "epoch": 0.1481018105316843, "grad_norm": 0.1597806215286255, "learning_rate": 0.002, "loss": 2.5763, "step": 74340 }, { "epoch": 0.14812173275532323, "grad_norm": 0.16300857067108154, "learning_rate": 0.002, "loss": 2.5737, "step": 74350 }, { "epoch": 0.14814165497896215, "grad_norm": 0.24546656012535095, "learning_rate": 0.002, "loss": 2.5859, "step": 74360 }, { "epoch": 0.14816157720260104, "grad_norm": 0.1695651262998581, "learning_rate": 0.002, "loss": 2.5675, "step": 74370 }, { "epoch": 0.14818149942623995, "grad_norm": 0.15012304484844208, "learning_rate": 0.002, "loss": 2.5654, "step": 74380 }, { "epoch": 0.14820142164987887, "grad_norm": 0.14712686836719513, "learning_rate": 0.002, "loss": 2.5863, "step": 74390 }, { "epoch": 0.1482213438735178, "grad_norm": 0.17515714466571808, "learning_rate": 0.002, "loss": 2.5688, "step": 74400 }, { "epoch": 0.1482412660971567, "grad_norm": 0.16534557938575745, "learning_rate": 0.002, "loss": 2.5828, "step": 74410 }, { "epoch": 0.1482611883207956, "grad_norm": 0.18403786420822144, "learning_rate": 0.002, "loss": 2.5637, "step": 74420 }, { "epoch": 0.14828111054443452, "grad_norm": 0.16270236670970917, "learning_rate": 0.002, "loss": 2.5904, "step": 74430 }, { "epoch": 0.14830103276807344, "grad_norm": 0.19745421409606934, "learning_rate": 0.002, "loss": 2.5779, "step": 74440 }, { "epoch": 0.14832095499171236, "grad_norm": 0.14153361320495605, "learning_rate": 0.002, "loss": 2.5817, "step": 74450 }, { "epoch": 0.14834087721535127, "grad_norm": 0.17637157440185547, "learning_rate": 0.002, "loss": 2.5572, "step": 74460 }, { "epoch": 0.1483607994389902, "grad_norm": 0.16605636477470398, "learning_rate": 0.002, "loss": 2.5688, "step": 74470 }, { "epoch": 0.14838072166262908, "grad_norm": 0.18579739332199097, "learning_rate": 0.002, "loss": 2.5637, "step": 74480 }, { "epoch": 0.148400643886268, "grad_norm": 0.1761472523212433, "learning_rate": 0.002, "loss": 2.574, "step": 74490 }, { "epoch": 0.14842056610990692, "grad_norm": 0.17662514746189117, "learning_rate": 0.002, "loss": 2.5612, "step": 74500 }, { "epoch": 0.14844048833354584, "grad_norm": 0.18232619762420654, "learning_rate": 0.002, "loss": 2.567, "step": 74510 }, { "epoch": 0.14846041055718476, "grad_norm": 0.1612139344215393, "learning_rate": 0.002, "loss": 2.5853, "step": 74520 }, { "epoch": 0.14848033278082368, "grad_norm": 0.1534319967031479, "learning_rate": 0.002, "loss": 2.5787, "step": 74530 }, { "epoch": 0.14850025500446257, "grad_norm": 0.17106257379055023, "learning_rate": 0.002, "loss": 2.5778, "step": 74540 }, { "epoch": 0.14852017722810149, "grad_norm": 0.19660061597824097, "learning_rate": 0.002, "loss": 2.559, "step": 74550 }, { "epoch": 0.1485400994517404, "grad_norm": 0.13459281623363495, "learning_rate": 0.002, "loss": 2.5749, "step": 74560 }, { "epoch": 0.14856002167537932, "grad_norm": 0.16146038472652435, "learning_rate": 0.002, "loss": 2.5619, "step": 74570 }, { "epoch": 0.14857994389901824, "grad_norm": 0.12869702279567719, "learning_rate": 0.002, "loss": 2.5671, "step": 74580 }, { "epoch": 0.14859986612265716, "grad_norm": 0.14354431629180908, "learning_rate": 0.002, "loss": 2.5681, "step": 74590 }, { "epoch": 0.14861978834629605, "grad_norm": 0.159704327583313, "learning_rate": 0.002, "loss": 2.5718, "step": 74600 }, { "epoch": 0.14863971056993497, "grad_norm": 0.14169737696647644, "learning_rate": 0.002, "loss": 2.5808, "step": 74610 }, { "epoch": 0.1486596327935739, "grad_norm": 0.1954466849565506, "learning_rate": 0.002, "loss": 2.5719, "step": 74620 }, { "epoch": 0.1486795550172128, "grad_norm": 0.16861040890216827, "learning_rate": 0.002, "loss": 2.5807, "step": 74630 }, { "epoch": 0.14869947724085172, "grad_norm": 0.15895840525627136, "learning_rate": 0.002, "loss": 2.5783, "step": 74640 }, { "epoch": 0.14871939946449061, "grad_norm": 0.15640407800674438, "learning_rate": 0.002, "loss": 2.5707, "step": 74650 }, { "epoch": 0.14873932168812953, "grad_norm": 0.16537141799926758, "learning_rate": 0.002, "loss": 2.5684, "step": 74660 }, { "epoch": 0.14875924391176845, "grad_norm": 0.15388263761997223, "learning_rate": 0.002, "loss": 2.565, "step": 74670 }, { "epoch": 0.14877916613540737, "grad_norm": 0.1943850964307785, "learning_rate": 0.002, "loss": 2.5582, "step": 74680 }, { "epoch": 0.1487990883590463, "grad_norm": 0.16132843494415283, "learning_rate": 0.002, "loss": 2.5687, "step": 74690 }, { "epoch": 0.1488190105826852, "grad_norm": 0.15792331099510193, "learning_rate": 0.002, "loss": 2.5771, "step": 74700 }, { "epoch": 0.1488389328063241, "grad_norm": 0.16486845910549164, "learning_rate": 0.002, "loss": 2.5715, "step": 74710 }, { "epoch": 0.14885885502996302, "grad_norm": 0.1667107492685318, "learning_rate": 0.002, "loss": 2.5564, "step": 74720 }, { "epoch": 0.14887877725360193, "grad_norm": 0.14667952060699463, "learning_rate": 0.002, "loss": 2.5736, "step": 74730 }, { "epoch": 0.14889869947724085, "grad_norm": 0.1385408639907837, "learning_rate": 0.002, "loss": 2.5921, "step": 74740 }, { "epoch": 0.14891862170087977, "grad_norm": 0.1746615767478943, "learning_rate": 0.002, "loss": 2.5732, "step": 74750 }, { "epoch": 0.1489385439245187, "grad_norm": 0.17173385620117188, "learning_rate": 0.002, "loss": 2.5745, "step": 74760 }, { "epoch": 0.14895846614815758, "grad_norm": 0.180008664727211, "learning_rate": 0.002, "loss": 2.5548, "step": 74770 }, { "epoch": 0.1489783883717965, "grad_norm": 0.15472400188446045, "learning_rate": 0.002, "loss": 2.5734, "step": 74780 }, { "epoch": 0.14899831059543542, "grad_norm": 0.14180588722229004, "learning_rate": 0.002, "loss": 2.5752, "step": 74790 }, { "epoch": 0.14901823281907434, "grad_norm": 0.16120962798595428, "learning_rate": 0.002, "loss": 2.5713, "step": 74800 }, { "epoch": 0.14903815504271326, "grad_norm": 0.1446605771780014, "learning_rate": 0.002, "loss": 2.5643, "step": 74810 }, { "epoch": 0.14905807726635217, "grad_norm": 0.14421139657497406, "learning_rate": 0.002, "loss": 2.5646, "step": 74820 }, { "epoch": 0.14907799948999106, "grad_norm": 0.16279995441436768, "learning_rate": 0.002, "loss": 2.5673, "step": 74830 }, { "epoch": 0.14909792171362998, "grad_norm": 0.17126880586147308, "learning_rate": 0.002, "loss": 2.5798, "step": 74840 }, { "epoch": 0.1491178439372689, "grad_norm": 0.16951824724674225, "learning_rate": 0.002, "loss": 2.578, "step": 74850 }, { "epoch": 0.14913776616090782, "grad_norm": 0.16824452579021454, "learning_rate": 0.002, "loss": 2.5672, "step": 74860 }, { "epoch": 0.14915768838454674, "grad_norm": 0.1920049488544464, "learning_rate": 0.002, "loss": 2.5867, "step": 74870 }, { "epoch": 0.14917761060818566, "grad_norm": 0.1460687518119812, "learning_rate": 0.002, "loss": 2.5842, "step": 74880 }, { "epoch": 0.14919753283182455, "grad_norm": 0.1490045189857483, "learning_rate": 0.002, "loss": 2.5732, "step": 74890 }, { "epoch": 0.14921745505546347, "grad_norm": 0.14897264540195465, "learning_rate": 0.002, "loss": 2.5732, "step": 74900 }, { "epoch": 0.14923737727910238, "grad_norm": 0.16578581929206848, "learning_rate": 0.002, "loss": 2.5759, "step": 74910 }, { "epoch": 0.1492572995027413, "grad_norm": 0.1489747315645218, "learning_rate": 0.002, "loss": 2.5652, "step": 74920 }, { "epoch": 0.14927722172638022, "grad_norm": 0.16184571385383606, "learning_rate": 0.002, "loss": 2.563, "step": 74930 }, { "epoch": 0.1492971439500191, "grad_norm": 0.16260521113872528, "learning_rate": 0.002, "loss": 2.5633, "step": 74940 }, { "epoch": 0.14931706617365803, "grad_norm": 0.17067953944206238, "learning_rate": 0.002, "loss": 2.5821, "step": 74950 }, { "epoch": 0.14933698839729695, "grad_norm": 0.14901623129844666, "learning_rate": 0.002, "loss": 2.5822, "step": 74960 }, { "epoch": 0.14935691062093587, "grad_norm": 0.1998402327299118, "learning_rate": 0.002, "loss": 2.5694, "step": 74970 }, { "epoch": 0.1493768328445748, "grad_norm": 0.17796842753887177, "learning_rate": 0.002, "loss": 2.5656, "step": 74980 }, { "epoch": 0.1493967550682137, "grad_norm": 0.20457038283348083, "learning_rate": 0.002, "loss": 2.5987, "step": 74990 }, { "epoch": 0.1494166772918526, "grad_norm": 0.15123598277568817, "learning_rate": 0.002, "loss": 2.5729, "step": 75000 }, { "epoch": 0.14943659951549151, "grad_norm": 0.1666271835565567, "learning_rate": 0.002, "loss": 2.5629, "step": 75010 }, { "epoch": 0.14945652173913043, "grad_norm": 0.19641166925430298, "learning_rate": 0.002, "loss": 2.5607, "step": 75020 }, { "epoch": 0.14947644396276935, "grad_norm": 0.1362026333808899, "learning_rate": 0.002, "loss": 2.5677, "step": 75030 }, { "epoch": 0.14949636618640827, "grad_norm": 0.19994311034679413, "learning_rate": 0.002, "loss": 2.5822, "step": 75040 }, { "epoch": 0.1495162884100472, "grad_norm": 0.15790128707885742, "learning_rate": 0.002, "loss": 2.5513, "step": 75050 }, { "epoch": 0.14953621063368608, "grad_norm": 0.14132322371006012, "learning_rate": 0.002, "loss": 2.5757, "step": 75060 }, { "epoch": 0.149556132857325, "grad_norm": 0.1835366189479828, "learning_rate": 0.002, "loss": 2.5694, "step": 75070 }, { "epoch": 0.14957605508096392, "grad_norm": 0.1539289802312851, "learning_rate": 0.002, "loss": 2.5571, "step": 75080 }, { "epoch": 0.14959597730460283, "grad_norm": 0.131785050034523, "learning_rate": 0.002, "loss": 2.5558, "step": 75090 }, { "epoch": 0.14961589952824175, "grad_norm": 0.17638984322547913, "learning_rate": 0.002, "loss": 2.5648, "step": 75100 }, { "epoch": 0.14963582175188067, "grad_norm": 0.1722584217786789, "learning_rate": 0.002, "loss": 2.5887, "step": 75110 }, { "epoch": 0.14965574397551956, "grad_norm": 0.1409997195005417, "learning_rate": 0.002, "loss": 2.5715, "step": 75120 }, { "epoch": 0.14967566619915848, "grad_norm": 0.146687850356102, "learning_rate": 0.002, "loss": 2.5757, "step": 75130 }, { "epoch": 0.1496955884227974, "grad_norm": 0.18599283695220947, "learning_rate": 0.002, "loss": 2.5707, "step": 75140 }, { "epoch": 0.14971551064643632, "grad_norm": 0.15907998383045197, "learning_rate": 0.002, "loss": 2.5628, "step": 75150 }, { "epoch": 0.14973543287007524, "grad_norm": 0.17892400920391083, "learning_rate": 0.002, "loss": 2.5759, "step": 75160 }, { "epoch": 0.14975535509371413, "grad_norm": 0.18868154287338257, "learning_rate": 0.002, "loss": 2.5811, "step": 75170 }, { "epoch": 0.14977527731735304, "grad_norm": 0.16043055057525635, "learning_rate": 0.002, "loss": 2.5662, "step": 75180 }, { "epoch": 0.14979519954099196, "grad_norm": 0.1551884412765503, "learning_rate": 0.002, "loss": 2.5622, "step": 75190 }, { "epoch": 0.14981512176463088, "grad_norm": 0.14529123902320862, "learning_rate": 0.002, "loss": 2.5561, "step": 75200 }, { "epoch": 0.1498350439882698, "grad_norm": 0.1646806001663208, "learning_rate": 0.002, "loss": 2.5622, "step": 75210 }, { "epoch": 0.14985496621190872, "grad_norm": 0.15620030462741852, "learning_rate": 0.002, "loss": 2.5708, "step": 75220 }, { "epoch": 0.1498748884355476, "grad_norm": 0.1390834003686905, "learning_rate": 0.002, "loss": 2.5586, "step": 75230 }, { "epoch": 0.14989481065918653, "grad_norm": 0.20051546394824982, "learning_rate": 0.002, "loss": 2.5949, "step": 75240 }, { "epoch": 0.14991473288282545, "grad_norm": 0.16268466413021088, "learning_rate": 0.002, "loss": 2.5761, "step": 75250 }, { "epoch": 0.14993465510646437, "grad_norm": 0.14517004787921906, "learning_rate": 0.002, "loss": 2.5751, "step": 75260 }, { "epoch": 0.14995457733010328, "grad_norm": 0.16413669288158417, "learning_rate": 0.002, "loss": 2.5788, "step": 75270 }, { "epoch": 0.1499744995537422, "grad_norm": 0.15045872330665588, "learning_rate": 0.002, "loss": 2.5849, "step": 75280 }, { "epoch": 0.1499944217773811, "grad_norm": 0.19099180400371552, "learning_rate": 0.002, "loss": 2.5817, "step": 75290 }, { "epoch": 0.15001434400102, "grad_norm": 0.18578527867794037, "learning_rate": 0.002, "loss": 2.5722, "step": 75300 }, { "epoch": 0.15003426622465893, "grad_norm": 0.13953934609889984, "learning_rate": 0.002, "loss": 2.5762, "step": 75310 }, { "epoch": 0.15005418844829785, "grad_norm": 0.18928119540214539, "learning_rate": 0.002, "loss": 2.5786, "step": 75320 }, { "epoch": 0.15007411067193677, "grad_norm": 0.1452006995677948, "learning_rate": 0.002, "loss": 2.5636, "step": 75330 }, { "epoch": 0.15009403289557569, "grad_norm": 0.17648398876190186, "learning_rate": 0.002, "loss": 2.5665, "step": 75340 }, { "epoch": 0.15011395511921458, "grad_norm": 0.16955478489398956, "learning_rate": 0.002, "loss": 2.5712, "step": 75350 }, { "epoch": 0.1501338773428535, "grad_norm": 0.1846725046634674, "learning_rate": 0.002, "loss": 2.5756, "step": 75360 }, { "epoch": 0.1501537995664924, "grad_norm": 0.2127208709716797, "learning_rate": 0.002, "loss": 2.5692, "step": 75370 }, { "epoch": 0.15017372179013133, "grad_norm": 0.17696444690227509, "learning_rate": 0.002, "loss": 2.5641, "step": 75380 }, { "epoch": 0.15019364401377025, "grad_norm": 0.17373791337013245, "learning_rate": 0.002, "loss": 2.5702, "step": 75390 }, { "epoch": 0.15021356623740917, "grad_norm": 0.17402975261211395, "learning_rate": 0.002, "loss": 2.5674, "step": 75400 }, { "epoch": 0.15023348846104806, "grad_norm": 0.20731164515018463, "learning_rate": 0.002, "loss": 2.559, "step": 75410 }, { "epoch": 0.15025341068468698, "grad_norm": 0.14472341537475586, "learning_rate": 0.002, "loss": 2.5539, "step": 75420 }, { "epoch": 0.1502733329083259, "grad_norm": 0.17734825611114502, "learning_rate": 0.002, "loss": 2.5719, "step": 75430 }, { "epoch": 0.15029325513196481, "grad_norm": 0.14691025018692017, "learning_rate": 0.002, "loss": 2.5698, "step": 75440 }, { "epoch": 0.15031317735560373, "grad_norm": 0.13720431923866272, "learning_rate": 0.002, "loss": 2.5699, "step": 75450 }, { "epoch": 0.15033309957924262, "grad_norm": 0.15889699757099152, "learning_rate": 0.002, "loss": 2.5616, "step": 75460 }, { "epoch": 0.15035302180288154, "grad_norm": 0.1508512943983078, "learning_rate": 0.002, "loss": 2.5691, "step": 75470 }, { "epoch": 0.15037294402652046, "grad_norm": 0.16923345625400543, "learning_rate": 0.002, "loss": 2.5761, "step": 75480 }, { "epoch": 0.15039286625015938, "grad_norm": 0.16814304888248444, "learning_rate": 0.002, "loss": 2.5445, "step": 75490 }, { "epoch": 0.1504127884737983, "grad_norm": 0.17047260701656342, "learning_rate": 0.002, "loss": 2.571, "step": 75500 }, { "epoch": 0.15043271069743722, "grad_norm": 0.14480046927928925, "learning_rate": 0.002, "loss": 2.5767, "step": 75510 }, { "epoch": 0.1504526329210761, "grad_norm": 0.12686122953891754, "learning_rate": 0.002, "loss": 2.5747, "step": 75520 }, { "epoch": 0.15047255514471503, "grad_norm": 0.15119539201259613, "learning_rate": 0.002, "loss": 2.5594, "step": 75530 }, { "epoch": 0.15049247736835394, "grad_norm": 0.14327819645404816, "learning_rate": 0.002, "loss": 2.5506, "step": 75540 }, { "epoch": 0.15051239959199286, "grad_norm": 0.19953858852386475, "learning_rate": 0.002, "loss": 2.5716, "step": 75550 }, { "epoch": 0.15053232181563178, "grad_norm": 0.14768588542938232, "learning_rate": 0.002, "loss": 2.5743, "step": 75560 }, { "epoch": 0.1505522440392707, "grad_norm": 0.16677270829677582, "learning_rate": 0.002, "loss": 2.5546, "step": 75570 }, { "epoch": 0.1505721662629096, "grad_norm": 0.14059798419475555, "learning_rate": 0.002, "loss": 2.5531, "step": 75580 }, { "epoch": 0.1505920884865485, "grad_norm": 0.20091426372528076, "learning_rate": 0.002, "loss": 2.5641, "step": 75590 }, { "epoch": 0.15061201071018743, "grad_norm": 0.14358964562416077, "learning_rate": 0.002, "loss": 2.5584, "step": 75600 }, { "epoch": 0.15063193293382635, "grad_norm": 0.17256507277488708, "learning_rate": 0.002, "loss": 2.5597, "step": 75610 }, { "epoch": 0.15065185515746526, "grad_norm": 0.18000386655330658, "learning_rate": 0.002, "loss": 2.5659, "step": 75620 }, { "epoch": 0.15067177738110418, "grad_norm": 0.15690070390701294, "learning_rate": 0.002, "loss": 2.5719, "step": 75630 }, { "epoch": 0.15069169960474307, "grad_norm": 0.14711758494377136, "learning_rate": 0.002, "loss": 2.5692, "step": 75640 }, { "epoch": 0.150711621828382, "grad_norm": 0.14434829354286194, "learning_rate": 0.002, "loss": 2.58, "step": 75650 }, { "epoch": 0.1507315440520209, "grad_norm": 0.17093276977539062, "learning_rate": 0.002, "loss": 2.5804, "step": 75660 }, { "epoch": 0.15075146627565983, "grad_norm": 0.19949503242969513, "learning_rate": 0.002, "loss": 2.5663, "step": 75670 }, { "epoch": 0.15077138849929875, "grad_norm": 0.16073964536190033, "learning_rate": 0.002, "loss": 2.5751, "step": 75680 }, { "epoch": 0.15079131072293764, "grad_norm": 0.16527464985847473, "learning_rate": 0.002, "loss": 2.6031, "step": 75690 }, { "epoch": 0.15081123294657656, "grad_norm": 0.20376276969909668, "learning_rate": 0.002, "loss": 2.5681, "step": 75700 }, { "epoch": 0.15083115517021548, "grad_norm": 0.18202507495880127, "learning_rate": 0.002, "loss": 2.57, "step": 75710 }, { "epoch": 0.1508510773938544, "grad_norm": 0.17030276358127594, "learning_rate": 0.002, "loss": 2.5677, "step": 75720 }, { "epoch": 0.1508709996174933, "grad_norm": 0.17332129180431366, "learning_rate": 0.002, "loss": 2.569, "step": 75730 }, { "epoch": 0.15089092184113223, "grad_norm": 0.1892944574356079, "learning_rate": 0.002, "loss": 2.569, "step": 75740 }, { "epoch": 0.15091084406477112, "grad_norm": 0.1578553318977356, "learning_rate": 0.002, "loss": 2.5848, "step": 75750 }, { "epoch": 0.15093076628841004, "grad_norm": 0.14366045594215393, "learning_rate": 0.002, "loss": 2.5768, "step": 75760 }, { "epoch": 0.15095068851204896, "grad_norm": 0.21200795471668243, "learning_rate": 0.002, "loss": 2.5746, "step": 75770 }, { "epoch": 0.15097061073568788, "grad_norm": 0.14229445159435272, "learning_rate": 0.002, "loss": 2.5715, "step": 75780 }, { "epoch": 0.1509905329593268, "grad_norm": 0.17994888126850128, "learning_rate": 0.002, "loss": 2.5549, "step": 75790 }, { "epoch": 0.1510104551829657, "grad_norm": 0.19947992265224457, "learning_rate": 0.002, "loss": 2.5676, "step": 75800 }, { "epoch": 0.1510303774066046, "grad_norm": 0.16223570704460144, "learning_rate": 0.002, "loss": 2.5895, "step": 75810 }, { "epoch": 0.15105029963024352, "grad_norm": 0.17103558778762817, "learning_rate": 0.002, "loss": 2.5657, "step": 75820 }, { "epoch": 0.15107022185388244, "grad_norm": 0.13363035023212433, "learning_rate": 0.002, "loss": 2.575, "step": 75830 }, { "epoch": 0.15109014407752136, "grad_norm": 0.23956933617591858, "learning_rate": 0.002, "loss": 2.5751, "step": 75840 }, { "epoch": 0.15111006630116028, "grad_norm": 0.15830335021018982, "learning_rate": 0.002, "loss": 2.5634, "step": 75850 }, { "epoch": 0.1511299885247992, "grad_norm": 0.16033081710338593, "learning_rate": 0.002, "loss": 2.5788, "step": 75860 }, { "epoch": 0.1511499107484381, "grad_norm": 0.1646009087562561, "learning_rate": 0.002, "loss": 2.5714, "step": 75870 }, { "epoch": 0.151169832972077, "grad_norm": 0.17580056190490723, "learning_rate": 0.002, "loss": 2.5735, "step": 75880 }, { "epoch": 0.15118975519571592, "grad_norm": 0.1707291603088379, "learning_rate": 0.002, "loss": 2.5524, "step": 75890 }, { "epoch": 0.15120967741935484, "grad_norm": 0.22763843834400177, "learning_rate": 0.002, "loss": 2.5798, "step": 75900 }, { "epoch": 0.15122959964299376, "grad_norm": 0.14336179196834564, "learning_rate": 0.002, "loss": 2.5736, "step": 75910 }, { "epoch": 0.15124952186663265, "grad_norm": 0.16630247235298157, "learning_rate": 0.002, "loss": 2.5698, "step": 75920 }, { "epoch": 0.15126944409027157, "grad_norm": 0.16589313745498657, "learning_rate": 0.002, "loss": 2.5761, "step": 75930 }, { "epoch": 0.1512893663139105, "grad_norm": 0.15258225798606873, "learning_rate": 0.002, "loss": 2.5628, "step": 75940 }, { "epoch": 0.1513092885375494, "grad_norm": 0.19897176325321198, "learning_rate": 0.002, "loss": 2.5896, "step": 75950 }, { "epoch": 0.15132921076118833, "grad_norm": 0.14485745131969452, "learning_rate": 0.002, "loss": 2.5795, "step": 75960 }, { "epoch": 0.15134913298482724, "grad_norm": 0.15711134672164917, "learning_rate": 0.002, "loss": 2.5769, "step": 75970 }, { "epoch": 0.15136905520846614, "grad_norm": 0.1538253128528595, "learning_rate": 0.002, "loss": 2.5602, "step": 75980 }, { "epoch": 0.15138897743210505, "grad_norm": 0.15953393280506134, "learning_rate": 0.002, "loss": 2.5639, "step": 75990 }, { "epoch": 0.15140889965574397, "grad_norm": 0.16443845629692078, "learning_rate": 0.002, "loss": 2.5615, "step": 76000 }, { "epoch": 0.1514288218793829, "grad_norm": 0.19432689249515533, "learning_rate": 0.002, "loss": 2.5546, "step": 76010 }, { "epoch": 0.1514487441030218, "grad_norm": 0.151634082198143, "learning_rate": 0.002, "loss": 2.5667, "step": 76020 }, { "epoch": 0.15146866632666073, "grad_norm": 0.17686501145362854, "learning_rate": 0.002, "loss": 2.5795, "step": 76030 }, { "epoch": 0.15148858855029962, "grad_norm": 0.1696094572544098, "learning_rate": 0.002, "loss": 2.5662, "step": 76040 }, { "epoch": 0.15150851077393854, "grad_norm": 0.16157586872577667, "learning_rate": 0.002, "loss": 2.585, "step": 76050 }, { "epoch": 0.15152843299757746, "grad_norm": 0.14854861795902252, "learning_rate": 0.002, "loss": 2.582, "step": 76060 }, { "epoch": 0.15154835522121637, "grad_norm": 0.17422406375408173, "learning_rate": 0.002, "loss": 2.5605, "step": 76070 }, { "epoch": 0.1515682774448553, "grad_norm": 0.18459023535251617, "learning_rate": 0.002, "loss": 2.5772, "step": 76080 }, { "epoch": 0.1515881996684942, "grad_norm": 0.17073950171470642, "learning_rate": 0.002, "loss": 2.5745, "step": 76090 }, { "epoch": 0.1516081218921331, "grad_norm": 0.153699591755867, "learning_rate": 0.002, "loss": 2.5809, "step": 76100 }, { "epoch": 0.15162804411577202, "grad_norm": 0.16811828315258026, "learning_rate": 0.002, "loss": 2.5767, "step": 76110 }, { "epoch": 0.15164796633941094, "grad_norm": 0.1591244637966156, "learning_rate": 0.002, "loss": 2.5854, "step": 76120 }, { "epoch": 0.15166788856304986, "grad_norm": 0.18733642995357513, "learning_rate": 0.002, "loss": 2.5775, "step": 76130 }, { "epoch": 0.15168781078668878, "grad_norm": 0.15185675024986267, "learning_rate": 0.002, "loss": 2.5814, "step": 76140 }, { "epoch": 0.1517077330103277, "grad_norm": 0.1483265608549118, "learning_rate": 0.002, "loss": 2.5675, "step": 76150 }, { "epoch": 0.15172765523396659, "grad_norm": 0.14864367246627808, "learning_rate": 0.002, "loss": 2.5752, "step": 76160 }, { "epoch": 0.1517475774576055, "grad_norm": 0.15654605627059937, "learning_rate": 0.002, "loss": 2.5694, "step": 76170 }, { "epoch": 0.15176749968124442, "grad_norm": 0.1575547456741333, "learning_rate": 0.002, "loss": 2.5776, "step": 76180 }, { "epoch": 0.15178742190488334, "grad_norm": 0.18541137874126434, "learning_rate": 0.002, "loss": 2.5691, "step": 76190 }, { "epoch": 0.15180734412852226, "grad_norm": 0.14781644940376282, "learning_rate": 0.002, "loss": 2.5781, "step": 76200 }, { "epoch": 0.15182726635216115, "grad_norm": 0.18757662177085876, "learning_rate": 0.002, "loss": 2.5693, "step": 76210 }, { "epoch": 0.15184718857580007, "grad_norm": 0.16218937933444977, "learning_rate": 0.002, "loss": 2.559, "step": 76220 }, { "epoch": 0.151867110799439, "grad_norm": 0.17400890588760376, "learning_rate": 0.002, "loss": 2.5792, "step": 76230 }, { "epoch": 0.1518870330230779, "grad_norm": 0.18136849999427795, "learning_rate": 0.002, "loss": 2.5733, "step": 76240 }, { "epoch": 0.15190695524671682, "grad_norm": 0.15183411538600922, "learning_rate": 0.002, "loss": 2.5601, "step": 76250 }, { "epoch": 0.15192687747035574, "grad_norm": 0.1630222201347351, "learning_rate": 0.002, "loss": 2.5694, "step": 76260 }, { "epoch": 0.15194679969399463, "grad_norm": 0.15541069209575653, "learning_rate": 0.002, "loss": 2.5954, "step": 76270 }, { "epoch": 0.15196672191763355, "grad_norm": 0.19274728000164032, "learning_rate": 0.002, "loss": 2.562, "step": 76280 }, { "epoch": 0.15198664414127247, "grad_norm": 0.14470608532428741, "learning_rate": 0.002, "loss": 2.5796, "step": 76290 }, { "epoch": 0.1520065663649114, "grad_norm": 0.1471289098262787, "learning_rate": 0.002, "loss": 2.5754, "step": 76300 }, { "epoch": 0.1520264885885503, "grad_norm": 0.14009001851081848, "learning_rate": 0.002, "loss": 2.5574, "step": 76310 }, { "epoch": 0.15204641081218923, "grad_norm": 0.1669403463602066, "learning_rate": 0.002, "loss": 2.5699, "step": 76320 }, { "epoch": 0.15206633303582812, "grad_norm": 0.16269062459468842, "learning_rate": 0.002, "loss": 2.5757, "step": 76330 }, { "epoch": 0.15208625525946703, "grad_norm": 0.1540805846452713, "learning_rate": 0.002, "loss": 2.5695, "step": 76340 }, { "epoch": 0.15210617748310595, "grad_norm": 0.16079817712306976, "learning_rate": 0.002, "loss": 2.5602, "step": 76350 }, { "epoch": 0.15212609970674487, "grad_norm": 0.16260702908039093, "learning_rate": 0.002, "loss": 2.5617, "step": 76360 }, { "epoch": 0.1521460219303838, "grad_norm": 0.1424059271812439, "learning_rate": 0.002, "loss": 2.5503, "step": 76370 }, { "epoch": 0.1521659441540227, "grad_norm": 0.23017480969429016, "learning_rate": 0.002, "loss": 2.5687, "step": 76380 }, { "epoch": 0.1521858663776616, "grad_norm": 0.149581640958786, "learning_rate": 0.002, "loss": 2.5511, "step": 76390 }, { "epoch": 0.15220578860130052, "grad_norm": 0.14122524857521057, "learning_rate": 0.002, "loss": 2.5673, "step": 76400 }, { "epoch": 0.15222571082493944, "grad_norm": 0.23869696259498596, "learning_rate": 0.002, "loss": 2.5684, "step": 76410 }, { "epoch": 0.15224563304857835, "grad_norm": 0.16174215078353882, "learning_rate": 0.002, "loss": 2.5618, "step": 76420 }, { "epoch": 0.15226555527221727, "grad_norm": 0.1780991554260254, "learning_rate": 0.002, "loss": 2.5815, "step": 76430 }, { "epoch": 0.15228547749585616, "grad_norm": 0.15951746702194214, "learning_rate": 0.002, "loss": 2.58, "step": 76440 }, { "epoch": 0.15230539971949508, "grad_norm": 0.1448291838169098, "learning_rate": 0.002, "loss": 2.5654, "step": 76450 }, { "epoch": 0.152325321943134, "grad_norm": 0.19335080683231354, "learning_rate": 0.002, "loss": 2.5636, "step": 76460 }, { "epoch": 0.15234524416677292, "grad_norm": 0.15347635746002197, "learning_rate": 0.002, "loss": 2.5726, "step": 76470 }, { "epoch": 0.15236516639041184, "grad_norm": 0.153292715549469, "learning_rate": 0.002, "loss": 2.5723, "step": 76480 }, { "epoch": 0.15238508861405076, "grad_norm": 0.23359879851341248, "learning_rate": 0.002, "loss": 2.5645, "step": 76490 }, { "epoch": 0.15240501083768965, "grad_norm": 0.14896172285079956, "learning_rate": 0.002, "loss": 2.5554, "step": 76500 }, { "epoch": 0.15242493306132857, "grad_norm": 0.14889131486415863, "learning_rate": 0.002, "loss": 2.5762, "step": 76510 }, { "epoch": 0.15244485528496748, "grad_norm": 0.18589860200881958, "learning_rate": 0.002, "loss": 2.5601, "step": 76520 }, { "epoch": 0.1524647775086064, "grad_norm": 0.15430167317390442, "learning_rate": 0.002, "loss": 2.5563, "step": 76530 }, { "epoch": 0.15248469973224532, "grad_norm": 0.1286526620388031, "learning_rate": 0.002, "loss": 2.5784, "step": 76540 }, { "epoch": 0.15250462195588424, "grad_norm": 0.1940145492553711, "learning_rate": 0.002, "loss": 2.5604, "step": 76550 }, { "epoch": 0.15252454417952313, "grad_norm": 0.15666650235652924, "learning_rate": 0.002, "loss": 2.5773, "step": 76560 }, { "epoch": 0.15254446640316205, "grad_norm": 0.15819339454174042, "learning_rate": 0.002, "loss": 2.5643, "step": 76570 }, { "epoch": 0.15256438862680097, "grad_norm": 0.15403856337070465, "learning_rate": 0.002, "loss": 2.5566, "step": 76580 }, { "epoch": 0.15258431085043989, "grad_norm": 0.16969034075737, "learning_rate": 0.002, "loss": 2.5687, "step": 76590 }, { "epoch": 0.1526042330740788, "grad_norm": 0.19223514199256897, "learning_rate": 0.002, "loss": 2.5779, "step": 76600 }, { "epoch": 0.15262415529771772, "grad_norm": 0.15283243358135223, "learning_rate": 0.002, "loss": 2.5716, "step": 76610 }, { "epoch": 0.1526440775213566, "grad_norm": 0.14777542650699615, "learning_rate": 0.002, "loss": 2.5729, "step": 76620 }, { "epoch": 0.15266399974499553, "grad_norm": 0.18027573823928833, "learning_rate": 0.002, "loss": 2.5739, "step": 76630 }, { "epoch": 0.15268392196863445, "grad_norm": 0.2140243500471115, "learning_rate": 0.002, "loss": 2.5727, "step": 76640 }, { "epoch": 0.15270384419227337, "grad_norm": 0.1327463835477829, "learning_rate": 0.002, "loss": 2.5755, "step": 76650 }, { "epoch": 0.1527237664159123, "grad_norm": 0.16756702959537506, "learning_rate": 0.002, "loss": 2.5725, "step": 76660 }, { "epoch": 0.15274368863955118, "grad_norm": 0.1863207221031189, "learning_rate": 0.002, "loss": 2.5633, "step": 76670 }, { "epoch": 0.1527636108631901, "grad_norm": 0.1682351529598236, "learning_rate": 0.002, "loss": 2.5827, "step": 76680 }, { "epoch": 0.15278353308682902, "grad_norm": 0.1685394048690796, "learning_rate": 0.002, "loss": 2.5661, "step": 76690 }, { "epoch": 0.15280345531046793, "grad_norm": 0.14616768062114716, "learning_rate": 0.002, "loss": 2.5522, "step": 76700 }, { "epoch": 0.15282337753410685, "grad_norm": 0.15662787854671478, "learning_rate": 0.002, "loss": 2.5898, "step": 76710 }, { "epoch": 0.15284329975774577, "grad_norm": 0.1673668473958969, "learning_rate": 0.002, "loss": 2.5595, "step": 76720 }, { "epoch": 0.15286322198138466, "grad_norm": 0.17678302526474, "learning_rate": 0.002, "loss": 2.5629, "step": 76730 }, { "epoch": 0.15288314420502358, "grad_norm": 0.18536242842674255, "learning_rate": 0.002, "loss": 2.5824, "step": 76740 }, { "epoch": 0.1529030664286625, "grad_norm": 0.14672529697418213, "learning_rate": 0.002, "loss": 2.5745, "step": 76750 }, { "epoch": 0.15292298865230142, "grad_norm": 0.18774248659610748, "learning_rate": 0.002, "loss": 2.5704, "step": 76760 }, { "epoch": 0.15294291087594034, "grad_norm": 0.14537492394447327, "learning_rate": 0.002, "loss": 2.5716, "step": 76770 }, { "epoch": 0.15296283309957925, "grad_norm": 0.19932235777378082, "learning_rate": 0.002, "loss": 2.562, "step": 76780 }, { "epoch": 0.15298275532321814, "grad_norm": 0.16589820384979248, "learning_rate": 0.002, "loss": 2.5665, "step": 76790 }, { "epoch": 0.15300267754685706, "grad_norm": 0.13534832000732422, "learning_rate": 0.002, "loss": 2.58, "step": 76800 }, { "epoch": 0.15302259977049598, "grad_norm": 0.17719081044197083, "learning_rate": 0.002, "loss": 2.5809, "step": 76810 }, { "epoch": 0.1530425219941349, "grad_norm": 0.16426876187324524, "learning_rate": 0.002, "loss": 2.5794, "step": 76820 }, { "epoch": 0.15306244421777382, "grad_norm": 0.16295026242733002, "learning_rate": 0.002, "loss": 2.5655, "step": 76830 }, { "epoch": 0.15308236644141274, "grad_norm": 0.18406353890895844, "learning_rate": 0.002, "loss": 2.5543, "step": 76840 }, { "epoch": 0.15310228866505163, "grad_norm": 0.18287193775177002, "learning_rate": 0.002, "loss": 2.5687, "step": 76850 }, { "epoch": 0.15312221088869055, "grad_norm": 0.14136818051338196, "learning_rate": 0.002, "loss": 2.5778, "step": 76860 }, { "epoch": 0.15314213311232946, "grad_norm": 0.160012885928154, "learning_rate": 0.002, "loss": 2.5773, "step": 76870 }, { "epoch": 0.15316205533596838, "grad_norm": 0.16327667236328125, "learning_rate": 0.002, "loss": 2.5693, "step": 76880 }, { "epoch": 0.1531819775596073, "grad_norm": 0.15716496109962463, "learning_rate": 0.002, "loss": 2.5635, "step": 76890 }, { "epoch": 0.15320189978324622, "grad_norm": 0.13880474865436554, "learning_rate": 0.002, "loss": 2.583, "step": 76900 }, { "epoch": 0.1532218220068851, "grad_norm": 0.18823780119419098, "learning_rate": 0.002, "loss": 2.5786, "step": 76910 }, { "epoch": 0.15324174423052403, "grad_norm": 0.17940251529216766, "learning_rate": 0.002, "loss": 2.5729, "step": 76920 }, { "epoch": 0.15326166645416295, "grad_norm": 0.16270454227924347, "learning_rate": 0.002, "loss": 2.5575, "step": 76930 }, { "epoch": 0.15328158867780187, "grad_norm": 0.1646086871623993, "learning_rate": 0.002, "loss": 2.568, "step": 76940 }, { "epoch": 0.15330151090144079, "grad_norm": 0.18564562499523163, "learning_rate": 0.002, "loss": 2.5696, "step": 76950 }, { "epoch": 0.15332143312507968, "grad_norm": 0.16666021943092346, "learning_rate": 0.002, "loss": 2.5545, "step": 76960 }, { "epoch": 0.1533413553487186, "grad_norm": 0.15799400210380554, "learning_rate": 0.002, "loss": 2.5765, "step": 76970 }, { "epoch": 0.1533612775723575, "grad_norm": 0.1598246842622757, "learning_rate": 0.002, "loss": 2.5605, "step": 76980 }, { "epoch": 0.15338119979599643, "grad_norm": 0.16720272600650787, "learning_rate": 0.002, "loss": 2.5715, "step": 76990 }, { "epoch": 0.15340112201963535, "grad_norm": 0.1449412703514099, "learning_rate": 0.002, "loss": 2.5627, "step": 77000 }, { "epoch": 0.15342104424327427, "grad_norm": 0.16464315354824066, "learning_rate": 0.002, "loss": 2.5707, "step": 77010 }, { "epoch": 0.15344096646691316, "grad_norm": 0.14863060414791107, "learning_rate": 0.002, "loss": 2.5773, "step": 77020 }, { "epoch": 0.15346088869055208, "grad_norm": 0.19540970027446747, "learning_rate": 0.002, "loss": 2.5716, "step": 77030 }, { "epoch": 0.153480810914191, "grad_norm": 0.14588500559329987, "learning_rate": 0.002, "loss": 2.5772, "step": 77040 }, { "epoch": 0.15350073313782991, "grad_norm": 0.14386673271656036, "learning_rate": 0.002, "loss": 2.5642, "step": 77050 }, { "epoch": 0.15352065536146883, "grad_norm": 0.16641801595687866, "learning_rate": 0.002, "loss": 2.5748, "step": 77060 }, { "epoch": 0.15354057758510775, "grad_norm": 0.1807568073272705, "learning_rate": 0.002, "loss": 2.5713, "step": 77070 }, { "epoch": 0.15356049980874664, "grad_norm": 0.1703776866197586, "learning_rate": 0.002, "loss": 2.591, "step": 77080 }, { "epoch": 0.15358042203238556, "grad_norm": 0.15802817046642303, "learning_rate": 0.002, "loss": 2.5706, "step": 77090 }, { "epoch": 0.15360034425602448, "grad_norm": 0.14671745896339417, "learning_rate": 0.002, "loss": 2.5604, "step": 77100 }, { "epoch": 0.1536202664796634, "grad_norm": 0.16340495645999908, "learning_rate": 0.002, "loss": 2.5699, "step": 77110 }, { "epoch": 0.15364018870330232, "grad_norm": 0.1809334009885788, "learning_rate": 0.002, "loss": 2.5716, "step": 77120 }, { "epoch": 0.15366011092694123, "grad_norm": 0.17284157872200012, "learning_rate": 0.002, "loss": 2.569, "step": 77130 }, { "epoch": 0.15368003315058013, "grad_norm": 0.15686872601509094, "learning_rate": 0.002, "loss": 2.5625, "step": 77140 }, { "epoch": 0.15369995537421904, "grad_norm": 0.15810741484165192, "learning_rate": 0.002, "loss": 2.5684, "step": 77150 }, { "epoch": 0.15371987759785796, "grad_norm": 0.18756203353405, "learning_rate": 0.002, "loss": 2.5754, "step": 77160 }, { "epoch": 0.15373979982149688, "grad_norm": 0.18083174526691437, "learning_rate": 0.002, "loss": 2.5681, "step": 77170 }, { "epoch": 0.1537597220451358, "grad_norm": 0.14541883766651154, "learning_rate": 0.002, "loss": 2.5617, "step": 77180 }, { "epoch": 0.1537796442687747, "grad_norm": 0.17901423573493958, "learning_rate": 0.002, "loss": 2.5814, "step": 77190 }, { "epoch": 0.1537995664924136, "grad_norm": 0.17407888174057007, "learning_rate": 0.002, "loss": 2.5863, "step": 77200 }, { "epoch": 0.15381948871605253, "grad_norm": 0.17631296813488007, "learning_rate": 0.002, "loss": 2.5832, "step": 77210 }, { "epoch": 0.15383941093969145, "grad_norm": 0.16078388690948486, "learning_rate": 0.002, "loss": 2.576, "step": 77220 }, { "epoch": 0.15385933316333036, "grad_norm": 0.17223109304904938, "learning_rate": 0.002, "loss": 2.5544, "step": 77230 }, { "epoch": 0.15387925538696928, "grad_norm": 0.1584550440311432, "learning_rate": 0.002, "loss": 2.567, "step": 77240 }, { "epoch": 0.15389917761060817, "grad_norm": 0.1721736192703247, "learning_rate": 0.002, "loss": 2.5663, "step": 77250 }, { "epoch": 0.1539190998342471, "grad_norm": 0.14226152002811432, "learning_rate": 0.002, "loss": 2.5832, "step": 77260 }, { "epoch": 0.153939022057886, "grad_norm": 0.17302323877811432, "learning_rate": 0.002, "loss": 2.5919, "step": 77270 }, { "epoch": 0.15395894428152493, "grad_norm": 0.1688668429851532, "learning_rate": 0.002, "loss": 2.5775, "step": 77280 }, { "epoch": 0.15397886650516385, "grad_norm": 0.14800408482551575, "learning_rate": 0.002, "loss": 2.5657, "step": 77290 }, { "epoch": 0.15399878872880277, "grad_norm": 0.20857948064804077, "learning_rate": 0.002, "loss": 2.5672, "step": 77300 }, { "epoch": 0.15401871095244166, "grad_norm": 0.14996200799942017, "learning_rate": 0.002, "loss": 2.5554, "step": 77310 }, { "epoch": 0.15403863317608057, "grad_norm": 0.1542806327342987, "learning_rate": 0.002, "loss": 2.5662, "step": 77320 }, { "epoch": 0.1540585553997195, "grad_norm": 0.15307560563087463, "learning_rate": 0.002, "loss": 2.5788, "step": 77330 }, { "epoch": 0.1540784776233584, "grad_norm": 0.19415396451950073, "learning_rate": 0.002, "loss": 2.5607, "step": 77340 }, { "epoch": 0.15409839984699733, "grad_norm": 0.15347003936767578, "learning_rate": 0.002, "loss": 2.5712, "step": 77350 }, { "epoch": 0.15411832207063625, "grad_norm": 0.17296120524406433, "learning_rate": 0.002, "loss": 2.5677, "step": 77360 }, { "epoch": 0.15413824429427514, "grad_norm": 0.16489136219024658, "learning_rate": 0.002, "loss": 2.5739, "step": 77370 }, { "epoch": 0.15415816651791406, "grad_norm": 0.1421600580215454, "learning_rate": 0.002, "loss": 2.5645, "step": 77380 }, { "epoch": 0.15417808874155298, "grad_norm": 0.3381616771221161, "learning_rate": 0.002, "loss": 2.5722, "step": 77390 }, { "epoch": 0.1541980109651919, "grad_norm": 0.16334249079227448, "learning_rate": 0.002, "loss": 2.5751, "step": 77400 }, { "epoch": 0.1542179331888308, "grad_norm": 0.1344793438911438, "learning_rate": 0.002, "loss": 2.5695, "step": 77410 }, { "epoch": 0.15423785541246973, "grad_norm": 0.18172767758369446, "learning_rate": 0.002, "loss": 2.5563, "step": 77420 }, { "epoch": 0.15425777763610862, "grad_norm": 0.13845014572143555, "learning_rate": 0.002, "loss": 2.572, "step": 77430 }, { "epoch": 0.15427769985974754, "grad_norm": 0.1563357710838318, "learning_rate": 0.002, "loss": 2.5756, "step": 77440 }, { "epoch": 0.15429762208338646, "grad_norm": 0.16706368327140808, "learning_rate": 0.002, "loss": 2.5644, "step": 77450 }, { "epoch": 0.15431754430702538, "grad_norm": 0.1628609001636505, "learning_rate": 0.002, "loss": 2.5696, "step": 77460 }, { "epoch": 0.1543374665306643, "grad_norm": 0.16564041376113892, "learning_rate": 0.002, "loss": 2.574, "step": 77470 }, { "epoch": 0.1543573887543032, "grad_norm": 0.13048607110977173, "learning_rate": 0.002, "loss": 2.5755, "step": 77480 }, { "epoch": 0.1543773109779421, "grad_norm": 0.16734354197978973, "learning_rate": 0.002, "loss": 2.5544, "step": 77490 }, { "epoch": 0.15439723320158102, "grad_norm": 0.16919659078121185, "learning_rate": 0.002, "loss": 2.5684, "step": 77500 }, { "epoch": 0.15441715542521994, "grad_norm": 0.15039098262786865, "learning_rate": 0.002, "loss": 2.5566, "step": 77510 }, { "epoch": 0.15443707764885886, "grad_norm": 0.17310278117656708, "learning_rate": 0.002, "loss": 2.5779, "step": 77520 }, { "epoch": 0.15445699987249778, "grad_norm": 0.18035784363746643, "learning_rate": 0.002, "loss": 2.5768, "step": 77530 }, { "epoch": 0.15447692209613667, "grad_norm": 0.17365314066410065, "learning_rate": 0.002, "loss": 2.5942, "step": 77540 }, { "epoch": 0.1544968443197756, "grad_norm": 0.16181229054927826, "learning_rate": 0.002, "loss": 2.5794, "step": 77550 }, { "epoch": 0.1545167665434145, "grad_norm": 0.15925399959087372, "learning_rate": 0.002, "loss": 2.5636, "step": 77560 }, { "epoch": 0.15453668876705343, "grad_norm": 0.15747743844985962, "learning_rate": 0.002, "loss": 2.5608, "step": 77570 }, { "epoch": 0.15455661099069234, "grad_norm": 0.1432071328163147, "learning_rate": 0.002, "loss": 2.5708, "step": 77580 }, { "epoch": 0.15457653321433126, "grad_norm": 0.1818591207265854, "learning_rate": 0.002, "loss": 2.5647, "step": 77590 }, { "epoch": 0.15459645543797015, "grad_norm": 0.1510617882013321, "learning_rate": 0.002, "loss": 2.5811, "step": 77600 }, { "epoch": 0.15461637766160907, "grad_norm": 0.155595600605011, "learning_rate": 0.002, "loss": 2.5706, "step": 77610 }, { "epoch": 0.154636299885248, "grad_norm": 0.1780790537595749, "learning_rate": 0.002, "loss": 2.559, "step": 77620 }, { "epoch": 0.1546562221088869, "grad_norm": 0.1780966967344284, "learning_rate": 0.002, "loss": 2.5676, "step": 77630 }, { "epoch": 0.15467614433252583, "grad_norm": 0.18341311812400818, "learning_rate": 0.002, "loss": 2.5592, "step": 77640 }, { "epoch": 0.15469606655616475, "grad_norm": 0.18220551311969757, "learning_rate": 0.002, "loss": 2.5716, "step": 77650 }, { "epoch": 0.15471598877980364, "grad_norm": 0.14812125265598297, "learning_rate": 0.002, "loss": 2.5625, "step": 77660 }, { "epoch": 0.15473591100344256, "grad_norm": 0.1565926969051361, "learning_rate": 0.002, "loss": 2.5604, "step": 77670 }, { "epoch": 0.15475583322708147, "grad_norm": 0.17368492484092712, "learning_rate": 0.002, "loss": 2.561, "step": 77680 }, { "epoch": 0.1547757554507204, "grad_norm": 0.15611055493354797, "learning_rate": 0.002, "loss": 2.5659, "step": 77690 }, { "epoch": 0.1547956776743593, "grad_norm": 0.1386117786169052, "learning_rate": 0.002, "loss": 2.5643, "step": 77700 }, { "epoch": 0.1548155998979982, "grad_norm": 0.19132249057292938, "learning_rate": 0.002, "loss": 2.5791, "step": 77710 }, { "epoch": 0.15483552212163712, "grad_norm": 0.15489518642425537, "learning_rate": 0.002, "loss": 2.5729, "step": 77720 }, { "epoch": 0.15485544434527604, "grad_norm": 0.18263986706733704, "learning_rate": 0.002, "loss": 2.5709, "step": 77730 }, { "epoch": 0.15487536656891496, "grad_norm": 0.1641358733177185, "learning_rate": 0.002, "loss": 2.5729, "step": 77740 }, { "epoch": 0.15489528879255388, "grad_norm": 0.1807432770729065, "learning_rate": 0.002, "loss": 2.5695, "step": 77750 }, { "epoch": 0.1549152110161928, "grad_norm": 0.16336089372634888, "learning_rate": 0.002, "loss": 2.5513, "step": 77760 }, { "epoch": 0.15493513323983168, "grad_norm": 0.16268840432167053, "learning_rate": 0.002, "loss": 2.5945, "step": 77770 }, { "epoch": 0.1549550554634706, "grad_norm": 0.14282414317131042, "learning_rate": 0.002, "loss": 2.5846, "step": 77780 }, { "epoch": 0.15497497768710952, "grad_norm": 0.1592177003622055, "learning_rate": 0.002, "loss": 2.5515, "step": 77790 }, { "epoch": 0.15499489991074844, "grad_norm": 0.1459006816148758, "learning_rate": 0.002, "loss": 2.5811, "step": 77800 }, { "epoch": 0.15501482213438736, "grad_norm": 0.1630050241947174, "learning_rate": 0.002, "loss": 2.5791, "step": 77810 }, { "epoch": 0.15503474435802628, "grad_norm": 0.158159077167511, "learning_rate": 0.002, "loss": 2.5617, "step": 77820 }, { "epoch": 0.15505466658166517, "grad_norm": 0.18823403120040894, "learning_rate": 0.002, "loss": 2.5898, "step": 77830 }, { "epoch": 0.1550745888053041, "grad_norm": 0.15808351337909698, "learning_rate": 0.002, "loss": 2.5765, "step": 77840 }, { "epoch": 0.155094511028943, "grad_norm": 0.14927227795124054, "learning_rate": 0.002, "loss": 2.5718, "step": 77850 }, { "epoch": 0.15511443325258192, "grad_norm": 0.17287206649780273, "learning_rate": 0.002, "loss": 2.5712, "step": 77860 }, { "epoch": 0.15513435547622084, "grad_norm": 0.16487635672092438, "learning_rate": 0.002, "loss": 2.5802, "step": 77870 }, { "epoch": 0.15515427769985976, "grad_norm": 0.16261287033557892, "learning_rate": 0.002, "loss": 2.5716, "step": 77880 }, { "epoch": 0.15517419992349865, "grad_norm": 0.16307274997234344, "learning_rate": 0.002, "loss": 2.5737, "step": 77890 }, { "epoch": 0.15519412214713757, "grad_norm": 0.15195128321647644, "learning_rate": 0.002, "loss": 2.5691, "step": 77900 }, { "epoch": 0.1552140443707765, "grad_norm": 0.18043570220470428, "learning_rate": 0.002, "loss": 2.5814, "step": 77910 }, { "epoch": 0.1552339665944154, "grad_norm": 0.13381162285804749, "learning_rate": 0.002, "loss": 2.5579, "step": 77920 }, { "epoch": 0.15525388881805433, "grad_norm": 0.1709202527999878, "learning_rate": 0.002, "loss": 2.5855, "step": 77930 }, { "epoch": 0.15527381104169322, "grad_norm": 0.16965755820274353, "learning_rate": 0.002, "loss": 2.5581, "step": 77940 }, { "epoch": 0.15529373326533213, "grad_norm": 0.17309045791625977, "learning_rate": 0.002, "loss": 2.5786, "step": 77950 }, { "epoch": 0.15531365548897105, "grad_norm": 0.15534347295761108, "learning_rate": 0.002, "loss": 2.5849, "step": 77960 }, { "epoch": 0.15533357771260997, "grad_norm": 0.16636133193969727, "learning_rate": 0.002, "loss": 2.5684, "step": 77970 }, { "epoch": 0.1553534999362489, "grad_norm": 0.16080452501773834, "learning_rate": 0.002, "loss": 2.5614, "step": 77980 }, { "epoch": 0.1553734221598878, "grad_norm": 0.1570054441690445, "learning_rate": 0.002, "loss": 2.5734, "step": 77990 }, { "epoch": 0.1553933443835267, "grad_norm": 0.15439815819263458, "learning_rate": 0.002, "loss": 2.5799, "step": 78000 }, { "epoch": 0.15541326660716562, "grad_norm": 0.1602666825056076, "learning_rate": 0.002, "loss": 2.5641, "step": 78010 }, { "epoch": 0.15543318883080454, "grad_norm": 0.16870087385177612, "learning_rate": 0.002, "loss": 2.5709, "step": 78020 }, { "epoch": 0.15545311105444345, "grad_norm": 0.14689858257770538, "learning_rate": 0.002, "loss": 2.5626, "step": 78030 }, { "epoch": 0.15547303327808237, "grad_norm": 0.15918542444705963, "learning_rate": 0.002, "loss": 2.5573, "step": 78040 }, { "epoch": 0.1554929555017213, "grad_norm": 0.1688879281282425, "learning_rate": 0.002, "loss": 2.5784, "step": 78050 }, { "epoch": 0.15551287772536018, "grad_norm": 0.1561308652162552, "learning_rate": 0.002, "loss": 2.5723, "step": 78060 }, { "epoch": 0.1555327999489991, "grad_norm": 0.1342402994632721, "learning_rate": 0.002, "loss": 2.5621, "step": 78070 }, { "epoch": 0.15555272217263802, "grad_norm": 0.2159346491098404, "learning_rate": 0.002, "loss": 2.5672, "step": 78080 }, { "epoch": 0.15557264439627694, "grad_norm": 0.1399071365594864, "learning_rate": 0.002, "loss": 2.5691, "step": 78090 }, { "epoch": 0.15559256661991586, "grad_norm": 0.16413255035877228, "learning_rate": 0.002, "loss": 2.5462, "step": 78100 }, { "epoch": 0.15561248884355477, "grad_norm": 0.15328046679496765, "learning_rate": 0.002, "loss": 2.5696, "step": 78110 }, { "epoch": 0.15563241106719367, "grad_norm": 0.1831279844045639, "learning_rate": 0.002, "loss": 2.5551, "step": 78120 }, { "epoch": 0.15565233329083258, "grad_norm": 0.15516653656959534, "learning_rate": 0.002, "loss": 2.5724, "step": 78130 }, { "epoch": 0.1556722555144715, "grad_norm": 0.17366553843021393, "learning_rate": 0.002, "loss": 2.577, "step": 78140 }, { "epoch": 0.15569217773811042, "grad_norm": 0.19110007584095, "learning_rate": 0.002, "loss": 2.5655, "step": 78150 }, { "epoch": 0.15571209996174934, "grad_norm": 0.16940762102603912, "learning_rate": 0.002, "loss": 2.577, "step": 78160 }, { "epoch": 0.15573202218538826, "grad_norm": 0.1544974446296692, "learning_rate": 0.002, "loss": 2.5506, "step": 78170 }, { "epoch": 0.15575194440902715, "grad_norm": 0.17753270268440247, "learning_rate": 0.002, "loss": 2.5587, "step": 78180 }, { "epoch": 0.15577186663266607, "grad_norm": 0.19558803737163544, "learning_rate": 0.002, "loss": 2.5732, "step": 78190 }, { "epoch": 0.15579178885630499, "grad_norm": 0.14349205791950226, "learning_rate": 0.002, "loss": 2.5746, "step": 78200 }, { "epoch": 0.1558117110799439, "grad_norm": 0.15599049627780914, "learning_rate": 0.002, "loss": 2.5643, "step": 78210 }, { "epoch": 0.15583163330358282, "grad_norm": 0.17201070487499237, "learning_rate": 0.002, "loss": 2.573, "step": 78220 }, { "epoch": 0.1558515555272217, "grad_norm": 0.16686537861824036, "learning_rate": 0.002, "loss": 2.5636, "step": 78230 }, { "epoch": 0.15587147775086063, "grad_norm": 0.15597227215766907, "learning_rate": 0.002, "loss": 2.5818, "step": 78240 }, { "epoch": 0.15589139997449955, "grad_norm": 0.1725764125585556, "learning_rate": 0.002, "loss": 2.5685, "step": 78250 }, { "epoch": 0.15591132219813847, "grad_norm": 0.15298296511173248, "learning_rate": 0.002, "loss": 2.5809, "step": 78260 }, { "epoch": 0.1559312444217774, "grad_norm": 0.16636405885219574, "learning_rate": 0.002, "loss": 2.5745, "step": 78270 }, { "epoch": 0.1559511666454163, "grad_norm": 0.17588891088962555, "learning_rate": 0.002, "loss": 2.5751, "step": 78280 }, { "epoch": 0.1559710888690552, "grad_norm": 0.16032874584197998, "learning_rate": 0.002, "loss": 2.5646, "step": 78290 }, { "epoch": 0.15599101109269412, "grad_norm": 0.1379738301038742, "learning_rate": 0.002, "loss": 2.5828, "step": 78300 }, { "epoch": 0.15601093331633303, "grad_norm": 0.17841576039791107, "learning_rate": 0.002, "loss": 2.5695, "step": 78310 }, { "epoch": 0.15603085553997195, "grad_norm": 0.17327366769313812, "learning_rate": 0.002, "loss": 2.5648, "step": 78320 }, { "epoch": 0.15605077776361087, "grad_norm": 0.13405492901802063, "learning_rate": 0.002, "loss": 2.5582, "step": 78330 }, { "epoch": 0.1560706999872498, "grad_norm": 0.17816190421581268, "learning_rate": 0.002, "loss": 2.5727, "step": 78340 }, { "epoch": 0.15609062221088868, "grad_norm": 0.14339850842952728, "learning_rate": 0.002, "loss": 2.5719, "step": 78350 }, { "epoch": 0.1561105444345276, "grad_norm": 0.16063275933265686, "learning_rate": 0.002, "loss": 2.5768, "step": 78360 }, { "epoch": 0.15613046665816652, "grad_norm": 0.16210761666297913, "learning_rate": 0.002, "loss": 2.5767, "step": 78370 }, { "epoch": 0.15615038888180544, "grad_norm": 0.2074580043554306, "learning_rate": 0.002, "loss": 2.566, "step": 78380 }, { "epoch": 0.15617031110544435, "grad_norm": 0.14707233011722565, "learning_rate": 0.002, "loss": 2.5696, "step": 78390 }, { "epoch": 0.15619023332908327, "grad_norm": 0.16145531833171844, "learning_rate": 0.002, "loss": 2.5669, "step": 78400 }, { "epoch": 0.15621015555272216, "grad_norm": 0.17163699865341187, "learning_rate": 0.002, "loss": 2.5745, "step": 78410 }, { "epoch": 0.15623007777636108, "grad_norm": 0.1646619290113449, "learning_rate": 0.002, "loss": 2.5773, "step": 78420 }, { "epoch": 0.15625, "grad_norm": 0.16410261392593384, "learning_rate": 0.002, "loss": 2.5554, "step": 78430 }, { "epoch": 0.15626992222363892, "grad_norm": 0.17230381071567535, "learning_rate": 0.002, "loss": 2.5687, "step": 78440 }, { "epoch": 0.15628984444727784, "grad_norm": 0.1522379219532013, "learning_rate": 0.002, "loss": 2.5572, "step": 78450 }, { "epoch": 0.15630976667091673, "grad_norm": 0.15997834503650665, "learning_rate": 0.002, "loss": 2.5742, "step": 78460 }, { "epoch": 0.15632968889455565, "grad_norm": 0.14062386751174927, "learning_rate": 0.002, "loss": 2.5797, "step": 78470 }, { "epoch": 0.15634961111819456, "grad_norm": 0.3357407748699188, "learning_rate": 0.002, "loss": 2.5625, "step": 78480 }, { "epoch": 0.15636953334183348, "grad_norm": 0.15053236484527588, "learning_rate": 0.002, "loss": 2.5871, "step": 78490 }, { "epoch": 0.1563894555654724, "grad_norm": 0.169541135430336, "learning_rate": 0.002, "loss": 2.5653, "step": 78500 }, { "epoch": 0.15640937778911132, "grad_norm": 0.184866264462471, "learning_rate": 0.002, "loss": 2.5856, "step": 78510 }, { "epoch": 0.1564293000127502, "grad_norm": 0.1730102300643921, "learning_rate": 0.002, "loss": 2.5729, "step": 78520 }, { "epoch": 0.15644922223638913, "grad_norm": 0.16983121633529663, "learning_rate": 0.002, "loss": 2.5714, "step": 78530 }, { "epoch": 0.15646914446002805, "grad_norm": 0.2040262222290039, "learning_rate": 0.002, "loss": 2.5528, "step": 78540 }, { "epoch": 0.15648906668366697, "grad_norm": 0.12507250905036926, "learning_rate": 0.002, "loss": 2.5645, "step": 78550 }, { "epoch": 0.15650898890730588, "grad_norm": 0.14932864904403687, "learning_rate": 0.002, "loss": 2.5674, "step": 78560 }, { "epoch": 0.1565289111309448, "grad_norm": 0.20371313393115997, "learning_rate": 0.002, "loss": 2.5754, "step": 78570 }, { "epoch": 0.1565488333545837, "grad_norm": 0.16611690819263458, "learning_rate": 0.002, "loss": 2.5765, "step": 78580 }, { "epoch": 0.1565687555782226, "grad_norm": 0.28610435128211975, "learning_rate": 0.002, "loss": 2.5685, "step": 78590 }, { "epoch": 0.15658867780186153, "grad_norm": 0.15158866345882416, "learning_rate": 0.002, "loss": 2.5756, "step": 78600 }, { "epoch": 0.15660860002550045, "grad_norm": 0.15193147957324982, "learning_rate": 0.002, "loss": 2.5705, "step": 78610 }, { "epoch": 0.15662852224913937, "grad_norm": 0.16268965601921082, "learning_rate": 0.002, "loss": 2.57, "step": 78620 }, { "epoch": 0.1566484444727783, "grad_norm": 0.1666150540113449, "learning_rate": 0.002, "loss": 2.5747, "step": 78630 }, { "epoch": 0.15666836669641718, "grad_norm": 0.1822151243686676, "learning_rate": 0.002, "loss": 2.5623, "step": 78640 }, { "epoch": 0.1566882889200561, "grad_norm": 0.15264996886253357, "learning_rate": 0.002, "loss": 2.5757, "step": 78650 }, { "epoch": 0.15670821114369501, "grad_norm": 0.16722863912582397, "learning_rate": 0.002, "loss": 2.5746, "step": 78660 }, { "epoch": 0.15672813336733393, "grad_norm": 0.14880001544952393, "learning_rate": 0.002, "loss": 2.5657, "step": 78670 }, { "epoch": 0.15674805559097285, "grad_norm": 0.17058537900447845, "learning_rate": 0.002, "loss": 2.5704, "step": 78680 }, { "epoch": 0.15676797781461174, "grad_norm": 0.16494391858577728, "learning_rate": 0.002, "loss": 2.5708, "step": 78690 }, { "epoch": 0.15678790003825066, "grad_norm": 0.15245358645915985, "learning_rate": 0.002, "loss": 2.5539, "step": 78700 }, { "epoch": 0.15680782226188958, "grad_norm": 0.15989172458648682, "learning_rate": 0.002, "loss": 2.5808, "step": 78710 }, { "epoch": 0.1568277444855285, "grad_norm": 0.16048677265644073, "learning_rate": 0.002, "loss": 2.5655, "step": 78720 }, { "epoch": 0.15684766670916742, "grad_norm": 0.14360177516937256, "learning_rate": 0.002, "loss": 2.5602, "step": 78730 }, { "epoch": 0.15686758893280633, "grad_norm": 0.3408118188381195, "learning_rate": 0.002, "loss": 2.5807, "step": 78740 }, { "epoch": 0.15688751115644523, "grad_norm": 0.15962645411491394, "learning_rate": 0.002, "loss": 2.5804, "step": 78750 }, { "epoch": 0.15690743338008414, "grad_norm": 0.1852322369813919, "learning_rate": 0.002, "loss": 2.5738, "step": 78760 }, { "epoch": 0.15692735560372306, "grad_norm": 0.16386543214321136, "learning_rate": 0.002, "loss": 2.5785, "step": 78770 }, { "epoch": 0.15694727782736198, "grad_norm": 0.19936516880989075, "learning_rate": 0.002, "loss": 2.5684, "step": 78780 }, { "epoch": 0.1569672000510009, "grad_norm": 0.15762455761432648, "learning_rate": 0.002, "loss": 2.5742, "step": 78790 }, { "epoch": 0.15698712227463982, "grad_norm": 0.1634620577096939, "learning_rate": 0.002, "loss": 2.574, "step": 78800 }, { "epoch": 0.1570070444982787, "grad_norm": 0.1770232766866684, "learning_rate": 0.002, "loss": 2.5767, "step": 78810 }, { "epoch": 0.15702696672191763, "grad_norm": 0.1434529423713684, "learning_rate": 0.002, "loss": 2.5766, "step": 78820 }, { "epoch": 0.15704688894555655, "grad_norm": 0.14229488372802734, "learning_rate": 0.002, "loss": 2.5902, "step": 78830 }, { "epoch": 0.15706681116919546, "grad_norm": 0.15962855517864227, "learning_rate": 0.002, "loss": 2.5762, "step": 78840 }, { "epoch": 0.15708673339283438, "grad_norm": 0.17319145798683167, "learning_rate": 0.002, "loss": 2.5835, "step": 78850 }, { "epoch": 0.1571066556164733, "grad_norm": 0.1859215795993805, "learning_rate": 0.002, "loss": 2.5754, "step": 78860 }, { "epoch": 0.1571265778401122, "grad_norm": 0.15351064503192902, "learning_rate": 0.002, "loss": 2.5643, "step": 78870 }, { "epoch": 0.1571465000637511, "grad_norm": 0.18130098283290863, "learning_rate": 0.002, "loss": 2.5781, "step": 78880 }, { "epoch": 0.15716642228739003, "grad_norm": 0.20127932727336884, "learning_rate": 0.002, "loss": 2.5556, "step": 78890 }, { "epoch": 0.15718634451102895, "grad_norm": 0.18301129341125488, "learning_rate": 0.002, "loss": 2.5649, "step": 78900 }, { "epoch": 0.15720626673466787, "grad_norm": 0.1486809253692627, "learning_rate": 0.002, "loss": 2.5815, "step": 78910 }, { "epoch": 0.15722618895830678, "grad_norm": 0.16223709285259247, "learning_rate": 0.002, "loss": 2.5794, "step": 78920 }, { "epoch": 0.15724611118194567, "grad_norm": 0.15414002537727356, "learning_rate": 0.002, "loss": 2.5863, "step": 78930 }, { "epoch": 0.1572660334055846, "grad_norm": 0.14625057578086853, "learning_rate": 0.002, "loss": 2.5671, "step": 78940 }, { "epoch": 0.1572859556292235, "grad_norm": 0.2001679539680481, "learning_rate": 0.002, "loss": 2.5761, "step": 78950 }, { "epoch": 0.15730587785286243, "grad_norm": 0.14827750623226166, "learning_rate": 0.002, "loss": 2.5524, "step": 78960 }, { "epoch": 0.15732580007650135, "grad_norm": 0.20359820127487183, "learning_rate": 0.002, "loss": 2.5596, "step": 78970 }, { "epoch": 0.15734572230014024, "grad_norm": 0.16553160548210144, "learning_rate": 0.002, "loss": 2.5688, "step": 78980 }, { "epoch": 0.15736564452377916, "grad_norm": 0.19125232100486755, "learning_rate": 0.002, "loss": 2.5677, "step": 78990 }, { "epoch": 0.15738556674741808, "grad_norm": 0.17296281456947327, "learning_rate": 0.002, "loss": 2.5623, "step": 79000 }, { "epoch": 0.157405488971057, "grad_norm": 0.1783662885427475, "learning_rate": 0.002, "loss": 2.5749, "step": 79010 }, { "epoch": 0.1574254111946959, "grad_norm": 0.1699357032775879, "learning_rate": 0.002, "loss": 2.6043, "step": 79020 }, { "epoch": 0.15744533341833483, "grad_norm": 0.16150055825710297, "learning_rate": 0.002, "loss": 2.5756, "step": 79030 }, { "epoch": 0.15746525564197372, "grad_norm": 0.1573149859905243, "learning_rate": 0.002, "loss": 2.5686, "step": 79040 }, { "epoch": 0.15748517786561264, "grad_norm": 0.19342342019081116, "learning_rate": 0.002, "loss": 2.5761, "step": 79050 }, { "epoch": 0.15750510008925156, "grad_norm": 0.16130834817886353, "learning_rate": 0.002, "loss": 2.5787, "step": 79060 }, { "epoch": 0.15752502231289048, "grad_norm": 0.16049212217330933, "learning_rate": 0.002, "loss": 2.5653, "step": 79070 }, { "epoch": 0.1575449445365294, "grad_norm": 0.19631917774677277, "learning_rate": 0.002, "loss": 2.5804, "step": 79080 }, { "epoch": 0.15756486676016832, "grad_norm": 0.17302650213241577, "learning_rate": 0.002, "loss": 2.5626, "step": 79090 }, { "epoch": 0.1575847889838072, "grad_norm": 0.15492743253707886, "learning_rate": 0.002, "loss": 2.5795, "step": 79100 }, { "epoch": 0.15760471120744612, "grad_norm": 0.14632941782474518, "learning_rate": 0.002, "loss": 2.5717, "step": 79110 }, { "epoch": 0.15762463343108504, "grad_norm": 0.15468578040599823, "learning_rate": 0.002, "loss": 2.5758, "step": 79120 }, { "epoch": 0.15764455565472396, "grad_norm": 0.15324710309505463, "learning_rate": 0.002, "loss": 2.5591, "step": 79130 }, { "epoch": 0.15766447787836288, "grad_norm": 0.15622113645076752, "learning_rate": 0.002, "loss": 2.5618, "step": 79140 }, { "epoch": 0.1576844001020018, "grad_norm": 0.15900634229183197, "learning_rate": 0.002, "loss": 2.565, "step": 79150 }, { "epoch": 0.1577043223256407, "grad_norm": 0.18290260434150696, "learning_rate": 0.002, "loss": 2.5747, "step": 79160 }, { "epoch": 0.1577242445492796, "grad_norm": 0.15020206570625305, "learning_rate": 0.002, "loss": 2.5615, "step": 79170 }, { "epoch": 0.15774416677291853, "grad_norm": 0.1763254851102829, "learning_rate": 0.002, "loss": 2.557, "step": 79180 }, { "epoch": 0.15776408899655744, "grad_norm": 0.1452508121728897, "learning_rate": 0.002, "loss": 2.5624, "step": 79190 }, { "epoch": 0.15778401122019636, "grad_norm": 0.14668096601963043, "learning_rate": 0.002, "loss": 2.5604, "step": 79200 }, { "epoch": 0.15780393344383525, "grad_norm": 0.1704513430595398, "learning_rate": 0.002, "loss": 2.5673, "step": 79210 }, { "epoch": 0.15782385566747417, "grad_norm": 0.16794267296791077, "learning_rate": 0.002, "loss": 2.5791, "step": 79220 }, { "epoch": 0.1578437778911131, "grad_norm": 0.14891692996025085, "learning_rate": 0.002, "loss": 2.5714, "step": 79230 }, { "epoch": 0.157863700114752, "grad_norm": 0.1779974102973938, "learning_rate": 0.002, "loss": 2.5841, "step": 79240 }, { "epoch": 0.15788362233839093, "grad_norm": 0.16115908324718475, "learning_rate": 0.002, "loss": 2.5523, "step": 79250 }, { "epoch": 0.15790354456202985, "grad_norm": 0.16506557166576385, "learning_rate": 0.002, "loss": 2.5771, "step": 79260 }, { "epoch": 0.15792346678566874, "grad_norm": 0.15605154633522034, "learning_rate": 0.002, "loss": 2.5857, "step": 79270 }, { "epoch": 0.15794338900930766, "grad_norm": 0.14947102963924408, "learning_rate": 0.002, "loss": 2.5682, "step": 79280 }, { "epoch": 0.15796331123294657, "grad_norm": 0.1421549767255783, "learning_rate": 0.002, "loss": 2.5622, "step": 79290 }, { "epoch": 0.1579832334565855, "grad_norm": 0.15302444994449615, "learning_rate": 0.002, "loss": 2.5675, "step": 79300 }, { "epoch": 0.1580031556802244, "grad_norm": 0.16864262521266937, "learning_rate": 0.002, "loss": 2.5707, "step": 79310 }, { "epoch": 0.15802307790386333, "grad_norm": 0.2079445719718933, "learning_rate": 0.002, "loss": 2.5575, "step": 79320 }, { "epoch": 0.15804300012750222, "grad_norm": 0.14878270030021667, "learning_rate": 0.002, "loss": 2.5579, "step": 79330 }, { "epoch": 0.15806292235114114, "grad_norm": 0.14711931347846985, "learning_rate": 0.002, "loss": 2.5748, "step": 79340 }, { "epoch": 0.15808284457478006, "grad_norm": 0.16170942783355713, "learning_rate": 0.002, "loss": 2.5562, "step": 79350 }, { "epoch": 0.15810276679841898, "grad_norm": 0.16596952080726624, "learning_rate": 0.002, "loss": 2.575, "step": 79360 }, { "epoch": 0.1581226890220579, "grad_norm": 0.19821396470069885, "learning_rate": 0.002, "loss": 2.5801, "step": 79370 }, { "epoch": 0.1581426112456968, "grad_norm": 0.1525673121213913, "learning_rate": 0.002, "loss": 2.5792, "step": 79380 }, { "epoch": 0.1581625334693357, "grad_norm": 0.1545812487602234, "learning_rate": 0.002, "loss": 2.5827, "step": 79390 }, { "epoch": 0.15818245569297462, "grad_norm": 0.15900050103664398, "learning_rate": 0.002, "loss": 2.5619, "step": 79400 }, { "epoch": 0.15820237791661354, "grad_norm": 0.1593083143234253, "learning_rate": 0.002, "loss": 2.5719, "step": 79410 }, { "epoch": 0.15822230014025246, "grad_norm": 0.17876824736595154, "learning_rate": 0.002, "loss": 2.5695, "step": 79420 }, { "epoch": 0.15824222236389138, "grad_norm": 0.16524462401866913, "learning_rate": 0.002, "loss": 2.5789, "step": 79430 }, { "epoch": 0.15826214458753027, "grad_norm": 0.15572768449783325, "learning_rate": 0.002, "loss": 2.578, "step": 79440 }, { "epoch": 0.1582820668111692, "grad_norm": 0.158866748213768, "learning_rate": 0.002, "loss": 2.573, "step": 79450 }, { "epoch": 0.1583019890348081, "grad_norm": 0.17216835916042328, "learning_rate": 0.002, "loss": 2.5602, "step": 79460 }, { "epoch": 0.15832191125844702, "grad_norm": 0.1466420292854309, "learning_rate": 0.002, "loss": 2.561, "step": 79470 }, { "epoch": 0.15834183348208594, "grad_norm": 0.14215615391731262, "learning_rate": 0.002, "loss": 2.5702, "step": 79480 }, { "epoch": 0.15836175570572486, "grad_norm": 0.17570282518863678, "learning_rate": 0.002, "loss": 2.5822, "step": 79490 }, { "epoch": 0.15838167792936375, "grad_norm": 0.20401841402053833, "learning_rate": 0.002, "loss": 2.5781, "step": 79500 }, { "epoch": 0.15840160015300267, "grad_norm": 0.14147645235061646, "learning_rate": 0.002, "loss": 2.562, "step": 79510 }, { "epoch": 0.1584215223766416, "grad_norm": 0.1255093663930893, "learning_rate": 0.002, "loss": 2.5761, "step": 79520 }, { "epoch": 0.1584414446002805, "grad_norm": 0.16431820392608643, "learning_rate": 0.002, "loss": 2.5773, "step": 79530 }, { "epoch": 0.15846136682391943, "grad_norm": 0.19090940058231354, "learning_rate": 0.002, "loss": 2.5607, "step": 79540 }, { "epoch": 0.15848128904755834, "grad_norm": 0.14433836936950684, "learning_rate": 0.002, "loss": 2.5681, "step": 79550 }, { "epoch": 0.15850121127119723, "grad_norm": 0.16046373546123505, "learning_rate": 0.002, "loss": 2.5785, "step": 79560 }, { "epoch": 0.15852113349483615, "grad_norm": 0.17517054080963135, "learning_rate": 0.002, "loss": 2.5787, "step": 79570 }, { "epoch": 0.15854105571847507, "grad_norm": 0.15587931871414185, "learning_rate": 0.002, "loss": 2.5655, "step": 79580 }, { "epoch": 0.158560977942114, "grad_norm": 0.16439250111579895, "learning_rate": 0.002, "loss": 2.5703, "step": 79590 }, { "epoch": 0.1585809001657529, "grad_norm": 0.1566476821899414, "learning_rate": 0.002, "loss": 2.5811, "step": 79600 }, { "epoch": 0.15860082238939183, "grad_norm": 0.1749705821275711, "learning_rate": 0.002, "loss": 2.5672, "step": 79610 }, { "epoch": 0.15862074461303072, "grad_norm": 0.15748925507068634, "learning_rate": 0.002, "loss": 2.5685, "step": 79620 }, { "epoch": 0.15864066683666964, "grad_norm": 0.16524866223335266, "learning_rate": 0.002, "loss": 2.5683, "step": 79630 }, { "epoch": 0.15866058906030855, "grad_norm": 0.13593930006027222, "learning_rate": 0.002, "loss": 2.5801, "step": 79640 }, { "epoch": 0.15868051128394747, "grad_norm": 0.1476747840642929, "learning_rate": 0.002, "loss": 2.5793, "step": 79650 }, { "epoch": 0.1587004335075864, "grad_norm": 0.18799446523189545, "learning_rate": 0.002, "loss": 2.5744, "step": 79660 }, { "epoch": 0.1587203557312253, "grad_norm": 0.15901872515678406, "learning_rate": 0.002, "loss": 2.5605, "step": 79670 }, { "epoch": 0.1587402779548642, "grad_norm": 0.18893617391586304, "learning_rate": 0.002, "loss": 2.5732, "step": 79680 }, { "epoch": 0.15876020017850312, "grad_norm": 0.15279781818389893, "learning_rate": 0.002, "loss": 2.571, "step": 79690 }, { "epoch": 0.15878012240214204, "grad_norm": 0.13174016773700714, "learning_rate": 0.002, "loss": 2.5695, "step": 79700 }, { "epoch": 0.15880004462578096, "grad_norm": 0.19243282079696655, "learning_rate": 0.002, "loss": 2.5668, "step": 79710 }, { "epoch": 0.15881996684941987, "grad_norm": 0.14562824368476868, "learning_rate": 0.002, "loss": 2.5759, "step": 79720 }, { "epoch": 0.15883988907305877, "grad_norm": 0.13726192712783813, "learning_rate": 0.002, "loss": 2.5765, "step": 79730 }, { "epoch": 0.15885981129669768, "grad_norm": 0.20254087448120117, "learning_rate": 0.002, "loss": 2.5787, "step": 79740 }, { "epoch": 0.1588797335203366, "grad_norm": 0.16841663420200348, "learning_rate": 0.002, "loss": 2.5626, "step": 79750 }, { "epoch": 0.15889965574397552, "grad_norm": 0.138205885887146, "learning_rate": 0.002, "loss": 2.5805, "step": 79760 }, { "epoch": 0.15891957796761444, "grad_norm": 0.16607075929641724, "learning_rate": 0.002, "loss": 2.5665, "step": 79770 }, { "epoch": 0.15893950019125336, "grad_norm": 0.1608564853668213, "learning_rate": 0.002, "loss": 2.573, "step": 79780 }, { "epoch": 0.15895942241489225, "grad_norm": 0.13026027381420135, "learning_rate": 0.002, "loss": 2.5596, "step": 79790 }, { "epoch": 0.15897934463853117, "grad_norm": 0.14586667716503143, "learning_rate": 0.002, "loss": 2.5745, "step": 79800 }, { "epoch": 0.15899926686217009, "grad_norm": 0.17617258429527283, "learning_rate": 0.002, "loss": 2.5733, "step": 79810 }, { "epoch": 0.159019189085809, "grad_norm": 0.1577731966972351, "learning_rate": 0.002, "loss": 2.5721, "step": 79820 }, { "epoch": 0.15903911130944792, "grad_norm": 0.15229979157447815, "learning_rate": 0.002, "loss": 2.5592, "step": 79830 }, { "epoch": 0.15905903353308684, "grad_norm": 0.1870598942041397, "learning_rate": 0.002, "loss": 2.5648, "step": 79840 }, { "epoch": 0.15907895575672573, "grad_norm": 0.17862044274806976, "learning_rate": 0.002, "loss": 2.5678, "step": 79850 }, { "epoch": 0.15909887798036465, "grad_norm": 0.18349355459213257, "learning_rate": 0.002, "loss": 2.5596, "step": 79860 }, { "epoch": 0.15911880020400357, "grad_norm": 0.14472700655460358, "learning_rate": 0.002, "loss": 2.5678, "step": 79870 }, { "epoch": 0.1591387224276425, "grad_norm": 0.15918149054050446, "learning_rate": 0.002, "loss": 2.5745, "step": 79880 }, { "epoch": 0.1591586446512814, "grad_norm": 0.17683619260787964, "learning_rate": 0.002, "loss": 2.5509, "step": 79890 }, { "epoch": 0.15917856687492032, "grad_norm": 0.16660508513450623, "learning_rate": 0.002, "loss": 2.5691, "step": 79900 }, { "epoch": 0.15919848909855921, "grad_norm": 0.14511863887310028, "learning_rate": 0.002, "loss": 2.5764, "step": 79910 }, { "epoch": 0.15921841132219813, "grad_norm": 0.1475403904914856, "learning_rate": 0.002, "loss": 2.5711, "step": 79920 }, { "epoch": 0.15923833354583705, "grad_norm": 0.16786542534828186, "learning_rate": 0.002, "loss": 2.5884, "step": 79930 }, { "epoch": 0.15925825576947597, "grad_norm": 0.16929340362548828, "learning_rate": 0.002, "loss": 2.5653, "step": 79940 }, { "epoch": 0.1592781779931149, "grad_norm": 0.20607930421829224, "learning_rate": 0.002, "loss": 2.5617, "step": 79950 }, { "epoch": 0.15929810021675378, "grad_norm": 0.17303499579429626, "learning_rate": 0.002, "loss": 2.5897, "step": 79960 }, { "epoch": 0.1593180224403927, "grad_norm": 0.17256738245487213, "learning_rate": 0.002, "loss": 2.554, "step": 79970 }, { "epoch": 0.15933794466403162, "grad_norm": 0.1639208048582077, "learning_rate": 0.002, "loss": 2.5747, "step": 79980 }, { "epoch": 0.15935786688767054, "grad_norm": 0.13374194502830505, "learning_rate": 0.002, "loss": 2.5626, "step": 79990 }, { "epoch": 0.15937778911130945, "grad_norm": 0.18860377371311188, "learning_rate": 0.002, "loss": 2.5614, "step": 80000 }, { "epoch": 0.15939771133494837, "grad_norm": 0.14358995854854584, "learning_rate": 0.002, "loss": 2.5736, "step": 80010 }, { "epoch": 0.15941763355858726, "grad_norm": 0.18091264367103577, "learning_rate": 0.002, "loss": 2.5679, "step": 80020 }, { "epoch": 0.15943755578222618, "grad_norm": 0.21073326468467712, "learning_rate": 0.002, "loss": 2.5567, "step": 80030 }, { "epoch": 0.1594574780058651, "grad_norm": 0.14739620685577393, "learning_rate": 0.002, "loss": 2.57, "step": 80040 }, { "epoch": 0.15947740022950402, "grad_norm": 0.18585336208343506, "learning_rate": 0.002, "loss": 2.567, "step": 80050 }, { "epoch": 0.15949732245314294, "grad_norm": 0.14872203767299652, "learning_rate": 0.002, "loss": 2.5621, "step": 80060 }, { "epoch": 0.15951724467678186, "grad_norm": 0.15235865116119385, "learning_rate": 0.002, "loss": 2.563, "step": 80070 }, { "epoch": 0.15953716690042075, "grad_norm": 0.14210526645183563, "learning_rate": 0.002, "loss": 2.5733, "step": 80080 }, { "epoch": 0.15955708912405966, "grad_norm": 0.19350139796733856, "learning_rate": 0.002, "loss": 2.5634, "step": 80090 }, { "epoch": 0.15957701134769858, "grad_norm": 0.15125778317451477, "learning_rate": 0.002, "loss": 2.5696, "step": 80100 }, { "epoch": 0.1595969335713375, "grad_norm": 0.16646672785282135, "learning_rate": 0.002, "loss": 2.5619, "step": 80110 }, { "epoch": 0.15961685579497642, "grad_norm": 0.1922224760055542, "learning_rate": 0.002, "loss": 2.58, "step": 80120 }, { "epoch": 0.15963677801861534, "grad_norm": 0.1462346464395523, "learning_rate": 0.002, "loss": 2.5688, "step": 80130 }, { "epoch": 0.15965670024225423, "grad_norm": 0.1444501429796219, "learning_rate": 0.002, "loss": 2.5722, "step": 80140 }, { "epoch": 0.15967662246589315, "grad_norm": 0.18619026243686676, "learning_rate": 0.002, "loss": 2.5579, "step": 80150 }, { "epoch": 0.15969654468953207, "grad_norm": 0.1578066349029541, "learning_rate": 0.002, "loss": 2.5806, "step": 80160 }, { "epoch": 0.15971646691317098, "grad_norm": 0.17651252448558807, "learning_rate": 0.002, "loss": 2.5702, "step": 80170 }, { "epoch": 0.1597363891368099, "grad_norm": 0.17305876314640045, "learning_rate": 0.002, "loss": 2.5634, "step": 80180 }, { "epoch": 0.15975631136044882, "grad_norm": 0.1457013189792633, "learning_rate": 0.002, "loss": 2.5732, "step": 80190 }, { "epoch": 0.1597762335840877, "grad_norm": 0.1891794055700302, "learning_rate": 0.002, "loss": 2.5788, "step": 80200 }, { "epoch": 0.15979615580772663, "grad_norm": 0.1673521250486374, "learning_rate": 0.002, "loss": 2.5566, "step": 80210 }, { "epoch": 0.15981607803136555, "grad_norm": 0.17343387007713318, "learning_rate": 0.002, "loss": 2.5701, "step": 80220 }, { "epoch": 0.15983600025500447, "grad_norm": 0.1878683716058731, "learning_rate": 0.002, "loss": 2.5635, "step": 80230 }, { "epoch": 0.1598559224786434, "grad_norm": 0.1430187076330185, "learning_rate": 0.002, "loss": 2.5661, "step": 80240 }, { "epoch": 0.15987584470228228, "grad_norm": 0.14384257793426514, "learning_rate": 0.002, "loss": 2.5572, "step": 80250 }, { "epoch": 0.1598957669259212, "grad_norm": 0.18153516948223114, "learning_rate": 0.002, "loss": 2.5776, "step": 80260 }, { "epoch": 0.15991568914956011, "grad_norm": 0.14555856585502625, "learning_rate": 0.002, "loss": 2.5576, "step": 80270 }, { "epoch": 0.15993561137319903, "grad_norm": 0.1473679393529892, "learning_rate": 0.002, "loss": 2.5653, "step": 80280 }, { "epoch": 0.15995553359683795, "grad_norm": 0.17002612352371216, "learning_rate": 0.002, "loss": 2.5728, "step": 80290 }, { "epoch": 0.15997545582047687, "grad_norm": 0.1948019564151764, "learning_rate": 0.002, "loss": 2.5862, "step": 80300 }, { "epoch": 0.15999537804411576, "grad_norm": 0.1718120127916336, "learning_rate": 0.002, "loss": 2.5655, "step": 80310 }, { "epoch": 0.16001530026775468, "grad_norm": 0.14906252920627594, "learning_rate": 0.002, "loss": 2.5616, "step": 80320 }, { "epoch": 0.1600352224913936, "grad_norm": 0.14576934278011322, "learning_rate": 0.002, "loss": 2.5502, "step": 80330 }, { "epoch": 0.16005514471503252, "grad_norm": 0.21040132641792297, "learning_rate": 0.002, "loss": 2.5643, "step": 80340 }, { "epoch": 0.16007506693867143, "grad_norm": 0.14411072432994843, "learning_rate": 0.002, "loss": 2.5626, "step": 80350 }, { "epoch": 0.16009498916231035, "grad_norm": 0.16904237866401672, "learning_rate": 0.002, "loss": 2.5606, "step": 80360 }, { "epoch": 0.16011491138594924, "grad_norm": 0.18066120147705078, "learning_rate": 0.002, "loss": 2.5718, "step": 80370 }, { "epoch": 0.16013483360958816, "grad_norm": 0.14983826875686646, "learning_rate": 0.002, "loss": 2.5609, "step": 80380 }, { "epoch": 0.16015475583322708, "grad_norm": 0.1605643630027771, "learning_rate": 0.002, "loss": 2.5931, "step": 80390 }, { "epoch": 0.160174678056866, "grad_norm": 0.15102843940258026, "learning_rate": 0.002, "loss": 2.563, "step": 80400 }, { "epoch": 0.16019460028050492, "grad_norm": 0.17635148763656616, "learning_rate": 0.002, "loss": 2.5717, "step": 80410 }, { "epoch": 0.16021452250414384, "grad_norm": 0.15365581214427948, "learning_rate": 0.002, "loss": 2.5688, "step": 80420 }, { "epoch": 0.16023444472778273, "grad_norm": 0.19173677265644073, "learning_rate": 0.002, "loss": 2.5632, "step": 80430 }, { "epoch": 0.16025436695142165, "grad_norm": 0.15214921534061432, "learning_rate": 0.002, "loss": 2.5805, "step": 80440 }, { "epoch": 0.16027428917506056, "grad_norm": 0.16345666348934174, "learning_rate": 0.002, "loss": 2.5705, "step": 80450 }, { "epoch": 0.16029421139869948, "grad_norm": 0.16654253005981445, "learning_rate": 0.002, "loss": 2.5744, "step": 80460 }, { "epoch": 0.1603141336223384, "grad_norm": 0.16577006876468658, "learning_rate": 0.002, "loss": 2.5698, "step": 80470 }, { "epoch": 0.1603340558459773, "grad_norm": 0.14713089168071747, "learning_rate": 0.002, "loss": 2.5828, "step": 80480 }, { "epoch": 0.1603539780696162, "grad_norm": 0.15908680856227875, "learning_rate": 0.002, "loss": 2.572, "step": 80490 }, { "epoch": 0.16037390029325513, "grad_norm": 0.15978294610977173, "learning_rate": 0.002, "loss": 2.5896, "step": 80500 }, { "epoch": 0.16039382251689405, "grad_norm": 0.13769787549972534, "learning_rate": 0.002, "loss": 2.5645, "step": 80510 }, { "epoch": 0.16041374474053297, "grad_norm": 0.1460118293762207, "learning_rate": 0.002, "loss": 2.567, "step": 80520 }, { "epoch": 0.16043366696417188, "grad_norm": 0.1512690633535385, "learning_rate": 0.002, "loss": 2.5628, "step": 80530 }, { "epoch": 0.16045358918781077, "grad_norm": 0.1578173041343689, "learning_rate": 0.002, "loss": 2.5658, "step": 80540 }, { "epoch": 0.1604735114114497, "grad_norm": 0.18872597813606262, "learning_rate": 0.002, "loss": 2.5573, "step": 80550 }, { "epoch": 0.1604934336350886, "grad_norm": 0.1494522988796234, "learning_rate": 0.002, "loss": 2.5502, "step": 80560 }, { "epoch": 0.16051335585872753, "grad_norm": 0.15336887538433075, "learning_rate": 0.002, "loss": 2.5569, "step": 80570 }, { "epoch": 0.16053327808236645, "grad_norm": 0.18798039853572845, "learning_rate": 0.002, "loss": 2.5722, "step": 80580 }, { "epoch": 0.16055320030600537, "grad_norm": 0.1685478687286377, "learning_rate": 0.002, "loss": 2.57, "step": 80590 }, { "epoch": 0.16057312252964426, "grad_norm": 0.1618262231349945, "learning_rate": 0.002, "loss": 2.572, "step": 80600 }, { "epoch": 0.16059304475328318, "grad_norm": 0.18306082487106323, "learning_rate": 0.002, "loss": 2.5795, "step": 80610 }, { "epoch": 0.1606129669769221, "grad_norm": 0.1403096318244934, "learning_rate": 0.002, "loss": 2.5644, "step": 80620 }, { "epoch": 0.160632889200561, "grad_norm": 0.1769614815711975, "learning_rate": 0.002, "loss": 2.5824, "step": 80630 }, { "epoch": 0.16065281142419993, "grad_norm": 0.18199455738067627, "learning_rate": 0.002, "loss": 2.5767, "step": 80640 }, { "epoch": 0.16067273364783885, "grad_norm": 0.1666662096977234, "learning_rate": 0.002, "loss": 2.5839, "step": 80650 }, { "epoch": 0.16069265587147774, "grad_norm": 0.16391074657440186, "learning_rate": 0.002, "loss": 2.573, "step": 80660 }, { "epoch": 0.16071257809511666, "grad_norm": 0.14537353813648224, "learning_rate": 0.002, "loss": 2.5741, "step": 80670 }, { "epoch": 0.16073250031875558, "grad_norm": 0.19330061972141266, "learning_rate": 0.002, "loss": 2.5713, "step": 80680 }, { "epoch": 0.1607524225423945, "grad_norm": 0.15957313776016235, "learning_rate": 0.002, "loss": 2.5702, "step": 80690 }, { "epoch": 0.16077234476603341, "grad_norm": 0.15754613280296326, "learning_rate": 0.002, "loss": 2.5563, "step": 80700 }, { "epoch": 0.1607922669896723, "grad_norm": 0.160816490650177, "learning_rate": 0.002, "loss": 2.5551, "step": 80710 }, { "epoch": 0.16081218921331122, "grad_norm": 0.15843379497528076, "learning_rate": 0.002, "loss": 2.5624, "step": 80720 }, { "epoch": 0.16083211143695014, "grad_norm": 0.16467538475990295, "learning_rate": 0.002, "loss": 2.5781, "step": 80730 }, { "epoch": 0.16085203366058906, "grad_norm": 0.15985918045043945, "learning_rate": 0.002, "loss": 2.5817, "step": 80740 }, { "epoch": 0.16087195588422798, "grad_norm": 0.177053764462471, "learning_rate": 0.002, "loss": 2.5691, "step": 80750 }, { "epoch": 0.1608918781078669, "grad_norm": 0.17266467213630676, "learning_rate": 0.002, "loss": 2.5719, "step": 80760 }, { "epoch": 0.1609118003315058, "grad_norm": 0.1425759643316269, "learning_rate": 0.002, "loss": 2.5555, "step": 80770 }, { "epoch": 0.1609317225551447, "grad_norm": 0.14542236924171448, "learning_rate": 0.002, "loss": 2.5856, "step": 80780 }, { "epoch": 0.16095164477878363, "grad_norm": 0.16678783297538757, "learning_rate": 0.002, "loss": 2.5794, "step": 80790 }, { "epoch": 0.16097156700242254, "grad_norm": 0.18486659228801727, "learning_rate": 0.002, "loss": 2.5661, "step": 80800 }, { "epoch": 0.16099148922606146, "grad_norm": 0.16131193935871124, "learning_rate": 0.002, "loss": 2.5706, "step": 80810 }, { "epoch": 0.16101141144970038, "grad_norm": 0.14146248996257782, "learning_rate": 0.002, "loss": 2.5765, "step": 80820 }, { "epoch": 0.16103133367333927, "grad_norm": 0.13116255402565002, "learning_rate": 0.002, "loss": 2.5677, "step": 80830 }, { "epoch": 0.1610512558969782, "grad_norm": 0.16376112401485443, "learning_rate": 0.002, "loss": 2.5705, "step": 80840 }, { "epoch": 0.1610711781206171, "grad_norm": 0.15543882548809052, "learning_rate": 0.002, "loss": 2.5612, "step": 80850 }, { "epoch": 0.16109110034425603, "grad_norm": 0.158160001039505, "learning_rate": 0.002, "loss": 2.5567, "step": 80860 }, { "epoch": 0.16111102256789495, "grad_norm": 0.19208131730556488, "learning_rate": 0.002, "loss": 2.5737, "step": 80870 }, { "epoch": 0.16113094479153386, "grad_norm": 0.14489498734474182, "learning_rate": 0.002, "loss": 2.5628, "step": 80880 }, { "epoch": 0.16115086701517276, "grad_norm": 0.15609462559223175, "learning_rate": 0.002, "loss": 2.5769, "step": 80890 }, { "epoch": 0.16117078923881167, "grad_norm": 0.1924210786819458, "learning_rate": 0.002, "loss": 2.5721, "step": 80900 }, { "epoch": 0.1611907114624506, "grad_norm": 0.14849483966827393, "learning_rate": 0.002, "loss": 2.5926, "step": 80910 }, { "epoch": 0.1612106336860895, "grad_norm": 0.16674315929412842, "learning_rate": 0.002, "loss": 2.5745, "step": 80920 }, { "epoch": 0.16123055590972843, "grad_norm": 0.3316620886325836, "learning_rate": 0.002, "loss": 2.5719, "step": 80930 }, { "epoch": 0.16125047813336735, "grad_norm": 0.160212442278862, "learning_rate": 0.002, "loss": 2.5804, "step": 80940 }, { "epoch": 0.16127040035700624, "grad_norm": 0.2121664434671402, "learning_rate": 0.002, "loss": 2.5668, "step": 80950 }, { "epoch": 0.16129032258064516, "grad_norm": 0.12608398497104645, "learning_rate": 0.002, "loss": 2.5646, "step": 80960 }, { "epoch": 0.16131024480428408, "grad_norm": 0.14839239418506622, "learning_rate": 0.002, "loss": 2.5708, "step": 80970 }, { "epoch": 0.161330167027923, "grad_norm": 0.16726620495319366, "learning_rate": 0.002, "loss": 2.5831, "step": 80980 }, { "epoch": 0.1613500892515619, "grad_norm": 0.16882756352424622, "learning_rate": 0.002, "loss": 2.5772, "step": 80990 }, { "epoch": 0.1613700114752008, "grad_norm": 0.13216069340705872, "learning_rate": 0.002, "loss": 2.5706, "step": 81000 }, { "epoch": 0.16138993369883972, "grad_norm": 0.18633022904396057, "learning_rate": 0.002, "loss": 2.5879, "step": 81010 }, { "epoch": 0.16140985592247864, "grad_norm": 0.2106911987066269, "learning_rate": 0.002, "loss": 2.5933, "step": 81020 }, { "epoch": 0.16142977814611756, "grad_norm": 0.1493290662765503, "learning_rate": 0.002, "loss": 2.566, "step": 81030 }, { "epoch": 0.16144970036975648, "grad_norm": 0.1495373547077179, "learning_rate": 0.002, "loss": 2.5775, "step": 81040 }, { "epoch": 0.1614696225933954, "grad_norm": 0.1684562861919403, "learning_rate": 0.002, "loss": 2.5718, "step": 81050 }, { "epoch": 0.1614895448170343, "grad_norm": 0.16145974397659302, "learning_rate": 0.002, "loss": 2.5772, "step": 81060 }, { "epoch": 0.1615094670406732, "grad_norm": 0.1716577410697937, "learning_rate": 0.002, "loss": 2.5712, "step": 81070 }, { "epoch": 0.16152938926431212, "grad_norm": 0.17635871469974518, "learning_rate": 0.002, "loss": 2.5822, "step": 81080 }, { "epoch": 0.16154931148795104, "grad_norm": 0.16539672017097473, "learning_rate": 0.002, "loss": 2.5628, "step": 81090 }, { "epoch": 0.16156923371158996, "grad_norm": 0.1679617017507553, "learning_rate": 0.002, "loss": 2.5794, "step": 81100 }, { "epoch": 0.16158915593522888, "grad_norm": 0.1368364691734314, "learning_rate": 0.002, "loss": 2.5799, "step": 81110 }, { "epoch": 0.16160907815886777, "grad_norm": 0.21645377576351166, "learning_rate": 0.002, "loss": 2.5622, "step": 81120 }, { "epoch": 0.1616290003825067, "grad_norm": 0.1654212325811386, "learning_rate": 0.002, "loss": 2.5792, "step": 81130 }, { "epoch": 0.1616489226061456, "grad_norm": 0.17733940482139587, "learning_rate": 0.002, "loss": 2.574, "step": 81140 }, { "epoch": 0.16166884482978452, "grad_norm": 0.14483126997947693, "learning_rate": 0.002, "loss": 2.5731, "step": 81150 }, { "epoch": 0.16168876705342344, "grad_norm": 0.16326193511486053, "learning_rate": 0.002, "loss": 2.5736, "step": 81160 }, { "epoch": 0.16170868927706236, "grad_norm": 0.15289820730686188, "learning_rate": 0.002, "loss": 2.5673, "step": 81170 }, { "epoch": 0.16172861150070125, "grad_norm": 0.1700149029493332, "learning_rate": 0.002, "loss": 2.568, "step": 81180 }, { "epoch": 0.16174853372434017, "grad_norm": 0.1628590077161789, "learning_rate": 0.002, "loss": 2.5773, "step": 81190 }, { "epoch": 0.1617684559479791, "grad_norm": 0.191719651222229, "learning_rate": 0.002, "loss": 2.5606, "step": 81200 }, { "epoch": 0.161788378171618, "grad_norm": 0.18764756619930267, "learning_rate": 0.002, "loss": 2.5727, "step": 81210 }, { "epoch": 0.16180830039525693, "grad_norm": 0.1673259437084198, "learning_rate": 0.002, "loss": 2.5825, "step": 81220 }, { "epoch": 0.16182822261889582, "grad_norm": 0.15331189334392548, "learning_rate": 0.002, "loss": 2.5745, "step": 81230 }, { "epoch": 0.16184814484253474, "grad_norm": 0.1558113992214203, "learning_rate": 0.002, "loss": 2.578, "step": 81240 }, { "epoch": 0.16186806706617365, "grad_norm": 0.17664624750614166, "learning_rate": 0.002, "loss": 2.5595, "step": 81250 }, { "epoch": 0.16188798928981257, "grad_norm": 0.15812955796718597, "learning_rate": 0.002, "loss": 2.5639, "step": 81260 }, { "epoch": 0.1619079115134515, "grad_norm": 0.16165882349014282, "learning_rate": 0.002, "loss": 2.5627, "step": 81270 }, { "epoch": 0.1619278337370904, "grad_norm": 0.1669301986694336, "learning_rate": 0.002, "loss": 2.5763, "step": 81280 }, { "epoch": 0.1619477559607293, "grad_norm": 0.1757449358701706, "learning_rate": 0.002, "loss": 2.5807, "step": 81290 }, { "epoch": 0.16196767818436822, "grad_norm": 0.12736546993255615, "learning_rate": 0.002, "loss": 2.5834, "step": 81300 }, { "epoch": 0.16198760040800714, "grad_norm": 0.16246964037418365, "learning_rate": 0.002, "loss": 2.5713, "step": 81310 }, { "epoch": 0.16200752263164606, "grad_norm": 0.1569730043411255, "learning_rate": 0.002, "loss": 2.5682, "step": 81320 }, { "epoch": 0.16202744485528497, "grad_norm": 0.22059790790081024, "learning_rate": 0.002, "loss": 2.5715, "step": 81330 }, { "epoch": 0.1620473670789239, "grad_norm": 0.13032270967960358, "learning_rate": 0.002, "loss": 2.5761, "step": 81340 }, { "epoch": 0.16206728930256278, "grad_norm": 0.158677339553833, "learning_rate": 0.002, "loss": 2.5416, "step": 81350 }, { "epoch": 0.1620872115262017, "grad_norm": 0.1677256077528, "learning_rate": 0.002, "loss": 2.5711, "step": 81360 }, { "epoch": 0.16210713374984062, "grad_norm": 0.14229045808315277, "learning_rate": 0.002, "loss": 2.5489, "step": 81370 }, { "epoch": 0.16212705597347954, "grad_norm": 0.18335694074630737, "learning_rate": 0.002, "loss": 2.5648, "step": 81380 }, { "epoch": 0.16214697819711846, "grad_norm": 0.15352343022823334, "learning_rate": 0.002, "loss": 2.5695, "step": 81390 }, { "epoch": 0.16216690042075738, "grad_norm": 0.16551879048347473, "learning_rate": 0.002, "loss": 2.5584, "step": 81400 }, { "epoch": 0.16218682264439627, "grad_norm": 0.17825037240982056, "learning_rate": 0.002, "loss": 2.5794, "step": 81410 }, { "epoch": 0.16220674486803519, "grad_norm": 0.15651826560497284, "learning_rate": 0.002, "loss": 2.5551, "step": 81420 }, { "epoch": 0.1622266670916741, "grad_norm": 0.20809337496757507, "learning_rate": 0.002, "loss": 2.5843, "step": 81430 }, { "epoch": 0.16224658931531302, "grad_norm": 0.16904312372207642, "learning_rate": 0.002, "loss": 2.5692, "step": 81440 }, { "epoch": 0.16226651153895194, "grad_norm": 0.1682640016078949, "learning_rate": 0.002, "loss": 2.5815, "step": 81450 }, { "epoch": 0.16228643376259083, "grad_norm": 0.18409566581249237, "learning_rate": 0.002, "loss": 2.581, "step": 81460 }, { "epoch": 0.16230635598622975, "grad_norm": 0.15261812508106232, "learning_rate": 0.002, "loss": 2.5615, "step": 81470 }, { "epoch": 0.16232627820986867, "grad_norm": 0.16962897777557373, "learning_rate": 0.002, "loss": 2.5763, "step": 81480 }, { "epoch": 0.1623462004335076, "grad_norm": 0.17475049197673798, "learning_rate": 0.002, "loss": 2.5768, "step": 81490 }, { "epoch": 0.1623661226571465, "grad_norm": 0.1703285425901413, "learning_rate": 0.002, "loss": 2.5762, "step": 81500 }, { "epoch": 0.16238604488078542, "grad_norm": 0.15892823040485382, "learning_rate": 0.002, "loss": 2.5779, "step": 81510 }, { "epoch": 0.16240596710442431, "grad_norm": 0.16303099691867828, "learning_rate": 0.002, "loss": 2.5835, "step": 81520 }, { "epoch": 0.16242588932806323, "grad_norm": 0.14195838570594788, "learning_rate": 0.002, "loss": 2.562, "step": 81530 }, { "epoch": 0.16244581155170215, "grad_norm": 0.17686676979064941, "learning_rate": 0.002, "loss": 2.5708, "step": 81540 }, { "epoch": 0.16246573377534107, "grad_norm": 0.1534966081380844, "learning_rate": 0.002, "loss": 2.5774, "step": 81550 }, { "epoch": 0.16248565599898, "grad_norm": 0.17547188699245453, "learning_rate": 0.002, "loss": 2.5662, "step": 81560 }, { "epoch": 0.1625055782226189, "grad_norm": 0.14563947916030884, "learning_rate": 0.002, "loss": 2.5644, "step": 81570 }, { "epoch": 0.1625255004462578, "grad_norm": 0.15720736980438232, "learning_rate": 0.002, "loss": 2.5736, "step": 81580 }, { "epoch": 0.16254542266989672, "grad_norm": 0.1401984840631485, "learning_rate": 0.002, "loss": 2.5626, "step": 81590 }, { "epoch": 0.16256534489353563, "grad_norm": 0.16519568860530853, "learning_rate": 0.002, "loss": 2.5701, "step": 81600 }, { "epoch": 0.16258526711717455, "grad_norm": 0.17683343589305878, "learning_rate": 0.002, "loss": 2.5793, "step": 81610 }, { "epoch": 0.16260518934081347, "grad_norm": 0.14285047352313995, "learning_rate": 0.002, "loss": 2.5683, "step": 81620 }, { "epoch": 0.1626251115644524, "grad_norm": 0.1871613711118698, "learning_rate": 0.002, "loss": 2.5668, "step": 81630 }, { "epoch": 0.16264503378809128, "grad_norm": 0.14317640662193298, "learning_rate": 0.002, "loss": 2.571, "step": 81640 }, { "epoch": 0.1626649560117302, "grad_norm": 0.13125833868980408, "learning_rate": 0.002, "loss": 2.5718, "step": 81650 }, { "epoch": 0.16268487823536912, "grad_norm": 0.15071608126163483, "learning_rate": 0.002, "loss": 2.5847, "step": 81660 }, { "epoch": 0.16270480045900804, "grad_norm": 0.16033366322517395, "learning_rate": 0.002, "loss": 2.569, "step": 81670 }, { "epoch": 0.16272472268264696, "grad_norm": 0.1611219346523285, "learning_rate": 0.002, "loss": 2.5572, "step": 81680 }, { "epoch": 0.16274464490628587, "grad_norm": 0.14511732757091522, "learning_rate": 0.002, "loss": 2.569, "step": 81690 }, { "epoch": 0.16276456712992476, "grad_norm": 0.17370106279850006, "learning_rate": 0.002, "loss": 2.5774, "step": 81700 }, { "epoch": 0.16278448935356368, "grad_norm": 0.1702999770641327, "learning_rate": 0.002, "loss": 2.5656, "step": 81710 }, { "epoch": 0.1628044115772026, "grad_norm": 0.1574431210756302, "learning_rate": 0.002, "loss": 2.5572, "step": 81720 }, { "epoch": 0.16282433380084152, "grad_norm": 0.16548927128314972, "learning_rate": 0.002, "loss": 2.5589, "step": 81730 }, { "epoch": 0.16284425602448044, "grad_norm": 0.17752642929553986, "learning_rate": 0.002, "loss": 2.5741, "step": 81740 }, { "epoch": 0.16286417824811933, "grad_norm": 0.16630716621875763, "learning_rate": 0.002, "loss": 2.5583, "step": 81750 }, { "epoch": 0.16288410047175825, "grad_norm": 0.15239255130290985, "learning_rate": 0.002, "loss": 2.5575, "step": 81760 }, { "epoch": 0.16290402269539717, "grad_norm": 0.157831609249115, "learning_rate": 0.002, "loss": 2.5686, "step": 81770 }, { "epoch": 0.16292394491903608, "grad_norm": 0.17602895200252533, "learning_rate": 0.002, "loss": 2.583, "step": 81780 }, { "epoch": 0.162943867142675, "grad_norm": 0.14567327499389648, "learning_rate": 0.002, "loss": 2.585, "step": 81790 }, { "epoch": 0.16296378936631392, "grad_norm": 0.14968977868556976, "learning_rate": 0.002, "loss": 2.5605, "step": 81800 }, { "epoch": 0.1629837115899528, "grad_norm": 0.14281339943408966, "learning_rate": 0.002, "loss": 2.5643, "step": 81810 }, { "epoch": 0.16300363381359173, "grad_norm": 0.1736707240343094, "learning_rate": 0.002, "loss": 2.5663, "step": 81820 }, { "epoch": 0.16302355603723065, "grad_norm": 0.1548752337694168, "learning_rate": 0.002, "loss": 2.5684, "step": 81830 }, { "epoch": 0.16304347826086957, "grad_norm": 0.1724936068058014, "learning_rate": 0.002, "loss": 2.5706, "step": 81840 }, { "epoch": 0.16306340048450849, "grad_norm": 0.14735257625579834, "learning_rate": 0.002, "loss": 2.573, "step": 81850 }, { "epoch": 0.1630833227081474, "grad_norm": 0.15701402723789215, "learning_rate": 0.002, "loss": 2.5541, "step": 81860 }, { "epoch": 0.1631032449317863, "grad_norm": 0.16728894412517548, "learning_rate": 0.002, "loss": 2.5815, "step": 81870 }, { "epoch": 0.1631231671554252, "grad_norm": 0.13711713254451752, "learning_rate": 0.002, "loss": 2.574, "step": 81880 }, { "epoch": 0.16314308937906413, "grad_norm": 0.17206068336963654, "learning_rate": 0.002, "loss": 2.5711, "step": 81890 }, { "epoch": 0.16316301160270305, "grad_norm": 0.17028863728046417, "learning_rate": 0.002, "loss": 2.5928, "step": 81900 }, { "epoch": 0.16318293382634197, "grad_norm": 0.16176839172840118, "learning_rate": 0.002, "loss": 2.5713, "step": 81910 }, { "epoch": 0.1632028560499809, "grad_norm": 0.15417103469371796, "learning_rate": 0.002, "loss": 2.5699, "step": 81920 }, { "epoch": 0.16322277827361978, "grad_norm": 0.16903430223464966, "learning_rate": 0.002, "loss": 2.5773, "step": 81930 }, { "epoch": 0.1632427004972587, "grad_norm": 0.1340823620557785, "learning_rate": 0.002, "loss": 2.5701, "step": 81940 }, { "epoch": 0.16326262272089762, "grad_norm": 0.2132549285888672, "learning_rate": 0.002, "loss": 2.5532, "step": 81950 }, { "epoch": 0.16328254494453653, "grad_norm": 0.19589722156524658, "learning_rate": 0.002, "loss": 2.5751, "step": 81960 }, { "epoch": 0.16330246716817545, "grad_norm": 0.19293993711471558, "learning_rate": 0.002, "loss": 2.5662, "step": 81970 }, { "epoch": 0.16332238939181434, "grad_norm": 0.17189675569534302, "learning_rate": 0.002, "loss": 2.5658, "step": 81980 }, { "epoch": 0.16334231161545326, "grad_norm": 0.1543211191892624, "learning_rate": 0.002, "loss": 2.5612, "step": 81990 }, { "epoch": 0.16336223383909218, "grad_norm": 0.17253966629505157, "learning_rate": 0.002, "loss": 2.577, "step": 82000 }, { "epoch": 0.1633821560627311, "grad_norm": 0.1621285229921341, "learning_rate": 0.002, "loss": 2.5661, "step": 82010 }, { "epoch": 0.16340207828637002, "grad_norm": 0.19531993567943573, "learning_rate": 0.002, "loss": 2.5634, "step": 82020 }, { "epoch": 0.16342200051000894, "grad_norm": 0.16583357751369476, "learning_rate": 0.002, "loss": 2.5815, "step": 82030 }, { "epoch": 0.16344192273364783, "grad_norm": 0.15021610260009766, "learning_rate": 0.002, "loss": 2.5643, "step": 82040 }, { "epoch": 0.16346184495728674, "grad_norm": 0.1644100695848465, "learning_rate": 0.002, "loss": 2.5635, "step": 82050 }, { "epoch": 0.16348176718092566, "grad_norm": 0.1396777331829071, "learning_rate": 0.002, "loss": 2.5779, "step": 82060 }, { "epoch": 0.16350168940456458, "grad_norm": 0.16015556454658508, "learning_rate": 0.002, "loss": 2.5635, "step": 82070 }, { "epoch": 0.1635216116282035, "grad_norm": 0.16242867708206177, "learning_rate": 0.002, "loss": 2.5621, "step": 82080 }, { "epoch": 0.16354153385184242, "grad_norm": 0.18355652689933777, "learning_rate": 0.002, "loss": 2.5801, "step": 82090 }, { "epoch": 0.1635614560754813, "grad_norm": 0.15293747186660767, "learning_rate": 0.002, "loss": 2.5779, "step": 82100 }, { "epoch": 0.16358137829912023, "grad_norm": 0.1807098537683487, "learning_rate": 0.002, "loss": 2.5778, "step": 82110 }, { "epoch": 0.16360130052275915, "grad_norm": 0.17107513546943665, "learning_rate": 0.002, "loss": 2.5803, "step": 82120 }, { "epoch": 0.16362122274639807, "grad_norm": 0.17425356805324554, "learning_rate": 0.002, "loss": 2.5683, "step": 82130 }, { "epoch": 0.16364114497003698, "grad_norm": 0.15428946912288666, "learning_rate": 0.002, "loss": 2.577, "step": 82140 }, { "epoch": 0.1636610671936759, "grad_norm": 0.20754389464855194, "learning_rate": 0.002, "loss": 2.5752, "step": 82150 }, { "epoch": 0.1636809894173148, "grad_norm": 0.18769343197345734, "learning_rate": 0.002, "loss": 2.581, "step": 82160 }, { "epoch": 0.1637009116409537, "grad_norm": 0.1591642051935196, "learning_rate": 0.002, "loss": 2.5846, "step": 82170 }, { "epoch": 0.16372083386459263, "grad_norm": 0.1540181189775467, "learning_rate": 0.002, "loss": 2.5613, "step": 82180 }, { "epoch": 0.16374075608823155, "grad_norm": 0.18947775661945343, "learning_rate": 0.002, "loss": 2.5781, "step": 82190 }, { "epoch": 0.16376067831187047, "grad_norm": 0.1403030902147293, "learning_rate": 0.002, "loss": 2.5734, "step": 82200 }, { "epoch": 0.16378060053550939, "grad_norm": 0.16198930144309998, "learning_rate": 0.002, "loss": 2.5606, "step": 82210 }, { "epoch": 0.16380052275914828, "grad_norm": 0.21512751281261444, "learning_rate": 0.002, "loss": 2.5742, "step": 82220 }, { "epoch": 0.1638204449827872, "grad_norm": 0.17974334955215454, "learning_rate": 0.002, "loss": 2.5883, "step": 82230 }, { "epoch": 0.1638403672064261, "grad_norm": 0.1900114268064499, "learning_rate": 0.002, "loss": 2.5622, "step": 82240 }, { "epoch": 0.16386028943006503, "grad_norm": 0.16244341433048248, "learning_rate": 0.002, "loss": 2.5669, "step": 82250 }, { "epoch": 0.16388021165370395, "grad_norm": 0.14065396785736084, "learning_rate": 0.002, "loss": 2.5638, "step": 82260 }, { "epoch": 0.16390013387734284, "grad_norm": 0.1771843433380127, "learning_rate": 0.002, "loss": 2.5803, "step": 82270 }, { "epoch": 0.16392005610098176, "grad_norm": 0.2213711142539978, "learning_rate": 0.002, "loss": 2.5831, "step": 82280 }, { "epoch": 0.16393997832462068, "grad_norm": 0.1558905988931656, "learning_rate": 0.002, "loss": 2.568, "step": 82290 }, { "epoch": 0.1639599005482596, "grad_norm": 0.15274059772491455, "learning_rate": 0.002, "loss": 2.5663, "step": 82300 }, { "epoch": 0.16397982277189851, "grad_norm": 0.1642187237739563, "learning_rate": 0.002, "loss": 2.5656, "step": 82310 }, { "epoch": 0.16399974499553743, "grad_norm": 0.14649707078933716, "learning_rate": 0.002, "loss": 2.5682, "step": 82320 }, { "epoch": 0.16401966721917632, "grad_norm": 0.14862953126430511, "learning_rate": 0.002, "loss": 2.5654, "step": 82330 }, { "epoch": 0.16403958944281524, "grad_norm": 0.18553394079208374, "learning_rate": 0.002, "loss": 2.5628, "step": 82340 }, { "epoch": 0.16405951166645416, "grad_norm": 0.15191404521465302, "learning_rate": 0.002, "loss": 2.5803, "step": 82350 }, { "epoch": 0.16407943389009308, "grad_norm": 0.14715147018432617, "learning_rate": 0.002, "loss": 2.5616, "step": 82360 }, { "epoch": 0.164099356113732, "grad_norm": 0.15163558721542358, "learning_rate": 0.002, "loss": 2.5901, "step": 82370 }, { "epoch": 0.16411927833737092, "grad_norm": 0.1659148931503296, "learning_rate": 0.002, "loss": 2.5569, "step": 82380 }, { "epoch": 0.1641392005610098, "grad_norm": 0.1583521068096161, "learning_rate": 0.002, "loss": 2.5592, "step": 82390 }, { "epoch": 0.16415912278464873, "grad_norm": 0.17535139620304108, "learning_rate": 0.002, "loss": 2.5712, "step": 82400 }, { "epoch": 0.16417904500828764, "grad_norm": 0.17824292182922363, "learning_rate": 0.002, "loss": 2.5715, "step": 82410 }, { "epoch": 0.16419896723192656, "grad_norm": 0.1422661393880844, "learning_rate": 0.002, "loss": 2.5647, "step": 82420 }, { "epoch": 0.16421888945556548, "grad_norm": 0.17785421013832092, "learning_rate": 0.002, "loss": 2.5728, "step": 82430 }, { "epoch": 0.1642388116792044, "grad_norm": 0.17850077152252197, "learning_rate": 0.002, "loss": 2.5776, "step": 82440 }, { "epoch": 0.1642587339028433, "grad_norm": 0.15362273156642914, "learning_rate": 0.002, "loss": 2.5708, "step": 82450 }, { "epoch": 0.1642786561264822, "grad_norm": 0.15524183213710785, "learning_rate": 0.002, "loss": 2.5724, "step": 82460 }, { "epoch": 0.16429857835012113, "grad_norm": 0.14426471292972565, "learning_rate": 0.002, "loss": 2.5626, "step": 82470 }, { "epoch": 0.16431850057376005, "grad_norm": 0.1748981475830078, "learning_rate": 0.002, "loss": 2.5701, "step": 82480 }, { "epoch": 0.16433842279739896, "grad_norm": 0.1461622714996338, "learning_rate": 0.002, "loss": 2.5539, "step": 82490 }, { "epoch": 0.16435834502103785, "grad_norm": 0.18028415739536285, "learning_rate": 0.002, "loss": 2.565, "step": 82500 }, { "epoch": 0.16437826724467677, "grad_norm": 0.1680668443441391, "learning_rate": 0.002, "loss": 2.5586, "step": 82510 }, { "epoch": 0.1643981894683157, "grad_norm": 0.17700345814228058, "learning_rate": 0.002, "loss": 2.5669, "step": 82520 }, { "epoch": 0.1644181116919546, "grad_norm": 0.14592280983924866, "learning_rate": 0.002, "loss": 2.5736, "step": 82530 }, { "epoch": 0.16443803391559353, "grad_norm": 0.1434636116027832, "learning_rate": 0.002, "loss": 2.5684, "step": 82540 }, { "epoch": 0.16445795613923245, "grad_norm": 0.194308340549469, "learning_rate": 0.002, "loss": 2.5681, "step": 82550 }, { "epoch": 0.16447787836287134, "grad_norm": 0.14028500020503998, "learning_rate": 0.002, "loss": 2.574, "step": 82560 }, { "epoch": 0.16449780058651026, "grad_norm": 0.20648761093616486, "learning_rate": 0.002, "loss": 2.5719, "step": 82570 }, { "epoch": 0.16451772281014918, "grad_norm": 0.17030102014541626, "learning_rate": 0.002, "loss": 2.549, "step": 82580 }, { "epoch": 0.1645376450337881, "grad_norm": 0.15806223452091217, "learning_rate": 0.002, "loss": 2.5704, "step": 82590 }, { "epoch": 0.164557567257427, "grad_norm": 0.14130744338035583, "learning_rate": 0.002, "loss": 2.5696, "step": 82600 }, { "epoch": 0.16457748948106593, "grad_norm": 0.14727745950222015, "learning_rate": 0.002, "loss": 2.5667, "step": 82610 }, { "epoch": 0.16459741170470482, "grad_norm": 0.15428902208805084, "learning_rate": 0.002, "loss": 2.5703, "step": 82620 }, { "epoch": 0.16461733392834374, "grad_norm": 0.18417493999004364, "learning_rate": 0.002, "loss": 2.5757, "step": 82630 }, { "epoch": 0.16463725615198266, "grad_norm": 0.1862713247537613, "learning_rate": 0.002, "loss": 2.569, "step": 82640 }, { "epoch": 0.16465717837562158, "grad_norm": 0.13777852058410645, "learning_rate": 0.002, "loss": 2.5616, "step": 82650 }, { "epoch": 0.1646771005992605, "grad_norm": 0.1650768667459488, "learning_rate": 0.002, "loss": 2.5566, "step": 82660 }, { "epoch": 0.1646970228228994, "grad_norm": 0.18673191964626312, "learning_rate": 0.002, "loss": 2.5775, "step": 82670 }, { "epoch": 0.1647169450465383, "grad_norm": 0.15957359969615936, "learning_rate": 0.002, "loss": 2.5704, "step": 82680 }, { "epoch": 0.16473686727017722, "grad_norm": 0.1875907927751541, "learning_rate": 0.002, "loss": 2.5702, "step": 82690 }, { "epoch": 0.16475678949381614, "grad_norm": 0.17551691830158234, "learning_rate": 0.002, "loss": 2.5437, "step": 82700 }, { "epoch": 0.16477671171745506, "grad_norm": 0.1619766801595688, "learning_rate": 0.002, "loss": 2.5721, "step": 82710 }, { "epoch": 0.16479663394109398, "grad_norm": 0.1612657904624939, "learning_rate": 0.002, "loss": 2.5706, "step": 82720 }, { "epoch": 0.16481655616473287, "grad_norm": 0.17695029079914093, "learning_rate": 0.002, "loss": 2.5623, "step": 82730 }, { "epoch": 0.1648364783883718, "grad_norm": 0.1653468906879425, "learning_rate": 0.002, "loss": 2.5816, "step": 82740 }, { "epoch": 0.1648564006120107, "grad_norm": 0.15355809032917023, "learning_rate": 0.002, "loss": 2.5765, "step": 82750 }, { "epoch": 0.16487632283564962, "grad_norm": 0.1629849672317505, "learning_rate": 0.002, "loss": 2.5741, "step": 82760 }, { "epoch": 0.16489624505928854, "grad_norm": 0.1547549068927765, "learning_rate": 0.002, "loss": 2.5721, "step": 82770 }, { "epoch": 0.16491616728292746, "grad_norm": 0.15321539342403412, "learning_rate": 0.002, "loss": 2.5498, "step": 82780 }, { "epoch": 0.16493608950656635, "grad_norm": 0.15000605583190918, "learning_rate": 0.002, "loss": 2.5792, "step": 82790 }, { "epoch": 0.16495601173020527, "grad_norm": 0.1639324426651001, "learning_rate": 0.002, "loss": 2.5685, "step": 82800 }, { "epoch": 0.1649759339538442, "grad_norm": 0.13770997524261475, "learning_rate": 0.002, "loss": 2.5726, "step": 82810 }, { "epoch": 0.1649958561774831, "grad_norm": 0.16007724404335022, "learning_rate": 0.002, "loss": 2.5713, "step": 82820 }, { "epoch": 0.16501577840112203, "grad_norm": 0.16701503098011017, "learning_rate": 0.002, "loss": 2.568, "step": 82830 }, { "epoch": 0.16503570062476094, "grad_norm": 0.1381940245628357, "learning_rate": 0.002, "loss": 2.5756, "step": 82840 }, { "epoch": 0.16505562284839984, "grad_norm": 0.14384761452674866, "learning_rate": 0.002, "loss": 2.5714, "step": 82850 }, { "epoch": 0.16507554507203875, "grad_norm": 0.1634778380393982, "learning_rate": 0.002, "loss": 2.5648, "step": 82860 }, { "epoch": 0.16509546729567767, "grad_norm": 0.14106611907482147, "learning_rate": 0.002, "loss": 2.5648, "step": 82870 }, { "epoch": 0.1651153895193166, "grad_norm": 0.16731341183185577, "learning_rate": 0.002, "loss": 2.5609, "step": 82880 }, { "epoch": 0.1651353117429555, "grad_norm": 0.154665008187294, "learning_rate": 0.002, "loss": 2.5564, "step": 82890 }, { "epoch": 0.16515523396659443, "grad_norm": 0.1640072911977768, "learning_rate": 0.002, "loss": 2.5732, "step": 82900 }, { "epoch": 0.16517515619023332, "grad_norm": 0.13298948109149933, "learning_rate": 0.002, "loss": 2.5571, "step": 82910 }, { "epoch": 0.16519507841387224, "grad_norm": 0.20298528671264648, "learning_rate": 0.002, "loss": 2.5732, "step": 82920 }, { "epoch": 0.16521500063751116, "grad_norm": 0.13634152710437775, "learning_rate": 0.002, "loss": 2.5717, "step": 82930 }, { "epoch": 0.16523492286115007, "grad_norm": 0.16171938180923462, "learning_rate": 0.002, "loss": 2.5578, "step": 82940 }, { "epoch": 0.165254845084789, "grad_norm": 0.16628476977348328, "learning_rate": 0.002, "loss": 2.5604, "step": 82950 }, { "epoch": 0.1652747673084279, "grad_norm": 0.14884667098522186, "learning_rate": 0.002, "loss": 2.5668, "step": 82960 }, { "epoch": 0.1652946895320668, "grad_norm": 0.18051601946353912, "learning_rate": 0.002, "loss": 2.5825, "step": 82970 }, { "epoch": 0.16531461175570572, "grad_norm": 0.13890786468982697, "learning_rate": 0.002, "loss": 2.5552, "step": 82980 }, { "epoch": 0.16533453397934464, "grad_norm": 0.16216768324375153, "learning_rate": 0.002, "loss": 2.5865, "step": 82990 }, { "epoch": 0.16535445620298356, "grad_norm": 0.18195100128650665, "learning_rate": 0.002, "loss": 2.5788, "step": 83000 }, { "epoch": 0.16537437842662248, "grad_norm": 0.13164331018924713, "learning_rate": 0.002, "loss": 2.5716, "step": 83010 }, { "epoch": 0.16539430065026137, "grad_norm": 0.18491873145103455, "learning_rate": 0.002, "loss": 2.5785, "step": 83020 }, { "epoch": 0.16541422287390029, "grad_norm": 0.15293429791927338, "learning_rate": 0.002, "loss": 2.5659, "step": 83030 }, { "epoch": 0.1654341450975392, "grad_norm": 0.1744644045829773, "learning_rate": 0.002, "loss": 2.5723, "step": 83040 }, { "epoch": 0.16545406732117812, "grad_norm": 0.1731727570295334, "learning_rate": 0.002, "loss": 2.5714, "step": 83050 }, { "epoch": 0.16547398954481704, "grad_norm": 0.15626637637615204, "learning_rate": 0.002, "loss": 2.5777, "step": 83060 }, { "epoch": 0.16549391176845596, "grad_norm": 0.20632538199424744, "learning_rate": 0.002, "loss": 2.5613, "step": 83070 }, { "epoch": 0.16551383399209485, "grad_norm": 0.1764204055070877, "learning_rate": 0.002, "loss": 2.5818, "step": 83080 }, { "epoch": 0.16553375621573377, "grad_norm": 0.15235821902751923, "learning_rate": 0.002, "loss": 2.567, "step": 83090 }, { "epoch": 0.1655536784393727, "grad_norm": 0.19575923681259155, "learning_rate": 0.002, "loss": 2.5726, "step": 83100 }, { "epoch": 0.1655736006630116, "grad_norm": 0.15643660724163055, "learning_rate": 0.002, "loss": 2.5665, "step": 83110 }, { "epoch": 0.16559352288665052, "grad_norm": 0.1501968950033188, "learning_rate": 0.002, "loss": 2.5761, "step": 83120 }, { "epoch": 0.16561344511028944, "grad_norm": 0.14631053805351257, "learning_rate": 0.002, "loss": 2.5671, "step": 83130 }, { "epoch": 0.16563336733392833, "grad_norm": 0.1732793152332306, "learning_rate": 0.002, "loss": 2.5552, "step": 83140 }, { "epoch": 0.16565328955756725, "grad_norm": 0.15933451056480408, "learning_rate": 0.002, "loss": 2.5658, "step": 83150 }, { "epoch": 0.16567321178120617, "grad_norm": 0.1494007557630539, "learning_rate": 0.002, "loss": 2.5616, "step": 83160 }, { "epoch": 0.1656931340048451, "grad_norm": 0.13716156780719757, "learning_rate": 0.002, "loss": 2.5568, "step": 83170 }, { "epoch": 0.165713056228484, "grad_norm": 0.2149372398853302, "learning_rate": 0.002, "loss": 2.5698, "step": 83180 }, { "epoch": 0.16573297845212293, "grad_norm": 0.14846251904964447, "learning_rate": 0.002, "loss": 2.5696, "step": 83190 }, { "epoch": 0.16575290067576182, "grad_norm": 0.1493690460920334, "learning_rate": 0.002, "loss": 2.5665, "step": 83200 }, { "epoch": 0.16577282289940073, "grad_norm": 0.1813364177942276, "learning_rate": 0.002, "loss": 2.5526, "step": 83210 }, { "epoch": 0.16579274512303965, "grad_norm": 0.14847609400749207, "learning_rate": 0.002, "loss": 2.5669, "step": 83220 }, { "epoch": 0.16581266734667857, "grad_norm": 0.1531897634267807, "learning_rate": 0.002, "loss": 2.5635, "step": 83230 }, { "epoch": 0.1658325895703175, "grad_norm": 0.15090182423591614, "learning_rate": 0.002, "loss": 2.5595, "step": 83240 }, { "epoch": 0.16585251179395638, "grad_norm": 0.17563194036483765, "learning_rate": 0.002, "loss": 2.5739, "step": 83250 }, { "epoch": 0.1658724340175953, "grad_norm": 0.18955308198928833, "learning_rate": 0.002, "loss": 2.5551, "step": 83260 }, { "epoch": 0.16589235624123422, "grad_norm": 0.14657117426395416, "learning_rate": 0.002, "loss": 2.5852, "step": 83270 }, { "epoch": 0.16591227846487314, "grad_norm": 0.1501471847295761, "learning_rate": 0.002, "loss": 2.5684, "step": 83280 }, { "epoch": 0.16593220068851205, "grad_norm": 0.15867920219898224, "learning_rate": 0.002, "loss": 2.5604, "step": 83290 }, { "epoch": 0.16595212291215097, "grad_norm": 0.21075454354286194, "learning_rate": 0.002, "loss": 2.5863, "step": 83300 }, { "epoch": 0.16597204513578986, "grad_norm": 0.18188953399658203, "learning_rate": 0.002, "loss": 2.5827, "step": 83310 }, { "epoch": 0.16599196735942878, "grad_norm": 0.14885801076889038, "learning_rate": 0.002, "loss": 2.5807, "step": 83320 }, { "epoch": 0.1660118895830677, "grad_norm": 0.1519162952899933, "learning_rate": 0.002, "loss": 2.5738, "step": 83330 }, { "epoch": 0.16603181180670662, "grad_norm": 0.16995051503181458, "learning_rate": 0.002, "loss": 2.5735, "step": 83340 }, { "epoch": 0.16605173403034554, "grad_norm": 0.13988670706748962, "learning_rate": 0.002, "loss": 2.5651, "step": 83350 }, { "epoch": 0.16607165625398446, "grad_norm": 0.1891523152589798, "learning_rate": 0.002, "loss": 2.5868, "step": 83360 }, { "epoch": 0.16609157847762335, "grad_norm": 0.20768031477928162, "learning_rate": 0.002, "loss": 2.568, "step": 83370 }, { "epoch": 0.16611150070126227, "grad_norm": 0.16155065596103668, "learning_rate": 0.002, "loss": 2.5771, "step": 83380 }, { "epoch": 0.16613142292490118, "grad_norm": 0.14094583690166473, "learning_rate": 0.002, "loss": 2.5542, "step": 83390 }, { "epoch": 0.1661513451485401, "grad_norm": 0.19920091331005096, "learning_rate": 0.002, "loss": 2.5735, "step": 83400 }, { "epoch": 0.16617126737217902, "grad_norm": 0.14846602082252502, "learning_rate": 0.002, "loss": 2.5519, "step": 83410 }, { "epoch": 0.16619118959581794, "grad_norm": 0.1865772157907486, "learning_rate": 0.002, "loss": 2.5675, "step": 83420 }, { "epoch": 0.16621111181945683, "grad_norm": 0.221121683716774, "learning_rate": 0.002, "loss": 2.5858, "step": 83430 }, { "epoch": 0.16623103404309575, "grad_norm": 0.16071853041648865, "learning_rate": 0.002, "loss": 2.5793, "step": 83440 }, { "epoch": 0.16625095626673467, "grad_norm": 0.1735614389181137, "learning_rate": 0.002, "loss": 2.5637, "step": 83450 }, { "epoch": 0.16627087849037359, "grad_norm": 0.19180741906166077, "learning_rate": 0.002, "loss": 2.5687, "step": 83460 }, { "epoch": 0.1662908007140125, "grad_norm": 0.1557212471961975, "learning_rate": 0.002, "loss": 2.5519, "step": 83470 }, { "epoch": 0.1663107229376514, "grad_norm": 0.15706875920295715, "learning_rate": 0.002, "loss": 2.5762, "step": 83480 }, { "epoch": 0.1663306451612903, "grad_norm": 0.1707349568605423, "learning_rate": 0.002, "loss": 2.5642, "step": 83490 }, { "epoch": 0.16635056738492923, "grad_norm": 0.17460031807422638, "learning_rate": 0.002, "loss": 2.5803, "step": 83500 }, { "epoch": 0.16637048960856815, "grad_norm": 0.1406879872083664, "learning_rate": 0.002, "loss": 2.5713, "step": 83510 }, { "epoch": 0.16639041183220707, "grad_norm": 0.15711919963359833, "learning_rate": 0.002, "loss": 2.5728, "step": 83520 }, { "epoch": 0.166410334055846, "grad_norm": 0.1705131232738495, "learning_rate": 0.002, "loss": 2.5551, "step": 83530 }, { "epoch": 0.16643025627948488, "grad_norm": 0.1461789608001709, "learning_rate": 0.002, "loss": 2.5515, "step": 83540 }, { "epoch": 0.1664501785031238, "grad_norm": 0.1592944711446762, "learning_rate": 0.002, "loss": 2.5684, "step": 83550 }, { "epoch": 0.16647010072676272, "grad_norm": 0.17481736838817596, "learning_rate": 0.002, "loss": 2.5575, "step": 83560 }, { "epoch": 0.16649002295040163, "grad_norm": 0.16202561557292938, "learning_rate": 0.002, "loss": 2.5718, "step": 83570 }, { "epoch": 0.16650994517404055, "grad_norm": 0.15680861473083496, "learning_rate": 0.002, "loss": 2.5634, "step": 83580 }, { "epoch": 0.16652986739767947, "grad_norm": 0.15986303985118866, "learning_rate": 0.002, "loss": 2.5603, "step": 83590 }, { "epoch": 0.16654978962131836, "grad_norm": 0.15461324155330658, "learning_rate": 0.002, "loss": 2.5702, "step": 83600 }, { "epoch": 0.16656971184495728, "grad_norm": 0.15318235754966736, "learning_rate": 0.002, "loss": 2.5552, "step": 83610 }, { "epoch": 0.1665896340685962, "grad_norm": 0.20105168223381042, "learning_rate": 0.002, "loss": 2.5589, "step": 83620 }, { "epoch": 0.16660955629223512, "grad_norm": 0.150554820895195, "learning_rate": 0.002, "loss": 2.5623, "step": 83630 }, { "epoch": 0.16662947851587404, "grad_norm": 0.15234069526195526, "learning_rate": 0.002, "loss": 2.5797, "step": 83640 }, { "epoch": 0.16664940073951295, "grad_norm": 0.14132019877433777, "learning_rate": 0.002, "loss": 2.5858, "step": 83650 }, { "epoch": 0.16666932296315184, "grad_norm": 0.1413261592388153, "learning_rate": 0.002, "loss": 2.5791, "step": 83660 }, { "epoch": 0.16668924518679076, "grad_norm": 0.18813282251358032, "learning_rate": 0.002, "loss": 2.5676, "step": 83670 }, { "epoch": 0.16670916741042968, "grad_norm": 0.14823009073734283, "learning_rate": 0.002, "loss": 2.5541, "step": 83680 }, { "epoch": 0.1667290896340686, "grad_norm": 0.2138066440820694, "learning_rate": 0.002, "loss": 2.5632, "step": 83690 }, { "epoch": 0.16674901185770752, "grad_norm": 0.18016675114631653, "learning_rate": 0.002, "loss": 2.5728, "step": 83700 }, { "epoch": 0.16676893408134644, "grad_norm": 0.15923379361629486, "learning_rate": 0.002, "loss": 2.5677, "step": 83710 }, { "epoch": 0.16678885630498533, "grad_norm": 0.16959267854690552, "learning_rate": 0.002, "loss": 2.5834, "step": 83720 }, { "epoch": 0.16680877852862425, "grad_norm": 0.14613200724124908, "learning_rate": 0.002, "loss": 2.5745, "step": 83730 }, { "epoch": 0.16682870075226316, "grad_norm": 0.17919476330280304, "learning_rate": 0.002, "loss": 2.5569, "step": 83740 }, { "epoch": 0.16684862297590208, "grad_norm": 0.17280399799346924, "learning_rate": 0.002, "loss": 2.5776, "step": 83750 }, { "epoch": 0.166868545199541, "grad_norm": 0.1707645207643509, "learning_rate": 0.002, "loss": 2.5575, "step": 83760 }, { "epoch": 0.1668884674231799, "grad_norm": 0.16286221146583557, "learning_rate": 0.002, "loss": 2.5598, "step": 83770 }, { "epoch": 0.1669083896468188, "grad_norm": 0.16841405630111694, "learning_rate": 0.002, "loss": 2.5685, "step": 83780 }, { "epoch": 0.16692831187045773, "grad_norm": 0.15672427415847778, "learning_rate": 0.002, "loss": 2.5841, "step": 83790 }, { "epoch": 0.16694823409409665, "grad_norm": 0.14021672308444977, "learning_rate": 0.002, "loss": 2.569, "step": 83800 }, { "epoch": 0.16696815631773557, "grad_norm": 0.18323032557964325, "learning_rate": 0.002, "loss": 2.5652, "step": 83810 }, { "epoch": 0.16698807854137449, "grad_norm": 0.15528523921966553, "learning_rate": 0.002, "loss": 2.574, "step": 83820 }, { "epoch": 0.16700800076501338, "grad_norm": 0.15311433374881744, "learning_rate": 0.002, "loss": 2.5687, "step": 83830 }, { "epoch": 0.1670279229886523, "grad_norm": 0.19064173102378845, "learning_rate": 0.002, "loss": 2.5486, "step": 83840 }, { "epoch": 0.1670478452122912, "grad_norm": 0.16903561353683472, "learning_rate": 0.002, "loss": 2.5769, "step": 83850 }, { "epoch": 0.16706776743593013, "grad_norm": 0.17973287403583527, "learning_rate": 0.002, "loss": 2.5695, "step": 83860 }, { "epoch": 0.16708768965956905, "grad_norm": 0.1533282995223999, "learning_rate": 0.002, "loss": 2.5572, "step": 83870 }, { "epoch": 0.16710761188320797, "grad_norm": 0.18407492339611053, "learning_rate": 0.002, "loss": 2.5666, "step": 83880 }, { "epoch": 0.16712753410684686, "grad_norm": 0.2115996778011322, "learning_rate": 0.002, "loss": 2.5778, "step": 83890 }, { "epoch": 0.16714745633048578, "grad_norm": 0.13726161420345306, "learning_rate": 0.002, "loss": 2.5596, "step": 83900 }, { "epoch": 0.1671673785541247, "grad_norm": 0.21184486150741577, "learning_rate": 0.002, "loss": 2.5723, "step": 83910 }, { "epoch": 0.16718730077776361, "grad_norm": 0.19399407505989075, "learning_rate": 0.002, "loss": 2.5638, "step": 83920 }, { "epoch": 0.16720722300140253, "grad_norm": 0.148763969540596, "learning_rate": 0.002, "loss": 2.559, "step": 83930 }, { "epoch": 0.16722714522504145, "grad_norm": 0.1921631246805191, "learning_rate": 0.002, "loss": 2.5695, "step": 83940 }, { "epoch": 0.16724706744868034, "grad_norm": 0.14864493906497955, "learning_rate": 0.002, "loss": 2.5779, "step": 83950 }, { "epoch": 0.16726698967231926, "grad_norm": 0.1537480354309082, "learning_rate": 0.002, "loss": 2.5798, "step": 83960 }, { "epoch": 0.16728691189595818, "grad_norm": 0.1695370078086853, "learning_rate": 0.002, "loss": 2.5666, "step": 83970 }, { "epoch": 0.1673068341195971, "grad_norm": 0.16772955656051636, "learning_rate": 0.002, "loss": 2.5603, "step": 83980 }, { "epoch": 0.16732675634323602, "grad_norm": 0.15715311467647552, "learning_rate": 0.002, "loss": 2.5733, "step": 83990 }, { "epoch": 0.1673466785668749, "grad_norm": 0.19854414463043213, "learning_rate": 0.002, "loss": 2.5566, "step": 84000 }, { "epoch": 0.16736660079051383, "grad_norm": 0.20982876420021057, "learning_rate": 0.002, "loss": 2.5701, "step": 84010 }, { "epoch": 0.16738652301415274, "grad_norm": 0.1554366946220398, "learning_rate": 0.002, "loss": 2.5621, "step": 84020 }, { "epoch": 0.16740644523779166, "grad_norm": 0.1682939976453781, "learning_rate": 0.002, "loss": 2.5739, "step": 84030 }, { "epoch": 0.16742636746143058, "grad_norm": 0.18906818330287933, "learning_rate": 0.002, "loss": 2.5561, "step": 84040 }, { "epoch": 0.1674462896850695, "grad_norm": 0.16288670897483826, "learning_rate": 0.002, "loss": 2.5746, "step": 84050 }, { "epoch": 0.1674662119087084, "grad_norm": 0.14906080067157745, "learning_rate": 0.002, "loss": 2.5686, "step": 84060 }, { "epoch": 0.1674861341323473, "grad_norm": 0.1471283882856369, "learning_rate": 0.002, "loss": 2.5597, "step": 84070 }, { "epoch": 0.16750605635598623, "grad_norm": 0.14203687012195587, "learning_rate": 0.002, "loss": 2.5605, "step": 84080 }, { "epoch": 0.16752597857962515, "grad_norm": 0.19521178305149078, "learning_rate": 0.002, "loss": 2.574, "step": 84090 }, { "epoch": 0.16754590080326406, "grad_norm": 0.13657651841640472, "learning_rate": 0.002, "loss": 2.5702, "step": 84100 }, { "epoch": 0.16756582302690298, "grad_norm": 0.14989298582077026, "learning_rate": 0.002, "loss": 2.5626, "step": 84110 }, { "epoch": 0.16758574525054187, "grad_norm": 0.16066506505012512, "learning_rate": 0.002, "loss": 2.5704, "step": 84120 }, { "epoch": 0.1676056674741808, "grad_norm": 0.16577503085136414, "learning_rate": 0.002, "loss": 2.5748, "step": 84130 }, { "epoch": 0.1676255896978197, "grad_norm": 0.17607638239860535, "learning_rate": 0.002, "loss": 2.5731, "step": 84140 }, { "epoch": 0.16764551192145863, "grad_norm": 0.1453392207622528, "learning_rate": 0.002, "loss": 2.5649, "step": 84150 }, { "epoch": 0.16766543414509755, "grad_norm": 0.14911188185214996, "learning_rate": 0.002, "loss": 2.5843, "step": 84160 }, { "epoch": 0.16768535636873647, "grad_norm": 0.17240123450756073, "learning_rate": 0.002, "loss": 2.5601, "step": 84170 }, { "epoch": 0.16770527859237536, "grad_norm": 0.18018804490566254, "learning_rate": 0.002, "loss": 2.59, "step": 84180 }, { "epoch": 0.16772520081601427, "grad_norm": 0.16901201009750366, "learning_rate": 0.002, "loss": 2.5513, "step": 84190 }, { "epoch": 0.1677451230396532, "grad_norm": 0.16684448719024658, "learning_rate": 0.002, "loss": 2.5677, "step": 84200 }, { "epoch": 0.1677650452632921, "grad_norm": 0.15622316300868988, "learning_rate": 0.002, "loss": 2.564, "step": 84210 }, { "epoch": 0.16778496748693103, "grad_norm": 0.16452273726463318, "learning_rate": 0.002, "loss": 2.5659, "step": 84220 }, { "epoch": 0.16780488971056995, "grad_norm": 0.19253723323345184, "learning_rate": 0.002, "loss": 2.5705, "step": 84230 }, { "epoch": 0.16782481193420884, "grad_norm": 0.14189505577087402, "learning_rate": 0.002, "loss": 2.5546, "step": 84240 }, { "epoch": 0.16784473415784776, "grad_norm": 0.13758890330791473, "learning_rate": 0.002, "loss": 2.566, "step": 84250 }, { "epoch": 0.16786465638148668, "grad_norm": 0.1870776265859604, "learning_rate": 0.002, "loss": 2.5705, "step": 84260 }, { "epoch": 0.1678845786051256, "grad_norm": 0.16230574250221252, "learning_rate": 0.002, "loss": 2.5801, "step": 84270 }, { "epoch": 0.1679045008287645, "grad_norm": 0.144737109541893, "learning_rate": 0.002, "loss": 2.5718, "step": 84280 }, { "epoch": 0.1679244230524034, "grad_norm": 0.15311676263809204, "learning_rate": 0.002, "loss": 2.5544, "step": 84290 }, { "epoch": 0.16794434527604232, "grad_norm": 0.14962217211723328, "learning_rate": 0.002, "loss": 2.5568, "step": 84300 }, { "epoch": 0.16796426749968124, "grad_norm": 0.13694393634796143, "learning_rate": 0.002, "loss": 2.5596, "step": 84310 }, { "epoch": 0.16798418972332016, "grad_norm": 0.19004330039024353, "learning_rate": 0.002, "loss": 2.5721, "step": 84320 }, { "epoch": 0.16800411194695908, "grad_norm": 0.14946508407592773, "learning_rate": 0.002, "loss": 2.5772, "step": 84330 }, { "epoch": 0.168024034170598, "grad_norm": 0.1689818948507309, "learning_rate": 0.002, "loss": 2.5724, "step": 84340 }, { "epoch": 0.1680439563942369, "grad_norm": 0.16951429843902588, "learning_rate": 0.002, "loss": 2.5689, "step": 84350 }, { "epoch": 0.1680638786178758, "grad_norm": 0.14572587609291077, "learning_rate": 0.002, "loss": 2.5761, "step": 84360 }, { "epoch": 0.16808380084151472, "grad_norm": 0.14052559435367584, "learning_rate": 0.002, "loss": 2.565, "step": 84370 }, { "epoch": 0.16810372306515364, "grad_norm": 0.17404255270957947, "learning_rate": 0.002, "loss": 2.5628, "step": 84380 }, { "epoch": 0.16812364528879256, "grad_norm": 0.1778547763824463, "learning_rate": 0.002, "loss": 2.5662, "step": 84390 }, { "epoch": 0.16814356751243148, "grad_norm": 0.1497548669576645, "learning_rate": 0.002, "loss": 2.5604, "step": 84400 }, { "epoch": 0.16816348973607037, "grad_norm": 0.17285414040088654, "learning_rate": 0.002, "loss": 2.5795, "step": 84410 }, { "epoch": 0.1681834119597093, "grad_norm": 0.14902451634407043, "learning_rate": 0.002, "loss": 2.5911, "step": 84420 }, { "epoch": 0.1682033341833482, "grad_norm": 0.18589317798614502, "learning_rate": 0.002, "loss": 2.563, "step": 84430 }, { "epoch": 0.16822325640698713, "grad_norm": 0.2024531215429306, "learning_rate": 0.002, "loss": 2.5786, "step": 84440 }, { "epoch": 0.16824317863062604, "grad_norm": 0.16355861723423004, "learning_rate": 0.002, "loss": 2.5661, "step": 84450 }, { "epoch": 0.16826310085426496, "grad_norm": 0.16916202008724213, "learning_rate": 0.002, "loss": 2.5577, "step": 84460 }, { "epoch": 0.16828302307790385, "grad_norm": 0.2268778681755066, "learning_rate": 0.002, "loss": 2.5616, "step": 84470 }, { "epoch": 0.16830294530154277, "grad_norm": 0.16473311185836792, "learning_rate": 0.002, "loss": 2.5682, "step": 84480 }, { "epoch": 0.1683228675251817, "grad_norm": 0.16805729269981384, "learning_rate": 0.002, "loss": 2.5812, "step": 84490 }, { "epoch": 0.1683427897488206, "grad_norm": 0.24986760318279266, "learning_rate": 0.002, "loss": 2.5589, "step": 84500 }, { "epoch": 0.16836271197245953, "grad_norm": 0.17190702259540558, "learning_rate": 0.002, "loss": 2.5832, "step": 84510 }, { "epoch": 0.16838263419609842, "grad_norm": 0.15232117474079132, "learning_rate": 0.002, "loss": 2.5797, "step": 84520 }, { "epoch": 0.16840255641973734, "grad_norm": 0.1402958482503891, "learning_rate": 0.002, "loss": 2.5821, "step": 84530 }, { "epoch": 0.16842247864337626, "grad_norm": 0.1920320987701416, "learning_rate": 0.002, "loss": 2.5868, "step": 84540 }, { "epoch": 0.16844240086701517, "grad_norm": 0.1744132786989212, "learning_rate": 0.002, "loss": 2.5693, "step": 84550 }, { "epoch": 0.1684623230906541, "grad_norm": 0.16495421528816223, "learning_rate": 0.002, "loss": 2.5739, "step": 84560 }, { "epoch": 0.168482245314293, "grad_norm": 0.14516204595565796, "learning_rate": 0.002, "loss": 2.5747, "step": 84570 }, { "epoch": 0.1685021675379319, "grad_norm": 0.17087185382843018, "learning_rate": 0.002, "loss": 2.5725, "step": 84580 }, { "epoch": 0.16852208976157082, "grad_norm": 0.1871660202741623, "learning_rate": 0.002, "loss": 2.5661, "step": 84590 }, { "epoch": 0.16854201198520974, "grad_norm": 0.17561429738998413, "learning_rate": 0.002, "loss": 2.5575, "step": 84600 }, { "epoch": 0.16856193420884866, "grad_norm": 0.15604674816131592, "learning_rate": 0.002, "loss": 2.5778, "step": 84610 }, { "epoch": 0.16858185643248758, "grad_norm": 0.1693899929523468, "learning_rate": 0.002, "loss": 2.5837, "step": 84620 }, { "epoch": 0.1686017786561265, "grad_norm": 0.15865229070186615, "learning_rate": 0.002, "loss": 2.5617, "step": 84630 }, { "epoch": 0.16862170087976538, "grad_norm": 0.17869976162910461, "learning_rate": 0.002, "loss": 2.5783, "step": 84640 }, { "epoch": 0.1686416231034043, "grad_norm": 0.14669053256511688, "learning_rate": 0.002, "loss": 2.5754, "step": 84650 }, { "epoch": 0.16866154532704322, "grad_norm": 0.16128957271575928, "learning_rate": 0.002, "loss": 2.5614, "step": 84660 }, { "epoch": 0.16868146755068214, "grad_norm": 0.1629667729139328, "learning_rate": 0.002, "loss": 2.5694, "step": 84670 }, { "epoch": 0.16870138977432106, "grad_norm": 0.15815740823745728, "learning_rate": 0.002, "loss": 2.5728, "step": 84680 }, { "epoch": 0.16872131199795998, "grad_norm": 0.1575624942779541, "learning_rate": 0.002, "loss": 2.5758, "step": 84690 }, { "epoch": 0.16874123422159887, "grad_norm": 0.2234620302915573, "learning_rate": 0.002, "loss": 2.5662, "step": 84700 }, { "epoch": 0.1687611564452378, "grad_norm": 0.13433955609798431, "learning_rate": 0.002, "loss": 2.5654, "step": 84710 }, { "epoch": 0.1687810786688767, "grad_norm": 0.17355497181415558, "learning_rate": 0.002, "loss": 2.5742, "step": 84720 }, { "epoch": 0.16880100089251562, "grad_norm": 0.1453036218881607, "learning_rate": 0.002, "loss": 2.5767, "step": 84730 }, { "epoch": 0.16882092311615454, "grad_norm": 0.15932713449001312, "learning_rate": 0.002, "loss": 2.5652, "step": 84740 }, { "epoch": 0.16884084533979343, "grad_norm": 0.2330724149942398, "learning_rate": 0.002, "loss": 2.5899, "step": 84750 }, { "epoch": 0.16886076756343235, "grad_norm": 0.1728469431400299, "learning_rate": 0.002, "loss": 2.5666, "step": 84760 }, { "epoch": 0.16888068978707127, "grad_norm": 0.14185719192028046, "learning_rate": 0.002, "loss": 2.5678, "step": 84770 }, { "epoch": 0.1689006120107102, "grad_norm": 0.15536797046661377, "learning_rate": 0.002, "loss": 2.5755, "step": 84780 }, { "epoch": 0.1689205342343491, "grad_norm": 0.17583546042442322, "learning_rate": 0.002, "loss": 2.577, "step": 84790 }, { "epoch": 0.16894045645798803, "grad_norm": 0.16558055579662323, "learning_rate": 0.002, "loss": 2.5711, "step": 84800 }, { "epoch": 0.16896037868162692, "grad_norm": 0.2079523801803589, "learning_rate": 0.002, "loss": 2.5572, "step": 84810 }, { "epoch": 0.16898030090526583, "grad_norm": 0.22433866560459137, "learning_rate": 0.002, "loss": 2.5718, "step": 84820 }, { "epoch": 0.16900022312890475, "grad_norm": 0.1427345871925354, "learning_rate": 0.002, "loss": 2.5819, "step": 84830 }, { "epoch": 0.16902014535254367, "grad_norm": 0.15240103006362915, "learning_rate": 0.002, "loss": 2.5509, "step": 84840 }, { "epoch": 0.1690400675761826, "grad_norm": 0.1704740822315216, "learning_rate": 0.002, "loss": 2.5757, "step": 84850 }, { "epoch": 0.1690599897998215, "grad_norm": 0.14022451639175415, "learning_rate": 0.002, "loss": 2.5683, "step": 84860 }, { "epoch": 0.1690799120234604, "grad_norm": 0.1534218192100525, "learning_rate": 0.002, "loss": 2.571, "step": 84870 }, { "epoch": 0.16909983424709932, "grad_norm": 0.20886121690273285, "learning_rate": 0.002, "loss": 2.5724, "step": 84880 }, { "epoch": 0.16911975647073824, "grad_norm": 0.18422532081604004, "learning_rate": 0.002, "loss": 2.5696, "step": 84890 }, { "epoch": 0.16913967869437715, "grad_norm": 0.17142538726329803, "learning_rate": 0.002, "loss": 2.5454, "step": 84900 }, { "epoch": 0.16915960091801607, "grad_norm": 0.14717534184455872, "learning_rate": 0.002, "loss": 2.5793, "step": 84910 }, { "epoch": 0.169179523141655, "grad_norm": 0.18107040226459503, "learning_rate": 0.002, "loss": 2.5592, "step": 84920 }, { "epoch": 0.16919944536529388, "grad_norm": 0.149967759847641, "learning_rate": 0.002, "loss": 2.5625, "step": 84930 }, { "epoch": 0.1692193675889328, "grad_norm": 0.20541606843471527, "learning_rate": 0.002, "loss": 2.5775, "step": 84940 }, { "epoch": 0.16923928981257172, "grad_norm": 0.14976827800273895, "learning_rate": 0.002, "loss": 2.5567, "step": 84950 }, { "epoch": 0.16925921203621064, "grad_norm": 0.16252438724040985, "learning_rate": 0.002, "loss": 2.5739, "step": 84960 }, { "epoch": 0.16927913425984956, "grad_norm": 0.15373095870018005, "learning_rate": 0.002, "loss": 2.5688, "step": 84970 }, { "epoch": 0.16929905648348847, "grad_norm": 0.1464916467666626, "learning_rate": 0.002, "loss": 2.5667, "step": 84980 }, { "epoch": 0.16931897870712737, "grad_norm": 0.16016538441181183, "learning_rate": 0.002, "loss": 2.5844, "step": 84990 }, { "epoch": 0.16933890093076628, "grad_norm": 0.22088848054409027, "learning_rate": 0.002, "loss": 2.5802, "step": 85000 }, { "epoch": 0.1693588231544052, "grad_norm": 0.16039353609085083, "learning_rate": 0.002, "loss": 2.5698, "step": 85010 }, { "epoch": 0.16937874537804412, "grad_norm": 0.16512736678123474, "learning_rate": 0.002, "loss": 2.5702, "step": 85020 }, { "epoch": 0.16939866760168304, "grad_norm": 0.14364247024059296, "learning_rate": 0.002, "loss": 2.5553, "step": 85030 }, { "epoch": 0.16941858982532193, "grad_norm": 0.20316658914089203, "learning_rate": 0.002, "loss": 2.5967, "step": 85040 }, { "epoch": 0.16943851204896085, "grad_norm": 0.1629815697669983, "learning_rate": 0.002, "loss": 2.5673, "step": 85050 }, { "epoch": 0.16945843427259977, "grad_norm": 0.15025921165943146, "learning_rate": 0.002, "loss": 2.5529, "step": 85060 }, { "epoch": 0.16947835649623869, "grad_norm": 0.15658457577228546, "learning_rate": 0.002, "loss": 2.5535, "step": 85070 }, { "epoch": 0.1694982787198776, "grad_norm": 0.17794084548950195, "learning_rate": 0.002, "loss": 2.5678, "step": 85080 }, { "epoch": 0.16951820094351652, "grad_norm": 0.14102739095687866, "learning_rate": 0.002, "loss": 2.5689, "step": 85090 }, { "epoch": 0.1695381231671554, "grad_norm": 0.13527993857860565, "learning_rate": 0.002, "loss": 2.5825, "step": 85100 }, { "epoch": 0.16955804539079433, "grad_norm": 0.17920975387096405, "learning_rate": 0.002, "loss": 2.5754, "step": 85110 }, { "epoch": 0.16957796761443325, "grad_norm": 0.14438098669052124, "learning_rate": 0.002, "loss": 2.5758, "step": 85120 }, { "epoch": 0.16959788983807217, "grad_norm": 0.18151837587356567, "learning_rate": 0.002, "loss": 2.577, "step": 85130 }, { "epoch": 0.1696178120617111, "grad_norm": 0.1506878137588501, "learning_rate": 0.002, "loss": 2.5609, "step": 85140 }, { "epoch": 0.16963773428535, "grad_norm": 0.13713061809539795, "learning_rate": 0.002, "loss": 2.5498, "step": 85150 }, { "epoch": 0.1696576565089889, "grad_norm": 0.2277509719133377, "learning_rate": 0.002, "loss": 2.5689, "step": 85160 }, { "epoch": 0.16967757873262782, "grad_norm": 0.1453552097082138, "learning_rate": 0.002, "loss": 2.5711, "step": 85170 }, { "epoch": 0.16969750095626673, "grad_norm": 0.14585822820663452, "learning_rate": 0.002, "loss": 2.557, "step": 85180 }, { "epoch": 0.16971742317990565, "grad_norm": 0.178033247590065, "learning_rate": 0.002, "loss": 2.5861, "step": 85190 }, { "epoch": 0.16973734540354457, "grad_norm": 0.16980494558811188, "learning_rate": 0.002, "loss": 2.5759, "step": 85200 }, { "epoch": 0.1697572676271835, "grad_norm": 0.15145882964134216, "learning_rate": 0.002, "loss": 2.5681, "step": 85210 }, { "epoch": 0.16977718985082238, "grad_norm": 0.17853285372257233, "learning_rate": 0.002, "loss": 2.565, "step": 85220 }, { "epoch": 0.1697971120744613, "grad_norm": 0.13858561217784882, "learning_rate": 0.002, "loss": 2.5835, "step": 85230 }, { "epoch": 0.16981703429810022, "grad_norm": 0.15258556604385376, "learning_rate": 0.002, "loss": 2.5682, "step": 85240 }, { "epoch": 0.16983695652173914, "grad_norm": 0.14185094833374023, "learning_rate": 0.002, "loss": 2.5513, "step": 85250 }, { "epoch": 0.16985687874537805, "grad_norm": 0.17178033292293549, "learning_rate": 0.002, "loss": 2.5647, "step": 85260 }, { "epoch": 0.16987680096901694, "grad_norm": 0.15783189237117767, "learning_rate": 0.002, "loss": 2.5864, "step": 85270 }, { "epoch": 0.16989672319265586, "grad_norm": 0.20506523549556732, "learning_rate": 0.002, "loss": 2.589, "step": 85280 }, { "epoch": 0.16991664541629478, "grad_norm": 0.1553214192390442, "learning_rate": 0.002, "loss": 2.5905, "step": 85290 }, { "epoch": 0.1699365676399337, "grad_norm": 0.1546805500984192, "learning_rate": 0.002, "loss": 2.5611, "step": 85300 }, { "epoch": 0.16995648986357262, "grad_norm": 0.15457683801651, "learning_rate": 0.002, "loss": 2.5678, "step": 85310 }, { "epoch": 0.16997641208721154, "grad_norm": 0.22112734615802765, "learning_rate": 0.002, "loss": 2.5828, "step": 85320 }, { "epoch": 0.16999633431085043, "grad_norm": 0.17218995094299316, "learning_rate": 0.002, "loss": 2.5616, "step": 85330 }, { "epoch": 0.17001625653448935, "grad_norm": 0.14143279194831848, "learning_rate": 0.002, "loss": 2.5629, "step": 85340 }, { "epoch": 0.17003617875812826, "grad_norm": 0.20286864042282104, "learning_rate": 0.002, "loss": 2.5753, "step": 85350 }, { "epoch": 0.17005610098176718, "grad_norm": 0.16388316452503204, "learning_rate": 0.002, "loss": 2.5674, "step": 85360 }, { "epoch": 0.1700760232054061, "grad_norm": 0.14262229204177856, "learning_rate": 0.002, "loss": 2.5766, "step": 85370 }, { "epoch": 0.17009594542904502, "grad_norm": 0.20776550471782684, "learning_rate": 0.002, "loss": 2.573, "step": 85380 }, { "epoch": 0.1701158676526839, "grad_norm": 0.14186875522136688, "learning_rate": 0.002, "loss": 2.577, "step": 85390 }, { "epoch": 0.17013578987632283, "grad_norm": 0.17306089401245117, "learning_rate": 0.002, "loss": 2.5712, "step": 85400 }, { "epoch": 0.17015571209996175, "grad_norm": 0.13580738008022308, "learning_rate": 0.002, "loss": 2.578, "step": 85410 }, { "epoch": 0.17017563432360067, "grad_norm": 0.1836722046136856, "learning_rate": 0.002, "loss": 2.5788, "step": 85420 }, { "epoch": 0.17019555654723958, "grad_norm": 0.14998488128185272, "learning_rate": 0.002, "loss": 2.5822, "step": 85430 }, { "epoch": 0.1702154787708785, "grad_norm": 0.1560945212841034, "learning_rate": 0.002, "loss": 2.5759, "step": 85440 }, { "epoch": 0.1702354009945174, "grad_norm": 0.15680229663848877, "learning_rate": 0.002, "loss": 2.5687, "step": 85450 }, { "epoch": 0.1702553232181563, "grad_norm": 0.19293949007987976, "learning_rate": 0.002, "loss": 2.569, "step": 85460 }, { "epoch": 0.17027524544179523, "grad_norm": 0.1561514139175415, "learning_rate": 0.002, "loss": 2.5782, "step": 85470 }, { "epoch": 0.17029516766543415, "grad_norm": 0.3945885896682739, "learning_rate": 0.002, "loss": 2.553, "step": 85480 }, { "epoch": 0.17031508988907307, "grad_norm": 0.1518079936504364, "learning_rate": 0.002, "loss": 2.5646, "step": 85490 }, { "epoch": 0.17033501211271196, "grad_norm": 0.15061518549919128, "learning_rate": 0.002, "loss": 2.585, "step": 85500 }, { "epoch": 0.17035493433635088, "grad_norm": 0.1419876217842102, "learning_rate": 0.002, "loss": 2.5804, "step": 85510 }, { "epoch": 0.1703748565599898, "grad_norm": 0.17155370116233826, "learning_rate": 0.002, "loss": 2.5612, "step": 85520 }, { "epoch": 0.17039477878362871, "grad_norm": 0.17131705582141876, "learning_rate": 0.002, "loss": 2.561, "step": 85530 }, { "epoch": 0.17041470100726763, "grad_norm": 0.1776905357837677, "learning_rate": 0.002, "loss": 2.5654, "step": 85540 }, { "epoch": 0.17043462323090655, "grad_norm": 0.1853463500738144, "learning_rate": 0.002, "loss": 2.5637, "step": 85550 }, { "epoch": 0.17045454545454544, "grad_norm": 0.1912422776222229, "learning_rate": 0.002, "loss": 2.5762, "step": 85560 }, { "epoch": 0.17047446767818436, "grad_norm": 0.17135480046272278, "learning_rate": 0.002, "loss": 2.5774, "step": 85570 }, { "epoch": 0.17049438990182328, "grad_norm": 0.1412178874015808, "learning_rate": 0.002, "loss": 2.5631, "step": 85580 }, { "epoch": 0.1705143121254622, "grad_norm": 0.147405207157135, "learning_rate": 0.002, "loss": 2.5805, "step": 85590 }, { "epoch": 0.17053423434910112, "grad_norm": 0.18525825440883636, "learning_rate": 0.002, "loss": 2.5778, "step": 85600 }, { "epoch": 0.17055415657274003, "grad_norm": 0.14867348968982697, "learning_rate": 0.002, "loss": 2.5935, "step": 85610 }, { "epoch": 0.17057407879637893, "grad_norm": 0.14633382856845856, "learning_rate": 0.002, "loss": 2.5599, "step": 85620 }, { "epoch": 0.17059400102001784, "grad_norm": 0.15995877981185913, "learning_rate": 0.002, "loss": 2.5756, "step": 85630 }, { "epoch": 0.17061392324365676, "grad_norm": 0.1562827080488205, "learning_rate": 0.002, "loss": 2.5812, "step": 85640 }, { "epoch": 0.17063384546729568, "grad_norm": 0.17255784571170807, "learning_rate": 0.002, "loss": 2.5549, "step": 85650 }, { "epoch": 0.1706537676909346, "grad_norm": 0.15361739695072174, "learning_rate": 0.002, "loss": 2.548, "step": 85660 }, { "epoch": 0.17067368991457352, "grad_norm": 0.1916070282459259, "learning_rate": 0.002, "loss": 2.5724, "step": 85670 }, { "epoch": 0.1706936121382124, "grad_norm": 0.20454786717891693, "learning_rate": 0.002, "loss": 2.5771, "step": 85680 }, { "epoch": 0.17071353436185133, "grad_norm": 0.14709030091762543, "learning_rate": 0.002, "loss": 2.5636, "step": 85690 }, { "epoch": 0.17073345658549025, "grad_norm": 0.14638985693454742, "learning_rate": 0.002, "loss": 2.5683, "step": 85700 }, { "epoch": 0.17075337880912916, "grad_norm": 0.21201582252979279, "learning_rate": 0.002, "loss": 2.5605, "step": 85710 }, { "epoch": 0.17077330103276808, "grad_norm": 0.141075000166893, "learning_rate": 0.002, "loss": 2.572, "step": 85720 }, { "epoch": 0.170793223256407, "grad_norm": 0.21534650027751923, "learning_rate": 0.002, "loss": 2.5708, "step": 85730 }, { "epoch": 0.1708131454800459, "grad_norm": 0.1466340720653534, "learning_rate": 0.002, "loss": 2.5808, "step": 85740 }, { "epoch": 0.1708330677036848, "grad_norm": 0.1601051688194275, "learning_rate": 0.002, "loss": 2.5743, "step": 85750 }, { "epoch": 0.17085298992732373, "grad_norm": 0.165643110871315, "learning_rate": 0.002, "loss": 2.565, "step": 85760 }, { "epoch": 0.17087291215096265, "grad_norm": 0.13627398014068604, "learning_rate": 0.002, "loss": 2.5726, "step": 85770 }, { "epoch": 0.17089283437460157, "grad_norm": 0.15126807987689972, "learning_rate": 0.002, "loss": 2.5662, "step": 85780 }, { "epoch": 0.17091275659824046, "grad_norm": 0.2221672236919403, "learning_rate": 0.002, "loss": 2.5765, "step": 85790 }, { "epoch": 0.17093267882187937, "grad_norm": 0.18060049414634705, "learning_rate": 0.002, "loss": 2.5675, "step": 85800 }, { "epoch": 0.1709526010455183, "grad_norm": 0.17670458555221558, "learning_rate": 0.002, "loss": 2.581, "step": 85810 }, { "epoch": 0.1709725232691572, "grad_norm": 0.1528731882572174, "learning_rate": 0.002, "loss": 2.5493, "step": 85820 }, { "epoch": 0.17099244549279613, "grad_norm": 0.1338634192943573, "learning_rate": 0.002, "loss": 2.571, "step": 85830 }, { "epoch": 0.17101236771643505, "grad_norm": 0.19892671704292297, "learning_rate": 0.002, "loss": 2.5678, "step": 85840 }, { "epoch": 0.17103228994007394, "grad_norm": 0.16529317200183868, "learning_rate": 0.002, "loss": 2.5645, "step": 85850 }, { "epoch": 0.17105221216371286, "grad_norm": 0.14808009564876556, "learning_rate": 0.002, "loss": 2.5868, "step": 85860 }, { "epoch": 0.17107213438735178, "grad_norm": 0.16976863145828247, "learning_rate": 0.002, "loss": 2.5788, "step": 85870 }, { "epoch": 0.1710920566109907, "grad_norm": 0.1666228175163269, "learning_rate": 0.002, "loss": 2.5735, "step": 85880 }, { "epoch": 0.1711119788346296, "grad_norm": 0.17117416858673096, "learning_rate": 0.002, "loss": 2.5724, "step": 85890 }, { "epoch": 0.17113190105826853, "grad_norm": 0.15055547654628754, "learning_rate": 0.002, "loss": 2.5659, "step": 85900 }, { "epoch": 0.17115182328190742, "grad_norm": 0.15613922476768494, "learning_rate": 0.002, "loss": 2.5794, "step": 85910 }, { "epoch": 0.17117174550554634, "grad_norm": 0.16311536729335785, "learning_rate": 0.002, "loss": 2.5668, "step": 85920 }, { "epoch": 0.17119166772918526, "grad_norm": 0.1532801240682602, "learning_rate": 0.002, "loss": 2.5708, "step": 85930 }, { "epoch": 0.17121158995282418, "grad_norm": 0.18480835855007172, "learning_rate": 0.002, "loss": 2.5671, "step": 85940 }, { "epoch": 0.1712315121764631, "grad_norm": 0.1690027117729187, "learning_rate": 0.002, "loss": 2.5841, "step": 85950 }, { "epoch": 0.17125143440010202, "grad_norm": 0.16790224611759186, "learning_rate": 0.002, "loss": 2.57, "step": 85960 }, { "epoch": 0.1712713566237409, "grad_norm": 0.18611766397953033, "learning_rate": 0.002, "loss": 2.5793, "step": 85970 }, { "epoch": 0.17129127884737982, "grad_norm": 0.16229096055030823, "learning_rate": 0.002, "loss": 2.5725, "step": 85980 }, { "epoch": 0.17131120107101874, "grad_norm": 0.15763449668884277, "learning_rate": 0.002, "loss": 2.5632, "step": 85990 }, { "epoch": 0.17133112329465766, "grad_norm": 0.19819426536560059, "learning_rate": 0.002, "loss": 2.5695, "step": 86000 }, { "epoch": 0.17135104551829658, "grad_norm": 0.1699950397014618, "learning_rate": 0.002, "loss": 2.5649, "step": 86010 }, { "epoch": 0.17137096774193547, "grad_norm": 0.20166602730751038, "learning_rate": 0.002, "loss": 2.5758, "step": 86020 }, { "epoch": 0.1713908899655744, "grad_norm": 0.15893025696277618, "learning_rate": 0.002, "loss": 2.5601, "step": 86030 }, { "epoch": 0.1714108121892133, "grad_norm": 0.16415207087993622, "learning_rate": 0.002, "loss": 2.5715, "step": 86040 }, { "epoch": 0.17143073441285223, "grad_norm": 0.17957694828510284, "learning_rate": 0.002, "loss": 2.5812, "step": 86050 }, { "epoch": 0.17145065663649114, "grad_norm": 0.12906493246555328, "learning_rate": 0.002, "loss": 2.568, "step": 86060 }, { "epoch": 0.17147057886013006, "grad_norm": 0.16189435124397278, "learning_rate": 0.002, "loss": 2.5629, "step": 86070 }, { "epoch": 0.17149050108376895, "grad_norm": 0.16085152328014374, "learning_rate": 0.002, "loss": 2.5723, "step": 86080 }, { "epoch": 0.17151042330740787, "grad_norm": 0.16912871599197388, "learning_rate": 0.002, "loss": 2.5726, "step": 86090 }, { "epoch": 0.1715303455310468, "grad_norm": 0.15865856409072876, "learning_rate": 0.002, "loss": 2.5926, "step": 86100 }, { "epoch": 0.1715502677546857, "grad_norm": 0.15190574526786804, "learning_rate": 0.002, "loss": 2.5672, "step": 86110 }, { "epoch": 0.17157018997832463, "grad_norm": 0.1670912504196167, "learning_rate": 0.002, "loss": 2.5653, "step": 86120 }, { "epoch": 0.17159011220196355, "grad_norm": 0.15275506675243378, "learning_rate": 0.002, "loss": 2.5791, "step": 86130 }, { "epoch": 0.17161003442560244, "grad_norm": 0.16836877167224884, "learning_rate": 0.002, "loss": 2.5565, "step": 86140 }, { "epoch": 0.17162995664924136, "grad_norm": 0.16305457055568695, "learning_rate": 0.002, "loss": 2.5716, "step": 86150 }, { "epoch": 0.17164987887288027, "grad_norm": 0.15508690476417542, "learning_rate": 0.002, "loss": 2.5597, "step": 86160 }, { "epoch": 0.1716698010965192, "grad_norm": 0.15989799797534943, "learning_rate": 0.002, "loss": 2.5707, "step": 86170 }, { "epoch": 0.1716897233201581, "grad_norm": 0.15481500327587128, "learning_rate": 0.002, "loss": 2.5777, "step": 86180 }, { "epoch": 0.17170964554379703, "grad_norm": 0.1783352941274643, "learning_rate": 0.002, "loss": 2.5616, "step": 86190 }, { "epoch": 0.17172956776743592, "grad_norm": 0.15615563094615936, "learning_rate": 0.002, "loss": 2.593, "step": 86200 }, { "epoch": 0.17174948999107484, "grad_norm": 0.17264339327812195, "learning_rate": 0.002, "loss": 2.5631, "step": 86210 }, { "epoch": 0.17176941221471376, "grad_norm": 0.13902096450328827, "learning_rate": 0.002, "loss": 2.5594, "step": 86220 }, { "epoch": 0.17178933443835268, "grad_norm": 0.166082501411438, "learning_rate": 0.002, "loss": 2.5662, "step": 86230 }, { "epoch": 0.1718092566619916, "grad_norm": 0.15863832831382751, "learning_rate": 0.002, "loss": 2.5561, "step": 86240 }, { "epoch": 0.17182917888563048, "grad_norm": 0.18490073084831238, "learning_rate": 0.002, "loss": 2.55, "step": 86250 }, { "epoch": 0.1718491011092694, "grad_norm": 0.19890065491199493, "learning_rate": 0.002, "loss": 2.569, "step": 86260 }, { "epoch": 0.17186902333290832, "grad_norm": 0.1556917130947113, "learning_rate": 0.002, "loss": 2.573, "step": 86270 }, { "epoch": 0.17188894555654724, "grad_norm": 0.20790299773216248, "learning_rate": 0.002, "loss": 2.5627, "step": 86280 }, { "epoch": 0.17190886778018616, "grad_norm": 0.14331848919391632, "learning_rate": 0.002, "loss": 2.5727, "step": 86290 }, { "epoch": 0.17192879000382508, "grad_norm": 0.15006417036056519, "learning_rate": 0.002, "loss": 2.5472, "step": 86300 }, { "epoch": 0.17194871222746397, "grad_norm": 0.1719401776790619, "learning_rate": 0.002, "loss": 2.5746, "step": 86310 }, { "epoch": 0.1719686344511029, "grad_norm": 0.17201654613018036, "learning_rate": 0.002, "loss": 2.5791, "step": 86320 }, { "epoch": 0.1719885566747418, "grad_norm": 0.1758849173784256, "learning_rate": 0.002, "loss": 2.5688, "step": 86330 }, { "epoch": 0.17200847889838072, "grad_norm": 0.16877435147762299, "learning_rate": 0.002, "loss": 2.5691, "step": 86340 }, { "epoch": 0.17202840112201964, "grad_norm": 0.15907038748264313, "learning_rate": 0.002, "loss": 2.5677, "step": 86350 }, { "epoch": 0.17204832334565856, "grad_norm": 0.14509275555610657, "learning_rate": 0.002, "loss": 2.5655, "step": 86360 }, { "epoch": 0.17206824556929745, "grad_norm": 0.15500974655151367, "learning_rate": 0.002, "loss": 2.5836, "step": 86370 }, { "epoch": 0.17208816779293637, "grad_norm": 0.1674761325120926, "learning_rate": 0.002, "loss": 2.57, "step": 86380 }, { "epoch": 0.1721080900165753, "grad_norm": 0.14691540598869324, "learning_rate": 0.002, "loss": 2.5675, "step": 86390 }, { "epoch": 0.1721280122402142, "grad_norm": 0.18200945854187012, "learning_rate": 0.002, "loss": 2.5784, "step": 86400 }, { "epoch": 0.17214793446385313, "grad_norm": 0.18651646375656128, "learning_rate": 0.002, "loss": 2.5559, "step": 86410 }, { "epoch": 0.17216785668749204, "grad_norm": 0.18578973412513733, "learning_rate": 0.002, "loss": 2.5666, "step": 86420 }, { "epoch": 0.17218777891113093, "grad_norm": 0.14248859882354736, "learning_rate": 0.002, "loss": 2.5694, "step": 86430 }, { "epoch": 0.17220770113476985, "grad_norm": 0.1766522079706192, "learning_rate": 0.002, "loss": 2.5557, "step": 86440 }, { "epoch": 0.17222762335840877, "grad_norm": 0.1760503202676773, "learning_rate": 0.002, "loss": 2.5817, "step": 86450 }, { "epoch": 0.1722475455820477, "grad_norm": 0.16663067042827606, "learning_rate": 0.002, "loss": 2.5661, "step": 86460 }, { "epoch": 0.1722674678056866, "grad_norm": 0.151969313621521, "learning_rate": 0.002, "loss": 2.5765, "step": 86470 }, { "epoch": 0.17228739002932553, "grad_norm": 0.17898622155189514, "learning_rate": 0.002, "loss": 2.5588, "step": 86480 }, { "epoch": 0.17230731225296442, "grad_norm": 0.20945791900157928, "learning_rate": 0.002, "loss": 2.5667, "step": 86490 }, { "epoch": 0.17232723447660334, "grad_norm": 0.1994609832763672, "learning_rate": 0.002, "loss": 2.5658, "step": 86500 }, { "epoch": 0.17234715670024225, "grad_norm": 0.13715024292469025, "learning_rate": 0.002, "loss": 2.5708, "step": 86510 }, { "epoch": 0.17236707892388117, "grad_norm": 0.14713022112846375, "learning_rate": 0.002, "loss": 2.5487, "step": 86520 }, { "epoch": 0.1723870011475201, "grad_norm": 0.17139975726604462, "learning_rate": 0.002, "loss": 2.5699, "step": 86530 }, { "epoch": 0.17240692337115898, "grad_norm": 0.13576309382915497, "learning_rate": 0.002, "loss": 2.5661, "step": 86540 }, { "epoch": 0.1724268455947979, "grad_norm": 0.1380937546491623, "learning_rate": 0.002, "loss": 2.5706, "step": 86550 }, { "epoch": 0.17244676781843682, "grad_norm": 0.16846804320812225, "learning_rate": 0.002, "loss": 2.5658, "step": 86560 }, { "epoch": 0.17246669004207574, "grad_norm": 0.15707653760910034, "learning_rate": 0.002, "loss": 2.5707, "step": 86570 }, { "epoch": 0.17248661226571466, "grad_norm": 0.1517738252878189, "learning_rate": 0.002, "loss": 2.5642, "step": 86580 }, { "epoch": 0.17250653448935357, "grad_norm": 0.17153407633304596, "learning_rate": 0.002, "loss": 2.5704, "step": 86590 }, { "epoch": 0.17252645671299247, "grad_norm": 0.15694963932037354, "learning_rate": 0.002, "loss": 2.5635, "step": 86600 }, { "epoch": 0.17254637893663138, "grad_norm": 0.1392410397529602, "learning_rate": 0.002, "loss": 2.5665, "step": 86610 }, { "epoch": 0.1725663011602703, "grad_norm": 0.16644108295440674, "learning_rate": 0.002, "loss": 2.578, "step": 86620 }, { "epoch": 0.17258622338390922, "grad_norm": 0.1622840315103531, "learning_rate": 0.002, "loss": 2.5719, "step": 86630 }, { "epoch": 0.17260614560754814, "grad_norm": 0.13688327372074127, "learning_rate": 0.002, "loss": 2.5721, "step": 86640 }, { "epoch": 0.17262606783118706, "grad_norm": 0.1725768893957138, "learning_rate": 0.002, "loss": 2.567, "step": 86650 }, { "epoch": 0.17264599005482595, "grad_norm": 0.19631041586399078, "learning_rate": 0.002, "loss": 2.5638, "step": 86660 }, { "epoch": 0.17266591227846487, "grad_norm": 0.13967882096767426, "learning_rate": 0.002, "loss": 2.5519, "step": 86670 }, { "epoch": 0.17268583450210379, "grad_norm": 0.1405952423810959, "learning_rate": 0.002, "loss": 2.5699, "step": 86680 }, { "epoch": 0.1727057567257427, "grad_norm": 0.17153319716453552, "learning_rate": 0.002, "loss": 2.5765, "step": 86690 }, { "epoch": 0.17272567894938162, "grad_norm": 0.17723527550697327, "learning_rate": 0.002, "loss": 2.5607, "step": 86700 }, { "epoch": 0.17274560117302054, "grad_norm": 0.14510415494441986, "learning_rate": 0.002, "loss": 2.558, "step": 86710 }, { "epoch": 0.17276552339665943, "grad_norm": 0.17553548514842987, "learning_rate": 0.002, "loss": 2.5519, "step": 86720 }, { "epoch": 0.17278544562029835, "grad_norm": 0.16037902235984802, "learning_rate": 0.002, "loss": 2.5743, "step": 86730 }, { "epoch": 0.17280536784393727, "grad_norm": 0.21114419400691986, "learning_rate": 0.002, "loss": 2.5722, "step": 86740 }, { "epoch": 0.1728252900675762, "grad_norm": 0.18919995427131653, "learning_rate": 0.002, "loss": 2.5779, "step": 86750 }, { "epoch": 0.1728452122912151, "grad_norm": 0.19104117155075073, "learning_rate": 0.002, "loss": 2.5627, "step": 86760 }, { "epoch": 0.172865134514854, "grad_norm": 0.16845037043094635, "learning_rate": 0.002, "loss": 2.5732, "step": 86770 }, { "epoch": 0.17288505673849291, "grad_norm": 0.14090649783611298, "learning_rate": 0.002, "loss": 2.5754, "step": 86780 }, { "epoch": 0.17290497896213183, "grad_norm": 0.15311598777770996, "learning_rate": 0.002, "loss": 2.5648, "step": 86790 }, { "epoch": 0.17292490118577075, "grad_norm": 0.1605374664068222, "learning_rate": 0.002, "loss": 2.5708, "step": 86800 }, { "epoch": 0.17294482340940967, "grad_norm": 0.16473960876464844, "learning_rate": 0.002, "loss": 2.5707, "step": 86810 }, { "epoch": 0.1729647456330486, "grad_norm": 0.18831408023834229, "learning_rate": 0.002, "loss": 2.5864, "step": 86820 }, { "epoch": 0.17298466785668748, "grad_norm": 0.1411716341972351, "learning_rate": 0.002, "loss": 2.5712, "step": 86830 }, { "epoch": 0.1730045900803264, "grad_norm": 0.17704865336418152, "learning_rate": 0.002, "loss": 2.5664, "step": 86840 }, { "epoch": 0.17302451230396532, "grad_norm": 0.1658766269683838, "learning_rate": 0.002, "loss": 2.567, "step": 86850 }, { "epoch": 0.17304443452760424, "grad_norm": 0.15924686193466187, "learning_rate": 0.002, "loss": 2.5625, "step": 86860 }, { "epoch": 0.17306435675124315, "grad_norm": 0.24422432482242584, "learning_rate": 0.002, "loss": 2.5748, "step": 86870 }, { "epoch": 0.17308427897488207, "grad_norm": 0.14205731451511383, "learning_rate": 0.002, "loss": 2.5684, "step": 86880 }, { "epoch": 0.17310420119852096, "grad_norm": 0.15906189382076263, "learning_rate": 0.002, "loss": 2.5591, "step": 86890 }, { "epoch": 0.17312412342215988, "grad_norm": 0.16310739517211914, "learning_rate": 0.002, "loss": 2.5527, "step": 86900 }, { "epoch": 0.1731440456457988, "grad_norm": 0.13556934893131256, "learning_rate": 0.002, "loss": 2.5623, "step": 86910 }, { "epoch": 0.17316396786943772, "grad_norm": 0.17388051748275757, "learning_rate": 0.002, "loss": 2.575, "step": 86920 }, { "epoch": 0.17318389009307664, "grad_norm": 0.16249065101146698, "learning_rate": 0.002, "loss": 2.5724, "step": 86930 }, { "epoch": 0.17320381231671556, "grad_norm": 0.1818801909685135, "learning_rate": 0.002, "loss": 2.5743, "step": 86940 }, { "epoch": 0.17322373454035445, "grad_norm": 0.1530754268169403, "learning_rate": 0.002, "loss": 2.5828, "step": 86950 }, { "epoch": 0.17324365676399336, "grad_norm": 0.1775769293308258, "learning_rate": 0.002, "loss": 2.585, "step": 86960 }, { "epoch": 0.17326357898763228, "grad_norm": 0.1568218618631363, "learning_rate": 0.002, "loss": 2.5693, "step": 86970 }, { "epoch": 0.1732835012112712, "grad_norm": 0.19148768484592438, "learning_rate": 0.002, "loss": 2.5582, "step": 86980 }, { "epoch": 0.17330342343491012, "grad_norm": 0.15552864968776703, "learning_rate": 0.002, "loss": 2.5586, "step": 86990 }, { "epoch": 0.17332334565854904, "grad_norm": 0.15129967033863068, "learning_rate": 0.002, "loss": 2.5725, "step": 87000 }, { "epoch": 0.17334326788218793, "grad_norm": 0.15862953662872314, "learning_rate": 0.002, "loss": 2.5803, "step": 87010 }, { "epoch": 0.17336319010582685, "grad_norm": 0.14995309710502625, "learning_rate": 0.002, "loss": 2.5702, "step": 87020 }, { "epoch": 0.17338311232946577, "grad_norm": 0.14237892627716064, "learning_rate": 0.002, "loss": 2.5765, "step": 87030 }, { "epoch": 0.17340303455310468, "grad_norm": 0.1961721032857895, "learning_rate": 0.002, "loss": 2.5673, "step": 87040 }, { "epoch": 0.1734229567767436, "grad_norm": 0.15051054954528809, "learning_rate": 0.002, "loss": 2.5704, "step": 87050 }, { "epoch": 0.1734428790003825, "grad_norm": 0.18737003207206726, "learning_rate": 0.002, "loss": 2.591, "step": 87060 }, { "epoch": 0.1734628012240214, "grad_norm": 0.15320494771003723, "learning_rate": 0.002, "loss": 2.5711, "step": 87070 }, { "epoch": 0.17348272344766033, "grad_norm": 0.15200275182724, "learning_rate": 0.002, "loss": 2.5807, "step": 87080 }, { "epoch": 0.17350264567129925, "grad_norm": 0.17225342988967896, "learning_rate": 0.002, "loss": 2.5556, "step": 87090 }, { "epoch": 0.17352256789493817, "grad_norm": 0.1598169356584549, "learning_rate": 0.002, "loss": 2.5602, "step": 87100 }, { "epoch": 0.1735424901185771, "grad_norm": 0.16435091197490692, "learning_rate": 0.002, "loss": 2.5797, "step": 87110 }, { "epoch": 0.17356241234221598, "grad_norm": 0.15878358483314514, "learning_rate": 0.002, "loss": 2.5785, "step": 87120 }, { "epoch": 0.1735823345658549, "grad_norm": 0.20190218091011047, "learning_rate": 0.002, "loss": 2.564, "step": 87130 }, { "epoch": 0.17360225678949381, "grad_norm": 0.16196508705615997, "learning_rate": 0.002, "loss": 2.5623, "step": 87140 }, { "epoch": 0.17362217901313273, "grad_norm": 0.18135176599025726, "learning_rate": 0.002, "loss": 2.5747, "step": 87150 }, { "epoch": 0.17364210123677165, "grad_norm": 0.16241486370563507, "learning_rate": 0.002, "loss": 2.5741, "step": 87160 }, { "epoch": 0.17366202346041057, "grad_norm": 0.1511552929878235, "learning_rate": 0.002, "loss": 2.5645, "step": 87170 }, { "epoch": 0.17368194568404946, "grad_norm": 0.15789951384067535, "learning_rate": 0.002, "loss": 2.5642, "step": 87180 }, { "epoch": 0.17370186790768838, "grad_norm": 0.1443144977092743, "learning_rate": 0.002, "loss": 2.577, "step": 87190 }, { "epoch": 0.1737217901313273, "grad_norm": 0.1622972935438156, "learning_rate": 0.002, "loss": 2.5653, "step": 87200 }, { "epoch": 0.17374171235496622, "grad_norm": 0.12954019010066986, "learning_rate": 0.002, "loss": 2.5631, "step": 87210 }, { "epoch": 0.17376163457860513, "grad_norm": 0.15584254264831543, "learning_rate": 0.002, "loss": 2.554, "step": 87220 }, { "epoch": 0.17378155680224405, "grad_norm": 0.20190277695655823, "learning_rate": 0.002, "loss": 2.5738, "step": 87230 }, { "epoch": 0.17380147902588294, "grad_norm": 0.1581936776638031, "learning_rate": 0.002, "loss": 2.5576, "step": 87240 }, { "epoch": 0.17382140124952186, "grad_norm": 0.150729238986969, "learning_rate": 0.002, "loss": 2.5716, "step": 87250 }, { "epoch": 0.17384132347316078, "grad_norm": 0.14381803572177887, "learning_rate": 0.002, "loss": 2.5796, "step": 87260 }, { "epoch": 0.1738612456967997, "grad_norm": 0.16141989827156067, "learning_rate": 0.002, "loss": 2.5607, "step": 87270 }, { "epoch": 0.17388116792043862, "grad_norm": 0.16475707292556763, "learning_rate": 0.002, "loss": 2.5753, "step": 87280 }, { "epoch": 0.1739010901440775, "grad_norm": 0.1456843614578247, "learning_rate": 0.002, "loss": 2.5641, "step": 87290 }, { "epoch": 0.17392101236771643, "grad_norm": 0.15834656357765198, "learning_rate": 0.002, "loss": 2.5663, "step": 87300 }, { "epoch": 0.17394093459135535, "grad_norm": 0.16838398575782776, "learning_rate": 0.002, "loss": 2.5675, "step": 87310 }, { "epoch": 0.17396085681499426, "grad_norm": 0.1654181182384491, "learning_rate": 0.002, "loss": 2.5709, "step": 87320 }, { "epoch": 0.17398077903863318, "grad_norm": 0.14985838532447815, "learning_rate": 0.002, "loss": 2.574, "step": 87330 }, { "epoch": 0.1740007012622721, "grad_norm": 0.13541877269744873, "learning_rate": 0.002, "loss": 2.5696, "step": 87340 }, { "epoch": 0.174020623485911, "grad_norm": 0.15694209933280945, "learning_rate": 0.002, "loss": 2.5697, "step": 87350 }, { "epoch": 0.1740405457095499, "grad_norm": 0.1962677538394928, "learning_rate": 0.002, "loss": 2.5724, "step": 87360 }, { "epoch": 0.17406046793318883, "grad_norm": 0.1569688320159912, "learning_rate": 0.002, "loss": 2.5732, "step": 87370 }, { "epoch": 0.17408039015682775, "grad_norm": 0.19363771378993988, "learning_rate": 0.002, "loss": 2.5728, "step": 87380 }, { "epoch": 0.17410031238046667, "grad_norm": 0.1416642665863037, "learning_rate": 0.002, "loss": 2.5619, "step": 87390 }, { "epoch": 0.17412023460410558, "grad_norm": 0.13516512513160706, "learning_rate": 0.002, "loss": 2.5719, "step": 87400 }, { "epoch": 0.17414015682774447, "grad_norm": 0.1918295919895172, "learning_rate": 0.002, "loss": 2.5761, "step": 87410 }, { "epoch": 0.1741600790513834, "grad_norm": 0.14894412457942963, "learning_rate": 0.002, "loss": 2.5651, "step": 87420 }, { "epoch": 0.1741800012750223, "grad_norm": 0.15572400391101837, "learning_rate": 0.002, "loss": 2.5714, "step": 87430 }, { "epoch": 0.17419992349866123, "grad_norm": 0.17860619723796844, "learning_rate": 0.002, "loss": 2.5818, "step": 87440 }, { "epoch": 0.17421984572230015, "grad_norm": 0.15205299854278564, "learning_rate": 0.002, "loss": 2.5844, "step": 87450 }, { "epoch": 0.17423976794593907, "grad_norm": 0.15253515541553497, "learning_rate": 0.002, "loss": 2.5686, "step": 87460 }, { "epoch": 0.17425969016957796, "grad_norm": 0.14992816746234894, "learning_rate": 0.002, "loss": 2.5729, "step": 87470 }, { "epoch": 0.17427961239321688, "grad_norm": 0.16067193448543549, "learning_rate": 0.002, "loss": 2.5729, "step": 87480 }, { "epoch": 0.1742995346168558, "grad_norm": 0.18135592341423035, "learning_rate": 0.002, "loss": 2.5665, "step": 87490 }, { "epoch": 0.1743194568404947, "grad_norm": 0.1497582346200943, "learning_rate": 0.002, "loss": 2.5873, "step": 87500 }, { "epoch": 0.17433937906413363, "grad_norm": 0.16600315272808075, "learning_rate": 0.002, "loss": 2.5765, "step": 87510 }, { "epoch": 0.17435930128777252, "grad_norm": 0.1432974487543106, "learning_rate": 0.002, "loss": 2.5578, "step": 87520 }, { "epoch": 0.17437922351141144, "grad_norm": 0.1493396759033203, "learning_rate": 0.002, "loss": 2.5654, "step": 87530 }, { "epoch": 0.17439914573505036, "grad_norm": 0.1424267739057541, "learning_rate": 0.002, "loss": 2.5478, "step": 87540 }, { "epoch": 0.17441906795868928, "grad_norm": 0.2185417115688324, "learning_rate": 0.002, "loss": 2.5707, "step": 87550 }, { "epoch": 0.1744389901823282, "grad_norm": 0.14934122562408447, "learning_rate": 0.002, "loss": 2.5729, "step": 87560 }, { "epoch": 0.17445891240596711, "grad_norm": 0.15720675885677338, "learning_rate": 0.002, "loss": 2.5769, "step": 87570 }, { "epoch": 0.174478834629606, "grad_norm": 0.19782723486423492, "learning_rate": 0.002, "loss": 2.5766, "step": 87580 }, { "epoch": 0.17449875685324492, "grad_norm": 0.15470626950263977, "learning_rate": 0.002, "loss": 2.5718, "step": 87590 }, { "epoch": 0.17451867907688384, "grad_norm": 0.17991790175437927, "learning_rate": 0.002, "loss": 2.5727, "step": 87600 }, { "epoch": 0.17453860130052276, "grad_norm": 0.1462937593460083, "learning_rate": 0.002, "loss": 2.5888, "step": 87610 }, { "epoch": 0.17455852352416168, "grad_norm": 0.17092499136924744, "learning_rate": 0.002, "loss": 2.5696, "step": 87620 }, { "epoch": 0.1745784457478006, "grad_norm": 0.131900355219841, "learning_rate": 0.002, "loss": 2.5459, "step": 87630 }, { "epoch": 0.1745983679714395, "grad_norm": 0.15914028882980347, "learning_rate": 0.002, "loss": 2.565, "step": 87640 }, { "epoch": 0.1746182901950784, "grad_norm": 0.15565474331378937, "learning_rate": 0.002, "loss": 2.5675, "step": 87650 }, { "epoch": 0.17463821241871733, "grad_norm": 0.16741681098937988, "learning_rate": 0.002, "loss": 2.568, "step": 87660 }, { "epoch": 0.17465813464235624, "grad_norm": 0.16099846363067627, "learning_rate": 0.002, "loss": 2.5763, "step": 87670 }, { "epoch": 0.17467805686599516, "grad_norm": 0.18189503252506256, "learning_rate": 0.002, "loss": 2.573, "step": 87680 }, { "epoch": 0.17469797908963408, "grad_norm": 0.141300231218338, "learning_rate": 0.002, "loss": 2.5723, "step": 87690 }, { "epoch": 0.17471790131327297, "grad_norm": 0.18379823863506317, "learning_rate": 0.002, "loss": 2.5691, "step": 87700 }, { "epoch": 0.1747378235369119, "grad_norm": 0.33094045519828796, "learning_rate": 0.002, "loss": 2.5749, "step": 87710 }, { "epoch": 0.1747577457605508, "grad_norm": 0.14934957027435303, "learning_rate": 0.002, "loss": 2.5691, "step": 87720 }, { "epoch": 0.17477766798418973, "grad_norm": 0.1374179571866989, "learning_rate": 0.002, "loss": 2.5722, "step": 87730 }, { "epoch": 0.17479759020782865, "grad_norm": 0.16141819953918457, "learning_rate": 0.002, "loss": 2.5588, "step": 87740 }, { "epoch": 0.17481751243146756, "grad_norm": 0.21149899065494537, "learning_rate": 0.002, "loss": 2.5853, "step": 87750 }, { "epoch": 0.17483743465510646, "grad_norm": 0.13890697062015533, "learning_rate": 0.002, "loss": 2.559, "step": 87760 }, { "epoch": 0.17485735687874537, "grad_norm": 0.14185120165348053, "learning_rate": 0.002, "loss": 2.5566, "step": 87770 }, { "epoch": 0.1748772791023843, "grad_norm": 0.1539301872253418, "learning_rate": 0.002, "loss": 2.5516, "step": 87780 }, { "epoch": 0.1748972013260232, "grad_norm": 0.1682654768228531, "learning_rate": 0.002, "loss": 2.5763, "step": 87790 }, { "epoch": 0.17491712354966213, "grad_norm": 0.19368666410446167, "learning_rate": 0.002, "loss": 2.5668, "step": 87800 }, { "epoch": 0.17493704577330102, "grad_norm": 0.1627095639705658, "learning_rate": 0.002, "loss": 2.5749, "step": 87810 }, { "epoch": 0.17495696799693994, "grad_norm": 0.14827480912208557, "learning_rate": 0.002, "loss": 2.5887, "step": 87820 }, { "epoch": 0.17497689022057886, "grad_norm": 0.16934692859649658, "learning_rate": 0.002, "loss": 2.5699, "step": 87830 }, { "epoch": 0.17499681244421778, "grad_norm": 0.1690441071987152, "learning_rate": 0.002, "loss": 2.5716, "step": 87840 }, { "epoch": 0.1750167346678567, "grad_norm": 0.15275320410728455, "learning_rate": 0.002, "loss": 2.5712, "step": 87850 }, { "epoch": 0.1750366568914956, "grad_norm": 0.1593727171421051, "learning_rate": 0.002, "loss": 2.5639, "step": 87860 }, { "epoch": 0.1750565791151345, "grad_norm": 0.18713101744651794, "learning_rate": 0.002, "loss": 2.5771, "step": 87870 }, { "epoch": 0.17507650133877342, "grad_norm": 0.14550861716270447, "learning_rate": 0.002, "loss": 2.5723, "step": 87880 }, { "epoch": 0.17509642356241234, "grad_norm": 0.211653470993042, "learning_rate": 0.002, "loss": 2.5608, "step": 87890 }, { "epoch": 0.17511634578605126, "grad_norm": 0.1822977215051651, "learning_rate": 0.002, "loss": 2.5603, "step": 87900 }, { "epoch": 0.17513626800969018, "grad_norm": 0.19450855255126953, "learning_rate": 0.002, "loss": 2.5622, "step": 87910 }, { "epoch": 0.1751561902333291, "grad_norm": 0.17455804347991943, "learning_rate": 0.002, "loss": 2.5849, "step": 87920 }, { "epoch": 0.175176112456968, "grad_norm": 0.1518774926662445, "learning_rate": 0.002, "loss": 2.5513, "step": 87930 }, { "epoch": 0.1751960346806069, "grad_norm": 0.1686762571334839, "learning_rate": 0.002, "loss": 2.5799, "step": 87940 }, { "epoch": 0.17521595690424582, "grad_norm": 0.15947242081165314, "learning_rate": 0.002, "loss": 2.5789, "step": 87950 }, { "epoch": 0.17523587912788474, "grad_norm": 0.13682821393013, "learning_rate": 0.002, "loss": 2.5643, "step": 87960 }, { "epoch": 0.17525580135152366, "grad_norm": 0.1681966483592987, "learning_rate": 0.002, "loss": 2.5797, "step": 87970 }, { "epoch": 0.17527572357516258, "grad_norm": 0.18096812069416046, "learning_rate": 0.002, "loss": 2.5623, "step": 87980 }, { "epoch": 0.17529564579880147, "grad_norm": 0.1704324334859848, "learning_rate": 0.002, "loss": 2.5704, "step": 87990 }, { "epoch": 0.1753155680224404, "grad_norm": 0.1598210632801056, "learning_rate": 0.002, "loss": 2.571, "step": 88000 }, { "epoch": 0.1753354902460793, "grad_norm": 0.18079295754432678, "learning_rate": 0.002, "loss": 2.5553, "step": 88010 }, { "epoch": 0.17535541246971822, "grad_norm": 0.18901090323925018, "learning_rate": 0.002, "loss": 2.5669, "step": 88020 }, { "epoch": 0.17537533469335714, "grad_norm": 0.15129804611206055, "learning_rate": 0.002, "loss": 2.5652, "step": 88030 }, { "epoch": 0.17539525691699603, "grad_norm": 0.1697484850883484, "learning_rate": 0.002, "loss": 2.5537, "step": 88040 }, { "epoch": 0.17541517914063495, "grad_norm": 0.1531979739665985, "learning_rate": 0.002, "loss": 2.5725, "step": 88050 }, { "epoch": 0.17543510136427387, "grad_norm": 0.1652020364999771, "learning_rate": 0.002, "loss": 2.5788, "step": 88060 }, { "epoch": 0.1754550235879128, "grad_norm": 0.19220393896102905, "learning_rate": 0.002, "loss": 2.5705, "step": 88070 }, { "epoch": 0.1754749458115517, "grad_norm": 0.1478395313024521, "learning_rate": 0.002, "loss": 2.5638, "step": 88080 }, { "epoch": 0.17549486803519063, "grad_norm": 0.18753273785114288, "learning_rate": 0.002, "loss": 2.5739, "step": 88090 }, { "epoch": 0.17551479025882952, "grad_norm": 0.21250179409980774, "learning_rate": 0.002, "loss": 2.5789, "step": 88100 }, { "epoch": 0.17553471248246844, "grad_norm": 0.14212600886821747, "learning_rate": 0.002, "loss": 2.5794, "step": 88110 }, { "epoch": 0.17555463470610735, "grad_norm": 0.14982010424137115, "learning_rate": 0.002, "loss": 2.5761, "step": 88120 }, { "epoch": 0.17557455692974627, "grad_norm": 0.17838303744792938, "learning_rate": 0.002, "loss": 2.5739, "step": 88130 }, { "epoch": 0.1755944791533852, "grad_norm": 0.15427124500274658, "learning_rate": 0.002, "loss": 2.5595, "step": 88140 }, { "epoch": 0.1756144013770241, "grad_norm": 0.13882234692573547, "learning_rate": 0.002, "loss": 2.5521, "step": 88150 }, { "epoch": 0.175634323600663, "grad_norm": 0.1704559326171875, "learning_rate": 0.002, "loss": 2.5727, "step": 88160 }, { "epoch": 0.17565424582430192, "grad_norm": 0.154791459441185, "learning_rate": 0.002, "loss": 2.5632, "step": 88170 }, { "epoch": 0.17567416804794084, "grad_norm": 0.1864013671875, "learning_rate": 0.002, "loss": 2.5767, "step": 88180 }, { "epoch": 0.17569409027157976, "grad_norm": 0.16246449947357178, "learning_rate": 0.002, "loss": 2.5801, "step": 88190 }, { "epoch": 0.17571401249521867, "grad_norm": 0.1429736465215683, "learning_rate": 0.002, "loss": 2.553, "step": 88200 }, { "epoch": 0.1757339347188576, "grad_norm": 0.171335831284523, "learning_rate": 0.002, "loss": 2.5634, "step": 88210 }, { "epoch": 0.17575385694249648, "grad_norm": 0.16183364391326904, "learning_rate": 0.002, "loss": 2.5823, "step": 88220 }, { "epoch": 0.1757737791661354, "grad_norm": 0.17426864802837372, "learning_rate": 0.002, "loss": 2.5875, "step": 88230 }, { "epoch": 0.17579370138977432, "grad_norm": 0.1770438700914383, "learning_rate": 0.002, "loss": 2.5748, "step": 88240 }, { "epoch": 0.17581362361341324, "grad_norm": 0.17686276137828827, "learning_rate": 0.002, "loss": 2.5718, "step": 88250 }, { "epoch": 0.17583354583705216, "grad_norm": 0.17667706310749054, "learning_rate": 0.002, "loss": 2.5746, "step": 88260 }, { "epoch": 0.17585346806069105, "grad_norm": 0.15693946182727814, "learning_rate": 0.002, "loss": 2.5641, "step": 88270 }, { "epoch": 0.17587339028432997, "grad_norm": 0.16729949414730072, "learning_rate": 0.002, "loss": 2.5809, "step": 88280 }, { "epoch": 0.17589331250796889, "grad_norm": 0.17384907603263855, "learning_rate": 0.002, "loss": 2.5717, "step": 88290 }, { "epoch": 0.1759132347316078, "grad_norm": 0.13429202139377594, "learning_rate": 0.002, "loss": 2.5592, "step": 88300 }, { "epoch": 0.17593315695524672, "grad_norm": 0.19719120860099792, "learning_rate": 0.002, "loss": 2.5723, "step": 88310 }, { "epoch": 0.17595307917888564, "grad_norm": 0.16475722193717957, "learning_rate": 0.002, "loss": 2.5588, "step": 88320 }, { "epoch": 0.17597300140252453, "grad_norm": 0.2454730123281479, "learning_rate": 0.002, "loss": 2.5849, "step": 88330 }, { "epoch": 0.17599292362616345, "grad_norm": 0.14747995138168335, "learning_rate": 0.002, "loss": 2.5746, "step": 88340 }, { "epoch": 0.17601284584980237, "grad_norm": 0.15604503452777863, "learning_rate": 0.002, "loss": 2.572, "step": 88350 }, { "epoch": 0.1760327680734413, "grad_norm": 0.1735510677099228, "learning_rate": 0.002, "loss": 2.5704, "step": 88360 }, { "epoch": 0.1760526902970802, "grad_norm": 0.16971315443515778, "learning_rate": 0.002, "loss": 2.5727, "step": 88370 }, { "epoch": 0.17607261252071912, "grad_norm": 0.14362283051013947, "learning_rate": 0.002, "loss": 2.5656, "step": 88380 }, { "epoch": 0.17609253474435801, "grad_norm": 0.16795076429843903, "learning_rate": 0.002, "loss": 2.5649, "step": 88390 }, { "epoch": 0.17611245696799693, "grad_norm": 0.16139711439609528, "learning_rate": 0.002, "loss": 2.5642, "step": 88400 }, { "epoch": 0.17613237919163585, "grad_norm": 0.1372656375169754, "learning_rate": 0.002, "loss": 2.5747, "step": 88410 }, { "epoch": 0.17615230141527477, "grad_norm": 0.16682705283164978, "learning_rate": 0.002, "loss": 2.5668, "step": 88420 }, { "epoch": 0.1761722236389137, "grad_norm": 0.17596504092216492, "learning_rate": 0.002, "loss": 2.5737, "step": 88430 }, { "epoch": 0.1761921458625526, "grad_norm": 0.13481536507606506, "learning_rate": 0.002, "loss": 2.56, "step": 88440 }, { "epoch": 0.1762120680861915, "grad_norm": 0.14728713035583496, "learning_rate": 0.002, "loss": 2.5758, "step": 88450 }, { "epoch": 0.17623199030983042, "grad_norm": 0.14587534964084625, "learning_rate": 0.002, "loss": 2.5765, "step": 88460 }, { "epoch": 0.17625191253346933, "grad_norm": 0.16519515216350555, "learning_rate": 0.002, "loss": 2.5694, "step": 88470 }, { "epoch": 0.17627183475710825, "grad_norm": 0.15246880054473877, "learning_rate": 0.002, "loss": 2.5552, "step": 88480 }, { "epoch": 0.17629175698074717, "grad_norm": 0.14382044970989227, "learning_rate": 0.002, "loss": 2.5611, "step": 88490 }, { "epoch": 0.1763116792043861, "grad_norm": 0.17391550540924072, "learning_rate": 0.002, "loss": 2.5898, "step": 88500 }, { "epoch": 0.17633160142802498, "grad_norm": 0.13514626026153564, "learning_rate": 0.002, "loss": 2.5671, "step": 88510 }, { "epoch": 0.1763515236516639, "grad_norm": 0.16315282881259918, "learning_rate": 0.002, "loss": 2.5784, "step": 88520 }, { "epoch": 0.17637144587530282, "grad_norm": 0.16530871391296387, "learning_rate": 0.002, "loss": 2.5707, "step": 88530 }, { "epoch": 0.17639136809894174, "grad_norm": 0.1524963527917862, "learning_rate": 0.002, "loss": 2.5699, "step": 88540 }, { "epoch": 0.17641129032258066, "grad_norm": 0.1550305336713791, "learning_rate": 0.002, "loss": 2.5724, "step": 88550 }, { "epoch": 0.17643121254621955, "grad_norm": 0.136598140001297, "learning_rate": 0.002, "loss": 2.5718, "step": 88560 }, { "epoch": 0.17645113476985846, "grad_norm": 0.17760026454925537, "learning_rate": 0.002, "loss": 2.5922, "step": 88570 }, { "epoch": 0.17647105699349738, "grad_norm": 0.1951930820941925, "learning_rate": 0.002, "loss": 2.5738, "step": 88580 }, { "epoch": 0.1764909792171363, "grad_norm": 0.14544644951820374, "learning_rate": 0.002, "loss": 2.5744, "step": 88590 }, { "epoch": 0.17651090144077522, "grad_norm": 0.14492090046405792, "learning_rate": 0.002, "loss": 2.5704, "step": 88600 }, { "epoch": 0.17653082366441414, "grad_norm": 0.18326087296009064, "learning_rate": 0.002, "loss": 2.5689, "step": 88610 }, { "epoch": 0.17655074588805303, "grad_norm": 0.1422017365694046, "learning_rate": 0.002, "loss": 2.5665, "step": 88620 }, { "epoch": 0.17657066811169195, "grad_norm": 0.16225707530975342, "learning_rate": 0.002, "loss": 2.5642, "step": 88630 }, { "epoch": 0.17659059033533087, "grad_norm": 0.18680298328399658, "learning_rate": 0.002, "loss": 2.5559, "step": 88640 }, { "epoch": 0.17661051255896978, "grad_norm": 0.13221757113933563, "learning_rate": 0.002, "loss": 2.567, "step": 88650 }, { "epoch": 0.1766304347826087, "grad_norm": 0.14458808302879333, "learning_rate": 0.002, "loss": 2.5596, "step": 88660 }, { "epoch": 0.17665035700624762, "grad_norm": 0.13410218060016632, "learning_rate": 0.002, "loss": 2.5755, "step": 88670 }, { "epoch": 0.1766702792298865, "grad_norm": 0.18174508213996887, "learning_rate": 0.002, "loss": 2.5775, "step": 88680 }, { "epoch": 0.17669020145352543, "grad_norm": 0.17635497450828552, "learning_rate": 0.002, "loss": 2.5656, "step": 88690 }, { "epoch": 0.17671012367716435, "grad_norm": 0.16884072124958038, "learning_rate": 0.002, "loss": 2.5727, "step": 88700 }, { "epoch": 0.17673004590080327, "grad_norm": 0.15657556056976318, "learning_rate": 0.002, "loss": 2.5641, "step": 88710 }, { "epoch": 0.17674996812444219, "grad_norm": 0.14373551309108734, "learning_rate": 0.002, "loss": 2.5606, "step": 88720 }, { "epoch": 0.1767698903480811, "grad_norm": 0.1989525705575943, "learning_rate": 0.002, "loss": 2.5673, "step": 88730 }, { "epoch": 0.17678981257172, "grad_norm": 0.1776818484067917, "learning_rate": 0.002, "loss": 2.5622, "step": 88740 }, { "epoch": 0.1768097347953589, "grad_norm": 0.23302572965621948, "learning_rate": 0.002, "loss": 2.5463, "step": 88750 }, { "epoch": 0.17682965701899783, "grad_norm": 0.16295039653778076, "learning_rate": 0.002, "loss": 2.5607, "step": 88760 }, { "epoch": 0.17684957924263675, "grad_norm": 0.19180764257907867, "learning_rate": 0.002, "loss": 2.5659, "step": 88770 }, { "epoch": 0.17686950146627567, "grad_norm": 0.1514338254928589, "learning_rate": 0.002, "loss": 2.5739, "step": 88780 }, { "epoch": 0.17688942368991456, "grad_norm": 0.18827654421329498, "learning_rate": 0.002, "loss": 2.568, "step": 88790 }, { "epoch": 0.17690934591355348, "grad_norm": 0.1540728658437729, "learning_rate": 0.002, "loss": 2.5726, "step": 88800 }, { "epoch": 0.1769292681371924, "grad_norm": 0.1533101350069046, "learning_rate": 0.002, "loss": 2.5663, "step": 88810 }, { "epoch": 0.17694919036083132, "grad_norm": 0.16611041128635406, "learning_rate": 0.002, "loss": 2.5708, "step": 88820 }, { "epoch": 0.17696911258447023, "grad_norm": 0.14913195371627808, "learning_rate": 0.002, "loss": 2.5565, "step": 88830 }, { "epoch": 0.17698903480810915, "grad_norm": 0.16142289340496063, "learning_rate": 0.002, "loss": 2.5665, "step": 88840 }, { "epoch": 0.17700895703174804, "grad_norm": 0.1482188105583191, "learning_rate": 0.002, "loss": 2.5671, "step": 88850 }, { "epoch": 0.17702887925538696, "grad_norm": 0.17651507258415222, "learning_rate": 0.002, "loss": 2.5675, "step": 88860 }, { "epoch": 0.17704880147902588, "grad_norm": 0.15046286582946777, "learning_rate": 0.002, "loss": 2.5592, "step": 88870 }, { "epoch": 0.1770687237026648, "grad_norm": 0.16998730599880219, "learning_rate": 0.002, "loss": 2.5738, "step": 88880 }, { "epoch": 0.17708864592630372, "grad_norm": 0.16258402168750763, "learning_rate": 0.002, "loss": 2.5589, "step": 88890 }, { "epoch": 0.17710856814994264, "grad_norm": 0.15135210752487183, "learning_rate": 0.002, "loss": 2.5693, "step": 88900 }, { "epoch": 0.17712849037358153, "grad_norm": 0.1834985315799713, "learning_rate": 0.002, "loss": 2.5554, "step": 88910 }, { "epoch": 0.17714841259722044, "grad_norm": 0.19724039733409882, "learning_rate": 0.002, "loss": 2.5773, "step": 88920 }, { "epoch": 0.17716833482085936, "grad_norm": 0.16071607172489166, "learning_rate": 0.002, "loss": 2.5798, "step": 88930 }, { "epoch": 0.17718825704449828, "grad_norm": 0.20904278755187988, "learning_rate": 0.002, "loss": 2.5671, "step": 88940 }, { "epoch": 0.1772081792681372, "grad_norm": 0.14228397607803345, "learning_rate": 0.002, "loss": 2.5715, "step": 88950 }, { "epoch": 0.17722810149177612, "grad_norm": 0.18177396059036255, "learning_rate": 0.002, "loss": 2.559, "step": 88960 }, { "epoch": 0.177248023715415, "grad_norm": 0.17531707882881165, "learning_rate": 0.002, "loss": 2.5673, "step": 88970 }, { "epoch": 0.17726794593905393, "grad_norm": 0.13881173729896545, "learning_rate": 0.002, "loss": 2.5689, "step": 88980 }, { "epoch": 0.17728786816269285, "grad_norm": 0.1561977118253708, "learning_rate": 0.002, "loss": 2.573, "step": 88990 }, { "epoch": 0.17730779038633177, "grad_norm": 0.1466272920370102, "learning_rate": 0.002, "loss": 2.5698, "step": 89000 }, { "epoch": 0.17732771260997068, "grad_norm": 0.1486227959394455, "learning_rate": 0.002, "loss": 2.5824, "step": 89010 }, { "epoch": 0.1773476348336096, "grad_norm": 0.1683216542005539, "learning_rate": 0.002, "loss": 2.5579, "step": 89020 }, { "epoch": 0.1773675570572485, "grad_norm": 0.167994424700737, "learning_rate": 0.002, "loss": 2.5698, "step": 89030 }, { "epoch": 0.1773874792808874, "grad_norm": 0.15204206109046936, "learning_rate": 0.002, "loss": 2.5654, "step": 89040 }, { "epoch": 0.17740740150452633, "grad_norm": 0.17440952360630035, "learning_rate": 0.002, "loss": 2.568, "step": 89050 }, { "epoch": 0.17742732372816525, "grad_norm": 0.1556740254163742, "learning_rate": 0.002, "loss": 2.5766, "step": 89060 }, { "epoch": 0.17744724595180417, "grad_norm": 0.20247480273246765, "learning_rate": 0.002, "loss": 2.5828, "step": 89070 }, { "epoch": 0.17746716817544306, "grad_norm": 0.15753135085105896, "learning_rate": 0.002, "loss": 2.5815, "step": 89080 }, { "epoch": 0.17748709039908198, "grad_norm": 0.18454699218273163, "learning_rate": 0.002, "loss": 2.5739, "step": 89090 }, { "epoch": 0.1775070126227209, "grad_norm": 0.15129436552524567, "learning_rate": 0.002, "loss": 2.5826, "step": 89100 }, { "epoch": 0.1775269348463598, "grad_norm": 0.18589915335178375, "learning_rate": 0.002, "loss": 2.5682, "step": 89110 }, { "epoch": 0.17754685706999873, "grad_norm": 0.15082231163978577, "learning_rate": 0.002, "loss": 2.5541, "step": 89120 }, { "epoch": 0.17756677929363765, "grad_norm": 0.1730530709028244, "learning_rate": 0.002, "loss": 2.5597, "step": 89130 }, { "epoch": 0.17758670151727654, "grad_norm": 0.1417243331670761, "learning_rate": 0.002, "loss": 2.5792, "step": 89140 }, { "epoch": 0.17760662374091546, "grad_norm": 0.13867272436618805, "learning_rate": 0.002, "loss": 2.5819, "step": 89150 }, { "epoch": 0.17762654596455438, "grad_norm": 0.15786150097846985, "learning_rate": 0.002, "loss": 2.5766, "step": 89160 }, { "epoch": 0.1776464681881933, "grad_norm": 0.13720783591270447, "learning_rate": 0.002, "loss": 2.5712, "step": 89170 }, { "epoch": 0.17766639041183221, "grad_norm": 0.16735705733299255, "learning_rate": 0.002, "loss": 2.5597, "step": 89180 }, { "epoch": 0.17768631263547113, "grad_norm": 0.1741272360086441, "learning_rate": 0.002, "loss": 2.5627, "step": 89190 }, { "epoch": 0.17770623485911002, "grad_norm": 0.16925160586833954, "learning_rate": 0.002, "loss": 2.5703, "step": 89200 }, { "epoch": 0.17772615708274894, "grad_norm": 0.17495204508304596, "learning_rate": 0.002, "loss": 2.5678, "step": 89210 }, { "epoch": 0.17774607930638786, "grad_norm": 0.15792083740234375, "learning_rate": 0.002, "loss": 2.5717, "step": 89220 }, { "epoch": 0.17776600153002678, "grad_norm": 0.15015682578086853, "learning_rate": 0.002, "loss": 2.5672, "step": 89230 }, { "epoch": 0.1777859237536657, "grad_norm": 0.15373989939689636, "learning_rate": 0.002, "loss": 2.5684, "step": 89240 }, { "epoch": 0.17780584597730462, "grad_norm": 0.18931472301483154, "learning_rate": 0.002, "loss": 2.5669, "step": 89250 }, { "epoch": 0.1778257682009435, "grad_norm": 0.1583186239004135, "learning_rate": 0.002, "loss": 2.5682, "step": 89260 }, { "epoch": 0.17784569042458243, "grad_norm": 0.19599369168281555, "learning_rate": 0.002, "loss": 2.5789, "step": 89270 }, { "epoch": 0.17786561264822134, "grad_norm": 0.1650450974702835, "learning_rate": 0.002, "loss": 2.5803, "step": 89280 }, { "epoch": 0.17788553487186026, "grad_norm": 0.1833372712135315, "learning_rate": 0.002, "loss": 2.5526, "step": 89290 }, { "epoch": 0.17790545709549918, "grad_norm": 0.1550859808921814, "learning_rate": 0.002, "loss": 2.5832, "step": 89300 }, { "epoch": 0.17792537931913807, "grad_norm": 0.18702086806297302, "learning_rate": 0.002, "loss": 2.5725, "step": 89310 }, { "epoch": 0.177945301542777, "grad_norm": 0.1433279812335968, "learning_rate": 0.002, "loss": 2.5735, "step": 89320 }, { "epoch": 0.1779652237664159, "grad_norm": 0.18717840313911438, "learning_rate": 0.002, "loss": 2.5518, "step": 89330 }, { "epoch": 0.17798514599005483, "grad_norm": 0.14656081795692444, "learning_rate": 0.002, "loss": 2.5566, "step": 89340 }, { "epoch": 0.17800506821369375, "grad_norm": 0.19627608358860016, "learning_rate": 0.002, "loss": 2.5485, "step": 89350 }, { "epoch": 0.17802499043733266, "grad_norm": 0.15810871124267578, "learning_rate": 0.002, "loss": 2.5801, "step": 89360 }, { "epoch": 0.17804491266097155, "grad_norm": 0.16511686146259308, "learning_rate": 0.002, "loss": 2.5669, "step": 89370 }, { "epoch": 0.17806483488461047, "grad_norm": 0.16067354381084442, "learning_rate": 0.002, "loss": 2.5698, "step": 89380 }, { "epoch": 0.1780847571082494, "grad_norm": 0.13552789390087128, "learning_rate": 0.002, "loss": 2.5493, "step": 89390 }, { "epoch": 0.1781046793318883, "grad_norm": 0.16917186975479126, "learning_rate": 0.002, "loss": 2.5675, "step": 89400 }, { "epoch": 0.17812460155552723, "grad_norm": 0.15928110480308533, "learning_rate": 0.002, "loss": 2.562, "step": 89410 }, { "epoch": 0.17814452377916615, "grad_norm": 0.1759432852268219, "learning_rate": 0.002, "loss": 2.57, "step": 89420 }, { "epoch": 0.17816444600280504, "grad_norm": 0.169419065117836, "learning_rate": 0.002, "loss": 2.5685, "step": 89430 }, { "epoch": 0.17818436822644396, "grad_norm": 0.16593246161937714, "learning_rate": 0.002, "loss": 2.5681, "step": 89440 }, { "epoch": 0.17820429045008288, "grad_norm": 0.1779443919658661, "learning_rate": 0.002, "loss": 2.5605, "step": 89450 }, { "epoch": 0.1782242126737218, "grad_norm": 0.1447446495294571, "learning_rate": 0.002, "loss": 2.5656, "step": 89460 }, { "epoch": 0.1782441348973607, "grad_norm": 0.22816677391529083, "learning_rate": 0.002, "loss": 2.582, "step": 89470 }, { "epoch": 0.17826405712099963, "grad_norm": 0.15309137105941772, "learning_rate": 0.002, "loss": 2.5623, "step": 89480 }, { "epoch": 0.17828397934463852, "grad_norm": 0.16180619597434998, "learning_rate": 0.002, "loss": 2.5678, "step": 89490 }, { "epoch": 0.17830390156827744, "grad_norm": 0.18695330619812012, "learning_rate": 0.002, "loss": 2.5563, "step": 89500 }, { "epoch": 0.17832382379191636, "grad_norm": 0.1444185972213745, "learning_rate": 0.002, "loss": 2.568, "step": 89510 }, { "epoch": 0.17834374601555528, "grad_norm": 0.16645312309265137, "learning_rate": 0.002, "loss": 2.5719, "step": 89520 }, { "epoch": 0.1783636682391942, "grad_norm": 0.15943028032779694, "learning_rate": 0.002, "loss": 2.5705, "step": 89530 }, { "epoch": 0.17838359046283309, "grad_norm": 0.14721660315990448, "learning_rate": 0.002, "loss": 2.5653, "step": 89540 }, { "epoch": 0.178403512686472, "grad_norm": 0.17102165520191193, "learning_rate": 0.002, "loss": 2.5769, "step": 89550 }, { "epoch": 0.17842343491011092, "grad_norm": 0.13687792420387268, "learning_rate": 0.002, "loss": 2.5686, "step": 89560 }, { "epoch": 0.17844335713374984, "grad_norm": 0.1705983430147171, "learning_rate": 0.002, "loss": 2.5737, "step": 89570 }, { "epoch": 0.17846327935738876, "grad_norm": 0.1612972468137741, "learning_rate": 0.002, "loss": 2.5527, "step": 89580 }, { "epoch": 0.17848320158102768, "grad_norm": 0.1728430539369583, "learning_rate": 0.002, "loss": 2.5585, "step": 89590 }, { "epoch": 0.17850312380466657, "grad_norm": 0.23347310721874237, "learning_rate": 0.002, "loss": 2.5689, "step": 89600 }, { "epoch": 0.1785230460283055, "grad_norm": 0.14215493202209473, "learning_rate": 0.002, "loss": 2.5709, "step": 89610 }, { "epoch": 0.1785429682519444, "grad_norm": 0.15842773020267487, "learning_rate": 0.002, "loss": 2.5544, "step": 89620 }, { "epoch": 0.17856289047558332, "grad_norm": 0.1591123789548874, "learning_rate": 0.002, "loss": 2.5679, "step": 89630 }, { "epoch": 0.17858281269922224, "grad_norm": 0.14920112490653992, "learning_rate": 0.002, "loss": 2.5507, "step": 89640 }, { "epoch": 0.17860273492286116, "grad_norm": 0.1496700644493103, "learning_rate": 0.002, "loss": 2.5636, "step": 89650 }, { "epoch": 0.17862265714650005, "grad_norm": 0.21475082635879517, "learning_rate": 0.002, "loss": 2.5708, "step": 89660 }, { "epoch": 0.17864257937013897, "grad_norm": 0.14199870824813843, "learning_rate": 0.002, "loss": 2.5563, "step": 89670 }, { "epoch": 0.1786625015937779, "grad_norm": 0.15717852115631104, "learning_rate": 0.002, "loss": 2.5827, "step": 89680 }, { "epoch": 0.1786824238174168, "grad_norm": 0.1688677817583084, "learning_rate": 0.002, "loss": 2.5602, "step": 89690 }, { "epoch": 0.17870234604105573, "grad_norm": 0.15736232697963715, "learning_rate": 0.002, "loss": 2.5658, "step": 89700 }, { "epoch": 0.17872226826469464, "grad_norm": 0.17591646313667297, "learning_rate": 0.002, "loss": 2.5735, "step": 89710 }, { "epoch": 0.17874219048833354, "grad_norm": 0.22358450293540955, "learning_rate": 0.002, "loss": 2.5721, "step": 89720 }, { "epoch": 0.17876211271197245, "grad_norm": 0.1566239893436432, "learning_rate": 0.002, "loss": 2.5638, "step": 89730 }, { "epoch": 0.17878203493561137, "grad_norm": 0.14194431900978088, "learning_rate": 0.002, "loss": 2.5716, "step": 89740 }, { "epoch": 0.1788019571592503, "grad_norm": 0.1816556304693222, "learning_rate": 0.002, "loss": 2.5551, "step": 89750 }, { "epoch": 0.1788218793828892, "grad_norm": 0.16054075956344604, "learning_rate": 0.002, "loss": 2.5708, "step": 89760 }, { "epoch": 0.17884180160652813, "grad_norm": 0.17124608159065247, "learning_rate": 0.002, "loss": 2.5435, "step": 89770 }, { "epoch": 0.17886172383016702, "grad_norm": 0.16301190853118896, "learning_rate": 0.002, "loss": 2.5606, "step": 89780 }, { "epoch": 0.17888164605380594, "grad_norm": 0.20137141644954681, "learning_rate": 0.002, "loss": 2.5623, "step": 89790 }, { "epoch": 0.17890156827744486, "grad_norm": 0.15882587432861328, "learning_rate": 0.002, "loss": 2.5663, "step": 89800 }, { "epoch": 0.17892149050108377, "grad_norm": 0.18862877786159515, "learning_rate": 0.002, "loss": 2.5553, "step": 89810 }, { "epoch": 0.1789414127247227, "grad_norm": 0.1382642239332199, "learning_rate": 0.002, "loss": 2.5687, "step": 89820 }, { "epoch": 0.17896133494836158, "grad_norm": 0.18293042480945587, "learning_rate": 0.002, "loss": 2.5594, "step": 89830 }, { "epoch": 0.1789812571720005, "grad_norm": 0.18351446092128754, "learning_rate": 0.002, "loss": 2.5756, "step": 89840 }, { "epoch": 0.17900117939563942, "grad_norm": 0.1560199111700058, "learning_rate": 0.002, "loss": 2.5741, "step": 89850 }, { "epoch": 0.17902110161927834, "grad_norm": 0.1889725774526596, "learning_rate": 0.002, "loss": 2.5784, "step": 89860 }, { "epoch": 0.17904102384291726, "grad_norm": 0.1722606122493744, "learning_rate": 0.002, "loss": 2.5728, "step": 89870 }, { "epoch": 0.17906094606655618, "grad_norm": 0.15599218010902405, "learning_rate": 0.002, "loss": 2.5752, "step": 89880 }, { "epoch": 0.17908086829019507, "grad_norm": 0.15705744922161102, "learning_rate": 0.002, "loss": 2.5672, "step": 89890 }, { "epoch": 0.17910079051383399, "grad_norm": 0.1512589305639267, "learning_rate": 0.002, "loss": 2.576, "step": 89900 }, { "epoch": 0.1791207127374729, "grad_norm": 0.17285771667957306, "learning_rate": 0.002, "loss": 2.5784, "step": 89910 }, { "epoch": 0.17914063496111182, "grad_norm": 0.14006830751895905, "learning_rate": 0.002, "loss": 2.5764, "step": 89920 }, { "epoch": 0.17916055718475074, "grad_norm": 0.15749458968639374, "learning_rate": 0.002, "loss": 2.5575, "step": 89930 }, { "epoch": 0.17918047940838966, "grad_norm": 0.18603841960430145, "learning_rate": 0.002, "loss": 2.571, "step": 89940 }, { "epoch": 0.17920040163202855, "grad_norm": 0.16454572975635529, "learning_rate": 0.002, "loss": 2.5745, "step": 89950 }, { "epoch": 0.17922032385566747, "grad_norm": 0.14366517961025238, "learning_rate": 0.002, "loss": 2.5687, "step": 89960 }, { "epoch": 0.1792402460793064, "grad_norm": 0.17664450407028198, "learning_rate": 0.002, "loss": 2.5806, "step": 89970 }, { "epoch": 0.1792601683029453, "grad_norm": 0.13759025931358337, "learning_rate": 0.002, "loss": 2.565, "step": 89980 }, { "epoch": 0.17928009052658422, "grad_norm": 0.19826483726501465, "learning_rate": 0.002, "loss": 2.5788, "step": 89990 }, { "epoch": 0.17930001275022314, "grad_norm": 0.784926176071167, "learning_rate": 0.002, "loss": 2.5694, "step": 90000 }, { "epoch": 0.17931993497386203, "grad_norm": 0.14817634224891663, "learning_rate": 0.002, "loss": 2.5682, "step": 90010 }, { "epoch": 0.17933985719750095, "grad_norm": 0.18611684441566467, "learning_rate": 0.002, "loss": 2.5792, "step": 90020 }, { "epoch": 0.17935977942113987, "grad_norm": 0.16020461916923523, "learning_rate": 0.002, "loss": 2.5819, "step": 90030 }, { "epoch": 0.1793797016447788, "grad_norm": 0.16642333567142487, "learning_rate": 0.002, "loss": 2.5711, "step": 90040 }, { "epoch": 0.1793996238684177, "grad_norm": 0.1452418714761734, "learning_rate": 0.002, "loss": 2.5685, "step": 90050 }, { "epoch": 0.1794195460920566, "grad_norm": 0.17450164258480072, "learning_rate": 0.002, "loss": 2.567, "step": 90060 }, { "epoch": 0.17943946831569552, "grad_norm": 0.15461422502994537, "learning_rate": 0.002, "loss": 2.5636, "step": 90070 }, { "epoch": 0.17945939053933443, "grad_norm": 0.17760775983333588, "learning_rate": 0.002, "loss": 2.5743, "step": 90080 }, { "epoch": 0.17947931276297335, "grad_norm": 0.13067060708999634, "learning_rate": 0.002, "loss": 2.5653, "step": 90090 }, { "epoch": 0.17949923498661227, "grad_norm": 0.1737469583749771, "learning_rate": 0.002, "loss": 2.5809, "step": 90100 }, { "epoch": 0.1795191572102512, "grad_norm": 0.15324382483959198, "learning_rate": 0.002, "loss": 2.5686, "step": 90110 }, { "epoch": 0.17953907943389008, "grad_norm": 0.14138154685497284, "learning_rate": 0.002, "loss": 2.5611, "step": 90120 }, { "epoch": 0.179559001657529, "grad_norm": 0.18938304483890533, "learning_rate": 0.002, "loss": 2.5616, "step": 90130 }, { "epoch": 0.17957892388116792, "grad_norm": 0.17572051286697388, "learning_rate": 0.002, "loss": 2.5723, "step": 90140 }, { "epoch": 0.17959884610480684, "grad_norm": 0.16533975303173065, "learning_rate": 0.002, "loss": 2.5696, "step": 90150 }, { "epoch": 0.17961876832844575, "grad_norm": 0.17164559662342072, "learning_rate": 0.002, "loss": 2.5691, "step": 90160 }, { "epoch": 0.17963869055208467, "grad_norm": 0.17173565924167633, "learning_rate": 0.002, "loss": 2.5659, "step": 90170 }, { "epoch": 0.17965861277572356, "grad_norm": 0.15243159234523773, "learning_rate": 0.002, "loss": 2.5739, "step": 90180 }, { "epoch": 0.17967853499936248, "grad_norm": 0.14064182341098785, "learning_rate": 0.002, "loss": 2.5802, "step": 90190 }, { "epoch": 0.1796984572230014, "grad_norm": 0.15190891921520233, "learning_rate": 0.002, "loss": 2.5627, "step": 90200 }, { "epoch": 0.17971837944664032, "grad_norm": 0.191780686378479, "learning_rate": 0.002, "loss": 2.5806, "step": 90210 }, { "epoch": 0.17973830167027924, "grad_norm": 0.152940571308136, "learning_rate": 0.002, "loss": 2.5645, "step": 90220 }, { "epoch": 0.17975822389391816, "grad_norm": 0.16049084067344666, "learning_rate": 0.002, "loss": 2.5756, "step": 90230 }, { "epoch": 0.17977814611755705, "grad_norm": 0.16922852396965027, "learning_rate": 0.002, "loss": 2.5874, "step": 90240 }, { "epoch": 0.17979806834119597, "grad_norm": 0.17441010475158691, "learning_rate": 0.002, "loss": 2.5672, "step": 90250 }, { "epoch": 0.17981799056483488, "grad_norm": 0.1771007776260376, "learning_rate": 0.002, "loss": 2.5661, "step": 90260 }, { "epoch": 0.1798379127884738, "grad_norm": 0.15275464951992035, "learning_rate": 0.002, "loss": 2.5696, "step": 90270 }, { "epoch": 0.17985783501211272, "grad_norm": 0.16654115915298462, "learning_rate": 0.002, "loss": 2.5812, "step": 90280 }, { "epoch": 0.1798777572357516, "grad_norm": 0.17543742060661316, "learning_rate": 0.002, "loss": 2.5768, "step": 90290 }, { "epoch": 0.17989767945939053, "grad_norm": 0.1876937597990036, "learning_rate": 0.002, "loss": 2.5595, "step": 90300 }, { "epoch": 0.17991760168302945, "grad_norm": 0.16828139126300812, "learning_rate": 0.002, "loss": 2.5696, "step": 90310 }, { "epoch": 0.17993752390666837, "grad_norm": 0.14123263955116272, "learning_rate": 0.002, "loss": 2.5693, "step": 90320 }, { "epoch": 0.17995744613030729, "grad_norm": 0.15935377776622772, "learning_rate": 0.002, "loss": 2.5645, "step": 90330 }, { "epoch": 0.1799773683539462, "grad_norm": 0.17617355287075043, "learning_rate": 0.002, "loss": 2.5625, "step": 90340 }, { "epoch": 0.1799972905775851, "grad_norm": 0.1576499044895172, "learning_rate": 0.002, "loss": 2.5775, "step": 90350 }, { "epoch": 0.180017212801224, "grad_norm": 0.16740381717681885, "learning_rate": 0.002, "loss": 2.5813, "step": 90360 }, { "epoch": 0.18003713502486293, "grad_norm": 0.15648877620697021, "learning_rate": 0.002, "loss": 2.5622, "step": 90370 }, { "epoch": 0.18005705724850185, "grad_norm": 0.17065022885799408, "learning_rate": 0.002, "loss": 2.5679, "step": 90380 }, { "epoch": 0.18007697947214077, "grad_norm": 0.17052680253982544, "learning_rate": 0.002, "loss": 2.57, "step": 90390 }, { "epoch": 0.1800969016957797, "grad_norm": 0.1937614381313324, "learning_rate": 0.002, "loss": 2.5641, "step": 90400 }, { "epoch": 0.18011682391941858, "grad_norm": 0.21418622136116028, "learning_rate": 0.002, "loss": 2.568, "step": 90410 }, { "epoch": 0.1801367461430575, "grad_norm": 0.15958237648010254, "learning_rate": 0.002, "loss": 2.5617, "step": 90420 }, { "epoch": 0.18015666836669642, "grad_norm": 0.1513962596654892, "learning_rate": 0.002, "loss": 2.5651, "step": 90430 }, { "epoch": 0.18017659059033533, "grad_norm": 0.17913652956485748, "learning_rate": 0.002, "loss": 2.5741, "step": 90440 }, { "epoch": 0.18019651281397425, "grad_norm": 0.1479996293783188, "learning_rate": 0.002, "loss": 2.5714, "step": 90450 }, { "epoch": 0.18021643503761317, "grad_norm": 0.16672955453395844, "learning_rate": 0.002, "loss": 2.5627, "step": 90460 }, { "epoch": 0.18023635726125206, "grad_norm": 0.24012991786003113, "learning_rate": 0.002, "loss": 2.5639, "step": 90470 }, { "epoch": 0.18025627948489098, "grad_norm": 0.14081762731075287, "learning_rate": 0.002, "loss": 2.5727, "step": 90480 }, { "epoch": 0.1802762017085299, "grad_norm": 0.15539896488189697, "learning_rate": 0.002, "loss": 2.566, "step": 90490 }, { "epoch": 0.18029612393216882, "grad_norm": 0.15730315446853638, "learning_rate": 0.002, "loss": 2.5679, "step": 90500 }, { "epoch": 0.18031604615580774, "grad_norm": 0.1482611447572708, "learning_rate": 0.002, "loss": 2.5697, "step": 90510 }, { "epoch": 0.18033596837944665, "grad_norm": 0.1861347109079361, "learning_rate": 0.002, "loss": 2.5779, "step": 90520 }, { "epoch": 0.18035589060308554, "grad_norm": 0.17245252430438995, "learning_rate": 0.002, "loss": 2.5677, "step": 90530 }, { "epoch": 0.18037581282672446, "grad_norm": 0.1511736512184143, "learning_rate": 0.002, "loss": 2.5735, "step": 90540 }, { "epoch": 0.18039573505036338, "grad_norm": 0.1556672900915146, "learning_rate": 0.002, "loss": 2.5859, "step": 90550 }, { "epoch": 0.1804156572740023, "grad_norm": 0.15000680088996887, "learning_rate": 0.002, "loss": 2.5635, "step": 90560 }, { "epoch": 0.18043557949764122, "grad_norm": 0.17591413855552673, "learning_rate": 0.002, "loss": 2.5623, "step": 90570 }, { "epoch": 0.1804555017212801, "grad_norm": 0.14339898526668549, "learning_rate": 0.002, "loss": 2.5614, "step": 90580 }, { "epoch": 0.18047542394491903, "grad_norm": 0.17495103180408478, "learning_rate": 0.002, "loss": 2.5581, "step": 90590 }, { "epoch": 0.18049534616855795, "grad_norm": 0.1666499227285385, "learning_rate": 0.002, "loss": 2.5654, "step": 90600 }, { "epoch": 0.18051526839219686, "grad_norm": 0.14538632333278656, "learning_rate": 0.002, "loss": 2.5773, "step": 90610 }, { "epoch": 0.18053519061583578, "grad_norm": 0.17682789266109467, "learning_rate": 0.002, "loss": 2.5797, "step": 90620 }, { "epoch": 0.1805551128394747, "grad_norm": 0.17995305359363556, "learning_rate": 0.002, "loss": 2.5771, "step": 90630 }, { "epoch": 0.1805750350631136, "grad_norm": 0.16308380663394928, "learning_rate": 0.002, "loss": 2.5582, "step": 90640 }, { "epoch": 0.1805949572867525, "grad_norm": 0.14646577835083008, "learning_rate": 0.002, "loss": 2.5657, "step": 90650 }, { "epoch": 0.18061487951039143, "grad_norm": 0.1667591631412506, "learning_rate": 0.002, "loss": 2.5575, "step": 90660 }, { "epoch": 0.18063480173403035, "grad_norm": 0.17181085050106049, "learning_rate": 0.002, "loss": 2.5807, "step": 90670 }, { "epoch": 0.18065472395766927, "grad_norm": 0.17092972993850708, "learning_rate": 0.002, "loss": 2.5664, "step": 90680 }, { "epoch": 0.18067464618130818, "grad_norm": 0.1802230179309845, "learning_rate": 0.002, "loss": 2.5659, "step": 90690 }, { "epoch": 0.18069456840494708, "grad_norm": 0.17168164253234863, "learning_rate": 0.002, "loss": 2.5743, "step": 90700 }, { "epoch": 0.180714490628586, "grad_norm": 0.15401801466941833, "learning_rate": 0.002, "loss": 2.564, "step": 90710 }, { "epoch": 0.1807344128522249, "grad_norm": 0.15136076509952545, "learning_rate": 0.002, "loss": 2.5617, "step": 90720 }, { "epoch": 0.18075433507586383, "grad_norm": 0.16330008208751678, "learning_rate": 0.002, "loss": 2.5591, "step": 90730 }, { "epoch": 0.18077425729950275, "grad_norm": 0.1490423083305359, "learning_rate": 0.002, "loss": 2.5624, "step": 90740 }, { "epoch": 0.18079417952314167, "grad_norm": 0.19595347344875336, "learning_rate": 0.002, "loss": 2.5573, "step": 90750 }, { "epoch": 0.18081410174678056, "grad_norm": 0.16375307738780975, "learning_rate": 0.002, "loss": 2.5688, "step": 90760 }, { "epoch": 0.18083402397041948, "grad_norm": 0.182376891374588, "learning_rate": 0.002, "loss": 2.5655, "step": 90770 }, { "epoch": 0.1808539461940584, "grad_norm": 0.16512247920036316, "learning_rate": 0.002, "loss": 2.5912, "step": 90780 }, { "epoch": 0.18087386841769731, "grad_norm": 0.15981554985046387, "learning_rate": 0.002, "loss": 2.5696, "step": 90790 }, { "epoch": 0.18089379064133623, "grad_norm": 0.16177219152450562, "learning_rate": 0.002, "loss": 2.5701, "step": 90800 }, { "epoch": 0.18091371286497512, "grad_norm": 0.15126696228981018, "learning_rate": 0.002, "loss": 2.5505, "step": 90810 }, { "epoch": 0.18093363508861404, "grad_norm": 0.15437059104442596, "learning_rate": 0.002, "loss": 2.5603, "step": 90820 }, { "epoch": 0.18095355731225296, "grad_norm": 0.16786016523838043, "learning_rate": 0.002, "loss": 2.567, "step": 90830 }, { "epoch": 0.18097347953589188, "grad_norm": 0.20449741184711456, "learning_rate": 0.002, "loss": 2.5668, "step": 90840 }, { "epoch": 0.1809934017595308, "grad_norm": 0.18576541543006897, "learning_rate": 0.002, "loss": 2.574, "step": 90850 }, { "epoch": 0.18101332398316972, "grad_norm": 0.15281079709529877, "learning_rate": 0.002, "loss": 2.5648, "step": 90860 }, { "epoch": 0.1810332462068086, "grad_norm": 0.16551046073436737, "learning_rate": 0.002, "loss": 2.5639, "step": 90870 }, { "epoch": 0.18105316843044753, "grad_norm": 0.171574667096138, "learning_rate": 0.002, "loss": 2.5771, "step": 90880 }, { "epoch": 0.18107309065408644, "grad_norm": 0.16537299752235413, "learning_rate": 0.002, "loss": 2.5683, "step": 90890 }, { "epoch": 0.18109301287772536, "grad_norm": 0.1636427342891693, "learning_rate": 0.002, "loss": 2.564, "step": 90900 }, { "epoch": 0.18111293510136428, "grad_norm": 0.18635539710521698, "learning_rate": 0.002, "loss": 2.5723, "step": 90910 }, { "epoch": 0.1811328573250032, "grad_norm": 0.15481579303741455, "learning_rate": 0.002, "loss": 2.568, "step": 90920 }, { "epoch": 0.1811527795486421, "grad_norm": 0.1290980726480484, "learning_rate": 0.002, "loss": 2.5576, "step": 90930 }, { "epoch": 0.181172701772281, "grad_norm": 0.17645327746868134, "learning_rate": 0.002, "loss": 2.5524, "step": 90940 }, { "epoch": 0.18119262399591993, "grad_norm": 0.1887977123260498, "learning_rate": 0.002, "loss": 2.5534, "step": 90950 }, { "epoch": 0.18121254621955885, "grad_norm": 0.15824663639068604, "learning_rate": 0.002, "loss": 2.5618, "step": 90960 }, { "epoch": 0.18123246844319776, "grad_norm": 0.16558417677879333, "learning_rate": 0.002, "loss": 2.5739, "step": 90970 }, { "epoch": 0.18125239066683668, "grad_norm": 0.19557924568653107, "learning_rate": 0.002, "loss": 2.5594, "step": 90980 }, { "epoch": 0.18127231289047557, "grad_norm": 0.166811004281044, "learning_rate": 0.002, "loss": 2.5561, "step": 90990 }, { "epoch": 0.1812922351141145, "grad_norm": 0.17463307082653046, "learning_rate": 0.002, "loss": 2.5638, "step": 91000 }, { "epoch": 0.1813121573377534, "grad_norm": 0.1601092666387558, "learning_rate": 0.002, "loss": 2.5585, "step": 91010 }, { "epoch": 0.18133207956139233, "grad_norm": 0.17051100730895996, "learning_rate": 0.002, "loss": 2.573, "step": 91020 }, { "epoch": 0.18135200178503125, "grad_norm": 0.15686732530593872, "learning_rate": 0.002, "loss": 2.5499, "step": 91030 }, { "epoch": 0.18137192400867014, "grad_norm": 0.15105922520160675, "learning_rate": 0.002, "loss": 2.5843, "step": 91040 }, { "epoch": 0.18139184623230906, "grad_norm": 0.14324951171875, "learning_rate": 0.002, "loss": 2.5636, "step": 91050 }, { "epoch": 0.18141176845594797, "grad_norm": 0.17344093322753906, "learning_rate": 0.002, "loss": 2.5656, "step": 91060 }, { "epoch": 0.1814316906795869, "grad_norm": 0.1770228147506714, "learning_rate": 0.002, "loss": 2.5533, "step": 91070 }, { "epoch": 0.1814516129032258, "grad_norm": 0.17494577169418335, "learning_rate": 0.002, "loss": 2.5588, "step": 91080 }, { "epoch": 0.18147153512686473, "grad_norm": 0.16538994014263153, "learning_rate": 0.002, "loss": 2.5703, "step": 91090 }, { "epoch": 0.18149145735050362, "grad_norm": 0.15318739414215088, "learning_rate": 0.002, "loss": 2.5714, "step": 91100 }, { "epoch": 0.18151137957414254, "grad_norm": 0.15305359661579132, "learning_rate": 0.002, "loss": 2.5741, "step": 91110 }, { "epoch": 0.18153130179778146, "grad_norm": 0.1854787915945053, "learning_rate": 0.002, "loss": 2.5706, "step": 91120 }, { "epoch": 0.18155122402142038, "grad_norm": 0.15691275894641876, "learning_rate": 0.002, "loss": 2.5487, "step": 91130 }, { "epoch": 0.1815711462450593, "grad_norm": 0.16781792044639587, "learning_rate": 0.002, "loss": 2.5586, "step": 91140 }, { "epoch": 0.1815910684686982, "grad_norm": 0.14594732224941254, "learning_rate": 0.002, "loss": 2.565, "step": 91150 }, { "epoch": 0.1816109906923371, "grad_norm": 0.16191668808460236, "learning_rate": 0.002, "loss": 2.5515, "step": 91160 }, { "epoch": 0.18163091291597602, "grad_norm": 0.13993434607982635, "learning_rate": 0.002, "loss": 2.5605, "step": 91170 }, { "epoch": 0.18165083513961494, "grad_norm": 0.15033958852291107, "learning_rate": 0.002, "loss": 2.5569, "step": 91180 }, { "epoch": 0.18167075736325386, "grad_norm": 0.15350233018398285, "learning_rate": 0.002, "loss": 2.5613, "step": 91190 }, { "epoch": 0.18169067958689278, "grad_norm": 0.1773151159286499, "learning_rate": 0.002, "loss": 2.5716, "step": 91200 }, { "epoch": 0.1817106018105317, "grad_norm": 0.16966129839420319, "learning_rate": 0.002, "loss": 2.5708, "step": 91210 }, { "epoch": 0.1817305240341706, "grad_norm": 0.17531660199165344, "learning_rate": 0.002, "loss": 2.5786, "step": 91220 }, { "epoch": 0.1817504462578095, "grad_norm": 0.14965087175369263, "learning_rate": 0.002, "loss": 2.5715, "step": 91230 }, { "epoch": 0.18177036848144842, "grad_norm": 0.14517593383789062, "learning_rate": 0.002, "loss": 2.5536, "step": 91240 }, { "epoch": 0.18179029070508734, "grad_norm": 0.174927219748497, "learning_rate": 0.002, "loss": 2.5739, "step": 91250 }, { "epoch": 0.18181021292872626, "grad_norm": 0.1392853558063507, "learning_rate": 0.002, "loss": 2.5712, "step": 91260 }, { "epoch": 0.18183013515236518, "grad_norm": 0.15846914052963257, "learning_rate": 0.002, "loss": 2.5669, "step": 91270 }, { "epoch": 0.18185005737600407, "grad_norm": 0.1463920921087265, "learning_rate": 0.002, "loss": 2.5541, "step": 91280 }, { "epoch": 0.181869979599643, "grad_norm": 0.1956729292869568, "learning_rate": 0.002, "loss": 2.5582, "step": 91290 }, { "epoch": 0.1818899018232819, "grad_norm": 0.15076927840709686, "learning_rate": 0.002, "loss": 2.555, "step": 91300 }, { "epoch": 0.18190982404692083, "grad_norm": 0.14366702735424042, "learning_rate": 0.002, "loss": 2.5685, "step": 91310 }, { "epoch": 0.18192974627055974, "grad_norm": 0.14774735271930695, "learning_rate": 0.002, "loss": 2.5578, "step": 91320 }, { "epoch": 0.18194966849419864, "grad_norm": 0.1418924778699875, "learning_rate": 0.002, "loss": 2.5562, "step": 91330 }, { "epoch": 0.18196959071783755, "grad_norm": 0.14149174094200134, "learning_rate": 0.002, "loss": 2.558, "step": 91340 }, { "epoch": 0.18198951294147647, "grad_norm": 0.23469096422195435, "learning_rate": 0.002, "loss": 2.5749, "step": 91350 }, { "epoch": 0.1820094351651154, "grad_norm": 0.1601303517818451, "learning_rate": 0.002, "loss": 2.5666, "step": 91360 }, { "epoch": 0.1820293573887543, "grad_norm": 0.145355686545372, "learning_rate": 0.002, "loss": 2.5631, "step": 91370 }, { "epoch": 0.18204927961239323, "grad_norm": 0.2078544646501541, "learning_rate": 0.002, "loss": 2.5756, "step": 91380 }, { "epoch": 0.18206920183603212, "grad_norm": 0.15847216546535492, "learning_rate": 0.002, "loss": 2.5694, "step": 91390 }, { "epoch": 0.18208912405967104, "grad_norm": 0.19775599241256714, "learning_rate": 0.002, "loss": 2.5612, "step": 91400 }, { "epoch": 0.18210904628330996, "grad_norm": 0.14329349994659424, "learning_rate": 0.002, "loss": 2.5731, "step": 91410 }, { "epoch": 0.18212896850694887, "grad_norm": 0.16017428040504456, "learning_rate": 0.002, "loss": 2.5556, "step": 91420 }, { "epoch": 0.1821488907305878, "grad_norm": 0.1702159345149994, "learning_rate": 0.002, "loss": 2.5734, "step": 91430 }, { "epoch": 0.1821688129542267, "grad_norm": 0.17284375429153442, "learning_rate": 0.002, "loss": 2.5683, "step": 91440 }, { "epoch": 0.1821887351778656, "grad_norm": 0.16562075912952423, "learning_rate": 0.002, "loss": 2.5708, "step": 91450 }, { "epoch": 0.18220865740150452, "grad_norm": 0.1566023975610733, "learning_rate": 0.002, "loss": 2.5685, "step": 91460 }, { "epoch": 0.18222857962514344, "grad_norm": 0.18106384575366974, "learning_rate": 0.002, "loss": 2.5475, "step": 91470 }, { "epoch": 0.18224850184878236, "grad_norm": 0.1431950330734253, "learning_rate": 0.002, "loss": 2.5583, "step": 91480 }, { "epoch": 0.18226842407242128, "grad_norm": 0.20326855778694153, "learning_rate": 0.002, "loss": 2.5769, "step": 91490 }, { "epoch": 0.1822883462960602, "grad_norm": 0.14937515556812286, "learning_rate": 0.002, "loss": 2.5763, "step": 91500 }, { "epoch": 0.18230826851969908, "grad_norm": 0.16699698567390442, "learning_rate": 0.002, "loss": 2.5696, "step": 91510 }, { "epoch": 0.182328190743338, "grad_norm": 0.15268559753894806, "learning_rate": 0.002, "loss": 2.5688, "step": 91520 }, { "epoch": 0.18234811296697692, "grad_norm": 0.1715381145477295, "learning_rate": 0.002, "loss": 2.5674, "step": 91530 }, { "epoch": 0.18236803519061584, "grad_norm": 0.1619168519973755, "learning_rate": 0.002, "loss": 2.5664, "step": 91540 }, { "epoch": 0.18238795741425476, "grad_norm": 0.14197948575019836, "learning_rate": 0.002, "loss": 2.5899, "step": 91550 }, { "epoch": 0.18240787963789365, "grad_norm": 0.18939751386642456, "learning_rate": 0.002, "loss": 2.5667, "step": 91560 }, { "epoch": 0.18242780186153257, "grad_norm": 0.14784526824951172, "learning_rate": 0.002, "loss": 2.5731, "step": 91570 }, { "epoch": 0.1824477240851715, "grad_norm": 0.16226543486118317, "learning_rate": 0.002, "loss": 2.5693, "step": 91580 }, { "epoch": 0.1824676463088104, "grad_norm": 0.17122331261634827, "learning_rate": 0.002, "loss": 2.5457, "step": 91590 }, { "epoch": 0.18248756853244932, "grad_norm": 0.1568470299243927, "learning_rate": 0.002, "loss": 2.5619, "step": 91600 }, { "epoch": 0.18250749075608824, "grad_norm": 0.15921252965927124, "learning_rate": 0.002, "loss": 2.5769, "step": 91610 }, { "epoch": 0.18252741297972713, "grad_norm": 0.1660270094871521, "learning_rate": 0.002, "loss": 2.5637, "step": 91620 }, { "epoch": 0.18254733520336605, "grad_norm": 0.21184827387332916, "learning_rate": 0.002, "loss": 2.572, "step": 91630 }, { "epoch": 0.18256725742700497, "grad_norm": 0.1657244712114334, "learning_rate": 0.002, "loss": 2.5697, "step": 91640 }, { "epoch": 0.1825871796506439, "grad_norm": 0.15118181705474854, "learning_rate": 0.002, "loss": 2.5644, "step": 91650 }, { "epoch": 0.1826071018742828, "grad_norm": 0.14966781437397003, "learning_rate": 0.002, "loss": 2.5668, "step": 91660 }, { "epoch": 0.18262702409792173, "grad_norm": 0.14174197614192963, "learning_rate": 0.002, "loss": 2.5497, "step": 91670 }, { "epoch": 0.18264694632156062, "grad_norm": 0.18396234512329102, "learning_rate": 0.002, "loss": 2.5697, "step": 91680 }, { "epoch": 0.18266686854519953, "grad_norm": 0.19387102127075195, "learning_rate": 0.002, "loss": 2.578, "step": 91690 }, { "epoch": 0.18268679076883845, "grad_norm": 0.14763568341732025, "learning_rate": 0.002, "loss": 2.5739, "step": 91700 }, { "epoch": 0.18270671299247737, "grad_norm": 0.1517603099346161, "learning_rate": 0.002, "loss": 2.5648, "step": 91710 }, { "epoch": 0.1827266352161163, "grad_norm": 0.16329152882099152, "learning_rate": 0.002, "loss": 2.5683, "step": 91720 }, { "epoch": 0.1827465574397552, "grad_norm": 0.17256513237953186, "learning_rate": 0.002, "loss": 2.5677, "step": 91730 }, { "epoch": 0.1827664796633941, "grad_norm": 0.17980173230171204, "learning_rate": 0.002, "loss": 2.5708, "step": 91740 }, { "epoch": 0.18278640188703302, "grad_norm": 0.16640929877758026, "learning_rate": 0.002, "loss": 2.5603, "step": 91750 }, { "epoch": 0.18280632411067194, "grad_norm": 0.17340677976608276, "learning_rate": 0.002, "loss": 2.5771, "step": 91760 }, { "epoch": 0.18282624633431085, "grad_norm": 0.16002696752548218, "learning_rate": 0.002, "loss": 2.5716, "step": 91770 }, { "epoch": 0.18284616855794977, "grad_norm": 0.15833885967731476, "learning_rate": 0.002, "loss": 2.5617, "step": 91780 }, { "epoch": 0.1828660907815887, "grad_norm": 0.14991866052150726, "learning_rate": 0.002, "loss": 2.5702, "step": 91790 }, { "epoch": 0.18288601300522758, "grad_norm": 0.1614663153886795, "learning_rate": 0.002, "loss": 2.5588, "step": 91800 }, { "epoch": 0.1829059352288665, "grad_norm": 0.1600591391324997, "learning_rate": 0.002, "loss": 2.5708, "step": 91810 }, { "epoch": 0.18292585745250542, "grad_norm": 0.1483132392168045, "learning_rate": 0.002, "loss": 2.5803, "step": 91820 }, { "epoch": 0.18294577967614434, "grad_norm": 0.14335863292217255, "learning_rate": 0.002, "loss": 2.5675, "step": 91830 }, { "epoch": 0.18296570189978326, "grad_norm": 0.16492527723312378, "learning_rate": 0.002, "loss": 2.5777, "step": 91840 }, { "epoch": 0.18298562412342215, "grad_norm": 0.1454937756061554, "learning_rate": 0.002, "loss": 2.5711, "step": 91850 }, { "epoch": 0.18300554634706107, "grad_norm": 0.16065095365047455, "learning_rate": 0.002, "loss": 2.5782, "step": 91860 }, { "epoch": 0.18302546857069998, "grad_norm": 0.1926264762878418, "learning_rate": 0.002, "loss": 2.5721, "step": 91870 }, { "epoch": 0.1830453907943389, "grad_norm": 0.16111251711845398, "learning_rate": 0.002, "loss": 2.5676, "step": 91880 }, { "epoch": 0.18306531301797782, "grad_norm": 0.15070562064647675, "learning_rate": 0.002, "loss": 2.5609, "step": 91890 }, { "epoch": 0.18308523524161674, "grad_norm": 0.17005108296871185, "learning_rate": 0.002, "loss": 2.5844, "step": 91900 }, { "epoch": 0.18310515746525563, "grad_norm": 0.14267398416996002, "learning_rate": 0.002, "loss": 2.563, "step": 91910 }, { "epoch": 0.18312507968889455, "grad_norm": 0.16854022443294525, "learning_rate": 0.002, "loss": 2.5755, "step": 91920 }, { "epoch": 0.18314500191253347, "grad_norm": 0.14459674060344696, "learning_rate": 0.002, "loss": 2.5794, "step": 91930 }, { "epoch": 0.18316492413617239, "grad_norm": 0.2066766619682312, "learning_rate": 0.002, "loss": 2.5709, "step": 91940 }, { "epoch": 0.1831848463598113, "grad_norm": 0.16629457473754883, "learning_rate": 0.002, "loss": 2.5661, "step": 91950 }, { "epoch": 0.18320476858345022, "grad_norm": 0.1722547858953476, "learning_rate": 0.002, "loss": 2.5685, "step": 91960 }, { "epoch": 0.1832246908070891, "grad_norm": 0.17631302773952484, "learning_rate": 0.002, "loss": 2.5746, "step": 91970 }, { "epoch": 0.18324461303072803, "grad_norm": 0.1418311893939972, "learning_rate": 0.002, "loss": 2.5784, "step": 91980 }, { "epoch": 0.18326453525436695, "grad_norm": 0.17470897734165192, "learning_rate": 0.002, "loss": 2.565, "step": 91990 }, { "epoch": 0.18328445747800587, "grad_norm": 0.16845610737800598, "learning_rate": 0.002, "loss": 2.5743, "step": 92000 }, { "epoch": 0.1833043797016448, "grad_norm": 0.17187590897083282, "learning_rate": 0.002, "loss": 2.5681, "step": 92010 }, { "epoch": 0.1833243019252837, "grad_norm": 0.14121830463409424, "learning_rate": 0.002, "loss": 2.5719, "step": 92020 }, { "epoch": 0.1833442241489226, "grad_norm": 0.18350140750408173, "learning_rate": 0.002, "loss": 2.5628, "step": 92030 }, { "epoch": 0.18336414637256152, "grad_norm": 0.1437668800354004, "learning_rate": 0.002, "loss": 2.5675, "step": 92040 }, { "epoch": 0.18338406859620043, "grad_norm": 0.17694054543972015, "learning_rate": 0.002, "loss": 2.5726, "step": 92050 }, { "epoch": 0.18340399081983935, "grad_norm": 0.17807450890541077, "learning_rate": 0.002, "loss": 2.5634, "step": 92060 }, { "epoch": 0.18342391304347827, "grad_norm": 0.16981343924999237, "learning_rate": 0.002, "loss": 2.566, "step": 92070 }, { "epoch": 0.18344383526711716, "grad_norm": 0.1804117113351822, "learning_rate": 0.002, "loss": 2.5499, "step": 92080 }, { "epoch": 0.18346375749075608, "grad_norm": 0.15655355155467987, "learning_rate": 0.002, "loss": 2.5815, "step": 92090 }, { "epoch": 0.183483679714395, "grad_norm": 0.14347121119499207, "learning_rate": 0.002, "loss": 2.574, "step": 92100 }, { "epoch": 0.18350360193803392, "grad_norm": 0.1712193340063095, "learning_rate": 0.002, "loss": 2.5758, "step": 92110 }, { "epoch": 0.18352352416167284, "grad_norm": 0.15593883395195007, "learning_rate": 0.002, "loss": 2.5782, "step": 92120 }, { "epoch": 0.18354344638531175, "grad_norm": 0.15643945336341858, "learning_rate": 0.002, "loss": 2.5631, "step": 92130 }, { "epoch": 0.18356336860895064, "grad_norm": 0.13039858639240265, "learning_rate": 0.002, "loss": 2.5638, "step": 92140 }, { "epoch": 0.18358329083258956, "grad_norm": 0.16896094381809235, "learning_rate": 0.002, "loss": 2.5601, "step": 92150 }, { "epoch": 0.18360321305622848, "grad_norm": 0.1539618819952011, "learning_rate": 0.002, "loss": 2.5545, "step": 92160 }, { "epoch": 0.1836231352798674, "grad_norm": 0.16260182857513428, "learning_rate": 0.002, "loss": 2.5748, "step": 92170 }, { "epoch": 0.18364305750350632, "grad_norm": 0.23701222240924835, "learning_rate": 0.002, "loss": 2.5505, "step": 92180 }, { "epoch": 0.18366297972714524, "grad_norm": 0.16191035509109497, "learning_rate": 0.002, "loss": 2.5854, "step": 92190 }, { "epoch": 0.18368290195078413, "grad_norm": 0.14550736546516418, "learning_rate": 0.002, "loss": 2.5709, "step": 92200 }, { "epoch": 0.18370282417442305, "grad_norm": 0.17375828325748444, "learning_rate": 0.002, "loss": 2.5625, "step": 92210 }, { "epoch": 0.18372274639806196, "grad_norm": 0.17958851158618927, "learning_rate": 0.002, "loss": 2.5711, "step": 92220 }, { "epoch": 0.18374266862170088, "grad_norm": 0.1700936108827591, "learning_rate": 0.002, "loss": 2.5623, "step": 92230 }, { "epoch": 0.1837625908453398, "grad_norm": 0.15050151944160461, "learning_rate": 0.002, "loss": 2.5606, "step": 92240 }, { "epoch": 0.18378251306897872, "grad_norm": 0.1991700381040573, "learning_rate": 0.002, "loss": 2.5654, "step": 92250 }, { "epoch": 0.1838024352926176, "grad_norm": 0.16857515275478363, "learning_rate": 0.002, "loss": 2.5548, "step": 92260 }, { "epoch": 0.18382235751625653, "grad_norm": 0.1950606256723404, "learning_rate": 0.002, "loss": 2.5663, "step": 92270 }, { "epoch": 0.18384227973989545, "grad_norm": 0.15620577335357666, "learning_rate": 0.002, "loss": 2.5822, "step": 92280 }, { "epoch": 0.18386220196353437, "grad_norm": 0.19935226440429688, "learning_rate": 0.002, "loss": 2.5559, "step": 92290 }, { "epoch": 0.18388212418717328, "grad_norm": 0.14487580955028534, "learning_rate": 0.002, "loss": 2.5645, "step": 92300 }, { "epoch": 0.18390204641081218, "grad_norm": 0.18155308067798615, "learning_rate": 0.002, "loss": 2.5747, "step": 92310 }, { "epoch": 0.1839219686344511, "grad_norm": 0.16593991219997406, "learning_rate": 0.002, "loss": 2.5691, "step": 92320 }, { "epoch": 0.18394189085809, "grad_norm": 0.1595865786075592, "learning_rate": 0.002, "loss": 2.5667, "step": 92330 }, { "epoch": 0.18396181308172893, "grad_norm": 0.16045409440994263, "learning_rate": 0.002, "loss": 2.5667, "step": 92340 }, { "epoch": 0.18398173530536785, "grad_norm": 0.14365145564079285, "learning_rate": 0.002, "loss": 2.5528, "step": 92350 }, { "epoch": 0.18400165752900677, "grad_norm": 0.1558002531528473, "learning_rate": 0.002, "loss": 2.5602, "step": 92360 }, { "epoch": 0.18402157975264566, "grad_norm": 0.18603643774986267, "learning_rate": 0.002, "loss": 2.5706, "step": 92370 }, { "epoch": 0.18404150197628458, "grad_norm": 0.15404339134693146, "learning_rate": 0.002, "loss": 2.5649, "step": 92380 }, { "epoch": 0.1840614241999235, "grad_norm": 0.14254586398601532, "learning_rate": 0.002, "loss": 2.5638, "step": 92390 }, { "epoch": 0.18408134642356241, "grad_norm": 0.15574637055397034, "learning_rate": 0.002, "loss": 2.5488, "step": 92400 }, { "epoch": 0.18410126864720133, "grad_norm": 0.15636242926120758, "learning_rate": 0.002, "loss": 2.5722, "step": 92410 }, { "epoch": 0.18412119087084025, "grad_norm": 0.16762562096118927, "learning_rate": 0.002, "loss": 2.5732, "step": 92420 }, { "epoch": 0.18414111309447914, "grad_norm": 0.1465432345867157, "learning_rate": 0.002, "loss": 2.5683, "step": 92430 }, { "epoch": 0.18416103531811806, "grad_norm": 0.14456254243850708, "learning_rate": 0.002, "loss": 2.5905, "step": 92440 }, { "epoch": 0.18418095754175698, "grad_norm": 0.1583731472492218, "learning_rate": 0.002, "loss": 2.5692, "step": 92450 }, { "epoch": 0.1842008797653959, "grad_norm": 0.20711290836334229, "learning_rate": 0.002, "loss": 2.5698, "step": 92460 }, { "epoch": 0.18422080198903482, "grad_norm": 0.16327209770679474, "learning_rate": 0.002, "loss": 2.5673, "step": 92470 }, { "epoch": 0.18424072421267373, "grad_norm": 0.15927420556545258, "learning_rate": 0.002, "loss": 2.5444, "step": 92480 }, { "epoch": 0.18426064643631263, "grad_norm": 0.20381289720535278, "learning_rate": 0.002, "loss": 2.5701, "step": 92490 }, { "epoch": 0.18428056865995154, "grad_norm": 0.15984252095222473, "learning_rate": 0.002, "loss": 2.5697, "step": 92500 }, { "epoch": 0.18430049088359046, "grad_norm": 0.13955272734165192, "learning_rate": 0.002, "loss": 2.5727, "step": 92510 }, { "epoch": 0.18432041310722938, "grad_norm": 0.16714589297771454, "learning_rate": 0.002, "loss": 2.5766, "step": 92520 }, { "epoch": 0.1843403353308683, "grad_norm": 0.15741688013076782, "learning_rate": 0.002, "loss": 2.5699, "step": 92530 }, { "epoch": 0.18436025755450722, "grad_norm": 0.1906486302614212, "learning_rate": 0.002, "loss": 2.5786, "step": 92540 }, { "epoch": 0.1843801797781461, "grad_norm": 0.1573704481124878, "learning_rate": 0.002, "loss": 2.5513, "step": 92550 }, { "epoch": 0.18440010200178503, "grad_norm": 0.15067708492279053, "learning_rate": 0.002, "loss": 2.5664, "step": 92560 }, { "epoch": 0.18442002422542395, "grad_norm": 0.17495833337306976, "learning_rate": 0.002, "loss": 2.5605, "step": 92570 }, { "epoch": 0.18443994644906286, "grad_norm": 0.14582154154777527, "learning_rate": 0.002, "loss": 2.5713, "step": 92580 }, { "epoch": 0.18445986867270178, "grad_norm": 0.16206513345241547, "learning_rate": 0.002, "loss": 2.5577, "step": 92590 }, { "epoch": 0.18447979089634067, "grad_norm": 0.16671675443649292, "learning_rate": 0.002, "loss": 2.5723, "step": 92600 }, { "epoch": 0.1844997131199796, "grad_norm": 0.15268081426620483, "learning_rate": 0.002, "loss": 2.5688, "step": 92610 }, { "epoch": 0.1845196353436185, "grad_norm": 0.20754429697990417, "learning_rate": 0.002, "loss": 2.5823, "step": 92620 }, { "epoch": 0.18453955756725743, "grad_norm": 0.1608603298664093, "learning_rate": 0.002, "loss": 2.5801, "step": 92630 }, { "epoch": 0.18455947979089635, "grad_norm": 0.14507685601711273, "learning_rate": 0.002, "loss": 2.5695, "step": 92640 }, { "epoch": 0.18457940201453527, "grad_norm": 0.1567744016647339, "learning_rate": 0.002, "loss": 2.5768, "step": 92650 }, { "epoch": 0.18459932423817416, "grad_norm": 0.16926273703575134, "learning_rate": 0.002, "loss": 2.5474, "step": 92660 }, { "epoch": 0.18461924646181307, "grad_norm": 0.12636986374855042, "learning_rate": 0.002, "loss": 2.56, "step": 92670 }, { "epoch": 0.184639168685452, "grad_norm": 0.1499575525522232, "learning_rate": 0.002, "loss": 2.575, "step": 92680 }, { "epoch": 0.1846590909090909, "grad_norm": 0.15470227599143982, "learning_rate": 0.002, "loss": 2.5644, "step": 92690 }, { "epoch": 0.18467901313272983, "grad_norm": 0.20892757177352905, "learning_rate": 0.002, "loss": 2.5629, "step": 92700 }, { "epoch": 0.18469893535636875, "grad_norm": 0.15180879831314087, "learning_rate": 0.002, "loss": 2.5722, "step": 92710 }, { "epoch": 0.18471885758000764, "grad_norm": 0.14790493249893188, "learning_rate": 0.002, "loss": 2.5811, "step": 92720 }, { "epoch": 0.18473877980364656, "grad_norm": 0.17677144706249237, "learning_rate": 0.002, "loss": 2.5719, "step": 92730 }, { "epoch": 0.18475870202728548, "grad_norm": 0.15677905082702637, "learning_rate": 0.002, "loss": 2.5649, "step": 92740 }, { "epoch": 0.1847786242509244, "grad_norm": 0.1775682121515274, "learning_rate": 0.002, "loss": 2.5558, "step": 92750 }, { "epoch": 0.1847985464745633, "grad_norm": 0.15475109219551086, "learning_rate": 0.002, "loss": 2.5533, "step": 92760 }, { "epoch": 0.18481846869820223, "grad_norm": 0.1674329936504364, "learning_rate": 0.002, "loss": 2.5755, "step": 92770 }, { "epoch": 0.18483839092184112, "grad_norm": 0.16603493690490723, "learning_rate": 0.002, "loss": 2.5649, "step": 92780 }, { "epoch": 0.18485831314548004, "grad_norm": 0.17941761016845703, "learning_rate": 0.002, "loss": 2.5649, "step": 92790 }, { "epoch": 0.18487823536911896, "grad_norm": 0.14902085065841675, "learning_rate": 0.002, "loss": 2.5625, "step": 92800 }, { "epoch": 0.18489815759275788, "grad_norm": 0.14866627752780914, "learning_rate": 0.002, "loss": 2.5757, "step": 92810 }, { "epoch": 0.1849180798163968, "grad_norm": 0.28406497836112976, "learning_rate": 0.002, "loss": 2.5574, "step": 92820 }, { "epoch": 0.1849380020400357, "grad_norm": 0.14842596650123596, "learning_rate": 0.002, "loss": 2.5657, "step": 92830 }, { "epoch": 0.1849579242636746, "grad_norm": 0.13564124703407288, "learning_rate": 0.002, "loss": 2.5501, "step": 92840 }, { "epoch": 0.18497784648731352, "grad_norm": 0.14531715214252472, "learning_rate": 0.002, "loss": 2.5883, "step": 92850 }, { "epoch": 0.18499776871095244, "grad_norm": 0.1564270555973053, "learning_rate": 0.002, "loss": 2.5687, "step": 92860 }, { "epoch": 0.18501769093459136, "grad_norm": 0.19262030720710754, "learning_rate": 0.002, "loss": 2.5898, "step": 92870 }, { "epoch": 0.18503761315823028, "grad_norm": 0.17658093571662903, "learning_rate": 0.002, "loss": 2.5657, "step": 92880 }, { "epoch": 0.18505753538186917, "grad_norm": 0.15954342484474182, "learning_rate": 0.002, "loss": 2.5783, "step": 92890 }, { "epoch": 0.1850774576055081, "grad_norm": 0.20785778760910034, "learning_rate": 0.002, "loss": 2.5681, "step": 92900 }, { "epoch": 0.185097379829147, "grad_norm": 0.15589188039302826, "learning_rate": 0.002, "loss": 2.5665, "step": 92910 }, { "epoch": 0.18511730205278593, "grad_norm": 0.14568796753883362, "learning_rate": 0.002, "loss": 2.5542, "step": 92920 }, { "epoch": 0.18513722427642484, "grad_norm": 0.1851813644170761, "learning_rate": 0.002, "loss": 2.5588, "step": 92930 }, { "epoch": 0.18515714650006376, "grad_norm": 0.16111372411251068, "learning_rate": 0.002, "loss": 2.5748, "step": 92940 }, { "epoch": 0.18517706872370265, "grad_norm": 0.15535593032836914, "learning_rate": 0.002, "loss": 2.5726, "step": 92950 }, { "epoch": 0.18519699094734157, "grad_norm": 0.1934089958667755, "learning_rate": 0.002, "loss": 2.5753, "step": 92960 }, { "epoch": 0.1852169131709805, "grad_norm": 0.14653617143630981, "learning_rate": 0.002, "loss": 2.5722, "step": 92970 }, { "epoch": 0.1852368353946194, "grad_norm": 0.1886061578989029, "learning_rate": 0.002, "loss": 2.5527, "step": 92980 }, { "epoch": 0.18525675761825833, "grad_norm": 0.1630096286535263, "learning_rate": 0.002, "loss": 2.566, "step": 92990 }, { "epoch": 0.18527667984189725, "grad_norm": 0.17013166844844818, "learning_rate": 0.002, "loss": 2.5734, "step": 93000 }, { "epoch": 0.18529660206553614, "grad_norm": 0.17169784009456635, "learning_rate": 0.002, "loss": 2.5614, "step": 93010 }, { "epoch": 0.18531652428917506, "grad_norm": 0.16038477420806885, "learning_rate": 0.002, "loss": 2.5546, "step": 93020 }, { "epoch": 0.18533644651281397, "grad_norm": 0.1484776735305786, "learning_rate": 0.002, "loss": 2.5645, "step": 93030 }, { "epoch": 0.1853563687364529, "grad_norm": 0.18642503023147583, "learning_rate": 0.002, "loss": 2.551, "step": 93040 }, { "epoch": 0.1853762909600918, "grad_norm": 0.15690504014492035, "learning_rate": 0.002, "loss": 2.5573, "step": 93050 }, { "epoch": 0.1853962131837307, "grad_norm": 0.14888612926006317, "learning_rate": 0.002, "loss": 2.5783, "step": 93060 }, { "epoch": 0.18541613540736962, "grad_norm": 0.16188791394233704, "learning_rate": 0.002, "loss": 2.5666, "step": 93070 }, { "epoch": 0.18543605763100854, "grad_norm": 0.16153468191623688, "learning_rate": 0.002, "loss": 2.5672, "step": 93080 }, { "epoch": 0.18545597985464746, "grad_norm": 0.14910076558589935, "learning_rate": 0.002, "loss": 2.5643, "step": 93090 }, { "epoch": 0.18547590207828638, "grad_norm": 0.17061194777488708, "learning_rate": 0.002, "loss": 2.5719, "step": 93100 }, { "epoch": 0.1854958243019253, "grad_norm": 0.15388835966587067, "learning_rate": 0.002, "loss": 2.5605, "step": 93110 }, { "epoch": 0.18551574652556418, "grad_norm": 0.1714327037334442, "learning_rate": 0.002, "loss": 2.5681, "step": 93120 }, { "epoch": 0.1855356687492031, "grad_norm": 0.1799965798854828, "learning_rate": 0.002, "loss": 2.5658, "step": 93130 }, { "epoch": 0.18555559097284202, "grad_norm": 0.16038647294044495, "learning_rate": 0.002, "loss": 2.5881, "step": 93140 }, { "epoch": 0.18557551319648094, "grad_norm": 0.15770331025123596, "learning_rate": 0.002, "loss": 2.5645, "step": 93150 }, { "epoch": 0.18559543542011986, "grad_norm": 0.15469805896282196, "learning_rate": 0.002, "loss": 2.5577, "step": 93160 }, { "epoch": 0.18561535764375878, "grad_norm": 0.14426067471504211, "learning_rate": 0.002, "loss": 2.5722, "step": 93170 }, { "epoch": 0.18563527986739767, "grad_norm": 0.14601898193359375, "learning_rate": 0.002, "loss": 2.5642, "step": 93180 }, { "epoch": 0.1856552020910366, "grad_norm": 0.16252559423446655, "learning_rate": 0.002, "loss": 2.5684, "step": 93190 }, { "epoch": 0.1856751243146755, "grad_norm": 0.18743668496608734, "learning_rate": 0.002, "loss": 2.5714, "step": 93200 }, { "epoch": 0.18569504653831442, "grad_norm": 0.223649799823761, "learning_rate": 0.002, "loss": 2.5743, "step": 93210 }, { "epoch": 0.18571496876195334, "grad_norm": 0.154107466340065, "learning_rate": 0.002, "loss": 2.5689, "step": 93220 }, { "epoch": 0.18573489098559226, "grad_norm": 0.14906886219978333, "learning_rate": 0.002, "loss": 2.5665, "step": 93230 }, { "epoch": 0.18575481320923115, "grad_norm": 0.14931465685367584, "learning_rate": 0.002, "loss": 2.5689, "step": 93240 }, { "epoch": 0.18577473543287007, "grad_norm": 0.1753404140472412, "learning_rate": 0.002, "loss": 2.5734, "step": 93250 }, { "epoch": 0.185794657656509, "grad_norm": 0.17836740612983704, "learning_rate": 0.002, "loss": 2.5451, "step": 93260 }, { "epoch": 0.1858145798801479, "grad_norm": 0.15129454433918, "learning_rate": 0.002, "loss": 2.586, "step": 93270 }, { "epoch": 0.18583450210378682, "grad_norm": 0.18654870986938477, "learning_rate": 0.002, "loss": 2.5603, "step": 93280 }, { "epoch": 0.18585442432742574, "grad_norm": 0.20407354831695557, "learning_rate": 0.002, "loss": 2.553, "step": 93290 }, { "epoch": 0.18587434655106463, "grad_norm": 0.15064319968223572, "learning_rate": 0.002, "loss": 2.5642, "step": 93300 }, { "epoch": 0.18589426877470355, "grad_norm": 0.1814108043909073, "learning_rate": 0.002, "loss": 2.5685, "step": 93310 }, { "epoch": 0.18591419099834247, "grad_norm": 0.1483093500137329, "learning_rate": 0.002, "loss": 2.566, "step": 93320 }, { "epoch": 0.1859341132219814, "grad_norm": 0.18491590023040771, "learning_rate": 0.002, "loss": 2.5696, "step": 93330 }, { "epoch": 0.1859540354456203, "grad_norm": 0.18424157798290253, "learning_rate": 0.002, "loss": 2.5666, "step": 93340 }, { "epoch": 0.1859739576692592, "grad_norm": 0.15901710093021393, "learning_rate": 0.002, "loss": 2.5736, "step": 93350 }, { "epoch": 0.18599387989289812, "grad_norm": 0.16493664681911469, "learning_rate": 0.002, "loss": 2.561, "step": 93360 }, { "epoch": 0.18601380211653704, "grad_norm": 0.15463726222515106, "learning_rate": 0.002, "loss": 2.5846, "step": 93370 }, { "epoch": 0.18603372434017595, "grad_norm": 0.1673620194196701, "learning_rate": 0.002, "loss": 2.5614, "step": 93380 }, { "epoch": 0.18605364656381487, "grad_norm": 0.17027975618839264, "learning_rate": 0.002, "loss": 2.5757, "step": 93390 }, { "epoch": 0.1860735687874538, "grad_norm": 0.15653641521930695, "learning_rate": 0.002, "loss": 2.5729, "step": 93400 }, { "epoch": 0.18609349101109268, "grad_norm": 0.14936214685440063, "learning_rate": 0.002, "loss": 2.5676, "step": 93410 }, { "epoch": 0.1861134132347316, "grad_norm": 0.1591835618019104, "learning_rate": 0.002, "loss": 2.5596, "step": 93420 }, { "epoch": 0.18613333545837052, "grad_norm": 0.17054668068885803, "learning_rate": 0.002, "loss": 2.5665, "step": 93430 }, { "epoch": 0.18615325768200944, "grad_norm": 0.16152890026569366, "learning_rate": 0.002, "loss": 2.5677, "step": 93440 }, { "epoch": 0.18617317990564836, "grad_norm": 0.1592484712600708, "learning_rate": 0.002, "loss": 2.5814, "step": 93450 }, { "epoch": 0.18619310212928727, "grad_norm": 0.18283793330192566, "learning_rate": 0.002, "loss": 2.5643, "step": 93460 }, { "epoch": 0.18621302435292617, "grad_norm": 0.17432011663913727, "learning_rate": 0.002, "loss": 2.5829, "step": 93470 }, { "epoch": 0.18623294657656508, "grad_norm": 0.1691291332244873, "learning_rate": 0.002, "loss": 2.5741, "step": 93480 }, { "epoch": 0.186252868800204, "grad_norm": 0.15287846326828003, "learning_rate": 0.002, "loss": 2.5753, "step": 93490 }, { "epoch": 0.18627279102384292, "grad_norm": 0.17741648852825165, "learning_rate": 0.002, "loss": 2.5495, "step": 93500 }, { "epoch": 0.18629271324748184, "grad_norm": 0.1639847457408905, "learning_rate": 0.002, "loss": 2.5517, "step": 93510 }, { "epoch": 0.18631263547112076, "grad_norm": 0.16306309401988983, "learning_rate": 0.002, "loss": 2.559, "step": 93520 }, { "epoch": 0.18633255769475965, "grad_norm": 0.14581803977489471, "learning_rate": 0.002, "loss": 2.5567, "step": 93530 }, { "epoch": 0.18635247991839857, "grad_norm": 0.16855664551258087, "learning_rate": 0.002, "loss": 2.5665, "step": 93540 }, { "epoch": 0.18637240214203749, "grad_norm": 0.1586124151945114, "learning_rate": 0.002, "loss": 2.5513, "step": 93550 }, { "epoch": 0.1863923243656764, "grad_norm": 0.1740208864212036, "learning_rate": 0.002, "loss": 2.5624, "step": 93560 }, { "epoch": 0.18641224658931532, "grad_norm": 0.169528990983963, "learning_rate": 0.002, "loss": 2.5852, "step": 93570 }, { "epoch": 0.1864321688129542, "grad_norm": 0.161590576171875, "learning_rate": 0.002, "loss": 2.5685, "step": 93580 }, { "epoch": 0.18645209103659313, "grad_norm": 0.1757260262966156, "learning_rate": 0.002, "loss": 2.5781, "step": 93590 }, { "epoch": 0.18647201326023205, "grad_norm": 0.1650460809469223, "learning_rate": 0.002, "loss": 2.571, "step": 93600 }, { "epoch": 0.18649193548387097, "grad_norm": 0.165512353181839, "learning_rate": 0.002, "loss": 2.5793, "step": 93610 }, { "epoch": 0.1865118577075099, "grad_norm": 0.16678044199943542, "learning_rate": 0.002, "loss": 2.5652, "step": 93620 }, { "epoch": 0.1865317799311488, "grad_norm": 0.17960421741008759, "learning_rate": 0.002, "loss": 2.5563, "step": 93630 }, { "epoch": 0.1865517021547877, "grad_norm": 0.18100036680698395, "learning_rate": 0.002, "loss": 2.5662, "step": 93640 }, { "epoch": 0.18657162437842661, "grad_norm": 0.14772088825702667, "learning_rate": 0.002, "loss": 2.5626, "step": 93650 }, { "epoch": 0.18659154660206553, "grad_norm": 0.20954440534114838, "learning_rate": 0.002, "loss": 2.5723, "step": 93660 }, { "epoch": 0.18661146882570445, "grad_norm": 0.14822299778461456, "learning_rate": 0.002, "loss": 2.5664, "step": 93670 }, { "epoch": 0.18663139104934337, "grad_norm": 0.1355128437280655, "learning_rate": 0.002, "loss": 2.5793, "step": 93680 }, { "epoch": 0.1866513132729823, "grad_norm": 0.15761972963809967, "learning_rate": 0.002, "loss": 2.5756, "step": 93690 }, { "epoch": 0.18667123549662118, "grad_norm": 0.17103944718837738, "learning_rate": 0.002, "loss": 2.5614, "step": 93700 }, { "epoch": 0.1866911577202601, "grad_norm": 0.15779156982898712, "learning_rate": 0.002, "loss": 2.5698, "step": 93710 }, { "epoch": 0.18671107994389902, "grad_norm": 0.16592499613761902, "learning_rate": 0.002, "loss": 2.5605, "step": 93720 }, { "epoch": 0.18673100216753794, "grad_norm": 0.14047656953334808, "learning_rate": 0.002, "loss": 2.5595, "step": 93730 }, { "epoch": 0.18675092439117685, "grad_norm": 0.15690384805202484, "learning_rate": 0.002, "loss": 2.5616, "step": 93740 }, { "epoch": 0.18677084661481577, "grad_norm": 0.1829160898923874, "learning_rate": 0.002, "loss": 2.5771, "step": 93750 }, { "epoch": 0.18679076883845466, "grad_norm": 0.1695609986782074, "learning_rate": 0.002, "loss": 2.571, "step": 93760 }, { "epoch": 0.18681069106209358, "grad_norm": 0.15527158975601196, "learning_rate": 0.002, "loss": 2.5664, "step": 93770 }, { "epoch": 0.1868306132857325, "grad_norm": 0.1580348014831543, "learning_rate": 0.002, "loss": 2.5689, "step": 93780 }, { "epoch": 0.18685053550937142, "grad_norm": 0.14759770035743713, "learning_rate": 0.002, "loss": 2.5593, "step": 93790 }, { "epoch": 0.18687045773301034, "grad_norm": 0.20023775100708008, "learning_rate": 0.002, "loss": 2.563, "step": 93800 }, { "epoch": 0.18689037995664926, "grad_norm": 0.1538354754447937, "learning_rate": 0.002, "loss": 2.5607, "step": 93810 }, { "epoch": 0.18691030218028815, "grad_norm": 0.15788382291793823, "learning_rate": 0.002, "loss": 2.5624, "step": 93820 }, { "epoch": 0.18693022440392706, "grad_norm": 0.13997302949428558, "learning_rate": 0.002, "loss": 2.5752, "step": 93830 }, { "epoch": 0.18695014662756598, "grad_norm": 0.1946125626564026, "learning_rate": 0.002, "loss": 2.5663, "step": 93840 }, { "epoch": 0.1869700688512049, "grad_norm": 0.1446564942598343, "learning_rate": 0.002, "loss": 2.5648, "step": 93850 }, { "epoch": 0.18698999107484382, "grad_norm": 0.17117689549922943, "learning_rate": 0.002, "loss": 2.5765, "step": 93860 }, { "epoch": 0.1870099132984827, "grad_norm": 0.15784455835819244, "learning_rate": 0.002, "loss": 2.5537, "step": 93870 }, { "epoch": 0.18702983552212163, "grad_norm": 0.1685025542974472, "learning_rate": 0.002, "loss": 2.5783, "step": 93880 }, { "epoch": 0.18704975774576055, "grad_norm": 0.17694327235221863, "learning_rate": 0.002, "loss": 2.5645, "step": 93890 }, { "epoch": 0.18706967996939947, "grad_norm": 0.15249386429786682, "learning_rate": 0.002, "loss": 2.5606, "step": 93900 }, { "epoch": 0.18708960219303838, "grad_norm": 0.18796804547309875, "learning_rate": 0.002, "loss": 2.5651, "step": 93910 }, { "epoch": 0.1871095244166773, "grad_norm": 0.1539144515991211, "learning_rate": 0.002, "loss": 2.5755, "step": 93920 }, { "epoch": 0.1871294466403162, "grad_norm": 0.14881011843681335, "learning_rate": 0.002, "loss": 2.5698, "step": 93930 }, { "epoch": 0.1871493688639551, "grad_norm": 0.16969731450080872, "learning_rate": 0.002, "loss": 2.5575, "step": 93940 }, { "epoch": 0.18716929108759403, "grad_norm": 0.1683986335992813, "learning_rate": 0.002, "loss": 2.5604, "step": 93950 }, { "epoch": 0.18718921331123295, "grad_norm": 0.15891318023204803, "learning_rate": 0.002, "loss": 2.5701, "step": 93960 }, { "epoch": 0.18720913553487187, "grad_norm": 0.14041487872600555, "learning_rate": 0.002, "loss": 2.5769, "step": 93970 }, { "epoch": 0.1872290577585108, "grad_norm": 0.17494656145572662, "learning_rate": 0.002, "loss": 2.5773, "step": 93980 }, { "epoch": 0.18724897998214968, "grad_norm": 0.16142894327640533, "learning_rate": 0.002, "loss": 2.5754, "step": 93990 }, { "epoch": 0.1872689022057886, "grad_norm": 0.17468662559986115, "learning_rate": 0.002, "loss": 2.5795, "step": 94000 }, { "epoch": 0.18728882442942751, "grad_norm": 0.2283419519662857, "learning_rate": 0.002, "loss": 2.5694, "step": 94010 }, { "epoch": 0.18730874665306643, "grad_norm": 0.16270004212856293, "learning_rate": 0.002, "loss": 2.5598, "step": 94020 }, { "epoch": 0.18732866887670535, "grad_norm": 0.18990789353847504, "learning_rate": 0.002, "loss": 2.5819, "step": 94030 }, { "epoch": 0.18734859110034427, "grad_norm": 0.16685891151428223, "learning_rate": 0.002, "loss": 2.5627, "step": 94040 }, { "epoch": 0.18736851332398316, "grad_norm": 0.14724291861057281, "learning_rate": 0.002, "loss": 2.564, "step": 94050 }, { "epoch": 0.18738843554762208, "grad_norm": 0.18562494218349457, "learning_rate": 0.002, "loss": 2.5607, "step": 94060 }, { "epoch": 0.187408357771261, "grad_norm": 0.18382664024829865, "learning_rate": 0.002, "loss": 2.5858, "step": 94070 }, { "epoch": 0.18742827999489992, "grad_norm": 0.1478714644908905, "learning_rate": 0.002, "loss": 2.5671, "step": 94080 }, { "epoch": 0.18744820221853883, "grad_norm": 0.20344294607639313, "learning_rate": 0.002, "loss": 2.5742, "step": 94090 }, { "epoch": 0.18746812444217772, "grad_norm": 0.1885995864868164, "learning_rate": 0.002, "loss": 2.5655, "step": 94100 }, { "epoch": 0.18748804666581664, "grad_norm": 0.15892355144023895, "learning_rate": 0.002, "loss": 2.5752, "step": 94110 }, { "epoch": 0.18750796888945556, "grad_norm": 0.16807569563388824, "learning_rate": 0.002, "loss": 2.5728, "step": 94120 }, { "epoch": 0.18752789111309448, "grad_norm": 0.17005831003189087, "learning_rate": 0.002, "loss": 2.5792, "step": 94130 }, { "epoch": 0.1875478133367334, "grad_norm": 0.20063695311546326, "learning_rate": 0.002, "loss": 2.5657, "step": 94140 }, { "epoch": 0.18756773556037232, "grad_norm": 0.16282862424850464, "learning_rate": 0.002, "loss": 2.5499, "step": 94150 }, { "epoch": 0.1875876577840112, "grad_norm": 0.1474553346633911, "learning_rate": 0.002, "loss": 2.5604, "step": 94160 }, { "epoch": 0.18760758000765013, "grad_norm": 0.14650997519493103, "learning_rate": 0.002, "loss": 2.5713, "step": 94170 }, { "epoch": 0.18762750223128905, "grad_norm": 0.15725824236869812, "learning_rate": 0.002, "loss": 2.5804, "step": 94180 }, { "epoch": 0.18764742445492796, "grad_norm": 0.15635690093040466, "learning_rate": 0.002, "loss": 2.5595, "step": 94190 }, { "epoch": 0.18766734667856688, "grad_norm": 0.14333532750606537, "learning_rate": 0.002, "loss": 2.5788, "step": 94200 }, { "epoch": 0.1876872689022058, "grad_norm": 0.1845376044511795, "learning_rate": 0.002, "loss": 2.565, "step": 94210 }, { "epoch": 0.1877071911258447, "grad_norm": 0.16008833050727844, "learning_rate": 0.002, "loss": 2.578, "step": 94220 }, { "epoch": 0.1877271133494836, "grad_norm": 0.16522079706192017, "learning_rate": 0.002, "loss": 2.5608, "step": 94230 }, { "epoch": 0.18774703557312253, "grad_norm": 0.15672913193702698, "learning_rate": 0.002, "loss": 2.5661, "step": 94240 }, { "epoch": 0.18776695779676145, "grad_norm": 0.1644962579011917, "learning_rate": 0.002, "loss": 2.5759, "step": 94250 }, { "epoch": 0.18778688002040037, "grad_norm": 0.14048559963703156, "learning_rate": 0.002, "loss": 2.5719, "step": 94260 }, { "epoch": 0.18780680224403928, "grad_norm": 0.16562741994857788, "learning_rate": 0.002, "loss": 2.5686, "step": 94270 }, { "epoch": 0.18782672446767817, "grad_norm": 0.1620166003704071, "learning_rate": 0.002, "loss": 2.5693, "step": 94280 }, { "epoch": 0.1878466466913171, "grad_norm": 0.18294306099414825, "learning_rate": 0.002, "loss": 2.5597, "step": 94290 }, { "epoch": 0.187866568914956, "grad_norm": 0.14931681752204895, "learning_rate": 0.002, "loss": 2.564, "step": 94300 }, { "epoch": 0.18788649113859493, "grad_norm": 0.2496020495891571, "learning_rate": 0.002, "loss": 2.5718, "step": 94310 }, { "epoch": 0.18790641336223385, "grad_norm": 0.15676355361938477, "learning_rate": 0.002, "loss": 2.5699, "step": 94320 }, { "epoch": 0.18792633558587274, "grad_norm": 0.18002043664455414, "learning_rate": 0.002, "loss": 2.5541, "step": 94330 }, { "epoch": 0.18794625780951166, "grad_norm": 0.1577557474374771, "learning_rate": 0.002, "loss": 2.5671, "step": 94340 }, { "epoch": 0.18796618003315058, "grad_norm": 0.18261875212192535, "learning_rate": 0.002, "loss": 2.5702, "step": 94350 }, { "epoch": 0.1879861022567895, "grad_norm": 0.14121559262275696, "learning_rate": 0.002, "loss": 2.5678, "step": 94360 }, { "epoch": 0.1880060244804284, "grad_norm": 0.1690906435251236, "learning_rate": 0.002, "loss": 2.5725, "step": 94370 }, { "epoch": 0.18802594670406733, "grad_norm": 0.1563083678483963, "learning_rate": 0.002, "loss": 2.5686, "step": 94380 }, { "epoch": 0.18804586892770622, "grad_norm": 0.15448488295078278, "learning_rate": 0.002, "loss": 2.5808, "step": 94390 }, { "epoch": 0.18806579115134514, "grad_norm": 0.18213346600532532, "learning_rate": 0.002, "loss": 2.5672, "step": 94400 }, { "epoch": 0.18808571337498406, "grad_norm": 0.172489196062088, "learning_rate": 0.002, "loss": 2.5716, "step": 94410 }, { "epoch": 0.18810563559862298, "grad_norm": 0.14598087966442108, "learning_rate": 0.002, "loss": 2.583, "step": 94420 }, { "epoch": 0.1881255578222619, "grad_norm": 0.17095109820365906, "learning_rate": 0.002, "loss": 2.5856, "step": 94430 }, { "epoch": 0.18814548004590081, "grad_norm": 0.16609622538089752, "learning_rate": 0.002, "loss": 2.5656, "step": 94440 }, { "epoch": 0.1881654022695397, "grad_norm": 0.16431206464767456, "learning_rate": 0.002, "loss": 2.5651, "step": 94450 }, { "epoch": 0.18818532449317862, "grad_norm": 0.17005617916584015, "learning_rate": 0.002, "loss": 2.5752, "step": 94460 }, { "epoch": 0.18820524671681754, "grad_norm": 0.15945352613925934, "learning_rate": 0.002, "loss": 2.5676, "step": 94470 }, { "epoch": 0.18822516894045646, "grad_norm": 0.17128685116767883, "learning_rate": 0.002, "loss": 2.5739, "step": 94480 }, { "epoch": 0.18824509116409538, "grad_norm": 0.16324451565742493, "learning_rate": 0.002, "loss": 2.5728, "step": 94490 }, { "epoch": 0.1882650133877343, "grad_norm": 0.1798207014799118, "learning_rate": 0.002, "loss": 2.5659, "step": 94500 }, { "epoch": 0.1882849356113732, "grad_norm": 0.1789417564868927, "learning_rate": 0.002, "loss": 2.5516, "step": 94510 }, { "epoch": 0.1883048578350121, "grad_norm": 0.17544780671596527, "learning_rate": 0.002, "loss": 2.5595, "step": 94520 }, { "epoch": 0.18832478005865103, "grad_norm": 0.1688699722290039, "learning_rate": 0.002, "loss": 2.548, "step": 94530 }, { "epoch": 0.18834470228228994, "grad_norm": 0.18195219337940216, "learning_rate": 0.002, "loss": 2.5697, "step": 94540 }, { "epoch": 0.18836462450592886, "grad_norm": 0.16412466764450073, "learning_rate": 0.002, "loss": 2.573, "step": 94550 }, { "epoch": 0.18838454672956778, "grad_norm": 0.1488889753818512, "learning_rate": 0.002, "loss": 2.5588, "step": 94560 }, { "epoch": 0.18840446895320667, "grad_norm": 0.2004890739917755, "learning_rate": 0.002, "loss": 2.5775, "step": 94570 }, { "epoch": 0.1884243911768456, "grad_norm": 0.1706233024597168, "learning_rate": 0.002, "loss": 2.5533, "step": 94580 }, { "epoch": 0.1884443134004845, "grad_norm": 0.18635523319244385, "learning_rate": 0.002, "loss": 2.5773, "step": 94590 }, { "epoch": 0.18846423562412343, "grad_norm": 0.153604194521904, "learning_rate": 0.002, "loss": 2.5602, "step": 94600 }, { "epoch": 0.18848415784776235, "grad_norm": 0.16974782943725586, "learning_rate": 0.002, "loss": 2.5733, "step": 94610 }, { "epoch": 0.18850408007140124, "grad_norm": 0.13825704157352448, "learning_rate": 0.002, "loss": 2.5618, "step": 94620 }, { "epoch": 0.18852400229504016, "grad_norm": 0.21854561567306519, "learning_rate": 0.002, "loss": 2.5844, "step": 94630 }, { "epoch": 0.18854392451867907, "grad_norm": 0.1631125509738922, "learning_rate": 0.002, "loss": 2.5807, "step": 94640 }, { "epoch": 0.188563846742318, "grad_norm": 0.15895821154117584, "learning_rate": 0.002, "loss": 2.5619, "step": 94650 }, { "epoch": 0.1885837689659569, "grad_norm": 0.16524440050125122, "learning_rate": 0.002, "loss": 2.5636, "step": 94660 }, { "epoch": 0.18860369118959583, "grad_norm": 0.16313326358795166, "learning_rate": 0.002, "loss": 2.5744, "step": 94670 }, { "epoch": 0.18862361341323472, "grad_norm": 0.17680595815181732, "learning_rate": 0.002, "loss": 2.5666, "step": 94680 }, { "epoch": 0.18864353563687364, "grad_norm": 0.15843136608600616, "learning_rate": 0.002, "loss": 2.5552, "step": 94690 }, { "epoch": 0.18866345786051256, "grad_norm": 0.13765336573123932, "learning_rate": 0.002, "loss": 2.5638, "step": 94700 }, { "epoch": 0.18868338008415148, "grad_norm": 0.20407958328723907, "learning_rate": 0.002, "loss": 2.5562, "step": 94710 }, { "epoch": 0.1887033023077904, "grad_norm": 0.16436007618904114, "learning_rate": 0.002, "loss": 2.5755, "step": 94720 }, { "epoch": 0.1887232245314293, "grad_norm": 0.16284073889255524, "learning_rate": 0.002, "loss": 2.5794, "step": 94730 }, { "epoch": 0.1887431467550682, "grad_norm": 0.144573375582695, "learning_rate": 0.002, "loss": 2.5591, "step": 94740 }, { "epoch": 0.18876306897870712, "grad_norm": 0.18934279680252075, "learning_rate": 0.002, "loss": 2.5711, "step": 94750 }, { "epoch": 0.18878299120234604, "grad_norm": 0.19159945845603943, "learning_rate": 0.002, "loss": 2.5667, "step": 94760 }, { "epoch": 0.18880291342598496, "grad_norm": 0.16689613461494446, "learning_rate": 0.002, "loss": 2.5782, "step": 94770 }, { "epoch": 0.18882283564962388, "grad_norm": 0.15456794202327728, "learning_rate": 0.002, "loss": 2.5774, "step": 94780 }, { "epoch": 0.1888427578732628, "grad_norm": 0.19124269485473633, "learning_rate": 0.002, "loss": 2.571, "step": 94790 }, { "epoch": 0.18886268009690169, "grad_norm": 0.17965932190418243, "learning_rate": 0.002, "loss": 2.555, "step": 94800 }, { "epoch": 0.1888826023205406, "grad_norm": 0.17824706435203552, "learning_rate": 0.002, "loss": 2.5661, "step": 94810 }, { "epoch": 0.18890252454417952, "grad_norm": 0.1657422035932541, "learning_rate": 0.002, "loss": 2.5637, "step": 94820 }, { "epoch": 0.18892244676781844, "grad_norm": 0.17403686046600342, "learning_rate": 0.002, "loss": 2.5791, "step": 94830 }, { "epoch": 0.18894236899145736, "grad_norm": 0.13497495651245117, "learning_rate": 0.002, "loss": 2.5718, "step": 94840 }, { "epoch": 0.18896229121509625, "grad_norm": 0.16084259748458862, "learning_rate": 0.002, "loss": 2.5604, "step": 94850 }, { "epoch": 0.18898221343873517, "grad_norm": 0.1600169688463211, "learning_rate": 0.002, "loss": 2.5574, "step": 94860 }, { "epoch": 0.1890021356623741, "grad_norm": 0.1720127910375595, "learning_rate": 0.002, "loss": 2.5771, "step": 94870 }, { "epoch": 0.189022057886013, "grad_norm": 0.14680948853492737, "learning_rate": 0.002, "loss": 2.5643, "step": 94880 }, { "epoch": 0.18904198010965192, "grad_norm": 0.19937995076179504, "learning_rate": 0.002, "loss": 2.576, "step": 94890 }, { "epoch": 0.18906190233329084, "grad_norm": 0.14841154217720032, "learning_rate": 0.002, "loss": 2.5607, "step": 94900 }, { "epoch": 0.18908182455692973, "grad_norm": 0.15490177273750305, "learning_rate": 0.002, "loss": 2.5586, "step": 94910 }, { "epoch": 0.18910174678056865, "grad_norm": 0.17088137567043304, "learning_rate": 0.002, "loss": 2.5689, "step": 94920 }, { "epoch": 0.18912166900420757, "grad_norm": 0.1538381576538086, "learning_rate": 0.002, "loss": 2.5732, "step": 94930 }, { "epoch": 0.1891415912278465, "grad_norm": 0.14316242933273315, "learning_rate": 0.002, "loss": 2.5847, "step": 94940 }, { "epoch": 0.1891615134514854, "grad_norm": 0.17630881071090698, "learning_rate": 0.002, "loss": 2.576, "step": 94950 }, { "epoch": 0.18918143567512433, "grad_norm": 0.16629379987716675, "learning_rate": 0.002, "loss": 2.5698, "step": 94960 }, { "epoch": 0.18920135789876322, "grad_norm": 0.18701203167438507, "learning_rate": 0.002, "loss": 2.5739, "step": 94970 }, { "epoch": 0.18922128012240214, "grad_norm": 0.1573166847229004, "learning_rate": 0.002, "loss": 2.5616, "step": 94980 }, { "epoch": 0.18924120234604105, "grad_norm": 0.1625208854675293, "learning_rate": 0.002, "loss": 2.5639, "step": 94990 }, { "epoch": 0.18926112456967997, "grad_norm": 0.16744765639305115, "learning_rate": 0.002, "loss": 2.5629, "step": 95000 }, { "epoch": 0.1892810467933189, "grad_norm": 0.1551259309053421, "learning_rate": 0.002, "loss": 2.56, "step": 95010 }, { "epoch": 0.1893009690169578, "grad_norm": 0.1730870008468628, "learning_rate": 0.002, "loss": 2.5493, "step": 95020 }, { "epoch": 0.1893208912405967, "grad_norm": 0.18282173573970795, "learning_rate": 0.002, "loss": 2.5588, "step": 95030 }, { "epoch": 0.18934081346423562, "grad_norm": 0.15293197333812714, "learning_rate": 0.002, "loss": 2.5569, "step": 95040 }, { "epoch": 0.18936073568787454, "grad_norm": 0.15982559323310852, "learning_rate": 0.002, "loss": 2.5652, "step": 95050 }, { "epoch": 0.18938065791151346, "grad_norm": 0.18470506370067596, "learning_rate": 0.002, "loss": 2.5615, "step": 95060 }, { "epoch": 0.18940058013515237, "grad_norm": 0.14880767464637756, "learning_rate": 0.002, "loss": 2.5648, "step": 95070 }, { "epoch": 0.18942050235879127, "grad_norm": 0.1650516241788864, "learning_rate": 0.002, "loss": 2.5665, "step": 95080 }, { "epoch": 0.18944042458243018, "grad_norm": 0.1713220775127411, "learning_rate": 0.002, "loss": 2.5575, "step": 95090 }, { "epoch": 0.1894603468060691, "grad_norm": 0.18039226531982422, "learning_rate": 0.002, "loss": 2.5642, "step": 95100 }, { "epoch": 0.18948026902970802, "grad_norm": 0.15867741405963898, "learning_rate": 0.002, "loss": 2.5559, "step": 95110 }, { "epoch": 0.18950019125334694, "grad_norm": 0.15039825439453125, "learning_rate": 0.002, "loss": 2.552, "step": 95120 }, { "epoch": 0.18952011347698586, "grad_norm": 0.21812650561332703, "learning_rate": 0.002, "loss": 2.5715, "step": 95130 }, { "epoch": 0.18954003570062475, "grad_norm": 0.141108438372612, "learning_rate": 0.002, "loss": 2.5684, "step": 95140 }, { "epoch": 0.18955995792426367, "grad_norm": 0.1611384004354477, "learning_rate": 0.002, "loss": 2.5646, "step": 95150 }, { "epoch": 0.18957988014790259, "grad_norm": 0.18326927721500397, "learning_rate": 0.002, "loss": 2.58, "step": 95160 }, { "epoch": 0.1895998023715415, "grad_norm": 0.19818760454654694, "learning_rate": 0.002, "loss": 2.5771, "step": 95170 }, { "epoch": 0.18961972459518042, "grad_norm": 0.18769831955432892, "learning_rate": 0.002, "loss": 2.5653, "step": 95180 }, { "epoch": 0.18963964681881934, "grad_norm": 0.15676943957805634, "learning_rate": 0.002, "loss": 2.5696, "step": 95190 }, { "epoch": 0.18965956904245823, "grad_norm": 0.18551097810268402, "learning_rate": 0.002, "loss": 2.5633, "step": 95200 }, { "epoch": 0.18967949126609715, "grad_norm": 0.15381252765655518, "learning_rate": 0.002, "loss": 2.5762, "step": 95210 }, { "epoch": 0.18969941348973607, "grad_norm": 0.17608243227005005, "learning_rate": 0.002, "loss": 2.5657, "step": 95220 }, { "epoch": 0.189719335713375, "grad_norm": 0.14928270876407623, "learning_rate": 0.002, "loss": 2.5724, "step": 95230 }, { "epoch": 0.1897392579370139, "grad_norm": 0.19165095686912537, "learning_rate": 0.002, "loss": 2.5718, "step": 95240 }, { "epoch": 0.18975918016065282, "grad_norm": 0.1505809873342514, "learning_rate": 0.002, "loss": 2.5606, "step": 95250 }, { "epoch": 0.18977910238429171, "grad_norm": 0.14786814153194427, "learning_rate": 0.002, "loss": 2.5838, "step": 95260 }, { "epoch": 0.18979902460793063, "grad_norm": 0.13122104108333588, "learning_rate": 0.002, "loss": 2.5725, "step": 95270 }, { "epoch": 0.18981894683156955, "grad_norm": 0.18107372522354126, "learning_rate": 0.002, "loss": 2.5751, "step": 95280 }, { "epoch": 0.18983886905520847, "grad_norm": 0.17690037190914154, "learning_rate": 0.002, "loss": 2.5614, "step": 95290 }, { "epoch": 0.1898587912788474, "grad_norm": 0.15276774764060974, "learning_rate": 0.002, "loss": 2.5703, "step": 95300 }, { "epoch": 0.1898787135024863, "grad_norm": 0.16616038978099823, "learning_rate": 0.002, "loss": 2.564, "step": 95310 }, { "epoch": 0.1898986357261252, "grad_norm": 0.18884797394275665, "learning_rate": 0.002, "loss": 2.5782, "step": 95320 }, { "epoch": 0.18991855794976412, "grad_norm": 0.16632048785686493, "learning_rate": 0.002, "loss": 2.5693, "step": 95330 }, { "epoch": 0.18993848017340303, "grad_norm": 0.16965748369693756, "learning_rate": 0.002, "loss": 2.5637, "step": 95340 }, { "epoch": 0.18995840239704195, "grad_norm": 0.17653337121009827, "learning_rate": 0.002, "loss": 2.5618, "step": 95350 }, { "epoch": 0.18997832462068087, "grad_norm": 0.18258538842201233, "learning_rate": 0.002, "loss": 2.5612, "step": 95360 }, { "epoch": 0.18999824684431976, "grad_norm": 0.1575007140636444, "learning_rate": 0.002, "loss": 2.5671, "step": 95370 }, { "epoch": 0.19001816906795868, "grad_norm": 0.14838473498821259, "learning_rate": 0.002, "loss": 2.5707, "step": 95380 }, { "epoch": 0.1900380912915976, "grad_norm": 0.1571759730577469, "learning_rate": 0.002, "loss": 2.573, "step": 95390 }, { "epoch": 0.19005801351523652, "grad_norm": 0.17331255972385406, "learning_rate": 0.002, "loss": 2.5707, "step": 95400 }, { "epoch": 0.19007793573887544, "grad_norm": 0.16866235435009003, "learning_rate": 0.002, "loss": 2.57, "step": 95410 }, { "epoch": 0.19009785796251435, "grad_norm": 0.22209934890270233, "learning_rate": 0.002, "loss": 2.5729, "step": 95420 }, { "epoch": 0.19011778018615325, "grad_norm": 0.1582535356283188, "learning_rate": 0.002, "loss": 2.5833, "step": 95430 }, { "epoch": 0.19013770240979216, "grad_norm": 0.1493268460035324, "learning_rate": 0.002, "loss": 2.5773, "step": 95440 }, { "epoch": 0.19015762463343108, "grad_norm": 0.14930206537246704, "learning_rate": 0.002, "loss": 2.567, "step": 95450 }, { "epoch": 0.19017754685707, "grad_norm": 0.138713538646698, "learning_rate": 0.002, "loss": 2.5719, "step": 95460 }, { "epoch": 0.19019746908070892, "grad_norm": 0.15389497578144073, "learning_rate": 0.002, "loss": 2.5563, "step": 95470 }, { "epoch": 0.19021739130434784, "grad_norm": 0.1704082489013672, "learning_rate": 0.002, "loss": 2.5693, "step": 95480 }, { "epoch": 0.19023731352798673, "grad_norm": 0.1860683709383011, "learning_rate": 0.002, "loss": 2.5593, "step": 95490 }, { "epoch": 0.19025723575162565, "grad_norm": 0.16595181822776794, "learning_rate": 0.002, "loss": 2.5614, "step": 95500 }, { "epoch": 0.19027715797526457, "grad_norm": 0.15642553567886353, "learning_rate": 0.002, "loss": 2.5556, "step": 95510 }, { "epoch": 0.19029708019890348, "grad_norm": 0.20731192827224731, "learning_rate": 0.002, "loss": 2.557, "step": 95520 }, { "epoch": 0.1903170024225424, "grad_norm": 0.14983509480953217, "learning_rate": 0.002, "loss": 2.5691, "step": 95530 }, { "epoch": 0.19033692464618132, "grad_norm": 0.17852284014225006, "learning_rate": 0.002, "loss": 2.5551, "step": 95540 }, { "epoch": 0.1903568468698202, "grad_norm": 0.14877741038799286, "learning_rate": 0.002, "loss": 2.5884, "step": 95550 }, { "epoch": 0.19037676909345913, "grad_norm": 0.17066234350204468, "learning_rate": 0.002, "loss": 2.5805, "step": 95560 }, { "epoch": 0.19039669131709805, "grad_norm": 0.15807990729808807, "learning_rate": 0.002, "loss": 2.5617, "step": 95570 }, { "epoch": 0.19041661354073697, "grad_norm": 0.17762257158756256, "learning_rate": 0.002, "loss": 2.5627, "step": 95580 }, { "epoch": 0.19043653576437589, "grad_norm": 0.159628227353096, "learning_rate": 0.002, "loss": 2.5604, "step": 95590 }, { "epoch": 0.19045645798801478, "grad_norm": 0.25138986110687256, "learning_rate": 0.002, "loss": 2.5808, "step": 95600 }, { "epoch": 0.1904763802116537, "grad_norm": 0.14648711681365967, "learning_rate": 0.002, "loss": 2.5649, "step": 95610 }, { "epoch": 0.1904963024352926, "grad_norm": 0.147511288523674, "learning_rate": 0.002, "loss": 2.5627, "step": 95620 }, { "epoch": 0.19051622465893153, "grad_norm": 0.19255219399929047, "learning_rate": 0.002, "loss": 2.5736, "step": 95630 }, { "epoch": 0.19053614688257045, "grad_norm": 0.16773539781570435, "learning_rate": 0.002, "loss": 2.5512, "step": 95640 }, { "epoch": 0.19055606910620937, "grad_norm": 0.1428954154253006, "learning_rate": 0.002, "loss": 2.5639, "step": 95650 }, { "epoch": 0.19057599132984826, "grad_norm": 0.17191381752490997, "learning_rate": 0.002, "loss": 2.5669, "step": 95660 }, { "epoch": 0.19059591355348718, "grad_norm": 0.14695709943771362, "learning_rate": 0.002, "loss": 2.5654, "step": 95670 }, { "epoch": 0.1906158357771261, "grad_norm": 0.209974467754364, "learning_rate": 0.002, "loss": 2.5623, "step": 95680 }, { "epoch": 0.19063575800076502, "grad_norm": 0.22392208874225616, "learning_rate": 0.002, "loss": 2.5875, "step": 95690 }, { "epoch": 0.19065568022440393, "grad_norm": 0.1593385934829712, "learning_rate": 0.002, "loss": 2.5637, "step": 95700 }, { "epoch": 0.19067560244804285, "grad_norm": 0.16594016551971436, "learning_rate": 0.002, "loss": 2.5513, "step": 95710 }, { "epoch": 0.19069552467168174, "grad_norm": 0.18043139576911926, "learning_rate": 0.002, "loss": 2.577, "step": 95720 }, { "epoch": 0.19071544689532066, "grad_norm": 0.16813905537128448, "learning_rate": 0.002, "loss": 2.5756, "step": 95730 }, { "epoch": 0.19073536911895958, "grad_norm": 0.203253373503685, "learning_rate": 0.002, "loss": 2.5656, "step": 95740 }, { "epoch": 0.1907552913425985, "grad_norm": 0.14167924225330353, "learning_rate": 0.002, "loss": 2.5632, "step": 95750 }, { "epoch": 0.19077521356623742, "grad_norm": 0.2331952005624771, "learning_rate": 0.002, "loss": 2.5585, "step": 95760 }, { "epoch": 0.19079513578987634, "grad_norm": 0.15019699931144714, "learning_rate": 0.002, "loss": 2.5656, "step": 95770 }, { "epoch": 0.19081505801351523, "grad_norm": 0.15087367594242096, "learning_rate": 0.002, "loss": 2.5479, "step": 95780 }, { "epoch": 0.19083498023715414, "grad_norm": 0.18477284908294678, "learning_rate": 0.002, "loss": 2.5672, "step": 95790 }, { "epoch": 0.19085490246079306, "grad_norm": 0.1585923433303833, "learning_rate": 0.002, "loss": 2.5815, "step": 95800 }, { "epoch": 0.19087482468443198, "grad_norm": 0.19647948443889618, "learning_rate": 0.002, "loss": 2.5899, "step": 95810 }, { "epoch": 0.1908947469080709, "grad_norm": 0.14314432442188263, "learning_rate": 0.002, "loss": 2.5614, "step": 95820 }, { "epoch": 0.19091466913170982, "grad_norm": 0.2006121128797531, "learning_rate": 0.002, "loss": 2.5727, "step": 95830 }, { "epoch": 0.1909345913553487, "grad_norm": 0.19219285249710083, "learning_rate": 0.002, "loss": 2.5739, "step": 95840 }, { "epoch": 0.19095451357898763, "grad_norm": 0.16108393669128418, "learning_rate": 0.002, "loss": 2.5634, "step": 95850 }, { "epoch": 0.19097443580262655, "grad_norm": 0.17205655574798584, "learning_rate": 0.002, "loss": 2.5598, "step": 95860 }, { "epoch": 0.19099435802626546, "grad_norm": 0.16621524095535278, "learning_rate": 0.002, "loss": 2.5722, "step": 95870 }, { "epoch": 0.19101428024990438, "grad_norm": 0.18999846279621124, "learning_rate": 0.002, "loss": 2.5677, "step": 95880 }, { "epoch": 0.19103420247354327, "grad_norm": 0.1662120670080185, "learning_rate": 0.002, "loss": 2.5675, "step": 95890 }, { "epoch": 0.1910541246971822, "grad_norm": 0.1526353508234024, "learning_rate": 0.002, "loss": 2.5631, "step": 95900 }, { "epoch": 0.1910740469208211, "grad_norm": 0.1497640609741211, "learning_rate": 0.002, "loss": 2.5545, "step": 95910 }, { "epoch": 0.19109396914446003, "grad_norm": 0.23975816369056702, "learning_rate": 0.002, "loss": 2.5672, "step": 95920 }, { "epoch": 0.19111389136809895, "grad_norm": 0.15832704305648804, "learning_rate": 0.002, "loss": 2.5642, "step": 95930 }, { "epoch": 0.19113381359173787, "grad_norm": 0.14882420003414154, "learning_rate": 0.002, "loss": 2.5599, "step": 95940 }, { "epoch": 0.19115373581537676, "grad_norm": 0.1714400202035904, "learning_rate": 0.002, "loss": 2.5639, "step": 95950 }, { "epoch": 0.19117365803901568, "grad_norm": 0.1843264400959015, "learning_rate": 0.002, "loss": 2.5518, "step": 95960 }, { "epoch": 0.1911935802626546, "grad_norm": 0.1833076924085617, "learning_rate": 0.002, "loss": 2.5701, "step": 95970 }, { "epoch": 0.1912135024862935, "grad_norm": 0.146879181265831, "learning_rate": 0.002, "loss": 2.5582, "step": 95980 }, { "epoch": 0.19123342470993243, "grad_norm": 0.19702406227588654, "learning_rate": 0.002, "loss": 2.5537, "step": 95990 }, { "epoch": 0.19125334693357135, "grad_norm": 0.15295080840587616, "learning_rate": 0.002, "loss": 2.5607, "step": 96000 }, { "epoch": 0.19127326915721024, "grad_norm": 0.1859445869922638, "learning_rate": 0.002, "loss": 2.5637, "step": 96010 }, { "epoch": 0.19129319138084916, "grad_norm": 0.17203238606452942, "learning_rate": 0.002, "loss": 2.5655, "step": 96020 }, { "epoch": 0.19131311360448808, "grad_norm": 0.1482851356267929, "learning_rate": 0.002, "loss": 2.5725, "step": 96030 }, { "epoch": 0.191333035828127, "grad_norm": 0.16400305926799774, "learning_rate": 0.002, "loss": 2.5494, "step": 96040 }, { "epoch": 0.19135295805176591, "grad_norm": 0.18833376467227936, "learning_rate": 0.002, "loss": 2.5616, "step": 96050 }, { "epoch": 0.19137288027540483, "grad_norm": 0.17176376283168793, "learning_rate": 0.002, "loss": 2.5652, "step": 96060 }, { "epoch": 0.19139280249904372, "grad_norm": 0.16252468526363373, "learning_rate": 0.002, "loss": 2.5647, "step": 96070 }, { "epoch": 0.19141272472268264, "grad_norm": 0.1677773892879486, "learning_rate": 0.002, "loss": 2.5605, "step": 96080 }, { "epoch": 0.19143264694632156, "grad_norm": 0.15780892968177795, "learning_rate": 0.002, "loss": 2.5639, "step": 96090 }, { "epoch": 0.19145256916996048, "grad_norm": 0.15584486722946167, "learning_rate": 0.002, "loss": 2.5551, "step": 96100 }, { "epoch": 0.1914724913935994, "grad_norm": 0.15180298686027527, "learning_rate": 0.002, "loss": 2.5653, "step": 96110 }, { "epoch": 0.1914924136172383, "grad_norm": 0.18719270825386047, "learning_rate": 0.002, "loss": 2.576, "step": 96120 }, { "epoch": 0.1915123358408772, "grad_norm": 0.18372070789337158, "learning_rate": 0.002, "loss": 2.5768, "step": 96130 }, { "epoch": 0.19153225806451613, "grad_norm": 0.13478198647499084, "learning_rate": 0.002, "loss": 2.5642, "step": 96140 }, { "epoch": 0.19155218028815504, "grad_norm": 0.162414088845253, "learning_rate": 0.002, "loss": 2.575, "step": 96150 }, { "epoch": 0.19157210251179396, "grad_norm": 0.1865256279706955, "learning_rate": 0.002, "loss": 2.5644, "step": 96160 }, { "epoch": 0.19159202473543288, "grad_norm": 0.16172562539577484, "learning_rate": 0.002, "loss": 2.5656, "step": 96170 }, { "epoch": 0.19161194695907177, "grad_norm": 0.14396415650844574, "learning_rate": 0.002, "loss": 2.5554, "step": 96180 }, { "epoch": 0.1916318691827107, "grad_norm": 0.18828748166561127, "learning_rate": 0.002, "loss": 2.5887, "step": 96190 }, { "epoch": 0.1916517914063496, "grad_norm": 0.1686515361070633, "learning_rate": 0.002, "loss": 2.5677, "step": 96200 }, { "epoch": 0.19167171362998853, "grad_norm": 0.16554763913154602, "learning_rate": 0.002, "loss": 2.5725, "step": 96210 }, { "epoch": 0.19169163585362745, "grad_norm": 0.1472441852092743, "learning_rate": 0.002, "loss": 2.563, "step": 96220 }, { "epoch": 0.19171155807726636, "grad_norm": 0.1946445107460022, "learning_rate": 0.002, "loss": 2.5606, "step": 96230 }, { "epoch": 0.19173148030090525, "grad_norm": 0.1304626613855362, "learning_rate": 0.002, "loss": 2.5783, "step": 96240 }, { "epoch": 0.19175140252454417, "grad_norm": 0.21669569611549377, "learning_rate": 0.002, "loss": 2.5771, "step": 96250 }, { "epoch": 0.1917713247481831, "grad_norm": 0.16150422394275665, "learning_rate": 0.002, "loss": 2.5863, "step": 96260 }, { "epoch": 0.191791246971822, "grad_norm": 0.17701561748981476, "learning_rate": 0.002, "loss": 2.5756, "step": 96270 }, { "epoch": 0.19181116919546093, "grad_norm": 0.13333411514759064, "learning_rate": 0.002, "loss": 2.5736, "step": 96280 }, { "epoch": 0.19183109141909985, "grad_norm": 0.17280752956867218, "learning_rate": 0.002, "loss": 2.5563, "step": 96290 }, { "epoch": 0.19185101364273874, "grad_norm": 0.1637319177389145, "learning_rate": 0.002, "loss": 2.5772, "step": 96300 }, { "epoch": 0.19187093586637766, "grad_norm": 0.150619238615036, "learning_rate": 0.002, "loss": 2.5449, "step": 96310 }, { "epoch": 0.19189085809001658, "grad_norm": 0.16961045563220978, "learning_rate": 0.002, "loss": 2.5716, "step": 96320 }, { "epoch": 0.1919107803136555, "grad_norm": 0.17300008237361908, "learning_rate": 0.002, "loss": 2.5764, "step": 96330 }, { "epoch": 0.1919307025372944, "grad_norm": 0.17363592982292175, "learning_rate": 0.002, "loss": 2.5675, "step": 96340 }, { "epoch": 0.1919506247609333, "grad_norm": 0.17194987833499908, "learning_rate": 0.002, "loss": 2.5674, "step": 96350 }, { "epoch": 0.19197054698457222, "grad_norm": 0.19593004882335663, "learning_rate": 0.002, "loss": 2.5713, "step": 96360 }, { "epoch": 0.19199046920821114, "grad_norm": 0.1817786693572998, "learning_rate": 0.002, "loss": 2.5696, "step": 96370 }, { "epoch": 0.19201039143185006, "grad_norm": 0.17473191022872925, "learning_rate": 0.002, "loss": 2.5616, "step": 96380 }, { "epoch": 0.19203031365548898, "grad_norm": 0.15577219426631927, "learning_rate": 0.002, "loss": 2.5622, "step": 96390 }, { "epoch": 0.1920502358791279, "grad_norm": 0.18327127397060394, "learning_rate": 0.002, "loss": 2.5577, "step": 96400 }, { "epoch": 0.19207015810276679, "grad_norm": 0.14811231195926666, "learning_rate": 0.002, "loss": 2.5695, "step": 96410 }, { "epoch": 0.1920900803264057, "grad_norm": 0.1963697075843811, "learning_rate": 0.002, "loss": 2.5599, "step": 96420 }, { "epoch": 0.19211000255004462, "grad_norm": 0.15586192905902863, "learning_rate": 0.002, "loss": 2.5782, "step": 96430 }, { "epoch": 0.19212992477368354, "grad_norm": 0.1464751809835434, "learning_rate": 0.002, "loss": 2.5745, "step": 96440 }, { "epoch": 0.19214984699732246, "grad_norm": 0.16535764932632446, "learning_rate": 0.002, "loss": 2.556, "step": 96450 }, { "epoch": 0.19216976922096138, "grad_norm": 0.17530599236488342, "learning_rate": 0.002, "loss": 2.5882, "step": 96460 }, { "epoch": 0.19218969144460027, "grad_norm": 0.15008780360221863, "learning_rate": 0.002, "loss": 2.5718, "step": 96470 }, { "epoch": 0.1922096136682392, "grad_norm": 0.1836296170949936, "learning_rate": 0.002, "loss": 2.5815, "step": 96480 }, { "epoch": 0.1922295358918781, "grad_norm": 0.15607106685638428, "learning_rate": 0.002, "loss": 2.5504, "step": 96490 }, { "epoch": 0.19224945811551702, "grad_norm": 0.17076584696769714, "learning_rate": 0.002, "loss": 2.5812, "step": 96500 }, { "epoch": 0.19226938033915594, "grad_norm": 0.15590530633926392, "learning_rate": 0.002, "loss": 2.5748, "step": 96510 }, { "epoch": 0.19228930256279486, "grad_norm": 0.1742234081029892, "learning_rate": 0.002, "loss": 2.5515, "step": 96520 }, { "epoch": 0.19230922478643375, "grad_norm": 0.1761523187160492, "learning_rate": 0.002, "loss": 2.5704, "step": 96530 }, { "epoch": 0.19232914701007267, "grad_norm": 0.15902498364448547, "learning_rate": 0.002, "loss": 2.5688, "step": 96540 }, { "epoch": 0.1923490692337116, "grad_norm": 0.18001046776771545, "learning_rate": 0.002, "loss": 2.5575, "step": 96550 }, { "epoch": 0.1923689914573505, "grad_norm": 0.1638111025094986, "learning_rate": 0.002, "loss": 2.583, "step": 96560 }, { "epoch": 0.19238891368098943, "grad_norm": 0.17281968891620636, "learning_rate": 0.002, "loss": 2.5607, "step": 96570 }, { "epoch": 0.19240883590462834, "grad_norm": 0.17261643707752228, "learning_rate": 0.002, "loss": 2.5635, "step": 96580 }, { "epoch": 0.19242875812826724, "grad_norm": 0.14955247938632965, "learning_rate": 0.002, "loss": 2.5542, "step": 96590 }, { "epoch": 0.19244868035190615, "grad_norm": 0.1664975881576538, "learning_rate": 0.002, "loss": 2.5686, "step": 96600 }, { "epoch": 0.19246860257554507, "grad_norm": 0.1575751006603241, "learning_rate": 0.002, "loss": 2.5719, "step": 96610 }, { "epoch": 0.192488524799184, "grad_norm": 0.19283032417297363, "learning_rate": 0.002, "loss": 2.5757, "step": 96620 }, { "epoch": 0.1925084470228229, "grad_norm": 0.17622920870780945, "learning_rate": 0.002, "loss": 2.5477, "step": 96630 }, { "epoch": 0.1925283692464618, "grad_norm": 0.17437921464443207, "learning_rate": 0.002, "loss": 2.5629, "step": 96640 }, { "epoch": 0.19254829147010072, "grad_norm": 0.17843513190746307, "learning_rate": 0.002, "loss": 2.5751, "step": 96650 }, { "epoch": 0.19256821369373964, "grad_norm": 0.15892784297466278, "learning_rate": 0.002, "loss": 2.5668, "step": 96660 }, { "epoch": 0.19258813591737856, "grad_norm": 0.20877473056316376, "learning_rate": 0.002, "loss": 2.5678, "step": 96670 }, { "epoch": 0.19260805814101747, "grad_norm": 0.16915501654148102, "learning_rate": 0.002, "loss": 2.5493, "step": 96680 }, { "epoch": 0.1926279803646564, "grad_norm": 0.17073898017406464, "learning_rate": 0.002, "loss": 2.5617, "step": 96690 }, { "epoch": 0.19264790258829528, "grad_norm": 0.15491323173046112, "learning_rate": 0.002, "loss": 2.5711, "step": 96700 }, { "epoch": 0.1926678248119342, "grad_norm": 0.15360958874225616, "learning_rate": 0.002, "loss": 2.5635, "step": 96710 }, { "epoch": 0.19268774703557312, "grad_norm": 0.17305217683315277, "learning_rate": 0.002, "loss": 2.574, "step": 96720 }, { "epoch": 0.19270766925921204, "grad_norm": 0.18056945502758026, "learning_rate": 0.002, "loss": 2.5602, "step": 96730 }, { "epoch": 0.19272759148285096, "grad_norm": 0.1685863882303238, "learning_rate": 0.002, "loss": 2.5618, "step": 96740 }, { "epoch": 0.19274751370648988, "grad_norm": 0.14518855512142181, "learning_rate": 0.002, "loss": 2.5422, "step": 96750 }, { "epoch": 0.19276743593012877, "grad_norm": 0.16012179851531982, "learning_rate": 0.002, "loss": 2.5708, "step": 96760 }, { "epoch": 0.19278735815376769, "grad_norm": 0.15724538266658783, "learning_rate": 0.002, "loss": 2.5679, "step": 96770 }, { "epoch": 0.1928072803774066, "grad_norm": 0.155580073595047, "learning_rate": 0.002, "loss": 2.5759, "step": 96780 }, { "epoch": 0.19282720260104552, "grad_norm": 0.15025538206100464, "learning_rate": 0.002, "loss": 2.5689, "step": 96790 }, { "epoch": 0.19284712482468444, "grad_norm": 0.16842123866081238, "learning_rate": 0.002, "loss": 2.5617, "step": 96800 }, { "epoch": 0.19286704704832336, "grad_norm": 0.15311415493488312, "learning_rate": 0.002, "loss": 2.5795, "step": 96810 }, { "epoch": 0.19288696927196225, "grad_norm": 0.1635136753320694, "learning_rate": 0.002, "loss": 2.5641, "step": 96820 }, { "epoch": 0.19290689149560117, "grad_norm": 0.1569327563047409, "learning_rate": 0.002, "loss": 2.584, "step": 96830 }, { "epoch": 0.1929268137192401, "grad_norm": 0.15504714846611023, "learning_rate": 0.002, "loss": 2.5769, "step": 96840 }, { "epoch": 0.192946735942879, "grad_norm": 0.14268675446510315, "learning_rate": 0.002, "loss": 2.574, "step": 96850 }, { "epoch": 0.19296665816651792, "grad_norm": 0.17717015743255615, "learning_rate": 0.002, "loss": 2.5593, "step": 96860 }, { "epoch": 0.19298658039015681, "grad_norm": 0.1595899760723114, "learning_rate": 0.002, "loss": 2.5712, "step": 96870 }, { "epoch": 0.19300650261379573, "grad_norm": 0.17514149844646454, "learning_rate": 0.002, "loss": 2.5679, "step": 96880 }, { "epoch": 0.19302642483743465, "grad_norm": 0.16986820101737976, "learning_rate": 0.002, "loss": 2.5788, "step": 96890 }, { "epoch": 0.19304634706107357, "grad_norm": 0.1620156466960907, "learning_rate": 0.002, "loss": 2.5625, "step": 96900 }, { "epoch": 0.1930662692847125, "grad_norm": 0.16604478657245636, "learning_rate": 0.002, "loss": 2.5701, "step": 96910 }, { "epoch": 0.1930861915083514, "grad_norm": 0.15069831907749176, "learning_rate": 0.002, "loss": 2.5508, "step": 96920 }, { "epoch": 0.1931061137319903, "grad_norm": 0.1516672670841217, "learning_rate": 0.002, "loss": 2.5576, "step": 96930 }, { "epoch": 0.19312603595562922, "grad_norm": 0.16700612008571625, "learning_rate": 0.002, "loss": 2.5705, "step": 96940 }, { "epoch": 0.19314595817926813, "grad_norm": 0.1584770232439041, "learning_rate": 0.002, "loss": 2.5756, "step": 96950 }, { "epoch": 0.19316588040290705, "grad_norm": 0.16867853701114655, "learning_rate": 0.002, "loss": 2.566, "step": 96960 }, { "epoch": 0.19318580262654597, "grad_norm": 0.18279190361499786, "learning_rate": 0.002, "loss": 2.5646, "step": 96970 }, { "epoch": 0.1932057248501849, "grad_norm": 0.1903582066297531, "learning_rate": 0.002, "loss": 2.5619, "step": 96980 }, { "epoch": 0.19322564707382378, "grad_norm": 0.14577417075634003, "learning_rate": 0.002, "loss": 2.5649, "step": 96990 }, { "epoch": 0.1932455692974627, "grad_norm": 0.14501139521598816, "learning_rate": 0.002, "loss": 2.5707, "step": 97000 }, { "epoch": 0.19326549152110162, "grad_norm": 0.18853312730789185, "learning_rate": 0.002, "loss": 2.5777, "step": 97010 }, { "epoch": 0.19328541374474054, "grad_norm": 0.1586494892835617, "learning_rate": 0.002, "loss": 2.5772, "step": 97020 }, { "epoch": 0.19330533596837945, "grad_norm": 0.21922431886196136, "learning_rate": 0.002, "loss": 2.5563, "step": 97030 }, { "epoch": 0.19332525819201837, "grad_norm": 0.1596805453300476, "learning_rate": 0.002, "loss": 2.5643, "step": 97040 }, { "epoch": 0.19334518041565726, "grad_norm": 0.15584930777549744, "learning_rate": 0.002, "loss": 2.57, "step": 97050 }, { "epoch": 0.19336510263929618, "grad_norm": 0.19413301348686218, "learning_rate": 0.002, "loss": 2.5604, "step": 97060 }, { "epoch": 0.1933850248629351, "grad_norm": 0.1628110110759735, "learning_rate": 0.002, "loss": 2.5727, "step": 97070 }, { "epoch": 0.19340494708657402, "grad_norm": 0.1517959088087082, "learning_rate": 0.002, "loss": 2.5516, "step": 97080 }, { "epoch": 0.19342486931021294, "grad_norm": 0.1776103526353836, "learning_rate": 0.002, "loss": 2.5649, "step": 97090 }, { "epoch": 0.19344479153385183, "grad_norm": 0.17219872772693634, "learning_rate": 0.002, "loss": 2.5767, "step": 97100 }, { "epoch": 0.19346471375749075, "grad_norm": 0.17151634395122528, "learning_rate": 0.002, "loss": 2.5709, "step": 97110 }, { "epoch": 0.19348463598112967, "grad_norm": 0.1670350283384323, "learning_rate": 0.002, "loss": 2.5573, "step": 97120 }, { "epoch": 0.19350455820476858, "grad_norm": 0.14034278690814972, "learning_rate": 0.002, "loss": 2.5696, "step": 97130 }, { "epoch": 0.1935244804284075, "grad_norm": 0.1689920723438263, "learning_rate": 0.002, "loss": 2.5668, "step": 97140 }, { "epoch": 0.19354440265204642, "grad_norm": 0.17620891332626343, "learning_rate": 0.002, "loss": 2.5793, "step": 97150 }, { "epoch": 0.1935643248756853, "grad_norm": 0.1805872768163681, "learning_rate": 0.002, "loss": 2.5649, "step": 97160 }, { "epoch": 0.19358424709932423, "grad_norm": 0.16076959669589996, "learning_rate": 0.002, "loss": 2.5735, "step": 97170 }, { "epoch": 0.19360416932296315, "grad_norm": 0.17617808282375336, "learning_rate": 0.002, "loss": 2.5728, "step": 97180 }, { "epoch": 0.19362409154660207, "grad_norm": 0.15351639688014984, "learning_rate": 0.002, "loss": 2.5644, "step": 97190 }, { "epoch": 0.19364401377024099, "grad_norm": 0.16264773905277252, "learning_rate": 0.002, "loss": 2.562, "step": 97200 }, { "epoch": 0.1936639359938799, "grad_norm": 0.16803032159805298, "learning_rate": 0.002, "loss": 2.5624, "step": 97210 }, { "epoch": 0.1936838582175188, "grad_norm": 0.14262929558753967, "learning_rate": 0.002, "loss": 2.5542, "step": 97220 }, { "epoch": 0.1937037804411577, "grad_norm": 0.1390748918056488, "learning_rate": 0.002, "loss": 2.5629, "step": 97230 }, { "epoch": 0.19372370266479663, "grad_norm": 0.19598820805549622, "learning_rate": 0.002, "loss": 2.575, "step": 97240 }, { "epoch": 0.19374362488843555, "grad_norm": 0.1496150642633438, "learning_rate": 0.002, "loss": 2.5615, "step": 97250 }, { "epoch": 0.19376354711207447, "grad_norm": 0.14178697764873505, "learning_rate": 0.002, "loss": 2.5541, "step": 97260 }, { "epoch": 0.1937834693357134, "grad_norm": 0.17325951159000397, "learning_rate": 0.002, "loss": 2.5503, "step": 97270 }, { "epoch": 0.19380339155935228, "grad_norm": 0.14600132405757904, "learning_rate": 0.002, "loss": 2.5828, "step": 97280 }, { "epoch": 0.1938233137829912, "grad_norm": 0.1679760366678238, "learning_rate": 0.002, "loss": 2.5582, "step": 97290 }, { "epoch": 0.19384323600663012, "grad_norm": 0.15724435448646545, "learning_rate": 0.002, "loss": 2.5766, "step": 97300 }, { "epoch": 0.19386315823026903, "grad_norm": 0.16560299694538116, "learning_rate": 0.002, "loss": 2.5686, "step": 97310 }, { "epoch": 0.19388308045390795, "grad_norm": 0.15594927966594696, "learning_rate": 0.002, "loss": 2.5785, "step": 97320 }, { "epoch": 0.19390300267754687, "grad_norm": 0.14067816734313965, "learning_rate": 0.002, "loss": 2.5788, "step": 97330 }, { "epoch": 0.19392292490118576, "grad_norm": 0.15533985197544098, "learning_rate": 0.002, "loss": 2.5639, "step": 97340 }, { "epoch": 0.19394284712482468, "grad_norm": 0.17885592579841614, "learning_rate": 0.002, "loss": 2.5755, "step": 97350 }, { "epoch": 0.1939627693484636, "grad_norm": 0.1680319756269455, "learning_rate": 0.002, "loss": 2.5536, "step": 97360 }, { "epoch": 0.19398269157210252, "grad_norm": 0.17991501092910767, "learning_rate": 0.002, "loss": 2.5649, "step": 97370 }, { "epoch": 0.19400261379574144, "grad_norm": 0.13916748762130737, "learning_rate": 0.002, "loss": 2.5725, "step": 97380 }, { "epoch": 0.19402253601938033, "grad_norm": 0.17040017247200012, "learning_rate": 0.002, "loss": 2.557, "step": 97390 }, { "epoch": 0.19404245824301924, "grad_norm": 0.1706661432981491, "learning_rate": 0.002, "loss": 2.5633, "step": 97400 }, { "epoch": 0.19406238046665816, "grad_norm": 0.14996080100536346, "learning_rate": 0.002, "loss": 2.5822, "step": 97410 }, { "epoch": 0.19408230269029708, "grad_norm": 0.16115900874137878, "learning_rate": 0.002, "loss": 2.5759, "step": 97420 }, { "epoch": 0.194102224913936, "grad_norm": 0.18360237777233124, "learning_rate": 0.002, "loss": 2.575, "step": 97430 }, { "epoch": 0.19412214713757492, "grad_norm": 0.1468920111656189, "learning_rate": 0.002, "loss": 2.5902, "step": 97440 }, { "epoch": 0.1941420693612138, "grad_norm": 0.17888866364955902, "learning_rate": 0.002, "loss": 2.5765, "step": 97450 }, { "epoch": 0.19416199158485273, "grad_norm": 0.16124142706394196, "learning_rate": 0.002, "loss": 2.5893, "step": 97460 }, { "epoch": 0.19418191380849165, "grad_norm": 0.16788935661315918, "learning_rate": 0.002, "loss": 2.5723, "step": 97470 }, { "epoch": 0.19420183603213056, "grad_norm": 0.18342867493629456, "learning_rate": 0.002, "loss": 2.5616, "step": 97480 }, { "epoch": 0.19422175825576948, "grad_norm": 0.16221268475055695, "learning_rate": 0.002, "loss": 2.5745, "step": 97490 }, { "epoch": 0.1942416804794084, "grad_norm": 0.16025294363498688, "learning_rate": 0.002, "loss": 2.5802, "step": 97500 }, { "epoch": 0.1942616027030473, "grad_norm": 0.16346138715744019, "learning_rate": 0.002, "loss": 2.5743, "step": 97510 }, { "epoch": 0.1942815249266862, "grad_norm": 0.1590014547109604, "learning_rate": 0.002, "loss": 2.583, "step": 97520 }, { "epoch": 0.19430144715032513, "grad_norm": 0.18011294305324554, "learning_rate": 0.002, "loss": 2.5759, "step": 97530 }, { "epoch": 0.19432136937396405, "grad_norm": 0.16063547134399414, "learning_rate": 0.002, "loss": 2.5732, "step": 97540 }, { "epoch": 0.19434129159760297, "grad_norm": 0.17657572031021118, "learning_rate": 0.002, "loss": 2.5691, "step": 97550 }, { "epoch": 0.19436121382124188, "grad_norm": 0.15226618945598602, "learning_rate": 0.002, "loss": 2.5754, "step": 97560 }, { "epoch": 0.19438113604488078, "grad_norm": 0.17886246740818024, "learning_rate": 0.002, "loss": 2.5662, "step": 97570 }, { "epoch": 0.1944010582685197, "grad_norm": 0.17155992984771729, "learning_rate": 0.002, "loss": 2.5704, "step": 97580 }, { "epoch": 0.1944209804921586, "grad_norm": 0.1996101289987564, "learning_rate": 0.002, "loss": 2.5765, "step": 97590 }, { "epoch": 0.19444090271579753, "grad_norm": 0.15109851956367493, "learning_rate": 0.002, "loss": 2.5725, "step": 97600 }, { "epoch": 0.19446082493943645, "grad_norm": 0.17251834273338318, "learning_rate": 0.002, "loss": 2.5658, "step": 97610 }, { "epoch": 0.19448074716307534, "grad_norm": 0.14974963665008545, "learning_rate": 0.002, "loss": 2.5784, "step": 97620 }, { "epoch": 0.19450066938671426, "grad_norm": 0.13605660200119019, "learning_rate": 0.002, "loss": 2.5657, "step": 97630 }, { "epoch": 0.19452059161035318, "grad_norm": 0.16354764997959137, "learning_rate": 0.002, "loss": 2.5708, "step": 97640 }, { "epoch": 0.1945405138339921, "grad_norm": 0.1668684333562851, "learning_rate": 0.002, "loss": 2.559, "step": 97650 }, { "epoch": 0.19456043605763101, "grad_norm": 0.17447307705879211, "learning_rate": 0.002, "loss": 2.5869, "step": 97660 }, { "epoch": 0.19458035828126993, "grad_norm": 0.17325663566589355, "learning_rate": 0.002, "loss": 2.5757, "step": 97670 }, { "epoch": 0.19460028050490882, "grad_norm": 0.15154194831848145, "learning_rate": 0.002, "loss": 2.5726, "step": 97680 }, { "epoch": 0.19462020272854774, "grad_norm": 0.1579408049583435, "learning_rate": 0.002, "loss": 2.5675, "step": 97690 }, { "epoch": 0.19464012495218666, "grad_norm": 0.16896790266036987, "learning_rate": 0.002, "loss": 2.5644, "step": 97700 }, { "epoch": 0.19466004717582558, "grad_norm": 0.16963514685630798, "learning_rate": 0.002, "loss": 2.5541, "step": 97710 }, { "epoch": 0.1946799693994645, "grad_norm": 0.19889402389526367, "learning_rate": 0.002, "loss": 2.5708, "step": 97720 }, { "epoch": 0.19469989162310342, "grad_norm": 0.14169971644878387, "learning_rate": 0.002, "loss": 2.5599, "step": 97730 }, { "epoch": 0.1947198138467423, "grad_norm": 0.18172864615917206, "learning_rate": 0.002, "loss": 2.5658, "step": 97740 }, { "epoch": 0.19473973607038123, "grad_norm": 0.14650622010231018, "learning_rate": 0.002, "loss": 2.5634, "step": 97750 }, { "epoch": 0.19475965829402014, "grad_norm": 0.20427291095256805, "learning_rate": 0.002, "loss": 2.5624, "step": 97760 }, { "epoch": 0.19477958051765906, "grad_norm": 0.15332238376140594, "learning_rate": 0.002, "loss": 2.5641, "step": 97770 }, { "epoch": 0.19479950274129798, "grad_norm": 0.1754481941461563, "learning_rate": 0.002, "loss": 2.5602, "step": 97780 }, { "epoch": 0.1948194249649369, "grad_norm": 0.19261784851551056, "learning_rate": 0.002, "loss": 2.5695, "step": 97790 }, { "epoch": 0.1948393471885758, "grad_norm": 0.16825775802135468, "learning_rate": 0.002, "loss": 2.5538, "step": 97800 }, { "epoch": 0.1948592694122147, "grad_norm": 0.1457490622997284, "learning_rate": 0.002, "loss": 2.5633, "step": 97810 }, { "epoch": 0.19487919163585363, "grad_norm": 0.15893539786338806, "learning_rate": 0.002, "loss": 2.5515, "step": 97820 }, { "epoch": 0.19489911385949255, "grad_norm": 0.185231015086174, "learning_rate": 0.002, "loss": 2.581, "step": 97830 }, { "epoch": 0.19491903608313146, "grad_norm": 0.16241559386253357, "learning_rate": 0.002, "loss": 2.5507, "step": 97840 }, { "epoch": 0.19493895830677035, "grad_norm": 0.17016394436359406, "learning_rate": 0.002, "loss": 2.5758, "step": 97850 }, { "epoch": 0.19495888053040927, "grad_norm": 0.17584189772605896, "learning_rate": 0.002, "loss": 2.5648, "step": 97860 }, { "epoch": 0.1949788027540482, "grad_norm": 0.15250711143016815, "learning_rate": 0.002, "loss": 2.5782, "step": 97870 }, { "epoch": 0.1949987249776871, "grad_norm": 0.17449729144573212, "learning_rate": 0.002, "loss": 2.5684, "step": 97880 }, { "epoch": 0.19501864720132603, "grad_norm": 0.1943172812461853, "learning_rate": 0.002, "loss": 2.5673, "step": 97890 }, { "epoch": 0.19503856942496495, "grad_norm": 0.18853841722011566, "learning_rate": 0.002, "loss": 2.5561, "step": 97900 }, { "epoch": 0.19505849164860384, "grad_norm": 0.1531028002500534, "learning_rate": 0.002, "loss": 2.5733, "step": 97910 }, { "epoch": 0.19507841387224276, "grad_norm": 0.17094489932060242, "learning_rate": 0.002, "loss": 2.5786, "step": 97920 }, { "epoch": 0.19509833609588167, "grad_norm": 0.16427426040172577, "learning_rate": 0.002, "loss": 2.5531, "step": 97930 }, { "epoch": 0.1951182583195206, "grad_norm": 0.16986210644245148, "learning_rate": 0.002, "loss": 2.5608, "step": 97940 }, { "epoch": 0.1951381805431595, "grad_norm": 0.14826133847236633, "learning_rate": 0.002, "loss": 2.5809, "step": 97950 }, { "epoch": 0.19515810276679843, "grad_norm": 0.2155958116054535, "learning_rate": 0.002, "loss": 2.5622, "step": 97960 }, { "epoch": 0.19517802499043732, "grad_norm": 0.15513202548027039, "learning_rate": 0.002, "loss": 2.5626, "step": 97970 }, { "epoch": 0.19519794721407624, "grad_norm": 0.14575092494487762, "learning_rate": 0.002, "loss": 2.5576, "step": 97980 }, { "epoch": 0.19521786943771516, "grad_norm": 0.17402030527591705, "learning_rate": 0.002, "loss": 2.5724, "step": 97990 }, { "epoch": 0.19523779166135408, "grad_norm": 0.14454691112041473, "learning_rate": 0.002, "loss": 2.5706, "step": 98000 }, { "epoch": 0.195257713884993, "grad_norm": 0.1493140459060669, "learning_rate": 0.002, "loss": 2.5701, "step": 98010 }, { "epoch": 0.1952776361086319, "grad_norm": 0.18438398838043213, "learning_rate": 0.002, "loss": 2.572, "step": 98020 }, { "epoch": 0.1952975583322708, "grad_norm": 0.15397590398788452, "learning_rate": 0.002, "loss": 2.5782, "step": 98030 }, { "epoch": 0.19531748055590972, "grad_norm": 0.16383925080299377, "learning_rate": 0.002, "loss": 2.5587, "step": 98040 }, { "epoch": 0.19533740277954864, "grad_norm": 0.1630524843931198, "learning_rate": 0.002, "loss": 2.5614, "step": 98050 }, { "epoch": 0.19535732500318756, "grad_norm": 0.1635276973247528, "learning_rate": 0.002, "loss": 2.5635, "step": 98060 }, { "epoch": 0.19537724722682648, "grad_norm": 0.1822616308927536, "learning_rate": 0.002, "loss": 2.574, "step": 98070 }, { "epoch": 0.1953971694504654, "grad_norm": 0.14616429805755615, "learning_rate": 0.002, "loss": 2.5793, "step": 98080 }, { "epoch": 0.1954170916741043, "grad_norm": 0.1648424118757248, "learning_rate": 0.002, "loss": 2.5551, "step": 98090 }, { "epoch": 0.1954370138977432, "grad_norm": 0.15651708841323853, "learning_rate": 0.002, "loss": 2.5686, "step": 98100 }, { "epoch": 0.19545693612138212, "grad_norm": 0.17710289359092712, "learning_rate": 0.002, "loss": 2.538, "step": 98110 }, { "epoch": 0.19547685834502104, "grad_norm": 0.17205463349819183, "learning_rate": 0.002, "loss": 2.5789, "step": 98120 }, { "epoch": 0.19549678056865996, "grad_norm": 0.16712726652622223, "learning_rate": 0.002, "loss": 2.5605, "step": 98130 }, { "epoch": 0.19551670279229885, "grad_norm": 0.17092378437519073, "learning_rate": 0.002, "loss": 2.5605, "step": 98140 }, { "epoch": 0.19553662501593777, "grad_norm": 0.16503135859966278, "learning_rate": 0.002, "loss": 2.5576, "step": 98150 }, { "epoch": 0.1955565472395767, "grad_norm": 0.14529070258140564, "learning_rate": 0.002, "loss": 2.5598, "step": 98160 }, { "epoch": 0.1955764694632156, "grad_norm": 0.1464788317680359, "learning_rate": 0.002, "loss": 2.5588, "step": 98170 }, { "epoch": 0.19559639168685453, "grad_norm": 0.15220317244529724, "learning_rate": 0.002, "loss": 2.5658, "step": 98180 }, { "epoch": 0.19561631391049344, "grad_norm": 0.187963604927063, "learning_rate": 0.002, "loss": 2.5768, "step": 98190 }, { "epoch": 0.19563623613413234, "grad_norm": 0.14737503230571747, "learning_rate": 0.002, "loss": 2.5597, "step": 98200 }, { "epoch": 0.19565615835777125, "grad_norm": 0.16412803530693054, "learning_rate": 0.002, "loss": 2.5676, "step": 98210 }, { "epoch": 0.19567608058141017, "grad_norm": 0.18703140318393707, "learning_rate": 0.002, "loss": 2.5648, "step": 98220 }, { "epoch": 0.1956960028050491, "grad_norm": 0.15024983882904053, "learning_rate": 0.002, "loss": 2.5593, "step": 98230 }, { "epoch": 0.195715925028688, "grad_norm": 0.18773522973060608, "learning_rate": 0.002, "loss": 2.5641, "step": 98240 }, { "epoch": 0.19573584725232693, "grad_norm": 0.19747406244277954, "learning_rate": 0.002, "loss": 2.5739, "step": 98250 }, { "epoch": 0.19575576947596582, "grad_norm": 0.1590682417154312, "learning_rate": 0.002, "loss": 2.5592, "step": 98260 }, { "epoch": 0.19577569169960474, "grad_norm": 0.1598537564277649, "learning_rate": 0.002, "loss": 2.5639, "step": 98270 }, { "epoch": 0.19579561392324366, "grad_norm": 0.2083946317434311, "learning_rate": 0.002, "loss": 2.5699, "step": 98280 }, { "epoch": 0.19581553614688257, "grad_norm": 0.14521196484565735, "learning_rate": 0.002, "loss": 2.5568, "step": 98290 }, { "epoch": 0.1958354583705215, "grad_norm": 0.16233201324939728, "learning_rate": 0.002, "loss": 2.5692, "step": 98300 }, { "epoch": 0.1958553805941604, "grad_norm": 0.13732939958572388, "learning_rate": 0.002, "loss": 2.5686, "step": 98310 }, { "epoch": 0.1958753028177993, "grad_norm": 0.1642100065946579, "learning_rate": 0.002, "loss": 2.5559, "step": 98320 }, { "epoch": 0.19589522504143822, "grad_norm": 0.15606676042079926, "learning_rate": 0.002, "loss": 2.5566, "step": 98330 }, { "epoch": 0.19591514726507714, "grad_norm": 0.15736113488674164, "learning_rate": 0.002, "loss": 2.5658, "step": 98340 }, { "epoch": 0.19593506948871606, "grad_norm": 0.2201710045337677, "learning_rate": 0.002, "loss": 2.5531, "step": 98350 }, { "epoch": 0.19595499171235498, "grad_norm": 0.1548076719045639, "learning_rate": 0.002, "loss": 2.569, "step": 98360 }, { "epoch": 0.19597491393599387, "grad_norm": 0.17518998682498932, "learning_rate": 0.002, "loss": 2.5595, "step": 98370 }, { "epoch": 0.19599483615963278, "grad_norm": 0.17156754434108734, "learning_rate": 0.002, "loss": 2.562, "step": 98380 }, { "epoch": 0.1960147583832717, "grad_norm": 0.17339441180229187, "learning_rate": 0.002, "loss": 2.5668, "step": 98390 }, { "epoch": 0.19603468060691062, "grad_norm": 0.16880832612514496, "learning_rate": 0.002, "loss": 2.5764, "step": 98400 }, { "epoch": 0.19605460283054954, "grad_norm": 0.15533331036567688, "learning_rate": 0.002, "loss": 2.5563, "step": 98410 }, { "epoch": 0.19607452505418846, "grad_norm": 0.1654607504606247, "learning_rate": 0.002, "loss": 2.5675, "step": 98420 }, { "epoch": 0.19609444727782735, "grad_norm": 0.19147200882434845, "learning_rate": 0.002, "loss": 2.5695, "step": 98430 }, { "epoch": 0.19611436950146627, "grad_norm": 0.1991884410381317, "learning_rate": 0.002, "loss": 2.5462, "step": 98440 }, { "epoch": 0.1961342917251052, "grad_norm": 0.15370376408100128, "learning_rate": 0.002, "loss": 2.571, "step": 98450 }, { "epoch": 0.1961542139487441, "grad_norm": 0.16617709398269653, "learning_rate": 0.002, "loss": 2.5576, "step": 98460 }, { "epoch": 0.19617413617238302, "grad_norm": 0.1578768789768219, "learning_rate": 0.002, "loss": 2.5577, "step": 98470 }, { "epoch": 0.19619405839602194, "grad_norm": 0.1791037917137146, "learning_rate": 0.002, "loss": 2.5738, "step": 98480 }, { "epoch": 0.19621398061966083, "grad_norm": 0.15227292478084564, "learning_rate": 0.002, "loss": 2.5665, "step": 98490 }, { "epoch": 0.19623390284329975, "grad_norm": 0.16889014840126038, "learning_rate": 0.002, "loss": 2.57, "step": 98500 }, { "epoch": 0.19625382506693867, "grad_norm": 0.1318821758031845, "learning_rate": 0.002, "loss": 2.5577, "step": 98510 }, { "epoch": 0.1962737472905776, "grad_norm": 0.1777392029762268, "learning_rate": 0.002, "loss": 2.5653, "step": 98520 }, { "epoch": 0.1962936695142165, "grad_norm": 0.14185674488544464, "learning_rate": 0.002, "loss": 2.5707, "step": 98530 }, { "epoch": 0.19631359173785543, "grad_norm": 0.16617949306964874, "learning_rate": 0.002, "loss": 2.5561, "step": 98540 }, { "epoch": 0.19633351396149432, "grad_norm": 0.1944490373134613, "learning_rate": 0.002, "loss": 2.588, "step": 98550 }, { "epoch": 0.19635343618513323, "grad_norm": 0.15281404554843903, "learning_rate": 0.002, "loss": 2.5655, "step": 98560 }, { "epoch": 0.19637335840877215, "grad_norm": 0.1641906201839447, "learning_rate": 0.002, "loss": 2.567, "step": 98570 }, { "epoch": 0.19639328063241107, "grad_norm": 0.17864882946014404, "learning_rate": 0.002, "loss": 2.5733, "step": 98580 }, { "epoch": 0.19641320285605, "grad_norm": 0.1842813938856125, "learning_rate": 0.002, "loss": 2.5632, "step": 98590 }, { "epoch": 0.1964331250796889, "grad_norm": 0.14839380979537964, "learning_rate": 0.002, "loss": 2.5572, "step": 98600 }, { "epoch": 0.1964530473033278, "grad_norm": 0.1775231510400772, "learning_rate": 0.002, "loss": 2.583, "step": 98610 }, { "epoch": 0.19647296952696672, "grad_norm": 0.15949559211730957, "learning_rate": 0.002, "loss": 2.5633, "step": 98620 }, { "epoch": 0.19649289175060564, "grad_norm": 0.19134971499443054, "learning_rate": 0.002, "loss": 2.5683, "step": 98630 }, { "epoch": 0.19651281397424455, "grad_norm": 0.14411771297454834, "learning_rate": 0.002, "loss": 2.5401, "step": 98640 }, { "epoch": 0.19653273619788347, "grad_norm": 0.22977392375469208, "learning_rate": 0.002, "loss": 2.5869, "step": 98650 }, { "epoch": 0.19655265842152236, "grad_norm": 0.17695613205432892, "learning_rate": 0.002, "loss": 2.5806, "step": 98660 }, { "epoch": 0.19657258064516128, "grad_norm": 0.1547793745994568, "learning_rate": 0.002, "loss": 2.5787, "step": 98670 }, { "epoch": 0.1965925028688002, "grad_norm": 0.14457182586193085, "learning_rate": 0.002, "loss": 2.5561, "step": 98680 }, { "epoch": 0.19661242509243912, "grad_norm": 0.15485242009162903, "learning_rate": 0.002, "loss": 2.5666, "step": 98690 }, { "epoch": 0.19663234731607804, "grad_norm": 0.155022993683815, "learning_rate": 0.002, "loss": 2.5725, "step": 98700 }, { "epoch": 0.19665226953971696, "grad_norm": 0.2635119557380676, "learning_rate": 0.002, "loss": 2.5652, "step": 98710 }, { "epoch": 0.19667219176335585, "grad_norm": 0.15857328474521637, "learning_rate": 0.002, "loss": 2.5639, "step": 98720 }, { "epoch": 0.19669211398699477, "grad_norm": 0.1657397300004959, "learning_rate": 0.002, "loss": 2.5806, "step": 98730 }, { "epoch": 0.19671203621063368, "grad_norm": 0.14468427002429962, "learning_rate": 0.002, "loss": 2.5622, "step": 98740 }, { "epoch": 0.1967319584342726, "grad_norm": 0.19202418625354767, "learning_rate": 0.002, "loss": 2.573, "step": 98750 }, { "epoch": 0.19675188065791152, "grad_norm": 0.1397162675857544, "learning_rate": 0.002, "loss": 2.5735, "step": 98760 }, { "epoch": 0.19677180288155044, "grad_norm": 0.16017532348632812, "learning_rate": 0.002, "loss": 2.5578, "step": 98770 }, { "epoch": 0.19679172510518933, "grad_norm": 0.1696399450302124, "learning_rate": 0.002, "loss": 2.5921, "step": 98780 }, { "epoch": 0.19681164732882825, "grad_norm": 0.17656663060188293, "learning_rate": 0.002, "loss": 2.5809, "step": 98790 }, { "epoch": 0.19683156955246717, "grad_norm": 0.17277218401432037, "learning_rate": 0.002, "loss": 2.5672, "step": 98800 }, { "epoch": 0.19685149177610609, "grad_norm": 0.16779091954231262, "learning_rate": 0.002, "loss": 2.5539, "step": 98810 }, { "epoch": 0.196871413999745, "grad_norm": 0.15824303030967712, "learning_rate": 0.002, "loss": 2.5818, "step": 98820 }, { "epoch": 0.19689133622338392, "grad_norm": 0.18203161656856537, "learning_rate": 0.002, "loss": 2.5705, "step": 98830 }, { "epoch": 0.1969112584470228, "grad_norm": 0.16158969700336456, "learning_rate": 0.002, "loss": 2.5714, "step": 98840 }, { "epoch": 0.19693118067066173, "grad_norm": 0.16528767347335815, "learning_rate": 0.002, "loss": 2.5572, "step": 98850 }, { "epoch": 0.19695110289430065, "grad_norm": 0.1492433100938797, "learning_rate": 0.002, "loss": 2.5581, "step": 98860 }, { "epoch": 0.19697102511793957, "grad_norm": 0.176987424492836, "learning_rate": 0.002, "loss": 2.5682, "step": 98870 }, { "epoch": 0.1969909473415785, "grad_norm": 0.18491916358470917, "learning_rate": 0.002, "loss": 2.5577, "step": 98880 }, { "epoch": 0.19701086956521738, "grad_norm": 0.17146839201450348, "learning_rate": 0.002, "loss": 2.5867, "step": 98890 }, { "epoch": 0.1970307917888563, "grad_norm": 0.1810244470834732, "learning_rate": 0.002, "loss": 2.5896, "step": 98900 }, { "epoch": 0.19705071401249522, "grad_norm": 0.1630474030971527, "learning_rate": 0.002, "loss": 2.5781, "step": 98910 }, { "epoch": 0.19707063623613413, "grad_norm": 0.1889691799879074, "learning_rate": 0.002, "loss": 2.5637, "step": 98920 }, { "epoch": 0.19709055845977305, "grad_norm": 0.1600913405418396, "learning_rate": 0.002, "loss": 2.5743, "step": 98930 }, { "epoch": 0.19711048068341197, "grad_norm": 0.18205656111240387, "learning_rate": 0.002, "loss": 2.5709, "step": 98940 }, { "epoch": 0.19713040290705086, "grad_norm": 0.15056206285953522, "learning_rate": 0.002, "loss": 2.5598, "step": 98950 }, { "epoch": 0.19715032513068978, "grad_norm": 0.17326441407203674, "learning_rate": 0.002, "loss": 2.5685, "step": 98960 }, { "epoch": 0.1971702473543287, "grad_norm": 0.1865469217300415, "learning_rate": 0.002, "loss": 2.5755, "step": 98970 }, { "epoch": 0.19719016957796762, "grad_norm": 0.16551358997821808, "learning_rate": 0.002, "loss": 2.5702, "step": 98980 }, { "epoch": 0.19721009180160654, "grad_norm": 0.16620822250843048, "learning_rate": 0.002, "loss": 2.5578, "step": 98990 }, { "epoch": 0.19723001402524545, "grad_norm": 0.1619132161140442, "learning_rate": 0.002, "loss": 2.5721, "step": 99000 }, { "epoch": 0.19724993624888434, "grad_norm": 0.16876785457134247, "learning_rate": 0.002, "loss": 2.5745, "step": 99010 }, { "epoch": 0.19726985847252326, "grad_norm": 0.13284003734588623, "learning_rate": 0.002, "loss": 2.5841, "step": 99020 }, { "epoch": 0.19728978069616218, "grad_norm": 0.20095095038414001, "learning_rate": 0.002, "loss": 2.5676, "step": 99030 }, { "epoch": 0.1973097029198011, "grad_norm": 0.1767987608909607, "learning_rate": 0.002, "loss": 2.5658, "step": 99040 }, { "epoch": 0.19732962514344002, "grad_norm": 0.16217349469661713, "learning_rate": 0.002, "loss": 2.5741, "step": 99050 }, { "epoch": 0.19734954736707894, "grad_norm": 0.1521153748035431, "learning_rate": 0.002, "loss": 2.5654, "step": 99060 }, { "epoch": 0.19736946959071783, "grad_norm": 0.23425473272800446, "learning_rate": 0.002, "loss": 2.5554, "step": 99070 }, { "epoch": 0.19738939181435675, "grad_norm": 0.13500159978866577, "learning_rate": 0.002, "loss": 2.5679, "step": 99080 }, { "epoch": 0.19740931403799566, "grad_norm": 0.17781077325344086, "learning_rate": 0.002, "loss": 2.5665, "step": 99090 }, { "epoch": 0.19742923626163458, "grad_norm": 0.16732068359851837, "learning_rate": 0.002, "loss": 2.5612, "step": 99100 }, { "epoch": 0.1974491584852735, "grad_norm": 0.17692509293556213, "learning_rate": 0.002, "loss": 2.5764, "step": 99110 }, { "epoch": 0.1974690807089124, "grad_norm": 0.15902426838874817, "learning_rate": 0.002, "loss": 2.5626, "step": 99120 }, { "epoch": 0.1974890029325513, "grad_norm": 0.15963010489940643, "learning_rate": 0.002, "loss": 2.5718, "step": 99130 }, { "epoch": 0.19750892515619023, "grad_norm": 0.184161975979805, "learning_rate": 0.002, "loss": 2.56, "step": 99140 }, { "epoch": 0.19752884737982915, "grad_norm": 0.21425104141235352, "learning_rate": 0.002, "loss": 2.5628, "step": 99150 }, { "epoch": 0.19754876960346807, "grad_norm": 0.1673491895198822, "learning_rate": 0.002, "loss": 2.5717, "step": 99160 }, { "epoch": 0.19756869182710698, "grad_norm": 0.17575708031654358, "learning_rate": 0.002, "loss": 2.5653, "step": 99170 }, { "epoch": 0.19758861405074588, "grad_norm": 0.16280639171600342, "learning_rate": 0.002, "loss": 2.5576, "step": 99180 }, { "epoch": 0.1976085362743848, "grad_norm": 0.1764625906944275, "learning_rate": 0.002, "loss": 2.5787, "step": 99190 }, { "epoch": 0.1976284584980237, "grad_norm": 0.15759453177452087, "learning_rate": 0.002, "loss": 2.5562, "step": 99200 }, { "epoch": 0.19764838072166263, "grad_norm": 0.1478487253189087, "learning_rate": 0.002, "loss": 2.5797, "step": 99210 }, { "epoch": 0.19766830294530155, "grad_norm": 0.16111335158348083, "learning_rate": 0.002, "loss": 2.5666, "step": 99220 }, { "epoch": 0.19768822516894047, "grad_norm": 0.18125119805335999, "learning_rate": 0.002, "loss": 2.5668, "step": 99230 }, { "epoch": 0.19770814739257936, "grad_norm": 0.18374913930892944, "learning_rate": 0.002, "loss": 2.567, "step": 99240 }, { "epoch": 0.19772806961621828, "grad_norm": 0.18341180682182312, "learning_rate": 0.002, "loss": 2.5542, "step": 99250 }, { "epoch": 0.1977479918398572, "grad_norm": 0.1740456372499466, "learning_rate": 0.002, "loss": 2.5737, "step": 99260 }, { "epoch": 0.19776791406349611, "grad_norm": 0.15457184612751007, "learning_rate": 0.002, "loss": 2.5653, "step": 99270 }, { "epoch": 0.19778783628713503, "grad_norm": 0.16581743955612183, "learning_rate": 0.002, "loss": 2.5795, "step": 99280 }, { "epoch": 0.19780775851077395, "grad_norm": 0.1584175080060959, "learning_rate": 0.002, "loss": 2.565, "step": 99290 }, { "epoch": 0.19782768073441284, "grad_norm": 0.17620477080345154, "learning_rate": 0.002, "loss": 2.568, "step": 99300 }, { "epoch": 0.19784760295805176, "grad_norm": 0.17601604759693146, "learning_rate": 0.002, "loss": 2.5532, "step": 99310 }, { "epoch": 0.19786752518169068, "grad_norm": 0.1559849977493286, "learning_rate": 0.002, "loss": 2.5811, "step": 99320 }, { "epoch": 0.1978874474053296, "grad_norm": 0.1719844937324524, "learning_rate": 0.002, "loss": 2.5741, "step": 99330 }, { "epoch": 0.19790736962896852, "grad_norm": 0.16895657777786255, "learning_rate": 0.002, "loss": 2.5744, "step": 99340 }, { "epoch": 0.19792729185260743, "grad_norm": 0.14726191759109497, "learning_rate": 0.002, "loss": 2.5685, "step": 99350 }, { "epoch": 0.19794721407624633, "grad_norm": 0.1701892763376236, "learning_rate": 0.002, "loss": 2.5548, "step": 99360 }, { "epoch": 0.19796713629988524, "grad_norm": 0.19597052037715912, "learning_rate": 0.002, "loss": 2.5717, "step": 99370 }, { "epoch": 0.19798705852352416, "grad_norm": 0.14338552951812744, "learning_rate": 0.002, "loss": 2.5694, "step": 99380 }, { "epoch": 0.19800698074716308, "grad_norm": 0.14351193606853485, "learning_rate": 0.002, "loss": 2.5533, "step": 99390 }, { "epoch": 0.198026902970802, "grad_norm": 0.16542695462703705, "learning_rate": 0.002, "loss": 2.5653, "step": 99400 }, { "epoch": 0.1980468251944409, "grad_norm": 0.18015004694461823, "learning_rate": 0.002, "loss": 2.5672, "step": 99410 }, { "epoch": 0.1980667474180798, "grad_norm": 0.20124168694019318, "learning_rate": 0.002, "loss": 2.5685, "step": 99420 }, { "epoch": 0.19808666964171873, "grad_norm": 0.17341533303260803, "learning_rate": 0.002, "loss": 2.5721, "step": 99430 }, { "epoch": 0.19810659186535765, "grad_norm": 0.14889821410179138, "learning_rate": 0.002, "loss": 2.5681, "step": 99440 }, { "epoch": 0.19812651408899656, "grad_norm": 0.20903250575065613, "learning_rate": 0.002, "loss": 2.5744, "step": 99450 }, { "epoch": 0.19814643631263548, "grad_norm": 0.17350450158119202, "learning_rate": 0.002, "loss": 2.5738, "step": 99460 }, { "epoch": 0.19816635853627437, "grad_norm": 0.1575455665588379, "learning_rate": 0.002, "loss": 2.5517, "step": 99470 }, { "epoch": 0.1981862807599133, "grad_norm": 0.1997537612915039, "learning_rate": 0.002, "loss": 2.5743, "step": 99480 }, { "epoch": 0.1982062029835522, "grad_norm": 0.15442076325416565, "learning_rate": 0.002, "loss": 2.5688, "step": 99490 }, { "epoch": 0.19822612520719113, "grad_norm": 0.17199398577213287, "learning_rate": 0.002, "loss": 2.5487, "step": 99500 }, { "epoch": 0.19824604743083005, "grad_norm": 0.2097899168729782, "learning_rate": 0.002, "loss": 2.5647, "step": 99510 }, { "epoch": 0.19826596965446897, "grad_norm": 0.18999582529067993, "learning_rate": 0.002, "loss": 2.5755, "step": 99520 }, { "epoch": 0.19828589187810786, "grad_norm": 0.15329867601394653, "learning_rate": 0.002, "loss": 2.5662, "step": 99530 }, { "epoch": 0.19830581410174677, "grad_norm": 0.19167537987232208, "learning_rate": 0.002, "loss": 2.5611, "step": 99540 }, { "epoch": 0.1983257363253857, "grad_norm": 0.16513627767562866, "learning_rate": 0.002, "loss": 2.5687, "step": 99550 }, { "epoch": 0.1983456585490246, "grad_norm": 0.13321349024772644, "learning_rate": 0.002, "loss": 2.576, "step": 99560 }, { "epoch": 0.19836558077266353, "grad_norm": 0.17126746475696564, "learning_rate": 0.002, "loss": 2.5536, "step": 99570 }, { "epoch": 0.19838550299630245, "grad_norm": 0.1911681741476059, "learning_rate": 0.002, "loss": 2.566, "step": 99580 }, { "epoch": 0.19840542521994134, "grad_norm": 0.15991730988025665, "learning_rate": 0.002, "loss": 2.5684, "step": 99590 }, { "epoch": 0.19842534744358026, "grad_norm": 0.16605684161186218, "learning_rate": 0.002, "loss": 2.5635, "step": 99600 }, { "epoch": 0.19844526966721918, "grad_norm": 0.1857091635465622, "learning_rate": 0.002, "loss": 2.5706, "step": 99610 }, { "epoch": 0.1984651918908581, "grad_norm": 0.1498018354177475, "learning_rate": 0.002, "loss": 2.5823, "step": 99620 }, { "epoch": 0.198485114114497, "grad_norm": 0.156010702252388, "learning_rate": 0.002, "loss": 2.5782, "step": 99630 }, { "epoch": 0.1985050363381359, "grad_norm": 0.2363409548997879, "learning_rate": 0.002, "loss": 2.5659, "step": 99640 }, { "epoch": 0.19852495856177482, "grad_norm": 0.16100408136844635, "learning_rate": 0.002, "loss": 2.5722, "step": 99650 }, { "epoch": 0.19854488078541374, "grad_norm": 0.1739356964826584, "learning_rate": 0.002, "loss": 2.5646, "step": 99660 }, { "epoch": 0.19856480300905266, "grad_norm": 0.1528719961643219, "learning_rate": 0.002, "loss": 2.5711, "step": 99670 }, { "epoch": 0.19858472523269158, "grad_norm": 0.15911799669265747, "learning_rate": 0.002, "loss": 2.5669, "step": 99680 }, { "epoch": 0.1986046474563305, "grad_norm": 0.16072946786880493, "learning_rate": 0.002, "loss": 2.5688, "step": 99690 }, { "epoch": 0.1986245696799694, "grad_norm": 0.16270111501216888, "learning_rate": 0.002, "loss": 2.5724, "step": 99700 }, { "epoch": 0.1986444919036083, "grad_norm": 0.14762680232524872, "learning_rate": 0.002, "loss": 2.5564, "step": 99710 }, { "epoch": 0.19866441412724722, "grad_norm": 0.1663968861103058, "learning_rate": 0.002, "loss": 2.5716, "step": 99720 }, { "epoch": 0.19868433635088614, "grad_norm": 0.17532891035079956, "learning_rate": 0.002, "loss": 2.5732, "step": 99730 }, { "epoch": 0.19870425857452506, "grad_norm": 0.22680863738059998, "learning_rate": 0.002, "loss": 2.5621, "step": 99740 }, { "epoch": 0.19872418079816398, "grad_norm": 0.16304844617843628, "learning_rate": 0.002, "loss": 2.5597, "step": 99750 }, { "epoch": 0.19874410302180287, "grad_norm": 0.13897475600242615, "learning_rate": 0.002, "loss": 2.5567, "step": 99760 }, { "epoch": 0.1987640252454418, "grad_norm": 0.17754876613616943, "learning_rate": 0.002, "loss": 2.567, "step": 99770 }, { "epoch": 0.1987839474690807, "grad_norm": 0.16831113398075104, "learning_rate": 0.002, "loss": 2.5739, "step": 99780 }, { "epoch": 0.19880386969271963, "grad_norm": 0.1705862283706665, "learning_rate": 0.002, "loss": 2.5603, "step": 99790 }, { "epoch": 0.19882379191635854, "grad_norm": 0.19521464407444, "learning_rate": 0.002, "loss": 2.5722, "step": 99800 }, { "epoch": 0.19884371413999746, "grad_norm": 0.16515319049358368, "learning_rate": 0.002, "loss": 2.5753, "step": 99810 }, { "epoch": 0.19886363636363635, "grad_norm": 0.16250723600387573, "learning_rate": 0.002, "loss": 2.5654, "step": 99820 }, { "epoch": 0.19888355858727527, "grad_norm": 0.19827471673488617, "learning_rate": 0.002, "loss": 2.5844, "step": 99830 }, { "epoch": 0.1989034808109142, "grad_norm": 0.15708981454372406, "learning_rate": 0.002, "loss": 2.5797, "step": 99840 }, { "epoch": 0.1989234030345531, "grad_norm": 0.13347576558589935, "learning_rate": 0.002, "loss": 2.5702, "step": 99850 }, { "epoch": 0.19894332525819203, "grad_norm": 0.1550813466310501, "learning_rate": 0.002, "loss": 2.5579, "step": 99860 }, { "epoch": 0.19896324748183092, "grad_norm": 0.196205735206604, "learning_rate": 0.002, "loss": 2.5693, "step": 99870 }, { "epoch": 0.19898316970546984, "grad_norm": 0.16375844180583954, "learning_rate": 0.002, "loss": 2.5815, "step": 99880 }, { "epoch": 0.19900309192910876, "grad_norm": 0.20712387561798096, "learning_rate": 0.002, "loss": 2.5618, "step": 99890 }, { "epoch": 0.19902301415274767, "grad_norm": 0.1555485725402832, "learning_rate": 0.002, "loss": 2.5672, "step": 99900 }, { "epoch": 0.1990429363763866, "grad_norm": 0.18789738416671753, "learning_rate": 0.002, "loss": 2.5805, "step": 99910 }, { "epoch": 0.1990628586000255, "grad_norm": 0.1595017910003662, "learning_rate": 0.002, "loss": 2.557, "step": 99920 }, { "epoch": 0.1990827808236644, "grad_norm": 0.17789782583713531, "learning_rate": 0.002, "loss": 2.556, "step": 99930 }, { "epoch": 0.19910270304730332, "grad_norm": 0.1672588586807251, "learning_rate": 0.002, "loss": 2.5669, "step": 99940 }, { "epoch": 0.19912262527094224, "grad_norm": 0.18678447604179382, "learning_rate": 0.002, "loss": 2.5825, "step": 99950 }, { "epoch": 0.19914254749458116, "grad_norm": 0.17717893421649933, "learning_rate": 0.002, "loss": 2.5699, "step": 99960 }, { "epoch": 0.19916246971822008, "grad_norm": 0.15046295523643494, "learning_rate": 0.002, "loss": 2.5669, "step": 99970 }, { "epoch": 0.199182391941859, "grad_norm": 0.164577454328537, "learning_rate": 0.002, "loss": 2.5685, "step": 99980 }, { "epoch": 0.19920231416549788, "grad_norm": 0.18528077006340027, "learning_rate": 0.002, "loss": 2.5842, "step": 99990 }, { "epoch": 0.1992222363891368, "grad_norm": 0.14388374984264374, "learning_rate": 0.002, "loss": 2.5694, "step": 100000 }, { "epoch": 0.19924215861277572, "grad_norm": 0.20154041051864624, "learning_rate": 0.002, "loss": 2.5703, "step": 100010 }, { "epoch": 0.19926208083641464, "grad_norm": 0.14156025648117065, "learning_rate": 0.002, "loss": 2.5572, "step": 100020 }, { "epoch": 0.19928200306005356, "grad_norm": 0.17060278356075287, "learning_rate": 0.002, "loss": 2.5903, "step": 100030 }, { "epoch": 0.19930192528369248, "grad_norm": 0.21102823317050934, "learning_rate": 0.002, "loss": 2.5537, "step": 100040 }, { "epoch": 0.19932184750733137, "grad_norm": 0.12940892577171326, "learning_rate": 0.002, "loss": 2.5673, "step": 100050 }, { "epoch": 0.1993417697309703, "grad_norm": 0.18917852640151978, "learning_rate": 0.002, "loss": 2.5603, "step": 100060 }, { "epoch": 0.1993616919546092, "grad_norm": 0.191401407122612, "learning_rate": 0.002, "loss": 2.5788, "step": 100070 }, { "epoch": 0.19938161417824812, "grad_norm": 0.14970187842845917, "learning_rate": 0.002, "loss": 2.5654, "step": 100080 }, { "epoch": 0.19940153640188704, "grad_norm": 0.17008769512176514, "learning_rate": 0.002, "loss": 2.5697, "step": 100090 }, { "epoch": 0.19942145862552596, "grad_norm": 0.1933782994747162, "learning_rate": 0.002, "loss": 2.5689, "step": 100100 }, { "epoch": 0.19944138084916485, "grad_norm": 0.177371084690094, "learning_rate": 0.002, "loss": 2.5571, "step": 100110 }, { "epoch": 0.19946130307280377, "grad_norm": 0.19034592807292938, "learning_rate": 0.002, "loss": 2.5728, "step": 100120 }, { "epoch": 0.1994812252964427, "grad_norm": 0.20155274868011475, "learning_rate": 0.002, "loss": 2.5668, "step": 100130 }, { "epoch": 0.1995011475200816, "grad_norm": 0.15205521881580353, "learning_rate": 0.002, "loss": 2.5555, "step": 100140 }, { "epoch": 0.19952106974372052, "grad_norm": 0.19851188361644745, "learning_rate": 0.002, "loss": 2.5515, "step": 100150 }, { "epoch": 0.19954099196735942, "grad_norm": 0.1383773237466812, "learning_rate": 0.002, "loss": 2.5849, "step": 100160 }, { "epoch": 0.19956091419099833, "grad_norm": 0.14737039804458618, "learning_rate": 0.002, "loss": 2.5649, "step": 100170 }, { "epoch": 0.19958083641463725, "grad_norm": 0.21267051994800568, "learning_rate": 0.002, "loss": 2.5661, "step": 100180 }, { "epoch": 0.19960075863827617, "grad_norm": 0.14634452760219574, "learning_rate": 0.002, "loss": 2.571, "step": 100190 }, { "epoch": 0.1996206808619151, "grad_norm": 0.15516121685504913, "learning_rate": 0.002, "loss": 2.5558, "step": 100200 }, { "epoch": 0.199640603085554, "grad_norm": 0.19210325181484222, "learning_rate": 0.002, "loss": 2.5767, "step": 100210 }, { "epoch": 0.1996605253091929, "grad_norm": 0.19914647936820984, "learning_rate": 0.002, "loss": 2.5696, "step": 100220 }, { "epoch": 0.19968044753283182, "grad_norm": 0.15618963539600372, "learning_rate": 0.002, "loss": 2.5666, "step": 100230 }, { "epoch": 0.19970036975647074, "grad_norm": 0.15831393003463745, "learning_rate": 0.002, "loss": 2.5603, "step": 100240 }, { "epoch": 0.19972029198010965, "grad_norm": 0.1712302565574646, "learning_rate": 0.002, "loss": 2.5587, "step": 100250 }, { "epoch": 0.19974021420374857, "grad_norm": 0.14509110152721405, "learning_rate": 0.002, "loss": 2.5537, "step": 100260 }, { "epoch": 0.1997601364273875, "grad_norm": 0.19437669217586517, "learning_rate": 0.002, "loss": 2.564, "step": 100270 }, { "epoch": 0.19978005865102638, "grad_norm": 0.15678909420967102, "learning_rate": 0.002, "loss": 2.5768, "step": 100280 }, { "epoch": 0.1997999808746653, "grad_norm": 0.14235983788967133, "learning_rate": 0.002, "loss": 2.5451, "step": 100290 }, { "epoch": 0.19981990309830422, "grad_norm": 0.17094603180885315, "learning_rate": 0.002, "loss": 2.5683, "step": 100300 }, { "epoch": 0.19983982532194314, "grad_norm": 0.1697799414396286, "learning_rate": 0.002, "loss": 2.5547, "step": 100310 }, { "epoch": 0.19985974754558206, "grad_norm": 0.17259721457958221, "learning_rate": 0.002, "loss": 2.5727, "step": 100320 }, { "epoch": 0.19987966976922097, "grad_norm": 0.18047818541526794, "learning_rate": 0.002, "loss": 2.5732, "step": 100330 }, { "epoch": 0.19989959199285987, "grad_norm": 0.1585705578327179, "learning_rate": 0.002, "loss": 2.5762, "step": 100340 }, { "epoch": 0.19991951421649878, "grad_norm": 0.18439863622188568, "learning_rate": 0.002, "loss": 2.5633, "step": 100350 }, { "epoch": 0.1999394364401377, "grad_norm": 0.1611270010471344, "learning_rate": 0.002, "loss": 2.5633, "step": 100360 }, { "epoch": 0.19995935866377662, "grad_norm": 0.1978951245546341, "learning_rate": 0.002, "loss": 2.5514, "step": 100370 }, { "epoch": 0.19997928088741554, "grad_norm": 0.17824311554431915, "learning_rate": 0.002, "loss": 2.5552, "step": 100380 }, { "epoch": 0.19999920311105443, "grad_norm": 0.1366884857416153, "learning_rate": 0.002, "loss": 2.5731, "step": 100390 }, { "epoch": 0.20001912533469335, "grad_norm": 0.15955641865730286, "learning_rate": 0.002, "loss": 2.556, "step": 100400 }, { "epoch": 0.20003904755833227, "grad_norm": 0.16953535377979279, "learning_rate": 0.002, "loss": 2.5649, "step": 100410 }, { "epoch": 0.20005896978197119, "grad_norm": 0.1662101000547409, "learning_rate": 0.002, "loss": 2.551, "step": 100420 }, { "epoch": 0.2000788920056101, "grad_norm": 0.18204353749752045, "learning_rate": 0.002, "loss": 2.5768, "step": 100430 }, { "epoch": 0.20009881422924902, "grad_norm": 0.17495712637901306, "learning_rate": 0.002, "loss": 2.5677, "step": 100440 }, { "epoch": 0.2001187364528879, "grad_norm": 0.16370609402656555, "learning_rate": 0.002, "loss": 2.5735, "step": 100450 }, { "epoch": 0.20013865867652683, "grad_norm": 0.15632542967796326, "learning_rate": 0.002, "loss": 2.5766, "step": 100460 }, { "epoch": 0.20015858090016575, "grad_norm": 0.17087619006633759, "learning_rate": 0.002, "loss": 2.5801, "step": 100470 }, { "epoch": 0.20017850312380467, "grad_norm": 0.13910911977291107, "learning_rate": 0.002, "loss": 2.5785, "step": 100480 }, { "epoch": 0.2001984253474436, "grad_norm": 0.16530200839042664, "learning_rate": 0.002, "loss": 2.5563, "step": 100490 }, { "epoch": 0.2002183475710825, "grad_norm": 0.14271612465381622, "learning_rate": 0.002, "loss": 2.5687, "step": 100500 }, { "epoch": 0.2002382697947214, "grad_norm": 0.14719204604625702, "learning_rate": 0.002, "loss": 2.5713, "step": 100510 }, { "epoch": 0.20025819201836031, "grad_norm": 0.17915324866771698, "learning_rate": 0.002, "loss": 2.5756, "step": 100520 }, { "epoch": 0.20027811424199923, "grad_norm": 0.15682771801948547, "learning_rate": 0.002, "loss": 2.5646, "step": 100530 }, { "epoch": 0.20029803646563815, "grad_norm": 0.1633162945508957, "learning_rate": 0.002, "loss": 2.5754, "step": 100540 }, { "epoch": 0.20031795868927707, "grad_norm": 0.16041573882102966, "learning_rate": 0.002, "loss": 2.5776, "step": 100550 }, { "epoch": 0.200337880912916, "grad_norm": 0.20592376589775085, "learning_rate": 0.002, "loss": 2.5773, "step": 100560 }, { "epoch": 0.20035780313655488, "grad_norm": 0.15592482686042786, "learning_rate": 0.002, "loss": 2.5739, "step": 100570 }, { "epoch": 0.2003777253601938, "grad_norm": 0.18250076472759247, "learning_rate": 0.002, "loss": 2.5577, "step": 100580 }, { "epoch": 0.20039764758383272, "grad_norm": 0.13683456182479858, "learning_rate": 0.002, "loss": 2.5648, "step": 100590 }, { "epoch": 0.20041756980747163, "grad_norm": 0.19639183580875397, "learning_rate": 0.002, "loss": 2.5612, "step": 100600 }, { "epoch": 0.20043749203111055, "grad_norm": 0.15242622792720795, "learning_rate": 0.002, "loss": 2.5479, "step": 100610 }, { "epoch": 0.20045741425474947, "grad_norm": 0.17056742310523987, "learning_rate": 0.002, "loss": 2.5846, "step": 100620 }, { "epoch": 0.20047733647838836, "grad_norm": 0.148423969745636, "learning_rate": 0.002, "loss": 2.5699, "step": 100630 }, { "epoch": 0.20049725870202728, "grad_norm": 0.15801529586315155, "learning_rate": 0.002, "loss": 2.5588, "step": 100640 }, { "epoch": 0.2005171809256662, "grad_norm": 0.15555572509765625, "learning_rate": 0.002, "loss": 2.5703, "step": 100650 }, { "epoch": 0.20053710314930512, "grad_norm": 0.1796732097864151, "learning_rate": 0.002, "loss": 2.5549, "step": 100660 }, { "epoch": 0.20055702537294404, "grad_norm": 0.1686195284128189, "learning_rate": 0.002, "loss": 2.5611, "step": 100670 }, { "epoch": 0.20057694759658293, "grad_norm": 0.1551171988248825, "learning_rate": 0.002, "loss": 2.5602, "step": 100680 }, { "epoch": 0.20059686982022185, "grad_norm": 0.14840927720069885, "learning_rate": 0.002, "loss": 2.5715, "step": 100690 }, { "epoch": 0.20061679204386076, "grad_norm": 0.15461593866348267, "learning_rate": 0.002, "loss": 2.5541, "step": 100700 }, { "epoch": 0.20063671426749968, "grad_norm": 0.16758674383163452, "learning_rate": 0.002, "loss": 2.5589, "step": 100710 }, { "epoch": 0.2006566364911386, "grad_norm": 0.17556200921535492, "learning_rate": 0.002, "loss": 2.5598, "step": 100720 }, { "epoch": 0.20067655871477752, "grad_norm": 0.1726893037557602, "learning_rate": 0.002, "loss": 2.5657, "step": 100730 }, { "epoch": 0.2006964809384164, "grad_norm": 0.14930084347724915, "learning_rate": 0.002, "loss": 2.5668, "step": 100740 }, { "epoch": 0.20071640316205533, "grad_norm": 0.16404277086257935, "learning_rate": 0.002, "loss": 2.5673, "step": 100750 }, { "epoch": 0.20073632538569425, "grad_norm": 0.18810072541236877, "learning_rate": 0.002, "loss": 2.554, "step": 100760 }, { "epoch": 0.20075624760933317, "grad_norm": 0.17957502603530884, "learning_rate": 0.002, "loss": 2.5703, "step": 100770 }, { "epoch": 0.20077616983297208, "grad_norm": 0.14896906912326813, "learning_rate": 0.002, "loss": 2.5574, "step": 100780 }, { "epoch": 0.200796092056611, "grad_norm": 0.16153892874717712, "learning_rate": 0.002, "loss": 2.5647, "step": 100790 }, { "epoch": 0.2008160142802499, "grad_norm": 0.1649348884820938, "learning_rate": 0.002, "loss": 2.5601, "step": 100800 }, { "epoch": 0.2008359365038888, "grad_norm": 0.207698255777359, "learning_rate": 0.002, "loss": 2.5552, "step": 100810 }, { "epoch": 0.20085585872752773, "grad_norm": 0.16147859394550323, "learning_rate": 0.002, "loss": 2.5671, "step": 100820 }, { "epoch": 0.20087578095116665, "grad_norm": 0.15832197666168213, "learning_rate": 0.002, "loss": 2.5584, "step": 100830 }, { "epoch": 0.20089570317480557, "grad_norm": 0.14592070877552032, "learning_rate": 0.002, "loss": 2.5529, "step": 100840 }, { "epoch": 0.2009156253984445, "grad_norm": 0.17019601166248322, "learning_rate": 0.002, "loss": 2.5564, "step": 100850 }, { "epoch": 0.20093554762208338, "grad_norm": 0.171650230884552, "learning_rate": 0.002, "loss": 2.5599, "step": 100860 }, { "epoch": 0.2009554698457223, "grad_norm": 0.15932030975818634, "learning_rate": 0.002, "loss": 2.5728, "step": 100870 }, { "epoch": 0.20097539206936121, "grad_norm": 0.16769374907016754, "learning_rate": 0.002, "loss": 2.5719, "step": 100880 }, { "epoch": 0.20099531429300013, "grad_norm": 0.17850708961486816, "learning_rate": 0.002, "loss": 2.5629, "step": 100890 }, { "epoch": 0.20101523651663905, "grad_norm": 0.16390345990657806, "learning_rate": 0.002, "loss": 2.5663, "step": 100900 }, { "epoch": 0.20103515874027794, "grad_norm": 0.17878268659114838, "learning_rate": 0.002, "loss": 2.5686, "step": 100910 }, { "epoch": 0.20105508096391686, "grad_norm": 0.16066643595695496, "learning_rate": 0.002, "loss": 2.5754, "step": 100920 }, { "epoch": 0.20107500318755578, "grad_norm": 0.18438585102558136, "learning_rate": 0.002, "loss": 2.5819, "step": 100930 }, { "epoch": 0.2010949254111947, "grad_norm": 0.15297795832157135, "learning_rate": 0.002, "loss": 2.5803, "step": 100940 }, { "epoch": 0.20111484763483362, "grad_norm": 0.1819915920495987, "learning_rate": 0.002, "loss": 2.5679, "step": 100950 }, { "epoch": 0.20113476985847253, "grad_norm": 0.13936538994312286, "learning_rate": 0.002, "loss": 2.5593, "step": 100960 }, { "epoch": 0.20115469208211142, "grad_norm": 0.17105858027935028, "learning_rate": 0.002, "loss": 2.5665, "step": 100970 }, { "epoch": 0.20117461430575034, "grad_norm": 0.16819162666797638, "learning_rate": 0.002, "loss": 2.5692, "step": 100980 }, { "epoch": 0.20119453652938926, "grad_norm": 0.17176347970962524, "learning_rate": 0.002, "loss": 2.5657, "step": 100990 }, { "epoch": 0.20121445875302818, "grad_norm": 0.17631159722805023, "learning_rate": 0.002, "loss": 2.5731, "step": 101000 }, { "epoch": 0.2012343809766671, "grad_norm": 0.14530692994594574, "learning_rate": 0.002, "loss": 2.5663, "step": 101010 }, { "epoch": 0.20125430320030602, "grad_norm": 0.2078896313905716, "learning_rate": 0.002, "loss": 2.5658, "step": 101020 }, { "epoch": 0.2012742254239449, "grad_norm": 0.1548277586698532, "learning_rate": 0.002, "loss": 2.5637, "step": 101030 }, { "epoch": 0.20129414764758383, "grad_norm": 0.15185581147670746, "learning_rate": 0.002, "loss": 2.5732, "step": 101040 }, { "epoch": 0.20131406987122274, "grad_norm": 0.15354380011558533, "learning_rate": 0.002, "loss": 2.549, "step": 101050 }, { "epoch": 0.20133399209486166, "grad_norm": 0.15338627994060516, "learning_rate": 0.002, "loss": 2.5599, "step": 101060 }, { "epoch": 0.20135391431850058, "grad_norm": 0.16896456480026245, "learning_rate": 0.002, "loss": 2.5804, "step": 101070 }, { "epoch": 0.2013738365421395, "grad_norm": 0.16919617354869843, "learning_rate": 0.002, "loss": 2.5566, "step": 101080 }, { "epoch": 0.2013937587657784, "grad_norm": 0.16834984719753265, "learning_rate": 0.002, "loss": 2.5492, "step": 101090 }, { "epoch": 0.2014136809894173, "grad_norm": 0.17311102151870728, "learning_rate": 0.002, "loss": 2.5583, "step": 101100 }, { "epoch": 0.20143360321305623, "grad_norm": 0.20222865045070648, "learning_rate": 0.002, "loss": 2.5757, "step": 101110 }, { "epoch": 0.20145352543669515, "grad_norm": 0.15852972865104675, "learning_rate": 0.002, "loss": 2.5639, "step": 101120 }, { "epoch": 0.20147344766033407, "grad_norm": 0.19989895820617676, "learning_rate": 0.002, "loss": 2.5652, "step": 101130 }, { "epoch": 0.20149336988397296, "grad_norm": 0.15401363372802734, "learning_rate": 0.002, "loss": 2.5839, "step": 101140 }, { "epoch": 0.20151329210761187, "grad_norm": 0.18519428372383118, "learning_rate": 0.002, "loss": 2.5597, "step": 101150 }, { "epoch": 0.2015332143312508, "grad_norm": 0.15920481085777283, "learning_rate": 0.002, "loss": 2.5603, "step": 101160 }, { "epoch": 0.2015531365548897, "grad_norm": 0.15956585109233856, "learning_rate": 0.002, "loss": 2.5622, "step": 101170 }, { "epoch": 0.20157305877852863, "grad_norm": 0.16046570241451263, "learning_rate": 0.002, "loss": 2.5645, "step": 101180 }, { "epoch": 0.20159298100216755, "grad_norm": 0.1493653804063797, "learning_rate": 0.002, "loss": 2.5726, "step": 101190 }, { "epoch": 0.20161290322580644, "grad_norm": 0.1998097449541092, "learning_rate": 0.002, "loss": 2.5807, "step": 101200 }, { "epoch": 0.20163282544944536, "grad_norm": 0.17166103422641754, "learning_rate": 0.002, "loss": 2.5873, "step": 101210 }, { "epoch": 0.20165274767308428, "grad_norm": 0.17122696340084076, "learning_rate": 0.002, "loss": 2.545, "step": 101220 }, { "epoch": 0.2016726698967232, "grad_norm": 0.2252981960773468, "learning_rate": 0.002, "loss": 2.5707, "step": 101230 }, { "epoch": 0.2016925921203621, "grad_norm": 0.15009035170078278, "learning_rate": 0.002, "loss": 2.59, "step": 101240 }, { "epoch": 0.20171251434400103, "grad_norm": 0.1755238026380539, "learning_rate": 0.002, "loss": 2.57, "step": 101250 }, { "epoch": 0.20173243656763992, "grad_norm": 0.1974908709526062, "learning_rate": 0.002, "loss": 2.5689, "step": 101260 }, { "epoch": 0.20175235879127884, "grad_norm": 0.15814489126205444, "learning_rate": 0.002, "loss": 2.5707, "step": 101270 }, { "epoch": 0.20177228101491776, "grad_norm": 0.15362019836902618, "learning_rate": 0.002, "loss": 2.5666, "step": 101280 }, { "epoch": 0.20179220323855668, "grad_norm": 0.18853576481342316, "learning_rate": 0.002, "loss": 2.579, "step": 101290 }, { "epoch": 0.2018121254621956, "grad_norm": 0.16090354323387146, "learning_rate": 0.002, "loss": 2.5459, "step": 101300 }, { "epoch": 0.20183204768583451, "grad_norm": 0.17258253693580627, "learning_rate": 0.002, "loss": 2.5648, "step": 101310 }, { "epoch": 0.2018519699094734, "grad_norm": 0.15424984693527222, "learning_rate": 0.002, "loss": 2.5593, "step": 101320 }, { "epoch": 0.20187189213311232, "grad_norm": 0.1679547280073166, "learning_rate": 0.002, "loss": 2.5643, "step": 101330 }, { "epoch": 0.20189181435675124, "grad_norm": 0.2257753312587738, "learning_rate": 0.002, "loss": 2.5741, "step": 101340 }, { "epoch": 0.20191173658039016, "grad_norm": 0.13472475111484528, "learning_rate": 0.002, "loss": 2.5839, "step": 101350 }, { "epoch": 0.20193165880402908, "grad_norm": 0.14885762333869934, "learning_rate": 0.002, "loss": 2.5841, "step": 101360 }, { "epoch": 0.201951581027668, "grad_norm": 0.16095885634422302, "learning_rate": 0.002, "loss": 2.568, "step": 101370 }, { "epoch": 0.2019715032513069, "grad_norm": 0.16697794198989868, "learning_rate": 0.002, "loss": 2.5724, "step": 101380 }, { "epoch": 0.2019914254749458, "grad_norm": 0.1418028175830841, "learning_rate": 0.002, "loss": 2.5625, "step": 101390 }, { "epoch": 0.20201134769858473, "grad_norm": 0.17030100524425507, "learning_rate": 0.002, "loss": 2.5658, "step": 101400 }, { "epoch": 0.20203126992222364, "grad_norm": 0.15736865997314453, "learning_rate": 0.002, "loss": 2.5624, "step": 101410 }, { "epoch": 0.20205119214586256, "grad_norm": 0.15070664882659912, "learning_rate": 0.002, "loss": 2.5628, "step": 101420 }, { "epoch": 0.20207111436950145, "grad_norm": 0.1737537831068039, "learning_rate": 0.002, "loss": 2.5634, "step": 101430 }, { "epoch": 0.20209103659314037, "grad_norm": 0.16889509558677673, "learning_rate": 0.002, "loss": 2.575, "step": 101440 }, { "epoch": 0.2021109588167793, "grad_norm": 0.15152278542518616, "learning_rate": 0.002, "loss": 2.5651, "step": 101450 }, { "epoch": 0.2021308810404182, "grad_norm": 0.19788990914821625, "learning_rate": 0.002, "loss": 2.5623, "step": 101460 }, { "epoch": 0.20215080326405713, "grad_norm": 0.1581737995147705, "learning_rate": 0.002, "loss": 2.5666, "step": 101470 }, { "epoch": 0.20217072548769605, "grad_norm": 0.15635272860527039, "learning_rate": 0.002, "loss": 2.5569, "step": 101480 }, { "epoch": 0.20219064771133494, "grad_norm": 0.16696570813655853, "learning_rate": 0.002, "loss": 2.5829, "step": 101490 }, { "epoch": 0.20221056993497386, "grad_norm": 0.17069002985954285, "learning_rate": 0.002, "loss": 2.5641, "step": 101500 }, { "epoch": 0.20223049215861277, "grad_norm": 0.16385102272033691, "learning_rate": 0.002, "loss": 2.5565, "step": 101510 }, { "epoch": 0.2022504143822517, "grad_norm": 0.23469232022762299, "learning_rate": 0.002, "loss": 2.5644, "step": 101520 }, { "epoch": 0.2022703366058906, "grad_norm": 0.16996395587921143, "learning_rate": 0.002, "loss": 2.5692, "step": 101530 }, { "epoch": 0.20229025882952953, "grad_norm": 0.17266809940338135, "learning_rate": 0.002, "loss": 2.564, "step": 101540 }, { "epoch": 0.20231018105316842, "grad_norm": 0.27747035026550293, "learning_rate": 0.002, "loss": 2.5654, "step": 101550 }, { "epoch": 0.20233010327680734, "grad_norm": 0.16270384192466736, "learning_rate": 0.002, "loss": 2.5749, "step": 101560 }, { "epoch": 0.20235002550044626, "grad_norm": 0.1684594601392746, "learning_rate": 0.002, "loss": 2.569, "step": 101570 }, { "epoch": 0.20236994772408518, "grad_norm": 0.16291968524456024, "learning_rate": 0.002, "loss": 2.5596, "step": 101580 }, { "epoch": 0.2023898699477241, "grad_norm": 0.2040882557630539, "learning_rate": 0.002, "loss": 2.5727, "step": 101590 }, { "epoch": 0.202409792171363, "grad_norm": 0.1838129460811615, "learning_rate": 0.002, "loss": 2.573, "step": 101600 }, { "epoch": 0.2024297143950019, "grad_norm": 0.16198624670505524, "learning_rate": 0.002, "loss": 2.5606, "step": 101610 }, { "epoch": 0.20244963661864082, "grad_norm": 0.16488167643547058, "learning_rate": 0.002, "loss": 2.5697, "step": 101620 }, { "epoch": 0.20246955884227974, "grad_norm": 0.20916533470153809, "learning_rate": 0.002, "loss": 2.5799, "step": 101630 }, { "epoch": 0.20248948106591866, "grad_norm": 0.1370449960231781, "learning_rate": 0.002, "loss": 2.5602, "step": 101640 }, { "epoch": 0.20250940328955758, "grad_norm": 0.13444405794143677, "learning_rate": 0.002, "loss": 2.5593, "step": 101650 }, { "epoch": 0.20252932551319647, "grad_norm": 0.20838865637779236, "learning_rate": 0.002, "loss": 2.5781, "step": 101660 }, { "epoch": 0.20254924773683539, "grad_norm": 0.16186177730560303, "learning_rate": 0.002, "loss": 2.5658, "step": 101670 }, { "epoch": 0.2025691699604743, "grad_norm": 0.15421082079410553, "learning_rate": 0.002, "loss": 2.5768, "step": 101680 }, { "epoch": 0.20258909218411322, "grad_norm": 0.14257405698299408, "learning_rate": 0.002, "loss": 2.5736, "step": 101690 }, { "epoch": 0.20260901440775214, "grad_norm": 0.1601354479789734, "learning_rate": 0.002, "loss": 2.5716, "step": 101700 }, { "epoch": 0.20262893663139106, "grad_norm": 0.18059466779232025, "learning_rate": 0.002, "loss": 2.5599, "step": 101710 }, { "epoch": 0.20264885885502995, "grad_norm": 0.17575162649154663, "learning_rate": 0.002, "loss": 2.5638, "step": 101720 }, { "epoch": 0.20266878107866887, "grad_norm": 0.17839489877223969, "learning_rate": 0.002, "loss": 2.5655, "step": 101730 }, { "epoch": 0.2026887033023078, "grad_norm": 0.16920015215873718, "learning_rate": 0.002, "loss": 2.5658, "step": 101740 }, { "epoch": 0.2027086255259467, "grad_norm": 0.1710265725851059, "learning_rate": 0.002, "loss": 2.5697, "step": 101750 }, { "epoch": 0.20272854774958562, "grad_norm": 0.16465547680854797, "learning_rate": 0.002, "loss": 2.5684, "step": 101760 }, { "epoch": 0.20274846997322454, "grad_norm": 0.22871126234531403, "learning_rate": 0.002, "loss": 2.5606, "step": 101770 }, { "epoch": 0.20276839219686343, "grad_norm": 0.13838577270507812, "learning_rate": 0.002, "loss": 2.5657, "step": 101780 }, { "epoch": 0.20278831442050235, "grad_norm": 0.16072851419448853, "learning_rate": 0.002, "loss": 2.5835, "step": 101790 }, { "epoch": 0.20280823664414127, "grad_norm": 0.16909806430339813, "learning_rate": 0.002, "loss": 2.5692, "step": 101800 }, { "epoch": 0.2028281588677802, "grad_norm": 0.1631534844636917, "learning_rate": 0.002, "loss": 2.5667, "step": 101810 }, { "epoch": 0.2028480810914191, "grad_norm": 0.1665046513080597, "learning_rate": 0.002, "loss": 2.5739, "step": 101820 }, { "epoch": 0.20286800331505803, "grad_norm": 0.15877149999141693, "learning_rate": 0.002, "loss": 2.5824, "step": 101830 }, { "epoch": 0.20288792553869692, "grad_norm": 0.175890251994133, "learning_rate": 0.002, "loss": 2.5864, "step": 101840 }, { "epoch": 0.20290784776233584, "grad_norm": 0.16197837889194489, "learning_rate": 0.002, "loss": 2.5625, "step": 101850 }, { "epoch": 0.20292776998597475, "grad_norm": 0.1676042228937149, "learning_rate": 0.002, "loss": 2.5654, "step": 101860 }, { "epoch": 0.20294769220961367, "grad_norm": 0.15876255929470062, "learning_rate": 0.002, "loss": 2.5617, "step": 101870 }, { "epoch": 0.2029676144332526, "grad_norm": 0.2234872281551361, "learning_rate": 0.002, "loss": 2.5775, "step": 101880 }, { "epoch": 0.20298753665689148, "grad_norm": 0.16820184886455536, "learning_rate": 0.002, "loss": 2.5766, "step": 101890 }, { "epoch": 0.2030074588805304, "grad_norm": 0.14833179116249084, "learning_rate": 0.002, "loss": 2.5569, "step": 101900 }, { "epoch": 0.20302738110416932, "grad_norm": 0.1720767319202423, "learning_rate": 0.002, "loss": 2.5759, "step": 101910 }, { "epoch": 0.20304730332780824, "grad_norm": 0.16419820487499237, "learning_rate": 0.002, "loss": 2.5568, "step": 101920 }, { "epoch": 0.20306722555144716, "grad_norm": 0.15355849266052246, "learning_rate": 0.002, "loss": 2.5777, "step": 101930 }, { "epoch": 0.20308714777508607, "grad_norm": 0.14862671494483948, "learning_rate": 0.002, "loss": 2.5504, "step": 101940 }, { "epoch": 0.20310706999872497, "grad_norm": 0.1690923124551773, "learning_rate": 0.002, "loss": 2.574, "step": 101950 }, { "epoch": 0.20312699222236388, "grad_norm": 0.15151305496692657, "learning_rate": 0.002, "loss": 2.5483, "step": 101960 }, { "epoch": 0.2031469144460028, "grad_norm": 0.17556606233119965, "learning_rate": 0.002, "loss": 2.5584, "step": 101970 }, { "epoch": 0.20316683666964172, "grad_norm": 0.14045199751853943, "learning_rate": 0.002, "loss": 2.5614, "step": 101980 }, { "epoch": 0.20318675889328064, "grad_norm": 0.19372732937335968, "learning_rate": 0.002, "loss": 2.5645, "step": 101990 }, { "epoch": 0.20320668111691956, "grad_norm": 0.17803707718849182, "learning_rate": 0.002, "loss": 2.5713, "step": 102000 }, { "epoch": 0.20322660334055845, "grad_norm": 0.17499405145645142, "learning_rate": 0.002, "loss": 2.5532, "step": 102010 }, { "epoch": 0.20324652556419737, "grad_norm": 0.1652694195508957, "learning_rate": 0.002, "loss": 2.5672, "step": 102020 }, { "epoch": 0.20326644778783629, "grad_norm": 0.1507202386856079, "learning_rate": 0.002, "loss": 2.5683, "step": 102030 }, { "epoch": 0.2032863700114752, "grad_norm": 0.18366022408008575, "learning_rate": 0.002, "loss": 2.5674, "step": 102040 }, { "epoch": 0.20330629223511412, "grad_norm": 0.17046189308166504, "learning_rate": 0.002, "loss": 2.5568, "step": 102050 }, { "epoch": 0.20332621445875304, "grad_norm": 0.14639736711978912, "learning_rate": 0.002, "loss": 2.5585, "step": 102060 }, { "epoch": 0.20334613668239193, "grad_norm": 0.18017616868019104, "learning_rate": 0.002, "loss": 2.5561, "step": 102070 }, { "epoch": 0.20336605890603085, "grad_norm": 0.134353905916214, "learning_rate": 0.002, "loss": 2.5703, "step": 102080 }, { "epoch": 0.20338598112966977, "grad_norm": 0.2086697220802307, "learning_rate": 0.002, "loss": 2.5684, "step": 102090 }, { "epoch": 0.2034059033533087, "grad_norm": 0.1466568112373352, "learning_rate": 0.002, "loss": 2.5649, "step": 102100 }, { "epoch": 0.2034258255769476, "grad_norm": 0.15384960174560547, "learning_rate": 0.002, "loss": 2.5548, "step": 102110 }, { "epoch": 0.20344574780058652, "grad_norm": 0.17826612293720245, "learning_rate": 0.002, "loss": 2.5835, "step": 102120 }, { "epoch": 0.20346567002422541, "grad_norm": 0.18257130682468414, "learning_rate": 0.002, "loss": 2.5734, "step": 102130 }, { "epoch": 0.20348559224786433, "grad_norm": 0.19792354106903076, "learning_rate": 0.002, "loss": 2.5662, "step": 102140 }, { "epoch": 0.20350551447150325, "grad_norm": 0.16274097561836243, "learning_rate": 0.002, "loss": 2.5797, "step": 102150 }, { "epoch": 0.20352543669514217, "grad_norm": 0.15507489442825317, "learning_rate": 0.002, "loss": 2.5707, "step": 102160 }, { "epoch": 0.2035453589187811, "grad_norm": 0.13496193289756775, "learning_rate": 0.002, "loss": 2.5658, "step": 102170 }, { "epoch": 0.20356528114241998, "grad_norm": 0.16252252459526062, "learning_rate": 0.002, "loss": 2.5655, "step": 102180 }, { "epoch": 0.2035852033660589, "grad_norm": 0.15904085338115692, "learning_rate": 0.002, "loss": 2.5604, "step": 102190 }, { "epoch": 0.20360512558969782, "grad_norm": 0.15844103693962097, "learning_rate": 0.002, "loss": 2.5701, "step": 102200 }, { "epoch": 0.20362504781333673, "grad_norm": 0.1701551228761673, "learning_rate": 0.002, "loss": 2.5724, "step": 102210 }, { "epoch": 0.20364497003697565, "grad_norm": 0.17348149418830872, "learning_rate": 0.002, "loss": 2.5754, "step": 102220 }, { "epoch": 0.20366489226061457, "grad_norm": 0.1594383418560028, "learning_rate": 0.002, "loss": 2.5716, "step": 102230 }, { "epoch": 0.20368481448425346, "grad_norm": 0.16971944272518158, "learning_rate": 0.002, "loss": 2.5911, "step": 102240 }, { "epoch": 0.20370473670789238, "grad_norm": 0.14865918457508087, "learning_rate": 0.002, "loss": 2.562, "step": 102250 }, { "epoch": 0.2037246589315313, "grad_norm": 0.14449308812618256, "learning_rate": 0.002, "loss": 2.5601, "step": 102260 }, { "epoch": 0.20374458115517022, "grad_norm": 0.1595192849636078, "learning_rate": 0.002, "loss": 2.5553, "step": 102270 }, { "epoch": 0.20376450337880914, "grad_norm": 0.19257254898548126, "learning_rate": 0.002, "loss": 2.5759, "step": 102280 }, { "epoch": 0.20378442560244805, "grad_norm": 0.16268664598464966, "learning_rate": 0.002, "loss": 2.5612, "step": 102290 }, { "epoch": 0.20380434782608695, "grad_norm": 0.1966942548751831, "learning_rate": 0.002, "loss": 2.5593, "step": 102300 }, { "epoch": 0.20382427004972586, "grad_norm": 0.2085588574409485, "learning_rate": 0.002, "loss": 2.5749, "step": 102310 }, { "epoch": 0.20384419227336478, "grad_norm": 0.20776581764221191, "learning_rate": 0.002, "loss": 2.5706, "step": 102320 }, { "epoch": 0.2038641144970037, "grad_norm": 0.1457056999206543, "learning_rate": 0.002, "loss": 2.5646, "step": 102330 }, { "epoch": 0.20388403672064262, "grad_norm": 0.1660812795162201, "learning_rate": 0.002, "loss": 2.5489, "step": 102340 }, { "epoch": 0.20390395894428154, "grad_norm": 0.1661578118801117, "learning_rate": 0.002, "loss": 2.5547, "step": 102350 }, { "epoch": 0.20392388116792043, "grad_norm": 0.1540021002292633, "learning_rate": 0.002, "loss": 2.5623, "step": 102360 }, { "epoch": 0.20394380339155935, "grad_norm": 0.17545992136001587, "learning_rate": 0.002, "loss": 2.5742, "step": 102370 }, { "epoch": 0.20396372561519827, "grad_norm": 0.1438221037387848, "learning_rate": 0.002, "loss": 2.5604, "step": 102380 }, { "epoch": 0.20398364783883718, "grad_norm": 0.15552392601966858, "learning_rate": 0.002, "loss": 2.5583, "step": 102390 }, { "epoch": 0.2040035700624761, "grad_norm": 0.15084105730056763, "learning_rate": 0.002, "loss": 2.564, "step": 102400 }, { "epoch": 0.204023492286115, "grad_norm": 0.18246307969093323, "learning_rate": 0.002, "loss": 2.5597, "step": 102410 }, { "epoch": 0.2040434145097539, "grad_norm": 0.1689063161611557, "learning_rate": 0.002, "loss": 2.564, "step": 102420 }, { "epoch": 0.20406333673339283, "grad_norm": 0.13796153664588928, "learning_rate": 0.002, "loss": 2.5711, "step": 102430 }, { "epoch": 0.20408325895703175, "grad_norm": 0.18827828764915466, "learning_rate": 0.002, "loss": 2.5585, "step": 102440 }, { "epoch": 0.20410318118067067, "grad_norm": 0.14097051322460175, "learning_rate": 0.002, "loss": 2.5665, "step": 102450 }, { "epoch": 0.20412310340430959, "grad_norm": 0.1686609536409378, "learning_rate": 0.002, "loss": 2.5675, "step": 102460 }, { "epoch": 0.20414302562794848, "grad_norm": 0.1792231798171997, "learning_rate": 0.002, "loss": 2.5579, "step": 102470 }, { "epoch": 0.2041629478515874, "grad_norm": 0.15863555669784546, "learning_rate": 0.002, "loss": 2.5809, "step": 102480 }, { "epoch": 0.2041828700752263, "grad_norm": 0.17163190245628357, "learning_rate": 0.002, "loss": 2.5817, "step": 102490 }, { "epoch": 0.20420279229886523, "grad_norm": 0.17879553139209747, "learning_rate": 0.002, "loss": 2.5678, "step": 102500 }, { "epoch": 0.20422271452250415, "grad_norm": 0.16095732152462006, "learning_rate": 0.002, "loss": 2.5686, "step": 102510 }, { "epoch": 0.20424263674614307, "grad_norm": 0.1684541404247284, "learning_rate": 0.002, "loss": 2.5641, "step": 102520 }, { "epoch": 0.20426255896978196, "grad_norm": 0.17190569639205933, "learning_rate": 0.002, "loss": 2.5576, "step": 102530 }, { "epoch": 0.20428248119342088, "grad_norm": 0.1803058385848999, "learning_rate": 0.002, "loss": 2.5578, "step": 102540 }, { "epoch": 0.2043024034170598, "grad_norm": 0.15925610065460205, "learning_rate": 0.002, "loss": 2.5692, "step": 102550 }, { "epoch": 0.20432232564069872, "grad_norm": 0.1511400192975998, "learning_rate": 0.002, "loss": 2.5519, "step": 102560 }, { "epoch": 0.20434224786433763, "grad_norm": 0.1925525665283203, "learning_rate": 0.002, "loss": 2.5571, "step": 102570 }, { "epoch": 0.20436217008797655, "grad_norm": 0.21282783150672913, "learning_rate": 0.002, "loss": 2.5682, "step": 102580 }, { "epoch": 0.20438209231161544, "grad_norm": 0.1749817430973053, "learning_rate": 0.002, "loss": 2.5649, "step": 102590 }, { "epoch": 0.20440201453525436, "grad_norm": 0.15403912961483002, "learning_rate": 0.002, "loss": 2.5609, "step": 102600 }, { "epoch": 0.20442193675889328, "grad_norm": 0.17852945625782013, "learning_rate": 0.002, "loss": 2.5691, "step": 102610 }, { "epoch": 0.2044418589825322, "grad_norm": 0.16130982339382172, "learning_rate": 0.002, "loss": 2.5633, "step": 102620 }, { "epoch": 0.20446178120617112, "grad_norm": 0.13725048303604126, "learning_rate": 0.002, "loss": 2.5856, "step": 102630 }, { "epoch": 0.20448170342981, "grad_norm": 0.19241510331630707, "learning_rate": 0.002, "loss": 2.576, "step": 102640 }, { "epoch": 0.20450162565344893, "grad_norm": 0.1344442516565323, "learning_rate": 0.002, "loss": 2.5765, "step": 102650 }, { "epoch": 0.20452154787708784, "grad_norm": 0.1509752869606018, "learning_rate": 0.002, "loss": 2.5542, "step": 102660 }, { "epoch": 0.20454147010072676, "grad_norm": 0.1811644285917282, "learning_rate": 0.002, "loss": 2.5651, "step": 102670 }, { "epoch": 0.20456139232436568, "grad_norm": 0.1578066647052765, "learning_rate": 0.002, "loss": 2.5731, "step": 102680 }, { "epoch": 0.2045813145480046, "grad_norm": 0.1591750681400299, "learning_rate": 0.002, "loss": 2.5579, "step": 102690 }, { "epoch": 0.2046012367716435, "grad_norm": 0.20499037206172943, "learning_rate": 0.002, "loss": 2.5656, "step": 102700 }, { "epoch": 0.2046211589952824, "grad_norm": 0.12557417154312134, "learning_rate": 0.002, "loss": 2.5477, "step": 102710 }, { "epoch": 0.20464108121892133, "grad_norm": 0.1574193686246872, "learning_rate": 0.002, "loss": 2.5693, "step": 102720 }, { "epoch": 0.20466100344256025, "grad_norm": 0.16915899515151978, "learning_rate": 0.002, "loss": 2.5586, "step": 102730 }, { "epoch": 0.20468092566619916, "grad_norm": 0.16197510063648224, "learning_rate": 0.002, "loss": 2.5711, "step": 102740 }, { "epoch": 0.20470084788983808, "grad_norm": 0.1649506837129593, "learning_rate": 0.002, "loss": 2.5463, "step": 102750 }, { "epoch": 0.20472077011347697, "grad_norm": 0.1531367301940918, "learning_rate": 0.002, "loss": 2.5624, "step": 102760 }, { "epoch": 0.2047406923371159, "grad_norm": 0.15128929913043976, "learning_rate": 0.002, "loss": 2.5604, "step": 102770 }, { "epoch": 0.2047606145607548, "grad_norm": 0.1929250955581665, "learning_rate": 0.002, "loss": 2.569, "step": 102780 }, { "epoch": 0.20478053678439373, "grad_norm": 0.17939145863056183, "learning_rate": 0.002, "loss": 2.569, "step": 102790 }, { "epoch": 0.20480045900803265, "grad_norm": 0.15545813739299774, "learning_rate": 0.002, "loss": 2.5714, "step": 102800 }, { "epoch": 0.20482038123167157, "grad_norm": 0.16821523010730743, "learning_rate": 0.002, "loss": 2.5604, "step": 102810 }, { "epoch": 0.20484030345531046, "grad_norm": 0.1769559532403946, "learning_rate": 0.002, "loss": 2.5585, "step": 102820 }, { "epoch": 0.20486022567894938, "grad_norm": 0.17560940980911255, "learning_rate": 0.002, "loss": 2.551, "step": 102830 }, { "epoch": 0.2048801479025883, "grad_norm": 0.15041813254356384, "learning_rate": 0.002, "loss": 2.5735, "step": 102840 }, { "epoch": 0.2049000701262272, "grad_norm": 0.15790654718875885, "learning_rate": 0.002, "loss": 2.5603, "step": 102850 }, { "epoch": 0.20491999234986613, "grad_norm": 0.18003058433532715, "learning_rate": 0.002, "loss": 2.5692, "step": 102860 }, { "epoch": 0.20493991457350505, "grad_norm": 0.14952802658081055, "learning_rate": 0.002, "loss": 2.5717, "step": 102870 }, { "epoch": 0.20495983679714394, "grad_norm": 0.19927705824375153, "learning_rate": 0.002, "loss": 2.5638, "step": 102880 }, { "epoch": 0.20497975902078286, "grad_norm": 0.18671435117721558, "learning_rate": 0.002, "loss": 2.5652, "step": 102890 }, { "epoch": 0.20499968124442178, "grad_norm": 0.17862415313720703, "learning_rate": 0.002, "loss": 2.5656, "step": 102900 }, { "epoch": 0.2050196034680607, "grad_norm": 0.1583148092031479, "learning_rate": 0.002, "loss": 2.565, "step": 102910 }, { "epoch": 0.20503952569169961, "grad_norm": 0.14417493343353271, "learning_rate": 0.002, "loss": 2.5684, "step": 102920 }, { "epoch": 0.2050594479153385, "grad_norm": 0.16276815533638, "learning_rate": 0.002, "loss": 2.5676, "step": 102930 }, { "epoch": 0.20507937013897742, "grad_norm": 0.19758844375610352, "learning_rate": 0.002, "loss": 2.5565, "step": 102940 }, { "epoch": 0.20509929236261634, "grad_norm": 0.16353332996368408, "learning_rate": 0.002, "loss": 2.5593, "step": 102950 }, { "epoch": 0.20511921458625526, "grad_norm": 0.14383381605148315, "learning_rate": 0.002, "loss": 2.5662, "step": 102960 }, { "epoch": 0.20513913680989418, "grad_norm": 0.21973015367984772, "learning_rate": 0.002, "loss": 2.5821, "step": 102970 }, { "epoch": 0.2051590590335331, "grad_norm": 0.15926620364189148, "learning_rate": 0.002, "loss": 2.5793, "step": 102980 }, { "epoch": 0.205178981257172, "grad_norm": 0.14868515729904175, "learning_rate": 0.002, "loss": 2.5644, "step": 102990 }, { "epoch": 0.2051989034808109, "grad_norm": 0.18646883964538574, "learning_rate": 0.002, "loss": 2.5605, "step": 103000 }, { "epoch": 0.20521882570444983, "grad_norm": 0.16468684375286102, "learning_rate": 0.002, "loss": 2.5663, "step": 103010 }, { "epoch": 0.20523874792808874, "grad_norm": 0.1553928703069687, "learning_rate": 0.002, "loss": 2.5755, "step": 103020 }, { "epoch": 0.20525867015172766, "grad_norm": 0.1616889089345932, "learning_rate": 0.002, "loss": 2.5607, "step": 103030 }, { "epoch": 0.20527859237536658, "grad_norm": 0.1446097493171692, "learning_rate": 0.002, "loss": 2.5667, "step": 103040 }, { "epoch": 0.20529851459900547, "grad_norm": 0.15537670254707336, "learning_rate": 0.002, "loss": 2.5634, "step": 103050 }, { "epoch": 0.2053184368226444, "grad_norm": 0.17658492922782898, "learning_rate": 0.002, "loss": 2.5677, "step": 103060 }, { "epoch": 0.2053383590462833, "grad_norm": 0.18179382383823395, "learning_rate": 0.002, "loss": 2.5576, "step": 103070 }, { "epoch": 0.20535828126992223, "grad_norm": 0.1414508819580078, "learning_rate": 0.002, "loss": 2.5708, "step": 103080 }, { "epoch": 0.20537820349356115, "grad_norm": 0.19324618577957153, "learning_rate": 0.002, "loss": 2.5784, "step": 103090 }, { "epoch": 0.20539812571720006, "grad_norm": 0.18340569734573364, "learning_rate": 0.002, "loss": 2.5701, "step": 103100 }, { "epoch": 0.20541804794083895, "grad_norm": 0.14755678176879883, "learning_rate": 0.002, "loss": 2.5676, "step": 103110 }, { "epoch": 0.20543797016447787, "grad_norm": 0.18144848942756653, "learning_rate": 0.002, "loss": 2.5668, "step": 103120 }, { "epoch": 0.2054578923881168, "grad_norm": 0.18448399007320404, "learning_rate": 0.002, "loss": 2.5726, "step": 103130 }, { "epoch": 0.2054778146117557, "grad_norm": 0.19248804450035095, "learning_rate": 0.002, "loss": 2.5654, "step": 103140 }, { "epoch": 0.20549773683539463, "grad_norm": 0.1750946342945099, "learning_rate": 0.002, "loss": 2.5757, "step": 103150 }, { "epoch": 0.20551765905903352, "grad_norm": 0.17486633360385895, "learning_rate": 0.002, "loss": 2.5737, "step": 103160 }, { "epoch": 0.20553758128267244, "grad_norm": 0.14928597211837769, "learning_rate": 0.002, "loss": 2.5554, "step": 103170 }, { "epoch": 0.20555750350631136, "grad_norm": 0.16718746721744537, "learning_rate": 0.002, "loss": 2.5641, "step": 103180 }, { "epoch": 0.20557742572995027, "grad_norm": 0.20892396569252014, "learning_rate": 0.002, "loss": 2.566, "step": 103190 }, { "epoch": 0.2055973479535892, "grad_norm": 0.15426649153232574, "learning_rate": 0.002, "loss": 2.5765, "step": 103200 }, { "epoch": 0.2056172701772281, "grad_norm": 0.15313094854354858, "learning_rate": 0.002, "loss": 2.5571, "step": 103210 }, { "epoch": 0.205637192400867, "grad_norm": 0.17655587196350098, "learning_rate": 0.002, "loss": 2.5711, "step": 103220 }, { "epoch": 0.20565711462450592, "grad_norm": 0.1480572372674942, "learning_rate": 0.002, "loss": 2.5641, "step": 103230 }, { "epoch": 0.20567703684814484, "grad_norm": 0.21448050439357758, "learning_rate": 0.002, "loss": 2.5582, "step": 103240 }, { "epoch": 0.20569695907178376, "grad_norm": 0.1586897373199463, "learning_rate": 0.002, "loss": 2.5613, "step": 103250 }, { "epoch": 0.20571688129542268, "grad_norm": 0.15504932403564453, "learning_rate": 0.002, "loss": 2.57, "step": 103260 }, { "epoch": 0.2057368035190616, "grad_norm": 0.18679648637771606, "learning_rate": 0.002, "loss": 2.5772, "step": 103270 }, { "epoch": 0.20575672574270049, "grad_norm": 0.14740706980228424, "learning_rate": 0.002, "loss": 2.5557, "step": 103280 }, { "epoch": 0.2057766479663394, "grad_norm": 0.1833616942167282, "learning_rate": 0.002, "loss": 2.5793, "step": 103290 }, { "epoch": 0.20579657018997832, "grad_norm": 0.18219615519046783, "learning_rate": 0.002, "loss": 2.5696, "step": 103300 }, { "epoch": 0.20581649241361724, "grad_norm": 0.1940591037273407, "learning_rate": 0.002, "loss": 2.5739, "step": 103310 }, { "epoch": 0.20583641463725616, "grad_norm": 0.14771318435668945, "learning_rate": 0.002, "loss": 2.5633, "step": 103320 }, { "epoch": 0.20585633686089508, "grad_norm": 0.14003194868564606, "learning_rate": 0.002, "loss": 2.5605, "step": 103330 }, { "epoch": 0.20587625908453397, "grad_norm": 0.20052030682563782, "learning_rate": 0.002, "loss": 2.5705, "step": 103340 }, { "epoch": 0.2058961813081729, "grad_norm": 0.18065471947193146, "learning_rate": 0.002, "loss": 2.5879, "step": 103350 }, { "epoch": 0.2059161035318118, "grad_norm": 0.15353460609912872, "learning_rate": 0.002, "loss": 2.5691, "step": 103360 }, { "epoch": 0.20593602575545072, "grad_norm": 0.16709226369857788, "learning_rate": 0.002, "loss": 2.5707, "step": 103370 }, { "epoch": 0.20595594797908964, "grad_norm": 0.142208993434906, "learning_rate": 0.002, "loss": 2.5714, "step": 103380 }, { "epoch": 0.20597587020272856, "grad_norm": 0.19313469529151917, "learning_rate": 0.002, "loss": 2.568, "step": 103390 }, { "epoch": 0.20599579242636745, "grad_norm": 0.15292611718177795, "learning_rate": 0.002, "loss": 2.578, "step": 103400 }, { "epoch": 0.20601571465000637, "grad_norm": 0.1540251225233078, "learning_rate": 0.002, "loss": 2.5494, "step": 103410 }, { "epoch": 0.2060356368736453, "grad_norm": 0.18011194467544556, "learning_rate": 0.002, "loss": 2.5689, "step": 103420 }, { "epoch": 0.2060555590972842, "grad_norm": 0.1635609269142151, "learning_rate": 0.002, "loss": 2.5621, "step": 103430 }, { "epoch": 0.20607548132092313, "grad_norm": 0.15392513573169708, "learning_rate": 0.002, "loss": 2.5635, "step": 103440 }, { "epoch": 0.20609540354456202, "grad_norm": 0.15405192971229553, "learning_rate": 0.002, "loss": 2.5593, "step": 103450 }, { "epoch": 0.20611532576820094, "grad_norm": 0.18394878506660461, "learning_rate": 0.002, "loss": 2.5702, "step": 103460 }, { "epoch": 0.20613524799183985, "grad_norm": 0.16827566921710968, "learning_rate": 0.002, "loss": 2.5521, "step": 103470 }, { "epoch": 0.20615517021547877, "grad_norm": 0.19152399897575378, "learning_rate": 0.002, "loss": 2.5566, "step": 103480 }, { "epoch": 0.2061750924391177, "grad_norm": 0.1756613552570343, "learning_rate": 0.002, "loss": 2.5679, "step": 103490 }, { "epoch": 0.2061950146627566, "grad_norm": 0.1561007797718048, "learning_rate": 0.002, "loss": 2.5638, "step": 103500 }, { "epoch": 0.2062149368863955, "grad_norm": 0.18651960790157318, "learning_rate": 0.002, "loss": 2.572, "step": 103510 }, { "epoch": 0.20623485911003442, "grad_norm": 0.1375199407339096, "learning_rate": 0.002, "loss": 2.5676, "step": 103520 }, { "epoch": 0.20625478133367334, "grad_norm": 0.17995311319828033, "learning_rate": 0.002, "loss": 2.5563, "step": 103530 }, { "epoch": 0.20627470355731226, "grad_norm": 0.15860944986343384, "learning_rate": 0.002, "loss": 2.5604, "step": 103540 }, { "epoch": 0.20629462578095117, "grad_norm": 0.15232422947883606, "learning_rate": 0.002, "loss": 2.5706, "step": 103550 }, { "epoch": 0.2063145480045901, "grad_norm": 0.15405534207820892, "learning_rate": 0.002, "loss": 2.5594, "step": 103560 }, { "epoch": 0.20633447022822898, "grad_norm": 0.1598171889781952, "learning_rate": 0.002, "loss": 2.5707, "step": 103570 }, { "epoch": 0.2063543924518679, "grad_norm": 0.202687606215477, "learning_rate": 0.002, "loss": 2.5552, "step": 103580 }, { "epoch": 0.20637431467550682, "grad_norm": 0.13450700044631958, "learning_rate": 0.002, "loss": 2.5668, "step": 103590 }, { "epoch": 0.20639423689914574, "grad_norm": 0.18912413716316223, "learning_rate": 0.002, "loss": 2.5616, "step": 103600 }, { "epoch": 0.20641415912278466, "grad_norm": 0.16691021621227264, "learning_rate": 0.002, "loss": 2.5667, "step": 103610 }, { "epoch": 0.20643408134642358, "grad_norm": 0.17779715359210968, "learning_rate": 0.002, "loss": 2.5746, "step": 103620 }, { "epoch": 0.20645400357006247, "grad_norm": 0.16467346251010895, "learning_rate": 0.002, "loss": 2.5536, "step": 103630 }, { "epoch": 0.20647392579370138, "grad_norm": 0.14476501941680908, "learning_rate": 0.002, "loss": 2.5695, "step": 103640 }, { "epoch": 0.2064938480173403, "grad_norm": 0.1693214476108551, "learning_rate": 0.002, "loss": 2.5626, "step": 103650 }, { "epoch": 0.20651377024097922, "grad_norm": 0.17937718331813812, "learning_rate": 0.002, "loss": 2.568, "step": 103660 }, { "epoch": 0.20653369246461814, "grad_norm": 0.15237855911254883, "learning_rate": 0.002, "loss": 2.5577, "step": 103670 }, { "epoch": 0.20655361468825703, "grad_norm": 0.18139277398586273, "learning_rate": 0.002, "loss": 2.5589, "step": 103680 }, { "epoch": 0.20657353691189595, "grad_norm": 0.17527686059474945, "learning_rate": 0.002, "loss": 2.5756, "step": 103690 }, { "epoch": 0.20659345913553487, "grad_norm": 0.15861795842647552, "learning_rate": 0.002, "loss": 2.5495, "step": 103700 }, { "epoch": 0.2066133813591738, "grad_norm": 0.15674394369125366, "learning_rate": 0.002, "loss": 2.5604, "step": 103710 }, { "epoch": 0.2066333035828127, "grad_norm": 0.1735173761844635, "learning_rate": 0.002, "loss": 2.5763, "step": 103720 }, { "epoch": 0.20665322580645162, "grad_norm": 0.155045747756958, "learning_rate": 0.002, "loss": 2.5757, "step": 103730 }, { "epoch": 0.20667314803009051, "grad_norm": 0.1426592320203781, "learning_rate": 0.002, "loss": 2.5567, "step": 103740 }, { "epoch": 0.20669307025372943, "grad_norm": 0.175858274102211, "learning_rate": 0.002, "loss": 2.5602, "step": 103750 }, { "epoch": 0.20671299247736835, "grad_norm": 0.1707083284854889, "learning_rate": 0.002, "loss": 2.5601, "step": 103760 }, { "epoch": 0.20673291470100727, "grad_norm": 0.13599014282226562, "learning_rate": 0.002, "loss": 2.5534, "step": 103770 }, { "epoch": 0.2067528369246462, "grad_norm": 0.20302647352218628, "learning_rate": 0.002, "loss": 2.5679, "step": 103780 }, { "epoch": 0.2067727591482851, "grad_norm": 0.19028866291046143, "learning_rate": 0.002, "loss": 2.5705, "step": 103790 }, { "epoch": 0.206792681371924, "grad_norm": 0.18189282715320587, "learning_rate": 0.002, "loss": 2.5611, "step": 103800 }, { "epoch": 0.20681260359556292, "grad_norm": 0.15052258968353271, "learning_rate": 0.002, "loss": 2.5734, "step": 103810 }, { "epoch": 0.20683252581920183, "grad_norm": 0.18589673936367035, "learning_rate": 0.002, "loss": 2.5665, "step": 103820 }, { "epoch": 0.20685244804284075, "grad_norm": 0.17025181651115417, "learning_rate": 0.002, "loss": 2.5523, "step": 103830 }, { "epoch": 0.20687237026647967, "grad_norm": 0.1645136922597885, "learning_rate": 0.002, "loss": 2.5811, "step": 103840 }, { "epoch": 0.2068922924901186, "grad_norm": 0.15823623538017273, "learning_rate": 0.002, "loss": 2.5756, "step": 103850 }, { "epoch": 0.20691221471375748, "grad_norm": 0.17125259339809418, "learning_rate": 0.002, "loss": 2.5609, "step": 103860 }, { "epoch": 0.2069321369373964, "grad_norm": 0.16992788016796112, "learning_rate": 0.002, "loss": 2.5619, "step": 103870 }, { "epoch": 0.20695205916103532, "grad_norm": 0.15919946134090424, "learning_rate": 0.002, "loss": 2.5698, "step": 103880 }, { "epoch": 0.20697198138467424, "grad_norm": 0.15679597854614258, "learning_rate": 0.002, "loss": 2.5557, "step": 103890 }, { "epoch": 0.20699190360831315, "grad_norm": 0.16917645931243896, "learning_rate": 0.002, "loss": 2.5685, "step": 103900 }, { "epoch": 0.20701182583195205, "grad_norm": 0.16070657968521118, "learning_rate": 0.002, "loss": 2.56, "step": 103910 }, { "epoch": 0.20703174805559096, "grad_norm": 0.1461886316537857, "learning_rate": 0.002, "loss": 2.5729, "step": 103920 }, { "epoch": 0.20705167027922988, "grad_norm": 0.17921938002109528, "learning_rate": 0.002, "loss": 2.5674, "step": 103930 }, { "epoch": 0.2070715925028688, "grad_norm": 0.2050265222787857, "learning_rate": 0.002, "loss": 2.5563, "step": 103940 }, { "epoch": 0.20709151472650772, "grad_norm": 0.15880635380744934, "learning_rate": 0.002, "loss": 2.5643, "step": 103950 }, { "epoch": 0.20711143695014664, "grad_norm": 0.16046257317066193, "learning_rate": 0.002, "loss": 2.5522, "step": 103960 }, { "epoch": 0.20713135917378553, "grad_norm": 0.19010038673877716, "learning_rate": 0.002, "loss": 2.5773, "step": 103970 }, { "epoch": 0.20715128139742445, "grad_norm": 0.17577633261680603, "learning_rate": 0.002, "loss": 2.5723, "step": 103980 }, { "epoch": 0.20717120362106337, "grad_norm": 0.16647373139858246, "learning_rate": 0.002, "loss": 2.5707, "step": 103990 }, { "epoch": 0.20719112584470228, "grad_norm": 0.194364532828331, "learning_rate": 0.002, "loss": 2.5696, "step": 104000 }, { "epoch": 0.2072110480683412, "grad_norm": 0.16191905736923218, "learning_rate": 0.002, "loss": 2.5779, "step": 104010 }, { "epoch": 0.20723097029198012, "grad_norm": 0.16643375158309937, "learning_rate": 0.002, "loss": 2.557, "step": 104020 }, { "epoch": 0.207250892515619, "grad_norm": 0.1520397812128067, "learning_rate": 0.002, "loss": 2.5528, "step": 104030 }, { "epoch": 0.20727081473925793, "grad_norm": 0.20006057620048523, "learning_rate": 0.002, "loss": 2.575, "step": 104040 }, { "epoch": 0.20729073696289685, "grad_norm": 0.168410524725914, "learning_rate": 0.002, "loss": 2.5591, "step": 104050 }, { "epoch": 0.20731065918653577, "grad_norm": 0.16324366629123688, "learning_rate": 0.002, "loss": 2.5729, "step": 104060 }, { "epoch": 0.20733058141017469, "grad_norm": 0.14932312071323395, "learning_rate": 0.002, "loss": 2.552, "step": 104070 }, { "epoch": 0.2073505036338136, "grad_norm": 0.2311592996120453, "learning_rate": 0.002, "loss": 2.5776, "step": 104080 }, { "epoch": 0.2073704258574525, "grad_norm": 0.1695619374513626, "learning_rate": 0.002, "loss": 2.5724, "step": 104090 }, { "epoch": 0.2073903480810914, "grad_norm": 0.14403843879699707, "learning_rate": 0.002, "loss": 2.5574, "step": 104100 }, { "epoch": 0.20741027030473033, "grad_norm": 0.19291692972183228, "learning_rate": 0.002, "loss": 2.5592, "step": 104110 }, { "epoch": 0.20743019252836925, "grad_norm": 0.13712915778160095, "learning_rate": 0.002, "loss": 2.5715, "step": 104120 }, { "epoch": 0.20745011475200817, "grad_norm": 0.17357945442199707, "learning_rate": 0.002, "loss": 2.5606, "step": 104130 }, { "epoch": 0.2074700369756471, "grad_norm": 0.16251584887504578, "learning_rate": 0.002, "loss": 2.5625, "step": 104140 }, { "epoch": 0.20748995919928598, "grad_norm": 0.19210121035575867, "learning_rate": 0.002, "loss": 2.5711, "step": 104150 }, { "epoch": 0.2075098814229249, "grad_norm": 0.1588120013475418, "learning_rate": 0.002, "loss": 2.562, "step": 104160 }, { "epoch": 0.20752980364656382, "grad_norm": 0.15383897721767426, "learning_rate": 0.002, "loss": 2.5697, "step": 104170 }, { "epoch": 0.20754972587020273, "grad_norm": 0.16316545009613037, "learning_rate": 0.002, "loss": 2.5774, "step": 104180 }, { "epoch": 0.20756964809384165, "grad_norm": 0.13941901922225952, "learning_rate": 0.002, "loss": 2.5499, "step": 104190 }, { "epoch": 0.20758957031748054, "grad_norm": 0.2100335955619812, "learning_rate": 0.002, "loss": 2.5596, "step": 104200 }, { "epoch": 0.20760949254111946, "grad_norm": 0.16673444211483002, "learning_rate": 0.002, "loss": 2.5638, "step": 104210 }, { "epoch": 0.20762941476475838, "grad_norm": 0.16197557747364044, "learning_rate": 0.002, "loss": 2.5788, "step": 104220 }, { "epoch": 0.2076493369883973, "grad_norm": 0.18454349040985107, "learning_rate": 0.002, "loss": 2.5556, "step": 104230 }, { "epoch": 0.20766925921203622, "grad_norm": 0.17353588342666626, "learning_rate": 0.002, "loss": 2.5505, "step": 104240 }, { "epoch": 0.20768918143567514, "grad_norm": 0.1733122020959854, "learning_rate": 0.002, "loss": 2.5915, "step": 104250 }, { "epoch": 0.20770910365931403, "grad_norm": 0.15037381649017334, "learning_rate": 0.002, "loss": 2.5498, "step": 104260 }, { "epoch": 0.20772902588295294, "grad_norm": 0.17018365859985352, "learning_rate": 0.002, "loss": 2.5694, "step": 104270 }, { "epoch": 0.20774894810659186, "grad_norm": 0.1636267900466919, "learning_rate": 0.002, "loss": 2.5618, "step": 104280 }, { "epoch": 0.20776887033023078, "grad_norm": 0.16258925199508667, "learning_rate": 0.002, "loss": 2.5704, "step": 104290 }, { "epoch": 0.2077887925538697, "grad_norm": 0.15503403544425964, "learning_rate": 0.002, "loss": 2.5652, "step": 104300 }, { "epoch": 0.20780871477750862, "grad_norm": 0.158054918050766, "learning_rate": 0.002, "loss": 2.562, "step": 104310 }, { "epoch": 0.2078286370011475, "grad_norm": 0.17023412883281708, "learning_rate": 0.002, "loss": 2.5716, "step": 104320 }, { "epoch": 0.20784855922478643, "grad_norm": 0.15233686566352844, "learning_rate": 0.002, "loss": 2.5648, "step": 104330 }, { "epoch": 0.20786848144842535, "grad_norm": 0.16939020156860352, "learning_rate": 0.002, "loss": 2.5538, "step": 104340 }, { "epoch": 0.20788840367206426, "grad_norm": 0.1857394129037857, "learning_rate": 0.002, "loss": 2.5703, "step": 104350 }, { "epoch": 0.20790832589570318, "grad_norm": 0.15310074388980865, "learning_rate": 0.002, "loss": 2.5675, "step": 104360 }, { "epoch": 0.2079282481193421, "grad_norm": 0.16443222761154175, "learning_rate": 0.002, "loss": 2.5756, "step": 104370 }, { "epoch": 0.207948170342981, "grad_norm": 0.17486608028411865, "learning_rate": 0.002, "loss": 2.5606, "step": 104380 }, { "epoch": 0.2079680925666199, "grad_norm": 0.17437228560447693, "learning_rate": 0.002, "loss": 2.569, "step": 104390 }, { "epoch": 0.20798801479025883, "grad_norm": 0.1800200641155243, "learning_rate": 0.002, "loss": 2.5681, "step": 104400 }, { "epoch": 0.20800793701389775, "grad_norm": 0.17484784126281738, "learning_rate": 0.002, "loss": 2.5706, "step": 104410 }, { "epoch": 0.20802785923753667, "grad_norm": 0.1731007695198059, "learning_rate": 0.002, "loss": 2.5618, "step": 104420 }, { "epoch": 0.20804778146117556, "grad_norm": 0.1756972372531891, "learning_rate": 0.002, "loss": 2.5679, "step": 104430 }, { "epoch": 0.20806770368481448, "grad_norm": 0.1391318440437317, "learning_rate": 0.002, "loss": 2.5675, "step": 104440 }, { "epoch": 0.2080876259084534, "grad_norm": 0.1618025004863739, "learning_rate": 0.002, "loss": 2.5675, "step": 104450 }, { "epoch": 0.2081075481320923, "grad_norm": 0.16186736524105072, "learning_rate": 0.002, "loss": 2.5668, "step": 104460 }, { "epoch": 0.20812747035573123, "grad_norm": 0.16071708500385284, "learning_rate": 0.002, "loss": 2.571, "step": 104470 }, { "epoch": 0.20814739257937015, "grad_norm": 0.17816202342510223, "learning_rate": 0.002, "loss": 2.5655, "step": 104480 }, { "epoch": 0.20816731480300904, "grad_norm": 0.16053417325019836, "learning_rate": 0.002, "loss": 2.5534, "step": 104490 }, { "epoch": 0.20818723702664796, "grad_norm": 0.172299325466156, "learning_rate": 0.002, "loss": 2.5741, "step": 104500 }, { "epoch": 0.20820715925028688, "grad_norm": 0.16398292779922485, "learning_rate": 0.002, "loss": 2.5519, "step": 104510 }, { "epoch": 0.2082270814739258, "grad_norm": 0.19201667606830597, "learning_rate": 0.002, "loss": 2.5669, "step": 104520 }, { "epoch": 0.20824700369756471, "grad_norm": 0.15517640113830566, "learning_rate": 0.002, "loss": 2.5829, "step": 104530 }, { "epoch": 0.20826692592120363, "grad_norm": 0.16127705574035645, "learning_rate": 0.002, "loss": 2.5508, "step": 104540 }, { "epoch": 0.20828684814484252, "grad_norm": 0.16482137143611908, "learning_rate": 0.002, "loss": 2.5569, "step": 104550 }, { "epoch": 0.20830677036848144, "grad_norm": 0.18927831947803497, "learning_rate": 0.002, "loss": 2.5667, "step": 104560 }, { "epoch": 0.20832669259212036, "grad_norm": 0.16453227400779724, "learning_rate": 0.002, "loss": 2.5604, "step": 104570 }, { "epoch": 0.20834661481575928, "grad_norm": 0.163751482963562, "learning_rate": 0.002, "loss": 2.58, "step": 104580 }, { "epoch": 0.2083665370393982, "grad_norm": 0.16435852646827698, "learning_rate": 0.002, "loss": 2.5715, "step": 104590 }, { "epoch": 0.20838645926303712, "grad_norm": 0.20028375089168549, "learning_rate": 0.002, "loss": 2.5786, "step": 104600 }, { "epoch": 0.208406381486676, "grad_norm": 0.15516522526741028, "learning_rate": 0.002, "loss": 2.562, "step": 104610 }, { "epoch": 0.20842630371031493, "grad_norm": 0.15474355220794678, "learning_rate": 0.002, "loss": 2.5667, "step": 104620 }, { "epoch": 0.20844622593395384, "grad_norm": 0.1622052639722824, "learning_rate": 0.002, "loss": 2.5566, "step": 104630 }, { "epoch": 0.20846614815759276, "grad_norm": 0.1724308878183365, "learning_rate": 0.002, "loss": 2.5566, "step": 104640 }, { "epoch": 0.20848607038123168, "grad_norm": 0.15749363601207733, "learning_rate": 0.002, "loss": 2.5663, "step": 104650 }, { "epoch": 0.20850599260487057, "grad_norm": 0.16324934363365173, "learning_rate": 0.002, "loss": 2.5699, "step": 104660 }, { "epoch": 0.2085259148285095, "grad_norm": 0.15052703022956848, "learning_rate": 0.002, "loss": 2.5701, "step": 104670 }, { "epoch": 0.2085458370521484, "grad_norm": 0.18469418585300446, "learning_rate": 0.002, "loss": 2.5728, "step": 104680 }, { "epoch": 0.20856575927578733, "grad_norm": 0.19662927091121674, "learning_rate": 0.002, "loss": 2.5658, "step": 104690 }, { "epoch": 0.20858568149942625, "grad_norm": 0.16394709050655365, "learning_rate": 0.002, "loss": 2.5746, "step": 104700 }, { "epoch": 0.20860560372306516, "grad_norm": 0.19894030690193176, "learning_rate": 0.002, "loss": 2.5746, "step": 104710 }, { "epoch": 0.20862552594670405, "grad_norm": 0.15941545367240906, "learning_rate": 0.002, "loss": 2.5741, "step": 104720 }, { "epoch": 0.20864544817034297, "grad_norm": 0.17403629422187805, "learning_rate": 0.002, "loss": 2.5522, "step": 104730 }, { "epoch": 0.2086653703939819, "grad_norm": 0.16572368144989014, "learning_rate": 0.002, "loss": 2.5701, "step": 104740 }, { "epoch": 0.2086852926176208, "grad_norm": 0.16886001825332642, "learning_rate": 0.002, "loss": 2.5576, "step": 104750 }, { "epoch": 0.20870521484125973, "grad_norm": 0.1725902557373047, "learning_rate": 0.002, "loss": 2.5669, "step": 104760 }, { "epoch": 0.20872513706489865, "grad_norm": 0.1848624050617218, "learning_rate": 0.002, "loss": 2.5564, "step": 104770 }, { "epoch": 0.20874505928853754, "grad_norm": 0.14661993086338043, "learning_rate": 0.002, "loss": 2.5556, "step": 104780 }, { "epoch": 0.20876498151217646, "grad_norm": 0.16212287545204163, "learning_rate": 0.002, "loss": 2.5671, "step": 104790 }, { "epoch": 0.20878490373581537, "grad_norm": 0.22422297298908234, "learning_rate": 0.002, "loss": 2.5702, "step": 104800 }, { "epoch": 0.2088048259594543, "grad_norm": 0.1649848073720932, "learning_rate": 0.002, "loss": 2.5636, "step": 104810 }, { "epoch": 0.2088247481830932, "grad_norm": 0.14401893317699432, "learning_rate": 0.002, "loss": 2.5733, "step": 104820 }, { "epoch": 0.20884467040673213, "grad_norm": 0.16308659315109253, "learning_rate": 0.002, "loss": 2.5703, "step": 104830 }, { "epoch": 0.20886459263037102, "grad_norm": 0.14721138775348663, "learning_rate": 0.002, "loss": 2.5629, "step": 104840 }, { "epoch": 0.20888451485400994, "grad_norm": 0.1709248423576355, "learning_rate": 0.002, "loss": 2.5819, "step": 104850 }, { "epoch": 0.20890443707764886, "grad_norm": 0.1725451648235321, "learning_rate": 0.002, "loss": 2.5669, "step": 104860 }, { "epoch": 0.20892435930128778, "grad_norm": 0.17858876287937164, "learning_rate": 0.002, "loss": 2.5629, "step": 104870 }, { "epoch": 0.2089442815249267, "grad_norm": 0.1739012748003006, "learning_rate": 0.002, "loss": 2.5663, "step": 104880 }, { "epoch": 0.2089642037485656, "grad_norm": 0.17125654220581055, "learning_rate": 0.002, "loss": 2.5444, "step": 104890 }, { "epoch": 0.2089841259722045, "grad_norm": 0.18691352009773254, "learning_rate": 0.002, "loss": 2.5545, "step": 104900 }, { "epoch": 0.20900404819584342, "grad_norm": 0.17743989825248718, "learning_rate": 0.002, "loss": 2.5751, "step": 104910 }, { "epoch": 0.20902397041948234, "grad_norm": 0.13838405907154083, "learning_rate": 0.002, "loss": 2.5586, "step": 104920 }, { "epoch": 0.20904389264312126, "grad_norm": 0.1628706157207489, "learning_rate": 0.002, "loss": 2.5663, "step": 104930 }, { "epoch": 0.20906381486676018, "grad_norm": 0.17304861545562744, "learning_rate": 0.002, "loss": 2.5702, "step": 104940 }, { "epoch": 0.20908373709039907, "grad_norm": 0.17425566911697388, "learning_rate": 0.002, "loss": 2.5688, "step": 104950 }, { "epoch": 0.209103659314038, "grad_norm": 0.17563045024871826, "learning_rate": 0.002, "loss": 2.5813, "step": 104960 }, { "epoch": 0.2091235815376769, "grad_norm": 0.15019381046295166, "learning_rate": 0.002, "loss": 2.5692, "step": 104970 }, { "epoch": 0.20914350376131582, "grad_norm": 0.19236630201339722, "learning_rate": 0.002, "loss": 2.571, "step": 104980 }, { "epoch": 0.20916342598495474, "grad_norm": 0.14591039717197418, "learning_rate": 0.002, "loss": 2.5559, "step": 104990 }, { "epoch": 0.20918334820859366, "grad_norm": 0.17275206744670868, "learning_rate": 0.002, "loss": 2.5659, "step": 105000 }, { "epoch": 0.20920327043223255, "grad_norm": 0.14251303672790527, "learning_rate": 0.002, "loss": 2.5763, "step": 105010 }, { "epoch": 0.20922319265587147, "grad_norm": 0.1542084664106369, "learning_rate": 0.002, "loss": 2.5562, "step": 105020 }, { "epoch": 0.2092431148795104, "grad_norm": 0.1852869987487793, "learning_rate": 0.002, "loss": 2.5651, "step": 105030 }, { "epoch": 0.2092630371031493, "grad_norm": 0.1584002673625946, "learning_rate": 0.002, "loss": 2.5697, "step": 105040 }, { "epoch": 0.20928295932678823, "grad_norm": 0.17564977705478668, "learning_rate": 0.002, "loss": 2.571, "step": 105050 }, { "epoch": 0.20930288155042714, "grad_norm": 0.1530701071023941, "learning_rate": 0.002, "loss": 2.5726, "step": 105060 }, { "epoch": 0.20932280377406604, "grad_norm": 0.14132621884346008, "learning_rate": 0.002, "loss": 2.5627, "step": 105070 }, { "epoch": 0.20934272599770495, "grad_norm": 0.2071804404258728, "learning_rate": 0.002, "loss": 2.571, "step": 105080 }, { "epoch": 0.20936264822134387, "grad_norm": 0.1646510362625122, "learning_rate": 0.002, "loss": 2.5683, "step": 105090 }, { "epoch": 0.2093825704449828, "grad_norm": 0.1755818873643875, "learning_rate": 0.002, "loss": 2.5639, "step": 105100 }, { "epoch": 0.2094024926686217, "grad_norm": 0.16973115503787994, "learning_rate": 0.002, "loss": 2.5704, "step": 105110 }, { "epoch": 0.20942241489226063, "grad_norm": 0.15782952308654785, "learning_rate": 0.002, "loss": 2.5569, "step": 105120 }, { "epoch": 0.20944233711589952, "grad_norm": 0.1533195525407791, "learning_rate": 0.002, "loss": 2.5703, "step": 105130 }, { "epoch": 0.20946225933953844, "grad_norm": 0.14569136500358582, "learning_rate": 0.002, "loss": 2.5678, "step": 105140 }, { "epoch": 0.20948218156317736, "grad_norm": 0.16598543524742126, "learning_rate": 0.002, "loss": 2.5602, "step": 105150 }, { "epoch": 0.20950210378681627, "grad_norm": 0.18554657697677612, "learning_rate": 0.002, "loss": 2.5475, "step": 105160 }, { "epoch": 0.2095220260104552, "grad_norm": 0.17050446569919586, "learning_rate": 0.002, "loss": 2.5726, "step": 105170 }, { "epoch": 0.20954194823409408, "grad_norm": 0.2079896330833435, "learning_rate": 0.002, "loss": 2.5562, "step": 105180 }, { "epoch": 0.209561870457733, "grad_norm": 0.13344204425811768, "learning_rate": 0.002, "loss": 2.5643, "step": 105190 }, { "epoch": 0.20958179268137192, "grad_norm": 0.18200427293777466, "learning_rate": 0.002, "loss": 2.5648, "step": 105200 }, { "epoch": 0.20960171490501084, "grad_norm": 0.14950112998485565, "learning_rate": 0.002, "loss": 2.5552, "step": 105210 }, { "epoch": 0.20962163712864976, "grad_norm": 0.19938243925571442, "learning_rate": 0.002, "loss": 2.5778, "step": 105220 }, { "epoch": 0.20964155935228868, "grad_norm": 0.18014094233512878, "learning_rate": 0.002, "loss": 2.5752, "step": 105230 }, { "epoch": 0.20966148157592757, "grad_norm": 0.15897640585899353, "learning_rate": 0.002, "loss": 2.5736, "step": 105240 }, { "epoch": 0.20968140379956648, "grad_norm": 0.17217504978179932, "learning_rate": 0.002, "loss": 2.57, "step": 105250 }, { "epoch": 0.2097013260232054, "grad_norm": 0.15068256855010986, "learning_rate": 0.002, "loss": 2.5642, "step": 105260 }, { "epoch": 0.20972124824684432, "grad_norm": 0.18700221180915833, "learning_rate": 0.002, "loss": 2.5575, "step": 105270 }, { "epoch": 0.20974117047048324, "grad_norm": 0.1530480682849884, "learning_rate": 0.002, "loss": 2.5615, "step": 105280 }, { "epoch": 0.20976109269412216, "grad_norm": 0.19162560999393463, "learning_rate": 0.002, "loss": 2.5604, "step": 105290 }, { "epoch": 0.20978101491776105, "grad_norm": 0.20147523283958435, "learning_rate": 0.002, "loss": 2.5644, "step": 105300 }, { "epoch": 0.20980093714139997, "grad_norm": 0.154296413064003, "learning_rate": 0.002, "loss": 2.5659, "step": 105310 }, { "epoch": 0.2098208593650389, "grad_norm": 0.15678387880325317, "learning_rate": 0.002, "loss": 2.56, "step": 105320 }, { "epoch": 0.2098407815886778, "grad_norm": 0.16015951335430145, "learning_rate": 0.002, "loss": 2.5403, "step": 105330 }, { "epoch": 0.20986070381231672, "grad_norm": 0.14557677507400513, "learning_rate": 0.002, "loss": 2.5585, "step": 105340 }, { "epoch": 0.20988062603595564, "grad_norm": 0.23022174835205078, "learning_rate": 0.002, "loss": 2.5711, "step": 105350 }, { "epoch": 0.20990054825959453, "grad_norm": 0.1483049988746643, "learning_rate": 0.002, "loss": 2.58, "step": 105360 }, { "epoch": 0.20992047048323345, "grad_norm": 0.16949032247066498, "learning_rate": 0.002, "loss": 2.5606, "step": 105370 }, { "epoch": 0.20994039270687237, "grad_norm": 0.15196941792964935, "learning_rate": 0.002, "loss": 2.5672, "step": 105380 }, { "epoch": 0.2099603149305113, "grad_norm": 0.15060120820999146, "learning_rate": 0.002, "loss": 2.5579, "step": 105390 }, { "epoch": 0.2099802371541502, "grad_norm": 0.20842042565345764, "learning_rate": 0.002, "loss": 2.5653, "step": 105400 }, { "epoch": 0.21000015937778913, "grad_norm": 0.15706339478492737, "learning_rate": 0.002, "loss": 2.5586, "step": 105410 }, { "epoch": 0.21002008160142802, "grad_norm": 0.14400307834148407, "learning_rate": 0.002, "loss": 2.5771, "step": 105420 }, { "epoch": 0.21004000382506693, "grad_norm": 0.1523948460817337, "learning_rate": 0.002, "loss": 2.5682, "step": 105430 }, { "epoch": 0.21005992604870585, "grad_norm": 0.19596268236637115, "learning_rate": 0.002, "loss": 2.5764, "step": 105440 }, { "epoch": 0.21007984827234477, "grad_norm": 0.1667804718017578, "learning_rate": 0.002, "loss": 2.5526, "step": 105450 }, { "epoch": 0.2100997704959837, "grad_norm": 0.16567516326904297, "learning_rate": 0.002, "loss": 2.5732, "step": 105460 }, { "epoch": 0.21011969271962258, "grad_norm": 0.17419202625751495, "learning_rate": 0.002, "loss": 2.5593, "step": 105470 }, { "epoch": 0.2101396149432615, "grad_norm": 0.1768644005060196, "learning_rate": 0.002, "loss": 2.5733, "step": 105480 }, { "epoch": 0.21015953716690042, "grad_norm": 0.1479852944612503, "learning_rate": 0.002, "loss": 2.5739, "step": 105490 }, { "epoch": 0.21017945939053934, "grad_norm": 0.17376765608787537, "learning_rate": 0.002, "loss": 2.5705, "step": 105500 }, { "epoch": 0.21019938161417825, "grad_norm": 0.17031246423721313, "learning_rate": 0.002, "loss": 2.5713, "step": 105510 }, { "epoch": 0.21021930383781717, "grad_norm": 0.1531708985567093, "learning_rate": 0.002, "loss": 2.5601, "step": 105520 }, { "epoch": 0.21023922606145606, "grad_norm": 0.16283683478832245, "learning_rate": 0.002, "loss": 2.5698, "step": 105530 }, { "epoch": 0.21025914828509498, "grad_norm": 0.16463477909564972, "learning_rate": 0.002, "loss": 2.5712, "step": 105540 }, { "epoch": 0.2102790705087339, "grad_norm": 0.167402446269989, "learning_rate": 0.002, "loss": 2.5686, "step": 105550 }, { "epoch": 0.21029899273237282, "grad_norm": 0.1471744179725647, "learning_rate": 0.002, "loss": 2.5732, "step": 105560 }, { "epoch": 0.21031891495601174, "grad_norm": 0.1729823350906372, "learning_rate": 0.002, "loss": 2.5596, "step": 105570 }, { "epoch": 0.21033883717965066, "grad_norm": 0.16651910543441772, "learning_rate": 0.002, "loss": 2.5802, "step": 105580 }, { "epoch": 0.21035875940328955, "grad_norm": 0.18458063900470734, "learning_rate": 0.002, "loss": 2.5747, "step": 105590 }, { "epoch": 0.21037868162692847, "grad_norm": 0.17854563891887665, "learning_rate": 0.002, "loss": 2.5697, "step": 105600 }, { "epoch": 0.21039860385056738, "grad_norm": 0.14881210029125214, "learning_rate": 0.002, "loss": 2.5646, "step": 105610 }, { "epoch": 0.2104185260742063, "grad_norm": 0.18282461166381836, "learning_rate": 0.002, "loss": 2.5538, "step": 105620 }, { "epoch": 0.21043844829784522, "grad_norm": 0.1694033145904541, "learning_rate": 0.002, "loss": 2.5635, "step": 105630 }, { "epoch": 0.21045837052148414, "grad_norm": 0.17791882157325745, "learning_rate": 0.002, "loss": 2.5762, "step": 105640 }, { "epoch": 0.21047829274512303, "grad_norm": 0.14359073340892792, "learning_rate": 0.002, "loss": 2.5746, "step": 105650 }, { "epoch": 0.21049821496876195, "grad_norm": 0.1773776412010193, "learning_rate": 0.002, "loss": 2.5703, "step": 105660 }, { "epoch": 0.21051813719240087, "grad_norm": 0.17023172974586487, "learning_rate": 0.002, "loss": 2.5796, "step": 105670 }, { "epoch": 0.21053805941603979, "grad_norm": 0.2204558402299881, "learning_rate": 0.002, "loss": 2.5519, "step": 105680 }, { "epoch": 0.2105579816396787, "grad_norm": 0.17496897280216217, "learning_rate": 0.002, "loss": 2.5787, "step": 105690 }, { "epoch": 0.2105779038633176, "grad_norm": 0.1379285752773285, "learning_rate": 0.002, "loss": 2.5592, "step": 105700 }, { "epoch": 0.2105978260869565, "grad_norm": 0.1588941067457199, "learning_rate": 0.002, "loss": 2.5627, "step": 105710 }, { "epoch": 0.21061774831059543, "grad_norm": 0.18973492085933685, "learning_rate": 0.002, "loss": 2.5792, "step": 105720 }, { "epoch": 0.21063767053423435, "grad_norm": 0.1399322897195816, "learning_rate": 0.002, "loss": 2.5373, "step": 105730 }, { "epoch": 0.21065759275787327, "grad_norm": 0.1364157795906067, "learning_rate": 0.002, "loss": 2.5587, "step": 105740 }, { "epoch": 0.2106775149815122, "grad_norm": 0.16142523288726807, "learning_rate": 0.002, "loss": 2.5596, "step": 105750 }, { "epoch": 0.21069743720515108, "grad_norm": 0.1547536998987198, "learning_rate": 0.002, "loss": 2.5725, "step": 105760 }, { "epoch": 0.21071735942879, "grad_norm": 0.15923623740673065, "learning_rate": 0.002, "loss": 2.5694, "step": 105770 }, { "epoch": 0.21073728165242891, "grad_norm": 0.15477268397808075, "learning_rate": 0.002, "loss": 2.5525, "step": 105780 }, { "epoch": 0.21075720387606783, "grad_norm": 0.17607736587524414, "learning_rate": 0.002, "loss": 2.5648, "step": 105790 }, { "epoch": 0.21077712609970675, "grad_norm": 0.1629815548658371, "learning_rate": 0.002, "loss": 2.5624, "step": 105800 }, { "epoch": 0.21079704832334567, "grad_norm": 0.14183612167835236, "learning_rate": 0.002, "loss": 2.5627, "step": 105810 }, { "epoch": 0.21081697054698456, "grad_norm": 0.17611710727214813, "learning_rate": 0.002, "loss": 2.5645, "step": 105820 }, { "epoch": 0.21083689277062348, "grad_norm": 0.18959522247314453, "learning_rate": 0.002, "loss": 2.5708, "step": 105830 }, { "epoch": 0.2108568149942624, "grad_norm": 0.15336866676807404, "learning_rate": 0.002, "loss": 2.5713, "step": 105840 }, { "epoch": 0.21087673721790132, "grad_norm": 0.1482386440038681, "learning_rate": 0.002, "loss": 2.5615, "step": 105850 }, { "epoch": 0.21089665944154024, "grad_norm": 0.17919744551181793, "learning_rate": 0.002, "loss": 2.5638, "step": 105860 }, { "epoch": 0.21091658166517915, "grad_norm": 0.16670013964176178, "learning_rate": 0.002, "loss": 2.5563, "step": 105870 }, { "epoch": 0.21093650388881804, "grad_norm": 0.19569531083106995, "learning_rate": 0.002, "loss": 2.5567, "step": 105880 }, { "epoch": 0.21095642611245696, "grad_norm": 0.16584976017475128, "learning_rate": 0.002, "loss": 2.5764, "step": 105890 }, { "epoch": 0.21097634833609588, "grad_norm": 0.16388478875160217, "learning_rate": 0.002, "loss": 2.5738, "step": 105900 }, { "epoch": 0.2109962705597348, "grad_norm": 0.15277884900569916, "learning_rate": 0.002, "loss": 2.5799, "step": 105910 }, { "epoch": 0.21101619278337372, "grad_norm": 0.15655598044395447, "learning_rate": 0.002, "loss": 2.5643, "step": 105920 }, { "epoch": 0.2110361150070126, "grad_norm": 0.17720939218997955, "learning_rate": 0.002, "loss": 2.566, "step": 105930 }, { "epoch": 0.21105603723065153, "grad_norm": 0.18317225575447083, "learning_rate": 0.002, "loss": 2.5521, "step": 105940 }, { "epoch": 0.21107595945429045, "grad_norm": 0.14563648402690887, "learning_rate": 0.002, "loss": 2.5527, "step": 105950 }, { "epoch": 0.21109588167792936, "grad_norm": 0.21841198205947876, "learning_rate": 0.002, "loss": 2.5721, "step": 105960 }, { "epoch": 0.21111580390156828, "grad_norm": 0.17374397814273834, "learning_rate": 0.002, "loss": 2.5678, "step": 105970 }, { "epoch": 0.2111357261252072, "grad_norm": 0.15784145891666412, "learning_rate": 0.002, "loss": 2.5726, "step": 105980 }, { "epoch": 0.2111556483488461, "grad_norm": 0.16238191723823547, "learning_rate": 0.002, "loss": 2.563, "step": 105990 }, { "epoch": 0.211175570572485, "grad_norm": 0.1911279708147049, "learning_rate": 0.002, "loss": 2.5728, "step": 106000 }, { "epoch": 0.21119549279612393, "grad_norm": 0.16467814147472382, "learning_rate": 0.002, "loss": 2.5632, "step": 106010 }, { "epoch": 0.21121541501976285, "grad_norm": 0.17195504903793335, "learning_rate": 0.002, "loss": 2.5609, "step": 106020 }, { "epoch": 0.21123533724340177, "grad_norm": 0.16039367020130157, "learning_rate": 0.002, "loss": 2.5771, "step": 106030 }, { "epoch": 0.21125525946704068, "grad_norm": 0.1553913950920105, "learning_rate": 0.002, "loss": 2.5598, "step": 106040 }, { "epoch": 0.21127518169067958, "grad_norm": 0.15579354763031006, "learning_rate": 0.002, "loss": 2.5791, "step": 106050 }, { "epoch": 0.2112951039143185, "grad_norm": 0.13662847876548767, "learning_rate": 0.002, "loss": 2.5635, "step": 106060 }, { "epoch": 0.2113150261379574, "grad_norm": 0.15915021300315857, "learning_rate": 0.002, "loss": 2.5734, "step": 106070 }, { "epoch": 0.21133494836159633, "grad_norm": 0.17267131805419922, "learning_rate": 0.002, "loss": 2.5678, "step": 106080 }, { "epoch": 0.21135487058523525, "grad_norm": 0.1856832206249237, "learning_rate": 0.002, "loss": 2.5644, "step": 106090 }, { "epoch": 0.21137479280887417, "grad_norm": 0.19529256224632263, "learning_rate": 0.002, "loss": 2.5738, "step": 106100 }, { "epoch": 0.21139471503251306, "grad_norm": 0.13942040503025055, "learning_rate": 0.002, "loss": 2.5711, "step": 106110 }, { "epoch": 0.21141463725615198, "grad_norm": 0.15758417546749115, "learning_rate": 0.002, "loss": 2.5645, "step": 106120 }, { "epoch": 0.2114345594797909, "grad_norm": 0.15577535331249237, "learning_rate": 0.002, "loss": 2.5692, "step": 106130 }, { "epoch": 0.21145448170342981, "grad_norm": 0.1713103950023651, "learning_rate": 0.002, "loss": 2.5802, "step": 106140 }, { "epoch": 0.21147440392706873, "grad_norm": 0.21729208528995514, "learning_rate": 0.002, "loss": 2.5635, "step": 106150 }, { "epoch": 0.21149432615070765, "grad_norm": 0.15694460272789001, "learning_rate": 0.002, "loss": 2.5674, "step": 106160 }, { "epoch": 0.21151424837434654, "grad_norm": 0.15167392790317535, "learning_rate": 0.002, "loss": 2.5622, "step": 106170 }, { "epoch": 0.21153417059798546, "grad_norm": 0.22632525861263275, "learning_rate": 0.002, "loss": 2.5708, "step": 106180 }, { "epoch": 0.21155409282162438, "grad_norm": 0.17617402970790863, "learning_rate": 0.002, "loss": 2.5716, "step": 106190 }, { "epoch": 0.2115740150452633, "grad_norm": 0.14645829796791077, "learning_rate": 0.002, "loss": 2.5631, "step": 106200 }, { "epoch": 0.21159393726890222, "grad_norm": 0.1748650074005127, "learning_rate": 0.002, "loss": 2.5726, "step": 106210 }, { "epoch": 0.2116138594925411, "grad_norm": 0.16777856647968292, "learning_rate": 0.002, "loss": 2.5591, "step": 106220 }, { "epoch": 0.21163378171618002, "grad_norm": 0.15706215798854828, "learning_rate": 0.002, "loss": 2.559, "step": 106230 }, { "epoch": 0.21165370393981894, "grad_norm": 0.15399371087551117, "learning_rate": 0.002, "loss": 2.5645, "step": 106240 }, { "epoch": 0.21167362616345786, "grad_norm": 0.18908758461475372, "learning_rate": 0.002, "loss": 2.5571, "step": 106250 }, { "epoch": 0.21169354838709678, "grad_norm": 0.1689993441104889, "learning_rate": 0.002, "loss": 2.5677, "step": 106260 }, { "epoch": 0.2117134706107357, "grad_norm": 0.16068851947784424, "learning_rate": 0.002, "loss": 2.5827, "step": 106270 }, { "epoch": 0.2117333928343746, "grad_norm": 0.1637958139181137, "learning_rate": 0.002, "loss": 2.5547, "step": 106280 }, { "epoch": 0.2117533150580135, "grad_norm": 0.1994314044713974, "learning_rate": 0.002, "loss": 2.5768, "step": 106290 }, { "epoch": 0.21177323728165243, "grad_norm": 0.1574186533689499, "learning_rate": 0.002, "loss": 2.5788, "step": 106300 }, { "epoch": 0.21179315950529135, "grad_norm": 0.1776430457830429, "learning_rate": 0.002, "loss": 2.5609, "step": 106310 }, { "epoch": 0.21181308172893026, "grad_norm": 0.19306713342666626, "learning_rate": 0.002, "loss": 2.5683, "step": 106320 }, { "epoch": 0.21183300395256918, "grad_norm": 0.17401346564292908, "learning_rate": 0.002, "loss": 2.5641, "step": 106330 }, { "epoch": 0.21185292617620807, "grad_norm": 0.1593584567308426, "learning_rate": 0.002, "loss": 2.5725, "step": 106340 }, { "epoch": 0.211872848399847, "grad_norm": 0.18758299946784973, "learning_rate": 0.002, "loss": 2.5787, "step": 106350 }, { "epoch": 0.2118927706234859, "grad_norm": 0.16622814536094666, "learning_rate": 0.002, "loss": 2.5564, "step": 106360 }, { "epoch": 0.21191269284712483, "grad_norm": 0.15272866189479828, "learning_rate": 0.002, "loss": 2.5618, "step": 106370 }, { "epoch": 0.21193261507076375, "grad_norm": 0.2128344625234604, "learning_rate": 0.002, "loss": 2.5811, "step": 106380 }, { "epoch": 0.21195253729440267, "grad_norm": 0.17057056725025177, "learning_rate": 0.002, "loss": 2.5622, "step": 106390 }, { "epoch": 0.21197245951804156, "grad_norm": 0.1731570065021515, "learning_rate": 0.002, "loss": 2.5656, "step": 106400 }, { "epoch": 0.21199238174168047, "grad_norm": 0.17431385815143585, "learning_rate": 0.002, "loss": 2.5796, "step": 106410 }, { "epoch": 0.2120123039653194, "grad_norm": 0.1493910849094391, "learning_rate": 0.002, "loss": 2.5523, "step": 106420 }, { "epoch": 0.2120322261889583, "grad_norm": 0.14833498001098633, "learning_rate": 0.002, "loss": 2.5653, "step": 106430 }, { "epoch": 0.21205214841259723, "grad_norm": 0.16503161191940308, "learning_rate": 0.002, "loss": 2.5713, "step": 106440 }, { "epoch": 0.21207207063623612, "grad_norm": 0.1624317616224289, "learning_rate": 0.002, "loss": 2.5611, "step": 106450 }, { "epoch": 0.21209199285987504, "grad_norm": 0.1959758698940277, "learning_rate": 0.002, "loss": 2.5652, "step": 106460 }, { "epoch": 0.21211191508351396, "grad_norm": 0.172500878572464, "learning_rate": 0.002, "loss": 2.56, "step": 106470 }, { "epoch": 0.21213183730715288, "grad_norm": 0.18936070799827576, "learning_rate": 0.002, "loss": 2.5657, "step": 106480 }, { "epoch": 0.2121517595307918, "grad_norm": 0.15095949172973633, "learning_rate": 0.002, "loss": 2.5627, "step": 106490 }, { "epoch": 0.2121716817544307, "grad_norm": 0.1834898293018341, "learning_rate": 0.002, "loss": 2.5543, "step": 106500 }, { "epoch": 0.2121916039780696, "grad_norm": 0.17028120160102844, "learning_rate": 0.002, "loss": 2.5578, "step": 106510 }, { "epoch": 0.21221152620170852, "grad_norm": 0.1594310998916626, "learning_rate": 0.002, "loss": 2.5887, "step": 106520 }, { "epoch": 0.21223144842534744, "grad_norm": 0.1876022070646286, "learning_rate": 0.002, "loss": 2.5648, "step": 106530 }, { "epoch": 0.21225137064898636, "grad_norm": 0.14298184216022491, "learning_rate": 0.002, "loss": 2.5538, "step": 106540 }, { "epoch": 0.21227129287262528, "grad_norm": 0.19056783616542816, "learning_rate": 0.002, "loss": 2.5547, "step": 106550 }, { "epoch": 0.2122912150962642, "grad_norm": 0.20147745311260223, "learning_rate": 0.002, "loss": 2.5797, "step": 106560 }, { "epoch": 0.2123111373199031, "grad_norm": 0.17809323966503143, "learning_rate": 0.002, "loss": 2.5668, "step": 106570 }, { "epoch": 0.212331059543542, "grad_norm": 0.17167851328849792, "learning_rate": 0.002, "loss": 2.5729, "step": 106580 }, { "epoch": 0.21235098176718092, "grad_norm": 0.16371862590312958, "learning_rate": 0.002, "loss": 2.5467, "step": 106590 }, { "epoch": 0.21237090399081984, "grad_norm": 0.17998023331165314, "learning_rate": 0.002, "loss": 2.5705, "step": 106600 }, { "epoch": 0.21239082621445876, "grad_norm": 0.16395145654678345, "learning_rate": 0.002, "loss": 2.5572, "step": 106610 }, { "epoch": 0.21241074843809768, "grad_norm": 0.19379819929599762, "learning_rate": 0.002, "loss": 2.5769, "step": 106620 }, { "epoch": 0.21243067066173657, "grad_norm": 0.1538742631673813, "learning_rate": 0.002, "loss": 2.5737, "step": 106630 }, { "epoch": 0.2124505928853755, "grad_norm": 0.16486148536205292, "learning_rate": 0.002, "loss": 2.5983, "step": 106640 }, { "epoch": 0.2124705151090144, "grad_norm": 0.17628739774227142, "learning_rate": 0.002, "loss": 2.5612, "step": 106650 }, { "epoch": 0.21249043733265333, "grad_norm": 0.14856642484664917, "learning_rate": 0.002, "loss": 2.5642, "step": 106660 }, { "epoch": 0.21251035955629224, "grad_norm": 0.16959905624389648, "learning_rate": 0.002, "loss": 2.5659, "step": 106670 }, { "epoch": 0.21253028177993114, "grad_norm": 0.19059640169143677, "learning_rate": 0.002, "loss": 2.5661, "step": 106680 }, { "epoch": 0.21255020400357005, "grad_norm": 0.1646786481142044, "learning_rate": 0.002, "loss": 2.5827, "step": 106690 }, { "epoch": 0.21257012622720897, "grad_norm": 0.1514260321855545, "learning_rate": 0.002, "loss": 2.5778, "step": 106700 }, { "epoch": 0.2125900484508479, "grad_norm": 0.2129444181919098, "learning_rate": 0.002, "loss": 2.5552, "step": 106710 }, { "epoch": 0.2126099706744868, "grad_norm": 0.18783928453922272, "learning_rate": 0.002, "loss": 2.5645, "step": 106720 }, { "epoch": 0.21262989289812573, "grad_norm": 0.16277506947517395, "learning_rate": 0.002, "loss": 2.5806, "step": 106730 }, { "epoch": 0.21264981512176462, "grad_norm": 0.15552450716495514, "learning_rate": 0.002, "loss": 2.5801, "step": 106740 }, { "epoch": 0.21266973734540354, "grad_norm": 0.17860816419124603, "learning_rate": 0.002, "loss": 2.5673, "step": 106750 }, { "epoch": 0.21268965956904246, "grad_norm": 0.170627623796463, "learning_rate": 0.002, "loss": 2.5621, "step": 106760 }, { "epoch": 0.21270958179268137, "grad_norm": 0.1442793756723404, "learning_rate": 0.002, "loss": 2.5708, "step": 106770 }, { "epoch": 0.2127295040163203, "grad_norm": 0.19649305939674377, "learning_rate": 0.002, "loss": 2.5745, "step": 106780 }, { "epoch": 0.2127494262399592, "grad_norm": 0.19680386781692505, "learning_rate": 0.002, "loss": 2.5727, "step": 106790 }, { "epoch": 0.2127693484635981, "grad_norm": 0.1742323935031891, "learning_rate": 0.002, "loss": 2.5605, "step": 106800 }, { "epoch": 0.21278927068723702, "grad_norm": 0.1672305464744568, "learning_rate": 0.002, "loss": 2.5611, "step": 106810 }, { "epoch": 0.21280919291087594, "grad_norm": 0.16788990795612335, "learning_rate": 0.002, "loss": 2.5508, "step": 106820 }, { "epoch": 0.21282911513451486, "grad_norm": 0.13874518871307373, "learning_rate": 0.002, "loss": 2.5487, "step": 106830 }, { "epoch": 0.21284903735815378, "grad_norm": 0.15665259957313538, "learning_rate": 0.002, "loss": 2.5711, "step": 106840 }, { "epoch": 0.2128689595817927, "grad_norm": 0.17791405320167542, "learning_rate": 0.002, "loss": 2.5609, "step": 106850 }, { "epoch": 0.21288888180543158, "grad_norm": 0.15786448121070862, "learning_rate": 0.002, "loss": 2.569, "step": 106860 }, { "epoch": 0.2129088040290705, "grad_norm": 0.1503126621246338, "learning_rate": 0.002, "loss": 2.5623, "step": 106870 }, { "epoch": 0.21292872625270942, "grad_norm": 0.17440003156661987, "learning_rate": 0.002, "loss": 2.5622, "step": 106880 }, { "epoch": 0.21294864847634834, "grad_norm": 0.14767777919769287, "learning_rate": 0.002, "loss": 2.5688, "step": 106890 }, { "epoch": 0.21296857069998726, "grad_norm": 0.14375831186771393, "learning_rate": 0.002, "loss": 2.5818, "step": 106900 }, { "epoch": 0.21298849292362618, "grad_norm": 0.18990173935890198, "learning_rate": 0.002, "loss": 2.5673, "step": 106910 }, { "epoch": 0.21300841514726507, "grad_norm": 0.1439257264137268, "learning_rate": 0.002, "loss": 2.5586, "step": 106920 }, { "epoch": 0.213028337370904, "grad_norm": 0.18319492042064667, "learning_rate": 0.002, "loss": 2.578, "step": 106930 }, { "epoch": 0.2130482595945429, "grad_norm": 0.1636279821395874, "learning_rate": 0.002, "loss": 2.5694, "step": 106940 }, { "epoch": 0.21306818181818182, "grad_norm": 0.19757778942584991, "learning_rate": 0.002, "loss": 2.5645, "step": 106950 }, { "epoch": 0.21308810404182074, "grad_norm": 0.1726708859205246, "learning_rate": 0.002, "loss": 2.5643, "step": 106960 }, { "epoch": 0.21310802626545963, "grad_norm": 0.1335529237985611, "learning_rate": 0.002, "loss": 2.5623, "step": 106970 }, { "epoch": 0.21312794848909855, "grad_norm": 0.21886467933654785, "learning_rate": 0.002, "loss": 2.5632, "step": 106980 }, { "epoch": 0.21314787071273747, "grad_norm": 0.16225691139698029, "learning_rate": 0.002, "loss": 2.5644, "step": 106990 }, { "epoch": 0.2131677929363764, "grad_norm": 0.17395377159118652, "learning_rate": 0.002, "loss": 2.5648, "step": 107000 }, { "epoch": 0.2131877151600153, "grad_norm": 0.1610785573720932, "learning_rate": 0.002, "loss": 2.5748, "step": 107010 }, { "epoch": 0.21320763738365422, "grad_norm": 0.17618869245052338, "learning_rate": 0.002, "loss": 2.575, "step": 107020 }, { "epoch": 0.21322755960729312, "grad_norm": 0.16367444396018982, "learning_rate": 0.002, "loss": 2.5644, "step": 107030 }, { "epoch": 0.21324748183093203, "grad_norm": 0.16589491069316864, "learning_rate": 0.002, "loss": 2.5676, "step": 107040 }, { "epoch": 0.21326740405457095, "grad_norm": 0.16908690333366394, "learning_rate": 0.002, "loss": 2.5578, "step": 107050 }, { "epoch": 0.21328732627820987, "grad_norm": 0.17069436609745026, "learning_rate": 0.002, "loss": 2.5654, "step": 107060 }, { "epoch": 0.2133072485018488, "grad_norm": 0.16146965324878693, "learning_rate": 0.002, "loss": 2.5674, "step": 107070 }, { "epoch": 0.2133271707254877, "grad_norm": 0.17822259664535522, "learning_rate": 0.002, "loss": 2.5518, "step": 107080 }, { "epoch": 0.2133470929491266, "grad_norm": 0.17025083303451538, "learning_rate": 0.002, "loss": 2.5485, "step": 107090 }, { "epoch": 0.21336701517276552, "grad_norm": 0.17236028611660004, "learning_rate": 0.002, "loss": 2.5762, "step": 107100 }, { "epoch": 0.21338693739640444, "grad_norm": 0.14916865527629852, "learning_rate": 0.002, "loss": 2.5729, "step": 107110 }, { "epoch": 0.21340685962004335, "grad_norm": 0.17053654789924622, "learning_rate": 0.002, "loss": 2.5676, "step": 107120 }, { "epoch": 0.21342678184368227, "grad_norm": 0.1350255161523819, "learning_rate": 0.002, "loss": 2.5719, "step": 107130 }, { "epoch": 0.2134467040673212, "grad_norm": 0.18491195142269135, "learning_rate": 0.002, "loss": 2.5659, "step": 107140 }, { "epoch": 0.21346662629096008, "grad_norm": 0.17038707435131073, "learning_rate": 0.002, "loss": 2.5666, "step": 107150 }, { "epoch": 0.213486548514599, "grad_norm": 0.1555350124835968, "learning_rate": 0.002, "loss": 2.5761, "step": 107160 }, { "epoch": 0.21350647073823792, "grad_norm": 0.1583530157804489, "learning_rate": 0.002, "loss": 2.561, "step": 107170 }, { "epoch": 0.21352639296187684, "grad_norm": 0.1730995625257492, "learning_rate": 0.002, "loss": 2.5828, "step": 107180 }, { "epoch": 0.21354631518551576, "grad_norm": 0.1940915435552597, "learning_rate": 0.002, "loss": 2.5729, "step": 107190 }, { "epoch": 0.21356623740915465, "grad_norm": 0.16511492431163788, "learning_rate": 0.002, "loss": 2.5666, "step": 107200 }, { "epoch": 0.21358615963279357, "grad_norm": 0.1637299507856369, "learning_rate": 0.002, "loss": 2.5638, "step": 107210 }, { "epoch": 0.21360608185643248, "grad_norm": 0.16189594566822052, "learning_rate": 0.002, "loss": 2.5714, "step": 107220 }, { "epoch": 0.2136260040800714, "grad_norm": 0.17844432592391968, "learning_rate": 0.002, "loss": 2.5671, "step": 107230 }, { "epoch": 0.21364592630371032, "grad_norm": 0.17307400703430176, "learning_rate": 0.002, "loss": 2.5586, "step": 107240 }, { "epoch": 0.21366584852734924, "grad_norm": 0.15676096081733704, "learning_rate": 0.002, "loss": 2.5632, "step": 107250 }, { "epoch": 0.21368577075098813, "grad_norm": 0.1806281954050064, "learning_rate": 0.002, "loss": 2.5614, "step": 107260 }, { "epoch": 0.21370569297462705, "grad_norm": 0.16037914156913757, "learning_rate": 0.002, "loss": 2.5798, "step": 107270 }, { "epoch": 0.21372561519826597, "grad_norm": 0.155903160572052, "learning_rate": 0.002, "loss": 2.5785, "step": 107280 }, { "epoch": 0.21374553742190489, "grad_norm": 0.19630715250968933, "learning_rate": 0.002, "loss": 2.5714, "step": 107290 }, { "epoch": 0.2137654596455438, "grad_norm": 0.16838210821151733, "learning_rate": 0.002, "loss": 2.5656, "step": 107300 }, { "epoch": 0.21378538186918272, "grad_norm": 0.16841338574886322, "learning_rate": 0.002, "loss": 2.5598, "step": 107310 }, { "epoch": 0.2138053040928216, "grad_norm": 0.16775286197662354, "learning_rate": 0.002, "loss": 2.5699, "step": 107320 }, { "epoch": 0.21382522631646053, "grad_norm": 0.17213156819343567, "learning_rate": 0.002, "loss": 2.5696, "step": 107330 }, { "epoch": 0.21384514854009945, "grad_norm": 0.14916321635246277, "learning_rate": 0.002, "loss": 2.5542, "step": 107340 }, { "epoch": 0.21386507076373837, "grad_norm": 0.19721977412700653, "learning_rate": 0.002, "loss": 2.5755, "step": 107350 }, { "epoch": 0.2138849929873773, "grad_norm": 0.16817684471607208, "learning_rate": 0.002, "loss": 2.5503, "step": 107360 }, { "epoch": 0.2139049152110162, "grad_norm": 0.13909637928009033, "learning_rate": 0.002, "loss": 2.5583, "step": 107370 }, { "epoch": 0.2139248374346551, "grad_norm": 0.1763712763786316, "learning_rate": 0.002, "loss": 2.5778, "step": 107380 }, { "epoch": 0.21394475965829401, "grad_norm": 0.16320569813251495, "learning_rate": 0.002, "loss": 2.5692, "step": 107390 }, { "epoch": 0.21396468188193293, "grad_norm": 0.1592206060886383, "learning_rate": 0.002, "loss": 2.572, "step": 107400 }, { "epoch": 0.21398460410557185, "grad_norm": 0.18219996988773346, "learning_rate": 0.002, "loss": 2.5691, "step": 107410 }, { "epoch": 0.21400452632921077, "grad_norm": 0.158196359872818, "learning_rate": 0.002, "loss": 2.5601, "step": 107420 }, { "epoch": 0.2140244485528497, "grad_norm": 0.1667645275592804, "learning_rate": 0.002, "loss": 2.5571, "step": 107430 }, { "epoch": 0.21404437077648858, "grad_norm": 0.14380888640880585, "learning_rate": 0.002, "loss": 2.5553, "step": 107440 }, { "epoch": 0.2140642930001275, "grad_norm": 0.1652611345052719, "learning_rate": 0.002, "loss": 2.5728, "step": 107450 }, { "epoch": 0.21408421522376642, "grad_norm": 0.1779594123363495, "learning_rate": 0.002, "loss": 2.5653, "step": 107460 }, { "epoch": 0.21410413744740533, "grad_norm": 0.16959421336650848, "learning_rate": 0.002, "loss": 2.5801, "step": 107470 }, { "epoch": 0.21412405967104425, "grad_norm": 0.17332297563552856, "learning_rate": 0.002, "loss": 2.5654, "step": 107480 }, { "epoch": 0.21414398189468314, "grad_norm": 0.16774655878543854, "learning_rate": 0.002, "loss": 2.5753, "step": 107490 }, { "epoch": 0.21416390411832206, "grad_norm": 0.1514235883951187, "learning_rate": 0.002, "loss": 2.5595, "step": 107500 }, { "epoch": 0.21418382634196098, "grad_norm": 0.1705854833126068, "learning_rate": 0.002, "loss": 2.573, "step": 107510 }, { "epoch": 0.2142037485655999, "grad_norm": 0.20053744316101074, "learning_rate": 0.002, "loss": 2.5788, "step": 107520 }, { "epoch": 0.21422367078923882, "grad_norm": 0.16982166469097137, "learning_rate": 0.002, "loss": 2.5639, "step": 107530 }, { "epoch": 0.21424359301287774, "grad_norm": 0.17278344929218292, "learning_rate": 0.002, "loss": 2.5689, "step": 107540 }, { "epoch": 0.21426351523651663, "grad_norm": 0.1599215567111969, "learning_rate": 0.002, "loss": 2.5567, "step": 107550 }, { "epoch": 0.21428343746015555, "grad_norm": 0.17579235136508942, "learning_rate": 0.002, "loss": 2.59, "step": 107560 }, { "epoch": 0.21430335968379446, "grad_norm": 0.14977681636810303, "learning_rate": 0.002, "loss": 2.5668, "step": 107570 }, { "epoch": 0.21432328190743338, "grad_norm": 0.16630661487579346, "learning_rate": 0.002, "loss": 2.5643, "step": 107580 }, { "epoch": 0.2143432041310723, "grad_norm": 0.13973885774612427, "learning_rate": 0.002, "loss": 2.5769, "step": 107590 }, { "epoch": 0.21436312635471122, "grad_norm": 0.16867955029010773, "learning_rate": 0.002, "loss": 2.5652, "step": 107600 }, { "epoch": 0.2143830485783501, "grad_norm": 0.1557255983352661, "learning_rate": 0.002, "loss": 2.5751, "step": 107610 }, { "epoch": 0.21440297080198903, "grad_norm": 0.1550445556640625, "learning_rate": 0.002, "loss": 2.5554, "step": 107620 }, { "epoch": 0.21442289302562795, "grad_norm": 0.14207731187343597, "learning_rate": 0.002, "loss": 2.5609, "step": 107630 }, { "epoch": 0.21444281524926687, "grad_norm": 0.16484080255031586, "learning_rate": 0.002, "loss": 2.5729, "step": 107640 }, { "epoch": 0.21446273747290578, "grad_norm": 0.17460446059703827, "learning_rate": 0.002, "loss": 2.5721, "step": 107650 }, { "epoch": 0.2144826596965447, "grad_norm": 0.15579155087471008, "learning_rate": 0.002, "loss": 2.5496, "step": 107660 }, { "epoch": 0.2145025819201836, "grad_norm": 0.15317392349243164, "learning_rate": 0.002, "loss": 2.556, "step": 107670 }, { "epoch": 0.2145225041438225, "grad_norm": 0.17169904708862305, "learning_rate": 0.002, "loss": 2.5641, "step": 107680 }, { "epoch": 0.21454242636746143, "grad_norm": 0.1624254286289215, "learning_rate": 0.002, "loss": 2.554, "step": 107690 }, { "epoch": 0.21456234859110035, "grad_norm": 0.16062521934509277, "learning_rate": 0.002, "loss": 2.5612, "step": 107700 }, { "epoch": 0.21458227081473927, "grad_norm": 0.15118227899074554, "learning_rate": 0.002, "loss": 2.5672, "step": 107710 }, { "epoch": 0.21460219303837816, "grad_norm": 0.13404589891433716, "learning_rate": 0.002, "loss": 2.5581, "step": 107720 }, { "epoch": 0.21462211526201708, "grad_norm": 0.17424090206623077, "learning_rate": 0.002, "loss": 2.5648, "step": 107730 }, { "epoch": 0.214642037485656, "grad_norm": 0.1845293939113617, "learning_rate": 0.002, "loss": 2.58, "step": 107740 }, { "epoch": 0.21466195970929491, "grad_norm": 0.16788052022457123, "learning_rate": 0.002, "loss": 2.5641, "step": 107750 }, { "epoch": 0.21468188193293383, "grad_norm": 0.1470147967338562, "learning_rate": 0.002, "loss": 2.5658, "step": 107760 }, { "epoch": 0.21470180415657275, "grad_norm": 0.15350323915481567, "learning_rate": 0.002, "loss": 2.5619, "step": 107770 }, { "epoch": 0.21472172638021164, "grad_norm": 0.14536455273628235, "learning_rate": 0.002, "loss": 2.5614, "step": 107780 }, { "epoch": 0.21474164860385056, "grad_norm": 0.19229315221309662, "learning_rate": 0.002, "loss": 2.5734, "step": 107790 }, { "epoch": 0.21476157082748948, "grad_norm": 0.1430317759513855, "learning_rate": 0.002, "loss": 2.5671, "step": 107800 }, { "epoch": 0.2147814930511284, "grad_norm": 0.1610838919878006, "learning_rate": 0.002, "loss": 2.58, "step": 107810 }, { "epoch": 0.21480141527476732, "grad_norm": 0.1819777637720108, "learning_rate": 0.002, "loss": 2.5557, "step": 107820 }, { "epoch": 0.21482133749840623, "grad_norm": 0.17046961188316345, "learning_rate": 0.002, "loss": 2.5545, "step": 107830 }, { "epoch": 0.21484125972204512, "grad_norm": 0.1563551127910614, "learning_rate": 0.002, "loss": 2.5702, "step": 107840 }, { "epoch": 0.21486118194568404, "grad_norm": 0.17386789619922638, "learning_rate": 0.002, "loss": 2.579, "step": 107850 }, { "epoch": 0.21488110416932296, "grad_norm": 0.17366600036621094, "learning_rate": 0.002, "loss": 2.5687, "step": 107860 }, { "epoch": 0.21490102639296188, "grad_norm": 0.16374337673187256, "learning_rate": 0.002, "loss": 2.5674, "step": 107870 }, { "epoch": 0.2149209486166008, "grad_norm": 0.17925168573856354, "learning_rate": 0.002, "loss": 2.5571, "step": 107880 }, { "epoch": 0.21494087084023972, "grad_norm": 0.15387077629566193, "learning_rate": 0.002, "loss": 2.5716, "step": 107890 }, { "epoch": 0.2149607930638786, "grad_norm": 0.165199875831604, "learning_rate": 0.002, "loss": 2.5597, "step": 107900 }, { "epoch": 0.21498071528751753, "grad_norm": 0.15176112949848175, "learning_rate": 0.002, "loss": 2.5644, "step": 107910 }, { "epoch": 0.21500063751115644, "grad_norm": 0.17482568323612213, "learning_rate": 0.002, "loss": 2.561, "step": 107920 }, { "epoch": 0.21502055973479536, "grad_norm": 0.1599978804588318, "learning_rate": 0.002, "loss": 2.5638, "step": 107930 }, { "epoch": 0.21504048195843428, "grad_norm": 0.14764821529388428, "learning_rate": 0.002, "loss": 2.5586, "step": 107940 }, { "epoch": 0.21506040418207317, "grad_norm": 0.1902785450220108, "learning_rate": 0.002, "loss": 2.566, "step": 107950 }, { "epoch": 0.2150803264057121, "grad_norm": 0.16458527743816376, "learning_rate": 0.002, "loss": 2.5706, "step": 107960 }, { "epoch": 0.215100248629351, "grad_norm": 0.1282818466424942, "learning_rate": 0.002, "loss": 2.5709, "step": 107970 }, { "epoch": 0.21512017085298993, "grad_norm": 0.1753414124250412, "learning_rate": 0.002, "loss": 2.5841, "step": 107980 }, { "epoch": 0.21514009307662885, "grad_norm": 0.15143829584121704, "learning_rate": 0.002, "loss": 2.5654, "step": 107990 }, { "epoch": 0.21516001530026777, "grad_norm": 0.163518026471138, "learning_rate": 0.002, "loss": 2.5792, "step": 108000 }, { "epoch": 0.21517993752390666, "grad_norm": 0.1783466339111328, "learning_rate": 0.002, "loss": 2.5744, "step": 108010 }, { "epoch": 0.21519985974754557, "grad_norm": 0.1525169461965561, "learning_rate": 0.002, "loss": 2.5656, "step": 108020 }, { "epoch": 0.2152197819711845, "grad_norm": 0.15329213440418243, "learning_rate": 0.002, "loss": 2.5613, "step": 108030 }, { "epoch": 0.2152397041948234, "grad_norm": 0.19182829558849335, "learning_rate": 0.002, "loss": 2.5734, "step": 108040 }, { "epoch": 0.21525962641846233, "grad_norm": 0.15746022760868073, "learning_rate": 0.002, "loss": 2.5555, "step": 108050 }, { "epoch": 0.21527954864210125, "grad_norm": 0.1468856781721115, "learning_rate": 0.002, "loss": 2.5823, "step": 108060 }, { "epoch": 0.21529947086574014, "grad_norm": 0.17095397412776947, "learning_rate": 0.002, "loss": 2.5597, "step": 108070 }, { "epoch": 0.21531939308937906, "grad_norm": 0.15145790576934814, "learning_rate": 0.002, "loss": 2.5643, "step": 108080 }, { "epoch": 0.21533931531301798, "grad_norm": 0.18226590752601624, "learning_rate": 0.002, "loss": 2.5559, "step": 108090 }, { "epoch": 0.2153592375366569, "grad_norm": 0.1632550209760666, "learning_rate": 0.002, "loss": 2.5704, "step": 108100 }, { "epoch": 0.2153791597602958, "grad_norm": 0.2046424001455307, "learning_rate": 0.002, "loss": 2.5778, "step": 108110 }, { "epoch": 0.21539908198393473, "grad_norm": 0.1568140685558319, "learning_rate": 0.002, "loss": 2.5575, "step": 108120 }, { "epoch": 0.21541900420757362, "grad_norm": 0.17047268152236938, "learning_rate": 0.002, "loss": 2.5646, "step": 108130 }, { "epoch": 0.21543892643121254, "grad_norm": 0.20472921431064606, "learning_rate": 0.002, "loss": 2.5745, "step": 108140 }, { "epoch": 0.21545884865485146, "grad_norm": 0.16504846513271332, "learning_rate": 0.002, "loss": 2.5571, "step": 108150 }, { "epoch": 0.21547877087849038, "grad_norm": 0.17433835566043854, "learning_rate": 0.002, "loss": 2.5474, "step": 108160 }, { "epoch": 0.2154986931021293, "grad_norm": 0.18202468752861023, "learning_rate": 0.002, "loss": 2.5612, "step": 108170 }, { "epoch": 0.21551861532576821, "grad_norm": 0.17352251708507538, "learning_rate": 0.002, "loss": 2.5626, "step": 108180 }, { "epoch": 0.2155385375494071, "grad_norm": 0.14071594178676605, "learning_rate": 0.002, "loss": 2.5755, "step": 108190 }, { "epoch": 0.21555845977304602, "grad_norm": 0.16376890242099762, "learning_rate": 0.002, "loss": 2.5592, "step": 108200 }, { "epoch": 0.21557838199668494, "grad_norm": 0.15386565029621124, "learning_rate": 0.002, "loss": 2.5603, "step": 108210 }, { "epoch": 0.21559830422032386, "grad_norm": 0.15654826164245605, "learning_rate": 0.002, "loss": 2.5836, "step": 108220 }, { "epoch": 0.21561822644396278, "grad_norm": 0.1549147516489029, "learning_rate": 0.002, "loss": 2.5782, "step": 108230 }, { "epoch": 0.21563814866760167, "grad_norm": 0.18606923520565033, "learning_rate": 0.002, "loss": 2.57, "step": 108240 }, { "epoch": 0.2156580708912406, "grad_norm": 0.15233340859413147, "learning_rate": 0.002, "loss": 2.5734, "step": 108250 }, { "epoch": 0.2156779931148795, "grad_norm": 0.16585911810398102, "learning_rate": 0.002, "loss": 2.5733, "step": 108260 }, { "epoch": 0.21569791533851843, "grad_norm": 0.15961188077926636, "learning_rate": 0.002, "loss": 2.5633, "step": 108270 }, { "epoch": 0.21571783756215734, "grad_norm": 0.1871635615825653, "learning_rate": 0.002, "loss": 2.5642, "step": 108280 }, { "epoch": 0.21573775978579626, "grad_norm": 0.1482921540737152, "learning_rate": 0.002, "loss": 2.5681, "step": 108290 }, { "epoch": 0.21575768200943515, "grad_norm": 0.1577243208885193, "learning_rate": 0.002, "loss": 2.5698, "step": 108300 }, { "epoch": 0.21577760423307407, "grad_norm": 0.18950790166854858, "learning_rate": 0.002, "loss": 2.5697, "step": 108310 }, { "epoch": 0.215797526456713, "grad_norm": 0.1293003112077713, "learning_rate": 0.002, "loss": 2.566, "step": 108320 }, { "epoch": 0.2158174486803519, "grad_norm": 0.16679823398590088, "learning_rate": 0.002, "loss": 2.5742, "step": 108330 }, { "epoch": 0.21583737090399083, "grad_norm": 0.18579253554344177, "learning_rate": 0.002, "loss": 2.5825, "step": 108340 }, { "epoch": 0.21585729312762975, "grad_norm": 0.1649124026298523, "learning_rate": 0.002, "loss": 2.5831, "step": 108350 }, { "epoch": 0.21587721535126864, "grad_norm": 0.15026365220546722, "learning_rate": 0.002, "loss": 2.5729, "step": 108360 }, { "epoch": 0.21589713757490755, "grad_norm": 0.16529406607151031, "learning_rate": 0.002, "loss": 2.5773, "step": 108370 }, { "epoch": 0.21591705979854647, "grad_norm": 0.1414581537246704, "learning_rate": 0.002, "loss": 2.5595, "step": 108380 }, { "epoch": 0.2159369820221854, "grad_norm": 0.17809955775737762, "learning_rate": 0.002, "loss": 2.5611, "step": 108390 }, { "epoch": 0.2159569042458243, "grad_norm": 0.16815757751464844, "learning_rate": 0.002, "loss": 2.5872, "step": 108400 }, { "epoch": 0.21597682646946323, "grad_norm": 0.13704964518547058, "learning_rate": 0.002, "loss": 2.5739, "step": 108410 }, { "epoch": 0.21599674869310212, "grad_norm": 0.16724592447280884, "learning_rate": 0.002, "loss": 2.5608, "step": 108420 }, { "epoch": 0.21601667091674104, "grad_norm": 0.1968352049589157, "learning_rate": 0.002, "loss": 2.554, "step": 108430 }, { "epoch": 0.21603659314037996, "grad_norm": 0.20868581533432007, "learning_rate": 0.002, "loss": 2.5512, "step": 108440 }, { "epoch": 0.21605651536401888, "grad_norm": 0.164603129029274, "learning_rate": 0.002, "loss": 2.5811, "step": 108450 }, { "epoch": 0.2160764375876578, "grad_norm": 0.3037519156932831, "learning_rate": 0.002, "loss": 2.5568, "step": 108460 }, { "epoch": 0.21609635981129668, "grad_norm": 0.14700978994369507, "learning_rate": 0.002, "loss": 2.5621, "step": 108470 }, { "epoch": 0.2161162820349356, "grad_norm": 0.17470552027225494, "learning_rate": 0.002, "loss": 2.5679, "step": 108480 }, { "epoch": 0.21613620425857452, "grad_norm": 0.14485089480876923, "learning_rate": 0.002, "loss": 2.5554, "step": 108490 }, { "epoch": 0.21615612648221344, "grad_norm": 0.18401473760604858, "learning_rate": 0.002, "loss": 2.5645, "step": 108500 }, { "epoch": 0.21617604870585236, "grad_norm": 0.1528838723897934, "learning_rate": 0.002, "loss": 2.5608, "step": 108510 }, { "epoch": 0.21619597092949128, "grad_norm": 0.14262409508228302, "learning_rate": 0.002, "loss": 2.5687, "step": 108520 }, { "epoch": 0.21621589315313017, "grad_norm": 0.1521330177783966, "learning_rate": 0.002, "loss": 2.5826, "step": 108530 }, { "epoch": 0.21623581537676909, "grad_norm": 0.18854327499866486, "learning_rate": 0.002, "loss": 2.5524, "step": 108540 }, { "epoch": 0.216255737600408, "grad_norm": 0.16133293509483337, "learning_rate": 0.002, "loss": 2.5506, "step": 108550 }, { "epoch": 0.21627565982404692, "grad_norm": 0.1636057198047638, "learning_rate": 0.002, "loss": 2.5717, "step": 108560 }, { "epoch": 0.21629558204768584, "grad_norm": 0.15250742435455322, "learning_rate": 0.002, "loss": 2.5691, "step": 108570 }, { "epoch": 0.21631550427132476, "grad_norm": 0.17970705032348633, "learning_rate": 0.002, "loss": 2.5667, "step": 108580 }, { "epoch": 0.21633542649496365, "grad_norm": 0.18140296638011932, "learning_rate": 0.002, "loss": 2.5604, "step": 108590 }, { "epoch": 0.21635534871860257, "grad_norm": 0.14932137727737427, "learning_rate": 0.002, "loss": 2.5619, "step": 108600 }, { "epoch": 0.2163752709422415, "grad_norm": 0.16262635588645935, "learning_rate": 0.002, "loss": 2.5798, "step": 108610 }, { "epoch": 0.2163951931658804, "grad_norm": 0.1683257818222046, "learning_rate": 0.002, "loss": 2.5688, "step": 108620 }, { "epoch": 0.21641511538951932, "grad_norm": 0.15839742124080658, "learning_rate": 0.002, "loss": 2.5612, "step": 108630 }, { "epoch": 0.21643503761315824, "grad_norm": 0.16661694645881653, "learning_rate": 0.002, "loss": 2.5644, "step": 108640 }, { "epoch": 0.21645495983679713, "grad_norm": 0.1837698370218277, "learning_rate": 0.002, "loss": 2.5662, "step": 108650 }, { "epoch": 0.21647488206043605, "grad_norm": 0.15209268033504486, "learning_rate": 0.002, "loss": 2.5778, "step": 108660 }, { "epoch": 0.21649480428407497, "grad_norm": 0.15522898733615875, "learning_rate": 0.002, "loss": 2.5611, "step": 108670 }, { "epoch": 0.2165147265077139, "grad_norm": 0.15692584216594696, "learning_rate": 0.002, "loss": 2.5739, "step": 108680 }, { "epoch": 0.2165346487313528, "grad_norm": 0.17451485991477966, "learning_rate": 0.002, "loss": 2.5757, "step": 108690 }, { "epoch": 0.2165545709549917, "grad_norm": 0.15997442603111267, "learning_rate": 0.002, "loss": 2.5686, "step": 108700 }, { "epoch": 0.21657449317863062, "grad_norm": 0.18740855157375336, "learning_rate": 0.002, "loss": 2.5606, "step": 108710 }, { "epoch": 0.21659441540226954, "grad_norm": 0.1577666997909546, "learning_rate": 0.002, "loss": 2.5618, "step": 108720 }, { "epoch": 0.21661433762590845, "grad_norm": 0.16169746220111847, "learning_rate": 0.002, "loss": 2.5708, "step": 108730 }, { "epoch": 0.21663425984954737, "grad_norm": 0.14945456385612488, "learning_rate": 0.002, "loss": 2.5579, "step": 108740 }, { "epoch": 0.2166541820731863, "grad_norm": 0.16520807147026062, "learning_rate": 0.002, "loss": 2.5711, "step": 108750 }, { "epoch": 0.21667410429682518, "grad_norm": 0.1588623970746994, "learning_rate": 0.002, "loss": 2.5719, "step": 108760 }, { "epoch": 0.2166940265204641, "grad_norm": 0.14494666457176208, "learning_rate": 0.002, "loss": 2.5558, "step": 108770 }, { "epoch": 0.21671394874410302, "grad_norm": 0.1643371880054474, "learning_rate": 0.002, "loss": 2.5754, "step": 108780 }, { "epoch": 0.21673387096774194, "grad_norm": 0.1469433456659317, "learning_rate": 0.002, "loss": 2.5671, "step": 108790 }, { "epoch": 0.21675379319138086, "grad_norm": 0.17891696095466614, "learning_rate": 0.002, "loss": 2.5634, "step": 108800 }, { "epoch": 0.21677371541501977, "grad_norm": 0.21482045948505402, "learning_rate": 0.002, "loss": 2.5692, "step": 108810 }, { "epoch": 0.21679363763865866, "grad_norm": 0.17898792028427124, "learning_rate": 0.002, "loss": 2.552, "step": 108820 }, { "epoch": 0.21681355986229758, "grad_norm": 0.14228913187980652, "learning_rate": 0.002, "loss": 2.5448, "step": 108830 }, { "epoch": 0.2168334820859365, "grad_norm": 0.16119058430194855, "learning_rate": 0.002, "loss": 2.5509, "step": 108840 }, { "epoch": 0.21685340430957542, "grad_norm": 0.17742951214313507, "learning_rate": 0.002, "loss": 2.5599, "step": 108850 }, { "epoch": 0.21687332653321434, "grad_norm": 0.1774977445602417, "learning_rate": 0.002, "loss": 2.5674, "step": 108860 }, { "epoch": 0.21689324875685326, "grad_norm": 0.17729103565216064, "learning_rate": 0.002, "loss": 2.5656, "step": 108870 }, { "epoch": 0.21691317098049215, "grad_norm": 0.14466378092765808, "learning_rate": 0.002, "loss": 2.5578, "step": 108880 }, { "epoch": 0.21693309320413107, "grad_norm": 0.15836015343666077, "learning_rate": 0.002, "loss": 2.5767, "step": 108890 }, { "epoch": 0.21695301542776999, "grad_norm": 0.16463913023471832, "learning_rate": 0.002, "loss": 2.5773, "step": 108900 }, { "epoch": 0.2169729376514089, "grad_norm": 0.1593909114599228, "learning_rate": 0.002, "loss": 2.5661, "step": 108910 }, { "epoch": 0.21699285987504782, "grad_norm": 0.2250717729330063, "learning_rate": 0.002, "loss": 2.5715, "step": 108920 }, { "epoch": 0.21701278209868674, "grad_norm": 0.1836932748556137, "learning_rate": 0.002, "loss": 2.5604, "step": 108930 }, { "epoch": 0.21703270432232563, "grad_norm": 0.17385101318359375, "learning_rate": 0.002, "loss": 2.574, "step": 108940 }, { "epoch": 0.21705262654596455, "grad_norm": 0.16157811880111694, "learning_rate": 0.002, "loss": 2.5808, "step": 108950 }, { "epoch": 0.21707254876960347, "grad_norm": 0.17035438120365143, "learning_rate": 0.002, "loss": 2.5754, "step": 108960 }, { "epoch": 0.2170924709932424, "grad_norm": 0.14764289557933807, "learning_rate": 0.002, "loss": 2.5635, "step": 108970 }, { "epoch": 0.2171123932168813, "grad_norm": 0.157292902469635, "learning_rate": 0.002, "loss": 2.5904, "step": 108980 }, { "epoch": 0.2171323154405202, "grad_norm": 0.1951279491186142, "learning_rate": 0.002, "loss": 2.5722, "step": 108990 }, { "epoch": 0.21715223766415911, "grad_norm": 0.15211021900177002, "learning_rate": 0.002, "loss": 2.5554, "step": 109000 }, { "epoch": 0.21717215988779803, "grad_norm": 0.18200553953647614, "learning_rate": 0.002, "loss": 2.5842, "step": 109010 }, { "epoch": 0.21719208211143695, "grad_norm": 0.18790210783481598, "learning_rate": 0.002, "loss": 2.5675, "step": 109020 }, { "epoch": 0.21721200433507587, "grad_norm": 0.16078171133995056, "learning_rate": 0.002, "loss": 2.5644, "step": 109030 }, { "epoch": 0.2172319265587148, "grad_norm": 0.15811946988105774, "learning_rate": 0.002, "loss": 2.5678, "step": 109040 }, { "epoch": 0.21725184878235368, "grad_norm": 0.17366445064544678, "learning_rate": 0.002, "loss": 2.5656, "step": 109050 }, { "epoch": 0.2172717710059926, "grad_norm": 0.1516503542661667, "learning_rate": 0.002, "loss": 2.5649, "step": 109060 }, { "epoch": 0.21729169322963152, "grad_norm": 0.15627720952033997, "learning_rate": 0.002, "loss": 2.5811, "step": 109070 }, { "epoch": 0.21731161545327043, "grad_norm": 0.15759484469890594, "learning_rate": 0.002, "loss": 2.5695, "step": 109080 }, { "epoch": 0.21733153767690935, "grad_norm": 0.1515544056892395, "learning_rate": 0.002, "loss": 2.5524, "step": 109090 }, { "epoch": 0.21735145990054827, "grad_norm": 0.18253982067108154, "learning_rate": 0.002, "loss": 2.5598, "step": 109100 }, { "epoch": 0.21737138212418716, "grad_norm": 0.1520545482635498, "learning_rate": 0.002, "loss": 2.5666, "step": 109110 }, { "epoch": 0.21739130434782608, "grad_norm": 0.16064053773880005, "learning_rate": 0.002, "loss": 2.5738, "step": 109120 }, { "epoch": 0.217411226571465, "grad_norm": 0.22279983758926392, "learning_rate": 0.002, "loss": 2.5867, "step": 109130 }, { "epoch": 0.21743114879510392, "grad_norm": 0.17566485702991486, "learning_rate": 0.002, "loss": 2.5711, "step": 109140 }, { "epoch": 0.21745107101874284, "grad_norm": 0.1733556091785431, "learning_rate": 0.002, "loss": 2.5732, "step": 109150 }, { "epoch": 0.21747099324238175, "grad_norm": 0.16651593148708344, "learning_rate": 0.002, "loss": 2.5702, "step": 109160 }, { "epoch": 0.21749091546602065, "grad_norm": 0.1444730907678604, "learning_rate": 0.002, "loss": 2.5712, "step": 109170 }, { "epoch": 0.21751083768965956, "grad_norm": 0.15900883078575134, "learning_rate": 0.002, "loss": 2.5655, "step": 109180 }, { "epoch": 0.21753075991329848, "grad_norm": 0.2051251381635666, "learning_rate": 0.002, "loss": 2.5642, "step": 109190 }, { "epoch": 0.2175506821369374, "grad_norm": 0.13719268143177032, "learning_rate": 0.002, "loss": 2.5806, "step": 109200 }, { "epoch": 0.21757060436057632, "grad_norm": 0.1644635647535324, "learning_rate": 0.002, "loss": 2.5617, "step": 109210 }, { "epoch": 0.2175905265842152, "grad_norm": 0.16961625218391418, "learning_rate": 0.002, "loss": 2.57, "step": 109220 }, { "epoch": 0.21761044880785413, "grad_norm": 0.17060619592666626, "learning_rate": 0.002, "loss": 2.5646, "step": 109230 }, { "epoch": 0.21763037103149305, "grad_norm": 0.144463449716568, "learning_rate": 0.002, "loss": 2.573, "step": 109240 }, { "epoch": 0.21765029325513197, "grad_norm": 0.1551365703344345, "learning_rate": 0.002, "loss": 2.5635, "step": 109250 }, { "epoch": 0.21767021547877088, "grad_norm": 0.19155707955360413, "learning_rate": 0.002, "loss": 2.5546, "step": 109260 }, { "epoch": 0.2176901377024098, "grad_norm": 0.1677946150302887, "learning_rate": 0.002, "loss": 2.5679, "step": 109270 }, { "epoch": 0.2177100599260487, "grad_norm": 0.13879285752773285, "learning_rate": 0.002, "loss": 2.5597, "step": 109280 }, { "epoch": 0.2177299821496876, "grad_norm": 0.17075331509113312, "learning_rate": 0.002, "loss": 2.5661, "step": 109290 }, { "epoch": 0.21774990437332653, "grad_norm": 0.17005005478858948, "learning_rate": 0.002, "loss": 2.5526, "step": 109300 }, { "epoch": 0.21776982659696545, "grad_norm": 0.19050072133541107, "learning_rate": 0.002, "loss": 2.5596, "step": 109310 }, { "epoch": 0.21778974882060437, "grad_norm": 0.172128364443779, "learning_rate": 0.002, "loss": 2.5617, "step": 109320 }, { "epoch": 0.21780967104424329, "grad_norm": 0.18406365811824799, "learning_rate": 0.002, "loss": 2.562, "step": 109330 }, { "epoch": 0.21782959326788218, "grad_norm": 0.15535031259059906, "learning_rate": 0.002, "loss": 2.5642, "step": 109340 }, { "epoch": 0.2178495154915211, "grad_norm": 0.1846771538257599, "learning_rate": 0.002, "loss": 2.5676, "step": 109350 }, { "epoch": 0.21786943771516, "grad_norm": 0.17688487470149994, "learning_rate": 0.002, "loss": 2.5785, "step": 109360 }, { "epoch": 0.21788935993879893, "grad_norm": 0.15575698018074036, "learning_rate": 0.002, "loss": 2.5705, "step": 109370 }, { "epoch": 0.21790928216243785, "grad_norm": 0.19168336689472198, "learning_rate": 0.002, "loss": 2.5661, "step": 109380 }, { "epoch": 0.21792920438607677, "grad_norm": 0.14933906495571136, "learning_rate": 0.002, "loss": 2.5586, "step": 109390 }, { "epoch": 0.21794912660971566, "grad_norm": 0.1982831358909607, "learning_rate": 0.002, "loss": 2.5616, "step": 109400 }, { "epoch": 0.21796904883335458, "grad_norm": 0.13649891316890717, "learning_rate": 0.002, "loss": 2.5737, "step": 109410 }, { "epoch": 0.2179889710569935, "grad_norm": 0.16747525334358215, "learning_rate": 0.002, "loss": 2.5729, "step": 109420 }, { "epoch": 0.21800889328063242, "grad_norm": 0.1706613302230835, "learning_rate": 0.002, "loss": 2.5796, "step": 109430 }, { "epoch": 0.21802881550427133, "grad_norm": 0.164070725440979, "learning_rate": 0.002, "loss": 2.5679, "step": 109440 }, { "epoch": 0.21804873772791022, "grad_norm": 0.15234589576721191, "learning_rate": 0.002, "loss": 2.5627, "step": 109450 }, { "epoch": 0.21806865995154914, "grad_norm": 0.1509542614221573, "learning_rate": 0.002, "loss": 2.5676, "step": 109460 }, { "epoch": 0.21808858217518806, "grad_norm": 0.17306573688983917, "learning_rate": 0.002, "loss": 2.5612, "step": 109470 }, { "epoch": 0.21810850439882698, "grad_norm": 0.15900537371635437, "learning_rate": 0.002, "loss": 2.5753, "step": 109480 }, { "epoch": 0.2181284266224659, "grad_norm": 0.20690366625785828, "learning_rate": 0.002, "loss": 2.5635, "step": 109490 }, { "epoch": 0.21814834884610482, "grad_norm": 0.1678663194179535, "learning_rate": 0.002, "loss": 2.5602, "step": 109500 }, { "epoch": 0.2181682710697437, "grad_norm": 0.14630016684532166, "learning_rate": 0.002, "loss": 2.576, "step": 109510 }, { "epoch": 0.21818819329338263, "grad_norm": 0.18241524696350098, "learning_rate": 0.002, "loss": 2.5751, "step": 109520 }, { "epoch": 0.21820811551702154, "grad_norm": 0.18619969487190247, "learning_rate": 0.002, "loss": 2.5609, "step": 109530 }, { "epoch": 0.21822803774066046, "grad_norm": 0.171891450881958, "learning_rate": 0.002, "loss": 2.5597, "step": 109540 }, { "epoch": 0.21824795996429938, "grad_norm": 0.19104567170143127, "learning_rate": 0.002, "loss": 2.5716, "step": 109550 }, { "epoch": 0.2182678821879383, "grad_norm": 0.16562201082706451, "learning_rate": 0.002, "loss": 2.5682, "step": 109560 }, { "epoch": 0.2182878044115772, "grad_norm": 0.151076078414917, "learning_rate": 0.002, "loss": 2.5607, "step": 109570 }, { "epoch": 0.2183077266352161, "grad_norm": 0.16694071888923645, "learning_rate": 0.002, "loss": 2.5828, "step": 109580 }, { "epoch": 0.21832764885885503, "grad_norm": 0.19741351902484894, "learning_rate": 0.002, "loss": 2.5594, "step": 109590 }, { "epoch": 0.21834757108249395, "grad_norm": 0.19112570583820343, "learning_rate": 0.002, "loss": 2.5626, "step": 109600 }, { "epoch": 0.21836749330613286, "grad_norm": 0.15771669149398804, "learning_rate": 0.002, "loss": 2.5666, "step": 109610 }, { "epoch": 0.21838741552977178, "grad_norm": 0.14287780225276947, "learning_rate": 0.002, "loss": 2.5637, "step": 109620 }, { "epoch": 0.21840733775341067, "grad_norm": 0.16707243025302887, "learning_rate": 0.002, "loss": 2.5653, "step": 109630 }, { "epoch": 0.2184272599770496, "grad_norm": 0.157909095287323, "learning_rate": 0.002, "loss": 2.5671, "step": 109640 }, { "epoch": 0.2184471822006885, "grad_norm": 0.15686941146850586, "learning_rate": 0.002, "loss": 2.5556, "step": 109650 }, { "epoch": 0.21846710442432743, "grad_norm": 0.16113540530204773, "learning_rate": 0.002, "loss": 2.5645, "step": 109660 }, { "epoch": 0.21848702664796635, "grad_norm": 0.2048395574092865, "learning_rate": 0.002, "loss": 2.559, "step": 109670 }, { "epoch": 0.21850694887160527, "grad_norm": 0.17232175171375275, "learning_rate": 0.002, "loss": 2.5636, "step": 109680 }, { "epoch": 0.21852687109524416, "grad_norm": 0.19698357582092285, "learning_rate": 0.002, "loss": 2.577, "step": 109690 }, { "epoch": 0.21854679331888308, "grad_norm": 0.1481245458126068, "learning_rate": 0.002, "loss": 2.5867, "step": 109700 }, { "epoch": 0.218566715542522, "grad_norm": 0.14906518161296844, "learning_rate": 0.002, "loss": 2.5632, "step": 109710 }, { "epoch": 0.2185866377661609, "grad_norm": 0.15522992610931396, "learning_rate": 0.002, "loss": 2.5672, "step": 109720 }, { "epoch": 0.21860655998979983, "grad_norm": 0.17819668352603912, "learning_rate": 0.002, "loss": 2.5589, "step": 109730 }, { "epoch": 0.21862648221343872, "grad_norm": 0.1567392200231552, "learning_rate": 0.002, "loss": 2.5697, "step": 109740 }, { "epoch": 0.21864640443707764, "grad_norm": 0.16850122809410095, "learning_rate": 0.002, "loss": 2.5642, "step": 109750 }, { "epoch": 0.21866632666071656, "grad_norm": 0.1620212197303772, "learning_rate": 0.002, "loss": 2.5605, "step": 109760 }, { "epoch": 0.21868624888435548, "grad_norm": 0.1653996855020523, "learning_rate": 0.002, "loss": 2.5621, "step": 109770 }, { "epoch": 0.2187061711079944, "grad_norm": 0.18961390852928162, "learning_rate": 0.002, "loss": 2.5736, "step": 109780 }, { "epoch": 0.21872609333163331, "grad_norm": 0.14723899960517883, "learning_rate": 0.002, "loss": 2.5747, "step": 109790 }, { "epoch": 0.2187460155552722, "grad_norm": 0.17750975489616394, "learning_rate": 0.002, "loss": 2.5642, "step": 109800 }, { "epoch": 0.21876593777891112, "grad_norm": 0.16462492942810059, "learning_rate": 0.002, "loss": 2.5747, "step": 109810 }, { "epoch": 0.21878586000255004, "grad_norm": 0.17187978327274323, "learning_rate": 0.002, "loss": 2.574, "step": 109820 }, { "epoch": 0.21880578222618896, "grad_norm": 0.14530763030052185, "learning_rate": 0.002, "loss": 2.5777, "step": 109830 }, { "epoch": 0.21882570444982788, "grad_norm": 0.16535808145999908, "learning_rate": 0.002, "loss": 2.5702, "step": 109840 }, { "epoch": 0.2188456266734668, "grad_norm": 0.1653662621974945, "learning_rate": 0.002, "loss": 2.5708, "step": 109850 }, { "epoch": 0.2188655488971057, "grad_norm": 0.1656845211982727, "learning_rate": 0.002, "loss": 2.573, "step": 109860 }, { "epoch": 0.2188854711207446, "grad_norm": 0.1825815737247467, "learning_rate": 0.002, "loss": 2.5491, "step": 109870 }, { "epoch": 0.21890539334438353, "grad_norm": 0.17527014017105103, "learning_rate": 0.002, "loss": 2.5559, "step": 109880 }, { "epoch": 0.21892531556802244, "grad_norm": 0.1368963122367859, "learning_rate": 0.002, "loss": 2.5685, "step": 109890 }, { "epoch": 0.21894523779166136, "grad_norm": 0.16900594532489777, "learning_rate": 0.002, "loss": 2.5603, "step": 109900 }, { "epoch": 0.21896516001530028, "grad_norm": 0.1436736285686493, "learning_rate": 0.002, "loss": 2.5646, "step": 109910 }, { "epoch": 0.21898508223893917, "grad_norm": 0.17321909964084625, "learning_rate": 0.002, "loss": 2.5709, "step": 109920 }, { "epoch": 0.2190050044625781, "grad_norm": 0.16572678089141846, "learning_rate": 0.002, "loss": 2.5607, "step": 109930 }, { "epoch": 0.219024926686217, "grad_norm": 0.16220958530902863, "learning_rate": 0.002, "loss": 2.5651, "step": 109940 }, { "epoch": 0.21904484890985593, "grad_norm": 0.18057407438755035, "learning_rate": 0.002, "loss": 2.5672, "step": 109950 }, { "epoch": 0.21906477113349485, "grad_norm": 0.18747974932193756, "learning_rate": 0.002, "loss": 2.5538, "step": 109960 }, { "epoch": 0.21908469335713374, "grad_norm": 0.2095540165901184, "learning_rate": 0.002, "loss": 2.5638, "step": 109970 }, { "epoch": 0.21910461558077265, "grad_norm": 0.17073597013950348, "learning_rate": 0.002, "loss": 2.5439, "step": 109980 }, { "epoch": 0.21912453780441157, "grad_norm": 0.16705414652824402, "learning_rate": 0.002, "loss": 2.5718, "step": 109990 }, { "epoch": 0.2191444600280505, "grad_norm": 0.16135317087173462, "learning_rate": 0.002, "loss": 2.5613, "step": 110000 }, { "epoch": 0.2191643822516894, "grad_norm": 0.19044718146324158, "learning_rate": 0.002, "loss": 2.5558, "step": 110010 }, { "epoch": 0.21918430447532833, "grad_norm": 0.18413196504116058, "learning_rate": 0.002, "loss": 2.5646, "step": 110020 }, { "epoch": 0.21920422669896722, "grad_norm": 0.16643334925174713, "learning_rate": 0.002, "loss": 2.5729, "step": 110030 }, { "epoch": 0.21922414892260614, "grad_norm": 0.15621830523014069, "learning_rate": 0.002, "loss": 2.5662, "step": 110040 }, { "epoch": 0.21924407114624506, "grad_norm": 0.1768646240234375, "learning_rate": 0.002, "loss": 2.5572, "step": 110050 }, { "epoch": 0.21926399336988397, "grad_norm": 0.17714709043502808, "learning_rate": 0.002, "loss": 2.5541, "step": 110060 }, { "epoch": 0.2192839155935229, "grad_norm": 0.16810666024684906, "learning_rate": 0.002, "loss": 2.5742, "step": 110070 }, { "epoch": 0.2193038378171618, "grad_norm": 0.15530219674110413, "learning_rate": 0.002, "loss": 2.557, "step": 110080 }, { "epoch": 0.2193237600408007, "grad_norm": 0.13524703681468964, "learning_rate": 0.002, "loss": 2.5603, "step": 110090 }, { "epoch": 0.21934368226443962, "grad_norm": 0.19319750368595123, "learning_rate": 0.002, "loss": 2.5706, "step": 110100 }, { "epoch": 0.21936360448807854, "grad_norm": 0.15028563141822815, "learning_rate": 0.002, "loss": 2.5637, "step": 110110 }, { "epoch": 0.21938352671171746, "grad_norm": 0.23044666647911072, "learning_rate": 0.002, "loss": 2.5751, "step": 110120 }, { "epoch": 0.21940344893535638, "grad_norm": 0.16353702545166016, "learning_rate": 0.002, "loss": 2.5725, "step": 110130 }, { "epoch": 0.2194233711589953, "grad_norm": 0.16907468438148499, "learning_rate": 0.002, "loss": 2.5615, "step": 110140 }, { "epoch": 0.21944329338263419, "grad_norm": 0.1327180117368698, "learning_rate": 0.002, "loss": 2.5683, "step": 110150 }, { "epoch": 0.2194632156062731, "grad_norm": 0.19413137435913086, "learning_rate": 0.002, "loss": 2.5682, "step": 110160 }, { "epoch": 0.21948313782991202, "grad_norm": 0.1830797642469406, "learning_rate": 0.002, "loss": 2.5719, "step": 110170 }, { "epoch": 0.21950306005355094, "grad_norm": 0.15359129011631012, "learning_rate": 0.002, "loss": 2.557, "step": 110180 }, { "epoch": 0.21952298227718986, "grad_norm": 0.1774529367685318, "learning_rate": 0.002, "loss": 2.55, "step": 110190 }, { "epoch": 0.21954290450082878, "grad_norm": 0.1798110008239746, "learning_rate": 0.002, "loss": 2.563, "step": 110200 }, { "epoch": 0.21956282672446767, "grad_norm": 0.17858974635601044, "learning_rate": 0.002, "loss": 2.5712, "step": 110210 }, { "epoch": 0.2195827489481066, "grad_norm": 0.1493287831544876, "learning_rate": 0.002, "loss": 2.5579, "step": 110220 }, { "epoch": 0.2196026711717455, "grad_norm": 0.17649225890636444, "learning_rate": 0.002, "loss": 2.5646, "step": 110230 }, { "epoch": 0.21962259339538442, "grad_norm": 0.17277173697948456, "learning_rate": 0.002, "loss": 2.5763, "step": 110240 }, { "epoch": 0.21964251561902334, "grad_norm": 0.15146635472774506, "learning_rate": 0.002, "loss": 2.5727, "step": 110250 }, { "epoch": 0.21966243784266223, "grad_norm": 0.1604073941707611, "learning_rate": 0.002, "loss": 2.5778, "step": 110260 }, { "epoch": 0.21968236006630115, "grad_norm": 0.1781945377588272, "learning_rate": 0.002, "loss": 2.563, "step": 110270 }, { "epoch": 0.21970228228994007, "grad_norm": 0.14234283566474915, "learning_rate": 0.002, "loss": 2.5708, "step": 110280 }, { "epoch": 0.219722204513579, "grad_norm": 0.18718688189983368, "learning_rate": 0.002, "loss": 2.5738, "step": 110290 }, { "epoch": 0.2197421267372179, "grad_norm": 0.1654738336801529, "learning_rate": 0.002, "loss": 2.5706, "step": 110300 }, { "epoch": 0.21976204896085683, "grad_norm": 0.21280212700366974, "learning_rate": 0.002, "loss": 2.5444, "step": 110310 }, { "epoch": 0.21978197118449572, "grad_norm": 0.16810843348503113, "learning_rate": 0.002, "loss": 2.5591, "step": 110320 }, { "epoch": 0.21980189340813464, "grad_norm": 0.1482694447040558, "learning_rate": 0.002, "loss": 2.569, "step": 110330 }, { "epoch": 0.21982181563177355, "grad_norm": 0.18940982222557068, "learning_rate": 0.002, "loss": 2.5729, "step": 110340 }, { "epoch": 0.21984173785541247, "grad_norm": 0.18418504297733307, "learning_rate": 0.002, "loss": 2.5588, "step": 110350 }, { "epoch": 0.2198616600790514, "grad_norm": 0.17635412514209747, "learning_rate": 0.002, "loss": 2.5595, "step": 110360 }, { "epoch": 0.2198815823026903, "grad_norm": 0.18070077896118164, "learning_rate": 0.002, "loss": 2.5665, "step": 110370 }, { "epoch": 0.2199015045263292, "grad_norm": 0.18841715157032013, "learning_rate": 0.002, "loss": 2.5569, "step": 110380 }, { "epoch": 0.21992142674996812, "grad_norm": 0.15118719637393951, "learning_rate": 0.002, "loss": 2.5713, "step": 110390 }, { "epoch": 0.21994134897360704, "grad_norm": 0.15013502538204193, "learning_rate": 0.002, "loss": 2.5524, "step": 110400 }, { "epoch": 0.21996127119724596, "grad_norm": 0.21010009944438934, "learning_rate": 0.002, "loss": 2.561, "step": 110410 }, { "epoch": 0.21998119342088487, "grad_norm": 0.17249953746795654, "learning_rate": 0.002, "loss": 2.5761, "step": 110420 }, { "epoch": 0.2200011156445238, "grad_norm": 0.1466125100851059, "learning_rate": 0.002, "loss": 2.5748, "step": 110430 }, { "epoch": 0.22002103786816268, "grad_norm": 0.17100507020950317, "learning_rate": 0.002, "loss": 2.5679, "step": 110440 }, { "epoch": 0.2200409600918016, "grad_norm": 0.1506771743297577, "learning_rate": 0.002, "loss": 2.5655, "step": 110450 }, { "epoch": 0.22006088231544052, "grad_norm": 0.1676706224679947, "learning_rate": 0.002, "loss": 2.5688, "step": 110460 }, { "epoch": 0.22008080453907944, "grad_norm": 0.1912592351436615, "learning_rate": 0.002, "loss": 2.5792, "step": 110470 }, { "epoch": 0.22010072676271836, "grad_norm": 0.16526219248771667, "learning_rate": 0.002, "loss": 2.5616, "step": 110480 }, { "epoch": 0.22012064898635725, "grad_norm": 0.1831122636795044, "learning_rate": 0.002, "loss": 2.5548, "step": 110490 }, { "epoch": 0.22014057120999617, "grad_norm": 0.16447819769382477, "learning_rate": 0.002, "loss": 2.5652, "step": 110500 }, { "epoch": 0.22016049343363508, "grad_norm": 0.1809333711862564, "learning_rate": 0.002, "loss": 2.5711, "step": 110510 }, { "epoch": 0.220180415657274, "grad_norm": 0.15743222832679749, "learning_rate": 0.002, "loss": 2.5648, "step": 110520 }, { "epoch": 0.22020033788091292, "grad_norm": 0.21629159152507782, "learning_rate": 0.002, "loss": 2.5642, "step": 110530 }, { "epoch": 0.22022026010455184, "grad_norm": 0.18281246721744537, "learning_rate": 0.002, "loss": 2.5733, "step": 110540 }, { "epoch": 0.22024018232819073, "grad_norm": 0.17549863457679749, "learning_rate": 0.002, "loss": 2.5611, "step": 110550 }, { "epoch": 0.22026010455182965, "grad_norm": 0.16053617000579834, "learning_rate": 0.002, "loss": 2.5597, "step": 110560 }, { "epoch": 0.22028002677546857, "grad_norm": 0.18865837156772614, "learning_rate": 0.002, "loss": 2.5478, "step": 110570 }, { "epoch": 0.2202999489991075, "grad_norm": 0.19758883118629456, "learning_rate": 0.002, "loss": 2.5543, "step": 110580 }, { "epoch": 0.2203198712227464, "grad_norm": 0.1490599513053894, "learning_rate": 0.002, "loss": 2.5752, "step": 110590 }, { "epoch": 0.22033979344638532, "grad_norm": 0.16702218353748322, "learning_rate": 0.002, "loss": 2.5555, "step": 110600 }, { "epoch": 0.22035971567002421, "grad_norm": 0.17447814345359802, "learning_rate": 0.002, "loss": 2.5546, "step": 110610 }, { "epoch": 0.22037963789366313, "grad_norm": 0.16407674551010132, "learning_rate": 0.002, "loss": 2.5642, "step": 110620 }, { "epoch": 0.22039956011730205, "grad_norm": 0.15054851770401, "learning_rate": 0.002, "loss": 2.5479, "step": 110630 }, { "epoch": 0.22041948234094097, "grad_norm": 0.1665622442960739, "learning_rate": 0.002, "loss": 2.563, "step": 110640 }, { "epoch": 0.2204394045645799, "grad_norm": 0.13889987766742706, "learning_rate": 0.002, "loss": 2.5615, "step": 110650 }, { "epoch": 0.2204593267882188, "grad_norm": 0.16162176430225372, "learning_rate": 0.002, "loss": 2.558, "step": 110660 }, { "epoch": 0.2204792490118577, "grad_norm": 0.1522274613380432, "learning_rate": 0.002, "loss": 2.5679, "step": 110670 }, { "epoch": 0.22049917123549662, "grad_norm": 0.15569226443767548, "learning_rate": 0.002, "loss": 2.5718, "step": 110680 }, { "epoch": 0.22051909345913553, "grad_norm": 0.1599321812391281, "learning_rate": 0.002, "loss": 2.5727, "step": 110690 }, { "epoch": 0.22053901568277445, "grad_norm": 0.15732842683792114, "learning_rate": 0.002, "loss": 2.554, "step": 110700 }, { "epoch": 0.22055893790641337, "grad_norm": 0.18613766133785248, "learning_rate": 0.002, "loss": 2.5829, "step": 110710 }, { "epoch": 0.22057886013005226, "grad_norm": 0.1568477600812912, "learning_rate": 0.002, "loss": 2.5608, "step": 110720 }, { "epoch": 0.22059878235369118, "grad_norm": 0.17346541583538055, "learning_rate": 0.002, "loss": 2.587, "step": 110730 }, { "epoch": 0.2206187045773301, "grad_norm": 0.1618669629096985, "learning_rate": 0.002, "loss": 2.5521, "step": 110740 }, { "epoch": 0.22063862680096902, "grad_norm": 0.16876645386219025, "learning_rate": 0.002, "loss": 2.5558, "step": 110750 }, { "epoch": 0.22065854902460794, "grad_norm": 0.19533057510852814, "learning_rate": 0.002, "loss": 2.5643, "step": 110760 }, { "epoch": 0.22067847124824685, "grad_norm": 0.15509141981601715, "learning_rate": 0.002, "loss": 2.5609, "step": 110770 }, { "epoch": 0.22069839347188575, "grad_norm": 0.1432565599679947, "learning_rate": 0.002, "loss": 2.5638, "step": 110780 }, { "epoch": 0.22071831569552466, "grad_norm": 0.15072835981845856, "learning_rate": 0.002, "loss": 2.5561, "step": 110790 }, { "epoch": 0.22073823791916358, "grad_norm": 0.1823243796825409, "learning_rate": 0.002, "loss": 2.5762, "step": 110800 }, { "epoch": 0.2207581601428025, "grad_norm": 0.15432977676391602, "learning_rate": 0.002, "loss": 2.5665, "step": 110810 }, { "epoch": 0.22077808236644142, "grad_norm": 0.17398250102996826, "learning_rate": 0.002, "loss": 2.5845, "step": 110820 }, { "epoch": 0.22079800459008034, "grad_norm": 0.16402703523635864, "learning_rate": 0.002, "loss": 2.561, "step": 110830 }, { "epoch": 0.22081792681371923, "grad_norm": 0.18794545531272888, "learning_rate": 0.002, "loss": 2.5558, "step": 110840 }, { "epoch": 0.22083784903735815, "grad_norm": 0.1528303176164627, "learning_rate": 0.002, "loss": 2.5602, "step": 110850 }, { "epoch": 0.22085777126099707, "grad_norm": 0.15229149162769318, "learning_rate": 0.002, "loss": 2.5384, "step": 110860 }, { "epoch": 0.22087769348463598, "grad_norm": 0.14186140894889832, "learning_rate": 0.002, "loss": 2.577, "step": 110870 }, { "epoch": 0.2208976157082749, "grad_norm": 0.2385934293270111, "learning_rate": 0.002, "loss": 2.5753, "step": 110880 }, { "epoch": 0.22091753793191382, "grad_norm": 0.16190718114376068, "learning_rate": 0.002, "loss": 2.5868, "step": 110890 }, { "epoch": 0.2209374601555527, "grad_norm": 0.15044181048870087, "learning_rate": 0.002, "loss": 2.5631, "step": 110900 }, { "epoch": 0.22095738237919163, "grad_norm": 0.14509817957878113, "learning_rate": 0.002, "loss": 2.5811, "step": 110910 }, { "epoch": 0.22097730460283055, "grad_norm": 0.1918390840291977, "learning_rate": 0.002, "loss": 2.5651, "step": 110920 }, { "epoch": 0.22099722682646947, "grad_norm": 0.15852780640125275, "learning_rate": 0.002, "loss": 2.5645, "step": 110930 }, { "epoch": 0.22101714905010839, "grad_norm": 0.18942682445049286, "learning_rate": 0.002, "loss": 2.5697, "step": 110940 }, { "epoch": 0.2210370712737473, "grad_norm": 0.2629131078720093, "learning_rate": 0.002, "loss": 2.5767, "step": 110950 }, { "epoch": 0.2210569934973862, "grad_norm": 0.1563640832901001, "learning_rate": 0.002, "loss": 2.5768, "step": 110960 }, { "epoch": 0.2210769157210251, "grad_norm": 0.1654512584209442, "learning_rate": 0.002, "loss": 2.5727, "step": 110970 }, { "epoch": 0.22109683794466403, "grad_norm": 0.17024514079093933, "learning_rate": 0.002, "loss": 2.5541, "step": 110980 }, { "epoch": 0.22111676016830295, "grad_norm": 0.172697052359581, "learning_rate": 0.002, "loss": 2.5667, "step": 110990 }, { "epoch": 0.22113668239194187, "grad_norm": 0.1666800081729889, "learning_rate": 0.002, "loss": 2.5742, "step": 111000 }, { "epoch": 0.22115660461558076, "grad_norm": 0.17487087845802307, "learning_rate": 0.002, "loss": 2.5678, "step": 111010 }, { "epoch": 0.22117652683921968, "grad_norm": 0.1884094923734665, "learning_rate": 0.002, "loss": 2.5618, "step": 111020 }, { "epoch": 0.2211964490628586, "grad_norm": 0.13414807617664337, "learning_rate": 0.002, "loss": 2.576, "step": 111030 }, { "epoch": 0.22121637128649752, "grad_norm": 0.1529611349105835, "learning_rate": 0.002, "loss": 2.5903, "step": 111040 }, { "epoch": 0.22123629351013643, "grad_norm": 0.16111910343170166, "learning_rate": 0.002, "loss": 2.5601, "step": 111050 }, { "epoch": 0.22125621573377535, "grad_norm": 0.1473293900489807, "learning_rate": 0.002, "loss": 2.5696, "step": 111060 }, { "epoch": 0.22127613795741424, "grad_norm": 0.1788608431816101, "learning_rate": 0.002, "loss": 2.5775, "step": 111070 }, { "epoch": 0.22129606018105316, "grad_norm": 0.15447881817817688, "learning_rate": 0.002, "loss": 2.57, "step": 111080 }, { "epoch": 0.22131598240469208, "grad_norm": 0.1532970517873764, "learning_rate": 0.002, "loss": 2.5653, "step": 111090 }, { "epoch": 0.221335904628331, "grad_norm": 0.18292810022830963, "learning_rate": 0.002, "loss": 2.5391, "step": 111100 }, { "epoch": 0.22135582685196992, "grad_norm": 0.1769779771566391, "learning_rate": 0.002, "loss": 2.5706, "step": 111110 }, { "epoch": 0.22137574907560884, "grad_norm": 0.19329696893692017, "learning_rate": 0.002, "loss": 2.5556, "step": 111120 }, { "epoch": 0.22139567129924773, "grad_norm": 0.1798548400402069, "learning_rate": 0.002, "loss": 2.5762, "step": 111130 }, { "epoch": 0.22141559352288664, "grad_norm": 0.16255350410938263, "learning_rate": 0.002, "loss": 2.5592, "step": 111140 }, { "epoch": 0.22143551574652556, "grad_norm": 0.14900533854961395, "learning_rate": 0.002, "loss": 2.5568, "step": 111150 }, { "epoch": 0.22145543797016448, "grad_norm": 0.16967736184597015, "learning_rate": 0.002, "loss": 2.5661, "step": 111160 }, { "epoch": 0.2214753601938034, "grad_norm": 0.17858625948429108, "learning_rate": 0.002, "loss": 2.557, "step": 111170 }, { "epoch": 0.22149528241744232, "grad_norm": 0.15242499113082886, "learning_rate": 0.002, "loss": 2.5612, "step": 111180 }, { "epoch": 0.2215152046410812, "grad_norm": 0.20792394876480103, "learning_rate": 0.002, "loss": 2.5344, "step": 111190 }, { "epoch": 0.22153512686472013, "grad_norm": 0.16190959513187408, "learning_rate": 0.002, "loss": 2.5882, "step": 111200 }, { "epoch": 0.22155504908835905, "grad_norm": 0.18929986655712128, "learning_rate": 0.002, "loss": 2.5731, "step": 111210 }, { "epoch": 0.22157497131199796, "grad_norm": 0.14916937053203583, "learning_rate": 0.002, "loss": 2.5633, "step": 111220 }, { "epoch": 0.22159489353563688, "grad_norm": 0.20305483043193817, "learning_rate": 0.002, "loss": 2.5677, "step": 111230 }, { "epoch": 0.22161481575927577, "grad_norm": 0.1693038046360016, "learning_rate": 0.002, "loss": 2.5532, "step": 111240 }, { "epoch": 0.2216347379829147, "grad_norm": 0.18944509327411652, "learning_rate": 0.002, "loss": 2.5697, "step": 111250 }, { "epoch": 0.2216546602065536, "grad_norm": 0.14891967177391052, "learning_rate": 0.002, "loss": 2.5612, "step": 111260 }, { "epoch": 0.22167458243019253, "grad_norm": 0.20959064364433289, "learning_rate": 0.002, "loss": 2.5666, "step": 111270 }, { "epoch": 0.22169450465383145, "grad_norm": 0.16940785944461823, "learning_rate": 0.002, "loss": 2.5855, "step": 111280 }, { "epoch": 0.22171442687747037, "grad_norm": 0.15696482360363007, "learning_rate": 0.002, "loss": 2.5505, "step": 111290 }, { "epoch": 0.22173434910110926, "grad_norm": 0.21013113856315613, "learning_rate": 0.002, "loss": 2.5645, "step": 111300 }, { "epoch": 0.22175427132474818, "grad_norm": 0.1734333634376526, "learning_rate": 0.002, "loss": 2.5713, "step": 111310 }, { "epoch": 0.2217741935483871, "grad_norm": 0.18132099509239197, "learning_rate": 0.002, "loss": 2.5731, "step": 111320 }, { "epoch": 0.221794115772026, "grad_norm": 0.1634264439344406, "learning_rate": 0.002, "loss": 2.5609, "step": 111330 }, { "epoch": 0.22181403799566493, "grad_norm": 0.13989578187465668, "learning_rate": 0.002, "loss": 2.568, "step": 111340 }, { "epoch": 0.22183396021930385, "grad_norm": 0.1705915927886963, "learning_rate": 0.002, "loss": 2.5787, "step": 111350 }, { "epoch": 0.22185388244294274, "grad_norm": 0.15948037803173065, "learning_rate": 0.002, "loss": 2.5566, "step": 111360 }, { "epoch": 0.22187380466658166, "grad_norm": 0.19721463322639465, "learning_rate": 0.002, "loss": 2.5701, "step": 111370 }, { "epoch": 0.22189372689022058, "grad_norm": 0.16866137087345123, "learning_rate": 0.002, "loss": 2.5763, "step": 111380 }, { "epoch": 0.2219136491138595, "grad_norm": 0.15926405787467957, "learning_rate": 0.002, "loss": 2.5726, "step": 111390 }, { "epoch": 0.22193357133749841, "grad_norm": 0.15047410130500793, "learning_rate": 0.002, "loss": 2.5633, "step": 111400 }, { "epoch": 0.22195349356113733, "grad_norm": 0.15101827681064606, "learning_rate": 0.002, "loss": 2.5634, "step": 111410 }, { "epoch": 0.22197341578477622, "grad_norm": 0.17153432965278625, "learning_rate": 0.002, "loss": 2.5701, "step": 111420 }, { "epoch": 0.22199333800841514, "grad_norm": 0.22177468240261078, "learning_rate": 0.002, "loss": 2.5695, "step": 111430 }, { "epoch": 0.22201326023205406, "grad_norm": 0.15092962980270386, "learning_rate": 0.002, "loss": 2.5772, "step": 111440 }, { "epoch": 0.22203318245569298, "grad_norm": 0.1553894281387329, "learning_rate": 0.002, "loss": 2.5464, "step": 111450 }, { "epoch": 0.2220531046793319, "grad_norm": 0.16149309277534485, "learning_rate": 0.002, "loss": 2.5667, "step": 111460 }, { "epoch": 0.2220730269029708, "grad_norm": 0.1651029884815216, "learning_rate": 0.002, "loss": 2.569, "step": 111470 }, { "epoch": 0.2220929491266097, "grad_norm": 0.15557017922401428, "learning_rate": 0.002, "loss": 2.5674, "step": 111480 }, { "epoch": 0.22211287135024863, "grad_norm": 0.15784958004951477, "learning_rate": 0.002, "loss": 2.5687, "step": 111490 }, { "epoch": 0.22213279357388754, "grad_norm": 0.1890467256307602, "learning_rate": 0.002, "loss": 2.5677, "step": 111500 }, { "epoch": 0.22215271579752646, "grad_norm": 0.1661372035741806, "learning_rate": 0.002, "loss": 2.559, "step": 111510 }, { "epoch": 0.22217263802116538, "grad_norm": 0.17245382070541382, "learning_rate": 0.002, "loss": 2.5607, "step": 111520 }, { "epoch": 0.22219256024480427, "grad_norm": 0.15469500422477722, "learning_rate": 0.002, "loss": 2.5698, "step": 111530 }, { "epoch": 0.2222124824684432, "grad_norm": 0.14877890050411224, "learning_rate": 0.002, "loss": 2.5589, "step": 111540 }, { "epoch": 0.2222324046920821, "grad_norm": 0.15747423470020294, "learning_rate": 0.002, "loss": 2.5567, "step": 111550 }, { "epoch": 0.22225232691572103, "grad_norm": 0.19795560836791992, "learning_rate": 0.002, "loss": 2.5582, "step": 111560 }, { "epoch": 0.22227224913935995, "grad_norm": 0.16973541676998138, "learning_rate": 0.002, "loss": 2.5693, "step": 111570 }, { "epoch": 0.22229217136299886, "grad_norm": 0.14017587900161743, "learning_rate": 0.002, "loss": 2.5615, "step": 111580 }, { "epoch": 0.22231209358663775, "grad_norm": 0.1607135534286499, "learning_rate": 0.002, "loss": 2.5552, "step": 111590 }, { "epoch": 0.22233201581027667, "grad_norm": 0.16969642043113708, "learning_rate": 0.002, "loss": 2.5669, "step": 111600 }, { "epoch": 0.2223519380339156, "grad_norm": 0.14708979427814484, "learning_rate": 0.002, "loss": 2.5745, "step": 111610 }, { "epoch": 0.2223718602575545, "grad_norm": 0.19224707782268524, "learning_rate": 0.002, "loss": 2.5567, "step": 111620 }, { "epoch": 0.22239178248119343, "grad_norm": 0.16451825201511383, "learning_rate": 0.002, "loss": 2.5673, "step": 111630 }, { "epoch": 0.22241170470483235, "grad_norm": 0.20423313975334167, "learning_rate": 0.002, "loss": 2.5596, "step": 111640 }, { "epoch": 0.22243162692847124, "grad_norm": 0.1571882665157318, "learning_rate": 0.002, "loss": 2.5689, "step": 111650 }, { "epoch": 0.22245154915211016, "grad_norm": 0.142737478017807, "learning_rate": 0.002, "loss": 2.5612, "step": 111660 }, { "epoch": 0.22247147137574907, "grad_norm": 0.17703472077846527, "learning_rate": 0.002, "loss": 2.5694, "step": 111670 }, { "epoch": 0.222491393599388, "grad_norm": 0.15697471797466278, "learning_rate": 0.002, "loss": 2.5792, "step": 111680 }, { "epoch": 0.2225113158230269, "grad_norm": 0.1752573847770691, "learning_rate": 0.002, "loss": 2.5575, "step": 111690 }, { "epoch": 0.22253123804666583, "grad_norm": 0.1536467969417572, "learning_rate": 0.002, "loss": 2.5757, "step": 111700 }, { "epoch": 0.22255116027030472, "grad_norm": 0.1554233580827713, "learning_rate": 0.002, "loss": 2.5662, "step": 111710 }, { "epoch": 0.22257108249394364, "grad_norm": 0.1632165014743805, "learning_rate": 0.002, "loss": 2.5628, "step": 111720 }, { "epoch": 0.22259100471758256, "grad_norm": 0.18546372652053833, "learning_rate": 0.002, "loss": 2.5559, "step": 111730 }, { "epoch": 0.22261092694122148, "grad_norm": 0.17378616333007812, "learning_rate": 0.002, "loss": 2.5713, "step": 111740 }, { "epoch": 0.2226308491648604, "grad_norm": 0.16348323225975037, "learning_rate": 0.002, "loss": 2.5651, "step": 111750 }, { "epoch": 0.22265077138849929, "grad_norm": 0.1705244928598404, "learning_rate": 0.002, "loss": 2.562, "step": 111760 }, { "epoch": 0.2226706936121382, "grad_norm": 0.16395743191242218, "learning_rate": 0.002, "loss": 2.5448, "step": 111770 }, { "epoch": 0.22269061583577712, "grad_norm": 0.17653349041938782, "learning_rate": 0.002, "loss": 2.5727, "step": 111780 }, { "epoch": 0.22271053805941604, "grad_norm": 0.15439198911190033, "learning_rate": 0.002, "loss": 2.5689, "step": 111790 }, { "epoch": 0.22273046028305496, "grad_norm": 0.1504918783903122, "learning_rate": 0.002, "loss": 2.5748, "step": 111800 }, { "epoch": 0.22275038250669388, "grad_norm": 0.17878225445747375, "learning_rate": 0.002, "loss": 2.567, "step": 111810 }, { "epoch": 0.22277030473033277, "grad_norm": 0.1477428525686264, "learning_rate": 0.002, "loss": 2.5746, "step": 111820 }, { "epoch": 0.2227902269539717, "grad_norm": 0.1720605492591858, "learning_rate": 0.002, "loss": 2.5779, "step": 111830 }, { "epoch": 0.2228101491776106, "grad_norm": 0.1585579365491867, "learning_rate": 0.002, "loss": 2.5788, "step": 111840 }, { "epoch": 0.22283007140124952, "grad_norm": 0.15061290562152863, "learning_rate": 0.002, "loss": 2.5689, "step": 111850 }, { "epoch": 0.22284999362488844, "grad_norm": 0.16000071167945862, "learning_rate": 0.002, "loss": 2.5687, "step": 111860 }, { "epoch": 0.22286991584852736, "grad_norm": 0.1523435413837433, "learning_rate": 0.002, "loss": 2.5659, "step": 111870 }, { "epoch": 0.22288983807216625, "grad_norm": 0.1858755350112915, "learning_rate": 0.002, "loss": 2.5715, "step": 111880 }, { "epoch": 0.22290976029580517, "grad_norm": 0.1461518406867981, "learning_rate": 0.002, "loss": 2.5689, "step": 111890 }, { "epoch": 0.2229296825194441, "grad_norm": 0.16991488635540009, "learning_rate": 0.002, "loss": 2.5573, "step": 111900 }, { "epoch": 0.222949604743083, "grad_norm": 0.15133224427700043, "learning_rate": 0.002, "loss": 2.5655, "step": 111910 }, { "epoch": 0.22296952696672193, "grad_norm": 0.19319283962249756, "learning_rate": 0.002, "loss": 2.5707, "step": 111920 }, { "epoch": 0.22298944919036084, "grad_norm": 0.1676228791475296, "learning_rate": 0.002, "loss": 2.5791, "step": 111930 }, { "epoch": 0.22300937141399974, "grad_norm": 0.1565941572189331, "learning_rate": 0.002, "loss": 2.5776, "step": 111940 }, { "epoch": 0.22302929363763865, "grad_norm": 0.18460533022880554, "learning_rate": 0.002, "loss": 2.571, "step": 111950 }, { "epoch": 0.22304921586127757, "grad_norm": 0.18952757120132446, "learning_rate": 0.002, "loss": 2.5717, "step": 111960 }, { "epoch": 0.2230691380849165, "grad_norm": 0.14153799414634705, "learning_rate": 0.002, "loss": 2.5751, "step": 111970 }, { "epoch": 0.2230890603085554, "grad_norm": 0.1585305631160736, "learning_rate": 0.002, "loss": 2.5577, "step": 111980 }, { "epoch": 0.2231089825321943, "grad_norm": 0.1499050259590149, "learning_rate": 0.002, "loss": 2.5661, "step": 111990 }, { "epoch": 0.22312890475583322, "grad_norm": 0.1799476593732834, "learning_rate": 0.002, "loss": 2.5482, "step": 112000 }, { "epoch": 0.22314882697947214, "grad_norm": 0.16497179865837097, "learning_rate": 0.002, "loss": 2.5645, "step": 112010 }, { "epoch": 0.22316874920311106, "grad_norm": 0.15443488955497742, "learning_rate": 0.002, "loss": 2.5534, "step": 112020 }, { "epoch": 0.22318867142674997, "grad_norm": 0.16829317808151245, "learning_rate": 0.002, "loss": 2.5721, "step": 112030 }, { "epoch": 0.2232085936503889, "grad_norm": 0.18181411921977997, "learning_rate": 0.002, "loss": 2.5653, "step": 112040 }, { "epoch": 0.22322851587402778, "grad_norm": 0.16079290211200714, "learning_rate": 0.002, "loss": 2.5751, "step": 112050 }, { "epoch": 0.2232484380976667, "grad_norm": 0.16889022290706635, "learning_rate": 0.002, "loss": 2.5687, "step": 112060 }, { "epoch": 0.22326836032130562, "grad_norm": 0.1609075665473938, "learning_rate": 0.002, "loss": 2.5641, "step": 112070 }, { "epoch": 0.22328828254494454, "grad_norm": 0.17130133509635925, "learning_rate": 0.002, "loss": 2.5734, "step": 112080 }, { "epoch": 0.22330820476858346, "grad_norm": 0.18845482170581818, "learning_rate": 0.002, "loss": 2.5663, "step": 112090 }, { "epoch": 0.22332812699222238, "grad_norm": 0.14741486310958862, "learning_rate": 0.002, "loss": 2.5726, "step": 112100 }, { "epoch": 0.22334804921586127, "grad_norm": 0.18253283202648163, "learning_rate": 0.002, "loss": 2.5841, "step": 112110 }, { "epoch": 0.22336797143950018, "grad_norm": 0.18629047274589539, "learning_rate": 0.002, "loss": 2.5573, "step": 112120 }, { "epoch": 0.2233878936631391, "grad_norm": 0.15672488510608673, "learning_rate": 0.002, "loss": 2.5812, "step": 112130 }, { "epoch": 0.22340781588677802, "grad_norm": 0.15951800346374512, "learning_rate": 0.002, "loss": 2.5641, "step": 112140 }, { "epoch": 0.22342773811041694, "grad_norm": 0.16895312070846558, "learning_rate": 0.002, "loss": 2.5545, "step": 112150 }, { "epoch": 0.22344766033405586, "grad_norm": 0.19598613679409027, "learning_rate": 0.002, "loss": 2.5653, "step": 112160 }, { "epoch": 0.22346758255769475, "grad_norm": 0.18489718437194824, "learning_rate": 0.002, "loss": 2.557, "step": 112170 }, { "epoch": 0.22348750478133367, "grad_norm": 0.16065315902233124, "learning_rate": 0.002, "loss": 2.5549, "step": 112180 }, { "epoch": 0.2235074270049726, "grad_norm": 0.15255185961723328, "learning_rate": 0.002, "loss": 2.5637, "step": 112190 }, { "epoch": 0.2235273492286115, "grad_norm": 0.15665486454963684, "learning_rate": 0.002, "loss": 2.5724, "step": 112200 }, { "epoch": 0.22354727145225042, "grad_norm": 0.18688379228115082, "learning_rate": 0.002, "loss": 2.5558, "step": 112210 }, { "epoch": 0.22356719367588934, "grad_norm": 0.19135279953479767, "learning_rate": 0.002, "loss": 2.5659, "step": 112220 }, { "epoch": 0.22358711589952823, "grad_norm": 0.1665443778038025, "learning_rate": 0.002, "loss": 2.5547, "step": 112230 }, { "epoch": 0.22360703812316715, "grad_norm": 0.1528417468070984, "learning_rate": 0.002, "loss": 2.5706, "step": 112240 }, { "epoch": 0.22362696034680607, "grad_norm": 0.16612929105758667, "learning_rate": 0.002, "loss": 2.5652, "step": 112250 }, { "epoch": 0.223646882570445, "grad_norm": 0.16019520163536072, "learning_rate": 0.002, "loss": 2.5681, "step": 112260 }, { "epoch": 0.2236668047940839, "grad_norm": 0.20286385715007782, "learning_rate": 0.002, "loss": 2.5594, "step": 112270 }, { "epoch": 0.2236867270177228, "grad_norm": 0.1653074324131012, "learning_rate": 0.002, "loss": 2.55, "step": 112280 }, { "epoch": 0.22370664924136172, "grad_norm": 0.14004507660865784, "learning_rate": 0.002, "loss": 2.5758, "step": 112290 }, { "epoch": 0.22372657146500063, "grad_norm": 0.1511581540107727, "learning_rate": 0.002, "loss": 2.5604, "step": 112300 }, { "epoch": 0.22374649368863955, "grad_norm": 0.163386270403862, "learning_rate": 0.002, "loss": 2.5721, "step": 112310 }, { "epoch": 0.22376641591227847, "grad_norm": 0.15884555876255035, "learning_rate": 0.002, "loss": 2.5589, "step": 112320 }, { "epoch": 0.2237863381359174, "grad_norm": 0.16299524903297424, "learning_rate": 0.002, "loss": 2.5776, "step": 112330 }, { "epoch": 0.22380626035955628, "grad_norm": 0.16278760135173798, "learning_rate": 0.002, "loss": 2.5699, "step": 112340 }, { "epoch": 0.2238261825831952, "grad_norm": 0.2045169472694397, "learning_rate": 0.002, "loss": 2.5782, "step": 112350 }, { "epoch": 0.22384610480683412, "grad_norm": 0.14902153611183167, "learning_rate": 0.002, "loss": 2.5588, "step": 112360 }, { "epoch": 0.22386602703047304, "grad_norm": 0.1746961623430252, "learning_rate": 0.002, "loss": 2.5491, "step": 112370 }, { "epoch": 0.22388594925411195, "grad_norm": 0.16703805327415466, "learning_rate": 0.002, "loss": 2.5678, "step": 112380 }, { "epoch": 0.22390587147775087, "grad_norm": 0.1626061201095581, "learning_rate": 0.002, "loss": 2.5613, "step": 112390 }, { "epoch": 0.22392579370138976, "grad_norm": 0.189732164144516, "learning_rate": 0.002, "loss": 2.5669, "step": 112400 }, { "epoch": 0.22394571592502868, "grad_norm": 0.16507525742053986, "learning_rate": 0.002, "loss": 2.5569, "step": 112410 }, { "epoch": 0.2239656381486676, "grad_norm": 0.16675028204917908, "learning_rate": 0.002, "loss": 2.5633, "step": 112420 }, { "epoch": 0.22398556037230652, "grad_norm": 0.15648239850997925, "learning_rate": 0.002, "loss": 2.5575, "step": 112430 }, { "epoch": 0.22400548259594544, "grad_norm": 0.15794795751571655, "learning_rate": 0.002, "loss": 2.57, "step": 112440 }, { "epoch": 0.22402540481958436, "grad_norm": 0.16979171335697174, "learning_rate": 0.002, "loss": 2.5725, "step": 112450 }, { "epoch": 0.22404532704322325, "grad_norm": 0.16915692389011383, "learning_rate": 0.002, "loss": 2.5599, "step": 112460 }, { "epoch": 0.22406524926686217, "grad_norm": 0.15622122585773468, "learning_rate": 0.002, "loss": 2.5652, "step": 112470 }, { "epoch": 0.22408517149050108, "grad_norm": 0.14800304174423218, "learning_rate": 0.002, "loss": 2.5698, "step": 112480 }, { "epoch": 0.22410509371414, "grad_norm": 0.19492357969284058, "learning_rate": 0.002, "loss": 2.5733, "step": 112490 }, { "epoch": 0.22412501593777892, "grad_norm": 0.16109900176525116, "learning_rate": 0.002, "loss": 2.5572, "step": 112500 }, { "epoch": 0.2241449381614178, "grad_norm": 0.14967991411685944, "learning_rate": 0.002, "loss": 2.5548, "step": 112510 }, { "epoch": 0.22416486038505673, "grad_norm": 0.1644328385591507, "learning_rate": 0.002, "loss": 2.5631, "step": 112520 }, { "epoch": 0.22418478260869565, "grad_norm": 0.16010461747646332, "learning_rate": 0.002, "loss": 2.559, "step": 112530 }, { "epoch": 0.22420470483233457, "grad_norm": 0.169692724943161, "learning_rate": 0.002, "loss": 2.5634, "step": 112540 }, { "epoch": 0.22422462705597349, "grad_norm": 0.15742315351963043, "learning_rate": 0.002, "loss": 2.5696, "step": 112550 }, { "epoch": 0.2242445492796124, "grad_norm": 0.16127629578113556, "learning_rate": 0.002, "loss": 2.557, "step": 112560 }, { "epoch": 0.2242644715032513, "grad_norm": 0.1665169596672058, "learning_rate": 0.002, "loss": 2.5742, "step": 112570 }, { "epoch": 0.2242843937268902, "grad_norm": 0.14272646605968475, "learning_rate": 0.002, "loss": 2.5606, "step": 112580 }, { "epoch": 0.22430431595052913, "grad_norm": 0.1961163729429245, "learning_rate": 0.002, "loss": 2.5699, "step": 112590 }, { "epoch": 0.22432423817416805, "grad_norm": 0.17238536477088928, "learning_rate": 0.002, "loss": 2.5702, "step": 112600 }, { "epoch": 0.22434416039780697, "grad_norm": 0.15253937244415283, "learning_rate": 0.002, "loss": 2.5732, "step": 112610 }, { "epoch": 0.2243640826214459, "grad_norm": 0.16971492767333984, "learning_rate": 0.002, "loss": 2.5663, "step": 112620 }, { "epoch": 0.22438400484508478, "grad_norm": 0.16550344228744507, "learning_rate": 0.002, "loss": 2.5629, "step": 112630 }, { "epoch": 0.2244039270687237, "grad_norm": 0.1793367713689804, "learning_rate": 0.002, "loss": 2.5726, "step": 112640 }, { "epoch": 0.22442384929236261, "grad_norm": 0.1624850630760193, "learning_rate": 0.002, "loss": 2.5575, "step": 112650 }, { "epoch": 0.22444377151600153, "grad_norm": 0.1785895824432373, "learning_rate": 0.002, "loss": 2.5671, "step": 112660 }, { "epoch": 0.22446369373964045, "grad_norm": 0.18937894701957703, "learning_rate": 0.002, "loss": 2.5777, "step": 112670 }, { "epoch": 0.22448361596327937, "grad_norm": 0.15713512897491455, "learning_rate": 0.002, "loss": 2.5641, "step": 112680 }, { "epoch": 0.22450353818691826, "grad_norm": 0.2167767733335495, "learning_rate": 0.002, "loss": 2.5607, "step": 112690 }, { "epoch": 0.22452346041055718, "grad_norm": 0.15761566162109375, "learning_rate": 0.002, "loss": 2.5678, "step": 112700 }, { "epoch": 0.2245433826341961, "grad_norm": 0.1474352478981018, "learning_rate": 0.002, "loss": 2.565, "step": 112710 }, { "epoch": 0.22456330485783502, "grad_norm": 0.15973477065563202, "learning_rate": 0.002, "loss": 2.5736, "step": 112720 }, { "epoch": 0.22458322708147394, "grad_norm": 0.2068123072385788, "learning_rate": 0.002, "loss": 2.5619, "step": 112730 }, { "epoch": 0.22460314930511283, "grad_norm": 0.15634427964687347, "learning_rate": 0.002, "loss": 2.5649, "step": 112740 }, { "epoch": 0.22462307152875174, "grad_norm": 0.17424753308296204, "learning_rate": 0.002, "loss": 2.5519, "step": 112750 }, { "epoch": 0.22464299375239066, "grad_norm": 0.15080119669437408, "learning_rate": 0.002, "loss": 2.5671, "step": 112760 }, { "epoch": 0.22466291597602958, "grad_norm": 0.1774575412273407, "learning_rate": 0.002, "loss": 2.5591, "step": 112770 }, { "epoch": 0.2246828381996685, "grad_norm": 0.17398898303508759, "learning_rate": 0.002, "loss": 2.5569, "step": 112780 }, { "epoch": 0.22470276042330742, "grad_norm": 0.16713847219944, "learning_rate": 0.002, "loss": 2.5577, "step": 112790 }, { "epoch": 0.2247226826469463, "grad_norm": 0.1639227569103241, "learning_rate": 0.002, "loss": 2.5734, "step": 112800 }, { "epoch": 0.22474260487058523, "grad_norm": 0.17627237737178802, "learning_rate": 0.002, "loss": 2.566, "step": 112810 }, { "epoch": 0.22476252709422415, "grad_norm": 0.15359537303447723, "learning_rate": 0.002, "loss": 2.5634, "step": 112820 }, { "epoch": 0.22478244931786306, "grad_norm": 0.1627998650074005, "learning_rate": 0.002, "loss": 2.5635, "step": 112830 }, { "epoch": 0.22480237154150198, "grad_norm": 0.1898818165063858, "learning_rate": 0.002, "loss": 2.5764, "step": 112840 }, { "epoch": 0.2248222937651409, "grad_norm": 0.14848272502422333, "learning_rate": 0.002, "loss": 2.5557, "step": 112850 }, { "epoch": 0.2248422159887798, "grad_norm": 0.17708010971546173, "learning_rate": 0.002, "loss": 2.5788, "step": 112860 }, { "epoch": 0.2248621382124187, "grad_norm": 0.2193853259086609, "learning_rate": 0.002, "loss": 2.5595, "step": 112870 }, { "epoch": 0.22488206043605763, "grad_norm": 0.14416085183620453, "learning_rate": 0.002, "loss": 2.553, "step": 112880 }, { "epoch": 0.22490198265969655, "grad_norm": 0.17700451612472534, "learning_rate": 0.002, "loss": 2.5721, "step": 112890 }, { "epoch": 0.22492190488333547, "grad_norm": 0.1735304892063141, "learning_rate": 0.002, "loss": 2.5515, "step": 112900 }, { "epoch": 0.22494182710697438, "grad_norm": 0.15251396596431732, "learning_rate": 0.002, "loss": 2.564, "step": 112910 }, { "epoch": 0.22496174933061328, "grad_norm": 0.14017154276371002, "learning_rate": 0.002, "loss": 2.5719, "step": 112920 }, { "epoch": 0.2249816715542522, "grad_norm": 0.1899491548538208, "learning_rate": 0.002, "loss": 2.5716, "step": 112930 }, { "epoch": 0.2250015937778911, "grad_norm": 0.17951177060604095, "learning_rate": 0.002, "loss": 2.5681, "step": 112940 }, { "epoch": 0.22502151600153003, "grad_norm": 0.15533019602298737, "learning_rate": 0.002, "loss": 2.57, "step": 112950 }, { "epoch": 0.22504143822516895, "grad_norm": 0.1365678608417511, "learning_rate": 0.002, "loss": 2.5651, "step": 112960 }, { "epoch": 0.22506136044880787, "grad_norm": 0.16772109270095825, "learning_rate": 0.002, "loss": 2.5566, "step": 112970 }, { "epoch": 0.22508128267244676, "grad_norm": 0.16236421465873718, "learning_rate": 0.002, "loss": 2.5548, "step": 112980 }, { "epoch": 0.22510120489608568, "grad_norm": 0.20725341141223907, "learning_rate": 0.002, "loss": 2.5662, "step": 112990 }, { "epoch": 0.2251211271197246, "grad_norm": 0.17475464940071106, "learning_rate": 0.002, "loss": 2.5587, "step": 113000 }, { "epoch": 0.22514104934336351, "grad_norm": 0.15719471871852875, "learning_rate": 0.002, "loss": 2.5526, "step": 113010 }, { "epoch": 0.22516097156700243, "grad_norm": 0.1845298558473587, "learning_rate": 0.002, "loss": 2.5659, "step": 113020 }, { "epoch": 0.22518089379064132, "grad_norm": 0.1824796199798584, "learning_rate": 0.002, "loss": 2.5668, "step": 113030 }, { "epoch": 0.22520081601428024, "grad_norm": 0.16117466986179352, "learning_rate": 0.002, "loss": 2.5576, "step": 113040 }, { "epoch": 0.22522073823791916, "grad_norm": 0.18254481256008148, "learning_rate": 0.002, "loss": 2.5459, "step": 113050 }, { "epoch": 0.22524066046155808, "grad_norm": 0.20393434166908264, "learning_rate": 0.002, "loss": 2.5737, "step": 113060 }, { "epoch": 0.225260582685197, "grad_norm": 0.16050055623054504, "learning_rate": 0.002, "loss": 2.5717, "step": 113070 }, { "epoch": 0.22528050490883592, "grad_norm": 0.15939415991306305, "learning_rate": 0.002, "loss": 2.5811, "step": 113080 }, { "epoch": 0.2253004271324748, "grad_norm": 0.15281158685684204, "learning_rate": 0.002, "loss": 2.5579, "step": 113090 }, { "epoch": 0.22532034935611372, "grad_norm": 0.1660081297159195, "learning_rate": 0.002, "loss": 2.5589, "step": 113100 }, { "epoch": 0.22534027157975264, "grad_norm": 0.16386577486991882, "learning_rate": 0.002, "loss": 2.5666, "step": 113110 }, { "epoch": 0.22536019380339156, "grad_norm": 0.15600477159023285, "learning_rate": 0.002, "loss": 2.5782, "step": 113120 }, { "epoch": 0.22538011602703048, "grad_norm": 0.18170417845249176, "learning_rate": 0.002, "loss": 2.5638, "step": 113130 }, { "epoch": 0.2254000382506694, "grad_norm": 0.16234715282917023, "learning_rate": 0.002, "loss": 2.5529, "step": 113140 }, { "epoch": 0.2254199604743083, "grad_norm": 0.14457187056541443, "learning_rate": 0.002, "loss": 2.5646, "step": 113150 }, { "epoch": 0.2254398826979472, "grad_norm": 0.1792987436056137, "learning_rate": 0.002, "loss": 2.5666, "step": 113160 }, { "epoch": 0.22545980492158613, "grad_norm": 0.1729467362165451, "learning_rate": 0.002, "loss": 2.5732, "step": 113170 }, { "epoch": 0.22547972714522505, "grad_norm": 0.151455819606781, "learning_rate": 0.002, "loss": 2.5491, "step": 113180 }, { "epoch": 0.22549964936886396, "grad_norm": 0.15479353070259094, "learning_rate": 0.002, "loss": 2.5672, "step": 113190 }, { "epoch": 0.22551957159250288, "grad_norm": 0.16562560200691223, "learning_rate": 0.002, "loss": 2.5605, "step": 113200 }, { "epoch": 0.22553949381614177, "grad_norm": 0.15240263938903809, "learning_rate": 0.002, "loss": 2.5671, "step": 113210 }, { "epoch": 0.2255594160397807, "grad_norm": 0.18459440767765045, "learning_rate": 0.002, "loss": 2.568, "step": 113220 }, { "epoch": 0.2255793382634196, "grad_norm": 0.14201895892620087, "learning_rate": 0.002, "loss": 2.5633, "step": 113230 }, { "epoch": 0.22559926048705853, "grad_norm": 0.17612551152706146, "learning_rate": 0.002, "loss": 2.5678, "step": 113240 }, { "epoch": 0.22561918271069745, "grad_norm": 0.1577901691198349, "learning_rate": 0.002, "loss": 2.5639, "step": 113250 }, { "epoch": 0.22563910493433634, "grad_norm": 0.18694110214710236, "learning_rate": 0.002, "loss": 2.5592, "step": 113260 }, { "epoch": 0.22565902715797526, "grad_norm": 0.15062281489372253, "learning_rate": 0.002, "loss": 2.5847, "step": 113270 }, { "epoch": 0.22567894938161417, "grad_norm": 0.16053533554077148, "learning_rate": 0.002, "loss": 2.5563, "step": 113280 }, { "epoch": 0.2256988716052531, "grad_norm": 0.1517108678817749, "learning_rate": 0.002, "loss": 2.5559, "step": 113290 }, { "epoch": 0.225718793828892, "grad_norm": 0.18316073715686798, "learning_rate": 0.002, "loss": 2.5647, "step": 113300 }, { "epoch": 0.22573871605253093, "grad_norm": 0.15659956634044647, "learning_rate": 0.002, "loss": 2.5614, "step": 113310 }, { "epoch": 0.22575863827616982, "grad_norm": 0.17379355430603027, "learning_rate": 0.002, "loss": 2.5532, "step": 113320 }, { "epoch": 0.22577856049980874, "grad_norm": 0.16068097949028015, "learning_rate": 0.002, "loss": 2.5843, "step": 113330 }, { "epoch": 0.22579848272344766, "grad_norm": 0.2147577852010727, "learning_rate": 0.002, "loss": 2.5804, "step": 113340 }, { "epoch": 0.22581840494708658, "grad_norm": 0.15067748725414276, "learning_rate": 0.002, "loss": 2.5678, "step": 113350 }, { "epoch": 0.2258383271707255, "grad_norm": 0.16980141401290894, "learning_rate": 0.002, "loss": 2.5814, "step": 113360 }, { "epoch": 0.2258582493943644, "grad_norm": 0.16369414329528809, "learning_rate": 0.002, "loss": 2.5641, "step": 113370 }, { "epoch": 0.2258781716180033, "grad_norm": 0.1656021624803543, "learning_rate": 0.002, "loss": 2.556, "step": 113380 }, { "epoch": 0.22589809384164222, "grad_norm": 0.16167104244232178, "learning_rate": 0.002, "loss": 2.5557, "step": 113390 }, { "epoch": 0.22591801606528114, "grad_norm": 0.1657421886920929, "learning_rate": 0.002, "loss": 2.5791, "step": 113400 }, { "epoch": 0.22593793828892006, "grad_norm": 0.16459716856479645, "learning_rate": 0.002, "loss": 2.5704, "step": 113410 }, { "epoch": 0.22595786051255898, "grad_norm": 0.170238196849823, "learning_rate": 0.002, "loss": 2.5593, "step": 113420 }, { "epoch": 0.2259777827361979, "grad_norm": 0.18825219571590424, "learning_rate": 0.002, "loss": 2.5769, "step": 113430 }, { "epoch": 0.2259977049598368, "grad_norm": 0.16360393166542053, "learning_rate": 0.002, "loss": 2.5511, "step": 113440 }, { "epoch": 0.2260176271834757, "grad_norm": 0.17614302039146423, "learning_rate": 0.002, "loss": 2.5588, "step": 113450 }, { "epoch": 0.22603754940711462, "grad_norm": 0.16481876373291016, "learning_rate": 0.002, "loss": 2.5595, "step": 113460 }, { "epoch": 0.22605747163075354, "grad_norm": 0.15495944023132324, "learning_rate": 0.002, "loss": 2.5667, "step": 113470 }, { "epoch": 0.22607739385439246, "grad_norm": 0.18967927992343903, "learning_rate": 0.002, "loss": 2.5831, "step": 113480 }, { "epoch": 0.22609731607803135, "grad_norm": 0.14830875396728516, "learning_rate": 0.002, "loss": 2.5765, "step": 113490 }, { "epoch": 0.22611723830167027, "grad_norm": 0.1513117253780365, "learning_rate": 0.002, "loss": 2.5671, "step": 113500 }, { "epoch": 0.2261371605253092, "grad_norm": 0.1943269670009613, "learning_rate": 0.002, "loss": 2.5814, "step": 113510 }, { "epoch": 0.2261570827489481, "grad_norm": 0.15847864747047424, "learning_rate": 0.002, "loss": 2.5546, "step": 113520 }, { "epoch": 0.22617700497258703, "grad_norm": 0.1545657217502594, "learning_rate": 0.002, "loss": 2.5638, "step": 113530 }, { "epoch": 0.22619692719622594, "grad_norm": 0.20369215309619904, "learning_rate": 0.002, "loss": 2.5682, "step": 113540 }, { "epoch": 0.22621684941986483, "grad_norm": 0.16705681383609772, "learning_rate": 0.002, "loss": 2.5467, "step": 113550 }, { "epoch": 0.22623677164350375, "grad_norm": 0.14191138744354248, "learning_rate": 0.002, "loss": 2.5591, "step": 113560 }, { "epoch": 0.22625669386714267, "grad_norm": 0.19342659413814545, "learning_rate": 0.002, "loss": 2.573, "step": 113570 }, { "epoch": 0.2262766160907816, "grad_norm": 0.14187310636043549, "learning_rate": 0.002, "loss": 2.5608, "step": 113580 }, { "epoch": 0.2262965383144205, "grad_norm": 0.1534070521593094, "learning_rate": 0.002, "loss": 2.5713, "step": 113590 }, { "epoch": 0.22631646053805943, "grad_norm": 0.19392603635787964, "learning_rate": 0.002, "loss": 2.5727, "step": 113600 }, { "epoch": 0.22633638276169832, "grad_norm": 0.19973473250865936, "learning_rate": 0.002, "loss": 2.5739, "step": 113610 }, { "epoch": 0.22635630498533724, "grad_norm": 0.16299504041671753, "learning_rate": 0.002, "loss": 2.5688, "step": 113620 }, { "epoch": 0.22637622720897616, "grad_norm": 0.14799274504184723, "learning_rate": 0.002, "loss": 2.5537, "step": 113630 }, { "epoch": 0.22639614943261507, "grad_norm": 0.1710561364889145, "learning_rate": 0.002, "loss": 2.5563, "step": 113640 }, { "epoch": 0.226416071656254, "grad_norm": 0.1621163934469223, "learning_rate": 0.002, "loss": 2.5784, "step": 113650 }, { "epoch": 0.2264359938798929, "grad_norm": 0.15317465364933014, "learning_rate": 0.002, "loss": 2.5747, "step": 113660 }, { "epoch": 0.2264559161035318, "grad_norm": 0.15606917440891266, "learning_rate": 0.002, "loss": 2.5566, "step": 113670 }, { "epoch": 0.22647583832717072, "grad_norm": 0.16628089547157288, "learning_rate": 0.002, "loss": 2.5554, "step": 113680 }, { "epoch": 0.22649576055080964, "grad_norm": 0.17445997893810272, "learning_rate": 0.002, "loss": 2.5627, "step": 113690 }, { "epoch": 0.22651568277444856, "grad_norm": 0.15359452366828918, "learning_rate": 0.002, "loss": 2.56, "step": 113700 }, { "epoch": 0.22653560499808748, "grad_norm": 0.1754123717546463, "learning_rate": 0.002, "loss": 2.5518, "step": 113710 }, { "epoch": 0.2265555272217264, "grad_norm": 0.15354163944721222, "learning_rate": 0.002, "loss": 2.5549, "step": 113720 }, { "epoch": 0.22657544944536528, "grad_norm": 0.15445244312286377, "learning_rate": 0.002, "loss": 2.5647, "step": 113730 }, { "epoch": 0.2265953716690042, "grad_norm": 0.4024697542190552, "learning_rate": 0.002, "loss": 2.5766, "step": 113740 }, { "epoch": 0.22661529389264312, "grad_norm": 0.159665048122406, "learning_rate": 0.002, "loss": 2.5661, "step": 113750 }, { "epoch": 0.22663521611628204, "grad_norm": 0.18926583230495453, "learning_rate": 0.002, "loss": 2.5614, "step": 113760 }, { "epoch": 0.22665513833992096, "grad_norm": 0.16025406122207642, "learning_rate": 0.002, "loss": 2.572, "step": 113770 }, { "epoch": 0.22667506056355985, "grad_norm": 0.12787610292434692, "learning_rate": 0.002, "loss": 2.5699, "step": 113780 }, { "epoch": 0.22669498278719877, "grad_norm": 0.12927773594856262, "learning_rate": 0.002, "loss": 2.5598, "step": 113790 }, { "epoch": 0.2267149050108377, "grad_norm": 0.17832666635513306, "learning_rate": 0.002, "loss": 2.5763, "step": 113800 }, { "epoch": 0.2267348272344766, "grad_norm": 0.18511010706424713, "learning_rate": 0.002, "loss": 2.5786, "step": 113810 }, { "epoch": 0.22675474945811552, "grad_norm": 0.15246914327144623, "learning_rate": 0.002, "loss": 2.5616, "step": 113820 }, { "epoch": 0.22677467168175444, "grad_norm": 0.20436526834964752, "learning_rate": 0.002, "loss": 2.5661, "step": 113830 }, { "epoch": 0.22679459390539333, "grad_norm": 0.17691299319267273, "learning_rate": 0.002, "loss": 2.5556, "step": 113840 }, { "epoch": 0.22681451612903225, "grad_norm": 0.17248816788196564, "learning_rate": 0.002, "loss": 2.5677, "step": 113850 }, { "epoch": 0.22683443835267117, "grad_norm": 0.14185847342014313, "learning_rate": 0.002, "loss": 2.5608, "step": 113860 }, { "epoch": 0.2268543605763101, "grad_norm": 0.22503936290740967, "learning_rate": 0.002, "loss": 2.57, "step": 113870 }, { "epoch": 0.226874282799949, "grad_norm": 0.18071508407592773, "learning_rate": 0.002, "loss": 2.5698, "step": 113880 }, { "epoch": 0.22689420502358792, "grad_norm": 0.16996698081493378, "learning_rate": 0.002, "loss": 2.5731, "step": 113890 }, { "epoch": 0.22691412724722682, "grad_norm": 0.13675788044929504, "learning_rate": 0.002, "loss": 2.5552, "step": 113900 }, { "epoch": 0.22693404947086573, "grad_norm": 0.17118430137634277, "learning_rate": 0.002, "loss": 2.5714, "step": 113910 }, { "epoch": 0.22695397169450465, "grad_norm": 0.19436922669410706, "learning_rate": 0.002, "loss": 2.5742, "step": 113920 }, { "epoch": 0.22697389391814357, "grad_norm": 0.14104942977428436, "learning_rate": 0.002, "loss": 2.5623, "step": 113930 }, { "epoch": 0.2269938161417825, "grad_norm": 0.14515751600265503, "learning_rate": 0.002, "loss": 2.5664, "step": 113940 }, { "epoch": 0.2270137383654214, "grad_norm": 0.16030213236808777, "learning_rate": 0.002, "loss": 2.5551, "step": 113950 }, { "epoch": 0.2270336605890603, "grad_norm": 0.1649269312620163, "learning_rate": 0.002, "loss": 2.5552, "step": 113960 }, { "epoch": 0.22705358281269922, "grad_norm": 0.1702864021062851, "learning_rate": 0.002, "loss": 2.5637, "step": 113970 }, { "epoch": 0.22707350503633814, "grad_norm": 0.17474552989006042, "learning_rate": 0.002, "loss": 2.5534, "step": 113980 }, { "epoch": 0.22709342725997705, "grad_norm": 0.15033188462257385, "learning_rate": 0.002, "loss": 2.5593, "step": 113990 }, { "epoch": 0.22711334948361597, "grad_norm": 0.15631061792373657, "learning_rate": 0.002, "loss": 2.5854, "step": 114000 }, { "epoch": 0.22713327170725486, "grad_norm": 0.16186580061912537, "learning_rate": 0.002, "loss": 2.5732, "step": 114010 }, { "epoch": 0.22715319393089378, "grad_norm": 0.1581808477640152, "learning_rate": 0.002, "loss": 2.5639, "step": 114020 }, { "epoch": 0.2271731161545327, "grad_norm": 0.17905667424201965, "learning_rate": 0.002, "loss": 2.5781, "step": 114030 }, { "epoch": 0.22719303837817162, "grad_norm": 0.1771790087223053, "learning_rate": 0.002, "loss": 2.586, "step": 114040 }, { "epoch": 0.22721296060181054, "grad_norm": 0.16950994729995728, "learning_rate": 0.002, "loss": 2.5616, "step": 114050 }, { "epoch": 0.22723288282544946, "grad_norm": 0.15207576751708984, "learning_rate": 0.002, "loss": 2.5723, "step": 114060 }, { "epoch": 0.22725280504908835, "grad_norm": 0.1925041824579239, "learning_rate": 0.002, "loss": 2.572, "step": 114070 }, { "epoch": 0.22727272727272727, "grad_norm": 0.17769205570220947, "learning_rate": 0.002, "loss": 2.5511, "step": 114080 }, { "epoch": 0.22729264949636618, "grad_norm": 0.1755414456129074, "learning_rate": 0.002, "loss": 2.5586, "step": 114090 }, { "epoch": 0.2273125717200051, "grad_norm": 0.15037870407104492, "learning_rate": 0.002, "loss": 2.559, "step": 114100 }, { "epoch": 0.22733249394364402, "grad_norm": 0.1752575933933258, "learning_rate": 0.002, "loss": 2.5546, "step": 114110 }, { "epoch": 0.22735241616728294, "grad_norm": 0.17261803150177002, "learning_rate": 0.002, "loss": 2.5629, "step": 114120 }, { "epoch": 0.22737233839092183, "grad_norm": 0.17853765189647675, "learning_rate": 0.002, "loss": 2.5743, "step": 114130 }, { "epoch": 0.22739226061456075, "grad_norm": 0.14453409612178802, "learning_rate": 0.002, "loss": 2.5599, "step": 114140 }, { "epoch": 0.22741218283819967, "grad_norm": 0.16215567290782928, "learning_rate": 0.002, "loss": 2.5692, "step": 114150 }, { "epoch": 0.22743210506183859, "grad_norm": 0.18891598284244537, "learning_rate": 0.002, "loss": 2.5738, "step": 114160 }, { "epoch": 0.2274520272854775, "grad_norm": 0.1495029777288437, "learning_rate": 0.002, "loss": 2.5671, "step": 114170 }, { "epoch": 0.22747194950911642, "grad_norm": 0.20118454098701477, "learning_rate": 0.002, "loss": 2.5587, "step": 114180 }, { "epoch": 0.2274918717327553, "grad_norm": 0.15579675137996674, "learning_rate": 0.002, "loss": 2.572, "step": 114190 }, { "epoch": 0.22751179395639423, "grad_norm": 0.16246731579303741, "learning_rate": 0.002, "loss": 2.5556, "step": 114200 }, { "epoch": 0.22753171618003315, "grad_norm": 0.18694931268692017, "learning_rate": 0.002, "loss": 2.5567, "step": 114210 }, { "epoch": 0.22755163840367207, "grad_norm": 0.15505903959274292, "learning_rate": 0.002, "loss": 2.576, "step": 114220 }, { "epoch": 0.227571560627311, "grad_norm": 0.15590150654315948, "learning_rate": 0.002, "loss": 2.5729, "step": 114230 }, { "epoch": 0.2275914828509499, "grad_norm": 0.17165599763393402, "learning_rate": 0.002, "loss": 2.5613, "step": 114240 }, { "epoch": 0.2276114050745888, "grad_norm": 0.17015601694583893, "learning_rate": 0.002, "loss": 2.5691, "step": 114250 }, { "epoch": 0.22763132729822771, "grad_norm": 0.21524158120155334, "learning_rate": 0.002, "loss": 2.5678, "step": 114260 }, { "epoch": 0.22765124952186663, "grad_norm": 0.18203024566173553, "learning_rate": 0.002, "loss": 2.5787, "step": 114270 }, { "epoch": 0.22767117174550555, "grad_norm": 0.1968025267124176, "learning_rate": 0.002, "loss": 2.5613, "step": 114280 }, { "epoch": 0.22769109396914447, "grad_norm": 0.13343361020088196, "learning_rate": 0.002, "loss": 2.5582, "step": 114290 }, { "epoch": 0.22771101619278336, "grad_norm": 0.17013053596019745, "learning_rate": 0.002, "loss": 2.5646, "step": 114300 }, { "epoch": 0.22773093841642228, "grad_norm": 0.16247154772281647, "learning_rate": 0.002, "loss": 2.566, "step": 114310 }, { "epoch": 0.2277508606400612, "grad_norm": 0.1773829311132431, "learning_rate": 0.002, "loss": 2.5733, "step": 114320 }, { "epoch": 0.22777078286370012, "grad_norm": 0.18119612336158752, "learning_rate": 0.002, "loss": 2.5728, "step": 114330 }, { "epoch": 0.22779070508733903, "grad_norm": 0.1452716588973999, "learning_rate": 0.002, "loss": 2.5454, "step": 114340 }, { "epoch": 0.22781062731097795, "grad_norm": 0.14521706104278564, "learning_rate": 0.002, "loss": 2.5725, "step": 114350 }, { "epoch": 0.22783054953461684, "grad_norm": 0.1894063502550125, "learning_rate": 0.002, "loss": 2.5766, "step": 114360 }, { "epoch": 0.22785047175825576, "grad_norm": 0.1690223217010498, "learning_rate": 0.002, "loss": 2.5595, "step": 114370 }, { "epoch": 0.22787039398189468, "grad_norm": 0.15194173157215118, "learning_rate": 0.002, "loss": 2.5822, "step": 114380 }, { "epoch": 0.2278903162055336, "grad_norm": 0.16632604598999023, "learning_rate": 0.002, "loss": 2.5618, "step": 114390 }, { "epoch": 0.22791023842917252, "grad_norm": 0.2002274990081787, "learning_rate": 0.002, "loss": 2.58, "step": 114400 }, { "epoch": 0.22793016065281144, "grad_norm": 0.1707785725593567, "learning_rate": 0.002, "loss": 2.5765, "step": 114410 }, { "epoch": 0.22795008287645033, "grad_norm": 0.17132875323295593, "learning_rate": 0.002, "loss": 2.572, "step": 114420 }, { "epoch": 0.22797000510008925, "grad_norm": 0.17996688187122345, "learning_rate": 0.002, "loss": 2.5616, "step": 114430 }, { "epoch": 0.22798992732372816, "grad_norm": 0.17091870307922363, "learning_rate": 0.002, "loss": 2.5523, "step": 114440 }, { "epoch": 0.22800984954736708, "grad_norm": 0.1528640240430832, "learning_rate": 0.002, "loss": 2.5765, "step": 114450 }, { "epoch": 0.228029771771006, "grad_norm": 0.19492857158184052, "learning_rate": 0.002, "loss": 2.557, "step": 114460 }, { "epoch": 0.22804969399464492, "grad_norm": 0.1979261040687561, "learning_rate": 0.002, "loss": 2.5818, "step": 114470 }, { "epoch": 0.2280696162182838, "grad_norm": 0.14466845989227295, "learning_rate": 0.002, "loss": 2.5549, "step": 114480 }, { "epoch": 0.22808953844192273, "grad_norm": 0.19997739791870117, "learning_rate": 0.002, "loss": 2.573, "step": 114490 }, { "epoch": 0.22810946066556165, "grad_norm": 0.17884500324726105, "learning_rate": 0.002, "loss": 2.5563, "step": 114500 }, { "epoch": 0.22812938288920057, "grad_norm": 0.14851783215999603, "learning_rate": 0.002, "loss": 2.5771, "step": 114510 }, { "epoch": 0.22814930511283948, "grad_norm": 0.15910907089710236, "learning_rate": 0.002, "loss": 2.5573, "step": 114520 }, { "epoch": 0.22816922733647838, "grad_norm": 0.17670407891273499, "learning_rate": 0.002, "loss": 2.5652, "step": 114530 }, { "epoch": 0.2281891495601173, "grad_norm": 0.1605920046567917, "learning_rate": 0.002, "loss": 2.5536, "step": 114540 }, { "epoch": 0.2282090717837562, "grad_norm": 0.18377180397510529, "learning_rate": 0.002, "loss": 2.57, "step": 114550 }, { "epoch": 0.22822899400739513, "grad_norm": 0.19401612877845764, "learning_rate": 0.002, "loss": 2.5585, "step": 114560 }, { "epoch": 0.22824891623103405, "grad_norm": 0.16796423494815826, "learning_rate": 0.002, "loss": 2.5729, "step": 114570 }, { "epoch": 0.22826883845467297, "grad_norm": 0.17858776450157166, "learning_rate": 0.002, "loss": 2.5607, "step": 114580 }, { "epoch": 0.22828876067831186, "grad_norm": 0.15333695709705353, "learning_rate": 0.002, "loss": 2.5729, "step": 114590 }, { "epoch": 0.22830868290195078, "grad_norm": 0.1954335719347, "learning_rate": 0.002, "loss": 2.5684, "step": 114600 }, { "epoch": 0.2283286051255897, "grad_norm": 0.15878812968730927, "learning_rate": 0.002, "loss": 2.5581, "step": 114610 }, { "epoch": 0.2283485273492286, "grad_norm": 0.14792190492153168, "learning_rate": 0.002, "loss": 2.5563, "step": 114620 }, { "epoch": 0.22836844957286753, "grad_norm": 0.19573037326335907, "learning_rate": 0.002, "loss": 2.5644, "step": 114630 }, { "epoch": 0.22838837179650645, "grad_norm": 0.15276391804218292, "learning_rate": 0.002, "loss": 2.5766, "step": 114640 }, { "epoch": 0.22840829402014534, "grad_norm": 0.22656553983688354, "learning_rate": 0.002, "loss": 2.5583, "step": 114650 }, { "epoch": 0.22842821624378426, "grad_norm": 0.17526142299175262, "learning_rate": 0.002, "loss": 2.5585, "step": 114660 }, { "epoch": 0.22844813846742318, "grad_norm": 0.1574479341506958, "learning_rate": 0.002, "loss": 2.5877, "step": 114670 }, { "epoch": 0.2284680606910621, "grad_norm": 0.16729462146759033, "learning_rate": 0.002, "loss": 2.5674, "step": 114680 }, { "epoch": 0.22848798291470102, "grad_norm": 0.16336017847061157, "learning_rate": 0.002, "loss": 2.5619, "step": 114690 }, { "epoch": 0.22850790513833993, "grad_norm": 0.14814357459545135, "learning_rate": 0.002, "loss": 2.5579, "step": 114700 }, { "epoch": 0.22852782736197882, "grad_norm": 0.1642196774482727, "learning_rate": 0.002, "loss": 2.5584, "step": 114710 }, { "epoch": 0.22854774958561774, "grad_norm": 0.17198313772678375, "learning_rate": 0.002, "loss": 2.5455, "step": 114720 }, { "epoch": 0.22856767180925666, "grad_norm": 0.16090907156467438, "learning_rate": 0.002, "loss": 2.5546, "step": 114730 }, { "epoch": 0.22858759403289558, "grad_norm": 0.16088199615478516, "learning_rate": 0.002, "loss": 2.5676, "step": 114740 }, { "epoch": 0.2286075162565345, "grad_norm": 0.17104189097881317, "learning_rate": 0.002, "loss": 2.5587, "step": 114750 }, { "epoch": 0.2286274384801734, "grad_norm": 0.18407262861728668, "learning_rate": 0.002, "loss": 2.5585, "step": 114760 }, { "epoch": 0.2286473607038123, "grad_norm": 0.149140864610672, "learning_rate": 0.002, "loss": 2.5811, "step": 114770 }, { "epoch": 0.22866728292745123, "grad_norm": 0.15678730607032776, "learning_rate": 0.002, "loss": 2.5876, "step": 114780 }, { "epoch": 0.22868720515109014, "grad_norm": 0.1836448758840561, "learning_rate": 0.002, "loss": 2.5511, "step": 114790 }, { "epoch": 0.22870712737472906, "grad_norm": 0.15718059241771698, "learning_rate": 0.002, "loss": 2.5742, "step": 114800 }, { "epoch": 0.22872704959836798, "grad_norm": 0.15291635692119598, "learning_rate": 0.002, "loss": 2.5552, "step": 114810 }, { "epoch": 0.22874697182200687, "grad_norm": 0.17558395862579346, "learning_rate": 0.002, "loss": 2.5712, "step": 114820 }, { "epoch": 0.2287668940456458, "grad_norm": 0.18507020175457, "learning_rate": 0.002, "loss": 2.5772, "step": 114830 }, { "epoch": 0.2287868162692847, "grad_norm": 0.15056440234184265, "learning_rate": 0.002, "loss": 2.5651, "step": 114840 }, { "epoch": 0.22880673849292363, "grad_norm": 0.14955659210681915, "learning_rate": 0.002, "loss": 2.5741, "step": 114850 }, { "epoch": 0.22882666071656255, "grad_norm": 0.19096848368644714, "learning_rate": 0.002, "loss": 2.5485, "step": 114860 }, { "epoch": 0.22884658294020147, "grad_norm": 0.1719992607831955, "learning_rate": 0.002, "loss": 2.5594, "step": 114870 }, { "epoch": 0.22886650516384036, "grad_norm": 0.16585172712802887, "learning_rate": 0.002, "loss": 2.5657, "step": 114880 }, { "epoch": 0.22888642738747927, "grad_norm": 0.17044684290885925, "learning_rate": 0.002, "loss": 2.5501, "step": 114890 }, { "epoch": 0.2289063496111182, "grad_norm": 0.16643230617046356, "learning_rate": 0.002, "loss": 2.5498, "step": 114900 }, { "epoch": 0.2289262718347571, "grad_norm": 0.16670449078083038, "learning_rate": 0.002, "loss": 2.5619, "step": 114910 }, { "epoch": 0.22894619405839603, "grad_norm": 0.18941184878349304, "learning_rate": 0.002, "loss": 2.5649, "step": 114920 }, { "epoch": 0.22896611628203495, "grad_norm": 0.15292644500732422, "learning_rate": 0.002, "loss": 2.5817, "step": 114930 }, { "epoch": 0.22898603850567384, "grad_norm": 0.17576098442077637, "learning_rate": 0.002, "loss": 2.5763, "step": 114940 }, { "epoch": 0.22900596072931276, "grad_norm": 0.1544104665517807, "learning_rate": 0.002, "loss": 2.5862, "step": 114950 }, { "epoch": 0.22902588295295168, "grad_norm": 0.13762924075126648, "learning_rate": 0.002, "loss": 2.5659, "step": 114960 }, { "epoch": 0.2290458051765906, "grad_norm": 0.18327759206295013, "learning_rate": 0.002, "loss": 2.5667, "step": 114970 }, { "epoch": 0.2290657274002295, "grad_norm": 0.16444696485996246, "learning_rate": 0.002, "loss": 2.5739, "step": 114980 }, { "epoch": 0.22908564962386843, "grad_norm": 0.1537347286939621, "learning_rate": 0.002, "loss": 2.5695, "step": 114990 }, { "epoch": 0.22910557184750732, "grad_norm": 0.16927628219127655, "learning_rate": 0.002, "loss": 2.5746, "step": 115000 }, { "epoch": 0.22912549407114624, "grad_norm": 0.21711860597133636, "learning_rate": 0.002, "loss": 2.5703, "step": 115010 }, { "epoch": 0.22914541629478516, "grad_norm": 0.14397810399532318, "learning_rate": 0.002, "loss": 2.5806, "step": 115020 }, { "epoch": 0.22916533851842408, "grad_norm": 0.1827598512172699, "learning_rate": 0.002, "loss": 2.5686, "step": 115030 }, { "epoch": 0.229185260742063, "grad_norm": 0.1853436529636383, "learning_rate": 0.002, "loss": 2.5701, "step": 115040 }, { "epoch": 0.2292051829657019, "grad_norm": 0.13741762936115265, "learning_rate": 0.002, "loss": 2.5693, "step": 115050 }, { "epoch": 0.2292251051893408, "grad_norm": 0.183857262134552, "learning_rate": 0.002, "loss": 2.5508, "step": 115060 }, { "epoch": 0.22924502741297972, "grad_norm": 0.18677349388599396, "learning_rate": 0.002, "loss": 2.5526, "step": 115070 }, { "epoch": 0.22926494963661864, "grad_norm": 0.16489577293395996, "learning_rate": 0.002, "loss": 2.548, "step": 115080 }, { "epoch": 0.22928487186025756, "grad_norm": 0.15418536961078644, "learning_rate": 0.002, "loss": 2.5649, "step": 115090 }, { "epoch": 0.22930479408389648, "grad_norm": 0.18053331971168518, "learning_rate": 0.002, "loss": 2.5693, "step": 115100 }, { "epoch": 0.22932471630753537, "grad_norm": 0.15256667137145996, "learning_rate": 0.002, "loss": 2.5414, "step": 115110 }, { "epoch": 0.2293446385311743, "grad_norm": 0.20131957530975342, "learning_rate": 0.002, "loss": 2.5479, "step": 115120 }, { "epoch": 0.2293645607548132, "grad_norm": 0.17670473456382751, "learning_rate": 0.002, "loss": 2.5764, "step": 115130 }, { "epoch": 0.22938448297845213, "grad_norm": 0.14969037473201752, "learning_rate": 0.002, "loss": 2.5752, "step": 115140 }, { "epoch": 0.22940440520209104, "grad_norm": 0.16245776414871216, "learning_rate": 0.002, "loss": 2.5823, "step": 115150 }, { "epoch": 0.22942432742572996, "grad_norm": 0.172838494181633, "learning_rate": 0.002, "loss": 2.5627, "step": 115160 }, { "epoch": 0.22944424964936885, "grad_norm": 0.15943345427513123, "learning_rate": 0.002, "loss": 2.5764, "step": 115170 }, { "epoch": 0.22946417187300777, "grad_norm": 0.18026065826416016, "learning_rate": 0.002, "loss": 2.5676, "step": 115180 }, { "epoch": 0.2294840940966467, "grad_norm": 0.17831343412399292, "learning_rate": 0.002, "loss": 2.571, "step": 115190 }, { "epoch": 0.2295040163202856, "grad_norm": 0.165022611618042, "learning_rate": 0.002, "loss": 2.5624, "step": 115200 }, { "epoch": 0.22952393854392453, "grad_norm": 0.16165409982204437, "learning_rate": 0.002, "loss": 2.553, "step": 115210 }, { "epoch": 0.22954386076756345, "grad_norm": 0.164758563041687, "learning_rate": 0.002, "loss": 2.5567, "step": 115220 }, { "epoch": 0.22956378299120234, "grad_norm": 0.13360726833343506, "learning_rate": 0.002, "loss": 2.5641, "step": 115230 }, { "epoch": 0.22958370521484125, "grad_norm": 0.17306366562843323, "learning_rate": 0.002, "loss": 2.5547, "step": 115240 }, { "epoch": 0.22960362743848017, "grad_norm": 0.17665736377239227, "learning_rate": 0.002, "loss": 2.5607, "step": 115250 }, { "epoch": 0.2296235496621191, "grad_norm": 0.15615922212600708, "learning_rate": 0.002, "loss": 2.5691, "step": 115260 }, { "epoch": 0.229643471885758, "grad_norm": 0.15449140965938568, "learning_rate": 0.002, "loss": 2.5598, "step": 115270 }, { "epoch": 0.2296633941093969, "grad_norm": 0.2009965032339096, "learning_rate": 0.002, "loss": 2.5535, "step": 115280 }, { "epoch": 0.22968331633303582, "grad_norm": 0.15584945678710938, "learning_rate": 0.002, "loss": 2.5827, "step": 115290 }, { "epoch": 0.22970323855667474, "grad_norm": 0.17576846480369568, "learning_rate": 0.002, "loss": 2.5662, "step": 115300 }, { "epoch": 0.22972316078031366, "grad_norm": 0.1875840276479721, "learning_rate": 0.002, "loss": 2.5562, "step": 115310 }, { "epoch": 0.22974308300395258, "grad_norm": 0.16684603691101074, "learning_rate": 0.002, "loss": 2.56, "step": 115320 }, { "epoch": 0.2297630052275915, "grad_norm": 0.16872374713420868, "learning_rate": 0.002, "loss": 2.5708, "step": 115330 }, { "epoch": 0.22978292745123038, "grad_norm": 0.16305384039878845, "learning_rate": 0.002, "loss": 2.5579, "step": 115340 }, { "epoch": 0.2298028496748693, "grad_norm": 0.1516839563846588, "learning_rate": 0.002, "loss": 2.5764, "step": 115350 }, { "epoch": 0.22982277189850822, "grad_norm": 0.15218810737133026, "learning_rate": 0.002, "loss": 2.5482, "step": 115360 }, { "epoch": 0.22984269412214714, "grad_norm": 0.159126877784729, "learning_rate": 0.002, "loss": 2.5844, "step": 115370 }, { "epoch": 0.22986261634578606, "grad_norm": 0.16638606786727905, "learning_rate": 0.002, "loss": 2.5727, "step": 115380 }, { "epoch": 0.22988253856942498, "grad_norm": 0.15908971428871155, "learning_rate": 0.002, "loss": 2.5501, "step": 115390 }, { "epoch": 0.22990246079306387, "grad_norm": 0.17900481820106506, "learning_rate": 0.002, "loss": 2.5382, "step": 115400 }, { "epoch": 0.22992238301670279, "grad_norm": 0.17075037956237793, "learning_rate": 0.002, "loss": 2.5745, "step": 115410 }, { "epoch": 0.2299423052403417, "grad_norm": 0.15937545895576477, "learning_rate": 0.002, "loss": 2.5528, "step": 115420 }, { "epoch": 0.22996222746398062, "grad_norm": 0.1584848314523697, "learning_rate": 0.002, "loss": 2.5648, "step": 115430 }, { "epoch": 0.22998214968761954, "grad_norm": 0.18235348165035248, "learning_rate": 0.002, "loss": 2.5697, "step": 115440 }, { "epoch": 0.23000207191125846, "grad_norm": 0.14344029128551483, "learning_rate": 0.002, "loss": 2.5703, "step": 115450 }, { "epoch": 0.23002199413489735, "grad_norm": 0.1474844217300415, "learning_rate": 0.002, "loss": 2.5683, "step": 115460 }, { "epoch": 0.23004191635853627, "grad_norm": 0.16404838860034943, "learning_rate": 0.002, "loss": 2.5634, "step": 115470 }, { "epoch": 0.2300618385821752, "grad_norm": 0.17277619242668152, "learning_rate": 0.002, "loss": 2.5678, "step": 115480 }, { "epoch": 0.2300817608058141, "grad_norm": 0.15832312405109406, "learning_rate": 0.002, "loss": 2.5595, "step": 115490 }, { "epoch": 0.23010168302945302, "grad_norm": 0.16693340241909027, "learning_rate": 0.002, "loss": 2.5667, "step": 115500 }, { "epoch": 0.23012160525309192, "grad_norm": 0.1881699413061142, "learning_rate": 0.002, "loss": 2.5572, "step": 115510 }, { "epoch": 0.23014152747673083, "grad_norm": 0.21381579339504242, "learning_rate": 0.002, "loss": 2.5692, "step": 115520 }, { "epoch": 0.23016144970036975, "grad_norm": 0.1489175260066986, "learning_rate": 0.002, "loss": 2.5685, "step": 115530 }, { "epoch": 0.23018137192400867, "grad_norm": 0.14399203658103943, "learning_rate": 0.002, "loss": 2.5558, "step": 115540 }, { "epoch": 0.2302012941476476, "grad_norm": 0.15152963995933533, "learning_rate": 0.002, "loss": 2.5564, "step": 115550 }, { "epoch": 0.2302212163712865, "grad_norm": 0.24271541833877563, "learning_rate": 0.002, "loss": 2.5608, "step": 115560 }, { "epoch": 0.2302411385949254, "grad_norm": 0.16780096292495728, "learning_rate": 0.002, "loss": 2.5713, "step": 115570 }, { "epoch": 0.23026106081856432, "grad_norm": 0.15090055763721466, "learning_rate": 0.002, "loss": 2.5634, "step": 115580 }, { "epoch": 0.23028098304220324, "grad_norm": 0.1802966296672821, "learning_rate": 0.002, "loss": 2.562, "step": 115590 }, { "epoch": 0.23030090526584215, "grad_norm": 0.20941311120986938, "learning_rate": 0.002, "loss": 2.5669, "step": 115600 }, { "epoch": 0.23032082748948107, "grad_norm": 0.16186879575252533, "learning_rate": 0.002, "loss": 2.5517, "step": 115610 }, { "epoch": 0.23034074971312, "grad_norm": 0.19952131807804108, "learning_rate": 0.002, "loss": 2.5801, "step": 115620 }, { "epoch": 0.23036067193675888, "grad_norm": 0.17290620505809784, "learning_rate": 0.002, "loss": 2.5698, "step": 115630 }, { "epoch": 0.2303805941603978, "grad_norm": 0.18067008256912231, "learning_rate": 0.002, "loss": 2.5643, "step": 115640 }, { "epoch": 0.23040051638403672, "grad_norm": 0.17774821817874908, "learning_rate": 0.002, "loss": 2.5542, "step": 115650 }, { "epoch": 0.23042043860767564, "grad_norm": 0.15958663821220398, "learning_rate": 0.002, "loss": 2.5715, "step": 115660 }, { "epoch": 0.23044036083131456, "grad_norm": 0.17030514776706696, "learning_rate": 0.002, "loss": 2.5724, "step": 115670 }, { "epoch": 0.23046028305495347, "grad_norm": 0.18658402562141418, "learning_rate": 0.002, "loss": 2.5636, "step": 115680 }, { "epoch": 0.23048020527859236, "grad_norm": 0.16742046177387238, "learning_rate": 0.002, "loss": 2.575, "step": 115690 }, { "epoch": 0.23050012750223128, "grad_norm": 0.17785342037677765, "learning_rate": 0.002, "loss": 2.5591, "step": 115700 }, { "epoch": 0.2305200497258702, "grad_norm": 0.18870514631271362, "learning_rate": 0.002, "loss": 2.5675, "step": 115710 }, { "epoch": 0.23053997194950912, "grad_norm": 0.16035951673984528, "learning_rate": 0.002, "loss": 2.5477, "step": 115720 }, { "epoch": 0.23055989417314804, "grad_norm": 0.17652317881584167, "learning_rate": 0.002, "loss": 2.5605, "step": 115730 }, { "epoch": 0.23057981639678696, "grad_norm": 0.1628103256225586, "learning_rate": 0.002, "loss": 2.5716, "step": 115740 }, { "epoch": 0.23059973862042585, "grad_norm": 0.21208208799362183, "learning_rate": 0.002, "loss": 2.5702, "step": 115750 }, { "epoch": 0.23061966084406477, "grad_norm": 0.16945932805538177, "learning_rate": 0.002, "loss": 2.5726, "step": 115760 }, { "epoch": 0.23063958306770369, "grad_norm": 0.1699218451976776, "learning_rate": 0.002, "loss": 2.5809, "step": 115770 }, { "epoch": 0.2306595052913426, "grad_norm": 0.14887213706970215, "learning_rate": 0.002, "loss": 2.5742, "step": 115780 }, { "epoch": 0.23067942751498152, "grad_norm": 0.17436522245407104, "learning_rate": 0.002, "loss": 2.5634, "step": 115790 }, { "epoch": 0.2306993497386204, "grad_norm": 0.1526283174753189, "learning_rate": 0.002, "loss": 2.5598, "step": 115800 }, { "epoch": 0.23071927196225933, "grad_norm": 0.32314562797546387, "learning_rate": 0.002, "loss": 2.5603, "step": 115810 }, { "epoch": 0.23073919418589825, "grad_norm": 0.17122574150562286, "learning_rate": 0.002, "loss": 2.5738, "step": 115820 }, { "epoch": 0.23075911640953717, "grad_norm": 0.14081837236881256, "learning_rate": 0.002, "loss": 2.5736, "step": 115830 }, { "epoch": 0.2307790386331761, "grad_norm": 0.18194910883903503, "learning_rate": 0.002, "loss": 2.5731, "step": 115840 }, { "epoch": 0.230798960856815, "grad_norm": 0.13650724291801453, "learning_rate": 0.002, "loss": 2.5643, "step": 115850 }, { "epoch": 0.2308188830804539, "grad_norm": 0.15401677787303925, "learning_rate": 0.002, "loss": 2.553, "step": 115860 }, { "epoch": 0.23083880530409281, "grad_norm": 0.1730876863002777, "learning_rate": 0.002, "loss": 2.5626, "step": 115870 }, { "epoch": 0.23085872752773173, "grad_norm": 0.15535584092140198, "learning_rate": 0.002, "loss": 2.5754, "step": 115880 }, { "epoch": 0.23087864975137065, "grad_norm": 0.18983855843544006, "learning_rate": 0.002, "loss": 2.5722, "step": 115890 }, { "epoch": 0.23089857197500957, "grad_norm": 0.15950153768062592, "learning_rate": 0.002, "loss": 2.5653, "step": 115900 }, { "epoch": 0.2309184941986485, "grad_norm": 0.15940970182418823, "learning_rate": 0.002, "loss": 2.5681, "step": 115910 }, { "epoch": 0.23093841642228738, "grad_norm": 0.18899688124656677, "learning_rate": 0.002, "loss": 2.5605, "step": 115920 }, { "epoch": 0.2309583386459263, "grad_norm": 0.16455093026161194, "learning_rate": 0.002, "loss": 2.5704, "step": 115930 }, { "epoch": 0.23097826086956522, "grad_norm": 0.1727285087108612, "learning_rate": 0.002, "loss": 2.5622, "step": 115940 }, { "epoch": 0.23099818309320413, "grad_norm": 0.17771394550800323, "learning_rate": 0.002, "loss": 2.5695, "step": 115950 }, { "epoch": 0.23101810531684305, "grad_norm": 0.194777250289917, "learning_rate": 0.002, "loss": 2.5522, "step": 115960 }, { "epoch": 0.23103802754048197, "grad_norm": 0.15298041701316833, "learning_rate": 0.002, "loss": 2.565, "step": 115970 }, { "epoch": 0.23105794976412086, "grad_norm": 0.17749351263046265, "learning_rate": 0.002, "loss": 2.5698, "step": 115980 }, { "epoch": 0.23107787198775978, "grad_norm": 0.15102343261241913, "learning_rate": 0.002, "loss": 2.5716, "step": 115990 }, { "epoch": 0.2310977942113987, "grad_norm": 0.16962489485740662, "learning_rate": 0.002, "loss": 2.5625, "step": 116000 }, { "epoch": 0.23111771643503762, "grad_norm": 0.20217177271842957, "learning_rate": 0.002, "loss": 2.5637, "step": 116010 }, { "epoch": 0.23113763865867654, "grad_norm": 0.19259202480316162, "learning_rate": 0.002, "loss": 2.5687, "step": 116020 }, { "epoch": 0.23115756088231543, "grad_norm": 0.15311011672019958, "learning_rate": 0.002, "loss": 2.5745, "step": 116030 }, { "epoch": 0.23117748310595435, "grad_norm": 0.15393464267253876, "learning_rate": 0.002, "loss": 2.5672, "step": 116040 }, { "epoch": 0.23119740532959326, "grad_norm": 0.16907131671905518, "learning_rate": 0.002, "loss": 2.5609, "step": 116050 }, { "epoch": 0.23121732755323218, "grad_norm": 0.16181108355522156, "learning_rate": 0.002, "loss": 2.5668, "step": 116060 }, { "epoch": 0.2312372497768711, "grad_norm": 0.1610611528158188, "learning_rate": 0.002, "loss": 2.5522, "step": 116070 }, { "epoch": 0.23125717200051002, "grad_norm": 0.16419367492198944, "learning_rate": 0.002, "loss": 2.5583, "step": 116080 }, { "epoch": 0.2312770942241489, "grad_norm": 0.21941787004470825, "learning_rate": 0.002, "loss": 2.5749, "step": 116090 }, { "epoch": 0.23129701644778783, "grad_norm": 0.15464748442173004, "learning_rate": 0.002, "loss": 2.5489, "step": 116100 }, { "epoch": 0.23131693867142675, "grad_norm": 0.1557684987783432, "learning_rate": 0.002, "loss": 2.5647, "step": 116110 }, { "epoch": 0.23133686089506567, "grad_norm": 0.19679176807403564, "learning_rate": 0.002, "loss": 2.5597, "step": 116120 }, { "epoch": 0.23135678311870458, "grad_norm": 0.18542182445526123, "learning_rate": 0.002, "loss": 2.5564, "step": 116130 }, { "epoch": 0.2313767053423435, "grad_norm": 0.19065681099891663, "learning_rate": 0.002, "loss": 2.5505, "step": 116140 }, { "epoch": 0.2313966275659824, "grad_norm": 0.18373271822929382, "learning_rate": 0.002, "loss": 2.5735, "step": 116150 }, { "epoch": 0.2314165497896213, "grad_norm": 0.16405387222766876, "learning_rate": 0.002, "loss": 2.5619, "step": 116160 }, { "epoch": 0.23143647201326023, "grad_norm": 0.16387566924095154, "learning_rate": 0.002, "loss": 2.5921, "step": 116170 }, { "epoch": 0.23145639423689915, "grad_norm": 0.15777529776096344, "learning_rate": 0.002, "loss": 2.5609, "step": 116180 }, { "epoch": 0.23147631646053807, "grad_norm": 0.15371505916118622, "learning_rate": 0.002, "loss": 2.5584, "step": 116190 }, { "epoch": 0.23149623868417699, "grad_norm": 0.16026367247104645, "learning_rate": 0.002, "loss": 2.5706, "step": 116200 }, { "epoch": 0.23151616090781588, "grad_norm": 0.147023007273674, "learning_rate": 0.002, "loss": 2.5747, "step": 116210 }, { "epoch": 0.2315360831314548, "grad_norm": 0.14484521746635437, "learning_rate": 0.002, "loss": 2.5563, "step": 116220 }, { "epoch": 0.2315560053550937, "grad_norm": 0.1734744757413864, "learning_rate": 0.002, "loss": 2.5531, "step": 116230 }, { "epoch": 0.23157592757873263, "grad_norm": 0.155508890748024, "learning_rate": 0.002, "loss": 2.563, "step": 116240 }, { "epoch": 0.23159584980237155, "grad_norm": 0.17121809720993042, "learning_rate": 0.002, "loss": 2.5629, "step": 116250 }, { "epoch": 0.23161577202601044, "grad_norm": 0.15344932675361633, "learning_rate": 0.002, "loss": 2.5637, "step": 116260 }, { "epoch": 0.23163569424964936, "grad_norm": 0.1688162088394165, "learning_rate": 0.002, "loss": 2.5566, "step": 116270 }, { "epoch": 0.23165561647328828, "grad_norm": 0.19247740507125854, "learning_rate": 0.002, "loss": 2.5619, "step": 116280 }, { "epoch": 0.2316755386969272, "grad_norm": 0.18452882766723633, "learning_rate": 0.002, "loss": 2.5585, "step": 116290 }, { "epoch": 0.23169546092056612, "grad_norm": 0.16462677717208862, "learning_rate": 0.002, "loss": 2.5627, "step": 116300 }, { "epoch": 0.23171538314420503, "grad_norm": 0.14846482872962952, "learning_rate": 0.002, "loss": 2.5665, "step": 116310 }, { "epoch": 0.23173530536784392, "grad_norm": 0.1785111129283905, "learning_rate": 0.002, "loss": 2.5692, "step": 116320 }, { "epoch": 0.23175522759148284, "grad_norm": 0.18853025138378143, "learning_rate": 0.002, "loss": 2.5605, "step": 116330 }, { "epoch": 0.23177514981512176, "grad_norm": 0.16693206131458282, "learning_rate": 0.002, "loss": 2.5641, "step": 116340 }, { "epoch": 0.23179507203876068, "grad_norm": 0.1646692156791687, "learning_rate": 0.002, "loss": 2.5603, "step": 116350 }, { "epoch": 0.2318149942623996, "grad_norm": 0.1882455199956894, "learning_rate": 0.002, "loss": 2.5648, "step": 116360 }, { "epoch": 0.23183491648603852, "grad_norm": 0.17404748499393463, "learning_rate": 0.002, "loss": 2.5641, "step": 116370 }, { "epoch": 0.2318548387096774, "grad_norm": 0.2197793871164322, "learning_rate": 0.002, "loss": 2.5548, "step": 116380 }, { "epoch": 0.23187476093331633, "grad_norm": 0.19081901013851166, "learning_rate": 0.002, "loss": 2.5656, "step": 116390 }, { "epoch": 0.23189468315695524, "grad_norm": 0.16246701776981354, "learning_rate": 0.002, "loss": 2.5511, "step": 116400 }, { "epoch": 0.23191460538059416, "grad_norm": 0.13703012466430664, "learning_rate": 0.002, "loss": 2.5823, "step": 116410 }, { "epoch": 0.23193452760423308, "grad_norm": 0.16762235760688782, "learning_rate": 0.002, "loss": 2.5598, "step": 116420 }, { "epoch": 0.231954449827872, "grad_norm": 0.18235082924365997, "learning_rate": 0.002, "loss": 2.561, "step": 116430 }, { "epoch": 0.2319743720515109, "grad_norm": 0.14375412464141846, "learning_rate": 0.002, "loss": 2.5796, "step": 116440 }, { "epoch": 0.2319942942751498, "grad_norm": 0.16433998942375183, "learning_rate": 0.002, "loss": 2.5667, "step": 116450 }, { "epoch": 0.23201421649878873, "grad_norm": 0.15041764080524445, "learning_rate": 0.002, "loss": 2.5634, "step": 116460 }, { "epoch": 0.23203413872242765, "grad_norm": 0.17357702553272247, "learning_rate": 0.002, "loss": 2.5563, "step": 116470 }, { "epoch": 0.23205406094606656, "grad_norm": 0.16044172644615173, "learning_rate": 0.002, "loss": 2.54, "step": 116480 }, { "epoch": 0.23207398316970548, "grad_norm": 0.16834068298339844, "learning_rate": 0.002, "loss": 2.5653, "step": 116490 }, { "epoch": 0.23209390539334437, "grad_norm": 0.18760186433792114, "learning_rate": 0.002, "loss": 2.567, "step": 116500 }, { "epoch": 0.2321138276169833, "grad_norm": 0.1764361411333084, "learning_rate": 0.002, "loss": 2.5938, "step": 116510 }, { "epoch": 0.2321337498406222, "grad_norm": 0.17586329579353333, "learning_rate": 0.002, "loss": 2.569, "step": 116520 }, { "epoch": 0.23215367206426113, "grad_norm": 0.19490312039852142, "learning_rate": 0.002, "loss": 2.5628, "step": 116530 }, { "epoch": 0.23217359428790005, "grad_norm": 0.18742060661315918, "learning_rate": 0.002, "loss": 2.5671, "step": 116540 }, { "epoch": 0.23219351651153894, "grad_norm": 0.1531662940979004, "learning_rate": 0.002, "loss": 2.5603, "step": 116550 }, { "epoch": 0.23221343873517786, "grad_norm": 0.16798701882362366, "learning_rate": 0.002, "loss": 2.5494, "step": 116560 }, { "epoch": 0.23223336095881678, "grad_norm": 0.16997502744197845, "learning_rate": 0.002, "loss": 2.5613, "step": 116570 }, { "epoch": 0.2322532831824557, "grad_norm": 0.17341278493404388, "learning_rate": 0.002, "loss": 2.5571, "step": 116580 }, { "epoch": 0.2322732054060946, "grad_norm": 0.1695123165845871, "learning_rate": 0.002, "loss": 2.5568, "step": 116590 }, { "epoch": 0.23229312762973353, "grad_norm": 0.17164477705955505, "learning_rate": 0.002, "loss": 2.5675, "step": 116600 }, { "epoch": 0.23231304985337242, "grad_norm": 0.17946304380893707, "learning_rate": 0.002, "loss": 2.5712, "step": 116610 }, { "epoch": 0.23233297207701134, "grad_norm": 0.1611025035381317, "learning_rate": 0.002, "loss": 2.5693, "step": 116620 }, { "epoch": 0.23235289430065026, "grad_norm": 0.1559453010559082, "learning_rate": 0.002, "loss": 2.5436, "step": 116630 }, { "epoch": 0.23237281652428918, "grad_norm": 0.4413302540779114, "learning_rate": 0.002, "loss": 2.5665, "step": 116640 }, { "epoch": 0.2323927387479281, "grad_norm": 0.1922641098499298, "learning_rate": 0.002, "loss": 2.5644, "step": 116650 }, { "epoch": 0.23241266097156701, "grad_norm": 0.15331731736660004, "learning_rate": 0.002, "loss": 2.5724, "step": 116660 }, { "epoch": 0.2324325831952059, "grad_norm": 0.18376010656356812, "learning_rate": 0.002, "loss": 2.576, "step": 116670 }, { "epoch": 0.23245250541884482, "grad_norm": 0.1584126055240631, "learning_rate": 0.002, "loss": 2.5683, "step": 116680 }, { "epoch": 0.23247242764248374, "grad_norm": 0.1826065629720688, "learning_rate": 0.002, "loss": 2.5744, "step": 116690 }, { "epoch": 0.23249234986612266, "grad_norm": 0.15195021033287048, "learning_rate": 0.002, "loss": 2.555, "step": 116700 }, { "epoch": 0.23251227208976158, "grad_norm": 0.17905746400356293, "learning_rate": 0.002, "loss": 2.5484, "step": 116710 }, { "epoch": 0.2325321943134005, "grad_norm": 0.1732911765575409, "learning_rate": 0.002, "loss": 2.555, "step": 116720 }, { "epoch": 0.2325521165370394, "grad_norm": 0.16064344346523285, "learning_rate": 0.002, "loss": 2.5676, "step": 116730 }, { "epoch": 0.2325720387606783, "grad_norm": 0.15024709701538086, "learning_rate": 0.002, "loss": 2.5698, "step": 116740 }, { "epoch": 0.23259196098431723, "grad_norm": 0.1777328997850418, "learning_rate": 0.002, "loss": 2.5668, "step": 116750 }, { "epoch": 0.23261188320795614, "grad_norm": 0.15480396151542664, "learning_rate": 0.002, "loss": 2.5644, "step": 116760 }, { "epoch": 0.23263180543159506, "grad_norm": 0.14926326274871826, "learning_rate": 0.002, "loss": 2.5636, "step": 116770 }, { "epoch": 0.23265172765523395, "grad_norm": 0.17273099720478058, "learning_rate": 0.002, "loss": 2.5723, "step": 116780 }, { "epoch": 0.23267164987887287, "grad_norm": 0.1632327437400818, "learning_rate": 0.002, "loss": 2.5636, "step": 116790 }, { "epoch": 0.2326915721025118, "grad_norm": 0.17017391324043274, "learning_rate": 0.002, "loss": 2.5599, "step": 116800 }, { "epoch": 0.2327114943261507, "grad_norm": 0.13987044990062714, "learning_rate": 0.002, "loss": 2.5568, "step": 116810 }, { "epoch": 0.23273141654978963, "grad_norm": 0.1921035200357437, "learning_rate": 0.002, "loss": 2.5755, "step": 116820 }, { "epoch": 0.23275133877342855, "grad_norm": 0.1918819546699524, "learning_rate": 0.002, "loss": 2.5463, "step": 116830 }, { "epoch": 0.23277126099706744, "grad_norm": 0.14359168708324432, "learning_rate": 0.002, "loss": 2.5688, "step": 116840 }, { "epoch": 0.23279118322070635, "grad_norm": 0.15504270792007446, "learning_rate": 0.002, "loss": 2.5648, "step": 116850 }, { "epoch": 0.23281110544434527, "grad_norm": 0.18258777260780334, "learning_rate": 0.002, "loss": 2.5854, "step": 116860 }, { "epoch": 0.2328310276679842, "grad_norm": 0.16505615413188934, "learning_rate": 0.002, "loss": 2.5695, "step": 116870 }, { "epoch": 0.2328509498916231, "grad_norm": 0.14023981988430023, "learning_rate": 0.002, "loss": 2.5545, "step": 116880 }, { "epoch": 0.23287087211526203, "grad_norm": 0.15190328657627106, "learning_rate": 0.002, "loss": 2.5603, "step": 116890 }, { "epoch": 0.23289079433890092, "grad_norm": 0.19686204195022583, "learning_rate": 0.002, "loss": 2.5692, "step": 116900 }, { "epoch": 0.23291071656253984, "grad_norm": 0.17900535464286804, "learning_rate": 0.002, "loss": 2.5615, "step": 116910 }, { "epoch": 0.23293063878617876, "grad_norm": 0.14637045562267303, "learning_rate": 0.002, "loss": 2.5594, "step": 116920 }, { "epoch": 0.23295056100981767, "grad_norm": 0.15481001138687134, "learning_rate": 0.002, "loss": 2.5628, "step": 116930 }, { "epoch": 0.2329704832334566, "grad_norm": 0.16086135804653168, "learning_rate": 0.002, "loss": 2.5766, "step": 116940 }, { "epoch": 0.2329904054570955, "grad_norm": 0.16127195954322815, "learning_rate": 0.002, "loss": 2.5743, "step": 116950 }, { "epoch": 0.2330103276807344, "grad_norm": 0.19316208362579346, "learning_rate": 0.002, "loss": 2.5635, "step": 116960 }, { "epoch": 0.23303024990437332, "grad_norm": 0.17262765765190125, "learning_rate": 0.002, "loss": 2.5691, "step": 116970 }, { "epoch": 0.23305017212801224, "grad_norm": 0.20850756764411926, "learning_rate": 0.002, "loss": 2.5688, "step": 116980 }, { "epoch": 0.23307009435165116, "grad_norm": 0.18336255848407745, "learning_rate": 0.002, "loss": 2.574, "step": 116990 }, { "epoch": 0.23309001657529008, "grad_norm": 0.17222699522972107, "learning_rate": 0.002, "loss": 2.555, "step": 117000 }, { "epoch": 0.233109938798929, "grad_norm": 0.1471174955368042, "learning_rate": 0.002, "loss": 2.5576, "step": 117010 }, { "epoch": 0.23312986102256789, "grad_norm": 0.15252996981143951, "learning_rate": 0.002, "loss": 2.5754, "step": 117020 }, { "epoch": 0.2331497832462068, "grad_norm": 0.18859753012657166, "learning_rate": 0.002, "loss": 2.5689, "step": 117030 }, { "epoch": 0.23316970546984572, "grad_norm": 0.1817503124475479, "learning_rate": 0.002, "loss": 2.5748, "step": 117040 }, { "epoch": 0.23318962769348464, "grad_norm": 0.14817966520786285, "learning_rate": 0.002, "loss": 2.5585, "step": 117050 }, { "epoch": 0.23320954991712356, "grad_norm": 0.2265937328338623, "learning_rate": 0.002, "loss": 2.5704, "step": 117060 }, { "epoch": 0.23322947214076245, "grad_norm": 0.17045150697231293, "learning_rate": 0.002, "loss": 2.5711, "step": 117070 }, { "epoch": 0.23324939436440137, "grad_norm": 0.16468942165374756, "learning_rate": 0.002, "loss": 2.5762, "step": 117080 }, { "epoch": 0.2332693165880403, "grad_norm": 0.1553841233253479, "learning_rate": 0.002, "loss": 2.5709, "step": 117090 }, { "epoch": 0.2332892388116792, "grad_norm": 0.17925181984901428, "learning_rate": 0.002, "loss": 2.5711, "step": 117100 }, { "epoch": 0.23330916103531812, "grad_norm": 0.15680116415023804, "learning_rate": 0.002, "loss": 2.5677, "step": 117110 }, { "epoch": 0.23332908325895704, "grad_norm": 0.17794466018676758, "learning_rate": 0.002, "loss": 2.5674, "step": 117120 }, { "epoch": 0.23334900548259593, "grad_norm": 0.20333130657672882, "learning_rate": 0.002, "loss": 2.5774, "step": 117130 }, { "epoch": 0.23336892770623485, "grad_norm": 0.17024238407611847, "learning_rate": 0.002, "loss": 2.5741, "step": 117140 }, { "epoch": 0.23338884992987377, "grad_norm": 0.18807923793792725, "learning_rate": 0.002, "loss": 2.5632, "step": 117150 }, { "epoch": 0.2334087721535127, "grad_norm": 0.15008579194545746, "learning_rate": 0.002, "loss": 2.5664, "step": 117160 }, { "epoch": 0.2334286943771516, "grad_norm": 0.1700938642024994, "learning_rate": 0.002, "loss": 2.5791, "step": 117170 }, { "epoch": 0.23344861660079053, "grad_norm": 0.1663770228624344, "learning_rate": 0.002, "loss": 2.5531, "step": 117180 }, { "epoch": 0.23346853882442942, "grad_norm": 0.177504763007164, "learning_rate": 0.002, "loss": 2.5641, "step": 117190 }, { "epoch": 0.23348846104806834, "grad_norm": 0.1778542548418045, "learning_rate": 0.002, "loss": 2.5793, "step": 117200 }, { "epoch": 0.23350838327170725, "grad_norm": 0.1612814962863922, "learning_rate": 0.002, "loss": 2.5603, "step": 117210 }, { "epoch": 0.23352830549534617, "grad_norm": 0.1736304759979248, "learning_rate": 0.002, "loss": 2.5654, "step": 117220 }, { "epoch": 0.2335482277189851, "grad_norm": 0.17440032958984375, "learning_rate": 0.002, "loss": 2.5806, "step": 117230 }, { "epoch": 0.233568149942624, "grad_norm": 0.19461330771446228, "learning_rate": 0.002, "loss": 2.5854, "step": 117240 }, { "epoch": 0.2335880721662629, "grad_norm": 0.16807089745998383, "learning_rate": 0.002, "loss": 2.5717, "step": 117250 }, { "epoch": 0.23360799438990182, "grad_norm": 0.175617054104805, "learning_rate": 0.002, "loss": 2.5623, "step": 117260 }, { "epoch": 0.23362791661354074, "grad_norm": 0.13768839836120605, "learning_rate": 0.002, "loss": 2.5573, "step": 117270 }, { "epoch": 0.23364783883717966, "grad_norm": 0.1717415153980255, "learning_rate": 0.002, "loss": 2.5684, "step": 117280 }, { "epoch": 0.23366776106081857, "grad_norm": 0.17046314477920532, "learning_rate": 0.002, "loss": 2.5654, "step": 117290 }, { "epoch": 0.23368768328445746, "grad_norm": 0.1711018830537796, "learning_rate": 0.002, "loss": 2.5593, "step": 117300 }, { "epoch": 0.23370760550809638, "grad_norm": 0.14563055336475372, "learning_rate": 0.002, "loss": 2.5602, "step": 117310 }, { "epoch": 0.2337275277317353, "grad_norm": 0.2034311443567276, "learning_rate": 0.002, "loss": 2.5648, "step": 117320 }, { "epoch": 0.23374744995537422, "grad_norm": 0.15972502529621124, "learning_rate": 0.002, "loss": 2.5599, "step": 117330 }, { "epoch": 0.23376737217901314, "grad_norm": 0.1438179910182953, "learning_rate": 0.002, "loss": 2.5746, "step": 117340 }, { "epoch": 0.23378729440265206, "grad_norm": 0.15612468123435974, "learning_rate": 0.002, "loss": 2.5683, "step": 117350 }, { "epoch": 0.23380721662629095, "grad_norm": 0.16119177639484406, "learning_rate": 0.002, "loss": 2.5599, "step": 117360 }, { "epoch": 0.23382713884992987, "grad_norm": 0.16641919314861298, "learning_rate": 0.002, "loss": 2.5617, "step": 117370 }, { "epoch": 0.23384706107356878, "grad_norm": 0.20943333208560944, "learning_rate": 0.002, "loss": 2.5762, "step": 117380 }, { "epoch": 0.2338669832972077, "grad_norm": 0.16217023134231567, "learning_rate": 0.002, "loss": 2.5645, "step": 117390 }, { "epoch": 0.23388690552084662, "grad_norm": 0.15370963513851166, "learning_rate": 0.002, "loss": 2.5499, "step": 117400 }, { "epoch": 0.23390682774448554, "grad_norm": 0.1754576861858368, "learning_rate": 0.002, "loss": 2.5622, "step": 117410 }, { "epoch": 0.23392674996812443, "grad_norm": 0.19093571603298187, "learning_rate": 0.002, "loss": 2.5703, "step": 117420 }, { "epoch": 0.23394667219176335, "grad_norm": 0.1700279414653778, "learning_rate": 0.002, "loss": 2.5611, "step": 117430 }, { "epoch": 0.23396659441540227, "grad_norm": 0.17193017899990082, "learning_rate": 0.002, "loss": 2.5507, "step": 117440 }, { "epoch": 0.2339865166390412, "grad_norm": 0.1567603200674057, "learning_rate": 0.002, "loss": 2.5688, "step": 117450 }, { "epoch": 0.2340064388626801, "grad_norm": 0.1611037254333496, "learning_rate": 0.002, "loss": 2.5651, "step": 117460 }, { "epoch": 0.23402636108631902, "grad_norm": 0.20304130017757416, "learning_rate": 0.002, "loss": 2.5697, "step": 117470 }, { "epoch": 0.23404628330995791, "grad_norm": 0.1583016961812973, "learning_rate": 0.002, "loss": 2.5675, "step": 117480 }, { "epoch": 0.23406620553359683, "grad_norm": 0.1835184097290039, "learning_rate": 0.002, "loss": 2.5537, "step": 117490 }, { "epoch": 0.23408612775723575, "grad_norm": 0.16254064440727234, "learning_rate": 0.002, "loss": 2.5641, "step": 117500 }, { "epoch": 0.23410604998087467, "grad_norm": 0.15686661005020142, "learning_rate": 0.002, "loss": 2.5739, "step": 117510 }, { "epoch": 0.2341259722045136, "grad_norm": 0.1533992886543274, "learning_rate": 0.002, "loss": 2.5858, "step": 117520 }, { "epoch": 0.23414589442815248, "grad_norm": 0.23732376098632812, "learning_rate": 0.002, "loss": 2.569, "step": 117530 }, { "epoch": 0.2341658166517914, "grad_norm": 0.13385197520256042, "learning_rate": 0.002, "loss": 2.561, "step": 117540 }, { "epoch": 0.23418573887543032, "grad_norm": 0.1686142235994339, "learning_rate": 0.002, "loss": 2.5645, "step": 117550 }, { "epoch": 0.23420566109906923, "grad_norm": 0.1570844203233719, "learning_rate": 0.002, "loss": 2.5785, "step": 117560 }, { "epoch": 0.23422558332270815, "grad_norm": 0.19347332417964935, "learning_rate": 0.002, "loss": 2.5675, "step": 117570 }, { "epoch": 0.23424550554634707, "grad_norm": 0.1674286425113678, "learning_rate": 0.002, "loss": 2.5721, "step": 117580 }, { "epoch": 0.23426542776998596, "grad_norm": 0.15224601328372955, "learning_rate": 0.002, "loss": 2.5803, "step": 117590 }, { "epoch": 0.23428534999362488, "grad_norm": 0.15761835873126984, "learning_rate": 0.002, "loss": 2.5663, "step": 117600 }, { "epoch": 0.2343052722172638, "grad_norm": 0.15821661055088043, "learning_rate": 0.002, "loss": 2.5678, "step": 117610 }, { "epoch": 0.23432519444090272, "grad_norm": 0.15412098169326782, "learning_rate": 0.002, "loss": 2.5725, "step": 117620 }, { "epoch": 0.23434511666454164, "grad_norm": 0.17065761983394623, "learning_rate": 0.002, "loss": 2.584, "step": 117630 }, { "epoch": 0.23436503888818055, "grad_norm": 0.14731653034687042, "learning_rate": 0.002, "loss": 2.5668, "step": 117640 }, { "epoch": 0.23438496111181945, "grad_norm": 0.1811225712299347, "learning_rate": 0.002, "loss": 2.5635, "step": 117650 }, { "epoch": 0.23440488333545836, "grad_norm": 0.17726001143455505, "learning_rate": 0.002, "loss": 2.5629, "step": 117660 }, { "epoch": 0.23442480555909728, "grad_norm": 0.16587713360786438, "learning_rate": 0.002, "loss": 2.5658, "step": 117670 }, { "epoch": 0.2344447277827362, "grad_norm": 0.18311607837677002, "learning_rate": 0.002, "loss": 2.5514, "step": 117680 }, { "epoch": 0.23446465000637512, "grad_norm": 0.24421142041683197, "learning_rate": 0.002, "loss": 2.5489, "step": 117690 }, { "epoch": 0.23448457223001404, "grad_norm": 0.15733447670936584, "learning_rate": 0.002, "loss": 2.5578, "step": 117700 }, { "epoch": 0.23450449445365293, "grad_norm": 0.16207174956798553, "learning_rate": 0.002, "loss": 2.565, "step": 117710 }, { "epoch": 0.23452441667729185, "grad_norm": 0.16348111629486084, "learning_rate": 0.002, "loss": 2.5815, "step": 117720 }, { "epoch": 0.23454433890093077, "grad_norm": 0.19602516293525696, "learning_rate": 0.002, "loss": 2.5611, "step": 117730 }, { "epoch": 0.23456426112456968, "grad_norm": 0.1741107702255249, "learning_rate": 0.002, "loss": 2.555, "step": 117740 }, { "epoch": 0.2345841833482086, "grad_norm": 0.20958831906318665, "learning_rate": 0.002, "loss": 2.5851, "step": 117750 }, { "epoch": 0.23460410557184752, "grad_norm": 0.14827372133731842, "learning_rate": 0.002, "loss": 2.5645, "step": 117760 }, { "epoch": 0.2346240277954864, "grad_norm": 0.1495741754770279, "learning_rate": 0.002, "loss": 2.5845, "step": 117770 }, { "epoch": 0.23464395001912533, "grad_norm": 0.16658684611320496, "learning_rate": 0.002, "loss": 2.5579, "step": 117780 }, { "epoch": 0.23466387224276425, "grad_norm": 0.19801238179206848, "learning_rate": 0.002, "loss": 2.5552, "step": 117790 }, { "epoch": 0.23468379446640317, "grad_norm": 0.16585002839565277, "learning_rate": 0.002, "loss": 2.5816, "step": 117800 }, { "epoch": 0.23470371669004209, "grad_norm": 0.15088529884815216, "learning_rate": 0.002, "loss": 2.5519, "step": 117810 }, { "epoch": 0.23472363891368098, "grad_norm": 0.18839624524116516, "learning_rate": 0.002, "loss": 2.5682, "step": 117820 }, { "epoch": 0.2347435611373199, "grad_norm": 0.16129128634929657, "learning_rate": 0.002, "loss": 2.5606, "step": 117830 }, { "epoch": 0.2347634833609588, "grad_norm": 0.1601232886314392, "learning_rate": 0.002, "loss": 2.5694, "step": 117840 }, { "epoch": 0.23478340558459773, "grad_norm": 0.16526848077774048, "learning_rate": 0.002, "loss": 2.5648, "step": 117850 }, { "epoch": 0.23480332780823665, "grad_norm": 0.17454081773757935, "learning_rate": 0.002, "loss": 2.5807, "step": 117860 }, { "epoch": 0.23482325003187557, "grad_norm": 0.16703493893146515, "learning_rate": 0.002, "loss": 2.5578, "step": 117870 }, { "epoch": 0.23484317225551446, "grad_norm": 0.1778455376625061, "learning_rate": 0.002, "loss": 2.5728, "step": 117880 }, { "epoch": 0.23486309447915338, "grad_norm": 0.15508082509040833, "learning_rate": 0.002, "loss": 2.5507, "step": 117890 }, { "epoch": 0.2348830167027923, "grad_norm": 0.16479504108428955, "learning_rate": 0.002, "loss": 2.5812, "step": 117900 }, { "epoch": 0.23490293892643122, "grad_norm": 0.17719684541225433, "learning_rate": 0.002, "loss": 2.5688, "step": 117910 }, { "epoch": 0.23492286115007013, "grad_norm": 0.15988801419734955, "learning_rate": 0.002, "loss": 2.5738, "step": 117920 }, { "epoch": 0.23494278337370905, "grad_norm": 0.18005681037902832, "learning_rate": 0.002, "loss": 2.5579, "step": 117930 }, { "epoch": 0.23496270559734794, "grad_norm": 0.1780998259782791, "learning_rate": 0.002, "loss": 2.5595, "step": 117940 }, { "epoch": 0.23498262782098686, "grad_norm": 0.14905187487602234, "learning_rate": 0.002, "loss": 2.5624, "step": 117950 }, { "epoch": 0.23500255004462578, "grad_norm": 0.14388950169086456, "learning_rate": 0.002, "loss": 2.5738, "step": 117960 }, { "epoch": 0.2350224722682647, "grad_norm": 0.22644434869289398, "learning_rate": 0.002, "loss": 2.5651, "step": 117970 }, { "epoch": 0.23504239449190362, "grad_norm": 0.25756773352622986, "learning_rate": 0.002, "loss": 2.557, "step": 117980 }, { "epoch": 0.23506231671554254, "grad_norm": 0.16712351143360138, "learning_rate": 0.002, "loss": 2.5602, "step": 117990 }, { "epoch": 0.23508223893918143, "grad_norm": 0.16286224126815796, "learning_rate": 0.002, "loss": 2.5751, "step": 118000 }, { "epoch": 0.23510216116282034, "grad_norm": 0.16765832901000977, "learning_rate": 0.002, "loss": 2.576, "step": 118010 }, { "epoch": 0.23512208338645926, "grad_norm": 0.3303685486316681, "learning_rate": 0.002, "loss": 2.5584, "step": 118020 }, { "epoch": 0.23514200561009818, "grad_norm": 0.20845447480678558, "learning_rate": 0.002, "loss": 2.575, "step": 118030 }, { "epoch": 0.2351619278337371, "grad_norm": 0.15532547235488892, "learning_rate": 0.002, "loss": 2.5781, "step": 118040 }, { "epoch": 0.235181850057376, "grad_norm": 0.17392025887966156, "learning_rate": 0.002, "loss": 2.5465, "step": 118050 }, { "epoch": 0.2352017722810149, "grad_norm": 0.16923551261425018, "learning_rate": 0.002, "loss": 2.5762, "step": 118060 }, { "epoch": 0.23522169450465383, "grad_norm": 0.1924189180135727, "learning_rate": 0.002, "loss": 2.5605, "step": 118070 }, { "epoch": 0.23524161672829275, "grad_norm": 0.14973345398902893, "learning_rate": 0.002, "loss": 2.5694, "step": 118080 }, { "epoch": 0.23526153895193166, "grad_norm": 0.17327487468719482, "learning_rate": 0.002, "loss": 2.5531, "step": 118090 }, { "epoch": 0.23528146117557058, "grad_norm": 0.1857168972492218, "learning_rate": 0.002, "loss": 2.5696, "step": 118100 }, { "epoch": 0.23530138339920947, "grad_norm": 0.1733434647321701, "learning_rate": 0.002, "loss": 2.5722, "step": 118110 }, { "epoch": 0.2353213056228484, "grad_norm": 0.15320126712322235, "learning_rate": 0.002, "loss": 2.5691, "step": 118120 }, { "epoch": 0.2353412278464873, "grad_norm": 0.1656416803598404, "learning_rate": 0.002, "loss": 2.552, "step": 118130 }, { "epoch": 0.23536115007012623, "grad_norm": 0.16724221408367157, "learning_rate": 0.002, "loss": 2.5627, "step": 118140 }, { "epoch": 0.23538107229376515, "grad_norm": 0.16067838668823242, "learning_rate": 0.002, "loss": 2.5776, "step": 118150 }, { "epoch": 0.23540099451740407, "grad_norm": 0.1743420958518982, "learning_rate": 0.002, "loss": 2.5635, "step": 118160 }, { "epoch": 0.23542091674104296, "grad_norm": 0.16040997207164764, "learning_rate": 0.002, "loss": 2.5818, "step": 118170 }, { "epoch": 0.23544083896468188, "grad_norm": 0.14867474138736725, "learning_rate": 0.002, "loss": 2.5671, "step": 118180 }, { "epoch": 0.2354607611883208, "grad_norm": 0.16561686992645264, "learning_rate": 0.002, "loss": 2.5701, "step": 118190 }, { "epoch": 0.2354806834119597, "grad_norm": 0.18907083570957184, "learning_rate": 0.002, "loss": 2.5608, "step": 118200 }, { "epoch": 0.23550060563559863, "grad_norm": 0.2101968228816986, "learning_rate": 0.002, "loss": 2.5685, "step": 118210 }, { "epoch": 0.23552052785923755, "grad_norm": 0.18114280700683594, "learning_rate": 0.002, "loss": 2.5717, "step": 118220 }, { "epoch": 0.23554045008287644, "grad_norm": 0.16252437233924866, "learning_rate": 0.002, "loss": 2.5709, "step": 118230 }, { "epoch": 0.23556037230651536, "grad_norm": 0.17342163622379303, "learning_rate": 0.002, "loss": 2.5623, "step": 118240 }, { "epoch": 0.23558029453015428, "grad_norm": 0.21160124242305756, "learning_rate": 0.002, "loss": 2.5751, "step": 118250 }, { "epoch": 0.2356002167537932, "grad_norm": 0.15541110932826996, "learning_rate": 0.002, "loss": 2.5608, "step": 118260 }, { "epoch": 0.23562013897743211, "grad_norm": 0.17426669597625732, "learning_rate": 0.002, "loss": 2.5647, "step": 118270 }, { "epoch": 0.235640061201071, "grad_norm": 0.1707051694393158, "learning_rate": 0.002, "loss": 2.5682, "step": 118280 }, { "epoch": 0.23565998342470992, "grad_norm": 0.14580142498016357, "learning_rate": 0.002, "loss": 2.5591, "step": 118290 }, { "epoch": 0.23567990564834884, "grad_norm": 0.1657366156578064, "learning_rate": 0.002, "loss": 2.5708, "step": 118300 }, { "epoch": 0.23569982787198776, "grad_norm": 0.1987573802471161, "learning_rate": 0.002, "loss": 2.5737, "step": 118310 }, { "epoch": 0.23571975009562668, "grad_norm": 0.1550772488117218, "learning_rate": 0.002, "loss": 2.5822, "step": 118320 }, { "epoch": 0.2357396723192656, "grad_norm": 0.1876343935728073, "learning_rate": 0.002, "loss": 2.5675, "step": 118330 }, { "epoch": 0.2357595945429045, "grad_norm": 0.18149729073047638, "learning_rate": 0.002, "loss": 2.564, "step": 118340 }, { "epoch": 0.2357795167665434, "grad_norm": 0.14772258698940277, "learning_rate": 0.002, "loss": 2.5726, "step": 118350 }, { "epoch": 0.23579943899018233, "grad_norm": 0.1899775266647339, "learning_rate": 0.002, "loss": 2.5668, "step": 118360 }, { "epoch": 0.23581936121382124, "grad_norm": 0.15913154184818268, "learning_rate": 0.002, "loss": 2.5664, "step": 118370 }, { "epoch": 0.23583928343746016, "grad_norm": 0.1905754953622818, "learning_rate": 0.002, "loss": 2.5696, "step": 118380 }, { "epoch": 0.23585920566109908, "grad_norm": 0.1594778150320053, "learning_rate": 0.002, "loss": 2.5586, "step": 118390 }, { "epoch": 0.23587912788473797, "grad_norm": 0.19394727051258087, "learning_rate": 0.002, "loss": 2.563, "step": 118400 }, { "epoch": 0.2358990501083769, "grad_norm": 0.16129952669143677, "learning_rate": 0.002, "loss": 2.5731, "step": 118410 }, { "epoch": 0.2359189723320158, "grad_norm": 0.15925097465515137, "learning_rate": 0.002, "loss": 2.5724, "step": 118420 }, { "epoch": 0.23593889455565473, "grad_norm": 0.20318464934825897, "learning_rate": 0.002, "loss": 2.5607, "step": 118430 }, { "epoch": 0.23595881677929365, "grad_norm": 0.1565612405538559, "learning_rate": 0.002, "loss": 2.5703, "step": 118440 }, { "epoch": 0.23597873900293256, "grad_norm": 0.18987810611724854, "learning_rate": 0.002, "loss": 2.5637, "step": 118450 }, { "epoch": 0.23599866122657145, "grad_norm": 0.15898412466049194, "learning_rate": 0.002, "loss": 2.561, "step": 118460 }, { "epoch": 0.23601858345021037, "grad_norm": 0.18035152554512024, "learning_rate": 0.002, "loss": 2.5815, "step": 118470 }, { "epoch": 0.2360385056738493, "grad_norm": 0.15700088441371918, "learning_rate": 0.002, "loss": 2.5579, "step": 118480 }, { "epoch": 0.2360584278974882, "grad_norm": 0.1940508931875229, "learning_rate": 0.002, "loss": 2.5725, "step": 118490 }, { "epoch": 0.23607835012112713, "grad_norm": 0.18905562162399292, "learning_rate": 0.002, "loss": 2.571, "step": 118500 }, { "epoch": 0.23609827234476605, "grad_norm": 0.1809842884540558, "learning_rate": 0.002, "loss": 2.5737, "step": 118510 }, { "epoch": 0.23611819456840494, "grad_norm": 0.14798034727573395, "learning_rate": 0.002, "loss": 2.568, "step": 118520 }, { "epoch": 0.23613811679204386, "grad_norm": 0.14348134398460388, "learning_rate": 0.002, "loss": 2.5722, "step": 118530 }, { "epoch": 0.23615803901568277, "grad_norm": 0.20076614618301392, "learning_rate": 0.002, "loss": 2.5656, "step": 118540 }, { "epoch": 0.2361779612393217, "grad_norm": 0.18165555596351624, "learning_rate": 0.002, "loss": 2.557, "step": 118550 }, { "epoch": 0.2361978834629606, "grad_norm": 0.16408787667751312, "learning_rate": 0.002, "loss": 2.5705, "step": 118560 }, { "epoch": 0.2362178056865995, "grad_norm": 0.15930035710334778, "learning_rate": 0.002, "loss": 2.5609, "step": 118570 }, { "epoch": 0.23623772791023842, "grad_norm": 0.1711670458316803, "learning_rate": 0.002, "loss": 2.5813, "step": 118580 }, { "epoch": 0.23625765013387734, "grad_norm": 0.18837660551071167, "learning_rate": 0.002, "loss": 2.5728, "step": 118590 }, { "epoch": 0.23627757235751626, "grad_norm": 0.18006287515163422, "learning_rate": 0.002, "loss": 2.5706, "step": 118600 }, { "epoch": 0.23629749458115518, "grad_norm": 0.17830319702625275, "learning_rate": 0.002, "loss": 2.5482, "step": 118610 }, { "epoch": 0.2363174168047941, "grad_norm": 0.15022575855255127, "learning_rate": 0.002, "loss": 2.5579, "step": 118620 }, { "epoch": 0.23633733902843299, "grad_norm": 0.15627866983413696, "learning_rate": 0.002, "loss": 2.5612, "step": 118630 }, { "epoch": 0.2363572612520719, "grad_norm": 0.19829939305782318, "learning_rate": 0.002, "loss": 2.5706, "step": 118640 }, { "epoch": 0.23637718347571082, "grad_norm": 0.18811210989952087, "learning_rate": 0.002, "loss": 2.5583, "step": 118650 }, { "epoch": 0.23639710569934974, "grad_norm": 0.13009773194789886, "learning_rate": 0.002, "loss": 2.5688, "step": 118660 }, { "epoch": 0.23641702792298866, "grad_norm": 0.15436838567256927, "learning_rate": 0.002, "loss": 2.5521, "step": 118670 }, { "epoch": 0.23643695014662758, "grad_norm": 0.18290677666664124, "learning_rate": 0.002, "loss": 2.5692, "step": 118680 }, { "epoch": 0.23645687237026647, "grad_norm": 0.1653725504875183, "learning_rate": 0.002, "loss": 2.5742, "step": 118690 }, { "epoch": 0.2364767945939054, "grad_norm": 0.1647162139415741, "learning_rate": 0.002, "loss": 2.5676, "step": 118700 }, { "epoch": 0.2364967168175443, "grad_norm": 0.16156911849975586, "learning_rate": 0.002, "loss": 2.5726, "step": 118710 }, { "epoch": 0.23651663904118322, "grad_norm": 0.1481451690196991, "learning_rate": 0.002, "loss": 2.5612, "step": 118720 }, { "epoch": 0.23653656126482214, "grad_norm": 0.1673586517572403, "learning_rate": 0.002, "loss": 2.5572, "step": 118730 }, { "epoch": 0.23655648348846106, "grad_norm": 0.1659892499446869, "learning_rate": 0.002, "loss": 2.5654, "step": 118740 }, { "epoch": 0.23657640571209995, "grad_norm": 0.1867797076702118, "learning_rate": 0.002, "loss": 2.5769, "step": 118750 }, { "epoch": 0.23659632793573887, "grad_norm": 0.1723976582288742, "learning_rate": 0.002, "loss": 2.5517, "step": 118760 }, { "epoch": 0.2366162501593778, "grad_norm": 0.15805934369564056, "learning_rate": 0.002, "loss": 2.5649, "step": 118770 }, { "epoch": 0.2366361723830167, "grad_norm": 0.16247719526290894, "learning_rate": 0.002, "loss": 2.5637, "step": 118780 }, { "epoch": 0.23665609460665563, "grad_norm": 0.16651591658592224, "learning_rate": 0.002, "loss": 2.5586, "step": 118790 }, { "epoch": 0.23667601683029452, "grad_norm": 0.16011391580104828, "learning_rate": 0.002, "loss": 2.5547, "step": 118800 }, { "epoch": 0.23669593905393344, "grad_norm": 0.14629825949668884, "learning_rate": 0.002, "loss": 2.5625, "step": 118810 }, { "epoch": 0.23671586127757235, "grad_norm": 0.282479465007782, "learning_rate": 0.002, "loss": 2.5588, "step": 118820 }, { "epoch": 0.23673578350121127, "grad_norm": 0.1446869820356369, "learning_rate": 0.002, "loss": 2.57, "step": 118830 }, { "epoch": 0.2367557057248502, "grad_norm": 0.15358546376228333, "learning_rate": 0.002, "loss": 2.5663, "step": 118840 }, { "epoch": 0.2367756279484891, "grad_norm": 0.17068438231945038, "learning_rate": 0.002, "loss": 2.5762, "step": 118850 }, { "epoch": 0.236795550172128, "grad_norm": 0.16157107055187225, "learning_rate": 0.002, "loss": 2.5617, "step": 118860 }, { "epoch": 0.23681547239576692, "grad_norm": 0.15342172980308533, "learning_rate": 0.002, "loss": 2.5599, "step": 118870 }, { "epoch": 0.23683539461940584, "grad_norm": 0.17001253366470337, "learning_rate": 0.002, "loss": 2.5654, "step": 118880 }, { "epoch": 0.23685531684304476, "grad_norm": 0.15737341344356537, "learning_rate": 0.002, "loss": 2.5559, "step": 118890 }, { "epoch": 0.23687523906668367, "grad_norm": 0.16411660611629486, "learning_rate": 0.002, "loss": 2.5778, "step": 118900 }, { "epoch": 0.2368951612903226, "grad_norm": 0.16712208092212677, "learning_rate": 0.002, "loss": 2.5558, "step": 118910 }, { "epoch": 0.23691508351396148, "grad_norm": 0.22246280312538147, "learning_rate": 0.002, "loss": 2.58, "step": 118920 }, { "epoch": 0.2369350057376004, "grad_norm": 0.1611519604921341, "learning_rate": 0.002, "loss": 2.564, "step": 118930 }, { "epoch": 0.23695492796123932, "grad_norm": 0.17834459245204926, "learning_rate": 0.002, "loss": 2.5568, "step": 118940 }, { "epoch": 0.23697485018487824, "grad_norm": 0.15419867634773254, "learning_rate": 0.002, "loss": 2.5771, "step": 118950 }, { "epoch": 0.23699477240851716, "grad_norm": 0.16199567914009094, "learning_rate": 0.002, "loss": 2.5781, "step": 118960 }, { "epoch": 0.23701469463215608, "grad_norm": 0.17934498190879822, "learning_rate": 0.002, "loss": 2.5587, "step": 118970 }, { "epoch": 0.23703461685579497, "grad_norm": 0.18306194245815277, "learning_rate": 0.002, "loss": 2.5671, "step": 118980 }, { "epoch": 0.23705453907943388, "grad_norm": 0.1627856194972992, "learning_rate": 0.002, "loss": 2.5555, "step": 118990 }, { "epoch": 0.2370744613030728, "grad_norm": 0.15933416783809662, "learning_rate": 0.002, "loss": 2.5564, "step": 119000 }, { "epoch": 0.23709438352671172, "grad_norm": 0.1618032604455948, "learning_rate": 0.002, "loss": 2.5536, "step": 119010 }, { "epoch": 0.23711430575035064, "grad_norm": 0.21889039874076843, "learning_rate": 0.002, "loss": 2.5702, "step": 119020 }, { "epoch": 0.23713422797398956, "grad_norm": 0.18913309276103973, "learning_rate": 0.002, "loss": 2.5758, "step": 119030 }, { "epoch": 0.23715415019762845, "grad_norm": 0.1735134720802307, "learning_rate": 0.002, "loss": 2.5712, "step": 119040 }, { "epoch": 0.23717407242126737, "grad_norm": 0.15698714554309845, "learning_rate": 0.002, "loss": 2.5626, "step": 119050 }, { "epoch": 0.2371939946449063, "grad_norm": 0.1470717191696167, "learning_rate": 0.002, "loss": 2.5643, "step": 119060 }, { "epoch": 0.2372139168685452, "grad_norm": 0.20892640948295593, "learning_rate": 0.002, "loss": 2.5501, "step": 119070 }, { "epoch": 0.23723383909218412, "grad_norm": 0.1946018636226654, "learning_rate": 0.002, "loss": 2.554, "step": 119080 }, { "epoch": 0.23725376131582301, "grad_norm": 0.15587256848812103, "learning_rate": 0.002, "loss": 2.5749, "step": 119090 }, { "epoch": 0.23727368353946193, "grad_norm": 0.17947234213352203, "learning_rate": 0.002, "loss": 2.5704, "step": 119100 }, { "epoch": 0.23729360576310085, "grad_norm": 0.15261396765708923, "learning_rate": 0.002, "loss": 2.5657, "step": 119110 }, { "epoch": 0.23731352798673977, "grad_norm": 0.15453629195690155, "learning_rate": 0.002, "loss": 2.5699, "step": 119120 }, { "epoch": 0.2373334502103787, "grad_norm": 0.19130012392997742, "learning_rate": 0.002, "loss": 2.5533, "step": 119130 }, { "epoch": 0.2373533724340176, "grad_norm": 0.15542201697826385, "learning_rate": 0.002, "loss": 2.5775, "step": 119140 }, { "epoch": 0.2373732946576565, "grad_norm": 0.1765022873878479, "learning_rate": 0.002, "loss": 2.5722, "step": 119150 }, { "epoch": 0.23739321688129542, "grad_norm": 0.16837060451507568, "learning_rate": 0.002, "loss": 2.547, "step": 119160 }, { "epoch": 0.23741313910493433, "grad_norm": 0.16035211086273193, "learning_rate": 0.002, "loss": 2.5635, "step": 119170 }, { "epoch": 0.23743306132857325, "grad_norm": 0.16245444118976593, "learning_rate": 0.002, "loss": 2.564, "step": 119180 }, { "epoch": 0.23745298355221217, "grad_norm": 0.16110235452651978, "learning_rate": 0.002, "loss": 2.5837, "step": 119190 }, { "epoch": 0.2374729057758511, "grad_norm": 0.18407289683818817, "learning_rate": 0.002, "loss": 2.566, "step": 119200 }, { "epoch": 0.23749282799948998, "grad_norm": 0.17186929285526276, "learning_rate": 0.002, "loss": 2.5722, "step": 119210 }, { "epoch": 0.2375127502231289, "grad_norm": 0.15411360561847687, "learning_rate": 0.002, "loss": 2.5639, "step": 119220 }, { "epoch": 0.23753267244676782, "grad_norm": 0.19480594992637634, "learning_rate": 0.002, "loss": 2.5544, "step": 119230 }, { "epoch": 0.23755259467040674, "grad_norm": 0.20315785706043243, "learning_rate": 0.002, "loss": 2.5798, "step": 119240 }, { "epoch": 0.23757251689404565, "grad_norm": 0.14082171022891998, "learning_rate": 0.002, "loss": 2.5695, "step": 119250 }, { "epoch": 0.23759243911768457, "grad_norm": 0.1851949691772461, "learning_rate": 0.002, "loss": 2.5734, "step": 119260 }, { "epoch": 0.23761236134132346, "grad_norm": 0.254787802696228, "learning_rate": 0.002, "loss": 2.5629, "step": 119270 }, { "epoch": 0.23763228356496238, "grad_norm": 0.1718423068523407, "learning_rate": 0.002, "loss": 2.5453, "step": 119280 }, { "epoch": 0.2376522057886013, "grad_norm": 0.18497979640960693, "learning_rate": 0.002, "loss": 2.5704, "step": 119290 }, { "epoch": 0.23767212801224022, "grad_norm": 0.16674457490444183, "learning_rate": 0.002, "loss": 2.5703, "step": 119300 }, { "epoch": 0.23769205023587914, "grad_norm": 0.22718872129917145, "learning_rate": 0.002, "loss": 2.563, "step": 119310 }, { "epoch": 0.23771197245951803, "grad_norm": 0.15179170668125153, "learning_rate": 0.002, "loss": 2.5625, "step": 119320 }, { "epoch": 0.23773189468315695, "grad_norm": 0.1807364672422409, "learning_rate": 0.002, "loss": 2.5793, "step": 119330 }, { "epoch": 0.23775181690679587, "grad_norm": 0.1904730200767517, "learning_rate": 0.002, "loss": 2.5677, "step": 119340 }, { "epoch": 0.23777173913043478, "grad_norm": 0.14843104779720306, "learning_rate": 0.002, "loss": 2.5571, "step": 119350 }, { "epoch": 0.2377916613540737, "grad_norm": 0.1450379192829132, "learning_rate": 0.002, "loss": 2.569, "step": 119360 }, { "epoch": 0.23781158357771262, "grad_norm": 0.14603029191493988, "learning_rate": 0.002, "loss": 2.5632, "step": 119370 }, { "epoch": 0.2378315058013515, "grad_norm": 0.22556912899017334, "learning_rate": 0.002, "loss": 2.5695, "step": 119380 }, { "epoch": 0.23785142802499043, "grad_norm": 0.17019802331924438, "learning_rate": 0.002, "loss": 2.5625, "step": 119390 }, { "epoch": 0.23787135024862935, "grad_norm": 0.13373400270938873, "learning_rate": 0.002, "loss": 2.5625, "step": 119400 }, { "epoch": 0.23789127247226827, "grad_norm": 0.1873604953289032, "learning_rate": 0.002, "loss": 2.5793, "step": 119410 }, { "epoch": 0.23791119469590719, "grad_norm": 0.1823202669620514, "learning_rate": 0.002, "loss": 2.5644, "step": 119420 }, { "epoch": 0.2379311169195461, "grad_norm": 0.16153965890407562, "learning_rate": 0.002, "loss": 2.5562, "step": 119430 }, { "epoch": 0.237951039143185, "grad_norm": 0.1686936765909195, "learning_rate": 0.002, "loss": 2.566, "step": 119440 }, { "epoch": 0.2379709613668239, "grad_norm": 0.16669347882270813, "learning_rate": 0.002, "loss": 2.557, "step": 119450 }, { "epoch": 0.23799088359046283, "grad_norm": 0.1778223216533661, "learning_rate": 0.002, "loss": 2.5698, "step": 119460 }, { "epoch": 0.23801080581410175, "grad_norm": 0.15224215388298035, "learning_rate": 0.002, "loss": 2.5624, "step": 119470 }, { "epoch": 0.23803072803774067, "grad_norm": 0.1733742654323578, "learning_rate": 0.002, "loss": 2.5679, "step": 119480 }, { "epoch": 0.2380506502613796, "grad_norm": 0.1595071256160736, "learning_rate": 0.002, "loss": 2.5644, "step": 119490 }, { "epoch": 0.23807057248501848, "grad_norm": 0.14974531531333923, "learning_rate": 0.002, "loss": 2.5622, "step": 119500 }, { "epoch": 0.2380904947086574, "grad_norm": 0.19004617631435394, "learning_rate": 0.002, "loss": 2.567, "step": 119510 }, { "epoch": 0.23811041693229631, "grad_norm": 0.14688320457935333, "learning_rate": 0.002, "loss": 2.5601, "step": 119520 }, { "epoch": 0.23813033915593523, "grad_norm": 0.17298215627670288, "learning_rate": 0.002, "loss": 2.5532, "step": 119530 }, { "epoch": 0.23815026137957415, "grad_norm": 0.15396547317504883, "learning_rate": 0.002, "loss": 2.5664, "step": 119540 }, { "epoch": 0.23817018360321304, "grad_norm": 0.12823975086212158, "learning_rate": 0.002, "loss": 2.5854, "step": 119550 }, { "epoch": 0.23819010582685196, "grad_norm": 0.22116892039775848, "learning_rate": 0.002, "loss": 2.5576, "step": 119560 }, { "epoch": 0.23821002805049088, "grad_norm": 0.15745116770267487, "learning_rate": 0.002, "loss": 2.5718, "step": 119570 }, { "epoch": 0.2382299502741298, "grad_norm": 0.16873174905776978, "learning_rate": 0.002, "loss": 2.5714, "step": 119580 }, { "epoch": 0.23824987249776872, "grad_norm": 0.21476158499717712, "learning_rate": 0.002, "loss": 2.5519, "step": 119590 }, { "epoch": 0.23826979472140764, "grad_norm": 0.16992972791194916, "learning_rate": 0.002, "loss": 2.5539, "step": 119600 }, { "epoch": 0.23828971694504653, "grad_norm": 0.16945737600326538, "learning_rate": 0.002, "loss": 2.557, "step": 119610 }, { "epoch": 0.23830963916868544, "grad_norm": 0.13473649322986603, "learning_rate": 0.002, "loss": 2.573, "step": 119620 }, { "epoch": 0.23832956139232436, "grad_norm": 0.15435366332530975, "learning_rate": 0.002, "loss": 2.5655, "step": 119630 }, { "epoch": 0.23834948361596328, "grad_norm": 0.1985565423965454, "learning_rate": 0.002, "loss": 2.5603, "step": 119640 }, { "epoch": 0.2383694058396022, "grad_norm": 0.1815613955259323, "learning_rate": 0.002, "loss": 2.564, "step": 119650 }, { "epoch": 0.23838932806324112, "grad_norm": 0.17054477334022522, "learning_rate": 0.002, "loss": 2.57, "step": 119660 }, { "epoch": 0.23840925028688, "grad_norm": 0.152380108833313, "learning_rate": 0.002, "loss": 2.5638, "step": 119670 }, { "epoch": 0.23842917251051893, "grad_norm": 0.16266028583049774, "learning_rate": 0.002, "loss": 2.5542, "step": 119680 }, { "epoch": 0.23844909473415785, "grad_norm": 0.1835618019104004, "learning_rate": 0.002, "loss": 2.5717, "step": 119690 }, { "epoch": 0.23846901695779676, "grad_norm": 0.1785624772310257, "learning_rate": 0.002, "loss": 2.5609, "step": 119700 }, { "epoch": 0.23848893918143568, "grad_norm": 0.2287922352552414, "learning_rate": 0.002, "loss": 2.5723, "step": 119710 }, { "epoch": 0.2385088614050746, "grad_norm": 0.14830438792705536, "learning_rate": 0.002, "loss": 2.564, "step": 119720 }, { "epoch": 0.2385287836287135, "grad_norm": 0.17099854350090027, "learning_rate": 0.002, "loss": 2.5578, "step": 119730 }, { "epoch": 0.2385487058523524, "grad_norm": 0.16492225229740143, "learning_rate": 0.002, "loss": 2.5576, "step": 119740 }, { "epoch": 0.23856862807599133, "grad_norm": 0.1724727749824524, "learning_rate": 0.002, "loss": 2.5556, "step": 119750 }, { "epoch": 0.23858855029963025, "grad_norm": 0.1468820869922638, "learning_rate": 0.002, "loss": 2.5563, "step": 119760 }, { "epoch": 0.23860847252326917, "grad_norm": 0.17608016729354858, "learning_rate": 0.002, "loss": 2.552, "step": 119770 }, { "epoch": 0.23862839474690808, "grad_norm": 0.1899498552083969, "learning_rate": 0.002, "loss": 2.5722, "step": 119780 }, { "epoch": 0.23864831697054698, "grad_norm": 0.19513250887393951, "learning_rate": 0.002, "loss": 2.5591, "step": 119790 }, { "epoch": 0.2386682391941859, "grad_norm": 0.1586586833000183, "learning_rate": 0.002, "loss": 2.5719, "step": 119800 }, { "epoch": 0.2386881614178248, "grad_norm": 0.16810472309589386, "learning_rate": 0.002, "loss": 2.5673, "step": 119810 }, { "epoch": 0.23870808364146373, "grad_norm": 0.20954343676567078, "learning_rate": 0.002, "loss": 2.5653, "step": 119820 }, { "epoch": 0.23872800586510265, "grad_norm": 0.1748606562614441, "learning_rate": 0.002, "loss": 2.5855, "step": 119830 }, { "epoch": 0.23874792808874154, "grad_norm": 0.18831931054592133, "learning_rate": 0.002, "loss": 2.5823, "step": 119840 }, { "epoch": 0.23876785031238046, "grad_norm": 0.16719858348369598, "learning_rate": 0.002, "loss": 2.5669, "step": 119850 }, { "epoch": 0.23878777253601938, "grad_norm": 0.15835760533809662, "learning_rate": 0.002, "loss": 2.5554, "step": 119860 }, { "epoch": 0.2388076947596583, "grad_norm": 0.17761960625648499, "learning_rate": 0.002, "loss": 2.5548, "step": 119870 }, { "epoch": 0.23882761698329721, "grad_norm": 0.16584229469299316, "learning_rate": 0.002, "loss": 2.5719, "step": 119880 }, { "epoch": 0.23884753920693613, "grad_norm": 0.17487157881259918, "learning_rate": 0.002, "loss": 2.5657, "step": 119890 }, { "epoch": 0.23886746143057502, "grad_norm": 0.17516449093818665, "learning_rate": 0.002, "loss": 2.5656, "step": 119900 }, { "epoch": 0.23888738365421394, "grad_norm": 0.1587902158498764, "learning_rate": 0.002, "loss": 2.5629, "step": 119910 }, { "epoch": 0.23890730587785286, "grad_norm": 0.1792755126953125, "learning_rate": 0.002, "loss": 2.5414, "step": 119920 }, { "epoch": 0.23892722810149178, "grad_norm": 0.15162180364131927, "learning_rate": 0.002, "loss": 2.5724, "step": 119930 }, { "epoch": 0.2389471503251307, "grad_norm": 0.16241085529327393, "learning_rate": 0.002, "loss": 2.568, "step": 119940 }, { "epoch": 0.23896707254876962, "grad_norm": 0.16735565662384033, "learning_rate": 0.002, "loss": 2.5461, "step": 119950 }, { "epoch": 0.2389869947724085, "grad_norm": 0.16347238421440125, "learning_rate": 0.002, "loss": 2.5755, "step": 119960 }, { "epoch": 0.23900691699604742, "grad_norm": 0.16550776362419128, "learning_rate": 0.002, "loss": 2.5602, "step": 119970 }, { "epoch": 0.23902683921968634, "grad_norm": 0.16125130653381348, "learning_rate": 0.002, "loss": 2.5639, "step": 119980 }, { "epoch": 0.23904676144332526, "grad_norm": 0.1539924442768097, "learning_rate": 0.002, "loss": 2.5735, "step": 119990 }, { "epoch": 0.23906668366696418, "grad_norm": 0.15933991968631744, "learning_rate": 0.002, "loss": 2.5636, "step": 120000 }, { "epoch": 0.2390866058906031, "grad_norm": 0.17110833525657654, "learning_rate": 0.002, "loss": 2.5698, "step": 120010 }, { "epoch": 0.239106528114242, "grad_norm": 0.1667124480009079, "learning_rate": 0.002, "loss": 2.5694, "step": 120020 }, { "epoch": 0.2391264503378809, "grad_norm": 0.15774314105510712, "learning_rate": 0.002, "loss": 2.5776, "step": 120030 }, { "epoch": 0.23914637256151983, "grad_norm": 0.15620796382427216, "learning_rate": 0.002, "loss": 2.5823, "step": 120040 }, { "epoch": 0.23916629478515875, "grad_norm": 0.15285012125968933, "learning_rate": 0.002, "loss": 2.5761, "step": 120050 }, { "epoch": 0.23918621700879766, "grad_norm": 0.15933752059936523, "learning_rate": 0.002, "loss": 2.5589, "step": 120060 }, { "epoch": 0.23920613923243655, "grad_norm": 0.17002731561660767, "learning_rate": 0.002, "loss": 2.5712, "step": 120070 }, { "epoch": 0.23922606145607547, "grad_norm": 0.17002451419830322, "learning_rate": 0.002, "loss": 2.5636, "step": 120080 }, { "epoch": 0.2392459836797144, "grad_norm": 0.18234717845916748, "learning_rate": 0.002, "loss": 2.5757, "step": 120090 }, { "epoch": 0.2392659059033533, "grad_norm": 0.15923593938350677, "learning_rate": 0.002, "loss": 2.5667, "step": 120100 }, { "epoch": 0.23928582812699223, "grad_norm": 0.15574127435684204, "learning_rate": 0.002, "loss": 2.5703, "step": 120110 }, { "epoch": 0.23930575035063115, "grad_norm": 0.15277141332626343, "learning_rate": 0.002, "loss": 2.5628, "step": 120120 }, { "epoch": 0.23932567257427004, "grad_norm": 0.21206943690776825, "learning_rate": 0.002, "loss": 2.5629, "step": 120130 }, { "epoch": 0.23934559479790896, "grad_norm": 0.16982391476631165, "learning_rate": 0.002, "loss": 2.5591, "step": 120140 }, { "epoch": 0.23936551702154787, "grad_norm": 0.1667654812335968, "learning_rate": 0.002, "loss": 2.5715, "step": 120150 }, { "epoch": 0.2393854392451868, "grad_norm": 0.1654253900051117, "learning_rate": 0.002, "loss": 2.5666, "step": 120160 }, { "epoch": 0.2394053614688257, "grad_norm": 0.15753628313541412, "learning_rate": 0.002, "loss": 2.5632, "step": 120170 }, { "epoch": 0.23942528369246463, "grad_norm": 0.1700180619955063, "learning_rate": 0.002, "loss": 2.5889, "step": 120180 }, { "epoch": 0.23944520591610352, "grad_norm": 0.17337451875209808, "learning_rate": 0.002, "loss": 2.5605, "step": 120190 }, { "epoch": 0.23946512813974244, "grad_norm": 0.20398598909378052, "learning_rate": 0.002, "loss": 2.5738, "step": 120200 }, { "epoch": 0.23948505036338136, "grad_norm": 0.16500160098075867, "learning_rate": 0.002, "loss": 2.572, "step": 120210 }, { "epoch": 0.23950497258702028, "grad_norm": 0.17890219390392303, "learning_rate": 0.002, "loss": 2.5581, "step": 120220 }, { "epoch": 0.2395248948106592, "grad_norm": 0.18235477805137634, "learning_rate": 0.002, "loss": 2.5766, "step": 120230 }, { "epoch": 0.2395448170342981, "grad_norm": 0.15689359605312347, "learning_rate": 0.002, "loss": 2.5585, "step": 120240 }, { "epoch": 0.239564739257937, "grad_norm": 0.18240168690681458, "learning_rate": 0.002, "loss": 2.5614, "step": 120250 }, { "epoch": 0.23958466148157592, "grad_norm": 0.16253210604190826, "learning_rate": 0.002, "loss": 2.5718, "step": 120260 }, { "epoch": 0.23960458370521484, "grad_norm": 0.16529378294944763, "learning_rate": 0.002, "loss": 2.5604, "step": 120270 }, { "epoch": 0.23962450592885376, "grad_norm": 0.19768889248371124, "learning_rate": 0.002, "loss": 2.5746, "step": 120280 }, { "epoch": 0.23964442815249268, "grad_norm": 0.1742989867925644, "learning_rate": 0.002, "loss": 2.5609, "step": 120290 }, { "epoch": 0.23966435037613157, "grad_norm": 0.169939786195755, "learning_rate": 0.002, "loss": 2.5704, "step": 120300 }, { "epoch": 0.2396842725997705, "grad_norm": 0.16887010633945465, "learning_rate": 0.002, "loss": 2.5694, "step": 120310 }, { "epoch": 0.2397041948234094, "grad_norm": 0.17879298329353333, "learning_rate": 0.002, "loss": 2.5571, "step": 120320 }, { "epoch": 0.23972411704704832, "grad_norm": 0.14360105991363525, "learning_rate": 0.002, "loss": 2.5703, "step": 120330 }, { "epoch": 0.23974403927068724, "grad_norm": 0.18826983869075775, "learning_rate": 0.002, "loss": 2.5535, "step": 120340 }, { "epoch": 0.23976396149432616, "grad_norm": 0.16578549146652222, "learning_rate": 0.002, "loss": 2.5781, "step": 120350 }, { "epoch": 0.23978388371796505, "grad_norm": 0.1862110048532486, "learning_rate": 0.002, "loss": 2.5707, "step": 120360 }, { "epoch": 0.23980380594160397, "grad_norm": 0.1926242709159851, "learning_rate": 0.002, "loss": 2.5651, "step": 120370 }, { "epoch": 0.2398237281652429, "grad_norm": 0.14477482438087463, "learning_rate": 0.002, "loss": 2.5728, "step": 120380 }, { "epoch": 0.2398436503888818, "grad_norm": 0.17109927535057068, "learning_rate": 0.002, "loss": 2.5695, "step": 120390 }, { "epoch": 0.23986357261252073, "grad_norm": 0.16224727034568787, "learning_rate": 0.002, "loss": 2.5633, "step": 120400 }, { "epoch": 0.23988349483615964, "grad_norm": 0.1484639197587967, "learning_rate": 0.002, "loss": 2.5686, "step": 120410 }, { "epoch": 0.23990341705979853, "grad_norm": 0.18176940083503723, "learning_rate": 0.002, "loss": 2.565, "step": 120420 }, { "epoch": 0.23992333928343745, "grad_norm": 0.14862853288650513, "learning_rate": 0.002, "loss": 2.5527, "step": 120430 }, { "epoch": 0.23994326150707637, "grad_norm": 0.19836348295211792, "learning_rate": 0.002, "loss": 2.5657, "step": 120440 }, { "epoch": 0.2399631837307153, "grad_norm": 0.17057597637176514, "learning_rate": 0.002, "loss": 2.5662, "step": 120450 }, { "epoch": 0.2399831059543542, "grad_norm": 0.15657645463943481, "learning_rate": 0.002, "loss": 2.5674, "step": 120460 }, { "epoch": 0.24000302817799313, "grad_norm": 0.162431538105011, "learning_rate": 0.002, "loss": 2.5582, "step": 120470 }, { "epoch": 0.24002295040163202, "grad_norm": 0.15930573642253876, "learning_rate": 0.002, "loss": 2.5646, "step": 120480 }, { "epoch": 0.24004287262527094, "grad_norm": 0.18196694552898407, "learning_rate": 0.002, "loss": 2.5838, "step": 120490 }, { "epoch": 0.24006279484890986, "grad_norm": 0.15484215319156647, "learning_rate": 0.002, "loss": 2.5661, "step": 120500 }, { "epoch": 0.24008271707254877, "grad_norm": 0.17218953371047974, "learning_rate": 0.002, "loss": 2.5582, "step": 120510 }, { "epoch": 0.2401026392961877, "grad_norm": 0.16973192989826202, "learning_rate": 0.002, "loss": 2.5468, "step": 120520 }, { "epoch": 0.2401225615198266, "grad_norm": 0.19019493460655212, "learning_rate": 0.002, "loss": 2.5773, "step": 120530 }, { "epoch": 0.2401424837434655, "grad_norm": 0.23450566828250885, "learning_rate": 0.002, "loss": 2.5642, "step": 120540 }, { "epoch": 0.24016240596710442, "grad_norm": 0.1719801425933838, "learning_rate": 0.002, "loss": 2.5686, "step": 120550 }, { "epoch": 0.24018232819074334, "grad_norm": 0.1633114516735077, "learning_rate": 0.002, "loss": 2.5632, "step": 120560 }, { "epoch": 0.24020225041438226, "grad_norm": 0.1882692128419876, "learning_rate": 0.002, "loss": 2.5715, "step": 120570 }, { "epoch": 0.24022217263802118, "grad_norm": 0.16380226612091064, "learning_rate": 0.002, "loss": 2.5676, "step": 120580 }, { "epoch": 0.24024209486166007, "grad_norm": 0.16527406871318817, "learning_rate": 0.002, "loss": 2.5611, "step": 120590 }, { "epoch": 0.24026201708529898, "grad_norm": 0.17831869423389435, "learning_rate": 0.002, "loss": 2.545, "step": 120600 }, { "epoch": 0.2402819393089379, "grad_norm": 0.17150045931339264, "learning_rate": 0.002, "loss": 2.5732, "step": 120610 }, { "epoch": 0.24030186153257682, "grad_norm": 0.18695519864559174, "learning_rate": 0.002, "loss": 2.5759, "step": 120620 }, { "epoch": 0.24032178375621574, "grad_norm": 0.14347611367702484, "learning_rate": 0.002, "loss": 2.5474, "step": 120630 }, { "epoch": 0.24034170597985466, "grad_norm": 0.14618130028247833, "learning_rate": 0.002, "loss": 2.5708, "step": 120640 }, { "epoch": 0.24036162820349355, "grad_norm": 0.21110525727272034, "learning_rate": 0.002, "loss": 2.5809, "step": 120650 }, { "epoch": 0.24038155042713247, "grad_norm": 0.1431102454662323, "learning_rate": 0.002, "loss": 2.5649, "step": 120660 }, { "epoch": 0.2404014726507714, "grad_norm": 0.1859748363494873, "learning_rate": 0.002, "loss": 2.5495, "step": 120670 }, { "epoch": 0.2404213948744103, "grad_norm": 0.1494356393814087, "learning_rate": 0.002, "loss": 2.5541, "step": 120680 }, { "epoch": 0.24044131709804922, "grad_norm": 0.4044959843158722, "learning_rate": 0.002, "loss": 2.5737, "step": 120690 }, { "epoch": 0.24046123932168814, "grad_norm": 0.22635704278945923, "learning_rate": 0.002, "loss": 2.5702, "step": 120700 }, { "epoch": 0.24048116154532703, "grad_norm": 0.1673574298620224, "learning_rate": 0.002, "loss": 2.5718, "step": 120710 }, { "epoch": 0.24050108376896595, "grad_norm": 0.16641749441623688, "learning_rate": 0.002, "loss": 2.562, "step": 120720 }, { "epoch": 0.24052100599260487, "grad_norm": 0.1719776690006256, "learning_rate": 0.002, "loss": 2.57, "step": 120730 }, { "epoch": 0.2405409282162438, "grad_norm": 0.18997861444950104, "learning_rate": 0.002, "loss": 2.5551, "step": 120740 }, { "epoch": 0.2405608504398827, "grad_norm": 0.17052975296974182, "learning_rate": 0.002, "loss": 2.575, "step": 120750 }, { "epoch": 0.24058077266352162, "grad_norm": 0.15452872216701508, "learning_rate": 0.002, "loss": 2.5688, "step": 120760 }, { "epoch": 0.24060069488716052, "grad_norm": 0.15134426951408386, "learning_rate": 0.002, "loss": 2.5623, "step": 120770 }, { "epoch": 0.24062061711079943, "grad_norm": 0.17670930922031403, "learning_rate": 0.002, "loss": 2.5803, "step": 120780 }, { "epoch": 0.24064053933443835, "grad_norm": 0.21086004376411438, "learning_rate": 0.002, "loss": 2.5671, "step": 120790 }, { "epoch": 0.24066046155807727, "grad_norm": 0.15945996344089508, "learning_rate": 0.002, "loss": 2.5497, "step": 120800 }, { "epoch": 0.2406803837817162, "grad_norm": 0.18716278672218323, "learning_rate": 0.002, "loss": 2.5707, "step": 120810 }, { "epoch": 0.24070030600535508, "grad_norm": 0.18678002059459686, "learning_rate": 0.002, "loss": 2.5546, "step": 120820 }, { "epoch": 0.240720228228994, "grad_norm": 0.14927837252616882, "learning_rate": 0.002, "loss": 2.5762, "step": 120830 }, { "epoch": 0.24074015045263292, "grad_norm": 0.1933104395866394, "learning_rate": 0.002, "loss": 2.5534, "step": 120840 }, { "epoch": 0.24076007267627184, "grad_norm": 0.13724760711193085, "learning_rate": 0.002, "loss": 2.5733, "step": 120850 }, { "epoch": 0.24077999489991075, "grad_norm": 0.1644960641860962, "learning_rate": 0.002, "loss": 2.5599, "step": 120860 }, { "epoch": 0.24079991712354967, "grad_norm": 0.154790997505188, "learning_rate": 0.002, "loss": 2.5515, "step": 120870 }, { "epoch": 0.24081983934718856, "grad_norm": 0.17820391058921814, "learning_rate": 0.002, "loss": 2.5529, "step": 120880 }, { "epoch": 0.24083976157082748, "grad_norm": 0.20097796618938446, "learning_rate": 0.002, "loss": 2.5647, "step": 120890 }, { "epoch": 0.2408596837944664, "grad_norm": 0.1417156457901001, "learning_rate": 0.002, "loss": 2.5731, "step": 120900 }, { "epoch": 0.24087960601810532, "grad_norm": 0.1500081717967987, "learning_rate": 0.002, "loss": 2.5621, "step": 120910 }, { "epoch": 0.24089952824174424, "grad_norm": 0.20325374603271484, "learning_rate": 0.002, "loss": 2.5557, "step": 120920 }, { "epoch": 0.24091945046538316, "grad_norm": 0.17246069014072418, "learning_rate": 0.002, "loss": 2.5694, "step": 120930 }, { "epoch": 0.24093937268902205, "grad_norm": 0.17201119661331177, "learning_rate": 0.002, "loss": 2.5627, "step": 120940 }, { "epoch": 0.24095929491266097, "grad_norm": 0.16733592748641968, "learning_rate": 0.002, "loss": 2.5638, "step": 120950 }, { "epoch": 0.24097921713629988, "grad_norm": 0.1528891772031784, "learning_rate": 0.002, "loss": 2.5378, "step": 120960 }, { "epoch": 0.2409991393599388, "grad_norm": 0.14295285940170288, "learning_rate": 0.002, "loss": 2.5754, "step": 120970 }, { "epoch": 0.24101906158357772, "grad_norm": 0.1685667634010315, "learning_rate": 0.002, "loss": 2.5762, "step": 120980 }, { "epoch": 0.24103898380721664, "grad_norm": 0.19915267825126648, "learning_rate": 0.002, "loss": 2.5626, "step": 120990 }, { "epoch": 0.24105890603085553, "grad_norm": 0.1590791642665863, "learning_rate": 0.002, "loss": 2.5769, "step": 121000 }, { "epoch": 0.24107882825449445, "grad_norm": 0.17480358481407166, "learning_rate": 0.002, "loss": 2.5594, "step": 121010 }, { "epoch": 0.24109875047813337, "grad_norm": 0.14296121895313263, "learning_rate": 0.002, "loss": 2.5719, "step": 121020 }, { "epoch": 0.24111867270177229, "grad_norm": 0.17082622647285461, "learning_rate": 0.002, "loss": 2.5764, "step": 121030 }, { "epoch": 0.2411385949254112, "grad_norm": 0.18831825256347656, "learning_rate": 0.002, "loss": 2.5618, "step": 121040 }, { "epoch": 0.2411585171490501, "grad_norm": 0.16688227653503418, "learning_rate": 0.002, "loss": 2.5562, "step": 121050 }, { "epoch": 0.241178439372689, "grad_norm": 0.16890183091163635, "learning_rate": 0.002, "loss": 2.5474, "step": 121060 }, { "epoch": 0.24119836159632793, "grad_norm": 0.18050311505794525, "learning_rate": 0.002, "loss": 2.5779, "step": 121070 }, { "epoch": 0.24121828381996685, "grad_norm": 0.16325825452804565, "learning_rate": 0.002, "loss": 2.557, "step": 121080 }, { "epoch": 0.24123820604360577, "grad_norm": 0.16110673546791077, "learning_rate": 0.002, "loss": 2.5597, "step": 121090 }, { "epoch": 0.2412581282672447, "grad_norm": 0.18379506468772888, "learning_rate": 0.002, "loss": 2.5623, "step": 121100 }, { "epoch": 0.24127805049088358, "grad_norm": 0.18022628128528595, "learning_rate": 0.002, "loss": 2.5655, "step": 121110 }, { "epoch": 0.2412979727145225, "grad_norm": 0.16244615614414215, "learning_rate": 0.002, "loss": 2.5662, "step": 121120 }, { "epoch": 0.24131789493816141, "grad_norm": 0.13360421359539032, "learning_rate": 0.002, "loss": 2.5665, "step": 121130 }, { "epoch": 0.24133781716180033, "grad_norm": 0.20774051547050476, "learning_rate": 0.002, "loss": 2.5669, "step": 121140 }, { "epoch": 0.24135773938543925, "grad_norm": 0.17423021793365479, "learning_rate": 0.002, "loss": 2.5674, "step": 121150 }, { "epoch": 0.24137766160907817, "grad_norm": 0.1560872495174408, "learning_rate": 0.002, "loss": 2.5654, "step": 121160 }, { "epoch": 0.24139758383271706, "grad_norm": 0.16904205083847046, "learning_rate": 0.002, "loss": 2.5759, "step": 121170 }, { "epoch": 0.24141750605635598, "grad_norm": 0.16765464842319489, "learning_rate": 0.002, "loss": 2.5712, "step": 121180 }, { "epoch": 0.2414374282799949, "grad_norm": 0.1750032603740692, "learning_rate": 0.002, "loss": 2.5555, "step": 121190 }, { "epoch": 0.24145735050363382, "grad_norm": 0.24125319719314575, "learning_rate": 0.002, "loss": 2.579, "step": 121200 }, { "epoch": 0.24147727272727273, "grad_norm": 0.15902908146381378, "learning_rate": 0.002, "loss": 2.5644, "step": 121210 }, { "epoch": 0.24149719495091165, "grad_norm": 0.14369037747383118, "learning_rate": 0.002, "loss": 2.5769, "step": 121220 }, { "epoch": 0.24151711717455054, "grad_norm": 0.18473556637763977, "learning_rate": 0.002, "loss": 2.5517, "step": 121230 }, { "epoch": 0.24153703939818946, "grad_norm": 0.1568974256515503, "learning_rate": 0.002, "loss": 2.553, "step": 121240 }, { "epoch": 0.24155696162182838, "grad_norm": 0.19790995121002197, "learning_rate": 0.002, "loss": 2.5797, "step": 121250 }, { "epoch": 0.2415768838454673, "grad_norm": 0.21919676661491394, "learning_rate": 0.002, "loss": 2.555, "step": 121260 }, { "epoch": 0.24159680606910622, "grad_norm": 0.16999013721942902, "learning_rate": 0.002, "loss": 2.5739, "step": 121270 }, { "epoch": 0.24161672829274514, "grad_norm": 0.16770455241203308, "learning_rate": 0.002, "loss": 2.5445, "step": 121280 }, { "epoch": 0.24163665051638403, "grad_norm": 0.16049998998641968, "learning_rate": 0.002, "loss": 2.5621, "step": 121290 }, { "epoch": 0.24165657274002295, "grad_norm": 0.15779343247413635, "learning_rate": 0.002, "loss": 2.5606, "step": 121300 }, { "epoch": 0.24167649496366186, "grad_norm": 0.21714763343334198, "learning_rate": 0.002, "loss": 2.5685, "step": 121310 }, { "epoch": 0.24169641718730078, "grad_norm": 0.15620847046375275, "learning_rate": 0.002, "loss": 2.5605, "step": 121320 }, { "epoch": 0.2417163394109397, "grad_norm": 0.1627434492111206, "learning_rate": 0.002, "loss": 2.5579, "step": 121330 }, { "epoch": 0.2417362616345786, "grad_norm": 0.16732880473136902, "learning_rate": 0.002, "loss": 2.5613, "step": 121340 }, { "epoch": 0.2417561838582175, "grad_norm": 0.17741499841213226, "learning_rate": 0.002, "loss": 2.5624, "step": 121350 }, { "epoch": 0.24177610608185643, "grad_norm": 0.1933036893606186, "learning_rate": 0.002, "loss": 2.5594, "step": 121360 }, { "epoch": 0.24179602830549535, "grad_norm": 0.15792381763458252, "learning_rate": 0.002, "loss": 2.5644, "step": 121370 }, { "epoch": 0.24181595052913427, "grad_norm": 0.15465736389160156, "learning_rate": 0.002, "loss": 2.5639, "step": 121380 }, { "epoch": 0.24183587275277318, "grad_norm": 0.1556195467710495, "learning_rate": 0.002, "loss": 2.566, "step": 121390 }, { "epoch": 0.24185579497641208, "grad_norm": 0.14882254600524902, "learning_rate": 0.002, "loss": 2.5549, "step": 121400 }, { "epoch": 0.241875717200051, "grad_norm": 0.1571483165025711, "learning_rate": 0.002, "loss": 2.5521, "step": 121410 }, { "epoch": 0.2418956394236899, "grad_norm": 0.1565283238887787, "learning_rate": 0.002, "loss": 2.566, "step": 121420 }, { "epoch": 0.24191556164732883, "grad_norm": 0.15582245588302612, "learning_rate": 0.002, "loss": 2.5755, "step": 121430 }, { "epoch": 0.24193548387096775, "grad_norm": 0.17189180850982666, "learning_rate": 0.002, "loss": 2.5638, "step": 121440 }, { "epoch": 0.24195540609460667, "grad_norm": 0.20037484169006348, "learning_rate": 0.002, "loss": 2.5744, "step": 121450 }, { "epoch": 0.24197532831824556, "grad_norm": 0.15890377759933472, "learning_rate": 0.002, "loss": 2.568, "step": 121460 }, { "epoch": 0.24199525054188448, "grad_norm": 0.15462443232536316, "learning_rate": 0.002, "loss": 2.5636, "step": 121470 }, { "epoch": 0.2420151727655234, "grad_norm": 0.16020208597183228, "learning_rate": 0.002, "loss": 2.568, "step": 121480 }, { "epoch": 0.2420350949891623, "grad_norm": 0.18801140785217285, "learning_rate": 0.002, "loss": 2.5689, "step": 121490 }, { "epoch": 0.24205501721280123, "grad_norm": 0.14372964203357697, "learning_rate": 0.002, "loss": 2.5726, "step": 121500 }, { "epoch": 0.24207493943644015, "grad_norm": 0.20314548909664154, "learning_rate": 0.002, "loss": 2.5705, "step": 121510 }, { "epoch": 0.24209486166007904, "grad_norm": 0.14948305487632751, "learning_rate": 0.002, "loss": 2.5747, "step": 121520 }, { "epoch": 0.24211478388371796, "grad_norm": 0.17128592729568481, "learning_rate": 0.002, "loss": 2.572, "step": 121530 }, { "epoch": 0.24213470610735688, "grad_norm": 0.15697066485881805, "learning_rate": 0.002, "loss": 2.5735, "step": 121540 }, { "epoch": 0.2421546283309958, "grad_norm": 0.1678590029478073, "learning_rate": 0.002, "loss": 2.5697, "step": 121550 }, { "epoch": 0.24217455055463472, "grad_norm": 0.18490783870220184, "learning_rate": 0.002, "loss": 2.5406, "step": 121560 }, { "epoch": 0.2421944727782736, "grad_norm": 0.14373888075351715, "learning_rate": 0.002, "loss": 2.5648, "step": 121570 }, { "epoch": 0.24221439500191252, "grad_norm": 0.1919088065624237, "learning_rate": 0.002, "loss": 2.5643, "step": 121580 }, { "epoch": 0.24223431722555144, "grad_norm": 0.18444211781024933, "learning_rate": 0.002, "loss": 2.5726, "step": 121590 }, { "epoch": 0.24225423944919036, "grad_norm": 0.16800852119922638, "learning_rate": 0.002, "loss": 2.5555, "step": 121600 }, { "epoch": 0.24227416167282928, "grad_norm": 0.16844314336776733, "learning_rate": 0.002, "loss": 2.572, "step": 121610 }, { "epoch": 0.2422940838964682, "grad_norm": 0.15245622396469116, "learning_rate": 0.002, "loss": 2.5711, "step": 121620 }, { "epoch": 0.2423140061201071, "grad_norm": 0.16682273149490356, "learning_rate": 0.002, "loss": 2.5663, "step": 121630 }, { "epoch": 0.242333928343746, "grad_norm": 0.18515650928020477, "learning_rate": 0.002, "loss": 2.5517, "step": 121640 }, { "epoch": 0.24235385056738493, "grad_norm": 0.19096466898918152, "learning_rate": 0.002, "loss": 2.5789, "step": 121650 }, { "epoch": 0.24237377279102384, "grad_norm": 0.13962383568286896, "learning_rate": 0.002, "loss": 2.5537, "step": 121660 }, { "epoch": 0.24239369501466276, "grad_norm": 0.1449468582868576, "learning_rate": 0.002, "loss": 2.5617, "step": 121670 }, { "epoch": 0.24241361723830168, "grad_norm": 0.14918455481529236, "learning_rate": 0.002, "loss": 2.5612, "step": 121680 }, { "epoch": 0.24243353946194057, "grad_norm": 0.19118288159370422, "learning_rate": 0.002, "loss": 2.5634, "step": 121690 }, { "epoch": 0.2424534616855795, "grad_norm": 0.15386906266212463, "learning_rate": 0.002, "loss": 2.5805, "step": 121700 }, { "epoch": 0.2424733839092184, "grad_norm": 0.1758866161108017, "learning_rate": 0.002, "loss": 2.563, "step": 121710 }, { "epoch": 0.24249330613285733, "grad_norm": 0.16836482286453247, "learning_rate": 0.002, "loss": 2.5683, "step": 121720 }, { "epoch": 0.24251322835649625, "grad_norm": 0.14476023614406586, "learning_rate": 0.002, "loss": 2.5671, "step": 121730 }, { "epoch": 0.24253315058013517, "grad_norm": 0.18993501365184784, "learning_rate": 0.002, "loss": 2.5717, "step": 121740 }, { "epoch": 0.24255307280377406, "grad_norm": 0.15277180075645447, "learning_rate": 0.002, "loss": 2.5602, "step": 121750 }, { "epoch": 0.24257299502741297, "grad_norm": 0.17590998113155365, "learning_rate": 0.002, "loss": 2.5747, "step": 121760 }, { "epoch": 0.2425929172510519, "grad_norm": 0.16890183091163635, "learning_rate": 0.002, "loss": 2.571, "step": 121770 }, { "epoch": 0.2426128394746908, "grad_norm": 0.16019271314144135, "learning_rate": 0.002, "loss": 2.5769, "step": 121780 }, { "epoch": 0.24263276169832973, "grad_norm": 0.1640002429485321, "learning_rate": 0.002, "loss": 2.5712, "step": 121790 }, { "epoch": 0.24265268392196865, "grad_norm": 0.16935284435749054, "learning_rate": 0.002, "loss": 2.5786, "step": 121800 }, { "epoch": 0.24267260614560754, "grad_norm": 0.14878304302692413, "learning_rate": 0.002, "loss": 2.5596, "step": 121810 }, { "epoch": 0.24269252836924646, "grad_norm": 0.16359896957874298, "learning_rate": 0.002, "loss": 2.5712, "step": 121820 }, { "epoch": 0.24271245059288538, "grad_norm": 0.16217470169067383, "learning_rate": 0.002, "loss": 2.559, "step": 121830 }, { "epoch": 0.2427323728165243, "grad_norm": 0.1808660328388214, "learning_rate": 0.002, "loss": 2.5489, "step": 121840 }, { "epoch": 0.2427522950401632, "grad_norm": 0.16758722066879272, "learning_rate": 0.002, "loss": 2.5628, "step": 121850 }, { "epoch": 0.2427722172638021, "grad_norm": 0.16623073816299438, "learning_rate": 0.002, "loss": 2.5647, "step": 121860 }, { "epoch": 0.24279213948744102, "grad_norm": 0.1977420151233673, "learning_rate": 0.002, "loss": 2.5576, "step": 121870 }, { "epoch": 0.24281206171107994, "grad_norm": 0.19470275938510895, "learning_rate": 0.002, "loss": 2.56, "step": 121880 }, { "epoch": 0.24283198393471886, "grad_norm": 0.16118592023849487, "learning_rate": 0.002, "loss": 2.5644, "step": 121890 }, { "epoch": 0.24285190615835778, "grad_norm": 0.15606382489204407, "learning_rate": 0.002, "loss": 2.556, "step": 121900 }, { "epoch": 0.2428718283819967, "grad_norm": 0.18046151101589203, "learning_rate": 0.002, "loss": 2.5652, "step": 121910 }, { "epoch": 0.2428917506056356, "grad_norm": 0.15938465297222137, "learning_rate": 0.002, "loss": 2.5693, "step": 121920 }, { "epoch": 0.2429116728292745, "grad_norm": 0.17260080575942993, "learning_rate": 0.002, "loss": 2.5726, "step": 121930 }, { "epoch": 0.24293159505291342, "grad_norm": 0.17974378168582916, "learning_rate": 0.002, "loss": 2.5633, "step": 121940 }, { "epoch": 0.24295151727655234, "grad_norm": 0.17100825905799866, "learning_rate": 0.002, "loss": 2.5563, "step": 121950 }, { "epoch": 0.24297143950019126, "grad_norm": 0.15516284108161926, "learning_rate": 0.002, "loss": 2.5665, "step": 121960 }, { "epoch": 0.24299136172383018, "grad_norm": 0.1572832316160202, "learning_rate": 0.002, "loss": 2.5738, "step": 121970 }, { "epoch": 0.24301128394746907, "grad_norm": 0.2766196131706238, "learning_rate": 0.002, "loss": 2.5733, "step": 121980 }, { "epoch": 0.243031206171108, "grad_norm": 0.18782858550548553, "learning_rate": 0.002, "loss": 2.5767, "step": 121990 }, { "epoch": 0.2430511283947469, "grad_norm": 0.15903297066688538, "learning_rate": 0.002, "loss": 2.5613, "step": 122000 }, { "epoch": 0.24307105061838583, "grad_norm": 0.18368150293827057, "learning_rate": 0.002, "loss": 2.5709, "step": 122010 }, { "epoch": 0.24309097284202474, "grad_norm": 0.15078966319561005, "learning_rate": 0.002, "loss": 2.5601, "step": 122020 }, { "epoch": 0.24311089506566366, "grad_norm": 0.16066263616085052, "learning_rate": 0.002, "loss": 2.5642, "step": 122030 }, { "epoch": 0.24313081728930255, "grad_norm": 0.18870028853416443, "learning_rate": 0.002, "loss": 2.5749, "step": 122040 }, { "epoch": 0.24315073951294147, "grad_norm": 0.17427778244018555, "learning_rate": 0.002, "loss": 2.5575, "step": 122050 }, { "epoch": 0.2431706617365804, "grad_norm": 0.15186555683612823, "learning_rate": 0.002, "loss": 2.5695, "step": 122060 }, { "epoch": 0.2431905839602193, "grad_norm": 0.15927645564079285, "learning_rate": 0.002, "loss": 2.5612, "step": 122070 }, { "epoch": 0.24321050618385823, "grad_norm": 0.19494491815567017, "learning_rate": 0.002, "loss": 2.5565, "step": 122080 }, { "epoch": 0.24323042840749712, "grad_norm": 0.15250766277313232, "learning_rate": 0.002, "loss": 2.572, "step": 122090 }, { "epoch": 0.24325035063113604, "grad_norm": 0.1640930324792862, "learning_rate": 0.002, "loss": 2.5418, "step": 122100 }, { "epoch": 0.24327027285477495, "grad_norm": 0.15499258041381836, "learning_rate": 0.002, "loss": 2.5652, "step": 122110 }, { "epoch": 0.24329019507841387, "grad_norm": 0.15608596801757812, "learning_rate": 0.002, "loss": 2.5591, "step": 122120 }, { "epoch": 0.2433101173020528, "grad_norm": 0.1715088039636612, "learning_rate": 0.002, "loss": 2.5616, "step": 122130 }, { "epoch": 0.2433300395256917, "grad_norm": 0.17958471179008484, "learning_rate": 0.002, "loss": 2.5524, "step": 122140 }, { "epoch": 0.2433499617493306, "grad_norm": 0.1481684446334839, "learning_rate": 0.002, "loss": 2.5737, "step": 122150 }, { "epoch": 0.24336988397296952, "grad_norm": 0.18673141300678253, "learning_rate": 0.002, "loss": 2.5648, "step": 122160 }, { "epoch": 0.24338980619660844, "grad_norm": 0.1490231305360794, "learning_rate": 0.002, "loss": 2.5848, "step": 122170 }, { "epoch": 0.24340972842024736, "grad_norm": 0.14216859638690948, "learning_rate": 0.002, "loss": 2.5672, "step": 122180 }, { "epoch": 0.24342965064388628, "grad_norm": 0.18897022306919098, "learning_rate": 0.002, "loss": 2.5735, "step": 122190 }, { "epoch": 0.2434495728675252, "grad_norm": 0.15772207081317902, "learning_rate": 0.002, "loss": 2.574, "step": 122200 }, { "epoch": 0.24346949509116408, "grad_norm": 0.16473336517810822, "learning_rate": 0.002, "loss": 2.5719, "step": 122210 }, { "epoch": 0.243489417314803, "grad_norm": 0.18017661571502686, "learning_rate": 0.002, "loss": 2.577, "step": 122220 }, { "epoch": 0.24350933953844192, "grad_norm": 0.17240914702415466, "learning_rate": 0.002, "loss": 2.5651, "step": 122230 }, { "epoch": 0.24352926176208084, "grad_norm": 0.19027669727802277, "learning_rate": 0.002, "loss": 2.575, "step": 122240 }, { "epoch": 0.24354918398571976, "grad_norm": 0.16172760725021362, "learning_rate": 0.002, "loss": 2.5775, "step": 122250 }, { "epoch": 0.24356910620935868, "grad_norm": 0.16110451519489288, "learning_rate": 0.002, "loss": 2.5716, "step": 122260 }, { "epoch": 0.24358902843299757, "grad_norm": 0.18820837140083313, "learning_rate": 0.002, "loss": 2.5479, "step": 122270 }, { "epoch": 0.24360895065663649, "grad_norm": 0.1980934739112854, "learning_rate": 0.002, "loss": 2.56, "step": 122280 }, { "epoch": 0.2436288728802754, "grad_norm": 0.15769612789154053, "learning_rate": 0.002, "loss": 2.5641, "step": 122290 }, { "epoch": 0.24364879510391432, "grad_norm": 0.16260561347007751, "learning_rate": 0.002, "loss": 2.5545, "step": 122300 }, { "epoch": 0.24366871732755324, "grad_norm": 0.16198688745498657, "learning_rate": 0.002, "loss": 2.563, "step": 122310 }, { "epoch": 0.24368863955119213, "grad_norm": 0.18476876616477966, "learning_rate": 0.002, "loss": 2.5602, "step": 122320 }, { "epoch": 0.24370856177483105, "grad_norm": 0.18397794663906097, "learning_rate": 0.002, "loss": 2.5639, "step": 122330 }, { "epoch": 0.24372848399846997, "grad_norm": 0.175333172082901, "learning_rate": 0.002, "loss": 2.5657, "step": 122340 }, { "epoch": 0.2437484062221089, "grad_norm": 0.1916879266500473, "learning_rate": 0.002, "loss": 2.5647, "step": 122350 }, { "epoch": 0.2437683284457478, "grad_norm": 0.19017226994037628, "learning_rate": 0.002, "loss": 2.5729, "step": 122360 }, { "epoch": 0.24378825066938672, "grad_norm": 0.15249575674533844, "learning_rate": 0.002, "loss": 2.5667, "step": 122370 }, { "epoch": 0.24380817289302562, "grad_norm": 0.16605032980442047, "learning_rate": 0.002, "loss": 2.5813, "step": 122380 }, { "epoch": 0.24382809511666453, "grad_norm": 0.1767624169588089, "learning_rate": 0.002, "loss": 2.5611, "step": 122390 }, { "epoch": 0.24384801734030345, "grad_norm": 0.15730862319469452, "learning_rate": 0.002, "loss": 2.576, "step": 122400 }, { "epoch": 0.24386793956394237, "grad_norm": 0.15611210465431213, "learning_rate": 0.002, "loss": 2.5613, "step": 122410 }, { "epoch": 0.2438878617875813, "grad_norm": 0.1471523493528366, "learning_rate": 0.002, "loss": 2.5639, "step": 122420 }, { "epoch": 0.2439077840112202, "grad_norm": 0.13814692199230194, "learning_rate": 0.002, "loss": 2.5628, "step": 122430 }, { "epoch": 0.2439277062348591, "grad_norm": 0.17419931292533875, "learning_rate": 0.002, "loss": 2.5688, "step": 122440 }, { "epoch": 0.24394762845849802, "grad_norm": 0.15444530546665192, "learning_rate": 0.002, "loss": 2.5633, "step": 122450 }, { "epoch": 0.24396755068213694, "grad_norm": 0.16744081676006317, "learning_rate": 0.002, "loss": 2.5596, "step": 122460 }, { "epoch": 0.24398747290577585, "grad_norm": 0.17269332706928253, "learning_rate": 0.002, "loss": 2.5677, "step": 122470 }, { "epoch": 0.24400739512941477, "grad_norm": 0.1675572395324707, "learning_rate": 0.002, "loss": 2.573, "step": 122480 }, { "epoch": 0.2440273173530537, "grad_norm": 0.15385642647743225, "learning_rate": 0.002, "loss": 2.5711, "step": 122490 }, { "epoch": 0.24404723957669258, "grad_norm": 0.16864971816539764, "learning_rate": 0.002, "loss": 2.5722, "step": 122500 }, { "epoch": 0.2440671618003315, "grad_norm": 0.1670227199792862, "learning_rate": 0.002, "loss": 2.57, "step": 122510 }, { "epoch": 0.24408708402397042, "grad_norm": 0.17459073662757874, "learning_rate": 0.002, "loss": 2.5698, "step": 122520 }, { "epoch": 0.24410700624760934, "grad_norm": 0.1682344675064087, "learning_rate": 0.002, "loss": 2.5813, "step": 122530 }, { "epoch": 0.24412692847124826, "grad_norm": 0.18890738487243652, "learning_rate": 0.002, "loss": 2.5508, "step": 122540 }, { "epoch": 0.24414685069488717, "grad_norm": 0.18147405982017517, "learning_rate": 0.002, "loss": 2.5636, "step": 122550 }, { "epoch": 0.24416677291852606, "grad_norm": 0.16464880108833313, "learning_rate": 0.002, "loss": 2.5619, "step": 122560 }, { "epoch": 0.24418669514216498, "grad_norm": 0.1853012889623642, "learning_rate": 0.002, "loss": 2.5587, "step": 122570 }, { "epoch": 0.2442066173658039, "grad_norm": 0.1651972234249115, "learning_rate": 0.002, "loss": 2.5715, "step": 122580 }, { "epoch": 0.24422653958944282, "grad_norm": 0.1859489530324936, "learning_rate": 0.002, "loss": 2.5678, "step": 122590 }, { "epoch": 0.24424646181308174, "grad_norm": 0.16538511216640472, "learning_rate": 0.002, "loss": 2.5574, "step": 122600 }, { "epoch": 0.24426638403672063, "grad_norm": 0.22887690365314484, "learning_rate": 0.002, "loss": 2.5673, "step": 122610 }, { "epoch": 0.24428630626035955, "grad_norm": 0.17902232706546783, "learning_rate": 0.002, "loss": 2.5589, "step": 122620 }, { "epoch": 0.24430622848399847, "grad_norm": 0.16253751516342163, "learning_rate": 0.002, "loss": 2.5526, "step": 122630 }, { "epoch": 0.24432615070763739, "grad_norm": 0.17605172097682953, "learning_rate": 0.002, "loss": 2.5774, "step": 122640 }, { "epoch": 0.2443460729312763, "grad_norm": 0.18369503319263458, "learning_rate": 0.002, "loss": 2.5497, "step": 122650 }, { "epoch": 0.24436599515491522, "grad_norm": 0.17161065340042114, "learning_rate": 0.002, "loss": 2.5638, "step": 122660 }, { "epoch": 0.2443859173785541, "grad_norm": 0.18544718623161316, "learning_rate": 0.002, "loss": 2.5669, "step": 122670 }, { "epoch": 0.24440583960219303, "grad_norm": 0.1697622686624527, "learning_rate": 0.002, "loss": 2.564, "step": 122680 }, { "epoch": 0.24442576182583195, "grad_norm": 0.1737491339445114, "learning_rate": 0.002, "loss": 2.5586, "step": 122690 }, { "epoch": 0.24444568404947087, "grad_norm": 0.14575815200805664, "learning_rate": 0.002, "loss": 2.5589, "step": 122700 }, { "epoch": 0.2444656062731098, "grad_norm": 0.15135614573955536, "learning_rate": 0.002, "loss": 2.5753, "step": 122710 }, { "epoch": 0.2444855284967487, "grad_norm": 0.1823195368051529, "learning_rate": 0.002, "loss": 2.5667, "step": 122720 }, { "epoch": 0.2445054507203876, "grad_norm": 0.21589750051498413, "learning_rate": 0.002, "loss": 2.5702, "step": 122730 }, { "epoch": 0.24452537294402651, "grad_norm": 0.16248974204063416, "learning_rate": 0.002, "loss": 2.5754, "step": 122740 }, { "epoch": 0.24454529516766543, "grad_norm": 0.14523012936115265, "learning_rate": 0.002, "loss": 2.557, "step": 122750 }, { "epoch": 0.24456521739130435, "grad_norm": 0.15518245100975037, "learning_rate": 0.002, "loss": 2.5686, "step": 122760 }, { "epoch": 0.24458513961494327, "grad_norm": 0.17348401248455048, "learning_rate": 0.002, "loss": 2.5562, "step": 122770 }, { "epoch": 0.2446050618385822, "grad_norm": 0.19627057015895844, "learning_rate": 0.002, "loss": 2.5659, "step": 122780 }, { "epoch": 0.24462498406222108, "grad_norm": 0.16830448806285858, "learning_rate": 0.002, "loss": 2.5596, "step": 122790 }, { "epoch": 0.24464490628586, "grad_norm": 0.13782377541065216, "learning_rate": 0.002, "loss": 2.5636, "step": 122800 }, { "epoch": 0.24466482850949892, "grad_norm": 0.16888509690761566, "learning_rate": 0.002, "loss": 2.5576, "step": 122810 }, { "epoch": 0.24468475073313783, "grad_norm": 0.18147151172161102, "learning_rate": 0.002, "loss": 2.5598, "step": 122820 }, { "epoch": 0.24470467295677675, "grad_norm": 0.17153103649616241, "learning_rate": 0.002, "loss": 2.5623, "step": 122830 }, { "epoch": 0.24472459518041564, "grad_norm": 0.15142366290092468, "learning_rate": 0.002, "loss": 2.5832, "step": 122840 }, { "epoch": 0.24474451740405456, "grad_norm": 0.16022451221942902, "learning_rate": 0.002, "loss": 2.5527, "step": 122850 }, { "epoch": 0.24476443962769348, "grad_norm": 0.1600903570652008, "learning_rate": 0.002, "loss": 2.5764, "step": 122860 }, { "epoch": 0.2447843618513324, "grad_norm": 0.1702086180448532, "learning_rate": 0.002, "loss": 2.5701, "step": 122870 }, { "epoch": 0.24480428407497132, "grad_norm": 0.19258423149585724, "learning_rate": 0.002, "loss": 2.5791, "step": 122880 }, { "epoch": 0.24482420629861024, "grad_norm": 0.16564267873764038, "learning_rate": 0.002, "loss": 2.5699, "step": 122890 }, { "epoch": 0.24484412852224913, "grad_norm": 0.16160781681537628, "learning_rate": 0.002, "loss": 2.5688, "step": 122900 }, { "epoch": 0.24486405074588805, "grad_norm": 0.15192562341690063, "learning_rate": 0.002, "loss": 2.566, "step": 122910 }, { "epoch": 0.24488397296952696, "grad_norm": 0.15589675307273865, "learning_rate": 0.002, "loss": 2.559, "step": 122920 }, { "epoch": 0.24490389519316588, "grad_norm": 0.1734277307987213, "learning_rate": 0.002, "loss": 2.5697, "step": 122930 }, { "epoch": 0.2449238174168048, "grad_norm": 0.17278912663459778, "learning_rate": 0.002, "loss": 2.565, "step": 122940 }, { "epoch": 0.24494373964044372, "grad_norm": 0.17033016681671143, "learning_rate": 0.002, "loss": 2.5773, "step": 122950 }, { "epoch": 0.2449636618640826, "grad_norm": 0.1537700891494751, "learning_rate": 0.002, "loss": 2.5559, "step": 122960 }, { "epoch": 0.24498358408772153, "grad_norm": 0.15364035964012146, "learning_rate": 0.002, "loss": 2.5557, "step": 122970 }, { "epoch": 0.24500350631136045, "grad_norm": 0.197538822889328, "learning_rate": 0.002, "loss": 2.5752, "step": 122980 }, { "epoch": 0.24502342853499937, "grad_norm": 0.1710096001625061, "learning_rate": 0.002, "loss": 2.5449, "step": 122990 }, { "epoch": 0.24504335075863828, "grad_norm": 0.14570146799087524, "learning_rate": 0.002, "loss": 2.5453, "step": 123000 }, { "epoch": 0.2450632729822772, "grad_norm": 0.1791611760854721, "learning_rate": 0.002, "loss": 2.5477, "step": 123010 }, { "epoch": 0.2450831952059161, "grad_norm": 0.1820969581604004, "learning_rate": 0.002, "loss": 2.5582, "step": 123020 }, { "epoch": 0.245103117429555, "grad_norm": 0.16726544499397278, "learning_rate": 0.002, "loss": 2.5533, "step": 123030 }, { "epoch": 0.24512303965319393, "grad_norm": 0.15066353976726532, "learning_rate": 0.002, "loss": 2.5673, "step": 123040 }, { "epoch": 0.24514296187683285, "grad_norm": 0.18682871758937836, "learning_rate": 0.002, "loss": 2.5499, "step": 123050 }, { "epoch": 0.24516288410047177, "grad_norm": 0.17864884436130524, "learning_rate": 0.002, "loss": 2.5679, "step": 123060 }, { "epoch": 0.24518280632411066, "grad_norm": 0.1517755687236786, "learning_rate": 0.002, "loss": 2.5684, "step": 123070 }, { "epoch": 0.24520272854774958, "grad_norm": 0.1970718652009964, "learning_rate": 0.002, "loss": 2.5771, "step": 123080 }, { "epoch": 0.2452226507713885, "grad_norm": 0.17557427287101746, "learning_rate": 0.002, "loss": 2.5534, "step": 123090 }, { "epoch": 0.2452425729950274, "grad_norm": 0.16877314448356628, "learning_rate": 0.002, "loss": 2.5494, "step": 123100 }, { "epoch": 0.24526249521866633, "grad_norm": 0.14522986114025116, "learning_rate": 0.002, "loss": 2.5668, "step": 123110 }, { "epoch": 0.24528241744230525, "grad_norm": 0.1551731526851654, "learning_rate": 0.002, "loss": 2.5657, "step": 123120 }, { "epoch": 0.24530233966594414, "grad_norm": 0.1829393357038498, "learning_rate": 0.002, "loss": 2.5677, "step": 123130 }, { "epoch": 0.24532226188958306, "grad_norm": 0.15468762814998627, "learning_rate": 0.002, "loss": 2.5596, "step": 123140 }, { "epoch": 0.24534218411322198, "grad_norm": 0.17593242228031158, "learning_rate": 0.002, "loss": 2.5709, "step": 123150 }, { "epoch": 0.2453621063368609, "grad_norm": 0.16551701724529266, "learning_rate": 0.002, "loss": 2.5607, "step": 123160 }, { "epoch": 0.24538202856049982, "grad_norm": 0.17599605023860931, "learning_rate": 0.002, "loss": 2.5747, "step": 123170 }, { "epoch": 0.24540195078413873, "grad_norm": 0.15229392051696777, "learning_rate": 0.002, "loss": 2.5724, "step": 123180 }, { "epoch": 0.24542187300777762, "grad_norm": 0.18809635937213898, "learning_rate": 0.002, "loss": 2.5738, "step": 123190 }, { "epoch": 0.24544179523141654, "grad_norm": 0.1401064693927765, "learning_rate": 0.002, "loss": 2.5616, "step": 123200 }, { "epoch": 0.24546171745505546, "grad_norm": 0.17075946927070618, "learning_rate": 0.002, "loss": 2.5598, "step": 123210 }, { "epoch": 0.24548163967869438, "grad_norm": 0.18588609993457794, "learning_rate": 0.002, "loss": 2.5721, "step": 123220 }, { "epoch": 0.2455015619023333, "grad_norm": 0.16702553629875183, "learning_rate": 0.002, "loss": 2.5754, "step": 123230 }, { "epoch": 0.24552148412597222, "grad_norm": 0.1390378773212433, "learning_rate": 0.002, "loss": 2.5641, "step": 123240 }, { "epoch": 0.2455414063496111, "grad_norm": 0.16938744485378265, "learning_rate": 0.002, "loss": 2.5916, "step": 123250 }, { "epoch": 0.24556132857325003, "grad_norm": 0.14016976952552795, "learning_rate": 0.002, "loss": 2.5482, "step": 123260 }, { "epoch": 0.24558125079688894, "grad_norm": 0.15887124836444855, "learning_rate": 0.002, "loss": 2.5836, "step": 123270 }, { "epoch": 0.24560117302052786, "grad_norm": 0.15387459099292755, "learning_rate": 0.002, "loss": 2.5785, "step": 123280 }, { "epoch": 0.24562109524416678, "grad_norm": 0.15247169137001038, "learning_rate": 0.002, "loss": 2.5643, "step": 123290 }, { "epoch": 0.2456410174678057, "grad_norm": 0.17703461647033691, "learning_rate": 0.002, "loss": 2.5618, "step": 123300 }, { "epoch": 0.2456609396914446, "grad_norm": 0.16703392565250397, "learning_rate": 0.002, "loss": 2.5726, "step": 123310 }, { "epoch": 0.2456808619150835, "grad_norm": 0.15596260130405426, "learning_rate": 0.002, "loss": 2.5567, "step": 123320 }, { "epoch": 0.24570078413872243, "grad_norm": 0.1802835315465927, "learning_rate": 0.002, "loss": 2.5586, "step": 123330 }, { "epoch": 0.24572070636236135, "grad_norm": 0.19020172953605652, "learning_rate": 0.002, "loss": 2.5632, "step": 123340 }, { "epoch": 0.24574062858600026, "grad_norm": 0.16589851677417755, "learning_rate": 0.002, "loss": 2.5691, "step": 123350 }, { "epoch": 0.24576055080963916, "grad_norm": 0.17191527783870697, "learning_rate": 0.002, "loss": 2.5581, "step": 123360 }, { "epoch": 0.24578047303327807, "grad_norm": 0.16206078231334686, "learning_rate": 0.002, "loss": 2.5738, "step": 123370 }, { "epoch": 0.245800395256917, "grad_norm": 0.14983512461185455, "learning_rate": 0.002, "loss": 2.5673, "step": 123380 }, { "epoch": 0.2458203174805559, "grad_norm": 0.1861473172903061, "learning_rate": 0.002, "loss": 2.5621, "step": 123390 }, { "epoch": 0.24584023970419483, "grad_norm": 0.2065517008304596, "learning_rate": 0.002, "loss": 2.5617, "step": 123400 }, { "epoch": 0.24586016192783375, "grad_norm": 0.1722930669784546, "learning_rate": 0.002, "loss": 2.5695, "step": 123410 }, { "epoch": 0.24588008415147264, "grad_norm": 0.14149613678455353, "learning_rate": 0.002, "loss": 2.5602, "step": 123420 }, { "epoch": 0.24590000637511156, "grad_norm": 0.1682872772216797, "learning_rate": 0.002, "loss": 2.5703, "step": 123430 }, { "epoch": 0.24591992859875048, "grad_norm": 0.1840379238128662, "learning_rate": 0.002, "loss": 2.551, "step": 123440 }, { "epoch": 0.2459398508223894, "grad_norm": 0.15019673109054565, "learning_rate": 0.002, "loss": 2.573, "step": 123450 }, { "epoch": 0.2459597730460283, "grad_norm": 0.1761137843132019, "learning_rate": 0.002, "loss": 2.5754, "step": 123460 }, { "epoch": 0.24597969526966723, "grad_norm": 0.15912991762161255, "learning_rate": 0.002, "loss": 2.5602, "step": 123470 }, { "epoch": 0.24599961749330612, "grad_norm": 0.21196497976779938, "learning_rate": 0.002, "loss": 2.574, "step": 123480 }, { "epoch": 0.24601953971694504, "grad_norm": 0.1808105856180191, "learning_rate": 0.002, "loss": 2.5534, "step": 123490 }, { "epoch": 0.24603946194058396, "grad_norm": 0.15101578831672668, "learning_rate": 0.002, "loss": 2.5603, "step": 123500 }, { "epoch": 0.24605938416422288, "grad_norm": 0.1654086410999298, "learning_rate": 0.002, "loss": 2.5612, "step": 123510 }, { "epoch": 0.2460793063878618, "grad_norm": 0.17370866239070892, "learning_rate": 0.002, "loss": 2.5615, "step": 123520 }, { "epoch": 0.24609922861150071, "grad_norm": 0.18288974463939667, "learning_rate": 0.002, "loss": 2.5598, "step": 123530 }, { "epoch": 0.2461191508351396, "grad_norm": 0.18491575121879578, "learning_rate": 0.002, "loss": 2.5571, "step": 123540 }, { "epoch": 0.24613907305877852, "grad_norm": 0.18825766444206238, "learning_rate": 0.002, "loss": 2.5611, "step": 123550 }, { "epoch": 0.24615899528241744, "grad_norm": 0.158733531832695, "learning_rate": 0.002, "loss": 2.5692, "step": 123560 }, { "epoch": 0.24617891750605636, "grad_norm": 0.1517539769411087, "learning_rate": 0.002, "loss": 2.5578, "step": 123570 }, { "epoch": 0.24619883972969528, "grad_norm": 0.18916738033294678, "learning_rate": 0.002, "loss": 2.5703, "step": 123580 }, { "epoch": 0.24621876195333417, "grad_norm": 0.17019112408161163, "learning_rate": 0.002, "loss": 2.5593, "step": 123590 }, { "epoch": 0.2462386841769731, "grad_norm": 0.165449321269989, "learning_rate": 0.002, "loss": 2.5452, "step": 123600 }, { "epoch": 0.246258606400612, "grad_norm": 0.14745864272117615, "learning_rate": 0.002, "loss": 2.5696, "step": 123610 }, { "epoch": 0.24627852862425093, "grad_norm": 0.21650166809558868, "learning_rate": 0.002, "loss": 2.5757, "step": 123620 }, { "epoch": 0.24629845084788984, "grad_norm": 0.17729732394218445, "learning_rate": 0.002, "loss": 2.5659, "step": 123630 }, { "epoch": 0.24631837307152876, "grad_norm": 0.16065052151679993, "learning_rate": 0.002, "loss": 2.5687, "step": 123640 }, { "epoch": 0.24633829529516765, "grad_norm": 0.16535404324531555, "learning_rate": 0.002, "loss": 2.5832, "step": 123650 }, { "epoch": 0.24635821751880657, "grad_norm": 0.17420879006385803, "learning_rate": 0.002, "loss": 2.5536, "step": 123660 }, { "epoch": 0.2463781397424455, "grad_norm": 0.1839851588010788, "learning_rate": 0.002, "loss": 2.5654, "step": 123670 }, { "epoch": 0.2463980619660844, "grad_norm": 0.1652984321117401, "learning_rate": 0.002, "loss": 2.5684, "step": 123680 }, { "epoch": 0.24641798418972333, "grad_norm": 0.16905757784843445, "learning_rate": 0.002, "loss": 2.5834, "step": 123690 }, { "epoch": 0.24643790641336225, "grad_norm": 0.16408085823059082, "learning_rate": 0.002, "loss": 2.5681, "step": 123700 }, { "epoch": 0.24645782863700114, "grad_norm": 0.16434240341186523, "learning_rate": 0.002, "loss": 2.5651, "step": 123710 }, { "epoch": 0.24647775086064005, "grad_norm": 0.17369194328784943, "learning_rate": 0.002, "loss": 2.5649, "step": 123720 }, { "epoch": 0.24649767308427897, "grad_norm": 0.18027330935001373, "learning_rate": 0.002, "loss": 2.5726, "step": 123730 }, { "epoch": 0.2465175953079179, "grad_norm": 0.18206344544887543, "learning_rate": 0.002, "loss": 2.567, "step": 123740 }, { "epoch": 0.2465375175315568, "grad_norm": 0.18983109295368195, "learning_rate": 0.002, "loss": 2.5561, "step": 123750 }, { "epoch": 0.24655743975519573, "grad_norm": 0.16263076663017273, "learning_rate": 0.002, "loss": 2.5765, "step": 123760 }, { "epoch": 0.24657736197883462, "grad_norm": 0.17973262071609497, "learning_rate": 0.002, "loss": 2.5517, "step": 123770 }, { "epoch": 0.24659728420247354, "grad_norm": 0.19839023053646088, "learning_rate": 0.002, "loss": 2.5481, "step": 123780 }, { "epoch": 0.24661720642611246, "grad_norm": 0.16969206929206848, "learning_rate": 0.002, "loss": 2.5671, "step": 123790 }, { "epoch": 0.24663712864975137, "grad_norm": 0.16884027421474457, "learning_rate": 0.002, "loss": 2.5667, "step": 123800 }, { "epoch": 0.2466570508733903, "grad_norm": 0.21679796278476715, "learning_rate": 0.002, "loss": 2.5689, "step": 123810 }, { "epoch": 0.2466769730970292, "grad_norm": 0.15646784007549286, "learning_rate": 0.002, "loss": 2.5741, "step": 123820 }, { "epoch": 0.2466968953206681, "grad_norm": 0.17639459669589996, "learning_rate": 0.002, "loss": 2.5657, "step": 123830 }, { "epoch": 0.24671681754430702, "grad_norm": 0.15822386741638184, "learning_rate": 0.002, "loss": 2.5577, "step": 123840 }, { "epoch": 0.24673673976794594, "grad_norm": 0.16530673205852509, "learning_rate": 0.002, "loss": 2.557, "step": 123850 }, { "epoch": 0.24675666199158486, "grad_norm": 0.19263190031051636, "learning_rate": 0.002, "loss": 2.5667, "step": 123860 }, { "epoch": 0.24677658421522378, "grad_norm": 0.17559535801410675, "learning_rate": 0.002, "loss": 2.5655, "step": 123870 }, { "epoch": 0.24679650643886267, "grad_norm": 0.14685627818107605, "learning_rate": 0.002, "loss": 2.5566, "step": 123880 }, { "epoch": 0.24681642866250159, "grad_norm": 0.16125693917274475, "learning_rate": 0.002, "loss": 2.5729, "step": 123890 }, { "epoch": 0.2468363508861405, "grad_norm": 0.17151693999767303, "learning_rate": 0.002, "loss": 2.5707, "step": 123900 }, { "epoch": 0.24685627310977942, "grad_norm": 0.14938734471797943, "learning_rate": 0.002, "loss": 2.5651, "step": 123910 }, { "epoch": 0.24687619533341834, "grad_norm": 0.17166340351104736, "learning_rate": 0.002, "loss": 2.5711, "step": 123920 }, { "epoch": 0.24689611755705726, "grad_norm": 0.18100188672542572, "learning_rate": 0.002, "loss": 2.5703, "step": 123930 }, { "epoch": 0.24691603978069615, "grad_norm": 0.17647860944271088, "learning_rate": 0.002, "loss": 2.5689, "step": 123940 }, { "epoch": 0.24693596200433507, "grad_norm": 0.1769508272409439, "learning_rate": 0.002, "loss": 2.5656, "step": 123950 }, { "epoch": 0.246955884227974, "grad_norm": 0.17670480906963348, "learning_rate": 0.002, "loss": 2.575, "step": 123960 }, { "epoch": 0.2469758064516129, "grad_norm": 0.20934249460697174, "learning_rate": 0.002, "loss": 2.5633, "step": 123970 }, { "epoch": 0.24699572867525182, "grad_norm": 0.17201755940914154, "learning_rate": 0.002, "loss": 2.5748, "step": 123980 }, { "epoch": 0.24701565089889074, "grad_norm": 0.17599426209926605, "learning_rate": 0.002, "loss": 2.5686, "step": 123990 }, { "epoch": 0.24703557312252963, "grad_norm": 0.1888827681541443, "learning_rate": 0.002, "loss": 2.569, "step": 124000 }, { "epoch": 0.24705549534616855, "grad_norm": 0.15660345554351807, "learning_rate": 0.002, "loss": 2.5587, "step": 124010 }, { "epoch": 0.24707541756980747, "grad_norm": 0.14609728753566742, "learning_rate": 0.002, "loss": 2.546, "step": 124020 }, { "epoch": 0.2470953397934464, "grad_norm": 0.20465894043445587, "learning_rate": 0.002, "loss": 2.5705, "step": 124030 }, { "epoch": 0.2471152620170853, "grad_norm": 0.20927101373672485, "learning_rate": 0.002, "loss": 2.5657, "step": 124040 }, { "epoch": 0.24713518424072423, "grad_norm": 0.1818414181470871, "learning_rate": 0.002, "loss": 2.5572, "step": 124050 }, { "epoch": 0.24715510646436312, "grad_norm": 0.1669066995382309, "learning_rate": 0.002, "loss": 2.5655, "step": 124060 }, { "epoch": 0.24717502868800204, "grad_norm": 0.1722613275051117, "learning_rate": 0.002, "loss": 2.5676, "step": 124070 }, { "epoch": 0.24719495091164095, "grad_norm": 0.1590532660484314, "learning_rate": 0.002, "loss": 2.5654, "step": 124080 }, { "epoch": 0.24721487313527987, "grad_norm": 0.171881303191185, "learning_rate": 0.002, "loss": 2.5587, "step": 124090 }, { "epoch": 0.2472347953589188, "grad_norm": 0.17181897163391113, "learning_rate": 0.002, "loss": 2.5586, "step": 124100 }, { "epoch": 0.24725471758255768, "grad_norm": 0.15254636108875275, "learning_rate": 0.002, "loss": 2.5675, "step": 124110 }, { "epoch": 0.2472746398061966, "grad_norm": 0.15903188288211823, "learning_rate": 0.002, "loss": 2.581, "step": 124120 }, { "epoch": 0.24729456202983552, "grad_norm": 0.17150533199310303, "learning_rate": 0.002, "loss": 2.5708, "step": 124130 }, { "epoch": 0.24731448425347444, "grad_norm": 0.15086397528648376, "learning_rate": 0.002, "loss": 2.5797, "step": 124140 }, { "epoch": 0.24733440647711336, "grad_norm": 0.16103968024253845, "learning_rate": 0.002, "loss": 2.5681, "step": 124150 }, { "epoch": 0.24735432870075227, "grad_norm": 0.17657971382141113, "learning_rate": 0.002, "loss": 2.5711, "step": 124160 }, { "epoch": 0.24737425092439116, "grad_norm": 0.16476783156394958, "learning_rate": 0.002, "loss": 2.5778, "step": 124170 }, { "epoch": 0.24739417314803008, "grad_norm": 0.15763843059539795, "learning_rate": 0.002, "loss": 2.5534, "step": 124180 }, { "epoch": 0.247414095371669, "grad_norm": 0.17384491860866547, "learning_rate": 0.002, "loss": 2.5432, "step": 124190 }, { "epoch": 0.24743401759530792, "grad_norm": 0.1455041617155075, "learning_rate": 0.002, "loss": 2.585, "step": 124200 }, { "epoch": 0.24745393981894684, "grad_norm": 0.1688048392534256, "learning_rate": 0.002, "loss": 2.5714, "step": 124210 }, { "epoch": 0.24747386204258576, "grad_norm": 0.1743258535861969, "learning_rate": 0.002, "loss": 2.5704, "step": 124220 }, { "epoch": 0.24749378426622465, "grad_norm": 0.17240099608898163, "learning_rate": 0.002, "loss": 2.5691, "step": 124230 }, { "epoch": 0.24751370648986357, "grad_norm": 0.154449462890625, "learning_rate": 0.002, "loss": 2.5657, "step": 124240 }, { "epoch": 0.24753362871350248, "grad_norm": 0.14134915173053741, "learning_rate": 0.002, "loss": 2.5654, "step": 124250 }, { "epoch": 0.2475535509371414, "grad_norm": 0.1633605659008026, "learning_rate": 0.002, "loss": 2.5485, "step": 124260 }, { "epoch": 0.24757347316078032, "grad_norm": 0.1655343621969223, "learning_rate": 0.002, "loss": 2.5516, "step": 124270 }, { "epoch": 0.24759339538441924, "grad_norm": 0.14572079479694366, "learning_rate": 0.002, "loss": 2.5735, "step": 124280 }, { "epoch": 0.24761331760805813, "grad_norm": 0.21560505032539368, "learning_rate": 0.002, "loss": 2.5566, "step": 124290 }, { "epoch": 0.24763323983169705, "grad_norm": 0.18957555294036865, "learning_rate": 0.002, "loss": 2.5679, "step": 124300 }, { "epoch": 0.24765316205533597, "grad_norm": 0.13882021605968475, "learning_rate": 0.002, "loss": 2.5578, "step": 124310 }, { "epoch": 0.2476730842789749, "grad_norm": 0.17379146814346313, "learning_rate": 0.002, "loss": 2.5625, "step": 124320 }, { "epoch": 0.2476930065026138, "grad_norm": 0.17894196510314941, "learning_rate": 0.002, "loss": 2.577, "step": 124330 }, { "epoch": 0.2477129287262527, "grad_norm": 0.18192364275455475, "learning_rate": 0.002, "loss": 2.5851, "step": 124340 }, { "epoch": 0.24773285094989161, "grad_norm": 0.1734207719564438, "learning_rate": 0.002, "loss": 2.5762, "step": 124350 }, { "epoch": 0.24775277317353053, "grad_norm": 0.14289627969264984, "learning_rate": 0.002, "loss": 2.5629, "step": 124360 }, { "epoch": 0.24777269539716945, "grad_norm": 0.18005044758319855, "learning_rate": 0.002, "loss": 2.5667, "step": 124370 }, { "epoch": 0.24779261762080837, "grad_norm": 0.16745805740356445, "learning_rate": 0.002, "loss": 2.5533, "step": 124380 }, { "epoch": 0.2478125398444473, "grad_norm": 0.20236237347126007, "learning_rate": 0.002, "loss": 2.5832, "step": 124390 }, { "epoch": 0.24783246206808618, "grad_norm": 0.1624758243560791, "learning_rate": 0.002, "loss": 2.569, "step": 124400 }, { "epoch": 0.2478523842917251, "grad_norm": 0.17934457957744598, "learning_rate": 0.002, "loss": 2.551, "step": 124410 }, { "epoch": 0.24787230651536402, "grad_norm": 0.15623168647289276, "learning_rate": 0.002, "loss": 2.559, "step": 124420 }, { "epoch": 0.24789222873900293, "grad_norm": 0.1597059965133667, "learning_rate": 0.002, "loss": 2.5607, "step": 124430 }, { "epoch": 0.24791215096264185, "grad_norm": 0.1595689356327057, "learning_rate": 0.002, "loss": 2.5613, "step": 124440 }, { "epoch": 0.24793207318628077, "grad_norm": 0.2098294198513031, "learning_rate": 0.002, "loss": 2.573, "step": 124450 }, { "epoch": 0.24795199540991966, "grad_norm": 0.15977205336093903, "learning_rate": 0.002, "loss": 2.5805, "step": 124460 }, { "epoch": 0.24797191763355858, "grad_norm": 0.16995534300804138, "learning_rate": 0.002, "loss": 2.5653, "step": 124470 }, { "epoch": 0.2479918398571975, "grad_norm": 0.16960613429546356, "learning_rate": 0.002, "loss": 2.5649, "step": 124480 }, { "epoch": 0.24801176208083642, "grad_norm": 0.2010951191186905, "learning_rate": 0.002, "loss": 2.5595, "step": 124490 }, { "epoch": 0.24803168430447534, "grad_norm": 0.1428745836019516, "learning_rate": 0.002, "loss": 2.549, "step": 124500 }, { "epoch": 0.24805160652811425, "grad_norm": 0.18089094758033752, "learning_rate": 0.002, "loss": 2.559, "step": 124510 }, { "epoch": 0.24807152875175315, "grad_norm": 0.16634643077850342, "learning_rate": 0.002, "loss": 2.57, "step": 124520 }, { "epoch": 0.24809145097539206, "grad_norm": 0.1641116887331009, "learning_rate": 0.002, "loss": 2.5821, "step": 124530 }, { "epoch": 0.24811137319903098, "grad_norm": 0.1661594808101654, "learning_rate": 0.002, "loss": 2.5563, "step": 124540 }, { "epoch": 0.2481312954226699, "grad_norm": 0.1678975224494934, "learning_rate": 0.002, "loss": 2.5763, "step": 124550 }, { "epoch": 0.24815121764630882, "grad_norm": 0.14815615117549896, "learning_rate": 0.002, "loss": 2.5747, "step": 124560 }, { "epoch": 0.24817113986994774, "grad_norm": 0.15500925481319427, "learning_rate": 0.002, "loss": 2.5797, "step": 124570 }, { "epoch": 0.24819106209358663, "grad_norm": 0.1774366796016693, "learning_rate": 0.002, "loss": 2.5649, "step": 124580 }, { "epoch": 0.24821098431722555, "grad_norm": 0.17689430713653564, "learning_rate": 0.002, "loss": 2.5509, "step": 124590 }, { "epoch": 0.24823090654086447, "grad_norm": 0.1469539999961853, "learning_rate": 0.002, "loss": 2.5716, "step": 124600 }, { "epoch": 0.24825082876450338, "grad_norm": 0.1584157645702362, "learning_rate": 0.002, "loss": 2.5586, "step": 124610 }, { "epoch": 0.2482707509881423, "grad_norm": 0.21866218745708466, "learning_rate": 0.002, "loss": 2.5704, "step": 124620 }, { "epoch": 0.2482906732117812, "grad_norm": 0.17047074437141418, "learning_rate": 0.002, "loss": 2.5479, "step": 124630 }, { "epoch": 0.2483105954354201, "grad_norm": 0.18049415946006775, "learning_rate": 0.002, "loss": 2.5635, "step": 124640 }, { "epoch": 0.24833051765905903, "grad_norm": 0.18642359972000122, "learning_rate": 0.002, "loss": 2.5698, "step": 124650 }, { "epoch": 0.24835043988269795, "grad_norm": 0.179312601685524, "learning_rate": 0.002, "loss": 2.5664, "step": 124660 }, { "epoch": 0.24837036210633687, "grad_norm": 0.14835211634635925, "learning_rate": 0.002, "loss": 2.5572, "step": 124670 }, { "epoch": 0.24839028432997579, "grad_norm": 0.17052635550498962, "learning_rate": 0.002, "loss": 2.5541, "step": 124680 }, { "epoch": 0.24841020655361468, "grad_norm": 0.1616811603307724, "learning_rate": 0.002, "loss": 2.5614, "step": 124690 }, { "epoch": 0.2484301287772536, "grad_norm": 0.1573282778263092, "learning_rate": 0.002, "loss": 2.5568, "step": 124700 }, { "epoch": 0.2484500510008925, "grad_norm": 0.19834446907043457, "learning_rate": 0.002, "loss": 2.5612, "step": 124710 }, { "epoch": 0.24846997322453143, "grad_norm": 0.1728050708770752, "learning_rate": 0.002, "loss": 2.562, "step": 124720 }, { "epoch": 0.24848989544817035, "grad_norm": 0.18182730674743652, "learning_rate": 0.002, "loss": 2.5561, "step": 124730 }, { "epoch": 0.24850981767180927, "grad_norm": 0.19057540595531464, "learning_rate": 0.002, "loss": 2.5755, "step": 124740 }, { "epoch": 0.24852973989544816, "grad_norm": 0.14609840512275696, "learning_rate": 0.002, "loss": 2.5663, "step": 124750 }, { "epoch": 0.24854966211908708, "grad_norm": 0.1659381240606308, "learning_rate": 0.002, "loss": 2.5716, "step": 124760 }, { "epoch": 0.248569584342726, "grad_norm": 0.15317708253860474, "learning_rate": 0.002, "loss": 2.5643, "step": 124770 }, { "epoch": 0.24858950656636492, "grad_norm": 0.18143455684185028, "learning_rate": 0.002, "loss": 2.5669, "step": 124780 }, { "epoch": 0.24860942879000383, "grad_norm": 0.1808784008026123, "learning_rate": 0.002, "loss": 2.5665, "step": 124790 }, { "epoch": 0.24862935101364275, "grad_norm": 0.18054106831550598, "learning_rate": 0.002, "loss": 2.5718, "step": 124800 }, { "epoch": 0.24864927323728164, "grad_norm": 0.1474328488111496, "learning_rate": 0.002, "loss": 2.5615, "step": 124810 }, { "epoch": 0.24866919546092056, "grad_norm": 0.17399285733699799, "learning_rate": 0.002, "loss": 2.5502, "step": 124820 }, { "epoch": 0.24868911768455948, "grad_norm": 0.1492041051387787, "learning_rate": 0.002, "loss": 2.5825, "step": 124830 }, { "epoch": 0.2487090399081984, "grad_norm": 0.1558111310005188, "learning_rate": 0.002, "loss": 2.5558, "step": 124840 }, { "epoch": 0.24872896213183732, "grad_norm": 0.19617846608161926, "learning_rate": 0.002, "loss": 2.5552, "step": 124850 }, { "epoch": 0.2487488843554762, "grad_norm": 0.14804668724536896, "learning_rate": 0.002, "loss": 2.5487, "step": 124860 }, { "epoch": 0.24876880657911513, "grad_norm": 0.1983332335948944, "learning_rate": 0.002, "loss": 2.562, "step": 124870 }, { "epoch": 0.24878872880275404, "grad_norm": 0.15431268513202667, "learning_rate": 0.002, "loss": 2.568, "step": 124880 }, { "epoch": 0.24880865102639296, "grad_norm": 0.16215045750141144, "learning_rate": 0.002, "loss": 2.5624, "step": 124890 }, { "epoch": 0.24882857325003188, "grad_norm": 0.14266079664230347, "learning_rate": 0.002, "loss": 2.5564, "step": 124900 }, { "epoch": 0.2488484954736708, "grad_norm": 0.17299437522888184, "learning_rate": 0.002, "loss": 2.5727, "step": 124910 }, { "epoch": 0.2488684176973097, "grad_norm": 0.1567070186138153, "learning_rate": 0.002, "loss": 2.5548, "step": 124920 }, { "epoch": 0.2488883399209486, "grad_norm": 0.13888245820999146, "learning_rate": 0.002, "loss": 2.5645, "step": 124930 }, { "epoch": 0.24890826214458753, "grad_norm": 0.3825458586215973, "learning_rate": 0.002, "loss": 2.5652, "step": 124940 }, { "epoch": 0.24892818436822645, "grad_norm": 0.16188064217567444, "learning_rate": 0.002, "loss": 2.5729, "step": 124950 }, { "epoch": 0.24894810659186536, "grad_norm": 0.17213557660579681, "learning_rate": 0.002, "loss": 2.5617, "step": 124960 }, { "epoch": 0.24896802881550428, "grad_norm": 0.15917691588401794, "learning_rate": 0.002, "loss": 2.5598, "step": 124970 }, { "epoch": 0.24898795103914317, "grad_norm": 0.15509769320487976, "learning_rate": 0.002, "loss": 2.5515, "step": 124980 }, { "epoch": 0.2490078732627821, "grad_norm": 0.1836046725511551, "learning_rate": 0.002, "loss": 2.5616, "step": 124990 }, { "epoch": 0.249027795486421, "grad_norm": 0.19304266571998596, "learning_rate": 0.002, "loss": 2.5486, "step": 125000 }, { "epoch": 0.24904771771005993, "grad_norm": 0.17162029445171356, "learning_rate": 0.002, "loss": 2.5452, "step": 125010 }, { "epoch": 0.24906763993369885, "grad_norm": 0.14682629704475403, "learning_rate": 0.002, "loss": 2.568, "step": 125020 }, { "epoch": 0.24908756215733777, "grad_norm": 0.2270711213350296, "learning_rate": 0.002, "loss": 2.5741, "step": 125030 }, { "epoch": 0.24910748438097666, "grad_norm": 0.1721300184726715, "learning_rate": 0.002, "loss": 2.5665, "step": 125040 }, { "epoch": 0.24912740660461558, "grad_norm": 0.1845356523990631, "learning_rate": 0.002, "loss": 2.5638, "step": 125050 }, { "epoch": 0.2491473288282545, "grad_norm": 0.1678810864686966, "learning_rate": 0.002, "loss": 2.5655, "step": 125060 }, { "epoch": 0.2491672510518934, "grad_norm": 0.17423364520072937, "learning_rate": 0.002, "loss": 2.5622, "step": 125070 }, { "epoch": 0.24918717327553233, "grad_norm": 0.18425637483596802, "learning_rate": 0.002, "loss": 2.5572, "step": 125080 }, { "epoch": 0.24920709549917122, "grad_norm": 0.1766483187675476, "learning_rate": 0.002, "loss": 2.577, "step": 125090 }, { "epoch": 0.24922701772281014, "grad_norm": 0.1987217515707016, "learning_rate": 0.002, "loss": 2.5518, "step": 125100 }, { "epoch": 0.24924693994644906, "grad_norm": 0.16598892211914062, "learning_rate": 0.002, "loss": 2.5568, "step": 125110 }, { "epoch": 0.24926686217008798, "grad_norm": 0.18306344747543335, "learning_rate": 0.002, "loss": 2.5755, "step": 125120 }, { "epoch": 0.2492867843937269, "grad_norm": 0.1577683836221695, "learning_rate": 0.002, "loss": 2.5588, "step": 125130 }, { "epoch": 0.24930670661736581, "grad_norm": 0.14038294553756714, "learning_rate": 0.002, "loss": 2.5618, "step": 125140 }, { "epoch": 0.2493266288410047, "grad_norm": 0.16523253917694092, "learning_rate": 0.002, "loss": 2.576, "step": 125150 }, { "epoch": 0.24934655106464362, "grad_norm": 0.14553521573543549, "learning_rate": 0.002, "loss": 2.5612, "step": 125160 }, { "epoch": 0.24936647328828254, "grad_norm": 0.17442966997623444, "learning_rate": 0.002, "loss": 2.557, "step": 125170 }, { "epoch": 0.24938639551192146, "grad_norm": 0.19318461418151855, "learning_rate": 0.002, "loss": 2.5655, "step": 125180 }, { "epoch": 0.24940631773556038, "grad_norm": 0.1442302167415619, "learning_rate": 0.002, "loss": 2.5618, "step": 125190 }, { "epoch": 0.2494262399591993, "grad_norm": 0.18675431609153748, "learning_rate": 0.002, "loss": 2.5497, "step": 125200 }, { "epoch": 0.2494461621828382, "grad_norm": 0.15884122252464294, "learning_rate": 0.002, "loss": 2.5399, "step": 125210 }, { "epoch": 0.2494660844064771, "grad_norm": 0.17202334105968475, "learning_rate": 0.002, "loss": 2.5768, "step": 125220 }, { "epoch": 0.24948600663011603, "grad_norm": 0.16151615977287292, "learning_rate": 0.002, "loss": 2.5674, "step": 125230 }, { "epoch": 0.24950592885375494, "grad_norm": 0.25008633732795715, "learning_rate": 0.002, "loss": 2.5684, "step": 125240 }, { "epoch": 0.24952585107739386, "grad_norm": 0.17708618938922882, "learning_rate": 0.002, "loss": 2.58, "step": 125250 }, { "epoch": 0.24954577330103278, "grad_norm": 0.15049634873867035, "learning_rate": 0.002, "loss": 2.5566, "step": 125260 }, { "epoch": 0.24956569552467167, "grad_norm": 0.14121118187904358, "learning_rate": 0.002, "loss": 2.5555, "step": 125270 }, { "epoch": 0.2495856177483106, "grad_norm": 0.17857547104358673, "learning_rate": 0.002, "loss": 2.5567, "step": 125280 }, { "epoch": 0.2496055399719495, "grad_norm": 0.15307782590389252, "learning_rate": 0.002, "loss": 2.5776, "step": 125290 }, { "epoch": 0.24962546219558843, "grad_norm": 0.1976276934146881, "learning_rate": 0.002, "loss": 2.5506, "step": 125300 }, { "epoch": 0.24964538441922735, "grad_norm": 0.14970988035202026, "learning_rate": 0.002, "loss": 2.56, "step": 125310 }, { "epoch": 0.24966530664286626, "grad_norm": 0.1567850410938263, "learning_rate": 0.002, "loss": 2.5591, "step": 125320 }, { "epoch": 0.24968522886650515, "grad_norm": 0.1690051108598709, "learning_rate": 0.002, "loss": 2.562, "step": 125330 }, { "epoch": 0.24970515109014407, "grad_norm": 0.17150524258613586, "learning_rate": 0.002, "loss": 2.5562, "step": 125340 }, { "epoch": 0.249725073313783, "grad_norm": 0.17994774878025055, "learning_rate": 0.002, "loss": 2.5745, "step": 125350 }, { "epoch": 0.2497449955374219, "grad_norm": 0.1681414395570755, "learning_rate": 0.002, "loss": 2.5689, "step": 125360 }, { "epoch": 0.24976491776106083, "grad_norm": 0.15133577585220337, "learning_rate": 0.002, "loss": 2.5701, "step": 125370 }, { "epoch": 0.24978483998469972, "grad_norm": 0.1583365648984909, "learning_rate": 0.002, "loss": 2.561, "step": 125380 }, { "epoch": 0.24980476220833864, "grad_norm": 0.19721946120262146, "learning_rate": 0.002, "loss": 2.5635, "step": 125390 }, { "epoch": 0.24982468443197756, "grad_norm": 0.15345875918865204, "learning_rate": 0.002, "loss": 2.5615, "step": 125400 }, { "epoch": 0.24984460665561647, "grad_norm": 0.22814632952213287, "learning_rate": 0.002, "loss": 2.5685, "step": 125410 }, { "epoch": 0.2498645288792554, "grad_norm": 0.14314721524715424, "learning_rate": 0.002, "loss": 2.5658, "step": 125420 }, { "epoch": 0.2498844511028943, "grad_norm": 0.16022858023643494, "learning_rate": 0.002, "loss": 2.5614, "step": 125430 }, { "epoch": 0.2499043733265332, "grad_norm": 0.15590564906597137, "learning_rate": 0.002, "loss": 2.5624, "step": 125440 }, { "epoch": 0.24992429555017212, "grad_norm": 0.18417567014694214, "learning_rate": 0.002, "loss": 2.5575, "step": 125450 }, { "epoch": 0.24994421777381104, "grad_norm": 0.14570002257823944, "learning_rate": 0.002, "loss": 2.5553, "step": 125460 }, { "epoch": 0.24996413999744996, "grad_norm": 0.20083753764629364, "learning_rate": 0.002, "loss": 2.5619, "step": 125470 }, { "epoch": 0.24998406222108888, "grad_norm": 0.1804915815591812, "learning_rate": 0.002, "loss": 2.5588, "step": 125480 }, { "epoch": 0.25000398444472777, "grad_norm": 0.1807543933391571, "learning_rate": 0.002, "loss": 2.5647, "step": 125490 }, { "epoch": 0.2500239066683667, "grad_norm": 0.17013593018054962, "learning_rate": 0.002, "loss": 2.5812, "step": 125500 }, { "epoch": 0.2500438288920056, "grad_norm": 0.161858469247818, "learning_rate": 0.002, "loss": 2.5595, "step": 125510 }, { "epoch": 0.25006375111564455, "grad_norm": 0.15330326557159424, "learning_rate": 0.002, "loss": 2.5597, "step": 125520 }, { "epoch": 0.25008367333928344, "grad_norm": 0.16505566239356995, "learning_rate": 0.002, "loss": 2.5695, "step": 125530 }, { "epoch": 0.25010359556292233, "grad_norm": 0.17769962549209595, "learning_rate": 0.002, "loss": 2.5707, "step": 125540 }, { "epoch": 0.2501235177865613, "grad_norm": 0.18216551840305328, "learning_rate": 0.002, "loss": 2.5561, "step": 125550 }, { "epoch": 0.25014344001020017, "grad_norm": 0.189927875995636, "learning_rate": 0.002, "loss": 2.566, "step": 125560 }, { "epoch": 0.2501633622338391, "grad_norm": 0.15443889796733856, "learning_rate": 0.002, "loss": 2.5626, "step": 125570 }, { "epoch": 0.250183284457478, "grad_norm": 0.14834286272525787, "learning_rate": 0.002, "loss": 2.545, "step": 125580 }, { "epoch": 0.2502032066811169, "grad_norm": 0.1745433509349823, "learning_rate": 0.002, "loss": 2.5779, "step": 125590 }, { "epoch": 0.25022312890475584, "grad_norm": 0.1630559116601944, "learning_rate": 0.002, "loss": 2.5531, "step": 125600 }, { "epoch": 0.25024305112839473, "grad_norm": 0.1808317005634308, "learning_rate": 0.002, "loss": 2.5623, "step": 125610 }, { "epoch": 0.2502629733520337, "grad_norm": 0.16293567419052124, "learning_rate": 0.002, "loss": 2.5669, "step": 125620 }, { "epoch": 0.25028289557567257, "grad_norm": 0.14858177304267883, "learning_rate": 0.002, "loss": 2.5881, "step": 125630 }, { "epoch": 0.25030281779931146, "grad_norm": 0.2594813406467438, "learning_rate": 0.002, "loss": 2.5647, "step": 125640 }, { "epoch": 0.2503227400229504, "grad_norm": 0.1542104184627533, "learning_rate": 0.002, "loss": 2.5758, "step": 125650 }, { "epoch": 0.2503426622465893, "grad_norm": 0.15570847690105438, "learning_rate": 0.002, "loss": 2.5603, "step": 125660 }, { "epoch": 0.25036258447022824, "grad_norm": 0.14658930897712708, "learning_rate": 0.002, "loss": 2.5588, "step": 125670 }, { "epoch": 0.25038250669386714, "grad_norm": 0.19184184074401855, "learning_rate": 0.002, "loss": 2.5713, "step": 125680 }, { "epoch": 0.2504024289175061, "grad_norm": 0.170756995677948, "learning_rate": 0.002, "loss": 2.5385, "step": 125690 }, { "epoch": 0.25042235114114497, "grad_norm": 0.19781434535980225, "learning_rate": 0.002, "loss": 2.565, "step": 125700 }, { "epoch": 0.25044227336478386, "grad_norm": 0.16815361380577087, "learning_rate": 0.002, "loss": 2.5797, "step": 125710 }, { "epoch": 0.2504621955884228, "grad_norm": 0.15323945879936218, "learning_rate": 0.002, "loss": 2.564, "step": 125720 }, { "epoch": 0.2504821178120617, "grad_norm": 0.13694946467876434, "learning_rate": 0.002, "loss": 2.5521, "step": 125730 }, { "epoch": 0.25050204003570065, "grad_norm": 0.1495833396911621, "learning_rate": 0.002, "loss": 2.5609, "step": 125740 }, { "epoch": 0.25052196225933954, "grad_norm": 0.1859680861234665, "learning_rate": 0.002, "loss": 2.5604, "step": 125750 }, { "epoch": 0.2505418844829784, "grad_norm": 0.1786421835422516, "learning_rate": 0.002, "loss": 2.575, "step": 125760 }, { "epoch": 0.2505618067066174, "grad_norm": 0.1689399778842926, "learning_rate": 0.002, "loss": 2.5735, "step": 125770 }, { "epoch": 0.25058172893025626, "grad_norm": 0.1843002289533615, "learning_rate": 0.002, "loss": 2.5521, "step": 125780 }, { "epoch": 0.2506016511538952, "grad_norm": 0.15625908970832825, "learning_rate": 0.002, "loss": 2.5686, "step": 125790 }, { "epoch": 0.2506215733775341, "grad_norm": 0.21372447907924652, "learning_rate": 0.002, "loss": 2.549, "step": 125800 }, { "epoch": 0.25064149560117305, "grad_norm": 0.16271695494651794, "learning_rate": 0.002, "loss": 2.5707, "step": 125810 }, { "epoch": 0.25066141782481194, "grad_norm": 0.18924298882484436, "learning_rate": 0.002, "loss": 2.5752, "step": 125820 }, { "epoch": 0.25068134004845083, "grad_norm": 0.17934761941432953, "learning_rate": 0.002, "loss": 2.5477, "step": 125830 }, { "epoch": 0.2507012622720898, "grad_norm": 0.1565181463956833, "learning_rate": 0.002, "loss": 2.5866, "step": 125840 }, { "epoch": 0.25072118449572867, "grad_norm": 0.23185205459594727, "learning_rate": 0.002, "loss": 2.5744, "step": 125850 }, { "epoch": 0.2507411067193676, "grad_norm": 0.15562185645103455, "learning_rate": 0.002, "loss": 2.5674, "step": 125860 }, { "epoch": 0.2507610289430065, "grad_norm": 0.186961367726326, "learning_rate": 0.002, "loss": 2.545, "step": 125870 }, { "epoch": 0.2507809511666454, "grad_norm": 0.15886569023132324, "learning_rate": 0.002, "loss": 2.5532, "step": 125880 }, { "epoch": 0.25080087339028434, "grad_norm": 0.15794454514980316, "learning_rate": 0.002, "loss": 2.5678, "step": 125890 }, { "epoch": 0.25082079561392323, "grad_norm": 0.18170879781246185, "learning_rate": 0.002, "loss": 2.5643, "step": 125900 }, { "epoch": 0.2508407178375622, "grad_norm": 0.18634703755378723, "learning_rate": 0.002, "loss": 2.548, "step": 125910 }, { "epoch": 0.25086064006120107, "grad_norm": 0.17497296631336212, "learning_rate": 0.002, "loss": 2.559, "step": 125920 }, { "epoch": 0.25088056228483996, "grad_norm": 0.15816742181777954, "learning_rate": 0.002, "loss": 2.584, "step": 125930 }, { "epoch": 0.2509004845084789, "grad_norm": 0.17593978345394135, "learning_rate": 0.002, "loss": 2.5649, "step": 125940 }, { "epoch": 0.2509204067321178, "grad_norm": 0.16445714235305786, "learning_rate": 0.002, "loss": 2.5514, "step": 125950 }, { "epoch": 0.25094032895575674, "grad_norm": 0.15934711694717407, "learning_rate": 0.002, "loss": 2.5699, "step": 125960 }, { "epoch": 0.25096025117939563, "grad_norm": 0.18782909214496613, "learning_rate": 0.002, "loss": 2.5565, "step": 125970 }, { "epoch": 0.2509801734030346, "grad_norm": 0.1416718065738678, "learning_rate": 0.002, "loss": 2.5606, "step": 125980 }, { "epoch": 0.25100009562667347, "grad_norm": 0.16977842152118683, "learning_rate": 0.002, "loss": 2.5778, "step": 125990 }, { "epoch": 0.25102001785031236, "grad_norm": 0.16930587589740753, "learning_rate": 0.002, "loss": 2.5605, "step": 126000 }, { "epoch": 0.2510399400739513, "grad_norm": 0.1680903434753418, "learning_rate": 0.002, "loss": 2.5607, "step": 126010 }, { "epoch": 0.2510598622975902, "grad_norm": 0.14651232957839966, "learning_rate": 0.002, "loss": 2.5806, "step": 126020 }, { "epoch": 0.25107978452122914, "grad_norm": 0.17818300426006317, "learning_rate": 0.002, "loss": 2.5668, "step": 126030 }, { "epoch": 0.25109970674486803, "grad_norm": 0.14385820925235748, "learning_rate": 0.002, "loss": 2.561, "step": 126040 }, { "epoch": 0.2511196289685069, "grad_norm": 0.18886983394622803, "learning_rate": 0.002, "loss": 2.5761, "step": 126050 }, { "epoch": 0.25113955119214587, "grad_norm": 0.16445450484752655, "learning_rate": 0.002, "loss": 2.5557, "step": 126060 }, { "epoch": 0.25115947341578476, "grad_norm": 0.16383488476276398, "learning_rate": 0.002, "loss": 2.5751, "step": 126070 }, { "epoch": 0.2511793956394237, "grad_norm": 0.14809666574001312, "learning_rate": 0.002, "loss": 2.5715, "step": 126080 }, { "epoch": 0.2511993178630626, "grad_norm": 0.1695433109998703, "learning_rate": 0.002, "loss": 2.552, "step": 126090 }, { "epoch": 0.2512192400867015, "grad_norm": 0.2064918726682663, "learning_rate": 0.002, "loss": 2.585, "step": 126100 }, { "epoch": 0.25123916231034044, "grad_norm": 0.1547926962375641, "learning_rate": 0.002, "loss": 2.5499, "step": 126110 }, { "epoch": 0.2512590845339793, "grad_norm": 0.15194553136825562, "learning_rate": 0.002, "loss": 2.5571, "step": 126120 }, { "epoch": 0.2512790067576183, "grad_norm": 0.2064722180366516, "learning_rate": 0.002, "loss": 2.5635, "step": 126130 }, { "epoch": 0.25129892898125716, "grad_norm": 0.15504054725170135, "learning_rate": 0.002, "loss": 2.559, "step": 126140 }, { "epoch": 0.2513188512048961, "grad_norm": 0.16778939962387085, "learning_rate": 0.002, "loss": 2.5733, "step": 126150 }, { "epoch": 0.251338773428535, "grad_norm": 0.16727487742900848, "learning_rate": 0.002, "loss": 2.5707, "step": 126160 }, { "epoch": 0.2513586956521739, "grad_norm": 0.15325281023979187, "learning_rate": 0.002, "loss": 2.5796, "step": 126170 }, { "epoch": 0.25137861787581284, "grad_norm": 0.15134850144386292, "learning_rate": 0.002, "loss": 2.5637, "step": 126180 }, { "epoch": 0.25139854009945173, "grad_norm": 0.19226187467575073, "learning_rate": 0.002, "loss": 2.5788, "step": 126190 }, { "epoch": 0.2514184623230907, "grad_norm": 0.16836535930633545, "learning_rate": 0.002, "loss": 2.5688, "step": 126200 }, { "epoch": 0.25143838454672957, "grad_norm": 0.1792726069688797, "learning_rate": 0.002, "loss": 2.574, "step": 126210 }, { "epoch": 0.25145830677036846, "grad_norm": 0.16641560196876526, "learning_rate": 0.002, "loss": 2.5686, "step": 126220 }, { "epoch": 0.2514782289940074, "grad_norm": 0.17659421265125275, "learning_rate": 0.002, "loss": 2.5669, "step": 126230 }, { "epoch": 0.2514981512176463, "grad_norm": 0.16237850487232208, "learning_rate": 0.002, "loss": 2.5698, "step": 126240 }, { "epoch": 0.25151807344128524, "grad_norm": 0.17894764244556427, "learning_rate": 0.002, "loss": 2.556, "step": 126250 }, { "epoch": 0.25153799566492413, "grad_norm": 0.20288307964801788, "learning_rate": 0.002, "loss": 2.5593, "step": 126260 }, { "epoch": 0.2515579178885631, "grad_norm": 0.16809393465518951, "learning_rate": 0.002, "loss": 2.549, "step": 126270 }, { "epoch": 0.25157784011220197, "grad_norm": 0.17738690972328186, "learning_rate": 0.002, "loss": 2.562, "step": 126280 }, { "epoch": 0.25159776233584086, "grad_norm": 0.16883321106433868, "learning_rate": 0.002, "loss": 2.5689, "step": 126290 }, { "epoch": 0.2516176845594798, "grad_norm": 0.15076524019241333, "learning_rate": 0.002, "loss": 2.5738, "step": 126300 }, { "epoch": 0.2516376067831187, "grad_norm": 0.1877991110086441, "learning_rate": 0.002, "loss": 2.5565, "step": 126310 }, { "epoch": 0.25165752900675764, "grad_norm": 0.15672895312309265, "learning_rate": 0.002, "loss": 2.5641, "step": 126320 }, { "epoch": 0.25167745123039653, "grad_norm": 0.1681843250989914, "learning_rate": 0.002, "loss": 2.566, "step": 126330 }, { "epoch": 0.2516973734540354, "grad_norm": 0.14567849040031433, "learning_rate": 0.002, "loss": 2.5656, "step": 126340 }, { "epoch": 0.25171729567767437, "grad_norm": 0.20729877054691315, "learning_rate": 0.002, "loss": 2.5612, "step": 126350 }, { "epoch": 0.25173721790131326, "grad_norm": 0.1865459382534027, "learning_rate": 0.002, "loss": 2.5676, "step": 126360 }, { "epoch": 0.2517571401249522, "grad_norm": 0.1591530293226242, "learning_rate": 0.002, "loss": 2.5649, "step": 126370 }, { "epoch": 0.2517770623485911, "grad_norm": 0.15817897021770477, "learning_rate": 0.002, "loss": 2.5597, "step": 126380 }, { "epoch": 0.25179698457223, "grad_norm": 0.16082943975925446, "learning_rate": 0.002, "loss": 2.5585, "step": 126390 }, { "epoch": 0.25181690679586893, "grad_norm": 0.17931489646434784, "learning_rate": 0.002, "loss": 2.5582, "step": 126400 }, { "epoch": 0.2518368290195078, "grad_norm": 0.17550136148929596, "learning_rate": 0.002, "loss": 2.5704, "step": 126410 }, { "epoch": 0.25185675124314677, "grad_norm": 0.15371015667915344, "learning_rate": 0.002, "loss": 2.5497, "step": 126420 }, { "epoch": 0.25187667346678566, "grad_norm": 0.142452210187912, "learning_rate": 0.002, "loss": 2.5609, "step": 126430 }, { "epoch": 0.2518965956904246, "grad_norm": 0.1604292094707489, "learning_rate": 0.002, "loss": 2.5674, "step": 126440 }, { "epoch": 0.2519165179140635, "grad_norm": 0.1828443557024002, "learning_rate": 0.002, "loss": 2.5647, "step": 126450 }, { "epoch": 0.2519364401377024, "grad_norm": 0.150207057595253, "learning_rate": 0.002, "loss": 2.5496, "step": 126460 }, { "epoch": 0.25195636236134134, "grad_norm": 0.14683440327644348, "learning_rate": 0.002, "loss": 2.5505, "step": 126470 }, { "epoch": 0.2519762845849802, "grad_norm": 0.17764535546302795, "learning_rate": 0.002, "loss": 2.5732, "step": 126480 }, { "epoch": 0.25199620680861917, "grad_norm": 0.1560370773077011, "learning_rate": 0.002, "loss": 2.5529, "step": 126490 }, { "epoch": 0.25201612903225806, "grad_norm": 0.17582768201828003, "learning_rate": 0.002, "loss": 2.5745, "step": 126500 }, { "epoch": 0.25203605125589695, "grad_norm": 0.1993257701396942, "learning_rate": 0.002, "loss": 2.5648, "step": 126510 }, { "epoch": 0.2520559734795359, "grad_norm": 0.14284902811050415, "learning_rate": 0.002, "loss": 2.5502, "step": 126520 }, { "epoch": 0.2520758957031748, "grad_norm": 0.17108966410160065, "learning_rate": 0.002, "loss": 2.5539, "step": 126530 }, { "epoch": 0.25209581792681374, "grad_norm": 0.1665021926164627, "learning_rate": 0.002, "loss": 2.5615, "step": 126540 }, { "epoch": 0.2521157401504526, "grad_norm": 0.21824204921722412, "learning_rate": 0.002, "loss": 2.5454, "step": 126550 }, { "epoch": 0.2521356623740916, "grad_norm": 0.1582687497138977, "learning_rate": 0.002, "loss": 2.5595, "step": 126560 }, { "epoch": 0.25215558459773046, "grad_norm": 0.17593994736671448, "learning_rate": 0.002, "loss": 2.5643, "step": 126570 }, { "epoch": 0.25217550682136936, "grad_norm": 0.19192418456077576, "learning_rate": 0.002, "loss": 2.5465, "step": 126580 }, { "epoch": 0.2521954290450083, "grad_norm": 0.22542043030261993, "learning_rate": 0.002, "loss": 2.5558, "step": 126590 }, { "epoch": 0.2522153512686472, "grad_norm": 0.18576201796531677, "learning_rate": 0.002, "loss": 2.5523, "step": 126600 }, { "epoch": 0.25223527349228614, "grad_norm": 0.1817188560962677, "learning_rate": 0.002, "loss": 2.5605, "step": 126610 }, { "epoch": 0.25225519571592503, "grad_norm": 0.1648363620042801, "learning_rate": 0.002, "loss": 2.5592, "step": 126620 }, { "epoch": 0.2522751179395639, "grad_norm": 0.14018063247203827, "learning_rate": 0.002, "loss": 2.5505, "step": 126630 }, { "epoch": 0.25229504016320287, "grad_norm": 0.17934170365333557, "learning_rate": 0.002, "loss": 2.5684, "step": 126640 }, { "epoch": 0.25231496238684176, "grad_norm": 0.15921545028686523, "learning_rate": 0.002, "loss": 2.5658, "step": 126650 }, { "epoch": 0.2523348846104807, "grad_norm": 0.1662522852420807, "learning_rate": 0.002, "loss": 2.559, "step": 126660 }, { "epoch": 0.2523548068341196, "grad_norm": 0.18156442046165466, "learning_rate": 0.002, "loss": 2.5714, "step": 126670 }, { "epoch": 0.2523747290577585, "grad_norm": 0.17692820727825165, "learning_rate": 0.002, "loss": 2.5673, "step": 126680 }, { "epoch": 0.25239465128139743, "grad_norm": 0.14160332083702087, "learning_rate": 0.002, "loss": 2.5608, "step": 126690 }, { "epoch": 0.2524145735050363, "grad_norm": 0.16420605778694153, "learning_rate": 0.002, "loss": 2.5685, "step": 126700 }, { "epoch": 0.25243449572867527, "grad_norm": 0.1559978872537613, "learning_rate": 0.002, "loss": 2.5576, "step": 126710 }, { "epoch": 0.25245441795231416, "grad_norm": 0.16939067840576172, "learning_rate": 0.002, "loss": 2.5691, "step": 126720 }, { "epoch": 0.2524743401759531, "grad_norm": 0.16309642791748047, "learning_rate": 0.002, "loss": 2.5642, "step": 126730 }, { "epoch": 0.252494262399592, "grad_norm": 0.1381266713142395, "learning_rate": 0.002, "loss": 2.5679, "step": 126740 }, { "epoch": 0.2525141846232309, "grad_norm": 0.18978960812091827, "learning_rate": 0.002, "loss": 2.5681, "step": 126750 }, { "epoch": 0.25253410684686983, "grad_norm": 0.19244372844696045, "learning_rate": 0.002, "loss": 2.5494, "step": 126760 }, { "epoch": 0.2525540290705087, "grad_norm": 0.1767721176147461, "learning_rate": 0.002, "loss": 2.562, "step": 126770 }, { "epoch": 0.25257395129414767, "grad_norm": 0.16217799484729767, "learning_rate": 0.002, "loss": 2.5789, "step": 126780 }, { "epoch": 0.25259387351778656, "grad_norm": 0.15801288187503815, "learning_rate": 0.002, "loss": 2.5442, "step": 126790 }, { "epoch": 0.25261379574142545, "grad_norm": 0.1833079606294632, "learning_rate": 0.002, "loss": 2.564, "step": 126800 }, { "epoch": 0.2526337179650644, "grad_norm": 0.17259345948696136, "learning_rate": 0.002, "loss": 2.5733, "step": 126810 }, { "epoch": 0.2526536401887033, "grad_norm": 0.1776174157857895, "learning_rate": 0.002, "loss": 2.5623, "step": 126820 }, { "epoch": 0.25267356241234223, "grad_norm": 0.15741372108459473, "learning_rate": 0.002, "loss": 2.5576, "step": 126830 }, { "epoch": 0.2526934846359811, "grad_norm": 0.17657291889190674, "learning_rate": 0.002, "loss": 2.5534, "step": 126840 }, { "epoch": 0.25271340685962, "grad_norm": 0.1841297298669815, "learning_rate": 0.002, "loss": 2.5631, "step": 126850 }, { "epoch": 0.25273332908325896, "grad_norm": 0.14376066625118256, "learning_rate": 0.002, "loss": 2.5663, "step": 126860 }, { "epoch": 0.25275325130689785, "grad_norm": 0.17067661881446838, "learning_rate": 0.002, "loss": 2.538, "step": 126870 }, { "epoch": 0.2527731735305368, "grad_norm": 0.17977283895015717, "learning_rate": 0.002, "loss": 2.5529, "step": 126880 }, { "epoch": 0.2527930957541757, "grad_norm": 0.17546360194683075, "learning_rate": 0.002, "loss": 2.5686, "step": 126890 }, { "epoch": 0.25281301797781464, "grad_norm": 0.18623095750808716, "learning_rate": 0.002, "loss": 2.5588, "step": 126900 }, { "epoch": 0.2528329402014535, "grad_norm": 0.14884714782238007, "learning_rate": 0.002, "loss": 2.5673, "step": 126910 }, { "epoch": 0.2528528624250924, "grad_norm": 0.21827931702136993, "learning_rate": 0.002, "loss": 2.5631, "step": 126920 }, { "epoch": 0.25287278464873136, "grad_norm": 0.1520724594593048, "learning_rate": 0.002, "loss": 2.5566, "step": 126930 }, { "epoch": 0.25289270687237025, "grad_norm": 0.17983224987983704, "learning_rate": 0.002, "loss": 2.5519, "step": 126940 }, { "epoch": 0.2529126290960092, "grad_norm": 0.16813227534294128, "learning_rate": 0.002, "loss": 2.5632, "step": 126950 }, { "epoch": 0.2529325513196481, "grad_norm": 0.179561585187912, "learning_rate": 0.002, "loss": 2.5751, "step": 126960 }, { "epoch": 0.252952473543287, "grad_norm": 0.18694542348384857, "learning_rate": 0.002, "loss": 2.5664, "step": 126970 }, { "epoch": 0.25297239576692593, "grad_norm": 0.18540385365486145, "learning_rate": 0.002, "loss": 2.5762, "step": 126980 }, { "epoch": 0.2529923179905648, "grad_norm": 0.1394713670015335, "learning_rate": 0.002, "loss": 2.5631, "step": 126990 }, { "epoch": 0.25301224021420377, "grad_norm": 0.17798057198524475, "learning_rate": 0.002, "loss": 2.5835, "step": 127000 }, { "epoch": 0.25303216243784266, "grad_norm": 0.17264123260974884, "learning_rate": 0.002, "loss": 2.5584, "step": 127010 }, { "epoch": 0.2530520846614816, "grad_norm": 0.19604229927062988, "learning_rate": 0.002, "loss": 2.556, "step": 127020 }, { "epoch": 0.2530720068851205, "grad_norm": 0.18946492671966553, "learning_rate": 0.002, "loss": 2.5604, "step": 127030 }, { "epoch": 0.2530919291087594, "grad_norm": 0.17068953812122345, "learning_rate": 0.002, "loss": 2.5582, "step": 127040 }, { "epoch": 0.25311185133239833, "grad_norm": 0.1539221853017807, "learning_rate": 0.002, "loss": 2.5589, "step": 127050 }, { "epoch": 0.2531317735560372, "grad_norm": 0.1667182594537735, "learning_rate": 0.002, "loss": 2.5561, "step": 127060 }, { "epoch": 0.25315169577967617, "grad_norm": 0.1833382248878479, "learning_rate": 0.002, "loss": 2.5653, "step": 127070 }, { "epoch": 0.25317161800331506, "grad_norm": 0.14580687880516052, "learning_rate": 0.002, "loss": 2.5448, "step": 127080 }, { "epoch": 0.25319154022695395, "grad_norm": 0.15438692271709442, "learning_rate": 0.002, "loss": 2.5785, "step": 127090 }, { "epoch": 0.2532114624505929, "grad_norm": 0.17690593004226685, "learning_rate": 0.002, "loss": 2.5637, "step": 127100 }, { "epoch": 0.2532313846742318, "grad_norm": 0.15619467198848724, "learning_rate": 0.002, "loss": 2.5538, "step": 127110 }, { "epoch": 0.25325130689787073, "grad_norm": 0.16991497576236725, "learning_rate": 0.002, "loss": 2.5633, "step": 127120 }, { "epoch": 0.2532712291215096, "grad_norm": 0.16470688581466675, "learning_rate": 0.002, "loss": 2.5598, "step": 127130 }, { "epoch": 0.2532911513451485, "grad_norm": 0.17473505437374115, "learning_rate": 0.002, "loss": 2.56, "step": 127140 }, { "epoch": 0.25331107356878746, "grad_norm": 0.17762252688407898, "learning_rate": 0.002, "loss": 2.546, "step": 127150 }, { "epoch": 0.25333099579242635, "grad_norm": 0.14835435152053833, "learning_rate": 0.002, "loss": 2.5639, "step": 127160 }, { "epoch": 0.2533509180160653, "grad_norm": 0.15033751726150513, "learning_rate": 0.002, "loss": 2.5579, "step": 127170 }, { "epoch": 0.2533708402397042, "grad_norm": 0.1427927166223526, "learning_rate": 0.002, "loss": 2.5636, "step": 127180 }, { "epoch": 0.25339076246334313, "grad_norm": 0.1838611513376236, "learning_rate": 0.002, "loss": 2.5585, "step": 127190 }, { "epoch": 0.253410684686982, "grad_norm": 0.14260560274124146, "learning_rate": 0.002, "loss": 2.5568, "step": 127200 }, { "epoch": 0.2534306069106209, "grad_norm": 0.2212563008069992, "learning_rate": 0.002, "loss": 2.5588, "step": 127210 }, { "epoch": 0.25345052913425986, "grad_norm": 0.16325220465660095, "learning_rate": 0.002, "loss": 2.5589, "step": 127220 }, { "epoch": 0.25347045135789875, "grad_norm": 0.17510825395584106, "learning_rate": 0.002, "loss": 2.5611, "step": 127230 }, { "epoch": 0.2534903735815377, "grad_norm": 0.15328270196914673, "learning_rate": 0.002, "loss": 2.5683, "step": 127240 }, { "epoch": 0.2535102958051766, "grad_norm": 0.14847764372825623, "learning_rate": 0.002, "loss": 2.5809, "step": 127250 }, { "epoch": 0.2535302180288155, "grad_norm": 0.16165296733379364, "learning_rate": 0.002, "loss": 2.5746, "step": 127260 }, { "epoch": 0.2535501402524544, "grad_norm": 0.18412987887859344, "learning_rate": 0.002, "loss": 2.5769, "step": 127270 }, { "epoch": 0.2535700624760933, "grad_norm": 0.14967285096645355, "learning_rate": 0.002, "loss": 2.5548, "step": 127280 }, { "epoch": 0.25358998469973226, "grad_norm": 0.15864475071430206, "learning_rate": 0.002, "loss": 2.5569, "step": 127290 }, { "epoch": 0.25360990692337115, "grad_norm": 0.16001132130622864, "learning_rate": 0.002, "loss": 2.5602, "step": 127300 }, { "epoch": 0.2536298291470101, "grad_norm": 0.18475346267223358, "learning_rate": 0.002, "loss": 2.5518, "step": 127310 }, { "epoch": 0.253649751370649, "grad_norm": 0.15480093657970428, "learning_rate": 0.002, "loss": 2.5508, "step": 127320 }, { "epoch": 0.2536696735942879, "grad_norm": 0.18573659658432007, "learning_rate": 0.002, "loss": 2.5833, "step": 127330 }, { "epoch": 0.2536895958179268, "grad_norm": 0.17168289422988892, "learning_rate": 0.002, "loss": 2.5604, "step": 127340 }, { "epoch": 0.2537095180415657, "grad_norm": 0.17276056110858917, "learning_rate": 0.002, "loss": 2.5674, "step": 127350 }, { "epoch": 0.25372944026520466, "grad_norm": 0.18414422869682312, "learning_rate": 0.002, "loss": 2.5735, "step": 127360 }, { "epoch": 0.25374936248884356, "grad_norm": 0.15320970118045807, "learning_rate": 0.002, "loss": 2.5644, "step": 127370 }, { "epoch": 0.25376928471248245, "grad_norm": 0.15426166355609894, "learning_rate": 0.002, "loss": 2.5447, "step": 127380 }, { "epoch": 0.2537892069361214, "grad_norm": 0.18890774250030518, "learning_rate": 0.002, "loss": 2.5508, "step": 127390 }, { "epoch": 0.2538091291597603, "grad_norm": 0.1857513040304184, "learning_rate": 0.002, "loss": 2.5706, "step": 127400 }, { "epoch": 0.25382905138339923, "grad_norm": 0.1478116810321808, "learning_rate": 0.002, "loss": 2.5631, "step": 127410 }, { "epoch": 0.2538489736070381, "grad_norm": 0.18786081671714783, "learning_rate": 0.002, "loss": 2.5646, "step": 127420 }, { "epoch": 0.253868895830677, "grad_norm": 0.17409248650074005, "learning_rate": 0.002, "loss": 2.5654, "step": 127430 }, { "epoch": 0.25388881805431596, "grad_norm": 0.1703788787126541, "learning_rate": 0.002, "loss": 2.5719, "step": 127440 }, { "epoch": 0.25390874027795485, "grad_norm": 0.17121239006519318, "learning_rate": 0.002, "loss": 2.5679, "step": 127450 }, { "epoch": 0.2539286625015938, "grad_norm": 0.14470434188842773, "learning_rate": 0.002, "loss": 2.5589, "step": 127460 }, { "epoch": 0.2539485847252327, "grad_norm": 0.19442211091518402, "learning_rate": 0.002, "loss": 2.574, "step": 127470 }, { "epoch": 0.25396850694887163, "grad_norm": 0.17499807476997375, "learning_rate": 0.002, "loss": 2.5571, "step": 127480 }, { "epoch": 0.2539884291725105, "grad_norm": 0.1713692992925644, "learning_rate": 0.002, "loss": 2.5596, "step": 127490 }, { "epoch": 0.2540083513961494, "grad_norm": 0.1460728943347931, "learning_rate": 0.002, "loss": 2.5791, "step": 127500 }, { "epoch": 0.25402827361978836, "grad_norm": 0.17136943340301514, "learning_rate": 0.002, "loss": 2.5688, "step": 127510 }, { "epoch": 0.25404819584342725, "grad_norm": 0.1839083731174469, "learning_rate": 0.002, "loss": 2.5587, "step": 127520 }, { "epoch": 0.2540681180670662, "grad_norm": 0.18384459614753723, "learning_rate": 0.002, "loss": 2.5574, "step": 127530 }, { "epoch": 0.2540880402907051, "grad_norm": 0.15438425540924072, "learning_rate": 0.002, "loss": 2.5379, "step": 127540 }, { "epoch": 0.254107962514344, "grad_norm": 0.16105929017066956, "learning_rate": 0.002, "loss": 2.5632, "step": 127550 }, { "epoch": 0.2541278847379829, "grad_norm": 0.15523050725460052, "learning_rate": 0.002, "loss": 2.5542, "step": 127560 }, { "epoch": 0.2541478069616218, "grad_norm": 0.152109295129776, "learning_rate": 0.002, "loss": 2.5401, "step": 127570 }, { "epoch": 0.25416772918526076, "grad_norm": 0.19364267587661743, "learning_rate": 0.002, "loss": 2.5748, "step": 127580 }, { "epoch": 0.25418765140889965, "grad_norm": 0.15638351440429688, "learning_rate": 0.002, "loss": 2.557, "step": 127590 }, { "epoch": 0.2542075736325386, "grad_norm": 0.20329177379608154, "learning_rate": 0.002, "loss": 2.5541, "step": 127600 }, { "epoch": 0.2542274958561775, "grad_norm": 0.17879246175289154, "learning_rate": 0.002, "loss": 2.5755, "step": 127610 }, { "epoch": 0.2542474180798164, "grad_norm": 0.16392090916633606, "learning_rate": 0.002, "loss": 2.5598, "step": 127620 }, { "epoch": 0.2542673403034553, "grad_norm": 0.15629439055919647, "learning_rate": 0.002, "loss": 2.5686, "step": 127630 }, { "epoch": 0.2542872625270942, "grad_norm": 0.1826051026582718, "learning_rate": 0.002, "loss": 2.5537, "step": 127640 }, { "epoch": 0.25430718475073316, "grad_norm": 0.1580016016960144, "learning_rate": 0.002, "loss": 2.5719, "step": 127650 }, { "epoch": 0.25432710697437205, "grad_norm": 0.14772352576255798, "learning_rate": 0.002, "loss": 2.5522, "step": 127660 }, { "epoch": 0.25434702919801094, "grad_norm": 0.15131376683712006, "learning_rate": 0.002, "loss": 2.5792, "step": 127670 }, { "epoch": 0.2543669514216499, "grad_norm": 0.18404299020767212, "learning_rate": 0.002, "loss": 2.5753, "step": 127680 }, { "epoch": 0.2543868736452888, "grad_norm": 0.16602863371372223, "learning_rate": 0.002, "loss": 2.5582, "step": 127690 }, { "epoch": 0.2544067958689277, "grad_norm": 0.1675478219985962, "learning_rate": 0.002, "loss": 2.5639, "step": 127700 }, { "epoch": 0.2544267180925666, "grad_norm": 0.14674699306488037, "learning_rate": 0.002, "loss": 2.5531, "step": 127710 }, { "epoch": 0.2544466403162055, "grad_norm": 0.1617407500743866, "learning_rate": 0.002, "loss": 2.5633, "step": 127720 }, { "epoch": 0.25446656253984445, "grad_norm": 0.16477316617965698, "learning_rate": 0.002, "loss": 2.5491, "step": 127730 }, { "epoch": 0.25448648476348334, "grad_norm": 0.21177077293395996, "learning_rate": 0.002, "loss": 2.572, "step": 127740 }, { "epoch": 0.2545064069871223, "grad_norm": 0.16851535439491272, "learning_rate": 0.002, "loss": 2.5603, "step": 127750 }, { "epoch": 0.2545263292107612, "grad_norm": 0.15196074545383453, "learning_rate": 0.002, "loss": 2.5602, "step": 127760 }, { "epoch": 0.25454625143440013, "grad_norm": 0.17408877611160278, "learning_rate": 0.002, "loss": 2.5686, "step": 127770 }, { "epoch": 0.254566173658039, "grad_norm": 0.16909287869930267, "learning_rate": 0.002, "loss": 2.5774, "step": 127780 }, { "epoch": 0.2545860958816779, "grad_norm": 0.15396755933761597, "learning_rate": 0.002, "loss": 2.5644, "step": 127790 }, { "epoch": 0.25460601810531686, "grad_norm": 0.15928365290164948, "learning_rate": 0.002, "loss": 2.5734, "step": 127800 }, { "epoch": 0.25462594032895575, "grad_norm": 0.15322750806808472, "learning_rate": 0.002, "loss": 2.5631, "step": 127810 }, { "epoch": 0.2546458625525947, "grad_norm": 0.15906241536140442, "learning_rate": 0.002, "loss": 2.5695, "step": 127820 }, { "epoch": 0.2546657847762336, "grad_norm": 0.16838929057121277, "learning_rate": 0.002, "loss": 2.5629, "step": 127830 }, { "epoch": 0.2546857069998725, "grad_norm": 0.16909605264663696, "learning_rate": 0.002, "loss": 2.5568, "step": 127840 }, { "epoch": 0.2547056292235114, "grad_norm": 0.14908768236637115, "learning_rate": 0.002, "loss": 2.5563, "step": 127850 }, { "epoch": 0.2547255514471503, "grad_norm": 0.20219670236110687, "learning_rate": 0.002, "loss": 2.5707, "step": 127860 }, { "epoch": 0.25474547367078926, "grad_norm": 0.29515784978866577, "learning_rate": 0.002, "loss": 2.574, "step": 127870 }, { "epoch": 0.25476539589442815, "grad_norm": 0.1482856571674347, "learning_rate": 0.002, "loss": 2.5739, "step": 127880 }, { "epoch": 0.25478531811806704, "grad_norm": 0.17492328584194183, "learning_rate": 0.002, "loss": 2.5793, "step": 127890 }, { "epoch": 0.254805240341706, "grad_norm": 0.15728524327278137, "learning_rate": 0.002, "loss": 2.5714, "step": 127900 }, { "epoch": 0.2548251625653449, "grad_norm": 0.1342344731092453, "learning_rate": 0.002, "loss": 2.5755, "step": 127910 }, { "epoch": 0.2548450847889838, "grad_norm": 0.1683795154094696, "learning_rate": 0.002, "loss": 2.5687, "step": 127920 }, { "epoch": 0.2548650070126227, "grad_norm": 0.17255890369415283, "learning_rate": 0.002, "loss": 2.5555, "step": 127930 }, { "epoch": 0.25488492923626166, "grad_norm": 0.14862608909606934, "learning_rate": 0.002, "loss": 2.5571, "step": 127940 }, { "epoch": 0.25490485145990055, "grad_norm": 0.16230443120002747, "learning_rate": 0.002, "loss": 2.5543, "step": 127950 }, { "epoch": 0.25492477368353944, "grad_norm": 0.1572536826133728, "learning_rate": 0.002, "loss": 2.5637, "step": 127960 }, { "epoch": 0.2549446959071784, "grad_norm": 0.13944903016090393, "learning_rate": 0.002, "loss": 2.5685, "step": 127970 }, { "epoch": 0.2549646181308173, "grad_norm": 0.1658642292022705, "learning_rate": 0.002, "loss": 2.57, "step": 127980 }, { "epoch": 0.2549845403544562, "grad_norm": 0.17330043017864227, "learning_rate": 0.002, "loss": 2.5698, "step": 127990 }, { "epoch": 0.2550044625780951, "grad_norm": 0.16331979632377625, "learning_rate": 0.002, "loss": 2.5688, "step": 128000 }, { "epoch": 0.255024384801734, "grad_norm": 0.1988106667995453, "learning_rate": 0.002, "loss": 2.5489, "step": 128010 }, { "epoch": 0.25504430702537295, "grad_norm": 0.15916436910629272, "learning_rate": 0.002, "loss": 2.5638, "step": 128020 }, { "epoch": 0.25506422924901184, "grad_norm": 0.16616898775100708, "learning_rate": 0.002, "loss": 2.5701, "step": 128030 }, { "epoch": 0.2550841514726508, "grad_norm": 0.15503337979316711, "learning_rate": 0.002, "loss": 2.5484, "step": 128040 }, { "epoch": 0.2551040736962897, "grad_norm": 0.1514054834842682, "learning_rate": 0.002, "loss": 2.5651, "step": 128050 }, { "epoch": 0.2551239959199286, "grad_norm": 0.173799529671669, "learning_rate": 0.002, "loss": 2.5596, "step": 128060 }, { "epoch": 0.2551439181435675, "grad_norm": 0.15461517870426178, "learning_rate": 0.002, "loss": 2.5477, "step": 128070 }, { "epoch": 0.2551638403672064, "grad_norm": 0.15197105705738068, "learning_rate": 0.002, "loss": 2.5514, "step": 128080 }, { "epoch": 0.25518376259084535, "grad_norm": 0.1560031920671463, "learning_rate": 0.002, "loss": 2.5612, "step": 128090 }, { "epoch": 0.25520368481448424, "grad_norm": 0.17096872627735138, "learning_rate": 0.002, "loss": 2.5625, "step": 128100 }, { "epoch": 0.2552236070381232, "grad_norm": 0.1706695854663849, "learning_rate": 0.002, "loss": 2.5636, "step": 128110 }, { "epoch": 0.2552435292617621, "grad_norm": 0.17665353417396545, "learning_rate": 0.002, "loss": 2.5876, "step": 128120 }, { "epoch": 0.25526345148540097, "grad_norm": 0.15786772966384888, "learning_rate": 0.002, "loss": 2.5735, "step": 128130 }, { "epoch": 0.2552833737090399, "grad_norm": 0.17566367983818054, "learning_rate": 0.002, "loss": 2.5837, "step": 128140 }, { "epoch": 0.2553032959326788, "grad_norm": 0.15878121554851532, "learning_rate": 0.002, "loss": 2.5768, "step": 128150 }, { "epoch": 0.25532321815631775, "grad_norm": 0.1781301349401474, "learning_rate": 0.002, "loss": 2.5567, "step": 128160 }, { "epoch": 0.25534314037995665, "grad_norm": 0.19105662405490875, "learning_rate": 0.002, "loss": 2.5554, "step": 128170 }, { "epoch": 0.25536306260359554, "grad_norm": 0.16448059678077698, "learning_rate": 0.002, "loss": 2.5681, "step": 128180 }, { "epoch": 0.2553829848272345, "grad_norm": 0.23814824223518372, "learning_rate": 0.002, "loss": 2.5774, "step": 128190 }, { "epoch": 0.2554029070508734, "grad_norm": 0.13925328850746155, "learning_rate": 0.002, "loss": 2.5763, "step": 128200 }, { "epoch": 0.2554228292745123, "grad_norm": 0.1531752049922943, "learning_rate": 0.002, "loss": 2.5606, "step": 128210 }, { "epoch": 0.2554427514981512, "grad_norm": 0.15527299046516418, "learning_rate": 0.002, "loss": 2.5866, "step": 128220 }, { "epoch": 0.25546267372179016, "grad_norm": 0.15251897275447845, "learning_rate": 0.002, "loss": 2.564, "step": 128230 }, { "epoch": 0.25548259594542905, "grad_norm": 0.16185522079467773, "learning_rate": 0.002, "loss": 2.5567, "step": 128240 }, { "epoch": 0.25550251816906794, "grad_norm": 0.1904333531856537, "learning_rate": 0.002, "loss": 2.5541, "step": 128250 }, { "epoch": 0.2555224403927069, "grad_norm": 0.16993564367294312, "learning_rate": 0.002, "loss": 2.5709, "step": 128260 }, { "epoch": 0.2555423626163458, "grad_norm": 0.17632363736629486, "learning_rate": 0.002, "loss": 2.5589, "step": 128270 }, { "epoch": 0.2555622848399847, "grad_norm": 0.1731853485107422, "learning_rate": 0.002, "loss": 2.5712, "step": 128280 }, { "epoch": 0.2555822070636236, "grad_norm": 0.172820046544075, "learning_rate": 0.002, "loss": 2.5562, "step": 128290 }, { "epoch": 0.2556021292872625, "grad_norm": 0.16567769646644592, "learning_rate": 0.002, "loss": 2.5669, "step": 128300 }, { "epoch": 0.25562205151090145, "grad_norm": 0.19755485653877258, "learning_rate": 0.002, "loss": 2.5655, "step": 128310 }, { "epoch": 0.25564197373454034, "grad_norm": 0.15535816550254822, "learning_rate": 0.002, "loss": 2.5662, "step": 128320 }, { "epoch": 0.2556618959581793, "grad_norm": 0.23302516341209412, "learning_rate": 0.002, "loss": 2.563, "step": 128330 }, { "epoch": 0.2556818181818182, "grad_norm": 0.17676235735416412, "learning_rate": 0.002, "loss": 2.5678, "step": 128340 }, { "epoch": 0.2557017404054571, "grad_norm": 0.1650887429714203, "learning_rate": 0.002, "loss": 2.5633, "step": 128350 }, { "epoch": 0.255721662629096, "grad_norm": 0.18136730790138245, "learning_rate": 0.002, "loss": 2.5643, "step": 128360 }, { "epoch": 0.2557415848527349, "grad_norm": 0.1786692887544632, "learning_rate": 0.002, "loss": 2.562, "step": 128370 }, { "epoch": 0.25576150707637385, "grad_norm": 0.15647125244140625, "learning_rate": 0.002, "loss": 2.5532, "step": 128380 }, { "epoch": 0.25578142930001274, "grad_norm": 0.18778164684772491, "learning_rate": 0.002, "loss": 2.5607, "step": 128390 }, { "epoch": 0.2558013515236517, "grad_norm": 0.156102254986763, "learning_rate": 0.002, "loss": 2.5534, "step": 128400 }, { "epoch": 0.2558212737472906, "grad_norm": 0.14999079704284668, "learning_rate": 0.002, "loss": 2.5658, "step": 128410 }, { "epoch": 0.25584119597092947, "grad_norm": 0.2241174429655075, "learning_rate": 0.002, "loss": 2.5713, "step": 128420 }, { "epoch": 0.2558611181945684, "grad_norm": 0.14332133531570435, "learning_rate": 0.002, "loss": 2.5556, "step": 128430 }, { "epoch": 0.2558810404182073, "grad_norm": 0.16738013923168182, "learning_rate": 0.002, "loss": 2.5776, "step": 128440 }, { "epoch": 0.25590096264184625, "grad_norm": 0.1519709974527359, "learning_rate": 0.002, "loss": 2.5603, "step": 128450 }, { "epoch": 0.25592088486548514, "grad_norm": 0.17616838216781616, "learning_rate": 0.002, "loss": 2.5719, "step": 128460 }, { "epoch": 0.25594080708912403, "grad_norm": 0.18009132146835327, "learning_rate": 0.002, "loss": 2.5697, "step": 128470 }, { "epoch": 0.255960729312763, "grad_norm": 0.1868477463722229, "learning_rate": 0.002, "loss": 2.5612, "step": 128480 }, { "epoch": 0.25598065153640187, "grad_norm": 0.16108758747577667, "learning_rate": 0.002, "loss": 2.5729, "step": 128490 }, { "epoch": 0.2560005737600408, "grad_norm": 0.15857990086078644, "learning_rate": 0.002, "loss": 2.5677, "step": 128500 }, { "epoch": 0.2560204959836797, "grad_norm": 0.16789878904819489, "learning_rate": 0.002, "loss": 2.5661, "step": 128510 }, { "epoch": 0.25604041820731865, "grad_norm": 0.16304507851600647, "learning_rate": 0.002, "loss": 2.5711, "step": 128520 }, { "epoch": 0.25606034043095754, "grad_norm": 0.20612859725952148, "learning_rate": 0.002, "loss": 2.5684, "step": 128530 }, { "epoch": 0.25608026265459644, "grad_norm": 0.16530463099479675, "learning_rate": 0.002, "loss": 2.5607, "step": 128540 }, { "epoch": 0.2561001848782354, "grad_norm": 0.14214256405830383, "learning_rate": 0.002, "loss": 2.5578, "step": 128550 }, { "epoch": 0.2561201071018743, "grad_norm": 0.19173531234264374, "learning_rate": 0.002, "loss": 2.5837, "step": 128560 }, { "epoch": 0.2561400293255132, "grad_norm": 0.1519676297903061, "learning_rate": 0.002, "loss": 2.5579, "step": 128570 }, { "epoch": 0.2561599515491521, "grad_norm": 0.16953545808792114, "learning_rate": 0.002, "loss": 2.5608, "step": 128580 }, { "epoch": 0.256179873772791, "grad_norm": 0.17193542420864105, "learning_rate": 0.002, "loss": 2.5746, "step": 128590 }, { "epoch": 0.25619979599642995, "grad_norm": 0.1653844267129898, "learning_rate": 0.002, "loss": 2.5793, "step": 128600 }, { "epoch": 0.25621971822006884, "grad_norm": 0.15332582592964172, "learning_rate": 0.002, "loss": 2.5639, "step": 128610 }, { "epoch": 0.2562396404437078, "grad_norm": 0.20384839177131653, "learning_rate": 0.002, "loss": 2.5701, "step": 128620 }, { "epoch": 0.2562595626673467, "grad_norm": 0.15545764565467834, "learning_rate": 0.002, "loss": 2.579, "step": 128630 }, { "epoch": 0.25627948489098556, "grad_norm": 0.17422552406787872, "learning_rate": 0.002, "loss": 2.5543, "step": 128640 }, { "epoch": 0.2562994071146245, "grad_norm": 0.16503477096557617, "learning_rate": 0.002, "loss": 2.5435, "step": 128650 }, { "epoch": 0.2563193293382634, "grad_norm": 0.21138638257980347, "learning_rate": 0.002, "loss": 2.5634, "step": 128660 }, { "epoch": 0.25633925156190235, "grad_norm": 0.16682986915111542, "learning_rate": 0.002, "loss": 2.5765, "step": 128670 }, { "epoch": 0.25635917378554124, "grad_norm": 0.14841221272945404, "learning_rate": 0.002, "loss": 2.563, "step": 128680 }, { "epoch": 0.2563790960091802, "grad_norm": 0.18961049616336823, "learning_rate": 0.002, "loss": 2.5763, "step": 128690 }, { "epoch": 0.2563990182328191, "grad_norm": 0.1820845901966095, "learning_rate": 0.002, "loss": 2.5481, "step": 128700 }, { "epoch": 0.25641894045645797, "grad_norm": 0.1761098951101303, "learning_rate": 0.002, "loss": 2.5488, "step": 128710 }, { "epoch": 0.2564388626800969, "grad_norm": 0.15295955538749695, "learning_rate": 0.002, "loss": 2.5642, "step": 128720 }, { "epoch": 0.2564587849037358, "grad_norm": 0.547760009765625, "learning_rate": 0.002, "loss": 2.5689, "step": 128730 }, { "epoch": 0.25647870712737475, "grad_norm": 0.15736927092075348, "learning_rate": 0.002, "loss": 2.5648, "step": 128740 }, { "epoch": 0.25649862935101364, "grad_norm": 0.1384267956018448, "learning_rate": 0.002, "loss": 2.5772, "step": 128750 }, { "epoch": 0.25651855157465253, "grad_norm": 0.1601848602294922, "learning_rate": 0.002, "loss": 2.5607, "step": 128760 }, { "epoch": 0.2565384737982915, "grad_norm": 0.17504502832889557, "learning_rate": 0.002, "loss": 2.5709, "step": 128770 }, { "epoch": 0.25655839602193037, "grad_norm": 0.1623956263065338, "learning_rate": 0.002, "loss": 2.5626, "step": 128780 }, { "epoch": 0.2565783182455693, "grad_norm": 0.17092522978782654, "learning_rate": 0.002, "loss": 2.5796, "step": 128790 }, { "epoch": 0.2565982404692082, "grad_norm": 0.1791975349187851, "learning_rate": 0.002, "loss": 2.5505, "step": 128800 }, { "epoch": 0.25661816269284715, "grad_norm": 0.17087578773498535, "learning_rate": 0.002, "loss": 2.5615, "step": 128810 }, { "epoch": 0.25663808491648604, "grad_norm": 0.15919683873653412, "learning_rate": 0.002, "loss": 2.5664, "step": 128820 }, { "epoch": 0.25665800714012493, "grad_norm": 0.1455489844083786, "learning_rate": 0.002, "loss": 2.5731, "step": 128830 }, { "epoch": 0.2566779293637639, "grad_norm": 0.1635754555463791, "learning_rate": 0.002, "loss": 2.5687, "step": 128840 }, { "epoch": 0.25669785158740277, "grad_norm": 0.18221503496170044, "learning_rate": 0.002, "loss": 2.5739, "step": 128850 }, { "epoch": 0.2567177738110417, "grad_norm": 0.1815904676914215, "learning_rate": 0.002, "loss": 2.5614, "step": 128860 }, { "epoch": 0.2567376960346806, "grad_norm": 0.16180048882961273, "learning_rate": 0.002, "loss": 2.5374, "step": 128870 }, { "epoch": 0.2567576182583195, "grad_norm": 0.164592906832695, "learning_rate": 0.002, "loss": 2.5666, "step": 128880 }, { "epoch": 0.25677754048195844, "grad_norm": 0.16645382344722748, "learning_rate": 0.002, "loss": 2.5642, "step": 128890 }, { "epoch": 0.25679746270559733, "grad_norm": 0.21401625871658325, "learning_rate": 0.002, "loss": 2.5642, "step": 128900 }, { "epoch": 0.2568173849292363, "grad_norm": 0.1472158581018448, "learning_rate": 0.002, "loss": 2.5611, "step": 128910 }, { "epoch": 0.25683730715287517, "grad_norm": 0.14322173595428467, "learning_rate": 0.002, "loss": 2.5736, "step": 128920 }, { "epoch": 0.25685722937651406, "grad_norm": 0.21846264600753784, "learning_rate": 0.002, "loss": 2.5632, "step": 128930 }, { "epoch": 0.256877151600153, "grad_norm": 0.18156664073467255, "learning_rate": 0.002, "loss": 2.5675, "step": 128940 }, { "epoch": 0.2568970738237919, "grad_norm": 0.1733412742614746, "learning_rate": 0.002, "loss": 2.5912, "step": 128950 }, { "epoch": 0.25691699604743085, "grad_norm": 0.18368932604789734, "learning_rate": 0.002, "loss": 2.5496, "step": 128960 }, { "epoch": 0.25693691827106974, "grad_norm": 0.17108315229415894, "learning_rate": 0.002, "loss": 2.5595, "step": 128970 }, { "epoch": 0.2569568404947087, "grad_norm": 0.1773134171962738, "learning_rate": 0.002, "loss": 2.572, "step": 128980 }, { "epoch": 0.2569767627183476, "grad_norm": 0.1813167780637741, "learning_rate": 0.002, "loss": 2.5565, "step": 128990 }, { "epoch": 0.25699668494198646, "grad_norm": 0.1606154441833496, "learning_rate": 0.002, "loss": 2.5584, "step": 129000 }, { "epoch": 0.2570166071656254, "grad_norm": 0.16689568758010864, "learning_rate": 0.002, "loss": 2.5592, "step": 129010 }, { "epoch": 0.2570365293892643, "grad_norm": 0.15871863067150116, "learning_rate": 0.002, "loss": 2.5478, "step": 129020 }, { "epoch": 0.25705645161290325, "grad_norm": 0.17505361139774323, "learning_rate": 0.002, "loss": 2.5789, "step": 129030 }, { "epoch": 0.25707637383654214, "grad_norm": 0.2150108516216278, "learning_rate": 0.002, "loss": 2.5589, "step": 129040 }, { "epoch": 0.25709629606018103, "grad_norm": 0.17074711620807648, "learning_rate": 0.002, "loss": 2.5618, "step": 129050 }, { "epoch": 0.25711621828382, "grad_norm": 0.1502232700586319, "learning_rate": 0.002, "loss": 2.5629, "step": 129060 }, { "epoch": 0.25713614050745887, "grad_norm": 0.18930232524871826, "learning_rate": 0.002, "loss": 2.5609, "step": 129070 }, { "epoch": 0.2571560627310978, "grad_norm": 0.16889388859272003, "learning_rate": 0.002, "loss": 2.5818, "step": 129080 }, { "epoch": 0.2571759849547367, "grad_norm": 0.1556578278541565, "learning_rate": 0.002, "loss": 2.5699, "step": 129090 }, { "epoch": 0.25719590717837565, "grad_norm": 0.20298123359680176, "learning_rate": 0.002, "loss": 2.5539, "step": 129100 }, { "epoch": 0.25721582940201454, "grad_norm": 0.17487309873104095, "learning_rate": 0.002, "loss": 2.5726, "step": 129110 }, { "epoch": 0.25723575162565343, "grad_norm": 0.1488332450389862, "learning_rate": 0.002, "loss": 2.5552, "step": 129120 }, { "epoch": 0.2572556738492924, "grad_norm": 0.18138666450977325, "learning_rate": 0.002, "loss": 2.5667, "step": 129130 }, { "epoch": 0.25727559607293127, "grad_norm": 0.19362957775592804, "learning_rate": 0.002, "loss": 2.5635, "step": 129140 }, { "epoch": 0.2572955182965702, "grad_norm": 0.16469702124595642, "learning_rate": 0.002, "loss": 2.5619, "step": 129150 }, { "epoch": 0.2573154405202091, "grad_norm": 0.1515527367591858, "learning_rate": 0.002, "loss": 2.5711, "step": 129160 }, { "epoch": 0.257335362743848, "grad_norm": 0.1759643405675888, "learning_rate": 0.002, "loss": 2.5481, "step": 129170 }, { "epoch": 0.25735528496748694, "grad_norm": 0.12931205332279205, "learning_rate": 0.002, "loss": 2.5597, "step": 129180 }, { "epoch": 0.25737520719112583, "grad_norm": 0.1584039181470871, "learning_rate": 0.002, "loss": 2.5619, "step": 129190 }, { "epoch": 0.2573951294147648, "grad_norm": 0.14777781069278717, "learning_rate": 0.002, "loss": 2.5529, "step": 129200 }, { "epoch": 0.25741505163840367, "grad_norm": 0.18901635706424713, "learning_rate": 0.002, "loss": 2.5577, "step": 129210 }, { "epoch": 0.25743497386204256, "grad_norm": 0.1800132542848587, "learning_rate": 0.002, "loss": 2.5631, "step": 129220 }, { "epoch": 0.2574548960856815, "grad_norm": 0.14321382343769073, "learning_rate": 0.002, "loss": 2.5596, "step": 129230 }, { "epoch": 0.2574748183093204, "grad_norm": 0.1612631231546402, "learning_rate": 0.002, "loss": 2.5577, "step": 129240 }, { "epoch": 0.25749474053295934, "grad_norm": 0.1778447926044464, "learning_rate": 0.002, "loss": 2.5622, "step": 129250 }, { "epoch": 0.25751466275659823, "grad_norm": 0.1607654094696045, "learning_rate": 0.002, "loss": 2.5636, "step": 129260 }, { "epoch": 0.2575345849802372, "grad_norm": 0.19056327641010284, "learning_rate": 0.002, "loss": 2.5768, "step": 129270 }, { "epoch": 0.25755450720387607, "grad_norm": 0.142116978764534, "learning_rate": 0.002, "loss": 2.5703, "step": 129280 }, { "epoch": 0.25757442942751496, "grad_norm": 0.16511505842208862, "learning_rate": 0.002, "loss": 2.5645, "step": 129290 }, { "epoch": 0.2575943516511539, "grad_norm": 0.16109099984169006, "learning_rate": 0.002, "loss": 2.5525, "step": 129300 }, { "epoch": 0.2576142738747928, "grad_norm": 0.214262917637825, "learning_rate": 0.002, "loss": 2.5675, "step": 129310 }, { "epoch": 0.25763419609843174, "grad_norm": 0.1888698786497116, "learning_rate": 0.002, "loss": 2.5825, "step": 129320 }, { "epoch": 0.25765411832207064, "grad_norm": 0.16465893387794495, "learning_rate": 0.002, "loss": 2.5622, "step": 129330 }, { "epoch": 0.2576740405457095, "grad_norm": 0.16866962611675262, "learning_rate": 0.002, "loss": 2.5719, "step": 129340 }, { "epoch": 0.2576939627693485, "grad_norm": 0.18893329799175262, "learning_rate": 0.002, "loss": 2.5528, "step": 129350 }, { "epoch": 0.25771388499298736, "grad_norm": 0.1680012196302414, "learning_rate": 0.002, "loss": 2.5677, "step": 129360 }, { "epoch": 0.2577338072166263, "grad_norm": 0.1557118445634842, "learning_rate": 0.002, "loss": 2.5438, "step": 129370 }, { "epoch": 0.2577537294402652, "grad_norm": 0.19477520883083344, "learning_rate": 0.002, "loss": 2.5546, "step": 129380 }, { "epoch": 0.2577736516639041, "grad_norm": 0.15645667910575867, "learning_rate": 0.002, "loss": 2.5686, "step": 129390 }, { "epoch": 0.25779357388754304, "grad_norm": 0.17845040559768677, "learning_rate": 0.002, "loss": 2.5571, "step": 129400 }, { "epoch": 0.25781349611118193, "grad_norm": 0.1656326949596405, "learning_rate": 0.002, "loss": 2.5687, "step": 129410 }, { "epoch": 0.2578334183348209, "grad_norm": 0.17670688033103943, "learning_rate": 0.002, "loss": 2.5595, "step": 129420 }, { "epoch": 0.25785334055845976, "grad_norm": 0.19714108109474182, "learning_rate": 0.002, "loss": 2.5808, "step": 129430 }, { "epoch": 0.2578732627820987, "grad_norm": 0.18843674659729004, "learning_rate": 0.002, "loss": 2.5547, "step": 129440 }, { "epoch": 0.2578931850057376, "grad_norm": 0.1722341924905777, "learning_rate": 0.002, "loss": 2.5778, "step": 129450 }, { "epoch": 0.2579131072293765, "grad_norm": 0.19218453764915466, "learning_rate": 0.002, "loss": 2.5582, "step": 129460 }, { "epoch": 0.25793302945301544, "grad_norm": 0.19798049330711365, "learning_rate": 0.002, "loss": 2.5641, "step": 129470 }, { "epoch": 0.25795295167665433, "grad_norm": 0.15652675926685333, "learning_rate": 0.002, "loss": 2.5697, "step": 129480 }, { "epoch": 0.2579728739002933, "grad_norm": 0.1704174131155014, "learning_rate": 0.002, "loss": 2.561, "step": 129490 }, { "epoch": 0.25799279612393217, "grad_norm": 0.1579539179801941, "learning_rate": 0.002, "loss": 2.5701, "step": 129500 }, { "epoch": 0.25801271834757106, "grad_norm": 0.20920568704605103, "learning_rate": 0.002, "loss": 2.5688, "step": 129510 }, { "epoch": 0.25803264057121, "grad_norm": 0.17250953614711761, "learning_rate": 0.002, "loss": 2.5698, "step": 129520 }, { "epoch": 0.2580525627948489, "grad_norm": 0.14396674931049347, "learning_rate": 0.002, "loss": 2.55, "step": 129530 }, { "epoch": 0.25807248501848784, "grad_norm": 0.15893211960792542, "learning_rate": 0.002, "loss": 2.5477, "step": 129540 }, { "epoch": 0.25809240724212673, "grad_norm": 0.1544836163520813, "learning_rate": 0.002, "loss": 2.5833, "step": 129550 }, { "epoch": 0.2581123294657657, "grad_norm": 0.25714218616485596, "learning_rate": 0.002, "loss": 2.569, "step": 129560 }, { "epoch": 0.25813225168940457, "grad_norm": 0.17745962738990784, "learning_rate": 0.002, "loss": 2.5735, "step": 129570 }, { "epoch": 0.25815217391304346, "grad_norm": 0.14331765472888947, "learning_rate": 0.002, "loss": 2.5629, "step": 129580 }, { "epoch": 0.2581720961366824, "grad_norm": 0.16253772377967834, "learning_rate": 0.002, "loss": 2.5607, "step": 129590 }, { "epoch": 0.2581920183603213, "grad_norm": 0.15319539606571198, "learning_rate": 0.002, "loss": 2.5675, "step": 129600 }, { "epoch": 0.25821194058396024, "grad_norm": 0.1654764711856842, "learning_rate": 0.002, "loss": 2.5547, "step": 129610 }, { "epoch": 0.25823186280759913, "grad_norm": 0.15954138338565826, "learning_rate": 0.002, "loss": 2.5716, "step": 129620 }, { "epoch": 0.258251785031238, "grad_norm": 0.12987597286701202, "learning_rate": 0.002, "loss": 2.5682, "step": 129630 }, { "epoch": 0.25827170725487697, "grad_norm": 0.1656024307012558, "learning_rate": 0.002, "loss": 2.5594, "step": 129640 }, { "epoch": 0.25829162947851586, "grad_norm": 0.1641569584608078, "learning_rate": 0.002, "loss": 2.5513, "step": 129650 }, { "epoch": 0.2583115517021548, "grad_norm": 0.19166065752506256, "learning_rate": 0.002, "loss": 2.5657, "step": 129660 }, { "epoch": 0.2583314739257937, "grad_norm": 0.15702411532402039, "learning_rate": 0.002, "loss": 2.5722, "step": 129670 }, { "epoch": 0.2583513961494326, "grad_norm": 0.1659289002418518, "learning_rate": 0.002, "loss": 2.5546, "step": 129680 }, { "epoch": 0.25837131837307153, "grad_norm": 0.19311070442199707, "learning_rate": 0.002, "loss": 2.5524, "step": 129690 }, { "epoch": 0.2583912405967104, "grad_norm": 0.15112197399139404, "learning_rate": 0.002, "loss": 2.5556, "step": 129700 }, { "epoch": 0.25841116282034937, "grad_norm": 0.16109953820705414, "learning_rate": 0.002, "loss": 2.5639, "step": 129710 }, { "epoch": 0.25843108504398826, "grad_norm": 0.16230838000774384, "learning_rate": 0.002, "loss": 2.5762, "step": 129720 }, { "epoch": 0.2584510072676272, "grad_norm": 0.15161505341529846, "learning_rate": 0.002, "loss": 2.5599, "step": 129730 }, { "epoch": 0.2584709294912661, "grad_norm": 0.23299306631088257, "learning_rate": 0.002, "loss": 2.5649, "step": 129740 }, { "epoch": 0.258490851714905, "grad_norm": 0.13538774847984314, "learning_rate": 0.002, "loss": 2.5792, "step": 129750 }, { "epoch": 0.25851077393854394, "grad_norm": 0.164861261844635, "learning_rate": 0.002, "loss": 2.5602, "step": 129760 }, { "epoch": 0.2585306961621828, "grad_norm": 0.16294224560260773, "learning_rate": 0.002, "loss": 2.5486, "step": 129770 }, { "epoch": 0.2585506183858218, "grad_norm": 0.168387308716774, "learning_rate": 0.002, "loss": 2.5745, "step": 129780 }, { "epoch": 0.25857054060946066, "grad_norm": 0.1495293825864792, "learning_rate": 0.002, "loss": 2.575, "step": 129790 }, { "epoch": 0.25859046283309955, "grad_norm": 0.1633681058883667, "learning_rate": 0.002, "loss": 2.5785, "step": 129800 }, { "epoch": 0.2586103850567385, "grad_norm": 0.17125003039836884, "learning_rate": 0.002, "loss": 2.5816, "step": 129810 }, { "epoch": 0.2586303072803774, "grad_norm": 0.1766221523284912, "learning_rate": 0.002, "loss": 2.5723, "step": 129820 }, { "epoch": 0.25865022950401634, "grad_norm": 0.20854420959949493, "learning_rate": 0.002, "loss": 2.558, "step": 129830 }, { "epoch": 0.25867015172765523, "grad_norm": 0.1876886487007141, "learning_rate": 0.002, "loss": 2.5563, "step": 129840 }, { "epoch": 0.2586900739512942, "grad_norm": 0.1944417953491211, "learning_rate": 0.002, "loss": 2.5612, "step": 129850 }, { "epoch": 0.25870999617493307, "grad_norm": 0.15296372771263123, "learning_rate": 0.002, "loss": 2.5581, "step": 129860 }, { "epoch": 0.25872991839857196, "grad_norm": 0.20531611144542694, "learning_rate": 0.002, "loss": 2.5657, "step": 129870 }, { "epoch": 0.2587498406222109, "grad_norm": 0.1481611281633377, "learning_rate": 0.002, "loss": 2.5512, "step": 129880 }, { "epoch": 0.2587697628458498, "grad_norm": 0.14465460181236267, "learning_rate": 0.002, "loss": 2.5661, "step": 129890 }, { "epoch": 0.25878968506948874, "grad_norm": 0.15912450850009918, "learning_rate": 0.002, "loss": 2.5687, "step": 129900 }, { "epoch": 0.25880960729312763, "grad_norm": 0.1738116294145584, "learning_rate": 0.002, "loss": 2.5651, "step": 129910 }, { "epoch": 0.2588295295167665, "grad_norm": 0.16269570589065552, "learning_rate": 0.002, "loss": 2.5616, "step": 129920 }, { "epoch": 0.25884945174040547, "grad_norm": 0.15850527584552765, "learning_rate": 0.002, "loss": 2.5598, "step": 129930 }, { "epoch": 0.25886937396404436, "grad_norm": 0.17756573855876923, "learning_rate": 0.002, "loss": 2.5651, "step": 129940 }, { "epoch": 0.2588892961876833, "grad_norm": 0.1729932576417923, "learning_rate": 0.002, "loss": 2.5605, "step": 129950 }, { "epoch": 0.2589092184113222, "grad_norm": 0.17098842561244965, "learning_rate": 0.002, "loss": 2.5667, "step": 129960 }, { "epoch": 0.2589291406349611, "grad_norm": 0.16217422485351562, "learning_rate": 0.002, "loss": 2.5654, "step": 129970 }, { "epoch": 0.25894906285860003, "grad_norm": 0.1848951280117035, "learning_rate": 0.002, "loss": 2.5634, "step": 129980 }, { "epoch": 0.2589689850822389, "grad_norm": 0.26784560084342957, "learning_rate": 0.002, "loss": 2.5622, "step": 129990 }, { "epoch": 0.25898890730587787, "grad_norm": 0.1876213550567627, "learning_rate": 0.002, "loss": 2.5699, "step": 130000 }, { "epoch": 0.25900882952951676, "grad_norm": 0.16688886284828186, "learning_rate": 0.002, "loss": 2.5612, "step": 130010 }, { "epoch": 0.2590287517531557, "grad_norm": 0.17293858528137207, "learning_rate": 0.002, "loss": 2.5534, "step": 130020 }, { "epoch": 0.2590486739767946, "grad_norm": 0.16622279584407806, "learning_rate": 0.002, "loss": 2.5606, "step": 130030 }, { "epoch": 0.2590685962004335, "grad_norm": 0.1768999695777893, "learning_rate": 0.002, "loss": 2.5957, "step": 130040 }, { "epoch": 0.25908851842407243, "grad_norm": 0.1696961224079132, "learning_rate": 0.002, "loss": 2.5742, "step": 130050 }, { "epoch": 0.2591084406477113, "grad_norm": 0.2008729875087738, "learning_rate": 0.002, "loss": 2.5465, "step": 130060 }, { "epoch": 0.25912836287135027, "grad_norm": 0.15342211723327637, "learning_rate": 0.002, "loss": 2.5632, "step": 130070 }, { "epoch": 0.25914828509498916, "grad_norm": 0.15867255628108978, "learning_rate": 0.002, "loss": 2.5573, "step": 130080 }, { "epoch": 0.25916820731862805, "grad_norm": 0.164998859167099, "learning_rate": 0.002, "loss": 2.5538, "step": 130090 }, { "epoch": 0.259188129542267, "grad_norm": 0.2010193169116974, "learning_rate": 0.002, "loss": 2.5678, "step": 130100 }, { "epoch": 0.2592080517659059, "grad_norm": 0.14947807788848877, "learning_rate": 0.002, "loss": 2.5524, "step": 130110 }, { "epoch": 0.25922797398954484, "grad_norm": 0.20210230350494385, "learning_rate": 0.002, "loss": 2.5721, "step": 130120 }, { "epoch": 0.2592478962131837, "grad_norm": 0.15481877326965332, "learning_rate": 0.002, "loss": 2.5579, "step": 130130 }, { "epoch": 0.2592678184368226, "grad_norm": 0.15714696049690247, "learning_rate": 0.002, "loss": 2.5559, "step": 130140 }, { "epoch": 0.25928774066046156, "grad_norm": 0.19997213780879974, "learning_rate": 0.002, "loss": 2.5727, "step": 130150 }, { "epoch": 0.25930766288410045, "grad_norm": 0.17362132668495178, "learning_rate": 0.002, "loss": 2.5546, "step": 130160 }, { "epoch": 0.2593275851077394, "grad_norm": 0.15263591706752777, "learning_rate": 0.002, "loss": 2.5695, "step": 130170 }, { "epoch": 0.2593475073313783, "grad_norm": 0.18040794134140015, "learning_rate": 0.002, "loss": 2.5716, "step": 130180 }, { "epoch": 0.25936742955501724, "grad_norm": 0.1575922966003418, "learning_rate": 0.002, "loss": 2.5816, "step": 130190 }, { "epoch": 0.25938735177865613, "grad_norm": 0.1914416402578354, "learning_rate": 0.002, "loss": 2.5496, "step": 130200 }, { "epoch": 0.259407274002295, "grad_norm": 0.16792011260986328, "learning_rate": 0.002, "loss": 2.5566, "step": 130210 }, { "epoch": 0.25942719622593396, "grad_norm": 0.19718657433986664, "learning_rate": 0.002, "loss": 2.5562, "step": 130220 }, { "epoch": 0.25944711844957286, "grad_norm": 0.14970768988132477, "learning_rate": 0.002, "loss": 2.5618, "step": 130230 }, { "epoch": 0.2594670406732118, "grad_norm": 0.1625363975763321, "learning_rate": 0.002, "loss": 2.555, "step": 130240 }, { "epoch": 0.2594869628968507, "grad_norm": 0.16912831366062164, "learning_rate": 0.002, "loss": 2.5622, "step": 130250 }, { "epoch": 0.2595068851204896, "grad_norm": 0.16571499407291412, "learning_rate": 0.002, "loss": 2.5613, "step": 130260 }, { "epoch": 0.25952680734412853, "grad_norm": 0.15543103218078613, "learning_rate": 0.002, "loss": 2.56, "step": 130270 }, { "epoch": 0.2595467295677674, "grad_norm": 0.15915510058403015, "learning_rate": 0.002, "loss": 2.5672, "step": 130280 }, { "epoch": 0.25956665179140637, "grad_norm": 0.17617009580135345, "learning_rate": 0.002, "loss": 2.558, "step": 130290 }, { "epoch": 0.25958657401504526, "grad_norm": 0.18902641534805298, "learning_rate": 0.002, "loss": 2.553, "step": 130300 }, { "epoch": 0.2596064962386842, "grad_norm": 0.17654162645339966, "learning_rate": 0.002, "loss": 2.5657, "step": 130310 }, { "epoch": 0.2596264184623231, "grad_norm": 0.15783703327178955, "learning_rate": 0.002, "loss": 2.5499, "step": 130320 }, { "epoch": 0.259646340685962, "grad_norm": 0.1747649759054184, "learning_rate": 0.002, "loss": 2.5567, "step": 130330 }, { "epoch": 0.25966626290960093, "grad_norm": 0.1538393646478653, "learning_rate": 0.002, "loss": 2.5701, "step": 130340 }, { "epoch": 0.2596861851332398, "grad_norm": 0.26825082302093506, "learning_rate": 0.002, "loss": 2.5646, "step": 130350 }, { "epoch": 0.25970610735687877, "grad_norm": 0.1614643633365631, "learning_rate": 0.002, "loss": 2.5585, "step": 130360 }, { "epoch": 0.25972602958051766, "grad_norm": 0.1356123983860016, "learning_rate": 0.002, "loss": 2.5716, "step": 130370 }, { "epoch": 0.25974595180415655, "grad_norm": 0.14970599114894867, "learning_rate": 0.002, "loss": 2.5504, "step": 130380 }, { "epoch": 0.2597658740277955, "grad_norm": 0.18343383073806763, "learning_rate": 0.002, "loss": 2.5615, "step": 130390 }, { "epoch": 0.2597857962514344, "grad_norm": 0.16735857725143433, "learning_rate": 0.002, "loss": 2.562, "step": 130400 }, { "epoch": 0.25980571847507333, "grad_norm": 0.16897089779376984, "learning_rate": 0.002, "loss": 2.5628, "step": 130410 }, { "epoch": 0.2598256406987122, "grad_norm": 0.15689681470394135, "learning_rate": 0.002, "loss": 2.558, "step": 130420 }, { "epoch": 0.2598455629223511, "grad_norm": 0.17144279181957245, "learning_rate": 0.002, "loss": 2.5558, "step": 130430 }, { "epoch": 0.25986548514599006, "grad_norm": 0.196544349193573, "learning_rate": 0.002, "loss": 2.5524, "step": 130440 }, { "epoch": 0.25988540736962895, "grad_norm": 0.20455598831176758, "learning_rate": 0.002, "loss": 2.554, "step": 130450 }, { "epoch": 0.2599053295932679, "grad_norm": 0.15817256271839142, "learning_rate": 0.002, "loss": 2.5668, "step": 130460 }, { "epoch": 0.2599252518169068, "grad_norm": 0.17085060477256775, "learning_rate": 0.002, "loss": 2.5684, "step": 130470 }, { "epoch": 0.25994517404054573, "grad_norm": 0.15328951179981232, "learning_rate": 0.002, "loss": 2.5787, "step": 130480 }, { "epoch": 0.2599650962641846, "grad_norm": 0.15658125281333923, "learning_rate": 0.002, "loss": 2.5816, "step": 130490 }, { "epoch": 0.2599850184878235, "grad_norm": 0.1653231382369995, "learning_rate": 0.002, "loss": 2.5623, "step": 130500 }, { "epoch": 0.26000494071146246, "grad_norm": 0.15688775479793549, "learning_rate": 0.002, "loss": 2.552, "step": 130510 }, { "epoch": 0.26002486293510135, "grad_norm": 0.2068265974521637, "learning_rate": 0.002, "loss": 2.5667, "step": 130520 }, { "epoch": 0.2600447851587403, "grad_norm": 0.18215233087539673, "learning_rate": 0.002, "loss": 2.5707, "step": 130530 }, { "epoch": 0.2600647073823792, "grad_norm": 0.15264984965324402, "learning_rate": 0.002, "loss": 2.5645, "step": 130540 }, { "epoch": 0.2600846296060181, "grad_norm": 0.19388195872306824, "learning_rate": 0.002, "loss": 2.5595, "step": 130550 }, { "epoch": 0.260104551829657, "grad_norm": 0.19431275129318237, "learning_rate": 0.002, "loss": 2.5634, "step": 130560 }, { "epoch": 0.2601244740532959, "grad_norm": 0.1722269356250763, "learning_rate": 0.002, "loss": 2.5612, "step": 130570 }, { "epoch": 0.26014439627693486, "grad_norm": 0.1464116871356964, "learning_rate": 0.002, "loss": 2.5528, "step": 130580 }, { "epoch": 0.26016431850057375, "grad_norm": 0.1855517327785492, "learning_rate": 0.002, "loss": 2.5496, "step": 130590 }, { "epoch": 0.2601842407242127, "grad_norm": 0.16329945623874664, "learning_rate": 0.002, "loss": 2.5608, "step": 130600 }, { "epoch": 0.2602041629478516, "grad_norm": 0.15740403532981873, "learning_rate": 0.002, "loss": 2.5525, "step": 130610 }, { "epoch": 0.2602240851714905, "grad_norm": 0.21803943812847137, "learning_rate": 0.002, "loss": 2.5815, "step": 130620 }, { "epoch": 0.26024400739512943, "grad_norm": 0.18188466131687164, "learning_rate": 0.002, "loss": 2.558, "step": 130630 }, { "epoch": 0.2602639296187683, "grad_norm": 0.188604474067688, "learning_rate": 0.002, "loss": 2.5736, "step": 130640 }, { "epoch": 0.26028385184240727, "grad_norm": 0.15701983869075775, "learning_rate": 0.002, "loss": 2.572, "step": 130650 }, { "epoch": 0.26030377406604616, "grad_norm": 0.145180344581604, "learning_rate": 0.002, "loss": 2.5739, "step": 130660 }, { "epoch": 0.26032369628968505, "grad_norm": 0.15657727420330048, "learning_rate": 0.002, "loss": 2.563, "step": 130670 }, { "epoch": 0.260343618513324, "grad_norm": 0.16315120458602905, "learning_rate": 0.002, "loss": 2.5645, "step": 130680 }, { "epoch": 0.2603635407369629, "grad_norm": 0.15774676203727722, "learning_rate": 0.002, "loss": 2.5635, "step": 130690 }, { "epoch": 0.26038346296060183, "grad_norm": 0.17245782911777496, "learning_rate": 0.002, "loss": 2.5616, "step": 130700 }, { "epoch": 0.2604033851842407, "grad_norm": 0.1687169224023819, "learning_rate": 0.002, "loss": 2.568, "step": 130710 }, { "epoch": 0.2604233074078796, "grad_norm": 0.17764964699745178, "learning_rate": 0.002, "loss": 2.5746, "step": 130720 }, { "epoch": 0.26044322963151856, "grad_norm": 0.16885152459144592, "learning_rate": 0.002, "loss": 2.5752, "step": 130730 }, { "epoch": 0.26046315185515745, "grad_norm": 0.2006707638502121, "learning_rate": 0.002, "loss": 2.5659, "step": 130740 }, { "epoch": 0.2604830740787964, "grad_norm": 0.14892886579036713, "learning_rate": 0.002, "loss": 2.5647, "step": 130750 }, { "epoch": 0.2605029963024353, "grad_norm": 0.15015503764152527, "learning_rate": 0.002, "loss": 2.566, "step": 130760 }, { "epoch": 0.26052291852607423, "grad_norm": 0.170554518699646, "learning_rate": 0.002, "loss": 2.5553, "step": 130770 }, { "epoch": 0.2605428407497131, "grad_norm": 0.1647031009197235, "learning_rate": 0.002, "loss": 2.569, "step": 130780 }, { "epoch": 0.260562762973352, "grad_norm": 0.162831112742424, "learning_rate": 0.002, "loss": 2.5694, "step": 130790 }, { "epoch": 0.26058268519699096, "grad_norm": 0.14229519665241241, "learning_rate": 0.002, "loss": 2.5586, "step": 130800 }, { "epoch": 0.26060260742062985, "grad_norm": 0.1849862039089203, "learning_rate": 0.002, "loss": 2.5644, "step": 130810 }, { "epoch": 0.2606225296442688, "grad_norm": 0.16760529577732086, "learning_rate": 0.002, "loss": 2.5463, "step": 130820 }, { "epoch": 0.2606424518679077, "grad_norm": 0.16167640686035156, "learning_rate": 0.002, "loss": 2.5707, "step": 130830 }, { "epoch": 0.2606623740915466, "grad_norm": 0.13915054500102997, "learning_rate": 0.002, "loss": 2.5773, "step": 130840 }, { "epoch": 0.2606822963151855, "grad_norm": 0.19410383701324463, "learning_rate": 0.002, "loss": 2.5629, "step": 130850 }, { "epoch": 0.2607022185388244, "grad_norm": 0.15939342975616455, "learning_rate": 0.002, "loss": 2.5542, "step": 130860 }, { "epoch": 0.26072214076246336, "grad_norm": 0.15037210285663605, "learning_rate": 0.002, "loss": 2.57, "step": 130870 }, { "epoch": 0.26074206298610225, "grad_norm": 0.1786697655916214, "learning_rate": 0.002, "loss": 2.5611, "step": 130880 }, { "epoch": 0.26076198520974114, "grad_norm": 0.15824256837368011, "learning_rate": 0.002, "loss": 2.5658, "step": 130890 }, { "epoch": 0.2607819074333801, "grad_norm": 0.16845592856407166, "learning_rate": 0.002, "loss": 2.5635, "step": 130900 }, { "epoch": 0.260801829657019, "grad_norm": 0.1876085251569748, "learning_rate": 0.002, "loss": 2.5625, "step": 130910 }, { "epoch": 0.2608217518806579, "grad_norm": 0.16211076080799103, "learning_rate": 0.002, "loss": 2.553, "step": 130920 }, { "epoch": 0.2608416741042968, "grad_norm": 0.16790564358234406, "learning_rate": 0.002, "loss": 2.5457, "step": 130930 }, { "epoch": 0.26086159632793576, "grad_norm": 0.14682768285274506, "learning_rate": 0.002, "loss": 2.5745, "step": 130940 }, { "epoch": 0.26088151855157465, "grad_norm": 0.1795915812253952, "learning_rate": 0.002, "loss": 2.56, "step": 130950 }, { "epoch": 0.26090144077521354, "grad_norm": 0.1650993674993515, "learning_rate": 0.002, "loss": 2.5757, "step": 130960 }, { "epoch": 0.2609213629988525, "grad_norm": 0.1476919949054718, "learning_rate": 0.002, "loss": 2.5827, "step": 130970 }, { "epoch": 0.2609412852224914, "grad_norm": 0.17637331783771515, "learning_rate": 0.002, "loss": 2.5604, "step": 130980 }, { "epoch": 0.2609612074461303, "grad_norm": 0.16959424316883087, "learning_rate": 0.002, "loss": 2.5709, "step": 130990 }, { "epoch": 0.2609811296697692, "grad_norm": 0.19507654011249542, "learning_rate": 0.002, "loss": 2.5612, "step": 131000 }, { "epoch": 0.2610010518934081, "grad_norm": 0.14412768185138702, "learning_rate": 0.002, "loss": 2.5612, "step": 131010 }, { "epoch": 0.26102097411704706, "grad_norm": 0.1782122403383255, "learning_rate": 0.002, "loss": 2.5666, "step": 131020 }, { "epoch": 0.26104089634068595, "grad_norm": 0.18392139673233032, "learning_rate": 0.002, "loss": 2.5436, "step": 131030 }, { "epoch": 0.2610608185643249, "grad_norm": 0.16334401071071625, "learning_rate": 0.002, "loss": 2.5739, "step": 131040 }, { "epoch": 0.2610807407879638, "grad_norm": 0.17115677893161774, "learning_rate": 0.002, "loss": 2.5608, "step": 131050 }, { "epoch": 0.26110066301160273, "grad_norm": 0.17323024570941925, "learning_rate": 0.002, "loss": 2.5753, "step": 131060 }, { "epoch": 0.2611205852352416, "grad_norm": 0.1640264093875885, "learning_rate": 0.002, "loss": 2.5497, "step": 131070 }, { "epoch": 0.2611405074588805, "grad_norm": 0.2100670337677002, "learning_rate": 0.002, "loss": 2.5674, "step": 131080 }, { "epoch": 0.26116042968251946, "grad_norm": 0.1864924430847168, "learning_rate": 0.002, "loss": 2.5662, "step": 131090 }, { "epoch": 0.26118035190615835, "grad_norm": 0.18226921558380127, "learning_rate": 0.002, "loss": 2.5662, "step": 131100 }, { "epoch": 0.2612002741297973, "grad_norm": 0.17614984512329102, "learning_rate": 0.002, "loss": 2.5847, "step": 131110 }, { "epoch": 0.2612201963534362, "grad_norm": 0.17839674651622772, "learning_rate": 0.002, "loss": 2.558, "step": 131120 }, { "epoch": 0.2612401185770751, "grad_norm": 0.1774735450744629, "learning_rate": 0.002, "loss": 2.5736, "step": 131130 }, { "epoch": 0.261260040800714, "grad_norm": 0.16459015011787415, "learning_rate": 0.002, "loss": 2.569, "step": 131140 }, { "epoch": 0.2612799630243529, "grad_norm": 0.1812446266412735, "learning_rate": 0.002, "loss": 2.5676, "step": 131150 }, { "epoch": 0.26129988524799186, "grad_norm": 0.17939502000808716, "learning_rate": 0.002, "loss": 2.5795, "step": 131160 }, { "epoch": 0.26131980747163075, "grad_norm": 0.17765238881111145, "learning_rate": 0.002, "loss": 2.5593, "step": 131170 }, { "epoch": 0.26133972969526964, "grad_norm": 0.1597050279378891, "learning_rate": 0.002, "loss": 2.572, "step": 131180 }, { "epoch": 0.2613596519189086, "grad_norm": 0.19040259718894958, "learning_rate": 0.002, "loss": 2.561, "step": 131190 }, { "epoch": 0.2613795741425475, "grad_norm": 0.16292324662208557, "learning_rate": 0.002, "loss": 2.5566, "step": 131200 }, { "epoch": 0.2613994963661864, "grad_norm": 0.14220085740089417, "learning_rate": 0.002, "loss": 2.56, "step": 131210 }, { "epoch": 0.2614194185898253, "grad_norm": 0.23900878429412842, "learning_rate": 0.002, "loss": 2.5688, "step": 131220 }, { "epoch": 0.26143934081346426, "grad_norm": 0.17708808183670044, "learning_rate": 0.002, "loss": 2.5653, "step": 131230 }, { "epoch": 0.26145926303710315, "grad_norm": 0.17954277992248535, "learning_rate": 0.002, "loss": 2.5644, "step": 131240 }, { "epoch": 0.26147918526074204, "grad_norm": 0.15264511108398438, "learning_rate": 0.002, "loss": 2.5629, "step": 131250 }, { "epoch": 0.261499107484381, "grad_norm": 0.17113183438777924, "learning_rate": 0.002, "loss": 2.5776, "step": 131260 }, { "epoch": 0.2615190297080199, "grad_norm": 0.15758205950260162, "learning_rate": 0.002, "loss": 2.5548, "step": 131270 }, { "epoch": 0.2615389519316588, "grad_norm": 0.14923591911792755, "learning_rate": 0.002, "loss": 2.5612, "step": 131280 }, { "epoch": 0.2615588741552977, "grad_norm": 0.18109002709388733, "learning_rate": 0.002, "loss": 2.5745, "step": 131290 }, { "epoch": 0.2615787963789366, "grad_norm": 0.23827935755252838, "learning_rate": 0.002, "loss": 2.5633, "step": 131300 }, { "epoch": 0.26159871860257555, "grad_norm": 0.21297140419483185, "learning_rate": 0.002, "loss": 2.5638, "step": 131310 }, { "epoch": 0.26161864082621444, "grad_norm": 0.1695757955312729, "learning_rate": 0.002, "loss": 2.5575, "step": 131320 }, { "epoch": 0.2616385630498534, "grad_norm": 0.18444359302520752, "learning_rate": 0.002, "loss": 2.5644, "step": 131330 }, { "epoch": 0.2616584852734923, "grad_norm": 0.14794065058231354, "learning_rate": 0.002, "loss": 2.5673, "step": 131340 }, { "epoch": 0.2616784074971312, "grad_norm": 0.16877241432666779, "learning_rate": 0.002, "loss": 2.5765, "step": 131350 }, { "epoch": 0.2616983297207701, "grad_norm": 0.19872012734413147, "learning_rate": 0.002, "loss": 2.5617, "step": 131360 }, { "epoch": 0.261718251944409, "grad_norm": 0.15955732762813568, "learning_rate": 0.002, "loss": 2.5615, "step": 131370 }, { "epoch": 0.26173817416804795, "grad_norm": 0.16126035153865814, "learning_rate": 0.002, "loss": 2.5556, "step": 131380 }, { "epoch": 0.26175809639168685, "grad_norm": 0.1455448865890503, "learning_rate": 0.002, "loss": 2.5511, "step": 131390 }, { "epoch": 0.2617780186153258, "grad_norm": 0.1883818507194519, "learning_rate": 0.002, "loss": 2.5735, "step": 131400 }, { "epoch": 0.2617979408389647, "grad_norm": 0.15572679042816162, "learning_rate": 0.002, "loss": 2.5787, "step": 131410 }, { "epoch": 0.2618178630626036, "grad_norm": 0.1932082176208496, "learning_rate": 0.002, "loss": 2.5555, "step": 131420 }, { "epoch": 0.2618377852862425, "grad_norm": 0.17823590338230133, "learning_rate": 0.002, "loss": 2.5539, "step": 131430 }, { "epoch": 0.2618577075098814, "grad_norm": 0.15041209757328033, "learning_rate": 0.002, "loss": 2.5599, "step": 131440 }, { "epoch": 0.26187762973352036, "grad_norm": 0.17346835136413574, "learning_rate": 0.002, "loss": 2.5814, "step": 131450 }, { "epoch": 0.26189755195715925, "grad_norm": 0.1786017119884491, "learning_rate": 0.002, "loss": 2.5752, "step": 131460 }, { "epoch": 0.26191747418079814, "grad_norm": 0.15621288120746613, "learning_rate": 0.002, "loss": 2.5662, "step": 131470 }, { "epoch": 0.2619373964044371, "grad_norm": 0.18632890284061432, "learning_rate": 0.002, "loss": 2.5664, "step": 131480 }, { "epoch": 0.261957318628076, "grad_norm": 0.16043893992900848, "learning_rate": 0.002, "loss": 2.562, "step": 131490 }, { "epoch": 0.2619772408517149, "grad_norm": 0.15750598907470703, "learning_rate": 0.002, "loss": 2.5584, "step": 131500 }, { "epoch": 0.2619971630753538, "grad_norm": 0.1678643673658371, "learning_rate": 0.002, "loss": 2.5623, "step": 131510 }, { "epoch": 0.26201708529899276, "grad_norm": 0.1684429794549942, "learning_rate": 0.002, "loss": 2.5668, "step": 131520 }, { "epoch": 0.26203700752263165, "grad_norm": 0.1630808264017105, "learning_rate": 0.002, "loss": 2.5618, "step": 131530 }, { "epoch": 0.26205692974627054, "grad_norm": 0.1390686184167862, "learning_rate": 0.002, "loss": 2.5809, "step": 131540 }, { "epoch": 0.2620768519699095, "grad_norm": 0.1610843390226364, "learning_rate": 0.002, "loss": 2.5663, "step": 131550 }, { "epoch": 0.2620967741935484, "grad_norm": 0.20872357487678528, "learning_rate": 0.002, "loss": 2.554, "step": 131560 }, { "epoch": 0.2621166964171873, "grad_norm": 0.15646299719810486, "learning_rate": 0.002, "loss": 2.574, "step": 131570 }, { "epoch": 0.2621366186408262, "grad_norm": 0.16917690634727478, "learning_rate": 0.002, "loss": 2.5599, "step": 131580 }, { "epoch": 0.2621565408644651, "grad_norm": 0.16337765753269196, "learning_rate": 0.002, "loss": 2.5707, "step": 131590 }, { "epoch": 0.26217646308810405, "grad_norm": 0.18271198868751526, "learning_rate": 0.002, "loss": 2.5685, "step": 131600 }, { "epoch": 0.26219638531174294, "grad_norm": 0.16049842536449432, "learning_rate": 0.002, "loss": 2.5667, "step": 131610 }, { "epoch": 0.2622163075353819, "grad_norm": 0.1736445426940918, "learning_rate": 0.002, "loss": 2.5767, "step": 131620 }, { "epoch": 0.2622362297590208, "grad_norm": 0.20834974944591522, "learning_rate": 0.002, "loss": 2.5765, "step": 131630 }, { "epoch": 0.2622561519826597, "grad_norm": 0.18213637173175812, "learning_rate": 0.002, "loss": 2.5692, "step": 131640 }, { "epoch": 0.2622760742062986, "grad_norm": 0.15088719129562378, "learning_rate": 0.002, "loss": 2.5562, "step": 131650 }, { "epoch": 0.2622959964299375, "grad_norm": 0.17090052366256714, "learning_rate": 0.002, "loss": 2.5552, "step": 131660 }, { "epoch": 0.26231591865357645, "grad_norm": 0.16164299845695496, "learning_rate": 0.002, "loss": 2.5779, "step": 131670 }, { "epoch": 0.26233584087721534, "grad_norm": 0.1763537973165512, "learning_rate": 0.002, "loss": 2.5663, "step": 131680 }, { "epoch": 0.2623557631008543, "grad_norm": 0.15596245229244232, "learning_rate": 0.002, "loss": 2.5496, "step": 131690 }, { "epoch": 0.2623756853244932, "grad_norm": 0.17935235798358917, "learning_rate": 0.002, "loss": 2.5751, "step": 131700 }, { "epoch": 0.26239560754813207, "grad_norm": 0.15123535692691803, "learning_rate": 0.002, "loss": 2.5688, "step": 131710 }, { "epoch": 0.262415529771771, "grad_norm": 0.16636712849140167, "learning_rate": 0.002, "loss": 2.5551, "step": 131720 }, { "epoch": 0.2624354519954099, "grad_norm": 0.17866231501102448, "learning_rate": 0.002, "loss": 2.5675, "step": 131730 }, { "epoch": 0.26245537421904885, "grad_norm": 0.16901995241641998, "learning_rate": 0.002, "loss": 2.5558, "step": 131740 }, { "epoch": 0.26247529644268774, "grad_norm": 0.17383873462677002, "learning_rate": 0.002, "loss": 2.5553, "step": 131750 }, { "epoch": 0.26249521866632664, "grad_norm": 0.17521972954273224, "learning_rate": 0.002, "loss": 2.5675, "step": 131760 }, { "epoch": 0.2625151408899656, "grad_norm": 0.14390341937541962, "learning_rate": 0.002, "loss": 2.5775, "step": 131770 }, { "epoch": 0.26253506311360447, "grad_norm": 0.1846880465745926, "learning_rate": 0.002, "loss": 2.55, "step": 131780 }, { "epoch": 0.2625549853372434, "grad_norm": 0.173673614859581, "learning_rate": 0.002, "loss": 2.565, "step": 131790 }, { "epoch": 0.2625749075608823, "grad_norm": 0.22621940076351166, "learning_rate": 0.002, "loss": 2.5586, "step": 131800 }, { "epoch": 0.26259482978452126, "grad_norm": 0.16261506080627441, "learning_rate": 0.002, "loss": 2.5746, "step": 131810 }, { "epoch": 0.26261475200816015, "grad_norm": 0.23240943253040314, "learning_rate": 0.002, "loss": 2.5623, "step": 131820 }, { "epoch": 0.26263467423179904, "grad_norm": 0.1707332879304886, "learning_rate": 0.002, "loss": 2.544, "step": 131830 }, { "epoch": 0.262654596455438, "grad_norm": 0.17460666596889496, "learning_rate": 0.002, "loss": 2.5655, "step": 131840 }, { "epoch": 0.2626745186790769, "grad_norm": 0.15540459752082825, "learning_rate": 0.002, "loss": 2.5704, "step": 131850 }, { "epoch": 0.2626944409027158, "grad_norm": 0.1564788967370987, "learning_rate": 0.002, "loss": 2.5512, "step": 131860 }, { "epoch": 0.2627143631263547, "grad_norm": 0.19721226394176483, "learning_rate": 0.002, "loss": 2.5711, "step": 131870 }, { "epoch": 0.2627342853499936, "grad_norm": 0.18021836876869202, "learning_rate": 0.002, "loss": 2.5784, "step": 131880 }, { "epoch": 0.26275420757363255, "grad_norm": 0.15843577682971954, "learning_rate": 0.002, "loss": 2.5508, "step": 131890 }, { "epoch": 0.26277412979727144, "grad_norm": 0.16972000896930695, "learning_rate": 0.002, "loss": 2.5622, "step": 131900 }, { "epoch": 0.2627940520209104, "grad_norm": 0.17003728449344635, "learning_rate": 0.002, "loss": 2.5634, "step": 131910 }, { "epoch": 0.2628139742445493, "grad_norm": 0.15069268643856049, "learning_rate": 0.002, "loss": 2.5399, "step": 131920 }, { "epoch": 0.26283389646818817, "grad_norm": 0.15374217927455902, "learning_rate": 0.002, "loss": 2.5526, "step": 131930 }, { "epoch": 0.2628538186918271, "grad_norm": 0.22686700522899628, "learning_rate": 0.002, "loss": 2.5562, "step": 131940 }, { "epoch": 0.262873740915466, "grad_norm": 0.16578781604766846, "learning_rate": 0.002, "loss": 2.5659, "step": 131950 }, { "epoch": 0.26289366313910495, "grad_norm": 0.1784341037273407, "learning_rate": 0.002, "loss": 2.5744, "step": 131960 }, { "epoch": 0.26291358536274384, "grad_norm": 0.19059891998767853, "learning_rate": 0.002, "loss": 2.5518, "step": 131970 }, { "epoch": 0.2629335075863828, "grad_norm": 0.15532785654067993, "learning_rate": 0.002, "loss": 2.5763, "step": 131980 }, { "epoch": 0.2629534298100217, "grad_norm": 0.1908460557460785, "learning_rate": 0.002, "loss": 2.5597, "step": 131990 }, { "epoch": 0.26297335203366057, "grad_norm": 0.1801259070634842, "learning_rate": 0.002, "loss": 2.5625, "step": 132000 }, { "epoch": 0.2629932742572995, "grad_norm": 0.14396342635154724, "learning_rate": 0.002, "loss": 2.5576, "step": 132010 }, { "epoch": 0.2630131964809384, "grad_norm": 0.18001507222652435, "learning_rate": 0.002, "loss": 2.5726, "step": 132020 }, { "epoch": 0.26303311870457735, "grad_norm": 0.16400612890720367, "learning_rate": 0.002, "loss": 2.5493, "step": 132030 }, { "epoch": 0.26305304092821624, "grad_norm": 0.17660287022590637, "learning_rate": 0.002, "loss": 2.5719, "step": 132040 }, { "epoch": 0.26307296315185513, "grad_norm": 0.1671033650636673, "learning_rate": 0.002, "loss": 2.5648, "step": 132050 }, { "epoch": 0.2630928853754941, "grad_norm": 0.2022257149219513, "learning_rate": 0.002, "loss": 2.5585, "step": 132060 }, { "epoch": 0.26311280759913297, "grad_norm": 0.1639380156993866, "learning_rate": 0.002, "loss": 2.5668, "step": 132070 }, { "epoch": 0.2631327298227719, "grad_norm": 0.1537715494632721, "learning_rate": 0.002, "loss": 2.557, "step": 132080 }, { "epoch": 0.2631526520464108, "grad_norm": 0.14372090995311737, "learning_rate": 0.002, "loss": 2.5662, "step": 132090 }, { "epoch": 0.26317257427004975, "grad_norm": 0.19203579425811768, "learning_rate": 0.002, "loss": 2.5705, "step": 132100 }, { "epoch": 0.26319249649368864, "grad_norm": 0.17667728662490845, "learning_rate": 0.002, "loss": 2.5662, "step": 132110 }, { "epoch": 0.26321241871732753, "grad_norm": 0.16139382123947144, "learning_rate": 0.002, "loss": 2.5597, "step": 132120 }, { "epoch": 0.2632323409409665, "grad_norm": 0.167593315243721, "learning_rate": 0.002, "loss": 2.5593, "step": 132130 }, { "epoch": 0.26325226316460537, "grad_norm": 0.16068947315216064, "learning_rate": 0.002, "loss": 2.5549, "step": 132140 }, { "epoch": 0.2632721853882443, "grad_norm": 0.17375722527503967, "learning_rate": 0.002, "loss": 2.5594, "step": 132150 }, { "epoch": 0.2632921076118832, "grad_norm": 0.21189920604228973, "learning_rate": 0.002, "loss": 2.5666, "step": 132160 }, { "epoch": 0.2633120298355221, "grad_norm": 0.16188856959342957, "learning_rate": 0.002, "loss": 2.5714, "step": 132170 }, { "epoch": 0.26333195205916105, "grad_norm": 0.18771646916866302, "learning_rate": 0.002, "loss": 2.5643, "step": 132180 }, { "epoch": 0.26335187428279994, "grad_norm": 0.18995346128940582, "learning_rate": 0.002, "loss": 2.5593, "step": 132190 }, { "epoch": 0.2633717965064389, "grad_norm": 0.18757539987564087, "learning_rate": 0.002, "loss": 2.5663, "step": 132200 }, { "epoch": 0.2633917187300778, "grad_norm": 0.16346517205238342, "learning_rate": 0.002, "loss": 2.5744, "step": 132210 }, { "epoch": 0.26341164095371666, "grad_norm": 0.19451500475406647, "learning_rate": 0.002, "loss": 2.5496, "step": 132220 }, { "epoch": 0.2634315631773556, "grad_norm": 0.17729730904102325, "learning_rate": 0.002, "loss": 2.5597, "step": 132230 }, { "epoch": 0.2634514854009945, "grad_norm": 0.20385819673538208, "learning_rate": 0.002, "loss": 2.5562, "step": 132240 }, { "epoch": 0.26347140762463345, "grad_norm": 0.15891191363334656, "learning_rate": 0.002, "loss": 2.5554, "step": 132250 }, { "epoch": 0.26349132984827234, "grad_norm": 0.16952486336231232, "learning_rate": 0.002, "loss": 2.5651, "step": 132260 }, { "epoch": 0.2635112520719113, "grad_norm": 0.16301576793193817, "learning_rate": 0.002, "loss": 2.5609, "step": 132270 }, { "epoch": 0.2635311742955502, "grad_norm": 0.1659189909696579, "learning_rate": 0.002, "loss": 2.5707, "step": 132280 }, { "epoch": 0.26355109651918907, "grad_norm": 0.17000865936279297, "learning_rate": 0.002, "loss": 2.5737, "step": 132290 }, { "epoch": 0.263571018742828, "grad_norm": 0.17015238106250763, "learning_rate": 0.002, "loss": 2.5598, "step": 132300 }, { "epoch": 0.2635909409664669, "grad_norm": 0.14480559527873993, "learning_rate": 0.002, "loss": 2.5643, "step": 132310 }, { "epoch": 0.26361086319010585, "grad_norm": 0.1693960279226303, "learning_rate": 0.002, "loss": 2.5519, "step": 132320 }, { "epoch": 0.26363078541374474, "grad_norm": 0.16209881007671356, "learning_rate": 0.002, "loss": 2.5846, "step": 132330 }, { "epoch": 0.26365070763738363, "grad_norm": 0.16086046397686005, "learning_rate": 0.002, "loss": 2.5576, "step": 132340 }, { "epoch": 0.2636706298610226, "grad_norm": 0.2241487056016922, "learning_rate": 0.002, "loss": 2.5781, "step": 132350 }, { "epoch": 0.26369055208466147, "grad_norm": 0.20097728073596954, "learning_rate": 0.002, "loss": 2.5633, "step": 132360 }, { "epoch": 0.2637104743083004, "grad_norm": 0.15076416730880737, "learning_rate": 0.002, "loss": 2.5669, "step": 132370 }, { "epoch": 0.2637303965319393, "grad_norm": 0.1867658644914627, "learning_rate": 0.002, "loss": 2.5658, "step": 132380 }, { "epoch": 0.26375031875557825, "grad_norm": 0.2026589959859848, "learning_rate": 0.002, "loss": 2.5608, "step": 132390 }, { "epoch": 0.26377024097921714, "grad_norm": 0.14877022802829742, "learning_rate": 0.002, "loss": 2.5661, "step": 132400 }, { "epoch": 0.26379016320285603, "grad_norm": 0.15173308551311493, "learning_rate": 0.002, "loss": 2.5683, "step": 132410 }, { "epoch": 0.263810085426495, "grad_norm": 0.16216890513896942, "learning_rate": 0.002, "loss": 2.5865, "step": 132420 }, { "epoch": 0.26383000765013387, "grad_norm": 0.1747463047504425, "learning_rate": 0.002, "loss": 2.5646, "step": 132430 }, { "epoch": 0.2638499298737728, "grad_norm": 0.1686152219772339, "learning_rate": 0.002, "loss": 2.5612, "step": 132440 }, { "epoch": 0.2638698520974117, "grad_norm": 0.19614748656749725, "learning_rate": 0.002, "loss": 2.5772, "step": 132450 }, { "epoch": 0.2638897743210506, "grad_norm": 0.18875260651111603, "learning_rate": 0.002, "loss": 2.5625, "step": 132460 }, { "epoch": 0.26390969654468954, "grad_norm": 0.22056391835212708, "learning_rate": 0.002, "loss": 2.5644, "step": 132470 }, { "epoch": 0.26392961876832843, "grad_norm": 0.184481680393219, "learning_rate": 0.002, "loss": 2.5541, "step": 132480 }, { "epoch": 0.2639495409919674, "grad_norm": 0.16787008941173553, "learning_rate": 0.002, "loss": 2.5676, "step": 132490 }, { "epoch": 0.26396946321560627, "grad_norm": 0.18761809170246124, "learning_rate": 0.002, "loss": 2.5526, "step": 132500 }, { "epoch": 0.26398938543924516, "grad_norm": 0.19057486951351166, "learning_rate": 0.002, "loss": 2.5638, "step": 132510 }, { "epoch": 0.2640093076628841, "grad_norm": 0.17647860944271088, "learning_rate": 0.002, "loss": 2.5802, "step": 132520 }, { "epoch": 0.264029229886523, "grad_norm": 0.1613360494375229, "learning_rate": 0.002, "loss": 2.5956, "step": 132530 }, { "epoch": 0.26404915211016194, "grad_norm": 0.23801666498184204, "learning_rate": 0.002, "loss": 2.576, "step": 132540 }, { "epoch": 0.26406907433380084, "grad_norm": 0.14633478224277496, "learning_rate": 0.002, "loss": 2.5745, "step": 132550 }, { "epoch": 0.2640889965574398, "grad_norm": 0.1799386590719223, "learning_rate": 0.002, "loss": 2.5661, "step": 132560 }, { "epoch": 0.26410891878107867, "grad_norm": 0.14353585243225098, "learning_rate": 0.002, "loss": 2.5644, "step": 132570 }, { "epoch": 0.26412884100471756, "grad_norm": 0.20030732452869415, "learning_rate": 0.002, "loss": 2.5569, "step": 132580 }, { "epoch": 0.2641487632283565, "grad_norm": 0.1942760944366455, "learning_rate": 0.002, "loss": 2.5452, "step": 132590 }, { "epoch": 0.2641686854519954, "grad_norm": 0.19600625336170197, "learning_rate": 0.002, "loss": 2.5587, "step": 132600 }, { "epoch": 0.26418860767563435, "grad_norm": 0.15873883664608002, "learning_rate": 0.002, "loss": 2.5659, "step": 132610 }, { "epoch": 0.26420852989927324, "grad_norm": 0.15976868569850922, "learning_rate": 0.002, "loss": 2.5559, "step": 132620 }, { "epoch": 0.2642284521229121, "grad_norm": 0.23323562741279602, "learning_rate": 0.002, "loss": 2.5615, "step": 132630 }, { "epoch": 0.2642483743465511, "grad_norm": 0.15917132794857025, "learning_rate": 0.002, "loss": 2.5489, "step": 132640 }, { "epoch": 0.26426829657018996, "grad_norm": 0.15740515291690826, "learning_rate": 0.002, "loss": 2.5766, "step": 132650 }, { "epoch": 0.2642882187938289, "grad_norm": 0.1647242158651352, "learning_rate": 0.002, "loss": 2.5756, "step": 132660 }, { "epoch": 0.2643081410174678, "grad_norm": 0.17455746233463287, "learning_rate": 0.002, "loss": 2.5614, "step": 132670 }, { "epoch": 0.2643280632411067, "grad_norm": 0.19974370300769806, "learning_rate": 0.002, "loss": 2.5663, "step": 132680 }, { "epoch": 0.26434798546474564, "grad_norm": 0.16963735222816467, "learning_rate": 0.002, "loss": 2.5524, "step": 132690 }, { "epoch": 0.26436790768838453, "grad_norm": 0.16546151041984558, "learning_rate": 0.002, "loss": 2.5941, "step": 132700 }, { "epoch": 0.2643878299120235, "grad_norm": 0.17548178136348724, "learning_rate": 0.002, "loss": 2.5683, "step": 132710 }, { "epoch": 0.26440775213566237, "grad_norm": 0.1545340120792389, "learning_rate": 0.002, "loss": 2.5723, "step": 132720 }, { "epoch": 0.2644276743593013, "grad_norm": 0.21055559813976288, "learning_rate": 0.002, "loss": 2.5525, "step": 132730 }, { "epoch": 0.2644475965829402, "grad_norm": 0.1539921760559082, "learning_rate": 0.002, "loss": 2.564, "step": 132740 }, { "epoch": 0.2644675188065791, "grad_norm": 0.17330247163772583, "learning_rate": 0.002, "loss": 2.5569, "step": 132750 }, { "epoch": 0.26448744103021804, "grad_norm": 0.19268327951431274, "learning_rate": 0.002, "loss": 2.5675, "step": 132760 }, { "epoch": 0.26450736325385693, "grad_norm": 0.15518802404403687, "learning_rate": 0.002, "loss": 2.5681, "step": 132770 }, { "epoch": 0.2645272854774959, "grad_norm": 0.15902282297611237, "learning_rate": 0.002, "loss": 2.5628, "step": 132780 }, { "epoch": 0.26454720770113477, "grad_norm": 0.14860017597675323, "learning_rate": 0.002, "loss": 2.5619, "step": 132790 }, { "epoch": 0.26456712992477366, "grad_norm": 0.1643804907798767, "learning_rate": 0.002, "loss": 2.5609, "step": 132800 }, { "epoch": 0.2645870521484126, "grad_norm": 0.2145196795463562, "learning_rate": 0.002, "loss": 2.5567, "step": 132810 }, { "epoch": 0.2646069743720515, "grad_norm": 0.15799568593502045, "learning_rate": 0.002, "loss": 2.5582, "step": 132820 }, { "epoch": 0.26462689659569044, "grad_norm": 0.15780213475227356, "learning_rate": 0.002, "loss": 2.5603, "step": 132830 }, { "epoch": 0.26464681881932933, "grad_norm": 0.1495615839958191, "learning_rate": 0.002, "loss": 2.5611, "step": 132840 }, { "epoch": 0.2646667410429683, "grad_norm": 0.1514994502067566, "learning_rate": 0.002, "loss": 2.5463, "step": 132850 }, { "epoch": 0.26468666326660717, "grad_norm": 0.18305343389511108, "learning_rate": 0.002, "loss": 2.5636, "step": 132860 }, { "epoch": 0.26470658549024606, "grad_norm": 0.16327272355556488, "learning_rate": 0.002, "loss": 2.5644, "step": 132870 }, { "epoch": 0.264726507713885, "grad_norm": 0.1665475070476532, "learning_rate": 0.002, "loss": 2.578, "step": 132880 }, { "epoch": 0.2647464299375239, "grad_norm": 0.15460069477558136, "learning_rate": 0.002, "loss": 2.561, "step": 132890 }, { "epoch": 0.26476635216116284, "grad_norm": 0.20682395994663239, "learning_rate": 0.002, "loss": 2.5529, "step": 132900 }, { "epoch": 0.26478627438480173, "grad_norm": 0.16018494963645935, "learning_rate": 0.002, "loss": 2.5711, "step": 132910 }, { "epoch": 0.2648061966084406, "grad_norm": 0.18829569220542908, "learning_rate": 0.002, "loss": 2.5483, "step": 132920 }, { "epoch": 0.26482611883207957, "grad_norm": 0.16918455064296722, "learning_rate": 0.002, "loss": 2.5596, "step": 132930 }, { "epoch": 0.26484604105571846, "grad_norm": 0.15717357397079468, "learning_rate": 0.002, "loss": 2.5694, "step": 132940 }, { "epoch": 0.2648659632793574, "grad_norm": 0.16893506050109863, "learning_rate": 0.002, "loss": 2.5785, "step": 132950 }, { "epoch": 0.2648858855029963, "grad_norm": 0.1646820604801178, "learning_rate": 0.002, "loss": 2.5647, "step": 132960 }, { "epoch": 0.2649058077266352, "grad_norm": 0.1516866683959961, "learning_rate": 0.002, "loss": 2.5604, "step": 132970 }, { "epoch": 0.26492572995027414, "grad_norm": 0.15059956908226013, "learning_rate": 0.002, "loss": 2.562, "step": 132980 }, { "epoch": 0.264945652173913, "grad_norm": 0.1555296778678894, "learning_rate": 0.002, "loss": 2.5744, "step": 132990 }, { "epoch": 0.264965574397552, "grad_norm": 0.1668715476989746, "learning_rate": 0.002, "loss": 2.5664, "step": 133000 }, { "epoch": 0.26498549662119086, "grad_norm": 0.1613154113292694, "learning_rate": 0.002, "loss": 2.5689, "step": 133010 }, { "epoch": 0.2650054188448298, "grad_norm": 0.1588013619184494, "learning_rate": 0.002, "loss": 2.5633, "step": 133020 }, { "epoch": 0.2650253410684687, "grad_norm": 0.1766051948070526, "learning_rate": 0.002, "loss": 2.5633, "step": 133030 }, { "epoch": 0.2650452632921076, "grad_norm": 0.1912434697151184, "learning_rate": 0.002, "loss": 2.5705, "step": 133040 }, { "epoch": 0.26506518551574654, "grad_norm": 0.1800108104944229, "learning_rate": 0.002, "loss": 2.5729, "step": 133050 }, { "epoch": 0.26508510773938543, "grad_norm": 0.15913936495780945, "learning_rate": 0.002, "loss": 2.5542, "step": 133060 }, { "epoch": 0.2651050299630244, "grad_norm": 0.19458161294460297, "learning_rate": 0.002, "loss": 2.5586, "step": 133070 }, { "epoch": 0.26512495218666327, "grad_norm": 0.1688801348209381, "learning_rate": 0.002, "loss": 2.5651, "step": 133080 }, { "epoch": 0.26514487441030216, "grad_norm": 0.17024441063404083, "learning_rate": 0.002, "loss": 2.5703, "step": 133090 }, { "epoch": 0.2651647966339411, "grad_norm": 0.1623542457818985, "learning_rate": 0.002, "loss": 2.5673, "step": 133100 }, { "epoch": 0.26518471885758, "grad_norm": 0.17299629747867584, "learning_rate": 0.002, "loss": 2.5758, "step": 133110 }, { "epoch": 0.26520464108121894, "grad_norm": 0.14634940028190613, "learning_rate": 0.002, "loss": 2.5638, "step": 133120 }, { "epoch": 0.26522456330485783, "grad_norm": 0.17044052481651306, "learning_rate": 0.002, "loss": 2.56, "step": 133130 }, { "epoch": 0.2652444855284968, "grad_norm": 0.19621552526950836, "learning_rate": 0.002, "loss": 2.5727, "step": 133140 }, { "epoch": 0.26526440775213567, "grad_norm": 0.15958064794540405, "learning_rate": 0.002, "loss": 2.5644, "step": 133150 }, { "epoch": 0.26528432997577456, "grad_norm": 0.19282492995262146, "learning_rate": 0.002, "loss": 2.555, "step": 133160 }, { "epoch": 0.2653042521994135, "grad_norm": 0.209207683801651, "learning_rate": 0.002, "loss": 2.5564, "step": 133170 }, { "epoch": 0.2653241744230524, "grad_norm": 0.18319179117679596, "learning_rate": 0.002, "loss": 2.5842, "step": 133180 }, { "epoch": 0.26534409664669134, "grad_norm": 0.14925819635391235, "learning_rate": 0.002, "loss": 2.5583, "step": 133190 }, { "epoch": 0.26536401887033023, "grad_norm": 0.1686817854642868, "learning_rate": 0.002, "loss": 2.5704, "step": 133200 }, { "epoch": 0.2653839410939691, "grad_norm": 0.1831749528646469, "learning_rate": 0.002, "loss": 2.5631, "step": 133210 }, { "epoch": 0.26540386331760807, "grad_norm": 0.17930637300014496, "learning_rate": 0.002, "loss": 2.5679, "step": 133220 }, { "epoch": 0.26542378554124696, "grad_norm": 0.15807156264781952, "learning_rate": 0.002, "loss": 2.5499, "step": 133230 }, { "epoch": 0.2654437077648859, "grad_norm": 0.17800071835517883, "learning_rate": 0.002, "loss": 2.5588, "step": 133240 }, { "epoch": 0.2654636299885248, "grad_norm": 0.1471141129732132, "learning_rate": 0.002, "loss": 2.5612, "step": 133250 }, { "epoch": 0.2654835522121637, "grad_norm": 0.193057119846344, "learning_rate": 0.002, "loss": 2.5691, "step": 133260 }, { "epoch": 0.26550347443580263, "grad_norm": 0.16805507242679596, "learning_rate": 0.002, "loss": 2.5726, "step": 133270 }, { "epoch": 0.2655233966594415, "grad_norm": 0.14798760414123535, "learning_rate": 0.002, "loss": 2.5679, "step": 133280 }, { "epoch": 0.26554331888308047, "grad_norm": 0.18424434959888458, "learning_rate": 0.002, "loss": 2.564, "step": 133290 }, { "epoch": 0.26556324110671936, "grad_norm": 0.1678055375814438, "learning_rate": 0.002, "loss": 2.5623, "step": 133300 }, { "epoch": 0.2655831633303583, "grad_norm": 0.14338593184947968, "learning_rate": 0.002, "loss": 2.571, "step": 133310 }, { "epoch": 0.2656030855539972, "grad_norm": 0.33869317173957825, "learning_rate": 0.002, "loss": 2.5723, "step": 133320 }, { "epoch": 0.2656230077776361, "grad_norm": 0.1637164056301117, "learning_rate": 0.002, "loss": 2.5602, "step": 133330 }, { "epoch": 0.26564293000127503, "grad_norm": 0.1817837655544281, "learning_rate": 0.002, "loss": 2.5561, "step": 133340 }, { "epoch": 0.2656628522249139, "grad_norm": 0.17310579121112823, "learning_rate": 0.002, "loss": 2.5604, "step": 133350 }, { "epoch": 0.26568277444855287, "grad_norm": 0.16009347140789032, "learning_rate": 0.002, "loss": 2.5576, "step": 133360 }, { "epoch": 0.26570269667219176, "grad_norm": 0.1489671915769577, "learning_rate": 0.002, "loss": 2.549, "step": 133370 }, { "epoch": 0.26572261889583065, "grad_norm": 0.17984528839588165, "learning_rate": 0.002, "loss": 2.554, "step": 133380 }, { "epoch": 0.2657425411194696, "grad_norm": 0.17162351310253143, "learning_rate": 0.002, "loss": 2.5589, "step": 133390 }, { "epoch": 0.2657624633431085, "grad_norm": 0.13715490698814392, "learning_rate": 0.002, "loss": 2.5599, "step": 133400 }, { "epoch": 0.26578238556674744, "grad_norm": 0.16123732924461365, "learning_rate": 0.002, "loss": 2.5734, "step": 133410 }, { "epoch": 0.2658023077903863, "grad_norm": 0.1715172529220581, "learning_rate": 0.002, "loss": 2.5662, "step": 133420 }, { "epoch": 0.2658222300140252, "grad_norm": 0.1629626303911209, "learning_rate": 0.002, "loss": 2.5741, "step": 133430 }, { "epoch": 0.26584215223766416, "grad_norm": 0.17048799991607666, "learning_rate": 0.002, "loss": 2.564, "step": 133440 }, { "epoch": 0.26586207446130306, "grad_norm": 0.16054841876029968, "learning_rate": 0.002, "loss": 2.5575, "step": 133450 }, { "epoch": 0.265881996684942, "grad_norm": 0.1993216723203659, "learning_rate": 0.002, "loss": 2.5681, "step": 133460 }, { "epoch": 0.2659019189085809, "grad_norm": 0.148122638463974, "learning_rate": 0.002, "loss": 2.5551, "step": 133470 }, { "epoch": 0.26592184113221984, "grad_norm": 0.19217358529567719, "learning_rate": 0.002, "loss": 2.5608, "step": 133480 }, { "epoch": 0.26594176335585873, "grad_norm": 0.16454336047172546, "learning_rate": 0.002, "loss": 2.5676, "step": 133490 }, { "epoch": 0.2659616855794976, "grad_norm": 0.19108256697654724, "learning_rate": 0.002, "loss": 2.5694, "step": 133500 }, { "epoch": 0.26598160780313657, "grad_norm": 0.15538465976715088, "learning_rate": 0.002, "loss": 2.5631, "step": 133510 }, { "epoch": 0.26600153002677546, "grad_norm": 0.15241125226020813, "learning_rate": 0.002, "loss": 2.5665, "step": 133520 }, { "epoch": 0.2660214522504144, "grad_norm": 0.16598278284072876, "learning_rate": 0.002, "loss": 2.5539, "step": 133530 }, { "epoch": 0.2660413744740533, "grad_norm": 0.17314526438713074, "learning_rate": 0.002, "loss": 2.5526, "step": 133540 }, { "epoch": 0.2660612966976922, "grad_norm": 0.19342340528964996, "learning_rate": 0.002, "loss": 2.5599, "step": 133550 }, { "epoch": 0.26608121892133113, "grad_norm": 0.1609547734260559, "learning_rate": 0.002, "loss": 2.5558, "step": 133560 }, { "epoch": 0.26610114114497, "grad_norm": 0.1628473848104477, "learning_rate": 0.002, "loss": 2.5667, "step": 133570 }, { "epoch": 0.26612106336860897, "grad_norm": 0.16037052869796753, "learning_rate": 0.002, "loss": 2.5688, "step": 133580 }, { "epoch": 0.26614098559224786, "grad_norm": 0.2064531445503235, "learning_rate": 0.002, "loss": 2.5691, "step": 133590 }, { "epoch": 0.2661609078158868, "grad_norm": 0.17233477532863617, "learning_rate": 0.002, "loss": 2.5627, "step": 133600 }, { "epoch": 0.2661808300395257, "grad_norm": 0.16922569274902344, "learning_rate": 0.002, "loss": 2.5742, "step": 133610 }, { "epoch": 0.2662007522631646, "grad_norm": 0.18664808571338654, "learning_rate": 0.002, "loss": 2.5538, "step": 133620 }, { "epoch": 0.26622067448680353, "grad_norm": 0.15967603027820587, "learning_rate": 0.002, "loss": 2.5805, "step": 133630 }, { "epoch": 0.2662405967104424, "grad_norm": 0.17376089096069336, "learning_rate": 0.002, "loss": 2.5429, "step": 133640 }, { "epoch": 0.26626051893408137, "grad_norm": 0.14666631817817688, "learning_rate": 0.002, "loss": 2.5512, "step": 133650 }, { "epoch": 0.26628044115772026, "grad_norm": 0.21593834459781647, "learning_rate": 0.002, "loss": 2.5727, "step": 133660 }, { "epoch": 0.26630036338135915, "grad_norm": 0.16346170008182526, "learning_rate": 0.002, "loss": 2.571, "step": 133670 }, { "epoch": 0.2663202856049981, "grad_norm": 0.16940376162528992, "learning_rate": 0.002, "loss": 2.5691, "step": 133680 }, { "epoch": 0.266340207828637, "grad_norm": 0.15472766757011414, "learning_rate": 0.002, "loss": 2.5593, "step": 133690 }, { "epoch": 0.26636013005227593, "grad_norm": 0.17035019397735596, "learning_rate": 0.002, "loss": 2.5458, "step": 133700 }, { "epoch": 0.2663800522759148, "grad_norm": 0.15926143527030945, "learning_rate": 0.002, "loss": 2.5539, "step": 133710 }, { "epoch": 0.2663999744995537, "grad_norm": 0.17404977977275848, "learning_rate": 0.002, "loss": 2.5677, "step": 133720 }, { "epoch": 0.26641989672319266, "grad_norm": 0.1537749171257019, "learning_rate": 0.002, "loss": 2.5773, "step": 133730 }, { "epoch": 0.26643981894683155, "grad_norm": 0.22368668019771576, "learning_rate": 0.002, "loss": 2.5644, "step": 133740 }, { "epoch": 0.2664597411704705, "grad_norm": 0.14183226227760315, "learning_rate": 0.002, "loss": 2.5722, "step": 133750 }, { "epoch": 0.2664796633941094, "grad_norm": 0.14079062640666962, "learning_rate": 0.002, "loss": 2.5576, "step": 133760 }, { "epoch": 0.26649958561774834, "grad_norm": 0.14674068987369537, "learning_rate": 0.002, "loss": 2.5578, "step": 133770 }, { "epoch": 0.2665195078413872, "grad_norm": 0.17478518187999725, "learning_rate": 0.002, "loss": 2.5592, "step": 133780 }, { "epoch": 0.2665394300650261, "grad_norm": 0.19482123851776123, "learning_rate": 0.002, "loss": 2.5527, "step": 133790 }, { "epoch": 0.26655935228866506, "grad_norm": 0.1695808619260788, "learning_rate": 0.002, "loss": 2.5685, "step": 133800 }, { "epoch": 0.26657927451230395, "grad_norm": 0.14990763366222382, "learning_rate": 0.002, "loss": 2.5739, "step": 133810 }, { "epoch": 0.2665991967359429, "grad_norm": 0.17719025909900665, "learning_rate": 0.002, "loss": 2.5478, "step": 133820 }, { "epoch": 0.2666191189595818, "grad_norm": 0.1826125979423523, "learning_rate": 0.002, "loss": 2.5678, "step": 133830 }, { "epoch": 0.2666390411832207, "grad_norm": 0.15684279799461365, "learning_rate": 0.002, "loss": 2.5741, "step": 133840 }, { "epoch": 0.26665896340685963, "grad_norm": 0.21629616618156433, "learning_rate": 0.002, "loss": 2.5683, "step": 133850 }, { "epoch": 0.2666788856304985, "grad_norm": 0.15437917411327362, "learning_rate": 0.002, "loss": 2.5616, "step": 133860 }, { "epoch": 0.26669880785413747, "grad_norm": 0.1580176204442978, "learning_rate": 0.002, "loss": 2.5581, "step": 133870 }, { "epoch": 0.26671873007777636, "grad_norm": 0.14790278673171997, "learning_rate": 0.002, "loss": 2.5714, "step": 133880 }, { "epoch": 0.2667386523014153, "grad_norm": 0.18049725890159607, "learning_rate": 0.002, "loss": 2.5657, "step": 133890 }, { "epoch": 0.2667585745250542, "grad_norm": 0.14633242785930634, "learning_rate": 0.002, "loss": 2.5651, "step": 133900 }, { "epoch": 0.2667784967486931, "grad_norm": 0.500381588935852, "learning_rate": 0.002, "loss": 2.5546, "step": 133910 }, { "epoch": 0.26679841897233203, "grad_norm": 0.19132818281650543, "learning_rate": 0.002, "loss": 2.5682, "step": 133920 }, { "epoch": 0.2668183411959709, "grad_norm": 0.14984671771526337, "learning_rate": 0.002, "loss": 2.5675, "step": 133930 }, { "epoch": 0.26683826341960987, "grad_norm": 0.15060630440711975, "learning_rate": 0.002, "loss": 2.5512, "step": 133940 }, { "epoch": 0.26685818564324876, "grad_norm": 0.16020874679088593, "learning_rate": 0.002, "loss": 2.5593, "step": 133950 }, { "epoch": 0.26687810786688765, "grad_norm": 0.1561887413263321, "learning_rate": 0.002, "loss": 2.5577, "step": 133960 }, { "epoch": 0.2668980300905266, "grad_norm": 0.1795101910829544, "learning_rate": 0.002, "loss": 2.571, "step": 133970 }, { "epoch": 0.2669179523141655, "grad_norm": 0.18512016534805298, "learning_rate": 0.002, "loss": 2.5522, "step": 133980 }, { "epoch": 0.26693787453780443, "grad_norm": 0.47624263167381287, "learning_rate": 0.002, "loss": 2.5598, "step": 133990 }, { "epoch": 0.2669577967614433, "grad_norm": 0.167728990316391, "learning_rate": 0.002, "loss": 2.5622, "step": 134000 }, { "epoch": 0.2669777189850822, "grad_norm": 0.22711405158042908, "learning_rate": 0.002, "loss": 2.5729, "step": 134010 }, { "epoch": 0.26699764120872116, "grad_norm": 0.15258090198040009, "learning_rate": 0.002, "loss": 2.5639, "step": 134020 }, { "epoch": 0.26701756343236005, "grad_norm": 0.17138583958148956, "learning_rate": 0.002, "loss": 2.5692, "step": 134030 }, { "epoch": 0.267037485655999, "grad_norm": 0.13928738236427307, "learning_rate": 0.002, "loss": 2.5681, "step": 134040 }, { "epoch": 0.2670574078796379, "grad_norm": 0.18909497559070587, "learning_rate": 0.002, "loss": 2.557, "step": 134050 }, { "epoch": 0.26707733010327683, "grad_norm": 0.14187119901180267, "learning_rate": 0.002, "loss": 2.5541, "step": 134060 }, { "epoch": 0.2670972523269157, "grad_norm": 0.18845732510089874, "learning_rate": 0.002, "loss": 2.5441, "step": 134070 }, { "epoch": 0.2671171745505546, "grad_norm": 0.18448743224143982, "learning_rate": 0.002, "loss": 2.5715, "step": 134080 }, { "epoch": 0.26713709677419356, "grad_norm": 0.15738551318645477, "learning_rate": 0.002, "loss": 2.5592, "step": 134090 }, { "epoch": 0.26715701899783245, "grad_norm": 0.17701204121112823, "learning_rate": 0.002, "loss": 2.5503, "step": 134100 }, { "epoch": 0.2671769412214714, "grad_norm": 0.152839794754982, "learning_rate": 0.002, "loss": 2.5715, "step": 134110 }, { "epoch": 0.2671968634451103, "grad_norm": 0.21208158135414124, "learning_rate": 0.002, "loss": 2.5374, "step": 134120 }, { "epoch": 0.2672167856687492, "grad_norm": 0.1670064926147461, "learning_rate": 0.002, "loss": 2.5492, "step": 134130 }, { "epoch": 0.2672367078923881, "grad_norm": 0.1973458081483841, "learning_rate": 0.002, "loss": 2.5575, "step": 134140 }, { "epoch": 0.267256630116027, "grad_norm": 0.1889694631099701, "learning_rate": 0.002, "loss": 2.5673, "step": 134150 }, { "epoch": 0.26727655233966596, "grad_norm": 0.1717890202999115, "learning_rate": 0.002, "loss": 2.575, "step": 134160 }, { "epoch": 0.26729647456330485, "grad_norm": 0.15842188894748688, "learning_rate": 0.002, "loss": 2.5698, "step": 134170 }, { "epoch": 0.26731639678694374, "grad_norm": 0.18251335620880127, "learning_rate": 0.002, "loss": 2.5678, "step": 134180 }, { "epoch": 0.2673363190105827, "grad_norm": 0.16329063475131989, "learning_rate": 0.002, "loss": 2.5695, "step": 134190 }, { "epoch": 0.2673562412342216, "grad_norm": 0.2118118554353714, "learning_rate": 0.002, "loss": 2.5654, "step": 134200 }, { "epoch": 0.2673761634578605, "grad_norm": 0.14748899638652802, "learning_rate": 0.002, "loss": 2.5655, "step": 134210 }, { "epoch": 0.2673960856814994, "grad_norm": 0.18777000904083252, "learning_rate": 0.002, "loss": 2.5766, "step": 134220 }, { "epoch": 0.26741600790513836, "grad_norm": 0.17869989573955536, "learning_rate": 0.002, "loss": 2.5593, "step": 134230 }, { "epoch": 0.26743593012877726, "grad_norm": 0.21212784945964813, "learning_rate": 0.002, "loss": 2.5781, "step": 134240 }, { "epoch": 0.26745585235241615, "grad_norm": 0.1670299917459488, "learning_rate": 0.002, "loss": 2.5681, "step": 134250 }, { "epoch": 0.2674757745760551, "grad_norm": 0.1608980894088745, "learning_rate": 0.002, "loss": 2.5657, "step": 134260 }, { "epoch": 0.267495696799694, "grad_norm": 0.1546105295419693, "learning_rate": 0.002, "loss": 2.5618, "step": 134270 }, { "epoch": 0.26751561902333293, "grad_norm": 0.17647582292556763, "learning_rate": 0.002, "loss": 2.5634, "step": 134280 }, { "epoch": 0.2675355412469718, "grad_norm": 0.16648858785629272, "learning_rate": 0.002, "loss": 2.5643, "step": 134290 }, { "epoch": 0.2675554634706107, "grad_norm": 0.18755000829696655, "learning_rate": 0.002, "loss": 2.5673, "step": 134300 }, { "epoch": 0.26757538569424966, "grad_norm": 0.1776306927204132, "learning_rate": 0.002, "loss": 2.5722, "step": 134310 }, { "epoch": 0.26759530791788855, "grad_norm": 0.17152272164821625, "learning_rate": 0.002, "loss": 2.5657, "step": 134320 }, { "epoch": 0.2676152301415275, "grad_norm": 0.17426259815692902, "learning_rate": 0.002, "loss": 2.5666, "step": 134330 }, { "epoch": 0.2676351523651664, "grad_norm": 0.22202721238136292, "learning_rate": 0.002, "loss": 2.5725, "step": 134340 }, { "epoch": 0.26765507458880533, "grad_norm": 0.16378597915172577, "learning_rate": 0.002, "loss": 2.5716, "step": 134350 }, { "epoch": 0.2676749968124442, "grad_norm": 0.15536585450172424, "learning_rate": 0.002, "loss": 2.5519, "step": 134360 }, { "epoch": 0.2676949190360831, "grad_norm": 0.16225726902484894, "learning_rate": 0.002, "loss": 2.5621, "step": 134370 }, { "epoch": 0.26771484125972206, "grad_norm": 0.17001841962337494, "learning_rate": 0.002, "loss": 2.5706, "step": 134380 }, { "epoch": 0.26773476348336095, "grad_norm": 0.17271007597446442, "learning_rate": 0.002, "loss": 2.5603, "step": 134390 }, { "epoch": 0.2677546857069999, "grad_norm": 0.1673114001750946, "learning_rate": 0.002, "loss": 2.577, "step": 134400 }, { "epoch": 0.2677746079306388, "grad_norm": 0.19296567142009735, "learning_rate": 0.002, "loss": 2.5669, "step": 134410 }, { "epoch": 0.2677945301542777, "grad_norm": 0.16330327093601227, "learning_rate": 0.002, "loss": 2.5743, "step": 134420 }, { "epoch": 0.2678144523779166, "grad_norm": 0.1900511234998703, "learning_rate": 0.002, "loss": 2.5645, "step": 134430 }, { "epoch": 0.2678343746015555, "grad_norm": 0.16460156440734863, "learning_rate": 0.002, "loss": 2.5462, "step": 134440 }, { "epoch": 0.26785429682519446, "grad_norm": 0.17771750688552856, "learning_rate": 0.002, "loss": 2.5584, "step": 134450 }, { "epoch": 0.26787421904883335, "grad_norm": 0.14352376759052277, "learning_rate": 0.002, "loss": 2.5678, "step": 134460 }, { "epoch": 0.26789414127247224, "grad_norm": 0.2329074889421463, "learning_rate": 0.002, "loss": 2.5632, "step": 134470 }, { "epoch": 0.2679140634961112, "grad_norm": 0.1668931543827057, "learning_rate": 0.002, "loss": 2.5753, "step": 134480 }, { "epoch": 0.2679339857197501, "grad_norm": 0.15106382966041565, "learning_rate": 0.002, "loss": 2.549, "step": 134490 }, { "epoch": 0.267953907943389, "grad_norm": 0.16097760200500488, "learning_rate": 0.002, "loss": 2.5501, "step": 134500 }, { "epoch": 0.2679738301670279, "grad_norm": 0.1588696837425232, "learning_rate": 0.002, "loss": 2.5632, "step": 134510 }, { "epoch": 0.26799375239066686, "grad_norm": 0.1521788388490677, "learning_rate": 0.002, "loss": 2.5523, "step": 134520 }, { "epoch": 0.26801367461430575, "grad_norm": 0.17612789571285248, "learning_rate": 0.002, "loss": 2.5604, "step": 134530 }, { "epoch": 0.26803359683794464, "grad_norm": 0.1828598827123642, "learning_rate": 0.002, "loss": 2.5615, "step": 134540 }, { "epoch": 0.2680535190615836, "grad_norm": 0.1536654531955719, "learning_rate": 0.002, "loss": 2.5503, "step": 134550 }, { "epoch": 0.2680734412852225, "grad_norm": 0.18509770929813385, "learning_rate": 0.002, "loss": 2.5626, "step": 134560 }, { "epoch": 0.2680933635088614, "grad_norm": 0.1546768993139267, "learning_rate": 0.002, "loss": 2.5583, "step": 134570 }, { "epoch": 0.2681132857325003, "grad_norm": 0.1694307029247284, "learning_rate": 0.002, "loss": 2.5711, "step": 134580 }, { "epoch": 0.2681332079561392, "grad_norm": 0.16646532714366913, "learning_rate": 0.002, "loss": 2.5674, "step": 134590 }, { "epoch": 0.26815313017977815, "grad_norm": 0.14088812470436096, "learning_rate": 0.002, "loss": 2.5663, "step": 134600 }, { "epoch": 0.26817305240341704, "grad_norm": 0.16935281455516815, "learning_rate": 0.002, "loss": 2.5821, "step": 134610 }, { "epoch": 0.268192974627056, "grad_norm": 0.15461547672748566, "learning_rate": 0.002, "loss": 2.5734, "step": 134620 }, { "epoch": 0.2682128968506949, "grad_norm": 0.17842598259449005, "learning_rate": 0.002, "loss": 2.5593, "step": 134630 }, { "epoch": 0.26823281907433383, "grad_norm": 0.1712278127670288, "learning_rate": 0.002, "loss": 2.5605, "step": 134640 }, { "epoch": 0.2682527412979727, "grad_norm": 0.19257569313049316, "learning_rate": 0.002, "loss": 2.5752, "step": 134650 }, { "epoch": 0.2682726635216116, "grad_norm": 0.16688647866249084, "learning_rate": 0.002, "loss": 2.5571, "step": 134660 }, { "epoch": 0.26829258574525056, "grad_norm": 0.17188821732997894, "learning_rate": 0.002, "loss": 2.5509, "step": 134670 }, { "epoch": 0.26831250796888945, "grad_norm": 0.1740586757659912, "learning_rate": 0.002, "loss": 2.5631, "step": 134680 }, { "epoch": 0.2683324301925284, "grad_norm": 0.1932525783777237, "learning_rate": 0.002, "loss": 2.5602, "step": 134690 }, { "epoch": 0.2683523524161673, "grad_norm": 0.17237116396427155, "learning_rate": 0.002, "loss": 2.581, "step": 134700 }, { "epoch": 0.2683722746398062, "grad_norm": 0.19010794162750244, "learning_rate": 0.002, "loss": 2.5699, "step": 134710 }, { "epoch": 0.2683921968634451, "grad_norm": 0.16345880925655365, "learning_rate": 0.002, "loss": 2.5602, "step": 134720 }, { "epoch": 0.268412119087084, "grad_norm": 0.15932154655456543, "learning_rate": 0.002, "loss": 2.5652, "step": 134730 }, { "epoch": 0.26843204131072296, "grad_norm": 0.21012672781944275, "learning_rate": 0.002, "loss": 2.5535, "step": 134740 }, { "epoch": 0.26845196353436185, "grad_norm": 0.15164591372013092, "learning_rate": 0.002, "loss": 2.5624, "step": 134750 }, { "epoch": 0.26847188575800074, "grad_norm": 0.1687682867050171, "learning_rate": 0.002, "loss": 2.5486, "step": 134760 }, { "epoch": 0.2684918079816397, "grad_norm": 0.18132063746452332, "learning_rate": 0.002, "loss": 2.5655, "step": 134770 }, { "epoch": 0.2685117302052786, "grad_norm": 0.18844367563724518, "learning_rate": 0.002, "loss": 2.5625, "step": 134780 }, { "epoch": 0.2685316524289175, "grad_norm": 0.1750982254743576, "learning_rate": 0.002, "loss": 2.5753, "step": 134790 }, { "epoch": 0.2685515746525564, "grad_norm": 0.14894507825374603, "learning_rate": 0.002, "loss": 2.5729, "step": 134800 }, { "epoch": 0.26857149687619536, "grad_norm": 0.15080957114696503, "learning_rate": 0.002, "loss": 2.5662, "step": 134810 }, { "epoch": 0.26859141909983425, "grad_norm": 0.17452940344810486, "learning_rate": 0.002, "loss": 2.5594, "step": 134820 }, { "epoch": 0.26861134132347314, "grad_norm": 0.16564342379570007, "learning_rate": 0.002, "loss": 2.5558, "step": 134830 }, { "epoch": 0.2686312635471121, "grad_norm": 0.1636650264263153, "learning_rate": 0.002, "loss": 2.5729, "step": 134840 }, { "epoch": 0.268651185770751, "grad_norm": 0.15805551409721375, "learning_rate": 0.002, "loss": 2.5485, "step": 134850 }, { "epoch": 0.2686711079943899, "grad_norm": 0.16596949100494385, "learning_rate": 0.002, "loss": 2.5869, "step": 134860 }, { "epoch": 0.2686910302180288, "grad_norm": 0.16833189129829407, "learning_rate": 0.002, "loss": 2.5798, "step": 134870 }, { "epoch": 0.2687109524416677, "grad_norm": 0.17626002430915833, "learning_rate": 0.002, "loss": 2.575, "step": 134880 }, { "epoch": 0.26873087466530665, "grad_norm": 0.16276417672634125, "learning_rate": 0.002, "loss": 2.5474, "step": 134890 }, { "epoch": 0.26875079688894554, "grad_norm": 0.1754242181777954, "learning_rate": 0.002, "loss": 2.5572, "step": 134900 }, { "epoch": 0.2687707191125845, "grad_norm": 0.15520904958248138, "learning_rate": 0.002, "loss": 2.5502, "step": 134910 }, { "epoch": 0.2687906413362234, "grad_norm": 0.1563774198293686, "learning_rate": 0.002, "loss": 2.5732, "step": 134920 }, { "epoch": 0.26881056355986227, "grad_norm": 0.16629330813884735, "learning_rate": 0.002, "loss": 2.5684, "step": 134930 }, { "epoch": 0.2688304857835012, "grad_norm": 0.19581004977226257, "learning_rate": 0.002, "loss": 2.5626, "step": 134940 }, { "epoch": 0.2688504080071401, "grad_norm": 0.1737951934337616, "learning_rate": 0.002, "loss": 2.5596, "step": 134950 }, { "epoch": 0.26887033023077905, "grad_norm": 0.1502337008714676, "learning_rate": 0.002, "loss": 2.5824, "step": 134960 }, { "epoch": 0.26889025245441794, "grad_norm": 0.14995257556438446, "learning_rate": 0.002, "loss": 2.5742, "step": 134970 }, { "epoch": 0.2689101746780569, "grad_norm": 0.17198075354099274, "learning_rate": 0.002, "loss": 2.5515, "step": 134980 }, { "epoch": 0.2689300969016958, "grad_norm": 0.6429318189620972, "learning_rate": 0.002, "loss": 2.559, "step": 134990 }, { "epoch": 0.26895001912533467, "grad_norm": 0.15053479373455048, "learning_rate": 0.002, "loss": 2.5605, "step": 135000 }, { "epoch": 0.2689699413489736, "grad_norm": 0.1510578840970993, "learning_rate": 0.002, "loss": 2.5687, "step": 135010 }, { "epoch": 0.2689898635726125, "grad_norm": 0.21036870777606964, "learning_rate": 0.002, "loss": 2.5649, "step": 135020 }, { "epoch": 0.26900978579625145, "grad_norm": 0.1904321163892746, "learning_rate": 0.002, "loss": 2.5661, "step": 135030 }, { "epoch": 0.26902970801989035, "grad_norm": 0.17092885076999664, "learning_rate": 0.002, "loss": 2.5715, "step": 135040 }, { "epoch": 0.26904963024352924, "grad_norm": 0.1787658929824829, "learning_rate": 0.002, "loss": 2.5804, "step": 135050 }, { "epoch": 0.2690695524671682, "grad_norm": 0.16710004210472107, "learning_rate": 0.002, "loss": 2.5557, "step": 135060 }, { "epoch": 0.2690894746908071, "grad_norm": 0.15617702901363373, "learning_rate": 0.002, "loss": 2.5663, "step": 135070 }, { "epoch": 0.269109396914446, "grad_norm": 0.1685962975025177, "learning_rate": 0.002, "loss": 2.5588, "step": 135080 }, { "epoch": 0.2691293191380849, "grad_norm": 0.1964014768600464, "learning_rate": 0.002, "loss": 2.5758, "step": 135090 }, { "epoch": 0.26914924136172386, "grad_norm": 0.15578541159629822, "learning_rate": 0.002, "loss": 2.5587, "step": 135100 }, { "epoch": 0.26916916358536275, "grad_norm": 0.1466100513935089, "learning_rate": 0.002, "loss": 2.5609, "step": 135110 }, { "epoch": 0.26918908580900164, "grad_norm": 0.16371069848537445, "learning_rate": 0.002, "loss": 2.5697, "step": 135120 }, { "epoch": 0.2692090080326406, "grad_norm": 0.18994703888893127, "learning_rate": 0.002, "loss": 2.5488, "step": 135130 }, { "epoch": 0.2692289302562795, "grad_norm": 0.1348075121641159, "learning_rate": 0.002, "loss": 2.5571, "step": 135140 }, { "epoch": 0.2692488524799184, "grad_norm": 0.16074563562870026, "learning_rate": 0.002, "loss": 2.5838, "step": 135150 }, { "epoch": 0.2692687747035573, "grad_norm": 0.18040087819099426, "learning_rate": 0.002, "loss": 2.5751, "step": 135160 }, { "epoch": 0.2692886969271962, "grad_norm": 0.2031852900981903, "learning_rate": 0.002, "loss": 2.5803, "step": 135170 }, { "epoch": 0.26930861915083515, "grad_norm": 0.17943540215492249, "learning_rate": 0.002, "loss": 2.5596, "step": 135180 }, { "epoch": 0.26932854137447404, "grad_norm": 0.15456274151802063, "learning_rate": 0.002, "loss": 2.559, "step": 135190 }, { "epoch": 0.269348463598113, "grad_norm": 0.16246874630451202, "learning_rate": 0.002, "loss": 2.5558, "step": 135200 }, { "epoch": 0.2693683858217519, "grad_norm": 0.1621769368648529, "learning_rate": 0.002, "loss": 2.5686, "step": 135210 }, { "epoch": 0.26938830804539077, "grad_norm": 0.16372498869895935, "learning_rate": 0.002, "loss": 2.557, "step": 135220 }, { "epoch": 0.2694082302690297, "grad_norm": 0.17367927730083466, "learning_rate": 0.002, "loss": 2.5695, "step": 135230 }, { "epoch": 0.2694281524926686, "grad_norm": 0.18403498828411102, "learning_rate": 0.002, "loss": 2.5682, "step": 135240 }, { "epoch": 0.26944807471630755, "grad_norm": 0.18680283427238464, "learning_rate": 0.002, "loss": 2.5604, "step": 135250 }, { "epoch": 0.26946799693994644, "grad_norm": 0.15915945172309875, "learning_rate": 0.002, "loss": 2.5793, "step": 135260 }, { "epoch": 0.2694879191635854, "grad_norm": 0.16710403561592102, "learning_rate": 0.002, "loss": 2.5768, "step": 135270 }, { "epoch": 0.2695078413872243, "grad_norm": 0.20226772129535675, "learning_rate": 0.002, "loss": 2.5691, "step": 135280 }, { "epoch": 0.26952776361086317, "grad_norm": 0.13754497468471527, "learning_rate": 0.002, "loss": 2.5729, "step": 135290 }, { "epoch": 0.2695476858345021, "grad_norm": 0.16517208516597748, "learning_rate": 0.002, "loss": 2.5531, "step": 135300 }, { "epoch": 0.269567608058141, "grad_norm": 0.20782272517681122, "learning_rate": 0.002, "loss": 2.5807, "step": 135310 }, { "epoch": 0.26958753028177995, "grad_norm": 0.15772034227848053, "learning_rate": 0.002, "loss": 2.5595, "step": 135320 }, { "epoch": 0.26960745250541884, "grad_norm": 0.148477241396904, "learning_rate": 0.002, "loss": 2.5532, "step": 135330 }, { "epoch": 0.26962737472905773, "grad_norm": 0.15043331682682037, "learning_rate": 0.002, "loss": 2.5652, "step": 135340 }, { "epoch": 0.2696472969526967, "grad_norm": 0.2118658870458603, "learning_rate": 0.002, "loss": 2.5628, "step": 135350 }, { "epoch": 0.26966721917633557, "grad_norm": 0.17325296998023987, "learning_rate": 0.002, "loss": 2.5614, "step": 135360 }, { "epoch": 0.2696871413999745, "grad_norm": 0.1479720175266266, "learning_rate": 0.002, "loss": 2.56, "step": 135370 }, { "epoch": 0.2697070636236134, "grad_norm": 0.16600088775157928, "learning_rate": 0.002, "loss": 2.5598, "step": 135380 }, { "epoch": 0.26972698584725235, "grad_norm": 0.19861865043640137, "learning_rate": 0.002, "loss": 2.5638, "step": 135390 }, { "epoch": 0.26974690807089124, "grad_norm": 0.16857413947582245, "learning_rate": 0.002, "loss": 2.5574, "step": 135400 }, { "epoch": 0.26976683029453014, "grad_norm": 0.1688011884689331, "learning_rate": 0.002, "loss": 2.5455, "step": 135410 }, { "epoch": 0.2697867525181691, "grad_norm": 0.17542961239814758, "learning_rate": 0.002, "loss": 2.5668, "step": 135420 }, { "epoch": 0.269806674741808, "grad_norm": 0.17148934304714203, "learning_rate": 0.002, "loss": 2.5757, "step": 135430 }, { "epoch": 0.2698265969654469, "grad_norm": 0.19316458702087402, "learning_rate": 0.002, "loss": 2.5755, "step": 135440 }, { "epoch": 0.2698465191890858, "grad_norm": 0.15542040765285492, "learning_rate": 0.002, "loss": 2.5727, "step": 135450 }, { "epoch": 0.2698664414127247, "grad_norm": 0.14904381334781647, "learning_rate": 0.002, "loss": 2.558, "step": 135460 }, { "epoch": 0.26988636363636365, "grad_norm": 0.156570166349411, "learning_rate": 0.002, "loss": 2.5574, "step": 135470 }, { "epoch": 0.26990628586000254, "grad_norm": 0.18158069252967834, "learning_rate": 0.002, "loss": 2.5521, "step": 135480 }, { "epoch": 0.2699262080836415, "grad_norm": 0.16673055291175842, "learning_rate": 0.002, "loss": 2.5479, "step": 135490 }, { "epoch": 0.2699461303072804, "grad_norm": 0.16301700472831726, "learning_rate": 0.002, "loss": 2.5673, "step": 135500 }, { "epoch": 0.26996605253091926, "grad_norm": 0.18055109679698944, "learning_rate": 0.002, "loss": 2.5832, "step": 135510 }, { "epoch": 0.2699859747545582, "grad_norm": 0.21099688112735748, "learning_rate": 0.002, "loss": 2.552, "step": 135520 }, { "epoch": 0.2700058969781971, "grad_norm": 0.15360595285892487, "learning_rate": 0.002, "loss": 2.5558, "step": 135530 }, { "epoch": 0.27002581920183605, "grad_norm": 0.17672386765480042, "learning_rate": 0.002, "loss": 2.5571, "step": 135540 }, { "epoch": 0.27004574142547494, "grad_norm": 0.19335055351257324, "learning_rate": 0.002, "loss": 2.5538, "step": 135550 }, { "epoch": 0.2700656636491139, "grad_norm": 0.16126276552677155, "learning_rate": 0.002, "loss": 2.569, "step": 135560 }, { "epoch": 0.2700855858727528, "grad_norm": 0.17940419912338257, "learning_rate": 0.002, "loss": 2.5717, "step": 135570 }, { "epoch": 0.27010550809639167, "grad_norm": 0.16666007041931152, "learning_rate": 0.002, "loss": 2.5619, "step": 135580 }, { "epoch": 0.2701254303200306, "grad_norm": 0.13993211090564728, "learning_rate": 0.002, "loss": 2.5634, "step": 135590 }, { "epoch": 0.2701453525436695, "grad_norm": 0.16354452073574066, "learning_rate": 0.002, "loss": 2.548, "step": 135600 }, { "epoch": 0.27016527476730845, "grad_norm": 0.17659756541252136, "learning_rate": 0.002, "loss": 2.5676, "step": 135610 }, { "epoch": 0.27018519699094734, "grad_norm": 0.17747557163238525, "learning_rate": 0.002, "loss": 2.5643, "step": 135620 }, { "epoch": 0.27020511921458623, "grad_norm": 0.1856164038181305, "learning_rate": 0.002, "loss": 2.5544, "step": 135630 }, { "epoch": 0.2702250414382252, "grad_norm": 0.1683865189552307, "learning_rate": 0.002, "loss": 2.5569, "step": 135640 }, { "epoch": 0.27024496366186407, "grad_norm": 0.1577645242214203, "learning_rate": 0.002, "loss": 2.5578, "step": 135650 }, { "epoch": 0.270264885885503, "grad_norm": 0.18466590344905853, "learning_rate": 0.002, "loss": 2.5651, "step": 135660 }, { "epoch": 0.2702848081091419, "grad_norm": 0.1552349179983139, "learning_rate": 0.002, "loss": 2.5551, "step": 135670 }, { "epoch": 0.2703047303327808, "grad_norm": 0.16918213665485382, "learning_rate": 0.002, "loss": 2.5758, "step": 135680 }, { "epoch": 0.27032465255641974, "grad_norm": 0.16454051434993744, "learning_rate": 0.002, "loss": 2.5572, "step": 135690 }, { "epoch": 0.27034457478005863, "grad_norm": 0.17628975212574005, "learning_rate": 0.002, "loss": 2.5643, "step": 135700 }, { "epoch": 0.2703644970036976, "grad_norm": 0.17457258701324463, "learning_rate": 0.002, "loss": 2.5635, "step": 135710 }, { "epoch": 0.27038441922733647, "grad_norm": 0.18359677493572235, "learning_rate": 0.002, "loss": 2.5643, "step": 135720 }, { "epoch": 0.2704043414509754, "grad_norm": 0.2039678692817688, "learning_rate": 0.002, "loss": 2.5529, "step": 135730 }, { "epoch": 0.2704242636746143, "grad_norm": 0.15495119988918304, "learning_rate": 0.002, "loss": 2.5634, "step": 135740 }, { "epoch": 0.2704441858982532, "grad_norm": 0.15641407668590546, "learning_rate": 0.002, "loss": 2.5532, "step": 135750 }, { "epoch": 0.27046410812189214, "grad_norm": 0.15544818341732025, "learning_rate": 0.002, "loss": 2.5491, "step": 135760 }, { "epoch": 0.27048403034553103, "grad_norm": 0.15567553043365479, "learning_rate": 0.002, "loss": 2.575, "step": 135770 }, { "epoch": 0.27050395256917, "grad_norm": 0.20403796434402466, "learning_rate": 0.002, "loss": 2.5627, "step": 135780 }, { "epoch": 0.27052387479280887, "grad_norm": 0.16403113305568695, "learning_rate": 0.002, "loss": 2.5639, "step": 135790 }, { "epoch": 0.27054379701644776, "grad_norm": 0.15259231626987457, "learning_rate": 0.002, "loss": 2.5653, "step": 135800 }, { "epoch": 0.2705637192400867, "grad_norm": 0.16530363261699677, "learning_rate": 0.002, "loss": 2.5515, "step": 135810 }, { "epoch": 0.2705836414637256, "grad_norm": 0.16540639102458954, "learning_rate": 0.002, "loss": 2.5582, "step": 135820 }, { "epoch": 0.27060356368736455, "grad_norm": 0.15596194565296173, "learning_rate": 0.002, "loss": 2.5724, "step": 135830 }, { "epoch": 0.27062348591100344, "grad_norm": 0.17452961206436157, "learning_rate": 0.002, "loss": 2.5634, "step": 135840 }, { "epoch": 0.2706434081346424, "grad_norm": 0.16481050848960876, "learning_rate": 0.002, "loss": 2.5554, "step": 135850 }, { "epoch": 0.2706633303582813, "grad_norm": 0.16037438809871674, "learning_rate": 0.002, "loss": 2.568, "step": 135860 }, { "epoch": 0.27068325258192016, "grad_norm": 0.18493686616420746, "learning_rate": 0.002, "loss": 2.5635, "step": 135870 }, { "epoch": 0.2707031748055591, "grad_norm": 0.13234302401542664, "learning_rate": 0.002, "loss": 2.5624, "step": 135880 }, { "epoch": 0.270723097029198, "grad_norm": 0.2168886512517929, "learning_rate": 0.002, "loss": 2.554, "step": 135890 }, { "epoch": 0.27074301925283695, "grad_norm": 0.18003402650356293, "learning_rate": 0.002, "loss": 2.5797, "step": 135900 }, { "epoch": 0.27076294147647584, "grad_norm": 0.14325012266635895, "learning_rate": 0.002, "loss": 2.5658, "step": 135910 }, { "epoch": 0.27078286370011473, "grad_norm": 0.22104144096374512, "learning_rate": 0.002, "loss": 2.565, "step": 135920 }, { "epoch": 0.2708027859237537, "grad_norm": 0.17139145731925964, "learning_rate": 0.002, "loss": 2.5609, "step": 135930 }, { "epoch": 0.27082270814739257, "grad_norm": 0.18123884499073029, "learning_rate": 0.002, "loss": 2.5733, "step": 135940 }, { "epoch": 0.2708426303710315, "grad_norm": 0.1826089471578598, "learning_rate": 0.002, "loss": 2.5549, "step": 135950 }, { "epoch": 0.2708625525946704, "grad_norm": 0.1812889277935028, "learning_rate": 0.002, "loss": 2.5542, "step": 135960 }, { "epoch": 0.2708824748183093, "grad_norm": 0.14924569427967072, "learning_rate": 0.002, "loss": 2.5567, "step": 135970 }, { "epoch": 0.27090239704194824, "grad_norm": 0.24643220007419586, "learning_rate": 0.002, "loss": 2.5613, "step": 135980 }, { "epoch": 0.27092231926558713, "grad_norm": 0.14890070259571075, "learning_rate": 0.002, "loss": 2.5623, "step": 135990 }, { "epoch": 0.2709422414892261, "grad_norm": 0.1475580930709839, "learning_rate": 0.002, "loss": 2.5639, "step": 136000 }, { "epoch": 0.27096216371286497, "grad_norm": 0.17986012995243073, "learning_rate": 0.002, "loss": 2.5494, "step": 136010 }, { "epoch": 0.2709820859365039, "grad_norm": 0.15459509193897247, "learning_rate": 0.002, "loss": 2.5624, "step": 136020 }, { "epoch": 0.2710020081601428, "grad_norm": 0.14710307121276855, "learning_rate": 0.002, "loss": 2.5628, "step": 136030 }, { "epoch": 0.2710219303837817, "grad_norm": 0.3995579481124878, "learning_rate": 0.002, "loss": 2.5584, "step": 136040 }, { "epoch": 0.27104185260742064, "grad_norm": 0.15261614322662354, "learning_rate": 0.002, "loss": 2.5823, "step": 136050 }, { "epoch": 0.27106177483105953, "grad_norm": 0.15538930892944336, "learning_rate": 0.002, "loss": 2.5736, "step": 136060 }, { "epoch": 0.2710816970546985, "grad_norm": 0.17713700234889984, "learning_rate": 0.002, "loss": 2.5739, "step": 136070 }, { "epoch": 0.27110161927833737, "grad_norm": 0.16144151985645294, "learning_rate": 0.002, "loss": 2.5641, "step": 136080 }, { "epoch": 0.27112154150197626, "grad_norm": 0.16213348507881165, "learning_rate": 0.002, "loss": 2.5706, "step": 136090 }, { "epoch": 0.2711414637256152, "grad_norm": 0.18956439197063446, "learning_rate": 0.002, "loss": 2.5744, "step": 136100 }, { "epoch": 0.2711613859492541, "grad_norm": 0.14010180532932281, "learning_rate": 0.002, "loss": 2.5563, "step": 136110 }, { "epoch": 0.27118130817289304, "grad_norm": 0.17177879810333252, "learning_rate": 0.002, "loss": 2.5717, "step": 136120 }, { "epoch": 0.27120123039653193, "grad_norm": 0.1707640141248703, "learning_rate": 0.002, "loss": 2.5585, "step": 136130 }, { "epoch": 0.2712211526201709, "grad_norm": 0.19204024970531464, "learning_rate": 0.002, "loss": 2.5662, "step": 136140 }, { "epoch": 0.27124107484380977, "grad_norm": 0.15798328816890717, "learning_rate": 0.002, "loss": 2.5705, "step": 136150 }, { "epoch": 0.27126099706744866, "grad_norm": 0.15596076846122742, "learning_rate": 0.002, "loss": 2.5709, "step": 136160 }, { "epoch": 0.2712809192910876, "grad_norm": 0.17257875204086304, "learning_rate": 0.002, "loss": 2.5803, "step": 136170 }, { "epoch": 0.2713008415147265, "grad_norm": 0.15873141586780548, "learning_rate": 0.002, "loss": 2.5659, "step": 136180 }, { "epoch": 0.27132076373836544, "grad_norm": 0.1605065017938614, "learning_rate": 0.002, "loss": 2.5668, "step": 136190 }, { "epoch": 0.27134068596200434, "grad_norm": 0.15917715430259705, "learning_rate": 0.002, "loss": 2.5756, "step": 136200 }, { "epoch": 0.2713606081856432, "grad_norm": 0.15917102992534637, "learning_rate": 0.002, "loss": 2.5725, "step": 136210 }, { "epoch": 0.2713805304092822, "grad_norm": 0.16469548642635345, "learning_rate": 0.002, "loss": 2.5615, "step": 136220 }, { "epoch": 0.27140045263292106, "grad_norm": 0.1920800805091858, "learning_rate": 0.002, "loss": 2.5595, "step": 136230 }, { "epoch": 0.27142037485656, "grad_norm": 0.16159948706626892, "learning_rate": 0.002, "loss": 2.5699, "step": 136240 }, { "epoch": 0.2714402970801989, "grad_norm": 0.1786143183708191, "learning_rate": 0.002, "loss": 2.5767, "step": 136250 }, { "epoch": 0.2714602193038378, "grad_norm": 0.18081800639629364, "learning_rate": 0.002, "loss": 2.5612, "step": 136260 }, { "epoch": 0.27148014152747674, "grad_norm": 0.17939898371696472, "learning_rate": 0.002, "loss": 2.5735, "step": 136270 }, { "epoch": 0.27150006375111563, "grad_norm": 0.16778458654880524, "learning_rate": 0.002, "loss": 2.5647, "step": 136280 }, { "epoch": 0.2715199859747546, "grad_norm": 0.1634594351053238, "learning_rate": 0.002, "loss": 2.5742, "step": 136290 }, { "epoch": 0.27153990819839346, "grad_norm": 0.16163627803325653, "learning_rate": 0.002, "loss": 2.5603, "step": 136300 }, { "epoch": 0.2715598304220324, "grad_norm": 0.18858514726161957, "learning_rate": 0.002, "loss": 2.5722, "step": 136310 }, { "epoch": 0.2715797526456713, "grad_norm": 0.1515595018863678, "learning_rate": 0.002, "loss": 2.5792, "step": 136320 }, { "epoch": 0.2715996748693102, "grad_norm": 0.16998903453350067, "learning_rate": 0.002, "loss": 2.5475, "step": 136330 }, { "epoch": 0.27161959709294914, "grad_norm": 0.1881949007511139, "learning_rate": 0.002, "loss": 2.5727, "step": 136340 }, { "epoch": 0.27163951931658803, "grad_norm": 0.1664326936006546, "learning_rate": 0.002, "loss": 2.5646, "step": 136350 }, { "epoch": 0.271659441540227, "grad_norm": 0.1454952210187912, "learning_rate": 0.002, "loss": 2.5548, "step": 136360 }, { "epoch": 0.27167936376386587, "grad_norm": 0.1717023402452469, "learning_rate": 0.002, "loss": 2.5636, "step": 136370 }, { "epoch": 0.27169928598750476, "grad_norm": 0.1946565806865692, "learning_rate": 0.002, "loss": 2.5632, "step": 136380 }, { "epoch": 0.2717192082111437, "grad_norm": 0.1818363219499588, "learning_rate": 0.002, "loss": 2.5669, "step": 136390 }, { "epoch": 0.2717391304347826, "grad_norm": 0.1560204178094864, "learning_rate": 0.002, "loss": 2.5513, "step": 136400 }, { "epoch": 0.27175905265842154, "grad_norm": 0.16932085156440735, "learning_rate": 0.002, "loss": 2.5612, "step": 136410 }, { "epoch": 0.27177897488206043, "grad_norm": 0.16673685610294342, "learning_rate": 0.002, "loss": 2.5568, "step": 136420 }, { "epoch": 0.2717988971056994, "grad_norm": 0.14351807534694672, "learning_rate": 0.002, "loss": 2.559, "step": 136430 }, { "epoch": 0.27181881932933827, "grad_norm": 0.14798341691493988, "learning_rate": 0.002, "loss": 2.5665, "step": 136440 }, { "epoch": 0.27183874155297716, "grad_norm": 0.1681973785161972, "learning_rate": 0.002, "loss": 2.57, "step": 136450 }, { "epoch": 0.2718586637766161, "grad_norm": 0.2182823121547699, "learning_rate": 0.002, "loss": 2.5588, "step": 136460 }, { "epoch": 0.271878586000255, "grad_norm": 0.1459421068429947, "learning_rate": 0.002, "loss": 2.5619, "step": 136470 }, { "epoch": 0.27189850822389394, "grad_norm": 0.14725813269615173, "learning_rate": 0.002, "loss": 2.553, "step": 136480 }, { "epoch": 0.27191843044753283, "grad_norm": 0.13588280975818634, "learning_rate": 0.002, "loss": 2.5535, "step": 136490 }, { "epoch": 0.2719383526711717, "grad_norm": 0.19449397921562195, "learning_rate": 0.002, "loss": 2.567, "step": 136500 }, { "epoch": 0.27195827489481067, "grad_norm": 0.1640390157699585, "learning_rate": 0.002, "loss": 2.5562, "step": 136510 }, { "epoch": 0.27197819711844956, "grad_norm": 0.1412353366613388, "learning_rate": 0.002, "loss": 2.5676, "step": 136520 }, { "epoch": 0.2719981193420885, "grad_norm": 0.17212975025177002, "learning_rate": 0.002, "loss": 2.5562, "step": 136530 }, { "epoch": 0.2720180415657274, "grad_norm": 0.15397626161575317, "learning_rate": 0.002, "loss": 2.5704, "step": 136540 }, { "epoch": 0.2720379637893663, "grad_norm": 0.18960343301296234, "learning_rate": 0.002, "loss": 2.5494, "step": 136550 }, { "epoch": 0.27205788601300523, "grad_norm": 0.19337846338748932, "learning_rate": 0.002, "loss": 2.5594, "step": 136560 }, { "epoch": 0.2720778082366441, "grad_norm": 0.15983495116233826, "learning_rate": 0.002, "loss": 2.5644, "step": 136570 }, { "epoch": 0.27209773046028307, "grad_norm": 0.18226686120033264, "learning_rate": 0.002, "loss": 2.5807, "step": 136580 }, { "epoch": 0.27211765268392196, "grad_norm": 0.1682508885860443, "learning_rate": 0.002, "loss": 2.5736, "step": 136590 }, { "epoch": 0.2721375749075609, "grad_norm": 0.17174573242664337, "learning_rate": 0.002, "loss": 2.5508, "step": 136600 }, { "epoch": 0.2721574971311998, "grad_norm": 0.2889564037322998, "learning_rate": 0.002, "loss": 2.5539, "step": 136610 }, { "epoch": 0.2721774193548387, "grad_norm": 0.20588497817516327, "learning_rate": 0.002, "loss": 2.5532, "step": 136620 }, { "epoch": 0.27219734157847764, "grad_norm": 0.14818818867206573, "learning_rate": 0.002, "loss": 2.5686, "step": 136630 }, { "epoch": 0.2722172638021165, "grad_norm": 0.15479092299938202, "learning_rate": 0.002, "loss": 2.5496, "step": 136640 }, { "epoch": 0.2722371860257555, "grad_norm": 0.17687666416168213, "learning_rate": 0.002, "loss": 2.5589, "step": 136650 }, { "epoch": 0.27225710824939436, "grad_norm": 0.1746886670589447, "learning_rate": 0.002, "loss": 2.5527, "step": 136660 }, { "epoch": 0.27227703047303325, "grad_norm": 0.19280102849006653, "learning_rate": 0.002, "loss": 2.5659, "step": 136670 }, { "epoch": 0.2722969526966722, "grad_norm": 0.19682125747203827, "learning_rate": 0.002, "loss": 2.5667, "step": 136680 }, { "epoch": 0.2723168749203111, "grad_norm": 0.15646371245384216, "learning_rate": 0.002, "loss": 2.5738, "step": 136690 }, { "epoch": 0.27233679714395004, "grad_norm": 0.13978834450244904, "learning_rate": 0.002, "loss": 2.5647, "step": 136700 }, { "epoch": 0.27235671936758893, "grad_norm": 0.19855114817619324, "learning_rate": 0.002, "loss": 2.5559, "step": 136710 }, { "epoch": 0.2723766415912278, "grad_norm": 0.1734199970960617, "learning_rate": 0.002, "loss": 2.5594, "step": 136720 }, { "epoch": 0.27239656381486677, "grad_norm": 0.14195796847343445, "learning_rate": 0.002, "loss": 2.5605, "step": 136730 }, { "epoch": 0.27241648603850566, "grad_norm": 0.15878815948963165, "learning_rate": 0.002, "loss": 2.558, "step": 136740 }, { "epoch": 0.2724364082621446, "grad_norm": 0.2272043526172638, "learning_rate": 0.002, "loss": 2.5635, "step": 136750 }, { "epoch": 0.2724563304857835, "grad_norm": 0.16116952896118164, "learning_rate": 0.002, "loss": 2.5567, "step": 136760 }, { "epoch": 0.27247625270942244, "grad_norm": 0.19517751038074493, "learning_rate": 0.002, "loss": 2.563, "step": 136770 }, { "epoch": 0.27249617493306133, "grad_norm": 0.14678364992141724, "learning_rate": 0.002, "loss": 2.5764, "step": 136780 }, { "epoch": 0.2725160971567002, "grad_norm": 0.18189509212970734, "learning_rate": 0.002, "loss": 2.5742, "step": 136790 }, { "epoch": 0.27253601938033917, "grad_norm": 0.18256519734859467, "learning_rate": 0.002, "loss": 2.5607, "step": 136800 }, { "epoch": 0.27255594160397806, "grad_norm": 0.18906423449516296, "learning_rate": 0.002, "loss": 2.5675, "step": 136810 }, { "epoch": 0.272575863827617, "grad_norm": 0.14962810277938843, "learning_rate": 0.002, "loss": 2.5629, "step": 136820 }, { "epoch": 0.2725957860512559, "grad_norm": 0.1500730663537979, "learning_rate": 0.002, "loss": 2.5506, "step": 136830 }, { "epoch": 0.2726157082748948, "grad_norm": 0.1835772693157196, "learning_rate": 0.002, "loss": 2.5607, "step": 136840 }, { "epoch": 0.27263563049853373, "grad_norm": 0.16273072361946106, "learning_rate": 0.002, "loss": 2.5534, "step": 136850 }, { "epoch": 0.2726555527221726, "grad_norm": 0.15190261602401733, "learning_rate": 0.002, "loss": 2.5719, "step": 136860 }, { "epoch": 0.27267547494581157, "grad_norm": 0.1983330398797989, "learning_rate": 0.002, "loss": 2.5497, "step": 136870 }, { "epoch": 0.27269539716945046, "grad_norm": 0.1795731633901596, "learning_rate": 0.002, "loss": 2.5609, "step": 136880 }, { "epoch": 0.2727153193930894, "grad_norm": 0.1862451583147049, "learning_rate": 0.002, "loss": 2.5661, "step": 136890 }, { "epoch": 0.2727352416167283, "grad_norm": 0.17694063484668732, "learning_rate": 0.002, "loss": 2.5584, "step": 136900 }, { "epoch": 0.2727551638403672, "grad_norm": 0.15653961896896362, "learning_rate": 0.002, "loss": 2.5738, "step": 136910 }, { "epoch": 0.27277508606400613, "grad_norm": 0.18974363803863525, "learning_rate": 0.002, "loss": 2.5608, "step": 136920 }, { "epoch": 0.272795008287645, "grad_norm": 0.15296077728271484, "learning_rate": 0.002, "loss": 2.5547, "step": 136930 }, { "epoch": 0.27281493051128397, "grad_norm": 0.15884830057621002, "learning_rate": 0.002, "loss": 2.5636, "step": 136940 }, { "epoch": 0.27283485273492286, "grad_norm": 0.1713431477546692, "learning_rate": 0.002, "loss": 2.5634, "step": 136950 }, { "epoch": 0.27285477495856175, "grad_norm": 0.17597787082195282, "learning_rate": 0.002, "loss": 2.5553, "step": 136960 }, { "epoch": 0.2728746971822007, "grad_norm": 0.18574677407741547, "learning_rate": 0.002, "loss": 2.5625, "step": 136970 }, { "epoch": 0.2728946194058396, "grad_norm": 0.1618015021085739, "learning_rate": 0.002, "loss": 2.5679, "step": 136980 }, { "epoch": 0.27291454162947854, "grad_norm": 0.18427489697933197, "learning_rate": 0.002, "loss": 2.5567, "step": 136990 }, { "epoch": 0.2729344638531174, "grad_norm": 0.18697518110275269, "learning_rate": 0.002, "loss": 2.5677, "step": 137000 }, { "epoch": 0.2729543860767563, "grad_norm": 0.15663893520832062, "learning_rate": 0.002, "loss": 2.5623, "step": 137010 }, { "epoch": 0.27297430830039526, "grad_norm": 0.18522796034812927, "learning_rate": 0.002, "loss": 2.5522, "step": 137020 }, { "epoch": 0.27299423052403415, "grad_norm": 0.17232584953308105, "learning_rate": 0.002, "loss": 2.5505, "step": 137030 }, { "epoch": 0.2730141527476731, "grad_norm": 0.17026445269584656, "learning_rate": 0.002, "loss": 2.5598, "step": 137040 }, { "epoch": 0.273034074971312, "grad_norm": 0.18808956444263458, "learning_rate": 0.002, "loss": 2.5727, "step": 137050 }, { "epoch": 0.27305399719495094, "grad_norm": 0.1737118363380432, "learning_rate": 0.002, "loss": 2.5757, "step": 137060 }, { "epoch": 0.27307391941858983, "grad_norm": 0.13685335218906403, "learning_rate": 0.002, "loss": 2.5626, "step": 137070 }, { "epoch": 0.2730938416422287, "grad_norm": 0.1702864021062851, "learning_rate": 0.002, "loss": 2.5648, "step": 137080 }, { "epoch": 0.27311376386586766, "grad_norm": 0.16520462930202484, "learning_rate": 0.002, "loss": 2.57, "step": 137090 }, { "epoch": 0.27313368608950656, "grad_norm": 0.2098814994096756, "learning_rate": 0.002, "loss": 2.5626, "step": 137100 }, { "epoch": 0.2731536083131455, "grad_norm": 0.18381938338279724, "learning_rate": 0.002, "loss": 2.5364, "step": 137110 }, { "epoch": 0.2731735305367844, "grad_norm": 0.1515503227710724, "learning_rate": 0.002, "loss": 2.5754, "step": 137120 }, { "epoch": 0.2731934527604233, "grad_norm": 0.18316325545310974, "learning_rate": 0.002, "loss": 2.56, "step": 137130 }, { "epoch": 0.27321337498406223, "grad_norm": 0.1988406628370285, "learning_rate": 0.002, "loss": 2.5625, "step": 137140 }, { "epoch": 0.2732332972077011, "grad_norm": 0.19962938129901886, "learning_rate": 0.002, "loss": 2.557, "step": 137150 }, { "epoch": 0.27325321943134007, "grad_norm": 0.1515369713306427, "learning_rate": 0.002, "loss": 2.5818, "step": 137160 }, { "epoch": 0.27327314165497896, "grad_norm": 0.1939469426870346, "learning_rate": 0.002, "loss": 2.5595, "step": 137170 }, { "epoch": 0.2732930638786179, "grad_norm": 0.16696177423000336, "learning_rate": 0.002, "loss": 2.5573, "step": 137180 }, { "epoch": 0.2733129861022568, "grad_norm": 0.15028133988380432, "learning_rate": 0.002, "loss": 2.5497, "step": 137190 }, { "epoch": 0.2733329083258957, "grad_norm": 0.1489776223897934, "learning_rate": 0.002, "loss": 2.5562, "step": 137200 }, { "epoch": 0.27335283054953463, "grad_norm": 0.1595449596643448, "learning_rate": 0.002, "loss": 2.5715, "step": 137210 }, { "epoch": 0.2733727527731735, "grad_norm": 0.217304527759552, "learning_rate": 0.002, "loss": 2.5723, "step": 137220 }, { "epoch": 0.27339267499681247, "grad_norm": 0.14696502685546875, "learning_rate": 0.002, "loss": 2.5659, "step": 137230 }, { "epoch": 0.27341259722045136, "grad_norm": 0.19398532807826996, "learning_rate": 0.002, "loss": 2.5583, "step": 137240 }, { "epoch": 0.27343251944409025, "grad_norm": 0.1707824021577835, "learning_rate": 0.002, "loss": 2.5705, "step": 137250 }, { "epoch": 0.2734524416677292, "grad_norm": 0.16754715144634247, "learning_rate": 0.002, "loss": 2.5698, "step": 137260 }, { "epoch": 0.2734723638913681, "grad_norm": 0.20184262096881866, "learning_rate": 0.002, "loss": 2.5516, "step": 137270 }, { "epoch": 0.27349228611500703, "grad_norm": 0.15019403398036957, "learning_rate": 0.002, "loss": 2.5613, "step": 137280 }, { "epoch": 0.2735122083386459, "grad_norm": 0.17978866398334503, "learning_rate": 0.002, "loss": 2.5648, "step": 137290 }, { "epoch": 0.2735321305622848, "grad_norm": 0.16021235287189484, "learning_rate": 0.002, "loss": 2.5516, "step": 137300 }, { "epoch": 0.27355205278592376, "grad_norm": 0.16379623115062714, "learning_rate": 0.002, "loss": 2.5611, "step": 137310 }, { "epoch": 0.27357197500956265, "grad_norm": 0.1515803188085556, "learning_rate": 0.002, "loss": 2.5626, "step": 137320 }, { "epoch": 0.2735918972332016, "grad_norm": 0.21595963835716248, "learning_rate": 0.002, "loss": 2.5678, "step": 137330 }, { "epoch": 0.2736118194568405, "grad_norm": 0.16363821923732758, "learning_rate": 0.002, "loss": 2.5538, "step": 137340 }, { "epoch": 0.27363174168047943, "grad_norm": 0.16420233249664307, "learning_rate": 0.002, "loss": 2.5691, "step": 137350 }, { "epoch": 0.2736516639041183, "grad_norm": 0.18802765011787415, "learning_rate": 0.002, "loss": 2.5641, "step": 137360 }, { "epoch": 0.2736715861277572, "grad_norm": 0.16271129250526428, "learning_rate": 0.002, "loss": 2.5784, "step": 137370 }, { "epoch": 0.27369150835139616, "grad_norm": 0.1606723815202713, "learning_rate": 0.002, "loss": 2.5629, "step": 137380 }, { "epoch": 0.27371143057503505, "grad_norm": 0.17283689975738525, "learning_rate": 0.002, "loss": 2.5745, "step": 137390 }, { "epoch": 0.273731352798674, "grad_norm": 0.1527918428182602, "learning_rate": 0.002, "loss": 2.5596, "step": 137400 }, { "epoch": 0.2737512750223129, "grad_norm": 0.22626522183418274, "learning_rate": 0.002, "loss": 2.5616, "step": 137410 }, { "epoch": 0.2737711972459518, "grad_norm": 0.17017066478729248, "learning_rate": 0.002, "loss": 2.5982, "step": 137420 }, { "epoch": 0.2737911194695907, "grad_norm": 0.15064737200737, "learning_rate": 0.002, "loss": 2.5753, "step": 137430 }, { "epoch": 0.2738110416932296, "grad_norm": 0.14621008932590485, "learning_rate": 0.002, "loss": 2.5645, "step": 137440 }, { "epoch": 0.27383096391686856, "grad_norm": 0.1597508043050766, "learning_rate": 0.002, "loss": 2.5529, "step": 137450 }, { "epoch": 0.27385088614050745, "grad_norm": 0.1799263060092926, "learning_rate": 0.002, "loss": 2.5698, "step": 137460 }, { "epoch": 0.27387080836414635, "grad_norm": 0.18217012286186218, "learning_rate": 0.002, "loss": 2.5467, "step": 137470 }, { "epoch": 0.2738907305877853, "grad_norm": 0.16145336627960205, "learning_rate": 0.002, "loss": 2.565, "step": 137480 }, { "epoch": 0.2739106528114242, "grad_norm": 0.1743769347667694, "learning_rate": 0.002, "loss": 2.5588, "step": 137490 }, { "epoch": 0.27393057503506313, "grad_norm": 0.13911262154579163, "learning_rate": 0.002, "loss": 2.5456, "step": 137500 }, { "epoch": 0.273950497258702, "grad_norm": 0.15848402678966522, "learning_rate": 0.002, "loss": 2.5532, "step": 137510 }, { "epoch": 0.27397041948234097, "grad_norm": 0.1440209597349167, "learning_rate": 0.002, "loss": 2.5727, "step": 137520 }, { "epoch": 0.27399034170597986, "grad_norm": 0.1766732931137085, "learning_rate": 0.002, "loss": 2.5826, "step": 137530 }, { "epoch": 0.27401026392961875, "grad_norm": 0.16523127257823944, "learning_rate": 0.002, "loss": 2.5585, "step": 137540 }, { "epoch": 0.2740301861532577, "grad_norm": 0.15136200189590454, "learning_rate": 0.002, "loss": 2.5579, "step": 137550 }, { "epoch": 0.2740501083768966, "grad_norm": 0.1533278077840805, "learning_rate": 0.002, "loss": 2.5639, "step": 137560 }, { "epoch": 0.27407003060053553, "grad_norm": 0.15829606354236603, "learning_rate": 0.002, "loss": 2.5732, "step": 137570 }, { "epoch": 0.2740899528241744, "grad_norm": 0.19635087251663208, "learning_rate": 0.002, "loss": 2.5628, "step": 137580 }, { "epoch": 0.2741098750478133, "grad_norm": 0.14345777034759521, "learning_rate": 0.002, "loss": 2.5648, "step": 137590 }, { "epoch": 0.27412979727145226, "grad_norm": 0.15317831933498383, "learning_rate": 0.002, "loss": 2.5756, "step": 137600 }, { "epoch": 0.27414971949509115, "grad_norm": 0.1801021695137024, "learning_rate": 0.002, "loss": 2.5759, "step": 137610 }, { "epoch": 0.2741696417187301, "grad_norm": 0.1607522964477539, "learning_rate": 0.002, "loss": 2.5762, "step": 137620 }, { "epoch": 0.274189563942369, "grad_norm": 0.16752153635025024, "learning_rate": 0.002, "loss": 2.5772, "step": 137630 }, { "epoch": 0.27420948616600793, "grad_norm": 0.19066065549850464, "learning_rate": 0.002, "loss": 2.5635, "step": 137640 }, { "epoch": 0.2742294083896468, "grad_norm": 0.1547929346561432, "learning_rate": 0.002, "loss": 2.5709, "step": 137650 }, { "epoch": 0.2742493306132857, "grad_norm": 0.16059328615665436, "learning_rate": 0.002, "loss": 2.5812, "step": 137660 }, { "epoch": 0.27426925283692466, "grad_norm": 0.1544366180896759, "learning_rate": 0.002, "loss": 2.5674, "step": 137670 }, { "epoch": 0.27428917506056355, "grad_norm": 0.18682946264743805, "learning_rate": 0.002, "loss": 2.5645, "step": 137680 }, { "epoch": 0.2743090972842025, "grad_norm": 0.17435434460639954, "learning_rate": 0.002, "loss": 2.5529, "step": 137690 }, { "epoch": 0.2743290195078414, "grad_norm": 0.15194307267665863, "learning_rate": 0.002, "loss": 2.5667, "step": 137700 }, { "epoch": 0.2743489417314803, "grad_norm": 0.1816454976797104, "learning_rate": 0.002, "loss": 2.5609, "step": 137710 }, { "epoch": 0.2743688639551192, "grad_norm": 0.1683974713087082, "learning_rate": 0.002, "loss": 2.563, "step": 137720 }, { "epoch": 0.2743887861787581, "grad_norm": 0.16526401042938232, "learning_rate": 0.002, "loss": 2.5728, "step": 137730 }, { "epoch": 0.27440870840239706, "grad_norm": 0.154788076877594, "learning_rate": 0.002, "loss": 2.5657, "step": 137740 }, { "epoch": 0.27442863062603595, "grad_norm": 0.16248807311058044, "learning_rate": 0.002, "loss": 2.5755, "step": 137750 }, { "epoch": 0.27444855284967484, "grad_norm": 0.14601290225982666, "learning_rate": 0.002, "loss": 2.5572, "step": 137760 }, { "epoch": 0.2744684750733138, "grad_norm": 0.1651184856891632, "learning_rate": 0.002, "loss": 2.5656, "step": 137770 }, { "epoch": 0.2744883972969527, "grad_norm": 0.15175838768482208, "learning_rate": 0.002, "loss": 2.5514, "step": 137780 }, { "epoch": 0.2745083195205916, "grad_norm": 0.18845175206661224, "learning_rate": 0.002, "loss": 2.5577, "step": 137790 }, { "epoch": 0.2745282417442305, "grad_norm": 0.18992897868156433, "learning_rate": 0.002, "loss": 2.5635, "step": 137800 }, { "epoch": 0.27454816396786946, "grad_norm": 0.16720937192440033, "learning_rate": 0.002, "loss": 2.5637, "step": 137810 }, { "epoch": 0.27456808619150835, "grad_norm": 0.1773984730243683, "learning_rate": 0.002, "loss": 2.5648, "step": 137820 }, { "epoch": 0.27458800841514724, "grad_norm": 0.18039555847644806, "learning_rate": 0.002, "loss": 2.5459, "step": 137830 }, { "epoch": 0.2746079306387862, "grad_norm": 0.14599451422691345, "learning_rate": 0.002, "loss": 2.5709, "step": 137840 }, { "epoch": 0.2746278528624251, "grad_norm": 0.1801750808954239, "learning_rate": 0.002, "loss": 2.5607, "step": 137850 }, { "epoch": 0.274647775086064, "grad_norm": 0.17029623687267303, "learning_rate": 0.002, "loss": 2.5623, "step": 137860 }, { "epoch": 0.2746676973097029, "grad_norm": 0.18294298648834229, "learning_rate": 0.002, "loss": 2.5604, "step": 137870 }, { "epoch": 0.2746876195333418, "grad_norm": 0.22213084995746613, "learning_rate": 0.002, "loss": 2.5554, "step": 137880 }, { "epoch": 0.27470754175698076, "grad_norm": 0.1841093748807907, "learning_rate": 0.002, "loss": 2.5521, "step": 137890 }, { "epoch": 0.27472746398061965, "grad_norm": 0.19625045359134674, "learning_rate": 0.002, "loss": 2.5473, "step": 137900 }, { "epoch": 0.2747473862042586, "grad_norm": 0.18103773891925812, "learning_rate": 0.002, "loss": 2.5509, "step": 137910 }, { "epoch": 0.2747673084278975, "grad_norm": 0.1666310578584671, "learning_rate": 0.002, "loss": 2.5824, "step": 137920 }, { "epoch": 0.27478723065153643, "grad_norm": 0.1450013369321823, "learning_rate": 0.002, "loss": 2.5555, "step": 137930 }, { "epoch": 0.2748071528751753, "grad_norm": 0.19719652831554413, "learning_rate": 0.002, "loss": 2.5621, "step": 137940 }, { "epoch": 0.2748270750988142, "grad_norm": 0.16811424493789673, "learning_rate": 0.002, "loss": 2.5689, "step": 137950 }, { "epoch": 0.27484699732245316, "grad_norm": 0.15417777001857758, "learning_rate": 0.002, "loss": 2.5585, "step": 137960 }, { "epoch": 0.27486691954609205, "grad_norm": 0.1811131238937378, "learning_rate": 0.002, "loss": 2.5692, "step": 137970 }, { "epoch": 0.274886841769731, "grad_norm": 0.15607520937919617, "learning_rate": 0.002, "loss": 2.5555, "step": 137980 }, { "epoch": 0.2749067639933699, "grad_norm": 0.1641385555267334, "learning_rate": 0.002, "loss": 2.5666, "step": 137990 }, { "epoch": 0.2749266862170088, "grad_norm": 0.14724473655223846, "learning_rate": 0.002, "loss": 2.5569, "step": 138000 }, { "epoch": 0.2749466084406477, "grad_norm": 0.16978508234024048, "learning_rate": 0.002, "loss": 2.5639, "step": 138010 }, { "epoch": 0.2749665306642866, "grad_norm": 0.151886448264122, "learning_rate": 0.002, "loss": 2.5756, "step": 138020 }, { "epoch": 0.27498645288792556, "grad_norm": 0.15787217020988464, "learning_rate": 0.002, "loss": 2.5776, "step": 138030 }, { "epoch": 0.27500637511156445, "grad_norm": 0.20512326061725616, "learning_rate": 0.002, "loss": 2.5619, "step": 138040 }, { "epoch": 0.27502629733520334, "grad_norm": 0.16684041917324066, "learning_rate": 0.002, "loss": 2.5739, "step": 138050 }, { "epoch": 0.2750462195588423, "grad_norm": 0.17661604285240173, "learning_rate": 0.002, "loss": 2.5704, "step": 138060 }, { "epoch": 0.2750661417824812, "grad_norm": 0.15663249790668488, "learning_rate": 0.002, "loss": 2.5695, "step": 138070 }, { "epoch": 0.2750860640061201, "grad_norm": 0.18591487407684326, "learning_rate": 0.002, "loss": 2.5632, "step": 138080 }, { "epoch": 0.275105986229759, "grad_norm": 0.14575056731700897, "learning_rate": 0.002, "loss": 2.5656, "step": 138090 }, { "epoch": 0.27512590845339796, "grad_norm": 0.15035368502140045, "learning_rate": 0.002, "loss": 2.5513, "step": 138100 }, { "epoch": 0.27514583067703685, "grad_norm": 0.15769557654857635, "learning_rate": 0.002, "loss": 2.5818, "step": 138110 }, { "epoch": 0.27516575290067574, "grad_norm": 0.16953791677951813, "learning_rate": 0.002, "loss": 2.564, "step": 138120 }, { "epoch": 0.2751856751243147, "grad_norm": 0.16808980703353882, "learning_rate": 0.002, "loss": 2.5609, "step": 138130 }, { "epoch": 0.2752055973479536, "grad_norm": 0.17203615605831146, "learning_rate": 0.002, "loss": 2.5417, "step": 138140 }, { "epoch": 0.2752255195715925, "grad_norm": 0.1823679506778717, "learning_rate": 0.002, "loss": 2.5581, "step": 138150 }, { "epoch": 0.2752454417952314, "grad_norm": 0.17067644000053406, "learning_rate": 0.002, "loss": 2.5649, "step": 138160 }, { "epoch": 0.2752653640188703, "grad_norm": 0.18518105149269104, "learning_rate": 0.002, "loss": 2.5544, "step": 138170 }, { "epoch": 0.27528528624250925, "grad_norm": 0.21952398121356964, "learning_rate": 0.002, "loss": 2.5426, "step": 138180 }, { "epoch": 0.27530520846614814, "grad_norm": 0.1597633808851242, "learning_rate": 0.002, "loss": 2.5744, "step": 138190 }, { "epoch": 0.2753251306897871, "grad_norm": 0.14632971584796906, "learning_rate": 0.002, "loss": 2.5582, "step": 138200 }, { "epoch": 0.275345052913426, "grad_norm": 0.15199849009513855, "learning_rate": 0.002, "loss": 2.5565, "step": 138210 }, { "epoch": 0.27536497513706487, "grad_norm": 0.16185995936393738, "learning_rate": 0.002, "loss": 2.5667, "step": 138220 }, { "epoch": 0.2753848973607038, "grad_norm": 0.16226282715797424, "learning_rate": 0.002, "loss": 2.5543, "step": 138230 }, { "epoch": 0.2754048195843427, "grad_norm": 0.16124561429023743, "learning_rate": 0.002, "loss": 2.5704, "step": 138240 }, { "epoch": 0.27542474180798165, "grad_norm": 0.16607196629047394, "learning_rate": 0.002, "loss": 2.5673, "step": 138250 }, { "epoch": 0.27544466403162055, "grad_norm": 0.16313259303569794, "learning_rate": 0.002, "loss": 2.5676, "step": 138260 }, { "epoch": 0.2754645862552595, "grad_norm": 0.16970205307006836, "learning_rate": 0.002, "loss": 2.5555, "step": 138270 }, { "epoch": 0.2754845084788984, "grad_norm": 0.21628929674625397, "learning_rate": 0.002, "loss": 2.567, "step": 138280 }, { "epoch": 0.2755044307025373, "grad_norm": 0.17326216399669647, "learning_rate": 0.002, "loss": 2.5703, "step": 138290 }, { "epoch": 0.2755243529261762, "grad_norm": 0.15281154215335846, "learning_rate": 0.002, "loss": 2.5658, "step": 138300 }, { "epoch": 0.2755442751498151, "grad_norm": 0.17686806619167328, "learning_rate": 0.002, "loss": 2.5793, "step": 138310 }, { "epoch": 0.27556419737345406, "grad_norm": 0.1905340701341629, "learning_rate": 0.002, "loss": 2.5655, "step": 138320 }, { "epoch": 0.27558411959709295, "grad_norm": 0.15996688604354858, "learning_rate": 0.002, "loss": 2.5526, "step": 138330 }, { "epoch": 0.27560404182073184, "grad_norm": 0.14656153321266174, "learning_rate": 0.002, "loss": 2.5696, "step": 138340 }, { "epoch": 0.2756239640443708, "grad_norm": 0.16767489910125732, "learning_rate": 0.002, "loss": 2.563, "step": 138350 }, { "epoch": 0.2756438862680097, "grad_norm": 0.18050937354564667, "learning_rate": 0.002, "loss": 2.5716, "step": 138360 }, { "epoch": 0.2756638084916486, "grad_norm": 0.1692548245191574, "learning_rate": 0.002, "loss": 2.5527, "step": 138370 }, { "epoch": 0.2756837307152875, "grad_norm": 0.17119930684566498, "learning_rate": 0.002, "loss": 2.5683, "step": 138380 }, { "epoch": 0.27570365293892646, "grad_norm": 0.18255624175071716, "learning_rate": 0.002, "loss": 2.5667, "step": 138390 }, { "epoch": 0.27572357516256535, "grad_norm": 0.15338271856307983, "learning_rate": 0.002, "loss": 2.5652, "step": 138400 }, { "epoch": 0.27574349738620424, "grad_norm": 0.17032405734062195, "learning_rate": 0.002, "loss": 2.5743, "step": 138410 }, { "epoch": 0.2757634196098432, "grad_norm": 0.1644747406244278, "learning_rate": 0.002, "loss": 2.5528, "step": 138420 }, { "epoch": 0.2757833418334821, "grad_norm": 0.16731847822666168, "learning_rate": 0.002, "loss": 2.569, "step": 138430 }, { "epoch": 0.275803264057121, "grad_norm": 0.1471085101366043, "learning_rate": 0.002, "loss": 2.5574, "step": 138440 }, { "epoch": 0.2758231862807599, "grad_norm": 0.15984773635864258, "learning_rate": 0.002, "loss": 2.5608, "step": 138450 }, { "epoch": 0.2758431085043988, "grad_norm": 0.1842116415500641, "learning_rate": 0.002, "loss": 2.5724, "step": 138460 }, { "epoch": 0.27586303072803775, "grad_norm": 0.14812913537025452, "learning_rate": 0.002, "loss": 2.5644, "step": 138470 }, { "epoch": 0.27588295295167664, "grad_norm": 0.1473444253206253, "learning_rate": 0.002, "loss": 2.5601, "step": 138480 }, { "epoch": 0.2759028751753156, "grad_norm": 0.17585232853889465, "learning_rate": 0.002, "loss": 2.5526, "step": 138490 }, { "epoch": 0.2759227973989545, "grad_norm": 0.19884882867336273, "learning_rate": 0.002, "loss": 2.5702, "step": 138500 }, { "epoch": 0.27594271962259337, "grad_norm": 0.1579364538192749, "learning_rate": 0.002, "loss": 2.5705, "step": 138510 }, { "epoch": 0.2759626418462323, "grad_norm": 0.1461963802576065, "learning_rate": 0.002, "loss": 2.5804, "step": 138520 }, { "epoch": 0.2759825640698712, "grad_norm": 0.1784759759902954, "learning_rate": 0.002, "loss": 2.565, "step": 138530 }, { "epoch": 0.27600248629351015, "grad_norm": 0.16351602971553802, "learning_rate": 0.002, "loss": 2.5764, "step": 138540 }, { "epoch": 0.27602240851714904, "grad_norm": 0.1641855388879776, "learning_rate": 0.002, "loss": 2.5578, "step": 138550 }, { "epoch": 0.276042330740788, "grad_norm": 0.21714940667152405, "learning_rate": 0.002, "loss": 2.5563, "step": 138560 }, { "epoch": 0.2760622529644269, "grad_norm": 0.17195351421833038, "learning_rate": 0.002, "loss": 2.5746, "step": 138570 }, { "epoch": 0.27608217518806577, "grad_norm": 0.14247344434261322, "learning_rate": 0.002, "loss": 2.5725, "step": 138580 }, { "epoch": 0.2761020974117047, "grad_norm": 0.1788186877965927, "learning_rate": 0.002, "loss": 2.5505, "step": 138590 }, { "epoch": 0.2761220196353436, "grad_norm": 0.18238382041454315, "learning_rate": 0.002, "loss": 2.554, "step": 138600 }, { "epoch": 0.27614194185898255, "grad_norm": 0.1645541489124298, "learning_rate": 0.002, "loss": 2.5562, "step": 138610 }, { "epoch": 0.27616186408262144, "grad_norm": 0.21565566956996918, "learning_rate": 0.002, "loss": 2.5602, "step": 138620 }, { "epoch": 0.27618178630626034, "grad_norm": 0.18003220856189728, "learning_rate": 0.002, "loss": 2.5632, "step": 138630 }, { "epoch": 0.2762017085298993, "grad_norm": 0.17207390069961548, "learning_rate": 0.002, "loss": 2.564, "step": 138640 }, { "epoch": 0.27622163075353817, "grad_norm": 0.22001081705093384, "learning_rate": 0.002, "loss": 2.572, "step": 138650 }, { "epoch": 0.2762415529771771, "grad_norm": 0.14214515686035156, "learning_rate": 0.002, "loss": 2.5576, "step": 138660 }, { "epoch": 0.276261475200816, "grad_norm": 0.18758974969387054, "learning_rate": 0.002, "loss": 2.5483, "step": 138670 }, { "epoch": 0.27628139742445496, "grad_norm": 0.1748327910900116, "learning_rate": 0.002, "loss": 2.5604, "step": 138680 }, { "epoch": 0.27630131964809385, "grad_norm": 0.15984053909778595, "learning_rate": 0.002, "loss": 2.5705, "step": 138690 }, { "epoch": 0.27632124187173274, "grad_norm": 0.20920410752296448, "learning_rate": 0.002, "loss": 2.5614, "step": 138700 }, { "epoch": 0.2763411640953717, "grad_norm": 0.15799342095851898, "learning_rate": 0.002, "loss": 2.5714, "step": 138710 }, { "epoch": 0.2763610863190106, "grad_norm": 0.15692569315433502, "learning_rate": 0.002, "loss": 2.5458, "step": 138720 }, { "epoch": 0.2763810085426495, "grad_norm": 0.17071235179901123, "learning_rate": 0.002, "loss": 2.5542, "step": 138730 }, { "epoch": 0.2764009307662884, "grad_norm": 0.1662781834602356, "learning_rate": 0.002, "loss": 2.5652, "step": 138740 }, { "epoch": 0.2764208529899273, "grad_norm": 0.14315065741539001, "learning_rate": 0.002, "loss": 2.5808, "step": 138750 }, { "epoch": 0.27644077521356625, "grad_norm": 0.2503339946269989, "learning_rate": 0.002, "loss": 2.5546, "step": 138760 }, { "epoch": 0.27646069743720514, "grad_norm": 0.20118917524814606, "learning_rate": 0.002, "loss": 2.551, "step": 138770 }, { "epoch": 0.2764806196608441, "grad_norm": 0.155329629778862, "learning_rate": 0.002, "loss": 2.557, "step": 138780 }, { "epoch": 0.276500541884483, "grad_norm": 0.16723282635211945, "learning_rate": 0.002, "loss": 2.5607, "step": 138790 }, { "epoch": 0.27652046410812187, "grad_norm": 0.17332607507705688, "learning_rate": 0.002, "loss": 2.5543, "step": 138800 }, { "epoch": 0.2765403863317608, "grad_norm": 0.1746232509613037, "learning_rate": 0.002, "loss": 2.5706, "step": 138810 }, { "epoch": 0.2765603085553997, "grad_norm": 0.1947309672832489, "learning_rate": 0.002, "loss": 2.5678, "step": 138820 }, { "epoch": 0.27658023077903865, "grad_norm": 0.17339147627353668, "learning_rate": 0.002, "loss": 2.5653, "step": 138830 }, { "epoch": 0.27660015300267754, "grad_norm": 0.1656847596168518, "learning_rate": 0.002, "loss": 2.5727, "step": 138840 }, { "epoch": 0.2766200752263165, "grad_norm": 0.14740465581417084, "learning_rate": 0.002, "loss": 2.5784, "step": 138850 }, { "epoch": 0.2766399974499554, "grad_norm": 0.15840619802474976, "learning_rate": 0.002, "loss": 2.5754, "step": 138860 }, { "epoch": 0.27665991967359427, "grad_norm": 0.1443382054567337, "learning_rate": 0.002, "loss": 2.5683, "step": 138870 }, { "epoch": 0.2766798418972332, "grad_norm": 0.18546068668365479, "learning_rate": 0.002, "loss": 2.5533, "step": 138880 }, { "epoch": 0.2766997641208721, "grad_norm": 0.16517572104930878, "learning_rate": 0.002, "loss": 2.5556, "step": 138890 }, { "epoch": 0.27671968634451105, "grad_norm": 0.1623094528913498, "learning_rate": 0.002, "loss": 2.5711, "step": 138900 }, { "epoch": 0.27673960856814994, "grad_norm": 0.14798493683338165, "learning_rate": 0.002, "loss": 2.5801, "step": 138910 }, { "epoch": 0.27675953079178883, "grad_norm": 0.1631108969449997, "learning_rate": 0.002, "loss": 2.5648, "step": 138920 }, { "epoch": 0.2767794530154278, "grad_norm": 0.15528815984725952, "learning_rate": 0.002, "loss": 2.5612, "step": 138930 }, { "epoch": 0.27679937523906667, "grad_norm": 0.18656504154205322, "learning_rate": 0.002, "loss": 2.5665, "step": 138940 }, { "epoch": 0.2768192974627056, "grad_norm": 0.15734118223190308, "learning_rate": 0.002, "loss": 2.5801, "step": 138950 }, { "epoch": 0.2768392196863445, "grad_norm": 0.1921108514070511, "learning_rate": 0.002, "loss": 2.5616, "step": 138960 }, { "epoch": 0.2768591419099834, "grad_norm": 0.18825848400592804, "learning_rate": 0.002, "loss": 2.5754, "step": 138970 }, { "epoch": 0.27687906413362234, "grad_norm": 0.1485370248556137, "learning_rate": 0.002, "loss": 2.5546, "step": 138980 }, { "epoch": 0.27689898635726123, "grad_norm": 0.16860006749629974, "learning_rate": 0.002, "loss": 2.5606, "step": 138990 }, { "epoch": 0.2769189085809002, "grad_norm": 0.15679316222667694, "learning_rate": 0.002, "loss": 2.5689, "step": 139000 }, { "epoch": 0.27693883080453907, "grad_norm": 0.18956848978996277, "learning_rate": 0.002, "loss": 2.559, "step": 139010 }, { "epoch": 0.276958753028178, "grad_norm": 0.14012199640274048, "learning_rate": 0.002, "loss": 2.5716, "step": 139020 }, { "epoch": 0.2769786752518169, "grad_norm": 0.16128912568092346, "learning_rate": 0.002, "loss": 2.557, "step": 139030 }, { "epoch": 0.2769985974754558, "grad_norm": 0.15578830242156982, "learning_rate": 0.002, "loss": 2.5813, "step": 139040 }, { "epoch": 0.27701851969909475, "grad_norm": 0.19199532270431519, "learning_rate": 0.002, "loss": 2.5564, "step": 139050 }, { "epoch": 0.27703844192273364, "grad_norm": 0.17817828059196472, "learning_rate": 0.002, "loss": 2.5657, "step": 139060 }, { "epoch": 0.2770583641463726, "grad_norm": 0.1682598888874054, "learning_rate": 0.002, "loss": 2.5635, "step": 139070 }, { "epoch": 0.2770782863700115, "grad_norm": 0.18314486742019653, "learning_rate": 0.002, "loss": 2.5648, "step": 139080 }, { "epoch": 0.27709820859365036, "grad_norm": 0.16489355266094208, "learning_rate": 0.002, "loss": 2.5703, "step": 139090 }, { "epoch": 0.2771181308172893, "grad_norm": 0.19667749106884003, "learning_rate": 0.002, "loss": 2.5619, "step": 139100 }, { "epoch": 0.2771380530409282, "grad_norm": 0.18611690402030945, "learning_rate": 0.002, "loss": 2.5706, "step": 139110 }, { "epoch": 0.27715797526456715, "grad_norm": 0.15214042365550995, "learning_rate": 0.002, "loss": 2.5615, "step": 139120 }, { "epoch": 0.27717789748820604, "grad_norm": 0.1563318371772766, "learning_rate": 0.002, "loss": 2.565, "step": 139130 }, { "epoch": 0.277197819711845, "grad_norm": 0.1711803823709488, "learning_rate": 0.002, "loss": 2.5729, "step": 139140 }, { "epoch": 0.2772177419354839, "grad_norm": 0.20474965870380402, "learning_rate": 0.002, "loss": 2.5626, "step": 139150 }, { "epoch": 0.27723766415912277, "grad_norm": 0.16115927696228027, "learning_rate": 0.002, "loss": 2.5586, "step": 139160 }, { "epoch": 0.2772575863827617, "grad_norm": 0.1667485386133194, "learning_rate": 0.002, "loss": 2.5765, "step": 139170 }, { "epoch": 0.2772775086064006, "grad_norm": 0.17767789959907532, "learning_rate": 0.002, "loss": 2.5341, "step": 139180 }, { "epoch": 0.27729743083003955, "grad_norm": 0.20326969027519226, "learning_rate": 0.002, "loss": 2.5795, "step": 139190 }, { "epoch": 0.27731735305367844, "grad_norm": 0.17946697771549225, "learning_rate": 0.002, "loss": 2.5806, "step": 139200 }, { "epoch": 0.27733727527731733, "grad_norm": 0.14657433331012726, "learning_rate": 0.002, "loss": 2.5658, "step": 139210 }, { "epoch": 0.2773571975009563, "grad_norm": 0.29001232981681824, "learning_rate": 0.002, "loss": 2.5575, "step": 139220 }, { "epoch": 0.27737711972459517, "grad_norm": 0.16301855444908142, "learning_rate": 0.002, "loss": 2.552, "step": 139230 }, { "epoch": 0.2773970419482341, "grad_norm": 0.189032182097435, "learning_rate": 0.002, "loss": 2.5595, "step": 139240 }, { "epoch": 0.277416964171873, "grad_norm": 0.1595931351184845, "learning_rate": 0.002, "loss": 2.5556, "step": 139250 }, { "epoch": 0.2774368863955119, "grad_norm": 0.145517036318779, "learning_rate": 0.002, "loss": 2.5551, "step": 139260 }, { "epoch": 0.27745680861915084, "grad_norm": 0.1716337502002716, "learning_rate": 0.002, "loss": 2.57, "step": 139270 }, { "epoch": 0.27747673084278973, "grad_norm": 0.1866489201784134, "learning_rate": 0.002, "loss": 2.5505, "step": 139280 }, { "epoch": 0.2774966530664287, "grad_norm": 0.17662350833415985, "learning_rate": 0.002, "loss": 2.5843, "step": 139290 }, { "epoch": 0.27751657529006757, "grad_norm": 0.1653432846069336, "learning_rate": 0.002, "loss": 2.5585, "step": 139300 }, { "epoch": 0.2775364975137065, "grad_norm": 0.166707843542099, "learning_rate": 0.002, "loss": 2.5601, "step": 139310 }, { "epoch": 0.2775564197373454, "grad_norm": 0.1602475941181183, "learning_rate": 0.002, "loss": 2.5496, "step": 139320 }, { "epoch": 0.2775763419609843, "grad_norm": 0.1903926283121109, "learning_rate": 0.002, "loss": 2.562, "step": 139330 }, { "epoch": 0.27759626418462324, "grad_norm": 0.15758465230464935, "learning_rate": 0.002, "loss": 2.5559, "step": 139340 }, { "epoch": 0.27761618640826213, "grad_norm": 0.19742217659950256, "learning_rate": 0.002, "loss": 2.5678, "step": 139350 }, { "epoch": 0.2776361086319011, "grad_norm": 0.1757020354270935, "learning_rate": 0.002, "loss": 2.5734, "step": 139360 }, { "epoch": 0.27765603085553997, "grad_norm": 0.16284464299678802, "learning_rate": 0.002, "loss": 2.5612, "step": 139370 }, { "epoch": 0.27767595307917886, "grad_norm": 0.18124163150787354, "learning_rate": 0.002, "loss": 2.5593, "step": 139380 }, { "epoch": 0.2776958753028178, "grad_norm": 0.16720229387283325, "learning_rate": 0.002, "loss": 2.5725, "step": 139390 }, { "epoch": 0.2777157975264567, "grad_norm": 0.19545209407806396, "learning_rate": 0.002, "loss": 2.5543, "step": 139400 }, { "epoch": 0.27773571975009564, "grad_norm": 0.19998106360435486, "learning_rate": 0.002, "loss": 2.5573, "step": 139410 }, { "epoch": 0.27775564197373454, "grad_norm": 0.13871648907661438, "learning_rate": 0.002, "loss": 2.5635, "step": 139420 }, { "epoch": 0.2777755641973735, "grad_norm": 0.16074495017528534, "learning_rate": 0.002, "loss": 2.566, "step": 139430 }, { "epoch": 0.27779548642101237, "grad_norm": 0.19374004006385803, "learning_rate": 0.002, "loss": 2.5556, "step": 139440 }, { "epoch": 0.27781540864465126, "grad_norm": 0.17264056205749512, "learning_rate": 0.002, "loss": 2.5705, "step": 139450 }, { "epoch": 0.2778353308682902, "grad_norm": 0.1758195012807846, "learning_rate": 0.002, "loss": 2.5566, "step": 139460 }, { "epoch": 0.2778552530919291, "grad_norm": 0.14786338806152344, "learning_rate": 0.002, "loss": 2.5481, "step": 139470 }, { "epoch": 0.27787517531556805, "grad_norm": 0.17688797414302826, "learning_rate": 0.002, "loss": 2.5677, "step": 139480 }, { "epoch": 0.27789509753920694, "grad_norm": 0.19869451224803925, "learning_rate": 0.002, "loss": 2.5572, "step": 139490 }, { "epoch": 0.2779150197628458, "grad_norm": 0.15535792708396912, "learning_rate": 0.002, "loss": 2.561, "step": 139500 }, { "epoch": 0.2779349419864848, "grad_norm": 0.1527172476053238, "learning_rate": 0.002, "loss": 2.5579, "step": 139510 }, { "epoch": 0.27795486421012366, "grad_norm": 0.15524822473526, "learning_rate": 0.002, "loss": 2.551, "step": 139520 }, { "epoch": 0.2779747864337626, "grad_norm": 0.1894112527370453, "learning_rate": 0.002, "loss": 2.551, "step": 139530 }, { "epoch": 0.2779947086574015, "grad_norm": 0.16214881837368011, "learning_rate": 0.002, "loss": 2.5497, "step": 139540 }, { "epoch": 0.2780146308810404, "grad_norm": 0.15411144495010376, "learning_rate": 0.002, "loss": 2.5478, "step": 139550 }, { "epoch": 0.27803455310467934, "grad_norm": 0.17786869406700134, "learning_rate": 0.002, "loss": 2.5551, "step": 139560 }, { "epoch": 0.27805447532831823, "grad_norm": 0.17197181284427643, "learning_rate": 0.002, "loss": 2.5581, "step": 139570 }, { "epoch": 0.2780743975519572, "grad_norm": 0.16179265081882477, "learning_rate": 0.002, "loss": 2.5612, "step": 139580 }, { "epoch": 0.27809431977559607, "grad_norm": 0.1937054842710495, "learning_rate": 0.002, "loss": 2.5675, "step": 139590 }, { "epoch": 0.278114241999235, "grad_norm": 0.34766337275505066, "learning_rate": 0.002, "loss": 2.5532, "step": 139600 }, { "epoch": 0.2781341642228739, "grad_norm": 0.13846774399280548, "learning_rate": 0.002, "loss": 2.5569, "step": 139610 }, { "epoch": 0.2781540864465128, "grad_norm": 0.17920176684856415, "learning_rate": 0.002, "loss": 2.5576, "step": 139620 }, { "epoch": 0.27817400867015174, "grad_norm": 0.16738399863243103, "learning_rate": 0.002, "loss": 2.56, "step": 139630 }, { "epoch": 0.27819393089379063, "grad_norm": 0.13853950798511505, "learning_rate": 0.002, "loss": 2.5613, "step": 139640 }, { "epoch": 0.2782138531174296, "grad_norm": 0.1868840605020523, "learning_rate": 0.002, "loss": 2.5832, "step": 139650 }, { "epoch": 0.27823377534106847, "grad_norm": 0.15177038311958313, "learning_rate": 0.002, "loss": 2.583, "step": 139660 }, { "epoch": 0.27825369756470736, "grad_norm": 0.1829700767993927, "learning_rate": 0.002, "loss": 2.562, "step": 139670 }, { "epoch": 0.2782736197883463, "grad_norm": 0.1904526650905609, "learning_rate": 0.002, "loss": 2.5734, "step": 139680 }, { "epoch": 0.2782935420119852, "grad_norm": 0.14990097284317017, "learning_rate": 0.002, "loss": 2.5713, "step": 139690 }, { "epoch": 0.27831346423562414, "grad_norm": 0.16729836165905, "learning_rate": 0.002, "loss": 2.5685, "step": 139700 }, { "epoch": 0.27833338645926303, "grad_norm": 0.18126839399337769, "learning_rate": 0.002, "loss": 2.5533, "step": 139710 }, { "epoch": 0.2783533086829019, "grad_norm": 0.16016453504562378, "learning_rate": 0.002, "loss": 2.5534, "step": 139720 }, { "epoch": 0.27837323090654087, "grad_norm": 0.16721990704536438, "learning_rate": 0.002, "loss": 2.5475, "step": 139730 }, { "epoch": 0.27839315313017976, "grad_norm": 0.1637699455022812, "learning_rate": 0.002, "loss": 2.5458, "step": 139740 }, { "epoch": 0.2784130753538187, "grad_norm": 0.16670584678649902, "learning_rate": 0.002, "loss": 2.5774, "step": 139750 }, { "epoch": 0.2784329975774576, "grad_norm": 0.16791781783103943, "learning_rate": 0.002, "loss": 2.5689, "step": 139760 }, { "epoch": 0.27845291980109654, "grad_norm": 0.16988253593444824, "learning_rate": 0.002, "loss": 2.5575, "step": 139770 }, { "epoch": 0.27847284202473543, "grad_norm": 0.18347138166427612, "learning_rate": 0.002, "loss": 2.559, "step": 139780 }, { "epoch": 0.2784927642483743, "grad_norm": 0.15705469250679016, "learning_rate": 0.002, "loss": 2.5697, "step": 139790 }, { "epoch": 0.27851268647201327, "grad_norm": 0.162425696849823, "learning_rate": 0.002, "loss": 2.5589, "step": 139800 }, { "epoch": 0.27853260869565216, "grad_norm": 0.15267081558704376, "learning_rate": 0.002, "loss": 2.5695, "step": 139810 }, { "epoch": 0.2785525309192911, "grad_norm": 0.15029287338256836, "learning_rate": 0.002, "loss": 2.5707, "step": 139820 }, { "epoch": 0.27857245314293, "grad_norm": 0.17615889012813568, "learning_rate": 0.002, "loss": 2.5681, "step": 139830 }, { "epoch": 0.2785923753665689, "grad_norm": 0.17672207951545715, "learning_rate": 0.002, "loss": 2.5515, "step": 139840 }, { "epoch": 0.27861229759020784, "grad_norm": 0.18618090450763702, "learning_rate": 0.002, "loss": 2.5744, "step": 139850 }, { "epoch": 0.2786322198138467, "grad_norm": 0.15534020960330963, "learning_rate": 0.002, "loss": 2.5617, "step": 139860 }, { "epoch": 0.2786521420374857, "grad_norm": 0.18345147371292114, "learning_rate": 0.002, "loss": 2.5663, "step": 139870 }, { "epoch": 0.27867206426112456, "grad_norm": 0.15672029554843903, "learning_rate": 0.002, "loss": 2.5607, "step": 139880 }, { "epoch": 0.2786919864847635, "grad_norm": 0.16917206346988678, "learning_rate": 0.002, "loss": 2.5547, "step": 139890 }, { "epoch": 0.2787119087084024, "grad_norm": 0.19803346693515778, "learning_rate": 0.002, "loss": 2.5669, "step": 139900 }, { "epoch": 0.2787318309320413, "grad_norm": 0.18023280799388885, "learning_rate": 0.002, "loss": 2.5495, "step": 139910 }, { "epoch": 0.27875175315568024, "grad_norm": 0.15090835094451904, "learning_rate": 0.002, "loss": 2.5614, "step": 139920 }, { "epoch": 0.27877167537931913, "grad_norm": 0.178721085190773, "learning_rate": 0.002, "loss": 2.5827, "step": 139930 }, { "epoch": 0.2787915976029581, "grad_norm": 0.25489485263824463, "learning_rate": 0.002, "loss": 2.5632, "step": 139940 }, { "epoch": 0.27881151982659697, "grad_norm": 0.17222823202610016, "learning_rate": 0.002, "loss": 2.554, "step": 139950 }, { "epoch": 0.27883144205023586, "grad_norm": 0.15848861634731293, "learning_rate": 0.002, "loss": 2.5797, "step": 139960 }, { "epoch": 0.2788513642738748, "grad_norm": 0.16366559267044067, "learning_rate": 0.002, "loss": 2.5574, "step": 139970 }, { "epoch": 0.2788712864975137, "grad_norm": 0.17592762410640717, "learning_rate": 0.002, "loss": 2.5672, "step": 139980 }, { "epoch": 0.27889120872115264, "grad_norm": 0.15210482478141785, "learning_rate": 0.002, "loss": 2.5583, "step": 139990 }, { "epoch": 0.27891113094479153, "grad_norm": 0.1717086285352707, "learning_rate": 0.002, "loss": 2.5681, "step": 140000 }, { "epoch": 0.2789310531684304, "grad_norm": 0.213675394654274, "learning_rate": 0.002, "loss": 2.5639, "step": 140010 }, { "epoch": 0.27895097539206937, "grad_norm": 0.16547097265720367, "learning_rate": 0.002, "loss": 2.5602, "step": 140020 }, { "epoch": 0.27897089761570826, "grad_norm": 0.15138614177703857, "learning_rate": 0.002, "loss": 2.5708, "step": 140030 }, { "epoch": 0.2789908198393472, "grad_norm": 0.18805231153964996, "learning_rate": 0.002, "loss": 2.5539, "step": 140040 }, { "epoch": 0.2790107420629861, "grad_norm": 0.17348642647266388, "learning_rate": 0.002, "loss": 2.5792, "step": 140050 }, { "epoch": 0.27903066428662504, "grad_norm": 0.13982999324798584, "learning_rate": 0.002, "loss": 2.5599, "step": 140060 }, { "epoch": 0.27905058651026393, "grad_norm": 0.15816596150398254, "learning_rate": 0.002, "loss": 2.5782, "step": 140070 }, { "epoch": 0.2790705087339028, "grad_norm": 0.17490074038505554, "learning_rate": 0.002, "loss": 2.5707, "step": 140080 }, { "epoch": 0.27909043095754177, "grad_norm": 0.1688765436410904, "learning_rate": 0.002, "loss": 2.5639, "step": 140090 }, { "epoch": 0.27911035318118066, "grad_norm": 0.17272603511810303, "learning_rate": 0.002, "loss": 2.5564, "step": 140100 }, { "epoch": 0.2791302754048196, "grad_norm": 0.20301927626132965, "learning_rate": 0.002, "loss": 2.5745, "step": 140110 }, { "epoch": 0.2791501976284585, "grad_norm": 0.16188059747219086, "learning_rate": 0.002, "loss": 2.5751, "step": 140120 }, { "epoch": 0.2791701198520974, "grad_norm": 0.16369156539440155, "learning_rate": 0.002, "loss": 2.5591, "step": 140130 }, { "epoch": 0.27919004207573633, "grad_norm": 0.15558560192584991, "learning_rate": 0.002, "loss": 2.564, "step": 140140 }, { "epoch": 0.2792099642993752, "grad_norm": 0.15995250642299652, "learning_rate": 0.002, "loss": 2.5581, "step": 140150 }, { "epoch": 0.27922988652301417, "grad_norm": 0.1470188945531845, "learning_rate": 0.002, "loss": 2.5573, "step": 140160 }, { "epoch": 0.27924980874665306, "grad_norm": 0.179533913731575, "learning_rate": 0.002, "loss": 2.5588, "step": 140170 }, { "epoch": 0.279269730970292, "grad_norm": 0.1841043084859848, "learning_rate": 0.002, "loss": 2.5604, "step": 140180 }, { "epoch": 0.2792896531939309, "grad_norm": 0.17017541825771332, "learning_rate": 0.002, "loss": 2.566, "step": 140190 }, { "epoch": 0.2793095754175698, "grad_norm": 0.14670564234256744, "learning_rate": 0.002, "loss": 2.5656, "step": 140200 }, { "epoch": 0.27932949764120873, "grad_norm": 0.1535194367170334, "learning_rate": 0.002, "loss": 2.5613, "step": 140210 }, { "epoch": 0.2793494198648476, "grad_norm": 0.1605745404958725, "learning_rate": 0.002, "loss": 2.5858, "step": 140220 }, { "epoch": 0.27936934208848657, "grad_norm": 0.15495507419109344, "learning_rate": 0.002, "loss": 2.5647, "step": 140230 }, { "epoch": 0.27938926431212546, "grad_norm": 0.1651165783405304, "learning_rate": 0.002, "loss": 2.5691, "step": 140240 }, { "epoch": 0.27940918653576435, "grad_norm": 0.16253018379211426, "learning_rate": 0.002, "loss": 2.5603, "step": 140250 }, { "epoch": 0.2794291087594033, "grad_norm": 0.15598781406879425, "learning_rate": 0.002, "loss": 2.5708, "step": 140260 }, { "epoch": 0.2794490309830422, "grad_norm": 0.18093737959861755, "learning_rate": 0.002, "loss": 2.5678, "step": 140270 }, { "epoch": 0.27946895320668114, "grad_norm": 0.1973543018102646, "learning_rate": 0.002, "loss": 2.5537, "step": 140280 }, { "epoch": 0.27948887543032, "grad_norm": 0.1708284467458725, "learning_rate": 0.002, "loss": 2.5517, "step": 140290 }, { "epoch": 0.2795087976539589, "grad_norm": 0.14669328927993774, "learning_rate": 0.002, "loss": 2.5512, "step": 140300 }, { "epoch": 0.27952871987759786, "grad_norm": 0.23464606702327728, "learning_rate": 0.002, "loss": 2.5797, "step": 140310 }, { "epoch": 0.27954864210123676, "grad_norm": 0.16324934363365173, "learning_rate": 0.002, "loss": 2.5788, "step": 140320 }, { "epoch": 0.2795685643248757, "grad_norm": 0.18246743083000183, "learning_rate": 0.002, "loss": 2.5742, "step": 140330 }, { "epoch": 0.2795884865485146, "grad_norm": 0.20106105506420135, "learning_rate": 0.002, "loss": 2.566, "step": 140340 }, { "epoch": 0.27960840877215354, "grad_norm": 0.17090360820293427, "learning_rate": 0.002, "loss": 2.5582, "step": 140350 }, { "epoch": 0.27962833099579243, "grad_norm": 0.20201128721237183, "learning_rate": 0.002, "loss": 2.5621, "step": 140360 }, { "epoch": 0.2796482532194313, "grad_norm": 0.1657535433769226, "learning_rate": 0.002, "loss": 2.5618, "step": 140370 }, { "epoch": 0.27966817544307027, "grad_norm": 0.20382751524448395, "learning_rate": 0.002, "loss": 2.5685, "step": 140380 }, { "epoch": 0.27968809766670916, "grad_norm": 0.19871091842651367, "learning_rate": 0.002, "loss": 2.5525, "step": 140390 }, { "epoch": 0.2797080198903481, "grad_norm": 0.18653981387615204, "learning_rate": 0.002, "loss": 2.5544, "step": 140400 }, { "epoch": 0.279727942113987, "grad_norm": 0.15699826180934906, "learning_rate": 0.002, "loss": 2.5638, "step": 140410 }, { "epoch": 0.2797478643376259, "grad_norm": 0.14865775406360626, "learning_rate": 0.002, "loss": 2.5465, "step": 140420 }, { "epoch": 0.27976778656126483, "grad_norm": 0.16630467772483826, "learning_rate": 0.002, "loss": 2.5731, "step": 140430 }, { "epoch": 0.2797877087849037, "grad_norm": 0.1545727550983429, "learning_rate": 0.002, "loss": 2.5743, "step": 140440 }, { "epoch": 0.27980763100854267, "grad_norm": 0.15230660140514374, "learning_rate": 0.002, "loss": 2.5676, "step": 140450 }, { "epoch": 0.27982755323218156, "grad_norm": 0.15513800084590912, "learning_rate": 0.002, "loss": 2.566, "step": 140460 }, { "epoch": 0.27984747545582045, "grad_norm": 0.16420705616474152, "learning_rate": 0.002, "loss": 2.566, "step": 140470 }, { "epoch": 0.2798673976794594, "grad_norm": 0.1685810089111328, "learning_rate": 0.002, "loss": 2.5624, "step": 140480 }, { "epoch": 0.2798873199030983, "grad_norm": 0.1753903031349182, "learning_rate": 0.002, "loss": 2.5555, "step": 140490 }, { "epoch": 0.27990724212673723, "grad_norm": 0.16861970722675323, "learning_rate": 0.002, "loss": 2.5623, "step": 140500 }, { "epoch": 0.2799271643503761, "grad_norm": 0.16718462109565735, "learning_rate": 0.002, "loss": 2.5598, "step": 140510 }, { "epoch": 0.27994708657401507, "grad_norm": 0.19371075928211212, "learning_rate": 0.002, "loss": 2.5501, "step": 140520 }, { "epoch": 0.27996700879765396, "grad_norm": 0.1621483713388443, "learning_rate": 0.002, "loss": 2.5519, "step": 140530 }, { "epoch": 0.27998693102129285, "grad_norm": 0.1624443084001541, "learning_rate": 0.002, "loss": 2.5645, "step": 140540 }, { "epoch": 0.2800068532449318, "grad_norm": 0.16551214456558228, "learning_rate": 0.002, "loss": 2.5685, "step": 140550 }, { "epoch": 0.2800267754685707, "grad_norm": 0.18373511731624603, "learning_rate": 0.002, "loss": 2.5658, "step": 140560 }, { "epoch": 0.28004669769220963, "grad_norm": 0.1578601598739624, "learning_rate": 0.002, "loss": 2.5485, "step": 140570 }, { "epoch": 0.2800666199158485, "grad_norm": 0.1415277123451233, "learning_rate": 0.002, "loss": 2.5627, "step": 140580 }, { "epoch": 0.2800865421394874, "grad_norm": 0.17808395624160767, "learning_rate": 0.002, "loss": 2.5712, "step": 140590 }, { "epoch": 0.28010646436312636, "grad_norm": 0.16474980115890503, "learning_rate": 0.002, "loss": 2.5847, "step": 140600 }, { "epoch": 0.28012638658676525, "grad_norm": 0.17996713519096375, "learning_rate": 0.002, "loss": 2.5757, "step": 140610 }, { "epoch": 0.2801463088104042, "grad_norm": 0.1732461005449295, "learning_rate": 0.002, "loss": 2.5569, "step": 140620 }, { "epoch": 0.2801662310340431, "grad_norm": 0.16499654948711395, "learning_rate": 0.002, "loss": 2.5654, "step": 140630 }, { "epoch": 0.28018615325768204, "grad_norm": 0.1759646236896515, "learning_rate": 0.002, "loss": 2.5615, "step": 140640 }, { "epoch": 0.2802060754813209, "grad_norm": 0.1633378267288208, "learning_rate": 0.002, "loss": 2.5585, "step": 140650 }, { "epoch": 0.2802259977049598, "grad_norm": 0.1649494171142578, "learning_rate": 0.002, "loss": 2.5597, "step": 140660 }, { "epoch": 0.28024591992859876, "grad_norm": 0.16396117210388184, "learning_rate": 0.002, "loss": 2.5812, "step": 140670 }, { "epoch": 0.28026584215223765, "grad_norm": 0.1680159568786621, "learning_rate": 0.002, "loss": 2.5597, "step": 140680 }, { "epoch": 0.2802857643758766, "grad_norm": 0.23276665806770325, "learning_rate": 0.002, "loss": 2.5784, "step": 140690 }, { "epoch": 0.2803056865995155, "grad_norm": 0.1488521248102188, "learning_rate": 0.002, "loss": 2.5641, "step": 140700 }, { "epoch": 0.2803256088231544, "grad_norm": 0.14662764966487885, "learning_rate": 0.002, "loss": 2.5649, "step": 140710 }, { "epoch": 0.28034553104679333, "grad_norm": 0.15782015025615692, "learning_rate": 0.002, "loss": 2.5641, "step": 140720 }, { "epoch": 0.2803654532704322, "grad_norm": 0.1552278697490692, "learning_rate": 0.002, "loss": 2.5668, "step": 140730 }, { "epoch": 0.28038537549407117, "grad_norm": 0.15204671025276184, "learning_rate": 0.002, "loss": 2.5742, "step": 140740 }, { "epoch": 0.28040529771771006, "grad_norm": 0.16343744099140167, "learning_rate": 0.002, "loss": 2.5842, "step": 140750 }, { "epoch": 0.28042521994134895, "grad_norm": 0.1432293802499771, "learning_rate": 0.002, "loss": 2.5669, "step": 140760 }, { "epoch": 0.2804451421649879, "grad_norm": 0.1666451096534729, "learning_rate": 0.002, "loss": 2.564, "step": 140770 }, { "epoch": 0.2804650643886268, "grad_norm": 0.17200632393360138, "learning_rate": 0.002, "loss": 2.5608, "step": 140780 }, { "epoch": 0.28048498661226573, "grad_norm": 0.17389445006847382, "learning_rate": 0.002, "loss": 2.5556, "step": 140790 }, { "epoch": 0.2805049088359046, "grad_norm": 0.17733579874038696, "learning_rate": 0.002, "loss": 2.555, "step": 140800 }, { "epoch": 0.28052483105954357, "grad_norm": 0.1698005050420761, "learning_rate": 0.002, "loss": 2.5495, "step": 140810 }, { "epoch": 0.28054475328318246, "grad_norm": 0.15665920078754425, "learning_rate": 0.002, "loss": 2.5677, "step": 140820 }, { "epoch": 0.28056467550682135, "grad_norm": 0.16821926832199097, "learning_rate": 0.002, "loss": 2.5721, "step": 140830 }, { "epoch": 0.2805845977304603, "grad_norm": 0.16772088408470154, "learning_rate": 0.002, "loss": 2.5498, "step": 140840 }, { "epoch": 0.2806045199540992, "grad_norm": 0.15489144623279572, "learning_rate": 0.002, "loss": 2.5725, "step": 140850 }, { "epoch": 0.28062444217773813, "grad_norm": 0.21796974539756775, "learning_rate": 0.002, "loss": 2.5679, "step": 140860 }, { "epoch": 0.280644364401377, "grad_norm": 0.13810379803180695, "learning_rate": 0.002, "loss": 2.5518, "step": 140870 }, { "epoch": 0.2806642866250159, "grad_norm": 0.21892918646335602, "learning_rate": 0.002, "loss": 2.5602, "step": 140880 }, { "epoch": 0.28068420884865486, "grad_norm": 0.1603883057832718, "learning_rate": 0.002, "loss": 2.5658, "step": 140890 }, { "epoch": 0.28070413107229375, "grad_norm": 0.1475551277399063, "learning_rate": 0.002, "loss": 2.5667, "step": 140900 }, { "epoch": 0.2807240532959327, "grad_norm": 0.1573604792356491, "learning_rate": 0.002, "loss": 2.562, "step": 140910 }, { "epoch": 0.2807439755195716, "grad_norm": 0.17565055191516876, "learning_rate": 0.002, "loss": 2.557, "step": 140920 }, { "epoch": 0.28076389774321053, "grad_norm": 0.14652428030967712, "learning_rate": 0.002, "loss": 2.57, "step": 140930 }, { "epoch": 0.2807838199668494, "grad_norm": 0.16636092960834503, "learning_rate": 0.002, "loss": 2.5834, "step": 140940 }, { "epoch": 0.2808037421904883, "grad_norm": 0.15786507725715637, "learning_rate": 0.002, "loss": 2.5793, "step": 140950 }, { "epoch": 0.28082366441412726, "grad_norm": 0.18531300127506256, "learning_rate": 0.002, "loss": 2.5588, "step": 140960 }, { "epoch": 0.28084358663776615, "grad_norm": 0.1844656765460968, "learning_rate": 0.002, "loss": 2.5641, "step": 140970 }, { "epoch": 0.2808635088614051, "grad_norm": 0.2000398337841034, "learning_rate": 0.002, "loss": 2.5571, "step": 140980 }, { "epoch": 0.280883431085044, "grad_norm": 0.19192299246788025, "learning_rate": 0.002, "loss": 2.571, "step": 140990 }, { "epoch": 0.2809033533086829, "grad_norm": 0.16096916794776917, "learning_rate": 0.002, "loss": 2.5633, "step": 141000 }, { "epoch": 0.2809232755323218, "grad_norm": 0.16291317343711853, "learning_rate": 0.002, "loss": 2.5623, "step": 141010 }, { "epoch": 0.2809431977559607, "grad_norm": 0.14393797516822815, "learning_rate": 0.002, "loss": 2.5602, "step": 141020 }, { "epoch": 0.28096311997959966, "grad_norm": 0.15585196018218994, "learning_rate": 0.002, "loss": 2.5553, "step": 141030 }, { "epoch": 0.28098304220323855, "grad_norm": 0.17958374321460724, "learning_rate": 0.002, "loss": 2.5672, "step": 141040 }, { "epoch": 0.28100296442687744, "grad_norm": 0.18968656659126282, "learning_rate": 0.002, "loss": 2.5506, "step": 141050 }, { "epoch": 0.2810228866505164, "grad_norm": 0.16801495850086212, "learning_rate": 0.002, "loss": 2.551, "step": 141060 }, { "epoch": 0.2810428088741553, "grad_norm": 0.15048763155937195, "learning_rate": 0.002, "loss": 2.5762, "step": 141070 }, { "epoch": 0.2810627310977942, "grad_norm": 0.18257544934749603, "learning_rate": 0.002, "loss": 2.5523, "step": 141080 }, { "epoch": 0.2810826533214331, "grad_norm": 0.17007072269916534, "learning_rate": 0.002, "loss": 2.5627, "step": 141090 }, { "epoch": 0.28110257554507206, "grad_norm": 0.202705517411232, "learning_rate": 0.002, "loss": 2.5545, "step": 141100 }, { "epoch": 0.28112249776871095, "grad_norm": 0.20073202252388, "learning_rate": 0.002, "loss": 2.5647, "step": 141110 }, { "epoch": 0.28114241999234985, "grad_norm": 0.1554606407880783, "learning_rate": 0.002, "loss": 2.5607, "step": 141120 }, { "epoch": 0.2811623422159888, "grad_norm": 0.16756616532802582, "learning_rate": 0.002, "loss": 2.5735, "step": 141130 }, { "epoch": 0.2811822644396277, "grad_norm": 0.16639961302280426, "learning_rate": 0.002, "loss": 2.5629, "step": 141140 }, { "epoch": 0.28120218666326663, "grad_norm": 0.16597189009189606, "learning_rate": 0.002, "loss": 2.5649, "step": 141150 }, { "epoch": 0.2812221088869055, "grad_norm": 0.1447550654411316, "learning_rate": 0.002, "loss": 2.5753, "step": 141160 }, { "epoch": 0.2812420311105444, "grad_norm": 0.185570627450943, "learning_rate": 0.002, "loss": 2.5588, "step": 141170 }, { "epoch": 0.28126195333418336, "grad_norm": 0.21346884965896606, "learning_rate": 0.002, "loss": 2.5455, "step": 141180 }, { "epoch": 0.28128187555782225, "grad_norm": 0.19435396790504456, "learning_rate": 0.002, "loss": 2.5635, "step": 141190 }, { "epoch": 0.2813017977814612, "grad_norm": 0.18841472268104553, "learning_rate": 0.002, "loss": 2.5546, "step": 141200 }, { "epoch": 0.2813217200051001, "grad_norm": 0.17018704116344452, "learning_rate": 0.002, "loss": 2.5674, "step": 141210 }, { "epoch": 0.28134164222873903, "grad_norm": 0.15278783440589905, "learning_rate": 0.002, "loss": 2.5566, "step": 141220 }, { "epoch": 0.2813615644523779, "grad_norm": 0.16619011759757996, "learning_rate": 0.002, "loss": 2.5582, "step": 141230 }, { "epoch": 0.2813814866760168, "grad_norm": 0.18997101485729218, "learning_rate": 0.002, "loss": 2.5826, "step": 141240 }, { "epoch": 0.28140140889965576, "grad_norm": 0.1704902946949005, "learning_rate": 0.002, "loss": 2.57, "step": 141250 }, { "epoch": 0.28142133112329465, "grad_norm": 0.16420845687389374, "learning_rate": 0.002, "loss": 2.5533, "step": 141260 }, { "epoch": 0.2814412533469336, "grad_norm": 0.17966009676456451, "learning_rate": 0.002, "loss": 2.5637, "step": 141270 }, { "epoch": 0.2814611755705725, "grad_norm": 0.18276913464069366, "learning_rate": 0.002, "loss": 2.5657, "step": 141280 }, { "epoch": 0.2814810977942114, "grad_norm": 0.17478056252002716, "learning_rate": 0.002, "loss": 2.5532, "step": 141290 }, { "epoch": 0.2815010200178503, "grad_norm": 0.21030572056770325, "learning_rate": 0.002, "loss": 2.559, "step": 141300 }, { "epoch": 0.2815209422414892, "grad_norm": 0.16084039211273193, "learning_rate": 0.002, "loss": 2.5515, "step": 141310 }, { "epoch": 0.28154086446512816, "grad_norm": 0.15420284867286682, "learning_rate": 0.002, "loss": 2.5659, "step": 141320 }, { "epoch": 0.28156078668876705, "grad_norm": 0.16168910264968872, "learning_rate": 0.002, "loss": 2.5555, "step": 141330 }, { "epoch": 0.28158070891240594, "grad_norm": 0.17480279505252838, "learning_rate": 0.002, "loss": 2.5695, "step": 141340 }, { "epoch": 0.2816006311360449, "grad_norm": 0.16277047991752625, "learning_rate": 0.002, "loss": 2.5714, "step": 141350 }, { "epoch": 0.2816205533596838, "grad_norm": 0.19277331233024597, "learning_rate": 0.002, "loss": 2.5685, "step": 141360 }, { "epoch": 0.2816404755833227, "grad_norm": 0.15481604635715485, "learning_rate": 0.002, "loss": 2.5691, "step": 141370 }, { "epoch": 0.2816603978069616, "grad_norm": 0.16259147226810455, "learning_rate": 0.002, "loss": 2.5678, "step": 141380 }, { "epoch": 0.28168032003060056, "grad_norm": 0.19589707255363464, "learning_rate": 0.002, "loss": 2.5831, "step": 141390 }, { "epoch": 0.28170024225423945, "grad_norm": 0.1562921404838562, "learning_rate": 0.002, "loss": 2.5709, "step": 141400 }, { "epoch": 0.28172016447787834, "grad_norm": 0.1320403814315796, "learning_rate": 0.002, "loss": 2.5602, "step": 141410 }, { "epoch": 0.2817400867015173, "grad_norm": 0.19289231300354004, "learning_rate": 0.002, "loss": 2.5602, "step": 141420 }, { "epoch": 0.2817600089251562, "grad_norm": 0.14885181188583374, "learning_rate": 0.002, "loss": 2.5611, "step": 141430 }, { "epoch": 0.2817799311487951, "grad_norm": 0.18015223741531372, "learning_rate": 0.002, "loss": 2.5721, "step": 141440 }, { "epoch": 0.281799853372434, "grad_norm": 0.16691303253173828, "learning_rate": 0.002, "loss": 2.5627, "step": 141450 }, { "epoch": 0.2818197755960729, "grad_norm": 0.14923831820487976, "learning_rate": 0.002, "loss": 2.5533, "step": 141460 }, { "epoch": 0.28183969781971185, "grad_norm": 0.1722390055656433, "learning_rate": 0.002, "loss": 2.57, "step": 141470 }, { "epoch": 0.28185962004335074, "grad_norm": 0.1639217883348465, "learning_rate": 0.002, "loss": 2.5649, "step": 141480 }, { "epoch": 0.2818795422669897, "grad_norm": 0.17413774132728577, "learning_rate": 0.002, "loss": 2.5738, "step": 141490 }, { "epoch": 0.2818994644906286, "grad_norm": 0.1559947431087494, "learning_rate": 0.002, "loss": 2.5611, "step": 141500 }, { "epoch": 0.2819193867142675, "grad_norm": 0.17144958674907684, "learning_rate": 0.002, "loss": 2.5723, "step": 141510 }, { "epoch": 0.2819393089379064, "grad_norm": 0.14354747533798218, "learning_rate": 0.002, "loss": 2.5667, "step": 141520 }, { "epoch": 0.2819592311615453, "grad_norm": 0.19703374803066254, "learning_rate": 0.002, "loss": 2.5538, "step": 141530 }, { "epoch": 0.28197915338518426, "grad_norm": 0.1842264086008072, "learning_rate": 0.002, "loss": 2.5666, "step": 141540 }, { "epoch": 0.28199907560882315, "grad_norm": 0.18729253113269806, "learning_rate": 0.002, "loss": 2.5514, "step": 141550 }, { "epoch": 0.2820189978324621, "grad_norm": 0.1603318750858307, "learning_rate": 0.002, "loss": 2.5583, "step": 141560 }, { "epoch": 0.282038920056101, "grad_norm": 0.1765473484992981, "learning_rate": 0.002, "loss": 2.5568, "step": 141570 }, { "epoch": 0.2820588422797399, "grad_norm": 0.14252255856990814, "learning_rate": 0.002, "loss": 2.5528, "step": 141580 }, { "epoch": 0.2820787645033788, "grad_norm": 0.16596925258636475, "learning_rate": 0.002, "loss": 2.5645, "step": 141590 }, { "epoch": 0.2820986867270177, "grad_norm": 0.20179858803749084, "learning_rate": 0.002, "loss": 2.5613, "step": 141600 }, { "epoch": 0.28211860895065666, "grad_norm": 0.15175361931324005, "learning_rate": 0.002, "loss": 2.5621, "step": 141610 }, { "epoch": 0.28213853117429555, "grad_norm": 0.18773774802684784, "learning_rate": 0.002, "loss": 2.55, "step": 141620 }, { "epoch": 0.28215845339793444, "grad_norm": 0.16240663826465607, "learning_rate": 0.002, "loss": 2.5638, "step": 141630 }, { "epoch": 0.2821783756215734, "grad_norm": 0.13595467805862427, "learning_rate": 0.002, "loss": 2.5563, "step": 141640 }, { "epoch": 0.2821982978452123, "grad_norm": 0.2413184940814972, "learning_rate": 0.002, "loss": 2.562, "step": 141650 }, { "epoch": 0.2822182200688512, "grad_norm": 0.1597345620393753, "learning_rate": 0.002, "loss": 2.5763, "step": 141660 }, { "epoch": 0.2822381422924901, "grad_norm": 0.20627161860466003, "learning_rate": 0.002, "loss": 2.5652, "step": 141670 }, { "epoch": 0.28225806451612906, "grad_norm": 0.17298643290996552, "learning_rate": 0.002, "loss": 2.572, "step": 141680 }, { "epoch": 0.28227798673976795, "grad_norm": 0.15495800971984863, "learning_rate": 0.002, "loss": 2.5582, "step": 141690 }, { "epoch": 0.28229790896340684, "grad_norm": 0.16705459356307983, "learning_rate": 0.002, "loss": 2.5589, "step": 141700 }, { "epoch": 0.2823178311870458, "grad_norm": 0.21580539643764496, "learning_rate": 0.002, "loss": 2.5543, "step": 141710 }, { "epoch": 0.2823377534106847, "grad_norm": 0.17482444643974304, "learning_rate": 0.002, "loss": 2.5782, "step": 141720 }, { "epoch": 0.2823576756343236, "grad_norm": 0.14700846374034882, "learning_rate": 0.002, "loss": 2.5676, "step": 141730 }, { "epoch": 0.2823775978579625, "grad_norm": 0.18149541318416595, "learning_rate": 0.002, "loss": 2.5619, "step": 141740 }, { "epoch": 0.2823975200816014, "grad_norm": 0.19089734554290771, "learning_rate": 0.002, "loss": 2.5627, "step": 141750 }, { "epoch": 0.28241744230524035, "grad_norm": 0.15942877531051636, "learning_rate": 0.002, "loss": 2.5614, "step": 141760 }, { "epoch": 0.28243736452887924, "grad_norm": 0.2083245813846588, "learning_rate": 0.002, "loss": 2.5555, "step": 141770 }, { "epoch": 0.2824572867525182, "grad_norm": 0.16175499558448792, "learning_rate": 0.002, "loss": 2.5729, "step": 141780 }, { "epoch": 0.2824772089761571, "grad_norm": 0.15899395942687988, "learning_rate": 0.002, "loss": 2.5634, "step": 141790 }, { "epoch": 0.28249713119979597, "grad_norm": 0.16817156970500946, "learning_rate": 0.002, "loss": 2.5753, "step": 141800 }, { "epoch": 0.2825170534234349, "grad_norm": 0.17760427296161652, "learning_rate": 0.002, "loss": 2.5705, "step": 141810 }, { "epoch": 0.2825369756470738, "grad_norm": 0.16692504286766052, "learning_rate": 0.002, "loss": 2.5538, "step": 141820 }, { "epoch": 0.28255689787071275, "grad_norm": 0.14241668581962585, "learning_rate": 0.002, "loss": 2.5594, "step": 141830 }, { "epoch": 0.28257682009435164, "grad_norm": 0.20075944066047668, "learning_rate": 0.002, "loss": 2.5516, "step": 141840 }, { "epoch": 0.2825967423179906, "grad_norm": 0.2017577588558197, "learning_rate": 0.002, "loss": 2.5782, "step": 141850 }, { "epoch": 0.2826166645416295, "grad_norm": 0.14740422368049622, "learning_rate": 0.002, "loss": 2.5649, "step": 141860 }, { "epoch": 0.28263658676526837, "grad_norm": 0.17696069180965424, "learning_rate": 0.002, "loss": 2.5478, "step": 141870 }, { "epoch": 0.2826565089889073, "grad_norm": 0.17614856362342834, "learning_rate": 0.002, "loss": 2.5687, "step": 141880 }, { "epoch": 0.2826764312125462, "grad_norm": 0.17793340981006622, "learning_rate": 0.002, "loss": 2.5626, "step": 141890 }, { "epoch": 0.28269635343618515, "grad_norm": 0.13626934587955475, "learning_rate": 0.002, "loss": 2.5651, "step": 141900 }, { "epoch": 0.28271627565982405, "grad_norm": 0.15461920201778412, "learning_rate": 0.002, "loss": 2.5601, "step": 141910 }, { "epoch": 0.28273619788346294, "grad_norm": 0.157074972987175, "learning_rate": 0.002, "loss": 2.5738, "step": 141920 }, { "epoch": 0.2827561201071019, "grad_norm": 0.16209249198436737, "learning_rate": 0.002, "loss": 2.5568, "step": 141930 }, { "epoch": 0.2827760423307408, "grad_norm": 0.15716885030269623, "learning_rate": 0.002, "loss": 2.5626, "step": 141940 }, { "epoch": 0.2827959645543797, "grad_norm": 0.160092294216156, "learning_rate": 0.002, "loss": 2.572, "step": 141950 }, { "epoch": 0.2828158867780186, "grad_norm": 0.22142665088176727, "learning_rate": 0.002, "loss": 2.5563, "step": 141960 }, { "epoch": 0.28283580900165756, "grad_norm": 0.2069828063249588, "learning_rate": 0.002, "loss": 2.5758, "step": 141970 }, { "epoch": 0.28285573122529645, "grad_norm": 0.16508536040782928, "learning_rate": 0.002, "loss": 2.571, "step": 141980 }, { "epoch": 0.28287565344893534, "grad_norm": 0.15875723958015442, "learning_rate": 0.002, "loss": 2.5685, "step": 141990 }, { "epoch": 0.2828955756725743, "grad_norm": 0.15053203701972961, "learning_rate": 0.002, "loss": 2.5769, "step": 142000 }, { "epoch": 0.2829154978962132, "grad_norm": 0.1854197233915329, "learning_rate": 0.002, "loss": 2.5744, "step": 142010 }, { "epoch": 0.2829354201198521, "grad_norm": 0.17226311564445496, "learning_rate": 0.002, "loss": 2.5611, "step": 142020 }, { "epoch": 0.282955342343491, "grad_norm": 0.1618805080652237, "learning_rate": 0.002, "loss": 2.5673, "step": 142030 }, { "epoch": 0.2829752645671299, "grad_norm": 0.14349579811096191, "learning_rate": 0.002, "loss": 2.5777, "step": 142040 }, { "epoch": 0.28299518679076885, "grad_norm": 0.15448825061321259, "learning_rate": 0.002, "loss": 2.5832, "step": 142050 }, { "epoch": 0.28301510901440774, "grad_norm": 0.16501958668231964, "learning_rate": 0.002, "loss": 2.5634, "step": 142060 }, { "epoch": 0.2830350312380467, "grad_norm": 0.16771826148033142, "learning_rate": 0.002, "loss": 2.5594, "step": 142070 }, { "epoch": 0.2830549534616856, "grad_norm": 0.18567660450935364, "learning_rate": 0.002, "loss": 2.5799, "step": 142080 }, { "epoch": 0.28307487568532447, "grad_norm": 0.1717420518398285, "learning_rate": 0.002, "loss": 2.5621, "step": 142090 }, { "epoch": 0.2830947979089634, "grad_norm": 0.20176678895950317, "learning_rate": 0.002, "loss": 2.5677, "step": 142100 }, { "epoch": 0.2831147201326023, "grad_norm": 0.17731499671936035, "learning_rate": 0.002, "loss": 2.5677, "step": 142110 }, { "epoch": 0.28313464235624125, "grad_norm": 0.16868460178375244, "learning_rate": 0.002, "loss": 2.559, "step": 142120 }, { "epoch": 0.28315456457988014, "grad_norm": 0.15156009793281555, "learning_rate": 0.002, "loss": 2.5623, "step": 142130 }, { "epoch": 0.2831744868035191, "grad_norm": 0.16736742854118347, "learning_rate": 0.002, "loss": 2.5457, "step": 142140 }, { "epoch": 0.283194409027158, "grad_norm": 0.178883895277977, "learning_rate": 0.002, "loss": 2.5672, "step": 142150 }, { "epoch": 0.28321433125079687, "grad_norm": 0.1553480327129364, "learning_rate": 0.002, "loss": 2.5576, "step": 142160 }, { "epoch": 0.2832342534744358, "grad_norm": 0.18442901968955994, "learning_rate": 0.002, "loss": 2.5759, "step": 142170 }, { "epoch": 0.2832541756980747, "grad_norm": 0.15661512315273285, "learning_rate": 0.002, "loss": 2.5554, "step": 142180 }, { "epoch": 0.28327409792171365, "grad_norm": 0.20614632964134216, "learning_rate": 0.002, "loss": 2.5671, "step": 142190 }, { "epoch": 0.28329402014535254, "grad_norm": 0.14371231198310852, "learning_rate": 0.002, "loss": 2.5632, "step": 142200 }, { "epoch": 0.28331394236899143, "grad_norm": 0.15514442324638367, "learning_rate": 0.002, "loss": 2.5642, "step": 142210 }, { "epoch": 0.2833338645926304, "grad_norm": 0.18715161085128784, "learning_rate": 0.002, "loss": 2.5677, "step": 142220 }, { "epoch": 0.28335378681626927, "grad_norm": 0.16869868338108063, "learning_rate": 0.002, "loss": 2.573, "step": 142230 }, { "epoch": 0.2833737090399082, "grad_norm": 0.16157791018486023, "learning_rate": 0.002, "loss": 2.5648, "step": 142240 }, { "epoch": 0.2833936312635471, "grad_norm": 0.20916621387004852, "learning_rate": 0.002, "loss": 2.5594, "step": 142250 }, { "epoch": 0.283413553487186, "grad_norm": 0.15864166617393494, "learning_rate": 0.002, "loss": 2.5619, "step": 142260 }, { "epoch": 0.28343347571082494, "grad_norm": 0.2580792009830475, "learning_rate": 0.002, "loss": 2.565, "step": 142270 }, { "epoch": 0.28345339793446384, "grad_norm": 0.14609837532043457, "learning_rate": 0.002, "loss": 2.5686, "step": 142280 }, { "epoch": 0.2834733201581028, "grad_norm": 0.16179493069648743, "learning_rate": 0.002, "loss": 2.569, "step": 142290 }, { "epoch": 0.2834932423817417, "grad_norm": 0.16004100441932678, "learning_rate": 0.002, "loss": 2.5544, "step": 142300 }, { "epoch": 0.2835131646053806, "grad_norm": 0.17284196615219116, "learning_rate": 0.002, "loss": 2.5755, "step": 142310 }, { "epoch": 0.2835330868290195, "grad_norm": 0.19748781621456146, "learning_rate": 0.002, "loss": 2.5555, "step": 142320 }, { "epoch": 0.2835530090526584, "grad_norm": 0.16951653361320496, "learning_rate": 0.002, "loss": 2.5663, "step": 142330 }, { "epoch": 0.28357293127629735, "grad_norm": 0.16043224930763245, "learning_rate": 0.002, "loss": 2.5768, "step": 142340 }, { "epoch": 0.28359285349993624, "grad_norm": 0.17018121480941772, "learning_rate": 0.002, "loss": 2.5513, "step": 142350 }, { "epoch": 0.2836127757235752, "grad_norm": 0.16027268767356873, "learning_rate": 0.002, "loss": 2.5652, "step": 142360 }, { "epoch": 0.2836326979472141, "grad_norm": 0.14504997432231903, "learning_rate": 0.002, "loss": 2.5697, "step": 142370 }, { "epoch": 0.28365262017085296, "grad_norm": 0.15145382285118103, "learning_rate": 0.002, "loss": 2.5517, "step": 142380 }, { "epoch": 0.2836725423944919, "grad_norm": 0.17977862060070038, "learning_rate": 0.002, "loss": 2.5648, "step": 142390 }, { "epoch": 0.2836924646181308, "grad_norm": 0.17016850411891937, "learning_rate": 0.002, "loss": 2.5681, "step": 142400 }, { "epoch": 0.28371238684176975, "grad_norm": 0.1724727600812912, "learning_rate": 0.002, "loss": 2.5642, "step": 142410 }, { "epoch": 0.28373230906540864, "grad_norm": 0.16417215764522552, "learning_rate": 0.002, "loss": 2.5566, "step": 142420 }, { "epoch": 0.2837522312890476, "grad_norm": 0.24517719447612762, "learning_rate": 0.002, "loss": 2.5537, "step": 142430 }, { "epoch": 0.2837721535126865, "grad_norm": 0.17217917740345, "learning_rate": 0.002, "loss": 2.5624, "step": 142440 }, { "epoch": 0.28379207573632537, "grad_norm": 0.15288889408111572, "learning_rate": 0.002, "loss": 2.5629, "step": 142450 }, { "epoch": 0.2838119979599643, "grad_norm": 0.15587329864501953, "learning_rate": 0.002, "loss": 2.5538, "step": 142460 }, { "epoch": 0.2838319201836032, "grad_norm": 0.2023632526397705, "learning_rate": 0.002, "loss": 2.5625, "step": 142470 }, { "epoch": 0.28385184240724215, "grad_norm": 0.1659768670797348, "learning_rate": 0.002, "loss": 2.5734, "step": 142480 }, { "epoch": 0.28387176463088104, "grad_norm": 0.20629645884037018, "learning_rate": 0.002, "loss": 2.5707, "step": 142490 }, { "epoch": 0.28389168685451993, "grad_norm": 0.16331128776073456, "learning_rate": 0.002, "loss": 2.5513, "step": 142500 }, { "epoch": 0.2839116090781589, "grad_norm": 0.17823748290538788, "learning_rate": 0.002, "loss": 2.5737, "step": 142510 }, { "epoch": 0.28393153130179777, "grad_norm": 0.1595972776412964, "learning_rate": 0.002, "loss": 2.5434, "step": 142520 }, { "epoch": 0.2839514535254367, "grad_norm": 0.16379554569721222, "learning_rate": 0.002, "loss": 2.5686, "step": 142530 }, { "epoch": 0.2839713757490756, "grad_norm": 0.17280839383602142, "learning_rate": 0.002, "loss": 2.5489, "step": 142540 }, { "epoch": 0.2839912979727145, "grad_norm": 0.19226615130901337, "learning_rate": 0.002, "loss": 2.5584, "step": 142550 }, { "epoch": 0.28401122019635344, "grad_norm": 0.1508118063211441, "learning_rate": 0.002, "loss": 2.5537, "step": 142560 }, { "epoch": 0.28403114241999233, "grad_norm": 0.2022876888513565, "learning_rate": 0.002, "loss": 2.5603, "step": 142570 }, { "epoch": 0.2840510646436313, "grad_norm": 0.16272622346878052, "learning_rate": 0.002, "loss": 2.57, "step": 142580 }, { "epoch": 0.28407098686727017, "grad_norm": 0.14131467044353485, "learning_rate": 0.002, "loss": 2.557, "step": 142590 }, { "epoch": 0.2840909090909091, "grad_norm": 0.1572580188512802, "learning_rate": 0.002, "loss": 2.5669, "step": 142600 }, { "epoch": 0.284110831314548, "grad_norm": 0.16192880272865295, "learning_rate": 0.002, "loss": 2.5663, "step": 142610 }, { "epoch": 0.2841307535381869, "grad_norm": 0.1741844117641449, "learning_rate": 0.002, "loss": 2.572, "step": 142620 }, { "epoch": 0.28415067576182584, "grad_norm": 0.144920215010643, "learning_rate": 0.002, "loss": 2.5474, "step": 142630 }, { "epoch": 0.28417059798546473, "grad_norm": 0.17942863702774048, "learning_rate": 0.002, "loss": 2.5673, "step": 142640 }, { "epoch": 0.2841905202091037, "grad_norm": 0.15618665516376495, "learning_rate": 0.002, "loss": 2.5664, "step": 142650 }, { "epoch": 0.28421044243274257, "grad_norm": 0.14430183172225952, "learning_rate": 0.002, "loss": 2.5669, "step": 142660 }, { "epoch": 0.28423036465638146, "grad_norm": 0.170572429895401, "learning_rate": 0.002, "loss": 2.56, "step": 142670 }, { "epoch": 0.2842502868800204, "grad_norm": 0.1721796840429306, "learning_rate": 0.002, "loss": 2.5645, "step": 142680 }, { "epoch": 0.2842702091036593, "grad_norm": 0.16601133346557617, "learning_rate": 0.002, "loss": 2.5588, "step": 142690 }, { "epoch": 0.28429013132729825, "grad_norm": 0.14482298493385315, "learning_rate": 0.002, "loss": 2.5606, "step": 142700 }, { "epoch": 0.28431005355093714, "grad_norm": 0.15826864540576935, "learning_rate": 0.002, "loss": 2.5516, "step": 142710 }, { "epoch": 0.2843299757745761, "grad_norm": 0.1651933193206787, "learning_rate": 0.002, "loss": 2.5606, "step": 142720 }, { "epoch": 0.284349897998215, "grad_norm": 0.15327638387680054, "learning_rate": 0.002, "loss": 2.5639, "step": 142730 }, { "epoch": 0.28436982022185386, "grad_norm": 0.22042761743068695, "learning_rate": 0.002, "loss": 2.5709, "step": 142740 }, { "epoch": 0.2843897424454928, "grad_norm": 0.1726568192243576, "learning_rate": 0.002, "loss": 2.5583, "step": 142750 }, { "epoch": 0.2844096646691317, "grad_norm": 0.16271330416202545, "learning_rate": 0.002, "loss": 2.5528, "step": 142760 }, { "epoch": 0.28442958689277065, "grad_norm": 0.17892970144748688, "learning_rate": 0.002, "loss": 2.5576, "step": 142770 }, { "epoch": 0.28444950911640954, "grad_norm": 0.1402113437652588, "learning_rate": 0.002, "loss": 2.5724, "step": 142780 }, { "epoch": 0.28446943134004843, "grad_norm": 0.1614799052476883, "learning_rate": 0.002, "loss": 2.5682, "step": 142790 }, { "epoch": 0.2844893535636874, "grad_norm": 0.1787571907043457, "learning_rate": 0.002, "loss": 2.5583, "step": 142800 }, { "epoch": 0.28450927578732627, "grad_norm": 0.20575428009033203, "learning_rate": 0.002, "loss": 2.5669, "step": 142810 }, { "epoch": 0.2845291980109652, "grad_norm": 0.1526271253824234, "learning_rate": 0.002, "loss": 2.5485, "step": 142820 }, { "epoch": 0.2845491202346041, "grad_norm": 0.16577285528182983, "learning_rate": 0.002, "loss": 2.5618, "step": 142830 }, { "epoch": 0.284569042458243, "grad_norm": 0.16163264214992523, "learning_rate": 0.002, "loss": 2.5616, "step": 142840 }, { "epoch": 0.28458896468188194, "grad_norm": 0.17051436007022858, "learning_rate": 0.002, "loss": 2.565, "step": 142850 }, { "epoch": 0.28460888690552083, "grad_norm": 0.19562402367591858, "learning_rate": 0.002, "loss": 2.5593, "step": 142860 }, { "epoch": 0.2846288091291598, "grad_norm": 0.17244300246238708, "learning_rate": 0.002, "loss": 2.5685, "step": 142870 }, { "epoch": 0.28464873135279867, "grad_norm": 0.15016081929206848, "learning_rate": 0.002, "loss": 2.5669, "step": 142880 }, { "epoch": 0.2846686535764376, "grad_norm": 0.17249897122383118, "learning_rate": 0.002, "loss": 2.5679, "step": 142890 }, { "epoch": 0.2846885758000765, "grad_norm": 0.22952859103679657, "learning_rate": 0.002, "loss": 2.5679, "step": 142900 }, { "epoch": 0.2847084980237154, "grad_norm": 0.14994069933891296, "learning_rate": 0.002, "loss": 2.5819, "step": 142910 }, { "epoch": 0.28472842024735434, "grad_norm": 0.14605741202831268, "learning_rate": 0.002, "loss": 2.5634, "step": 142920 }, { "epoch": 0.28474834247099323, "grad_norm": 0.19768574833869934, "learning_rate": 0.002, "loss": 2.5673, "step": 142930 }, { "epoch": 0.2847682646946322, "grad_norm": 0.16066576540470123, "learning_rate": 0.002, "loss": 2.5562, "step": 142940 }, { "epoch": 0.28478818691827107, "grad_norm": 0.14611472189426422, "learning_rate": 0.002, "loss": 2.5638, "step": 142950 }, { "epoch": 0.28480810914190996, "grad_norm": 0.16792945563793182, "learning_rate": 0.002, "loss": 2.5773, "step": 142960 }, { "epoch": 0.2848280313655489, "grad_norm": 0.1962738335132599, "learning_rate": 0.002, "loss": 2.5522, "step": 142970 }, { "epoch": 0.2848479535891878, "grad_norm": 0.16909895837306976, "learning_rate": 0.002, "loss": 2.5631, "step": 142980 }, { "epoch": 0.28486787581282674, "grad_norm": 0.19385504722595215, "learning_rate": 0.002, "loss": 2.5687, "step": 142990 }, { "epoch": 0.28488779803646563, "grad_norm": 0.1946302354335785, "learning_rate": 0.002, "loss": 2.566, "step": 143000 }, { "epoch": 0.2849077202601045, "grad_norm": 0.15075823664665222, "learning_rate": 0.002, "loss": 2.5474, "step": 143010 }, { "epoch": 0.28492764248374347, "grad_norm": 0.19585129618644714, "learning_rate": 0.002, "loss": 2.5724, "step": 143020 }, { "epoch": 0.28494756470738236, "grad_norm": 0.18863140046596527, "learning_rate": 0.002, "loss": 2.5572, "step": 143030 }, { "epoch": 0.2849674869310213, "grad_norm": 0.19311724603176117, "learning_rate": 0.002, "loss": 2.5625, "step": 143040 }, { "epoch": 0.2849874091546602, "grad_norm": 0.1753331571817398, "learning_rate": 0.002, "loss": 2.5655, "step": 143050 }, { "epoch": 0.28500733137829914, "grad_norm": 0.14212463796138763, "learning_rate": 0.002, "loss": 2.5709, "step": 143060 }, { "epoch": 0.28502725360193804, "grad_norm": 0.18518798053264618, "learning_rate": 0.002, "loss": 2.5537, "step": 143070 }, { "epoch": 0.2850471758255769, "grad_norm": 0.19586360454559326, "learning_rate": 0.002, "loss": 2.5599, "step": 143080 }, { "epoch": 0.2850670980492159, "grad_norm": 0.171969473361969, "learning_rate": 0.002, "loss": 2.5593, "step": 143090 }, { "epoch": 0.28508702027285476, "grad_norm": 0.16775958240032196, "learning_rate": 0.002, "loss": 2.5531, "step": 143100 }, { "epoch": 0.2851069424964937, "grad_norm": 0.16016939282417297, "learning_rate": 0.002, "loss": 2.5659, "step": 143110 }, { "epoch": 0.2851268647201326, "grad_norm": 0.15162205696105957, "learning_rate": 0.002, "loss": 2.5662, "step": 143120 }, { "epoch": 0.2851467869437715, "grad_norm": 0.1943107545375824, "learning_rate": 0.002, "loss": 2.5655, "step": 143130 }, { "epoch": 0.28516670916741044, "grad_norm": 0.1823999434709549, "learning_rate": 0.002, "loss": 2.5702, "step": 143140 }, { "epoch": 0.28518663139104933, "grad_norm": 0.14940452575683594, "learning_rate": 0.002, "loss": 2.5657, "step": 143150 }, { "epoch": 0.2852065536146883, "grad_norm": 0.24424102902412415, "learning_rate": 0.002, "loss": 2.5806, "step": 143160 }, { "epoch": 0.28522647583832716, "grad_norm": 0.16285252571105957, "learning_rate": 0.002, "loss": 2.5669, "step": 143170 }, { "epoch": 0.2852463980619661, "grad_norm": 0.1781267374753952, "learning_rate": 0.002, "loss": 2.5738, "step": 143180 }, { "epoch": 0.285266320285605, "grad_norm": 0.18416787683963776, "learning_rate": 0.002, "loss": 2.556, "step": 143190 }, { "epoch": 0.2852862425092439, "grad_norm": 0.1701795756816864, "learning_rate": 0.002, "loss": 2.5582, "step": 143200 }, { "epoch": 0.28530616473288284, "grad_norm": 0.18281158804893494, "learning_rate": 0.002, "loss": 2.558, "step": 143210 }, { "epoch": 0.28532608695652173, "grad_norm": 0.1516878306865692, "learning_rate": 0.002, "loss": 2.5515, "step": 143220 }, { "epoch": 0.2853460091801607, "grad_norm": 0.21501682698726654, "learning_rate": 0.002, "loss": 2.5678, "step": 143230 }, { "epoch": 0.28536593140379957, "grad_norm": 0.16066639125347137, "learning_rate": 0.002, "loss": 2.5532, "step": 143240 }, { "epoch": 0.28538585362743846, "grad_norm": 0.17318642139434814, "learning_rate": 0.002, "loss": 2.5689, "step": 143250 }, { "epoch": 0.2854057758510774, "grad_norm": 0.16078053414821625, "learning_rate": 0.002, "loss": 2.5503, "step": 143260 }, { "epoch": 0.2854256980747163, "grad_norm": 0.17301663756370544, "learning_rate": 0.002, "loss": 2.5626, "step": 143270 }, { "epoch": 0.28544562029835524, "grad_norm": 0.15010637044906616, "learning_rate": 0.002, "loss": 2.5525, "step": 143280 }, { "epoch": 0.28546554252199413, "grad_norm": 0.1610253006219864, "learning_rate": 0.002, "loss": 2.5588, "step": 143290 }, { "epoch": 0.285485464745633, "grad_norm": 0.19387300312519073, "learning_rate": 0.002, "loss": 2.558, "step": 143300 }, { "epoch": 0.28550538696927197, "grad_norm": 0.14960570633411407, "learning_rate": 0.002, "loss": 2.5656, "step": 143310 }, { "epoch": 0.28552530919291086, "grad_norm": 0.14417684078216553, "learning_rate": 0.002, "loss": 2.5696, "step": 143320 }, { "epoch": 0.2855452314165498, "grad_norm": 0.19015385210514069, "learning_rate": 0.002, "loss": 2.5648, "step": 143330 }, { "epoch": 0.2855651536401887, "grad_norm": 0.16869524121284485, "learning_rate": 0.002, "loss": 2.5634, "step": 143340 }, { "epoch": 0.28558507586382764, "grad_norm": 0.1907496154308319, "learning_rate": 0.002, "loss": 2.5654, "step": 143350 }, { "epoch": 0.28560499808746653, "grad_norm": 0.17070724070072174, "learning_rate": 0.002, "loss": 2.5626, "step": 143360 }, { "epoch": 0.2856249203111054, "grad_norm": 0.18524950742721558, "learning_rate": 0.002, "loss": 2.5621, "step": 143370 }, { "epoch": 0.28564484253474437, "grad_norm": 0.17311322689056396, "learning_rate": 0.002, "loss": 2.5609, "step": 143380 }, { "epoch": 0.28566476475838326, "grad_norm": 0.15409834682941437, "learning_rate": 0.002, "loss": 2.5724, "step": 143390 }, { "epoch": 0.2856846869820222, "grad_norm": 0.14529301226139069, "learning_rate": 0.002, "loss": 2.558, "step": 143400 }, { "epoch": 0.2857046092056611, "grad_norm": 0.18303638696670532, "learning_rate": 0.002, "loss": 2.5752, "step": 143410 }, { "epoch": 0.2857245314293, "grad_norm": 0.183396577835083, "learning_rate": 0.002, "loss": 2.5609, "step": 143420 }, { "epoch": 0.28574445365293893, "grad_norm": 0.1664671003818512, "learning_rate": 0.002, "loss": 2.577, "step": 143430 }, { "epoch": 0.2857643758765778, "grad_norm": 0.1786530762910843, "learning_rate": 0.002, "loss": 2.5719, "step": 143440 }, { "epoch": 0.28578429810021677, "grad_norm": 0.16600660979747772, "learning_rate": 0.002, "loss": 2.5565, "step": 143450 }, { "epoch": 0.28580422032385566, "grad_norm": 0.19642972946166992, "learning_rate": 0.002, "loss": 2.551, "step": 143460 }, { "epoch": 0.2858241425474946, "grad_norm": 0.1924341470003128, "learning_rate": 0.002, "loss": 2.5746, "step": 143470 }, { "epoch": 0.2858440647711335, "grad_norm": 0.1680668592453003, "learning_rate": 0.002, "loss": 2.5527, "step": 143480 }, { "epoch": 0.2858639869947724, "grad_norm": 0.15928217768669128, "learning_rate": 0.002, "loss": 2.5712, "step": 143490 }, { "epoch": 0.28588390921841134, "grad_norm": 0.15745104849338531, "learning_rate": 0.002, "loss": 2.5458, "step": 143500 }, { "epoch": 0.2859038314420502, "grad_norm": 0.14900556206703186, "learning_rate": 0.002, "loss": 2.5728, "step": 143510 }, { "epoch": 0.2859237536656892, "grad_norm": 0.1752777397632599, "learning_rate": 0.002, "loss": 2.556, "step": 143520 }, { "epoch": 0.28594367588932806, "grad_norm": 0.1754533052444458, "learning_rate": 0.002, "loss": 2.5503, "step": 143530 }, { "epoch": 0.28596359811296695, "grad_norm": 0.15591222047805786, "learning_rate": 0.002, "loss": 2.5596, "step": 143540 }, { "epoch": 0.2859835203366059, "grad_norm": 0.19853590428829193, "learning_rate": 0.002, "loss": 2.559, "step": 143550 }, { "epoch": 0.2860034425602448, "grad_norm": 0.1621423214673996, "learning_rate": 0.002, "loss": 2.5513, "step": 143560 }, { "epoch": 0.28602336478388374, "grad_norm": 0.1795533001422882, "learning_rate": 0.002, "loss": 2.5651, "step": 143570 }, { "epoch": 0.28604328700752263, "grad_norm": 0.1523423194885254, "learning_rate": 0.002, "loss": 2.5694, "step": 143580 }, { "epoch": 0.2860632092311615, "grad_norm": 0.14919784665107727, "learning_rate": 0.002, "loss": 2.5633, "step": 143590 }, { "epoch": 0.28608313145480047, "grad_norm": 0.19536006450653076, "learning_rate": 0.002, "loss": 2.5588, "step": 143600 }, { "epoch": 0.28610305367843936, "grad_norm": 0.21150726079940796, "learning_rate": 0.002, "loss": 2.5709, "step": 143610 }, { "epoch": 0.2861229759020783, "grad_norm": 0.15898005664348602, "learning_rate": 0.002, "loss": 2.5601, "step": 143620 }, { "epoch": 0.2861428981257172, "grad_norm": 0.17462602257728577, "learning_rate": 0.002, "loss": 2.551, "step": 143630 }, { "epoch": 0.28616282034935614, "grad_norm": 0.16491711139678955, "learning_rate": 0.002, "loss": 2.5727, "step": 143640 }, { "epoch": 0.28618274257299503, "grad_norm": 0.17695631086826324, "learning_rate": 0.002, "loss": 2.5627, "step": 143650 }, { "epoch": 0.2862026647966339, "grad_norm": 0.1689499169588089, "learning_rate": 0.002, "loss": 2.5577, "step": 143660 }, { "epoch": 0.28622258702027287, "grad_norm": 0.1751052588224411, "learning_rate": 0.002, "loss": 2.5592, "step": 143670 }, { "epoch": 0.28624250924391176, "grad_norm": 0.1926402598619461, "learning_rate": 0.002, "loss": 2.5628, "step": 143680 }, { "epoch": 0.2862624314675507, "grad_norm": 0.18200601637363434, "learning_rate": 0.002, "loss": 2.5743, "step": 143690 }, { "epoch": 0.2862823536911896, "grad_norm": 0.1524413526058197, "learning_rate": 0.002, "loss": 2.5685, "step": 143700 }, { "epoch": 0.2863022759148285, "grad_norm": 0.1813240647315979, "learning_rate": 0.002, "loss": 2.5676, "step": 143710 }, { "epoch": 0.28632219813846743, "grad_norm": 0.16278234124183655, "learning_rate": 0.002, "loss": 2.5607, "step": 143720 }, { "epoch": 0.2863421203621063, "grad_norm": 0.18384312093257904, "learning_rate": 0.002, "loss": 2.5687, "step": 143730 }, { "epoch": 0.28636204258574527, "grad_norm": 0.15174008905887604, "learning_rate": 0.002, "loss": 2.5677, "step": 143740 }, { "epoch": 0.28638196480938416, "grad_norm": 0.16853001713752747, "learning_rate": 0.002, "loss": 2.5613, "step": 143750 }, { "epoch": 0.28640188703302305, "grad_norm": 0.1635768860578537, "learning_rate": 0.002, "loss": 2.5552, "step": 143760 }, { "epoch": 0.286421809256662, "grad_norm": 0.1664254516363144, "learning_rate": 0.002, "loss": 2.5631, "step": 143770 }, { "epoch": 0.2864417314803009, "grad_norm": 0.17335012555122375, "learning_rate": 0.002, "loss": 2.5608, "step": 143780 }, { "epoch": 0.28646165370393983, "grad_norm": 0.1833721101284027, "learning_rate": 0.002, "loss": 2.5693, "step": 143790 }, { "epoch": 0.2864815759275787, "grad_norm": 0.17175382375717163, "learning_rate": 0.002, "loss": 2.5757, "step": 143800 }, { "epoch": 0.28650149815121767, "grad_norm": 0.1777687817811966, "learning_rate": 0.002, "loss": 2.5702, "step": 143810 }, { "epoch": 0.28652142037485656, "grad_norm": 0.18193510174751282, "learning_rate": 0.002, "loss": 2.5595, "step": 143820 }, { "epoch": 0.28654134259849545, "grad_norm": 0.1380859613418579, "learning_rate": 0.002, "loss": 2.5507, "step": 143830 }, { "epoch": 0.2865612648221344, "grad_norm": 0.168066143989563, "learning_rate": 0.002, "loss": 2.5411, "step": 143840 }, { "epoch": 0.2865811870457733, "grad_norm": 0.1562560796737671, "learning_rate": 0.002, "loss": 2.5733, "step": 143850 }, { "epoch": 0.28660110926941224, "grad_norm": 0.1693487912416458, "learning_rate": 0.002, "loss": 2.5631, "step": 143860 }, { "epoch": 0.2866210314930511, "grad_norm": 0.17586131393909454, "learning_rate": 0.002, "loss": 2.5587, "step": 143870 }, { "epoch": 0.28664095371669, "grad_norm": 0.15392671525478363, "learning_rate": 0.002, "loss": 2.5672, "step": 143880 }, { "epoch": 0.28666087594032896, "grad_norm": 0.16610153019428253, "learning_rate": 0.002, "loss": 2.5466, "step": 143890 }, { "epoch": 0.28668079816396785, "grad_norm": 0.18706288933753967, "learning_rate": 0.002, "loss": 2.5525, "step": 143900 }, { "epoch": 0.2867007203876068, "grad_norm": 0.19700729846954346, "learning_rate": 0.002, "loss": 2.5554, "step": 143910 }, { "epoch": 0.2867206426112457, "grad_norm": 0.16507747769355774, "learning_rate": 0.002, "loss": 2.5703, "step": 143920 }, { "epoch": 0.28674056483488464, "grad_norm": 0.18685434758663177, "learning_rate": 0.002, "loss": 2.5464, "step": 143930 }, { "epoch": 0.2867604870585235, "grad_norm": 0.19401557743549347, "learning_rate": 0.002, "loss": 2.5553, "step": 143940 }, { "epoch": 0.2867804092821624, "grad_norm": 0.16882015764713287, "learning_rate": 0.002, "loss": 2.5609, "step": 143950 }, { "epoch": 0.28680033150580136, "grad_norm": 0.17949077486991882, "learning_rate": 0.002, "loss": 2.5794, "step": 143960 }, { "epoch": 0.28682025372944026, "grad_norm": 0.15673403441905975, "learning_rate": 0.002, "loss": 2.5494, "step": 143970 }, { "epoch": 0.2868401759530792, "grad_norm": 0.16814522445201874, "learning_rate": 0.002, "loss": 2.5724, "step": 143980 }, { "epoch": 0.2868600981767181, "grad_norm": 0.17341865599155426, "learning_rate": 0.002, "loss": 2.5715, "step": 143990 }, { "epoch": 0.286880020400357, "grad_norm": 0.17335787415504456, "learning_rate": 0.002, "loss": 2.5665, "step": 144000 }, { "epoch": 0.28689994262399593, "grad_norm": 0.1472318023443222, "learning_rate": 0.002, "loss": 2.5585, "step": 144010 }, { "epoch": 0.2869198648476348, "grad_norm": 0.19820190966129303, "learning_rate": 0.002, "loss": 2.5651, "step": 144020 }, { "epoch": 0.28693978707127377, "grad_norm": 0.16524899005889893, "learning_rate": 0.002, "loss": 2.5766, "step": 144030 }, { "epoch": 0.28695970929491266, "grad_norm": 0.14250244200229645, "learning_rate": 0.002, "loss": 2.5589, "step": 144040 }, { "epoch": 0.28697963151855155, "grad_norm": 0.2078358680009842, "learning_rate": 0.002, "loss": 2.5625, "step": 144050 }, { "epoch": 0.2869995537421905, "grad_norm": 0.16297629475593567, "learning_rate": 0.002, "loss": 2.5746, "step": 144060 }, { "epoch": 0.2870194759658294, "grad_norm": 0.15225836634635925, "learning_rate": 0.002, "loss": 2.5572, "step": 144070 }, { "epoch": 0.28703939818946833, "grad_norm": 0.15411673486232758, "learning_rate": 0.002, "loss": 2.566, "step": 144080 }, { "epoch": 0.2870593204131072, "grad_norm": 0.15874257683753967, "learning_rate": 0.002, "loss": 2.5522, "step": 144090 }, { "epoch": 0.28707924263674617, "grad_norm": 0.19545497000217438, "learning_rate": 0.002, "loss": 2.5578, "step": 144100 }, { "epoch": 0.28709916486038506, "grad_norm": 0.2109961211681366, "learning_rate": 0.002, "loss": 2.5675, "step": 144110 }, { "epoch": 0.28711908708402395, "grad_norm": 0.20566706359386444, "learning_rate": 0.002, "loss": 2.5589, "step": 144120 }, { "epoch": 0.2871390093076629, "grad_norm": 0.1599162369966507, "learning_rate": 0.002, "loss": 2.5597, "step": 144130 }, { "epoch": 0.2871589315313018, "grad_norm": 0.16735303401947021, "learning_rate": 0.002, "loss": 2.5672, "step": 144140 }, { "epoch": 0.28717885375494073, "grad_norm": 0.19853904843330383, "learning_rate": 0.002, "loss": 2.5695, "step": 144150 }, { "epoch": 0.2871987759785796, "grad_norm": 0.15237829089164734, "learning_rate": 0.002, "loss": 2.5713, "step": 144160 }, { "epoch": 0.2872186982022185, "grad_norm": 0.16121584177017212, "learning_rate": 0.002, "loss": 2.5722, "step": 144170 }, { "epoch": 0.28723862042585746, "grad_norm": 0.1718721091747284, "learning_rate": 0.002, "loss": 2.5732, "step": 144180 }, { "epoch": 0.28725854264949635, "grad_norm": 0.17770391702651978, "learning_rate": 0.002, "loss": 2.5506, "step": 144190 }, { "epoch": 0.2872784648731353, "grad_norm": 0.1657462865114212, "learning_rate": 0.002, "loss": 2.5481, "step": 144200 }, { "epoch": 0.2872983870967742, "grad_norm": 0.19488629698753357, "learning_rate": 0.002, "loss": 2.5516, "step": 144210 }, { "epoch": 0.28731830932041313, "grad_norm": 0.1849423050880432, "learning_rate": 0.002, "loss": 2.5684, "step": 144220 }, { "epoch": 0.287338231544052, "grad_norm": 0.17289675772190094, "learning_rate": 0.002, "loss": 2.5729, "step": 144230 }, { "epoch": 0.2873581537676909, "grad_norm": 0.18633943796157837, "learning_rate": 0.002, "loss": 2.5737, "step": 144240 }, { "epoch": 0.28737807599132986, "grad_norm": 0.15094883739948273, "learning_rate": 0.002, "loss": 2.5542, "step": 144250 }, { "epoch": 0.28739799821496875, "grad_norm": 0.17323489487171173, "learning_rate": 0.002, "loss": 2.5589, "step": 144260 }, { "epoch": 0.2874179204386077, "grad_norm": 0.18096177279949188, "learning_rate": 0.002, "loss": 2.5551, "step": 144270 }, { "epoch": 0.2874378426622466, "grad_norm": 0.17882530391216278, "learning_rate": 0.002, "loss": 2.5437, "step": 144280 }, { "epoch": 0.2874577648858855, "grad_norm": 0.14485499262809753, "learning_rate": 0.002, "loss": 2.5671, "step": 144290 }, { "epoch": 0.2874776871095244, "grad_norm": 0.19686545431613922, "learning_rate": 0.002, "loss": 2.5826, "step": 144300 }, { "epoch": 0.2874976093331633, "grad_norm": 0.15963439643383026, "learning_rate": 0.002, "loss": 2.5581, "step": 144310 }, { "epoch": 0.28751753155680226, "grad_norm": 0.18713229894638062, "learning_rate": 0.002, "loss": 2.5631, "step": 144320 }, { "epoch": 0.28753745378044115, "grad_norm": 0.16515883803367615, "learning_rate": 0.002, "loss": 2.5662, "step": 144330 }, { "epoch": 0.28755737600408005, "grad_norm": 0.13910473883152008, "learning_rate": 0.002, "loss": 2.5625, "step": 144340 }, { "epoch": 0.287577298227719, "grad_norm": 0.17170080542564392, "learning_rate": 0.002, "loss": 2.5627, "step": 144350 }, { "epoch": 0.2875972204513579, "grad_norm": 0.17655286192893982, "learning_rate": 0.002, "loss": 2.564, "step": 144360 }, { "epoch": 0.28761714267499683, "grad_norm": 0.19577454030513763, "learning_rate": 0.002, "loss": 2.5623, "step": 144370 }, { "epoch": 0.2876370648986357, "grad_norm": 0.14090798795223236, "learning_rate": 0.002, "loss": 2.591, "step": 144380 }, { "epoch": 0.28765698712227467, "grad_norm": 0.2011304497718811, "learning_rate": 0.002, "loss": 2.5719, "step": 144390 }, { "epoch": 0.28767690934591356, "grad_norm": 0.15238456428050995, "learning_rate": 0.002, "loss": 2.5614, "step": 144400 }, { "epoch": 0.28769683156955245, "grad_norm": 0.19838149845600128, "learning_rate": 0.002, "loss": 2.565, "step": 144410 }, { "epoch": 0.2877167537931914, "grad_norm": 0.19740955531597137, "learning_rate": 0.002, "loss": 2.5694, "step": 144420 }, { "epoch": 0.2877366760168303, "grad_norm": 0.18815699219703674, "learning_rate": 0.002, "loss": 2.5579, "step": 144430 }, { "epoch": 0.28775659824046923, "grad_norm": 0.18418093025684357, "learning_rate": 0.002, "loss": 2.5646, "step": 144440 }, { "epoch": 0.2877765204641081, "grad_norm": 0.1497143805027008, "learning_rate": 0.002, "loss": 2.5777, "step": 144450 }, { "epoch": 0.287796442687747, "grad_norm": 0.13604983687400818, "learning_rate": 0.002, "loss": 2.5738, "step": 144460 }, { "epoch": 0.28781636491138596, "grad_norm": 0.18403826653957367, "learning_rate": 0.002, "loss": 2.5535, "step": 144470 }, { "epoch": 0.28783628713502485, "grad_norm": 0.17511127889156342, "learning_rate": 0.002, "loss": 2.5538, "step": 144480 }, { "epoch": 0.2878562093586638, "grad_norm": 0.18930767476558685, "learning_rate": 0.002, "loss": 2.5672, "step": 144490 }, { "epoch": 0.2878761315823027, "grad_norm": 0.19104450941085815, "learning_rate": 0.002, "loss": 2.5575, "step": 144500 }, { "epoch": 0.2878960538059416, "grad_norm": 0.16105690598487854, "learning_rate": 0.002, "loss": 2.5568, "step": 144510 }, { "epoch": 0.2879159760295805, "grad_norm": 0.18189087510108948, "learning_rate": 0.002, "loss": 2.5733, "step": 144520 }, { "epoch": 0.2879358982532194, "grad_norm": 0.1493779569864273, "learning_rate": 0.002, "loss": 2.5734, "step": 144530 }, { "epoch": 0.28795582047685836, "grad_norm": 0.17173130810260773, "learning_rate": 0.002, "loss": 2.5571, "step": 144540 }, { "epoch": 0.28797574270049725, "grad_norm": 0.18267613649368286, "learning_rate": 0.002, "loss": 2.5673, "step": 144550 }, { "epoch": 0.2879956649241362, "grad_norm": 0.21876241266727448, "learning_rate": 0.002, "loss": 2.576, "step": 144560 }, { "epoch": 0.2880155871477751, "grad_norm": 0.14370116591453552, "learning_rate": 0.002, "loss": 2.5716, "step": 144570 }, { "epoch": 0.288035509371414, "grad_norm": 0.13990846276283264, "learning_rate": 0.002, "loss": 2.5527, "step": 144580 }, { "epoch": 0.2880554315950529, "grad_norm": 0.15341754257678986, "learning_rate": 0.002, "loss": 2.569, "step": 144590 }, { "epoch": 0.2880753538186918, "grad_norm": 0.1673603504896164, "learning_rate": 0.002, "loss": 2.5562, "step": 144600 }, { "epoch": 0.28809527604233076, "grad_norm": 0.22710779309272766, "learning_rate": 0.002, "loss": 2.5526, "step": 144610 }, { "epoch": 0.28811519826596965, "grad_norm": 0.15590700507164001, "learning_rate": 0.002, "loss": 2.5495, "step": 144620 }, { "epoch": 0.28813512048960854, "grad_norm": 0.17204323410987854, "learning_rate": 0.002, "loss": 2.5623, "step": 144630 }, { "epoch": 0.2881550427132475, "grad_norm": 0.19309298694133759, "learning_rate": 0.002, "loss": 2.5654, "step": 144640 }, { "epoch": 0.2881749649368864, "grad_norm": 0.15421487390995026, "learning_rate": 0.002, "loss": 2.5748, "step": 144650 }, { "epoch": 0.2881948871605253, "grad_norm": 0.18327084183692932, "learning_rate": 0.002, "loss": 2.5579, "step": 144660 }, { "epoch": 0.2882148093841642, "grad_norm": 0.1942087858915329, "learning_rate": 0.002, "loss": 2.5612, "step": 144670 }, { "epoch": 0.28823473160780316, "grad_norm": 0.148911714553833, "learning_rate": 0.002, "loss": 2.5758, "step": 144680 }, { "epoch": 0.28825465383144205, "grad_norm": 0.1680528074502945, "learning_rate": 0.002, "loss": 2.5752, "step": 144690 }, { "epoch": 0.28827457605508094, "grad_norm": 0.17302951216697693, "learning_rate": 0.002, "loss": 2.5914, "step": 144700 }, { "epoch": 0.2882944982787199, "grad_norm": 0.20574013888835907, "learning_rate": 0.002, "loss": 2.5565, "step": 144710 }, { "epoch": 0.2883144205023588, "grad_norm": 0.18342070281505585, "learning_rate": 0.002, "loss": 2.5558, "step": 144720 }, { "epoch": 0.2883343427259977, "grad_norm": 0.1782567799091339, "learning_rate": 0.002, "loss": 2.5654, "step": 144730 }, { "epoch": 0.2883542649496366, "grad_norm": 0.16925238072872162, "learning_rate": 0.002, "loss": 2.5734, "step": 144740 }, { "epoch": 0.2883741871732755, "grad_norm": 0.1367015540599823, "learning_rate": 0.002, "loss": 2.5692, "step": 144750 }, { "epoch": 0.28839410939691446, "grad_norm": 0.13842305541038513, "learning_rate": 0.002, "loss": 2.5778, "step": 144760 }, { "epoch": 0.28841403162055335, "grad_norm": 0.1574474275112152, "learning_rate": 0.002, "loss": 2.5663, "step": 144770 }, { "epoch": 0.2884339538441923, "grad_norm": 0.16616719961166382, "learning_rate": 0.002, "loss": 2.5588, "step": 144780 }, { "epoch": 0.2884538760678312, "grad_norm": 0.1984870582818985, "learning_rate": 0.002, "loss": 2.5537, "step": 144790 }, { "epoch": 0.2884737982914701, "grad_norm": 0.14568212628364563, "learning_rate": 0.002, "loss": 2.5673, "step": 144800 }, { "epoch": 0.288493720515109, "grad_norm": 0.18700788915157318, "learning_rate": 0.002, "loss": 2.5672, "step": 144810 }, { "epoch": 0.2885136427387479, "grad_norm": 0.2043485790491104, "learning_rate": 0.002, "loss": 2.5728, "step": 144820 }, { "epoch": 0.28853356496238686, "grad_norm": 0.17096620798110962, "learning_rate": 0.002, "loss": 2.5712, "step": 144830 }, { "epoch": 0.28855348718602575, "grad_norm": 0.16270828247070312, "learning_rate": 0.002, "loss": 2.5642, "step": 144840 }, { "epoch": 0.2885734094096647, "grad_norm": 0.17483893036842346, "learning_rate": 0.002, "loss": 2.5436, "step": 144850 }, { "epoch": 0.2885933316333036, "grad_norm": 0.161327064037323, "learning_rate": 0.002, "loss": 2.5645, "step": 144860 }, { "epoch": 0.2886132538569425, "grad_norm": 0.19077149033546448, "learning_rate": 0.002, "loss": 2.5711, "step": 144870 }, { "epoch": 0.2886331760805814, "grad_norm": 0.15770165622234344, "learning_rate": 0.002, "loss": 2.5658, "step": 144880 }, { "epoch": 0.2886530983042203, "grad_norm": 0.1458963006734848, "learning_rate": 0.002, "loss": 2.5559, "step": 144890 }, { "epoch": 0.28867302052785926, "grad_norm": 0.15291713178157806, "learning_rate": 0.002, "loss": 2.5665, "step": 144900 }, { "epoch": 0.28869294275149815, "grad_norm": 0.17835360765457153, "learning_rate": 0.002, "loss": 2.5501, "step": 144910 }, { "epoch": 0.28871286497513704, "grad_norm": 0.1832766979932785, "learning_rate": 0.002, "loss": 2.5493, "step": 144920 }, { "epoch": 0.288732787198776, "grad_norm": 0.15763549506664276, "learning_rate": 0.002, "loss": 2.571, "step": 144930 }, { "epoch": 0.2887527094224149, "grad_norm": 0.18520669639110565, "learning_rate": 0.002, "loss": 2.5651, "step": 144940 }, { "epoch": 0.2887726316460538, "grad_norm": 0.15095053613185883, "learning_rate": 0.002, "loss": 2.5583, "step": 144950 }, { "epoch": 0.2887925538696927, "grad_norm": 0.1739719659090042, "learning_rate": 0.002, "loss": 2.5554, "step": 144960 }, { "epoch": 0.28881247609333166, "grad_norm": 0.1507539004087448, "learning_rate": 0.002, "loss": 2.5631, "step": 144970 }, { "epoch": 0.28883239831697055, "grad_norm": 0.14509601891040802, "learning_rate": 0.002, "loss": 2.5563, "step": 144980 }, { "epoch": 0.28885232054060944, "grad_norm": 0.18030421435832977, "learning_rate": 0.002, "loss": 2.5665, "step": 144990 }, { "epoch": 0.2888722427642484, "grad_norm": 0.15171413123607635, "learning_rate": 0.002, "loss": 2.5566, "step": 145000 }, { "epoch": 0.2888921649878873, "grad_norm": 0.16358961164951324, "learning_rate": 0.002, "loss": 2.5547, "step": 145010 }, { "epoch": 0.2889120872115262, "grad_norm": 0.19026611745357513, "learning_rate": 0.002, "loss": 2.5585, "step": 145020 }, { "epoch": 0.2889320094351651, "grad_norm": 0.15109284222126007, "learning_rate": 0.002, "loss": 2.5572, "step": 145030 }, { "epoch": 0.288951931658804, "grad_norm": 0.14986322820186615, "learning_rate": 0.002, "loss": 2.5726, "step": 145040 }, { "epoch": 0.28897185388244295, "grad_norm": 0.1719459444284439, "learning_rate": 0.002, "loss": 2.5614, "step": 145050 }, { "epoch": 0.28899177610608184, "grad_norm": 0.1545795202255249, "learning_rate": 0.002, "loss": 2.5506, "step": 145060 }, { "epoch": 0.2890116983297208, "grad_norm": 0.1769547462463379, "learning_rate": 0.002, "loss": 2.5626, "step": 145070 }, { "epoch": 0.2890316205533597, "grad_norm": 0.14710256457328796, "learning_rate": 0.002, "loss": 2.5482, "step": 145080 }, { "epoch": 0.28905154277699857, "grad_norm": 0.17552489042282104, "learning_rate": 0.002, "loss": 2.5654, "step": 145090 }, { "epoch": 0.2890714650006375, "grad_norm": 0.17034368216991425, "learning_rate": 0.002, "loss": 2.5804, "step": 145100 }, { "epoch": 0.2890913872242764, "grad_norm": 0.16016022861003876, "learning_rate": 0.002, "loss": 2.5697, "step": 145110 }, { "epoch": 0.28911130944791535, "grad_norm": 0.1626288741827011, "learning_rate": 0.002, "loss": 2.5589, "step": 145120 }, { "epoch": 0.28913123167155425, "grad_norm": 0.1844516545534134, "learning_rate": 0.002, "loss": 2.5596, "step": 145130 }, { "epoch": 0.2891511538951932, "grad_norm": 0.15381194651126862, "learning_rate": 0.002, "loss": 2.5611, "step": 145140 }, { "epoch": 0.2891710761188321, "grad_norm": 0.18712225556373596, "learning_rate": 0.002, "loss": 2.5487, "step": 145150 }, { "epoch": 0.289190998342471, "grad_norm": 0.20788463950157166, "learning_rate": 0.002, "loss": 2.5627, "step": 145160 }, { "epoch": 0.2892109205661099, "grad_norm": 0.1584213376045227, "learning_rate": 0.002, "loss": 2.56, "step": 145170 }, { "epoch": 0.2892308427897488, "grad_norm": 0.15638595819473267, "learning_rate": 0.002, "loss": 2.5573, "step": 145180 }, { "epoch": 0.28925076501338776, "grad_norm": 0.15559513866901398, "learning_rate": 0.002, "loss": 2.5491, "step": 145190 }, { "epoch": 0.28927068723702665, "grad_norm": 0.22527803480625153, "learning_rate": 0.002, "loss": 2.5646, "step": 145200 }, { "epoch": 0.28929060946066554, "grad_norm": 0.16381308436393738, "learning_rate": 0.002, "loss": 2.5584, "step": 145210 }, { "epoch": 0.2893105316843045, "grad_norm": 0.14420709013938904, "learning_rate": 0.002, "loss": 2.5542, "step": 145220 }, { "epoch": 0.2893304539079434, "grad_norm": 0.16040946543216705, "learning_rate": 0.002, "loss": 2.5668, "step": 145230 }, { "epoch": 0.2893503761315823, "grad_norm": 0.17467492818832397, "learning_rate": 0.002, "loss": 2.5638, "step": 145240 }, { "epoch": 0.2893702983552212, "grad_norm": 0.13705042004585266, "learning_rate": 0.002, "loss": 2.5543, "step": 145250 }, { "epoch": 0.2893902205788601, "grad_norm": 0.1785883754491806, "learning_rate": 0.002, "loss": 2.5633, "step": 145260 }, { "epoch": 0.28941014280249905, "grad_norm": 0.2022959291934967, "learning_rate": 0.002, "loss": 2.563, "step": 145270 }, { "epoch": 0.28943006502613794, "grad_norm": 0.14315518736839294, "learning_rate": 0.002, "loss": 2.561, "step": 145280 }, { "epoch": 0.2894499872497769, "grad_norm": 0.17175279557704926, "learning_rate": 0.002, "loss": 2.5659, "step": 145290 }, { "epoch": 0.2894699094734158, "grad_norm": 0.1697082370519638, "learning_rate": 0.002, "loss": 2.5672, "step": 145300 }, { "epoch": 0.2894898316970547, "grad_norm": 0.15203377604484558, "learning_rate": 0.002, "loss": 2.5765, "step": 145310 }, { "epoch": 0.2895097539206936, "grad_norm": 0.17869673669338226, "learning_rate": 0.002, "loss": 2.5615, "step": 145320 }, { "epoch": 0.2895296761443325, "grad_norm": 0.17408466339111328, "learning_rate": 0.002, "loss": 2.5658, "step": 145330 }, { "epoch": 0.28954959836797145, "grad_norm": 0.1718287467956543, "learning_rate": 0.002, "loss": 2.5596, "step": 145340 }, { "epoch": 0.28956952059161034, "grad_norm": 0.16701239347457886, "learning_rate": 0.002, "loss": 2.5572, "step": 145350 }, { "epoch": 0.2895894428152493, "grad_norm": 0.16958853602409363, "learning_rate": 0.002, "loss": 2.5599, "step": 145360 }, { "epoch": 0.2896093650388882, "grad_norm": 0.15668050944805145, "learning_rate": 0.002, "loss": 2.5774, "step": 145370 }, { "epoch": 0.28962928726252707, "grad_norm": 0.1912543773651123, "learning_rate": 0.002, "loss": 2.5605, "step": 145380 }, { "epoch": 0.289649209486166, "grad_norm": 0.15727970004081726, "learning_rate": 0.002, "loss": 2.5592, "step": 145390 }, { "epoch": 0.2896691317098049, "grad_norm": 0.5423529744148254, "learning_rate": 0.002, "loss": 2.5644, "step": 145400 }, { "epoch": 0.28968905393344385, "grad_norm": 0.170111283659935, "learning_rate": 0.002, "loss": 2.5817, "step": 145410 }, { "epoch": 0.28970897615708274, "grad_norm": 0.1409509927034378, "learning_rate": 0.002, "loss": 2.5728, "step": 145420 }, { "epoch": 0.2897288983807217, "grad_norm": 0.171727254986763, "learning_rate": 0.002, "loss": 2.5753, "step": 145430 }, { "epoch": 0.2897488206043606, "grad_norm": 0.1692168414592743, "learning_rate": 0.002, "loss": 2.5768, "step": 145440 }, { "epoch": 0.28976874282799947, "grad_norm": 0.13967280089855194, "learning_rate": 0.002, "loss": 2.5743, "step": 145450 }, { "epoch": 0.2897886650516384, "grad_norm": 0.2361176759004593, "learning_rate": 0.002, "loss": 2.5576, "step": 145460 }, { "epoch": 0.2898085872752773, "grad_norm": 0.1630849540233612, "learning_rate": 0.002, "loss": 2.5657, "step": 145470 }, { "epoch": 0.28982850949891625, "grad_norm": 0.19414283335208893, "learning_rate": 0.002, "loss": 2.5622, "step": 145480 }, { "epoch": 0.28984843172255514, "grad_norm": 0.14648877084255219, "learning_rate": 0.002, "loss": 2.5779, "step": 145490 }, { "epoch": 0.28986835394619404, "grad_norm": 0.19412755966186523, "learning_rate": 0.002, "loss": 2.568, "step": 145500 }, { "epoch": 0.289888276169833, "grad_norm": 0.1540287733078003, "learning_rate": 0.002, "loss": 2.5554, "step": 145510 }, { "epoch": 0.28990819839347187, "grad_norm": 0.1463668793439865, "learning_rate": 0.002, "loss": 2.5701, "step": 145520 }, { "epoch": 0.2899281206171108, "grad_norm": 0.21605148911476135, "learning_rate": 0.002, "loss": 2.5488, "step": 145530 }, { "epoch": 0.2899480428407497, "grad_norm": 0.18578146398067474, "learning_rate": 0.002, "loss": 2.5677, "step": 145540 }, { "epoch": 0.2899679650643886, "grad_norm": 0.2223224937915802, "learning_rate": 0.002, "loss": 2.5641, "step": 145550 }, { "epoch": 0.28998788728802755, "grad_norm": 0.15917669236660004, "learning_rate": 0.002, "loss": 2.5613, "step": 145560 }, { "epoch": 0.29000780951166644, "grad_norm": 0.1518273800611496, "learning_rate": 0.002, "loss": 2.5671, "step": 145570 }, { "epoch": 0.2900277317353054, "grad_norm": 0.17730167508125305, "learning_rate": 0.002, "loss": 2.5614, "step": 145580 }, { "epoch": 0.2900476539589443, "grad_norm": 0.21777182817459106, "learning_rate": 0.002, "loss": 2.5652, "step": 145590 }, { "epoch": 0.2900675761825832, "grad_norm": 0.1705234944820404, "learning_rate": 0.002, "loss": 2.5524, "step": 145600 }, { "epoch": 0.2900874984062221, "grad_norm": 0.1720016747713089, "learning_rate": 0.002, "loss": 2.5655, "step": 145610 }, { "epoch": 0.290107420629861, "grad_norm": 0.17047090828418732, "learning_rate": 0.002, "loss": 2.5655, "step": 145620 }, { "epoch": 0.29012734285349995, "grad_norm": 0.16607484221458435, "learning_rate": 0.002, "loss": 2.5716, "step": 145630 }, { "epoch": 0.29014726507713884, "grad_norm": 0.19103474915027618, "learning_rate": 0.002, "loss": 2.5615, "step": 145640 }, { "epoch": 0.2901671873007778, "grad_norm": 0.16195891797542572, "learning_rate": 0.002, "loss": 2.5775, "step": 145650 }, { "epoch": 0.2901871095244167, "grad_norm": 0.1464824676513672, "learning_rate": 0.002, "loss": 2.5702, "step": 145660 }, { "epoch": 0.29020703174805557, "grad_norm": 0.18820111453533173, "learning_rate": 0.002, "loss": 2.5613, "step": 145670 }, { "epoch": 0.2902269539716945, "grad_norm": 0.21510757505893707, "learning_rate": 0.002, "loss": 2.5571, "step": 145680 }, { "epoch": 0.2902468761953334, "grad_norm": 0.17309962213039398, "learning_rate": 0.002, "loss": 2.5459, "step": 145690 }, { "epoch": 0.29026679841897235, "grad_norm": 0.4288616180419922, "learning_rate": 0.002, "loss": 2.5673, "step": 145700 }, { "epoch": 0.29028672064261124, "grad_norm": 0.18765906989574432, "learning_rate": 0.002, "loss": 2.563, "step": 145710 }, { "epoch": 0.2903066428662502, "grad_norm": 0.15767763555049896, "learning_rate": 0.002, "loss": 2.5678, "step": 145720 }, { "epoch": 0.2903265650898891, "grad_norm": 0.20512062311172485, "learning_rate": 0.002, "loss": 2.5662, "step": 145730 }, { "epoch": 0.29034648731352797, "grad_norm": 0.15452028810977936, "learning_rate": 0.002, "loss": 2.565, "step": 145740 }, { "epoch": 0.2903664095371669, "grad_norm": 0.15496554970741272, "learning_rate": 0.002, "loss": 2.5705, "step": 145750 }, { "epoch": 0.2903863317608058, "grad_norm": 0.18225041031837463, "learning_rate": 0.002, "loss": 2.5528, "step": 145760 }, { "epoch": 0.29040625398444475, "grad_norm": 0.14523033797740936, "learning_rate": 0.002, "loss": 2.5484, "step": 145770 }, { "epoch": 0.29042617620808364, "grad_norm": 0.18685780465602875, "learning_rate": 0.002, "loss": 2.5699, "step": 145780 }, { "epoch": 0.29044609843172253, "grad_norm": 0.14348216354846954, "learning_rate": 0.002, "loss": 2.5796, "step": 145790 }, { "epoch": 0.2904660206553615, "grad_norm": 0.15595188736915588, "learning_rate": 0.002, "loss": 2.556, "step": 145800 }, { "epoch": 0.29048594287900037, "grad_norm": 0.13825857639312744, "learning_rate": 0.002, "loss": 2.5448, "step": 145810 }, { "epoch": 0.2905058651026393, "grad_norm": 0.1796499639749527, "learning_rate": 0.002, "loss": 2.5711, "step": 145820 }, { "epoch": 0.2905257873262782, "grad_norm": 0.15929999947547913, "learning_rate": 0.002, "loss": 2.5668, "step": 145830 }, { "epoch": 0.2905457095499171, "grad_norm": 0.16619107127189636, "learning_rate": 0.002, "loss": 2.5591, "step": 145840 }, { "epoch": 0.29056563177355604, "grad_norm": 0.20277166366577148, "learning_rate": 0.002, "loss": 2.5528, "step": 145850 }, { "epoch": 0.29058555399719493, "grad_norm": 0.14671145379543304, "learning_rate": 0.002, "loss": 2.5723, "step": 145860 }, { "epoch": 0.2906054762208339, "grad_norm": 0.18406371772289276, "learning_rate": 0.002, "loss": 2.5661, "step": 145870 }, { "epoch": 0.29062539844447277, "grad_norm": 0.14700673520565033, "learning_rate": 0.002, "loss": 2.5723, "step": 145880 }, { "epoch": 0.2906453206681117, "grad_norm": 0.16114836931228638, "learning_rate": 0.002, "loss": 2.5853, "step": 145890 }, { "epoch": 0.2906652428917506, "grad_norm": 0.20353087782859802, "learning_rate": 0.002, "loss": 2.5497, "step": 145900 }, { "epoch": 0.2906851651153895, "grad_norm": 0.16217397153377533, "learning_rate": 0.002, "loss": 2.5422, "step": 145910 }, { "epoch": 0.29070508733902845, "grad_norm": 0.17215721309185028, "learning_rate": 0.002, "loss": 2.57, "step": 145920 }, { "epoch": 0.29072500956266734, "grad_norm": 0.25819289684295654, "learning_rate": 0.002, "loss": 2.5647, "step": 145930 }, { "epoch": 0.2907449317863063, "grad_norm": 0.17939700186252594, "learning_rate": 0.002, "loss": 2.5657, "step": 145940 }, { "epoch": 0.2907648540099452, "grad_norm": 0.1948026716709137, "learning_rate": 0.002, "loss": 2.5515, "step": 145950 }, { "epoch": 0.29078477623358406, "grad_norm": 0.1570216864347458, "learning_rate": 0.002, "loss": 2.5494, "step": 145960 }, { "epoch": 0.290804698457223, "grad_norm": 0.1991645097732544, "learning_rate": 0.002, "loss": 2.5692, "step": 145970 }, { "epoch": 0.2908246206808619, "grad_norm": 0.1670815646648407, "learning_rate": 0.002, "loss": 2.5678, "step": 145980 }, { "epoch": 0.29084454290450085, "grad_norm": 0.1706862449645996, "learning_rate": 0.002, "loss": 2.5664, "step": 145990 }, { "epoch": 0.29086446512813974, "grad_norm": 0.17850042879581451, "learning_rate": 0.002, "loss": 2.5617, "step": 146000 }, { "epoch": 0.2908843873517787, "grad_norm": 0.1471022069454193, "learning_rate": 0.002, "loss": 2.5601, "step": 146010 }, { "epoch": 0.2909043095754176, "grad_norm": 0.1617433875799179, "learning_rate": 0.002, "loss": 2.5585, "step": 146020 }, { "epoch": 0.29092423179905647, "grad_norm": 0.16136857867240906, "learning_rate": 0.002, "loss": 2.5565, "step": 146030 }, { "epoch": 0.2909441540226954, "grad_norm": 0.1449802815914154, "learning_rate": 0.002, "loss": 2.55, "step": 146040 }, { "epoch": 0.2909640762463343, "grad_norm": 0.18795016407966614, "learning_rate": 0.002, "loss": 2.5595, "step": 146050 }, { "epoch": 0.29098399846997325, "grad_norm": 0.1666661500930786, "learning_rate": 0.002, "loss": 2.5657, "step": 146060 }, { "epoch": 0.29100392069361214, "grad_norm": 0.16433697938919067, "learning_rate": 0.002, "loss": 2.5566, "step": 146070 }, { "epoch": 0.29102384291725103, "grad_norm": 0.16998571157455444, "learning_rate": 0.002, "loss": 2.5698, "step": 146080 }, { "epoch": 0.29104376514089, "grad_norm": 0.15117834508419037, "learning_rate": 0.002, "loss": 2.5663, "step": 146090 }, { "epoch": 0.29106368736452887, "grad_norm": 0.16403727233409882, "learning_rate": 0.002, "loss": 2.5651, "step": 146100 }, { "epoch": 0.2910836095881678, "grad_norm": 0.17163307964801788, "learning_rate": 0.002, "loss": 2.5558, "step": 146110 }, { "epoch": 0.2911035318118067, "grad_norm": 0.17726069688796997, "learning_rate": 0.002, "loss": 2.56, "step": 146120 }, { "epoch": 0.2911234540354456, "grad_norm": 0.20157116651535034, "learning_rate": 0.002, "loss": 2.5532, "step": 146130 }, { "epoch": 0.29114337625908454, "grad_norm": 0.1299348771572113, "learning_rate": 0.002, "loss": 2.5634, "step": 146140 }, { "epoch": 0.29116329848272343, "grad_norm": 0.15780653059482574, "learning_rate": 0.002, "loss": 2.564, "step": 146150 }, { "epoch": 0.2911832207063624, "grad_norm": 0.15739454329013824, "learning_rate": 0.002, "loss": 2.5505, "step": 146160 }, { "epoch": 0.29120314293000127, "grad_norm": 0.16797880828380585, "learning_rate": 0.002, "loss": 2.5731, "step": 146170 }, { "epoch": 0.2912230651536402, "grad_norm": 0.16347618401050568, "learning_rate": 0.002, "loss": 2.5623, "step": 146180 }, { "epoch": 0.2912429873772791, "grad_norm": 0.16421499848365784, "learning_rate": 0.002, "loss": 2.571, "step": 146190 }, { "epoch": 0.291262909600918, "grad_norm": 0.19759470224380493, "learning_rate": 0.002, "loss": 2.5626, "step": 146200 }, { "epoch": 0.29128283182455694, "grad_norm": 0.18683184683322906, "learning_rate": 0.002, "loss": 2.5826, "step": 146210 }, { "epoch": 0.29130275404819583, "grad_norm": 0.15755656361579895, "learning_rate": 0.002, "loss": 2.5685, "step": 146220 }, { "epoch": 0.2913226762718348, "grad_norm": 0.17530763149261475, "learning_rate": 0.002, "loss": 2.5733, "step": 146230 }, { "epoch": 0.29134259849547367, "grad_norm": 0.15862570703029633, "learning_rate": 0.002, "loss": 2.5552, "step": 146240 }, { "epoch": 0.29136252071911256, "grad_norm": 0.16544978320598602, "learning_rate": 0.002, "loss": 2.5667, "step": 146250 }, { "epoch": 0.2913824429427515, "grad_norm": 0.20882616937160492, "learning_rate": 0.002, "loss": 2.5588, "step": 146260 }, { "epoch": 0.2914023651663904, "grad_norm": 0.16058027744293213, "learning_rate": 0.002, "loss": 2.5595, "step": 146270 }, { "epoch": 0.29142228739002934, "grad_norm": 0.1718166470527649, "learning_rate": 0.002, "loss": 2.5536, "step": 146280 }, { "epoch": 0.29144220961366823, "grad_norm": 0.18908031284809113, "learning_rate": 0.002, "loss": 2.5601, "step": 146290 }, { "epoch": 0.2914621318373071, "grad_norm": 0.4026181101799011, "learning_rate": 0.002, "loss": 2.5685, "step": 146300 }, { "epoch": 0.29148205406094607, "grad_norm": 0.14928461611270905, "learning_rate": 0.002, "loss": 2.5532, "step": 146310 }, { "epoch": 0.29150197628458496, "grad_norm": 0.1775681972503662, "learning_rate": 0.002, "loss": 2.5684, "step": 146320 }, { "epoch": 0.2915218985082239, "grad_norm": 0.1752031296491623, "learning_rate": 0.002, "loss": 2.5651, "step": 146330 }, { "epoch": 0.2915418207318628, "grad_norm": 0.16505984961986542, "learning_rate": 0.002, "loss": 2.5667, "step": 146340 }, { "epoch": 0.29156174295550175, "grad_norm": 0.17819897830486298, "learning_rate": 0.002, "loss": 2.5903, "step": 146350 }, { "epoch": 0.29158166517914064, "grad_norm": 0.17820024490356445, "learning_rate": 0.002, "loss": 2.5679, "step": 146360 }, { "epoch": 0.2916015874027795, "grad_norm": 0.18437843024730682, "learning_rate": 0.002, "loss": 2.5593, "step": 146370 }, { "epoch": 0.2916215096264185, "grad_norm": 0.1608983725309372, "learning_rate": 0.002, "loss": 2.5766, "step": 146380 }, { "epoch": 0.29164143185005736, "grad_norm": 0.1555274873971939, "learning_rate": 0.002, "loss": 2.5583, "step": 146390 }, { "epoch": 0.2916613540736963, "grad_norm": 0.17655958235263824, "learning_rate": 0.002, "loss": 2.5577, "step": 146400 }, { "epoch": 0.2916812762973352, "grad_norm": 0.1661803424358368, "learning_rate": 0.002, "loss": 2.5566, "step": 146410 }, { "epoch": 0.2917011985209741, "grad_norm": 0.14781086146831512, "learning_rate": 0.002, "loss": 2.573, "step": 146420 }, { "epoch": 0.29172112074461304, "grad_norm": 0.1760723888874054, "learning_rate": 0.002, "loss": 2.5634, "step": 146430 }, { "epoch": 0.29174104296825193, "grad_norm": 0.19195842742919922, "learning_rate": 0.002, "loss": 2.5672, "step": 146440 }, { "epoch": 0.2917609651918909, "grad_norm": 0.1648845225572586, "learning_rate": 0.002, "loss": 2.5598, "step": 146450 }, { "epoch": 0.29178088741552977, "grad_norm": 0.17386111617088318, "learning_rate": 0.002, "loss": 2.567, "step": 146460 }, { "epoch": 0.2918008096391687, "grad_norm": 0.16220790147781372, "learning_rate": 0.002, "loss": 2.5743, "step": 146470 }, { "epoch": 0.2918207318628076, "grad_norm": 0.18258027732372284, "learning_rate": 0.002, "loss": 2.5638, "step": 146480 }, { "epoch": 0.2918406540864465, "grad_norm": 0.175845667719841, "learning_rate": 0.002, "loss": 2.5744, "step": 146490 }, { "epoch": 0.29186057631008544, "grad_norm": 0.155806764960289, "learning_rate": 0.002, "loss": 2.5577, "step": 146500 }, { "epoch": 0.29188049853372433, "grad_norm": 0.1642867773771286, "learning_rate": 0.002, "loss": 2.5549, "step": 146510 }, { "epoch": 0.2919004207573633, "grad_norm": 0.20611511170864105, "learning_rate": 0.002, "loss": 2.5509, "step": 146520 }, { "epoch": 0.29192034298100217, "grad_norm": 0.17015527188777924, "learning_rate": 0.002, "loss": 2.5471, "step": 146530 }, { "epoch": 0.29194026520464106, "grad_norm": 0.18651455640792847, "learning_rate": 0.002, "loss": 2.5611, "step": 146540 }, { "epoch": 0.29196018742828, "grad_norm": 0.1787329465150833, "learning_rate": 0.002, "loss": 2.5638, "step": 146550 }, { "epoch": 0.2919801096519189, "grad_norm": 0.1730961799621582, "learning_rate": 0.002, "loss": 2.5614, "step": 146560 }, { "epoch": 0.29200003187555784, "grad_norm": 0.1622660905122757, "learning_rate": 0.002, "loss": 2.5711, "step": 146570 }, { "epoch": 0.29201995409919673, "grad_norm": 0.16369886696338654, "learning_rate": 0.002, "loss": 2.5542, "step": 146580 }, { "epoch": 0.2920398763228356, "grad_norm": 0.15052689611911774, "learning_rate": 0.002, "loss": 2.5678, "step": 146590 }, { "epoch": 0.29205979854647457, "grad_norm": 0.18213988840579987, "learning_rate": 0.002, "loss": 2.5479, "step": 146600 }, { "epoch": 0.29207972077011346, "grad_norm": 0.19324743747711182, "learning_rate": 0.002, "loss": 2.5581, "step": 146610 }, { "epoch": 0.2920996429937524, "grad_norm": 0.1603924185037613, "learning_rate": 0.002, "loss": 2.5684, "step": 146620 }, { "epoch": 0.2921195652173913, "grad_norm": 0.1911231428384781, "learning_rate": 0.002, "loss": 2.5565, "step": 146630 }, { "epoch": 0.29213948744103024, "grad_norm": 0.17388299107551575, "learning_rate": 0.002, "loss": 2.5735, "step": 146640 }, { "epoch": 0.29215940966466913, "grad_norm": 0.16511715948581696, "learning_rate": 0.002, "loss": 2.5718, "step": 146650 }, { "epoch": 0.292179331888308, "grad_norm": 0.16199596226215363, "learning_rate": 0.002, "loss": 2.5712, "step": 146660 }, { "epoch": 0.29219925411194697, "grad_norm": 0.17217648029327393, "learning_rate": 0.002, "loss": 2.5537, "step": 146670 }, { "epoch": 0.29221917633558586, "grad_norm": 0.16639167070388794, "learning_rate": 0.002, "loss": 2.5729, "step": 146680 }, { "epoch": 0.2922390985592248, "grad_norm": 0.16515670716762543, "learning_rate": 0.002, "loss": 2.5639, "step": 146690 }, { "epoch": 0.2922590207828637, "grad_norm": 0.1720670759677887, "learning_rate": 0.002, "loss": 2.5667, "step": 146700 }, { "epoch": 0.2922789430065026, "grad_norm": 0.18891911208629608, "learning_rate": 0.002, "loss": 2.5542, "step": 146710 }, { "epoch": 0.29229886523014154, "grad_norm": 0.14853183925151825, "learning_rate": 0.002, "loss": 2.5527, "step": 146720 }, { "epoch": 0.2923187874537804, "grad_norm": 0.15664604306221008, "learning_rate": 0.002, "loss": 2.5398, "step": 146730 }, { "epoch": 0.2923387096774194, "grad_norm": 0.18593725562095642, "learning_rate": 0.002, "loss": 2.5622, "step": 146740 }, { "epoch": 0.29235863190105826, "grad_norm": 0.14299516379833221, "learning_rate": 0.002, "loss": 2.5613, "step": 146750 }, { "epoch": 0.2923785541246972, "grad_norm": 0.17166781425476074, "learning_rate": 0.002, "loss": 2.5635, "step": 146760 }, { "epoch": 0.2923984763483361, "grad_norm": 0.15210004150867462, "learning_rate": 0.002, "loss": 2.5632, "step": 146770 }, { "epoch": 0.292418398571975, "grad_norm": 0.1442963033914566, "learning_rate": 0.002, "loss": 2.5649, "step": 146780 }, { "epoch": 0.29243832079561394, "grad_norm": 0.15102989971637726, "learning_rate": 0.002, "loss": 2.5589, "step": 146790 }, { "epoch": 0.29245824301925283, "grad_norm": 0.15015944838523865, "learning_rate": 0.002, "loss": 2.5775, "step": 146800 }, { "epoch": 0.2924781652428918, "grad_norm": 0.17291444540023804, "learning_rate": 0.002, "loss": 2.567, "step": 146810 }, { "epoch": 0.29249808746653067, "grad_norm": 0.1848314255475998, "learning_rate": 0.002, "loss": 2.5516, "step": 146820 }, { "epoch": 0.29251800969016956, "grad_norm": 0.1700584590435028, "learning_rate": 0.002, "loss": 2.561, "step": 146830 }, { "epoch": 0.2925379319138085, "grad_norm": 0.1641874462366104, "learning_rate": 0.002, "loss": 2.5563, "step": 146840 }, { "epoch": 0.2925578541374474, "grad_norm": 0.1784629374742508, "learning_rate": 0.002, "loss": 2.5628, "step": 146850 }, { "epoch": 0.29257777636108634, "grad_norm": 0.18319199979305267, "learning_rate": 0.002, "loss": 2.5681, "step": 146860 }, { "epoch": 0.29259769858472523, "grad_norm": 0.1709112823009491, "learning_rate": 0.002, "loss": 2.5803, "step": 146870 }, { "epoch": 0.2926176208083641, "grad_norm": 0.16348783671855927, "learning_rate": 0.002, "loss": 2.5667, "step": 146880 }, { "epoch": 0.29263754303200307, "grad_norm": 0.17469096183776855, "learning_rate": 0.002, "loss": 2.5549, "step": 146890 }, { "epoch": 0.29265746525564196, "grad_norm": 0.1624179631471634, "learning_rate": 0.002, "loss": 2.5558, "step": 146900 }, { "epoch": 0.2926773874792809, "grad_norm": 0.1470821350812912, "learning_rate": 0.002, "loss": 2.5721, "step": 146910 }, { "epoch": 0.2926973097029198, "grad_norm": 0.15255515277385712, "learning_rate": 0.002, "loss": 2.5721, "step": 146920 }, { "epoch": 0.29271723192655874, "grad_norm": 0.18291406333446503, "learning_rate": 0.002, "loss": 2.563, "step": 146930 }, { "epoch": 0.29273715415019763, "grad_norm": 0.16390934586524963, "learning_rate": 0.002, "loss": 2.5704, "step": 146940 }, { "epoch": 0.2927570763738365, "grad_norm": 0.16444824635982513, "learning_rate": 0.002, "loss": 2.5434, "step": 146950 }, { "epoch": 0.29277699859747547, "grad_norm": 0.17591232061386108, "learning_rate": 0.002, "loss": 2.5696, "step": 146960 }, { "epoch": 0.29279692082111436, "grad_norm": 0.1583576500415802, "learning_rate": 0.002, "loss": 2.558, "step": 146970 }, { "epoch": 0.2928168430447533, "grad_norm": 0.19984303414821625, "learning_rate": 0.002, "loss": 2.5566, "step": 146980 }, { "epoch": 0.2928367652683922, "grad_norm": 0.16715383529663086, "learning_rate": 0.002, "loss": 2.5582, "step": 146990 }, { "epoch": 0.2928566874920311, "grad_norm": 0.1746893674135208, "learning_rate": 0.002, "loss": 2.5557, "step": 147000 }, { "epoch": 0.29287660971567003, "grad_norm": 0.15417860448360443, "learning_rate": 0.002, "loss": 2.5672, "step": 147010 }, { "epoch": 0.2928965319393089, "grad_norm": 0.15736256539821625, "learning_rate": 0.002, "loss": 2.551, "step": 147020 }, { "epoch": 0.29291645416294787, "grad_norm": 0.571026623249054, "learning_rate": 0.002, "loss": 2.5564, "step": 147030 }, { "epoch": 0.29293637638658676, "grad_norm": 0.162140890955925, "learning_rate": 0.002, "loss": 2.5708, "step": 147040 }, { "epoch": 0.29295629861022565, "grad_norm": 0.15201431512832642, "learning_rate": 0.002, "loss": 2.5635, "step": 147050 }, { "epoch": 0.2929762208338646, "grad_norm": 0.15031246840953827, "learning_rate": 0.002, "loss": 2.5687, "step": 147060 }, { "epoch": 0.2929961430575035, "grad_norm": 0.1660400778055191, "learning_rate": 0.002, "loss": 2.5663, "step": 147070 }, { "epoch": 0.29301606528114243, "grad_norm": 0.23751609027385712, "learning_rate": 0.002, "loss": 2.5689, "step": 147080 }, { "epoch": 0.2930359875047813, "grad_norm": 0.1577318161725998, "learning_rate": 0.002, "loss": 2.5433, "step": 147090 }, { "epoch": 0.29305590972842027, "grad_norm": 0.17063212394714355, "learning_rate": 0.002, "loss": 2.5646, "step": 147100 }, { "epoch": 0.29307583195205916, "grad_norm": 0.16629844903945923, "learning_rate": 0.002, "loss": 2.5575, "step": 147110 }, { "epoch": 0.29309575417569805, "grad_norm": 0.14976955950260162, "learning_rate": 0.002, "loss": 2.5621, "step": 147120 }, { "epoch": 0.293115676399337, "grad_norm": 0.2002071738243103, "learning_rate": 0.002, "loss": 2.5689, "step": 147130 }, { "epoch": 0.2931355986229759, "grad_norm": 0.1408665031194687, "learning_rate": 0.002, "loss": 2.554, "step": 147140 }, { "epoch": 0.29315552084661484, "grad_norm": 0.14470753073692322, "learning_rate": 0.002, "loss": 2.5356, "step": 147150 }, { "epoch": 0.2931754430702537, "grad_norm": 0.18240168690681458, "learning_rate": 0.002, "loss": 2.5548, "step": 147160 }, { "epoch": 0.2931953652938926, "grad_norm": 0.14686834812164307, "learning_rate": 0.002, "loss": 2.5758, "step": 147170 }, { "epoch": 0.29321528751753156, "grad_norm": 0.15476612746715546, "learning_rate": 0.002, "loss": 2.5674, "step": 147180 }, { "epoch": 0.29323520974117046, "grad_norm": 0.14612740278244019, "learning_rate": 0.002, "loss": 2.5691, "step": 147190 }, { "epoch": 0.2932551319648094, "grad_norm": 0.13590264320373535, "learning_rate": 0.002, "loss": 2.5567, "step": 147200 }, { "epoch": 0.2932750541884483, "grad_norm": 0.1862790435552597, "learning_rate": 0.002, "loss": 2.5669, "step": 147210 }, { "epoch": 0.29329497641208724, "grad_norm": 0.1475953310728073, "learning_rate": 0.002, "loss": 2.5609, "step": 147220 }, { "epoch": 0.29331489863572613, "grad_norm": 0.1510322242975235, "learning_rate": 0.002, "loss": 2.5613, "step": 147230 }, { "epoch": 0.293334820859365, "grad_norm": 0.17199428379535675, "learning_rate": 0.002, "loss": 2.5586, "step": 147240 }, { "epoch": 0.29335474308300397, "grad_norm": 0.17335012555122375, "learning_rate": 0.002, "loss": 2.563, "step": 147250 }, { "epoch": 0.29337466530664286, "grad_norm": 0.16781781613826752, "learning_rate": 0.002, "loss": 2.567, "step": 147260 }, { "epoch": 0.2933945875302818, "grad_norm": 0.21720586717128754, "learning_rate": 0.002, "loss": 2.5734, "step": 147270 }, { "epoch": 0.2934145097539207, "grad_norm": 0.18056811392307281, "learning_rate": 0.002, "loss": 2.5499, "step": 147280 }, { "epoch": 0.2934344319775596, "grad_norm": 0.17846696078777313, "learning_rate": 0.002, "loss": 2.5448, "step": 147290 }, { "epoch": 0.29345435420119853, "grad_norm": 0.20428235828876495, "learning_rate": 0.002, "loss": 2.5549, "step": 147300 }, { "epoch": 0.2934742764248374, "grad_norm": 0.1563778668642044, "learning_rate": 0.002, "loss": 2.5625, "step": 147310 }, { "epoch": 0.29349419864847637, "grad_norm": 0.1558084934949875, "learning_rate": 0.002, "loss": 2.5464, "step": 147320 }, { "epoch": 0.29351412087211526, "grad_norm": 0.17203019559383392, "learning_rate": 0.002, "loss": 2.5589, "step": 147330 }, { "epoch": 0.29353404309575415, "grad_norm": 0.1536666750907898, "learning_rate": 0.002, "loss": 2.573, "step": 147340 }, { "epoch": 0.2935539653193931, "grad_norm": 0.18178243935108185, "learning_rate": 0.002, "loss": 2.5512, "step": 147350 }, { "epoch": 0.293573887543032, "grad_norm": 0.14598487317562103, "learning_rate": 0.002, "loss": 2.552, "step": 147360 }, { "epoch": 0.29359380976667093, "grad_norm": 0.18589626252651215, "learning_rate": 0.002, "loss": 2.5588, "step": 147370 }, { "epoch": 0.2936137319903098, "grad_norm": 0.1773405522108078, "learning_rate": 0.002, "loss": 2.5582, "step": 147380 }, { "epoch": 0.29363365421394877, "grad_norm": 0.14907459914684296, "learning_rate": 0.002, "loss": 2.5738, "step": 147390 }, { "epoch": 0.29365357643758766, "grad_norm": 0.16665640473365784, "learning_rate": 0.002, "loss": 2.5494, "step": 147400 }, { "epoch": 0.29367349866122655, "grad_norm": 0.16266338527202606, "learning_rate": 0.002, "loss": 2.5635, "step": 147410 }, { "epoch": 0.2936934208848655, "grad_norm": 0.20718733966350555, "learning_rate": 0.002, "loss": 2.5605, "step": 147420 }, { "epoch": 0.2937133431085044, "grad_norm": 0.16341350972652435, "learning_rate": 0.002, "loss": 2.5599, "step": 147430 }, { "epoch": 0.29373326533214333, "grad_norm": 0.15569405257701874, "learning_rate": 0.002, "loss": 2.5627, "step": 147440 }, { "epoch": 0.2937531875557822, "grad_norm": 0.16574758291244507, "learning_rate": 0.002, "loss": 2.5704, "step": 147450 }, { "epoch": 0.2937731097794211, "grad_norm": 0.15594296157360077, "learning_rate": 0.002, "loss": 2.568, "step": 147460 }, { "epoch": 0.29379303200306006, "grad_norm": 0.15797969698905945, "learning_rate": 0.002, "loss": 2.5757, "step": 147470 }, { "epoch": 0.29381295422669895, "grad_norm": 0.16821424663066864, "learning_rate": 0.002, "loss": 2.564, "step": 147480 }, { "epoch": 0.2938328764503379, "grad_norm": 0.14106598496437073, "learning_rate": 0.002, "loss": 2.5544, "step": 147490 }, { "epoch": 0.2938527986739768, "grad_norm": 0.15666504204273224, "learning_rate": 0.002, "loss": 2.5623, "step": 147500 }, { "epoch": 0.29387272089761574, "grad_norm": 0.16487105190753937, "learning_rate": 0.002, "loss": 2.5689, "step": 147510 }, { "epoch": 0.2938926431212546, "grad_norm": 0.19883865118026733, "learning_rate": 0.002, "loss": 2.5769, "step": 147520 }, { "epoch": 0.2939125653448935, "grad_norm": 0.1783977746963501, "learning_rate": 0.002, "loss": 2.5654, "step": 147530 }, { "epoch": 0.29393248756853246, "grad_norm": 0.16561757028102875, "learning_rate": 0.002, "loss": 2.5702, "step": 147540 }, { "epoch": 0.29395240979217135, "grad_norm": 0.1510280966758728, "learning_rate": 0.002, "loss": 2.5802, "step": 147550 }, { "epoch": 0.2939723320158103, "grad_norm": 0.16395623981952667, "learning_rate": 0.002, "loss": 2.5568, "step": 147560 }, { "epoch": 0.2939922542394492, "grad_norm": 0.18812046945095062, "learning_rate": 0.002, "loss": 2.5701, "step": 147570 }, { "epoch": 0.2940121764630881, "grad_norm": 0.1497485190629959, "learning_rate": 0.002, "loss": 2.5693, "step": 147580 }, { "epoch": 0.29403209868672703, "grad_norm": 0.17985470592975616, "learning_rate": 0.002, "loss": 2.5629, "step": 147590 }, { "epoch": 0.2940520209103659, "grad_norm": 0.1943022608757019, "learning_rate": 0.002, "loss": 2.5499, "step": 147600 }, { "epoch": 0.29407194313400487, "grad_norm": 0.15013858675956726, "learning_rate": 0.002, "loss": 2.5667, "step": 147610 }, { "epoch": 0.29409186535764376, "grad_norm": 0.1753414124250412, "learning_rate": 0.002, "loss": 2.5631, "step": 147620 }, { "epoch": 0.29411178758128265, "grad_norm": 0.146358922123909, "learning_rate": 0.002, "loss": 2.5693, "step": 147630 }, { "epoch": 0.2941317098049216, "grad_norm": 0.16083107888698578, "learning_rate": 0.002, "loss": 2.563, "step": 147640 }, { "epoch": 0.2941516320285605, "grad_norm": 0.15548785030841827, "learning_rate": 0.002, "loss": 2.57, "step": 147650 }, { "epoch": 0.29417155425219943, "grad_norm": 0.16675829887390137, "learning_rate": 0.002, "loss": 2.5603, "step": 147660 }, { "epoch": 0.2941914764758383, "grad_norm": 0.15366876125335693, "learning_rate": 0.002, "loss": 2.5693, "step": 147670 }, { "epoch": 0.29421139869947727, "grad_norm": 0.14622484147548676, "learning_rate": 0.002, "loss": 2.5556, "step": 147680 }, { "epoch": 0.29423132092311616, "grad_norm": 0.16360892355442047, "learning_rate": 0.002, "loss": 2.5588, "step": 147690 }, { "epoch": 0.29425124314675505, "grad_norm": 0.19169555604457855, "learning_rate": 0.002, "loss": 2.5525, "step": 147700 }, { "epoch": 0.294271165370394, "grad_norm": 0.17428331077098846, "learning_rate": 0.002, "loss": 2.5681, "step": 147710 }, { "epoch": 0.2942910875940329, "grad_norm": 0.1801401674747467, "learning_rate": 0.002, "loss": 2.5782, "step": 147720 }, { "epoch": 0.29431100981767183, "grad_norm": 0.18776363134384155, "learning_rate": 0.002, "loss": 2.5711, "step": 147730 }, { "epoch": 0.2943309320413107, "grad_norm": 0.18622618913650513, "learning_rate": 0.002, "loss": 2.5647, "step": 147740 }, { "epoch": 0.2943508542649496, "grad_norm": 0.17411139607429504, "learning_rate": 0.002, "loss": 2.5741, "step": 147750 }, { "epoch": 0.29437077648858856, "grad_norm": 0.17551679909229279, "learning_rate": 0.002, "loss": 2.5643, "step": 147760 }, { "epoch": 0.29439069871222745, "grad_norm": 0.1577775627374649, "learning_rate": 0.002, "loss": 2.5581, "step": 147770 }, { "epoch": 0.2944106209358664, "grad_norm": 0.1769738495349884, "learning_rate": 0.002, "loss": 2.5694, "step": 147780 }, { "epoch": 0.2944305431595053, "grad_norm": 0.15914134681224823, "learning_rate": 0.002, "loss": 2.5623, "step": 147790 }, { "epoch": 0.2944504653831442, "grad_norm": 0.19187644124031067, "learning_rate": 0.002, "loss": 2.5601, "step": 147800 }, { "epoch": 0.2944703876067831, "grad_norm": 0.142894446849823, "learning_rate": 0.002, "loss": 2.5364, "step": 147810 }, { "epoch": 0.294490309830422, "grad_norm": 0.1668267846107483, "learning_rate": 0.002, "loss": 2.5731, "step": 147820 }, { "epoch": 0.29451023205406096, "grad_norm": 0.17064443230628967, "learning_rate": 0.002, "loss": 2.571, "step": 147830 }, { "epoch": 0.29453015427769985, "grad_norm": 0.15193255245685577, "learning_rate": 0.002, "loss": 2.5578, "step": 147840 }, { "epoch": 0.2945500765013388, "grad_norm": 0.1649392694234848, "learning_rate": 0.002, "loss": 2.5573, "step": 147850 }, { "epoch": 0.2945699987249777, "grad_norm": 0.19023913145065308, "learning_rate": 0.002, "loss": 2.5673, "step": 147860 }, { "epoch": 0.2945899209486166, "grad_norm": 0.16781538724899292, "learning_rate": 0.002, "loss": 2.564, "step": 147870 }, { "epoch": 0.2946098431722555, "grad_norm": 0.16935621201992035, "learning_rate": 0.002, "loss": 2.5582, "step": 147880 }, { "epoch": 0.2946297653958944, "grad_norm": 0.14416839182376862, "learning_rate": 0.002, "loss": 2.5773, "step": 147890 }, { "epoch": 0.29464968761953336, "grad_norm": 0.18554052710533142, "learning_rate": 0.002, "loss": 2.5685, "step": 147900 }, { "epoch": 0.29466960984317225, "grad_norm": 0.1585799902677536, "learning_rate": 0.002, "loss": 2.5754, "step": 147910 }, { "epoch": 0.29468953206681114, "grad_norm": 0.17028534412384033, "learning_rate": 0.002, "loss": 2.5676, "step": 147920 }, { "epoch": 0.2947094542904501, "grad_norm": 0.13731183111667633, "learning_rate": 0.002, "loss": 2.5697, "step": 147930 }, { "epoch": 0.294729376514089, "grad_norm": 0.19189108908176422, "learning_rate": 0.002, "loss": 2.5482, "step": 147940 }, { "epoch": 0.2947492987377279, "grad_norm": 0.17267848551273346, "learning_rate": 0.002, "loss": 2.57, "step": 147950 }, { "epoch": 0.2947692209613668, "grad_norm": 0.15330623090267181, "learning_rate": 0.002, "loss": 2.5748, "step": 147960 }, { "epoch": 0.29478914318500576, "grad_norm": 0.17015495896339417, "learning_rate": 0.002, "loss": 2.5649, "step": 147970 }, { "epoch": 0.29480906540864465, "grad_norm": 0.15370576083660126, "learning_rate": 0.002, "loss": 2.5616, "step": 147980 }, { "epoch": 0.29482898763228355, "grad_norm": 0.16169773042201996, "learning_rate": 0.002, "loss": 2.5577, "step": 147990 }, { "epoch": 0.2948489098559225, "grad_norm": 0.19327978789806366, "learning_rate": 0.002, "loss": 2.559, "step": 148000 }, { "epoch": 0.2948688320795614, "grad_norm": 0.1665196418762207, "learning_rate": 0.002, "loss": 2.5697, "step": 148010 }, { "epoch": 0.29488875430320033, "grad_norm": 0.1655711531639099, "learning_rate": 0.002, "loss": 2.5723, "step": 148020 }, { "epoch": 0.2949086765268392, "grad_norm": 0.20674796402454376, "learning_rate": 0.002, "loss": 2.5637, "step": 148030 }, { "epoch": 0.2949285987504781, "grad_norm": 0.1465035229921341, "learning_rate": 0.002, "loss": 2.5651, "step": 148040 }, { "epoch": 0.29494852097411706, "grad_norm": 0.17136457562446594, "learning_rate": 0.002, "loss": 2.5645, "step": 148050 }, { "epoch": 0.29496844319775595, "grad_norm": 0.17544950544834137, "learning_rate": 0.002, "loss": 2.5603, "step": 148060 }, { "epoch": 0.2949883654213949, "grad_norm": 0.14989729225635529, "learning_rate": 0.002, "loss": 2.5618, "step": 148070 }, { "epoch": 0.2950082876450338, "grad_norm": 0.17682376503944397, "learning_rate": 0.002, "loss": 2.565, "step": 148080 }, { "epoch": 0.2950282098686727, "grad_norm": 0.16440561413764954, "learning_rate": 0.002, "loss": 2.5601, "step": 148090 }, { "epoch": 0.2950481320923116, "grad_norm": 0.2039395421743393, "learning_rate": 0.002, "loss": 2.5656, "step": 148100 }, { "epoch": 0.2950680543159505, "grad_norm": 0.1573086529970169, "learning_rate": 0.002, "loss": 2.5769, "step": 148110 }, { "epoch": 0.29508797653958946, "grad_norm": 0.17214658856391907, "learning_rate": 0.002, "loss": 2.5731, "step": 148120 }, { "epoch": 0.29510789876322835, "grad_norm": 0.15629245340824127, "learning_rate": 0.002, "loss": 2.5522, "step": 148130 }, { "epoch": 0.2951278209868673, "grad_norm": 0.15917332470417023, "learning_rate": 0.002, "loss": 2.5689, "step": 148140 }, { "epoch": 0.2951477432105062, "grad_norm": 0.17374907433986664, "learning_rate": 0.002, "loss": 2.5701, "step": 148150 }, { "epoch": 0.2951676654341451, "grad_norm": 0.1594332456588745, "learning_rate": 0.002, "loss": 2.5704, "step": 148160 }, { "epoch": 0.295187587657784, "grad_norm": 0.18890705704689026, "learning_rate": 0.002, "loss": 2.5686, "step": 148170 }, { "epoch": 0.2952075098814229, "grad_norm": 0.15187868475914001, "learning_rate": 0.002, "loss": 2.564, "step": 148180 }, { "epoch": 0.29522743210506186, "grad_norm": 0.15384463965892792, "learning_rate": 0.002, "loss": 2.5655, "step": 148190 }, { "epoch": 0.29524735432870075, "grad_norm": 0.2048705518245697, "learning_rate": 0.002, "loss": 2.5876, "step": 148200 }, { "epoch": 0.29526727655233964, "grad_norm": 0.1535142958164215, "learning_rate": 0.002, "loss": 2.5543, "step": 148210 }, { "epoch": 0.2952871987759786, "grad_norm": 0.1621309220790863, "learning_rate": 0.002, "loss": 2.5493, "step": 148220 }, { "epoch": 0.2953071209996175, "grad_norm": 0.1567484438419342, "learning_rate": 0.002, "loss": 2.5747, "step": 148230 }, { "epoch": 0.2953270432232564, "grad_norm": 0.15087127685546875, "learning_rate": 0.002, "loss": 2.5722, "step": 148240 }, { "epoch": 0.2953469654468953, "grad_norm": 0.20766815543174744, "learning_rate": 0.002, "loss": 2.5661, "step": 148250 }, { "epoch": 0.29536688767053426, "grad_norm": 0.1727260947227478, "learning_rate": 0.002, "loss": 2.555, "step": 148260 }, { "epoch": 0.29538680989417315, "grad_norm": 0.1588594913482666, "learning_rate": 0.002, "loss": 2.5555, "step": 148270 }, { "epoch": 0.29540673211781204, "grad_norm": 0.16481749713420868, "learning_rate": 0.002, "loss": 2.5633, "step": 148280 }, { "epoch": 0.295426654341451, "grad_norm": 0.1660950481891632, "learning_rate": 0.002, "loss": 2.5522, "step": 148290 }, { "epoch": 0.2954465765650899, "grad_norm": 0.16279760003089905, "learning_rate": 0.002, "loss": 2.575, "step": 148300 }, { "epoch": 0.2954664987887288, "grad_norm": 0.17518335580825806, "learning_rate": 0.002, "loss": 2.5597, "step": 148310 }, { "epoch": 0.2954864210123677, "grad_norm": 0.15825967490673065, "learning_rate": 0.002, "loss": 2.5729, "step": 148320 }, { "epoch": 0.2955063432360066, "grad_norm": 0.16897405683994293, "learning_rate": 0.002, "loss": 2.5584, "step": 148330 }, { "epoch": 0.29552626545964555, "grad_norm": 0.17204546928405762, "learning_rate": 0.002, "loss": 2.5668, "step": 148340 }, { "epoch": 0.29554618768328444, "grad_norm": 0.17927363514900208, "learning_rate": 0.002, "loss": 2.5601, "step": 148350 }, { "epoch": 0.2955661099069234, "grad_norm": 0.15702904760837555, "learning_rate": 0.002, "loss": 2.5614, "step": 148360 }, { "epoch": 0.2955860321305623, "grad_norm": 0.16919033229351044, "learning_rate": 0.002, "loss": 2.566, "step": 148370 }, { "epoch": 0.2956059543542012, "grad_norm": 0.19312646985054016, "learning_rate": 0.002, "loss": 2.5682, "step": 148380 }, { "epoch": 0.2956258765778401, "grad_norm": 0.1858425736427307, "learning_rate": 0.002, "loss": 2.561, "step": 148390 }, { "epoch": 0.295645798801479, "grad_norm": 0.147919163107872, "learning_rate": 0.002, "loss": 2.5713, "step": 148400 }, { "epoch": 0.29566572102511796, "grad_norm": 0.17980286478996277, "learning_rate": 0.002, "loss": 2.5612, "step": 148410 }, { "epoch": 0.29568564324875685, "grad_norm": 0.17170006036758423, "learning_rate": 0.002, "loss": 2.568, "step": 148420 }, { "epoch": 0.2957055654723958, "grad_norm": 0.16956806182861328, "learning_rate": 0.002, "loss": 2.5611, "step": 148430 }, { "epoch": 0.2957254876960347, "grad_norm": 0.16660791635513306, "learning_rate": 0.002, "loss": 2.574, "step": 148440 }, { "epoch": 0.2957454099196736, "grad_norm": 0.20451760292053223, "learning_rate": 0.002, "loss": 2.5618, "step": 148450 }, { "epoch": 0.2957653321433125, "grad_norm": 0.19151762127876282, "learning_rate": 0.002, "loss": 2.578, "step": 148460 }, { "epoch": 0.2957852543669514, "grad_norm": 0.14791662991046906, "learning_rate": 0.002, "loss": 2.5525, "step": 148470 }, { "epoch": 0.29580517659059036, "grad_norm": 0.17231038212776184, "learning_rate": 0.002, "loss": 2.5802, "step": 148480 }, { "epoch": 0.29582509881422925, "grad_norm": 0.18364953994750977, "learning_rate": 0.002, "loss": 2.5551, "step": 148490 }, { "epoch": 0.29584502103786814, "grad_norm": 0.1723666787147522, "learning_rate": 0.002, "loss": 2.5573, "step": 148500 }, { "epoch": 0.2958649432615071, "grad_norm": 0.18001174926757812, "learning_rate": 0.002, "loss": 2.5587, "step": 148510 }, { "epoch": 0.295884865485146, "grad_norm": 0.15986792743206024, "learning_rate": 0.002, "loss": 2.5349, "step": 148520 }, { "epoch": 0.2959047877087849, "grad_norm": 0.16068775951862335, "learning_rate": 0.002, "loss": 2.5673, "step": 148530 }, { "epoch": 0.2959247099324238, "grad_norm": 0.15072958171367645, "learning_rate": 0.002, "loss": 2.5693, "step": 148540 }, { "epoch": 0.2959446321560627, "grad_norm": 0.15550003945827484, "learning_rate": 0.002, "loss": 2.5692, "step": 148550 }, { "epoch": 0.29596455437970165, "grad_norm": 0.19134938716888428, "learning_rate": 0.002, "loss": 2.5753, "step": 148560 }, { "epoch": 0.29598447660334054, "grad_norm": 0.15798380970954895, "learning_rate": 0.002, "loss": 2.5726, "step": 148570 }, { "epoch": 0.2960043988269795, "grad_norm": 0.18066734075546265, "learning_rate": 0.002, "loss": 2.5518, "step": 148580 }, { "epoch": 0.2960243210506184, "grad_norm": 0.1745389997959137, "learning_rate": 0.002, "loss": 2.5616, "step": 148590 }, { "epoch": 0.2960442432742573, "grad_norm": 0.17924344539642334, "learning_rate": 0.002, "loss": 2.5651, "step": 148600 }, { "epoch": 0.2960641654978962, "grad_norm": 0.17068614065647125, "learning_rate": 0.002, "loss": 2.5823, "step": 148610 }, { "epoch": 0.2960840877215351, "grad_norm": 0.1744663268327713, "learning_rate": 0.002, "loss": 2.5547, "step": 148620 }, { "epoch": 0.29610400994517405, "grad_norm": 0.14815033972263336, "learning_rate": 0.002, "loss": 2.5594, "step": 148630 }, { "epoch": 0.29612393216881294, "grad_norm": 0.15206792950630188, "learning_rate": 0.002, "loss": 2.5488, "step": 148640 }, { "epoch": 0.2961438543924519, "grad_norm": 0.19120918214321136, "learning_rate": 0.002, "loss": 2.5602, "step": 148650 }, { "epoch": 0.2961637766160908, "grad_norm": 0.15952470898628235, "learning_rate": 0.002, "loss": 2.553, "step": 148660 }, { "epoch": 0.29618369883972967, "grad_norm": 0.1634291261434555, "learning_rate": 0.002, "loss": 2.554, "step": 148670 }, { "epoch": 0.2962036210633686, "grad_norm": 0.16968050599098206, "learning_rate": 0.002, "loss": 2.57, "step": 148680 }, { "epoch": 0.2962235432870075, "grad_norm": 0.17239443957805634, "learning_rate": 0.002, "loss": 2.5764, "step": 148690 }, { "epoch": 0.29624346551064645, "grad_norm": 0.1576828956604004, "learning_rate": 0.002, "loss": 2.555, "step": 148700 }, { "epoch": 0.29626338773428534, "grad_norm": 0.16491331160068512, "learning_rate": 0.002, "loss": 2.5692, "step": 148710 }, { "epoch": 0.2962833099579243, "grad_norm": 0.18101254105567932, "learning_rate": 0.002, "loss": 2.5632, "step": 148720 }, { "epoch": 0.2963032321815632, "grad_norm": 0.14665023982524872, "learning_rate": 0.002, "loss": 2.5687, "step": 148730 }, { "epoch": 0.29632315440520207, "grad_norm": 0.17904724180698395, "learning_rate": 0.002, "loss": 2.5532, "step": 148740 }, { "epoch": 0.296343076628841, "grad_norm": 0.13321396708488464, "learning_rate": 0.002, "loss": 2.5633, "step": 148750 }, { "epoch": 0.2963629988524799, "grad_norm": 0.1769064962863922, "learning_rate": 0.002, "loss": 2.5482, "step": 148760 }, { "epoch": 0.29638292107611885, "grad_norm": 0.18301331996917725, "learning_rate": 0.002, "loss": 2.5519, "step": 148770 }, { "epoch": 0.29640284329975775, "grad_norm": 0.14010308682918549, "learning_rate": 0.002, "loss": 2.5471, "step": 148780 }, { "epoch": 0.29642276552339664, "grad_norm": 0.15848317742347717, "learning_rate": 0.002, "loss": 2.5614, "step": 148790 }, { "epoch": 0.2964426877470356, "grad_norm": 0.2408958226442337, "learning_rate": 0.002, "loss": 2.5656, "step": 148800 }, { "epoch": 0.2964626099706745, "grad_norm": 0.1550428867340088, "learning_rate": 0.002, "loss": 2.5757, "step": 148810 }, { "epoch": 0.2964825321943134, "grad_norm": 0.13980911672115326, "learning_rate": 0.002, "loss": 2.5777, "step": 148820 }, { "epoch": 0.2965024544179523, "grad_norm": 0.16117632389068604, "learning_rate": 0.002, "loss": 2.562, "step": 148830 }, { "epoch": 0.2965223766415912, "grad_norm": 0.14316213130950928, "learning_rate": 0.002, "loss": 2.5787, "step": 148840 }, { "epoch": 0.29654229886523015, "grad_norm": 0.17903168499469757, "learning_rate": 0.002, "loss": 2.565, "step": 148850 }, { "epoch": 0.29656222108886904, "grad_norm": 0.20590120553970337, "learning_rate": 0.002, "loss": 2.5763, "step": 148860 }, { "epoch": 0.296582143312508, "grad_norm": 0.1688242256641388, "learning_rate": 0.002, "loss": 2.5576, "step": 148870 }, { "epoch": 0.2966020655361469, "grad_norm": 0.148885577917099, "learning_rate": 0.002, "loss": 2.5681, "step": 148880 }, { "epoch": 0.2966219877597858, "grad_norm": 0.16474954783916473, "learning_rate": 0.002, "loss": 2.5559, "step": 148890 }, { "epoch": 0.2966419099834247, "grad_norm": 0.15207041800022125, "learning_rate": 0.002, "loss": 2.5614, "step": 148900 }, { "epoch": 0.2966618322070636, "grad_norm": 0.17722047865390778, "learning_rate": 0.002, "loss": 2.5703, "step": 148910 }, { "epoch": 0.29668175443070255, "grad_norm": 0.17329056560993195, "learning_rate": 0.002, "loss": 2.5704, "step": 148920 }, { "epoch": 0.29670167665434144, "grad_norm": 0.16277441382408142, "learning_rate": 0.002, "loss": 2.5675, "step": 148930 }, { "epoch": 0.2967215988779804, "grad_norm": 0.17846384644508362, "learning_rate": 0.002, "loss": 2.5648, "step": 148940 }, { "epoch": 0.2967415211016193, "grad_norm": 0.16610878705978394, "learning_rate": 0.002, "loss": 2.5593, "step": 148950 }, { "epoch": 0.29676144332525817, "grad_norm": 0.15566572546958923, "learning_rate": 0.002, "loss": 2.5596, "step": 148960 }, { "epoch": 0.2967813655488971, "grad_norm": 0.15105146169662476, "learning_rate": 0.002, "loss": 2.5545, "step": 148970 }, { "epoch": 0.296801287772536, "grad_norm": 0.19662919640541077, "learning_rate": 0.002, "loss": 2.5705, "step": 148980 }, { "epoch": 0.29682120999617495, "grad_norm": 0.16398735344409943, "learning_rate": 0.002, "loss": 2.5493, "step": 148990 }, { "epoch": 0.29684113221981384, "grad_norm": 0.16312047839164734, "learning_rate": 0.002, "loss": 2.5607, "step": 149000 }, { "epoch": 0.2968610544434528, "grad_norm": 0.16505959630012512, "learning_rate": 0.002, "loss": 2.5548, "step": 149010 }, { "epoch": 0.2968809766670917, "grad_norm": 0.17230387032032013, "learning_rate": 0.002, "loss": 2.5665, "step": 149020 }, { "epoch": 0.29690089889073057, "grad_norm": 0.18444949388504028, "learning_rate": 0.002, "loss": 2.5568, "step": 149030 }, { "epoch": 0.2969208211143695, "grad_norm": 0.22885064780712128, "learning_rate": 0.002, "loss": 2.5543, "step": 149040 }, { "epoch": 0.2969407433380084, "grad_norm": 0.1764555126428604, "learning_rate": 0.002, "loss": 2.5478, "step": 149050 }, { "epoch": 0.29696066556164735, "grad_norm": 0.16386015713214874, "learning_rate": 0.002, "loss": 2.5709, "step": 149060 }, { "epoch": 0.29698058778528624, "grad_norm": 0.13953013718128204, "learning_rate": 0.002, "loss": 2.5536, "step": 149070 }, { "epoch": 0.29700051000892513, "grad_norm": 0.16111911833286285, "learning_rate": 0.002, "loss": 2.5565, "step": 149080 }, { "epoch": 0.2970204322325641, "grad_norm": 0.1812627613544464, "learning_rate": 0.002, "loss": 2.5777, "step": 149090 }, { "epoch": 0.29704035445620297, "grad_norm": 0.16417968273162842, "learning_rate": 0.002, "loss": 2.5883, "step": 149100 }, { "epoch": 0.2970602766798419, "grad_norm": 0.16362948715686798, "learning_rate": 0.002, "loss": 2.5579, "step": 149110 }, { "epoch": 0.2970801989034808, "grad_norm": 0.17937816679477692, "learning_rate": 0.002, "loss": 2.578, "step": 149120 }, { "epoch": 0.2971001211271197, "grad_norm": 0.16760702431201935, "learning_rate": 0.002, "loss": 2.5478, "step": 149130 }, { "epoch": 0.29712004335075864, "grad_norm": 0.22409573197364807, "learning_rate": 0.002, "loss": 2.5681, "step": 149140 }, { "epoch": 0.29713996557439754, "grad_norm": 0.16766677796840668, "learning_rate": 0.002, "loss": 2.5736, "step": 149150 }, { "epoch": 0.2971598877980365, "grad_norm": 0.18099330365657806, "learning_rate": 0.002, "loss": 2.5584, "step": 149160 }, { "epoch": 0.2971798100216754, "grad_norm": 0.17885437607765198, "learning_rate": 0.002, "loss": 2.5641, "step": 149170 }, { "epoch": 0.2971997322453143, "grad_norm": 0.1732618510723114, "learning_rate": 0.002, "loss": 2.5481, "step": 149180 }, { "epoch": 0.2972196544689532, "grad_norm": 0.1518992930650711, "learning_rate": 0.002, "loss": 2.5746, "step": 149190 }, { "epoch": 0.2972395766925921, "grad_norm": 0.14690622687339783, "learning_rate": 0.002, "loss": 2.5501, "step": 149200 }, { "epoch": 0.29725949891623105, "grad_norm": 0.21526141464710236, "learning_rate": 0.002, "loss": 2.5699, "step": 149210 }, { "epoch": 0.29727942113986994, "grad_norm": 0.156462162733078, "learning_rate": 0.002, "loss": 2.5562, "step": 149220 }, { "epoch": 0.2972993433635089, "grad_norm": 0.17593589425086975, "learning_rate": 0.002, "loss": 2.5526, "step": 149230 }, { "epoch": 0.2973192655871478, "grad_norm": 0.19515585899353027, "learning_rate": 0.002, "loss": 2.5611, "step": 149240 }, { "epoch": 0.29733918781078666, "grad_norm": 0.14612555503845215, "learning_rate": 0.002, "loss": 2.5599, "step": 149250 }, { "epoch": 0.2973591100344256, "grad_norm": 0.1656196415424347, "learning_rate": 0.002, "loss": 2.5693, "step": 149260 }, { "epoch": 0.2973790322580645, "grad_norm": 0.21618950366973877, "learning_rate": 0.002, "loss": 2.5782, "step": 149270 }, { "epoch": 0.29739895448170345, "grad_norm": 0.17603282630443573, "learning_rate": 0.002, "loss": 2.5754, "step": 149280 }, { "epoch": 0.29741887670534234, "grad_norm": 0.1824801117181778, "learning_rate": 0.002, "loss": 2.5684, "step": 149290 }, { "epoch": 0.29743879892898123, "grad_norm": 0.15289394557476044, "learning_rate": 0.002, "loss": 2.5621, "step": 149300 }, { "epoch": 0.2974587211526202, "grad_norm": 0.16590343415737152, "learning_rate": 0.002, "loss": 2.5738, "step": 149310 }, { "epoch": 0.29747864337625907, "grad_norm": 0.16949908435344696, "learning_rate": 0.002, "loss": 2.5591, "step": 149320 }, { "epoch": 0.297498565599898, "grad_norm": 0.2011265903711319, "learning_rate": 0.002, "loss": 2.5656, "step": 149330 }, { "epoch": 0.2975184878235369, "grad_norm": 0.17604994773864746, "learning_rate": 0.002, "loss": 2.554, "step": 149340 }, { "epoch": 0.29753841004717585, "grad_norm": 0.16156823933124542, "learning_rate": 0.002, "loss": 2.5767, "step": 149350 }, { "epoch": 0.29755833227081474, "grad_norm": 0.19135929644107819, "learning_rate": 0.002, "loss": 2.5489, "step": 149360 }, { "epoch": 0.29757825449445363, "grad_norm": 0.20324309170246124, "learning_rate": 0.002, "loss": 2.5632, "step": 149370 }, { "epoch": 0.2975981767180926, "grad_norm": 0.20092810690402985, "learning_rate": 0.002, "loss": 2.5614, "step": 149380 }, { "epoch": 0.29761809894173147, "grad_norm": 0.15105248987674713, "learning_rate": 0.002, "loss": 2.5695, "step": 149390 }, { "epoch": 0.2976380211653704, "grad_norm": 0.16877694427967072, "learning_rate": 0.002, "loss": 2.5771, "step": 149400 }, { "epoch": 0.2976579433890093, "grad_norm": 0.14318887889385223, "learning_rate": 0.002, "loss": 2.5701, "step": 149410 }, { "epoch": 0.2976778656126482, "grad_norm": 0.156294047832489, "learning_rate": 0.002, "loss": 2.5528, "step": 149420 }, { "epoch": 0.29769778783628714, "grad_norm": 0.18018168210983276, "learning_rate": 0.002, "loss": 2.572, "step": 149430 }, { "epoch": 0.29771771005992603, "grad_norm": 0.14849711954593658, "learning_rate": 0.002, "loss": 2.5716, "step": 149440 }, { "epoch": 0.297737632283565, "grad_norm": 0.15607476234436035, "learning_rate": 0.002, "loss": 2.5685, "step": 149450 }, { "epoch": 0.29775755450720387, "grad_norm": 0.1739954799413681, "learning_rate": 0.002, "loss": 2.5549, "step": 149460 }, { "epoch": 0.2977774767308428, "grad_norm": 0.17267122864723206, "learning_rate": 0.002, "loss": 2.573, "step": 149470 }, { "epoch": 0.2977973989544817, "grad_norm": 0.16966088116168976, "learning_rate": 0.002, "loss": 2.549, "step": 149480 }, { "epoch": 0.2978173211781206, "grad_norm": 0.15375012159347534, "learning_rate": 0.002, "loss": 2.5716, "step": 149490 }, { "epoch": 0.29783724340175954, "grad_norm": 0.15637625753879547, "learning_rate": 0.002, "loss": 2.5731, "step": 149500 }, { "epoch": 0.29785716562539843, "grad_norm": 0.1829470694065094, "learning_rate": 0.002, "loss": 2.5634, "step": 149510 }, { "epoch": 0.2978770878490374, "grad_norm": 0.18374434113502502, "learning_rate": 0.002, "loss": 2.5732, "step": 149520 }, { "epoch": 0.29789701007267627, "grad_norm": 0.16866429150104523, "learning_rate": 0.002, "loss": 2.5657, "step": 149530 }, { "epoch": 0.29791693229631516, "grad_norm": 0.16936056315898895, "learning_rate": 0.002, "loss": 2.5436, "step": 149540 }, { "epoch": 0.2979368545199541, "grad_norm": 0.18166321516036987, "learning_rate": 0.002, "loss": 2.5597, "step": 149550 }, { "epoch": 0.297956776743593, "grad_norm": 0.18848863244056702, "learning_rate": 0.002, "loss": 2.5563, "step": 149560 }, { "epoch": 0.29797669896723195, "grad_norm": 0.14761248230934143, "learning_rate": 0.002, "loss": 2.5595, "step": 149570 }, { "epoch": 0.29799662119087084, "grad_norm": 0.17294283211231232, "learning_rate": 0.002, "loss": 2.5704, "step": 149580 }, { "epoch": 0.2980165434145097, "grad_norm": 0.15615613758563995, "learning_rate": 0.002, "loss": 2.5618, "step": 149590 }, { "epoch": 0.2980364656381487, "grad_norm": 0.14950913190841675, "learning_rate": 0.002, "loss": 2.5644, "step": 149600 }, { "epoch": 0.29805638786178756, "grad_norm": 0.16975729167461395, "learning_rate": 0.002, "loss": 2.5625, "step": 149610 }, { "epoch": 0.2980763100854265, "grad_norm": 0.1766974776983261, "learning_rate": 0.002, "loss": 2.5496, "step": 149620 }, { "epoch": 0.2980962323090654, "grad_norm": 0.15542584657669067, "learning_rate": 0.002, "loss": 2.5569, "step": 149630 }, { "epoch": 0.29811615453270435, "grad_norm": 0.16714364290237427, "learning_rate": 0.002, "loss": 2.5607, "step": 149640 }, { "epoch": 0.29813607675634324, "grad_norm": 0.17973852157592773, "learning_rate": 0.002, "loss": 2.5692, "step": 149650 }, { "epoch": 0.29815599897998213, "grad_norm": 0.15095780789852142, "learning_rate": 0.002, "loss": 2.5553, "step": 149660 }, { "epoch": 0.2981759212036211, "grad_norm": 0.15847119688987732, "learning_rate": 0.002, "loss": 2.5731, "step": 149670 }, { "epoch": 0.29819584342725997, "grad_norm": 0.15840886533260345, "learning_rate": 0.002, "loss": 2.5695, "step": 149680 }, { "epoch": 0.2982157656508989, "grad_norm": 0.15210223197937012, "learning_rate": 0.002, "loss": 2.5623, "step": 149690 }, { "epoch": 0.2982356878745378, "grad_norm": 0.1862611323595047, "learning_rate": 0.002, "loss": 2.5593, "step": 149700 }, { "epoch": 0.2982556100981767, "grad_norm": 0.17406564950942993, "learning_rate": 0.002, "loss": 2.5507, "step": 149710 }, { "epoch": 0.29827553232181564, "grad_norm": 0.16255022585391998, "learning_rate": 0.002, "loss": 2.5667, "step": 149720 }, { "epoch": 0.29829545454545453, "grad_norm": 0.15790510177612305, "learning_rate": 0.002, "loss": 2.5707, "step": 149730 }, { "epoch": 0.2983153767690935, "grad_norm": 0.1807117909193039, "learning_rate": 0.002, "loss": 2.5647, "step": 149740 }, { "epoch": 0.29833529899273237, "grad_norm": 0.1393972784280777, "learning_rate": 0.002, "loss": 2.5631, "step": 149750 }, { "epoch": 0.2983552212163713, "grad_norm": 0.16602441668510437, "learning_rate": 0.002, "loss": 2.5547, "step": 149760 }, { "epoch": 0.2983751434400102, "grad_norm": 0.21630088984966278, "learning_rate": 0.002, "loss": 2.5585, "step": 149770 }, { "epoch": 0.2983950656636491, "grad_norm": 0.1603427529335022, "learning_rate": 0.002, "loss": 2.5566, "step": 149780 }, { "epoch": 0.29841498788728804, "grad_norm": 0.18692296743392944, "learning_rate": 0.002, "loss": 2.5631, "step": 149790 }, { "epoch": 0.29843491011092693, "grad_norm": 0.1567676067352295, "learning_rate": 0.002, "loss": 2.5602, "step": 149800 }, { "epoch": 0.2984548323345659, "grad_norm": 0.17184960842132568, "learning_rate": 0.002, "loss": 2.5612, "step": 149810 }, { "epoch": 0.29847475455820477, "grad_norm": 0.17089946568012238, "learning_rate": 0.002, "loss": 2.5699, "step": 149820 }, { "epoch": 0.29849467678184366, "grad_norm": 0.16060557961463928, "learning_rate": 0.002, "loss": 2.5667, "step": 149830 }, { "epoch": 0.2985145990054826, "grad_norm": 0.17845207452774048, "learning_rate": 0.002, "loss": 2.5545, "step": 149840 }, { "epoch": 0.2985345212291215, "grad_norm": 0.16322708129882812, "learning_rate": 0.002, "loss": 2.5609, "step": 149850 }, { "epoch": 0.29855444345276044, "grad_norm": 0.20158790051937103, "learning_rate": 0.002, "loss": 2.5675, "step": 149860 }, { "epoch": 0.29857436567639933, "grad_norm": 0.15148411691188812, "learning_rate": 0.002, "loss": 2.5602, "step": 149870 }, { "epoch": 0.2985942879000382, "grad_norm": 0.14455176889896393, "learning_rate": 0.002, "loss": 2.5512, "step": 149880 }, { "epoch": 0.29861421012367717, "grad_norm": 0.23782147467136383, "learning_rate": 0.002, "loss": 2.5677, "step": 149890 }, { "epoch": 0.29863413234731606, "grad_norm": 0.1517125368118286, "learning_rate": 0.002, "loss": 2.5407, "step": 149900 }, { "epoch": 0.298654054570955, "grad_norm": 0.18135681748390198, "learning_rate": 0.002, "loss": 2.5666, "step": 149910 }, { "epoch": 0.2986739767945939, "grad_norm": 0.15655528008937836, "learning_rate": 0.002, "loss": 2.5696, "step": 149920 }, { "epoch": 0.29869389901823284, "grad_norm": 0.15745626389980316, "learning_rate": 0.002, "loss": 2.5613, "step": 149930 }, { "epoch": 0.29871382124187174, "grad_norm": 0.15131667256355286, "learning_rate": 0.002, "loss": 2.5582, "step": 149940 }, { "epoch": 0.2987337434655106, "grad_norm": 0.17082130908966064, "learning_rate": 0.002, "loss": 2.5846, "step": 149950 }, { "epoch": 0.2987536656891496, "grad_norm": 0.19561947882175446, "learning_rate": 0.002, "loss": 2.5472, "step": 149960 }, { "epoch": 0.29877358791278846, "grad_norm": 0.1729964166879654, "learning_rate": 0.002, "loss": 2.5597, "step": 149970 }, { "epoch": 0.2987935101364274, "grad_norm": 0.20467260479927063, "learning_rate": 0.002, "loss": 2.556, "step": 149980 }, { "epoch": 0.2988134323600663, "grad_norm": 0.1454477608203888, "learning_rate": 0.002, "loss": 2.5636, "step": 149990 }, { "epoch": 0.2988333545837052, "grad_norm": 0.21048301458358765, "learning_rate": 0.002, "loss": 2.5733, "step": 150000 }, { "epoch": 0.29885327680734414, "grad_norm": 0.16040906310081482, "learning_rate": 0.002, "loss": 2.5602, "step": 150010 }, { "epoch": 0.29887319903098303, "grad_norm": 0.16445040702819824, "learning_rate": 0.002, "loss": 2.5444, "step": 150020 }, { "epoch": 0.298893121254622, "grad_norm": 0.17982694506645203, "learning_rate": 0.002, "loss": 2.5649, "step": 150030 }, { "epoch": 0.29891304347826086, "grad_norm": 0.15480266511440277, "learning_rate": 0.002, "loss": 2.5735, "step": 150040 }, { "epoch": 0.2989329657018998, "grad_norm": 0.18783044815063477, "learning_rate": 0.002, "loss": 2.5526, "step": 150050 }, { "epoch": 0.2989528879255387, "grad_norm": 0.1664038449525833, "learning_rate": 0.002, "loss": 2.5604, "step": 150060 }, { "epoch": 0.2989728101491776, "grad_norm": 0.15442389249801636, "learning_rate": 0.002, "loss": 2.5644, "step": 150070 }, { "epoch": 0.29899273237281654, "grad_norm": 0.17988142371177673, "learning_rate": 0.002, "loss": 2.565, "step": 150080 }, { "epoch": 0.29901265459645543, "grad_norm": 0.18156133592128754, "learning_rate": 0.002, "loss": 2.5563, "step": 150090 }, { "epoch": 0.2990325768200944, "grad_norm": 0.14733563363552094, "learning_rate": 0.002, "loss": 2.5657, "step": 150100 }, { "epoch": 0.29905249904373327, "grad_norm": 0.17663055658340454, "learning_rate": 0.002, "loss": 2.5586, "step": 150110 }, { "epoch": 0.29907242126737216, "grad_norm": 0.15153460204601288, "learning_rate": 0.002, "loss": 2.573, "step": 150120 }, { "epoch": 0.2990923434910111, "grad_norm": 0.15752774477005005, "learning_rate": 0.002, "loss": 2.5624, "step": 150130 }, { "epoch": 0.29911226571465, "grad_norm": 0.25930988788604736, "learning_rate": 0.002, "loss": 2.5559, "step": 150140 }, { "epoch": 0.29913218793828894, "grad_norm": 0.15224836766719818, "learning_rate": 0.002, "loss": 2.5643, "step": 150150 }, { "epoch": 0.29915211016192783, "grad_norm": 0.2015756368637085, "learning_rate": 0.002, "loss": 2.5606, "step": 150160 }, { "epoch": 0.2991720323855667, "grad_norm": 0.16860757768154144, "learning_rate": 0.002, "loss": 2.5695, "step": 150170 }, { "epoch": 0.29919195460920567, "grad_norm": 0.1718798726797104, "learning_rate": 0.002, "loss": 2.5792, "step": 150180 }, { "epoch": 0.29921187683284456, "grad_norm": 0.1516995131969452, "learning_rate": 0.002, "loss": 2.5548, "step": 150190 }, { "epoch": 0.2992317990564835, "grad_norm": 0.17310141026973724, "learning_rate": 0.002, "loss": 2.5661, "step": 150200 }, { "epoch": 0.2992517212801224, "grad_norm": 0.21641604602336884, "learning_rate": 0.002, "loss": 2.5616, "step": 150210 }, { "epoch": 0.29927164350376134, "grad_norm": 0.1539575606584549, "learning_rate": 0.002, "loss": 2.559, "step": 150220 }, { "epoch": 0.29929156572740023, "grad_norm": 0.1660746932029724, "learning_rate": 0.002, "loss": 2.558, "step": 150230 }, { "epoch": 0.2993114879510391, "grad_norm": 0.19120921194553375, "learning_rate": 0.002, "loss": 2.567, "step": 150240 }, { "epoch": 0.29933141017467807, "grad_norm": 0.15027101337909698, "learning_rate": 0.002, "loss": 2.5605, "step": 150250 }, { "epoch": 0.29935133239831696, "grad_norm": 0.17108458280563354, "learning_rate": 0.002, "loss": 2.5487, "step": 150260 }, { "epoch": 0.2993712546219559, "grad_norm": 0.16943933069705963, "learning_rate": 0.002, "loss": 2.55, "step": 150270 }, { "epoch": 0.2993911768455948, "grad_norm": 0.1606822907924652, "learning_rate": 0.002, "loss": 2.5551, "step": 150280 }, { "epoch": 0.2994110990692337, "grad_norm": 0.15656037628650665, "learning_rate": 0.002, "loss": 2.5642, "step": 150290 }, { "epoch": 0.29943102129287263, "grad_norm": 0.1763162612915039, "learning_rate": 0.002, "loss": 2.5649, "step": 150300 }, { "epoch": 0.2994509435165115, "grad_norm": 0.14744828641414642, "learning_rate": 0.002, "loss": 2.5632, "step": 150310 }, { "epoch": 0.29947086574015047, "grad_norm": 0.1718360334634781, "learning_rate": 0.002, "loss": 2.5681, "step": 150320 }, { "epoch": 0.29949078796378936, "grad_norm": 0.1707569807767868, "learning_rate": 0.002, "loss": 2.5644, "step": 150330 }, { "epoch": 0.29951071018742825, "grad_norm": 0.15530116856098175, "learning_rate": 0.002, "loss": 2.5662, "step": 150340 }, { "epoch": 0.2995306324110672, "grad_norm": 0.17152085900306702, "learning_rate": 0.002, "loss": 2.5522, "step": 150350 }, { "epoch": 0.2995505546347061, "grad_norm": 0.18367883563041687, "learning_rate": 0.002, "loss": 2.5648, "step": 150360 }, { "epoch": 0.29957047685834504, "grad_norm": 0.19962763786315918, "learning_rate": 0.002, "loss": 2.5662, "step": 150370 }, { "epoch": 0.2995903990819839, "grad_norm": 0.15213462710380554, "learning_rate": 0.002, "loss": 2.5641, "step": 150380 }, { "epoch": 0.2996103213056229, "grad_norm": 0.20028738677501678, "learning_rate": 0.002, "loss": 2.556, "step": 150390 }, { "epoch": 0.29963024352926176, "grad_norm": 0.18664854764938354, "learning_rate": 0.002, "loss": 2.5635, "step": 150400 }, { "epoch": 0.29965016575290065, "grad_norm": 0.17952083051204681, "learning_rate": 0.002, "loss": 2.573, "step": 150410 }, { "epoch": 0.2996700879765396, "grad_norm": 0.16466611623764038, "learning_rate": 0.002, "loss": 2.5458, "step": 150420 }, { "epoch": 0.2996900102001785, "grad_norm": 0.18950755894184113, "learning_rate": 0.002, "loss": 2.5587, "step": 150430 }, { "epoch": 0.29970993242381744, "grad_norm": 0.19987280666828156, "learning_rate": 0.002, "loss": 2.5444, "step": 150440 }, { "epoch": 0.29972985464745633, "grad_norm": 0.17318838834762573, "learning_rate": 0.002, "loss": 2.5478, "step": 150450 }, { "epoch": 0.2997497768710952, "grad_norm": 0.15044741332530975, "learning_rate": 0.002, "loss": 2.5636, "step": 150460 }, { "epoch": 0.29976969909473417, "grad_norm": 0.2304019182920456, "learning_rate": 0.002, "loss": 2.562, "step": 150470 }, { "epoch": 0.29978962131837306, "grad_norm": 0.17758437991142273, "learning_rate": 0.002, "loss": 2.5478, "step": 150480 }, { "epoch": 0.299809543542012, "grad_norm": 0.178116574883461, "learning_rate": 0.002, "loss": 2.5538, "step": 150490 }, { "epoch": 0.2998294657656509, "grad_norm": 0.15482765436172485, "learning_rate": 0.002, "loss": 2.5551, "step": 150500 }, { "epoch": 0.29984938798928984, "grad_norm": 0.17925995588302612, "learning_rate": 0.002, "loss": 2.5706, "step": 150510 }, { "epoch": 0.29986931021292873, "grad_norm": 0.18220652639865875, "learning_rate": 0.002, "loss": 2.5538, "step": 150520 }, { "epoch": 0.2998892324365676, "grad_norm": 0.15586575865745544, "learning_rate": 0.002, "loss": 2.5641, "step": 150530 }, { "epoch": 0.29990915466020657, "grad_norm": 0.16732119023799896, "learning_rate": 0.002, "loss": 2.5669, "step": 150540 }, { "epoch": 0.29992907688384546, "grad_norm": 0.16110636293888092, "learning_rate": 0.002, "loss": 2.5645, "step": 150550 }, { "epoch": 0.2999489991074844, "grad_norm": 0.17728862166404724, "learning_rate": 0.002, "loss": 2.5556, "step": 150560 }, { "epoch": 0.2999689213311233, "grad_norm": 0.17720961570739746, "learning_rate": 0.002, "loss": 2.5589, "step": 150570 }, { "epoch": 0.2999888435547622, "grad_norm": 0.14478424191474915, "learning_rate": 0.002, "loss": 2.5605, "step": 150580 }, { "epoch": 0.30000876577840113, "grad_norm": 0.17860552668571472, "learning_rate": 0.002, "loss": 2.5551, "step": 150590 }, { "epoch": 0.30002868800204, "grad_norm": 0.17316271364688873, "learning_rate": 0.002, "loss": 2.5581, "step": 150600 }, { "epoch": 0.30004861022567897, "grad_norm": 0.16556981205940247, "learning_rate": 0.002, "loss": 2.5725, "step": 150610 }, { "epoch": 0.30006853244931786, "grad_norm": 0.1651853322982788, "learning_rate": 0.002, "loss": 2.5707, "step": 150620 }, { "epoch": 0.30008845467295675, "grad_norm": 0.16175545752048492, "learning_rate": 0.002, "loss": 2.5559, "step": 150630 }, { "epoch": 0.3001083768965957, "grad_norm": 0.19845665991306305, "learning_rate": 0.002, "loss": 2.5463, "step": 150640 }, { "epoch": 0.3001282991202346, "grad_norm": 0.14028653502464294, "learning_rate": 0.002, "loss": 2.5814, "step": 150650 }, { "epoch": 0.30014822134387353, "grad_norm": 0.197490856051445, "learning_rate": 0.002, "loss": 2.5631, "step": 150660 }, { "epoch": 0.3001681435675124, "grad_norm": 0.14561349153518677, "learning_rate": 0.002, "loss": 2.5684, "step": 150670 }, { "epoch": 0.30018806579115137, "grad_norm": 0.15743161737918854, "learning_rate": 0.002, "loss": 2.5655, "step": 150680 }, { "epoch": 0.30020798801479026, "grad_norm": 0.16847123205661774, "learning_rate": 0.002, "loss": 2.5681, "step": 150690 }, { "epoch": 0.30022791023842915, "grad_norm": 0.16037078201770782, "learning_rate": 0.002, "loss": 2.5607, "step": 150700 }, { "epoch": 0.3002478324620681, "grad_norm": 0.16583102941513062, "learning_rate": 0.002, "loss": 2.5817, "step": 150710 }, { "epoch": 0.300267754685707, "grad_norm": 0.1568591296672821, "learning_rate": 0.002, "loss": 2.5712, "step": 150720 }, { "epoch": 0.30028767690934594, "grad_norm": 0.15214420855045319, "learning_rate": 0.002, "loss": 2.5465, "step": 150730 }, { "epoch": 0.3003075991329848, "grad_norm": 0.1925007849931717, "learning_rate": 0.002, "loss": 2.5578, "step": 150740 }, { "epoch": 0.3003275213566237, "grad_norm": 0.14278802275657654, "learning_rate": 0.002, "loss": 2.5658, "step": 150750 }, { "epoch": 0.30034744358026266, "grad_norm": 0.17479835450649261, "learning_rate": 0.002, "loss": 2.5578, "step": 150760 }, { "epoch": 0.30036736580390155, "grad_norm": 0.1617516130208969, "learning_rate": 0.002, "loss": 2.5692, "step": 150770 }, { "epoch": 0.3003872880275405, "grad_norm": 0.17556358873844147, "learning_rate": 0.002, "loss": 2.569, "step": 150780 }, { "epoch": 0.3004072102511794, "grad_norm": 0.17709176242351532, "learning_rate": 0.002, "loss": 2.5628, "step": 150790 }, { "epoch": 0.30042713247481834, "grad_norm": 0.1570228934288025, "learning_rate": 0.002, "loss": 2.56, "step": 150800 }, { "epoch": 0.3004470546984572, "grad_norm": 0.19453099370002747, "learning_rate": 0.002, "loss": 2.5612, "step": 150810 }, { "epoch": 0.3004669769220961, "grad_norm": 0.15958410501480103, "learning_rate": 0.002, "loss": 2.5558, "step": 150820 }, { "epoch": 0.30048689914573506, "grad_norm": 0.1550063043832779, "learning_rate": 0.002, "loss": 2.5673, "step": 150830 }, { "epoch": 0.30050682136937396, "grad_norm": 0.5517074465751648, "learning_rate": 0.002, "loss": 2.5741, "step": 150840 }, { "epoch": 0.3005267435930129, "grad_norm": 0.1573631465435028, "learning_rate": 0.002, "loss": 2.5723, "step": 150850 }, { "epoch": 0.3005466658166518, "grad_norm": 0.16259324550628662, "learning_rate": 0.002, "loss": 2.5598, "step": 150860 }, { "epoch": 0.3005665880402907, "grad_norm": 0.1601344347000122, "learning_rate": 0.002, "loss": 2.5763, "step": 150870 }, { "epoch": 0.30058651026392963, "grad_norm": 0.15962454676628113, "learning_rate": 0.002, "loss": 2.5573, "step": 150880 }, { "epoch": 0.3006064324875685, "grad_norm": 0.1500423401594162, "learning_rate": 0.002, "loss": 2.5815, "step": 150890 }, { "epoch": 0.30062635471120747, "grad_norm": 0.19425101578235626, "learning_rate": 0.002, "loss": 2.5644, "step": 150900 }, { "epoch": 0.30064627693484636, "grad_norm": 0.16701635718345642, "learning_rate": 0.002, "loss": 2.5653, "step": 150910 }, { "epoch": 0.30066619915848525, "grad_norm": 0.16528423130512238, "learning_rate": 0.002, "loss": 2.5479, "step": 150920 }, { "epoch": 0.3006861213821242, "grad_norm": 0.18046028912067413, "learning_rate": 0.002, "loss": 2.5545, "step": 150930 }, { "epoch": 0.3007060436057631, "grad_norm": 0.16200120747089386, "learning_rate": 0.002, "loss": 2.5655, "step": 150940 }, { "epoch": 0.30072596582940203, "grad_norm": 0.14460307359695435, "learning_rate": 0.002, "loss": 2.5657, "step": 150950 }, { "epoch": 0.3007458880530409, "grad_norm": 0.17974144220352173, "learning_rate": 0.002, "loss": 2.564, "step": 150960 }, { "epoch": 0.30076581027667987, "grad_norm": 0.1881464272737503, "learning_rate": 0.002, "loss": 2.5636, "step": 150970 }, { "epoch": 0.30078573250031876, "grad_norm": 0.22467529773712158, "learning_rate": 0.002, "loss": 2.5854, "step": 150980 }, { "epoch": 0.30080565472395765, "grad_norm": 0.1581152081489563, "learning_rate": 0.002, "loss": 2.5601, "step": 150990 }, { "epoch": 0.3008255769475966, "grad_norm": 0.1609727442264557, "learning_rate": 0.002, "loss": 2.5626, "step": 151000 }, { "epoch": 0.3008454991712355, "grad_norm": 0.16581347584724426, "learning_rate": 0.002, "loss": 2.5611, "step": 151010 }, { "epoch": 0.30086542139487443, "grad_norm": 0.1412922739982605, "learning_rate": 0.002, "loss": 2.5725, "step": 151020 }, { "epoch": 0.3008853436185133, "grad_norm": 0.22001461684703827, "learning_rate": 0.002, "loss": 2.5849, "step": 151030 }, { "epoch": 0.3009052658421522, "grad_norm": 0.17484331130981445, "learning_rate": 0.002, "loss": 2.5708, "step": 151040 }, { "epoch": 0.30092518806579116, "grad_norm": 0.1644919365644455, "learning_rate": 0.002, "loss": 2.5565, "step": 151050 }, { "epoch": 0.30094511028943005, "grad_norm": 0.22042711079120636, "learning_rate": 0.002, "loss": 2.5628, "step": 151060 }, { "epoch": 0.300965032513069, "grad_norm": 0.15982258319854736, "learning_rate": 0.002, "loss": 2.5538, "step": 151070 }, { "epoch": 0.3009849547367079, "grad_norm": 0.16479021310806274, "learning_rate": 0.002, "loss": 2.56, "step": 151080 }, { "epoch": 0.3010048769603468, "grad_norm": 0.1738661229610443, "learning_rate": 0.002, "loss": 2.5673, "step": 151090 }, { "epoch": 0.3010247991839857, "grad_norm": 0.17789186537265778, "learning_rate": 0.002, "loss": 2.5596, "step": 151100 }, { "epoch": 0.3010447214076246, "grad_norm": 0.1922767460346222, "learning_rate": 0.002, "loss": 2.5649, "step": 151110 }, { "epoch": 0.30106464363126356, "grad_norm": 0.1792479157447815, "learning_rate": 0.002, "loss": 2.5674, "step": 151120 }, { "epoch": 0.30108456585490245, "grad_norm": 0.14533157646656036, "learning_rate": 0.002, "loss": 2.5557, "step": 151130 }, { "epoch": 0.3011044880785414, "grad_norm": 0.14787296950817108, "learning_rate": 0.002, "loss": 2.5737, "step": 151140 }, { "epoch": 0.3011244103021803, "grad_norm": 0.16433677077293396, "learning_rate": 0.002, "loss": 2.5634, "step": 151150 }, { "epoch": 0.3011443325258192, "grad_norm": 0.17000457644462585, "learning_rate": 0.002, "loss": 2.5582, "step": 151160 }, { "epoch": 0.3011642547494581, "grad_norm": 0.17165274918079376, "learning_rate": 0.002, "loss": 2.56, "step": 151170 }, { "epoch": 0.301184176973097, "grad_norm": 0.2223895788192749, "learning_rate": 0.002, "loss": 2.5529, "step": 151180 }, { "epoch": 0.30120409919673596, "grad_norm": 0.17335668206214905, "learning_rate": 0.002, "loss": 2.5638, "step": 151190 }, { "epoch": 0.30122402142037485, "grad_norm": 0.1955403834581375, "learning_rate": 0.002, "loss": 2.5442, "step": 151200 }, { "epoch": 0.30124394364401375, "grad_norm": 0.18747437000274658, "learning_rate": 0.002, "loss": 2.5549, "step": 151210 }, { "epoch": 0.3012638658676527, "grad_norm": 0.18223991990089417, "learning_rate": 0.002, "loss": 2.563, "step": 151220 }, { "epoch": 0.3012837880912916, "grad_norm": 0.15092292428016663, "learning_rate": 0.002, "loss": 2.5694, "step": 151230 }, { "epoch": 0.30130371031493053, "grad_norm": 0.1681336611509323, "learning_rate": 0.002, "loss": 2.5498, "step": 151240 }, { "epoch": 0.3013236325385694, "grad_norm": 0.15772543847560883, "learning_rate": 0.002, "loss": 2.5673, "step": 151250 }, { "epoch": 0.30134355476220837, "grad_norm": 0.16611985862255096, "learning_rate": 0.002, "loss": 2.5595, "step": 151260 }, { "epoch": 0.30136347698584726, "grad_norm": 0.14805161952972412, "learning_rate": 0.002, "loss": 2.5652, "step": 151270 }, { "epoch": 0.30138339920948615, "grad_norm": 0.1673867106437683, "learning_rate": 0.002, "loss": 2.5739, "step": 151280 }, { "epoch": 0.3014033214331251, "grad_norm": 0.16311907768249512, "learning_rate": 0.002, "loss": 2.5526, "step": 151290 }, { "epoch": 0.301423243656764, "grad_norm": 0.17771708965301514, "learning_rate": 0.002, "loss": 2.5582, "step": 151300 }, { "epoch": 0.30144316588040293, "grad_norm": 0.14674995839595795, "learning_rate": 0.002, "loss": 2.5691, "step": 151310 }, { "epoch": 0.3014630881040418, "grad_norm": 0.19595012068748474, "learning_rate": 0.002, "loss": 2.5627, "step": 151320 }, { "epoch": 0.3014830103276807, "grad_norm": 0.1536238044500351, "learning_rate": 0.002, "loss": 2.5741, "step": 151330 }, { "epoch": 0.30150293255131966, "grad_norm": 0.1737993359565735, "learning_rate": 0.002, "loss": 2.5695, "step": 151340 }, { "epoch": 0.30152285477495855, "grad_norm": 0.1830049455165863, "learning_rate": 0.002, "loss": 2.5787, "step": 151350 }, { "epoch": 0.3015427769985975, "grad_norm": 0.15949440002441406, "learning_rate": 0.002, "loss": 2.5687, "step": 151360 }, { "epoch": 0.3015626992222364, "grad_norm": 0.14570051431655884, "learning_rate": 0.002, "loss": 2.569, "step": 151370 }, { "epoch": 0.3015826214458753, "grad_norm": 0.17081020772457123, "learning_rate": 0.002, "loss": 2.5531, "step": 151380 }, { "epoch": 0.3016025436695142, "grad_norm": 0.19755074381828308, "learning_rate": 0.002, "loss": 2.556, "step": 151390 }, { "epoch": 0.3016224658931531, "grad_norm": 0.15800505876541138, "learning_rate": 0.002, "loss": 2.5757, "step": 151400 }, { "epoch": 0.30164238811679206, "grad_norm": 0.15614727139472961, "learning_rate": 0.002, "loss": 2.5559, "step": 151410 }, { "epoch": 0.30166231034043095, "grad_norm": 0.1577587127685547, "learning_rate": 0.002, "loss": 2.558, "step": 151420 }, { "epoch": 0.3016822325640699, "grad_norm": 0.1802353858947754, "learning_rate": 0.002, "loss": 2.555, "step": 151430 }, { "epoch": 0.3017021547877088, "grad_norm": 0.17369437217712402, "learning_rate": 0.002, "loss": 2.5705, "step": 151440 }, { "epoch": 0.3017220770113477, "grad_norm": 0.16712389886379242, "learning_rate": 0.002, "loss": 2.5576, "step": 151450 }, { "epoch": 0.3017419992349866, "grad_norm": 0.17464838922023773, "learning_rate": 0.002, "loss": 2.5597, "step": 151460 }, { "epoch": 0.3017619214586255, "grad_norm": 0.1714552938938141, "learning_rate": 0.002, "loss": 2.5537, "step": 151470 }, { "epoch": 0.30178184368226446, "grad_norm": 0.16601203382015228, "learning_rate": 0.002, "loss": 2.5726, "step": 151480 }, { "epoch": 0.30180176590590335, "grad_norm": 0.175046905875206, "learning_rate": 0.002, "loss": 2.5598, "step": 151490 }, { "epoch": 0.30182168812954224, "grad_norm": 0.1409042477607727, "learning_rate": 0.002, "loss": 2.5635, "step": 151500 }, { "epoch": 0.3018416103531812, "grad_norm": 0.16467532515525818, "learning_rate": 0.002, "loss": 2.557, "step": 151510 }, { "epoch": 0.3018615325768201, "grad_norm": 0.15073104202747345, "learning_rate": 0.002, "loss": 2.5576, "step": 151520 }, { "epoch": 0.301881454800459, "grad_norm": 0.18043620884418488, "learning_rate": 0.002, "loss": 2.5578, "step": 151530 }, { "epoch": 0.3019013770240979, "grad_norm": 0.21047374606132507, "learning_rate": 0.002, "loss": 2.5511, "step": 151540 }, { "epoch": 0.30192129924773686, "grad_norm": 0.14966769516468048, "learning_rate": 0.002, "loss": 2.5723, "step": 151550 }, { "epoch": 0.30194122147137575, "grad_norm": 0.16182081401348114, "learning_rate": 0.002, "loss": 2.5551, "step": 151560 }, { "epoch": 0.30196114369501464, "grad_norm": 0.1429513394832611, "learning_rate": 0.002, "loss": 2.5664, "step": 151570 }, { "epoch": 0.3019810659186536, "grad_norm": 0.16400976479053497, "learning_rate": 0.002, "loss": 2.5605, "step": 151580 }, { "epoch": 0.3020009881422925, "grad_norm": 0.18180108070373535, "learning_rate": 0.002, "loss": 2.5732, "step": 151590 }, { "epoch": 0.3020209103659314, "grad_norm": 0.16492238640785217, "learning_rate": 0.002, "loss": 2.5427, "step": 151600 }, { "epoch": 0.3020408325895703, "grad_norm": 0.14653559029102325, "learning_rate": 0.002, "loss": 2.5662, "step": 151610 }, { "epoch": 0.3020607548132092, "grad_norm": 0.17448239028453827, "learning_rate": 0.002, "loss": 2.5562, "step": 151620 }, { "epoch": 0.30208067703684816, "grad_norm": 0.17190346121788025, "learning_rate": 0.002, "loss": 2.5624, "step": 151630 }, { "epoch": 0.30210059926048705, "grad_norm": 0.1657271534204483, "learning_rate": 0.002, "loss": 2.5559, "step": 151640 }, { "epoch": 0.302120521484126, "grad_norm": 0.19466452300548553, "learning_rate": 0.002, "loss": 2.5639, "step": 151650 }, { "epoch": 0.3021404437077649, "grad_norm": 0.16911445558071136, "learning_rate": 0.002, "loss": 2.5684, "step": 151660 }, { "epoch": 0.3021603659314038, "grad_norm": 0.17726512253284454, "learning_rate": 0.002, "loss": 2.569, "step": 151670 }, { "epoch": 0.3021802881550427, "grad_norm": 0.16510936617851257, "learning_rate": 0.002, "loss": 2.578, "step": 151680 }, { "epoch": 0.3022002103786816, "grad_norm": 0.16917426884174347, "learning_rate": 0.002, "loss": 2.5732, "step": 151690 }, { "epoch": 0.30222013260232056, "grad_norm": 0.17083917558193207, "learning_rate": 0.002, "loss": 2.56, "step": 151700 }, { "epoch": 0.30224005482595945, "grad_norm": 0.1766945868730545, "learning_rate": 0.002, "loss": 2.5514, "step": 151710 }, { "epoch": 0.3022599770495984, "grad_norm": 0.18328599631786346, "learning_rate": 0.002, "loss": 2.57, "step": 151720 }, { "epoch": 0.3022798992732373, "grad_norm": 0.18438281118869781, "learning_rate": 0.002, "loss": 2.561, "step": 151730 }, { "epoch": 0.3022998214968762, "grad_norm": 0.16515184938907623, "learning_rate": 0.002, "loss": 2.5705, "step": 151740 }, { "epoch": 0.3023197437205151, "grad_norm": 0.14806261658668518, "learning_rate": 0.002, "loss": 2.557, "step": 151750 }, { "epoch": 0.302339665944154, "grad_norm": 0.1684998720884323, "learning_rate": 0.002, "loss": 2.5608, "step": 151760 }, { "epoch": 0.30235958816779296, "grad_norm": 0.5655364990234375, "learning_rate": 0.002, "loss": 2.5745, "step": 151770 }, { "epoch": 0.30237951039143185, "grad_norm": 0.16406293213367462, "learning_rate": 0.002, "loss": 2.5745, "step": 151780 }, { "epoch": 0.30239943261507074, "grad_norm": 0.17484070360660553, "learning_rate": 0.002, "loss": 2.5586, "step": 151790 }, { "epoch": 0.3024193548387097, "grad_norm": 0.16529856622219086, "learning_rate": 0.002, "loss": 2.575, "step": 151800 }, { "epoch": 0.3024392770623486, "grad_norm": 0.20746123790740967, "learning_rate": 0.002, "loss": 2.5654, "step": 151810 }, { "epoch": 0.3024591992859875, "grad_norm": 0.15247821807861328, "learning_rate": 0.002, "loss": 2.5563, "step": 151820 }, { "epoch": 0.3024791215096264, "grad_norm": 0.1813245564699173, "learning_rate": 0.002, "loss": 2.5725, "step": 151830 }, { "epoch": 0.3024990437332653, "grad_norm": 0.16002358496189117, "learning_rate": 0.002, "loss": 2.5703, "step": 151840 }, { "epoch": 0.30251896595690425, "grad_norm": 0.15439870953559875, "learning_rate": 0.002, "loss": 2.5676, "step": 151850 }, { "epoch": 0.30253888818054314, "grad_norm": 0.14700746536254883, "learning_rate": 0.002, "loss": 2.5733, "step": 151860 }, { "epoch": 0.3025588104041821, "grad_norm": 0.20120415091514587, "learning_rate": 0.002, "loss": 2.5461, "step": 151870 }, { "epoch": 0.302578732627821, "grad_norm": 0.16214370727539062, "learning_rate": 0.002, "loss": 2.5719, "step": 151880 }, { "epoch": 0.3025986548514599, "grad_norm": 0.18376128375530243, "learning_rate": 0.002, "loss": 2.5662, "step": 151890 }, { "epoch": 0.3026185770750988, "grad_norm": 0.13746969401836395, "learning_rate": 0.002, "loss": 2.5715, "step": 151900 }, { "epoch": 0.3026384992987377, "grad_norm": 0.1617671698331833, "learning_rate": 0.002, "loss": 2.558, "step": 151910 }, { "epoch": 0.30265842152237665, "grad_norm": 0.18050187826156616, "learning_rate": 0.002, "loss": 2.5779, "step": 151920 }, { "epoch": 0.30267834374601554, "grad_norm": 0.1607121080160141, "learning_rate": 0.002, "loss": 2.5601, "step": 151930 }, { "epoch": 0.3026982659696545, "grad_norm": 0.17027278244495392, "learning_rate": 0.002, "loss": 2.5633, "step": 151940 }, { "epoch": 0.3027181881932934, "grad_norm": 0.18084318935871124, "learning_rate": 0.002, "loss": 2.5561, "step": 151950 }, { "epoch": 0.30273811041693227, "grad_norm": 0.16498498618602753, "learning_rate": 0.002, "loss": 2.5497, "step": 151960 }, { "epoch": 0.3027580326405712, "grad_norm": 0.20804066956043243, "learning_rate": 0.002, "loss": 2.5628, "step": 151970 }, { "epoch": 0.3027779548642101, "grad_norm": 0.17081612348556519, "learning_rate": 0.002, "loss": 2.5826, "step": 151980 }, { "epoch": 0.30279787708784905, "grad_norm": 0.14612369239330292, "learning_rate": 0.002, "loss": 2.535, "step": 151990 }, { "epoch": 0.30281779931148795, "grad_norm": 0.16618593037128448, "learning_rate": 0.002, "loss": 2.5522, "step": 152000 }, { "epoch": 0.3028377215351269, "grad_norm": 0.2000967264175415, "learning_rate": 0.002, "loss": 2.5788, "step": 152010 }, { "epoch": 0.3028576437587658, "grad_norm": 0.14721716940402985, "learning_rate": 0.002, "loss": 2.5558, "step": 152020 }, { "epoch": 0.3028775659824047, "grad_norm": 0.1552603393793106, "learning_rate": 0.002, "loss": 2.5714, "step": 152030 }, { "epoch": 0.3028974882060436, "grad_norm": 0.16943922638893127, "learning_rate": 0.002, "loss": 2.5583, "step": 152040 }, { "epoch": 0.3029174104296825, "grad_norm": 0.15945519506931305, "learning_rate": 0.002, "loss": 2.5647, "step": 152050 }, { "epoch": 0.30293733265332146, "grad_norm": 0.15923117101192474, "learning_rate": 0.002, "loss": 2.5586, "step": 152060 }, { "epoch": 0.30295725487696035, "grad_norm": 0.145456463098526, "learning_rate": 0.002, "loss": 2.5622, "step": 152070 }, { "epoch": 0.30297717710059924, "grad_norm": 0.16612930595874786, "learning_rate": 0.002, "loss": 2.5669, "step": 152080 }, { "epoch": 0.3029970993242382, "grad_norm": 0.15002058446407318, "learning_rate": 0.002, "loss": 2.5504, "step": 152090 }, { "epoch": 0.3030170215478771, "grad_norm": 0.16747179627418518, "learning_rate": 0.002, "loss": 2.5644, "step": 152100 }, { "epoch": 0.303036943771516, "grad_norm": 0.16976280510425568, "learning_rate": 0.002, "loss": 2.5513, "step": 152110 }, { "epoch": 0.3030568659951549, "grad_norm": 0.16012239456176758, "learning_rate": 0.002, "loss": 2.5465, "step": 152120 }, { "epoch": 0.3030767882187938, "grad_norm": 0.15601158142089844, "learning_rate": 0.002, "loss": 2.5501, "step": 152130 }, { "epoch": 0.30309671044243275, "grad_norm": 0.15037503838539124, "learning_rate": 0.002, "loss": 2.5723, "step": 152140 }, { "epoch": 0.30311663266607164, "grad_norm": 0.14276960492134094, "learning_rate": 0.002, "loss": 2.5588, "step": 152150 }, { "epoch": 0.3031365548897106, "grad_norm": 0.166106179356575, "learning_rate": 0.002, "loss": 2.5661, "step": 152160 }, { "epoch": 0.3031564771133495, "grad_norm": 0.15212197601795197, "learning_rate": 0.002, "loss": 2.5775, "step": 152170 }, { "epoch": 0.3031763993369884, "grad_norm": 0.15659518539905548, "learning_rate": 0.002, "loss": 2.5728, "step": 152180 }, { "epoch": 0.3031963215606273, "grad_norm": 0.1651037186384201, "learning_rate": 0.002, "loss": 2.5547, "step": 152190 }, { "epoch": 0.3032162437842662, "grad_norm": 0.15489840507507324, "learning_rate": 0.002, "loss": 2.5605, "step": 152200 }, { "epoch": 0.30323616600790515, "grad_norm": 0.15398745238780975, "learning_rate": 0.002, "loss": 2.5512, "step": 152210 }, { "epoch": 0.30325608823154404, "grad_norm": 0.20530405640602112, "learning_rate": 0.002, "loss": 2.5692, "step": 152220 }, { "epoch": 0.303276010455183, "grad_norm": 0.15273144841194153, "learning_rate": 0.002, "loss": 2.5571, "step": 152230 }, { "epoch": 0.3032959326788219, "grad_norm": 0.1612929254770279, "learning_rate": 0.002, "loss": 2.575, "step": 152240 }, { "epoch": 0.30331585490246077, "grad_norm": 0.15208835899829865, "learning_rate": 0.002, "loss": 2.542, "step": 152250 }, { "epoch": 0.3033357771260997, "grad_norm": 0.22658781707286835, "learning_rate": 0.002, "loss": 2.5564, "step": 152260 }, { "epoch": 0.3033556993497386, "grad_norm": 0.17005862295627594, "learning_rate": 0.002, "loss": 2.5522, "step": 152270 }, { "epoch": 0.30337562157337755, "grad_norm": 0.18694928288459778, "learning_rate": 0.002, "loss": 2.5674, "step": 152280 }, { "epoch": 0.30339554379701644, "grad_norm": 0.15930990874767303, "learning_rate": 0.002, "loss": 2.5514, "step": 152290 }, { "epoch": 0.3034154660206554, "grad_norm": 0.16353178024291992, "learning_rate": 0.002, "loss": 2.5674, "step": 152300 }, { "epoch": 0.3034353882442943, "grad_norm": 0.14644761383533478, "learning_rate": 0.002, "loss": 2.5494, "step": 152310 }, { "epoch": 0.30345531046793317, "grad_norm": 0.20914989709854126, "learning_rate": 0.002, "loss": 2.5673, "step": 152320 }, { "epoch": 0.3034752326915721, "grad_norm": 0.16133281588554382, "learning_rate": 0.002, "loss": 2.5671, "step": 152330 }, { "epoch": 0.303495154915211, "grad_norm": 0.15077033638954163, "learning_rate": 0.002, "loss": 2.563, "step": 152340 }, { "epoch": 0.30351507713884995, "grad_norm": 0.17578521370887756, "learning_rate": 0.002, "loss": 2.5674, "step": 152350 }, { "epoch": 0.30353499936248884, "grad_norm": 0.16551414132118225, "learning_rate": 0.002, "loss": 2.5818, "step": 152360 }, { "epoch": 0.30355492158612774, "grad_norm": 0.18333011865615845, "learning_rate": 0.002, "loss": 2.572, "step": 152370 }, { "epoch": 0.3035748438097667, "grad_norm": 0.1579999178647995, "learning_rate": 0.002, "loss": 2.5535, "step": 152380 }, { "epoch": 0.30359476603340557, "grad_norm": 0.1642412543296814, "learning_rate": 0.002, "loss": 2.5642, "step": 152390 }, { "epoch": 0.3036146882570445, "grad_norm": 0.20317751169204712, "learning_rate": 0.002, "loss": 2.5692, "step": 152400 }, { "epoch": 0.3036346104806834, "grad_norm": 0.18416377902030945, "learning_rate": 0.002, "loss": 2.5711, "step": 152410 }, { "epoch": 0.3036545327043223, "grad_norm": 0.13936516642570496, "learning_rate": 0.002, "loss": 2.5591, "step": 152420 }, { "epoch": 0.30367445492796125, "grad_norm": 0.1616406887769699, "learning_rate": 0.002, "loss": 2.5631, "step": 152430 }, { "epoch": 0.30369437715160014, "grad_norm": 0.14963386952877045, "learning_rate": 0.002, "loss": 2.5676, "step": 152440 }, { "epoch": 0.3037142993752391, "grad_norm": 0.17320816218852997, "learning_rate": 0.002, "loss": 2.5595, "step": 152450 }, { "epoch": 0.303734221598878, "grad_norm": 0.14448675513267517, "learning_rate": 0.002, "loss": 2.5519, "step": 152460 }, { "epoch": 0.3037541438225169, "grad_norm": 0.1425190567970276, "learning_rate": 0.002, "loss": 2.5736, "step": 152470 }, { "epoch": 0.3037740660461558, "grad_norm": 0.14939479529857635, "learning_rate": 0.002, "loss": 2.544, "step": 152480 }, { "epoch": 0.3037939882697947, "grad_norm": 0.18649500608444214, "learning_rate": 0.002, "loss": 2.5705, "step": 152490 }, { "epoch": 0.30381391049343365, "grad_norm": 0.19751761853694916, "learning_rate": 0.002, "loss": 2.5745, "step": 152500 }, { "epoch": 0.30383383271707254, "grad_norm": 0.17157380282878876, "learning_rate": 0.002, "loss": 2.5626, "step": 152510 }, { "epoch": 0.3038537549407115, "grad_norm": 0.19018009305000305, "learning_rate": 0.002, "loss": 2.5582, "step": 152520 }, { "epoch": 0.3038736771643504, "grad_norm": 0.17814408242702484, "learning_rate": 0.002, "loss": 2.5599, "step": 152530 }, { "epoch": 0.30389359938798927, "grad_norm": 0.14621876180171967, "learning_rate": 0.002, "loss": 2.5666, "step": 152540 }, { "epoch": 0.3039135216116282, "grad_norm": 0.16674596071243286, "learning_rate": 0.002, "loss": 2.5517, "step": 152550 }, { "epoch": 0.3039334438352671, "grad_norm": 0.16806209087371826, "learning_rate": 0.002, "loss": 2.5518, "step": 152560 }, { "epoch": 0.30395336605890605, "grad_norm": 0.16263233125209808, "learning_rate": 0.002, "loss": 2.5498, "step": 152570 }, { "epoch": 0.30397328828254494, "grad_norm": 0.1501440554857254, "learning_rate": 0.002, "loss": 2.5582, "step": 152580 }, { "epoch": 0.30399321050618383, "grad_norm": 0.14795856177806854, "learning_rate": 0.002, "loss": 2.5759, "step": 152590 }, { "epoch": 0.3040131327298228, "grad_norm": 0.1678202897310257, "learning_rate": 0.002, "loss": 2.5775, "step": 152600 }, { "epoch": 0.30403305495346167, "grad_norm": 0.17365944385528564, "learning_rate": 0.002, "loss": 2.5585, "step": 152610 }, { "epoch": 0.3040529771771006, "grad_norm": 0.18713700771331787, "learning_rate": 0.002, "loss": 2.5669, "step": 152620 }, { "epoch": 0.3040728994007395, "grad_norm": 0.16323016583919525, "learning_rate": 0.002, "loss": 2.5612, "step": 152630 }, { "epoch": 0.30409282162437845, "grad_norm": 0.17264682054519653, "learning_rate": 0.002, "loss": 2.5552, "step": 152640 }, { "epoch": 0.30411274384801734, "grad_norm": 0.16947506368160248, "learning_rate": 0.002, "loss": 2.5645, "step": 152650 }, { "epoch": 0.30413266607165623, "grad_norm": 0.16096003353595734, "learning_rate": 0.002, "loss": 2.5659, "step": 152660 }, { "epoch": 0.3041525882952952, "grad_norm": 0.20616371929645538, "learning_rate": 0.002, "loss": 2.57, "step": 152670 }, { "epoch": 0.30417251051893407, "grad_norm": 0.14331085979938507, "learning_rate": 0.002, "loss": 2.5425, "step": 152680 }, { "epoch": 0.304192432742573, "grad_norm": 0.14864784479141235, "learning_rate": 0.002, "loss": 2.561, "step": 152690 }, { "epoch": 0.3042123549662119, "grad_norm": 0.14139795303344727, "learning_rate": 0.002, "loss": 2.5582, "step": 152700 }, { "epoch": 0.3042322771898508, "grad_norm": 0.18680627644062042, "learning_rate": 0.002, "loss": 2.5647, "step": 152710 }, { "epoch": 0.30425219941348974, "grad_norm": 0.16681604087352753, "learning_rate": 0.002, "loss": 2.5538, "step": 152720 }, { "epoch": 0.30427212163712863, "grad_norm": 0.17988359928131104, "learning_rate": 0.002, "loss": 2.5518, "step": 152730 }, { "epoch": 0.3042920438607676, "grad_norm": 0.14472629129886627, "learning_rate": 0.002, "loss": 2.5535, "step": 152740 }, { "epoch": 0.30431196608440647, "grad_norm": 0.19322189688682556, "learning_rate": 0.002, "loss": 2.5583, "step": 152750 }, { "epoch": 0.3043318883080454, "grad_norm": 0.16887861490249634, "learning_rate": 0.002, "loss": 2.5575, "step": 152760 }, { "epoch": 0.3043518105316843, "grad_norm": 0.15641438961029053, "learning_rate": 0.002, "loss": 2.5858, "step": 152770 }, { "epoch": 0.3043717327553232, "grad_norm": 0.25032156705856323, "learning_rate": 0.002, "loss": 2.5638, "step": 152780 }, { "epoch": 0.30439165497896215, "grad_norm": 0.1925996094942093, "learning_rate": 0.002, "loss": 2.5521, "step": 152790 }, { "epoch": 0.30441157720260104, "grad_norm": 0.18370048701763153, "learning_rate": 0.002, "loss": 2.5725, "step": 152800 }, { "epoch": 0.30443149942624, "grad_norm": 0.14949475228786469, "learning_rate": 0.002, "loss": 2.5624, "step": 152810 }, { "epoch": 0.3044514216498789, "grad_norm": 0.17419447004795074, "learning_rate": 0.002, "loss": 2.5724, "step": 152820 }, { "epoch": 0.30447134387351776, "grad_norm": 0.16868101060390472, "learning_rate": 0.002, "loss": 2.567, "step": 152830 }, { "epoch": 0.3044912660971567, "grad_norm": 0.1726890653371811, "learning_rate": 0.002, "loss": 2.5686, "step": 152840 }, { "epoch": 0.3045111883207956, "grad_norm": 0.17422513663768768, "learning_rate": 0.002, "loss": 2.5586, "step": 152850 }, { "epoch": 0.30453111054443455, "grad_norm": 0.15577973425388336, "learning_rate": 0.002, "loss": 2.5717, "step": 152860 }, { "epoch": 0.30455103276807344, "grad_norm": 0.19826464354991913, "learning_rate": 0.002, "loss": 2.5492, "step": 152870 }, { "epoch": 0.30457095499171233, "grad_norm": 0.18497300148010254, "learning_rate": 0.002, "loss": 2.5797, "step": 152880 }, { "epoch": 0.3045908772153513, "grad_norm": 0.15501463413238525, "learning_rate": 0.002, "loss": 2.5574, "step": 152890 }, { "epoch": 0.30461079943899017, "grad_norm": 0.22952091693878174, "learning_rate": 0.002, "loss": 2.5683, "step": 152900 }, { "epoch": 0.3046307216626291, "grad_norm": 0.16895675659179688, "learning_rate": 0.002, "loss": 2.574, "step": 152910 }, { "epoch": 0.304650643886268, "grad_norm": 0.17018885910511017, "learning_rate": 0.002, "loss": 2.5655, "step": 152920 }, { "epoch": 0.30467056610990695, "grad_norm": 0.2026084065437317, "learning_rate": 0.002, "loss": 2.5569, "step": 152930 }, { "epoch": 0.30469048833354584, "grad_norm": 0.15519532561302185, "learning_rate": 0.002, "loss": 2.5648, "step": 152940 }, { "epoch": 0.30471041055718473, "grad_norm": 0.1856948286294937, "learning_rate": 0.002, "loss": 2.5579, "step": 152950 }, { "epoch": 0.3047303327808237, "grad_norm": 0.15283919870853424, "learning_rate": 0.002, "loss": 2.5571, "step": 152960 }, { "epoch": 0.30475025500446257, "grad_norm": 0.16809961199760437, "learning_rate": 0.002, "loss": 2.5728, "step": 152970 }, { "epoch": 0.3047701772281015, "grad_norm": 0.15682260692119598, "learning_rate": 0.002, "loss": 2.5795, "step": 152980 }, { "epoch": 0.3047900994517404, "grad_norm": 0.20642945170402527, "learning_rate": 0.002, "loss": 2.5457, "step": 152990 }, { "epoch": 0.3048100216753793, "grad_norm": 0.16674329340457916, "learning_rate": 0.002, "loss": 2.5704, "step": 153000 }, { "epoch": 0.30482994389901824, "grad_norm": 0.1513805389404297, "learning_rate": 0.002, "loss": 2.5564, "step": 153010 }, { "epoch": 0.30484986612265713, "grad_norm": 0.20825853943824768, "learning_rate": 0.002, "loss": 2.5644, "step": 153020 }, { "epoch": 0.3048697883462961, "grad_norm": 0.15191231667995453, "learning_rate": 0.002, "loss": 2.5715, "step": 153030 }, { "epoch": 0.30488971056993497, "grad_norm": 0.15835155546665192, "learning_rate": 0.002, "loss": 2.5556, "step": 153040 }, { "epoch": 0.3049096327935739, "grad_norm": 0.1680315136909485, "learning_rate": 0.002, "loss": 2.5633, "step": 153050 }, { "epoch": 0.3049295550172128, "grad_norm": 0.16529807448387146, "learning_rate": 0.002, "loss": 2.5597, "step": 153060 }, { "epoch": 0.3049494772408517, "grad_norm": 0.1777535080909729, "learning_rate": 0.002, "loss": 2.5695, "step": 153070 }, { "epoch": 0.30496939946449064, "grad_norm": 0.15355262160301208, "learning_rate": 0.002, "loss": 2.5434, "step": 153080 }, { "epoch": 0.30498932168812953, "grad_norm": 0.16536755859851837, "learning_rate": 0.002, "loss": 2.5586, "step": 153090 }, { "epoch": 0.3050092439117685, "grad_norm": 0.15674881637096405, "learning_rate": 0.002, "loss": 2.5597, "step": 153100 }, { "epoch": 0.30502916613540737, "grad_norm": 0.18197347223758698, "learning_rate": 0.002, "loss": 2.5776, "step": 153110 }, { "epoch": 0.30504908835904626, "grad_norm": 0.17796678841114044, "learning_rate": 0.002, "loss": 2.5721, "step": 153120 }, { "epoch": 0.3050690105826852, "grad_norm": 0.21880251169204712, "learning_rate": 0.002, "loss": 2.5714, "step": 153130 }, { "epoch": 0.3050889328063241, "grad_norm": 0.1556106060743332, "learning_rate": 0.002, "loss": 2.5624, "step": 153140 }, { "epoch": 0.30510885502996304, "grad_norm": 0.1429225206375122, "learning_rate": 0.002, "loss": 2.5514, "step": 153150 }, { "epoch": 0.30512877725360193, "grad_norm": 0.17503449320793152, "learning_rate": 0.002, "loss": 2.5612, "step": 153160 }, { "epoch": 0.3051486994772408, "grad_norm": 0.16866710782051086, "learning_rate": 0.002, "loss": 2.5591, "step": 153170 }, { "epoch": 0.30516862170087977, "grad_norm": 0.21970999240875244, "learning_rate": 0.002, "loss": 2.5646, "step": 153180 }, { "epoch": 0.30518854392451866, "grad_norm": 0.16167931258678436, "learning_rate": 0.002, "loss": 2.5595, "step": 153190 }, { "epoch": 0.3052084661481576, "grad_norm": 0.14988771080970764, "learning_rate": 0.002, "loss": 2.5501, "step": 153200 }, { "epoch": 0.3052283883717965, "grad_norm": 0.15100528299808502, "learning_rate": 0.002, "loss": 2.5431, "step": 153210 }, { "epoch": 0.30524831059543545, "grad_norm": 0.1432880312204361, "learning_rate": 0.002, "loss": 2.5638, "step": 153220 }, { "epoch": 0.30526823281907434, "grad_norm": 0.16664211452007294, "learning_rate": 0.002, "loss": 2.577, "step": 153230 }, { "epoch": 0.3052881550427132, "grad_norm": 0.14490224421024323, "learning_rate": 0.002, "loss": 2.5613, "step": 153240 }, { "epoch": 0.3053080772663522, "grad_norm": 0.1747274547815323, "learning_rate": 0.002, "loss": 2.552, "step": 153250 }, { "epoch": 0.30532799948999106, "grad_norm": 0.17027516663074493, "learning_rate": 0.002, "loss": 2.5679, "step": 153260 }, { "epoch": 0.30534792171363, "grad_norm": 0.20931319892406464, "learning_rate": 0.002, "loss": 2.5653, "step": 153270 }, { "epoch": 0.3053678439372689, "grad_norm": 0.1815011352300644, "learning_rate": 0.002, "loss": 2.5663, "step": 153280 }, { "epoch": 0.3053877661609078, "grad_norm": 0.18766966462135315, "learning_rate": 0.002, "loss": 2.5644, "step": 153290 }, { "epoch": 0.30540768838454674, "grad_norm": 0.1492927372455597, "learning_rate": 0.002, "loss": 2.5523, "step": 153300 }, { "epoch": 0.30542761060818563, "grad_norm": 0.19786182045936584, "learning_rate": 0.002, "loss": 2.5665, "step": 153310 }, { "epoch": 0.3054475328318246, "grad_norm": 0.16895699501037598, "learning_rate": 0.002, "loss": 2.5468, "step": 153320 }, { "epoch": 0.30546745505546347, "grad_norm": 0.14794690907001495, "learning_rate": 0.002, "loss": 2.5472, "step": 153330 }, { "epoch": 0.30548737727910236, "grad_norm": 0.17938725650310516, "learning_rate": 0.002, "loss": 2.5683, "step": 153340 }, { "epoch": 0.3055072995027413, "grad_norm": 0.20033779740333557, "learning_rate": 0.002, "loss": 2.5609, "step": 153350 }, { "epoch": 0.3055272217263802, "grad_norm": 0.16144105792045593, "learning_rate": 0.002, "loss": 2.5599, "step": 153360 }, { "epoch": 0.30554714395001914, "grad_norm": 0.2046923190355301, "learning_rate": 0.002, "loss": 2.5464, "step": 153370 }, { "epoch": 0.30556706617365803, "grad_norm": 0.16599521040916443, "learning_rate": 0.002, "loss": 2.5625, "step": 153380 }, { "epoch": 0.305586988397297, "grad_norm": 0.18739475309848785, "learning_rate": 0.002, "loss": 2.564, "step": 153390 }, { "epoch": 0.30560691062093587, "grad_norm": 0.17953264713287354, "learning_rate": 0.002, "loss": 2.5467, "step": 153400 }, { "epoch": 0.30562683284457476, "grad_norm": 0.15335507690906525, "learning_rate": 0.002, "loss": 2.5543, "step": 153410 }, { "epoch": 0.3056467550682137, "grad_norm": 0.14327135682106018, "learning_rate": 0.002, "loss": 2.5545, "step": 153420 }, { "epoch": 0.3056666772918526, "grad_norm": 0.16714967787265778, "learning_rate": 0.002, "loss": 2.5576, "step": 153430 }, { "epoch": 0.30568659951549154, "grad_norm": 0.18203923106193542, "learning_rate": 0.002, "loss": 2.5597, "step": 153440 }, { "epoch": 0.30570652173913043, "grad_norm": 0.14803890883922577, "learning_rate": 0.002, "loss": 2.5544, "step": 153450 }, { "epoch": 0.3057264439627693, "grad_norm": 0.16565237939357758, "learning_rate": 0.002, "loss": 2.554, "step": 153460 }, { "epoch": 0.30574636618640827, "grad_norm": 0.14678940176963806, "learning_rate": 0.002, "loss": 2.5488, "step": 153470 }, { "epoch": 0.30576628841004716, "grad_norm": 0.18705396354198456, "learning_rate": 0.002, "loss": 2.5477, "step": 153480 }, { "epoch": 0.3057862106336861, "grad_norm": 0.16221974790096283, "learning_rate": 0.002, "loss": 2.5649, "step": 153490 }, { "epoch": 0.305806132857325, "grad_norm": 0.14811667799949646, "learning_rate": 0.002, "loss": 2.5669, "step": 153500 }, { "epoch": 0.30582605508096394, "grad_norm": 0.14938747882843018, "learning_rate": 0.002, "loss": 2.5547, "step": 153510 }, { "epoch": 0.30584597730460283, "grad_norm": 0.18263931572437286, "learning_rate": 0.002, "loss": 2.558, "step": 153520 }, { "epoch": 0.3058658995282417, "grad_norm": 0.1776438057422638, "learning_rate": 0.002, "loss": 2.562, "step": 153530 }, { "epoch": 0.30588582175188067, "grad_norm": 0.19652721285820007, "learning_rate": 0.002, "loss": 2.5638, "step": 153540 }, { "epoch": 0.30590574397551956, "grad_norm": 0.15977153182029724, "learning_rate": 0.002, "loss": 2.5684, "step": 153550 }, { "epoch": 0.3059256661991585, "grad_norm": 0.15599235892295837, "learning_rate": 0.002, "loss": 2.5516, "step": 153560 }, { "epoch": 0.3059455884227974, "grad_norm": 0.17493362724781036, "learning_rate": 0.002, "loss": 2.5624, "step": 153570 }, { "epoch": 0.3059655106464363, "grad_norm": 0.15079265832901, "learning_rate": 0.002, "loss": 2.5676, "step": 153580 }, { "epoch": 0.30598543287007524, "grad_norm": 0.19744344055652618, "learning_rate": 0.002, "loss": 2.5583, "step": 153590 }, { "epoch": 0.3060053550937141, "grad_norm": 0.14647679030895233, "learning_rate": 0.002, "loss": 2.5502, "step": 153600 }, { "epoch": 0.3060252773173531, "grad_norm": 0.1599959135055542, "learning_rate": 0.002, "loss": 2.5675, "step": 153610 }, { "epoch": 0.30604519954099196, "grad_norm": 0.162521630525589, "learning_rate": 0.002, "loss": 2.5678, "step": 153620 }, { "epoch": 0.30606512176463085, "grad_norm": 0.17234274744987488, "learning_rate": 0.002, "loss": 2.5589, "step": 153630 }, { "epoch": 0.3060850439882698, "grad_norm": 0.15319949388504028, "learning_rate": 0.002, "loss": 2.5627, "step": 153640 }, { "epoch": 0.3061049662119087, "grad_norm": 0.13978393375873566, "learning_rate": 0.002, "loss": 2.5608, "step": 153650 }, { "epoch": 0.30612488843554764, "grad_norm": 0.16804425418376923, "learning_rate": 0.002, "loss": 2.572, "step": 153660 }, { "epoch": 0.30614481065918653, "grad_norm": 0.17492267489433289, "learning_rate": 0.002, "loss": 2.5649, "step": 153670 }, { "epoch": 0.3061647328828255, "grad_norm": 0.16196854412555695, "learning_rate": 0.002, "loss": 2.576, "step": 153680 }, { "epoch": 0.30618465510646437, "grad_norm": 0.1482948660850525, "learning_rate": 0.002, "loss": 2.5615, "step": 153690 }, { "epoch": 0.30620457733010326, "grad_norm": 0.15160809457302094, "learning_rate": 0.002, "loss": 2.5621, "step": 153700 }, { "epoch": 0.3062244995537422, "grad_norm": 0.1885698437690735, "learning_rate": 0.002, "loss": 2.5612, "step": 153710 }, { "epoch": 0.3062444217773811, "grad_norm": 0.17293564975261688, "learning_rate": 0.002, "loss": 2.5584, "step": 153720 }, { "epoch": 0.30626434400102004, "grad_norm": 0.15152131021022797, "learning_rate": 0.002, "loss": 2.5658, "step": 153730 }, { "epoch": 0.30628426622465893, "grad_norm": 0.1677922010421753, "learning_rate": 0.002, "loss": 2.5714, "step": 153740 }, { "epoch": 0.3063041884482978, "grad_norm": 0.1808193325996399, "learning_rate": 0.002, "loss": 2.5651, "step": 153750 }, { "epoch": 0.30632411067193677, "grad_norm": 0.2006339132785797, "learning_rate": 0.002, "loss": 2.5701, "step": 153760 }, { "epoch": 0.30634403289557566, "grad_norm": 0.20719948410987854, "learning_rate": 0.002, "loss": 2.5692, "step": 153770 }, { "epoch": 0.3063639551192146, "grad_norm": 0.14395852386951447, "learning_rate": 0.002, "loss": 2.5612, "step": 153780 }, { "epoch": 0.3063838773428535, "grad_norm": 0.16951227188110352, "learning_rate": 0.002, "loss": 2.5701, "step": 153790 }, { "epoch": 0.30640379956649244, "grad_norm": 0.17665787041187286, "learning_rate": 0.002, "loss": 2.5575, "step": 153800 }, { "epoch": 0.30642372179013133, "grad_norm": 0.1687290072441101, "learning_rate": 0.002, "loss": 2.5532, "step": 153810 }, { "epoch": 0.3064436440137702, "grad_norm": 0.1610642969608307, "learning_rate": 0.002, "loss": 2.5685, "step": 153820 }, { "epoch": 0.30646356623740917, "grad_norm": 0.1684805005788803, "learning_rate": 0.002, "loss": 2.5633, "step": 153830 }, { "epoch": 0.30648348846104806, "grad_norm": 0.20454177260398865, "learning_rate": 0.002, "loss": 2.5693, "step": 153840 }, { "epoch": 0.306503410684687, "grad_norm": 0.16477011144161224, "learning_rate": 0.002, "loss": 2.5624, "step": 153850 }, { "epoch": 0.3065233329083259, "grad_norm": 0.15246178209781647, "learning_rate": 0.002, "loss": 2.5559, "step": 153860 }, { "epoch": 0.3065432551319648, "grad_norm": 0.24157437682151794, "learning_rate": 0.002, "loss": 2.5776, "step": 153870 }, { "epoch": 0.30656317735560373, "grad_norm": 0.16983693838119507, "learning_rate": 0.002, "loss": 2.5726, "step": 153880 }, { "epoch": 0.3065830995792426, "grad_norm": 0.1512671709060669, "learning_rate": 0.002, "loss": 2.5532, "step": 153890 }, { "epoch": 0.30660302180288157, "grad_norm": 0.16018477082252502, "learning_rate": 0.002, "loss": 2.5786, "step": 153900 }, { "epoch": 0.30662294402652046, "grad_norm": 0.15370267629623413, "learning_rate": 0.002, "loss": 2.5582, "step": 153910 }, { "epoch": 0.30664286625015935, "grad_norm": 0.175360769033432, "learning_rate": 0.002, "loss": 2.5533, "step": 153920 }, { "epoch": 0.3066627884737983, "grad_norm": 0.16464169323444366, "learning_rate": 0.002, "loss": 2.5807, "step": 153930 }, { "epoch": 0.3066827106974372, "grad_norm": 0.15043824911117554, "learning_rate": 0.002, "loss": 2.5689, "step": 153940 }, { "epoch": 0.30670263292107613, "grad_norm": 0.17095540463924408, "learning_rate": 0.002, "loss": 2.5678, "step": 153950 }, { "epoch": 0.306722555144715, "grad_norm": 0.16260448098182678, "learning_rate": 0.002, "loss": 2.5629, "step": 153960 }, { "epoch": 0.30674247736835397, "grad_norm": 0.1572699397802353, "learning_rate": 0.002, "loss": 2.5571, "step": 153970 }, { "epoch": 0.30676239959199286, "grad_norm": 0.1643902063369751, "learning_rate": 0.002, "loss": 2.5733, "step": 153980 }, { "epoch": 0.30678232181563175, "grad_norm": 0.15111428499221802, "learning_rate": 0.002, "loss": 2.554, "step": 153990 }, { "epoch": 0.3068022440392707, "grad_norm": 0.1488407552242279, "learning_rate": 0.002, "loss": 2.5672, "step": 154000 }, { "epoch": 0.3068221662629096, "grad_norm": 0.15930509567260742, "learning_rate": 0.002, "loss": 2.5509, "step": 154010 }, { "epoch": 0.30684208848654854, "grad_norm": 0.1745593398809433, "learning_rate": 0.002, "loss": 2.5637, "step": 154020 }, { "epoch": 0.3068620107101874, "grad_norm": 0.16583482921123505, "learning_rate": 0.002, "loss": 2.5582, "step": 154030 }, { "epoch": 0.3068819329338263, "grad_norm": 0.15585707128047943, "learning_rate": 0.002, "loss": 2.5662, "step": 154040 }, { "epoch": 0.30690185515746526, "grad_norm": 0.17419809103012085, "learning_rate": 0.002, "loss": 2.568, "step": 154050 }, { "epoch": 0.30692177738110415, "grad_norm": 0.1743248552083969, "learning_rate": 0.002, "loss": 2.5548, "step": 154060 }, { "epoch": 0.3069416996047431, "grad_norm": 0.17672792077064514, "learning_rate": 0.002, "loss": 2.5529, "step": 154070 }, { "epoch": 0.306961621828382, "grad_norm": 0.15485697984695435, "learning_rate": 0.002, "loss": 2.5698, "step": 154080 }, { "epoch": 0.3069815440520209, "grad_norm": 0.14926940202713013, "learning_rate": 0.002, "loss": 2.5695, "step": 154090 }, { "epoch": 0.30700146627565983, "grad_norm": 0.18422728776931763, "learning_rate": 0.002, "loss": 2.5719, "step": 154100 }, { "epoch": 0.3070213884992987, "grad_norm": 0.3624538481235504, "learning_rate": 0.002, "loss": 2.5707, "step": 154110 }, { "epoch": 0.30704131072293767, "grad_norm": 0.1689148098230362, "learning_rate": 0.002, "loss": 2.5644, "step": 154120 }, { "epoch": 0.30706123294657656, "grad_norm": 0.19467391073703766, "learning_rate": 0.002, "loss": 2.5582, "step": 154130 }, { "epoch": 0.3070811551702155, "grad_norm": 0.17714911699295044, "learning_rate": 0.002, "loss": 2.5629, "step": 154140 }, { "epoch": 0.3071010773938544, "grad_norm": 0.14612127840518951, "learning_rate": 0.002, "loss": 2.5566, "step": 154150 }, { "epoch": 0.3071209996174933, "grad_norm": 0.18323743343353271, "learning_rate": 0.002, "loss": 2.5636, "step": 154160 }, { "epoch": 0.30714092184113223, "grad_norm": 0.16462382674217224, "learning_rate": 0.002, "loss": 2.5682, "step": 154170 }, { "epoch": 0.3071608440647711, "grad_norm": 0.15441052615642548, "learning_rate": 0.002, "loss": 2.5697, "step": 154180 }, { "epoch": 0.30718076628841007, "grad_norm": 0.1681988537311554, "learning_rate": 0.002, "loss": 2.5649, "step": 154190 }, { "epoch": 0.30720068851204896, "grad_norm": 0.16941222548484802, "learning_rate": 0.002, "loss": 2.5419, "step": 154200 }, { "epoch": 0.30722061073568785, "grad_norm": 0.16362054646015167, "learning_rate": 0.002, "loss": 2.5675, "step": 154210 }, { "epoch": 0.3072405329593268, "grad_norm": 0.16868680715560913, "learning_rate": 0.002, "loss": 2.5603, "step": 154220 }, { "epoch": 0.3072604551829657, "grad_norm": 0.1467781364917755, "learning_rate": 0.002, "loss": 2.5601, "step": 154230 }, { "epoch": 0.30728037740660463, "grad_norm": 0.1635197401046753, "learning_rate": 0.002, "loss": 2.5494, "step": 154240 }, { "epoch": 0.3073002996302435, "grad_norm": 0.18222849071025848, "learning_rate": 0.002, "loss": 2.5593, "step": 154250 }, { "epoch": 0.30732022185388247, "grad_norm": 0.1451074481010437, "learning_rate": 0.002, "loss": 2.561, "step": 154260 }, { "epoch": 0.30734014407752136, "grad_norm": 0.1544479876756668, "learning_rate": 0.002, "loss": 2.5671, "step": 154270 }, { "epoch": 0.30736006630116025, "grad_norm": 0.1581842303276062, "learning_rate": 0.002, "loss": 2.5718, "step": 154280 }, { "epoch": 0.3073799885247992, "grad_norm": 0.19602198898792267, "learning_rate": 0.002, "loss": 2.565, "step": 154290 }, { "epoch": 0.3073999107484381, "grad_norm": 0.19398175179958344, "learning_rate": 0.002, "loss": 2.57, "step": 154300 }, { "epoch": 0.30741983297207703, "grad_norm": 0.1611044704914093, "learning_rate": 0.002, "loss": 2.5527, "step": 154310 }, { "epoch": 0.3074397551957159, "grad_norm": 0.173162043094635, "learning_rate": 0.002, "loss": 2.5673, "step": 154320 }, { "epoch": 0.3074596774193548, "grad_norm": 0.17381414771080017, "learning_rate": 0.002, "loss": 2.5728, "step": 154330 }, { "epoch": 0.30747959964299376, "grad_norm": 0.17513123154640198, "learning_rate": 0.002, "loss": 2.568, "step": 154340 }, { "epoch": 0.30749952186663265, "grad_norm": 0.2422357201576233, "learning_rate": 0.002, "loss": 2.5677, "step": 154350 }, { "epoch": 0.3075194440902716, "grad_norm": 0.14228850603103638, "learning_rate": 0.002, "loss": 2.5569, "step": 154360 }, { "epoch": 0.3075393663139105, "grad_norm": 0.1612798422574997, "learning_rate": 0.002, "loss": 2.567, "step": 154370 }, { "epoch": 0.3075592885375494, "grad_norm": 0.1793639212846756, "learning_rate": 0.002, "loss": 2.5655, "step": 154380 }, { "epoch": 0.3075792107611883, "grad_norm": 0.1551319807767868, "learning_rate": 0.002, "loss": 2.5674, "step": 154390 }, { "epoch": 0.3075991329848272, "grad_norm": 0.15447913110256195, "learning_rate": 0.002, "loss": 2.5632, "step": 154400 }, { "epoch": 0.30761905520846616, "grad_norm": 0.17673316597938538, "learning_rate": 0.002, "loss": 2.5671, "step": 154410 }, { "epoch": 0.30763897743210505, "grad_norm": 0.16750618815422058, "learning_rate": 0.002, "loss": 2.5755, "step": 154420 }, { "epoch": 0.307658899655744, "grad_norm": 0.19086125493049622, "learning_rate": 0.002, "loss": 2.5675, "step": 154430 }, { "epoch": 0.3076788218793829, "grad_norm": 0.16562926769256592, "learning_rate": 0.002, "loss": 2.5656, "step": 154440 }, { "epoch": 0.3076987441030218, "grad_norm": 0.1601993292570114, "learning_rate": 0.002, "loss": 2.5587, "step": 154450 }, { "epoch": 0.30771866632666073, "grad_norm": 0.1703374981880188, "learning_rate": 0.002, "loss": 2.5624, "step": 154460 }, { "epoch": 0.3077385885502996, "grad_norm": 0.1900409311056137, "learning_rate": 0.002, "loss": 2.557, "step": 154470 }, { "epoch": 0.30775851077393857, "grad_norm": 0.14127500355243683, "learning_rate": 0.002, "loss": 2.5681, "step": 154480 }, { "epoch": 0.30777843299757746, "grad_norm": 0.18081599473953247, "learning_rate": 0.002, "loss": 2.5604, "step": 154490 }, { "epoch": 0.30779835522121635, "grad_norm": 0.15154965221881866, "learning_rate": 0.002, "loss": 2.5569, "step": 154500 }, { "epoch": 0.3078182774448553, "grad_norm": 0.1702193319797516, "learning_rate": 0.002, "loss": 2.5705, "step": 154510 }, { "epoch": 0.3078381996684942, "grad_norm": 0.16086445748806, "learning_rate": 0.002, "loss": 2.5813, "step": 154520 }, { "epoch": 0.30785812189213313, "grad_norm": 0.16575931012630463, "learning_rate": 0.002, "loss": 2.5545, "step": 154530 }, { "epoch": 0.307878044115772, "grad_norm": 0.1876976042985916, "learning_rate": 0.002, "loss": 2.5569, "step": 154540 }, { "epoch": 0.30789796633941097, "grad_norm": 0.18595317006111145, "learning_rate": 0.002, "loss": 2.5696, "step": 154550 }, { "epoch": 0.30791788856304986, "grad_norm": 0.13854502141475677, "learning_rate": 0.002, "loss": 2.5478, "step": 154560 }, { "epoch": 0.30793781078668875, "grad_norm": 0.15416152775287628, "learning_rate": 0.002, "loss": 2.5613, "step": 154570 }, { "epoch": 0.3079577330103277, "grad_norm": 0.1520124226808548, "learning_rate": 0.002, "loss": 2.5525, "step": 154580 }, { "epoch": 0.3079776552339666, "grad_norm": 0.20407527685165405, "learning_rate": 0.002, "loss": 2.5566, "step": 154590 }, { "epoch": 0.30799757745760553, "grad_norm": 0.16881221532821655, "learning_rate": 0.002, "loss": 2.5679, "step": 154600 }, { "epoch": 0.3080174996812444, "grad_norm": 0.14562031626701355, "learning_rate": 0.002, "loss": 2.5587, "step": 154610 }, { "epoch": 0.3080374219048833, "grad_norm": 0.17199242115020752, "learning_rate": 0.002, "loss": 2.5655, "step": 154620 }, { "epoch": 0.30805734412852226, "grad_norm": 0.17506949603557587, "learning_rate": 0.002, "loss": 2.5627, "step": 154630 }, { "epoch": 0.30807726635216115, "grad_norm": 0.15464767813682556, "learning_rate": 0.002, "loss": 2.5524, "step": 154640 }, { "epoch": 0.3080971885758001, "grad_norm": 0.2067991942167282, "learning_rate": 0.002, "loss": 2.5624, "step": 154650 }, { "epoch": 0.308117110799439, "grad_norm": 0.15309758484363556, "learning_rate": 0.002, "loss": 2.5692, "step": 154660 }, { "epoch": 0.3081370330230779, "grad_norm": 0.16302213072776794, "learning_rate": 0.002, "loss": 2.5749, "step": 154670 }, { "epoch": 0.3081569552467168, "grad_norm": 0.21523621678352356, "learning_rate": 0.002, "loss": 2.5619, "step": 154680 }, { "epoch": 0.3081768774703557, "grad_norm": 0.18097452819347382, "learning_rate": 0.002, "loss": 2.556, "step": 154690 }, { "epoch": 0.30819679969399466, "grad_norm": 0.14760419726371765, "learning_rate": 0.002, "loss": 2.5687, "step": 154700 }, { "epoch": 0.30821672191763355, "grad_norm": 0.17545194923877716, "learning_rate": 0.002, "loss": 2.5622, "step": 154710 }, { "epoch": 0.3082366441412725, "grad_norm": 0.16009342670440674, "learning_rate": 0.002, "loss": 2.5513, "step": 154720 }, { "epoch": 0.3082565663649114, "grad_norm": 0.1674147993326187, "learning_rate": 0.002, "loss": 2.5594, "step": 154730 }, { "epoch": 0.3082764885885503, "grad_norm": 0.17563560605049133, "learning_rate": 0.002, "loss": 2.5499, "step": 154740 }, { "epoch": 0.3082964108121892, "grad_norm": 0.17623353004455566, "learning_rate": 0.002, "loss": 2.5582, "step": 154750 }, { "epoch": 0.3083163330358281, "grad_norm": 0.1595218926668167, "learning_rate": 0.002, "loss": 2.5474, "step": 154760 }, { "epoch": 0.30833625525946706, "grad_norm": 0.1610594540834427, "learning_rate": 0.002, "loss": 2.5675, "step": 154770 }, { "epoch": 0.30835617748310595, "grad_norm": 0.16739283502101898, "learning_rate": 0.002, "loss": 2.5582, "step": 154780 }, { "epoch": 0.30837609970674484, "grad_norm": 0.15616348385810852, "learning_rate": 0.002, "loss": 2.5496, "step": 154790 }, { "epoch": 0.3083960219303838, "grad_norm": 0.25066709518432617, "learning_rate": 0.002, "loss": 2.5496, "step": 154800 }, { "epoch": 0.3084159441540227, "grad_norm": 0.16715478897094727, "learning_rate": 0.002, "loss": 2.5644, "step": 154810 }, { "epoch": 0.3084358663776616, "grad_norm": 0.16317474842071533, "learning_rate": 0.002, "loss": 2.5665, "step": 154820 }, { "epoch": 0.3084557886013005, "grad_norm": 0.17077405750751495, "learning_rate": 0.002, "loss": 2.566, "step": 154830 }, { "epoch": 0.30847571082493946, "grad_norm": 0.15406058728694916, "learning_rate": 0.002, "loss": 2.5679, "step": 154840 }, { "epoch": 0.30849563304857835, "grad_norm": 0.14980082213878632, "learning_rate": 0.002, "loss": 2.5678, "step": 154850 }, { "epoch": 0.30851555527221725, "grad_norm": 0.18957321345806122, "learning_rate": 0.002, "loss": 2.5758, "step": 154860 }, { "epoch": 0.3085354774958562, "grad_norm": 0.14920812845230103, "learning_rate": 0.002, "loss": 2.5666, "step": 154870 }, { "epoch": 0.3085553997194951, "grad_norm": 0.17235888540744781, "learning_rate": 0.002, "loss": 2.5593, "step": 154880 }, { "epoch": 0.30857532194313403, "grad_norm": 0.16782644391059875, "learning_rate": 0.002, "loss": 2.5692, "step": 154890 }, { "epoch": 0.3085952441667729, "grad_norm": 0.20406624674797058, "learning_rate": 0.002, "loss": 2.5571, "step": 154900 }, { "epoch": 0.3086151663904118, "grad_norm": 0.17605818808078766, "learning_rate": 0.002, "loss": 2.5688, "step": 154910 }, { "epoch": 0.30863508861405076, "grad_norm": 0.1690976917743683, "learning_rate": 0.002, "loss": 2.5599, "step": 154920 }, { "epoch": 0.30865501083768965, "grad_norm": 0.18683664500713348, "learning_rate": 0.002, "loss": 2.5634, "step": 154930 }, { "epoch": 0.3086749330613286, "grad_norm": 0.18985246121883392, "learning_rate": 0.002, "loss": 2.5662, "step": 154940 }, { "epoch": 0.3086948552849675, "grad_norm": 0.16817207634449005, "learning_rate": 0.002, "loss": 2.559, "step": 154950 }, { "epoch": 0.3087147775086064, "grad_norm": 0.15215277671813965, "learning_rate": 0.002, "loss": 2.5726, "step": 154960 }, { "epoch": 0.3087346997322453, "grad_norm": 0.16906918585300446, "learning_rate": 0.002, "loss": 2.5686, "step": 154970 }, { "epoch": 0.3087546219558842, "grad_norm": 0.1685880571603775, "learning_rate": 0.002, "loss": 2.5539, "step": 154980 }, { "epoch": 0.30877454417952316, "grad_norm": 0.16180264949798584, "learning_rate": 0.002, "loss": 2.5528, "step": 154990 }, { "epoch": 0.30879446640316205, "grad_norm": 0.25270578265190125, "learning_rate": 0.002, "loss": 2.5662, "step": 155000 }, { "epoch": 0.308814388626801, "grad_norm": 0.1477765589952469, "learning_rate": 0.002, "loss": 2.5436, "step": 155010 }, { "epoch": 0.3088343108504399, "grad_norm": 0.14407160878181458, "learning_rate": 0.002, "loss": 2.5544, "step": 155020 }, { "epoch": 0.3088542330740788, "grad_norm": 0.16648639738559723, "learning_rate": 0.002, "loss": 2.5667, "step": 155030 }, { "epoch": 0.3088741552977177, "grad_norm": 0.1587011069059372, "learning_rate": 0.002, "loss": 2.5583, "step": 155040 }, { "epoch": 0.3088940775213566, "grad_norm": 0.1979697048664093, "learning_rate": 0.002, "loss": 2.5474, "step": 155050 }, { "epoch": 0.30891399974499556, "grad_norm": 0.18290463089942932, "learning_rate": 0.002, "loss": 2.5699, "step": 155060 }, { "epoch": 0.30893392196863445, "grad_norm": 0.18153263628482819, "learning_rate": 0.002, "loss": 2.5576, "step": 155070 }, { "epoch": 0.30895384419227334, "grad_norm": 0.19404633343219757, "learning_rate": 0.002, "loss": 2.5665, "step": 155080 }, { "epoch": 0.3089737664159123, "grad_norm": 0.1639605015516281, "learning_rate": 0.002, "loss": 2.5598, "step": 155090 }, { "epoch": 0.3089936886395512, "grad_norm": 0.147393137216568, "learning_rate": 0.002, "loss": 2.5536, "step": 155100 }, { "epoch": 0.3090136108631901, "grad_norm": 0.1679803431034088, "learning_rate": 0.002, "loss": 2.5589, "step": 155110 }, { "epoch": 0.309033533086829, "grad_norm": 0.16893132030963898, "learning_rate": 0.002, "loss": 2.57, "step": 155120 }, { "epoch": 0.3090534553104679, "grad_norm": 0.16052460670471191, "learning_rate": 0.002, "loss": 2.5651, "step": 155130 }, { "epoch": 0.30907337753410685, "grad_norm": 0.2056979537010193, "learning_rate": 0.002, "loss": 2.5722, "step": 155140 }, { "epoch": 0.30909329975774574, "grad_norm": 0.17269712686538696, "learning_rate": 0.002, "loss": 2.561, "step": 155150 }, { "epoch": 0.3091132219813847, "grad_norm": 0.13480636477470398, "learning_rate": 0.002, "loss": 2.5543, "step": 155160 }, { "epoch": 0.3091331442050236, "grad_norm": 0.1614934504032135, "learning_rate": 0.002, "loss": 2.5682, "step": 155170 }, { "epoch": 0.3091530664286625, "grad_norm": 0.20441214740276337, "learning_rate": 0.002, "loss": 2.546, "step": 155180 }, { "epoch": 0.3091729886523014, "grad_norm": 0.15875771641731262, "learning_rate": 0.002, "loss": 2.5543, "step": 155190 }, { "epoch": 0.3091929108759403, "grad_norm": 0.24116584658622742, "learning_rate": 0.002, "loss": 2.5653, "step": 155200 }, { "epoch": 0.30921283309957925, "grad_norm": 0.150725319981575, "learning_rate": 0.002, "loss": 2.5534, "step": 155210 }, { "epoch": 0.30923275532321814, "grad_norm": 0.16230913996696472, "learning_rate": 0.002, "loss": 2.5575, "step": 155220 }, { "epoch": 0.3092526775468571, "grad_norm": 0.17134752869606018, "learning_rate": 0.002, "loss": 2.5571, "step": 155230 }, { "epoch": 0.309272599770496, "grad_norm": 0.17487291991710663, "learning_rate": 0.002, "loss": 2.5374, "step": 155240 }, { "epoch": 0.3092925219941349, "grad_norm": 0.1480570286512375, "learning_rate": 0.002, "loss": 2.5574, "step": 155250 }, { "epoch": 0.3093124442177738, "grad_norm": 0.18289358913898468, "learning_rate": 0.002, "loss": 2.5685, "step": 155260 }, { "epoch": 0.3093323664414127, "grad_norm": 0.1769896000623703, "learning_rate": 0.002, "loss": 2.5726, "step": 155270 }, { "epoch": 0.30935228866505166, "grad_norm": 0.18903985619544983, "learning_rate": 0.002, "loss": 2.5486, "step": 155280 }, { "epoch": 0.30937221088869055, "grad_norm": 0.1745549589395523, "learning_rate": 0.002, "loss": 2.5726, "step": 155290 }, { "epoch": 0.3093921331123295, "grad_norm": 0.17869694530963898, "learning_rate": 0.002, "loss": 2.5694, "step": 155300 }, { "epoch": 0.3094120553359684, "grad_norm": 0.16119731962680817, "learning_rate": 0.002, "loss": 2.5863, "step": 155310 }, { "epoch": 0.3094319775596073, "grad_norm": 0.1657218337059021, "learning_rate": 0.002, "loss": 2.558, "step": 155320 }, { "epoch": 0.3094518997832462, "grad_norm": 0.14899343252182007, "learning_rate": 0.002, "loss": 2.5631, "step": 155330 }, { "epoch": 0.3094718220068851, "grad_norm": 0.20287767052650452, "learning_rate": 0.002, "loss": 2.5608, "step": 155340 }, { "epoch": 0.30949174423052406, "grad_norm": 0.15829119086265564, "learning_rate": 0.002, "loss": 2.5461, "step": 155350 }, { "epoch": 0.30951166645416295, "grad_norm": 0.1666381061077118, "learning_rate": 0.002, "loss": 2.5731, "step": 155360 }, { "epoch": 0.30953158867780184, "grad_norm": 0.1679789125919342, "learning_rate": 0.002, "loss": 2.5741, "step": 155370 }, { "epoch": 0.3095515109014408, "grad_norm": 0.16351908445358276, "learning_rate": 0.002, "loss": 2.5521, "step": 155380 }, { "epoch": 0.3095714331250797, "grad_norm": 0.17139668762683868, "learning_rate": 0.002, "loss": 2.5542, "step": 155390 }, { "epoch": 0.3095913553487186, "grad_norm": 0.1789097934961319, "learning_rate": 0.002, "loss": 2.563, "step": 155400 }, { "epoch": 0.3096112775723575, "grad_norm": 0.15012343227863312, "learning_rate": 0.002, "loss": 2.5668, "step": 155410 }, { "epoch": 0.3096311997959964, "grad_norm": 0.14265786111354828, "learning_rate": 0.002, "loss": 2.5722, "step": 155420 }, { "epoch": 0.30965112201963535, "grad_norm": 0.16949674487113953, "learning_rate": 0.002, "loss": 2.5761, "step": 155430 }, { "epoch": 0.30967104424327424, "grad_norm": 0.17587317526340485, "learning_rate": 0.002, "loss": 2.5645, "step": 155440 }, { "epoch": 0.3096909664669132, "grad_norm": 0.16311903297901154, "learning_rate": 0.002, "loss": 2.5608, "step": 155450 }, { "epoch": 0.3097108886905521, "grad_norm": 0.1809464693069458, "learning_rate": 0.002, "loss": 2.552, "step": 155460 }, { "epoch": 0.309730810914191, "grad_norm": 0.156824991106987, "learning_rate": 0.002, "loss": 2.5517, "step": 155470 }, { "epoch": 0.3097507331378299, "grad_norm": 0.18040890991687775, "learning_rate": 0.002, "loss": 2.5574, "step": 155480 }, { "epoch": 0.3097706553614688, "grad_norm": 0.14136652648448944, "learning_rate": 0.002, "loss": 2.559, "step": 155490 }, { "epoch": 0.30979057758510775, "grad_norm": 0.17136722803115845, "learning_rate": 0.002, "loss": 2.5552, "step": 155500 }, { "epoch": 0.30981049980874664, "grad_norm": 0.15531790256500244, "learning_rate": 0.002, "loss": 2.5594, "step": 155510 }, { "epoch": 0.3098304220323856, "grad_norm": 0.40507808327674866, "learning_rate": 0.002, "loss": 2.56, "step": 155520 }, { "epoch": 0.3098503442560245, "grad_norm": 0.15891391038894653, "learning_rate": 0.002, "loss": 2.5633, "step": 155530 }, { "epoch": 0.30987026647966337, "grad_norm": 0.17522205412387848, "learning_rate": 0.002, "loss": 2.566, "step": 155540 }, { "epoch": 0.3098901887033023, "grad_norm": 0.16152487695217133, "learning_rate": 0.002, "loss": 2.5594, "step": 155550 }, { "epoch": 0.3099101109269412, "grad_norm": 0.14018036425113678, "learning_rate": 0.002, "loss": 2.5484, "step": 155560 }, { "epoch": 0.30993003315058015, "grad_norm": 0.15241600573062897, "learning_rate": 0.002, "loss": 2.5491, "step": 155570 }, { "epoch": 0.30994995537421904, "grad_norm": 0.19691458344459534, "learning_rate": 0.002, "loss": 2.5647, "step": 155580 }, { "epoch": 0.309969877597858, "grad_norm": 0.1536998450756073, "learning_rate": 0.002, "loss": 2.5737, "step": 155590 }, { "epoch": 0.3099897998214969, "grad_norm": 0.1689327210187912, "learning_rate": 0.002, "loss": 2.5553, "step": 155600 }, { "epoch": 0.31000972204513577, "grad_norm": 0.18551988899707794, "learning_rate": 0.002, "loss": 2.5725, "step": 155610 }, { "epoch": 0.3100296442687747, "grad_norm": 0.17902377247810364, "learning_rate": 0.002, "loss": 2.574, "step": 155620 }, { "epoch": 0.3100495664924136, "grad_norm": 0.1560482680797577, "learning_rate": 0.002, "loss": 2.5705, "step": 155630 }, { "epoch": 0.31006948871605255, "grad_norm": 0.1725250780582428, "learning_rate": 0.002, "loss": 2.5626, "step": 155640 }, { "epoch": 0.31008941093969145, "grad_norm": 0.1627543419599533, "learning_rate": 0.002, "loss": 2.559, "step": 155650 }, { "epoch": 0.31010933316333034, "grad_norm": 0.15750940144062042, "learning_rate": 0.002, "loss": 2.5521, "step": 155660 }, { "epoch": 0.3101292553869693, "grad_norm": 0.14675255119800568, "learning_rate": 0.002, "loss": 2.5738, "step": 155670 }, { "epoch": 0.3101491776106082, "grad_norm": 0.1724756956100464, "learning_rate": 0.002, "loss": 2.573, "step": 155680 }, { "epoch": 0.3101690998342471, "grad_norm": 0.1624387949705124, "learning_rate": 0.002, "loss": 2.5503, "step": 155690 }, { "epoch": 0.310189022057886, "grad_norm": 0.15674905478954315, "learning_rate": 0.002, "loss": 2.5729, "step": 155700 }, { "epoch": 0.3102089442815249, "grad_norm": 0.15700465440750122, "learning_rate": 0.002, "loss": 2.568, "step": 155710 }, { "epoch": 0.31022886650516385, "grad_norm": 0.17299184203147888, "learning_rate": 0.002, "loss": 2.5696, "step": 155720 }, { "epoch": 0.31024878872880274, "grad_norm": 0.18348293006420135, "learning_rate": 0.002, "loss": 2.5582, "step": 155730 }, { "epoch": 0.3102687109524417, "grad_norm": 0.16149936616420746, "learning_rate": 0.002, "loss": 2.5597, "step": 155740 }, { "epoch": 0.3102886331760806, "grad_norm": 0.15441466867923737, "learning_rate": 0.002, "loss": 2.5518, "step": 155750 }, { "epoch": 0.3103085553997195, "grad_norm": 0.16902591288089752, "learning_rate": 0.002, "loss": 2.5673, "step": 155760 }, { "epoch": 0.3103284776233584, "grad_norm": 0.1600186675786972, "learning_rate": 0.002, "loss": 2.5753, "step": 155770 }, { "epoch": 0.3103483998469973, "grad_norm": 0.15568724274635315, "learning_rate": 0.002, "loss": 2.5627, "step": 155780 }, { "epoch": 0.31036832207063625, "grad_norm": 0.16499273478984833, "learning_rate": 0.002, "loss": 2.5889, "step": 155790 }, { "epoch": 0.31038824429427514, "grad_norm": 0.17268013954162598, "learning_rate": 0.002, "loss": 2.5758, "step": 155800 }, { "epoch": 0.3104081665179141, "grad_norm": 0.1477573662996292, "learning_rate": 0.002, "loss": 2.5535, "step": 155810 }, { "epoch": 0.310428088741553, "grad_norm": 0.1770562380552292, "learning_rate": 0.002, "loss": 2.5579, "step": 155820 }, { "epoch": 0.31044801096519187, "grad_norm": 0.14920461177825928, "learning_rate": 0.002, "loss": 2.5563, "step": 155830 }, { "epoch": 0.3104679331888308, "grad_norm": 0.17697773873806, "learning_rate": 0.002, "loss": 2.5559, "step": 155840 }, { "epoch": 0.3104878554124697, "grad_norm": 0.15434995293617249, "learning_rate": 0.002, "loss": 2.563, "step": 155850 }, { "epoch": 0.31050777763610865, "grad_norm": 0.15251347422599792, "learning_rate": 0.002, "loss": 2.5656, "step": 155860 }, { "epoch": 0.31052769985974754, "grad_norm": 0.17353375256061554, "learning_rate": 0.002, "loss": 2.5751, "step": 155870 }, { "epoch": 0.31054762208338643, "grad_norm": 0.16209255158901215, "learning_rate": 0.002, "loss": 2.5668, "step": 155880 }, { "epoch": 0.3105675443070254, "grad_norm": 0.20836681127548218, "learning_rate": 0.002, "loss": 2.5689, "step": 155890 }, { "epoch": 0.31058746653066427, "grad_norm": 0.14487825334072113, "learning_rate": 0.002, "loss": 2.5647, "step": 155900 }, { "epoch": 0.3106073887543032, "grad_norm": 0.14233902096748352, "learning_rate": 0.002, "loss": 2.5688, "step": 155910 }, { "epoch": 0.3106273109779421, "grad_norm": 0.1544981598854065, "learning_rate": 0.002, "loss": 2.551, "step": 155920 }, { "epoch": 0.31064723320158105, "grad_norm": 0.18448664247989655, "learning_rate": 0.002, "loss": 2.5657, "step": 155930 }, { "epoch": 0.31066715542521994, "grad_norm": 0.19185154139995575, "learning_rate": 0.002, "loss": 2.555, "step": 155940 }, { "epoch": 0.31068707764885883, "grad_norm": 0.17852604389190674, "learning_rate": 0.002, "loss": 2.5689, "step": 155950 }, { "epoch": 0.3107069998724978, "grad_norm": 0.15290026366710663, "learning_rate": 0.002, "loss": 2.5648, "step": 155960 }, { "epoch": 0.31072692209613667, "grad_norm": 0.1757221221923828, "learning_rate": 0.002, "loss": 2.5563, "step": 155970 }, { "epoch": 0.3107468443197756, "grad_norm": 0.15805083513259888, "learning_rate": 0.002, "loss": 2.5621, "step": 155980 }, { "epoch": 0.3107667665434145, "grad_norm": 0.1529826670885086, "learning_rate": 0.002, "loss": 2.5654, "step": 155990 }, { "epoch": 0.3107866887670534, "grad_norm": 0.17084656655788422, "learning_rate": 0.002, "loss": 2.5787, "step": 156000 }, { "epoch": 0.31080661099069234, "grad_norm": 0.168635293841362, "learning_rate": 0.002, "loss": 2.562, "step": 156010 }, { "epoch": 0.31082653321433124, "grad_norm": 0.1502845138311386, "learning_rate": 0.002, "loss": 2.5602, "step": 156020 }, { "epoch": 0.3108464554379702, "grad_norm": 0.16862118244171143, "learning_rate": 0.002, "loss": 2.5603, "step": 156030 }, { "epoch": 0.3108663776616091, "grad_norm": 0.1621696799993515, "learning_rate": 0.002, "loss": 2.5644, "step": 156040 }, { "epoch": 0.310886299885248, "grad_norm": 0.1795773059129715, "learning_rate": 0.002, "loss": 2.5612, "step": 156050 }, { "epoch": 0.3109062221088869, "grad_norm": 0.17563003301620483, "learning_rate": 0.002, "loss": 2.5489, "step": 156060 }, { "epoch": 0.3109261443325258, "grad_norm": 0.1455945074558258, "learning_rate": 0.002, "loss": 2.5703, "step": 156070 }, { "epoch": 0.31094606655616475, "grad_norm": 0.15766239166259766, "learning_rate": 0.002, "loss": 2.5691, "step": 156080 }, { "epoch": 0.31096598877980364, "grad_norm": 0.171942800283432, "learning_rate": 0.002, "loss": 2.5558, "step": 156090 }, { "epoch": 0.3109859110034426, "grad_norm": 0.18133777379989624, "learning_rate": 0.002, "loss": 2.5552, "step": 156100 }, { "epoch": 0.3110058332270815, "grad_norm": 0.14786207675933838, "learning_rate": 0.002, "loss": 2.5553, "step": 156110 }, { "epoch": 0.31102575545072036, "grad_norm": 0.21940378844738007, "learning_rate": 0.002, "loss": 2.584, "step": 156120 }, { "epoch": 0.3110456776743593, "grad_norm": 0.1805347353219986, "learning_rate": 0.002, "loss": 2.5723, "step": 156130 }, { "epoch": 0.3110655998979982, "grad_norm": 0.15912146866321564, "learning_rate": 0.002, "loss": 2.5678, "step": 156140 }, { "epoch": 0.31108552212163715, "grad_norm": 0.15063652396202087, "learning_rate": 0.002, "loss": 2.5622, "step": 156150 }, { "epoch": 0.31110544434527604, "grad_norm": 0.22801779210567474, "learning_rate": 0.002, "loss": 2.5528, "step": 156160 }, { "epoch": 0.31112536656891493, "grad_norm": 0.14570510387420654, "learning_rate": 0.002, "loss": 2.5495, "step": 156170 }, { "epoch": 0.3111452887925539, "grad_norm": 0.18582525849342346, "learning_rate": 0.002, "loss": 2.5509, "step": 156180 }, { "epoch": 0.31116521101619277, "grad_norm": 0.15082338452339172, "learning_rate": 0.002, "loss": 2.5467, "step": 156190 }, { "epoch": 0.3111851332398317, "grad_norm": 0.17448292672634125, "learning_rate": 0.002, "loss": 2.5625, "step": 156200 }, { "epoch": 0.3112050554634706, "grad_norm": 0.1889197826385498, "learning_rate": 0.002, "loss": 2.5604, "step": 156210 }, { "epoch": 0.31122497768710955, "grad_norm": 0.16348931193351746, "learning_rate": 0.002, "loss": 2.5523, "step": 156220 }, { "epoch": 0.31124489991074844, "grad_norm": 0.16488470137119293, "learning_rate": 0.002, "loss": 2.5548, "step": 156230 }, { "epoch": 0.31126482213438733, "grad_norm": 0.18125098943710327, "learning_rate": 0.002, "loss": 2.5564, "step": 156240 }, { "epoch": 0.3112847443580263, "grad_norm": 0.15922655165195465, "learning_rate": 0.002, "loss": 2.5584, "step": 156250 }, { "epoch": 0.31130466658166517, "grad_norm": 0.16815701127052307, "learning_rate": 0.002, "loss": 2.5741, "step": 156260 }, { "epoch": 0.3113245888053041, "grad_norm": 0.1504151076078415, "learning_rate": 0.002, "loss": 2.5572, "step": 156270 }, { "epoch": 0.311344511028943, "grad_norm": 0.14590886235237122, "learning_rate": 0.002, "loss": 2.5592, "step": 156280 }, { "epoch": 0.3113644332525819, "grad_norm": 0.18265904486179352, "learning_rate": 0.002, "loss": 2.5442, "step": 156290 }, { "epoch": 0.31138435547622084, "grad_norm": 0.1620369702577591, "learning_rate": 0.002, "loss": 2.5603, "step": 156300 }, { "epoch": 0.31140427769985973, "grad_norm": 0.17733533680438995, "learning_rate": 0.002, "loss": 2.5742, "step": 156310 }, { "epoch": 0.3114241999234987, "grad_norm": 0.17035190761089325, "learning_rate": 0.002, "loss": 2.5782, "step": 156320 }, { "epoch": 0.31144412214713757, "grad_norm": 0.15576830506324768, "learning_rate": 0.002, "loss": 2.5536, "step": 156330 }, { "epoch": 0.3114640443707765, "grad_norm": 0.20487208664417267, "learning_rate": 0.002, "loss": 2.5559, "step": 156340 }, { "epoch": 0.3114839665944154, "grad_norm": 0.14615637063980103, "learning_rate": 0.002, "loss": 2.546, "step": 156350 }, { "epoch": 0.3115038888180543, "grad_norm": 0.1616392880678177, "learning_rate": 0.002, "loss": 2.557, "step": 156360 }, { "epoch": 0.31152381104169324, "grad_norm": 0.19066371023654938, "learning_rate": 0.002, "loss": 2.5537, "step": 156370 }, { "epoch": 0.31154373326533213, "grad_norm": 0.1443023979663849, "learning_rate": 0.002, "loss": 2.5759, "step": 156380 }, { "epoch": 0.3115636554889711, "grad_norm": 0.20842194557189941, "learning_rate": 0.002, "loss": 2.5753, "step": 156390 }, { "epoch": 0.31158357771260997, "grad_norm": 0.1650157868862152, "learning_rate": 0.002, "loss": 2.5645, "step": 156400 }, { "epoch": 0.31160349993624886, "grad_norm": 0.17839495837688446, "learning_rate": 0.002, "loss": 2.5692, "step": 156410 }, { "epoch": 0.3116234221598878, "grad_norm": 0.16917015612125397, "learning_rate": 0.002, "loss": 2.5662, "step": 156420 }, { "epoch": 0.3116433443835267, "grad_norm": 0.152169868350029, "learning_rate": 0.002, "loss": 2.5551, "step": 156430 }, { "epoch": 0.31166326660716565, "grad_norm": 0.14167864620685577, "learning_rate": 0.002, "loss": 2.5638, "step": 156440 }, { "epoch": 0.31168318883080454, "grad_norm": 0.19422860443592072, "learning_rate": 0.002, "loss": 2.5665, "step": 156450 }, { "epoch": 0.3117031110544434, "grad_norm": 0.17765377461910248, "learning_rate": 0.002, "loss": 2.568, "step": 156460 }, { "epoch": 0.3117230332780824, "grad_norm": 0.16905193030834198, "learning_rate": 0.002, "loss": 2.5678, "step": 156470 }, { "epoch": 0.31174295550172126, "grad_norm": 0.1616445779800415, "learning_rate": 0.002, "loss": 2.5565, "step": 156480 }, { "epoch": 0.3117628777253602, "grad_norm": 0.17058688402175903, "learning_rate": 0.002, "loss": 2.5639, "step": 156490 }, { "epoch": 0.3117827999489991, "grad_norm": 0.18819096684455872, "learning_rate": 0.002, "loss": 2.5565, "step": 156500 }, { "epoch": 0.31180272217263805, "grad_norm": 0.2189258337020874, "learning_rate": 0.002, "loss": 2.5688, "step": 156510 }, { "epoch": 0.31182264439627694, "grad_norm": 0.16717217862606049, "learning_rate": 0.002, "loss": 2.5546, "step": 156520 }, { "epoch": 0.31184256661991583, "grad_norm": 0.17488519847393036, "learning_rate": 0.002, "loss": 2.5589, "step": 156530 }, { "epoch": 0.3118624888435548, "grad_norm": 0.17287756502628326, "learning_rate": 0.002, "loss": 2.5787, "step": 156540 }, { "epoch": 0.31188241106719367, "grad_norm": 0.18649601936340332, "learning_rate": 0.002, "loss": 2.5551, "step": 156550 }, { "epoch": 0.3119023332908326, "grad_norm": 0.16536951065063477, "learning_rate": 0.002, "loss": 2.5606, "step": 156560 }, { "epoch": 0.3119222555144715, "grad_norm": 0.160433828830719, "learning_rate": 0.002, "loss": 2.5672, "step": 156570 }, { "epoch": 0.3119421777381104, "grad_norm": 0.19448670744895935, "learning_rate": 0.002, "loss": 2.5621, "step": 156580 }, { "epoch": 0.31196209996174934, "grad_norm": 0.18214087188243866, "learning_rate": 0.002, "loss": 2.5702, "step": 156590 }, { "epoch": 0.31198202218538823, "grad_norm": 0.1556110978126526, "learning_rate": 0.002, "loss": 2.5673, "step": 156600 }, { "epoch": 0.3120019444090272, "grad_norm": 0.16389857232570648, "learning_rate": 0.002, "loss": 2.547, "step": 156610 }, { "epoch": 0.31202186663266607, "grad_norm": 0.16837134957313538, "learning_rate": 0.002, "loss": 2.5755, "step": 156620 }, { "epoch": 0.31204178885630496, "grad_norm": 0.15363872051239014, "learning_rate": 0.002, "loss": 2.56, "step": 156630 }, { "epoch": 0.3120617110799439, "grad_norm": 0.1613827645778656, "learning_rate": 0.002, "loss": 2.5534, "step": 156640 }, { "epoch": 0.3120816333035828, "grad_norm": 0.16113907098770142, "learning_rate": 0.002, "loss": 2.5665, "step": 156650 }, { "epoch": 0.31210155552722174, "grad_norm": 0.23334939777851105, "learning_rate": 0.002, "loss": 2.5686, "step": 156660 }, { "epoch": 0.31212147775086063, "grad_norm": 0.16247530281543732, "learning_rate": 0.002, "loss": 2.5582, "step": 156670 }, { "epoch": 0.3121413999744996, "grad_norm": 0.16033445298671722, "learning_rate": 0.002, "loss": 2.5664, "step": 156680 }, { "epoch": 0.31216132219813847, "grad_norm": 0.1756967008113861, "learning_rate": 0.002, "loss": 2.5616, "step": 156690 }, { "epoch": 0.31218124442177736, "grad_norm": 0.151889368891716, "learning_rate": 0.002, "loss": 2.5644, "step": 156700 }, { "epoch": 0.3122011666454163, "grad_norm": 0.1763795167207718, "learning_rate": 0.002, "loss": 2.5555, "step": 156710 }, { "epoch": 0.3122210888690552, "grad_norm": 0.1837596744298935, "learning_rate": 0.002, "loss": 2.5586, "step": 156720 }, { "epoch": 0.31224101109269414, "grad_norm": 0.16366340219974518, "learning_rate": 0.002, "loss": 2.5556, "step": 156730 }, { "epoch": 0.31226093331633303, "grad_norm": 0.15037909150123596, "learning_rate": 0.002, "loss": 2.552, "step": 156740 }, { "epoch": 0.3122808555399719, "grad_norm": 0.1965627372264862, "learning_rate": 0.002, "loss": 2.5444, "step": 156750 }, { "epoch": 0.31230077776361087, "grad_norm": 0.1569821536540985, "learning_rate": 0.002, "loss": 2.5657, "step": 156760 }, { "epoch": 0.31232069998724976, "grad_norm": 0.17918996512889862, "learning_rate": 0.002, "loss": 2.5704, "step": 156770 }, { "epoch": 0.3123406222108887, "grad_norm": 0.17573890089988708, "learning_rate": 0.002, "loss": 2.5593, "step": 156780 }, { "epoch": 0.3123605444345276, "grad_norm": 0.1532944291830063, "learning_rate": 0.002, "loss": 2.5683, "step": 156790 }, { "epoch": 0.31238046665816654, "grad_norm": 0.1554030328989029, "learning_rate": 0.002, "loss": 2.5592, "step": 156800 }, { "epoch": 0.31240038888180544, "grad_norm": 0.17347511649131775, "learning_rate": 0.002, "loss": 2.5477, "step": 156810 }, { "epoch": 0.3124203111054443, "grad_norm": 0.17968960106372833, "learning_rate": 0.002, "loss": 2.5558, "step": 156820 }, { "epoch": 0.3124402333290833, "grad_norm": 0.16256265342235565, "learning_rate": 0.002, "loss": 2.5635, "step": 156830 }, { "epoch": 0.31246015555272216, "grad_norm": 0.19090521335601807, "learning_rate": 0.002, "loss": 2.5583, "step": 156840 }, { "epoch": 0.3124800777763611, "grad_norm": 0.20688845217227936, "learning_rate": 0.002, "loss": 2.5591, "step": 156850 }, { "epoch": 0.3125, "grad_norm": 0.14659027755260468, "learning_rate": 0.002, "loss": 2.5664, "step": 156860 }, { "epoch": 0.3125199222236389, "grad_norm": 0.19231322407722473, "learning_rate": 0.002, "loss": 2.5712, "step": 156870 }, { "epoch": 0.31253984444727784, "grad_norm": 0.1511104851961136, "learning_rate": 0.002, "loss": 2.5607, "step": 156880 }, { "epoch": 0.3125597666709167, "grad_norm": 0.14535656571388245, "learning_rate": 0.002, "loss": 2.554, "step": 156890 }, { "epoch": 0.3125796888945557, "grad_norm": 0.17615479230880737, "learning_rate": 0.002, "loss": 2.5682, "step": 156900 }, { "epoch": 0.31259961111819456, "grad_norm": 0.1919853389263153, "learning_rate": 0.002, "loss": 2.5681, "step": 156910 }, { "epoch": 0.31261953334183346, "grad_norm": 0.17959731817245483, "learning_rate": 0.002, "loss": 2.5552, "step": 156920 }, { "epoch": 0.3126394555654724, "grad_norm": 0.16345392167568207, "learning_rate": 0.002, "loss": 2.5569, "step": 156930 }, { "epoch": 0.3126593777891113, "grad_norm": 0.15035851299762726, "learning_rate": 0.002, "loss": 2.5528, "step": 156940 }, { "epoch": 0.31267930001275024, "grad_norm": 0.18120911717414856, "learning_rate": 0.002, "loss": 2.5797, "step": 156950 }, { "epoch": 0.31269922223638913, "grad_norm": 0.1989908218383789, "learning_rate": 0.002, "loss": 2.5655, "step": 156960 }, { "epoch": 0.3127191444600281, "grad_norm": 0.1555190086364746, "learning_rate": 0.002, "loss": 2.5678, "step": 156970 }, { "epoch": 0.31273906668366697, "grad_norm": 0.15588991343975067, "learning_rate": 0.002, "loss": 2.5689, "step": 156980 }, { "epoch": 0.31275898890730586, "grad_norm": 0.1899508833885193, "learning_rate": 0.002, "loss": 2.5518, "step": 156990 }, { "epoch": 0.3127789111309448, "grad_norm": 0.18083438277244568, "learning_rate": 0.002, "loss": 2.5747, "step": 157000 }, { "epoch": 0.3127988333545837, "grad_norm": 0.1569991111755371, "learning_rate": 0.002, "loss": 2.5566, "step": 157010 }, { "epoch": 0.31281875557822264, "grad_norm": 0.1973470151424408, "learning_rate": 0.002, "loss": 2.5576, "step": 157020 }, { "epoch": 0.31283867780186153, "grad_norm": 0.1670352816581726, "learning_rate": 0.002, "loss": 2.5532, "step": 157030 }, { "epoch": 0.3128586000255004, "grad_norm": 0.20455235242843628, "learning_rate": 0.002, "loss": 2.5594, "step": 157040 }, { "epoch": 0.31287852224913937, "grad_norm": 0.15381218492984772, "learning_rate": 0.002, "loss": 2.568, "step": 157050 }, { "epoch": 0.31289844447277826, "grad_norm": 0.15013188123703003, "learning_rate": 0.002, "loss": 2.5684, "step": 157060 }, { "epoch": 0.3129183666964172, "grad_norm": 0.1900729387998581, "learning_rate": 0.002, "loss": 2.5578, "step": 157070 }, { "epoch": 0.3129382889200561, "grad_norm": 0.17660100758075714, "learning_rate": 0.002, "loss": 2.562, "step": 157080 }, { "epoch": 0.31295821114369504, "grad_norm": 0.17464682459831238, "learning_rate": 0.002, "loss": 2.5598, "step": 157090 }, { "epoch": 0.31297813336733393, "grad_norm": 0.5820977091789246, "learning_rate": 0.002, "loss": 2.5679, "step": 157100 }, { "epoch": 0.3129980555909728, "grad_norm": 0.18344441056251526, "learning_rate": 0.002, "loss": 2.5719, "step": 157110 }, { "epoch": 0.31301797781461177, "grad_norm": 0.14689868688583374, "learning_rate": 0.002, "loss": 2.5628, "step": 157120 }, { "epoch": 0.31303790003825066, "grad_norm": 0.1555059552192688, "learning_rate": 0.002, "loss": 2.5546, "step": 157130 }, { "epoch": 0.3130578222618896, "grad_norm": 0.15744556486606598, "learning_rate": 0.002, "loss": 2.5646, "step": 157140 }, { "epoch": 0.3130777444855285, "grad_norm": 0.16590583324432373, "learning_rate": 0.002, "loss": 2.5668, "step": 157150 }, { "epoch": 0.3130976667091674, "grad_norm": 0.1603497862815857, "learning_rate": 0.002, "loss": 2.5605, "step": 157160 }, { "epoch": 0.31311758893280633, "grad_norm": 0.14814890921115875, "learning_rate": 0.002, "loss": 2.5679, "step": 157170 }, { "epoch": 0.3131375111564452, "grad_norm": 0.15978173911571503, "learning_rate": 0.002, "loss": 2.5693, "step": 157180 }, { "epoch": 0.31315743338008417, "grad_norm": 0.16867583990097046, "learning_rate": 0.002, "loss": 2.5691, "step": 157190 }, { "epoch": 0.31317735560372306, "grad_norm": 0.192554771900177, "learning_rate": 0.002, "loss": 2.5648, "step": 157200 }, { "epoch": 0.31319727782736195, "grad_norm": 0.14830268919467926, "learning_rate": 0.002, "loss": 2.5645, "step": 157210 }, { "epoch": 0.3132172000510009, "grad_norm": 0.1764550507068634, "learning_rate": 0.002, "loss": 2.5558, "step": 157220 }, { "epoch": 0.3132371222746398, "grad_norm": 0.17110390961170197, "learning_rate": 0.002, "loss": 2.5618, "step": 157230 }, { "epoch": 0.31325704449827874, "grad_norm": 0.14394782483577728, "learning_rate": 0.002, "loss": 2.5647, "step": 157240 }, { "epoch": 0.3132769667219176, "grad_norm": 0.16549508273601532, "learning_rate": 0.002, "loss": 2.5732, "step": 157250 }, { "epoch": 0.3132968889455566, "grad_norm": 0.20328480005264282, "learning_rate": 0.002, "loss": 2.5634, "step": 157260 }, { "epoch": 0.31331681116919546, "grad_norm": 0.14937278628349304, "learning_rate": 0.002, "loss": 2.5538, "step": 157270 }, { "epoch": 0.31333673339283435, "grad_norm": 0.15106412768363953, "learning_rate": 0.002, "loss": 2.5709, "step": 157280 }, { "epoch": 0.3133566556164733, "grad_norm": 0.16293449699878693, "learning_rate": 0.002, "loss": 2.5732, "step": 157290 }, { "epoch": 0.3133765778401122, "grad_norm": 0.19531814754009247, "learning_rate": 0.002, "loss": 2.5569, "step": 157300 }, { "epoch": 0.31339650006375114, "grad_norm": 0.1939082145690918, "learning_rate": 0.002, "loss": 2.5422, "step": 157310 }, { "epoch": 0.31341642228739003, "grad_norm": 0.19934749603271484, "learning_rate": 0.002, "loss": 2.5641, "step": 157320 }, { "epoch": 0.3134363445110289, "grad_norm": 0.21389971673488617, "learning_rate": 0.002, "loss": 2.5696, "step": 157330 }, { "epoch": 0.31345626673466787, "grad_norm": 0.17203862965106964, "learning_rate": 0.002, "loss": 2.5615, "step": 157340 }, { "epoch": 0.31347618895830676, "grad_norm": 0.16261498630046844, "learning_rate": 0.002, "loss": 2.5633, "step": 157350 }, { "epoch": 0.3134961111819457, "grad_norm": 0.14798331260681152, "learning_rate": 0.002, "loss": 2.5547, "step": 157360 }, { "epoch": 0.3135160334055846, "grad_norm": 0.1512799710035324, "learning_rate": 0.002, "loss": 2.5575, "step": 157370 }, { "epoch": 0.3135359556292235, "grad_norm": 0.1628936529159546, "learning_rate": 0.002, "loss": 2.553, "step": 157380 }, { "epoch": 0.31355587785286243, "grad_norm": 0.1644279509782791, "learning_rate": 0.002, "loss": 2.5376, "step": 157390 }, { "epoch": 0.3135758000765013, "grad_norm": 0.1928325593471527, "learning_rate": 0.002, "loss": 2.5757, "step": 157400 }, { "epoch": 0.31359572230014027, "grad_norm": 0.15948545932769775, "learning_rate": 0.002, "loss": 2.5628, "step": 157410 }, { "epoch": 0.31361564452377916, "grad_norm": 0.1697506159543991, "learning_rate": 0.002, "loss": 2.5512, "step": 157420 }, { "epoch": 0.3136355667474181, "grad_norm": 0.14877428114414215, "learning_rate": 0.002, "loss": 2.5724, "step": 157430 }, { "epoch": 0.313655488971057, "grad_norm": 0.17114369571208954, "learning_rate": 0.002, "loss": 2.5723, "step": 157440 }, { "epoch": 0.3136754111946959, "grad_norm": 0.15863382816314697, "learning_rate": 0.002, "loss": 2.5568, "step": 157450 }, { "epoch": 0.31369533341833483, "grad_norm": 0.1535607874393463, "learning_rate": 0.002, "loss": 2.5515, "step": 157460 }, { "epoch": 0.3137152556419737, "grad_norm": 0.15240414440631866, "learning_rate": 0.002, "loss": 2.5713, "step": 157470 }, { "epoch": 0.31373517786561267, "grad_norm": 0.17258815467357635, "learning_rate": 0.002, "loss": 2.5627, "step": 157480 }, { "epoch": 0.31375510008925156, "grad_norm": 0.16456717252731323, "learning_rate": 0.002, "loss": 2.5562, "step": 157490 }, { "epoch": 0.31377502231289045, "grad_norm": 0.16036079823970795, "learning_rate": 0.002, "loss": 2.555, "step": 157500 }, { "epoch": 0.3137949445365294, "grad_norm": 0.18647299706935883, "learning_rate": 0.002, "loss": 2.549, "step": 157510 }, { "epoch": 0.3138148667601683, "grad_norm": 0.1844732016324997, "learning_rate": 0.002, "loss": 2.5535, "step": 157520 }, { "epoch": 0.31383478898380723, "grad_norm": 0.20998218655586243, "learning_rate": 0.002, "loss": 2.5564, "step": 157530 }, { "epoch": 0.3138547112074461, "grad_norm": 0.15233223140239716, "learning_rate": 0.002, "loss": 2.5644, "step": 157540 }, { "epoch": 0.31387463343108507, "grad_norm": 0.18096496164798737, "learning_rate": 0.002, "loss": 2.5636, "step": 157550 }, { "epoch": 0.31389455565472396, "grad_norm": 0.16953182220458984, "learning_rate": 0.002, "loss": 2.5553, "step": 157560 }, { "epoch": 0.31391447787836285, "grad_norm": 0.17641225457191467, "learning_rate": 0.002, "loss": 2.5593, "step": 157570 }, { "epoch": 0.3139344001020018, "grad_norm": 0.1494104117155075, "learning_rate": 0.002, "loss": 2.566, "step": 157580 }, { "epoch": 0.3139543223256407, "grad_norm": 0.1629096120595932, "learning_rate": 0.002, "loss": 2.5526, "step": 157590 }, { "epoch": 0.31397424454927964, "grad_norm": 0.1550700068473816, "learning_rate": 0.002, "loss": 2.5682, "step": 157600 }, { "epoch": 0.3139941667729185, "grad_norm": 0.14477789402008057, "learning_rate": 0.002, "loss": 2.5512, "step": 157610 }, { "epoch": 0.3140140889965574, "grad_norm": 0.1665681004524231, "learning_rate": 0.002, "loss": 2.5586, "step": 157620 }, { "epoch": 0.31403401122019636, "grad_norm": 0.1744915097951889, "learning_rate": 0.002, "loss": 2.5494, "step": 157630 }, { "epoch": 0.31405393344383525, "grad_norm": 0.1933743953704834, "learning_rate": 0.002, "loss": 2.565, "step": 157640 }, { "epoch": 0.3140738556674742, "grad_norm": 0.16491249203681946, "learning_rate": 0.002, "loss": 2.5583, "step": 157650 }, { "epoch": 0.3140937778911131, "grad_norm": 0.17391864955425262, "learning_rate": 0.002, "loss": 2.5663, "step": 157660 }, { "epoch": 0.314113700114752, "grad_norm": 0.16037939488887787, "learning_rate": 0.002, "loss": 2.5623, "step": 157670 }, { "epoch": 0.3141336223383909, "grad_norm": 0.15277329087257385, "learning_rate": 0.002, "loss": 2.5557, "step": 157680 }, { "epoch": 0.3141535445620298, "grad_norm": 0.1694074422121048, "learning_rate": 0.002, "loss": 2.5595, "step": 157690 }, { "epoch": 0.31417346678566876, "grad_norm": 0.24586135149002075, "learning_rate": 0.002, "loss": 2.5644, "step": 157700 }, { "epoch": 0.31419338900930766, "grad_norm": 0.2121090441942215, "learning_rate": 0.002, "loss": 2.5676, "step": 157710 }, { "epoch": 0.3142133112329466, "grad_norm": 0.1645212173461914, "learning_rate": 0.002, "loss": 2.5618, "step": 157720 }, { "epoch": 0.3142332334565855, "grad_norm": 0.16393926739692688, "learning_rate": 0.002, "loss": 2.5716, "step": 157730 }, { "epoch": 0.3142531556802244, "grad_norm": 0.16717174649238586, "learning_rate": 0.002, "loss": 2.5542, "step": 157740 }, { "epoch": 0.31427307790386333, "grad_norm": 0.15057039260864258, "learning_rate": 0.002, "loss": 2.5549, "step": 157750 }, { "epoch": 0.3142930001275022, "grad_norm": 0.18550334870815277, "learning_rate": 0.002, "loss": 2.5356, "step": 157760 }, { "epoch": 0.31431292235114117, "grad_norm": 0.1462491899728775, "learning_rate": 0.002, "loss": 2.5688, "step": 157770 }, { "epoch": 0.31433284457478006, "grad_norm": 0.2087540328502655, "learning_rate": 0.002, "loss": 2.5809, "step": 157780 }, { "epoch": 0.31435276679841895, "grad_norm": 0.1759607493877411, "learning_rate": 0.002, "loss": 2.5585, "step": 157790 }, { "epoch": 0.3143726890220579, "grad_norm": 0.1584227979183197, "learning_rate": 0.002, "loss": 2.558, "step": 157800 }, { "epoch": 0.3143926112456968, "grad_norm": 0.20335078239440918, "learning_rate": 0.002, "loss": 2.5575, "step": 157810 }, { "epoch": 0.31441253346933573, "grad_norm": 0.15744319558143616, "learning_rate": 0.002, "loss": 2.565, "step": 157820 }, { "epoch": 0.3144324556929746, "grad_norm": 0.15940916538238525, "learning_rate": 0.002, "loss": 2.5552, "step": 157830 }, { "epoch": 0.31445237791661357, "grad_norm": 0.15091514587402344, "learning_rate": 0.002, "loss": 2.5688, "step": 157840 }, { "epoch": 0.31447230014025246, "grad_norm": 0.16192087531089783, "learning_rate": 0.002, "loss": 2.5725, "step": 157850 }, { "epoch": 0.31449222236389135, "grad_norm": 0.18994933366775513, "learning_rate": 0.002, "loss": 2.5665, "step": 157860 }, { "epoch": 0.3145121445875303, "grad_norm": 0.15888434648513794, "learning_rate": 0.002, "loss": 2.5604, "step": 157870 }, { "epoch": 0.3145320668111692, "grad_norm": 0.19712378084659576, "learning_rate": 0.002, "loss": 2.5596, "step": 157880 }, { "epoch": 0.31455198903480813, "grad_norm": 0.15638816356658936, "learning_rate": 0.002, "loss": 2.5708, "step": 157890 }, { "epoch": 0.314571911258447, "grad_norm": 0.19904766976833344, "learning_rate": 0.002, "loss": 2.5708, "step": 157900 }, { "epoch": 0.3145918334820859, "grad_norm": 0.16607359051704407, "learning_rate": 0.002, "loss": 2.5479, "step": 157910 }, { "epoch": 0.31461175570572486, "grad_norm": 0.15382690727710724, "learning_rate": 0.002, "loss": 2.5632, "step": 157920 }, { "epoch": 0.31463167792936375, "grad_norm": 0.18436981737613678, "learning_rate": 0.002, "loss": 2.5616, "step": 157930 }, { "epoch": 0.3146516001530027, "grad_norm": 0.15982870757579803, "learning_rate": 0.002, "loss": 2.5665, "step": 157940 }, { "epoch": 0.3146715223766416, "grad_norm": 0.15979471802711487, "learning_rate": 0.002, "loss": 2.5551, "step": 157950 }, { "epoch": 0.3146914446002805, "grad_norm": 0.15686897933483124, "learning_rate": 0.002, "loss": 2.5558, "step": 157960 }, { "epoch": 0.3147113668239194, "grad_norm": 0.18662676215171814, "learning_rate": 0.002, "loss": 2.5705, "step": 157970 }, { "epoch": 0.3147312890475583, "grad_norm": 0.20740777254104614, "learning_rate": 0.002, "loss": 2.5655, "step": 157980 }, { "epoch": 0.31475121127119726, "grad_norm": 0.14688633382320404, "learning_rate": 0.002, "loss": 2.5543, "step": 157990 }, { "epoch": 0.31477113349483615, "grad_norm": 0.18304336071014404, "learning_rate": 0.002, "loss": 2.5521, "step": 158000 }, { "epoch": 0.3147910557184751, "grad_norm": 0.17211033403873444, "learning_rate": 0.002, "loss": 2.5497, "step": 158010 }, { "epoch": 0.314810977942114, "grad_norm": 0.18197298049926758, "learning_rate": 0.002, "loss": 2.5629, "step": 158020 }, { "epoch": 0.3148309001657529, "grad_norm": 0.17968617379665375, "learning_rate": 0.002, "loss": 2.5549, "step": 158030 }, { "epoch": 0.3148508223893918, "grad_norm": 0.16741135716438293, "learning_rate": 0.002, "loss": 2.5861, "step": 158040 }, { "epoch": 0.3148707446130307, "grad_norm": 0.1513260453939438, "learning_rate": 0.002, "loss": 2.5629, "step": 158050 }, { "epoch": 0.31489066683666966, "grad_norm": 0.1522570252418518, "learning_rate": 0.002, "loss": 2.5543, "step": 158060 }, { "epoch": 0.31491058906030855, "grad_norm": 0.1445297747850418, "learning_rate": 0.002, "loss": 2.5617, "step": 158070 }, { "epoch": 0.31493051128394745, "grad_norm": 0.16758640110492706, "learning_rate": 0.002, "loss": 2.5548, "step": 158080 }, { "epoch": 0.3149504335075864, "grad_norm": 0.1695052683353424, "learning_rate": 0.002, "loss": 2.5774, "step": 158090 }, { "epoch": 0.3149703557312253, "grad_norm": 0.15314814448356628, "learning_rate": 0.002, "loss": 2.5632, "step": 158100 }, { "epoch": 0.31499027795486423, "grad_norm": 0.17443028092384338, "learning_rate": 0.002, "loss": 2.5613, "step": 158110 }, { "epoch": 0.3150102001785031, "grad_norm": 0.16482721269130707, "learning_rate": 0.002, "loss": 2.5565, "step": 158120 }, { "epoch": 0.315030122402142, "grad_norm": 0.1617262363433838, "learning_rate": 0.002, "loss": 2.5622, "step": 158130 }, { "epoch": 0.31505004462578096, "grad_norm": 0.15208600461483002, "learning_rate": 0.002, "loss": 2.5617, "step": 158140 }, { "epoch": 0.31506996684941985, "grad_norm": 0.16190622746944427, "learning_rate": 0.002, "loss": 2.5492, "step": 158150 }, { "epoch": 0.3150898890730588, "grad_norm": 0.21048618853092194, "learning_rate": 0.002, "loss": 2.5651, "step": 158160 }, { "epoch": 0.3151098112966977, "grad_norm": 0.16357740759849548, "learning_rate": 0.002, "loss": 2.5606, "step": 158170 }, { "epoch": 0.31512973352033663, "grad_norm": 0.20645998418331146, "learning_rate": 0.002, "loss": 2.5633, "step": 158180 }, { "epoch": 0.3151496557439755, "grad_norm": 0.16237881779670715, "learning_rate": 0.002, "loss": 2.5481, "step": 158190 }, { "epoch": 0.3151695779676144, "grad_norm": 0.19931940734386444, "learning_rate": 0.002, "loss": 2.5529, "step": 158200 }, { "epoch": 0.31518950019125336, "grad_norm": 0.15162692964076996, "learning_rate": 0.002, "loss": 2.5781, "step": 158210 }, { "epoch": 0.31520942241489225, "grad_norm": 0.1745939701795578, "learning_rate": 0.002, "loss": 2.5508, "step": 158220 }, { "epoch": 0.3152293446385312, "grad_norm": 0.16013646125793457, "learning_rate": 0.002, "loss": 2.5516, "step": 158230 }, { "epoch": 0.3152492668621701, "grad_norm": 0.1671905815601349, "learning_rate": 0.002, "loss": 2.5517, "step": 158240 }, { "epoch": 0.315269189085809, "grad_norm": 0.20421141386032104, "learning_rate": 0.002, "loss": 2.5703, "step": 158250 }, { "epoch": 0.3152891113094479, "grad_norm": 0.17917500436306, "learning_rate": 0.002, "loss": 2.5458, "step": 158260 }, { "epoch": 0.3153090335330868, "grad_norm": 0.14379741251468658, "learning_rate": 0.002, "loss": 2.564, "step": 158270 }, { "epoch": 0.31532895575672576, "grad_norm": 0.1546134501695633, "learning_rate": 0.002, "loss": 2.5642, "step": 158280 }, { "epoch": 0.31534887798036465, "grad_norm": 0.1674443483352661, "learning_rate": 0.002, "loss": 2.5733, "step": 158290 }, { "epoch": 0.3153688002040036, "grad_norm": 0.17368195950984955, "learning_rate": 0.002, "loss": 2.5561, "step": 158300 }, { "epoch": 0.3153887224276425, "grad_norm": 0.1969294399023056, "learning_rate": 0.002, "loss": 2.5537, "step": 158310 }, { "epoch": 0.3154086446512814, "grad_norm": 0.18175220489501953, "learning_rate": 0.002, "loss": 2.5632, "step": 158320 }, { "epoch": 0.3154285668749203, "grad_norm": 0.15639394521713257, "learning_rate": 0.002, "loss": 2.56, "step": 158330 }, { "epoch": 0.3154484890985592, "grad_norm": 0.21006810665130615, "learning_rate": 0.002, "loss": 2.5453, "step": 158340 }, { "epoch": 0.31546841132219816, "grad_norm": 0.1543159931898117, "learning_rate": 0.002, "loss": 2.5621, "step": 158350 }, { "epoch": 0.31548833354583705, "grad_norm": 0.16397185623645782, "learning_rate": 0.002, "loss": 2.5586, "step": 158360 }, { "epoch": 0.31550825576947594, "grad_norm": 0.19331349432468414, "learning_rate": 0.002, "loss": 2.5505, "step": 158370 }, { "epoch": 0.3155281779931149, "grad_norm": 0.1984233856201172, "learning_rate": 0.002, "loss": 2.5612, "step": 158380 }, { "epoch": 0.3155481002167538, "grad_norm": 0.1626904159784317, "learning_rate": 0.002, "loss": 2.5724, "step": 158390 }, { "epoch": 0.3155680224403927, "grad_norm": 0.16194786131381989, "learning_rate": 0.002, "loss": 2.5691, "step": 158400 }, { "epoch": 0.3155879446640316, "grad_norm": 0.16829226911067963, "learning_rate": 0.002, "loss": 2.5665, "step": 158410 }, { "epoch": 0.3156078668876705, "grad_norm": 0.16278792917728424, "learning_rate": 0.002, "loss": 2.5562, "step": 158420 }, { "epoch": 0.31562778911130945, "grad_norm": 0.17688366770744324, "learning_rate": 0.002, "loss": 2.5717, "step": 158430 }, { "epoch": 0.31564771133494834, "grad_norm": 0.15380994975566864, "learning_rate": 0.002, "loss": 2.5565, "step": 158440 }, { "epoch": 0.3156676335585873, "grad_norm": 0.18552149832248688, "learning_rate": 0.002, "loss": 2.5595, "step": 158450 }, { "epoch": 0.3156875557822262, "grad_norm": 0.16374890506267548, "learning_rate": 0.002, "loss": 2.5648, "step": 158460 }, { "epoch": 0.3157074780058651, "grad_norm": 0.19462573528289795, "learning_rate": 0.002, "loss": 2.563, "step": 158470 }, { "epoch": 0.315727400229504, "grad_norm": 0.16955021023750305, "learning_rate": 0.002, "loss": 2.5659, "step": 158480 }, { "epoch": 0.3157473224531429, "grad_norm": 0.1670476794242859, "learning_rate": 0.002, "loss": 2.5549, "step": 158490 }, { "epoch": 0.31576724467678186, "grad_norm": 0.19620293378829956, "learning_rate": 0.002, "loss": 2.5584, "step": 158500 }, { "epoch": 0.31578716690042075, "grad_norm": 0.18216733634471893, "learning_rate": 0.002, "loss": 2.5565, "step": 158510 }, { "epoch": 0.3158070891240597, "grad_norm": 0.1603134423494339, "learning_rate": 0.002, "loss": 2.5533, "step": 158520 }, { "epoch": 0.3158270113476986, "grad_norm": 0.20030727982521057, "learning_rate": 0.002, "loss": 2.5624, "step": 158530 }, { "epoch": 0.3158469335713375, "grad_norm": 0.17899426817893982, "learning_rate": 0.002, "loss": 2.5766, "step": 158540 }, { "epoch": 0.3158668557949764, "grad_norm": 0.16793464124202728, "learning_rate": 0.002, "loss": 2.5645, "step": 158550 }, { "epoch": 0.3158867780186153, "grad_norm": 0.16282546520233154, "learning_rate": 0.002, "loss": 2.5464, "step": 158560 }, { "epoch": 0.31590670024225426, "grad_norm": 0.17057478427886963, "learning_rate": 0.002, "loss": 2.5562, "step": 158570 }, { "epoch": 0.31592662246589315, "grad_norm": 0.13925160467624664, "learning_rate": 0.002, "loss": 2.5865, "step": 158580 }, { "epoch": 0.3159465446895321, "grad_norm": 0.18461033701896667, "learning_rate": 0.002, "loss": 2.5612, "step": 158590 }, { "epoch": 0.315966466913171, "grad_norm": 0.1555943638086319, "learning_rate": 0.002, "loss": 2.5726, "step": 158600 }, { "epoch": 0.3159863891368099, "grad_norm": 0.18868939578533173, "learning_rate": 0.002, "loss": 2.5405, "step": 158610 }, { "epoch": 0.3160063113604488, "grad_norm": 0.18626096844673157, "learning_rate": 0.002, "loss": 2.5534, "step": 158620 }, { "epoch": 0.3160262335840877, "grad_norm": 0.14614218473434448, "learning_rate": 0.002, "loss": 2.5531, "step": 158630 }, { "epoch": 0.31604615580772666, "grad_norm": 0.1697119027376175, "learning_rate": 0.002, "loss": 2.5665, "step": 158640 }, { "epoch": 0.31606607803136555, "grad_norm": 0.18253971636295319, "learning_rate": 0.002, "loss": 2.5832, "step": 158650 }, { "epoch": 0.31608600025500444, "grad_norm": 0.16132324934005737, "learning_rate": 0.002, "loss": 2.5611, "step": 158660 }, { "epoch": 0.3161059224786434, "grad_norm": 0.16768652200698853, "learning_rate": 0.002, "loss": 2.5642, "step": 158670 }, { "epoch": 0.3161258447022823, "grad_norm": 0.18971849977970123, "learning_rate": 0.002, "loss": 2.5548, "step": 158680 }, { "epoch": 0.3161457669259212, "grad_norm": 0.16694264113903046, "learning_rate": 0.002, "loss": 2.5498, "step": 158690 }, { "epoch": 0.3161656891495601, "grad_norm": 0.15524010360240936, "learning_rate": 0.002, "loss": 2.5769, "step": 158700 }, { "epoch": 0.316185611373199, "grad_norm": 0.17972569167613983, "learning_rate": 0.002, "loss": 2.562, "step": 158710 }, { "epoch": 0.31620553359683795, "grad_norm": 0.1958874613046646, "learning_rate": 0.002, "loss": 2.5849, "step": 158720 }, { "epoch": 0.31622545582047684, "grad_norm": 0.19628788530826569, "learning_rate": 0.002, "loss": 2.5677, "step": 158730 }, { "epoch": 0.3162453780441158, "grad_norm": 0.17661158740520477, "learning_rate": 0.002, "loss": 2.5743, "step": 158740 }, { "epoch": 0.3162653002677547, "grad_norm": 0.1793961077928543, "learning_rate": 0.002, "loss": 2.5662, "step": 158750 }, { "epoch": 0.3162852224913936, "grad_norm": 0.16166085004806519, "learning_rate": 0.002, "loss": 2.5681, "step": 158760 }, { "epoch": 0.3163051447150325, "grad_norm": 0.193276047706604, "learning_rate": 0.002, "loss": 2.5744, "step": 158770 }, { "epoch": 0.3163250669386714, "grad_norm": 0.14810584485530853, "learning_rate": 0.002, "loss": 2.558, "step": 158780 }, { "epoch": 0.31634498916231035, "grad_norm": 0.15386423468589783, "learning_rate": 0.002, "loss": 2.5782, "step": 158790 }, { "epoch": 0.31636491138594924, "grad_norm": 0.1482681781053543, "learning_rate": 0.002, "loss": 2.5763, "step": 158800 }, { "epoch": 0.3163848336095882, "grad_norm": 0.1576385498046875, "learning_rate": 0.002, "loss": 2.5505, "step": 158810 }, { "epoch": 0.3164047558332271, "grad_norm": 0.1670500785112381, "learning_rate": 0.002, "loss": 2.5665, "step": 158820 }, { "epoch": 0.31642467805686597, "grad_norm": 0.14864201843738556, "learning_rate": 0.002, "loss": 2.5584, "step": 158830 }, { "epoch": 0.3164446002805049, "grad_norm": 0.16882315278053284, "learning_rate": 0.002, "loss": 2.5614, "step": 158840 }, { "epoch": 0.3164645225041438, "grad_norm": 0.14890331029891968, "learning_rate": 0.002, "loss": 2.5642, "step": 158850 }, { "epoch": 0.31648444472778275, "grad_norm": 0.18866509199142456, "learning_rate": 0.002, "loss": 2.563, "step": 158860 }, { "epoch": 0.31650436695142165, "grad_norm": 0.14108797907829285, "learning_rate": 0.002, "loss": 2.5553, "step": 158870 }, { "epoch": 0.31652428917506054, "grad_norm": 0.18048295378684998, "learning_rate": 0.002, "loss": 2.5604, "step": 158880 }, { "epoch": 0.3165442113986995, "grad_norm": 0.17196640372276306, "learning_rate": 0.002, "loss": 2.5672, "step": 158890 }, { "epoch": 0.3165641336223384, "grad_norm": 0.15277712047100067, "learning_rate": 0.002, "loss": 2.5561, "step": 158900 }, { "epoch": 0.3165840558459773, "grad_norm": 0.18725161254405975, "learning_rate": 0.002, "loss": 2.5746, "step": 158910 }, { "epoch": 0.3166039780696162, "grad_norm": 0.14258575439453125, "learning_rate": 0.002, "loss": 2.5471, "step": 158920 }, { "epoch": 0.31662390029325516, "grad_norm": 0.20303115248680115, "learning_rate": 0.002, "loss": 2.5517, "step": 158930 }, { "epoch": 0.31664382251689405, "grad_norm": 0.18284395337104797, "learning_rate": 0.002, "loss": 2.5536, "step": 158940 }, { "epoch": 0.31666374474053294, "grad_norm": 0.17859132587909698, "learning_rate": 0.002, "loss": 2.5712, "step": 158950 }, { "epoch": 0.3166836669641719, "grad_norm": 0.13565053045749664, "learning_rate": 0.002, "loss": 2.5662, "step": 158960 }, { "epoch": 0.3167035891878108, "grad_norm": 0.14340192079544067, "learning_rate": 0.002, "loss": 2.5603, "step": 158970 }, { "epoch": 0.3167235114114497, "grad_norm": 0.18619796633720398, "learning_rate": 0.002, "loss": 2.5391, "step": 158980 }, { "epoch": 0.3167434336350886, "grad_norm": 0.16098767518997192, "learning_rate": 0.002, "loss": 2.568, "step": 158990 }, { "epoch": 0.3167633558587275, "grad_norm": 0.16467876732349396, "learning_rate": 0.002, "loss": 2.5706, "step": 159000 }, { "epoch": 0.31678327808236645, "grad_norm": 0.17500482499599457, "learning_rate": 0.002, "loss": 2.5719, "step": 159010 }, { "epoch": 0.31680320030600534, "grad_norm": 0.17827975749969482, "learning_rate": 0.002, "loss": 2.58, "step": 159020 }, { "epoch": 0.3168231225296443, "grad_norm": 0.1849500834941864, "learning_rate": 0.002, "loss": 2.5742, "step": 159030 }, { "epoch": 0.3168430447532832, "grad_norm": 0.16205035150051117, "learning_rate": 0.002, "loss": 2.5631, "step": 159040 }, { "epoch": 0.3168629669769221, "grad_norm": 0.18053829669952393, "learning_rate": 0.002, "loss": 2.5491, "step": 159050 }, { "epoch": 0.316882889200561, "grad_norm": 0.16040614247322083, "learning_rate": 0.002, "loss": 2.5682, "step": 159060 }, { "epoch": 0.3169028114241999, "grad_norm": 0.15936875343322754, "learning_rate": 0.002, "loss": 2.5422, "step": 159070 }, { "epoch": 0.31692273364783885, "grad_norm": 0.17203250527381897, "learning_rate": 0.002, "loss": 2.563, "step": 159080 }, { "epoch": 0.31694265587147774, "grad_norm": 0.16023136675357819, "learning_rate": 0.002, "loss": 2.5671, "step": 159090 }, { "epoch": 0.3169625780951167, "grad_norm": 0.1515296995639801, "learning_rate": 0.002, "loss": 2.5604, "step": 159100 }, { "epoch": 0.3169825003187556, "grad_norm": 0.16499438881874084, "learning_rate": 0.002, "loss": 2.57, "step": 159110 }, { "epoch": 0.31700242254239447, "grad_norm": 0.15719623863697052, "learning_rate": 0.002, "loss": 2.5528, "step": 159120 }, { "epoch": 0.3170223447660334, "grad_norm": 0.1770617961883545, "learning_rate": 0.002, "loss": 2.5712, "step": 159130 }, { "epoch": 0.3170422669896723, "grad_norm": 0.17188818752765656, "learning_rate": 0.002, "loss": 2.5696, "step": 159140 }, { "epoch": 0.31706218921331125, "grad_norm": 0.17225366830825806, "learning_rate": 0.002, "loss": 2.5667, "step": 159150 }, { "epoch": 0.31708211143695014, "grad_norm": 0.19394905865192413, "learning_rate": 0.002, "loss": 2.5644, "step": 159160 }, { "epoch": 0.31710203366058903, "grad_norm": 0.17967593669891357, "learning_rate": 0.002, "loss": 2.5627, "step": 159170 }, { "epoch": 0.317121955884228, "grad_norm": 0.1787864863872528, "learning_rate": 0.002, "loss": 2.5558, "step": 159180 }, { "epoch": 0.31714187810786687, "grad_norm": 0.16465714573860168, "learning_rate": 0.002, "loss": 2.5469, "step": 159190 }, { "epoch": 0.3171618003315058, "grad_norm": 0.15331928431987762, "learning_rate": 0.002, "loss": 2.5495, "step": 159200 }, { "epoch": 0.3171817225551447, "grad_norm": 0.174157053232193, "learning_rate": 0.002, "loss": 2.5518, "step": 159210 }, { "epoch": 0.31720164477878365, "grad_norm": 0.16616034507751465, "learning_rate": 0.002, "loss": 2.5731, "step": 159220 }, { "epoch": 0.31722156700242254, "grad_norm": 0.14904634654521942, "learning_rate": 0.002, "loss": 2.5679, "step": 159230 }, { "epoch": 0.31724148922606143, "grad_norm": 0.16530358791351318, "learning_rate": 0.002, "loss": 2.5518, "step": 159240 }, { "epoch": 0.3172614114497004, "grad_norm": 0.1730533242225647, "learning_rate": 0.002, "loss": 2.5618, "step": 159250 }, { "epoch": 0.31728133367333927, "grad_norm": 0.1717979460954666, "learning_rate": 0.002, "loss": 2.5733, "step": 159260 }, { "epoch": 0.3173012558969782, "grad_norm": 0.20590905845165253, "learning_rate": 0.002, "loss": 2.5845, "step": 159270 }, { "epoch": 0.3173211781206171, "grad_norm": 0.16753336787223816, "learning_rate": 0.002, "loss": 2.5683, "step": 159280 }, { "epoch": 0.317341100344256, "grad_norm": 0.18123690783977509, "learning_rate": 0.002, "loss": 2.5768, "step": 159290 }, { "epoch": 0.31736102256789495, "grad_norm": 0.1768491268157959, "learning_rate": 0.002, "loss": 2.5512, "step": 159300 }, { "epoch": 0.31738094479153384, "grad_norm": 0.20438964664936066, "learning_rate": 0.002, "loss": 2.5575, "step": 159310 }, { "epoch": 0.3174008670151728, "grad_norm": 0.19084854423999786, "learning_rate": 0.002, "loss": 2.5583, "step": 159320 }, { "epoch": 0.3174207892388117, "grad_norm": 0.15121521055698395, "learning_rate": 0.002, "loss": 2.5498, "step": 159330 }, { "epoch": 0.3174407114624506, "grad_norm": 0.19510239362716675, "learning_rate": 0.002, "loss": 2.5649, "step": 159340 }, { "epoch": 0.3174606336860895, "grad_norm": 0.1768016815185547, "learning_rate": 0.002, "loss": 2.5777, "step": 159350 }, { "epoch": 0.3174805559097284, "grad_norm": 0.15813058614730835, "learning_rate": 0.002, "loss": 2.5778, "step": 159360 }, { "epoch": 0.31750047813336735, "grad_norm": 0.22876524925231934, "learning_rate": 0.002, "loss": 2.5664, "step": 159370 }, { "epoch": 0.31752040035700624, "grad_norm": 0.1601734608411789, "learning_rate": 0.002, "loss": 2.5615, "step": 159380 }, { "epoch": 0.3175403225806452, "grad_norm": 0.17147326469421387, "learning_rate": 0.002, "loss": 2.5549, "step": 159390 }, { "epoch": 0.3175602448042841, "grad_norm": 0.1681884527206421, "learning_rate": 0.002, "loss": 2.5498, "step": 159400 }, { "epoch": 0.31758016702792297, "grad_norm": 0.15935471653938293, "learning_rate": 0.002, "loss": 2.559, "step": 159410 }, { "epoch": 0.3176000892515619, "grad_norm": 0.14781992137432098, "learning_rate": 0.002, "loss": 2.5717, "step": 159420 }, { "epoch": 0.3176200114752008, "grad_norm": 0.18085502088069916, "learning_rate": 0.002, "loss": 2.5545, "step": 159430 }, { "epoch": 0.31763993369883975, "grad_norm": 0.167722687125206, "learning_rate": 0.002, "loss": 2.5495, "step": 159440 }, { "epoch": 0.31765985592247864, "grad_norm": 0.2049427181482315, "learning_rate": 0.002, "loss": 2.5562, "step": 159450 }, { "epoch": 0.31767977814611753, "grad_norm": 0.1646757423877716, "learning_rate": 0.002, "loss": 2.5622, "step": 159460 }, { "epoch": 0.3176997003697565, "grad_norm": 0.18340660631656647, "learning_rate": 0.002, "loss": 2.57, "step": 159470 }, { "epoch": 0.31771962259339537, "grad_norm": 0.13797606527805328, "learning_rate": 0.002, "loss": 2.556, "step": 159480 }, { "epoch": 0.3177395448170343, "grad_norm": 0.1733027845621109, "learning_rate": 0.002, "loss": 2.5733, "step": 159490 }, { "epoch": 0.3177594670406732, "grad_norm": 0.14084145426750183, "learning_rate": 0.002, "loss": 2.5672, "step": 159500 }, { "epoch": 0.31777938926431215, "grad_norm": 0.18782377243041992, "learning_rate": 0.002, "loss": 2.5632, "step": 159510 }, { "epoch": 0.31779931148795104, "grad_norm": 0.1598878800868988, "learning_rate": 0.002, "loss": 2.5516, "step": 159520 }, { "epoch": 0.31781923371158993, "grad_norm": 0.18924878537654877, "learning_rate": 0.002, "loss": 2.5662, "step": 159530 }, { "epoch": 0.3178391559352289, "grad_norm": 0.17900246381759644, "learning_rate": 0.002, "loss": 2.5699, "step": 159540 }, { "epoch": 0.31785907815886777, "grad_norm": 0.15164805948734283, "learning_rate": 0.002, "loss": 2.5506, "step": 159550 }, { "epoch": 0.3178790003825067, "grad_norm": 0.16082169115543365, "learning_rate": 0.002, "loss": 2.5634, "step": 159560 }, { "epoch": 0.3178989226061456, "grad_norm": 0.16531001031398773, "learning_rate": 0.002, "loss": 2.5452, "step": 159570 }, { "epoch": 0.3179188448297845, "grad_norm": 0.13197027146816254, "learning_rate": 0.002, "loss": 2.571, "step": 159580 }, { "epoch": 0.31793876705342344, "grad_norm": 0.18864253163337708, "learning_rate": 0.002, "loss": 2.5644, "step": 159590 }, { "epoch": 0.31795868927706233, "grad_norm": 0.15610627830028534, "learning_rate": 0.002, "loss": 2.563, "step": 159600 }, { "epoch": 0.3179786115007013, "grad_norm": 0.16226239502429962, "learning_rate": 0.002, "loss": 2.5479, "step": 159610 }, { "epoch": 0.31799853372434017, "grad_norm": 0.20126010477542877, "learning_rate": 0.002, "loss": 2.553, "step": 159620 }, { "epoch": 0.3180184559479791, "grad_norm": 0.17676393687725067, "learning_rate": 0.002, "loss": 2.5643, "step": 159630 }, { "epoch": 0.318038378171618, "grad_norm": 0.17755301296710968, "learning_rate": 0.002, "loss": 2.5688, "step": 159640 }, { "epoch": 0.3180583003952569, "grad_norm": 0.16870655119419098, "learning_rate": 0.002, "loss": 2.5505, "step": 159650 }, { "epoch": 0.31807822261889585, "grad_norm": 0.1405622959136963, "learning_rate": 0.002, "loss": 2.553, "step": 159660 }, { "epoch": 0.31809814484253474, "grad_norm": 0.16665975749492645, "learning_rate": 0.002, "loss": 2.5733, "step": 159670 }, { "epoch": 0.3181180670661737, "grad_norm": 0.17117361724376678, "learning_rate": 0.002, "loss": 2.558, "step": 159680 }, { "epoch": 0.3181379892898126, "grad_norm": 0.200539231300354, "learning_rate": 0.002, "loss": 2.5719, "step": 159690 }, { "epoch": 0.31815791151345146, "grad_norm": 0.1717817336320877, "learning_rate": 0.002, "loss": 2.5742, "step": 159700 }, { "epoch": 0.3181778337370904, "grad_norm": 0.16169823706150055, "learning_rate": 0.002, "loss": 2.5714, "step": 159710 }, { "epoch": 0.3181977559607293, "grad_norm": 0.1805938333272934, "learning_rate": 0.002, "loss": 2.5569, "step": 159720 }, { "epoch": 0.31821767818436825, "grad_norm": 0.18440523743629456, "learning_rate": 0.002, "loss": 2.5574, "step": 159730 }, { "epoch": 0.31823760040800714, "grad_norm": 0.19822706282138824, "learning_rate": 0.002, "loss": 2.5664, "step": 159740 }, { "epoch": 0.31825752263164603, "grad_norm": 0.17336854338645935, "learning_rate": 0.002, "loss": 2.5732, "step": 159750 }, { "epoch": 0.318277444855285, "grad_norm": 0.14448820054531097, "learning_rate": 0.002, "loss": 2.5734, "step": 159760 }, { "epoch": 0.31829736707892387, "grad_norm": 0.1540815234184265, "learning_rate": 0.002, "loss": 2.5724, "step": 159770 }, { "epoch": 0.3183172893025628, "grad_norm": 0.1796332746744156, "learning_rate": 0.002, "loss": 2.5613, "step": 159780 }, { "epoch": 0.3183372115262017, "grad_norm": 0.1432262510061264, "learning_rate": 0.002, "loss": 2.5544, "step": 159790 }, { "epoch": 0.31835713374984065, "grad_norm": 0.15353763103485107, "learning_rate": 0.002, "loss": 2.5611, "step": 159800 }, { "epoch": 0.31837705597347954, "grad_norm": 0.20146042108535767, "learning_rate": 0.002, "loss": 2.5573, "step": 159810 }, { "epoch": 0.31839697819711843, "grad_norm": 0.17547167837619781, "learning_rate": 0.002, "loss": 2.5653, "step": 159820 }, { "epoch": 0.3184169004207574, "grad_norm": 0.1524663269519806, "learning_rate": 0.002, "loss": 2.5606, "step": 159830 }, { "epoch": 0.31843682264439627, "grad_norm": 0.16795748472213745, "learning_rate": 0.002, "loss": 2.563, "step": 159840 }, { "epoch": 0.3184567448680352, "grad_norm": 0.19262368977069855, "learning_rate": 0.002, "loss": 2.5535, "step": 159850 }, { "epoch": 0.3184766670916741, "grad_norm": 0.1467360109090805, "learning_rate": 0.002, "loss": 2.5438, "step": 159860 }, { "epoch": 0.318496589315313, "grad_norm": 0.20247727632522583, "learning_rate": 0.002, "loss": 2.555, "step": 159870 }, { "epoch": 0.31851651153895194, "grad_norm": 0.16279059648513794, "learning_rate": 0.002, "loss": 2.5531, "step": 159880 }, { "epoch": 0.31853643376259083, "grad_norm": 0.19169095158576965, "learning_rate": 0.002, "loss": 2.5649, "step": 159890 }, { "epoch": 0.3185563559862298, "grad_norm": 0.15270589292049408, "learning_rate": 0.002, "loss": 2.5547, "step": 159900 }, { "epoch": 0.31857627820986867, "grad_norm": 0.17397308349609375, "learning_rate": 0.002, "loss": 2.5486, "step": 159910 }, { "epoch": 0.31859620043350756, "grad_norm": 0.14893724024295807, "learning_rate": 0.002, "loss": 2.568, "step": 159920 }, { "epoch": 0.3186161226571465, "grad_norm": 0.1646760255098343, "learning_rate": 0.002, "loss": 2.5608, "step": 159930 }, { "epoch": 0.3186360448807854, "grad_norm": 0.14465215802192688, "learning_rate": 0.002, "loss": 2.5589, "step": 159940 }, { "epoch": 0.31865596710442434, "grad_norm": 0.17291443049907684, "learning_rate": 0.002, "loss": 2.5657, "step": 159950 }, { "epoch": 0.31867588932806323, "grad_norm": 0.17189818620681763, "learning_rate": 0.002, "loss": 2.5643, "step": 159960 }, { "epoch": 0.3186958115517022, "grad_norm": 0.14209464192390442, "learning_rate": 0.002, "loss": 2.5394, "step": 159970 }, { "epoch": 0.31871573377534107, "grad_norm": 0.15022359788417816, "learning_rate": 0.002, "loss": 2.5751, "step": 159980 }, { "epoch": 0.31873565599897996, "grad_norm": 0.1672479212284088, "learning_rate": 0.002, "loss": 2.5613, "step": 159990 }, { "epoch": 0.3187555782226189, "grad_norm": 0.15100884437561035, "learning_rate": 0.002, "loss": 2.5565, "step": 160000 }, { "epoch": 0.3187755004462578, "grad_norm": 0.1792910099029541, "learning_rate": 0.002, "loss": 2.549, "step": 160010 }, { "epoch": 0.31879542266989674, "grad_norm": 0.16030682623386383, "learning_rate": 0.002, "loss": 2.5564, "step": 160020 }, { "epoch": 0.31881534489353563, "grad_norm": 0.144120991230011, "learning_rate": 0.002, "loss": 2.5548, "step": 160030 }, { "epoch": 0.3188352671171745, "grad_norm": 0.16219019889831543, "learning_rate": 0.002, "loss": 2.5555, "step": 160040 }, { "epoch": 0.31885518934081347, "grad_norm": 0.16027973592281342, "learning_rate": 0.002, "loss": 2.5695, "step": 160050 }, { "epoch": 0.31887511156445236, "grad_norm": 0.19232775270938873, "learning_rate": 0.002, "loss": 2.5783, "step": 160060 }, { "epoch": 0.3188950337880913, "grad_norm": 0.15066103637218475, "learning_rate": 0.002, "loss": 2.5622, "step": 160070 }, { "epoch": 0.3189149560117302, "grad_norm": 0.2266988903284073, "learning_rate": 0.002, "loss": 2.5559, "step": 160080 }, { "epoch": 0.31893487823536915, "grad_norm": 0.17235159873962402, "learning_rate": 0.002, "loss": 2.5613, "step": 160090 }, { "epoch": 0.31895480045900804, "grad_norm": 0.1550845056772232, "learning_rate": 0.002, "loss": 2.5711, "step": 160100 }, { "epoch": 0.3189747226826469, "grad_norm": 0.18074098229408264, "learning_rate": 0.002, "loss": 2.5615, "step": 160110 }, { "epoch": 0.3189946449062859, "grad_norm": 0.14625973999500275, "learning_rate": 0.002, "loss": 2.5721, "step": 160120 }, { "epoch": 0.31901456712992476, "grad_norm": 0.19336049258708954, "learning_rate": 0.002, "loss": 2.5817, "step": 160130 }, { "epoch": 0.3190344893535637, "grad_norm": 0.15794233977794647, "learning_rate": 0.002, "loss": 2.5665, "step": 160140 }, { "epoch": 0.3190544115772026, "grad_norm": 0.16986480355262756, "learning_rate": 0.002, "loss": 2.5707, "step": 160150 }, { "epoch": 0.3190743338008415, "grad_norm": 0.18889231979846954, "learning_rate": 0.002, "loss": 2.5627, "step": 160160 }, { "epoch": 0.31909425602448044, "grad_norm": 0.16920943558216095, "learning_rate": 0.002, "loss": 2.555, "step": 160170 }, { "epoch": 0.31911417824811933, "grad_norm": 0.1559399664402008, "learning_rate": 0.002, "loss": 2.5625, "step": 160180 }, { "epoch": 0.3191341004717583, "grad_norm": 0.21363338828086853, "learning_rate": 0.002, "loss": 2.576, "step": 160190 }, { "epoch": 0.31915402269539717, "grad_norm": 0.16355925798416138, "learning_rate": 0.002, "loss": 2.5572, "step": 160200 }, { "epoch": 0.31917394491903606, "grad_norm": 0.19326546788215637, "learning_rate": 0.002, "loss": 2.5503, "step": 160210 }, { "epoch": 0.319193867142675, "grad_norm": 0.18491613864898682, "learning_rate": 0.002, "loss": 2.5727, "step": 160220 }, { "epoch": 0.3192137893663139, "grad_norm": 0.15815852582454681, "learning_rate": 0.002, "loss": 2.5699, "step": 160230 }, { "epoch": 0.31923371158995284, "grad_norm": 0.21722619235515594, "learning_rate": 0.002, "loss": 2.5675, "step": 160240 }, { "epoch": 0.31925363381359173, "grad_norm": 0.15965519845485687, "learning_rate": 0.002, "loss": 2.5608, "step": 160250 }, { "epoch": 0.3192735560372307, "grad_norm": 0.14818324148654938, "learning_rate": 0.002, "loss": 2.5789, "step": 160260 }, { "epoch": 0.31929347826086957, "grad_norm": 0.17107291519641876, "learning_rate": 0.002, "loss": 2.565, "step": 160270 }, { "epoch": 0.31931340048450846, "grad_norm": 0.15909890830516815, "learning_rate": 0.002, "loss": 2.562, "step": 160280 }, { "epoch": 0.3193333227081474, "grad_norm": 0.15859806537628174, "learning_rate": 0.002, "loss": 2.5573, "step": 160290 }, { "epoch": 0.3193532449317863, "grad_norm": 0.2421281933784485, "learning_rate": 0.002, "loss": 2.5654, "step": 160300 }, { "epoch": 0.31937316715542524, "grad_norm": 0.15236976742744446, "learning_rate": 0.002, "loss": 2.5624, "step": 160310 }, { "epoch": 0.31939308937906413, "grad_norm": 0.16781985759735107, "learning_rate": 0.002, "loss": 2.5783, "step": 160320 }, { "epoch": 0.319413011602703, "grad_norm": 0.1673882007598877, "learning_rate": 0.002, "loss": 2.538, "step": 160330 }, { "epoch": 0.31943293382634197, "grad_norm": 0.16798841953277588, "learning_rate": 0.002, "loss": 2.5594, "step": 160340 }, { "epoch": 0.31945285604998086, "grad_norm": 0.17909692227840424, "learning_rate": 0.002, "loss": 2.5485, "step": 160350 }, { "epoch": 0.3194727782736198, "grad_norm": 0.1542118489742279, "learning_rate": 0.002, "loss": 2.5578, "step": 160360 }, { "epoch": 0.3194927004972587, "grad_norm": 0.16334696114063263, "learning_rate": 0.002, "loss": 2.5671, "step": 160370 }, { "epoch": 0.31951262272089764, "grad_norm": 0.18749697506427765, "learning_rate": 0.002, "loss": 2.5605, "step": 160380 }, { "epoch": 0.31953254494453653, "grad_norm": 0.1582750380039215, "learning_rate": 0.002, "loss": 2.5544, "step": 160390 }, { "epoch": 0.3195524671681754, "grad_norm": 0.17053979635238647, "learning_rate": 0.002, "loss": 2.5632, "step": 160400 }, { "epoch": 0.31957238939181437, "grad_norm": 0.1734260767698288, "learning_rate": 0.002, "loss": 2.5565, "step": 160410 }, { "epoch": 0.31959231161545326, "grad_norm": 0.19086699187755585, "learning_rate": 0.002, "loss": 2.5682, "step": 160420 }, { "epoch": 0.3196122338390922, "grad_norm": 0.171525776386261, "learning_rate": 0.002, "loss": 2.5601, "step": 160430 }, { "epoch": 0.3196321560627311, "grad_norm": 0.14929890632629395, "learning_rate": 0.002, "loss": 2.5678, "step": 160440 }, { "epoch": 0.31965207828637, "grad_norm": 0.17256812751293182, "learning_rate": 0.002, "loss": 2.5634, "step": 160450 }, { "epoch": 0.31967200051000894, "grad_norm": 0.15330983698368073, "learning_rate": 0.002, "loss": 2.5693, "step": 160460 }, { "epoch": 0.3196919227336478, "grad_norm": 0.1457006335258484, "learning_rate": 0.002, "loss": 2.5724, "step": 160470 }, { "epoch": 0.3197118449572868, "grad_norm": 0.16244013607501984, "learning_rate": 0.002, "loss": 2.565, "step": 160480 }, { "epoch": 0.31973176718092566, "grad_norm": 0.17103509604930878, "learning_rate": 0.002, "loss": 2.5671, "step": 160490 }, { "epoch": 0.31975168940456455, "grad_norm": 0.18769586086273193, "learning_rate": 0.002, "loss": 2.5521, "step": 160500 }, { "epoch": 0.3197716116282035, "grad_norm": 0.1652127057313919, "learning_rate": 0.002, "loss": 2.5636, "step": 160510 }, { "epoch": 0.3197915338518424, "grad_norm": 0.1728532463312149, "learning_rate": 0.002, "loss": 2.5715, "step": 160520 }, { "epoch": 0.31981145607548134, "grad_norm": 0.15728633105754852, "learning_rate": 0.002, "loss": 2.5755, "step": 160530 }, { "epoch": 0.31983137829912023, "grad_norm": 0.1544853150844574, "learning_rate": 0.002, "loss": 2.5625, "step": 160540 }, { "epoch": 0.3198513005227592, "grad_norm": 0.17631489038467407, "learning_rate": 0.002, "loss": 2.5557, "step": 160550 }, { "epoch": 0.31987122274639807, "grad_norm": 0.19175216555595398, "learning_rate": 0.002, "loss": 2.5633, "step": 160560 }, { "epoch": 0.31989114497003696, "grad_norm": 0.17024189233779907, "learning_rate": 0.002, "loss": 2.5554, "step": 160570 }, { "epoch": 0.3199110671936759, "grad_norm": 0.17182305455207825, "learning_rate": 0.002, "loss": 2.5663, "step": 160580 }, { "epoch": 0.3199309894173148, "grad_norm": 0.1839793622493744, "learning_rate": 0.002, "loss": 2.5709, "step": 160590 }, { "epoch": 0.31995091164095374, "grad_norm": 0.17948219180107117, "learning_rate": 0.002, "loss": 2.5808, "step": 160600 }, { "epoch": 0.31997083386459263, "grad_norm": 0.17024441063404083, "learning_rate": 0.002, "loss": 2.5873, "step": 160610 }, { "epoch": 0.3199907560882315, "grad_norm": 0.1647052764892578, "learning_rate": 0.002, "loss": 2.5717, "step": 160620 }, { "epoch": 0.32001067831187047, "grad_norm": 0.20183782279491425, "learning_rate": 0.002, "loss": 2.5597, "step": 160630 }, { "epoch": 0.32003060053550936, "grad_norm": 0.213492289185524, "learning_rate": 0.002, "loss": 2.583, "step": 160640 }, { "epoch": 0.3200505227591483, "grad_norm": 0.1599874645471573, "learning_rate": 0.002, "loss": 2.5522, "step": 160650 }, { "epoch": 0.3200704449827872, "grad_norm": 0.15732571482658386, "learning_rate": 0.002, "loss": 2.5593, "step": 160660 }, { "epoch": 0.3200903672064261, "grad_norm": 0.15818637609481812, "learning_rate": 0.002, "loss": 2.5649, "step": 160670 }, { "epoch": 0.32011028943006503, "grad_norm": 0.1441793590784073, "learning_rate": 0.002, "loss": 2.5591, "step": 160680 }, { "epoch": 0.3201302116537039, "grad_norm": 0.15131960809230804, "learning_rate": 0.002, "loss": 2.5547, "step": 160690 }, { "epoch": 0.32015013387734287, "grad_norm": 0.16778920590877533, "learning_rate": 0.002, "loss": 2.5605, "step": 160700 }, { "epoch": 0.32017005610098176, "grad_norm": 0.17432911694049835, "learning_rate": 0.002, "loss": 2.5644, "step": 160710 }, { "epoch": 0.3201899783246207, "grad_norm": 0.17301183938980103, "learning_rate": 0.002, "loss": 2.5699, "step": 160720 }, { "epoch": 0.3202099005482596, "grad_norm": 0.1758536398410797, "learning_rate": 0.002, "loss": 2.5708, "step": 160730 }, { "epoch": 0.3202298227718985, "grad_norm": 0.177520290017128, "learning_rate": 0.002, "loss": 2.5657, "step": 160740 }, { "epoch": 0.32024974499553743, "grad_norm": 0.17074927687644958, "learning_rate": 0.002, "loss": 2.5493, "step": 160750 }, { "epoch": 0.3202696672191763, "grad_norm": 0.15580600500106812, "learning_rate": 0.002, "loss": 2.5714, "step": 160760 }, { "epoch": 0.32028958944281527, "grad_norm": 0.14061006903648376, "learning_rate": 0.002, "loss": 2.5498, "step": 160770 }, { "epoch": 0.32030951166645416, "grad_norm": 0.21683092415332794, "learning_rate": 0.002, "loss": 2.5721, "step": 160780 }, { "epoch": 0.32032943389009305, "grad_norm": 0.17237776517868042, "learning_rate": 0.002, "loss": 2.5682, "step": 160790 }, { "epoch": 0.320349356113732, "grad_norm": 0.2006186991930008, "learning_rate": 0.002, "loss": 2.5587, "step": 160800 }, { "epoch": 0.3203692783373709, "grad_norm": 0.18426738679409027, "learning_rate": 0.002, "loss": 2.5773, "step": 160810 }, { "epoch": 0.32038920056100983, "grad_norm": 0.14744961261749268, "learning_rate": 0.002, "loss": 2.5667, "step": 160820 }, { "epoch": 0.3204091227846487, "grad_norm": 0.20931874215602875, "learning_rate": 0.002, "loss": 2.5385, "step": 160830 }, { "epoch": 0.32042904500828767, "grad_norm": 0.16146458685398102, "learning_rate": 0.002, "loss": 2.5619, "step": 160840 }, { "epoch": 0.32044896723192656, "grad_norm": 0.16873039305210114, "learning_rate": 0.002, "loss": 2.5778, "step": 160850 }, { "epoch": 0.32046888945556545, "grad_norm": 0.19090840220451355, "learning_rate": 0.002, "loss": 2.5846, "step": 160860 }, { "epoch": 0.3204888116792044, "grad_norm": 0.16410832107067108, "learning_rate": 0.002, "loss": 2.5742, "step": 160870 }, { "epoch": 0.3205087339028433, "grad_norm": 0.14416931569576263, "learning_rate": 0.002, "loss": 2.5642, "step": 160880 }, { "epoch": 0.32052865612648224, "grad_norm": 0.19563303887844086, "learning_rate": 0.002, "loss": 2.562, "step": 160890 }, { "epoch": 0.3205485783501211, "grad_norm": 0.17796847224235535, "learning_rate": 0.002, "loss": 2.5744, "step": 160900 }, { "epoch": 0.32056850057376, "grad_norm": 0.14702941477298737, "learning_rate": 0.002, "loss": 2.5683, "step": 160910 }, { "epoch": 0.32058842279739896, "grad_norm": 0.18282794952392578, "learning_rate": 0.002, "loss": 2.5617, "step": 160920 }, { "epoch": 0.32060834502103785, "grad_norm": 0.1648232489824295, "learning_rate": 0.002, "loss": 2.5684, "step": 160930 }, { "epoch": 0.3206282672446768, "grad_norm": 0.19120000302791595, "learning_rate": 0.002, "loss": 2.5727, "step": 160940 }, { "epoch": 0.3206481894683157, "grad_norm": 0.2140505313873291, "learning_rate": 0.002, "loss": 2.5497, "step": 160950 }, { "epoch": 0.3206681116919546, "grad_norm": 0.1459323614835739, "learning_rate": 0.002, "loss": 2.5789, "step": 160960 }, { "epoch": 0.32068803391559353, "grad_norm": 0.16400976479053497, "learning_rate": 0.002, "loss": 2.5371, "step": 160970 }, { "epoch": 0.3207079561392324, "grad_norm": 0.16268858313560486, "learning_rate": 0.002, "loss": 2.5442, "step": 160980 }, { "epoch": 0.32072787836287137, "grad_norm": 0.1636252999305725, "learning_rate": 0.002, "loss": 2.5696, "step": 160990 }, { "epoch": 0.32074780058651026, "grad_norm": 0.17321167886257172, "learning_rate": 0.002, "loss": 2.5665, "step": 161000 }, { "epoch": 0.3207677228101492, "grad_norm": 0.16962961852550507, "learning_rate": 0.002, "loss": 2.5578, "step": 161010 }, { "epoch": 0.3207876450337881, "grad_norm": 0.15095339715480804, "learning_rate": 0.002, "loss": 2.5727, "step": 161020 }, { "epoch": 0.320807567257427, "grad_norm": 0.20590536296367645, "learning_rate": 0.002, "loss": 2.5656, "step": 161030 }, { "epoch": 0.32082748948106593, "grad_norm": 0.17785924673080444, "learning_rate": 0.002, "loss": 2.5538, "step": 161040 }, { "epoch": 0.3208474117047048, "grad_norm": 0.1819128394126892, "learning_rate": 0.002, "loss": 2.5498, "step": 161050 }, { "epoch": 0.32086733392834377, "grad_norm": 0.15683504939079285, "learning_rate": 0.002, "loss": 2.5807, "step": 161060 }, { "epoch": 0.32088725615198266, "grad_norm": 0.18296219408512115, "learning_rate": 0.002, "loss": 2.5725, "step": 161070 }, { "epoch": 0.32090717837562155, "grad_norm": 0.1505608707666397, "learning_rate": 0.002, "loss": 2.5691, "step": 161080 }, { "epoch": 0.3209271005992605, "grad_norm": 0.20561081171035767, "learning_rate": 0.002, "loss": 2.5697, "step": 161090 }, { "epoch": 0.3209470228228994, "grad_norm": 0.1645238846540451, "learning_rate": 0.002, "loss": 2.5562, "step": 161100 }, { "epoch": 0.32096694504653833, "grad_norm": 0.16234906017780304, "learning_rate": 0.002, "loss": 2.5504, "step": 161110 }, { "epoch": 0.3209868672701772, "grad_norm": 0.16528740525245667, "learning_rate": 0.002, "loss": 2.5666, "step": 161120 }, { "epoch": 0.32100678949381617, "grad_norm": 0.15569254755973816, "learning_rate": 0.002, "loss": 2.5753, "step": 161130 }, { "epoch": 0.32102671171745506, "grad_norm": 0.1725345253944397, "learning_rate": 0.002, "loss": 2.5699, "step": 161140 }, { "epoch": 0.32104663394109395, "grad_norm": 0.152582585811615, "learning_rate": 0.002, "loss": 2.5496, "step": 161150 }, { "epoch": 0.3210665561647329, "grad_norm": 0.1538693606853485, "learning_rate": 0.002, "loss": 2.5548, "step": 161160 }, { "epoch": 0.3210864783883718, "grad_norm": 0.19786961376667023, "learning_rate": 0.002, "loss": 2.569, "step": 161170 }, { "epoch": 0.32110640061201073, "grad_norm": 0.15049616992473602, "learning_rate": 0.002, "loss": 2.5455, "step": 161180 }, { "epoch": 0.3211263228356496, "grad_norm": 0.22312715649604797, "learning_rate": 0.002, "loss": 2.5609, "step": 161190 }, { "epoch": 0.3211462450592885, "grad_norm": 0.17698074877262115, "learning_rate": 0.002, "loss": 2.5679, "step": 161200 }, { "epoch": 0.32116616728292746, "grad_norm": 0.15800665318965912, "learning_rate": 0.002, "loss": 2.552, "step": 161210 }, { "epoch": 0.32118608950656635, "grad_norm": 0.18141406774520874, "learning_rate": 0.002, "loss": 2.5662, "step": 161220 }, { "epoch": 0.3212060117302053, "grad_norm": 0.18512490391731262, "learning_rate": 0.002, "loss": 2.5573, "step": 161230 }, { "epoch": 0.3212259339538442, "grad_norm": 0.1896050125360489, "learning_rate": 0.002, "loss": 2.5724, "step": 161240 }, { "epoch": 0.3212458561774831, "grad_norm": 0.17979037761688232, "learning_rate": 0.002, "loss": 2.5678, "step": 161250 }, { "epoch": 0.321265778401122, "grad_norm": 0.15924403071403503, "learning_rate": 0.002, "loss": 2.5532, "step": 161260 }, { "epoch": 0.3212857006247609, "grad_norm": 0.17042674124240875, "learning_rate": 0.002, "loss": 2.5655, "step": 161270 }, { "epoch": 0.32130562284839986, "grad_norm": 0.16254016757011414, "learning_rate": 0.002, "loss": 2.5617, "step": 161280 }, { "epoch": 0.32132554507203875, "grad_norm": 0.183878555893898, "learning_rate": 0.002, "loss": 2.567, "step": 161290 }, { "epoch": 0.3213454672956777, "grad_norm": 0.21231915056705475, "learning_rate": 0.002, "loss": 2.5496, "step": 161300 }, { "epoch": 0.3213653895193166, "grad_norm": 0.30540090799331665, "learning_rate": 0.002, "loss": 2.5652, "step": 161310 }, { "epoch": 0.3213853117429555, "grad_norm": 0.16276341676712036, "learning_rate": 0.002, "loss": 2.5716, "step": 161320 }, { "epoch": 0.32140523396659443, "grad_norm": 0.15401709079742432, "learning_rate": 0.002, "loss": 2.5422, "step": 161330 }, { "epoch": 0.3214251561902333, "grad_norm": 0.16607517004013062, "learning_rate": 0.002, "loss": 2.5692, "step": 161340 }, { "epoch": 0.32144507841387226, "grad_norm": 0.15667609870433807, "learning_rate": 0.002, "loss": 2.5628, "step": 161350 }, { "epoch": 0.32146500063751116, "grad_norm": 0.1562385857105255, "learning_rate": 0.002, "loss": 2.5654, "step": 161360 }, { "epoch": 0.32148492286115005, "grad_norm": 0.1661105453968048, "learning_rate": 0.002, "loss": 2.5707, "step": 161370 }, { "epoch": 0.321504845084789, "grad_norm": 0.1803433895111084, "learning_rate": 0.002, "loss": 2.5636, "step": 161380 }, { "epoch": 0.3215247673084279, "grad_norm": 0.1649913489818573, "learning_rate": 0.002, "loss": 2.5543, "step": 161390 }, { "epoch": 0.32154468953206683, "grad_norm": 0.21048617362976074, "learning_rate": 0.002, "loss": 2.5519, "step": 161400 }, { "epoch": 0.3215646117557057, "grad_norm": 0.13504692912101746, "learning_rate": 0.002, "loss": 2.5662, "step": 161410 }, { "epoch": 0.3215845339793446, "grad_norm": 0.15513746440410614, "learning_rate": 0.002, "loss": 2.5612, "step": 161420 }, { "epoch": 0.32160445620298356, "grad_norm": 0.15645849704742432, "learning_rate": 0.002, "loss": 2.5579, "step": 161430 }, { "epoch": 0.32162437842662245, "grad_norm": 0.18617448210716248, "learning_rate": 0.002, "loss": 2.5664, "step": 161440 }, { "epoch": 0.3216443006502614, "grad_norm": 0.1428021788597107, "learning_rate": 0.002, "loss": 2.559, "step": 161450 }, { "epoch": 0.3216642228739003, "grad_norm": 0.18747226893901825, "learning_rate": 0.002, "loss": 2.5675, "step": 161460 }, { "epoch": 0.32168414509753923, "grad_norm": 0.15034718811511993, "learning_rate": 0.002, "loss": 2.56, "step": 161470 }, { "epoch": 0.3217040673211781, "grad_norm": 0.2085323929786682, "learning_rate": 0.002, "loss": 2.5575, "step": 161480 }, { "epoch": 0.321723989544817, "grad_norm": 0.14212818443775177, "learning_rate": 0.002, "loss": 2.5629, "step": 161490 }, { "epoch": 0.32174391176845596, "grad_norm": 0.15285314619541168, "learning_rate": 0.002, "loss": 2.5515, "step": 161500 }, { "epoch": 0.32176383399209485, "grad_norm": 0.15983150899410248, "learning_rate": 0.002, "loss": 2.5614, "step": 161510 }, { "epoch": 0.3217837562157338, "grad_norm": 0.19280733168125153, "learning_rate": 0.002, "loss": 2.5645, "step": 161520 }, { "epoch": 0.3218036784393727, "grad_norm": 0.15497533977031708, "learning_rate": 0.002, "loss": 2.5516, "step": 161530 }, { "epoch": 0.3218236006630116, "grad_norm": 0.19430506229400635, "learning_rate": 0.002, "loss": 2.5686, "step": 161540 }, { "epoch": 0.3218435228866505, "grad_norm": 0.18946924805641174, "learning_rate": 0.002, "loss": 2.5469, "step": 161550 }, { "epoch": 0.3218634451102894, "grad_norm": 0.15371176600456238, "learning_rate": 0.002, "loss": 2.547, "step": 161560 }, { "epoch": 0.32188336733392836, "grad_norm": 0.19524678587913513, "learning_rate": 0.002, "loss": 2.5615, "step": 161570 }, { "epoch": 0.32190328955756725, "grad_norm": 0.16724875569343567, "learning_rate": 0.002, "loss": 2.5627, "step": 161580 }, { "epoch": 0.3219232117812062, "grad_norm": 0.1508246511220932, "learning_rate": 0.002, "loss": 2.564, "step": 161590 }, { "epoch": 0.3219431340048451, "grad_norm": 0.22329702973365784, "learning_rate": 0.002, "loss": 2.5783, "step": 161600 }, { "epoch": 0.321963056228484, "grad_norm": 0.19497722387313843, "learning_rate": 0.002, "loss": 2.5606, "step": 161610 }, { "epoch": 0.3219829784521229, "grad_norm": 0.16990885138511658, "learning_rate": 0.002, "loss": 2.5627, "step": 161620 }, { "epoch": 0.3220029006757618, "grad_norm": 0.18549580872058868, "learning_rate": 0.002, "loss": 2.5675, "step": 161630 }, { "epoch": 0.32202282289940076, "grad_norm": 0.17504973709583282, "learning_rate": 0.002, "loss": 2.5767, "step": 161640 }, { "epoch": 0.32204274512303965, "grad_norm": 0.1626708060503006, "learning_rate": 0.002, "loss": 2.5608, "step": 161650 }, { "epoch": 0.32206266734667854, "grad_norm": 0.18143506348133087, "learning_rate": 0.002, "loss": 2.5628, "step": 161660 }, { "epoch": 0.3220825895703175, "grad_norm": 0.1636240929365158, "learning_rate": 0.002, "loss": 2.5568, "step": 161670 }, { "epoch": 0.3221025117939564, "grad_norm": 0.13431352376937866, "learning_rate": 0.002, "loss": 2.5538, "step": 161680 }, { "epoch": 0.3221224340175953, "grad_norm": 0.20155386626720428, "learning_rate": 0.002, "loss": 2.5469, "step": 161690 }, { "epoch": 0.3221423562412342, "grad_norm": 0.16247747838497162, "learning_rate": 0.002, "loss": 2.5804, "step": 161700 }, { "epoch": 0.3221622784648731, "grad_norm": 0.17495709657669067, "learning_rate": 0.002, "loss": 2.5595, "step": 161710 }, { "epoch": 0.32218220068851205, "grad_norm": 0.16153353452682495, "learning_rate": 0.002, "loss": 2.5487, "step": 161720 }, { "epoch": 0.32220212291215095, "grad_norm": 0.15323300659656525, "learning_rate": 0.002, "loss": 2.5578, "step": 161730 }, { "epoch": 0.3222220451357899, "grad_norm": 0.1535932719707489, "learning_rate": 0.002, "loss": 2.5662, "step": 161740 }, { "epoch": 0.3222419673594288, "grad_norm": 0.19829559326171875, "learning_rate": 0.002, "loss": 2.5574, "step": 161750 }, { "epoch": 0.32226188958306773, "grad_norm": 0.15069837868213654, "learning_rate": 0.002, "loss": 2.5495, "step": 161760 }, { "epoch": 0.3222818118067066, "grad_norm": 0.16424621641635895, "learning_rate": 0.002, "loss": 2.5615, "step": 161770 }, { "epoch": 0.3223017340303455, "grad_norm": 0.1651918739080429, "learning_rate": 0.002, "loss": 2.5532, "step": 161780 }, { "epoch": 0.32232165625398446, "grad_norm": 0.17933864891529083, "learning_rate": 0.002, "loss": 2.5556, "step": 161790 }, { "epoch": 0.32234157847762335, "grad_norm": 0.18604488670825958, "learning_rate": 0.002, "loss": 2.5736, "step": 161800 }, { "epoch": 0.3223615007012623, "grad_norm": 0.17142555117607117, "learning_rate": 0.002, "loss": 2.5691, "step": 161810 }, { "epoch": 0.3223814229249012, "grad_norm": 0.15830734372138977, "learning_rate": 0.002, "loss": 2.5664, "step": 161820 }, { "epoch": 0.3224013451485401, "grad_norm": 0.16146479547023773, "learning_rate": 0.002, "loss": 2.5633, "step": 161830 }, { "epoch": 0.322421267372179, "grad_norm": 0.14552509784698486, "learning_rate": 0.002, "loss": 2.5847, "step": 161840 }, { "epoch": 0.3224411895958179, "grad_norm": 0.16936592757701874, "learning_rate": 0.002, "loss": 2.5497, "step": 161850 }, { "epoch": 0.32246111181945686, "grad_norm": 0.17316387593746185, "learning_rate": 0.002, "loss": 2.5714, "step": 161860 }, { "epoch": 0.32248103404309575, "grad_norm": 0.15194888412952423, "learning_rate": 0.002, "loss": 2.5527, "step": 161870 }, { "epoch": 0.3225009562667347, "grad_norm": 0.16983339190483093, "learning_rate": 0.002, "loss": 2.5686, "step": 161880 }, { "epoch": 0.3225208784903736, "grad_norm": 0.1642684042453766, "learning_rate": 0.002, "loss": 2.5581, "step": 161890 }, { "epoch": 0.3225408007140125, "grad_norm": 0.16912004351615906, "learning_rate": 0.002, "loss": 2.5695, "step": 161900 }, { "epoch": 0.3225607229376514, "grad_norm": 0.15890267491340637, "learning_rate": 0.002, "loss": 2.5587, "step": 161910 }, { "epoch": 0.3225806451612903, "grad_norm": 0.15748019516468048, "learning_rate": 0.002, "loss": 2.552, "step": 161920 }, { "epoch": 0.32260056738492926, "grad_norm": 0.15496212244033813, "learning_rate": 0.002, "loss": 2.5641, "step": 161930 }, { "epoch": 0.32262048960856815, "grad_norm": 0.20024512708187103, "learning_rate": 0.002, "loss": 2.5576, "step": 161940 }, { "epoch": 0.32264041183220704, "grad_norm": 0.18522873520851135, "learning_rate": 0.002, "loss": 2.5538, "step": 161950 }, { "epoch": 0.322660334055846, "grad_norm": 0.1701144427061081, "learning_rate": 0.002, "loss": 2.5673, "step": 161960 }, { "epoch": 0.3226802562794849, "grad_norm": 0.16218797862529755, "learning_rate": 0.002, "loss": 2.5533, "step": 161970 }, { "epoch": 0.3227001785031238, "grad_norm": 0.15800240635871887, "learning_rate": 0.002, "loss": 2.5656, "step": 161980 }, { "epoch": 0.3227201007267627, "grad_norm": 0.14860574901103973, "learning_rate": 0.002, "loss": 2.5689, "step": 161990 }, { "epoch": 0.3227400229504016, "grad_norm": 0.1794944703578949, "learning_rate": 0.002, "loss": 2.5438, "step": 162000 }, { "epoch": 0.32275994517404055, "grad_norm": 0.14546924829483032, "learning_rate": 0.002, "loss": 2.5673, "step": 162010 }, { "epoch": 0.32277986739767944, "grad_norm": 0.2182384431362152, "learning_rate": 0.002, "loss": 2.5524, "step": 162020 }, { "epoch": 0.3227997896213184, "grad_norm": 0.15273384749889374, "learning_rate": 0.002, "loss": 2.5614, "step": 162030 }, { "epoch": 0.3228197118449573, "grad_norm": 0.15449775755405426, "learning_rate": 0.002, "loss": 2.5575, "step": 162040 }, { "epoch": 0.3228396340685962, "grad_norm": 0.15866531431674957, "learning_rate": 0.002, "loss": 2.5589, "step": 162050 }, { "epoch": 0.3228595562922351, "grad_norm": 0.16861681640148163, "learning_rate": 0.002, "loss": 2.553, "step": 162060 }, { "epoch": 0.322879478515874, "grad_norm": 0.16314467787742615, "learning_rate": 0.002, "loss": 2.5474, "step": 162070 }, { "epoch": 0.32289940073951295, "grad_norm": 0.18082085251808167, "learning_rate": 0.002, "loss": 2.5795, "step": 162080 }, { "epoch": 0.32291932296315184, "grad_norm": 0.17899096012115479, "learning_rate": 0.002, "loss": 2.5741, "step": 162090 }, { "epoch": 0.3229392451867908, "grad_norm": 0.15245455503463745, "learning_rate": 0.002, "loss": 2.5513, "step": 162100 }, { "epoch": 0.3229591674104297, "grad_norm": 0.15695396065711975, "learning_rate": 0.002, "loss": 2.5649, "step": 162110 }, { "epoch": 0.3229790896340686, "grad_norm": 0.20147444307804108, "learning_rate": 0.002, "loss": 2.5762, "step": 162120 }, { "epoch": 0.3229990118577075, "grad_norm": 0.1681949645280838, "learning_rate": 0.002, "loss": 2.5578, "step": 162130 }, { "epoch": 0.3230189340813464, "grad_norm": 0.1654004156589508, "learning_rate": 0.002, "loss": 2.5608, "step": 162140 }, { "epoch": 0.32303885630498536, "grad_norm": 0.1823701709508896, "learning_rate": 0.002, "loss": 2.5617, "step": 162150 }, { "epoch": 0.32305877852862425, "grad_norm": 0.14643900096416473, "learning_rate": 0.002, "loss": 2.5615, "step": 162160 }, { "epoch": 0.32307870075226314, "grad_norm": 0.16543079912662506, "learning_rate": 0.002, "loss": 2.5685, "step": 162170 }, { "epoch": 0.3230986229759021, "grad_norm": 0.1632044017314911, "learning_rate": 0.002, "loss": 2.5555, "step": 162180 }, { "epoch": 0.323118545199541, "grad_norm": 0.20077911019325256, "learning_rate": 0.002, "loss": 2.5683, "step": 162190 }, { "epoch": 0.3231384674231799, "grad_norm": 0.15346606075763702, "learning_rate": 0.002, "loss": 2.5653, "step": 162200 }, { "epoch": 0.3231583896468188, "grad_norm": 0.18225249648094177, "learning_rate": 0.002, "loss": 2.5622, "step": 162210 }, { "epoch": 0.32317831187045776, "grad_norm": 0.15694056451320648, "learning_rate": 0.002, "loss": 2.5732, "step": 162220 }, { "epoch": 0.32319823409409665, "grad_norm": 0.1481652706861496, "learning_rate": 0.002, "loss": 2.5539, "step": 162230 }, { "epoch": 0.32321815631773554, "grad_norm": 0.17453670501708984, "learning_rate": 0.002, "loss": 2.5819, "step": 162240 }, { "epoch": 0.3232380785413745, "grad_norm": 0.1688912957906723, "learning_rate": 0.002, "loss": 2.5689, "step": 162250 }, { "epoch": 0.3232580007650134, "grad_norm": 0.17504757642745972, "learning_rate": 0.002, "loss": 2.5601, "step": 162260 }, { "epoch": 0.3232779229886523, "grad_norm": 0.17946578562259674, "learning_rate": 0.002, "loss": 2.5724, "step": 162270 }, { "epoch": 0.3232978452122912, "grad_norm": 0.15736088156700134, "learning_rate": 0.002, "loss": 2.5482, "step": 162280 }, { "epoch": 0.3233177674359301, "grad_norm": 0.1830545961856842, "learning_rate": 0.002, "loss": 2.5533, "step": 162290 }, { "epoch": 0.32333768965956905, "grad_norm": 0.1625288873910904, "learning_rate": 0.002, "loss": 2.5745, "step": 162300 }, { "epoch": 0.32335761188320794, "grad_norm": 0.1666811853647232, "learning_rate": 0.002, "loss": 2.5527, "step": 162310 }, { "epoch": 0.3233775341068469, "grad_norm": 0.16361841559410095, "learning_rate": 0.002, "loss": 2.5736, "step": 162320 }, { "epoch": 0.3233974563304858, "grad_norm": 0.1635541021823883, "learning_rate": 0.002, "loss": 2.545, "step": 162330 }, { "epoch": 0.3234173785541247, "grad_norm": 0.1761711984872818, "learning_rate": 0.002, "loss": 2.5724, "step": 162340 }, { "epoch": 0.3234373007777636, "grad_norm": 0.18007102608680725, "learning_rate": 0.002, "loss": 2.5588, "step": 162350 }, { "epoch": 0.3234572230014025, "grad_norm": 0.18430379033088684, "learning_rate": 0.002, "loss": 2.5536, "step": 162360 }, { "epoch": 0.32347714522504145, "grad_norm": 0.16058821976184845, "learning_rate": 0.002, "loss": 2.5684, "step": 162370 }, { "epoch": 0.32349706744868034, "grad_norm": 0.19078406691551208, "learning_rate": 0.002, "loss": 2.5737, "step": 162380 }, { "epoch": 0.3235169896723193, "grad_norm": 0.14625713229179382, "learning_rate": 0.002, "loss": 2.5586, "step": 162390 }, { "epoch": 0.3235369118959582, "grad_norm": 0.1620531529188156, "learning_rate": 0.002, "loss": 2.5557, "step": 162400 }, { "epoch": 0.32355683411959707, "grad_norm": 0.17306935787200928, "learning_rate": 0.002, "loss": 2.5638, "step": 162410 }, { "epoch": 0.323576756343236, "grad_norm": 0.17212027311325073, "learning_rate": 0.002, "loss": 2.551, "step": 162420 }, { "epoch": 0.3235966785668749, "grad_norm": 0.1759404093027115, "learning_rate": 0.002, "loss": 2.5503, "step": 162430 }, { "epoch": 0.32361660079051385, "grad_norm": 0.1787867397069931, "learning_rate": 0.002, "loss": 2.5568, "step": 162440 }, { "epoch": 0.32363652301415274, "grad_norm": 0.15355005860328674, "learning_rate": 0.002, "loss": 2.5669, "step": 162450 }, { "epoch": 0.32365644523779163, "grad_norm": 0.16009816527366638, "learning_rate": 0.002, "loss": 2.5529, "step": 162460 }, { "epoch": 0.3236763674614306, "grad_norm": 0.16230367124080658, "learning_rate": 0.002, "loss": 2.5647, "step": 162470 }, { "epoch": 0.32369628968506947, "grad_norm": 0.1790153533220291, "learning_rate": 0.002, "loss": 2.5484, "step": 162480 }, { "epoch": 0.3237162119087084, "grad_norm": 0.16230975091457367, "learning_rate": 0.002, "loss": 2.5523, "step": 162490 }, { "epoch": 0.3237361341323473, "grad_norm": 0.16123250126838684, "learning_rate": 0.002, "loss": 2.5787, "step": 162500 }, { "epoch": 0.32375605635598625, "grad_norm": 0.18224576115608215, "learning_rate": 0.002, "loss": 2.5451, "step": 162510 }, { "epoch": 0.32377597857962515, "grad_norm": 0.17847268283367157, "learning_rate": 0.002, "loss": 2.5662, "step": 162520 }, { "epoch": 0.32379590080326404, "grad_norm": 0.20965129137039185, "learning_rate": 0.002, "loss": 2.5621, "step": 162530 }, { "epoch": 0.323815823026903, "grad_norm": 0.14022722840309143, "learning_rate": 0.002, "loss": 2.5653, "step": 162540 }, { "epoch": 0.3238357452505419, "grad_norm": 0.15779195725917816, "learning_rate": 0.002, "loss": 2.5674, "step": 162550 }, { "epoch": 0.3238556674741808, "grad_norm": 0.17174236476421356, "learning_rate": 0.002, "loss": 2.5534, "step": 162560 }, { "epoch": 0.3238755896978197, "grad_norm": 0.18397119641304016, "learning_rate": 0.002, "loss": 2.5813, "step": 162570 }, { "epoch": 0.3238955119214586, "grad_norm": 0.18385833501815796, "learning_rate": 0.002, "loss": 2.5593, "step": 162580 }, { "epoch": 0.32391543414509755, "grad_norm": 0.15382120013237, "learning_rate": 0.002, "loss": 2.5589, "step": 162590 }, { "epoch": 0.32393535636873644, "grad_norm": 0.2120542824268341, "learning_rate": 0.002, "loss": 2.5725, "step": 162600 }, { "epoch": 0.3239552785923754, "grad_norm": 0.15963396430015564, "learning_rate": 0.002, "loss": 2.5745, "step": 162610 }, { "epoch": 0.3239752008160143, "grad_norm": 0.18724410235881805, "learning_rate": 0.002, "loss": 2.5573, "step": 162620 }, { "epoch": 0.3239951230396532, "grad_norm": 0.15022271871566772, "learning_rate": 0.002, "loss": 2.5476, "step": 162630 }, { "epoch": 0.3240150452632921, "grad_norm": 0.23538263142108917, "learning_rate": 0.002, "loss": 2.5585, "step": 162640 }, { "epoch": 0.324034967486931, "grad_norm": 0.15164020657539368, "learning_rate": 0.002, "loss": 2.562, "step": 162650 }, { "epoch": 0.32405488971056995, "grad_norm": 0.15678104758262634, "learning_rate": 0.002, "loss": 2.5551, "step": 162660 }, { "epoch": 0.32407481193420884, "grad_norm": 0.19986861944198608, "learning_rate": 0.002, "loss": 2.556, "step": 162670 }, { "epoch": 0.3240947341578478, "grad_norm": 0.16149389743804932, "learning_rate": 0.002, "loss": 2.5608, "step": 162680 }, { "epoch": 0.3241146563814867, "grad_norm": 0.16482795774936676, "learning_rate": 0.002, "loss": 2.5576, "step": 162690 }, { "epoch": 0.32413457860512557, "grad_norm": 0.16811642050743103, "learning_rate": 0.002, "loss": 2.5546, "step": 162700 }, { "epoch": 0.3241545008287645, "grad_norm": 0.17927244305610657, "learning_rate": 0.002, "loss": 2.5635, "step": 162710 }, { "epoch": 0.3241744230524034, "grad_norm": 0.15446670353412628, "learning_rate": 0.002, "loss": 2.5636, "step": 162720 }, { "epoch": 0.32419434527604235, "grad_norm": 0.16715742647647858, "learning_rate": 0.002, "loss": 2.5699, "step": 162730 }, { "epoch": 0.32421426749968124, "grad_norm": 0.1647413969039917, "learning_rate": 0.002, "loss": 2.5693, "step": 162740 }, { "epoch": 0.32423418972332013, "grad_norm": 0.15459774434566498, "learning_rate": 0.002, "loss": 2.5511, "step": 162750 }, { "epoch": 0.3242541119469591, "grad_norm": 0.17088830471038818, "learning_rate": 0.002, "loss": 2.555, "step": 162760 }, { "epoch": 0.32427403417059797, "grad_norm": 0.21095037460327148, "learning_rate": 0.002, "loss": 2.5726, "step": 162770 }, { "epoch": 0.3242939563942369, "grad_norm": 0.13525152206420898, "learning_rate": 0.002, "loss": 2.5623, "step": 162780 }, { "epoch": 0.3243138786178758, "grad_norm": 0.19071133434772491, "learning_rate": 0.002, "loss": 2.5691, "step": 162790 }, { "epoch": 0.32433380084151475, "grad_norm": 0.14544813334941864, "learning_rate": 0.002, "loss": 2.5633, "step": 162800 }, { "epoch": 0.32435372306515364, "grad_norm": 0.17698293924331665, "learning_rate": 0.002, "loss": 2.5658, "step": 162810 }, { "epoch": 0.32437364528879253, "grad_norm": 0.15665556490421295, "learning_rate": 0.002, "loss": 2.5599, "step": 162820 }, { "epoch": 0.3243935675124315, "grad_norm": 0.14459146559238434, "learning_rate": 0.002, "loss": 2.5719, "step": 162830 }, { "epoch": 0.32441348973607037, "grad_norm": 0.1699252426624298, "learning_rate": 0.002, "loss": 2.5645, "step": 162840 }, { "epoch": 0.3244334119597093, "grad_norm": 0.16279183328151703, "learning_rate": 0.002, "loss": 2.5661, "step": 162850 }, { "epoch": 0.3244533341833482, "grad_norm": 0.17729797959327698, "learning_rate": 0.002, "loss": 2.5643, "step": 162860 }, { "epoch": 0.3244732564069871, "grad_norm": 0.16589146852493286, "learning_rate": 0.002, "loss": 2.561, "step": 162870 }, { "epoch": 0.32449317863062604, "grad_norm": 0.17234204709529877, "learning_rate": 0.002, "loss": 2.5538, "step": 162880 }, { "epoch": 0.32451310085426494, "grad_norm": 0.18661926686763763, "learning_rate": 0.002, "loss": 2.5618, "step": 162890 }, { "epoch": 0.3245330230779039, "grad_norm": 0.18733635544776917, "learning_rate": 0.002, "loss": 2.569, "step": 162900 }, { "epoch": 0.3245529453015428, "grad_norm": 0.18456237018108368, "learning_rate": 0.002, "loss": 2.5811, "step": 162910 }, { "epoch": 0.32457286752518166, "grad_norm": 0.17040608823299408, "learning_rate": 0.002, "loss": 2.5674, "step": 162920 }, { "epoch": 0.3245927897488206, "grad_norm": 0.15795059502124786, "learning_rate": 0.002, "loss": 2.5622, "step": 162930 }, { "epoch": 0.3246127119724595, "grad_norm": 0.1541837900876999, "learning_rate": 0.002, "loss": 2.561, "step": 162940 }, { "epoch": 0.32463263419609845, "grad_norm": 0.19153982400894165, "learning_rate": 0.002, "loss": 2.5693, "step": 162950 }, { "epoch": 0.32465255641973734, "grad_norm": 0.16320371627807617, "learning_rate": 0.002, "loss": 2.5632, "step": 162960 }, { "epoch": 0.3246724786433763, "grad_norm": 0.19301657378673553, "learning_rate": 0.002, "loss": 2.5543, "step": 162970 }, { "epoch": 0.3246924008670152, "grad_norm": 0.16519814729690552, "learning_rate": 0.002, "loss": 2.5665, "step": 162980 }, { "epoch": 0.32471232309065406, "grad_norm": 0.16339823603630066, "learning_rate": 0.002, "loss": 2.5617, "step": 162990 }, { "epoch": 0.324732245314293, "grad_norm": 0.18449726700782776, "learning_rate": 0.002, "loss": 2.5614, "step": 163000 }, { "epoch": 0.3247521675379319, "grad_norm": 0.1791008859872818, "learning_rate": 0.002, "loss": 2.5663, "step": 163010 }, { "epoch": 0.32477208976157085, "grad_norm": 0.1656438559293747, "learning_rate": 0.002, "loss": 2.5592, "step": 163020 }, { "epoch": 0.32479201198520974, "grad_norm": 0.2020069807767868, "learning_rate": 0.002, "loss": 2.5665, "step": 163030 }, { "epoch": 0.32481193420884863, "grad_norm": 0.15380528569221497, "learning_rate": 0.002, "loss": 2.5536, "step": 163040 }, { "epoch": 0.3248318564324876, "grad_norm": 0.17292553186416626, "learning_rate": 0.002, "loss": 2.5583, "step": 163050 }, { "epoch": 0.32485177865612647, "grad_norm": 0.1566876769065857, "learning_rate": 0.002, "loss": 2.5546, "step": 163060 }, { "epoch": 0.3248717008797654, "grad_norm": 0.1752852350473404, "learning_rate": 0.002, "loss": 2.5657, "step": 163070 }, { "epoch": 0.3248916231034043, "grad_norm": 0.17874987423419952, "learning_rate": 0.002, "loss": 2.5442, "step": 163080 }, { "epoch": 0.32491154532704325, "grad_norm": 0.180388405919075, "learning_rate": 0.002, "loss": 2.571, "step": 163090 }, { "epoch": 0.32493146755068214, "grad_norm": 0.19597750902175903, "learning_rate": 0.002, "loss": 2.5518, "step": 163100 }, { "epoch": 0.32495138977432103, "grad_norm": 0.15348102152347565, "learning_rate": 0.002, "loss": 2.5682, "step": 163110 }, { "epoch": 0.32497131199796, "grad_norm": 0.17851348221302032, "learning_rate": 0.002, "loss": 2.5623, "step": 163120 }, { "epoch": 0.32499123422159887, "grad_norm": 0.1658497005701065, "learning_rate": 0.002, "loss": 2.5699, "step": 163130 }, { "epoch": 0.3250111564452378, "grad_norm": 0.15855352580547333, "learning_rate": 0.002, "loss": 2.5687, "step": 163140 }, { "epoch": 0.3250310786688767, "grad_norm": 0.2073485553264618, "learning_rate": 0.002, "loss": 2.5708, "step": 163150 }, { "epoch": 0.3250510008925156, "grad_norm": 0.16672132909297943, "learning_rate": 0.002, "loss": 2.5628, "step": 163160 }, { "epoch": 0.32507092311615454, "grad_norm": 0.14955954253673553, "learning_rate": 0.002, "loss": 2.5739, "step": 163170 }, { "epoch": 0.32509084533979343, "grad_norm": 0.16155362129211426, "learning_rate": 0.002, "loss": 2.5559, "step": 163180 }, { "epoch": 0.3251107675634324, "grad_norm": 0.1587754338979721, "learning_rate": 0.002, "loss": 2.549, "step": 163190 }, { "epoch": 0.32513068978707127, "grad_norm": 0.16146379709243774, "learning_rate": 0.002, "loss": 2.5658, "step": 163200 }, { "epoch": 0.32515061201071016, "grad_norm": 0.1602058857679367, "learning_rate": 0.002, "loss": 2.5474, "step": 163210 }, { "epoch": 0.3251705342343491, "grad_norm": 0.18086527287960052, "learning_rate": 0.002, "loss": 2.5659, "step": 163220 }, { "epoch": 0.325190456457988, "grad_norm": 0.16411833465099335, "learning_rate": 0.002, "loss": 2.5782, "step": 163230 }, { "epoch": 0.32521037868162694, "grad_norm": 0.16369707882404327, "learning_rate": 0.002, "loss": 2.5618, "step": 163240 }, { "epoch": 0.32523030090526583, "grad_norm": 0.1608780324459076, "learning_rate": 0.002, "loss": 2.5761, "step": 163250 }, { "epoch": 0.3252502231289048, "grad_norm": 0.2289341390132904, "learning_rate": 0.002, "loss": 2.5568, "step": 163260 }, { "epoch": 0.32527014535254367, "grad_norm": 0.17266318202018738, "learning_rate": 0.002, "loss": 2.5463, "step": 163270 }, { "epoch": 0.32529006757618256, "grad_norm": 0.16364389657974243, "learning_rate": 0.002, "loss": 2.5627, "step": 163280 }, { "epoch": 0.3253099897998215, "grad_norm": 0.14825567603111267, "learning_rate": 0.002, "loss": 2.5719, "step": 163290 }, { "epoch": 0.3253299120234604, "grad_norm": 0.21537528932094574, "learning_rate": 0.002, "loss": 2.5577, "step": 163300 }, { "epoch": 0.32534983424709935, "grad_norm": 0.1474008858203888, "learning_rate": 0.002, "loss": 2.564, "step": 163310 }, { "epoch": 0.32536975647073824, "grad_norm": 0.15088625252246857, "learning_rate": 0.002, "loss": 2.5707, "step": 163320 }, { "epoch": 0.3253896786943771, "grad_norm": 0.17471186816692352, "learning_rate": 0.002, "loss": 2.5616, "step": 163330 }, { "epoch": 0.3254096009180161, "grad_norm": 0.16283752024173737, "learning_rate": 0.002, "loss": 2.5751, "step": 163340 }, { "epoch": 0.32542952314165496, "grad_norm": 0.2131931632757187, "learning_rate": 0.002, "loss": 2.5649, "step": 163350 }, { "epoch": 0.3254494453652939, "grad_norm": 0.1414114236831665, "learning_rate": 0.002, "loss": 2.5581, "step": 163360 }, { "epoch": 0.3254693675889328, "grad_norm": 0.1709144413471222, "learning_rate": 0.002, "loss": 2.5679, "step": 163370 }, { "epoch": 0.32548928981257175, "grad_norm": 0.14275206625461578, "learning_rate": 0.002, "loss": 2.5522, "step": 163380 }, { "epoch": 0.32550921203621064, "grad_norm": 0.18925213813781738, "learning_rate": 0.002, "loss": 2.5662, "step": 163390 }, { "epoch": 0.32552913425984953, "grad_norm": 0.15699134767055511, "learning_rate": 0.002, "loss": 2.5598, "step": 163400 }, { "epoch": 0.3255490564834885, "grad_norm": 0.16000552475452423, "learning_rate": 0.002, "loss": 2.5684, "step": 163410 }, { "epoch": 0.32556897870712737, "grad_norm": 0.18626457452774048, "learning_rate": 0.002, "loss": 2.5599, "step": 163420 }, { "epoch": 0.3255889009307663, "grad_norm": 0.1642681211233139, "learning_rate": 0.002, "loss": 2.5659, "step": 163430 }, { "epoch": 0.3256088231544052, "grad_norm": 0.17108015716075897, "learning_rate": 0.002, "loss": 2.5579, "step": 163440 }, { "epoch": 0.3256287453780441, "grad_norm": 0.17632362246513367, "learning_rate": 0.002, "loss": 2.5643, "step": 163450 }, { "epoch": 0.32564866760168304, "grad_norm": 0.1554969698190689, "learning_rate": 0.002, "loss": 2.5615, "step": 163460 }, { "epoch": 0.32566858982532193, "grad_norm": 0.17613239586353302, "learning_rate": 0.002, "loss": 2.5677, "step": 163470 }, { "epoch": 0.3256885120489609, "grad_norm": 0.18370123207569122, "learning_rate": 0.002, "loss": 2.5459, "step": 163480 }, { "epoch": 0.32570843427259977, "grad_norm": 0.21078559756278992, "learning_rate": 0.002, "loss": 2.5629, "step": 163490 }, { "epoch": 0.32572835649623866, "grad_norm": 0.17825095355510712, "learning_rate": 0.002, "loss": 2.5628, "step": 163500 }, { "epoch": 0.3257482787198776, "grad_norm": 0.17014722526073456, "learning_rate": 0.002, "loss": 2.5649, "step": 163510 }, { "epoch": 0.3257682009435165, "grad_norm": 0.14966736733913422, "learning_rate": 0.002, "loss": 2.5582, "step": 163520 }, { "epoch": 0.32578812316715544, "grad_norm": 0.17364618182182312, "learning_rate": 0.002, "loss": 2.5484, "step": 163530 }, { "epoch": 0.32580804539079433, "grad_norm": 0.16451533138751984, "learning_rate": 0.002, "loss": 2.5561, "step": 163540 }, { "epoch": 0.3258279676144333, "grad_norm": 0.20840518176555634, "learning_rate": 0.002, "loss": 2.5702, "step": 163550 }, { "epoch": 0.32584788983807217, "grad_norm": 0.16959841549396515, "learning_rate": 0.002, "loss": 2.5558, "step": 163560 }, { "epoch": 0.32586781206171106, "grad_norm": 0.20557215809822083, "learning_rate": 0.002, "loss": 2.555, "step": 163570 }, { "epoch": 0.32588773428535, "grad_norm": 0.17820978164672852, "learning_rate": 0.002, "loss": 2.5575, "step": 163580 }, { "epoch": 0.3259076565089889, "grad_norm": 0.15616720914840698, "learning_rate": 0.002, "loss": 2.5676, "step": 163590 }, { "epoch": 0.32592757873262784, "grad_norm": 0.16076341271400452, "learning_rate": 0.002, "loss": 2.5601, "step": 163600 }, { "epoch": 0.32594750095626673, "grad_norm": 0.18754942715168, "learning_rate": 0.002, "loss": 2.5797, "step": 163610 }, { "epoch": 0.3259674231799056, "grad_norm": 0.169357568025589, "learning_rate": 0.002, "loss": 2.5581, "step": 163620 }, { "epoch": 0.32598734540354457, "grad_norm": 0.18356479704380035, "learning_rate": 0.002, "loss": 2.5613, "step": 163630 }, { "epoch": 0.32600726762718346, "grad_norm": 0.1670588105916977, "learning_rate": 0.002, "loss": 2.5774, "step": 163640 }, { "epoch": 0.3260271898508224, "grad_norm": 0.15876083076000214, "learning_rate": 0.002, "loss": 2.5721, "step": 163650 }, { "epoch": 0.3260471120744613, "grad_norm": 0.23085816204547882, "learning_rate": 0.002, "loss": 2.562, "step": 163660 }, { "epoch": 0.3260670342981002, "grad_norm": 0.1457938849925995, "learning_rate": 0.002, "loss": 2.5662, "step": 163670 }, { "epoch": 0.32608695652173914, "grad_norm": 0.17690645158290863, "learning_rate": 0.002, "loss": 2.5473, "step": 163680 }, { "epoch": 0.326106878745378, "grad_norm": 0.1699766218662262, "learning_rate": 0.002, "loss": 2.5647, "step": 163690 }, { "epoch": 0.32612680096901697, "grad_norm": 0.1867230236530304, "learning_rate": 0.002, "loss": 2.5687, "step": 163700 }, { "epoch": 0.32614672319265586, "grad_norm": 0.1457536518573761, "learning_rate": 0.002, "loss": 2.5595, "step": 163710 }, { "epoch": 0.3261666454162948, "grad_norm": 0.155457004904747, "learning_rate": 0.002, "loss": 2.5805, "step": 163720 }, { "epoch": 0.3261865676399337, "grad_norm": 0.1923408806324005, "learning_rate": 0.002, "loss": 2.5553, "step": 163730 }, { "epoch": 0.3262064898635726, "grad_norm": 0.19410590827465057, "learning_rate": 0.002, "loss": 2.5783, "step": 163740 }, { "epoch": 0.32622641208721154, "grad_norm": 0.18656380474567413, "learning_rate": 0.002, "loss": 2.5717, "step": 163750 }, { "epoch": 0.3262463343108504, "grad_norm": 0.172644704580307, "learning_rate": 0.002, "loss": 2.5597, "step": 163760 }, { "epoch": 0.3262662565344894, "grad_norm": 0.15723086893558502, "learning_rate": 0.002, "loss": 2.5507, "step": 163770 }, { "epoch": 0.32628617875812826, "grad_norm": 0.19014938175678253, "learning_rate": 0.002, "loss": 2.5481, "step": 163780 }, { "epoch": 0.32630610098176716, "grad_norm": 0.16524066030979156, "learning_rate": 0.002, "loss": 2.5619, "step": 163790 }, { "epoch": 0.3263260232054061, "grad_norm": 0.14776895940303802, "learning_rate": 0.002, "loss": 2.5659, "step": 163800 }, { "epoch": 0.326345945429045, "grad_norm": 0.18199895322322845, "learning_rate": 0.002, "loss": 2.5559, "step": 163810 }, { "epoch": 0.32636586765268394, "grad_norm": 0.17446599900722504, "learning_rate": 0.002, "loss": 2.566, "step": 163820 }, { "epoch": 0.32638578987632283, "grad_norm": 0.13193120062351227, "learning_rate": 0.002, "loss": 2.5652, "step": 163830 }, { "epoch": 0.3264057120999618, "grad_norm": 0.17271633446216583, "learning_rate": 0.002, "loss": 2.559, "step": 163840 }, { "epoch": 0.32642563432360067, "grad_norm": 0.16014260053634644, "learning_rate": 0.002, "loss": 2.5614, "step": 163850 }, { "epoch": 0.32644555654723956, "grad_norm": 0.18041664361953735, "learning_rate": 0.002, "loss": 2.5672, "step": 163860 }, { "epoch": 0.3264654787708785, "grad_norm": 0.1487799882888794, "learning_rate": 0.002, "loss": 2.5551, "step": 163870 }, { "epoch": 0.3264854009945174, "grad_norm": 0.18322855234146118, "learning_rate": 0.002, "loss": 2.5717, "step": 163880 }, { "epoch": 0.32650532321815634, "grad_norm": 0.17693190276622772, "learning_rate": 0.002, "loss": 2.5582, "step": 163890 }, { "epoch": 0.32652524544179523, "grad_norm": 0.19506336748600006, "learning_rate": 0.002, "loss": 2.5706, "step": 163900 }, { "epoch": 0.3265451676654341, "grad_norm": 0.15042205154895782, "learning_rate": 0.002, "loss": 2.5647, "step": 163910 }, { "epoch": 0.32656508988907307, "grad_norm": 0.18745499849319458, "learning_rate": 0.002, "loss": 2.5726, "step": 163920 }, { "epoch": 0.32658501211271196, "grad_norm": 0.1661699116230011, "learning_rate": 0.002, "loss": 2.5698, "step": 163930 }, { "epoch": 0.3266049343363509, "grad_norm": 0.2094891518354416, "learning_rate": 0.002, "loss": 2.568, "step": 163940 }, { "epoch": 0.3266248565599898, "grad_norm": 0.17093966901302338, "learning_rate": 0.002, "loss": 2.544, "step": 163950 }, { "epoch": 0.3266447787836287, "grad_norm": 0.14651097357273102, "learning_rate": 0.002, "loss": 2.5449, "step": 163960 }, { "epoch": 0.32666470100726763, "grad_norm": 0.18133744597434998, "learning_rate": 0.002, "loss": 2.5634, "step": 163970 }, { "epoch": 0.3266846232309065, "grad_norm": 0.18377627432346344, "learning_rate": 0.002, "loss": 2.5581, "step": 163980 }, { "epoch": 0.32670454545454547, "grad_norm": 0.17437340319156647, "learning_rate": 0.002, "loss": 2.5538, "step": 163990 }, { "epoch": 0.32672446767818436, "grad_norm": 0.17595337331295013, "learning_rate": 0.002, "loss": 2.554, "step": 164000 }, { "epoch": 0.3267443899018233, "grad_norm": 0.1348109394311905, "learning_rate": 0.002, "loss": 2.5418, "step": 164010 }, { "epoch": 0.3267643121254622, "grad_norm": 0.1575264185667038, "learning_rate": 0.002, "loss": 2.5519, "step": 164020 }, { "epoch": 0.3267842343491011, "grad_norm": 0.1579340100288391, "learning_rate": 0.002, "loss": 2.5513, "step": 164030 }, { "epoch": 0.32680415657274003, "grad_norm": 0.18567629158496857, "learning_rate": 0.002, "loss": 2.5473, "step": 164040 }, { "epoch": 0.3268240787963789, "grad_norm": 0.1833738535642624, "learning_rate": 0.002, "loss": 2.5503, "step": 164050 }, { "epoch": 0.32684400102001787, "grad_norm": 0.16195568442344666, "learning_rate": 0.002, "loss": 2.5665, "step": 164060 }, { "epoch": 0.32686392324365676, "grad_norm": 0.14978180825710297, "learning_rate": 0.002, "loss": 2.56, "step": 164070 }, { "epoch": 0.32688384546729565, "grad_norm": 0.1832910180091858, "learning_rate": 0.002, "loss": 2.5367, "step": 164080 }, { "epoch": 0.3269037676909346, "grad_norm": 0.2201685905456543, "learning_rate": 0.002, "loss": 2.5721, "step": 164090 }, { "epoch": 0.3269236899145735, "grad_norm": 0.16999822854995728, "learning_rate": 0.002, "loss": 2.568, "step": 164100 }, { "epoch": 0.32694361213821244, "grad_norm": 0.17064905166625977, "learning_rate": 0.002, "loss": 2.568, "step": 164110 }, { "epoch": 0.3269635343618513, "grad_norm": 0.16790395975112915, "learning_rate": 0.002, "loss": 2.5721, "step": 164120 }, { "epoch": 0.3269834565854903, "grad_norm": 0.15642863512039185, "learning_rate": 0.002, "loss": 2.5679, "step": 164130 }, { "epoch": 0.32700337880912916, "grad_norm": 0.16218367218971252, "learning_rate": 0.002, "loss": 2.5534, "step": 164140 }, { "epoch": 0.32702330103276805, "grad_norm": 0.1607675701379776, "learning_rate": 0.002, "loss": 2.5618, "step": 164150 }, { "epoch": 0.327043223256407, "grad_norm": 0.15506386756896973, "learning_rate": 0.002, "loss": 2.5612, "step": 164160 }, { "epoch": 0.3270631454800459, "grad_norm": 0.2157905399799347, "learning_rate": 0.002, "loss": 2.5655, "step": 164170 }, { "epoch": 0.32708306770368484, "grad_norm": 0.15665921568870544, "learning_rate": 0.002, "loss": 2.5699, "step": 164180 }, { "epoch": 0.32710298992732373, "grad_norm": 0.1408725380897522, "learning_rate": 0.002, "loss": 2.5529, "step": 164190 }, { "epoch": 0.3271229121509626, "grad_norm": 0.15849225223064423, "learning_rate": 0.002, "loss": 2.554, "step": 164200 }, { "epoch": 0.32714283437460157, "grad_norm": 0.17467860877513885, "learning_rate": 0.002, "loss": 2.5553, "step": 164210 }, { "epoch": 0.32716275659824046, "grad_norm": 0.1735021322965622, "learning_rate": 0.002, "loss": 2.5748, "step": 164220 }, { "epoch": 0.3271826788218794, "grad_norm": 0.15257009863853455, "learning_rate": 0.002, "loss": 2.5545, "step": 164230 }, { "epoch": 0.3272026010455183, "grad_norm": 0.16362741589546204, "learning_rate": 0.002, "loss": 2.5575, "step": 164240 }, { "epoch": 0.3272225232691572, "grad_norm": 0.18548472225666046, "learning_rate": 0.002, "loss": 2.5622, "step": 164250 }, { "epoch": 0.32724244549279613, "grad_norm": 0.14154717326164246, "learning_rate": 0.002, "loss": 2.5577, "step": 164260 }, { "epoch": 0.327262367716435, "grad_norm": 0.15881270170211792, "learning_rate": 0.002, "loss": 2.5495, "step": 164270 }, { "epoch": 0.32728228994007397, "grad_norm": 0.15656021237373352, "learning_rate": 0.002, "loss": 2.5651, "step": 164280 }, { "epoch": 0.32730221216371286, "grad_norm": 0.17449699342250824, "learning_rate": 0.002, "loss": 2.5663, "step": 164290 }, { "epoch": 0.3273221343873518, "grad_norm": 0.19980092346668243, "learning_rate": 0.002, "loss": 2.5514, "step": 164300 }, { "epoch": 0.3273420566109907, "grad_norm": 0.17907899618148804, "learning_rate": 0.002, "loss": 2.5529, "step": 164310 }, { "epoch": 0.3273619788346296, "grad_norm": 0.1566634327173233, "learning_rate": 0.002, "loss": 2.5709, "step": 164320 }, { "epoch": 0.32738190105826853, "grad_norm": 0.1999489963054657, "learning_rate": 0.002, "loss": 2.5548, "step": 164330 }, { "epoch": 0.3274018232819074, "grad_norm": 0.1551847904920578, "learning_rate": 0.002, "loss": 2.5459, "step": 164340 }, { "epoch": 0.32742174550554637, "grad_norm": 0.1714172661304474, "learning_rate": 0.002, "loss": 2.5684, "step": 164350 }, { "epoch": 0.32744166772918526, "grad_norm": 0.1570586860179901, "learning_rate": 0.002, "loss": 2.559, "step": 164360 }, { "epoch": 0.32746158995282415, "grad_norm": 0.1991429477930069, "learning_rate": 0.002, "loss": 2.5558, "step": 164370 }, { "epoch": 0.3274815121764631, "grad_norm": 0.1659899801015854, "learning_rate": 0.002, "loss": 2.5445, "step": 164380 }, { "epoch": 0.327501434400102, "grad_norm": 0.15607678890228271, "learning_rate": 0.002, "loss": 2.577, "step": 164390 }, { "epoch": 0.32752135662374093, "grad_norm": 0.16496442258358002, "learning_rate": 0.002, "loss": 2.5594, "step": 164400 }, { "epoch": 0.3275412788473798, "grad_norm": 0.20787391066551208, "learning_rate": 0.002, "loss": 2.5716, "step": 164410 }, { "epoch": 0.32756120107101877, "grad_norm": 0.15518802404403687, "learning_rate": 0.002, "loss": 2.5629, "step": 164420 }, { "epoch": 0.32758112329465766, "grad_norm": 0.1614619940519333, "learning_rate": 0.002, "loss": 2.5762, "step": 164430 }, { "epoch": 0.32760104551829655, "grad_norm": 0.16259980201721191, "learning_rate": 0.002, "loss": 2.5484, "step": 164440 }, { "epoch": 0.3276209677419355, "grad_norm": 0.23360346257686615, "learning_rate": 0.002, "loss": 2.5586, "step": 164450 }, { "epoch": 0.3276408899655744, "grad_norm": 0.17854426801204681, "learning_rate": 0.002, "loss": 2.5629, "step": 164460 }, { "epoch": 0.32766081218921334, "grad_norm": 0.1355261355638504, "learning_rate": 0.002, "loss": 2.548, "step": 164470 }, { "epoch": 0.3276807344128522, "grad_norm": 0.18340638279914856, "learning_rate": 0.002, "loss": 2.5478, "step": 164480 }, { "epoch": 0.3277006566364911, "grad_norm": 0.16899387538433075, "learning_rate": 0.002, "loss": 2.5711, "step": 164490 }, { "epoch": 0.32772057886013006, "grad_norm": 0.18675591051578522, "learning_rate": 0.002, "loss": 2.5562, "step": 164500 }, { "epoch": 0.32774050108376895, "grad_norm": 0.1801707148551941, "learning_rate": 0.002, "loss": 2.5573, "step": 164510 }, { "epoch": 0.3277604233074079, "grad_norm": 0.17527596652507782, "learning_rate": 0.002, "loss": 2.578, "step": 164520 }, { "epoch": 0.3277803455310468, "grad_norm": 0.14627908170223236, "learning_rate": 0.002, "loss": 2.5666, "step": 164530 }, { "epoch": 0.3278002677546857, "grad_norm": 0.170656219124794, "learning_rate": 0.002, "loss": 2.5588, "step": 164540 }, { "epoch": 0.3278201899783246, "grad_norm": 0.19801673293113708, "learning_rate": 0.002, "loss": 2.5453, "step": 164550 }, { "epoch": 0.3278401122019635, "grad_norm": 0.16603471338748932, "learning_rate": 0.002, "loss": 2.5566, "step": 164560 }, { "epoch": 0.32786003442560246, "grad_norm": 0.159372478723526, "learning_rate": 0.002, "loss": 2.5581, "step": 164570 }, { "epoch": 0.32787995664924136, "grad_norm": 0.17362543940544128, "learning_rate": 0.002, "loss": 2.5552, "step": 164580 }, { "epoch": 0.3278998788728803, "grad_norm": 0.2109624147415161, "learning_rate": 0.002, "loss": 2.5621, "step": 164590 }, { "epoch": 0.3279198010965192, "grad_norm": 0.18789364397525787, "learning_rate": 0.002, "loss": 2.5397, "step": 164600 }, { "epoch": 0.3279397233201581, "grad_norm": 0.16692988574504852, "learning_rate": 0.002, "loss": 2.5602, "step": 164610 }, { "epoch": 0.32795964554379703, "grad_norm": 0.147538423538208, "learning_rate": 0.002, "loss": 2.5515, "step": 164620 }, { "epoch": 0.3279795677674359, "grad_norm": 0.1602070927619934, "learning_rate": 0.002, "loss": 2.5595, "step": 164630 }, { "epoch": 0.32799948999107487, "grad_norm": 0.17031335830688477, "learning_rate": 0.002, "loss": 2.563, "step": 164640 }, { "epoch": 0.32801941221471376, "grad_norm": 0.16362100839614868, "learning_rate": 0.002, "loss": 2.5553, "step": 164650 }, { "epoch": 0.32803933443835265, "grad_norm": 0.14775089919567108, "learning_rate": 0.002, "loss": 2.5494, "step": 164660 }, { "epoch": 0.3280592566619916, "grad_norm": 0.14787277579307556, "learning_rate": 0.002, "loss": 2.5499, "step": 164670 }, { "epoch": 0.3280791788856305, "grad_norm": 0.19048897922039032, "learning_rate": 0.002, "loss": 2.5636, "step": 164680 }, { "epoch": 0.32809910110926943, "grad_norm": 0.16737881302833557, "learning_rate": 0.002, "loss": 2.5658, "step": 164690 }, { "epoch": 0.3281190233329083, "grad_norm": 0.1441466063261032, "learning_rate": 0.002, "loss": 2.5671, "step": 164700 }, { "epoch": 0.3281389455565472, "grad_norm": 0.17028415203094482, "learning_rate": 0.002, "loss": 2.5597, "step": 164710 }, { "epoch": 0.32815886778018616, "grad_norm": 0.16267463564872742, "learning_rate": 0.002, "loss": 2.5383, "step": 164720 }, { "epoch": 0.32817879000382505, "grad_norm": 0.1710597723722458, "learning_rate": 0.002, "loss": 2.5567, "step": 164730 }, { "epoch": 0.328198712227464, "grad_norm": 0.18861259520053864, "learning_rate": 0.002, "loss": 2.5696, "step": 164740 }, { "epoch": 0.3282186344511029, "grad_norm": 0.2041037380695343, "learning_rate": 0.002, "loss": 2.565, "step": 164750 }, { "epoch": 0.32823855667474183, "grad_norm": 0.1570776402950287, "learning_rate": 0.002, "loss": 2.5662, "step": 164760 }, { "epoch": 0.3282584788983807, "grad_norm": 0.1885172724723816, "learning_rate": 0.002, "loss": 2.5542, "step": 164770 }, { "epoch": 0.3282784011220196, "grad_norm": 0.18642480671405792, "learning_rate": 0.002, "loss": 2.5511, "step": 164780 }, { "epoch": 0.32829832334565856, "grad_norm": 0.16186736524105072, "learning_rate": 0.002, "loss": 2.554, "step": 164790 }, { "epoch": 0.32831824556929745, "grad_norm": 0.18185512721538544, "learning_rate": 0.002, "loss": 2.5606, "step": 164800 }, { "epoch": 0.3283381677929364, "grad_norm": 0.15839412808418274, "learning_rate": 0.002, "loss": 2.5578, "step": 164810 }, { "epoch": 0.3283580900165753, "grad_norm": 0.20376832783222198, "learning_rate": 0.002, "loss": 2.5624, "step": 164820 }, { "epoch": 0.3283780122402142, "grad_norm": 0.14744824171066284, "learning_rate": 0.002, "loss": 2.562, "step": 164830 }, { "epoch": 0.3283979344638531, "grad_norm": 0.16499511897563934, "learning_rate": 0.002, "loss": 2.5433, "step": 164840 }, { "epoch": 0.328417856687492, "grad_norm": 0.17972183227539062, "learning_rate": 0.002, "loss": 2.5669, "step": 164850 }, { "epoch": 0.32843777891113096, "grad_norm": 0.17168620228767395, "learning_rate": 0.002, "loss": 2.5575, "step": 164860 }, { "epoch": 0.32845770113476985, "grad_norm": 0.1548077017068863, "learning_rate": 0.002, "loss": 2.5513, "step": 164870 }, { "epoch": 0.3284776233584088, "grad_norm": 0.16235721111297607, "learning_rate": 0.002, "loss": 2.5593, "step": 164880 }, { "epoch": 0.3284975455820477, "grad_norm": 0.19162702560424805, "learning_rate": 0.002, "loss": 2.5637, "step": 164890 }, { "epoch": 0.3285174678056866, "grad_norm": 0.16322427988052368, "learning_rate": 0.002, "loss": 2.5497, "step": 164900 }, { "epoch": 0.3285373900293255, "grad_norm": 0.1824915111064911, "learning_rate": 0.002, "loss": 2.5714, "step": 164910 }, { "epoch": 0.3285573122529644, "grad_norm": 0.18457850813865662, "learning_rate": 0.002, "loss": 2.5819, "step": 164920 }, { "epoch": 0.32857723447660336, "grad_norm": 0.1960083395242691, "learning_rate": 0.002, "loss": 2.5621, "step": 164930 }, { "epoch": 0.32859715670024225, "grad_norm": 0.16378234326839447, "learning_rate": 0.002, "loss": 2.5632, "step": 164940 }, { "epoch": 0.32861707892388115, "grad_norm": 0.17821508646011353, "learning_rate": 0.002, "loss": 2.55, "step": 164950 }, { "epoch": 0.3286370011475201, "grad_norm": 0.1505291908979416, "learning_rate": 0.002, "loss": 2.574, "step": 164960 }, { "epoch": 0.328656923371159, "grad_norm": 0.15430377423763275, "learning_rate": 0.002, "loss": 2.5541, "step": 164970 }, { "epoch": 0.32867684559479793, "grad_norm": 0.1810346245765686, "learning_rate": 0.002, "loss": 2.5611, "step": 164980 }, { "epoch": 0.3286967678184368, "grad_norm": 0.16782191395759583, "learning_rate": 0.002, "loss": 2.5588, "step": 164990 }, { "epoch": 0.3287166900420757, "grad_norm": 0.2014903575181961, "learning_rate": 0.002, "loss": 2.558, "step": 165000 }, { "epoch": 0.32873661226571466, "grad_norm": 0.14610052108764648, "learning_rate": 0.002, "loss": 2.5742, "step": 165010 }, { "epoch": 0.32875653448935355, "grad_norm": 0.15425752103328705, "learning_rate": 0.002, "loss": 2.5533, "step": 165020 }, { "epoch": 0.3287764567129925, "grad_norm": 0.1893545240163803, "learning_rate": 0.002, "loss": 2.5734, "step": 165030 }, { "epoch": 0.3287963789366314, "grad_norm": 0.17515969276428223, "learning_rate": 0.002, "loss": 2.5647, "step": 165040 }, { "epoch": 0.32881630116027033, "grad_norm": 0.1878032684326172, "learning_rate": 0.002, "loss": 2.5595, "step": 165050 }, { "epoch": 0.3288362233839092, "grad_norm": 0.1526818871498108, "learning_rate": 0.002, "loss": 2.5499, "step": 165060 }, { "epoch": 0.3288561456075481, "grad_norm": 0.17039456963539124, "learning_rate": 0.002, "loss": 2.5777, "step": 165070 }, { "epoch": 0.32887606783118706, "grad_norm": 0.16569039225578308, "learning_rate": 0.002, "loss": 2.5574, "step": 165080 }, { "epoch": 0.32889599005482595, "grad_norm": 0.19955770671367645, "learning_rate": 0.002, "loss": 2.5658, "step": 165090 }, { "epoch": 0.3289159122784649, "grad_norm": 0.17004406452178955, "learning_rate": 0.002, "loss": 2.5639, "step": 165100 }, { "epoch": 0.3289358345021038, "grad_norm": 0.21335071325302124, "learning_rate": 0.002, "loss": 2.569, "step": 165110 }, { "epoch": 0.3289557567257427, "grad_norm": 0.16892871260643005, "learning_rate": 0.002, "loss": 2.5591, "step": 165120 }, { "epoch": 0.3289756789493816, "grad_norm": 0.15452276170253754, "learning_rate": 0.002, "loss": 2.5669, "step": 165130 }, { "epoch": 0.3289956011730205, "grad_norm": 0.15095050632953644, "learning_rate": 0.002, "loss": 2.5723, "step": 165140 }, { "epoch": 0.32901552339665946, "grad_norm": 0.18644706904888153, "learning_rate": 0.002, "loss": 2.5546, "step": 165150 }, { "epoch": 0.32903544562029835, "grad_norm": 0.17212089896202087, "learning_rate": 0.002, "loss": 2.571, "step": 165160 }, { "epoch": 0.3290553678439373, "grad_norm": 0.16533401608467102, "learning_rate": 0.002, "loss": 2.5768, "step": 165170 }, { "epoch": 0.3290752900675762, "grad_norm": 0.15319305658340454, "learning_rate": 0.002, "loss": 2.547, "step": 165180 }, { "epoch": 0.3290952122912151, "grad_norm": 0.16510196030139923, "learning_rate": 0.002, "loss": 2.5512, "step": 165190 }, { "epoch": 0.329115134514854, "grad_norm": 0.17097041010856628, "learning_rate": 0.002, "loss": 2.5555, "step": 165200 }, { "epoch": 0.3291350567384929, "grad_norm": 0.17966563999652863, "learning_rate": 0.002, "loss": 2.5535, "step": 165210 }, { "epoch": 0.32915497896213186, "grad_norm": 0.1628524363040924, "learning_rate": 0.002, "loss": 2.5735, "step": 165220 }, { "epoch": 0.32917490118577075, "grad_norm": 0.1788908690214157, "learning_rate": 0.002, "loss": 2.5641, "step": 165230 }, { "epoch": 0.32919482340940964, "grad_norm": 0.1627204716205597, "learning_rate": 0.002, "loss": 2.5591, "step": 165240 }, { "epoch": 0.3292147456330486, "grad_norm": 0.15641580522060394, "learning_rate": 0.002, "loss": 2.5621, "step": 165250 }, { "epoch": 0.3292346678566875, "grad_norm": 0.16605500876903534, "learning_rate": 0.002, "loss": 2.5528, "step": 165260 }, { "epoch": 0.3292545900803264, "grad_norm": 0.16745257377624512, "learning_rate": 0.002, "loss": 2.5549, "step": 165270 }, { "epoch": 0.3292745123039653, "grad_norm": 0.1605389267206192, "learning_rate": 0.002, "loss": 2.5689, "step": 165280 }, { "epoch": 0.3292944345276042, "grad_norm": 0.17055849730968475, "learning_rate": 0.002, "loss": 2.554, "step": 165290 }, { "epoch": 0.32931435675124315, "grad_norm": 0.14114944636821747, "learning_rate": 0.002, "loss": 2.5582, "step": 165300 }, { "epoch": 0.32933427897488204, "grad_norm": 0.18555240333080292, "learning_rate": 0.002, "loss": 2.5749, "step": 165310 }, { "epoch": 0.329354201198521, "grad_norm": 0.17927996814250946, "learning_rate": 0.002, "loss": 2.5592, "step": 165320 }, { "epoch": 0.3293741234221599, "grad_norm": 0.16950881481170654, "learning_rate": 0.002, "loss": 2.5493, "step": 165330 }, { "epoch": 0.3293940456457988, "grad_norm": 0.1556909829378128, "learning_rate": 0.002, "loss": 2.5708, "step": 165340 }, { "epoch": 0.3294139678694377, "grad_norm": 0.15319383144378662, "learning_rate": 0.002, "loss": 2.5569, "step": 165350 }, { "epoch": 0.3294338900930766, "grad_norm": 0.16917017102241516, "learning_rate": 0.002, "loss": 2.563, "step": 165360 }, { "epoch": 0.32945381231671556, "grad_norm": 0.15426886081695557, "learning_rate": 0.002, "loss": 2.5686, "step": 165370 }, { "epoch": 0.32947373454035445, "grad_norm": 0.1864193230867386, "learning_rate": 0.002, "loss": 2.5421, "step": 165380 }, { "epoch": 0.3294936567639934, "grad_norm": 0.16220341622829437, "learning_rate": 0.002, "loss": 2.5621, "step": 165390 }, { "epoch": 0.3295135789876323, "grad_norm": 0.18226738274097443, "learning_rate": 0.002, "loss": 2.5632, "step": 165400 }, { "epoch": 0.3295335012112712, "grad_norm": 0.2047717422246933, "learning_rate": 0.002, "loss": 2.5619, "step": 165410 }, { "epoch": 0.3295534234349101, "grad_norm": 0.16502629220485687, "learning_rate": 0.002, "loss": 2.5716, "step": 165420 }, { "epoch": 0.329573345658549, "grad_norm": 0.1510353982448578, "learning_rate": 0.002, "loss": 2.5577, "step": 165430 }, { "epoch": 0.32959326788218796, "grad_norm": 0.15690059959888458, "learning_rate": 0.002, "loss": 2.5581, "step": 165440 }, { "epoch": 0.32961319010582685, "grad_norm": 0.17542771995067596, "learning_rate": 0.002, "loss": 2.5473, "step": 165450 }, { "epoch": 0.32963311232946574, "grad_norm": 0.1670352965593338, "learning_rate": 0.002, "loss": 2.5649, "step": 165460 }, { "epoch": 0.3296530345531047, "grad_norm": 0.1695561408996582, "learning_rate": 0.002, "loss": 2.5586, "step": 165470 }, { "epoch": 0.3296729567767436, "grad_norm": 0.18397532403469086, "learning_rate": 0.002, "loss": 2.5627, "step": 165480 }, { "epoch": 0.3296928790003825, "grad_norm": 0.19246776401996613, "learning_rate": 0.002, "loss": 2.5607, "step": 165490 }, { "epoch": 0.3297128012240214, "grad_norm": 0.17445607483386993, "learning_rate": 0.002, "loss": 2.556, "step": 165500 }, { "epoch": 0.32973272344766036, "grad_norm": 0.2119435966014862, "learning_rate": 0.002, "loss": 2.5734, "step": 165510 }, { "epoch": 0.32975264567129925, "grad_norm": 0.14570599794387817, "learning_rate": 0.002, "loss": 2.5578, "step": 165520 }, { "epoch": 0.32977256789493814, "grad_norm": 0.17381203174591064, "learning_rate": 0.002, "loss": 2.5702, "step": 165530 }, { "epoch": 0.3297924901185771, "grad_norm": 0.17755126953125, "learning_rate": 0.002, "loss": 2.5694, "step": 165540 }, { "epoch": 0.329812412342216, "grad_norm": 0.13846749067306519, "learning_rate": 0.002, "loss": 2.5601, "step": 165550 }, { "epoch": 0.3298323345658549, "grad_norm": 0.19225981831550598, "learning_rate": 0.002, "loss": 2.5497, "step": 165560 }, { "epoch": 0.3298522567894938, "grad_norm": 0.17231595516204834, "learning_rate": 0.002, "loss": 2.55, "step": 165570 }, { "epoch": 0.3298721790131327, "grad_norm": 0.20825223624706268, "learning_rate": 0.002, "loss": 2.5443, "step": 165580 }, { "epoch": 0.32989210123677165, "grad_norm": 0.1848054826259613, "learning_rate": 0.002, "loss": 2.5701, "step": 165590 }, { "epoch": 0.32991202346041054, "grad_norm": 0.14720463752746582, "learning_rate": 0.002, "loss": 2.5599, "step": 165600 }, { "epoch": 0.3299319456840495, "grad_norm": 0.1947196125984192, "learning_rate": 0.002, "loss": 2.5645, "step": 165610 }, { "epoch": 0.3299518679076884, "grad_norm": 0.15799298882484436, "learning_rate": 0.002, "loss": 2.553, "step": 165620 }, { "epoch": 0.3299717901313273, "grad_norm": 0.18742558360099792, "learning_rate": 0.002, "loss": 2.5512, "step": 165630 }, { "epoch": 0.3299917123549662, "grad_norm": 0.16117729246616364, "learning_rate": 0.002, "loss": 2.5544, "step": 165640 }, { "epoch": 0.3300116345786051, "grad_norm": 0.1455935537815094, "learning_rate": 0.002, "loss": 2.5734, "step": 165650 }, { "epoch": 0.33003155680224405, "grad_norm": 0.15693223476409912, "learning_rate": 0.002, "loss": 2.5729, "step": 165660 }, { "epoch": 0.33005147902588294, "grad_norm": 0.16812187433242798, "learning_rate": 0.002, "loss": 2.5419, "step": 165670 }, { "epoch": 0.3300714012495219, "grad_norm": 0.15443715453147888, "learning_rate": 0.002, "loss": 2.5493, "step": 165680 }, { "epoch": 0.3300913234731608, "grad_norm": 0.172388955950737, "learning_rate": 0.002, "loss": 2.555, "step": 165690 }, { "epoch": 0.33011124569679967, "grad_norm": 0.1810944676399231, "learning_rate": 0.002, "loss": 2.5608, "step": 165700 }, { "epoch": 0.3301311679204386, "grad_norm": 0.19264589250087738, "learning_rate": 0.002, "loss": 2.5578, "step": 165710 }, { "epoch": 0.3301510901440775, "grad_norm": 0.16770592331886292, "learning_rate": 0.002, "loss": 2.5467, "step": 165720 }, { "epoch": 0.33017101236771645, "grad_norm": 0.18176418542861938, "learning_rate": 0.002, "loss": 2.5632, "step": 165730 }, { "epoch": 0.33019093459135535, "grad_norm": 0.15017111599445343, "learning_rate": 0.002, "loss": 2.5715, "step": 165740 }, { "epoch": 0.33021085681499424, "grad_norm": 0.15026763081550598, "learning_rate": 0.002, "loss": 2.5646, "step": 165750 }, { "epoch": 0.3302307790386332, "grad_norm": 0.19802939891815186, "learning_rate": 0.002, "loss": 2.5537, "step": 165760 }, { "epoch": 0.3302507012622721, "grad_norm": 0.16941478848457336, "learning_rate": 0.002, "loss": 2.5481, "step": 165770 }, { "epoch": 0.330270623485911, "grad_norm": 0.18292154371738434, "learning_rate": 0.002, "loss": 2.5729, "step": 165780 }, { "epoch": 0.3302905457095499, "grad_norm": 0.15012504160404205, "learning_rate": 0.002, "loss": 2.5715, "step": 165790 }, { "epoch": 0.33031046793318886, "grad_norm": 0.15393303334712982, "learning_rate": 0.002, "loss": 2.5524, "step": 165800 }, { "epoch": 0.33033039015682775, "grad_norm": 0.1822497844696045, "learning_rate": 0.002, "loss": 2.5598, "step": 165810 }, { "epoch": 0.33035031238046664, "grad_norm": 0.17520223557949066, "learning_rate": 0.002, "loss": 2.5502, "step": 165820 }, { "epoch": 0.3303702346041056, "grad_norm": 0.1886737197637558, "learning_rate": 0.002, "loss": 2.5634, "step": 165830 }, { "epoch": 0.3303901568277445, "grad_norm": 0.1637725681066513, "learning_rate": 0.002, "loss": 2.5712, "step": 165840 }, { "epoch": 0.3304100790513834, "grad_norm": 0.18308432400226593, "learning_rate": 0.002, "loss": 2.5725, "step": 165850 }, { "epoch": 0.3304300012750223, "grad_norm": 0.1608697772026062, "learning_rate": 0.002, "loss": 2.5615, "step": 165860 }, { "epoch": 0.3304499234986612, "grad_norm": 0.16094769537448883, "learning_rate": 0.002, "loss": 2.5511, "step": 165870 }, { "epoch": 0.33046984572230015, "grad_norm": 0.15327169001102448, "learning_rate": 0.002, "loss": 2.5576, "step": 165880 }, { "epoch": 0.33048976794593904, "grad_norm": 0.17603863775730133, "learning_rate": 0.002, "loss": 2.5518, "step": 165890 }, { "epoch": 0.330509690169578, "grad_norm": 0.14506736397743225, "learning_rate": 0.002, "loss": 2.5621, "step": 165900 }, { "epoch": 0.3305296123932169, "grad_norm": 0.1728547215461731, "learning_rate": 0.002, "loss": 2.5635, "step": 165910 }, { "epoch": 0.3305495346168558, "grad_norm": 0.14729639887809753, "learning_rate": 0.002, "loss": 2.5586, "step": 165920 }, { "epoch": 0.3305694568404947, "grad_norm": 0.17459915578365326, "learning_rate": 0.002, "loss": 2.5461, "step": 165930 }, { "epoch": 0.3305893790641336, "grad_norm": 0.16270753741264343, "learning_rate": 0.002, "loss": 2.5582, "step": 165940 }, { "epoch": 0.33060930128777255, "grad_norm": 0.1614094078540802, "learning_rate": 0.002, "loss": 2.5569, "step": 165950 }, { "epoch": 0.33062922351141144, "grad_norm": 0.1873154193162918, "learning_rate": 0.002, "loss": 2.549, "step": 165960 }, { "epoch": 0.3306491457350504, "grad_norm": 0.20470723509788513, "learning_rate": 0.002, "loss": 2.5671, "step": 165970 }, { "epoch": 0.3306690679586893, "grad_norm": 0.1572018414735794, "learning_rate": 0.002, "loss": 2.5526, "step": 165980 }, { "epoch": 0.33068899018232817, "grad_norm": 0.16422894597053528, "learning_rate": 0.002, "loss": 2.5549, "step": 165990 }, { "epoch": 0.3307089124059671, "grad_norm": 0.1732693463563919, "learning_rate": 0.002, "loss": 2.5662, "step": 166000 }, { "epoch": 0.330728834629606, "grad_norm": 0.14774645864963531, "learning_rate": 0.002, "loss": 2.5566, "step": 166010 }, { "epoch": 0.33074875685324495, "grad_norm": 0.15693379938602448, "learning_rate": 0.002, "loss": 2.5564, "step": 166020 }, { "epoch": 0.33076867907688384, "grad_norm": 0.15435966849327087, "learning_rate": 0.002, "loss": 2.561, "step": 166030 }, { "epoch": 0.33078860130052273, "grad_norm": 0.1916649043560028, "learning_rate": 0.002, "loss": 2.5613, "step": 166040 }, { "epoch": 0.3308085235241617, "grad_norm": 0.14494934678077698, "learning_rate": 0.002, "loss": 2.5683, "step": 166050 }, { "epoch": 0.33082844574780057, "grad_norm": 0.20684672892093658, "learning_rate": 0.002, "loss": 2.5592, "step": 166060 }, { "epoch": 0.3308483679714395, "grad_norm": 0.15061314404010773, "learning_rate": 0.002, "loss": 2.5529, "step": 166070 }, { "epoch": 0.3308682901950784, "grad_norm": 0.1641865074634552, "learning_rate": 0.002, "loss": 2.5702, "step": 166080 }, { "epoch": 0.33088821241871735, "grad_norm": 0.18277420103549957, "learning_rate": 0.002, "loss": 2.5623, "step": 166090 }, { "epoch": 0.33090813464235624, "grad_norm": 0.14470046758651733, "learning_rate": 0.002, "loss": 2.5611, "step": 166100 }, { "epoch": 0.33092805686599513, "grad_norm": 0.17688661813735962, "learning_rate": 0.002, "loss": 2.5583, "step": 166110 }, { "epoch": 0.3309479790896341, "grad_norm": 0.1700563281774521, "learning_rate": 0.002, "loss": 2.5535, "step": 166120 }, { "epoch": 0.33096790131327297, "grad_norm": 0.2162330597639084, "learning_rate": 0.002, "loss": 2.5428, "step": 166130 }, { "epoch": 0.3309878235369119, "grad_norm": 0.16053135693073273, "learning_rate": 0.002, "loss": 2.562, "step": 166140 }, { "epoch": 0.3310077457605508, "grad_norm": 0.17911697924137115, "learning_rate": 0.002, "loss": 2.5679, "step": 166150 }, { "epoch": 0.3310276679841897, "grad_norm": 0.16844195127487183, "learning_rate": 0.002, "loss": 2.5647, "step": 166160 }, { "epoch": 0.33104759020782865, "grad_norm": 0.1712525635957718, "learning_rate": 0.002, "loss": 2.5589, "step": 166170 }, { "epoch": 0.33106751243146754, "grad_norm": 0.1731821447610855, "learning_rate": 0.002, "loss": 2.5544, "step": 166180 }, { "epoch": 0.3310874346551065, "grad_norm": 0.17465592920780182, "learning_rate": 0.002, "loss": 2.5559, "step": 166190 }, { "epoch": 0.3311073568787454, "grad_norm": 0.20603026449680328, "learning_rate": 0.002, "loss": 2.5601, "step": 166200 }, { "epoch": 0.33112727910238426, "grad_norm": 0.19386026263237, "learning_rate": 0.002, "loss": 2.5673, "step": 166210 }, { "epoch": 0.3311472013260232, "grad_norm": 0.1767072230577469, "learning_rate": 0.002, "loss": 2.565, "step": 166220 }, { "epoch": 0.3311671235496621, "grad_norm": 0.16024993360042572, "learning_rate": 0.002, "loss": 2.5685, "step": 166230 }, { "epoch": 0.33118704577330105, "grad_norm": 0.1373317390680313, "learning_rate": 0.002, "loss": 2.5504, "step": 166240 }, { "epoch": 0.33120696799693994, "grad_norm": 0.17171740531921387, "learning_rate": 0.002, "loss": 2.5653, "step": 166250 }, { "epoch": 0.3312268902205789, "grad_norm": 0.1785656064748764, "learning_rate": 0.002, "loss": 2.5648, "step": 166260 }, { "epoch": 0.3312468124442178, "grad_norm": 0.18551896512508392, "learning_rate": 0.002, "loss": 2.5638, "step": 166270 }, { "epoch": 0.33126673466785667, "grad_norm": 0.16642403602600098, "learning_rate": 0.002, "loss": 2.5594, "step": 166280 }, { "epoch": 0.3312866568914956, "grad_norm": 0.15765444934368134, "learning_rate": 0.002, "loss": 2.5543, "step": 166290 }, { "epoch": 0.3313065791151345, "grad_norm": 0.1698213368654251, "learning_rate": 0.002, "loss": 2.5543, "step": 166300 }, { "epoch": 0.33132650133877345, "grad_norm": 0.2041553258895874, "learning_rate": 0.002, "loss": 2.5591, "step": 166310 }, { "epoch": 0.33134642356241234, "grad_norm": 0.1355598419904709, "learning_rate": 0.002, "loss": 2.5598, "step": 166320 }, { "epoch": 0.33136634578605123, "grad_norm": 0.17352961003780365, "learning_rate": 0.002, "loss": 2.5686, "step": 166330 }, { "epoch": 0.3313862680096902, "grad_norm": 0.16026078164577484, "learning_rate": 0.002, "loss": 2.5712, "step": 166340 }, { "epoch": 0.33140619023332907, "grad_norm": 0.14563186466693878, "learning_rate": 0.002, "loss": 2.5705, "step": 166350 }, { "epoch": 0.331426112456968, "grad_norm": 0.174718976020813, "learning_rate": 0.002, "loss": 2.5501, "step": 166360 }, { "epoch": 0.3314460346806069, "grad_norm": 0.17489585280418396, "learning_rate": 0.002, "loss": 2.5815, "step": 166370 }, { "epoch": 0.33146595690424585, "grad_norm": 0.1497519612312317, "learning_rate": 0.002, "loss": 2.5606, "step": 166380 }, { "epoch": 0.33148587912788474, "grad_norm": 0.19798381626605988, "learning_rate": 0.002, "loss": 2.5635, "step": 166390 }, { "epoch": 0.33150580135152363, "grad_norm": 0.15423713624477386, "learning_rate": 0.002, "loss": 2.5615, "step": 166400 }, { "epoch": 0.3315257235751626, "grad_norm": 0.15582935512065887, "learning_rate": 0.002, "loss": 2.5666, "step": 166410 }, { "epoch": 0.33154564579880147, "grad_norm": 0.17201852798461914, "learning_rate": 0.002, "loss": 2.5481, "step": 166420 }, { "epoch": 0.3315655680224404, "grad_norm": 0.16599440574645996, "learning_rate": 0.002, "loss": 2.5643, "step": 166430 }, { "epoch": 0.3315854902460793, "grad_norm": 0.16186165809631348, "learning_rate": 0.002, "loss": 2.5642, "step": 166440 }, { "epoch": 0.3316054124697182, "grad_norm": 0.12979118525981903, "learning_rate": 0.002, "loss": 2.5592, "step": 166450 }, { "epoch": 0.33162533469335714, "grad_norm": 0.17441172897815704, "learning_rate": 0.002, "loss": 2.5581, "step": 166460 }, { "epoch": 0.33164525691699603, "grad_norm": 0.16840030252933502, "learning_rate": 0.002, "loss": 2.5566, "step": 166470 }, { "epoch": 0.331665179140635, "grad_norm": 0.1527387797832489, "learning_rate": 0.002, "loss": 2.5725, "step": 166480 }, { "epoch": 0.33168510136427387, "grad_norm": 0.16487716138362885, "learning_rate": 0.002, "loss": 2.5593, "step": 166490 }, { "epoch": 0.33170502358791276, "grad_norm": 0.14595919847488403, "learning_rate": 0.002, "loss": 2.559, "step": 166500 }, { "epoch": 0.3317249458115517, "grad_norm": 0.17018763720989227, "learning_rate": 0.002, "loss": 2.5611, "step": 166510 }, { "epoch": 0.3317448680351906, "grad_norm": 0.14512692391872406, "learning_rate": 0.002, "loss": 2.5599, "step": 166520 }, { "epoch": 0.33176479025882954, "grad_norm": 0.18039971590042114, "learning_rate": 0.002, "loss": 2.5664, "step": 166530 }, { "epoch": 0.33178471248246844, "grad_norm": 0.17089731991291046, "learning_rate": 0.002, "loss": 2.5487, "step": 166540 }, { "epoch": 0.3318046347061074, "grad_norm": 0.1437951773405075, "learning_rate": 0.002, "loss": 2.568, "step": 166550 }, { "epoch": 0.3318245569297463, "grad_norm": 0.15712012350559235, "learning_rate": 0.002, "loss": 2.5699, "step": 166560 }, { "epoch": 0.33184447915338516, "grad_norm": 0.1682976931333542, "learning_rate": 0.002, "loss": 2.5725, "step": 166570 }, { "epoch": 0.3318644013770241, "grad_norm": 0.17148694396018982, "learning_rate": 0.002, "loss": 2.5655, "step": 166580 }, { "epoch": 0.331884323600663, "grad_norm": 0.16392791271209717, "learning_rate": 0.002, "loss": 2.5502, "step": 166590 }, { "epoch": 0.33190424582430195, "grad_norm": 0.19609037041664124, "learning_rate": 0.002, "loss": 2.5796, "step": 166600 }, { "epoch": 0.33192416804794084, "grad_norm": 0.17069187760353088, "learning_rate": 0.002, "loss": 2.5643, "step": 166610 }, { "epoch": 0.33194409027157973, "grad_norm": 0.15455716848373413, "learning_rate": 0.002, "loss": 2.5575, "step": 166620 }, { "epoch": 0.3319640124952187, "grad_norm": 0.22321777045726776, "learning_rate": 0.002, "loss": 2.577, "step": 166630 }, { "epoch": 0.33198393471885757, "grad_norm": 0.148403599858284, "learning_rate": 0.002, "loss": 2.5652, "step": 166640 }, { "epoch": 0.3320038569424965, "grad_norm": 0.2009030431509018, "learning_rate": 0.002, "loss": 2.5511, "step": 166650 }, { "epoch": 0.3320237791661354, "grad_norm": 0.19407223165035248, "learning_rate": 0.002, "loss": 2.5703, "step": 166660 }, { "epoch": 0.33204370138977435, "grad_norm": 0.15298298001289368, "learning_rate": 0.002, "loss": 2.5605, "step": 166670 }, { "epoch": 0.33206362361341324, "grad_norm": 0.17398880422115326, "learning_rate": 0.002, "loss": 2.571, "step": 166680 }, { "epoch": 0.33208354583705213, "grad_norm": 0.18452930450439453, "learning_rate": 0.002, "loss": 2.5546, "step": 166690 }, { "epoch": 0.3321034680606911, "grad_norm": 0.1620745062828064, "learning_rate": 0.002, "loss": 2.5708, "step": 166700 }, { "epoch": 0.33212339028432997, "grad_norm": 0.16618263721466064, "learning_rate": 0.002, "loss": 2.559, "step": 166710 }, { "epoch": 0.3321433125079689, "grad_norm": 0.15323372185230255, "learning_rate": 0.002, "loss": 2.5654, "step": 166720 }, { "epoch": 0.3321632347316078, "grad_norm": 0.20350489020347595, "learning_rate": 0.002, "loss": 2.543, "step": 166730 }, { "epoch": 0.3321831569552467, "grad_norm": 0.17525614798069, "learning_rate": 0.002, "loss": 2.5738, "step": 166740 }, { "epoch": 0.33220307917888564, "grad_norm": 0.15985484421253204, "learning_rate": 0.002, "loss": 2.5552, "step": 166750 }, { "epoch": 0.33222300140252453, "grad_norm": 0.1608370542526245, "learning_rate": 0.002, "loss": 2.5579, "step": 166760 }, { "epoch": 0.3322429236261635, "grad_norm": 0.20471124351024628, "learning_rate": 0.002, "loss": 2.5542, "step": 166770 }, { "epoch": 0.33226284584980237, "grad_norm": 0.15293554961681366, "learning_rate": 0.002, "loss": 2.5621, "step": 166780 }, { "epoch": 0.33228276807344126, "grad_norm": 0.17625683546066284, "learning_rate": 0.002, "loss": 2.5567, "step": 166790 }, { "epoch": 0.3323026902970802, "grad_norm": 0.16070374846458435, "learning_rate": 0.002, "loss": 2.5542, "step": 166800 }, { "epoch": 0.3323226125207191, "grad_norm": 0.1416037678718567, "learning_rate": 0.002, "loss": 2.5441, "step": 166810 }, { "epoch": 0.33234253474435804, "grad_norm": 0.15421441197395325, "learning_rate": 0.002, "loss": 2.5643, "step": 166820 }, { "epoch": 0.33236245696799693, "grad_norm": 0.1347217559814453, "learning_rate": 0.002, "loss": 2.5591, "step": 166830 }, { "epoch": 0.3323823791916359, "grad_norm": 0.20661067962646484, "learning_rate": 0.002, "loss": 2.5587, "step": 166840 }, { "epoch": 0.33240230141527477, "grad_norm": 0.17939385771751404, "learning_rate": 0.002, "loss": 2.5789, "step": 166850 }, { "epoch": 0.33242222363891366, "grad_norm": 0.1487528383731842, "learning_rate": 0.002, "loss": 2.5726, "step": 166860 }, { "epoch": 0.3324421458625526, "grad_norm": 0.20736292004585266, "learning_rate": 0.002, "loss": 2.5575, "step": 166870 }, { "epoch": 0.3324620680861915, "grad_norm": 0.15824906527996063, "learning_rate": 0.002, "loss": 2.5515, "step": 166880 }, { "epoch": 0.33248199030983044, "grad_norm": 0.17501379549503326, "learning_rate": 0.002, "loss": 2.5471, "step": 166890 }, { "epoch": 0.33250191253346933, "grad_norm": 0.16289523243904114, "learning_rate": 0.002, "loss": 2.5578, "step": 166900 }, { "epoch": 0.3325218347571082, "grad_norm": 0.14662742614746094, "learning_rate": 0.002, "loss": 2.5667, "step": 166910 }, { "epoch": 0.33254175698074717, "grad_norm": 0.16364112496376038, "learning_rate": 0.002, "loss": 2.565, "step": 166920 }, { "epoch": 0.33256167920438606, "grad_norm": 0.21637685596942902, "learning_rate": 0.002, "loss": 2.5729, "step": 166930 }, { "epoch": 0.332581601428025, "grad_norm": 0.17374026775360107, "learning_rate": 0.002, "loss": 2.5681, "step": 166940 }, { "epoch": 0.3326015236516639, "grad_norm": 0.1618415117263794, "learning_rate": 0.002, "loss": 2.5809, "step": 166950 }, { "epoch": 0.3326214458753028, "grad_norm": 0.15582692623138428, "learning_rate": 0.002, "loss": 2.5675, "step": 166960 }, { "epoch": 0.33264136809894174, "grad_norm": 0.15209469199180603, "learning_rate": 0.002, "loss": 2.5701, "step": 166970 }, { "epoch": 0.3326612903225806, "grad_norm": 0.15957440435886383, "learning_rate": 0.002, "loss": 2.5414, "step": 166980 }, { "epoch": 0.3326812125462196, "grad_norm": 0.1501852422952652, "learning_rate": 0.002, "loss": 2.551, "step": 166990 }, { "epoch": 0.33270113476985846, "grad_norm": 0.1682843267917633, "learning_rate": 0.002, "loss": 2.5561, "step": 167000 }, { "epoch": 0.3327210569934974, "grad_norm": 0.18996110558509827, "learning_rate": 0.002, "loss": 2.559, "step": 167010 }, { "epoch": 0.3327409792171363, "grad_norm": 0.16317471861839294, "learning_rate": 0.002, "loss": 2.5633, "step": 167020 }, { "epoch": 0.3327609014407752, "grad_norm": 0.1790846437215805, "learning_rate": 0.002, "loss": 2.5657, "step": 167030 }, { "epoch": 0.33278082366441414, "grad_norm": 0.16294071078300476, "learning_rate": 0.002, "loss": 2.5659, "step": 167040 }, { "epoch": 0.33280074588805303, "grad_norm": 0.16094569861888885, "learning_rate": 0.002, "loss": 2.5747, "step": 167050 }, { "epoch": 0.332820668111692, "grad_norm": 0.16427138447761536, "learning_rate": 0.002, "loss": 2.5687, "step": 167060 }, { "epoch": 0.33284059033533087, "grad_norm": 0.16670580208301544, "learning_rate": 0.002, "loss": 2.5654, "step": 167070 }, { "epoch": 0.33286051255896976, "grad_norm": 0.1685362309217453, "learning_rate": 0.002, "loss": 2.5679, "step": 167080 }, { "epoch": 0.3328804347826087, "grad_norm": 0.17308858036994934, "learning_rate": 0.002, "loss": 2.5446, "step": 167090 }, { "epoch": 0.3329003570062476, "grad_norm": 0.18214119970798492, "learning_rate": 0.002, "loss": 2.5664, "step": 167100 }, { "epoch": 0.33292027922988654, "grad_norm": 0.16021782159805298, "learning_rate": 0.002, "loss": 2.5545, "step": 167110 }, { "epoch": 0.33294020145352543, "grad_norm": 0.17796321213245392, "learning_rate": 0.002, "loss": 2.568, "step": 167120 }, { "epoch": 0.3329601236771644, "grad_norm": 0.15559329092502594, "learning_rate": 0.002, "loss": 2.5665, "step": 167130 }, { "epoch": 0.33298004590080327, "grad_norm": 0.160727396607399, "learning_rate": 0.002, "loss": 2.5511, "step": 167140 }, { "epoch": 0.33299996812444216, "grad_norm": 0.16699044406414032, "learning_rate": 0.002, "loss": 2.5652, "step": 167150 }, { "epoch": 0.3330198903480811, "grad_norm": 0.17152181267738342, "learning_rate": 0.002, "loss": 2.5658, "step": 167160 }, { "epoch": 0.33303981257172, "grad_norm": 0.17481577396392822, "learning_rate": 0.002, "loss": 2.5446, "step": 167170 }, { "epoch": 0.33305973479535894, "grad_norm": 0.18229421973228455, "learning_rate": 0.002, "loss": 2.5681, "step": 167180 }, { "epoch": 0.33307965701899783, "grad_norm": 0.18854685127735138, "learning_rate": 0.002, "loss": 2.5572, "step": 167190 }, { "epoch": 0.3330995792426367, "grad_norm": 0.14749769866466522, "learning_rate": 0.002, "loss": 2.556, "step": 167200 }, { "epoch": 0.33311950146627567, "grad_norm": 0.1796991527080536, "learning_rate": 0.002, "loss": 2.5538, "step": 167210 }, { "epoch": 0.33313942368991456, "grad_norm": 0.14779789745807648, "learning_rate": 0.002, "loss": 2.5586, "step": 167220 }, { "epoch": 0.3331593459135535, "grad_norm": 0.18879561126232147, "learning_rate": 0.002, "loss": 2.5786, "step": 167230 }, { "epoch": 0.3331792681371924, "grad_norm": 0.1434803009033203, "learning_rate": 0.002, "loss": 2.5737, "step": 167240 }, { "epoch": 0.3331991903608313, "grad_norm": 0.19126203656196594, "learning_rate": 0.002, "loss": 2.5716, "step": 167250 }, { "epoch": 0.33321911258447023, "grad_norm": 0.17715716361999512, "learning_rate": 0.002, "loss": 2.5647, "step": 167260 }, { "epoch": 0.3332390348081091, "grad_norm": 0.1614990532398224, "learning_rate": 0.002, "loss": 2.558, "step": 167270 }, { "epoch": 0.33325895703174807, "grad_norm": 0.16061431169509888, "learning_rate": 0.002, "loss": 2.5729, "step": 167280 }, { "epoch": 0.33327887925538696, "grad_norm": 0.17221985757350922, "learning_rate": 0.002, "loss": 2.5636, "step": 167290 }, { "epoch": 0.3332988014790259, "grad_norm": 0.18338868021965027, "learning_rate": 0.002, "loss": 2.5577, "step": 167300 }, { "epoch": 0.3333187237026648, "grad_norm": 0.16859890520572662, "learning_rate": 0.002, "loss": 2.5525, "step": 167310 }, { "epoch": 0.3333386459263037, "grad_norm": 0.18410266935825348, "learning_rate": 0.002, "loss": 2.5669, "step": 167320 }, { "epoch": 0.33335856814994264, "grad_norm": 0.1452624350786209, "learning_rate": 0.002, "loss": 2.5832, "step": 167330 }, { "epoch": 0.3333784903735815, "grad_norm": 0.14527514576911926, "learning_rate": 0.002, "loss": 2.5602, "step": 167340 }, { "epoch": 0.3333984125972205, "grad_norm": 0.23211199045181274, "learning_rate": 0.002, "loss": 2.559, "step": 167350 }, { "epoch": 0.33341833482085936, "grad_norm": 0.1687575727701187, "learning_rate": 0.002, "loss": 2.5546, "step": 167360 }, { "epoch": 0.33343825704449825, "grad_norm": 0.1678168624639511, "learning_rate": 0.002, "loss": 2.5917, "step": 167370 }, { "epoch": 0.3334581792681372, "grad_norm": 0.15730084478855133, "learning_rate": 0.002, "loss": 2.5725, "step": 167380 }, { "epoch": 0.3334781014917761, "grad_norm": 0.15462784469127655, "learning_rate": 0.002, "loss": 2.5671, "step": 167390 }, { "epoch": 0.33349802371541504, "grad_norm": 0.15826262533664703, "learning_rate": 0.002, "loss": 2.5643, "step": 167400 }, { "epoch": 0.33351794593905393, "grad_norm": 0.15708069503307343, "learning_rate": 0.002, "loss": 2.5499, "step": 167410 }, { "epoch": 0.3335378681626929, "grad_norm": 0.18024148046970367, "learning_rate": 0.002, "loss": 2.5707, "step": 167420 }, { "epoch": 0.33355779038633177, "grad_norm": 0.16048569977283478, "learning_rate": 0.002, "loss": 2.5639, "step": 167430 }, { "epoch": 0.33357771260997066, "grad_norm": 0.15191775560379028, "learning_rate": 0.002, "loss": 2.5534, "step": 167440 }, { "epoch": 0.3335976348336096, "grad_norm": 0.16466012597084045, "learning_rate": 0.002, "loss": 2.5679, "step": 167450 }, { "epoch": 0.3336175570572485, "grad_norm": 0.1673206090927124, "learning_rate": 0.002, "loss": 2.5758, "step": 167460 }, { "epoch": 0.33363747928088744, "grad_norm": 0.18486997485160828, "learning_rate": 0.002, "loss": 2.5682, "step": 167470 }, { "epoch": 0.33365740150452633, "grad_norm": 0.18295545876026154, "learning_rate": 0.002, "loss": 2.5505, "step": 167480 }, { "epoch": 0.3336773237281652, "grad_norm": 0.17804549634456635, "learning_rate": 0.002, "loss": 2.5537, "step": 167490 }, { "epoch": 0.33369724595180417, "grad_norm": 0.16292986273765564, "learning_rate": 0.002, "loss": 2.5408, "step": 167500 }, { "epoch": 0.33371716817544306, "grad_norm": 0.16247691214084625, "learning_rate": 0.002, "loss": 2.5838, "step": 167510 }, { "epoch": 0.333737090399082, "grad_norm": 0.1886381059885025, "learning_rate": 0.002, "loss": 2.5628, "step": 167520 }, { "epoch": 0.3337570126227209, "grad_norm": 0.14910061657428741, "learning_rate": 0.002, "loss": 2.5599, "step": 167530 }, { "epoch": 0.3337769348463598, "grad_norm": 0.16257187724113464, "learning_rate": 0.002, "loss": 2.5641, "step": 167540 }, { "epoch": 0.33379685706999873, "grad_norm": 0.1702667623758316, "learning_rate": 0.002, "loss": 2.5554, "step": 167550 }, { "epoch": 0.3338167792936376, "grad_norm": 0.1608332246541977, "learning_rate": 0.002, "loss": 2.5704, "step": 167560 }, { "epoch": 0.33383670151727657, "grad_norm": 0.17995500564575195, "learning_rate": 0.002, "loss": 2.5473, "step": 167570 }, { "epoch": 0.33385662374091546, "grad_norm": 0.178684264421463, "learning_rate": 0.002, "loss": 2.5642, "step": 167580 }, { "epoch": 0.3338765459645544, "grad_norm": 0.15371647477149963, "learning_rate": 0.002, "loss": 2.5684, "step": 167590 }, { "epoch": 0.3338964681881933, "grad_norm": 0.1659175157546997, "learning_rate": 0.002, "loss": 2.55, "step": 167600 }, { "epoch": 0.3339163904118322, "grad_norm": 0.22717630863189697, "learning_rate": 0.002, "loss": 2.5572, "step": 167610 }, { "epoch": 0.33393631263547113, "grad_norm": 0.1465182900428772, "learning_rate": 0.002, "loss": 2.5632, "step": 167620 }, { "epoch": 0.33395623485911, "grad_norm": 0.18843615055084229, "learning_rate": 0.002, "loss": 2.5758, "step": 167630 }, { "epoch": 0.33397615708274897, "grad_norm": 0.15516811609268188, "learning_rate": 0.002, "loss": 2.5642, "step": 167640 }, { "epoch": 0.33399607930638786, "grad_norm": 0.18883982300758362, "learning_rate": 0.002, "loss": 2.5641, "step": 167650 }, { "epoch": 0.33401600153002675, "grad_norm": 0.16725443303585052, "learning_rate": 0.002, "loss": 2.5517, "step": 167660 }, { "epoch": 0.3340359237536657, "grad_norm": 0.1945393830537796, "learning_rate": 0.002, "loss": 2.5603, "step": 167670 }, { "epoch": 0.3340558459773046, "grad_norm": 0.15300069749355316, "learning_rate": 0.002, "loss": 2.556, "step": 167680 }, { "epoch": 0.33407576820094353, "grad_norm": 0.16545894742012024, "learning_rate": 0.002, "loss": 2.5413, "step": 167690 }, { "epoch": 0.3340956904245824, "grad_norm": 0.13682688772678375, "learning_rate": 0.002, "loss": 2.5563, "step": 167700 }, { "epoch": 0.3341156126482213, "grad_norm": 0.1556994467973709, "learning_rate": 0.002, "loss": 2.5638, "step": 167710 }, { "epoch": 0.33413553487186026, "grad_norm": 0.17053784430027008, "learning_rate": 0.002, "loss": 2.5558, "step": 167720 }, { "epoch": 0.33415545709549915, "grad_norm": 0.19416843354701996, "learning_rate": 0.002, "loss": 2.5693, "step": 167730 }, { "epoch": 0.3341753793191381, "grad_norm": 0.16150905191898346, "learning_rate": 0.002, "loss": 2.5664, "step": 167740 }, { "epoch": 0.334195301542777, "grad_norm": 0.14115989208221436, "learning_rate": 0.002, "loss": 2.5563, "step": 167750 }, { "epoch": 0.33421522376641594, "grad_norm": 0.18188805878162384, "learning_rate": 0.002, "loss": 2.5575, "step": 167760 }, { "epoch": 0.3342351459900548, "grad_norm": 0.17756234109401703, "learning_rate": 0.002, "loss": 2.5747, "step": 167770 }, { "epoch": 0.3342550682136937, "grad_norm": 0.16810987889766693, "learning_rate": 0.002, "loss": 2.5656, "step": 167780 }, { "epoch": 0.33427499043733266, "grad_norm": 0.18416865170001984, "learning_rate": 0.002, "loss": 2.569, "step": 167790 }, { "epoch": 0.33429491266097155, "grad_norm": 0.14702317118644714, "learning_rate": 0.002, "loss": 2.5514, "step": 167800 }, { "epoch": 0.3343148348846105, "grad_norm": 0.17598046362400055, "learning_rate": 0.002, "loss": 2.5537, "step": 167810 }, { "epoch": 0.3343347571082494, "grad_norm": 0.16066066920757294, "learning_rate": 0.002, "loss": 2.5586, "step": 167820 }, { "epoch": 0.3343546793318883, "grad_norm": 0.14980551600456238, "learning_rate": 0.002, "loss": 2.5573, "step": 167830 }, { "epoch": 0.33437460155552723, "grad_norm": 0.16295450925827026, "learning_rate": 0.002, "loss": 2.5674, "step": 167840 }, { "epoch": 0.3343945237791661, "grad_norm": 0.20643934607505798, "learning_rate": 0.002, "loss": 2.5654, "step": 167850 }, { "epoch": 0.33441444600280507, "grad_norm": 0.17117689549922943, "learning_rate": 0.002, "loss": 2.5601, "step": 167860 }, { "epoch": 0.33443436822644396, "grad_norm": 0.18470078706741333, "learning_rate": 0.002, "loss": 2.566, "step": 167870 }, { "epoch": 0.3344542904500829, "grad_norm": 0.14756430685520172, "learning_rate": 0.002, "loss": 2.5557, "step": 167880 }, { "epoch": 0.3344742126737218, "grad_norm": 0.20259715616703033, "learning_rate": 0.002, "loss": 2.5749, "step": 167890 }, { "epoch": 0.3344941348973607, "grad_norm": 0.20359788835048676, "learning_rate": 0.002, "loss": 2.5683, "step": 167900 }, { "epoch": 0.33451405712099963, "grad_norm": 0.1578255593776703, "learning_rate": 0.002, "loss": 2.5693, "step": 167910 }, { "epoch": 0.3345339793446385, "grad_norm": 0.16442592442035675, "learning_rate": 0.002, "loss": 2.5559, "step": 167920 }, { "epoch": 0.33455390156827747, "grad_norm": 0.1428544521331787, "learning_rate": 0.002, "loss": 2.5476, "step": 167930 }, { "epoch": 0.33457382379191636, "grad_norm": 0.21415427327156067, "learning_rate": 0.002, "loss": 2.5508, "step": 167940 }, { "epoch": 0.33459374601555525, "grad_norm": 0.21432267129421234, "learning_rate": 0.002, "loss": 2.5568, "step": 167950 }, { "epoch": 0.3346136682391942, "grad_norm": 0.1531028151512146, "learning_rate": 0.002, "loss": 2.5555, "step": 167960 }, { "epoch": 0.3346335904628331, "grad_norm": 0.17261581122875214, "learning_rate": 0.002, "loss": 2.5537, "step": 167970 }, { "epoch": 0.33465351268647203, "grad_norm": 0.2578425407409668, "learning_rate": 0.002, "loss": 2.556, "step": 167980 }, { "epoch": 0.3346734349101109, "grad_norm": 0.1977914422750473, "learning_rate": 0.002, "loss": 2.557, "step": 167990 }, { "epoch": 0.3346933571337498, "grad_norm": 0.17076875269412994, "learning_rate": 0.002, "loss": 2.5599, "step": 168000 }, { "epoch": 0.33471327935738876, "grad_norm": 0.16217640042304993, "learning_rate": 0.002, "loss": 2.5693, "step": 168010 }, { "epoch": 0.33473320158102765, "grad_norm": 0.15474538505077362, "learning_rate": 0.002, "loss": 2.5538, "step": 168020 }, { "epoch": 0.3347531238046666, "grad_norm": 0.16502819955348969, "learning_rate": 0.002, "loss": 2.559, "step": 168030 }, { "epoch": 0.3347730460283055, "grad_norm": 0.17918311059474945, "learning_rate": 0.002, "loss": 2.5559, "step": 168040 }, { "epoch": 0.33479296825194443, "grad_norm": 0.1498228907585144, "learning_rate": 0.002, "loss": 2.5567, "step": 168050 }, { "epoch": 0.3348128904755833, "grad_norm": 0.14841674268245697, "learning_rate": 0.002, "loss": 2.5623, "step": 168060 }, { "epoch": 0.3348328126992222, "grad_norm": 0.18594706058502197, "learning_rate": 0.002, "loss": 2.5592, "step": 168070 }, { "epoch": 0.33485273492286116, "grad_norm": 0.16600510478019714, "learning_rate": 0.002, "loss": 2.5556, "step": 168080 }, { "epoch": 0.33487265714650005, "grad_norm": 0.1719689816236496, "learning_rate": 0.002, "loss": 2.5657, "step": 168090 }, { "epoch": 0.334892579370139, "grad_norm": 0.16986343264579773, "learning_rate": 0.002, "loss": 2.5608, "step": 168100 }, { "epoch": 0.3349125015937779, "grad_norm": 0.15806929767131805, "learning_rate": 0.002, "loss": 2.5683, "step": 168110 }, { "epoch": 0.3349324238174168, "grad_norm": 0.180002823472023, "learning_rate": 0.002, "loss": 2.551, "step": 168120 }, { "epoch": 0.3349523460410557, "grad_norm": 0.16331031918525696, "learning_rate": 0.002, "loss": 2.5709, "step": 168130 }, { "epoch": 0.3349722682646946, "grad_norm": 0.1829807609319687, "learning_rate": 0.002, "loss": 2.5571, "step": 168140 }, { "epoch": 0.33499219048833356, "grad_norm": 0.19200488924980164, "learning_rate": 0.002, "loss": 2.5647, "step": 168150 }, { "epoch": 0.33501211271197245, "grad_norm": 0.17311277985572815, "learning_rate": 0.002, "loss": 2.5585, "step": 168160 }, { "epoch": 0.3350320349356114, "grad_norm": 0.16094107925891876, "learning_rate": 0.002, "loss": 2.5593, "step": 168170 }, { "epoch": 0.3350519571592503, "grad_norm": 0.15255022048950195, "learning_rate": 0.002, "loss": 2.5668, "step": 168180 }, { "epoch": 0.3350718793828892, "grad_norm": 0.16812442243099213, "learning_rate": 0.002, "loss": 2.5621, "step": 168190 }, { "epoch": 0.33509180160652813, "grad_norm": 0.1696658879518509, "learning_rate": 0.002, "loss": 2.5595, "step": 168200 }, { "epoch": 0.335111723830167, "grad_norm": 0.4065704643726349, "learning_rate": 0.002, "loss": 2.5661, "step": 168210 }, { "epoch": 0.33513164605380596, "grad_norm": 0.18133659660816193, "learning_rate": 0.002, "loss": 2.5716, "step": 168220 }, { "epoch": 0.33515156827744486, "grad_norm": 0.14925961196422577, "learning_rate": 0.002, "loss": 2.5683, "step": 168230 }, { "epoch": 0.33517149050108375, "grad_norm": 0.15290671586990356, "learning_rate": 0.002, "loss": 2.5741, "step": 168240 }, { "epoch": 0.3351914127247227, "grad_norm": 0.18467994034290314, "learning_rate": 0.002, "loss": 2.5634, "step": 168250 }, { "epoch": 0.3352113349483616, "grad_norm": 0.1665026694536209, "learning_rate": 0.002, "loss": 2.5551, "step": 168260 }, { "epoch": 0.33523125717200053, "grad_norm": 0.16660644114017487, "learning_rate": 0.002, "loss": 2.5605, "step": 168270 }, { "epoch": 0.3352511793956394, "grad_norm": 0.15776574611663818, "learning_rate": 0.002, "loss": 2.5688, "step": 168280 }, { "epoch": 0.3352711016192783, "grad_norm": 0.1903175711631775, "learning_rate": 0.002, "loss": 2.5541, "step": 168290 }, { "epoch": 0.33529102384291726, "grad_norm": 0.16286778450012207, "learning_rate": 0.002, "loss": 2.5548, "step": 168300 }, { "epoch": 0.33531094606655615, "grad_norm": 0.15653513371944427, "learning_rate": 0.002, "loss": 2.5775, "step": 168310 }, { "epoch": 0.3353308682901951, "grad_norm": 0.19793067872524261, "learning_rate": 0.002, "loss": 2.5585, "step": 168320 }, { "epoch": 0.335350790513834, "grad_norm": 0.16463729739189148, "learning_rate": 0.002, "loss": 2.5749, "step": 168330 }, { "epoch": 0.33537071273747293, "grad_norm": 0.20676589012145996, "learning_rate": 0.002, "loss": 2.5532, "step": 168340 }, { "epoch": 0.3353906349611118, "grad_norm": 0.1497904658317566, "learning_rate": 0.002, "loss": 2.5675, "step": 168350 }, { "epoch": 0.3354105571847507, "grad_norm": 0.17298251390457153, "learning_rate": 0.002, "loss": 2.5642, "step": 168360 }, { "epoch": 0.33543047940838966, "grad_norm": 0.19317489862442017, "learning_rate": 0.002, "loss": 2.5404, "step": 168370 }, { "epoch": 0.33545040163202855, "grad_norm": 0.16254013776779175, "learning_rate": 0.002, "loss": 2.5564, "step": 168380 }, { "epoch": 0.3354703238556675, "grad_norm": 0.20378409326076508, "learning_rate": 0.002, "loss": 2.5492, "step": 168390 }, { "epoch": 0.3354902460793064, "grad_norm": 0.16732485592365265, "learning_rate": 0.002, "loss": 2.566, "step": 168400 }, { "epoch": 0.3355101683029453, "grad_norm": 0.17610792815685272, "learning_rate": 0.002, "loss": 2.5623, "step": 168410 }, { "epoch": 0.3355300905265842, "grad_norm": 0.16206081211566925, "learning_rate": 0.002, "loss": 2.5499, "step": 168420 }, { "epoch": 0.3355500127502231, "grad_norm": 0.16609244048595428, "learning_rate": 0.002, "loss": 2.5633, "step": 168430 }, { "epoch": 0.33556993497386206, "grad_norm": 0.19250740110874176, "learning_rate": 0.002, "loss": 2.5512, "step": 168440 }, { "epoch": 0.33558985719750095, "grad_norm": 0.17041003704071045, "learning_rate": 0.002, "loss": 2.5631, "step": 168450 }, { "epoch": 0.3356097794211399, "grad_norm": 0.17168936133384705, "learning_rate": 0.002, "loss": 2.5451, "step": 168460 }, { "epoch": 0.3356297016447788, "grad_norm": 0.16133950650691986, "learning_rate": 0.002, "loss": 2.571, "step": 168470 }, { "epoch": 0.3356496238684177, "grad_norm": 0.14873912930488586, "learning_rate": 0.002, "loss": 2.5582, "step": 168480 }, { "epoch": 0.3356695460920566, "grad_norm": 0.20897653698921204, "learning_rate": 0.002, "loss": 2.5646, "step": 168490 }, { "epoch": 0.3356894683156955, "grad_norm": 0.17694544792175293, "learning_rate": 0.002, "loss": 2.5723, "step": 168500 }, { "epoch": 0.33570939053933446, "grad_norm": 0.16600508987903595, "learning_rate": 0.002, "loss": 2.559, "step": 168510 }, { "epoch": 0.33572931276297335, "grad_norm": 0.17078089714050293, "learning_rate": 0.002, "loss": 2.5756, "step": 168520 }, { "epoch": 0.33574923498661224, "grad_norm": 0.18937508761882782, "learning_rate": 0.002, "loss": 2.5648, "step": 168530 }, { "epoch": 0.3357691572102512, "grad_norm": 0.15679103136062622, "learning_rate": 0.002, "loss": 2.565, "step": 168540 }, { "epoch": 0.3357890794338901, "grad_norm": 0.1607552468776703, "learning_rate": 0.002, "loss": 2.5485, "step": 168550 }, { "epoch": 0.335809001657529, "grad_norm": 0.16305126249790192, "learning_rate": 0.002, "loss": 2.5623, "step": 168560 }, { "epoch": 0.3358289238811679, "grad_norm": 0.17399798333644867, "learning_rate": 0.002, "loss": 2.5655, "step": 168570 }, { "epoch": 0.3358488461048068, "grad_norm": 0.14427003264427185, "learning_rate": 0.002, "loss": 2.5659, "step": 168580 }, { "epoch": 0.33586876832844575, "grad_norm": 0.2037247121334076, "learning_rate": 0.002, "loss": 2.5687, "step": 168590 }, { "epoch": 0.33588869055208465, "grad_norm": 0.24436472356319427, "learning_rate": 0.002, "loss": 2.5797, "step": 168600 }, { "epoch": 0.3359086127757236, "grad_norm": 0.15289250016212463, "learning_rate": 0.002, "loss": 2.5745, "step": 168610 }, { "epoch": 0.3359285349993625, "grad_norm": 0.182164266705513, "learning_rate": 0.002, "loss": 2.5781, "step": 168620 }, { "epoch": 0.33594845722300143, "grad_norm": 0.1494549661874771, "learning_rate": 0.002, "loss": 2.5839, "step": 168630 }, { "epoch": 0.3359683794466403, "grad_norm": 0.16713248193264008, "learning_rate": 0.002, "loss": 2.5649, "step": 168640 }, { "epoch": 0.3359883016702792, "grad_norm": 0.18021385371685028, "learning_rate": 0.002, "loss": 2.5523, "step": 168650 }, { "epoch": 0.33600822389391816, "grad_norm": 0.15723498165607452, "learning_rate": 0.002, "loss": 2.5633, "step": 168660 }, { "epoch": 0.33602814611755705, "grad_norm": 0.16291742026805878, "learning_rate": 0.002, "loss": 2.555, "step": 168670 }, { "epoch": 0.336048068341196, "grad_norm": 0.15477949380874634, "learning_rate": 0.002, "loss": 2.5677, "step": 168680 }, { "epoch": 0.3360679905648349, "grad_norm": 0.206380233168602, "learning_rate": 0.002, "loss": 2.5584, "step": 168690 }, { "epoch": 0.3360879127884738, "grad_norm": 0.15563061833381653, "learning_rate": 0.002, "loss": 2.5656, "step": 168700 }, { "epoch": 0.3361078350121127, "grad_norm": 0.2014816403388977, "learning_rate": 0.002, "loss": 2.582, "step": 168710 }, { "epoch": 0.3361277572357516, "grad_norm": 0.18628333508968353, "learning_rate": 0.002, "loss": 2.5552, "step": 168720 }, { "epoch": 0.33614767945939056, "grad_norm": 0.15607470273971558, "learning_rate": 0.002, "loss": 2.5587, "step": 168730 }, { "epoch": 0.33616760168302945, "grad_norm": 0.15574026107788086, "learning_rate": 0.002, "loss": 2.5571, "step": 168740 }, { "epoch": 0.33618752390666834, "grad_norm": 0.21519678831100464, "learning_rate": 0.002, "loss": 2.5562, "step": 168750 }, { "epoch": 0.3362074461303073, "grad_norm": 0.16960085928440094, "learning_rate": 0.002, "loss": 2.5572, "step": 168760 }, { "epoch": 0.3362273683539462, "grad_norm": 0.1727646142244339, "learning_rate": 0.002, "loss": 2.5533, "step": 168770 }, { "epoch": 0.3362472905775851, "grad_norm": 0.1564214527606964, "learning_rate": 0.002, "loss": 2.5629, "step": 168780 }, { "epoch": 0.336267212801224, "grad_norm": 0.16894641518592834, "learning_rate": 0.002, "loss": 2.5742, "step": 168790 }, { "epoch": 0.33628713502486296, "grad_norm": 0.17434215545654297, "learning_rate": 0.002, "loss": 2.5645, "step": 168800 }, { "epoch": 0.33630705724850185, "grad_norm": 0.17004230618476868, "learning_rate": 0.002, "loss": 2.5706, "step": 168810 }, { "epoch": 0.33632697947214074, "grad_norm": 0.15315845608711243, "learning_rate": 0.002, "loss": 2.572, "step": 168820 }, { "epoch": 0.3363469016957797, "grad_norm": 0.2054724246263504, "learning_rate": 0.002, "loss": 2.5863, "step": 168830 }, { "epoch": 0.3363668239194186, "grad_norm": 0.17670705914497375, "learning_rate": 0.002, "loss": 2.5606, "step": 168840 }, { "epoch": 0.3363867461430575, "grad_norm": 0.17283478379249573, "learning_rate": 0.002, "loss": 2.5569, "step": 168850 }, { "epoch": 0.3364066683666964, "grad_norm": 0.17793746292591095, "learning_rate": 0.002, "loss": 2.565, "step": 168860 }, { "epoch": 0.3364265905903353, "grad_norm": 0.1568973809480667, "learning_rate": 0.002, "loss": 2.5603, "step": 168870 }, { "epoch": 0.33644651281397425, "grad_norm": 0.17919763922691345, "learning_rate": 0.002, "loss": 2.5552, "step": 168880 }, { "epoch": 0.33646643503761314, "grad_norm": 0.16618649661540985, "learning_rate": 0.002, "loss": 2.5541, "step": 168890 }, { "epoch": 0.3364863572612521, "grad_norm": 0.16433347761631012, "learning_rate": 0.002, "loss": 2.5528, "step": 168900 }, { "epoch": 0.336506279484891, "grad_norm": 0.1552724540233612, "learning_rate": 0.002, "loss": 2.5555, "step": 168910 }, { "epoch": 0.3365262017085299, "grad_norm": 0.1742940992116928, "learning_rate": 0.002, "loss": 2.5624, "step": 168920 }, { "epoch": 0.3365461239321688, "grad_norm": 0.16883233189582825, "learning_rate": 0.002, "loss": 2.5517, "step": 168930 }, { "epoch": 0.3365660461558077, "grad_norm": 0.16469711065292358, "learning_rate": 0.002, "loss": 2.5595, "step": 168940 }, { "epoch": 0.33658596837944665, "grad_norm": 0.1973785161972046, "learning_rate": 0.002, "loss": 2.5461, "step": 168950 }, { "epoch": 0.33660589060308554, "grad_norm": 0.18330496549606323, "learning_rate": 0.002, "loss": 2.582, "step": 168960 }, { "epoch": 0.3366258128267245, "grad_norm": 0.1749931424856186, "learning_rate": 0.002, "loss": 2.5569, "step": 168970 }, { "epoch": 0.3366457350503634, "grad_norm": 0.14571112394332886, "learning_rate": 0.002, "loss": 2.5633, "step": 168980 }, { "epoch": 0.3366656572740023, "grad_norm": 0.2270231693983078, "learning_rate": 0.002, "loss": 2.5575, "step": 168990 }, { "epoch": 0.3366855794976412, "grad_norm": 0.18131278455257416, "learning_rate": 0.002, "loss": 2.5626, "step": 169000 }, { "epoch": 0.3367055017212801, "grad_norm": 0.17182865738868713, "learning_rate": 0.002, "loss": 2.559, "step": 169010 }, { "epoch": 0.33672542394491906, "grad_norm": 0.17006230354309082, "learning_rate": 0.002, "loss": 2.566, "step": 169020 }, { "epoch": 0.33674534616855795, "grad_norm": 0.1567729413509369, "learning_rate": 0.002, "loss": 2.5682, "step": 169030 }, { "epoch": 0.33676526839219684, "grad_norm": 0.15931794047355652, "learning_rate": 0.002, "loss": 2.561, "step": 169040 }, { "epoch": 0.3367851906158358, "grad_norm": 0.15121035277843475, "learning_rate": 0.002, "loss": 2.5588, "step": 169050 }, { "epoch": 0.3368051128394747, "grad_norm": 0.16609270870685577, "learning_rate": 0.002, "loss": 2.5532, "step": 169060 }, { "epoch": 0.3368250350631136, "grad_norm": 0.15033042430877686, "learning_rate": 0.002, "loss": 2.5509, "step": 169070 }, { "epoch": 0.3368449572867525, "grad_norm": 0.17967773973941803, "learning_rate": 0.002, "loss": 2.5568, "step": 169080 }, { "epoch": 0.33686487951039146, "grad_norm": 0.15665000677108765, "learning_rate": 0.002, "loss": 2.5496, "step": 169090 }, { "epoch": 0.33688480173403035, "grad_norm": 0.17826178669929504, "learning_rate": 0.002, "loss": 2.5653, "step": 169100 }, { "epoch": 0.33690472395766924, "grad_norm": 0.18721726536750793, "learning_rate": 0.002, "loss": 2.5695, "step": 169110 }, { "epoch": 0.3369246461813082, "grad_norm": 0.14864657819271088, "learning_rate": 0.002, "loss": 2.5543, "step": 169120 }, { "epoch": 0.3369445684049471, "grad_norm": 0.17827285826206207, "learning_rate": 0.002, "loss": 2.5498, "step": 169130 }, { "epoch": 0.336964490628586, "grad_norm": 0.15492162108421326, "learning_rate": 0.002, "loss": 2.5637, "step": 169140 }, { "epoch": 0.3369844128522249, "grad_norm": 0.15752951800823212, "learning_rate": 0.002, "loss": 2.5559, "step": 169150 }, { "epoch": 0.3370043350758638, "grad_norm": 0.14393804967403412, "learning_rate": 0.002, "loss": 2.575, "step": 169160 }, { "epoch": 0.33702425729950275, "grad_norm": 0.1740637868642807, "learning_rate": 0.002, "loss": 2.5593, "step": 169170 }, { "epoch": 0.33704417952314164, "grad_norm": 0.14262330532073975, "learning_rate": 0.002, "loss": 2.5537, "step": 169180 }, { "epoch": 0.3370641017467806, "grad_norm": 0.14935657382011414, "learning_rate": 0.002, "loss": 2.5617, "step": 169190 }, { "epoch": 0.3370840239704195, "grad_norm": 0.17063292860984802, "learning_rate": 0.002, "loss": 2.5781, "step": 169200 }, { "epoch": 0.3371039461940584, "grad_norm": 0.16675418615341187, "learning_rate": 0.002, "loss": 2.5586, "step": 169210 }, { "epoch": 0.3371238684176973, "grad_norm": 0.17241860926151276, "learning_rate": 0.002, "loss": 2.5555, "step": 169220 }, { "epoch": 0.3371437906413362, "grad_norm": 0.1912640631198883, "learning_rate": 0.002, "loss": 2.5694, "step": 169230 }, { "epoch": 0.33716371286497515, "grad_norm": 0.17328861355781555, "learning_rate": 0.002, "loss": 2.5494, "step": 169240 }, { "epoch": 0.33718363508861404, "grad_norm": 0.17115703225135803, "learning_rate": 0.002, "loss": 2.5587, "step": 169250 }, { "epoch": 0.337203557312253, "grad_norm": 0.17448964715003967, "learning_rate": 0.002, "loss": 2.5642, "step": 169260 }, { "epoch": 0.3372234795358919, "grad_norm": 0.15425729751586914, "learning_rate": 0.002, "loss": 2.5761, "step": 169270 }, { "epoch": 0.33724340175953077, "grad_norm": 0.2149883359670639, "learning_rate": 0.002, "loss": 2.565, "step": 169280 }, { "epoch": 0.3372633239831697, "grad_norm": 0.15606041252613068, "learning_rate": 0.002, "loss": 2.5518, "step": 169290 }, { "epoch": 0.3372832462068086, "grad_norm": 0.16813059151172638, "learning_rate": 0.002, "loss": 2.57, "step": 169300 }, { "epoch": 0.33730316843044755, "grad_norm": 0.17189082503318787, "learning_rate": 0.002, "loss": 2.5532, "step": 169310 }, { "epoch": 0.33732309065408644, "grad_norm": 0.178177610039711, "learning_rate": 0.002, "loss": 2.5623, "step": 169320 }, { "epoch": 0.33734301287772533, "grad_norm": 0.1355638951063156, "learning_rate": 0.002, "loss": 2.5607, "step": 169330 }, { "epoch": 0.3373629351013643, "grad_norm": 0.15375621616840363, "learning_rate": 0.002, "loss": 2.5684, "step": 169340 }, { "epoch": 0.33738285732500317, "grad_norm": 0.16599971055984497, "learning_rate": 0.002, "loss": 2.5533, "step": 169350 }, { "epoch": 0.3374027795486421, "grad_norm": 0.1848987638950348, "learning_rate": 0.002, "loss": 2.5683, "step": 169360 }, { "epoch": 0.337422701772281, "grad_norm": 0.14539611339569092, "learning_rate": 0.002, "loss": 2.5433, "step": 169370 }, { "epoch": 0.33744262399591995, "grad_norm": 0.1924567073583603, "learning_rate": 0.002, "loss": 2.5604, "step": 169380 }, { "epoch": 0.33746254621955885, "grad_norm": 0.1699267476797104, "learning_rate": 0.002, "loss": 2.5648, "step": 169390 }, { "epoch": 0.33748246844319774, "grad_norm": 0.1573268622159958, "learning_rate": 0.002, "loss": 2.5543, "step": 169400 }, { "epoch": 0.3375023906668367, "grad_norm": 0.16734625399112701, "learning_rate": 0.002, "loss": 2.5473, "step": 169410 }, { "epoch": 0.3375223128904756, "grad_norm": 0.19182154536247253, "learning_rate": 0.002, "loss": 2.5706, "step": 169420 }, { "epoch": 0.3375422351141145, "grad_norm": 0.15354490280151367, "learning_rate": 0.002, "loss": 2.5491, "step": 169430 }, { "epoch": 0.3375621573377534, "grad_norm": 0.15026846528053284, "learning_rate": 0.002, "loss": 2.5681, "step": 169440 }, { "epoch": 0.3375820795613923, "grad_norm": 0.1622893363237381, "learning_rate": 0.002, "loss": 2.5518, "step": 169450 }, { "epoch": 0.33760200178503125, "grad_norm": 0.15761682391166687, "learning_rate": 0.002, "loss": 2.5661, "step": 169460 }, { "epoch": 0.33762192400867014, "grad_norm": 0.16396717727184296, "learning_rate": 0.002, "loss": 2.5667, "step": 169470 }, { "epoch": 0.3376418462323091, "grad_norm": 0.24241383373737335, "learning_rate": 0.002, "loss": 2.5574, "step": 169480 }, { "epoch": 0.337661768455948, "grad_norm": 0.15771368145942688, "learning_rate": 0.002, "loss": 2.5635, "step": 169490 }, { "epoch": 0.33768169067958687, "grad_norm": 0.1639937162399292, "learning_rate": 0.002, "loss": 2.5438, "step": 169500 }, { "epoch": 0.3377016129032258, "grad_norm": 0.1578643023967743, "learning_rate": 0.002, "loss": 2.5816, "step": 169510 }, { "epoch": 0.3377215351268647, "grad_norm": 0.16586269438266754, "learning_rate": 0.002, "loss": 2.5488, "step": 169520 }, { "epoch": 0.33774145735050365, "grad_norm": 0.15500743687152863, "learning_rate": 0.002, "loss": 2.5583, "step": 169530 }, { "epoch": 0.33776137957414254, "grad_norm": 0.14957238733768463, "learning_rate": 0.002, "loss": 2.5434, "step": 169540 }, { "epoch": 0.3377813017977815, "grad_norm": 0.13431619107723236, "learning_rate": 0.002, "loss": 2.562, "step": 169550 }, { "epoch": 0.3378012240214204, "grad_norm": 0.15942002832889557, "learning_rate": 0.002, "loss": 2.5636, "step": 169560 }, { "epoch": 0.33782114624505927, "grad_norm": 0.16314388811588287, "learning_rate": 0.002, "loss": 2.5723, "step": 169570 }, { "epoch": 0.3378410684686982, "grad_norm": 0.16791151463985443, "learning_rate": 0.002, "loss": 2.5638, "step": 169580 }, { "epoch": 0.3378609906923371, "grad_norm": 0.1836293637752533, "learning_rate": 0.002, "loss": 2.5855, "step": 169590 }, { "epoch": 0.33788091291597605, "grad_norm": 0.1912558227777481, "learning_rate": 0.002, "loss": 2.5619, "step": 169600 }, { "epoch": 0.33790083513961494, "grad_norm": 0.16036777198314667, "learning_rate": 0.002, "loss": 2.5591, "step": 169610 }, { "epoch": 0.33792075736325383, "grad_norm": 0.15696527063846588, "learning_rate": 0.002, "loss": 2.5703, "step": 169620 }, { "epoch": 0.3379406795868928, "grad_norm": 0.1657344251871109, "learning_rate": 0.002, "loss": 2.5637, "step": 169630 }, { "epoch": 0.33796060181053167, "grad_norm": 0.16240479052066803, "learning_rate": 0.002, "loss": 2.5702, "step": 169640 }, { "epoch": 0.3379805240341706, "grad_norm": 0.16558966040611267, "learning_rate": 0.002, "loss": 2.5486, "step": 169650 }, { "epoch": 0.3380004462578095, "grad_norm": 0.1375090777873993, "learning_rate": 0.002, "loss": 2.5535, "step": 169660 }, { "epoch": 0.33802036848144845, "grad_norm": 0.1969837099313736, "learning_rate": 0.002, "loss": 2.5607, "step": 169670 }, { "epoch": 0.33804029070508734, "grad_norm": 0.1574854999780655, "learning_rate": 0.002, "loss": 2.5678, "step": 169680 }, { "epoch": 0.33806021292872623, "grad_norm": 0.18859004974365234, "learning_rate": 0.002, "loss": 2.5684, "step": 169690 }, { "epoch": 0.3380801351523652, "grad_norm": 0.17093388736248016, "learning_rate": 0.002, "loss": 2.5558, "step": 169700 }, { "epoch": 0.33810005737600407, "grad_norm": 0.1710122674703598, "learning_rate": 0.002, "loss": 2.5461, "step": 169710 }, { "epoch": 0.338119979599643, "grad_norm": 0.1358940750360489, "learning_rate": 0.002, "loss": 2.571, "step": 169720 }, { "epoch": 0.3381399018232819, "grad_norm": 0.1465289443731308, "learning_rate": 0.002, "loss": 2.5528, "step": 169730 }, { "epoch": 0.3381598240469208, "grad_norm": 0.1669919490814209, "learning_rate": 0.002, "loss": 2.5611, "step": 169740 }, { "epoch": 0.33817974627055974, "grad_norm": 0.18264371156692505, "learning_rate": 0.002, "loss": 2.5629, "step": 169750 }, { "epoch": 0.33819966849419864, "grad_norm": 0.17407898604869843, "learning_rate": 0.002, "loss": 2.5668, "step": 169760 }, { "epoch": 0.3382195907178376, "grad_norm": 0.1660844385623932, "learning_rate": 0.002, "loss": 2.5709, "step": 169770 }, { "epoch": 0.3382395129414765, "grad_norm": 0.15676362812519073, "learning_rate": 0.002, "loss": 2.5731, "step": 169780 }, { "epoch": 0.33825943516511536, "grad_norm": 0.14780038595199585, "learning_rate": 0.002, "loss": 2.5638, "step": 169790 }, { "epoch": 0.3382793573887543, "grad_norm": 0.18803627789020538, "learning_rate": 0.002, "loss": 2.5676, "step": 169800 }, { "epoch": 0.3382992796123932, "grad_norm": 0.16399991512298584, "learning_rate": 0.002, "loss": 2.5501, "step": 169810 }, { "epoch": 0.33831920183603215, "grad_norm": 0.15028785169124603, "learning_rate": 0.002, "loss": 2.5699, "step": 169820 }, { "epoch": 0.33833912405967104, "grad_norm": 0.23857951164245605, "learning_rate": 0.002, "loss": 2.5629, "step": 169830 }, { "epoch": 0.33835904628331, "grad_norm": 0.14760556817054749, "learning_rate": 0.002, "loss": 2.5675, "step": 169840 }, { "epoch": 0.3383789685069489, "grad_norm": 0.1679139882326126, "learning_rate": 0.002, "loss": 2.5649, "step": 169850 }, { "epoch": 0.33839889073058776, "grad_norm": 0.1841282844543457, "learning_rate": 0.002, "loss": 2.5532, "step": 169860 }, { "epoch": 0.3384188129542267, "grad_norm": 0.1504439264535904, "learning_rate": 0.002, "loss": 2.564, "step": 169870 }, { "epoch": 0.3384387351778656, "grad_norm": 0.16219666600227356, "learning_rate": 0.002, "loss": 2.5609, "step": 169880 }, { "epoch": 0.33845865740150455, "grad_norm": 0.17379175126552582, "learning_rate": 0.002, "loss": 2.5548, "step": 169890 }, { "epoch": 0.33847857962514344, "grad_norm": 0.16988179087638855, "learning_rate": 0.002, "loss": 2.5488, "step": 169900 }, { "epoch": 0.33849850184878233, "grad_norm": 0.19274982810020447, "learning_rate": 0.002, "loss": 2.57, "step": 169910 }, { "epoch": 0.3385184240724213, "grad_norm": 0.18252795934677124, "learning_rate": 0.002, "loss": 2.5734, "step": 169920 }, { "epoch": 0.33853834629606017, "grad_norm": 0.16825570166110992, "learning_rate": 0.002, "loss": 2.5609, "step": 169930 }, { "epoch": 0.3385582685196991, "grad_norm": 0.20665492117404938, "learning_rate": 0.002, "loss": 2.5592, "step": 169940 }, { "epoch": 0.338578190743338, "grad_norm": 0.15652446448802948, "learning_rate": 0.002, "loss": 2.5674, "step": 169950 }, { "epoch": 0.33859811296697695, "grad_norm": 0.16286391019821167, "learning_rate": 0.002, "loss": 2.5573, "step": 169960 }, { "epoch": 0.33861803519061584, "grad_norm": 0.1736544668674469, "learning_rate": 0.002, "loss": 2.5758, "step": 169970 }, { "epoch": 0.33863795741425473, "grad_norm": 0.209181547164917, "learning_rate": 0.002, "loss": 2.5659, "step": 169980 }, { "epoch": 0.3386578796378937, "grad_norm": 0.18166233599185944, "learning_rate": 0.002, "loss": 2.5631, "step": 169990 }, { "epoch": 0.33867780186153257, "grad_norm": 0.16454444825649261, "learning_rate": 0.002, "loss": 2.5649, "step": 170000 }, { "epoch": 0.3386977240851715, "grad_norm": 0.17829225957393646, "learning_rate": 0.002, "loss": 2.5714, "step": 170010 }, { "epoch": 0.3387176463088104, "grad_norm": 0.14500775933265686, "learning_rate": 0.002, "loss": 2.5608, "step": 170020 }, { "epoch": 0.3387375685324493, "grad_norm": 0.13952411711215973, "learning_rate": 0.002, "loss": 2.5519, "step": 170030 }, { "epoch": 0.33875749075608824, "grad_norm": 0.16656185686588287, "learning_rate": 0.002, "loss": 2.557, "step": 170040 }, { "epoch": 0.33877741297972713, "grad_norm": 0.14878565073013306, "learning_rate": 0.002, "loss": 2.5618, "step": 170050 }, { "epoch": 0.3387973352033661, "grad_norm": 0.19320853054523468, "learning_rate": 0.002, "loss": 2.5632, "step": 170060 }, { "epoch": 0.33881725742700497, "grad_norm": 0.16767607629299164, "learning_rate": 0.002, "loss": 2.5675, "step": 170070 }, { "epoch": 0.33883717965064386, "grad_norm": 0.21239924430847168, "learning_rate": 0.002, "loss": 2.5702, "step": 170080 }, { "epoch": 0.3388571018742828, "grad_norm": 0.19221320748329163, "learning_rate": 0.002, "loss": 2.5726, "step": 170090 }, { "epoch": 0.3388770240979217, "grad_norm": 0.1425238847732544, "learning_rate": 0.002, "loss": 2.5552, "step": 170100 }, { "epoch": 0.33889694632156064, "grad_norm": 0.1700451672077179, "learning_rate": 0.002, "loss": 2.5449, "step": 170110 }, { "epoch": 0.33891686854519953, "grad_norm": 0.19215591251850128, "learning_rate": 0.002, "loss": 2.5504, "step": 170120 }, { "epoch": 0.3389367907688385, "grad_norm": 0.19305378198623657, "learning_rate": 0.002, "loss": 2.5767, "step": 170130 }, { "epoch": 0.33895671299247737, "grad_norm": 0.16783997416496277, "learning_rate": 0.002, "loss": 2.5451, "step": 170140 }, { "epoch": 0.33897663521611626, "grad_norm": 0.23560427129268646, "learning_rate": 0.002, "loss": 2.5699, "step": 170150 }, { "epoch": 0.3389965574397552, "grad_norm": 0.17987534403800964, "learning_rate": 0.002, "loss": 2.5612, "step": 170160 }, { "epoch": 0.3390164796633941, "grad_norm": 0.1500796377658844, "learning_rate": 0.002, "loss": 2.5596, "step": 170170 }, { "epoch": 0.33903640188703305, "grad_norm": 0.1577429324388504, "learning_rate": 0.002, "loss": 2.554, "step": 170180 }, { "epoch": 0.33905632411067194, "grad_norm": 0.13961344957351685, "learning_rate": 0.002, "loss": 2.5548, "step": 170190 }, { "epoch": 0.3390762463343108, "grad_norm": 0.27450302243232727, "learning_rate": 0.002, "loss": 2.5519, "step": 170200 }, { "epoch": 0.3390961685579498, "grad_norm": 0.1575033962726593, "learning_rate": 0.002, "loss": 2.5832, "step": 170210 }, { "epoch": 0.33911609078158866, "grad_norm": 0.19395975768566132, "learning_rate": 0.002, "loss": 2.5784, "step": 170220 }, { "epoch": 0.3391360130052276, "grad_norm": 0.1799110621213913, "learning_rate": 0.002, "loss": 2.5547, "step": 170230 }, { "epoch": 0.3391559352288665, "grad_norm": 0.1844470053911209, "learning_rate": 0.002, "loss": 2.5687, "step": 170240 }, { "epoch": 0.3391758574525054, "grad_norm": 0.139390766620636, "learning_rate": 0.002, "loss": 2.5621, "step": 170250 }, { "epoch": 0.33919577967614434, "grad_norm": 0.2100904881954193, "learning_rate": 0.002, "loss": 2.5606, "step": 170260 }, { "epoch": 0.33921570189978323, "grad_norm": 0.1966012716293335, "learning_rate": 0.002, "loss": 2.5592, "step": 170270 }, { "epoch": 0.3392356241234222, "grad_norm": 0.1720990687608719, "learning_rate": 0.002, "loss": 2.5649, "step": 170280 }, { "epoch": 0.33925554634706107, "grad_norm": 0.14507263898849487, "learning_rate": 0.002, "loss": 2.5487, "step": 170290 }, { "epoch": 0.3392754685707, "grad_norm": 0.16847911477088928, "learning_rate": 0.002, "loss": 2.5672, "step": 170300 }, { "epoch": 0.3392953907943389, "grad_norm": 0.1911022961139679, "learning_rate": 0.002, "loss": 2.5655, "step": 170310 }, { "epoch": 0.3393153130179778, "grad_norm": 0.35906410217285156, "learning_rate": 0.002, "loss": 2.5601, "step": 170320 }, { "epoch": 0.33933523524161674, "grad_norm": 0.17331579327583313, "learning_rate": 0.002, "loss": 2.5687, "step": 170330 }, { "epoch": 0.33935515746525563, "grad_norm": 0.1670525074005127, "learning_rate": 0.002, "loss": 2.5635, "step": 170340 }, { "epoch": 0.3393750796888946, "grad_norm": 0.1764031946659088, "learning_rate": 0.002, "loss": 2.5601, "step": 170350 }, { "epoch": 0.33939500191253347, "grad_norm": 0.22543910145759583, "learning_rate": 0.002, "loss": 2.5764, "step": 170360 }, { "epoch": 0.33941492413617236, "grad_norm": 0.17450354993343353, "learning_rate": 0.002, "loss": 2.5644, "step": 170370 }, { "epoch": 0.3394348463598113, "grad_norm": 0.150390163064003, "learning_rate": 0.002, "loss": 2.5611, "step": 170380 }, { "epoch": 0.3394547685834502, "grad_norm": 0.18929266929626465, "learning_rate": 0.002, "loss": 2.5726, "step": 170390 }, { "epoch": 0.33947469080708914, "grad_norm": 0.15163114666938782, "learning_rate": 0.002, "loss": 2.5708, "step": 170400 }, { "epoch": 0.33949461303072803, "grad_norm": 0.2022823691368103, "learning_rate": 0.002, "loss": 2.5775, "step": 170410 }, { "epoch": 0.339514535254367, "grad_norm": 0.15685546398162842, "learning_rate": 0.002, "loss": 2.5711, "step": 170420 }, { "epoch": 0.33953445747800587, "grad_norm": 0.15432308614253998, "learning_rate": 0.002, "loss": 2.5512, "step": 170430 }, { "epoch": 0.33955437970164476, "grad_norm": 0.20047755539417267, "learning_rate": 0.002, "loss": 2.5694, "step": 170440 }, { "epoch": 0.3395743019252837, "grad_norm": 0.15489915013313293, "learning_rate": 0.002, "loss": 2.5602, "step": 170450 }, { "epoch": 0.3395942241489226, "grad_norm": 0.14661726355552673, "learning_rate": 0.002, "loss": 2.5591, "step": 170460 }, { "epoch": 0.33961414637256154, "grad_norm": 0.19297285377979279, "learning_rate": 0.002, "loss": 2.5675, "step": 170470 }, { "epoch": 0.33963406859620043, "grad_norm": 0.2255575805902481, "learning_rate": 0.002, "loss": 2.5729, "step": 170480 }, { "epoch": 0.3396539908198393, "grad_norm": 0.16612735390663147, "learning_rate": 0.002, "loss": 2.5626, "step": 170490 }, { "epoch": 0.33967391304347827, "grad_norm": 0.16387715935707092, "learning_rate": 0.002, "loss": 2.5711, "step": 170500 }, { "epoch": 0.33969383526711716, "grad_norm": 0.1642318218946457, "learning_rate": 0.002, "loss": 2.5683, "step": 170510 }, { "epoch": 0.3397137574907561, "grad_norm": 0.16141781210899353, "learning_rate": 0.002, "loss": 2.5553, "step": 170520 }, { "epoch": 0.339733679714395, "grad_norm": 0.172447070479393, "learning_rate": 0.002, "loss": 2.5581, "step": 170530 }, { "epoch": 0.3397536019380339, "grad_norm": 0.20062774419784546, "learning_rate": 0.002, "loss": 2.5578, "step": 170540 }, { "epoch": 0.33977352416167284, "grad_norm": 0.16688436269760132, "learning_rate": 0.002, "loss": 2.5592, "step": 170550 }, { "epoch": 0.3397934463853117, "grad_norm": 0.16281567513942719, "learning_rate": 0.002, "loss": 2.5627, "step": 170560 }, { "epoch": 0.33981336860895067, "grad_norm": 0.15573304891586304, "learning_rate": 0.002, "loss": 2.5684, "step": 170570 }, { "epoch": 0.33983329083258956, "grad_norm": 0.1703205704689026, "learning_rate": 0.002, "loss": 2.5653, "step": 170580 }, { "epoch": 0.3398532130562285, "grad_norm": 0.17528848350048065, "learning_rate": 0.002, "loss": 2.5559, "step": 170590 }, { "epoch": 0.3398731352798674, "grad_norm": 0.21172474324703217, "learning_rate": 0.002, "loss": 2.5497, "step": 170600 }, { "epoch": 0.3398930575035063, "grad_norm": 0.15671484172344208, "learning_rate": 0.002, "loss": 2.5582, "step": 170610 }, { "epoch": 0.33991297972714524, "grad_norm": 0.1619970053434372, "learning_rate": 0.002, "loss": 2.5409, "step": 170620 }, { "epoch": 0.3399329019507841, "grad_norm": 0.18790248036384583, "learning_rate": 0.002, "loss": 2.5571, "step": 170630 }, { "epoch": 0.3399528241744231, "grad_norm": 0.17224174737930298, "learning_rate": 0.002, "loss": 2.5588, "step": 170640 }, { "epoch": 0.33997274639806196, "grad_norm": 0.2184170037508011, "learning_rate": 0.002, "loss": 2.5681, "step": 170650 }, { "epoch": 0.33999266862170086, "grad_norm": 0.1858297437429428, "learning_rate": 0.002, "loss": 2.555, "step": 170660 }, { "epoch": 0.3400125908453398, "grad_norm": 0.16744905710220337, "learning_rate": 0.002, "loss": 2.5691, "step": 170670 }, { "epoch": 0.3400325130689787, "grad_norm": 0.1934184730052948, "learning_rate": 0.002, "loss": 2.5663, "step": 170680 }, { "epoch": 0.34005243529261764, "grad_norm": 0.15935109555721283, "learning_rate": 0.002, "loss": 2.5563, "step": 170690 }, { "epoch": 0.34007235751625653, "grad_norm": 0.17529135942459106, "learning_rate": 0.002, "loss": 2.5566, "step": 170700 }, { "epoch": 0.3400922797398955, "grad_norm": 0.14838983118534088, "learning_rate": 0.002, "loss": 2.5446, "step": 170710 }, { "epoch": 0.34011220196353437, "grad_norm": 0.20554235577583313, "learning_rate": 0.002, "loss": 2.563, "step": 170720 }, { "epoch": 0.34013212418717326, "grad_norm": 0.1835789829492569, "learning_rate": 0.002, "loss": 2.5558, "step": 170730 }, { "epoch": 0.3401520464108122, "grad_norm": 0.16788962483406067, "learning_rate": 0.002, "loss": 2.5522, "step": 170740 }, { "epoch": 0.3401719686344511, "grad_norm": 0.17756465077400208, "learning_rate": 0.002, "loss": 2.5638, "step": 170750 }, { "epoch": 0.34019189085809004, "grad_norm": 0.16601957380771637, "learning_rate": 0.002, "loss": 2.5636, "step": 170760 }, { "epoch": 0.34021181308172893, "grad_norm": 0.16113369166851044, "learning_rate": 0.002, "loss": 2.5548, "step": 170770 }, { "epoch": 0.3402317353053678, "grad_norm": 0.16695469617843628, "learning_rate": 0.002, "loss": 2.5632, "step": 170780 }, { "epoch": 0.34025165752900677, "grad_norm": 0.17957456409931183, "learning_rate": 0.002, "loss": 2.5633, "step": 170790 }, { "epoch": 0.34027157975264566, "grad_norm": 0.1461828202009201, "learning_rate": 0.002, "loss": 2.5575, "step": 170800 }, { "epoch": 0.3402915019762846, "grad_norm": 0.15048517286777496, "learning_rate": 0.002, "loss": 2.5637, "step": 170810 }, { "epoch": 0.3403114241999235, "grad_norm": 0.17026326060295105, "learning_rate": 0.002, "loss": 2.5614, "step": 170820 }, { "epoch": 0.3403313464235624, "grad_norm": 0.14869306981563568, "learning_rate": 0.002, "loss": 2.5654, "step": 170830 }, { "epoch": 0.34035126864720133, "grad_norm": 0.2326396256685257, "learning_rate": 0.002, "loss": 2.5557, "step": 170840 }, { "epoch": 0.3403711908708402, "grad_norm": 0.15614092350006104, "learning_rate": 0.002, "loss": 2.5587, "step": 170850 }, { "epoch": 0.34039111309447917, "grad_norm": 0.17159774899482727, "learning_rate": 0.002, "loss": 2.5534, "step": 170860 }, { "epoch": 0.34041103531811806, "grad_norm": 0.16650693118572235, "learning_rate": 0.002, "loss": 2.5768, "step": 170870 }, { "epoch": 0.340430957541757, "grad_norm": 0.14950744807720184, "learning_rate": 0.002, "loss": 2.5695, "step": 170880 }, { "epoch": 0.3404508797653959, "grad_norm": 0.15891937911510468, "learning_rate": 0.002, "loss": 2.5519, "step": 170890 }, { "epoch": 0.3404708019890348, "grad_norm": 0.1562538594007492, "learning_rate": 0.002, "loss": 2.5459, "step": 170900 }, { "epoch": 0.34049072421267373, "grad_norm": 0.19307664036750793, "learning_rate": 0.002, "loss": 2.5763, "step": 170910 }, { "epoch": 0.3405106464363126, "grad_norm": 0.19155119359493256, "learning_rate": 0.002, "loss": 2.5582, "step": 170920 }, { "epoch": 0.34053056865995157, "grad_norm": 0.173259437084198, "learning_rate": 0.002, "loss": 2.5592, "step": 170930 }, { "epoch": 0.34055049088359046, "grad_norm": 0.17541487514972687, "learning_rate": 0.002, "loss": 2.5668, "step": 170940 }, { "epoch": 0.34057041310722935, "grad_norm": 0.15376052260398865, "learning_rate": 0.002, "loss": 2.5638, "step": 170950 }, { "epoch": 0.3405903353308683, "grad_norm": 0.1561255156993866, "learning_rate": 0.002, "loss": 2.5481, "step": 170960 }, { "epoch": 0.3406102575545072, "grad_norm": 0.17565588653087616, "learning_rate": 0.002, "loss": 2.5569, "step": 170970 }, { "epoch": 0.34063017977814614, "grad_norm": 0.1478695571422577, "learning_rate": 0.002, "loss": 2.5636, "step": 170980 }, { "epoch": 0.340650102001785, "grad_norm": 0.18132466077804565, "learning_rate": 0.002, "loss": 2.5608, "step": 170990 }, { "epoch": 0.3406700242254239, "grad_norm": 0.1668948382139206, "learning_rate": 0.002, "loss": 2.5473, "step": 171000 }, { "epoch": 0.34068994644906286, "grad_norm": 0.17148959636688232, "learning_rate": 0.002, "loss": 2.5648, "step": 171010 }, { "epoch": 0.34070986867270175, "grad_norm": 0.16507269442081451, "learning_rate": 0.002, "loss": 2.5637, "step": 171020 }, { "epoch": 0.3407297908963407, "grad_norm": 0.1645267903804779, "learning_rate": 0.002, "loss": 2.5627, "step": 171030 }, { "epoch": 0.3407497131199796, "grad_norm": 0.16260647773742676, "learning_rate": 0.002, "loss": 2.5559, "step": 171040 }, { "epoch": 0.34076963534361854, "grad_norm": 0.1888284683227539, "learning_rate": 0.002, "loss": 2.5566, "step": 171050 }, { "epoch": 0.34078955756725743, "grad_norm": 0.21834656596183777, "learning_rate": 0.002, "loss": 2.582, "step": 171060 }, { "epoch": 0.3408094797908963, "grad_norm": 0.15462128818035126, "learning_rate": 0.002, "loss": 2.5575, "step": 171070 }, { "epoch": 0.34082940201453527, "grad_norm": 0.16040664911270142, "learning_rate": 0.002, "loss": 2.5617, "step": 171080 }, { "epoch": 0.34084932423817416, "grad_norm": 0.22731834650039673, "learning_rate": 0.002, "loss": 2.5553, "step": 171090 }, { "epoch": 0.3408692464618131, "grad_norm": 0.17419783771038055, "learning_rate": 0.002, "loss": 2.5665, "step": 171100 }, { "epoch": 0.340889168685452, "grad_norm": 0.17189103364944458, "learning_rate": 0.002, "loss": 2.5527, "step": 171110 }, { "epoch": 0.3409090909090909, "grad_norm": 0.16981226205825806, "learning_rate": 0.002, "loss": 2.5537, "step": 171120 }, { "epoch": 0.34092901313272983, "grad_norm": 0.17144830524921417, "learning_rate": 0.002, "loss": 2.5588, "step": 171130 }, { "epoch": 0.3409489353563687, "grad_norm": 0.17077288031578064, "learning_rate": 0.002, "loss": 2.5649, "step": 171140 }, { "epoch": 0.34096885758000767, "grad_norm": 0.1718793660402298, "learning_rate": 0.002, "loss": 2.5669, "step": 171150 }, { "epoch": 0.34098877980364656, "grad_norm": 0.1482587456703186, "learning_rate": 0.002, "loss": 2.5677, "step": 171160 }, { "epoch": 0.3410087020272855, "grad_norm": 0.172510027885437, "learning_rate": 0.002, "loss": 2.5624, "step": 171170 }, { "epoch": 0.3410286242509244, "grad_norm": 0.16147693991661072, "learning_rate": 0.002, "loss": 2.5594, "step": 171180 }, { "epoch": 0.3410485464745633, "grad_norm": 0.15015843510627747, "learning_rate": 0.002, "loss": 2.5731, "step": 171190 }, { "epoch": 0.34106846869820223, "grad_norm": 0.1881273239850998, "learning_rate": 0.002, "loss": 2.5537, "step": 171200 }, { "epoch": 0.3410883909218411, "grad_norm": 0.17179401218891144, "learning_rate": 0.002, "loss": 2.5493, "step": 171210 }, { "epoch": 0.34110831314548007, "grad_norm": 0.16079667210578918, "learning_rate": 0.002, "loss": 2.561, "step": 171220 }, { "epoch": 0.34112823536911896, "grad_norm": 0.1921289712190628, "learning_rate": 0.002, "loss": 2.5493, "step": 171230 }, { "epoch": 0.34114815759275785, "grad_norm": 0.1865321844816208, "learning_rate": 0.002, "loss": 2.5692, "step": 171240 }, { "epoch": 0.3411680798163968, "grad_norm": 0.13951091468334198, "learning_rate": 0.002, "loss": 2.5591, "step": 171250 }, { "epoch": 0.3411880020400357, "grad_norm": 0.15192513167858124, "learning_rate": 0.002, "loss": 2.5556, "step": 171260 }, { "epoch": 0.34120792426367463, "grad_norm": 0.21901556849479675, "learning_rate": 0.002, "loss": 2.5577, "step": 171270 }, { "epoch": 0.3412278464873135, "grad_norm": 0.16287463903427124, "learning_rate": 0.002, "loss": 2.5649, "step": 171280 }, { "epoch": 0.3412477687109524, "grad_norm": 0.1586035043001175, "learning_rate": 0.002, "loss": 2.5644, "step": 171290 }, { "epoch": 0.34126769093459136, "grad_norm": 0.16270962357521057, "learning_rate": 0.002, "loss": 2.571, "step": 171300 }, { "epoch": 0.34128761315823025, "grad_norm": 0.16305147111415863, "learning_rate": 0.002, "loss": 2.5723, "step": 171310 }, { "epoch": 0.3413075353818692, "grad_norm": 0.1711772084236145, "learning_rate": 0.002, "loss": 2.5674, "step": 171320 }, { "epoch": 0.3413274576055081, "grad_norm": 0.14462243020534515, "learning_rate": 0.002, "loss": 2.5688, "step": 171330 }, { "epoch": 0.34134737982914704, "grad_norm": 0.21240326762199402, "learning_rate": 0.002, "loss": 2.5695, "step": 171340 }, { "epoch": 0.3413673020527859, "grad_norm": 0.17886504530906677, "learning_rate": 0.002, "loss": 2.5681, "step": 171350 }, { "epoch": 0.3413872242764248, "grad_norm": 0.17066322267055511, "learning_rate": 0.002, "loss": 2.5499, "step": 171360 }, { "epoch": 0.34140714650006376, "grad_norm": 0.15756072103977203, "learning_rate": 0.002, "loss": 2.5642, "step": 171370 }, { "epoch": 0.34142706872370265, "grad_norm": 0.16466854512691498, "learning_rate": 0.002, "loss": 2.5708, "step": 171380 }, { "epoch": 0.3414469909473416, "grad_norm": 0.17105020582675934, "learning_rate": 0.002, "loss": 2.5706, "step": 171390 }, { "epoch": 0.3414669131709805, "grad_norm": 0.16876955330371857, "learning_rate": 0.002, "loss": 2.572, "step": 171400 }, { "epoch": 0.3414868353946194, "grad_norm": 0.17662587761878967, "learning_rate": 0.002, "loss": 2.5508, "step": 171410 }, { "epoch": 0.3415067576182583, "grad_norm": 0.16004256904125214, "learning_rate": 0.002, "loss": 2.5721, "step": 171420 }, { "epoch": 0.3415266798418972, "grad_norm": 0.19821065664291382, "learning_rate": 0.002, "loss": 2.5625, "step": 171430 }, { "epoch": 0.34154660206553616, "grad_norm": 0.16338755190372467, "learning_rate": 0.002, "loss": 2.5685, "step": 171440 }, { "epoch": 0.34156652428917506, "grad_norm": 0.17638832330703735, "learning_rate": 0.002, "loss": 2.5664, "step": 171450 }, { "epoch": 0.341586446512814, "grad_norm": 0.17610317468643188, "learning_rate": 0.002, "loss": 2.5621, "step": 171460 }, { "epoch": 0.3416063687364529, "grad_norm": 0.17118649184703827, "learning_rate": 0.002, "loss": 2.5483, "step": 171470 }, { "epoch": 0.3416262909600918, "grad_norm": 0.19521485269069672, "learning_rate": 0.002, "loss": 2.5606, "step": 171480 }, { "epoch": 0.34164621318373073, "grad_norm": 0.17457826435565948, "learning_rate": 0.002, "loss": 2.5557, "step": 171490 }, { "epoch": 0.3416661354073696, "grad_norm": 0.18997132778167725, "learning_rate": 0.002, "loss": 2.5411, "step": 171500 }, { "epoch": 0.34168605763100857, "grad_norm": 0.15548940002918243, "learning_rate": 0.002, "loss": 2.5669, "step": 171510 }, { "epoch": 0.34170597985464746, "grad_norm": 0.20533883571624756, "learning_rate": 0.002, "loss": 2.5537, "step": 171520 }, { "epoch": 0.34172590207828635, "grad_norm": 0.15785741806030273, "learning_rate": 0.002, "loss": 2.5608, "step": 171530 }, { "epoch": 0.3417458243019253, "grad_norm": 0.1687796413898468, "learning_rate": 0.002, "loss": 2.5642, "step": 171540 }, { "epoch": 0.3417657465255642, "grad_norm": 0.18814508616924286, "learning_rate": 0.002, "loss": 2.5648, "step": 171550 }, { "epoch": 0.34178566874920313, "grad_norm": 0.1766563355922699, "learning_rate": 0.002, "loss": 2.5603, "step": 171560 }, { "epoch": 0.341805590972842, "grad_norm": 0.1531008929014206, "learning_rate": 0.002, "loss": 2.5651, "step": 171570 }, { "epoch": 0.3418255131964809, "grad_norm": 0.1576099991798401, "learning_rate": 0.002, "loss": 2.5645, "step": 171580 }, { "epoch": 0.34184543542011986, "grad_norm": 0.16924577951431274, "learning_rate": 0.002, "loss": 2.5617, "step": 171590 }, { "epoch": 0.34186535764375875, "grad_norm": 0.16195900738239288, "learning_rate": 0.002, "loss": 2.5579, "step": 171600 }, { "epoch": 0.3418852798673977, "grad_norm": 0.15984350442886353, "learning_rate": 0.002, "loss": 2.5576, "step": 171610 }, { "epoch": 0.3419052020910366, "grad_norm": 0.17243283987045288, "learning_rate": 0.002, "loss": 2.571, "step": 171620 }, { "epoch": 0.34192512431467553, "grad_norm": 0.2094206064939499, "learning_rate": 0.002, "loss": 2.5716, "step": 171630 }, { "epoch": 0.3419450465383144, "grad_norm": 0.14970262348651886, "learning_rate": 0.002, "loss": 2.5535, "step": 171640 }, { "epoch": 0.3419649687619533, "grad_norm": 0.18362122774124146, "learning_rate": 0.002, "loss": 2.5522, "step": 171650 }, { "epoch": 0.34198489098559226, "grad_norm": 0.18485891819000244, "learning_rate": 0.002, "loss": 2.5625, "step": 171660 }, { "epoch": 0.34200481320923115, "grad_norm": 0.16575030982494354, "learning_rate": 0.002, "loss": 2.5712, "step": 171670 }, { "epoch": 0.3420247354328701, "grad_norm": 0.15553323924541473, "learning_rate": 0.002, "loss": 2.5546, "step": 171680 }, { "epoch": 0.342044657656509, "grad_norm": 0.14732129871845245, "learning_rate": 0.002, "loss": 2.5472, "step": 171690 }, { "epoch": 0.3420645798801479, "grad_norm": 0.19030283391475677, "learning_rate": 0.002, "loss": 2.5565, "step": 171700 }, { "epoch": 0.3420845021037868, "grad_norm": 0.18769508600234985, "learning_rate": 0.002, "loss": 2.5683, "step": 171710 }, { "epoch": 0.3421044243274257, "grad_norm": 0.1942070722579956, "learning_rate": 0.002, "loss": 2.558, "step": 171720 }, { "epoch": 0.34212434655106466, "grad_norm": 0.16967090964317322, "learning_rate": 0.002, "loss": 2.5691, "step": 171730 }, { "epoch": 0.34214426877470355, "grad_norm": 0.16560211777687073, "learning_rate": 0.002, "loss": 2.5572, "step": 171740 }, { "epoch": 0.34216419099834244, "grad_norm": 0.1687958985567093, "learning_rate": 0.002, "loss": 2.5534, "step": 171750 }, { "epoch": 0.3421841132219814, "grad_norm": 0.21193285286426544, "learning_rate": 0.002, "loss": 2.5452, "step": 171760 }, { "epoch": 0.3422040354456203, "grad_norm": 0.15345191955566406, "learning_rate": 0.002, "loss": 2.5686, "step": 171770 }, { "epoch": 0.3422239576692592, "grad_norm": 0.19590556621551514, "learning_rate": 0.002, "loss": 2.5612, "step": 171780 }, { "epoch": 0.3422438798928981, "grad_norm": 0.13265477120876312, "learning_rate": 0.002, "loss": 2.5612, "step": 171790 }, { "epoch": 0.34226380211653706, "grad_norm": 0.158162459731102, "learning_rate": 0.002, "loss": 2.5597, "step": 171800 }, { "epoch": 0.34228372434017595, "grad_norm": 0.19438302516937256, "learning_rate": 0.002, "loss": 2.5674, "step": 171810 }, { "epoch": 0.34230364656381485, "grad_norm": 0.17490093410015106, "learning_rate": 0.002, "loss": 2.5355, "step": 171820 }, { "epoch": 0.3423235687874538, "grad_norm": 0.17838338017463684, "learning_rate": 0.002, "loss": 2.569, "step": 171830 }, { "epoch": 0.3423434910110927, "grad_norm": 0.17808333039283752, "learning_rate": 0.002, "loss": 2.554, "step": 171840 }, { "epoch": 0.34236341323473163, "grad_norm": 0.18183347582817078, "learning_rate": 0.002, "loss": 2.5589, "step": 171850 }, { "epoch": 0.3423833354583705, "grad_norm": 0.1459619551897049, "learning_rate": 0.002, "loss": 2.5621, "step": 171860 }, { "epoch": 0.3424032576820094, "grad_norm": 0.16890661418437958, "learning_rate": 0.002, "loss": 2.5526, "step": 171870 }, { "epoch": 0.34242317990564836, "grad_norm": 0.16787242889404297, "learning_rate": 0.002, "loss": 2.5694, "step": 171880 }, { "epoch": 0.34244310212928725, "grad_norm": 0.1633068472146988, "learning_rate": 0.002, "loss": 2.5574, "step": 171890 }, { "epoch": 0.3424630243529262, "grad_norm": 0.1620199829339981, "learning_rate": 0.002, "loss": 2.569, "step": 171900 }, { "epoch": 0.3424829465765651, "grad_norm": 0.16409416496753693, "learning_rate": 0.002, "loss": 2.5518, "step": 171910 }, { "epoch": 0.34250286880020403, "grad_norm": 0.1619596779346466, "learning_rate": 0.002, "loss": 2.5555, "step": 171920 }, { "epoch": 0.3425227910238429, "grad_norm": 0.16283145546913147, "learning_rate": 0.002, "loss": 2.5613, "step": 171930 }, { "epoch": 0.3425427132474818, "grad_norm": 0.1994960904121399, "learning_rate": 0.002, "loss": 2.5435, "step": 171940 }, { "epoch": 0.34256263547112076, "grad_norm": 0.14488548040390015, "learning_rate": 0.002, "loss": 2.5583, "step": 171950 }, { "epoch": 0.34258255769475965, "grad_norm": 0.1811494082212448, "learning_rate": 0.002, "loss": 2.5542, "step": 171960 }, { "epoch": 0.3426024799183986, "grad_norm": 0.13309301435947418, "learning_rate": 0.002, "loss": 2.566, "step": 171970 }, { "epoch": 0.3426224021420375, "grad_norm": 0.1910192370414734, "learning_rate": 0.002, "loss": 2.5755, "step": 171980 }, { "epoch": 0.3426423243656764, "grad_norm": 0.17893347144126892, "learning_rate": 0.002, "loss": 2.5644, "step": 171990 }, { "epoch": 0.3426622465893153, "grad_norm": 0.14934289455413818, "learning_rate": 0.002, "loss": 2.5653, "step": 172000 }, { "epoch": 0.3426821688129542, "grad_norm": 0.19396841526031494, "learning_rate": 0.002, "loss": 2.5427, "step": 172010 }, { "epoch": 0.34270209103659316, "grad_norm": 0.15184138715267181, "learning_rate": 0.002, "loss": 2.5634, "step": 172020 }, { "epoch": 0.34272201326023205, "grad_norm": 0.20373843610286713, "learning_rate": 0.002, "loss": 2.5602, "step": 172030 }, { "epoch": 0.34274193548387094, "grad_norm": 0.16122902929782867, "learning_rate": 0.002, "loss": 2.5735, "step": 172040 }, { "epoch": 0.3427618577075099, "grad_norm": 0.1480952948331833, "learning_rate": 0.002, "loss": 2.552, "step": 172050 }, { "epoch": 0.3427817799311488, "grad_norm": 0.1644015610218048, "learning_rate": 0.002, "loss": 2.5584, "step": 172060 }, { "epoch": 0.3428017021547877, "grad_norm": 0.20004044473171234, "learning_rate": 0.002, "loss": 2.5614, "step": 172070 }, { "epoch": 0.3428216243784266, "grad_norm": 0.16551713645458221, "learning_rate": 0.002, "loss": 2.5638, "step": 172080 }, { "epoch": 0.34284154660206556, "grad_norm": 0.17155566811561584, "learning_rate": 0.002, "loss": 2.5816, "step": 172090 }, { "epoch": 0.34286146882570445, "grad_norm": 0.14600870013237, "learning_rate": 0.002, "loss": 2.5803, "step": 172100 }, { "epoch": 0.34288139104934334, "grad_norm": 0.1780070811510086, "learning_rate": 0.002, "loss": 2.5622, "step": 172110 }, { "epoch": 0.3429013132729823, "grad_norm": 0.15889960527420044, "learning_rate": 0.002, "loss": 2.5803, "step": 172120 }, { "epoch": 0.3429212354966212, "grad_norm": 0.17153491079807281, "learning_rate": 0.002, "loss": 2.5646, "step": 172130 }, { "epoch": 0.3429411577202601, "grad_norm": 0.15808376669883728, "learning_rate": 0.002, "loss": 2.5485, "step": 172140 }, { "epoch": 0.342961079943899, "grad_norm": 0.15380795300006866, "learning_rate": 0.002, "loss": 2.5341, "step": 172150 }, { "epoch": 0.3429810021675379, "grad_norm": 0.1669338345527649, "learning_rate": 0.002, "loss": 2.5699, "step": 172160 }, { "epoch": 0.34300092439117685, "grad_norm": 0.17908082902431488, "learning_rate": 0.002, "loss": 2.5646, "step": 172170 }, { "epoch": 0.34302084661481574, "grad_norm": 0.21598002314567566, "learning_rate": 0.002, "loss": 2.5775, "step": 172180 }, { "epoch": 0.3430407688384547, "grad_norm": 0.15486684441566467, "learning_rate": 0.002, "loss": 2.5569, "step": 172190 }, { "epoch": 0.3430606910620936, "grad_norm": 0.18292494118213654, "learning_rate": 0.002, "loss": 2.5628, "step": 172200 }, { "epoch": 0.3430806132857325, "grad_norm": 0.18674243986606598, "learning_rate": 0.002, "loss": 2.5516, "step": 172210 }, { "epoch": 0.3431005355093714, "grad_norm": 0.17469315230846405, "learning_rate": 0.002, "loss": 2.5669, "step": 172220 }, { "epoch": 0.3431204577330103, "grad_norm": 0.17710992693901062, "learning_rate": 0.002, "loss": 2.5462, "step": 172230 }, { "epoch": 0.34314037995664926, "grad_norm": 0.1709839403629303, "learning_rate": 0.002, "loss": 2.5749, "step": 172240 }, { "epoch": 0.34316030218028815, "grad_norm": 0.1568222939968109, "learning_rate": 0.002, "loss": 2.56, "step": 172250 }, { "epoch": 0.3431802244039271, "grad_norm": 0.15880663692951202, "learning_rate": 0.002, "loss": 2.5599, "step": 172260 }, { "epoch": 0.343200146627566, "grad_norm": 0.1750265508890152, "learning_rate": 0.002, "loss": 2.5699, "step": 172270 }, { "epoch": 0.3432200688512049, "grad_norm": 0.19604063034057617, "learning_rate": 0.002, "loss": 2.5518, "step": 172280 }, { "epoch": 0.3432399910748438, "grad_norm": 0.14880405366420746, "learning_rate": 0.002, "loss": 2.5465, "step": 172290 }, { "epoch": 0.3432599132984827, "grad_norm": 0.16141337156295776, "learning_rate": 0.002, "loss": 2.5663, "step": 172300 }, { "epoch": 0.34327983552212166, "grad_norm": 0.15052755177021027, "learning_rate": 0.002, "loss": 2.5516, "step": 172310 }, { "epoch": 0.34329975774576055, "grad_norm": 0.17622047662734985, "learning_rate": 0.002, "loss": 2.5637, "step": 172320 }, { "epoch": 0.34331967996939944, "grad_norm": 0.1463148593902588, "learning_rate": 0.002, "loss": 2.5582, "step": 172330 }, { "epoch": 0.3433396021930384, "grad_norm": 0.17265558242797852, "learning_rate": 0.002, "loss": 2.5585, "step": 172340 }, { "epoch": 0.3433595244166773, "grad_norm": 0.1854015588760376, "learning_rate": 0.002, "loss": 2.5601, "step": 172350 }, { "epoch": 0.3433794466403162, "grad_norm": 0.16120201349258423, "learning_rate": 0.002, "loss": 2.56, "step": 172360 }, { "epoch": 0.3433993688639551, "grad_norm": 0.14937737584114075, "learning_rate": 0.002, "loss": 2.5691, "step": 172370 }, { "epoch": 0.34341929108759406, "grad_norm": 0.18166913092136383, "learning_rate": 0.002, "loss": 2.5518, "step": 172380 }, { "epoch": 0.34343921331123295, "grad_norm": 0.16668957471847534, "learning_rate": 0.002, "loss": 2.5525, "step": 172390 }, { "epoch": 0.34345913553487184, "grad_norm": 0.15418872237205505, "learning_rate": 0.002, "loss": 2.5625, "step": 172400 }, { "epoch": 0.3434790577585108, "grad_norm": 0.17697462439537048, "learning_rate": 0.002, "loss": 2.5552, "step": 172410 }, { "epoch": 0.3434989799821497, "grad_norm": 0.18202251195907593, "learning_rate": 0.002, "loss": 2.5552, "step": 172420 }, { "epoch": 0.3435189022057886, "grad_norm": 0.17174750566482544, "learning_rate": 0.002, "loss": 2.5539, "step": 172430 }, { "epoch": 0.3435388244294275, "grad_norm": 0.1489834040403366, "learning_rate": 0.002, "loss": 2.5534, "step": 172440 }, { "epoch": 0.3435587466530664, "grad_norm": 0.1511654406785965, "learning_rate": 0.002, "loss": 2.5705, "step": 172450 }, { "epoch": 0.34357866887670535, "grad_norm": 0.1551634818315506, "learning_rate": 0.002, "loss": 2.5532, "step": 172460 }, { "epoch": 0.34359859110034424, "grad_norm": 0.19300955533981323, "learning_rate": 0.002, "loss": 2.5583, "step": 172470 }, { "epoch": 0.3436185133239832, "grad_norm": 0.1771051287651062, "learning_rate": 0.002, "loss": 2.5647, "step": 172480 }, { "epoch": 0.3436384355476221, "grad_norm": 0.16005918383598328, "learning_rate": 0.002, "loss": 2.5607, "step": 172490 }, { "epoch": 0.34365835777126097, "grad_norm": 0.16263744235038757, "learning_rate": 0.002, "loss": 2.5668, "step": 172500 }, { "epoch": 0.3436782799948999, "grad_norm": 0.1648116409778595, "learning_rate": 0.002, "loss": 2.5688, "step": 172510 }, { "epoch": 0.3436982022185388, "grad_norm": 0.17148636281490326, "learning_rate": 0.002, "loss": 2.5694, "step": 172520 }, { "epoch": 0.34371812444217775, "grad_norm": 0.16867220401763916, "learning_rate": 0.002, "loss": 2.5671, "step": 172530 }, { "epoch": 0.34373804666581664, "grad_norm": 0.19859972596168518, "learning_rate": 0.002, "loss": 2.5679, "step": 172540 }, { "epoch": 0.3437579688894556, "grad_norm": 0.16485537588596344, "learning_rate": 0.002, "loss": 2.5591, "step": 172550 }, { "epoch": 0.3437778911130945, "grad_norm": 0.14855453372001648, "learning_rate": 0.002, "loss": 2.5706, "step": 172560 }, { "epoch": 0.34379781333673337, "grad_norm": 0.15145206451416016, "learning_rate": 0.002, "loss": 2.5594, "step": 172570 }, { "epoch": 0.3438177355603723, "grad_norm": 0.1913745403289795, "learning_rate": 0.002, "loss": 2.5649, "step": 172580 }, { "epoch": 0.3438376577840112, "grad_norm": 0.18216462433338165, "learning_rate": 0.002, "loss": 2.5456, "step": 172590 }, { "epoch": 0.34385758000765015, "grad_norm": 0.15306633710861206, "learning_rate": 0.002, "loss": 2.569, "step": 172600 }, { "epoch": 0.34387750223128905, "grad_norm": 0.14285051822662354, "learning_rate": 0.002, "loss": 2.5577, "step": 172610 }, { "epoch": 0.34389742445492794, "grad_norm": 0.2200314998626709, "learning_rate": 0.002, "loss": 2.567, "step": 172620 }, { "epoch": 0.3439173466785669, "grad_norm": 0.14485709369182587, "learning_rate": 0.002, "loss": 2.564, "step": 172630 }, { "epoch": 0.3439372689022058, "grad_norm": 0.18748845160007477, "learning_rate": 0.002, "loss": 2.574, "step": 172640 }, { "epoch": 0.3439571911258447, "grad_norm": 0.13548758625984192, "learning_rate": 0.002, "loss": 2.5492, "step": 172650 }, { "epoch": 0.3439771133494836, "grad_norm": 0.143419548869133, "learning_rate": 0.002, "loss": 2.5492, "step": 172660 }, { "epoch": 0.34399703557312256, "grad_norm": 0.16043829917907715, "learning_rate": 0.002, "loss": 2.5589, "step": 172670 }, { "epoch": 0.34401695779676145, "grad_norm": 0.1455797702074051, "learning_rate": 0.002, "loss": 2.5569, "step": 172680 }, { "epoch": 0.34403688002040034, "grad_norm": 0.15360847115516663, "learning_rate": 0.002, "loss": 2.5602, "step": 172690 }, { "epoch": 0.3440568022440393, "grad_norm": 0.1624508500099182, "learning_rate": 0.002, "loss": 2.5562, "step": 172700 }, { "epoch": 0.3440767244676782, "grad_norm": 0.1730039119720459, "learning_rate": 0.002, "loss": 2.5636, "step": 172710 }, { "epoch": 0.3440966466913171, "grad_norm": 0.14988382160663605, "learning_rate": 0.002, "loss": 2.5482, "step": 172720 }, { "epoch": 0.344116568914956, "grad_norm": 0.16664212942123413, "learning_rate": 0.002, "loss": 2.5604, "step": 172730 }, { "epoch": 0.3441364911385949, "grad_norm": 0.17262989282608032, "learning_rate": 0.002, "loss": 2.5779, "step": 172740 }, { "epoch": 0.34415641336223385, "grad_norm": 0.1971777230501175, "learning_rate": 0.002, "loss": 2.5434, "step": 172750 }, { "epoch": 0.34417633558587274, "grad_norm": 0.15407489240169525, "learning_rate": 0.002, "loss": 2.5575, "step": 172760 }, { "epoch": 0.3441962578095117, "grad_norm": 0.15419593453407288, "learning_rate": 0.002, "loss": 2.5574, "step": 172770 }, { "epoch": 0.3442161800331506, "grad_norm": 0.1774814873933792, "learning_rate": 0.002, "loss": 2.5533, "step": 172780 }, { "epoch": 0.34423610225678947, "grad_norm": 0.15526430308818817, "learning_rate": 0.002, "loss": 2.5551, "step": 172790 }, { "epoch": 0.3442560244804284, "grad_norm": 0.15018601715564728, "learning_rate": 0.002, "loss": 2.5597, "step": 172800 }, { "epoch": 0.3442759467040673, "grad_norm": 0.13344277441501617, "learning_rate": 0.002, "loss": 2.5597, "step": 172810 }, { "epoch": 0.34429586892770625, "grad_norm": 0.13973698019981384, "learning_rate": 0.002, "loss": 2.5657, "step": 172820 }, { "epoch": 0.34431579115134514, "grad_norm": 0.16976287961006165, "learning_rate": 0.002, "loss": 2.5489, "step": 172830 }, { "epoch": 0.3443357133749841, "grad_norm": 0.17100386321544647, "learning_rate": 0.002, "loss": 2.5686, "step": 172840 }, { "epoch": 0.344355635598623, "grad_norm": 0.1646200269460678, "learning_rate": 0.002, "loss": 2.5634, "step": 172850 }, { "epoch": 0.34437555782226187, "grad_norm": 0.1686001569032669, "learning_rate": 0.002, "loss": 2.5697, "step": 172860 }, { "epoch": 0.3443954800459008, "grad_norm": 0.23265035450458527, "learning_rate": 0.002, "loss": 2.5551, "step": 172870 }, { "epoch": 0.3444154022695397, "grad_norm": 0.2488122135400772, "learning_rate": 0.002, "loss": 2.5709, "step": 172880 }, { "epoch": 0.34443532449317865, "grad_norm": 0.1952684223651886, "learning_rate": 0.002, "loss": 2.5802, "step": 172890 }, { "epoch": 0.34445524671681754, "grad_norm": 0.15597037971019745, "learning_rate": 0.002, "loss": 2.5528, "step": 172900 }, { "epoch": 0.34447516894045643, "grad_norm": 0.3256012797355652, "learning_rate": 0.002, "loss": 2.5523, "step": 172910 }, { "epoch": 0.3444950911640954, "grad_norm": 0.1534660905599594, "learning_rate": 0.002, "loss": 2.5668, "step": 172920 }, { "epoch": 0.34451501338773427, "grad_norm": 0.22408446669578552, "learning_rate": 0.002, "loss": 2.5685, "step": 172930 }, { "epoch": 0.3445349356113732, "grad_norm": 0.1565764844417572, "learning_rate": 0.002, "loss": 2.5678, "step": 172940 }, { "epoch": 0.3445548578350121, "grad_norm": 0.16348664462566376, "learning_rate": 0.002, "loss": 2.5429, "step": 172950 }, { "epoch": 0.34457478005865105, "grad_norm": 0.1855451911687851, "learning_rate": 0.002, "loss": 2.5628, "step": 172960 }, { "epoch": 0.34459470228228994, "grad_norm": 0.17049238085746765, "learning_rate": 0.002, "loss": 2.5658, "step": 172970 }, { "epoch": 0.34461462450592883, "grad_norm": 0.15500949323177338, "learning_rate": 0.002, "loss": 2.5685, "step": 172980 }, { "epoch": 0.3446345467295678, "grad_norm": 0.18019649386405945, "learning_rate": 0.002, "loss": 2.5653, "step": 172990 }, { "epoch": 0.34465446895320667, "grad_norm": 0.1652926802635193, "learning_rate": 0.002, "loss": 2.5576, "step": 173000 }, { "epoch": 0.3446743911768456, "grad_norm": 0.14422351121902466, "learning_rate": 0.002, "loss": 2.5712, "step": 173010 }, { "epoch": 0.3446943134004845, "grad_norm": 0.20564694702625275, "learning_rate": 0.002, "loss": 2.5775, "step": 173020 }, { "epoch": 0.3447142356241234, "grad_norm": 0.1962530016899109, "learning_rate": 0.002, "loss": 2.5606, "step": 173030 }, { "epoch": 0.34473415784776235, "grad_norm": 0.17792892456054688, "learning_rate": 0.002, "loss": 2.5592, "step": 173040 }, { "epoch": 0.34475408007140124, "grad_norm": 0.15239457786083221, "learning_rate": 0.002, "loss": 2.5453, "step": 173050 }, { "epoch": 0.3447740022950402, "grad_norm": 0.17628543078899384, "learning_rate": 0.002, "loss": 2.5629, "step": 173060 }, { "epoch": 0.3447939245186791, "grad_norm": 0.15182705223560333, "learning_rate": 0.002, "loss": 2.5705, "step": 173070 }, { "epoch": 0.34481384674231796, "grad_norm": 0.17441673576831818, "learning_rate": 0.002, "loss": 2.5646, "step": 173080 }, { "epoch": 0.3448337689659569, "grad_norm": 0.19264750182628632, "learning_rate": 0.002, "loss": 2.5758, "step": 173090 }, { "epoch": 0.3448536911895958, "grad_norm": 0.173818901181221, "learning_rate": 0.002, "loss": 2.5604, "step": 173100 }, { "epoch": 0.34487361341323475, "grad_norm": 0.15034638345241547, "learning_rate": 0.002, "loss": 2.5671, "step": 173110 }, { "epoch": 0.34489353563687364, "grad_norm": 0.18115262687206268, "learning_rate": 0.002, "loss": 2.5643, "step": 173120 }, { "epoch": 0.3449134578605126, "grad_norm": 0.20740512013435364, "learning_rate": 0.002, "loss": 2.542, "step": 173130 }, { "epoch": 0.3449333800841515, "grad_norm": 0.16619406640529633, "learning_rate": 0.002, "loss": 2.555, "step": 173140 }, { "epoch": 0.34495330230779037, "grad_norm": 0.1555974930524826, "learning_rate": 0.002, "loss": 2.5666, "step": 173150 }, { "epoch": 0.3449732245314293, "grad_norm": 0.16116492450237274, "learning_rate": 0.002, "loss": 2.561, "step": 173160 }, { "epoch": 0.3449931467550682, "grad_norm": 0.38481971621513367, "learning_rate": 0.002, "loss": 2.5588, "step": 173170 }, { "epoch": 0.34501306897870715, "grad_norm": 0.20759721100330353, "learning_rate": 0.002, "loss": 2.527, "step": 173180 }, { "epoch": 0.34503299120234604, "grad_norm": 0.1778421252965927, "learning_rate": 0.002, "loss": 2.5636, "step": 173190 }, { "epoch": 0.34505291342598493, "grad_norm": 0.15045374631881714, "learning_rate": 0.002, "loss": 2.5537, "step": 173200 }, { "epoch": 0.3450728356496239, "grad_norm": 0.15781831741333008, "learning_rate": 0.002, "loss": 2.5716, "step": 173210 }, { "epoch": 0.34509275787326277, "grad_norm": 0.1776704639196396, "learning_rate": 0.002, "loss": 2.5482, "step": 173220 }, { "epoch": 0.3451126800969017, "grad_norm": 0.21241290867328644, "learning_rate": 0.002, "loss": 2.5555, "step": 173230 }, { "epoch": 0.3451326023205406, "grad_norm": 0.1622932106256485, "learning_rate": 0.002, "loss": 2.5467, "step": 173240 }, { "epoch": 0.34515252454417955, "grad_norm": 0.13247837126255035, "learning_rate": 0.002, "loss": 2.5605, "step": 173250 }, { "epoch": 0.34517244676781844, "grad_norm": 0.15536920726299286, "learning_rate": 0.002, "loss": 2.5569, "step": 173260 }, { "epoch": 0.34519236899145733, "grad_norm": 0.16825544834136963, "learning_rate": 0.002, "loss": 2.5619, "step": 173270 }, { "epoch": 0.3452122912150963, "grad_norm": 0.34265539050102234, "learning_rate": 0.002, "loss": 2.5599, "step": 173280 }, { "epoch": 0.34523221343873517, "grad_norm": 0.1516713947057724, "learning_rate": 0.002, "loss": 2.5718, "step": 173290 }, { "epoch": 0.3452521356623741, "grad_norm": 0.20565646886825562, "learning_rate": 0.002, "loss": 2.5721, "step": 173300 }, { "epoch": 0.345272057886013, "grad_norm": 0.1735295057296753, "learning_rate": 0.002, "loss": 2.5611, "step": 173310 }, { "epoch": 0.3452919801096519, "grad_norm": 0.19333581626415253, "learning_rate": 0.002, "loss": 2.5498, "step": 173320 }, { "epoch": 0.34531190233329084, "grad_norm": 0.16551679372787476, "learning_rate": 0.002, "loss": 2.5592, "step": 173330 }, { "epoch": 0.34533182455692973, "grad_norm": 0.1796194165945053, "learning_rate": 0.002, "loss": 2.5662, "step": 173340 }, { "epoch": 0.3453517467805687, "grad_norm": 0.15608631074428558, "learning_rate": 0.002, "loss": 2.5618, "step": 173350 }, { "epoch": 0.34537166900420757, "grad_norm": 0.2085745483636856, "learning_rate": 0.002, "loss": 2.559, "step": 173360 }, { "epoch": 0.34539159122784646, "grad_norm": 0.17630627751350403, "learning_rate": 0.002, "loss": 2.5701, "step": 173370 }, { "epoch": 0.3454115134514854, "grad_norm": 0.16517353057861328, "learning_rate": 0.002, "loss": 2.5554, "step": 173380 }, { "epoch": 0.3454314356751243, "grad_norm": 0.1551111787557602, "learning_rate": 0.002, "loss": 2.5579, "step": 173390 }, { "epoch": 0.34545135789876324, "grad_norm": 0.16545671224594116, "learning_rate": 0.002, "loss": 2.565, "step": 173400 }, { "epoch": 0.34547128012240214, "grad_norm": 0.1721290498971939, "learning_rate": 0.002, "loss": 2.5547, "step": 173410 }, { "epoch": 0.3454912023460411, "grad_norm": 0.18096725642681122, "learning_rate": 0.002, "loss": 2.569, "step": 173420 }, { "epoch": 0.34551112456968, "grad_norm": 0.18699772655963898, "learning_rate": 0.002, "loss": 2.5582, "step": 173430 }, { "epoch": 0.34553104679331886, "grad_norm": 0.16469839215278625, "learning_rate": 0.002, "loss": 2.572, "step": 173440 }, { "epoch": 0.3455509690169578, "grad_norm": 0.19808347523212433, "learning_rate": 0.002, "loss": 2.5628, "step": 173450 }, { "epoch": 0.3455708912405967, "grad_norm": 0.1608469933271408, "learning_rate": 0.002, "loss": 2.571, "step": 173460 }, { "epoch": 0.34559081346423565, "grad_norm": 0.14697954058647156, "learning_rate": 0.002, "loss": 2.5695, "step": 173470 }, { "epoch": 0.34561073568787454, "grad_norm": 0.1564764380455017, "learning_rate": 0.002, "loss": 2.5686, "step": 173480 }, { "epoch": 0.34563065791151343, "grad_norm": 0.16711600124835968, "learning_rate": 0.002, "loss": 2.5579, "step": 173490 }, { "epoch": 0.3456505801351524, "grad_norm": 0.18411590158939362, "learning_rate": 0.002, "loss": 2.586, "step": 173500 }, { "epoch": 0.34567050235879127, "grad_norm": 0.1562885195016861, "learning_rate": 0.002, "loss": 2.5631, "step": 173510 }, { "epoch": 0.3456904245824302, "grad_norm": 0.1437423676252365, "learning_rate": 0.002, "loss": 2.557, "step": 173520 }, { "epoch": 0.3457103468060691, "grad_norm": 0.15881718695163727, "learning_rate": 0.002, "loss": 2.5619, "step": 173530 }, { "epoch": 0.345730269029708, "grad_norm": 0.15185585618019104, "learning_rate": 0.002, "loss": 2.5658, "step": 173540 }, { "epoch": 0.34575019125334694, "grad_norm": 0.15270943939685822, "learning_rate": 0.002, "loss": 2.5504, "step": 173550 }, { "epoch": 0.34577011347698583, "grad_norm": 0.1554114669561386, "learning_rate": 0.002, "loss": 2.561, "step": 173560 }, { "epoch": 0.3457900357006248, "grad_norm": 0.1593504697084427, "learning_rate": 0.002, "loss": 2.5606, "step": 173570 }, { "epoch": 0.34580995792426367, "grad_norm": 0.5569582581520081, "learning_rate": 0.002, "loss": 2.5633, "step": 173580 }, { "epoch": 0.3458298801479026, "grad_norm": 0.14859835803508759, "learning_rate": 0.002, "loss": 2.5578, "step": 173590 }, { "epoch": 0.3458498023715415, "grad_norm": 0.17699046432971954, "learning_rate": 0.002, "loss": 2.5667, "step": 173600 }, { "epoch": 0.3458697245951804, "grad_norm": 0.1749831885099411, "learning_rate": 0.002, "loss": 2.569, "step": 173610 }, { "epoch": 0.34588964681881934, "grad_norm": 0.19845132529735565, "learning_rate": 0.002, "loss": 2.5576, "step": 173620 }, { "epoch": 0.34590956904245823, "grad_norm": 0.14945746958255768, "learning_rate": 0.002, "loss": 2.5633, "step": 173630 }, { "epoch": 0.3459294912660972, "grad_norm": 0.157182514667511, "learning_rate": 0.002, "loss": 2.565, "step": 173640 }, { "epoch": 0.34594941348973607, "grad_norm": 0.15247313678264618, "learning_rate": 0.002, "loss": 2.5513, "step": 173650 }, { "epoch": 0.34596933571337496, "grad_norm": 0.16720394790172577, "learning_rate": 0.002, "loss": 2.5604, "step": 173660 }, { "epoch": 0.3459892579370139, "grad_norm": 0.17075949907302856, "learning_rate": 0.002, "loss": 2.5756, "step": 173670 }, { "epoch": 0.3460091801606528, "grad_norm": 0.16502588987350464, "learning_rate": 0.002, "loss": 2.5659, "step": 173680 }, { "epoch": 0.34602910238429174, "grad_norm": 0.16988112032413483, "learning_rate": 0.002, "loss": 2.5496, "step": 173690 }, { "epoch": 0.34604902460793063, "grad_norm": 0.14900951087474823, "learning_rate": 0.002, "loss": 2.5578, "step": 173700 }, { "epoch": 0.3460689468315696, "grad_norm": 0.15276949107646942, "learning_rate": 0.002, "loss": 2.5624, "step": 173710 }, { "epoch": 0.34608886905520847, "grad_norm": 0.16044682264328003, "learning_rate": 0.002, "loss": 2.5802, "step": 173720 }, { "epoch": 0.34610879127884736, "grad_norm": 0.17434674501419067, "learning_rate": 0.002, "loss": 2.555, "step": 173730 }, { "epoch": 0.3461287135024863, "grad_norm": 0.1521400660276413, "learning_rate": 0.002, "loss": 2.572, "step": 173740 }, { "epoch": 0.3461486357261252, "grad_norm": 0.16736413538455963, "learning_rate": 0.002, "loss": 2.5775, "step": 173750 }, { "epoch": 0.34616855794976414, "grad_norm": 0.15707944333553314, "learning_rate": 0.002, "loss": 2.5734, "step": 173760 }, { "epoch": 0.34618848017340303, "grad_norm": 0.14422638714313507, "learning_rate": 0.002, "loss": 2.5436, "step": 173770 }, { "epoch": 0.3462084023970419, "grad_norm": 0.18293684720993042, "learning_rate": 0.002, "loss": 2.5631, "step": 173780 }, { "epoch": 0.34622832462068087, "grad_norm": 0.14848753809928894, "learning_rate": 0.002, "loss": 2.5684, "step": 173790 }, { "epoch": 0.34624824684431976, "grad_norm": 0.15966616570949554, "learning_rate": 0.002, "loss": 2.5453, "step": 173800 }, { "epoch": 0.3462681690679587, "grad_norm": 0.19317318499088287, "learning_rate": 0.002, "loss": 2.5607, "step": 173810 }, { "epoch": 0.3462880912915976, "grad_norm": 0.1637699007987976, "learning_rate": 0.002, "loss": 2.5638, "step": 173820 }, { "epoch": 0.3463080135152365, "grad_norm": 0.1480533927679062, "learning_rate": 0.002, "loss": 2.5494, "step": 173830 }, { "epoch": 0.34632793573887544, "grad_norm": 0.2068050354719162, "learning_rate": 0.002, "loss": 2.5503, "step": 173840 }, { "epoch": 0.3463478579625143, "grad_norm": 0.14649958908557892, "learning_rate": 0.002, "loss": 2.5554, "step": 173850 }, { "epoch": 0.3463677801861533, "grad_norm": 0.17937017977237701, "learning_rate": 0.002, "loss": 2.5757, "step": 173860 }, { "epoch": 0.34638770240979216, "grad_norm": 0.16849489510059357, "learning_rate": 0.002, "loss": 2.5675, "step": 173870 }, { "epoch": 0.3464076246334311, "grad_norm": 0.17679910361766815, "learning_rate": 0.002, "loss": 2.5588, "step": 173880 }, { "epoch": 0.34642754685707, "grad_norm": 0.16271282732486725, "learning_rate": 0.002, "loss": 2.564, "step": 173890 }, { "epoch": 0.3464474690807089, "grad_norm": 0.24301135540008545, "learning_rate": 0.002, "loss": 2.569, "step": 173900 }, { "epoch": 0.34646739130434784, "grad_norm": 0.14569304883480072, "learning_rate": 0.002, "loss": 2.5519, "step": 173910 }, { "epoch": 0.34648731352798673, "grad_norm": 0.1510026752948761, "learning_rate": 0.002, "loss": 2.5699, "step": 173920 }, { "epoch": 0.3465072357516257, "grad_norm": 0.26664644479751587, "learning_rate": 0.002, "loss": 2.5463, "step": 173930 }, { "epoch": 0.34652715797526457, "grad_norm": 0.15343184769153595, "learning_rate": 0.002, "loss": 2.5521, "step": 173940 }, { "epoch": 0.34654708019890346, "grad_norm": 0.1527622491121292, "learning_rate": 0.002, "loss": 2.5562, "step": 173950 }, { "epoch": 0.3465670024225424, "grad_norm": 0.1734364628791809, "learning_rate": 0.002, "loss": 2.561, "step": 173960 }, { "epoch": 0.3465869246461813, "grad_norm": 0.151186004281044, "learning_rate": 0.002, "loss": 2.5586, "step": 173970 }, { "epoch": 0.34660684686982024, "grad_norm": 0.18840602040290833, "learning_rate": 0.002, "loss": 2.554, "step": 173980 }, { "epoch": 0.34662676909345913, "grad_norm": 0.19553664326667786, "learning_rate": 0.002, "loss": 2.5606, "step": 173990 }, { "epoch": 0.3466466913170981, "grad_norm": 0.17550447583198547, "learning_rate": 0.002, "loss": 2.5603, "step": 174000 }, { "epoch": 0.34666661354073697, "grad_norm": 0.1602250635623932, "learning_rate": 0.002, "loss": 2.5719, "step": 174010 }, { "epoch": 0.34668653576437586, "grad_norm": 0.15850841999053955, "learning_rate": 0.002, "loss": 2.5652, "step": 174020 }, { "epoch": 0.3467064579880148, "grad_norm": 0.16111880540847778, "learning_rate": 0.002, "loss": 2.5548, "step": 174030 }, { "epoch": 0.3467263802116537, "grad_norm": 0.18353167176246643, "learning_rate": 0.002, "loss": 2.5554, "step": 174040 }, { "epoch": 0.34674630243529264, "grad_norm": 0.16187144815921783, "learning_rate": 0.002, "loss": 2.5681, "step": 174050 }, { "epoch": 0.34676622465893153, "grad_norm": 0.1608688086271286, "learning_rate": 0.002, "loss": 2.5467, "step": 174060 }, { "epoch": 0.3467861468825704, "grad_norm": 0.1611410677433014, "learning_rate": 0.002, "loss": 2.5598, "step": 174070 }, { "epoch": 0.34680606910620937, "grad_norm": 0.17726877331733704, "learning_rate": 0.002, "loss": 2.5536, "step": 174080 }, { "epoch": 0.34682599132984826, "grad_norm": 0.17160938680171967, "learning_rate": 0.002, "loss": 2.5748, "step": 174090 }, { "epoch": 0.3468459135534872, "grad_norm": 0.1555514931678772, "learning_rate": 0.002, "loss": 2.5696, "step": 174100 }, { "epoch": 0.3468658357771261, "grad_norm": 0.17217877507209778, "learning_rate": 0.002, "loss": 2.567, "step": 174110 }, { "epoch": 0.346885758000765, "grad_norm": 0.2149728387594223, "learning_rate": 0.002, "loss": 2.5475, "step": 174120 }, { "epoch": 0.34690568022440393, "grad_norm": 0.14276987314224243, "learning_rate": 0.002, "loss": 2.5856, "step": 174130 }, { "epoch": 0.3469256024480428, "grad_norm": 0.15461833775043488, "learning_rate": 0.002, "loss": 2.5622, "step": 174140 }, { "epoch": 0.34694552467168177, "grad_norm": 0.1652442365884781, "learning_rate": 0.002, "loss": 2.5633, "step": 174150 }, { "epoch": 0.34696544689532066, "grad_norm": 0.14021216332912445, "learning_rate": 0.002, "loss": 2.5704, "step": 174160 }, { "epoch": 0.3469853691189596, "grad_norm": 0.1499863564968109, "learning_rate": 0.002, "loss": 2.5717, "step": 174170 }, { "epoch": 0.3470052913425985, "grad_norm": 0.18295761942863464, "learning_rate": 0.002, "loss": 2.5625, "step": 174180 }, { "epoch": 0.3470252135662374, "grad_norm": 0.16697898507118225, "learning_rate": 0.002, "loss": 2.5704, "step": 174190 }, { "epoch": 0.34704513578987634, "grad_norm": 0.16385404765605927, "learning_rate": 0.002, "loss": 2.5541, "step": 174200 }, { "epoch": 0.3470650580135152, "grad_norm": 0.19889581203460693, "learning_rate": 0.002, "loss": 2.5565, "step": 174210 }, { "epoch": 0.3470849802371542, "grad_norm": 0.16061487793922424, "learning_rate": 0.002, "loss": 2.5664, "step": 174220 }, { "epoch": 0.34710490246079306, "grad_norm": 0.14120781421661377, "learning_rate": 0.002, "loss": 2.555, "step": 174230 }, { "epoch": 0.34712482468443195, "grad_norm": 0.20133952796459198, "learning_rate": 0.002, "loss": 2.5614, "step": 174240 }, { "epoch": 0.3471447469080709, "grad_norm": 0.1520363986492157, "learning_rate": 0.002, "loss": 2.5618, "step": 174250 }, { "epoch": 0.3471646691317098, "grad_norm": 0.16265951097011566, "learning_rate": 0.002, "loss": 2.5638, "step": 174260 }, { "epoch": 0.34718459135534874, "grad_norm": 0.15001346170902252, "learning_rate": 0.002, "loss": 2.556, "step": 174270 }, { "epoch": 0.34720451357898763, "grad_norm": 0.18441657721996307, "learning_rate": 0.002, "loss": 2.5607, "step": 174280 }, { "epoch": 0.3472244358026265, "grad_norm": 0.16253472864627838, "learning_rate": 0.002, "loss": 2.5583, "step": 174290 }, { "epoch": 0.34724435802626546, "grad_norm": 0.17897826433181763, "learning_rate": 0.002, "loss": 2.5397, "step": 174300 }, { "epoch": 0.34726428024990436, "grad_norm": 0.17740356922149658, "learning_rate": 0.002, "loss": 2.5548, "step": 174310 }, { "epoch": 0.3472842024735433, "grad_norm": 0.18877506256103516, "learning_rate": 0.002, "loss": 2.565, "step": 174320 }, { "epoch": 0.3473041246971822, "grad_norm": 0.13993597030639648, "learning_rate": 0.002, "loss": 2.5625, "step": 174330 }, { "epoch": 0.34732404692082114, "grad_norm": 0.17995885014533997, "learning_rate": 0.002, "loss": 2.5706, "step": 174340 }, { "epoch": 0.34734396914446003, "grad_norm": 0.18730807304382324, "learning_rate": 0.002, "loss": 2.5688, "step": 174350 }, { "epoch": 0.3473638913680989, "grad_norm": 0.1583741009235382, "learning_rate": 0.002, "loss": 2.545, "step": 174360 }, { "epoch": 0.34738381359173787, "grad_norm": 0.152598574757576, "learning_rate": 0.002, "loss": 2.5667, "step": 174370 }, { "epoch": 0.34740373581537676, "grad_norm": 0.19468489289283752, "learning_rate": 0.002, "loss": 2.562, "step": 174380 }, { "epoch": 0.3474236580390157, "grad_norm": 0.14932076632976532, "learning_rate": 0.002, "loss": 2.5592, "step": 174390 }, { "epoch": 0.3474435802626546, "grad_norm": 0.1875365972518921, "learning_rate": 0.002, "loss": 2.5646, "step": 174400 }, { "epoch": 0.3474635024862935, "grad_norm": 0.19327691197395325, "learning_rate": 0.002, "loss": 2.5545, "step": 174410 }, { "epoch": 0.34748342470993243, "grad_norm": 0.156337708234787, "learning_rate": 0.002, "loss": 2.5695, "step": 174420 }, { "epoch": 0.3475033469335713, "grad_norm": 0.20275382697582245, "learning_rate": 0.002, "loss": 2.5604, "step": 174430 }, { "epoch": 0.34752326915721027, "grad_norm": 0.21044489741325378, "learning_rate": 0.002, "loss": 2.5509, "step": 174440 }, { "epoch": 0.34754319138084916, "grad_norm": 0.15007337927818298, "learning_rate": 0.002, "loss": 2.5577, "step": 174450 }, { "epoch": 0.3475631136044881, "grad_norm": 0.15883757174015045, "learning_rate": 0.002, "loss": 2.5698, "step": 174460 }, { "epoch": 0.347583035828127, "grad_norm": 0.15801605582237244, "learning_rate": 0.002, "loss": 2.5823, "step": 174470 }, { "epoch": 0.3476029580517659, "grad_norm": 0.16638889908790588, "learning_rate": 0.002, "loss": 2.5601, "step": 174480 }, { "epoch": 0.34762288027540483, "grad_norm": 0.1574421525001526, "learning_rate": 0.002, "loss": 2.5719, "step": 174490 }, { "epoch": 0.3476428024990437, "grad_norm": 0.14630454778671265, "learning_rate": 0.002, "loss": 2.5512, "step": 174500 }, { "epoch": 0.34766272472268267, "grad_norm": 0.16828541457653046, "learning_rate": 0.002, "loss": 2.5645, "step": 174510 }, { "epoch": 0.34768264694632156, "grad_norm": 0.1735081672668457, "learning_rate": 0.002, "loss": 2.5648, "step": 174520 }, { "epoch": 0.34770256916996045, "grad_norm": 0.1902439147233963, "learning_rate": 0.002, "loss": 2.5502, "step": 174530 }, { "epoch": 0.3477224913935994, "grad_norm": 0.1396341323852539, "learning_rate": 0.002, "loss": 2.5717, "step": 174540 }, { "epoch": 0.3477424136172383, "grad_norm": 0.15138106048107147, "learning_rate": 0.002, "loss": 2.5572, "step": 174550 }, { "epoch": 0.34776233584087723, "grad_norm": 0.158991277217865, "learning_rate": 0.002, "loss": 2.5612, "step": 174560 }, { "epoch": 0.3477822580645161, "grad_norm": 0.16236095130443573, "learning_rate": 0.002, "loss": 2.5496, "step": 174570 }, { "epoch": 0.347802180288155, "grad_norm": 0.16287247836589813, "learning_rate": 0.002, "loss": 2.5634, "step": 174580 }, { "epoch": 0.34782210251179396, "grad_norm": 0.21014025807380676, "learning_rate": 0.002, "loss": 2.5653, "step": 174590 }, { "epoch": 0.34784202473543285, "grad_norm": 0.15868160128593445, "learning_rate": 0.002, "loss": 2.5649, "step": 174600 }, { "epoch": 0.3478619469590718, "grad_norm": 0.17460383474826813, "learning_rate": 0.002, "loss": 2.5726, "step": 174610 }, { "epoch": 0.3478818691827107, "grad_norm": 0.16461992263793945, "learning_rate": 0.002, "loss": 2.5574, "step": 174620 }, { "epoch": 0.34790179140634964, "grad_norm": 0.16296912729740143, "learning_rate": 0.002, "loss": 2.5536, "step": 174630 }, { "epoch": 0.3479217136299885, "grad_norm": 0.17331761121749878, "learning_rate": 0.002, "loss": 2.5594, "step": 174640 }, { "epoch": 0.3479416358536274, "grad_norm": 0.15190638601779938, "learning_rate": 0.002, "loss": 2.568, "step": 174650 }, { "epoch": 0.34796155807726636, "grad_norm": 0.18356947600841522, "learning_rate": 0.002, "loss": 2.5493, "step": 174660 }, { "epoch": 0.34798148030090525, "grad_norm": 0.16622179746627808, "learning_rate": 0.002, "loss": 2.5499, "step": 174670 }, { "epoch": 0.3480014025245442, "grad_norm": 0.16059710085391998, "learning_rate": 0.002, "loss": 2.5582, "step": 174680 }, { "epoch": 0.3480213247481831, "grad_norm": 0.18138572573661804, "learning_rate": 0.002, "loss": 2.5666, "step": 174690 }, { "epoch": 0.348041246971822, "grad_norm": 0.15511709451675415, "learning_rate": 0.002, "loss": 2.5587, "step": 174700 }, { "epoch": 0.34806116919546093, "grad_norm": 0.2020404189825058, "learning_rate": 0.002, "loss": 2.5645, "step": 174710 }, { "epoch": 0.3480810914190998, "grad_norm": 0.18556323647499084, "learning_rate": 0.002, "loss": 2.5433, "step": 174720 }, { "epoch": 0.34810101364273877, "grad_norm": 0.14013656973838806, "learning_rate": 0.002, "loss": 2.5629, "step": 174730 }, { "epoch": 0.34812093586637766, "grad_norm": 0.1771717369556427, "learning_rate": 0.002, "loss": 2.5614, "step": 174740 }, { "epoch": 0.3481408580900166, "grad_norm": 0.17514830827713013, "learning_rate": 0.002, "loss": 2.569, "step": 174750 }, { "epoch": 0.3481607803136555, "grad_norm": 0.14436455070972443, "learning_rate": 0.002, "loss": 2.5604, "step": 174760 }, { "epoch": 0.3481807025372944, "grad_norm": 0.1658133864402771, "learning_rate": 0.002, "loss": 2.5548, "step": 174770 }, { "epoch": 0.34820062476093333, "grad_norm": 0.1585383415222168, "learning_rate": 0.002, "loss": 2.563, "step": 174780 }, { "epoch": 0.3482205469845722, "grad_norm": 0.1915869414806366, "learning_rate": 0.002, "loss": 2.5463, "step": 174790 }, { "epoch": 0.34824046920821117, "grad_norm": 0.1656457483768463, "learning_rate": 0.002, "loss": 2.5757, "step": 174800 }, { "epoch": 0.34826039143185006, "grad_norm": 0.18411028385162354, "learning_rate": 0.002, "loss": 2.5399, "step": 174810 }, { "epoch": 0.34828031365548895, "grad_norm": 0.1889318972826004, "learning_rate": 0.002, "loss": 2.5479, "step": 174820 }, { "epoch": 0.3483002358791279, "grad_norm": 0.1819753795862198, "learning_rate": 0.002, "loss": 2.5698, "step": 174830 }, { "epoch": 0.3483201581027668, "grad_norm": 0.1822487860918045, "learning_rate": 0.002, "loss": 2.556, "step": 174840 }, { "epoch": 0.34834008032640573, "grad_norm": 0.1612420231103897, "learning_rate": 0.002, "loss": 2.5456, "step": 174850 }, { "epoch": 0.3483600025500446, "grad_norm": 0.17504455149173737, "learning_rate": 0.002, "loss": 2.5591, "step": 174860 }, { "epoch": 0.3483799247736835, "grad_norm": 0.17348098754882812, "learning_rate": 0.002, "loss": 2.5797, "step": 174870 }, { "epoch": 0.34839984699732246, "grad_norm": 0.1568276584148407, "learning_rate": 0.002, "loss": 2.5636, "step": 174880 }, { "epoch": 0.34841976922096135, "grad_norm": 0.1618994027376175, "learning_rate": 0.002, "loss": 2.5489, "step": 174890 }, { "epoch": 0.3484396914446003, "grad_norm": 0.16193342208862305, "learning_rate": 0.002, "loss": 2.5615, "step": 174900 }, { "epoch": 0.3484596136682392, "grad_norm": 0.18698342144489288, "learning_rate": 0.002, "loss": 2.558, "step": 174910 }, { "epoch": 0.34847953589187813, "grad_norm": 0.1610725224018097, "learning_rate": 0.002, "loss": 2.565, "step": 174920 }, { "epoch": 0.348499458115517, "grad_norm": 0.16356994211673737, "learning_rate": 0.002, "loss": 2.5696, "step": 174930 }, { "epoch": 0.3485193803391559, "grad_norm": 0.19281119108200073, "learning_rate": 0.002, "loss": 2.5631, "step": 174940 }, { "epoch": 0.34853930256279486, "grad_norm": 0.17690160870552063, "learning_rate": 0.002, "loss": 2.5574, "step": 174950 }, { "epoch": 0.34855922478643375, "grad_norm": 0.1514437049627304, "learning_rate": 0.002, "loss": 2.5611, "step": 174960 }, { "epoch": 0.3485791470100727, "grad_norm": 0.1846124529838562, "learning_rate": 0.002, "loss": 2.5632, "step": 174970 }, { "epoch": 0.3485990692337116, "grad_norm": 0.1695294827222824, "learning_rate": 0.002, "loss": 2.5636, "step": 174980 }, { "epoch": 0.3486189914573505, "grad_norm": 0.18391641974449158, "learning_rate": 0.002, "loss": 2.5423, "step": 174990 }, { "epoch": 0.3486389136809894, "grad_norm": 0.14087019860744476, "learning_rate": 0.002, "loss": 2.5716, "step": 175000 }, { "epoch": 0.3486588359046283, "grad_norm": 0.1857890635728836, "learning_rate": 0.002, "loss": 2.5554, "step": 175010 }, { "epoch": 0.34867875812826726, "grad_norm": 0.17765305936336517, "learning_rate": 0.002, "loss": 2.556, "step": 175020 }, { "epoch": 0.34869868035190615, "grad_norm": 0.1663685441017151, "learning_rate": 0.002, "loss": 2.5563, "step": 175030 }, { "epoch": 0.34871860257554504, "grad_norm": 0.137828528881073, "learning_rate": 0.002, "loss": 2.5501, "step": 175040 }, { "epoch": 0.348738524799184, "grad_norm": 0.17881454527378082, "learning_rate": 0.002, "loss": 2.5501, "step": 175050 }, { "epoch": 0.3487584470228229, "grad_norm": 0.15635251998901367, "learning_rate": 0.002, "loss": 2.5627, "step": 175060 }, { "epoch": 0.34877836924646183, "grad_norm": 0.14087773859500885, "learning_rate": 0.002, "loss": 2.543, "step": 175070 }, { "epoch": 0.3487982914701007, "grad_norm": 0.17168289422988892, "learning_rate": 0.002, "loss": 2.5625, "step": 175080 }, { "epoch": 0.34881821369373966, "grad_norm": 0.21000376343727112, "learning_rate": 0.002, "loss": 2.5645, "step": 175090 }, { "epoch": 0.34883813591737856, "grad_norm": 0.1546323448419571, "learning_rate": 0.002, "loss": 2.5554, "step": 175100 }, { "epoch": 0.34885805814101745, "grad_norm": 0.14139075577259064, "learning_rate": 0.002, "loss": 2.5527, "step": 175110 }, { "epoch": 0.3488779803646564, "grad_norm": 0.164213627576828, "learning_rate": 0.002, "loss": 2.5431, "step": 175120 }, { "epoch": 0.3488979025882953, "grad_norm": 0.16447697579860687, "learning_rate": 0.002, "loss": 2.5736, "step": 175130 }, { "epoch": 0.34891782481193423, "grad_norm": 0.16298328340053558, "learning_rate": 0.002, "loss": 2.5513, "step": 175140 }, { "epoch": 0.3489377470355731, "grad_norm": 0.1423364132642746, "learning_rate": 0.002, "loss": 2.5479, "step": 175150 }, { "epoch": 0.348957669259212, "grad_norm": 0.17378021776676178, "learning_rate": 0.002, "loss": 2.5552, "step": 175160 }, { "epoch": 0.34897759148285096, "grad_norm": 0.1578332483768463, "learning_rate": 0.002, "loss": 2.5527, "step": 175170 }, { "epoch": 0.34899751370648985, "grad_norm": 0.15394000709056854, "learning_rate": 0.002, "loss": 2.5706, "step": 175180 }, { "epoch": 0.3490174359301288, "grad_norm": 0.14493782818317413, "learning_rate": 0.002, "loss": 2.5559, "step": 175190 }, { "epoch": 0.3490373581537677, "grad_norm": 0.1858118623495102, "learning_rate": 0.002, "loss": 2.5701, "step": 175200 }, { "epoch": 0.34905728037740663, "grad_norm": 0.16986538469791412, "learning_rate": 0.002, "loss": 2.5649, "step": 175210 }, { "epoch": 0.3490772026010455, "grad_norm": 0.21568936109542847, "learning_rate": 0.002, "loss": 2.5695, "step": 175220 }, { "epoch": 0.3490971248246844, "grad_norm": 0.15866386890411377, "learning_rate": 0.002, "loss": 2.5517, "step": 175230 }, { "epoch": 0.34911704704832336, "grad_norm": 0.19150540232658386, "learning_rate": 0.002, "loss": 2.5495, "step": 175240 }, { "epoch": 0.34913696927196225, "grad_norm": 0.1358456313610077, "learning_rate": 0.002, "loss": 2.5569, "step": 175250 }, { "epoch": 0.3491568914956012, "grad_norm": 0.14210307598114014, "learning_rate": 0.002, "loss": 2.5676, "step": 175260 }, { "epoch": 0.3491768137192401, "grad_norm": 0.20925650000572205, "learning_rate": 0.002, "loss": 2.5444, "step": 175270 }, { "epoch": 0.349196735942879, "grad_norm": 0.1571871042251587, "learning_rate": 0.002, "loss": 2.5664, "step": 175280 }, { "epoch": 0.3492166581665179, "grad_norm": 0.16010898351669312, "learning_rate": 0.002, "loss": 2.5486, "step": 175290 }, { "epoch": 0.3492365803901568, "grad_norm": 0.17785990238189697, "learning_rate": 0.002, "loss": 2.5598, "step": 175300 }, { "epoch": 0.34925650261379576, "grad_norm": 0.15780740976333618, "learning_rate": 0.002, "loss": 2.5584, "step": 175310 }, { "epoch": 0.34927642483743465, "grad_norm": 0.17232047021389008, "learning_rate": 0.002, "loss": 2.5532, "step": 175320 }, { "epoch": 0.34929634706107354, "grad_norm": 0.16908569633960724, "learning_rate": 0.002, "loss": 2.552, "step": 175330 }, { "epoch": 0.3493162692847125, "grad_norm": 0.18180827796459198, "learning_rate": 0.002, "loss": 2.5667, "step": 175340 }, { "epoch": 0.3493361915083514, "grad_norm": 0.15846623480319977, "learning_rate": 0.002, "loss": 2.5565, "step": 175350 }, { "epoch": 0.3493561137319903, "grad_norm": 0.18505744636058807, "learning_rate": 0.002, "loss": 2.567, "step": 175360 }, { "epoch": 0.3493760359556292, "grad_norm": 0.18433405458927155, "learning_rate": 0.002, "loss": 2.5706, "step": 175370 }, { "epoch": 0.34939595817926816, "grad_norm": 0.1803504228591919, "learning_rate": 0.002, "loss": 2.5666, "step": 175380 }, { "epoch": 0.34941588040290705, "grad_norm": 0.17690062522888184, "learning_rate": 0.002, "loss": 2.5491, "step": 175390 }, { "epoch": 0.34943580262654594, "grad_norm": 0.1762847602367401, "learning_rate": 0.002, "loss": 2.5649, "step": 175400 }, { "epoch": 0.3494557248501849, "grad_norm": 0.1444820761680603, "learning_rate": 0.002, "loss": 2.5479, "step": 175410 }, { "epoch": 0.3494756470738238, "grad_norm": 0.1584169864654541, "learning_rate": 0.002, "loss": 2.5604, "step": 175420 }, { "epoch": 0.3494955692974627, "grad_norm": 0.1438862383365631, "learning_rate": 0.002, "loss": 2.561, "step": 175430 }, { "epoch": 0.3495154915211016, "grad_norm": 0.16645671427249908, "learning_rate": 0.002, "loss": 2.5697, "step": 175440 }, { "epoch": 0.3495354137447405, "grad_norm": 0.17414715886116028, "learning_rate": 0.002, "loss": 2.5701, "step": 175450 }, { "epoch": 0.34955533596837945, "grad_norm": 0.19041788578033447, "learning_rate": 0.002, "loss": 2.5558, "step": 175460 }, { "epoch": 0.34957525819201835, "grad_norm": 0.17948012053966522, "learning_rate": 0.002, "loss": 2.5604, "step": 175470 }, { "epoch": 0.3495951804156573, "grad_norm": 0.17243823409080505, "learning_rate": 0.002, "loss": 2.5577, "step": 175480 }, { "epoch": 0.3496151026392962, "grad_norm": 0.15780480206012726, "learning_rate": 0.002, "loss": 2.5549, "step": 175490 }, { "epoch": 0.34963502486293513, "grad_norm": 0.16111525893211365, "learning_rate": 0.002, "loss": 2.5399, "step": 175500 }, { "epoch": 0.349654947086574, "grad_norm": 0.17080014944076538, "learning_rate": 0.002, "loss": 2.5491, "step": 175510 }, { "epoch": 0.3496748693102129, "grad_norm": 0.16999918222427368, "learning_rate": 0.002, "loss": 2.5777, "step": 175520 }, { "epoch": 0.34969479153385186, "grad_norm": 0.1525517702102661, "learning_rate": 0.002, "loss": 2.563, "step": 175530 }, { "epoch": 0.34971471375749075, "grad_norm": 0.24058030545711517, "learning_rate": 0.002, "loss": 2.5686, "step": 175540 }, { "epoch": 0.3497346359811297, "grad_norm": 0.1576860398054123, "learning_rate": 0.002, "loss": 2.5576, "step": 175550 }, { "epoch": 0.3497545582047686, "grad_norm": 0.17702405154705048, "learning_rate": 0.002, "loss": 2.5641, "step": 175560 }, { "epoch": 0.3497744804284075, "grad_norm": 0.1684587597846985, "learning_rate": 0.002, "loss": 2.553, "step": 175570 }, { "epoch": 0.3497944026520464, "grad_norm": 0.1511654108762741, "learning_rate": 0.002, "loss": 2.5248, "step": 175580 }, { "epoch": 0.3498143248756853, "grad_norm": 0.1482144594192505, "learning_rate": 0.002, "loss": 2.5697, "step": 175590 }, { "epoch": 0.34983424709932426, "grad_norm": 0.16879256069660187, "learning_rate": 0.002, "loss": 2.5522, "step": 175600 }, { "epoch": 0.34985416932296315, "grad_norm": 0.19441592693328857, "learning_rate": 0.002, "loss": 2.5562, "step": 175610 }, { "epoch": 0.34987409154660204, "grad_norm": 0.18183696269989014, "learning_rate": 0.002, "loss": 2.5753, "step": 175620 }, { "epoch": 0.349894013770241, "grad_norm": 0.15040907263755798, "learning_rate": 0.002, "loss": 2.556, "step": 175630 }, { "epoch": 0.3499139359938799, "grad_norm": 0.18719413876533508, "learning_rate": 0.002, "loss": 2.5629, "step": 175640 }, { "epoch": 0.3499338582175188, "grad_norm": 0.17597462236881256, "learning_rate": 0.002, "loss": 2.553, "step": 175650 }, { "epoch": 0.3499537804411577, "grad_norm": 0.17653068900108337, "learning_rate": 0.002, "loss": 2.5615, "step": 175660 }, { "epoch": 0.34997370266479666, "grad_norm": 0.17312811315059662, "learning_rate": 0.002, "loss": 2.5586, "step": 175670 }, { "epoch": 0.34999362488843555, "grad_norm": 0.15809795260429382, "learning_rate": 0.002, "loss": 2.5833, "step": 175680 }, { "epoch": 0.35001354711207444, "grad_norm": 0.17421869933605194, "learning_rate": 0.002, "loss": 2.5664, "step": 175690 }, { "epoch": 0.3500334693357134, "grad_norm": 0.16043174266815186, "learning_rate": 0.002, "loss": 2.5532, "step": 175700 }, { "epoch": 0.3500533915593523, "grad_norm": 0.1777850091457367, "learning_rate": 0.002, "loss": 2.5481, "step": 175710 }, { "epoch": 0.3500733137829912, "grad_norm": 0.15506280958652496, "learning_rate": 0.002, "loss": 2.5738, "step": 175720 }, { "epoch": 0.3500932360066301, "grad_norm": 0.15438924729824066, "learning_rate": 0.002, "loss": 2.5667, "step": 175730 }, { "epoch": 0.350113158230269, "grad_norm": 0.15428711473941803, "learning_rate": 0.002, "loss": 2.5521, "step": 175740 }, { "epoch": 0.35013308045390795, "grad_norm": 0.18429169058799744, "learning_rate": 0.002, "loss": 2.5566, "step": 175750 }, { "epoch": 0.35015300267754684, "grad_norm": 0.15089254081249237, "learning_rate": 0.002, "loss": 2.5755, "step": 175760 }, { "epoch": 0.3501729249011858, "grad_norm": 0.2158755511045456, "learning_rate": 0.002, "loss": 2.5454, "step": 175770 }, { "epoch": 0.3501928471248247, "grad_norm": 0.15039755403995514, "learning_rate": 0.002, "loss": 2.5457, "step": 175780 }, { "epoch": 0.35021276934846357, "grad_norm": 0.1847236454486847, "learning_rate": 0.002, "loss": 2.5595, "step": 175790 }, { "epoch": 0.3502326915721025, "grad_norm": 0.15115074813365936, "learning_rate": 0.002, "loss": 2.5591, "step": 175800 }, { "epoch": 0.3502526137957414, "grad_norm": 0.1603843867778778, "learning_rate": 0.002, "loss": 2.5533, "step": 175810 }, { "epoch": 0.35027253601938035, "grad_norm": 0.16946810483932495, "learning_rate": 0.002, "loss": 2.5618, "step": 175820 }, { "epoch": 0.35029245824301924, "grad_norm": 0.1953267753124237, "learning_rate": 0.002, "loss": 2.5543, "step": 175830 }, { "epoch": 0.3503123804666582, "grad_norm": 0.15906839072704315, "learning_rate": 0.002, "loss": 2.5574, "step": 175840 }, { "epoch": 0.3503323026902971, "grad_norm": 0.14721029996871948, "learning_rate": 0.002, "loss": 2.5575, "step": 175850 }, { "epoch": 0.350352224913936, "grad_norm": 0.16430658102035522, "learning_rate": 0.002, "loss": 2.5626, "step": 175860 }, { "epoch": 0.3503721471375749, "grad_norm": 0.1893148273229599, "learning_rate": 0.002, "loss": 2.5663, "step": 175870 }, { "epoch": 0.3503920693612138, "grad_norm": 0.1695031076669693, "learning_rate": 0.002, "loss": 2.5587, "step": 175880 }, { "epoch": 0.35041199158485276, "grad_norm": 0.1688169240951538, "learning_rate": 0.002, "loss": 2.5671, "step": 175890 }, { "epoch": 0.35043191380849165, "grad_norm": 0.15985791385173798, "learning_rate": 0.002, "loss": 2.5638, "step": 175900 }, { "epoch": 0.35045183603213054, "grad_norm": 0.15069961547851562, "learning_rate": 0.002, "loss": 2.5563, "step": 175910 }, { "epoch": 0.3504717582557695, "grad_norm": 0.17277298867702484, "learning_rate": 0.002, "loss": 2.5684, "step": 175920 }, { "epoch": 0.3504916804794084, "grad_norm": 0.15882377326488495, "learning_rate": 0.002, "loss": 2.566, "step": 175930 }, { "epoch": 0.3505116027030473, "grad_norm": 0.1879591941833496, "learning_rate": 0.002, "loss": 2.5677, "step": 175940 }, { "epoch": 0.3505315249266862, "grad_norm": 0.16293199360370636, "learning_rate": 0.002, "loss": 2.561, "step": 175950 }, { "epoch": 0.35055144715032516, "grad_norm": 0.15989115834236145, "learning_rate": 0.002, "loss": 2.5562, "step": 175960 }, { "epoch": 0.35057136937396405, "grad_norm": 0.16168907284736633, "learning_rate": 0.002, "loss": 2.5522, "step": 175970 }, { "epoch": 0.35059129159760294, "grad_norm": 0.14316825568675995, "learning_rate": 0.002, "loss": 2.5571, "step": 175980 }, { "epoch": 0.3506112138212419, "grad_norm": 0.17041997611522675, "learning_rate": 0.002, "loss": 2.5595, "step": 175990 }, { "epoch": 0.3506311360448808, "grad_norm": 0.18165968358516693, "learning_rate": 0.002, "loss": 2.567, "step": 176000 }, { "epoch": 0.3506510582685197, "grad_norm": 0.16999824345111847, "learning_rate": 0.002, "loss": 2.5746, "step": 176010 }, { "epoch": 0.3506709804921586, "grad_norm": 0.149795264005661, "learning_rate": 0.002, "loss": 2.5684, "step": 176020 }, { "epoch": 0.3506909027157975, "grad_norm": 0.1451924741268158, "learning_rate": 0.002, "loss": 2.5571, "step": 176030 }, { "epoch": 0.35071082493943645, "grad_norm": 0.19042958319187164, "learning_rate": 0.002, "loss": 2.5698, "step": 176040 }, { "epoch": 0.35073074716307534, "grad_norm": 0.16050130128860474, "learning_rate": 0.002, "loss": 2.571, "step": 176050 }, { "epoch": 0.3507506693867143, "grad_norm": 0.18933044373989105, "learning_rate": 0.002, "loss": 2.5634, "step": 176060 }, { "epoch": 0.3507705916103532, "grad_norm": 0.14507068693637848, "learning_rate": 0.002, "loss": 2.5573, "step": 176070 }, { "epoch": 0.35079051383399207, "grad_norm": 0.19500209391117096, "learning_rate": 0.002, "loss": 2.5632, "step": 176080 }, { "epoch": 0.350810436057631, "grad_norm": 0.18980665504932404, "learning_rate": 0.002, "loss": 2.5744, "step": 176090 }, { "epoch": 0.3508303582812699, "grad_norm": 0.16822008788585663, "learning_rate": 0.002, "loss": 2.5613, "step": 176100 }, { "epoch": 0.35085028050490885, "grad_norm": 0.15783467888832092, "learning_rate": 0.002, "loss": 2.5528, "step": 176110 }, { "epoch": 0.35087020272854774, "grad_norm": 0.17322948575019836, "learning_rate": 0.002, "loss": 2.5486, "step": 176120 }, { "epoch": 0.3508901249521867, "grad_norm": 0.1706656813621521, "learning_rate": 0.002, "loss": 2.5579, "step": 176130 }, { "epoch": 0.3509100471758256, "grad_norm": 0.14438402652740479, "learning_rate": 0.002, "loss": 2.5624, "step": 176140 }, { "epoch": 0.35092996939946447, "grad_norm": 0.16908107697963715, "learning_rate": 0.002, "loss": 2.558, "step": 176150 }, { "epoch": 0.3509498916231034, "grad_norm": 0.15693974494934082, "learning_rate": 0.002, "loss": 2.5722, "step": 176160 }, { "epoch": 0.3509698138467423, "grad_norm": 0.15531788766384125, "learning_rate": 0.002, "loss": 2.5528, "step": 176170 }, { "epoch": 0.35098973607038125, "grad_norm": 0.17720511555671692, "learning_rate": 0.002, "loss": 2.5566, "step": 176180 }, { "epoch": 0.35100965829402014, "grad_norm": 0.22895999252796173, "learning_rate": 0.002, "loss": 2.5735, "step": 176190 }, { "epoch": 0.35102958051765903, "grad_norm": 0.1485077291727066, "learning_rate": 0.002, "loss": 2.5516, "step": 176200 }, { "epoch": 0.351049502741298, "grad_norm": 0.17610380053520203, "learning_rate": 0.002, "loss": 2.54, "step": 176210 }, { "epoch": 0.35106942496493687, "grad_norm": 0.1432119756937027, "learning_rate": 0.002, "loss": 2.5623, "step": 176220 }, { "epoch": 0.3510893471885758, "grad_norm": 0.18931786715984344, "learning_rate": 0.002, "loss": 2.5826, "step": 176230 }, { "epoch": 0.3511092694122147, "grad_norm": 0.24067257344722748, "learning_rate": 0.002, "loss": 2.5643, "step": 176240 }, { "epoch": 0.35112919163585365, "grad_norm": 0.14763277769088745, "learning_rate": 0.002, "loss": 2.5576, "step": 176250 }, { "epoch": 0.35114911385949255, "grad_norm": 0.18893638253211975, "learning_rate": 0.002, "loss": 2.5657, "step": 176260 }, { "epoch": 0.35116903608313144, "grad_norm": 0.18379701673984528, "learning_rate": 0.002, "loss": 2.5749, "step": 176270 }, { "epoch": 0.3511889583067704, "grad_norm": 0.16770559549331665, "learning_rate": 0.002, "loss": 2.5369, "step": 176280 }, { "epoch": 0.3512088805304093, "grad_norm": 0.14630603790283203, "learning_rate": 0.002, "loss": 2.5544, "step": 176290 }, { "epoch": 0.3512288027540482, "grad_norm": 0.21013711392879486, "learning_rate": 0.002, "loss": 2.5789, "step": 176300 }, { "epoch": 0.3512487249776871, "grad_norm": 0.16132742166519165, "learning_rate": 0.002, "loss": 2.5466, "step": 176310 }, { "epoch": 0.351268647201326, "grad_norm": 0.18327485024929047, "learning_rate": 0.002, "loss": 2.5589, "step": 176320 }, { "epoch": 0.35128856942496495, "grad_norm": 0.17135444283485413, "learning_rate": 0.002, "loss": 2.5476, "step": 176330 }, { "epoch": 0.35130849164860384, "grad_norm": 0.16575391590595245, "learning_rate": 0.002, "loss": 2.5582, "step": 176340 }, { "epoch": 0.3513284138722428, "grad_norm": 0.1649755835533142, "learning_rate": 0.002, "loss": 2.5561, "step": 176350 }, { "epoch": 0.3513483360958817, "grad_norm": 0.1817605048418045, "learning_rate": 0.002, "loss": 2.563, "step": 176360 }, { "epoch": 0.35136825831952057, "grad_norm": 0.21165300905704498, "learning_rate": 0.002, "loss": 2.5529, "step": 176370 }, { "epoch": 0.3513881805431595, "grad_norm": 0.15123777091503143, "learning_rate": 0.002, "loss": 2.5486, "step": 176380 }, { "epoch": 0.3514081027667984, "grad_norm": 0.18930046260356903, "learning_rate": 0.002, "loss": 2.5597, "step": 176390 }, { "epoch": 0.35142802499043735, "grad_norm": 0.14802667498588562, "learning_rate": 0.002, "loss": 2.5539, "step": 176400 }, { "epoch": 0.35144794721407624, "grad_norm": 0.16402405500411987, "learning_rate": 0.002, "loss": 2.5533, "step": 176410 }, { "epoch": 0.3514678694377152, "grad_norm": 0.14990633726119995, "learning_rate": 0.002, "loss": 2.5614, "step": 176420 }, { "epoch": 0.3514877916613541, "grad_norm": 0.18687456846237183, "learning_rate": 0.002, "loss": 2.554, "step": 176430 }, { "epoch": 0.35150771388499297, "grad_norm": 0.2054678201675415, "learning_rate": 0.002, "loss": 2.5562, "step": 176440 }, { "epoch": 0.3515276361086319, "grad_norm": 0.15405653417110443, "learning_rate": 0.002, "loss": 2.5597, "step": 176450 }, { "epoch": 0.3515475583322708, "grad_norm": 0.15927430987358093, "learning_rate": 0.002, "loss": 2.5611, "step": 176460 }, { "epoch": 0.35156748055590975, "grad_norm": 0.15851853787899017, "learning_rate": 0.002, "loss": 2.5609, "step": 176470 }, { "epoch": 0.35158740277954864, "grad_norm": 0.17056481540203094, "learning_rate": 0.002, "loss": 2.5529, "step": 176480 }, { "epoch": 0.35160732500318753, "grad_norm": 0.17171402275562286, "learning_rate": 0.002, "loss": 2.5515, "step": 176490 }, { "epoch": 0.3516272472268265, "grad_norm": 0.17245832085609436, "learning_rate": 0.002, "loss": 2.5617, "step": 176500 }, { "epoch": 0.35164716945046537, "grad_norm": 0.15179426968097687, "learning_rate": 0.002, "loss": 2.5482, "step": 176510 }, { "epoch": 0.3516670916741043, "grad_norm": 0.15568140149116516, "learning_rate": 0.002, "loss": 2.576, "step": 176520 }, { "epoch": 0.3516870138977432, "grad_norm": 0.20941205322742462, "learning_rate": 0.002, "loss": 2.55, "step": 176530 }, { "epoch": 0.3517069361213821, "grad_norm": 0.15129464864730835, "learning_rate": 0.002, "loss": 2.5398, "step": 176540 }, { "epoch": 0.35172685834502104, "grad_norm": 0.15995609760284424, "learning_rate": 0.002, "loss": 2.5563, "step": 176550 }, { "epoch": 0.35174678056865993, "grad_norm": 0.20289193093776703, "learning_rate": 0.002, "loss": 2.5688, "step": 176560 }, { "epoch": 0.3517667027922989, "grad_norm": 0.15132932364940643, "learning_rate": 0.002, "loss": 2.573, "step": 176570 }, { "epoch": 0.35178662501593777, "grad_norm": 0.165603905916214, "learning_rate": 0.002, "loss": 2.571, "step": 176580 }, { "epoch": 0.3518065472395767, "grad_norm": 0.1566096991300583, "learning_rate": 0.002, "loss": 2.5555, "step": 176590 }, { "epoch": 0.3518264694632156, "grad_norm": 0.1753896325826645, "learning_rate": 0.002, "loss": 2.56, "step": 176600 }, { "epoch": 0.3518463916868545, "grad_norm": 0.20458774268627167, "learning_rate": 0.002, "loss": 2.5513, "step": 176610 }, { "epoch": 0.35186631391049344, "grad_norm": 0.2080589085817337, "learning_rate": 0.002, "loss": 2.5567, "step": 176620 }, { "epoch": 0.35188623613413234, "grad_norm": 0.3782423734664917, "learning_rate": 0.002, "loss": 2.5522, "step": 176630 }, { "epoch": 0.3519061583577713, "grad_norm": 0.17228445410728455, "learning_rate": 0.002, "loss": 2.5614, "step": 176640 }, { "epoch": 0.35192608058141017, "grad_norm": 0.16067518293857574, "learning_rate": 0.002, "loss": 2.5404, "step": 176650 }, { "epoch": 0.35194600280504906, "grad_norm": 0.14464569091796875, "learning_rate": 0.002, "loss": 2.5427, "step": 176660 }, { "epoch": 0.351965925028688, "grad_norm": 0.17820627987384796, "learning_rate": 0.002, "loss": 2.5485, "step": 176670 }, { "epoch": 0.3519858472523269, "grad_norm": 0.1504824310541153, "learning_rate": 0.002, "loss": 2.5586, "step": 176680 }, { "epoch": 0.35200576947596585, "grad_norm": 0.20201699435710907, "learning_rate": 0.002, "loss": 2.5581, "step": 176690 }, { "epoch": 0.35202569169960474, "grad_norm": 0.14853960275650024, "learning_rate": 0.002, "loss": 2.5546, "step": 176700 }, { "epoch": 0.3520456139232437, "grad_norm": 0.14869993925094604, "learning_rate": 0.002, "loss": 2.5581, "step": 176710 }, { "epoch": 0.3520655361468826, "grad_norm": 0.16641686856746674, "learning_rate": 0.002, "loss": 2.5783, "step": 176720 }, { "epoch": 0.35208545837052146, "grad_norm": 0.15484750270843506, "learning_rate": 0.002, "loss": 2.5514, "step": 176730 }, { "epoch": 0.3521053805941604, "grad_norm": 0.17455005645751953, "learning_rate": 0.002, "loss": 2.56, "step": 176740 }, { "epoch": 0.3521253028177993, "grad_norm": 0.16883935034275055, "learning_rate": 0.002, "loss": 2.5541, "step": 176750 }, { "epoch": 0.35214522504143825, "grad_norm": 0.1546235829591751, "learning_rate": 0.002, "loss": 2.553, "step": 176760 }, { "epoch": 0.35216514726507714, "grad_norm": 0.20094329118728638, "learning_rate": 0.002, "loss": 2.5591, "step": 176770 }, { "epoch": 0.35218506948871603, "grad_norm": 0.154944509267807, "learning_rate": 0.002, "loss": 2.5439, "step": 176780 }, { "epoch": 0.352204991712355, "grad_norm": 0.15600839257240295, "learning_rate": 0.002, "loss": 2.5662, "step": 176790 }, { "epoch": 0.35222491393599387, "grad_norm": 0.1715189665555954, "learning_rate": 0.002, "loss": 2.558, "step": 176800 }, { "epoch": 0.3522448361596328, "grad_norm": 0.18309016525745392, "learning_rate": 0.002, "loss": 2.5805, "step": 176810 }, { "epoch": 0.3522647583832717, "grad_norm": 0.14221316576004028, "learning_rate": 0.002, "loss": 2.5576, "step": 176820 }, { "epoch": 0.3522846806069106, "grad_norm": 0.18761169910430908, "learning_rate": 0.002, "loss": 2.5686, "step": 176830 }, { "epoch": 0.35230460283054954, "grad_norm": 0.15446822345256805, "learning_rate": 0.002, "loss": 2.548, "step": 176840 }, { "epoch": 0.35232452505418843, "grad_norm": 0.1921870857477188, "learning_rate": 0.002, "loss": 2.5791, "step": 176850 }, { "epoch": 0.3523444472778274, "grad_norm": 0.1740591824054718, "learning_rate": 0.002, "loss": 2.5765, "step": 176860 }, { "epoch": 0.35236436950146627, "grad_norm": 0.18396779894828796, "learning_rate": 0.002, "loss": 2.5562, "step": 176870 }, { "epoch": 0.3523842917251052, "grad_norm": 0.15443600714206696, "learning_rate": 0.002, "loss": 2.569, "step": 176880 }, { "epoch": 0.3524042139487441, "grad_norm": 0.17315910756587982, "learning_rate": 0.002, "loss": 2.5629, "step": 176890 }, { "epoch": 0.352424136172383, "grad_norm": 0.17250025272369385, "learning_rate": 0.002, "loss": 2.5469, "step": 176900 }, { "epoch": 0.35244405839602194, "grad_norm": 0.19226743280887604, "learning_rate": 0.002, "loss": 2.5532, "step": 176910 }, { "epoch": 0.35246398061966083, "grad_norm": 0.1653471440076828, "learning_rate": 0.002, "loss": 2.5553, "step": 176920 }, { "epoch": 0.3524839028432998, "grad_norm": 0.18358099460601807, "learning_rate": 0.002, "loss": 2.5613, "step": 176930 }, { "epoch": 0.35250382506693867, "grad_norm": 0.16627535223960876, "learning_rate": 0.002, "loss": 2.5738, "step": 176940 }, { "epoch": 0.35252374729057756, "grad_norm": 0.17144693434238434, "learning_rate": 0.002, "loss": 2.5588, "step": 176950 }, { "epoch": 0.3525436695142165, "grad_norm": 0.19442278146743774, "learning_rate": 0.002, "loss": 2.5613, "step": 176960 }, { "epoch": 0.3525635917378554, "grad_norm": 0.1480870097875595, "learning_rate": 0.002, "loss": 2.5489, "step": 176970 }, { "epoch": 0.35258351396149434, "grad_norm": 0.15876589715480804, "learning_rate": 0.002, "loss": 2.5569, "step": 176980 }, { "epoch": 0.35260343618513323, "grad_norm": 0.17764492332935333, "learning_rate": 0.002, "loss": 2.5559, "step": 176990 }, { "epoch": 0.3526233584087722, "grad_norm": 0.16664466261863708, "learning_rate": 0.002, "loss": 2.5497, "step": 177000 }, { "epoch": 0.35264328063241107, "grad_norm": 0.15188032388687134, "learning_rate": 0.002, "loss": 2.549, "step": 177010 }, { "epoch": 0.35266320285604996, "grad_norm": 0.16283096373081207, "learning_rate": 0.002, "loss": 2.55, "step": 177020 }, { "epoch": 0.3526831250796889, "grad_norm": 0.15133589506149292, "learning_rate": 0.002, "loss": 2.5679, "step": 177030 }, { "epoch": 0.3527030473033278, "grad_norm": 0.1988382488489151, "learning_rate": 0.002, "loss": 2.5614, "step": 177040 }, { "epoch": 0.35272296952696675, "grad_norm": 0.19550912082195282, "learning_rate": 0.002, "loss": 2.5579, "step": 177050 }, { "epoch": 0.35274289175060564, "grad_norm": 0.1569257527589798, "learning_rate": 0.002, "loss": 2.5709, "step": 177060 }, { "epoch": 0.3527628139742445, "grad_norm": 0.16686072945594788, "learning_rate": 0.002, "loss": 2.5693, "step": 177070 }, { "epoch": 0.3527827361978835, "grad_norm": 0.1534084677696228, "learning_rate": 0.002, "loss": 2.5488, "step": 177080 }, { "epoch": 0.35280265842152236, "grad_norm": 0.15838992595672607, "learning_rate": 0.002, "loss": 2.5638, "step": 177090 }, { "epoch": 0.3528225806451613, "grad_norm": 0.1666247546672821, "learning_rate": 0.002, "loss": 2.5594, "step": 177100 }, { "epoch": 0.3528425028688002, "grad_norm": 0.16547560691833496, "learning_rate": 0.002, "loss": 2.5594, "step": 177110 }, { "epoch": 0.3528624250924391, "grad_norm": 0.1572365015745163, "learning_rate": 0.002, "loss": 2.5558, "step": 177120 }, { "epoch": 0.35288234731607804, "grad_norm": 0.15446266531944275, "learning_rate": 0.002, "loss": 2.5568, "step": 177130 }, { "epoch": 0.35290226953971693, "grad_norm": 0.15838393568992615, "learning_rate": 0.002, "loss": 2.5566, "step": 177140 }, { "epoch": 0.3529221917633559, "grad_norm": 0.15896174311637878, "learning_rate": 0.002, "loss": 2.5592, "step": 177150 }, { "epoch": 0.35294211398699477, "grad_norm": 0.16435520350933075, "learning_rate": 0.002, "loss": 2.5556, "step": 177160 }, { "epoch": 0.3529620362106337, "grad_norm": 0.15184691548347473, "learning_rate": 0.002, "loss": 2.568, "step": 177170 }, { "epoch": 0.3529819584342726, "grad_norm": 0.14474333822727203, "learning_rate": 0.002, "loss": 2.5751, "step": 177180 }, { "epoch": 0.3530018806579115, "grad_norm": 0.17585861682891846, "learning_rate": 0.002, "loss": 2.5547, "step": 177190 }, { "epoch": 0.35302180288155044, "grad_norm": 0.2062595635652542, "learning_rate": 0.002, "loss": 2.5546, "step": 177200 }, { "epoch": 0.35304172510518933, "grad_norm": 0.16518574953079224, "learning_rate": 0.002, "loss": 2.5615, "step": 177210 }, { "epoch": 0.3530616473288283, "grad_norm": 0.151576966047287, "learning_rate": 0.002, "loss": 2.5741, "step": 177220 }, { "epoch": 0.35308156955246717, "grad_norm": 0.18067054450511932, "learning_rate": 0.002, "loss": 2.5634, "step": 177230 }, { "epoch": 0.35310149177610606, "grad_norm": 0.1381715089082718, "learning_rate": 0.002, "loss": 2.5643, "step": 177240 }, { "epoch": 0.353121413999745, "grad_norm": 0.19958625733852386, "learning_rate": 0.002, "loss": 2.5577, "step": 177250 }, { "epoch": 0.3531413362233839, "grad_norm": 0.15311163663864136, "learning_rate": 0.002, "loss": 2.5661, "step": 177260 }, { "epoch": 0.35316125844702284, "grad_norm": 0.1575794816017151, "learning_rate": 0.002, "loss": 2.5517, "step": 177270 }, { "epoch": 0.35318118067066173, "grad_norm": 0.16248422861099243, "learning_rate": 0.002, "loss": 2.5507, "step": 177280 }, { "epoch": 0.3532011028943006, "grad_norm": 0.1553306132555008, "learning_rate": 0.002, "loss": 2.5477, "step": 177290 }, { "epoch": 0.35322102511793957, "grad_norm": 0.15205515921115875, "learning_rate": 0.002, "loss": 2.5654, "step": 177300 }, { "epoch": 0.35324094734157846, "grad_norm": 0.15892089903354645, "learning_rate": 0.002, "loss": 2.5402, "step": 177310 }, { "epoch": 0.3532608695652174, "grad_norm": 0.15348781645298004, "learning_rate": 0.002, "loss": 2.5552, "step": 177320 }, { "epoch": 0.3532807917888563, "grad_norm": 0.191446453332901, "learning_rate": 0.002, "loss": 2.5664, "step": 177330 }, { "epoch": 0.35330071401249524, "grad_norm": 0.16774626076221466, "learning_rate": 0.002, "loss": 2.5542, "step": 177340 }, { "epoch": 0.35332063623613413, "grad_norm": 0.1869681030511856, "learning_rate": 0.002, "loss": 2.5615, "step": 177350 }, { "epoch": 0.353340558459773, "grad_norm": 0.16348335146903992, "learning_rate": 0.002, "loss": 2.5687, "step": 177360 }, { "epoch": 0.35336048068341197, "grad_norm": 0.19738829135894775, "learning_rate": 0.002, "loss": 2.5606, "step": 177370 }, { "epoch": 0.35338040290705086, "grad_norm": 0.1953718513250351, "learning_rate": 0.002, "loss": 2.58, "step": 177380 }, { "epoch": 0.3534003251306898, "grad_norm": 0.20350529253482819, "learning_rate": 0.002, "loss": 2.5548, "step": 177390 }, { "epoch": 0.3534202473543287, "grad_norm": 0.1614709347486496, "learning_rate": 0.002, "loss": 2.5586, "step": 177400 }, { "epoch": 0.3534401695779676, "grad_norm": 0.19706739485263824, "learning_rate": 0.002, "loss": 2.559, "step": 177410 }, { "epoch": 0.35346009180160654, "grad_norm": 0.14030754566192627, "learning_rate": 0.002, "loss": 2.57, "step": 177420 }, { "epoch": 0.3534800140252454, "grad_norm": 0.22690671682357788, "learning_rate": 0.002, "loss": 2.5619, "step": 177430 }, { "epoch": 0.35349993624888437, "grad_norm": 0.15454399585723877, "learning_rate": 0.002, "loss": 2.5778, "step": 177440 }, { "epoch": 0.35351985847252326, "grad_norm": 0.15537692606449127, "learning_rate": 0.002, "loss": 2.5537, "step": 177450 }, { "epoch": 0.3535397806961622, "grad_norm": 0.1863955706357956, "learning_rate": 0.002, "loss": 2.5596, "step": 177460 }, { "epoch": 0.3535597029198011, "grad_norm": 0.16545742750167847, "learning_rate": 0.002, "loss": 2.5512, "step": 177470 }, { "epoch": 0.35357962514344, "grad_norm": 0.17850306630134583, "learning_rate": 0.002, "loss": 2.5535, "step": 177480 }, { "epoch": 0.35359954736707894, "grad_norm": 0.16891862452030182, "learning_rate": 0.002, "loss": 2.5607, "step": 177490 }, { "epoch": 0.3536194695907178, "grad_norm": 0.1570141762495041, "learning_rate": 0.002, "loss": 2.5544, "step": 177500 }, { "epoch": 0.3536393918143568, "grad_norm": 0.1981441229581833, "learning_rate": 0.002, "loss": 2.5658, "step": 177510 }, { "epoch": 0.35365931403799566, "grad_norm": 0.18204817175865173, "learning_rate": 0.002, "loss": 2.5714, "step": 177520 }, { "epoch": 0.35367923626163456, "grad_norm": 0.15110497176647186, "learning_rate": 0.002, "loss": 2.5543, "step": 177530 }, { "epoch": 0.3536991584852735, "grad_norm": 0.1445019394159317, "learning_rate": 0.002, "loss": 2.5532, "step": 177540 }, { "epoch": 0.3537190807089124, "grad_norm": 0.19399453699588776, "learning_rate": 0.002, "loss": 2.5702, "step": 177550 }, { "epoch": 0.35373900293255134, "grad_norm": 0.13100287318229675, "learning_rate": 0.002, "loss": 2.571, "step": 177560 }, { "epoch": 0.35375892515619023, "grad_norm": 0.1749170571565628, "learning_rate": 0.002, "loss": 2.5682, "step": 177570 }, { "epoch": 0.3537788473798291, "grad_norm": 0.14930249750614166, "learning_rate": 0.002, "loss": 2.5674, "step": 177580 }, { "epoch": 0.35379876960346807, "grad_norm": 0.17059126496315002, "learning_rate": 0.002, "loss": 2.5483, "step": 177590 }, { "epoch": 0.35381869182710696, "grad_norm": 0.15067778527736664, "learning_rate": 0.002, "loss": 2.5619, "step": 177600 }, { "epoch": 0.3538386140507459, "grad_norm": 0.25725147128105164, "learning_rate": 0.002, "loss": 2.5576, "step": 177610 }, { "epoch": 0.3538585362743848, "grad_norm": 0.14906936883926392, "learning_rate": 0.002, "loss": 2.5714, "step": 177620 }, { "epoch": 0.35387845849802374, "grad_norm": 0.18144483864307404, "learning_rate": 0.002, "loss": 2.5626, "step": 177630 }, { "epoch": 0.35389838072166263, "grad_norm": 0.148411363363266, "learning_rate": 0.002, "loss": 2.5628, "step": 177640 }, { "epoch": 0.3539183029453015, "grad_norm": 0.18660153448581696, "learning_rate": 0.002, "loss": 2.535, "step": 177650 }, { "epoch": 0.35393822516894047, "grad_norm": 0.15472657978534698, "learning_rate": 0.002, "loss": 2.566, "step": 177660 }, { "epoch": 0.35395814739257936, "grad_norm": 0.15907570719718933, "learning_rate": 0.002, "loss": 2.5631, "step": 177670 }, { "epoch": 0.3539780696162183, "grad_norm": 0.16543197631835938, "learning_rate": 0.002, "loss": 2.5709, "step": 177680 }, { "epoch": 0.3539979918398572, "grad_norm": 0.15768234431743622, "learning_rate": 0.002, "loss": 2.5564, "step": 177690 }, { "epoch": 0.3540179140634961, "grad_norm": 0.16422656178474426, "learning_rate": 0.002, "loss": 2.5796, "step": 177700 }, { "epoch": 0.35403783628713503, "grad_norm": 0.20825111865997314, "learning_rate": 0.002, "loss": 2.5539, "step": 177710 }, { "epoch": 0.3540577585107739, "grad_norm": 0.17451150715351105, "learning_rate": 0.002, "loss": 2.5531, "step": 177720 }, { "epoch": 0.35407768073441287, "grad_norm": 0.1564915031194687, "learning_rate": 0.002, "loss": 2.5415, "step": 177730 }, { "epoch": 0.35409760295805176, "grad_norm": 0.15820884704589844, "learning_rate": 0.002, "loss": 2.5525, "step": 177740 }, { "epoch": 0.3541175251816907, "grad_norm": 0.16157272458076477, "learning_rate": 0.002, "loss": 2.559, "step": 177750 }, { "epoch": 0.3541374474053296, "grad_norm": 0.20582014322280884, "learning_rate": 0.002, "loss": 2.5933, "step": 177760 }, { "epoch": 0.3541573696289685, "grad_norm": 0.1485394835472107, "learning_rate": 0.002, "loss": 2.5633, "step": 177770 }, { "epoch": 0.35417729185260743, "grad_norm": 0.20828399062156677, "learning_rate": 0.002, "loss": 2.5688, "step": 177780 }, { "epoch": 0.3541972140762463, "grad_norm": 0.14077767729759216, "learning_rate": 0.002, "loss": 2.5594, "step": 177790 }, { "epoch": 0.35421713629988527, "grad_norm": 0.14902497828006744, "learning_rate": 0.002, "loss": 2.5658, "step": 177800 }, { "epoch": 0.35423705852352416, "grad_norm": 0.16592593491077423, "learning_rate": 0.002, "loss": 2.5672, "step": 177810 }, { "epoch": 0.35425698074716305, "grad_norm": 0.18433421850204468, "learning_rate": 0.002, "loss": 2.5524, "step": 177820 }, { "epoch": 0.354276902970802, "grad_norm": 0.15093132853507996, "learning_rate": 0.002, "loss": 2.5569, "step": 177830 }, { "epoch": 0.3542968251944409, "grad_norm": 0.2218940258026123, "learning_rate": 0.002, "loss": 2.5714, "step": 177840 }, { "epoch": 0.35431674741807984, "grad_norm": 0.16640739142894745, "learning_rate": 0.002, "loss": 2.5527, "step": 177850 }, { "epoch": 0.3543366696417187, "grad_norm": 0.1761658787727356, "learning_rate": 0.002, "loss": 2.5735, "step": 177860 }, { "epoch": 0.3543565918653576, "grad_norm": 0.14703254401683807, "learning_rate": 0.002, "loss": 2.5538, "step": 177870 }, { "epoch": 0.35437651408899656, "grad_norm": 0.19032248854637146, "learning_rate": 0.002, "loss": 2.56, "step": 177880 }, { "epoch": 0.35439643631263545, "grad_norm": 0.1839720904827118, "learning_rate": 0.002, "loss": 2.5506, "step": 177890 }, { "epoch": 0.3544163585362744, "grad_norm": 0.15219855308532715, "learning_rate": 0.002, "loss": 2.557, "step": 177900 }, { "epoch": 0.3544362807599133, "grad_norm": 0.15545304119586945, "learning_rate": 0.002, "loss": 2.5585, "step": 177910 }, { "epoch": 0.35445620298355224, "grad_norm": 0.18917882442474365, "learning_rate": 0.002, "loss": 2.5823, "step": 177920 }, { "epoch": 0.35447612520719113, "grad_norm": 0.17534558475017548, "learning_rate": 0.002, "loss": 2.5626, "step": 177930 }, { "epoch": 0.35449604743083, "grad_norm": 0.17786604166030884, "learning_rate": 0.002, "loss": 2.5699, "step": 177940 }, { "epoch": 0.35451596965446897, "grad_norm": 0.154519721865654, "learning_rate": 0.002, "loss": 2.5587, "step": 177950 }, { "epoch": 0.35453589187810786, "grad_norm": 0.17551623284816742, "learning_rate": 0.002, "loss": 2.5463, "step": 177960 }, { "epoch": 0.3545558141017468, "grad_norm": 0.18380248546600342, "learning_rate": 0.002, "loss": 2.5664, "step": 177970 }, { "epoch": 0.3545757363253857, "grad_norm": 0.15125033259391785, "learning_rate": 0.002, "loss": 2.5479, "step": 177980 }, { "epoch": 0.3545956585490246, "grad_norm": 0.15043625235557556, "learning_rate": 0.002, "loss": 2.5551, "step": 177990 }, { "epoch": 0.35461558077266353, "grad_norm": 0.1717669665813446, "learning_rate": 0.002, "loss": 2.5513, "step": 178000 }, { "epoch": 0.3546355029963024, "grad_norm": 0.16874811053276062, "learning_rate": 0.002, "loss": 2.5544, "step": 178010 }, { "epoch": 0.35465542521994137, "grad_norm": 0.16106106340885162, "learning_rate": 0.002, "loss": 2.5669, "step": 178020 }, { "epoch": 0.35467534744358026, "grad_norm": 0.1446998417377472, "learning_rate": 0.002, "loss": 2.5688, "step": 178030 }, { "epoch": 0.3546952696672192, "grad_norm": 0.1563170850276947, "learning_rate": 0.002, "loss": 2.5575, "step": 178040 }, { "epoch": 0.3547151918908581, "grad_norm": 0.1567201465368271, "learning_rate": 0.002, "loss": 2.5631, "step": 178050 }, { "epoch": 0.354735114114497, "grad_norm": 0.1517007052898407, "learning_rate": 0.002, "loss": 2.5641, "step": 178060 }, { "epoch": 0.35475503633813593, "grad_norm": 0.17131617665290833, "learning_rate": 0.002, "loss": 2.5622, "step": 178070 }, { "epoch": 0.3547749585617748, "grad_norm": 0.16273756325244904, "learning_rate": 0.002, "loss": 2.5745, "step": 178080 }, { "epoch": 0.35479488078541377, "grad_norm": 0.17936263978481293, "learning_rate": 0.002, "loss": 2.5649, "step": 178090 }, { "epoch": 0.35481480300905266, "grad_norm": 0.15846878290176392, "learning_rate": 0.002, "loss": 2.5756, "step": 178100 }, { "epoch": 0.35483472523269155, "grad_norm": 0.1633330136537552, "learning_rate": 0.002, "loss": 2.5456, "step": 178110 }, { "epoch": 0.3548546474563305, "grad_norm": 0.15593403577804565, "learning_rate": 0.002, "loss": 2.5597, "step": 178120 }, { "epoch": 0.3548745696799694, "grad_norm": 0.15494593977928162, "learning_rate": 0.002, "loss": 2.5671, "step": 178130 }, { "epoch": 0.35489449190360833, "grad_norm": 0.22458845376968384, "learning_rate": 0.002, "loss": 2.5585, "step": 178140 }, { "epoch": 0.3549144141272472, "grad_norm": 0.15966933965682983, "learning_rate": 0.002, "loss": 2.5679, "step": 178150 }, { "epoch": 0.3549343363508861, "grad_norm": 0.1846694052219391, "learning_rate": 0.002, "loss": 2.5576, "step": 178160 }, { "epoch": 0.35495425857452506, "grad_norm": 0.17399881780147552, "learning_rate": 0.002, "loss": 2.5508, "step": 178170 }, { "epoch": 0.35497418079816395, "grad_norm": 0.1777515560388565, "learning_rate": 0.002, "loss": 2.5541, "step": 178180 }, { "epoch": 0.3549941030218029, "grad_norm": 0.15402844548225403, "learning_rate": 0.002, "loss": 2.5577, "step": 178190 }, { "epoch": 0.3550140252454418, "grad_norm": 0.1965395212173462, "learning_rate": 0.002, "loss": 2.5596, "step": 178200 }, { "epoch": 0.35503394746908074, "grad_norm": 0.16545066237449646, "learning_rate": 0.002, "loss": 2.5518, "step": 178210 }, { "epoch": 0.3550538696927196, "grad_norm": 0.1779487431049347, "learning_rate": 0.002, "loss": 2.5442, "step": 178220 }, { "epoch": 0.3550737919163585, "grad_norm": 0.24686278402805328, "learning_rate": 0.002, "loss": 2.559, "step": 178230 }, { "epoch": 0.35509371413999746, "grad_norm": 0.15748931467533112, "learning_rate": 0.002, "loss": 2.5652, "step": 178240 }, { "epoch": 0.35511363636363635, "grad_norm": 0.15892910957336426, "learning_rate": 0.002, "loss": 2.5737, "step": 178250 }, { "epoch": 0.3551335585872753, "grad_norm": 0.1465137004852295, "learning_rate": 0.002, "loss": 2.5558, "step": 178260 }, { "epoch": 0.3551534808109142, "grad_norm": 0.20794159173965454, "learning_rate": 0.002, "loss": 2.5648, "step": 178270 }, { "epoch": 0.3551734030345531, "grad_norm": 0.14546723663806915, "learning_rate": 0.002, "loss": 2.5545, "step": 178280 }, { "epoch": 0.355193325258192, "grad_norm": 0.14688095450401306, "learning_rate": 0.002, "loss": 2.5563, "step": 178290 }, { "epoch": 0.3552132474818309, "grad_norm": 0.18416056036949158, "learning_rate": 0.002, "loss": 2.5635, "step": 178300 }, { "epoch": 0.35523316970546986, "grad_norm": 0.16926005482673645, "learning_rate": 0.002, "loss": 2.5673, "step": 178310 }, { "epoch": 0.35525309192910876, "grad_norm": 0.16725951433181763, "learning_rate": 0.002, "loss": 2.5621, "step": 178320 }, { "epoch": 0.35527301415274765, "grad_norm": 0.1436900645494461, "learning_rate": 0.002, "loss": 2.5627, "step": 178330 }, { "epoch": 0.3552929363763866, "grad_norm": 0.17041468620300293, "learning_rate": 0.002, "loss": 2.5692, "step": 178340 }, { "epoch": 0.3553128586000255, "grad_norm": 0.1969873011112213, "learning_rate": 0.002, "loss": 2.5696, "step": 178350 }, { "epoch": 0.35533278082366443, "grad_norm": 0.14795494079589844, "learning_rate": 0.002, "loss": 2.5557, "step": 178360 }, { "epoch": 0.3553527030473033, "grad_norm": 0.1613176316022873, "learning_rate": 0.002, "loss": 2.5651, "step": 178370 }, { "epoch": 0.35537262527094227, "grad_norm": 0.15228955447673798, "learning_rate": 0.002, "loss": 2.5696, "step": 178380 }, { "epoch": 0.35539254749458116, "grad_norm": 0.2164023071527481, "learning_rate": 0.002, "loss": 2.5593, "step": 178390 }, { "epoch": 0.35541246971822005, "grad_norm": 0.1710158735513687, "learning_rate": 0.002, "loss": 2.5527, "step": 178400 }, { "epoch": 0.355432391941859, "grad_norm": 0.13956797122955322, "learning_rate": 0.002, "loss": 2.5638, "step": 178410 }, { "epoch": 0.3554523141654979, "grad_norm": 0.15635406970977783, "learning_rate": 0.002, "loss": 2.5488, "step": 178420 }, { "epoch": 0.35547223638913683, "grad_norm": 0.1609693318605423, "learning_rate": 0.002, "loss": 2.5659, "step": 178430 }, { "epoch": 0.3554921586127757, "grad_norm": 0.16753683984279633, "learning_rate": 0.002, "loss": 2.5498, "step": 178440 }, { "epoch": 0.3555120808364146, "grad_norm": 0.15141935646533966, "learning_rate": 0.002, "loss": 2.5482, "step": 178450 }, { "epoch": 0.35553200306005356, "grad_norm": 0.17587174475193024, "learning_rate": 0.002, "loss": 2.5719, "step": 178460 }, { "epoch": 0.35555192528369245, "grad_norm": 0.14260846376419067, "learning_rate": 0.002, "loss": 2.5562, "step": 178470 }, { "epoch": 0.3555718475073314, "grad_norm": 0.16264493763446808, "learning_rate": 0.002, "loss": 2.5753, "step": 178480 }, { "epoch": 0.3555917697309703, "grad_norm": 0.1588616669178009, "learning_rate": 0.002, "loss": 2.5533, "step": 178490 }, { "epoch": 0.35561169195460923, "grad_norm": 0.18022693693637848, "learning_rate": 0.002, "loss": 2.5641, "step": 178500 }, { "epoch": 0.3556316141782481, "grad_norm": 0.15986497700214386, "learning_rate": 0.002, "loss": 2.574, "step": 178510 }, { "epoch": 0.355651536401887, "grad_norm": 0.17973878979682922, "learning_rate": 0.002, "loss": 2.5821, "step": 178520 }, { "epoch": 0.35567145862552596, "grad_norm": 0.20849330723285675, "learning_rate": 0.002, "loss": 2.5704, "step": 178530 }, { "epoch": 0.35569138084916485, "grad_norm": 0.1483144313097, "learning_rate": 0.002, "loss": 2.5616, "step": 178540 }, { "epoch": 0.3557113030728038, "grad_norm": 0.17437011003494263, "learning_rate": 0.002, "loss": 2.5491, "step": 178550 }, { "epoch": 0.3557312252964427, "grad_norm": 0.17004384100437164, "learning_rate": 0.002, "loss": 2.548, "step": 178560 }, { "epoch": 0.3557511475200816, "grad_norm": 0.1698905974626541, "learning_rate": 0.002, "loss": 2.5549, "step": 178570 }, { "epoch": 0.3557710697437205, "grad_norm": 0.14835228025913239, "learning_rate": 0.002, "loss": 2.5664, "step": 178580 }, { "epoch": 0.3557909919673594, "grad_norm": 0.20988892018795013, "learning_rate": 0.002, "loss": 2.5477, "step": 178590 }, { "epoch": 0.35581091419099836, "grad_norm": 0.14864566922187805, "learning_rate": 0.002, "loss": 2.5602, "step": 178600 }, { "epoch": 0.35583083641463725, "grad_norm": 0.17029301822185516, "learning_rate": 0.002, "loss": 2.5681, "step": 178610 }, { "epoch": 0.35585075863827614, "grad_norm": 0.19424688816070557, "learning_rate": 0.002, "loss": 2.5624, "step": 178620 }, { "epoch": 0.3558706808619151, "grad_norm": 0.16540826857089996, "learning_rate": 0.002, "loss": 2.5575, "step": 178630 }, { "epoch": 0.355890603085554, "grad_norm": 0.1561846137046814, "learning_rate": 0.002, "loss": 2.5822, "step": 178640 }, { "epoch": 0.3559105253091929, "grad_norm": 0.1630096137523651, "learning_rate": 0.002, "loss": 2.5668, "step": 178650 }, { "epoch": 0.3559304475328318, "grad_norm": 0.16161131858825684, "learning_rate": 0.002, "loss": 2.5611, "step": 178660 }, { "epoch": 0.35595036975647076, "grad_norm": 0.17085984349250793, "learning_rate": 0.002, "loss": 2.5563, "step": 178670 }, { "epoch": 0.35597029198010965, "grad_norm": 0.1720975935459137, "learning_rate": 0.002, "loss": 2.571, "step": 178680 }, { "epoch": 0.35599021420374855, "grad_norm": 0.16536298394203186, "learning_rate": 0.002, "loss": 2.5583, "step": 178690 }, { "epoch": 0.3560101364273875, "grad_norm": 0.1573236882686615, "learning_rate": 0.002, "loss": 2.5506, "step": 178700 }, { "epoch": 0.3560300586510264, "grad_norm": 0.1924138218164444, "learning_rate": 0.002, "loss": 2.5493, "step": 178710 }, { "epoch": 0.35604998087466533, "grad_norm": 0.1598539650440216, "learning_rate": 0.002, "loss": 2.5559, "step": 178720 }, { "epoch": 0.3560699030983042, "grad_norm": 0.14354105293750763, "learning_rate": 0.002, "loss": 2.541, "step": 178730 }, { "epoch": 0.3560898253219431, "grad_norm": 0.17331331968307495, "learning_rate": 0.002, "loss": 2.572, "step": 178740 }, { "epoch": 0.35610974754558206, "grad_norm": 0.1474083513021469, "learning_rate": 0.002, "loss": 2.5568, "step": 178750 }, { "epoch": 0.35612966976922095, "grad_norm": 0.1370389312505722, "learning_rate": 0.002, "loss": 2.5489, "step": 178760 }, { "epoch": 0.3561495919928599, "grad_norm": 0.15983781218528748, "learning_rate": 0.002, "loss": 2.5644, "step": 178770 }, { "epoch": 0.3561695142164988, "grad_norm": 0.1687294840812683, "learning_rate": 0.002, "loss": 2.5721, "step": 178780 }, { "epoch": 0.35618943644013773, "grad_norm": 0.15450730919837952, "learning_rate": 0.002, "loss": 2.5582, "step": 178790 }, { "epoch": 0.3562093586637766, "grad_norm": 0.2044048309326172, "learning_rate": 0.002, "loss": 2.5548, "step": 178800 }, { "epoch": 0.3562292808874155, "grad_norm": 0.1610679030418396, "learning_rate": 0.002, "loss": 2.5754, "step": 178810 }, { "epoch": 0.35624920311105446, "grad_norm": 0.15465304255485535, "learning_rate": 0.002, "loss": 2.5627, "step": 178820 }, { "epoch": 0.35626912533469335, "grad_norm": 0.15748445689678192, "learning_rate": 0.002, "loss": 2.578, "step": 178830 }, { "epoch": 0.3562890475583323, "grad_norm": 0.17577774822711945, "learning_rate": 0.002, "loss": 2.5661, "step": 178840 }, { "epoch": 0.3563089697819712, "grad_norm": 0.1761617511510849, "learning_rate": 0.002, "loss": 2.5634, "step": 178850 }, { "epoch": 0.3563288920056101, "grad_norm": 0.18072260916233063, "learning_rate": 0.002, "loss": 2.5753, "step": 178860 }, { "epoch": 0.356348814229249, "grad_norm": 0.16740043461322784, "learning_rate": 0.002, "loss": 2.5482, "step": 178870 }, { "epoch": 0.3563687364528879, "grad_norm": 0.18677176535129547, "learning_rate": 0.002, "loss": 2.5645, "step": 178880 }, { "epoch": 0.35638865867652686, "grad_norm": 0.19401374459266663, "learning_rate": 0.002, "loss": 2.558, "step": 178890 }, { "epoch": 0.35640858090016575, "grad_norm": 0.15900744497776031, "learning_rate": 0.002, "loss": 2.5484, "step": 178900 }, { "epoch": 0.35642850312380464, "grad_norm": 0.16076752543449402, "learning_rate": 0.002, "loss": 2.5411, "step": 178910 }, { "epoch": 0.3564484253474436, "grad_norm": 0.1516319215297699, "learning_rate": 0.002, "loss": 2.5644, "step": 178920 }, { "epoch": 0.3564683475710825, "grad_norm": 0.19564588367938995, "learning_rate": 0.002, "loss": 2.5607, "step": 178930 }, { "epoch": 0.3564882697947214, "grad_norm": 0.15034954249858856, "learning_rate": 0.002, "loss": 2.5587, "step": 178940 }, { "epoch": 0.3565081920183603, "grad_norm": 0.1711595505475998, "learning_rate": 0.002, "loss": 2.5501, "step": 178950 }, { "epoch": 0.35652811424199926, "grad_norm": 0.13834615051746368, "learning_rate": 0.002, "loss": 2.5708, "step": 178960 }, { "epoch": 0.35654803646563815, "grad_norm": 0.14809037744998932, "learning_rate": 0.002, "loss": 2.5662, "step": 178970 }, { "epoch": 0.35656795868927704, "grad_norm": 0.2048802524805069, "learning_rate": 0.002, "loss": 2.5684, "step": 178980 }, { "epoch": 0.356587880912916, "grad_norm": 0.18425171077251434, "learning_rate": 0.002, "loss": 2.562, "step": 178990 }, { "epoch": 0.3566078031365549, "grad_norm": 0.16837884485721588, "learning_rate": 0.002, "loss": 2.5764, "step": 179000 }, { "epoch": 0.3566277253601938, "grad_norm": 0.17706607282161713, "learning_rate": 0.002, "loss": 2.5458, "step": 179010 }, { "epoch": 0.3566476475838327, "grad_norm": 0.5434151291847229, "learning_rate": 0.002, "loss": 2.5735, "step": 179020 }, { "epoch": 0.3566675698074716, "grad_norm": 0.30309048295021057, "learning_rate": 0.002, "loss": 2.574, "step": 179030 }, { "epoch": 0.35668749203111055, "grad_norm": 0.1468692570924759, "learning_rate": 0.002, "loss": 2.5565, "step": 179040 }, { "epoch": 0.35670741425474944, "grad_norm": 0.1677100509405136, "learning_rate": 0.002, "loss": 2.5585, "step": 179050 }, { "epoch": 0.3567273364783884, "grad_norm": 0.17558099329471588, "learning_rate": 0.002, "loss": 2.553, "step": 179060 }, { "epoch": 0.3567472587020273, "grad_norm": 0.1684246063232422, "learning_rate": 0.002, "loss": 2.5645, "step": 179070 }, { "epoch": 0.35676718092566617, "grad_norm": 0.16150784492492676, "learning_rate": 0.002, "loss": 2.5664, "step": 179080 }, { "epoch": 0.3567871031493051, "grad_norm": 0.16423030197620392, "learning_rate": 0.002, "loss": 2.5559, "step": 179090 }, { "epoch": 0.356807025372944, "grad_norm": 0.1573384404182434, "learning_rate": 0.002, "loss": 2.5617, "step": 179100 }, { "epoch": 0.35682694759658296, "grad_norm": 0.20650698244571686, "learning_rate": 0.002, "loss": 2.5597, "step": 179110 }, { "epoch": 0.35684686982022185, "grad_norm": 0.14342884719371796, "learning_rate": 0.002, "loss": 2.569, "step": 179120 }, { "epoch": 0.3568667920438608, "grad_norm": 0.1698395311832428, "learning_rate": 0.002, "loss": 2.5609, "step": 179130 }, { "epoch": 0.3568867142674997, "grad_norm": 0.16618646681308746, "learning_rate": 0.002, "loss": 2.5592, "step": 179140 }, { "epoch": 0.3569066364911386, "grad_norm": 0.15877807140350342, "learning_rate": 0.002, "loss": 2.5545, "step": 179150 }, { "epoch": 0.3569265587147775, "grad_norm": 0.19747939705848694, "learning_rate": 0.002, "loss": 2.5664, "step": 179160 }, { "epoch": 0.3569464809384164, "grad_norm": 0.1592107117176056, "learning_rate": 0.002, "loss": 2.556, "step": 179170 }, { "epoch": 0.35696640316205536, "grad_norm": 0.202830508351326, "learning_rate": 0.002, "loss": 2.5469, "step": 179180 }, { "epoch": 0.35698632538569425, "grad_norm": 0.1932213455438614, "learning_rate": 0.002, "loss": 2.5529, "step": 179190 }, { "epoch": 0.35700624760933314, "grad_norm": 0.18409661948680878, "learning_rate": 0.002, "loss": 2.5573, "step": 179200 }, { "epoch": 0.3570261698329721, "grad_norm": 0.1631820648908615, "learning_rate": 0.002, "loss": 2.5614, "step": 179210 }, { "epoch": 0.357046092056611, "grad_norm": 0.1446564644575119, "learning_rate": 0.002, "loss": 2.5507, "step": 179220 }, { "epoch": 0.3570660142802499, "grad_norm": 0.17604805529117584, "learning_rate": 0.002, "loss": 2.5694, "step": 179230 }, { "epoch": 0.3570859365038888, "grad_norm": 0.1547682136297226, "learning_rate": 0.002, "loss": 2.5593, "step": 179240 }, { "epoch": 0.35710585872752776, "grad_norm": 0.1939024031162262, "learning_rate": 0.002, "loss": 2.5488, "step": 179250 }, { "epoch": 0.35712578095116665, "grad_norm": 0.15366648137569427, "learning_rate": 0.002, "loss": 2.5572, "step": 179260 }, { "epoch": 0.35714570317480554, "grad_norm": 0.14353786408901215, "learning_rate": 0.002, "loss": 2.5642, "step": 179270 }, { "epoch": 0.3571656253984445, "grad_norm": 0.16784407198429108, "learning_rate": 0.002, "loss": 2.5458, "step": 179280 }, { "epoch": 0.3571855476220834, "grad_norm": 0.1700640618801117, "learning_rate": 0.002, "loss": 2.5348, "step": 179290 }, { "epoch": 0.3572054698457223, "grad_norm": 0.1474529355764389, "learning_rate": 0.002, "loss": 2.5586, "step": 179300 }, { "epoch": 0.3572253920693612, "grad_norm": 0.17045824229717255, "learning_rate": 0.002, "loss": 2.5648, "step": 179310 }, { "epoch": 0.3572453142930001, "grad_norm": 0.1880740523338318, "learning_rate": 0.002, "loss": 2.5518, "step": 179320 }, { "epoch": 0.35726523651663905, "grad_norm": 0.16038727760314941, "learning_rate": 0.002, "loss": 2.5655, "step": 179330 }, { "epoch": 0.35728515874027794, "grad_norm": 0.14519353210926056, "learning_rate": 0.002, "loss": 2.5575, "step": 179340 }, { "epoch": 0.3573050809639169, "grad_norm": 0.1767306923866272, "learning_rate": 0.002, "loss": 2.5638, "step": 179350 }, { "epoch": 0.3573250031875558, "grad_norm": 0.15385034680366516, "learning_rate": 0.002, "loss": 2.5437, "step": 179360 }, { "epoch": 0.35734492541119467, "grad_norm": 0.15835556387901306, "learning_rate": 0.002, "loss": 2.5701, "step": 179370 }, { "epoch": 0.3573648476348336, "grad_norm": 0.14836347103118896, "learning_rate": 0.002, "loss": 2.5578, "step": 179380 }, { "epoch": 0.3573847698584725, "grad_norm": 0.1527249962091446, "learning_rate": 0.002, "loss": 2.5729, "step": 179390 }, { "epoch": 0.35740469208211145, "grad_norm": 0.1550351083278656, "learning_rate": 0.002, "loss": 2.5702, "step": 179400 }, { "epoch": 0.35742461430575034, "grad_norm": 0.14161711931228638, "learning_rate": 0.002, "loss": 2.558, "step": 179410 }, { "epoch": 0.3574445365293893, "grad_norm": 0.15265777707099915, "learning_rate": 0.002, "loss": 2.5499, "step": 179420 }, { "epoch": 0.3574644587530282, "grad_norm": 0.15992072224617004, "learning_rate": 0.002, "loss": 2.5596, "step": 179430 }, { "epoch": 0.35748438097666707, "grad_norm": 0.16606053709983826, "learning_rate": 0.002, "loss": 2.5668, "step": 179440 }, { "epoch": 0.357504303200306, "grad_norm": 0.1879088133573532, "learning_rate": 0.002, "loss": 2.5604, "step": 179450 }, { "epoch": 0.3575242254239449, "grad_norm": 0.1990872323513031, "learning_rate": 0.002, "loss": 2.5679, "step": 179460 }, { "epoch": 0.35754414764758385, "grad_norm": 0.1928267627954483, "learning_rate": 0.002, "loss": 2.5663, "step": 179470 }, { "epoch": 0.35756406987122274, "grad_norm": 0.14728645980358124, "learning_rate": 0.002, "loss": 2.5525, "step": 179480 }, { "epoch": 0.35758399209486164, "grad_norm": 0.1766178011894226, "learning_rate": 0.002, "loss": 2.5675, "step": 179490 }, { "epoch": 0.3576039143185006, "grad_norm": 0.1854456514120102, "learning_rate": 0.002, "loss": 2.5573, "step": 179500 }, { "epoch": 0.3576238365421395, "grad_norm": 0.15991993248462677, "learning_rate": 0.002, "loss": 2.5618, "step": 179510 }, { "epoch": 0.3576437587657784, "grad_norm": 0.23622652888298035, "learning_rate": 0.002, "loss": 2.5637, "step": 179520 }, { "epoch": 0.3576636809894173, "grad_norm": 0.18177035450935364, "learning_rate": 0.002, "loss": 2.5512, "step": 179530 }, { "epoch": 0.35768360321305626, "grad_norm": 0.16482491791248322, "learning_rate": 0.002, "loss": 2.572, "step": 179540 }, { "epoch": 0.35770352543669515, "grad_norm": 0.30957797169685364, "learning_rate": 0.002, "loss": 2.5679, "step": 179550 }, { "epoch": 0.35772344766033404, "grad_norm": 0.22451259195804596, "learning_rate": 0.002, "loss": 2.5554, "step": 179560 }, { "epoch": 0.357743369883973, "grad_norm": 0.1569010317325592, "learning_rate": 0.002, "loss": 2.5645, "step": 179570 }, { "epoch": 0.3577632921076119, "grad_norm": 0.1508697271347046, "learning_rate": 0.002, "loss": 2.5566, "step": 179580 }, { "epoch": 0.3577832143312508, "grad_norm": 0.14627040922641754, "learning_rate": 0.002, "loss": 2.5601, "step": 179590 }, { "epoch": 0.3578031365548897, "grad_norm": 0.1630205512046814, "learning_rate": 0.002, "loss": 2.5545, "step": 179600 }, { "epoch": 0.3578230587785286, "grad_norm": 0.1842205822467804, "learning_rate": 0.002, "loss": 2.5682, "step": 179610 }, { "epoch": 0.35784298100216755, "grad_norm": 0.16087017953395844, "learning_rate": 0.002, "loss": 2.569, "step": 179620 }, { "epoch": 0.35786290322580644, "grad_norm": 0.15238486230373383, "learning_rate": 0.002, "loss": 2.5653, "step": 179630 }, { "epoch": 0.3578828254494454, "grad_norm": 0.1900770664215088, "learning_rate": 0.002, "loss": 2.5525, "step": 179640 }, { "epoch": 0.3579027476730843, "grad_norm": 0.17055024206638336, "learning_rate": 0.002, "loss": 2.5698, "step": 179650 }, { "epoch": 0.35792266989672317, "grad_norm": 0.19147725403308868, "learning_rate": 0.002, "loss": 2.57, "step": 179660 }, { "epoch": 0.3579425921203621, "grad_norm": 0.17240214347839355, "learning_rate": 0.002, "loss": 2.5633, "step": 179670 }, { "epoch": 0.357962514344001, "grad_norm": 0.1431332528591156, "learning_rate": 0.002, "loss": 2.5632, "step": 179680 }, { "epoch": 0.35798243656763995, "grad_norm": 0.15548330545425415, "learning_rate": 0.002, "loss": 2.5529, "step": 179690 }, { "epoch": 0.35800235879127884, "grad_norm": 0.2107442319393158, "learning_rate": 0.002, "loss": 2.5735, "step": 179700 }, { "epoch": 0.3580222810149178, "grad_norm": 0.1531393676996231, "learning_rate": 0.002, "loss": 2.5462, "step": 179710 }, { "epoch": 0.3580422032385567, "grad_norm": 0.147080659866333, "learning_rate": 0.002, "loss": 2.5624, "step": 179720 }, { "epoch": 0.35806212546219557, "grad_norm": 0.23510144650936127, "learning_rate": 0.002, "loss": 2.5567, "step": 179730 }, { "epoch": 0.3580820476858345, "grad_norm": 0.1570644974708557, "learning_rate": 0.002, "loss": 2.5742, "step": 179740 }, { "epoch": 0.3581019699094734, "grad_norm": 0.16669949889183044, "learning_rate": 0.002, "loss": 2.5795, "step": 179750 }, { "epoch": 0.35812189213311235, "grad_norm": 0.19913965463638306, "learning_rate": 0.002, "loss": 2.5585, "step": 179760 }, { "epoch": 0.35814181435675124, "grad_norm": 0.16420042514801025, "learning_rate": 0.002, "loss": 2.5729, "step": 179770 }, { "epoch": 0.35816173658039013, "grad_norm": 0.17934057116508484, "learning_rate": 0.002, "loss": 2.5547, "step": 179780 }, { "epoch": 0.3581816588040291, "grad_norm": 0.1801263391971588, "learning_rate": 0.002, "loss": 2.5589, "step": 179790 }, { "epoch": 0.35820158102766797, "grad_norm": 0.16712741553783417, "learning_rate": 0.002, "loss": 2.5517, "step": 179800 }, { "epoch": 0.3582215032513069, "grad_norm": 0.15320615470409393, "learning_rate": 0.002, "loss": 2.5627, "step": 179810 }, { "epoch": 0.3582414254749458, "grad_norm": 0.17917989194393158, "learning_rate": 0.002, "loss": 2.5458, "step": 179820 }, { "epoch": 0.3582613476985847, "grad_norm": 0.19352352619171143, "learning_rate": 0.002, "loss": 2.5677, "step": 179830 }, { "epoch": 0.35828126992222364, "grad_norm": 0.16940361261367798, "learning_rate": 0.002, "loss": 2.5571, "step": 179840 }, { "epoch": 0.35830119214586253, "grad_norm": 0.14928656816482544, "learning_rate": 0.002, "loss": 2.5578, "step": 179850 }, { "epoch": 0.3583211143695015, "grad_norm": 0.18907757103443146, "learning_rate": 0.002, "loss": 2.5548, "step": 179860 }, { "epoch": 0.35834103659314037, "grad_norm": 0.19152745604515076, "learning_rate": 0.002, "loss": 2.5705, "step": 179870 }, { "epoch": 0.3583609588167793, "grad_norm": 0.15821772813796997, "learning_rate": 0.002, "loss": 2.5622, "step": 179880 }, { "epoch": 0.3583808810404182, "grad_norm": 0.16011247038841248, "learning_rate": 0.002, "loss": 2.5492, "step": 179890 }, { "epoch": 0.3584008032640571, "grad_norm": 0.21461452543735504, "learning_rate": 0.002, "loss": 2.5567, "step": 179900 }, { "epoch": 0.35842072548769605, "grad_norm": 0.18490557372570038, "learning_rate": 0.002, "loss": 2.5421, "step": 179910 }, { "epoch": 0.35844064771133494, "grad_norm": 0.15959736704826355, "learning_rate": 0.002, "loss": 2.5519, "step": 179920 }, { "epoch": 0.3584605699349739, "grad_norm": 0.1800232231616974, "learning_rate": 0.002, "loss": 2.5567, "step": 179930 }, { "epoch": 0.3584804921586128, "grad_norm": 0.18615376949310303, "learning_rate": 0.002, "loss": 2.5616, "step": 179940 }, { "epoch": 0.35850041438225166, "grad_norm": 0.15285837650299072, "learning_rate": 0.002, "loss": 2.5608, "step": 179950 }, { "epoch": 0.3585203366058906, "grad_norm": 0.1756954789161682, "learning_rate": 0.002, "loss": 2.5697, "step": 179960 }, { "epoch": 0.3585402588295295, "grad_norm": 0.17136642336845398, "learning_rate": 0.002, "loss": 2.565, "step": 179970 }, { "epoch": 0.35856018105316845, "grad_norm": 0.18137283623218536, "learning_rate": 0.002, "loss": 2.5552, "step": 179980 }, { "epoch": 0.35858010327680734, "grad_norm": 0.17735044658184052, "learning_rate": 0.002, "loss": 2.5727, "step": 179990 }, { "epoch": 0.3586000255004463, "grad_norm": 0.1687253713607788, "learning_rate": 0.002, "loss": 2.5567, "step": 180000 }, { "epoch": 0.3586199477240852, "grad_norm": 0.17088212072849274, "learning_rate": 0.002, "loss": 2.568, "step": 180010 }, { "epoch": 0.35863986994772407, "grad_norm": 0.18709705770015717, "learning_rate": 0.002, "loss": 2.566, "step": 180020 }, { "epoch": 0.358659792171363, "grad_norm": 0.1501307338476181, "learning_rate": 0.002, "loss": 2.5644, "step": 180030 }, { "epoch": 0.3586797143950019, "grad_norm": 0.2269585132598877, "learning_rate": 0.002, "loss": 2.5514, "step": 180040 }, { "epoch": 0.35869963661864085, "grad_norm": 0.19554542005062103, "learning_rate": 0.002, "loss": 2.5584, "step": 180050 }, { "epoch": 0.35871955884227974, "grad_norm": 0.16052992641925812, "learning_rate": 0.002, "loss": 2.5677, "step": 180060 }, { "epoch": 0.35873948106591863, "grad_norm": 0.1840229034423828, "learning_rate": 0.002, "loss": 2.5624, "step": 180070 }, { "epoch": 0.3587594032895576, "grad_norm": 0.18654809892177582, "learning_rate": 0.002, "loss": 2.5521, "step": 180080 }, { "epoch": 0.35877932551319647, "grad_norm": 0.14875274896621704, "learning_rate": 0.002, "loss": 2.5694, "step": 180090 }, { "epoch": 0.3587992477368354, "grad_norm": 0.18341675400733948, "learning_rate": 0.002, "loss": 2.5605, "step": 180100 }, { "epoch": 0.3588191699604743, "grad_norm": 0.16623720526695251, "learning_rate": 0.002, "loss": 2.5667, "step": 180110 }, { "epoch": 0.3588390921841132, "grad_norm": 0.4119715094566345, "learning_rate": 0.002, "loss": 2.5519, "step": 180120 }, { "epoch": 0.35885901440775214, "grad_norm": 0.15640494227409363, "learning_rate": 0.002, "loss": 2.5546, "step": 180130 }, { "epoch": 0.35887893663139103, "grad_norm": 0.17512571811676025, "learning_rate": 0.002, "loss": 2.5614, "step": 180140 }, { "epoch": 0.35889885885503, "grad_norm": 0.15622933208942413, "learning_rate": 0.002, "loss": 2.5636, "step": 180150 }, { "epoch": 0.35891878107866887, "grad_norm": 0.1631789356470108, "learning_rate": 0.002, "loss": 2.5686, "step": 180160 }, { "epoch": 0.3589387033023078, "grad_norm": 0.16044233739376068, "learning_rate": 0.002, "loss": 2.5634, "step": 180170 }, { "epoch": 0.3589586255259467, "grad_norm": 0.17007584869861603, "learning_rate": 0.002, "loss": 2.5652, "step": 180180 }, { "epoch": 0.3589785477495856, "grad_norm": 0.13626450300216675, "learning_rate": 0.002, "loss": 2.5614, "step": 180190 }, { "epoch": 0.35899846997322454, "grad_norm": 0.1913544088602066, "learning_rate": 0.002, "loss": 2.5778, "step": 180200 }, { "epoch": 0.35901839219686343, "grad_norm": 0.17624865472316742, "learning_rate": 0.002, "loss": 2.5569, "step": 180210 }, { "epoch": 0.3590383144205024, "grad_norm": 0.1672595590353012, "learning_rate": 0.002, "loss": 2.5635, "step": 180220 }, { "epoch": 0.35905823664414127, "grad_norm": 0.14057549834251404, "learning_rate": 0.002, "loss": 2.5592, "step": 180230 }, { "epoch": 0.35907815886778016, "grad_norm": 0.21061518788337708, "learning_rate": 0.002, "loss": 2.5599, "step": 180240 }, { "epoch": 0.3590980810914191, "grad_norm": 0.15379251539707184, "learning_rate": 0.002, "loss": 2.5563, "step": 180250 }, { "epoch": 0.359118003315058, "grad_norm": 0.13674207031726837, "learning_rate": 0.002, "loss": 2.5484, "step": 180260 }, { "epoch": 0.35913792553869694, "grad_norm": 0.17220625281333923, "learning_rate": 0.002, "loss": 2.5655, "step": 180270 }, { "epoch": 0.35915784776233584, "grad_norm": 0.14015524089336395, "learning_rate": 0.002, "loss": 2.5673, "step": 180280 }, { "epoch": 0.3591777699859748, "grad_norm": 0.18703456223011017, "learning_rate": 0.002, "loss": 2.5603, "step": 180290 }, { "epoch": 0.3591976922096137, "grad_norm": 0.16930252313613892, "learning_rate": 0.002, "loss": 2.569, "step": 180300 }, { "epoch": 0.35921761443325256, "grad_norm": 0.2143256813287735, "learning_rate": 0.002, "loss": 2.5655, "step": 180310 }, { "epoch": 0.3592375366568915, "grad_norm": 0.1468808650970459, "learning_rate": 0.002, "loss": 2.5591, "step": 180320 }, { "epoch": 0.3592574588805304, "grad_norm": 0.1518317312002182, "learning_rate": 0.002, "loss": 2.5737, "step": 180330 }, { "epoch": 0.35927738110416935, "grad_norm": 0.17966602742671967, "learning_rate": 0.002, "loss": 2.5555, "step": 180340 }, { "epoch": 0.35929730332780824, "grad_norm": 0.18848402798175812, "learning_rate": 0.002, "loss": 2.5694, "step": 180350 }, { "epoch": 0.35931722555144713, "grad_norm": 0.14612290263175964, "learning_rate": 0.002, "loss": 2.5361, "step": 180360 }, { "epoch": 0.3593371477750861, "grad_norm": 0.19126808643341064, "learning_rate": 0.002, "loss": 2.5702, "step": 180370 }, { "epoch": 0.35935706999872497, "grad_norm": 0.1626848727464676, "learning_rate": 0.002, "loss": 2.569, "step": 180380 }, { "epoch": 0.3593769922223639, "grad_norm": 0.18566013872623444, "learning_rate": 0.002, "loss": 2.5505, "step": 180390 }, { "epoch": 0.3593969144460028, "grad_norm": 0.16429884731769562, "learning_rate": 0.002, "loss": 2.5657, "step": 180400 }, { "epoch": 0.3594168366696417, "grad_norm": 0.17186498641967773, "learning_rate": 0.002, "loss": 2.5413, "step": 180410 }, { "epoch": 0.35943675889328064, "grad_norm": 0.20070946216583252, "learning_rate": 0.002, "loss": 2.5649, "step": 180420 }, { "epoch": 0.35945668111691953, "grad_norm": 0.15948471426963806, "learning_rate": 0.002, "loss": 2.5685, "step": 180430 }, { "epoch": 0.3594766033405585, "grad_norm": 0.16194017231464386, "learning_rate": 0.002, "loss": 2.5648, "step": 180440 }, { "epoch": 0.35949652556419737, "grad_norm": 0.17038476467132568, "learning_rate": 0.002, "loss": 2.5526, "step": 180450 }, { "epoch": 0.3595164477878363, "grad_norm": 0.19929730892181396, "learning_rate": 0.002, "loss": 2.5651, "step": 180460 }, { "epoch": 0.3595363700114752, "grad_norm": 0.1586206704378128, "learning_rate": 0.002, "loss": 2.5556, "step": 180470 }, { "epoch": 0.3595562922351141, "grad_norm": 0.1546359658241272, "learning_rate": 0.002, "loss": 2.5545, "step": 180480 }, { "epoch": 0.35957621445875304, "grad_norm": 0.1722186803817749, "learning_rate": 0.002, "loss": 2.5583, "step": 180490 }, { "epoch": 0.35959613668239193, "grad_norm": 0.16627956926822662, "learning_rate": 0.002, "loss": 2.5583, "step": 180500 }, { "epoch": 0.3596160589060309, "grad_norm": 0.18324069678783417, "learning_rate": 0.002, "loss": 2.5666, "step": 180510 }, { "epoch": 0.35963598112966977, "grad_norm": 0.14218702912330627, "learning_rate": 0.002, "loss": 2.5547, "step": 180520 }, { "epoch": 0.35965590335330866, "grad_norm": 0.16255535185337067, "learning_rate": 0.002, "loss": 2.5596, "step": 180530 }, { "epoch": 0.3596758255769476, "grad_norm": 0.1724640280008316, "learning_rate": 0.002, "loss": 2.5552, "step": 180540 }, { "epoch": 0.3596957478005865, "grad_norm": 0.17691044509410858, "learning_rate": 0.002, "loss": 2.5595, "step": 180550 }, { "epoch": 0.35971567002422544, "grad_norm": 0.15882156789302826, "learning_rate": 0.002, "loss": 2.5535, "step": 180560 }, { "epoch": 0.35973559224786433, "grad_norm": 0.156052365899086, "learning_rate": 0.002, "loss": 2.5698, "step": 180570 }, { "epoch": 0.3597555144715032, "grad_norm": 0.17630749940872192, "learning_rate": 0.002, "loss": 2.5565, "step": 180580 }, { "epoch": 0.35977543669514217, "grad_norm": 0.16936863958835602, "learning_rate": 0.002, "loss": 2.5486, "step": 180590 }, { "epoch": 0.35979535891878106, "grad_norm": 0.16820721328258514, "learning_rate": 0.002, "loss": 2.5594, "step": 180600 }, { "epoch": 0.35981528114242, "grad_norm": 0.17244677245616913, "learning_rate": 0.002, "loss": 2.5503, "step": 180610 }, { "epoch": 0.3598352033660589, "grad_norm": 0.18394125998020172, "learning_rate": 0.002, "loss": 2.5582, "step": 180620 }, { "epoch": 0.35985512558969784, "grad_norm": 0.15146580338478088, "learning_rate": 0.002, "loss": 2.5692, "step": 180630 }, { "epoch": 0.35987504781333673, "grad_norm": 0.19939112663269043, "learning_rate": 0.002, "loss": 2.5626, "step": 180640 }, { "epoch": 0.3598949700369756, "grad_norm": 0.1824774444103241, "learning_rate": 0.002, "loss": 2.5683, "step": 180650 }, { "epoch": 0.35991489226061457, "grad_norm": 0.16907966136932373, "learning_rate": 0.002, "loss": 2.5403, "step": 180660 }, { "epoch": 0.35993481448425346, "grad_norm": 0.1700935959815979, "learning_rate": 0.002, "loss": 2.5484, "step": 180670 }, { "epoch": 0.3599547367078924, "grad_norm": 0.14969898760318756, "learning_rate": 0.002, "loss": 2.555, "step": 180680 }, { "epoch": 0.3599746589315313, "grad_norm": 0.1520157903432846, "learning_rate": 0.002, "loss": 2.5701, "step": 180690 }, { "epoch": 0.3599945811551702, "grad_norm": 0.18915513157844543, "learning_rate": 0.002, "loss": 2.5629, "step": 180700 }, { "epoch": 0.36001450337880914, "grad_norm": 0.1475003957748413, "learning_rate": 0.002, "loss": 2.5524, "step": 180710 }, { "epoch": 0.360034425602448, "grad_norm": 0.17146001756191254, "learning_rate": 0.002, "loss": 2.5571, "step": 180720 }, { "epoch": 0.360054347826087, "grad_norm": 0.17861974239349365, "learning_rate": 0.002, "loss": 2.5692, "step": 180730 }, { "epoch": 0.36007427004972586, "grad_norm": 0.15893076360225677, "learning_rate": 0.002, "loss": 2.5485, "step": 180740 }, { "epoch": 0.3600941922733648, "grad_norm": 0.16829460859298706, "learning_rate": 0.002, "loss": 2.5673, "step": 180750 }, { "epoch": 0.3601141144970037, "grad_norm": 0.14532074332237244, "learning_rate": 0.002, "loss": 2.5656, "step": 180760 }, { "epoch": 0.3601340367206426, "grad_norm": 0.1503920555114746, "learning_rate": 0.002, "loss": 2.5672, "step": 180770 }, { "epoch": 0.36015395894428154, "grad_norm": 0.18566524982452393, "learning_rate": 0.002, "loss": 2.5736, "step": 180780 }, { "epoch": 0.36017388116792043, "grad_norm": 0.17812858521938324, "learning_rate": 0.002, "loss": 2.5515, "step": 180790 }, { "epoch": 0.3601938033915594, "grad_norm": 0.14185477793216705, "learning_rate": 0.002, "loss": 2.5747, "step": 180800 }, { "epoch": 0.36021372561519827, "grad_norm": 0.2074068933725357, "learning_rate": 0.002, "loss": 2.5559, "step": 180810 }, { "epoch": 0.36023364783883716, "grad_norm": 0.19515687227249146, "learning_rate": 0.002, "loss": 2.5549, "step": 180820 }, { "epoch": 0.3602535700624761, "grad_norm": 0.14813069999217987, "learning_rate": 0.002, "loss": 2.5766, "step": 180830 }, { "epoch": 0.360273492286115, "grad_norm": 0.15056094527244568, "learning_rate": 0.002, "loss": 2.5555, "step": 180840 }, { "epoch": 0.36029341450975394, "grad_norm": 0.16716411709785461, "learning_rate": 0.002, "loss": 2.5773, "step": 180850 }, { "epoch": 0.36031333673339283, "grad_norm": 0.1498710662126541, "learning_rate": 0.002, "loss": 2.5784, "step": 180860 }, { "epoch": 0.3603332589570317, "grad_norm": 0.1703415811061859, "learning_rate": 0.002, "loss": 2.5604, "step": 180870 }, { "epoch": 0.36035318118067067, "grad_norm": 0.18591566383838654, "learning_rate": 0.002, "loss": 2.5554, "step": 180880 }, { "epoch": 0.36037310340430956, "grad_norm": 0.16238175332546234, "learning_rate": 0.002, "loss": 2.5618, "step": 180890 }, { "epoch": 0.3603930256279485, "grad_norm": 0.17621426284313202, "learning_rate": 0.002, "loss": 2.5677, "step": 180900 }, { "epoch": 0.3604129478515874, "grad_norm": 0.15464894473552704, "learning_rate": 0.002, "loss": 2.5755, "step": 180910 }, { "epoch": 0.36043287007522634, "grad_norm": 0.1929248571395874, "learning_rate": 0.002, "loss": 2.5567, "step": 180920 }, { "epoch": 0.36045279229886523, "grad_norm": 0.17708706855773926, "learning_rate": 0.002, "loss": 2.5603, "step": 180930 }, { "epoch": 0.3604727145225041, "grad_norm": 0.16135048866271973, "learning_rate": 0.002, "loss": 2.5538, "step": 180940 }, { "epoch": 0.36049263674614307, "grad_norm": 0.19111014902591705, "learning_rate": 0.002, "loss": 2.5593, "step": 180950 }, { "epoch": 0.36051255896978196, "grad_norm": 0.16558195650577545, "learning_rate": 0.002, "loss": 2.5619, "step": 180960 }, { "epoch": 0.3605324811934209, "grad_norm": 0.18744724988937378, "learning_rate": 0.002, "loss": 2.5549, "step": 180970 }, { "epoch": 0.3605524034170598, "grad_norm": 0.17394663393497467, "learning_rate": 0.002, "loss": 2.5649, "step": 180980 }, { "epoch": 0.3605723256406987, "grad_norm": 0.16464479267597198, "learning_rate": 0.002, "loss": 2.5578, "step": 180990 }, { "epoch": 0.36059224786433763, "grad_norm": 0.14768671989440918, "learning_rate": 0.002, "loss": 2.5564, "step": 181000 }, { "epoch": 0.3606121700879765, "grad_norm": 0.16244077682495117, "learning_rate": 0.002, "loss": 2.5728, "step": 181010 }, { "epoch": 0.36063209231161547, "grad_norm": 0.14969605207443237, "learning_rate": 0.002, "loss": 2.5704, "step": 181020 }, { "epoch": 0.36065201453525436, "grad_norm": 0.1630566269159317, "learning_rate": 0.002, "loss": 2.5597, "step": 181030 }, { "epoch": 0.3606719367588933, "grad_norm": 0.15896885097026825, "learning_rate": 0.002, "loss": 2.5382, "step": 181040 }, { "epoch": 0.3606918589825322, "grad_norm": 0.1858590841293335, "learning_rate": 0.002, "loss": 2.562, "step": 181050 }, { "epoch": 0.3607117812061711, "grad_norm": 0.1752358078956604, "learning_rate": 0.002, "loss": 2.5639, "step": 181060 }, { "epoch": 0.36073170342981004, "grad_norm": 0.1950642317533493, "learning_rate": 0.002, "loss": 2.5522, "step": 181070 }, { "epoch": 0.3607516256534489, "grad_norm": 0.1536089926958084, "learning_rate": 0.002, "loss": 2.5601, "step": 181080 }, { "epoch": 0.3607715478770879, "grad_norm": 0.158983513712883, "learning_rate": 0.002, "loss": 2.5422, "step": 181090 }, { "epoch": 0.36079147010072676, "grad_norm": 0.15423746407032013, "learning_rate": 0.002, "loss": 2.5623, "step": 181100 }, { "epoch": 0.36081139232436565, "grad_norm": 0.15732510387897491, "learning_rate": 0.002, "loss": 2.5607, "step": 181110 }, { "epoch": 0.3608313145480046, "grad_norm": 0.15679191052913666, "learning_rate": 0.002, "loss": 2.5637, "step": 181120 }, { "epoch": 0.3608512367716435, "grad_norm": 0.15616358816623688, "learning_rate": 0.002, "loss": 2.5623, "step": 181130 }, { "epoch": 0.36087115899528244, "grad_norm": 0.1572994589805603, "learning_rate": 0.002, "loss": 2.5686, "step": 181140 }, { "epoch": 0.36089108121892133, "grad_norm": 0.3365967273712158, "learning_rate": 0.002, "loss": 2.5643, "step": 181150 }, { "epoch": 0.3609110034425602, "grad_norm": 0.16397681832313538, "learning_rate": 0.002, "loss": 2.5585, "step": 181160 }, { "epoch": 0.36093092566619916, "grad_norm": 0.17583253979682922, "learning_rate": 0.002, "loss": 2.5496, "step": 181170 }, { "epoch": 0.36095084788983806, "grad_norm": 0.18472617864608765, "learning_rate": 0.002, "loss": 2.5468, "step": 181180 }, { "epoch": 0.360970770113477, "grad_norm": 0.1444738209247589, "learning_rate": 0.002, "loss": 2.5613, "step": 181190 }, { "epoch": 0.3609906923371159, "grad_norm": 0.16104359924793243, "learning_rate": 0.002, "loss": 2.5641, "step": 181200 }, { "epoch": 0.36101061456075484, "grad_norm": 0.18206989765167236, "learning_rate": 0.002, "loss": 2.5555, "step": 181210 }, { "epoch": 0.36103053678439373, "grad_norm": 0.16128955781459808, "learning_rate": 0.002, "loss": 2.5585, "step": 181220 }, { "epoch": 0.3610504590080326, "grad_norm": 0.21241901814937592, "learning_rate": 0.002, "loss": 2.5811, "step": 181230 }, { "epoch": 0.36107038123167157, "grad_norm": 0.14018821716308594, "learning_rate": 0.002, "loss": 2.5499, "step": 181240 }, { "epoch": 0.36109030345531046, "grad_norm": 0.17106996476650238, "learning_rate": 0.002, "loss": 2.5569, "step": 181250 }, { "epoch": 0.3611102256789494, "grad_norm": 0.1692872792482376, "learning_rate": 0.002, "loss": 2.5662, "step": 181260 }, { "epoch": 0.3611301479025883, "grad_norm": 0.15386858582496643, "learning_rate": 0.002, "loss": 2.5521, "step": 181270 }, { "epoch": 0.3611500701262272, "grad_norm": 0.17456117272377014, "learning_rate": 0.002, "loss": 2.5681, "step": 181280 }, { "epoch": 0.36116999234986613, "grad_norm": 0.16938038170337677, "learning_rate": 0.002, "loss": 2.5639, "step": 181290 }, { "epoch": 0.361189914573505, "grad_norm": 0.1640668511390686, "learning_rate": 0.002, "loss": 2.555, "step": 181300 }, { "epoch": 0.36120983679714397, "grad_norm": 0.16841061413288116, "learning_rate": 0.002, "loss": 2.5514, "step": 181310 }, { "epoch": 0.36122975902078286, "grad_norm": 0.14453904330730438, "learning_rate": 0.002, "loss": 2.5594, "step": 181320 }, { "epoch": 0.36124968124442175, "grad_norm": 0.15588292479515076, "learning_rate": 0.002, "loss": 2.5748, "step": 181330 }, { "epoch": 0.3612696034680607, "grad_norm": 0.17608577013015747, "learning_rate": 0.002, "loss": 2.5687, "step": 181340 }, { "epoch": 0.3612895256916996, "grad_norm": 0.18911035358905792, "learning_rate": 0.002, "loss": 2.5653, "step": 181350 }, { "epoch": 0.36130944791533853, "grad_norm": 0.15352602303028107, "learning_rate": 0.002, "loss": 2.5637, "step": 181360 }, { "epoch": 0.3613293701389774, "grad_norm": 0.19250956177711487, "learning_rate": 0.002, "loss": 2.5417, "step": 181370 }, { "epoch": 0.36134929236261637, "grad_norm": 0.15660317242145538, "learning_rate": 0.002, "loss": 2.5556, "step": 181380 }, { "epoch": 0.36136921458625526, "grad_norm": 0.16502009332180023, "learning_rate": 0.002, "loss": 2.5402, "step": 181390 }, { "epoch": 0.36138913680989415, "grad_norm": 0.16576282680034637, "learning_rate": 0.002, "loss": 2.5547, "step": 181400 }, { "epoch": 0.3614090590335331, "grad_norm": 0.19206081330776215, "learning_rate": 0.002, "loss": 2.5572, "step": 181410 }, { "epoch": 0.361428981257172, "grad_norm": 0.17184560000896454, "learning_rate": 0.002, "loss": 2.5583, "step": 181420 }, { "epoch": 0.36144890348081093, "grad_norm": 0.14547280967235565, "learning_rate": 0.002, "loss": 2.5622, "step": 181430 }, { "epoch": 0.3614688257044498, "grad_norm": 0.16157197952270508, "learning_rate": 0.002, "loss": 2.5595, "step": 181440 }, { "epoch": 0.3614887479280887, "grad_norm": 0.16130971908569336, "learning_rate": 0.002, "loss": 2.5627, "step": 181450 }, { "epoch": 0.36150867015172766, "grad_norm": 0.15709006786346436, "learning_rate": 0.002, "loss": 2.5613, "step": 181460 }, { "epoch": 0.36152859237536655, "grad_norm": 0.15709179639816284, "learning_rate": 0.002, "loss": 2.556, "step": 181470 }, { "epoch": 0.3615485145990055, "grad_norm": 0.1551908403635025, "learning_rate": 0.002, "loss": 2.5622, "step": 181480 }, { "epoch": 0.3615684368226444, "grad_norm": 0.17204774916172028, "learning_rate": 0.002, "loss": 2.5598, "step": 181490 }, { "epoch": 0.36158835904628334, "grad_norm": 0.15631532669067383, "learning_rate": 0.002, "loss": 2.5759, "step": 181500 }, { "epoch": 0.3616082812699222, "grad_norm": 0.17053470015525818, "learning_rate": 0.002, "loss": 2.5666, "step": 181510 }, { "epoch": 0.3616282034935611, "grad_norm": 0.16240769624710083, "learning_rate": 0.002, "loss": 2.5649, "step": 181520 }, { "epoch": 0.36164812571720006, "grad_norm": 0.2070261687040329, "learning_rate": 0.002, "loss": 2.5572, "step": 181530 }, { "epoch": 0.36166804794083895, "grad_norm": 0.1574004590511322, "learning_rate": 0.002, "loss": 2.5557, "step": 181540 }, { "epoch": 0.3616879701644779, "grad_norm": 0.13964833319187164, "learning_rate": 0.002, "loss": 2.5661, "step": 181550 }, { "epoch": 0.3617078923881168, "grad_norm": 0.17996887862682343, "learning_rate": 0.002, "loss": 2.5475, "step": 181560 }, { "epoch": 0.3617278146117557, "grad_norm": 0.18154582381248474, "learning_rate": 0.002, "loss": 2.5627, "step": 181570 }, { "epoch": 0.36174773683539463, "grad_norm": 0.17016373574733734, "learning_rate": 0.002, "loss": 2.5595, "step": 181580 }, { "epoch": 0.3617676590590335, "grad_norm": 0.18382281064987183, "learning_rate": 0.002, "loss": 2.5656, "step": 181590 }, { "epoch": 0.36178758128267247, "grad_norm": 0.21118979156017303, "learning_rate": 0.002, "loss": 2.5525, "step": 181600 }, { "epoch": 0.36180750350631136, "grad_norm": 0.15647080540657043, "learning_rate": 0.002, "loss": 2.5628, "step": 181610 }, { "epoch": 0.36182742572995025, "grad_norm": 0.14587156474590302, "learning_rate": 0.002, "loss": 2.5664, "step": 181620 }, { "epoch": 0.3618473479535892, "grad_norm": 0.14888529479503632, "learning_rate": 0.002, "loss": 2.5513, "step": 181630 }, { "epoch": 0.3618672701772281, "grad_norm": 0.1918720155954361, "learning_rate": 0.002, "loss": 2.5592, "step": 181640 }, { "epoch": 0.36188719240086703, "grad_norm": 0.14671480655670166, "learning_rate": 0.002, "loss": 2.5576, "step": 181650 }, { "epoch": 0.3619071146245059, "grad_norm": 0.16690826416015625, "learning_rate": 0.002, "loss": 2.5534, "step": 181660 }, { "epoch": 0.36192703684814487, "grad_norm": 0.1842232197523117, "learning_rate": 0.002, "loss": 2.5562, "step": 181670 }, { "epoch": 0.36194695907178376, "grad_norm": 0.17441251873970032, "learning_rate": 0.002, "loss": 2.5608, "step": 181680 }, { "epoch": 0.36196688129542265, "grad_norm": 0.1770031750202179, "learning_rate": 0.002, "loss": 2.5599, "step": 181690 }, { "epoch": 0.3619868035190616, "grad_norm": 0.17432482540607452, "learning_rate": 0.002, "loss": 2.544, "step": 181700 }, { "epoch": 0.3620067257427005, "grad_norm": 0.17052751779556274, "learning_rate": 0.002, "loss": 2.5735, "step": 181710 }, { "epoch": 0.36202664796633943, "grad_norm": 0.15644119679927826, "learning_rate": 0.002, "loss": 2.5513, "step": 181720 }, { "epoch": 0.3620465701899783, "grad_norm": 0.15270228683948517, "learning_rate": 0.002, "loss": 2.5583, "step": 181730 }, { "epoch": 0.3620664924136172, "grad_norm": 0.16648581624031067, "learning_rate": 0.002, "loss": 2.5656, "step": 181740 }, { "epoch": 0.36208641463725616, "grad_norm": 0.13770568370819092, "learning_rate": 0.002, "loss": 2.5584, "step": 181750 }, { "epoch": 0.36210633686089505, "grad_norm": 0.1434936374425888, "learning_rate": 0.002, "loss": 2.5591, "step": 181760 }, { "epoch": 0.362126259084534, "grad_norm": 0.20177263021469116, "learning_rate": 0.002, "loss": 2.559, "step": 181770 }, { "epoch": 0.3621461813081729, "grad_norm": 0.1799919605255127, "learning_rate": 0.002, "loss": 2.5622, "step": 181780 }, { "epoch": 0.36216610353181183, "grad_norm": 0.15059281885623932, "learning_rate": 0.002, "loss": 2.552, "step": 181790 }, { "epoch": 0.3621860257554507, "grad_norm": 0.15291853249073029, "learning_rate": 0.002, "loss": 2.5646, "step": 181800 }, { "epoch": 0.3622059479790896, "grad_norm": 0.17593732476234436, "learning_rate": 0.002, "loss": 2.5666, "step": 181810 }, { "epoch": 0.36222587020272856, "grad_norm": 0.16391469538211823, "learning_rate": 0.002, "loss": 2.5686, "step": 181820 }, { "epoch": 0.36224579242636745, "grad_norm": 0.16592824459075928, "learning_rate": 0.002, "loss": 2.5675, "step": 181830 }, { "epoch": 0.3622657146500064, "grad_norm": 0.1704872101545334, "learning_rate": 0.002, "loss": 2.5527, "step": 181840 }, { "epoch": 0.3622856368736453, "grad_norm": 0.16171018779277802, "learning_rate": 0.002, "loss": 2.5557, "step": 181850 }, { "epoch": 0.3623055590972842, "grad_norm": 0.16222451627254486, "learning_rate": 0.002, "loss": 2.5527, "step": 181860 }, { "epoch": 0.3623254813209231, "grad_norm": 0.18126580119132996, "learning_rate": 0.002, "loss": 2.5493, "step": 181870 }, { "epoch": 0.362345403544562, "grad_norm": 0.1840122938156128, "learning_rate": 0.002, "loss": 2.5692, "step": 181880 }, { "epoch": 0.36236532576820096, "grad_norm": 0.14802099764347076, "learning_rate": 0.002, "loss": 2.5513, "step": 181890 }, { "epoch": 0.36238524799183985, "grad_norm": 0.15851789712905884, "learning_rate": 0.002, "loss": 2.5801, "step": 181900 }, { "epoch": 0.36240517021547874, "grad_norm": 0.21245530247688293, "learning_rate": 0.002, "loss": 2.5635, "step": 181910 }, { "epoch": 0.3624250924391177, "grad_norm": 0.16873884201049805, "learning_rate": 0.002, "loss": 2.5612, "step": 181920 }, { "epoch": 0.3624450146627566, "grad_norm": 0.15298837423324585, "learning_rate": 0.002, "loss": 2.5553, "step": 181930 }, { "epoch": 0.36246493688639553, "grad_norm": 0.20389516651630402, "learning_rate": 0.002, "loss": 2.5426, "step": 181940 }, { "epoch": 0.3624848591100344, "grad_norm": 0.16804876923561096, "learning_rate": 0.002, "loss": 2.554, "step": 181950 }, { "epoch": 0.36250478133367336, "grad_norm": 0.16216810047626495, "learning_rate": 0.002, "loss": 2.5543, "step": 181960 }, { "epoch": 0.36252470355731226, "grad_norm": 0.15821078419685364, "learning_rate": 0.002, "loss": 2.5708, "step": 181970 }, { "epoch": 0.36254462578095115, "grad_norm": 0.1654847413301468, "learning_rate": 0.002, "loss": 2.5599, "step": 181980 }, { "epoch": 0.3625645480045901, "grad_norm": 0.15422111749649048, "learning_rate": 0.002, "loss": 2.5606, "step": 181990 }, { "epoch": 0.362584470228229, "grad_norm": 0.16250865161418915, "learning_rate": 0.002, "loss": 2.5675, "step": 182000 }, { "epoch": 0.36260439245186793, "grad_norm": 0.16549637913703918, "learning_rate": 0.002, "loss": 2.546, "step": 182010 }, { "epoch": 0.3626243146755068, "grad_norm": 0.21697935461997986, "learning_rate": 0.002, "loss": 2.5604, "step": 182020 }, { "epoch": 0.3626442368991457, "grad_norm": 0.12690229713916779, "learning_rate": 0.002, "loss": 2.5556, "step": 182030 }, { "epoch": 0.36266415912278466, "grad_norm": 0.16254349052906036, "learning_rate": 0.002, "loss": 2.5508, "step": 182040 }, { "epoch": 0.36268408134642355, "grad_norm": 0.1628897339105606, "learning_rate": 0.002, "loss": 2.5583, "step": 182050 }, { "epoch": 0.3627040035700625, "grad_norm": 0.18031607568264008, "learning_rate": 0.002, "loss": 2.5543, "step": 182060 }, { "epoch": 0.3627239257937014, "grad_norm": 0.19401507079601288, "learning_rate": 0.002, "loss": 2.5664, "step": 182070 }, { "epoch": 0.3627438480173403, "grad_norm": 0.14180900156497955, "learning_rate": 0.002, "loss": 2.5651, "step": 182080 }, { "epoch": 0.3627637702409792, "grad_norm": 0.1902828961610794, "learning_rate": 0.002, "loss": 2.5545, "step": 182090 }, { "epoch": 0.3627836924646181, "grad_norm": 0.17137043178081512, "learning_rate": 0.002, "loss": 2.5544, "step": 182100 }, { "epoch": 0.36280361468825706, "grad_norm": 0.15429861843585968, "learning_rate": 0.002, "loss": 2.5633, "step": 182110 }, { "epoch": 0.36282353691189595, "grad_norm": 0.19940441846847534, "learning_rate": 0.002, "loss": 2.5714, "step": 182120 }, { "epoch": 0.3628434591355349, "grad_norm": 0.1658743917942047, "learning_rate": 0.002, "loss": 2.552, "step": 182130 }, { "epoch": 0.3628633813591738, "grad_norm": 0.19752828776836395, "learning_rate": 0.002, "loss": 2.5594, "step": 182140 }, { "epoch": 0.3628833035828127, "grad_norm": 0.16029787063598633, "learning_rate": 0.002, "loss": 2.5562, "step": 182150 }, { "epoch": 0.3629032258064516, "grad_norm": 0.16484549641609192, "learning_rate": 0.002, "loss": 2.5598, "step": 182160 }, { "epoch": 0.3629231480300905, "grad_norm": 0.1669483333826065, "learning_rate": 0.002, "loss": 2.5478, "step": 182170 }, { "epoch": 0.36294307025372946, "grad_norm": 0.20010413229465485, "learning_rate": 0.002, "loss": 2.5574, "step": 182180 }, { "epoch": 0.36296299247736835, "grad_norm": 0.15899978578090668, "learning_rate": 0.002, "loss": 2.5686, "step": 182190 }, { "epoch": 0.36298291470100724, "grad_norm": 0.49072566628456116, "learning_rate": 0.002, "loss": 2.5661, "step": 182200 }, { "epoch": 0.3630028369246462, "grad_norm": 0.18251460790634155, "learning_rate": 0.002, "loss": 2.572, "step": 182210 }, { "epoch": 0.3630227591482851, "grad_norm": 0.1390625387430191, "learning_rate": 0.002, "loss": 2.5638, "step": 182220 }, { "epoch": 0.363042681371924, "grad_norm": 0.1509367972612381, "learning_rate": 0.002, "loss": 2.5558, "step": 182230 }, { "epoch": 0.3630626035955629, "grad_norm": 0.1746554672718048, "learning_rate": 0.002, "loss": 2.5551, "step": 182240 }, { "epoch": 0.36308252581920186, "grad_norm": 0.168218195438385, "learning_rate": 0.002, "loss": 2.5738, "step": 182250 }, { "epoch": 0.36310244804284075, "grad_norm": 0.18169112503528595, "learning_rate": 0.002, "loss": 2.5608, "step": 182260 }, { "epoch": 0.36312237026647964, "grad_norm": 0.18426500260829926, "learning_rate": 0.002, "loss": 2.5629, "step": 182270 }, { "epoch": 0.3631422924901186, "grad_norm": 0.16788321733474731, "learning_rate": 0.002, "loss": 2.5558, "step": 182280 }, { "epoch": 0.3631622147137575, "grad_norm": 0.180575892329216, "learning_rate": 0.002, "loss": 2.5581, "step": 182290 }, { "epoch": 0.3631821369373964, "grad_norm": 0.1470191776752472, "learning_rate": 0.002, "loss": 2.5678, "step": 182300 }, { "epoch": 0.3632020591610353, "grad_norm": 0.1519857943058014, "learning_rate": 0.002, "loss": 2.5752, "step": 182310 }, { "epoch": 0.3632219813846742, "grad_norm": 0.1483292281627655, "learning_rate": 0.002, "loss": 2.5618, "step": 182320 }, { "epoch": 0.36324190360831315, "grad_norm": 0.16576357185840607, "learning_rate": 0.002, "loss": 2.5653, "step": 182330 }, { "epoch": 0.36326182583195205, "grad_norm": 0.1804569661617279, "learning_rate": 0.002, "loss": 2.5635, "step": 182340 }, { "epoch": 0.363281748055591, "grad_norm": 0.15720783174037933, "learning_rate": 0.002, "loss": 2.5668, "step": 182350 }, { "epoch": 0.3633016702792299, "grad_norm": 0.17627745866775513, "learning_rate": 0.002, "loss": 2.5532, "step": 182360 }, { "epoch": 0.3633215925028688, "grad_norm": 0.15791535377502441, "learning_rate": 0.002, "loss": 2.5618, "step": 182370 }, { "epoch": 0.3633415147265077, "grad_norm": 0.20523202419281006, "learning_rate": 0.002, "loss": 2.568, "step": 182380 }, { "epoch": 0.3633614369501466, "grad_norm": 0.1573903113603592, "learning_rate": 0.002, "loss": 2.567, "step": 182390 }, { "epoch": 0.36338135917378556, "grad_norm": 0.15841174125671387, "learning_rate": 0.002, "loss": 2.5629, "step": 182400 }, { "epoch": 0.36340128139742445, "grad_norm": 0.1648988127708435, "learning_rate": 0.002, "loss": 2.57, "step": 182410 }, { "epoch": 0.3634212036210634, "grad_norm": 0.16723059117794037, "learning_rate": 0.002, "loss": 2.5647, "step": 182420 }, { "epoch": 0.3634411258447023, "grad_norm": 0.15679684281349182, "learning_rate": 0.002, "loss": 2.5724, "step": 182430 }, { "epoch": 0.3634610480683412, "grad_norm": 0.14800599217414856, "learning_rate": 0.002, "loss": 2.5615, "step": 182440 }, { "epoch": 0.3634809702919801, "grad_norm": 0.17830637097358704, "learning_rate": 0.002, "loss": 2.5513, "step": 182450 }, { "epoch": 0.363500892515619, "grad_norm": 0.15406179428100586, "learning_rate": 0.002, "loss": 2.5756, "step": 182460 }, { "epoch": 0.36352081473925796, "grad_norm": 0.1376650035381317, "learning_rate": 0.002, "loss": 2.5543, "step": 182470 }, { "epoch": 0.36354073696289685, "grad_norm": 0.16830500960350037, "learning_rate": 0.002, "loss": 2.5555, "step": 182480 }, { "epoch": 0.36356065918653574, "grad_norm": 0.18450245261192322, "learning_rate": 0.002, "loss": 2.5427, "step": 182490 }, { "epoch": 0.3635805814101747, "grad_norm": 0.18782591819763184, "learning_rate": 0.002, "loss": 2.5668, "step": 182500 }, { "epoch": 0.3636005036338136, "grad_norm": 0.18202517926692963, "learning_rate": 0.002, "loss": 2.5599, "step": 182510 }, { "epoch": 0.3636204258574525, "grad_norm": 0.1509978175163269, "learning_rate": 0.002, "loss": 2.5688, "step": 182520 }, { "epoch": 0.3636403480810914, "grad_norm": 0.1717810183763504, "learning_rate": 0.002, "loss": 2.56, "step": 182530 }, { "epoch": 0.36366027030473036, "grad_norm": 0.15532615780830383, "learning_rate": 0.002, "loss": 2.571, "step": 182540 }, { "epoch": 0.36368019252836925, "grad_norm": 0.2196662724018097, "learning_rate": 0.002, "loss": 2.5672, "step": 182550 }, { "epoch": 0.36370011475200814, "grad_norm": 0.17699027061462402, "learning_rate": 0.002, "loss": 2.5681, "step": 182560 }, { "epoch": 0.3637200369756471, "grad_norm": 0.15475593507289886, "learning_rate": 0.002, "loss": 2.5644, "step": 182570 }, { "epoch": 0.363739959199286, "grad_norm": 0.17258745431900024, "learning_rate": 0.002, "loss": 2.567, "step": 182580 }, { "epoch": 0.3637598814229249, "grad_norm": 0.13965722918510437, "learning_rate": 0.002, "loss": 2.5635, "step": 182590 }, { "epoch": 0.3637798036465638, "grad_norm": 0.2211204469203949, "learning_rate": 0.002, "loss": 2.5594, "step": 182600 }, { "epoch": 0.3637997258702027, "grad_norm": 0.18587768077850342, "learning_rate": 0.002, "loss": 2.5482, "step": 182610 }, { "epoch": 0.36381964809384165, "grad_norm": 0.17984610795974731, "learning_rate": 0.002, "loss": 2.5655, "step": 182620 }, { "epoch": 0.36383957031748054, "grad_norm": 0.200619637966156, "learning_rate": 0.002, "loss": 2.5517, "step": 182630 }, { "epoch": 0.3638594925411195, "grad_norm": 0.1378941386938095, "learning_rate": 0.002, "loss": 2.5763, "step": 182640 }, { "epoch": 0.3638794147647584, "grad_norm": 0.2315753698348999, "learning_rate": 0.002, "loss": 2.5551, "step": 182650 }, { "epoch": 0.36389933698839727, "grad_norm": 0.16344772279262543, "learning_rate": 0.002, "loss": 2.5538, "step": 182660 }, { "epoch": 0.3639192592120362, "grad_norm": 0.16808857023715973, "learning_rate": 0.002, "loss": 2.5706, "step": 182670 }, { "epoch": 0.3639391814356751, "grad_norm": 0.1568877398967743, "learning_rate": 0.002, "loss": 2.5722, "step": 182680 }, { "epoch": 0.36395910365931405, "grad_norm": 0.189086452126503, "learning_rate": 0.002, "loss": 2.5741, "step": 182690 }, { "epoch": 0.36397902588295294, "grad_norm": 0.17891491949558258, "learning_rate": 0.002, "loss": 2.5677, "step": 182700 }, { "epoch": 0.3639989481065919, "grad_norm": 0.18299902975559235, "learning_rate": 0.002, "loss": 2.5571, "step": 182710 }, { "epoch": 0.3640188703302308, "grad_norm": 0.2635987401008606, "learning_rate": 0.002, "loss": 2.5709, "step": 182720 }, { "epoch": 0.3640387925538697, "grad_norm": 0.1752505749464035, "learning_rate": 0.002, "loss": 2.5571, "step": 182730 }, { "epoch": 0.3640587147775086, "grad_norm": 0.1769602745771408, "learning_rate": 0.002, "loss": 2.5644, "step": 182740 }, { "epoch": 0.3640786370011475, "grad_norm": 0.14157161116600037, "learning_rate": 0.002, "loss": 2.56, "step": 182750 }, { "epoch": 0.36409855922478646, "grad_norm": 0.16536274552345276, "learning_rate": 0.002, "loss": 2.5614, "step": 182760 }, { "epoch": 0.36411848144842535, "grad_norm": 0.18502235412597656, "learning_rate": 0.002, "loss": 2.5598, "step": 182770 }, { "epoch": 0.36413840367206424, "grad_norm": 0.18585427105426788, "learning_rate": 0.002, "loss": 2.549, "step": 182780 }, { "epoch": 0.3641583258957032, "grad_norm": 0.15699729323387146, "learning_rate": 0.002, "loss": 2.5668, "step": 182790 }, { "epoch": 0.3641782481193421, "grad_norm": 0.17230285704135895, "learning_rate": 0.002, "loss": 2.5651, "step": 182800 }, { "epoch": 0.364198170342981, "grad_norm": 0.1596287190914154, "learning_rate": 0.002, "loss": 2.5527, "step": 182810 }, { "epoch": 0.3642180925666199, "grad_norm": 0.19236458837985992, "learning_rate": 0.002, "loss": 2.5558, "step": 182820 }, { "epoch": 0.36423801479025886, "grad_norm": 0.16506215929985046, "learning_rate": 0.002, "loss": 2.569, "step": 182830 }, { "epoch": 0.36425793701389775, "grad_norm": 0.2867625057697296, "learning_rate": 0.002, "loss": 2.5614, "step": 182840 }, { "epoch": 0.36427785923753664, "grad_norm": 0.1685187965631485, "learning_rate": 0.002, "loss": 2.5741, "step": 182850 }, { "epoch": 0.3642977814611756, "grad_norm": 0.21293042600154877, "learning_rate": 0.002, "loss": 2.5576, "step": 182860 }, { "epoch": 0.3643177036848145, "grad_norm": 0.20204639434814453, "learning_rate": 0.002, "loss": 2.5693, "step": 182870 }, { "epoch": 0.3643376259084534, "grad_norm": 0.17430981993675232, "learning_rate": 0.002, "loss": 2.5713, "step": 182880 }, { "epoch": 0.3643575481320923, "grad_norm": 0.15886786580085754, "learning_rate": 0.002, "loss": 2.57, "step": 182890 }, { "epoch": 0.3643774703557312, "grad_norm": 0.18807516992092133, "learning_rate": 0.002, "loss": 2.5614, "step": 182900 }, { "epoch": 0.36439739257937015, "grad_norm": 0.14052143692970276, "learning_rate": 0.002, "loss": 2.5596, "step": 182910 }, { "epoch": 0.36441731480300904, "grad_norm": 0.20568320155143738, "learning_rate": 0.002, "loss": 2.5613, "step": 182920 }, { "epoch": 0.364437237026648, "grad_norm": 0.14835600554943085, "learning_rate": 0.002, "loss": 2.5631, "step": 182930 }, { "epoch": 0.3644571592502869, "grad_norm": 0.19215218722820282, "learning_rate": 0.002, "loss": 2.5475, "step": 182940 }, { "epoch": 0.36447708147392577, "grad_norm": 0.17475104331970215, "learning_rate": 0.002, "loss": 2.5602, "step": 182950 }, { "epoch": 0.3644970036975647, "grad_norm": 0.14255443215370178, "learning_rate": 0.002, "loss": 2.557, "step": 182960 }, { "epoch": 0.3645169259212036, "grad_norm": 0.16979704797267914, "learning_rate": 0.002, "loss": 2.5567, "step": 182970 }, { "epoch": 0.36453684814484255, "grad_norm": 0.15878832340240479, "learning_rate": 0.002, "loss": 2.5651, "step": 182980 }, { "epoch": 0.36455677036848144, "grad_norm": 0.21681290864944458, "learning_rate": 0.002, "loss": 2.5486, "step": 182990 }, { "epoch": 0.3645766925921204, "grad_norm": 0.15635624527931213, "learning_rate": 0.002, "loss": 2.5648, "step": 183000 }, { "epoch": 0.3645966148157593, "grad_norm": 0.16953708231449127, "learning_rate": 0.002, "loss": 2.5594, "step": 183010 }, { "epoch": 0.36461653703939817, "grad_norm": 0.15441177785396576, "learning_rate": 0.002, "loss": 2.5683, "step": 183020 }, { "epoch": 0.3646364592630371, "grad_norm": 0.1901201605796814, "learning_rate": 0.002, "loss": 2.5616, "step": 183030 }, { "epoch": 0.364656381486676, "grad_norm": 0.1605655401945114, "learning_rate": 0.002, "loss": 2.5666, "step": 183040 }, { "epoch": 0.36467630371031495, "grad_norm": 0.18018704652786255, "learning_rate": 0.002, "loss": 2.5572, "step": 183050 }, { "epoch": 0.36469622593395384, "grad_norm": 0.15043312311172485, "learning_rate": 0.002, "loss": 2.5647, "step": 183060 }, { "epoch": 0.36471614815759273, "grad_norm": 0.17571894824504852, "learning_rate": 0.002, "loss": 2.5668, "step": 183070 }, { "epoch": 0.3647360703812317, "grad_norm": 0.14744888246059418, "learning_rate": 0.002, "loss": 2.5639, "step": 183080 }, { "epoch": 0.36475599260487057, "grad_norm": 0.21840529143810272, "learning_rate": 0.002, "loss": 2.5671, "step": 183090 }, { "epoch": 0.3647759148285095, "grad_norm": 0.1640988290309906, "learning_rate": 0.002, "loss": 2.5641, "step": 183100 }, { "epoch": 0.3647958370521484, "grad_norm": 0.15570467710494995, "learning_rate": 0.002, "loss": 2.5425, "step": 183110 }, { "epoch": 0.3648157592757873, "grad_norm": 0.15857259929180145, "learning_rate": 0.002, "loss": 2.5456, "step": 183120 }, { "epoch": 0.36483568149942625, "grad_norm": 0.19623059034347534, "learning_rate": 0.002, "loss": 2.5688, "step": 183130 }, { "epoch": 0.36485560372306514, "grad_norm": 0.16378669440746307, "learning_rate": 0.002, "loss": 2.5617, "step": 183140 }, { "epoch": 0.3648755259467041, "grad_norm": 0.15338893234729767, "learning_rate": 0.002, "loss": 2.5537, "step": 183150 }, { "epoch": 0.364895448170343, "grad_norm": 0.1502002626657486, "learning_rate": 0.002, "loss": 2.5594, "step": 183160 }, { "epoch": 0.3649153703939819, "grad_norm": 0.2199755162000656, "learning_rate": 0.002, "loss": 2.5778, "step": 183170 }, { "epoch": 0.3649352926176208, "grad_norm": 0.1906084418296814, "learning_rate": 0.002, "loss": 2.5614, "step": 183180 }, { "epoch": 0.3649552148412597, "grad_norm": 0.15728260576725006, "learning_rate": 0.002, "loss": 2.5533, "step": 183190 }, { "epoch": 0.36497513706489865, "grad_norm": 0.13271887600421906, "learning_rate": 0.002, "loss": 2.5596, "step": 183200 }, { "epoch": 0.36499505928853754, "grad_norm": 0.17260856926441193, "learning_rate": 0.002, "loss": 2.5502, "step": 183210 }, { "epoch": 0.3650149815121765, "grad_norm": 0.2322140485048294, "learning_rate": 0.002, "loss": 2.5682, "step": 183220 }, { "epoch": 0.3650349037358154, "grad_norm": 0.15668776631355286, "learning_rate": 0.002, "loss": 2.5562, "step": 183230 }, { "epoch": 0.36505482595945427, "grad_norm": 0.14103884994983673, "learning_rate": 0.002, "loss": 2.5543, "step": 183240 }, { "epoch": 0.3650747481830932, "grad_norm": 0.2076057344675064, "learning_rate": 0.002, "loss": 2.5572, "step": 183250 }, { "epoch": 0.3650946704067321, "grad_norm": 0.15885187685489655, "learning_rate": 0.002, "loss": 2.5632, "step": 183260 }, { "epoch": 0.36511459263037105, "grad_norm": 0.3827323317527771, "learning_rate": 0.002, "loss": 2.5809, "step": 183270 }, { "epoch": 0.36513451485400994, "grad_norm": 0.1611851453781128, "learning_rate": 0.002, "loss": 2.5604, "step": 183280 }, { "epoch": 0.3651544370776489, "grad_norm": 0.16352035105228424, "learning_rate": 0.002, "loss": 2.5446, "step": 183290 }, { "epoch": 0.3651743593012878, "grad_norm": 0.15746109187602997, "learning_rate": 0.002, "loss": 2.5626, "step": 183300 }, { "epoch": 0.36519428152492667, "grad_norm": 0.16155743598937988, "learning_rate": 0.002, "loss": 2.5568, "step": 183310 }, { "epoch": 0.3652142037485656, "grad_norm": 0.151140034198761, "learning_rate": 0.002, "loss": 2.5547, "step": 183320 }, { "epoch": 0.3652341259722045, "grad_norm": 0.19777417182922363, "learning_rate": 0.002, "loss": 2.555, "step": 183330 }, { "epoch": 0.36525404819584345, "grad_norm": 0.18234644830226898, "learning_rate": 0.002, "loss": 2.5618, "step": 183340 }, { "epoch": 0.36527397041948234, "grad_norm": 0.1827193647623062, "learning_rate": 0.002, "loss": 2.5658, "step": 183350 }, { "epoch": 0.36529389264312123, "grad_norm": 0.14751093089580536, "learning_rate": 0.002, "loss": 2.565, "step": 183360 }, { "epoch": 0.3653138148667602, "grad_norm": 0.18669994175434113, "learning_rate": 0.002, "loss": 2.5632, "step": 183370 }, { "epoch": 0.36533373709039907, "grad_norm": 0.15649384260177612, "learning_rate": 0.002, "loss": 2.5627, "step": 183380 }, { "epoch": 0.365353659314038, "grad_norm": 0.21112067997455597, "learning_rate": 0.002, "loss": 2.5731, "step": 183390 }, { "epoch": 0.3653735815376769, "grad_norm": 0.1772671788930893, "learning_rate": 0.002, "loss": 2.5537, "step": 183400 }, { "epoch": 0.3653935037613158, "grad_norm": 0.14271053671836853, "learning_rate": 0.002, "loss": 2.5612, "step": 183410 }, { "epoch": 0.36541342598495474, "grad_norm": 0.17216306924819946, "learning_rate": 0.002, "loss": 2.5706, "step": 183420 }, { "epoch": 0.36543334820859363, "grad_norm": 0.16580665111541748, "learning_rate": 0.002, "loss": 2.5546, "step": 183430 }, { "epoch": 0.3654532704322326, "grad_norm": 0.18822968006134033, "learning_rate": 0.002, "loss": 2.5539, "step": 183440 }, { "epoch": 0.36547319265587147, "grad_norm": 0.18158836662769318, "learning_rate": 0.002, "loss": 2.571, "step": 183450 }, { "epoch": 0.3654931148795104, "grad_norm": 0.13780798017978668, "learning_rate": 0.002, "loss": 2.5484, "step": 183460 }, { "epoch": 0.3655130371031493, "grad_norm": 0.16657310724258423, "learning_rate": 0.002, "loss": 2.5578, "step": 183470 }, { "epoch": 0.3655329593267882, "grad_norm": 0.1542263627052307, "learning_rate": 0.002, "loss": 2.564, "step": 183480 }, { "epoch": 0.36555288155042714, "grad_norm": 0.20551800727844238, "learning_rate": 0.002, "loss": 2.553, "step": 183490 }, { "epoch": 0.36557280377406604, "grad_norm": 0.197641059756279, "learning_rate": 0.002, "loss": 2.5615, "step": 183500 }, { "epoch": 0.365592725997705, "grad_norm": 0.14195571839809418, "learning_rate": 0.002, "loss": 2.5686, "step": 183510 }, { "epoch": 0.36561264822134387, "grad_norm": 0.17029127478599548, "learning_rate": 0.002, "loss": 2.568, "step": 183520 }, { "epoch": 0.36563257044498276, "grad_norm": 0.20244313776493073, "learning_rate": 0.002, "loss": 2.5612, "step": 183530 }, { "epoch": 0.3656524926686217, "grad_norm": 0.1572655290365219, "learning_rate": 0.002, "loss": 2.5619, "step": 183540 }, { "epoch": 0.3656724148922606, "grad_norm": 0.16804338991641998, "learning_rate": 0.002, "loss": 2.5649, "step": 183550 }, { "epoch": 0.36569233711589955, "grad_norm": 0.153716042637825, "learning_rate": 0.002, "loss": 2.5518, "step": 183560 }, { "epoch": 0.36571225933953844, "grad_norm": 0.19127286970615387, "learning_rate": 0.002, "loss": 2.5654, "step": 183570 }, { "epoch": 0.3657321815631774, "grad_norm": 0.17412397265434265, "learning_rate": 0.002, "loss": 2.5582, "step": 183580 }, { "epoch": 0.3657521037868163, "grad_norm": 0.15960057079792023, "learning_rate": 0.002, "loss": 2.5614, "step": 183590 }, { "epoch": 0.36577202601045516, "grad_norm": 0.5872549414634705, "learning_rate": 0.002, "loss": 2.5666, "step": 183600 }, { "epoch": 0.3657919482340941, "grad_norm": 0.15926925837993622, "learning_rate": 0.002, "loss": 2.5718, "step": 183610 }, { "epoch": 0.365811870457733, "grad_norm": 0.17625673115253448, "learning_rate": 0.002, "loss": 2.5443, "step": 183620 }, { "epoch": 0.36583179268137195, "grad_norm": 0.15346090495586395, "learning_rate": 0.002, "loss": 2.5511, "step": 183630 }, { "epoch": 0.36585171490501084, "grad_norm": 0.14600682258605957, "learning_rate": 0.002, "loss": 2.5607, "step": 183640 }, { "epoch": 0.36587163712864973, "grad_norm": 0.18479809165000916, "learning_rate": 0.002, "loss": 2.5636, "step": 183650 }, { "epoch": 0.3658915593522887, "grad_norm": 0.15272337198257446, "learning_rate": 0.002, "loss": 2.5616, "step": 183660 }, { "epoch": 0.36591148157592757, "grad_norm": 0.14584870636463165, "learning_rate": 0.002, "loss": 2.5653, "step": 183670 }, { "epoch": 0.3659314037995665, "grad_norm": 0.22659099102020264, "learning_rate": 0.002, "loss": 2.5601, "step": 183680 }, { "epoch": 0.3659513260232054, "grad_norm": 0.1555831879377365, "learning_rate": 0.002, "loss": 2.5591, "step": 183690 }, { "epoch": 0.3659712482468443, "grad_norm": 0.17729395627975464, "learning_rate": 0.002, "loss": 2.5429, "step": 183700 }, { "epoch": 0.36599117047048324, "grad_norm": 0.1669548898935318, "learning_rate": 0.002, "loss": 2.5557, "step": 183710 }, { "epoch": 0.36601109269412213, "grad_norm": 0.15184170007705688, "learning_rate": 0.002, "loss": 2.5592, "step": 183720 }, { "epoch": 0.3660310149177611, "grad_norm": 0.13734902441501617, "learning_rate": 0.002, "loss": 2.5555, "step": 183730 }, { "epoch": 0.36605093714139997, "grad_norm": 0.20187048614025116, "learning_rate": 0.002, "loss": 2.5733, "step": 183740 }, { "epoch": 0.3660708593650389, "grad_norm": 0.1755751520395279, "learning_rate": 0.002, "loss": 2.5524, "step": 183750 }, { "epoch": 0.3660907815886778, "grad_norm": 0.14761808514595032, "learning_rate": 0.002, "loss": 2.5713, "step": 183760 }, { "epoch": 0.3661107038123167, "grad_norm": 0.1820678561925888, "learning_rate": 0.002, "loss": 2.5492, "step": 183770 }, { "epoch": 0.36613062603595564, "grad_norm": 0.15985438227653503, "learning_rate": 0.002, "loss": 2.5543, "step": 183780 }, { "epoch": 0.36615054825959453, "grad_norm": 0.15949028730392456, "learning_rate": 0.002, "loss": 2.5581, "step": 183790 }, { "epoch": 0.3661704704832335, "grad_norm": 0.1837800145149231, "learning_rate": 0.002, "loss": 2.572, "step": 183800 }, { "epoch": 0.36619039270687237, "grad_norm": 0.1707509309053421, "learning_rate": 0.002, "loss": 2.5553, "step": 183810 }, { "epoch": 0.36621031493051126, "grad_norm": 0.17154081165790558, "learning_rate": 0.002, "loss": 2.5618, "step": 183820 }, { "epoch": 0.3662302371541502, "grad_norm": 0.17113474011421204, "learning_rate": 0.002, "loss": 2.5636, "step": 183830 }, { "epoch": 0.3662501593777891, "grad_norm": 0.17442063987255096, "learning_rate": 0.002, "loss": 2.5638, "step": 183840 }, { "epoch": 0.36627008160142804, "grad_norm": 0.1578027755022049, "learning_rate": 0.002, "loss": 2.5675, "step": 183850 }, { "epoch": 0.36629000382506693, "grad_norm": 0.1650589257478714, "learning_rate": 0.002, "loss": 2.5673, "step": 183860 }, { "epoch": 0.3663099260487058, "grad_norm": 0.1506933867931366, "learning_rate": 0.002, "loss": 2.5483, "step": 183870 }, { "epoch": 0.36632984827234477, "grad_norm": 0.21010197699069977, "learning_rate": 0.002, "loss": 2.5521, "step": 183880 }, { "epoch": 0.36634977049598366, "grad_norm": 0.16797319054603577, "learning_rate": 0.002, "loss": 2.5652, "step": 183890 }, { "epoch": 0.3663696927196226, "grad_norm": 0.15354788303375244, "learning_rate": 0.002, "loss": 2.5555, "step": 183900 }, { "epoch": 0.3663896149432615, "grad_norm": 0.14641372859477997, "learning_rate": 0.002, "loss": 2.5618, "step": 183910 }, { "epoch": 0.36640953716690045, "grad_norm": 0.19306610524654388, "learning_rate": 0.002, "loss": 2.5508, "step": 183920 }, { "epoch": 0.36642945939053934, "grad_norm": 0.14721164107322693, "learning_rate": 0.002, "loss": 2.5577, "step": 183930 }, { "epoch": 0.3664493816141782, "grad_norm": 0.15623390674591064, "learning_rate": 0.002, "loss": 2.5502, "step": 183940 }, { "epoch": 0.3664693038378172, "grad_norm": 0.19129692018032074, "learning_rate": 0.002, "loss": 2.5493, "step": 183950 }, { "epoch": 0.36648922606145606, "grad_norm": 0.15498556196689606, "learning_rate": 0.002, "loss": 2.5644, "step": 183960 }, { "epoch": 0.366509148285095, "grad_norm": 0.16362889111042023, "learning_rate": 0.002, "loss": 2.5661, "step": 183970 }, { "epoch": 0.3665290705087339, "grad_norm": 0.15934672951698303, "learning_rate": 0.002, "loss": 2.559, "step": 183980 }, { "epoch": 0.3665489927323728, "grad_norm": 0.1706572324037552, "learning_rate": 0.002, "loss": 2.5584, "step": 183990 }, { "epoch": 0.36656891495601174, "grad_norm": 0.17460821568965912, "learning_rate": 0.002, "loss": 2.5564, "step": 184000 }, { "epoch": 0.36658883717965063, "grad_norm": 0.184792160987854, "learning_rate": 0.002, "loss": 2.5644, "step": 184010 }, { "epoch": 0.3666087594032896, "grad_norm": 0.15466564893722534, "learning_rate": 0.002, "loss": 2.5482, "step": 184020 }, { "epoch": 0.36662868162692847, "grad_norm": 0.18959461152553558, "learning_rate": 0.002, "loss": 2.5669, "step": 184030 }, { "epoch": 0.3666486038505674, "grad_norm": 0.16781413555145264, "learning_rate": 0.002, "loss": 2.5608, "step": 184040 }, { "epoch": 0.3666685260742063, "grad_norm": 0.1759650707244873, "learning_rate": 0.002, "loss": 2.5603, "step": 184050 }, { "epoch": 0.3666884482978452, "grad_norm": 0.1900138258934021, "learning_rate": 0.002, "loss": 2.5504, "step": 184060 }, { "epoch": 0.36670837052148414, "grad_norm": 0.1830594688653946, "learning_rate": 0.002, "loss": 2.541, "step": 184070 }, { "epoch": 0.36672829274512303, "grad_norm": 0.19022801518440247, "learning_rate": 0.002, "loss": 2.5667, "step": 184080 }, { "epoch": 0.366748214968762, "grad_norm": 0.19267548620700836, "learning_rate": 0.002, "loss": 2.5668, "step": 184090 }, { "epoch": 0.36676813719240087, "grad_norm": 0.18250931799411774, "learning_rate": 0.002, "loss": 2.5716, "step": 184100 }, { "epoch": 0.36678805941603976, "grad_norm": 0.14458434283733368, "learning_rate": 0.002, "loss": 2.5598, "step": 184110 }, { "epoch": 0.3668079816396787, "grad_norm": 0.14571329951286316, "learning_rate": 0.002, "loss": 2.5521, "step": 184120 }, { "epoch": 0.3668279038633176, "grad_norm": 0.17766833305358887, "learning_rate": 0.002, "loss": 2.5481, "step": 184130 }, { "epoch": 0.36684782608695654, "grad_norm": 0.16344013810157776, "learning_rate": 0.002, "loss": 2.5619, "step": 184140 }, { "epoch": 0.36686774831059543, "grad_norm": 0.15814195573329926, "learning_rate": 0.002, "loss": 2.5445, "step": 184150 }, { "epoch": 0.3668876705342343, "grad_norm": 0.15979380905628204, "learning_rate": 0.002, "loss": 2.5517, "step": 184160 }, { "epoch": 0.36690759275787327, "grad_norm": 0.1739165037870407, "learning_rate": 0.002, "loss": 2.5591, "step": 184170 }, { "epoch": 0.36692751498151216, "grad_norm": 0.14982417225837708, "learning_rate": 0.002, "loss": 2.5484, "step": 184180 }, { "epoch": 0.3669474372051511, "grad_norm": 0.17318975925445557, "learning_rate": 0.002, "loss": 2.5618, "step": 184190 }, { "epoch": 0.36696735942879, "grad_norm": 0.169324591755867, "learning_rate": 0.002, "loss": 2.5518, "step": 184200 }, { "epoch": 0.36698728165242894, "grad_norm": 0.15842752158641815, "learning_rate": 0.002, "loss": 2.5689, "step": 184210 }, { "epoch": 0.36700720387606783, "grad_norm": 0.17054593563079834, "learning_rate": 0.002, "loss": 2.5632, "step": 184220 }, { "epoch": 0.3670271260997067, "grad_norm": 0.1466226577758789, "learning_rate": 0.002, "loss": 2.5514, "step": 184230 }, { "epoch": 0.36704704832334567, "grad_norm": 0.16284365952014923, "learning_rate": 0.002, "loss": 2.5624, "step": 184240 }, { "epoch": 0.36706697054698456, "grad_norm": 0.1541852355003357, "learning_rate": 0.002, "loss": 2.5685, "step": 184250 }, { "epoch": 0.3670868927706235, "grad_norm": 0.1723078191280365, "learning_rate": 0.002, "loss": 2.556, "step": 184260 }, { "epoch": 0.3671068149942624, "grad_norm": 0.1642150729894638, "learning_rate": 0.002, "loss": 2.5607, "step": 184270 }, { "epoch": 0.3671267372179013, "grad_norm": 0.15086154639720917, "learning_rate": 0.002, "loss": 2.554, "step": 184280 }, { "epoch": 0.36714665944154024, "grad_norm": 0.1532299965620041, "learning_rate": 0.002, "loss": 2.5563, "step": 184290 }, { "epoch": 0.3671665816651791, "grad_norm": 0.19139458239078522, "learning_rate": 0.002, "loss": 2.5551, "step": 184300 }, { "epoch": 0.36718650388881807, "grad_norm": 0.14680109918117523, "learning_rate": 0.002, "loss": 2.5723, "step": 184310 }, { "epoch": 0.36720642611245696, "grad_norm": 0.1674022376537323, "learning_rate": 0.002, "loss": 2.5521, "step": 184320 }, { "epoch": 0.3672263483360959, "grad_norm": 0.15209795534610748, "learning_rate": 0.002, "loss": 2.5641, "step": 184330 }, { "epoch": 0.3672462705597348, "grad_norm": 0.15790964663028717, "learning_rate": 0.002, "loss": 2.5504, "step": 184340 }, { "epoch": 0.3672661927833737, "grad_norm": 0.15297463536262512, "learning_rate": 0.002, "loss": 2.5565, "step": 184350 }, { "epoch": 0.36728611500701264, "grad_norm": 0.2097042053937912, "learning_rate": 0.002, "loss": 2.5652, "step": 184360 }, { "epoch": 0.3673060372306515, "grad_norm": 0.15309187769889832, "learning_rate": 0.002, "loss": 2.5676, "step": 184370 }, { "epoch": 0.3673259594542905, "grad_norm": 0.17332282662391663, "learning_rate": 0.002, "loss": 2.5571, "step": 184380 }, { "epoch": 0.36734588167792936, "grad_norm": 0.1520170420408249, "learning_rate": 0.002, "loss": 2.5727, "step": 184390 }, { "epoch": 0.36736580390156826, "grad_norm": 0.16101910173892975, "learning_rate": 0.002, "loss": 2.5483, "step": 184400 }, { "epoch": 0.3673857261252072, "grad_norm": 0.18016694486141205, "learning_rate": 0.002, "loss": 2.5689, "step": 184410 }, { "epoch": 0.3674056483488461, "grad_norm": 0.1739245504140854, "learning_rate": 0.002, "loss": 2.568, "step": 184420 }, { "epoch": 0.36742557057248504, "grad_norm": 0.1751827746629715, "learning_rate": 0.002, "loss": 2.5514, "step": 184430 }, { "epoch": 0.36744549279612393, "grad_norm": 0.16420243680477142, "learning_rate": 0.002, "loss": 2.5677, "step": 184440 }, { "epoch": 0.3674654150197628, "grad_norm": 0.15017655491828918, "learning_rate": 0.002, "loss": 2.5551, "step": 184450 }, { "epoch": 0.36748533724340177, "grad_norm": 0.1719960868358612, "learning_rate": 0.002, "loss": 2.574, "step": 184460 }, { "epoch": 0.36750525946704066, "grad_norm": 0.1769179403781891, "learning_rate": 0.002, "loss": 2.5573, "step": 184470 }, { "epoch": 0.3675251816906796, "grad_norm": 0.14965912699699402, "learning_rate": 0.002, "loss": 2.5501, "step": 184480 }, { "epoch": 0.3675451039143185, "grad_norm": 0.15500593185424805, "learning_rate": 0.002, "loss": 2.5541, "step": 184490 }, { "epoch": 0.36756502613795744, "grad_norm": 0.19555436074733734, "learning_rate": 0.002, "loss": 2.5551, "step": 184500 }, { "epoch": 0.36758494836159633, "grad_norm": 0.15127034485340118, "learning_rate": 0.002, "loss": 2.5555, "step": 184510 }, { "epoch": 0.3676048705852352, "grad_norm": 0.1734279990196228, "learning_rate": 0.002, "loss": 2.5758, "step": 184520 }, { "epoch": 0.36762479280887417, "grad_norm": 0.17382113635540009, "learning_rate": 0.002, "loss": 2.5468, "step": 184530 }, { "epoch": 0.36764471503251306, "grad_norm": 0.17438311874866486, "learning_rate": 0.002, "loss": 2.5727, "step": 184540 }, { "epoch": 0.367664637256152, "grad_norm": 0.18524330854415894, "learning_rate": 0.002, "loss": 2.5556, "step": 184550 }, { "epoch": 0.3676845594797909, "grad_norm": 0.17111213505268097, "learning_rate": 0.002, "loss": 2.5758, "step": 184560 }, { "epoch": 0.3677044817034298, "grad_norm": 0.15093836188316345, "learning_rate": 0.002, "loss": 2.5737, "step": 184570 }, { "epoch": 0.36772440392706873, "grad_norm": 0.15995216369628906, "learning_rate": 0.002, "loss": 2.5649, "step": 184580 }, { "epoch": 0.3677443261507076, "grad_norm": 0.2212604135274887, "learning_rate": 0.002, "loss": 2.5328, "step": 184590 }, { "epoch": 0.36776424837434657, "grad_norm": 0.173554465174675, "learning_rate": 0.002, "loss": 2.5556, "step": 184600 }, { "epoch": 0.36778417059798546, "grad_norm": 0.16742102801799774, "learning_rate": 0.002, "loss": 2.5619, "step": 184610 }, { "epoch": 0.36780409282162435, "grad_norm": 0.18818092346191406, "learning_rate": 0.002, "loss": 2.5719, "step": 184620 }, { "epoch": 0.3678240150452633, "grad_norm": 0.13866305351257324, "learning_rate": 0.002, "loss": 2.5694, "step": 184630 }, { "epoch": 0.3678439372689022, "grad_norm": 0.21182365715503693, "learning_rate": 0.002, "loss": 2.5437, "step": 184640 }, { "epoch": 0.36786385949254113, "grad_norm": 0.17413699626922607, "learning_rate": 0.002, "loss": 2.5591, "step": 184650 }, { "epoch": 0.36788378171618, "grad_norm": 0.16145732998847961, "learning_rate": 0.002, "loss": 2.5618, "step": 184660 }, { "epoch": 0.36790370393981897, "grad_norm": 0.15865488350391388, "learning_rate": 0.002, "loss": 2.5697, "step": 184670 }, { "epoch": 0.36792362616345786, "grad_norm": 0.20362761616706848, "learning_rate": 0.002, "loss": 2.5646, "step": 184680 }, { "epoch": 0.36794354838709675, "grad_norm": 0.1491481363773346, "learning_rate": 0.002, "loss": 2.5662, "step": 184690 }, { "epoch": 0.3679634706107357, "grad_norm": 0.1792547106742859, "learning_rate": 0.002, "loss": 2.5495, "step": 184700 }, { "epoch": 0.3679833928343746, "grad_norm": 0.2502483129501343, "learning_rate": 0.002, "loss": 2.5632, "step": 184710 }, { "epoch": 0.36800331505801354, "grad_norm": 0.19228902459144592, "learning_rate": 0.002, "loss": 2.5545, "step": 184720 }, { "epoch": 0.3680232372816524, "grad_norm": 0.16108214855194092, "learning_rate": 0.002, "loss": 2.5707, "step": 184730 }, { "epoch": 0.3680431595052913, "grad_norm": 0.1670679897069931, "learning_rate": 0.002, "loss": 2.5753, "step": 184740 }, { "epoch": 0.36806308172893026, "grad_norm": 0.18328289687633514, "learning_rate": 0.002, "loss": 2.55, "step": 184750 }, { "epoch": 0.36808300395256915, "grad_norm": 0.18183769285678864, "learning_rate": 0.002, "loss": 2.558, "step": 184760 }, { "epoch": 0.3681029261762081, "grad_norm": 0.1638748198747635, "learning_rate": 0.002, "loss": 2.5691, "step": 184770 }, { "epoch": 0.368122848399847, "grad_norm": 0.16959123313426971, "learning_rate": 0.002, "loss": 2.5505, "step": 184780 }, { "epoch": 0.36814277062348594, "grad_norm": 0.1510283648967743, "learning_rate": 0.002, "loss": 2.5598, "step": 184790 }, { "epoch": 0.36816269284712483, "grad_norm": 0.19327189028263092, "learning_rate": 0.002, "loss": 2.5637, "step": 184800 }, { "epoch": 0.3681826150707637, "grad_norm": 0.1607748121023178, "learning_rate": 0.002, "loss": 2.5566, "step": 184810 }, { "epoch": 0.36820253729440267, "grad_norm": 0.17251881957054138, "learning_rate": 0.002, "loss": 2.5715, "step": 184820 }, { "epoch": 0.36822245951804156, "grad_norm": 0.16602574288845062, "learning_rate": 0.002, "loss": 2.5562, "step": 184830 }, { "epoch": 0.3682423817416805, "grad_norm": 0.16262561082839966, "learning_rate": 0.002, "loss": 2.5543, "step": 184840 }, { "epoch": 0.3682623039653194, "grad_norm": 0.20320498943328857, "learning_rate": 0.002, "loss": 2.5571, "step": 184850 }, { "epoch": 0.3682822261889583, "grad_norm": 0.19662943482398987, "learning_rate": 0.002, "loss": 2.5597, "step": 184860 }, { "epoch": 0.36830214841259723, "grad_norm": 0.18615353107452393, "learning_rate": 0.002, "loss": 2.5551, "step": 184870 }, { "epoch": 0.3683220706362361, "grad_norm": 0.17929629981517792, "learning_rate": 0.002, "loss": 2.5682, "step": 184880 }, { "epoch": 0.36834199285987507, "grad_norm": 0.18099510669708252, "learning_rate": 0.002, "loss": 2.5581, "step": 184890 }, { "epoch": 0.36836191508351396, "grad_norm": 0.14101482927799225, "learning_rate": 0.002, "loss": 2.562, "step": 184900 }, { "epoch": 0.36838183730715285, "grad_norm": 0.16944168508052826, "learning_rate": 0.002, "loss": 2.566, "step": 184910 }, { "epoch": 0.3684017595307918, "grad_norm": 0.169955313205719, "learning_rate": 0.002, "loss": 2.5635, "step": 184920 }, { "epoch": 0.3684216817544307, "grad_norm": 0.15002559125423431, "learning_rate": 0.002, "loss": 2.5652, "step": 184930 }, { "epoch": 0.36844160397806963, "grad_norm": 0.20875096321105957, "learning_rate": 0.002, "loss": 2.5632, "step": 184940 }, { "epoch": 0.3684615262017085, "grad_norm": 0.17316651344299316, "learning_rate": 0.002, "loss": 2.5586, "step": 184950 }, { "epoch": 0.36848144842534747, "grad_norm": 0.16084949672222137, "learning_rate": 0.002, "loss": 2.5565, "step": 184960 }, { "epoch": 0.36850137064898636, "grad_norm": 0.15604597330093384, "learning_rate": 0.002, "loss": 2.5665, "step": 184970 }, { "epoch": 0.36852129287262525, "grad_norm": 0.184683158993721, "learning_rate": 0.002, "loss": 2.5592, "step": 184980 }, { "epoch": 0.3685412150962642, "grad_norm": 0.15506592392921448, "learning_rate": 0.002, "loss": 2.5497, "step": 184990 }, { "epoch": 0.3685611373199031, "grad_norm": 0.16162407398223877, "learning_rate": 0.002, "loss": 2.5668, "step": 185000 }, { "epoch": 0.36858105954354203, "grad_norm": 0.1489691436290741, "learning_rate": 0.002, "loss": 2.5531, "step": 185010 }, { "epoch": 0.3686009817671809, "grad_norm": 0.1957191675901413, "learning_rate": 0.002, "loss": 2.5684, "step": 185020 }, { "epoch": 0.3686209039908198, "grad_norm": 0.15665386617183685, "learning_rate": 0.002, "loss": 2.5652, "step": 185030 }, { "epoch": 0.36864082621445876, "grad_norm": 0.21535365283489227, "learning_rate": 0.002, "loss": 2.5721, "step": 185040 }, { "epoch": 0.36866074843809765, "grad_norm": 0.1878652572631836, "learning_rate": 0.002, "loss": 2.5615, "step": 185050 }, { "epoch": 0.3686806706617366, "grad_norm": 0.15190167725086212, "learning_rate": 0.002, "loss": 2.5654, "step": 185060 }, { "epoch": 0.3687005928853755, "grad_norm": 0.17557953298091888, "learning_rate": 0.002, "loss": 2.5471, "step": 185070 }, { "epoch": 0.36872051510901444, "grad_norm": 0.1931954026222229, "learning_rate": 0.002, "loss": 2.5684, "step": 185080 }, { "epoch": 0.3687404373326533, "grad_norm": 0.1627959907054901, "learning_rate": 0.002, "loss": 2.5542, "step": 185090 }, { "epoch": 0.3687603595562922, "grad_norm": 0.17416469752788544, "learning_rate": 0.002, "loss": 2.5794, "step": 185100 }, { "epoch": 0.36878028177993116, "grad_norm": 0.17242756485939026, "learning_rate": 0.002, "loss": 2.5476, "step": 185110 }, { "epoch": 0.36880020400357005, "grad_norm": 0.17817968130111694, "learning_rate": 0.002, "loss": 2.5625, "step": 185120 }, { "epoch": 0.368820126227209, "grad_norm": 0.16156260669231415, "learning_rate": 0.002, "loss": 2.5693, "step": 185130 }, { "epoch": 0.3688400484508479, "grad_norm": 0.16287089884281158, "learning_rate": 0.002, "loss": 2.5554, "step": 185140 }, { "epoch": 0.3688599706744868, "grad_norm": 0.155453622341156, "learning_rate": 0.002, "loss": 2.5677, "step": 185150 }, { "epoch": 0.3688798928981257, "grad_norm": 0.21667030453681946, "learning_rate": 0.002, "loss": 2.579, "step": 185160 }, { "epoch": 0.3688998151217646, "grad_norm": 0.14433877170085907, "learning_rate": 0.002, "loss": 2.5389, "step": 185170 }, { "epoch": 0.36891973734540356, "grad_norm": 0.1869814246892929, "learning_rate": 0.002, "loss": 2.5592, "step": 185180 }, { "epoch": 0.36893965956904246, "grad_norm": 0.16243058443069458, "learning_rate": 0.002, "loss": 2.5531, "step": 185190 }, { "epoch": 0.36895958179268135, "grad_norm": 0.18699632585048676, "learning_rate": 0.002, "loss": 2.5676, "step": 185200 }, { "epoch": 0.3689795040163203, "grad_norm": 0.1580006331205368, "learning_rate": 0.002, "loss": 2.5709, "step": 185210 }, { "epoch": 0.3689994262399592, "grad_norm": 0.19323448836803436, "learning_rate": 0.002, "loss": 2.5704, "step": 185220 }, { "epoch": 0.36901934846359813, "grad_norm": 0.23073217272758484, "learning_rate": 0.002, "loss": 2.5644, "step": 185230 }, { "epoch": 0.369039270687237, "grad_norm": 0.17400284111499786, "learning_rate": 0.002, "loss": 2.5595, "step": 185240 }, { "epoch": 0.36905919291087597, "grad_norm": 0.1765490025281906, "learning_rate": 0.002, "loss": 2.5666, "step": 185250 }, { "epoch": 0.36907911513451486, "grad_norm": 0.1929173320531845, "learning_rate": 0.002, "loss": 2.5549, "step": 185260 }, { "epoch": 0.36909903735815375, "grad_norm": 0.18214955925941467, "learning_rate": 0.002, "loss": 2.5532, "step": 185270 }, { "epoch": 0.3691189595817927, "grad_norm": 0.15926606953144073, "learning_rate": 0.002, "loss": 2.5543, "step": 185280 }, { "epoch": 0.3691388818054316, "grad_norm": 0.14192454516887665, "learning_rate": 0.002, "loss": 2.5725, "step": 185290 }, { "epoch": 0.36915880402907053, "grad_norm": 0.19869723916053772, "learning_rate": 0.002, "loss": 2.5638, "step": 185300 }, { "epoch": 0.3691787262527094, "grad_norm": 0.16110116243362427, "learning_rate": 0.002, "loss": 2.5565, "step": 185310 }, { "epoch": 0.3691986484763483, "grad_norm": 0.15616773068904877, "learning_rate": 0.002, "loss": 2.5565, "step": 185320 }, { "epoch": 0.36921857069998726, "grad_norm": 0.1797930747270584, "learning_rate": 0.002, "loss": 2.554, "step": 185330 }, { "epoch": 0.36923849292362615, "grad_norm": 0.1607910394668579, "learning_rate": 0.002, "loss": 2.5598, "step": 185340 }, { "epoch": 0.3692584151472651, "grad_norm": 0.15475110709667206, "learning_rate": 0.002, "loss": 2.5477, "step": 185350 }, { "epoch": 0.369278337370904, "grad_norm": 0.1630920171737671, "learning_rate": 0.002, "loss": 2.5563, "step": 185360 }, { "epoch": 0.3692982595945429, "grad_norm": 0.18627768754959106, "learning_rate": 0.002, "loss": 2.5581, "step": 185370 }, { "epoch": 0.3693181818181818, "grad_norm": 0.16263076663017273, "learning_rate": 0.002, "loss": 2.5646, "step": 185380 }, { "epoch": 0.3693381040418207, "grad_norm": 0.15532907843589783, "learning_rate": 0.002, "loss": 2.5651, "step": 185390 }, { "epoch": 0.36935802626545966, "grad_norm": 0.18411122262477875, "learning_rate": 0.002, "loss": 2.5635, "step": 185400 }, { "epoch": 0.36937794848909855, "grad_norm": 0.14660830795764923, "learning_rate": 0.002, "loss": 2.5751, "step": 185410 }, { "epoch": 0.3693978707127375, "grad_norm": 0.16118304431438446, "learning_rate": 0.002, "loss": 2.5554, "step": 185420 }, { "epoch": 0.3694177929363764, "grad_norm": 0.16143645346164703, "learning_rate": 0.002, "loss": 2.5583, "step": 185430 }, { "epoch": 0.3694377151600153, "grad_norm": 0.18174733221530914, "learning_rate": 0.002, "loss": 2.5638, "step": 185440 }, { "epoch": 0.3694576373836542, "grad_norm": 0.18579082190990448, "learning_rate": 0.002, "loss": 2.5605, "step": 185450 }, { "epoch": 0.3694775596072931, "grad_norm": 0.150612473487854, "learning_rate": 0.002, "loss": 2.5584, "step": 185460 }, { "epoch": 0.36949748183093206, "grad_norm": 0.1521514505147934, "learning_rate": 0.002, "loss": 2.5441, "step": 185470 }, { "epoch": 0.36951740405457095, "grad_norm": 0.1504511833190918, "learning_rate": 0.002, "loss": 2.5655, "step": 185480 }, { "epoch": 0.36953732627820984, "grad_norm": 0.18979698419570923, "learning_rate": 0.002, "loss": 2.5544, "step": 185490 }, { "epoch": 0.3695572485018488, "grad_norm": 0.18776240944862366, "learning_rate": 0.002, "loss": 2.5765, "step": 185500 }, { "epoch": 0.3695771707254877, "grad_norm": 0.1757306605577469, "learning_rate": 0.002, "loss": 2.5505, "step": 185510 }, { "epoch": 0.3695970929491266, "grad_norm": 0.16161760687828064, "learning_rate": 0.002, "loss": 2.5578, "step": 185520 }, { "epoch": 0.3696170151727655, "grad_norm": 0.18510818481445312, "learning_rate": 0.002, "loss": 2.5552, "step": 185530 }, { "epoch": 0.36963693739640446, "grad_norm": 0.20155592262744904, "learning_rate": 0.002, "loss": 2.5734, "step": 185540 }, { "epoch": 0.36965685962004335, "grad_norm": 0.16155309975147247, "learning_rate": 0.002, "loss": 2.5867, "step": 185550 }, { "epoch": 0.36967678184368225, "grad_norm": 0.1878606230020523, "learning_rate": 0.002, "loss": 2.5709, "step": 185560 }, { "epoch": 0.3696967040673212, "grad_norm": 0.19299186766147614, "learning_rate": 0.002, "loss": 2.5695, "step": 185570 }, { "epoch": 0.3697166262909601, "grad_norm": 0.15426264703273773, "learning_rate": 0.002, "loss": 2.5494, "step": 185580 }, { "epoch": 0.36973654851459903, "grad_norm": 0.1614704728126526, "learning_rate": 0.002, "loss": 2.568, "step": 185590 }, { "epoch": 0.3697564707382379, "grad_norm": 0.16005493700504303, "learning_rate": 0.002, "loss": 2.5677, "step": 185600 }, { "epoch": 0.3697763929618768, "grad_norm": 0.16628144681453705, "learning_rate": 0.002, "loss": 2.56, "step": 185610 }, { "epoch": 0.36979631518551576, "grad_norm": 0.14253492653369904, "learning_rate": 0.002, "loss": 2.5562, "step": 185620 }, { "epoch": 0.36981623740915465, "grad_norm": 0.18252603709697723, "learning_rate": 0.002, "loss": 2.5531, "step": 185630 }, { "epoch": 0.3698361596327936, "grad_norm": 0.18689891695976257, "learning_rate": 0.002, "loss": 2.566, "step": 185640 }, { "epoch": 0.3698560818564325, "grad_norm": 0.1484488993883133, "learning_rate": 0.002, "loss": 2.542, "step": 185650 }, { "epoch": 0.3698760040800714, "grad_norm": 0.164910688996315, "learning_rate": 0.002, "loss": 2.5636, "step": 185660 }, { "epoch": 0.3698959263037103, "grad_norm": 0.17040669918060303, "learning_rate": 0.002, "loss": 2.5597, "step": 185670 }, { "epoch": 0.3699158485273492, "grad_norm": 0.2022211253643036, "learning_rate": 0.002, "loss": 2.5666, "step": 185680 }, { "epoch": 0.36993577075098816, "grad_norm": 0.173987478017807, "learning_rate": 0.002, "loss": 2.5611, "step": 185690 }, { "epoch": 0.36995569297462705, "grad_norm": 0.14414797723293304, "learning_rate": 0.002, "loss": 2.561, "step": 185700 }, { "epoch": 0.369975615198266, "grad_norm": 0.18266460299491882, "learning_rate": 0.002, "loss": 2.5443, "step": 185710 }, { "epoch": 0.3699955374219049, "grad_norm": 0.16401849687099457, "learning_rate": 0.002, "loss": 2.5581, "step": 185720 }, { "epoch": 0.3700154596455438, "grad_norm": 0.1663299947977066, "learning_rate": 0.002, "loss": 2.5807, "step": 185730 }, { "epoch": 0.3700353818691827, "grad_norm": 0.18664632737636566, "learning_rate": 0.002, "loss": 2.5611, "step": 185740 }, { "epoch": 0.3700553040928216, "grad_norm": 0.19508756697177887, "learning_rate": 0.002, "loss": 2.5642, "step": 185750 }, { "epoch": 0.37007522631646056, "grad_norm": 0.1828942447900772, "learning_rate": 0.002, "loss": 2.5468, "step": 185760 }, { "epoch": 0.37009514854009945, "grad_norm": 0.17716681957244873, "learning_rate": 0.002, "loss": 2.5728, "step": 185770 }, { "epoch": 0.37011507076373834, "grad_norm": 0.14164605736732483, "learning_rate": 0.002, "loss": 2.5583, "step": 185780 }, { "epoch": 0.3701349929873773, "grad_norm": 0.1637648046016693, "learning_rate": 0.002, "loss": 2.5714, "step": 185790 }, { "epoch": 0.3701549152110162, "grad_norm": 0.16844774782657623, "learning_rate": 0.002, "loss": 2.5541, "step": 185800 }, { "epoch": 0.3701748374346551, "grad_norm": 0.17909035086631775, "learning_rate": 0.002, "loss": 2.5645, "step": 185810 }, { "epoch": 0.370194759658294, "grad_norm": 0.15426769852638245, "learning_rate": 0.002, "loss": 2.5718, "step": 185820 }, { "epoch": 0.37021468188193296, "grad_norm": 0.16455824673175812, "learning_rate": 0.002, "loss": 2.5657, "step": 185830 }, { "epoch": 0.37023460410557185, "grad_norm": 0.1649867594242096, "learning_rate": 0.002, "loss": 2.5728, "step": 185840 }, { "epoch": 0.37025452632921074, "grad_norm": 0.1512833833694458, "learning_rate": 0.002, "loss": 2.5681, "step": 185850 }, { "epoch": 0.3702744485528497, "grad_norm": 0.15680037438869476, "learning_rate": 0.002, "loss": 2.566, "step": 185860 }, { "epoch": 0.3702943707764886, "grad_norm": 0.17069396376609802, "learning_rate": 0.002, "loss": 2.5736, "step": 185870 }, { "epoch": 0.3703142930001275, "grad_norm": 0.18074238300323486, "learning_rate": 0.002, "loss": 2.5702, "step": 185880 }, { "epoch": 0.3703342152237664, "grad_norm": 0.18256127834320068, "learning_rate": 0.002, "loss": 2.552, "step": 185890 }, { "epoch": 0.3703541374474053, "grad_norm": 0.23496660590171814, "learning_rate": 0.002, "loss": 2.5395, "step": 185900 }, { "epoch": 0.37037405967104425, "grad_norm": 0.17503803968429565, "learning_rate": 0.002, "loss": 2.5573, "step": 185910 }, { "epoch": 0.37039398189468314, "grad_norm": 0.16109900176525116, "learning_rate": 0.002, "loss": 2.5495, "step": 185920 }, { "epoch": 0.3704139041183221, "grad_norm": 0.18450243771076202, "learning_rate": 0.002, "loss": 2.5597, "step": 185930 }, { "epoch": 0.370433826341961, "grad_norm": 0.1419268250465393, "learning_rate": 0.002, "loss": 2.5499, "step": 185940 }, { "epoch": 0.37045374856559987, "grad_norm": 0.16435454785823822, "learning_rate": 0.002, "loss": 2.5455, "step": 185950 }, { "epoch": 0.3704736707892388, "grad_norm": 0.15979886054992676, "learning_rate": 0.002, "loss": 2.5526, "step": 185960 }, { "epoch": 0.3704935930128777, "grad_norm": 0.17205511033535004, "learning_rate": 0.002, "loss": 2.5645, "step": 185970 }, { "epoch": 0.37051351523651666, "grad_norm": 0.18463042378425598, "learning_rate": 0.002, "loss": 2.5469, "step": 185980 }, { "epoch": 0.37053343746015555, "grad_norm": 0.1603326052427292, "learning_rate": 0.002, "loss": 2.5621, "step": 185990 }, { "epoch": 0.3705533596837945, "grad_norm": 0.17564338445663452, "learning_rate": 0.002, "loss": 2.5705, "step": 186000 }, { "epoch": 0.3705732819074334, "grad_norm": 0.1631360650062561, "learning_rate": 0.002, "loss": 2.5714, "step": 186010 }, { "epoch": 0.3705932041310723, "grad_norm": 0.17900238931179047, "learning_rate": 0.002, "loss": 2.5544, "step": 186020 }, { "epoch": 0.3706131263547112, "grad_norm": 0.17705444991588593, "learning_rate": 0.002, "loss": 2.5684, "step": 186030 }, { "epoch": 0.3706330485783501, "grad_norm": 0.16855096817016602, "learning_rate": 0.002, "loss": 2.5623, "step": 186040 }, { "epoch": 0.37065297080198906, "grad_norm": 0.17944055795669556, "learning_rate": 0.002, "loss": 2.5598, "step": 186050 }, { "epoch": 0.37067289302562795, "grad_norm": 0.17143000662326813, "learning_rate": 0.002, "loss": 2.5497, "step": 186060 }, { "epoch": 0.37069281524926684, "grad_norm": 0.1701061725616455, "learning_rate": 0.002, "loss": 2.5486, "step": 186070 }, { "epoch": 0.3707127374729058, "grad_norm": 0.16148217022418976, "learning_rate": 0.002, "loss": 2.5502, "step": 186080 }, { "epoch": 0.3707326596965447, "grad_norm": 0.13499929010868073, "learning_rate": 0.002, "loss": 2.5668, "step": 186090 }, { "epoch": 0.3707525819201836, "grad_norm": 0.1600206047296524, "learning_rate": 0.002, "loss": 2.5749, "step": 186100 }, { "epoch": 0.3707725041438225, "grad_norm": 0.17499560117721558, "learning_rate": 0.002, "loss": 2.556, "step": 186110 }, { "epoch": 0.3707924263674614, "grad_norm": 0.17807406187057495, "learning_rate": 0.002, "loss": 2.5631, "step": 186120 }, { "epoch": 0.37081234859110035, "grad_norm": 0.1684800684452057, "learning_rate": 0.002, "loss": 2.559, "step": 186130 }, { "epoch": 0.37083227081473924, "grad_norm": 0.16027584671974182, "learning_rate": 0.002, "loss": 2.5647, "step": 186140 }, { "epoch": 0.3708521930383782, "grad_norm": 0.22034743428230286, "learning_rate": 0.002, "loss": 2.5648, "step": 186150 }, { "epoch": 0.3708721152620171, "grad_norm": 0.15552423894405365, "learning_rate": 0.002, "loss": 2.5601, "step": 186160 }, { "epoch": 0.370892037485656, "grad_norm": 0.15041843056678772, "learning_rate": 0.002, "loss": 2.5465, "step": 186170 }, { "epoch": 0.3709119597092949, "grad_norm": 0.1934482902288437, "learning_rate": 0.002, "loss": 2.5533, "step": 186180 }, { "epoch": 0.3709318819329338, "grad_norm": 0.16798177361488342, "learning_rate": 0.002, "loss": 2.5691, "step": 186190 }, { "epoch": 0.37095180415657275, "grad_norm": 0.16086623072624207, "learning_rate": 0.002, "loss": 2.565, "step": 186200 }, { "epoch": 0.37097172638021164, "grad_norm": 0.1732865422964096, "learning_rate": 0.002, "loss": 2.5643, "step": 186210 }, { "epoch": 0.3709916486038506, "grad_norm": 0.19520872831344604, "learning_rate": 0.002, "loss": 2.555, "step": 186220 }, { "epoch": 0.3710115708274895, "grad_norm": 0.17484736442565918, "learning_rate": 0.002, "loss": 2.5634, "step": 186230 }, { "epoch": 0.37103149305112837, "grad_norm": 0.15482796728610992, "learning_rate": 0.002, "loss": 2.5641, "step": 186240 }, { "epoch": 0.3710514152747673, "grad_norm": 0.16346393525600433, "learning_rate": 0.002, "loss": 2.5616, "step": 186250 }, { "epoch": 0.3710713374984062, "grad_norm": 0.16105222702026367, "learning_rate": 0.002, "loss": 2.556, "step": 186260 }, { "epoch": 0.37109125972204515, "grad_norm": 0.15925553441047668, "learning_rate": 0.002, "loss": 2.5622, "step": 186270 }, { "epoch": 0.37111118194568404, "grad_norm": 0.16477295756340027, "learning_rate": 0.002, "loss": 2.5595, "step": 186280 }, { "epoch": 0.371131104169323, "grad_norm": 0.18771933019161224, "learning_rate": 0.002, "loss": 2.586, "step": 186290 }, { "epoch": 0.3711510263929619, "grad_norm": 0.16371706128120422, "learning_rate": 0.002, "loss": 2.5554, "step": 186300 }, { "epoch": 0.37117094861660077, "grad_norm": 0.1559755653142929, "learning_rate": 0.002, "loss": 2.5683, "step": 186310 }, { "epoch": 0.3711908708402397, "grad_norm": 0.1846160888671875, "learning_rate": 0.002, "loss": 2.5552, "step": 186320 }, { "epoch": 0.3712107930638786, "grad_norm": 0.1923617571592331, "learning_rate": 0.002, "loss": 2.5661, "step": 186330 }, { "epoch": 0.37123071528751755, "grad_norm": 0.176063671708107, "learning_rate": 0.002, "loss": 2.5633, "step": 186340 }, { "epoch": 0.37125063751115644, "grad_norm": 0.1332884579896927, "learning_rate": 0.002, "loss": 2.5507, "step": 186350 }, { "epoch": 0.37127055973479534, "grad_norm": 0.15763889253139496, "learning_rate": 0.002, "loss": 2.5543, "step": 186360 }, { "epoch": 0.3712904819584343, "grad_norm": 0.15699675679206848, "learning_rate": 0.002, "loss": 2.5592, "step": 186370 }, { "epoch": 0.3713104041820732, "grad_norm": 0.16990946233272552, "learning_rate": 0.002, "loss": 2.5604, "step": 186380 }, { "epoch": 0.3713303264057121, "grad_norm": 0.16014859080314636, "learning_rate": 0.002, "loss": 2.5653, "step": 186390 }, { "epoch": 0.371350248629351, "grad_norm": 0.15860795974731445, "learning_rate": 0.002, "loss": 2.5831, "step": 186400 }, { "epoch": 0.3713701708529899, "grad_norm": 0.18131692707538605, "learning_rate": 0.002, "loss": 2.5765, "step": 186410 }, { "epoch": 0.37139009307662885, "grad_norm": 0.16829921305179596, "learning_rate": 0.002, "loss": 2.568, "step": 186420 }, { "epoch": 0.37141001530026774, "grad_norm": 0.162256121635437, "learning_rate": 0.002, "loss": 2.5726, "step": 186430 }, { "epoch": 0.3714299375239067, "grad_norm": 0.16326992213726044, "learning_rate": 0.002, "loss": 2.5572, "step": 186440 }, { "epoch": 0.3714498597475456, "grad_norm": 0.1393078863620758, "learning_rate": 0.002, "loss": 2.565, "step": 186450 }, { "epoch": 0.3714697819711845, "grad_norm": 0.2028130143880844, "learning_rate": 0.002, "loss": 2.5748, "step": 186460 }, { "epoch": 0.3714897041948234, "grad_norm": 0.2047542780637741, "learning_rate": 0.002, "loss": 2.5618, "step": 186470 }, { "epoch": 0.3715096264184623, "grad_norm": 0.15429139137268066, "learning_rate": 0.002, "loss": 2.5743, "step": 186480 }, { "epoch": 0.37152954864210125, "grad_norm": 0.14934848248958588, "learning_rate": 0.002, "loss": 2.5533, "step": 186490 }, { "epoch": 0.37154947086574014, "grad_norm": 0.16912618279457092, "learning_rate": 0.002, "loss": 2.5503, "step": 186500 }, { "epoch": 0.3715693930893791, "grad_norm": 0.16235174238681793, "learning_rate": 0.002, "loss": 2.5619, "step": 186510 }, { "epoch": 0.371589315313018, "grad_norm": 0.1877082884311676, "learning_rate": 0.002, "loss": 2.5723, "step": 186520 }, { "epoch": 0.37160923753665687, "grad_norm": 0.16028916835784912, "learning_rate": 0.002, "loss": 2.5518, "step": 186530 }, { "epoch": 0.3716291597602958, "grad_norm": 0.1406344324350357, "learning_rate": 0.002, "loss": 2.5515, "step": 186540 }, { "epoch": 0.3716490819839347, "grad_norm": 0.1670786291360855, "learning_rate": 0.002, "loss": 2.5637, "step": 186550 }, { "epoch": 0.37166900420757365, "grad_norm": 0.21781909465789795, "learning_rate": 0.002, "loss": 2.554, "step": 186560 }, { "epoch": 0.37168892643121254, "grad_norm": 0.16984732449054718, "learning_rate": 0.002, "loss": 2.5475, "step": 186570 }, { "epoch": 0.3717088486548515, "grad_norm": 0.18958529829978943, "learning_rate": 0.002, "loss": 2.5662, "step": 186580 }, { "epoch": 0.3717287708784904, "grad_norm": 0.17395786941051483, "learning_rate": 0.002, "loss": 2.5679, "step": 186590 }, { "epoch": 0.37174869310212927, "grad_norm": 0.1657400131225586, "learning_rate": 0.002, "loss": 2.5615, "step": 186600 }, { "epoch": 0.3717686153257682, "grad_norm": 0.17501991987228394, "learning_rate": 0.002, "loss": 2.5601, "step": 186610 }, { "epoch": 0.3717885375494071, "grad_norm": 0.17588001489639282, "learning_rate": 0.002, "loss": 2.5744, "step": 186620 }, { "epoch": 0.37180845977304605, "grad_norm": 0.16627661883831024, "learning_rate": 0.002, "loss": 2.5828, "step": 186630 }, { "epoch": 0.37182838199668494, "grad_norm": 0.13811267912387848, "learning_rate": 0.002, "loss": 2.5619, "step": 186640 }, { "epoch": 0.37184830422032383, "grad_norm": 0.16779956221580505, "learning_rate": 0.002, "loss": 2.5629, "step": 186650 }, { "epoch": 0.3718682264439628, "grad_norm": 0.17416948080062866, "learning_rate": 0.002, "loss": 2.5683, "step": 186660 }, { "epoch": 0.37188814866760167, "grad_norm": 0.1765154004096985, "learning_rate": 0.002, "loss": 2.5615, "step": 186670 }, { "epoch": 0.3719080708912406, "grad_norm": 0.16225239634513855, "learning_rate": 0.002, "loss": 2.5596, "step": 186680 }, { "epoch": 0.3719279931148795, "grad_norm": 0.17147792875766754, "learning_rate": 0.002, "loss": 2.5548, "step": 186690 }, { "epoch": 0.3719479153385184, "grad_norm": 0.1582738310098648, "learning_rate": 0.002, "loss": 2.5636, "step": 186700 }, { "epoch": 0.37196783756215734, "grad_norm": 0.1745816022157669, "learning_rate": 0.002, "loss": 2.5631, "step": 186710 }, { "epoch": 0.37198775978579623, "grad_norm": 0.15019336342811584, "learning_rate": 0.002, "loss": 2.5596, "step": 186720 }, { "epoch": 0.3720076820094352, "grad_norm": 0.18682938814163208, "learning_rate": 0.002, "loss": 2.5542, "step": 186730 }, { "epoch": 0.37202760423307407, "grad_norm": 0.16444332897663116, "learning_rate": 0.002, "loss": 2.5561, "step": 186740 }, { "epoch": 0.372047526456713, "grad_norm": 0.15888181328773499, "learning_rate": 0.002, "loss": 2.5671, "step": 186750 }, { "epoch": 0.3720674486803519, "grad_norm": 0.1825760006904602, "learning_rate": 0.002, "loss": 2.562, "step": 186760 }, { "epoch": 0.3720873709039908, "grad_norm": 0.14036312699317932, "learning_rate": 0.002, "loss": 2.5577, "step": 186770 }, { "epoch": 0.37210729312762975, "grad_norm": 0.18557807803153992, "learning_rate": 0.002, "loss": 2.5596, "step": 186780 }, { "epoch": 0.37212721535126864, "grad_norm": 0.21583618223667145, "learning_rate": 0.002, "loss": 2.5589, "step": 186790 }, { "epoch": 0.3721471375749076, "grad_norm": 0.17826823890209198, "learning_rate": 0.002, "loss": 2.5542, "step": 186800 }, { "epoch": 0.3721670597985465, "grad_norm": 0.1585446298122406, "learning_rate": 0.002, "loss": 2.5551, "step": 186810 }, { "epoch": 0.37218698202218536, "grad_norm": 0.14639949798583984, "learning_rate": 0.002, "loss": 2.5409, "step": 186820 }, { "epoch": 0.3722069042458243, "grad_norm": 0.18084579706192017, "learning_rate": 0.002, "loss": 2.5628, "step": 186830 }, { "epoch": 0.3722268264694632, "grad_norm": 0.12850339710712433, "learning_rate": 0.002, "loss": 2.5722, "step": 186840 }, { "epoch": 0.37224674869310215, "grad_norm": 0.17389731109142303, "learning_rate": 0.002, "loss": 2.5674, "step": 186850 }, { "epoch": 0.37226667091674104, "grad_norm": 0.15526674687862396, "learning_rate": 0.002, "loss": 2.5455, "step": 186860 }, { "epoch": 0.37228659314038, "grad_norm": 0.18128733336925507, "learning_rate": 0.002, "loss": 2.5507, "step": 186870 }, { "epoch": 0.3723065153640189, "grad_norm": 0.17560744285583496, "learning_rate": 0.002, "loss": 2.5597, "step": 186880 }, { "epoch": 0.37232643758765777, "grad_norm": 0.1927127242088318, "learning_rate": 0.002, "loss": 2.5648, "step": 186890 }, { "epoch": 0.3723463598112967, "grad_norm": 0.1785968840122223, "learning_rate": 0.002, "loss": 2.5627, "step": 186900 }, { "epoch": 0.3723662820349356, "grad_norm": 0.19992923736572266, "learning_rate": 0.002, "loss": 2.5619, "step": 186910 }, { "epoch": 0.37238620425857455, "grad_norm": 0.16610141098499298, "learning_rate": 0.002, "loss": 2.5528, "step": 186920 }, { "epoch": 0.37240612648221344, "grad_norm": 0.194230854511261, "learning_rate": 0.002, "loss": 2.5551, "step": 186930 }, { "epoch": 0.37242604870585233, "grad_norm": 0.1636398583650589, "learning_rate": 0.002, "loss": 2.5579, "step": 186940 }, { "epoch": 0.3724459709294913, "grad_norm": 0.16245806217193604, "learning_rate": 0.002, "loss": 2.5562, "step": 186950 }, { "epoch": 0.37246589315313017, "grad_norm": 0.1801283061504364, "learning_rate": 0.002, "loss": 2.5619, "step": 186960 }, { "epoch": 0.3724858153767691, "grad_norm": 0.16177862882614136, "learning_rate": 0.002, "loss": 2.5653, "step": 186970 }, { "epoch": 0.372505737600408, "grad_norm": 0.1984063982963562, "learning_rate": 0.002, "loss": 2.5661, "step": 186980 }, { "epoch": 0.3725256598240469, "grad_norm": 0.15643642842769623, "learning_rate": 0.002, "loss": 2.5521, "step": 186990 }, { "epoch": 0.37254558204768584, "grad_norm": 0.1571362465620041, "learning_rate": 0.002, "loss": 2.5629, "step": 187000 }, { "epoch": 0.37256550427132473, "grad_norm": 0.15845248103141785, "learning_rate": 0.002, "loss": 2.5673, "step": 187010 }, { "epoch": 0.3725854264949637, "grad_norm": 0.17496390640735626, "learning_rate": 0.002, "loss": 2.5422, "step": 187020 }, { "epoch": 0.37260534871860257, "grad_norm": 0.17578843235969543, "learning_rate": 0.002, "loss": 2.5474, "step": 187030 }, { "epoch": 0.3726252709422415, "grad_norm": 0.17108868062496185, "learning_rate": 0.002, "loss": 2.571, "step": 187040 }, { "epoch": 0.3726451931658804, "grad_norm": 0.146854966878891, "learning_rate": 0.002, "loss": 2.551, "step": 187050 }, { "epoch": 0.3726651153895193, "grad_norm": 0.18382805585861206, "learning_rate": 0.002, "loss": 2.5679, "step": 187060 }, { "epoch": 0.37268503761315824, "grad_norm": 0.13504649698734283, "learning_rate": 0.002, "loss": 2.5499, "step": 187070 }, { "epoch": 0.37270495983679713, "grad_norm": 0.17151835560798645, "learning_rate": 0.002, "loss": 2.5516, "step": 187080 }, { "epoch": 0.3727248820604361, "grad_norm": 0.16079081594944, "learning_rate": 0.002, "loss": 2.5722, "step": 187090 }, { "epoch": 0.37274480428407497, "grad_norm": 0.15726423263549805, "learning_rate": 0.002, "loss": 2.5573, "step": 187100 }, { "epoch": 0.37276472650771386, "grad_norm": 0.18555212020874023, "learning_rate": 0.002, "loss": 2.5453, "step": 187110 }, { "epoch": 0.3727846487313528, "grad_norm": 0.15918514132499695, "learning_rate": 0.002, "loss": 2.5525, "step": 187120 }, { "epoch": 0.3728045709549917, "grad_norm": 0.17141562700271606, "learning_rate": 0.002, "loss": 2.5692, "step": 187130 }, { "epoch": 0.37282449317863064, "grad_norm": 0.14792388677597046, "learning_rate": 0.002, "loss": 2.5669, "step": 187140 }, { "epoch": 0.37284441540226954, "grad_norm": 0.17869538068771362, "learning_rate": 0.002, "loss": 2.5397, "step": 187150 }, { "epoch": 0.3728643376259084, "grad_norm": 0.15840278565883636, "learning_rate": 0.002, "loss": 2.5486, "step": 187160 }, { "epoch": 0.3728842598495474, "grad_norm": 0.1659000962972641, "learning_rate": 0.002, "loss": 2.5728, "step": 187170 }, { "epoch": 0.37290418207318626, "grad_norm": 0.1629798412322998, "learning_rate": 0.002, "loss": 2.5665, "step": 187180 }, { "epoch": 0.3729241042968252, "grad_norm": 0.2099083811044693, "learning_rate": 0.002, "loss": 2.5661, "step": 187190 }, { "epoch": 0.3729440265204641, "grad_norm": 0.16320468485355377, "learning_rate": 0.002, "loss": 2.5584, "step": 187200 }, { "epoch": 0.37296394874410305, "grad_norm": 0.15070895850658417, "learning_rate": 0.002, "loss": 2.5565, "step": 187210 }, { "epoch": 0.37298387096774194, "grad_norm": 0.15346865355968475, "learning_rate": 0.002, "loss": 2.5604, "step": 187220 }, { "epoch": 0.37300379319138083, "grad_norm": 0.1642216295003891, "learning_rate": 0.002, "loss": 2.5547, "step": 187230 }, { "epoch": 0.3730237154150198, "grad_norm": 0.15473148226737976, "learning_rate": 0.002, "loss": 2.5587, "step": 187240 }, { "epoch": 0.37304363763865866, "grad_norm": 0.16055412590503693, "learning_rate": 0.002, "loss": 2.5664, "step": 187250 }, { "epoch": 0.3730635598622976, "grad_norm": 0.16474154591560364, "learning_rate": 0.002, "loss": 2.567, "step": 187260 }, { "epoch": 0.3730834820859365, "grad_norm": 0.150762677192688, "learning_rate": 0.002, "loss": 2.5646, "step": 187270 }, { "epoch": 0.3731034043095754, "grad_norm": 0.14991143345832825, "learning_rate": 0.002, "loss": 2.5673, "step": 187280 }, { "epoch": 0.37312332653321434, "grad_norm": 0.21015740931034088, "learning_rate": 0.002, "loss": 2.5708, "step": 187290 }, { "epoch": 0.37314324875685323, "grad_norm": 0.14948409795761108, "learning_rate": 0.002, "loss": 2.5605, "step": 187300 }, { "epoch": 0.3731631709804922, "grad_norm": 0.17412877082824707, "learning_rate": 0.002, "loss": 2.5545, "step": 187310 }, { "epoch": 0.37318309320413107, "grad_norm": 0.16732648015022278, "learning_rate": 0.002, "loss": 2.5696, "step": 187320 }, { "epoch": 0.37320301542777, "grad_norm": 0.1897709220647812, "learning_rate": 0.002, "loss": 2.5455, "step": 187330 }, { "epoch": 0.3732229376514089, "grad_norm": 0.14785532653331757, "learning_rate": 0.002, "loss": 2.5584, "step": 187340 }, { "epoch": 0.3732428598750478, "grad_norm": 0.14071761071681976, "learning_rate": 0.002, "loss": 2.5553, "step": 187350 }, { "epoch": 0.37326278209868674, "grad_norm": 0.16174781322479248, "learning_rate": 0.002, "loss": 2.564, "step": 187360 }, { "epoch": 0.37328270432232563, "grad_norm": 0.15063750743865967, "learning_rate": 0.002, "loss": 2.5617, "step": 187370 }, { "epoch": 0.3733026265459646, "grad_norm": 0.15038926899433136, "learning_rate": 0.002, "loss": 2.5462, "step": 187380 }, { "epoch": 0.37332254876960347, "grad_norm": 0.19091017544269562, "learning_rate": 0.002, "loss": 2.5535, "step": 187390 }, { "epoch": 0.37334247099324236, "grad_norm": 0.1657014638185501, "learning_rate": 0.002, "loss": 2.5654, "step": 187400 }, { "epoch": 0.3733623932168813, "grad_norm": 0.18725010752677917, "learning_rate": 0.002, "loss": 2.5685, "step": 187410 }, { "epoch": 0.3733823154405202, "grad_norm": 0.14620813727378845, "learning_rate": 0.002, "loss": 2.5433, "step": 187420 }, { "epoch": 0.37340223766415914, "grad_norm": 0.21275411546230316, "learning_rate": 0.002, "loss": 2.5558, "step": 187430 }, { "epoch": 0.37342215988779803, "grad_norm": 0.16581639647483826, "learning_rate": 0.002, "loss": 2.571, "step": 187440 }, { "epoch": 0.3734420821114369, "grad_norm": 0.14838917553424835, "learning_rate": 0.002, "loss": 2.5735, "step": 187450 }, { "epoch": 0.37346200433507587, "grad_norm": 0.15146467089653015, "learning_rate": 0.002, "loss": 2.5549, "step": 187460 }, { "epoch": 0.37348192655871476, "grad_norm": 0.18004918098449707, "learning_rate": 0.002, "loss": 2.5524, "step": 187470 }, { "epoch": 0.3735018487823537, "grad_norm": 0.18526433408260345, "learning_rate": 0.002, "loss": 2.5587, "step": 187480 }, { "epoch": 0.3735217710059926, "grad_norm": 0.15388959646224976, "learning_rate": 0.002, "loss": 2.5644, "step": 187490 }, { "epoch": 0.37354169322963154, "grad_norm": 0.1732819825410843, "learning_rate": 0.002, "loss": 2.5655, "step": 187500 }, { "epoch": 0.37356161545327043, "grad_norm": 0.18966218829154968, "learning_rate": 0.002, "loss": 2.5618, "step": 187510 }, { "epoch": 0.3735815376769093, "grad_norm": 0.17194412648677826, "learning_rate": 0.002, "loss": 2.5636, "step": 187520 }, { "epoch": 0.37360145990054827, "grad_norm": 0.16407416760921478, "learning_rate": 0.002, "loss": 2.55, "step": 187530 }, { "epoch": 0.37362138212418716, "grad_norm": 0.1687493771314621, "learning_rate": 0.002, "loss": 2.5668, "step": 187540 }, { "epoch": 0.3736413043478261, "grad_norm": 0.14733099937438965, "learning_rate": 0.002, "loss": 2.5667, "step": 187550 }, { "epoch": 0.373661226571465, "grad_norm": 0.19040422141551971, "learning_rate": 0.002, "loss": 2.5544, "step": 187560 }, { "epoch": 0.3736811487951039, "grad_norm": 0.17487101256847382, "learning_rate": 0.002, "loss": 2.5621, "step": 187570 }, { "epoch": 0.37370107101874284, "grad_norm": 0.18736347556114197, "learning_rate": 0.002, "loss": 2.5659, "step": 187580 }, { "epoch": 0.3737209932423817, "grad_norm": 0.16781798005104065, "learning_rate": 0.002, "loss": 2.5657, "step": 187590 }, { "epoch": 0.3737409154660207, "grad_norm": 0.15680243074893951, "learning_rate": 0.002, "loss": 2.5479, "step": 187600 }, { "epoch": 0.37376083768965956, "grad_norm": 0.1758233606815338, "learning_rate": 0.002, "loss": 2.5578, "step": 187610 }, { "epoch": 0.3737807599132985, "grad_norm": 0.15246306359767914, "learning_rate": 0.002, "loss": 2.5608, "step": 187620 }, { "epoch": 0.3738006821369374, "grad_norm": 0.1654765009880066, "learning_rate": 0.002, "loss": 2.5679, "step": 187630 }, { "epoch": 0.3738206043605763, "grad_norm": 0.17276765406131744, "learning_rate": 0.002, "loss": 2.5622, "step": 187640 }, { "epoch": 0.37384052658421524, "grad_norm": 0.15877015888690948, "learning_rate": 0.002, "loss": 2.5538, "step": 187650 }, { "epoch": 0.37386044880785413, "grad_norm": 0.14540037512779236, "learning_rate": 0.002, "loss": 2.5615, "step": 187660 }, { "epoch": 0.3738803710314931, "grad_norm": 0.1474667489528656, "learning_rate": 0.002, "loss": 2.5523, "step": 187670 }, { "epoch": 0.37390029325513197, "grad_norm": 0.16595986485481262, "learning_rate": 0.002, "loss": 2.5471, "step": 187680 }, { "epoch": 0.37392021547877086, "grad_norm": 0.16623660922050476, "learning_rate": 0.002, "loss": 2.5617, "step": 187690 }, { "epoch": 0.3739401377024098, "grad_norm": 0.17826826870441437, "learning_rate": 0.002, "loss": 2.5582, "step": 187700 }, { "epoch": 0.3739600599260487, "grad_norm": 0.13932940363883972, "learning_rate": 0.002, "loss": 2.5715, "step": 187710 }, { "epoch": 0.37397998214968764, "grad_norm": 0.15767060220241547, "learning_rate": 0.002, "loss": 2.5628, "step": 187720 }, { "epoch": 0.37399990437332653, "grad_norm": 0.1675458401441574, "learning_rate": 0.002, "loss": 2.5522, "step": 187730 }, { "epoch": 0.3740198265969654, "grad_norm": 0.18408195674419403, "learning_rate": 0.002, "loss": 2.5535, "step": 187740 }, { "epoch": 0.37403974882060437, "grad_norm": 0.14982318878173828, "learning_rate": 0.002, "loss": 2.5603, "step": 187750 }, { "epoch": 0.37405967104424326, "grad_norm": 0.17156802117824554, "learning_rate": 0.002, "loss": 2.5569, "step": 187760 }, { "epoch": 0.3740795932678822, "grad_norm": 0.19659386575222015, "learning_rate": 0.002, "loss": 2.556, "step": 187770 }, { "epoch": 0.3740995154915211, "grad_norm": 0.15860629081726074, "learning_rate": 0.002, "loss": 2.556, "step": 187780 }, { "epoch": 0.37411943771516004, "grad_norm": 0.16842392086982727, "learning_rate": 0.002, "loss": 2.5581, "step": 187790 }, { "epoch": 0.37413935993879893, "grad_norm": 0.20450459420681, "learning_rate": 0.002, "loss": 2.5613, "step": 187800 }, { "epoch": 0.3741592821624378, "grad_norm": 0.16839179396629333, "learning_rate": 0.002, "loss": 2.5562, "step": 187810 }, { "epoch": 0.37417920438607677, "grad_norm": 0.19366644322872162, "learning_rate": 0.002, "loss": 2.5608, "step": 187820 }, { "epoch": 0.37419912660971566, "grad_norm": 0.17536890506744385, "learning_rate": 0.002, "loss": 2.5705, "step": 187830 }, { "epoch": 0.3742190488333546, "grad_norm": 0.15655605494976044, "learning_rate": 0.002, "loss": 2.5561, "step": 187840 }, { "epoch": 0.3742389710569935, "grad_norm": 0.15619908273220062, "learning_rate": 0.002, "loss": 2.5605, "step": 187850 }, { "epoch": 0.3742588932806324, "grad_norm": 0.15172748267650604, "learning_rate": 0.002, "loss": 2.5493, "step": 187860 }, { "epoch": 0.37427881550427133, "grad_norm": 0.171345517039299, "learning_rate": 0.002, "loss": 2.5532, "step": 187870 }, { "epoch": 0.3742987377279102, "grad_norm": 0.1517019271850586, "learning_rate": 0.002, "loss": 2.5586, "step": 187880 }, { "epoch": 0.37431865995154917, "grad_norm": 0.15216661989688873, "learning_rate": 0.002, "loss": 2.5706, "step": 187890 }, { "epoch": 0.37433858217518806, "grad_norm": 0.20940734446048737, "learning_rate": 0.002, "loss": 2.565, "step": 187900 }, { "epoch": 0.37435850439882695, "grad_norm": 0.14272905886173248, "learning_rate": 0.002, "loss": 2.5666, "step": 187910 }, { "epoch": 0.3743784266224659, "grad_norm": 0.2212427854537964, "learning_rate": 0.002, "loss": 2.5677, "step": 187920 }, { "epoch": 0.3743983488461048, "grad_norm": 0.19564785063266754, "learning_rate": 0.002, "loss": 2.5608, "step": 187930 }, { "epoch": 0.37441827106974374, "grad_norm": 0.15958407521247864, "learning_rate": 0.002, "loss": 2.5575, "step": 187940 }, { "epoch": 0.3744381932933826, "grad_norm": 0.155436173081398, "learning_rate": 0.002, "loss": 2.5475, "step": 187950 }, { "epoch": 0.3744581155170216, "grad_norm": 0.21413685381412506, "learning_rate": 0.002, "loss": 2.5667, "step": 187960 }, { "epoch": 0.37447803774066046, "grad_norm": 0.1566641926765442, "learning_rate": 0.002, "loss": 2.5721, "step": 187970 }, { "epoch": 0.37449795996429935, "grad_norm": 0.158636212348938, "learning_rate": 0.002, "loss": 2.5655, "step": 187980 }, { "epoch": 0.3745178821879383, "grad_norm": 0.17660225927829742, "learning_rate": 0.002, "loss": 2.539, "step": 187990 }, { "epoch": 0.3745378044115772, "grad_norm": 0.16945873200893402, "learning_rate": 0.002, "loss": 2.568, "step": 188000 }, { "epoch": 0.37455772663521614, "grad_norm": 0.17215900123119354, "learning_rate": 0.002, "loss": 2.5546, "step": 188010 }, { "epoch": 0.37457764885885503, "grad_norm": 0.17640967667102814, "learning_rate": 0.002, "loss": 2.55, "step": 188020 }, { "epoch": 0.3745975710824939, "grad_norm": 0.17861180007457733, "learning_rate": 0.002, "loss": 2.5701, "step": 188030 }, { "epoch": 0.37461749330613286, "grad_norm": 0.18625076115131378, "learning_rate": 0.002, "loss": 2.559, "step": 188040 }, { "epoch": 0.37463741552977176, "grad_norm": 0.14714130759239197, "learning_rate": 0.002, "loss": 2.5482, "step": 188050 }, { "epoch": 0.3746573377534107, "grad_norm": 0.218602254986763, "learning_rate": 0.002, "loss": 2.5617, "step": 188060 }, { "epoch": 0.3746772599770496, "grad_norm": 0.14055991172790527, "learning_rate": 0.002, "loss": 2.5449, "step": 188070 }, { "epoch": 0.37469718220068854, "grad_norm": 0.18356963992118835, "learning_rate": 0.002, "loss": 2.5677, "step": 188080 }, { "epoch": 0.37471710442432743, "grad_norm": 0.16960686445236206, "learning_rate": 0.002, "loss": 2.5749, "step": 188090 }, { "epoch": 0.3747370266479663, "grad_norm": 0.17286010086536407, "learning_rate": 0.002, "loss": 2.5485, "step": 188100 }, { "epoch": 0.37475694887160527, "grad_norm": 0.1769038885831833, "learning_rate": 0.002, "loss": 2.5663, "step": 188110 }, { "epoch": 0.37477687109524416, "grad_norm": 0.21624909341335297, "learning_rate": 0.002, "loss": 2.5467, "step": 188120 }, { "epoch": 0.3747967933188831, "grad_norm": 0.17733469605445862, "learning_rate": 0.002, "loss": 2.5443, "step": 188130 }, { "epoch": 0.374816715542522, "grad_norm": 0.1691708266735077, "learning_rate": 0.002, "loss": 2.5547, "step": 188140 }, { "epoch": 0.3748366377661609, "grad_norm": 0.15717245638370514, "learning_rate": 0.002, "loss": 2.5612, "step": 188150 }, { "epoch": 0.37485655998979983, "grad_norm": 0.18359407782554626, "learning_rate": 0.002, "loss": 2.5507, "step": 188160 }, { "epoch": 0.3748764822134387, "grad_norm": 0.18347306549549103, "learning_rate": 0.002, "loss": 2.5409, "step": 188170 }, { "epoch": 0.37489640443707767, "grad_norm": 0.1736762672662735, "learning_rate": 0.002, "loss": 2.5518, "step": 188180 }, { "epoch": 0.37491632666071656, "grad_norm": 0.15475274622440338, "learning_rate": 0.002, "loss": 2.5504, "step": 188190 }, { "epoch": 0.37493624888435545, "grad_norm": 0.19932910799980164, "learning_rate": 0.002, "loss": 2.572, "step": 188200 }, { "epoch": 0.3749561711079944, "grad_norm": 0.1330323964357376, "learning_rate": 0.002, "loss": 2.5651, "step": 188210 }, { "epoch": 0.3749760933316333, "grad_norm": 0.18171417713165283, "learning_rate": 0.002, "loss": 2.5511, "step": 188220 }, { "epoch": 0.37499601555527223, "grad_norm": 0.14907436072826385, "learning_rate": 0.002, "loss": 2.5821, "step": 188230 }, { "epoch": 0.3750159377789111, "grad_norm": 0.16559840738773346, "learning_rate": 0.002, "loss": 2.5658, "step": 188240 }, { "epoch": 0.37503586000255007, "grad_norm": 0.18880324065685272, "learning_rate": 0.002, "loss": 2.5523, "step": 188250 }, { "epoch": 0.37505578222618896, "grad_norm": 0.15946637094020844, "learning_rate": 0.002, "loss": 2.5678, "step": 188260 }, { "epoch": 0.37507570444982785, "grad_norm": 0.17524121701717377, "learning_rate": 0.002, "loss": 2.5756, "step": 188270 }, { "epoch": 0.3750956266734668, "grad_norm": 0.16549479961395264, "learning_rate": 0.002, "loss": 2.5573, "step": 188280 }, { "epoch": 0.3751155488971057, "grad_norm": 0.18287251889705658, "learning_rate": 0.002, "loss": 2.5631, "step": 188290 }, { "epoch": 0.37513547112074463, "grad_norm": 0.16552360355854034, "learning_rate": 0.002, "loss": 2.5674, "step": 188300 }, { "epoch": 0.3751553933443835, "grad_norm": 0.16337546706199646, "learning_rate": 0.002, "loss": 2.5579, "step": 188310 }, { "epoch": 0.3751753155680224, "grad_norm": 0.15189822018146515, "learning_rate": 0.002, "loss": 2.5508, "step": 188320 }, { "epoch": 0.37519523779166136, "grad_norm": 0.21514365077018738, "learning_rate": 0.002, "loss": 2.5606, "step": 188330 }, { "epoch": 0.37521516001530025, "grad_norm": 0.16894569993019104, "learning_rate": 0.002, "loss": 2.5646, "step": 188340 }, { "epoch": 0.3752350822389392, "grad_norm": 0.13807177543640137, "learning_rate": 0.002, "loss": 2.5545, "step": 188350 }, { "epoch": 0.3752550044625781, "grad_norm": 0.1501394361257553, "learning_rate": 0.002, "loss": 2.5523, "step": 188360 }, { "epoch": 0.37527492668621704, "grad_norm": 0.17850814759731293, "learning_rate": 0.002, "loss": 2.5734, "step": 188370 }, { "epoch": 0.3752948489098559, "grad_norm": 0.1502048522233963, "learning_rate": 0.002, "loss": 2.5715, "step": 188380 }, { "epoch": 0.3753147711334948, "grad_norm": 0.15127910673618317, "learning_rate": 0.002, "loss": 2.5603, "step": 188390 }, { "epoch": 0.37533469335713376, "grad_norm": 0.16114650666713715, "learning_rate": 0.002, "loss": 2.5613, "step": 188400 }, { "epoch": 0.37535461558077265, "grad_norm": 0.20942844450473785, "learning_rate": 0.002, "loss": 2.5702, "step": 188410 }, { "epoch": 0.3753745378044116, "grad_norm": 0.19844529032707214, "learning_rate": 0.002, "loss": 2.5735, "step": 188420 }, { "epoch": 0.3753944600280505, "grad_norm": 0.1488158106803894, "learning_rate": 0.002, "loss": 2.5589, "step": 188430 }, { "epoch": 0.3754143822516894, "grad_norm": 0.16002319753170013, "learning_rate": 0.002, "loss": 2.5506, "step": 188440 }, { "epoch": 0.37543430447532833, "grad_norm": 0.17802020907402039, "learning_rate": 0.002, "loss": 2.5669, "step": 188450 }, { "epoch": 0.3754542266989672, "grad_norm": 0.15813839435577393, "learning_rate": 0.002, "loss": 2.5546, "step": 188460 }, { "epoch": 0.37547414892260617, "grad_norm": 0.1511310338973999, "learning_rate": 0.002, "loss": 2.5638, "step": 188470 }, { "epoch": 0.37549407114624506, "grad_norm": 0.16201601922512054, "learning_rate": 0.002, "loss": 2.5516, "step": 188480 }, { "epoch": 0.37551399336988395, "grad_norm": 0.1634806990623474, "learning_rate": 0.002, "loss": 2.5613, "step": 188490 }, { "epoch": 0.3755339155935229, "grad_norm": 0.16890043020248413, "learning_rate": 0.002, "loss": 2.5475, "step": 188500 }, { "epoch": 0.3755538378171618, "grad_norm": 0.16815578937530518, "learning_rate": 0.002, "loss": 2.564, "step": 188510 }, { "epoch": 0.37557376004080073, "grad_norm": 0.16472941637039185, "learning_rate": 0.002, "loss": 2.5589, "step": 188520 }, { "epoch": 0.3755936822644396, "grad_norm": 0.16527125239372253, "learning_rate": 0.002, "loss": 2.5722, "step": 188530 }, { "epoch": 0.37561360448807857, "grad_norm": 0.15387102961540222, "learning_rate": 0.002, "loss": 2.5446, "step": 188540 }, { "epoch": 0.37563352671171746, "grad_norm": 0.2066163867712021, "learning_rate": 0.002, "loss": 2.5709, "step": 188550 }, { "epoch": 0.37565344893535635, "grad_norm": 0.18103627860546112, "learning_rate": 0.002, "loss": 2.5635, "step": 188560 }, { "epoch": 0.3756733711589953, "grad_norm": 0.2103787064552307, "learning_rate": 0.002, "loss": 2.576, "step": 188570 }, { "epoch": 0.3756932933826342, "grad_norm": 0.15510998666286469, "learning_rate": 0.002, "loss": 2.5578, "step": 188580 }, { "epoch": 0.37571321560627313, "grad_norm": 0.17188702523708344, "learning_rate": 0.002, "loss": 2.5689, "step": 188590 }, { "epoch": 0.375733137829912, "grad_norm": 0.15699167549610138, "learning_rate": 0.002, "loss": 2.5667, "step": 188600 }, { "epoch": 0.3757530600535509, "grad_norm": 0.1696908175945282, "learning_rate": 0.002, "loss": 2.564, "step": 188610 }, { "epoch": 0.37577298227718986, "grad_norm": 0.1857919842004776, "learning_rate": 0.002, "loss": 2.5559, "step": 188620 }, { "epoch": 0.37579290450082875, "grad_norm": 0.16887886822223663, "learning_rate": 0.002, "loss": 2.5471, "step": 188630 }, { "epoch": 0.3758128267244677, "grad_norm": 0.1749476194381714, "learning_rate": 0.002, "loss": 2.5629, "step": 188640 }, { "epoch": 0.3758327489481066, "grad_norm": 0.17542998492717743, "learning_rate": 0.002, "loss": 2.5784, "step": 188650 }, { "epoch": 0.3758526711717455, "grad_norm": 0.15654496848583221, "learning_rate": 0.002, "loss": 2.5557, "step": 188660 }, { "epoch": 0.3758725933953844, "grad_norm": 0.15679042041301727, "learning_rate": 0.002, "loss": 2.5533, "step": 188670 }, { "epoch": 0.3758925156190233, "grad_norm": 0.21080192923545837, "learning_rate": 0.002, "loss": 2.5651, "step": 188680 }, { "epoch": 0.37591243784266226, "grad_norm": 0.18475665152072906, "learning_rate": 0.002, "loss": 2.5576, "step": 188690 }, { "epoch": 0.37593236006630115, "grad_norm": 0.16113506257534027, "learning_rate": 0.002, "loss": 2.5654, "step": 188700 }, { "epoch": 0.3759522822899401, "grad_norm": 0.14279890060424805, "learning_rate": 0.002, "loss": 2.5571, "step": 188710 }, { "epoch": 0.375972204513579, "grad_norm": 0.16883395612239838, "learning_rate": 0.002, "loss": 2.5588, "step": 188720 }, { "epoch": 0.3759921267372179, "grad_norm": 0.1527751386165619, "learning_rate": 0.002, "loss": 2.562, "step": 188730 }, { "epoch": 0.3760120489608568, "grad_norm": 0.1501077115535736, "learning_rate": 0.002, "loss": 2.5591, "step": 188740 }, { "epoch": 0.3760319711844957, "grad_norm": 0.16567420959472656, "learning_rate": 0.002, "loss": 2.5578, "step": 188750 }, { "epoch": 0.37605189340813466, "grad_norm": 0.19036176800727844, "learning_rate": 0.002, "loss": 2.5555, "step": 188760 }, { "epoch": 0.37607181563177355, "grad_norm": 0.1806315779685974, "learning_rate": 0.002, "loss": 2.5784, "step": 188770 }, { "epoch": 0.37609173785541244, "grad_norm": 0.17173539102077484, "learning_rate": 0.002, "loss": 2.5485, "step": 188780 }, { "epoch": 0.3761116600790514, "grad_norm": 0.1438584178686142, "learning_rate": 0.002, "loss": 2.565, "step": 188790 }, { "epoch": 0.3761315823026903, "grad_norm": 0.16029348969459534, "learning_rate": 0.002, "loss": 2.5374, "step": 188800 }, { "epoch": 0.37615150452632923, "grad_norm": 0.19126088917255402, "learning_rate": 0.002, "loss": 2.5651, "step": 188810 }, { "epoch": 0.3761714267499681, "grad_norm": 0.14673644304275513, "learning_rate": 0.002, "loss": 2.5639, "step": 188820 }, { "epoch": 0.37619134897360706, "grad_norm": 0.15611955523490906, "learning_rate": 0.002, "loss": 2.5642, "step": 188830 }, { "epoch": 0.37621127119724596, "grad_norm": 0.15387646853923798, "learning_rate": 0.002, "loss": 2.5437, "step": 188840 }, { "epoch": 0.37623119342088485, "grad_norm": 0.17499640583992004, "learning_rate": 0.002, "loss": 2.5517, "step": 188850 }, { "epoch": 0.3762511156445238, "grad_norm": 0.16694499552249908, "learning_rate": 0.002, "loss": 2.5497, "step": 188860 }, { "epoch": 0.3762710378681627, "grad_norm": 0.15041452646255493, "learning_rate": 0.002, "loss": 2.55, "step": 188870 }, { "epoch": 0.37629096009180163, "grad_norm": 0.18526464700698853, "learning_rate": 0.002, "loss": 2.5714, "step": 188880 }, { "epoch": 0.3763108823154405, "grad_norm": 0.13632316887378693, "learning_rate": 0.002, "loss": 2.5735, "step": 188890 }, { "epoch": 0.3763308045390794, "grad_norm": 0.1746416687965393, "learning_rate": 0.002, "loss": 2.552, "step": 188900 }, { "epoch": 0.37635072676271836, "grad_norm": 0.16463951766490936, "learning_rate": 0.002, "loss": 2.5554, "step": 188910 }, { "epoch": 0.37637064898635725, "grad_norm": 0.16311335563659668, "learning_rate": 0.002, "loss": 2.557, "step": 188920 }, { "epoch": 0.3763905712099962, "grad_norm": 0.16084374487400055, "learning_rate": 0.002, "loss": 2.5588, "step": 188930 }, { "epoch": 0.3764104934336351, "grad_norm": 0.16957159340381622, "learning_rate": 0.002, "loss": 2.5598, "step": 188940 }, { "epoch": 0.376430415657274, "grad_norm": 0.18082907795906067, "learning_rate": 0.002, "loss": 2.5576, "step": 188950 }, { "epoch": 0.3764503378809129, "grad_norm": 0.1419457495212555, "learning_rate": 0.002, "loss": 2.5673, "step": 188960 }, { "epoch": 0.3764702601045518, "grad_norm": 0.17407017946243286, "learning_rate": 0.002, "loss": 2.5622, "step": 188970 }, { "epoch": 0.37649018232819076, "grad_norm": 0.1638975292444229, "learning_rate": 0.002, "loss": 2.5585, "step": 188980 }, { "epoch": 0.37651010455182965, "grad_norm": 0.2016851007938385, "learning_rate": 0.002, "loss": 2.5757, "step": 188990 }, { "epoch": 0.3765300267754686, "grad_norm": 0.19852474331855774, "learning_rate": 0.002, "loss": 2.5617, "step": 189000 }, { "epoch": 0.3765499489991075, "grad_norm": 0.16654685139656067, "learning_rate": 0.002, "loss": 2.5662, "step": 189010 }, { "epoch": 0.3765698712227464, "grad_norm": 0.1477799266576767, "learning_rate": 0.002, "loss": 2.5597, "step": 189020 }, { "epoch": 0.3765897934463853, "grad_norm": 0.1653004139661789, "learning_rate": 0.002, "loss": 2.5562, "step": 189030 }, { "epoch": 0.3766097156700242, "grad_norm": 0.16662508249282837, "learning_rate": 0.002, "loss": 2.5642, "step": 189040 }, { "epoch": 0.37662963789366316, "grad_norm": 0.15597234666347504, "learning_rate": 0.002, "loss": 2.5543, "step": 189050 }, { "epoch": 0.37664956011730205, "grad_norm": 0.1533946990966797, "learning_rate": 0.002, "loss": 2.5575, "step": 189060 }, { "epoch": 0.37666948234094094, "grad_norm": 0.15933705866336823, "learning_rate": 0.002, "loss": 2.5542, "step": 189070 }, { "epoch": 0.3766894045645799, "grad_norm": 0.14948692917823792, "learning_rate": 0.002, "loss": 2.5722, "step": 189080 }, { "epoch": 0.3767093267882188, "grad_norm": 0.15046611428260803, "learning_rate": 0.002, "loss": 2.5756, "step": 189090 }, { "epoch": 0.3767292490118577, "grad_norm": 0.18992552161216736, "learning_rate": 0.002, "loss": 2.5614, "step": 189100 }, { "epoch": 0.3767491712354966, "grad_norm": 0.18246257305145264, "learning_rate": 0.002, "loss": 2.567, "step": 189110 }, { "epoch": 0.37676909345913556, "grad_norm": 0.168402299284935, "learning_rate": 0.002, "loss": 2.5703, "step": 189120 }, { "epoch": 0.37678901568277445, "grad_norm": 0.1444823145866394, "learning_rate": 0.002, "loss": 2.5597, "step": 189130 }, { "epoch": 0.37680893790641334, "grad_norm": 0.2002536803483963, "learning_rate": 0.002, "loss": 2.5713, "step": 189140 }, { "epoch": 0.3768288601300523, "grad_norm": 0.15851642191410065, "learning_rate": 0.002, "loss": 2.5579, "step": 189150 }, { "epoch": 0.3768487823536912, "grad_norm": 0.22023877501487732, "learning_rate": 0.002, "loss": 2.56, "step": 189160 }, { "epoch": 0.3768687045773301, "grad_norm": 0.17728593945503235, "learning_rate": 0.002, "loss": 2.5526, "step": 189170 }, { "epoch": 0.376888626800969, "grad_norm": 0.1688718944787979, "learning_rate": 0.002, "loss": 2.5552, "step": 189180 }, { "epoch": 0.3769085490246079, "grad_norm": 0.17555469274520874, "learning_rate": 0.002, "loss": 2.5519, "step": 189190 }, { "epoch": 0.37692847124824685, "grad_norm": 0.16379140317440033, "learning_rate": 0.002, "loss": 2.5579, "step": 189200 }, { "epoch": 0.37694839347188575, "grad_norm": 0.16583995521068573, "learning_rate": 0.002, "loss": 2.5696, "step": 189210 }, { "epoch": 0.3769683156955247, "grad_norm": 0.1727524846792221, "learning_rate": 0.002, "loss": 2.5538, "step": 189220 }, { "epoch": 0.3769882379191636, "grad_norm": 0.1633220911026001, "learning_rate": 0.002, "loss": 2.5611, "step": 189230 }, { "epoch": 0.3770081601428025, "grad_norm": 0.15316994488239288, "learning_rate": 0.002, "loss": 2.5676, "step": 189240 }, { "epoch": 0.3770280823664414, "grad_norm": 0.1504000723361969, "learning_rate": 0.002, "loss": 2.5775, "step": 189250 }, { "epoch": 0.3770480045900803, "grad_norm": 0.19160105288028717, "learning_rate": 0.002, "loss": 2.5747, "step": 189260 }, { "epoch": 0.37706792681371926, "grad_norm": 0.18338993191719055, "learning_rate": 0.002, "loss": 2.5555, "step": 189270 }, { "epoch": 0.37708784903735815, "grad_norm": 0.15688423812389374, "learning_rate": 0.002, "loss": 2.5687, "step": 189280 }, { "epoch": 0.3771077712609971, "grad_norm": 0.14065688848495483, "learning_rate": 0.002, "loss": 2.5534, "step": 189290 }, { "epoch": 0.377127693484636, "grad_norm": 0.2130192220211029, "learning_rate": 0.002, "loss": 2.5641, "step": 189300 }, { "epoch": 0.3771476157082749, "grad_norm": 0.15729281306266785, "learning_rate": 0.002, "loss": 2.5651, "step": 189310 }, { "epoch": 0.3771675379319138, "grad_norm": 0.13647201657295227, "learning_rate": 0.002, "loss": 2.5545, "step": 189320 }, { "epoch": 0.3771874601555527, "grad_norm": 0.19029797613620758, "learning_rate": 0.002, "loss": 2.5546, "step": 189330 }, { "epoch": 0.37720738237919166, "grad_norm": 0.1964830458164215, "learning_rate": 0.002, "loss": 2.5429, "step": 189340 }, { "epoch": 0.37722730460283055, "grad_norm": 0.14735455811023712, "learning_rate": 0.002, "loss": 2.5707, "step": 189350 }, { "epoch": 0.37724722682646944, "grad_norm": 0.18563532829284668, "learning_rate": 0.002, "loss": 2.5583, "step": 189360 }, { "epoch": 0.3772671490501084, "grad_norm": 0.16391220688819885, "learning_rate": 0.002, "loss": 2.5668, "step": 189370 }, { "epoch": 0.3772870712737473, "grad_norm": 0.14223754405975342, "learning_rate": 0.002, "loss": 2.5491, "step": 189380 }, { "epoch": 0.3773069934973862, "grad_norm": 0.15073758363723755, "learning_rate": 0.002, "loss": 2.5517, "step": 189390 }, { "epoch": 0.3773269157210251, "grad_norm": 0.19706706702709198, "learning_rate": 0.002, "loss": 2.5556, "step": 189400 }, { "epoch": 0.377346837944664, "grad_norm": 0.17314864695072174, "learning_rate": 0.002, "loss": 2.5588, "step": 189410 }, { "epoch": 0.37736676016830295, "grad_norm": 0.14992910623550415, "learning_rate": 0.002, "loss": 2.5558, "step": 189420 }, { "epoch": 0.37738668239194184, "grad_norm": 0.17128798365592957, "learning_rate": 0.002, "loss": 2.5497, "step": 189430 }, { "epoch": 0.3774066046155808, "grad_norm": 0.1792306900024414, "learning_rate": 0.002, "loss": 2.5465, "step": 189440 }, { "epoch": 0.3774265268392197, "grad_norm": 0.19350667297840118, "learning_rate": 0.002, "loss": 2.5531, "step": 189450 }, { "epoch": 0.3774464490628586, "grad_norm": 0.17802399396896362, "learning_rate": 0.002, "loss": 2.5623, "step": 189460 }, { "epoch": 0.3774663712864975, "grad_norm": 0.14387771487236023, "learning_rate": 0.002, "loss": 2.5736, "step": 189470 }, { "epoch": 0.3774862935101364, "grad_norm": 0.18919579684734344, "learning_rate": 0.002, "loss": 2.5729, "step": 189480 }, { "epoch": 0.37750621573377535, "grad_norm": 0.17966994643211365, "learning_rate": 0.002, "loss": 2.5487, "step": 189490 }, { "epoch": 0.37752613795741424, "grad_norm": 0.1422855257987976, "learning_rate": 0.002, "loss": 2.5506, "step": 189500 }, { "epoch": 0.3775460601810532, "grad_norm": 0.1641935408115387, "learning_rate": 0.002, "loss": 2.5638, "step": 189510 }, { "epoch": 0.3775659824046921, "grad_norm": 0.1669338345527649, "learning_rate": 0.002, "loss": 2.559, "step": 189520 }, { "epoch": 0.37758590462833097, "grad_norm": 0.18274831771850586, "learning_rate": 0.002, "loss": 2.5621, "step": 189530 }, { "epoch": 0.3776058268519699, "grad_norm": 0.17544735968112946, "learning_rate": 0.002, "loss": 2.5563, "step": 189540 }, { "epoch": 0.3776257490756088, "grad_norm": 0.144264817237854, "learning_rate": 0.002, "loss": 2.5526, "step": 189550 }, { "epoch": 0.37764567129924775, "grad_norm": 0.1968582719564438, "learning_rate": 0.002, "loss": 2.561, "step": 189560 }, { "epoch": 0.37766559352288664, "grad_norm": 0.1499139964580536, "learning_rate": 0.002, "loss": 2.5678, "step": 189570 }, { "epoch": 0.3776855157465256, "grad_norm": 0.1946176439523697, "learning_rate": 0.002, "loss": 2.5606, "step": 189580 }, { "epoch": 0.3777054379701645, "grad_norm": 0.1409326195716858, "learning_rate": 0.002, "loss": 2.5595, "step": 189590 }, { "epoch": 0.37772536019380337, "grad_norm": 0.15544584393501282, "learning_rate": 0.002, "loss": 2.5567, "step": 189600 }, { "epoch": 0.3777452824174423, "grad_norm": 0.15106788277626038, "learning_rate": 0.002, "loss": 2.5426, "step": 189610 }, { "epoch": 0.3777652046410812, "grad_norm": 0.15870144963264465, "learning_rate": 0.002, "loss": 2.5692, "step": 189620 }, { "epoch": 0.37778512686472016, "grad_norm": 0.15889303386211395, "learning_rate": 0.002, "loss": 2.5685, "step": 189630 }, { "epoch": 0.37780504908835905, "grad_norm": 0.16346292197704315, "learning_rate": 0.002, "loss": 2.5652, "step": 189640 }, { "epoch": 0.37782497131199794, "grad_norm": 0.17433051764965057, "learning_rate": 0.002, "loss": 2.5538, "step": 189650 }, { "epoch": 0.3778448935356369, "grad_norm": 0.19612272083759308, "learning_rate": 0.002, "loss": 2.5604, "step": 189660 }, { "epoch": 0.3778648157592758, "grad_norm": 0.16671085357666016, "learning_rate": 0.002, "loss": 2.5542, "step": 189670 }, { "epoch": 0.3778847379829147, "grad_norm": 0.1632964313030243, "learning_rate": 0.002, "loss": 2.562, "step": 189680 }, { "epoch": 0.3779046602065536, "grad_norm": 0.16009046137332916, "learning_rate": 0.002, "loss": 2.5568, "step": 189690 }, { "epoch": 0.3779245824301925, "grad_norm": 0.19134825468063354, "learning_rate": 0.002, "loss": 2.5668, "step": 189700 }, { "epoch": 0.37794450465383145, "grad_norm": 0.16802051663398743, "learning_rate": 0.002, "loss": 2.5672, "step": 189710 }, { "epoch": 0.37796442687747034, "grad_norm": 0.19886617362499237, "learning_rate": 0.002, "loss": 2.5636, "step": 189720 }, { "epoch": 0.3779843491011093, "grad_norm": 0.1627701222896576, "learning_rate": 0.002, "loss": 2.5552, "step": 189730 }, { "epoch": 0.3780042713247482, "grad_norm": 0.17305903136730194, "learning_rate": 0.002, "loss": 2.5718, "step": 189740 }, { "epoch": 0.3780241935483871, "grad_norm": 0.18560859560966492, "learning_rate": 0.002, "loss": 2.5748, "step": 189750 }, { "epoch": 0.378044115772026, "grad_norm": 0.15976786613464355, "learning_rate": 0.002, "loss": 2.5591, "step": 189760 }, { "epoch": 0.3780640379956649, "grad_norm": 0.17049331963062286, "learning_rate": 0.002, "loss": 2.5615, "step": 189770 }, { "epoch": 0.37808396021930385, "grad_norm": 0.14500004053115845, "learning_rate": 0.002, "loss": 2.5515, "step": 189780 }, { "epoch": 0.37810388244294274, "grad_norm": 0.18839293718338013, "learning_rate": 0.002, "loss": 2.5547, "step": 189790 }, { "epoch": 0.3781238046665817, "grad_norm": 0.17087015509605408, "learning_rate": 0.002, "loss": 2.556, "step": 189800 }, { "epoch": 0.3781437268902206, "grad_norm": 0.13686054944992065, "learning_rate": 0.002, "loss": 2.5586, "step": 189810 }, { "epoch": 0.37816364911385947, "grad_norm": 0.15840141475200653, "learning_rate": 0.002, "loss": 2.5693, "step": 189820 }, { "epoch": 0.3781835713374984, "grad_norm": 0.18033233284950256, "learning_rate": 0.002, "loss": 2.5692, "step": 189830 }, { "epoch": 0.3782034935611373, "grad_norm": 0.19073086977005005, "learning_rate": 0.002, "loss": 2.5645, "step": 189840 }, { "epoch": 0.37822341578477625, "grad_norm": 0.19516894221305847, "learning_rate": 0.002, "loss": 2.5523, "step": 189850 }, { "epoch": 0.37824333800841514, "grad_norm": 0.16017770767211914, "learning_rate": 0.002, "loss": 2.5668, "step": 189860 }, { "epoch": 0.3782632602320541, "grad_norm": 0.17028547823429108, "learning_rate": 0.002, "loss": 2.565, "step": 189870 }, { "epoch": 0.378283182455693, "grad_norm": 0.18242578208446503, "learning_rate": 0.002, "loss": 2.566, "step": 189880 }, { "epoch": 0.37830310467933187, "grad_norm": 0.18464022874832153, "learning_rate": 0.002, "loss": 2.5573, "step": 189890 }, { "epoch": 0.3783230269029708, "grad_norm": 0.1750504970550537, "learning_rate": 0.002, "loss": 2.5619, "step": 189900 }, { "epoch": 0.3783429491266097, "grad_norm": 0.19381117820739746, "learning_rate": 0.002, "loss": 2.5388, "step": 189910 }, { "epoch": 0.37836287135024865, "grad_norm": 0.19578269124031067, "learning_rate": 0.002, "loss": 2.582, "step": 189920 }, { "epoch": 0.37838279357388754, "grad_norm": 0.17283669114112854, "learning_rate": 0.002, "loss": 2.5534, "step": 189930 }, { "epoch": 0.37840271579752643, "grad_norm": 0.18108713626861572, "learning_rate": 0.002, "loss": 2.5677, "step": 189940 }, { "epoch": 0.3784226380211654, "grad_norm": 0.16774146258831024, "learning_rate": 0.002, "loss": 2.5591, "step": 189950 }, { "epoch": 0.37844256024480427, "grad_norm": 0.17340955138206482, "learning_rate": 0.002, "loss": 2.5518, "step": 189960 }, { "epoch": 0.3784624824684432, "grad_norm": 0.17600364983081818, "learning_rate": 0.002, "loss": 2.5443, "step": 189970 }, { "epoch": 0.3784824046920821, "grad_norm": 0.16074754297733307, "learning_rate": 0.002, "loss": 2.571, "step": 189980 }, { "epoch": 0.378502326915721, "grad_norm": 0.15786878764629364, "learning_rate": 0.002, "loss": 2.5603, "step": 189990 }, { "epoch": 0.37852224913935995, "grad_norm": 0.1945749670267105, "learning_rate": 0.002, "loss": 2.5559, "step": 190000 }, { "epoch": 0.37854217136299884, "grad_norm": 0.16793715953826904, "learning_rate": 0.002, "loss": 2.5622, "step": 190010 }, { "epoch": 0.3785620935866378, "grad_norm": 0.17639559507369995, "learning_rate": 0.002, "loss": 2.5591, "step": 190020 }, { "epoch": 0.3785820158102767, "grad_norm": 0.15499480068683624, "learning_rate": 0.002, "loss": 2.5607, "step": 190030 }, { "epoch": 0.3786019380339156, "grad_norm": 0.17178505659103394, "learning_rate": 0.002, "loss": 2.5635, "step": 190040 }, { "epoch": 0.3786218602575545, "grad_norm": 0.1901884526014328, "learning_rate": 0.002, "loss": 2.5596, "step": 190050 }, { "epoch": 0.3786417824811934, "grad_norm": 0.18062956631183624, "learning_rate": 0.002, "loss": 2.5558, "step": 190060 }, { "epoch": 0.37866170470483235, "grad_norm": 0.1651521474123001, "learning_rate": 0.002, "loss": 2.5584, "step": 190070 }, { "epoch": 0.37868162692847124, "grad_norm": 0.22788481414318085, "learning_rate": 0.002, "loss": 2.5606, "step": 190080 }, { "epoch": 0.3787015491521102, "grad_norm": 0.16742953658103943, "learning_rate": 0.002, "loss": 2.5638, "step": 190090 }, { "epoch": 0.3787214713757491, "grad_norm": 0.16416490077972412, "learning_rate": 0.002, "loss": 2.5563, "step": 190100 }, { "epoch": 0.37874139359938797, "grad_norm": 0.18593861162662506, "learning_rate": 0.002, "loss": 2.5451, "step": 190110 }, { "epoch": 0.3787613158230269, "grad_norm": 0.22818933427333832, "learning_rate": 0.002, "loss": 2.557, "step": 190120 }, { "epoch": 0.3787812380466658, "grad_norm": 0.14777438342571259, "learning_rate": 0.002, "loss": 2.5642, "step": 190130 }, { "epoch": 0.37880116027030475, "grad_norm": 0.16715732216835022, "learning_rate": 0.002, "loss": 2.5353, "step": 190140 }, { "epoch": 0.37882108249394364, "grad_norm": 0.1622638702392578, "learning_rate": 0.002, "loss": 2.5579, "step": 190150 }, { "epoch": 0.37884100471758253, "grad_norm": 0.15796442329883575, "learning_rate": 0.002, "loss": 2.5674, "step": 190160 }, { "epoch": 0.3788609269412215, "grad_norm": 0.17809970676898956, "learning_rate": 0.002, "loss": 2.5624, "step": 190170 }, { "epoch": 0.37888084916486037, "grad_norm": 0.16705799102783203, "learning_rate": 0.002, "loss": 2.5498, "step": 190180 }, { "epoch": 0.3789007713884993, "grad_norm": 0.16877982020378113, "learning_rate": 0.002, "loss": 2.5472, "step": 190190 }, { "epoch": 0.3789206936121382, "grad_norm": 0.17577338218688965, "learning_rate": 0.002, "loss": 2.5679, "step": 190200 }, { "epoch": 0.37894061583577715, "grad_norm": 0.1602669656276703, "learning_rate": 0.002, "loss": 2.5535, "step": 190210 }, { "epoch": 0.37896053805941604, "grad_norm": 0.15470708906650543, "learning_rate": 0.002, "loss": 2.5687, "step": 190220 }, { "epoch": 0.37898046028305493, "grad_norm": 0.16151162981987, "learning_rate": 0.002, "loss": 2.5579, "step": 190230 }, { "epoch": 0.3790003825066939, "grad_norm": 0.1678270697593689, "learning_rate": 0.002, "loss": 2.5554, "step": 190240 }, { "epoch": 0.37902030473033277, "grad_norm": 0.15190917253494263, "learning_rate": 0.002, "loss": 2.554, "step": 190250 }, { "epoch": 0.3790402269539717, "grad_norm": 0.17373217642307281, "learning_rate": 0.002, "loss": 2.5538, "step": 190260 }, { "epoch": 0.3790601491776106, "grad_norm": 0.18373151123523712, "learning_rate": 0.002, "loss": 2.5658, "step": 190270 }, { "epoch": 0.3790800714012495, "grad_norm": 0.18712669610977173, "learning_rate": 0.002, "loss": 2.5639, "step": 190280 }, { "epoch": 0.37909999362488844, "grad_norm": 0.18608228862285614, "learning_rate": 0.002, "loss": 2.566, "step": 190290 }, { "epoch": 0.37911991584852733, "grad_norm": 0.16817690432071686, "learning_rate": 0.002, "loss": 2.5477, "step": 190300 }, { "epoch": 0.3791398380721663, "grad_norm": 0.14957775175571442, "learning_rate": 0.002, "loss": 2.5704, "step": 190310 }, { "epoch": 0.37915976029580517, "grad_norm": 0.16935515403747559, "learning_rate": 0.002, "loss": 2.5647, "step": 190320 }, { "epoch": 0.3791796825194441, "grad_norm": 0.16774660348892212, "learning_rate": 0.002, "loss": 2.5495, "step": 190330 }, { "epoch": 0.379199604743083, "grad_norm": 0.170798659324646, "learning_rate": 0.002, "loss": 2.5574, "step": 190340 }, { "epoch": 0.3792195269667219, "grad_norm": 0.18154440820217133, "learning_rate": 0.002, "loss": 2.5584, "step": 190350 }, { "epoch": 0.37923944919036084, "grad_norm": 0.14928321540355682, "learning_rate": 0.002, "loss": 2.5574, "step": 190360 }, { "epoch": 0.37925937141399974, "grad_norm": 0.20322386920452118, "learning_rate": 0.002, "loss": 2.5655, "step": 190370 }, { "epoch": 0.3792792936376387, "grad_norm": 0.16715896129608154, "learning_rate": 0.002, "loss": 2.5591, "step": 190380 }, { "epoch": 0.37929921586127757, "grad_norm": 0.16887912154197693, "learning_rate": 0.002, "loss": 2.5501, "step": 190390 }, { "epoch": 0.37931913808491646, "grad_norm": 0.1817820966243744, "learning_rate": 0.002, "loss": 2.5697, "step": 190400 }, { "epoch": 0.3793390603085554, "grad_norm": 0.16110333800315857, "learning_rate": 0.002, "loss": 2.5519, "step": 190410 }, { "epoch": 0.3793589825321943, "grad_norm": 0.16167137026786804, "learning_rate": 0.002, "loss": 2.5512, "step": 190420 }, { "epoch": 0.37937890475583325, "grad_norm": 0.214253231883049, "learning_rate": 0.002, "loss": 2.5726, "step": 190430 }, { "epoch": 0.37939882697947214, "grad_norm": 0.14149804413318634, "learning_rate": 0.002, "loss": 2.5543, "step": 190440 }, { "epoch": 0.379418749203111, "grad_norm": 0.14060182869434357, "learning_rate": 0.002, "loss": 2.5582, "step": 190450 }, { "epoch": 0.37943867142675, "grad_norm": 0.1568208932876587, "learning_rate": 0.002, "loss": 2.5645, "step": 190460 }, { "epoch": 0.37945859365038886, "grad_norm": 0.1721307337284088, "learning_rate": 0.002, "loss": 2.5588, "step": 190470 }, { "epoch": 0.3794785158740278, "grad_norm": 0.17429499328136444, "learning_rate": 0.002, "loss": 2.5597, "step": 190480 }, { "epoch": 0.3794984380976667, "grad_norm": 0.16876348853111267, "learning_rate": 0.002, "loss": 2.5622, "step": 190490 }, { "epoch": 0.37951836032130565, "grad_norm": 0.18210376799106598, "learning_rate": 0.002, "loss": 2.5593, "step": 190500 }, { "epoch": 0.37953828254494454, "grad_norm": 0.17943963408470154, "learning_rate": 0.002, "loss": 2.5731, "step": 190510 }, { "epoch": 0.37955820476858343, "grad_norm": 0.15474461019039154, "learning_rate": 0.002, "loss": 2.5454, "step": 190520 }, { "epoch": 0.3795781269922224, "grad_norm": 0.16575811803340912, "learning_rate": 0.002, "loss": 2.5628, "step": 190530 }, { "epoch": 0.37959804921586127, "grad_norm": 0.15914934873580933, "learning_rate": 0.002, "loss": 2.5629, "step": 190540 }, { "epoch": 0.3796179714395002, "grad_norm": 0.18503984808921814, "learning_rate": 0.002, "loss": 2.5533, "step": 190550 }, { "epoch": 0.3796378936631391, "grad_norm": 0.178971529006958, "learning_rate": 0.002, "loss": 2.5588, "step": 190560 }, { "epoch": 0.379657815886778, "grad_norm": 0.17329226434230804, "learning_rate": 0.002, "loss": 2.5656, "step": 190570 }, { "epoch": 0.37967773811041694, "grad_norm": 0.16858559846878052, "learning_rate": 0.002, "loss": 2.5524, "step": 190580 }, { "epoch": 0.37969766033405583, "grad_norm": 0.146092489361763, "learning_rate": 0.002, "loss": 2.5596, "step": 190590 }, { "epoch": 0.3797175825576948, "grad_norm": 0.18036973476409912, "learning_rate": 0.002, "loss": 2.5619, "step": 190600 }, { "epoch": 0.37973750478133367, "grad_norm": 0.16898506879806519, "learning_rate": 0.002, "loss": 2.5607, "step": 190610 }, { "epoch": 0.3797574270049726, "grad_norm": 0.17321555316448212, "learning_rate": 0.002, "loss": 2.5488, "step": 190620 }, { "epoch": 0.3797773492286115, "grad_norm": 0.14422887563705444, "learning_rate": 0.002, "loss": 2.5596, "step": 190630 }, { "epoch": 0.3797972714522504, "grad_norm": 0.1831585168838501, "learning_rate": 0.002, "loss": 2.5599, "step": 190640 }, { "epoch": 0.37981719367588934, "grad_norm": 0.18751198053359985, "learning_rate": 0.002, "loss": 2.5604, "step": 190650 }, { "epoch": 0.37983711589952823, "grad_norm": 0.1423666626214981, "learning_rate": 0.002, "loss": 2.5556, "step": 190660 }, { "epoch": 0.3798570381231672, "grad_norm": 0.1645408272743225, "learning_rate": 0.002, "loss": 2.5549, "step": 190670 }, { "epoch": 0.37987696034680607, "grad_norm": 0.14729031920433044, "learning_rate": 0.002, "loss": 2.5479, "step": 190680 }, { "epoch": 0.37989688257044496, "grad_norm": 0.17721380293369293, "learning_rate": 0.002, "loss": 2.5512, "step": 190690 }, { "epoch": 0.3799168047940839, "grad_norm": 0.17548790574073792, "learning_rate": 0.002, "loss": 2.5656, "step": 190700 }, { "epoch": 0.3799367270177228, "grad_norm": 0.18428196012973785, "learning_rate": 0.002, "loss": 2.5548, "step": 190710 }, { "epoch": 0.37995664924136174, "grad_norm": 0.1500236690044403, "learning_rate": 0.002, "loss": 2.5397, "step": 190720 }, { "epoch": 0.37997657146500063, "grad_norm": 0.16705867648124695, "learning_rate": 0.002, "loss": 2.5669, "step": 190730 }, { "epoch": 0.3799964936886395, "grad_norm": 0.19324450194835663, "learning_rate": 0.002, "loss": 2.549, "step": 190740 }, { "epoch": 0.38001641591227847, "grad_norm": 0.19486142694950104, "learning_rate": 0.002, "loss": 2.5583, "step": 190750 }, { "epoch": 0.38003633813591736, "grad_norm": 0.14521309733390808, "learning_rate": 0.002, "loss": 2.5565, "step": 190760 }, { "epoch": 0.3800562603595563, "grad_norm": 0.15386787056922913, "learning_rate": 0.002, "loss": 2.5666, "step": 190770 }, { "epoch": 0.3800761825831952, "grad_norm": 0.16044096648693085, "learning_rate": 0.002, "loss": 2.5481, "step": 190780 }, { "epoch": 0.38009610480683415, "grad_norm": 0.148888498544693, "learning_rate": 0.002, "loss": 2.5641, "step": 190790 }, { "epoch": 0.38011602703047304, "grad_norm": 0.5144585371017456, "learning_rate": 0.002, "loss": 2.5545, "step": 190800 }, { "epoch": 0.3801359492541119, "grad_norm": 0.20060144364833832, "learning_rate": 0.002, "loss": 2.5705, "step": 190810 }, { "epoch": 0.3801558714777509, "grad_norm": 0.18797805905342102, "learning_rate": 0.002, "loss": 2.5716, "step": 190820 }, { "epoch": 0.38017579370138976, "grad_norm": 0.177887961268425, "learning_rate": 0.002, "loss": 2.5683, "step": 190830 }, { "epoch": 0.3801957159250287, "grad_norm": 0.17151762545108795, "learning_rate": 0.002, "loss": 2.5586, "step": 190840 }, { "epoch": 0.3802156381486676, "grad_norm": 0.1979065239429474, "learning_rate": 0.002, "loss": 2.5667, "step": 190850 }, { "epoch": 0.3802355603723065, "grad_norm": 0.15188507735729218, "learning_rate": 0.002, "loss": 2.5745, "step": 190860 }, { "epoch": 0.38025548259594544, "grad_norm": 0.16012154519557953, "learning_rate": 0.002, "loss": 2.5548, "step": 190870 }, { "epoch": 0.38027540481958433, "grad_norm": 0.2189890593290329, "learning_rate": 0.002, "loss": 2.5461, "step": 190880 }, { "epoch": 0.3802953270432233, "grad_norm": 0.1561223268508911, "learning_rate": 0.002, "loss": 2.5539, "step": 190890 }, { "epoch": 0.38031524926686217, "grad_norm": 0.20001178979873657, "learning_rate": 0.002, "loss": 2.5629, "step": 190900 }, { "epoch": 0.38033517149050106, "grad_norm": 0.12993398308753967, "learning_rate": 0.002, "loss": 2.5681, "step": 190910 }, { "epoch": 0.38035509371414, "grad_norm": 0.16610576212406158, "learning_rate": 0.002, "loss": 2.5667, "step": 190920 }, { "epoch": 0.3803750159377789, "grad_norm": 0.17618460953235626, "learning_rate": 0.002, "loss": 2.5693, "step": 190930 }, { "epoch": 0.38039493816141784, "grad_norm": 0.19359101355075836, "learning_rate": 0.002, "loss": 2.5643, "step": 190940 }, { "epoch": 0.38041486038505673, "grad_norm": 0.17168757319450378, "learning_rate": 0.002, "loss": 2.5478, "step": 190950 }, { "epoch": 0.3804347826086957, "grad_norm": 0.174867644906044, "learning_rate": 0.002, "loss": 2.5496, "step": 190960 }, { "epoch": 0.38045470483233457, "grad_norm": 0.2001717984676361, "learning_rate": 0.002, "loss": 2.5547, "step": 190970 }, { "epoch": 0.38047462705597346, "grad_norm": 0.15987323224544525, "learning_rate": 0.002, "loss": 2.5628, "step": 190980 }, { "epoch": 0.3804945492796124, "grad_norm": 0.16429750621318817, "learning_rate": 0.002, "loss": 2.5623, "step": 190990 }, { "epoch": 0.3805144715032513, "grad_norm": 0.13304761052131653, "learning_rate": 0.002, "loss": 2.5735, "step": 191000 }, { "epoch": 0.38053439372689024, "grad_norm": 0.15831147134304047, "learning_rate": 0.002, "loss": 2.5713, "step": 191010 }, { "epoch": 0.38055431595052913, "grad_norm": 0.17578065395355225, "learning_rate": 0.002, "loss": 2.5705, "step": 191020 }, { "epoch": 0.380574238174168, "grad_norm": 0.19160953164100647, "learning_rate": 0.002, "loss": 2.5629, "step": 191030 }, { "epoch": 0.38059416039780697, "grad_norm": 0.1560218632221222, "learning_rate": 0.002, "loss": 2.5366, "step": 191040 }, { "epoch": 0.38061408262144586, "grad_norm": 0.20001105964183807, "learning_rate": 0.002, "loss": 2.5501, "step": 191050 }, { "epoch": 0.3806340048450848, "grad_norm": 0.1617973893880844, "learning_rate": 0.002, "loss": 2.557, "step": 191060 }, { "epoch": 0.3806539270687237, "grad_norm": 0.20162442326545715, "learning_rate": 0.002, "loss": 2.5591, "step": 191070 }, { "epoch": 0.38067384929236264, "grad_norm": 0.1538524329662323, "learning_rate": 0.002, "loss": 2.5668, "step": 191080 }, { "epoch": 0.38069377151600153, "grad_norm": 0.17913994193077087, "learning_rate": 0.002, "loss": 2.5622, "step": 191090 }, { "epoch": 0.3807136937396404, "grad_norm": 0.15854063630104065, "learning_rate": 0.002, "loss": 2.5516, "step": 191100 }, { "epoch": 0.38073361596327937, "grad_norm": 0.15197023749351501, "learning_rate": 0.002, "loss": 2.5446, "step": 191110 }, { "epoch": 0.38075353818691826, "grad_norm": 0.14431069791316986, "learning_rate": 0.002, "loss": 2.5809, "step": 191120 }, { "epoch": 0.3807734604105572, "grad_norm": 0.15492764115333557, "learning_rate": 0.002, "loss": 2.5523, "step": 191130 }, { "epoch": 0.3807933826341961, "grad_norm": 0.18526864051818848, "learning_rate": 0.002, "loss": 2.5566, "step": 191140 }, { "epoch": 0.380813304857835, "grad_norm": 0.16530902683734894, "learning_rate": 0.002, "loss": 2.5625, "step": 191150 }, { "epoch": 0.38083322708147394, "grad_norm": 0.210574671626091, "learning_rate": 0.002, "loss": 2.549, "step": 191160 }, { "epoch": 0.3808531493051128, "grad_norm": 0.17043502628803253, "learning_rate": 0.002, "loss": 2.5607, "step": 191170 }, { "epoch": 0.38087307152875177, "grad_norm": 0.13460931181907654, "learning_rate": 0.002, "loss": 2.5657, "step": 191180 }, { "epoch": 0.38089299375239066, "grad_norm": 0.16554923355579376, "learning_rate": 0.002, "loss": 2.5723, "step": 191190 }, { "epoch": 0.38091291597602955, "grad_norm": 0.16627132892608643, "learning_rate": 0.002, "loss": 2.5693, "step": 191200 }, { "epoch": 0.3809328381996685, "grad_norm": 0.17208963632583618, "learning_rate": 0.002, "loss": 2.5699, "step": 191210 }, { "epoch": 0.3809527604233074, "grad_norm": 0.17489837110042572, "learning_rate": 0.002, "loss": 2.5489, "step": 191220 }, { "epoch": 0.38097268264694634, "grad_norm": 0.16716915369033813, "learning_rate": 0.002, "loss": 2.5647, "step": 191230 }, { "epoch": 0.3809926048705852, "grad_norm": 0.16960127651691437, "learning_rate": 0.002, "loss": 2.5571, "step": 191240 }, { "epoch": 0.3810125270942242, "grad_norm": 0.13786521553993225, "learning_rate": 0.002, "loss": 2.5697, "step": 191250 }, { "epoch": 0.38103244931786306, "grad_norm": 0.15675978362560272, "learning_rate": 0.002, "loss": 2.5605, "step": 191260 }, { "epoch": 0.38105237154150196, "grad_norm": 0.1470523625612259, "learning_rate": 0.002, "loss": 2.5428, "step": 191270 }, { "epoch": 0.3810722937651409, "grad_norm": 0.1612277776002884, "learning_rate": 0.002, "loss": 2.5535, "step": 191280 }, { "epoch": 0.3810922159887798, "grad_norm": 0.18779060244560242, "learning_rate": 0.002, "loss": 2.5678, "step": 191290 }, { "epoch": 0.38111213821241874, "grad_norm": 0.16215243935585022, "learning_rate": 0.002, "loss": 2.5753, "step": 191300 }, { "epoch": 0.38113206043605763, "grad_norm": 0.1432073712348938, "learning_rate": 0.002, "loss": 2.5601, "step": 191310 }, { "epoch": 0.3811519826596965, "grad_norm": 0.19326001405715942, "learning_rate": 0.002, "loss": 2.5683, "step": 191320 }, { "epoch": 0.38117190488333547, "grad_norm": 0.1931016892194748, "learning_rate": 0.002, "loss": 2.5522, "step": 191330 }, { "epoch": 0.38119182710697436, "grad_norm": 0.19803674519062042, "learning_rate": 0.002, "loss": 2.5645, "step": 191340 }, { "epoch": 0.3812117493306133, "grad_norm": 0.17499271035194397, "learning_rate": 0.002, "loss": 2.5637, "step": 191350 }, { "epoch": 0.3812316715542522, "grad_norm": 0.15971191227436066, "learning_rate": 0.002, "loss": 2.5511, "step": 191360 }, { "epoch": 0.38125159377789114, "grad_norm": 0.1479516178369522, "learning_rate": 0.002, "loss": 2.5557, "step": 191370 }, { "epoch": 0.38127151600153003, "grad_norm": 0.1894339621067047, "learning_rate": 0.002, "loss": 2.5652, "step": 191380 }, { "epoch": 0.3812914382251689, "grad_norm": 0.1654757261276245, "learning_rate": 0.002, "loss": 2.5618, "step": 191390 }, { "epoch": 0.38131136044880787, "grad_norm": 0.15518306195735931, "learning_rate": 0.002, "loss": 2.5673, "step": 191400 }, { "epoch": 0.38133128267244676, "grad_norm": 0.17171534895896912, "learning_rate": 0.002, "loss": 2.5577, "step": 191410 }, { "epoch": 0.3813512048960857, "grad_norm": 0.18226444721221924, "learning_rate": 0.002, "loss": 2.5444, "step": 191420 }, { "epoch": 0.3813711271197246, "grad_norm": 0.145472913980484, "learning_rate": 0.002, "loss": 2.5702, "step": 191430 }, { "epoch": 0.3813910493433635, "grad_norm": 0.21107769012451172, "learning_rate": 0.002, "loss": 2.5621, "step": 191440 }, { "epoch": 0.38141097156700243, "grad_norm": 0.18963372707366943, "learning_rate": 0.002, "loss": 2.5687, "step": 191450 }, { "epoch": 0.3814308937906413, "grad_norm": 0.15707099437713623, "learning_rate": 0.002, "loss": 2.5626, "step": 191460 }, { "epoch": 0.38145081601428027, "grad_norm": 0.16043345630168915, "learning_rate": 0.002, "loss": 2.5587, "step": 191470 }, { "epoch": 0.38147073823791916, "grad_norm": 0.16254498064517975, "learning_rate": 0.002, "loss": 2.5625, "step": 191480 }, { "epoch": 0.38149066046155805, "grad_norm": 0.13307051360607147, "learning_rate": 0.002, "loss": 2.5718, "step": 191490 }, { "epoch": 0.381510582685197, "grad_norm": 0.19225162267684937, "learning_rate": 0.002, "loss": 2.5608, "step": 191500 }, { "epoch": 0.3815305049088359, "grad_norm": 0.12659895420074463, "learning_rate": 0.002, "loss": 2.5612, "step": 191510 }, { "epoch": 0.38155042713247483, "grad_norm": 0.18685224652290344, "learning_rate": 0.002, "loss": 2.5551, "step": 191520 }, { "epoch": 0.3815703493561137, "grad_norm": 0.15843036770820618, "learning_rate": 0.002, "loss": 2.553, "step": 191530 }, { "epoch": 0.38159027157975267, "grad_norm": 0.15919318795204163, "learning_rate": 0.002, "loss": 2.5641, "step": 191540 }, { "epoch": 0.38161019380339156, "grad_norm": 0.1745838075876236, "learning_rate": 0.002, "loss": 2.5554, "step": 191550 }, { "epoch": 0.38163011602703045, "grad_norm": 0.1690751612186432, "learning_rate": 0.002, "loss": 2.5527, "step": 191560 }, { "epoch": 0.3816500382506694, "grad_norm": 0.17778508365154266, "learning_rate": 0.002, "loss": 2.5638, "step": 191570 }, { "epoch": 0.3816699604743083, "grad_norm": 0.1689547747373581, "learning_rate": 0.002, "loss": 2.5645, "step": 191580 }, { "epoch": 0.38168988269794724, "grad_norm": 0.17073185741901398, "learning_rate": 0.002, "loss": 2.5583, "step": 191590 }, { "epoch": 0.3817098049215861, "grad_norm": 0.17607229948043823, "learning_rate": 0.002, "loss": 2.5453, "step": 191600 }, { "epoch": 0.381729727145225, "grad_norm": 0.15738150477409363, "learning_rate": 0.002, "loss": 2.555, "step": 191610 }, { "epoch": 0.38174964936886396, "grad_norm": 0.17852061986923218, "learning_rate": 0.002, "loss": 2.5827, "step": 191620 }, { "epoch": 0.38176957159250285, "grad_norm": 0.19496585428714752, "learning_rate": 0.002, "loss": 2.5574, "step": 191630 }, { "epoch": 0.3817894938161418, "grad_norm": 0.17848922312259674, "learning_rate": 0.002, "loss": 2.565, "step": 191640 }, { "epoch": 0.3818094160397807, "grad_norm": 0.14828020334243774, "learning_rate": 0.002, "loss": 2.5611, "step": 191650 }, { "epoch": 0.38182933826341964, "grad_norm": 0.13458393514156342, "learning_rate": 0.002, "loss": 2.5798, "step": 191660 }, { "epoch": 0.38184926048705853, "grad_norm": 0.1757318526506424, "learning_rate": 0.002, "loss": 2.562, "step": 191670 }, { "epoch": 0.3818691827106974, "grad_norm": 0.16346487402915955, "learning_rate": 0.002, "loss": 2.5605, "step": 191680 }, { "epoch": 0.38188910493433637, "grad_norm": 0.1545460969209671, "learning_rate": 0.002, "loss": 2.5509, "step": 191690 }, { "epoch": 0.38190902715797526, "grad_norm": 0.17718158662319183, "learning_rate": 0.002, "loss": 2.5775, "step": 191700 }, { "epoch": 0.3819289493816142, "grad_norm": 0.15111488103866577, "learning_rate": 0.002, "loss": 2.5617, "step": 191710 }, { "epoch": 0.3819488716052531, "grad_norm": 0.16197098791599274, "learning_rate": 0.002, "loss": 2.5502, "step": 191720 }, { "epoch": 0.381968793828892, "grad_norm": 0.16449794173240662, "learning_rate": 0.002, "loss": 2.5489, "step": 191730 }, { "epoch": 0.38198871605253093, "grad_norm": 0.17305608093738556, "learning_rate": 0.002, "loss": 2.5289, "step": 191740 }, { "epoch": 0.3820086382761698, "grad_norm": 0.1890968382358551, "learning_rate": 0.002, "loss": 2.5632, "step": 191750 }, { "epoch": 0.38202856049980877, "grad_norm": 0.14141368865966797, "learning_rate": 0.002, "loss": 2.5683, "step": 191760 }, { "epoch": 0.38204848272344766, "grad_norm": 0.1660633087158203, "learning_rate": 0.002, "loss": 2.5747, "step": 191770 }, { "epoch": 0.38206840494708655, "grad_norm": 0.1634497195482254, "learning_rate": 0.002, "loss": 2.5593, "step": 191780 }, { "epoch": 0.3820883271707255, "grad_norm": 0.16524191200733185, "learning_rate": 0.002, "loss": 2.5738, "step": 191790 }, { "epoch": 0.3821082493943644, "grad_norm": 0.16344957053661346, "learning_rate": 0.002, "loss": 2.5521, "step": 191800 }, { "epoch": 0.38212817161800333, "grad_norm": 0.17259863018989563, "learning_rate": 0.002, "loss": 2.5426, "step": 191810 }, { "epoch": 0.3821480938416422, "grad_norm": 0.16971708834171295, "learning_rate": 0.002, "loss": 2.555, "step": 191820 }, { "epoch": 0.38216801606528117, "grad_norm": 0.1628832072019577, "learning_rate": 0.002, "loss": 2.5562, "step": 191830 }, { "epoch": 0.38218793828892006, "grad_norm": 0.15278373658657074, "learning_rate": 0.002, "loss": 2.5628, "step": 191840 }, { "epoch": 0.38220786051255895, "grad_norm": 0.1660769134759903, "learning_rate": 0.002, "loss": 2.5681, "step": 191850 }, { "epoch": 0.3822277827361979, "grad_norm": 0.1884118765592575, "learning_rate": 0.002, "loss": 2.5521, "step": 191860 }, { "epoch": 0.3822477049598368, "grad_norm": 0.15670976042747498, "learning_rate": 0.002, "loss": 2.5585, "step": 191870 }, { "epoch": 0.38226762718347573, "grad_norm": 0.1922304481267929, "learning_rate": 0.002, "loss": 2.5504, "step": 191880 }, { "epoch": 0.3822875494071146, "grad_norm": 0.14071108400821686, "learning_rate": 0.002, "loss": 2.5601, "step": 191890 }, { "epoch": 0.3823074716307535, "grad_norm": 0.22946970164775848, "learning_rate": 0.002, "loss": 2.5546, "step": 191900 }, { "epoch": 0.38232739385439246, "grad_norm": 0.1850564032793045, "learning_rate": 0.002, "loss": 2.5589, "step": 191910 }, { "epoch": 0.38234731607803135, "grad_norm": 0.19930203258991241, "learning_rate": 0.002, "loss": 2.5689, "step": 191920 }, { "epoch": 0.3823672383016703, "grad_norm": 0.19941268861293793, "learning_rate": 0.002, "loss": 2.5694, "step": 191930 }, { "epoch": 0.3823871605253092, "grad_norm": 0.14972350001335144, "learning_rate": 0.002, "loss": 2.5665, "step": 191940 }, { "epoch": 0.3824070827489481, "grad_norm": 0.15886862576007843, "learning_rate": 0.002, "loss": 2.5419, "step": 191950 }, { "epoch": 0.382427004972587, "grad_norm": 0.19691170752048492, "learning_rate": 0.002, "loss": 2.5586, "step": 191960 }, { "epoch": 0.3824469271962259, "grad_norm": 0.16074523329734802, "learning_rate": 0.002, "loss": 2.5604, "step": 191970 }, { "epoch": 0.38246684941986486, "grad_norm": 0.17747259140014648, "learning_rate": 0.002, "loss": 2.5638, "step": 191980 }, { "epoch": 0.38248677164350375, "grad_norm": 0.1391696333885193, "learning_rate": 0.002, "loss": 2.5606, "step": 191990 }, { "epoch": 0.3825066938671427, "grad_norm": 0.17042505741119385, "learning_rate": 0.002, "loss": 2.5664, "step": 192000 }, { "epoch": 0.3825266160907816, "grad_norm": 0.1763283759355545, "learning_rate": 0.002, "loss": 2.5667, "step": 192010 }, { "epoch": 0.3825465383144205, "grad_norm": 0.17854683101177216, "learning_rate": 0.002, "loss": 2.5689, "step": 192020 }, { "epoch": 0.3825664605380594, "grad_norm": 0.15640871226787567, "learning_rate": 0.002, "loss": 2.5585, "step": 192030 }, { "epoch": 0.3825863827616983, "grad_norm": 0.16391977667808533, "learning_rate": 0.002, "loss": 2.5574, "step": 192040 }, { "epoch": 0.38260630498533726, "grad_norm": 0.14643776416778564, "learning_rate": 0.002, "loss": 2.5781, "step": 192050 }, { "epoch": 0.38262622720897616, "grad_norm": 0.1576082557439804, "learning_rate": 0.002, "loss": 2.5789, "step": 192060 }, { "epoch": 0.38264614943261505, "grad_norm": 0.15831467509269714, "learning_rate": 0.002, "loss": 2.584, "step": 192070 }, { "epoch": 0.382666071656254, "grad_norm": 0.18557024002075195, "learning_rate": 0.002, "loss": 2.5735, "step": 192080 }, { "epoch": 0.3826859938798929, "grad_norm": 0.1780441254377365, "learning_rate": 0.002, "loss": 2.573, "step": 192090 }, { "epoch": 0.38270591610353183, "grad_norm": 0.1544855386018753, "learning_rate": 0.002, "loss": 2.565, "step": 192100 }, { "epoch": 0.3827258383271707, "grad_norm": 0.14097218215465546, "learning_rate": 0.002, "loss": 2.5521, "step": 192110 }, { "epoch": 0.38274576055080967, "grad_norm": 0.21294419467449188, "learning_rate": 0.002, "loss": 2.5464, "step": 192120 }, { "epoch": 0.38276568277444856, "grad_norm": 0.16178981959819794, "learning_rate": 0.002, "loss": 2.5465, "step": 192130 }, { "epoch": 0.38278560499808745, "grad_norm": 0.1877707839012146, "learning_rate": 0.002, "loss": 2.5637, "step": 192140 }, { "epoch": 0.3828055272217264, "grad_norm": 0.16956119239330292, "learning_rate": 0.002, "loss": 2.5689, "step": 192150 }, { "epoch": 0.3828254494453653, "grad_norm": 0.17163726687431335, "learning_rate": 0.002, "loss": 2.5633, "step": 192160 }, { "epoch": 0.38284537166900423, "grad_norm": 0.15689007937908173, "learning_rate": 0.002, "loss": 2.5522, "step": 192170 }, { "epoch": 0.3828652938926431, "grad_norm": 0.1813630610704422, "learning_rate": 0.002, "loss": 2.5418, "step": 192180 }, { "epoch": 0.382885216116282, "grad_norm": 0.20620986819267273, "learning_rate": 0.002, "loss": 2.5661, "step": 192190 }, { "epoch": 0.38290513833992096, "grad_norm": 0.1698906421661377, "learning_rate": 0.002, "loss": 2.569, "step": 192200 }, { "epoch": 0.38292506056355985, "grad_norm": 0.1527644246816635, "learning_rate": 0.002, "loss": 2.565, "step": 192210 }, { "epoch": 0.3829449827871988, "grad_norm": 0.1574476659297943, "learning_rate": 0.002, "loss": 2.5665, "step": 192220 }, { "epoch": 0.3829649050108377, "grad_norm": 0.20511125028133392, "learning_rate": 0.002, "loss": 2.5609, "step": 192230 }, { "epoch": 0.3829848272344766, "grad_norm": 0.16480858623981476, "learning_rate": 0.002, "loss": 2.5583, "step": 192240 }, { "epoch": 0.3830047494581155, "grad_norm": 0.2573065459728241, "learning_rate": 0.002, "loss": 2.5408, "step": 192250 }, { "epoch": 0.3830246716817544, "grad_norm": 0.16336220502853394, "learning_rate": 0.002, "loss": 2.5645, "step": 192260 }, { "epoch": 0.38304459390539336, "grad_norm": 0.1405174732208252, "learning_rate": 0.002, "loss": 2.5568, "step": 192270 }, { "epoch": 0.38306451612903225, "grad_norm": 0.17280681431293488, "learning_rate": 0.002, "loss": 2.5677, "step": 192280 }, { "epoch": 0.3830844383526712, "grad_norm": 0.16190312802791595, "learning_rate": 0.002, "loss": 2.5605, "step": 192290 }, { "epoch": 0.3831043605763101, "grad_norm": 0.15560086071491241, "learning_rate": 0.002, "loss": 2.5613, "step": 192300 }, { "epoch": 0.383124282799949, "grad_norm": 0.14832521975040436, "learning_rate": 0.002, "loss": 2.5433, "step": 192310 }, { "epoch": 0.3831442050235879, "grad_norm": 0.17997927963733673, "learning_rate": 0.002, "loss": 2.5737, "step": 192320 }, { "epoch": 0.3831641272472268, "grad_norm": 0.1775009036064148, "learning_rate": 0.002, "loss": 2.5657, "step": 192330 }, { "epoch": 0.38318404947086576, "grad_norm": 0.13885492086410522, "learning_rate": 0.002, "loss": 2.5711, "step": 192340 }, { "epoch": 0.38320397169450465, "grad_norm": 0.1790986806154251, "learning_rate": 0.002, "loss": 2.567, "step": 192350 }, { "epoch": 0.38322389391814354, "grad_norm": 0.15377578139305115, "learning_rate": 0.002, "loss": 2.5614, "step": 192360 }, { "epoch": 0.3832438161417825, "grad_norm": 0.17996346950531006, "learning_rate": 0.002, "loss": 2.5542, "step": 192370 }, { "epoch": 0.3832637383654214, "grad_norm": 0.17546041309833527, "learning_rate": 0.002, "loss": 2.5502, "step": 192380 }, { "epoch": 0.3832836605890603, "grad_norm": 0.15064239501953125, "learning_rate": 0.002, "loss": 2.5537, "step": 192390 }, { "epoch": 0.3833035828126992, "grad_norm": 0.15550574660301208, "learning_rate": 0.002, "loss": 2.5648, "step": 192400 }, { "epoch": 0.38332350503633816, "grad_norm": 0.1668132096529007, "learning_rate": 0.002, "loss": 2.5569, "step": 192410 }, { "epoch": 0.38334342725997705, "grad_norm": 0.17377358675003052, "learning_rate": 0.002, "loss": 2.5617, "step": 192420 }, { "epoch": 0.38336334948361594, "grad_norm": 0.15068446099758148, "learning_rate": 0.002, "loss": 2.5634, "step": 192430 }, { "epoch": 0.3833832717072549, "grad_norm": 0.1658378541469574, "learning_rate": 0.002, "loss": 2.5577, "step": 192440 }, { "epoch": 0.3834031939308938, "grad_norm": 0.18069404363632202, "learning_rate": 0.002, "loss": 2.5521, "step": 192450 }, { "epoch": 0.38342311615453273, "grad_norm": 0.1463102251291275, "learning_rate": 0.002, "loss": 2.5685, "step": 192460 }, { "epoch": 0.3834430383781716, "grad_norm": 0.16982850432395935, "learning_rate": 0.002, "loss": 2.5565, "step": 192470 }, { "epoch": 0.3834629606018105, "grad_norm": 0.19722965359687805, "learning_rate": 0.002, "loss": 2.5697, "step": 192480 }, { "epoch": 0.38348288282544946, "grad_norm": 0.14349783957004547, "learning_rate": 0.002, "loss": 2.5673, "step": 192490 }, { "epoch": 0.38350280504908835, "grad_norm": 0.16910259425640106, "learning_rate": 0.002, "loss": 2.5691, "step": 192500 }, { "epoch": 0.3835227272727273, "grad_norm": 0.14598287642002106, "learning_rate": 0.002, "loss": 2.566, "step": 192510 }, { "epoch": 0.3835426494963662, "grad_norm": 0.18787968158721924, "learning_rate": 0.002, "loss": 2.565, "step": 192520 }, { "epoch": 0.3835625717200051, "grad_norm": 0.1494021713733673, "learning_rate": 0.002, "loss": 2.5445, "step": 192530 }, { "epoch": 0.383582493943644, "grad_norm": 0.1844417154788971, "learning_rate": 0.002, "loss": 2.56, "step": 192540 }, { "epoch": 0.3836024161672829, "grad_norm": 0.1942552775144577, "learning_rate": 0.002, "loss": 2.5708, "step": 192550 }, { "epoch": 0.38362233839092186, "grad_norm": 0.1636449694633484, "learning_rate": 0.002, "loss": 2.5636, "step": 192560 }, { "epoch": 0.38364226061456075, "grad_norm": 0.17535889148712158, "learning_rate": 0.002, "loss": 2.5534, "step": 192570 }, { "epoch": 0.3836621828381997, "grad_norm": 0.16636811196804047, "learning_rate": 0.002, "loss": 2.5613, "step": 192580 }, { "epoch": 0.3836821050618386, "grad_norm": 0.1669543832540512, "learning_rate": 0.002, "loss": 2.5683, "step": 192590 }, { "epoch": 0.3837020272854775, "grad_norm": 0.16217853128910065, "learning_rate": 0.002, "loss": 2.5561, "step": 192600 }, { "epoch": 0.3837219495091164, "grad_norm": 0.1676168143749237, "learning_rate": 0.002, "loss": 2.5626, "step": 192610 }, { "epoch": 0.3837418717327553, "grad_norm": 0.17879420518875122, "learning_rate": 0.002, "loss": 2.5841, "step": 192620 }, { "epoch": 0.38376179395639426, "grad_norm": 0.176467165350914, "learning_rate": 0.002, "loss": 2.5598, "step": 192630 }, { "epoch": 0.38378171618003315, "grad_norm": 0.1557103395462036, "learning_rate": 0.002, "loss": 2.5682, "step": 192640 }, { "epoch": 0.38380163840367204, "grad_norm": 0.17264920473098755, "learning_rate": 0.002, "loss": 2.5576, "step": 192650 }, { "epoch": 0.383821560627311, "grad_norm": 0.1678740382194519, "learning_rate": 0.002, "loss": 2.5554, "step": 192660 }, { "epoch": 0.3838414828509499, "grad_norm": 0.17205746471881866, "learning_rate": 0.002, "loss": 2.5544, "step": 192670 }, { "epoch": 0.3838614050745888, "grad_norm": 0.18221236765384674, "learning_rate": 0.002, "loss": 2.5631, "step": 192680 }, { "epoch": 0.3838813272982277, "grad_norm": 0.15643829107284546, "learning_rate": 0.002, "loss": 2.5601, "step": 192690 }, { "epoch": 0.3839012495218666, "grad_norm": 0.1539677530527115, "learning_rate": 0.002, "loss": 2.5621, "step": 192700 }, { "epoch": 0.38392117174550555, "grad_norm": 0.15413053333759308, "learning_rate": 0.002, "loss": 2.5524, "step": 192710 }, { "epoch": 0.38394109396914444, "grad_norm": 0.16209092736244202, "learning_rate": 0.002, "loss": 2.5713, "step": 192720 }, { "epoch": 0.3839610161927834, "grad_norm": 0.16324244439601898, "learning_rate": 0.002, "loss": 2.5601, "step": 192730 }, { "epoch": 0.3839809384164223, "grad_norm": 0.15873564779758453, "learning_rate": 0.002, "loss": 2.5692, "step": 192740 }, { "epoch": 0.3840008606400612, "grad_norm": 0.14883603155612946, "learning_rate": 0.002, "loss": 2.5501, "step": 192750 }, { "epoch": 0.3840207828637001, "grad_norm": 0.15677280724048615, "learning_rate": 0.002, "loss": 2.5671, "step": 192760 }, { "epoch": 0.384040705087339, "grad_norm": 0.16566838324069977, "learning_rate": 0.002, "loss": 2.5464, "step": 192770 }, { "epoch": 0.38406062731097795, "grad_norm": 0.23312492668628693, "learning_rate": 0.002, "loss": 2.5587, "step": 192780 }, { "epoch": 0.38408054953461684, "grad_norm": 0.15292906761169434, "learning_rate": 0.002, "loss": 2.551, "step": 192790 }, { "epoch": 0.3841004717582558, "grad_norm": 0.16698475182056427, "learning_rate": 0.002, "loss": 2.5478, "step": 192800 }, { "epoch": 0.3841203939818947, "grad_norm": 0.1566413789987564, "learning_rate": 0.002, "loss": 2.5565, "step": 192810 }, { "epoch": 0.38414031620553357, "grad_norm": 0.2712630331516266, "learning_rate": 0.002, "loss": 2.562, "step": 192820 }, { "epoch": 0.3841602384291725, "grad_norm": 0.16621921956539154, "learning_rate": 0.002, "loss": 2.5717, "step": 192830 }, { "epoch": 0.3841801606528114, "grad_norm": 0.1583469808101654, "learning_rate": 0.002, "loss": 2.5496, "step": 192840 }, { "epoch": 0.38420008287645036, "grad_norm": 0.179013192653656, "learning_rate": 0.002, "loss": 2.5625, "step": 192850 }, { "epoch": 0.38422000510008925, "grad_norm": 0.15362954139709473, "learning_rate": 0.002, "loss": 2.5594, "step": 192860 }, { "epoch": 0.3842399273237282, "grad_norm": 0.17124266922473907, "learning_rate": 0.002, "loss": 2.5536, "step": 192870 }, { "epoch": 0.3842598495473671, "grad_norm": 0.16861885786056519, "learning_rate": 0.002, "loss": 2.5536, "step": 192880 }, { "epoch": 0.384279771771006, "grad_norm": 0.2443905472755432, "learning_rate": 0.002, "loss": 2.5611, "step": 192890 }, { "epoch": 0.3842996939946449, "grad_norm": 0.1495138555765152, "learning_rate": 0.002, "loss": 2.5602, "step": 192900 }, { "epoch": 0.3843196162182838, "grad_norm": 0.13633811473846436, "learning_rate": 0.002, "loss": 2.568, "step": 192910 }, { "epoch": 0.38433953844192276, "grad_norm": 0.20043149590492249, "learning_rate": 0.002, "loss": 2.5789, "step": 192920 }, { "epoch": 0.38435946066556165, "grad_norm": 0.1690746247768402, "learning_rate": 0.002, "loss": 2.5526, "step": 192930 }, { "epoch": 0.38437938288920054, "grad_norm": 0.17812761664390564, "learning_rate": 0.002, "loss": 2.5557, "step": 192940 }, { "epoch": 0.3843993051128395, "grad_norm": 0.15348617732524872, "learning_rate": 0.002, "loss": 2.5591, "step": 192950 }, { "epoch": 0.3844192273364784, "grad_norm": 0.15809181332588196, "learning_rate": 0.002, "loss": 2.5644, "step": 192960 }, { "epoch": 0.3844391495601173, "grad_norm": 0.14354625344276428, "learning_rate": 0.002, "loss": 2.5583, "step": 192970 }, { "epoch": 0.3844590717837562, "grad_norm": 0.18896090984344482, "learning_rate": 0.002, "loss": 2.5629, "step": 192980 }, { "epoch": 0.3844789940073951, "grad_norm": 0.15807873010635376, "learning_rate": 0.002, "loss": 2.5597, "step": 192990 }, { "epoch": 0.38449891623103405, "grad_norm": 0.1759803295135498, "learning_rate": 0.002, "loss": 2.5427, "step": 193000 }, { "epoch": 0.38451883845467294, "grad_norm": 0.18632571399211884, "learning_rate": 0.002, "loss": 2.5636, "step": 193010 }, { "epoch": 0.3845387606783119, "grad_norm": 0.19452181458473206, "learning_rate": 0.002, "loss": 2.5574, "step": 193020 }, { "epoch": 0.3845586829019508, "grad_norm": 0.160271555185318, "learning_rate": 0.002, "loss": 2.5688, "step": 193030 }, { "epoch": 0.3845786051255897, "grad_norm": 0.14506429433822632, "learning_rate": 0.002, "loss": 2.5586, "step": 193040 }, { "epoch": 0.3845985273492286, "grad_norm": 0.2064811736345291, "learning_rate": 0.002, "loss": 2.5677, "step": 193050 }, { "epoch": 0.3846184495728675, "grad_norm": 0.19303369522094727, "learning_rate": 0.002, "loss": 2.5669, "step": 193060 }, { "epoch": 0.38463837179650645, "grad_norm": 0.16600729525089264, "learning_rate": 0.002, "loss": 2.5593, "step": 193070 }, { "epoch": 0.38465829402014534, "grad_norm": 0.1564738154411316, "learning_rate": 0.002, "loss": 2.5487, "step": 193080 }, { "epoch": 0.3846782162437843, "grad_norm": 0.1673063486814499, "learning_rate": 0.002, "loss": 2.5418, "step": 193090 }, { "epoch": 0.3846981384674232, "grad_norm": 0.1819988191127777, "learning_rate": 0.002, "loss": 2.5713, "step": 193100 }, { "epoch": 0.38471806069106207, "grad_norm": 0.15195214748382568, "learning_rate": 0.002, "loss": 2.559, "step": 193110 }, { "epoch": 0.384737982914701, "grad_norm": 0.1559544801712036, "learning_rate": 0.002, "loss": 2.5602, "step": 193120 }, { "epoch": 0.3847579051383399, "grad_norm": 0.19568246603012085, "learning_rate": 0.002, "loss": 2.5587, "step": 193130 }, { "epoch": 0.38477782736197885, "grad_norm": 0.15884773433208466, "learning_rate": 0.002, "loss": 2.5592, "step": 193140 }, { "epoch": 0.38479774958561774, "grad_norm": 0.15085171163082123, "learning_rate": 0.002, "loss": 2.569, "step": 193150 }, { "epoch": 0.3848176718092567, "grad_norm": 0.17134365439414978, "learning_rate": 0.002, "loss": 2.5719, "step": 193160 }, { "epoch": 0.3848375940328956, "grad_norm": 0.22106647491455078, "learning_rate": 0.002, "loss": 2.5499, "step": 193170 }, { "epoch": 0.38485751625653447, "grad_norm": 0.15136927366256714, "learning_rate": 0.002, "loss": 2.5551, "step": 193180 }, { "epoch": 0.3848774384801734, "grad_norm": 0.1868198961019516, "learning_rate": 0.002, "loss": 2.559, "step": 193190 }, { "epoch": 0.3848973607038123, "grad_norm": 0.16308973729610443, "learning_rate": 0.002, "loss": 2.5593, "step": 193200 }, { "epoch": 0.38491728292745125, "grad_norm": 0.16374729573726654, "learning_rate": 0.002, "loss": 2.5737, "step": 193210 }, { "epoch": 0.38493720515109014, "grad_norm": 0.15579760074615479, "learning_rate": 0.002, "loss": 2.5642, "step": 193220 }, { "epoch": 0.38495712737472904, "grad_norm": 0.15191783010959625, "learning_rate": 0.002, "loss": 2.5425, "step": 193230 }, { "epoch": 0.384977049598368, "grad_norm": 0.16248270869255066, "learning_rate": 0.002, "loss": 2.5662, "step": 193240 }, { "epoch": 0.3849969718220069, "grad_norm": 0.16765645146369934, "learning_rate": 0.002, "loss": 2.567, "step": 193250 }, { "epoch": 0.3850168940456458, "grad_norm": 0.15671561658382416, "learning_rate": 0.002, "loss": 2.5696, "step": 193260 }, { "epoch": 0.3850368162692847, "grad_norm": 0.1584886610507965, "learning_rate": 0.002, "loss": 2.55, "step": 193270 }, { "epoch": 0.3850567384929236, "grad_norm": 0.21605661511421204, "learning_rate": 0.002, "loss": 2.5645, "step": 193280 }, { "epoch": 0.38507666071656255, "grad_norm": 0.21039029955863953, "learning_rate": 0.002, "loss": 2.5593, "step": 193290 }, { "epoch": 0.38509658294020144, "grad_norm": 0.16745756566524506, "learning_rate": 0.002, "loss": 2.5645, "step": 193300 }, { "epoch": 0.3851165051638404, "grad_norm": 0.15581901371479034, "learning_rate": 0.002, "loss": 2.5362, "step": 193310 }, { "epoch": 0.3851364273874793, "grad_norm": 0.18339179456233978, "learning_rate": 0.002, "loss": 2.5497, "step": 193320 }, { "epoch": 0.3851563496111182, "grad_norm": 0.17610324919223785, "learning_rate": 0.002, "loss": 2.5699, "step": 193330 }, { "epoch": 0.3851762718347571, "grad_norm": 0.1460505574941635, "learning_rate": 0.002, "loss": 2.5527, "step": 193340 }, { "epoch": 0.385196194058396, "grad_norm": 0.137032151222229, "learning_rate": 0.002, "loss": 2.5459, "step": 193350 }, { "epoch": 0.38521611628203495, "grad_norm": 0.16904819011688232, "learning_rate": 0.002, "loss": 2.5481, "step": 193360 }, { "epoch": 0.38523603850567384, "grad_norm": 0.14860719442367554, "learning_rate": 0.002, "loss": 2.5492, "step": 193370 }, { "epoch": 0.3852559607293128, "grad_norm": 0.17265449464321136, "learning_rate": 0.002, "loss": 2.5651, "step": 193380 }, { "epoch": 0.3852758829529517, "grad_norm": 0.16586443781852722, "learning_rate": 0.002, "loss": 2.5537, "step": 193390 }, { "epoch": 0.38529580517659057, "grad_norm": 0.16677527129650116, "learning_rate": 0.002, "loss": 2.5486, "step": 193400 }, { "epoch": 0.3853157274002295, "grad_norm": 0.1653081178665161, "learning_rate": 0.002, "loss": 2.5627, "step": 193410 }, { "epoch": 0.3853356496238684, "grad_norm": 0.16196191310882568, "learning_rate": 0.002, "loss": 2.5639, "step": 193420 }, { "epoch": 0.38535557184750735, "grad_norm": 0.15312372148036957, "learning_rate": 0.002, "loss": 2.557, "step": 193430 }, { "epoch": 0.38537549407114624, "grad_norm": 0.15290974080562592, "learning_rate": 0.002, "loss": 2.5698, "step": 193440 }, { "epoch": 0.38539541629478513, "grad_norm": 0.20622940361499786, "learning_rate": 0.002, "loss": 2.5456, "step": 193450 }, { "epoch": 0.3854153385184241, "grad_norm": 0.17752036452293396, "learning_rate": 0.002, "loss": 2.5678, "step": 193460 }, { "epoch": 0.38543526074206297, "grad_norm": 0.16380935907363892, "learning_rate": 0.002, "loss": 2.5617, "step": 193470 }, { "epoch": 0.3854551829657019, "grad_norm": 0.15707574784755707, "learning_rate": 0.002, "loss": 2.5385, "step": 193480 }, { "epoch": 0.3854751051893408, "grad_norm": 0.16740843653678894, "learning_rate": 0.002, "loss": 2.5456, "step": 193490 }, { "epoch": 0.38549502741297975, "grad_norm": 0.17329321801662445, "learning_rate": 0.002, "loss": 2.562, "step": 193500 }, { "epoch": 0.38551494963661864, "grad_norm": 0.1925400197505951, "learning_rate": 0.002, "loss": 2.5606, "step": 193510 }, { "epoch": 0.38553487186025753, "grad_norm": 0.16222047805786133, "learning_rate": 0.002, "loss": 2.5661, "step": 193520 }, { "epoch": 0.3855547940838965, "grad_norm": 0.16745659708976746, "learning_rate": 0.002, "loss": 2.5485, "step": 193530 }, { "epoch": 0.38557471630753537, "grad_norm": 0.202735036611557, "learning_rate": 0.002, "loss": 2.5491, "step": 193540 }, { "epoch": 0.3855946385311743, "grad_norm": 0.15917229652404785, "learning_rate": 0.002, "loss": 2.559, "step": 193550 }, { "epoch": 0.3856145607548132, "grad_norm": 0.21429696679115295, "learning_rate": 0.002, "loss": 2.5578, "step": 193560 }, { "epoch": 0.3856344829784521, "grad_norm": 0.1480902135372162, "learning_rate": 0.002, "loss": 2.5705, "step": 193570 }, { "epoch": 0.38565440520209104, "grad_norm": 0.17708075046539307, "learning_rate": 0.002, "loss": 2.5774, "step": 193580 }, { "epoch": 0.38567432742572993, "grad_norm": 0.15630339086055756, "learning_rate": 0.002, "loss": 2.5591, "step": 193590 }, { "epoch": 0.3856942496493689, "grad_norm": 0.1810305267572403, "learning_rate": 0.002, "loss": 2.5569, "step": 193600 }, { "epoch": 0.38571417187300777, "grad_norm": 0.1923801600933075, "learning_rate": 0.002, "loss": 2.5499, "step": 193610 }, { "epoch": 0.3857340940966467, "grad_norm": 0.1436241865158081, "learning_rate": 0.002, "loss": 2.559, "step": 193620 }, { "epoch": 0.3857540163202856, "grad_norm": 0.17509137094020844, "learning_rate": 0.002, "loss": 2.5748, "step": 193630 }, { "epoch": 0.3857739385439245, "grad_norm": 0.16681718826293945, "learning_rate": 0.002, "loss": 2.5505, "step": 193640 }, { "epoch": 0.38579386076756345, "grad_norm": 0.15318255126476288, "learning_rate": 0.002, "loss": 2.5603, "step": 193650 }, { "epoch": 0.38581378299120234, "grad_norm": 0.1461741328239441, "learning_rate": 0.002, "loss": 2.559, "step": 193660 }, { "epoch": 0.3858337052148413, "grad_norm": 0.17471209168434143, "learning_rate": 0.002, "loss": 2.5601, "step": 193670 }, { "epoch": 0.3858536274384802, "grad_norm": 0.18103307485580444, "learning_rate": 0.002, "loss": 2.5553, "step": 193680 }, { "epoch": 0.38587354966211906, "grad_norm": 0.15549422800540924, "learning_rate": 0.002, "loss": 2.5451, "step": 193690 }, { "epoch": 0.385893471885758, "grad_norm": 0.1679815798997879, "learning_rate": 0.002, "loss": 2.5552, "step": 193700 }, { "epoch": 0.3859133941093969, "grad_norm": 0.20665521919727325, "learning_rate": 0.002, "loss": 2.5624, "step": 193710 }, { "epoch": 0.38593331633303585, "grad_norm": 0.1626921445131302, "learning_rate": 0.002, "loss": 2.5571, "step": 193720 }, { "epoch": 0.38595323855667474, "grad_norm": 0.14786207675933838, "learning_rate": 0.002, "loss": 2.5655, "step": 193730 }, { "epoch": 0.38597316078031363, "grad_norm": 0.1879998743534088, "learning_rate": 0.002, "loss": 2.5708, "step": 193740 }, { "epoch": 0.3859930830039526, "grad_norm": 0.1704116314649582, "learning_rate": 0.002, "loss": 2.5708, "step": 193750 }, { "epoch": 0.38601300522759147, "grad_norm": 0.17661641538143158, "learning_rate": 0.002, "loss": 2.5541, "step": 193760 }, { "epoch": 0.3860329274512304, "grad_norm": 0.17697584629058838, "learning_rate": 0.002, "loss": 2.552, "step": 193770 }, { "epoch": 0.3860528496748693, "grad_norm": 0.15562956035137177, "learning_rate": 0.002, "loss": 2.5625, "step": 193780 }, { "epoch": 0.38607277189850825, "grad_norm": 0.15797726809978485, "learning_rate": 0.002, "loss": 2.5719, "step": 193790 }, { "epoch": 0.38609269412214714, "grad_norm": 0.1607244908809662, "learning_rate": 0.002, "loss": 2.5522, "step": 193800 }, { "epoch": 0.38611261634578603, "grad_norm": 0.17714503407478333, "learning_rate": 0.002, "loss": 2.5644, "step": 193810 }, { "epoch": 0.386132538569425, "grad_norm": 0.15848413109779358, "learning_rate": 0.002, "loss": 2.5642, "step": 193820 }, { "epoch": 0.38615246079306387, "grad_norm": 0.1598910540342331, "learning_rate": 0.002, "loss": 2.5489, "step": 193830 }, { "epoch": 0.3861723830167028, "grad_norm": 0.23139536380767822, "learning_rate": 0.002, "loss": 2.5754, "step": 193840 }, { "epoch": 0.3861923052403417, "grad_norm": 0.17127372324466705, "learning_rate": 0.002, "loss": 2.5596, "step": 193850 }, { "epoch": 0.3862122274639806, "grad_norm": 0.1622626781463623, "learning_rate": 0.002, "loss": 2.5735, "step": 193860 }, { "epoch": 0.38623214968761954, "grad_norm": 0.1808663308620453, "learning_rate": 0.002, "loss": 2.5564, "step": 193870 }, { "epoch": 0.38625207191125843, "grad_norm": 0.17204007506370544, "learning_rate": 0.002, "loss": 2.5513, "step": 193880 }, { "epoch": 0.3862719941348974, "grad_norm": 0.15429258346557617, "learning_rate": 0.002, "loss": 2.5498, "step": 193890 }, { "epoch": 0.38629191635853627, "grad_norm": 0.15172144770622253, "learning_rate": 0.002, "loss": 2.5731, "step": 193900 }, { "epoch": 0.3863118385821752, "grad_norm": 0.16644899547100067, "learning_rate": 0.002, "loss": 2.5482, "step": 193910 }, { "epoch": 0.3863317608058141, "grad_norm": 0.1957196593284607, "learning_rate": 0.002, "loss": 2.5694, "step": 193920 }, { "epoch": 0.386351683029453, "grad_norm": 0.1633756160736084, "learning_rate": 0.002, "loss": 2.561, "step": 193930 }, { "epoch": 0.38637160525309194, "grad_norm": 0.1739984005689621, "learning_rate": 0.002, "loss": 2.5563, "step": 193940 }, { "epoch": 0.38639152747673083, "grad_norm": 0.18906956911087036, "learning_rate": 0.002, "loss": 2.5422, "step": 193950 }, { "epoch": 0.3864114497003698, "grad_norm": 0.17274415493011475, "learning_rate": 0.002, "loss": 2.5629, "step": 193960 }, { "epoch": 0.38643137192400867, "grad_norm": 0.16981571912765503, "learning_rate": 0.002, "loss": 2.5753, "step": 193970 }, { "epoch": 0.38645129414764756, "grad_norm": 0.17128486931324005, "learning_rate": 0.002, "loss": 2.5515, "step": 193980 }, { "epoch": 0.3864712163712865, "grad_norm": 0.19767487049102783, "learning_rate": 0.002, "loss": 2.5495, "step": 193990 }, { "epoch": 0.3864911385949254, "grad_norm": 0.1782245635986328, "learning_rate": 0.002, "loss": 2.5451, "step": 194000 }, { "epoch": 0.38651106081856434, "grad_norm": 0.14336349070072174, "learning_rate": 0.002, "loss": 2.5588, "step": 194010 }, { "epoch": 0.38653098304220324, "grad_norm": 0.2250564992427826, "learning_rate": 0.002, "loss": 2.5572, "step": 194020 }, { "epoch": 0.3865509052658421, "grad_norm": 0.1713390052318573, "learning_rate": 0.002, "loss": 2.5553, "step": 194030 }, { "epoch": 0.3865708274894811, "grad_norm": 0.16113850474357605, "learning_rate": 0.002, "loss": 2.5545, "step": 194040 }, { "epoch": 0.38659074971311996, "grad_norm": 0.16166211664676666, "learning_rate": 0.002, "loss": 2.5485, "step": 194050 }, { "epoch": 0.3866106719367589, "grad_norm": 0.1626141518354416, "learning_rate": 0.002, "loss": 2.5615, "step": 194060 }, { "epoch": 0.3866305941603978, "grad_norm": 0.20527249574661255, "learning_rate": 0.002, "loss": 2.562, "step": 194070 }, { "epoch": 0.38665051638403675, "grad_norm": 0.15160344541072845, "learning_rate": 0.002, "loss": 2.5619, "step": 194080 }, { "epoch": 0.38667043860767564, "grad_norm": 0.2198113203048706, "learning_rate": 0.002, "loss": 2.5532, "step": 194090 }, { "epoch": 0.38669036083131453, "grad_norm": 0.1568170040845871, "learning_rate": 0.002, "loss": 2.5584, "step": 194100 }, { "epoch": 0.3867102830549535, "grad_norm": 0.16190195083618164, "learning_rate": 0.002, "loss": 2.5482, "step": 194110 }, { "epoch": 0.38673020527859236, "grad_norm": 0.21740633249282837, "learning_rate": 0.002, "loss": 2.5568, "step": 194120 }, { "epoch": 0.3867501275022313, "grad_norm": 0.18757224082946777, "learning_rate": 0.002, "loss": 2.5662, "step": 194130 }, { "epoch": 0.3867700497258702, "grad_norm": 0.15717892348766327, "learning_rate": 0.002, "loss": 2.5665, "step": 194140 }, { "epoch": 0.3867899719495091, "grad_norm": 0.17506220936775208, "learning_rate": 0.002, "loss": 2.571, "step": 194150 }, { "epoch": 0.38680989417314804, "grad_norm": 0.21185535192489624, "learning_rate": 0.002, "loss": 2.5798, "step": 194160 }, { "epoch": 0.38682981639678693, "grad_norm": 0.18651391565799713, "learning_rate": 0.002, "loss": 2.5645, "step": 194170 }, { "epoch": 0.3868497386204259, "grad_norm": 0.19101057946681976, "learning_rate": 0.002, "loss": 2.5436, "step": 194180 }, { "epoch": 0.38686966084406477, "grad_norm": 0.1553489863872528, "learning_rate": 0.002, "loss": 2.5525, "step": 194190 }, { "epoch": 0.38688958306770366, "grad_norm": 0.18371598422527313, "learning_rate": 0.002, "loss": 2.5733, "step": 194200 }, { "epoch": 0.3869095052913426, "grad_norm": 0.1777281016111374, "learning_rate": 0.002, "loss": 2.5726, "step": 194210 }, { "epoch": 0.3869294275149815, "grad_norm": 0.13869334757328033, "learning_rate": 0.002, "loss": 2.5564, "step": 194220 }, { "epoch": 0.38694934973862044, "grad_norm": 0.16046862304210663, "learning_rate": 0.002, "loss": 2.5598, "step": 194230 }, { "epoch": 0.38696927196225933, "grad_norm": 0.19190028309822083, "learning_rate": 0.002, "loss": 2.5401, "step": 194240 }, { "epoch": 0.3869891941858983, "grad_norm": 0.15407657623291016, "learning_rate": 0.002, "loss": 2.5529, "step": 194250 }, { "epoch": 0.38700911640953717, "grad_norm": 0.15249554812908173, "learning_rate": 0.002, "loss": 2.5786, "step": 194260 }, { "epoch": 0.38702903863317606, "grad_norm": 0.16263903677463531, "learning_rate": 0.002, "loss": 2.5629, "step": 194270 }, { "epoch": 0.387048960856815, "grad_norm": 0.1860172599554062, "learning_rate": 0.002, "loss": 2.5618, "step": 194280 }, { "epoch": 0.3870688830804539, "grad_norm": 0.15529906749725342, "learning_rate": 0.002, "loss": 2.562, "step": 194290 }, { "epoch": 0.38708880530409284, "grad_norm": 0.17330236732959747, "learning_rate": 0.002, "loss": 2.5544, "step": 194300 }, { "epoch": 0.38710872752773173, "grad_norm": 0.18770308792591095, "learning_rate": 0.002, "loss": 2.5599, "step": 194310 }, { "epoch": 0.3871286497513706, "grad_norm": 0.16349738836288452, "learning_rate": 0.002, "loss": 2.5491, "step": 194320 }, { "epoch": 0.38714857197500957, "grad_norm": 0.19371815025806427, "learning_rate": 0.002, "loss": 2.5546, "step": 194330 }, { "epoch": 0.38716849419864846, "grad_norm": 0.16536949574947357, "learning_rate": 0.002, "loss": 2.5423, "step": 194340 }, { "epoch": 0.3871884164222874, "grad_norm": 0.1537965089082718, "learning_rate": 0.002, "loss": 2.5761, "step": 194350 }, { "epoch": 0.3872083386459263, "grad_norm": 0.1679820716381073, "learning_rate": 0.002, "loss": 2.5625, "step": 194360 }, { "epoch": 0.38722826086956524, "grad_norm": 0.1794111579656601, "learning_rate": 0.002, "loss": 2.5687, "step": 194370 }, { "epoch": 0.38724818309320413, "grad_norm": 0.16767197847366333, "learning_rate": 0.002, "loss": 2.5464, "step": 194380 }, { "epoch": 0.387268105316843, "grad_norm": 0.22137045860290527, "learning_rate": 0.002, "loss": 2.5606, "step": 194390 }, { "epoch": 0.38728802754048197, "grad_norm": 0.16788162291049957, "learning_rate": 0.002, "loss": 2.563, "step": 194400 }, { "epoch": 0.38730794976412086, "grad_norm": 0.16312915086746216, "learning_rate": 0.002, "loss": 2.5647, "step": 194410 }, { "epoch": 0.3873278719877598, "grad_norm": 0.18046265840530396, "learning_rate": 0.002, "loss": 2.5682, "step": 194420 }, { "epoch": 0.3873477942113987, "grad_norm": 0.15182043612003326, "learning_rate": 0.002, "loss": 2.565, "step": 194430 }, { "epoch": 0.3873677164350376, "grad_norm": 0.18619714677333832, "learning_rate": 0.002, "loss": 2.5444, "step": 194440 }, { "epoch": 0.38738763865867654, "grad_norm": 0.17233142256736755, "learning_rate": 0.002, "loss": 2.5378, "step": 194450 }, { "epoch": 0.3874075608823154, "grad_norm": 0.1468806117773056, "learning_rate": 0.002, "loss": 2.5482, "step": 194460 }, { "epoch": 0.3874274831059544, "grad_norm": 0.21673227846622467, "learning_rate": 0.002, "loss": 2.5803, "step": 194470 }, { "epoch": 0.38744740532959326, "grad_norm": 0.15860815346240997, "learning_rate": 0.002, "loss": 2.5769, "step": 194480 }, { "epoch": 0.38746732755323215, "grad_norm": 0.14312419295310974, "learning_rate": 0.002, "loss": 2.577, "step": 194490 }, { "epoch": 0.3874872497768711, "grad_norm": 0.19523103535175323, "learning_rate": 0.002, "loss": 2.5671, "step": 194500 }, { "epoch": 0.38750717200051, "grad_norm": 0.15841542184352875, "learning_rate": 0.002, "loss": 2.5554, "step": 194510 }, { "epoch": 0.38752709422414894, "grad_norm": 0.17219528555870056, "learning_rate": 0.002, "loss": 2.5626, "step": 194520 }, { "epoch": 0.38754701644778783, "grad_norm": 0.15379969775676727, "learning_rate": 0.002, "loss": 2.5424, "step": 194530 }, { "epoch": 0.3875669386714268, "grad_norm": 0.16877152025699615, "learning_rate": 0.002, "loss": 2.5607, "step": 194540 }, { "epoch": 0.38758686089506567, "grad_norm": 0.1790589541196823, "learning_rate": 0.002, "loss": 2.5561, "step": 194550 }, { "epoch": 0.38760678311870456, "grad_norm": 0.18128621578216553, "learning_rate": 0.002, "loss": 2.5755, "step": 194560 }, { "epoch": 0.3876267053423435, "grad_norm": 0.1410233974456787, "learning_rate": 0.002, "loss": 2.5562, "step": 194570 }, { "epoch": 0.3876466275659824, "grad_norm": 0.18393445014953613, "learning_rate": 0.002, "loss": 2.5581, "step": 194580 }, { "epoch": 0.38766654978962134, "grad_norm": 0.16531158983707428, "learning_rate": 0.002, "loss": 2.5504, "step": 194590 }, { "epoch": 0.38768647201326023, "grad_norm": 0.16250181198120117, "learning_rate": 0.002, "loss": 2.5579, "step": 194600 }, { "epoch": 0.3877063942368991, "grad_norm": 0.22780315577983856, "learning_rate": 0.002, "loss": 2.567, "step": 194610 }, { "epoch": 0.38772631646053807, "grad_norm": 0.17906931042671204, "learning_rate": 0.002, "loss": 2.5508, "step": 194620 }, { "epoch": 0.38774623868417696, "grad_norm": 0.14996883273124695, "learning_rate": 0.002, "loss": 2.5684, "step": 194630 }, { "epoch": 0.3877661609078159, "grad_norm": 0.16592144966125488, "learning_rate": 0.002, "loss": 2.5565, "step": 194640 }, { "epoch": 0.3877860831314548, "grad_norm": 0.17913837730884552, "learning_rate": 0.002, "loss": 2.5614, "step": 194650 }, { "epoch": 0.38780600535509374, "grad_norm": 0.15571704506874084, "learning_rate": 0.002, "loss": 2.5669, "step": 194660 }, { "epoch": 0.38782592757873263, "grad_norm": 0.17859512567520142, "learning_rate": 0.002, "loss": 2.5421, "step": 194670 }, { "epoch": 0.3878458498023715, "grad_norm": 0.18086640536785126, "learning_rate": 0.002, "loss": 2.5636, "step": 194680 }, { "epoch": 0.38786577202601047, "grad_norm": 0.18261383473873138, "learning_rate": 0.002, "loss": 2.5367, "step": 194690 }, { "epoch": 0.38788569424964936, "grad_norm": 0.1895429939031601, "learning_rate": 0.002, "loss": 2.5693, "step": 194700 }, { "epoch": 0.3879056164732883, "grad_norm": 0.1876603662967682, "learning_rate": 0.002, "loss": 2.5679, "step": 194710 }, { "epoch": 0.3879255386969272, "grad_norm": 0.17476142942905426, "learning_rate": 0.002, "loss": 2.5527, "step": 194720 }, { "epoch": 0.3879454609205661, "grad_norm": 0.18138332664966583, "learning_rate": 0.002, "loss": 2.5655, "step": 194730 }, { "epoch": 0.38796538314420503, "grad_norm": 0.1691875010728836, "learning_rate": 0.002, "loss": 2.5478, "step": 194740 }, { "epoch": 0.3879853053678439, "grad_norm": 0.16954076290130615, "learning_rate": 0.002, "loss": 2.5658, "step": 194750 }, { "epoch": 0.38800522759148287, "grad_norm": 0.16791561245918274, "learning_rate": 0.002, "loss": 2.5515, "step": 194760 }, { "epoch": 0.38802514981512176, "grad_norm": 0.17944088578224182, "learning_rate": 0.002, "loss": 2.5379, "step": 194770 }, { "epoch": 0.38804507203876065, "grad_norm": 0.16234439611434937, "learning_rate": 0.002, "loss": 2.5496, "step": 194780 }, { "epoch": 0.3880649942623996, "grad_norm": 0.1547975391149521, "learning_rate": 0.002, "loss": 2.5569, "step": 194790 }, { "epoch": 0.3880849164860385, "grad_norm": 0.16138727962970734, "learning_rate": 0.002, "loss": 2.5639, "step": 194800 }, { "epoch": 0.38810483870967744, "grad_norm": 0.18759238719940186, "learning_rate": 0.002, "loss": 2.5666, "step": 194810 }, { "epoch": 0.3881247609333163, "grad_norm": 0.18778552114963531, "learning_rate": 0.002, "loss": 2.5658, "step": 194820 }, { "epoch": 0.3881446831569553, "grad_norm": 0.14746348559856415, "learning_rate": 0.002, "loss": 2.5378, "step": 194830 }, { "epoch": 0.38816460538059416, "grad_norm": 0.172735795378685, "learning_rate": 0.002, "loss": 2.5446, "step": 194840 }, { "epoch": 0.38818452760423305, "grad_norm": 0.16411137580871582, "learning_rate": 0.002, "loss": 2.5536, "step": 194850 }, { "epoch": 0.388204449827872, "grad_norm": 0.20131266117095947, "learning_rate": 0.002, "loss": 2.5617, "step": 194860 }, { "epoch": 0.3882243720515109, "grad_norm": 0.1560828685760498, "learning_rate": 0.002, "loss": 2.5856, "step": 194870 }, { "epoch": 0.38824429427514984, "grad_norm": 0.14636258780956268, "learning_rate": 0.002, "loss": 2.5628, "step": 194880 }, { "epoch": 0.38826421649878873, "grad_norm": 0.18964792788028717, "learning_rate": 0.002, "loss": 2.5536, "step": 194890 }, { "epoch": 0.3882841387224276, "grad_norm": 0.19109231233596802, "learning_rate": 0.002, "loss": 2.5528, "step": 194900 }, { "epoch": 0.38830406094606656, "grad_norm": 0.1647355705499649, "learning_rate": 0.002, "loss": 2.567, "step": 194910 }, { "epoch": 0.38832398316970546, "grad_norm": 0.17144528031349182, "learning_rate": 0.002, "loss": 2.5598, "step": 194920 }, { "epoch": 0.3883439053933444, "grad_norm": 0.129025399684906, "learning_rate": 0.002, "loss": 2.5539, "step": 194930 }, { "epoch": 0.3883638276169833, "grad_norm": 0.15511788427829742, "learning_rate": 0.002, "loss": 2.5519, "step": 194940 }, { "epoch": 0.3883837498406222, "grad_norm": 0.19524960219860077, "learning_rate": 0.002, "loss": 2.5699, "step": 194950 }, { "epoch": 0.38840367206426113, "grad_norm": 0.19294658303260803, "learning_rate": 0.002, "loss": 2.5713, "step": 194960 }, { "epoch": 0.3884235942879, "grad_norm": 0.14699684083461761, "learning_rate": 0.002, "loss": 2.5402, "step": 194970 }, { "epoch": 0.38844351651153897, "grad_norm": 0.195618137717247, "learning_rate": 0.002, "loss": 2.5678, "step": 194980 }, { "epoch": 0.38846343873517786, "grad_norm": 0.15564289689064026, "learning_rate": 0.002, "loss": 2.5558, "step": 194990 }, { "epoch": 0.3884833609588168, "grad_norm": 0.16902682185173035, "learning_rate": 0.002, "loss": 2.5715, "step": 195000 }, { "epoch": 0.3885032831824557, "grad_norm": 0.14015555381774902, "learning_rate": 0.002, "loss": 2.5765, "step": 195010 }, { "epoch": 0.3885232054060946, "grad_norm": 0.1684814691543579, "learning_rate": 0.002, "loss": 2.5598, "step": 195020 }, { "epoch": 0.38854312762973353, "grad_norm": 0.16993966698646545, "learning_rate": 0.002, "loss": 2.5534, "step": 195030 }, { "epoch": 0.3885630498533724, "grad_norm": 0.19472777843475342, "learning_rate": 0.002, "loss": 2.5623, "step": 195040 }, { "epoch": 0.38858297207701137, "grad_norm": 0.1567508578300476, "learning_rate": 0.002, "loss": 2.5612, "step": 195050 }, { "epoch": 0.38860289430065026, "grad_norm": 0.14572927355766296, "learning_rate": 0.002, "loss": 2.5536, "step": 195060 }, { "epoch": 0.38862281652428915, "grad_norm": 0.16393999755382538, "learning_rate": 0.002, "loss": 2.574, "step": 195070 }, { "epoch": 0.3886427387479281, "grad_norm": 0.14465409517288208, "learning_rate": 0.002, "loss": 2.5746, "step": 195080 }, { "epoch": 0.388662660971567, "grad_norm": 0.20093929767608643, "learning_rate": 0.002, "loss": 2.5558, "step": 195090 }, { "epoch": 0.38868258319520593, "grad_norm": 0.1484673023223877, "learning_rate": 0.002, "loss": 2.5725, "step": 195100 }, { "epoch": 0.3887025054188448, "grad_norm": 0.16811177134513855, "learning_rate": 0.002, "loss": 2.5629, "step": 195110 }, { "epoch": 0.38872242764248377, "grad_norm": 0.17085246741771698, "learning_rate": 0.002, "loss": 2.5701, "step": 195120 }, { "epoch": 0.38874234986612266, "grad_norm": 0.15923047065734863, "learning_rate": 0.002, "loss": 2.5662, "step": 195130 }, { "epoch": 0.38876227208976155, "grad_norm": 0.21253283321857452, "learning_rate": 0.002, "loss": 2.5611, "step": 195140 }, { "epoch": 0.3887821943134005, "grad_norm": 0.17579135298728943, "learning_rate": 0.002, "loss": 2.5586, "step": 195150 }, { "epoch": 0.3888021165370394, "grad_norm": 0.17078019678592682, "learning_rate": 0.002, "loss": 2.5648, "step": 195160 }, { "epoch": 0.38882203876067833, "grad_norm": 0.14885245263576508, "learning_rate": 0.002, "loss": 2.5579, "step": 195170 }, { "epoch": 0.3888419609843172, "grad_norm": 0.15825676918029785, "learning_rate": 0.002, "loss": 2.5758, "step": 195180 }, { "epoch": 0.3888618832079561, "grad_norm": 0.17827913165092468, "learning_rate": 0.002, "loss": 2.5598, "step": 195190 }, { "epoch": 0.38888180543159506, "grad_norm": 0.3431965410709381, "learning_rate": 0.002, "loss": 2.5656, "step": 195200 }, { "epoch": 0.38890172765523395, "grad_norm": 0.16087481379508972, "learning_rate": 0.002, "loss": 2.559, "step": 195210 }, { "epoch": 0.3889216498788729, "grad_norm": 0.13591940701007843, "learning_rate": 0.002, "loss": 2.5712, "step": 195220 }, { "epoch": 0.3889415721025118, "grad_norm": 0.15011383593082428, "learning_rate": 0.002, "loss": 2.5582, "step": 195230 }, { "epoch": 0.3889614943261507, "grad_norm": 0.15673395991325378, "learning_rate": 0.002, "loss": 2.5548, "step": 195240 }, { "epoch": 0.3889814165497896, "grad_norm": 0.17588196694850922, "learning_rate": 0.002, "loss": 2.5614, "step": 195250 }, { "epoch": 0.3890013387734285, "grad_norm": 0.1774500161409378, "learning_rate": 0.002, "loss": 2.5453, "step": 195260 }, { "epoch": 0.38902126099706746, "grad_norm": 0.1675121784210205, "learning_rate": 0.002, "loss": 2.5536, "step": 195270 }, { "epoch": 0.38904118322070635, "grad_norm": 0.2007264345884323, "learning_rate": 0.002, "loss": 2.5653, "step": 195280 }, { "epoch": 0.3890611054443453, "grad_norm": 0.15020795166492462, "learning_rate": 0.002, "loss": 2.5648, "step": 195290 }, { "epoch": 0.3890810276679842, "grad_norm": 0.18683259189128876, "learning_rate": 0.002, "loss": 2.5501, "step": 195300 }, { "epoch": 0.3891009498916231, "grad_norm": 0.16709324717521667, "learning_rate": 0.002, "loss": 2.5651, "step": 195310 }, { "epoch": 0.38912087211526203, "grad_norm": 0.16688048839569092, "learning_rate": 0.002, "loss": 2.5577, "step": 195320 }, { "epoch": 0.3891407943389009, "grad_norm": 0.16691359877586365, "learning_rate": 0.002, "loss": 2.5616, "step": 195330 }, { "epoch": 0.38916071656253987, "grad_norm": 0.19822731614112854, "learning_rate": 0.002, "loss": 2.5567, "step": 195340 }, { "epoch": 0.38918063878617876, "grad_norm": 0.1392950713634491, "learning_rate": 0.002, "loss": 2.5624, "step": 195350 }, { "epoch": 0.38920056100981765, "grad_norm": 0.14479704201221466, "learning_rate": 0.002, "loss": 2.5547, "step": 195360 }, { "epoch": 0.3892204832334566, "grad_norm": 0.16960863769054413, "learning_rate": 0.002, "loss": 2.5711, "step": 195370 }, { "epoch": 0.3892404054570955, "grad_norm": 0.17161116003990173, "learning_rate": 0.002, "loss": 2.5761, "step": 195380 }, { "epoch": 0.38926032768073443, "grad_norm": 0.14327774941921234, "learning_rate": 0.002, "loss": 2.5715, "step": 195390 }, { "epoch": 0.3892802499043733, "grad_norm": 0.2260434329509735, "learning_rate": 0.002, "loss": 2.5472, "step": 195400 }, { "epoch": 0.38930017212801227, "grad_norm": 0.16460095345973969, "learning_rate": 0.002, "loss": 2.5542, "step": 195410 }, { "epoch": 0.38932009435165116, "grad_norm": 0.1672971397638321, "learning_rate": 0.002, "loss": 2.5716, "step": 195420 }, { "epoch": 0.38934001657529005, "grad_norm": 0.1528043895959854, "learning_rate": 0.002, "loss": 2.5636, "step": 195430 }, { "epoch": 0.389359938798929, "grad_norm": 0.5129961967468262, "learning_rate": 0.002, "loss": 2.5676, "step": 195440 }, { "epoch": 0.3893798610225679, "grad_norm": 0.1658756583929062, "learning_rate": 0.002, "loss": 2.5829, "step": 195450 }, { "epoch": 0.38939978324620683, "grad_norm": 0.1339050531387329, "learning_rate": 0.002, "loss": 2.5603, "step": 195460 }, { "epoch": 0.3894197054698457, "grad_norm": 0.1731453388929367, "learning_rate": 0.002, "loss": 2.5567, "step": 195470 }, { "epoch": 0.3894396276934846, "grad_norm": 0.16697679460048676, "learning_rate": 0.002, "loss": 2.5592, "step": 195480 }, { "epoch": 0.38945954991712356, "grad_norm": 0.17107704281806946, "learning_rate": 0.002, "loss": 2.5481, "step": 195490 }, { "epoch": 0.38947947214076245, "grad_norm": 0.16198869049549103, "learning_rate": 0.002, "loss": 2.5649, "step": 195500 }, { "epoch": 0.3894993943644014, "grad_norm": 0.16021926701068878, "learning_rate": 0.002, "loss": 2.5593, "step": 195510 }, { "epoch": 0.3895193165880403, "grad_norm": 0.15925481915473938, "learning_rate": 0.002, "loss": 2.5653, "step": 195520 }, { "epoch": 0.3895392388116792, "grad_norm": 0.15092983841896057, "learning_rate": 0.002, "loss": 2.5702, "step": 195530 }, { "epoch": 0.3895591610353181, "grad_norm": 0.17347842454910278, "learning_rate": 0.002, "loss": 2.5439, "step": 195540 }, { "epoch": 0.389579083258957, "grad_norm": 0.173436239361763, "learning_rate": 0.002, "loss": 2.5654, "step": 195550 }, { "epoch": 0.38959900548259596, "grad_norm": 0.1673828661441803, "learning_rate": 0.002, "loss": 2.5694, "step": 195560 }, { "epoch": 0.38961892770623485, "grad_norm": 0.19124527275562286, "learning_rate": 0.002, "loss": 2.5497, "step": 195570 }, { "epoch": 0.3896388499298738, "grad_norm": 0.1421954482793808, "learning_rate": 0.002, "loss": 2.5707, "step": 195580 }, { "epoch": 0.3896587721535127, "grad_norm": 0.2129744589328766, "learning_rate": 0.002, "loss": 2.5716, "step": 195590 }, { "epoch": 0.3896786943771516, "grad_norm": 0.16797596216201782, "learning_rate": 0.002, "loss": 2.5572, "step": 195600 }, { "epoch": 0.3896986166007905, "grad_norm": 0.20939558744430542, "learning_rate": 0.002, "loss": 2.5712, "step": 195610 }, { "epoch": 0.3897185388244294, "grad_norm": 0.17500628530979156, "learning_rate": 0.002, "loss": 2.5654, "step": 195620 }, { "epoch": 0.38973846104806836, "grad_norm": 0.13840864598751068, "learning_rate": 0.002, "loss": 2.5555, "step": 195630 }, { "epoch": 0.38975838327170725, "grad_norm": 0.18507695198059082, "learning_rate": 0.002, "loss": 2.5604, "step": 195640 }, { "epoch": 0.38977830549534614, "grad_norm": 0.15216049551963806, "learning_rate": 0.002, "loss": 2.5672, "step": 195650 }, { "epoch": 0.3897982277189851, "grad_norm": 0.16749164462089539, "learning_rate": 0.002, "loss": 2.5588, "step": 195660 }, { "epoch": 0.389818149942624, "grad_norm": 0.1934240460395813, "learning_rate": 0.002, "loss": 2.565, "step": 195670 }, { "epoch": 0.38983807216626293, "grad_norm": 0.160059854388237, "learning_rate": 0.002, "loss": 2.5663, "step": 195680 }, { "epoch": 0.3898579943899018, "grad_norm": 0.21044816076755524, "learning_rate": 0.002, "loss": 2.5582, "step": 195690 }, { "epoch": 0.3898779166135407, "grad_norm": 0.15253229439258575, "learning_rate": 0.002, "loss": 2.5698, "step": 195700 }, { "epoch": 0.38989783883717966, "grad_norm": 0.17432670295238495, "learning_rate": 0.002, "loss": 2.5617, "step": 195710 }, { "epoch": 0.38991776106081855, "grad_norm": 0.1986628770828247, "learning_rate": 0.002, "loss": 2.5492, "step": 195720 }, { "epoch": 0.3899376832844575, "grad_norm": 0.16894197463989258, "learning_rate": 0.002, "loss": 2.5592, "step": 195730 }, { "epoch": 0.3899576055080964, "grad_norm": 0.19444015622138977, "learning_rate": 0.002, "loss": 2.5537, "step": 195740 }, { "epoch": 0.38997752773173533, "grad_norm": 0.16487927734851837, "learning_rate": 0.002, "loss": 2.5665, "step": 195750 }, { "epoch": 0.3899974499553742, "grad_norm": 0.19100956618785858, "learning_rate": 0.002, "loss": 2.5628, "step": 195760 }, { "epoch": 0.3900173721790131, "grad_norm": 0.19083233177661896, "learning_rate": 0.002, "loss": 2.5623, "step": 195770 }, { "epoch": 0.39003729440265206, "grad_norm": 0.2077787220478058, "learning_rate": 0.002, "loss": 2.5871, "step": 195780 }, { "epoch": 0.39005721662629095, "grad_norm": 0.18952779471874237, "learning_rate": 0.002, "loss": 2.5585, "step": 195790 }, { "epoch": 0.3900771388499299, "grad_norm": 0.1610690951347351, "learning_rate": 0.002, "loss": 2.5626, "step": 195800 }, { "epoch": 0.3900970610735688, "grad_norm": 0.15106800198554993, "learning_rate": 0.002, "loss": 2.564, "step": 195810 }, { "epoch": 0.3901169832972077, "grad_norm": 0.19084769487380981, "learning_rate": 0.002, "loss": 2.5635, "step": 195820 }, { "epoch": 0.3901369055208466, "grad_norm": 0.1945209801197052, "learning_rate": 0.002, "loss": 2.5671, "step": 195830 }, { "epoch": 0.3901568277444855, "grad_norm": 0.16797293722629547, "learning_rate": 0.002, "loss": 2.5821, "step": 195840 }, { "epoch": 0.39017674996812446, "grad_norm": 0.16429626941680908, "learning_rate": 0.002, "loss": 2.5576, "step": 195850 }, { "epoch": 0.39019667219176335, "grad_norm": 0.18471983075141907, "learning_rate": 0.002, "loss": 2.5563, "step": 195860 }, { "epoch": 0.3902165944154023, "grad_norm": 0.16903331875801086, "learning_rate": 0.002, "loss": 2.5565, "step": 195870 }, { "epoch": 0.3902365166390412, "grad_norm": 0.1702275425195694, "learning_rate": 0.002, "loss": 2.5562, "step": 195880 }, { "epoch": 0.3902564388626801, "grad_norm": 0.17018245160579681, "learning_rate": 0.002, "loss": 2.5566, "step": 195890 }, { "epoch": 0.390276361086319, "grad_norm": 0.14686493575572968, "learning_rate": 0.002, "loss": 2.5754, "step": 195900 }, { "epoch": 0.3902962833099579, "grad_norm": 0.18161419034004211, "learning_rate": 0.002, "loss": 2.5769, "step": 195910 }, { "epoch": 0.39031620553359686, "grad_norm": 0.2015472799539566, "learning_rate": 0.002, "loss": 2.5643, "step": 195920 }, { "epoch": 0.39033612775723575, "grad_norm": 0.16433647274971008, "learning_rate": 0.002, "loss": 2.5869, "step": 195930 }, { "epoch": 0.39035604998087464, "grad_norm": 0.16632504761219025, "learning_rate": 0.002, "loss": 2.5681, "step": 195940 }, { "epoch": 0.3903759722045136, "grad_norm": 0.16867908835411072, "learning_rate": 0.002, "loss": 2.5614, "step": 195950 }, { "epoch": 0.3903958944281525, "grad_norm": 0.16910865902900696, "learning_rate": 0.002, "loss": 2.5653, "step": 195960 }, { "epoch": 0.3904158166517914, "grad_norm": 0.16454190015792847, "learning_rate": 0.002, "loss": 2.5497, "step": 195970 }, { "epoch": 0.3904357388754303, "grad_norm": 0.2231585532426834, "learning_rate": 0.002, "loss": 2.5456, "step": 195980 }, { "epoch": 0.3904556610990692, "grad_norm": 0.1614319235086441, "learning_rate": 0.002, "loss": 2.5458, "step": 195990 }, { "epoch": 0.39047558332270815, "grad_norm": 0.18292850255966187, "learning_rate": 0.002, "loss": 2.5561, "step": 196000 }, { "epoch": 0.39049550554634704, "grad_norm": 0.1513662487268448, "learning_rate": 0.002, "loss": 2.5525, "step": 196010 }, { "epoch": 0.390515427769986, "grad_norm": 0.17753615975379944, "learning_rate": 0.002, "loss": 2.5547, "step": 196020 }, { "epoch": 0.3905353499936249, "grad_norm": 0.15121281147003174, "learning_rate": 0.002, "loss": 2.5491, "step": 196030 }, { "epoch": 0.3905552722172638, "grad_norm": 0.14274099469184875, "learning_rate": 0.002, "loss": 2.562, "step": 196040 }, { "epoch": 0.3905751944409027, "grad_norm": 0.16369618475437164, "learning_rate": 0.002, "loss": 2.5652, "step": 196050 }, { "epoch": 0.3905951166645416, "grad_norm": 0.1549137383699417, "learning_rate": 0.002, "loss": 2.5508, "step": 196060 }, { "epoch": 0.39061503888818055, "grad_norm": 0.16823633015155792, "learning_rate": 0.002, "loss": 2.5525, "step": 196070 }, { "epoch": 0.39063496111181945, "grad_norm": 0.16285286843776703, "learning_rate": 0.002, "loss": 2.5723, "step": 196080 }, { "epoch": 0.3906548833354584, "grad_norm": 0.17891742289066315, "learning_rate": 0.002, "loss": 2.5587, "step": 196090 }, { "epoch": 0.3906748055590973, "grad_norm": 0.18978793919086456, "learning_rate": 0.002, "loss": 2.5786, "step": 196100 }, { "epoch": 0.3906947277827362, "grad_norm": 0.18278002738952637, "learning_rate": 0.002, "loss": 2.554, "step": 196110 }, { "epoch": 0.3907146500063751, "grad_norm": 0.17215166985988617, "learning_rate": 0.002, "loss": 2.5399, "step": 196120 }, { "epoch": 0.390734572230014, "grad_norm": 0.17682111263275146, "learning_rate": 0.002, "loss": 2.5507, "step": 196130 }, { "epoch": 0.39075449445365296, "grad_norm": 0.1603621542453766, "learning_rate": 0.002, "loss": 2.5781, "step": 196140 }, { "epoch": 0.39077441667729185, "grad_norm": 0.16299547255039215, "learning_rate": 0.002, "loss": 2.5527, "step": 196150 }, { "epoch": 0.3907943389009308, "grad_norm": 0.16175295412540436, "learning_rate": 0.002, "loss": 2.5434, "step": 196160 }, { "epoch": 0.3908142611245697, "grad_norm": 0.166988343000412, "learning_rate": 0.002, "loss": 2.5391, "step": 196170 }, { "epoch": 0.3908341833482086, "grad_norm": 0.14704594016075134, "learning_rate": 0.002, "loss": 2.5548, "step": 196180 }, { "epoch": 0.3908541055718475, "grad_norm": 0.19679205119609833, "learning_rate": 0.002, "loss": 2.5615, "step": 196190 }, { "epoch": 0.3908740277954864, "grad_norm": 0.16886769235134125, "learning_rate": 0.002, "loss": 2.5473, "step": 196200 }, { "epoch": 0.39089395001912536, "grad_norm": 0.17825306951999664, "learning_rate": 0.002, "loss": 2.5636, "step": 196210 }, { "epoch": 0.39091387224276425, "grad_norm": 0.18439637124538422, "learning_rate": 0.002, "loss": 2.5659, "step": 196220 }, { "epoch": 0.39093379446640314, "grad_norm": 0.1679782271385193, "learning_rate": 0.002, "loss": 2.5497, "step": 196230 }, { "epoch": 0.3909537166900421, "grad_norm": 0.19533132016658783, "learning_rate": 0.002, "loss": 2.5517, "step": 196240 }, { "epoch": 0.390973638913681, "grad_norm": 0.181807741522789, "learning_rate": 0.002, "loss": 2.5629, "step": 196250 }, { "epoch": 0.3909935611373199, "grad_norm": 0.197254940867424, "learning_rate": 0.002, "loss": 2.5459, "step": 196260 }, { "epoch": 0.3910134833609588, "grad_norm": 0.19085897505283356, "learning_rate": 0.002, "loss": 2.5504, "step": 196270 }, { "epoch": 0.3910334055845977, "grad_norm": 0.15105830132961273, "learning_rate": 0.002, "loss": 2.5703, "step": 196280 }, { "epoch": 0.39105332780823665, "grad_norm": 0.147566020488739, "learning_rate": 0.002, "loss": 2.5527, "step": 196290 }, { "epoch": 0.39107325003187554, "grad_norm": 0.17183445394039154, "learning_rate": 0.002, "loss": 2.5648, "step": 196300 }, { "epoch": 0.3910931722555145, "grad_norm": 0.21447661519050598, "learning_rate": 0.002, "loss": 2.5502, "step": 196310 }, { "epoch": 0.3911130944791534, "grad_norm": 0.1394004076719284, "learning_rate": 0.002, "loss": 2.5564, "step": 196320 }, { "epoch": 0.3911330167027923, "grad_norm": 0.1799359917640686, "learning_rate": 0.002, "loss": 2.5614, "step": 196330 }, { "epoch": 0.3911529389264312, "grad_norm": 0.15952810645103455, "learning_rate": 0.002, "loss": 2.5598, "step": 196340 }, { "epoch": 0.3911728611500701, "grad_norm": 0.15809611976146698, "learning_rate": 0.002, "loss": 2.5541, "step": 196350 }, { "epoch": 0.39119278337370905, "grad_norm": 0.21444950997829437, "learning_rate": 0.002, "loss": 2.559, "step": 196360 }, { "epoch": 0.39121270559734794, "grad_norm": 0.16091813147068024, "learning_rate": 0.002, "loss": 2.5643, "step": 196370 }, { "epoch": 0.3912326278209869, "grad_norm": 0.2058117389678955, "learning_rate": 0.002, "loss": 2.5673, "step": 196380 }, { "epoch": 0.3912525500446258, "grad_norm": 0.15714475512504578, "learning_rate": 0.002, "loss": 2.5623, "step": 196390 }, { "epoch": 0.39127247226826467, "grad_norm": 0.5160945057868958, "learning_rate": 0.002, "loss": 2.5551, "step": 196400 }, { "epoch": 0.3912923944919036, "grad_norm": 0.17735227942466736, "learning_rate": 0.002, "loss": 2.5723, "step": 196410 }, { "epoch": 0.3913123167155425, "grad_norm": 0.17638005316257477, "learning_rate": 0.002, "loss": 2.5693, "step": 196420 }, { "epoch": 0.39133223893918145, "grad_norm": 0.18127037584781647, "learning_rate": 0.002, "loss": 2.5616, "step": 196430 }, { "epoch": 0.39135216116282034, "grad_norm": 0.15255308151245117, "learning_rate": 0.002, "loss": 2.5534, "step": 196440 }, { "epoch": 0.3913720833864593, "grad_norm": 0.1678781807422638, "learning_rate": 0.002, "loss": 2.5595, "step": 196450 }, { "epoch": 0.3913920056100982, "grad_norm": 0.18867729604244232, "learning_rate": 0.002, "loss": 2.573, "step": 196460 }, { "epoch": 0.39141192783373707, "grad_norm": 0.15545842051506042, "learning_rate": 0.002, "loss": 2.5569, "step": 196470 }, { "epoch": 0.391431850057376, "grad_norm": 0.2330562323331833, "learning_rate": 0.002, "loss": 2.5578, "step": 196480 }, { "epoch": 0.3914517722810149, "grad_norm": 0.15957655012607574, "learning_rate": 0.002, "loss": 2.5564, "step": 196490 }, { "epoch": 0.39147169450465386, "grad_norm": 0.2176745980978012, "learning_rate": 0.002, "loss": 2.5539, "step": 196500 }, { "epoch": 0.39149161672829275, "grad_norm": 0.14850546419620514, "learning_rate": 0.002, "loss": 2.5453, "step": 196510 }, { "epoch": 0.39151153895193164, "grad_norm": 0.15722526609897614, "learning_rate": 0.002, "loss": 2.5534, "step": 196520 }, { "epoch": 0.3915314611755706, "grad_norm": 0.17303426563739777, "learning_rate": 0.002, "loss": 2.554, "step": 196530 }, { "epoch": 0.3915513833992095, "grad_norm": 0.1603371500968933, "learning_rate": 0.002, "loss": 2.5683, "step": 196540 }, { "epoch": 0.3915713056228484, "grad_norm": 0.17017078399658203, "learning_rate": 0.002, "loss": 2.5507, "step": 196550 }, { "epoch": 0.3915912278464873, "grad_norm": 0.14460337162017822, "learning_rate": 0.002, "loss": 2.5836, "step": 196560 }, { "epoch": 0.3916111500701262, "grad_norm": 0.17352181673049927, "learning_rate": 0.002, "loss": 2.5549, "step": 196570 }, { "epoch": 0.39163107229376515, "grad_norm": 0.19536325335502625, "learning_rate": 0.002, "loss": 2.557, "step": 196580 }, { "epoch": 0.39165099451740404, "grad_norm": 0.15876206755638123, "learning_rate": 0.002, "loss": 2.5496, "step": 196590 }, { "epoch": 0.391670916741043, "grad_norm": 0.18438200652599335, "learning_rate": 0.002, "loss": 2.5656, "step": 196600 }, { "epoch": 0.3916908389646819, "grad_norm": 0.3076505661010742, "learning_rate": 0.002, "loss": 2.551, "step": 196610 }, { "epoch": 0.3917107611883208, "grad_norm": 0.16812579333782196, "learning_rate": 0.002, "loss": 2.5467, "step": 196620 }, { "epoch": 0.3917306834119597, "grad_norm": 0.1609705686569214, "learning_rate": 0.002, "loss": 2.5473, "step": 196630 }, { "epoch": 0.3917506056355986, "grad_norm": 0.18297146260738373, "learning_rate": 0.002, "loss": 2.5574, "step": 196640 }, { "epoch": 0.39177052785923755, "grad_norm": 0.15835019946098328, "learning_rate": 0.002, "loss": 2.5564, "step": 196650 }, { "epoch": 0.39179045008287644, "grad_norm": 0.1780354231595993, "learning_rate": 0.002, "loss": 2.5625, "step": 196660 }, { "epoch": 0.3918103723065154, "grad_norm": 0.1748107224702835, "learning_rate": 0.002, "loss": 2.5524, "step": 196670 }, { "epoch": 0.3918302945301543, "grad_norm": 0.18817053735256195, "learning_rate": 0.002, "loss": 2.5632, "step": 196680 }, { "epoch": 0.39185021675379317, "grad_norm": 0.17213621735572815, "learning_rate": 0.002, "loss": 2.5701, "step": 196690 }, { "epoch": 0.3918701389774321, "grad_norm": 0.1652209460735321, "learning_rate": 0.002, "loss": 2.5648, "step": 196700 }, { "epoch": 0.391890061201071, "grad_norm": 0.16984841227531433, "learning_rate": 0.002, "loss": 2.5444, "step": 196710 }, { "epoch": 0.39190998342470995, "grad_norm": 0.21032942831516266, "learning_rate": 0.002, "loss": 2.5588, "step": 196720 }, { "epoch": 0.39192990564834884, "grad_norm": 0.15960446000099182, "learning_rate": 0.002, "loss": 2.5669, "step": 196730 }, { "epoch": 0.39194982787198773, "grad_norm": 0.16627585887908936, "learning_rate": 0.002, "loss": 2.5629, "step": 196740 }, { "epoch": 0.3919697500956267, "grad_norm": 0.1668708473443985, "learning_rate": 0.002, "loss": 2.5706, "step": 196750 }, { "epoch": 0.39198967231926557, "grad_norm": 0.17291411757469177, "learning_rate": 0.002, "loss": 2.5602, "step": 196760 }, { "epoch": 0.3920095945429045, "grad_norm": 0.18252071738243103, "learning_rate": 0.002, "loss": 2.5627, "step": 196770 }, { "epoch": 0.3920295167665434, "grad_norm": 0.1639980971813202, "learning_rate": 0.002, "loss": 2.5507, "step": 196780 }, { "epoch": 0.39204943899018235, "grad_norm": 0.17624342441558838, "learning_rate": 0.002, "loss": 2.5382, "step": 196790 }, { "epoch": 0.39206936121382124, "grad_norm": 0.18326441943645477, "learning_rate": 0.002, "loss": 2.5631, "step": 196800 }, { "epoch": 0.39208928343746013, "grad_norm": 0.18903209269046783, "learning_rate": 0.002, "loss": 2.5641, "step": 196810 }, { "epoch": 0.3921092056610991, "grad_norm": 0.14977851510047913, "learning_rate": 0.002, "loss": 2.5618, "step": 196820 }, { "epoch": 0.39212912788473797, "grad_norm": 0.15258458256721497, "learning_rate": 0.002, "loss": 2.5596, "step": 196830 }, { "epoch": 0.3921490501083769, "grad_norm": 0.14278735220432281, "learning_rate": 0.002, "loss": 2.5603, "step": 196840 }, { "epoch": 0.3921689723320158, "grad_norm": 0.2193489819765091, "learning_rate": 0.002, "loss": 2.5648, "step": 196850 }, { "epoch": 0.3921888945556547, "grad_norm": 0.17262090742588043, "learning_rate": 0.002, "loss": 2.5679, "step": 196860 }, { "epoch": 0.39220881677929365, "grad_norm": 0.1468927264213562, "learning_rate": 0.002, "loss": 2.5579, "step": 196870 }, { "epoch": 0.39222873900293254, "grad_norm": 0.20550300180912018, "learning_rate": 0.002, "loss": 2.5476, "step": 196880 }, { "epoch": 0.3922486612265715, "grad_norm": 0.21367602050304413, "learning_rate": 0.002, "loss": 2.5589, "step": 196890 }, { "epoch": 0.3922685834502104, "grad_norm": 0.19683955609798431, "learning_rate": 0.002, "loss": 2.5563, "step": 196900 }, { "epoch": 0.3922885056738493, "grad_norm": 0.1454572081565857, "learning_rate": 0.002, "loss": 2.5485, "step": 196910 }, { "epoch": 0.3923084278974882, "grad_norm": 0.19187875092029572, "learning_rate": 0.002, "loss": 2.5633, "step": 196920 }, { "epoch": 0.3923283501211271, "grad_norm": 0.19189929962158203, "learning_rate": 0.002, "loss": 2.5485, "step": 196930 }, { "epoch": 0.39234827234476605, "grad_norm": 0.1638582944869995, "learning_rate": 0.002, "loss": 2.559, "step": 196940 }, { "epoch": 0.39236819456840494, "grad_norm": 0.17975200712680817, "learning_rate": 0.002, "loss": 2.5561, "step": 196950 }, { "epoch": 0.3923881167920439, "grad_norm": 0.18181754648685455, "learning_rate": 0.002, "loss": 2.5748, "step": 196960 }, { "epoch": 0.3924080390156828, "grad_norm": 0.17583726346492767, "learning_rate": 0.002, "loss": 2.5624, "step": 196970 }, { "epoch": 0.39242796123932167, "grad_norm": 0.15199553966522217, "learning_rate": 0.002, "loss": 2.5701, "step": 196980 }, { "epoch": 0.3924478834629606, "grad_norm": 0.17660483717918396, "learning_rate": 0.002, "loss": 2.5526, "step": 196990 }, { "epoch": 0.3924678056865995, "grad_norm": 0.24860486388206482, "learning_rate": 0.002, "loss": 2.569, "step": 197000 }, { "epoch": 0.39248772791023845, "grad_norm": 0.15039274096488953, "learning_rate": 0.002, "loss": 2.5519, "step": 197010 }, { "epoch": 0.39250765013387734, "grad_norm": 0.1524086445569992, "learning_rate": 0.002, "loss": 2.5606, "step": 197020 }, { "epoch": 0.39252757235751623, "grad_norm": 0.17029984295368195, "learning_rate": 0.002, "loss": 2.5665, "step": 197030 }, { "epoch": 0.3925474945811552, "grad_norm": 0.16129669547080994, "learning_rate": 0.002, "loss": 2.5632, "step": 197040 }, { "epoch": 0.39256741680479407, "grad_norm": 0.16565003991127014, "learning_rate": 0.002, "loss": 2.5598, "step": 197050 }, { "epoch": 0.392587339028433, "grad_norm": 0.24431174993515015, "learning_rate": 0.002, "loss": 2.5565, "step": 197060 }, { "epoch": 0.3926072612520719, "grad_norm": 0.18560382723808289, "learning_rate": 0.002, "loss": 2.5399, "step": 197070 }, { "epoch": 0.39262718347571085, "grad_norm": 0.16689881682395935, "learning_rate": 0.002, "loss": 2.5621, "step": 197080 }, { "epoch": 0.39264710569934974, "grad_norm": 0.15127940475940704, "learning_rate": 0.002, "loss": 2.5506, "step": 197090 }, { "epoch": 0.39266702792298863, "grad_norm": 0.21280643343925476, "learning_rate": 0.002, "loss": 2.5589, "step": 197100 }, { "epoch": 0.3926869501466276, "grad_norm": 0.17115136981010437, "learning_rate": 0.002, "loss": 2.5384, "step": 197110 }, { "epoch": 0.39270687237026647, "grad_norm": 0.14902466535568237, "learning_rate": 0.002, "loss": 2.5507, "step": 197120 }, { "epoch": 0.3927267945939054, "grad_norm": 0.15875378251075745, "learning_rate": 0.002, "loss": 2.5628, "step": 197130 }, { "epoch": 0.3927467168175443, "grad_norm": 0.16639980673789978, "learning_rate": 0.002, "loss": 2.56, "step": 197140 }, { "epoch": 0.3927666390411832, "grad_norm": 0.16362370550632477, "learning_rate": 0.002, "loss": 2.5564, "step": 197150 }, { "epoch": 0.39278656126482214, "grad_norm": 0.16204676032066345, "learning_rate": 0.002, "loss": 2.5507, "step": 197160 }, { "epoch": 0.39280648348846103, "grad_norm": 0.16075816750526428, "learning_rate": 0.002, "loss": 2.5499, "step": 197170 }, { "epoch": 0.3928264057121, "grad_norm": 0.18768443167209625, "learning_rate": 0.002, "loss": 2.5646, "step": 197180 }, { "epoch": 0.39284632793573887, "grad_norm": 0.1521918624639511, "learning_rate": 0.002, "loss": 2.5686, "step": 197190 }, { "epoch": 0.3928662501593778, "grad_norm": 0.1398209184408188, "learning_rate": 0.002, "loss": 2.555, "step": 197200 }, { "epoch": 0.3928861723830167, "grad_norm": 0.1668758988380432, "learning_rate": 0.002, "loss": 2.5601, "step": 197210 }, { "epoch": 0.3929060946066556, "grad_norm": 0.1330733597278595, "learning_rate": 0.002, "loss": 2.5627, "step": 197220 }, { "epoch": 0.39292601683029454, "grad_norm": 0.15169678628444672, "learning_rate": 0.002, "loss": 2.5627, "step": 197230 }, { "epoch": 0.39294593905393344, "grad_norm": 0.19577865302562714, "learning_rate": 0.002, "loss": 2.5778, "step": 197240 }, { "epoch": 0.3929658612775724, "grad_norm": 0.16971921920776367, "learning_rate": 0.002, "loss": 2.5598, "step": 197250 }, { "epoch": 0.39298578350121127, "grad_norm": 0.16360223293304443, "learning_rate": 0.002, "loss": 2.5459, "step": 197260 }, { "epoch": 0.39300570572485016, "grad_norm": 0.16148412227630615, "learning_rate": 0.002, "loss": 2.5517, "step": 197270 }, { "epoch": 0.3930256279484891, "grad_norm": 0.18162499368190765, "learning_rate": 0.002, "loss": 2.57, "step": 197280 }, { "epoch": 0.393045550172128, "grad_norm": 0.16264738142490387, "learning_rate": 0.002, "loss": 2.5609, "step": 197290 }, { "epoch": 0.39306547239576695, "grad_norm": 0.13683387637138367, "learning_rate": 0.002, "loss": 2.5538, "step": 197300 }, { "epoch": 0.39308539461940584, "grad_norm": 0.48104292154312134, "learning_rate": 0.002, "loss": 2.5662, "step": 197310 }, { "epoch": 0.3931053168430447, "grad_norm": 0.177927166223526, "learning_rate": 0.002, "loss": 2.5689, "step": 197320 }, { "epoch": 0.3931252390666837, "grad_norm": 0.15601381659507751, "learning_rate": 0.002, "loss": 2.5504, "step": 197330 }, { "epoch": 0.39314516129032256, "grad_norm": 0.18823057413101196, "learning_rate": 0.002, "loss": 2.5554, "step": 197340 }, { "epoch": 0.3931650835139615, "grad_norm": 0.1890740543603897, "learning_rate": 0.002, "loss": 2.5542, "step": 197350 }, { "epoch": 0.3931850057376004, "grad_norm": 0.23068802058696747, "learning_rate": 0.002, "loss": 2.5569, "step": 197360 }, { "epoch": 0.39320492796123935, "grad_norm": 0.17804890871047974, "learning_rate": 0.002, "loss": 2.5769, "step": 197370 }, { "epoch": 0.39322485018487824, "grad_norm": 0.1640060544013977, "learning_rate": 0.002, "loss": 2.5535, "step": 197380 }, { "epoch": 0.39324477240851713, "grad_norm": 0.15474867820739746, "learning_rate": 0.002, "loss": 2.5559, "step": 197390 }, { "epoch": 0.3932646946321561, "grad_norm": 0.2244194597005844, "learning_rate": 0.002, "loss": 2.5555, "step": 197400 }, { "epoch": 0.39328461685579497, "grad_norm": 0.16189119219779968, "learning_rate": 0.002, "loss": 2.5528, "step": 197410 }, { "epoch": 0.3933045390794339, "grad_norm": 0.20676936209201813, "learning_rate": 0.002, "loss": 2.5701, "step": 197420 }, { "epoch": 0.3933244613030728, "grad_norm": 0.1621081382036209, "learning_rate": 0.002, "loss": 2.5507, "step": 197430 }, { "epoch": 0.3933443835267117, "grad_norm": 0.1767876297235489, "learning_rate": 0.002, "loss": 2.567, "step": 197440 }, { "epoch": 0.39336430575035064, "grad_norm": 0.17155961692333221, "learning_rate": 0.002, "loss": 2.5596, "step": 197450 }, { "epoch": 0.39338422797398953, "grad_norm": 0.15606072545051575, "learning_rate": 0.002, "loss": 2.5566, "step": 197460 }, { "epoch": 0.3934041501976285, "grad_norm": 0.17530658841133118, "learning_rate": 0.002, "loss": 2.5559, "step": 197470 }, { "epoch": 0.39342407242126737, "grad_norm": 0.1732325404882431, "learning_rate": 0.002, "loss": 2.5446, "step": 197480 }, { "epoch": 0.39344399464490626, "grad_norm": 0.1641794592142105, "learning_rate": 0.002, "loss": 2.5744, "step": 197490 }, { "epoch": 0.3934639168685452, "grad_norm": 0.16897083818912506, "learning_rate": 0.002, "loss": 2.563, "step": 197500 }, { "epoch": 0.3934838390921841, "grad_norm": 0.19811874628067017, "learning_rate": 0.002, "loss": 2.556, "step": 197510 }, { "epoch": 0.39350376131582304, "grad_norm": 0.1748919039964676, "learning_rate": 0.002, "loss": 2.5745, "step": 197520 }, { "epoch": 0.39352368353946193, "grad_norm": 0.19677351415157318, "learning_rate": 0.002, "loss": 2.5571, "step": 197530 }, { "epoch": 0.3935436057631009, "grad_norm": 0.1610693633556366, "learning_rate": 0.002, "loss": 2.5676, "step": 197540 }, { "epoch": 0.39356352798673977, "grad_norm": 0.15878695249557495, "learning_rate": 0.002, "loss": 2.564, "step": 197550 }, { "epoch": 0.39358345021037866, "grad_norm": 0.1803366243839264, "learning_rate": 0.002, "loss": 2.561, "step": 197560 }, { "epoch": 0.3936033724340176, "grad_norm": 0.1636819839477539, "learning_rate": 0.002, "loss": 2.5439, "step": 197570 }, { "epoch": 0.3936232946576565, "grad_norm": 0.16183020174503326, "learning_rate": 0.002, "loss": 2.5692, "step": 197580 }, { "epoch": 0.39364321688129544, "grad_norm": 0.17090961337089539, "learning_rate": 0.002, "loss": 2.5637, "step": 197590 }, { "epoch": 0.39366313910493433, "grad_norm": 0.17698334157466888, "learning_rate": 0.002, "loss": 2.5585, "step": 197600 }, { "epoch": 0.3936830613285732, "grad_norm": 0.1838841736316681, "learning_rate": 0.002, "loss": 2.5516, "step": 197610 }, { "epoch": 0.39370298355221217, "grad_norm": 0.18689249455928802, "learning_rate": 0.002, "loss": 2.5615, "step": 197620 }, { "epoch": 0.39372290577585106, "grad_norm": 0.15071983635425568, "learning_rate": 0.002, "loss": 2.5628, "step": 197630 }, { "epoch": 0.39374282799949, "grad_norm": 0.14495843648910522, "learning_rate": 0.002, "loss": 2.5587, "step": 197640 }, { "epoch": 0.3937627502231289, "grad_norm": 0.18263839185237885, "learning_rate": 0.002, "loss": 2.5612, "step": 197650 }, { "epoch": 0.39378267244676785, "grad_norm": 0.14267097413539886, "learning_rate": 0.002, "loss": 2.5642, "step": 197660 }, { "epoch": 0.39380259467040674, "grad_norm": 0.16054387390613556, "learning_rate": 0.002, "loss": 2.5612, "step": 197670 }, { "epoch": 0.3938225168940456, "grad_norm": 0.14593623578548431, "learning_rate": 0.002, "loss": 2.576, "step": 197680 }, { "epoch": 0.3938424391176846, "grad_norm": 0.17937426269054413, "learning_rate": 0.002, "loss": 2.5753, "step": 197690 }, { "epoch": 0.39386236134132346, "grad_norm": 0.20959414541721344, "learning_rate": 0.002, "loss": 2.5541, "step": 197700 }, { "epoch": 0.3938822835649624, "grad_norm": 0.1829570233821869, "learning_rate": 0.002, "loss": 2.5505, "step": 197710 }, { "epoch": 0.3939022057886013, "grad_norm": 0.14443105459213257, "learning_rate": 0.002, "loss": 2.5821, "step": 197720 }, { "epoch": 0.3939221280122402, "grad_norm": 0.15665303170681, "learning_rate": 0.002, "loss": 2.5587, "step": 197730 }, { "epoch": 0.39394205023587914, "grad_norm": 0.5892079472541809, "learning_rate": 0.002, "loss": 2.5526, "step": 197740 }, { "epoch": 0.39396197245951803, "grad_norm": 0.20545998215675354, "learning_rate": 0.002, "loss": 2.5752, "step": 197750 }, { "epoch": 0.393981894683157, "grad_norm": 0.1418054699897766, "learning_rate": 0.002, "loss": 2.5592, "step": 197760 }, { "epoch": 0.39400181690679587, "grad_norm": 0.15907183289527893, "learning_rate": 0.002, "loss": 2.5663, "step": 197770 }, { "epoch": 0.39402173913043476, "grad_norm": 0.20743510127067566, "learning_rate": 0.002, "loss": 2.5363, "step": 197780 }, { "epoch": 0.3940416613540737, "grad_norm": 0.15540972352027893, "learning_rate": 0.002, "loss": 2.5604, "step": 197790 }, { "epoch": 0.3940615835777126, "grad_norm": 0.1594126671552658, "learning_rate": 0.002, "loss": 2.5578, "step": 197800 }, { "epoch": 0.39408150580135154, "grad_norm": 0.21250741183757782, "learning_rate": 0.002, "loss": 2.5461, "step": 197810 }, { "epoch": 0.39410142802499043, "grad_norm": 0.16922758519649506, "learning_rate": 0.002, "loss": 2.5577, "step": 197820 }, { "epoch": 0.3941213502486294, "grad_norm": 0.14898943901062012, "learning_rate": 0.002, "loss": 2.5696, "step": 197830 }, { "epoch": 0.39414127247226827, "grad_norm": 0.2076539397239685, "learning_rate": 0.002, "loss": 2.5658, "step": 197840 }, { "epoch": 0.39416119469590716, "grad_norm": 0.19509640336036682, "learning_rate": 0.002, "loss": 2.5615, "step": 197850 }, { "epoch": 0.3941811169195461, "grad_norm": 0.14925609529018402, "learning_rate": 0.002, "loss": 2.5505, "step": 197860 }, { "epoch": 0.394201039143185, "grad_norm": 0.1405082792043686, "learning_rate": 0.002, "loss": 2.5547, "step": 197870 }, { "epoch": 0.39422096136682394, "grad_norm": 0.24331238865852356, "learning_rate": 0.002, "loss": 2.5647, "step": 197880 }, { "epoch": 0.39424088359046283, "grad_norm": 0.15370145440101624, "learning_rate": 0.002, "loss": 2.5718, "step": 197890 }, { "epoch": 0.3942608058141017, "grad_norm": 0.14239105582237244, "learning_rate": 0.002, "loss": 2.5601, "step": 197900 }, { "epoch": 0.39428072803774067, "grad_norm": 0.15806274116039276, "learning_rate": 0.002, "loss": 2.5559, "step": 197910 }, { "epoch": 0.39430065026137956, "grad_norm": 0.15757720172405243, "learning_rate": 0.002, "loss": 2.5414, "step": 197920 }, { "epoch": 0.3943205724850185, "grad_norm": 0.1636834442615509, "learning_rate": 0.002, "loss": 2.563, "step": 197930 }, { "epoch": 0.3943404947086574, "grad_norm": 0.15792512893676758, "learning_rate": 0.002, "loss": 2.5607, "step": 197940 }, { "epoch": 0.39436041693229634, "grad_norm": 0.1568213552236557, "learning_rate": 0.002, "loss": 2.5659, "step": 197950 }, { "epoch": 0.39438033915593523, "grad_norm": 0.19282874464988708, "learning_rate": 0.002, "loss": 2.5756, "step": 197960 }, { "epoch": 0.3944002613795741, "grad_norm": 0.14840514957904816, "learning_rate": 0.002, "loss": 2.5348, "step": 197970 }, { "epoch": 0.39442018360321307, "grad_norm": 0.1455630511045456, "learning_rate": 0.002, "loss": 2.568, "step": 197980 }, { "epoch": 0.39444010582685196, "grad_norm": 0.18411658704280853, "learning_rate": 0.002, "loss": 2.5655, "step": 197990 }, { "epoch": 0.3944600280504909, "grad_norm": 0.1583726853132248, "learning_rate": 0.002, "loss": 2.5487, "step": 198000 }, { "epoch": 0.3944799502741298, "grad_norm": 0.15111301839351654, "learning_rate": 0.002, "loss": 2.5485, "step": 198010 }, { "epoch": 0.3944998724977687, "grad_norm": 0.1571592390537262, "learning_rate": 0.002, "loss": 2.5596, "step": 198020 }, { "epoch": 0.39451979472140764, "grad_norm": 0.1679225116968155, "learning_rate": 0.002, "loss": 2.5621, "step": 198030 }, { "epoch": 0.3945397169450465, "grad_norm": 0.18630243837833405, "learning_rate": 0.002, "loss": 2.5635, "step": 198040 }, { "epoch": 0.39455963916868547, "grad_norm": 0.16363883018493652, "learning_rate": 0.002, "loss": 2.5672, "step": 198050 }, { "epoch": 0.39457956139232436, "grad_norm": 0.15961094200611115, "learning_rate": 0.002, "loss": 2.5704, "step": 198060 }, { "epoch": 0.39459948361596325, "grad_norm": 0.15841594338417053, "learning_rate": 0.002, "loss": 2.5563, "step": 198070 }, { "epoch": 0.3946194058396022, "grad_norm": 0.1579066514968872, "learning_rate": 0.002, "loss": 2.5633, "step": 198080 }, { "epoch": 0.3946393280632411, "grad_norm": 0.29005929827690125, "learning_rate": 0.002, "loss": 2.5342, "step": 198090 }, { "epoch": 0.39465925028688004, "grad_norm": 0.16373182833194733, "learning_rate": 0.002, "loss": 2.5585, "step": 198100 }, { "epoch": 0.3946791725105189, "grad_norm": 0.14697642624378204, "learning_rate": 0.002, "loss": 2.5674, "step": 198110 }, { "epoch": 0.3946990947341579, "grad_norm": 0.19500337541103363, "learning_rate": 0.002, "loss": 2.5506, "step": 198120 }, { "epoch": 0.39471901695779676, "grad_norm": 0.17195101082324982, "learning_rate": 0.002, "loss": 2.5686, "step": 198130 }, { "epoch": 0.39473893918143566, "grad_norm": 0.17767958343029022, "learning_rate": 0.002, "loss": 2.5603, "step": 198140 }, { "epoch": 0.3947588614050746, "grad_norm": 0.1566222608089447, "learning_rate": 0.002, "loss": 2.544, "step": 198150 }, { "epoch": 0.3947787836287135, "grad_norm": 0.16213154792785645, "learning_rate": 0.002, "loss": 2.5625, "step": 198160 }, { "epoch": 0.39479870585235244, "grad_norm": 0.1684776246547699, "learning_rate": 0.002, "loss": 2.5724, "step": 198170 }, { "epoch": 0.39481862807599133, "grad_norm": 0.18750345706939697, "learning_rate": 0.002, "loss": 2.5597, "step": 198180 }, { "epoch": 0.3948385502996302, "grad_norm": 0.17161980271339417, "learning_rate": 0.002, "loss": 2.568, "step": 198190 }, { "epoch": 0.39485847252326917, "grad_norm": 0.20081178843975067, "learning_rate": 0.002, "loss": 2.5621, "step": 198200 }, { "epoch": 0.39487839474690806, "grad_norm": 0.16740721464157104, "learning_rate": 0.002, "loss": 2.5666, "step": 198210 }, { "epoch": 0.394898316970547, "grad_norm": 0.1460500955581665, "learning_rate": 0.002, "loss": 2.5583, "step": 198220 }, { "epoch": 0.3949182391941859, "grad_norm": 0.18133892118930817, "learning_rate": 0.002, "loss": 2.5704, "step": 198230 }, { "epoch": 0.3949381614178248, "grad_norm": 0.20412175357341766, "learning_rate": 0.002, "loss": 2.5563, "step": 198240 }, { "epoch": 0.39495808364146373, "grad_norm": 0.1638384759426117, "learning_rate": 0.002, "loss": 2.569, "step": 198250 }, { "epoch": 0.3949780058651026, "grad_norm": 0.14734472334384918, "learning_rate": 0.002, "loss": 2.5594, "step": 198260 }, { "epoch": 0.39499792808874157, "grad_norm": 0.16312816739082336, "learning_rate": 0.002, "loss": 2.5556, "step": 198270 }, { "epoch": 0.39501785031238046, "grad_norm": 0.1695173978805542, "learning_rate": 0.002, "loss": 2.557, "step": 198280 }, { "epoch": 0.3950377725360194, "grad_norm": 0.20153315365314484, "learning_rate": 0.002, "loss": 2.5616, "step": 198290 }, { "epoch": 0.3950576947596583, "grad_norm": 0.14675499498844147, "learning_rate": 0.002, "loss": 2.5637, "step": 198300 }, { "epoch": 0.3950776169832972, "grad_norm": 0.17603863775730133, "learning_rate": 0.002, "loss": 2.5769, "step": 198310 }, { "epoch": 0.39509753920693613, "grad_norm": 0.2002449482679367, "learning_rate": 0.002, "loss": 2.5495, "step": 198320 }, { "epoch": 0.395117461430575, "grad_norm": 0.1516846865415573, "learning_rate": 0.002, "loss": 2.5601, "step": 198330 }, { "epoch": 0.39513738365421397, "grad_norm": 0.16132977604866028, "learning_rate": 0.002, "loss": 2.5696, "step": 198340 }, { "epoch": 0.39515730587785286, "grad_norm": 0.1519983857870102, "learning_rate": 0.002, "loss": 2.5518, "step": 198350 }, { "epoch": 0.39517722810149175, "grad_norm": 0.29212453961372375, "learning_rate": 0.002, "loss": 2.558, "step": 198360 }, { "epoch": 0.3951971503251307, "grad_norm": 0.17281894385814667, "learning_rate": 0.002, "loss": 2.5629, "step": 198370 }, { "epoch": 0.3952170725487696, "grad_norm": 0.15173037350177765, "learning_rate": 0.002, "loss": 2.5567, "step": 198380 }, { "epoch": 0.39523699477240853, "grad_norm": 0.1495228409767151, "learning_rate": 0.002, "loss": 2.56, "step": 198390 }, { "epoch": 0.3952569169960474, "grad_norm": 0.15531525015830994, "learning_rate": 0.002, "loss": 2.5584, "step": 198400 }, { "epoch": 0.39527683921968637, "grad_norm": 0.17171180248260498, "learning_rate": 0.002, "loss": 2.5447, "step": 198410 }, { "epoch": 0.39529676144332526, "grad_norm": 0.16189005970954895, "learning_rate": 0.002, "loss": 2.558, "step": 198420 }, { "epoch": 0.39531668366696415, "grad_norm": 0.18119177222251892, "learning_rate": 0.002, "loss": 2.5579, "step": 198430 }, { "epoch": 0.3953366058906031, "grad_norm": 0.1789180040359497, "learning_rate": 0.002, "loss": 2.551, "step": 198440 }, { "epoch": 0.395356528114242, "grad_norm": 0.16708774864673615, "learning_rate": 0.002, "loss": 2.5596, "step": 198450 }, { "epoch": 0.39537645033788094, "grad_norm": 0.13947851955890656, "learning_rate": 0.002, "loss": 2.5573, "step": 198460 }, { "epoch": 0.3953963725615198, "grad_norm": 0.19679710268974304, "learning_rate": 0.002, "loss": 2.5629, "step": 198470 }, { "epoch": 0.3954162947851587, "grad_norm": 0.15217538177967072, "learning_rate": 0.002, "loss": 2.5613, "step": 198480 }, { "epoch": 0.39543621700879766, "grad_norm": 0.1572570651769638, "learning_rate": 0.002, "loss": 2.5556, "step": 198490 }, { "epoch": 0.39545613923243655, "grad_norm": 0.14167611300945282, "learning_rate": 0.002, "loss": 2.5639, "step": 198500 }, { "epoch": 0.3954760614560755, "grad_norm": 0.21909132599830627, "learning_rate": 0.002, "loss": 2.5659, "step": 198510 }, { "epoch": 0.3954959836797144, "grad_norm": 0.15012316405773163, "learning_rate": 0.002, "loss": 2.5652, "step": 198520 }, { "epoch": 0.3955159059033533, "grad_norm": 0.15523892641067505, "learning_rate": 0.002, "loss": 2.5576, "step": 198530 }, { "epoch": 0.39553582812699223, "grad_norm": 0.1696968972682953, "learning_rate": 0.002, "loss": 2.5652, "step": 198540 }, { "epoch": 0.3955557503506311, "grad_norm": 0.17661288380622864, "learning_rate": 0.002, "loss": 2.5465, "step": 198550 }, { "epoch": 0.39557567257427007, "grad_norm": 0.1866147518157959, "learning_rate": 0.002, "loss": 2.5779, "step": 198560 }, { "epoch": 0.39559559479790896, "grad_norm": 0.15408065915107727, "learning_rate": 0.002, "loss": 2.5545, "step": 198570 }, { "epoch": 0.3956155170215479, "grad_norm": 0.1724051535129547, "learning_rate": 0.002, "loss": 2.57, "step": 198580 }, { "epoch": 0.3956354392451868, "grad_norm": 0.16662442684173584, "learning_rate": 0.002, "loss": 2.5455, "step": 198590 }, { "epoch": 0.3956553614688257, "grad_norm": 0.19174638390541077, "learning_rate": 0.002, "loss": 2.565, "step": 198600 }, { "epoch": 0.39567528369246463, "grad_norm": 0.15252496302127838, "learning_rate": 0.002, "loss": 2.5646, "step": 198610 }, { "epoch": 0.3956952059161035, "grad_norm": 0.19275589287281036, "learning_rate": 0.002, "loss": 2.5556, "step": 198620 }, { "epoch": 0.39571512813974247, "grad_norm": 0.1556425541639328, "learning_rate": 0.002, "loss": 2.5718, "step": 198630 }, { "epoch": 0.39573505036338136, "grad_norm": 0.1835697591304779, "learning_rate": 0.002, "loss": 2.5586, "step": 198640 }, { "epoch": 0.39575497258702025, "grad_norm": 0.15944765508174896, "learning_rate": 0.002, "loss": 2.5631, "step": 198650 }, { "epoch": 0.3957748948106592, "grad_norm": 0.1689758449792862, "learning_rate": 0.002, "loss": 2.5712, "step": 198660 }, { "epoch": 0.3957948170342981, "grad_norm": 0.1719733029603958, "learning_rate": 0.002, "loss": 2.5623, "step": 198670 }, { "epoch": 0.39581473925793703, "grad_norm": 0.1740494668483734, "learning_rate": 0.002, "loss": 2.5427, "step": 198680 }, { "epoch": 0.3958346614815759, "grad_norm": 0.1760249137878418, "learning_rate": 0.002, "loss": 2.5543, "step": 198690 }, { "epoch": 0.39585458370521487, "grad_norm": 0.16901740431785583, "learning_rate": 0.002, "loss": 2.5519, "step": 198700 }, { "epoch": 0.39587450592885376, "grad_norm": 0.1908290833234787, "learning_rate": 0.002, "loss": 2.5537, "step": 198710 }, { "epoch": 0.39589442815249265, "grad_norm": 0.15845836699008942, "learning_rate": 0.002, "loss": 2.5615, "step": 198720 }, { "epoch": 0.3959143503761316, "grad_norm": 0.15681445598602295, "learning_rate": 0.002, "loss": 2.5615, "step": 198730 }, { "epoch": 0.3959342725997705, "grad_norm": 0.17404694855213165, "learning_rate": 0.002, "loss": 2.5487, "step": 198740 }, { "epoch": 0.39595419482340943, "grad_norm": 0.1526707410812378, "learning_rate": 0.002, "loss": 2.5577, "step": 198750 }, { "epoch": 0.3959741170470483, "grad_norm": 0.1670631617307663, "learning_rate": 0.002, "loss": 2.5499, "step": 198760 }, { "epoch": 0.3959940392706872, "grad_norm": 0.14666244387626648, "learning_rate": 0.002, "loss": 2.5616, "step": 198770 }, { "epoch": 0.39601396149432616, "grad_norm": 0.20162805914878845, "learning_rate": 0.002, "loss": 2.5608, "step": 198780 }, { "epoch": 0.39603388371796505, "grad_norm": 0.16687393188476562, "learning_rate": 0.002, "loss": 2.5367, "step": 198790 }, { "epoch": 0.396053805941604, "grad_norm": 0.1894773244857788, "learning_rate": 0.002, "loss": 2.5645, "step": 198800 }, { "epoch": 0.3960737281652429, "grad_norm": 0.1882852166891098, "learning_rate": 0.002, "loss": 2.5463, "step": 198810 }, { "epoch": 0.3960936503888818, "grad_norm": 0.18997123837471008, "learning_rate": 0.002, "loss": 2.5591, "step": 198820 }, { "epoch": 0.3961135726125207, "grad_norm": 0.17695797979831696, "learning_rate": 0.002, "loss": 2.5637, "step": 198830 }, { "epoch": 0.3961334948361596, "grad_norm": 0.14483021199703217, "learning_rate": 0.002, "loss": 2.5729, "step": 198840 }, { "epoch": 0.39615341705979856, "grad_norm": 0.17946721613407135, "learning_rate": 0.002, "loss": 2.5624, "step": 198850 }, { "epoch": 0.39617333928343745, "grad_norm": 0.14831951260566711, "learning_rate": 0.002, "loss": 2.5522, "step": 198860 }, { "epoch": 0.3961932615070764, "grad_norm": 0.1436406970024109, "learning_rate": 0.002, "loss": 2.5533, "step": 198870 }, { "epoch": 0.3962131837307153, "grad_norm": 0.19140149652957916, "learning_rate": 0.002, "loss": 2.5588, "step": 198880 }, { "epoch": 0.3962331059543542, "grad_norm": 0.17312082648277283, "learning_rate": 0.002, "loss": 2.5633, "step": 198890 }, { "epoch": 0.3962530281779931, "grad_norm": 0.13709191977977753, "learning_rate": 0.002, "loss": 2.5623, "step": 198900 }, { "epoch": 0.396272950401632, "grad_norm": 0.1862933486700058, "learning_rate": 0.002, "loss": 2.5533, "step": 198910 }, { "epoch": 0.39629287262527096, "grad_norm": 0.18240657448768616, "learning_rate": 0.002, "loss": 2.561, "step": 198920 }, { "epoch": 0.39631279484890986, "grad_norm": 0.16336551308631897, "learning_rate": 0.002, "loss": 2.5545, "step": 198930 }, { "epoch": 0.39633271707254875, "grad_norm": 0.14520440995693207, "learning_rate": 0.002, "loss": 2.5824, "step": 198940 }, { "epoch": 0.3963526392961877, "grad_norm": 0.2024487555027008, "learning_rate": 0.002, "loss": 2.555, "step": 198950 }, { "epoch": 0.3963725615198266, "grad_norm": 0.18222026526927948, "learning_rate": 0.002, "loss": 2.5631, "step": 198960 }, { "epoch": 0.39639248374346553, "grad_norm": 0.15955130755901337, "learning_rate": 0.002, "loss": 2.5599, "step": 198970 }, { "epoch": 0.3964124059671044, "grad_norm": 0.1689767688512802, "learning_rate": 0.002, "loss": 2.5705, "step": 198980 }, { "epoch": 0.3964323281907433, "grad_norm": 0.20393185317516327, "learning_rate": 0.002, "loss": 2.5472, "step": 198990 }, { "epoch": 0.39645225041438226, "grad_norm": 0.18047921359539032, "learning_rate": 0.002, "loss": 2.5537, "step": 199000 }, { "epoch": 0.39647217263802115, "grad_norm": 0.17422088980674744, "learning_rate": 0.002, "loss": 2.5695, "step": 199010 }, { "epoch": 0.3964920948616601, "grad_norm": 0.1668022722005844, "learning_rate": 0.002, "loss": 2.5712, "step": 199020 }, { "epoch": 0.396512017085299, "grad_norm": 0.21419064700603485, "learning_rate": 0.002, "loss": 2.5513, "step": 199030 }, { "epoch": 0.39653193930893793, "grad_norm": 0.18503426015377045, "learning_rate": 0.002, "loss": 2.5591, "step": 199040 }, { "epoch": 0.3965518615325768, "grad_norm": 0.16405615210533142, "learning_rate": 0.002, "loss": 2.5677, "step": 199050 }, { "epoch": 0.3965717837562157, "grad_norm": 0.1790941059589386, "learning_rate": 0.002, "loss": 2.5672, "step": 199060 }, { "epoch": 0.39659170597985466, "grad_norm": 0.16432736814022064, "learning_rate": 0.002, "loss": 2.5495, "step": 199070 }, { "epoch": 0.39661162820349355, "grad_norm": 0.1437799483537674, "learning_rate": 0.002, "loss": 2.566, "step": 199080 }, { "epoch": 0.3966315504271325, "grad_norm": 0.15028345584869385, "learning_rate": 0.002, "loss": 2.5592, "step": 199090 }, { "epoch": 0.3966514726507714, "grad_norm": 0.28622159361839294, "learning_rate": 0.002, "loss": 2.5654, "step": 199100 }, { "epoch": 0.3966713948744103, "grad_norm": 0.17169316112995148, "learning_rate": 0.002, "loss": 2.5435, "step": 199110 }, { "epoch": 0.3966913170980492, "grad_norm": 0.14589180052280426, "learning_rate": 0.002, "loss": 2.574, "step": 199120 }, { "epoch": 0.3967112393216881, "grad_norm": 0.1998584270477295, "learning_rate": 0.002, "loss": 2.5536, "step": 199130 }, { "epoch": 0.39673116154532706, "grad_norm": 0.17172832787036896, "learning_rate": 0.002, "loss": 2.5539, "step": 199140 }, { "epoch": 0.39675108376896595, "grad_norm": 0.1809876561164856, "learning_rate": 0.002, "loss": 2.5726, "step": 199150 }, { "epoch": 0.3967710059926049, "grad_norm": 0.15165868401527405, "learning_rate": 0.002, "loss": 2.5699, "step": 199160 }, { "epoch": 0.3967909282162438, "grad_norm": 0.15998618304729462, "learning_rate": 0.002, "loss": 2.5553, "step": 199170 }, { "epoch": 0.3968108504398827, "grad_norm": 0.1706007719039917, "learning_rate": 0.002, "loss": 2.5621, "step": 199180 }, { "epoch": 0.3968307726635216, "grad_norm": 0.18864992260932922, "learning_rate": 0.002, "loss": 2.5635, "step": 199190 }, { "epoch": 0.3968506948871605, "grad_norm": 0.16537508368492126, "learning_rate": 0.002, "loss": 2.5498, "step": 199200 }, { "epoch": 0.39687061711079946, "grad_norm": 0.16943874955177307, "learning_rate": 0.002, "loss": 2.5557, "step": 199210 }, { "epoch": 0.39689053933443835, "grad_norm": 0.17111420631408691, "learning_rate": 0.002, "loss": 2.5482, "step": 199220 }, { "epoch": 0.39691046155807724, "grad_norm": 0.16188141703605652, "learning_rate": 0.002, "loss": 2.5491, "step": 199230 }, { "epoch": 0.3969303837817162, "grad_norm": 0.21807533502578735, "learning_rate": 0.002, "loss": 2.5776, "step": 199240 }, { "epoch": 0.3969503060053551, "grad_norm": 0.15684057772159576, "learning_rate": 0.002, "loss": 2.5603, "step": 199250 }, { "epoch": 0.396970228228994, "grad_norm": 0.1633707731962204, "learning_rate": 0.002, "loss": 2.5394, "step": 199260 }, { "epoch": 0.3969901504526329, "grad_norm": 0.18521751463413239, "learning_rate": 0.002, "loss": 2.5541, "step": 199270 }, { "epoch": 0.3970100726762718, "grad_norm": 0.1778537780046463, "learning_rate": 0.002, "loss": 2.5614, "step": 199280 }, { "epoch": 0.39702999489991075, "grad_norm": 0.1725381463766098, "learning_rate": 0.002, "loss": 2.5659, "step": 199290 }, { "epoch": 0.39704991712354964, "grad_norm": 0.15134476125240326, "learning_rate": 0.002, "loss": 2.5605, "step": 199300 }, { "epoch": 0.3970698393471886, "grad_norm": 0.17612911760807037, "learning_rate": 0.002, "loss": 2.5687, "step": 199310 }, { "epoch": 0.3970897615708275, "grad_norm": 0.16727831959724426, "learning_rate": 0.002, "loss": 2.5571, "step": 199320 }, { "epoch": 0.39710968379446643, "grad_norm": 0.1600678712129593, "learning_rate": 0.002, "loss": 2.5631, "step": 199330 }, { "epoch": 0.3971296060181053, "grad_norm": 0.16344542801380157, "learning_rate": 0.002, "loss": 2.5552, "step": 199340 }, { "epoch": 0.3971495282417442, "grad_norm": 0.1530427783727646, "learning_rate": 0.002, "loss": 2.5558, "step": 199350 }, { "epoch": 0.39716945046538316, "grad_norm": 0.16321146488189697, "learning_rate": 0.002, "loss": 2.5539, "step": 199360 }, { "epoch": 0.39718937268902205, "grad_norm": 0.1574154794216156, "learning_rate": 0.002, "loss": 2.5542, "step": 199370 }, { "epoch": 0.397209294912661, "grad_norm": 0.16433323919773102, "learning_rate": 0.002, "loss": 2.5503, "step": 199380 }, { "epoch": 0.3972292171362999, "grad_norm": 0.19198010861873627, "learning_rate": 0.002, "loss": 2.5474, "step": 199390 }, { "epoch": 0.3972491393599388, "grad_norm": 0.18454645574092865, "learning_rate": 0.002, "loss": 2.5542, "step": 199400 }, { "epoch": 0.3972690615835777, "grad_norm": 0.17371419072151184, "learning_rate": 0.002, "loss": 2.5556, "step": 199410 }, { "epoch": 0.3972889838072166, "grad_norm": 0.1726900339126587, "learning_rate": 0.002, "loss": 2.5495, "step": 199420 }, { "epoch": 0.39730890603085556, "grad_norm": 0.16964960098266602, "learning_rate": 0.002, "loss": 2.5481, "step": 199430 }, { "epoch": 0.39732882825449445, "grad_norm": 0.15452423691749573, "learning_rate": 0.002, "loss": 2.5614, "step": 199440 }, { "epoch": 0.3973487504781334, "grad_norm": 0.16028432548046112, "learning_rate": 0.002, "loss": 2.5588, "step": 199450 }, { "epoch": 0.3973686727017723, "grad_norm": 0.1760648936033249, "learning_rate": 0.002, "loss": 2.5483, "step": 199460 }, { "epoch": 0.3973885949254112, "grad_norm": 0.17368975281715393, "learning_rate": 0.002, "loss": 2.5637, "step": 199470 }, { "epoch": 0.3974085171490501, "grad_norm": 0.14469096064567566, "learning_rate": 0.002, "loss": 2.5701, "step": 199480 }, { "epoch": 0.397428439372689, "grad_norm": 0.17719601094722748, "learning_rate": 0.002, "loss": 2.5532, "step": 199490 }, { "epoch": 0.39744836159632796, "grad_norm": 0.14437615871429443, "learning_rate": 0.002, "loss": 2.5647, "step": 199500 }, { "epoch": 0.39746828381996685, "grad_norm": 0.1832849532365799, "learning_rate": 0.002, "loss": 2.5699, "step": 199510 }, { "epoch": 0.39748820604360574, "grad_norm": 0.16373635828495026, "learning_rate": 0.002, "loss": 2.5555, "step": 199520 }, { "epoch": 0.3975081282672447, "grad_norm": 0.16330000758171082, "learning_rate": 0.002, "loss": 2.566, "step": 199530 }, { "epoch": 0.3975280504908836, "grad_norm": 0.16648690402507782, "learning_rate": 0.002, "loss": 2.5537, "step": 199540 }, { "epoch": 0.3975479727145225, "grad_norm": 0.19257047772407532, "learning_rate": 0.002, "loss": 2.5515, "step": 199550 }, { "epoch": 0.3975678949381614, "grad_norm": 0.13822127878665924, "learning_rate": 0.002, "loss": 2.5586, "step": 199560 }, { "epoch": 0.3975878171618003, "grad_norm": 0.14314739406108856, "learning_rate": 0.002, "loss": 2.5618, "step": 199570 }, { "epoch": 0.39760773938543925, "grad_norm": 0.1605144441127777, "learning_rate": 0.002, "loss": 2.5588, "step": 199580 }, { "epoch": 0.39762766160907814, "grad_norm": 0.17683397233486176, "learning_rate": 0.002, "loss": 2.5678, "step": 199590 }, { "epoch": 0.3976475838327171, "grad_norm": 0.15846675634384155, "learning_rate": 0.002, "loss": 2.5497, "step": 199600 }, { "epoch": 0.397667506056356, "grad_norm": 0.17159759998321533, "learning_rate": 0.002, "loss": 2.5571, "step": 199610 }, { "epoch": 0.3976874282799949, "grad_norm": 0.15020734071731567, "learning_rate": 0.002, "loss": 2.563, "step": 199620 }, { "epoch": 0.3977073505036338, "grad_norm": 0.1759052574634552, "learning_rate": 0.002, "loss": 2.5581, "step": 199630 }, { "epoch": 0.3977272727272727, "grad_norm": 0.16119125485420227, "learning_rate": 0.002, "loss": 2.5597, "step": 199640 }, { "epoch": 0.39774719495091165, "grad_norm": 0.18085592985153198, "learning_rate": 0.002, "loss": 2.5491, "step": 199650 }, { "epoch": 0.39776711717455054, "grad_norm": 0.21080084145069122, "learning_rate": 0.002, "loss": 2.5573, "step": 199660 }, { "epoch": 0.3977870393981895, "grad_norm": 0.1536712944507599, "learning_rate": 0.002, "loss": 2.5584, "step": 199670 }, { "epoch": 0.3978069616218284, "grad_norm": 0.15146368741989136, "learning_rate": 0.002, "loss": 2.5663, "step": 199680 }, { "epoch": 0.39782688384546727, "grad_norm": 0.15022052824497223, "learning_rate": 0.002, "loss": 2.5735, "step": 199690 }, { "epoch": 0.3978468060691062, "grad_norm": 0.1578563004732132, "learning_rate": 0.002, "loss": 2.5624, "step": 199700 }, { "epoch": 0.3978667282927451, "grad_norm": 0.14625345170497894, "learning_rate": 0.002, "loss": 2.5576, "step": 199710 }, { "epoch": 0.39788665051638406, "grad_norm": 0.14168448746204376, "learning_rate": 0.002, "loss": 2.5553, "step": 199720 }, { "epoch": 0.39790657274002295, "grad_norm": 0.18570643663406372, "learning_rate": 0.002, "loss": 2.5719, "step": 199730 }, { "epoch": 0.39792649496366184, "grad_norm": 0.1713765561580658, "learning_rate": 0.002, "loss": 2.554, "step": 199740 }, { "epoch": 0.3979464171873008, "grad_norm": 0.17890778183937073, "learning_rate": 0.002, "loss": 2.5676, "step": 199750 }, { "epoch": 0.3979663394109397, "grad_norm": 0.17561563849449158, "learning_rate": 0.002, "loss": 2.551, "step": 199760 }, { "epoch": 0.3979862616345786, "grad_norm": 0.1674659699201584, "learning_rate": 0.002, "loss": 2.5558, "step": 199770 }, { "epoch": 0.3980061838582175, "grad_norm": 0.18219256401062012, "learning_rate": 0.002, "loss": 2.552, "step": 199780 }, { "epoch": 0.39802610608185646, "grad_norm": 0.15004463493824005, "learning_rate": 0.002, "loss": 2.5576, "step": 199790 }, { "epoch": 0.39804602830549535, "grad_norm": 0.17581242322921753, "learning_rate": 0.002, "loss": 2.5749, "step": 199800 }, { "epoch": 0.39806595052913424, "grad_norm": 0.15095561742782593, "learning_rate": 0.002, "loss": 2.5541, "step": 199810 }, { "epoch": 0.3980858727527732, "grad_norm": 0.17619723081588745, "learning_rate": 0.002, "loss": 2.5602, "step": 199820 }, { "epoch": 0.3981057949764121, "grad_norm": 0.19523143768310547, "learning_rate": 0.002, "loss": 2.5748, "step": 199830 }, { "epoch": 0.398125717200051, "grad_norm": 0.16667848825454712, "learning_rate": 0.002, "loss": 2.5671, "step": 199840 }, { "epoch": 0.3981456394236899, "grad_norm": 0.19984817504882812, "learning_rate": 0.002, "loss": 2.5456, "step": 199850 }, { "epoch": 0.3981655616473288, "grad_norm": 0.14697803556919098, "learning_rate": 0.002, "loss": 2.5667, "step": 199860 }, { "epoch": 0.39818548387096775, "grad_norm": 0.22012731432914734, "learning_rate": 0.002, "loss": 2.5601, "step": 199870 }, { "epoch": 0.39820540609460664, "grad_norm": 0.18488091230392456, "learning_rate": 0.002, "loss": 2.574, "step": 199880 }, { "epoch": 0.3982253283182456, "grad_norm": 0.17102637887001038, "learning_rate": 0.002, "loss": 2.5525, "step": 199890 }, { "epoch": 0.3982452505418845, "grad_norm": 0.13411010801792145, "learning_rate": 0.002, "loss": 2.5752, "step": 199900 }, { "epoch": 0.3982651727655234, "grad_norm": 0.1960875689983368, "learning_rate": 0.002, "loss": 2.5627, "step": 199910 }, { "epoch": 0.3982850949891623, "grad_norm": 0.15843118727207184, "learning_rate": 0.002, "loss": 2.5517, "step": 199920 }, { "epoch": 0.3983050172128012, "grad_norm": 0.17515675723552704, "learning_rate": 0.002, "loss": 2.5713, "step": 199930 }, { "epoch": 0.39832493943644015, "grad_norm": 0.16367554664611816, "learning_rate": 0.002, "loss": 2.5568, "step": 199940 }, { "epoch": 0.39834486166007904, "grad_norm": 0.22971659898757935, "learning_rate": 0.002, "loss": 2.5652, "step": 199950 }, { "epoch": 0.398364783883718, "grad_norm": 0.1873597949743271, "learning_rate": 0.002, "loss": 2.5615, "step": 199960 }, { "epoch": 0.3983847061073569, "grad_norm": 0.16610702872276306, "learning_rate": 0.002, "loss": 2.5535, "step": 199970 }, { "epoch": 0.39840462833099577, "grad_norm": 0.1656104326248169, "learning_rate": 0.002, "loss": 2.575, "step": 199980 }, { "epoch": 0.3984245505546347, "grad_norm": 0.1497565507888794, "learning_rate": 0.002, "loss": 2.5633, "step": 199990 }, { "epoch": 0.3984444727782736, "grad_norm": 0.18902166187763214, "learning_rate": 0.002, "loss": 2.56, "step": 200000 }, { "epoch": 0.39846439500191255, "grad_norm": 0.17447641491889954, "learning_rate": 0.002, "loss": 2.5716, "step": 200010 }, { "epoch": 0.39848431722555144, "grad_norm": 0.1556258201599121, "learning_rate": 0.002, "loss": 2.5605, "step": 200020 }, { "epoch": 0.39850423944919033, "grad_norm": 0.17865397036075592, "learning_rate": 0.002, "loss": 2.5769, "step": 200030 }, { "epoch": 0.3985241616728293, "grad_norm": 0.20754748582839966, "learning_rate": 0.002, "loss": 2.5584, "step": 200040 }, { "epoch": 0.39854408389646817, "grad_norm": 0.1606023907661438, "learning_rate": 0.002, "loss": 2.5709, "step": 200050 }, { "epoch": 0.3985640061201071, "grad_norm": 0.16243450343608856, "learning_rate": 0.002, "loss": 2.5723, "step": 200060 }, { "epoch": 0.398583928343746, "grad_norm": 0.15582744777202606, "learning_rate": 0.002, "loss": 2.5606, "step": 200070 }, { "epoch": 0.39860385056738495, "grad_norm": 0.16618531942367554, "learning_rate": 0.002, "loss": 2.5466, "step": 200080 }, { "epoch": 0.39862377279102384, "grad_norm": 0.23198655247688293, "learning_rate": 0.002, "loss": 2.5653, "step": 200090 }, { "epoch": 0.39864369501466274, "grad_norm": 0.18078075349330902, "learning_rate": 0.002, "loss": 2.5574, "step": 200100 }, { "epoch": 0.3986636172383017, "grad_norm": 0.17524750530719757, "learning_rate": 0.002, "loss": 2.5481, "step": 200110 }, { "epoch": 0.3986835394619406, "grad_norm": 0.1474635750055313, "learning_rate": 0.002, "loss": 2.5701, "step": 200120 }, { "epoch": 0.3987034616855795, "grad_norm": 0.17482253909111023, "learning_rate": 0.002, "loss": 2.5787, "step": 200130 }, { "epoch": 0.3987233839092184, "grad_norm": 0.1726129949092865, "learning_rate": 0.002, "loss": 2.5621, "step": 200140 }, { "epoch": 0.3987433061328573, "grad_norm": 0.15352226793766022, "learning_rate": 0.002, "loss": 2.568, "step": 200150 }, { "epoch": 0.39876322835649625, "grad_norm": 0.18275131285190582, "learning_rate": 0.002, "loss": 2.5412, "step": 200160 }, { "epoch": 0.39878315058013514, "grad_norm": 0.15987227857112885, "learning_rate": 0.002, "loss": 2.5713, "step": 200170 }, { "epoch": 0.3988030728037741, "grad_norm": 0.18019239604473114, "learning_rate": 0.002, "loss": 2.5591, "step": 200180 }, { "epoch": 0.398822995027413, "grad_norm": 0.16213305294513702, "learning_rate": 0.002, "loss": 2.5566, "step": 200190 }, { "epoch": 0.3988429172510519, "grad_norm": 0.15409009158611298, "learning_rate": 0.002, "loss": 2.5631, "step": 200200 }, { "epoch": 0.3988628394746908, "grad_norm": 0.20104815065860748, "learning_rate": 0.002, "loss": 2.5553, "step": 200210 }, { "epoch": 0.3988827616983297, "grad_norm": 0.16399122774600983, "learning_rate": 0.002, "loss": 2.5542, "step": 200220 }, { "epoch": 0.39890268392196865, "grad_norm": 0.15036578476428986, "learning_rate": 0.002, "loss": 2.5524, "step": 200230 }, { "epoch": 0.39892260614560754, "grad_norm": 0.13634605705738068, "learning_rate": 0.002, "loss": 2.5585, "step": 200240 }, { "epoch": 0.3989425283692465, "grad_norm": 0.16162535548210144, "learning_rate": 0.002, "loss": 2.5489, "step": 200250 }, { "epoch": 0.3989624505928854, "grad_norm": 0.17994460463523865, "learning_rate": 0.002, "loss": 2.5825, "step": 200260 }, { "epoch": 0.39898237281652427, "grad_norm": 0.15663336217403412, "learning_rate": 0.002, "loss": 2.565, "step": 200270 }, { "epoch": 0.3990022950401632, "grad_norm": 0.16238008439540863, "learning_rate": 0.002, "loss": 2.5664, "step": 200280 }, { "epoch": 0.3990222172638021, "grad_norm": 0.21309666335582733, "learning_rate": 0.002, "loss": 2.5691, "step": 200290 }, { "epoch": 0.39904213948744105, "grad_norm": 0.15351253747940063, "learning_rate": 0.002, "loss": 2.5729, "step": 200300 }, { "epoch": 0.39906206171107994, "grad_norm": 0.14823363721370697, "learning_rate": 0.002, "loss": 2.5538, "step": 200310 }, { "epoch": 0.39908198393471883, "grad_norm": 0.23356440663337708, "learning_rate": 0.002, "loss": 2.5559, "step": 200320 }, { "epoch": 0.3991019061583578, "grad_norm": 0.15856093168258667, "learning_rate": 0.002, "loss": 2.5565, "step": 200330 }, { "epoch": 0.39912182838199667, "grad_norm": 0.19266608357429504, "learning_rate": 0.002, "loss": 2.5473, "step": 200340 }, { "epoch": 0.3991417506056356, "grad_norm": 0.1445477306842804, "learning_rate": 0.002, "loss": 2.557, "step": 200350 }, { "epoch": 0.3991616728292745, "grad_norm": 0.1563785970211029, "learning_rate": 0.002, "loss": 2.5445, "step": 200360 }, { "epoch": 0.39918159505291345, "grad_norm": 0.1475880891084671, "learning_rate": 0.002, "loss": 2.5505, "step": 200370 }, { "epoch": 0.39920151727655234, "grad_norm": 0.17498336732387543, "learning_rate": 0.002, "loss": 2.5473, "step": 200380 }, { "epoch": 0.39922143950019123, "grad_norm": 0.1723276525735855, "learning_rate": 0.002, "loss": 2.5685, "step": 200390 }, { "epoch": 0.3992413617238302, "grad_norm": 0.16622953116893768, "learning_rate": 0.002, "loss": 2.5683, "step": 200400 }, { "epoch": 0.39926128394746907, "grad_norm": 0.1961342692375183, "learning_rate": 0.002, "loss": 2.5539, "step": 200410 }, { "epoch": 0.399281206171108, "grad_norm": 0.1528967022895813, "learning_rate": 0.002, "loss": 2.547, "step": 200420 }, { "epoch": 0.3993011283947469, "grad_norm": 0.16966646909713745, "learning_rate": 0.002, "loss": 2.5652, "step": 200430 }, { "epoch": 0.3993210506183858, "grad_norm": 0.1575658768415451, "learning_rate": 0.002, "loss": 2.5491, "step": 200440 }, { "epoch": 0.39934097284202474, "grad_norm": 0.16662614047527313, "learning_rate": 0.002, "loss": 2.5527, "step": 200450 }, { "epoch": 0.39936089506566363, "grad_norm": 0.16023516654968262, "learning_rate": 0.002, "loss": 2.5613, "step": 200460 }, { "epoch": 0.3993808172893026, "grad_norm": 0.14390742778778076, "learning_rate": 0.002, "loss": 2.5441, "step": 200470 }, { "epoch": 0.39940073951294147, "grad_norm": 0.16730928421020508, "learning_rate": 0.002, "loss": 2.5728, "step": 200480 }, { "epoch": 0.39942066173658036, "grad_norm": 0.15028436481952667, "learning_rate": 0.002, "loss": 2.566, "step": 200490 }, { "epoch": 0.3994405839602193, "grad_norm": 0.15638385713100433, "learning_rate": 0.002, "loss": 2.5505, "step": 200500 }, { "epoch": 0.3994605061838582, "grad_norm": 0.15692707896232605, "learning_rate": 0.002, "loss": 2.554, "step": 200510 }, { "epoch": 0.39948042840749715, "grad_norm": 0.1636517196893692, "learning_rate": 0.002, "loss": 2.5691, "step": 200520 }, { "epoch": 0.39950035063113604, "grad_norm": 0.17253004014492035, "learning_rate": 0.002, "loss": 2.5586, "step": 200530 }, { "epoch": 0.399520272854775, "grad_norm": 0.1998608410358429, "learning_rate": 0.002, "loss": 2.5712, "step": 200540 }, { "epoch": 0.3995401950784139, "grad_norm": 0.17190150916576385, "learning_rate": 0.002, "loss": 2.5512, "step": 200550 }, { "epoch": 0.39956011730205276, "grad_norm": 0.18244189023971558, "learning_rate": 0.002, "loss": 2.5651, "step": 200560 }, { "epoch": 0.3995800395256917, "grad_norm": 0.18794040381908417, "learning_rate": 0.002, "loss": 2.5707, "step": 200570 }, { "epoch": 0.3995999617493306, "grad_norm": 0.16095641255378723, "learning_rate": 0.002, "loss": 2.5635, "step": 200580 }, { "epoch": 0.39961988397296955, "grad_norm": 0.20859764516353607, "learning_rate": 0.002, "loss": 2.5707, "step": 200590 }, { "epoch": 0.39963980619660844, "grad_norm": 0.21252606809139252, "learning_rate": 0.002, "loss": 2.5495, "step": 200600 }, { "epoch": 0.39965972842024733, "grad_norm": 0.1446055918931961, "learning_rate": 0.002, "loss": 2.5531, "step": 200610 }, { "epoch": 0.3996796506438863, "grad_norm": 0.14415425062179565, "learning_rate": 0.002, "loss": 2.5558, "step": 200620 }, { "epoch": 0.39969957286752517, "grad_norm": 0.139023095369339, "learning_rate": 0.002, "loss": 2.5452, "step": 200630 }, { "epoch": 0.3997194950911641, "grad_norm": 0.20136134326457977, "learning_rate": 0.002, "loss": 2.5633, "step": 200640 }, { "epoch": 0.399739417314803, "grad_norm": 0.1846088320016861, "learning_rate": 0.002, "loss": 2.5673, "step": 200650 }, { "epoch": 0.39975933953844195, "grad_norm": 0.1825878620147705, "learning_rate": 0.002, "loss": 2.579, "step": 200660 }, { "epoch": 0.39977926176208084, "grad_norm": 0.1484319418668747, "learning_rate": 0.002, "loss": 2.5612, "step": 200670 }, { "epoch": 0.39979918398571973, "grad_norm": 0.15333238244056702, "learning_rate": 0.002, "loss": 2.545, "step": 200680 }, { "epoch": 0.3998191062093587, "grad_norm": 0.17888905107975006, "learning_rate": 0.002, "loss": 2.5405, "step": 200690 }, { "epoch": 0.39983902843299757, "grad_norm": 0.17579598724842072, "learning_rate": 0.002, "loss": 2.5695, "step": 200700 }, { "epoch": 0.3998589506566365, "grad_norm": 0.15472491085529327, "learning_rate": 0.002, "loss": 2.5638, "step": 200710 }, { "epoch": 0.3998788728802754, "grad_norm": 0.18742835521697998, "learning_rate": 0.002, "loss": 2.5663, "step": 200720 }, { "epoch": 0.3998987951039143, "grad_norm": 0.1484278440475464, "learning_rate": 0.002, "loss": 2.5645, "step": 200730 }, { "epoch": 0.39991871732755324, "grad_norm": 0.1776060163974762, "learning_rate": 0.002, "loss": 2.5428, "step": 200740 }, { "epoch": 0.39993863955119213, "grad_norm": 0.19230274856090546, "learning_rate": 0.002, "loss": 2.5634, "step": 200750 }, { "epoch": 0.3999585617748311, "grad_norm": 0.15061697363853455, "learning_rate": 0.002, "loss": 2.5649, "step": 200760 }, { "epoch": 0.39997848399846997, "grad_norm": 0.14759360253810883, "learning_rate": 0.002, "loss": 2.56, "step": 200770 }, { "epoch": 0.39999840622210886, "grad_norm": 0.17158043384552002, "learning_rate": 0.002, "loss": 2.5596, "step": 200780 }, { "epoch": 0.4000183284457478, "grad_norm": 0.14909416437149048, "learning_rate": 0.002, "loss": 2.5567, "step": 200790 }, { "epoch": 0.4000382506693867, "grad_norm": 0.18598996102809906, "learning_rate": 0.002, "loss": 2.5413, "step": 200800 }, { "epoch": 0.40005817289302564, "grad_norm": 0.15586209297180176, "learning_rate": 0.002, "loss": 2.5607, "step": 200810 }, { "epoch": 0.40007809511666453, "grad_norm": 0.1823660433292389, "learning_rate": 0.002, "loss": 2.5552, "step": 200820 }, { "epoch": 0.4000980173403035, "grad_norm": 0.16842976212501526, "learning_rate": 0.002, "loss": 2.5482, "step": 200830 }, { "epoch": 0.40011793956394237, "grad_norm": 0.1760101318359375, "learning_rate": 0.002, "loss": 2.5514, "step": 200840 }, { "epoch": 0.40013786178758126, "grad_norm": 0.15513113141059875, "learning_rate": 0.002, "loss": 2.5548, "step": 200850 }, { "epoch": 0.4001577840112202, "grad_norm": 0.16469097137451172, "learning_rate": 0.002, "loss": 2.5631, "step": 200860 }, { "epoch": 0.4001777062348591, "grad_norm": 0.18132223188877106, "learning_rate": 0.002, "loss": 2.5575, "step": 200870 }, { "epoch": 0.40019762845849804, "grad_norm": 0.19253075122833252, "learning_rate": 0.002, "loss": 2.5574, "step": 200880 }, { "epoch": 0.40021755068213694, "grad_norm": 0.18054114282131195, "learning_rate": 0.002, "loss": 2.5708, "step": 200890 }, { "epoch": 0.4002374729057758, "grad_norm": 0.14557889103889465, "learning_rate": 0.002, "loss": 2.5591, "step": 200900 }, { "epoch": 0.4002573951294148, "grad_norm": 0.1571858525276184, "learning_rate": 0.002, "loss": 2.5595, "step": 200910 }, { "epoch": 0.40027731735305366, "grad_norm": 0.17115488648414612, "learning_rate": 0.002, "loss": 2.5606, "step": 200920 }, { "epoch": 0.4002972395766926, "grad_norm": 0.22514039278030396, "learning_rate": 0.002, "loss": 2.5523, "step": 200930 }, { "epoch": 0.4003171618003315, "grad_norm": 0.16734540462493896, "learning_rate": 0.002, "loss": 2.5604, "step": 200940 }, { "epoch": 0.40033708402397045, "grad_norm": 0.15385089814662933, "learning_rate": 0.002, "loss": 2.5491, "step": 200950 }, { "epoch": 0.40035700624760934, "grad_norm": 0.2193337082862854, "learning_rate": 0.002, "loss": 2.5452, "step": 200960 }, { "epoch": 0.40037692847124823, "grad_norm": 0.16410741209983826, "learning_rate": 0.002, "loss": 2.5687, "step": 200970 }, { "epoch": 0.4003968506948872, "grad_norm": 0.16810603439807892, "learning_rate": 0.002, "loss": 2.566, "step": 200980 }, { "epoch": 0.40041677291852606, "grad_norm": 0.18342742323875427, "learning_rate": 0.002, "loss": 2.5603, "step": 200990 }, { "epoch": 0.400436695142165, "grad_norm": 0.19514432549476624, "learning_rate": 0.002, "loss": 2.5585, "step": 201000 }, { "epoch": 0.4004566173658039, "grad_norm": 0.15987984836101532, "learning_rate": 0.002, "loss": 2.5538, "step": 201010 }, { "epoch": 0.4004765395894428, "grad_norm": 0.18136772513389587, "learning_rate": 0.002, "loss": 2.5552, "step": 201020 }, { "epoch": 0.40049646181308174, "grad_norm": 0.158622607588768, "learning_rate": 0.002, "loss": 2.5444, "step": 201030 }, { "epoch": 0.40051638403672063, "grad_norm": 0.18852359056472778, "learning_rate": 0.002, "loss": 2.5654, "step": 201040 }, { "epoch": 0.4005363062603596, "grad_norm": 0.17981107532978058, "learning_rate": 0.002, "loss": 2.5728, "step": 201050 }, { "epoch": 0.40055622848399847, "grad_norm": 0.1560487002134323, "learning_rate": 0.002, "loss": 2.5419, "step": 201060 }, { "epoch": 0.40057615070763736, "grad_norm": 0.14840170741081238, "learning_rate": 0.002, "loss": 2.5705, "step": 201070 }, { "epoch": 0.4005960729312763, "grad_norm": 0.17557673156261444, "learning_rate": 0.002, "loss": 2.5552, "step": 201080 }, { "epoch": 0.4006159951549152, "grad_norm": 0.16244012117385864, "learning_rate": 0.002, "loss": 2.5694, "step": 201090 }, { "epoch": 0.40063591737855414, "grad_norm": 0.1927645355463028, "learning_rate": 0.002, "loss": 2.5628, "step": 201100 }, { "epoch": 0.40065583960219303, "grad_norm": 0.176402285695076, "learning_rate": 0.002, "loss": 2.5493, "step": 201110 }, { "epoch": 0.400675761825832, "grad_norm": 0.17655351758003235, "learning_rate": 0.002, "loss": 2.5435, "step": 201120 }, { "epoch": 0.40069568404947087, "grad_norm": 0.17783497273921967, "learning_rate": 0.002, "loss": 2.5409, "step": 201130 }, { "epoch": 0.40071560627310976, "grad_norm": 0.16547568142414093, "learning_rate": 0.002, "loss": 2.5511, "step": 201140 }, { "epoch": 0.4007355284967487, "grad_norm": 0.18427588045597076, "learning_rate": 0.002, "loss": 2.5653, "step": 201150 }, { "epoch": 0.4007554507203876, "grad_norm": 0.15545658767223358, "learning_rate": 0.002, "loss": 2.5719, "step": 201160 }, { "epoch": 0.40077537294402654, "grad_norm": 0.16688290238380432, "learning_rate": 0.002, "loss": 2.5553, "step": 201170 }, { "epoch": 0.40079529516766543, "grad_norm": 0.147928386926651, "learning_rate": 0.002, "loss": 2.5605, "step": 201180 }, { "epoch": 0.4008152173913043, "grad_norm": 0.18263651430606842, "learning_rate": 0.002, "loss": 2.5632, "step": 201190 }, { "epoch": 0.40083513961494327, "grad_norm": 0.19575555622577667, "learning_rate": 0.002, "loss": 2.574, "step": 201200 }, { "epoch": 0.40085506183858216, "grad_norm": 0.16656982898712158, "learning_rate": 0.002, "loss": 2.5587, "step": 201210 }, { "epoch": 0.4008749840622211, "grad_norm": 0.1761680245399475, "learning_rate": 0.002, "loss": 2.5583, "step": 201220 }, { "epoch": 0.40089490628586, "grad_norm": 0.19652649760246277, "learning_rate": 0.002, "loss": 2.564, "step": 201230 }, { "epoch": 0.40091482850949894, "grad_norm": 0.15167288482189178, "learning_rate": 0.002, "loss": 2.5529, "step": 201240 }, { "epoch": 0.40093475073313783, "grad_norm": 0.1726066768169403, "learning_rate": 0.002, "loss": 2.5627, "step": 201250 }, { "epoch": 0.4009546729567767, "grad_norm": 0.18545152246952057, "learning_rate": 0.002, "loss": 2.5615, "step": 201260 }, { "epoch": 0.40097459518041567, "grad_norm": 0.19892114400863647, "learning_rate": 0.002, "loss": 2.5811, "step": 201270 }, { "epoch": 0.40099451740405456, "grad_norm": 0.15812872350215912, "learning_rate": 0.002, "loss": 2.5477, "step": 201280 }, { "epoch": 0.4010144396276935, "grad_norm": 0.16463619470596313, "learning_rate": 0.002, "loss": 2.5609, "step": 201290 }, { "epoch": 0.4010343618513324, "grad_norm": 0.1489281952381134, "learning_rate": 0.002, "loss": 2.5459, "step": 201300 }, { "epoch": 0.4010542840749713, "grad_norm": 0.15147264301776886, "learning_rate": 0.002, "loss": 2.5454, "step": 201310 }, { "epoch": 0.40107420629861024, "grad_norm": 0.18556267023086548, "learning_rate": 0.002, "loss": 2.5604, "step": 201320 }, { "epoch": 0.4010941285222491, "grad_norm": 0.16205771267414093, "learning_rate": 0.002, "loss": 2.567, "step": 201330 }, { "epoch": 0.4011140507458881, "grad_norm": 0.17111897468566895, "learning_rate": 0.002, "loss": 2.5647, "step": 201340 }, { "epoch": 0.40113397296952696, "grad_norm": 0.14894090592861176, "learning_rate": 0.002, "loss": 2.5739, "step": 201350 }, { "epoch": 0.40115389519316585, "grad_norm": 0.18235889077186584, "learning_rate": 0.002, "loss": 2.5457, "step": 201360 }, { "epoch": 0.4011738174168048, "grad_norm": 0.15601976215839386, "learning_rate": 0.002, "loss": 2.5435, "step": 201370 }, { "epoch": 0.4011937396404437, "grad_norm": 0.17531023919582367, "learning_rate": 0.002, "loss": 2.565, "step": 201380 }, { "epoch": 0.40121366186408264, "grad_norm": 0.1597287356853485, "learning_rate": 0.002, "loss": 2.5496, "step": 201390 }, { "epoch": 0.40123358408772153, "grad_norm": 0.261613667011261, "learning_rate": 0.002, "loss": 2.5611, "step": 201400 }, { "epoch": 0.4012535063113605, "grad_norm": 0.19298772513866425, "learning_rate": 0.002, "loss": 2.5533, "step": 201410 }, { "epoch": 0.40127342853499937, "grad_norm": 0.1598571389913559, "learning_rate": 0.002, "loss": 2.5688, "step": 201420 }, { "epoch": 0.40129335075863826, "grad_norm": 0.16052058339118958, "learning_rate": 0.002, "loss": 2.5508, "step": 201430 }, { "epoch": 0.4013132729822772, "grad_norm": 0.16451145708560944, "learning_rate": 0.002, "loss": 2.558, "step": 201440 }, { "epoch": 0.4013331952059161, "grad_norm": 0.1795293688774109, "learning_rate": 0.002, "loss": 2.559, "step": 201450 }, { "epoch": 0.40135311742955504, "grad_norm": 0.16564664244651794, "learning_rate": 0.002, "loss": 2.569, "step": 201460 }, { "epoch": 0.40137303965319393, "grad_norm": 0.15237151086330414, "learning_rate": 0.002, "loss": 2.5699, "step": 201470 }, { "epoch": 0.4013929618768328, "grad_norm": 0.13793079555034637, "learning_rate": 0.002, "loss": 2.5503, "step": 201480 }, { "epoch": 0.40141288410047177, "grad_norm": 0.17051032185554504, "learning_rate": 0.002, "loss": 2.5653, "step": 201490 }, { "epoch": 0.40143280632411066, "grad_norm": 0.17813824117183685, "learning_rate": 0.002, "loss": 2.5584, "step": 201500 }, { "epoch": 0.4014527285477496, "grad_norm": 0.1699981689453125, "learning_rate": 0.002, "loss": 2.5711, "step": 201510 }, { "epoch": 0.4014726507713885, "grad_norm": 0.17341554164886475, "learning_rate": 0.002, "loss": 2.5698, "step": 201520 }, { "epoch": 0.4014925729950274, "grad_norm": 0.15563032031059265, "learning_rate": 0.002, "loss": 2.5555, "step": 201530 }, { "epoch": 0.40151249521866633, "grad_norm": 0.158951535820961, "learning_rate": 0.002, "loss": 2.5463, "step": 201540 }, { "epoch": 0.4015324174423052, "grad_norm": 0.1514386385679245, "learning_rate": 0.002, "loss": 2.5513, "step": 201550 }, { "epoch": 0.40155233966594417, "grad_norm": 0.15973953902721405, "learning_rate": 0.002, "loss": 2.5669, "step": 201560 }, { "epoch": 0.40157226188958306, "grad_norm": 0.1631687730550766, "learning_rate": 0.002, "loss": 2.5686, "step": 201570 }, { "epoch": 0.401592184113222, "grad_norm": 0.1754828542470932, "learning_rate": 0.002, "loss": 2.5585, "step": 201580 }, { "epoch": 0.4016121063368609, "grad_norm": 0.1410389095544815, "learning_rate": 0.002, "loss": 2.5758, "step": 201590 }, { "epoch": 0.4016320285604998, "grad_norm": 0.162696972489357, "learning_rate": 0.002, "loss": 2.568, "step": 201600 }, { "epoch": 0.40165195078413873, "grad_norm": 0.15817297995090485, "learning_rate": 0.002, "loss": 2.5858, "step": 201610 }, { "epoch": 0.4016718730077776, "grad_norm": 0.1575087010860443, "learning_rate": 0.002, "loss": 2.5597, "step": 201620 }, { "epoch": 0.40169179523141657, "grad_norm": 0.21419250965118408, "learning_rate": 0.002, "loss": 2.5571, "step": 201630 }, { "epoch": 0.40171171745505546, "grad_norm": 0.15560545027256012, "learning_rate": 0.002, "loss": 2.5586, "step": 201640 }, { "epoch": 0.40173163967869435, "grad_norm": 0.15983964502811432, "learning_rate": 0.002, "loss": 2.5556, "step": 201650 }, { "epoch": 0.4017515619023333, "grad_norm": 0.19650177657604218, "learning_rate": 0.002, "loss": 2.5628, "step": 201660 }, { "epoch": 0.4017714841259722, "grad_norm": 0.15671035647392273, "learning_rate": 0.002, "loss": 2.5511, "step": 201670 }, { "epoch": 0.40179140634961114, "grad_norm": 0.1745254099369049, "learning_rate": 0.002, "loss": 2.563, "step": 201680 }, { "epoch": 0.40181132857325, "grad_norm": 0.15096208453178406, "learning_rate": 0.002, "loss": 2.5571, "step": 201690 }, { "epoch": 0.401831250796889, "grad_norm": 0.15084484219551086, "learning_rate": 0.002, "loss": 2.5536, "step": 201700 }, { "epoch": 0.40185117302052786, "grad_norm": 0.17377005517482758, "learning_rate": 0.002, "loss": 2.5568, "step": 201710 }, { "epoch": 0.40187109524416675, "grad_norm": 0.1752104014158249, "learning_rate": 0.002, "loss": 2.5739, "step": 201720 }, { "epoch": 0.4018910174678057, "grad_norm": 0.16169829666614532, "learning_rate": 0.002, "loss": 2.559, "step": 201730 }, { "epoch": 0.4019109396914446, "grad_norm": 0.15392747521400452, "learning_rate": 0.002, "loss": 2.5573, "step": 201740 }, { "epoch": 0.40193086191508354, "grad_norm": 0.18168936669826508, "learning_rate": 0.002, "loss": 2.5685, "step": 201750 }, { "epoch": 0.40195078413872243, "grad_norm": 0.1581113487482071, "learning_rate": 0.002, "loss": 2.5558, "step": 201760 }, { "epoch": 0.4019707063623613, "grad_norm": 0.16997961699962616, "learning_rate": 0.002, "loss": 2.5637, "step": 201770 }, { "epoch": 0.40199062858600026, "grad_norm": 0.19051876664161682, "learning_rate": 0.002, "loss": 2.5611, "step": 201780 }, { "epoch": 0.40201055080963916, "grad_norm": 0.15753526985645294, "learning_rate": 0.002, "loss": 2.5485, "step": 201790 }, { "epoch": 0.4020304730332781, "grad_norm": 0.1897766888141632, "learning_rate": 0.002, "loss": 2.5773, "step": 201800 }, { "epoch": 0.402050395256917, "grad_norm": 0.18434758484363556, "learning_rate": 0.002, "loss": 2.5548, "step": 201810 }, { "epoch": 0.4020703174805559, "grad_norm": 0.14593085646629333, "learning_rate": 0.002, "loss": 2.5583, "step": 201820 }, { "epoch": 0.40209023970419483, "grad_norm": 0.1647026538848877, "learning_rate": 0.002, "loss": 2.5501, "step": 201830 }, { "epoch": 0.4021101619278337, "grad_norm": 0.19293375313282013, "learning_rate": 0.002, "loss": 2.5612, "step": 201840 }, { "epoch": 0.40213008415147267, "grad_norm": 0.17345520853996277, "learning_rate": 0.002, "loss": 2.5515, "step": 201850 }, { "epoch": 0.40215000637511156, "grad_norm": 0.16113728284835815, "learning_rate": 0.002, "loss": 2.5625, "step": 201860 }, { "epoch": 0.4021699285987505, "grad_norm": 0.15627042949199677, "learning_rate": 0.002, "loss": 2.5515, "step": 201870 }, { "epoch": 0.4021898508223894, "grad_norm": 0.19445708394050598, "learning_rate": 0.002, "loss": 2.5568, "step": 201880 }, { "epoch": 0.4022097730460283, "grad_norm": 0.20528696477413177, "learning_rate": 0.002, "loss": 2.5504, "step": 201890 }, { "epoch": 0.40222969526966723, "grad_norm": 0.1584165096282959, "learning_rate": 0.002, "loss": 2.5634, "step": 201900 }, { "epoch": 0.4022496174933061, "grad_norm": 0.14550423622131348, "learning_rate": 0.002, "loss": 2.5543, "step": 201910 }, { "epoch": 0.40226953971694507, "grad_norm": 0.18902641534805298, "learning_rate": 0.002, "loss": 2.5685, "step": 201920 }, { "epoch": 0.40228946194058396, "grad_norm": 0.15504898130893707, "learning_rate": 0.002, "loss": 2.5621, "step": 201930 }, { "epoch": 0.40230938416422285, "grad_norm": 0.1845720112323761, "learning_rate": 0.002, "loss": 2.5648, "step": 201940 }, { "epoch": 0.4023293063878618, "grad_norm": 0.1634412407875061, "learning_rate": 0.002, "loss": 2.5561, "step": 201950 }, { "epoch": 0.4023492286115007, "grad_norm": 0.19443686306476593, "learning_rate": 0.002, "loss": 2.5538, "step": 201960 }, { "epoch": 0.40236915083513963, "grad_norm": 0.17703241109848022, "learning_rate": 0.002, "loss": 2.5742, "step": 201970 }, { "epoch": 0.4023890730587785, "grad_norm": 0.1806725263595581, "learning_rate": 0.002, "loss": 2.5518, "step": 201980 }, { "epoch": 0.40240899528241747, "grad_norm": 0.17655716836452484, "learning_rate": 0.002, "loss": 2.5557, "step": 201990 }, { "epoch": 0.40242891750605636, "grad_norm": 0.14595018327236176, "learning_rate": 0.002, "loss": 2.5643, "step": 202000 }, { "epoch": 0.40244883972969525, "grad_norm": 0.1513250768184662, "learning_rate": 0.002, "loss": 2.5697, "step": 202010 }, { "epoch": 0.4024687619533342, "grad_norm": 0.16968636214733124, "learning_rate": 0.002, "loss": 2.5651, "step": 202020 }, { "epoch": 0.4024886841769731, "grad_norm": 0.17789289355278015, "learning_rate": 0.002, "loss": 2.5469, "step": 202030 }, { "epoch": 0.40250860640061203, "grad_norm": 0.1451755166053772, "learning_rate": 0.002, "loss": 2.559, "step": 202040 }, { "epoch": 0.4025285286242509, "grad_norm": 0.19855870306491852, "learning_rate": 0.002, "loss": 2.5544, "step": 202050 }, { "epoch": 0.4025484508478898, "grad_norm": 0.17404070496559143, "learning_rate": 0.002, "loss": 2.5551, "step": 202060 }, { "epoch": 0.40256837307152876, "grad_norm": 0.18251335620880127, "learning_rate": 0.002, "loss": 2.5563, "step": 202070 }, { "epoch": 0.40258829529516765, "grad_norm": 0.16082121431827545, "learning_rate": 0.002, "loss": 2.5539, "step": 202080 }, { "epoch": 0.4026082175188066, "grad_norm": 0.19803239405155182, "learning_rate": 0.002, "loss": 2.559, "step": 202090 }, { "epoch": 0.4026281397424455, "grad_norm": 0.15821479260921478, "learning_rate": 0.002, "loss": 2.5652, "step": 202100 }, { "epoch": 0.4026480619660844, "grad_norm": 0.14593276381492615, "learning_rate": 0.002, "loss": 2.5537, "step": 202110 }, { "epoch": 0.4026679841897233, "grad_norm": 0.18666903674602509, "learning_rate": 0.002, "loss": 2.5574, "step": 202120 }, { "epoch": 0.4026879064133622, "grad_norm": 0.16003258526325226, "learning_rate": 0.002, "loss": 2.5614, "step": 202130 }, { "epoch": 0.40270782863700116, "grad_norm": 0.16830627620220184, "learning_rate": 0.002, "loss": 2.5624, "step": 202140 }, { "epoch": 0.40272775086064005, "grad_norm": 0.1789199709892273, "learning_rate": 0.002, "loss": 2.574, "step": 202150 }, { "epoch": 0.402747673084279, "grad_norm": 0.1383604109287262, "learning_rate": 0.002, "loss": 2.5503, "step": 202160 }, { "epoch": 0.4027675953079179, "grad_norm": 0.20144504308700562, "learning_rate": 0.002, "loss": 2.5485, "step": 202170 }, { "epoch": 0.4027875175315568, "grad_norm": 0.3177656829357147, "learning_rate": 0.002, "loss": 2.5577, "step": 202180 }, { "epoch": 0.40280743975519573, "grad_norm": 0.19249358773231506, "learning_rate": 0.002, "loss": 2.5654, "step": 202190 }, { "epoch": 0.4028273619788346, "grad_norm": 0.14929717779159546, "learning_rate": 0.002, "loss": 2.5566, "step": 202200 }, { "epoch": 0.40284728420247357, "grad_norm": 0.16580629348754883, "learning_rate": 0.002, "loss": 2.5412, "step": 202210 }, { "epoch": 0.40286720642611246, "grad_norm": 0.16152042150497437, "learning_rate": 0.002, "loss": 2.5662, "step": 202220 }, { "epoch": 0.40288712864975135, "grad_norm": 0.17388752102851868, "learning_rate": 0.002, "loss": 2.5592, "step": 202230 }, { "epoch": 0.4029070508733903, "grad_norm": 0.17953327298164368, "learning_rate": 0.002, "loss": 2.5575, "step": 202240 }, { "epoch": 0.4029269730970292, "grad_norm": 0.18666476011276245, "learning_rate": 0.002, "loss": 2.5549, "step": 202250 }, { "epoch": 0.40294689532066813, "grad_norm": 0.1481618732213974, "learning_rate": 0.002, "loss": 2.5528, "step": 202260 }, { "epoch": 0.402966817544307, "grad_norm": 0.1620916873216629, "learning_rate": 0.002, "loss": 2.5557, "step": 202270 }, { "epoch": 0.4029867397679459, "grad_norm": 0.1739012748003006, "learning_rate": 0.002, "loss": 2.5456, "step": 202280 }, { "epoch": 0.40300666199158486, "grad_norm": 0.18375474214553833, "learning_rate": 0.002, "loss": 2.5607, "step": 202290 }, { "epoch": 0.40302658421522375, "grad_norm": 0.17786450684070587, "learning_rate": 0.002, "loss": 2.5599, "step": 202300 }, { "epoch": 0.4030465064388627, "grad_norm": 0.15051798522472382, "learning_rate": 0.002, "loss": 2.5453, "step": 202310 }, { "epoch": 0.4030664286625016, "grad_norm": 0.19451665878295898, "learning_rate": 0.002, "loss": 2.5737, "step": 202320 }, { "epoch": 0.40308635088614053, "grad_norm": 0.1963770091533661, "learning_rate": 0.002, "loss": 2.5564, "step": 202330 }, { "epoch": 0.4031062731097794, "grad_norm": 0.16291947662830353, "learning_rate": 0.002, "loss": 2.5673, "step": 202340 }, { "epoch": 0.4031261953334183, "grad_norm": 0.1521449238061905, "learning_rate": 0.002, "loss": 2.5728, "step": 202350 }, { "epoch": 0.40314611755705726, "grad_norm": 0.2426181137561798, "learning_rate": 0.002, "loss": 2.5621, "step": 202360 }, { "epoch": 0.40316603978069615, "grad_norm": 0.1506386548280716, "learning_rate": 0.002, "loss": 2.5595, "step": 202370 }, { "epoch": 0.4031859620043351, "grad_norm": 0.16032956540584564, "learning_rate": 0.002, "loss": 2.5733, "step": 202380 }, { "epoch": 0.403205884227974, "grad_norm": 0.1375754028558731, "learning_rate": 0.002, "loss": 2.5519, "step": 202390 }, { "epoch": 0.4032258064516129, "grad_norm": 0.16851654648780823, "learning_rate": 0.002, "loss": 2.5613, "step": 202400 }, { "epoch": 0.4032457286752518, "grad_norm": 0.15980267524719238, "learning_rate": 0.002, "loss": 2.563, "step": 202410 }, { "epoch": 0.4032656508988907, "grad_norm": 0.18948021531105042, "learning_rate": 0.002, "loss": 2.5616, "step": 202420 }, { "epoch": 0.40328557312252966, "grad_norm": 0.14565213024616241, "learning_rate": 0.002, "loss": 2.5479, "step": 202430 }, { "epoch": 0.40330549534616855, "grad_norm": 0.16664959490299225, "learning_rate": 0.002, "loss": 2.5651, "step": 202440 }, { "epoch": 0.4033254175698075, "grad_norm": 0.18019579350948334, "learning_rate": 0.002, "loss": 2.5519, "step": 202450 }, { "epoch": 0.4033453397934464, "grad_norm": 0.1422608643770218, "learning_rate": 0.002, "loss": 2.5652, "step": 202460 }, { "epoch": 0.4033652620170853, "grad_norm": 0.19397631287574768, "learning_rate": 0.002, "loss": 2.5524, "step": 202470 }, { "epoch": 0.4033851842407242, "grad_norm": 0.16867795586585999, "learning_rate": 0.002, "loss": 2.5578, "step": 202480 }, { "epoch": 0.4034051064643631, "grad_norm": 0.15957482159137726, "learning_rate": 0.002, "loss": 2.5467, "step": 202490 }, { "epoch": 0.40342502868800206, "grad_norm": 0.1820068359375, "learning_rate": 0.002, "loss": 2.5569, "step": 202500 }, { "epoch": 0.40344495091164095, "grad_norm": 0.17044270038604736, "learning_rate": 0.002, "loss": 2.5732, "step": 202510 }, { "epoch": 0.40346487313527984, "grad_norm": 0.1840265691280365, "learning_rate": 0.002, "loss": 2.5661, "step": 202520 }, { "epoch": 0.4034847953589188, "grad_norm": 0.17355170845985413, "learning_rate": 0.002, "loss": 2.5504, "step": 202530 }, { "epoch": 0.4035047175825577, "grad_norm": 0.14203837513923645, "learning_rate": 0.002, "loss": 2.5665, "step": 202540 }, { "epoch": 0.40352463980619663, "grad_norm": 0.14369560778141022, "learning_rate": 0.002, "loss": 2.5708, "step": 202550 }, { "epoch": 0.4035445620298355, "grad_norm": 0.2148248553276062, "learning_rate": 0.002, "loss": 2.5475, "step": 202560 }, { "epoch": 0.4035644842534744, "grad_norm": 0.15054473280906677, "learning_rate": 0.002, "loss": 2.5554, "step": 202570 }, { "epoch": 0.40358440647711336, "grad_norm": 0.17396056652069092, "learning_rate": 0.002, "loss": 2.56, "step": 202580 }, { "epoch": 0.40360432870075225, "grad_norm": 0.14289072155952454, "learning_rate": 0.002, "loss": 2.5523, "step": 202590 }, { "epoch": 0.4036242509243912, "grad_norm": 0.1643023043870926, "learning_rate": 0.002, "loss": 2.5389, "step": 202600 }, { "epoch": 0.4036441731480301, "grad_norm": 0.15885265171527863, "learning_rate": 0.002, "loss": 2.5572, "step": 202610 }, { "epoch": 0.40366409537166903, "grad_norm": 0.18868686258792877, "learning_rate": 0.002, "loss": 2.5593, "step": 202620 }, { "epoch": 0.4036840175953079, "grad_norm": 0.18829479813575745, "learning_rate": 0.002, "loss": 2.5578, "step": 202630 }, { "epoch": 0.4037039398189468, "grad_norm": 0.1683177500963211, "learning_rate": 0.002, "loss": 2.5558, "step": 202640 }, { "epoch": 0.40372386204258576, "grad_norm": 0.1784362941980362, "learning_rate": 0.002, "loss": 2.5538, "step": 202650 }, { "epoch": 0.40374378426622465, "grad_norm": 0.16398212313652039, "learning_rate": 0.002, "loss": 2.5559, "step": 202660 }, { "epoch": 0.4037637064898636, "grad_norm": 0.15582884848117828, "learning_rate": 0.002, "loss": 2.5721, "step": 202670 }, { "epoch": 0.4037836287135025, "grad_norm": 0.2080058455467224, "learning_rate": 0.002, "loss": 2.555, "step": 202680 }, { "epoch": 0.4038035509371414, "grad_norm": 0.1810850203037262, "learning_rate": 0.002, "loss": 2.5744, "step": 202690 }, { "epoch": 0.4038234731607803, "grad_norm": 0.17336997389793396, "learning_rate": 0.002, "loss": 2.5771, "step": 202700 }, { "epoch": 0.4038433953844192, "grad_norm": 0.17566782236099243, "learning_rate": 0.002, "loss": 2.5545, "step": 202710 }, { "epoch": 0.40386331760805816, "grad_norm": 0.18203015625476837, "learning_rate": 0.002, "loss": 2.5562, "step": 202720 }, { "epoch": 0.40388323983169705, "grad_norm": 0.19347678124904633, "learning_rate": 0.002, "loss": 2.5649, "step": 202730 }, { "epoch": 0.403903162055336, "grad_norm": 0.1793976128101349, "learning_rate": 0.002, "loss": 2.5637, "step": 202740 }, { "epoch": 0.4039230842789749, "grad_norm": 0.16844899952411652, "learning_rate": 0.002, "loss": 2.5583, "step": 202750 }, { "epoch": 0.4039430065026138, "grad_norm": 0.15681400895118713, "learning_rate": 0.002, "loss": 2.5623, "step": 202760 }, { "epoch": 0.4039629287262527, "grad_norm": 0.19846370816230774, "learning_rate": 0.002, "loss": 2.5618, "step": 202770 }, { "epoch": 0.4039828509498916, "grad_norm": 0.1627005636692047, "learning_rate": 0.002, "loss": 2.5738, "step": 202780 }, { "epoch": 0.40400277317353056, "grad_norm": 0.18122932314872742, "learning_rate": 0.002, "loss": 2.5542, "step": 202790 }, { "epoch": 0.40402269539716945, "grad_norm": 0.1471443623304367, "learning_rate": 0.002, "loss": 2.5634, "step": 202800 }, { "epoch": 0.40404261762080834, "grad_norm": 0.17579874396324158, "learning_rate": 0.002, "loss": 2.5539, "step": 202810 }, { "epoch": 0.4040625398444473, "grad_norm": 0.18883965909481049, "learning_rate": 0.002, "loss": 2.5312, "step": 202820 }, { "epoch": 0.4040824620680862, "grad_norm": 0.1678069531917572, "learning_rate": 0.002, "loss": 2.5626, "step": 202830 }, { "epoch": 0.4041023842917251, "grad_norm": 0.19002698361873627, "learning_rate": 0.002, "loss": 2.5712, "step": 202840 }, { "epoch": 0.404122306515364, "grad_norm": 0.16790235042572021, "learning_rate": 0.002, "loss": 2.5614, "step": 202850 }, { "epoch": 0.4041422287390029, "grad_norm": 0.17357631027698517, "learning_rate": 0.002, "loss": 2.5532, "step": 202860 }, { "epoch": 0.40416215096264185, "grad_norm": 0.1745840609073639, "learning_rate": 0.002, "loss": 2.572, "step": 202870 }, { "epoch": 0.40418207318628074, "grad_norm": 0.1544678509235382, "learning_rate": 0.002, "loss": 2.5501, "step": 202880 }, { "epoch": 0.4042019954099197, "grad_norm": 0.14310365915298462, "learning_rate": 0.002, "loss": 2.5549, "step": 202890 }, { "epoch": 0.4042219176335586, "grad_norm": 0.15787473320960999, "learning_rate": 0.002, "loss": 2.5548, "step": 202900 }, { "epoch": 0.4042418398571975, "grad_norm": 0.17291760444641113, "learning_rate": 0.002, "loss": 2.5525, "step": 202910 }, { "epoch": 0.4042617620808364, "grad_norm": 0.1537410020828247, "learning_rate": 0.002, "loss": 2.5537, "step": 202920 }, { "epoch": 0.4042816843044753, "grad_norm": 0.16652311384677887, "learning_rate": 0.002, "loss": 2.5657, "step": 202930 }, { "epoch": 0.40430160652811425, "grad_norm": 0.20300155878067017, "learning_rate": 0.002, "loss": 2.5615, "step": 202940 }, { "epoch": 0.40432152875175315, "grad_norm": 0.16845227777957916, "learning_rate": 0.002, "loss": 2.5651, "step": 202950 }, { "epoch": 0.4043414509753921, "grad_norm": 0.15236707031726837, "learning_rate": 0.002, "loss": 2.5606, "step": 202960 }, { "epoch": 0.404361373199031, "grad_norm": 0.18910442292690277, "learning_rate": 0.002, "loss": 2.5566, "step": 202970 }, { "epoch": 0.4043812954226699, "grad_norm": 0.1566660851240158, "learning_rate": 0.002, "loss": 2.5641, "step": 202980 }, { "epoch": 0.4044012176463088, "grad_norm": 0.20431365072727203, "learning_rate": 0.002, "loss": 2.5662, "step": 202990 }, { "epoch": 0.4044211398699477, "grad_norm": 0.1752009391784668, "learning_rate": 0.002, "loss": 2.5527, "step": 203000 }, { "epoch": 0.40444106209358666, "grad_norm": 0.16981448233127594, "learning_rate": 0.002, "loss": 2.567, "step": 203010 }, { "epoch": 0.40446098431722555, "grad_norm": 0.19228173792362213, "learning_rate": 0.002, "loss": 2.5567, "step": 203020 }, { "epoch": 0.40448090654086444, "grad_norm": 0.14191555976867676, "learning_rate": 0.002, "loss": 2.5444, "step": 203030 }, { "epoch": 0.4045008287645034, "grad_norm": 0.17687003314495087, "learning_rate": 0.002, "loss": 2.5538, "step": 203040 }, { "epoch": 0.4045207509881423, "grad_norm": 0.15463045239448547, "learning_rate": 0.002, "loss": 2.5656, "step": 203050 }, { "epoch": 0.4045406732117812, "grad_norm": 0.1444069892168045, "learning_rate": 0.002, "loss": 2.5686, "step": 203060 }, { "epoch": 0.4045605954354201, "grad_norm": 0.17672526836395264, "learning_rate": 0.002, "loss": 2.5691, "step": 203070 }, { "epoch": 0.40458051765905906, "grad_norm": 0.1758480817079544, "learning_rate": 0.002, "loss": 2.557, "step": 203080 }, { "epoch": 0.40460043988269795, "grad_norm": 0.17070312798023224, "learning_rate": 0.002, "loss": 2.5604, "step": 203090 }, { "epoch": 0.40462036210633684, "grad_norm": 0.15601696074008942, "learning_rate": 0.002, "loss": 2.5507, "step": 203100 }, { "epoch": 0.4046402843299758, "grad_norm": 0.16569477319717407, "learning_rate": 0.002, "loss": 2.5604, "step": 203110 }, { "epoch": 0.4046602065536147, "grad_norm": 0.14308786392211914, "learning_rate": 0.002, "loss": 2.5588, "step": 203120 }, { "epoch": 0.4046801287772536, "grad_norm": 0.16629883646965027, "learning_rate": 0.002, "loss": 2.5595, "step": 203130 }, { "epoch": 0.4047000510008925, "grad_norm": 0.2048937976360321, "learning_rate": 0.002, "loss": 2.5682, "step": 203140 }, { "epoch": 0.4047199732245314, "grad_norm": 0.16246645152568817, "learning_rate": 0.002, "loss": 2.5603, "step": 203150 }, { "epoch": 0.40473989544817035, "grad_norm": 0.1489691585302353, "learning_rate": 0.002, "loss": 2.5596, "step": 203160 }, { "epoch": 0.40475981767180924, "grad_norm": 0.18602629005908966, "learning_rate": 0.002, "loss": 2.5738, "step": 203170 }, { "epoch": 0.4047797398954482, "grad_norm": 0.18716715276241302, "learning_rate": 0.002, "loss": 2.5609, "step": 203180 }, { "epoch": 0.4047996621190871, "grad_norm": 0.18430079519748688, "learning_rate": 0.002, "loss": 2.5514, "step": 203190 }, { "epoch": 0.404819584342726, "grad_norm": 0.159574955701828, "learning_rate": 0.002, "loss": 2.5565, "step": 203200 }, { "epoch": 0.4048395065663649, "grad_norm": 0.22993768751621246, "learning_rate": 0.002, "loss": 2.5564, "step": 203210 }, { "epoch": 0.4048594287900038, "grad_norm": 0.2050376832485199, "learning_rate": 0.002, "loss": 2.5559, "step": 203220 }, { "epoch": 0.40487935101364275, "grad_norm": 0.1605292409658432, "learning_rate": 0.002, "loss": 2.5637, "step": 203230 }, { "epoch": 0.40489927323728164, "grad_norm": 0.16350078582763672, "learning_rate": 0.002, "loss": 2.5609, "step": 203240 }, { "epoch": 0.4049191954609206, "grad_norm": 0.22202011942863464, "learning_rate": 0.002, "loss": 2.5565, "step": 203250 }, { "epoch": 0.4049391176845595, "grad_norm": 0.1668490171432495, "learning_rate": 0.002, "loss": 2.5604, "step": 203260 }, { "epoch": 0.40495903990819837, "grad_norm": 0.17068129777908325, "learning_rate": 0.002, "loss": 2.5563, "step": 203270 }, { "epoch": 0.4049789621318373, "grad_norm": 0.15067505836486816, "learning_rate": 0.002, "loss": 2.5514, "step": 203280 }, { "epoch": 0.4049988843554762, "grad_norm": 0.14768874645233154, "learning_rate": 0.002, "loss": 2.5607, "step": 203290 }, { "epoch": 0.40501880657911515, "grad_norm": 0.13724081218242645, "learning_rate": 0.002, "loss": 2.5682, "step": 203300 }, { "epoch": 0.40503872880275404, "grad_norm": 0.14076440036296844, "learning_rate": 0.002, "loss": 2.5666, "step": 203310 }, { "epoch": 0.40505865102639294, "grad_norm": 0.18312086164951324, "learning_rate": 0.002, "loss": 2.5617, "step": 203320 }, { "epoch": 0.4050785732500319, "grad_norm": 0.1662585735321045, "learning_rate": 0.002, "loss": 2.5543, "step": 203330 }, { "epoch": 0.40509849547367077, "grad_norm": 0.15977458655834198, "learning_rate": 0.002, "loss": 2.5666, "step": 203340 }, { "epoch": 0.4051184176973097, "grad_norm": 0.19844657182693481, "learning_rate": 0.002, "loss": 2.5556, "step": 203350 }, { "epoch": 0.4051383399209486, "grad_norm": 0.14136070013046265, "learning_rate": 0.002, "loss": 2.5631, "step": 203360 }, { "epoch": 0.40515826214458756, "grad_norm": 0.17940206825733185, "learning_rate": 0.002, "loss": 2.5605, "step": 203370 }, { "epoch": 0.40517818436822645, "grad_norm": 0.18486396968364716, "learning_rate": 0.002, "loss": 2.5502, "step": 203380 }, { "epoch": 0.40519810659186534, "grad_norm": 0.1613779515028, "learning_rate": 0.002, "loss": 2.5705, "step": 203390 }, { "epoch": 0.4052180288155043, "grad_norm": 0.19492481648921967, "learning_rate": 0.002, "loss": 2.545, "step": 203400 }, { "epoch": 0.4052379510391432, "grad_norm": 0.1676243096590042, "learning_rate": 0.002, "loss": 2.5535, "step": 203410 }, { "epoch": 0.4052578732627821, "grad_norm": 0.16394145786762238, "learning_rate": 0.002, "loss": 2.5676, "step": 203420 }, { "epoch": 0.405277795486421, "grad_norm": 0.1810237467288971, "learning_rate": 0.002, "loss": 2.5508, "step": 203430 }, { "epoch": 0.4052977177100599, "grad_norm": 0.18092715740203857, "learning_rate": 0.002, "loss": 2.5554, "step": 203440 }, { "epoch": 0.40531763993369885, "grad_norm": 0.18868550658226013, "learning_rate": 0.002, "loss": 2.558, "step": 203450 }, { "epoch": 0.40533756215733774, "grad_norm": 0.15677644312381744, "learning_rate": 0.002, "loss": 2.541, "step": 203460 }, { "epoch": 0.4053574843809767, "grad_norm": 0.1586204171180725, "learning_rate": 0.002, "loss": 2.5603, "step": 203470 }, { "epoch": 0.4053774066046156, "grad_norm": 0.15435642004013062, "learning_rate": 0.002, "loss": 2.556, "step": 203480 }, { "epoch": 0.4053973288282545, "grad_norm": 0.15622657537460327, "learning_rate": 0.002, "loss": 2.5484, "step": 203490 }, { "epoch": 0.4054172510518934, "grad_norm": 0.20394687354564667, "learning_rate": 0.002, "loss": 2.5586, "step": 203500 }, { "epoch": 0.4054371732755323, "grad_norm": 0.1595090925693512, "learning_rate": 0.002, "loss": 2.5569, "step": 203510 }, { "epoch": 0.40545709549917125, "grad_norm": 0.2446966916322708, "learning_rate": 0.002, "loss": 2.5491, "step": 203520 }, { "epoch": 0.40547701772281014, "grad_norm": 0.17784029245376587, "learning_rate": 0.002, "loss": 2.5592, "step": 203530 }, { "epoch": 0.4054969399464491, "grad_norm": 0.17274656891822815, "learning_rate": 0.002, "loss": 2.5521, "step": 203540 }, { "epoch": 0.405516862170088, "grad_norm": 0.150737464427948, "learning_rate": 0.002, "loss": 2.5653, "step": 203550 }, { "epoch": 0.40553678439372687, "grad_norm": 0.18544109165668488, "learning_rate": 0.002, "loss": 2.5525, "step": 203560 }, { "epoch": 0.4055567066173658, "grad_norm": 0.15927822887897491, "learning_rate": 0.002, "loss": 2.5376, "step": 203570 }, { "epoch": 0.4055766288410047, "grad_norm": 0.1773337870836258, "learning_rate": 0.002, "loss": 2.5518, "step": 203580 }, { "epoch": 0.40559655106464365, "grad_norm": 0.18067988753318787, "learning_rate": 0.002, "loss": 2.5516, "step": 203590 }, { "epoch": 0.40561647328828254, "grad_norm": 0.15828590095043182, "learning_rate": 0.002, "loss": 2.5495, "step": 203600 }, { "epoch": 0.40563639551192143, "grad_norm": 0.17888924479484558, "learning_rate": 0.002, "loss": 2.56, "step": 203610 }, { "epoch": 0.4056563177355604, "grad_norm": 0.19689708948135376, "learning_rate": 0.002, "loss": 2.5473, "step": 203620 }, { "epoch": 0.40567623995919927, "grad_norm": 0.18276125192642212, "learning_rate": 0.002, "loss": 2.5539, "step": 203630 }, { "epoch": 0.4056961621828382, "grad_norm": 0.15604858100414276, "learning_rate": 0.002, "loss": 2.5543, "step": 203640 }, { "epoch": 0.4057160844064771, "grad_norm": 0.16880714893341064, "learning_rate": 0.002, "loss": 2.5702, "step": 203650 }, { "epoch": 0.40573600663011605, "grad_norm": 0.41491228342056274, "learning_rate": 0.002, "loss": 2.5618, "step": 203660 }, { "epoch": 0.40575592885375494, "grad_norm": 0.17434367537498474, "learning_rate": 0.002, "loss": 2.5544, "step": 203670 }, { "epoch": 0.40577585107739383, "grad_norm": 0.1839362233877182, "learning_rate": 0.002, "loss": 2.5494, "step": 203680 }, { "epoch": 0.4057957733010328, "grad_norm": 0.1630701869726181, "learning_rate": 0.002, "loss": 2.5578, "step": 203690 }, { "epoch": 0.40581569552467167, "grad_norm": 0.19084152579307556, "learning_rate": 0.002, "loss": 2.5833, "step": 203700 }, { "epoch": 0.4058356177483106, "grad_norm": 0.1699790507555008, "learning_rate": 0.002, "loss": 2.5475, "step": 203710 }, { "epoch": 0.4058555399719495, "grad_norm": 0.2014143168926239, "learning_rate": 0.002, "loss": 2.5508, "step": 203720 }, { "epoch": 0.4058754621955884, "grad_norm": 0.18309883773326874, "learning_rate": 0.002, "loss": 2.5617, "step": 203730 }, { "epoch": 0.40589538441922735, "grad_norm": 0.163537859916687, "learning_rate": 0.002, "loss": 2.5628, "step": 203740 }, { "epoch": 0.40591530664286624, "grad_norm": 0.14984259009361267, "learning_rate": 0.002, "loss": 2.5495, "step": 203750 }, { "epoch": 0.4059352288665052, "grad_norm": 0.1577586680650711, "learning_rate": 0.002, "loss": 2.5489, "step": 203760 }, { "epoch": 0.4059551510901441, "grad_norm": 0.15954189002513885, "learning_rate": 0.002, "loss": 2.5545, "step": 203770 }, { "epoch": 0.40597507331378296, "grad_norm": 0.13126693665981293, "learning_rate": 0.002, "loss": 2.561, "step": 203780 }, { "epoch": 0.4059949955374219, "grad_norm": 0.17157934606075287, "learning_rate": 0.002, "loss": 2.5618, "step": 203790 }, { "epoch": 0.4060149177610608, "grad_norm": 0.1642083376646042, "learning_rate": 0.002, "loss": 2.5565, "step": 203800 }, { "epoch": 0.40603483998469975, "grad_norm": 0.17798885703086853, "learning_rate": 0.002, "loss": 2.5563, "step": 203810 }, { "epoch": 0.40605476220833864, "grad_norm": 0.17911148071289062, "learning_rate": 0.002, "loss": 2.5495, "step": 203820 }, { "epoch": 0.4060746844319776, "grad_norm": 0.16872991621494293, "learning_rate": 0.002, "loss": 2.5582, "step": 203830 }, { "epoch": 0.4060946066556165, "grad_norm": 0.1604720503091812, "learning_rate": 0.002, "loss": 2.5615, "step": 203840 }, { "epoch": 0.40611452887925537, "grad_norm": 0.16638077795505524, "learning_rate": 0.002, "loss": 2.5583, "step": 203850 }, { "epoch": 0.4061344511028943, "grad_norm": 0.16820216178894043, "learning_rate": 0.002, "loss": 2.5597, "step": 203860 }, { "epoch": 0.4061543733265332, "grad_norm": 0.19036248326301575, "learning_rate": 0.002, "loss": 2.5626, "step": 203870 }, { "epoch": 0.40617429555017215, "grad_norm": 0.17452894151210785, "learning_rate": 0.002, "loss": 2.5617, "step": 203880 }, { "epoch": 0.40619421777381104, "grad_norm": 0.17537152767181396, "learning_rate": 0.002, "loss": 2.5527, "step": 203890 }, { "epoch": 0.40621413999744993, "grad_norm": 0.184411883354187, "learning_rate": 0.002, "loss": 2.5625, "step": 203900 }, { "epoch": 0.4062340622210889, "grad_norm": 0.16689246892929077, "learning_rate": 0.002, "loss": 2.5705, "step": 203910 }, { "epoch": 0.40625398444472777, "grad_norm": 0.14727851748466492, "learning_rate": 0.002, "loss": 2.5357, "step": 203920 }, { "epoch": 0.4062739066683667, "grad_norm": 0.16385886073112488, "learning_rate": 0.002, "loss": 2.56, "step": 203930 }, { "epoch": 0.4062938288920056, "grad_norm": 0.14354519546031952, "learning_rate": 0.002, "loss": 2.5729, "step": 203940 }, { "epoch": 0.40631375111564455, "grad_norm": 0.16026422381401062, "learning_rate": 0.002, "loss": 2.5526, "step": 203950 }, { "epoch": 0.40633367333928344, "grad_norm": 0.1408703774213791, "learning_rate": 0.002, "loss": 2.5658, "step": 203960 }, { "epoch": 0.40635359556292233, "grad_norm": 0.15898564457893372, "learning_rate": 0.002, "loss": 2.5528, "step": 203970 }, { "epoch": 0.4063735177865613, "grad_norm": 0.20963995158672333, "learning_rate": 0.002, "loss": 2.5507, "step": 203980 }, { "epoch": 0.40639344001020017, "grad_norm": 0.15096725523471832, "learning_rate": 0.002, "loss": 2.5489, "step": 203990 }, { "epoch": 0.4064133622338391, "grad_norm": 0.13967488706111908, "learning_rate": 0.002, "loss": 2.5701, "step": 204000 }, { "epoch": 0.406433284457478, "grad_norm": 0.19828516244888306, "learning_rate": 0.002, "loss": 2.5552, "step": 204010 }, { "epoch": 0.4064532066811169, "grad_norm": 0.16462835669517517, "learning_rate": 0.002, "loss": 2.5679, "step": 204020 }, { "epoch": 0.40647312890475584, "grad_norm": 0.17929977178573608, "learning_rate": 0.002, "loss": 2.5726, "step": 204030 }, { "epoch": 0.40649305112839473, "grad_norm": 0.1531791239976883, "learning_rate": 0.002, "loss": 2.5537, "step": 204040 }, { "epoch": 0.4065129733520337, "grad_norm": 0.15486273169517517, "learning_rate": 0.002, "loss": 2.5594, "step": 204050 }, { "epoch": 0.40653289557567257, "grad_norm": 0.16683940589427948, "learning_rate": 0.002, "loss": 2.5543, "step": 204060 }, { "epoch": 0.40655281779931146, "grad_norm": 0.18504565954208374, "learning_rate": 0.002, "loss": 2.5611, "step": 204070 }, { "epoch": 0.4065727400229504, "grad_norm": 0.1796608418226242, "learning_rate": 0.002, "loss": 2.5614, "step": 204080 }, { "epoch": 0.4065926622465893, "grad_norm": 0.17499519884586334, "learning_rate": 0.002, "loss": 2.5552, "step": 204090 }, { "epoch": 0.40661258447022824, "grad_norm": 0.18286354839801788, "learning_rate": 0.002, "loss": 2.5668, "step": 204100 }, { "epoch": 0.40663250669386714, "grad_norm": 0.16071230173110962, "learning_rate": 0.002, "loss": 2.548, "step": 204110 }, { "epoch": 0.4066524289175061, "grad_norm": 0.159116730093956, "learning_rate": 0.002, "loss": 2.5712, "step": 204120 }, { "epoch": 0.40667235114114497, "grad_norm": 0.15779989957809448, "learning_rate": 0.002, "loss": 2.5604, "step": 204130 }, { "epoch": 0.40669227336478386, "grad_norm": 0.14371559023857117, "learning_rate": 0.002, "loss": 2.5447, "step": 204140 }, { "epoch": 0.4067121955884228, "grad_norm": 0.16456139087677002, "learning_rate": 0.002, "loss": 2.5619, "step": 204150 }, { "epoch": 0.4067321178120617, "grad_norm": 0.18691608309745789, "learning_rate": 0.002, "loss": 2.5577, "step": 204160 }, { "epoch": 0.40675204003570065, "grad_norm": 0.13918660581111908, "learning_rate": 0.002, "loss": 2.5663, "step": 204170 }, { "epoch": 0.40677196225933954, "grad_norm": 0.21175213158130646, "learning_rate": 0.002, "loss": 2.5665, "step": 204180 }, { "epoch": 0.4067918844829784, "grad_norm": 0.13856945931911469, "learning_rate": 0.002, "loss": 2.546, "step": 204190 }, { "epoch": 0.4068118067066174, "grad_norm": 0.14851035177707672, "learning_rate": 0.002, "loss": 2.5729, "step": 204200 }, { "epoch": 0.40683172893025626, "grad_norm": 0.14889296889305115, "learning_rate": 0.002, "loss": 2.5722, "step": 204210 }, { "epoch": 0.4068516511538952, "grad_norm": 0.1587734967470169, "learning_rate": 0.002, "loss": 2.5589, "step": 204220 }, { "epoch": 0.4068715733775341, "grad_norm": 0.15687116980552673, "learning_rate": 0.002, "loss": 2.5587, "step": 204230 }, { "epoch": 0.40689149560117305, "grad_norm": 0.17843621969223022, "learning_rate": 0.002, "loss": 2.5786, "step": 204240 }, { "epoch": 0.40691141782481194, "grad_norm": 0.15941841900348663, "learning_rate": 0.002, "loss": 2.5653, "step": 204250 }, { "epoch": 0.40693134004845083, "grad_norm": 0.19193202257156372, "learning_rate": 0.002, "loss": 2.5558, "step": 204260 }, { "epoch": 0.4069512622720898, "grad_norm": 0.17474131286144257, "learning_rate": 0.002, "loss": 2.5635, "step": 204270 }, { "epoch": 0.40697118449572867, "grad_norm": 0.15690280497074127, "learning_rate": 0.002, "loss": 2.5643, "step": 204280 }, { "epoch": 0.4069911067193676, "grad_norm": 0.22528760135173798, "learning_rate": 0.002, "loss": 2.5445, "step": 204290 }, { "epoch": 0.4070110289430065, "grad_norm": 0.16628840565681458, "learning_rate": 0.002, "loss": 2.5449, "step": 204300 }, { "epoch": 0.4070309511666454, "grad_norm": 0.17618124186992645, "learning_rate": 0.002, "loss": 2.5398, "step": 204310 }, { "epoch": 0.40705087339028434, "grad_norm": 0.1483541876077652, "learning_rate": 0.002, "loss": 2.5559, "step": 204320 }, { "epoch": 0.40707079561392323, "grad_norm": 0.17648471891880035, "learning_rate": 0.002, "loss": 2.5486, "step": 204330 }, { "epoch": 0.4070907178375622, "grad_norm": 0.16351354122161865, "learning_rate": 0.002, "loss": 2.5538, "step": 204340 }, { "epoch": 0.40711064006120107, "grad_norm": 0.1430254578590393, "learning_rate": 0.002, "loss": 2.5626, "step": 204350 }, { "epoch": 0.40713056228483996, "grad_norm": 0.29765114188194275, "learning_rate": 0.002, "loss": 2.5755, "step": 204360 }, { "epoch": 0.4071504845084789, "grad_norm": 0.19510702788829803, "learning_rate": 0.002, "loss": 2.5663, "step": 204370 }, { "epoch": 0.4071704067321178, "grad_norm": 0.16739313304424286, "learning_rate": 0.002, "loss": 2.5483, "step": 204380 }, { "epoch": 0.40719032895575674, "grad_norm": 0.17224879562854767, "learning_rate": 0.002, "loss": 2.5558, "step": 204390 }, { "epoch": 0.40721025117939563, "grad_norm": 0.1780596524477005, "learning_rate": 0.002, "loss": 2.5453, "step": 204400 }, { "epoch": 0.4072301734030346, "grad_norm": 0.22212032973766327, "learning_rate": 0.002, "loss": 2.5545, "step": 204410 }, { "epoch": 0.40725009562667347, "grad_norm": 0.159954234957695, "learning_rate": 0.002, "loss": 2.5528, "step": 204420 }, { "epoch": 0.40727001785031236, "grad_norm": 0.15335312485694885, "learning_rate": 0.002, "loss": 2.5579, "step": 204430 }, { "epoch": 0.4072899400739513, "grad_norm": 0.4951367676258087, "learning_rate": 0.002, "loss": 2.5705, "step": 204440 }, { "epoch": 0.4073098622975902, "grad_norm": 0.21938645839691162, "learning_rate": 0.002, "loss": 2.5596, "step": 204450 }, { "epoch": 0.40732978452122914, "grad_norm": 0.18363729119300842, "learning_rate": 0.002, "loss": 2.5459, "step": 204460 }, { "epoch": 0.40734970674486803, "grad_norm": 0.165451779961586, "learning_rate": 0.002, "loss": 2.5694, "step": 204470 }, { "epoch": 0.4073696289685069, "grad_norm": 0.1593657284975052, "learning_rate": 0.002, "loss": 2.5565, "step": 204480 }, { "epoch": 0.40738955119214587, "grad_norm": 0.19595661759376526, "learning_rate": 0.002, "loss": 2.5514, "step": 204490 }, { "epoch": 0.40740947341578476, "grad_norm": 0.17497749626636505, "learning_rate": 0.002, "loss": 2.5612, "step": 204500 }, { "epoch": 0.4074293956394237, "grad_norm": 0.1715231090784073, "learning_rate": 0.002, "loss": 2.5669, "step": 204510 }, { "epoch": 0.4074493178630626, "grad_norm": 0.16584143042564392, "learning_rate": 0.002, "loss": 2.5609, "step": 204520 }, { "epoch": 0.4074692400867015, "grad_norm": 0.17123709619045258, "learning_rate": 0.002, "loss": 2.5512, "step": 204530 }, { "epoch": 0.40748916231034044, "grad_norm": 0.14655767381191254, "learning_rate": 0.002, "loss": 2.5477, "step": 204540 }, { "epoch": 0.4075090845339793, "grad_norm": 0.19411560893058777, "learning_rate": 0.002, "loss": 2.5545, "step": 204550 }, { "epoch": 0.4075290067576183, "grad_norm": 0.15180101990699768, "learning_rate": 0.002, "loss": 2.5708, "step": 204560 }, { "epoch": 0.40754892898125716, "grad_norm": 0.15683555603027344, "learning_rate": 0.002, "loss": 2.5509, "step": 204570 }, { "epoch": 0.4075688512048961, "grad_norm": 0.22598867118358612, "learning_rate": 0.002, "loss": 2.5699, "step": 204580 }, { "epoch": 0.407588773428535, "grad_norm": 0.18745911121368408, "learning_rate": 0.002, "loss": 2.56, "step": 204590 }, { "epoch": 0.4076086956521739, "grad_norm": 0.16406674683094025, "learning_rate": 0.002, "loss": 2.5392, "step": 204600 }, { "epoch": 0.40762861787581284, "grad_norm": 0.2134544551372528, "learning_rate": 0.002, "loss": 2.5699, "step": 204610 }, { "epoch": 0.40764854009945173, "grad_norm": 0.16735123097896576, "learning_rate": 0.002, "loss": 2.567, "step": 204620 }, { "epoch": 0.4076684623230907, "grad_norm": 0.162140354514122, "learning_rate": 0.002, "loss": 2.5616, "step": 204630 }, { "epoch": 0.40768838454672957, "grad_norm": 0.19073189795017242, "learning_rate": 0.002, "loss": 2.5598, "step": 204640 }, { "epoch": 0.40770830677036846, "grad_norm": 0.17603826522827148, "learning_rate": 0.002, "loss": 2.5489, "step": 204650 }, { "epoch": 0.4077282289940074, "grad_norm": 0.17268222570419312, "learning_rate": 0.002, "loss": 2.5551, "step": 204660 }, { "epoch": 0.4077481512176463, "grad_norm": 0.1574373096227646, "learning_rate": 0.002, "loss": 2.5503, "step": 204670 }, { "epoch": 0.40776807344128524, "grad_norm": 0.18171657621860504, "learning_rate": 0.002, "loss": 2.5731, "step": 204680 }, { "epoch": 0.40778799566492413, "grad_norm": 0.14012093842029572, "learning_rate": 0.002, "loss": 2.5666, "step": 204690 }, { "epoch": 0.4078079178885631, "grad_norm": 0.1940707415342331, "learning_rate": 0.002, "loss": 2.5692, "step": 204700 }, { "epoch": 0.40782784011220197, "grad_norm": 0.17329713702201843, "learning_rate": 0.002, "loss": 2.5577, "step": 204710 }, { "epoch": 0.40784776233584086, "grad_norm": 0.17616356909275055, "learning_rate": 0.002, "loss": 2.5686, "step": 204720 }, { "epoch": 0.4078676845594798, "grad_norm": 0.19386214017868042, "learning_rate": 0.002, "loss": 2.5599, "step": 204730 }, { "epoch": 0.4078876067831187, "grad_norm": 0.15308550000190735, "learning_rate": 0.002, "loss": 2.5612, "step": 204740 }, { "epoch": 0.40790752900675764, "grad_norm": 0.1441013216972351, "learning_rate": 0.002, "loss": 2.5834, "step": 204750 }, { "epoch": 0.40792745123039653, "grad_norm": 0.18693773448467255, "learning_rate": 0.002, "loss": 2.563, "step": 204760 }, { "epoch": 0.4079473734540354, "grad_norm": 0.18305924534797668, "learning_rate": 0.002, "loss": 2.5584, "step": 204770 }, { "epoch": 0.40796729567767437, "grad_norm": 0.1682431995868683, "learning_rate": 0.002, "loss": 2.5523, "step": 204780 }, { "epoch": 0.40798721790131326, "grad_norm": 0.15077605843544006, "learning_rate": 0.002, "loss": 2.5675, "step": 204790 }, { "epoch": 0.4080071401249522, "grad_norm": 0.17553800344467163, "learning_rate": 0.002, "loss": 2.5539, "step": 204800 }, { "epoch": 0.4080270623485911, "grad_norm": 0.17684169113636017, "learning_rate": 0.002, "loss": 2.5712, "step": 204810 }, { "epoch": 0.40804698457223, "grad_norm": 0.16126883029937744, "learning_rate": 0.002, "loss": 2.5626, "step": 204820 }, { "epoch": 0.40806690679586893, "grad_norm": 0.1732925921678543, "learning_rate": 0.002, "loss": 2.5568, "step": 204830 }, { "epoch": 0.4080868290195078, "grad_norm": 0.14132533967494965, "learning_rate": 0.002, "loss": 2.5641, "step": 204840 }, { "epoch": 0.40810675124314677, "grad_norm": 0.1679680496454239, "learning_rate": 0.002, "loss": 2.5514, "step": 204850 }, { "epoch": 0.40812667346678566, "grad_norm": 0.16231052577495575, "learning_rate": 0.002, "loss": 2.5646, "step": 204860 }, { "epoch": 0.4081465956904246, "grad_norm": 0.1494448333978653, "learning_rate": 0.002, "loss": 2.5786, "step": 204870 }, { "epoch": 0.4081665179140635, "grad_norm": 0.16414886713027954, "learning_rate": 0.002, "loss": 2.5406, "step": 204880 }, { "epoch": 0.4081864401377024, "grad_norm": 0.20132870972156525, "learning_rate": 0.002, "loss": 2.556, "step": 204890 }, { "epoch": 0.40820636236134134, "grad_norm": 0.17220930755138397, "learning_rate": 0.002, "loss": 2.5577, "step": 204900 }, { "epoch": 0.4082262845849802, "grad_norm": 0.15960998833179474, "learning_rate": 0.002, "loss": 2.534, "step": 204910 }, { "epoch": 0.40824620680861917, "grad_norm": 0.19852304458618164, "learning_rate": 0.002, "loss": 2.5606, "step": 204920 }, { "epoch": 0.40826612903225806, "grad_norm": 0.1653895080089569, "learning_rate": 0.002, "loss": 2.5586, "step": 204930 }, { "epoch": 0.40828605125589695, "grad_norm": 0.15990351140499115, "learning_rate": 0.002, "loss": 2.5656, "step": 204940 }, { "epoch": 0.4083059734795359, "grad_norm": 0.20994459092617035, "learning_rate": 0.002, "loss": 2.5632, "step": 204950 }, { "epoch": 0.4083258957031748, "grad_norm": 0.17169497907161713, "learning_rate": 0.002, "loss": 2.5581, "step": 204960 }, { "epoch": 0.40834581792681374, "grad_norm": 0.13808129727840424, "learning_rate": 0.002, "loss": 2.5671, "step": 204970 }, { "epoch": 0.4083657401504526, "grad_norm": 0.18241667747497559, "learning_rate": 0.002, "loss": 2.5542, "step": 204980 }, { "epoch": 0.4083856623740916, "grad_norm": 0.2127857655286789, "learning_rate": 0.002, "loss": 2.555, "step": 204990 }, { "epoch": 0.40840558459773046, "grad_norm": 0.1678575873374939, "learning_rate": 0.002, "loss": 2.5665, "step": 205000 }, { "epoch": 0.40842550682136936, "grad_norm": 0.15156203508377075, "learning_rate": 0.002, "loss": 2.5646, "step": 205010 }, { "epoch": 0.4084454290450083, "grad_norm": 0.173173189163208, "learning_rate": 0.002, "loss": 2.5533, "step": 205020 }, { "epoch": 0.4084653512686472, "grad_norm": 0.17233756184577942, "learning_rate": 0.002, "loss": 2.5567, "step": 205030 }, { "epoch": 0.40848527349228614, "grad_norm": 0.17206060886383057, "learning_rate": 0.002, "loss": 2.5628, "step": 205040 }, { "epoch": 0.40850519571592503, "grad_norm": 0.1555003970861435, "learning_rate": 0.002, "loss": 2.5507, "step": 205050 }, { "epoch": 0.4085251179395639, "grad_norm": 0.15426690876483917, "learning_rate": 0.002, "loss": 2.5485, "step": 205060 }, { "epoch": 0.40854504016320287, "grad_norm": 0.1869424730539322, "learning_rate": 0.002, "loss": 2.5587, "step": 205070 }, { "epoch": 0.40856496238684176, "grad_norm": 0.2238467037677765, "learning_rate": 0.002, "loss": 2.5677, "step": 205080 }, { "epoch": 0.4085848846104807, "grad_norm": 0.15068140625953674, "learning_rate": 0.002, "loss": 2.5398, "step": 205090 }, { "epoch": 0.4086048068341196, "grad_norm": 0.19047392904758453, "learning_rate": 0.002, "loss": 2.5629, "step": 205100 }, { "epoch": 0.4086247290577585, "grad_norm": 0.17914821207523346, "learning_rate": 0.002, "loss": 2.5579, "step": 205110 }, { "epoch": 0.40864465128139743, "grad_norm": 0.16872793436050415, "learning_rate": 0.002, "loss": 2.5577, "step": 205120 }, { "epoch": 0.4086645735050363, "grad_norm": 0.15858151018619537, "learning_rate": 0.002, "loss": 2.5373, "step": 205130 }, { "epoch": 0.40868449572867527, "grad_norm": 0.1700325906276703, "learning_rate": 0.002, "loss": 2.5519, "step": 205140 }, { "epoch": 0.40870441795231416, "grad_norm": 0.15679091215133667, "learning_rate": 0.002, "loss": 2.5637, "step": 205150 }, { "epoch": 0.4087243401759531, "grad_norm": 0.1953464299440384, "learning_rate": 0.002, "loss": 2.5458, "step": 205160 }, { "epoch": 0.408744262399592, "grad_norm": 0.1650213599205017, "learning_rate": 0.002, "loss": 2.5556, "step": 205170 }, { "epoch": 0.4087641846232309, "grad_norm": 0.16209955513477325, "learning_rate": 0.002, "loss": 2.5437, "step": 205180 }, { "epoch": 0.40878410684686983, "grad_norm": 0.23049254715442657, "learning_rate": 0.002, "loss": 2.5557, "step": 205190 }, { "epoch": 0.4088040290705087, "grad_norm": 0.15909434854984283, "learning_rate": 0.002, "loss": 2.5639, "step": 205200 }, { "epoch": 0.40882395129414767, "grad_norm": 0.15180805325508118, "learning_rate": 0.002, "loss": 2.5521, "step": 205210 }, { "epoch": 0.40884387351778656, "grad_norm": 0.17873550951480865, "learning_rate": 0.002, "loss": 2.5639, "step": 205220 }, { "epoch": 0.40886379574142545, "grad_norm": 0.1729542762041092, "learning_rate": 0.002, "loss": 2.5559, "step": 205230 }, { "epoch": 0.4088837179650644, "grad_norm": 0.15638482570648193, "learning_rate": 0.002, "loss": 2.5513, "step": 205240 }, { "epoch": 0.4089036401887033, "grad_norm": 0.1734534353017807, "learning_rate": 0.002, "loss": 2.5573, "step": 205250 }, { "epoch": 0.40892356241234223, "grad_norm": 0.15026114881038666, "learning_rate": 0.002, "loss": 2.547, "step": 205260 }, { "epoch": 0.4089434846359811, "grad_norm": 0.17730847001075745, "learning_rate": 0.002, "loss": 2.5749, "step": 205270 }, { "epoch": 0.40896340685962, "grad_norm": 0.17677661776542664, "learning_rate": 0.002, "loss": 2.5731, "step": 205280 }, { "epoch": 0.40898332908325896, "grad_norm": 0.14674052596092224, "learning_rate": 0.002, "loss": 2.5547, "step": 205290 }, { "epoch": 0.40900325130689785, "grad_norm": 0.17605748772621155, "learning_rate": 0.002, "loss": 2.5459, "step": 205300 }, { "epoch": 0.4090231735305368, "grad_norm": 0.15535730123519897, "learning_rate": 0.002, "loss": 2.5561, "step": 205310 }, { "epoch": 0.4090430957541757, "grad_norm": 0.18194419145584106, "learning_rate": 0.002, "loss": 2.5589, "step": 205320 }, { "epoch": 0.40906301797781464, "grad_norm": 0.15268449485301971, "learning_rate": 0.002, "loss": 2.5587, "step": 205330 }, { "epoch": 0.4090829402014535, "grad_norm": 0.14844739437103271, "learning_rate": 0.002, "loss": 2.5545, "step": 205340 }, { "epoch": 0.4091028624250924, "grad_norm": 0.1588442325592041, "learning_rate": 0.002, "loss": 2.548, "step": 205350 }, { "epoch": 0.40912278464873136, "grad_norm": 0.17238619923591614, "learning_rate": 0.002, "loss": 2.5663, "step": 205360 }, { "epoch": 0.40914270687237025, "grad_norm": 0.15757112205028534, "learning_rate": 0.002, "loss": 2.5586, "step": 205370 }, { "epoch": 0.4091626290960092, "grad_norm": 0.18208442628383636, "learning_rate": 0.002, "loss": 2.5659, "step": 205380 }, { "epoch": 0.4091825513196481, "grad_norm": 0.25135302543640137, "learning_rate": 0.002, "loss": 2.5672, "step": 205390 }, { "epoch": 0.409202473543287, "grad_norm": 0.16484639048576355, "learning_rate": 0.002, "loss": 2.5619, "step": 205400 }, { "epoch": 0.40922239576692593, "grad_norm": 0.15473221242427826, "learning_rate": 0.002, "loss": 2.5526, "step": 205410 }, { "epoch": 0.4092423179905648, "grad_norm": 0.20367726683616638, "learning_rate": 0.002, "loss": 2.5541, "step": 205420 }, { "epoch": 0.40926224021420377, "grad_norm": 0.16579003632068634, "learning_rate": 0.002, "loss": 2.5436, "step": 205430 }, { "epoch": 0.40928216243784266, "grad_norm": 0.20721031725406647, "learning_rate": 0.002, "loss": 2.5484, "step": 205440 }, { "epoch": 0.4093020846614816, "grad_norm": 0.17267383635044098, "learning_rate": 0.002, "loss": 2.5644, "step": 205450 }, { "epoch": 0.4093220068851205, "grad_norm": 0.1921870857477188, "learning_rate": 0.002, "loss": 2.5584, "step": 205460 }, { "epoch": 0.4093419291087594, "grad_norm": 0.16512200236320496, "learning_rate": 0.002, "loss": 2.5561, "step": 205470 }, { "epoch": 0.40936185133239833, "grad_norm": 0.157833069562912, "learning_rate": 0.002, "loss": 2.5678, "step": 205480 }, { "epoch": 0.4093817735560372, "grad_norm": 0.1849566102027893, "learning_rate": 0.002, "loss": 2.5607, "step": 205490 }, { "epoch": 0.40940169577967617, "grad_norm": 0.1722629964351654, "learning_rate": 0.002, "loss": 2.5423, "step": 205500 }, { "epoch": 0.40942161800331506, "grad_norm": 0.18299542367458344, "learning_rate": 0.002, "loss": 2.5598, "step": 205510 }, { "epoch": 0.40944154022695395, "grad_norm": 0.15675894916057587, "learning_rate": 0.002, "loss": 2.5514, "step": 205520 }, { "epoch": 0.4094614624505929, "grad_norm": 0.16593509912490845, "learning_rate": 0.002, "loss": 2.5697, "step": 205530 }, { "epoch": 0.4094813846742318, "grad_norm": 0.15827718377113342, "learning_rate": 0.002, "loss": 2.5481, "step": 205540 }, { "epoch": 0.40950130689787073, "grad_norm": 0.21563363075256348, "learning_rate": 0.002, "loss": 2.5569, "step": 205550 }, { "epoch": 0.4095212291215096, "grad_norm": 0.16697898507118225, "learning_rate": 0.002, "loss": 2.5736, "step": 205560 }, { "epoch": 0.4095411513451485, "grad_norm": 0.18111862242221832, "learning_rate": 0.002, "loss": 2.548, "step": 205570 }, { "epoch": 0.40956107356878746, "grad_norm": 0.1844039112329483, "learning_rate": 0.002, "loss": 2.5479, "step": 205580 }, { "epoch": 0.40958099579242635, "grad_norm": 0.16880841553211212, "learning_rate": 0.002, "loss": 2.5527, "step": 205590 }, { "epoch": 0.4096009180160653, "grad_norm": 0.16433291137218475, "learning_rate": 0.002, "loss": 2.5827, "step": 205600 }, { "epoch": 0.4096208402397042, "grad_norm": 0.18142151832580566, "learning_rate": 0.002, "loss": 2.5512, "step": 205610 }, { "epoch": 0.40964076246334313, "grad_norm": 0.16010357439517975, "learning_rate": 0.002, "loss": 2.5473, "step": 205620 }, { "epoch": 0.409660684686982, "grad_norm": 0.20685020089149475, "learning_rate": 0.002, "loss": 2.5536, "step": 205630 }, { "epoch": 0.4096806069106209, "grad_norm": 0.15654437243938446, "learning_rate": 0.002, "loss": 2.5453, "step": 205640 }, { "epoch": 0.40970052913425986, "grad_norm": 0.1639118790626526, "learning_rate": 0.002, "loss": 2.5609, "step": 205650 }, { "epoch": 0.40972045135789875, "grad_norm": 0.1897183358669281, "learning_rate": 0.002, "loss": 2.5734, "step": 205660 }, { "epoch": 0.4097403735815377, "grad_norm": 0.18407949805259705, "learning_rate": 0.002, "loss": 2.5737, "step": 205670 }, { "epoch": 0.4097602958051766, "grad_norm": 0.20254269242286682, "learning_rate": 0.002, "loss": 2.557, "step": 205680 }, { "epoch": 0.4097802180288155, "grad_norm": 0.1852811574935913, "learning_rate": 0.002, "loss": 2.5647, "step": 205690 }, { "epoch": 0.4098001402524544, "grad_norm": 0.159909188747406, "learning_rate": 0.002, "loss": 2.5635, "step": 205700 }, { "epoch": 0.4098200624760933, "grad_norm": 0.16287708282470703, "learning_rate": 0.002, "loss": 2.5456, "step": 205710 }, { "epoch": 0.40983998469973226, "grad_norm": 0.1696954369544983, "learning_rate": 0.002, "loss": 2.554, "step": 205720 }, { "epoch": 0.40985990692337115, "grad_norm": 0.17168891429901123, "learning_rate": 0.002, "loss": 2.5562, "step": 205730 }, { "epoch": 0.4098798291470101, "grad_norm": 0.19402334094047546, "learning_rate": 0.002, "loss": 2.5616, "step": 205740 }, { "epoch": 0.409899751370649, "grad_norm": 0.15354515612125397, "learning_rate": 0.002, "loss": 2.5626, "step": 205750 }, { "epoch": 0.4099196735942879, "grad_norm": 0.1579853892326355, "learning_rate": 0.002, "loss": 2.5592, "step": 205760 }, { "epoch": 0.4099395958179268, "grad_norm": 0.16471417248249054, "learning_rate": 0.002, "loss": 2.5552, "step": 205770 }, { "epoch": 0.4099595180415657, "grad_norm": 0.1739833652973175, "learning_rate": 0.002, "loss": 2.5726, "step": 205780 }, { "epoch": 0.40997944026520466, "grad_norm": 0.16352605819702148, "learning_rate": 0.002, "loss": 2.5575, "step": 205790 }, { "epoch": 0.40999936248884356, "grad_norm": 0.15811577439308167, "learning_rate": 0.002, "loss": 2.5707, "step": 205800 }, { "epoch": 0.41001928471248245, "grad_norm": 0.15864214301109314, "learning_rate": 0.002, "loss": 2.5554, "step": 205810 }, { "epoch": 0.4100392069361214, "grad_norm": 0.21284960210323334, "learning_rate": 0.002, "loss": 2.5521, "step": 205820 }, { "epoch": 0.4100591291597603, "grad_norm": 0.15883032977581024, "learning_rate": 0.002, "loss": 2.549, "step": 205830 }, { "epoch": 0.41007905138339923, "grad_norm": 0.18756158649921417, "learning_rate": 0.002, "loss": 2.5576, "step": 205840 }, { "epoch": 0.4100989736070381, "grad_norm": 0.17238786816596985, "learning_rate": 0.002, "loss": 2.567, "step": 205850 }, { "epoch": 0.410118895830677, "grad_norm": 0.1505754292011261, "learning_rate": 0.002, "loss": 2.5472, "step": 205860 }, { "epoch": 0.41013881805431596, "grad_norm": 0.1829570084810257, "learning_rate": 0.002, "loss": 2.5633, "step": 205870 }, { "epoch": 0.41015874027795485, "grad_norm": 0.19202333688735962, "learning_rate": 0.002, "loss": 2.5598, "step": 205880 }, { "epoch": 0.4101786625015938, "grad_norm": 0.14866903424263, "learning_rate": 0.002, "loss": 2.5599, "step": 205890 }, { "epoch": 0.4101985847252327, "grad_norm": 0.20188364386558533, "learning_rate": 0.002, "loss": 2.5549, "step": 205900 }, { "epoch": 0.41021850694887163, "grad_norm": 0.1607075184583664, "learning_rate": 0.002, "loss": 2.5549, "step": 205910 }, { "epoch": 0.4102384291725105, "grad_norm": 0.17877274751663208, "learning_rate": 0.002, "loss": 2.5533, "step": 205920 }, { "epoch": 0.4102583513961494, "grad_norm": 0.14436085522174835, "learning_rate": 0.002, "loss": 2.5702, "step": 205930 }, { "epoch": 0.41027827361978836, "grad_norm": 0.16280145943164825, "learning_rate": 0.002, "loss": 2.5511, "step": 205940 }, { "epoch": 0.41029819584342725, "grad_norm": 0.16464969515800476, "learning_rate": 0.002, "loss": 2.571, "step": 205950 }, { "epoch": 0.4103181180670662, "grad_norm": 0.18202641606330872, "learning_rate": 0.002, "loss": 2.5528, "step": 205960 }, { "epoch": 0.4103380402907051, "grad_norm": 0.1659672111272812, "learning_rate": 0.002, "loss": 2.5412, "step": 205970 }, { "epoch": 0.410357962514344, "grad_norm": 0.19604463875293732, "learning_rate": 0.002, "loss": 2.5515, "step": 205980 }, { "epoch": 0.4103778847379829, "grad_norm": 0.17441615462303162, "learning_rate": 0.002, "loss": 2.5409, "step": 205990 }, { "epoch": 0.4103978069616218, "grad_norm": 0.14934001863002777, "learning_rate": 0.002, "loss": 2.5558, "step": 206000 }, { "epoch": 0.41041772918526076, "grad_norm": 0.1889898031949997, "learning_rate": 0.002, "loss": 2.554, "step": 206010 }, { "epoch": 0.41043765140889965, "grad_norm": 0.1688818335533142, "learning_rate": 0.002, "loss": 2.5564, "step": 206020 }, { "epoch": 0.4104575736325386, "grad_norm": 0.16415785253047943, "learning_rate": 0.002, "loss": 2.5489, "step": 206030 }, { "epoch": 0.4104774958561775, "grad_norm": 0.15901334583759308, "learning_rate": 0.002, "loss": 2.5607, "step": 206040 }, { "epoch": 0.4104974180798164, "grad_norm": 0.17459134757518768, "learning_rate": 0.002, "loss": 2.5666, "step": 206050 }, { "epoch": 0.4105173403034553, "grad_norm": 0.14801949262619019, "learning_rate": 0.002, "loss": 2.5584, "step": 206060 }, { "epoch": 0.4105372625270942, "grad_norm": 0.18476052582263947, "learning_rate": 0.002, "loss": 2.5549, "step": 206070 }, { "epoch": 0.41055718475073316, "grad_norm": 0.1563401222229004, "learning_rate": 0.002, "loss": 2.5597, "step": 206080 }, { "epoch": 0.41057710697437205, "grad_norm": 0.1810603141784668, "learning_rate": 0.002, "loss": 2.5476, "step": 206090 }, { "epoch": 0.41059702919801094, "grad_norm": 0.1723809540271759, "learning_rate": 0.002, "loss": 2.5545, "step": 206100 }, { "epoch": 0.4106169514216499, "grad_norm": 0.18678733706474304, "learning_rate": 0.002, "loss": 2.5474, "step": 206110 }, { "epoch": 0.4106368736452888, "grad_norm": 0.1712794303894043, "learning_rate": 0.002, "loss": 2.5509, "step": 206120 }, { "epoch": 0.4106567958689277, "grad_norm": 0.1696234494447708, "learning_rate": 0.002, "loss": 2.5533, "step": 206130 }, { "epoch": 0.4106767180925666, "grad_norm": 0.17928987741470337, "learning_rate": 0.002, "loss": 2.5741, "step": 206140 }, { "epoch": 0.4106966403162055, "grad_norm": 0.15455412864685059, "learning_rate": 0.002, "loss": 2.5557, "step": 206150 }, { "epoch": 0.41071656253984445, "grad_norm": 0.19884519279003143, "learning_rate": 0.002, "loss": 2.5535, "step": 206160 }, { "epoch": 0.41073648476348334, "grad_norm": 0.17384092509746552, "learning_rate": 0.002, "loss": 2.5513, "step": 206170 }, { "epoch": 0.4107564069871223, "grad_norm": 0.15221790969371796, "learning_rate": 0.002, "loss": 2.5681, "step": 206180 }, { "epoch": 0.4107763292107612, "grad_norm": 0.1646072119474411, "learning_rate": 0.002, "loss": 2.5574, "step": 206190 }, { "epoch": 0.41079625143440013, "grad_norm": 0.19682903587818146, "learning_rate": 0.002, "loss": 2.5592, "step": 206200 }, { "epoch": 0.410816173658039, "grad_norm": 0.14432772994041443, "learning_rate": 0.002, "loss": 2.5561, "step": 206210 }, { "epoch": 0.4108360958816779, "grad_norm": 0.19001086056232452, "learning_rate": 0.002, "loss": 2.5625, "step": 206220 }, { "epoch": 0.41085601810531686, "grad_norm": 0.1463889479637146, "learning_rate": 0.002, "loss": 2.5499, "step": 206230 }, { "epoch": 0.41087594032895575, "grad_norm": 0.19131110608577728, "learning_rate": 0.002, "loss": 2.5577, "step": 206240 }, { "epoch": 0.4108958625525947, "grad_norm": 0.15517453849315643, "learning_rate": 0.002, "loss": 2.5614, "step": 206250 }, { "epoch": 0.4109157847762336, "grad_norm": 0.138727605342865, "learning_rate": 0.002, "loss": 2.5495, "step": 206260 }, { "epoch": 0.4109357069998725, "grad_norm": 0.16035176813602448, "learning_rate": 0.002, "loss": 2.5723, "step": 206270 }, { "epoch": 0.4109556292235114, "grad_norm": 0.15995560586452484, "learning_rate": 0.002, "loss": 2.5734, "step": 206280 }, { "epoch": 0.4109755514471503, "grad_norm": 0.15558026731014252, "learning_rate": 0.002, "loss": 2.5599, "step": 206290 }, { "epoch": 0.41099547367078926, "grad_norm": 0.17284642159938812, "learning_rate": 0.002, "loss": 2.5692, "step": 206300 }, { "epoch": 0.41101539589442815, "grad_norm": 0.14944225549697876, "learning_rate": 0.002, "loss": 2.5537, "step": 206310 }, { "epoch": 0.41103531811806704, "grad_norm": 0.20441149175167084, "learning_rate": 0.002, "loss": 2.5646, "step": 206320 }, { "epoch": 0.411055240341706, "grad_norm": 0.1639440357685089, "learning_rate": 0.002, "loss": 2.5617, "step": 206330 }, { "epoch": 0.4110751625653449, "grad_norm": 0.15476566553115845, "learning_rate": 0.002, "loss": 2.5512, "step": 206340 }, { "epoch": 0.4110950847889838, "grad_norm": 0.20268656313419342, "learning_rate": 0.002, "loss": 2.56, "step": 206350 }, { "epoch": 0.4111150070126227, "grad_norm": 0.16304612159729004, "learning_rate": 0.002, "loss": 2.5611, "step": 206360 }, { "epoch": 0.41113492923626166, "grad_norm": 0.1609562337398529, "learning_rate": 0.002, "loss": 2.5663, "step": 206370 }, { "epoch": 0.41115485145990055, "grad_norm": 0.14593519270420074, "learning_rate": 0.002, "loss": 2.5609, "step": 206380 }, { "epoch": 0.41117477368353944, "grad_norm": 0.21127526462078094, "learning_rate": 0.002, "loss": 2.5673, "step": 206390 }, { "epoch": 0.4111946959071784, "grad_norm": 0.13903088867664337, "learning_rate": 0.002, "loss": 2.5569, "step": 206400 }, { "epoch": 0.4112146181308173, "grad_norm": 0.15809130668640137, "learning_rate": 0.002, "loss": 2.5643, "step": 206410 }, { "epoch": 0.4112345403544562, "grad_norm": 0.16336889564990997, "learning_rate": 0.002, "loss": 2.5442, "step": 206420 }, { "epoch": 0.4112544625780951, "grad_norm": 0.14209061861038208, "learning_rate": 0.002, "loss": 2.5659, "step": 206430 }, { "epoch": 0.411274384801734, "grad_norm": 0.14278976619243622, "learning_rate": 0.002, "loss": 2.556, "step": 206440 }, { "epoch": 0.41129430702537295, "grad_norm": 0.15613582730293274, "learning_rate": 0.002, "loss": 2.5603, "step": 206450 }, { "epoch": 0.41131422924901184, "grad_norm": 0.15531152486801147, "learning_rate": 0.002, "loss": 2.5706, "step": 206460 }, { "epoch": 0.4113341514726508, "grad_norm": 0.15610848367214203, "learning_rate": 0.002, "loss": 2.5665, "step": 206470 }, { "epoch": 0.4113540736962897, "grad_norm": 0.17022745311260223, "learning_rate": 0.002, "loss": 2.5698, "step": 206480 }, { "epoch": 0.4113739959199286, "grad_norm": 0.16639798879623413, "learning_rate": 0.002, "loss": 2.573, "step": 206490 }, { "epoch": 0.4113939181435675, "grad_norm": 0.16007302701473236, "learning_rate": 0.002, "loss": 2.5625, "step": 206500 }, { "epoch": 0.4114138403672064, "grad_norm": 0.16230128705501556, "learning_rate": 0.002, "loss": 2.5597, "step": 206510 }, { "epoch": 0.41143376259084535, "grad_norm": 0.14235998690128326, "learning_rate": 0.002, "loss": 2.571, "step": 206520 }, { "epoch": 0.41145368481448424, "grad_norm": 0.1830979585647583, "learning_rate": 0.002, "loss": 2.5601, "step": 206530 }, { "epoch": 0.4114736070381232, "grad_norm": 0.194401815533638, "learning_rate": 0.002, "loss": 2.5578, "step": 206540 }, { "epoch": 0.4114935292617621, "grad_norm": 0.15002571046352386, "learning_rate": 0.002, "loss": 2.5392, "step": 206550 }, { "epoch": 0.41151345148540097, "grad_norm": 0.19369271397590637, "learning_rate": 0.002, "loss": 2.5647, "step": 206560 }, { "epoch": 0.4115333737090399, "grad_norm": 0.16482731699943542, "learning_rate": 0.002, "loss": 2.5611, "step": 206570 }, { "epoch": 0.4115532959326788, "grad_norm": 0.1400620937347412, "learning_rate": 0.002, "loss": 2.5616, "step": 206580 }, { "epoch": 0.41157321815631775, "grad_norm": 0.15410682559013367, "learning_rate": 0.002, "loss": 2.5617, "step": 206590 }, { "epoch": 0.41159314037995665, "grad_norm": 0.17278556525707245, "learning_rate": 0.002, "loss": 2.5767, "step": 206600 }, { "epoch": 0.41161306260359554, "grad_norm": 0.16200371086597443, "learning_rate": 0.002, "loss": 2.5493, "step": 206610 }, { "epoch": 0.4116329848272345, "grad_norm": 0.14788779616355896, "learning_rate": 0.002, "loss": 2.5608, "step": 206620 }, { "epoch": 0.4116529070508734, "grad_norm": 0.17684389650821686, "learning_rate": 0.002, "loss": 2.568, "step": 206630 }, { "epoch": 0.4116728292745123, "grad_norm": 0.1788264811038971, "learning_rate": 0.002, "loss": 2.549, "step": 206640 }, { "epoch": 0.4116927514981512, "grad_norm": 0.17118817567825317, "learning_rate": 0.002, "loss": 2.5712, "step": 206650 }, { "epoch": 0.41171267372179016, "grad_norm": 0.1495252549648285, "learning_rate": 0.002, "loss": 2.5485, "step": 206660 }, { "epoch": 0.41173259594542905, "grad_norm": 0.1768968552350998, "learning_rate": 0.002, "loss": 2.5572, "step": 206670 }, { "epoch": 0.41175251816906794, "grad_norm": 0.15777114033699036, "learning_rate": 0.002, "loss": 2.5518, "step": 206680 }, { "epoch": 0.4117724403927069, "grad_norm": 0.14024090766906738, "learning_rate": 0.002, "loss": 2.5618, "step": 206690 }, { "epoch": 0.4117923626163458, "grad_norm": 0.16352254152297974, "learning_rate": 0.002, "loss": 2.5524, "step": 206700 }, { "epoch": 0.4118122848399847, "grad_norm": 0.1710575670003891, "learning_rate": 0.002, "loss": 2.555, "step": 206710 }, { "epoch": 0.4118322070636236, "grad_norm": 0.17070983350276947, "learning_rate": 0.002, "loss": 2.5462, "step": 206720 }, { "epoch": 0.4118521292872625, "grad_norm": 0.1805489957332611, "learning_rate": 0.002, "loss": 2.5546, "step": 206730 }, { "epoch": 0.41187205151090145, "grad_norm": 0.19084006547927856, "learning_rate": 0.002, "loss": 2.5647, "step": 206740 }, { "epoch": 0.41189197373454034, "grad_norm": 0.1859414428472519, "learning_rate": 0.002, "loss": 2.559, "step": 206750 }, { "epoch": 0.4119118959581793, "grad_norm": 0.16139653325080872, "learning_rate": 0.002, "loss": 2.555, "step": 206760 }, { "epoch": 0.4119318181818182, "grad_norm": 0.17289501428604126, "learning_rate": 0.002, "loss": 2.5658, "step": 206770 }, { "epoch": 0.4119517404054571, "grad_norm": 0.18438464403152466, "learning_rate": 0.002, "loss": 2.5468, "step": 206780 }, { "epoch": 0.411971662629096, "grad_norm": 0.15089306235313416, "learning_rate": 0.002, "loss": 2.5703, "step": 206790 }, { "epoch": 0.4119915848527349, "grad_norm": 0.16381223499774933, "learning_rate": 0.002, "loss": 2.5455, "step": 206800 }, { "epoch": 0.41201150707637385, "grad_norm": 0.16500185430049896, "learning_rate": 0.002, "loss": 2.5575, "step": 206810 }, { "epoch": 0.41203142930001274, "grad_norm": 0.17398592829704285, "learning_rate": 0.002, "loss": 2.553, "step": 206820 }, { "epoch": 0.4120513515236517, "grad_norm": 0.17682614922523499, "learning_rate": 0.002, "loss": 2.5633, "step": 206830 }, { "epoch": 0.4120712737472906, "grad_norm": 0.1927652508020401, "learning_rate": 0.002, "loss": 2.5558, "step": 206840 }, { "epoch": 0.41209119597092947, "grad_norm": 0.19599677622318268, "learning_rate": 0.002, "loss": 2.5706, "step": 206850 }, { "epoch": 0.4121111181945684, "grad_norm": 0.16026994585990906, "learning_rate": 0.002, "loss": 2.5644, "step": 206860 }, { "epoch": 0.4121310404182073, "grad_norm": 0.1484433263540268, "learning_rate": 0.002, "loss": 2.5408, "step": 206870 }, { "epoch": 0.41215096264184625, "grad_norm": 0.16781754791736603, "learning_rate": 0.002, "loss": 2.5661, "step": 206880 }, { "epoch": 0.41217088486548514, "grad_norm": 0.1762302815914154, "learning_rate": 0.002, "loss": 2.5448, "step": 206890 }, { "epoch": 0.41219080708912403, "grad_norm": 0.18651647865772247, "learning_rate": 0.002, "loss": 2.5629, "step": 206900 }, { "epoch": 0.412210729312763, "grad_norm": 0.14868532121181488, "learning_rate": 0.002, "loss": 2.5479, "step": 206910 }, { "epoch": 0.41223065153640187, "grad_norm": 0.1829410046339035, "learning_rate": 0.002, "loss": 2.5454, "step": 206920 }, { "epoch": 0.4122505737600408, "grad_norm": 0.17020219564437866, "learning_rate": 0.002, "loss": 2.5618, "step": 206930 }, { "epoch": 0.4122704959836797, "grad_norm": 0.1780885010957718, "learning_rate": 0.002, "loss": 2.5503, "step": 206940 }, { "epoch": 0.41229041820731865, "grad_norm": 0.15315984189510345, "learning_rate": 0.002, "loss": 2.5751, "step": 206950 }, { "epoch": 0.41231034043095754, "grad_norm": 0.16415159404277802, "learning_rate": 0.002, "loss": 2.5717, "step": 206960 }, { "epoch": 0.41233026265459644, "grad_norm": 0.18230479955673218, "learning_rate": 0.002, "loss": 2.5415, "step": 206970 }, { "epoch": 0.4123501848782354, "grad_norm": 0.23853734135627747, "learning_rate": 0.002, "loss": 2.5632, "step": 206980 }, { "epoch": 0.4123701071018743, "grad_norm": 0.16116583347320557, "learning_rate": 0.002, "loss": 2.5685, "step": 206990 }, { "epoch": 0.4123900293255132, "grad_norm": 0.15620079636573792, "learning_rate": 0.002, "loss": 2.5601, "step": 207000 }, { "epoch": 0.4124099515491521, "grad_norm": 0.17452594637870789, "learning_rate": 0.002, "loss": 2.5619, "step": 207010 }, { "epoch": 0.412429873772791, "grad_norm": 0.1525033563375473, "learning_rate": 0.002, "loss": 2.5556, "step": 207020 }, { "epoch": 0.41244979599642995, "grad_norm": 0.18104368448257446, "learning_rate": 0.002, "loss": 2.5405, "step": 207030 }, { "epoch": 0.41246971822006884, "grad_norm": 0.15955619513988495, "learning_rate": 0.002, "loss": 2.557, "step": 207040 }, { "epoch": 0.4124896404437078, "grad_norm": 0.18706250190734863, "learning_rate": 0.002, "loss": 2.5702, "step": 207050 }, { "epoch": 0.4125095626673467, "grad_norm": 0.14212733507156372, "learning_rate": 0.002, "loss": 2.5604, "step": 207060 }, { "epoch": 0.41252948489098556, "grad_norm": 0.18212804198265076, "learning_rate": 0.002, "loss": 2.5459, "step": 207070 }, { "epoch": 0.4125494071146245, "grad_norm": 0.17660868167877197, "learning_rate": 0.002, "loss": 2.5626, "step": 207080 }, { "epoch": 0.4125693293382634, "grad_norm": 0.18736417591571808, "learning_rate": 0.002, "loss": 2.5681, "step": 207090 }, { "epoch": 0.41258925156190235, "grad_norm": 0.17625737190246582, "learning_rate": 0.002, "loss": 2.5683, "step": 207100 }, { "epoch": 0.41260917378554124, "grad_norm": 0.14265556633472443, "learning_rate": 0.002, "loss": 2.5497, "step": 207110 }, { "epoch": 0.4126290960091802, "grad_norm": 0.16230618953704834, "learning_rate": 0.002, "loss": 2.5515, "step": 207120 }, { "epoch": 0.4126490182328191, "grad_norm": 0.15027153491973877, "learning_rate": 0.002, "loss": 2.5509, "step": 207130 }, { "epoch": 0.41266894045645797, "grad_norm": 0.19240844249725342, "learning_rate": 0.002, "loss": 2.5581, "step": 207140 }, { "epoch": 0.4126888626800969, "grad_norm": 0.1453937292098999, "learning_rate": 0.002, "loss": 2.5403, "step": 207150 }, { "epoch": 0.4127087849037358, "grad_norm": 0.16059786081314087, "learning_rate": 0.002, "loss": 2.5566, "step": 207160 }, { "epoch": 0.41272870712737475, "grad_norm": 0.19342254102230072, "learning_rate": 0.002, "loss": 2.5677, "step": 207170 }, { "epoch": 0.41274862935101364, "grad_norm": 0.16295300424098969, "learning_rate": 0.002, "loss": 2.5608, "step": 207180 }, { "epoch": 0.41276855157465253, "grad_norm": 0.17646539211273193, "learning_rate": 0.002, "loss": 2.5712, "step": 207190 }, { "epoch": 0.4127884737982915, "grad_norm": 0.1580929309129715, "learning_rate": 0.002, "loss": 2.5701, "step": 207200 }, { "epoch": 0.41280839602193037, "grad_norm": 0.20195414125919342, "learning_rate": 0.002, "loss": 2.5607, "step": 207210 }, { "epoch": 0.4128283182455693, "grad_norm": 0.14707216620445251, "learning_rate": 0.002, "loss": 2.5608, "step": 207220 }, { "epoch": 0.4128482404692082, "grad_norm": 0.14961010217666626, "learning_rate": 0.002, "loss": 2.5595, "step": 207230 }, { "epoch": 0.41286816269284715, "grad_norm": 0.1600293219089508, "learning_rate": 0.002, "loss": 2.5478, "step": 207240 }, { "epoch": 0.41288808491648604, "grad_norm": 0.15729473531246185, "learning_rate": 0.002, "loss": 2.5734, "step": 207250 }, { "epoch": 0.41290800714012493, "grad_norm": 0.1675066202878952, "learning_rate": 0.002, "loss": 2.5621, "step": 207260 }, { "epoch": 0.4129279293637639, "grad_norm": 0.16247254610061646, "learning_rate": 0.002, "loss": 2.5582, "step": 207270 }, { "epoch": 0.41294785158740277, "grad_norm": 0.19423070549964905, "learning_rate": 0.002, "loss": 2.5517, "step": 207280 }, { "epoch": 0.4129677738110417, "grad_norm": 0.1497679054737091, "learning_rate": 0.002, "loss": 2.5504, "step": 207290 }, { "epoch": 0.4129876960346806, "grad_norm": 0.19389423727989197, "learning_rate": 0.002, "loss": 2.5416, "step": 207300 }, { "epoch": 0.4130076182583195, "grad_norm": 0.18694083392620087, "learning_rate": 0.002, "loss": 2.5494, "step": 207310 }, { "epoch": 0.41302754048195844, "grad_norm": 0.16884459555149078, "learning_rate": 0.002, "loss": 2.5605, "step": 207320 }, { "epoch": 0.41304746270559733, "grad_norm": 0.16135600209236145, "learning_rate": 0.002, "loss": 2.5582, "step": 207330 }, { "epoch": 0.4130673849292363, "grad_norm": 0.17218226194381714, "learning_rate": 0.002, "loss": 2.5636, "step": 207340 }, { "epoch": 0.41308730715287517, "grad_norm": 0.17250937223434448, "learning_rate": 0.002, "loss": 2.5541, "step": 207350 }, { "epoch": 0.41310722937651406, "grad_norm": 0.15627850592136383, "learning_rate": 0.002, "loss": 2.5623, "step": 207360 }, { "epoch": 0.413127151600153, "grad_norm": 0.17687375843524933, "learning_rate": 0.002, "loss": 2.5717, "step": 207370 }, { "epoch": 0.4131470738237919, "grad_norm": 0.18963851034641266, "learning_rate": 0.002, "loss": 2.5555, "step": 207380 }, { "epoch": 0.41316699604743085, "grad_norm": 0.17426659166812897, "learning_rate": 0.002, "loss": 2.5628, "step": 207390 }, { "epoch": 0.41318691827106974, "grad_norm": 0.1610637605190277, "learning_rate": 0.002, "loss": 2.5526, "step": 207400 }, { "epoch": 0.4132068404947087, "grad_norm": 0.13533760607242584, "learning_rate": 0.002, "loss": 2.5653, "step": 207410 }, { "epoch": 0.4132267627183476, "grad_norm": 0.247565358877182, "learning_rate": 0.002, "loss": 2.5683, "step": 207420 }, { "epoch": 0.41324668494198646, "grad_norm": 0.17391285300254822, "learning_rate": 0.002, "loss": 2.5542, "step": 207430 }, { "epoch": 0.4132666071656254, "grad_norm": 0.1502600759267807, "learning_rate": 0.002, "loss": 2.5693, "step": 207440 }, { "epoch": 0.4132865293892643, "grad_norm": 0.15638907253742218, "learning_rate": 0.002, "loss": 2.549, "step": 207450 }, { "epoch": 0.41330645161290325, "grad_norm": 0.15840843319892883, "learning_rate": 0.002, "loss": 2.5583, "step": 207460 }, { "epoch": 0.41332637383654214, "grad_norm": 0.1597670316696167, "learning_rate": 0.002, "loss": 2.5544, "step": 207470 }, { "epoch": 0.41334629606018103, "grad_norm": 0.15604941546916962, "learning_rate": 0.002, "loss": 2.5439, "step": 207480 }, { "epoch": 0.41336621828382, "grad_norm": 0.16955749690532684, "learning_rate": 0.002, "loss": 2.5576, "step": 207490 }, { "epoch": 0.41338614050745887, "grad_norm": 0.1543554663658142, "learning_rate": 0.002, "loss": 2.5681, "step": 207500 }, { "epoch": 0.4134060627310978, "grad_norm": 0.17100057005882263, "learning_rate": 0.002, "loss": 2.5621, "step": 207510 }, { "epoch": 0.4134259849547367, "grad_norm": 0.20917381346225739, "learning_rate": 0.002, "loss": 2.562, "step": 207520 }, { "epoch": 0.41344590717837565, "grad_norm": 0.16911132633686066, "learning_rate": 0.002, "loss": 2.5558, "step": 207530 }, { "epoch": 0.41346582940201454, "grad_norm": 0.17367984354496002, "learning_rate": 0.002, "loss": 2.5639, "step": 207540 }, { "epoch": 0.41348575162565343, "grad_norm": 0.1463090181350708, "learning_rate": 0.002, "loss": 2.5338, "step": 207550 }, { "epoch": 0.4135056738492924, "grad_norm": 0.16358120739459991, "learning_rate": 0.002, "loss": 2.568, "step": 207560 }, { "epoch": 0.41352559607293127, "grad_norm": 0.17201919853687286, "learning_rate": 0.002, "loss": 2.546, "step": 207570 }, { "epoch": 0.4135455182965702, "grad_norm": 0.18605855107307434, "learning_rate": 0.002, "loss": 2.5427, "step": 207580 }, { "epoch": 0.4135654405202091, "grad_norm": 0.1712620109319687, "learning_rate": 0.002, "loss": 2.5722, "step": 207590 }, { "epoch": 0.413585362743848, "grad_norm": 0.1991778165102005, "learning_rate": 0.002, "loss": 2.5669, "step": 207600 }, { "epoch": 0.41360528496748694, "grad_norm": 0.16698060929775238, "learning_rate": 0.002, "loss": 2.5487, "step": 207610 }, { "epoch": 0.41362520719112583, "grad_norm": 0.14735907316207886, "learning_rate": 0.002, "loss": 2.5682, "step": 207620 }, { "epoch": 0.4136451294147648, "grad_norm": 0.22304730117321014, "learning_rate": 0.002, "loss": 2.5526, "step": 207630 }, { "epoch": 0.41366505163840367, "grad_norm": 0.1817438006401062, "learning_rate": 0.002, "loss": 2.554, "step": 207640 }, { "epoch": 0.41368497386204256, "grad_norm": 0.18517225980758667, "learning_rate": 0.002, "loss": 2.5631, "step": 207650 }, { "epoch": 0.4137048960856815, "grad_norm": 0.15816941857337952, "learning_rate": 0.002, "loss": 2.5657, "step": 207660 }, { "epoch": 0.4137248183093204, "grad_norm": 0.18970853090286255, "learning_rate": 0.002, "loss": 2.5634, "step": 207670 }, { "epoch": 0.41374474053295934, "grad_norm": 0.1698209047317505, "learning_rate": 0.002, "loss": 2.5533, "step": 207680 }, { "epoch": 0.41376466275659823, "grad_norm": 0.18288522958755493, "learning_rate": 0.002, "loss": 2.5625, "step": 207690 }, { "epoch": 0.4137845849802372, "grad_norm": 0.1589701771736145, "learning_rate": 0.002, "loss": 2.5507, "step": 207700 }, { "epoch": 0.41380450720387607, "grad_norm": 0.1951432079076767, "learning_rate": 0.002, "loss": 2.576, "step": 207710 }, { "epoch": 0.41382442942751496, "grad_norm": 0.15183433890342712, "learning_rate": 0.002, "loss": 2.5534, "step": 207720 }, { "epoch": 0.4138443516511539, "grad_norm": 0.18118485808372498, "learning_rate": 0.002, "loss": 2.5537, "step": 207730 }, { "epoch": 0.4138642738747928, "grad_norm": 0.17025527358055115, "learning_rate": 0.002, "loss": 2.562, "step": 207740 }, { "epoch": 0.41388419609843174, "grad_norm": 0.17707329988479614, "learning_rate": 0.002, "loss": 2.562, "step": 207750 }, { "epoch": 0.41390411832207064, "grad_norm": 0.15346623957157135, "learning_rate": 0.002, "loss": 2.5554, "step": 207760 }, { "epoch": 0.4139240405457095, "grad_norm": 0.14808031916618347, "learning_rate": 0.002, "loss": 2.5598, "step": 207770 }, { "epoch": 0.4139439627693485, "grad_norm": 0.16287007927894592, "learning_rate": 0.002, "loss": 2.5658, "step": 207780 }, { "epoch": 0.41396388499298736, "grad_norm": 0.17400825023651123, "learning_rate": 0.002, "loss": 2.5541, "step": 207790 }, { "epoch": 0.4139838072166263, "grad_norm": 0.20355884730815887, "learning_rate": 0.002, "loss": 2.5667, "step": 207800 }, { "epoch": 0.4140037294402652, "grad_norm": 0.15981696546077728, "learning_rate": 0.002, "loss": 2.5763, "step": 207810 }, { "epoch": 0.4140236516639041, "grad_norm": 0.1619548797607422, "learning_rate": 0.002, "loss": 2.5579, "step": 207820 }, { "epoch": 0.41404357388754304, "grad_norm": 0.21800747513771057, "learning_rate": 0.002, "loss": 2.5626, "step": 207830 }, { "epoch": 0.41406349611118193, "grad_norm": 0.21427032351493835, "learning_rate": 0.002, "loss": 2.5725, "step": 207840 }, { "epoch": 0.4140834183348209, "grad_norm": 0.1648808866739273, "learning_rate": 0.002, "loss": 2.5796, "step": 207850 }, { "epoch": 0.41410334055845976, "grad_norm": 0.18465213477611542, "learning_rate": 0.002, "loss": 2.558, "step": 207860 }, { "epoch": 0.4141232627820987, "grad_norm": 0.18470823764801025, "learning_rate": 0.002, "loss": 2.5572, "step": 207870 }, { "epoch": 0.4141431850057376, "grad_norm": 0.16128405928611755, "learning_rate": 0.002, "loss": 2.5719, "step": 207880 }, { "epoch": 0.4141631072293765, "grad_norm": 0.15563365817070007, "learning_rate": 0.002, "loss": 2.5508, "step": 207890 }, { "epoch": 0.41418302945301544, "grad_norm": 0.17136250436306, "learning_rate": 0.002, "loss": 2.5492, "step": 207900 }, { "epoch": 0.41420295167665433, "grad_norm": 0.1583538055419922, "learning_rate": 0.002, "loss": 2.5676, "step": 207910 }, { "epoch": 0.4142228739002933, "grad_norm": 0.15562540292739868, "learning_rate": 0.002, "loss": 2.5393, "step": 207920 }, { "epoch": 0.41424279612393217, "grad_norm": 0.15502378344535828, "learning_rate": 0.002, "loss": 2.5577, "step": 207930 }, { "epoch": 0.41426271834757106, "grad_norm": 0.2007349729537964, "learning_rate": 0.002, "loss": 2.5712, "step": 207940 }, { "epoch": 0.41428264057121, "grad_norm": 0.17208999395370483, "learning_rate": 0.002, "loss": 2.5569, "step": 207950 }, { "epoch": 0.4143025627948489, "grad_norm": 0.18909914791584015, "learning_rate": 0.002, "loss": 2.5584, "step": 207960 }, { "epoch": 0.41432248501848784, "grad_norm": 0.1702945977449417, "learning_rate": 0.002, "loss": 2.5754, "step": 207970 }, { "epoch": 0.41434240724212673, "grad_norm": 0.14906571805477142, "learning_rate": 0.002, "loss": 2.548, "step": 207980 }, { "epoch": 0.4143623294657657, "grad_norm": 0.1509578675031662, "learning_rate": 0.002, "loss": 2.5465, "step": 207990 }, { "epoch": 0.41438225168940457, "grad_norm": 0.1693175584077835, "learning_rate": 0.002, "loss": 2.5744, "step": 208000 }, { "epoch": 0.41440217391304346, "grad_norm": 0.20616573095321655, "learning_rate": 0.002, "loss": 2.5583, "step": 208010 }, { "epoch": 0.4144220961366824, "grad_norm": 0.1431790143251419, "learning_rate": 0.002, "loss": 2.5371, "step": 208020 }, { "epoch": 0.4144420183603213, "grad_norm": 0.17696513235569, "learning_rate": 0.002, "loss": 2.5622, "step": 208030 }, { "epoch": 0.41446194058396024, "grad_norm": 0.16705337166786194, "learning_rate": 0.002, "loss": 2.559, "step": 208040 }, { "epoch": 0.41448186280759913, "grad_norm": 0.15436911582946777, "learning_rate": 0.002, "loss": 2.564, "step": 208050 }, { "epoch": 0.414501785031238, "grad_norm": 0.1831936091184616, "learning_rate": 0.002, "loss": 2.5591, "step": 208060 }, { "epoch": 0.41452170725487697, "grad_norm": 0.18676434457302094, "learning_rate": 0.002, "loss": 2.5557, "step": 208070 }, { "epoch": 0.41454162947851586, "grad_norm": 0.185267835855484, "learning_rate": 0.002, "loss": 2.5541, "step": 208080 }, { "epoch": 0.4145615517021548, "grad_norm": 0.16463536024093628, "learning_rate": 0.002, "loss": 2.5485, "step": 208090 }, { "epoch": 0.4145814739257937, "grad_norm": 0.1859482228755951, "learning_rate": 0.002, "loss": 2.5575, "step": 208100 }, { "epoch": 0.4146013961494326, "grad_norm": 0.1745142787694931, "learning_rate": 0.002, "loss": 2.5543, "step": 208110 }, { "epoch": 0.41462131837307153, "grad_norm": 0.20507927238941193, "learning_rate": 0.002, "loss": 2.5537, "step": 208120 }, { "epoch": 0.4146412405967104, "grad_norm": 0.171659454703331, "learning_rate": 0.002, "loss": 2.5579, "step": 208130 }, { "epoch": 0.41466116282034937, "grad_norm": 0.1858878880739212, "learning_rate": 0.002, "loss": 2.5585, "step": 208140 }, { "epoch": 0.41468108504398826, "grad_norm": 0.19188173115253448, "learning_rate": 0.002, "loss": 2.5732, "step": 208150 }, { "epoch": 0.4147010072676272, "grad_norm": 0.15814800560474396, "learning_rate": 0.002, "loss": 2.5618, "step": 208160 }, { "epoch": 0.4147209294912661, "grad_norm": 0.13716736435890198, "learning_rate": 0.002, "loss": 2.5465, "step": 208170 }, { "epoch": 0.414740851714905, "grad_norm": 0.17534328997135162, "learning_rate": 0.002, "loss": 2.5518, "step": 208180 }, { "epoch": 0.41476077393854394, "grad_norm": 0.1729656457901001, "learning_rate": 0.002, "loss": 2.5623, "step": 208190 }, { "epoch": 0.4147806961621828, "grad_norm": 0.16000589728355408, "learning_rate": 0.002, "loss": 2.5616, "step": 208200 }, { "epoch": 0.4148006183858218, "grad_norm": 0.18821579217910767, "learning_rate": 0.002, "loss": 2.5529, "step": 208210 }, { "epoch": 0.41482054060946066, "grad_norm": 0.14966370165348053, "learning_rate": 0.002, "loss": 2.5633, "step": 208220 }, { "epoch": 0.41484046283309955, "grad_norm": 0.17577391862869263, "learning_rate": 0.002, "loss": 2.5647, "step": 208230 }, { "epoch": 0.4148603850567385, "grad_norm": 0.18749238550662994, "learning_rate": 0.002, "loss": 2.5551, "step": 208240 }, { "epoch": 0.4148803072803774, "grad_norm": 0.17478704452514648, "learning_rate": 0.002, "loss": 2.5595, "step": 208250 }, { "epoch": 0.41490022950401634, "grad_norm": 0.14515647292137146, "learning_rate": 0.002, "loss": 2.568, "step": 208260 }, { "epoch": 0.41492015172765523, "grad_norm": 0.19069425761699677, "learning_rate": 0.002, "loss": 2.5591, "step": 208270 }, { "epoch": 0.4149400739512942, "grad_norm": 0.17213623225688934, "learning_rate": 0.002, "loss": 2.5574, "step": 208280 }, { "epoch": 0.41495999617493307, "grad_norm": 0.17867723107337952, "learning_rate": 0.002, "loss": 2.5779, "step": 208290 }, { "epoch": 0.41497991839857196, "grad_norm": 0.16104967892169952, "learning_rate": 0.002, "loss": 2.5611, "step": 208300 }, { "epoch": 0.4149998406222109, "grad_norm": 0.1664941906929016, "learning_rate": 0.002, "loss": 2.5446, "step": 208310 }, { "epoch": 0.4150197628458498, "grad_norm": 0.1774592250585556, "learning_rate": 0.002, "loss": 2.5638, "step": 208320 }, { "epoch": 0.41503968506948874, "grad_norm": 0.16853037476539612, "learning_rate": 0.002, "loss": 2.5655, "step": 208330 }, { "epoch": 0.41505960729312763, "grad_norm": 0.15767812728881836, "learning_rate": 0.002, "loss": 2.5708, "step": 208340 }, { "epoch": 0.4150795295167665, "grad_norm": 0.17666976153850555, "learning_rate": 0.002, "loss": 2.543, "step": 208350 }, { "epoch": 0.41509945174040547, "grad_norm": 0.1453232616186142, "learning_rate": 0.002, "loss": 2.5498, "step": 208360 }, { "epoch": 0.41511937396404436, "grad_norm": 0.18914052844047546, "learning_rate": 0.002, "loss": 2.5561, "step": 208370 }, { "epoch": 0.4151392961876833, "grad_norm": 0.15564359724521637, "learning_rate": 0.002, "loss": 2.5416, "step": 208380 }, { "epoch": 0.4151592184113222, "grad_norm": 0.16306163370609283, "learning_rate": 0.002, "loss": 2.5594, "step": 208390 }, { "epoch": 0.4151791406349611, "grad_norm": 0.1725044846534729, "learning_rate": 0.002, "loss": 2.5577, "step": 208400 }, { "epoch": 0.41519906285860003, "grad_norm": 0.15089985728263855, "learning_rate": 0.002, "loss": 2.5469, "step": 208410 }, { "epoch": 0.4152189850822389, "grad_norm": 0.15092818439006805, "learning_rate": 0.002, "loss": 2.5523, "step": 208420 }, { "epoch": 0.41523890730587787, "grad_norm": 0.15999051928520203, "learning_rate": 0.002, "loss": 2.5491, "step": 208430 }, { "epoch": 0.41525882952951676, "grad_norm": 0.23663359880447388, "learning_rate": 0.002, "loss": 2.5561, "step": 208440 }, { "epoch": 0.4152787517531557, "grad_norm": 0.18342804908752441, "learning_rate": 0.002, "loss": 2.555, "step": 208450 }, { "epoch": 0.4152986739767946, "grad_norm": 0.18746700882911682, "learning_rate": 0.002, "loss": 2.5438, "step": 208460 }, { "epoch": 0.4153185962004335, "grad_norm": 0.16979926824569702, "learning_rate": 0.002, "loss": 2.5595, "step": 208470 }, { "epoch": 0.41533851842407243, "grad_norm": 0.360022634267807, "learning_rate": 0.002, "loss": 2.5499, "step": 208480 }, { "epoch": 0.4153584406477113, "grad_norm": 0.16549570858478546, "learning_rate": 0.002, "loss": 2.5522, "step": 208490 }, { "epoch": 0.41537836287135027, "grad_norm": 0.19029907882213593, "learning_rate": 0.002, "loss": 2.5598, "step": 208500 }, { "epoch": 0.41539828509498916, "grad_norm": 0.1793459802865982, "learning_rate": 0.002, "loss": 2.5608, "step": 208510 }, { "epoch": 0.41541820731862805, "grad_norm": 0.17176957428455353, "learning_rate": 0.002, "loss": 2.5527, "step": 208520 }, { "epoch": 0.415438129542267, "grad_norm": 0.16318777203559875, "learning_rate": 0.002, "loss": 2.5505, "step": 208530 }, { "epoch": 0.4154580517659059, "grad_norm": 0.16253595054149628, "learning_rate": 0.002, "loss": 2.5573, "step": 208540 }, { "epoch": 0.41547797398954484, "grad_norm": 0.20305012166500092, "learning_rate": 0.002, "loss": 2.5468, "step": 208550 }, { "epoch": 0.4154978962131837, "grad_norm": 0.1524398773908615, "learning_rate": 0.002, "loss": 2.5476, "step": 208560 }, { "epoch": 0.4155178184368226, "grad_norm": 0.16239820420742035, "learning_rate": 0.002, "loss": 2.566, "step": 208570 }, { "epoch": 0.41553774066046156, "grad_norm": 0.19436082243919373, "learning_rate": 0.002, "loss": 2.5459, "step": 208580 }, { "epoch": 0.41555766288410045, "grad_norm": 0.15359143912792206, "learning_rate": 0.002, "loss": 2.5559, "step": 208590 }, { "epoch": 0.4155775851077394, "grad_norm": 0.1504640132188797, "learning_rate": 0.002, "loss": 2.5629, "step": 208600 }, { "epoch": 0.4155975073313783, "grad_norm": 0.15515778958797455, "learning_rate": 0.002, "loss": 2.5677, "step": 208610 }, { "epoch": 0.41561742955501724, "grad_norm": 0.18036359548568726, "learning_rate": 0.002, "loss": 2.5596, "step": 208620 }, { "epoch": 0.41563735177865613, "grad_norm": 0.16595673561096191, "learning_rate": 0.002, "loss": 2.5698, "step": 208630 }, { "epoch": 0.415657274002295, "grad_norm": 0.1712830811738968, "learning_rate": 0.002, "loss": 2.5449, "step": 208640 }, { "epoch": 0.41567719622593396, "grad_norm": 0.24350856244564056, "learning_rate": 0.002, "loss": 2.555, "step": 208650 }, { "epoch": 0.41569711844957286, "grad_norm": 0.15148420631885529, "learning_rate": 0.002, "loss": 2.5604, "step": 208660 }, { "epoch": 0.4157170406732118, "grad_norm": 0.1849907636642456, "learning_rate": 0.002, "loss": 2.5691, "step": 208670 }, { "epoch": 0.4157369628968507, "grad_norm": 0.13962574303150177, "learning_rate": 0.002, "loss": 2.5479, "step": 208680 }, { "epoch": 0.4157568851204896, "grad_norm": 0.1615733802318573, "learning_rate": 0.002, "loss": 2.5481, "step": 208690 }, { "epoch": 0.41577680734412853, "grad_norm": 0.16660179197788239, "learning_rate": 0.002, "loss": 2.5622, "step": 208700 }, { "epoch": 0.4157967295677674, "grad_norm": 0.17904405295848846, "learning_rate": 0.002, "loss": 2.584, "step": 208710 }, { "epoch": 0.41581665179140637, "grad_norm": 0.14897847175598145, "learning_rate": 0.002, "loss": 2.5443, "step": 208720 }, { "epoch": 0.41583657401504526, "grad_norm": 0.15769074857234955, "learning_rate": 0.002, "loss": 2.5508, "step": 208730 }, { "epoch": 0.4158564962386842, "grad_norm": 0.19324064254760742, "learning_rate": 0.002, "loss": 2.5613, "step": 208740 }, { "epoch": 0.4158764184623231, "grad_norm": 0.20795823633670807, "learning_rate": 0.002, "loss": 2.553, "step": 208750 }, { "epoch": 0.415896340685962, "grad_norm": 0.23179185390472412, "learning_rate": 0.002, "loss": 2.5537, "step": 208760 }, { "epoch": 0.41591626290960093, "grad_norm": 0.1527332067489624, "learning_rate": 0.002, "loss": 2.5582, "step": 208770 }, { "epoch": 0.4159361851332398, "grad_norm": 0.20218214392662048, "learning_rate": 0.002, "loss": 2.5545, "step": 208780 }, { "epoch": 0.41595610735687877, "grad_norm": 0.16466283798217773, "learning_rate": 0.002, "loss": 2.5635, "step": 208790 }, { "epoch": 0.41597602958051766, "grad_norm": 0.18863511085510254, "learning_rate": 0.002, "loss": 2.5611, "step": 208800 }, { "epoch": 0.41599595180415655, "grad_norm": 0.1872701495885849, "learning_rate": 0.002, "loss": 2.5298, "step": 208810 }, { "epoch": 0.4160158740277955, "grad_norm": 0.1910114288330078, "learning_rate": 0.002, "loss": 2.5582, "step": 208820 }, { "epoch": 0.4160357962514344, "grad_norm": 0.17135433852672577, "learning_rate": 0.002, "loss": 2.567, "step": 208830 }, { "epoch": 0.41605571847507333, "grad_norm": 0.17336654663085938, "learning_rate": 0.002, "loss": 2.5618, "step": 208840 }, { "epoch": 0.4160756406987122, "grad_norm": 0.22378215193748474, "learning_rate": 0.002, "loss": 2.5447, "step": 208850 }, { "epoch": 0.4160955629223511, "grad_norm": 0.16114521026611328, "learning_rate": 0.002, "loss": 2.5535, "step": 208860 }, { "epoch": 0.41611548514599006, "grad_norm": 0.15558531880378723, "learning_rate": 0.002, "loss": 2.5695, "step": 208870 }, { "epoch": 0.41613540736962895, "grad_norm": 0.21898701786994934, "learning_rate": 0.002, "loss": 2.5541, "step": 208880 }, { "epoch": 0.4161553295932679, "grad_norm": 0.1485738307237625, "learning_rate": 0.002, "loss": 2.565, "step": 208890 }, { "epoch": 0.4161752518169068, "grad_norm": 0.16666002571582794, "learning_rate": 0.002, "loss": 2.5728, "step": 208900 }, { "epoch": 0.41619517404054573, "grad_norm": 0.17635837197303772, "learning_rate": 0.002, "loss": 2.5565, "step": 208910 }, { "epoch": 0.4162150962641846, "grad_norm": 0.17131544649600983, "learning_rate": 0.002, "loss": 2.548, "step": 208920 }, { "epoch": 0.4162350184878235, "grad_norm": 0.17052745819091797, "learning_rate": 0.002, "loss": 2.5576, "step": 208930 }, { "epoch": 0.41625494071146246, "grad_norm": 0.1594994068145752, "learning_rate": 0.002, "loss": 2.5623, "step": 208940 }, { "epoch": 0.41627486293510135, "grad_norm": 0.17830274999141693, "learning_rate": 0.002, "loss": 2.5579, "step": 208950 }, { "epoch": 0.4162947851587403, "grad_norm": 0.14902497828006744, "learning_rate": 0.002, "loss": 2.5539, "step": 208960 }, { "epoch": 0.4163147073823792, "grad_norm": 0.13967633247375488, "learning_rate": 0.002, "loss": 2.5584, "step": 208970 }, { "epoch": 0.4163346296060181, "grad_norm": 0.16769854724407196, "learning_rate": 0.002, "loss": 2.5537, "step": 208980 }, { "epoch": 0.416354551829657, "grad_norm": 0.16530823707580566, "learning_rate": 0.002, "loss": 2.5552, "step": 208990 }, { "epoch": 0.4163744740532959, "grad_norm": 0.17949523031711578, "learning_rate": 0.002, "loss": 2.5577, "step": 209000 }, { "epoch": 0.41639439627693486, "grad_norm": 0.17803528904914856, "learning_rate": 0.002, "loss": 2.5515, "step": 209010 }, { "epoch": 0.41641431850057375, "grad_norm": 0.18462839722633362, "learning_rate": 0.002, "loss": 2.5894, "step": 209020 }, { "epoch": 0.4164342407242127, "grad_norm": 0.15987229347229004, "learning_rate": 0.002, "loss": 2.553, "step": 209030 }, { "epoch": 0.4164541629478516, "grad_norm": 0.16459247469902039, "learning_rate": 0.002, "loss": 2.548, "step": 209040 }, { "epoch": 0.4164740851714905, "grad_norm": 0.16159909963607788, "learning_rate": 0.002, "loss": 2.5573, "step": 209050 }, { "epoch": 0.41649400739512943, "grad_norm": 0.16035659611225128, "learning_rate": 0.002, "loss": 2.5671, "step": 209060 }, { "epoch": 0.4165139296187683, "grad_norm": 0.20713965594768524, "learning_rate": 0.002, "loss": 2.5644, "step": 209070 }, { "epoch": 0.41653385184240727, "grad_norm": 0.18287533521652222, "learning_rate": 0.002, "loss": 2.5464, "step": 209080 }, { "epoch": 0.41655377406604616, "grad_norm": 0.18541568517684937, "learning_rate": 0.002, "loss": 2.5578, "step": 209090 }, { "epoch": 0.41657369628968505, "grad_norm": 0.16057810187339783, "learning_rate": 0.002, "loss": 2.5645, "step": 209100 }, { "epoch": 0.416593618513324, "grad_norm": 0.16490325331687927, "learning_rate": 0.002, "loss": 2.557, "step": 209110 }, { "epoch": 0.4166135407369629, "grad_norm": 0.16506725549697876, "learning_rate": 0.002, "loss": 2.5558, "step": 209120 }, { "epoch": 0.41663346296060183, "grad_norm": 0.19243717193603516, "learning_rate": 0.002, "loss": 2.5697, "step": 209130 }, { "epoch": 0.4166533851842407, "grad_norm": 0.19772347807884216, "learning_rate": 0.002, "loss": 2.5577, "step": 209140 }, { "epoch": 0.4166733074078796, "grad_norm": 0.17959758639335632, "learning_rate": 0.002, "loss": 2.5768, "step": 209150 }, { "epoch": 0.41669322963151856, "grad_norm": 0.17083750665187836, "learning_rate": 0.002, "loss": 2.5674, "step": 209160 }, { "epoch": 0.41671315185515745, "grad_norm": 0.16479185223579407, "learning_rate": 0.002, "loss": 2.5589, "step": 209170 }, { "epoch": 0.4167330740787964, "grad_norm": 0.1815403401851654, "learning_rate": 0.002, "loss": 2.5582, "step": 209180 }, { "epoch": 0.4167529963024353, "grad_norm": 0.14364813268184662, "learning_rate": 0.002, "loss": 2.5508, "step": 209190 }, { "epoch": 0.41677291852607423, "grad_norm": 0.16397510468959808, "learning_rate": 0.002, "loss": 2.5692, "step": 209200 }, { "epoch": 0.4167928407497131, "grad_norm": 0.16819150745868683, "learning_rate": 0.002, "loss": 2.547, "step": 209210 }, { "epoch": 0.416812762973352, "grad_norm": 0.14303982257843018, "learning_rate": 0.002, "loss": 2.5528, "step": 209220 }, { "epoch": 0.41683268519699096, "grad_norm": 0.16233645379543304, "learning_rate": 0.002, "loss": 2.5498, "step": 209230 }, { "epoch": 0.41685260742062985, "grad_norm": 0.15125802159309387, "learning_rate": 0.002, "loss": 2.562, "step": 209240 }, { "epoch": 0.4168725296442688, "grad_norm": 0.1744021475315094, "learning_rate": 0.002, "loss": 2.5593, "step": 209250 }, { "epoch": 0.4168924518679077, "grad_norm": 0.18995623290538788, "learning_rate": 0.002, "loss": 2.5543, "step": 209260 }, { "epoch": 0.4169123740915466, "grad_norm": 0.16560114920139313, "learning_rate": 0.002, "loss": 2.5678, "step": 209270 }, { "epoch": 0.4169322963151855, "grad_norm": 0.1811479926109314, "learning_rate": 0.002, "loss": 2.5645, "step": 209280 }, { "epoch": 0.4169522185388244, "grad_norm": 0.20184378325939178, "learning_rate": 0.002, "loss": 2.5535, "step": 209290 }, { "epoch": 0.41697214076246336, "grad_norm": 0.17220768332481384, "learning_rate": 0.002, "loss": 2.5474, "step": 209300 }, { "epoch": 0.41699206298610225, "grad_norm": 0.17099705338478088, "learning_rate": 0.002, "loss": 2.5743, "step": 209310 }, { "epoch": 0.41701198520974114, "grad_norm": 0.18633197247982025, "learning_rate": 0.002, "loss": 2.5662, "step": 209320 }, { "epoch": 0.4170319074333801, "grad_norm": 0.20623411238193512, "learning_rate": 0.002, "loss": 2.5675, "step": 209330 }, { "epoch": 0.417051829657019, "grad_norm": 0.15979121625423431, "learning_rate": 0.002, "loss": 2.5575, "step": 209340 }, { "epoch": 0.4170717518806579, "grad_norm": 0.17675019800662994, "learning_rate": 0.002, "loss": 2.5591, "step": 209350 }, { "epoch": 0.4170916741042968, "grad_norm": 0.16830790042877197, "learning_rate": 0.002, "loss": 2.5586, "step": 209360 }, { "epoch": 0.41711159632793576, "grad_norm": 0.15704558789730072, "learning_rate": 0.002, "loss": 2.5576, "step": 209370 }, { "epoch": 0.41713151855157465, "grad_norm": 0.17471547424793243, "learning_rate": 0.002, "loss": 2.5694, "step": 209380 }, { "epoch": 0.41715144077521354, "grad_norm": 0.18687042593955994, "learning_rate": 0.002, "loss": 2.5675, "step": 209390 }, { "epoch": 0.4171713629988525, "grad_norm": 0.16739389300346375, "learning_rate": 0.002, "loss": 2.5537, "step": 209400 }, { "epoch": 0.4171912852224914, "grad_norm": 0.20071224868297577, "learning_rate": 0.002, "loss": 2.5723, "step": 209410 }, { "epoch": 0.4172112074461303, "grad_norm": 0.1608002632856369, "learning_rate": 0.002, "loss": 2.5546, "step": 209420 }, { "epoch": 0.4172311296697692, "grad_norm": 0.2470613270998001, "learning_rate": 0.002, "loss": 2.554, "step": 209430 }, { "epoch": 0.4172510518934081, "grad_norm": 0.16311971843242645, "learning_rate": 0.002, "loss": 2.5651, "step": 209440 }, { "epoch": 0.41727097411704706, "grad_norm": 0.16477727890014648, "learning_rate": 0.002, "loss": 2.555, "step": 209450 }, { "epoch": 0.41729089634068595, "grad_norm": 0.20252373814582825, "learning_rate": 0.002, "loss": 2.5709, "step": 209460 }, { "epoch": 0.4173108185643249, "grad_norm": 0.16430158913135529, "learning_rate": 0.002, "loss": 2.5619, "step": 209470 }, { "epoch": 0.4173307407879638, "grad_norm": 0.17867253720760345, "learning_rate": 0.002, "loss": 2.5474, "step": 209480 }, { "epoch": 0.41735066301160273, "grad_norm": 0.1848888099193573, "learning_rate": 0.002, "loss": 2.5604, "step": 209490 }, { "epoch": 0.4173705852352416, "grad_norm": 0.18137793242931366, "learning_rate": 0.002, "loss": 2.5434, "step": 209500 }, { "epoch": 0.4173905074588805, "grad_norm": 0.17888914048671722, "learning_rate": 0.002, "loss": 2.5661, "step": 209510 }, { "epoch": 0.41741042968251946, "grad_norm": 0.176829993724823, "learning_rate": 0.002, "loss": 2.5578, "step": 209520 }, { "epoch": 0.41743035190615835, "grad_norm": 0.17537270486354828, "learning_rate": 0.002, "loss": 2.5563, "step": 209530 }, { "epoch": 0.4174502741297973, "grad_norm": 0.19862037897109985, "learning_rate": 0.002, "loss": 2.5793, "step": 209540 }, { "epoch": 0.4174701963534362, "grad_norm": 0.16126245260238647, "learning_rate": 0.002, "loss": 2.5672, "step": 209550 }, { "epoch": 0.4174901185770751, "grad_norm": 0.21271666884422302, "learning_rate": 0.002, "loss": 2.5704, "step": 209560 }, { "epoch": 0.417510040800714, "grad_norm": 0.16309207677841187, "learning_rate": 0.002, "loss": 2.5605, "step": 209570 }, { "epoch": 0.4175299630243529, "grad_norm": 0.1467173993587494, "learning_rate": 0.002, "loss": 2.5635, "step": 209580 }, { "epoch": 0.41754988524799186, "grad_norm": 0.16179493069648743, "learning_rate": 0.002, "loss": 2.5673, "step": 209590 }, { "epoch": 0.41756980747163075, "grad_norm": 0.15805783867835999, "learning_rate": 0.002, "loss": 2.5583, "step": 209600 }, { "epoch": 0.41758972969526964, "grad_norm": 0.1769898235797882, "learning_rate": 0.002, "loss": 2.5634, "step": 209610 }, { "epoch": 0.4176096519189086, "grad_norm": 0.1591787338256836, "learning_rate": 0.002, "loss": 2.574, "step": 209620 }, { "epoch": 0.4176295741425475, "grad_norm": 0.17352701723575592, "learning_rate": 0.002, "loss": 2.5754, "step": 209630 }, { "epoch": 0.4176494963661864, "grad_norm": 0.16182810068130493, "learning_rate": 0.002, "loss": 2.5619, "step": 209640 }, { "epoch": 0.4176694185898253, "grad_norm": 0.15659157931804657, "learning_rate": 0.002, "loss": 2.547, "step": 209650 }, { "epoch": 0.41768934081346426, "grad_norm": 0.19383709132671356, "learning_rate": 0.002, "loss": 2.5688, "step": 209660 }, { "epoch": 0.41770926303710315, "grad_norm": 0.14728468656539917, "learning_rate": 0.002, "loss": 2.5673, "step": 209670 }, { "epoch": 0.41772918526074204, "grad_norm": 0.17278160154819489, "learning_rate": 0.002, "loss": 2.5551, "step": 209680 }, { "epoch": 0.417749107484381, "grad_norm": 0.17518408596515656, "learning_rate": 0.002, "loss": 2.5683, "step": 209690 }, { "epoch": 0.4177690297080199, "grad_norm": 0.1664082258939743, "learning_rate": 0.002, "loss": 2.5706, "step": 209700 }, { "epoch": 0.4177889519316588, "grad_norm": 0.19185543060302734, "learning_rate": 0.002, "loss": 2.5594, "step": 209710 }, { "epoch": 0.4178088741552977, "grad_norm": 0.15779022872447968, "learning_rate": 0.002, "loss": 2.5648, "step": 209720 }, { "epoch": 0.4178287963789366, "grad_norm": 0.23005352914333344, "learning_rate": 0.002, "loss": 2.5498, "step": 209730 }, { "epoch": 0.41784871860257555, "grad_norm": 0.16545836627483368, "learning_rate": 0.002, "loss": 2.5563, "step": 209740 }, { "epoch": 0.41786864082621444, "grad_norm": 0.14772729575634003, "learning_rate": 0.002, "loss": 2.5505, "step": 209750 }, { "epoch": 0.4178885630498534, "grad_norm": 0.17563757300376892, "learning_rate": 0.002, "loss": 2.5689, "step": 209760 }, { "epoch": 0.4179084852734923, "grad_norm": 0.20809617638587952, "learning_rate": 0.002, "loss": 2.5422, "step": 209770 }, { "epoch": 0.4179284074971312, "grad_norm": 0.1553719937801361, "learning_rate": 0.002, "loss": 2.5446, "step": 209780 }, { "epoch": 0.4179483297207701, "grad_norm": 0.1978713423013687, "learning_rate": 0.002, "loss": 2.5694, "step": 209790 }, { "epoch": 0.417968251944409, "grad_norm": 0.18414615094661713, "learning_rate": 0.002, "loss": 2.5612, "step": 209800 }, { "epoch": 0.41798817416804795, "grad_norm": 0.185078427195549, "learning_rate": 0.002, "loss": 2.5619, "step": 209810 }, { "epoch": 0.41800809639168685, "grad_norm": 0.15802276134490967, "learning_rate": 0.002, "loss": 2.5569, "step": 209820 }, { "epoch": 0.4180280186153258, "grad_norm": 0.15335074067115784, "learning_rate": 0.002, "loss": 2.5752, "step": 209830 }, { "epoch": 0.4180479408389647, "grad_norm": 0.16609661281108856, "learning_rate": 0.002, "loss": 2.5637, "step": 209840 }, { "epoch": 0.4180678630626036, "grad_norm": 0.1725999265909195, "learning_rate": 0.002, "loss": 2.5683, "step": 209850 }, { "epoch": 0.4180877852862425, "grad_norm": 0.18161198496818542, "learning_rate": 0.002, "loss": 2.5539, "step": 209860 }, { "epoch": 0.4181077075098814, "grad_norm": 0.20848022401332855, "learning_rate": 0.002, "loss": 2.5641, "step": 209870 }, { "epoch": 0.41812762973352036, "grad_norm": 0.17434091866016388, "learning_rate": 0.002, "loss": 2.5563, "step": 209880 }, { "epoch": 0.41814755195715925, "grad_norm": 0.15171122550964355, "learning_rate": 0.002, "loss": 2.5615, "step": 209890 }, { "epoch": 0.41816747418079814, "grad_norm": 0.15304836630821228, "learning_rate": 0.002, "loss": 2.5482, "step": 209900 }, { "epoch": 0.4181873964044371, "grad_norm": 0.20792187750339508, "learning_rate": 0.002, "loss": 2.5517, "step": 209910 }, { "epoch": 0.418207318628076, "grad_norm": 0.15068788826465607, "learning_rate": 0.002, "loss": 2.5795, "step": 209920 }, { "epoch": 0.4182272408517149, "grad_norm": 0.17014431953430176, "learning_rate": 0.002, "loss": 2.5508, "step": 209930 }, { "epoch": 0.4182471630753538, "grad_norm": 0.19825030863285065, "learning_rate": 0.002, "loss": 2.5663, "step": 209940 }, { "epoch": 0.41826708529899276, "grad_norm": 0.15458981692790985, "learning_rate": 0.002, "loss": 2.5482, "step": 209950 }, { "epoch": 0.41828700752263165, "grad_norm": 0.1899200826883316, "learning_rate": 0.002, "loss": 2.549, "step": 209960 }, { "epoch": 0.41830692974627054, "grad_norm": 0.19556589424610138, "learning_rate": 0.002, "loss": 2.5653, "step": 209970 }, { "epoch": 0.4183268519699095, "grad_norm": 0.2550482451915741, "learning_rate": 0.002, "loss": 2.5663, "step": 209980 }, { "epoch": 0.4183467741935484, "grad_norm": 0.1556217223405838, "learning_rate": 0.002, "loss": 2.5466, "step": 209990 }, { "epoch": 0.4183666964171873, "grad_norm": 0.17485368251800537, "learning_rate": 0.002, "loss": 2.5633, "step": 210000 }, { "epoch": 0.4183866186408262, "grad_norm": 0.22258007526397705, "learning_rate": 0.002, "loss": 2.5437, "step": 210010 }, { "epoch": 0.4184065408644651, "grad_norm": 0.15590405464172363, "learning_rate": 0.002, "loss": 2.5579, "step": 210020 }, { "epoch": 0.41842646308810405, "grad_norm": 0.1640356034040451, "learning_rate": 0.002, "loss": 2.573, "step": 210030 }, { "epoch": 0.41844638531174294, "grad_norm": 0.21893145143985748, "learning_rate": 0.002, "loss": 2.5696, "step": 210040 }, { "epoch": 0.4184663075353819, "grad_norm": 0.15713009238243103, "learning_rate": 0.002, "loss": 2.5581, "step": 210050 }, { "epoch": 0.4184862297590208, "grad_norm": 0.16238285601139069, "learning_rate": 0.002, "loss": 2.5575, "step": 210060 }, { "epoch": 0.4185061519826597, "grad_norm": 0.19444337487220764, "learning_rate": 0.002, "loss": 2.5443, "step": 210070 }, { "epoch": 0.4185260742062986, "grad_norm": 0.1986718773841858, "learning_rate": 0.002, "loss": 2.5542, "step": 210080 }, { "epoch": 0.4185459964299375, "grad_norm": 0.15135018527507782, "learning_rate": 0.002, "loss": 2.5566, "step": 210090 }, { "epoch": 0.41856591865357645, "grad_norm": 0.14189574122428894, "learning_rate": 0.002, "loss": 2.5619, "step": 210100 }, { "epoch": 0.41858584087721534, "grad_norm": 0.1575712263584137, "learning_rate": 0.002, "loss": 2.5546, "step": 210110 }, { "epoch": 0.4186057631008543, "grad_norm": 0.16192424297332764, "learning_rate": 0.002, "loss": 2.559, "step": 210120 }, { "epoch": 0.4186256853244932, "grad_norm": 0.21696393191814423, "learning_rate": 0.002, "loss": 2.556, "step": 210130 }, { "epoch": 0.41864560754813207, "grad_norm": 0.19191919267177582, "learning_rate": 0.002, "loss": 2.5701, "step": 210140 }, { "epoch": 0.418665529771771, "grad_norm": 0.17805521190166473, "learning_rate": 0.002, "loss": 2.5584, "step": 210150 }, { "epoch": 0.4186854519954099, "grad_norm": 0.20080840587615967, "learning_rate": 0.002, "loss": 2.5586, "step": 210160 }, { "epoch": 0.41870537421904885, "grad_norm": 0.15773870050907135, "learning_rate": 0.002, "loss": 2.5414, "step": 210170 }, { "epoch": 0.41872529644268774, "grad_norm": 0.1531272977590561, "learning_rate": 0.002, "loss": 2.5709, "step": 210180 }, { "epoch": 0.41874521866632664, "grad_norm": 0.14749331772327423, "learning_rate": 0.002, "loss": 2.5527, "step": 210190 }, { "epoch": 0.4187651408899656, "grad_norm": 0.1639779508113861, "learning_rate": 0.002, "loss": 2.568, "step": 210200 }, { "epoch": 0.41878506311360447, "grad_norm": 0.1546916663646698, "learning_rate": 0.002, "loss": 2.5546, "step": 210210 }, { "epoch": 0.4188049853372434, "grad_norm": 0.18134044110774994, "learning_rate": 0.002, "loss": 2.5426, "step": 210220 }, { "epoch": 0.4188249075608823, "grad_norm": 0.1534028947353363, "learning_rate": 0.002, "loss": 2.5541, "step": 210230 }, { "epoch": 0.41884482978452126, "grad_norm": 0.15019717812538147, "learning_rate": 0.002, "loss": 2.5371, "step": 210240 }, { "epoch": 0.41886475200816015, "grad_norm": 0.18423616886138916, "learning_rate": 0.002, "loss": 2.5616, "step": 210250 }, { "epoch": 0.41888467423179904, "grad_norm": 0.14991934597492218, "learning_rate": 0.002, "loss": 2.5639, "step": 210260 }, { "epoch": 0.418904596455438, "grad_norm": 0.15670286118984222, "learning_rate": 0.002, "loss": 2.548, "step": 210270 }, { "epoch": 0.4189245186790769, "grad_norm": 0.1800849288702011, "learning_rate": 0.002, "loss": 2.5527, "step": 210280 }, { "epoch": 0.4189444409027158, "grad_norm": 0.2016022652387619, "learning_rate": 0.002, "loss": 2.5713, "step": 210290 }, { "epoch": 0.4189643631263547, "grad_norm": 0.15854394435882568, "learning_rate": 0.002, "loss": 2.5715, "step": 210300 }, { "epoch": 0.4189842853499936, "grad_norm": 0.19349151849746704, "learning_rate": 0.002, "loss": 2.5559, "step": 210310 }, { "epoch": 0.41900420757363255, "grad_norm": 0.16003575921058655, "learning_rate": 0.002, "loss": 2.5477, "step": 210320 }, { "epoch": 0.41902412979727144, "grad_norm": 0.17828257381916046, "learning_rate": 0.002, "loss": 2.562, "step": 210330 }, { "epoch": 0.4190440520209104, "grad_norm": 0.1425916701555252, "learning_rate": 0.002, "loss": 2.5511, "step": 210340 }, { "epoch": 0.4190639742445493, "grad_norm": 0.15659931302070618, "learning_rate": 0.002, "loss": 2.5667, "step": 210350 }, { "epoch": 0.41908389646818817, "grad_norm": 0.1665114015340805, "learning_rate": 0.002, "loss": 2.5576, "step": 210360 }, { "epoch": 0.4191038186918271, "grad_norm": 0.16054297983646393, "learning_rate": 0.002, "loss": 2.5571, "step": 210370 }, { "epoch": 0.419123740915466, "grad_norm": 0.14411339163780212, "learning_rate": 0.002, "loss": 2.5503, "step": 210380 }, { "epoch": 0.41914366313910495, "grad_norm": 0.19456946849822998, "learning_rate": 0.002, "loss": 2.5505, "step": 210390 }, { "epoch": 0.41916358536274384, "grad_norm": 0.18245448172092438, "learning_rate": 0.002, "loss": 2.5482, "step": 210400 }, { "epoch": 0.4191835075863828, "grad_norm": 0.19662824273109436, "learning_rate": 0.002, "loss": 2.5629, "step": 210410 }, { "epoch": 0.4192034298100217, "grad_norm": 0.17251072824001312, "learning_rate": 0.002, "loss": 2.5633, "step": 210420 }, { "epoch": 0.41922335203366057, "grad_norm": 0.19264401495456696, "learning_rate": 0.002, "loss": 2.5706, "step": 210430 }, { "epoch": 0.4192432742572995, "grad_norm": 0.2070920467376709, "learning_rate": 0.002, "loss": 2.5589, "step": 210440 }, { "epoch": 0.4192631964809384, "grad_norm": 0.14942075312137604, "learning_rate": 0.002, "loss": 2.5514, "step": 210450 }, { "epoch": 0.41928311870457735, "grad_norm": 0.1566152572631836, "learning_rate": 0.002, "loss": 2.5668, "step": 210460 }, { "epoch": 0.41930304092821624, "grad_norm": 0.15018318593502045, "learning_rate": 0.002, "loss": 2.5563, "step": 210470 }, { "epoch": 0.41932296315185513, "grad_norm": 0.17428919672966003, "learning_rate": 0.002, "loss": 2.563, "step": 210480 }, { "epoch": 0.4193428853754941, "grad_norm": 0.20447200536727905, "learning_rate": 0.002, "loss": 2.5598, "step": 210490 }, { "epoch": 0.41936280759913297, "grad_norm": 0.1619463562965393, "learning_rate": 0.002, "loss": 2.5634, "step": 210500 }, { "epoch": 0.4193827298227719, "grad_norm": 0.15138742327690125, "learning_rate": 0.002, "loss": 2.5589, "step": 210510 }, { "epoch": 0.4194026520464108, "grad_norm": 0.2139270305633545, "learning_rate": 0.002, "loss": 2.57, "step": 210520 }, { "epoch": 0.41942257427004975, "grad_norm": 0.15940344333648682, "learning_rate": 0.002, "loss": 2.5679, "step": 210530 }, { "epoch": 0.41944249649368864, "grad_norm": 0.16422967612743378, "learning_rate": 0.002, "loss": 2.5679, "step": 210540 }, { "epoch": 0.41946241871732753, "grad_norm": 0.20722563564777374, "learning_rate": 0.002, "loss": 2.5636, "step": 210550 }, { "epoch": 0.4194823409409665, "grad_norm": 0.15992403030395508, "learning_rate": 0.002, "loss": 2.5548, "step": 210560 }, { "epoch": 0.41950226316460537, "grad_norm": 0.15495933592319489, "learning_rate": 0.002, "loss": 2.5497, "step": 210570 }, { "epoch": 0.4195221853882443, "grad_norm": 0.16455012559890747, "learning_rate": 0.002, "loss": 2.5563, "step": 210580 }, { "epoch": 0.4195421076118832, "grad_norm": 0.158624529838562, "learning_rate": 0.002, "loss": 2.5458, "step": 210590 }, { "epoch": 0.4195620298355221, "grad_norm": 0.17285415530204773, "learning_rate": 0.002, "loss": 2.5561, "step": 210600 }, { "epoch": 0.41958195205916105, "grad_norm": 0.16625681519508362, "learning_rate": 0.002, "loss": 2.5518, "step": 210610 }, { "epoch": 0.41960187428279994, "grad_norm": 0.17145130038261414, "learning_rate": 0.002, "loss": 2.5561, "step": 210620 }, { "epoch": 0.4196217965064389, "grad_norm": 0.1861163228750229, "learning_rate": 0.002, "loss": 2.5534, "step": 210630 }, { "epoch": 0.4196417187300778, "grad_norm": 0.18747229874134064, "learning_rate": 0.002, "loss": 2.566, "step": 210640 }, { "epoch": 0.41966164095371666, "grad_norm": 0.1582305133342743, "learning_rate": 0.002, "loss": 2.5522, "step": 210650 }, { "epoch": 0.4196815631773556, "grad_norm": 0.1772485375404358, "learning_rate": 0.002, "loss": 2.5775, "step": 210660 }, { "epoch": 0.4197014854009945, "grad_norm": 0.1530207097530365, "learning_rate": 0.002, "loss": 2.5537, "step": 210670 }, { "epoch": 0.41972140762463345, "grad_norm": 0.16097278892993927, "learning_rate": 0.002, "loss": 2.5533, "step": 210680 }, { "epoch": 0.41974132984827234, "grad_norm": 0.1782354712486267, "learning_rate": 0.002, "loss": 2.5562, "step": 210690 }, { "epoch": 0.4197612520719113, "grad_norm": 0.15617269277572632, "learning_rate": 0.002, "loss": 2.5556, "step": 210700 }, { "epoch": 0.4197811742955502, "grad_norm": 0.19928717613220215, "learning_rate": 0.002, "loss": 2.5485, "step": 210710 }, { "epoch": 0.41980109651918907, "grad_norm": 0.18235166370868683, "learning_rate": 0.002, "loss": 2.5618, "step": 210720 }, { "epoch": 0.419821018742828, "grad_norm": 0.14509965479373932, "learning_rate": 0.002, "loss": 2.5632, "step": 210730 }, { "epoch": 0.4198409409664669, "grad_norm": 0.18907634913921356, "learning_rate": 0.002, "loss": 2.5727, "step": 210740 }, { "epoch": 0.41986086319010585, "grad_norm": 0.17647400498390198, "learning_rate": 0.002, "loss": 2.5542, "step": 210750 }, { "epoch": 0.41988078541374474, "grad_norm": 0.1648387312889099, "learning_rate": 0.002, "loss": 2.5671, "step": 210760 }, { "epoch": 0.41990070763738363, "grad_norm": 0.16771067678928375, "learning_rate": 0.002, "loss": 2.5478, "step": 210770 }, { "epoch": 0.4199206298610226, "grad_norm": 0.14481684565544128, "learning_rate": 0.002, "loss": 2.552, "step": 210780 }, { "epoch": 0.41994055208466147, "grad_norm": 0.1951725333929062, "learning_rate": 0.002, "loss": 2.5551, "step": 210790 }, { "epoch": 0.4199604743083004, "grad_norm": 0.16746796667575836, "learning_rate": 0.002, "loss": 2.5733, "step": 210800 }, { "epoch": 0.4199803965319393, "grad_norm": 0.20277820527553558, "learning_rate": 0.002, "loss": 2.5471, "step": 210810 }, { "epoch": 0.42000031875557825, "grad_norm": 0.16864416003227234, "learning_rate": 0.002, "loss": 2.5786, "step": 210820 }, { "epoch": 0.42002024097921714, "grad_norm": 0.17839203774929047, "learning_rate": 0.002, "loss": 2.5631, "step": 210830 }, { "epoch": 0.42004016320285603, "grad_norm": 0.1632649451494217, "learning_rate": 0.002, "loss": 2.5729, "step": 210840 }, { "epoch": 0.420060085426495, "grad_norm": 0.16005396842956543, "learning_rate": 0.002, "loss": 2.5585, "step": 210850 }, { "epoch": 0.42008000765013387, "grad_norm": 0.13427835702896118, "learning_rate": 0.002, "loss": 2.5647, "step": 210860 }, { "epoch": 0.4200999298737728, "grad_norm": 0.1774260252714157, "learning_rate": 0.002, "loss": 2.5465, "step": 210870 }, { "epoch": 0.4201198520974117, "grad_norm": 0.16620998084545135, "learning_rate": 0.002, "loss": 2.5646, "step": 210880 }, { "epoch": 0.4201397743210506, "grad_norm": 0.1812511533498764, "learning_rate": 0.002, "loss": 2.5478, "step": 210890 }, { "epoch": 0.42015969654468954, "grad_norm": 0.21328994631767273, "learning_rate": 0.002, "loss": 2.5621, "step": 210900 }, { "epoch": 0.42017961876832843, "grad_norm": 0.15703709423542023, "learning_rate": 0.002, "loss": 2.5695, "step": 210910 }, { "epoch": 0.4201995409919674, "grad_norm": 0.1627749353647232, "learning_rate": 0.002, "loss": 2.5636, "step": 210920 }, { "epoch": 0.42021946321560627, "grad_norm": 0.14084871113300323, "learning_rate": 0.002, "loss": 2.5683, "step": 210930 }, { "epoch": 0.42023938543924516, "grad_norm": 0.180564284324646, "learning_rate": 0.002, "loss": 2.5552, "step": 210940 }, { "epoch": 0.4202593076628841, "grad_norm": 0.15297150611877441, "learning_rate": 0.002, "loss": 2.55, "step": 210950 }, { "epoch": 0.420279229886523, "grad_norm": 0.15939241647720337, "learning_rate": 0.002, "loss": 2.5649, "step": 210960 }, { "epoch": 0.42029915211016194, "grad_norm": 0.1638633906841278, "learning_rate": 0.002, "loss": 2.5453, "step": 210970 }, { "epoch": 0.42031907433380084, "grad_norm": 0.1709139198064804, "learning_rate": 0.002, "loss": 2.5625, "step": 210980 }, { "epoch": 0.4203389965574398, "grad_norm": 0.16012296080589294, "learning_rate": 0.002, "loss": 2.5496, "step": 210990 }, { "epoch": 0.42035891878107867, "grad_norm": 0.1699274331331253, "learning_rate": 0.002, "loss": 2.5635, "step": 211000 }, { "epoch": 0.42037884100471756, "grad_norm": 0.16233976185321808, "learning_rate": 0.002, "loss": 2.5646, "step": 211010 }, { "epoch": 0.4203987632283565, "grad_norm": 0.2001238465309143, "learning_rate": 0.002, "loss": 2.5752, "step": 211020 }, { "epoch": 0.4204186854519954, "grad_norm": 0.15081264078617096, "learning_rate": 0.002, "loss": 2.5657, "step": 211030 }, { "epoch": 0.42043860767563435, "grad_norm": 0.1955261528491974, "learning_rate": 0.002, "loss": 2.5615, "step": 211040 }, { "epoch": 0.42045852989927324, "grad_norm": 0.1402076929807663, "learning_rate": 0.002, "loss": 2.5564, "step": 211050 }, { "epoch": 0.4204784521229121, "grad_norm": 0.160676047205925, "learning_rate": 0.002, "loss": 2.5562, "step": 211060 }, { "epoch": 0.4204983743465511, "grad_norm": 0.14991827309131622, "learning_rate": 0.002, "loss": 2.5638, "step": 211070 }, { "epoch": 0.42051829657018996, "grad_norm": 0.17977313697338104, "learning_rate": 0.002, "loss": 2.5628, "step": 211080 }, { "epoch": 0.4205382187938289, "grad_norm": 0.14967966079711914, "learning_rate": 0.002, "loss": 2.5615, "step": 211090 }, { "epoch": 0.4205581410174678, "grad_norm": 0.16546589136123657, "learning_rate": 0.002, "loss": 2.559, "step": 211100 }, { "epoch": 0.4205780632411067, "grad_norm": 0.1602601706981659, "learning_rate": 0.002, "loss": 2.5714, "step": 211110 }, { "epoch": 0.42059798546474564, "grad_norm": 0.17221027612686157, "learning_rate": 0.002, "loss": 2.553, "step": 211120 }, { "epoch": 0.42061790768838453, "grad_norm": 0.20528866350650787, "learning_rate": 0.002, "loss": 2.5559, "step": 211130 }, { "epoch": 0.4206378299120235, "grad_norm": 0.16958943009376526, "learning_rate": 0.002, "loss": 2.5621, "step": 211140 }, { "epoch": 0.42065775213566237, "grad_norm": 0.1778414100408554, "learning_rate": 0.002, "loss": 2.569, "step": 211150 }, { "epoch": 0.4206776743593013, "grad_norm": 0.15880891680717468, "learning_rate": 0.002, "loss": 2.5661, "step": 211160 }, { "epoch": 0.4206975965829402, "grad_norm": 0.15390339493751526, "learning_rate": 0.002, "loss": 2.5536, "step": 211170 }, { "epoch": 0.4207175188065791, "grad_norm": 0.15115319192409515, "learning_rate": 0.002, "loss": 2.5584, "step": 211180 }, { "epoch": 0.42073744103021804, "grad_norm": 0.1444932371377945, "learning_rate": 0.002, "loss": 2.5659, "step": 211190 }, { "epoch": 0.42075736325385693, "grad_norm": 0.1997634470462799, "learning_rate": 0.002, "loss": 2.5608, "step": 211200 }, { "epoch": 0.4207772854774959, "grad_norm": 0.16406472027301788, "learning_rate": 0.002, "loss": 2.5571, "step": 211210 }, { "epoch": 0.42079720770113477, "grad_norm": 0.1744646281003952, "learning_rate": 0.002, "loss": 2.5472, "step": 211220 }, { "epoch": 0.42081712992477366, "grad_norm": 0.1590854674577713, "learning_rate": 0.002, "loss": 2.565, "step": 211230 }, { "epoch": 0.4208370521484126, "grad_norm": 0.21269944310188293, "learning_rate": 0.002, "loss": 2.5646, "step": 211240 }, { "epoch": 0.4208569743720515, "grad_norm": 0.1623012274503708, "learning_rate": 0.002, "loss": 2.5532, "step": 211250 }, { "epoch": 0.42087689659569044, "grad_norm": 0.15419308841228485, "learning_rate": 0.002, "loss": 2.5651, "step": 211260 }, { "epoch": 0.42089681881932933, "grad_norm": 0.18143215775489807, "learning_rate": 0.002, "loss": 2.5572, "step": 211270 }, { "epoch": 0.4209167410429683, "grad_norm": 0.23566891252994537, "learning_rate": 0.002, "loss": 2.5435, "step": 211280 }, { "epoch": 0.42093666326660717, "grad_norm": 0.1684630662202835, "learning_rate": 0.002, "loss": 2.5511, "step": 211290 }, { "epoch": 0.42095658549024606, "grad_norm": 0.20084993541240692, "learning_rate": 0.002, "loss": 2.5764, "step": 211300 }, { "epoch": 0.420976507713885, "grad_norm": 0.147973895072937, "learning_rate": 0.002, "loss": 2.5571, "step": 211310 }, { "epoch": 0.4209964299375239, "grad_norm": 0.16973230242729187, "learning_rate": 0.002, "loss": 2.5572, "step": 211320 }, { "epoch": 0.42101635216116284, "grad_norm": 0.16317027807235718, "learning_rate": 0.002, "loss": 2.5513, "step": 211330 }, { "epoch": 0.42103627438480173, "grad_norm": 0.15291538834571838, "learning_rate": 0.002, "loss": 2.5468, "step": 211340 }, { "epoch": 0.4210561966084406, "grad_norm": 0.1888847053050995, "learning_rate": 0.002, "loss": 2.5495, "step": 211350 }, { "epoch": 0.42107611883207957, "grad_norm": 0.1564875692129135, "learning_rate": 0.002, "loss": 2.5519, "step": 211360 }, { "epoch": 0.42109604105571846, "grad_norm": 0.16972362995147705, "learning_rate": 0.002, "loss": 2.5615, "step": 211370 }, { "epoch": 0.4211159632793574, "grad_norm": 0.14431919157505035, "learning_rate": 0.002, "loss": 2.5775, "step": 211380 }, { "epoch": 0.4211358855029963, "grad_norm": 0.20007748901844025, "learning_rate": 0.002, "loss": 2.5667, "step": 211390 }, { "epoch": 0.4211558077266352, "grad_norm": 0.18562817573547363, "learning_rate": 0.002, "loss": 2.5555, "step": 211400 }, { "epoch": 0.42117572995027414, "grad_norm": 0.15768125653266907, "learning_rate": 0.002, "loss": 2.5633, "step": 211410 }, { "epoch": 0.421195652173913, "grad_norm": 0.17714469134807587, "learning_rate": 0.002, "loss": 2.5688, "step": 211420 }, { "epoch": 0.421215574397552, "grad_norm": 0.151248961687088, "learning_rate": 0.002, "loss": 2.5606, "step": 211430 }, { "epoch": 0.42123549662119086, "grad_norm": 0.16076727211475372, "learning_rate": 0.002, "loss": 2.5638, "step": 211440 }, { "epoch": 0.4212554188448298, "grad_norm": 0.14961035549640656, "learning_rate": 0.002, "loss": 2.5576, "step": 211450 }, { "epoch": 0.4212753410684687, "grad_norm": 0.17033904790878296, "learning_rate": 0.002, "loss": 2.5695, "step": 211460 }, { "epoch": 0.4212952632921076, "grad_norm": 0.16585834324359894, "learning_rate": 0.002, "loss": 2.5624, "step": 211470 }, { "epoch": 0.42131518551574654, "grad_norm": 0.17569343745708466, "learning_rate": 0.002, "loss": 2.5563, "step": 211480 }, { "epoch": 0.42133510773938543, "grad_norm": 0.20507563650608063, "learning_rate": 0.002, "loss": 2.5738, "step": 211490 }, { "epoch": 0.4213550299630244, "grad_norm": 0.16085541248321533, "learning_rate": 0.002, "loss": 2.5561, "step": 211500 }, { "epoch": 0.42137495218666327, "grad_norm": 0.1905023604631424, "learning_rate": 0.002, "loss": 2.5726, "step": 211510 }, { "epoch": 0.42139487441030216, "grad_norm": 0.1558864414691925, "learning_rate": 0.002, "loss": 2.5626, "step": 211520 }, { "epoch": 0.4214147966339411, "grad_norm": 0.15307962894439697, "learning_rate": 0.002, "loss": 2.5612, "step": 211530 }, { "epoch": 0.42143471885758, "grad_norm": 0.16387854516506195, "learning_rate": 0.002, "loss": 2.5543, "step": 211540 }, { "epoch": 0.42145464108121894, "grad_norm": 0.1496570110321045, "learning_rate": 0.002, "loss": 2.5627, "step": 211550 }, { "epoch": 0.42147456330485783, "grad_norm": 0.20078760385513306, "learning_rate": 0.002, "loss": 2.5574, "step": 211560 }, { "epoch": 0.4214944855284968, "grad_norm": 0.15022113919258118, "learning_rate": 0.002, "loss": 2.5615, "step": 211570 }, { "epoch": 0.42151440775213567, "grad_norm": 0.1955980360507965, "learning_rate": 0.002, "loss": 2.5656, "step": 211580 }, { "epoch": 0.42153432997577456, "grad_norm": 0.16125686466693878, "learning_rate": 0.002, "loss": 2.5529, "step": 211590 }, { "epoch": 0.4215542521994135, "grad_norm": 0.1607937514781952, "learning_rate": 0.002, "loss": 2.5651, "step": 211600 }, { "epoch": 0.4215741744230524, "grad_norm": 0.1838698834180832, "learning_rate": 0.002, "loss": 2.5466, "step": 211610 }, { "epoch": 0.42159409664669134, "grad_norm": 0.16229020059108734, "learning_rate": 0.002, "loss": 2.5594, "step": 211620 }, { "epoch": 0.42161401887033023, "grad_norm": 0.17530310153961182, "learning_rate": 0.002, "loss": 2.5591, "step": 211630 }, { "epoch": 0.4216339410939691, "grad_norm": 0.2152419537305832, "learning_rate": 0.002, "loss": 2.5643, "step": 211640 }, { "epoch": 0.42165386331760807, "grad_norm": 0.16637656092643738, "learning_rate": 0.002, "loss": 2.5562, "step": 211650 }, { "epoch": 0.42167378554124696, "grad_norm": 0.14940716326236725, "learning_rate": 0.002, "loss": 2.5563, "step": 211660 }, { "epoch": 0.4216937077648859, "grad_norm": 0.16644835472106934, "learning_rate": 0.002, "loss": 2.5452, "step": 211670 }, { "epoch": 0.4217136299885248, "grad_norm": 0.18371546268463135, "learning_rate": 0.002, "loss": 2.574, "step": 211680 }, { "epoch": 0.4217335522121637, "grad_norm": 0.16415619850158691, "learning_rate": 0.002, "loss": 2.5686, "step": 211690 }, { "epoch": 0.42175347443580263, "grad_norm": 0.1920803189277649, "learning_rate": 0.002, "loss": 2.5592, "step": 211700 }, { "epoch": 0.4217733966594415, "grad_norm": 0.18909446895122528, "learning_rate": 0.002, "loss": 2.5566, "step": 211710 }, { "epoch": 0.42179331888308047, "grad_norm": 0.15529637038707733, "learning_rate": 0.002, "loss": 2.5399, "step": 211720 }, { "epoch": 0.42181324110671936, "grad_norm": 0.16487543284893036, "learning_rate": 0.002, "loss": 2.5638, "step": 211730 }, { "epoch": 0.4218331633303583, "grad_norm": 0.20190241932868958, "learning_rate": 0.002, "loss": 2.5688, "step": 211740 }, { "epoch": 0.4218530855539972, "grad_norm": 0.1926373541355133, "learning_rate": 0.002, "loss": 2.5614, "step": 211750 }, { "epoch": 0.4218730077776361, "grad_norm": 0.18278658390045166, "learning_rate": 0.002, "loss": 2.5565, "step": 211760 }, { "epoch": 0.42189293000127503, "grad_norm": 0.1716773957014084, "learning_rate": 0.002, "loss": 2.553, "step": 211770 }, { "epoch": 0.4219128522249139, "grad_norm": 0.21508833765983582, "learning_rate": 0.002, "loss": 2.5609, "step": 211780 }, { "epoch": 0.42193277444855287, "grad_norm": 0.16866011917591095, "learning_rate": 0.002, "loss": 2.5548, "step": 211790 }, { "epoch": 0.42195269667219176, "grad_norm": 0.2192656397819519, "learning_rate": 0.002, "loss": 2.5768, "step": 211800 }, { "epoch": 0.42197261889583065, "grad_norm": 0.16095858812332153, "learning_rate": 0.002, "loss": 2.5619, "step": 211810 }, { "epoch": 0.4219925411194696, "grad_norm": 0.1791132539510727, "learning_rate": 0.002, "loss": 2.5584, "step": 211820 }, { "epoch": 0.4220124633431085, "grad_norm": 0.21819695830345154, "learning_rate": 0.002, "loss": 2.5614, "step": 211830 }, { "epoch": 0.42203238556674744, "grad_norm": 0.1558268964290619, "learning_rate": 0.002, "loss": 2.571, "step": 211840 }, { "epoch": 0.4220523077903863, "grad_norm": 0.18562783300876617, "learning_rate": 0.002, "loss": 2.5667, "step": 211850 }, { "epoch": 0.4220722300140252, "grad_norm": 0.1493285447359085, "learning_rate": 0.002, "loss": 2.5773, "step": 211860 }, { "epoch": 0.42209215223766416, "grad_norm": 0.17305748164653778, "learning_rate": 0.002, "loss": 2.5651, "step": 211870 }, { "epoch": 0.42211207446130306, "grad_norm": 0.1462372988462448, "learning_rate": 0.002, "loss": 2.567, "step": 211880 }, { "epoch": 0.422131996684942, "grad_norm": 0.17748647928237915, "learning_rate": 0.002, "loss": 2.5648, "step": 211890 }, { "epoch": 0.4221519189085809, "grad_norm": 0.1802283376455307, "learning_rate": 0.002, "loss": 2.5495, "step": 211900 }, { "epoch": 0.42217184113221984, "grad_norm": 0.13456299901008606, "learning_rate": 0.002, "loss": 2.5439, "step": 211910 }, { "epoch": 0.42219176335585873, "grad_norm": 0.17908407747745514, "learning_rate": 0.002, "loss": 2.5586, "step": 211920 }, { "epoch": 0.4222116855794976, "grad_norm": 0.16722792387008667, "learning_rate": 0.002, "loss": 2.5515, "step": 211930 }, { "epoch": 0.42223160780313657, "grad_norm": 0.20063534379005432, "learning_rate": 0.002, "loss": 2.5619, "step": 211940 }, { "epoch": 0.42225153002677546, "grad_norm": 0.1537032425403595, "learning_rate": 0.002, "loss": 2.5623, "step": 211950 }, { "epoch": 0.4222714522504144, "grad_norm": 0.16976255178451538, "learning_rate": 0.002, "loss": 2.5522, "step": 211960 }, { "epoch": 0.4222913744740533, "grad_norm": 0.1883072555065155, "learning_rate": 0.002, "loss": 2.561, "step": 211970 }, { "epoch": 0.4223112966976922, "grad_norm": 0.17156995832920074, "learning_rate": 0.002, "loss": 2.5751, "step": 211980 }, { "epoch": 0.42233121892133113, "grad_norm": 0.1573190838098526, "learning_rate": 0.002, "loss": 2.5731, "step": 211990 }, { "epoch": 0.42235114114497, "grad_norm": 0.15654803812503815, "learning_rate": 0.002, "loss": 2.554, "step": 212000 }, { "epoch": 0.42237106336860897, "grad_norm": 0.161848247051239, "learning_rate": 0.002, "loss": 2.5431, "step": 212010 }, { "epoch": 0.42239098559224786, "grad_norm": 0.1770763248205185, "learning_rate": 0.002, "loss": 2.5577, "step": 212020 }, { "epoch": 0.4224109078158868, "grad_norm": 0.13530795276165009, "learning_rate": 0.002, "loss": 2.5638, "step": 212030 }, { "epoch": 0.4224308300395257, "grad_norm": 0.15068024396896362, "learning_rate": 0.002, "loss": 2.5633, "step": 212040 }, { "epoch": 0.4224507522631646, "grad_norm": 0.1803973764181137, "learning_rate": 0.002, "loss": 2.5569, "step": 212050 }, { "epoch": 0.42247067448680353, "grad_norm": 0.14924700558185577, "learning_rate": 0.002, "loss": 2.554, "step": 212060 }, { "epoch": 0.4224905967104424, "grad_norm": 0.15293662250041962, "learning_rate": 0.002, "loss": 2.5542, "step": 212070 }, { "epoch": 0.42251051893408137, "grad_norm": 0.17568667232990265, "learning_rate": 0.002, "loss": 2.5538, "step": 212080 }, { "epoch": 0.42253044115772026, "grad_norm": 0.16340312361717224, "learning_rate": 0.002, "loss": 2.5634, "step": 212090 }, { "epoch": 0.42255036338135915, "grad_norm": 0.17618057131767273, "learning_rate": 0.002, "loss": 2.5689, "step": 212100 }, { "epoch": 0.4225702856049981, "grad_norm": 0.1519756019115448, "learning_rate": 0.002, "loss": 2.5543, "step": 212110 }, { "epoch": 0.422590207828637, "grad_norm": 0.14514340460300446, "learning_rate": 0.002, "loss": 2.5528, "step": 212120 }, { "epoch": 0.42261013005227593, "grad_norm": 0.19207096099853516, "learning_rate": 0.002, "loss": 2.5549, "step": 212130 }, { "epoch": 0.4226300522759148, "grad_norm": 0.17021605372428894, "learning_rate": 0.002, "loss": 2.5398, "step": 212140 }, { "epoch": 0.4226499744995537, "grad_norm": 0.1541818231344223, "learning_rate": 0.002, "loss": 2.5547, "step": 212150 }, { "epoch": 0.42266989672319266, "grad_norm": 0.15268927812576294, "learning_rate": 0.002, "loss": 2.5568, "step": 212160 }, { "epoch": 0.42268981894683155, "grad_norm": 0.14193521440029144, "learning_rate": 0.002, "loss": 2.5492, "step": 212170 }, { "epoch": 0.4227097411704705, "grad_norm": 0.17746983468532562, "learning_rate": 0.002, "loss": 2.5776, "step": 212180 }, { "epoch": 0.4227296633941094, "grad_norm": 0.1692562848329544, "learning_rate": 0.002, "loss": 2.5788, "step": 212190 }, { "epoch": 0.42274958561774834, "grad_norm": 0.18549565970897675, "learning_rate": 0.002, "loss": 2.5433, "step": 212200 }, { "epoch": 0.4227695078413872, "grad_norm": 0.14349648356437683, "learning_rate": 0.002, "loss": 2.5514, "step": 212210 }, { "epoch": 0.4227894300650261, "grad_norm": 0.16361935436725616, "learning_rate": 0.002, "loss": 2.5606, "step": 212220 }, { "epoch": 0.42280935228866506, "grad_norm": 0.19397300481796265, "learning_rate": 0.002, "loss": 2.5643, "step": 212230 }, { "epoch": 0.42282927451230395, "grad_norm": 0.15570254623889923, "learning_rate": 0.002, "loss": 2.5606, "step": 212240 }, { "epoch": 0.4228491967359429, "grad_norm": 0.18448489904403687, "learning_rate": 0.002, "loss": 2.5741, "step": 212250 }, { "epoch": 0.4228691189595818, "grad_norm": 0.14798861742019653, "learning_rate": 0.002, "loss": 2.5498, "step": 212260 }, { "epoch": 0.4228890411832207, "grad_norm": 0.20052973926067352, "learning_rate": 0.002, "loss": 2.556, "step": 212270 }, { "epoch": 0.42290896340685963, "grad_norm": 0.20301491022109985, "learning_rate": 0.002, "loss": 2.5574, "step": 212280 }, { "epoch": 0.4229288856304985, "grad_norm": 0.15742142498493195, "learning_rate": 0.002, "loss": 2.5534, "step": 212290 }, { "epoch": 0.42294880785413747, "grad_norm": 0.1448126584291458, "learning_rate": 0.002, "loss": 2.5623, "step": 212300 }, { "epoch": 0.42296873007777636, "grad_norm": 0.16701921820640564, "learning_rate": 0.002, "loss": 2.564, "step": 212310 }, { "epoch": 0.4229886523014153, "grad_norm": 0.1882115602493286, "learning_rate": 0.002, "loss": 2.5741, "step": 212320 }, { "epoch": 0.4230085745250542, "grad_norm": 0.15739575028419495, "learning_rate": 0.002, "loss": 2.5583, "step": 212330 }, { "epoch": 0.4230284967486931, "grad_norm": 0.17531833052635193, "learning_rate": 0.002, "loss": 2.5556, "step": 212340 }, { "epoch": 0.42304841897233203, "grad_norm": 0.14851126074790955, "learning_rate": 0.002, "loss": 2.554, "step": 212350 }, { "epoch": 0.4230683411959709, "grad_norm": 0.16527727246284485, "learning_rate": 0.002, "loss": 2.5536, "step": 212360 }, { "epoch": 0.42308826341960987, "grad_norm": 0.1590452343225479, "learning_rate": 0.002, "loss": 2.5573, "step": 212370 }, { "epoch": 0.42310818564324876, "grad_norm": 0.17063842713832855, "learning_rate": 0.002, "loss": 2.5437, "step": 212380 }, { "epoch": 0.42312810786688765, "grad_norm": 0.16877080500125885, "learning_rate": 0.002, "loss": 2.5486, "step": 212390 }, { "epoch": 0.4231480300905266, "grad_norm": 0.20490562915802002, "learning_rate": 0.002, "loss": 2.5783, "step": 212400 }, { "epoch": 0.4231679523141655, "grad_norm": 0.15752236545085907, "learning_rate": 0.002, "loss": 2.5602, "step": 212410 }, { "epoch": 0.42318787453780443, "grad_norm": 0.1643371433019638, "learning_rate": 0.002, "loss": 2.5645, "step": 212420 }, { "epoch": 0.4232077967614433, "grad_norm": 0.170251727104187, "learning_rate": 0.002, "loss": 2.5494, "step": 212430 }, { "epoch": 0.4232277189850822, "grad_norm": 0.15062770247459412, "learning_rate": 0.002, "loss": 2.5577, "step": 212440 }, { "epoch": 0.42324764120872116, "grad_norm": 0.19447505474090576, "learning_rate": 0.002, "loss": 2.5538, "step": 212450 }, { "epoch": 0.42326756343236005, "grad_norm": 0.17173713445663452, "learning_rate": 0.002, "loss": 2.5603, "step": 212460 }, { "epoch": 0.423287485655999, "grad_norm": 0.1800074577331543, "learning_rate": 0.002, "loss": 2.5629, "step": 212470 }, { "epoch": 0.4233074078796379, "grad_norm": 0.17459361255168915, "learning_rate": 0.002, "loss": 2.5469, "step": 212480 }, { "epoch": 0.42332733010327683, "grad_norm": 0.16948963701725006, "learning_rate": 0.002, "loss": 2.5629, "step": 212490 }, { "epoch": 0.4233472523269157, "grad_norm": 0.1696060448884964, "learning_rate": 0.002, "loss": 2.5628, "step": 212500 }, { "epoch": 0.4233671745505546, "grad_norm": 0.22025713324546814, "learning_rate": 0.002, "loss": 2.5578, "step": 212510 }, { "epoch": 0.42338709677419356, "grad_norm": 0.15628497302532196, "learning_rate": 0.002, "loss": 2.572, "step": 212520 }, { "epoch": 0.42340701899783245, "grad_norm": 0.144851952791214, "learning_rate": 0.002, "loss": 2.5658, "step": 212530 }, { "epoch": 0.4234269412214714, "grad_norm": 0.16170692443847656, "learning_rate": 0.002, "loss": 2.5625, "step": 212540 }, { "epoch": 0.4234468634451103, "grad_norm": 0.19515392184257507, "learning_rate": 0.002, "loss": 2.5447, "step": 212550 }, { "epoch": 0.4234667856687492, "grad_norm": 0.16634146869182587, "learning_rate": 0.002, "loss": 2.5491, "step": 212560 }, { "epoch": 0.4234867078923881, "grad_norm": 0.16804587841033936, "learning_rate": 0.002, "loss": 2.5624, "step": 212570 }, { "epoch": 0.423506630116027, "grad_norm": 0.154673233628273, "learning_rate": 0.002, "loss": 2.5522, "step": 212580 }, { "epoch": 0.42352655233966596, "grad_norm": 0.21366152167320251, "learning_rate": 0.002, "loss": 2.5588, "step": 212590 }, { "epoch": 0.42354647456330485, "grad_norm": 0.15431901812553406, "learning_rate": 0.002, "loss": 2.5444, "step": 212600 }, { "epoch": 0.42356639678694374, "grad_norm": 0.17517659068107605, "learning_rate": 0.002, "loss": 2.5468, "step": 212610 }, { "epoch": 0.4235863190105827, "grad_norm": 0.1723855584859848, "learning_rate": 0.002, "loss": 2.5523, "step": 212620 }, { "epoch": 0.4236062412342216, "grad_norm": 0.15565381944179535, "learning_rate": 0.002, "loss": 2.5465, "step": 212630 }, { "epoch": 0.4236261634578605, "grad_norm": 0.1587425172328949, "learning_rate": 0.002, "loss": 2.568, "step": 212640 }, { "epoch": 0.4236460856814994, "grad_norm": 0.16154471039772034, "learning_rate": 0.002, "loss": 2.5655, "step": 212650 }, { "epoch": 0.42366600790513836, "grad_norm": 0.16570091247558594, "learning_rate": 0.002, "loss": 2.5653, "step": 212660 }, { "epoch": 0.42368593012877726, "grad_norm": 0.14419858157634735, "learning_rate": 0.002, "loss": 2.5518, "step": 212670 }, { "epoch": 0.42370585235241615, "grad_norm": 0.16292235255241394, "learning_rate": 0.002, "loss": 2.5539, "step": 212680 }, { "epoch": 0.4237257745760551, "grad_norm": 0.2754773795604706, "learning_rate": 0.002, "loss": 2.5529, "step": 212690 }, { "epoch": 0.423745696799694, "grad_norm": 0.20976658165454865, "learning_rate": 0.002, "loss": 2.5607, "step": 212700 }, { "epoch": 0.42376561902333293, "grad_norm": 0.16949932277202606, "learning_rate": 0.002, "loss": 2.5622, "step": 212710 }, { "epoch": 0.4237855412469718, "grad_norm": 0.20040635764598846, "learning_rate": 0.002, "loss": 2.5641, "step": 212720 }, { "epoch": 0.4238054634706107, "grad_norm": 0.16106685996055603, "learning_rate": 0.002, "loss": 2.559, "step": 212730 }, { "epoch": 0.42382538569424966, "grad_norm": 0.17398931086063385, "learning_rate": 0.002, "loss": 2.5623, "step": 212740 }, { "epoch": 0.42384530791788855, "grad_norm": 0.16901393234729767, "learning_rate": 0.002, "loss": 2.5646, "step": 212750 }, { "epoch": 0.4238652301415275, "grad_norm": 0.14841818809509277, "learning_rate": 0.002, "loss": 2.5557, "step": 212760 }, { "epoch": 0.4238851523651664, "grad_norm": 0.19428041577339172, "learning_rate": 0.002, "loss": 2.5667, "step": 212770 }, { "epoch": 0.42390507458880533, "grad_norm": 0.16988088190555573, "learning_rate": 0.002, "loss": 2.5688, "step": 212780 }, { "epoch": 0.4239249968124442, "grad_norm": 0.16779156029224396, "learning_rate": 0.002, "loss": 2.5674, "step": 212790 }, { "epoch": 0.4239449190360831, "grad_norm": 0.14112405478954315, "learning_rate": 0.002, "loss": 2.5474, "step": 212800 }, { "epoch": 0.42396484125972206, "grad_norm": 0.1739894151687622, "learning_rate": 0.002, "loss": 2.574, "step": 212810 }, { "epoch": 0.42398476348336095, "grad_norm": 0.18147848546504974, "learning_rate": 0.002, "loss": 2.5591, "step": 212820 }, { "epoch": 0.4240046857069999, "grad_norm": 0.149224191904068, "learning_rate": 0.002, "loss": 2.5549, "step": 212830 }, { "epoch": 0.4240246079306388, "grad_norm": 0.18964748084545135, "learning_rate": 0.002, "loss": 2.5629, "step": 212840 }, { "epoch": 0.4240445301542777, "grad_norm": 0.1842271089553833, "learning_rate": 0.002, "loss": 2.5644, "step": 212850 }, { "epoch": 0.4240644523779166, "grad_norm": 0.1670786291360855, "learning_rate": 0.002, "loss": 2.5582, "step": 212860 }, { "epoch": 0.4240843746015555, "grad_norm": 0.16798095405101776, "learning_rate": 0.002, "loss": 2.5695, "step": 212870 }, { "epoch": 0.42410429682519446, "grad_norm": 0.2405124455690384, "learning_rate": 0.002, "loss": 2.5554, "step": 212880 }, { "epoch": 0.42412421904883335, "grad_norm": 0.13389329612255096, "learning_rate": 0.002, "loss": 2.5437, "step": 212890 }, { "epoch": 0.42414414127247224, "grad_norm": 0.2305694967508316, "learning_rate": 0.002, "loss": 2.5513, "step": 212900 }, { "epoch": 0.4241640634961112, "grad_norm": 0.15778715908527374, "learning_rate": 0.002, "loss": 2.5591, "step": 212910 }, { "epoch": 0.4241839857197501, "grad_norm": 0.19766564667224884, "learning_rate": 0.002, "loss": 2.5438, "step": 212920 }, { "epoch": 0.424203907943389, "grad_norm": 0.20433208346366882, "learning_rate": 0.002, "loss": 2.5583, "step": 212930 }, { "epoch": 0.4242238301670279, "grad_norm": 0.15906135737895966, "learning_rate": 0.002, "loss": 2.5652, "step": 212940 }, { "epoch": 0.42424375239066686, "grad_norm": 0.172322615981102, "learning_rate": 0.002, "loss": 2.5471, "step": 212950 }, { "epoch": 0.42426367461430575, "grad_norm": 0.14443661272525787, "learning_rate": 0.002, "loss": 2.5678, "step": 212960 }, { "epoch": 0.42428359683794464, "grad_norm": 0.17938360571861267, "learning_rate": 0.002, "loss": 2.536, "step": 212970 }, { "epoch": 0.4243035190615836, "grad_norm": 0.17095781862735748, "learning_rate": 0.002, "loss": 2.5543, "step": 212980 }, { "epoch": 0.4243234412852225, "grad_norm": 0.1696360558271408, "learning_rate": 0.002, "loss": 2.5654, "step": 212990 }, { "epoch": 0.4243433635088614, "grad_norm": 0.24166803061962128, "learning_rate": 0.002, "loss": 2.5759, "step": 213000 }, { "epoch": 0.4243632857325003, "grad_norm": 0.18327116966247559, "learning_rate": 0.002, "loss": 2.5497, "step": 213010 }, { "epoch": 0.4243832079561392, "grad_norm": 0.16768072545528412, "learning_rate": 0.002, "loss": 2.572, "step": 213020 }, { "epoch": 0.42440313017977815, "grad_norm": 0.16827954351902008, "learning_rate": 0.002, "loss": 2.5516, "step": 213030 }, { "epoch": 0.42442305240341704, "grad_norm": 0.2280624955892563, "learning_rate": 0.002, "loss": 2.5486, "step": 213040 }, { "epoch": 0.424442974627056, "grad_norm": 0.15090300142765045, "learning_rate": 0.002, "loss": 2.5617, "step": 213050 }, { "epoch": 0.4244628968506949, "grad_norm": 0.15922118723392487, "learning_rate": 0.002, "loss": 2.5539, "step": 213060 }, { "epoch": 0.42448281907433383, "grad_norm": 0.17791643738746643, "learning_rate": 0.002, "loss": 2.5552, "step": 213070 }, { "epoch": 0.4245027412979727, "grad_norm": 0.159515380859375, "learning_rate": 0.002, "loss": 2.5597, "step": 213080 }, { "epoch": 0.4245226635216116, "grad_norm": 0.1890018880367279, "learning_rate": 0.002, "loss": 2.5618, "step": 213090 }, { "epoch": 0.42454258574525056, "grad_norm": 0.18675991892814636, "learning_rate": 0.002, "loss": 2.5698, "step": 213100 }, { "epoch": 0.42456250796888945, "grad_norm": 0.17173747718334198, "learning_rate": 0.002, "loss": 2.5667, "step": 213110 }, { "epoch": 0.4245824301925284, "grad_norm": 0.19013342261314392, "learning_rate": 0.002, "loss": 2.5604, "step": 213120 }, { "epoch": 0.4246023524161673, "grad_norm": 0.17100228369235992, "learning_rate": 0.002, "loss": 2.5444, "step": 213130 }, { "epoch": 0.4246222746398062, "grad_norm": 0.16947513818740845, "learning_rate": 0.002, "loss": 2.5567, "step": 213140 }, { "epoch": 0.4246421968634451, "grad_norm": 0.1510154753923416, "learning_rate": 0.002, "loss": 2.5441, "step": 213150 }, { "epoch": 0.424662119087084, "grad_norm": 0.1708163321018219, "learning_rate": 0.002, "loss": 2.564, "step": 213160 }, { "epoch": 0.42468204131072296, "grad_norm": 0.18972891569137573, "learning_rate": 0.002, "loss": 2.5608, "step": 213170 }, { "epoch": 0.42470196353436185, "grad_norm": 0.15605536103248596, "learning_rate": 0.002, "loss": 2.5623, "step": 213180 }, { "epoch": 0.42472188575800074, "grad_norm": 0.17644284665584564, "learning_rate": 0.002, "loss": 2.5626, "step": 213190 }, { "epoch": 0.4247418079816397, "grad_norm": 0.14592353999614716, "learning_rate": 0.002, "loss": 2.5525, "step": 213200 }, { "epoch": 0.4247617302052786, "grad_norm": 0.1789747029542923, "learning_rate": 0.002, "loss": 2.5535, "step": 213210 }, { "epoch": 0.4247816524289175, "grad_norm": 0.21320827305316925, "learning_rate": 0.002, "loss": 2.566, "step": 213220 }, { "epoch": 0.4248015746525564, "grad_norm": 0.17922456562519073, "learning_rate": 0.002, "loss": 2.5588, "step": 213230 }, { "epoch": 0.42482149687619536, "grad_norm": 0.13939985632896423, "learning_rate": 0.002, "loss": 2.5663, "step": 213240 }, { "epoch": 0.42484141909983425, "grad_norm": 0.15857025980949402, "learning_rate": 0.002, "loss": 2.5561, "step": 213250 }, { "epoch": 0.42486134132347314, "grad_norm": 0.15818609297275543, "learning_rate": 0.002, "loss": 2.5467, "step": 213260 }, { "epoch": 0.4248812635471121, "grad_norm": 0.20965680480003357, "learning_rate": 0.002, "loss": 2.5505, "step": 213270 }, { "epoch": 0.424901185770751, "grad_norm": 0.16291789710521698, "learning_rate": 0.002, "loss": 2.548, "step": 213280 }, { "epoch": 0.4249211079943899, "grad_norm": 0.17059573531150818, "learning_rate": 0.002, "loss": 2.5475, "step": 213290 }, { "epoch": 0.4249410302180288, "grad_norm": 0.18766912817955017, "learning_rate": 0.002, "loss": 2.5507, "step": 213300 }, { "epoch": 0.4249609524416677, "grad_norm": 0.21068541705608368, "learning_rate": 0.002, "loss": 2.5624, "step": 213310 }, { "epoch": 0.42498087466530665, "grad_norm": 0.15000344812870026, "learning_rate": 0.002, "loss": 2.5496, "step": 213320 }, { "epoch": 0.42500079688894554, "grad_norm": 0.1376819610595703, "learning_rate": 0.002, "loss": 2.5605, "step": 213330 }, { "epoch": 0.4250207191125845, "grad_norm": 0.1715095490217209, "learning_rate": 0.002, "loss": 2.5579, "step": 213340 }, { "epoch": 0.4250406413362234, "grad_norm": 0.18284575641155243, "learning_rate": 0.002, "loss": 2.5473, "step": 213350 }, { "epoch": 0.42506056355986227, "grad_norm": 0.16195954382419586, "learning_rate": 0.002, "loss": 2.5573, "step": 213360 }, { "epoch": 0.4250804857835012, "grad_norm": 0.15112634003162384, "learning_rate": 0.002, "loss": 2.548, "step": 213370 }, { "epoch": 0.4251004080071401, "grad_norm": 0.17664723098278046, "learning_rate": 0.002, "loss": 2.5545, "step": 213380 }, { "epoch": 0.42512033023077905, "grad_norm": 0.17576397955417633, "learning_rate": 0.002, "loss": 2.5579, "step": 213390 }, { "epoch": 0.42514025245441794, "grad_norm": 0.16191041469573975, "learning_rate": 0.002, "loss": 2.5639, "step": 213400 }, { "epoch": 0.4251601746780569, "grad_norm": 0.16177713871002197, "learning_rate": 0.002, "loss": 2.5522, "step": 213410 }, { "epoch": 0.4251800969016958, "grad_norm": 0.14746205508708954, "learning_rate": 0.002, "loss": 2.5662, "step": 213420 }, { "epoch": 0.42520001912533467, "grad_norm": 0.18318620324134827, "learning_rate": 0.002, "loss": 2.5627, "step": 213430 }, { "epoch": 0.4252199413489736, "grad_norm": 0.18901288509368896, "learning_rate": 0.002, "loss": 2.5533, "step": 213440 }, { "epoch": 0.4252398635726125, "grad_norm": 0.16484230756759644, "learning_rate": 0.002, "loss": 2.5643, "step": 213450 }, { "epoch": 0.42525978579625145, "grad_norm": 0.16008475422859192, "learning_rate": 0.002, "loss": 2.5619, "step": 213460 }, { "epoch": 0.42527970801989035, "grad_norm": 0.16523180902004242, "learning_rate": 0.002, "loss": 2.5614, "step": 213470 }, { "epoch": 0.42529963024352924, "grad_norm": 0.1439722627401352, "learning_rate": 0.002, "loss": 2.5558, "step": 213480 }, { "epoch": 0.4253195524671682, "grad_norm": 0.1668301522731781, "learning_rate": 0.002, "loss": 2.5637, "step": 213490 }, { "epoch": 0.4253394746908071, "grad_norm": 0.1835762858390808, "learning_rate": 0.002, "loss": 2.5475, "step": 213500 }, { "epoch": 0.425359396914446, "grad_norm": 0.1745966374874115, "learning_rate": 0.002, "loss": 2.5701, "step": 213510 }, { "epoch": 0.4253793191380849, "grad_norm": 0.16892683506011963, "learning_rate": 0.002, "loss": 2.5553, "step": 213520 }, { "epoch": 0.42539924136172386, "grad_norm": 0.14613214135169983, "learning_rate": 0.002, "loss": 2.5656, "step": 213530 }, { "epoch": 0.42541916358536275, "grad_norm": 0.17198573052883148, "learning_rate": 0.002, "loss": 2.5551, "step": 213540 }, { "epoch": 0.42543908580900164, "grad_norm": 0.16224470734596252, "learning_rate": 0.002, "loss": 2.5572, "step": 213550 }, { "epoch": 0.4254590080326406, "grad_norm": 0.14856886863708496, "learning_rate": 0.002, "loss": 2.5515, "step": 213560 }, { "epoch": 0.4254789302562795, "grad_norm": 0.19585557281970978, "learning_rate": 0.002, "loss": 2.5496, "step": 213570 }, { "epoch": 0.4254988524799184, "grad_norm": 0.1586657017469406, "learning_rate": 0.002, "loss": 2.5626, "step": 213580 }, { "epoch": 0.4255187747035573, "grad_norm": 0.16887880861759186, "learning_rate": 0.002, "loss": 2.5578, "step": 213590 }, { "epoch": 0.4255386969271962, "grad_norm": 0.15765923261642456, "learning_rate": 0.002, "loss": 2.5514, "step": 213600 }, { "epoch": 0.42555861915083515, "grad_norm": 0.1799733191728592, "learning_rate": 0.002, "loss": 2.552, "step": 213610 }, { "epoch": 0.42557854137447404, "grad_norm": 0.1700914055109024, "learning_rate": 0.002, "loss": 2.5718, "step": 213620 }, { "epoch": 0.425598463598113, "grad_norm": 0.16900964081287384, "learning_rate": 0.002, "loss": 2.5466, "step": 213630 }, { "epoch": 0.4256183858217519, "grad_norm": 0.17358385026454926, "learning_rate": 0.002, "loss": 2.5682, "step": 213640 }, { "epoch": 0.42563830804539077, "grad_norm": 0.17905214428901672, "learning_rate": 0.002, "loss": 2.5481, "step": 213650 }, { "epoch": 0.4256582302690297, "grad_norm": 0.1611027717590332, "learning_rate": 0.002, "loss": 2.5637, "step": 213660 }, { "epoch": 0.4256781524926686, "grad_norm": 0.1352737993001938, "learning_rate": 0.002, "loss": 2.5632, "step": 213670 }, { "epoch": 0.42569807471630755, "grad_norm": 0.21945001184940338, "learning_rate": 0.002, "loss": 2.5625, "step": 213680 }, { "epoch": 0.42571799693994644, "grad_norm": 0.18274711072444916, "learning_rate": 0.002, "loss": 2.5548, "step": 213690 }, { "epoch": 0.4257379191635854, "grad_norm": 0.15522988140583038, "learning_rate": 0.002, "loss": 2.5494, "step": 213700 }, { "epoch": 0.4257578413872243, "grad_norm": 0.2043851763010025, "learning_rate": 0.002, "loss": 2.5521, "step": 213710 }, { "epoch": 0.42577776361086317, "grad_norm": 0.15483348071575165, "learning_rate": 0.002, "loss": 2.5658, "step": 213720 }, { "epoch": 0.4257976858345021, "grad_norm": 0.20354440808296204, "learning_rate": 0.002, "loss": 2.5583, "step": 213730 }, { "epoch": 0.425817608058141, "grad_norm": 0.14821110665798187, "learning_rate": 0.002, "loss": 2.5631, "step": 213740 }, { "epoch": 0.42583753028177995, "grad_norm": 0.15768776834011078, "learning_rate": 0.002, "loss": 2.5691, "step": 213750 }, { "epoch": 0.42585745250541884, "grad_norm": 0.16702428460121155, "learning_rate": 0.002, "loss": 2.5545, "step": 213760 }, { "epoch": 0.42587737472905773, "grad_norm": 0.227646604180336, "learning_rate": 0.002, "loss": 2.559, "step": 213770 }, { "epoch": 0.4258972969526967, "grad_norm": 0.18414174020290375, "learning_rate": 0.002, "loss": 2.5474, "step": 213780 }, { "epoch": 0.42591721917633557, "grad_norm": 0.17310208082199097, "learning_rate": 0.002, "loss": 2.5501, "step": 213790 }, { "epoch": 0.4259371413999745, "grad_norm": 0.18403616547584534, "learning_rate": 0.002, "loss": 2.5685, "step": 213800 }, { "epoch": 0.4259570636236134, "grad_norm": 0.148345485329628, "learning_rate": 0.002, "loss": 2.5586, "step": 213810 }, { "epoch": 0.42597698584725235, "grad_norm": 0.1978977918624878, "learning_rate": 0.002, "loss": 2.5601, "step": 213820 }, { "epoch": 0.42599690807089124, "grad_norm": 0.1883230060338974, "learning_rate": 0.002, "loss": 2.5559, "step": 213830 }, { "epoch": 0.42601683029453014, "grad_norm": 0.167002871632576, "learning_rate": 0.002, "loss": 2.55, "step": 213840 }, { "epoch": 0.4260367525181691, "grad_norm": 0.16139665246009827, "learning_rate": 0.002, "loss": 2.5766, "step": 213850 }, { "epoch": 0.426056674741808, "grad_norm": 0.1750441938638687, "learning_rate": 0.002, "loss": 2.547, "step": 213860 }, { "epoch": 0.4260765969654469, "grad_norm": 0.16136044263839722, "learning_rate": 0.002, "loss": 2.5684, "step": 213870 }, { "epoch": 0.4260965191890858, "grad_norm": 0.19783347845077515, "learning_rate": 0.002, "loss": 2.5586, "step": 213880 }, { "epoch": 0.4261164414127247, "grad_norm": 0.16931825876235962, "learning_rate": 0.002, "loss": 2.5732, "step": 213890 }, { "epoch": 0.42613636363636365, "grad_norm": 0.1464148759841919, "learning_rate": 0.002, "loss": 2.54, "step": 213900 }, { "epoch": 0.42615628586000254, "grad_norm": 0.1846061646938324, "learning_rate": 0.002, "loss": 2.5612, "step": 213910 }, { "epoch": 0.4261762080836415, "grad_norm": 0.17895430326461792, "learning_rate": 0.002, "loss": 2.5504, "step": 213920 }, { "epoch": 0.4261961303072804, "grad_norm": 0.16509464383125305, "learning_rate": 0.002, "loss": 2.5718, "step": 213930 }, { "epoch": 0.42621605253091926, "grad_norm": 0.1689348667860031, "learning_rate": 0.002, "loss": 2.5524, "step": 213940 }, { "epoch": 0.4262359747545582, "grad_norm": 0.2042563557624817, "learning_rate": 0.002, "loss": 2.563, "step": 213950 }, { "epoch": 0.4262558969781971, "grad_norm": 0.18616163730621338, "learning_rate": 0.002, "loss": 2.557, "step": 213960 }, { "epoch": 0.42627581920183605, "grad_norm": 0.18059980869293213, "learning_rate": 0.002, "loss": 2.554, "step": 213970 }, { "epoch": 0.42629574142547494, "grad_norm": 0.15382759273052216, "learning_rate": 0.002, "loss": 2.5668, "step": 213980 }, { "epoch": 0.4263156636491139, "grad_norm": 0.2298728972673416, "learning_rate": 0.002, "loss": 2.5485, "step": 213990 }, { "epoch": 0.4263355858727528, "grad_norm": 0.1429290622472763, "learning_rate": 0.002, "loss": 2.5581, "step": 214000 }, { "epoch": 0.42635550809639167, "grad_norm": 0.15741005539894104, "learning_rate": 0.002, "loss": 2.5551, "step": 214010 }, { "epoch": 0.4263754303200306, "grad_norm": 0.17111749947071075, "learning_rate": 0.002, "loss": 2.5688, "step": 214020 }, { "epoch": 0.4263953525436695, "grad_norm": 0.17055915296077728, "learning_rate": 0.002, "loss": 2.5495, "step": 214030 }, { "epoch": 0.42641527476730845, "grad_norm": 0.13605600595474243, "learning_rate": 0.002, "loss": 2.568, "step": 214040 }, { "epoch": 0.42643519699094734, "grad_norm": 0.19085679948329926, "learning_rate": 0.002, "loss": 2.5615, "step": 214050 }, { "epoch": 0.42645511921458623, "grad_norm": 0.15172059834003448, "learning_rate": 0.002, "loss": 2.5598, "step": 214060 }, { "epoch": 0.4264750414382252, "grad_norm": 0.19001543521881104, "learning_rate": 0.002, "loss": 2.5631, "step": 214070 }, { "epoch": 0.42649496366186407, "grad_norm": 0.18504361808300018, "learning_rate": 0.002, "loss": 2.5671, "step": 214080 }, { "epoch": 0.426514885885503, "grad_norm": 0.16398043930530548, "learning_rate": 0.002, "loss": 2.5477, "step": 214090 }, { "epoch": 0.4265348081091419, "grad_norm": 0.15110303461551666, "learning_rate": 0.002, "loss": 2.543, "step": 214100 }, { "epoch": 0.4265547303327808, "grad_norm": 0.16465447843074799, "learning_rate": 0.002, "loss": 2.5405, "step": 214110 }, { "epoch": 0.42657465255641974, "grad_norm": 0.16726255416870117, "learning_rate": 0.002, "loss": 2.5734, "step": 214120 }, { "epoch": 0.42659457478005863, "grad_norm": 0.17362025380134583, "learning_rate": 0.002, "loss": 2.5555, "step": 214130 }, { "epoch": 0.4266144970036976, "grad_norm": 0.19359517097473145, "learning_rate": 0.002, "loss": 2.5629, "step": 214140 }, { "epoch": 0.42663441922733647, "grad_norm": 0.15735892951488495, "learning_rate": 0.002, "loss": 2.5638, "step": 214150 }, { "epoch": 0.4266543414509754, "grad_norm": 0.15065927803516388, "learning_rate": 0.002, "loss": 2.5558, "step": 214160 }, { "epoch": 0.4266742636746143, "grad_norm": 0.15408815443515778, "learning_rate": 0.002, "loss": 2.5754, "step": 214170 }, { "epoch": 0.4266941858982532, "grad_norm": 0.16347748041152954, "learning_rate": 0.002, "loss": 2.5644, "step": 214180 }, { "epoch": 0.42671410812189214, "grad_norm": 0.17719081044197083, "learning_rate": 0.002, "loss": 2.544, "step": 214190 }, { "epoch": 0.42673403034553103, "grad_norm": 0.15113307535648346, "learning_rate": 0.002, "loss": 2.5568, "step": 214200 }, { "epoch": 0.42675395256917, "grad_norm": 0.16661006212234497, "learning_rate": 0.002, "loss": 2.5744, "step": 214210 }, { "epoch": 0.42677387479280887, "grad_norm": 0.14593784511089325, "learning_rate": 0.002, "loss": 2.5579, "step": 214220 }, { "epoch": 0.42679379701644776, "grad_norm": 0.2036413848400116, "learning_rate": 0.002, "loss": 2.5717, "step": 214230 }, { "epoch": 0.4268137192400867, "grad_norm": 0.16138264536857605, "learning_rate": 0.002, "loss": 2.5556, "step": 214240 }, { "epoch": 0.4268336414637256, "grad_norm": 0.20422014594078064, "learning_rate": 0.002, "loss": 2.5788, "step": 214250 }, { "epoch": 0.42685356368736455, "grad_norm": 0.17665109038352966, "learning_rate": 0.002, "loss": 2.55, "step": 214260 }, { "epoch": 0.42687348591100344, "grad_norm": 0.15148571133613586, "learning_rate": 0.002, "loss": 2.5447, "step": 214270 }, { "epoch": 0.4268934081346424, "grad_norm": 0.16297978162765503, "learning_rate": 0.002, "loss": 2.5487, "step": 214280 }, { "epoch": 0.4269133303582813, "grad_norm": 0.19867387413978577, "learning_rate": 0.002, "loss": 2.5583, "step": 214290 }, { "epoch": 0.42693325258192016, "grad_norm": 0.15901874005794525, "learning_rate": 0.002, "loss": 2.5523, "step": 214300 }, { "epoch": 0.4269531748055591, "grad_norm": 0.19176170229911804, "learning_rate": 0.002, "loss": 2.5615, "step": 214310 }, { "epoch": 0.426973097029198, "grad_norm": 0.16112461686134338, "learning_rate": 0.002, "loss": 2.5618, "step": 214320 }, { "epoch": 0.42699301925283695, "grad_norm": 0.19058553874492645, "learning_rate": 0.002, "loss": 2.5702, "step": 214330 }, { "epoch": 0.42701294147647584, "grad_norm": 0.21877305209636688, "learning_rate": 0.002, "loss": 2.5579, "step": 214340 }, { "epoch": 0.42703286370011473, "grad_norm": 0.17646974325180054, "learning_rate": 0.002, "loss": 2.5687, "step": 214350 }, { "epoch": 0.4270527859237537, "grad_norm": 0.2330193966627121, "learning_rate": 0.002, "loss": 2.5581, "step": 214360 }, { "epoch": 0.42707270814739257, "grad_norm": 0.14824733138084412, "learning_rate": 0.002, "loss": 2.5567, "step": 214370 }, { "epoch": 0.4270926303710315, "grad_norm": 0.1646863967180252, "learning_rate": 0.002, "loss": 2.5555, "step": 214380 }, { "epoch": 0.4271125525946704, "grad_norm": 0.17485998570919037, "learning_rate": 0.002, "loss": 2.5508, "step": 214390 }, { "epoch": 0.4271324748183093, "grad_norm": 0.16211216151714325, "learning_rate": 0.002, "loss": 2.5699, "step": 214400 }, { "epoch": 0.42715239704194824, "grad_norm": 0.1904764324426651, "learning_rate": 0.002, "loss": 2.58, "step": 214410 }, { "epoch": 0.42717231926558713, "grad_norm": 0.1479577273130417, "learning_rate": 0.002, "loss": 2.5577, "step": 214420 }, { "epoch": 0.4271922414892261, "grad_norm": 0.17895734310150146, "learning_rate": 0.002, "loss": 2.5647, "step": 214430 }, { "epoch": 0.42721216371286497, "grad_norm": 0.16152001917362213, "learning_rate": 0.002, "loss": 2.5457, "step": 214440 }, { "epoch": 0.4272320859365039, "grad_norm": 0.2209191471338272, "learning_rate": 0.002, "loss": 2.5522, "step": 214450 }, { "epoch": 0.4272520081601428, "grad_norm": 0.1746266931295395, "learning_rate": 0.002, "loss": 2.5593, "step": 214460 }, { "epoch": 0.4272719303837817, "grad_norm": 0.14342884719371796, "learning_rate": 0.002, "loss": 2.5597, "step": 214470 }, { "epoch": 0.42729185260742064, "grad_norm": 0.16596730053424835, "learning_rate": 0.002, "loss": 2.5556, "step": 214480 }, { "epoch": 0.42731177483105953, "grad_norm": 0.16195863485336304, "learning_rate": 0.002, "loss": 2.5727, "step": 214490 }, { "epoch": 0.4273316970546985, "grad_norm": 0.15396183729171753, "learning_rate": 0.002, "loss": 2.5589, "step": 214500 }, { "epoch": 0.42735161927833737, "grad_norm": 0.16777287423610687, "learning_rate": 0.002, "loss": 2.5636, "step": 214510 }, { "epoch": 0.42737154150197626, "grad_norm": 0.18366846442222595, "learning_rate": 0.002, "loss": 2.5574, "step": 214520 }, { "epoch": 0.4273914637256152, "grad_norm": 0.19234400987625122, "learning_rate": 0.002, "loss": 2.5796, "step": 214530 }, { "epoch": 0.4274113859492541, "grad_norm": 0.2008352279663086, "learning_rate": 0.002, "loss": 2.5565, "step": 214540 }, { "epoch": 0.42743130817289304, "grad_norm": 0.17992229759693146, "learning_rate": 0.002, "loss": 2.5466, "step": 214550 }, { "epoch": 0.42745123039653193, "grad_norm": 0.16474305093288422, "learning_rate": 0.002, "loss": 2.5713, "step": 214560 }, { "epoch": 0.4274711526201709, "grad_norm": 0.14171835780143738, "learning_rate": 0.002, "loss": 2.5694, "step": 214570 }, { "epoch": 0.42749107484380977, "grad_norm": 0.21086205542087555, "learning_rate": 0.002, "loss": 2.5599, "step": 214580 }, { "epoch": 0.42751099706744866, "grad_norm": 0.18046952784061432, "learning_rate": 0.002, "loss": 2.5576, "step": 214590 }, { "epoch": 0.4275309192910876, "grad_norm": 0.19883809983730316, "learning_rate": 0.002, "loss": 2.5534, "step": 214600 }, { "epoch": 0.4275508415147265, "grad_norm": 0.1940130889415741, "learning_rate": 0.002, "loss": 2.5532, "step": 214610 }, { "epoch": 0.42757076373836544, "grad_norm": 0.1583442986011505, "learning_rate": 0.002, "loss": 2.5611, "step": 214620 }, { "epoch": 0.42759068596200434, "grad_norm": 0.17757387459278107, "learning_rate": 0.002, "loss": 2.5584, "step": 214630 }, { "epoch": 0.4276106081856432, "grad_norm": 0.15722890198230743, "learning_rate": 0.002, "loss": 2.567, "step": 214640 }, { "epoch": 0.4276305304092822, "grad_norm": 0.17072126269340515, "learning_rate": 0.002, "loss": 2.5514, "step": 214650 }, { "epoch": 0.42765045263292106, "grad_norm": 0.17175987362861633, "learning_rate": 0.002, "loss": 2.5477, "step": 214660 }, { "epoch": 0.42767037485656, "grad_norm": 0.16480466723442078, "learning_rate": 0.002, "loss": 2.561, "step": 214670 }, { "epoch": 0.4276902970801989, "grad_norm": 0.18217144906520844, "learning_rate": 0.002, "loss": 2.5643, "step": 214680 }, { "epoch": 0.4277102193038378, "grad_norm": 0.14438647031784058, "learning_rate": 0.002, "loss": 2.5615, "step": 214690 }, { "epoch": 0.42773014152747674, "grad_norm": 0.14082345366477966, "learning_rate": 0.002, "loss": 2.5558, "step": 214700 }, { "epoch": 0.42775006375111563, "grad_norm": 0.16991539299488068, "learning_rate": 0.002, "loss": 2.5604, "step": 214710 }, { "epoch": 0.4277699859747546, "grad_norm": 0.21566921472549438, "learning_rate": 0.002, "loss": 2.5626, "step": 214720 }, { "epoch": 0.42778990819839346, "grad_norm": 0.1613457202911377, "learning_rate": 0.002, "loss": 2.5612, "step": 214730 }, { "epoch": 0.4278098304220324, "grad_norm": 0.15012626349925995, "learning_rate": 0.002, "loss": 2.5664, "step": 214740 }, { "epoch": 0.4278297526456713, "grad_norm": 0.15422846376895905, "learning_rate": 0.002, "loss": 2.5608, "step": 214750 }, { "epoch": 0.4278496748693102, "grad_norm": 0.18783169984817505, "learning_rate": 0.002, "loss": 2.5671, "step": 214760 }, { "epoch": 0.42786959709294914, "grad_norm": 0.17621618509292603, "learning_rate": 0.002, "loss": 2.5721, "step": 214770 }, { "epoch": 0.42788951931658803, "grad_norm": 0.15273986756801605, "learning_rate": 0.002, "loss": 2.5483, "step": 214780 }, { "epoch": 0.427909441540227, "grad_norm": 0.1895984709262848, "learning_rate": 0.002, "loss": 2.5701, "step": 214790 }, { "epoch": 0.42792936376386587, "grad_norm": 0.1599118560552597, "learning_rate": 0.002, "loss": 2.5545, "step": 214800 }, { "epoch": 0.42794928598750476, "grad_norm": 0.1665988266468048, "learning_rate": 0.002, "loss": 2.5558, "step": 214810 }, { "epoch": 0.4279692082111437, "grad_norm": 0.20777061581611633, "learning_rate": 0.002, "loss": 2.5562, "step": 214820 }, { "epoch": 0.4279891304347826, "grad_norm": 0.17851534485816956, "learning_rate": 0.002, "loss": 2.5627, "step": 214830 }, { "epoch": 0.42800905265842154, "grad_norm": 0.18615183234214783, "learning_rate": 0.002, "loss": 2.5415, "step": 214840 }, { "epoch": 0.42802897488206043, "grad_norm": 0.14003831148147583, "learning_rate": 0.002, "loss": 2.5506, "step": 214850 }, { "epoch": 0.4280488971056994, "grad_norm": 0.2362472414970398, "learning_rate": 0.002, "loss": 2.561, "step": 214860 }, { "epoch": 0.42806881932933827, "grad_norm": 0.14672324061393738, "learning_rate": 0.002, "loss": 2.5702, "step": 214870 }, { "epoch": 0.42808874155297716, "grad_norm": 0.16538864374160767, "learning_rate": 0.002, "loss": 2.5645, "step": 214880 }, { "epoch": 0.4281086637766161, "grad_norm": 0.16843853890895844, "learning_rate": 0.002, "loss": 2.5561, "step": 214890 }, { "epoch": 0.428128586000255, "grad_norm": 0.19449375569820404, "learning_rate": 0.002, "loss": 2.5451, "step": 214900 }, { "epoch": 0.42814850822389394, "grad_norm": 0.1744309365749359, "learning_rate": 0.002, "loss": 2.5569, "step": 214910 }, { "epoch": 0.42816843044753283, "grad_norm": 0.17053768038749695, "learning_rate": 0.002, "loss": 2.573, "step": 214920 }, { "epoch": 0.4281883526711717, "grad_norm": 0.15083026885986328, "learning_rate": 0.002, "loss": 2.5677, "step": 214930 }, { "epoch": 0.42820827489481067, "grad_norm": 0.1914360672235489, "learning_rate": 0.002, "loss": 2.5697, "step": 214940 }, { "epoch": 0.42822819711844956, "grad_norm": 0.15473572909832, "learning_rate": 0.002, "loss": 2.5662, "step": 214950 }, { "epoch": 0.4282481193420885, "grad_norm": 0.15124785900115967, "learning_rate": 0.002, "loss": 2.567, "step": 214960 }, { "epoch": 0.4282680415657274, "grad_norm": 0.14591297507286072, "learning_rate": 0.002, "loss": 2.5637, "step": 214970 }, { "epoch": 0.4282879637893663, "grad_norm": 0.24198396503925323, "learning_rate": 0.002, "loss": 2.5583, "step": 214980 }, { "epoch": 0.42830788601300523, "grad_norm": 0.16164422035217285, "learning_rate": 0.002, "loss": 2.5572, "step": 214990 }, { "epoch": 0.4283278082366441, "grad_norm": 0.18536090850830078, "learning_rate": 0.002, "loss": 2.5498, "step": 215000 }, { "epoch": 0.42834773046028307, "grad_norm": 0.17750424146652222, "learning_rate": 0.002, "loss": 2.5564, "step": 215010 }, { "epoch": 0.42836765268392196, "grad_norm": 0.14759689569473267, "learning_rate": 0.002, "loss": 2.542, "step": 215020 }, { "epoch": 0.4283875749075609, "grad_norm": 0.20328231155872345, "learning_rate": 0.002, "loss": 2.5634, "step": 215030 }, { "epoch": 0.4284074971311998, "grad_norm": 0.16767513751983643, "learning_rate": 0.002, "loss": 2.5633, "step": 215040 }, { "epoch": 0.4284274193548387, "grad_norm": 0.1329980492591858, "learning_rate": 0.002, "loss": 2.5552, "step": 215050 }, { "epoch": 0.42844734157847764, "grad_norm": 0.23837487399578094, "learning_rate": 0.002, "loss": 2.5452, "step": 215060 }, { "epoch": 0.4284672638021165, "grad_norm": 0.15558934211730957, "learning_rate": 0.002, "loss": 2.5469, "step": 215070 }, { "epoch": 0.4284871860257555, "grad_norm": 0.15054799616336823, "learning_rate": 0.002, "loss": 2.5632, "step": 215080 }, { "epoch": 0.42850710824939436, "grad_norm": 0.1710101217031479, "learning_rate": 0.002, "loss": 2.545, "step": 215090 }, { "epoch": 0.42852703047303325, "grad_norm": 0.14217562973499298, "learning_rate": 0.002, "loss": 2.5538, "step": 215100 }, { "epoch": 0.4285469526966722, "grad_norm": 0.1891028732061386, "learning_rate": 0.002, "loss": 2.5574, "step": 215110 }, { "epoch": 0.4285668749203111, "grad_norm": 0.1688316911458969, "learning_rate": 0.002, "loss": 2.5545, "step": 215120 }, { "epoch": 0.42858679714395004, "grad_norm": 0.16895876824855804, "learning_rate": 0.002, "loss": 2.5633, "step": 215130 }, { "epoch": 0.42860671936758893, "grad_norm": 0.17085036635398865, "learning_rate": 0.002, "loss": 2.5571, "step": 215140 }, { "epoch": 0.4286266415912278, "grad_norm": 0.17612497508525848, "learning_rate": 0.002, "loss": 2.5545, "step": 215150 }, { "epoch": 0.42864656381486677, "grad_norm": 0.16159403324127197, "learning_rate": 0.002, "loss": 2.5348, "step": 215160 }, { "epoch": 0.42866648603850566, "grad_norm": 0.17263033986091614, "learning_rate": 0.002, "loss": 2.5598, "step": 215170 }, { "epoch": 0.4286864082621446, "grad_norm": 0.1592588722705841, "learning_rate": 0.002, "loss": 2.5688, "step": 215180 }, { "epoch": 0.4287063304857835, "grad_norm": 0.16636498272418976, "learning_rate": 0.002, "loss": 2.5546, "step": 215190 }, { "epoch": 0.42872625270942244, "grad_norm": 0.17726252973079681, "learning_rate": 0.002, "loss": 2.5608, "step": 215200 }, { "epoch": 0.42874617493306133, "grad_norm": 0.16169245541095734, "learning_rate": 0.002, "loss": 2.5504, "step": 215210 }, { "epoch": 0.4287660971567002, "grad_norm": 0.1715463548898697, "learning_rate": 0.002, "loss": 2.565, "step": 215220 }, { "epoch": 0.42878601938033917, "grad_norm": 0.20978520810604095, "learning_rate": 0.002, "loss": 2.5441, "step": 215230 }, { "epoch": 0.42880594160397806, "grad_norm": 0.16997891664505005, "learning_rate": 0.002, "loss": 2.5549, "step": 215240 }, { "epoch": 0.428825863827617, "grad_norm": 0.16504038870334625, "learning_rate": 0.002, "loss": 2.5564, "step": 215250 }, { "epoch": 0.4288457860512559, "grad_norm": 0.1801258623600006, "learning_rate": 0.002, "loss": 2.5553, "step": 215260 }, { "epoch": 0.4288657082748948, "grad_norm": 0.148710697889328, "learning_rate": 0.002, "loss": 2.5532, "step": 215270 }, { "epoch": 0.42888563049853373, "grad_norm": 0.14422738552093506, "learning_rate": 0.002, "loss": 2.5605, "step": 215280 }, { "epoch": 0.4289055527221726, "grad_norm": 0.1624249517917633, "learning_rate": 0.002, "loss": 2.5572, "step": 215290 }, { "epoch": 0.42892547494581157, "grad_norm": 0.1576193869113922, "learning_rate": 0.002, "loss": 2.5563, "step": 215300 }, { "epoch": 0.42894539716945046, "grad_norm": 0.15203450620174408, "learning_rate": 0.002, "loss": 2.5552, "step": 215310 }, { "epoch": 0.4289653193930894, "grad_norm": 0.14428368210792542, "learning_rate": 0.002, "loss": 2.5603, "step": 215320 }, { "epoch": 0.4289852416167283, "grad_norm": 0.18857035040855408, "learning_rate": 0.002, "loss": 2.5647, "step": 215330 }, { "epoch": 0.4290051638403672, "grad_norm": 0.16801148653030396, "learning_rate": 0.002, "loss": 2.564, "step": 215340 }, { "epoch": 0.42902508606400613, "grad_norm": 0.16211968660354614, "learning_rate": 0.002, "loss": 2.558, "step": 215350 }, { "epoch": 0.429045008287645, "grad_norm": 0.17566253244876862, "learning_rate": 0.002, "loss": 2.5391, "step": 215360 }, { "epoch": 0.42906493051128397, "grad_norm": 0.16346120834350586, "learning_rate": 0.002, "loss": 2.5646, "step": 215370 }, { "epoch": 0.42908485273492286, "grad_norm": 0.1555502712726593, "learning_rate": 0.002, "loss": 2.5655, "step": 215380 }, { "epoch": 0.42910477495856175, "grad_norm": 0.24684257805347443, "learning_rate": 0.002, "loss": 2.5476, "step": 215390 }, { "epoch": 0.4291246971822007, "grad_norm": 0.15979009866714478, "learning_rate": 0.002, "loss": 2.5581, "step": 215400 }, { "epoch": 0.4291446194058396, "grad_norm": 0.15426605939865112, "learning_rate": 0.002, "loss": 2.5583, "step": 215410 }, { "epoch": 0.42916454162947854, "grad_norm": 0.1723308116197586, "learning_rate": 0.002, "loss": 2.5388, "step": 215420 }, { "epoch": 0.4291844638531174, "grad_norm": 0.16557037830352783, "learning_rate": 0.002, "loss": 2.5566, "step": 215430 }, { "epoch": 0.4292043860767563, "grad_norm": 0.1805431842803955, "learning_rate": 0.002, "loss": 2.5621, "step": 215440 }, { "epoch": 0.42922430830039526, "grad_norm": 0.1644487977027893, "learning_rate": 0.002, "loss": 2.5667, "step": 215450 }, { "epoch": 0.42924423052403415, "grad_norm": 0.16977356374263763, "learning_rate": 0.002, "loss": 2.5616, "step": 215460 }, { "epoch": 0.4292641527476731, "grad_norm": 0.14475849270820618, "learning_rate": 0.002, "loss": 2.5532, "step": 215470 }, { "epoch": 0.429284074971312, "grad_norm": 0.18464389443397522, "learning_rate": 0.002, "loss": 2.5591, "step": 215480 }, { "epoch": 0.42930399719495094, "grad_norm": 0.16899271309375763, "learning_rate": 0.002, "loss": 2.5441, "step": 215490 }, { "epoch": 0.42932391941858983, "grad_norm": 0.18905360996723175, "learning_rate": 0.002, "loss": 2.5708, "step": 215500 }, { "epoch": 0.4293438416422287, "grad_norm": 0.16006332635879517, "learning_rate": 0.002, "loss": 2.553, "step": 215510 }, { "epoch": 0.42936376386586766, "grad_norm": 0.16097521781921387, "learning_rate": 0.002, "loss": 2.5586, "step": 215520 }, { "epoch": 0.42938368608950656, "grad_norm": 0.15433412790298462, "learning_rate": 0.002, "loss": 2.5591, "step": 215530 }, { "epoch": 0.4294036083131455, "grad_norm": 0.17703552544116974, "learning_rate": 0.002, "loss": 2.5514, "step": 215540 }, { "epoch": 0.4294235305367844, "grad_norm": 0.18747790157794952, "learning_rate": 0.002, "loss": 2.5508, "step": 215550 }, { "epoch": 0.4294434527604233, "grad_norm": 0.14486628770828247, "learning_rate": 0.002, "loss": 2.5722, "step": 215560 }, { "epoch": 0.42946337498406223, "grad_norm": 0.1533478945493698, "learning_rate": 0.002, "loss": 2.5524, "step": 215570 }, { "epoch": 0.4294832972077011, "grad_norm": 0.1763409823179245, "learning_rate": 0.002, "loss": 2.5549, "step": 215580 }, { "epoch": 0.42950321943134007, "grad_norm": 0.175742045044899, "learning_rate": 0.002, "loss": 2.5666, "step": 215590 }, { "epoch": 0.42952314165497896, "grad_norm": 0.15574918687343597, "learning_rate": 0.002, "loss": 2.5601, "step": 215600 }, { "epoch": 0.4295430638786179, "grad_norm": 0.16603241860866547, "learning_rate": 0.002, "loss": 2.5462, "step": 215610 }, { "epoch": 0.4295629861022568, "grad_norm": 0.17237535119056702, "learning_rate": 0.002, "loss": 2.564, "step": 215620 }, { "epoch": 0.4295829083258957, "grad_norm": 0.14858916401863098, "learning_rate": 0.002, "loss": 2.5448, "step": 215630 }, { "epoch": 0.42960283054953463, "grad_norm": 0.15904824435710907, "learning_rate": 0.002, "loss": 2.5577, "step": 215640 }, { "epoch": 0.4296227527731735, "grad_norm": 0.1845093071460724, "learning_rate": 0.002, "loss": 2.5685, "step": 215650 }, { "epoch": 0.42964267499681247, "grad_norm": 0.14892955124378204, "learning_rate": 0.002, "loss": 2.5557, "step": 215660 }, { "epoch": 0.42966259722045136, "grad_norm": 0.16929034888744354, "learning_rate": 0.002, "loss": 2.5473, "step": 215670 }, { "epoch": 0.42968251944409025, "grad_norm": 0.14758972823619843, "learning_rate": 0.002, "loss": 2.5754, "step": 215680 }, { "epoch": 0.4297024416677292, "grad_norm": 0.17222680151462555, "learning_rate": 0.002, "loss": 2.5578, "step": 215690 }, { "epoch": 0.4297223638913681, "grad_norm": 0.18792688846588135, "learning_rate": 0.002, "loss": 2.5752, "step": 215700 }, { "epoch": 0.42974228611500703, "grad_norm": 0.19020786881446838, "learning_rate": 0.002, "loss": 2.5506, "step": 215710 }, { "epoch": 0.4297622083386459, "grad_norm": 0.14617246389389038, "learning_rate": 0.002, "loss": 2.5741, "step": 215720 }, { "epoch": 0.4297821305622848, "grad_norm": 0.1534898430109024, "learning_rate": 0.002, "loss": 2.5506, "step": 215730 }, { "epoch": 0.42980205278592376, "grad_norm": 0.18565261363983154, "learning_rate": 0.002, "loss": 2.5536, "step": 215740 }, { "epoch": 0.42982197500956265, "grad_norm": 0.16810932755470276, "learning_rate": 0.002, "loss": 2.5574, "step": 215750 }, { "epoch": 0.4298418972332016, "grad_norm": 0.18343736231327057, "learning_rate": 0.002, "loss": 2.5552, "step": 215760 }, { "epoch": 0.4298618194568405, "grad_norm": 0.18111124634742737, "learning_rate": 0.002, "loss": 2.5631, "step": 215770 }, { "epoch": 0.42988174168047943, "grad_norm": 0.13834455609321594, "learning_rate": 0.002, "loss": 2.5652, "step": 215780 }, { "epoch": 0.4299016639041183, "grad_norm": 0.1697295755147934, "learning_rate": 0.002, "loss": 2.563, "step": 215790 }, { "epoch": 0.4299215861277572, "grad_norm": 0.17391209304332733, "learning_rate": 0.002, "loss": 2.5784, "step": 215800 }, { "epoch": 0.42994150835139616, "grad_norm": 0.13769760727882385, "learning_rate": 0.002, "loss": 2.5528, "step": 215810 }, { "epoch": 0.42996143057503505, "grad_norm": 0.1749725192785263, "learning_rate": 0.002, "loss": 2.5772, "step": 215820 }, { "epoch": 0.429981352798674, "grad_norm": 0.18360769748687744, "learning_rate": 0.002, "loss": 2.562, "step": 215830 }, { "epoch": 0.4300012750223129, "grad_norm": 0.16750381886959076, "learning_rate": 0.002, "loss": 2.5535, "step": 215840 }, { "epoch": 0.4300211972459518, "grad_norm": 0.16494718194007874, "learning_rate": 0.002, "loss": 2.5602, "step": 215850 }, { "epoch": 0.4300411194695907, "grad_norm": 0.19668680429458618, "learning_rate": 0.002, "loss": 2.5668, "step": 215860 }, { "epoch": 0.4300610416932296, "grad_norm": 0.206038236618042, "learning_rate": 0.002, "loss": 2.5604, "step": 215870 }, { "epoch": 0.43008096391686856, "grad_norm": 0.21282491087913513, "learning_rate": 0.002, "loss": 2.5455, "step": 215880 }, { "epoch": 0.43010088614050745, "grad_norm": 0.18321087956428528, "learning_rate": 0.002, "loss": 2.5713, "step": 215890 }, { "epoch": 0.43012080836414635, "grad_norm": 0.17819716036319733, "learning_rate": 0.002, "loss": 2.5499, "step": 215900 }, { "epoch": 0.4301407305877853, "grad_norm": 0.19874435663223267, "learning_rate": 0.002, "loss": 2.5618, "step": 215910 }, { "epoch": 0.4301606528114242, "grad_norm": 0.15611892938613892, "learning_rate": 0.002, "loss": 2.5573, "step": 215920 }, { "epoch": 0.43018057503506313, "grad_norm": 0.19804321229457855, "learning_rate": 0.002, "loss": 2.544, "step": 215930 }, { "epoch": 0.430200497258702, "grad_norm": 0.1645577847957611, "learning_rate": 0.002, "loss": 2.5461, "step": 215940 }, { "epoch": 0.43022041948234097, "grad_norm": 0.15615293383598328, "learning_rate": 0.002, "loss": 2.5615, "step": 215950 }, { "epoch": 0.43024034170597986, "grad_norm": 0.20181550085544586, "learning_rate": 0.002, "loss": 2.5695, "step": 215960 }, { "epoch": 0.43026026392961875, "grad_norm": 0.1811036765575409, "learning_rate": 0.002, "loss": 2.5653, "step": 215970 }, { "epoch": 0.4302801861532577, "grad_norm": 0.17419452965259552, "learning_rate": 0.002, "loss": 2.5563, "step": 215980 }, { "epoch": 0.4303001083768966, "grad_norm": 0.15600110590457916, "learning_rate": 0.002, "loss": 2.5617, "step": 215990 }, { "epoch": 0.43032003060053553, "grad_norm": 0.16957935690879822, "learning_rate": 0.002, "loss": 2.5461, "step": 216000 }, { "epoch": 0.4303399528241744, "grad_norm": 0.17246995866298676, "learning_rate": 0.002, "loss": 2.5425, "step": 216010 }, { "epoch": 0.4303598750478133, "grad_norm": 0.14030888676643372, "learning_rate": 0.002, "loss": 2.5506, "step": 216020 }, { "epoch": 0.43037979727145226, "grad_norm": 0.2212955206632614, "learning_rate": 0.002, "loss": 2.5658, "step": 216030 }, { "epoch": 0.43039971949509115, "grad_norm": 0.19174790382385254, "learning_rate": 0.002, "loss": 2.5566, "step": 216040 }, { "epoch": 0.4304196417187301, "grad_norm": 0.13477501273155212, "learning_rate": 0.002, "loss": 2.5701, "step": 216050 }, { "epoch": 0.430439563942369, "grad_norm": 0.21558605134487152, "learning_rate": 0.002, "loss": 2.545, "step": 216060 }, { "epoch": 0.43045948616600793, "grad_norm": 0.15763264894485474, "learning_rate": 0.002, "loss": 2.5525, "step": 216070 }, { "epoch": 0.4304794083896468, "grad_norm": 0.14971716701984406, "learning_rate": 0.002, "loss": 2.5566, "step": 216080 }, { "epoch": 0.4304993306132857, "grad_norm": 0.1568630188703537, "learning_rate": 0.002, "loss": 2.5596, "step": 216090 }, { "epoch": 0.43051925283692466, "grad_norm": 0.15474645793437958, "learning_rate": 0.002, "loss": 2.5415, "step": 216100 }, { "epoch": 0.43053917506056355, "grad_norm": 0.1616683006286621, "learning_rate": 0.002, "loss": 2.566, "step": 216110 }, { "epoch": 0.4305590972842025, "grad_norm": 0.14401380717754364, "learning_rate": 0.002, "loss": 2.5542, "step": 216120 }, { "epoch": 0.4305790195078414, "grad_norm": 0.1711409091949463, "learning_rate": 0.002, "loss": 2.5689, "step": 216130 }, { "epoch": 0.4305989417314803, "grad_norm": 0.17658698558807373, "learning_rate": 0.002, "loss": 2.5495, "step": 216140 }, { "epoch": 0.4306188639551192, "grad_norm": 0.1640537977218628, "learning_rate": 0.002, "loss": 2.5569, "step": 216150 }, { "epoch": 0.4306387861787581, "grad_norm": 0.15499381721019745, "learning_rate": 0.002, "loss": 2.5656, "step": 216160 }, { "epoch": 0.43065870840239706, "grad_norm": 0.18320198357105255, "learning_rate": 0.002, "loss": 2.5668, "step": 216170 }, { "epoch": 0.43067863062603595, "grad_norm": 0.16655299067497253, "learning_rate": 0.002, "loss": 2.5576, "step": 216180 }, { "epoch": 0.43069855284967484, "grad_norm": 0.17479552328586578, "learning_rate": 0.002, "loss": 2.5544, "step": 216190 }, { "epoch": 0.4307184750733138, "grad_norm": 0.15583927929401398, "learning_rate": 0.002, "loss": 2.5716, "step": 216200 }, { "epoch": 0.4307383972969527, "grad_norm": 0.1604304015636444, "learning_rate": 0.002, "loss": 2.5573, "step": 216210 }, { "epoch": 0.4307583195205916, "grad_norm": 0.18989762663841248, "learning_rate": 0.002, "loss": 2.5658, "step": 216220 }, { "epoch": 0.4307782417442305, "grad_norm": 0.15035350620746613, "learning_rate": 0.002, "loss": 2.5609, "step": 216230 }, { "epoch": 0.43079816396786946, "grad_norm": 0.21530397236347198, "learning_rate": 0.002, "loss": 2.5609, "step": 216240 }, { "epoch": 0.43081808619150835, "grad_norm": 0.17032986879348755, "learning_rate": 0.002, "loss": 2.5646, "step": 216250 }, { "epoch": 0.43083800841514724, "grad_norm": 0.15024401247501373, "learning_rate": 0.002, "loss": 2.5492, "step": 216260 }, { "epoch": 0.4308579306387862, "grad_norm": 0.2031233310699463, "learning_rate": 0.002, "loss": 2.566, "step": 216270 }, { "epoch": 0.4308778528624251, "grad_norm": 0.14514097571372986, "learning_rate": 0.002, "loss": 2.5608, "step": 216280 }, { "epoch": 0.430897775086064, "grad_norm": 0.16674792766571045, "learning_rate": 0.002, "loss": 2.5574, "step": 216290 }, { "epoch": 0.4309176973097029, "grad_norm": 0.16133597493171692, "learning_rate": 0.002, "loss": 2.5659, "step": 216300 }, { "epoch": 0.4309376195333418, "grad_norm": 0.14762131869792938, "learning_rate": 0.002, "loss": 2.5602, "step": 216310 }, { "epoch": 0.43095754175698076, "grad_norm": 0.1977129429578781, "learning_rate": 0.002, "loss": 2.5552, "step": 216320 }, { "epoch": 0.43097746398061965, "grad_norm": 0.1728532761335373, "learning_rate": 0.002, "loss": 2.5673, "step": 216330 }, { "epoch": 0.4309973862042586, "grad_norm": 0.19919870793819427, "learning_rate": 0.002, "loss": 2.5491, "step": 216340 }, { "epoch": 0.4310173084278975, "grad_norm": 0.1839684545993805, "learning_rate": 0.002, "loss": 2.5581, "step": 216350 }, { "epoch": 0.43103723065153643, "grad_norm": 0.18522556126117706, "learning_rate": 0.002, "loss": 2.5581, "step": 216360 }, { "epoch": 0.4310571528751753, "grad_norm": 0.15275397896766663, "learning_rate": 0.002, "loss": 2.5601, "step": 216370 }, { "epoch": 0.4310770750988142, "grad_norm": 0.1631932556629181, "learning_rate": 0.002, "loss": 2.5541, "step": 216380 }, { "epoch": 0.43109699732245316, "grad_norm": 0.18860436975955963, "learning_rate": 0.002, "loss": 2.5604, "step": 216390 }, { "epoch": 0.43111691954609205, "grad_norm": 0.18497006595134735, "learning_rate": 0.002, "loss": 2.5602, "step": 216400 }, { "epoch": 0.431136841769731, "grad_norm": 0.14147880673408508, "learning_rate": 0.002, "loss": 2.5563, "step": 216410 }, { "epoch": 0.4311567639933699, "grad_norm": 0.17393523454666138, "learning_rate": 0.002, "loss": 2.5691, "step": 216420 }, { "epoch": 0.4311766862170088, "grad_norm": 0.17135582864284515, "learning_rate": 0.002, "loss": 2.5613, "step": 216430 }, { "epoch": 0.4311966084406477, "grad_norm": 0.184386745095253, "learning_rate": 0.002, "loss": 2.564, "step": 216440 }, { "epoch": 0.4312165306642866, "grad_norm": 0.1801672875881195, "learning_rate": 0.002, "loss": 2.5511, "step": 216450 }, { "epoch": 0.43123645288792556, "grad_norm": 0.17709048092365265, "learning_rate": 0.002, "loss": 2.5587, "step": 216460 }, { "epoch": 0.43125637511156445, "grad_norm": 0.18875080347061157, "learning_rate": 0.002, "loss": 2.555, "step": 216470 }, { "epoch": 0.43127629733520334, "grad_norm": 0.18519863486289978, "learning_rate": 0.002, "loss": 2.5561, "step": 216480 }, { "epoch": 0.4312962195588423, "grad_norm": 0.18335437774658203, "learning_rate": 0.002, "loss": 2.5604, "step": 216490 }, { "epoch": 0.4313161417824812, "grad_norm": 0.18763768672943115, "learning_rate": 0.002, "loss": 2.5506, "step": 216500 }, { "epoch": 0.4313360640061201, "grad_norm": 0.17837630212306976, "learning_rate": 0.002, "loss": 2.5621, "step": 216510 }, { "epoch": 0.431355986229759, "grad_norm": 0.17131757736206055, "learning_rate": 0.002, "loss": 2.5533, "step": 216520 }, { "epoch": 0.43137590845339796, "grad_norm": 0.1533605009317398, "learning_rate": 0.002, "loss": 2.5537, "step": 216530 }, { "epoch": 0.43139583067703685, "grad_norm": 0.16480496525764465, "learning_rate": 0.002, "loss": 2.5539, "step": 216540 }, { "epoch": 0.43141575290067574, "grad_norm": 0.17801691591739655, "learning_rate": 0.002, "loss": 2.5505, "step": 216550 }, { "epoch": 0.4314356751243147, "grad_norm": 0.18000967800617218, "learning_rate": 0.002, "loss": 2.5603, "step": 216560 }, { "epoch": 0.4314555973479536, "grad_norm": 0.15567627549171448, "learning_rate": 0.002, "loss": 2.5317, "step": 216570 }, { "epoch": 0.4314755195715925, "grad_norm": 0.17716491222381592, "learning_rate": 0.002, "loss": 2.5553, "step": 216580 }, { "epoch": 0.4314954417952314, "grad_norm": 0.22924920916557312, "learning_rate": 0.002, "loss": 2.5497, "step": 216590 }, { "epoch": 0.4315153640188703, "grad_norm": 0.20806175470352173, "learning_rate": 0.002, "loss": 2.5667, "step": 216600 }, { "epoch": 0.43153528624250925, "grad_norm": 0.15625673532485962, "learning_rate": 0.002, "loss": 2.5411, "step": 216610 }, { "epoch": 0.43155520846614814, "grad_norm": 0.15329548716545105, "learning_rate": 0.002, "loss": 2.5573, "step": 216620 }, { "epoch": 0.4315751306897871, "grad_norm": 0.18170884251594543, "learning_rate": 0.002, "loss": 2.559, "step": 216630 }, { "epoch": 0.431595052913426, "grad_norm": 0.19511498510837555, "learning_rate": 0.002, "loss": 2.5667, "step": 216640 }, { "epoch": 0.43161497513706487, "grad_norm": 0.1452149748802185, "learning_rate": 0.002, "loss": 2.5562, "step": 216650 }, { "epoch": 0.4316348973607038, "grad_norm": 0.17606483399868011, "learning_rate": 0.002, "loss": 2.5626, "step": 216660 }, { "epoch": 0.4316548195843427, "grad_norm": 0.13976064324378967, "learning_rate": 0.002, "loss": 2.558, "step": 216670 }, { "epoch": 0.43167474180798165, "grad_norm": 0.1998063176870346, "learning_rate": 0.002, "loss": 2.5466, "step": 216680 }, { "epoch": 0.43169466403162055, "grad_norm": 0.15839114785194397, "learning_rate": 0.002, "loss": 2.5465, "step": 216690 }, { "epoch": 0.4317145862552595, "grad_norm": 0.17133411765098572, "learning_rate": 0.002, "loss": 2.5798, "step": 216700 }, { "epoch": 0.4317345084788984, "grad_norm": 0.2031690776348114, "learning_rate": 0.002, "loss": 2.5643, "step": 216710 }, { "epoch": 0.4317544307025373, "grad_norm": 0.1448117047548294, "learning_rate": 0.002, "loss": 2.5648, "step": 216720 }, { "epoch": 0.4317743529261762, "grad_norm": 0.15272249281406403, "learning_rate": 0.002, "loss": 2.5587, "step": 216730 }, { "epoch": 0.4317942751498151, "grad_norm": 0.35538122057914734, "learning_rate": 0.002, "loss": 2.5614, "step": 216740 }, { "epoch": 0.43181419737345406, "grad_norm": 0.1729462742805481, "learning_rate": 0.002, "loss": 2.5632, "step": 216750 }, { "epoch": 0.43183411959709295, "grad_norm": 0.16583450138568878, "learning_rate": 0.002, "loss": 2.5781, "step": 216760 }, { "epoch": 0.43185404182073184, "grad_norm": 0.16751064360141754, "learning_rate": 0.002, "loss": 2.5762, "step": 216770 }, { "epoch": 0.4318739640443708, "grad_norm": 0.17040300369262695, "learning_rate": 0.002, "loss": 2.5727, "step": 216780 }, { "epoch": 0.4318938862680097, "grad_norm": 0.16005651652812958, "learning_rate": 0.002, "loss": 2.5529, "step": 216790 }, { "epoch": 0.4319138084916486, "grad_norm": 0.18883487582206726, "learning_rate": 0.002, "loss": 2.5536, "step": 216800 }, { "epoch": 0.4319337307152875, "grad_norm": 0.18640747666358948, "learning_rate": 0.002, "loss": 2.5487, "step": 216810 }, { "epoch": 0.43195365293892646, "grad_norm": 0.12901955842971802, "learning_rate": 0.002, "loss": 2.5507, "step": 216820 }, { "epoch": 0.43197357516256535, "grad_norm": 0.2086113542318344, "learning_rate": 0.002, "loss": 2.568, "step": 216830 }, { "epoch": 0.43199349738620424, "grad_norm": 0.16149118542671204, "learning_rate": 0.002, "loss": 2.569, "step": 216840 }, { "epoch": 0.4320134196098432, "grad_norm": 0.16580937802791595, "learning_rate": 0.002, "loss": 2.5567, "step": 216850 }, { "epoch": 0.4320333418334821, "grad_norm": 0.1770785003900528, "learning_rate": 0.002, "loss": 2.5636, "step": 216860 }, { "epoch": 0.432053264057121, "grad_norm": 0.16087624430656433, "learning_rate": 0.002, "loss": 2.5602, "step": 216870 }, { "epoch": 0.4320731862807599, "grad_norm": 0.18195559084415436, "learning_rate": 0.002, "loss": 2.5499, "step": 216880 }, { "epoch": 0.4320931085043988, "grad_norm": 0.1525050699710846, "learning_rate": 0.002, "loss": 2.5599, "step": 216890 }, { "epoch": 0.43211303072803775, "grad_norm": 0.16648830473423004, "learning_rate": 0.002, "loss": 2.5712, "step": 216900 }, { "epoch": 0.43213295295167664, "grad_norm": 0.16154451668262482, "learning_rate": 0.002, "loss": 2.5635, "step": 216910 }, { "epoch": 0.4321528751753156, "grad_norm": 0.1824614703655243, "learning_rate": 0.002, "loss": 2.5572, "step": 216920 }, { "epoch": 0.4321727973989545, "grad_norm": 0.14729568362236023, "learning_rate": 0.002, "loss": 2.5601, "step": 216930 }, { "epoch": 0.43219271962259337, "grad_norm": 0.16338570415973663, "learning_rate": 0.002, "loss": 2.5702, "step": 216940 }, { "epoch": 0.4322126418462323, "grad_norm": 0.20565901696681976, "learning_rate": 0.002, "loss": 2.5632, "step": 216950 }, { "epoch": 0.4322325640698712, "grad_norm": 0.1786765158176422, "learning_rate": 0.002, "loss": 2.5648, "step": 216960 }, { "epoch": 0.43225248629351015, "grad_norm": 0.1558249443769455, "learning_rate": 0.002, "loss": 2.5439, "step": 216970 }, { "epoch": 0.43227240851714904, "grad_norm": 0.1527017503976822, "learning_rate": 0.002, "loss": 2.5658, "step": 216980 }, { "epoch": 0.432292330740788, "grad_norm": 0.19639551639556885, "learning_rate": 0.002, "loss": 2.5495, "step": 216990 }, { "epoch": 0.4323122529644269, "grad_norm": 0.15086115896701813, "learning_rate": 0.002, "loss": 2.5635, "step": 217000 }, { "epoch": 0.43233217518806577, "grad_norm": 0.15221406519412994, "learning_rate": 0.002, "loss": 2.5514, "step": 217010 }, { "epoch": 0.4323520974117047, "grad_norm": 0.1782096028327942, "learning_rate": 0.002, "loss": 2.5521, "step": 217020 }, { "epoch": 0.4323720196353436, "grad_norm": 0.1746370643377304, "learning_rate": 0.002, "loss": 2.564, "step": 217030 }, { "epoch": 0.43239194185898255, "grad_norm": 0.19385284185409546, "learning_rate": 0.002, "loss": 2.5554, "step": 217040 }, { "epoch": 0.43241186408262144, "grad_norm": 0.16425485908985138, "learning_rate": 0.002, "loss": 2.5659, "step": 217050 }, { "epoch": 0.43243178630626034, "grad_norm": 0.2269791215658188, "learning_rate": 0.002, "loss": 2.5515, "step": 217060 }, { "epoch": 0.4324517085298993, "grad_norm": 0.15482831001281738, "learning_rate": 0.002, "loss": 2.5516, "step": 217070 }, { "epoch": 0.43247163075353817, "grad_norm": 0.20739394426345825, "learning_rate": 0.002, "loss": 2.5489, "step": 217080 }, { "epoch": 0.4324915529771771, "grad_norm": 0.20330572128295898, "learning_rate": 0.002, "loss": 2.5663, "step": 217090 }, { "epoch": 0.432511475200816, "grad_norm": 0.194534033536911, "learning_rate": 0.002, "loss": 2.5582, "step": 217100 }, { "epoch": 0.43253139742445496, "grad_norm": 0.15088428556919098, "learning_rate": 0.002, "loss": 2.5647, "step": 217110 }, { "epoch": 0.43255131964809385, "grad_norm": 0.17031791806221008, "learning_rate": 0.002, "loss": 2.5548, "step": 217120 }, { "epoch": 0.43257124187173274, "grad_norm": 0.17930792272090912, "learning_rate": 0.002, "loss": 2.5476, "step": 217130 }, { "epoch": 0.4325911640953717, "grad_norm": 0.1658896654844284, "learning_rate": 0.002, "loss": 2.5616, "step": 217140 }, { "epoch": 0.4326110863190106, "grad_norm": 0.23470991849899292, "learning_rate": 0.002, "loss": 2.554, "step": 217150 }, { "epoch": 0.4326310085426495, "grad_norm": 0.18321025371551514, "learning_rate": 0.002, "loss": 2.5589, "step": 217160 }, { "epoch": 0.4326509307662884, "grad_norm": 0.15934045612812042, "learning_rate": 0.002, "loss": 2.552, "step": 217170 }, { "epoch": 0.4326708529899273, "grad_norm": 0.18235664069652557, "learning_rate": 0.002, "loss": 2.5643, "step": 217180 }, { "epoch": 0.43269077521356625, "grad_norm": 0.15176016092300415, "learning_rate": 0.002, "loss": 2.5572, "step": 217190 }, { "epoch": 0.43271069743720514, "grad_norm": 0.17353740334510803, "learning_rate": 0.002, "loss": 2.5803, "step": 217200 }, { "epoch": 0.4327306196608441, "grad_norm": 0.16206426918506622, "learning_rate": 0.002, "loss": 2.558, "step": 217210 }, { "epoch": 0.432750541884483, "grad_norm": 0.19061163067817688, "learning_rate": 0.002, "loss": 2.5638, "step": 217220 }, { "epoch": 0.43277046410812187, "grad_norm": 0.16117335855960846, "learning_rate": 0.002, "loss": 2.558, "step": 217230 }, { "epoch": 0.4327903863317608, "grad_norm": 0.2335789054632187, "learning_rate": 0.002, "loss": 2.5681, "step": 217240 }, { "epoch": 0.4328103085553997, "grad_norm": 0.1483955979347229, "learning_rate": 0.002, "loss": 2.5467, "step": 217250 }, { "epoch": 0.43283023077903865, "grad_norm": 0.1547776311635971, "learning_rate": 0.002, "loss": 2.5583, "step": 217260 }, { "epoch": 0.43285015300267754, "grad_norm": 0.18113139271736145, "learning_rate": 0.002, "loss": 2.5606, "step": 217270 }, { "epoch": 0.4328700752263165, "grad_norm": 0.16884136199951172, "learning_rate": 0.002, "loss": 2.5623, "step": 217280 }, { "epoch": 0.4328899974499554, "grad_norm": 0.1649954468011856, "learning_rate": 0.002, "loss": 2.5601, "step": 217290 }, { "epoch": 0.43290991967359427, "grad_norm": 0.15370428562164307, "learning_rate": 0.002, "loss": 2.561, "step": 217300 }, { "epoch": 0.4329298418972332, "grad_norm": 0.18115775287151337, "learning_rate": 0.002, "loss": 2.5635, "step": 217310 }, { "epoch": 0.4329497641208721, "grad_norm": 0.19717463850975037, "learning_rate": 0.002, "loss": 2.5661, "step": 217320 }, { "epoch": 0.43296968634451105, "grad_norm": 0.17634868621826172, "learning_rate": 0.002, "loss": 2.5539, "step": 217330 }, { "epoch": 0.43298960856814994, "grad_norm": 0.16346505284309387, "learning_rate": 0.002, "loss": 2.5511, "step": 217340 }, { "epoch": 0.43300953079178883, "grad_norm": 0.1744609773159027, "learning_rate": 0.002, "loss": 2.5707, "step": 217350 }, { "epoch": 0.4330294530154278, "grad_norm": 0.15514089167118073, "learning_rate": 0.002, "loss": 2.5479, "step": 217360 }, { "epoch": 0.43304937523906667, "grad_norm": 0.1709384173154831, "learning_rate": 0.002, "loss": 2.5538, "step": 217370 }, { "epoch": 0.4330692974627056, "grad_norm": 0.1629827469587326, "learning_rate": 0.002, "loss": 2.554, "step": 217380 }, { "epoch": 0.4330892196863445, "grad_norm": 0.16259628534317017, "learning_rate": 0.002, "loss": 2.5505, "step": 217390 }, { "epoch": 0.4331091419099834, "grad_norm": 0.16286145150661469, "learning_rate": 0.002, "loss": 2.5629, "step": 217400 }, { "epoch": 0.43312906413362234, "grad_norm": 0.15223057568073273, "learning_rate": 0.002, "loss": 2.5526, "step": 217410 }, { "epoch": 0.43314898635726123, "grad_norm": 0.22246652841567993, "learning_rate": 0.002, "loss": 2.5472, "step": 217420 }, { "epoch": 0.4331689085809002, "grad_norm": 0.15602748095989227, "learning_rate": 0.002, "loss": 2.563, "step": 217430 }, { "epoch": 0.43318883080453907, "grad_norm": 0.17152522504329681, "learning_rate": 0.002, "loss": 2.5793, "step": 217440 }, { "epoch": 0.433208753028178, "grad_norm": 0.1673523187637329, "learning_rate": 0.002, "loss": 2.5707, "step": 217450 }, { "epoch": 0.4332286752518169, "grad_norm": 0.1410014033317566, "learning_rate": 0.002, "loss": 2.5481, "step": 217460 }, { "epoch": 0.4332485974754558, "grad_norm": 0.16126853227615356, "learning_rate": 0.002, "loss": 2.5544, "step": 217470 }, { "epoch": 0.43326851969909475, "grad_norm": 0.1563136726617813, "learning_rate": 0.002, "loss": 2.561, "step": 217480 }, { "epoch": 0.43328844192273364, "grad_norm": 0.15957026183605194, "learning_rate": 0.002, "loss": 2.5677, "step": 217490 }, { "epoch": 0.4333083641463726, "grad_norm": 0.16323629021644592, "learning_rate": 0.002, "loss": 2.5603, "step": 217500 }, { "epoch": 0.4333282863700115, "grad_norm": 0.16877323389053345, "learning_rate": 0.002, "loss": 2.5555, "step": 217510 }, { "epoch": 0.43334820859365036, "grad_norm": 0.19312210381031036, "learning_rate": 0.002, "loss": 2.5606, "step": 217520 }, { "epoch": 0.4333681308172893, "grad_norm": 0.15842066705226898, "learning_rate": 0.002, "loss": 2.5564, "step": 217530 }, { "epoch": 0.4333880530409282, "grad_norm": 0.15806874632835388, "learning_rate": 0.002, "loss": 2.5754, "step": 217540 }, { "epoch": 0.43340797526456715, "grad_norm": 0.17298012971878052, "learning_rate": 0.002, "loss": 2.5627, "step": 217550 }, { "epoch": 0.43342789748820604, "grad_norm": 0.1771918386220932, "learning_rate": 0.002, "loss": 2.5554, "step": 217560 }, { "epoch": 0.433447819711845, "grad_norm": 0.15119808912277222, "learning_rate": 0.002, "loss": 2.564, "step": 217570 }, { "epoch": 0.4334677419354839, "grad_norm": 0.1775839924812317, "learning_rate": 0.002, "loss": 2.5555, "step": 217580 }, { "epoch": 0.43348766415912277, "grad_norm": 0.14002592861652374, "learning_rate": 0.002, "loss": 2.5833, "step": 217590 }, { "epoch": 0.4335075863827617, "grad_norm": 0.15838737785816193, "learning_rate": 0.002, "loss": 2.5604, "step": 217600 }, { "epoch": 0.4335275086064006, "grad_norm": 0.1972854733467102, "learning_rate": 0.002, "loss": 2.5721, "step": 217610 }, { "epoch": 0.43354743083003955, "grad_norm": 0.165352925658226, "learning_rate": 0.002, "loss": 2.5551, "step": 217620 }, { "epoch": 0.43356735305367844, "grad_norm": 0.1447458118200302, "learning_rate": 0.002, "loss": 2.5531, "step": 217630 }, { "epoch": 0.43358727527731733, "grad_norm": 0.15330424904823303, "learning_rate": 0.002, "loss": 2.5629, "step": 217640 }, { "epoch": 0.4336071975009563, "grad_norm": 0.1679094284772873, "learning_rate": 0.002, "loss": 2.5641, "step": 217650 }, { "epoch": 0.43362711972459517, "grad_norm": 0.15532849729061127, "learning_rate": 0.002, "loss": 2.5483, "step": 217660 }, { "epoch": 0.4336470419482341, "grad_norm": 0.1635379046201706, "learning_rate": 0.002, "loss": 2.5566, "step": 217670 }, { "epoch": 0.433666964171873, "grad_norm": 0.16071945428848267, "learning_rate": 0.002, "loss": 2.5632, "step": 217680 }, { "epoch": 0.4336868863955119, "grad_norm": 0.17721927165985107, "learning_rate": 0.002, "loss": 2.5582, "step": 217690 }, { "epoch": 0.43370680861915084, "grad_norm": 0.15359021723270416, "learning_rate": 0.002, "loss": 2.5665, "step": 217700 }, { "epoch": 0.43372673084278973, "grad_norm": 0.15814624726772308, "learning_rate": 0.002, "loss": 2.5655, "step": 217710 }, { "epoch": 0.4337466530664287, "grad_norm": 0.17975519597530365, "learning_rate": 0.002, "loss": 2.5476, "step": 217720 }, { "epoch": 0.43376657529006757, "grad_norm": 0.1552865356206894, "learning_rate": 0.002, "loss": 2.5697, "step": 217730 }, { "epoch": 0.4337864975137065, "grad_norm": 0.1880425363779068, "learning_rate": 0.002, "loss": 2.5649, "step": 217740 }, { "epoch": 0.4338064197373454, "grad_norm": 0.1657515913248062, "learning_rate": 0.002, "loss": 2.5527, "step": 217750 }, { "epoch": 0.4338263419609843, "grad_norm": 0.16221746802330017, "learning_rate": 0.002, "loss": 2.5531, "step": 217760 }, { "epoch": 0.43384626418462324, "grad_norm": 0.16082245111465454, "learning_rate": 0.002, "loss": 2.5563, "step": 217770 }, { "epoch": 0.43386618640826213, "grad_norm": 0.18376348912715912, "learning_rate": 0.002, "loss": 2.554, "step": 217780 }, { "epoch": 0.4338861086319011, "grad_norm": 0.2070908099412918, "learning_rate": 0.002, "loss": 2.562, "step": 217790 }, { "epoch": 0.43390603085553997, "grad_norm": 0.15284782648086548, "learning_rate": 0.002, "loss": 2.576, "step": 217800 }, { "epoch": 0.43392595307917886, "grad_norm": 0.16090503334999084, "learning_rate": 0.002, "loss": 2.5439, "step": 217810 }, { "epoch": 0.4339458753028178, "grad_norm": 0.16175459325313568, "learning_rate": 0.002, "loss": 2.5568, "step": 217820 }, { "epoch": 0.4339657975264567, "grad_norm": 0.1589672565460205, "learning_rate": 0.002, "loss": 2.5553, "step": 217830 }, { "epoch": 0.43398571975009564, "grad_norm": 0.19224068522453308, "learning_rate": 0.002, "loss": 2.5607, "step": 217840 }, { "epoch": 0.43400564197373454, "grad_norm": 0.18374831974506378, "learning_rate": 0.002, "loss": 2.5544, "step": 217850 }, { "epoch": 0.4340255641973735, "grad_norm": 0.1584290713071823, "learning_rate": 0.002, "loss": 2.5467, "step": 217860 }, { "epoch": 0.43404548642101237, "grad_norm": 0.18620970845222473, "learning_rate": 0.002, "loss": 2.56, "step": 217870 }, { "epoch": 0.43406540864465126, "grad_norm": 0.15980176627635956, "learning_rate": 0.002, "loss": 2.564, "step": 217880 }, { "epoch": 0.4340853308682902, "grad_norm": 0.1906077116727829, "learning_rate": 0.002, "loss": 2.5501, "step": 217890 }, { "epoch": 0.4341052530919291, "grad_norm": 0.1625310778617859, "learning_rate": 0.002, "loss": 2.5482, "step": 217900 }, { "epoch": 0.43412517531556805, "grad_norm": 0.1411139816045761, "learning_rate": 0.002, "loss": 2.5548, "step": 217910 }, { "epoch": 0.43414509753920694, "grad_norm": 0.1819046586751938, "learning_rate": 0.002, "loss": 2.5616, "step": 217920 }, { "epoch": 0.4341650197628458, "grad_norm": 0.1911439299583435, "learning_rate": 0.002, "loss": 2.5616, "step": 217930 }, { "epoch": 0.4341849419864848, "grad_norm": 0.14982447028160095, "learning_rate": 0.002, "loss": 2.5449, "step": 217940 }, { "epoch": 0.43420486421012366, "grad_norm": 0.1592216044664383, "learning_rate": 0.002, "loss": 2.5752, "step": 217950 }, { "epoch": 0.4342247864337626, "grad_norm": 0.1422802209854126, "learning_rate": 0.002, "loss": 2.5626, "step": 217960 }, { "epoch": 0.4342447086574015, "grad_norm": 0.17095449566841125, "learning_rate": 0.002, "loss": 2.5618, "step": 217970 }, { "epoch": 0.4342646308810404, "grad_norm": 0.17815448343753815, "learning_rate": 0.002, "loss": 2.5586, "step": 217980 }, { "epoch": 0.43428455310467934, "grad_norm": 0.13598448038101196, "learning_rate": 0.002, "loss": 2.5512, "step": 217990 }, { "epoch": 0.43430447532831823, "grad_norm": 0.16223788261413574, "learning_rate": 0.002, "loss": 2.5583, "step": 218000 }, { "epoch": 0.4343243975519572, "grad_norm": 0.15156614780426025, "learning_rate": 0.002, "loss": 2.5432, "step": 218010 }, { "epoch": 0.43434431977559607, "grad_norm": 0.14875206351280212, "learning_rate": 0.002, "loss": 2.5518, "step": 218020 }, { "epoch": 0.434364241999235, "grad_norm": 0.15790408849716187, "learning_rate": 0.002, "loss": 2.562, "step": 218030 }, { "epoch": 0.4343841642228739, "grad_norm": 0.1733882874250412, "learning_rate": 0.002, "loss": 2.5522, "step": 218040 }, { "epoch": 0.4344040864465128, "grad_norm": 0.16353446245193481, "learning_rate": 0.002, "loss": 2.5557, "step": 218050 }, { "epoch": 0.43442400867015174, "grad_norm": 0.17905403673648834, "learning_rate": 0.002, "loss": 2.5725, "step": 218060 }, { "epoch": 0.43444393089379063, "grad_norm": 0.18649886548519135, "learning_rate": 0.002, "loss": 2.5631, "step": 218070 }, { "epoch": 0.4344638531174296, "grad_norm": 0.17033295333385468, "learning_rate": 0.002, "loss": 2.5702, "step": 218080 }, { "epoch": 0.43448377534106847, "grad_norm": 0.16453635692596436, "learning_rate": 0.002, "loss": 2.5654, "step": 218090 }, { "epoch": 0.43450369756470736, "grad_norm": 0.17497733235359192, "learning_rate": 0.002, "loss": 2.5641, "step": 218100 }, { "epoch": 0.4345236197883463, "grad_norm": 0.18830661475658417, "learning_rate": 0.002, "loss": 2.5694, "step": 218110 }, { "epoch": 0.4345435420119852, "grad_norm": 0.17653369903564453, "learning_rate": 0.002, "loss": 2.5702, "step": 218120 }, { "epoch": 0.43456346423562414, "grad_norm": 0.19054701924324036, "learning_rate": 0.002, "loss": 2.5677, "step": 218130 }, { "epoch": 0.43458338645926303, "grad_norm": 0.15389886498451233, "learning_rate": 0.002, "loss": 2.5523, "step": 218140 }, { "epoch": 0.4346033086829019, "grad_norm": 0.17572425305843353, "learning_rate": 0.002, "loss": 2.5531, "step": 218150 }, { "epoch": 0.43462323090654087, "grad_norm": 0.16429679095745087, "learning_rate": 0.002, "loss": 2.5503, "step": 218160 }, { "epoch": 0.43464315313017976, "grad_norm": 0.1643153876066208, "learning_rate": 0.002, "loss": 2.556, "step": 218170 }, { "epoch": 0.4346630753538187, "grad_norm": 0.17587974667549133, "learning_rate": 0.002, "loss": 2.5593, "step": 218180 }, { "epoch": 0.4346829975774576, "grad_norm": 0.13456182181835175, "learning_rate": 0.002, "loss": 2.5609, "step": 218190 }, { "epoch": 0.43470291980109654, "grad_norm": 0.17429828643798828, "learning_rate": 0.002, "loss": 2.5519, "step": 218200 }, { "epoch": 0.43472284202473543, "grad_norm": 0.1790657937526703, "learning_rate": 0.002, "loss": 2.5652, "step": 218210 }, { "epoch": 0.4347427642483743, "grad_norm": 0.18920691311359406, "learning_rate": 0.002, "loss": 2.5562, "step": 218220 }, { "epoch": 0.43476268647201327, "grad_norm": 0.16127490997314453, "learning_rate": 0.002, "loss": 2.5488, "step": 218230 }, { "epoch": 0.43478260869565216, "grad_norm": 0.16672301292419434, "learning_rate": 0.002, "loss": 2.5513, "step": 218240 }, { "epoch": 0.4348025309192911, "grad_norm": 0.17334915697574615, "learning_rate": 0.002, "loss": 2.5505, "step": 218250 }, { "epoch": 0.43482245314293, "grad_norm": 0.18699420988559723, "learning_rate": 0.002, "loss": 2.554, "step": 218260 }, { "epoch": 0.4348423753665689, "grad_norm": 0.17171739041805267, "learning_rate": 0.002, "loss": 2.5505, "step": 218270 }, { "epoch": 0.43486229759020784, "grad_norm": 0.16118396818637848, "learning_rate": 0.002, "loss": 2.561, "step": 218280 }, { "epoch": 0.4348822198138467, "grad_norm": 0.14457325637340546, "learning_rate": 0.002, "loss": 2.5529, "step": 218290 }, { "epoch": 0.4349021420374857, "grad_norm": 0.1977241039276123, "learning_rate": 0.002, "loss": 2.5503, "step": 218300 }, { "epoch": 0.43492206426112456, "grad_norm": 0.17011311650276184, "learning_rate": 0.002, "loss": 2.5618, "step": 218310 }, { "epoch": 0.4349419864847635, "grad_norm": 0.16642896831035614, "learning_rate": 0.002, "loss": 2.5577, "step": 218320 }, { "epoch": 0.4349619087084024, "grad_norm": 0.1954270750284195, "learning_rate": 0.002, "loss": 2.5568, "step": 218330 }, { "epoch": 0.4349818309320413, "grad_norm": 0.275518000125885, "learning_rate": 0.002, "loss": 2.5673, "step": 218340 }, { "epoch": 0.43500175315568024, "grad_norm": 0.19930629432201385, "learning_rate": 0.002, "loss": 2.5555, "step": 218350 }, { "epoch": 0.43502167537931913, "grad_norm": 0.1607036292552948, "learning_rate": 0.002, "loss": 2.5573, "step": 218360 }, { "epoch": 0.4350415976029581, "grad_norm": 0.17907452583312988, "learning_rate": 0.002, "loss": 2.5524, "step": 218370 }, { "epoch": 0.43506151982659697, "grad_norm": 0.16148880124092102, "learning_rate": 0.002, "loss": 2.5451, "step": 218380 }, { "epoch": 0.43508144205023586, "grad_norm": 0.19493046402931213, "learning_rate": 0.002, "loss": 2.5678, "step": 218390 }, { "epoch": 0.4351013642738748, "grad_norm": 0.17397692799568176, "learning_rate": 0.002, "loss": 2.5641, "step": 218400 }, { "epoch": 0.4351212864975137, "grad_norm": 0.14960826933383942, "learning_rate": 0.002, "loss": 2.5601, "step": 218410 }, { "epoch": 0.43514120872115264, "grad_norm": 0.1743331104516983, "learning_rate": 0.002, "loss": 2.5531, "step": 218420 }, { "epoch": 0.43516113094479153, "grad_norm": 0.15768840909004211, "learning_rate": 0.002, "loss": 2.5571, "step": 218430 }, { "epoch": 0.4351810531684304, "grad_norm": 0.14737705886363983, "learning_rate": 0.002, "loss": 2.568, "step": 218440 }, { "epoch": 0.43520097539206937, "grad_norm": 0.14816680550575256, "learning_rate": 0.002, "loss": 2.5436, "step": 218450 }, { "epoch": 0.43522089761570826, "grad_norm": 0.18756191432476044, "learning_rate": 0.002, "loss": 2.5576, "step": 218460 }, { "epoch": 0.4352408198393472, "grad_norm": 0.1918901801109314, "learning_rate": 0.002, "loss": 2.5532, "step": 218470 }, { "epoch": 0.4352607420629861, "grad_norm": 0.17387130856513977, "learning_rate": 0.002, "loss": 2.54, "step": 218480 }, { "epoch": 0.43528066428662504, "grad_norm": 0.15688924491405487, "learning_rate": 0.002, "loss": 2.56, "step": 218490 }, { "epoch": 0.43530058651026393, "grad_norm": 0.1500186175107956, "learning_rate": 0.002, "loss": 2.5491, "step": 218500 }, { "epoch": 0.4353205087339028, "grad_norm": 0.21740178763866425, "learning_rate": 0.002, "loss": 2.5646, "step": 218510 }, { "epoch": 0.43534043095754177, "grad_norm": 0.15912337601184845, "learning_rate": 0.002, "loss": 2.5547, "step": 218520 }, { "epoch": 0.43536035318118066, "grad_norm": 0.16823285818099976, "learning_rate": 0.002, "loss": 2.5561, "step": 218530 }, { "epoch": 0.4353802754048196, "grad_norm": 0.19524718821048737, "learning_rate": 0.002, "loss": 2.553, "step": 218540 }, { "epoch": 0.4354001976284585, "grad_norm": 0.14472149312496185, "learning_rate": 0.002, "loss": 2.562, "step": 218550 }, { "epoch": 0.4354201198520974, "grad_norm": 0.18869692087173462, "learning_rate": 0.002, "loss": 2.5742, "step": 218560 }, { "epoch": 0.43544004207573633, "grad_norm": 0.16743558645248413, "learning_rate": 0.002, "loss": 2.57, "step": 218570 }, { "epoch": 0.4354599642993752, "grad_norm": 0.18608389794826508, "learning_rate": 0.002, "loss": 2.5626, "step": 218580 }, { "epoch": 0.43547988652301417, "grad_norm": 0.15977689623832703, "learning_rate": 0.002, "loss": 2.5697, "step": 218590 }, { "epoch": 0.43549980874665306, "grad_norm": 0.20192448794841766, "learning_rate": 0.002, "loss": 2.5599, "step": 218600 }, { "epoch": 0.435519730970292, "grad_norm": 0.15288425981998444, "learning_rate": 0.002, "loss": 2.5605, "step": 218610 }, { "epoch": 0.4355396531939309, "grad_norm": 0.18257461488246918, "learning_rate": 0.002, "loss": 2.5705, "step": 218620 }, { "epoch": 0.4355595754175698, "grad_norm": 0.15958046913146973, "learning_rate": 0.002, "loss": 2.5571, "step": 218630 }, { "epoch": 0.43557949764120873, "grad_norm": 0.1789177656173706, "learning_rate": 0.002, "loss": 2.5617, "step": 218640 }, { "epoch": 0.4355994198648476, "grad_norm": 0.14782698452472687, "learning_rate": 0.002, "loss": 2.5472, "step": 218650 }, { "epoch": 0.43561934208848657, "grad_norm": 0.18474915623664856, "learning_rate": 0.002, "loss": 2.5702, "step": 218660 }, { "epoch": 0.43563926431212546, "grad_norm": 0.14502474665641785, "learning_rate": 0.002, "loss": 2.5465, "step": 218670 }, { "epoch": 0.43565918653576435, "grad_norm": 0.15344439446926117, "learning_rate": 0.002, "loss": 2.564, "step": 218680 }, { "epoch": 0.4356791087594033, "grad_norm": 0.17884330451488495, "learning_rate": 0.002, "loss": 2.5488, "step": 218690 }, { "epoch": 0.4356990309830422, "grad_norm": 0.15610283613204956, "learning_rate": 0.002, "loss": 2.5654, "step": 218700 }, { "epoch": 0.43571895320668114, "grad_norm": 0.1772608757019043, "learning_rate": 0.002, "loss": 2.5518, "step": 218710 }, { "epoch": 0.43573887543032, "grad_norm": 0.16671574115753174, "learning_rate": 0.002, "loss": 2.5454, "step": 218720 }, { "epoch": 0.4357587976539589, "grad_norm": 0.1899748146533966, "learning_rate": 0.002, "loss": 2.5611, "step": 218730 }, { "epoch": 0.43577871987759786, "grad_norm": 0.19043570756912231, "learning_rate": 0.002, "loss": 2.558, "step": 218740 }, { "epoch": 0.43579864210123676, "grad_norm": 0.15972867608070374, "learning_rate": 0.002, "loss": 2.5514, "step": 218750 }, { "epoch": 0.4358185643248757, "grad_norm": 0.22986264526844025, "learning_rate": 0.002, "loss": 2.5692, "step": 218760 }, { "epoch": 0.4358384865485146, "grad_norm": 0.1783965677022934, "learning_rate": 0.002, "loss": 2.5593, "step": 218770 }, { "epoch": 0.43585840877215354, "grad_norm": 0.17517514526844025, "learning_rate": 0.002, "loss": 2.5484, "step": 218780 }, { "epoch": 0.43587833099579243, "grad_norm": 0.19712325930595398, "learning_rate": 0.002, "loss": 2.5744, "step": 218790 }, { "epoch": 0.4358982532194313, "grad_norm": 0.14808839559555054, "learning_rate": 0.002, "loss": 2.5498, "step": 218800 }, { "epoch": 0.43591817544307027, "grad_norm": 0.16346575319766998, "learning_rate": 0.002, "loss": 2.5348, "step": 218810 }, { "epoch": 0.43593809766670916, "grad_norm": 0.19423839449882507, "learning_rate": 0.002, "loss": 2.5617, "step": 218820 }, { "epoch": 0.4359580198903481, "grad_norm": 0.172640860080719, "learning_rate": 0.002, "loss": 2.562, "step": 218830 }, { "epoch": 0.435977942113987, "grad_norm": 0.160010427236557, "learning_rate": 0.002, "loss": 2.5565, "step": 218840 }, { "epoch": 0.4359978643376259, "grad_norm": 0.1671026051044464, "learning_rate": 0.002, "loss": 2.5611, "step": 218850 }, { "epoch": 0.43601778656126483, "grad_norm": 0.4492987096309662, "learning_rate": 0.002, "loss": 2.5542, "step": 218860 }, { "epoch": 0.4360377087849037, "grad_norm": 0.19748708605766296, "learning_rate": 0.002, "loss": 2.5575, "step": 218870 }, { "epoch": 0.43605763100854267, "grad_norm": 0.1454145312309265, "learning_rate": 0.002, "loss": 2.5668, "step": 218880 }, { "epoch": 0.43607755323218156, "grad_norm": 0.17423373460769653, "learning_rate": 0.002, "loss": 2.5653, "step": 218890 }, { "epoch": 0.43609747545582045, "grad_norm": 0.14663287997245789, "learning_rate": 0.002, "loss": 2.5624, "step": 218900 }, { "epoch": 0.4361173976794594, "grad_norm": 0.19449788331985474, "learning_rate": 0.002, "loss": 2.5686, "step": 218910 }, { "epoch": 0.4361373199030983, "grad_norm": 0.20583388209342957, "learning_rate": 0.002, "loss": 2.56, "step": 218920 }, { "epoch": 0.43615724212673723, "grad_norm": 0.16538043320178986, "learning_rate": 0.002, "loss": 2.5613, "step": 218930 }, { "epoch": 0.4361771643503761, "grad_norm": 0.1620977818965912, "learning_rate": 0.002, "loss": 2.5594, "step": 218940 }, { "epoch": 0.43619708657401507, "grad_norm": 0.2002328783273697, "learning_rate": 0.002, "loss": 2.5669, "step": 218950 }, { "epoch": 0.43621700879765396, "grad_norm": 0.15291495621204376, "learning_rate": 0.002, "loss": 2.563, "step": 218960 }, { "epoch": 0.43623693102129285, "grad_norm": 0.151100754737854, "learning_rate": 0.002, "loss": 2.5446, "step": 218970 }, { "epoch": 0.4362568532449318, "grad_norm": 0.16914838552474976, "learning_rate": 0.002, "loss": 2.5498, "step": 218980 }, { "epoch": 0.4362767754685707, "grad_norm": 0.15709298849105835, "learning_rate": 0.002, "loss": 2.5707, "step": 218990 }, { "epoch": 0.43629669769220963, "grad_norm": 0.1604171097278595, "learning_rate": 0.002, "loss": 2.5517, "step": 219000 }, { "epoch": 0.4363166199158485, "grad_norm": 0.1389351785182953, "learning_rate": 0.002, "loss": 2.5585, "step": 219010 }, { "epoch": 0.4363365421394874, "grad_norm": 0.17438150942325592, "learning_rate": 0.002, "loss": 2.5593, "step": 219020 }, { "epoch": 0.43635646436312636, "grad_norm": 0.23255732655525208, "learning_rate": 0.002, "loss": 2.5655, "step": 219030 }, { "epoch": 0.43637638658676525, "grad_norm": 0.17024675011634827, "learning_rate": 0.002, "loss": 2.5579, "step": 219040 }, { "epoch": 0.4363963088104042, "grad_norm": 0.20549039542675018, "learning_rate": 0.002, "loss": 2.5592, "step": 219050 }, { "epoch": 0.4364162310340431, "grad_norm": 0.17365215718746185, "learning_rate": 0.002, "loss": 2.5605, "step": 219060 }, { "epoch": 0.43643615325768204, "grad_norm": 0.1596255749464035, "learning_rate": 0.002, "loss": 2.5707, "step": 219070 }, { "epoch": 0.4364560754813209, "grad_norm": 0.14759960770606995, "learning_rate": 0.002, "loss": 2.5681, "step": 219080 }, { "epoch": 0.4364759977049598, "grad_norm": 0.14237169921398163, "learning_rate": 0.002, "loss": 2.5531, "step": 219090 }, { "epoch": 0.43649591992859876, "grad_norm": 0.2746083736419678, "learning_rate": 0.002, "loss": 2.5561, "step": 219100 }, { "epoch": 0.43651584215223765, "grad_norm": 0.15414756536483765, "learning_rate": 0.002, "loss": 2.5524, "step": 219110 }, { "epoch": 0.4365357643758766, "grad_norm": 0.17738941311836243, "learning_rate": 0.002, "loss": 2.5692, "step": 219120 }, { "epoch": 0.4365556865995155, "grad_norm": 0.15912647545337677, "learning_rate": 0.002, "loss": 2.5596, "step": 219130 }, { "epoch": 0.4365756088231544, "grad_norm": 0.1807912439107895, "learning_rate": 0.002, "loss": 2.5663, "step": 219140 }, { "epoch": 0.43659553104679333, "grad_norm": 0.20993557572364807, "learning_rate": 0.002, "loss": 2.5584, "step": 219150 }, { "epoch": 0.4366154532704322, "grad_norm": 0.1579313725233078, "learning_rate": 0.002, "loss": 2.5616, "step": 219160 }, { "epoch": 0.43663537549407117, "grad_norm": 0.15713068842887878, "learning_rate": 0.002, "loss": 2.5637, "step": 219170 }, { "epoch": 0.43665529771771006, "grad_norm": 0.1751096099615097, "learning_rate": 0.002, "loss": 2.5588, "step": 219180 }, { "epoch": 0.43667521994134895, "grad_norm": 0.20619995892047882, "learning_rate": 0.002, "loss": 2.5687, "step": 219190 }, { "epoch": 0.4366951421649879, "grad_norm": 0.16232340037822723, "learning_rate": 0.002, "loss": 2.5605, "step": 219200 }, { "epoch": 0.4367150643886268, "grad_norm": 0.15155284106731415, "learning_rate": 0.002, "loss": 2.5659, "step": 219210 }, { "epoch": 0.43673498661226573, "grad_norm": 0.1658710390329361, "learning_rate": 0.002, "loss": 2.5504, "step": 219220 }, { "epoch": 0.4367549088359046, "grad_norm": 0.16823984682559967, "learning_rate": 0.002, "loss": 2.5394, "step": 219230 }, { "epoch": 0.43677483105954357, "grad_norm": 0.15448804199695587, "learning_rate": 0.002, "loss": 2.5571, "step": 219240 }, { "epoch": 0.43679475328318246, "grad_norm": 0.1703099012374878, "learning_rate": 0.002, "loss": 2.5596, "step": 219250 }, { "epoch": 0.43681467550682135, "grad_norm": 0.18131773173809052, "learning_rate": 0.002, "loss": 2.5676, "step": 219260 }, { "epoch": 0.4368345977304603, "grad_norm": 0.16926701366901398, "learning_rate": 0.002, "loss": 2.5414, "step": 219270 }, { "epoch": 0.4368545199540992, "grad_norm": 0.17932234704494476, "learning_rate": 0.002, "loss": 2.5562, "step": 219280 }, { "epoch": 0.43687444217773813, "grad_norm": 0.17718788981437683, "learning_rate": 0.002, "loss": 2.5623, "step": 219290 }, { "epoch": 0.436894364401377, "grad_norm": 0.1764393448829651, "learning_rate": 0.002, "loss": 2.5671, "step": 219300 }, { "epoch": 0.4369142866250159, "grad_norm": 0.1696639358997345, "learning_rate": 0.002, "loss": 2.5562, "step": 219310 }, { "epoch": 0.43693420884865486, "grad_norm": 0.18210916221141815, "learning_rate": 0.002, "loss": 2.5603, "step": 219320 }, { "epoch": 0.43695413107229375, "grad_norm": 0.14540798962116241, "learning_rate": 0.002, "loss": 2.5469, "step": 219330 }, { "epoch": 0.4369740532959327, "grad_norm": 0.17410790920257568, "learning_rate": 0.002, "loss": 2.5534, "step": 219340 }, { "epoch": 0.4369939755195716, "grad_norm": 0.15019427239894867, "learning_rate": 0.002, "loss": 2.5461, "step": 219350 }, { "epoch": 0.43701389774321053, "grad_norm": 0.2026394158601761, "learning_rate": 0.002, "loss": 2.5713, "step": 219360 }, { "epoch": 0.4370338199668494, "grad_norm": 0.14781907200813293, "learning_rate": 0.002, "loss": 2.551, "step": 219370 }, { "epoch": 0.4370537421904883, "grad_norm": 0.16680912673473358, "learning_rate": 0.002, "loss": 2.568, "step": 219380 }, { "epoch": 0.43707366441412726, "grad_norm": 0.152220219373703, "learning_rate": 0.002, "loss": 2.5461, "step": 219390 }, { "epoch": 0.43709358663776615, "grad_norm": 0.181121364235878, "learning_rate": 0.002, "loss": 2.555, "step": 219400 }, { "epoch": 0.4371135088614051, "grad_norm": 0.151297464966774, "learning_rate": 0.002, "loss": 2.5629, "step": 219410 }, { "epoch": 0.437133431085044, "grad_norm": 0.19813567399978638, "learning_rate": 0.002, "loss": 2.5682, "step": 219420 }, { "epoch": 0.4371533533086829, "grad_norm": 0.18220502138137817, "learning_rate": 0.002, "loss": 2.5482, "step": 219430 }, { "epoch": 0.4371732755323218, "grad_norm": 0.16175803542137146, "learning_rate": 0.002, "loss": 2.5507, "step": 219440 }, { "epoch": 0.4371931977559607, "grad_norm": 0.16150082647800446, "learning_rate": 0.002, "loss": 2.5466, "step": 219450 }, { "epoch": 0.43721311997959966, "grad_norm": 0.1613379716873169, "learning_rate": 0.002, "loss": 2.5525, "step": 219460 }, { "epoch": 0.43723304220323855, "grad_norm": 0.17386838793754578, "learning_rate": 0.002, "loss": 2.55, "step": 219470 }, { "epoch": 0.43725296442687744, "grad_norm": 0.148654505610466, "learning_rate": 0.002, "loss": 2.5536, "step": 219480 }, { "epoch": 0.4372728866505164, "grad_norm": 0.1610032021999359, "learning_rate": 0.002, "loss": 2.5569, "step": 219490 }, { "epoch": 0.4372928088741553, "grad_norm": 0.16161277890205383, "learning_rate": 0.002, "loss": 2.5651, "step": 219500 }, { "epoch": 0.4373127310977942, "grad_norm": 0.16342519223690033, "learning_rate": 0.002, "loss": 2.5502, "step": 219510 }, { "epoch": 0.4373326533214331, "grad_norm": 0.1879425346851349, "learning_rate": 0.002, "loss": 2.5678, "step": 219520 }, { "epoch": 0.43735257554507206, "grad_norm": 0.15107308328151703, "learning_rate": 0.002, "loss": 2.5467, "step": 219530 }, { "epoch": 0.43737249776871095, "grad_norm": 0.1760699599981308, "learning_rate": 0.002, "loss": 2.5442, "step": 219540 }, { "epoch": 0.43739241999234985, "grad_norm": 0.20089641213417053, "learning_rate": 0.002, "loss": 2.5551, "step": 219550 }, { "epoch": 0.4374123422159888, "grad_norm": 0.20146815478801727, "learning_rate": 0.002, "loss": 2.5462, "step": 219560 }, { "epoch": 0.4374322644396277, "grad_norm": 0.22219614684581757, "learning_rate": 0.002, "loss": 2.5518, "step": 219570 }, { "epoch": 0.43745218666326663, "grad_norm": 0.16389095783233643, "learning_rate": 0.002, "loss": 2.5676, "step": 219580 }, { "epoch": 0.4374721088869055, "grad_norm": 0.1827024519443512, "learning_rate": 0.002, "loss": 2.5566, "step": 219590 }, { "epoch": 0.4374920311105444, "grad_norm": 0.22353264689445496, "learning_rate": 0.002, "loss": 2.5569, "step": 219600 }, { "epoch": 0.43751195333418336, "grad_norm": 0.15214312076568604, "learning_rate": 0.002, "loss": 2.5615, "step": 219610 }, { "epoch": 0.43753187555782225, "grad_norm": 0.2116755098104477, "learning_rate": 0.002, "loss": 2.5705, "step": 219620 }, { "epoch": 0.4375517977814612, "grad_norm": 0.17732684314250946, "learning_rate": 0.002, "loss": 2.566, "step": 219630 }, { "epoch": 0.4375717200051001, "grad_norm": 0.15249209105968475, "learning_rate": 0.002, "loss": 2.5632, "step": 219640 }, { "epoch": 0.43759164222873903, "grad_norm": 0.1943763643503189, "learning_rate": 0.002, "loss": 2.5631, "step": 219650 }, { "epoch": 0.4376115644523779, "grad_norm": 0.1839413344860077, "learning_rate": 0.002, "loss": 2.5604, "step": 219660 }, { "epoch": 0.4376314866760168, "grad_norm": 0.166574627161026, "learning_rate": 0.002, "loss": 2.554, "step": 219670 }, { "epoch": 0.43765140889965576, "grad_norm": 0.1846134066581726, "learning_rate": 0.002, "loss": 2.5704, "step": 219680 }, { "epoch": 0.43767133112329465, "grad_norm": 0.15235474705696106, "learning_rate": 0.002, "loss": 2.5544, "step": 219690 }, { "epoch": 0.4376912533469336, "grad_norm": 0.1685243546962738, "learning_rate": 0.002, "loss": 2.558, "step": 219700 }, { "epoch": 0.4377111755705725, "grad_norm": 0.16196219623088837, "learning_rate": 0.002, "loss": 2.5691, "step": 219710 }, { "epoch": 0.4377310977942114, "grad_norm": 0.21233691275119781, "learning_rate": 0.002, "loss": 2.5711, "step": 219720 }, { "epoch": 0.4377510200178503, "grad_norm": 0.17875318229198456, "learning_rate": 0.002, "loss": 2.5543, "step": 219730 }, { "epoch": 0.4377709422414892, "grad_norm": 0.18756107985973358, "learning_rate": 0.002, "loss": 2.5393, "step": 219740 }, { "epoch": 0.43779086446512816, "grad_norm": 0.18039482831954956, "learning_rate": 0.002, "loss": 2.5544, "step": 219750 }, { "epoch": 0.43781078668876705, "grad_norm": 0.1570565402507782, "learning_rate": 0.002, "loss": 2.565, "step": 219760 }, { "epoch": 0.43783070891240594, "grad_norm": 0.1713789850473404, "learning_rate": 0.002, "loss": 2.5438, "step": 219770 }, { "epoch": 0.4378506311360449, "grad_norm": 0.18257704377174377, "learning_rate": 0.002, "loss": 2.566, "step": 219780 }, { "epoch": 0.4378705533596838, "grad_norm": 0.15634450316429138, "learning_rate": 0.002, "loss": 2.5526, "step": 219790 }, { "epoch": 0.4378904755833227, "grad_norm": 0.17941023409366608, "learning_rate": 0.002, "loss": 2.5718, "step": 219800 }, { "epoch": 0.4379103978069616, "grad_norm": 0.1714896857738495, "learning_rate": 0.002, "loss": 2.5725, "step": 219810 }, { "epoch": 0.43793032003060056, "grad_norm": 0.18188849091529846, "learning_rate": 0.002, "loss": 2.553, "step": 219820 }, { "epoch": 0.43795024225423945, "grad_norm": 0.16850247979164124, "learning_rate": 0.002, "loss": 2.56, "step": 219830 }, { "epoch": 0.43797016447787834, "grad_norm": 0.15437422692775726, "learning_rate": 0.002, "loss": 2.5468, "step": 219840 }, { "epoch": 0.4379900867015173, "grad_norm": 0.22928699851036072, "learning_rate": 0.002, "loss": 2.5565, "step": 219850 }, { "epoch": 0.4380100089251562, "grad_norm": 0.1483020931482315, "learning_rate": 0.002, "loss": 2.5742, "step": 219860 }, { "epoch": 0.4380299311487951, "grad_norm": 0.16279050707817078, "learning_rate": 0.002, "loss": 2.5501, "step": 219870 }, { "epoch": 0.438049853372434, "grad_norm": 0.16218611598014832, "learning_rate": 0.002, "loss": 2.5477, "step": 219880 }, { "epoch": 0.4380697755960729, "grad_norm": 0.17965063452720642, "learning_rate": 0.002, "loss": 2.5655, "step": 219890 }, { "epoch": 0.43808969781971185, "grad_norm": 0.17334584891796112, "learning_rate": 0.002, "loss": 2.5735, "step": 219900 }, { "epoch": 0.43810962004335074, "grad_norm": 0.1409982591867447, "learning_rate": 0.002, "loss": 2.5462, "step": 219910 }, { "epoch": 0.4381295422669897, "grad_norm": 0.22283686697483063, "learning_rate": 0.002, "loss": 2.5569, "step": 219920 }, { "epoch": 0.4381494644906286, "grad_norm": 0.17344620823860168, "learning_rate": 0.002, "loss": 2.5599, "step": 219930 }, { "epoch": 0.4381693867142675, "grad_norm": 0.1784568876028061, "learning_rate": 0.002, "loss": 2.5595, "step": 219940 }, { "epoch": 0.4381893089379064, "grad_norm": 0.19996868073940277, "learning_rate": 0.002, "loss": 2.5608, "step": 219950 }, { "epoch": 0.4382092311615453, "grad_norm": 0.1842781901359558, "learning_rate": 0.002, "loss": 2.5644, "step": 219960 }, { "epoch": 0.43822915338518426, "grad_norm": 0.13568834960460663, "learning_rate": 0.002, "loss": 2.5468, "step": 219970 }, { "epoch": 0.43824907560882315, "grad_norm": 0.15425622463226318, "learning_rate": 0.002, "loss": 2.5663, "step": 219980 }, { "epoch": 0.4382689978324621, "grad_norm": 0.20429739356040955, "learning_rate": 0.002, "loss": 2.5705, "step": 219990 }, { "epoch": 0.438288920056101, "grad_norm": 0.17645974457263947, "learning_rate": 0.002, "loss": 2.5507, "step": 220000 }, { "epoch": 0.4383088422797399, "grad_norm": 0.18206563591957092, "learning_rate": 0.002, "loss": 2.5772, "step": 220010 }, { "epoch": 0.4383287645033788, "grad_norm": 0.14882293343544006, "learning_rate": 0.002, "loss": 2.5563, "step": 220020 }, { "epoch": 0.4383486867270177, "grad_norm": 0.19226709008216858, "learning_rate": 0.002, "loss": 2.5579, "step": 220030 }, { "epoch": 0.43836860895065666, "grad_norm": 0.19653885066509247, "learning_rate": 0.002, "loss": 2.562, "step": 220040 }, { "epoch": 0.43838853117429555, "grad_norm": 0.16322331130504608, "learning_rate": 0.002, "loss": 2.5621, "step": 220050 }, { "epoch": 0.43840845339793444, "grad_norm": 0.15859700739383698, "learning_rate": 0.002, "loss": 2.5479, "step": 220060 }, { "epoch": 0.4384283756215734, "grad_norm": 0.200092151761055, "learning_rate": 0.002, "loss": 2.5611, "step": 220070 }, { "epoch": 0.4384482978452123, "grad_norm": 0.16334855556488037, "learning_rate": 0.002, "loss": 2.5519, "step": 220080 }, { "epoch": 0.4384682200688512, "grad_norm": 0.16924728453159332, "learning_rate": 0.002, "loss": 2.5617, "step": 220090 }, { "epoch": 0.4384881422924901, "grad_norm": 0.1605769544839859, "learning_rate": 0.002, "loss": 2.5519, "step": 220100 }, { "epoch": 0.43850806451612906, "grad_norm": 0.1526762694120407, "learning_rate": 0.002, "loss": 2.5579, "step": 220110 }, { "epoch": 0.43852798673976795, "grad_norm": 0.1773698925971985, "learning_rate": 0.002, "loss": 2.553, "step": 220120 }, { "epoch": 0.43854790896340684, "grad_norm": 0.15362617373466492, "learning_rate": 0.002, "loss": 2.5525, "step": 220130 }, { "epoch": 0.4385678311870458, "grad_norm": 0.16434653103351593, "learning_rate": 0.002, "loss": 2.559, "step": 220140 }, { "epoch": 0.4385877534106847, "grad_norm": 0.16933386027812958, "learning_rate": 0.002, "loss": 2.5583, "step": 220150 }, { "epoch": 0.4386076756343236, "grad_norm": 0.17355668544769287, "learning_rate": 0.002, "loss": 2.561, "step": 220160 }, { "epoch": 0.4386275978579625, "grad_norm": 0.16991545259952545, "learning_rate": 0.002, "loss": 2.5408, "step": 220170 }, { "epoch": 0.4386475200816014, "grad_norm": 0.2128186970949173, "learning_rate": 0.002, "loss": 2.539, "step": 220180 }, { "epoch": 0.43866744230524035, "grad_norm": 0.14594976603984833, "learning_rate": 0.002, "loss": 2.562, "step": 220190 }, { "epoch": 0.43868736452887924, "grad_norm": 0.16590629518032074, "learning_rate": 0.002, "loss": 2.5526, "step": 220200 }, { "epoch": 0.4387072867525182, "grad_norm": 0.1848749965429306, "learning_rate": 0.002, "loss": 2.5623, "step": 220210 }, { "epoch": 0.4387272089761571, "grad_norm": 0.17037852108478546, "learning_rate": 0.002, "loss": 2.542, "step": 220220 }, { "epoch": 0.43874713119979597, "grad_norm": 0.15619616210460663, "learning_rate": 0.002, "loss": 2.5616, "step": 220230 }, { "epoch": 0.4387670534234349, "grad_norm": 0.1868542581796646, "learning_rate": 0.002, "loss": 2.5638, "step": 220240 }, { "epoch": 0.4387869756470738, "grad_norm": 0.17447812855243683, "learning_rate": 0.002, "loss": 2.5566, "step": 220250 }, { "epoch": 0.43880689787071275, "grad_norm": 0.1518578976392746, "learning_rate": 0.002, "loss": 2.5706, "step": 220260 }, { "epoch": 0.43882682009435164, "grad_norm": 0.1548837423324585, "learning_rate": 0.002, "loss": 2.5658, "step": 220270 }, { "epoch": 0.4388467423179906, "grad_norm": 0.20203261077404022, "learning_rate": 0.002, "loss": 2.5578, "step": 220280 }, { "epoch": 0.4388666645416295, "grad_norm": 0.1787240207195282, "learning_rate": 0.002, "loss": 2.5616, "step": 220290 }, { "epoch": 0.43888658676526837, "grad_norm": 0.16242896020412445, "learning_rate": 0.002, "loss": 2.5466, "step": 220300 }, { "epoch": 0.4389065089889073, "grad_norm": 0.17503760755062103, "learning_rate": 0.002, "loss": 2.5512, "step": 220310 }, { "epoch": 0.4389264312125462, "grad_norm": 0.12668175995349884, "learning_rate": 0.002, "loss": 2.5543, "step": 220320 }, { "epoch": 0.43894635343618515, "grad_norm": 0.1775463968515396, "learning_rate": 0.002, "loss": 2.5609, "step": 220330 }, { "epoch": 0.43896627565982405, "grad_norm": 0.15020601451396942, "learning_rate": 0.002, "loss": 2.5571, "step": 220340 }, { "epoch": 0.43898619788346294, "grad_norm": 0.17056918144226074, "learning_rate": 0.002, "loss": 2.5449, "step": 220350 }, { "epoch": 0.4390061201071019, "grad_norm": 0.1737743318080902, "learning_rate": 0.002, "loss": 2.5551, "step": 220360 }, { "epoch": 0.4390260423307408, "grad_norm": 0.13467107713222504, "learning_rate": 0.002, "loss": 2.556, "step": 220370 }, { "epoch": 0.4390459645543797, "grad_norm": 0.1448243409395218, "learning_rate": 0.002, "loss": 2.5567, "step": 220380 }, { "epoch": 0.4390658867780186, "grad_norm": 0.1634880155324936, "learning_rate": 0.002, "loss": 2.5697, "step": 220390 }, { "epoch": 0.43908580900165756, "grad_norm": 0.17092977464199066, "learning_rate": 0.002, "loss": 2.5512, "step": 220400 }, { "epoch": 0.43910573122529645, "grad_norm": 0.1875012367963791, "learning_rate": 0.002, "loss": 2.5633, "step": 220410 }, { "epoch": 0.43912565344893534, "grad_norm": 0.17310470342636108, "learning_rate": 0.002, "loss": 2.5632, "step": 220420 }, { "epoch": 0.4391455756725743, "grad_norm": 0.17473214864730835, "learning_rate": 0.002, "loss": 2.547, "step": 220430 }, { "epoch": 0.4391654978962132, "grad_norm": 0.16467174887657166, "learning_rate": 0.002, "loss": 2.5558, "step": 220440 }, { "epoch": 0.4391854201198521, "grad_norm": 0.16188760101795197, "learning_rate": 0.002, "loss": 2.5623, "step": 220450 }, { "epoch": 0.439205342343491, "grad_norm": 0.1701732873916626, "learning_rate": 0.002, "loss": 2.558, "step": 220460 }, { "epoch": 0.4392252645671299, "grad_norm": 0.18729650974273682, "learning_rate": 0.002, "loss": 2.5583, "step": 220470 }, { "epoch": 0.43924518679076885, "grad_norm": 0.17910538613796234, "learning_rate": 0.002, "loss": 2.5491, "step": 220480 }, { "epoch": 0.43926510901440774, "grad_norm": 0.1765294224023819, "learning_rate": 0.002, "loss": 2.5616, "step": 220490 }, { "epoch": 0.4392850312380467, "grad_norm": 0.21454572677612305, "learning_rate": 0.002, "loss": 2.5619, "step": 220500 }, { "epoch": 0.4393049534616856, "grad_norm": 0.15016494691371918, "learning_rate": 0.002, "loss": 2.5591, "step": 220510 }, { "epoch": 0.43932487568532447, "grad_norm": 0.16724394261837006, "learning_rate": 0.002, "loss": 2.5582, "step": 220520 }, { "epoch": 0.4393447979089634, "grad_norm": 0.1643584817647934, "learning_rate": 0.002, "loss": 2.5644, "step": 220530 }, { "epoch": 0.4393647201326023, "grad_norm": 0.1779773086309433, "learning_rate": 0.002, "loss": 2.556, "step": 220540 }, { "epoch": 0.43938464235624125, "grad_norm": 0.15983456373214722, "learning_rate": 0.002, "loss": 2.5582, "step": 220550 }, { "epoch": 0.43940456457988014, "grad_norm": 0.14421086013317108, "learning_rate": 0.002, "loss": 2.5619, "step": 220560 }, { "epoch": 0.4394244868035191, "grad_norm": 0.15970568358898163, "learning_rate": 0.002, "loss": 2.568, "step": 220570 }, { "epoch": 0.439444409027158, "grad_norm": 0.15747664868831635, "learning_rate": 0.002, "loss": 2.5609, "step": 220580 }, { "epoch": 0.43946433125079687, "grad_norm": 0.2172389179468155, "learning_rate": 0.002, "loss": 2.5658, "step": 220590 }, { "epoch": 0.4394842534744358, "grad_norm": 0.16230858862400055, "learning_rate": 0.002, "loss": 2.5561, "step": 220600 }, { "epoch": 0.4395041756980747, "grad_norm": 0.1515478789806366, "learning_rate": 0.002, "loss": 2.5634, "step": 220610 }, { "epoch": 0.43952409792171365, "grad_norm": 0.20926155149936676, "learning_rate": 0.002, "loss": 2.5767, "step": 220620 }, { "epoch": 0.43954402014535254, "grad_norm": 0.1607515811920166, "learning_rate": 0.002, "loss": 2.5664, "step": 220630 }, { "epoch": 0.43956394236899143, "grad_norm": 0.15586863458156586, "learning_rate": 0.002, "loss": 2.5574, "step": 220640 }, { "epoch": 0.4395838645926304, "grad_norm": 0.19154828786849976, "learning_rate": 0.002, "loss": 2.5634, "step": 220650 }, { "epoch": 0.43960378681626927, "grad_norm": 0.17642687261104584, "learning_rate": 0.002, "loss": 2.5476, "step": 220660 }, { "epoch": 0.4396237090399082, "grad_norm": 0.14132072031497955, "learning_rate": 0.002, "loss": 2.5374, "step": 220670 }, { "epoch": 0.4396436312635471, "grad_norm": 0.18257439136505127, "learning_rate": 0.002, "loss": 2.5556, "step": 220680 }, { "epoch": 0.439663553487186, "grad_norm": 0.6540743112564087, "learning_rate": 0.002, "loss": 2.5619, "step": 220690 }, { "epoch": 0.43968347571082494, "grad_norm": 0.21208488941192627, "learning_rate": 0.002, "loss": 2.5525, "step": 220700 }, { "epoch": 0.43970339793446384, "grad_norm": 0.1516285091638565, "learning_rate": 0.002, "loss": 2.5459, "step": 220710 }, { "epoch": 0.4397233201581028, "grad_norm": 0.14498552680015564, "learning_rate": 0.002, "loss": 2.5522, "step": 220720 }, { "epoch": 0.4397432423817417, "grad_norm": 0.1821017563343048, "learning_rate": 0.002, "loss": 2.5471, "step": 220730 }, { "epoch": 0.4397631646053806, "grad_norm": 0.1770014762878418, "learning_rate": 0.002, "loss": 2.5666, "step": 220740 }, { "epoch": 0.4397830868290195, "grad_norm": 0.20397630333900452, "learning_rate": 0.002, "loss": 2.5452, "step": 220750 }, { "epoch": 0.4398030090526584, "grad_norm": 0.19995790719985962, "learning_rate": 0.002, "loss": 2.5698, "step": 220760 }, { "epoch": 0.43982293127629735, "grad_norm": 0.15327075123786926, "learning_rate": 0.002, "loss": 2.577, "step": 220770 }, { "epoch": 0.43984285349993624, "grad_norm": 0.1507043093442917, "learning_rate": 0.002, "loss": 2.5404, "step": 220780 }, { "epoch": 0.4398627757235752, "grad_norm": 0.16258792579174042, "learning_rate": 0.002, "loss": 2.5542, "step": 220790 }, { "epoch": 0.4398826979472141, "grad_norm": 0.20084530115127563, "learning_rate": 0.002, "loss": 2.5467, "step": 220800 }, { "epoch": 0.43990262017085296, "grad_norm": 0.1896854192018509, "learning_rate": 0.002, "loss": 2.5614, "step": 220810 }, { "epoch": 0.4399225423944919, "grad_norm": 0.1554131656885147, "learning_rate": 0.002, "loss": 2.5573, "step": 220820 }, { "epoch": 0.4399424646181308, "grad_norm": 0.15705697238445282, "learning_rate": 0.002, "loss": 2.5675, "step": 220830 }, { "epoch": 0.43996238684176975, "grad_norm": 0.19629481434822083, "learning_rate": 0.002, "loss": 2.5647, "step": 220840 }, { "epoch": 0.43998230906540864, "grad_norm": 0.16367968916893005, "learning_rate": 0.002, "loss": 2.5592, "step": 220850 }, { "epoch": 0.4400022312890476, "grad_norm": 0.14885884523391724, "learning_rate": 0.002, "loss": 2.5517, "step": 220860 }, { "epoch": 0.4400221535126865, "grad_norm": 0.18416593968868256, "learning_rate": 0.002, "loss": 2.5612, "step": 220870 }, { "epoch": 0.44004207573632537, "grad_norm": 0.16955232620239258, "learning_rate": 0.002, "loss": 2.5688, "step": 220880 }, { "epoch": 0.4400619979599643, "grad_norm": 0.1966833472251892, "learning_rate": 0.002, "loss": 2.5522, "step": 220890 }, { "epoch": 0.4400819201836032, "grad_norm": 0.16956739127635956, "learning_rate": 0.002, "loss": 2.5687, "step": 220900 }, { "epoch": 0.44010184240724215, "grad_norm": 0.15165306627750397, "learning_rate": 0.002, "loss": 2.5463, "step": 220910 }, { "epoch": 0.44012176463088104, "grad_norm": 0.20360741019248962, "learning_rate": 0.002, "loss": 2.5602, "step": 220920 }, { "epoch": 0.44014168685451993, "grad_norm": 0.16829079389572144, "learning_rate": 0.002, "loss": 2.5558, "step": 220930 }, { "epoch": 0.4401616090781589, "grad_norm": 0.1423140913248062, "learning_rate": 0.002, "loss": 2.5654, "step": 220940 }, { "epoch": 0.44018153130179777, "grad_norm": 0.16184499859809875, "learning_rate": 0.002, "loss": 2.5547, "step": 220950 }, { "epoch": 0.4402014535254367, "grad_norm": 0.19299256801605225, "learning_rate": 0.002, "loss": 2.5637, "step": 220960 }, { "epoch": 0.4402213757490756, "grad_norm": 0.18642832338809967, "learning_rate": 0.002, "loss": 2.551, "step": 220970 }, { "epoch": 0.4402412979727145, "grad_norm": 0.15816594660282135, "learning_rate": 0.002, "loss": 2.5581, "step": 220980 }, { "epoch": 0.44026122019635344, "grad_norm": 0.16073715686798096, "learning_rate": 0.002, "loss": 2.5588, "step": 220990 }, { "epoch": 0.44028114241999233, "grad_norm": 0.1745995432138443, "learning_rate": 0.002, "loss": 2.5517, "step": 221000 }, { "epoch": 0.4403010646436313, "grad_norm": 0.16522547602653503, "learning_rate": 0.002, "loss": 2.5661, "step": 221010 }, { "epoch": 0.44032098686727017, "grad_norm": 0.17043238878250122, "learning_rate": 0.002, "loss": 2.5375, "step": 221020 }, { "epoch": 0.4403409090909091, "grad_norm": 0.1640891134738922, "learning_rate": 0.002, "loss": 2.5589, "step": 221030 }, { "epoch": 0.440360831314548, "grad_norm": 0.1548963040113449, "learning_rate": 0.002, "loss": 2.5495, "step": 221040 }, { "epoch": 0.4403807535381869, "grad_norm": 0.17323201894760132, "learning_rate": 0.002, "loss": 2.5442, "step": 221050 }, { "epoch": 0.44040067576182584, "grad_norm": 0.15140467882156372, "learning_rate": 0.002, "loss": 2.5594, "step": 221060 }, { "epoch": 0.44042059798546473, "grad_norm": 0.16536562144756317, "learning_rate": 0.002, "loss": 2.5675, "step": 221070 }, { "epoch": 0.4404405202091037, "grad_norm": 0.16250179708003998, "learning_rate": 0.002, "loss": 2.5716, "step": 221080 }, { "epoch": 0.44046044243274257, "grad_norm": 0.1801517754793167, "learning_rate": 0.002, "loss": 2.5554, "step": 221090 }, { "epoch": 0.44048036465638146, "grad_norm": 0.1556086391210556, "learning_rate": 0.002, "loss": 2.5577, "step": 221100 }, { "epoch": 0.4405002868800204, "grad_norm": 0.17836397886276245, "learning_rate": 0.002, "loss": 2.5636, "step": 221110 }, { "epoch": 0.4405202091036593, "grad_norm": 0.1703149527311325, "learning_rate": 0.002, "loss": 2.5572, "step": 221120 }, { "epoch": 0.44054013132729825, "grad_norm": 0.27379634976387024, "learning_rate": 0.002, "loss": 2.5639, "step": 221130 }, { "epoch": 0.44056005355093714, "grad_norm": 0.16065281629562378, "learning_rate": 0.002, "loss": 2.5621, "step": 221140 }, { "epoch": 0.4405799757745761, "grad_norm": 0.20767483115196228, "learning_rate": 0.002, "loss": 2.5643, "step": 221150 }, { "epoch": 0.440599897998215, "grad_norm": 0.18593700230121613, "learning_rate": 0.002, "loss": 2.5621, "step": 221160 }, { "epoch": 0.44061982022185386, "grad_norm": 0.1650223731994629, "learning_rate": 0.002, "loss": 2.5713, "step": 221170 }, { "epoch": 0.4406397424454928, "grad_norm": 0.15263669192790985, "learning_rate": 0.002, "loss": 2.5627, "step": 221180 }, { "epoch": 0.4406596646691317, "grad_norm": 0.18655335903167725, "learning_rate": 0.002, "loss": 2.5641, "step": 221190 }, { "epoch": 0.44067958689277065, "grad_norm": 0.2008950561285019, "learning_rate": 0.002, "loss": 2.5613, "step": 221200 }, { "epoch": 0.44069950911640954, "grad_norm": 0.15253065526485443, "learning_rate": 0.002, "loss": 2.5656, "step": 221210 }, { "epoch": 0.44071943134004843, "grad_norm": 0.1467672884464264, "learning_rate": 0.002, "loss": 2.5644, "step": 221220 }, { "epoch": 0.4407393535636874, "grad_norm": 0.2214185744524002, "learning_rate": 0.002, "loss": 2.5629, "step": 221230 }, { "epoch": 0.44075927578732627, "grad_norm": 0.15649950504302979, "learning_rate": 0.002, "loss": 2.5705, "step": 221240 }, { "epoch": 0.4407791980109652, "grad_norm": 0.15891185402870178, "learning_rate": 0.002, "loss": 2.5633, "step": 221250 }, { "epoch": 0.4407991202346041, "grad_norm": 0.19716331362724304, "learning_rate": 0.002, "loss": 2.556, "step": 221260 }, { "epoch": 0.440819042458243, "grad_norm": 0.1893739104270935, "learning_rate": 0.002, "loss": 2.5567, "step": 221270 }, { "epoch": 0.44083896468188194, "grad_norm": 0.190421923995018, "learning_rate": 0.002, "loss": 2.5501, "step": 221280 }, { "epoch": 0.44085888690552083, "grad_norm": 0.19400052726268768, "learning_rate": 0.002, "loss": 2.5541, "step": 221290 }, { "epoch": 0.4408788091291598, "grad_norm": 0.15531082451343536, "learning_rate": 0.002, "loss": 2.5631, "step": 221300 }, { "epoch": 0.44089873135279867, "grad_norm": 0.16540680825710297, "learning_rate": 0.002, "loss": 2.5628, "step": 221310 }, { "epoch": 0.4409186535764376, "grad_norm": 0.17970718443393707, "learning_rate": 0.002, "loss": 2.5569, "step": 221320 }, { "epoch": 0.4409385758000765, "grad_norm": 0.22476693987846375, "learning_rate": 0.002, "loss": 2.5546, "step": 221330 }, { "epoch": 0.4409584980237154, "grad_norm": 0.1648181974887848, "learning_rate": 0.002, "loss": 2.5608, "step": 221340 }, { "epoch": 0.44097842024735434, "grad_norm": 0.1722707748413086, "learning_rate": 0.002, "loss": 2.5697, "step": 221350 }, { "epoch": 0.44099834247099323, "grad_norm": 0.20100823044776917, "learning_rate": 0.002, "loss": 2.5444, "step": 221360 }, { "epoch": 0.4410182646946322, "grad_norm": 0.19094055891036987, "learning_rate": 0.002, "loss": 2.5645, "step": 221370 }, { "epoch": 0.44103818691827107, "grad_norm": 0.16811688244342804, "learning_rate": 0.002, "loss": 2.5663, "step": 221380 }, { "epoch": 0.44105810914190996, "grad_norm": 0.15295147895812988, "learning_rate": 0.002, "loss": 2.5598, "step": 221390 }, { "epoch": 0.4410780313655489, "grad_norm": 0.1628420203924179, "learning_rate": 0.002, "loss": 2.5501, "step": 221400 }, { "epoch": 0.4410979535891878, "grad_norm": 0.19118008017539978, "learning_rate": 0.002, "loss": 2.5559, "step": 221410 }, { "epoch": 0.44111787581282674, "grad_norm": 0.15660591423511505, "learning_rate": 0.002, "loss": 2.5658, "step": 221420 }, { "epoch": 0.44113779803646563, "grad_norm": 0.19797012209892273, "learning_rate": 0.002, "loss": 2.556, "step": 221430 }, { "epoch": 0.4411577202601045, "grad_norm": 0.18064133822917938, "learning_rate": 0.002, "loss": 2.5509, "step": 221440 }, { "epoch": 0.44117764248374347, "grad_norm": 0.16420842707157135, "learning_rate": 0.002, "loss": 2.5552, "step": 221450 }, { "epoch": 0.44119756470738236, "grad_norm": 0.15879465639591217, "learning_rate": 0.002, "loss": 2.5604, "step": 221460 }, { "epoch": 0.4412174869310213, "grad_norm": 0.16250333189964294, "learning_rate": 0.002, "loss": 2.5519, "step": 221470 }, { "epoch": 0.4412374091546602, "grad_norm": 0.14798514544963837, "learning_rate": 0.002, "loss": 2.5427, "step": 221480 }, { "epoch": 0.44125733137829914, "grad_norm": 0.18611584603786469, "learning_rate": 0.002, "loss": 2.5562, "step": 221490 }, { "epoch": 0.44127725360193804, "grad_norm": 0.1793026179075241, "learning_rate": 0.002, "loss": 2.5495, "step": 221500 }, { "epoch": 0.4412971758255769, "grad_norm": 0.18399323523044586, "learning_rate": 0.002, "loss": 2.5662, "step": 221510 }, { "epoch": 0.4413170980492159, "grad_norm": 0.19250892102718353, "learning_rate": 0.002, "loss": 2.5539, "step": 221520 }, { "epoch": 0.44133702027285476, "grad_norm": 0.1832304298877716, "learning_rate": 0.002, "loss": 2.5581, "step": 221530 }, { "epoch": 0.4413569424964937, "grad_norm": 0.16425135731697083, "learning_rate": 0.002, "loss": 2.554, "step": 221540 }, { "epoch": 0.4413768647201326, "grad_norm": 0.1585485190153122, "learning_rate": 0.002, "loss": 2.5473, "step": 221550 }, { "epoch": 0.4413967869437715, "grad_norm": 0.20853006839752197, "learning_rate": 0.002, "loss": 2.5469, "step": 221560 }, { "epoch": 0.44141670916741044, "grad_norm": 0.1562817245721817, "learning_rate": 0.002, "loss": 2.5561, "step": 221570 }, { "epoch": 0.44143663139104933, "grad_norm": 0.16224628686904907, "learning_rate": 0.002, "loss": 2.5678, "step": 221580 }, { "epoch": 0.4414565536146883, "grad_norm": 0.14190205931663513, "learning_rate": 0.002, "loss": 2.5641, "step": 221590 }, { "epoch": 0.44147647583832716, "grad_norm": 0.160809725522995, "learning_rate": 0.002, "loss": 2.5392, "step": 221600 }, { "epoch": 0.4414963980619661, "grad_norm": 0.15957427024841309, "learning_rate": 0.002, "loss": 2.5581, "step": 221610 }, { "epoch": 0.441516320285605, "grad_norm": 0.19023802876472473, "learning_rate": 0.002, "loss": 2.5623, "step": 221620 }, { "epoch": 0.4415362425092439, "grad_norm": 0.15700626373291016, "learning_rate": 0.002, "loss": 2.5531, "step": 221630 }, { "epoch": 0.44155616473288284, "grad_norm": 0.1743100881576538, "learning_rate": 0.002, "loss": 2.567, "step": 221640 }, { "epoch": 0.44157608695652173, "grad_norm": 0.20314094424247742, "learning_rate": 0.002, "loss": 2.5512, "step": 221650 }, { "epoch": 0.4415960091801607, "grad_norm": 0.21631333231925964, "learning_rate": 0.002, "loss": 2.5635, "step": 221660 }, { "epoch": 0.44161593140379957, "grad_norm": 0.14183109998703003, "learning_rate": 0.002, "loss": 2.573, "step": 221670 }, { "epoch": 0.44163585362743846, "grad_norm": 0.16445960104465485, "learning_rate": 0.002, "loss": 2.5501, "step": 221680 }, { "epoch": 0.4416557758510774, "grad_norm": 0.15886469185352325, "learning_rate": 0.002, "loss": 2.571, "step": 221690 }, { "epoch": 0.4416756980747163, "grad_norm": 0.1691390573978424, "learning_rate": 0.002, "loss": 2.5642, "step": 221700 }, { "epoch": 0.44169562029835524, "grad_norm": 0.20827028155326843, "learning_rate": 0.002, "loss": 2.5591, "step": 221710 }, { "epoch": 0.44171554252199413, "grad_norm": 0.1538667529821396, "learning_rate": 0.002, "loss": 2.5697, "step": 221720 }, { "epoch": 0.441735464745633, "grad_norm": 0.14650383591651917, "learning_rate": 0.002, "loss": 2.5524, "step": 221730 }, { "epoch": 0.44175538696927197, "grad_norm": 0.17280803620815277, "learning_rate": 0.002, "loss": 2.5522, "step": 221740 }, { "epoch": 0.44177530919291086, "grad_norm": 0.17379403114318848, "learning_rate": 0.002, "loss": 2.5678, "step": 221750 }, { "epoch": 0.4417952314165498, "grad_norm": 0.1434297412633896, "learning_rate": 0.002, "loss": 2.5553, "step": 221760 }, { "epoch": 0.4418151536401887, "grad_norm": 0.17749248445034027, "learning_rate": 0.002, "loss": 2.5529, "step": 221770 }, { "epoch": 0.44183507586382764, "grad_norm": 0.16659130156040192, "learning_rate": 0.002, "loss": 2.5491, "step": 221780 }, { "epoch": 0.44185499808746653, "grad_norm": 0.21145567297935486, "learning_rate": 0.002, "loss": 2.5526, "step": 221790 }, { "epoch": 0.4418749203111054, "grad_norm": 0.17494335770606995, "learning_rate": 0.002, "loss": 2.556, "step": 221800 }, { "epoch": 0.44189484253474437, "grad_norm": 0.1590358465909958, "learning_rate": 0.002, "loss": 2.5602, "step": 221810 }, { "epoch": 0.44191476475838326, "grad_norm": 0.19355137646198273, "learning_rate": 0.002, "loss": 2.5626, "step": 221820 }, { "epoch": 0.4419346869820222, "grad_norm": 0.16233913600444794, "learning_rate": 0.002, "loss": 2.5743, "step": 221830 }, { "epoch": 0.4419546092056611, "grad_norm": 0.1563086360692978, "learning_rate": 0.002, "loss": 2.5432, "step": 221840 }, { "epoch": 0.4419745314293, "grad_norm": 0.17354130744934082, "learning_rate": 0.002, "loss": 2.5751, "step": 221850 }, { "epoch": 0.44199445365293893, "grad_norm": 0.16523003578186035, "learning_rate": 0.002, "loss": 2.559, "step": 221860 }, { "epoch": 0.4420143758765778, "grad_norm": 0.1417827308177948, "learning_rate": 0.002, "loss": 2.558, "step": 221870 }, { "epoch": 0.44203429810021677, "grad_norm": 0.19040334224700928, "learning_rate": 0.002, "loss": 2.5517, "step": 221880 }, { "epoch": 0.44205422032385566, "grad_norm": 0.14290623366832733, "learning_rate": 0.002, "loss": 2.5581, "step": 221890 }, { "epoch": 0.4420741425474946, "grad_norm": 0.1577254831790924, "learning_rate": 0.002, "loss": 2.5451, "step": 221900 }, { "epoch": 0.4420940647711335, "grad_norm": 0.1993543803691864, "learning_rate": 0.002, "loss": 2.5578, "step": 221910 }, { "epoch": 0.4421139869947724, "grad_norm": 0.2110176533460617, "learning_rate": 0.002, "loss": 2.5715, "step": 221920 }, { "epoch": 0.44213390921841134, "grad_norm": 0.16273930668830872, "learning_rate": 0.002, "loss": 2.5777, "step": 221930 }, { "epoch": 0.4421538314420502, "grad_norm": 0.14410750567913055, "learning_rate": 0.002, "loss": 2.5541, "step": 221940 }, { "epoch": 0.4421737536656892, "grad_norm": 0.19244907796382904, "learning_rate": 0.002, "loss": 2.5525, "step": 221950 }, { "epoch": 0.44219367588932806, "grad_norm": 0.16683344542980194, "learning_rate": 0.002, "loss": 2.562, "step": 221960 }, { "epoch": 0.44221359811296695, "grad_norm": 0.17081022262573242, "learning_rate": 0.002, "loss": 2.5634, "step": 221970 }, { "epoch": 0.4422335203366059, "grad_norm": 0.17625567317008972, "learning_rate": 0.002, "loss": 2.5497, "step": 221980 }, { "epoch": 0.4422534425602448, "grad_norm": 0.1690322756767273, "learning_rate": 0.002, "loss": 2.5586, "step": 221990 }, { "epoch": 0.44227336478388374, "grad_norm": 0.15526416897773743, "learning_rate": 0.002, "loss": 2.5617, "step": 222000 }, { "epoch": 0.44229328700752263, "grad_norm": 0.18228062987327576, "learning_rate": 0.002, "loss": 2.5571, "step": 222010 }, { "epoch": 0.4423132092311615, "grad_norm": 0.18175525963306427, "learning_rate": 0.002, "loss": 2.5603, "step": 222020 }, { "epoch": 0.44233313145480047, "grad_norm": 0.16804872453212738, "learning_rate": 0.002, "loss": 2.5577, "step": 222030 }, { "epoch": 0.44235305367843936, "grad_norm": 0.17816272377967834, "learning_rate": 0.002, "loss": 2.5691, "step": 222040 }, { "epoch": 0.4423729759020783, "grad_norm": 0.1994055211544037, "learning_rate": 0.002, "loss": 2.5556, "step": 222050 }, { "epoch": 0.4423928981257172, "grad_norm": 0.13653092086315155, "learning_rate": 0.002, "loss": 2.5603, "step": 222060 }, { "epoch": 0.44241282034935614, "grad_norm": 0.2020084261894226, "learning_rate": 0.002, "loss": 2.5422, "step": 222070 }, { "epoch": 0.44243274257299503, "grad_norm": 0.16575811803340912, "learning_rate": 0.002, "loss": 2.5469, "step": 222080 }, { "epoch": 0.4424526647966339, "grad_norm": 0.15966080129146576, "learning_rate": 0.002, "loss": 2.549, "step": 222090 }, { "epoch": 0.44247258702027287, "grad_norm": 0.20740142464637756, "learning_rate": 0.002, "loss": 2.554, "step": 222100 }, { "epoch": 0.44249250924391176, "grad_norm": 0.21008966863155365, "learning_rate": 0.002, "loss": 2.5668, "step": 222110 }, { "epoch": 0.4425124314675507, "grad_norm": 0.14675511419773102, "learning_rate": 0.002, "loss": 2.5757, "step": 222120 }, { "epoch": 0.4425323536911896, "grad_norm": 0.2046867311000824, "learning_rate": 0.002, "loss": 2.5458, "step": 222130 }, { "epoch": 0.4425522759148285, "grad_norm": 0.1522398591041565, "learning_rate": 0.002, "loss": 2.5546, "step": 222140 }, { "epoch": 0.44257219813846743, "grad_norm": 0.20014671981334686, "learning_rate": 0.002, "loss": 2.5699, "step": 222150 }, { "epoch": 0.4425921203621063, "grad_norm": 0.17146877944469452, "learning_rate": 0.002, "loss": 2.5648, "step": 222160 }, { "epoch": 0.44261204258574527, "grad_norm": 0.1479780375957489, "learning_rate": 0.002, "loss": 2.554, "step": 222170 }, { "epoch": 0.44263196480938416, "grad_norm": 0.20025116205215454, "learning_rate": 0.002, "loss": 2.5434, "step": 222180 }, { "epoch": 0.44265188703302305, "grad_norm": 0.1642744541168213, "learning_rate": 0.002, "loss": 2.5491, "step": 222190 }, { "epoch": 0.442671809256662, "grad_norm": 0.1565237045288086, "learning_rate": 0.002, "loss": 2.5483, "step": 222200 }, { "epoch": 0.4426917314803009, "grad_norm": 0.15793538093566895, "learning_rate": 0.002, "loss": 2.5576, "step": 222210 }, { "epoch": 0.44271165370393983, "grad_norm": 0.1564142256975174, "learning_rate": 0.002, "loss": 2.5635, "step": 222220 }, { "epoch": 0.4427315759275787, "grad_norm": 0.17527392506599426, "learning_rate": 0.002, "loss": 2.5646, "step": 222230 }, { "epoch": 0.44275149815121767, "grad_norm": 0.1548927128314972, "learning_rate": 0.002, "loss": 2.5488, "step": 222240 }, { "epoch": 0.44277142037485656, "grad_norm": 0.15902099013328552, "learning_rate": 0.002, "loss": 2.5516, "step": 222250 }, { "epoch": 0.44279134259849545, "grad_norm": 0.1461474597454071, "learning_rate": 0.002, "loss": 2.5585, "step": 222260 }, { "epoch": 0.4428112648221344, "grad_norm": 0.17165575921535492, "learning_rate": 0.002, "loss": 2.5473, "step": 222270 }, { "epoch": 0.4428311870457733, "grad_norm": 0.1747766137123108, "learning_rate": 0.002, "loss": 2.5684, "step": 222280 }, { "epoch": 0.44285110926941224, "grad_norm": 0.16345703601837158, "learning_rate": 0.002, "loss": 2.5815, "step": 222290 }, { "epoch": 0.4428710314930511, "grad_norm": 0.19225181639194489, "learning_rate": 0.002, "loss": 2.5403, "step": 222300 }, { "epoch": 0.44289095371669, "grad_norm": 0.1459375023841858, "learning_rate": 0.002, "loss": 2.5627, "step": 222310 }, { "epoch": 0.44291087594032896, "grad_norm": 0.18993037939071655, "learning_rate": 0.002, "loss": 2.5577, "step": 222320 }, { "epoch": 0.44293079816396785, "grad_norm": 0.17248325049877167, "learning_rate": 0.002, "loss": 2.5528, "step": 222330 }, { "epoch": 0.4429507203876068, "grad_norm": 0.18207895755767822, "learning_rate": 0.002, "loss": 2.5537, "step": 222340 }, { "epoch": 0.4429706426112457, "grad_norm": 0.18014492094516754, "learning_rate": 0.002, "loss": 2.5743, "step": 222350 }, { "epoch": 0.44299056483488464, "grad_norm": 0.17016850411891937, "learning_rate": 0.002, "loss": 2.5667, "step": 222360 }, { "epoch": 0.4430104870585235, "grad_norm": 0.19822457432746887, "learning_rate": 0.002, "loss": 2.5699, "step": 222370 }, { "epoch": 0.4430304092821624, "grad_norm": 0.16884030401706696, "learning_rate": 0.002, "loss": 2.5566, "step": 222380 }, { "epoch": 0.44305033150580136, "grad_norm": 0.19338247179985046, "learning_rate": 0.002, "loss": 2.5635, "step": 222390 }, { "epoch": 0.44307025372944026, "grad_norm": 0.15201689302921295, "learning_rate": 0.002, "loss": 2.5632, "step": 222400 }, { "epoch": 0.4430901759530792, "grad_norm": 0.16366371512413025, "learning_rate": 0.002, "loss": 2.5519, "step": 222410 }, { "epoch": 0.4431100981767181, "grad_norm": 0.16157414019107819, "learning_rate": 0.002, "loss": 2.557, "step": 222420 }, { "epoch": 0.443130020400357, "grad_norm": 0.17886176705360413, "learning_rate": 0.002, "loss": 2.5624, "step": 222430 }, { "epoch": 0.44314994262399593, "grad_norm": 0.1698872596025467, "learning_rate": 0.002, "loss": 2.5635, "step": 222440 }, { "epoch": 0.4431698648476348, "grad_norm": 0.1598903089761734, "learning_rate": 0.002, "loss": 2.5485, "step": 222450 }, { "epoch": 0.44318978707127377, "grad_norm": 0.15542817115783691, "learning_rate": 0.002, "loss": 2.5608, "step": 222460 }, { "epoch": 0.44320970929491266, "grad_norm": 0.17667311429977417, "learning_rate": 0.002, "loss": 2.5442, "step": 222470 }, { "epoch": 0.44322963151855155, "grad_norm": 0.19844555854797363, "learning_rate": 0.002, "loss": 2.5638, "step": 222480 }, { "epoch": 0.4432495537421905, "grad_norm": 0.1964719295501709, "learning_rate": 0.002, "loss": 2.5549, "step": 222490 }, { "epoch": 0.4432694759658294, "grad_norm": 0.1536717712879181, "learning_rate": 0.002, "loss": 2.5463, "step": 222500 }, { "epoch": 0.44328939818946833, "grad_norm": 0.16197802126407623, "learning_rate": 0.002, "loss": 2.5635, "step": 222510 }, { "epoch": 0.4433093204131072, "grad_norm": 0.23572786152362823, "learning_rate": 0.002, "loss": 2.5675, "step": 222520 }, { "epoch": 0.44332924263674617, "grad_norm": 0.13629984855651855, "learning_rate": 0.002, "loss": 2.5576, "step": 222530 }, { "epoch": 0.44334916486038506, "grad_norm": 0.20307472348213196, "learning_rate": 0.002, "loss": 2.557, "step": 222540 }, { "epoch": 0.44336908708402395, "grad_norm": 0.1457604020833969, "learning_rate": 0.002, "loss": 2.5463, "step": 222550 }, { "epoch": 0.4433890093076629, "grad_norm": 0.1926996409893036, "learning_rate": 0.002, "loss": 2.5582, "step": 222560 }, { "epoch": 0.4434089315313018, "grad_norm": 0.15664175152778625, "learning_rate": 0.002, "loss": 2.566, "step": 222570 }, { "epoch": 0.44342885375494073, "grad_norm": 0.14734680950641632, "learning_rate": 0.002, "loss": 2.5495, "step": 222580 }, { "epoch": 0.4434487759785796, "grad_norm": 0.18369492888450623, "learning_rate": 0.002, "loss": 2.5566, "step": 222590 }, { "epoch": 0.4434686982022185, "grad_norm": 0.15210339426994324, "learning_rate": 0.002, "loss": 2.5627, "step": 222600 }, { "epoch": 0.44348862042585746, "grad_norm": 0.16287770867347717, "learning_rate": 0.002, "loss": 2.5491, "step": 222610 }, { "epoch": 0.44350854264949635, "grad_norm": 0.15648801624774933, "learning_rate": 0.002, "loss": 2.5634, "step": 222620 }, { "epoch": 0.4435284648731353, "grad_norm": 0.1612447202205658, "learning_rate": 0.002, "loss": 2.5345, "step": 222630 }, { "epoch": 0.4435483870967742, "grad_norm": 0.18438281118869781, "learning_rate": 0.002, "loss": 2.5601, "step": 222640 }, { "epoch": 0.44356830932041313, "grad_norm": 0.18021029233932495, "learning_rate": 0.002, "loss": 2.5571, "step": 222650 }, { "epoch": 0.443588231544052, "grad_norm": 0.1529405564069748, "learning_rate": 0.002, "loss": 2.5542, "step": 222660 }, { "epoch": 0.4436081537676909, "grad_norm": 0.2034025341272354, "learning_rate": 0.002, "loss": 2.55, "step": 222670 }, { "epoch": 0.44362807599132986, "grad_norm": 0.2022470235824585, "learning_rate": 0.002, "loss": 2.5531, "step": 222680 }, { "epoch": 0.44364799821496875, "grad_norm": 0.164817675948143, "learning_rate": 0.002, "loss": 2.557, "step": 222690 }, { "epoch": 0.4436679204386077, "grad_norm": 0.1591472178697586, "learning_rate": 0.002, "loss": 2.5675, "step": 222700 }, { "epoch": 0.4436878426622466, "grad_norm": 0.2094138115644455, "learning_rate": 0.002, "loss": 2.5502, "step": 222710 }, { "epoch": 0.4437077648858855, "grad_norm": 0.17522278428077698, "learning_rate": 0.002, "loss": 2.5526, "step": 222720 }, { "epoch": 0.4437276871095244, "grad_norm": 0.16377943754196167, "learning_rate": 0.002, "loss": 2.5521, "step": 222730 }, { "epoch": 0.4437476093331633, "grad_norm": 0.18447640538215637, "learning_rate": 0.002, "loss": 2.5577, "step": 222740 }, { "epoch": 0.44376753155680226, "grad_norm": 0.16564714908599854, "learning_rate": 0.002, "loss": 2.5539, "step": 222750 }, { "epoch": 0.44378745378044115, "grad_norm": 0.15869541466236115, "learning_rate": 0.002, "loss": 2.5514, "step": 222760 }, { "epoch": 0.44380737600408005, "grad_norm": 0.17776338756084442, "learning_rate": 0.002, "loss": 2.5591, "step": 222770 }, { "epoch": 0.443827298227719, "grad_norm": 0.1592942476272583, "learning_rate": 0.002, "loss": 2.5603, "step": 222780 }, { "epoch": 0.4438472204513579, "grad_norm": 0.1602981686592102, "learning_rate": 0.002, "loss": 2.5567, "step": 222790 }, { "epoch": 0.44386714267499683, "grad_norm": 0.15274257957935333, "learning_rate": 0.002, "loss": 2.5605, "step": 222800 }, { "epoch": 0.4438870648986357, "grad_norm": 0.1491016000509262, "learning_rate": 0.002, "loss": 2.5598, "step": 222810 }, { "epoch": 0.44390698712227467, "grad_norm": 0.1606370359659195, "learning_rate": 0.002, "loss": 2.5646, "step": 222820 }, { "epoch": 0.44392690934591356, "grad_norm": 0.5176239609718323, "learning_rate": 0.002, "loss": 2.555, "step": 222830 }, { "epoch": 0.44394683156955245, "grad_norm": 0.18408037722110748, "learning_rate": 0.002, "loss": 2.5474, "step": 222840 }, { "epoch": 0.4439667537931914, "grad_norm": 0.1690720170736313, "learning_rate": 0.002, "loss": 2.5666, "step": 222850 }, { "epoch": 0.4439866760168303, "grad_norm": 0.13048656284809113, "learning_rate": 0.002, "loss": 2.5668, "step": 222860 }, { "epoch": 0.44400659824046923, "grad_norm": 0.18980947136878967, "learning_rate": 0.002, "loss": 2.5568, "step": 222870 }, { "epoch": 0.4440265204641081, "grad_norm": 0.17263858020305634, "learning_rate": 0.002, "loss": 2.5483, "step": 222880 }, { "epoch": 0.444046442687747, "grad_norm": 0.19850750267505646, "learning_rate": 0.002, "loss": 2.5754, "step": 222890 }, { "epoch": 0.44406636491138596, "grad_norm": 0.15720373392105103, "learning_rate": 0.002, "loss": 2.5666, "step": 222900 }, { "epoch": 0.44408628713502485, "grad_norm": 0.17117805778980255, "learning_rate": 0.002, "loss": 2.557, "step": 222910 }, { "epoch": 0.4441062093586638, "grad_norm": 0.16690783202648163, "learning_rate": 0.002, "loss": 2.5561, "step": 222920 }, { "epoch": 0.4441261315823027, "grad_norm": 0.14930863678455353, "learning_rate": 0.002, "loss": 2.5488, "step": 222930 }, { "epoch": 0.4441460538059416, "grad_norm": 0.1543225646018982, "learning_rate": 0.002, "loss": 2.5478, "step": 222940 }, { "epoch": 0.4441659760295805, "grad_norm": 0.16447612643241882, "learning_rate": 0.002, "loss": 2.5599, "step": 222950 }, { "epoch": 0.4441858982532194, "grad_norm": 0.19028832018375397, "learning_rate": 0.002, "loss": 2.556, "step": 222960 }, { "epoch": 0.44420582047685836, "grad_norm": 0.20002177357673645, "learning_rate": 0.002, "loss": 2.5598, "step": 222970 }, { "epoch": 0.44422574270049725, "grad_norm": 0.15555548667907715, "learning_rate": 0.002, "loss": 2.548, "step": 222980 }, { "epoch": 0.4442456649241362, "grad_norm": 0.19980186223983765, "learning_rate": 0.002, "loss": 2.5563, "step": 222990 }, { "epoch": 0.4442655871477751, "grad_norm": 0.18125854432582855, "learning_rate": 0.002, "loss": 2.5513, "step": 223000 }, { "epoch": 0.444285509371414, "grad_norm": 0.1536203920841217, "learning_rate": 0.002, "loss": 2.5725, "step": 223010 }, { "epoch": 0.4443054315950529, "grad_norm": 0.16406114399433136, "learning_rate": 0.002, "loss": 2.5576, "step": 223020 }, { "epoch": 0.4443253538186918, "grad_norm": 0.18370187282562256, "learning_rate": 0.002, "loss": 2.5527, "step": 223030 }, { "epoch": 0.44434527604233076, "grad_norm": 0.15696054697036743, "learning_rate": 0.002, "loss": 2.5725, "step": 223040 }, { "epoch": 0.44436519826596965, "grad_norm": 0.17171050608158112, "learning_rate": 0.002, "loss": 2.5597, "step": 223050 }, { "epoch": 0.44438512048960854, "grad_norm": 0.15807168185710907, "learning_rate": 0.002, "loss": 2.5666, "step": 223060 }, { "epoch": 0.4444050427132475, "grad_norm": 0.18010668456554413, "learning_rate": 0.002, "loss": 2.5544, "step": 223070 }, { "epoch": 0.4444249649368864, "grad_norm": 0.1394062340259552, "learning_rate": 0.002, "loss": 2.5556, "step": 223080 }, { "epoch": 0.4444448871605253, "grad_norm": 0.1614668071269989, "learning_rate": 0.002, "loss": 2.5534, "step": 223090 }, { "epoch": 0.4444648093841642, "grad_norm": 0.15253649652004242, "learning_rate": 0.002, "loss": 2.5491, "step": 223100 }, { "epoch": 0.44448473160780316, "grad_norm": 0.16032904386520386, "learning_rate": 0.002, "loss": 2.5519, "step": 223110 }, { "epoch": 0.44450465383144205, "grad_norm": 0.18979141116142273, "learning_rate": 0.002, "loss": 2.5446, "step": 223120 }, { "epoch": 0.44452457605508094, "grad_norm": 0.1510080248117447, "learning_rate": 0.002, "loss": 2.5564, "step": 223130 }, { "epoch": 0.4445444982787199, "grad_norm": 0.15482577681541443, "learning_rate": 0.002, "loss": 2.5552, "step": 223140 }, { "epoch": 0.4445644205023588, "grad_norm": 0.21931223571300507, "learning_rate": 0.002, "loss": 2.561, "step": 223150 }, { "epoch": 0.4445843427259977, "grad_norm": 0.16321806609630585, "learning_rate": 0.002, "loss": 2.5579, "step": 223160 }, { "epoch": 0.4446042649496366, "grad_norm": 0.14449693262577057, "learning_rate": 0.002, "loss": 2.5601, "step": 223170 }, { "epoch": 0.4446241871732755, "grad_norm": 0.26607123017311096, "learning_rate": 0.002, "loss": 2.5608, "step": 223180 }, { "epoch": 0.44464410939691446, "grad_norm": 0.158076673746109, "learning_rate": 0.002, "loss": 2.5693, "step": 223190 }, { "epoch": 0.44466403162055335, "grad_norm": 0.1520589292049408, "learning_rate": 0.002, "loss": 2.5596, "step": 223200 }, { "epoch": 0.4446839538441923, "grad_norm": 0.16755741834640503, "learning_rate": 0.002, "loss": 2.5717, "step": 223210 }, { "epoch": 0.4447038760678312, "grad_norm": 0.18445631861686707, "learning_rate": 0.002, "loss": 2.5798, "step": 223220 }, { "epoch": 0.4447237982914701, "grad_norm": 0.15161800384521484, "learning_rate": 0.002, "loss": 2.5609, "step": 223230 }, { "epoch": 0.444743720515109, "grad_norm": 0.17463769018650055, "learning_rate": 0.002, "loss": 2.5568, "step": 223240 }, { "epoch": 0.4447636427387479, "grad_norm": 0.18611468374729156, "learning_rate": 0.002, "loss": 2.5645, "step": 223250 }, { "epoch": 0.44478356496238686, "grad_norm": 0.15693122148513794, "learning_rate": 0.002, "loss": 2.5526, "step": 223260 }, { "epoch": 0.44480348718602575, "grad_norm": 0.19578586518764496, "learning_rate": 0.002, "loss": 2.5689, "step": 223270 }, { "epoch": 0.4448234094096647, "grad_norm": 0.1415942758321762, "learning_rate": 0.002, "loss": 2.5543, "step": 223280 }, { "epoch": 0.4448433316333036, "grad_norm": 0.1598239690065384, "learning_rate": 0.002, "loss": 2.5761, "step": 223290 }, { "epoch": 0.4448632538569425, "grad_norm": 0.1668609231710434, "learning_rate": 0.002, "loss": 2.5476, "step": 223300 }, { "epoch": 0.4448831760805814, "grad_norm": 0.14107996225357056, "learning_rate": 0.002, "loss": 2.5427, "step": 223310 }, { "epoch": 0.4449030983042203, "grad_norm": 0.17749512195587158, "learning_rate": 0.002, "loss": 2.5674, "step": 223320 }, { "epoch": 0.44492302052785926, "grad_norm": 0.15768049657344818, "learning_rate": 0.002, "loss": 2.5734, "step": 223330 }, { "epoch": 0.44494294275149815, "grad_norm": 0.1475277692079544, "learning_rate": 0.002, "loss": 2.5512, "step": 223340 }, { "epoch": 0.44496286497513704, "grad_norm": 0.1595279425382614, "learning_rate": 0.002, "loss": 2.5746, "step": 223350 }, { "epoch": 0.444982787198776, "grad_norm": 0.18298254907131195, "learning_rate": 0.002, "loss": 2.5561, "step": 223360 }, { "epoch": 0.4450027094224149, "grad_norm": 0.1816297471523285, "learning_rate": 0.002, "loss": 2.5484, "step": 223370 }, { "epoch": 0.4450226316460538, "grad_norm": 0.1685096025466919, "learning_rate": 0.002, "loss": 2.5634, "step": 223380 }, { "epoch": 0.4450425538696927, "grad_norm": 0.1710393875837326, "learning_rate": 0.002, "loss": 2.5694, "step": 223390 }, { "epoch": 0.44506247609333166, "grad_norm": 0.20119227468967438, "learning_rate": 0.002, "loss": 2.5712, "step": 223400 }, { "epoch": 0.44508239831697055, "grad_norm": 0.1917630434036255, "learning_rate": 0.002, "loss": 2.5805, "step": 223410 }, { "epoch": 0.44510232054060944, "grad_norm": 0.1942734718322754, "learning_rate": 0.002, "loss": 2.5481, "step": 223420 }, { "epoch": 0.4451222427642484, "grad_norm": 0.17040032148361206, "learning_rate": 0.002, "loss": 2.5595, "step": 223430 }, { "epoch": 0.4451421649878873, "grad_norm": 0.161033496260643, "learning_rate": 0.002, "loss": 2.5419, "step": 223440 }, { "epoch": 0.4451620872115262, "grad_norm": 0.15384802222251892, "learning_rate": 0.002, "loss": 2.5578, "step": 223450 }, { "epoch": 0.4451820094351651, "grad_norm": 0.17942199110984802, "learning_rate": 0.002, "loss": 2.5645, "step": 223460 }, { "epoch": 0.445201931658804, "grad_norm": 0.1647028923034668, "learning_rate": 0.002, "loss": 2.56, "step": 223470 }, { "epoch": 0.44522185388244295, "grad_norm": 0.17758198082447052, "learning_rate": 0.002, "loss": 2.5674, "step": 223480 }, { "epoch": 0.44524177610608184, "grad_norm": 0.17370617389678955, "learning_rate": 0.002, "loss": 2.5485, "step": 223490 }, { "epoch": 0.4452616983297208, "grad_norm": 0.14168331027030945, "learning_rate": 0.002, "loss": 2.5462, "step": 223500 }, { "epoch": 0.4452816205533597, "grad_norm": 0.19596123695373535, "learning_rate": 0.002, "loss": 2.548, "step": 223510 }, { "epoch": 0.44530154277699857, "grad_norm": 0.16950541734695435, "learning_rate": 0.002, "loss": 2.5627, "step": 223520 }, { "epoch": 0.4453214650006375, "grad_norm": 0.1766560673713684, "learning_rate": 0.002, "loss": 2.5723, "step": 223530 }, { "epoch": 0.4453413872242764, "grad_norm": 0.17511634528636932, "learning_rate": 0.002, "loss": 2.5585, "step": 223540 }, { "epoch": 0.44536130944791535, "grad_norm": 0.20280300080776215, "learning_rate": 0.002, "loss": 2.5403, "step": 223550 }, { "epoch": 0.44538123167155425, "grad_norm": 0.15435095131397247, "learning_rate": 0.002, "loss": 2.5695, "step": 223560 }, { "epoch": 0.4454011538951932, "grad_norm": 0.21424366533756256, "learning_rate": 0.002, "loss": 2.5594, "step": 223570 }, { "epoch": 0.4454210761188321, "grad_norm": 0.1744852513074875, "learning_rate": 0.002, "loss": 2.5539, "step": 223580 }, { "epoch": 0.445440998342471, "grad_norm": 0.1400020569562912, "learning_rate": 0.002, "loss": 2.5664, "step": 223590 }, { "epoch": 0.4454609205661099, "grad_norm": 0.17868705093860626, "learning_rate": 0.002, "loss": 2.5638, "step": 223600 }, { "epoch": 0.4454808427897488, "grad_norm": 0.16194181144237518, "learning_rate": 0.002, "loss": 2.5588, "step": 223610 }, { "epoch": 0.44550076501338776, "grad_norm": 0.15244334936141968, "learning_rate": 0.002, "loss": 2.5485, "step": 223620 }, { "epoch": 0.44552068723702665, "grad_norm": 0.21434397995471954, "learning_rate": 0.002, "loss": 2.5572, "step": 223630 }, { "epoch": 0.44554060946066554, "grad_norm": 0.16023987531661987, "learning_rate": 0.002, "loss": 2.5578, "step": 223640 }, { "epoch": 0.4455605316843045, "grad_norm": 0.15636254847049713, "learning_rate": 0.002, "loss": 2.5633, "step": 223650 }, { "epoch": 0.4455804539079434, "grad_norm": 0.17335598170757294, "learning_rate": 0.002, "loss": 2.5746, "step": 223660 }, { "epoch": 0.4456003761315823, "grad_norm": 0.16566430032253265, "learning_rate": 0.002, "loss": 2.5509, "step": 223670 }, { "epoch": 0.4456202983552212, "grad_norm": 0.2009851634502411, "learning_rate": 0.002, "loss": 2.5573, "step": 223680 }, { "epoch": 0.4456402205788601, "grad_norm": 0.17033012211322784, "learning_rate": 0.002, "loss": 2.5683, "step": 223690 }, { "epoch": 0.44566014280249905, "grad_norm": 0.13613931834697723, "learning_rate": 0.002, "loss": 2.5495, "step": 223700 }, { "epoch": 0.44568006502613794, "grad_norm": 0.16000628471374512, "learning_rate": 0.002, "loss": 2.554, "step": 223710 }, { "epoch": 0.4456999872497769, "grad_norm": 0.17130808532238007, "learning_rate": 0.002, "loss": 2.561, "step": 223720 }, { "epoch": 0.4457199094734158, "grad_norm": 0.18496698141098022, "learning_rate": 0.002, "loss": 2.5479, "step": 223730 }, { "epoch": 0.4457398316970547, "grad_norm": 0.1501416712999344, "learning_rate": 0.002, "loss": 2.5536, "step": 223740 }, { "epoch": 0.4457597539206936, "grad_norm": 0.1714308112859726, "learning_rate": 0.002, "loss": 2.557, "step": 223750 }, { "epoch": 0.4457796761443325, "grad_norm": 0.18169251084327698, "learning_rate": 0.002, "loss": 2.5535, "step": 223760 }, { "epoch": 0.44579959836797145, "grad_norm": 0.17380714416503906, "learning_rate": 0.002, "loss": 2.5548, "step": 223770 }, { "epoch": 0.44581952059161034, "grad_norm": 0.2049819380044937, "learning_rate": 0.002, "loss": 2.5664, "step": 223780 }, { "epoch": 0.4458394428152493, "grad_norm": 0.20538699626922607, "learning_rate": 0.002, "loss": 2.5507, "step": 223790 }, { "epoch": 0.4458593650388882, "grad_norm": 0.15169738233089447, "learning_rate": 0.002, "loss": 2.5601, "step": 223800 }, { "epoch": 0.44587928726252707, "grad_norm": 0.19690173864364624, "learning_rate": 0.002, "loss": 2.5659, "step": 223810 }, { "epoch": 0.445899209486166, "grad_norm": 0.17486576735973358, "learning_rate": 0.002, "loss": 2.5622, "step": 223820 }, { "epoch": 0.4459191317098049, "grad_norm": 0.18955275416374207, "learning_rate": 0.002, "loss": 2.5552, "step": 223830 }, { "epoch": 0.44593905393344385, "grad_norm": 0.18482773005962372, "learning_rate": 0.002, "loss": 2.5494, "step": 223840 }, { "epoch": 0.44595897615708274, "grad_norm": 0.18431515991687775, "learning_rate": 0.002, "loss": 2.5577, "step": 223850 }, { "epoch": 0.4459788983807217, "grad_norm": 0.2333676964044571, "learning_rate": 0.002, "loss": 2.5511, "step": 223860 }, { "epoch": 0.4459988206043606, "grad_norm": 0.1710727959871292, "learning_rate": 0.002, "loss": 2.5676, "step": 223870 }, { "epoch": 0.44601874282799947, "grad_norm": 0.1933424174785614, "learning_rate": 0.002, "loss": 2.5529, "step": 223880 }, { "epoch": 0.4460386650516384, "grad_norm": 0.16840760409832, "learning_rate": 0.002, "loss": 2.5837, "step": 223890 }, { "epoch": 0.4460585872752773, "grad_norm": 0.17028731107711792, "learning_rate": 0.002, "loss": 2.5553, "step": 223900 }, { "epoch": 0.44607850949891625, "grad_norm": 0.18908385932445526, "learning_rate": 0.002, "loss": 2.5667, "step": 223910 }, { "epoch": 0.44609843172255514, "grad_norm": 0.17030997574329376, "learning_rate": 0.002, "loss": 2.5487, "step": 223920 }, { "epoch": 0.44611835394619404, "grad_norm": 0.1683671921491623, "learning_rate": 0.002, "loss": 2.5646, "step": 223930 }, { "epoch": 0.446138276169833, "grad_norm": 0.1560579389333725, "learning_rate": 0.002, "loss": 2.5675, "step": 223940 }, { "epoch": 0.44615819839347187, "grad_norm": 0.19797317683696747, "learning_rate": 0.002, "loss": 2.5704, "step": 223950 }, { "epoch": 0.4461781206171108, "grad_norm": 0.15115083754062653, "learning_rate": 0.002, "loss": 2.5461, "step": 223960 }, { "epoch": 0.4461980428407497, "grad_norm": 0.22916090488433838, "learning_rate": 0.002, "loss": 2.552, "step": 223970 }, { "epoch": 0.4462179650643886, "grad_norm": 0.16465429961681366, "learning_rate": 0.002, "loss": 2.555, "step": 223980 }, { "epoch": 0.44623788728802755, "grad_norm": 0.1740158051252365, "learning_rate": 0.002, "loss": 2.5458, "step": 223990 }, { "epoch": 0.44625780951166644, "grad_norm": 0.15338890254497528, "learning_rate": 0.002, "loss": 2.5629, "step": 224000 }, { "epoch": 0.4462777317353054, "grad_norm": 0.21492141485214233, "learning_rate": 0.002, "loss": 2.563, "step": 224010 }, { "epoch": 0.4462976539589443, "grad_norm": 0.16810956597328186, "learning_rate": 0.002, "loss": 2.5445, "step": 224020 }, { "epoch": 0.4463175761825832, "grad_norm": 0.15433864295482635, "learning_rate": 0.002, "loss": 2.5595, "step": 224030 }, { "epoch": 0.4463374984062221, "grad_norm": 0.21356801688671112, "learning_rate": 0.002, "loss": 2.5554, "step": 224040 }, { "epoch": 0.446357420629861, "grad_norm": 0.15087270736694336, "learning_rate": 0.002, "loss": 2.5631, "step": 224050 }, { "epoch": 0.44637734285349995, "grad_norm": 0.21885891258716583, "learning_rate": 0.002, "loss": 2.5722, "step": 224060 }, { "epoch": 0.44639726507713884, "grad_norm": 0.15367308259010315, "learning_rate": 0.002, "loss": 2.5713, "step": 224070 }, { "epoch": 0.4464171873007778, "grad_norm": 0.1876562237739563, "learning_rate": 0.002, "loss": 2.5697, "step": 224080 }, { "epoch": 0.4464371095244167, "grad_norm": 0.16863073408603668, "learning_rate": 0.002, "loss": 2.5599, "step": 224090 }, { "epoch": 0.44645703174805557, "grad_norm": 0.1750398576259613, "learning_rate": 0.002, "loss": 2.551, "step": 224100 }, { "epoch": 0.4464769539716945, "grad_norm": 0.16206689178943634, "learning_rate": 0.002, "loss": 2.552, "step": 224110 }, { "epoch": 0.4464968761953334, "grad_norm": 0.18542811274528503, "learning_rate": 0.002, "loss": 2.5602, "step": 224120 }, { "epoch": 0.44651679841897235, "grad_norm": 0.15343457460403442, "learning_rate": 0.002, "loss": 2.5614, "step": 224130 }, { "epoch": 0.44653672064261124, "grad_norm": 0.16704334318637848, "learning_rate": 0.002, "loss": 2.5757, "step": 224140 }, { "epoch": 0.4465566428662502, "grad_norm": 0.1502779871225357, "learning_rate": 0.002, "loss": 2.5485, "step": 224150 }, { "epoch": 0.4465765650898891, "grad_norm": 0.16828875243663788, "learning_rate": 0.002, "loss": 2.5645, "step": 224160 }, { "epoch": 0.44659648731352797, "grad_norm": 0.15171080827713013, "learning_rate": 0.002, "loss": 2.5561, "step": 224170 }, { "epoch": 0.4466164095371669, "grad_norm": 0.17878487706184387, "learning_rate": 0.002, "loss": 2.5504, "step": 224180 }, { "epoch": 0.4466363317608058, "grad_norm": 0.18504619598388672, "learning_rate": 0.002, "loss": 2.5706, "step": 224190 }, { "epoch": 0.44665625398444475, "grad_norm": 0.18299177289009094, "learning_rate": 0.002, "loss": 2.5552, "step": 224200 }, { "epoch": 0.44667617620808364, "grad_norm": 0.17660151422023773, "learning_rate": 0.002, "loss": 2.5473, "step": 224210 }, { "epoch": 0.44669609843172253, "grad_norm": 0.16089917719364166, "learning_rate": 0.002, "loss": 2.5589, "step": 224220 }, { "epoch": 0.4467160206553615, "grad_norm": 0.16990713775157928, "learning_rate": 0.002, "loss": 2.559, "step": 224230 }, { "epoch": 0.44673594287900037, "grad_norm": 0.13908466696739197, "learning_rate": 0.002, "loss": 2.5584, "step": 224240 }, { "epoch": 0.4467558651026393, "grad_norm": 0.16334746778011322, "learning_rate": 0.002, "loss": 2.5596, "step": 224250 }, { "epoch": 0.4467757873262782, "grad_norm": 0.1503741443157196, "learning_rate": 0.002, "loss": 2.5375, "step": 224260 }, { "epoch": 0.4467957095499171, "grad_norm": 0.17818120121955872, "learning_rate": 0.002, "loss": 2.5568, "step": 224270 }, { "epoch": 0.44681563177355604, "grad_norm": 0.17217464745044708, "learning_rate": 0.002, "loss": 2.5681, "step": 224280 }, { "epoch": 0.44683555399719493, "grad_norm": 0.16389313340187073, "learning_rate": 0.002, "loss": 2.5714, "step": 224290 }, { "epoch": 0.4468554762208339, "grad_norm": 0.16250911355018616, "learning_rate": 0.002, "loss": 2.5602, "step": 224300 }, { "epoch": 0.44687539844447277, "grad_norm": 0.15885506570339203, "learning_rate": 0.002, "loss": 2.5639, "step": 224310 }, { "epoch": 0.4468953206681117, "grad_norm": 0.17702916264533997, "learning_rate": 0.002, "loss": 2.5493, "step": 224320 }, { "epoch": 0.4469152428917506, "grad_norm": 0.14971356093883514, "learning_rate": 0.002, "loss": 2.5664, "step": 224330 }, { "epoch": 0.4469351651153895, "grad_norm": 0.15485981106758118, "learning_rate": 0.002, "loss": 2.5701, "step": 224340 }, { "epoch": 0.44695508733902845, "grad_norm": 0.1733015924692154, "learning_rate": 0.002, "loss": 2.5503, "step": 224350 }, { "epoch": 0.44697500956266734, "grad_norm": 0.14467349648475647, "learning_rate": 0.002, "loss": 2.5466, "step": 224360 }, { "epoch": 0.4469949317863063, "grad_norm": 0.18767589330673218, "learning_rate": 0.002, "loss": 2.5697, "step": 224370 }, { "epoch": 0.4470148540099452, "grad_norm": 0.17012633383274078, "learning_rate": 0.002, "loss": 2.5542, "step": 224380 }, { "epoch": 0.44703477623358406, "grad_norm": 0.1595398336648941, "learning_rate": 0.002, "loss": 2.5606, "step": 224390 }, { "epoch": 0.447054698457223, "grad_norm": 0.13546833395957947, "learning_rate": 0.002, "loss": 2.5674, "step": 224400 }, { "epoch": 0.4470746206808619, "grad_norm": 0.15267007052898407, "learning_rate": 0.002, "loss": 2.5602, "step": 224410 }, { "epoch": 0.44709454290450085, "grad_norm": 0.19779998064041138, "learning_rate": 0.002, "loss": 2.5717, "step": 224420 }, { "epoch": 0.44711446512813974, "grad_norm": 0.15512561798095703, "learning_rate": 0.002, "loss": 2.5601, "step": 224430 }, { "epoch": 0.4471343873517787, "grad_norm": 0.1692197024822235, "learning_rate": 0.002, "loss": 2.5633, "step": 224440 }, { "epoch": 0.4471543095754176, "grad_norm": 0.15128308534622192, "learning_rate": 0.002, "loss": 2.5582, "step": 224450 }, { "epoch": 0.44717423179905647, "grad_norm": 0.14087681472301483, "learning_rate": 0.002, "loss": 2.5523, "step": 224460 }, { "epoch": 0.4471941540226954, "grad_norm": 0.17219670116901398, "learning_rate": 0.002, "loss": 2.5508, "step": 224470 }, { "epoch": 0.4472140762463343, "grad_norm": 0.15172450244426727, "learning_rate": 0.002, "loss": 2.5449, "step": 224480 }, { "epoch": 0.44723399846997325, "grad_norm": 0.19232092797756195, "learning_rate": 0.002, "loss": 2.5637, "step": 224490 }, { "epoch": 0.44725392069361214, "grad_norm": 0.1787535399198532, "learning_rate": 0.002, "loss": 2.5542, "step": 224500 }, { "epoch": 0.44727384291725103, "grad_norm": 0.1615271121263504, "learning_rate": 0.002, "loss": 2.5657, "step": 224510 }, { "epoch": 0.44729376514089, "grad_norm": 0.20154336094856262, "learning_rate": 0.002, "loss": 2.5614, "step": 224520 }, { "epoch": 0.44731368736452887, "grad_norm": 0.1636257767677307, "learning_rate": 0.002, "loss": 2.5439, "step": 224530 }, { "epoch": 0.4473336095881678, "grad_norm": 0.14517708122730255, "learning_rate": 0.002, "loss": 2.5461, "step": 224540 }, { "epoch": 0.4473535318118067, "grad_norm": 0.17689648270606995, "learning_rate": 0.002, "loss": 2.5742, "step": 224550 }, { "epoch": 0.4473734540354456, "grad_norm": 0.18033790588378906, "learning_rate": 0.002, "loss": 2.5476, "step": 224560 }, { "epoch": 0.44739337625908454, "grad_norm": 0.20864109694957733, "learning_rate": 0.002, "loss": 2.5632, "step": 224570 }, { "epoch": 0.44741329848272343, "grad_norm": 0.14979864656925201, "learning_rate": 0.002, "loss": 2.5544, "step": 224580 }, { "epoch": 0.4474332207063624, "grad_norm": 0.17553181946277618, "learning_rate": 0.002, "loss": 2.5434, "step": 224590 }, { "epoch": 0.44745314293000127, "grad_norm": 0.16526202857494354, "learning_rate": 0.002, "loss": 2.556, "step": 224600 }, { "epoch": 0.4474730651536402, "grad_norm": 0.15349449217319489, "learning_rate": 0.002, "loss": 2.5596, "step": 224610 }, { "epoch": 0.4474929873772791, "grad_norm": 0.19230177998542786, "learning_rate": 0.002, "loss": 2.5435, "step": 224620 }, { "epoch": 0.447512909600918, "grad_norm": 0.15976186096668243, "learning_rate": 0.002, "loss": 2.5626, "step": 224630 }, { "epoch": 0.44753283182455694, "grad_norm": 0.1739112287759781, "learning_rate": 0.002, "loss": 2.546, "step": 224640 }, { "epoch": 0.44755275404819583, "grad_norm": 0.20991379022598267, "learning_rate": 0.002, "loss": 2.5478, "step": 224650 }, { "epoch": 0.4475726762718348, "grad_norm": 0.14589931070804596, "learning_rate": 0.002, "loss": 2.563, "step": 224660 }, { "epoch": 0.44759259849547367, "grad_norm": 0.15901868045330048, "learning_rate": 0.002, "loss": 2.5644, "step": 224670 }, { "epoch": 0.44761252071911256, "grad_norm": 0.16751720011234283, "learning_rate": 0.002, "loss": 2.5461, "step": 224680 }, { "epoch": 0.4476324429427515, "grad_norm": 0.16016311943531036, "learning_rate": 0.002, "loss": 2.5631, "step": 224690 }, { "epoch": 0.4476523651663904, "grad_norm": 0.17019781470298767, "learning_rate": 0.002, "loss": 2.5612, "step": 224700 }, { "epoch": 0.44767228739002934, "grad_norm": 0.19813930988311768, "learning_rate": 0.002, "loss": 2.564, "step": 224710 }, { "epoch": 0.44769220961366823, "grad_norm": 0.1820860356092453, "learning_rate": 0.002, "loss": 2.5681, "step": 224720 }, { "epoch": 0.4477121318373071, "grad_norm": 0.13638684153556824, "learning_rate": 0.002, "loss": 2.5607, "step": 224730 }, { "epoch": 0.44773205406094607, "grad_norm": 0.20344607532024384, "learning_rate": 0.002, "loss": 2.5606, "step": 224740 }, { "epoch": 0.44775197628458496, "grad_norm": 0.15765343606472015, "learning_rate": 0.002, "loss": 2.542, "step": 224750 }, { "epoch": 0.4477718985082239, "grad_norm": 0.1491515338420868, "learning_rate": 0.002, "loss": 2.5503, "step": 224760 }, { "epoch": 0.4477918207318628, "grad_norm": 0.18005728721618652, "learning_rate": 0.002, "loss": 2.5551, "step": 224770 }, { "epoch": 0.44781174295550175, "grad_norm": 0.17545852065086365, "learning_rate": 0.002, "loss": 2.5543, "step": 224780 }, { "epoch": 0.44783166517914064, "grad_norm": 0.17210429906845093, "learning_rate": 0.002, "loss": 2.5632, "step": 224790 }, { "epoch": 0.4478515874027795, "grad_norm": 0.1647098958492279, "learning_rate": 0.002, "loss": 2.5649, "step": 224800 }, { "epoch": 0.4478715096264185, "grad_norm": 0.17105062305927277, "learning_rate": 0.002, "loss": 2.5754, "step": 224810 }, { "epoch": 0.44789143185005736, "grad_norm": 0.20328815281391144, "learning_rate": 0.002, "loss": 2.5505, "step": 224820 }, { "epoch": 0.4479113540736963, "grad_norm": 0.17073705792427063, "learning_rate": 0.002, "loss": 2.5535, "step": 224830 }, { "epoch": 0.4479312762973352, "grad_norm": 0.14360041916370392, "learning_rate": 0.002, "loss": 2.5633, "step": 224840 }, { "epoch": 0.4479511985209741, "grad_norm": 0.16160444915294647, "learning_rate": 0.002, "loss": 2.548, "step": 224850 }, { "epoch": 0.44797112074461304, "grad_norm": 0.16535936295986176, "learning_rate": 0.002, "loss": 2.553, "step": 224860 }, { "epoch": 0.44799104296825193, "grad_norm": 0.17804160714149475, "learning_rate": 0.002, "loss": 2.5536, "step": 224870 }, { "epoch": 0.4480109651918909, "grad_norm": 0.1659063845872879, "learning_rate": 0.002, "loss": 2.5664, "step": 224880 }, { "epoch": 0.44803088741552977, "grad_norm": 0.14475886523723602, "learning_rate": 0.002, "loss": 2.5667, "step": 224890 }, { "epoch": 0.4480508096391687, "grad_norm": 0.15016652643680573, "learning_rate": 0.002, "loss": 2.5545, "step": 224900 }, { "epoch": 0.4480707318628076, "grad_norm": 0.18802203238010406, "learning_rate": 0.002, "loss": 2.5713, "step": 224910 }, { "epoch": 0.4480906540864465, "grad_norm": 0.14148908853530884, "learning_rate": 0.002, "loss": 2.569, "step": 224920 }, { "epoch": 0.44811057631008544, "grad_norm": 0.14520058035850525, "learning_rate": 0.002, "loss": 2.5748, "step": 224930 }, { "epoch": 0.44813049853372433, "grad_norm": 0.1876661479473114, "learning_rate": 0.002, "loss": 2.5562, "step": 224940 }, { "epoch": 0.4481504207573633, "grad_norm": 0.1595640331506729, "learning_rate": 0.002, "loss": 2.5383, "step": 224950 }, { "epoch": 0.44817034298100217, "grad_norm": 0.18593569099903107, "learning_rate": 0.002, "loss": 2.556, "step": 224960 }, { "epoch": 0.44819026520464106, "grad_norm": 0.199042409658432, "learning_rate": 0.002, "loss": 2.5573, "step": 224970 }, { "epoch": 0.44821018742828, "grad_norm": 0.17014779150485992, "learning_rate": 0.002, "loss": 2.5533, "step": 224980 }, { "epoch": 0.4482301096519189, "grad_norm": 0.21447259187698364, "learning_rate": 0.002, "loss": 2.5674, "step": 224990 }, { "epoch": 0.44825003187555784, "grad_norm": 0.17039290070533752, "learning_rate": 0.002, "loss": 2.5538, "step": 225000 }, { "epoch": 0.44826995409919673, "grad_norm": 0.15590904653072357, "learning_rate": 0.002, "loss": 2.5563, "step": 225010 }, { "epoch": 0.4482898763228356, "grad_norm": 0.16832207143306732, "learning_rate": 0.002, "loss": 2.562, "step": 225020 }, { "epoch": 0.44830979854647457, "grad_norm": 0.17514806985855103, "learning_rate": 0.002, "loss": 2.5571, "step": 225030 }, { "epoch": 0.44832972077011346, "grad_norm": 0.1648453176021576, "learning_rate": 0.002, "loss": 2.5652, "step": 225040 }, { "epoch": 0.4483496429937524, "grad_norm": 0.17104275524616241, "learning_rate": 0.002, "loss": 2.5707, "step": 225050 }, { "epoch": 0.4483695652173913, "grad_norm": 0.17835599184036255, "learning_rate": 0.002, "loss": 2.5671, "step": 225060 }, { "epoch": 0.44838948744103024, "grad_norm": 0.19111664593219757, "learning_rate": 0.002, "loss": 2.5657, "step": 225070 }, { "epoch": 0.44840940966466913, "grad_norm": 0.21706366539001465, "learning_rate": 0.002, "loss": 2.5698, "step": 225080 }, { "epoch": 0.448429331888308, "grad_norm": 0.17079810798168182, "learning_rate": 0.002, "loss": 2.5579, "step": 225090 }, { "epoch": 0.44844925411194697, "grad_norm": 0.17732113599777222, "learning_rate": 0.002, "loss": 2.5707, "step": 225100 }, { "epoch": 0.44846917633558586, "grad_norm": 0.19162698090076447, "learning_rate": 0.002, "loss": 2.5507, "step": 225110 }, { "epoch": 0.4484890985592248, "grad_norm": 0.16933932900428772, "learning_rate": 0.002, "loss": 2.5674, "step": 225120 }, { "epoch": 0.4485090207828637, "grad_norm": 0.21403883397579193, "learning_rate": 0.002, "loss": 2.5623, "step": 225130 }, { "epoch": 0.4485289430065026, "grad_norm": 0.20699475705623627, "learning_rate": 0.002, "loss": 2.5611, "step": 225140 }, { "epoch": 0.44854886523014154, "grad_norm": 0.14596302807331085, "learning_rate": 0.002, "loss": 2.5605, "step": 225150 }, { "epoch": 0.4485687874537804, "grad_norm": 0.1599220335483551, "learning_rate": 0.002, "loss": 2.5745, "step": 225160 }, { "epoch": 0.4485887096774194, "grad_norm": 0.1879754364490509, "learning_rate": 0.002, "loss": 2.562, "step": 225170 }, { "epoch": 0.44860863190105826, "grad_norm": 0.14076438546180725, "learning_rate": 0.002, "loss": 2.5562, "step": 225180 }, { "epoch": 0.4486285541246972, "grad_norm": 0.17030832171440125, "learning_rate": 0.002, "loss": 2.5613, "step": 225190 }, { "epoch": 0.4486484763483361, "grad_norm": 0.16726632416248322, "learning_rate": 0.002, "loss": 2.5762, "step": 225200 }, { "epoch": 0.448668398571975, "grad_norm": 0.16115818917751312, "learning_rate": 0.002, "loss": 2.5587, "step": 225210 }, { "epoch": 0.44868832079561394, "grad_norm": 0.14859096705913544, "learning_rate": 0.002, "loss": 2.5671, "step": 225220 }, { "epoch": 0.44870824301925283, "grad_norm": 0.1739731878042221, "learning_rate": 0.002, "loss": 2.5557, "step": 225230 }, { "epoch": 0.4487281652428918, "grad_norm": 0.1721387505531311, "learning_rate": 0.002, "loss": 2.5433, "step": 225240 }, { "epoch": 0.44874808746653067, "grad_norm": 0.1594986915588379, "learning_rate": 0.002, "loss": 2.5538, "step": 225250 }, { "epoch": 0.44876800969016956, "grad_norm": 0.19573023915290833, "learning_rate": 0.002, "loss": 2.5619, "step": 225260 }, { "epoch": 0.4487879319138085, "grad_norm": 0.17902928590774536, "learning_rate": 0.002, "loss": 2.5656, "step": 225270 }, { "epoch": 0.4488078541374474, "grad_norm": 0.1988459825515747, "learning_rate": 0.002, "loss": 2.5635, "step": 225280 }, { "epoch": 0.44882777636108634, "grad_norm": 0.17319634556770325, "learning_rate": 0.002, "loss": 2.566, "step": 225290 }, { "epoch": 0.44884769858472523, "grad_norm": 0.13945372402668, "learning_rate": 0.002, "loss": 2.5621, "step": 225300 }, { "epoch": 0.4488676208083641, "grad_norm": 0.21065640449523926, "learning_rate": 0.002, "loss": 2.5578, "step": 225310 }, { "epoch": 0.44888754303200307, "grad_norm": 0.15751682221889496, "learning_rate": 0.002, "loss": 2.5798, "step": 225320 }, { "epoch": 0.44890746525564196, "grad_norm": 0.14435811340808868, "learning_rate": 0.002, "loss": 2.5582, "step": 225330 }, { "epoch": 0.4489273874792809, "grad_norm": 0.20997853577136993, "learning_rate": 0.002, "loss": 2.562, "step": 225340 }, { "epoch": 0.4489473097029198, "grad_norm": 0.16041098535060883, "learning_rate": 0.002, "loss": 2.5557, "step": 225350 }, { "epoch": 0.44896723192655874, "grad_norm": 0.15715935826301575, "learning_rate": 0.002, "loss": 2.559, "step": 225360 }, { "epoch": 0.44898715415019763, "grad_norm": 0.1584533005952835, "learning_rate": 0.002, "loss": 2.5577, "step": 225370 }, { "epoch": 0.4490070763738365, "grad_norm": 0.17705869674682617, "learning_rate": 0.002, "loss": 2.5579, "step": 225380 }, { "epoch": 0.44902699859747547, "grad_norm": 0.17990420758724213, "learning_rate": 0.002, "loss": 2.5385, "step": 225390 }, { "epoch": 0.44904692082111436, "grad_norm": 0.1618778109550476, "learning_rate": 0.002, "loss": 2.5568, "step": 225400 }, { "epoch": 0.4490668430447533, "grad_norm": 0.15045082569122314, "learning_rate": 0.002, "loss": 2.5618, "step": 225410 }, { "epoch": 0.4490867652683922, "grad_norm": 0.15613900125026703, "learning_rate": 0.002, "loss": 2.5681, "step": 225420 }, { "epoch": 0.4491066874920311, "grad_norm": 0.19795285165309906, "learning_rate": 0.002, "loss": 2.5556, "step": 225430 }, { "epoch": 0.44912660971567003, "grad_norm": 0.16465625166893005, "learning_rate": 0.002, "loss": 2.5395, "step": 225440 }, { "epoch": 0.4491465319393089, "grad_norm": 0.17963172495365143, "learning_rate": 0.002, "loss": 2.5643, "step": 225450 }, { "epoch": 0.44916645416294787, "grad_norm": 0.18170298635959625, "learning_rate": 0.002, "loss": 2.5651, "step": 225460 }, { "epoch": 0.44918637638658676, "grad_norm": 0.1651114523410797, "learning_rate": 0.002, "loss": 2.5538, "step": 225470 }, { "epoch": 0.44920629861022565, "grad_norm": 0.1371077299118042, "learning_rate": 0.002, "loss": 2.5664, "step": 225480 }, { "epoch": 0.4492262208338646, "grad_norm": 0.16477195918560028, "learning_rate": 0.002, "loss": 2.5551, "step": 225490 }, { "epoch": 0.4492461430575035, "grad_norm": 0.16035892069339752, "learning_rate": 0.002, "loss": 2.5463, "step": 225500 }, { "epoch": 0.44926606528114243, "grad_norm": 0.13174808025360107, "learning_rate": 0.002, "loss": 2.561, "step": 225510 }, { "epoch": 0.4492859875047813, "grad_norm": 0.1703098565340042, "learning_rate": 0.002, "loss": 2.5584, "step": 225520 }, { "epoch": 0.44930590972842027, "grad_norm": 0.15180940926074982, "learning_rate": 0.002, "loss": 2.5668, "step": 225530 }, { "epoch": 0.44932583195205916, "grad_norm": 0.17785139381885529, "learning_rate": 0.002, "loss": 2.5605, "step": 225540 }, { "epoch": 0.44934575417569805, "grad_norm": 0.17376428842544556, "learning_rate": 0.002, "loss": 2.5567, "step": 225550 }, { "epoch": 0.449365676399337, "grad_norm": 0.1599377989768982, "learning_rate": 0.002, "loss": 2.5544, "step": 225560 }, { "epoch": 0.4493855986229759, "grad_norm": 0.13915197551250458, "learning_rate": 0.002, "loss": 2.5659, "step": 225570 }, { "epoch": 0.44940552084661484, "grad_norm": 0.17280052602291107, "learning_rate": 0.002, "loss": 2.5575, "step": 225580 }, { "epoch": 0.4494254430702537, "grad_norm": 0.15364061295986176, "learning_rate": 0.002, "loss": 2.5497, "step": 225590 }, { "epoch": 0.4494453652938926, "grad_norm": 0.18003273010253906, "learning_rate": 0.002, "loss": 2.5694, "step": 225600 }, { "epoch": 0.44946528751753156, "grad_norm": 0.169560506939888, "learning_rate": 0.002, "loss": 2.5574, "step": 225610 }, { "epoch": 0.44948520974117046, "grad_norm": 0.1943410336971283, "learning_rate": 0.002, "loss": 2.548, "step": 225620 }, { "epoch": 0.4495051319648094, "grad_norm": 0.16895873844623566, "learning_rate": 0.002, "loss": 2.5465, "step": 225630 }, { "epoch": 0.4495250541884483, "grad_norm": 0.16577814519405365, "learning_rate": 0.002, "loss": 2.5645, "step": 225640 }, { "epoch": 0.44954497641208724, "grad_norm": 0.16899070143699646, "learning_rate": 0.002, "loss": 2.5624, "step": 225650 }, { "epoch": 0.44956489863572613, "grad_norm": 0.18657809495925903, "learning_rate": 0.002, "loss": 2.5697, "step": 225660 }, { "epoch": 0.449584820859365, "grad_norm": 0.1600387990474701, "learning_rate": 0.002, "loss": 2.5668, "step": 225670 }, { "epoch": 0.44960474308300397, "grad_norm": 0.18585637211799622, "learning_rate": 0.002, "loss": 2.5478, "step": 225680 }, { "epoch": 0.44962466530664286, "grad_norm": 0.20154157280921936, "learning_rate": 0.002, "loss": 2.5615, "step": 225690 }, { "epoch": 0.4496445875302818, "grad_norm": 0.16400787234306335, "learning_rate": 0.002, "loss": 2.5481, "step": 225700 }, { "epoch": 0.4496645097539207, "grad_norm": 0.17525695264339447, "learning_rate": 0.002, "loss": 2.5587, "step": 225710 }, { "epoch": 0.4496844319775596, "grad_norm": 0.166269451379776, "learning_rate": 0.002, "loss": 2.586, "step": 225720 }, { "epoch": 0.44970435420119853, "grad_norm": 0.15632788836956024, "learning_rate": 0.002, "loss": 2.5423, "step": 225730 }, { "epoch": 0.4497242764248374, "grad_norm": 0.16686218976974487, "learning_rate": 0.002, "loss": 2.5696, "step": 225740 }, { "epoch": 0.44974419864847637, "grad_norm": 0.14549119770526886, "learning_rate": 0.002, "loss": 2.5483, "step": 225750 }, { "epoch": 0.44976412087211526, "grad_norm": 0.17803075909614563, "learning_rate": 0.002, "loss": 2.5438, "step": 225760 }, { "epoch": 0.44978404309575415, "grad_norm": 0.16485758125782013, "learning_rate": 0.002, "loss": 2.5594, "step": 225770 }, { "epoch": 0.4498039653193931, "grad_norm": 0.14121142029762268, "learning_rate": 0.002, "loss": 2.5458, "step": 225780 }, { "epoch": 0.449823887543032, "grad_norm": 0.18524780869483948, "learning_rate": 0.002, "loss": 2.5696, "step": 225790 }, { "epoch": 0.44984380976667093, "grad_norm": 0.1870611160993576, "learning_rate": 0.002, "loss": 2.5334, "step": 225800 }, { "epoch": 0.4498637319903098, "grad_norm": 0.19961276650428772, "learning_rate": 0.002, "loss": 2.5389, "step": 225810 }, { "epoch": 0.44988365421394877, "grad_norm": 0.17863982915878296, "learning_rate": 0.002, "loss": 2.554, "step": 225820 }, { "epoch": 0.44990357643758766, "grad_norm": 0.16092485189437866, "learning_rate": 0.002, "loss": 2.5638, "step": 225830 }, { "epoch": 0.44992349866122655, "grad_norm": 0.16796688735485077, "learning_rate": 0.002, "loss": 2.5626, "step": 225840 }, { "epoch": 0.4499434208848655, "grad_norm": 0.14285220205783844, "learning_rate": 0.002, "loss": 2.5573, "step": 225850 }, { "epoch": 0.4499633431085044, "grad_norm": 0.20398157835006714, "learning_rate": 0.002, "loss": 2.5664, "step": 225860 }, { "epoch": 0.44998326533214333, "grad_norm": 0.160657599568367, "learning_rate": 0.002, "loss": 2.5615, "step": 225870 }, { "epoch": 0.4500031875557822, "grad_norm": 0.1715528964996338, "learning_rate": 0.002, "loss": 2.5518, "step": 225880 }, { "epoch": 0.4500231097794211, "grad_norm": 0.15507963299751282, "learning_rate": 0.002, "loss": 2.5719, "step": 225890 }, { "epoch": 0.45004303200306006, "grad_norm": 0.17121073603630066, "learning_rate": 0.002, "loss": 2.567, "step": 225900 }, { "epoch": 0.45006295422669895, "grad_norm": 0.18314345180988312, "learning_rate": 0.002, "loss": 2.5524, "step": 225910 }, { "epoch": 0.4500828764503379, "grad_norm": 0.17613491415977478, "learning_rate": 0.002, "loss": 2.5361, "step": 225920 }, { "epoch": 0.4501027986739768, "grad_norm": 0.18458908796310425, "learning_rate": 0.002, "loss": 2.5519, "step": 225930 }, { "epoch": 0.45012272089761574, "grad_norm": 0.16475661098957062, "learning_rate": 0.002, "loss": 2.5636, "step": 225940 }, { "epoch": 0.4501426431212546, "grad_norm": 0.14705932140350342, "learning_rate": 0.002, "loss": 2.5707, "step": 225950 }, { "epoch": 0.4501625653448935, "grad_norm": 0.19745641946792603, "learning_rate": 0.002, "loss": 2.5741, "step": 225960 }, { "epoch": 0.45018248756853246, "grad_norm": 0.15659639239311218, "learning_rate": 0.002, "loss": 2.5511, "step": 225970 }, { "epoch": 0.45020240979217135, "grad_norm": 0.15568552911281586, "learning_rate": 0.002, "loss": 2.5569, "step": 225980 }, { "epoch": 0.4502223320158103, "grad_norm": 0.16887353360652924, "learning_rate": 0.002, "loss": 2.5553, "step": 225990 }, { "epoch": 0.4502422542394492, "grad_norm": 0.16867931187152863, "learning_rate": 0.002, "loss": 2.5452, "step": 226000 }, { "epoch": 0.4502621764630881, "grad_norm": 0.1659322828054428, "learning_rate": 0.002, "loss": 2.5626, "step": 226010 }, { "epoch": 0.45028209868672703, "grad_norm": 0.20506340265274048, "learning_rate": 0.002, "loss": 2.5607, "step": 226020 }, { "epoch": 0.4503020209103659, "grad_norm": 0.1876775324344635, "learning_rate": 0.002, "loss": 2.5679, "step": 226030 }, { "epoch": 0.45032194313400487, "grad_norm": 0.16665104031562805, "learning_rate": 0.002, "loss": 2.5661, "step": 226040 }, { "epoch": 0.45034186535764376, "grad_norm": 0.15449920296669006, "learning_rate": 0.002, "loss": 2.5654, "step": 226050 }, { "epoch": 0.45036178758128265, "grad_norm": 0.16556213796138763, "learning_rate": 0.002, "loss": 2.5508, "step": 226060 }, { "epoch": 0.4503817098049216, "grad_norm": 0.1446525603532791, "learning_rate": 0.002, "loss": 2.5653, "step": 226070 }, { "epoch": 0.4504016320285605, "grad_norm": 0.17828485369682312, "learning_rate": 0.002, "loss": 2.5722, "step": 226080 }, { "epoch": 0.45042155425219943, "grad_norm": 0.1863417774438858, "learning_rate": 0.002, "loss": 2.5598, "step": 226090 }, { "epoch": 0.4504414764758383, "grad_norm": 0.1626480519771576, "learning_rate": 0.002, "loss": 2.5584, "step": 226100 }, { "epoch": 0.45046139869947727, "grad_norm": 0.14901579916477203, "learning_rate": 0.002, "loss": 2.5532, "step": 226110 }, { "epoch": 0.45048132092311616, "grad_norm": 0.16110587120056152, "learning_rate": 0.002, "loss": 2.5493, "step": 226120 }, { "epoch": 0.45050124314675505, "grad_norm": 0.19798292219638824, "learning_rate": 0.002, "loss": 2.557, "step": 226130 }, { "epoch": 0.450521165370394, "grad_norm": 0.2365497499704361, "learning_rate": 0.002, "loss": 2.5445, "step": 226140 }, { "epoch": 0.4505410875940329, "grad_norm": 0.160679891705513, "learning_rate": 0.002, "loss": 2.5498, "step": 226150 }, { "epoch": 0.45056100981767183, "grad_norm": 0.20049989223480225, "learning_rate": 0.002, "loss": 2.5502, "step": 226160 }, { "epoch": 0.4505809320413107, "grad_norm": 0.14180836081504822, "learning_rate": 0.002, "loss": 2.5562, "step": 226170 }, { "epoch": 0.4506008542649496, "grad_norm": 0.168339803814888, "learning_rate": 0.002, "loss": 2.5626, "step": 226180 }, { "epoch": 0.45062077648858856, "grad_norm": 0.16946612298488617, "learning_rate": 0.002, "loss": 2.5718, "step": 226190 }, { "epoch": 0.45064069871222745, "grad_norm": 0.15856802463531494, "learning_rate": 0.002, "loss": 2.5567, "step": 226200 }, { "epoch": 0.4506606209358664, "grad_norm": 0.18556061387062073, "learning_rate": 0.002, "loss": 2.5613, "step": 226210 }, { "epoch": 0.4506805431595053, "grad_norm": 0.18084897100925446, "learning_rate": 0.002, "loss": 2.5646, "step": 226220 }, { "epoch": 0.4507004653831442, "grad_norm": 0.14201462268829346, "learning_rate": 0.002, "loss": 2.5664, "step": 226230 }, { "epoch": 0.4507203876067831, "grad_norm": 0.1648225039243698, "learning_rate": 0.002, "loss": 2.5519, "step": 226240 }, { "epoch": 0.450740309830422, "grad_norm": 0.19046251475811005, "learning_rate": 0.002, "loss": 2.5541, "step": 226250 }, { "epoch": 0.45076023205406096, "grad_norm": 0.22245241701602936, "learning_rate": 0.002, "loss": 2.5525, "step": 226260 }, { "epoch": 0.45078015427769985, "grad_norm": 0.17440928518772125, "learning_rate": 0.002, "loss": 2.5583, "step": 226270 }, { "epoch": 0.4508000765013388, "grad_norm": 0.18444398045539856, "learning_rate": 0.002, "loss": 2.5459, "step": 226280 }, { "epoch": 0.4508199987249777, "grad_norm": 0.19192536175251007, "learning_rate": 0.002, "loss": 2.5566, "step": 226290 }, { "epoch": 0.4508399209486166, "grad_norm": 0.17485365271568298, "learning_rate": 0.002, "loss": 2.5518, "step": 226300 }, { "epoch": 0.4508598431722555, "grad_norm": 0.17091375589370728, "learning_rate": 0.002, "loss": 2.5751, "step": 226310 }, { "epoch": 0.4508797653958944, "grad_norm": 0.5718966126441956, "learning_rate": 0.002, "loss": 2.5506, "step": 226320 }, { "epoch": 0.45089968761953336, "grad_norm": 0.18632301688194275, "learning_rate": 0.002, "loss": 2.5657, "step": 226330 }, { "epoch": 0.45091960984317225, "grad_norm": 0.20956209301948547, "learning_rate": 0.002, "loss": 2.5676, "step": 226340 }, { "epoch": 0.45093953206681114, "grad_norm": 0.1868869811296463, "learning_rate": 0.002, "loss": 2.561, "step": 226350 }, { "epoch": 0.4509594542904501, "grad_norm": 0.1694122552871704, "learning_rate": 0.002, "loss": 2.5482, "step": 226360 }, { "epoch": 0.450979376514089, "grad_norm": 0.20282253623008728, "learning_rate": 0.002, "loss": 2.5585, "step": 226370 }, { "epoch": 0.4509992987377279, "grad_norm": 0.14594478905200958, "learning_rate": 0.002, "loss": 2.5617, "step": 226380 }, { "epoch": 0.4510192209613668, "grad_norm": 0.1602514237165451, "learning_rate": 0.002, "loss": 2.5485, "step": 226390 }, { "epoch": 0.45103914318500576, "grad_norm": 0.19944381713867188, "learning_rate": 0.002, "loss": 2.554, "step": 226400 }, { "epoch": 0.45105906540864465, "grad_norm": 0.16798695921897888, "learning_rate": 0.002, "loss": 2.5529, "step": 226410 }, { "epoch": 0.45107898763228355, "grad_norm": 0.20128898322582245, "learning_rate": 0.002, "loss": 2.5671, "step": 226420 }, { "epoch": 0.4510989098559225, "grad_norm": 0.1480359584093094, "learning_rate": 0.002, "loss": 2.5734, "step": 226430 }, { "epoch": 0.4511188320795614, "grad_norm": 0.14388875663280487, "learning_rate": 0.002, "loss": 2.5485, "step": 226440 }, { "epoch": 0.45113875430320033, "grad_norm": 0.16816067695617676, "learning_rate": 0.002, "loss": 2.5581, "step": 226450 }, { "epoch": 0.4511586765268392, "grad_norm": 0.17594948410987854, "learning_rate": 0.002, "loss": 2.5517, "step": 226460 }, { "epoch": 0.4511785987504781, "grad_norm": 0.14751338958740234, "learning_rate": 0.002, "loss": 2.5642, "step": 226470 }, { "epoch": 0.45119852097411706, "grad_norm": 0.20196591317653656, "learning_rate": 0.002, "loss": 2.5754, "step": 226480 }, { "epoch": 0.45121844319775595, "grad_norm": 0.16049295663833618, "learning_rate": 0.002, "loss": 2.561, "step": 226490 }, { "epoch": 0.4512383654213949, "grad_norm": 0.15617330372333527, "learning_rate": 0.002, "loss": 2.5665, "step": 226500 }, { "epoch": 0.4512582876450338, "grad_norm": 0.16877076029777527, "learning_rate": 0.002, "loss": 2.5604, "step": 226510 }, { "epoch": 0.4512782098686727, "grad_norm": 0.1877320408821106, "learning_rate": 0.002, "loss": 2.5552, "step": 226520 }, { "epoch": 0.4512981320923116, "grad_norm": 0.16308848559856415, "learning_rate": 0.002, "loss": 2.5506, "step": 226530 }, { "epoch": 0.4513180543159505, "grad_norm": 0.16377077996730804, "learning_rate": 0.002, "loss": 2.5742, "step": 226540 }, { "epoch": 0.45133797653958946, "grad_norm": 0.17971760034561157, "learning_rate": 0.002, "loss": 2.5627, "step": 226550 }, { "epoch": 0.45135789876322835, "grad_norm": 0.17102926969528198, "learning_rate": 0.002, "loss": 2.5441, "step": 226560 }, { "epoch": 0.4513778209868673, "grad_norm": 0.14615583419799805, "learning_rate": 0.002, "loss": 2.5484, "step": 226570 }, { "epoch": 0.4513977432105062, "grad_norm": 0.17635919153690338, "learning_rate": 0.002, "loss": 2.5604, "step": 226580 }, { "epoch": 0.4514176654341451, "grad_norm": 0.14802105724811554, "learning_rate": 0.002, "loss": 2.5367, "step": 226590 }, { "epoch": 0.451437587657784, "grad_norm": 0.15675358474254608, "learning_rate": 0.002, "loss": 2.5529, "step": 226600 }, { "epoch": 0.4514575098814229, "grad_norm": 0.17681308090686798, "learning_rate": 0.002, "loss": 2.5465, "step": 226610 }, { "epoch": 0.45147743210506186, "grad_norm": 0.19354157149791718, "learning_rate": 0.002, "loss": 2.5618, "step": 226620 }, { "epoch": 0.45149735432870075, "grad_norm": 0.17185433208942413, "learning_rate": 0.002, "loss": 2.5654, "step": 226630 }, { "epoch": 0.45151727655233964, "grad_norm": 0.18779180943965912, "learning_rate": 0.002, "loss": 2.5489, "step": 226640 }, { "epoch": 0.4515371987759786, "grad_norm": 0.15494735538959503, "learning_rate": 0.002, "loss": 2.5538, "step": 226650 }, { "epoch": 0.4515571209996175, "grad_norm": 0.1698269098997116, "learning_rate": 0.002, "loss": 2.5587, "step": 226660 }, { "epoch": 0.4515770432232564, "grad_norm": 0.20589925348758698, "learning_rate": 0.002, "loss": 2.565, "step": 226670 }, { "epoch": 0.4515969654468953, "grad_norm": 0.13909925520420074, "learning_rate": 0.002, "loss": 2.5628, "step": 226680 }, { "epoch": 0.45161688767053426, "grad_norm": 0.15700116753578186, "learning_rate": 0.002, "loss": 2.5636, "step": 226690 }, { "epoch": 0.45163680989417315, "grad_norm": 0.19303911924362183, "learning_rate": 0.002, "loss": 2.5552, "step": 226700 }, { "epoch": 0.45165673211781204, "grad_norm": 0.1804160326719284, "learning_rate": 0.002, "loss": 2.5619, "step": 226710 }, { "epoch": 0.451676654341451, "grad_norm": 0.17432385683059692, "learning_rate": 0.002, "loss": 2.5619, "step": 226720 }, { "epoch": 0.4516965765650899, "grad_norm": 0.18424957990646362, "learning_rate": 0.002, "loss": 2.5584, "step": 226730 }, { "epoch": 0.4517164987887288, "grad_norm": 0.16422152519226074, "learning_rate": 0.002, "loss": 2.5404, "step": 226740 }, { "epoch": 0.4517364210123677, "grad_norm": 0.16539941728115082, "learning_rate": 0.002, "loss": 2.5561, "step": 226750 }, { "epoch": 0.4517563432360066, "grad_norm": 0.1753484010696411, "learning_rate": 0.002, "loss": 2.5436, "step": 226760 }, { "epoch": 0.45177626545964555, "grad_norm": 0.2191738784313202, "learning_rate": 0.002, "loss": 2.5697, "step": 226770 }, { "epoch": 0.45179618768328444, "grad_norm": 0.1825713813304901, "learning_rate": 0.002, "loss": 2.5673, "step": 226780 }, { "epoch": 0.4518161099069234, "grad_norm": 0.1840221881866455, "learning_rate": 0.002, "loss": 2.5667, "step": 226790 }, { "epoch": 0.4518360321305623, "grad_norm": 0.16794373095035553, "learning_rate": 0.002, "loss": 2.5593, "step": 226800 }, { "epoch": 0.4518559543542012, "grad_norm": 0.15632154047489166, "learning_rate": 0.002, "loss": 2.5603, "step": 226810 }, { "epoch": 0.4518758765778401, "grad_norm": 0.1647825390100479, "learning_rate": 0.002, "loss": 2.557, "step": 226820 }, { "epoch": 0.451895798801479, "grad_norm": 0.16453291475772858, "learning_rate": 0.002, "loss": 2.5572, "step": 226830 }, { "epoch": 0.45191572102511796, "grad_norm": 0.19082584977149963, "learning_rate": 0.002, "loss": 2.5538, "step": 226840 }, { "epoch": 0.45193564324875685, "grad_norm": 0.1589684784412384, "learning_rate": 0.002, "loss": 2.5563, "step": 226850 }, { "epoch": 0.4519555654723958, "grad_norm": 0.19893710315227509, "learning_rate": 0.002, "loss": 2.5483, "step": 226860 }, { "epoch": 0.4519754876960347, "grad_norm": 0.16619160771369934, "learning_rate": 0.002, "loss": 2.555, "step": 226870 }, { "epoch": 0.4519954099196736, "grad_norm": 0.17527198791503906, "learning_rate": 0.002, "loss": 2.5759, "step": 226880 }, { "epoch": 0.4520153321433125, "grad_norm": 0.1933232992887497, "learning_rate": 0.002, "loss": 2.5567, "step": 226890 }, { "epoch": 0.4520352543669514, "grad_norm": 0.17785528302192688, "learning_rate": 0.002, "loss": 2.5723, "step": 226900 }, { "epoch": 0.45205517659059036, "grad_norm": 0.1551325023174286, "learning_rate": 0.002, "loss": 2.5588, "step": 226910 }, { "epoch": 0.45207509881422925, "grad_norm": 0.1830768585205078, "learning_rate": 0.002, "loss": 2.5686, "step": 226920 }, { "epoch": 0.45209502103786814, "grad_norm": 0.18261858820915222, "learning_rate": 0.002, "loss": 2.5635, "step": 226930 }, { "epoch": 0.4521149432615071, "grad_norm": 0.16057930886745453, "learning_rate": 0.002, "loss": 2.5661, "step": 226940 }, { "epoch": 0.452134865485146, "grad_norm": 0.16664183139801025, "learning_rate": 0.002, "loss": 2.5471, "step": 226950 }, { "epoch": 0.4521547877087849, "grad_norm": 0.176553875207901, "learning_rate": 0.002, "loss": 2.5487, "step": 226960 }, { "epoch": 0.4521747099324238, "grad_norm": 0.15605609118938446, "learning_rate": 0.002, "loss": 2.5684, "step": 226970 }, { "epoch": 0.4521946321560627, "grad_norm": 0.18340301513671875, "learning_rate": 0.002, "loss": 2.5626, "step": 226980 }, { "epoch": 0.45221455437970165, "grad_norm": 0.16661612689495087, "learning_rate": 0.002, "loss": 2.5386, "step": 226990 }, { "epoch": 0.45223447660334054, "grad_norm": 0.19269950687885284, "learning_rate": 0.002, "loss": 2.5615, "step": 227000 }, { "epoch": 0.4522543988269795, "grad_norm": 0.1705121099948883, "learning_rate": 0.002, "loss": 2.5433, "step": 227010 }, { "epoch": 0.4522743210506184, "grad_norm": 0.1703387349843979, "learning_rate": 0.002, "loss": 2.5597, "step": 227020 }, { "epoch": 0.4522942432742573, "grad_norm": 0.1706511676311493, "learning_rate": 0.002, "loss": 2.5415, "step": 227030 }, { "epoch": 0.4523141654978962, "grad_norm": 0.15494585037231445, "learning_rate": 0.002, "loss": 2.55, "step": 227040 }, { "epoch": 0.4523340877215351, "grad_norm": 0.20117715001106262, "learning_rate": 0.002, "loss": 2.5486, "step": 227050 }, { "epoch": 0.45235400994517405, "grad_norm": 0.15758880972862244, "learning_rate": 0.002, "loss": 2.5546, "step": 227060 }, { "epoch": 0.45237393216881294, "grad_norm": 0.21603776514530182, "learning_rate": 0.002, "loss": 2.5619, "step": 227070 }, { "epoch": 0.4523938543924519, "grad_norm": 0.16009725630283356, "learning_rate": 0.002, "loss": 2.5626, "step": 227080 }, { "epoch": 0.4524137766160908, "grad_norm": 0.18571999669075012, "learning_rate": 0.002, "loss": 2.5595, "step": 227090 }, { "epoch": 0.45243369883972967, "grad_norm": 0.1408826857805252, "learning_rate": 0.002, "loss": 2.5609, "step": 227100 }, { "epoch": 0.4524536210633686, "grad_norm": 0.17377018928527832, "learning_rate": 0.002, "loss": 2.5584, "step": 227110 }, { "epoch": 0.4524735432870075, "grad_norm": 0.16734236478805542, "learning_rate": 0.002, "loss": 2.5454, "step": 227120 }, { "epoch": 0.45249346551064645, "grad_norm": 0.15346798300743103, "learning_rate": 0.002, "loss": 2.548, "step": 227130 }, { "epoch": 0.45251338773428534, "grad_norm": 0.2010737657546997, "learning_rate": 0.002, "loss": 2.5622, "step": 227140 }, { "epoch": 0.4525333099579243, "grad_norm": 0.1579119712114334, "learning_rate": 0.002, "loss": 2.5694, "step": 227150 }, { "epoch": 0.4525532321815632, "grad_norm": 0.2120119333267212, "learning_rate": 0.002, "loss": 2.5508, "step": 227160 }, { "epoch": 0.45257315440520207, "grad_norm": 0.1657043844461441, "learning_rate": 0.002, "loss": 2.5565, "step": 227170 }, { "epoch": 0.452593076628841, "grad_norm": 0.19178134202957153, "learning_rate": 0.002, "loss": 2.5569, "step": 227180 }, { "epoch": 0.4526129988524799, "grad_norm": 0.15424402058124542, "learning_rate": 0.002, "loss": 2.5619, "step": 227190 }, { "epoch": 0.45263292107611885, "grad_norm": 0.17426691949367523, "learning_rate": 0.002, "loss": 2.567, "step": 227200 }, { "epoch": 0.45265284329975775, "grad_norm": 0.14364948868751526, "learning_rate": 0.002, "loss": 2.5543, "step": 227210 }, { "epoch": 0.45267276552339664, "grad_norm": 0.20126591622829437, "learning_rate": 0.002, "loss": 2.5656, "step": 227220 }, { "epoch": 0.4526926877470356, "grad_norm": 0.18261133134365082, "learning_rate": 0.002, "loss": 2.568, "step": 227230 }, { "epoch": 0.4527126099706745, "grad_norm": 0.18845705687999725, "learning_rate": 0.002, "loss": 2.551, "step": 227240 }, { "epoch": 0.4527325321943134, "grad_norm": 0.17486213147640228, "learning_rate": 0.002, "loss": 2.558, "step": 227250 }, { "epoch": 0.4527524544179523, "grad_norm": 0.18188929557800293, "learning_rate": 0.002, "loss": 2.5459, "step": 227260 }, { "epoch": 0.4527723766415912, "grad_norm": 0.16005174815654755, "learning_rate": 0.002, "loss": 2.5643, "step": 227270 }, { "epoch": 0.45279229886523015, "grad_norm": 0.19595399498939514, "learning_rate": 0.002, "loss": 2.5594, "step": 227280 }, { "epoch": 0.45281222108886904, "grad_norm": 0.16624733805656433, "learning_rate": 0.002, "loss": 2.5491, "step": 227290 }, { "epoch": 0.452832143312508, "grad_norm": 0.17464517056941986, "learning_rate": 0.002, "loss": 2.5513, "step": 227300 }, { "epoch": 0.4528520655361469, "grad_norm": 0.17902415990829468, "learning_rate": 0.002, "loss": 2.5654, "step": 227310 }, { "epoch": 0.4528719877597858, "grad_norm": 0.16778576374053955, "learning_rate": 0.002, "loss": 2.551, "step": 227320 }, { "epoch": 0.4528919099834247, "grad_norm": 0.15061303973197937, "learning_rate": 0.002, "loss": 2.545, "step": 227330 }, { "epoch": 0.4529118322070636, "grad_norm": 0.163257896900177, "learning_rate": 0.002, "loss": 2.5452, "step": 227340 }, { "epoch": 0.45293175443070255, "grad_norm": 0.1631697118282318, "learning_rate": 0.002, "loss": 2.5556, "step": 227350 }, { "epoch": 0.45295167665434144, "grad_norm": 0.1916850060224533, "learning_rate": 0.002, "loss": 2.5436, "step": 227360 }, { "epoch": 0.4529715988779804, "grad_norm": 0.17501874268054962, "learning_rate": 0.002, "loss": 2.5622, "step": 227370 }, { "epoch": 0.4529915211016193, "grad_norm": 0.16084828972816467, "learning_rate": 0.002, "loss": 2.548, "step": 227380 }, { "epoch": 0.45301144332525817, "grad_norm": 0.16112856566905975, "learning_rate": 0.002, "loss": 2.5605, "step": 227390 }, { "epoch": 0.4530313655488971, "grad_norm": 0.16737499833106995, "learning_rate": 0.002, "loss": 2.5685, "step": 227400 }, { "epoch": 0.453051287772536, "grad_norm": 0.20389799773693085, "learning_rate": 0.002, "loss": 2.5413, "step": 227410 }, { "epoch": 0.45307120999617495, "grad_norm": 0.16810579597949982, "learning_rate": 0.002, "loss": 2.5616, "step": 227420 }, { "epoch": 0.45309113221981384, "grad_norm": 0.15608812868595123, "learning_rate": 0.002, "loss": 2.5451, "step": 227430 }, { "epoch": 0.4531110544434528, "grad_norm": 0.1754792332649231, "learning_rate": 0.002, "loss": 2.5505, "step": 227440 }, { "epoch": 0.4531309766670917, "grad_norm": 0.20703807473182678, "learning_rate": 0.002, "loss": 2.552, "step": 227450 }, { "epoch": 0.45315089889073057, "grad_norm": 0.18979841470718384, "learning_rate": 0.002, "loss": 2.5584, "step": 227460 }, { "epoch": 0.4531708211143695, "grad_norm": 0.18278196454048157, "learning_rate": 0.002, "loss": 2.5628, "step": 227470 }, { "epoch": 0.4531907433380084, "grad_norm": 0.18018493056297302, "learning_rate": 0.002, "loss": 2.5759, "step": 227480 }, { "epoch": 0.45321066556164735, "grad_norm": 0.19330105185508728, "learning_rate": 0.002, "loss": 2.5472, "step": 227490 }, { "epoch": 0.45323058778528624, "grad_norm": 0.17007623612880707, "learning_rate": 0.002, "loss": 2.5566, "step": 227500 }, { "epoch": 0.45325051000892513, "grad_norm": 0.16275052726268768, "learning_rate": 0.002, "loss": 2.5715, "step": 227510 }, { "epoch": 0.4532704322325641, "grad_norm": 0.2072816789150238, "learning_rate": 0.002, "loss": 2.5572, "step": 227520 }, { "epoch": 0.45329035445620297, "grad_norm": 0.17502273619174957, "learning_rate": 0.002, "loss": 2.5616, "step": 227530 }, { "epoch": 0.4533102766798419, "grad_norm": 0.16136789321899414, "learning_rate": 0.002, "loss": 2.5503, "step": 227540 }, { "epoch": 0.4533301989034808, "grad_norm": 0.17171619832515717, "learning_rate": 0.002, "loss": 2.5501, "step": 227550 }, { "epoch": 0.4533501211271197, "grad_norm": 0.1542573869228363, "learning_rate": 0.002, "loss": 2.5595, "step": 227560 }, { "epoch": 0.45337004335075864, "grad_norm": 0.15395911037921906, "learning_rate": 0.002, "loss": 2.5425, "step": 227570 }, { "epoch": 0.45338996557439754, "grad_norm": 0.18551838397979736, "learning_rate": 0.002, "loss": 2.5504, "step": 227580 }, { "epoch": 0.4534098877980365, "grad_norm": 0.16731303930282593, "learning_rate": 0.002, "loss": 2.5691, "step": 227590 }, { "epoch": 0.4534298100216754, "grad_norm": 0.182424396276474, "learning_rate": 0.002, "loss": 2.5379, "step": 227600 }, { "epoch": 0.4534497322453143, "grad_norm": 0.16471688449382782, "learning_rate": 0.002, "loss": 2.5644, "step": 227610 }, { "epoch": 0.4534696544689532, "grad_norm": 0.16360822319984436, "learning_rate": 0.002, "loss": 2.5524, "step": 227620 }, { "epoch": 0.4534895766925921, "grad_norm": 0.16405899822711945, "learning_rate": 0.002, "loss": 2.5588, "step": 227630 }, { "epoch": 0.45350949891623105, "grad_norm": 0.1517646163702011, "learning_rate": 0.002, "loss": 2.5498, "step": 227640 }, { "epoch": 0.45352942113986994, "grad_norm": 0.18966339528560638, "learning_rate": 0.002, "loss": 2.5668, "step": 227650 }, { "epoch": 0.4535493433635089, "grad_norm": 0.16004414856433868, "learning_rate": 0.002, "loss": 2.5626, "step": 227660 }, { "epoch": 0.4535692655871478, "grad_norm": 0.18366920948028564, "learning_rate": 0.002, "loss": 2.5616, "step": 227670 }, { "epoch": 0.45358918781078666, "grad_norm": 0.16510067880153656, "learning_rate": 0.002, "loss": 2.5717, "step": 227680 }, { "epoch": 0.4536091100344256, "grad_norm": 0.20857183635234833, "learning_rate": 0.002, "loss": 2.5663, "step": 227690 }, { "epoch": 0.4536290322580645, "grad_norm": 0.16484662890434265, "learning_rate": 0.002, "loss": 2.5589, "step": 227700 }, { "epoch": 0.45364895448170345, "grad_norm": 0.1496754139661789, "learning_rate": 0.002, "loss": 2.5676, "step": 227710 }, { "epoch": 0.45366887670534234, "grad_norm": 0.16091087460517883, "learning_rate": 0.002, "loss": 2.5578, "step": 227720 }, { "epoch": 0.45368879892898123, "grad_norm": 0.15649718046188354, "learning_rate": 0.002, "loss": 2.5607, "step": 227730 }, { "epoch": 0.4537087211526202, "grad_norm": 0.18614093959331512, "learning_rate": 0.002, "loss": 2.5685, "step": 227740 }, { "epoch": 0.45372864337625907, "grad_norm": 0.16730551421642303, "learning_rate": 0.002, "loss": 2.5591, "step": 227750 }, { "epoch": 0.453748565599898, "grad_norm": 0.18799938261508942, "learning_rate": 0.002, "loss": 2.5783, "step": 227760 }, { "epoch": 0.4537684878235369, "grad_norm": 0.16128768026828766, "learning_rate": 0.002, "loss": 2.5625, "step": 227770 }, { "epoch": 0.45378841004717585, "grad_norm": 0.1955641210079193, "learning_rate": 0.002, "loss": 2.5502, "step": 227780 }, { "epoch": 0.45380833227081474, "grad_norm": 0.1598878651857376, "learning_rate": 0.002, "loss": 2.5542, "step": 227790 }, { "epoch": 0.45382825449445363, "grad_norm": 0.15849189460277557, "learning_rate": 0.002, "loss": 2.5646, "step": 227800 }, { "epoch": 0.4538481767180926, "grad_norm": 0.16457924246788025, "learning_rate": 0.002, "loss": 2.5454, "step": 227810 }, { "epoch": 0.45386809894173147, "grad_norm": 0.18968302011489868, "learning_rate": 0.002, "loss": 2.5566, "step": 227820 }, { "epoch": 0.4538880211653704, "grad_norm": 0.15098147094249725, "learning_rate": 0.002, "loss": 2.5454, "step": 227830 }, { "epoch": 0.4539079433890093, "grad_norm": 0.17829260230064392, "learning_rate": 0.002, "loss": 2.5546, "step": 227840 }, { "epoch": 0.4539278656126482, "grad_norm": 0.21166254580020905, "learning_rate": 0.002, "loss": 2.5343, "step": 227850 }, { "epoch": 0.45394778783628714, "grad_norm": 0.20579619705677032, "learning_rate": 0.002, "loss": 2.566, "step": 227860 }, { "epoch": 0.45396771005992603, "grad_norm": 0.18565808236598969, "learning_rate": 0.002, "loss": 2.5607, "step": 227870 }, { "epoch": 0.453987632283565, "grad_norm": 0.1681438535451889, "learning_rate": 0.002, "loss": 2.5699, "step": 227880 }, { "epoch": 0.45400755450720387, "grad_norm": 0.1749543994665146, "learning_rate": 0.002, "loss": 2.558, "step": 227890 }, { "epoch": 0.4540274767308428, "grad_norm": 0.15810780227184296, "learning_rate": 0.002, "loss": 2.5599, "step": 227900 }, { "epoch": 0.4540473989544817, "grad_norm": 0.18336547911167145, "learning_rate": 0.002, "loss": 2.5694, "step": 227910 }, { "epoch": 0.4540673211781206, "grad_norm": 0.21083708107471466, "learning_rate": 0.002, "loss": 2.5437, "step": 227920 }, { "epoch": 0.45408724340175954, "grad_norm": 0.15019750595092773, "learning_rate": 0.002, "loss": 2.5646, "step": 227930 }, { "epoch": 0.45410716562539843, "grad_norm": 0.1709241420030594, "learning_rate": 0.002, "loss": 2.5558, "step": 227940 }, { "epoch": 0.4541270878490374, "grad_norm": 0.16824032366275787, "learning_rate": 0.002, "loss": 2.5605, "step": 227950 }, { "epoch": 0.45414701007267627, "grad_norm": 0.17447389662265778, "learning_rate": 0.002, "loss": 2.5386, "step": 227960 }, { "epoch": 0.45416693229631516, "grad_norm": 0.17581984400749207, "learning_rate": 0.002, "loss": 2.5644, "step": 227970 }, { "epoch": 0.4541868545199541, "grad_norm": 0.16279937326908112, "learning_rate": 0.002, "loss": 2.5683, "step": 227980 }, { "epoch": 0.454206776743593, "grad_norm": 0.15328757464885712, "learning_rate": 0.002, "loss": 2.5695, "step": 227990 }, { "epoch": 0.45422669896723195, "grad_norm": 0.24008694291114807, "learning_rate": 0.002, "loss": 2.5481, "step": 228000 }, { "epoch": 0.45424662119087084, "grad_norm": 0.19470316171646118, "learning_rate": 0.002, "loss": 2.5521, "step": 228010 }, { "epoch": 0.4542665434145097, "grad_norm": 0.15638867020606995, "learning_rate": 0.002, "loss": 2.5555, "step": 228020 }, { "epoch": 0.4542864656381487, "grad_norm": 0.16379721462726593, "learning_rate": 0.002, "loss": 2.5531, "step": 228030 }, { "epoch": 0.45430638786178756, "grad_norm": 0.15343020856380463, "learning_rate": 0.002, "loss": 2.5599, "step": 228040 }, { "epoch": 0.4543263100854265, "grad_norm": 0.1425342559814453, "learning_rate": 0.002, "loss": 2.5493, "step": 228050 }, { "epoch": 0.4543462323090654, "grad_norm": 0.17375026643276215, "learning_rate": 0.002, "loss": 2.5476, "step": 228060 }, { "epoch": 0.45436615453270435, "grad_norm": 0.16574612259864807, "learning_rate": 0.002, "loss": 2.5614, "step": 228070 }, { "epoch": 0.45438607675634324, "grad_norm": 0.19657005369663239, "learning_rate": 0.002, "loss": 2.5483, "step": 228080 }, { "epoch": 0.45440599897998213, "grad_norm": 0.18489386141300201, "learning_rate": 0.002, "loss": 2.5747, "step": 228090 }, { "epoch": 0.4544259212036211, "grad_norm": 0.1886364370584488, "learning_rate": 0.002, "loss": 2.5693, "step": 228100 }, { "epoch": 0.45444584342725997, "grad_norm": 0.16696098446846008, "learning_rate": 0.002, "loss": 2.5747, "step": 228110 }, { "epoch": 0.4544657656508989, "grad_norm": 0.1491600126028061, "learning_rate": 0.002, "loss": 2.5637, "step": 228120 }, { "epoch": 0.4544856878745378, "grad_norm": 0.20248471200466156, "learning_rate": 0.002, "loss": 2.5597, "step": 228130 }, { "epoch": 0.4545056100981767, "grad_norm": 0.19522225856781006, "learning_rate": 0.002, "loss": 2.5619, "step": 228140 }, { "epoch": 0.45452553232181564, "grad_norm": 0.16164593398571014, "learning_rate": 0.002, "loss": 2.5659, "step": 228150 }, { "epoch": 0.45454545454545453, "grad_norm": 0.15872503817081451, "learning_rate": 0.002, "loss": 2.5567, "step": 228160 }, { "epoch": 0.4545653767690935, "grad_norm": 0.21425937116146088, "learning_rate": 0.002, "loss": 2.5445, "step": 228170 }, { "epoch": 0.45458529899273237, "grad_norm": 0.18792511522769928, "learning_rate": 0.002, "loss": 2.5488, "step": 228180 }, { "epoch": 0.4546052212163713, "grad_norm": 0.1659630686044693, "learning_rate": 0.002, "loss": 2.5489, "step": 228190 }, { "epoch": 0.4546251434400102, "grad_norm": 0.23572203516960144, "learning_rate": 0.002, "loss": 2.5782, "step": 228200 }, { "epoch": 0.4546450656636491, "grad_norm": 0.1819736361503601, "learning_rate": 0.002, "loss": 2.5596, "step": 228210 }, { "epoch": 0.45466498788728804, "grad_norm": 0.1526617407798767, "learning_rate": 0.002, "loss": 2.5521, "step": 228220 }, { "epoch": 0.45468491011092693, "grad_norm": 0.16559407114982605, "learning_rate": 0.002, "loss": 2.5628, "step": 228230 }, { "epoch": 0.4547048323345659, "grad_norm": 0.15737205743789673, "learning_rate": 0.002, "loss": 2.5554, "step": 228240 }, { "epoch": 0.45472475455820477, "grad_norm": 0.1413901299238205, "learning_rate": 0.002, "loss": 2.5501, "step": 228250 }, { "epoch": 0.45474467678184366, "grad_norm": 0.19314822554588318, "learning_rate": 0.002, "loss": 2.5744, "step": 228260 }, { "epoch": 0.4547645990054826, "grad_norm": 0.16321825981140137, "learning_rate": 0.002, "loss": 2.5672, "step": 228270 }, { "epoch": 0.4547845212291215, "grad_norm": 0.17187123000621796, "learning_rate": 0.002, "loss": 2.5481, "step": 228280 }, { "epoch": 0.45480444345276044, "grad_norm": 0.18274225294589996, "learning_rate": 0.002, "loss": 2.5668, "step": 228290 }, { "epoch": 0.45482436567639933, "grad_norm": 0.16232594847679138, "learning_rate": 0.002, "loss": 2.5483, "step": 228300 }, { "epoch": 0.4548442879000382, "grad_norm": 0.161698117852211, "learning_rate": 0.002, "loss": 2.5712, "step": 228310 }, { "epoch": 0.45486421012367717, "grad_norm": 0.1545773595571518, "learning_rate": 0.002, "loss": 2.5531, "step": 228320 }, { "epoch": 0.45488413234731606, "grad_norm": 0.18024833500385284, "learning_rate": 0.002, "loss": 2.5526, "step": 228330 }, { "epoch": 0.454904054570955, "grad_norm": 0.15442276000976562, "learning_rate": 0.002, "loss": 2.5659, "step": 228340 }, { "epoch": 0.4549239767945939, "grad_norm": 0.14537280797958374, "learning_rate": 0.002, "loss": 2.5514, "step": 228350 }, { "epoch": 0.45494389901823284, "grad_norm": 0.15600858628749847, "learning_rate": 0.002, "loss": 2.5536, "step": 228360 }, { "epoch": 0.45496382124187174, "grad_norm": 0.4875544309616089, "learning_rate": 0.002, "loss": 2.5583, "step": 228370 }, { "epoch": 0.4549837434655106, "grad_norm": 0.16851167380809784, "learning_rate": 0.002, "loss": 2.5564, "step": 228380 }, { "epoch": 0.4550036656891496, "grad_norm": 0.16186712682247162, "learning_rate": 0.002, "loss": 2.5486, "step": 228390 }, { "epoch": 0.45502358791278846, "grad_norm": 0.15841394662857056, "learning_rate": 0.002, "loss": 2.5471, "step": 228400 }, { "epoch": 0.4550435101364274, "grad_norm": 0.16021683812141418, "learning_rate": 0.002, "loss": 2.5625, "step": 228410 }, { "epoch": 0.4550634323600663, "grad_norm": 0.1771075576543808, "learning_rate": 0.002, "loss": 2.5623, "step": 228420 }, { "epoch": 0.4550833545837052, "grad_norm": 0.1351812779903412, "learning_rate": 0.002, "loss": 2.567, "step": 228430 }, { "epoch": 0.45510327680734414, "grad_norm": 0.2131190299987793, "learning_rate": 0.002, "loss": 2.5602, "step": 228440 }, { "epoch": 0.45512319903098303, "grad_norm": 0.15497158467769623, "learning_rate": 0.002, "loss": 2.5707, "step": 228450 }, { "epoch": 0.455143121254622, "grad_norm": 0.1609211266040802, "learning_rate": 0.002, "loss": 2.5552, "step": 228460 }, { "epoch": 0.45516304347826086, "grad_norm": 0.1723369061946869, "learning_rate": 0.002, "loss": 2.546, "step": 228470 }, { "epoch": 0.4551829657018998, "grad_norm": 0.15781459212303162, "learning_rate": 0.002, "loss": 2.5595, "step": 228480 }, { "epoch": 0.4552028879255387, "grad_norm": 0.1755257397890091, "learning_rate": 0.002, "loss": 2.5559, "step": 228490 }, { "epoch": 0.4552228101491776, "grad_norm": 0.19165045022964478, "learning_rate": 0.002, "loss": 2.5519, "step": 228500 }, { "epoch": 0.45524273237281654, "grad_norm": 0.1804550588130951, "learning_rate": 0.002, "loss": 2.5642, "step": 228510 }, { "epoch": 0.45526265459645543, "grad_norm": 0.1720426380634308, "learning_rate": 0.002, "loss": 2.5596, "step": 228520 }, { "epoch": 0.4552825768200944, "grad_norm": 0.18079142272472382, "learning_rate": 0.002, "loss": 2.5602, "step": 228530 }, { "epoch": 0.45530249904373327, "grad_norm": 0.18846628069877625, "learning_rate": 0.002, "loss": 2.5448, "step": 228540 }, { "epoch": 0.45532242126737216, "grad_norm": 0.1769213080406189, "learning_rate": 0.002, "loss": 2.5676, "step": 228550 }, { "epoch": 0.4553423434910111, "grad_norm": 0.19581381976604462, "learning_rate": 0.002, "loss": 2.5647, "step": 228560 }, { "epoch": 0.45536226571465, "grad_norm": 0.17208030819892883, "learning_rate": 0.002, "loss": 2.569, "step": 228570 }, { "epoch": 0.45538218793828894, "grad_norm": 0.14611349999904633, "learning_rate": 0.002, "loss": 2.549, "step": 228580 }, { "epoch": 0.45540211016192783, "grad_norm": 0.18874479830265045, "learning_rate": 0.002, "loss": 2.5508, "step": 228590 }, { "epoch": 0.4554220323855667, "grad_norm": 0.1575496792793274, "learning_rate": 0.002, "loss": 2.5678, "step": 228600 }, { "epoch": 0.45544195460920567, "grad_norm": 0.14639298617839813, "learning_rate": 0.002, "loss": 2.5507, "step": 228610 }, { "epoch": 0.45546187683284456, "grad_norm": 0.16067250072956085, "learning_rate": 0.002, "loss": 2.5718, "step": 228620 }, { "epoch": 0.4554817990564835, "grad_norm": 0.20021067559719086, "learning_rate": 0.002, "loss": 2.5659, "step": 228630 }, { "epoch": 0.4555017212801224, "grad_norm": 0.16558635234832764, "learning_rate": 0.002, "loss": 2.548, "step": 228640 }, { "epoch": 0.45552164350376134, "grad_norm": 0.15145601332187653, "learning_rate": 0.002, "loss": 2.5649, "step": 228650 }, { "epoch": 0.45554156572740023, "grad_norm": 0.20473606884479523, "learning_rate": 0.002, "loss": 2.5514, "step": 228660 }, { "epoch": 0.4555614879510391, "grad_norm": 0.15649186074733734, "learning_rate": 0.002, "loss": 2.5642, "step": 228670 }, { "epoch": 0.45558141017467807, "grad_norm": 0.1641916185617447, "learning_rate": 0.002, "loss": 2.5737, "step": 228680 }, { "epoch": 0.45560133239831696, "grad_norm": 0.17553719878196716, "learning_rate": 0.002, "loss": 2.5721, "step": 228690 }, { "epoch": 0.4556212546219559, "grad_norm": 0.15574103593826294, "learning_rate": 0.002, "loss": 2.5498, "step": 228700 }, { "epoch": 0.4556411768455948, "grad_norm": 0.16051946580410004, "learning_rate": 0.002, "loss": 2.5563, "step": 228710 }, { "epoch": 0.4556610990692337, "grad_norm": 0.1607203632593155, "learning_rate": 0.002, "loss": 2.5594, "step": 228720 }, { "epoch": 0.45568102129287263, "grad_norm": 0.1760071963071823, "learning_rate": 0.002, "loss": 2.5693, "step": 228730 }, { "epoch": 0.4557009435165115, "grad_norm": 0.1735096126794815, "learning_rate": 0.002, "loss": 2.5676, "step": 228740 }, { "epoch": 0.45572086574015047, "grad_norm": 0.17380154132843018, "learning_rate": 0.002, "loss": 2.5433, "step": 228750 }, { "epoch": 0.45574078796378936, "grad_norm": 0.1777304708957672, "learning_rate": 0.002, "loss": 2.5596, "step": 228760 }, { "epoch": 0.45576071018742825, "grad_norm": 0.16803327202796936, "learning_rate": 0.002, "loss": 2.566, "step": 228770 }, { "epoch": 0.4557806324110672, "grad_norm": 0.15369224548339844, "learning_rate": 0.002, "loss": 2.5583, "step": 228780 }, { "epoch": 0.4558005546347061, "grad_norm": 0.16165503859519958, "learning_rate": 0.002, "loss": 2.5533, "step": 228790 }, { "epoch": 0.45582047685834504, "grad_norm": 0.19167126715183258, "learning_rate": 0.002, "loss": 2.5657, "step": 228800 }, { "epoch": 0.4558403990819839, "grad_norm": 0.16569989919662476, "learning_rate": 0.002, "loss": 2.5461, "step": 228810 }, { "epoch": 0.4558603213056229, "grad_norm": 0.1547408550977707, "learning_rate": 0.002, "loss": 2.5711, "step": 228820 }, { "epoch": 0.45588024352926176, "grad_norm": 0.16081970930099487, "learning_rate": 0.002, "loss": 2.5682, "step": 228830 }, { "epoch": 0.45590016575290065, "grad_norm": 0.1783895641565323, "learning_rate": 0.002, "loss": 2.5678, "step": 228840 }, { "epoch": 0.4559200879765396, "grad_norm": 0.15679340064525604, "learning_rate": 0.002, "loss": 2.5524, "step": 228850 }, { "epoch": 0.4559400102001785, "grad_norm": 0.1875409632921219, "learning_rate": 0.002, "loss": 2.5564, "step": 228860 }, { "epoch": 0.45595993242381744, "grad_norm": 0.1876700073480606, "learning_rate": 0.002, "loss": 2.5594, "step": 228870 }, { "epoch": 0.45597985464745633, "grad_norm": 0.1428835541009903, "learning_rate": 0.002, "loss": 2.5512, "step": 228880 }, { "epoch": 0.4559997768710952, "grad_norm": 0.18167254328727722, "learning_rate": 0.002, "loss": 2.556, "step": 228890 }, { "epoch": 0.45601969909473417, "grad_norm": 0.1744144856929779, "learning_rate": 0.002, "loss": 2.5369, "step": 228900 }, { "epoch": 0.45603962131837306, "grad_norm": 0.17386752367019653, "learning_rate": 0.002, "loss": 2.5578, "step": 228910 }, { "epoch": 0.456059543542012, "grad_norm": 0.17798680067062378, "learning_rate": 0.002, "loss": 2.553, "step": 228920 }, { "epoch": 0.4560794657656509, "grad_norm": 0.1779695749282837, "learning_rate": 0.002, "loss": 2.557, "step": 228930 }, { "epoch": 0.45609938798928984, "grad_norm": 0.1627412736415863, "learning_rate": 0.002, "loss": 2.5598, "step": 228940 }, { "epoch": 0.45611931021292873, "grad_norm": 0.17435355484485626, "learning_rate": 0.002, "loss": 2.5465, "step": 228950 }, { "epoch": 0.4561392324365676, "grad_norm": 0.1730654239654541, "learning_rate": 0.002, "loss": 2.5623, "step": 228960 }, { "epoch": 0.45615915466020657, "grad_norm": 0.16483017802238464, "learning_rate": 0.002, "loss": 2.543, "step": 228970 }, { "epoch": 0.45617907688384546, "grad_norm": 0.1423090547323227, "learning_rate": 0.002, "loss": 2.541, "step": 228980 }, { "epoch": 0.4561989991074844, "grad_norm": 0.20061303675174713, "learning_rate": 0.002, "loss": 2.567, "step": 228990 }, { "epoch": 0.4562189213311233, "grad_norm": 0.16782426834106445, "learning_rate": 0.002, "loss": 2.5614, "step": 229000 }, { "epoch": 0.4562388435547622, "grad_norm": 0.15760697424411774, "learning_rate": 0.002, "loss": 2.5558, "step": 229010 }, { "epoch": 0.45625876577840113, "grad_norm": 0.17467692494392395, "learning_rate": 0.002, "loss": 2.5726, "step": 229020 }, { "epoch": 0.45627868800204, "grad_norm": 0.17706535756587982, "learning_rate": 0.002, "loss": 2.5526, "step": 229030 }, { "epoch": 0.45629861022567897, "grad_norm": 0.18272781372070312, "learning_rate": 0.002, "loss": 2.5564, "step": 229040 }, { "epoch": 0.45631853244931786, "grad_norm": 0.1730431765317917, "learning_rate": 0.002, "loss": 2.5624, "step": 229050 }, { "epoch": 0.45633845467295675, "grad_norm": 0.19132542610168457, "learning_rate": 0.002, "loss": 2.559, "step": 229060 }, { "epoch": 0.4563583768965957, "grad_norm": 0.1508907526731491, "learning_rate": 0.002, "loss": 2.5586, "step": 229070 }, { "epoch": 0.4563782991202346, "grad_norm": 0.19781555235385895, "learning_rate": 0.002, "loss": 2.5603, "step": 229080 }, { "epoch": 0.45639822134387353, "grad_norm": 0.20088346302509308, "learning_rate": 0.002, "loss": 2.5543, "step": 229090 }, { "epoch": 0.4564181435675124, "grad_norm": 0.1789168268442154, "learning_rate": 0.002, "loss": 2.5612, "step": 229100 }, { "epoch": 0.45643806579115137, "grad_norm": 0.21403898298740387, "learning_rate": 0.002, "loss": 2.5624, "step": 229110 }, { "epoch": 0.45645798801479026, "grad_norm": 0.20790496468544006, "learning_rate": 0.002, "loss": 2.5613, "step": 229120 }, { "epoch": 0.45647791023842915, "grad_norm": 0.17697574198246002, "learning_rate": 0.002, "loss": 2.5591, "step": 229130 }, { "epoch": 0.4564978324620681, "grad_norm": 0.1562131941318512, "learning_rate": 0.002, "loss": 2.5588, "step": 229140 }, { "epoch": 0.456517754685707, "grad_norm": 0.15010930597782135, "learning_rate": 0.002, "loss": 2.5585, "step": 229150 }, { "epoch": 0.45653767690934594, "grad_norm": 0.16659680008888245, "learning_rate": 0.002, "loss": 2.5483, "step": 229160 }, { "epoch": 0.4565575991329848, "grad_norm": 0.15144333243370056, "learning_rate": 0.002, "loss": 2.5536, "step": 229170 }, { "epoch": 0.4565775213566237, "grad_norm": 0.21117989718914032, "learning_rate": 0.002, "loss": 2.5549, "step": 229180 }, { "epoch": 0.45659744358026266, "grad_norm": 0.15995027124881744, "learning_rate": 0.002, "loss": 2.5634, "step": 229190 }, { "epoch": 0.45661736580390155, "grad_norm": 0.161806121468544, "learning_rate": 0.002, "loss": 2.5506, "step": 229200 }, { "epoch": 0.4566372880275405, "grad_norm": 0.19834601879119873, "learning_rate": 0.002, "loss": 2.5624, "step": 229210 }, { "epoch": 0.4566572102511794, "grad_norm": 0.14184710383415222, "learning_rate": 0.002, "loss": 2.5699, "step": 229220 }, { "epoch": 0.45667713247481834, "grad_norm": 0.1564067304134369, "learning_rate": 0.002, "loss": 2.555, "step": 229230 }, { "epoch": 0.4566970546984572, "grad_norm": 0.17281565070152283, "learning_rate": 0.002, "loss": 2.5639, "step": 229240 }, { "epoch": 0.4567169769220961, "grad_norm": 0.18800842761993408, "learning_rate": 0.002, "loss": 2.5727, "step": 229250 }, { "epoch": 0.45673689914573506, "grad_norm": 0.19637583196163177, "learning_rate": 0.002, "loss": 2.5484, "step": 229260 }, { "epoch": 0.45675682136937396, "grad_norm": 0.1559482216835022, "learning_rate": 0.002, "loss": 2.5556, "step": 229270 }, { "epoch": 0.4567767435930129, "grad_norm": 0.2010042816400528, "learning_rate": 0.002, "loss": 2.5611, "step": 229280 }, { "epoch": 0.4567966658166518, "grad_norm": 0.15952585637569427, "learning_rate": 0.002, "loss": 2.5697, "step": 229290 }, { "epoch": 0.4568165880402907, "grad_norm": 0.1665658950805664, "learning_rate": 0.002, "loss": 2.5636, "step": 229300 }, { "epoch": 0.45683651026392963, "grad_norm": 0.1587994396686554, "learning_rate": 0.002, "loss": 2.5471, "step": 229310 }, { "epoch": 0.4568564324875685, "grad_norm": 0.17052897810935974, "learning_rate": 0.002, "loss": 2.553, "step": 229320 }, { "epoch": 0.45687635471120747, "grad_norm": 0.175065815448761, "learning_rate": 0.002, "loss": 2.5525, "step": 229330 }, { "epoch": 0.45689627693484636, "grad_norm": 0.1894844025373459, "learning_rate": 0.002, "loss": 2.5564, "step": 229340 }, { "epoch": 0.45691619915848525, "grad_norm": 0.15544873476028442, "learning_rate": 0.002, "loss": 2.5559, "step": 229350 }, { "epoch": 0.4569361213821242, "grad_norm": 0.1572190821170807, "learning_rate": 0.002, "loss": 2.5585, "step": 229360 }, { "epoch": 0.4569560436057631, "grad_norm": 0.15675069391727448, "learning_rate": 0.002, "loss": 2.5542, "step": 229370 }, { "epoch": 0.45697596582940203, "grad_norm": 0.16842827200889587, "learning_rate": 0.002, "loss": 2.5561, "step": 229380 }, { "epoch": 0.4569958880530409, "grad_norm": 0.15218228101730347, "learning_rate": 0.002, "loss": 2.5555, "step": 229390 }, { "epoch": 0.45701581027667987, "grad_norm": 0.1991787850856781, "learning_rate": 0.002, "loss": 2.5631, "step": 229400 }, { "epoch": 0.45703573250031876, "grad_norm": 0.18676891922950745, "learning_rate": 0.002, "loss": 2.5655, "step": 229410 }, { "epoch": 0.45705565472395765, "grad_norm": 0.18170054256916046, "learning_rate": 0.002, "loss": 2.5705, "step": 229420 }, { "epoch": 0.4570755769475966, "grad_norm": 0.17746387422084808, "learning_rate": 0.002, "loss": 2.5711, "step": 229430 }, { "epoch": 0.4570954991712355, "grad_norm": 0.14842016994953156, "learning_rate": 0.002, "loss": 2.5546, "step": 229440 }, { "epoch": 0.45711542139487443, "grad_norm": 0.1729728728532791, "learning_rate": 0.002, "loss": 2.5495, "step": 229450 }, { "epoch": 0.4571353436185133, "grad_norm": 0.19296450912952423, "learning_rate": 0.002, "loss": 2.5665, "step": 229460 }, { "epoch": 0.4571552658421522, "grad_norm": 0.15860368311405182, "learning_rate": 0.002, "loss": 2.5565, "step": 229470 }, { "epoch": 0.45717518806579116, "grad_norm": 0.1691388636827469, "learning_rate": 0.002, "loss": 2.5509, "step": 229480 }, { "epoch": 0.45719511028943005, "grad_norm": 0.15109935402870178, "learning_rate": 0.002, "loss": 2.562, "step": 229490 }, { "epoch": 0.457215032513069, "grad_norm": 0.1558409482240677, "learning_rate": 0.002, "loss": 2.5515, "step": 229500 }, { "epoch": 0.4572349547367079, "grad_norm": 0.1473100483417511, "learning_rate": 0.002, "loss": 2.5527, "step": 229510 }, { "epoch": 0.4572548769603468, "grad_norm": 0.17551018297672272, "learning_rate": 0.002, "loss": 2.5395, "step": 229520 }, { "epoch": 0.4572747991839857, "grad_norm": 0.1985296607017517, "learning_rate": 0.002, "loss": 2.565, "step": 229530 }, { "epoch": 0.4572947214076246, "grad_norm": 0.1538143903017044, "learning_rate": 0.002, "loss": 2.5725, "step": 229540 }, { "epoch": 0.45731464363126356, "grad_norm": 0.19012542068958282, "learning_rate": 0.002, "loss": 2.5628, "step": 229550 }, { "epoch": 0.45733456585490245, "grad_norm": 0.20928408205509186, "learning_rate": 0.002, "loss": 2.5742, "step": 229560 }, { "epoch": 0.4573544880785414, "grad_norm": 0.15184535086154938, "learning_rate": 0.002, "loss": 2.558, "step": 229570 }, { "epoch": 0.4573744103021803, "grad_norm": 0.15554344654083252, "learning_rate": 0.002, "loss": 2.5457, "step": 229580 }, { "epoch": 0.4573943325258192, "grad_norm": 0.17195287346839905, "learning_rate": 0.002, "loss": 2.5566, "step": 229590 }, { "epoch": 0.4574142547494581, "grad_norm": 0.2050267457962036, "learning_rate": 0.002, "loss": 2.5617, "step": 229600 }, { "epoch": 0.457434176973097, "grad_norm": 0.17295387387275696, "learning_rate": 0.002, "loss": 2.5548, "step": 229610 }, { "epoch": 0.45745409919673596, "grad_norm": 0.16273358464241028, "learning_rate": 0.002, "loss": 2.5625, "step": 229620 }, { "epoch": 0.45747402142037485, "grad_norm": 0.15590925514698029, "learning_rate": 0.002, "loss": 2.56, "step": 229630 }, { "epoch": 0.45749394364401375, "grad_norm": 0.18013453483581543, "learning_rate": 0.002, "loss": 2.5566, "step": 229640 }, { "epoch": 0.4575138658676527, "grad_norm": 0.21491600573062897, "learning_rate": 0.002, "loss": 2.5714, "step": 229650 }, { "epoch": 0.4575337880912916, "grad_norm": 0.1509103775024414, "learning_rate": 0.002, "loss": 2.5709, "step": 229660 }, { "epoch": 0.45755371031493053, "grad_norm": 0.14738629758358002, "learning_rate": 0.002, "loss": 2.5446, "step": 229670 }, { "epoch": 0.4575736325385694, "grad_norm": 0.16522781550884247, "learning_rate": 0.002, "loss": 2.5466, "step": 229680 }, { "epoch": 0.45759355476220837, "grad_norm": 0.20093046128749847, "learning_rate": 0.002, "loss": 2.5591, "step": 229690 }, { "epoch": 0.45761347698584726, "grad_norm": 0.15652668476104736, "learning_rate": 0.002, "loss": 2.5754, "step": 229700 }, { "epoch": 0.45763339920948615, "grad_norm": 0.16913101077079773, "learning_rate": 0.002, "loss": 2.5482, "step": 229710 }, { "epoch": 0.4576533214331251, "grad_norm": 0.18787692487239838, "learning_rate": 0.002, "loss": 2.5708, "step": 229720 }, { "epoch": 0.457673243656764, "grad_norm": 0.1726001501083374, "learning_rate": 0.002, "loss": 2.5671, "step": 229730 }, { "epoch": 0.45769316588040293, "grad_norm": 0.15654487907886505, "learning_rate": 0.002, "loss": 2.5567, "step": 229740 }, { "epoch": 0.4577130881040418, "grad_norm": 0.1628345549106598, "learning_rate": 0.002, "loss": 2.555, "step": 229750 }, { "epoch": 0.4577330103276807, "grad_norm": 0.16376487910747528, "learning_rate": 0.002, "loss": 2.5467, "step": 229760 }, { "epoch": 0.45775293255131966, "grad_norm": 0.1552087366580963, "learning_rate": 0.002, "loss": 2.5441, "step": 229770 }, { "epoch": 0.45777285477495855, "grad_norm": 0.1600377857685089, "learning_rate": 0.002, "loss": 2.565, "step": 229780 }, { "epoch": 0.4577927769985975, "grad_norm": 0.20576541125774384, "learning_rate": 0.002, "loss": 2.5597, "step": 229790 }, { "epoch": 0.4578126992222364, "grad_norm": 0.17658405005931854, "learning_rate": 0.002, "loss": 2.5644, "step": 229800 }, { "epoch": 0.4578326214458753, "grad_norm": 0.16718502342700958, "learning_rate": 0.002, "loss": 2.5658, "step": 229810 }, { "epoch": 0.4578525436695142, "grad_norm": 0.1461479663848877, "learning_rate": 0.002, "loss": 2.5585, "step": 229820 }, { "epoch": 0.4578724658931531, "grad_norm": 0.1623111218214035, "learning_rate": 0.002, "loss": 2.5456, "step": 229830 }, { "epoch": 0.45789238811679206, "grad_norm": 0.17477278411388397, "learning_rate": 0.002, "loss": 2.5519, "step": 229840 }, { "epoch": 0.45791231034043095, "grad_norm": 0.22141382098197937, "learning_rate": 0.002, "loss": 2.567, "step": 229850 }, { "epoch": 0.4579322325640699, "grad_norm": 0.14113539457321167, "learning_rate": 0.002, "loss": 2.5639, "step": 229860 }, { "epoch": 0.4579521547877088, "grad_norm": 0.18663085997104645, "learning_rate": 0.002, "loss": 2.5545, "step": 229870 }, { "epoch": 0.4579720770113477, "grad_norm": 0.17110387980937958, "learning_rate": 0.002, "loss": 2.5609, "step": 229880 }, { "epoch": 0.4579919992349866, "grad_norm": 0.1904018074274063, "learning_rate": 0.002, "loss": 2.5615, "step": 229890 }, { "epoch": 0.4580119214586255, "grad_norm": 0.20137177407741547, "learning_rate": 0.002, "loss": 2.5829, "step": 229900 }, { "epoch": 0.45803184368226446, "grad_norm": 0.1862909346818924, "learning_rate": 0.002, "loss": 2.5587, "step": 229910 }, { "epoch": 0.45805176590590335, "grad_norm": 0.1609668731689453, "learning_rate": 0.002, "loss": 2.5621, "step": 229920 }, { "epoch": 0.45807168812954224, "grad_norm": 0.17622818052768707, "learning_rate": 0.002, "loss": 2.5573, "step": 229930 }, { "epoch": 0.4580916103531812, "grad_norm": 0.15818741917610168, "learning_rate": 0.002, "loss": 2.559, "step": 229940 }, { "epoch": 0.4581115325768201, "grad_norm": 0.17950963973999023, "learning_rate": 0.002, "loss": 2.5574, "step": 229950 }, { "epoch": 0.458131454800459, "grad_norm": 0.13952751457691193, "learning_rate": 0.002, "loss": 2.5583, "step": 229960 }, { "epoch": 0.4581513770240979, "grad_norm": 0.1591256856918335, "learning_rate": 0.002, "loss": 2.5618, "step": 229970 }, { "epoch": 0.45817129924773686, "grad_norm": 0.16159243881702423, "learning_rate": 0.002, "loss": 2.5598, "step": 229980 }, { "epoch": 0.45819122147137575, "grad_norm": 0.18199190497398376, "learning_rate": 0.002, "loss": 2.5684, "step": 229990 }, { "epoch": 0.45821114369501464, "grad_norm": 0.16361477971076965, "learning_rate": 0.002, "loss": 2.5494, "step": 230000 }, { "epoch": 0.4582310659186536, "grad_norm": 0.15646740794181824, "learning_rate": 0.002, "loss": 2.5432, "step": 230010 }, { "epoch": 0.4582509881422925, "grad_norm": 0.16410720348358154, "learning_rate": 0.002, "loss": 2.5545, "step": 230020 }, { "epoch": 0.4582709103659314, "grad_norm": 0.17065902054309845, "learning_rate": 0.002, "loss": 2.5558, "step": 230030 }, { "epoch": 0.4582908325895703, "grad_norm": 0.185048446059227, "learning_rate": 0.002, "loss": 2.5621, "step": 230040 }, { "epoch": 0.4583107548132092, "grad_norm": 0.20272959768772125, "learning_rate": 0.002, "loss": 2.5781, "step": 230050 }, { "epoch": 0.45833067703684816, "grad_norm": 0.17343883216381073, "learning_rate": 0.002, "loss": 2.5518, "step": 230060 }, { "epoch": 0.45835059926048705, "grad_norm": 0.1964217573404312, "learning_rate": 0.002, "loss": 2.5607, "step": 230070 }, { "epoch": 0.458370521484126, "grad_norm": 0.1656358242034912, "learning_rate": 0.002, "loss": 2.5462, "step": 230080 }, { "epoch": 0.4583904437077649, "grad_norm": 0.173484206199646, "learning_rate": 0.002, "loss": 2.5311, "step": 230090 }, { "epoch": 0.4584103659314038, "grad_norm": 0.15719176828861237, "learning_rate": 0.002, "loss": 2.5592, "step": 230100 }, { "epoch": 0.4584302881550427, "grad_norm": 0.17280840873718262, "learning_rate": 0.002, "loss": 2.5514, "step": 230110 }, { "epoch": 0.4584502103786816, "grad_norm": 0.18999645113945007, "learning_rate": 0.002, "loss": 2.5571, "step": 230120 }, { "epoch": 0.45847013260232056, "grad_norm": 0.17754550278186798, "learning_rate": 0.002, "loss": 2.55, "step": 230130 }, { "epoch": 0.45849005482595945, "grad_norm": 0.27950137853622437, "learning_rate": 0.002, "loss": 2.5678, "step": 230140 }, { "epoch": 0.4585099770495984, "grad_norm": 0.17073863744735718, "learning_rate": 0.002, "loss": 2.5749, "step": 230150 }, { "epoch": 0.4585298992732373, "grad_norm": 0.2028944492340088, "learning_rate": 0.002, "loss": 2.5513, "step": 230160 }, { "epoch": 0.4585498214968762, "grad_norm": 0.17745864391326904, "learning_rate": 0.002, "loss": 2.5425, "step": 230170 }, { "epoch": 0.4585697437205151, "grad_norm": 0.18490484356880188, "learning_rate": 0.002, "loss": 2.5588, "step": 230180 }, { "epoch": 0.458589665944154, "grad_norm": 0.16254790127277374, "learning_rate": 0.002, "loss": 2.548, "step": 230190 }, { "epoch": 0.45860958816779296, "grad_norm": 0.15287457406520844, "learning_rate": 0.002, "loss": 2.5505, "step": 230200 }, { "epoch": 0.45862951039143185, "grad_norm": 0.2059813141822815, "learning_rate": 0.002, "loss": 2.562, "step": 230210 }, { "epoch": 0.45864943261507074, "grad_norm": 0.15746285021305084, "learning_rate": 0.002, "loss": 2.5541, "step": 230220 }, { "epoch": 0.4586693548387097, "grad_norm": 0.1629459261894226, "learning_rate": 0.002, "loss": 2.5646, "step": 230230 }, { "epoch": 0.4586892770623486, "grad_norm": 0.16493134200572968, "learning_rate": 0.002, "loss": 2.5561, "step": 230240 }, { "epoch": 0.4587091992859875, "grad_norm": 0.15949606895446777, "learning_rate": 0.002, "loss": 2.5639, "step": 230250 }, { "epoch": 0.4587291215096264, "grad_norm": 0.15584881603717804, "learning_rate": 0.002, "loss": 2.5608, "step": 230260 }, { "epoch": 0.4587490437332653, "grad_norm": 0.18261608481407166, "learning_rate": 0.002, "loss": 2.5666, "step": 230270 }, { "epoch": 0.45876896595690425, "grad_norm": 0.14600686728954315, "learning_rate": 0.002, "loss": 2.5485, "step": 230280 }, { "epoch": 0.45878888818054314, "grad_norm": 0.16620869934558868, "learning_rate": 0.002, "loss": 2.5658, "step": 230290 }, { "epoch": 0.4588088104041821, "grad_norm": 0.17086952924728394, "learning_rate": 0.002, "loss": 2.5474, "step": 230300 }, { "epoch": 0.458828732627821, "grad_norm": 0.21536210179328918, "learning_rate": 0.002, "loss": 2.5624, "step": 230310 }, { "epoch": 0.4588486548514599, "grad_norm": 0.14374083280563354, "learning_rate": 0.002, "loss": 2.5759, "step": 230320 }, { "epoch": 0.4588685770750988, "grad_norm": 0.15157297253608704, "learning_rate": 0.002, "loss": 2.5431, "step": 230330 }, { "epoch": 0.4588884992987377, "grad_norm": 0.17108604311943054, "learning_rate": 0.002, "loss": 2.5581, "step": 230340 }, { "epoch": 0.45890842152237665, "grad_norm": 0.1767546385526657, "learning_rate": 0.002, "loss": 2.5531, "step": 230350 }, { "epoch": 0.45892834374601554, "grad_norm": 0.17403313517570496, "learning_rate": 0.002, "loss": 2.5455, "step": 230360 }, { "epoch": 0.4589482659696545, "grad_norm": 0.22170087695121765, "learning_rate": 0.002, "loss": 2.5488, "step": 230370 }, { "epoch": 0.4589681881932934, "grad_norm": 0.18152350187301636, "learning_rate": 0.002, "loss": 2.5651, "step": 230380 }, { "epoch": 0.45898811041693227, "grad_norm": 0.17754480242729187, "learning_rate": 0.002, "loss": 2.5516, "step": 230390 }, { "epoch": 0.4590080326405712, "grad_norm": 0.2083195000886917, "learning_rate": 0.002, "loss": 2.5593, "step": 230400 }, { "epoch": 0.4590279548642101, "grad_norm": 0.15336602926254272, "learning_rate": 0.002, "loss": 2.5549, "step": 230410 }, { "epoch": 0.45904787708784905, "grad_norm": 0.17068275809288025, "learning_rate": 0.002, "loss": 2.5613, "step": 230420 }, { "epoch": 0.45906779931148795, "grad_norm": 0.1869676262140274, "learning_rate": 0.002, "loss": 2.5419, "step": 230430 }, { "epoch": 0.4590877215351269, "grad_norm": 0.187748983502388, "learning_rate": 0.002, "loss": 2.5561, "step": 230440 }, { "epoch": 0.4591076437587658, "grad_norm": 0.1611638218164444, "learning_rate": 0.002, "loss": 2.556, "step": 230450 }, { "epoch": 0.4591275659824047, "grad_norm": 0.1770881861448288, "learning_rate": 0.002, "loss": 2.5498, "step": 230460 }, { "epoch": 0.4591474882060436, "grad_norm": 0.18030744791030884, "learning_rate": 0.002, "loss": 2.5557, "step": 230470 }, { "epoch": 0.4591674104296825, "grad_norm": 0.1826031506061554, "learning_rate": 0.002, "loss": 2.5552, "step": 230480 }, { "epoch": 0.45918733265332146, "grad_norm": 0.1697988659143448, "learning_rate": 0.002, "loss": 2.5439, "step": 230490 }, { "epoch": 0.45920725487696035, "grad_norm": 0.15636996924877167, "learning_rate": 0.002, "loss": 2.5419, "step": 230500 }, { "epoch": 0.45922717710059924, "grad_norm": 0.19641287624835968, "learning_rate": 0.002, "loss": 2.5608, "step": 230510 }, { "epoch": 0.4592470993242382, "grad_norm": 0.13925550878047943, "learning_rate": 0.002, "loss": 2.5621, "step": 230520 }, { "epoch": 0.4592670215478771, "grad_norm": 0.15290342271327972, "learning_rate": 0.002, "loss": 2.5429, "step": 230530 }, { "epoch": 0.459286943771516, "grad_norm": 0.17779624462127686, "learning_rate": 0.002, "loss": 2.548, "step": 230540 }, { "epoch": 0.4593068659951549, "grad_norm": 0.1540985405445099, "learning_rate": 0.002, "loss": 2.5494, "step": 230550 }, { "epoch": 0.4593267882187938, "grad_norm": 0.1613338589668274, "learning_rate": 0.002, "loss": 2.5535, "step": 230560 }, { "epoch": 0.45934671044243275, "grad_norm": 0.16721169650554657, "learning_rate": 0.002, "loss": 2.5625, "step": 230570 }, { "epoch": 0.45936663266607164, "grad_norm": 0.1604715883731842, "learning_rate": 0.002, "loss": 2.5436, "step": 230580 }, { "epoch": 0.4593865548897106, "grad_norm": 0.19221898913383484, "learning_rate": 0.002, "loss": 2.555, "step": 230590 }, { "epoch": 0.4594064771133495, "grad_norm": 0.17324833571910858, "learning_rate": 0.002, "loss": 2.5733, "step": 230600 }, { "epoch": 0.4594263993369884, "grad_norm": 0.15900209546089172, "learning_rate": 0.002, "loss": 2.5609, "step": 230610 }, { "epoch": 0.4594463215606273, "grad_norm": 0.15736700594425201, "learning_rate": 0.002, "loss": 2.5575, "step": 230620 }, { "epoch": 0.4594662437842662, "grad_norm": 0.17727236449718475, "learning_rate": 0.002, "loss": 2.5536, "step": 230630 }, { "epoch": 0.45948616600790515, "grad_norm": 0.19959034025669098, "learning_rate": 0.002, "loss": 2.5449, "step": 230640 }, { "epoch": 0.45950608823154404, "grad_norm": 0.15052415430545807, "learning_rate": 0.002, "loss": 2.5634, "step": 230650 }, { "epoch": 0.459526010455183, "grad_norm": 0.19236324727535248, "learning_rate": 0.002, "loss": 2.5623, "step": 230660 }, { "epoch": 0.4595459326788219, "grad_norm": 0.15537527203559875, "learning_rate": 0.002, "loss": 2.5485, "step": 230670 }, { "epoch": 0.45956585490246077, "grad_norm": 0.1809622049331665, "learning_rate": 0.002, "loss": 2.5512, "step": 230680 }, { "epoch": 0.4595857771260997, "grad_norm": 0.16862820088863373, "learning_rate": 0.002, "loss": 2.5581, "step": 230690 }, { "epoch": 0.4596056993497386, "grad_norm": 0.16159053146839142, "learning_rate": 0.002, "loss": 2.559, "step": 230700 }, { "epoch": 0.45962562157337755, "grad_norm": 0.17215847969055176, "learning_rate": 0.002, "loss": 2.5625, "step": 230710 }, { "epoch": 0.45964554379701644, "grad_norm": 0.19334135949611664, "learning_rate": 0.002, "loss": 2.567, "step": 230720 }, { "epoch": 0.4596654660206554, "grad_norm": 0.1872977763414383, "learning_rate": 0.002, "loss": 2.5505, "step": 230730 }, { "epoch": 0.4596853882442943, "grad_norm": 0.17564117908477783, "learning_rate": 0.002, "loss": 2.5618, "step": 230740 }, { "epoch": 0.45970531046793317, "grad_norm": 0.17855602502822876, "learning_rate": 0.002, "loss": 2.5539, "step": 230750 }, { "epoch": 0.4597252326915721, "grad_norm": 0.16857187449932098, "learning_rate": 0.002, "loss": 2.5416, "step": 230760 }, { "epoch": 0.459745154915211, "grad_norm": 0.16143661737442017, "learning_rate": 0.002, "loss": 2.5533, "step": 230770 }, { "epoch": 0.45976507713884995, "grad_norm": 0.18217621743679047, "learning_rate": 0.002, "loss": 2.5602, "step": 230780 }, { "epoch": 0.45978499936248884, "grad_norm": 0.15015749633312225, "learning_rate": 0.002, "loss": 2.5571, "step": 230790 }, { "epoch": 0.45980492158612774, "grad_norm": 0.17757092416286469, "learning_rate": 0.002, "loss": 2.5407, "step": 230800 }, { "epoch": 0.4598248438097667, "grad_norm": 0.1920585185289383, "learning_rate": 0.002, "loss": 2.5523, "step": 230810 }, { "epoch": 0.45984476603340557, "grad_norm": 0.19381698966026306, "learning_rate": 0.002, "loss": 2.5679, "step": 230820 }, { "epoch": 0.4598646882570445, "grad_norm": 0.19160963594913483, "learning_rate": 0.002, "loss": 2.5514, "step": 230830 }, { "epoch": 0.4598846104806834, "grad_norm": 0.1479370892047882, "learning_rate": 0.002, "loss": 2.554, "step": 230840 }, { "epoch": 0.4599045327043223, "grad_norm": 0.1759650856256485, "learning_rate": 0.002, "loss": 2.5657, "step": 230850 }, { "epoch": 0.45992445492796125, "grad_norm": 0.15283413231372833, "learning_rate": 0.002, "loss": 2.5462, "step": 230860 }, { "epoch": 0.45994437715160014, "grad_norm": 0.17147281765937805, "learning_rate": 0.002, "loss": 2.5497, "step": 230870 }, { "epoch": 0.4599642993752391, "grad_norm": 0.20606112480163574, "learning_rate": 0.002, "loss": 2.5654, "step": 230880 }, { "epoch": 0.459984221598878, "grad_norm": 0.15510913729667664, "learning_rate": 0.002, "loss": 2.5448, "step": 230890 }, { "epoch": 0.4600041438225169, "grad_norm": 0.13994504511356354, "learning_rate": 0.002, "loss": 2.559, "step": 230900 }, { "epoch": 0.4600240660461558, "grad_norm": 0.177148699760437, "learning_rate": 0.002, "loss": 2.5597, "step": 230910 }, { "epoch": 0.4600439882697947, "grad_norm": 0.17301489412784576, "learning_rate": 0.002, "loss": 2.5597, "step": 230920 }, { "epoch": 0.46006391049343365, "grad_norm": 0.19433046877384186, "learning_rate": 0.002, "loss": 2.5579, "step": 230930 }, { "epoch": 0.46008383271707254, "grad_norm": 0.16014954447746277, "learning_rate": 0.002, "loss": 2.547, "step": 230940 }, { "epoch": 0.4601037549407115, "grad_norm": 0.27141204476356506, "learning_rate": 0.002, "loss": 2.5741, "step": 230950 }, { "epoch": 0.4601236771643504, "grad_norm": 0.16735677421092987, "learning_rate": 0.002, "loss": 2.5713, "step": 230960 }, { "epoch": 0.46014359938798927, "grad_norm": 0.17169462144374847, "learning_rate": 0.002, "loss": 2.5676, "step": 230970 }, { "epoch": 0.4601635216116282, "grad_norm": 0.1675378531217575, "learning_rate": 0.002, "loss": 2.555, "step": 230980 }, { "epoch": 0.4601834438352671, "grad_norm": 0.1828964203596115, "learning_rate": 0.002, "loss": 2.5588, "step": 230990 }, { "epoch": 0.46020336605890605, "grad_norm": 0.18957717716693878, "learning_rate": 0.002, "loss": 2.5675, "step": 231000 }, { "epoch": 0.46022328828254494, "grad_norm": 0.20984044671058655, "learning_rate": 0.002, "loss": 2.5633, "step": 231010 }, { "epoch": 0.46024321050618383, "grad_norm": 0.1678292453289032, "learning_rate": 0.002, "loss": 2.5652, "step": 231020 }, { "epoch": 0.4602631327298228, "grad_norm": 0.161495178937912, "learning_rate": 0.002, "loss": 2.5582, "step": 231030 }, { "epoch": 0.46028305495346167, "grad_norm": 0.1464584320783615, "learning_rate": 0.002, "loss": 2.562, "step": 231040 }, { "epoch": 0.4603029771771006, "grad_norm": 0.18424534797668457, "learning_rate": 0.002, "loss": 2.5436, "step": 231050 }, { "epoch": 0.4603228994007395, "grad_norm": 0.17147637903690338, "learning_rate": 0.002, "loss": 2.5561, "step": 231060 }, { "epoch": 0.46034282162437845, "grad_norm": 0.1576378494501114, "learning_rate": 0.002, "loss": 2.5737, "step": 231070 }, { "epoch": 0.46036274384801734, "grad_norm": 0.17549015581607819, "learning_rate": 0.002, "loss": 2.5803, "step": 231080 }, { "epoch": 0.46038266607165623, "grad_norm": 0.18181289732456207, "learning_rate": 0.002, "loss": 2.5479, "step": 231090 }, { "epoch": 0.4604025882952952, "grad_norm": 0.1385096311569214, "learning_rate": 0.002, "loss": 2.5464, "step": 231100 }, { "epoch": 0.46042251051893407, "grad_norm": 0.1577434539794922, "learning_rate": 0.002, "loss": 2.5577, "step": 231110 }, { "epoch": 0.460442432742573, "grad_norm": 0.1751171350479126, "learning_rate": 0.002, "loss": 2.5645, "step": 231120 }, { "epoch": 0.4604623549662119, "grad_norm": 0.1562512218952179, "learning_rate": 0.002, "loss": 2.5644, "step": 231130 }, { "epoch": 0.4604822771898508, "grad_norm": 0.18136505782604218, "learning_rate": 0.002, "loss": 2.5427, "step": 231140 }, { "epoch": 0.46050219941348974, "grad_norm": 0.1852247416973114, "learning_rate": 0.002, "loss": 2.5414, "step": 231150 }, { "epoch": 0.46052212163712863, "grad_norm": 0.18532058596611023, "learning_rate": 0.002, "loss": 2.5556, "step": 231160 }, { "epoch": 0.4605420438607676, "grad_norm": 0.15117336809635162, "learning_rate": 0.002, "loss": 2.5626, "step": 231170 }, { "epoch": 0.46056196608440647, "grad_norm": 0.15196636319160461, "learning_rate": 0.002, "loss": 2.5586, "step": 231180 }, { "epoch": 0.4605818883080454, "grad_norm": 0.1566159725189209, "learning_rate": 0.002, "loss": 2.5521, "step": 231190 }, { "epoch": 0.4606018105316843, "grad_norm": 0.1767277866601944, "learning_rate": 0.002, "loss": 2.5641, "step": 231200 }, { "epoch": 0.4606217327553232, "grad_norm": 0.156819686293602, "learning_rate": 0.002, "loss": 2.5443, "step": 231210 }, { "epoch": 0.46064165497896215, "grad_norm": 0.18152423202991486, "learning_rate": 0.002, "loss": 2.5453, "step": 231220 }, { "epoch": 0.46066157720260104, "grad_norm": 0.19491073489189148, "learning_rate": 0.002, "loss": 2.5561, "step": 231230 }, { "epoch": 0.46068149942624, "grad_norm": 0.16374117136001587, "learning_rate": 0.002, "loss": 2.564, "step": 231240 }, { "epoch": 0.4607014216498789, "grad_norm": 0.17824503779411316, "learning_rate": 0.002, "loss": 2.5795, "step": 231250 }, { "epoch": 0.46072134387351776, "grad_norm": 0.14521439373493195, "learning_rate": 0.002, "loss": 2.5532, "step": 231260 }, { "epoch": 0.4607412660971567, "grad_norm": 0.19676680862903595, "learning_rate": 0.002, "loss": 2.568, "step": 231270 }, { "epoch": 0.4607611883207956, "grad_norm": 0.16806532442569733, "learning_rate": 0.002, "loss": 2.5665, "step": 231280 }, { "epoch": 0.46078111054443455, "grad_norm": 0.1684621125459671, "learning_rate": 0.002, "loss": 2.5556, "step": 231290 }, { "epoch": 0.46080103276807344, "grad_norm": 0.18588942289352417, "learning_rate": 0.002, "loss": 2.5668, "step": 231300 }, { "epoch": 0.46082095499171233, "grad_norm": 0.16082163155078888, "learning_rate": 0.002, "loss": 2.5583, "step": 231310 }, { "epoch": 0.4608408772153513, "grad_norm": 0.343761682510376, "learning_rate": 0.002, "loss": 2.5462, "step": 231320 }, { "epoch": 0.46086079943899017, "grad_norm": 0.20905649662017822, "learning_rate": 0.002, "loss": 2.5648, "step": 231330 }, { "epoch": 0.4608807216626291, "grad_norm": 0.15981364250183105, "learning_rate": 0.002, "loss": 2.5569, "step": 231340 }, { "epoch": 0.460900643886268, "grad_norm": 0.14731596410274506, "learning_rate": 0.002, "loss": 2.559, "step": 231350 }, { "epoch": 0.46092056610990695, "grad_norm": 0.15050260722637177, "learning_rate": 0.002, "loss": 2.548, "step": 231360 }, { "epoch": 0.46094048833354584, "grad_norm": 0.15215274691581726, "learning_rate": 0.002, "loss": 2.5553, "step": 231370 }, { "epoch": 0.46096041055718473, "grad_norm": 0.15358072519302368, "learning_rate": 0.002, "loss": 2.5708, "step": 231380 }, { "epoch": 0.4609803327808237, "grad_norm": 0.18277810513973236, "learning_rate": 0.002, "loss": 2.5687, "step": 231390 }, { "epoch": 0.46100025500446257, "grad_norm": 0.1733575016260147, "learning_rate": 0.002, "loss": 2.5465, "step": 231400 }, { "epoch": 0.4610201772281015, "grad_norm": 0.16798068583011627, "learning_rate": 0.002, "loss": 2.5583, "step": 231410 }, { "epoch": 0.4610400994517404, "grad_norm": 0.19745765626430511, "learning_rate": 0.002, "loss": 2.5869, "step": 231420 }, { "epoch": 0.4610600216753793, "grad_norm": 0.1791013777256012, "learning_rate": 0.002, "loss": 2.5625, "step": 231430 }, { "epoch": 0.46107994389901824, "grad_norm": 0.15353712439537048, "learning_rate": 0.002, "loss": 2.5529, "step": 231440 }, { "epoch": 0.46109986612265713, "grad_norm": 0.1619882732629776, "learning_rate": 0.002, "loss": 2.5504, "step": 231450 }, { "epoch": 0.4611197883462961, "grad_norm": 0.15493787825107574, "learning_rate": 0.002, "loss": 2.5486, "step": 231460 }, { "epoch": 0.46113971056993497, "grad_norm": 0.17762261629104614, "learning_rate": 0.002, "loss": 2.5485, "step": 231470 }, { "epoch": 0.4611596327935739, "grad_norm": 0.13827897608280182, "learning_rate": 0.002, "loss": 2.5476, "step": 231480 }, { "epoch": 0.4611795550172128, "grad_norm": 0.16101783514022827, "learning_rate": 0.002, "loss": 2.558, "step": 231490 }, { "epoch": 0.4611994772408517, "grad_norm": 0.15182659029960632, "learning_rate": 0.002, "loss": 2.5478, "step": 231500 }, { "epoch": 0.46121939946449064, "grad_norm": 0.17001718282699585, "learning_rate": 0.002, "loss": 2.5587, "step": 231510 }, { "epoch": 0.46123932168812953, "grad_norm": 0.17268797755241394, "learning_rate": 0.002, "loss": 2.5595, "step": 231520 }, { "epoch": 0.4612592439117685, "grad_norm": 0.1728685349225998, "learning_rate": 0.002, "loss": 2.5575, "step": 231530 }, { "epoch": 0.46127916613540737, "grad_norm": 0.15706966817378998, "learning_rate": 0.002, "loss": 2.569, "step": 231540 }, { "epoch": 0.46129908835904626, "grad_norm": 0.1851310133934021, "learning_rate": 0.002, "loss": 2.5609, "step": 231550 }, { "epoch": 0.4613190105826852, "grad_norm": 0.18566825985908508, "learning_rate": 0.002, "loss": 2.5639, "step": 231560 }, { "epoch": 0.4613389328063241, "grad_norm": 0.18371669948101044, "learning_rate": 0.002, "loss": 2.5587, "step": 231570 }, { "epoch": 0.46135885502996304, "grad_norm": 0.17773327231407166, "learning_rate": 0.002, "loss": 2.5523, "step": 231580 }, { "epoch": 0.46137877725360193, "grad_norm": 0.15551508963108063, "learning_rate": 0.002, "loss": 2.5475, "step": 231590 }, { "epoch": 0.4613986994772408, "grad_norm": 0.1993350088596344, "learning_rate": 0.002, "loss": 2.5499, "step": 231600 }, { "epoch": 0.46141862170087977, "grad_norm": 0.14938387274742126, "learning_rate": 0.002, "loss": 2.5643, "step": 231610 }, { "epoch": 0.46143854392451866, "grad_norm": 0.15235185623168945, "learning_rate": 0.002, "loss": 2.5574, "step": 231620 }, { "epoch": 0.4614584661481576, "grad_norm": 0.15816757082939148, "learning_rate": 0.002, "loss": 2.5624, "step": 231630 }, { "epoch": 0.4614783883717965, "grad_norm": 0.2821410298347473, "learning_rate": 0.002, "loss": 2.5635, "step": 231640 }, { "epoch": 0.46149831059543545, "grad_norm": 0.15443603694438934, "learning_rate": 0.002, "loss": 2.5502, "step": 231650 }, { "epoch": 0.46151823281907434, "grad_norm": 0.1866181641817093, "learning_rate": 0.002, "loss": 2.5582, "step": 231660 }, { "epoch": 0.4615381550427132, "grad_norm": 0.17984648048877716, "learning_rate": 0.002, "loss": 2.5573, "step": 231670 }, { "epoch": 0.4615580772663522, "grad_norm": 0.15995612740516663, "learning_rate": 0.002, "loss": 2.5466, "step": 231680 }, { "epoch": 0.46157799948999106, "grad_norm": 0.20521144568920135, "learning_rate": 0.002, "loss": 2.5623, "step": 231690 }, { "epoch": 0.46159792171363, "grad_norm": 0.1863696277141571, "learning_rate": 0.002, "loss": 2.548, "step": 231700 }, { "epoch": 0.4616178439372689, "grad_norm": 0.15504011511802673, "learning_rate": 0.002, "loss": 2.5543, "step": 231710 }, { "epoch": 0.4616377661609078, "grad_norm": 0.17747491598129272, "learning_rate": 0.002, "loss": 2.5608, "step": 231720 }, { "epoch": 0.46165768838454674, "grad_norm": 0.16923603415489197, "learning_rate": 0.002, "loss": 2.5485, "step": 231730 }, { "epoch": 0.46167761060818563, "grad_norm": 0.1449575126171112, "learning_rate": 0.002, "loss": 2.5509, "step": 231740 }, { "epoch": 0.4616975328318246, "grad_norm": 0.1731235384941101, "learning_rate": 0.002, "loss": 2.5549, "step": 231750 }, { "epoch": 0.46171745505546347, "grad_norm": 0.19420629739761353, "learning_rate": 0.002, "loss": 2.5702, "step": 231760 }, { "epoch": 0.46173737727910236, "grad_norm": 0.16612783074378967, "learning_rate": 0.002, "loss": 2.567, "step": 231770 }, { "epoch": 0.4617572995027413, "grad_norm": 0.19204562902450562, "learning_rate": 0.002, "loss": 2.5485, "step": 231780 }, { "epoch": 0.4617772217263802, "grad_norm": 0.15393199026584625, "learning_rate": 0.002, "loss": 2.5651, "step": 231790 }, { "epoch": 0.46179714395001914, "grad_norm": 0.17495712637901306, "learning_rate": 0.002, "loss": 2.5521, "step": 231800 }, { "epoch": 0.46181706617365803, "grad_norm": 0.16110508143901825, "learning_rate": 0.002, "loss": 2.5506, "step": 231810 }, { "epoch": 0.461836988397297, "grad_norm": 0.16898047924041748, "learning_rate": 0.002, "loss": 2.5633, "step": 231820 }, { "epoch": 0.46185691062093587, "grad_norm": 0.1554035246372223, "learning_rate": 0.002, "loss": 2.5489, "step": 231830 }, { "epoch": 0.46187683284457476, "grad_norm": 0.18173721432685852, "learning_rate": 0.002, "loss": 2.5624, "step": 231840 }, { "epoch": 0.4618967550682137, "grad_norm": 0.1484178751707077, "learning_rate": 0.002, "loss": 2.5595, "step": 231850 }, { "epoch": 0.4619166772918526, "grad_norm": 0.26904296875, "learning_rate": 0.002, "loss": 2.5578, "step": 231860 }, { "epoch": 0.46193659951549154, "grad_norm": 0.16648612916469574, "learning_rate": 0.002, "loss": 2.568, "step": 231870 }, { "epoch": 0.46195652173913043, "grad_norm": 0.15735772252082825, "learning_rate": 0.002, "loss": 2.5623, "step": 231880 }, { "epoch": 0.4619764439627693, "grad_norm": 0.17666463553905487, "learning_rate": 0.002, "loss": 2.5676, "step": 231890 }, { "epoch": 0.46199636618640827, "grad_norm": 0.20391540229320526, "learning_rate": 0.002, "loss": 2.5631, "step": 231900 }, { "epoch": 0.46201628841004716, "grad_norm": 0.19167007505893707, "learning_rate": 0.002, "loss": 2.5506, "step": 231910 }, { "epoch": 0.4620362106336861, "grad_norm": 0.16268163919448853, "learning_rate": 0.002, "loss": 2.561, "step": 231920 }, { "epoch": 0.462056132857325, "grad_norm": 0.14851336181163788, "learning_rate": 0.002, "loss": 2.5608, "step": 231930 }, { "epoch": 0.46207605508096394, "grad_norm": 0.1505555361509323, "learning_rate": 0.002, "loss": 2.578, "step": 231940 }, { "epoch": 0.46209597730460283, "grad_norm": 0.1947704255580902, "learning_rate": 0.002, "loss": 2.5544, "step": 231950 }, { "epoch": 0.4621158995282417, "grad_norm": 0.1684742569923401, "learning_rate": 0.002, "loss": 2.5566, "step": 231960 }, { "epoch": 0.46213582175188067, "grad_norm": 0.17149026691913605, "learning_rate": 0.002, "loss": 2.553, "step": 231970 }, { "epoch": 0.46215574397551956, "grad_norm": 0.15348143875598907, "learning_rate": 0.002, "loss": 2.545, "step": 231980 }, { "epoch": 0.4621756661991585, "grad_norm": 0.1623036414384842, "learning_rate": 0.002, "loss": 2.548, "step": 231990 }, { "epoch": 0.4621955884227974, "grad_norm": 0.2310701310634613, "learning_rate": 0.002, "loss": 2.5652, "step": 232000 }, { "epoch": 0.4622155106464363, "grad_norm": 0.15995021164417267, "learning_rate": 0.002, "loss": 2.5463, "step": 232010 }, { "epoch": 0.46223543287007524, "grad_norm": 0.17470374703407288, "learning_rate": 0.002, "loss": 2.5622, "step": 232020 }, { "epoch": 0.4622553550937141, "grad_norm": 0.17819631099700928, "learning_rate": 0.002, "loss": 2.561, "step": 232030 }, { "epoch": 0.4622752773173531, "grad_norm": 0.15859919786453247, "learning_rate": 0.002, "loss": 2.5444, "step": 232040 }, { "epoch": 0.46229519954099196, "grad_norm": 0.17938415706157684, "learning_rate": 0.002, "loss": 2.5423, "step": 232050 }, { "epoch": 0.46231512176463085, "grad_norm": 0.2077682912349701, "learning_rate": 0.002, "loss": 2.5626, "step": 232060 }, { "epoch": 0.4623350439882698, "grad_norm": 0.17862491309642792, "learning_rate": 0.002, "loss": 2.5476, "step": 232070 }, { "epoch": 0.4623549662119087, "grad_norm": 0.1788414716720581, "learning_rate": 0.002, "loss": 2.5439, "step": 232080 }, { "epoch": 0.46237488843554764, "grad_norm": 0.17049196362495422, "learning_rate": 0.002, "loss": 2.5411, "step": 232090 }, { "epoch": 0.46239481065918653, "grad_norm": 0.15121684968471527, "learning_rate": 0.002, "loss": 2.5636, "step": 232100 }, { "epoch": 0.4624147328828255, "grad_norm": 0.16508348286151886, "learning_rate": 0.002, "loss": 2.5504, "step": 232110 }, { "epoch": 0.46243465510646437, "grad_norm": 0.15795670449733734, "learning_rate": 0.002, "loss": 2.5726, "step": 232120 }, { "epoch": 0.46245457733010326, "grad_norm": 0.17450468242168427, "learning_rate": 0.002, "loss": 2.5518, "step": 232130 }, { "epoch": 0.4624744995537422, "grad_norm": 0.21767909824848175, "learning_rate": 0.002, "loss": 2.5666, "step": 232140 }, { "epoch": 0.4624944217773811, "grad_norm": 0.16652317345142365, "learning_rate": 0.002, "loss": 2.5509, "step": 232150 }, { "epoch": 0.46251434400102004, "grad_norm": 0.16804245114326477, "learning_rate": 0.002, "loss": 2.5645, "step": 232160 }, { "epoch": 0.46253426622465893, "grad_norm": 0.17058159410953522, "learning_rate": 0.002, "loss": 2.5631, "step": 232170 }, { "epoch": 0.4625541884482978, "grad_norm": 0.14761601388454437, "learning_rate": 0.002, "loss": 2.5584, "step": 232180 }, { "epoch": 0.46257411067193677, "grad_norm": 0.1655801236629486, "learning_rate": 0.002, "loss": 2.5625, "step": 232190 }, { "epoch": 0.46259403289557566, "grad_norm": 0.16092950105667114, "learning_rate": 0.002, "loss": 2.5593, "step": 232200 }, { "epoch": 0.4626139551192146, "grad_norm": 0.15394331514835358, "learning_rate": 0.002, "loss": 2.5556, "step": 232210 }, { "epoch": 0.4626338773428535, "grad_norm": 0.1708279550075531, "learning_rate": 0.002, "loss": 2.5542, "step": 232220 }, { "epoch": 0.46265379956649244, "grad_norm": 0.13966743648052216, "learning_rate": 0.002, "loss": 2.5621, "step": 232230 }, { "epoch": 0.46267372179013133, "grad_norm": 0.1592506319284439, "learning_rate": 0.002, "loss": 2.5598, "step": 232240 }, { "epoch": 0.4626936440137702, "grad_norm": 0.1383880376815796, "learning_rate": 0.002, "loss": 2.5459, "step": 232250 }, { "epoch": 0.46271356623740917, "grad_norm": 0.15513068437576294, "learning_rate": 0.002, "loss": 2.5536, "step": 232260 }, { "epoch": 0.46273348846104806, "grad_norm": 0.15347060561180115, "learning_rate": 0.002, "loss": 2.5588, "step": 232270 }, { "epoch": 0.462753410684687, "grad_norm": 0.19885388016700745, "learning_rate": 0.002, "loss": 2.5633, "step": 232280 }, { "epoch": 0.4627733329083259, "grad_norm": 0.14546918869018555, "learning_rate": 0.002, "loss": 2.5629, "step": 232290 }, { "epoch": 0.4627932551319648, "grad_norm": 0.1608266532421112, "learning_rate": 0.002, "loss": 2.5683, "step": 232300 }, { "epoch": 0.46281317735560373, "grad_norm": 0.16751916706562042, "learning_rate": 0.002, "loss": 2.5452, "step": 232310 }, { "epoch": 0.4628330995792426, "grad_norm": 0.19674083590507507, "learning_rate": 0.002, "loss": 2.56, "step": 232320 }, { "epoch": 0.46285302180288157, "grad_norm": 0.14659757912158966, "learning_rate": 0.002, "loss": 2.5626, "step": 232330 }, { "epoch": 0.46287294402652046, "grad_norm": 0.1765003651380539, "learning_rate": 0.002, "loss": 2.5628, "step": 232340 }, { "epoch": 0.46289286625015935, "grad_norm": 0.1491352915763855, "learning_rate": 0.002, "loss": 2.5476, "step": 232350 }, { "epoch": 0.4629127884737983, "grad_norm": 0.1835997998714447, "learning_rate": 0.002, "loss": 2.5619, "step": 232360 }, { "epoch": 0.4629327106974372, "grad_norm": 0.1856515258550644, "learning_rate": 0.002, "loss": 2.5646, "step": 232370 }, { "epoch": 0.46295263292107613, "grad_norm": 0.15535257756710052, "learning_rate": 0.002, "loss": 2.5634, "step": 232380 }, { "epoch": 0.462972555144715, "grad_norm": 0.15771180391311646, "learning_rate": 0.002, "loss": 2.5662, "step": 232390 }, { "epoch": 0.46299247736835397, "grad_norm": 0.16617846488952637, "learning_rate": 0.002, "loss": 2.565, "step": 232400 }, { "epoch": 0.46301239959199286, "grad_norm": 0.156544491648674, "learning_rate": 0.002, "loss": 2.5728, "step": 232410 }, { "epoch": 0.46303232181563175, "grad_norm": 0.1837935894727707, "learning_rate": 0.002, "loss": 2.5557, "step": 232420 }, { "epoch": 0.4630522440392707, "grad_norm": 0.15976187586784363, "learning_rate": 0.002, "loss": 2.5544, "step": 232430 }, { "epoch": 0.4630721662629096, "grad_norm": 0.22046035528182983, "learning_rate": 0.002, "loss": 2.5621, "step": 232440 }, { "epoch": 0.46309208848654854, "grad_norm": 0.15930110216140747, "learning_rate": 0.002, "loss": 2.5727, "step": 232450 }, { "epoch": 0.4631120107101874, "grad_norm": 0.1811872124671936, "learning_rate": 0.002, "loss": 2.5557, "step": 232460 }, { "epoch": 0.4631319329338263, "grad_norm": 0.19523552060127258, "learning_rate": 0.002, "loss": 2.5626, "step": 232470 }, { "epoch": 0.46315185515746526, "grad_norm": 0.1829419881105423, "learning_rate": 0.002, "loss": 2.5538, "step": 232480 }, { "epoch": 0.46317177738110415, "grad_norm": 0.15319600701332092, "learning_rate": 0.002, "loss": 2.5644, "step": 232490 }, { "epoch": 0.4631916996047431, "grad_norm": 0.16727620363235474, "learning_rate": 0.002, "loss": 2.5645, "step": 232500 }, { "epoch": 0.463211621828382, "grad_norm": 0.155502587556839, "learning_rate": 0.002, "loss": 2.5608, "step": 232510 }, { "epoch": 0.4632315440520209, "grad_norm": 0.3007086515426636, "learning_rate": 0.002, "loss": 2.5644, "step": 232520 }, { "epoch": 0.46325146627565983, "grad_norm": 0.1530933529138565, "learning_rate": 0.002, "loss": 2.5589, "step": 232530 }, { "epoch": 0.4632713884992987, "grad_norm": 0.17114794254302979, "learning_rate": 0.002, "loss": 2.5545, "step": 232540 }, { "epoch": 0.46329131072293767, "grad_norm": 0.18963642418384552, "learning_rate": 0.002, "loss": 2.559, "step": 232550 }, { "epoch": 0.46331123294657656, "grad_norm": 0.15363876521587372, "learning_rate": 0.002, "loss": 2.552, "step": 232560 }, { "epoch": 0.4633311551702155, "grad_norm": 0.2058020979166031, "learning_rate": 0.002, "loss": 2.563, "step": 232570 }, { "epoch": 0.4633510773938544, "grad_norm": 0.18123777210712433, "learning_rate": 0.002, "loss": 2.5497, "step": 232580 }, { "epoch": 0.4633709996174933, "grad_norm": 0.17295868694782257, "learning_rate": 0.002, "loss": 2.5548, "step": 232590 }, { "epoch": 0.46339092184113223, "grad_norm": 0.18926554918289185, "learning_rate": 0.002, "loss": 2.5674, "step": 232600 }, { "epoch": 0.4634108440647711, "grad_norm": 0.1617632657289505, "learning_rate": 0.002, "loss": 2.5558, "step": 232610 }, { "epoch": 0.46343076628841007, "grad_norm": 0.17392688989639282, "learning_rate": 0.002, "loss": 2.5687, "step": 232620 }, { "epoch": 0.46345068851204896, "grad_norm": 0.18557953834533691, "learning_rate": 0.002, "loss": 2.5658, "step": 232630 }, { "epoch": 0.46347061073568785, "grad_norm": 0.16357046365737915, "learning_rate": 0.002, "loss": 2.5541, "step": 232640 }, { "epoch": 0.4634905329593268, "grad_norm": 0.15119428932666779, "learning_rate": 0.002, "loss": 2.5625, "step": 232650 }, { "epoch": 0.4635104551829657, "grad_norm": 0.17072273790836334, "learning_rate": 0.002, "loss": 2.5635, "step": 232660 }, { "epoch": 0.46353037740660463, "grad_norm": 0.18379239737987518, "learning_rate": 0.002, "loss": 2.5704, "step": 232670 }, { "epoch": 0.4635502996302435, "grad_norm": 0.16313013434410095, "learning_rate": 0.002, "loss": 2.5579, "step": 232680 }, { "epoch": 0.46357022185388247, "grad_norm": 0.18225792050361633, "learning_rate": 0.002, "loss": 2.5608, "step": 232690 }, { "epoch": 0.46359014407752136, "grad_norm": 0.17273786664009094, "learning_rate": 0.002, "loss": 2.5592, "step": 232700 }, { "epoch": 0.46361006630116025, "grad_norm": 0.1822897046804428, "learning_rate": 0.002, "loss": 2.5399, "step": 232710 }, { "epoch": 0.4636299885247992, "grad_norm": 0.2257431298494339, "learning_rate": 0.002, "loss": 2.5657, "step": 232720 }, { "epoch": 0.4636499107484381, "grad_norm": 0.16904082894325256, "learning_rate": 0.002, "loss": 2.5484, "step": 232730 }, { "epoch": 0.46366983297207703, "grad_norm": 0.21191483736038208, "learning_rate": 0.002, "loss": 2.551, "step": 232740 }, { "epoch": 0.4636897551957159, "grad_norm": 0.2266102433204651, "learning_rate": 0.002, "loss": 2.5626, "step": 232750 }, { "epoch": 0.4637096774193548, "grad_norm": 0.15032131969928741, "learning_rate": 0.002, "loss": 2.5562, "step": 232760 }, { "epoch": 0.46372959964299376, "grad_norm": 0.16729736328125, "learning_rate": 0.002, "loss": 2.5665, "step": 232770 }, { "epoch": 0.46374952186663265, "grad_norm": 0.1667824685573578, "learning_rate": 0.002, "loss": 2.5626, "step": 232780 }, { "epoch": 0.4637694440902716, "grad_norm": 0.18192054331302643, "learning_rate": 0.002, "loss": 2.5695, "step": 232790 }, { "epoch": 0.4637893663139105, "grad_norm": 0.1532791405916214, "learning_rate": 0.002, "loss": 2.5577, "step": 232800 }, { "epoch": 0.4638092885375494, "grad_norm": 0.21435503661632538, "learning_rate": 0.002, "loss": 2.5721, "step": 232810 }, { "epoch": 0.4638292107611883, "grad_norm": 0.16012553870677948, "learning_rate": 0.002, "loss": 2.5624, "step": 232820 }, { "epoch": 0.4638491329848272, "grad_norm": 0.17873725295066833, "learning_rate": 0.002, "loss": 2.5409, "step": 232830 }, { "epoch": 0.46386905520846616, "grad_norm": 0.15898440778255463, "learning_rate": 0.002, "loss": 2.5522, "step": 232840 }, { "epoch": 0.46388897743210505, "grad_norm": 0.14377343654632568, "learning_rate": 0.002, "loss": 2.5585, "step": 232850 }, { "epoch": 0.463908899655744, "grad_norm": 0.15836483240127563, "learning_rate": 0.002, "loss": 2.5548, "step": 232860 }, { "epoch": 0.4639288218793829, "grad_norm": 0.21379207074642181, "learning_rate": 0.002, "loss": 2.5462, "step": 232870 }, { "epoch": 0.4639487441030218, "grad_norm": 0.14613115787506104, "learning_rate": 0.002, "loss": 2.5585, "step": 232880 }, { "epoch": 0.46396866632666073, "grad_norm": 0.17164644598960876, "learning_rate": 0.002, "loss": 2.5584, "step": 232890 }, { "epoch": 0.4639885885502996, "grad_norm": 0.18347375094890594, "learning_rate": 0.002, "loss": 2.5541, "step": 232900 }, { "epoch": 0.46400851077393857, "grad_norm": 0.15276630222797394, "learning_rate": 0.002, "loss": 2.5567, "step": 232910 }, { "epoch": 0.46402843299757746, "grad_norm": 0.1672772616147995, "learning_rate": 0.002, "loss": 2.5492, "step": 232920 }, { "epoch": 0.46404835522121635, "grad_norm": 0.14188089966773987, "learning_rate": 0.002, "loss": 2.5585, "step": 232930 }, { "epoch": 0.4640682774448553, "grad_norm": 0.16952228546142578, "learning_rate": 0.002, "loss": 2.5612, "step": 232940 }, { "epoch": 0.4640881996684942, "grad_norm": 0.1442747414112091, "learning_rate": 0.002, "loss": 2.5598, "step": 232950 }, { "epoch": 0.46410812189213313, "grad_norm": 0.15686364471912384, "learning_rate": 0.002, "loss": 2.5663, "step": 232960 }, { "epoch": 0.464128044115772, "grad_norm": 0.17593303322792053, "learning_rate": 0.002, "loss": 2.5651, "step": 232970 }, { "epoch": 0.46414796633941097, "grad_norm": 0.1720915585756302, "learning_rate": 0.002, "loss": 2.5504, "step": 232980 }, { "epoch": 0.46416788856304986, "grad_norm": 0.17809656262397766, "learning_rate": 0.002, "loss": 2.5518, "step": 232990 }, { "epoch": 0.46418781078668875, "grad_norm": 0.15201200544834137, "learning_rate": 0.002, "loss": 2.5571, "step": 233000 }, { "epoch": 0.4642077330103277, "grad_norm": 0.2090892195701599, "learning_rate": 0.002, "loss": 2.5695, "step": 233010 }, { "epoch": 0.4642276552339666, "grad_norm": 0.16057011485099792, "learning_rate": 0.002, "loss": 2.5693, "step": 233020 }, { "epoch": 0.46424757745760553, "grad_norm": 0.14543268084526062, "learning_rate": 0.002, "loss": 2.5487, "step": 233030 }, { "epoch": 0.4642674996812444, "grad_norm": 0.1657848358154297, "learning_rate": 0.002, "loss": 2.5629, "step": 233040 }, { "epoch": 0.4642874219048833, "grad_norm": 0.19247455894947052, "learning_rate": 0.002, "loss": 2.5736, "step": 233050 }, { "epoch": 0.46430734412852226, "grad_norm": 0.19545380771160126, "learning_rate": 0.002, "loss": 2.5623, "step": 233060 }, { "epoch": 0.46432726635216115, "grad_norm": 0.1645127236843109, "learning_rate": 0.002, "loss": 2.5598, "step": 233070 }, { "epoch": 0.4643471885758001, "grad_norm": 0.1699458509683609, "learning_rate": 0.002, "loss": 2.5455, "step": 233080 }, { "epoch": 0.464367110799439, "grad_norm": 0.158985897898674, "learning_rate": 0.002, "loss": 2.5579, "step": 233090 }, { "epoch": 0.4643870330230779, "grad_norm": 0.16249415278434753, "learning_rate": 0.002, "loss": 2.5539, "step": 233100 }, { "epoch": 0.4644069552467168, "grad_norm": 0.15075741708278656, "learning_rate": 0.002, "loss": 2.5562, "step": 233110 }, { "epoch": 0.4644268774703557, "grad_norm": 0.16366437077522278, "learning_rate": 0.002, "loss": 2.5427, "step": 233120 }, { "epoch": 0.46444679969399466, "grad_norm": 0.16372966766357422, "learning_rate": 0.002, "loss": 2.5563, "step": 233130 }, { "epoch": 0.46446672191763355, "grad_norm": 0.17576350271701813, "learning_rate": 0.002, "loss": 2.5598, "step": 233140 }, { "epoch": 0.4644866441412725, "grad_norm": 0.1529773771762848, "learning_rate": 0.002, "loss": 2.5521, "step": 233150 }, { "epoch": 0.4645065663649114, "grad_norm": 0.16768001019954681, "learning_rate": 0.002, "loss": 2.5511, "step": 233160 }, { "epoch": 0.4645264885885503, "grad_norm": 0.1729310154914856, "learning_rate": 0.002, "loss": 2.5562, "step": 233170 }, { "epoch": 0.4645464108121892, "grad_norm": 0.19133785367012024, "learning_rate": 0.002, "loss": 2.5454, "step": 233180 }, { "epoch": 0.4645663330358281, "grad_norm": 0.17446394264698029, "learning_rate": 0.002, "loss": 2.5607, "step": 233190 }, { "epoch": 0.46458625525946706, "grad_norm": 0.1821162849664688, "learning_rate": 0.002, "loss": 2.5494, "step": 233200 }, { "epoch": 0.46460617748310595, "grad_norm": 0.19940254092216492, "learning_rate": 0.002, "loss": 2.554, "step": 233210 }, { "epoch": 0.46462609970674484, "grad_norm": 0.15042199194431305, "learning_rate": 0.002, "loss": 2.5571, "step": 233220 }, { "epoch": 0.4646460219303838, "grad_norm": 0.16104277968406677, "learning_rate": 0.002, "loss": 2.5536, "step": 233230 }, { "epoch": 0.4646659441540227, "grad_norm": 0.20733705163002014, "learning_rate": 0.002, "loss": 2.5626, "step": 233240 }, { "epoch": 0.4646858663776616, "grad_norm": 0.1665794849395752, "learning_rate": 0.002, "loss": 2.5626, "step": 233250 }, { "epoch": 0.4647057886013005, "grad_norm": 0.1495499312877655, "learning_rate": 0.002, "loss": 2.551, "step": 233260 }, { "epoch": 0.46472571082493946, "grad_norm": 0.16256193816661835, "learning_rate": 0.002, "loss": 2.5594, "step": 233270 }, { "epoch": 0.46474563304857835, "grad_norm": 0.1411610096693039, "learning_rate": 0.002, "loss": 2.5779, "step": 233280 }, { "epoch": 0.46476555527221725, "grad_norm": 0.21937309205532074, "learning_rate": 0.002, "loss": 2.5701, "step": 233290 }, { "epoch": 0.4647854774958562, "grad_norm": 0.1557825654745102, "learning_rate": 0.002, "loss": 2.5513, "step": 233300 }, { "epoch": 0.4648053997194951, "grad_norm": 0.14359116554260254, "learning_rate": 0.002, "loss": 2.5534, "step": 233310 }, { "epoch": 0.46482532194313403, "grad_norm": 0.19670839607715607, "learning_rate": 0.002, "loss": 2.5546, "step": 233320 }, { "epoch": 0.4648452441667729, "grad_norm": 0.1861741840839386, "learning_rate": 0.002, "loss": 2.5697, "step": 233330 }, { "epoch": 0.4648651663904118, "grad_norm": 0.16338469088077545, "learning_rate": 0.002, "loss": 2.5564, "step": 233340 }, { "epoch": 0.46488508861405076, "grad_norm": 0.18975119292736053, "learning_rate": 0.002, "loss": 2.5639, "step": 233350 }, { "epoch": 0.46490501083768965, "grad_norm": 0.15341860055923462, "learning_rate": 0.002, "loss": 2.5492, "step": 233360 }, { "epoch": 0.4649249330613286, "grad_norm": 0.19605112075805664, "learning_rate": 0.002, "loss": 2.5504, "step": 233370 }, { "epoch": 0.4649448552849675, "grad_norm": 0.14481577277183533, "learning_rate": 0.002, "loss": 2.5515, "step": 233380 }, { "epoch": 0.4649647775086064, "grad_norm": 0.17772994935512543, "learning_rate": 0.002, "loss": 2.5631, "step": 233390 }, { "epoch": 0.4649846997322453, "grad_norm": 0.16876156628131866, "learning_rate": 0.002, "loss": 2.5591, "step": 233400 }, { "epoch": 0.4650046219558842, "grad_norm": 0.20268914103507996, "learning_rate": 0.002, "loss": 2.5421, "step": 233410 }, { "epoch": 0.46502454417952316, "grad_norm": 0.1611473262310028, "learning_rate": 0.002, "loss": 2.562, "step": 233420 }, { "epoch": 0.46504446640316205, "grad_norm": 0.17599166929721832, "learning_rate": 0.002, "loss": 2.553, "step": 233430 }, { "epoch": 0.465064388626801, "grad_norm": 0.15798720717430115, "learning_rate": 0.002, "loss": 2.57, "step": 233440 }, { "epoch": 0.4650843108504399, "grad_norm": 0.1703115999698639, "learning_rate": 0.002, "loss": 2.5694, "step": 233450 }, { "epoch": 0.4651042330740788, "grad_norm": 0.18788829445838928, "learning_rate": 0.002, "loss": 2.5608, "step": 233460 }, { "epoch": 0.4651241552977177, "grad_norm": 0.16895876824855804, "learning_rate": 0.002, "loss": 2.5637, "step": 233470 }, { "epoch": 0.4651440775213566, "grad_norm": 0.15748104453086853, "learning_rate": 0.002, "loss": 2.5325, "step": 233480 }, { "epoch": 0.46516399974499556, "grad_norm": 0.1944437474012375, "learning_rate": 0.002, "loss": 2.5699, "step": 233490 }, { "epoch": 0.46518392196863445, "grad_norm": 0.15046176314353943, "learning_rate": 0.002, "loss": 2.5677, "step": 233500 }, { "epoch": 0.46520384419227334, "grad_norm": 0.17767831683158875, "learning_rate": 0.002, "loss": 2.5749, "step": 233510 }, { "epoch": 0.4652237664159123, "grad_norm": 0.17406022548675537, "learning_rate": 0.002, "loss": 2.5511, "step": 233520 }, { "epoch": 0.4652436886395512, "grad_norm": 0.1617850363254547, "learning_rate": 0.002, "loss": 2.5607, "step": 233530 }, { "epoch": 0.4652636108631901, "grad_norm": 0.1442897468805313, "learning_rate": 0.002, "loss": 2.5611, "step": 233540 }, { "epoch": 0.465283533086829, "grad_norm": 0.21784991025924683, "learning_rate": 0.002, "loss": 2.5526, "step": 233550 }, { "epoch": 0.4653034553104679, "grad_norm": 0.140811949968338, "learning_rate": 0.002, "loss": 2.5345, "step": 233560 }, { "epoch": 0.46532337753410685, "grad_norm": 0.1532767415046692, "learning_rate": 0.002, "loss": 2.5547, "step": 233570 }, { "epoch": 0.46534329975774574, "grad_norm": 0.18464376032352448, "learning_rate": 0.002, "loss": 2.5595, "step": 233580 }, { "epoch": 0.4653632219813847, "grad_norm": 0.1662062704563141, "learning_rate": 0.002, "loss": 2.5579, "step": 233590 }, { "epoch": 0.4653831442050236, "grad_norm": 0.18150441348552704, "learning_rate": 0.002, "loss": 2.5649, "step": 233600 }, { "epoch": 0.4654030664286625, "grad_norm": 0.15814584493637085, "learning_rate": 0.002, "loss": 2.5499, "step": 233610 }, { "epoch": 0.4654229886523014, "grad_norm": 0.17016395926475525, "learning_rate": 0.002, "loss": 2.5444, "step": 233620 }, { "epoch": 0.4654429108759403, "grad_norm": 0.15652814507484436, "learning_rate": 0.002, "loss": 2.5429, "step": 233630 }, { "epoch": 0.46546283309957925, "grad_norm": 0.2280866801738739, "learning_rate": 0.002, "loss": 2.5529, "step": 233640 }, { "epoch": 0.46548275532321814, "grad_norm": 0.15962649881839752, "learning_rate": 0.002, "loss": 2.5593, "step": 233650 }, { "epoch": 0.4655026775468571, "grad_norm": 0.14862246811389923, "learning_rate": 0.002, "loss": 2.5537, "step": 233660 }, { "epoch": 0.465522599770496, "grad_norm": 0.16431675851345062, "learning_rate": 0.002, "loss": 2.5475, "step": 233670 }, { "epoch": 0.4655425219941349, "grad_norm": 0.17288729548454285, "learning_rate": 0.002, "loss": 2.55, "step": 233680 }, { "epoch": 0.4655624442177738, "grad_norm": 0.14703330397605896, "learning_rate": 0.002, "loss": 2.5452, "step": 233690 }, { "epoch": 0.4655823664414127, "grad_norm": 0.15663880109786987, "learning_rate": 0.002, "loss": 2.5558, "step": 233700 }, { "epoch": 0.46560228866505166, "grad_norm": 0.1621406525373459, "learning_rate": 0.002, "loss": 2.5684, "step": 233710 }, { "epoch": 0.46562221088869055, "grad_norm": 0.17788143455982208, "learning_rate": 0.002, "loss": 2.5497, "step": 233720 }, { "epoch": 0.4656421331123295, "grad_norm": 0.1810622662305832, "learning_rate": 0.002, "loss": 2.5751, "step": 233730 }, { "epoch": 0.4656620553359684, "grad_norm": 0.15934336185455322, "learning_rate": 0.002, "loss": 2.5515, "step": 233740 }, { "epoch": 0.4656819775596073, "grad_norm": 0.1913301944732666, "learning_rate": 0.002, "loss": 2.5662, "step": 233750 }, { "epoch": 0.4657018997832462, "grad_norm": 0.1816587597131729, "learning_rate": 0.002, "loss": 2.5653, "step": 233760 }, { "epoch": 0.4657218220068851, "grad_norm": 0.14987297356128693, "learning_rate": 0.002, "loss": 2.5706, "step": 233770 }, { "epoch": 0.46574174423052406, "grad_norm": 0.19476506114006042, "learning_rate": 0.002, "loss": 2.5616, "step": 233780 }, { "epoch": 0.46576166645416295, "grad_norm": 0.1603907197713852, "learning_rate": 0.002, "loss": 2.5697, "step": 233790 }, { "epoch": 0.46578158867780184, "grad_norm": 0.16588126122951508, "learning_rate": 0.002, "loss": 2.5632, "step": 233800 }, { "epoch": 0.4658015109014408, "grad_norm": 0.1554546356201172, "learning_rate": 0.002, "loss": 2.549, "step": 233810 }, { "epoch": 0.4658214331250797, "grad_norm": 0.17094461619853973, "learning_rate": 0.002, "loss": 2.5568, "step": 233820 }, { "epoch": 0.4658413553487186, "grad_norm": 0.15532289445400238, "learning_rate": 0.002, "loss": 2.5578, "step": 233830 }, { "epoch": 0.4658612775723575, "grad_norm": 0.17178350687026978, "learning_rate": 0.002, "loss": 2.56, "step": 233840 }, { "epoch": 0.4658811997959964, "grad_norm": 0.16289325058460236, "learning_rate": 0.002, "loss": 2.5592, "step": 233850 }, { "epoch": 0.46590112201963535, "grad_norm": 0.16332167387008667, "learning_rate": 0.002, "loss": 2.5657, "step": 233860 }, { "epoch": 0.46592104424327424, "grad_norm": 0.18358691036701202, "learning_rate": 0.002, "loss": 2.5572, "step": 233870 }, { "epoch": 0.4659409664669132, "grad_norm": 0.15864452719688416, "learning_rate": 0.002, "loss": 2.5574, "step": 233880 }, { "epoch": 0.4659608886905521, "grad_norm": 0.16757817566394806, "learning_rate": 0.002, "loss": 2.5605, "step": 233890 }, { "epoch": 0.465980810914191, "grad_norm": 0.1768975406885147, "learning_rate": 0.002, "loss": 2.556, "step": 233900 }, { "epoch": 0.4660007331378299, "grad_norm": 0.15644098818302155, "learning_rate": 0.002, "loss": 2.5328, "step": 233910 }, { "epoch": 0.4660206553614688, "grad_norm": 0.19964909553527832, "learning_rate": 0.002, "loss": 2.5604, "step": 233920 }, { "epoch": 0.46604057758510775, "grad_norm": 0.17491860687732697, "learning_rate": 0.002, "loss": 2.5521, "step": 233930 }, { "epoch": 0.46606049980874664, "grad_norm": 0.18846704065799713, "learning_rate": 0.002, "loss": 2.5491, "step": 233940 }, { "epoch": 0.4660804220323856, "grad_norm": 0.16232830286026, "learning_rate": 0.002, "loss": 2.5583, "step": 233950 }, { "epoch": 0.4661003442560245, "grad_norm": 0.16911742091178894, "learning_rate": 0.002, "loss": 2.5598, "step": 233960 }, { "epoch": 0.46612026647966337, "grad_norm": 0.16279231011867523, "learning_rate": 0.002, "loss": 2.5739, "step": 233970 }, { "epoch": 0.4661401887033023, "grad_norm": 0.15829521417617798, "learning_rate": 0.002, "loss": 2.5505, "step": 233980 }, { "epoch": 0.4661601109269412, "grad_norm": 0.16753706336021423, "learning_rate": 0.002, "loss": 2.5556, "step": 233990 }, { "epoch": 0.46618003315058015, "grad_norm": 0.218483105301857, "learning_rate": 0.002, "loss": 2.5551, "step": 234000 }, { "epoch": 0.46619995537421904, "grad_norm": 0.2030927836894989, "learning_rate": 0.002, "loss": 2.5624, "step": 234010 }, { "epoch": 0.466219877597858, "grad_norm": 0.1625056266784668, "learning_rate": 0.002, "loss": 2.5696, "step": 234020 }, { "epoch": 0.4662397998214969, "grad_norm": 0.1655999720096588, "learning_rate": 0.002, "loss": 2.5536, "step": 234030 }, { "epoch": 0.46625972204513577, "grad_norm": 0.14960616827011108, "learning_rate": 0.002, "loss": 2.5709, "step": 234040 }, { "epoch": 0.4662796442687747, "grad_norm": 0.15794983506202698, "learning_rate": 0.002, "loss": 2.563, "step": 234050 }, { "epoch": 0.4662995664924136, "grad_norm": 0.14734789729118347, "learning_rate": 0.002, "loss": 2.5656, "step": 234060 }, { "epoch": 0.46631948871605255, "grad_norm": 0.16517415642738342, "learning_rate": 0.002, "loss": 2.5621, "step": 234070 }, { "epoch": 0.46633941093969145, "grad_norm": 0.15887971222400665, "learning_rate": 0.002, "loss": 2.5598, "step": 234080 }, { "epoch": 0.46635933316333034, "grad_norm": 0.1395270675420761, "learning_rate": 0.002, "loss": 2.5579, "step": 234090 }, { "epoch": 0.4663792553869693, "grad_norm": 0.20290352404117584, "learning_rate": 0.002, "loss": 2.5352, "step": 234100 }, { "epoch": 0.4663991776106082, "grad_norm": 0.18324612081050873, "learning_rate": 0.002, "loss": 2.5466, "step": 234110 }, { "epoch": 0.4664190998342471, "grad_norm": 0.14485561847686768, "learning_rate": 0.002, "loss": 2.5634, "step": 234120 }, { "epoch": 0.466439022057886, "grad_norm": 0.16186626255512238, "learning_rate": 0.002, "loss": 2.5626, "step": 234130 }, { "epoch": 0.4664589442815249, "grad_norm": 0.16545844078063965, "learning_rate": 0.002, "loss": 2.5665, "step": 234140 }, { "epoch": 0.46647886650516385, "grad_norm": 0.17110376060009003, "learning_rate": 0.002, "loss": 2.5529, "step": 234150 }, { "epoch": 0.46649878872880274, "grad_norm": 0.17926794290542603, "learning_rate": 0.002, "loss": 2.5641, "step": 234160 }, { "epoch": 0.4665187109524417, "grad_norm": 0.2073821723461151, "learning_rate": 0.002, "loss": 2.5666, "step": 234170 }, { "epoch": 0.4665386331760806, "grad_norm": 0.15130232274532318, "learning_rate": 0.002, "loss": 2.552, "step": 234180 }, { "epoch": 0.4665585553997195, "grad_norm": 0.16920091211795807, "learning_rate": 0.002, "loss": 2.5378, "step": 234190 }, { "epoch": 0.4665784776233584, "grad_norm": 0.17925366759300232, "learning_rate": 0.002, "loss": 2.5734, "step": 234200 }, { "epoch": 0.4665983998469973, "grad_norm": 0.16933298110961914, "learning_rate": 0.002, "loss": 2.5687, "step": 234210 }, { "epoch": 0.46661832207063625, "grad_norm": 0.14944320917129517, "learning_rate": 0.002, "loss": 2.5503, "step": 234220 }, { "epoch": 0.46663824429427514, "grad_norm": 0.1576414555311203, "learning_rate": 0.002, "loss": 2.564, "step": 234230 }, { "epoch": 0.4666581665179141, "grad_norm": 0.14489039778709412, "learning_rate": 0.002, "loss": 2.5763, "step": 234240 }, { "epoch": 0.466678088741553, "grad_norm": 0.16025564074516296, "learning_rate": 0.002, "loss": 2.5603, "step": 234250 }, { "epoch": 0.46669801096519187, "grad_norm": 0.1576138734817505, "learning_rate": 0.002, "loss": 2.561, "step": 234260 }, { "epoch": 0.4667179331888308, "grad_norm": 0.15111875534057617, "learning_rate": 0.002, "loss": 2.5498, "step": 234270 }, { "epoch": 0.4667378554124697, "grad_norm": 0.13251632452011108, "learning_rate": 0.002, "loss": 2.5599, "step": 234280 }, { "epoch": 0.46675777763610865, "grad_norm": 0.16088199615478516, "learning_rate": 0.002, "loss": 2.554, "step": 234290 }, { "epoch": 0.46677769985974754, "grad_norm": 0.1629350632429123, "learning_rate": 0.002, "loss": 2.5711, "step": 234300 }, { "epoch": 0.46679762208338643, "grad_norm": 0.1457289308309555, "learning_rate": 0.002, "loss": 2.5504, "step": 234310 }, { "epoch": 0.4668175443070254, "grad_norm": 0.19558793306350708, "learning_rate": 0.002, "loss": 2.5549, "step": 234320 }, { "epoch": 0.46683746653066427, "grad_norm": 0.15928713977336884, "learning_rate": 0.002, "loss": 2.5489, "step": 234330 }, { "epoch": 0.4668573887543032, "grad_norm": 0.14728271961212158, "learning_rate": 0.002, "loss": 2.5652, "step": 234340 }, { "epoch": 0.4668773109779421, "grad_norm": 0.1614958643913269, "learning_rate": 0.002, "loss": 2.5592, "step": 234350 }, { "epoch": 0.46689723320158105, "grad_norm": 0.16764108836650848, "learning_rate": 0.002, "loss": 2.5787, "step": 234360 }, { "epoch": 0.46691715542521994, "grad_norm": 0.1766268014907837, "learning_rate": 0.002, "loss": 2.5674, "step": 234370 }, { "epoch": 0.46693707764885883, "grad_norm": 0.14828260242938995, "learning_rate": 0.002, "loss": 2.5757, "step": 234380 }, { "epoch": 0.4669569998724978, "grad_norm": 0.21683833003044128, "learning_rate": 0.002, "loss": 2.5708, "step": 234390 }, { "epoch": 0.46697692209613667, "grad_norm": 0.18891403079032898, "learning_rate": 0.002, "loss": 2.554, "step": 234400 }, { "epoch": 0.4669968443197756, "grad_norm": 0.14323285222053528, "learning_rate": 0.002, "loss": 2.5602, "step": 234410 }, { "epoch": 0.4670167665434145, "grad_norm": 0.1599636673927307, "learning_rate": 0.002, "loss": 2.5641, "step": 234420 }, { "epoch": 0.4670366887670534, "grad_norm": 0.15864573419094086, "learning_rate": 0.002, "loss": 2.5639, "step": 234430 }, { "epoch": 0.46705661099069234, "grad_norm": 0.1439984142780304, "learning_rate": 0.002, "loss": 2.5422, "step": 234440 }, { "epoch": 0.46707653321433124, "grad_norm": 0.2002120018005371, "learning_rate": 0.002, "loss": 2.5301, "step": 234450 }, { "epoch": 0.4670964554379702, "grad_norm": 0.18945778906345367, "learning_rate": 0.002, "loss": 2.5691, "step": 234460 }, { "epoch": 0.4671163776616091, "grad_norm": 0.16696195304393768, "learning_rate": 0.002, "loss": 2.5591, "step": 234470 }, { "epoch": 0.467136299885248, "grad_norm": 0.16216401755809784, "learning_rate": 0.002, "loss": 2.5635, "step": 234480 }, { "epoch": 0.4671562221088869, "grad_norm": 0.16475717723369598, "learning_rate": 0.002, "loss": 2.5563, "step": 234490 }, { "epoch": 0.4671761443325258, "grad_norm": 0.1955314576625824, "learning_rate": 0.002, "loss": 2.5503, "step": 234500 }, { "epoch": 0.46719606655616475, "grad_norm": 0.14989198744297028, "learning_rate": 0.002, "loss": 2.5657, "step": 234510 }, { "epoch": 0.46721598877980364, "grad_norm": 0.16660645604133606, "learning_rate": 0.002, "loss": 2.5554, "step": 234520 }, { "epoch": 0.4672359110034426, "grad_norm": 0.17142316699028015, "learning_rate": 0.002, "loss": 2.5708, "step": 234530 }, { "epoch": 0.4672558332270815, "grad_norm": 0.15341828763484955, "learning_rate": 0.002, "loss": 2.5577, "step": 234540 }, { "epoch": 0.46727575545072036, "grad_norm": 0.16995759308338165, "learning_rate": 0.002, "loss": 2.5694, "step": 234550 }, { "epoch": 0.4672956776743593, "grad_norm": 0.14304417371749878, "learning_rate": 0.002, "loss": 2.5575, "step": 234560 }, { "epoch": 0.4673155998979982, "grad_norm": 0.1738041788339615, "learning_rate": 0.002, "loss": 2.5756, "step": 234570 }, { "epoch": 0.46733552212163715, "grad_norm": 0.18813113868236542, "learning_rate": 0.002, "loss": 2.5581, "step": 234580 }, { "epoch": 0.46735544434527604, "grad_norm": 0.2056446224451065, "learning_rate": 0.002, "loss": 2.5599, "step": 234590 }, { "epoch": 0.46737536656891493, "grad_norm": 0.17717929184436798, "learning_rate": 0.002, "loss": 2.5735, "step": 234600 }, { "epoch": 0.4673952887925539, "grad_norm": 0.1504673808813095, "learning_rate": 0.002, "loss": 2.5518, "step": 234610 }, { "epoch": 0.46741521101619277, "grad_norm": 0.18682540953159332, "learning_rate": 0.002, "loss": 2.5505, "step": 234620 }, { "epoch": 0.4674351332398317, "grad_norm": 0.1904173195362091, "learning_rate": 0.002, "loss": 2.5473, "step": 234630 }, { "epoch": 0.4674550554634706, "grad_norm": 0.15886279940605164, "learning_rate": 0.002, "loss": 2.5583, "step": 234640 }, { "epoch": 0.46747497768710955, "grad_norm": 0.15286536514759064, "learning_rate": 0.002, "loss": 2.5708, "step": 234650 }, { "epoch": 0.46749489991074844, "grad_norm": 0.18097823858261108, "learning_rate": 0.002, "loss": 2.5677, "step": 234660 }, { "epoch": 0.46751482213438733, "grad_norm": 0.16153644025325775, "learning_rate": 0.002, "loss": 2.5545, "step": 234670 }, { "epoch": 0.4675347443580263, "grad_norm": 0.16469690203666687, "learning_rate": 0.002, "loss": 2.5604, "step": 234680 }, { "epoch": 0.46755466658166517, "grad_norm": 0.18195270001888275, "learning_rate": 0.002, "loss": 2.5374, "step": 234690 }, { "epoch": 0.4675745888053041, "grad_norm": 0.1636074185371399, "learning_rate": 0.002, "loss": 2.5649, "step": 234700 }, { "epoch": 0.467594511028943, "grad_norm": 0.15657031536102295, "learning_rate": 0.002, "loss": 2.5455, "step": 234710 }, { "epoch": 0.4676144332525819, "grad_norm": 0.18801282346248627, "learning_rate": 0.002, "loss": 2.5637, "step": 234720 }, { "epoch": 0.46763435547622084, "grad_norm": 0.16588474810123444, "learning_rate": 0.002, "loss": 2.5587, "step": 234730 }, { "epoch": 0.46765427769985973, "grad_norm": 0.18804225325584412, "learning_rate": 0.002, "loss": 2.5678, "step": 234740 }, { "epoch": 0.4676741999234987, "grad_norm": 0.1891925185918808, "learning_rate": 0.002, "loss": 2.5586, "step": 234750 }, { "epoch": 0.46769412214713757, "grad_norm": 0.16020908951759338, "learning_rate": 0.002, "loss": 2.5647, "step": 234760 }, { "epoch": 0.4677140443707765, "grad_norm": 0.16914135217666626, "learning_rate": 0.002, "loss": 2.5493, "step": 234770 }, { "epoch": 0.4677339665944154, "grad_norm": 0.15509136021137238, "learning_rate": 0.002, "loss": 2.5464, "step": 234780 }, { "epoch": 0.4677538888180543, "grad_norm": 0.6471368670463562, "learning_rate": 0.002, "loss": 2.5689, "step": 234790 }, { "epoch": 0.46777381104169324, "grad_norm": 0.17000404000282288, "learning_rate": 0.002, "loss": 2.5618, "step": 234800 }, { "epoch": 0.46779373326533213, "grad_norm": 0.1763521432876587, "learning_rate": 0.002, "loss": 2.5673, "step": 234810 }, { "epoch": 0.4678136554889711, "grad_norm": 0.1748405396938324, "learning_rate": 0.002, "loss": 2.5505, "step": 234820 }, { "epoch": 0.46783357771260997, "grad_norm": 0.17569313943386078, "learning_rate": 0.002, "loss": 2.5695, "step": 234830 }, { "epoch": 0.46785349993624886, "grad_norm": 0.15391169488430023, "learning_rate": 0.002, "loss": 2.5428, "step": 234840 }, { "epoch": 0.4678734221598878, "grad_norm": 0.17576399445533752, "learning_rate": 0.002, "loss": 2.5682, "step": 234850 }, { "epoch": 0.4678933443835267, "grad_norm": 0.20081359148025513, "learning_rate": 0.002, "loss": 2.5586, "step": 234860 }, { "epoch": 0.46791326660716565, "grad_norm": 0.16282783448696136, "learning_rate": 0.002, "loss": 2.5565, "step": 234870 }, { "epoch": 0.46793318883080454, "grad_norm": 0.16003955900669098, "learning_rate": 0.002, "loss": 2.5401, "step": 234880 }, { "epoch": 0.4679531110544434, "grad_norm": 0.16772237420082092, "learning_rate": 0.002, "loss": 2.5547, "step": 234890 }, { "epoch": 0.4679730332780824, "grad_norm": 0.22151288390159607, "learning_rate": 0.002, "loss": 2.5527, "step": 234900 }, { "epoch": 0.46799295550172126, "grad_norm": 0.17234373092651367, "learning_rate": 0.002, "loss": 2.5518, "step": 234910 }, { "epoch": 0.4680128777253602, "grad_norm": 0.1503712683916092, "learning_rate": 0.002, "loss": 2.568, "step": 234920 }, { "epoch": 0.4680327999489991, "grad_norm": 0.16297414898872375, "learning_rate": 0.002, "loss": 2.5497, "step": 234930 }, { "epoch": 0.46805272217263805, "grad_norm": 0.17943383753299713, "learning_rate": 0.002, "loss": 2.5505, "step": 234940 }, { "epoch": 0.46807264439627694, "grad_norm": 0.1717151403427124, "learning_rate": 0.002, "loss": 2.5634, "step": 234950 }, { "epoch": 0.46809256661991583, "grad_norm": 0.20615361630916595, "learning_rate": 0.002, "loss": 2.5562, "step": 234960 }, { "epoch": 0.4681124888435548, "grad_norm": 0.15514473617076874, "learning_rate": 0.002, "loss": 2.5497, "step": 234970 }, { "epoch": 0.46813241106719367, "grad_norm": 0.15648609399795532, "learning_rate": 0.002, "loss": 2.5649, "step": 234980 }, { "epoch": 0.4681523332908326, "grad_norm": 0.15231449902057648, "learning_rate": 0.002, "loss": 2.5666, "step": 234990 }, { "epoch": 0.4681722555144715, "grad_norm": 0.19707733392715454, "learning_rate": 0.002, "loss": 2.5664, "step": 235000 }, { "epoch": 0.4681921777381104, "grad_norm": 0.1687154471874237, "learning_rate": 0.002, "loss": 2.542, "step": 235010 }, { "epoch": 0.46821209996174934, "grad_norm": 0.1613350659608841, "learning_rate": 0.002, "loss": 2.5436, "step": 235020 }, { "epoch": 0.46823202218538823, "grad_norm": 0.17752020061016083, "learning_rate": 0.002, "loss": 2.5573, "step": 235030 }, { "epoch": 0.4682519444090272, "grad_norm": 0.1954328417778015, "learning_rate": 0.002, "loss": 2.5639, "step": 235040 }, { "epoch": 0.46827186663266607, "grad_norm": 0.15870025753974915, "learning_rate": 0.002, "loss": 2.5701, "step": 235050 }, { "epoch": 0.46829178885630496, "grad_norm": 0.18455949425697327, "learning_rate": 0.002, "loss": 2.5528, "step": 235060 }, { "epoch": 0.4683117110799439, "grad_norm": 0.16221776604652405, "learning_rate": 0.002, "loss": 2.5517, "step": 235070 }, { "epoch": 0.4683316333035828, "grad_norm": 0.2355617731809616, "learning_rate": 0.002, "loss": 2.5609, "step": 235080 }, { "epoch": 0.46835155552722174, "grad_norm": 0.1826597899198532, "learning_rate": 0.002, "loss": 2.5668, "step": 235090 }, { "epoch": 0.46837147775086063, "grad_norm": 0.13513070344924927, "learning_rate": 0.002, "loss": 2.5513, "step": 235100 }, { "epoch": 0.4683913999744996, "grad_norm": 0.18495143949985504, "learning_rate": 0.002, "loss": 2.5548, "step": 235110 }, { "epoch": 0.46841132219813847, "grad_norm": 0.1872429996728897, "learning_rate": 0.002, "loss": 2.5638, "step": 235120 }, { "epoch": 0.46843124442177736, "grad_norm": 0.1943982094526291, "learning_rate": 0.002, "loss": 2.553, "step": 235130 }, { "epoch": 0.4684511666454163, "grad_norm": 0.16302046179771423, "learning_rate": 0.002, "loss": 2.5452, "step": 235140 }, { "epoch": 0.4684710888690552, "grad_norm": 0.1570916473865509, "learning_rate": 0.002, "loss": 2.5599, "step": 235150 }, { "epoch": 0.46849101109269414, "grad_norm": 0.19657878577709198, "learning_rate": 0.002, "loss": 2.5499, "step": 235160 }, { "epoch": 0.46851093331633303, "grad_norm": 0.18830524384975433, "learning_rate": 0.002, "loss": 2.5576, "step": 235170 }, { "epoch": 0.4685308555399719, "grad_norm": 0.16757358610630035, "learning_rate": 0.002, "loss": 2.5763, "step": 235180 }, { "epoch": 0.46855077776361087, "grad_norm": 0.1762775033712387, "learning_rate": 0.002, "loss": 2.561, "step": 235190 }, { "epoch": 0.46857069998724976, "grad_norm": 0.16997192800045013, "learning_rate": 0.002, "loss": 2.5546, "step": 235200 }, { "epoch": 0.4685906222108887, "grad_norm": 0.19586127996444702, "learning_rate": 0.002, "loss": 2.5493, "step": 235210 }, { "epoch": 0.4686105444345276, "grad_norm": 0.1455696076154709, "learning_rate": 0.002, "loss": 2.5543, "step": 235220 }, { "epoch": 0.46863046665816654, "grad_norm": 0.1684240698814392, "learning_rate": 0.002, "loss": 2.5546, "step": 235230 }, { "epoch": 0.46865038888180544, "grad_norm": 0.1686076819896698, "learning_rate": 0.002, "loss": 2.5568, "step": 235240 }, { "epoch": 0.4686703111054443, "grad_norm": 0.1998317837715149, "learning_rate": 0.002, "loss": 2.5612, "step": 235250 }, { "epoch": 0.4686902333290833, "grad_norm": 0.15934021770954132, "learning_rate": 0.002, "loss": 2.5707, "step": 235260 }, { "epoch": 0.46871015555272216, "grad_norm": 0.1902022808790207, "learning_rate": 0.002, "loss": 2.5614, "step": 235270 }, { "epoch": 0.4687300777763611, "grad_norm": 0.198762908577919, "learning_rate": 0.002, "loss": 2.5636, "step": 235280 }, { "epoch": 0.46875, "grad_norm": 0.15646767616271973, "learning_rate": 0.002, "loss": 2.5615, "step": 235290 }, { "epoch": 0.4687699222236389, "grad_norm": 0.16603009402751923, "learning_rate": 0.002, "loss": 2.5595, "step": 235300 }, { "epoch": 0.46878984444727784, "grad_norm": 0.16008466482162476, "learning_rate": 0.002, "loss": 2.5618, "step": 235310 }, { "epoch": 0.4688097666709167, "grad_norm": 0.1292777806520462, "learning_rate": 0.002, "loss": 2.5599, "step": 235320 }, { "epoch": 0.4688296888945557, "grad_norm": 0.16893289983272552, "learning_rate": 0.002, "loss": 2.5549, "step": 235330 }, { "epoch": 0.46884961111819456, "grad_norm": 0.16415420174598694, "learning_rate": 0.002, "loss": 2.5593, "step": 235340 }, { "epoch": 0.46886953334183346, "grad_norm": 0.18212586641311646, "learning_rate": 0.002, "loss": 2.5587, "step": 235350 }, { "epoch": 0.4688894555654724, "grad_norm": 0.1731102019548416, "learning_rate": 0.002, "loss": 2.5589, "step": 235360 }, { "epoch": 0.4689093777891113, "grad_norm": 0.16096146404743195, "learning_rate": 0.002, "loss": 2.5668, "step": 235370 }, { "epoch": 0.46892930001275024, "grad_norm": 0.16242846846580505, "learning_rate": 0.002, "loss": 2.5514, "step": 235380 }, { "epoch": 0.46894922223638913, "grad_norm": 0.1687629222869873, "learning_rate": 0.002, "loss": 2.5648, "step": 235390 }, { "epoch": 0.4689691444600281, "grad_norm": 0.1800241768360138, "learning_rate": 0.002, "loss": 2.5668, "step": 235400 }, { "epoch": 0.46898906668366697, "grad_norm": 0.15952268242835999, "learning_rate": 0.002, "loss": 2.548, "step": 235410 }, { "epoch": 0.46900898890730586, "grad_norm": 0.15127107501029968, "learning_rate": 0.002, "loss": 2.5664, "step": 235420 }, { "epoch": 0.4690289111309448, "grad_norm": 0.13284985721111298, "learning_rate": 0.002, "loss": 2.5325, "step": 235430 }, { "epoch": 0.4690488333545837, "grad_norm": 0.18912546336650848, "learning_rate": 0.002, "loss": 2.5528, "step": 235440 }, { "epoch": 0.46906875557822264, "grad_norm": 0.15021321177482605, "learning_rate": 0.002, "loss": 2.5621, "step": 235450 }, { "epoch": 0.46908867780186153, "grad_norm": 0.16808348894119263, "learning_rate": 0.002, "loss": 2.5606, "step": 235460 }, { "epoch": 0.4691086000255004, "grad_norm": 0.15608017146587372, "learning_rate": 0.002, "loss": 2.5484, "step": 235470 }, { "epoch": 0.46912852224913937, "grad_norm": 0.2010965794324875, "learning_rate": 0.002, "loss": 2.5771, "step": 235480 }, { "epoch": 0.46914844447277826, "grad_norm": 0.16755463182926178, "learning_rate": 0.002, "loss": 2.5579, "step": 235490 }, { "epoch": 0.4691683666964172, "grad_norm": 0.17779259383678436, "learning_rate": 0.002, "loss": 2.5526, "step": 235500 }, { "epoch": 0.4691882889200561, "grad_norm": 0.1523032933473587, "learning_rate": 0.002, "loss": 2.5615, "step": 235510 }, { "epoch": 0.46920821114369504, "grad_norm": 0.18957041203975677, "learning_rate": 0.002, "loss": 2.5486, "step": 235520 }, { "epoch": 0.46922813336733393, "grad_norm": 0.13008281588554382, "learning_rate": 0.002, "loss": 2.5721, "step": 235530 }, { "epoch": 0.4692480555909728, "grad_norm": 0.2220209687948227, "learning_rate": 0.002, "loss": 2.558, "step": 235540 }, { "epoch": 0.46926797781461177, "grad_norm": 0.1631830781698227, "learning_rate": 0.002, "loss": 2.5568, "step": 235550 }, { "epoch": 0.46928790003825066, "grad_norm": 0.17010173201560974, "learning_rate": 0.002, "loss": 2.5575, "step": 235560 }, { "epoch": 0.4693078222618896, "grad_norm": 0.16261056065559387, "learning_rate": 0.002, "loss": 2.5662, "step": 235570 }, { "epoch": 0.4693277444855285, "grad_norm": 0.16345533728599548, "learning_rate": 0.002, "loss": 2.5575, "step": 235580 }, { "epoch": 0.4693476667091674, "grad_norm": 0.19200608134269714, "learning_rate": 0.002, "loss": 2.5763, "step": 235590 }, { "epoch": 0.46936758893280633, "grad_norm": 0.1696128100156784, "learning_rate": 0.002, "loss": 2.5646, "step": 235600 }, { "epoch": 0.4693875111564452, "grad_norm": 0.15562058985233307, "learning_rate": 0.002, "loss": 2.5617, "step": 235610 }, { "epoch": 0.46940743338008417, "grad_norm": 0.26208773255348206, "learning_rate": 0.002, "loss": 2.5576, "step": 235620 }, { "epoch": 0.46942735560372306, "grad_norm": 0.21230746805667877, "learning_rate": 0.002, "loss": 2.5677, "step": 235630 }, { "epoch": 0.46944727782736195, "grad_norm": 0.1673656851053238, "learning_rate": 0.002, "loss": 2.5536, "step": 235640 }, { "epoch": 0.4694672000510009, "grad_norm": 0.1553814560174942, "learning_rate": 0.002, "loss": 2.5517, "step": 235650 }, { "epoch": 0.4694871222746398, "grad_norm": 0.14980725944042206, "learning_rate": 0.002, "loss": 2.5572, "step": 235660 }, { "epoch": 0.46950704449827874, "grad_norm": 0.20667526125907898, "learning_rate": 0.002, "loss": 2.5439, "step": 235670 }, { "epoch": 0.4695269667219176, "grad_norm": 0.1732376664876938, "learning_rate": 0.002, "loss": 2.5501, "step": 235680 }, { "epoch": 0.4695468889455566, "grad_norm": 0.16116106510162354, "learning_rate": 0.002, "loss": 2.566, "step": 235690 }, { "epoch": 0.46956681116919546, "grad_norm": 0.16138316690921783, "learning_rate": 0.002, "loss": 2.565, "step": 235700 }, { "epoch": 0.46958673339283435, "grad_norm": 0.19787414371967316, "learning_rate": 0.002, "loss": 2.547, "step": 235710 }, { "epoch": 0.4696066556164733, "grad_norm": 0.15921242535114288, "learning_rate": 0.002, "loss": 2.5556, "step": 235720 }, { "epoch": 0.4696265778401122, "grad_norm": 0.16814541816711426, "learning_rate": 0.002, "loss": 2.5623, "step": 235730 }, { "epoch": 0.46964650006375114, "grad_norm": 0.1799229085445404, "learning_rate": 0.002, "loss": 2.5502, "step": 235740 }, { "epoch": 0.46966642228739003, "grad_norm": 0.18046747148036957, "learning_rate": 0.002, "loss": 2.5387, "step": 235750 }, { "epoch": 0.4696863445110289, "grad_norm": 0.155246302485466, "learning_rate": 0.002, "loss": 2.5474, "step": 235760 }, { "epoch": 0.46970626673466787, "grad_norm": 0.17777693271636963, "learning_rate": 0.002, "loss": 2.5676, "step": 235770 }, { "epoch": 0.46972618895830676, "grad_norm": 0.15040254592895508, "learning_rate": 0.002, "loss": 2.5452, "step": 235780 }, { "epoch": 0.4697461111819457, "grad_norm": 0.1948891282081604, "learning_rate": 0.002, "loss": 2.5568, "step": 235790 }, { "epoch": 0.4697660334055846, "grad_norm": 0.14939284324645996, "learning_rate": 0.002, "loss": 2.5538, "step": 235800 }, { "epoch": 0.4697859556292235, "grad_norm": 0.19396214187145233, "learning_rate": 0.002, "loss": 2.5587, "step": 235810 }, { "epoch": 0.46980587785286243, "grad_norm": 0.17739826440811157, "learning_rate": 0.002, "loss": 2.5575, "step": 235820 }, { "epoch": 0.4698258000765013, "grad_norm": 0.18140171468257904, "learning_rate": 0.002, "loss": 2.5556, "step": 235830 }, { "epoch": 0.46984572230014027, "grad_norm": 0.1666061133146286, "learning_rate": 0.002, "loss": 2.5757, "step": 235840 }, { "epoch": 0.46986564452377916, "grad_norm": 0.15975718200206757, "learning_rate": 0.002, "loss": 2.5603, "step": 235850 }, { "epoch": 0.4698855667474181, "grad_norm": 0.1611490696668625, "learning_rate": 0.002, "loss": 2.5425, "step": 235860 }, { "epoch": 0.469905488971057, "grad_norm": 0.15945672988891602, "learning_rate": 0.002, "loss": 2.545, "step": 235870 }, { "epoch": 0.4699254111946959, "grad_norm": 0.18495966494083405, "learning_rate": 0.002, "loss": 2.5581, "step": 235880 }, { "epoch": 0.46994533341833483, "grad_norm": 0.16996942460536957, "learning_rate": 0.002, "loss": 2.5421, "step": 235890 }, { "epoch": 0.4699652556419737, "grad_norm": 0.1369164139032364, "learning_rate": 0.002, "loss": 2.5685, "step": 235900 }, { "epoch": 0.46998517786561267, "grad_norm": 0.22991657257080078, "learning_rate": 0.002, "loss": 2.5708, "step": 235910 }, { "epoch": 0.47000510008925156, "grad_norm": 0.14934422075748444, "learning_rate": 0.002, "loss": 2.5585, "step": 235920 }, { "epoch": 0.47002502231289045, "grad_norm": 0.17908254265785217, "learning_rate": 0.002, "loss": 2.5609, "step": 235930 }, { "epoch": 0.4700449445365294, "grad_norm": 0.16238950192928314, "learning_rate": 0.002, "loss": 2.5682, "step": 235940 }, { "epoch": 0.4700648667601683, "grad_norm": 0.1453324556350708, "learning_rate": 0.002, "loss": 2.5549, "step": 235950 }, { "epoch": 0.47008478898380723, "grad_norm": 0.18930305540561676, "learning_rate": 0.002, "loss": 2.5767, "step": 235960 }, { "epoch": 0.4701047112074461, "grad_norm": 0.18773877620697021, "learning_rate": 0.002, "loss": 2.5735, "step": 235970 }, { "epoch": 0.47012463343108507, "grad_norm": 0.19741284847259521, "learning_rate": 0.002, "loss": 2.5657, "step": 235980 }, { "epoch": 0.47014455565472396, "grad_norm": 0.1604718267917633, "learning_rate": 0.002, "loss": 2.5636, "step": 235990 }, { "epoch": 0.47016447787836285, "grad_norm": 0.16780251264572144, "learning_rate": 0.002, "loss": 2.5488, "step": 236000 }, { "epoch": 0.4701844001020018, "grad_norm": 0.1785162091255188, "learning_rate": 0.002, "loss": 2.5496, "step": 236010 }, { "epoch": 0.4702043223256407, "grad_norm": 0.18666857481002808, "learning_rate": 0.002, "loss": 2.5787, "step": 236020 }, { "epoch": 0.47022424454927964, "grad_norm": 0.14579539000988007, "learning_rate": 0.002, "loss": 2.5497, "step": 236030 }, { "epoch": 0.4702441667729185, "grad_norm": 0.15645097196102142, "learning_rate": 0.002, "loss": 2.5697, "step": 236040 }, { "epoch": 0.4702640889965574, "grad_norm": 0.1787557452917099, "learning_rate": 0.002, "loss": 2.5609, "step": 236050 }, { "epoch": 0.47028401122019636, "grad_norm": 0.179165318608284, "learning_rate": 0.002, "loss": 2.5633, "step": 236060 }, { "epoch": 0.47030393344383525, "grad_norm": 0.1400165557861328, "learning_rate": 0.002, "loss": 2.5564, "step": 236070 }, { "epoch": 0.4703238556674742, "grad_norm": 0.16844353079795837, "learning_rate": 0.002, "loss": 2.5501, "step": 236080 }, { "epoch": 0.4703437778911131, "grad_norm": 0.1898018717765808, "learning_rate": 0.002, "loss": 2.5651, "step": 236090 }, { "epoch": 0.470363700114752, "grad_norm": 0.1764717549085617, "learning_rate": 0.002, "loss": 2.541, "step": 236100 }, { "epoch": 0.4703836223383909, "grad_norm": 0.18167611956596375, "learning_rate": 0.002, "loss": 2.5704, "step": 236110 }, { "epoch": 0.4704035445620298, "grad_norm": 0.16214972734451294, "learning_rate": 0.002, "loss": 2.5544, "step": 236120 }, { "epoch": 0.47042346678566876, "grad_norm": 0.25115087628364563, "learning_rate": 0.002, "loss": 2.5625, "step": 236130 }, { "epoch": 0.47044338900930766, "grad_norm": 0.16075581312179565, "learning_rate": 0.002, "loss": 2.5567, "step": 236140 }, { "epoch": 0.4704633112329466, "grad_norm": 0.17260198295116425, "learning_rate": 0.002, "loss": 2.5635, "step": 236150 }, { "epoch": 0.4704832334565855, "grad_norm": 0.1513885259628296, "learning_rate": 0.002, "loss": 2.5618, "step": 236160 }, { "epoch": 0.4705031556802244, "grad_norm": 0.20329545438289642, "learning_rate": 0.002, "loss": 2.5519, "step": 236170 }, { "epoch": 0.47052307790386333, "grad_norm": 0.1619333028793335, "learning_rate": 0.002, "loss": 2.5682, "step": 236180 }, { "epoch": 0.4705430001275022, "grad_norm": 0.1484827995300293, "learning_rate": 0.002, "loss": 2.5468, "step": 236190 }, { "epoch": 0.47056292235114117, "grad_norm": 0.1884831190109253, "learning_rate": 0.002, "loss": 2.5569, "step": 236200 }, { "epoch": 0.47058284457478006, "grad_norm": 0.1781531572341919, "learning_rate": 0.002, "loss": 2.5511, "step": 236210 }, { "epoch": 0.47060276679841895, "grad_norm": 0.14886517822742462, "learning_rate": 0.002, "loss": 2.5525, "step": 236220 }, { "epoch": 0.4706226890220579, "grad_norm": 0.15679234266281128, "learning_rate": 0.002, "loss": 2.5549, "step": 236230 }, { "epoch": 0.4706426112456968, "grad_norm": 0.1698714941740036, "learning_rate": 0.002, "loss": 2.5416, "step": 236240 }, { "epoch": 0.47066253346933573, "grad_norm": 0.17319639027118683, "learning_rate": 0.002, "loss": 2.5486, "step": 236250 }, { "epoch": 0.4706824556929746, "grad_norm": 0.15518556535243988, "learning_rate": 0.002, "loss": 2.5689, "step": 236260 }, { "epoch": 0.47070237791661357, "grad_norm": 0.1786395013332367, "learning_rate": 0.002, "loss": 2.5466, "step": 236270 }, { "epoch": 0.47072230014025246, "grad_norm": 0.1661144644021988, "learning_rate": 0.002, "loss": 2.555, "step": 236280 }, { "epoch": 0.47074222236389135, "grad_norm": 0.1599922925233841, "learning_rate": 0.002, "loss": 2.5506, "step": 236290 }, { "epoch": 0.4707621445875303, "grad_norm": 0.189886674284935, "learning_rate": 0.002, "loss": 2.5599, "step": 236300 }, { "epoch": 0.4707820668111692, "grad_norm": 0.17026011645793915, "learning_rate": 0.002, "loss": 2.5643, "step": 236310 }, { "epoch": 0.47080198903480813, "grad_norm": 0.17642587423324585, "learning_rate": 0.002, "loss": 2.5516, "step": 236320 }, { "epoch": 0.470821911258447, "grad_norm": 0.16186295449733734, "learning_rate": 0.002, "loss": 2.5681, "step": 236330 }, { "epoch": 0.4708418334820859, "grad_norm": 0.17001056671142578, "learning_rate": 0.002, "loss": 2.543, "step": 236340 }, { "epoch": 0.47086175570572486, "grad_norm": 0.1766759306192398, "learning_rate": 0.002, "loss": 2.5557, "step": 236350 }, { "epoch": 0.47088167792936375, "grad_norm": 0.1688016653060913, "learning_rate": 0.002, "loss": 2.5537, "step": 236360 }, { "epoch": 0.4709016001530027, "grad_norm": 0.16942326724529266, "learning_rate": 0.002, "loss": 2.5696, "step": 236370 }, { "epoch": 0.4709215223766416, "grad_norm": 0.18500931560993195, "learning_rate": 0.002, "loss": 2.5744, "step": 236380 }, { "epoch": 0.4709414446002805, "grad_norm": 0.16137345135211945, "learning_rate": 0.002, "loss": 2.5481, "step": 236390 }, { "epoch": 0.4709613668239194, "grad_norm": 0.16391131281852722, "learning_rate": 0.002, "loss": 2.5671, "step": 236400 }, { "epoch": 0.4709812890475583, "grad_norm": 0.16159212589263916, "learning_rate": 0.002, "loss": 2.547, "step": 236410 }, { "epoch": 0.47100121127119726, "grad_norm": 0.1547517031431198, "learning_rate": 0.002, "loss": 2.5661, "step": 236420 }, { "epoch": 0.47102113349483615, "grad_norm": 0.23535923659801483, "learning_rate": 0.002, "loss": 2.5627, "step": 236430 }, { "epoch": 0.4710410557184751, "grad_norm": 0.18037942051887512, "learning_rate": 0.002, "loss": 2.5476, "step": 236440 }, { "epoch": 0.471060977942114, "grad_norm": 0.1569398045539856, "learning_rate": 0.002, "loss": 2.555, "step": 236450 }, { "epoch": 0.4710809001657529, "grad_norm": 0.160997673869133, "learning_rate": 0.002, "loss": 2.5602, "step": 236460 }, { "epoch": 0.4711008223893918, "grad_norm": 0.14541856944561005, "learning_rate": 0.002, "loss": 2.5573, "step": 236470 }, { "epoch": 0.4711207446130307, "grad_norm": 0.19503824412822723, "learning_rate": 0.002, "loss": 2.5481, "step": 236480 }, { "epoch": 0.47114066683666966, "grad_norm": 0.16830575466156006, "learning_rate": 0.002, "loss": 2.5444, "step": 236490 }, { "epoch": 0.47116058906030855, "grad_norm": 0.15166708827018738, "learning_rate": 0.002, "loss": 2.5477, "step": 236500 }, { "epoch": 0.47118051128394745, "grad_norm": 0.16845756769180298, "learning_rate": 0.002, "loss": 2.572, "step": 236510 }, { "epoch": 0.4712004335075864, "grad_norm": 0.1808435469865799, "learning_rate": 0.002, "loss": 2.5494, "step": 236520 }, { "epoch": 0.4712203557312253, "grad_norm": 0.19389505684375763, "learning_rate": 0.002, "loss": 2.5658, "step": 236530 }, { "epoch": 0.47124027795486423, "grad_norm": 0.13804709911346436, "learning_rate": 0.002, "loss": 2.5462, "step": 236540 }, { "epoch": 0.4712602001785031, "grad_norm": 0.14643944799900055, "learning_rate": 0.002, "loss": 2.5491, "step": 236550 }, { "epoch": 0.471280122402142, "grad_norm": 0.21182528138160706, "learning_rate": 0.002, "loss": 2.5635, "step": 236560 }, { "epoch": 0.47130004462578096, "grad_norm": 0.13444308936595917, "learning_rate": 0.002, "loss": 2.5499, "step": 236570 }, { "epoch": 0.47131996684941985, "grad_norm": 0.15896843373775482, "learning_rate": 0.002, "loss": 2.5655, "step": 236580 }, { "epoch": 0.4713398890730588, "grad_norm": 0.16805849969387054, "learning_rate": 0.002, "loss": 2.5574, "step": 236590 }, { "epoch": 0.4713598112966977, "grad_norm": 0.1709931641817093, "learning_rate": 0.002, "loss": 2.5698, "step": 236600 }, { "epoch": 0.47137973352033663, "grad_norm": 0.15495063364505768, "learning_rate": 0.002, "loss": 2.5369, "step": 236610 }, { "epoch": 0.4713996557439755, "grad_norm": 0.20378907024860382, "learning_rate": 0.002, "loss": 2.568, "step": 236620 }, { "epoch": 0.4714195779676144, "grad_norm": 0.1683809459209442, "learning_rate": 0.002, "loss": 2.5589, "step": 236630 }, { "epoch": 0.47143950019125336, "grad_norm": 0.17728082835674286, "learning_rate": 0.002, "loss": 2.5592, "step": 236640 }, { "epoch": 0.47145942241489225, "grad_norm": 0.15037795901298523, "learning_rate": 0.002, "loss": 2.5584, "step": 236650 }, { "epoch": 0.4714793446385312, "grad_norm": 0.18436579406261444, "learning_rate": 0.002, "loss": 2.553, "step": 236660 }, { "epoch": 0.4714992668621701, "grad_norm": 0.15935927629470825, "learning_rate": 0.002, "loss": 2.5586, "step": 236670 }, { "epoch": 0.471519189085809, "grad_norm": 0.14574894309043884, "learning_rate": 0.002, "loss": 2.5538, "step": 236680 }, { "epoch": 0.4715391113094479, "grad_norm": 0.18977338075637817, "learning_rate": 0.002, "loss": 2.5649, "step": 236690 }, { "epoch": 0.4715590335330868, "grad_norm": 0.16626542806625366, "learning_rate": 0.002, "loss": 2.5641, "step": 236700 }, { "epoch": 0.47157895575672576, "grad_norm": 0.14187447726726532, "learning_rate": 0.002, "loss": 2.5708, "step": 236710 }, { "epoch": 0.47159887798036465, "grad_norm": 0.1861274093389511, "learning_rate": 0.002, "loss": 2.5718, "step": 236720 }, { "epoch": 0.4716188002040036, "grad_norm": 0.18975575268268585, "learning_rate": 0.002, "loss": 2.5553, "step": 236730 }, { "epoch": 0.4716387224276425, "grad_norm": 0.1989879608154297, "learning_rate": 0.002, "loss": 2.5555, "step": 236740 }, { "epoch": 0.4716586446512814, "grad_norm": 0.15029127895832062, "learning_rate": 0.002, "loss": 2.5654, "step": 236750 }, { "epoch": 0.4716785668749203, "grad_norm": 0.16846564412117004, "learning_rate": 0.002, "loss": 2.5578, "step": 236760 }, { "epoch": 0.4716984890985592, "grad_norm": 0.17983031272888184, "learning_rate": 0.002, "loss": 2.5684, "step": 236770 }, { "epoch": 0.47171841132219816, "grad_norm": 0.21986910700798035, "learning_rate": 0.002, "loss": 2.5494, "step": 236780 }, { "epoch": 0.47173833354583705, "grad_norm": 0.16199836134910583, "learning_rate": 0.002, "loss": 2.5678, "step": 236790 }, { "epoch": 0.47175825576947594, "grad_norm": 0.14471593499183655, "learning_rate": 0.002, "loss": 2.5756, "step": 236800 }, { "epoch": 0.4717781779931149, "grad_norm": 0.1683528572320938, "learning_rate": 0.002, "loss": 2.5637, "step": 236810 }, { "epoch": 0.4717981002167538, "grad_norm": 0.18893273174762726, "learning_rate": 0.002, "loss": 2.558, "step": 236820 }, { "epoch": 0.4718180224403927, "grad_norm": 0.15699876844882965, "learning_rate": 0.002, "loss": 2.5635, "step": 236830 }, { "epoch": 0.4718379446640316, "grad_norm": 0.174582839012146, "learning_rate": 0.002, "loss": 2.5625, "step": 236840 }, { "epoch": 0.4718578668876705, "grad_norm": 0.25694456696510315, "learning_rate": 0.002, "loss": 2.568, "step": 236850 }, { "epoch": 0.47187778911130945, "grad_norm": 0.1767992526292801, "learning_rate": 0.002, "loss": 2.5513, "step": 236860 }, { "epoch": 0.47189771133494834, "grad_norm": 0.1679897904396057, "learning_rate": 0.002, "loss": 2.5713, "step": 236870 }, { "epoch": 0.4719176335585873, "grad_norm": 0.16787904500961304, "learning_rate": 0.002, "loss": 2.5817, "step": 236880 }, { "epoch": 0.4719375557822262, "grad_norm": 0.17384225130081177, "learning_rate": 0.002, "loss": 2.5702, "step": 236890 }, { "epoch": 0.4719574780058651, "grad_norm": 0.17608964443206787, "learning_rate": 0.002, "loss": 2.549, "step": 236900 }, { "epoch": 0.471977400229504, "grad_norm": 0.1626795083284378, "learning_rate": 0.002, "loss": 2.551, "step": 236910 }, { "epoch": 0.4719973224531429, "grad_norm": 0.1465895026922226, "learning_rate": 0.002, "loss": 2.5489, "step": 236920 }, { "epoch": 0.47201724467678186, "grad_norm": 0.1678590029478073, "learning_rate": 0.002, "loss": 2.5623, "step": 236930 }, { "epoch": 0.47203716690042075, "grad_norm": 0.16523948311805725, "learning_rate": 0.002, "loss": 2.5686, "step": 236940 }, { "epoch": 0.4720570891240597, "grad_norm": 0.1709417849779129, "learning_rate": 0.002, "loss": 2.5662, "step": 236950 }, { "epoch": 0.4720770113476986, "grad_norm": 0.16823500394821167, "learning_rate": 0.002, "loss": 2.5628, "step": 236960 }, { "epoch": 0.4720969335713375, "grad_norm": 0.18028207123279572, "learning_rate": 0.002, "loss": 2.5724, "step": 236970 }, { "epoch": 0.4721168557949764, "grad_norm": 0.15316246449947357, "learning_rate": 0.002, "loss": 2.557, "step": 236980 }, { "epoch": 0.4721367780186153, "grad_norm": 0.18580250442028046, "learning_rate": 0.002, "loss": 2.5621, "step": 236990 }, { "epoch": 0.47215670024225426, "grad_norm": 0.18648399412631989, "learning_rate": 0.002, "loss": 2.5631, "step": 237000 }, { "epoch": 0.47217662246589315, "grad_norm": 0.19248154759407043, "learning_rate": 0.002, "loss": 2.5642, "step": 237010 }, { "epoch": 0.4721965446895321, "grad_norm": 0.1613023281097412, "learning_rate": 0.002, "loss": 2.5517, "step": 237020 }, { "epoch": 0.472216466913171, "grad_norm": 0.1891857236623764, "learning_rate": 0.002, "loss": 2.5517, "step": 237030 }, { "epoch": 0.4722363891368099, "grad_norm": 0.19203554093837738, "learning_rate": 0.002, "loss": 2.5661, "step": 237040 }, { "epoch": 0.4722563113604488, "grad_norm": 0.15274867415428162, "learning_rate": 0.002, "loss": 2.5595, "step": 237050 }, { "epoch": 0.4722762335840877, "grad_norm": 0.1700824499130249, "learning_rate": 0.002, "loss": 2.559, "step": 237060 }, { "epoch": 0.47229615580772666, "grad_norm": 0.16097807884216309, "learning_rate": 0.002, "loss": 2.5425, "step": 237070 }, { "epoch": 0.47231607803136555, "grad_norm": 0.20217569172382355, "learning_rate": 0.002, "loss": 2.5555, "step": 237080 }, { "epoch": 0.47233600025500444, "grad_norm": 0.18310296535491943, "learning_rate": 0.002, "loss": 2.5581, "step": 237090 }, { "epoch": 0.4723559224786434, "grad_norm": 0.16144675016403198, "learning_rate": 0.002, "loss": 2.5396, "step": 237100 }, { "epoch": 0.4723758447022823, "grad_norm": 0.16536784172058105, "learning_rate": 0.002, "loss": 2.5686, "step": 237110 }, { "epoch": 0.4723957669259212, "grad_norm": 0.17046745121479034, "learning_rate": 0.002, "loss": 2.5424, "step": 237120 }, { "epoch": 0.4724156891495601, "grad_norm": 0.24171781539916992, "learning_rate": 0.002, "loss": 2.56, "step": 237130 }, { "epoch": 0.472435611373199, "grad_norm": 0.15870489180088043, "learning_rate": 0.002, "loss": 2.5546, "step": 237140 }, { "epoch": 0.47245553359683795, "grad_norm": 0.16598466038703918, "learning_rate": 0.002, "loss": 2.5699, "step": 237150 }, { "epoch": 0.47247545582047684, "grad_norm": 0.16335728764533997, "learning_rate": 0.002, "loss": 2.5463, "step": 237160 }, { "epoch": 0.4724953780441158, "grad_norm": 0.1644285023212433, "learning_rate": 0.002, "loss": 2.5783, "step": 237170 }, { "epoch": 0.4725153002677547, "grad_norm": 0.13776984810829163, "learning_rate": 0.002, "loss": 2.5459, "step": 237180 }, { "epoch": 0.4725352224913936, "grad_norm": 0.17559809982776642, "learning_rate": 0.002, "loss": 2.5668, "step": 237190 }, { "epoch": 0.4725551447150325, "grad_norm": 0.19830024242401123, "learning_rate": 0.002, "loss": 2.5555, "step": 237200 }, { "epoch": 0.4725750669386714, "grad_norm": 0.1803443729877472, "learning_rate": 0.002, "loss": 2.5702, "step": 237210 }, { "epoch": 0.47259498916231035, "grad_norm": 0.18348516523838043, "learning_rate": 0.002, "loss": 2.5626, "step": 237220 }, { "epoch": 0.47261491138594924, "grad_norm": 0.1589469015598297, "learning_rate": 0.002, "loss": 2.5538, "step": 237230 }, { "epoch": 0.4726348336095882, "grad_norm": 0.1931280642747879, "learning_rate": 0.002, "loss": 2.5639, "step": 237240 }, { "epoch": 0.4726547558332271, "grad_norm": 0.16990816593170166, "learning_rate": 0.002, "loss": 2.5578, "step": 237250 }, { "epoch": 0.47267467805686597, "grad_norm": 0.18054570257663727, "learning_rate": 0.002, "loss": 2.5444, "step": 237260 }, { "epoch": 0.4726946002805049, "grad_norm": 0.16143231093883514, "learning_rate": 0.002, "loss": 2.5471, "step": 237270 }, { "epoch": 0.4727145225041438, "grad_norm": 0.16302502155303955, "learning_rate": 0.002, "loss": 2.5704, "step": 237280 }, { "epoch": 0.47273444472778275, "grad_norm": 0.15235282480716705, "learning_rate": 0.002, "loss": 2.568, "step": 237290 }, { "epoch": 0.47275436695142165, "grad_norm": 0.19217684864997864, "learning_rate": 0.002, "loss": 2.5636, "step": 237300 }, { "epoch": 0.47277428917506054, "grad_norm": 0.15922388434410095, "learning_rate": 0.002, "loss": 2.56, "step": 237310 }, { "epoch": 0.4727942113986995, "grad_norm": 0.19387519359588623, "learning_rate": 0.002, "loss": 2.5681, "step": 237320 }, { "epoch": 0.4728141336223384, "grad_norm": 0.1775541603565216, "learning_rate": 0.002, "loss": 2.5491, "step": 237330 }, { "epoch": 0.4728340558459773, "grad_norm": 0.17872974276542664, "learning_rate": 0.002, "loss": 2.5622, "step": 237340 }, { "epoch": 0.4728539780696162, "grad_norm": 0.1640501171350479, "learning_rate": 0.002, "loss": 2.5663, "step": 237350 }, { "epoch": 0.47287390029325516, "grad_norm": 0.19088414311408997, "learning_rate": 0.002, "loss": 2.5582, "step": 237360 }, { "epoch": 0.47289382251689405, "grad_norm": 0.16709977388381958, "learning_rate": 0.002, "loss": 2.572, "step": 237370 }, { "epoch": 0.47291374474053294, "grad_norm": 0.15319673717021942, "learning_rate": 0.002, "loss": 2.5652, "step": 237380 }, { "epoch": 0.4729336669641719, "grad_norm": 0.1654978096485138, "learning_rate": 0.002, "loss": 2.5573, "step": 237390 }, { "epoch": 0.4729535891878108, "grad_norm": 0.15435890853405, "learning_rate": 0.002, "loss": 2.5663, "step": 237400 }, { "epoch": 0.4729735114114497, "grad_norm": 0.21192391216754913, "learning_rate": 0.002, "loss": 2.5516, "step": 237410 }, { "epoch": 0.4729934336350886, "grad_norm": 0.15536822378635406, "learning_rate": 0.002, "loss": 2.5704, "step": 237420 }, { "epoch": 0.4730133558587275, "grad_norm": 0.15798547863960266, "learning_rate": 0.002, "loss": 2.549, "step": 237430 }, { "epoch": 0.47303327808236645, "grad_norm": 0.19630660116672516, "learning_rate": 0.002, "loss": 2.5525, "step": 237440 }, { "epoch": 0.47305320030600534, "grad_norm": 0.1715787649154663, "learning_rate": 0.002, "loss": 2.5383, "step": 237450 }, { "epoch": 0.4730731225296443, "grad_norm": 0.18207794427871704, "learning_rate": 0.002, "loss": 2.5589, "step": 237460 }, { "epoch": 0.4730930447532832, "grad_norm": 0.18536481261253357, "learning_rate": 0.002, "loss": 2.5446, "step": 237470 }, { "epoch": 0.4731129669769221, "grad_norm": 0.1538190245628357, "learning_rate": 0.002, "loss": 2.5639, "step": 237480 }, { "epoch": 0.473132889200561, "grad_norm": 0.1570426970720291, "learning_rate": 0.002, "loss": 2.5568, "step": 237490 }, { "epoch": 0.4731528114241999, "grad_norm": 0.203469917178154, "learning_rate": 0.002, "loss": 2.56, "step": 237500 }, { "epoch": 0.47317273364783885, "grad_norm": 0.14747878909111023, "learning_rate": 0.002, "loss": 2.5653, "step": 237510 }, { "epoch": 0.47319265587147774, "grad_norm": 0.18619954586029053, "learning_rate": 0.002, "loss": 2.5506, "step": 237520 }, { "epoch": 0.4732125780951167, "grad_norm": 0.1722266972064972, "learning_rate": 0.002, "loss": 2.5511, "step": 237530 }, { "epoch": 0.4732325003187556, "grad_norm": 0.1684424728155136, "learning_rate": 0.002, "loss": 2.5424, "step": 237540 }, { "epoch": 0.47325242254239447, "grad_norm": 0.1532810926437378, "learning_rate": 0.002, "loss": 2.5612, "step": 237550 }, { "epoch": 0.4732723447660334, "grad_norm": 0.21518024802207947, "learning_rate": 0.002, "loss": 2.5574, "step": 237560 }, { "epoch": 0.4732922669896723, "grad_norm": 0.17342065274715424, "learning_rate": 0.002, "loss": 2.553, "step": 237570 }, { "epoch": 0.47331218921331125, "grad_norm": 0.19354763627052307, "learning_rate": 0.002, "loss": 2.5507, "step": 237580 }, { "epoch": 0.47333211143695014, "grad_norm": 0.16243888437747955, "learning_rate": 0.002, "loss": 2.5569, "step": 237590 }, { "epoch": 0.47335203366058903, "grad_norm": 0.15304943919181824, "learning_rate": 0.002, "loss": 2.5557, "step": 237600 }, { "epoch": 0.473371955884228, "grad_norm": 0.19072440266609192, "learning_rate": 0.002, "loss": 2.5624, "step": 237610 }, { "epoch": 0.47339187810786687, "grad_norm": 0.16889290511608124, "learning_rate": 0.002, "loss": 2.5542, "step": 237620 }, { "epoch": 0.4734118003315058, "grad_norm": 0.15547136962413788, "learning_rate": 0.002, "loss": 2.5635, "step": 237630 }, { "epoch": 0.4734317225551447, "grad_norm": 0.15138058364391327, "learning_rate": 0.002, "loss": 2.5642, "step": 237640 }, { "epoch": 0.47345164477878365, "grad_norm": 0.14989657700061798, "learning_rate": 0.002, "loss": 2.5601, "step": 237650 }, { "epoch": 0.47347156700242254, "grad_norm": 0.17258091270923615, "learning_rate": 0.002, "loss": 2.5511, "step": 237660 }, { "epoch": 0.47349148922606143, "grad_norm": 0.18187867105007172, "learning_rate": 0.002, "loss": 2.5377, "step": 237670 }, { "epoch": 0.4735114114497004, "grad_norm": 0.1470808982849121, "learning_rate": 0.002, "loss": 2.5582, "step": 237680 }, { "epoch": 0.47353133367333927, "grad_norm": 0.18734966218471527, "learning_rate": 0.002, "loss": 2.5683, "step": 237690 }, { "epoch": 0.4735512558969782, "grad_norm": 0.21012845635414124, "learning_rate": 0.002, "loss": 2.5572, "step": 237700 }, { "epoch": 0.4735711781206171, "grad_norm": 0.16442641615867615, "learning_rate": 0.002, "loss": 2.5636, "step": 237710 }, { "epoch": 0.473591100344256, "grad_norm": 0.15838366746902466, "learning_rate": 0.002, "loss": 2.5602, "step": 237720 }, { "epoch": 0.47361102256789495, "grad_norm": 0.15436439216136932, "learning_rate": 0.002, "loss": 2.5609, "step": 237730 }, { "epoch": 0.47363094479153384, "grad_norm": 0.15838028490543365, "learning_rate": 0.002, "loss": 2.5575, "step": 237740 }, { "epoch": 0.4736508670151728, "grad_norm": 0.17697946727275848, "learning_rate": 0.002, "loss": 2.5654, "step": 237750 }, { "epoch": 0.4736707892388117, "grad_norm": 0.25830623507499695, "learning_rate": 0.002, "loss": 2.569, "step": 237760 }, { "epoch": 0.4736907114624506, "grad_norm": 0.17460790276527405, "learning_rate": 0.002, "loss": 2.5495, "step": 237770 }, { "epoch": 0.4737106336860895, "grad_norm": 0.13814106583595276, "learning_rate": 0.002, "loss": 2.5612, "step": 237780 }, { "epoch": 0.4737305559097284, "grad_norm": 0.15983012318611145, "learning_rate": 0.002, "loss": 2.569, "step": 237790 }, { "epoch": 0.47375047813336735, "grad_norm": 0.15455815196037292, "learning_rate": 0.002, "loss": 2.557, "step": 237800 }, { "epoch": 0.47377040035700624, "grad_norm": 0.15419743955135345, "learning_rate": 0.002, "loss": 2.5636, "step": 237810 }, { "epoch": 0.4737903225806452, "grad_norm": 0.15926285088062286, "learning_rate": 0.002, "loss": 2.5386, "step": 237820 }, { "epoch": 0.4738102448042841, "grad_norm": 0.19341367483139038, "learning_rate": 0.002, "loss": 2.5552, "step": 237830 }, { "epoch": 0.47383016702792297, "grad_norm": 0.15603747963905334, "learning_rate": 0.002, "loss": 2.5757, "step": 237840 }, { "epoch": 0.4738500892515619, "grad_norm": 0.1555807739496231, "learning_rate": 0.002, "loss": 2.5642, "step": 237850 }, { "epoch": 0.4738700114752008, "grad_norm": 0.16687248647212982, "learning_rate": 0.002, "loss": 2.568, "step": 237860 }, { "epoch": 0.47388993369883975, "grad_norm": 0.20381613075733185, "learning_rate": 0.002, "loss": 2.5707, "step": 237870 }, { "epoch": 0.47390985592247864, "grad_norm": 0.16707946360111237, "learning_rate": 0.002, "loss": 2.5727, "step": 237880 }, { "epoch": 0.47392977814611753, "grad_norm": 0.15476839244365692, "learning_rate": 0.002, "loss": 2.5483, "step": 237890 }, { "epoch": 0.4739497003697565, "grad_norm": 0.18579930067062378, "learning_rate": 0.002, "loss": 2.5621, "step": 237900 }, { "epoch": 0.47396962259339537, "grad_norm": 0.16852308809757233, "learning_rate": 0.002, "loss": 2.5538, "step": 237910 }, { "epoch": 0.4739895448170343, "grad_norm": 0.17871695756912231, "learning_rate": 0.002, "loss": 2.5717, "step": 237920 }, { "epoch": 0.4740094670406732, "grad_norm": 0.16353894770145416, "learning_rate": 0.002, "loss": 2.56, "step": 237930 }, { "epoch": 0.47402938926431215, "grad_norm": 0.2098909318447113, "learning_rate": 0.002, "loss": 2.5598, "step": 237940 }, { "epoch": 0.47404931148795104, "grad_norm": 0.18540434539318085, "learning_rate": 0.002, "loss": 2.5606, "step": 237950 }, { "epoch": 0.47406923371158993, "grad_norm": 0.19266073405742645, "learning_rate": 0.002, "loss": 2.5453, "step": 237960 }, { "epoch": 0.4740891559352289, "grad_norm": 0.14064215123653412, "learning_rate": 0.002, "loss": 2.5505, "step": 237970 }, { "epoch": 0.47410907815886777, "grad_norm": 0.14679327607154846, "learning_rate": 0.002, "loss": 2.5604, "step": 237980 }, { "epoch": 0.4741290003825067, "grad_norm": 0.14527073502540588, "learning_rate": 0.002, "loss": 2.5509, "step": 237990 }, { "epoch": 0.4741489226061456, "grad_norm": 0.251554399728775, "learning_rate": 0.002, "loss": 2.5683, "step": 238000 }, { "epoch": 0.4741688448297845, "grad_norm": 0.1723742038011551, "learning_rate": 0.002, "loss": 2.5691, "step": 238010 }, { "epoch": 0.47418876705342344, "grad_norm": 0.19928371906280518, "learning_rate": 0.002, "loss": 2.5596, "step": 238020 }, { "epoch": 0.47420868927706233, "grad_norm": 0.17986202239990234, "learning_rate": 0.002, "loss": 2.551, "step": 238030 }, { "epoch": 0.4742286115007013, "grad_norm": 0.1641017496585846, "learning_rate": 0.002, "loss": 2.5707, "step": 238040 }, { "epoch": 0.47424853372434017, "grad_norm": 0.1717945784330368, "learning_rate": 0.002, "loss": 2.5497, "step": 238050 }, { "epoch": 0.4742684559479791, "grad_norm": 0.1895013004541397, "learning_rate": 0.002, "loss": 2.5602, "step": 238060 }, { "epoch": 0.474288378171618, "grad_norm": 0.16928887367248535, "learning_rate": 0.002, "loss": 2.5745, "step": 238070 }, { "epoch": 0.4743083003952569, "grad_norm": 0.17732840776443481, "learning_rate": 0.002, "loss": 2.5524, "step": 238080 }, { "epoch": 0.47432822261889585, "grad_norm": 0.15661104023456573, "learning_rate": 0.002, "loss": 2.5689, "step": 238090 }, { "epoch": 0.47434814484253474, "grad_norm": 0.16526025533676147, "learning_rate": 0.002, "loss": 2.5424, "step": 238100 }, { "epoch": 0.4743680670661737, "grad_norm": 0.15927545726299286, "learning_rate": 0.002, "loss": 2.5632, "step": 238110 }, { "epoch": 0.4743879892898126, "grad_norm": 0.15309017896652222, "learning_rate": 0.002, "loss": 2.5593, "step": 238120 }, { "epoch": 0.47440791151345146, "grad_norm": 0.17562539875507355, "learning_rate": 0.002, "loss": 2.5718, "step": 238130 }, { "epoch": 0.4744278337370904, "grad_norm": 0.17774324119091034, "learning_rate": 0.002, "loss": 2.5835, "step": 238140 }, { "epoch": 0.4744477559607293, "grad_norm": 0.16924472153186798, "learning_rate": 0.002, "loss": 2.5655, "step": 238150 }, { "epoch": 0.47446767818436825, "grad_norm": 0.15794366598129272, "learning_rate": 0.002, "loss": 2.5667, "step": 238160 }, { "epoch": 0.47448760040800714, "grad_norm": 0.15016312897205353, "learning_rate": 0.002, "loss": 2.5519, "step": 238170 }, { "epoch": 0.47450752263164603, "grad_norm": 0.19831401109695435, "learning_rate": 0.002, "loss": 2.5439, "step": 238180 }, { "epoch": 0.474527444855285, "grad_norm": 0.17630340158939362, "learning_rate": 0.002, "loss": 2.5655, "step": 238190 }, { "epoch": 0.47454736707892387, "grad_norm": 0.17417296767234802, "learning_rate": 0.002, "loss": 2.5661, "step": 238200 }, { "epoch": 0.4745672893025628, "grad_norm": 0.1922968178987503, "learning_rate": 0.002, "loss": 2.5596, "step": 238210 }, { "epoch": 0.4745872115262017, "grad_norm": 0.15054312348365784, "learning_rate": 0.002, "loss": 2.5549, "step": 238220 }, { "epoch": 0.47460713374984065, "grad_norm": 0.17399978637695312, "learning_rate": 0.002, "loss": 2.5511, "step": 238230 }, { "epoch": 0.47462705597347954, "grad_norm": 0.15591903030872345, "learning_rate": 0.002, "loss": 2.5556, "step": 238240 }, { "epoch": 0.47464697819711843, "grad_norm": 0.18252764642238617, "learning_rate": 0.002, "loss": 2.5648, "step": 238250 }, { "epoch": 0.4746669004207574, "grad_norm": 0.17386695742607117, "learning_rate": 0.002, "loss": 2.5545, "step": 238260 }, { "epoch": 0.47468682264439627, "grad_norm": 0.16885831952095032, "learning_rate": 0.002, "loss": 2.5713, "step": 238270 }, { "epoch": 0.4747067448680352, "grad_norm": 0.13252881169319153, "learning_rate": 0.002, "loss": 2.548, "step": 238280 }, { "epoch": 0.4747266670916741, "grad_norm": 0.15737928450107574, "learning_rate": 0.002, "loss": 2.5733, "step": 238290 }, { "epoch": 0.474746589315313, "grad_norm": 0.1738874316215515, "learning_rate": 0.002, "loss": 2.562, "step": 238300 }, { "epoch": 0.47476651153895194, "grad_norm": 0.1777421087026596, "learning_rate": 0.002, "loss": 2.5495, "step": 238310 }, { "epoch": 0.47478643376259083, "grad_norm": 0.15398386120796204, "learning_rate": 0.002, "loss": 2.5526, "step": 238320 }, { "epoch": 0.4748063559862298, "grad_norm": 0.1849074810743332, "learning_rate": 0.002, "loss": 2.5652, "step": 238330 }, { "epoch": 0.47482627820986867, "grad_norm": 0.18614134192466736, "learning_rate": 0.002, "loss": 2.5703, "step": 238340 }, { "epoch": 0.47484620043350756, "grad_norm": 0.19400279223918915, "learning_rate": 0.002, "loss": 2.5599, "step": 238350 }, { "epoch": 0.4748661226571465, "grad_norm": 0.15948818624019623, "learning_rate": 0.002, "loss": 2.5507, "step": 238360 }, { "epoch": 0.4748860448807854, "grad_norm": 0.19724798202514648, "learning_rate": 0.002, "loss": 2.5563, "step": 238370 }, { "epoch": 0.47490596710442434, "grad_norm": 0.14807738363742828, "learning_rate": 0.002, "loss": 2.5635, "step": 238380 }, { "epoch": 0.47492588932806323, "grad_norm": 0.15964697301387787, "learning_rate": 0.002, "loss": 2.5443, "step": 238390 }, { "epoch": 0.4749458115517022, "grad_norm": 0.1926749348640442, "learning_rate": 0.002, "loss": 2.551, "step": 238400 }, { "epoch": 0.47496573377534107, "grad_norm": 0.1610485017299652, "learning_rate": 0.002, "loss": 2.5522, "step": 238410 }, { "epoch": 0.47498565599897996, "grad_norm": 0.18574433028697968, "learning_rate": 0.002, "loss": 2.5511, "step": 238420 }, { "epoch": 0.4750055782226189, "grad_norm": 0.1710229068994522, "learning_rate": 0.002, "loss": 2.5572, "step": 238430 }, { "epoch": 0.4750255004462578, "grad_norm": 0.16188915073871613, "learning_rate": 0.002, "loss": 2.5617, "step": 238440 }, { "epoch": 0.47504542266989674, "grad_norm": 0.18676215410232544, "learning_rate": 0.002, "loss": 2.5542, "step": 238450 }, { "epoch": 0.47506534489353563, "grad_norm": 0.15542636811733246, "learning_rate": 0.002, "loss": 2.5568, "step": 238460 }, { "epoch": 0.4750852671171745, "grad_norm": 0.15609484910964966, "learning_rate": 0.002, "loss": 2.5769, "step": 238470 }, { "epoch": 0.47510518934081347, "grad_norm": 0.21624474227428436, "learning_rate": 0.002, "loss": 2.5471, "step": 238480 }, { "epoch": 0.47512511156445236, "grad_norm": 0.16568905115127563, "learning_rate": 0.002, "loss": 2.5583, "step": 238490 }, { "epoch": 0.4751450337880913, "grad_norm": 0.1674773395061493, "learning_rate": 0.002, "loss": 2.5624, "step": 238500 }, { "epoch": 0.4751649560117302, "grad_norm": 0.1836039125919342, "learning_rate": 0.002, "loss": 2.5605, "step": 238510 }, { "epoch": 0.47518487823536915, "grad_norm": 0.16025224328041077, "learning_rate": 0.002, "loss": 2.5693, "step": 238520 }, { "epoch": 0.47520480045900804, "grad_norm": 0.20397476851940155, "learning_rate": 0.002, "loss": 2.5691, "step": 238530 }, { "epoch": 0.4752247226826469, "grad_norm": 0.15412911772727966, "learning_rate": 0.002, "loss": 2.5583, "step": 238540 }, { "epoch": 0.4752446449062859, "grad_norm": 0.16118207573890686, "learning_rate": 0.002, "loss": 2.5604, "step": 238550 }, { "epoch": 0.47526456712992476, "grad_norm": 0.15236614644527435, "learning_rate": 0.002, "loss": 2.5599, "step": 238560 }, { "epoch": 0.4752844893535637, "grad_norm": 0.18226921558380127, "learning_rate": 0.002, "loss": 2.5655, "step": 238570 }, { "epoch": 0.4753044115772026, "grad_norm": 0.1690279245376587, "learning_rate": 0.002, "loss": 2.5769, "step": 238580 }, { "epoch": 0.4753243338008415, "grad_norm": 0.17671771347522736, "learning_rate": 0.002, "loss": 2.5757, "step": 238590 }, { "epoch": 0.47534425602448044, "grad_norm": 0.1589881181716919, "learning_rate": 0.002, "loss": 2.5616, "step": 238600 }, { "epoch": 0.47536417824811933, "grad_norm": 0.16930902004241943, "learning_rate": 0.002, "loss": 2.5749, "step": 238610 }, { "epoch": 0.4753841004717583, "grad_norm": 0.1777905821800232, "learning_rate": 0.002, "loss": 2.5564, "step": 238620 }, { "epoch": 0.47540402269539717, "grad_norm": 0.14662544429302216, "learning_rate": 0.002, "loss": 2.5447, "step": 238630 }, { "epoch": 0.47542394491903606, "grad_norm": 0.15793198347091675, "learning_rate": 0.002, "loss": 2.5534, "step": 238640 }, { "epoch": 0.475443867142675, "grad_norm": 0.19661074876785278, "learning_rate": 0.002, "loss": 2.558, "step": 238650 }, { "epoch": 0.4754637893663139, "grad_norm": 0.18418951332569122, "learning_rate": 0.002, "loss": 2.5568, "step": 238660 }, { "epoch": 0.47548371158995284, "grad_norm": 0.14310117065906525, "learning_rate": 0.002, "loss": 2.5463, "step": 238670 }, { "epoch": 0.47550363381359173, "grad_norm": 0.15916742384433746, "learning_rate": 0.002, "loss": 2.5552, "step": 238680 }, { "epoch": 0.4755235560372307, "grad_norm": 0.15388177335262299, "learning_rate": 0.002, "loss": 2.5546, "step": 238690 }, { "epoch": 0.47554347826086957, "grad_norm": 0.16747938096523285, "learning_rate": 0.002, "loss": 2.5531, "step": 238700 }, { "epoch": 0.47556340048450846, "grad_norm": 0.16107158362865448, "learning_rate": 0.002, "loss": 2.5495, "step": 238710 }, { "epoch": 0.4755833227081474, "grad_norm": 0.1406428962945938, "learning_rate": 0.002, "loss": 2.5771, "step": 238720 }, { "epoch": 0.4756032449317863, "grad_norm": 0.16614864766597748, "learning_rate": 0.002, "loss": 2.5518, "step": 238730 }, { "epoch": 0.47562316715542524, "grad_norm": 0.1609688550233841, "learning_rate": 0.002, "loss": 2.562, "step": 238740 }, { "epoch": 0.47564308937906413, "grad_norm": 0.20266218483448029, "learning_rate": 0.002, "loss": 2.555, "step": 238750 }, { "epoch": 0.475663011602703, "grad_norm": 0.16819553077220917, "learning_rate": 0.002, "loss": 2.5541, "step": 238760 }, { "epoch": 0.47568293382634197, "grad_norm": 0.1971878856420517, "learning_rate": 0.002, "loss": 2.5536, "step": 238770 }, { "epoch": 0.47570285604998086, "grad_norm": 0.17869891226291656, "learning_rate": 0.002, "loss": 2.5709, "step": 238780 }, { "epoch": 0.4757227782736198, "grad_norm": 0.16611643135547638, "learning_rate": 0.002, "loss": 2.5589, "step": 238790 }, { "epoch": 0.4757427004972587, "grad_norm": 0.1432776302099228, "learning_rate": 0.002, "loss": 2.5608, "step": 238800 }, { "epoch": 0.47576262272089764, "grad_norm": 0.1633394956588745, "learning_rate": 0.002, "loss": 2.5598, "step": 238810 }, { "epoch": 0.47578254494453653, "grad_norm": 0.19379334151744843, "learning_rate": 0.002, "loss": 2.5561, "step": 238820 }, { "epoch": 0.4758024671681754, "grad_norm": 0.23725555837154388, "learning_rate": 0.002, "loss": 2.558, "step": 238830 }, { "epoch": 0.47582238939181437, "grad_norm": 0.13444265723228455, "learning_rate": 0.002, "loss": 2.5491, "step": 238840 }, { "epoch": 0.47584231161545326, "grad_norm": 0.15190057456493378, "learning_rate": 0.002, "loss": 2.5716, "step": 238850 }, { "epoch": 0.4758622338390922, "grad_norm": 0.15143977105617523, "learning_rate": 0.002, "loss": 2.5454, "step": 238860 }, { "epoch": 0.4758821560627311, "grad_norm": 0.23007196187973022, "learning_rate": 0.002, "loss": 2.5551, "step": 238870 }, { "epoch": 0.47590207828637, "grad_norm": 0.1559624820947647, "learning_rate": 0.002, "loss": 2.5652, "step": 238880 }, { "epoch": 0.47592200051000894, "grad_norm": 0.14798466861248016, "learning_rate": 0.002, "loss": 2.5588, "step": 238890 }, { "epoch": 0.4759419227336478, "grad_norm": 0.17417244613170624, "learning_rate": 0.002, "loss": 2.5558, "step": 238900 }, { "epoch": 0.4759618449572868, "grad_norm": 0.1744527667760849, "learning_rate": 0.002, "loss": 2.556, "step": 238910 }, { "epoch": 0.47598176718092566, "grad_norm": 0.14917287230491638, "learning_rate": 0.002, "loss": 2.5624, "step": 238920 }, { "epoch": 0.47600168940456455, "grad_norm": 0.1765373945236206, "learning_rate": 0.002, "loss": 2.5649, "step": 238930 }, { "epoch": 0.4760216116282035, "grad_norm": 0.1702948361635208, "learning_rate": 0.002, "loss": 2.549, "step": 238940 }, { "epoch": 0.4760415338518424, "grad_norm": 0.1684662103652954, "learning_rate": 0.002, "loss": 2.5634, "step": 238950 }, { "epoch": 0.47606145607548134, "grad_norm": 0.15960949659347534, "learning_rate": 0.002, "loss": 2.5781, "step": 238960 }, { "epoch": 0.47608137829912023, "grad_norm": 0.1748148500919342, "learning_rate": 0.002, "loss": 2.5524, "step": 238970 }, { "epoch": 0.4761013005227592, "grad_norm": 0.14781011641025543, "learning_rate": 0.002, "loss": 2.5614, "step": 238980 }, { "epoch": 0.47612122274639807, "grad_norm": 0.2243967354297638, "learning_rate": 0.002, "loss": 2.5639, "step": 238990 }, { "epoch": 0.47614114497003696, "grad_norm": 0.1801503747701645, "learning_rate": 0.002, "loss": 2.5531, "step": 239000 }, { "epoch": 0.4761610671936759, "grad_norm": 0.159221351146698, "learning_rate": 0.002, "loss": 2.5572, "step": 239010 }, { "epoch": 0.4761809894173148, "grad_norm": 0.22735683619976044, "learning_rate": 0.002, "loss": 2.5769, "step": 239020 }, { "epoch": 0.47620091164095374, "grad_norm": 0.16995356976985931, "learning_rate": 0.002, "loss": 2.5575, "step": 239030 }, { "epoch": 0.47622083386459263, "grad_norm": 0.1674281805753708, "learning_rate": 0.002, "loss": 2.5646, "step": 239040 }, { "epoch": 0.4762407560882315, "grad_norm": 0.17277678847312927, "learning_rate": 0.002, "loss": 2.5562, "step": 239050 }, { "epoch": 0.47626067831187047, "grad_norm": 0.20615094900131226, "learning_rate": 0.002, "loss": 2.5576, "step": 239060 }, { "epoch": 0.47628060053550936, "grad_norm": 0.2105616331100464, "learning_rate": 0.002, "loss": 2.5487, "step": 239070 }, { "epoch": 0.4763005227591483, "grad_norm": 0.16848503053188324, "learning_rate": 0.002, "loss": 2.548, "step": 239080 }, { "epoch": 0.4763204449827872, "grad_norm": 0.1558862328529358, "learning_rate": 0.002, "loss": 2.5433, "step": 239090 }, { "epoch": 0.4763403672064261, "grad_norm": 0.1796765923500061, "learning_rate": 0.002, "loss": 2.5577, "step": 239100 }, { "epoch": 0.47636028943006503, "grad_norm": 0.14462918043136597, "learning_rate": 0.002, "loss": 2.5625, "step": 239110 }, { "epoch": 0.4763802116537039, "grad_norm": 0.15016083419322968, "learning_rate": 0.002, "loss": 2.5421, "step": 239120 }, { "epoch": 0.47640013387734287, "grad_norm": 0.20998506247997284, "learning_rate": 0.002, "loss": 2.566, "step": 239130 }, { "epoch": 0.47642005610098176, "grad_norm": 0.15526996552944183, "learning_rate": 0.002, "loss": 2.5576, "step": 239140 }, { "epoch": 0.4764399783246207, "grad_norm": 0.1775905191898346, "learning_rate": 0.002, "loss": 2.5517, "step": 239150 }, { "epoch": 0.4764599005482596, "grad_norm": 0.14848089218139648, "learning_rate": 0.002, "loss": 2.5616, "step": 239160 }, { "epoch": 0.4764798227718985, "grad_norm": 0.21176201105117798, "learning_rate": 0.002, "loss": 2.5509, "step": 239170 }, { "epoch": 0.47649974499553743, "grad_norm": 0.19681261479854584, "learning_rate": 0.002, "loss": 2.5637, "step": 239180 }, { "epoch": 0.4765196672191763, "grad_norm": 0.16809265315532684, "learning_rate": 0.002, "loss": 2.5558, "step": 239190 }, { "epoch": 0.47653958944281527, "grad_norm": 0.1753198653459549, "learning_rate": 0.002, "loss": 2.554, "step": 239200 }, { "epoch": 0.47655951166645416, "grad_norm": 0.16559690237045288, "learning_rate": 0.002, "loss": 2.5508, "step": 239210 }, { "epoch": 0.47657943389009305, "grad_norm": 0.17305462062358856, "learning_rate": 0.002, "loss": 2.5576, "step": 239220 }, { "epoch": 0.476599356113732, "grad_norm": 0.1681012064218521, "learning_rate": 0.002, "loss": 2.5665, "step": 239230 }, { "epoch": 0.4766192783373709, "grad_norm": 0.18705083429813385, "learning_rate": 0.002, "loss": 2.5702, "step": 239240 }, { "epoch": 0.47663920056100983, "grad_norm": 0.15805643796920776, "learning_rate": 0.002, "loss": 2.548, "step": 239250 }, { "epoch": 0.4766591227846487, "grad_norm": 0.16550789773464203, "learning_rate": 0.002, "loss": 2.5565, "step": 239260 }, { "epoch": 0.47667904500828767, "grad_norm": 0.19427481293678284, "learning_rate": 0.002, "loss": 2.5656, "step": 239270 }, { "epoch": 0.47669896723192656, "grad_norm": 0.17370213568210602, "learning_rate": 0.002, "loss": 2.5545, "step": 239280 }, { "epoch": 0.47671888945556545, "grad_norm": 0.157850444316864, "learning_rate": 0.002, "loss": 2.5549, "step": 239290 }, { "epoch": 0.4767388116792044, "grad_norm": 0.17063693702220917, "learning_rate": 0.002, "loss": 2.5478, "step": 239300 }, { "epoch": 0.4767587339028433, "grad_norm": 0.16173246502876282, "learning_rate": 0.002, "loss": 2.5788, "step": 239310 }, { "epoch": 0.47677865612648224, "grad_norm": 0.178887739777565, "learning_rate": 0.002, "loss": 2.5655, "step": 239320 }, { "epoch": 0.4767985783501211, "grad_norm": 0.19511617720127106, "learning_rate": 0.002, "loss": 2.5498, "step": 239330 }, { "epoch": 0.47681850057376, "grad_norm": 0.17493748664855957, "learning_rate": 0.002, "loss": 2.5774, "step": 239340 }, { "epoch": 0.47683842279739896, "grad_norm": 0.43827104568481445, "learning_rate": 0.002, "loss": 2.5536, "step": 239350 }, { "epoch": 0.47685834502103785, "grad_norm": 0.1825726181268692, "learning_rate": 0.002, "loss": 2.5528, "step": 239360 }, { "epoch": 0.4768782672446768, "grad_norm": 0.15146572887897491, "learning_rate": 0.002, "loss": 2.5559, "step": 239370 }, { "epoch": 0.4768981894683157, "grad_norm": 0.18572689592838287, "learning_rate": 0.002, "loss": 2.5588, "step": 239380 }, { "epoch": 0.4769181116919546, "grad_norm": 0.1796935796737671, "learning_rate": 0.002, "loss": 2.5652, "step": 239390 }, { "epoch": 0.47693803391559353, "grad_norm": 0.1630149930715561, "learning_rate": 0.002, "loss": 2.5648, "step": 239400 }, { "epoch": 0.4769579561392324, "grad_norm": 0.1680598258972168, "learning_rate": 0.002, "loss": 2.5636, "step": 239410 }, { "epoch": 0.47697787836287137, "grad_norm": 0.20223046839237213, "learning_rate": 0.002, "loss": 2.568, "step": 239420 }, { "epoch": 0.47699780058651026, "grad_norm": 0.168659046292305, "learning_rate": 0.002, "loss": 2.547, "step": 239430 }, { "epoch": 0.4770177228101492, "grad_norm": 0.15302199125289917, "learning_rate": 0.002, "loss": 2.5565, "step": 239440 }, { "epoch": 0.4770376450337881, "grad_norm": 0.1647271364927292, "learning_rate": 0.002, "loss": 2.5457, "step": 239450 }, { "epoch": 0.477057567257427, "grad_norm": 0.16501684486865997, "learning_rate": 0.002, "loss": 2.5579, "step": 239460 }, { "epoch": 0.47707748948106593, "grad_norm": 0.1551050841808319, "learning_rate": 0.002, "loss": 2.5597, "step": 239470 }, { "epoch": 0.4770974117047048, "grad_norm": 0.19769498705863953, "learning_rate": 0.002, "loss": 2.5598, "step": 239480 }, { "epoch": 0.47711733392834377, "grad_norm": 0.18779677152633667, "learning_rate": 0.002, "loss": 2.5566, "step": 239490 }, { "epoch": 0.47713725615198266, "grad_norm": 0.17592792212963104, "learning_rate": 0.002, "loss": 2.5597, "step": 239500 }, { "epoch": 0.47715717837562155, "grad_norm": 0.16189908981323242, "learning_rate": 0.002, "loss": 2.5719, "step": 239510 }, { "epoch": 0.4771771005992605, "grad_norm": 0.17471003532409668, "learning_rate": 0.002, "loss": 2.5645, "step": 239520 }, { "epoch": 0.4771970228228994, "grad_norm": 0.1807732880115509, "learning_rate": 0.002, "loss": 2.5537, "step": 239530 }, { "epoch": 0.47721694504653833, "grad_norm": 0.19174787402153015, "learning_rate": 0.002, "loss": 2.5631, "step": 239540 }, { "epoch": 0.4772368672701772, "grad_norm": 0.19910138845443726, "learning_rate": 0.002, "loss": 2.5566, "step": 239550 }, { "epoch": 0.47725678949381617, "grad_norm": 0.23406828939914703, "learning_rate": 0.002, "loss": 2.5641, "step": 239560 }, { "epoch": 0.47727671171745506, "grad_norm": 0.4017425775527954, "learning_rate": 0.002, "loss": 2.5453, "step": 239570 }, { "epoch": 0.47729663394109395, "grad_norm": 0.1575355976819992, "learning_rate": 0.002, "loss": 2.5699, "step": 239580 }, { "epoch": 0.4773165561647329, "grad_norm": 0.15149056911468506, "learning_rate": 0.002, "loss": 2.5497, "step": 239590 }, { "epoch": 0.4773364783883718, "grad_norm": 0.18406689167022705, "learning_rate": 0.002, "loss": 2.5522, "step": 239600 }, { "epoch": 0.47735640061201073, "grad_norm": 0.1821458488702774, "learning_rate": 0.002, "loss": 2.5466, "step": 239610 }, { "epoch": 0.4773763228356496, "grad_norm": 0.17110754549503326, "learning_rate": 0.002, "loss": 2.5575, "step": 239620 }, { "epoch": 0.4773962450592885, "grad_norm": 0.19211408495903015, "learning_rate": 0.002, "loss": 2.5643, "step": 239630 }, { "epoch": 0.47741616728292746, "grad_norm": 0.16794955730438232, "learning_rate": 0.002, "loss": 2.5577, "step": 239640 }, { "epoch": 0.47743608950656635, "grad_norm": 0.14986518025398254, "learning_rate": 0.002, "loss": 2.5597, "step": 239650 }, { "epoch": 0.4774560117302053, "grad_norm": 0.18873925507068634, "learning_rate": 0.002, "loss": 2.5571, "step": 239660 }, { "epoch": 0.4774759339538442, "grad_norm": 0.19098031520843506, "learning_rate": 0.002, "loss": 2.5705, "step": 239670 }, { "epoch": 0.4774958561774831, "grad_norm": 0.15101081132888794, "learning_rate": 0.002, "loss": 2.5531, "step": 239680 }, { "epoch": 0.477515778401122, "grad_norm": 0.16277186572551727, "learning_rate": 0.002, "loss": 2.5539, "step": 239690 }, { "epoch": 0.4775357006247609, "grad_norm": 0.1907217651605606, "learning_rate": 0.002, "loss": 2.5666, "step": 239700 }, { "epoch": 0.47755562284839986, "grad_norm": 0.164423868060112, "learning_rate": 0.002, "loss": 2.5556, "step": 239710 }, { "epoch": 0.47757554507203875, "grad_norm": 0.18143102526664734, "learning_rate": 0.002, "loss": 2.5566, "step": 239720 }, { "epoch": 0.4775954672956777, "grad_norm": 0.15211036801338196, "learning_rate": 0.002, "loss": 2.551, "step": 239730 }, { "epoch": 0.4776153895193166, "grad_norm": 0.16324158012866974, "learning_rate": 0.002, "loss": 2.5606, "step": 239740 }, { "epoch": 0.4776353117429555, "grad_norm": 0.15714840590953827, "learning_rate": 0.002, "loss": 2.5717, "step": 239750 }, { "epoch": 0.47765523396659443, "grad_norm": 0.19802209734916687, "learning_rate": 0.002, "loss": 2.5597, "step": 239760 }, { "epoch": 0.4776751561902333, "grad_norm": 0.23201370239257812, "learning_rate": 0.002, "loss": 2.5438, "step": 239770 }, { "epoch": 0.47769507841387226, "grad_norm": 0.17395992577075958, "learning_rate": 0.002, "loss": 2.5569, "step": 239780 }, { "epoch": 0.47771500063751116, "grad_norm": 0.1592218577861786, "learning_rate": 0.002, "loss": 2.5627, "step": 239790 }, { "epoch": 0.47773492286115005, "grad_norm": 0.16196677088737488, "learning_rate": 0.002, "loss": 2.5588, "step": 239800 }, { "epoch": 0.477754845084789, "grad_norm": 0.17879945039749146, "learning_rate": 0.002, "loss": 2.5514, "step": 239810 }, { "epoch": 0.4777747673084279, "grad_norm": 0.17245689034461975, "learning_rate": 0.002, "loss": 2.5685, "step": 239820 }, { "epoch": 0.47779468953206683, "grad_norm": 0.17465312778949738, "learning_rate": 0.002, "loss": 2.5502, "step": 239830 }, { "epoch": 0.4778146117557057, "grad_norm": 0.1780305653810501, "learning_rate": 0.002, "loss": 2.5625, "step": 239840 }, { "epoch": 0.4778345339793446, "grad_norm": 0.19593745470046997, "learning_rate": 0.002, "loss": 2.5649, "step": 239850 }, { "epoch": 0.47785445620298356, "grad_norm": 0.15438556671142578, "learning_rate": 0.002, "loss": 2.5424, "step": 239860 }, { "epoch": 0.47787437842662245, "grad_norm": 0.17668882012367249, "learning_rate": 0.002, "loss": 2.5528, "step": 239870 }, { "epoch": 0.4778943006502614, "grad_norm": 0.16998785734176636, "learning_rate": 0.002, "loss": 2.5537, "step": 239880 }, { "epoch": 0.4779142228739003, "grad_norm": 0.15167568624019623, "learning_rate": 0.002, "loss": 2.5643, "step": 239890 }, { "epoch": 0.47793414509753923, "grad_norm": 0.20234662294387817, "learning_rate": 0.002, "loss": 2.558, "step": 239900 }, { "epoch": 0.4779540673211781, "grad_norm": 0.16057276725769043, "learning_rate": 0.002, "loss": 2.5597, "step": 239910 }, { "epoch": 0.477973989544817, "grad_norm": 0.14853155612945557, "learning_rate": 0.002, "loss": 2.5477, "step": 239920 }, { "epoch": 0.47799391176845596, "grad_norm": 0.17466573417186737, "learning_rate": 0.002, "loss": 2.5605, "step": 239930 }, { "epoch": 0.47801383399209485, "grad_norm": 0.21679769456386566, "learning_rate": 0.002, "loss": 2.5678, "step": 239940 }, { "epoch": 0.4780337562157338, "grad_norm": 0.19465209543704987, "learning_rate": 0.002, "loss": 2.5659, "step": 239950 }, { "epoch": 0.4780536784393727, "grad_norm": 0.15216530859470367, "learning_rate": 0.002, "loss": 2.5539, "step": 239960 }, { "epoch": 0.4780736006630116, "grad_norm": 0.17467224597930908, "learning_rate": 0.002, "loss": 2.5728, "step": 239970 }, { "epoch": 0.4780935228866505, "grad_norm": 0.17328403890132904, "learning_rate": 0.002, "loss": 2.5486, "step": 239980 }, { "epoch": 0.4781134451102894, "grad_norm": 0.2005103975534439, "learning_rate": 0.002, "loss": 2.5504, "step": 239990 }, { "epoch": 0.47813336733392836, "grad_norm": 0.20241400599479675, "learning_rate": 0.002, "loss": 2.5603, "step": 240000 }, { "epoch": 0.47815328955756725, "grad_norm": 0.18725383281707764, "learning_rate": 0.002, "loss": 2.5632, "step": 240010 }, { "epoch": 0.4781732117812062, "grad_norm": 0.1439632922410965, "learning_rate": 0.002, "loss": 2.5569, "step": 240020 }, { "epoch": 0.4781931340048451, "grad_norm": 0.16819903254508972, "learning_rate": 0.002, "loss": 2.5598, "step": 240030 }, { "epoch": 0.478213056228484, "grad_norm": 0.16554699838161469, "learning_rate": 0.002, "loss": 2.5422, "step": 240040 }, { "epoch": 0.4782329784521229, "grad_norm": 0.16097410023212433, "learning_rate": 0.002, "loss": 2.5646, "step": 240050 }, { "epoch": 0.4782529006757618, "grad_norm": 0.19897525012493134, "learning_rate": 0.002, "loss": 2.5558, "step": 240060 }, { "epoch": 0.47827282289940076, "grad_norm": 0.18272320926189423, "learning_rate": 0.002, "loss": 2.5716, "step": 240070 }, { "epoch": 0.47829274512303965, "grad_norm": 0.1796479970216751, "learning_rate": 0.002, "loss": 2.558, "step": 240080 }, { "epoch": 0.47831266734667854, "grad_norm": 0.194523885846138, "learning_rate": 0.002, "loss": 2.5657, "step": 240090 }, { "epoch": 0.4783325895703175, "grad_norm": 0.17399312555789948, "learning_rate": 0.002, "loss": 2.5603, "step": 240100 }, { "epoch": 0.4783525117939564, "grad_norm": 0.17829027771949768, "learning_rate": 0.002, "loss": 2.5672, "step": 240110 }, { "epoch": 0.4783724340175953, "grad_norm": 0.16520774364471436, "learning_rate": 0.002, "loss": 2.5648, "step": 240120 }, { "epoch": 0.4783923562412342, "grad_norm": 0.1700219213962555, "learning_rate": 0.002, "loss": 2.5558, "step": 240130 }, { "epoch": 0.4784122784648731, "grad_norm": 0.2152126133441925, "learning_rate": 0.002, "loss": 2.5348, "step": 240140 }, { "epoch": 0.47843220068851205, "grad_norm": 0.17113810777664185, "learning_rate": 0.002, "loss": 2.5495, "step": 240150 }, { "epoch": 0.47845212291215095, "grad_norm": 0.18937386572360992, "learning_rate": 0.002, "loss": 2.5502, "step": 240160 }, { "epoch": 0.4784720451357899, "grad_norm": 0.15637706220149994, "learning_rate": 0.002, "loss": 2.558, "step": 240170 }, { "epoch": 0.4784919673594288, "grad_norm": 0.16327159106731415, "learning_rate": 0.002, "loss": 2.5583, "step": 240180 }, { "epoch": 0.47851188958306773, "grad_norm": 0.1509498655796051, "learning_rate": 0.002, "loss": 2.545, "step": 240190 }, { "epoch": 0.4785318118067066, "grad_norm": 0.140177920460701, "learning_rate": 0.002, "loss": 2.5531, "step": 240200 }, { "epoch": 0.4785517340303455, "grad_norm": 0.17508254945278168, "learning_rate": 0.002, "loss": 2.5615, "step": 240210 }, { "epoch": 0.47857165625398446, "grad_norm": 0.17907965183258057, "learning_rate": 0.002, "loss": 2.5584, "step": 240220 }, { "epoch": 0.47859157847762335, "grad_norm": 0.19007670879364014, "learning_rate": 0.002, "loss": 2.5603, "step": 240230 }, { "epoch": 0.4786115007012623, "grad_norm": 0.2200559377670288, "learning_rate": 0.002, "loss": 2.5559, "step": 240240 }, { "epoch": 0.4786314229249012, "grad_norm": 0.15944772958755493, "learning_rate": 0.002, "loss": 2.5523, "step": 240250 }, { "epoch": 0.4786513451485401, "grad_norm": 0.1838260293006897, "learning_rate": 0.002, "loss": 2.5761, "step": 240260 }, { "epoch": 0.478671267372179, "grad_norm": 0.19710446894168854, "learning_rate": 0.002, "loss": 2.5589, "step": 240270 }, { "epoch": 0.4786911895958179, "grad_norm": 0.16088978946208954, "learning_rate": 0.002, "loss": 2.5596, "step": 240280 }, { "epoch": 0.47871111181945686, "grad_norm": 0.18422500789165497, "learning_rate": 0.002, "loss": 2.5676, "step": 240290 }, { "epoch": 0.47873103404309575, "grad_norm": 0.17162077128887177, "learning_rate": 0.002, "loss": 2.5473, "step": 240300 }, { "epoch": 0.4787509562667347, "grad_norm": 0.17485342919826508, "learning_rate": 0.002, "loss": 2.5576, "step": 240310 }, { "epoch": 0.4787708784903736, "grad_norm": 0.20073743164539337, "learning_rate": 0.002, "loss": 2.5467, "step": 240320 }, { "epoch": 0.4787908007140125, "grad_norm": 0.15168364346027374, "learning_rate": 0.002, "loss": 2.5645, "step": 240330 }, { "epoch": 0.4788107229376514, "grad_norm": 0.1426962912082672, "learning_rate": 0.002, "loss": 2.5673, "step": 240340 }, { "epoch": 0.4788306451612903, "grad_norm": 0.142486572265625, "learning_rate": 0.002, "loss": 2.5541, "step": 240350 }, { "epoch": 0.47885056738492926, "grad_norm": 0.1930549442768097, "learning_rate": 0.002, "loss": 2.5604, "step": 240360 }, { "epoch": 0.47887048960856815, "grad_norm": 0.1477552205324173, "learning_rate": 0.002, "loss": 2.5413, "step": 240370 }, { "epoch": 0.47889041183220704, "grad_norm": 0.19810736179351807, "learning_rate": 0.002, "loss": 2.5469, "step": 240380 }, { "epoch": 0.478910334055846, "grad_norm": 0.19021053612232208, "learning_rate": 0.002, "loss": 2.5612, "step": 240390 }, { "epoch": 0.4789302562794849, "grad_norm": 0.1974462866783142, "learning_rate": 0.002, "loss": 2.5598, "step": 240400 }, { "epoch": 0.4789501785031238, "grad_norm": 0.17173951864242554, "learning_rate": 0.002, "loss": 2.5504, "step": 240410 }, { "epoch": 0.4789701007267627, "grad_norm": 0.14798079431056976, "learning_rate": 0.002, "loss": 2.5605, "step": 240420 }, { "epoch": 0.4789900229504016, "grad_norm": 0.1968282014131546, "learning_rate": 0.002, "loss": 2.557, "step": 240430 }, { "epoch": 0.47900994517404055, "grad_norm": 0.1978951394557953, "learning_rate": 0.002, "loss": 2.562, "step": 240440 }, { "epoch": 0.47902986739767944, "grad_norm": 0.1801045686006546, "learning_rate": 0.002, "loss": 2.5571, "step": 240450 }, { "epoch": 0.4790497896213184, "grad_norm": 0.19207292795181274, "learning_rate": 0.002, "loss": 2.5582, "step": 240460 }, { "epoch": 0.4790697118449573, "grad_norm": 0.13454368710517883, "learning_rate": 0.002, "loss": 2.555, "step": 240470 }, { "epoch": 0.4790896340685962, "grad_norm": 0.15937082469463348, "learning_rate": 0.002, "loss": 2.5484, "step": 240480 }, { "epoch": 0.4791095562922351, "grad_norm": 0.17750650644302368, "learning_rate": 0.002, "loss": 2.5734, "step": 240490 }, { "epoch": 0.479129478515874, "grad_norm": 0.17151211202144623, "learning_rate": 0.002, "loss": 2.5653, "step": 240500 }, { "epoch": 0.47914940073951295, "grad_norm": 0.157280832529068, "learning_rate": 0.002, "loss": 2.5536, "step": 240510 }, { "epoch": 0.47916932296315184, "grad_norm": 0.19369050860404968, "learning_rate": 0.002, "loss": 2.558, "step": 240520 }, { "epoch": 0.4791892451867908, "grad_norm": 0.15093858540058136, "learning_rate": 0.002, "loss": 2.5635, "step": 240530 }, { "epoch": 0.4792091674104297, "grad_norm": 0.1677389144897461, "learning_rate": 0.002, "loss": 2.5659, "step": 240540 }, { "epoch": 0.4792290896340686, "grad_norm": 0.1881183683872223, "learning_rate": 0.002, "loss": 2.56, "step": 240550 }, { "epoch": 0.4792490118577075, "grad_norm": 0.1898544579744339, "learning_rate": 0.002, "loss": 2.5612, "step": 240560 }, { "epoch": 0.4792689340813464, "grad_norm": 0.17115287482738495, "learning_rate": 0.002, "loss": 2.5738, "step": 240570 }, { "epoch": 0.47928885630498536, "grad_norm": 0.14615345001220703, "learning_rate": 0.002, "loss": 2.5622, "step": 240580 }, { "epoch": 0.47930877852862425, "grad_norm": 0.18892136216163635, "learning_rate": 0.002, "loss": 2.5607, "step": 240590 }, { "epoch": 0.47932870075226314, "grad_norm": 0.15831315517425537, "learning_rate": 0.002, "loss": 2.5511, "step": 240600 }, { "epoch": 0.4793486229759021, "grad_norm": 0.1956695020198822, "learning_rate": 0.002, "loss": 2.5554, "step": 240610 }, { "epoch": 0.479368545199541, "grad_norm": 0.1929439902305603, "learning_rate": 0.002, "loss": 2.5747, "step": 240620 }, { "epoch": 0.4793884674231799, "grad_norm": 0.16137726604938507, "learning_rate": 0.002, "loss": 2.5523, "step": 240630 }, { "epoch": 0.4794083896468188, "grad_norm": 0.1967262625694275, "learning_rate": 0.002, "loss": 2.5553, "step": 240640 }, { "epoch": 0.47942831187045776, "grad_norm": 0.1748654991388321, "learning_rate": 0.002, "loss": 2.5533, "step": 240650 }, { "epoch": 0.47944823409409665, "grad_norm": 0.14058621227741241, "learning_rate": 0.002, "loss": 2.559, "step": 240660 }, { "epoch": 0.47946815631773554, "grad_norm": 0.19052761793136597, "learning_rate": 0.002, "loss": 2.5537, "step": 240670 }, { "epoch": 0.4794880785413745, "grad_norm": 0.16519251465797424, "learning_rate": 0.002, "loss": 2.5584, "step": 240680 }, { "epoch": 0.4795080007650134, "grad_norm": 0.1498178094625473, "learning_rate": 0.002, "loss": 2.5533, "step": 240690 }, { "epoch": 0.4795279229886523, "grad_norm": 0.19237196445465088, "learning_rate": 0.002, "loss": 2.5643, "step": 240700 }, { "epoch": 0.4795478452122912, "grad_norm": 0.17572014033794403, "learning_rate": 0.002, "loss": 2.5718, "step": 240710 }, { "epoch": 0.4795677674359301, "grad_norm": 0.3829589784145355, "learning_rate": 0.002, "loss": 2.5516, "step": 240720 }, { "epoch": 0.47958768965956905, "grad_norm": 0.18131129443645477, "learning_rate": 0.002, "loss": 2.5516, "step": 240730 }, { "epoch": 0.47960761188320794, "grad_norm": 0.15411223471164703, "learning_rate": 0.002, "loss": 2.5628, "step": 240740 }, { "epoch": 0.4796275341068469, "grad_norm": 0.16578443348407745, "learning_rate": 0.002, "loss": 2.5511, "step": 240750 }, { "epoch": 0.4796474563304858, "grad_norm": 0.15861405432224274, "learning_rate": 0.002, "loss": 2.5616, "step": 240760 }, { "epoch": 0.4796673785541247, "grad_norm": 0.16806437075138092, "learning_rate": 0.002, "loss": 2.5596, "step": 240770 }, { "epoch": 0.4796873007777636, "grad_norm": 0.17884443700313568, "learning_rate": 0.002, "loss": 2.5501, "step": 240780 }, { "epoch": 0.4797072230014025, "grad_norm": 0.22688381373882294, "learning_rate": 0.002, "loss": 2.5587, "step": 240790 }, { "epoch": 0.47972714522504145, "grad_norm": 0.16415084898471832, "learning_rate": 0.002, "loss": 2.5621, "step": 240800 }, { "epoch": 0.47974706744868034, "grad_norm": 0.1770022064447403, "learning_rate": 0.002, "loss": 2.5624, "step": 240810 }, { "epoch": 0.4797669896723193, "grad_norm": 0.16425010561943054, "learning_rate": 0.002, "loss": 2.5608, "step": 240820 }, { "epoch": 0.4797869118959582, "grad_norm": 0.16973212361335754, "learning_rate": 0.002, "loss": 2.5775, "step": 240830 }, { "epoch": 0.47980683411959707, "grad_norm": 0.1561296582221985, "learning_rate": 0.002, "loss": 2.5546, "step": 240840 }, { "epoch": 0.479826756343236, "grad_norm": 0.15268464386463165, "learning_rate": 0.002, "loss": 2.5685, "step": 240850 }, { "epoch": 0.4798466785668749, "grad_norm": 0.1705259531736374, "learning_rate": 0.002, "loss": 2.5664, "step": 240860 }, { "epoch": 0.47986660079051385, "grad_norm": 0.16364189982414246, "learning_rate": 0.002, "loss": 2.5683, "step": 240870 }, { "epoch": 0.47988652301415274, "grad_norm": 0.16078075766563416, "learning_rate": 0.002, "loss": 2.5595, "step": 240880 }, { "epoch": 0.47990644523779163, "grad_norm": 0.20905263721942902, "learning_rate": 0.002, "loss": 2.5706, "step": 240890 }, { "epoch": 0.4799263674614306, "grad_norm": 0.1892530471086502, "learning_rate": 0.002, "loss": 2.5618, "step": 240900 }, { "epoch": 0.47994628968506947, "grad_norm": 0.14542174339294434, "learning_rate": 0.002, "loss": 2.5565, "step": 240910 }, { "epoch": 0.4799662119087084, "grad_norm": 0.16234254837036133, "learning_rate": 0.002, "loss": 2.5669, "step": 240920 }, { "epoch": 0.4799861341323473, "grad_norm": 0.20253483951091766, "learning_rate": 0.002, "loss": 2.5451, "step": 240930 }, { "epoch": 0.48000605635598625, "grad_norm": 0.15545940399169922, "learning_rate": 0.002, "loss": 2.5584, "step": 240940 }, { "epoch": 0.48002597857962515, "grad_norm": 0.16373282670974731, "learning_rate": 0.002, "loss": 2.5528, "step": 240950 }, { "epoch": 0.48004590080326404, "grad_norm": 0.14740630984306335, "learning_rate": 0.002, "loss": 2.5605, "step": 240960 }, { "epoch": 0.480065823026903, "grad_norm": 0.17201684415340424, "learning_rate": 0.002, "loss": 2.5588, "step": 240970 }, { "epoch": 0.4800857452505419, "grad_norm": 0.16709144413471222, "learning_rate": 0.002, "loss": 2.5641, "step": 240980 }, { "epoch": 0.4801056674741808, "grad_norm": 0.17942024767398834, "learning_rate": 0.002, "loss": 2.5523, "step": 240990 }, { "epoch": 0.4801255896978197, "grad_norm": 0.1781633049249649, "learning_rate": 0.002, "loss": 2.5532, "step": 241000 }, { "epoch": 0.4801455119214586, "grad_norm": 0.17231972515583038, "learning_rate": 0.002, "loss": 2.5642, "step": 241010 }, { "epoch": 0.48016543414509755, "grad_norm": 0.15355074405670166, "learning_rate": 0.002, "loss": 2.5549, "step": 241020 }, { "epoch": 0.48018535636873644, "grad_norm": 0.19824723899364471, "learning_rate": 0.002, "loss": 2.5488, "step": 241030 }, { "epoch": 0.4802052785923754, "grad_norm": 0.1648206114768982, "learning_rate": 0.002, "loss": 2.5652, "step": 241040 }, { "epoch": 0.4802252008160143, "grad_norm": 0.16811883449554443, "learning_rate": 0.002, "loss": 2.5591, "step": 241050 }, { "epoch": 0.4802451230396532, "grad_norm": 0.15181703865528107, "learning_rate": 0.002, "loss": 2.549, "step": 241060 }, { "epoch": 0.4802650452632921, "grad_norm": 0.16242386400699615, "learning_rate": 0.002, "loss": 2.553, "step": 241070 }, { "epoch": 0.480284967486931, "grad_norm": 0.19558511674404144, "learning_rate": 0.002, "loss": 2.5525, "step": 241080 }, { "epoch": 0.48030488971056995, "grad_norm": 0.153625026345253, "learning_rate": 0.002, "loss": 2.5557, "step": 241090 }, { "epoch": 0.48032481193420884, "grad_norm": 0.18344387412071228, "learning_rate": 0.002, "loss": 2.5524, "step": 241100 }, { "epoch": 0.4803447341578478, "grad_norm": 0.15422604978084564, "learning_rate": 0.002, "loss": 2.5721, "step": 241110 }, { "epoch": 0.4803646563814867, "grad_norm": 0.17760175466537476, "learning_rate": 0.002, "loss": 2.5622, "step": 241120 }, { "epoch": 0.48038457860512557, "grad_norm": 0.16136574745178223, "learning_rate": 0.002, "loss": 2.559, "step": 241130 }, { "epoch": 0.4804045008287645, "grad_norm": 0.17249926924705505, "learning_rate": 0.002, "loss": 2.5452, "step": 241140 }, { "epoch": 0.4804244230524034, "grad_norm": 0.20422105491161346, "learning_rate": 0.002, "loss": 2.5455, "step": 241150 }, { "epoch": 0.48044434527604235, "grad_norm": 0.14695781469345093, "learning_rate": 0.002, "loss": 2.5498, "step": 241160 }, { "epoch": 0.48046426749968124, "grad_norm": 0.5802766680717468, "learning_rate": 0.002, "loss": 2.5447, "step": 241170 }, { "epoch": 0.48048418972332013, "grad_norm": 0.1593100130558014, "learning_rate": 0.002, "loss": 2.5506, "step": 241180 }, { "epoch": 0.4805041119469591, "grad_norm": 0.15267033874988556, "learning_rate": 0.002, "loss": 2.5601, "step": 241190 }, { "epoch": 0.48052403417059797, "grad_norm": 0.16637025773525238, "learning_rate": 0.002, "loss": 2.5328, "step": 241200 }, { "epoch": 0.4805439563942369, "grad_norm": 0.1453944444656372, "learning_rate": 0.002, "loss": 2.557, "step": 241210 }, { "epoch": 0.4805638786178758, "grad_norm": 0.16026322543621063, "learning_rate": 0.002, "loss": 2.5597, "step": 241220 }, { "epoch": 0.48058380084151475, "grad_norm": 0.1781938076019287, "learning_rate": 0.002, "loss": 2.5511, "step": 241230 }, { "epoch": 0.48060372306515364, "grad_norm": 0.15180709958076477, "learning_rate": 0.002, "loss": 2.559, "step": 241240 }, { "epoch": 0.48062364528879253, "grad_norm": 0.18343766033649445, "learning_rate": 0.002, "loss": 2.5707, "step": 241250 }, { "epoch": 0.4806435675124315, "grad_norm": 0.16029483079910278, "learning_rate": 0.002, "loss": 2.5593, "step": 241260 }, { "epoch": 0.48066348973607037, "grad_norm": 0.16356761753559113, "learning_rate": 0.002, "loss": 2.5552, "step": 241270 }, { "epoch": 0.4806834119597093, "grad_norm": 0.15845294296741486, "learning_rate": 0.002, "loss": 2.5621, "step": 241280 }, { "epoch": 0.4807033341833482, "grad_norm": 0.17570999264717102, "learning_rate": 0.002, "loss": 2.5506, "step": 241290 }, { "epoch": 0.4807232564069871, "grad_norm": 0.16666369140148163, "learning_rate": 0.002, "loss": 2.5622, "step": 241300 }, { "epoch": 0.48074317863062604, "grad_norm": 0.18234500288963318, "learning_rate": 0.002, "loss": 2.571, "step": 241310 }, { "epoch": 0.48076310085426494, "grad_norm": 0.16563373804092407, "learning_rate": 0.002, "loss": 2.5703, "step": 241320 }, { "epoch": 0.4807830230779039, "grad_norm": 0.15852901339530945, "learning_rate": 0.002, "loss": 2.5609, "step": 241330 }, { "epoch": 0.4808029453015428, "grad_norm": 0.14310169219970703, "learning_rate": 0.002, "loss": 2.5485, "step": 241340 }, { "epoch": 0.48082286752518166, "grad_norm": 0.15050779283046722, "learning_rate": 0.002, "loss": 2.556, "step": 241350 }, { "epoch": 0.4808427897488206, "grad_norm": 0.226283997297287, "learning_rate": 0.002, "loss": 2.5441, "step": 241360 }, { "epoch": 0.4808627119724595, "grad_norm": 0.15560553967952728, "learning_rate": 0.002, "loss": 2.5528, "step": 241370 }, { "epoch": 0.48088263419609845, "grad_norm": 0.2010544240474701, "learning_rate": 0.002, "loss": 2.5644, "step": 241380 }, { "epoch": 0.48090255641973734, "grad_norm": 0.15750780701637268, "learning_rate": 0.002, "loss": 2.5589, "step": 241390 }, { "epoch": 0.4809224786433763, "grad_norm": 0.1651296466588974, "learning_rate": 0.002, "loss": 2.5586, "step": 241400 }, { "epoch": 0.4809424008670152, "grad_norm": 0.18730787932872772, "learning_rate": 0.002, "loss": 2.5578, "step": 241410 }, { "epoch": 0.48096232309065406, "grad_norm": 0.17732155323028564, "learning_rate": 0.002, "loss": 2.5598, "step": 241420 }, { "epoch": 0.480982245314293, "grad_norm": 0.1526453197002411, "learning_rate": 0.002, "loss": 2.562, "step": 241430 }, { "epoch": 0.4810021675379319, "grad_norm": 0.16719584167003632, "learning_rate": 0.002, "loss": 2.5545, "step": 241440 }, { "epoch": 0.48102208976157085, "grad_norm": 0.18725192546844482, "learning_rate": 0.002, "loss": 2.5539, "step": 241450 }, { "epoch": 0.48104201198520974, "grad_norm": 0.15491704642772675, "learning_rate": 0.002, "loss": 2.5525, "step": 241460 }, { "epoch": 0.48106193420884863, "grad_norm": 0.1651488095521927, "learning_rate": 0.002, "loss": 2.5633, "step": 241470 }, { "epoch": 0.4810818564324876, "grad_norm": 0.17671692371368408, "learning_rate": 0.002, "loss": 2.5607, "step": 241480 }, { "epoch": 0.48110177865612647, "grad_norm": 0.1510787308216095, "learning_rate": 0.002, "loss": 2.5522, "step": 241490 }, { "epoch": 0.4811217008797654, "grad_norm": 0.16499567031860352, "learning_rate": 0.002, "loss": 2.5594, "step": 241500 }, { "epoch": 0.4811416231034043, "grad_norm": 0.21679070591926575, "learning_rate": 0.002, "loss": 2.5501, "step": 241510 }, { "epoch": 0.48116154532704325, "grad_norm": 0.1690482646226883, "learning_rate": 0.002, "loss": 2.562, "step": 241520 }, { "epoch": 0.48118146755068214, "grad_norm": 0.2140062004327774, "learning_rate": 0.002, "loss": 2.5496, "step": 241530 }, { "epoch": 0.48120138977432103, "grad_norm": 0.16888247430324554, "learning_rate": 0.002, "loss": 2.5646, "step": 241540 }, { "epoch": 0.48122131199796, "grad_norm": 0.15768636763095856, "learning_rate": 0.002, "loss": 2.5621, "step": 241550 }, { "epoch": 0.48124123422159887, "grad_norm": 0.18429508805274963, "learning_rate": 0.002, "loss": 2.5539, "step": 241560 }, { "epoch": 0.4812611564452378, "grad_norm": 0.18297304213047028, "learning_rate": 0.002, "loss": 2.5596, "step": 241570 }, { "epoch": 0.4812810786688767, "grad_norm": 0.17371949553489685, "learning_rate": 0.002, "loss": 2.561, "step": 241580 }, { "epoch": 0.4813010008925156, "grad_norm": 0.15748152136802673, "learning_rate": 0.002, "loss": 2.5643, "step": 241590 }, { "epoch": 0.48132092311615454, "grad_norm": 0.18734018504619598, "learning_rate": 0.002, "loss": 2.546, "step": 241600 }, { "epoch": 0.48134084533979343, "grad_norm": 0.20827937126159668, "learning_rate": 0.002, "loss": 2.5479, "step": 241610 }, { "epoch": 0.4813607675634324, "grad_norm": 0.1635572761297226, "learning_rate": 0.002, "loss": 2.5549, "step": 241620 }, { "epoch": 0.48138068978707127, "grad_norm": 0.18937847018241882, "learning_rate": 0.002, "loss": 2.567, "step": 241630 }, { "epoch": 0.48140061201071016, "grad_norm": 0.15357935428619385, "learning_rate": 0.002, "loss": 2.5618, "step": 241640 }, { "epoch": 0.4814205342343491, "grad_norm": 0.18159101903438568, "learning_rate": 0.002, "loss": 2.5527, "step": 241650 }, { "epoch": 0.481440456457988, "grad_norm": 0.15813487768173218, "learning_rate": 0.002, "loss": 2.5453, "step": 241660 }, { "epoch": 0.48146037868162694, "grad_norm": 0.20488451421260834, "learning_rate": 0.002, "loss": 2.575, "step": 241670 }, { "epoch": 0.48148030090526583, "grad_norm": 0.1966095119714737, "learning_rate": 0.002, "loss": 2.5465, "step": 241680 }, { "epoch": 0.4815002231289048, "grad_norm": 0.1621575653553009, "learning_rate": 0.002, "loss": 2.5584, "step": 241690 }, { "epoch": 0.48152014535254367, "grad_norm": 0.18703986704349518, "learning_rate": 0.002, "loss": 2.564, "step": 241700 }, { "epoch": 0.48154006757618256, "grad_norm": 0.17734107375144958, "learning_rate": 0.002, "loss": 2.5453, "step": 241710 }, { "epoch": 0.4815599897998215, "grad_norm": 0.15472157299518585, "learning_rate": 0.002, "loss": 2.5695, "step": 241720 }, { "epoch": 0.4815799120234604, "grad_norm": 0.18383291363716125, "learning_rate": 0.002, "loss": 2.5507, "step": 241730 }, { "epoch": 0.48159983424709935, "grad_norm": 0.18777190148830414, "learning_rate": 0.002, "loss": 2.5468, "step": 241740 }, { "epoch": 0.48161975647073824, "grad_norm": 0.1626344919204712, "learning_rate": 0.002, "loss": 2.5626, "step": 241750 }, { "epoch": 0.4816396786943771, "grad_norm": 0.15070883929729462, "learning_rate": 0.002, "loss": 2.547, "step": 241760 }, { "epoch": 0.4816596009180161, "grad_norm": 0.15709270536899567, "learning_rate": 0.002, "loss": 2.556, "step": 241770 }, { "epoch": 0.48167952314165496, "grad_norm": 0.19267340004444122, "learning_rate": 0.002, "loss": 2.5701, "step": 241780 }, { "epoch": 0.4816994453652939, "grad_norm": 0.1678045094013214, "learning_rate": 0.002, "loss": 2.5532, "step": 241790 }, { "epoch": 0.4817193675889328, "grad_norm": 0.1816978007555008, "learning_rate": 0.002, "loss": 2.5545, "step": 241800 }, { "epoch": 0.48173928981257175, "grad_norm": 0.17622071504592896, "learning_rate": 0.002, "loss": 2.5509, "step": 241810 }, { "epoch": 0.48175921203621064, "grad_norm": 0.17799261212348938, "learning_rate": 0.002, "loss": 2.5564, "step": 241820 }, { "epoch": 0.48177913425984953, "grad_norm": 0.1656762808561325, "learning_rate": 0.002, "loss": 2.5528, "step": 241830 }, { "epoch": 0.4817990564834885, "grad_norm": 0.14944611489772797, "learning_rate": 0.002, "loss": 2.5462, "step": 241840 }, { "epoch": 0.48181897870712737, "grad_norm": 0.18701522052288055, "learning_rate": 0.002, "loss": 2.5524, "step": 241850 }, { "epoch": 0.4818389009307663, "grad_norm": 0.14385199546813965, "learning_rate": 0.002, "loss": 2.5558, "step": 241860 }, { "epoch": 0.4818588231544052, "grad_norm": 0.19085301458835602, "learning_rate": 0.002, "loss": 2.5502, "step": 241870 }, { "epoch": 0.4818787453780441, "grad_norm": 0.16197837889194489, "learning_rate": 0.002, "loss": 2.5482, "step": 241880 }, { "epoch": 0.48189866760168304, "grad_norm": 0.17997996509075165, "learning_rate": 0.002, "loss": 2.5647, "step": 241890 }, { "epoch": 0.48191858982532193, "grad_norm": 0.16690517961978912, "learning_rate": 0.002, "loss": 2.5598, "step": 241900 }, { "epoch": 0.4819385120489609, "grad_norm": 0.16657711565494537, "learning_rate": 0.002, "loss": 2.5708, "step": 241910 }, { "epoch": 0.48195843427259977, "grad_norm": 0.15986324846744537, "learning_rate": 0.002, "loss": 2.563, "step": 241920 }, { "epoch": 0.48197835649623866, "grad_norm": 0.17701320350170135, "learning_rate": 0.002, "loss": 2.5558, "step": 241930 }, { "epoch": 0.4819982787198776, "grad_norm": 0.1710071861743927, "learning_rate": 0.002, "loss": 2.5594, "step": 241940 }, { "epoch": 0.4820182009435165, "grad_norm": 0.4414076805114746, "learning_rate": 0.002, "loss": 2.5534, "step": 241950 }, { "epoch": 0.48203812316715544, "grad_norm": 0.21056032180786133, "learning_rate": 0.002, "loss": 2.5696, "step": 241960 }, { "epoch": 0.48205804539079433, "grad_norm": 0.16869865357875824, "learning_rate": 0.002, "loss": 2.5483, "step": 241970 }, { "epoch": 0.4820779676144333, "grad_norm": 0.16103364527225494, "learning_rate": 0.002, "loss": 2.564, "step": 241980 }, { "epoch": 0.48209788983807217, "grad_norm": 0.19408220052719116, "learning_rate": 0.002, "loss": 2.5547, "step": 241990 }, { "epoch": 0.48211781206171106, "grad_norm": 0.16041851043701172, "learning_rate": 0.002, "loss": 2.5645, "step": 242000 }, { "epoch": 0.48213773428535, "grad_norm": 0.2002573311328888, "learning_rate": 0.002, "loss": 2.5741, "step": 242010 }, { "epoch": 0.4821576565089889, "grad_norm": 0.17027603089809418, "learning_rate": 0.002, "loss": 2.5553, "step": 242020 }, { "epoch": 0.48217757873262784, "grad_norm": 0.15533283352851868, "learning_rate": 0.002, "loss": 2.5557, "step": 242030 }, { "epoch": 0.48219750095626673, "grad_norm": 0.16109223663806915, "learning_rate": 0.002, "loss": 2.574, "step": 242040 }, { "epoch": 0.4822174231799056, "grad_norm": 0.18813695013523102, "learning_rate": 0.002, "loss": 2.5649, "step": 242050 }, { "epoch": 0.48223734540354457, "grad_norm": 0.15745273232460022, "learning_rate": 0.002, "loss": 2.5655, "step": 242060 }, { "epoch": 0.48225726762718346, "grad_norm": 0.1478835642337799, "learning_rate": 0.002, "loss": 2.5718, "step": 242070 }, { "epoch": 0.4822771898508224, "grad_norm": 0.19100302457809448, "learning_rate": 0.002, "loss": 2.5629, "step": 242080 }, { "epoch": 0.4822971120744613, "grad_norm": 0.17835398018360138, "learning_rate": 0.002, "loss": 2.5556, "step": 242090 }, { "epoch": 0.4823170342981002, "grad_norm": 0.13991566002368927, "learning_rate": 0.002, "loss": 2.5727, "step": 242100 }, { "epoch": 0.48233695652173914, "grad_norm": 0.1634771078824997, "learning_rate": 0.002, "loss": 2.5542, "step": 242110 }, { "epoch": 0.482356878745378, "grad_norm": 0.15525943040847778, "learning_rate": 0.002, "loss": 2.5545, "step": 242120 }, { "epoch": 0.48237680096901697, "grad_norm": 0.16997694969177246, "learning_rate": 0.002, "loss": 2.5504, "step": 242130 }, { "epoch": 0.48239672319265586, "grad_norm": 0.16045750677585602, "learning_rate": 0.002, "loss": 2.5694, "step": 242140 }, { "epoch": 0.4824166454162948, "grad_norm": 0.19634369015693665, "learning_rate": 0.002, "loss": 2.5599, "step": 242150 }, { "epoch": 0.4824365676399337, "grad_norm": 0.17905370891094208, "learning_rate": 0.002, "loss": 2.5482, "step": 242160 }, { "epoch": 0.4824564898635726, "grad_norm": 0.1473170816898346, "learning_rate": 0.002, "loss": 2.559, "step": 242170 }, { "epoch": 0.48247641208721154, "grad_norm": 0.1786259561777115, "learning_rate": 0.002, "loss": 2.5602, "step": 242180 }, { "epoch": 0.4824963343108504, "grad_norm": 0.15793471038341522, "learning_rate": 0.002, "loss": 2.5567, "step": 242190 }, { "epoch": 0.4825162565344894, "grad_norm": 0.15627218782901764, "learning_rate": 0.002, "loss": 2.5626, "step": 242200 }, { "epoch": 0.48253617875812826, "grad_norm": 0.17905791103839874, "learning_rate": 0.002, "loss": 2.5526, "step": 242210 }, { "epoch": 0.48255610098176716, "grad_norm": 0.1573525369167328, "learning_rate": 0.002, "loss": 2.5518, "step": 242220 }, { "epoch": 0.4825760232054061, "grad_norm": 0.15650640428066254, "learning_rate": 0.002, "loss": 2.5654, "step": 242230 }, { "epoch": 0.482595945429045, "grad_norm": 0.2263389229774475, "learning_rate": 0.002, "loss": 2.5782, "step": 242240 }, { "epoch": 0.48261586765268394, "grad_norm": 0.16569757461547852, "learning_rate": 0.002, "loss": 2.5553, "step": 242250 }, { "epoch": 0.48263578987632283, "grad_norm": 0.20243443548679352, "learning_rate": 0.002, "loss": 2.5677, "step": 242260 }, { "epoch": 0.4826557120999618, "grad_norm": 0.15061011910438538, "learning_rate": 0.002, "loss": 2.5532, "step": 242270 }, { "epoch": 0.48267563432360067, "grad_norm": 0.15511472523212433, "learning_rate": 0.002, "loss": 2.5582, "step": 242280 }, { "epoch": 0.48269555654723956, "grad_norm": 0.1515020728111267, "learning_rate": 0.002, "loss": 2.5599, "step": 242290 }, { "epoch": 0.4827154787708785, "grad_norm": 0.1812654584646225, "learning_rate": 0.002, "loss": 2.5639, "step": 242300 }, { "epoch": 0.4827354009945174, "grad_norm": 0.17597755789756775, "learning_rate": 0.002, "loss": 2.547, "step": 242310 }, { "epoch": 0.48275532321815634, "grad_norm": 0.2334595024585724, "learning_rate": 0.002, "loss": 2.5509, "step": 242320 }, { "epoch": 0.48277524544179523, "grad_norm": 0.14016097784042358, "learning_rate": 0.002, "loss": 2.5589, "step": 242330 }, { "epoch": 0.4827951676654341, "grad_norm": 0.14353424310684204, "learning_rate": 0.002, "loss": 2.5592, "step": 242340 }, { "epoch": 0.48281508988907307, "grad_norm": 0.1616479605436325, "learning_rate": 0.002, "loss": 2.5388, "step": 242350 }, { "epoch": 0.48283501211271196, "grad_norm": 0.2049250602722168, "learning_rate": 0.002, "loss": 2.5552, "step": 242360 }, { "epoch": 0.4828549343363509, "grad_norm": 0.1820588856935501, "learning_rate": 0.002, "loss": 2.5585, "step": 242370 }, { "epoch": 0.4828748565599898, "grad_norm": 0.1722264438867569, "learning_rate": 0.002, "loss": 2.5449, "step": 242380 }, { "epoch": 0.4828947787836287, "grad_norm": 0.15013493597507477, "learning_rate": 0.002, "loss": 2.5591, "step": 242390 }, { "epoch": 0.48291470100726763, "grad_norm": 0.16734136641025543, "learning_rate": 0.002, "loss": 2.567, "step": 242400 }, { "epoch": 0.4829346232309065, "grad_norm": 0.15221275389194489, "learning_rate": 0.002, "loss": 2.5608, "step": 242410 }, { "epoch": 0.48295454545454547, "grad_norm": 0.22364214062690735, "learning_rate": 0.002, "loss": 2.557, "step": 242420 }, { "epoch": 0.48297446767818436, "grad_norm": 0.18673349916934967, "learning_rate": 0.002, "loss": 2.5659, "step": 242430 }, { "epoch": 0.4829943899018233, "grad_norm": 0.16871032118797302, "learning_rate": 0.002, "loss": 2.5618, "step": 242440 }, { "epoch": 0.4830143121254622, "grad_norm": 0.18377187848091125, "learning_rate": 0.002, "loss": 2.5599, "step": 242450 }, { "epoch": 0.4830342343491011, "grad_norm": 0.19481800496578217, "learning_rate": 0.002, "loss": 2.5516, "step": 242460 }, { "epoch": 0.48305415657274003, "grad_norm": 0.15602412819862366, "learning_rate": 0.002, "loss": 2.5595, "step": 242470 }, { "epoch": 0.4830740787963789, "grad_norm": 0.20150363445281982, "learning_rate": 0.002, "loss": 2.5587, "step": 242480 }, { "epoch": 0.48309400102001787, "grad_norm": 0.15956063568592072, "learning_rate": 0.002, "loss": 2.5332, "step": 242490 }, { "epoch": 0.48311392324365676, "grad_norm": 0.1857500672340393, "learning_rate": 0.002, "loss": 2.5662, "step": 242500 }, { "epoch": 0.48313384546729565, "grad_norm": 0.1740289330482483, "learning_rate": 0.002, "loss": 2.5552, "step": 242510 }, { "epoch": 0.4831537676909346, "grad_norm": 0.166800394654274, "learning_rate": 0.002, "loss": 2.565, "step": 242520 }, { "epoch": 0.4831736899145735, "grad_norm": 0.16698375344276428, "learning_rate": 0.002, "loss": 2.5596, "step": 242530 }, { "epoch": 0.48319361213821244, "grad_norm": 0.14344504475593567, "learning_rate": 0.002, "loss": 2.5604, "step": 242540 }, { "epoch": 0.4832135343618513, "grad_norm": 0.14897757768630981, "learning_rate": 0.002, "loss": 2.5525, "step": 242550 }, { "epoch": 0.4832334565854903, "grad_norm": 0.19683799147605896, "learning_rate": 0.002, "loss": 2.5613, "step": 242560 }, { "epoch": 0.48325337880912916, "grad_norm": 0.18837913870811462, "learning_rate": 0.002, "loss": 2.5449, "step": 242570 }, { "epoch": 0.48327330103276805, "grad_norm": 0.14350616931915283, "learning_rate": 0.002, "loss": 2.5472, "step": 242580 }, { "epoch": 0.483293223256407, "grad_norm": 0.1633325219154358, "learning_rate": 0.002, "loss": 2.5618, "step": 242590 }, { "epoch": 0.4833131454800459, "grad_norm": 0.19162432849407196, "learning_rate": 0.002, "loss": 2.5489, "step": 242600 }, { "epoch": 0.48333306770368484, "grad_norm": 0.16766254603862762, "learning_rate": 0.002, "loss": 2.5622, "step": 242610 }, { "epoch": 0.48335298992732373, "grad_norm": 0.13899317383766174, "learning_rate": 0.002, "loss": 2.5714, "step": 242620 }, { "epoch": 0.4833729121509626, "grad_norm": 0.17703533172607422, "learning_rate": 0.002, "loss": 2.5481, "step": 242630 }, { "epoch": 0.48339283437460157, "grad_norm": 0.1512773334980011, "learning_rate": 0.002, "loss": 2.5399, "step": 242640 }, { "epoch": 0.48341275659824046, "grad_norm": 0.1720135360956192, "learning_rate": 0.002, "loss": 2.5627, "step": 242650 }, { "epoch": 0.4834326788218794, "grad_norm": 0.16734488308429718, "learning_rate": 0.002, "loss": 2.5488, "step": 242660 }, { "epoch": 0.4834526010455183, "grad_norm": 0.1344163864850998, "learning_rate": 0.002, "loss": 2.5495, "step": 242670 }, { "epoch": 0.4834725232691572, "grad_norm": 0.1640438735485077, "learning_rate": 0.002, "loss": 2.5582, "step": 242680 }, { "epoch": 0.48349244549279613, "grad_norm": 0.1583796739578247, "learning_rate": 0.002, "loss": 2.5522, "step": 242690 }, { "epoch": 0.483512367716435, "grad_norm": 0.15869922935962677, "learning_rate": 0.002, "loss": 2.5574, "step": 242700 }, { "epoch": 0.48353228994007397, "grad_norm": 0.1684013307094574, "learning_rate": 0.002, "loss": 2.5396, "step": 242710 }, { "epoch": 0.48355221216371286, "grad_norm": 0.1895112544298172, "learning_rate": 0.002, "loss": 2.5451, "step": 242720 }, { "epoch": 0.4835721343873518, "grad_norm": 0.16358493268489838, "learning_rate": 0.002, "loss": 2.542, "step": 242730 }, { "epoch": 0.4835920566109907, "grad_norm": 0.14328312873840332, "learning_rate": 0.002, "loss": 2.5439, "step": 242740 }, { "epoch": 0.4836119788346296, "grad_norm": 0.16615034639835358, "learning_rate": 0.002, "loss": 2.5803, "step": 242750 }, { "epoch": 0.48363190105826853, "grad_norm": 0.1743859350681305, "learning_rate": 0.002, "loss": 2.5628, "step": 242760 }, { "epoch": 0.4836518232819074, "grad_norm": 0.16422036290168762, "learning_rate": 0.002, "loss": 2.5739, "step": 242770 }, { "epoch": 0.48367174550554637, "grad_norm": 0.19275222718715668, "learning_rate": 0.002, "loss": 2.553, "step": 242780 }, { "epoch": 0.48369166772918526, "grad_norm": 0.1707279533147812, "learning_rate": 0.002, "loss": 2.5447, "step": 242790 }, { "epoch": 0.48371158995282415, "grad_norm": 0.17250265181064606, "learning_rate": 0.002, "loss": 2.5618, "step": 242800 }, { "epoch": 0.4837315121764631, "grad_norm": 0.19513151049613953, "learning_rate": 0.002, "loss": 2.5632, "step": 242810 }, { "epoch": 0.483751434400102, "grad_norm": 0.15213412046432495, "learning_rate": 0.002, "loss": 2.5488, "step": 242820 }, { "epoch": 0.48377135662374093, "grad_norm": 0.15723085403442383, "learning_rate": 0.002, "loss": 2.5558, "step": 242830 }, { "epoch": 0.4837912788473798, "grad_norm": 0.18153467774391174, "learning_rate": 0.002, "loss": 2.5422, "step": 242840 }, { "epoch": 0.48381120107101877, "grad_norm": 0.19134621322155, "learning_rate": 0.002, "loss": 2.5535, "step": 242850 }, { "epoch": 0.48383112329465766, "grad_norm": 0.19311316311359406, "learning_rate": 0.002, "loss": 2.5617, "step": 242860 }, { "epoch": 0.48385104551829655, "grad_norm": 0.15499579906463623, "learning_rate": 0.002, "loss": 2.5504, "step": 242870 }, { "epoch": 0.4838709677419355, "grad_norm": 0.1704588532447815, "learning_rate": 0.002, "loss": 2.5511, "step": 242880 }, { "epoch": 0.4838908899655744, "grad_norm": 0.15524345636367798, "learning_rate": 0.002, "loss": 2.5563, "step": 242890 }, { "epoch": 0.48391081218921334, "grad_norm": 0.1460874378681183, "learning_rate": 0.002, "loss": 2.5552, "step": 242900 }, { "epoch": 0.4839307344128522, "grad_norm": 0.1602761447429657, "learning_rate": 0.002, "loss": 2.5686, "step": 242910 }, { "epoch": 0.4839506566364911, "grad_norm": 0.2038916051387787, "learning_rate": 0.002, "loss": 2.5693, "step": 242920 }, { "epoch": 0.48397057886013006, "grad_norm": 0.1428433656692505, "learning_rate": 0.002, "loss": 2.5551, "step": 242930 }, { "epoch": 0.48399050108376895, "grad_norm": 0.14044292271137238, "learning_rate": 0.002, "loss": 2.5569, "step": 242940 }, { "epoch": 0.4840104233074079, "grad_norm": 0.15668366849422455, "learning_rate": 0.002, "loss": 2.5422, "step": 242950 }, { "epoch": 0.4840303455310468, "grad_norm": 0.17969034612178802, "learning_rate": 0.002, "loss": 2.5467, "step": 242960 }, { "epoch": 0.4840502677546857, "grad_norm": 0.16040967404842377, "learning_rate": 0.002, "loss": 2.5598, "step": 242970 }, { "epoch": 0.4840701899783246, "grad_norm": 0.18462388217449188, "learning_rate": 0.002, "loss": 2.5546, "step": 242980 }, { "epoch": 0.4840901122019635, "grad_norm": 0.16531208157539368, "learning_rate": 0.002, "loss": 2.5733, "step": 242990 }, { "epoch": 0.48411003442560246, "grad_norm": 0.20201286673545837, "learning_rate": 0.002, "loss": 2.559, "step": 243000 }, { "epoch": 0.48412995664924136, "grad_norm": 0.15263603627681732, "learning_rate": 0.002, "loss": 2.5676, "step": 243010 }, { "epoch": 0.4841498788728803, "grad_norm": 0.16102375090122223, "learning_rate": 0.002, "loss": 2.5542, "step": 243020 }, { "epoch": 0.4841698010965192, "grad_norm": 0.1703743189573288, "learning_rate": 0.002, "loss": 2.5684, "step": 243030 }, { "epoch": 0.4841897233201581, "grad_norm": 0.17196670174598694, "learning_rate": 0.002, "loss": 2.5429, "step": 243040 }, { "epoch": 0.48420964554379703, "grad_norm": 0.18353550136089325, "learning_rate": 0.002, "loss": 2.5487, "step": 243050 }, { "epoch": 0.4842295677674359, "grad_norm": 0.16323405504226685, "learning_rate": 0.002, "loss": 2.5534, "step": 243060 }, { "epoch": 0.48424948999107487, "grad_norm": 0.17892001569271088, "learning_rate": 0.002, "loss": 2.5586, "step": 243070 }, { "epoch": 0.48426941221471376, "grad_norm": 0.14804071187973022, "learning_rate": 0.002, "loss": 2.5447, "step": 243080 }, { "epoch": 0.48428933443835265, "grad_norm": 0.1393643170595169, "learning_rate": 0.002, "loss": 2.5533, "step": 243090 }, { "epoch": 0.4843092566619916, "grad_norm": 0.1878342181444168, "learning_rate": 0.002, "loss": 2.5516, "step": 243100 }, { "epoch": 0.4843291788856305, "grad_norm": 0.1581593155860901, "learning_rate": 0.002, "loss": 2.571, "step": 243110 }, { "epoch": 0.48434910110926943, "grad_norm": 0.16541172564029694, "learning_rate": 0.002, "loss": 2.5558, "step": 243120 }, { "epoch": 0.4843690233329083, "grad_norm": 0.19158779084682465, "learning_rate": 0.002, "loss": 2.5621, "step": 243130 }, { "epoch": 0.4843889455565472, "grad_norm": 0.16092996299266815, "learning_rate": 0.002, "loss": 2.5622, "step": 243140 }, { "epoch": 0.48440886778018616, "grad_norm": 0.190511092543602, "learning_rate": 0.002, "loss": 2.5702, "step": 243150 }, { "epoch": 0.48442879000382505, "grad_norm": 0.1704127937555313, "learning_rate": 0.002, "loss": 2.5554, "step": 243160 }, { "epoch": 0.484448712227464, "grad_norm": 0.19381499290466309, "learning_rate": 0.002, "loss": 2.5593, "step": 243170 }, { "epoch": 0.4844686344511029, "grad_norm": 0.13955457508563995, "learning_rate": 0.002, "loss": 2.5637, "step": 243180 }, { "epoch": 0.48448855667474183, "grad_norm": 0.1609272062778473, "learning_rate": 0.002, "loss": 2.5703, "step": 243190 }, { "epoch": 0.4845084788983807, "grad_norm": 0.1545650213956833, "learning_rate": 0.002, "loss": 2.5539, "step": 243200 }, { "epoch": 0.4845284011220196, "grad_norm": 0.1618167757987976, "learning_rate": 0.002, "loss": 2.548, "step": 243210 }, { "epoch": 0.48454832334565856, "grad_norm": 0.19733509421348572, "learning_rate": 0.002, "loss": 2.5703, "step": 243220 }, { "epoch": 0.48456824556929745, "grad_norm": 0.15951408445835114, "learning_rate": 0.002, "loss": 2.5559, "step": 243230 }, { "epoch": 0.4845881677929364, "grad_norm": 0.17461620271205902, "learning_rate": 0.002, "loss": 2.5562, "step": 243240 }, { "epoch": 0.4846080900165753, "grad_norm": 0.19100773334503174, "learning_rate": 0.002, "loss": 2.5513, "step": 243250 }, { "epoch": 0.4846280122402142, "grad_norm": 0.1541963815689087, "learning_rate": 0.002, "loss": 2.5535, "step": 243260 }, { "epoch": 0.4846479344638531, "grad_norm": 0.16238069534301758, "learning_rate": 0.002, "loss": 2.5554, "step": 243270 }, { "epoch": 0.484667856687492, "grad_norm": 0.21106719970703125, "learning_rate": 0.002, "loss": 2.5542, "step": 243280 }, { "epoch": 0.48468777891113096, "grad_norm": 0.15603920817375183, "learning_rate": 0.002, "loss": 2.5724, "step": 243290 }, { "epoch": 0.48470770113476985, "grad_norm": 0.1516238898038864, "learning_rate": 0.002, "loss": 2.5501, "step": 243300 }, { "epoch": 0.4847276233584088, "grad_norm": 0.1684318333864212, "learning_rate": 0.002, "loss": 2.5439, "step": 243310 }, { "epoch": 0.4847475455820477, "grad_norm": 0.16929368674755096, "learning_rate": 0.002, "loss": 2.5521, "step": 243320 }, { "epoch": 0.4847674678056866, "grad_norm": 0.1752917766571045, "learning_rate": 0.002, "loss": 2.5514, "step": 243330 }, { "epoch": 0.4847873900293255, "grad_norm": 0.1781092882156372, "learning_rate": 0.002, "loss": 2.5568, "step": 243340 }, { "epoch": 0.4848073122529644, "grad_norm": 0.17239055037498474, "learning_rate": 0.002, "loss": 2.5534, "step": 243350 }, { "epoch": 0.48482723447660336, "grad_norm": 0.1792202591896057, "learning_rate": 0.002, "loss": 2.5568, "step": 243360 }, { "epoch": 0.48484715670024225, "grad_norm": 0.18137671053409576, "learning_rate": 0.002, "loss": 2.5586, "step": 243370 }, { "epoch": 0.48486707892388115, "grad_norm": 0.1830470710992813, "learning_rate": 0.002, "loss": 2.5454, "step": 243380 }, { "epoch": 0.4848870011475201, "grad_norm": 0.1822129786014557, "learning_rate": 0.002, "loss": 2.5638, "step": 243390 }, { "epoch": 0.484906923371159, "grad_norm": 0.17638371884822845, "learning_rate": 0.002, "loss": 2.5486, "step": 243400 }, { "epoch": 0.48492684559479793, "grad_norm": 0.1765284538269043, "learning_rate": 0.002, "loss": 2.558, "step": 243410 }, { "epoch": 0.4849467678184368, "grad_norm": 0.1912441998720169, "learning_rate": 0.002, "loss": 2.5424, "step": 243420 }, { "epoch": 0.4849666900420757, "grad_norm": 0.1571609377861023, "learning_rate": 0.002, "loss": 2.5588, "step": 243430 }, { "epoch": 0.48498661226571466, "grad_norm": 0.17320337891578674, "learning_rate": 0.002, "loss": 2.5475, "step": 243440 }, { "epoch": 0.48500653448935355, "grad_norm": 0.17115093767642975, "learning_rate": 0.002, "loss": 2.5606, "step": 243450 }, { "epoch": 0.4850264567129925, "grad_norm": 0.15194718539714813, "learning_rate": 0.002, "loss": 2.561, "step": 243460 }, { "epoch": 0.4850463789366314, "grad_norm": 0.15913605690002441, "learning_rate": 0.002, "loss": 2.5525, "step": 243470 }, { "epoch": 0.48506630116027033, "grad_norm": 0.14386829733848572, "learning_rate": 0.002, "loss": 2.5427, "step": 243480 }, { "epoch": 0.4850862233839092, "grad_norm": 0.18337306380271912, "learning_rate": 0.002, "loss": 2.5563, "step": 243490 }, { "epoch": 0.4851061456075481, "grad_norm": 0.14562316238880157, "learning_rate": 0.002, "loss": 2.5466, "step": 243500 }, { "epoch": 0.48512606783118706, "grad_norm": 0.18488718569278717, "learning_rate": 0.002, "loss": 2.5491, "step": 243510 }, { "epoch": 0.48514599005482595, "grad_norm": 0.16574858129024506, "learning_rate": 0.002, "loss": 2.5811, "step": 243520 }, { "epoch": 0.4851659122784649, "grad_norm": 0.18131981790065765, "learning_rate": 0.002, "loss": 2.5518, "step": 243530 }, { "epoch": 0.4851858345021038, "grad_norm": 0.15334074199199677, "learning_rate": 0.002, "loss": 2.5536, "step": 243540 }, { "epoch": 0.4852057567257427, "grad_norm": 0.17359115183353424, "learning_rate": 0.002, "loss": 2.5515, "step": 243550 }, { "epoch": 0.4852256789493816, "grad_norm": 0.2348618358373642, "learning_rate": 0.002, "loss": 2.5593, "step": 243560 }, { "epoch": 0.4852456011730205, "grad_norm": 0.1425279676914215, "learning_rate": 0.002, "loss": 2.5571, "step": 243570 }, { "epoch": 0.48526552339665946, "grad_norm": 0.20703008770942688, "learning_rate": 0.002, "loss": 2.5414, "step": 243580 }, { "epoch": 0.48528544562029835, "grad_norm": 0.1636437028646469, "learning_rate": 0.002, "loss": 2.559, "step": 243590 }, { "epoch": 0.4853053678439373, "grad_norm": 0.17717573046684265, "learning_rate": 0.002, "loss": 2.5609, "step": 243600 }, { "epoch": 0.4853252900675762, "grad_norm": 0.18255190551280975, "learning_rate": 0.002, "loss": 2.5573, "step": 243610 }, { "epoch": 0.4853452122912151, "grad_norm": 0.17586004734039307, "learning_rate": 0.002, "loss": 2.5545, "step": 243620 }, { "epoch": 0.485365134514854, "grad_norm": 0.15542879700660706, "learning_rate": 0.002, "loss": 2.5574, "step": 243630 }, { "epoch": 0.4853850567384929, "grad_norm": 0.15072746574878693, "learning_rate": 0.002, "loss": 2.5584, "step": 243640 }, { "epoch": 0.48540497896213186, "grad_norm": 0.16271422803401947, "learning_rate": 0.002, "loss": 2.5587, "step": 243650 }, { "epoch": 0.48542490118577075, "grad_norm": 0.190350741147995, "learning_rate": 0.002, "loss": 2.5669, "step": 243660 }, { "epoch": 0.48544482340940964, "grad_norm": 0.1669577807188034, "learning_rate": 0.002, "loss": 2.5576, "step": 243670 }, { "epoch": 0.4854647456330486, "grad_norm": 0.15094693005084991, "learning_rate": 0.002, "loss": 2.5561, "step": 243680 }, { "epoch": 0.4854846678566875, "grad_norm": 0.14922338724136353, "learning_rate": 0.002, "loss": 2.5669, "step": 243690 }, { "epoch": 0.4855045900803264, "grad_norm": 0.15670078992843628, "learning_rate": 0.002, "loss": 2.5614, "step": 243700 }, { "epoch": 0.4855245123039653, "grad_norm": 0.22816146910190582, "learning_rate": 0.002, "loss": 2.5513, "step": 243710 }, { "epoch": 0.4855444345276042, "grad_norm": 0.16098111867904663, "learning_rate": 0.002, "loss": 2.5557, "step": 243720 }, { "epoch": 0.48556435675124315, "grad_norm": 0.21859823167324066, "learning_rate": 0.002, "loss": 2.5623, "step": 243730 }, { "epoch": 0.48558427897488204, "grad_norm": 0.1806020438671112, "learning_rate": 0.002, "loss": 2.5699, "step": 243740 }, { "epoch": 0.485604201198521, "grad_norm": 0.17112216353416443, "learning_rate": 0.002, "loss": 2.5689, "step": 243750 }, { "epoch": 0.4856241234221599, "grad_norm": 0.15109603106975555, "learning_rate": 0.002, "loss": 2.5629, "step": 243760 }, { "epoch": 0.4856440456457988, "grad_norm": 0.2121289223432541, "learning_rate": 0.002, "loss": 2.5565, "step": 243770 }, { "epoch": 0.4856639678694377, "grad_norm": 0.15538720786571503, "learning_rate": 0.002, "loss": 2.5592, "step": 243780 }, { "epoch": 0.4856838900930766, "grad_norm": 0.1855466067790985, "learning_rate": 0.002, "loss": 2.5581, "step": 243790 }, { "epoch": 0.48570381231671556, "grad_norm": 0.16812069714069366, "learning_rate": 0.002, "loss": 2.5641, "step": 243800 }, { "epoch": 0.48572373454035445, "grad_norm": 0.1677890568971634, "learning_rate": 0.002, "loss": 2.5612, "step": 243810 }, { "epoch": 0.4857436567639934, "grad_norm": 0.2318747192621231, "learning_rate": 0.002, "loss": 2.5614, "step": 243820 }, { "epoch": 0.4857635789876323, "grad_norm": 0.14841604232788086, "learning_rate": 0.002, "loss": 2.5569, "step": 243830 }, { "epoch": 0.4857835012112712, "grad_norm": 0.1655435562133789, "learning_rate": 0.002, "loss": 2.5482, "step": 243840 }, { "epoch": 0.4858034234349101, "grad_norm": 0.14902834594249725, "learning_rate": 0.002, "loss": 2.552, "step": 243850 }, { "epoch": 0.485823345658549, "grad_norm": 0.1936999261379242, "learning_rate": 0.002, "loss": 2.556, "step": 243860 }, { "epoch": 0.48584326788218796, "grad_norm": 0.1844162493944168, "learning_rate": 0.002, "loss": 2.5696, "step": 243870 }, { "epoch": 0.48586319010582685, "grad_norm": 0.16961050033569336, "learning_rate": 0.002, "loss": 2.5535, "step": 243880 }, { "epoch": 0.48588311232946574, "grad_norm": 0.17433273792266846, "learning_rate": 0.002, "loss": 2.5558, "step": 243890 }, { "epoch": 0.4859030345531047, "grad_norm": 0.1492019146680832, "learning_rate": 0.002, "loss": 2.561, "step": 243900 }, { "epoch": 0.4859229567767436, "grad_norm": 0.14648859202861786, "learning_rate": 0.002, "loss": 2.5631, "step": 243910 }, { "epoch": 0.4859428790003825, "grad_norm": 0.16212886571884155, "learning_rate": 0.002, "loss": 2.5644, "step": 243920 }, { "epoch": 0.4859628012240214, "grad_norm": 0.15274778008460999, "learning_rate": 0.002, "loss": 2.549, "step": 243930 }, { "epoch": 0.48598272344766036, "grad_norm": 0.15814703702926636, "learning_rate": 0.002, "loss": 2.5551, "step": 243940 }, { "epoch": 0.48600264567129925, "grad_norm": 0.16832704842090607, "learning_rate": 0.002, "loss": 2.5582, "step": 243950 }, { "epoch": 0.48602256789493814, "grad_norm": 0.18497061729431152, "learning_rate": 0.002, "loss": 2.5543, "step": 243960 }, { "epoch": 0.4860424901185771, "grad_norm": 0.1628459244966507, "learning_rate": 0.002, "loss": 2.5471, "step": 243970 }, { "epoch": 0.486062412342216, "grad_norm": 0.19202005863189697, "learning_rate": 0.002, "loss": 2.5793, "step": 243980 }, { "epoch": 0.4860823345658549, "grad_norm": 0.16023364663124084, "learning_rate": 0.002, "loss": 2.5476, "step": 243990 }, { "epoch": 0.4861022567894938, "grad_norm": 0.16706492006778717, "learning_rate": 0.002, "loss": 2.56, "step": 244000 }, { "epoch": 0.4861221790131327, "grad_norm": 0.18102841079235077, "learning_rate": 0.002, "loss": 2.5708, "step": 244010 }, { "epoch": 0.48614210123677165, "grad_norm": 0.2219690978527069, "learning_rate": 0.002, "loss": 2.5547, "step": 244020 }, { "epoch": 0.48616202346041054, "grad_norm": 0.14518281817436218, "learning_rate": 0.002, "loss": 2.5684, "step": 244030 }, { "epoch": 0.4861819456840495, "grad_norm": 0.17407551407814026, "learning_rate": 0.002, "loss": 2.5481, "step": 244040 }, { "epoch": 0.4862018679076884, "grad_norm": 0.17716658115386963, "learning_rate": 0.002, "loss": 2.5589, "step": 244050 }, { "epoch": 0.4862217901313273, "grad_norm": 0.1803881973028183, "learning_rate": 0.002, "loss": 2.5528, "step": 244060 }, { "epoch": 0.4862417123549662, "grad_norm": 0.15906858444213867, "learning_rate": 0.002, "loss": 2.5497, "step": 244070 }, { "epoch": 0.4862616345786051, "grad_norm": 0.15761876106262207, "learning_rate": 0.002, "loss": 2.5587, "step": 244080 }, { "epoch": 0.48628155680224405, "grad_norm": 0.17396007478237152, "learning_rate": 0.002, "loss": 2.5456, "step": 244090 }, { "epoch": 0.48630147902588294, "grad_norm": 0.19757701456546783, "learning_rate": 0.002, "loss": 2.5551, "step": 244100 }, { "epoch": 0.4863214012495219, "grad_norm": 0.14409805834293365, "learning_rate": 0.002, "loss": 2.5629, "step": 244110 }, { "epoch": 0.4863413234731608, "grad_norm": 0.18099328875541687, "learning_rate": 0.002, "loss": 2.536, "step": 244120 }, { "epoch": 0.48636124569679967, "grad_norm": 0.15153250098228455, "learning_rate": 0.002, "loss": 2.5538, "step": 244130 }, { "epoch": 0.4863811679204386, "grad_norm": 0.17599137127399445, "learning_rate": 0.002, "loss": 2.5708, "step": 244140 }, { "epoch": 0.4864010901440775, "grad_norm": 0.15340696275234222, "learning_rate": 0.002, "loss": 2.5601, "step": 244150 }, { "epoch": 0.48642101236771645, "grad_norm": 0.14975741505622864, "learning_rate": 0.002, "loss": 2.5549, "step": 244160 }, { "epoch": 0.48644093459135535, "grad_norm": 0.16599929332733154, "learning_rate": 0.002, "loss": 2.5452, "step": 244170 }, { "epoch": 0.48646085681499424, "grad_norm": 0.18410883843898773, "learning_rate": 0.002, "loss": 2.569, "step": 244180 }, { "epoch": 0.4864807790386332, "grad_norm": 0.15443120896816254, "learning_rate": 0.002, "loss": 2.5453, "step": 244190 }, { "epoch": 0.4865007012622721, "grad_norm": 0.16207407414913177, "learning_rate": 0.002, "loss": 2.5628, "step": 244200 }, { "epoch": 0.486520623485911, "grad_norm": 0.15307773649692535, "learning_rate": 0.002, "loss": 2.5486, "step": 244210 }, { "epoch": 0.4865405457095499, "grad_norm": 0.23535208404064178, "learning_rate": 0.002, "loss": 2.5533, "step": 244220 }, { "epoch": 0.48656046793318886, "grad_norm": 0.13804574310779572, "learning_rate": 0.002, "loss": 2.5688, "step": 244230 }, { "epoch": 0.48658039015682775, "grad_norm": 0.165349543094635, "learning_rate": 0.002, "loss": 2.5597, "step": 244240 }, { "epoch": 0.48660031238046664, "grad_norm": 0.1592189073562622, "learning_rate": 0.002, "loss": 2.551, "step": 244250 }, { "epoch": 0.4866202346041056, "grad_norm": 0.15363940596580505, "learning_rate": 0.002, "loss": 2.5592, "step": 244260 }, { "epoch": 0.4866401568277445, "grad_norm": 0.8574919700622559, "learning_rate": 0.002, "loss": 2.5609, "step": 244270 }, { "epoch": 0.4866600790513834, "grad_norm": 0.1874454915523529, "learning_rate": 0.002, "loss": 2.6064, "step": 244280 }, { "epoch": 0.4866800012750223, "grad_norm": 0.12558698654174805, "learning_rate": 0.002, "loss": 2.588, "step": 244290 }, { "epoch": 0.4866999234986612, "grad_norm": 0.13156452775001526, "learning_rate": 0.002, "loss": 2.5597, "step": 244300 }, { "epoch": 0.48671984572230015, "grad_norm": 0.18164387345314026, "learning_rate": 0.002, "loss": 2.5625, "step": 244310 }, { "epoch": 0.48673976794593904, "grad_norm": 0.13619916141033173, "learning_rate": 0.002, "loss": 2.5615, "step": 244320 }, { "epoch": 0.486759690169578, "grad_norm": 0.16391931474208832, "learning_rate": 0.002, "loss": 2.5476, "step": 244330 }, { "epoch": 0.4867796123932169, "grad_norm": 0.1648901402950287, "learning_rate": 0.002, "loss": 2.5698, "step": 244340 }, { "epoch": 0.4867995346168558, "grad_norm": 0.16116951406002045, "learning_rate": 0.002, "loss": 2.5631, "step": 244350 }, { "epoch": 0.4868194568404947, "grad_norm": 0.1764230728149414, "learning_rate": 0.002, "loss": 2.542, "step": 244360 }, { "epoch": 0.4868393790641336, "grad_norm": 0.14795267581939697, "learning_rate": 0.002, "loss": 2.5488, "step": 244370 }, { "epoch": 0.48685930128777255, "grad_norm": 0.1595456749200821, "learning_rate": 0.002, "loss": 2.5615, "step": 244380 }, { "epoch": 0.48687922351141144, "grad_norm": 0.19350579380989075, "learning_rate": 0.002, "loss": 2.5539, "step": 244390 }, { "epoch": 0.4868991457350504, "grad_norm": 0.14327619969844818, "learning_rate": 0.002, "loss": 2.5453, "step": 244400 }, { "epoch": 0.4869190679586893, "grad_norm": 0.16782987117767334, "learning_rate": 0.002, "loss": 2.5656, "step": 244410 }, { "epoch": 0.48693899018232817, "grad_norm": 0.15481388568878174, "learning_rate": 0.002, "loss": 2.5571, "step": 244420 }, { "epoch": 0.4869589124059671, "grad_norm": 0.15940527617931366, "learning_rate": 0.002, "loss": 2.547, "step": 244430 }, { "epoch": 0.486978834629606, "grad_norm": 0.1829177588224411, "learning_rate": 0.002, "loss": 2.5587, "step": 244440 }, { "epoch": 0.48699875685324495, "grad_norm": 0.15087471902370453, "learning_rate": 0.002, "loss": 2.5481, "step": 244450 }, { "epoch": 0.48701867907688384, "grad_norm": 0.1681654304265976, "learning_rate": 0.002, "loss": 2.5598, "step": 244460 }, { "epoch": 0.48703860130052273, "grad_norm": 0.14317578077316284, "learning_rate": 0.002, "loss": 2.5582, "step": 244470 }, { "epoch": 0.4870585235241617, "grad_norm": 0.1429426223039627, "learning_rate": 0.002, "loss": 2.5576, "step": 244480 }, { "epoch": 0.48707844574780057, "grad_norm": 0.16981132328510284, "learning_rate": 0.002, "loss": 2.5584, "step": 244490 }, { "epoch": 0.4870983679714395, "grad_norm": 0.21567298471927643, "learning_rate": 0.002, "loss": 2.5449, "step": 244500 }, { "epoch": 0.4871182901950784, "grad_norm": 0.17078375816345215, "learning_rate": 0.002, "loss": 2.5472, "step": 244510 }, { "epoch": 0.48713821241871735, "grad_norm": 0.14604102075099945, "learning_rate": 0.002, "loss": 2.5563, "step": 244520 }, { "epoch": 0.48715813464235624, "grad_norm": 0.16939957439899445, "learning_rate": 0.002, "loss": 2.5483, "step": 244530 }, { "epoch": 0.48717805686599513, "grad_norm": 0.15617135167121887, "learning_rate": 0.002, "loss": 2.5692, "step": 244540 }, { "epoch": 0.4871979790896341, "grad_norm": 0.18258751928806305, "learning_rate": 0.002, "loss": 2.5635, "step": 244550 }, { "epoch": 0.48721790131327297, "grad_norm": 0.15122897922992706, "learning_rate": 0.002, "loss": 2.571, "step": 244560 }, { "epoch": 0.4872378235369119, "grad_norm": 0.1624930053949356, "learning_rate": 0.002, "loss": 2.5658, "step": 244570 }, { "epoch": 0.4872577457605508, "grad_norm": 0.185356006026268, "learning_rate": 0.002, "loss": 2.5672, "step": 244580 }, { "epoch": 0.4872776679841897, "grad_norm": 0.15926730632781982, "learning_rate": 0.002, "loss": 2.5565, "step": 244590 }, { "epoch": 0.48729759020782865, "grad_norm": 0.1463940590620041, "learning_rate": 0.002, "loss": 2.5525, "step": 244600 }, { "epoch": 0.48731751243146754, "grad_norm": 0.21577464044094086, "learning_rate": 0.002, "loss": 2.5551, "step": 244610 }, { "epoch": 0.4873374346551065, "grad_norm": 0.1686210036277771, "learning_rate": 0.002, "loss": 2.5458, "step": 244620 }, { "epoch": 0.4873573568787454, "grad_norm": 0.15993890166282654, "learning_rate": 0.002, "loss": 2.5674, "step": 244630 }, { "epoch": 0.48737727910238426, "grad_norm": 0.1807023137807846, "learning_rate": 0.002, "loss": 2.5516, "step": 244640 }, { "epoch": 0.4873972013260232, "grad_norm": 0.17586421966552734, "learning_rate": 0.002, "loss": 2.5456, "step": 244650 }, { "epoch": 0.4874171235496621, "grad_norm": 0.1717759370803833, "learning_rate": 0.002, "loss": 2.565, "step": 244660 }, { "epoch": 0.48743704577330105, "grad_norm": 0.15805143117904663, "learning_rate": 0.002, "loss": 2.5448, "step": 244670 }, { "epoch": 0.48745696799693994, "grad_norm": 0.16642726957798004, "learning_rate": 0.002, "loss": 2.5545, "step": 244680 }, { "epoch": 0.4874768902205789, "grad_norm": 0.19358928501605988, "learning_rate": 0.002, "loss": 2.5618, "step": 244690 }, { "epoch": 0.4874968124442178, "grad_norm": 0.16430865228176117, "learning_rate": 0.002, "loss": 2.564, "step": 244700 }, { "epoch": 0.48751673466785667, "grad_norm": 0.16287583112716675, "learning_rate": 0.002, "loss": 2.561, "step": 244710 }, { "epoch": 0.4875366568914956, "grad_norm": 0.18179406225681305, "learning_rate": 0.002, "loss": 2.5726, "step": 244720 }, { "epoch": 0.4875565791151345, "grad_norm": 0.16627246141433716, "learning_rate": 0.002, "loss": 2.5757, "step": 244730 }, { "epoch": 0.48757650133877345, "grad_norm": 0.190920889377594, "learning_rate": 0.002, "loss": 2.5631, "step": 244740 }, { "epoch": 0.48759642356241234, "grad_norm": 0.18115368485450745, "learning_rate": 0.002, "loss": 2.5625, "step": 244750 }, { "epoch": 0.48761634578605123, "grad_norm": 0.19512124359607697, "learning_rate": 0.002, "loss": 2.5613, "step": 244760 }, { "epoch": 0.4876362680096902, "grad_norm": 0.13997036218643188, "learning_rate": 0.002, "loss": 2.541, "step": 244770 }, { "epoch": 0.48765619023332907, "grad_norm": 0.15964049100875854, "learning_rate": 0.002, "loss": 2.5575, "step": 244780 }, { "epoch": 0.487676112456968, "grad_norm": 0.20294170081615448, "learning_rate": 0.002, "loss": 2.5625, "step": 244790 }, { "epoch": 0.4876960346806069, "grad_norm": 0.1751004159450531, "learning_rate": 0.002, "loss": 2.5571, "step": 244800 }, { "epoch": 0.48771595690424585, "grad_norm": 0.1527380794286728, "learning_rate": 0.002, "loss": 2.5561, "step": 244810 }, { "epoch": 0.48773587912788474, "grad_norm": 0.18251605331897736, "learning_rate": 0.002, "loss": 2.5526, "step": 244820 }, { "epoch": 0.48775580135152363, "grad_norm": 0.18387077748775482, "learning_rate": 0.002, "loss": 2.5692, "step": 244830 }, { "epoch": 0.4877757235751626, "grad_norm": 0.1512122005224228, "learning_rate": 0.002, "loss": 2.5614, "step": 244840 }, { "epoch": 0.48779564579880147, "grad_norm": 0.16695134341716766, "learning_rate": 0.002, "loss": 2.5716, "step": 244850 }, { "epoch": 0.4878155680224404, "grad_norm": 0.1707516461610794, "learning_rate": 0.002, "loss": 2.542, "step": 244860 }, { "epoch": 0.4878354902460793, "grad_norm": 0.16650977730751038, "learning_rate": 0.002, "loss": 2.562, "step": 244870 }, { "epoch": 0.4878554124697182, "grad_norm": 0.17832469940185547, "learning_rate": 0.002, "loss": 2.5447, "step": 244880 }, { "epoch": 0.48787533469335714, "grad_norm": 0.18296527862548828, "learning_rate": 0.002, "loss": 2.5418, "step": 244890 }, { "epoch": 0.48789525691699603, "grad_norm": 0.16719934344291687, "learning_rate": 0.002, "loss": 2.567, "step": 244900 }, { "epoch": 0.487915179140635, "grad_norm": 0.15355375409126282, "learning_rate": 0.002, "loss": 2.5397, "step": 244910 }, { "epoch": 0.48793510136427387, "grad_norm": 0.17826583981513977, "learning_rate": 0.002, "loss": 2.5534, "step": 244920 }, { "epoch": 0.48795502358791276, "grad_norm": 0.15818646550178528, "learning_rate": 0.002, "loss": 2.5564, "step": 244930 }, { "epoch": 0.4879749458115517, "grad_norm": 0.18613168597221375, "learning_rate": 0.002, "loss": 2.5603, "step": 244940 }, { "epoch": 0.4879948680351906, "grad_norm": 0.17555972933769226, "learning_rate": 0.002, "loss": 2.551, "step": 244950 }, { "epoch": 0.48801479025882954, "grad_norm": 0.17471696436405182, "learning_rate": 0.002, "loss": 2.5553, "step": 244960 }, { "epoch": 0.48803471248246844, "grad_norm": 0.1698397696018219, "learning_rate": 0.002, "loss": 2.5471, "step": 244970 }, { "epoch": 0.4880546347061074, "grad_norm": 0.15726496279239655, "learning_rate": 0.002, "loss": 2.5524, "step": 244980 }, { "epoch": 0.4880745569297463, "grad_norm": 0.19463925063610077, "learning_rate": 0.002, "loss": 2.5374, "step": 244990 }, { "epoch": 0.48809447915338516, "grad_norm": 0.18863391876220703, "learning_rate": 0.002, "loss": 2.5613, "step": 245000 }, { "epoch": 0.4881144013770241, "grad_norm": 0.14961735904216766, "learning_rate": 0.002, "loss": 2.5601, "step": 245010 }, { "epoch": 0.488134323600663, "grad_norm": 0.1651226133108139, "learning_rate": 0.002, "loss": 2.5533, "step": 245020 }, { "epoch": 0.48815424582430195, "grad_norm": 0.13104280829429626, "learning_rate": 0.002, "loss": 2.5677, "step": 245030 }, { "epoch": 0.48817416804794084, "grad_norm": 0.19937670230865479, "learning_rate": 0.002, "loss": 2.5541, "step": 245040 }, { "epoch": 0.48819409027157973, "grad_norm": 0.15814882516860962, "learning_rate": 0.002, "loss": 2.5461, "step": 245050 }, { "epoch": 0.4882140124952187, "grad_norm": 0.1537308394908905, "learning_rate": 0.002, "loss": 2.5419, "step": 245060 }, { "epoch": 0.48823393471885757, "grad_norm": 0.15734092891216278, "learning_rate": 0.002, "loss": 2.5482, "step": 245070 }, { "epoch": 0.4882538569424965, "grad_norm": 0.22697660326957703, "learning_rate": 0.002, "loss": 2.5703, "step": 245080 }, { "epoch": 0.4882737791661354, "grad_norm": 0.16932818293571472, "learning_rate": 0.002, "loss": 2.5629, "step": 245090 }, { "epoch": 0.48829370138977435, "grad_norm": 0.15910981595516205, "learning_rate": 0.002, "loss": 2.55, "step": 245100 }, { "epoch": 0.48831362361341324, "grad_norm": 0.16475899517536163, "learning_rate": 0.002, "loss": 2.554, "step": 245110 }, { "epoch": 0.48833354583705213, "grad_norm": 0.20430870354175568, "learning_rate": 0.002, "loss": 2.5536, "step": 245120 }, { "epoch": 0.4883534680606911, "grad_norm": 0.17866811156272888, "learning_rate": 0.002, "loss": 2.5607, "step": 245130 }, { "epoch": 0.48837339028432997, "grad_norm": 0.14818070828914642, "learning_rate": 0.002, "loss": 2.5569, "step": 245140 }, { "epoch": 0.4883933125079689, "grad_norm": 0.16186657547950745, "learning_rate": 0.002, "loss": 2.5619, "step": 245150 }, { "epoch": 0.4884132347316078, "grad_norm": 0.18424084782600403, "learning_rate": 0.002, "loss": 2.5515, "step": 245160 }, { "epoch": 0.4884331569552467, "grad_norm": 0.17807011306285858, "learning_rate": 0.002, "loss": 2.5613, "step": 245170 }, { "epoch": 0.48845307917888564, "grad_norm": 0.15649156272411346, "learning_rate": 0.002, "loss": 2.5711, "step": 245180 }, { "epoch": 0.48847300140252453, "grad_norm": 0.17403222620487213, "learning_rate": 0.002, "loss": 2.567, "step": 245190 }, { "epoch": 0.4884929236261635, "grad_norm": 0.16806265711784363, "learning_rate": 0.002, "loss": 2.5804, "step": 245200 }, { "epoch": 0.48851284584980237, "grad_norm": 0.13213543593883514, "learning_rate": 0.002, "loss": 2.5693, "step": 245210 }, { "epoch": 0.48853276807344126, "grad_norm": 0.16156038641929626, "learning_rate": 0.002, "loss": 2.5461, "step": 245220 }, { "epoch": 0.4885526902970802, "grad_norm": 0.14657719433307648, "learning_rate": 0.002, "loss": 2.5701, "step": 245230 }, { "epoch": 0.4885726125207191, "grad_norm": 0.17108683288097382, "learning_rate": 0.002, "loss": 2.5544, "step": 245240 }, { "epoch": 0.48859253474435804, "grad_norm": 0.17866012454032898, "learning_rate": 0.002, "loss": 2.5507, "step": 245250 }, { "epoch": 0.48861245696799693, "grad_norm": 0.1571541279554367, "learning_rate": 0.002, "loss": 2.5506, "step": 245260 }, { "epoch": 0.4886323791916359, "grad_norm": 0.4191801846027374, "learning_rate": 0.002, "loss": 2.5626, "step": 245270 }, { "epoch": 0.48865230141527477, "grad_norm": 0.15165159106254578, "learning_rate": 0.002, "loss": 2.5618, "step": 245280 }, { "epoch": 0.48867222363891366, "grad_norm": 0.17128463089466095, "learning_rate": 0.002, "loss": 2.5639, "step": 245290 }, { "epoch": 0.4886921458625526, "grad_norm": 0.1966419816017151, "learning_rate": 0.002, "loss": 2.559, "step": 245300 }, { "epoch": 0.4887120680861915, "grad_norm": 0.14113523066043854, "learning_rate": 0.002, "loss": 2.5494, "step": 245310 }, { "epoch": 0.48873199030983044, "grad_norm": 0.15574754774570465, "learning_rate": 0.002, "loss": 2.5528, "step": 245320 }, { "epoch": 0.48875191253346933, "grad_norm": 0.21558032929897308, "learning_rate": 0.002, "loss": 2.5454, "step": 245330 }, { "epoch": 0.4887718347571082, "grad_norm": 0.13929134607315063, "learning_rate": 0.002, "loss": 2.5697, "step": 245340 }, { "epoch": 0.48879175698074717, "grad_norm": 0.22583426535129547, "learning_rate": 0.002, "loss": 2.5452, "step": 245350 }, { "epoch": 0.48881167920438606, "grad_norm": 0.19563792645931244, "learning_rate": 0.002, "loss": 2.5651, "step": 245360 }, { "epoch": 0.488831601428025, "grad_norm": 0.15870144963264465, "learning_rate": 0.002, "loss": 2.5514, "step": 245370 }, { "epoch": 0.4888515236516639, "grad_norm": 0.16426463425159454, "learning_rate": 0.002, "loss": 2.5536, "step": 245380 }, { "epoch": 0.4888714458753028, "grad_norm": 0.14440488815307617, "learning_rate": 0.002, "loss": 2.5638, "step": 245390 }, { "epoch": 0.48889136809894174, "grad_norm": 0.19393640756607056, "learning_rate": 0.002, "loss": 2.5753, "step": 245400 }, { "epoch": 0.4889112903225806, "grad_norm": 0.18709199130535126, "learning_rate": 0.002, "loss": 2.5716, "step": 245410 }, { "epoch": 0.4889312125462196, "grad_norm": 0.16036519408226013, "learning_rate": 0.002, "loss": 2.5655, "step": 245420 }, { "epoch": 0.48895113476985846, "grad_norm": 0.16935256123542786, "learning_rate": 0.002, "loss": 2.5617, "step": 245430 }, { "epoch": 0.4889710569934974, "grad_norm": 0.17601840198040009, "learning_rate": 0.002, "loss": 2.5431, "step": 245440 }, { "epoch": 0.4889909792171363, "grad_norm": 0.20724551379680634, "learning_rate": 0.002, "loss": 2.5746, "step": 245450 }, { "epoch": 0.4890109014407752, "grad_norm": 0.14444537460803986, "learning_rate": 0.002, "loss": 2.5524, "step": 245460 }, { "epoch": 0.48903082366441414, "grad_norm": 0.15816621482372284, "learning_rate": 0.002, "loss": 2.5563, "step": 245470 }, { "epoch": 0.48905074588805303, "grad_norm": 0.19159714877605438, "learning_rate": 0.002, "loss": 2.5509, "step": 245480 }, { "epoch": 0.489070668111692, "grad_norm": 0.16640396416187286, "learning_rate": 0.002, "loss": 2.5531, "step": 245490 }, { "epoch": 0.48909059033533087, "grad_norm": 0.1615244746208191, "learning_rate": 0.002, "loss": 2.56, "step": 245500 }, { "epoch": 0.48911051255896976, "grad_norm": 0.17225578427314758, "learning_rate": 0.002, "loss": 2.5582, "step": 245510 }, { "epoch": 0.4891304347826087, "grad_norm": 0.19440877437591553, "learning_rate": 0.002, "loss": 2.547, "step": 245520 }, { "epoch": 0.4891503570062476, "grad_norm": 0.16176924109458923, "learning_rate": 0.002, "loss": 2.5576, "step": 245530 }, { "epoch": 0.48917027922988654, "grad_norm": 0.17534908652305603, "learning_rate": 0.002, "loss": 2.5558, "step": 245540 }, { "epoch": 0.48919020145352543, "grad_norm": 0.16180960834026337, "learning_rate": 0.002, "loss": 2.5476, "step": 245550 }, { "epoch": 0.4892101236771644, "grad_norm": 0.20971401035785675, "learning_rate": 0.002, "loss": 2.568, "step": 245560 }, { "epoch": 0.48923004590080327, "grad_norm": 0.19587287306785583, "learning_rate": 0.002, "loss": 2.5397, "step": 245570 }, { "epoch": 0.48924996812444216, "grad_norm": 0.16418354213237762, "learning_rate": 0.002, "loss": 2.548, "step": 245580 }, { "epoch": 0.4892698903480811, "grad_norm": 0.17053402960300446, "learning_rate": 0.002, "loss": 2.5547, "step": 245590 }, { "epoch": 0.48928981257172, "grad_norm": 0.14127613604068756, "learning_rate": 0.002, "loss": 2.5574, "step": 245600 }, { "epoch": 0.48930973479535894, "grad_norm": 0.16640765964984894, "learning_rate": 0.002, "loss": 2.5456, "step": 245610 }, { "epoch": 0.48932965701899783, "grad_norm": 0.15753521025180817, "learning_rate": 0.002, "loss": 2.5644, "step": 245620 }, { "epoch": 0.4893495792426367, "grad_norm": 0.19327913224697113, "learning_rate": 0.002, "loss": 2.5561, "step": 245630 }, { "epoch": 0.48936950146627567, "grad_norm": 0.17550517618656158, "learning_rate": 0.002, "loss": 2.5573, "step": 245640 }, { "epoch": 0.48938942368991456, "grad_norm": 0.1547204852104187, "learning_rate": 0.002, "loss": 2.5438, "step": 245650 }, { "epoch": 0.4894093459135535, "grad_norm": 0.18888916075229645, "learning_rate": 0.002, "loss": 2.5526, "step": 245660 }, { "epoch": 0.4894292681371924, "grad_norm": 0.17988179624080658, "learning_rate": 0.002, "loss": 2.5564, "step": 245670 }, { "epoch": 0.4894491903608313, "grad_norm": 0.1682274341583252, "learning_rate": 0.002, "loss": 2.5591, "step": 245680 }, { "epoch": 0.48946911258447023, "grad_norm": 0.1568065583705902, "learning_rate": 0.002, "loss": 2.5499, "step": 245690 }, { "epoch": 0.4894890348081091, "grad_norm": 0.14535391330718994, "learning_rate": 0.002, "loss": 2.558, "step": 245700 }, { "epoch": 0.48950895703174807, "grad_norm": 0.1731732338666916, "learning_rate": 0.002, "loss": 2.5622, "step": 245710 }, { "epoch": 0.48952887925538696, "grad_norm": 0.20072564482688904, "learning_rate": 0.002, "loss": 2.5524, "step": 245720 }, { "epoch": 0.4895488014790259, "grad_norm": 0.18863436579704285, "learning_rate": 0.002, "loss": 2.5401, "step": 245730 }, { "epoch": 0.4895687237026648, "grad_norm": 0.16628095507621765, "learning_rate": 0.002, "loss": 2.556, "step": 245740 }, { "epoch": 0.4895886459263037, "grad_norm": 0.18221881985664368, "learning_rate": 0.002, "loss": 2.5585, "step": 245750 }, { "epoch": 0.48960856814994264, "grad_norm": 0.15503866970539093, "learning_rate": 0.002, "loss": 2.5619, "step": 245760 }, { "epoch": 0.4896284903735815, "grad_norm": 0.1428736299276352, "learning_rate": 0.002, "loss": 2.567, "step": 245770 }, { "epoch": 0.4896484125972205, "grad_norm": 0.3382616937160492, "learning_rate": 0.002, "loss": 2.5698, "step": 245780 }, { "epoch": 0.48966833482085936, "grad_norm": 0.17683173716068268, "learning_rate": 0.002, "loss": 2.5552, "step": 245790 }, { "epoch": 0.48968825704449825, "grad_norm": 0.19587458670139313, "learning_rate": 0.002, "loss": 2.552, "step": 245800 }, { "epoch": 0.4897081792681372, "grad_norm": 0.15610520541667938, "learning_rate": 0.002, "loss": 2.5672, "step": 245810 }, { "epoch": 0.4897281014917761, "grad_norm": 0.16193383932113647, "learning_rate": 0.002, "loss": 2.5531, "step": 245820 }, { "epoch": 0.48974802371541504, "grad_norm": 0.16488879919052124, "learning_rate": 0.002, "loss": 2.55, "step": 245830 }, { "epoch": 0.48976794593905393, "grad_norm": 0.1675615757703781, "learning_rate": 0.002, "loss": 2.56, "step": 245840 }, { "epoch": 0.4897878681626929, "grad_norm": 0.12892845273017883, "learning_rate": 0.002, "loss": 2.5514, "step": 245850 }, { "epoch": 0.48980779038633177, "grad_norm": 0.15855178236961365, "learning_rate": 0.002, "loss": 2.5469, "step": 245860 }, { "epoch": 0.48982771260997066, "grad_norm": 0.17766648530960083, "learning_rate": 0.002, "loss": 2.5782, "step": 245870 }, { "epoch": 0.4898476348336096, "grad_norm": 0.19958257675170898, "learning_rate": 0.002, "loss": 2.5494, "step": 245880 }, { "epoch": 0.4898675570572485, "grad_norm": 0.18994836509227753, "learning_rate": 0.002, "loss": 2.5631, "step": 245890 }, { "epoch": 0.48988747928088744, "grad_norm": 0.1489315778017044, "learning_rate": 0.002, "loss": 2.573, "step": 245900 }, { "epoch": 0.48990740150452633, "grad_norm": 0.21485395729541779, "learning_rate": 0.002, "loss": 2.5522, "step": 245910 }, { "epoch": 0.4899273237281652, "grad_norm": 0.18390290439128876, "learning_rate": 0.002, "loss": 2.5515, "step": 245920 }, { "epoch": 0.48994724595180417, "grad_norm": 0.14026860892772675, "learning_rate": 0.002, "loss": 2.553, "step": 245930 }, { "epoch": 0.48996716817544306, "grad_norm": 0.18073362112045288, "learning_rate": 0.002, "loss": 2.5693, "step": 245940 }, { "epoch": 0.489987090399082, "grad_norm": 0.14153151214122772, "learning_rate": 0.002, "loss": 2.5495, "step": 245950 }, { "epoch": 0.4900070126227209, "grad_norm": 0.19794024527072906, "learning_rate": 0.002, "loss": 2.56, "step": 245960 }, { "epoch": 0.4900269348463598, "grad_norm": 0.1558922678232193, "learning_rate": 0.002, "loss": 2.5599, "step": 245970 }, { "epoch": 0.49004685706999873, "grad_norm": 0.20145387947559357, "learning_rate": 0.002, "loss": 2.5651, "step": 245980 }, { "epoch": 0.4900667792936376, "grad_norm": 0.17363275587558746, "learning_rate": 0.002, "loss": 2.5573, "step": 245990 }, { "epoch": 0.49008670151727657, "grad_norm": 0.17082080245018005, "learning_rate": 0.002, "loss": 2.5564, "step": 246000 }, { "epoch": 0.49010662374091546, "grad_norm": 0.1942475587129593, "learning_rate": 0.002, "loss": 2.544, "step": 246010 }, { "epoch": 0.4901265459645544, "grad_norm": 0.16985538601875305, "learning_rate": 0.002, "loss": 2.5574, "step": 246020 }, { "epoch": 0.4901464681881933, "grad_norm": 0.1331302672624588, "learning_rate": 0.002, "loss": 2.5628, "step": 246030 }, { "epoch": 0.4901663904118322, "grad_norm": 0.16330397129058838, "learning_rate": 0.002, "loss": 2.552, "step": 246040 }, { "epoch": 0.49018631263547113, "grad_norm": 0.18116573989391327, "learning_rate": 0.002, "loss": 2.5491, "step": 246050 }, { "epoch": 0.49020623485911, "grad_norm": 0.1792932003736496, "learning_rate": 0.002, "loss": 2.558, "step": 246060 }, { "epoch": 0.49022615708274897, "grad_norm": 0.1723519116640091, "learning_rate": 0.002, "loss": 2.5628, "step": 246070 }, { "epoch": 0.49024607930638786, "grad_norm": 0.17208391427993774, "learning_rate": 0.002, "loss": 2.5489, "step": 246080 }, { "epoch": 0.49026600153002675, "grad_norm": 0.21101801097393036, "learning_rate": 0.002, "loss": 2.5651, "step": 246090 }, { "epoch": 0.4902859237536657, "grad_norm": 0.18146780133247375, "learning_rate": 0.002, "loss": 2.567, "step": 246100 }, { "epoch": 0.4903058459773046, "grad_norm": 0.15981431305408478, "learning_rate": 0.002, "loss": 2.5654, "step": 246110 }, { "epoch": 0.49032576820094353, "grad_norm": 0.1839805543422699, "learning_rate": 0.002, "loss": 2.5455, "step": 246120 }, { "epoch": 0.4903456904245824, "grad_norm": 0.16478529572486877, "learning_rate": 0.002, "loss": 2.5594, "step": 246130 }, { "epoch": 0.4903656126482213, "grad_norm": 0.19601157307624817, "learning_rate": 0.002, "loss": 2.575, "step": 246140 }, { "epoch": 0.49038553487186026, "grad_norm": 0.1449480652809143, "learning_rate": 0.002, "loss": 2.5434, "step": 246150 }, { "epoch": 0.49040545709549915, "grad_norm": 0.1648111343383789, "learning_rate": 0.002, "loss": 2.5484, "step": 246160 }, { "epoch": 0.4904253793191381, "grad_norm": 0.16098040342330933, "learning_rate": 0.002, "loss": 2.5574, "step": 246170 }, { "epoch": 0.490445301542777, "grad_norm": 0.14697206020355225, "learning_rate": 0.002, "loss": 2.5574, "step": 246180 }, { "epoch": 0.49046522376641594, "grad_norm": 0.16891828179359436, "learning_rate": 0.002, "loss": 2.5466, "step": 246190 }, { "epoch": 0.4904851459900548, "grad_norm": 0.15198364853858948, "learning_rate": 0.002, "loss": 2.5619, "step": 246200 }, { "epoch": 0.4905050682136937, "grad_norm": 0.2091386318206787, "learning_rate": 0.002, "loss": 2.5519, "step": 246210 }, { "epoch": 0.49052499043733266, "grad_norm": 0.16160587966442108, "learning_rate": 0.002, "loss": 2.5544, "step": 246220 }, { "epoch": 0.49054491266097155, "grad_norm": 0.17102190852165222, "learning_rate": 0.002, "loss": 2.5486, "step": 246230 }, { "epoch": 0.4905648348846105, "grad_norm": 0.17038553953170776, "learning_rate": 0.002, "loss": 2.5694, "step": 246240 }, { "epoch": 0.4905847571082494, "grad_norm": 0.16287870705127716, "learning_rate": 0.002, "loss": 2.5545, "step": 246250 }, { "epoch": 0.4906046793318883, "grad_norm": 0.18252527713775635, "learning_rate": 0.002, "loss": 2.5568, "step": 246260 }, { "epoch": 0.49062460155552723, "grad_norm": 0.18539801239967346, "learning_rate": 0.002, "loss": 2.5636, "step": 246270 }, { "epoch": 0.4906445237791661, "grad_norm": 0.1748475581407547, "learning_rate": 0.002, "loss": 2.5465, "step": 246280 }, { "epoch": 0.49066444600280507, "grad_norm": 0.1537550836801529, "learning_rate": 0.002, "loss": 2.5479, "step": 246290 }, { "epoch": 0.49068436822644396, "grad_norm": 0.15306460857391357, "learning_rate": 0.002, "loss": 2.5492, "step": 246300 }, { "epoch": 0.4907042904500829, "grad_norm": 0.1744430512189865, "learning_rate": 0.002, "loss": 2.5595, "step": 246310 }, { "epoch": 0.4907242126737218, "grad_norm": 0.16244204342365265, "learning_rate": 0.002, "loss": 2.5508, "step": 246320 }, { "epoch": 0.4907441348973607, "grad_norm": 0.1561405062675476, "learning_rate": 0.002, "loss": 2.5447, "step": 246330 }, { "epoch": 0.49076405712099963, "grad_norm": 0.18081112205982208, "learning_rate": 0.002, "loss": 2.564, "step": 246340 }, { "epoch": 0.4907839793446385, "grad_norm": 0.1622064709663391, "learning_rate": 0.002, "loss": 2.5601, "step": 246350 }, { "epoch": 0.49080390156827747, "grad_norm": 0.2152308225631714, "learning_rate": 0.002, "loss": 2.5592, "step": 246360 }, { "epoch": 0.49082382379191636, "grad_norm": 0.16380110383033752, "learning_rate": 0.002, "loss": 2.5563, "step": 246370 }, { "epoch": 0.49084374601555525, "grad_norm": 0.1565030962228775, "learning_rate": 0.002, "loss": 2.5599, "step": 246380 }, { "epoch": 0.4908636682391942, "grad_norm": 0.23067009449005127, "learning_rate": 0.002, "loss": 2.5599, "step": 246390 }, { "epoch": 0.4908835904628331, "grad_norm": 0.1841314435005188, "learning_rate": 0.002, "loss": 2.5698, "step": 246400 }, { "epoch": 0.49090351268647203, "grad_norm": 0.17961803078651428, "learning_rate": 0.002, "loss": 2.5522, "step": 246410 }, { "epoch": 0.4909234349101109, "grad_norm": 0.2000645101070404, "learning_rate": 0.002, "loss": 2.5528, "step": 246420 }, { "epoch": 0.4909433571337498, "grad_norm": 0.18509984016418457, "learning_rate": 0.002, "loss": 2.5616, "step": 246430 }, { "epoch": 0.49096327935738876, "grad_norm": 0.18647675216197968, "learning_rate": 0.002, "loss": 2.5573, "step": 246440 }, { "epoch": 0.49098320158102765, "grad_norm": 0.15558859705924988, "learning_rate": 0.002, "loss": 2.5569, "step": 246450 }, { "epoch": 0.4910031238046666, "grad_norm": 0.15936608612537384, "learning_rate": 0.002, "loss": 2.5575, "step": 246460 }, { "epoch": 0.4910230460283055, "grad_norm": 0.1915006786584854, "learning_rate": 0.002, "loss": 2.5521, "step": 246470 }, { "epoch": 0.49104296825194443, "grad_norm": 0.16439664363861084, "learning_rate": 0.002, "loss": 2.5509, "step": 246480 }, { "epoch": 0.4910628904755833, "grad_norm": 0.16526612639427185, "learning_rate": 0.002, "loss": 2.5612, "step": 246490 }, { "epoch": 0.4910828126992222, "grad_norm": 0.1588956117630005, "learning_rate": 0.002, "loss": 2.5727, "step": 246500 }, { "epoch": 0.49110273492286116, "grad_norm": 0.1614716500043869, "learning_rate": 0.002, "loss": 2.5637, "step": 246510 }, { "epoch": 0.49112265714650005, "grad_norm": 0.13453273475170135, "learning_rate": 0.002, "loss": 2.5504, "step": 246520 }, { "epoch": 0.491142579370139, "grad_norm": 0.15344835817813873, "learning_rate": 0.002, "loss": 2.5748, "step": 246530 }, { "epoch": 0.4911625015937779, "grad_norm": 0.21733030676841736, "learning_rate": 0.002, "loss": 2.5604, "step": 246540 }, { "epoch": 0.4911824238174168, "grad_norm": 0.15425704419612885, "learning_rate": 0.002, "loss": 2.5672, "step": 246550 }, { "epoch": 0.4912023460410557, "grad_norm": 0.17535461485385895, "learning_rate": 0.002, "loss": 2.5551, "step": 246560 }, { "epoch": 0.4912222682646946, "grad_norm": 0.19799137115478516, "learning_rate": 0.002, "loss": 2.5616, "step": 246570 }, { "epoch": 0.49124219048833356, "grad_norm": 0.16215045750141144, "learning_rate": 0.002, "loss": 2.558, "step": 246580 }, { "epoch": 0.49126211271197245, "grad_norm": 0.17979945242404938, "learning_rate": 0.002, "loss": 2.5586, "step": 246590 }, { "epoch": 0.4912820349356114, "grad_norm": 0.1646210104227066, "learning_rate": 0.002, "loss": 2.5708, "step": 246600 }, { "epoch": 0.4913019571592503, "grad_norm": 0.14453671872615814, "learning_rate": 0.002, "loss": 2.5587, "step": 246610 }, { "epoch": 0.4913218793828892, "grad_norm": 0.2014312893152237, "learning_rate": 0.002, "loss": 2.5613, "step": 246620 }, { "epoch": 0.49134180160652813, "grad_norm": 0.16772761940956116, "learning_rate": 0.002, "loss": 2.5742, "step": 246630 }, { "epoch": 0.491361723830167, "grad_norm": 0.16459988057613373, "learning_rate": 0.002, "loss": 2.5466, "step": 246640 }, { "epoch": 0.49138164605380596, "grad_norm": 0.1682225614786148, "learning_rate": 0.002, "loss": 2.5621, "step": 246650 }, { "epoch": 0.49140156827744486, "grad_norm": 0.15316268801689148, "learning_rate": 0.002, "loss": 2.5577, "step": 246660 }, { "epoch": 0.49142149050108375, "grad_norm": 0.16888956725597382, "learning_rate": 0.002, "loss": 2.5664, "step": 246670 }, { "epoch": 0.4914414127247227, "grad_norm": 0.18575769662857056, "learning_rate": 0.002, "loss": 2.5729, "step": 246680 }, { "epoch": 0.4914613349483616, "grad_norm": 0.1591188758611679, "learning_rate": 0.002, "loss": 2.5568, "step": 246690 }, { "epoch": 0.49148125717200053, "grad_norm": 0.20953692495822906, "learning_rate": 0.002, "loss": 2.555, "step": 246700 }, { "epoch": 0.4915011793956394, "grad_norm": 0.1448199599981308, "learning_rate": 0.002, "loss": 2.5637, "step": 246710 }, { "epoch": 0.4915211016192783, "grad_norm": 0.1609708070755005, "learning_rate": 0.002, "loss": 2.554, "step": 246720 }, { "epoch": 0.49154102384291726, "grad_norm": 0.1309739649295807, "learning_rate": 0.002, "loss": 2.5639, "step": 246730 }, { "epoch": 0.49156094606655615, "grad_norm": 0.18054290115833282, "learning_rate": 0.002, "loss": 2.5578, "step": 246740 }, { "epoch": 0.4915808682901951, "grad_norm": 0.27237969636917114, "learning_rate": 0.002, "loss": 2.5467, "step": 246750 }, { "epoch": 0.491600790513834, "grad_norm": 0.1906280368566513, "learning_rate": 0.002, "loss": 2.5544, "step": 246760 }, { "epoch": 0.49162071273747293, "grad_norm": 0.15503773093223572, "learning_rate": 0.002, "loss": 2.5655, "step": 246770 }, { "epoch": 0.4916406349611118, "grad_norm": 0.17762771248817444, "learning_rate": 0.002, "loss": 2.5552, "step": 246780 }, { "epoch": 0.4916605571847507, "grad_norm": 0.16407333314418793, "learning_rate": 0.002, "loss": 2.5664, "step": 246790 }, { "epoch": 0.49168047940838966, "grad_norm": 0.17329534888267517, "learning_rate": 0.002, "loss": 2.5535, "step": 246800 }, { "epoch": 0.49170040163202855, "grad_norm": 0.16463357210159302, "learning_rate": 0.002, "loss": 2.5585, "step": 246810 }, { "epoch": 0.4917203238556675, "grad_norm": 0.1812768131494522, "learning_rate": 0.002, "loss": 2.5797, "step": 246820 }, { "epoch": 0.4917402460793064, "grad_norm": 0.1514372080564499, "learning_rate": 0.002, "loss": 2.5571, "step": 246830 }, { "epoch": 0.4917601683029453, "grad_norm": 0.162374347448349, "learning_rate": 0.002, "loss": 2.554, "step": 246840 }, { "epoch": 0.4917800905265842, "grad_norm": 0.17395876348018646, "learning_rate": 0.002, "loss": 2.5491, "step": 246850 }, { "epoch": 0.4918000127502231, "grad_norm": 0.1579936146736145, "learning_rate": 0.002, "loss": 2.5662, "step": 246860 }, { "epoch": 0.49181993497386206, "grad_norm": 0.16342788934707642, "learning_rate": 0.002, "loss": 2.5559, "step": 246870 }, { "epoch": 0.49183985719750095, "grad_norm": 0.15749475359916687, "learning_rate": 0.002, "loss": 2.5581, "step": 246880 }, { "epoch": 0.4918597794211399, "grad_norm": 0.22492298483848572, "learning_rate": 0.002, "loss": 2.557, "step": 246890 }, { "epoch": 0.4918797016447788, "grad_norm": 0.16819508373737335, "learning_rate": 0.002, "loss": 2.5601, "step": 246900 }, { "epoch": 0.4918996238684177, "grad_norm": 0.16546562314033508, "learning_rate": 0.002, "loss": 2.5713, "step": 246910 }, { "epoch": 0.4919195460920566, "grad_norm": 0.13829092681407928, "learning_rate": 0.002, "loss": 2.5579, "step": 246920 }, { "epoch": 0.4919394683156955, "grad_norm": 0.18197865784168243, "learning_rate": 0.002, "loss": 2.5529, "step": 246930 }, { "epoch": 0.49195939053933446, "grad_norm": 0.19405940175056458, "learning_rate": 0.002, "loss": 2.5496, "step": 246940 }, { "epoch": 0.49197931276297335, "grad_norm": 0.15304307639598846, "learning_rate": 0.002, "loss": 2.5596, "step": 246950 }, { "epoch": 0.49199923498661224, "grad_norm": 0.15094299614429474, "learning_rate": 0.002, "loss": 2.5434, "step": 246960 }, { "epoch": 0.4920191572102512, "grad_norm": 0.15992209315299988, "learning_rate": 0.002, "loss": 2.5666, "step": 246970 }, { "epoch": 0.4920390794338901, "grad_norm": 0.16837075352668762, "learning_rate": 0.002, "loss": 2.5621, "step": 246980 }, { "epoch": 0.492059001657529, "grad_norm": 0.1715531051158905, "learning_rate": 0.002, "loss": 2.5605, "step": 246990 }, { "epoch": 0.4920789238811679, "grad_norm": 0.17500834167003632, "learning_rate": 0.002, "loss": 2.5751, "step": 247000 }, { "epoch": 0.4920988461048068, "grad_norm": 0.2201882302761078, "learning_rate": 0.002, "loss": 2.5587, "step": 247010 }, { "epoch": 0.49211876832844575, "grad_norm": 0.17277996242046356, "learning_rate": 0.002, "loss": 2.5515, "step": 247020 }, { "epoch": 0.49213869055208465, "grad_norm": 0.1726454496383667, "learning_rate": 0.002, "loss": 2.558, "step": 247030 }, { "epoch": 0.4921586127757236, "grad_norm": 0.17295768857002258, "learning_rate": 0.002, "loss": 2.5585, "step": 247040 }, { "epoch": 0.4921785349993625, "grad_norm": 0.16463854908943176, "learning_rate": 0.002, "loss": 2.5557, "step": 247050 }, { "epoch": 0.49219845722300143, "grad_norm": 0.17996470630168915, "learning_rate": 0.002, "loss": 2.5599, "step": 247060 }, { "epoch": 0.4922183794466403, "grad_norm": 0.20577353239059448, "learning_rate": 0.002, "loss": 2.5759, "step": 247070 }, { "epoch": 0.4922383016702792, "grad_norm": 0.18101724982261658, "learning_rate": 0.002, "loss": 2.5632, "step": 247080 }, { "epoch": 0.49225822389391816, "grad_norm": 0.19148950278759003, "learning_rate": 0.002, "loss": 2.5576, "step": 247090 }, { "epoch": 0.49227814611755705, "grad_norm": 0.18168465793132782, "learning_rate": 0.002, "loss": 2.5603, "step": 247100 }, { "epoch": 0.492298068341196, "grad_norm": 0.14693023264408112, "learning_rate": 0.002, "loss": 2.548, "step": 247110 }, { "epoch": 0.4923179905648349, "grad_norm": 0.17938008904457092, "learning_rate": 0.002, "loss": 2.5587, "step": 247120 }, { "epoch": 0.4923379127884738, "grad_norm": 0.18949785828590393, "learning_rate": 0.002, "loss": 2.5624, "step": 247130 }, { "epoch": 0.4923578350121127, "grad_norm": 0.1555071622133255, "learning_rate": 0.002, "loss": 2.5489, "step": 247140 }, { "epoch": 0.4923777572357516, "grad_norm": 0.20124144852161407, "learning_rate": 0.002, "loss": 2.5594, "step": 247150 }, { "epoch": 0.49239767945939056, "grad_norm": 0.15659616887569427, "learning_rate": 0.002, "loss": 2.5679, "step": 247160 }, { "epoch": 0.49241760168302945, "grad_norm": 0.17032523453235626, "learning_rate": 0.002, "loss": 2.5495, "step": 247170 }, { "epoch": 0.49243752390666834, "grad_norm": 0.19410328567028046, "learning_rate": 0.002, "loss": 2.5567, "step": 247180 }, { "epoch": 0.4924574461303073, "grad_norm": 0.1692616492509842, "learning_rate": 0.002, "loss": 2.5564, "step": 247190 }, { "epoch": 0.4924773683539462, "grad_norm": 0.15730540454387665, "learning_rate": 0.002, "loss": 2.5588, "step": 247200 }, { "epoch": 0.4924972905775851, "grad_norm": 0.17915278673171997, "learning_rate": 0.002, "loss": 2.5594, "step": 247210 }, { "epoch": 0.492517212801224, "grad_norm": 0.1564440131187439, "learning_rate": 0.002, "loss": 2.5574, "step": 247220 }, { "epoch": 0.49253713502486296, "grad_norm": 0.1775791049003601, "learning_rate": 0.002, "loss": 2.5525, "step": 247230 }, { "epoch": 0.49255705724850185, "grad_norm": 0.15260213613510132, "learning_rate": 0.002, "loss": 2.5578, "step": 247240 }, { "epoch": 0.49257697947214074, "grad_norm": 0.17762307822704315, "learning_rate": 0.002, "loss": 2.5496, "step": 247250 }, { "epoch": 0.4925969016957797, "grad_norm": 0.17090222239494324, "learning_rate": 0.002, "loss": 2.5398, "step": 247260 }, { "epoch": 0.4926168239194186, "grad_norm": 0.15844951570034027, "learning_rate": 0.002, "loss": 2.5548, "step": 247270 }, { "epoch": 0.4926367461430575, "grad_norm": 0.16015611588954926, "learning_rate": 0.002, "loss": 2.5624, "step": 247280 }, { "epoch": 0.4926566683666964, "grad_norm": 0.18850848078727722, "learning_rate": 0.002, "loss": 2.5494, "step": 247290 }, { "epoch": 0.4926765905903353, "grad_norm": 0.1634512096643448, "learning_rate": 0.002, "loss": 2.5604, "step": 247300 }, { "epoch": 0.49269651281397425, "grad_norm": 0.15385720133781433, "learning_rate": 0.002, "loss": 2.5513, "step": 247310 }, { "epoch": 0.49271643503761314, "grad_norm": 0.16953565180301666, "learning_rate": 0.002, "loss": 2.5669, "step": 247320 }, { "epoch": 0.4927363572612521, "grad_norm": 0.1573341190814972, "learning_rate": 0.002, "loss": 2.5608, "step": 247330 }, { "epoch": 0.492756279484891, "grad_norm": 0.15690022706985474, "learning_rate": 0.002, "loss": 2.5577, "step": 247340 }, { "epoch": 0.4927762017085299, "grad_norm": 0.19973786175251007, "learning_rate": 0.002, "loss": 2.5478, "step": 247350 }, { "epoch": 0.4927961239321688, "grad_norm": 0.14071977138519287, "learning_rate": 0.002, "loss": 2.5594, "step": 247360 }, { "epoch": 0.4928160461558077, "grad_norm": 0.17802850902080536, "learning_rate": 0.002, "loss": 2.5522, "step": 247370 }, { "epoch": 0.49283596837944665, "grad_norm": 0.1530248373746872, "learning_rate": 0.002, "loss": 2.5598, "step": 247380 }, { "epoch": 0.49285589060308554, "grad_norm": 0.1523626297712326, "learning_rate": 0.002, "loss": 2.5431, "step": 247390 }, { "epoch": 0.4928758128267245, "grad_norm": 0.17990384995937347, "learning_rate": 0.002, "loss": 2.5642, "step": 247400 }, { "epoch": 0.4928957350503634, "grad_norm": 0.2004372775554657, "learning_rate": 0.002, "loss": 2.5655, "step": 247410 }, { "epoch": 0.4929156572740023, "grad_norm": 0.15190422534942627, "learning_rate": 0.002, "loss": 2.5463, "step": 247420 }, { "epoch": 0.4929355794976412, "grad_norm": 0.1713273823261261, "learning_rate": 0.002, "loss": 2.5656, "step": 247430 }, { "epoch": 0.4929555017212801, "grad_norm": 0.18105211853981018, "learning_rate": 0.002, "loss": 2.552, "step": 247440 }, { "epoch": 0.49297542394491906, "grad_norm": 0.14994169771671295, "learning_rate": 0.002, "loss": 2.5544, "step": 247450 }, { "epoch": 0.49299534616855795, "grad_norm": 0.15314853191375732, "learning_rate": 0.002, "loss": 2.5431, "step": 247460 }, { "epoch": 0.49301526839219684, "grad_norm": 0.16739384829998016, "learning_rate": 0.002, "loss": 2.5576, "step": 247470 }, { "epoch": 0.4930351906158358, "grad_norm": 0.1920245736837387, "learning_rate": 0.002, "loss": 2.5603, "step": 247480 }, { "epoch": 0.4930551128394747, "grad_norm": 0.13566069304943085, "learning_rate": 0.002, "loss": 2.5572, "step": 247490 }, { "epoch": 0.4930750350631136, "grad_norm": 0.15538494288921356, "learning_rate": 0.002, "loss": 2.5554, "step": 247500 }, { "epoch": 0.4930949572867525, "grad_norm": 0.16757167875766754, "learning_rate": 0.002, "loss": 2.5674, "step": 247510 }, { "epoch": 0.49311487951039146, "grad_norm": 0.1471923589706421, "learning_rate": 0.002, "loss": 2.5518, "step": 247520 }, { "epoch": 0.49313480173403035, "grad_norm": 0.17047454416751862, "learning_rate": 0.002, "loss": 2.5503, "step": 247530 }, { "epoch": 0.49315472395766924, "grad_norm": 0.2030400186777115, "learning_rate": 0.002, "loss": 2.5403, "step": 247540 }, { "epoch": 0.4931746461813082, "grad_norm": 0.15240055322647095, "learning_rate": 0.002, "loss": 2.558, "step": 247550 }, { "epoch": 0.4931945684049471, "grad_norm": 0.1700279414653778, "learning_rate": 0.002, "loss": 2.5477, "step": 247560 }, { "epoch": 0.493214490628586, "grad_norm": 0.18032851815223694, "learning_rate": 0.002, "loss": 2.5649, "step": 247570 }, { "epoch": 0.4932344128522249, "grad_norm": 0.14829549193382263, "learning_rate": 0.002, "loss": 2.5675, "step": 247580 }, { "epoch": 0.4932543350758638, "grad_norm": 0.16833573579788208, "learning_rate": 0.002, "loss": 2.5624, "step": 247590 }, { "epoch": 0.49327425729950275, "grad_norm": 0.17080289125442505, "learning_rate": 0.002, "loss": 2.554, "step": 247600 }, { "epoch": 0.49329417952314164, "grad_norm": 0.16048362851142883, "learning_rate": 0.002, "loss": 2.5526, "step": 247610 }, { "epoch": 0.4933141017467806, "grad_norm": 0.29493248462677, "learning_rate": 0.002, "loss": 2.55, "step": 247620 }, { "epoch": 0.4933340239704195, "grad_norm": 0.17832212150096893, "learning_rate": 0.002, "loss": 2.5517, "step": 247630 }, { "epoch": 0.4933539461940584, "grad_norm": 0.15195216238498688, "learning_rate": 0.002, "loss": 2.5558, "step": 247640 }, { "epoch": 0.4933738684176973, "grad_norm": 0.16129544377326965, "learning_rate": 0.002, "loss": 2.5655, "step": 247650 }, { "epoch": 0.4933937906413362, "grad_norm": 0.15458351373672485, "learning_rate": 0.002, "loss": 2.5602, "step": 247660 }, { "epoch": 0.49341371286497515, "grad_norm": 0.1898542046546936, "learning_rate": 0.002, "loss": 2.536, "step": 247670 }, { "epoch": 0.49343363508861404, "grad_norm": 0.17556551098823547, "learning_rate": 0.002, "loss": 2.5483, "step": 247680 }, { "epoch": 0.493453557312253, "grad_norm": 0.17125608026981354, "learning_rate": 0.002, "loss": 2.5609, "step": 247690 }, { "epoch": 0.4934734795358919, "grad_norm": 0.15408432483673096, "learning_rate": 0.002, "loss": 2.5603, "step": 247700 }, { "epoch": 0.49349340175953077, "grad_norm": 0.15274977684020996, "learning_rate": 0.002, "loss": 2.5685, "step": 247710 }, { "epoch": 0.4935133239831697, "grad_norm": 0.1374136358499527, "learning_rate": 0.002, "loss": 2.5642, "step": 247720 }, { "epoch": 0.4935332462068086, "grad_norm": 0.19887211918830872, "learning_rate": 0.002, "loss": 2.5675, "step": 247730 }, { "epoch": 0.49355316843044755, "grad_norm": 0.16824257373809814, "learning_rate": 0.002, "loss": 2.5608, "step": 247740 }, { "epoch": 0.49357309065408644, "grad_norm": 0.16481968760490417, "learning_rate": 0.002, "loss": 2.564, "step": 247750 }, { "epoch": 0.49359301287772533, "grad_norm": 0.16560986638069153, "learning_rate": 0.002, "loss": 2.5487, "step": 247760 }, { "epoch": 0.4936129351013643, "grad_norm": 0.15299563109874725, "learning_rate": 0.002, "loss": 2.5613, "step": 247770 }, { "epoch": 0.49363285732500317, "grad_norm": 0.1499580442905426, "learning_rate": 0.002, "loss": 2.5353, "step": 247780 }, { "epoch": 0.4936527795486421, "grad_norm": 0.17444323003292084, "learning_rate": 0.002, "loss": 2.5763, "step": 247790 }, { "epoch": 0.493672701772281, "grad_norm": 0.16274812817573547, "learning_rate": 0.002, "loss": 2.5455, "step": 247800 }, { "epoch": 0.49369262399591995, "grad_norm": 0.17982874810695648, "learning_rate": 0.002, "loss": 2.5377, "step": 247810 }, { "epoch": 0.49371254621955885, "grad_norm": 0.17820894718170166, "learning_rate": 0.002, "loss": 2.553, "step": 247820 }, { "epoch": 0.49373246844319774, "grad_norm": 0.16743223369121552, "learning_rate": 0.002, "loss": 2.5442, "step": 247830 }, { "epoch": 0.4937523906668367, "grad_norm": 0.1853822022676468, "learning_rate": 0.002, "loss": 2.5623, "step": 247840 }, { "epoch": 0.4937723128904756, "grad_norm": 0.18247471749782562, "learning_rate": 0.002, "loss": 2.5558, "step": 247850 }, { "epoch": 0.4937922351141145, "grad_norm": 0.15921670198440552, "learning_rate": 0.002, "loss": 2.5651, "step": 247860 }, { "epoch": 0.4938121573377534, "grad_norm": 0.18076574802398682, "learning_rate": 0.002, "loss": 2.56, "step": 247870 }, { "epoch": 0.4938320795613923, "grad_norm": 0.17536208033561707, "learning_rate": 0.002, "loss": 2.5398, "step": 247880 }, { "epoch": 0.49385200178503125, "grad_norm": 0.15164627134799957, "learning_rate": 0.002, "loss": 2.5528, "step": 247890 }, { "epoch": 0.49387192400867014, "grad_norm": 0.19561903178691864, "learning_rate": 0.002, "loss": 2.5462, "step": 247900 }, { "epoch": 0.4938918462323091, "grad_norm": 0.15125396847724915, "learning_rate": 0.002, "loss": 2.5683, "step": 247910 }, { "epoch": 0.493911768455948, "grad_norm": 0.20720934867858887, "learning_rate": 0.002, "loss": 2.5562, "step": 247920 }, { "epoch": 0.49393169067958687, "grad_norm": 0.16973496973514557, "learning_rate": 0.002, "loss": 2.5571, "step": 247930 }, { "epoch": 0.4939516129032258, "grad_norm": 0.15346664190292358, "learning_rate": 0.002, "loss": 2.5448, "step": 247940 }, { "epoch": 0.4939715351268647, "grad_norm": 0.14946681261062622, "learning_rate": 0.002, "loss": 2.546, "step": 247950 }, { "epoch": 0.49399145735050365, "grad_norm": 0.1660563349723816, "learning_rate": 0.002, "loss": 2.5453, "step": 247960 }, { "epoch": 0.49401137957414254, "grad_norm": 0.17866002023220062, "learning_rate": 0.002, "loss": 2.5506, "step": 247970 }, { "epoch": 0.4940313017977815, "grad_norm": 0.1773892492055893, "learning_rate": 0.002, "loss": 2.5617, "step": 247980 }, { "epoch": 0.4940512240214204, "grad_norm": 0.1510363072156906, "learning_rate": 0.002, "loss": 2.5582, "step": 247990 }, { "epoch": 0.49407114624505927, "grad_norm": 0.1806795746088028, "learning_rate": 0.002, "loss": 2.566, "step": 248000 }, { "epoch": 0.4940910684686982, "grad_norm": 0.21204157173633575, "learning_rate": 0.002, "loss": 2.5548, "step": 248010 }, { "epoch": 0.4941109906923371, "grad_norm": 0.18585200607776642, "learning_rate": 0.002, "loss": 2.5581, "step": 248020 }, { "epoch": 0.49413091291597605, "grad_norm": 0.15088607370853424, "learning_rate": 0.002, "loss": 2.5707, "step": 248030 }, { "epoch": 0.49415083513961494, "grad_norm": 0.19382184743881226, "learning_rate": 0.002, "loss": 2.552, "step": 248040 }, { "epoch": 0.49417075736325383, "grad_norm": 0.17828261852264404, "learning_rate": 0.002, "loss": 2.5637, "step": 248050 }, { "epoch": 0.4941906795868928, "grad_norm": 0.1694241166114807, "learning_rate": 0.002, "loss": 2.5611, "step": 248060 }, { "epoch": 0.49421060181053167, "grad_norm": 0.2272396832704544, "learning_rate": 0.002, "loss": 2.5633, "step": 248070 }, { "epoch": 0.4942305240341706, "grad_norm": 0.17057570815086365, "learning_rate": 0.002, "loss": 2.548, "step": 248080 }, { "epoch": 0.4942504462578095, "grad_norm": 0.1640995889902115, "learning_rate": 0.002, "loss": 2.5528, "step": 248090 }, { "epoch": 0.49427036848144845, "grad_norm": 0.16091099381446838, "learning_rate": 0.002, "loss": 2.5662, "step": 248100 }, { "epoch": 0.49429029070508734, "grad_norm": 0.16799038648605347, "learning_rate": 0.002, "loss": 2.5682, "step": 248110 }, { "epoch": 0.49431021292872623, "grad_norm": 0.1857665628194809, "learning_rate": 0.002, "loss": 2.5495, "step": 248120 }, { "epoch": 0.4943301351523652, "grad_norm": 0.1507233828306198, "learning_rate": 0.002, "loss": 2.562, "step": 248130 }, { "epoch": 0.49435005737600407, "grad_norm": 0.1865071952342987, "learning_rate": 0.002, "loss": 2.5693, "step": 248140 }, { "epoch": 0.494369979599643, "grad_norm": 0.14932313561439514, "learning_rate": 0.002, "loss": 2.5587, "step": 248150 }, { "epoch": 0.4943899018232819, "grad_norm": 0.1698198765516281, "learning_rate": 0.002, "loss": 2.5533, "step": 248160 }, { "epoch": 0.4944098240469208, "grad_norm": 0.17371276021003723, "learning_rate": 0.002, "loss": 2.5567, "step": 248170 }, { "epoch": 0.49442974627055974, "grad_norm": 0.165127694606781, "learning_rate": 0.002, "loss": 2.574, "step": 248180 }, { "epoch": 0.49444966849419864, "grad_norm": 0.1927938163280487, "learning_rate": 0.002, "loss": 2.5624, "step": 248190 }, { "epoch": 0.4944695907178376, "grad_norm": 0.16459068655967712, "learning_rate": 0.002, "loss": 2.5557, "step": 248200 }, { "epoch": 0.4944895129414765, "grad_norm": 0.1496908962726593, "learning_rate": 0.002, "loss": 2.5583, "step": 248210 }, { "epoch": 0.49450943516511536, "grad_norm": 0.16058564186096191, "learning_rate": 0.002, "loss": 2.5545, "step": 248220 }, { "epoch": 0.4945293573887543, "grad_norm": 0.15508033335208893, "learning_rate": 0.002, "loss": 2.5557, "step": 248230 }, { "epoch": 0.4945492796123932, "grad_norm": 0.19377437233924866, "learning_rate": 0.002, "loss": 2.5573, "step": 248240 }, { "epoch": 0.49456920183603215, "grad_norm": 0.1458132416009903, "learning_rate": 0.002, "loss": 2.5608, "step": 248250 }, { "epoch": 0.49458912405967104, "grad_norm": 0.1924007534980774, "learning_rate": 0.002, "loss": 2.5596, "step": 248260 }, { "epoch": 0.49460904628331, "grad_norm": 0.15998795628547668, "learning_rate": 0.002, "loss": 2.558, "step": 248270 }, { "epoch": 0.4946289685069489, "grad_norm": 0.15320968627929688, "learning_rate": 0.002, "loss": 2.5493, "step": 248280 }, { "epoch": 0.49464889073058776, "grad_norm": 0.18679122626781464, "learning_rate": 0.002, "loss": 2.5566, "step": 248290 }, { "epoch": 0.4946688129542267, "grad_norm": 0.18823303282260895, "learning_rate": 0.002, "loss": 2.5608, "step": 248300 }, { "epoch": 0.4946887351778656, "grad_norm": 0.16042740643024445, "learning_rate": 0.002, "loss": 2.557, "step": 248310 }, { "epoch": 0.49470865740150455, "grad_norm": 0.14884938299655914, "learning_rate": 0.002, "loss": 2.5438, "step": 248320 }, { "epoch": 0.49472857962514344, "grad_norm": 0.1727757304906845, "learning_rate": 0.002, "loss": 2.5527, "step": 248330 }, { "epoch": 0.49474850184878233, "grad_norm": 0.1710580289363861, "learning_rate": 0.002, "loss": 2.5505, "step": 248340 }, { "epoch": 0.4947684240724213, "grad_norm": 0.1615237146615982, "learning_rate": 0.002, "loss": 2.57, "step": 248350 }, { "epoch": 0.49478834629606017, "grad_norm": 0.16132038831710815, "learning_rate": 0.002, "loss": 2.5572, "step": 248360 }, { "epoch": 0.4948082685196991, "grad_norm": 0.19475030899047852, "learning_rate": 0.002, "loss": 2.5595, "step": 248370 }, { "epoch": 0.494828190743338, "grad_norm": 0.16309596598148346, "learning_rate": 0.002, "loss": 2.5699, "step": 248380 }, { "epoch": 0.49484811296697695, "grad_norm": 0.17642684280872345, "learning_rate": 0.002, "loss": 2.5553, "step": 248390 }, { "epoch": 0.49486803519061584, "grad_norm": 0.20793867111206055, "learning_rate": 0.002, "loss": 2.562, "step": 248400 }, { "epoch": 0.49488795741425473, "grad_norm": 0.16046695411205292, "learning_rate": 0.002, "loss": 2.5626, "step": 248410 }, { "epoch": 0.4949078796378937, "grad_norm": 0.17242401838302612, "learning_rate": 0.002, "loss": 2.5418, "step": 248420 }, { "epoch": 0.49492780186153257, "grad_norm": 0.16297723352909088, "learning_rate": 0.002, "loss": 2.5545, "step": 248430 }, { "epoch": 0.4949477240851715, "grad_norm": 0.22958442568778992, "learning_rate": 0.002, "loss": 2.5562, "step": 248440 }, { "epoch": 0.4949676463088104, "grad_norm": 0.16346551477909088, "learning_rate": 0.002, "loss": 2.5599, "step": 248450 }, { "epoch": 0.4949875685324493, "grad_norm": 0.18228261172771454, "learning_rate": 0.002, "loss": 2.5611, "step": 248460 }, { "epoch": 0.49500749075608824, "grad_norm": 0.14543470740318298, "learning_rate": 0.002, "loss": 2.5487, "step": 248470 }, { "epoch": 0.49502741297972713, "grad_norm": 0.14533783495426178, "learning_rate": 0.002, "loss": 2.5588, "step": 248480 }, { "epoch": 0.4950473352033661, "grad_norm": 0.18811912834644318, "learning_rate": 0.002, "loss": 2.5633, "step": 248490 }, { "epoch": 0.49506725742700497, "grad_norm": 0.15769019722938538, "learning_rate": 0.002, "loss": 2.5399, "step": 248500 }, { "epoch": 0.49508717965064386, "grad_norm": 0.16858288645744324, "learning_rate": 0.002, "loss": 2.5623, "step": 248510 }, { "epoch": 0.4951071018742828, "grad_norm": 0.16980108618736267, "learning_rate": 0.002, "loss": 2.5432, "step": 248520 }, { "epoch": 0.4951270240979217, "grad_norm": 0.14864720404148102, "learning_rate": 0.002, "loss": 2.5626, "step": 248530 }, { "epoch": 0.49514694632156064, "grad_norm": 0.18764902651309967, "learning_rate": 0.002, "loss": 2.5706, "step": 248540 }, { "epoch": 0.49516686854519953, "grad_norm": 0.16237349808216095, "learning_rate": 0.002, "loss": 2.5566, "step": 248550 }, { "epoch": 0.4951867907688385, "grad_norm": 0.1673033982515335, "learning_rate": 0.002, "loss": 2.5645, "step": 248560 }, { "epoch": 0.49520671299247737, "grad_norm": 0.15080757439136505, "learning_rate": 0.002, "loss": 2.5696, "step": 248570 }, { "epoch": 0.49522663521611626, "grad_norm": 0.18705691397190094, "learning_rate": 0.002, "loss": 2.5742, "step": 248580 }, { "epoch": 0.4952465574397552, "grad_norm": 0.1813453584909439, "learning_rate": 0.002, "loss": 2.5411, "step": 248590 }, { "epoch": 0.4952664796633941, "grad_norm": 0.1337960809469223, "learning_rate": 0.002, "loss": 2.5617, "step": 248600 }, { "epoch": 0.49528640188703305, "grad_norm": 0.21889953315258026, "learning_rate": 0.002, "loss": 2.5636, "step": 248610 }, { "epoch": 0.49530632411067194, "grad_norm": 0.15657886862754822, "learning_rate": 0.002, "loss": 2.5616, "step": 248620 }, { "epoch": 0.4953262463343108, "grad_norm": 0.15619724988937378, "learning_rate": 0.002, "loss": 2.5579, "step": 248630 }, { "epoch": 0.4953461685579498, "grad_norm": 0.16296911239624023, "learning_rate": 0.002, "loss": 2.5634, "step": 248640 }, { "epoch": 0.49536609078158866, "grad_norm": 0.21185842156410217, "learning_rate": 0.002, "loss": 2.5453, "step": 248650 }, { "epoch": 0.4953860130052276, "grad_norm": 0.15769973397254944, "learning_rate": 0.002, "loss": 2.5469, "step": 248660 }, { "epoch": 0.4954059352288665, "grad_norm": 0.1469653993844986, "learning_rate": 0.002, "loss": 2.5705, "step": 248670 }, { "epoch": 0.4954258574525054, "grad_norm": 0.15320557355880737, "learning_rate": 0.002, "loss": 2.5625, "step": 248680 }, { "epoch": 0.49544577967614434, "grad_norm": 0.17469269037246704, "learning_rate": 0.002, "loss": 2.5513, "step": 248690 }, { "epoch": 0.49546570189978323, "grad_norm": 0.1480962634086609, "learning_rate": 0.002, "loss": 2.5675, "step": 248700 }, { "epoch": 0.4954856241234222, "grad_norm": 0.15233801305294037, "learning_rate": 0.002, "loss": 2.5543, "step": 248710 }, { "epoch": 0.49550554634706107, "grad_norm": 0.21026523411273956, "learning_rate": 0.002, "loss": 2.5518, "step": 248720 }, { "epoch": 0.4955254685707, "grad_norm": 0.20821741223335266, "learning_rate": 0.002, "loss": 2.5724, "step": 248730 }, { "epoch": 0.4955453907943389, "grad_norm": 0.16142483055591583, "learning_rate": 0.002, "loss": 2.5601, "step": 248740 }, { "epoch": 0.4955653130179778, "grad_norm": 0.17387673258781433, "learning_rate": 0.002, "loss": 2.5605, "step": 248750 }, { "epoch": 0.49558523524161674, "grad_norm": 0.19204860925674438, "learning_rate": 0.002, "loss": 2.5521, "step": 248760 }, { "epoch": 0.49560515746525563, "grad_norm": 0.1422937959432602, "learning_rate": 0.002, "loss": 2.5607, "step": 248770 }, { "epoch": 0.4956250796888946, "grad_norm": 0.15293271839618683, "learning_rate": 0.002, "loss": 2.558, "step": 248780 }, { "epoch": 0.49564500191253347, "grad_norm": 0.14502936601638794, "learning_rate": 0.002, "loss": 2.5543, "step": 248790 }, { "epoch": 0.49566492413617236, "grad_norm": 0.21575988829135895, "learning_rate": 0.002, "loss": 2.5628, "step": 248800 }, { "epoch": 0.4956848463598113, "grad_norm": 0.14925456047058105, "learning_rate": 0.002, "loss": 2.5647, "step": 248810 }, { "epoch": 0.4957047685834502, "grad_norm": 0.1820315420627594, "learning_rate": 0.002, "loss": 2.5657, "step": 248820 }, { "epoch": 0.49572469080708914, "grad_norm": 0.19141922891139984, "learning_rate": 0.002, "loss": 2.5401, "step": 248830 }, { "epoch": 0.49574461303072803, "grad_norm": 0.16630010306835175, "learning_rate": 0.002, "loss": 2.5642, "step": 248840 }, { "epoch": 0.495764535254367, "grad_norm": 0.15694405138492584, "learning_rate": 0.002, "loss": 2.5585, "step": 248850 }, { "epoch": 0.49578445747800587, "grad_norm": 0.1504100114107132, "learning_rate": 0.002, "loss": 2.5716, "step": 248860 }, { "epoch": 0.49580437970164476, "grad_norm": 0.17666250467300415, "learning_rate": 0.002, "loss": 2.55, "step": 248870 }, { "epoch": 0.4958243019252837, "grad_norm": 0.15585631132125854, "learning_rate": 0.002, "loss": 2.5552, "step": 248880 }, { "epoch": 0.4958442241489226, "grad_norm": 0.1486448049545288, "learning_rate": 0.002, "loss": 2.5632, "step": 248890 }, { "epoch": 0.49586414637256154, "grad_norm": 0.17554070055484772, "learning_rate": 0.002, "loss": 2.5545, "step": 248900 }, { "epoch": 0.49588406859620043, "grad_norm": 0.17984141409397125, "learning_rate": 0.002, "loss": 2.5482, "step": 248910 }, { "epoch": 0.4959039908198393, "grad_norm": 0.15538883209228516, "learning_rate": 0.002, "loss": 2.5515, "step": 248920 }, { "epoch": 0.49592391304347827, "grad_norm": 0.14751332998275757, "learning_rate": 0.002, "loss": 2.577, "step": 248930 }, { "epoch": 0.49594383526711716, "grad_norm": 0.19164052605628967, "learning_rate": 0.002, "loss": 2.5673, "step": 248940 }, { "epoch": 0.4959637574907561, "grad_norm": 0.15396836400032043, "learning_rate": 0.002, "loss": 2.554, "step": 248950 }, { "epoch": 0.495983679714395, "grad_norm": 0.1896994411945343, "learning_rate": 0.002, "loss": 2.5612, "step": 248960 }, { "epoch": 0.4960036019380339, "grad_norm": 0.1607162207365036, "learning_rate": 0.002, "loss": 2.5501, "step": 248970 }, { "epoch": 0.49602352416167284, "grad_norm": 0.16928523778915405, "learning_rate": 0.002, "loss": 2.5456, "step": 248980 }, { "epoch": 0.4960434463853117, "grad_norm": 0.18346424400806427, "learning_rate": 0.002, "loss": 2.5681, "step": 248990 }, { "epoch": 0.49606336860895067, "grad_norm": 0.16120900213718414, "learning_rate": 0.002, "loss": 2.5629, "step": 249000 }, { "epoch": 0.49608329083258956, "grad_norm": 0.15973889827728271, "learning_rate": 0.002, "loss": 2.5657, "step": 249010 }, { "epoch": 0.4961032130562285, "grad_norm": 0.1500917673110962, "learning_rate": 0.002, "loss": 2.5572, "step": 249020 }, { "epoch": 0.4961231352798674, "grad_norm": 0.22690574824810028, "learning_rate": 0.002, "loss": 2.5569, "step": 249030 }, { "epoch": 0.4961430575035063, "grad_norm": 0.17451399564743042, "learning_rate": 0.002, "loss": 2.5631, "step": 249040 }, { "epoch": 0.49616297972714524, "grad_norm": 0.1652333289384842, "learning_rate": 0.002, "loss": 2.5613, "step": 249050 }, { "epoch": 0.4961829019507841, "grad_norm": 0.15421855449676514, "learning_rate": 0.002, "loss": 2.5674, "step": 249060 }, { "epoch": 0.4962028241744231, "grad_norm": 0.19260869920253754, "learning_rate": 0.002, "loss": 2.5681, "step": 249070 }, { "epoch": 0.49622274639806196, "grad_norm": 0.1577615886926651, "learning_rate": 0.002, "loss": 2.5491, "step": 249080 }, { "epoch": 0.49624266862170086, "grad_norm": 0.16695444285869598, "learning_rate": 0.002, "loss": 2.5616, "step": 249090 }, { "epoch": 0.4962625908453398, "grad_norm": 0.14071357250213623, "learning_rate": 0.002, "loss": 2.5468, "step": 249100 }, { "epoch": 0.4962825130689787, "grad_norm": 0.16996575891971588, "learning_rate": 0.002, "loss": 2.5544, "step": 249110 }, { "epoch": 0.49630243529261764, "grad_norm": 0.1707802563905716, "learning_rate": 0.002, "loss": 2.5573, "step": 249120 }, { "epoch": 0.49632235751625653, "grad_norm": 0.16347546875476837, "learning_rate": 0.002, "loss": 2.5568, "step": 249130 }, { "epoch": 0.4963422797398955, "grad_norm": 0.17678013443946838, "learning_rate": 0.002, "loss": 2.5589, "step": 249140 }, { "epoch": 0.49636220196353437, "grad_norm": 0.17986661195755005, "learning_rate": 0.002, "loss": 2.5456, "step": 249150 }, { "epoch": 0.49638212418717326, "grad_norm": 0.16309042274951935, "learning_rate": 0.002, "loss": 2.5559, "step": 249160 }, { "epoch": 0.4964020464108122, "grad_norm": 0.16309159994125366, "learning_rate": 0.002, "loss": 2.5612, "step": 249170 }, { "epoch": 0.4964219686344511, "grad_norm": 0.173597514629364, "learning_rate": 0.002, "loss": 2.5556, "step": 249180 }, { "epoch": 0.49644189085809004, "grad_norm": 0.1666974425315857, "learning_rate": 0.002, "loss": 2.5539, "step": 249190 }, { "epoch": 0.49646181308172893, "grad_norm": 0.18489421904087067, "learning_rate": 0.002, "loss": 2.5714, "step": 249200 }, { "epoch": 0.4964817353053678, "grad_norm": 0.16735389828681946, "learning_rate": 0.002, "loss": 2.5765, "step": 249210 }, { "epoch": 0.49650165752900677, "grad_norm": 0.16370168328285217, "learning_rate": 0.002, "loss": 2.5668, "step": 249220 }, { "epoch": 0.49652157975264566, "grad_norm": 0.17859555780887604, "learning_rate": 0.002, "loss": 2.5615, "step": 249230 }, { "epoch": 0.4965415019762846, "grad_norm": 0.1900138258934021, "learning_rate": 0.002, "loss": 2.5647, "step": 249240 }, { "epoch": 0.4965614241999235, "grad_norm": 0.18719585239887238, "learning_rate": 0.002, "loss": 2.5617, "step": 249250 }, { "epoch": 0.4965813464235624, "grad_norm": 0.20158031582832336, "learning_rate": 0.002, "loss": 2.5795, "step": 249260 }, { "epoch": 0.49660126864720133, "grad_norm": 0.1518465280532837, "learning_rate": 0.002, "loss": 2.5553, "step": 249270 }, { "epoch": 0.4966211908708402, "grad_norm": 0.16919849812984467, "learning_rate": 0.002, "loss": 2.5674, "step": 249280 }, { "epoch": 0.49664111309447917, "grad_norm": 0.20716263353824615, "learning_rate": 0.002, "loss": 2.5586, "step": 249290 }, { "epoch": 0.49666103531811806, "grad_norm": 0.14929050207138062, "learning_rate": 0.002, "loss": 2.5509, "step": 249300 }, { "epoch": 0.496680957541757, "grad_norm": 0.1757047474384308, "learning_rate": 0.002, "loss": 2.5657, "step": 249310 }, { "epoch": 0.4967008797653959, "grad_norm": 0.17205724120140076, "learning_rate": 0.002, "loss": 2.5604, "step": 249320 }, { "epoch": 0.4967208019890348, "grad_norm": 0.16494125127792358, "learning_rate": 0.002, "loss": 2.5496, "step": 249330 }, { "epoch": 0.49674072421267373, "grad_norm": 0.17995156347751617, "learning_rate": 0.002, "loss": 2.5509, "step": 249340 }, { "epoch": 0.4967606464363126, "grad_norm": 0.1488538682460785, "learning_rate": 0.002, "loss": 2.5677, "step": 249350 }, { "epoch": 0.49678056865995157, "grad_norm": 0.1530451774597168, "learning_rate": 0.002, "loss": 2.5631, "step": 249360 }, { "epoch": 0.49680049088359046, "grad_norm": 0.16020886600017548, "learning_rate": 0.002, "loss": 2.5441, "step": 249370 }, { "epoch": 0.49682041310722935, "grad_norm": 0.1998608261346817, "learning_rate": 0.002, "loss": 2.5592, "step": 249380 }, { "epoch": 0.4968403353308683, "grad_norm": 0.14047279953956604, "learning_rate": 0.002, "loss": 2.5564, "step": 249390 }, { "epoch": 0.4968602575545072, "grad_norm": 0.258676677942276, "learning_rate": 0.002, "loss": 2.5611, "step": 249400 }, { "epoch": 0.49688017977814614, "grad_norm": 0.15138334035873413, "learning_rate": 0.002, "loss": 2.5674, "step": 249410 }, { "epoch": 0.496900102001785, "grad_norm": 0.14403001964092255, "learning_rate": 0.002, "loss": 2.5643, "step": 249420 }, { "epoch": 0.4969200242254239, "grad_norm": 0.19409924745559692, "learning_rate": 0.002, "loss": 2.5658, "step": 249430 }, { "epoch": 0.49693994644906286, "grad_norm": 0.15983083844184875, "learning_rate": 0.002, "loss": 2.5585, "step": 249440 }, { "epoch": 0.49695986867270175, "grad_norm": 0.1882641315460205, "learning_rate": 0.002, "loss": 2.5606, "step": 249450 }, { "epoch": 0.4969797908963407, "grad_norm": 0.1732475608587265, "learning_rate": 0.002, "loss": 2.5599, "step": 249460 }, { "epoch": 0.4969997131199796, "grad_norm": 0.13978128135204315, "learning_rate": 0.002, "loss": 2.5666, "step": 249470 }, { "epoch": 0.49701963534361854, "grad_norm": 0.15779843926429749, "learning_rate": 0.002, "loss": 2.5493, "step": 249480 }, { "epoch": 0.49703955756725743, "grad_norm": 0.2015724778175354, "learning_rate": 0.002, "loss": 2.549, "step": 249490 }, { "epoch": 0.4970594797908963, "grad_norm": 0.18459831178188324, "learning_rate": 0.002, "loss": 2.5693, "step": 249500 }, { "epoch": 0.49707940201453527, "grad_norm": 0.147929385304451, "learning_rate": 0.002, "loss": 2.5549, "step": 249510 }, { "epoch": 0.49709932423817416, "grad_norm": 0.17934143543243408, "learning_rate": 0.002, "loss": 2.5448, "step": 249520 }, { "epoch": 0.4971192464618131, "grad_norm": 0.15603363513946533, "learning_rate": 0.002, "loss": 2.5525, "step": 249530 }, { "epoch": 0.497139168685452, "grad_norm": 0.18804243206977844, "learning_rate": 0.002, "loss": 2.5402, "step": 249540 }, { "epoch": 0.4971590909090909, "grad_norm": 0.18797124922275543, "learning_rate": 0.002, "loss": 2.5628, "step": 249550 }, { "epoch": 0.49717901313272983, "grad_norm": 0.16520850360393524, "learning_rate": 0.002, "loss": 2.5368, "step": 249560 }, { "epoch": 0.4971989353563687, "grad_norm": 0.19510149955749512, "learning_rate": 0.002, "loss": 2.5436, "step": 249570 }, { "epoch": 0.49721885758000767, "grad_norm": 0.16249780356884003, "learning_rate": 0.002, "loss": 2.5616, "step": 249580 }, { "epoch": 0.49723877980364656, "grad_norm": 0.15928278863430023, "learning_rate": 0.002, "loss": 2.5641, "step": 249590 }, { "epoch": 0.4972587020272855, "grad_norm": 0.14747513830661774, "learning_rate": 0.002, "loss": 2.5629, "step": 249600 }, { "epoch": 0.4972786242509244, "grad_norm": 0.17824532091617584, "learning_rate": 0.002, "loss": 2.5563, "step": 249610 }, { "epoch": 0.4972985464745633, "grad_norm": 0.1756504774093628, "learning_rate": 0.002, "loss": 2.5458, "step": 249620 }, { "epoch": 0.49731846869820223, "grad_norm": 0.15399155020713806, "learning_rate": 0.002, "loss": 2.5636, "step": 249630 }, { "epoch": 0.4973383909218411, "grad_norm": 0.1738782674074173, "learning_rate": 0.002, "loss": 2.5454, "step": 249640 }, { "epoch": 0.49735831314548007, "grad_norm": 0.14416486024856567, "learning_rate": 0.002, "loss": 2.5469, "step": 249650 }, { "epoch": 0.49737823536911896, "grad_norm": 0.19734875857830048, "learning_rate": 0.002, "loss": 2.5502, "step": 249660 }, { "epoch": 0.49739815759275785, "grad_norm": 0.18916207551956177, "learning_rate": 0.002, "loss": 2.5627, "step": 249670 }, { "epoch": 0.4974180798163968, "grad_norm": 0.17972369492053986, "learning_rate": 0.002, "loss": 2.5639, "step": 249680 }, { "epoch": 0.4974380020400357, "grad_norm": 0.16459907591342926, "learning_rate": 0.002, "loss": 2.557, "step": 249690 }, { "epoch": 0.49745792426367463, "grad_norm": 0.18868392705917358, "learning_rate": 0.002, "loss": 2.562, "step": 249700 }, { "epoch": 0.4974778464873135, "grad_norm": 0.18082182109355927, "learning_rate": 0.002, "loss": 2.5504, "step": 249710 }, { "epoch": 0.4974977687109524, "grad_norm": 0.15832680463790894, "learning_rate": 0.002, "loss": 2.5523, "step": 249720 }, { "epoch": 0.49751769093459136, "grad_norm": 0.13908202946186066, "learning_rate": 0.002, "loss": 2.5509, "step": 249730 }, { "epoch": 0.49753761315823025, "grad_norm": 0.17225515842437744, "learning_rate": 0.002, "loss": 2.5499, "step": 249740 }, { "epoch": 0.4975575353818692, "grad_norm": 0.15948574244976044, "learning_rate": 0.002, "loss": 2.5758, "step": 249750 }, { "epoch": 0.4975774576055081, "grad_norm": 0.1881287544965744, "learning_rate": 0.002, "loss": 2.5594, "step": 249760 }, { "epoch": 0.49759737982914704, "grad_norm": 0.1906002163887024, "learning_rate": 0.002, "loss": 2.5658, "step": 249770 }, { "epoch": 0.4976173020527859, "grad_norm": 0.14268361032009125, "learning_rate": 0.002, "loss": 2.5637, "step": 249780 }, { "epoch": 0.4976372242764248, "grad_norm": 0.15368273854255676, "learning_rate": 0.002, "loss": 2.5624, "step": 249790 }, { "epoch": 0.49765714650006376, "grad_norm": 0.1874203383922577, "learning_rate": 0.002, "loss": 2.5569, "step": 249800 }, { "epoch": 0.49767706872370265, "grad_norm": 0.15106423199176788, "learning_rate": 0.002, "loss": 2.5438, "step": 249810 }, { "epoch": 0.4976969909473416, "grad_norm": 0.15681149065494537, "learning_rate": 0.002, "loss": 2.5592, "step": 249820 }, { "epoch": 0.4977169131709805, "grad_norm": 0.19121499359607697, "learning_rate": 0.002, "loss": 2.5499, "step": 249830 }, { "epoch": 0.4977368353946194, "grad_norm": 0.16908830404281616, "learning_rate": 0.002, "loss": 2.5398, "step": 249840 }, { "epoch": 0.4977567576182583, "grad_norm": 0.1643812209367752, "learning_rate": 0.002, "loss": 2.5492, "step": 249850 }, { "epoch": 0.4977766798418972, "grad_norm": 0.14150160551071167, "learning_rate": 0.002, "loss": 2.5572, "step": 249860 }, { "epoch": 0.49779660206553616, "grad_norm": 0.1687692254781723, "learning_rate": 0.002, "loss": 2.5759, "step": 249870 }, { "epoch": 0.49781652428917506, "grad_norm": 0.16977527737617493, "learning_rate": 0.002, "loss": 2.5664, "step": 249880 }, { "epoch": 0.497836446512814, "grad_norm": 0.16722747683525085, "learning_rate": 0.002, "loss": 2.564, "step": 249890 }, { "epoch": 0.4978563687364529, "grad_norm": 0.19548916816711426, "learning_rate": 0.002, "loss": 2.5615, "step": 249900 }, { "epoch": 0.4978762909600918, "grad_norm": 0.15412133932113647, "learning_rate": 0.002, "loss": 2.5585, "step": 249910 }, { "epoch": 0.49789621318373073, "grad_norm": 0.15649129450321198, "learning_rate": 0.002, "loss": 2.5514, "step": 249920 }, { "epoch": 0.4979161354073696, "grad_norm": 0.17224711179733276, "learning_rate": 0.002, "loss": 2.5582, "step": 249930 }, { "epoch": 0.49793605763100857, "grad_norm": 0.15223060548305511, "learning_rate": 0.002, "loss": 2.5584, "step": 249940 }, { "epoch": 0.49795597985464746, "grad_norm": 0.18102581799030304, "learning_rate": 0.002, "loss": 2.5577, "step": 249950 }, { "epoch": 0.49797590207828635, "grad_norm": 0.1606530249118805, "learning_rate": 0.002, "loss": 2.561, "step": 249960 }, { "epoch": 0.4979958243019253, "grad_norm": 0.22671030461788177, "learning_rate": 0.002, "loss": 2.5726, "step": 249970 }, { "epoch": 0.4980157465255642, "grad_norm": 0.14367519319057465, "learning_rate": 0.002, "loss": 2.5381, "step": 249980 }, { "epoch": 0.49803566874920313, "grad_norm": 0.15027236938476562, "learning_rate": 0.002, "loss": 2.5745, "step": 249990 }, { "epoch": 0.498055590972842, "grad_norm": 0.16157013177871704, "learning_rate": 0.002, "loss": 2.5697, "step": 250000 }, { "epoch": 0.4980755131964809, "grad_norm": 0.1823481172323227, "learning_rate": 0.002, "loss": 2.5655, "step": 250010 }, { "epoch": 0.49809543542011986, "grad_norm": 0.14843560755252838, "learning_rate": 0.002, "loss": 2.5623, "step": 250020 }, { "epoch": 0.49811535764375875, "grad_norm": 0.2330763190984726, "learning_rate": 0.002, "loss": 2.546, "step": 250030 }, { "epoch": 0.4981352798673977, "grad_norm": 0.1539386510848999, "learning_rate": 0.002, "loss": 2.5537, "step": 250040 }, { "epoch": 0.4981552020910366, "grad_norm": 0.14437180757522583, "learning_rate": 0.002, "loss": 2.5624, "step": 250050 }, { "epoch": 0.49817512431467553, "grad_norm": 0.18796753883361816, "learning_rate": 0.002, "loss": 2.5371, "step": 250060 }, { "epoch": 0.4981950465383144, "grad_norm": 0.1808117777109146, "learning_rate": 0.002, "loss": 2.5606, "step": 250070 }, { "epoch": 0.4982149687619533, "grad_norm": 0.17130811512470245, "learning_rate": 0.002, "loss": 2.5584, "step": 250080 }, { "epoch": 0.49823489098559226, "grad_norm": 0.14650608599185944, "learning_rate": 0.002, "loss": 2.5641, "step": 250090 }, { "epoch": 0.49825481320923115, "grad_norm": 0.1620267927646637, "learning_rate": 0.002, "loss": 2.5573, "step": 250100 }, { "epoch": 0.4982747354328701, "grad_norm": 0.1787964105606079, "learning_rate": 0.002, "loss": 2.5597, "step": 250110 }, { "epoch": 0.498294657656509, "grad_norm": 0.1737828254699707, "learning_rate": 0.002, "loss": 2.5503, "step": 250120 }, { "epoch": 0.4983145798801479, "grad_norm": 0.16317886114120483, "learning_rate": 0.002, "loss": 2.554, "step": 250130 }, { "epoch": 0.4983345021037868, "grad_norm": 0.17985236644744873, "learning_rate": 0.002, "loss": 2.5601, "step": 250140 }, { "epoch": 0.4983544243274257, "grad_norm": 0.1517058163881302, "learning_rate": 0.002, "loss": 2.5487, "step": 250150 }, { "epoch": 0.49837434655106466, "grad_norm": 0.1784961223602295, "learning_rate": 0.002, "loss": 2.5578, "step": 250160 }, { "epoch": 0.49839426877470355, "grad_norm": 0.17101049423217773, "learning_rate": 0.002, "loss": 2.5503, "step": 250170 }, { "epoch": 0.49841419099834244, "grad_norm": 0.19040162861347198, "learning_rate": 0.002, "loss": 2.5532, "step": 250180 }, { "epoch": 0.4984341132219814, "grad_norm": 0.16645681858062744, "learning_rate": 0.002, "loss": 2.5578, "step": 250190 }, { "epoch": 0.4984540354456203, "grad_norm": 0.18993529677391052, "learning_rate": 0.002, "loss": 2.5668, "step": 250200 }, { "epoch": 0.4984739576692592, "grad_norm": 0.15232203900814056, "learning_rate": 0.002, "loss": 2.5635, "step": 250210 }, { "epoch": 0.4984938798928981, "grad_norm": 0.167339026927948, "learning_rate": 0.002, "loss": 2.5716, "step": 250220 }, { "epoch": 0.49851380211653706, "grad_norm": 0.16817623376846313, "learning_rate": 0.002, "loss": 2.568, "step": 250230 }, { "epoch": 0.49853372434017595, "grad_norm": 0.18144385516643524, "learning_rate": 0.002, "loss": 2.5777, "step": 250240 }, { "epoch": 0.49855364656381485, "grad_norm": 0.14309024810791016, "learning_rate": 0.002, "loss": 2.569, "step": 250250 }, { "epoch": 0.4985735687874538, "grad_norm": 0.21601758897304535, "learning_rate": 0.002, "loss": 2.5607, "step": 250260 }, { "epoch": 0.4985934910110927, "grad_norm": 0.16618965566158295, "learning_rate": 0.002, "loss": 2.5644, "step": 250270 }, { "epoch": 0.49861341323473163, "grad_norm": 0.1731763482093811, "learning_rate": 0.002, "loss": 2.568, "step": 250280 }, { "epoch": 0.4986333354583705, "grad_norm": 0.1593797355890274, "learning_rate": 0.002, "loss": 2.5625, "step": 250290 }, { "epoch": 0.4986532576820094, "grad_norm": 0.19003459811210632, "learning_rate": 0.002, "loss": 2.5724, "step": 250300 }, { "epoch": 0.49867317990564836, "grad_norm": 0.15567544102668762, "learning_rate": 0.002, "loss": 2.5603, "step": 250310 }, { "epoch": 0.49869310212928725, "grad_norm": 0.16819307208061218, "learning_rate": 0.002, "loss": 2.5511, "step": 250320 }, { "epoch": 0.4987130243529262, "grad_norm": 0.14326104521751404, "learning_rate": 0.002, "loss": 2.5664, "step": 250330 }, { "epoch": 0.4987329465765651, "grad_norm": 0.18064291775226593, "learning_rate": 0.002, "loss": 2.5505, "step": 250340 }, { "epoch": 0.49875286880020403, "grad_norm": 0.18501222133636475, "learning_rate": 0.002, "loss": 2.5589, "step": 250350 }, { "epoch": 0.4987727910238429, "grad_norm": 0.19446054100990295, "learning_rate": 0.002, "loss": 2.5519, "step": 250360 }, { "epoch": 0.4987927132474818, "grad_norm": 0.1557685285806656, "learning_rate": 0.002, "loss": 2.5657, "step": 250370 }, { "epoch": 0.49881263547112076, "grad_norm": 0.15881779789924622, "learning_rate": 0.002, "loss": 2.5589, "step": 250380 }, { "epoch": 0.49883255769475965, "grad_norm": 0.16132718324661255, "learning_rate": 0.002, "loss": 2.5439, "step": 250390 }, { "epoch": 0.4988524799183986, "grad_norm": 0.17338915169239044, "learning_rate": 0.002, "loss": 2.5518, "step": 250400 }, { "epoch": 0.4988724021420375, "grad_norm": 0.17852488160133362, "learning_rate": 0.002, "loss": 2.5663, "step": 250410 }, { "epoch": 0.4988923243656764, "grad_norm": 0.19028234481811523, "learning_rate": 0.002, "loss": 2.5608, "step": 250420 }, { "epoch": 0.4989122465893153, "grad_norm": 0.17672035098075867, "learning_rate": 0.002, "loss": 2.5481, "step": 250430 }, { "epoch": 0.4989321688129542, "grad_norm": 0.15568868815898895, "learning_rate": 0.002, "loss": 2.5645, "step": 250440 }, { "epoch": 0.49895209103659316, "grad_norm": 0.15094321966171265, "learning_rate": 0.002, "loss": 2.562, "step": 250450 }, { "epoch": 0.49897201326023205, "grad_norm": 0.18928012251853943, "learning_rate": 0.002, "loss": 2.5677, "step": 250460 }, { "epoch": 0.49899193548387094, "grad_norm": 0.16466526687145233, "learning_rate": 0.002, "loss": 2.5705, "step": 250470 }, { "epoch": 0.4990118577075099, "grad_norm": 0.19273518025875092, "learning_rate": 0.002, "loss": 2.5538, "step": 250480 }, { "epoch": 0.4990317799311488, "grad_norm": 0.19617494940757751, "learning_rate": 0.002, "loss": 2.5549, "step": 250490 }, { "epoch": 0.4990517021547877, "grad_norm": 0.16518081724643707, "learning_rate": 0.002, "loss": 2.5748, "step": 250500 }, { "epoch": 0.4990716243784266, "grad_norm": 0.1609056442975998, "learning_rate": 0.002, "loss": 2.552, "step": 250510 }, { "epoch": 0.49909154660206556, "grad_norm": 0.16634772717952728, "learning_rate": 0.002, "loss": 2.5678, "step": 250520 }, { "epoch": 0.49911146882570445, "grad_norm": 0.1566299945116043, "learning_rate": 0.002, "loss": 2.5534, "step": 250530 }, { "epoch": 0.49913139104934334, "grad_norm": 0.17863468825817108, "learning_rate": 0.002, "loss": 2.5577, "step": 250540 }, { "epoch": 0.4991513132729823, "grad_norm": 0.17117533087730408, "learning_rate": 0.002, "loss": 2.5564, "step": 250550 }, { "epoch": 0.4991712354966212, "grad_norm": 0.15738606452941895, "learning_rate": 0.002, "loss": 2.575, "step": 250560 }, { "epoch": 0.4991911577202601, "grad_norm": 0.16368995606899261, "learning_rate": 0.002, "loss": 2.558, "step": 250570 }, { "epoch": 0.499211079943899, "grad_norm": 0.17690247297286987, "learning_rate": 0.002, "loss": 2.5563, "step": 250580 }, { "epoch": 0.4992310021675379, "grad_norm": 0.185204416513443, "learning_rate": 0.002, "loss": 2.5585, "step": 250590 }, { "epoch": 0.49925092439117685, "grad_norm": 0.15638242661952972, "learning_rate": 0.002, "loss": 2.5579, "step": 250600 }, { "epoch": 0.49927084661481574, "grad_norm": 0.1815185546875, "learning_rate": 0.002, "loss": 2.572, "step": 250610 }, { "epoch": 0.4992907688384547, "grad_norm": 0.16188086569309235, "learning_rate": 0.002, "loss": 2.5597, "step": 250620 }, { "epoch": 0.4993106910620936, "grad_norm": 0.1715361475944519, "learning_rate": 0.002, "loss": 2.5548, "step": 250630 }, { "epoch": 0.4993306132857325, "grad_norm": 0.18439996242523193, "learning_rate": 0.002, "loss": 2.5544, "step": 250640 }, { "epoch": 0.4993505355093714, "grad_norm": 0.20029141008853912, "learning_rate": 0.002, "loss": 2.5554, "step": 250650 }, { "epoch": 0.4993704577330103, "grad_norm": 0.17962899804115295, "learning_rate": 0.002, "loss": 2.5695, "step": 250660 }, { "epoch": 0.49939037995664926, "grad_norm": 0.170039102435112, "learning_rate": 0.002, "loss": 2.5495, "step": 250670 }, { "epoch": 0.49941030218028815, "grad_norm": 0.1945362538099289, "learning_rate": 0.002, "loss": 2.5805, "step": 250680 }, { "epoch": 0.4994302244039271, "grad_norm": 0.14043299853801727, "learning_rate": 0.002, "loss": 2.5519, "step": 250690 }, { "epoch": 0.499450146627566, "grad_norm": 0.1902664452791214, "learning_rate": 0.002, "loss": 2.5611, "step": 250700 }, { "epoch": 0.4994700688512049, "grad_norm": 0.15520460903644562, "learning_rate": 0.002, "loss": 2.551, "step": 250710 }, { "epoch": 0.4994899910748438, "grad_norm": 0.1624802201986313, "learning_rate": 0.002, "loss": 2.5505, "step": 250720 }, { "epoch": 0.4995099132984827, "grad_norm": 0.19161954522132874, "learning_rate": 0.002, "loss": 2.5476, "step": 250730 }, { "epoch": 0.49952983552212166, "grad_norm": 0.15469567477703094, "learning_rate": 0.002, "loss": 2.5661, "step": 250740 }, { "epoch": 0.49954975774576055, "grad_norm": 0.19880706071853638, "learning_rate": 0.002, "loss": 2.5638, "step": 250750 }, { "epoch": 0.49956967996939944, "grad_norm": 0.16824780404567719, "learning_rate": 0.002, "loss": 2.5652, "step": 250760 }, { "epoch": 0.4995896021930384, "grad_norm": 0.18440374732017517, "learning_rate": 0.002, "loss": 2.5563, "step": 250770 }, { "epoch": 0.4996095244166773, "grad_norm": 0.12784704566001892, "learning_rate": 0.002, "loss": 2.5577, "step": 250780 }, { "epoch": 0.4996294466403162, "grad_norm": 0.1850043386220932, "learning_rate": 0.002, "loss": 2.5656, "step": 250790 }, { "epoch": 0.4996493688639551, "grad_norm": 0.20028063654899597, "learning_rate": 0.002, "loss": 2.5648, "step": 250800 }, { "epoch": 0.49966929108759406, "grad_norm": 0.1758841872215271, "learning_rate": 0.002, "loss": 2.5601, "step": 250810 }, { "epoch": 0.49968921331123295, "grad_norm": 0.15529951453208923, "learning_rate": 0.002, "loss": 2.5398, "step": 250820 }, { "epoch": 0.49970913553487184, "grad_norm": 0.15265819430351257, "learning_rate": 0.002, "loss": 2.5493, "step": 250830 }, { "epoch": 0.4997290577585108, "grad_norm": 0.20557908713817596, "learning_rate": 0.002, "loss": 2.5518, "step": 250840 }, { "epoch": 0.4997489799821497, "grad_norm": 0.16829821467399597, "learning_rate": 0.002, "loss": 2.5551, "step": 250850 }, { "epoch": 0.4997689022057886, "grad_norm": 0.18313734233379364, "learning_rate": 0.002, "loss": 2.5533, "step": 250860 }, { "epoch": 0.4997888244294275, "grad_norm": 0.1724478304386139, "learning_rate": 0.002, "loss": 2.5486, "step": 250870 }, { "epoch": 0.4998087466530664, "grad_norm": 0.18339109420776367, "learning_rate": 0.002, "loss": 2.5475, "step": 250880 }, { "epoch": 0.49982866887670535, "grad_norm": 0.13908688724040985, "learning_rate": 0.002, "loss": 2.5774, "step": 250890 }, { "epoch": 0.49984859110034424, "grad_norm": 0.16729213297367096, "learning_rate": 0.002, "loss": 2.5599, "step": 250900 }, { "epoch": 0.4998685133239832, "grad_norm": 0.16385871171951294, "learning_rate": 0.002, "loss": 2.559, "step": 250910 }, { "epoch": 0.4998884355476221, "grad_norm": 0.15970854461193085, "learning_rate": 0.002, "loss": 2.5546, "step": 250920 }, { "epoch": 0.49990835777126097, "grad_norm": 0.17850418388843536, "learning_rate": 0.002, "loss": 2.554, "step": 250930 }, { "epoch": 0.4999282799948999, "grad_norm": 0.192815363407135, "learning_rate": 0.002, "loss": 2.5635, "step": 250940 }, { "epoch": 0.4999482022185388, "grad_norm": 0.15474990010261536, "learning_rate": 0.002, "loss": 2.561, "step": 250950 }, { "epoch": 0.49996812444217775, "grad_norm": 0.21517038345336914, "learning_rate": 0.002, "loss": 2.5586, "step": 250960 }, { "epoch": 0.49998804666581664, "grad_norm": 0.16058601438999176, "learning_rate": 0.002, "loss": 2.5538, "step": 250970 }, { "epoch": 0.5000079688894555, "grad_norm": 0.1669950932264328, "learning_rate": 0.002, "loss": 2.5494, "step": 250980 }, { "epoch": 0.5000278911130944, "grad_norm": 0.15575355291366577, "learning_rate": 0.002, "loss": 2.5518, "step": 250990 }, { "epoch": 0.5000478133367334, "grad_norm": 0.1753561794757843, "learning_rate": 0.002, "loss": 2.5745, "step": 251000 }, { "epoch": 0.5000677355603723, "grad_norm": 0.20956921577453613, "learning_rate": 0.002, "loss": 2.5574, "step": 251010 }, { "epoch": 0.5000876577840112, "grad_norm": 0.14635920524597168, "learning_rate": 0.002, "loss": 2.5597, "step": 251020 }, { "epoch": 0.5001075800076501, "grad_norm": 0.1498507261276245, "learning_rate": 0.002, "loss": 2.5367, "step": 251030 }, { "epoch": 0.5001275022312891, "grad_norm": 0.18564389646053314, "learning_rate": 0.002, "loss": 2.5637, "step": 251040 }, { "epoch": 0.500147424454928, "grad_norm": 0.14080363512039185, "learning_rate": 0.002, "loss": 2.5722, "step": 251050 }, { "epoch": 0.5001673466785669, "grad_norm": 0.16357147693634033, "learning_rate": 0.002, "loss": 2.5497, "step": 251060 }, { "epoch": 0.5001872689022058, "grad_norm": 0.1934603750705719, "learning_rate": 0.002, "loss": 2.5704, "step": 251070 }, { "epoch": 0.5002071911258447, "grad_norm": 0.1472480446100235, "learning_rate": 0.002, "loss": 2.548, "step": 251080 }, { "epoch": 0.5002271133494837, "grad_norm": 0.19993984699249268, "learning_rate": 0.002, "loss": 2.5671, "step": 251090 }, { "epoch": 0.5002470355731226, "grad_norm": 0.17543873190879822, "learning_rate": 0.002, "loss": 2.5769, "step": 251100 }, { "epoch": 0.5002669577967614, "grad_norm": 0.2219657301902771, "learning_rate": 0.002, "loss": 2.545, "step": 251110 }, { "epoch": 0.5002868800204003, "grad_norm": 0.18579880893230438, "learning_rate": 0.002, "loss": 2.5728, "step": 251120 }, { "epoch": 0.5003068022440392, "grad_norm": 0.44336867332458496, "learning_rate": 0.002, "loss": 2.5525, "step": 251130 }, { "epoch": 0.5003267244676782, "grad_norm": 0.1679714471101761, "learning_rate": 0.002, "loss": 2.5703, "step": 251140 }, { "epoch": 0.5003466466913171, "grad_norm": 0.19265811145305634, "learning_rate": 0.002, "loss": 2.5666, "step": 251150 }, { "epoch": 0.500366568914956, "grad_norm": 0.17283286154270172, "learning_rate": 0.002, "loss": 2.5584, "step": 251160 }, { "epoch": 0.5003864911385949, "grad_norm": 0.14801129698753357, "learning_rate": 0.002, "loss": 2.5597, "step": 251170 }, { "epoch": 0.5004064133622338, "grad_norm": 0.15954731404781342, "learning_rate": 0.002, "loss": 2.5526, "step": 251180 }, { "epoch": 0.5004263355858728, "grad_norm": 0.14940467476844788, "learning_rate": 0.002, "loss": 2.5548, "step": 251190 }, { "epoch": 0.5004462578095117, "grad_norm": 0.20126911997795105, "learning_rate": 0.002, "loss": 2.5453, "step": 251200 }, { "epoch": 0.5004661800331506, "grad_norm": 0.15481534600257874, "learning_rate": 0.002, "loss": 2.5521, "step": 251210 }, { "epoch": 0.5004861022567895, "grad_norm": 0.20680609345436096, "learning_rate": 0.002, "loss": 2.5575, "step": 251220 }, { "epoch": 0.5005060244804284, "grad_norm": 0.16310854256153107, "learning_rate": 0.002, "loss": 2.5628, "step": 251230 }, { "epoch": 0.5005259467040674, "grad_norm": 0.17981833219528198, "learning_rate": 0.002, "loss": 2.5458, "step": 251240 }, { "epoch": 0.5005458689277063, "grad_norm": 0.16708557307720184, "learning_rate": 0.002, "loss": 2.56, "step": 251250 }, { "epoch": 0.5005657911513451, "grad_norm": 0.22430770099163055, "learning_rate": 0.002, "loss": 2.564, "step": 251260 }, { "epoch": 0.500585713374984, "grad_norm": 0.1830308735370636, "learning_rate": 0.002, "loss": 2.5624, "step": 251270 }, { "epoch": 0.5006056355986229, "grad_norm": 0.1743297576904297, "learning_rate": 0.002, "loss": 2.5438, "step": 251280 }, { "epoch": 0.5006255578222619, "grad_norm": 0.1785021871328354, "learning_rate": 0.002, "loss": 2.5507, "step": 251290 }, { "epoch": 0.5006454800459008, "grad_norm": 0.1957801729440689, "learning_rate": 0.002, "loss": 2.5496, "step": 251300 }, { "epoch": 0.5006654022695397, "grad_norm": 0.1546156406402588, "learning_rate": 0.002, "loss": 2.5524, "step": 251310 }, { "epoch": 0.5006853244931786, "grad_norm": 0.15234562754631042, "learning_rate": 0.002, "loss": 2.5736, "step": 251320 }, { "epoch": 0.5007052467168176, "grad_norm": 0.1591183990240097, "learning_rate": 0.002, "loss": 2.5492, "step": 251330 }, { "epoch": 0.5007251689404565, "grad_norm": 0.21013350784778595, "learning_rate": 0.002, "loss": 2.5645, "step": 251340 }, { "epoch": 0.5007450911640954, "grad_norm": 0.17829212546348572, "learning_rate": 0.002, "loss": 2.5516, "step": 251350 }, { "epoch": 0.5007650133877343, "grad_norm": 0.16902197897434235, "learning_rate": 0.002, "loss": 2.5619, "step": 251360 }, { "epoch": 0.5007849356113732, "grad_norm": 0.15264379978179932, "learning_rate": 0.002, "loss": 2.5735, "step": 251370 }, { "epoch": 0.5008048578350122, "grad_norm": 0.1579328328371048, "learning_rate": 0.002, "loss": 2.5545, "step": 251380 }, { "epoch": 0.500824780058651, "grad_norm": 0.159034863114357, "learning_rate": 0.002, "loss": 2.563, "step": 251390 }, { "epoch": 0.5008447022822899, "grad_norm": 0.16377189755439758, "learning_rate": 0.002, "loss": 2.5734, "step": 251400 }, { "epoch": 0.5008646245059288, "grad_norm": 0.1406628042459488, "learning_rate": 0.002, "loss": 2.5429, "step": 251410 }, { "epoch": 0.5008845467295677, "grad_norm": 0.20382073521614075, "learning_rate": 0.002, "loss": 2.5558, "step": 251420 }, { "epoch": 0.5009044689532067, "grad_norm": 0.16820450127124786, "learning_rate": 0.002, "loss": 2.5477, "step": 251430 }, { "epoch": 0.5009243911768456, "grad_norm": 0.18565434217453003, "learning_rate": 0.002, "loss": 2.5588, "step": 251440 }, { "epoch": 0.5009443134004845, "grad_norm": 0.14179398119449615, "learning_rate": 0.002, "loss": 2.5552, "step": 251450 }, { "epoch": 0.5009642356241234, "grad_norm": 0.1797056645154953, "learning_rate": 0.002, "loss": 2.55, "step": 251460 }, { "epoch": 0.5009841578477623, "grad_norm": 0.16162529587745667, "learning_rate": 0.002, "loss": 2.5516, "step": 251470 }, { "epoch": 0.5010040800714013, "grad_norm": 0.19131121039390564, "learning_rate": 0.002, "loss": 2.5386, "step": 251480 }, { "epoch": 0.5010240022950402, "grad_norm": 0.1578371226787567, "learning_rate": 0.002, "loss": 2.5443, "step": 251490 }, { "epoch": 0.5010439245186791, "grad_norm": 0.1558881551027298, "learning_rate": 0.002, "loss": 2.5463, "step": 251500 }, { "epoch": 0.501063846742318, "grad_norm": 0.1859809309244156, "learning_rate": 0.002, "loss": 2.5575, "step": 251510 }, { "epoch": 0.5010837689659569, "grad_norm": 0.15324106812477112, "learning_rate": 0.002, "loss": 2.5502, "step": 251520 }, { "epoch": 0.5011036911895959, "grad_norm": 0.20317591726779938, "learning_rate": 0.002, "loss": 2.5646, "step": 251530 }, { "epoch": 0.5011236134132347, "grad_norm": 0.16924121975898743, "learning_rate": 0.002, "loss": 2.5474, "step": 251540 }, { "epoch": 0.5011435356368736, "grad_norm": 0.15735018253326416, "learning_rate": 0.002, "loss": 2.5505, "step": 251550 }, { "epoch": 0.5011634578605125, "grad_norm": 0.16857264935970306, "learning_rate": 0.002, "loss": 2.5515, "step": 251560 }, { "epoch": 0.5011833800841514, "grad_norm": 0.17638620734214783, "learning_rate": 0.002, "loss": 2.5515, "step": 251570 }, { "epoch": 0.5012033023077904, "grad_norm": 0.1770339161157608, "learning_rate": 0.002, "loss": 2.537, "step": 251580 }, { "epoch": 0.5012232245314293, "grad_norm": 0.17903129756450653, "learning_rate": 0.002, "loss": 2.5524, "step": 251590 }, { "epoch": 0.5012431467550682, "grad_norm": 0.15157924592494965, "learning_rate": 0.002, "loss": 2.5423, "step": 251600 }, { "epoch": 0.5012630689787071, "grad_norm": 0.1668206751346588, "learning_rate": 0.002, "loss": 2.5615, "step": 251610 }, { "epoch": 0.5012829912023461, "grad_norm": 0.1583307981491089, "learning_rate": 0.002, "loss": 2.5467, "step": 251620 }, { "epoch": 0.501302913425985, "grad_norm": 0.16029846668243408, "learning_rate": 0.002, "loss": 2.561, "step": 251630 }, { "epoch": 0.5013228356496239, "grad_norm": 0.19560405611991882, "learning_rate": 0.002, "loss": 2.5625, "step": 251640 }, { "epoch": 0.5013427578732628, "grad_norm": 0.16574987769126892, "learning_rate": 0.002, "loss": 2.5628, "step": 251650 }, { "epoch": 0.5013626800969017, "grad_norm": 0.1707693338394165, "learning_rate": 0.002, "loss": 2.5602, "step": 251660 }, { "epoch": 0.5013826023205407, "grad_norm": 0.15833239257335663, "learning_rate": 0.002, "loss": 2.5601, "step": 251670 }, { "epoch": 0.5014025245441796, "grad_norm": 0.15902279317378998, "learning_rate": 0.002, "loss": 2.5602, "step": 251680 }, { "epoch": 0.5014224467678184, "grad_norm": 0.1845516711473465, "learning_rate": 0.002, "loss": 2.5544, "step": 251690 }, { "epoch": 0.5014423689914573, "grad_norm": 0.20028774440288544, "learning_rate": 0.002, "loss": 2.5535, "step": 251700 }, { "epoch": 0.5014622912150962, "grad_norm": 0.1561126708984375, "learning_rate": 0.002, "loss": 2.5492, "step": 251710 }, { "epoch": 0.5014822134387352, "grad_norm": 0.20880186557769775, "learning_rate": 0.002, "loss": 2.558, "step": 251720 }, { "epoch": 0.5015021356623741, "grad_norm": 0.19194550812244415, "learning_rate": 0.002, "loss": 2.567, "step": 251730 }, { "epoch": 0.501522057886013, "grad_norm": 0.1771078109741211, "learning_rate": 0.002, "loss": 2.5612, "step": 251740 }, { "epoch": 0.5015419801096519, "grad_norm": 0.18537987768650055, "learning_rate": 0.002, "loss": 2.5677, "step": 251750 }, { "epoch": 0.5015619023332908, "grad_norm": 0.1785195767879486, "learning_rate": 0.002, "loss": 2.5633, "step": 251760 }, { "epoch": 0.5015818245569298, "grad_norm": 0.1588030755519867, "learning_rate": 0.002, "loss": 2.5399, "step": 251770 }, { "epoch": 0.5016017467805687, "grad_norm": 0.21561171114444733, "learning_rate": 0.002, "loss": 2.5477, "step": 251780 }, { "epoch": 0.5016216690042076, "grad_norm": 0.158696249127388, "learning_rate": 0.002, "loss": 2.571, "step": 251790 }, { "epoch": 0.5016415912278465, "grad_norm": 0.15633559226989746, "learning_rate": 0.002, "loss": 2.5551, "step": 251800 }, { "epoch": 0.5016615134514854, "grad_norm": 0.17343565821647644, "learning_rate": 0.002, "loss": 2.5429, "step": 251810 }, { "epoch": 0.5016814356751244, "grad_norm": 0.16774988174438477, "learning_rate": 0.002, "loss": 2.5573, "step": 251820 }, { "epoch": 0.5017013578987632, "grad_norm": 0.16775017976760864, "learning_rate": 0.002, "loss": 2.5674, "step": 251830 }, { "epoch": 0.5017212801224021, "grad_norm": 0.14035873115062714, "learning_rate": 0.002, "loss": 2.5536, "step": 251840 }, { "epoch": 0.501741202346041, "grad_norm": 0.19871051609516144, "learning_rate": 0.002, "loss": 2.567, "step": 251850 }, { "epoch": 0.5017611245696799, "grad_norm": 0.16876371204853058, "learning_rate": 0.002, "loss": 2.5441, "step": 251860 }, { "epoch": 0.5017810467933189, "grad_norm": 0.15662942826747894, "learning_rate": 0.002, "loss": 2.5481, "step": 251870 }, { "epoch": 0.5018009690169578, "grad_norm": 0.1526312232017517, "learning_rate": 0.002, "loss": 2.5647, "step": 251880 }, { "epoch": 0.5018208912405967, "grad_norm": 0.18536944687366486, "learning_rate": 0.002, "loss": 2.5295, "step": 251890 }, { "epoch": 0.5018408134642356, "grad_norm": 0.15232625603675842, "learning_rate": 0.002, "loss": 2.5531, "step": 251900 }, { "epoch": 0.5018607356878746, "grad_norm": 0.15002568066120148, "learning_rate": 0.002, "loss": 2.5348, "step": 251910 }, { "epoch": 0.5018806579115135, "grad_norm": 0.1653319150209427, "learning_rate": 0.002, "loss": 2.5694, "step": 251920 }, { "epoch": 0.5019005801351524, "grad_norm": 0.1569400131702423, "learning_rate": 0.002, "loss": 2.551, "step": 251930 }, { "epoch": 0.5019205023587913, "grad_norm": 0.17466126382350922, "learning_rate": 0.002, "loss": 2.552, "step": 251940 }, { "epoch": 0.5019404245824302, "grad_norm": 0.15467670559883118, "learning_rate": 0.002, "loss": 2.559, "step": 251950 }, { "epoch": 0.5019603468060692, "grad_norm": 0.16430361568927765, "learning_rate": 0.002, "loss": 2.569, "step": 251960 }, { "epoch": 0.501980269029708, "grad_norm": 0.16550998389720917, "learning_rate": 0.002, "loss": 2.5586, "step": 251970 }, { "epoch": 0.5020001912533469, "grad_norm": 0.1590609848499298, "learning_rate": 0.002, "loss": 2.5613, "step": 251980 }, { "epoch": 0.5020201134769858, "grad_norm": 0.1810707151889801, "learning_rate": 0.002, "loss": 2.5549, "step": 251990 }, { "epoch": 0.5020400357006247, "grad_norm": 0.17907610535621643, "learning_rate": 0.002, "loss": 2.5566, "step": 252000 }, { "epoch": 0.5020599579242637, "grad_norm": 0.15813395380973816, "learning_rate": 0.002, "loss": 2.5637, "step": 252010 }, { "epoch": 0.5020798801479026, "grad_norm": 0.16076740622520447, "learning_rate": 0.002, "loss": 2.5482, "step": 252020 }, { "epoch": 0.5020998023715415, "grad_norm": 0.169725701212883, "learning_rate": 0.002, "loss": 2.5501, "step": 252030 }, { "epoch": 0.5021197245951804, "grad_norm": 0.1542029082775116, "learning_rate": 0.002, "loss": 2.564, "step": 252040 }, { "epoch": 0.5021396468188193, "grad_norm": 0.1652536541223526, "learning_rate": 0.002, "loss": 2.5674, "step": 252050 }, { "epoch": 0.5021595690424583, "grad_norm": 0.17611061036586761, "learning_rate": 0.002, "loss": 2.5643, "step": 252060 }, { "epoch": 0.5021794912660972, "grad_norm": 0.16638264060020447, "learning_rate": 0.002, "loss": 2.5567, "step": 252070 }, { "epoch": 0.5021994134897361, "grad_norm": 0.1837136447429657, "learning_rate": 0.002, "loss": 2.5523, "step": 252080 }, { "epoch": 0.502219335713375, "grad_norm": 0.1923532634973526, "learning_rate": 0.002, "loss": 2.5548, "step": 252090 }, { "epoch": 0.5022392579370138, "grad_norm": 0.18838536739349365, "learning_rate": 0.002, "loss": 2.5687, "step": 252100 }, { "epoch": 0.5022591801606529, "grad_norm": 0.5845884084701538, "learning_rate": 0.002, "loss": 2.5679, "step": 252110 }, { "epoch": 0.5022791023842917, "grad_norm": 0.18632744252681732, "learning_rate": 0.002, "loss": 2.5664, "step": 252120 }, { "epoch": 0.5022990246079306, "grad_norm": 0.20161542296409607, "learning_rate": 0.002, "loss": 2.5525, "step": 252130 }, { "epoch": 0.5023189468315695, "grad_norm": 0.1629199981689453, "learning_rate": 0.002, "loss": 2.5697, "step": 252140 }, { "epoch": 0.5023388690552084, "grad_norm": 0.16375304758548737, "learning_rate": 0.002, "loss": 2.5542, "step": 252150 }, { "epoch": 0.5023587912788474, "grad_norm": 0.15000489354133606, "learning_rate": 0.002, "loss": 2.5697, "step": 252160 }, { "epoch": 0.5023787135024863, "grad_norm": 0.14191031455993652, "learning_rate": 0.002, "loss": 2.5597, "step": 252170 }, { "epoch": 0.5023986357261252, "grad_norm": 0.19277387857437134, "learning_rate": 0.002, "loss": 2.5594, "step": 252180 }, { "epoch": 0.5024185579497641, "grad_norm": 0.18162138760089874, "learning_rate": 0.002, "loss": 2.5528, "step": 252190 }, { "epoch": 0.502438480173403, "grad_norm": 0.16113324463367462, "learning_rate": 0.002, "loss": 2.5659, "step": 252200 }, { "epoch": 0.502458402397042, "grad_norm": 0.16722626984119415, "learning_rate": 0.002, "loss": 2.5643, "step": 252210 }, { "epoch": 0.5024783246206809, "grad_norm": 0.19138646125793457, "learning_rate": 0.002, "loss": 2.5758, "step": 252220 }, { "epoch": 0.5024982468443198, "grad_norm": 0.1700250506401062, "learning_rate": 0.002, "loss": 2.5719, "step": 252230 }, { "epoch": 0.5025181690679587, "grad_norm": 0.1822909265756607, "learning_rate": 0.002, "loss": 2.5655, "step": 252240 }, { "epoch": 0.5025380912915977, "grad_norm": 0.19094431400299072, "learning_rate": 0.002, "loss": 2.5661, "step": 252250 }, { "epoch": 0.5025580135152365, "grad_norm": 0.1759132742881775, "learning_rate": 0.002, "loss": 2.5507, "step": 252260 }, { "epoch": 0.5025779357388754, "grad_norm": 0.14681705832481384, "learning_rate": 0.002, "loss": 2.5576, "step": 252270 }, { "epoch": 0.5025978579625143, "grad_norm": 0.16980844736099243, "learning_rate": 0.002, "loss": 2.5545, "step": 252280 }, { "epoch": 0.5026177801861532, "grad_norm": 0.13760195672512054, "learning_rate": 0.002, "loss": 2.5521, "step": 252290 }, { "epoch": 0.5026377024097922, "grad_norm": 0.1873490810394287, "learning_rate": 0.002, "loss": 2.5478, "step": 252300 }, { "epoch": 0.5026576246334311, "grad_norm": 0.1657550185918808, "learning_rate": 0.002, "loss": 2.5496, "step": 252310 }, { "epoch": 0.50267754685707, "grad_norm": 0.15476688742637634, "learning_rate": 0.002, "loss": 2.5631, "step": 252320 }, { "epoch": 0.5026974690807089, "grad_norm": 0.17518295347690582, "learning_rate": 0.002, "loss": 2.5573, "step": 252330 }, { "epoch": 0.5027173913043478, "grad_norm": 0.15759919583797455, "learning_rate": 0.002, "loss": 2.5621, "step": 252340 }, { "epoch": 0.5027373135279868, "grad_norm": 0.17815163731575012, "learning_rate": 0.002, "loss": 2.5704, "step": 252350 }, { "epoch": 0.5027572357516257, "grad_norm": 0.16319981217384338, "learning_rate": 0.002, "loss": 2.5576, "step": 252360 }, { "epoch": 0.5027771579752646, "grad_norm": 0.17540834844112396, "learning_rate": 0.002, "loss": 2.5601, "step": 252370 }, { "epoch": 0.5027970801989035, "grad_norm": 0.17179229855537415, "learning_rate": 0.002, "loss": 2.541, "step": 252380 }, { "epoch": 0.5028170024225423, "grad_norm": 0.15535032749176025, "learning_rate": 0.002, "loss": 2.5472, "step": 252390 }, { "epoch": 0.5028369246461813, "grad_norm": 0.17804548144340515, "learning_rate": 0.002, "loss": 2.568, "step": 252400 }, { "epoch": 0.5028568468698202, "grad_norm": 0.14614342153072357, "learning_rate": 0.002, "loss": 2.5439, "step": 252410 }, { "epoch": 0.5028767690934591, "grad_norm": 0.16797871887683868, "learning_rate": 0.002, "loss": 2.5568, "step": 252420 }, { "epoch": 0.502896691317098, "grad_norm": 0.15428216755390167, "learning_rate": 0.002, "loss": 2.5592, "step": 252430 }, { "epoch": 0.5029166135407369, "grad_norm": 0.15748661756515503, "learning_rate": 0.002, "loss": 2.5573, "step": 252440 }, { "epoch": 0.5029365357643759, "grad_norm": 0.1839698702096939, "learning_rate": 0.002, "loss": 2.5585, "step": 252450 }, { "epoch": 0.5029564579880148, "grad_norm": 0.15694189071655273, "learning_rate": 0.002, "loss": 2.568, "step": 252460 }, { "epoch": 0.5029763802116537, "grad_norm": 0.1642783135175705, "learning_rate": 0.002, "loss": 2.5607, "step": 252470 }, { "epoch": 0.5029963024352926, "grad_norm": 0.14805766940116882, "learning_rate": 0.002, "loss": 2.5658, "step": 252480 }, { "epoch": 0.5030162246589315, "grad_norm": 0.17885443568229675, "learning_rate": 0.002, "loss": 2.5389, "step": 252490 }, { "epoch": 0.5030361468825705, "grad_norm": 0.18641233444213867, "learning_rate": 0.002, "loss": 2.5406, "step": 252500 }, { "epoch": 0.5030560691062094, "grad_norm": 0.1722763329744339, "learning_rate": 0.002, "loss": 2.5689, "step": 252510 }, { "epoch": 0.5030759913298483, "grad_norm": 0.14657922089099884, "learning_rate": 0.002, "loss": 2.5571, "step": 252520 }, { "epoch": 0.5030959135534872, "grad_norm": 0.15995100140571594, "learning_rate": 0.002, "loss": 2.5667, "step": 252530 }, { "epoch": 0.5031158357771262, "grad_norm": 0.18433833122253418, "learning_rate": 0.002, "loss": 2.5619, "step": 252540 }, { "epoch": 0.503135758000765, "grad_norm": 0.18431556224822998, "learning_rate": 0.002, "loss": 2.5592, "step": 252550 }, { "epoch": 0.5031556802244039, "grad_norm": 0.19413340091705322, "learning_rate": 0.002, "loss": 2.5522, "step": 252560 }, { "epoch": 0.5031756024480428, "grad_norm": 0.22869333624839783, "learning_rate": 0.002, "loss": 2.5588, "step": 252570 }, { "epoch": 0.5031955246716817, "grad_norm": 0.16851797699928284, "learning_rate": 0.002, "loss": 2.5515, "step": 252580 }, { "epoch": 0.5032154468953207, "grad_norm": 0.1817341297864914, "learning_rate": 0.002, "loss": 2.5615, "step": 252590 }, { "epoch": 0.5032353691189596, "grad_norm": 0.14107662439346313, "learning_rate": 0.002, "loss": 2.5643, "step": 252600 }, { "epoch": 0.5032552913425985, "grad_norm": 0.2003985047340393, "learning_rate": 0.002, "loss": 2.5655, "step": 252610 }, { "epoch": 0.5032752135662374, "grad_norm": 0.1535298377275467, "learning_rate": 0.002, "loss": 2.5475, "step": 252620 }, { "epoch": 0.5032951357898763, "grad_norm": 0.16764768958091736, "learning_rate": 0.002, "loss": 2.5427, "step": 252630 }, { "epoch": 0.5033150580135153, "grad_norm": 0.157411128282547, "learning_rate": 0.002, "loss": 2.5639, "step": 252640 }, { "epoch": 0.5033349802371542, "grad_norm": 0.17619042098522186, "learning_rate": 0.002, "loss": 2.5601, "step": 252650 }, { "epoch": 0.5033549024607931, "grad_norm": 0.18559950590133667, "learning_rate": 0.002, "loss": 2.5607, "step": 252660 }, { "epoch": 0.503374824684432, "grad_norm": 0.1550813913345337, "learning_rate": 0.002, "loss": 2.5521, "step": 252670 }, { "epoch": 0.5033947469080708, "grad_norm": 0.16398389637470245, "learning_rate": 0.002, "loss": 2.571, "step": 252680 }, { "epoch": 0.5034146691317098, "grad_norm": 0.18723569810390472, "learning_rate": 0.002, "loss": 2.5702, "step": 252690 }, { "epoch": 0.5034345913553487, "grad_norm": 0.16107675433158875, "learning_rate": 0.002, "loss": 2.5389, "step": 252700 }, { "epoch": 0.5034545135789876, "grad_norm": 0.1899464726448059, "learning_rate": 0.002, "loss": 2.5327, "step": 252710 }, { "epoch": 0.5034744358026265, "grad_norm": 0.13851222395896912, "learning_rate": 0.002, "loss": 2.5596, "step": 252720 }, { "epoch": 0.5034943580262654, "grad_norm": 0.1427386850118637, "learning_rate": 0.002, "loss": 2.5524, "step": 252730 }, { "epoch": 0.5035142802499044, "grad_norm": 0.19719308614730835, "learning_rate": 0.002, "loss": 2.551, "step": 252740 }, { "epoch": 0.5035342024735433, "grad_norm": 0.16209997236728668, "learning_rate": 0.002, "loss": 2.5549, "step": 252750 }, { "epoch": 0.5035541246971822, "grad_norm": 0.20930925011634827, "learning_rate": 0.002, "loss": 2.5685, "step": 252760 }, { "epoch": 0.5035740469208211, "grad_norm": 0.15154506266117096, "learning_rate": 0.002, "loss": 2.5441, "step": 252770 }, { "epoch": 0.50359396914446, "grad_norm": 0.167817622423172, "learning_rate": 0.002, "loss": 2.5529, "step": 252780 }, { "epoch": 0.503613891368099, "grad_norm": 0.14436177909374237, "learning_rate": 0.002, "loss": 2.5675, "step": 252790 }, { "epoch": 0.5036338135917379, "grad_norm": 0.15673863887786865, "learning_rate": 0.002, "loss": 2.568, "step": 252800 }, { "epoch": 0.5036537358153768, "grad_norm": 0.16111518442630768, "learning_rate": 0.002, "loss": 2.548, "step": 252810 }, { "epoch": 0.5036736580390156, "grad_norm": 0.2024657130241394, "learning_rate": 0.002, "loss": 2.566, "step": 252820 }, { "epoch": 0.5036935802626546, "grad_norm": 0.19714990258216858, "learning_rate": 0.002, "loss": 2.5572, "step": 252830 }, { "epoch": 0.5037135024862935, "grad_norm": 0.1676347255706787, "learning_rate": 0.002, "loss": 2.5593, "step": 252840 }, { "epoch": 0.5037334247099324, "grad_norm": 0.14536969363689423, "learning_rate": 0.002, "loss": 2.5641, "step": 252850 }, { "epoch": 0.5037533469335713, "grad_norm": 0.15198709070682526, "learning_rate": 0.002, "loss": 2.5446, "step": 252860 }, { "epoch": 0.5037732691572102, "grad_norm": 0.2120133340358734, "learning_rate": 0.002, "loss": 2.5535, "step": 252870 }, { "epoch": 0.5037931913808492, "grad_norm": 0.14850562810897827, "learning_rate": 0.002, "loss": 2.5497, "step": 252880 }, { "epoch": 0.5038131136044881, "grad_norm": 0.1769375205039978, "learning_rate": 0.002, "loss": 2.5535, "step": 252890 }, { "epoch": 0.503833035828127, "grad_norm": 0.1587071269750595, "learning_rate": 0.002, "loss": 2.5576, "step": 252900 }, { "epoch": 0.5038529580517659, "grad_norm": 0.18541908264160156, "learning_rate": 0.002, "loss": 2.5464, "step": 252910 }, { "epoch": 0.5038728802754048, "grad_norm": 0.1835709512233734, "learning_rate": 0.002, "loss": 2.5625, "step": 252920 }, { "epoch": 0.5038928024990438, "grad_norm": 0.14566129446029663, "learning_rate": 0.002, "loss": 2.5614, "step": 252930 }, { "epoch": 0.5039127247226827, "grad_norm": 0.18153122067451477, "learning_rate": 0.002, "loss": 2.5616, "step": 252940 }, { "epoch": 0.5039326469463216, "grad_norm": 0.1752903163433075, "learning_rate": 0.002, "loss": 2.563, "step": 252950 }, { "epoch": 0.5039525691699605, "grad_norm": 0.16729778051376343, "learning_rate": 0.002, "loss": 2.5697, "step": 252960 }, { "epoch": 0.5039724913935993, "grad_norm": 0.17646999657154083, "learning_rate": 0.002, "loss": 2.5653, "step": 252970 }, { "epoch": 0.5039924136172383, "grad_norm": 0.1818859726190567, "learning_rate": 0.002, "loss": 2.5479, "step": 252980 }, { "epoch": 0.5040123358408772, "grad_norm": 0.17591382563114166, "learning_rate": 0.002, "loss": 2.5579, "step": 252990 }, { "epoch": 0.5040322580645161, "grad_norm": 0.1677672117948532, "learning_rate": 0.002, "loss": 2.5612, "step": 253000 }, { "epoch": 0.504052180288155, "grad_norm": 0.18761023879051208, "learning_rate": 0.002, "loss": 2.5578, "step": 253010 }, { "epoch": 0.5040721025117939, "grad_norm": 0.1529410034418106, "learning_rate": 0.002, "loss": 2.5637, "step": 253020 }, { "epoch": 0.5040920247354329, "grad_norm": 0.1546381115913391, "learning_rate": 0.002, "loss": 2.5667, "step": 253030 }, { "epoch": 0.5041119469590718, "grad_norm": 0.170536071062088, "learning_rate": 0.002, "loss": 2.5538, "step": 253040 }, { "epoch": 0.5041318691827107, "grad_norm": 0.1903921365737915, "learning_rate": 0.002, "loss": 2.5518, "step": 253050 }, { "epoch": 0.5041517914063496, "grad_norm": 0.16265568137168884, "learning_rate": 0.002, "loss": 2.5617, "step": 253060 }, { "epoch": 0.5041717136299885, "grad_norm": 0.17691029608249664, "learning_rate": 0.002, "loss": 2.5677, "step": 253070 }, { "epoch": 0.5041916358536275, "grad_norm": 0.13831810653209686, "learning_rate": 0.002, "loss": 2.5458, "step": 253080 }, { "epoch": 0.5042115580772664, "grad_norm": 0.21582567691802979, "learning_rate": 0.002, "loss": 2.5597, "step": 253090 }, { "epoch": 0.5042314803009053, "grad_norm": 0.14603179693222046, "learning_rate": 0.002, "loss": 2.5644, "step": 253100 }, { "epoch": 0.5042514025245441, "grad_norm": 0.17671996355056763, "learning_rate": 0.002, "loss": 2.5542, "step": 253110 }, { "epoch": 0.5042713247481831, "grad_norm": 0.15011584758758545, "learning_rate": 0.002, "loss": 2.5653, "step": 253120 }, { "epoch": 0.504291246971822, "grad_norm": 0.16459284722805023, "learning_rate": 0.002, "loss": 2.5513, "step": 253130 }, { "epoch": 0.5043111691954609, "grad_norm": 0.1691935956478119, "learning_rate": 0.002, "loss": 2.5526, "step": 253140 }, { "epoch": 0.5043310914190998, "grad_norm": 0.1935904324054718, "learning_rate": 0.002, "loss": 2.5605, "step": 253150 }, { "epoch": 0.5043510136427387, "grad_norm": 0.18783676624298096, "learning_rate": 0.002, "loss": 2.555, "step": 253160 }, { "epoch": 0.5043709358663777, "grad_norm": 0.16631411015987396, "learning_rate": 0.002, "loss": 2.5489, "step": 253170 }, { "epoch": 0.5043908580900166, "grad_norm": 0.20960766077041626, "learning_rate": 0.002, "loss": 2.5526, "step": 253180 }, { "epoch": 0.5044107803136555, "grad_norm": 0.15682071447372437, "learning_rate": 0.002, "loss": 2.5603, "step": 253190 }, { "epoch": 0.5044307025372944, "grad_norm": 0.19818612933158875, "learning_rate": 0.002, "loss": 2.5478, "step": 253200 }, { "epoch": 0.5044506247609333, "grad_norm": 0.15666593611240387, "learning_rate": 0.002, "loss": 2.5475, "step": 253210 }, { "epoch": 0.5044705469845723, "grad_norm": 0.16950957477092743, "learning_rate": 0.002, "loss": 2.5552, "step": 253220 }, { "epoch": 0.5044904692082112, "grad_norm": 0.16892695426940918, "learning_rate": 0.002, "loss": 2.5501, "step": 253230 }, { "epoch": 0.5045103914318501, "grad_norm": 0.19529835879802704, "learning_rate": 0.002, "loss": 2.5571, "step": 253240 }, { "epoch": 0.504530313655489, "grad_norm": 0.17290937900543213, "learning_rate": 0.002, "loss": 2.5569, "step": 253250 }, { "epoch": 0.5045502358791278, "grad_norm": 0.16780255734920502, "learning_rate": 0.002, "loss": 2.5591, "step": 253260 }, { "epoch": 0.5045701581027668, "grad_norm": 0.15613645315170288, "learning_rate": 0.002, "loss": 2.5602, "step": 253270 }, { "epoch": 0.5045900803264057, "grad_norm": 0.1862722933292389, "learning_rate": 0.002, "loss": 2.5671, "step": 253280 }, { "epoch": 0.5046100025500446, "grad_norm": 0.1627615988254547, "learning_rate": 0.002, "loss": 2.5594, "step": 253290 }, { "epoch": 0.5046299247736835, "grad_norm": 0.1580864042043686, "learning_rate": 0.002, "loss": 2.5496, "step": 253300 }, { "epoch": 0.5046498469973224, "grad_norm": 0.1664864867925644, "learning_rate": 0.002, "loss": 2.5509, "step": 253310 }, { "epoch": 0.5046697692209614, "grad_norm": 0.15301184356212616, "learning_rate": 0.002, "loss": 2.5638, "step": 253320 }, { "epoch": 0.5046896914446003, "grad_norm": 0.17255553603172302, "learning_rate": 0.002, "loss": 2.5611, "step": 253330 }, { "epoch": 0.5047096136682392, "grad_norm": 0.18926209211349487, "learning_rate": 0.002, "loss": 2.5581, "step": 253340 }, { "epoch": 0.5047295358918781, "grad_norm": 0.1916869878768921, "learning_rate": 0.002, "loss": 2.5523, "step": 253350 }, { "epoch": 0.504749458115517, "grad_norm": 0.16052161157131195, "learning_rate": 0.002, "loss": 2.5556, "step": 253360 }, { "epoch": 0.504769380339156, "grad_norm": 0.1827705353498459, "learning_rate": 0.002, "loss": 2.5436, "step": 253370 }, { "epoch": 0.5047893025627949, "grad_norm": 0.15973782539367676, "learning_rate": 0.002, "loss": 2.5605, "step": 253380 }, { "epoch": 0.5048092247864338, "grad_norm": 0.1612125188112259, "learning_rate": 0.002, "loss": 2.5489, "step": 253390 }, { "epoch": 0.5048291470100726, "grad_norm": 0.14682312309741974, "learning_rate": 0.002, "loss": 2.555, "step": 253400 }, { "epoch": 0.5048490692337116, "grad_norm": 0.19537296891212463, "learning_rate": 0.002, "loss": 2.5545, "step": 253410 }, { "epoch": 0.5048689914573505, "grad_norm": 0.14242519438266754, "learning_rate": 0.002, "loss": 2.5596, "step": 253420 }, { "epoch": 0.5048889136809894, "grad_norm": 0.17642837762832642, "learning_rate": 0.002, "loss": 2.5789, "step": 253430 }, { "epoch": 0.5049088359046283, "grad_norm": 0.14201124012470245, "learning_rate": 0.002, "loss": 2.5365, "step": 253440 }, { "epoch": 0.5049287581282672, "grad_norm": 0.20252206921577454, "learning_rate": 0.002, "loss": 2.5632, "step": 253450 }, { "epoch": 0.5049486803519062, "grad_norm": 0.16570866107940674, "learning_rate": 0.002, "loss": 2.5703, "step": 253460 }, { "epoch": 0.5049686025755451, "grad_norm": 0.1719581037759781, "learning_rate": 0.002, "loss": 2.5542, "step": 253470 }, { "epoch": 0.504988524799184, "grad_norm": 0.16683439910411835, "learning_rate": 0.002, "loss": 2.5648, "step": 253480 }, { "epoch": 0.5050084470228229, "grad_norm": 0.16324536502361298, "learning_rate": 0.002, "loss": 2.5674, "step": 253490 }, { "epoch": 0.5050283692464618, "grad_norm": 0.14673560857772827, "learning_rate": 0.002, "loss": 2.5421, "step": 253500 }, { "epoch": 0.5050482914701008, "grad_norm": 0.18016743659973145, "learning_rate": 0.002, "loss": 2.5596, "step": 253510 }, { "epoch": 0.5050682136937397, "grad_norm": 0.18971922993659973, "learning_rate": 0.002, "loss": 2.5612, "step": 253520 }, { "epoch": 0.5050881359173786, "grad_norm": 0.166805237531662, "learning_rate": 0.002, "loss": 2.553, "step": 253530 }, { "epoch": 0.5051080581410174, "grad_norm": 0.14296090602874756, "learning_rate": 0.002, "loss": 2.5601, "step": 253540 }, { "epoch": 0.5051279803646563, "grad_norm": 0.18058453500270844, "learning_rate": 0.002, "loss": 2.5485, "step": 253550 }, { "epoch": 0.5051479025882953, "grad_norm": 0.20697738230228424, "learning_rate": 0.002, "loss": 2.5494, "step": 253560 }, { "epoch": 0.5051678248119342, "grad_norm": 0.15517473220825195, "learning_rate": 0.002, "loss": 2.5565, "step": 253570 }, { "epoch": 0.5051877470355731, "grad_norm": 0.1597076803445816, "learning_rate": 0.002, "loss": 2.5542, "step": 253580 }, { "epoch": 0.505207669259212, "grad_norm": 0.16152480244636536, "learning_rate": 0.002, "loss": 2.5532, "step": 253590 }, { "epoch": 0.5052275914828509, "grad_norm": 0.15601833164691925, "learning_rate": 0.002, "loss": 2.5695, "step": 253600 }, { "epoch": 0.5052475137064899, "grad_norm": 0.17189499735832214, "learning_rate": 0.002, "loss": 2.5722, "step": 253610 }, { "epoch": 0.5052674359301288, "grad_norm": 0.2073138803243637, "learning_rate": 0.002, "loss": 2.5697, "step": 253620 }, { "epoch": 0.5052873581537677, "grad_norm": 0.15652480721473694, "learning_rate": 0.002, "loss": 2.5495, "step": 253630 }, { "epoch": 0.5053072803774066, "grad_norm": 0.17257440090179443, "learning_rate": 0.002, "loss": 2.5527, "step": 253640 }, { "epoch": 0.5053272026010455, "grad_norm": 0.17746813595294952, "learning_rate": 0.002, "loss": 2.5463, "step": 253650 }, { "epoch": 0.5053471248246845, "grad_norm": 0.15832684934139252, "learning_rate": 0.002, "loss": 2.5575, "step": 253660 }, { "epoch": 0.5053670470483234, "grad_norm": 0.1824006289243698, "learning_rate": 0.002, "loss": 2.5629, "step": 253670 }, { "epoch": 0.5053869692719622, "grad_norm": 0.2021971344947815, "learning_rate": 0.002, "loss": 2.5564, "step": 253680 }, { "epoch": 0.5054068914956011, "grad_norm": 0.16523391008377075, "learning_rate": 0.002, "loss": 2.5531, "step": 253690 }, { "epoch": 0.50542681371924, "grad_norm": 0.6282917261123657, "learning_rate": 0.002, "loss": 2.5494, "step": 253700 }, { "epoch": 0.505446735942879, "grad_norm": 0.15800034999847412, "learning_rate": 0.002, "loss": 2.5462, "step": 253710 }, { "epoch": 0.5054666581665179, "grad_norm": 0.18188342452049255, "learning_rate": 0.002, "loss": 2.5526, "step": 253720 }, { "epoch": 0.5054865803901568, "grad_norm": 0.17785680294036865, "learning_rate": 0.002, "loss": 2.549, "step": 253730 }, { "epoch": 0.5055065026137957, "grad_norm": 0.17242619395256042, "learning_rate": 0.002, "loss": 2.5562, "step": 253740 }, { "epoch": 0.5055264248374347, "grad_norm": 0.16455629467964172, "learning_rate": 0.002, "loss": 2.5636, "step": 253750 }, { "epoch": 0.5055463470610736, "grad_norm": 0.17896762490272522, "learning_rate": 0.002, "loss": 2.5552, "step": 253760 }, { "epoch": 0.5055662692847125, "grad_norm": 0.14408817887306213, "learning_rate": 0.002, "loss": 2.5688, "step": 253770 }, { "epoch": 0.5055861915083514, "grad_norm": 0.32000425457954407, "learning_rate": 0.002, "loss": 2.5639, "step": 253780 }, { "epoch": 0.5056061137319903, "grad_norm": 0.1604538857936859, "learning_rate": 0.002, "loss": 2.5492, "step": 253790 }, { "epoch": 0.5056260359556293, "grad_norm": 0.13456076383590698, "learning_rate": 0.002, "loss": 2.5604, "step": 253800 }, { "epoch": 0.5056459581792682, "grad_norm": 0.19319267570972443, "learning_rate": 0.002, "loss": 2.5542, "step": 253810 }, { "epoch": 0.505665880402907, "grad_norm": 0.17424023151397705, "learning_rate": 0.002, "loss": 2.5538, "step": 253820 }, { "epoch": 0.5056858026265459, "grad_norm": 0.1641095131635666, "learning_rate": 0.002, "loss": 2.5718, "step": 253830 }, { "epoch": 0.5057057248501848, "grad_norm": 0.1488809585571289, "learning_rate": 0.002, "loss": 2.5319, "step": 253840 }, { "epoch": 0.5057256470738238, "grad_norm": 0.13701431453227997, "learning_rate": 0.002, "loss": 2.5418, "step": 253850 }, { "epoch": 0.5057455692974627, "grad_norm": 0.21361702680587769, "learning_rate": 0.002, "loss": 2.5473, "step": 253860 }, { "epoch": 0.5057654915211016, "grad_norm": 0.1874084621667862, "learning_rate": 0.002, "loss": 2.5436, "step": 253870 }, { "epoch": 0.5057854137447405, "grad_norm": 0.17576120793819427, "learning_rate": 0.002, "loss": 2.5454, "step": 253880 }, { "epoch": 0.5058053359683794, "grad_norm": 0.20784242451190948, "learning_rate": 0.002, "loss": 2.5674, "step": 253890 }, { "epoch": 0.5058252581920184, "grad_norm": 0.20511634647846222, "learning_rate": 0.002, "loss": 2.557, "step": 253900 }, { "epoch": 0.5058451804156573, "grad_norm": 0.20603007078170776, "learning_rate": 0.002, "loss": 2.5764, "step": 253910 }, { "epoch": 0.5058651026392962, "grad_norm": 0.15443553030490875, "learning_rate": 0.002, "loss": 2.5497, "step": 253920 }, { "epoch": 0.5058850248629351, "grad_norm": 0.15485133230686188, "learning_rate": 0.002, "loss": 2.5392, "step": 253930 }, { "epoch": 0.505904947086574, "grad_norm": 0.15833404660224915, "learning_rate": 0.002, "loss": 2.563, "step": 253940 }, { "epoch": 0.505924869310213, "grad_norm": 0.19449113309383392, "learning_rate": 0.002, "loss": 2.565, "step": 253950 }, { "epoch": 0.5059447915338519, "grad_norm": 0.1793171614408493, "learning_rate": 0.002, "loss": 2.5549, "step": 253960 }, { "epoch": 0.5059647137574907, "grad_norm": 0.16233545541763306, "learning_rate": 0.002, "loss": 2.5497, "step": 253970 }, { "epoch": 0.5059846359811296, "grad_norm": 0.19653145968914032, "learning_rate": 0.002, "loss": 2.5592, "step": 253980 }, { "epoch": 0.5060045582047685, "grad_norm": 0.1643124669790268, "learning_rate": 0.002, "loss": 2.5612, "step": 253990 }, { "epoch": 0.5060244804284075, "grad_norm": 0.15388774871826172, "learning_rate": 0.002, "loss": 2.5582, "step": 254000 }, { "epoch": 0.5060444026520464, "grad_norm": 0.2011011391878128, "learning_rate": 0.002, "loss": 2.5646, "step": 254010 }, { "epoch": 0.5060643248756853, "grad_norm": 0.2578554153442383, "learning_rate": 0.002, "loss": 2.5534, "step": 254020 }, { "epoch": 0.5060842470993242, "grad_norm": 0.14950710535049438, "learning_rate": 0.002, "loss": 2.5471, "step": 254030 }, { "epoch": 0.5061041693229632, "grad_norm": 0.14397789537906647, "learning_rate": 0.002, "loss": 2.5607, "step": 254040 }, { "epoch": 0.5061240915466021, "grad_norm": 0.1731257289648056, "learning_rate": 0.002, "loss": 2.5435, "step": 254050 }, { "epoch": 0.506144013770241, "grad_norm": 0.15896208584308624, "learning_rate": 0.002, "loss": 2.5517, "step": 254060 }, { "epoch": 0.5061639359938799, "grad_norm": 0.1843063086271286, "learning_rate": 0.002, "loss": 2.5672, "step": 254070 }, { "epoch": 0.5061838582175188, "grad_norm": 0.16898147761821747, "learning_rate": 0.002, "loss": 2.54, "step": 254080 }, { "epoch": 0.5062037804411578, "grad_norm": 0.1466064453125, "learning_rate": 0.002, "loss": 2.5512, "step": 254090 }, { "epoch": 0.5062237026647967, "grad_norm": 0.16128939390182495, "learning_rate": 0.002, "loss": 2.5423, "step": 254100 }, { "epoch": 0.5062436248884356, "grad_norm": 0.15797258913516998, "learning_rate": 0.002, "loss": 2.5545, "step": 254110 }, { "epoch": 0.5062635471120744, "grad_norm": 0.18725085258483887, "learning_rate": 0.002, "loss": 2.5483, "step": 254120 }, { "epoch": 0.5062834693357133, "grad_norm": 0.159295916557312, "learning_rate": 0.002, "loss": 2.5549, "step": 254130 }, { "epoch": 0.5063033915593523, "grad_norm": 0.15472017228603363, "learning_rate": 0.002, "loss": 2.5563, "step": 254140 }, { "epoch": 0.5063233137829912, "grad_norm": 0.1871255487203598, "learning_rate": 0.002, "loss": 2.5652, "step": 254150 }, { "epoch": 0.5063432360066301, "grad_norm": 0.14450787007808685, "learning_rate": 0.002, "loss": 2.55, "step": 254160 }, { "epoch": 0.506363158230269, "grad_norm": 0.17997343838214874, "learning_rate": 0.002, "loss": 2.5607, "step": 254170 }, { "epoch": 0.5063830804539079, "grad_norm": 0.14268234372138977, "learning_rate": 0.002, "loss": 2.5509, "step": 254180 }, { "epoch": 0.5064030026775469, "grad_norm": 0.1613040417432785, "learning_rate": 0.002, "loss": 2.5443, "step": 254190 }, { "epoch": 0.5064229249011858, "grad_norm": 0.17984284460544586, "learning_rate": 0.002, "loss": 2.5553, "step": 254200 }, { "epoch": 0.5064428471248247, "grad_norm": 0.1808459460735321, "learning_rate": 0.002, "loss": 2.5512, "step": 254210 }, { "epoch": 0.5064627693484636, "grad_norm": 0.15864373743534088, "learning_rate": 0.002, "loss": 2.5581, "step": 254220 }, { "epoch": 0.5064826915721025, "grad_norm": 0.1628996878862381, "learning_rate": 0.002, "loss": 2.548, "step": 254230 }, { "epoch": 0.5065026137957415, "grad_norm": 0.16366562247276306, "learning_rate": 0.002, "loss": 2.5672, "step": 254240 }, { "epoch": 0.5065225360193804, "grad_norm": 0.16324728727340698, "learning_rate": 0.002, "loss": 2.5546, "step": 254250 }, { "epoch": 0.5065424582430192, "grad_norm": 0.183691143989563, "learning_rate": 0.002, "loss": 2.5651, "step": 254260 }, { "epoch": 0.5065623804666581, "grad_norm": 0.3061813712120056, "learning_rate": 0.002, "loss": 2.5514, "step": 254270 }, { "epoch": 0.506582302690297, "grad_norm": 0.1847577691078186, "learning_rate": 0.002, "loss": 2.5599, "step": 254280 }, { "epoch": 0.506602224913936, "grad_norm": 0.1580638736486435, "learning_rate": 0.002, "loss": 2.5473, "step": 254290 }, { "epoch": 0.5066221471375749, "grad_norm": 0.17973944544792175, "learning_rate": 0.002, "loss": 2.551, "step": 254300 }, { "epoch": 0.5066420693612138, "grad_norm": 0.15086406469345093, "learning_rate": 0.002, "loss": 2.5558, "step": 254310 }, { "epoch": 0.5066619915848527, "grad_norm": 0.2218441516160965, "learning_rate": 0.002, "loss": 2.5582, "step": 254320 }, { "epoch": 0.5066819138084917, "grad_norm": 0.1498488485813141, "learning_rate": 0.002, "loss": 2.5627, "step": 254330 }, { "epoch": 0.5067018360321306, "grad_norm": 0.16877740621566772, "learning_rate": 0.002, "loss": 2.5504, "step": 254340 }, { "epoch": 0.5067217582557695, "grad_norm": 0.17228318750858307, "learning_rate": 0.002, "loss": 2.5448, "step": 254350 }, { "epoch": 0.5067416804794084, "grad_norm": 0.18260973691940308, "learning_rate": 0.002, "loss": 2.5735, "step": 254360 }, { "epoch": 0.5067616027030473, "grad_norm": 0.15726566314697266, "learning_rate": 0.002, "loss": 2.539, "step": 254370 }, { "epoch": 0.5067815249266863, "grad_norm": 0.17455056309700012, "learning_rate": 0.002, "loss": 2.5604, "step": 254380 }, { "epoch": 0.5068014471503252, "grad_norm": 0.16869302093982697, "learning_rate": 0.002, "loss": 2.565, "step": 254390 }, { "epoch": 0.506821369373964, "grad_norm": 0.1738743931055069, "learning_rate": 0.002, "loss": 2.5609, "step": 254400 }, { "epoch": 0.5068412915976029, "grad_norm": 0.15480877459049225, "learning_rate": 0.002, "loss": 2.5549, "step": 254410 }, { "epoch": 0.5068612138212418, "grad_norm": 0.1559358537197113, "learning_rate": 0.002, "loss": 2.5525, "step": 254420 }, { "epoch": 0.5068811360448808, "grad_norm": 0.17092645168304443, "learning_rate": 0.002, "loss": 2.5434, "step": 254430 }, { "epoch": 0.5069010582685197, "grad_norm": 0.1413806676864624, "learning_rate": 0.002, "loss": 2.5661, "step": 254440 }, { "epoch": 0.5069209804921586, "grad_norm": 0.17548587918281555, "learning_rate": 0.002, "loss": 2.5594, "step": 254450 }, { "epoch": 0.5069409027157975, "grad_norm": 0.15747667849063873, "learning_rate": 0.002, "loss": 2.5688, "step": 254460 }, { "epoch": 0.5069608249394364, "grad_norm": 0.16696421802043915, "learning_rate": 0.002, "loss": 2.5664, "step": 254470 }, { "epoch": 0.5069807471630754, "grad_norm": 0.18726420402526855, "learning_rate": 0.002, "loss": 2.5741, "step": 254480 }, { "epoch": 0.5070006693867143, "grad_norm": 0.16954346001148224, "learning_rate": 0.002, "loss": 2.5508, "step": 254490 }, { "epoch": 0.5070205916103532, "grad_norm": 0.1727101355791092, "learning_rate": 0.002, "loss": 2.5561, "step": 254500 }, { "epoch": 0.5070405138339921, "grad_norm": 0.17178687453269958, "learning_rate": 0.002, "loss": 2.5648, "step": 254510 }, { "epoch": 0.507060436057631, "grad_norm": 0.15241456031799316, "learning_rate": 0.002, "loss": 2.5571, "step": 254520 }, { "epoch": 0.50708035828127, "grad_norm": 0.15442369878292084, "learning_rate": 0.002, "loss": 2.5589, "step": 254530 }, { "epoch": 0.5071002805049089, "grad_norm": 0.16029518842697144, "learning_rate": 0.002, "loss": 2.5631, "step": 254540 }, { "epoch": 0.5071202027285477, "grad_norm": 0.15785397589206696, "learning_rate": 0.002, "loss": 2.5557, "step": 254550 }, { "epoch": 0.5071401249521866, "grad_norm": 0.17296242713928223, "learning_rate": 0.002, "loss": 2.5697, "step": 254560 }, { "epoch": 0.5071600471758255, "grad_norm": 0.17075671255588531, "learning_rate": 0.002, "loss": 2.5531, "step": 254570 }, { "epoch": 0.5071799693994645, "grad_norm": 0.15709970891475677, "learning_rate": 0.002, "loss": 2.5565, "step": 254580 }, { "epoch": 0.5071998916231034, "grad_norm": 0.13765469193458557, "learning_rate": 0.002, "loss": 2.5648, "step": 254590 }, { "epoch": 0.5072198138467423, "grad_norm": 0.21014991402626038, "learning_rate": 0.002, "loss": 2.5651, "step": 254600 }, { "epoch": 0.5072397360703812, "grad_norm": 0.15023557841777802, "learning_rate": 0.002, "loss": 2.5766, "step": 254610 }, { "epoch": 0.5072596582940202, "grad_norm": 0.16639362275600433, "learning_rate": 0.002, "loss": 2.5697, "step": 254620 }, { "epoch": 0.5072795805176591, "grad_norm": 0.19137413799762726, "learning_rate": 0.002, "loss": 2.5636, "step": 254630 }, { "epoch": 0.507299502741298, "grad_norm": 0.16509823501110077, "learning_rate": 0.002, "loss": 2.5555, "step": 254640 }, { "epoch": 0.5073194249649369, "grad_norm": 0.18273235857486725, "learning_rate": 0.002, "loss": 2.5622, "step": 254650 }, { "epoch": 0.5073393471885758, "grad_norm": 0.1569249927997589, "learning_rate": 0.002, "loss": 2.5631, "step": 254660 }, { "epoch": 0.5073592694122148, "grad_norm": 0.14997552335262299, "learning_rate": 0.002, "loss": 2.5631, "step": 254670 }, { "epoch": 0.5073791916358537, "grad_norm": 0.1871156394481659, "learning_rate": 0.002, "loss": 2.554, "step": 254680 }, { "epoch": 0.5073991138594925, "grad_norm": 0.1958077847957611, "learning_rate": 0.002, "loss": 2.5586, "step": 254690 }, { "epoch": 0.5074190360831314, "grad_norm": 0.17660650610923767, "learning_rate": 0.002, "loss": 2.567, "step": 254700 }, { "epoch": 0.5074389583067703, "grad_norm": 0.18295125663280487, "learning_rate": 0.002, "loss": 2.5539, "step": 254710 }, { "epoch": 0.5074588805304093, "grad_norm": 0.1804618388414383, "learning_rate": 0.002, "loss": 2.557, "step": 254720 }, { "epoch": 0.5074788027540482, "grad_norm": 0.3000031113624573, "learning_rate": 0.002, "loss": 2.5615, "step": 254730 }, { "epoch": 0.5074987249776871, "grad_norm": 0.15642638504505157, "learning_rate": 0.002, "loss": 2.5686, "step": 254740 }, { "epoch": 0.507518647201326, "grad_norm": 0.14637541770935059, "learning_rate": 0.002, "loss": 2.5506, "step": 254750 }, { "epoch": 0.5075385694249649, "grad_norm": 0.15602736175060272, "learning_rate": 0.002, "loss": 2.559, "step": 254760 }, { "epoch": 0.5075584916486039, "grad_norm": 0.16570769250392914, "learning_rate": 0.002, "loss": 2.5569, "step": 254770 }, { "epoch": 0.5075784138722428, "grad_norm": 0.18131595849990845, "learning_rate": 0.002, "loss": 2.5742, "step": 254780 }, { "epoch": 0.5075983360958817, "grad_norm": 0.17826253175735474, "learning_rate": 0.002, "loss": 2.5564, "step": 254790 }, { "epoch": 0.5076182583195206, "grad_norm": 0.15325674414634705, "learning_rate": 0.002, "loss": 2.5662, "step": 254800 }, { "epoch": 0.5076381805431595, "grad_norm": 0.16837023198604584, "learning_rate": 0.002, "loss": 2.5438, "step": 254810 }, { "epoch": 0.5076581027667985, "grad_norm": 0.16217082738876343, "learning_rate": 0.002, "loss": 2.5667, "step": 254820 }, { "epoch": 0.5076780249904373, "grad_norm": 0.18045830726623535, "learning_rate": 0.002, "loss": 2.562, "step": 254830 }, { "epoch": 0.5076979472140762, "grad_norm": 0.17191998660564423, "learning_rate": 0.002, "loss": 2.567, "step": 254840 }, { "epoch": 0.5077178694377151, "grad_norm": 0.16529321670532227, "learning_rate": 0.002, "loss": 2.5606, "step": 254850 }, { "epoch": 0.507737791661354, "grad_norm": 0.18113891780376434, "learning_rate": 0.002, "loss": 2.5467, "step": 254860 }, { "epoch": 0.507757713884993, "grad_norm": 0.16680166125297546, "learning_rate": 0.002, "loss": 2.5731, "step": 254870 }, { "epoch": 0.5077776361086319, "grad_norm": 0.17342321574687958, "learning_rate": 0.002, "loss": 2.5579, "step": 254880 }, { "epoch": 0.5077975583322708, "grad_norm": 0.1934664100408554, "learning_rate": 0.002, "loss": 2.5756, "step": 254890 }, { "epoch": 0.5078174805559097, "grad_norm": 0.16820889711380005, "learning_rate": 0.002, "loss": 2.5552, "step": 254900 }, { "epoch": 0.5078374027795487, "grad_norm": 0.172520250082016, "learning_rate": 0.002, "loss": 2.5485, "step": 254910 }, { "epoch": 0.5078573250031876, "grad_norm": 0.1399538815021515, "learning_rate": 0.002, "loss": 2.5568, "step": 254920 }, { "epoch": 0.5078772472268265, "grad_norm": 0.1638191044330597, "learning_rate": 0.002, "loss": 2.5642, "step": 254930 }, { "epoch": 0.5078971694504654, "grad_norm": 0.17878147959709167, "learning_rate": 0.002, "loss": 2.5436, "step": 254940 }, { "epoch": 0.5079170916741043, "grad_norm": 0.1751423329114914, "learning_rate": 0.002, "loss": 2.5818, "step": 254950 }, { "epoch": 0.5079370138977433, "grad_norm": 0.19140344858169556, "learning_rate": 0.002, "loss": 2.5648, "step": 254960 }, { "epoch": 0.5079569361213822, "grad_norm": 0.15586090087890625, "learning_rate": 0.002, "loss": 2.5511, "step": 254970 }, { "epoch": 0.507976858345021, "grad_norm": 0.18658201396465302, "learning_rate": 0.002, "loss": 2.5611, "step": 254980 }, { "epoch": 0.5079967805686599, "grad_norm": 0.17719730734825134, "learning_rate": 0.002, "loss": 2.5601, "step": 254990 }, { "epoch": 0.5080167027922988, "grad_norm": 0.1596856713294983, "learning_rate": 0.002, "loss": 2.5546, "step": 255000 }, { "epoch": 0.5080366250159378, "grad_norm": 0.2522900104522705, "learning_rate": 0.002, "loss": 2.55, "step": 255010 }, { "epoch": 0.5080565472395767, "grad_norm": 0.16538745164871216, "learning_rate": 0.002, "loss": 2.5649, "step": 255020 }, { "epoch": 0.5080764694632156, "grad_norm": 0.14083269238471985, "learning_rate": 0.002, "loss": 2.5522, "step": 255030 }, { "epoch": 0.5080963916868545, "grad_norm": 0.14514432847499847, "learning_rate": 0.002, "loss": 2.5475, "step": 255040 }, { "epoch": 0.5081163139104934, "grad_norm": 0.1844283640384674, "learning_rate": 0.002, "loss": 2.5613, "step": 255050 }, { "epoch": 0.5081362361341324, "grad_norm": 0.15299680829048157, "learning_rate": 0.002, "loss": 2.5497, "step": 255060 }, { "epoch": 0.5081561583577713, "grad_norm": 0.15706290304660797, "learning_rate": 0.002, "loss": 2.553, "step": 255070 }, { "epoch": 0.5081760805814102, "grad_norm": 0.1641596406698227, "learning_rate": 0.002, "loss": 2.545, "step": 255080 }, { "epoch": 0.5081960028050491, "grad_norm": 0.1549047827720642, "learning_rate": 0.002, "loss": 2.55, "step": 255090 }, { "epoch": 0.508215925028688, "grad_norm": 0.17481140792369843, "learning_rate": 0.002, "loss": 2.5612, "step": 255100 }, { "epoch": 0.508235847252327, "grad_norm": 0.18943467736244202, "learning_rate": 0.002, "loss": 2.569, "step": 255110 }, { "epoch": 0.5082557694759658, "grad_norm": 0.16307289898395538, "learning_rate": 0.002, "loss": 2.5494, "step": 255120 }, { "epoch": 0.5082756916996047, "grad_norm": 0.17327627539634705, "learning_rate": 0.002, "loss": 2.5557, "step": 255130 }, { "epoch": 0.5082956139232436, "grad_norm": 0.19215041399002075, "learning_rate": 0.002, "loss": 2.5576, "step": 255140 }, { "epoch": 0.5083155361468825, "grad_norm": 0.19981712102890015, "learning_rate": 0.002, "loss": 2.5604, "step": 255150 }, { "epoch": 0.5083354583705215, "grad_norm": 0.15053634345531464, "learning_rate": 0.002, "loss": 2.5435, "step": 255160 }, { "epoch": 0.5083553805941604, "grad_norm": 0.17127452790737152, "learning_rate": 0.002, "loss": 2.5597, "step": 255170 }, { "epoch": 0.5083753028177993, "grad_norm": 0.15850244462490082, "learning_rate": 0.002, "loss": 2.5672, "step": 255180 }, { "epoch": 0.5083952250414382, "grad_norm": 0.178798109292984, "learning_rate": 0.002, "loss": 2.5647, "step": 255190 }, { "epoch": 0.5084151472650772, "grad_norm": 0.18291036784648895, "learning_rate": 0.002, "loss": 2.5403, "step": 255200 }, { "epoch": 0.5084350694887161, "grad_norm": 0.18146295845508575, "learning_rate": 0.002, "loss": 2.5471, "step": 255210 }, { "epoch": 0.508454991712355, "grad_norm": 0.15382327139377594, "learning_rate": 0.002, "loss": 2.5547, "step": 255220 }, { "epoch": 0.5084749139359939, "grad_norm": 0.15636061131954193, "learning_rate": 0.002, "loss": 2.5617, "step": 255230 }, { "epoch": 0.5084948361596328, "grad_norm": 0.1728820651769638, "learning_rate": 0.002, "loss": 2.5768, "step": 255240 }, { "epoch": 0.5085147583832718, "grad_norm": 0.1538231372833252, "learning_rate": 0.002, "loss": 2.5613, "step": 255250 }, { "epoch": 0.5085346806069106, "grad_norm": 0.1942165195941925, "learning_rate": 0.002, "loss": 2.5576, "step": 255260 }, { "epoch": 0.5085546028305495, "grad_norm": 0.16738614439964294, "learning_rate": 0.002, "loss": 2.5753, "step": 255270 }, { "epoch": 0.5085745250541884, "grad_norm": 0.16639059782028198, "learning_rate": 0.002, "loss": 2.5461, "step": 255280 }, { "epoch": 0.5085944472778273, "grad_norm": 0.1745712161064148, "learning_rate": 0.002, "loss": 2.5487, "step": 255290 }, { "epoch": 0.5086143695014663, "grad_norm": 0.1444627344608307, "learning_rate": 0.002, "loss": 2.5631, "step": 255300 }, { "epoch": 0.5086342917251052, "grad_norm": 0.1611265391111374, "learning_rate": 0.002, "loss": 2.5607, "step": 255310 }, { "epoch": 0.5086542139487441, "grad_norm": 0.1698075383901596, "learning_rate": 0.002, "loss": 2.5591, "step": 255320 }, { "epoch": 0.508674136172383, "grad_norm": 0.12842653691768646, "learning_rate": 0.002, "loss": 2.5625, "step": 255330 }, { "epoch": 0.5086940583960219, "grad_norm": 0.14831626415252686, "learning_rate": 0.002, "loss": 2.5506, "step": 255340 }, { "epoch": 0.5087139806196609, "grad_norm": 0.17094610631465912, "learning_rate": 0.002, "loss": 2.5665, "step": 255350 }, { "epoch": 0.5087339028432998, "grad_norm": 0.22176086902618408, "learning_rate": 0.002, "loss": 2.5548, "step": 255360 }, { "epoch": 0.5087538250669387, "grad_norm": 0.1762888878583908, "learning_rate": 0.002, "loss": 2.5706, "step": 255370 }, { "epoch": 0.5087737472905776, "grad_norm": 0.19194191694259644, "learning_rate": 0.002, "loss": 2.5676, "step": 255380 }, { "epoch": 0.5087936695142165, "grad_norm": 0.16024141013622284, "learning_rate": 0.002, "loss": 2.5633, "step": 255390 }, { "epoch": 0.5088135917378555, "grad_norm": 0.15533232688903809, "learning_rate": 0.002, "loss": 2.5693, "step": 255400 }, { "epoch": 0.5088335139614943, "grad_norm": 0.2243977040052414, "learning_rate": 0.002, "loss": 2.5465, "step": 255410 }, { "epoch": 0.5088534361851332, "grad_norm": 0.18508560955524445, "learning_rate": 0.002, "loss": 2.5476, "step": 255420 }, { "epoch": 0.5088733584087721, "grad_norm": 0.1590159833431244, "learning_rate": 0.002, "loss": 2.5521, "step": 255430 }, { "epoch": 0.508893280632411, "grad_norm": 0.1521196961402893, "learning_rate": 0.002, "loss": 2.5667, "step": 255440 }, { "epoch": 0.50891320285605, "grad_norm": 0.1503319889307022, "learning_rate": 0.002, "loss": 2.5429, "step": 255450 }, { "epoch": 0.5089331250796889, "grad_norm": 0.15792876482009888, "learning_rate": 0.002, "loss": 2.5816, "step": 255460 }, { "epoch": 0.5089530473033278, "grad_norm": 0.22089144587516785, "learning_rate": 0.002, "loss": 2.5631, "step": 255470 }, { "epoch": 0.5089729695269667, "grad_norm": 0.18831588327884674, "learning_rate": 0.002, "loss": 2.5608, "step": 255480 }, { "epoch": 0.5089928917506056, "grad_norm": 0.17774057388305664, "learning_rate": 0.002, "loss": 2.5584, "step": 255490 }, { "epoch": 0.5090128139742446, "grad_norm": 0.16513697803020477, "learning_rate": 0.002, "loss": 2.5613, "step": 255500 }, { "epoch": 0.5090327361978835, "grad_norm": 0.1908426135778427, "learning_rate": 0.002, "loss": 2.57, "step": 255510 }, { "epoch": 0.5090526584215224, "grad_norm": 0.169847309589386, "learning_rate": 0.002, "loss": 2.5485, "step": 255520 }, { "epoch": 0.5090725806451613, "grad_norm": 0.1677486002445221, "learning_rate": 0.002, "loss": 2.5499, "step": 255530 }, { "epoch": 0.5090925028688003, "grad_norm": 0.1896335482597351, "learning_rate": 0.002, "loss": 2.5574, "step": 255540 }, { "epoch": 0.5091124250924391, "grad_norm": 0.1819293349981308, "learning_rate": 0.002, "loss": 2.5621, "step": 255550 }, { "epoch": 0.509132347316078, "grad_norm": 0.15465191006660461, "learning_rate": 0.002, "loss": 2.5544, "step": 255560 }, { "epoch": 0.5091522695397169, "grad_norm": 0.16187573969364166, "learning_rate": 0.002, "loss": 2.5523, "step": 255570 }, { "epoch": 0.5091721917633558, "grad_norm": 0.16123802959918976, "learning_rate": 0.002, "loss": 2.5553, "step": 255580 }, { "epoch": 0.5091921139869948, "grad_norm": 0.16912642121315002, "learning_rate": 0.002, "loss": 2.5654, "step": 255590 }, { "epoch": 0.5092120362106337, "grad_norm": 0.2004288285970688, "learning_rate": 0.002, "loss": 2.5666, "step": 255600 }, { "epoch": 0.5092319584342726, "grad_norm": 0.2050522416830063, "learning_rate": 0.002, "loss": 2.5664, "step": 255610 }, { "epoch": 0.5092518806579115, "grad_norm": 0.1621929407119751, "learning_rate": 0.002, "loss": 2.5703, "step": 255620 }, { "epoch": 0.5092718028815504, "grad_norm": 0.16305415332317352, "learning_rate": 0.002, "loss": 2.5663, "step": 255630 }, { "epoch": 0.5092917251051894, "grad_norm": 0.16369903087615967, "learning_rate": 0.002, "loss": 2.5654, "step": 255640 }, { "epoch": 0.5093116473288283, "grad_norm": 0.14280575513839722, "learning_rate": 0.002, "loss": 2.5479, "step": 255650 }, { "epoch": 0.5093315695524672, "grad_norm": 0.16846264898777008, "learning_rate": 0.002, "loss": 2.5512, "step": 255660 }, { "epoch": 0.5093514917761061, "grad_norm": 0.16182301938533783, "learning_rate": 0.002, "loss": 2.5436, "step": 255670 }, { "epoch": 0.509371413999745, "grad_norm": 0.17758773267269135, "learning_rate": 0.002, "loss": 2.5765, "step": 255680 }, { "epoch": 0.509391336223384, "grad_norm": 0.1639573574066162, "learning_rate": 0.002, "loss": 2.561, "step": 255690 }, { "epoch": 0.5094112584470228, "grad_norm": 0.20671144127845764, "learning_rate": 0.002, "loss": 2.5656, "step": 255700 }, { "epoch": 0.5094311806706617, "grad_norm": 0.1580667793750763, "learning_rate": 0.002, "loss": 2.5516, "step": 255710 }, { "epoch": 0.5094511028943006, "grad_norm": 0.2220378816127777, "learning_rate": 0.002, "loss": 2.5612, "step": 255720 }, { "epoch": 0.5094710251179395, "grad_norm": 0.17722325026988983, "learning_rate": 0.002, "loss": 2.5727, "step": 255730 }, { "epoch": 0.5094909473415785, "grad_norm": 0.14959588646888733, "learning_rate": 0.002, "loss": 2.5432, "step": 255740 }, { "epoch": 0.5095108695652174, "grad_norm": 0.17015989124774933, "learning_rate": 0.002, "loss": 2.5471, "step": 255750 }, { "epoch": 0.5095307917888563, "grad_norm": 0.17574264109134674, "learning_rate": 0.002, "loss": 2.5704, "step": 255760 }, { "epoch": 0.5095507140124952, "grad_norm": 0.19159859418869019, "learning_rate": 0.002, "loss": 2.5643, "step": 255770 }, { "epoch": 0.5095706362361341, "grad_norm": 0.13796788454055786, "learning_rate": 0.002, "loss": 2.5655, "step": 255780 }, { "epoch": 0.5095905584597731, "grad_norm": 0.1811663955450058, "learning_rate": 0.002, "loss": 2.5711, "step": 255790 }, { "epoch": 0.509610480683412, "grad_norm": 0.17033590376377106, "learning_rate": 0.002, "loss": 2.5543, "step": 255800 }, { "epoch": 0.5096304029070509, "grad_norm": 0.15475234389305115, "learning_rate": 0.002, "loss": 2.5591, "step": 255810 }, { "epoch": 0.5096503251306898, "grad_norm": 0.14611075818538666, "learning_rate": 0.002, "loss": 2.5621, "step": 255820 }, { "epoch": 0.5096702473543288, "grad_norm": 0.19661812484264374, "learning_rate": 0.002, "loss": 2.5435, "step": 255830 }, { "epoch": 0.5096901695779676, "grad_norm": 0.14262473583221436, "learning_rate": 0.002, "loss": 2.5469, "step": 255840 }, { "epoch": 0.5097100918016065, "grad_norm": 0.15391647815704346, "learning_rate": 0.002, "loss": 2.5474, "step": 255850 }, { "epoch": 0.5097300140252454, "grad_norm": 0.17342320084571838, "learning_rate": 0.002, "loss": 2.5646, "step": 255860 }, { "epoch": 0.5097499362488843, "grad_norm": 0.15557415783405304, "learning_rate": 0.002, "loss": 2.5513, "step": 255870 }, { "epoch": 0.5097698584725233, "grad_norm": 0.1536415070295334, "learning_rate": 0.002, "loss": 2.5621, "step": 255880 }, { "epoch": 0.5097897806961622, "grad_norm": 0.1760464310646057, "learning_rate": 0.002, "loss": 2.5555, "step": 255890 }, { "epoch": 0.5098097029198011, "grad_norm": 0.18147969245910645, "learning_rate": 0.002, "loss": 2.5649, "step": 255900 }, { "epoch": 0.50982962514344, "grad_norm": 0.16417255997657776, "learning_rate": 0.002, "loss": 2.5565, "step": 255910 }, { "epoch": 0.5098495473670789, "grad_norm": 0.16026869416236877, "learning_rate": 0.002, "loss": 2.5696, "step": 255920 }, { "epoch": 0.5098694695907179, "grad_norm": 0.17761380970478058, "learning_rate": 0.002, "loss": 2.5454, "step": 255930 }, { "epoch": 0.5098893918143568, "grad_norm": 0.19057995080947876, "learning_rate": 0.002, "loss": 2.5496, "step": 255940 }, { "epoch": 0.5099093140379957, "grad_norm": 0.15969684720039368, "learning_rate": 0.002, "loss": 2.554, "step": 255950 }, { "epoch": 0.5099292362616346, "grad_norm": 0.18659567832946777, "learning_rate": 0.002, "loss": 2.5662, "step": 255960 }, { "epoch": 0.5099491584852734, "grad_norm": 0.1614118069410324, "learning_rate": 0.002, "loss": 2.5402, "step": 255970 }, { "epoch": 0.5099690807089124, "grad_norm": 0.17463906109333038, "learning_rate": 0.002, "loss": 2.5697, "step": 255980 }, { "epoch": 0.5099890029325513, "grad_norm": 0.16848640143871307, "learning_rate": 0.002, "loss": 2.5537, "step": 255990 }, { "epoch": 0.5100089251561902, "grad_norm": 0.1890168935060501, "learning_rate": 0.002, "loss": 2.552, "step": 256000 }, { "epoch": 0.5100288473798291, "grad_norm": 0.15798696875572205, "learning_rate": 0.002, "loss": 2.5578, "step": 256010 }, { "epoch": 0.510048769603468, "grad_norm": 0.1794407069683075, "learning_rate": 0.002, "loss": 2.5461, "step": 256020 }, { "epoch": 0.510068691827107, "grad_norm": 0.17407222092151642, "learning_rate": 0.002, "loss": 2.5695, "step": 256030 }, { "epoch": 0.5100886140507459, "grad_norm": 0.15684276819229126, "learning_rate": 0.002, "loss": 2.5551, "step": 256040 }, { "epoch": 0.5101085362743848, "grad_norm": 0.18191410601139069, "learning_rate": 0.002, "loss": 2.5545, "step": 256050 }, { "epoch": 0.5101284584980237, "grad_norm": 0.18133093416690826, "learning_rate": 0.002, "loss": 2.5554, "step": 256060 }, { "epoch": 0.5101483807216626, "grad_norm": 0.23212605714797974, "learning_rate": 0.002, "loss": 2.5532, "step": 256070 }, { "epoch": 0.5101683029453016, "grad_norm": 0.16551214456558228, "learning_rate": 0.002, "loss": 2.5673, "step": 256080 }, { "epoch": 0.5101882251689405, "grad_norm": 0.16949045658111572, "learning_rate": 0.002, "loss": 2.5612, "step": 256090 }, { "epoch": 0.5102081473925794, "grad_norm": 0.17955219745635986, "learning_rate": 0.002, "loss": 2.5583, "step": 256100 }, { "epoch": 0.5102280696162182, "grad_norm": 0.15515723824501038, "learning_rate": 0.002, "loss": 2.5637, "step": 256110 }, { "epoch": 0.5102479918398573, "grad_norm": 0.15185946226119995, "learning_rate": 0.002, "loss": 2.5621, "step": 256120 }, { "epoch": 0.5102679140634961, "grad_norm": 0.17432044446468353, "learning_rate": 0.002, "loss": 2.5664, "step": 256130 }, { "epoch": 0.510287836287135, "grad_norm": 0.16740146279335022, "learning_rate": 0.002, "loss": 2.5573, "step": 256140 }, { "epoch": 0.5103077585107739, "grad_norm": 0.1686094105243683, "learning_rate": 0.002, "loss": 2.5534, "step": 256150 }, { "epoch": 0.5103276807344128, "grad_norm": 0.15008915960788727, "learning_rate": 0.002, "loss": 2.5439, "step": 256160 }, { "epoch": 0.5103476029580518, "grad_norm": 0.15659640729427338, "learning_rate": 0.002, "loss": 2.5579, "step": 256170 }, { "epoch": 0.5103675251816907, "grad_norm": 0.18573184311389923, "learning_rate": 0.002, "loss": 2.553, "step": 256180 }, { "epoch": 0.5103874474053296, "grad_norm": 0.1601649671792984, "learning_rate": 0.002, "loss": 2.5389, "step": 256190 }, { "epoch": 0.5104073696289685, "grad_norm": 0.1582890897989273, "learning_rate": 0.002, "loss": 2.5518, "step": 256200 }, { "epoch": 0.5104272918526074, "grad_norm": 0.27114394307136536, "learning_rate": 0.002, "loss": 2.5558, "step": 256210 }, { "epoch": 0.5104472140762464, "grad_norm": 0.14726412296295166, "learning_rate": 0.002, "loss": 2.5587, "step": 256220 }, { "epoch": 0.5104671362998853, "grad_norm": 0.1530989408493042, "learning_rate": 0.002, "loss": 2.5596, "step": 256230 }, { "epoch": 0.5104870585235242, "grad_norm": 0.15510812401771545, "learning_rate": 0.002, "loss": 2.5586, "step": 256240 }, { "epoch": 0.510506980747163, "grad_norm": 0.16686014831066132, "learning_rate": 0.002, "loss": 2.5478, "step": 256250 }, { "epoch": 0.5105269029708019, "grad_norm": 0.18891595304012299, "learning_rate": 0.002, "loss": 2.5435, "step": 256260 }, { "epoch": 0.510546825194441, "grad_norm": 0.20884281396865845, "learning_rate": 0.002, "loss": 2.5623, "step": 256270 }, { "epoch": 0.5105667474180798, "grad_norm": 0.20418867468833923, "learning_rate": 0.002, "loss": 2.5542, "step": 256280 }, { "epoch": 0.5105866696417187, "grad_norm": 0.1809288114309311, "learning_rate": 0.002, "loss": 2.5685, "step": 256290 }, { "epoch": 0.5106065918653576, "grad_norm": 0.15507306158542633, "learning_rate": 0.002, "loss": 2.5668, "step": 256300 }, { "epoch": 0.5106265140889965, "grad_norm": 0.16359291970729828, "learning_rate": 0.002, "loss": 2.5519, "step": 256310 }, { "epoch": 0.5106464363126355, "grad_norm": 0.19519516825675964, "learning_rate": 0.002, "loss": 2.5545, "step": 256320 }, { "epoch": 0.5106663585362744, "grad_norm": 0.1710263192653656, "learning_rate": 0.002, "loss": 2.5549, "step": 256330 }, { "epoch": 0.5106862807599133, "grad_norm": 0.16411545872688293, "learning_rate": 0.002, "loss": 2.5579, "step": 256340 }, { "epoch": 0.5107062029835522, "grad_norm": 0.15965501964092255, "learning_rate": 0.002, "loss": 2.5645, "step": 256350 }, { "epoch": 0.5107261252071911, "grad_norm": 0.16280429065227509, "learning_rate": 0.002, "loss": 2.5411, "step": 256360 }, { "epoch": 0.5107460474308301, "grad_norm": 0.15151681005954742, "learning_rate": 0.002, "loss": 2.5705, "step": 256370 }, { "epoch": 0.510765969654469, "grad_norm": 0.1563129723072052, "learning_rate": 0.002, "loss": 2.5749, "step": 256380 }, { "epoch": 0.5107858918781079, "grad_norm": 0.1660267859697342, "learning_rate": 0.002, "loss": 2.5643, "step": 256390 }, { "epoch": 0.5108058141017467, "grad_norm": 0.2089480608701706, "learning_rate": 0.002, "loss": 2.5665, "step": 256400 }, { "epoch": 0.5108257363253857, "grad_norm": 0.18356581032276154, "learning_rate": 0.002, "loss": 2.5447, "step": 256410 }, { "epoch": 0.5108456585490246, "grad_norm": 0.18767143785953522, "learning_rate": 0.002, "loss": 2.5577, "step": 256420 }, { "epoch": 0.5108655807726635, "grad_norm": 0.16480858623981476, "learning_rate": 0.002, "loss": 2.5509, "step": 256430 }, { "epoch": 0.5108855029963024, "grad_norm": 0.1510385423898697, "learning_rate": 0.002, "loss": 2.5556, "step": 256440 }, { "epoch": 0.5109054252199413, "grad_norm": 0.19316814839839935, "learning_rate": 0.002, "loss": 2.5517, "step": 256450 }, { "epoch": 0.5109253474435803, "grad_norm": 0.19643597304821014, "learning_rate": 0.002, "loss": 2.5608, "step": 256460 }, { "epoch": 0.5109452696672192, "grad_norm": 0.16483302414417267, "learning_rate": 0.002, "loss": 2.5509, "step": 256470 }, { "epoch": 0.5109651918908581, "grad_norm": 0.1520504653453827, "learning_rate": 0.002, "loss": 2.5438, "step": 256480 }, { "epoch": 0.510985114114497, "grad_norm": 0.16257938742637634, "learning_rate": 0.002, "loss": 2.5649, "step": 256490 }, { "epoch": 0.5110050363381359, "grad_norm": 0.15076181292533875, "learning_rate": 0.002, "loss": 2.5463, "step": 256500 }, { "epoch": 0.5110249585617749, "grad_norm": 0.17052900791168213, "learning_rate": 0.002, "loss": 2.5638, "step": 256510 }, { "epoch": 0.5110448807854138, "grad_norm": 0.19309931993484497, "learning_rate": 0.002, "loss": 2.5549, "step": 256520 }, { "epoch": 0.5110648030090527, "grad_norm": 0.15328502655029297, "learning_rate": 0.002, "loss": 2.5448, "step": 256530 }, { "epoch": 0.5110847252326916, "grad_norm": 0.17034442722797394, "learning_rate": 0.002, "loss": 2.5596, "step": 256540 }, { "epoch": 0.5111046474563304, "grad_norm": 0.19147343933582306, "learning_rate": 0.002, "loss": 2.5585, "step": 256550 }, { "epoch": 0.5111245696799694, "grad_norm": 0.15655797719955444, "learning_rate": 0.002, "loss": 2.5535, "step": 256560 }, { "epoch": 0.5111444919036083, "grad_norm": 0.18628211319446564, "learning_rate": 0.002, "loss": 2.569, "step": 256570 }, { "epoch": 0.5111644141272472, "grad_norm": 0.24530576169490814, "learning_rate": 0.002, "loss": 2.5643, "step": 256580 }, { "epoch": 0.5111843363508861, "grad_norm": 0.1866609901189804, "learning_rate": 0.002, "loss": 2.5543, "step": 256590 }, { "epoch": 0.511204258574525, "grad_norm": 0.14702320098876953, "learning_rate": 0.002, "loss": 2.5539, "step": 256600 }, { "epoch": 0.511224180798164, "grad_norm": 0.1596207618713379, "learning_rate": 0.002, "loss": 2.5489, "step": 256610 }, { "epoch": 0.5112441030218029, "grad_norm": 0.14822793006896973, "learning_rate": 0.002, "loss": 2.5713, "step": 256620 }, { "epoch": 0.5112640252454418, "grad_norm": 0.14085493981838226, "learning_rate": 0.002, "loss": 2.5527, "step": 256630 }, { "epoch": 0.5112839474690807, "grad_norm": 0.15303732454776764, "learning_rate": 0.002, "loss": 2.5592, "step": 256640 }, { "epoch": 0.5113038696927196, "grad_norm": 0.1825326681137085, "learning_rate": 0.002, "loss": 2.5445, "step": 256650 }, { "epoch": 0.5113237919163586, "grad_norm": 0.13444404304027557, "learning_rate": 0.002, "loss": 2.5557, "step": 256660 }, { "epoch": 0.5113437141399975, "grad_norm": 0.15486423671245575, "learning_rate": 0.002, "loss": 2.5553, "step": 256670 }, { "epoch": 0.5113636363636364, "grad_norm": 0.18200799822807312, "learning_rate": 0.002, "loss": 2.5508, "step": 256680 }, { "epoch": 0.5113835585872752, "grad_norm": 0.1843867152929306, "learning_rate": 0.002, "loss": 2.5553, "step": 256690 }, { "epoch": 0.5114034808109142, "grad_norm": 0.1616617888212204, "learning_rate": 0.002, "loss": 2.5551, "step": 256700 }, { "epoch": 0.5114234030345531, "grad_norm": 0.17453838884830475, "learning_rate": 0.002, "loss": 2.5399, "step": 256710 }, { "epoch": 0.511443325258192, "grad_norm": 0.1563020646572113, "learning_rate": 0.002, "loss": 2.5484, "step": 256720 }, { "epoch": 0.5114632474818309, "grad_norm": 0.1606241762638092, "learning_rate": 0.002, "loss": 2.563, "step": 256730 }, { "epoch": 0.5114831697054698, "grad_norm": 0.17418503761291504, "learning_rate": 0.002, "loss": 2.5548, "step": 256740 }, { "epoch": 0.5115030919291088, "grad_norm": 0.15304724872112274, "learning_rate": 0.002, "loss": 2.5711, "step": 256750 }, { "epoch": 0.5115230141527477, "grad_norm": 0.1504257768392563, "learning_rate": 0.002, "loss": 2.5577, "step": 256760 }, { "epoch": 0.5115429363763866, "grad_norm": 0.17030879855155945, "learning_rate": 0.002, "loss": 2.5454, "step": 256770 }, { "epoch": 0.5115628586000255, "grad_norm": 0.1601412147283554, "learning_rate": 0.002, "loss": 2.5648, "step": 256780 }, { "epoch": 0.5115827808236644, "grad_norm": 0.1867067515850067, "learning_rate": 0.002, "loss": 2.5702, "step": 256790 }, { "epoch": 0.5116027030473034, "grad_norm": 0.17973415553569794, "learning_rate": 0.002, "loss": 2.5519, "step": 256800 }, { "epoch": 0.5116226252709423, "grad_norm": 0.16156147420406342, "learning_rate": 0.002, "loss": 2.5474, "step": 256810 }, { "epoch": 0.5116425474945812, "grad_norm": 0.17128267884254456, "learning_rate": 0.002, "loss": 2.5661, "step": 256820 }, { "epoch": 0.51166246971822, "grad_norm": 0.18775540590286255, "learning_rate": 0.002, "loss": 2.546, "step": 256830 }, { "epoch": 0.5116823919418589, "grad_norm": 0.17004764080047607, "learning_rate": 0.002, "loss": 2.5471, "step": 256840 }, { "epoch": 0.5117023141654979, "grad_norm": 0.18355511128902435, "learning_rate": 0.002, "loss": 2.5649, "step": 256850 }, { "epoch": 0.5117222363891368, "grad_norm": 0.17380952835083008, "learning_rate": 0.002, "loss": 2.5588, "step": 256860 }, { "epoch": 0.5117421586127757, "grad_norm": 0.18211360275745392, "learning_rate": 0.002, "loss": 2.5612, "step": 256870 }, { "epoch": 0.5117620808364146, "grad_norm": 0.17981716990470886, "learning_rate": 0.002, "loss": 2.5543, "step": 256880 }, { "epoch": 0.5117820030600535, "grad_norm": 0.1351810097694397, "learning_rate": 0.002, "loss": 2.5647, "step": 256890 }, { "epoch": 0.5118019252836925, "grad_norm": 0.21451665461063385, "learning_rate": 0.002, "loss": 2.5466, "step": 256900 }, { "epoch": 0.5118218475073314, "grad_norm": 0.1621360182762146, "learning_rate": 0.002, "loss": 2.5516, "step": 256910 }, { "epoch": 0.5118417697309703, "grad_norm": 0.1437111496925354, "learning_rate": 0.002, "loss": 2.5647, "step": 256920 }, { "epoch": 0.5118616919546092, "grad_norm": 0.1871287077665329, "learning_rate": 0.002, "loss": 2.5652, "step": 256930 }, { "epoch": 0.5118816141782481, "grad_norm": 0.16469359397888184, "learning_rate": 0.002, "loss": 2.5516, "step": 256940 }, { "epoch": 0.5119015364018871, "grad_norm": 0.1860327273607254, "learning_rate": 0.002, "loss": 2.5682, "step": 256950 }, { "epoch": 0.511921458625526, "grad_norm": 0.1654728204011917, "learning_rate": 0.002, "loss": 2.5606, "step": 256960 }, { "epoch": 0.5119413808491649, "grad_norm": 0.18138504028320312, "learning_rate": 0.002, "loss": 2.5616, "step": 256970 }, { "epoch": 0.5119613030728037, "grad_norm": 0.16409537196159363, "learning_rate": 0.002, "loss": 2.5408, "step": 256980 }, { "epoch": 0.5119812252964426, "grad_norm": 0.16893288493156433, "learning_rate": 0.002, "loss": 2.5637, "step": 256990 }, { "epoch": 0.5120011475200816, "grad_norm": 0.15935441851615906, "learning_rate": 0.002, "loss": 2.5711, "step": 257000 }, { "epoch": 0.5120210697437205, "grad_norm": 0.16290472447872162, "learning_rate": 0.002, "loss": 2.5407, "step": 257010 }, { "epoch": 0.5120409919673594, "grad_norm": 0.6196982860565186, "learning_rate": 0.002, "loss": 2.5518, "step": 257020 }, { "epoch": 0.5120609141909983, "grad_norm": 0.18806542456150055, "learning_rate": 0.002, "loss": 2.5701, "step": 257030 }, { "epoch": 0.5120808364146373, "grad_norm": 0.14545220136642456, "learning_rate": 0.002, "loss": 2.5735, "step": 257040 }, { "epoch": 0.5121007586382762, "grad_norm": 0.1583542823791504, "learning_rate": 0.002, "loss": 2.5461, "step": 257050 }, { "epoch": 0.5121206808619151, "grad_norm": 0.1631423532962799, "learning_rate": 0.002, "loss": 2.5698, "step": 257060 }, { "epoch": 0.512140603085554, "grad_norm": 0.14765417575836182, "learning_rate": 0.002, "loss": 2.5473, "step": 257070 }, { "epoch": 0.5121605253091929, "grad_norm": 0.16228172183036804, "learning_rate": 0.002, "loss": 2.5644, "step": 257080 }, { "epoch": 0.5121804475328319, "grad_norm": 0.1550689935684204, "learning_rate": 0.002, "loss": 2.5592, "step": 257090 }, { "epoch": 0.5122003697564708, "grad_norm": 0.17202650010585785, "learning_rate": 0.002, "loss": 2.5608, "step": 257100 }, { "epoch": 0.5122202919801097, "grad_norm": 0.1641429364681244, "learning_rate": 0.002, "loss": 2.5438, "step": 257110 }, { "epoch": 0.5122402142037485, "grad_norm": 0.14135412871837616, "learning_rate": 0.002, "loss": 2.5668, "step": 257120 }, { "epoch": 0.5122601364273874, "grad_norm": 0.18382492661476135, "learning_rate": 0.002, "loss": 2.5545, "step": 257130 }, { "epoch": 0.5122800586510264, "grad_norm": 0.2201807200908661, "learning_rate": 0.002, "loss": 2.5614, "step": 257140 }, { "epoch": 0.5122999808746653, "grad_norm": 0.16431081295013428, "learning_rate": 0.002, "loss": 2.5464, "step": 257150 }, { "epoch": 0.5123199030983042, "grad_norm": 0.16964919865131378, "learning_rate": 0.002, "loss": 2.5607, "step": 257160 }, { "epoch": 0.5123398253219431, "grad_norm": 0.19925881922245026, "learning_rate": 0.002, "loss": 2.5812, "step": 257170 }, { "epoch": 0.512359747545582, "grad_norm": 0.1540539413690567, "learning_rate": 0.002, "loss": 2.5435, "step": 257180 }, { "epoch": 0.512379669769221, "grad_norm": 0.163628488779068, "learning_rate": 0.002, "loss": 2.5517, "step": 257190 }, { "epoch": 0.5123995919928599, "grad_norm": 0.18116572499275208, "learning_rate": 0.002, "loss": 2.5544, "step": 257200 }, { "epoch": 0.5124195142164988, "grad_norm": 0.21349266171455383, "learning_rate": 0.002, "loss": 2.5567, "step": 257210 }, { "epoch": 0.5124394364401377, "grad_norm": 0.1586657017469406, "learning_rate": 0.002, "loss": 2.5703, "step": 257220 }, { "epoch": 0.5124593586637766, "grad_norm": 0.18731503188610077, "learning_rate": 0.002, "loss": 2.5691, "step": 257230 }, { "epoch": 0.5124792808874156, "grad_norm": 0.17723585665225983, "learning_rate": 0.002, "loss": 2.557, "step": 257240 }, { "epoch": 0.5124992031110545, "grad_norm": 0.1637464016675949, "learning_rate": 0.002, "loss": 2.5615, "step": 257250 }, { "epoch": 0.5125191253346933, "grad_norm": 0.16963207721710205, "learning_rate": 0.002, "loss": 2.5566, "step": 257260 }, { "epoch": 0.5125390475583322, "grad_norm": 0.15056899189949036, "learning_rate": 0.002, "loss": 2.5698, "step": 257270 }, { "epoch": 0.5125589697819711, "grad_norm": 0.1502421498298645, "learning_rate": 0.002, "loss": 2.5576, "step": 257280 }, { "epoch": 0.5125788920056101, "grad_norm": 0.1719745397567749, "learning_rate": 0.002, "loss": 2.5743, "step": 257290 }, { "epoch": 0.512598814229249, "grad_norm": 0.16763350367546082, "learning_rate": 0.002, "loss": 2.5545, "step": 257300 }, { "epoch": 0.5126187364528879, "grad_norm": 0.1940145641565323, "learning_rate": 0.002, "loss": 2.559, "step": 257310 }, { "epoch": 0.5126386586765268, "grad_norm": 0.1679978221654892, "learning_rate": 0.002, "loss": 2.5574, "step": 257320 }, { "epoch": 0.5126585809001658, "grad_norm": 0.1792595535516739, "learning_rate": 0.002, "loss": 2.5517, "step": 257330 }, { "epoch": 0.5126785031238047, "grad_norm": 0.18893912434577942, "learning_rate": 0.002, "loss": 2.5614, "step": 257340 }, { "epoch": 0.5126984253474436, "grad_norm": 0.16080889105796814, "learning_rate": 0.002, "loss": 2.5593, "step": 257350 }, { "epoch": 0.5127183475710825, "grad_norm": 0.14259028434753418, "learning_rate": 0.002, "loss": 2.571, "step": 257360 }, { "epoch": 0.5127382697947214, "grad_norm": 0.15775741636753082, "learning_rate": 0.002, "loss": 2.5599, "step": 257370 }, { "epoch": 0.5127581920183604, "grad_norm": 0.19963684678077698, "learning_rate": 0.002, "loss": 2.571, "step": 257380 }, { "epoch": 0.5127781142419993, "grad_norm": 0.16386543214321136, "learning_rate": 0.002, "loss": 2.557, "step": 257390 }, { "epoch": 0.5127980364656382, "grad_norm": 0.20340731739997864, "learning_rate": 0.002, "loss": 2.5666, "step": 257400 }, { "epoch": 0.512817958689277, "grad_norm": 0.1737384796142578, "learning_rate": 0.002, "loss": 2.5642, "step": 257410 }, { "epoch": 0.5128378809129159, "grad_norm": 0.16469813883304596, "learning_rate": 0.002, "loss": 2.5672, "step": 257420 }, { "epoch": 0.5128578031365549, "grad_norm": 0.1712643802165985, "learning_rate": 0.002, "loss": 2.5512, "step": 257430 }, { "epoch": 0.5128777253601938, "grad_norm": 0.17312797904014587, "learning_rate": 0.002, "loss": 2.5604, "step": 257440 }, { "epoch": 0.5128976475838327, "grad_norm": 0.15475992858409882, "learning_rate": 0.002, "loss": 2.5585, "step": 257450 }, { "epoch": 0.5129175698074716, "grad_norm": 0.1724768877029419, "learning_rate": 0.002, "loss": 2.5478, "step": 257460 }, { "epoch": 0.5129374920311105, "grad_norm": 0.1891108602285385, "learning_rate": 0.002, "loss": 2.5651, "step": 257470 }, { "epoch": 0.5129574142547495, "grad_norm": 0.16415001451969147, "learning_rate": 0.002, "loss": 2.5565, "step": 257480 }, { "epoch": 0.5129773364783884, "grad_norm": 0.14630410075187683, "learning_rate": 0.002, "loss": 2.556, "step": 257490 }, { "epoch": 0.5129972587020273, "grad_norm": 0.16147424280643463, "learning_rate": 0.002, "loss": 2.5624, "step": 257500 }, { "epoch": 0.5130171809256662, "grad_norm": 0.17350076138973236, "learning_rate": 0.002, "loss": 2.5578, "step": 257510 }, { "epoch": 0.5130371031493051, "grad_norm": 0.1521918624639511, "learning_rate": 0.002, "loss": 2.5721, "step": 257520 }, { "epoch": 0.5130570253729441, "grad_norm": 0.16128982603549957, "learning_rate": 0.002, "loss": 2.5574, "step": 257530 }, { "epoch": 0.513076947596583, "grad_norm": 0.22471480071544647, "learning_rate": 0.002, "loss": 2.542, "step": 257540 }, { "epoch": 0.5130968698202218, "grad_norm": 0.16008298099040985, "learning_rate": 0.002, "loss": 2.5848, "step": 257550 }, { "epoch": 0.5131167920438607, "grad_norm": 0.17821168899536133, "learning_rate": 0.002, "loss": 2.5606, "step": 257560 }, { "epoch": 0.5131367142674996, "grad_norm": 0.15683582425117493, "learning_rate": 0.002, "loss": 2.5511, "step": 257570 }, { "epoch": 0.5131566364911386, "grad_norm": 0.1682589203119278, "learning_rate": 0.002, "loss": 2.5506, "step": 257580 }, { "epoch": 0.5131765587147775, "grad_norm": 0.18326158821582794, "learning_rate": 0.002, "loss": 2.5687, "step": 257590 }, { "epoch": 0.5131964809384164, "grad_norm": 0.1734497994184494, "learning_rate": 0.002, "loss": 2.5569, "step": 257600 }, { "epoch": 0.5132164031620553, "grad_norm": 0.1394243687391281, "learning_rate": 0.002, "loss": 2.556, "step": 257610 }, { "epoch": 0.5132363253856943, "grad_norm": 0.1758192628622055, "learning_rate": 0.002, "loss": 2.5539, "step": 257620 }, { "epoch": 0.5132562476093332, "grad_norm": 0.14251185953617096, "learning_rate": 0.002, "loss": 2.5513, "step": 257630 }, { "epoch": 0.5132761698329721, "grad_norm": 0.1909707933664322, "learning_rate": 0.002, "loss": 2.564, "step": 257640 }, { "epoch": 0.513296092056611, "grad_norm": 0.16324444115161896, "learning_rate": 0.002, "loss": 2.5598, "step": 257650 }, { "epoch": 0.5133160142802499, "grad_norm": 0.19806340336799622, "learning_rate": 0.002, "loss": 2.5674, "step": 257660 }, { "epoch": 0.5133359365038889, "grad_norm": 0.15302644670009613, "learning_rate": 0.002, "loss": 2.5532, "step": 257670 }, { "epoch": 0.5133558587275278, "grad_norm": 0.18161186575889587, "learning_rate": 0.002, "loss": 2.5623, "step": 257680 }, { "epoch": 0.5133757809511666, "grad_norm": 0.17487557232379913, "learning_rate": 0.002, "loss": 2.5636, "step": 257690 }, { "epoch": 0.5133957031748055, "grad_norm": 0.16756300628185272, "learning_rate": 0.002, "loss": 2.5705, "step": 257700 }, { "epoch": 0.5134156253984444, "grad_norm": 0.21569974720478058, "learning_rate": 0.002, "loss": 2.5528, "step": 257710 }, { "epoch": 0.5134355476220834, "grad_norm": 0.1794765591621399, "learning_rate": 0.002, "loss": 2.5556, "step": 257720 }, { "epoch": 0.5134554698457223, "grad_norm": 0.14270102977752686, "learning_rate": 0.002, "loss": 2.5489, "step": 257730 }, { "epoch": 0.5134753920693612, "grad_norm": 0.16132263839244843, "learning_rate": 0.002, "loss": 2.5378, "step": 257740 }, { "epoch": 0.5134953142930001, "grad_norm": 0.1637316197156906, "learning_rate": 0.002, "loss": 2.5675, "step": 257750 }, { "epoch": 0.513515236516639, "grad_norm": 0.15161430835723877, "learning_rate": 0.002, "loss": 2.5576, "step": 257760 }, { "epoch": 0.513535158740278, "grad_norm": 0.15912486612796783, "learning_rate": 0.002, "loss": 2.5676, "step": 257770 }, { "epoch": 0.5135550809639169, "grad_norm": 0.1633499413728714, "learning_rate": 0.002, "loss": 2.5595, "step": 257780 }, { "epoch": 0.5135750031875558, "grad_norm": 0.19111216068267822, "learning_rate": 0.002, "loss": 2.5364, "step": 257790 }, { "epoch": 0.5135949254111947, "grad_norm": 0.17707224190235138, "learning_rate": 0.002, "loss": 2.5715, "step": 257800 }, { "epoch": 0.5136148476348336, "grad_norm": 0.18441399931907654, "learning_rate": 0.002, "loss": 2.5565, "step": 257810 }, { "epoch": 0.5136347698584726, "grad_norm": 0.15313440561294556, "learning_rate": 0.002, "loss": 2.5522, "step": 257820 }, { "epoch": 0.5136546920821115, "grad_norm": 0.21131576597690582, "learning_rate": 0.002, "loss": 2.5552, "step": 257830 }, { "epoch": 0.5136746143057503, "grad_norm": 0.17367666959762573, "learning_rate": 0.002, "loss": 2.554, "step": 257840 }, { "epoch": 0.5136945365293892, "grad_norm": 0.14826814830303192, "learning_rate": 0.002, "loss": 2.5596, "step": 257850 }, { "epoch": 0.5137144587530281, "grad_norm": 0.16641175746917725, "learning_rate": 0.002, "loss": 2.5608, "step": 257860 }, { "epoch": 0.5137343809766671, "grad_norm": 0.18618440628051758, "learning_rate": 0.002, "loss": 2.5439, "step": 257870 }, { "epoch": 0.513754303200306, "grad_norm": 0.1622636765241623, "learning_rate": 0.002, "loss": 2.5462, "step": 257880 }, { "epoch": 0.5137742254239449, "grad_norm": 0.16881710290908813, "learning_rate": 0.002, "loss": 2.5526, "step": 257890 }, { "epoch": 0.5137941476475838, "grad_norm": 0.18052399158477783, "learning_rate": 0.002, "loss": 2.5543, "step": 257900 }, { "epoch": 0.5138140698712228, "grad_norm": 0.14637570083141327, "learning_rate": 0.002, "loss": 2.5535, "step": 257910 }, { "epoch": 0.5138339920948617, "grad_norm": 0.18665044009685516, "learning_rate": 0.002, "loss": 2.5533, "step": 257920 }, { "epoch": 0.5138539143185006, "grad_norm": 0.17535467445850372, "learning_rate": 0.002, "loss": 2.5362, "step": 257930 }, { "epoch": 0.5138738365421395, "grad_norm": 0.18797339498996735, "learning_rate": 0.002, "loss": 2.563, "step": 257940 }, { "epoch": 0.5138937587657784, "grad_norm": 0.15119917690753937, "learning_rate": 0.002, "loss": 2.5632, "step": 257950 }, { "epoch": 0.5139136809894174, "grad_norm": 0.1840539127588272, "learning_rate": 0.002, "loss": 2.5517, "step": 257960 }, { "epoch": 0.5139336032130563, "grad_norm": 0.15319418907165527, "learning_rate": 0.002, "loss": 2.5608, "step": 257970 }, { "epoch": 0.5139535254366951, "grad_norm": 0.1801312416791916, "learning_rate": 0.002, "loss": 2.5686, "step": 257980 }, { "epoch": 0.513973447660334, "grad_norm": 0.16097137331962585, "learning_rate": 0.002, "loss": 2.5574, "step": 257990 }, { "epoch": 0.5139933698839729, "grad_norm": 0.1907423883676529, "learning_rate": 0.002, "loss": 2.5551, "step": 258000 }, { "epoch": 0.5140132921076119, "grad_norm": 0.19379569590091705, "learning_rate": 0.002, "loss": 2.5544, "step": 258010 }, { "epoch": 0.5140332143312508, "grad_norm": 0.14104698598384857, "learning_rate": 0.002, "loss": 2.5671, "step": 258020 }, { "epoch": 0.5140531365548897, "grad_norm": 0.15730156004428864, "learning_rate": 0.002, "loss": 2.5578, "step": 258030 }, { "epoch": 0.5140730587785286, "grad_norm": 0.16350476443767548, "learning_rate": 0.002, "loss": 2.5614, "step": 258040 }, { "epoch": 0.5140929810021675, "grad_norm": 0.15024851262569427, "learning_rate": 0.002, "loss": 2.5511, "step": 258050 }, { "epoch": 0.5141129032258065, "grad_norm": 0.17415869235992432, "learning_rate": 0.002, "loss": 2.5473, "step": 258060 }, { "epoch": 0.5141328254494454, "grad_norm": 0.1973123848438263, "learning_rate": 0.002, "loss": 2.5534, "step": 258070 }, { "epoch": 0.5141527476730843, "grad_norm": 0.37715163826942444, "learning_rate": 0.002, "loss": 2.566, "step": 258080 }, { "epoch": 0.5141726698967232, "grad_norm": 0.16024816036224365, "learning_rate": 0.002, "loss": 2.5558, "step": 258090 }, { "epoch": 0.5141925921203621, "grad_norm": 0.21047091484069824, "learning_rate": 0.002, "loss": 2.5574, "step": 258100 }, { "epoch": 0.5142125143440011, "grad_norm": 0.16499005258083344, "learning_rate": 0.002, "loss": 2.5485, "step": 258110 }, { "epoch": 0.51423243656764, "grad_norm": 0.1569342166185379, "learning_rate": 0.002, "loss": 2.5591, "step": 258120 }, { "epoch": 0.5142523587912788, "grad_norm": 0.1761782467365265, "learning_rate": 0.002, "loss": 2.556, "step": 258130 }, { "epoch": 0.5142722810149177, "grad_norm": 0.2875434160232544, "learning_rate": 0.002, "loss": 2.5789, "step": 258140 }, { "epoch": 0.5142922032385566, "grad_norm": 0.16944071650505066, "learning_rate": 0.002, "loss": 2.5559, "step": 258150 }, { "epoch": 0.5143121254621956, "grad_norm": 0.1793152242898941, "learning_rate": 0.002, "loss": 2.5557, "step": 258160 }, { "epoch": 0.5143320476858345, "grad_norm": 0.142349511384964, "learning_rate": 0.002, "loss": 2.5615, "step": 258170 }, { "epoch": 0.5143519699094734, "grad_norm": 0.16452281177043915, "learning_rate": 0.002, "loss": 2.5601, "step": 258180 }, { "epoch": 0.5143718921331123, "grad_norm": 0.1771380603313446, "learning_rate": 0.002, "loss": 2.5506, "step": 258190 }, { "epoch": 0.5143918143567513, "grad_norm": 0.16801564395427704, "learning_rate": 0.002, "loss": 2.5555, "step": 258200 }, { "epoch": 0.5144117365803902, "grad_norm": 0.1976812332868576, "learning_rate": 0.002, "loss": 2.5694, "step": 258210 }, { "epoch": 0.5144316588040291, "grad_norm": 0.1542304903268814, "learning_rate": 0.002, "loss": 2.5654, "step": 258220 }, { "epoch": 0.514451581027668, "grad_norm": 0.14635752141475677, "learning_rate": 0.002, "loss": 2.5545, "step": 258230 }, { "epoch": 0.5144715032513069, "grad_norm": 0.1784251630306244, "learning_rate": 0.002, "loss": 2.5546, "step": 258240 }, { "epoch": 0.5144914254749459, "grad_norm": 0.1571430116891861, "learning_rate": 0.002, "loss": 2.5543, "step": 258250 }, { "epoch": 0.5145113476985848, "grad_norm": 0.1500183343887329, "learning_rate": 0.002, "loss": 2.5725, "step": 258260 }, { "epoch": 0.5145312699222236, "grad_norm": 0.1561061292886734, "learning_rate": 0.002, "loss": 2.5503, "step": 258270 }, { "epoch": 0.5145511921458625, "grad_norm": 0.19767509400844574, "learning_rate": 0.002, "loss": 2.5688, "step": 258280 }, { "epoch": 0.5145711143695014, "grad_norm": 0.15397484600543976, "learning_rate": 0.002, "loss": 2.5634, "step": 258290 }, { "epoch": 0.5145910365931404, "grad_norm": 0.1704598367214203, "learning_rate": 0.002, "loss": 2.5568, "step": 258300 }, { "epoch": 0.5146109588167793, "grad_norm": 0.21163296699523926, "learning_rate": 0.002, "loss": 2.567, "step": 258310 }, { "epoch": 0.5146308810404182, "grad_norm": 0.17653916776180267, "learning_rate": 0.002, "loss": 2.5643, "step": 258320 }, { "epoch": 0.5146508032640571, "grad_norm": 0.2029140591621399, "learning_rate": 0.002, "loss": 2.5538, "step": 258330 }, { "epoch": 0.514670725487696, "grad_norm": 0.17350445687770844, "learning_rate": 0.002, "loss": 2.5606, "step": 258340 }, { "epoch": 0.514690647711335, "grad_norm": 0.18721383810043335, "learning_rate": 0.002, "loss": 2.5506, "step": 258350 }, { "epoch": 0.5147105699349739, "grad_norm": 0.14942021667957306, "learning_rate": 0.002, "loss": 2.5561, "step": 258360 }, { "epoch": 0.5147304921586128, "grad_norm": 0.17497797310352325, "learning_rate": 0.002, "loss": 2.5682, "step": 258370 }, { "epoch": 0.5147504143822517, "grad_norm": 0.15558582544326782, "learning_rate": 0.002, "loss": 2.5573, "step": 258380 }, { "epoch": 0.5147703366058906, "grad_norm": 0.17533710598945618, "learning_rate": 0.002, "loss": 2.5494, "step": 258390 }, { "epoch": 0.5147902588295296, "grad_norm": 0.22870683670043945, "learning_rate": 0.002, "loss": 2.5652, "step": 258400 }, { "epoch": 0.5148101810531684, "grad_norm": 0.19561031460762024, "learning_rate": 0.002, "loss": 2.5385, "step": 258410 }, { "epoch": 0.5148301032768073, "grad_norm": 0.17042258381843567, "learning_rate": 0.002, "loss": 2.5556, "step": 258420 }, { "epoch": 0.5148500255004462, "grad_norm": 0.19188079237937927, "learning_rate": 0.002, "loss": 2.5532, "step": 258430 }, { "epoch": 0.5148699477240851, "grad_norm": 0.1516411304473877, "learning_rate": 0.002, "loss": 2.5613, "step": 258440 }, { "epoch": 0.5148898699477241, "grad_norm": 0.20909839868545532, "learning_rate": 0.002, "loss": 2.5634, "step": 258450 }, { "epoch": 0.514909792171363, "grad_norm": 0.18795117735862732, "learning_rate": 0.002, "loss": 2.5751, "step": 258460 }, { "epoch": 0.5149297143950019, "grad_norm": 0.14502300322055817, "learning_rate": 0.002, "loss": 2.5543, "step": 258470 }, { "epoch": 0.5149496366186408, "grad_norm": 0.18843472003936768, "learning_rate": 0.002, "loss": 2.5787, "step": 258480 }, { "epoch": 0.5149695588422798, "grad_norm": 0.18479055166244507, "learning_rate": 0.002, "loss": 2.5701, "step": 258490 }, { "epoch": 0.5149894810659187, "grad_norm": 0.16273657977581024, "learning_rate": 0.002, "loss": 2.5589, "step": 258500 }, { "epoch": 0.5150094032895576, "grad_norm": 0.16273470222949982, "learning_rate": 0.002, "loss": 2.5517, "step": 258510 }, { "epoch": 0.5150293255131965, "grad_norm": 0.21705716848373413, "learning_rate": 0.002, "loss": 2.5559, "step": 258520 }, { "epoch": 0.5150492477368354, "grad_norm": 0.14840608835220337, "learning_rate": 0.002, "loss": 2.5463, "step": 258530 }, { "epoch": 0.5150691699604744, "grad_norm": 0.16858890652656555, "learning_rate": 0.002, "loss": 2.5616, "step": 258540 }, { "epoch": 0.5150890921841133, "grad_norm": 0.16574949026107788, "learning_rate": 0.002, "loss": 2.5584, "step": 258550 }, { "epoch": 0.5151090144077521, "grad_norm": 0.181076779961586, "learning_rate": 0.002, "loss": 2.5583, "step": 258560 }, { "epoch": 0.515128936631391, "grad_norm": 0.15200944244861603, "learning_rate": 0.002, "loss": 2.5702, "step": 258570 }, { "epoch": 0.5151488588550299, "grad_norm": 0.15178832411766052, "learning_rate": 0.002, "loss": 2.5494, "step": 258580 }, { "epoch": 0.5151687810786689, "grad_norm": 0.17229557037353516, "learning_rate": 0.002, "loss": 2.5511, "step": 258590 }, { "epoch": 0.5151887033023078, "grad_norm": 0.1813596487045288, "learning_rate": 0.002, "loss": 2.5558, "step": 258600 }, { "epoch": 0.5152086255259467, "grad_norm": 0.1526978313922882, "learning_rate": 0.002, "loss": 2.5715, "step": 258610 }, { "epoch": 0.5152285477495856, "grad_norm": 0.14870357513427734, "learning_rate": 0.002, "loss": 2.5681, "step": 258620 }, { "epoch": 0.5152484699732245, "grad_norm": 0.17980855703353882, "learning_rate": 0.002, "loss": 2.5438, "step": 258630 }, { "epoch": 0.5152683921968635, "grad_norm": 0.1836060732603073, "learning_rate": 0.002, "loss": 2.5627, "step": 258640 }, { "epoch": 0.5152883144205024, "grad_norm": 0.18122310936450958, "learning_rate": 0.002, "loss": 2.5482, "step": 258650 }, { "epoch": 0.5153082366441413, "grad_norm": 0.1504644900560379, "learning_rate": 0.002, "loss": 2.5596, "step": 258660 }, { "epoch": 0.5153281588677802, "grad_norm": 0.1610080450773239, "learning_rate": 0.002, "loss": 2.5559, "step": 258670 }, { "epoch": 0.515348081091419, "grad_norm": 0.14916665852069855, "learning_rate": 0.002, "loss": 2.5511, "step": 258680 }, { "epoch": 0.515368003315058, "grad_norm": 0.206137016415596, "learning_rate": 0.002, "loss": 2.5772, "step": 258690 }, { "epoch": 0.515387925538697, "grad_norm": 0.14090517163276672, "learning_rate": 0.002, "loss": 2.5582, "step": 258700 }, { "epoch": 0.5154078477623358, "grad_norm": 0.2096107006072998, "learning_rate": 0.002, "loss": 2.5454, "step": 258710 }, { "epoch": 0.5154277699859747, "grad_norm": 0.17867091298103333, "learning_rate": 0.002, "loss": 2.5584, "step": 258720 }, { "epoch": 0.5154476922096136, "grad_norm": 0.17530317604541779, "learning_rate": 0.002, "loss": 2.5526, "step": 258730 }, { "epoch": 0.5154676144332526, "grad_norm": 0.18442566692829132, "learning_rate": 0.002, "loss": 2.5653, "step": 258740 }, { "epoch": 0.5154875366568915, "grad_norm": 0.17028148472309113, "learning_rate": 0.002, "loss": 2.5507, "step": 258750 }, { "epoch": 0.5155074588805304, "grad_norm": 0.16237393021583557, "learning_rate": 0.002, "loss": 2.5555, "step": 258760 }, { "epoch": 0.5155273811041693, "grad_norm": 0.17629365622997284, "learning_rate": 0.002, "loss": 2.5499, "step": 258770 }, { "epoch": 0.5155473033278082, "grad_norm": 0.21080195903778076, "learning_rate": 0.002, "loss": 2.5758, "step": 258780 }, { "epoch": 0.5155672255514472, "grad_norm": 0.15576083958148956, "learning_rate": 0.002, "loss": 2.5613, "step": 258790 }, { "epoch": 0.5155871477750861, "grad_norm": 0.16686595976352692, "learning_rate": 0.002, "loss": 2.551, "step": 258800 }, { "epoch": 0.515607069998725, "grad_norm": 0.21182145178318024, "learning_rate": 0.002, "loss": 2.5695, "step": 258810 }, { "epoch": 0.5156269922223639, "grad_norm": 0.1479663997888565, "learning_rate": 0.002, "loss": 2.5537, "step": 258820 }, { "epoch": 0.5156469144460029, "grad_norm": 0.18607313930988312, "learning_rate": 0.002, "loss": 2.5645, "step": 258830 }, { "epoch": 0.5156668366696417, "grad_norm": 0.1763240545988083, "learning_rate": 0.002, "loss": 2.5566, "step": 258840 }, { "epoch": 0.5156867588932806, "grad_norm": 0.1811669021844864, "learning_rate": 0.002, "loss": 2.548, "step": 258850 }, { "epoch": 0.5157066811169195, "grad_norm": 0.16571693122386932, "learning_rate": 0.002, "loss": 2.5513, "step": 258860 }, { "epoch": 0.5157266033405584, "grad_norm": 0.17332981526851654, "learning_rate": 0.002, "loss": 2.5609, "step": 258870 }, { "epoch": 0.5157465255641974, "grad_norm": 0.17136824131011963, "learning_rate": 0.002, "loss": 2.5471, "step": 258880 }, { "epoch": 0.5157664477878363, "grad_norm": 0.1711689978837967, "learning_rate": 0.002, "loss": 2.5483, "step": 258890 }, { "epoch": 0.5157863700114752, "grad_norm": 0.17383283376693726, "learning_rate": 0.002, "loss": 2.5567, "step": 258900 }, { "epoch": 0.5158062922351141, "grad_norm": 0.1764923483133316, "learning_rate": 0.002, "loss": 2.5495, "step": 258910 }, { "epoch": 0.515826214458753, "grad_norm": 0.18005318939685822, "learning_rate": 0.002, "loss": 2.5412, "step": 258920 }, { "epoch": 0.515846136682392, "grad_norm": 0.16203543543815613, "learning_rate": 0.002, "loss": 2.5504, "step": 258930 }, { "epoch": 0.5158660589060309, "grad_norm": 0.21370382606983185, "learning_rate": 0.002, "loss": 2.5615, "step": 258940 }, { "epoch": 0.5158859811296698, "grad_norm": 0.15920525789260864, "learning_rate": 0.002, "loss": 2.564, "step": 258950 }, { "epoch": 0.5159059033533087, "grad_norm": 0.17504578828811646, "learning_rate": 0.002, "loss": 2.5312, "step": 258960 }, { "epoch": 0.5159258255769475, "grad_norm": 0.15928660333156586, "learning_rate": 0.002, "loss": 2.5601, "step": 258970 }, { "epoch": 0.5159457478005866, "grad_norm": 0.17861856520175934, "learning_rate": 0.002, "loss": 2.5649, "step": 258980 }, { "epoch": 0.5159656700242254, "grad_norm": 0.15879446268081665, "learning_rate": 0.002, "loss": 2.5614, "step": 258990 }, { "epoch": 0.5159855922478643, "grad_norm": 0.1508483588695526, "learning_rate": 0.002, "loss": 2.5597, "step": 259000 }, { "epoch": 0.5160055144715032, "grad_norm": 0.15668165683746338, "learning_rate": 0.002, "loss": 2.5671, "step": 259010 }, { "epoch": 0.5160254366951421, "grad_norm": 0.19430316984653473, "learning_rate": 0.002, "loss": 2.5681, "step": 259020 }, { "epoch": 0.5160453589187811, "grad_norm": 0.1499301791191101, "learning_rate": 0.002, "loss": 2.5752, "step": 259030 }, { "epoch": 0.51606528114242, "grad_norm": 0.15644274652004242, "learning_rate": 0.002, "loss": 2.5597, "step": 259040 }, { "epoch": 0.5160852033660589, "grad_norm": 0.16160713136196136, "learning_rate": 0.002, "loss": 2.559, "step": 259050 }, { "epoch": 0.5161051255896978, "grad_norm": 0.20767880976200104, "learning_rate": 0.002, "loss": 2.5632, "step": 259060 }, { "epoch": 0.5161250478133367, "grad_norm": 0.15781109035015106, "learning_rate": 0.002, "loss": 2.5402, "step": 259070 }, { "epoch": 0.5161449700369757, "grad_norm": 0.15494738519191742, "learning_rate": 0.002, "loss": 2.5612, "step": 259080 }, { "epoch": 0.5161648922606146, "grad_norm": 0.23308627307415009, "learning_rate": 0.002, "loss": 2.5665, "step": 259090 }, { "epoch": 0.5161848144842535, "grad_norm": 0.1616525948047638, "learning_rate": 0.002, "loss": 2.5756, "step": 259100 }, { "epoch": 0.5162047367078924, "grad_norm": 0.17930467426776886, "learning_rate": 0.002, "loss": 2.5514, "step": 259110 }, { "epoch": 0.5162246589315314, "grad_norm": 0.1658736914396286, "learning_rate": 0.002, "loss": 2.5534, "step": 259120 }, { "epoch": 0.5162445811551702, "grad_norm": 0.13770277798175812, "learning_rate": 0.002, "loss": 2.5522, "step": 259130 }, { "epoch": 0.5162645033788091, "grad_norm": 0.1587877720594406, "learning_rate": 0.002, "loss": 2.5653, "step": 259140 }, { "epoch": 0.516284425602448, "grad_norm": 0.20419256389141083, "learning_rate": 0.002, "loss": 2.544, "step": 259150 }, { "epoch": 0.5163043478260869, "grad_norm": 0.17949743568897247, "learning_rate": 0.002, "loss": 2.5582, "step": 259160 }, { "epoch": 0.5163242700497259, "grad_norm": 0.14452990889549255, "learning_rate": 0.002, "loss": 2.5516, "step": 259170 }, { "epoch": 0.5163441922733648, "grad_norm": 0.17092974483966827, "learning_rate": 0.002, "loss": 2.5599, "step": 259180 }, { "epoch": 0.5163641144970037, "grad_norm": 0.16693313419818878, "learning_rate": 0.002, "loss": 2.5652, "step": 259190 }, { "epoch": 0.5163840367206426, "grad_norm": 0.15221159160137177, "learning_rate": 0.002, "loss": 2.5704, "step": 259200 }, { "epoch": 0.5164039589442815, "grad_norm": 0.1812458336353302, "learning_rate": 0.002, "loss": 2.5514, "step": 259210 }, { "epoch": 0.5164238811679205, "grad_norm": 0.17234106361865997, "learning_rate": 0.002, "loss": 2.5554, "step": 259220 }, { "epoch": 0.5164438033915594, "grad_norm": 0.14757870137691498, "learning_rate": 0.002, "loss": 2.5545, "step": 259230 }, { "epoch": 0.5164637256151983, "grad_norm": 0.181327223777771, "learning_rate": 0.002, "loss": 2.5594, "step": 259240 }, { "epoch": 0.5164836478388372, "grad_norm": 0.18892160058021545, "learning_rate": 0.002, "loss": 2.553, "step": 259250 }, { "epoch": 0.516503570062476, "grad_norm": 0.21121357381343842, "learning_rate": 0.002, "loss": 2.5491, "step": 259260 }, { "epoch": 0.516523492286115, "grad_norm": 0.13778258860111237, "learning_rate": 0.002, "loss": 2.5655, "step": 259270 }, { "epoch": 0.5165434145097539, "grad_norm": 0.17564746737480164, "learning_rate": 0.002, "loss": 2.5521, "step": 259280 }, { "epoch": 0.5165633367333928, "grad_norm": 0.149300217628479, "learning_rate": 0.002, "loss": 2.5622, "step": 259290 }, { "epoch": 0.5165832589570317, "grad_norm": 0.154453307390213, "learning_rate": 0.002, "loss": 2.5665, "step": 259300 }, { "epoch": 0.5166031811806706, "grad_norm": 0.19370314478874207, "learning_rate": 0.002, "loss": 2.5773, "step": 259310 }, { "epoch": 0.5166231034043096, "grad_norm": 0.15923871099948883, "learning_rate": 0.002, "loss": 2.552, "step": 259320 }, { "epoch": 0.5166430256279485, "grad_norm": 0.17570403218269348, "learning_rate": 0.002, "loss": 2.549, "step": 259330 }, { "epoch": 0.5166629478515874, "grad_norm": 0.18916989862918854, "learning_rate": 0.002, "loss": 2.5692, "step": 259340 }, { "epoch": 0.5166828700752263, "grad_norm": 0.16519694030284882, "learning_rate": 0.002, "loss": 2.5548, "step": 259350 }, { "epoch": 0.5167027922988652, "grad_norm": 0.19591973721981049, "learning_rate": 0.002, "loss": 2.5502, "step": 259360 }, { "epoch": 0.5167227145225042, "grad_norm": 0.16796451807022095, "learning_rate": 0.002, "loss": 2.552, "step": 259370 }, { "epoch": 0.5167426367461431, "grad_norm": 0.16242678463459015, "learning_rate": 0.002, "loss": 2.5631, "step": 259380 }, { "epoch": 0.516762558969782, "grad_norm": 0.17430131137371063, "learning_rate": 0.002, "loss": 2.575, "step": 259390 }, { "epoch": 0.5167824811934209, "grad_norm": 0.16519443690776825, "learning_rate": 0.002, "loss": 2.5614, "step": 259400 }, { "epoch": 0.5168024034170599, "grad_norm": 0.16206176578998566, "learning_rate": 0.002, "loss": 2.5585, "step": 259410 }, { "epoch": 0.5168223256406987, "grad_norm": 0.1562100052833557, "learning_rate": 0.002, "loss": 2.5604, "step": 259420 }, { "epoch": 0.5168422478643376, "grad_norm": 0.16365189850330353, "learning_rate": 0.002, "loss": 2.5474, "step": 259430 }, { "epoch": 0.5168621700879765, "grad_norm": 0.18328629434108734, "learning_rate": 0.002, "loss": 2.5526, "step": 259440 }, { "epoch": 0.5168820923116154, "grad_norm": 0.19828690588474274, "learning_rate": 0.002, "loss": 2.5604, "step": 259450 }, { "epoch": 0.5169020145352544, "grad_norm": 0.16395986080169678, "learning_rate": 0.002, "loss": 2.5509, "step": 259460 }, { "epoch": 0.5169219367588933, "grad_norm": 0.15873734652996063, "learning_rate": 0.002, "loss": 2.5549, "step": 259470 }, { "epoch": 0.5169418589825322, "grad_norm": 0.16876807808876038, "learning_rate": 0.002, "loss": 2.564, "step": 259480 }, { "epoch": 0.5169617812061711, "grad_norm": 0.1576530635356903, "learning_rate": 0.002, "loss": 2.5391, "step": 259490 }, { "epoch": 0.51698170342981, "grad_norm": 0.16115783154964447, "learning_rate": 0.002, "loss": 2.5675, "step": 259500 }, { "epoch": 0.517001625653449, "grad_norm": 0.1612151712179184, "learning_rate": 0.002, "loss": 2.5569, "step": 259510 }, { "epoch": 0.5170215478770879, "grad_norm": 0.19613401591777802, "learning_rate": 0.002, "loss": 2.5595, "step": 259520 }, { "epoch": 0.5170414701007268, "grad_norm": 0.1578100323677063, "learning_rate": 0.002, "loss": 2.5517, "step": 259530 }, { "epoch": 0.5170613923243657, "grad_norm": 0.22988499701023102, "learning_rate": 0.002, "loss": 2.5607, "step": 259540 }, { "epoch": 0.5170813145480045, "grad_norm": 0.1570974886417389, "learning_rate": 0.002, "loss": 2.5492, "step": 259550 }, { "epoch": 0.5171012367716435, "grad_norm": 0.14033128321170807, "learning_rate": 0.002, "loss": 2.5382, "step": 259560 }, { "epoch": 0.5171211589952824, "grad_norm": 0.16541685163974762, "learning_rate": 0.002, "loss": 2.5571, "step": 259570 }, { "epoch": 0.5171410812189213, "grad_norm": 0.17449820041656494, "learning_rate": 0.002, "loss": 2.5585, "step": 259580 }, { "epoch": 0.5171610034425602, "grad_norm": 0.16020788252353668, "learning_rate": 0.002, "loss": 2.5497, "step": 259590 }, { "epoch": 0.5171809256661991, "grad_norm": 0.17423763871192932, "learning_rate": 0.002, "loss": 2.5713, "step": 259600 }, { "epoch": 0.5172008478898381, "grad_norm": 0.1532042920589447, "learning_rate": 0.002, "loss": 2.5639, "step": 259610 }, { "epoch": 0.517220770113477, "grad_norm": 0.16905337572097778, "learning_rate": 0.002, "loss": 2.5617, "step": 259620 }, { "epoch": 0.5172406923371159, "grad_norm": 0.17414242029190063, "learning_rate": 0.002, "loss": 2.5461, "step": 259630 }, { "epoch": 0.5172606145607548, "grad_norm": 0.1736144721508026, "learning_rate": 0.002, "loss": 2.5531, "step": 259640 }, { "epoch": 0.5172805367843937, "grad_norm": 0.21307148039340973, "learning_rate": 0.002, "loss": 2.5546, "step": 259650 }, { "epoch": 0.5173004590080327, "grad_norm": 0.140547975897789, "learning_rate": 0.002, "loss": 2.549, "step": 259660 }, { "epoch": 0.5173203812316716, "grad_norm": 0.18322479724884033, "learning_rate": 0.002, "loss": 2.5644, "step": 259670 }, { "epoch": 0.5173403034553105, "grad_norm": 0.20135897397994995, "learning_rate": 0.002, "loss": 2.5531, "step": 259680 }, { "epoch": 0.5173602256789493, "grad_norm": 0.15447354316711426, "learning_rate": 0.002, "loss": 2.5585, "step": 259690 }, { "epoch": 0.5173801479025883, "grad_norm": 0.21471868455410004, "learning_rate": 0.002, "loss": 2.5608, "step": 259700 }, { "epoch": 0.5174000701262272, "grad_norm": 0.20636709034442902, "learning_rate": 0.002, "loss": 2.5718, "step": 259710 }, { "epoch": 0.5174199923498661, "grad_norm": 0.1742754578590393, "learning_rate": 0.002, "loss": 2.5618, "step": 259720 }, { "epoch": 0.517439914573505, "grad_norm": 0.17054805159568787, "learning_rate": 0.002, "loss": 2.5431, "step": 259730 }, { "epoch": 0.5174598367971439, "grad_norm": 0.1476590484380722, "learning_rate": 0.002, "loss": 2.5579, "step": 259740 }, { "epoch": 0.5174797590207829, "grad_norm": 0.18110691010951996, "learning_rate": 0.002, "loss": 2.5558, "step": 259750 }, { "epoch": 0.5174996812444218, "grad_norm": 0.13318100571632385, "learning_rate": 0.002, "loss": 2.551, "step": 259760 }, { "epoch": 0.5175196034680607, "grad_norm": 0.17926713824272156, "learning_rate": 0.002, "loss": 2.5498, "step": 259770 }, { "epoch": 0.5175395256916996, "grad_norm": 0.15451711416244507, "learning_rate": 0.002, "loss": 2.5639, "step": 259780 }, { "epoch": 0.5175594479153385, "grad_norm": 0.1517380028963089, "learning_rate": 0.002, "loss": 2.5579, "step": 259790 }, { "epoch": 0.5175793701389775, "grad_norm": 0.15652325749397278, "learning_rate": 0.002, "loss": 2.5454, "step": 259800 }, { "epoch": 0.5175992923626164, "grad_norm": 0.17111851274967194, "learning_rate": 0.002, "loss": 2.575, "step": 259810 }, { "epoch": 0.5176192145862553, "grad_norm": 0.15462513267993927, "learning_rate": 0.002, "loss": 2.5711, "step": 259820 }, { "epoch": 0.5176391368098942, "grad_norm": 0.14604885876178741, "learning_rate": 0.002, "loss": 2.5737, "step": 259830 }, { "epoch": 0.517659059033533, "grad_norm": 0.15546846389770508, "learning_rate": 0.002, "loss": 2.5568, "step": 259840 }, { "epoch": 0.517678981257172, "grad_norm": 0.15476062893867493, "learning_rate": 0.002, "loss": 2.5549, "step": 259850 }, { "epoch": 0.5176989034808109, "grad_norm": 0.1667802929878235, "learning_rate": 0.002, "loss": 2.5583, "step": 259860 }, { "epoch": 0.5177188257044498, "grad_norm": 0.1743198186159134, "learning_rate": 0.002, "loss": 2.5586, "step": 259870 }, { "epoch": 0.5177387479280887, "grad_norm": 0.18355624377727509, "learning_rate": 0.002, "loss": 2.5493, "step": 259880 }, { "epoch": 0.5177586701517276, "grad_norm": 0.14676722884178162, "learning_rate": 0.002, "loss": 2.5597, "step": 259890 }, { "epoch": 0.5177785923753666, "grad_norm": 0.17209883034229279, "learning_rate": 0.002, "loss": 2.5653, "step": 259900 }, { "epoch": 0.5177985145990055, "grad_norm": 0.1657586544752121, "learning_rate": 0.002, "loss": 2.5599, "step": 259910 }, { "epoch": 0.5178184368226444, "grad_norm": 0.2143327295780182, "learning_rate": 0.002, "loss": 2.5535, "step": 259920 }, { "epoch": 0.5178383590462833, "grad_norm": 0.1538359671831131, "learning_rate": 0.002, "loss": 2.5449, "step": 259930 }, { "epoch": 0.5178582812699222, "grad_norm": 0.16400638222694397, "learning_rate": 0.002, "loss": 2.5471, "step": 259940 }, { "epoch": 0.5178782034935612, "grad_norm": 0.17358149588108063, "learning_rate": 0.002, "loss": 2.5675, "step": 259950 }, { "epoch": 0.5178981257172001, "grad_norm": 0.1853923201560974, "learning_rate": 0.002, "loss": 2.5499, "step": 259960 }, { "epoch": 0.517918047940839, "grad_norm": 0.17998287081718445, "learning_rate": 0.002, "loss": 2.5578, "step": 259970 }, { "epoch": 0.5179379701644778, "grad_norm": 0.155923530459404, "learning_rate": 0.002, "loss": 2.5418, "step": 259980 }, { "epoch": 0.5179578923881168, "grad_norm": 0.16982628405094147, "learning_rate": 0.002, "loss": 2.5537, "step": 259990 }, { "epoch": 0.5179778146117557, "grad_norm": 0.17310796678066254, "learning_rate": 0.002, "loss": 2.5563, "step": 260000 }, { "epoch": 0.5179977368353946, "grad_norm": 0.17122076451778412, "learning_rate": 0.002, "loss": 2.5692, "step": 260010 }, { "epoch": 0.5180176590590335, "grad_norm": 0.14839626848697662, "learning_rate": 0.002, "loss": 2.553, "step": 260020 }, { "epoch": 0.5180375812826724, "grad_norm": 0.19302931427955627, "learning_rate": 0.002, "loss": 2.5675, "step": 260030 }, { "epoch": 0.5180575035063114, "grad_norm": 0.16439275443553925, "learning_rate": 0.002, "loss": 2.562, "step": 260040 }, { "epoch": 0.5180774257299503, "grad_norm": 0.19675731658935547, "learning_rate": 0.002, "loss": 2.5626, "step": 260050 }, { "epoch": 0.5180973479535892, "grad_norm": 0.15233971178531647, "learning_rate": 0.002, "loss": 2.554, "step": 260060 }, { "epoch": 0.5181172701772281, "grad_norm": 0.17492270469665527, "learning_rate": 0.002, "loss": 2.561, "step": 260070 }, { "epoch": 0.518137192400867, "grad_norm": 0.16898685693740845, "learning_rate": 0.002, "loss": 2.5631, "step": 260080 }, { "epoch": 0.518157114624506, "grad_norm": 0.16317090392112732, "learning_rate": 0.002, "loss": 2.5658, "step": 260090 }, { "epoch": 0.5181770368481449, "grad_norm": 0.16620036959648132, "learning_rate": 0.002, "loss": 2.5394, "step": 260100 }, { "epoch": 0.5181969590717838, "grad_norm": 0.1702839881181717, "learning_rate": 0.002, "loss": 2.5536, "step": 260110 }, { "epoch": 0.5182168812954226, "grad_norm": 0.1696777492761612, "learning_rate": 0.002, "loss": 2.5597, "step": 260120 }, { "epoch": 0.5182368035190615, "grad_norm": 0.17943473160266876, "learning_rate": 0.002, "loss": 2.5556, "step": 260130 }, { "epoch": 0.5182567257427005, "grad_norm": 0.19017186760902405, "learning_rate": 0.002, "loss": 2.5572, "step": 260140 }, { "epoch": 0.5182766479663394, "grad_norm": 0.18546608090400696, "learning_rate": 0.002, "loss": 2.5603, "step": 260150 }, { "epoch": 0.5182965701899783, "grad_norm": 0.16878542304039001, "learning_rate": 0.002, "loss": 2.5582, "step": 260160 }, { "epoch": 0.5183164924136172, "grad_norm": 0.15248143672943115, "learning_rate": 0.002, "loss": 2.5561, "step": 260170 }, { "epoch": 0.5183364146372561, "grad_norm": 0.17430971562862396, "learning_rate": 0.002, "loss": 2.5734, "step": 260180 }, { "epoch": 0.5183563368608951, "grad_norm": 0.17613255977630615, "learning_rate": 0.002, "loss": 2.5459, "step": 260190 }, { "epoch": 0.518376259084534, "grad_norm": 0.15530851483345032, "learning_rate": 0.002, "loss": 2.5618, "step": 260200 }, { "epoch": 0.5183961813081729, "grad_norm": 0.1582677960395813, "learning_rate": 0.002, "loss": 2.5439, "step": 260210 }, { "epoch": 0.5184161035318118, "grad_norm": 0.20084643363952637, "learning_rate": 0.002, "loss": 2.5759, "step": 260220 }, { "epoch": 0.5184360257554507, "grad_norm": 0.16678069531917572, "learning_rate": 0.002, "loss": 2.5508, "step": 260230 }, { "epoch": 0.5184559479790897, "grad_norm": 0.1679786741733551, "learning_rate": 0.002, "loss": 2.5632, "step": 260240 }, { "epoch": 0.5184758702027286, "grad_norm": 0.21637806296348572, "learning_rate": 0.002, "loss": 2.5552, "step": 260250 }, { "epoch": 0.5184957924263675, "grad_norm": 0.13944078981876373, "learning_rate": 0.002, "loss": 2.5639, "step": 260260 }, { "epoch": 0.5185157146500063, "grad_norm": 0.14781887829303741, "learning_rate": 0.002, "loss": 2.5414, "step": 260270 }, { "epoch": 0.5185356368736452, "grad_norm": 0.1627003252506256, "learning_rate": 0.002, "loss": 2.5442, "step": 260280 }, { "epoch": 0.5185555590972842, "grad_norm": 0.19906924664974213, "learning_rate": 0.002, "loss": 2.5528, "step": 260290 }, { "epoch": 0.5185754813209231, "grad_norm": 0.15502703189849854, "learning_rate": 0.002, "loss": 2.5553, "step": 260300 }, { "epoch": 0.518595403544562, "grad_norm": 0.1867927461862564, "learning_rate": 0.002, "loss": 2.5619, "step": 260310 }, { "epoch": 0.5186153257682009, "grad_norm": 0.1558171659708023, "learning_rate": 0.002, "loss": 2.5534, "step": 260320 }, { "epoch": 0.5186352479918399, "grad_norm": 0.1524137258529663, "learning_rate": 0.002, "loss": 2.5657, "step": 260330 }, { "epoch": 0.5186551702154788, "grad_norm": 0.1694382280111313, "learning_rate": 0.002, "loss": 2.5595, "step": 260340 }, { "epoch": 0.5186750924391177, "grad_norm": 0.16458255052566528, "learning_rate": 0.002, "loss": 2.5575, "step": 260350 }, { "epoch": 0.5186950146627566, "grad_norm": 0.204287588596344, "learning_rate": 0.002, "loss": 2.5712, "step": 260360 }, { "epoch": 0.5187149368863955, "grad_norm": 0.15070289373397827, "learning_rate": 0.002, "loss": 2.5561, "step": 260370 }, { "epoch": 0.5187348591100345, "grad_norm": 0.1544274091720581, "learning_rate": 0.002, "loss": 2.5573, "step": 260380 }, { "epoch": 0.5187547813336734, "grad_norm": 0.15738792717456818, "learning_rate": 0.002, "loss": 2.5605, "step": 260390 }, { "epoch": 0.5187747035573123, "grad_norm": 0.1784205585718155, "learning_rate": 0.002, "loss": 2.5528, "step": 260400 }, { "epoch": 0.5187946257809511, "grad_norm": 0.14626194536685944, "learning_rate": 0.002, "loss": 2.5506, "step": 260410 }, { "epoch": 0.51881454800459, "grad_norm": 0.18159884214401245, "learning_rate": 0.002, "loss": 2.5538, "step": 260420 }, { "epoch": 0.518834470228229, "grad_norm": 0.16612716019153595, "learning_rate": 0.002, "loss": 2.5623, "step": 260430 }, { "epoch": 0.5188543924518679, "grad_norm": 0.15761296451091766, "learning_rate": 0.002, "loss": 2.5689, "step": 260440 }, { "epoch": 0.5188743146755068, "grad_norm": 0.1605590134859085, "learning_rate": 0.002, "loss": 2.562, "step": 260450 }, { "epoch": 0.5188942368991457, "grad_norm": 0.17046043276786804, "learning_rate": 0.002, "loss": 2.5486, "step": 260460 }, { "epoch": 0.5189141591227846, "grad_norm": 0.17951999604701996, "learning_rate": 0.002, "loss": 2.5645, "step": 260470 }, { "epoch": 0.5189340813464236, "grad_norm": 0.17289410531520844, "learning_rate": 0.002, "loss": 2.5673, "step": 260480 }, { "epoch": 0.5189540035700625, "grad_norm": 0.1484612077474594, "learning_rate": 0.002, "loss": 2.5453, "step": 260490 }, { "epoch": 0.5189739257937014, "grad_norm": 0.23587681353092194, "learning_rate": 0.002, "loss": 2.5554, "step": 260500 }, { "epoch": 0.5189938480173403, "grad_norm": 0.1856308877468109, "learning_rate": 0.002, "loss": 2.565, "step": 260510 }, { "epoch": 0.5190137702409792, "grad_norm": 0.13479942083358765, "learning_rate": 0.002, "loss": 2.5468, "step": 260520 }, { "epoch": 0.5190336924646182, "grad_norm": 0.17797276377677917, "learning_rate": 0.002, "loss": 2.5623, "step": 260530 }, { "epoch": 0.5190536146882571, "grad_norm": 0.18652121722698212, "learning_rate": 0.002, "loss": 2.5596, "step": 260540 }, { "epoch": 0.519073536911896, "grad_norm": 0.19381272792816162, "learning_rate": 0.002, "loss": 2.5534, "step": 260550 }, { "epoch": 0.5190934591355348, "grad_norm": 0.16959716379642487, "learning_rate": 0.002, "loss": 2.5534, "step": 260560 }, { "epoch": 0.5191133813591737, "grad_norm": 0.16447287797927856, "learning_rate": 0.002, "loss": 2.5551, "step": 260570 }, { "epoch": 0.5191333035828127, "grad_norm": 0.13102054595947266, "learning_rate": 0.002, "loss": 2.5576, "step": 260580 }, { "epoch": 0.5191532258064516, "grad_norm": 0.18978500366210938, "learning_rate": 0.002, "loss": 2.5732, "step": 260590 }, { "epoch": 0.5191731480300905, "grad_norm": 0.15089839696884155, "learning_rate": 0.002, "loss": 2.556, "step": 260600 }, { "epoch": 0.5191930702537294, "grad_norm": 0.14516693353652954, "learning_rate": 0.002, "loss": 2.5533, "step": 260610 }, { "epoch": 0.5192129924773684, "grad_norm": 0.2264014482498169, "learning_rate": 0.002, "loss": 2.5697, "step": 260620 }, { "epoch": 0.5192329147010073, "grad_norm": 0.17191214859485626, "learning_rate": 0.002, "loss": 2.5688, "step": 260630 }, { "epoch": 0.5192528369246462, "grad_norm": 0.15283632278442383, "learning_rate": 0.002, "loss": 2.5512, "step": 260640 }, { "epoch": 0.5192727591482851, "grad_norm": 0.1409769207239151, "learning_rate": 0.002, "loss": 2.5662, "step": 260650 }, { "epoch": 0.519292681371924, "grad_norm": 0.17120203375816345, "learning_rate": 0.002, "loss": 2.5483, "step": 260660 }, { "epoch": 0.519312603595563, "grad_norm": 0.15921901166439056, "learning_rate": 0.002, "loss": 2.5605, "step": 260670 }, { "epoch": 0.5193325258192019, "grad_norm": 0.18984830379486084, "learning_rate": 0.002, "loss": 2.5693, "step": 260680 }, { "epoch": 0.5193524480428408, "grad_norm": 0.14936335384845734, "learning_rate": 0.002, "loss": 2.5412, "step": 260690 }, { "epoch": 0.5193723702664796, "grad_norm": 0.18148066103458405, "learning_rate": 0.002, "loss": 2.5525, "step": 260700 }, { "epoch": 0.5193922924901185, "grad_norm": 0.18889181315898895, "learning_rate": 0.002, "loss": 2.5662, "step": 260710 }, { "epoch": 0.5194122147137575, "grad_norm": 0.16540409624576569, "learning_rate": 0.002, "loss": 2.5486, "step": 260720 }, { "epoch": 0.5194321369373964, "grad_norm": 0.15372490882873535, "learning_rate": 0.002, "loss": 2.5592, "step": 260730 }, { "epoch": 0.5194520591610353, "grad_norm": 0.16357524693012238, "learning_rate": 0.002, "loss": 2.5482, "step": 260740 }, { "epoch": 0.5194719813846742, "grad_norm": 0.17341211438179016, "learning_rate": 0.002, "loss": 2.5456, "step": 260750 }, { "epoch": 0.5194919036083131, "grad_norm": 0.18843887746334076, "learning_rate": 0.002, "loss": 2.5639, "step": 260760 }, { "epoch": 0.5195118258319521, "grad_norm": 0.15368546545505524, "learning_rate": 0.002, "loss": 2.5583, "step": 260770 }, { "epoch": 0.519531748055591, "grad_norm": 0.14594390988349915, "learning_rate": 0.002, "loss": 2.5431, "step": 260780 }, { "epoch": 0.5195516702792299, "grad_norm": 0.1683751791715622, "learning_rate": 0.002, "loss": 2.5562, "step": 260790 }, { "epoch": 0.5195715925028688, "grad_norm": 0.15192407369613647, "learning_rate": 0.002, "loss": 2.5483, "step": 260800 }, { "epoch": 0.5195915147265077, "grad_norm": 0.14386087656021118, "learning_rate": 0.002, "loss": 2.567, "step": 260810 }, { "epoch": 0.5196114369501467, "grad_norm": 0.15634138882160187, "learning_rate": 0.002, "loss": 2.5551, "step": 260820 }, { "epoch": 0.5196313591737856, "grad_norm": 0.18045540153980255, "learning_rate": 0.002, "loss": 2.5547, "step": 260830 }, { "epoch": 0.5196512813974244, "grad_norm": 0.1836053729057312, "learning_rate": 0.002, "loss": 2.5619, "step": 260840 }, { "epoch": 0.5196712036210633, "grad_norm": 0.15407532453536987, "learning_rate": 0.002, "loss": 2.5482, "step": 260850 }, { "epoch": 0.5196911258447022, "grad_norm": 0.14793196320533752, "learning_rate": 0.002, "loss": 2.5544, "step": 260860 }, { "epoch": 0.5197110480683412, "grad_norm": 0.16752125322818756, "learning_rate": 0.002, "loss": 2.5518, "step": 260870 }, { "epoch": 0.5197309702919801, "grad_norm": 0.15704140067100525, "learning_rate": 0.002, "loss": 2.5601, "step": 260880 }, { "epoch": 0.519750892515619, "grad_norm": 0.17031271755695343, "learning_rate": 0.002, "loss": 2.5343, "step": 260890 }, { "epoch": 0.5197708147392579, "grad_norm": 0.20004601776599884, "learning_rate": 0.002, "loss": 2.5443, "step": 260900 }, { "epoch": 0.5197907369628969, "grad_norm": 0.1612555831670761, "learning_rate": 0.002, "loss": 2.5593, "step": 260910 }, { "epoch": 0.5198106591865358, "grad_norm": 0.1795511692762375, "learning_rate": 0.002, "loss": 2.5638, "step": 260920 }, { "epoch": 0.5198305814101747, "grad_norm": 0.17381180822849274, "learning_rate": 0.002, "loss": 2.5506, "step": 260930 }, { "epoch": 0.5198505036338136, "grad_norm": 0.1900576949119568, "learning_rate": 0.002, "loss": 2.5554, "step": 260940 }, { "epoch": 0.5198704258574525, "grad_norm": 0.18440264463424683, "learning_rate": 0.002, "loss": 2.5578, "step": 260950 }, { "epoch": 0.5198903480810915, "grad_norm": 0.19662576913833618, "learning_rate": 0.002, "loss": 2.5581, "step": 260960 }, { "epoch": 0.5199102703047304, "grad_norm": 0.16130824387073517, "learning_rate": 0.002, "loss": 2.5742, "step": 260970 }, { "epoch": 0.5199301925283693, "grad_norm": 0.14540144801139832, "learning_rate": 0.002, "loss": 2.5711, "step": 260980 }, { "epoch": 0.5199501147520081, "grad_norm": 0.15873347222805023, "learning_rate": 0.002, "loss": 2.5514, "step": 260990 }, { "epoch": 0.519970036975647, "grad_norm": 0.15983162820339203, "learning_rate": 0.002, "loss": 2.5892, "step": 261000 }, { "epoch": 0.519989959199286, "grad_norm": 0.13843677937984467, "learning_rate": 0.002, "loss": 2.5652, "step": 261010 }, { "epoch": 0.5200098814229249, "grad_norm": 0.19753830134868622, "learning_rate": 0.002, "loss": 2.5666, "step": 261020 }, { "epoch": 0.5200298036465638, "grad_norm": 0.16867944598197937, "learning_rate": 0.002, "loss": 2.5649, "step": 261030 }, { "epoch": 0.5200497258702027, "grad_norm": 0.16236500442028046, "learning_rate": 0.002, "loss": 2.557, "step": 261040 }, { "epoch": 0.5200696480938416, "grad_norm": 0.1467331498861313, "learning_rate": 0.002, "loss": 2.5587, "step": 261050 }, { "epoch": 0.5200895703174806, "grad_norm": 0.17724835872650146, "learning_rate": 0.002, "loss": 2.5507, "step": 261060 }, { "epoch": 0.5201094925411195, "grad_norm": 0.1770269274711609, "learning_rate": 0.002, "loss": 2.5487, "step": 261070 }, { "epoch": 0.5201294147647584, "grad_norm": 0.14015141129493713, "learning_rate": 0.002, "loss": 2.5543, "step": 261080 }, { "epoch": 0.5201493369883973, "grad_norm": 0.15400560200214386, "learning_rate": 0.002, "loss": 2.5435, "step": 261090 }, { "epoch": 0.5201692592120362, "grad_norm": 0.1927594542503357, "learning_rate": 0.002, "loss": 2.5675, "step": 261100 }, { "epoch": 0.5201891814356752, "grad_norm": 0.16179722547531128, "learning_rate": 0.002, "loss": 2.56, "step": 261110 }, { "epoch": 0.520209103659314, "grad_norm": 0.16309520602226257, "learning_rate": 0.002, "loss": 2.5621, "step": 261120 }, { "epoch": 0.520229025882953, "grad_norm": 0.190096914768219, "learning_rate": 0.002, "loss": 2.5522, "step": 261130 }, { "epoch": 0.5202489481065918, "grad_norm": 0.15767617523670197, "learning_rate": 0.002, "loss": 2.5376, "step": 261140 }, { "epoch": 0.5202688703302307, "grad_norm": 0.14825193583965302, "learning_rate": 0.002, "loss": 2.5665, "step": 261150 }, { "epoch": 0.5202887925538697, "grad_norm": 0.16888612508773804, "learning_rate": 0.002, "loss": 2.5614, "step": 261160 }, { "epoch": 0.5203087147775086, "grad_norm": 0.17308247089385986, "learning_rate": 0.002, "loss": 2.5536, "step": 261170 }, { "epoch": 0.5203286370011475, "grad_norm": 0.16410964727401733, "learning_rate": 0.002, "loss": 2.5629, "step": 261180 }, { "epoch": 0.5203485592247864, "grad_norm": 0.19714176654815674, "learning_rate": 0.002, "loss": 2.5633, "step": 261190 }, { "epoch": 0.5203684814484254, "grad_norm": 0.14879336953163147, "learning_rate": 0.002, "loss": 2.5718, "step": 261200 }, { "epoch": 0.5203884036720643, "grad_norm": 0.15803539752960205, "learning_rate": 0.002, "loss": 2.5562, "step": 261210 }, { "epoch": 0.5204083258957032, "grad_norm": 0.16737306118011475, "learning_rate": 0.002, "loss": 2.5495, "step": 261220 }, { "epoch": 0.5204282481193421, "grad_norm": 0.19345517456531525, "learning_rate": 0.002, "loss": 2.548, "step": 261230 }, { "epoch": 0.520448170342981, "grad_norm": 0.1604425609111786, "learning_rate": 0.002, "loss": 2.5678, "step": 261240 }, { "epoch": 0.52046809256662, "grad_norm": 0.15324680507183075, "learning_rate": 0.002, "loss": 2.5614, "step": 261250 }, { "epoch": 0.5204880147902589, "grad_norm": 0.19417434930801392, "learning_rate": 0.002, "loss": 2.5647, "step": 261260 }, { "epoch": 0.5205079370138977, "grad_norm": 0.1763337254524231, "learning_rate": 0.002, "loss": 2.5642, "step": 261270 }, { "epoch": 0.5205278592375366, "grad_norm": 0.15972979366779327, "learning_rate": 0.002, "loss": 2.5567, "step": 261280 }, { "epoch": 0.5205477814611755, "grad_norm": 0.15969976782798767, "learning_rate": 0.002, "loss": 2.5631, "step": 261290 }, { "epoch": 0.5205677036848145, "grad_norm": 0.2112894058227539, "learning_rate": 0.002, "loss": 2.5433, "step": 261300 }, { "epoch": 0.5205876259084534, "grad_norm": 0.17497791349887848, "learning_rate": 0.002, "loss": 2.5536, "step": 261310 }, { "epoch": 0.5206075481320923, "grad_norm": 0.16166996955871582, "learning_rate": 0.002, "loss": 2.5424, "step": 261320 }, { "epoch": 0.5206274703557312, "grad_norm": 0.13175182044506073, "learning_rate": 0.002, "loss": 2.5427, "step": 261330 }, { "epoch": 0.5206473925793701, "grad_norm": 0.12819001078605652, "learning_rate": 0.002, "loss": 2.5719, "step": 261340 }, { "epoch": 0.5206673148030091, "grad_norm": 0.24505913257598877, "learning_rate": 0.002, "loss": 2.5539, "step": 261350 }, { "epoch": 0.520687237026648, "grad_norm": 0.14587272703647614, "learning_rate": 0.002, "loss": 2.5504, "step": 261360 }, { "epoch": 0.5207071592502869, "grad_norm": 0.24085493385791779, "learning_rate": 0.002, "loss": 2.5537, "step": 261370 }, { "epoch": 0.5207270814739258, "grad_norm": 0.1785919964313507, "learning_rate": 0.002, "loss": 2.5509, "step": 261380 }, { "epoch": 0.5207470036975647, "grad_norm": 0.16465091705322266, "learning_rate": 0.002, "loss": 2.5603, "step": 261390 }, { "epoch": 0.5207669259212037, "grad_norm": 0.16112902760505676, "learning_rate": 0.002, "loss": 2.5517, "step": 261400 }, { "epoch": 0.5207868481448426, "grad_norm": 0.1592281609773636, "learning_rate": 0.002, "loss": 2.5359, "step": 261410 }, { "epoch": 0.5208067703684814, "grad_norm": 0.15913915634155273, "learning_rate": 0.002, "loss": 2.548, "step": 261420 }, { "epoch": 0.5208266925921203, "grad_norm": 0.16994638741016388, "learning_rate": 0.002, "loss": 2.5523, "step": 261430 }, { "epoch": 0.5208466148157592, "grad_norm": 0.1634419709444046, "learning_rate": 0.002, "loss": 2.551, "step": 261440 }, { "epoch": 0.5208665370393982, "grad_norm": 0.17594598233699799, "learning_rate": 0.002, "loss": 2.5541, "step": 261450 }, { "epoch": 0.5208864592630371, "grad_norm": 0.14889182150363922, "learning_rate": 0.002, "loss": 2.5686, "step": 261460 }, { "epoch": 0.520906381486676, "grad_norm": 0.1646411418914795, "learning_rate": 0.002, "loss": 2.5493, "step": 261470 }, { "epoch": 0.5209263037103149, "grad_norm": 0.15611772239208221, "learning_rate": 0.002, "loss": 2.5413, "step": 261480 }, { "epoch": 0.5209462259339539, "grad_norm": 0.139004185795784, "learning_rate": 0.002, "loss": 2.5695, "step": 261490 }, { "epoch": 0.5209661481575928, "grad_norm": 0.16953438520431519, "learning_rate": 0.002, "loss": 2.5446, "step": 261500 }, { "epoch": 0.5209860703812317, "grad_norm": 0.1707332879304886, "learning_rate": 0.002, "loss": 2.5567, "step": 261510 }, { "epoch": 0.5210059926048706, "grad_norm": 0.14432300627231598, "learning_rate": 0.002, "loss": 2.5484, "step": 261520 }, { "epoch": 0.5210259148285095, "grad_norm": 0.18998651206493378, "learning_rate": 0.002, "loss": 2.5311, "step": 261530 }, { "epoch": 0.5210458370521485, "grad_norm": 0.17127935588359833, "learning_rate": 0.002, "loss": 2.5638, "step": 261540 }, { "epoch": 0.5210657592757874, "grad_norm": 0.20184838771820068, "learning_rate": 0.002, "loss": 2.5532, "step": 261550 }, { "epoch": 0.5210856814994262, "grad_norm": 0.13517339527606964, "learning_rate": 0.002, "loss": 2.5506, "step": 261560 }, { "epoch": 0.5211056037230651, "grad_norm": 0.17123256623744965, "learning_rate": 0.002, "loss": 2.554, "step": 261570 }, { "epoch": 0.521125525946704, "grad_norm": 0.16582633554935455, "learning_rate": 0.002, "loss": 2.5634, "step": 261580 }, { "epoch": 0.521145448170343, "grad_norm": 0.15975214540958405, "learning_rate": 0.002, "loss": 2.5512, "step": 261590 }, { "epoch": 0.5211653703939819, "grad_norm": 0.16795390844345093, "learning_rate": 0.002, "loss": 2.5648, "step": 261600 }, { "epoch": 0.5211852926176208, "grad_norm": 0.16370755434036255, "learning_rate": 0.002, "loss": 2.5675, "step": 261610 }, { "epoch": 0.5212052148412597, "grad_norm": 0.16951583325862885, "learning_rate": 0.002, "loss": 2.581, "step": 261620 }, { "epoch": 0.5212251370648986, "grad_norm": 0.15421761572360992, "learning_rate": 0.002, "loss": 2.5505, "step": 261630 }, { "epoch": 0.5212450592885376, "grad_norm": 0.1783498376607895, "learning_rate": 0.002, "loss": 2.5462, "step": 261640 }, { "epoch": 0.5212649815121765, "grad_norm": 0.15503056347370148, "learning_rate": 0.002, "loss": 2.5571, "step": 261650 }, { "epoch": 0.5212849037358154, "grad_norm": 0.16489070653915405, "learning_rate": 0.002, "loss": 2.5584, "step": 261660 }, { "epoch": 0.5213048259594543, "grad_norm": 0.16375070810317993, "learning_rate": 0.002, "loss": 2.5564, "step": 261670 }, { "epoch": 0.5213247481830932, "grad_norm": 0.14813178777694702, "learning_rate": 0.002, "loss": 2.5543, "step": 261680 }, { "epoch": 0.5213446704067322, "grad_norm": 0.15700456500053406, "learning_rate": 0.002, "loss": 2.5462, "step": 261690 }, { "epoch": 0.521364592630371, "grad_norm": 0.18752850592136383, "learning_rate": 0.002, "loss": 2.5545, "step": 261700 }, { "epoch": 0.5213845148540099, "grad_norm": 0.15785136818885803, "learning_rate": 0.002, "loss": 2.5678, "step": 261710 }, { "epoch": 0.5214044370776488, "grad_norm": 0.19266562163829803, "learning_rate": 0.002, "loss": 2.5688, "step": 261720 }, { "epoch": 0.5214243593012877, "grad_norm": 0.19369065761566162, "learning_rate": 0.002, "loss": 2.5645, "step": 261730 }, { "epoch": 0.5214442815249267, "grad_norm": 0.1609991192817688, "learning_rate": 0.002, "loss": 2.5597, "step": 261740 }, { "epoch": 0.5214642037485656, "grad_norm": 0.17371685802936554, "learning_rate": 0.002, "loss": 2.5521, "step": 261750 }, { "epoch": 0.5214841259722045, "grad_norm": 0.22346913814544678, "learning_rate": 0.002, "loss": 2.5687, "step": 261760 }, { "epoch": 0.5215040481958434, "grad_norm": 0.1550426036119461, "learning_rate": 0.002, "loss": 2.543, "step": 261770 }, { "epoch": 0.5215239704194823, "grad_norm": 0.17885035276412964, "learning_rate": 0.002, "loss": 2.5625, "step": 261780 }, { "epoch": 0.5215438926431213, "grad_norm": 0.15482451021671295, "learning_rate": 0.002, "loss": 2.5512, "step": 261790 }, { "epoch": 0.5215638148667602, "grad_norm": 0.1538369059562683, "learning_rate": 0.002, "loss": 2.556, "step": 261800 }, { "epoch": 0.5215837370903991, "grad_norm": 0.1507902592420578, "learning_rate": 0.002, "loss": 2.563, "step": 261810 }, { "epoch": 0.521603659314038, "grad_norm": 0.16351008415222168, "learning_rate": 0.002, "loss": 2.5625, "step": 261820 }, { "epoch": 0.521623581537677, "grad_norm": 0.200688436627388, "learning_rate": 0.002, "loss": 2.5742, "step": 261830 }, { "epoch": 0.5216435037613159, "grad_norm": 0.165622279047966, "learning_rate": 0.002, "loss": 2.5711, "step": 261840 }, { "epoch": 0.5216634259849547, "grad_norm": 0.1699550747871399, "learning_rate": 0.002, "loss": 2.5727, "step": 261850 }, { "epoch": 0.5216833482085936, "grad_norm": 0.18194778263568878, "learning_rate": 0.002, "loss": 2.5601, "step": 261860 }, { "epoch": 0.5217032704322325, "grad_norm": 0.18578897416591644, "learning_rate": 0.002, "loss": 2.5663, "step": 261870 }, { "epoch": 0.5217231926558715, "grad_norm": 0.14419306814670563, "learning_rate": 0.002, "loss": 2.5511, "step": 261880 }, { "epoch": 0.5217431148795104, "grad_norm": 0.15822547674179077, "learning_rate": 0.002, "loss": 2.5534, "step": 261890 }, { "epoch": 0.5217630371031493, "grad_norm": 0.1577543169260025, "learning_rate": 0.002, "loss": 2.543, "step": 261900 }, { "epoch": 0.5217829593267882, "grad_norm": 0.1615869551897049, "learning_rate": 0.002, "loss": 2.5677, "step": 261910 }, { "epoch": 0.5218028815504271, "grad_norm": 0.16103403270244598, "learning_rate": 0.002, "loss": 2.5378, "step": 261920 }, { "epoch": 0.5218228037740661, "grad_norm": 0.148478701710701, "learning_rate": 0.002, "loss": 2.548, "step": 261930 }, { "epoch": 0.521842725997705, "grad_norm": 0.1610371321439743, "learning_rate": 0.002, "loss": 2.5654, "step": 261940 }, { "epoch": 0.5218626482213439, "grad_norm": 0.16879278421401978, "learning_rate": 0.002, "loss": 2.5719, "step": 261950 }, { "epoch": 0.5218825704449828, "grad_norm": 0.15559282898902893, "learning_rate": 0.002, "loss": 2.5561, "step": 261960 }, { "epoch": 0.5219024926686217, "grad_norm": 0.15180429816246033, "learning_rate": 0.002, "loss": 2.568, "step": 261970 }, { "epoch": 0.5219224148922607, "grad_norm": 0.17836447060108185, "learning_rate": 0.002, "loss": 2.5567, "step": 261980 }, { "epoch": 0.5219423371158995, "grad_norm": 0.20049238204956055, "learning_rate": 0.002, "loss": 2.5641, "step": 261990 }, { "epoch": 0.5219622593395384, "grad_norm": 0.1569627821445465, "learning_rate": 0.002, "loss": 2.5542, "step": 262000 }, { "epoch": 0.5219821815631773, "grad_norm": 0.20076313614845276, "learning_rate": 0.002, "loss": 2.5484, "step": 262010 }, { "epoch": 0.5220021037868162, "grad_norm": 0.18541181087493896, "learning_rate": 0.002, "loss": 2.5603, "step": 262020 }, { "epoch": 0.5220220260104552, "grad_norm": 0.13017863035202026, "learning_rate": 0.002, "loss": 2.5623, "step": 262030 }, { "epoch": 0.5220419482340941, "grad_norm": 0.1516377478837967, "learning_rate": 0.002, "loss": 2.541, "step": 262040 }, { "epoch": 0.522061870457733, "grad_norm": 0.2115243822336197, "learning_rate": 0.002, "loss": 2.5508, "step": 262050 }, { "epoch": 0.5220817926813719, "grad_norm": 0.15172061324119568, "learning_rate": 0.002, "loss": 2.5531, "step": 262060 }, { "epoch": 0.5221017149050108, "grad_norm": 0.1598249077796936, "learning_rate": 0.002, "loss": 2.5743, "step": 262070 }, { "epoch": 0.5221216371286498, "grad_norm": 0.20396946370601654, "learning_rate": 0.002, "loss": 2.564, "step": 262080 }, { "epoch": 0.5221415593522887, "grad_norm": 0.15255676209926605, "learning_rate": 0.002, "loss": 2.5462, "step": 262090 }, { "epoch": 0.5221614815759276, "grad_norm": 0.15411238372325897, "learning_rate": 0.002, "loss": 2.5655, "step": 262100 }, { "epoch": 0.5221814037995665, "grad_norm": 0.17592129111289978, "learning_rate": 0.002, "loss": 2.5572, "step": 262110 }, { "epoch": 0.5222013260232055, "grad_norm": 0.15768249332904816, "learning_rate": 0.002, "loss": 2.5622, "step": 262120 }, { "epoch": 0.5222212482468443, "grad_norm": 0.1650262176990509, "learning_rate": 0.002, "loss": 2.5604, "step": 262130 }, { "epoch": 0.5222411704704832, "grad_norm": 0.18681754171848297, "learning_rate": 0.002, "loss": 2.577, "step": 262140 }, { "epoch": 0.5222610926941221, "grad_norm": 0.1726464331150055, "learning_rate": 0.002, "loss": 2.5573, "step": 262150 }, { "epoch": 0.522281014917761, "grad_norm": 0.18088571727275848, "learning_rate": 0.002, "loss": 2.5654, "step": 262160 }, { "epoch": 0.5223009371414, "grad_norm": 0.15163300931453705, "learning_rate": 0.002, "loss": 2.563, "step": 262170 }, { "epoch": 0.5223208593650389, "grad_norm": 0.1612246036529541, "learning_rate": 0.002, "loss": 2.5475, "step": 262180 }, { "epoch": 0.5223407815886778, "grad_norm": 0.18201687932014465, "learning_rate": 0.002, "loss": 2.5554, "step": 262190 }, { "epoch": 0.5223607038123167, "grad_norm": 0.18666446208953857, "learning_rate": 0.002, "loss": 2.5556, "step": 262200 }, { "epoch": 0.5223806260359556, "grad_norm": 0.15671899914741516, "learning_rate": 0.002, "loss": 2.5607, "step": 262210 }, { "epoch": 0.5224005482595946, "grad_norm": 0.18178248405456543, "learning_rate": 0.002, "loss": 2.5492, "step": 262220 }, { "epoch": 0.5224204704832335, "grad_norm": 0.17509374022483826, "learning_rate": 0.002, "loss": 2.5616, "step": 262230 }, { "epoch": 0.5224403927068724, "grad_norm": 0.14300116896629333, "learning_rate": 0.002, "loss": 2.5583, "step": 262240 }, { "epoch": 0.5224603149305113, "grad_norm": 0.1923946887254715, "learning_rate": 0.002, "loss": 2.5573, "step": 262250 }, { "epoch": 0.5224802371541502, "grad_norm": 0.17778053879737854, "learning_rate": 0.002, "loss": 2.5572, "step": 262260 }, { "epoch": 0.5225001593777892, "grad_norm": 0.16996672749519348, "learning_rate": 0.002, "loss": 2.5608, "step": 262270 }, { "epoch": 0.522520081601428, "grad_norm": 0.1780432164669037, "learning_rate": 0.002, "loss": 2.5541, "step": 262280 }, { "epoch": 0.5225400038250669, "grad_norm": 0.14908193051815033, "learning_rate": 0.002, "loss": 2.564, "step": 262290 }, { "epoch": 0.5225599260487058, "grad_norm": 0.25624117255210876, "learning_rate": 0.002, "loss": 2.5512, "step": 262300 }, { "epoch": 0.5225798482723447, "grad_norm": 0.17200727760791779, "learning_rate": 0.002, "loss": 2.5652, "step": 262310 }, { "epoch": 0.5225997704959837, "grad_norm": 0.15831312537193298, "learning_rate": 0.002, "loss": 2.5503, "step": 262320 }, { "epoch": 0.5226196927196226, "grad_norm": 0.15453165769577026, "learning_rate": 0.002, "loss": 2.5617, "step": 262330 }, { "epoch": 0.5226396149432615, "grad_norm": 0.14746330678462982, "learning_rate": 0.002, "loss": 2.564, "step": 262340 }, { "epoch": 0.5226595371669004, "grad_norm": 0.1809011548757553, "learning_rate": 0.002, "loss": 2.5491, "step": 262350 }, { "epoch": 0.5226794593905393, "grad_norm": 0.15852606296539307, "learning_rate": 0.002, "loss": 2.568, "step": 262360 }, { "epoch": 0.5226993816141783, "grad_norm": 0.19130656123161316, "learning_rate": 0.002, "loss": 2.5574, "step": 262370 }, { "epoch": 0.5227193038378172, "grad_norm": 0.16013403236865997, "learning_rate": 0.002, "loss": 2.5413, "step": 262380 }, { "epoch": 0.5227392260614561, "grad_norm": 0.15936660766601562, "learning_rate": 0.002, "loss": 2.5551, "step": 262390 }, { "epoch": 0.522759148285095, "grad_norm": 0.1704501360654831, "learning_rate": 0.002, "loss": 2.5652, "step": 262400 }, { "epoch": 0.522779070508734, "grad_norm": 0.15596504509449005, "learning_rate": 0.002, "loss": 2.5527, "step": 262410 }, { "epoch": 0.5227989927323728, "grad_norm": 0.14662083983421326, "learning_rate": 0.002, "loss": 2.5653, "step": 262420 }, { "epoch": 0.5228189149560117, "grad_norm": 0.16598866879940033, "learning_rate": 0.002, "loss": 2.5545, "step": 262430 }, { "epoch": 0.5228388371796506, "grad_norm": 0.16598935425281525, "learning_rate": 0.002, "loss": 2.5712, "step": 262440 }, { "epoch": 0.5228587594032895, "grad_norm": 0.15804918110370636, "learning_rate": 0.002, "loss": 2.5571, "step": 262450 }, { "epoch": 0.5228786816269285, "grad_norm": 0.2056906372308731, "learning_rate": 0.002, "loss": 2.5504, "step": 262460 }, { "epoch": 0.5228986038505674, "grad_norm": 0.1519840657711029, "learning_rate": 0.002, "loss": 2.5601, "step": 262470 }, { "epoch": 0.5229185260742063, "grad_norm": 0.17815253138542175, "learning_rate": 0.002, "loss": 2.5557, "step": 262480 }, { "epoch": 0.5229384482978452, "grad_norm": 0.1424776017665863, "learning_rate": 0.002, "loss": 2.5447, "step": 262490 }, { "epoch": 0.5229583705214841, "grad_norm": 0.16305504739284515, "learning_rate": 0.002, "loss": 2.5554, "step": 262500 }, { "epoch": 0.5229782927451231, "grad_norm": 0.18492381274700165, "learning_rate": 0.002, "loss": 2.5683, "step": 262510 }, { "epoch": 0.522998214968762, "grad_norm": 0.1537197381258011, "learning_rate": 0.002, "loss": 2.5514, "step": 262520 }, { "epoch": 0.5230181371924009, "grad_norm": 0.15345124900341034, "learning_rate": 0.002, "loss": 2.5609, "step": 262530 }, { "epoch": 0.5230380594160398, "grad_norm": 0.14251206815242767, "learning_rate": 0.002, "loss": 2.5516, "step": 262540 }, { "epoch": 0.5230579816396786, "grad_norm": 0.1609591394662857, "learning_rate": 0.002, "loss": 2.5743, "step": 262550 }, { "epoch": 0.5230779038633177, "grad_norm": 0.1915741264820099, "learning_rate": 0.002, "loss": 2.5853, "step": 262560 }, { "epoch": 0.5230978260869565, "grad_norm": 0.15095560252666473, "learning_rate": 0.002, "loss": 2.5616, "step": 262570 }, { "epoch": 0.5231177483105954, "grad_norm": 0.16668619215488434, "learning_rate": 0.002, "loss": 2.5584, "step": 262580 }, { "epoch": 0.5231376705342343, "grad_norm": 0.20951908826828003, "learning_rate": 0.002, "loss": 2.5562, "step": 262590 }, { "epoch": 0.5231575927578732, "grad_norm": 0.1553954929113388, "learning_rate": 0.002, "loss": 2.5747, "step": 262600 }, { "epoch": 0.5231775149815122, "grad_norm": 0.20542952418327332, "learning_rate": 0.002, "loss": 2.5565, "step": 262610 }, { "epoch": 0.5231974372051511, "grad_norm": 0.18542087078094482, "learning_rate": 0.002, "loss": 2.5789, "step": 262620 }, { "epoch": 0.52321735942879, "grad_norm": 0.14983104169368744, "learning_rate": 0.002, "loss": 2.5505, "step": 262630 }, { "epoch": 0.5232372816524289, "grad_norm": 0.16190749406814575, "learning_rate": 0.002, "loss": 2.5505, "step": 262640 }, { "epoch": 0.5232572038760678, "grad_norm": 0.1682790070772171, "learning_rate": 0.002, "loss": 2.5608, "step": 262650 }, { "epoch": 0.5232771260997068, "grad_norm": 0.19295859336853027, "learning_rate": 0.002, "loss": 2.5545, "step": 262660 }, { "epoch": 0.5232970483233457, "grad_norm": 0.25097548961639404, "learning_rate": 0.002, "loss": 2.5493, "step": 262670 }, { "epoch": 0.5233169705469846, "grad_norm": 0.1846928745508194, "learning_rate": 0.002, "loss": 2.5687, "step": 262680 }, { "epoch": 0.5233368927706235, "grad_norm": 0.16881157457828522, "learning_rate": 0.002, "loss": 2.568, "step": 262690 }, { "epoch": 0.5233568149942625, "grad_norm": 0.1443255990743637, "learning_rate": 0.002, "loss": 2.5326, "step": 262700 }, { "epoch": 0.5233767372179013, "grad_norm": 0.1632264405488968, "learning_rate": 0.002, "loss": 2.5675, "step": 262710 }, { "epoch": 0.5233966594415402, "grad_norm": 0.17510287463665009, "learning_rate": 0.002, "loss": 2.5501, "step": 262720 }, { "epoch": 0.5234165816651791, "grad_norm": 0.16975916922092438, "learning_rate": 0.002, "loss": 2.5631, "step": 262730 }, { "epoch": 0.523436503888818, "grad_norm": 0.16132239997386932, "learning_rate": 0.002, "loss": 2.5343, "step": 262740 }, { "epoch": 0.523456426112457, "grad_norm": 0.16497802734375, "learning_rate": 0.002, "loss": 2.5516, "step": 262750 }, { "epoch": 0.5234763483360959, "grad_norm": 0.18094143271446228, "learning_rate": 0.002, "loss": 2.5508, "step": 262760 }, { "epoch": 0.5234962705597348, "grad_norm": 0.19160160422325134, "learning_rate": 0.002, "loss": 2.5468, "step": 262770 }, { "epoch": 0.5235161927833737, "grad_norm": 0.16694383323192596, "learning_rate": 0.002, "loss": 2.5548, "step": 262780 }, { "epoch": 0.5235361150070126, "grad_norm": 0.1541682630777359, "learning_rate": 0.002, "loss": 2.55, "step": 262790 }, { "epoch": 0.5235560372306516, "grad_norm": 0.14806604385375977, "learning_rate": 0.002, "loss": 2.5384, "step": 262800 }, { "epoch": 0.5235759594542905, "grad_norm": 0.18362677097320557, "learning_rate": 0.002, "loss": 2.5439, "step": 262810 }, { "epoch": 0.5235958816779294, "grad_norm": 0.14781075716018677, "learning_rate": 0.002, "loss": 2.5716, "step": 262820 }, { "epoch": 0.5236158039015683, "grad_norm": 0.1685274988412857, "learning_rate": 0.002, "loss": 2.5507, "step": 262830 }, { "epoch": 0.5236357261252071, "grad_norm": 0.16386236250400543, "learning_rate": 0.002, "loss": 2.5593, "step": 262840 }, { "epoch": 0.5236556483488461, "grad_norm": 0.13255658745765686, "learning_rate": 0.002, "loss": 2.5542, "step": 262850 }, { "epoch": 0.523675570572485, "grad_norm": 0.18479864299297333, "learning_rate": 0.002, "loss": 2.5439, "step": 262860 }, { "epoch": 0.5236954927961239, "grad_norm": 0.1882816106081009, "learning_rate": 0.002, "loss": 2.5511, "step": 262870 }, { "epoch": 0.5237154150197628, "grad_norm": 0.17491064965724945, "learning_rate": 0.002, "loss": 2.5656, "step": 262880 }, { "epoch": 0.5237353372434017, "grad_norm": 0.15840399265289307, "learning_rate": 0.002, "loss": 2.5334, "step": 262890 }, { "epoch": 0.5237552594670407, "grad_norm": 0.18475714325904846, "learning_rate": 0.002, "loss": 2.5458, "step": 262900 }, { "epoch": 0.5237751816906796, "grad_norm": 0.1774369329214096, "learning_rate": 0.002, "loss": 2.5697, "step": 262910 }, { "epoch": 0.5237951039143185, "grad_norm": 0.1538812667131424, "learning_rate": 0.002, "loss": 2.5557, "step": 262920 }, { "epoch": 0.5238150261379574, "grad_norm": 0.18007192015647888, "learning_rate": 0.002, "loss": 2.555, "step": 262930 }, { "epoch": 0.5238349483615963, "grad_norm": 0.17042070627212524, "learning_rate": 0.002, "loss": 2.5534, "step": 262940 }, { "epoch": 0.5238548705852353, "grad_norm": 0.15740050375461578, "learning_rate": 0.002, "loss": 2.5429, "step": 262950 }, { "epoch": 0.5238747928088742, "grad_norm": 0.1748482584953308, "learning_rate": 0.002, "loss": 2.5567, "step": 262960 }, { "epoch": 0.5238947150325131, "grad_norm": 0.16334094107151031, "learning_rate": 0.002, "loss": 2.546, "step": 262970 }, { "epoch": 0.523914637256152, "grad_norm": 0.16507446765899658, "learning_rate": 0.002, "loss": 2.5463, "step": 262980 }, { "epoch": 0.523934559479791, "grad_norm": 0.1762194186449051, "learning_rate": 0.002, "loss": 2.5522, "step": 262990 }, { "epoch": 0.5239544817034298, "grad_norm": 0.17977970838546753, "learning_rate": 0.002, "loss": 2.5659, "step": 263000 }, { "epoch": 0.5239744039270687, "grad_norm": 0.14968179166316986, "learning_rate": 0.002, "loss": 2.5722, "step": 263010 }, { "epoch": 0.5239943261507076, "grad_norm": 0.1570073664188385, "learning_rate": 0.002, "loss": 2.5336, "step": 263020 }, { "epoch": 0.5240142483743465, "grad_norm": 0.1399536281824112, "learning_rate": 0.002, "loss": 2.566, "step": 263030 }, { "epoch": 0.5240341705979855, "grad_norm": 0.23352362215518951, "learning_rate": 0.002, "loss": 2.5574, "step": 263040 }, { "epoch": 0.5240540928216244, "grad_norm": 0.19618406891822815, "learning_rate": 0.002, "loss": 2.5576, "step": 263050 }, { "epoch": 0.5240740150452633, "grad_norm": 0.17718611657619476, "learning_rate": 0.002, "loss": 2.568, "step": 263060 }, { "epoch": 0.5240939372689022, "grad_norm": 0.17801930010318756, "learning_rate": 0.002, "loss": 2.5564, "step": 263070 }, { "epoch": 0.5241138594925411, "grad_norm": 0.19943024218082428, "learning_rate": 0.002, "loss": 2.5598, "step": 263080 }, { "epoch": 0.5241337817161801, "grad_norm": 0.15773724019527435, "learning_rate": 0.002, "loss": 2.5514, "step": 263090 }, { "epoch": 0.524153703939819, "grad_norm": 0.15136416256427765, "learning_rate": 0.002, "loss": 2.5587, "step": 263100 }, { "epoch": 0.5241736261634579, "grad_norm": 0.16015440225601196, "learning_rate": 0.002, "loss": 2.5482, "step": 263110 }, { "epoch": 0.5241935483870968, "grad_norm": 0.2660376727581024, "learning_rate": 0.002, "loss": 2.565, "step": 263120 }, { "epoch": 0.5242134706107356, "grad_norm": 0.15633924305438995, "learning_rate": 0.002, "loss": 2.5636, "step": 263130 }, { "epoch": 0.5242333928343746, "grad_norm": 0.16378070414066315, "learning_rate": 0.002, "loss": 2.5597, "step": 263140 }, { "epoch": 0.5242533150580135, "grad_norm": 0.16177527606487274, "learning_rate": 0.002, "loss": 2.5654, "step": 263150 }, { "epoch": 0.5242732372816524, "grad_norm": 0.1510222852230072, "learning_rate": 0.002, "loss": 2.555, "step": 263160 }, { "epoch": 0.5242931595052913, "grad_norm": 0.16417500376701355, "learning_rate": 0.002, "loss": 2.552, "step": 263170 }, { "epoch": 0.5243130817289302, "grad_norm": 0.1667969673871994, "learning_rate": 0.002, "loss": 2.5513, "step": 263180 }, { "epoch": 0.5243330039525692, "grad_norm": 0.16973894834518433, "learning_rate": 0.002, "loss": 2.5534, "step": 263190 }, { "epoch": 0.5243529261762081, "grad_norm": 0.17317304015159607, "learning_rate": 0.002, "loss": 2.5456, "step": 263200 }, { "epoch": 0.524372848399847, "grad_norm": 0.14789190888404846, "learning_rate": 0.002, "loss": 2.5616, "step": 263210 }, { "epoch": 0.5243927706234859, "grad_norm": 0.1728341281414032, "learning_rate": 0.002, "loss": 2.5751, "step": 263220 }, { "epoch": 0.5244126928471248, "grad_norm": 0.13642655313014984, "learning_rate": 0.002, "loss": 2.5515, "step": 263230 }, { "epoch": 0.5244326150707638, "grad_norm": 0.15533950924873352, "learning_rate": 0.002, "loss": 2.5642, "step": 263240 }, { "epoch": 0.5244525372944027, "grad_norm": 0.17189718782901764, "learning_rate": 0.002, "loss": 2.5642, "step": 263250 }, { "epoch": 0.5244724595180416, "grad_norm": 0.16444498300552368, "learning_rate": 0.002, "loss": 2.554, "step": 263260 }, { "epoch": 0.5244923817416804, "grad_norm": 0.1835198700428009, "learning_rate": 0.002, "loss": 2.5621, "step": 263270 }, { "epoch": 0.5245123039653194, "grad_norm": 0.15045498311519623, "learning_rate": 0.002, "loss": 2.5565, "step": 263280 }, { "epoch": 0.5245322261889583, "grad_norm": 0.1790543645620346, "learning_rate": 0.002, "loss": 2.5767, "step": 263290 }, { "epoch": 0.5245521484125972, "grad_norm": 0.20112650096416473, "learning_rate": 0.002, "loss": 2.5672, "step": 263300 }, { "epoch": 0.5245720706362361, "grad_norm": 0.15368162095546722, "learning_rate": 0.002, "loss": 2.5606, "step": 263310 }, { "epoch": 0.524591992859875, "grad_norm": 0.15812090039253235, "learning_rate": 0.002, "loss": 2.5604, "step": 263320 }, { "epoch": 0.524611915083514, "grad_norm": 0.16665785014629364, "learning_rate": 0.002, "loss": 2.5652, "step": 263330 }, { "epoch": 0.5246318373071529, "grad_norm": 0.17383745312690735, "learning_rate": 0.002, "loss": 2.5527, "step": 263340 }, { "epoch": 0.5246517595307918, "grad_norm": 0.18353861570358276, "learning_rate": 0.002, "loss": 2.5696, "step": 263350 }, { "epoch": 0.5246716817544307, "grad_norm": 0.14816077053546906, "learning_rate": 0.002, "loss": 2.5659, "step": 263360 }, { "epoch": 0.5246916039780696, "grad_norm": 0.15666510164737701, "learning_rate": 0.002, "loss": 2.5399, "step": 263370 }, { "epoch": 0.5247115262017086, "grad_norm": 0.150782510638237, "learning_rate": 0.002, "loss": 2.5705, "step": 263380 }, { "epoch": 0.5247314484253475, "grad_norm": 0.15576396882534027, "learning_rate": 0.002, "loss": 2.5588, "step": 263390 }, { "epoch": 0.5247513706489864, "grad_norm": 0.20821413397789001, "learning_rate": 0.002, "loss": 2.5738, "step": 263400 }, { "epoch": 0.5247712928726253, "grad_norm": 0.14832033216953278, "learning_rate": 0.002, "loss": 2.5541, "step": 263410 }, { "epoch": 0.5247912150962641, "grad_norm": 0.14970548450946808, "learning_rate": 0.002, "loss": 2.5558, "step": 263420 }, { "epoch": 0.5248111373199031, "grad_norm": 0.16254736483097076, "learning_rate": 0.002, "loss": 2.5484, "step": 263430 }, { "epoch": 0.524831059543542, "grad_norm": 0.17612795531749725, "learning_rate": 0.002, "loss": 2.5559, "step": 263440 }, { "epoch": 0.5248509817671809, "grad_norm": 0.14322732388973236, "learning_rate": 0.002, "loss": 2.561, "step": 263450 }, { "epoch": 0.5248709039908198, "grad_norm": 0.16249436140060425, "learning_rate": 0.002, "loss": 2.5562, "step": 263460 }, { "epoch": 0.5248908262144587, "grad_norm": 0.16504383087158203, "learning_rate": 0.002, "loss": 2.5636, "step": 263470 }, { "epoch": 0.5249107484380977, "grad_norm": 0.14724746346473694, "learning_rate": 0.002, "loss": 2.5618, "step": 263480 }, { "epoch": 0.5249306706617366, "grad_norm": 0.1644536852836609, "learning_rate": 0.002, "loss": 2.5706, "step": 263490 }, { "epoch": 0.5249505928853755, "grad_norm": 0.1620347946882248, "learning_rate": 0.002, "loss": 2.5544, "step": 263500 }, { "epoch": 0.5249705151090144, "grad_norm": 0.17416204512119293, "learning_rate": 0.002, "loss": 2.567, "step": 263510 }, { "epoch": 0.5249904373326533, "grad_norm": 0.15148235857486725, "learning_rate": 0.002, "loss": 2.5544, "step": 263520 }, { "epoch": 0.5250103595562923, "grad_norm": 0.1704574078321457, "learning_rate": 0.002, "loss": 2.5459, "step": 263530 }, { "epoch": 0.5250302817799312, "grad_norm": 0.1589908003807068, "learning_rate": 0.002, "loss": 2.561, "step": 263540 }, { "epoch": 0.52505020400357, "grad_norm": 0.1538676619529724, "learning_rate": 0.002, "loss": 2.5417, "step": 263550 }, { "epoch": 0.5250701262272089, "grad_norm": 0.15249434113502502, "learning_rate": 0.002, "loss": 2.5554, "step": 263560 }, { "epoch": 0.5250900484508478, "grad_norm": 0.16564291715621948, "learning_rate": 0.002, "loss": 2.5569, "step": 263570 }, { "epoch": 0.5251099706744868, "grad_norm": 0.17783944308757782, "learning_rate": 0.002, "loss": 2.5612, "step": 263580 }, { "epoch": 0.5251298928981257, "grad_norm": 0.18488186597824097, "learning_rate": 0.002, "loss": 2.5391, "step": 263590 }, { "epoch": 0.5251498151217646, "grad_norm": 0.14559948444366455, "learning_rate": 0.002, "loss": 2.5568, "step": 263600 }, { "epoch": 0.5251697373454035, "grad_norm": 0.1700991541147232, "learning_rate": 0.002, "loss": 2.5588, "step": 263610 }, { "epoch": 0.5251896595690425, "grad_norm": 0.1521337926387787, "learning_rate": 0.002, "loss": 2.5522, "step": 263620 }, { "epoch": 0.5252095817926814, "grad_norm": 0.15838731825351715, "learning_rate": 0.002, "loss": 2.5541, "step": 263630 }, { "epoch": 0.5252295040163203, "grad_norm": 0.18547846376895905, "learning_rate": 0.002, "loss": 2.5484, "step": 263640 }, { "epoch": 0.5252494262399592, "grad_norm": 0.1752564013004303, "learning_rate": 0.002, "loss": 2.5541, "step": 263650 }, { "epoch": 0.5252693484635981, "grad_norm": 0.1710745096206665, "learning_rate": 0.002, "loss": 2.5512, "step": 263660 }, { "epoch": 0.5252892706872371, "grad_norm": 0.182498037815094, "learning_rate": 0.002, "loss": 2.5687, "step": 263670 }, { "epoch": 0.525309192910876, "grad_norm": 0.15992987155914307, "learning_rate": 0.002, "loss": 2.564, "step": 263680 }, { "epoch": 0.5253291151345149, "grad_norm": 0.16839051246643066, "learning_rate": 0.002, "loss": 2.5624, "step": 263690 }, { "epoch": 0.5253490373581537, "grad_norm": 0.1419832855463028, "learning_rate": 0.002, "loss": 2.5565, "step": 263700 }, { "epoch": 0.5253689595817926, "grad_norm": 0.1769840270280838, "learning_rate": 0.002, "loss": 2.5322, "step": 263710 }, { "epoch": 0.5253888818054316, "grad_norm": 0.16362027823925018, "learning_rate": 0.002, "loss": 2.5478, "step": 263720 }, { "epoch": 0.5254088040290705, "grad_norm": 0.17725303769111633, "learning_rate": 0.002, "loss": 2.5611, "step": 263730 }, { "epoch": 0.5254287262527094, "grad_norm": 0.15543204545974731, "learning_rate": 0.002, "loss": 2.5567, "step": 263740 }, { "epoch": 0.5254486484763483, "grad_norm": 0.18036821484565735, "learning_rate": 0.002, "loss": 2.557, "step": 263750 }, { "epoch": 0.5254685706999872, "grad_norm": 0.1658981591463089, "learning_rate": 0.002, "loss": 2.5548, "step": 263760 }, { "epoch": 0.5254884929236262, "grad_norm": 0.14115534722805023, "learning_rate": 0.002, "loss": 2.5713, "step": 263770 }, { "epoch": 0.5255084151472651, "grad_norm": 0.21817077696323395, "learning_rate": 0.002, "loss": 2.5637, "step": 263780 }, { "epoch": 0.525528337370904, "grad_norm": 0.14565804600715637, "learning_rate": 0.002, "loss": 2.561, "step": 263790 }, { "epoch": 0.5255482595945429, "grad_norm": 0.16278517246246338, "learning_rate": 0.002, "loss": 2.547, "step": 263800 }, { "epoch": 0.5255681818181818, "grad_norm": 0.15616868436336517, "learning_rate": 0.002, "loss": 2.5534, "step": 263810 }, { "epoch": 0.5255881040418208, "grad_norm": 0.19615568220615387, "learning_rate": 0.002, "loss": 2.553, "step": 263820 }, { "epoch": 0.5256080262654597, "grad_norm": 0.1629137396812439, "learning_rate": 0.002, "loss": 2.5589, "step": 263830 }, { "epoch": 0.5256279484890986, "grad_norm": 0.18098480999469757, "learning_rate": 0.002, "loss": 2.5574, "step": 263840 }, { "epoch": 0.5256478707127374, "grad_norm": 0.1974835991859436, "learning_rate": 0.002, "loss": 2.5749, "step": 263850 }, { "epoch": 0.5256677929363763, "grad_norm": 0.16240213811397552, "learning_rate": 0.002, "loss": 2.5652, "step": 263860 }, { "epoch": 0.5256877151600153, "grad_norm": 0.1631384789943695, "learning_rate": 0.002, "loss": 2.5486, "step": 263870 }, { "epoch": 0.5257076373836542, "grad_norm": 0.19274947047233582, "learning_rate": 0.002, "loss": 2.5606, "step": 263880 }, { "epoch": 0.5257275596072931, "grad_norm": 0.16480804979801178, "learning_rate": 0.002, "loss": 2.5649, "step": 263890 }, { "epoch": 0.525747481830932, "grad_norm": 0.13689978420734406, "learning_rate": 0.002, "loss": 2.5535, "step": 263900 }, { "epoch": 0.525767404054571, "grad_norm": 0.18663997948169708, "learning_rate": 0.002, "loss": 2.5665, "step": 263910 }, { "epoch": 0.5257873262782099, "grad_norm": 0.18339674174785614, "learning_rate": 0.002, "loss": 2.5714, "step": 263920 }, { "epoch": 0.5258072485018488, "grad_norm": 0.16321204602718353, "learning_rate": 0.002, "loss": 2.5535, "step": 263930 }, { "epoch": 0.5258271707254877, "grad_norm": 0.18147027492523193, "learning_rate": 0.002, "loss": 2.5461, "step": 263940 }, { "epoch": 0.5258470929491266, "grad_norm": 0.1769438087940216, "learning_rate": 0.002, "loss": 2.5669, "step": 263950 }, { "epoch": 0.5258670151727656, "grad_norm": 0.17636090517044067, "learning_rate": 0.002, "loss": 2.5529, "step": 263960 }, { "epoch": 0.5258869373964045, "grad_norm": 0.15864185988903046, "learning_rate": 0.002, "loss": 2.5536, "step": 263970 }, { "epoch": 0.5259068596200434, "grad_norm": 0.16007065773010254, "learning_rate": 0.002, "loss": 2.5596, "step": 263980 }, { "epoch": 0.5259267818436822, "grad_norm": 0.15121284127235413, "learning_rate": 0.002, "loss": 2.5418, "step": 263990 }, { "epoch": 0.5259467040673211, "grad_norm": 0.1797427237033844, "learning_rate": 0.002, "loss": 2.5514, "step": 264000 }, { "epoch": 0.5259666262909601, "grad_norm": 0.15576067566871643, "learning_rate": 0.002, "loss": 2.5578, "step": 264010 }, { "epoch": 0.525986548514599, "grad_norm": 0.17907926440238953, "learning_rate": 0.002, "loss": 2.5732, "step": 264020 }, { "epoch": 0.5260064707382379, "grad_norm": 0.16161511838436127, "learning_rate": 0.002, "loss": 2.5398, "step": 264030 }, { "epoch": 0.5260263929618768, "grad_norm": 0.1374729722738266, "learning_rate": 0.002, "loss": 2.5631, "step": 264040 }, { "epoch": 0.5260463151855157, "grad_norm": 0.16810894012451172, "learning_rate": 0.002, "loss": 2.5669, "step": 264050 }, { "epoch": 0.5260662374091547, "grad_norm": 0.17174221575260162, "learning_rate": 0.002, "loss": 2.5519, "step": 264060 }, { "epoch": 0.5260861596327936, "grad_norm": 0.16831643879413605, "learning_rate": 0.002, "loss": 2.5655, "step": 264070 }, { "epoch": 0.5261060818564325, "grad_norm": 0.15447747707366943, "learning_rate": 0.002, "loss": 2.5479, "step": 264080 }, { "epoch": 0.5261260040800714, "grad_norm": 0.13504990935325623, "learning_rate": 0.002, "loss": 2.5573, "step": 264090 }, { "epoch": 0.5261459263037103, "grad_norm": 0.14115090668201447, "learning_rate": 0.002, "loss": 2.5616, "step": 264100 }, { "epoch": 0.5261658485273493, "grad_norm": 0.17606288194656372, "learning_rate": 0.002, "loss": 2.5541, "step": 264110 }, { "epoch": 0.5261857707509882, "grad_norm": 0.1596660017967224, "learning_rate": 0.002, "loss": 2.5649, "step": 264120 }, { "epoch": 0.526205692974627, "grad_norm": 0.17410370707511902, "learning_rate": 0.002, "loss": 2.5452, "step": 264130 }, { "epoch": 0.5262256151982659, "grad_norm": 0.1461503952741623, "learning_rate": 0.002, "loss": 2.5556, "step": 264140 }, { "epoch": 0.5262455374219048, "grad_norm": 0.19267532229423523, "learning_rate": 0.002, "loss": 2.569, "step": 264150 }, { "epoch": 0.5262654596455438, "grad_norm": 0.18931050598621368, "learning_rate": 0.002, "loss": 2.556, "step": 264160 }, { "epoch": 0.5262853818691827, "grad_norm": 0.15114696323871613, "learning_rate": 0.002, "loss": 2.5669, "step": 264170 }, { "epoch": 0.5263053040928216, "grad_norm": 0.13936926424503326, "learning_rate": 0.002, "loss": 2.5608, "step": 264180 }, { "epoch": 0.5263252263164605, "grad_norm": 0.17689409852027893, "learning_rate": 0.002, "loss": 2.5648, "step": 264190 }, { "epoch": 0.5263451485400995, "grad_norm": 0.16483835875988007, "learning_rate": 0.002, "loss": 2.555, "step": 264200 }, { "epoch": 0.5263650707637384, "grad_norm": 0.2148522287607193, "learning_rate": 0.002, "loss": 2.5602, "step": 264210 }, { "epoch": 0.5263849929873773, "grad_norm": 0.14977775514125824, "learning_rate": 0.002, "loss": 2.5547, "step": 264220 }, { "epoch": 0.5264049152110162, "grad_norm": 0.16219410300254822, "learning_rate": 0.002, "loss": 2.5579, "step": 264230 }, { "epoch": 0.5264248374346551, "grad_norm": 0.16025608777999878, "learning_rate": 0.002, "loss": 2.5593, "step": 264240 }, { "epoch": 0.5264447596582941, "grad_norm": 0.15902762115001678, "learning_rate": 0.002, "loss": 2.5541, "step": 264250 }, { "epoch": 0.526464681881933, "grad_norm": 0.18982799351215363, "learning_rate": 0.002, "loss": 2.5597, "step": 264260 }, { "epoch": 0.5264846041055719, "grad_norm": 0.16481368243694305, "learning_rate": 0.002, "loss": 2.5531, "step": 264270 }, { "epoch": 0.5265045263292107, "grad_norm": 0.19080820679664612, "learning_rate": 0.002, "loss": 2.5514, "step": 264280 }, { "epoch": 0.5265244485528496, "grad_norm": 0.15394078195095062, "learning_rate": 0.002, "loss": 2.5619, "step": 264290 }, { "epoch": 0.5265443707764886, "grad_norm": 0.20223210752010345, "learning_rate": 0.002, "loss": 2.5377, "step": 264300 }, { "epoch": 0.5265642930001275, "grad_norm": 0.16649001836776733, "learning_rate": 0.002, "loss": 2.566, "step": 264310 }, { "epoch": 0.5265842152237664, "grad_norm": 0.14137430489063263, "learning_rate": 0.002, "loss": 2.5412, "step": 264320 }, { "epoch": 0.5266041374474053, "grad_norm": 0.19205023348331451, "learning_rate": 0.002, "loss": 2.5607, "step": 264330 }, { "epoch": 0.5266240596710442, "grad_norm": 0.14630183577537537, "learning_rate": 0.002, "loss": 2.5538, "step": 264340 }, { "epoch": 0.5266439818946832, "grad_norm": 0.15775027871131897, "learning_rate": 0.002, "loss": 2.5588, "step": 264350 }, { "epoch": 0.5266639041183221, "grad_norm": 0.17395837604999542, "learning_rate": 0.002, "loss": 2.564, "step": 264360 }, { "epoch": 0.526683826341961, "grad_norm": 0.1952524036169052, "learning_rate": 0.002, "loss": 2.5513, "step": 264370 }, { "epoch": 0.5267037485655999, "grad_norm": 0.15772870182991028, "learning_rate": 0.002, "loss": 2.5671, "step": 264380 }, { "epoch": 0.5267236707892388, "grad_norm": 0.1714215874671936, "learning_rate": 0.002, "loss": 2.5696, "step": 264390 }, { "epoch": 0.5267435930128778, "grad_norm": 0.16006304323673248, "learning_rate": 0.002, "loss": 2.5607, "step": 264400 }, { "epoch": 0.5267635152365167, "grad_norm": 0.23167654871940613, "learning_rate": 0.002, "loss": 2.5539, "step": 264410 }, { "epoch": 0.5267834374601555, "grad_norm": 0.18711180984973907, "learning_rate": 0.002, "loss": 2.5735, "step": 264420 }, { "epoch": 0.5268033596837944, "grad_norm": 0.1381119191646576, "learning_rate": 0.002, "loss": 2.5631, "step": 264430 }, { "epoch": 0.5268232819074333, "grad_norm": 0.13779368996620178, "learning_rate": 0.002, "loss": 2.5582, "step": 264440 }, { "epoch": 0.5268432041310723, "grad_norm": 0.18439507484436035, "learning_rate": 0.002, "loss": 2.5626, "step": 264450 }, { "epoch": 0.5268631263547112, "grad_norm": 0.14639371633529663, "learning_rate": 0.002, "loss": 2.5539, "step": 264460 }, { "epoch": 0.5268830485783501, "grad_norm": 0.17738422751426697, "learning_rate": 0.002, "loss": 2.5536, "step": 264470 }, { "epoch": 0.526902970801989, "grad_norm": 0.21512646973133087, "learning_rate": 0.002, "loss": 2.5601, "step": 264480 }, { "epoch": 0.526922893025628, "grad_norm": 0.17010560631752014, "learning_rate": 0.002, "loss": 2.5406, "step": 264490 }, { "epoch": 0.5269428152492669, "grad_norm": 0.16695186495780945, "learning_rate": 0.002, "loss": 2.5484, "step": 264500 }, { "epoch": 0.5269627374729058, "grad_norm": 0.15912140905857086, "learning_rate": 0.002, "loss": 2.5518, "step": 264510 }, { "epoch": 0.5269826596965447, "grad_norm": 0.16661711037158966, "learning_rate": 0.002, "loss": 2.5614, "step": 264520 }, { "epoch": 0.5270025819201836, "grad_norm": 0.1722102165222168, "learning_rate": 0.002, "loss": 2.5509, "step": 264530 }, { "epoch": 0.5270225041438226, "grad_norm": 0.24525289237499237, "learning_rate": 0.002, "loss": 2.5521, "step": 264540 }, { "epoch": 0.5270424263674615, "grad_norm": 0.16944938898086548, "learning_rate": 0.002, "loss": 2.532, "step": 264550 }, { "epoch": 0.5270623485911003, "grad_norm": 0.16517752408981323, "learning_rate": 0.002, "loss": 2.5574, "step": 264560 }, { "epoch": 0.5270822708147392, "grad_norm": 0.1584334373474121, "learning_rate": 0.002, "loss": 2.5466, "step": 264570 }, { "epoch": 0.5271021930383781, "grad_norm": 0.19879919290542603, "learning_rate": 0.002, "loss": 2.5429, "step": 264580 }, { "epoch": 0.5271221152620171, "grad_norm": 0.17453643679618835, "learning_rate": 0.002, "loss": 2.5588, "step": 264590 }, { "epoch": 0.527142037485656, "grad_norm": 0.1591196358203888, "learning_rate": 0.002, "loss": 2.5598, "step": 264600 }, { "epoch": 0.5271619597092949, "grad_norm": 0.1569099724292755, "learning_rate": 0.002, "loss": 2.5683, "step": 264610 }, { "epoch": 0.5271818819329338, "grad_norm": 0.14911125600337982, "learning_rate": 0.002, "loss": 2.5454, "step": 264620 }, { "epoch": 0.5272018041565727, "grad_norm": 0.1944015622138977, "learning_rate": 0.002, "loss": 2.5527, "step": 264630 }, { "epoch": 0.5272217263802117, "grad_norm": 0.18503567576408386, "learning_rate": 0.002, "loss": 2.5614, "step": 264640 }, { "epoch": 0.5272416486038506, "grad_norm": 0.14441603422164917, "learning_rate": 0.002, "loss": 2.553, "step": 264650 }, { "epoch": 0.5272615708274895, "grad_norm": 0.17106276750564575, "learning_rate": 0.002, "loss": 2.5557, "step": 264660 }, { "epoch": 0.5272814930511284, "grad_norm": 0.15705542266368866, "learning_rate": 0.002, "loss": 2.5503, "step": 264670 }, { "epoch": 0.5273014152747673, "grad_norm": 0.18783068656921387, "learning_rate": 0.002, "loss": 2.5544, "step": 264680 }, { "epoch": 0.5273213374984063, "grad_norm": 0.1716565489768982, "learning_rate": 0.002, "loss": 2.5508, "step": 264690 }, { "epoch": 0.5273412597220452, "grad_norm": 0.1485607624053955, "learning_rate": 0.002, "loss": 2.5594, "step": 264700 }, { "epoch": 0.527361181945684, "grad_norm": 0.19837281107902527, "learning_rate": 0.002, "loss": 2.5629, "step": 264710 }, { "epoch": 0.5273811041693229, "grad_norm": 0.1567101627588272, "learning_rate": 0.002, "loss": 2.5564, "step": 264720 }, { "epoch": 0.5274010263929618, "grad_norm": 0.16823279857635498, "learning_rate": 0.002, "loss": 2.5558, "step": 264730 }, { "epoch": 0.5274209486166008, "grad_norm": 0.14703519642353058, "learning_rate": 0.002, "loss": 2.5537, "step": 264740 }, { "epoch": 0.5274408708402397, "grad_norm": 0.1571708470582962, "learning_rate": 0.002, "loss": 2.5485, "step": 264750 }, { "epoch": 0.5274607930638786, "grad_norm": 0.17332880198955536, "learning_rate": 0.002, "loss": 2.5578, "step": 264760 }, { "epoch": 0.5274807152875175, "grad_norm": 0.1609901785850525, "learning_rate": 0.002, "loss": 2.5557, "step": 264770 }, { "epoch": 0.5275006375111565, "grad_norm": 0.16928592324256897, "learning_rate": 0.002, "loss": 2.5605, "step": 264780 }, { "epoch": 0.5275205597347954, "grad_norm": 0.16886483132839203, "learning_rate": 0.002, "loss": 2.5551, "step": 264790 }, { "epoch": 0.5275404819584343, "grad_norm": 0.1607895940542221, "learning_rate": 0.002, "loss": 2.5611, "step": 264800 }, { "epoch": 0.5275604041820732, "grad_norm": 0.2033611387014389, "learning_rate": 0.002, "loss": 2.5644, "step": 264810 }, { "epoch": 0.5275803264057121, "grad_norm": 0.15950867533683777, "learning_rate": 0.002, "loss": 2.5654, "step": 264820 }, { "epoch": 0.5276002486293511, "grad_norm": 0.1948316991329193, "learning_rate": 0.002, "loss": 2.5604, "step": 264830 }, { "epoch": 0.52762017085299, "grad_norm": 0.17337095737457275, "learning_rate": 0.002, "loss": 2.5684, "step": 264840 }, { "epoch": 0.5276400930766288, "grad_norm": 0.1625540554523468, "learning_rate": 0.002, "loss": 2.5595, "step": 264850 }, { "epoch": 0.5276600153002677, "grad_norm": 0.1695173978805542, "learning_rate": 0.002, "loss": 2.5397, "step": 264860 }, { "epoch": 0.5276799375239066, "grad_norm": 0.15076008439064026, "learning_rate": 0.002, "loss": 2.56, "step": 264870 }, { "epoch": 0.5276998597475456, "grad_norm": 0.14670594036579132, "learning_rate": 0.002, "loss": 2.5639, "step": 264880 }, { "epoch": 0.5277197819711845, "grad_norm": 0.18808217346668243, "learning_rate": 0.002, "loss": 2.5618, "step": 264890 }, { "epoch": 0.5277397041948234, "grad_norm": 0.1751037985086441, "learning_rate": 0.002, "loss": 2.5614, "step": 264900 }, { "epoch": 0.5277596264184623, "grad_norm": 0.15387165546417236, "learning_rate": 0.002, "loss": 2.5549, "step": 264910 }, { "epoch": 0.5277795486421012, "grad_norm": 0.2722943127155304, "learning_rate": 0.002, "loss": 2.5548, "step": 264920 }, { "epoch": 0.5277994708657402, "grad_norm": 0.14252957701683044, "learning_rate": 0.002, "loss": 2.5601, "step": 264930 }, { "epoch": 0.5278193930893791, "grad_norm": 0.1672476977109909, "learning_rate": 0.002, "loss": 2.5542, "step": 264940 }, { "epoch": 0.527839315313018, "grad_norm": 0.17342965304851532, "learning_rate": 0.002, "loss": 2.5474, "step": 264950 }, { "epoch": 0.5278592375366569, "grad_norm": 0.16499878466129303, "learning_rate": 0.002, "loss": 2.5649, "step": 264960 }, { "epoch": 0.5278791597602958, "grad_norm": 0.16850489377975464, "learning_rate": 0.002, "loss": 2.5488, "step": 264970 }, { "epoch": 0.5278990819839348, "grad_norm": 0.14662593603134155, "learning_rate": 0.002, "loss": 2.5476, "step": 264980 }, { "epoch": 0.5279190042075736, "grad_norm": 0.18365474045276642, "learning_rate": 0.002, "loss": 2.5428, "step": 264990 }, { "epoch": 0.5279389264312125, "grad_norm": 0.16744676232337952, "learning_rate": 0.002, "loss": 2.5737, "step": 265000 }, { "epoch": 0.5279588486548514, "grad_norm": 0.15636779367923737, "learning_rate": 0.002, "loss": 2.5494, "step": 265010 }, { "epoch": 0.5279787708784903, "grad_norm": 0.16803552210330963, "learning_rate": 0.002, "loss": 2.5624, "step": 265020 }, { "epoch": 0.5279986931021293, "grad_norm": 0.20995883643627167, "learning_rate": 0.002, "loss": 2.5624, "step": 265030 }, { "epoch": 0.5280186153257682, "grad_norm": 0.16910918056964874, "learning_rate": 0.002, "loss": 2.5552, "step": 265040 }, { "epoch": 0.5280385375494071, "grad_norm": 0.17966091632843018, "learning_rate": 0.002, "loss": 2.5513, "step": 265050 }, { "epoch": 0.528058459773046, "grad_norm": 0.17759530246257782, "learning_rate": 0.002, "loss": 2.5607, "step": 265060 }, { "epoch": 0.5280783819966849, "grad_norm": 0.15829478204250336, "learning_rate": 0.002, "loss": 2.57, "step": 265070 }, { "epoch": 0.5280983042203239, "grad_norm": 0.14468331634998322, "learning_rate": 0.002, "loss": 2.5587, "step": 265080 }, { "epoch": 0.5281182264439628, "grad_norm": 0.16526447236537933, "learning_rate": 0.002, "loss": 2.5527, "step": 265090 }, { "epoch": 0.5281381486676017, "grad_norm": 0.15042449533939362, "learning_rate": 0.002, "loss": 2.5696, "step": 265100 }, { "epoch": 0.5281580708912406, "grad_norm": 0.2413431704044342, "learning_rate": 0.002, "loss": 2.5617, "step": 265110 }, { "epoch": 0.5281779931148796, "grad_norm": 0.19400471448898315, "learning_rate": 0.002, "loss": 2.5605, "step": 265120 }, { "epoch": 0.5281979153385185, "grad_norm": 0.15555015206336975, "learning_rate": 0.002, "loss": 2.5654, "step": 265130 }, { "epoch": 0.5282178375621573, "grad_norm": 0.17570693790912628, "learning_rate": 0.002, "loss": 2.5597, "step": 265140 }, { "epoch": 0.5282377597857962, "grad_norm": 0.16107892990112305, "learning_rate": 0.002, "loss": 2.5543, "step": 265150 }, { "epoch": 0.5282576820094351, "grad_norm": 0.18313288688659668, "learning_rate": 0.002, "loss": 2.5513, "step": 265160 }, { "epoch": 0.5282776042330741, "grad_norm": 0.21433372795581818, "learning_rate": 0.002, "loss": 2.5559, "step": 265170 }, { "epoch": 0.528297526456713, "grad_norm": 0.22513751685619354, "learning_rate": 0.002, "loss": 2.5499, "step": 265180 }, { "epoch": 0.5283174486803519, "grad_norm": 0.15544262528419495, "learning_rate": 0.002, "loss": 2.5523, "step": 265190 }, { "epoch": 0.5283373709039908, "grad_norm": 0.1659567952156067, "learning_rate": 0.002, "loss": 2.5601, "step": 265200 }, { "epoch": 0.5283572931276297, "grad_norm": 0.17326563596725464, "learning_rate": 0.002, "loss": 2.5459, "step": 265210 }, { "epoch": 0.5283772153512687, "grad_norm": 0.15327201783657074, "learning_rate": 0.002, "loss": 2.5471, "step": 265220 }, { "epoch": 0.5283971375749076, "grad_norm": 0.19706659018993378, "learning_rate": 0.002, "loss": 2.573, "step": 265230 }, { "epoch": 0.5284170597985465, "grad_norm": 0.16626335680484772, "learning_rate": 0.002, "loss": 2.55, "step": 265240 }, { "epoch": 0.5284369820221854, "grad_norm": 0.16876406967639923, "learning_rate": 0.002, "loss": 2.5663, "step": 265250 }, { "epoch": 0.5284569042458243, "grad_norm": 0.16291382908821106, "learning_rate": 0.002, "loss": 2.5334, "step": 265260 }, { "epoch": 0.5284768264694633, "grad_norm": 0.16461478173732758, "learning_rate": 0.002, "loss": 2.5654, "step": 265270 }, { "epoch": 0.5284967486931021, "grad_norm": 0.17945779860019684, "learning_rate": 0.002, "loss": 2.5671, "step": 265280 }, { "epoch": 0.528516670916741, "grad_norm": 0.1827707290649414, "learning_rate": 0.002, "loss": 2.5613, "step": 265290 }, { "epoch": 0.5285365931403799, "grad_norm": 0.1460282951593399, "learning_rate": 0.002, "loss": 2.5505, "step": 265300 }, { "epoch": 0.5285565153640188, "grad_norm": 0.14290660619735718, "learning_rate": 0.002, "loss": 2.5569, "step": 265310 }, { "epoch": 0.5285764375876578, "grad_norm": 0.22055509686470032, "learning_rate": 0.002, "loss": 2.568, "step": 265320 }, { "epoch": 0.5285963598112967, "grad_norm": 0.16672487556934357, "learning_rate": 0.002, "loss": 2.5572, "step": 265330 }, { "epoch": 0.5286162820349356, "grad_norm": 0.16287726163864136, "learning_rate": 0.002, "loss": 2.5501, "step": 265340 }, { "epoch": 0.5286362042585745, "grad_norm": 0.27051424980163574, "learning_rate": 0.002, "loss": 2.5637, "step": 265350 }, { "epoch": 0.5286561264822134, "grad_norm": 0.1436399221420288, "learning_rate": 0.002, "loss": 2.5557, "step": 265360 }, { "epoch": 0.5286760487058524, "grad_norm": 0.160737082362175, "learning_rate": 0.002, "loss": 2.547, "step": 265370 }, { "epoch": 0.5286959709294913, "grad_norm": 0.16153199970722198, "learning_rate": 0.002, "loss": 2.5644, "step": 265380 }, { "epoch": 0.5287158931531302, "grad_norm": 0.17167358100414276, "learning_rate": 0.002, "loss": 2.5563, "step": 265390 }, { "epoch": 0.5287358153767691, "grad_norm": 0.14766107499599457, "learning_rate": 0.002, "loss": 2.555, "step": 265400 }, { "epoch": 0.5287557376004081, "grad_norm": 0.1718912124633789, "learning_rate": 0.002, "loss": 2.5578, "step": 265410 }, { "epoch": 0.528775659824047, "grad_norm": 0.18302209675312042, "learning_rate": 0.002, "loss": 2.5545, "step": 265420 }, { "epoch": 0.5287955820476858, "grad_norm": 0.1932501643896103, "learning_rate": 0.002, "loss": 2.5523, "step": 265430 }, { "epoch": 0.5288155042713247, "grad_norm": 0.1660917103290558, "learning_rate": 0.002, "loss": 2.5538, "step": 265440 }, { "epoch": 0.5288354264949636, "grad_norm": 0.16414643824100494, "learning_rate": 0.002, "loss": 2.5724, "step": 265450 }, { "epoch": 0.5288553487186026, "grad_norm": 0.17663395404815674, "learning_rate": 0.002, "loss": 2.5566, "step": 265460 }, { "epoch": 0.5288752709422415, "grad_norm": 0.16511595249176025, "learning_rate": 0.002, "loss": 2.5547, "step": 265470 }, { "epoch": 0.5288951931658804, "grad_norm": 0.157500758767128, "learning_rate": 0.002, "loss": 2.5535, "step": 265480 }, { "epoch": 0.5289151153895193, "grad_norm": 0.17780905961990356, "learning_rate": 0.002, "loss": 2.5598, "step": 265490 }, { "epoch": 0.5289350376131582, "grad_norm": 0.17078828811645508, "learning_rate": 0.002, "loss": 2.5584, "step": 265500 }, { "epoch": 0.5289549598367972, "grad_norm": 0.1608666330575943, "learning_rate": 0.002, "loss": 2.5627, "step": 265510 }, { "epoch": 0.5289748820604361, "grad_norm": 0.1624837964773178, "learning_rate": 0.002, "loss": 2.5516, "step": 265520 }, { "epoch": 0.528994804284075, "grad_norm": 0.17295342683792114, "learning_rate": 0.002, "loss": 2.5542, "step": 265530 }, { "epoch": 0.5290147265077139, "grad_norm": 0.19649653136730194, "learning_rate": 0.002, "loss": 2.5521, "step": 265540 }, { "epoch": 0.5290346487313528, "grad_norm": 0.1781204491853714, "learning_rate": 0.002, "loss": 2.5547, "step": 265550 }, { "epoch": 0.5290545709549918, "grad_norm": 0.15700608491897583, "learning_rate": 0.002, "loss": 2.5461, "step": 265560 }, { "epoch": 0.5290744931786306, "grad_norm": 0.17604579031467438, "learning_rate": 0.002, "loss": 2.5521, "step": 265570 }, { "epoch": 0.5290944154022695, "grad_norm": 0.17983435094356537, "learning_rate": 0.002, "loss": 2.5613, "step": 265580 }, { "epoch": 0.5291143376259084, "grad_norm": 0.16856509447097778, "learning_rate": 0.002, "loss": 2.5539, "step": 265590 }, { "epoch": 0.5291342598495473, "grad_norm": 0.17831642925739288, "learning_rate": 0.002, "loss": 2.5567, "step": 265600 }, { "epoch": 0.5291541820731863, "grad_norm": 0.16291940212249756, "learning_rate": 0.002, "loss": 2.556, "step": 265610 }, { "epoch": 0.5291741042968252, "grad_norm": 0.18316465616226196, "learning_rate": 0.002, "loss": 2.5742, "step": 265620 }, { "epoch": 0.5291940265204641, "grad_norm": 0.18950577080249786, "learning_rate": 0.002, "loss": 2.5707, "step": 265630 }, { "epoch": 0.529213948744103, "grad_norm": 0.14724436402320862, "learning_rate": 0.002, "loss": 2.5514, "step": 265640 }, { "epoch": 0.5292338709677419, "grad_norm": 0.15036886930465698, "learning_rate": 0.002, "loss": 2.5659, "step": 265650 }, { "epoch": 0.5292537931913809, "grad_norm": 0.15930849313735962, "learning_rate": 0.002, "loss": 2.5562, "step": 265660 }, { "epoch": 0.5292737154150198, "grad_norm": 0.18839381635189056, "learning_rate": 0.002, "loss": 2.5629, "step": 265670 }, { "epoch": 0.5292936376386587, "grad_norm": 0.1958554983139038, "learning_rate": 0.002, "loss": 2.5606, "step": 265680 }, { "epoch": 0.5293135598622976, "grad_norm": 0.17657208442687988, "learning_rate": 0.002, "loss": 2.5648, "step": 265690 }, { "epoch": 0.5293334820859366, "grad_norm": 0.1716303676366806, "learning_rate": 0.002, "loss": 2.5685, "step": 265700 }, { "epoch": 0.5293534043095754, "grad_norm": 0.16711731255054474, "learning_rate": 0.002, "loss": 2.573, "step": 265710 }, { "epoch": 0.5293733265332143, "grad_norm": 0.1764456331729889, "learning_rate": 0.002, "loss": 2.5574, "step": 265720 }, { "epoch": 0.5293932487568532, "grad_norm": 0.1805829256772995, "learning_rate": 0.002, "loss": 2.5781, "step": 265730 }, { "epoch": 0.5294131709804921, "grad_norm": 0.17767871916294098, "learning_rate": 0.002, "loss": 2.569, "step": 265740 }, { "epoch": 0.5294330932041311, "grad_norm": 0.1907465010881424, "learning_rate": 0.002, "loss": 2.546, "step": 265750 }, { "epoch": 0.52945301542777, "grad_norm": 0.16157203912734985, "learning_rate": 0.002, "loss": 2.5657, "step": 265760 }, { "epoch": 0.5294729376514089, "grad_norm": 0.20852793753147125, "learning_rate": 0.002, "loss": 2.5572, "step": 265770 }, { "epoch": 0.5294928598750478, "grad_norm": 0.15807503461837769, "learning_rate": 0.002, "loss": 2.5669, "step": 265780 }, { "epoch": 0.5295127820986867, "grad_norm": 0.16823671758174896, "learning_rate": 0.002, "loss": 2.5568, "step": 265790 }, { "epoch": 0.5295327043223257, "grad_norm": 0.16170427203178406, "learning_rate": 0.002, "loss": 2.5533, "step": 265800 }, { "epoch": 0.5295526265459646, "grad_norm": 0.16436612606048584, "learning_rate": 0.002, "loss": 2.5733, "step": 265810 }, { "epoch": 0.5295725487696035, "grad_norm": 0.1821652501821518, "learning_rate": 0.002, "loss": 2.5592, "step": 265820 }, { "epoch": 0.5295924709932424, "grad_norm": 0.19213640689849854, "learning_rate": 0.002, "loss": 2.5621, "step": 265830 }, { "epoch": 0.5296123932168812, "grad_norm": 0.18425877392292023, "learning_rate": 0.002, "loss": 2.5591, "step": 265840 }, { "epoch": 0.5296323154405203, "grad_norm": 0.15858201682567596, "learning_rate": 0.002, "loss": 2.5719, "step": 265850 }, { "epoch": 0.5296522376641591, "grad_norm": 0.17807583510875702, "learning_rate": 0.002, "loss": 2.5521, "step": 265860 }, { "epoch": 0.529672159887798, "grad_norm": 0.1820925623178482, "learning_rate": 0.002, "loss": 2.5544, "step": 265870 }, { "epoch": 0.5296920821114369, "grad_norm": 0.21241873502731323, "learning_rate": 0.002, "loss": 2.5514, "step": 265880 }, { "epoch": 0.5297120043350758, "grad_norm": 0.16550670564174652, "learning_rate": 0.002, "loss": 2.5596, "step": 265890 }, { "epoch": 0.5297319265587148, "grad_norm": 0.17840731143951416, "learning_rate": 0.002, "loss": 2.5725, "step": 265900 }, { "epoch": 0.5297518487823537, "grad_norm": 0.16705752909183502, "learning_rate": 0.002, "loss": 2.5705, "step": 265910 }, { "epoch": 0.5297717710059926, "grad_norm": 0.15666908025741577, "learning_rate": 0.002, "loss": 2.5657, "step": 265920 }, { "epoch": 0.5297916932296315, "grad_norm": 0.16143731772899628, "learning_rate": 0.002, "loss": 2.563, "step": 265930 }, { "epoch": 0.5298116154532704, "grad_norm": 0.1675102412700653, "learning_rate": 0.002, "loss": 2.5583, "step": 265940 }, { "epoch": 0.5298315376769094, "grad_norm": 0.1765018254518509, "learning_rate": 0.002, "loss": 2.5615, "step": 265950 }, { "epoch": 0.5298514599005483, "grad_norm": 0.17353703081607819, "learning_rate": 0.002, "loss": 2.5683, "step": 265960 }, { "epoch": 0.5298713821241872, "grad_norm": 0.15917420387268066, "learning_rate": 0.002, "loss": 2.5638, "step": 265970 }, { "epoch": 0.529891304347826, "grad_norm": 0.18647095561027527, "learning_rate": 0.002, "loss": 2.5515, "step": 265980 }, { "epoch": 0.529911226571465, "grad_norm": 0.1478138566017151, "learning_rate": 0.002, "loss": 2.545, "step": 265990 }, { "epoch": 0.529931148795104, "grad_norm": 0.1630602329969406, "learning_rate": 0.002, "loss": 2.573, "step": 266000 }, { "epoch": 0.5299510710187428, "grad_norm": 0.1747208684682846, "learning_rate": 0.002, "loss": 2.5679, "step": 266010 }, { "epoch": 0.5299709932423817, "grad_norm": 0.18869853019714355, "learning_rate": 0.002, "loss": 2.5585, "step": 266020 }, { "epoch": 0.5299909154660206, "grad_norm": 0.1564585119485855, "learning_rate": 0.002, "loss": 2.5632, "step": 266030 }, { "epoch": 0.5300108376896596, "grad_norm": 0.1751343011856079, "learning_rate": 0.002, "loss": 2.5563, "step": 266040 }, { "epoch": 0.5300307599132985, "grad_norm": 0.16899874806404114, "learning_rate": 0.002, "loss": 2.5509, "step": 266050 }, { "epoch": 0.5300506821369374, "grad_norm": 0.15098822116851807, "learning_rate": 0.002, "loss": 2.5609, "step": 266060 }, { "epoch": 0.5300706043605763, "grad_norm": 0.17731523513793945, "learning_rate": 0.002, "loss": 2.5742, "step": 266070 }, { "epoch": 0.5300905265842152, "grad_norm": 0.15112586319446564, "learning_rate": 0.002, "loss": 2.5591, "step": 266080 }, { "epoch": 0.5301104488078542, "grad_norm": 0.1815967708826065, "learning_rate": 0.002, "loss": 2.5661, "step": 266090 }, { "epoch": 0.5301303710314931, "grad_norm": 0.17416216433048248, "learning_rate": 0.002, "loss": 2.5608, "step": 266100 }, { "epoch": 0.530150293255132, "grad_norm": 0.15436719357967377, "learning_rate": 0.002, "loss": 2.5586, "step": 266110 }, { "epoch": 0.5301702154787709, "grad_norm": 0.18562962114810944, "learning_rate": 0.002, "loss": 2.5552, "step": 266120 }, { "epoch": 0.5301901377024097, "grad_norm": 0.24679554998874664, "learning_rate": 0.002, "loss": 2.5707, "step": 266130 }, { "epoch": 0.5302100599260487, "grad_norm": 0.14319925010204315, "learning_rate": 0.002, "loss": 2.5635, "step": 266140 }, { "epoch": 0.5302299821496876, "grad_norm": 0.19948060810565948, "learning_rate": 0.002, "loss": 2.5618, "step": 266150 }, { "epoch": 0.5302499043733265, "grad_norm": 0.1588694155216217, "learning_rate": 0.002, "loss": 2.5626, "step": 266160 }, { "epoch": 0.5302698265969654, "grad_norm": 0.13639682531356812, "learning_rate": 0.002, "loss": 2.5578, "step": 266170 }, { "epoch": 0.5302897488206043, "grad_norm": 0.17641444504261017, "learning_rate": 0.002, "loss": 2.5621, "step": 266180 }, { "epoch": 0.5303096710442433, "grad_norm": 0.15414051711559296, "learning_rate": 0.002, "loss": 2.5637, "step": 266190 }, { "epoch": 0.5303295932678822, "grad_norm": 0.16848109662532806, "learning_rate": 0.002, "loss": 2.5538, "step": 266200 }, { "epoch": 0.5303495154915211, "grad_norm": 0.16339632868766785, "learning_rate": 0.002, "loss": 2.5473, "step": 266210 }, { "epoch": 0.53036943771516, "grad_norm": 0.19176575541496277, "learning_rate": 0.002, "loss": 2.5651, "step": 266220 }, { "epoch": 0.5303893599387989, "grad_norm": 0.19012151658535004, "learning_rate": 0.002, "loss": 2.5558, "step": 266230 }, { "epoch": 0.5304092821624379, "grad_norm": 0.1650204360485077, "learning_rate": 0.002, "loss": 2.5503, "step": 266240 }, { "epoch": 0.5304292043860768, "grad_norm": 0.1656396985054016, "learning_rate": 0.002, "loss": 2.5554, "step": 266250 }, { "epoch": 0.5304491266097157, "grad_norm": 0.16716749966144562, "learning_rate": 0.002, "loss": 2.5838, "step": 266260 }, { "epoch": 0.5304690488333546, "grad_norm": 0.17741471529006958, "learning_rate": 0.002, "loss": 2.5539, "step": 266270 }, { "epoch": 0.5304889710569936, "grad_norm": 0.16094015538692474, "learning_rate": 0.002, "loss": 2.5493, "step": 266280 }, { "epoch": 0.5305088932806324, "grad_norm": 0.16977828741073608, "learning_rate": 0.002, "loss": 2.5577, "step": 266290 }, { "epoch": 0.5305288155042713, "grad_norm": 0.15991920232772827, "learning_rate": 0.002, "loss": 2.5677, "step": 266300 }, { "epoch": 0.5305487377279102, "grad_norm": 0.16769813001155853, "learning_rate": 0.002, "loss": 2.5539, "step": 266310 }, { "epoch": 0.5305686599515491, "grad_norm": 0.25385698676109314, "learning_rate": 0.002, "loss": 2.5554, "step": 266320 }, { "epoch": 0.5305885821751881, "grad_norm": 0.17096741497516632, "learning_rate": 0.002, "loss": 2.565, "step": 266330 }, { "epoch": 0.530608504398827, "grad_norm": 0.14688162505626678, "learning_rate": 0.002, "loss": 2.5756, "step": 266340 }, { "epoch": 0.5306284266224659, "grad_norm": 0.16692158579826355, "learning_rate": 0.002, "loss": 2.5561, "step": 266350 }, { "epoch": 0.5306483488461048, "grad_norm": 0.17793583869934082, "learning_rate": 0.002, "loss": 2.5567, "step": 266360 }, { "epoch": 0.5306682710697437, "grad_norm": 0.1641545295715332, "learning_rate": 0.002, "loss": 2.551, "step": 266370 }, { "epoch": 0.5306881932933827, "grad_norm": 0.1713339388370514, "learning_rate": 0.002, "loss": 2.5793, "step": 266380 }, { "epoch": 0.5307081155170216, "grad_norm": 0.16720013320446014, "learning_rate": 0.002, "loss": 2.5494, "step": 266390 }, { "epoch": 0.5307280377406605, "grad_norm": 0.1620943397283554, "learning_rate": 0.002, "loss": 2.5527, "step": 266400 }, { "epoch": 0.5307479599642994, "grad_norm": 0.1506626456975937, "learning_rate": 0.002, "loss": 2.5456, "step": 266410 }, { "epoch": 0.5307678821879382, "grad_norm": 0.16587017476558685, "learning_rate": 0.002, "loss": 2.5426, "step": 266420 }, { "epoch": 0.5307878044115772, "grad_norm": 0.22524437308311462, "learning_rate": 0.002, "loss": 2.5434, "step": 266430 }, { "epoch": 0.5308077266352161, "grad_norm": 0.18716609477996826, "learning_rate": 0.002, "loss": 2.56, "step": 266440 }, { "epoch": 0.530827648858855, "grad_norm": 0.13435113430023193, "learning_rate": 0.002, "loss": 2.5595, "step": 266450 }, { "epoch": 0.5308475710824939, "grad_norm": 0.15033654868602753, "learning_rate": 0.002, "loss": 2.5587, "step": 266460 }, { "epoch": 0.5308674933061328, "grad_norm": 0.23186463117599487, "learning_rate": 0.002, "loss": 2.5647, "step": 266470 }, { "epoch": 0.5308874155297718, "grad_norm": 0.14876459538936615, "learning_rate": 0.002, "loss": 2.5417, "step": 266480 }, { "epoch": 0.5309073377534107, "grad_norm": 0.18172000348567963, "learning_rate": 0.002, "loss": 2.5602, "step": 266490 }, { "epoch": 0.5309272599770496, "grad_norm": 0.16364404559135437, "learning_rate": 0.002, "loss": 2.5557, "step": 266500 }, { "epoch": 0.5309471822006885, "grad_norm": 0.19817520678043365, "learning_rate": 0.002, "loss": 2.5547, "step": 266510 }, { "epoch": 0.5309671044243274, "grad_norm": 0.1865294724702835, "learning_rate": 0.002, "loss": 2.5584, "step": 266520 }, { "epoch": 0.5309870266479664, "grad_norm": 0.17954400181770325, "learning_rate": 0.002, "loss": 2.568, "step": 266530 }, { "epoch": 0.5310069488716053, "grad_norm": 0.20468224585056305, "learning_rate": 0.002, "loss": 2.5578, "step": 266540 }, { "epoch": 0.5310268710952442, "grad_norm": 0.47131896018981934, "learning_rate": 0.002, "loss": 2.5657, "step": 266550 }, { "epoch": 0.531046793318883, "grad_norm": 0.15381605923175812, "learning_rate": 0.002, "loss": 2.5495, "step": 266560 }, { "epoch": 0.5310667155425219, "grad_norm": 0.17092876136302948, "learning_rate": 0.002, "loss": 2.5537, "step": 266570 }, { "epoch": 0.5310866377661609, "grad_norm": 0.16095957159996033, "learning_rate": 0.002, "loss": 2.5549, "step": 266580 }, { "epoch": 0.5311065599897998, "grad_norm": 0.1606886237859726, "learning_rate": 0.002, "loss": 2.5566, "step": 266590 }, { "epoch": 0.5311264822134387, "grad_norm": 0.1629766821861267, "learning_rate": 0.002, "loss": 2.5547, "step": 266600 }, { "epoch": 0.5311464044370776, "grad_norm": 0.17549695074558258, "learning_rate": 0.002, "loss": 2.5738, "step": 266610 }, { "epoch": 0.5311663266607166, "grad_norm": 0.16957534849643707, "learning_rate": 0.002, "loss": 2.5575, "step": 266620 }, { "epoch": 0.5311862488843555, "grad_norm": 0.18198071420192719, "learning_rate": 0.002, "loss": 2.5365, "step": 266630 }, { "epoch": 0.5312061711079944, "grad_norm": 0.16908346116542816, "learning_rate": 0.002, "loss": 2.5396, "step": 266640 }, { "epoch": 0.5312260933316333, "grad_norm": 0.14650383591651917, "learning_rate": 0.002, "loss": 2.5456, "step": 266650 }, { "epoch": 0.5312460155552722, "grad_norm": 0.21058519184589386, "learning_rate": 0.002, "loss": 2.555, "step": 266660 }, { "epoch": 0.5312659377789112, "grad_norm": 0.16837570071220398, "learning_rate": 0.002, "loss": 2.5548, "step": 266670 }, { "epoch": 0.5312858600025501, "grad_norm": 0.1489066183567047, "learning_rate": 0.002, "loss": 2.5499, "step": 266680 }, { "epoch": 0.531305782226189, "grad_norm": 0.1715109646320343, "learning_rate": 0.002, "loss": 2.542, "step": 266690 }, { "epoch": 0.5313257044498279, "grad_norm": 0.15119639039039612, "learning_rate": 0.002, "loss": 2.5754, "step": 266700 }, { "epoch": 0.5313456266734667, "grad_norm": 0.15033312141895294, "learning_rate": 0.002, "loss": 2.5474, "step": 266710 }, { "epoch": 0.5313655488971057, "grad_norm": 0.16558951139450073, "learning_rate": 0.002, "loss": 2.5641, "step": 266720 }, { "epoch": 0.5313854711207446, "grad_norm": 0.1657649725675583, "learning_rate": 0.002, "loss": 2.5719, "step": 266730 }, { "epoch": 0.5314053933443835, "grad_norm": 0.14481331408023834, "learning_rate": 0.002, "loss": 2.5528, "step": 266740 }, { "epoch": 0.5314253155680224, "grad_norm": 0.16997405886650085, "learning_rate": 0.002, "loss": 2.5873, "step": 266750 }, { "epoch": 0.5314452377916613, "grad_norm": 0.17872916162014008, "learning_rate": 0.002, "loss": 2.5522, "step": 266760 }, { "epoch": 0.5314651600153003, "grad_norm": 0.19072934985160828, "learning_rate": 0.002, "loss": 2.5766, "step": 266770 }, { "epoch": 0.5314850822389392, "grad_norm": 0.15549792349338531, "learning_rate": 0.002, "loss": 2.5728, "step": 266780 }, { "epoch": 0.5315050044625781, "grad_norm": 0.15030759572982788, "learning_rate": 0.002, "loss": 2.5336, "step": 266790 }, { "epoch": 0.531524926686217, "grad_norm": 0.15469105541706085, "learning_rate": 0.002, "loss": 2.5503, "step": 266800 }, { "epoch": 0.5315448489098559, "grad_norm": 0.18016475439071655, "learning_rate": 0.002, "loss": 2.5519, "step": 266810 }, { "epoch": 0.5315647711334949, "grad_norm": 0.20891685783863068, "learning_rate": 0.002, "loss": 2.549, "step": 266820 }, { "epoch": 0.5315846933571338, "grad_norm": 0.1722947210073471, "learning_rate": 0.002, "loss": 2.5527, "step": 266830 }, { "epoch": 0.5316046155807727, "grad_norm": 0.1452954113483429, "learning_rate": 0.002, "loss": 2.5521, "step": 266840 }, { "epoch": 0.5316245378044115, "grad_norm": 0.18275342881679535, "learning_rate": 0.002, "loss": 2.5533, "step": 266850 }, { "epoch": 0.5316444600280504, "grad_norm": 0.17670734226703644, "learning_rate": 0.002, "loss": 2.5634, "step": 266860 }, { "epoch": 0.5316643822516894, "grad_norm": 0.16681891679763794, "learning_rate": 0.002, "loss": 2.5701, "step": 266870 }, { "epoch": 0.5316843044753283, "grad_norm": 0.16721627116203308, "learning_rate": 0.002, "loss": 2.5427, "step": 266880 }, { "epoch": 0.5317042266989672, "grad_norm": 0.17448776960372925, "learning_rate": 0.002, "loss": 2.5634, "step": 266890 }, { "epoch": 0.5317241489226061, "grad_norm": 0.1415223628282547, "learning_rate": 0.002, "loss": 2.5733, "step": 266900 }, { "epoch": 0.5317440711462451, "grad_norm": 0.18553611636161804, "learning_rate": 0.002, "loss": 2.5674, "step": 266910 }, { "epoch": 0.531763993369884, "grad_norm": 0.1911686509847641, "learning_rate": 0.002, "loss": 2.5567, "step": 266920 }, { "epoch": 0.5317839155935229, "grad_norm": 0.18434493243694305, "learning_rate": 0.002, "loss": 2.5547, "step": 266930 }, { "epoch": 0.5318038378171618, "grad_norm": 0.17517411708831787, "learning_rate": 0.002, "loss": 2.5439, "step": 266940 }, { "epoch": 0.5318237600408007, "grad_norm": 0.1582169085741043, "learning_rate": 0.002, "loss": 2.5499, "step": 266950 }, { "epoch": 0.5318436822644397, "grad_norm": 0.16102612018585205, "learning_rate": 0.002, "loss": 2.5508, "step": 266960 }, { "epoch": 0.5318636044880786, "grad_norm": 0.17914879322052002, "learning_rate": 0.002, "loss": 2.5515, "step": 266970 }, { "epoch": 0.5318835267117175, "grad_norm": 0.13471734523773193, "learning_rate": 0.002, "loss": 2.5568, "step": 266980 }, { "epoch": 0.5319034489353563, "grad_norm": 0.1825190633535385, "learning_rate": 0.002, "loss": 2.5546, "step": 266990 }, { "epoch": 0.5319233711589952, "grad_norm": 0.16615775227546692, "learning_rate": 0.002, "loss": 2.5551, "step": 267000 }, { "epoch": 0.5319432933826342, "grad_norm": 0.1963341385126114, "learning_rate": 0.002, "loss": 2.5466, "step": 267010 }, { "epoch": 0.5319632156062731, "grad_norm": 0.13718941807746887, "learning_rate": 0.002, "loss": 2.5593, "step": 267020 }, { "epoch": 0.531983137829912, "grad_norm": 0.17126691341400146, "learning_rate": 0.002, "loss": 2.5549, "step": 267030 }, { "epoch": 0.5320030600535509, "grad_norm": 0.1594059318304062, "learning_rate": 0.002, "loss": 2.5556, "step": 267040 }, { "epoch": 0.5320229822771898, "grad_norm": 0.16394314169883728, "learning_rate": 0.002, "loss": 2.5393, "step": 267050 }, { "epoch": 0.5320429045008288, "grad_norm": 0.15142135322093964, "learning_rate": 0.002, "loss": 2.5572, "step": 267060 }, { "epoch": 0.5320628267244677, "grad_norm": 0.14970585703849792, "learning_rate": 0.002, "loss": 2.5641, "step": 267070 }, { "epoch": 0.5320827489481066, "grad_norm": 0.1545906960964203, "learning_rate": 0.002, "loss": 2.5511, "step": 267080 }, { "epoch": 0.5321026711717455, "grad_norm": 0.16554152965545654, "learning_rate": 0.002, "loss": 2.5782, "step": 267090 }, { "epoch": 0.5321225933953844, "grad_norm": 0.2054668664932251, "learning_rate": 0.002, "loss": 2.5598, "step": 267100 }, { "epoch": 0.5321425156190234, "grad_norm": 0.13654273748397827, "learning_rate": 0.002, "loss": 2.5542, "step": 267110 }, { "epoch": 0.5321624378426623, "grad_norm": 0.17207197844982147, "learning_rate": 0.002, "loss": 2.5643, "step": 267120 }, { "epoch": 0.5321823600663012, "grad_norm": 0.1742292046546936, "learning_rate": 0.002, "loss": 2.5511, "step": 267130 }, { "epoch": 0.53220228228994, "grad_norm": 0.18275438249111176, "learning_rate": 0.002, "loss": 2.5504, "step": 267140 }, { "epoch": 0.5322222045135789, "grad_norm": 0.19479407370090485, "learning_rate": 0.002, "loss": 2.5613, "step": 267150 }, { "epoch": 0.5322421267372179, "grad_norm": 0.13235312700271606, "learning_rate": 0.002, "loss": 2.5453, "step": 267160 }, { "epoch": 0.5322620489608568, "grad_norm": 0.1907494068145752, "learning_rate": 0.002, "loss": 2.5652, "step": 267170 }, { "epoch": 0.5322819711844957, "grad_norm": 0.14107166230678558, "learning_rate": 0.002, "loss": 2.5601, "step": 267180 }, { "epoch": 0.5323018934081346, "grad_norm": 0.15405365824699402, "learning_rate": 0.002, "loss": 2.5675, "step": 267190 }, { "epoch": 0.5323218156317736, "grad_norm": 0.1872818022966385, "learning_rate": 0.002, "loss": 2.5542, "step": 267200 }, { "epoch": 0.5323417378554125, "grad_norm": 0.1566101312637329, "learning_rate": 0.002, "loss": 2.5483, "step": 267210 }, { "epoch": 0.5323616600790514, "grad_norm": 0.15131552517414093, "learning_rate": 0.002, "loss": 2.5577, "step": 267220 }, { "epoch": 0.5323815823026903, "grad_norm": 0.1814264953136444, "learning_rate": 0.002, "loss": 2.5671, "step": 267230 }, { "epoch": 0.5324015045263292, "grad_norm": 0.15010906755924225, "learning_rate": 0.002, "loss": 2.5629, "step": 267240 }, { "epoch": 0.5324214267499682, "grad_norm": 0.1804637461900711, "learning_rate": 0.002, "loss": 2.5638, "step": 267250 }, { "epoch": 0.5324413489736071, "grad_norm": 0.17326384782791138, "learning_rate": 0.002, "loss": 2.5669, "step": 267260 }, { "epoch": 0.532461271197246, "grad_norm": 0.17061671614646912, "learning_rate": 0.002, "loss": 2.5515, "step": 267270 }, { "epoch": 0.5324811934208848, "grad_norm": 0.18378545343875885, "learning_rate": 0.002, "loss": 2.554, "step": 267280 }, { "epoch": 0.5325011156445237, "grad_norm": 0.24553827941417694, "learning_rate": 0.002, "loss": 2.5542, "step": 267290 }, { "epoch": 0.5325210378681627, "grad_norm": 0.18769754469394684, "learning_rate": 0.002, "loss": 2.5467, "step": 267300 }, { "epoch": 0.5325409600918016, "grad_norm": 0.20717613399028778, "learning_rate": 0.002, "loss": 2.5657, "step": 267310 }, { "epoch": 0.5325608823154405, "grad_norm": 0.15484051406383514, "learning_rate": 0.002, "loss": 2.545, "step": 267320 }, { "epoch": 0.5325808045390794, "grad_norm": 0.1727205365896225, "learning_rate": 0.002, "loss": 2.5702, "step": 267330 }, { "epoch": 0.5326007267627183, "grad_norm": 0.15613119304180145, "learning_rate": 0.002, "loss": 2.5578, "step": 267340 }, { "epoch": 0.5326206489863573, "grad_norm": 0.15949726104736328, "learning_rate": 0.002, "loss": 2.5429, "step": 267350 }, { "epoch": 0.5326405712099962, "grad_norm": 0.15404075384140015, "learning_rate": 0.002, "loss": 2.5563, "step": 267360 }, { "epoch": 0.5326604934336351, "grad_norm": 0.1627895087003708, "learning_rate": 0.002, "loss": 2.5539, "step": 267370 }, { "epoch": 0.532680415657274, "grad_norm": 0.1713736355304718, "learning_rate": 0.002, "loss": 2.5642, "step": 267380 }, { "epoch": 0.5327003378809129, "grad_norm": 0.13633957505226135, "learning_rate": 0.002, "loss": 2.5665, "step": 267390 }, { "epoch": 0.5327202601045519, "grad_norm": 0.1815475970506668, "learning_rate": 0.002, "loss": 2.556, "step": 267400 }, { "epoch": 0.5327401823281908, "grad_norm": 0.18998348712921143, "learning_rate": 0.002, "loss": 2.5501, "step": 267410 }, { "epoch": 0.5327601045518296, "grad_norm": 0.17467044293880463, "learning_rate": 0.002, "loss": 2.5548, "step": 267420 }, { "epoch": 0.5327800267754685, "grad_norm": 0.2296411395072937, "learning_rate": 0.002, "loss": 2.5559, "step": 267430 }, { "epoch": 0.5327999489991074, "grad_norm": 0.1582714021205902, "learning_rate": 0.002, "loss": 2.5518, "step": 267440 }, { "epoch": 0.5328198712227464, "grad_norm": 0.1676693558692932, "learning_rate": 0.002, "loss": 2.5644, "step": 267450 }, { "epoch": 0.5328397934463853, "grad_norm": 0.20807184278964996, "learning_rate": 0.002, "loss": 2.5657, "step": 267460 }, { "epoch": 0.5328597156700242, "grad_norm": 0.16092874109745026, "learning_rate": 0.002, "loss": 2.564, "step": 267470 }, { "epoch": 0.5328796378936631, "grad_norm": 0.17460574209690094, "learning_rate": 0.002, "loss": 2.5586, "step": 267480 }, { "epoch": 0.5328995601173021, "grad_norm": 0.14670780301094055, "learning_rate": 0.002, "loss": 2.5569, "step": 267490 }, { "epoch": 0.532919482340941, "grad_norm": 0.17070333659648895, "learning_rate": 0.002, "loss": 2.5497, "step": 267500 }, { "epoch": 0.5329394045645799, "grad_norm": 0.16085372865200043, "learning_rate": 0.002, "loss": 2.5623, "step": 267510 }, { "epoch": 0.5329593267882188, "grad_norm": 0.1676873415708542, "learning_rate": 0.002, "loss": 2.5727, "step": 267520 }, { "epoch": 0.5329792490118577, "grad_norm": 0.1640467345714569, "learning_rate": 0.002, "loss": 2.5604, "step": 267530 }, { "epoch": 0.5329991712354967, "grad_norm": 0.17106211185455322, "learning_rate": 0.002, "loss": 2.5429, "step": 267540 }, { "epoch": 0.5330190934591356, "grad_norm": 0.17542403936386108, "learning_rate": 0.002, "loss": 2.5546, "step": 267550 }, { "epoch": 0.5330390156827745, "grad_norm": 0.16768726706504822, "learning_rate": 0.002, "loss": 2.5438, "step": 267560 }, { "epoch": 0.5330589379064133, "grad_norm": 0.18183881044387817, "learning_rate": 0.002, "loss": 2.5622, "step": 267570 }, { "epoch": 0.5330788601300522, "grad_norm": 0.1650552898645401, "learning_rate": 0.002, "loss": 2.5493, "step": 267580 }, { "epoch": 0.5330987823536912, "grad_norm": 0.17685599625110626, "learning_rate": 0.002, "loss": 2.5593, "step": 267590 }, { "epoch": 0.5331187045773301, "grad_norm": 0.14713232219219208, "learning_rate": 0.002, "loss": 2.5551, "step": 267600 }, { "epoch": 0.533138626800969, "grad_norm": 0.17273080348968506, "learning_rate": 0.002, "loss": 2.5579, "step": 267610 }, { "epoch": 0.5331585490246079, "grad_norm": 0.20476095378398895, "learning_rate": 0.002, "loss": 2.5749, "step": 267620 }, { "epoch": 0.5331784712482468, "grad_norm": 0.1594853401184082, "learning_rate": 0.002, "loss": 2.5462, "step": 267630 }, { "epoch": 0.5331983934718858, "grad_norm": 0.17081768810749054, "learning_rate": 0.002, "loss": 2.5606, "step": 267640 }, { "epoch": 0.5332183156955247, "grad_norm": 0.18426662683486938, "learning_rate": 0.002, "loss": 2.5576, "step": 267650 }, { "epoch": 0.5332382379191636, "grad_norm": 0.17862123250961304, "learning_rate": 0.002, "loss": 2.5665, "step": 267660 }, { "epoch": 0.5332581601428025, "grad_norm": 0.1996041089296341, "learning_rate": 0.002, "loss": 2.5447, "step": 267670 }, { "epoch": 0.5332780823664414, "grad_norm": 0.1637120544910431, "learning_rate": 0.002, "loss": 2.5479, "step": 267680 }, { "epoch": 0.5332980045900804, "grad_norm": 0.1803530901670456, "learning_rate": 0.002, "loss": 2.5557, "step": 267690 }, { "epoch": 0.5333179268137193, "grad_norm": 0.16133619844913483, "learning_rate": 0.002, "loss": 2.5563, "step": 267700 }, { "epoch": 0.5333378490373581, "grad_norm": 0.15803347527980804, "learning_rate": 0.002, "loss": 2.5539, "step": 267710 }, { "epoch": 0.533357771260997, "grad_norm": 0.18324501812458038, "learning_rate": 0.002, "loss": 2.5565, "step": 267720 }, { "epoch": 0.5333776934846359, "grad_norm": 0.1482919305562973, "learning_rate": 0.002, "loss": 2.5435, "step": 267730 }, { "epoch": 0.5333976157082749, "grad_norm": 0.17077934741973877, "learning_rate": 0.002, "loss": 2.5469, "step": 267740 }, { "epoch": 0.5334175379319138, "grad_norm": 0.17910271883010864, "learning_rate": 0.002, "loss": 2.5553, "step": 267750 }, { "epoch": 0.5334374601555527, "grad_norm": 0.16442295908927917, "learning_rate": 0.002, "loss": 2.5514, "step": 267760 }, { "epoch": 0.5334573823791916, "grad_norm": 0.1731949895620346, "learning_rate": 0.002, "loss": 2.5575, "step": 267770 }, { "epoch": 0.5334773046028306, "grad_norm": 0.16010205447673798, "learning_rate": 0.002, "loss": 2.5612, "step": 267780 }, { "epoch": 0.5334972268264695, "grad_norm": 0.15655286610126495, "learning_rate": 0.002, "loss": 2.5352, "step": 267790 }, { "epoch": 0.5335171490501084, "grad_norm": 0.14886578917503357, "learning_rate": 0.002, "loss": 2.5689, "step": 267800 }, { "epoch": 0.5335370712737473, "grad_norm": 0.184043288230896, "learning_rate": 0.002, "loss": 2.5667, "step": 267810 }, { "epoch": 0.5335569934973862, "grad_norm": 0.23797617852687836, "learning_rate": 0.002, "loss": 2.546, "step": 267820 }, { "epoch": 0.5335769157210252, "grad_norm": 0.16052676737308502, "learning_rate": 0.002, "loss": 2.5724, "step": 267830 }, { "epoch": 0.5335968379446641, "grad_norm": 0.14711256325244904, "learning_rate": 0.002, "loss": 2.5597, "step": 267840 }, { "epoch": 0.533616760168303, "grad_norm": 0.187986359000206, "learning_rate": 0.002, "loss": 2.5463, "step": 267850 }, { "epoch": 0.5336366823919418, "grad_norm": 0.2307402789592743, "learning_rate": 0.002, "loss": 2.5742, "step": 267860 }, { "epoch": 0.5336566046155807, "grad_norm": 0.15979252755641937, "learning_rate": 0.002, "loss": 2.5662, "step": 267870 }, { "epoch": 0.5336765268392197, "grad_norm": 0.1555924266576767, "learning_rate": 0.002, "loss": 2.5512, "step": 267880 }, { "epoch": 0.5336964490628586, "grad_norm": 0.15657740831375122, "learning_rate": 0.002, "loss": 2.5525, "step": 267890 }, { "epoch": 0.5337163712864975, "grad_norm": 0.16193751990795135, "learning_rate": 0.002, "loss": 2.5599, "step": 267900 }, { "epoch": 0.5337362935101364, "grad_norm": 0.17594486474990845, "learning_rate": 0.002, "loss": 2.559, "step": 267910 }, { "epoch": 0.5337562157337753, "grad_norm": 0.16477233171463013, "learning_rate": 0.002, "loss": 2.5665, "step": 267920 }, { "epoch": 0.5337761379574143, "grad_norm": 0.17394497990608215, "learning_rate": 0.002, "loss": 2.5516, "step": 267930 }, { "epoch": 0.5337960601810532, "grad_norm": 0.1486462950706482, "learning_rate": 0.002, "loss": 2.5546, "step": 267940 }, { "epoch": 0.5338159824046921, "grad_norm": 0.17326396703720093, "learning_rate": 0.002, "loss": 2.5604, "step": 267950 }, { "epoch": 0.533835904628331, "grad_norm": 0.137660413980484, "learning_rate": 0.002, "loss": 2.5675, "step": 267960 }, { "epoch": 0.5338558268519699, "grad_norm": 0.21968597173690796, "learning_rate": 0.002, "loss": 2.5645, "step": 267970 }, { "epoch": 0.5338757490756089, "grad_norm": 0.17996864020824432, "learning_rate": 0.002, "loss": 2.5654, "step": 267980 }, { "epoch": 0.5338956712992478, "grad_norm": 0.1679486185312271, "learning_rate": 0.002, "loss": 2.5514, "step": 267990 }, { "epoch": 0.5339155935228866, "grad_norm": 0.17419160902500153, "learning_rate": 0.002, "loss": 2.5722, "step": 268000 }, { "epoch": 0.5339355157465255, "grad_norm": 0.15629613399505615, "learning_rate": 0.002, "loss": 2.5398, "step": 268010 }, { "epoch": 0.5339554379701644, "grad_norm": 0.15457086265087128, "learning_rate": 0.002, "loss": 2.555, "step": 268020 }, { "epoch": 0.5339753601938034, "grad_norm": 0.15169550478458405, "learning_rate": 0.002, "loss": 2.5641, "step": 268030 }, { "epoch": 0.5339952824174423, "grad_norm": 0.18509279191493988, "learning_rate": 0.002, "loss": 2.5539, "step": 268040 }, { "epoch": 0.5340152046410812, "grad_norm": 0.16060055792331696, "learning_rate": 0.002, "loss": 2.5572, "step": 268050 }, { "epoch": 0.5340351268647201, "grad_norm": 0.18329764902591705, "learning_rate": 0.002, "loss": 2.5693, "step": 268060 }, { "epoch": 0.5340550490883591, "grad_norm": 0.1688721776008606, "learning_rate": 0.002, "loss": 2.5427, "step": 268070 }, { "epoch": 0.534074971311998, "grad_norm": 0.1797836720943451, "learning_rate": 0.002, "loss": 2.5624, "step": 268080 }, { "epoch": 0.5340948935356369, "grad_norm": 0.15820525586605072, "learning_rate": 0.002, "loss": 2.5528, "step": 268090 }, { "epoch": 0.5341148157592758, "grad_norm": 0.16539038717746735, "learning_rate": 0.002, "loss": 2.5376, "step": 268100 }, { "epoch": 0.5341347379829147, "grad_norm": 0.1588105708360672, "learning_rate": 0.002, "loss": 2.5682, "step": 268110 }, { "epoch": 0.5341546602065537, "grad_norm": 0.14210781455039978, "learning_rate": 0.002, "loss": 2.5535, "step": 268120 }, { "epoch": 0.5341745824301926, "grad_norm": 0.2143462896347046, "learning_rate": 0.002, "loss": 2.5487, "step": 268130 }, { "epoch": 0.5341945046538314, "grad_norm": 0.1487106829881668, "learning_rate": 0.002, "loss": 2.5632, "step": 268140 }, { "epoch": 0.5342144268774703, "grad_norm": 0.1958862841129303, "learning_rate": 0.002, "loss": 2.5587, "step": 268150 }, { "epoch": 0.5342343491011092, "grad_norm": 0.17283210158348083, "learning_rate": 0.002, "loss": 2.5703, "step": 268160 }, { "epoch": 0.5342542713247482, "grad_norm": 0.19012784957885742, "learning_rate": 0.002, "loss": 2.5568, "step": 268170 }, { "epoch": 0.5342741935483871, "grad_norm": 0.15656104683876038, "learning_rate": 0.002, "loss": 2.5534, "step": 268180 }, { "epoch": 0.534294115772026, "grad_norm": 0.15458743274211884, "learning_rate": 0.002, "loss": 2.5695, "step": 268190 }, { "epoch": 0.5343140379956649, "grad_norm": 0.17232026159763336, "learning_rate": 0.002, "loss": 2.5566, "step": 268200 }, { "epoch": 0.5343339602193038, "grad_norm": 0.14679467678070068, "learning_rate": 0.002, "loss": 2.5547, "step": 268210 }, { "epoch": 0.5343538824429428, "grad_norm": 0.16296517848968506, "learning_rate": 0.002, "loss": 2.5468, "step": 268220 }, { "epoch": 0.5343738046665817, "grad_norm": 0.20174652338027954, "learning_rate": 0.002, "loss": 2.5396, "step": 268230 }, { "epoch": 0.5343937268902206, "grad_norm": 0.1644945591688156, "learning_rate": 0.002, "loss": 2.5472, "step": 268240 }, { "epoch": 0.5344136491138595, "grad_norm": 0.15627309679985046, "learning_rate": 0.002, "loss": 2.5499, "step": 268250 }, { "epoch": 0.5344335713374984, "grad_norm": 0.19454045593738556, "learning_rate": 0.002, "loss": 2.549, "step": 268260 }, { "epoch": 0.5344534935611374, "grad_norm": 0.15132123231887817, "learning_rate": 0.002, "loss": 2.5638, "step": 268270 }, { "epoch": 0.5344734157847763, "grad_norm": 0.182651087641716, "learning_rate": 0.002, "loss": 2.5469, "step": 268280 }, { "epoch": 0.5344933380084151, "grad_norm": 0.17573082447052002, "learning_rate": 0.002, "loss": 2.5468, "step": 268290 }, { "epoch": 0.534513260232054, "grad_norm": 0.17994269728660583, "learning_rate": 0.002, "loss": 2.5677, "step": 268300 }, { "epoch": 0.5345331824556929, "grad_norm": 0.1727401614189148, "learning_rate": 0.002, "loss": 2.5752, "step": 268310 }, { "epoch": 0.5345531046793319, "grad_norm": 0.18649089336395264, "learning_rate": 0.002, "loss": 2.5609, "step": 268320 }, { "epoch": 0.5345730269029708, "grad_norm": 0.1535881757736206, "learning_rate": 0.002, "loss": 2.5466, "step": 268330 }, { "epoch": 0.5345929491266097, "grad_norm": 0.16327403485774994, "learning_rate": 0.002, "loss": 2.5637, "step": 268340 }, { "epoch": 0.5346128713502486, "grad_norm": 0.163380429148674, "learning_rate": 0.002, "loss": 2.5642, "step": 268350 }, { "epoch": 0.5346327935738875, "grad_norm": 0.17522449791431427, "learning_rate": 0.002, "loss": 2.5613, "step": 268360 }, { "epoch": 0.5346527157975265, "grad_norm": 0.1493699997663498, "learning_rate": 0.002, "loss": 2.5554, "step": 268370 }, { "epoch": 0.5346726380211654, "grad_norm": 0.15452994406223297, "learning_rate": 0.002, "loss": 2.5543, "step": 268380 }, { "epoch": 0.5346925602448043, "grad_norm": 0.16220857203006744, "learning_rate": 0.002, "loss": 2.552, "step": 268390 }, { "epoch": 0.5347124824684432, "grad_norm": 0.1575555056333542, "learning_rate": 0.002, "loss": 2.5536, "step": 268400 }, { "epoch": 0.5347324046920822, "grad_norm": 0.17188091576099396, "learning_rate": 0.002, "loss": 2.5348, "step": 268410 }, { "epoch": 0.534752326915721, "grad_norm": 0.1442834883928299, "learning_rate": 0.002, "loss": 2.5524, "step": 268420 }, { "epoch": 0.53477224913936, "grad_norm": 0.16092142462730408, "learning_rate": 0.002, "loss": 2.549, "step": 268430 }, { "epoch": 0.5347921713629988, "grad_norm": 0.19061876833438873, "learning_rate": 0.002, "loss": 2.5605, "step": 268440 }, { "epoch": 0.5348120935866377, "grad_norm": 0.140468031167984, "learning_rate": 0.002, "loss": 2.5626, "step": 268450 }, { "epoch": 0.5348320158102767, "grad_norm": 0.1832885891199112, "learning_rate": 0.002, "loss": 2.5555, "step": 268460 }, { "epoch": 0.5348519380339156, "grad_norm": 0.17942661046981812, "learning_rate": 0.002, "loss": 2.5443, "step": 268470 }, { "epoch": 0.5348718602575545, "grad_norm": 0.1797783225774765, "learning_rate": 0.002, "loss": 2.5564, "step": 268480 }, { "epoch": 0.5348917824811934, "grad_norm": 0.1795225441455841, "learning_rate": 0.002, "loss": 2.5611, "step": 268490 }, { "epoch": 0.5349117047048323, "grad_norm": 0.1697845160961151, "learning_rate": 0.002, "loss": 2.5578, "step": 268500 }, { "epoch": 0.5349316269284713, "grad_norm": 0.16928187012672424, "learning_rate": 0.002, "loss": 2.5599, "step": 268510 }, { "epoch": 0.5349515491521102, "grad_norm": 0.15416714549064636, "learning_rate": 0.002, "loss": 2.5472, "step": 268520 }, { "epoch": 0.5349714713757491, "grad_norm": 0.17487560212612152, "learning_rate": 0.002, "loss": 2.5535, "step": 268530 }, { "epoch": 0.534991393599388, "grad_norm": 0.2203027606010437, "learning_rate": 0.002, "loss": 2.5638, "step": 268540 }, { "epoch": 0.5350113158230269, "grad_norm": 0.16204456984996796, "learning_rate": 0.002, "loss": 2.5554, "step": 268550 }, { "epoch": 0.5350312380466659, "grad_norm": 0.17012850940227509, "learning_rate": 0.002, "loss": 2.553, "step": 268560 }, { "epoch": 0.5350511602703047, "grad_norm": 0.15783195197582245, "learning_rate": 0.002, "loss": 2.561, "step": 268570 }, { "epoch": 0.5350710824939436, "grad_norm": 0.17157740890979767, "learning_rate": 0.002, "loss": 2.5676, "step": 268580 }, { "epoch": 0.5350910047175825, "grad_norm": 0.1752062737941742, "learning_rate": 0.002, "loss": 2.537, "step": 268590 }, { "epoch": 0.5351109269412214, "grad_norm": 0.75893235206604, "learning_rate": 0.002, "loss": 2.5506, "step": 268600 }, { "epoch": 0.5351308491648604, "grad_norm": 0.2096950262784958, "learning_rate": 0.002, "loss": 2.5522, "step": 268610 }, { "epoch": 0.5351507713884993, "grad_norm": 0.16695500910282135, "learning_rate": 0.002, "loss": 2.5539, "step": 268620 }, { "epoch": 0.5351706936121382, "grad_norm": 0.17481915652751923, "learning_rate": 0.002, "loss": 2.5598, "step": 268630 }, { "epoch": 0.5351906158357771, "grad_norm": 0.14191819727420807, "learning_rate": 0.002, "loss": 2.561, "step": 268640 }, { "epoch": 0.535210538059416, "grad_norm": 0.17906154692173004, "learning_rate": 0.002, "loss": 2.5594, "step": 268650 }, { "epoch": 0.535230460283055, "grad_norm": 0.1825815886259079, "learning_rate": 0.002, "loss": 2.5677, "step": 268660 }, { "epoch": 0.5352503825066939, "grad_norm": 0.14483262598514557, "learning_rate": 0.002, "loss": 2.5559, "step": 268670 }, { "epoch": 0.5352703047303328, "grad_norm": 0.18110232055187225, "learning_rate": 0.002, "loss": 2.5623, "step": 268680 }, { "epoch": 0.5352902269539717, "grad_norm": 0.15552088618278503, "learning_rate": 0.002, "loss": 2.555, "step": 268690 }, { "epoch": 0.5353101491776107, "grad_norm": 0.1700730323791504, "learning_rate": 0.002, "loss": 2.5633, "step": 268700 }, { "epoch": 0.5353300714012496, "grad_norm": 0.14611923694610596, "learning_rate": 0.002, "loss": 2.56, "step": 268710 }, { "epoch": 0.5353499936248884, "grad_norm": 0.17306947708129883, "learning_rate": 0.002, "loss": 2.5667, "step": 268720 }, { "epoch": 0.5353699158485273, "grad_norm": 0.16617770493030548, "learning_rate": 0.002, "loss": 2.5538, "step": 268730 }, { "epoch": 0.5353898380721662, "grad_norm": 0.15864744782447815, "learning_rate": 0.002, "loss": 2.5625, "step": 268740 }, { "epoch": 0.5354097602958052, "grad_norm": 0.16886332631111145, "learning_rate": 0.002, "loss": 2.5419, "step": 268750 }, { "epoch": 0.5354296825194441, "grad_norm": 0.1537378579378128, "learning_rate": 0.002, "loss": 2.5658, "step": 268760 }, { "epoch": 0.535449604743083, "grad_norm": 0.19006063044071198, "learning_rate": 0.002, "loss": 2.5512, "step": 268770 }, { "epoch": 0.5354695269667219, "grad_norm": 0.18267863988876343, "learning_rate": 0.002, "loss": 2.5429, "step": 268780 }, { "epoch": 0.5354894491903608, "grad_norm": 0.18892498314380646, "learning_rate": 0.002, "loss": 2.5636, "step": 268790 }, { "epoch": 0.5355093714139998, "grad_norm": 0.16711799800395966, "learning_rate": 0.002, "loss": 2.5673, "step": 268800 }, { "epoch": 0.5355292936376387, "grad_norm": 0.18263080716133118, "learning_rate": 0.002, "loss": 2.5664, "step": 268810 }, { "epoch": 0.5355492158612776, "grad_norm": 0.19396735727787018, "learning_rate": 0.002, "loss": 2.5521, "step": 268820 }, { "epoch": 0.5355691380849165, "grad_norm": 0.1818930208683014, "learning_rate": 0.002, "loss": 2.5582, "step": 268830 }, { "epoch": 0.5355890603085554, "grad_norm": 0.16393089294433594, "learning_rate": 0.002, "loss": 2.5633, "step": 268840 }, { "epoch": 0.5356089825321944, "grad_norm": 0.1567041128873825, "learning_rate": 0.002, "loss": 2.5664, "step": 268850 }, { "epoch": 0.5356289047558332, "grad_norm": 0.19402925670146942, "learning_rate": 0.002, "loss": 2.5436, "step": 268860 }, { "epoch": 0.5356488269794721, "grad_norm": 0.15676309168338776, "learning_rate": 0.002, "loss": 2.5458, "step": 268870 }, { "epoch": 0.535668749203111, "grad_norm": 0.17579543590545654, "learning_rate": 0.002, "loss": 2.5713, "step": 268880 }, { "epoch": 0.5356886714267499, "grad_norm": 0.17845706641674042, "learning_rate": 0.002, "loss": 2.5471, "step": 268890 }, { "epoch": 0.5357085936503889, "grad_norm": 0.15789204835891724, "learning_rate": 0.002, "loss": 2.555, "step": 268900 }, { "epoch": 0.5357285158740278, "grad_norm": 0.15396922826766968, "learning_rate": 0.002, "loss": 2.5608, "step": 268910 }, { "epoch": 0.5357484380976667, "grad_norm": 0.2103840708732605, "learning_rate": 0.002, "loss": 2.575, "step": 268920 }, { "epoch": 0.5357683603213056, "grad_norm": 0.18315699696540833, "learning_rate": 0.002, "loss": 2.5588, "step": 268930 }, { "epoch": 0.5357882825449445, "grad_norm": 0.22650042176246643, "learning_rate": 0.002, "loss": 2.5613, "step": 268940 }, { "epoch": 0.5358082047685835, "grad_norm": 0.14907586574554443, "learning_rate": 0.002, "loss": 2.5658, "step": 268950 }, { "epoch": 0.5358281269922224, "grad_norm": 0.1585233360528946, "learning_rate": 0.002, "loss": 2.5679, "step": 268960 }, { "epoch": 0.5358480492158613, "grad_norm": 0.19441740214824677, "learning_rate": 0.002, "loss": 2.5626, "step": 268970 }, { "epoch": 0.5358679714395002, "grad_norm": 0.15908251702785492, "learning_rate": 0.002, "loss": 2.5573, "step": 268980 }, { "epoch": 0.5358878936631392, "grad_norm": 0.19573520123958588, "learning_rate": 0.002, "loss": 2.5441, "step": 268990 }, { "epoch": 0.535907815886778, "grad_norm": 0.1638989895582199, "learning_rate": 0.002, "loss": 2.5363, "step": 269000 }, { "epoch": 0.5359277381104169, "grad_norm": 0.17658613622188568, "learning_rate": 0.002, "loss": 2.5632, "step": 269010 }, { "epoch": 0.5359476603340558, "grad_norm": 0.15309536457061768, "learning_rate": 0.002, "loss": 2.5506, "step": 269020 }, { "epoch": 0.5359675825576947, "grad_norm": 0.3045200705528259, "learning_rate": 0.002, "loss": 2.5372, "step": 269030 }, { "epoch": 0.5359875047813337, "grad_norm": 0.16897010803222656, "learning_rate": 0.002, "loss": 2.5575, "step": 269040 }, { "epoch": 0.5360074270049726, "grad_norm": 0.17383526265621185, "learning_rate": 0.002, "loss": 2.5717, "step": 269050 }, { "epoch": 0.5360273492286115, "grad_norm": 0.16147726774215698, "learning_rate": 0.002, "loss": 2.5536, "step": 269060 }, { "epoch": 0.5360472714522504, "grad_norm": 0.16553246974945068, "learning_rate": 0.002, "loss": 2.5687, "step": 269070 }, { "epoch": 0.5360671936758893, "grad_norm": 0.20055201649665833, "learning_rate": 0.002, "loss": 2.562, "step": 269080 }, { "epoch": 0.5360871158995283, "grad_norm": 0.21399566531181335, "learning_rate": 0.002, "loss": 2.5472, "step": 269090 }, { "epoch": 0.5361070381231672, "grad_norm": 0.15977391600608826, "learning_rate": 0.002, "loss": 2.5506, "step": 269100 }, { "epoch": 0.5361269603468061, "grad_norm": 0.1878969371318817, "learning_rate": 0.002, "loss": 2.5722, "step": 269110 }, { "epoch": 0.536146882570445, "grad_norm": 0.1638919860124588, "learning_rate": 0.002, "loss": 2.5541, "step": 269120 }, { "epoch": 0.5361668047940839, "grad_norm": 0.17158374190330505, "learning_rate": 0.002, "loss": 2.5537, "step": 269130 }, { "epoch": 0.5361867270177229, "grad_norm": 0.1980077177286148, "learning_rate": 0.002, "loss": 2.5538, "step": 269140 }, { "epoch": 0.5362066492413617, "grad_norm": 0.14540503919124603, "learning_rate": 0.002, "loss": 2.5711, "step": 269150 }, { "epoch": 0.5362265714650006, "grad_norm": 0.1737639307975769, "learning_rate": 0.002, "loss": 2.555, "step": 269160 }, { "epoch": 0.5362464936886395, "grad_norm": 0.1678636372089386, "learning_rate": 0.002, "loss": 2.5592, "step": 269170 }, { "epoch": 0.5362664159122784, "grad_norm": 0.16661490499973297, "learning_rate": 0.002, "loss": 2.5469, "step": 269180 }, { "epoch": 0.5362863381359174, "grad_norm": 0.1481076329946518, "learning_rate": 0.002, "loss": 2.5365, "step": 269190 }, { "epoch": 0.5363062603595563, "grad_norm": 0.15874676406383514, "learning_rate": 0.002, "loss": 2.5464, "step": 269200 }, { "epoch": 0.5363261825831952, "grad_norm": 0.17492833733558655, "learning_rate": 0.002, "loss": 2.541, "step": 269210 }, { "epoch": 0.5363461048068341, "grad_norm": 0.15261210501194, "learning_rate": 0.002, "loss": 2.5575, "step": 269220 }, { "epoch": 0.536366027030473, "grad_norm": 0.19939351081848145, "learning_rate": 0.002, "loss": 2.5628, "step": 269230 }, { "epoch": 0.536385949254112, "grad_norm": 0.14683440327644348, "learning_rate": 0.002, "loss": 2.5563, "step": 269240 }, { "epoch": 0.5364058714777509, "grad_norm": 0.1732579618692398, "learning_rate": 0.002, "loss": 2.5424, "step": 269250 }, { "epoch": 0.5364257937013898, "grad_norm": 0.2088761031627655, "learning_rate": 0.002, "loss": 2.5516, "step": 269260 }, { "epoch": 0.5364457159250287, "grad_norm": 0.17054687440395355, "learning_rate": 0.002, "loss": 2.5497, "step": 269270 }, { "epoch": 0.5364656381486677, "grad_norm": 0.15931248664855957, "learning_rate": 0.002, "loss": 2.5591, "step": 269280 }, { "epoch": 0.5364855603723065, "grad_norm": 0.15756374597549438, "learning_rate": 0.002, "loss": 2.5704, "step": 269290 }, { "epoch": 0.5365054825959454, "grad_norm": 0.1921396702528, "learning_rate": 0.002, "loss": 2.5559, "step": 269300 }, { "epoch": 0.5365254048195843, "grad_norm": 0.14928866922855377, "learning_rate": 0.002, "loss": 2.5484, "step": 269310 }, { "epoch": 0.5365453270432232, "grad_norm": 0.1640063226222992, "learning_rate": 0.002, "loss": 2.5505, "step": 269320 }, { "epoch": 0.5365652492668622, "grad_norm": 0.17457404732704163, "learning_rate": 0.002, "loss": 2.5584, "step": 269330 }, { "epoch": 0.5365851714905011, "grad_norm": 0.14570309221744537, "learning_rate": 0.002, "loss": 2.5567, "step": 269340 }, { "epoch": 0.53660509371414, "grad_norm": 0.19977909326553345, "learning_rate": 0.002, "loss": 2.5721, "step": 269350 }, { "epoch": 0.5366250159377789, "grad_norm": 0.15112172067165375, "learning_rate": 0.002, "loss": 2.5596, "step": 269360 }, { "epoch": 0.5366449381614178, "grad_norm": 0.16526369750499725, "learning_rate": 0.002, "loss": 2.5576, "step": 269370 }, { "epoch": 0.5366648603850568, "grad_norm": 0.14830496907234192, "learning_rate": 0.002, "loss": 2.553, "step": 269380 }, { "epoch": 0.5366847826086957, "grad_norm": 0.18143580853939056, "learning_rate": 0.002, "loss": 2.5639, "step": 269390 }, { "epoch": 0.5367047048323346, "grad_norm": 0.143254354596138, "learning_rate": 0.002, "loss": 2.5493, "step": 269400 }, { "epoch": 0.5367246270559735, "grad_norm": 0.1480446308851242, "learning_rate": 0.002, "loss": 2.557, "step": 269410 }, { "epoch": 0.5367445492796123, "grad_norm": 0.23231694102287292, "learning_rate": 0.002, "loss": 2.5508, "step": 269420 }, { "epoch": 0.5367644715032514, "grad_norm": 0.16200970113277435, "learning_rate": 0.002, "loss": 2.5641, "step": 269430 }, { "epoch": 0.5367843937268902, "grad_norm": 0.1895189732313156, "learning_rate": 0.002, "loss": 2.5531, "step": 269440 }, { "epoch": 0.5368043159505291, "grad_norm": 0.15643583238124847, "learning_rate": 0.002, "loss": 2.5439, "step": 269450 }, { "epoch": 0.536824238174168, "grad_norm": 0.18947356939315796, "learning_rate": 0.002, "loss": 2.5675, "step": 269460 }, { "epoch": 0.5368441603978069, "grad_norm": 0.15143364667892456, "learning_rate": 0.002, "loss": 2.5656, "step": 269470 }, { "epoch": 0.5368640826214459, "grad_norm": 0.18373537063598633, "learning_rate": 0.002, "loss": 2.5513, "step": 269480 }, { "epoch": 0.5368840048450848, "grad_norm": 0.16090825200080872, "learning_rate": 0.002, "loss": 2.555, "step": 269490 }, { "epoch": 0.5369039270687237, "grad_norm": 0.1649121791124344, "learning_rate": 0.002, "loss": 2.5588, "step": 269500 }, { "epoch": 0.5369238492923626, "grad_norm": 0.1657388061285019, "learning_rate": 0.002, "loss": 2.5553, "step": 269510 }, { "epoch": 0.5369437715160015, "grad_norm": 0.16804347932338715, "learning_rate": 0.002, "loss": 2.574, "step": 269520 }, { "epoch": 0.5369636937396405, "grad_norm": 0.13694258034229279, "learning_rate": 0.002, "loss": 2.563, "step": 269530 }, { "epoch": 0.5369836159632794, "grad_norm": 0.14632368087768555, "learning_rate": 0.002, "loss": 2.5568, "step": 269540 }, { "epoch": 0.5370035381869183, "grad_norm": 0.17775137722492218, "learning_rate": 0.002, "loss": 2.5443, "step": 269550 }, { "epoch": 0.5370234604105572, "grad_norm": 0.17633958160877228, "learning_rate": 0.002, "loss": 2.5604, "step": 269560 }, { "epoch": 0.5370433826341962, "grad_norm": 0.1604994535446167, "learning_rate": 0.002, "loss": 2.5491, "step": 269570 }, { "epoch": 0.537063304857835, "grad_norm": 0.16394992172718048, "learning_rate": 0.002, "loss": 2.5541, "step": 269580 }, { "epoch": 0.5370832270814739, "grad_norm": 0.15800608694553375, "learning_rate": 0.002, "loss": 2.5546, "step": 269590 }, { "epoch": 0.5371031493051128, "grad_norm": 0.17250609397888184, "learning_rate": 0.002, "loss": 2.5635, "step": 269600 }, { "epoch": 0.5371230715287517, "grad_norm": 0.18496236205101013, "learning_rate": 0.002, "loss": 2.5525, "step": 269610 }, { "epoch": 0.5371429937523907, "grad_norm": 0.15714207291603088, "learning_rate": 0.002, "loss": 2.5586, "step": 269620 }, { "epoch": 0.5371629159760296, "grad_norm": 0.15591558814048767, "learning_rate": 0.002, "loss": 2.5395, "step": 269630 }, { "epoch": 0.5371828381996685, "grad_norm": 0.18744602799415588, "learning_rate": 0.002, "loss": 2.5506, "step": 269640 }, { "epoch": 0.5372027604233074, "grad_norm": 0.21831350028514862, "learning_rate": 0.002, "loss": 2.556, "step": 269650 }, { "epoch": 0.5372226826469463, "grad_norm": 0.13845759630203247, "learning_rate": 0.002, "loss": 2.5736, "step": 269660 }, { "epoch": 0.5372426048705853, "grad_norm": 0.15241150557994843, "learning_rate": 0.002, "loss": 2.5606, "step": 269670 }, { "epoch": 0.5372625270942242, "grad_norm": 0.17608188092708588, "learning_rate": 0.002, "loss": 2.5666, "step": 269680 }, { "epoch": 0.5372824493178631, "grad_norm": 0.17338469624519348, "learning_rate": 0.002, "loss": 2.5667, "step": 269690 }, { "epoch": 0.537302371541502, "grad_norm": 0.15600165724754333, "learning_rate": 0.002, "loss": 2.549, "step": 269700 }, { "epoch": 0.5373222937651408, "grad_norm": 0.18233217298984528, "learning_rate": 0.002, "loss": 2.5535, "step": 269710 }, { "epoch": 0.5373422159887798, "grad_norm": 0.1905532032251358, "learning_rate": 0.002, "loss": 2.5644, "step": 269720 }, { "epoch": 0.5373621382124187, "grad_norm": 0.1567777693271637, "learning_rate": 0.002, "loss": 2.5692, "step": 269730 }, { "epoch": 0.5373820604360576, "grad_norm": 0.14509256184101105, "learning_rate": 0.002, "loss": 2.53, "step": 269740 }, { "epoch": 0.5374019826596965, "grad_norm": 0.16656264662742615, "learning_rate": 0.002, "loss": 2.5606, "step": 269750 }, { "epoch": 0.5374219048833354, "grad_norm": 0.15814852714538574, "learning_rate": 0.002, "loss": 2.5559, "step": 269760 }, { "epoch": 0.5374418271069744, "grad_norm": 0.14966779947280884, "learning_rate": 0.002, "loss": 2.5618, "step": 269770 }, { "epoch": 0.5374617493306133, "grad_norm": 0.16495774686336517, "learning_rate": 0.002, "loss": 2.5641, "step": 269780 }, { "epoch": 0.5374816715542522, "grad_norm": 0.1683863401412964, "learning_rate": 0.002, "loss": 2.5631, "step": 269790 }, { "epoch": 0.5375015937778911, "grad_norm": 0.18447159230709076, "learning_rate": 0.002, "loss": 2.5504, "step": 269800 }, { "epoch": 0.53752151600153, "grad_norm": 0.17038996517658234, "learning_rate": 0.002, "loss": 2.5536, "step": 269810 }, { "epoch": 0.537541438225169, "grad_norm": 0.18333910405635834, "learning_rate": 0.002, "loss": 2.5534, "step": 269820 }, { "epoch": 0.5375613604488079, "grad_norm": 0.1598156839609146, "learning_rate": 0.002, "loss": 2.5614, "step": 269830 }, { "epoch": 0.5375812826724468, "grad_norm": 0.15110188722610474, "learning_rate": 0.002, "loss": 2.5505, "step": 269840 }, { "epoch": 0.5376012048960856, "grad_norm": 0.17978435754776, "learning_rate": 0.002, "loss": 2.5603, "step": 269850 }, { "epoch": 0.5376211271197245, "grad_norm": 0.1590314358472824, "learning_rate": 0.002, "loss": 2.5469, "step": 269860 }, { "epoch": 0.5376410493433635, "grad_norm": 0.1486491858959198, "learning_rate": 0.002, "loss": 2.5695, "step": 269870 }, { "epoch": 0.5376609715670024, "grad_norm": 0.15749332308769226, "learning_rate": 0.002, "loss": 2.5719, "step": 269880 }, { "epoch": 0.5376808937906413, "grad_norm": 0.16702455282211304, "learning_rate": 0.002, "loss": 2.5466, "step": 269890 }, { "epoch": 0.5377008160142802, "grad_norm": 0.1753958910703659, "learning_rate": 0.002, "loss": 2.5563, "step": 269900 }, { "epoch": 0.5377207382379192, "grad_norm": 0.18474338948726654, "learning_rate": 0.002, "loss": 2.5503, "step": 269910 }, { "epoch": 0.5377406604615581, "grad_norm": 0.17591899633407593, "learning_rate": 0.002, "loss": 2.5787, "step": 269920 }, { "epoch": 0.537760582685197, "grad_norm": 0.1624305546283722, "learning_rate": 0.002, "loss": 2.5771, "step": 269930 }, { "epoch": 0.5377805049088359, "grad_norm": 0.2011319398880005, "learning_rate": 0.002, "loss": 2.5614, "step": 269940 }, { "epoch": 0.5378004271324748, "grad_norm": 0.17438313364982605, "learning_rate": 0.002, "loss": 2.5604, "step": 269950 }, { "epoch": 0.5378203493561138, "grad_norm": 0.17147189378738403, "learning_rate": 0.002, "loss": 2.5586, "step": 269960 }, { "epoch": 0.5378402715797527, "grad_norm": 0.2015756368637085, "learning_rate": 0.002, "loss": 2.5482, "step": 269970 }, { "epoch": 0.5378601938033916, "grad_norm": 0.15533988177776337, "learning_rate": 0.002, "loss": 2.5714, "step": 269980 }, { "epoch": 0.5378801160270305, "grad_norm": 0.16992494463920593, "learning_rate": 0.002, "loss": 2.5377, "step": 269990 }, { "epoch": 0.5379000382506693, "grad_norm": 0.14397785067558289, "learning_rate": 0.002, "loss": 2.5467, "step": 270000 }, { "epoch": 0.5379199604743083, "grad_norm": 0.21837086975574493, "learning_rate": 0.002, "loss": 2.564, "step": 270010 }, { "epoch": 0.5379398826979472, "grad_norm": 0.15627354383468628, "learning_rate": 0.002, "loss": 2.5564, "step": 270020 }, { "epoch": 0.5379598049215861, "grad_norm": 0.22161079943180084, "learning_rate": 0.002, "loss": 2.5397, "step": 270030 }, { "epoch": 0.537979727145225, "grad_norm": 0.14582307636737823, "learning_rate": 0.002, "loss": 2.5524, "step": 270040 }, { "epoch": 0.5379996493688639, "grad_norm": 0.14989009499549866, "learning_rate": 0.002, "loss": 2.5605, "step": 270050 }, { "epoch": 0.5380195715925029, "grad_norm": 0.18315716087818146, "learning_rate": 0.002, "loss": 2.5393, "step": 270060 }, { "epoch": 0.5380394938161418, "grad_norm": 0.15337993204593658, "learning_rate": 0.002, "loss": 2.5592, "step": 270070 }, { "epoch": 0.5380594160397807, "grad_norm": 0.19455556571483612, "learning_rate": 0.002, "loss": 2.5585, "step": 270080 }, { "epoch": 0.5380793382634196, "grad_norm": 0.2042209655046463, "learning_rate": 0.002, "loss": 2.5763, "step": 270090 }, { "epoch": 0.5380992604870585, "grad_norm": 0.16873162984848022, "learning_rate": 0.002, "loss": 2.5628, "step": 270100 }, { "epoch": 0.5381191827106975, "grad_norm": 0.13846024870872498, "learning_rate": 0.002, "loss": 2.5615, "step": 270110 }, { "epoch": 0.5381391049343364, "grad_norm": 0.23782889544963837, "learning_rate": 0.002, "loss": 2.5442, "step": 270120 }, { "epoch": 0.5381590271579753, "grad_norm": 0.15223154425621033, "learning_rate": 0.002, "loss": 2.5446, "step": 270130 }, { "epoch": 0.5381789493816141, "grad_norm": 0.1390601396560669, "learning_rate": 0.002, "loss": 2.5593, "step": 270140 }, { "epoch": 0.538198871605253, "grad_norm": 0.16772376000881195, "learning_rate": 0.002, "loss": 2.5644, "step": 270150 }, { "epoch": 0.538218793828892, "grad_norm": 0.163993701338768, "learning_rate": 0.002, "loss": 2.5712, "step": 270160 }, { "epoch": 0.5382387160525309, "grad_norm": 0.14688624441623688, "learning_rate": 0.002, "loss": 2.5713, "step": 270170 }, { "epoch": 0.5382586382761698, "grad_norm": 0.1724044382572174, "learning_rate": 0.002, "loss": 2.5486, "step": 270180 }, { "epoch": 0.5382785604998087, "grad_norm": 0.18859362602233887, "learning_rate": 0.002, "loss": 2.5608, "step": 270190 }, { "epoch": 0.5382984827234477, "grad_norm": 0.14360937476158142, "learning_rate": 0.002, "loss": 2.5709, "step": 270200 }, { "epoch": 0.5383184049470866, "grad_norm": 0.18995702266693115, "learning_rate": 0.002, "loss": 2.5667, "step": 270210 }, { "epoch": 0.5383383271707255, "grad_norm": 0.1748199313879013, "learning_rate": 0.002, "loss": 2.5769, "step": 270220 }, { "epoch": 0.5383582493943644, "grad_norm": 0.15473966300487518, "learning_rate": 0.002, "loss": 2.5425, "step": 270230 }, { "epoch": 0.5383781716180033, "grad_norm": 0.20121467113494873, "learning_rate": 0.002, "loss": 2.5708, "step": 270240 }, { "epoch": 0.5383980938416423, "grad_norm": 0.1579664945602417, "learning_rate": 0.002, "loss": 2.5413, "step": 270250 }, { "epoch": 0.5384180160652812, "grad_norm": 0.19590391218662262, "learning_rate": 0.002, "loss": 2.5499, "step": 270260 }, { "epoch": 0.5384379382889201, "grad_norm": 0.16067713499069214, "learning_rate": 0.002, "loss": 2.5502, "step": 270270 }, { "epoch": 0.538457860512559, "grad_norm": 0.18280978500843048, "learning_rate": 0.002, "loss": 2.5515, "step": 270280 }, { "epoch": 0.5384777827361978, "grad_norm": 0.1661442220211029, "learning_rate": 0.002, "loss": 2.5564, "step": 270290 }, { "epoch": 0.5384977049598368, "grad_norm": 0.15762701630592346, "learning_rate": 0.002, "loss": 2.5483, "step": 270300 }, { "epoch": 0.5385176271834757, "grad_norm": 0.17531807720661163, "learning_rate": 0.002, "loss": 2.5576, "step": 270310 }, { "epoch": 0.5385375494071146, "grad_norm": 0.16113899648189545, "learning_rate": 0.002, "loss": 2.5557, "step": 270320 }, { "epoch": 0.5385574716307535, "grad_norm": 0.16191500425338745, "learning_rate": 0.002, "loss": 2.5407, "step": 270330 }, { "epoch": 0.5385773938543924, "grad_norm": 0.16238543391227722, "learning_rate": 0.002, "loss": 2.5631, "step": 270340 }, { "epoch": 0.5385973160780314, "grad_norm": 0.1828572303056717, "learning_rate": 0.002, "loss": 2.5615, "step": 270350 }, { "epoch": 0.5386172383016703, "grad_norm": 0.17244897782802582, "learning_rate": 0.002, "loss": 2.5362, "step": 270360 }, { "epoch": 0.5386371605253092, "grad_norm": 0.1733683943748474, "learning_rate": 0.002, "loss": 2.5586, "step": 270370 }, { "epoch": 0.5386570827489481, "grad_norm": 0.17281833291053772, "learning_rate": 0.002, "loss": 2.553, "step": 270380 }, { "epoch": 0.538677004972587, "grad_norm": 0.18480663001537323, "learning_rate": 0.002, "loss": 2.5657, "step": 270390 }, { "epoch": 0.538696927196226, "grad_norm": 0.1918647438287735, "learning_rate": 0.002, "loss": 2.5497, "step": 270400 }, { "epoch": 0.5387168494198649, "grad_norm": 0.14737719297409058, "learning_rate": 0.002, "loss": 2.5431, "step": 270410 }, { "epoch": 0.5387367716435038, "grad_norm": 0.17787030339241028, "learning_rate": 0.002, "loss": 2.5677, "step": 270420 }, { "epoch": 0.5387566938671426, "grad_norm": 0.21147103607654572, "learning_rate": 0.002, "loss": 2.5525, "step": 270430 }, { "epoch": 0.5387766160907815, "grad_norm": 0.13996522128582, "learning_rate": 0.002, "loss": 2.56, "step": 270440 }, { "epoch": 0.5387965383144205, "grad_norm": 0.18182170391082764, "learning_rate": 0.002, "loss": 2.5655, "step": 270450 }, { "epoch": 0.5388164605380594, "grad_norm": 0.17629815638065338, "learning_rate": 0.002, "loss": 2.5539, "step": 270460 }, { "epoch": 0.5388363827616983, "grad_norm": 0.16322289407253265, "learning_rate": 0.002, "loss": 2.5567, "step": 270470 }, { "epoch": 0.5388563049853372, "grad_norm": 0.18984591960906982, "learning_rate": 0.002, "loss": 2.5636, "step": 270480 }, { "epoch": 0.5388762272089762, "grad_norm": 0.1870962232351303, "learning_rate": 0.002, "loss": 2.5501, "step": 270490 }, { "epoch": 0.5388961494326151, "grad_norm": 0.1619338095188141, "learning_rate": 0.002, "loss": 2.5554, "step": 270500 }, { "epoch": 0.538916071656254, "grad_norm": 0.16075845062732697, "learning_rate": 0.002, "loss": 2.5633, "step": 270510 }, { "epoch": 0.5389359938798929, "grad_norm": 0.18058857321739197, "learning_rate": 0.002, "loss": 2.5467, "step": 270520 }, { "epoch": 0.5389559161035318, "grad_norm": 0.1585453301668167, "learning_rate": 0.002, "loss": 2.5626, "step": 270530 }, { "epoch": 0.5389758383271708, "grad_norm": 0.17368055880069733, "learning_rate": 0.002, "loss": 2.5647, "step": 270540 }, { "epoch": 0.5389957605508097, "grad_norm": 0.15292444825172424, "learning_rate": 0.002, "loss": 2.5483, "step": 270550 }, { "epoch": 0.5390156827744486, "grad_norm": 0.1492496281862259, "learning_rate": 0.002, "loss": 2.5483, "step": 270560 }, { "epoch": 0.5390356049980874, "grad_norm": 0.15740905702114105, "learning_rate": 0.002, "loss": 2.5553, "step": 270570 }, { "epoch": 0.5390555272217263, "grad_norm": 0.16488541662693024, "learning_rate": 0.002, "loss": 2.5645, "step": 270580 }, { "epoch": 0.5390754494453653, "grad_norm": 0.18699124455451965, "learning_rate": 0.002, "loss": 2.5628, "step": 270590 }, { "epoch": 0.5390953716690042, "grad_norm": 0.16501784324645996, "learning_rate": 0.002, "loss": 2.5519, "step": 270600 }, { "epoch": 0.5391152938926431, "grad_norm": 0.13898323476314545, "learning_rate": 0.002, "loss": 2.552, "step": 270610 }, { "epoch": 0.539135216116282, "grad_norm": 0.2138848602771759, "learning_rate": 0.002, "loss": 2.5674, "step": 270620 }, { "epoch": 0.5391551383399209, "grad_norm": 0.2850789427757263, "learning_rate": 0.002, "loss": 2.5676, "step": 270630 }, { "epoch": 0.5391750605635599, "grad_norm": 0.14481955766677856, "learning_rate": 0.002, "loss": 2.5574, "step": 270640 }, { "epoch": 0.5391949827871988, "grad_norm": 0.18554483354091644, "learning_rate": 0.002, "loss": 2.5655, "step": 270650 }, { "epoch": 0.5392149050108377, "grad_norm": 0.15679973363876343, "learning_rate": 0.002, "loss": 2.5636, "step": 270660 }, { "epoch": 0.5392348272344766, "grad_norm": 0.17445343732833862, "learning_rate": 0.002, "loss": 2.5662, "step": 270670 }, { "epoch": 0.5392547494581155, "grad_norm": 0.15987810492515564, "learning_rate": 0.002, "loss": 2.5604, "step": 270680 }, { "epoch": 0.5392746716817545, "grad_norm": 0.16237299144268036, "learning_rate": 0.002, "loss": 2.5576, "step": 270690 }, { "epoch": 0.5392945939053934, "grad_norm": 0.17673461139202118, "learning_rate": 0.002, "loss": 2.56, "step": 270700 }, { "epoch": 0.5393145161290323, "grad_norm": 0.1484929472208023, "learning_rate": 0.002, "loss": 2.5603, "step": 270710 }, { "epoch": 0.5393344383526711, "grad_norm": 0.16473308205604553, "learning_rate": 0.002, "loss": 2.5495, "step": 270720 }, { "epoch": 0.53935436057631, "grad_norm": 0.15107378363609314, "learning_rate": 0.002, "loss": 2.568, "step": 270730 }, { "epoch": 0.539374282799949, "grad_norm": 0.1556260883808136, "learning_rate": 0.002, "loss": 2.5621, "step": 270740 }, { "epoch": 0.5393942050235879, "grad_norm": 0.1769949197769165, "learning_rate": 0.002, "loss": 2.5573, "step": 270750 }, { "epoch": 0.5394141272472268, "grad_norm": 0.1898423284292221, "learning_rate": 0.002, "loss": 2.558, "step": 270760 }, { "epoch": 0.5394340494708657, "grad_norm": 0.19639922678470612, "learning_rate": 0.002, "loss": 2.5573, "step": 270770 }, { "epoch": 0.5394539716945047, "grad_norm": 0.20118974149227142, "learning_rate": 0.002, "loss": 2.5563, "step": 270780 }, { "epoch": 0.5394738939181436, "grad_norm": 0.14349785447120667, "learning_rate": 0.002, "loss": 2.5467, "step": 270790 }, { "epoch": 0.5394938161417825, "grad_norm": 0.13773253560066223, "learning_rate": 0.002, "loss": 2.5665, "step": 270800 }, { "epoch": 0.5395137383654214, "grad_norm": 0.1665918081998825, "learning_rate": 0.002, "loss": 2.5552, "step": 270810 }, { "epoch": 0.5395336605890603, "grad_norm": 0.16079334914684296, "learning_rate": 0.002, "loss": 2.5568, "step": 270820 }, { "epoch": 0.5395535828126993, "grad_norm": 0.17122957110404968, "learning_rate": 0.002, "loss": 2.5548, "step": 270830 }, { "epoch": 0.5395735050363382, "grad_norm": 0.15423712134361267, "learning_rate": 0.002, "loss": 2.5477, "step": 270840 }, { "epoch": 0.539593427259977, "grad_norm": 0.15743908286094666, "learning_rate": 0.002, "loss": 2.5533, "step": 270850 }, { "epoch": 0.539613349483616, "grad_norm": 0.16781646013259888, "learning_rate": 0.002, "loss": 2.5534, "step": 270860 }, { "epoch": 0.5396332717072548, "grad_norm": 0.1792820394039154, "learning_rate": 0.002, "loss": 2.5597, "step": 270870 }, { "epoch": 0.5396531939308938, "grad_norm": 0.16777756810188293, "learning_rate": 0.002, "loss": 2.5616, "step": 270880 }, { "epoch": 0.5396731161545327, "grad_norm": 0.16352733969688416, "learning_rate": 0.002, "loss": 2.5693, "step": 270890 }, { "epoch": 0.5396930383781716, "grad_norm": 0.15231208503246307, "learning_rate": 0.002, "loss": 2.5423, "step": 270900 }, { "epoch": 0.5397129606018105, "grad_norm": 0.1839803010225296, "learning_rate": 0.002, "loss": 2.57, "step": 270910 }, { "epoch": 0.5397328828254494, "grad_norm": 0.1637660264968872, "learning_rate": 0.002, "loss": 2.5605, "step": 270920 }, { "epoch": 0.5397528050490884, "grad_norm": 0.18051986396312714, "learning_rate": 0.002, "loss": 2.5647, "step": 270930 }, { "epoch": 0.5397727272727273, "grad_norm": 0.15971344709396362, "learning_rate": 0.002, "loss": 2.5565, "step": 270940 }, { "epoch": 0.5397926494963662, "grad_norm": 0.1523740142583847, "learning_rate": 0.002, "loss": 2.5645, "step": 270950 }, { "epoch": 0.5398125717200051, "grad_norm": 0.20105867087841034, "learning_rate": 0.002, "loss": 2.5524, "step": 270960 }, { "epoch": 0.539832493943644, "grad_norm": 0.1492065042257309, "learning_rate": 0.002, "loss": 2.5558, "step": 270970 }, { "epoch": 0.539852416167283, "grad_norm": 0.17450784146785736, "learning_rate": 0.002, "loss": 2.5633, "step": 270980 }, { "epoch": 0.5398723383909219, "grad_norm": 0.16259874403476715, "learning_rate": 0.002, "loss": 2.5598, "step": 270990 }, { "epoch": 0.5398922606145607, "grad_norm": 0.14950883388519287, "learning_rate": 0.002, "loss": 2.5528, "step": 271000 }, { "epoch": 0.5399121828381996, "grad_norm": 0.22002041339874268, "learning_rate": 0.002, "loss": 2.5584, "step": 271010 }, { "epoch": 0.5399321050618385, "grad_norm": 0.16043603420257568, "learning_rate": 0.002, "loss": 2.5653, "step": 271020 }, { "epoch": 0.5399520272854775, "grad_norm": 0.16937977075576782, "learning_rate": 0.002, "loss": 2.5622, "step": 271030 }, { "epoch": 0.5399719495091164, "grad_norm": 0.15860304236412048, "learning_rate": 0.002, "loss": 2.54, "step": 271040 }, { "epoch": 0.5399918717327553, "grad_norm": 0.1686747521162033, "learning_rate": 0.002, "loss": 2.5669, "step": 271050 }, { "epoch": 0.5400117939563942, "grad_norm": 0.2229910045862198, "learning_rate": 0.002, "loss": 2.5622, "step": 271060 }, { "epoch": 0.5400317161800332, "grad_norm": 0.1720282882452011, "learning_rate": 0.002, "loss": 2.5606, "step": 271070 }, { "epoch": 0.5400516384036721, "grad_norm": 0.1465798318386078, "learning_rate": 0.002, "loss": 2.5425, "step": 271080 }, { "epoch": 0.540071560627311, "grad_norm": 0.1825433075428009, "learning_rate": 0.002, "loss": 2.5555, "step": 271090 }, { "epoch": 0.5400914828509499, "grad_norm": 0.1462731659412384, "learning_rate": 0.002, "loss": 2.5507, "step": 271100 }, { "epoch": 0.5401114050745888, "grad_norm": 0.15925590693950653, "learning_rate": 0.002, "loss": 2.5544, "step": 271110 }, { "epoch": 0.5401313272982278, "grad_norm": 0.1501440852880478, "learning_rate": 0.002, "loss": 2.5666, "step": 271120 }, { "epoch": 0.5401512495218667, "grad_norm": 0.20030337572097778, "learning_rate": 0.002, "loss": 2.5724, "step": 271130 }, { "epoch": 0.5401711717455056, "grad_norm": 0.1600349247455597, "learning_rate": 0.002, "loss": 2.5586, "step": 271140 }, { "epoch": 0.5401910939691444, "grad_norm": 0.17329278588294983, "learning_rate": 0.002, "loss": 2.5552, "step": 271150 }, { "epoch": 0.5402110161927833, "grad_norm": 0.16770917177200317, "learning_rate": 0.002, "loss": 2.5633, "step": 271160 }, { "epoch": 0.5402309384164223, "grad_norm": 0.17060479521751404, "learning_rate": 0.002, "loss": 2.5477, "step": 271170 }, { "epoch": 0.5402508606400612, "grad_norm": 0.1674707531929016, "learning_rate": 0.002, "loss": 2.5568, "step": 271180 }, { "epoch": 0.5402707828637001, "grad_norm": 0.1829751878976822, "learning_rate": 0.002, "loss": 2.5567, "step": 271190 }, { "epoch": 0.540290705087339, "grad_norm": 0.1856372207403183, "learning_rate": 0.002, "loss": 2.5655, "step": 271200 }, { "epoch": 0.5403106273109779, "grad_norm": 0.16979707777500153, "learning_rate": 0.002, "loss": 2.5608, "step": 271210 }, { "epoch": 0.5403305495346169, "grad_norm": 0.18089371919631958, "learning_rate": 0.002, "loss": 2.5578, "step": 271220 }, { "epoch": 0.5403504717582558, "grad_norm": 0.14687561988830566, "learning_rate": 0.002, "loss": 2.5501, "step": 271230 }, { "epoch": 0.5403703939818947, "grad_norm": 0.15449708700180054, "learning_rate": 0.002, "loss": 2.5511, "step": 271240 }, { "epoch": 0.5403903162055336, "grad_norm": 0.153848797082901, "learning_rate": 0.002, "loss": 2.5556, "step": 271250 }, { "epoch": 0.5404102384291725, "grad_norm": 0.1737292855978012, "learning_rate": 0.002, "loss": 2.5424, "step": 271260 }, { "epoch": 0.5404301606528115, "grad_norm": 0.2058563381433487, "learning_rate": 0.002, "loss": 2.5437, "step": 271270 }, { "epoch": 0.5404500828764504, "grad_norm": 0.17769019305706024, "learning_rate": 0.002, "loss": 2.5591, "step": 271280 }, { "epoch": 0.5404700051000892, "grad_norm": 0.16514554619789124, "learning_rate": 0.002, "loss": 2.5597, "step": 271290 }, { "epoch": 0.5404899273237281, "grad_norm": 0.15240271389484406, "learning_rate": 0.002, "loss": 2.5496, "step": 271300 }, { "epoch": 0.540509849547367, "grad_norm": 0.16664613783359528, "learning_rate": 0.002, "loss": 2.5503, "step": 271310 }, { "epoch": 0.540529771771006, "grad_norm": 0.15955036878585815, "learning_rate": 0.002, "loss": 2.5468, "step": 271320 }, { "epoch": 0.5405496939946449, "grad_norm": 0.18585491180419922, "learning_rate": 0.002, "loss": 2.5789, "step": 271330 }, { "epoch": 0.5405696162182838, "grad_norm": 0.1706576645374298, "learning_rate": 0.002, "loss": 2.5351, "step": 271340 }, { "epoch": 0.5405895384419227, "grad_norm": 0.17325268685817719, "learning_rate": 0.002, "loss": 2.5577, "step": 271350 }, { "epoch": 0.5406094606655616, "grad_norm": 0.15737754106521606, "learning_rate": 0.002, "loss": 2.5544, "step": 271360 }, { "epoch": 0.5406293828892006, "grad_norm": 0.23473046720027924, "learning_rate": 0.002, "loss": 2.5476, "step": 271370 }, { "epoch": 0.5406493051128395, "grad_norm": 0.17074905335903168, "learning_rate": 0.002, "loss": 2.5736, "step": 271380 }, { "epoch": 0.5406692273364784, "grad_norm": 0.1527671366930008, "learning_rate": 0.002, "loss": 2.5413, "step": 271390 }, { "epoch": 0.5406891495601173, "grad_norm": 0.18552224338054657, "learning_rate": 0.002, "loss": 2.5569, "step": 271400 }, { "epoch": 0.5407090717837563, "grad_norm": 0.16375568509101868, "learning_rate": 0.002, "loss": 2.5706, "step": 271410 }, { "epoch": 0.5407289940073952, "grad_norm": 0.16630491614341736, "learning_rate": 0.002, "loss": 2.5667, "step": 271420 }, { "epoch": 0.540748916231034, "grad_norm": 0.1547536551952362, "learning_rate": 0.002, "loss": 2.5553, "step": 271430 }, { "epoch": 0.5407688384546729, "grad_norm": 0.16394437849521637, "learning_rate": 0.002, "loss": 2.5586, "step": 271440 }, { "epoch": 0.5407887606783118, "grad_norm": 0.1401585191488266, "learning_rate": 0.002, "loss": 2.5559, "step": 271450 }, { "epoch": 0.5408086829019508, "grad_norm": 0.18439549207687378, "learning_rate": 0.002, "loss": 2.5723, "step": 271460 }, { "epoch": 0.5408286051255897, "grad_norm": 0.17145995795726776, "learning_rate": 0.002, "loss": 2.5501, "step": 271470 }, { "epoch": 0.5408485273492286, "grad_norm": 0.15551325678825378, "learning_rate": 0.002, "loss": 2.565, "step": 271480 }, { "epoch": 0.5408684495728675, "grad_norm": 0.15576013922691345, "learning_rate": 0.002, "loss": 2.558, "step": 271490 }, { "epoch": 0.5408883717965064, "grad_norm": 0.15507352352142334, "learning_rate": 0.002, "loss": 2.5439, "step": 271500 }, { "epoch": 0.5409082940201454, "grad_norm": 0.16417624056339264, "learning_rate": 0.002, "loss": 2.5546, "step": 271510 }, { "epoch": 0.5409282162437843, "grad_norm": 0.16434995830059052, "learning_rate": 0.002, "loss": 2.5605, "step": 271520 }, { "epoch": 0.5409481384674232, "grad_norm": 0.16447044909000397, "learning_rate": 0.002, "loss": 2.5484, "step": 271530 }, { "epoch": 0.5409680606910621, "grad_norm": 0.15911290049552917, "learning_rate": 0.002, "loss": 2.5537, "step": 271540 }, { "epoch": 0.540987982914701, "grad_norm": 0.15701349079608917, "learning_rate": 0.002, "loss": 2.5674, "step": 271550 }, { "epoch": 0.54100790513834, "grad_norm": 0.18309195339679718, "learning_rate": 0.002, "loss": 2.5536, "step": 271560 }, { "epoch": 0.5410278273619789, "grad_norm": 0.15010099112987518, "learning_rate": 0.002, "loss": 2.5516, "step": 271570 }, { "epoch": 0.5410477495856177, "grad_norm": 0.17979292571544647, "learning_rate": 0.002, "loss": 2.5618, "step": 271580 }, { "epoch": 0.5410676718092566, "grad_norm": 0.17863725125789642, "learning_rate": 0.002, "loss": 2.5613, "step": 271590 }, { "epoch": 0.5410875940328955, "grad_norm": 0.1520983874797821, "learning_rate": 0.002, "loss": 2.5435, "step": 271600 }, { "epoch": 0.5411075162565345, "grad_norm": 0.17714832723140717, "learning_rate": 0.002, "loss": 2.5655, "step": 271610 }, { "epoch": 0.5411274384801734, "grad_norm": 0.18202528357505798, "learning_rate": 0.002, "loss": 2.5687, "step": 271620 }, { "epoch": 0.5411473607038123, "grad_norm": 0.15928897261619568, "learning_rate": 0.002, "loss": 2.5792, "step": 271630 }, { "epoch": 0.5411672829274512, "grad_norm": 0.15040239691734314, "learning_rate": 0.002, "loss": 2.5689, "step": 271640 }, { "epoch": 0.5411872051510901, "grad_norm": 0.1463467925786972, "learning_rate": 0.002, "loss": 2.5556, "step": 271650 }, { "epoch": 0.5412071273747291, "grad_norm": 0.21622774004936218, "learning_rate": 0.002, "loss": 2.5676, "step": 271660 }, { "epoch": 0.541227049598368, "grad_norm": 0.12812791764736176, "learning_rate": 0.002, "loss": 2.5698, "step": 271670 }, { "epoch": 0.5412469718220069, "grad_norm": 0.17523662745952606, "learning_rate": 0.002, "loss": 2.5686, "step": 271680 }, { "epoch": 0.5412668940456458, "grad_norm": 0.18787728250026703, "learning_rate": 0.002, "loss": 2.5464, "step": 271690 }, { "epoch": 0.5412868162692848, "grad_norm": 0.16720835864543915, "learning_rate": 0.002, "loss": 2.5681, "step": 271700 }, { "epoch": 0.5413067384929237, "grad_norm": 0.1783803403377533, "learning_rate": 0.002, "loss": 2.5512, "step": 271710 }, { "epoch": 0.5413266607165625, "grad_norm": 0.17415650188922882, "learning_rate": 0.002, "loss": 2.553, "step": 271720 }, { "epoch": 0.5413465829402014, "grad_norm": 0.16011473536491394, "learning_rate": 0.002, "loss": 2.547, "step": 271730 }, { "epoch": 0.5413665051638403, "grad_norm": 0.17468427121639252, "learning_rate": 0.002, "loss": 2.5531, "step": 271740 }, { "epoch": 0.5413864273874793, "grad_norm": 0.15814368426799774, "learning_rate": 0.002, "loss": 2.5487, "step": 271750 }, { "epoch": 0.5414063496111182, "grad_norm": 0.18194696307182312, "learning_rate": 0.002, "loss": 2.5535, "step": 271760 }, { "epoch": 0.5414262718347571, "grad_norm": 0.15760867297649384, "learning_rate": 0.002, "loss": 2.5411, "step": 271770 }, { "epoch": 0.541446194058396, "grad_norm": 0.16028763353824615, "learning_rate": 0.002, "loss": 2.5571, "step": 271780 }, { "epoch": 0.5414661162820349, "grad_norm": 0.14328496158123016, "learning_rate": 0.002, "loss": 2.5651, "step": 271790 }, { "epoch": 0.5414860385056739, "grad_norm": 0.16870686411857605, "learning_rate": 0.002, "loss": 2.5378, "step": 271800 }, { "epoch": 0.5415059607293128, "grad_norm": 0.18830659985542297, "learning_rate": 0.002, "loss": 2.5576, "step": 271810 }, { "epoch": 0.5415258829529517, "grad_norm": 0.19046704471111298, "learning_rate": 0.002, "loss": 2.5597, "step": 271820 }, { "epoch": 0.5415458051765906, "grad_norm": 0.15559923648834229, "learning_rate": 0.002, "loss": 2.5463, "step": 271830 }, { "epoch": 0.5415657274002295, "grad_norm": 0.20025716722011566, "learning_rate": 0.002, "loss": 2.5622, "step": 271840 }, { "epoch": 0.5415856496238685, "grad_norm": 0.15431839227676392, "learning_rate": 0.002, "loss": 2.5519, "step": 271850 }, { "epoch": 0.5416055718475073, "grad_norm": 0.2243046760559082, "learning_rate": 0.002, "loss": 2.5548, "step": 271860 }, { "epoch": 0.5416254940711462, "grad_norm": 0.15456660091876984, "learning_rate": 0.002, "loss": 2.5564, "step": 271870 }, { "epoch": 0.5416454162947851, "grad_norm": 0.1541271209716797, "learning_rate": 0.002, "loss": 2.5648, "step": 271880 }, { "epoch": 0.541665338518424, "grad_norm": 0.19843530654907227, "learning_rate": 0.002, "loss": 2.5473, "step": 271890 }, { "epoch": 0.541685260742063, "grad_norm": 0.16311174631118774, "learning_rate": 0.002, "loss": 2.5659, "step": 271900 }, { "epoch": 0.5417051829657019, "grad_norm": 0.17179802060127258, "learning_rate": 0.002, "loss": 2.554, "step": 271910 }, { "epoch": 0.5417251051893408, "grad_norm": 0.20217235386371613, "learning_rate": 0.002, "loss": 2.56, "step": 271920 }, { "epoch": 0.5417450274129797, "grad_norm": 0.1807485669851303, "learning_rate": 0.002, "loss": 2.5509, "step": 271930 }, { "epoch": 0.5417649496366186, "grad_norm": 0.1739308387041092, "learning_rate": 0.002, "loss": 2.5535, "step": 271940 }, { "epoch": 0.5417848718602576, "grad_norm": 0.16418491303920746, "learning_rate": 0.002, "loss": 2.5513, "step": 271950 }, { "epoch": 0.5418047940838965, "grad_norm": 0.17753608524799347, "learning_rate": 0.002, "loss": 2.5571, "step": 271960 }, { "epoch": 0.5418247163075354, "grad_norm": 0.1776151955127716, "learning_rate": 0.002, "loss": 2.562, "step": 271970 }, { "epoch": 0.5418446385311743, "grad_norm": 0.1618289202451706, "learning_rate": 0.002, "loss": 2.5553, "step": 271980 }, { "epoch": 0.5418645607548133, "grad_norm": 0.16368091106414795, "learning_rate": 0.002, "loss": 2.5604, "step": 271990 }, { "epoch": 0.5418844829784522, "grad_norm": 0.17488029599189758, "learning_rate": 0.002, "loss": 2.5601, "step": 272000 }, { "epoch": 0.541904405202091, "grad_norm": 0.15387092530727386, "learning_rate": 0.002, "loss": 2.5659, "step": 272010 }, { "epoch": 0.5419243274257299, "grad_norm": 0.15769030153751373, "learning_rate": 0.002, "loss": 2.5558, "step": 272020 }, { "epoch": 0.5419442496493688, "grad_norm": 0.1708899736404419, "learning_rate": 0.002, "loss": 2.5477, "step": 272030 }, { "epoch": 0.5419641718730078, "grad_norm": 0.1888749897480011, "learning_rate": 0.002, "loss": 2.5505, "step": 272040 }, { "epoch": 0.5419840940966467, "grad_norm": 0.1886812001466751, "learning_rate": 0.002, "loss": 2.5508, "step": 272050 }, { "epoch": 0.5420040163202856, "grad_norm": 0.14765232801437378, "learning_rate": 0.002, "loss": 2.562, "step": 272060 }, { "epoch": 0.5420239385439245, "grad_norm": 0.16977868974208832, "learning_rate": 0.002, "loss": 2.5543, "step": 272070 }, { "epoch": 0.5420438607675634, "grad_norm": 0.14942841231822968, "learning_rate": 0.002, "loss": 2.5647, "step": 272080 }, { "epoch": 0.5420637829912024, "grad_norm": 0.17254769802093506, "learning_rate": 0.002, "loss": 2.5499, "step": 272090 }, { "epoch": 0.5420837052148413, "grad_norm": 0.17777584493160248, "learning_rate": 0.002, "loss": 2.5419, "step": 272100 }, { "epoch": 0.5421036274384802, "grad_norm": 0.1439598649740219, "learning_rate": 0.002, "loss": 2.5583, "step": 272110 }, { "epoch": 0.5421235496621191, "grad_norm": 0.1686502993106842, "learning_rate": 0.002, "loss": 2.5542, "step": 272120 }, { "epoch": 0.542143471885758, "grad_norm": 0.16477914154529572, "learning_rate": 0.002, "loss": 2.5478, "step": 272130 }, { "epoch": 0.542163394109397, "grad_norm": 0.14745202660560608, "learning_rate": 0.002, "loss": 2.5632, "step": 272140 }, { "epoch": 0.5421833163330358, "grad_norm": 0.1436631679534912, "learning_rate": 0.002, "loss": 2.5476, "step": 272150 }, { "epoch": 0.5422032385566747, "grad_norm": 0.19075274467468262, "learning_rate": 0.002, "loss": 2.5542, "step": 272160 }, { "epoch": 0.5422231607803136, "grad_norm": 0.1575126051902771, "learning_rate": 0.002, "loss": 2.5597, "step": 272170 }, { "epoch": 0.5422430830039525, "grad_norm": 0.1833287626504898, "learning_rate": 0.002, "loss": 2.5491, "step": 272180 }, { "epoch": 0.5422630052275915, "grad_norm": 0.15496985614299774, "learning_rate": 0.002, "loss": 2.5467, "step": 272190 }, { "epoch": 0.5422829274512304, "grad_norm": 0.17607568204402924, "learning_rate": 0.002, "loss": 2.5636, "step": 272200 }, { "epoch": 0.5423028496748693, "grad_norm": 0.16865582764148712, "learning_rate": 0.002, "loss": 2.564, "step": 272210 }, { "epoch": 0.5423227718985082, "grad_norm": 0.15527713298797607, "learning_rate": 0.002, "loss": 2.5619, "step": 272220 }, { "epoch": 0.5423426941221471, "grad_norm": 0.16039817035198212, "learning_rate": 0.002, "loss": 2.5597, "step": 272230 }, { "epoch": 0.5423626163457861, "grad_norm": 0.1768910437822342, "learning_rate": 0.002, "loss": 2.5719, "step": 272240 }, { "epoch": 0.542382538569425, "grad_norm": 0.15759050846099854, "learning_rate": 0.002, "loss": 2.5605, "step": 272250 }, { "epoch": 0.5424024607930639, "grad_norm": 0.21291300654411316, "learning_rate": 0.002, "loss": 2.5546, "step": 272260 }, { "epoch": 0.5424223830167028, "grad_norm": 0.1491393893957138, "learning_rate": 0.002, "loss": 2.5736, "step": 272270 }, { "epoch": 0.5424423052403418, "grad_norm": 0.18576684594154358, "learning_rate": 0.002, "loss": 2.5539, "step": 272280 }, { "epoch": 0.5424622274639807, "grad_norm": 0.16468283534049988, "learning_rate": 0.002, "loss": 2.558, "step": 272290 }, { "epoch": 0.5424821496876195, "grad_norm": 0.1557256132364273, "learning_rate": 0.002, "loss": 2.5531, "step": 272300 }, { "epoch": 0.5425020719112584, "grad_norm": 0.16469533741474152, "learning_rate": 0.002, "loss": 2.5617, "step": 272310 }, { "epoch": 0.5425219941348973, "grad_norm": 0.16485126316547394, "learning_rate": 0.002, "loss": 2.5624, "step": 272320 }, { "epoch": 0.5425419163585363, "grad_norm": 0.1909523606300354, "learning_rate": 0.002, "loss": 2.5602, "step": 272330 }, { "epoch": 0.5425618385821752, "grad_norm": 0.148250013589859, "learning_rate": 0.002, "loss": 2.5629, "step": 272340 }, { "epoch": 0.5425817608058141, "grad_norm": 0.16643548011779785, "learning_rate": 0.002, "loss": 2.5628, "step": 272350 }, { "epoch": 0.542601683029453, "grad_norm": 0.1771182119846344, "learning_rate": 0.002, "loss": 2.5569, "step": 272360 }, { "epoch": 0.5426216052530919, "grad_norm": 0.17578533291816711, "learning_rate": 0.002, "loss": 2.5546, "step": 272370 }, { "epoch": 0.5426415274767309, "grad_norm": 0.14233455061912537, "learning_rate": 0.002, "loss": 2.5596, "step": 272380 }, { "epoch": 0.5426614497003698, "grad_norm": 0.19472046196460724, "learning_rate": 0.002, "loss": 2.5455, "step": 272390 }, { "epoch": 0.5426813719240087, "grad_norm": 0.17471662163734436, "learning_rate": 0.002, "loss": 2.5753, "step": 272400 }, { "epoch": 0.5427012941476476, "grad_norm": 0.16201674938201904, "learning_rate": 0.002, "loss": 2.5516, "step": 272410 }, { "epoch": 0.5427212163712865, "grad_norm": 0.18791347742080688, "learning_rate": 0.002, "loss": 2.5643, "step": 272420 }, { "epoch": 0.5427411385949255, "grad_norm": 0.1797616183757782, "learning_rate": 0.002, "loss": 2.5606, "step": 272430 }, { "epoch": 0.5427610608185643, "grad_norm": 0.1750594526529312, "learning_rate": 0.002, "loss": 2.553, "step": 272440 }, { "epoch": 0.5427809830422032, "grad_norm": 0.14791345596313477, "learning_rate": 0.002, "loss": 2.56, "step": 272450 }, { "epoch": 0.5428009052658421, "grad_norm": 0.1709238588809967, "learning_rate": 0.002, "loss": 2.5624, "step": 272460 }, { "epoch": 0.542820827489481, "grad_norm": 0.16038718819618225, "learning_rate": 0.002, "loss": 2.5599, "step": 272470 }, { "epoch": 0.54284074971312, "grad_norm": 0.20079439878463745, "learning_rate": 0.002, "loss": 2.5505, "step": 272480 }, { "epoch": 0.5428606719367589, "grad_norm": 0.17280162870883942, "learning_rate": 0.002, "loss": 2.5682, "step": 272490 }, { "epoch": 0.5428805941603978, "grad_norm": 0.15782974660396576, "learning_rate": 0.002, "loss": 2.5605, "step": 272500 }, { "epoch": 0.5429005163840367, "grad_norm": 0.16914047300815582, "learning_rate": 0.002, "loss": 2.5504, "step": 272510 }, { "epoch": 0.5429204386076756, "grad_norm": 0.14709188044071198, "learning_rate": 0.002, "loss": 2.5417, "step": 272520 }, { "epoch": 0.5429403608313146, "grad_norm": 0.1920991688966751, "learning_rate": 0.002, "loss": 2.5437, "step": 272530 }, { "epoch": 0.5429602830549535, "grad_norm": 0.20165258646011353, "learning_rate": 0.002, "loss": 2.5624, "step": 272540 }, { "epoch": 0.5429802052785924, "grad_norm": 0.17050957679748535, "learning_rate": 0.002, "loss": 2.5619, "step": 272550 }, { "epoch": 0.5430001275022313, "grad_norm": 0.21375557780265808, "learning_rate": 0.002, "loss": 2.5619, "step": 272560 }, { "epoch": 0.5430200497258703, "grad_norm": 0.16159535944461823, "learning_rate": 0.002, "loss": 2.544, "step": 272570 }, { "epoch": 0.5430399719495091, "grad_norm": 0.18843545019626617, "learning_rate": 0.002, "loss": 2.551, "step": 272580 }, { "epoch": 0.543059894173148, "grad_norm": 0.15515542030334473, "learning_rate": 0.002, "loss": 2.5724, "step": 272590 }, { "epoch": 0.5430798163967869, "grad_norm": 0.16902241110801697, "learning_rate": 0.002, "loss": 2.5555, "step": 272600 }, { "epoch": 0.5430997386204258, "grad_norm": 0.1708230972290039, "learning_rate": 0.002, "loss": 2.5694, "step": 272610 }, { "epoch": 0.5431196608440648, "grad_norm": 0.15839529037475586, "learning_rate": 0.002, "loss": 2.5523, "step": 272620 }, { "epoch": 0.5431395830677037, "grad_norm": 0.1743887960910797, "learning_rate": 0.002, "loss": 2.5603, "step": 272630 }, { "epoch": 0.5431595052913426, "grad_norm": 0.16218188405036926, "learning_rate": 0.002, "loss": 2.5531, "step": 272640 }, { "epoch": 0.5431794275149815, "grad_norm": 0.18047288060188293, "learning_rate": 0.002, "loss": 2.5654, "step": 272650 }, { "epoch": 0.5431993497386204, "grad_norm": 0.22698041796684265, "learning_rate": 0.002, "loss": 2.5684, "step": 272660 }, { "epoch": 0.5432192719622594, "grad_norm": 0.15222753584384918, "learning_rate": 0.002, "loss": 2.5742, "step": 272670 }, { "epoch": 0.5432391941858983, "grad_norm": 0.1825334131717682, "learning_rate": 0.002, "loss": 2.5657, "step": 272680 }, { "epoch": 0.5432591164095372, "grad_norm": 0.17897705733776093, "learning_rate": 0.002, "loss": 2.5487, "step": 272690 }, { "epoch": 0.5432790386331761, "grad_norm": 0.1719307154417038, "learning_rate": 0.002, "loss": 2.554, "step": 272700 }, { "epoch": 0.543298960856815, "grad_norm": 0.16508714854717255, "learning_rate": 0.002, "loss": 2.5754, "step": 272710 }, { "epoch": 0.543318883080454, "grad_norm": 0.15702597796916962, "learning_rate": 0.002, "loss": 2.5581, "step": 272720 }, { "epoch": 0.5433388053040928, "grad_norm": 0.15004082024097443, "learning_rate": 0.002, "loss": 2.5456, "step": 272730 }, { "epoch": 0.5433587275277317, "grad_norm": 0.1973438709974289, "learning_rate": 0.002, "loss": 2.5577, "step": 272740 }, { "epoch": 0.5433786497513706, "grad_norm": 0.15022853016853333, "learning_rate": 0.002, "loss": 2.5528, "step": 272750 }, { "epoch": 0.5433985719750095, "grad_norm": 0.16931432485580444, "learning_rate": 0.002, "loss": 2.5686, "step": 272760 }, { "epoch": 0.5434184941986485, "grad_norm": 0.18674606084823608, "learning_rate": 0.002, "loss": 2.559, "step": 272770 }, { "epoch": 0.5434384164222874, "grad_norm": 0.15276914834976196, "learning_rate": 0.002, "loss": 2.5565, "step": 272780 }, { "epoch": 0.5434583386459263, "grad_norm": 0.24104590713977814, "learning_rate": 0.002, "loss": 2.563, "step": 272790 }, { "epoch": 0.5434782608695652, "grad_norm": 0.1781337559223175, "learning_rate": 0.002, "loss": 2.5576, "step": 272800 }, { "epoch": 0.5434981830932041, "grad_norm": 0.1751495599746704, "learning_rate": 0.002, "loss": 2.5581, "step": 272810 }, { "epoch": 0.5435181053168431, "grad_norm": 0.14393696188926697, "learning_rate": 0.002, "loss": 2.5479, "step": 272820 }, { "epoch": 0.543538027540482, "grad_norm": 0.24361053109169006, "learning_rate": 0.002, "loss": 2.5473, "step": 272830 }, { "epoch": 0.5435579497641209, "grad_norm": 0.17261533439159393, "learning_rate": 0.002, "loss": 2.5562, "step": 272840 }, { "epoch": 0.5435778719877598, "grad_norm": 0.16793300211429596, "learning_rate": 0.002, "loss": 2.5329, "step": 272850 }, { "epoch": 0.5435977942113988, "grad_norm": 0.1635715514421463, "learning_rate": 0.002, "loss": 2.5527, "step": 272860 }, { "epoch": 0.5436177164350376, "grad_norm": 0.14187337458133698, "learning_rate": 0.002, "loss": 2.5574, "step": 272870 }, { "epoch": 0.5436376386586765, "grad_norm": 0.16312211751937866, "learning_rate": 0.002, "loss": 2.5589, "step": 272880 }, { "epoch": 0.5436575608823154, "grad_norm": 0.24743115901947021, "learning_rate": 0.002, "loss": 2.5596, "step": 272890 }, { "epoch": 0.5436774831059543, "grad_norm": 0.1547132134437561, "learning_rate": 0.002, "loss": 2.5541, "step": 272900 }, { "epoch": 0.5436974053295933, "grad_norm": 0.13860401511192322, "learning_rate": 0.002, "loss": 2.5601, "step": 272910 }, { "epoch": 0.5437173275532322, "grad_norm": 0.18881428241729736, "learning_rate": 0.002, "loss": 2.5524, "step": 272920 }, { "epoch": 0.5437372497768711, "grad_norm": 0.13414475321769714, "learning_rate": 0.002, "loss": 2.5502, "step": 272930 }, { "epoch": 0.54375717200051, "grad_norm": 0.16723401844501495, "learning_rate": 0.002, "loss": 2.5628, "step": 272940 }, { "epoch": 0.5437770942241489, "grad_norm": 0.20066305994987488, "learning_rate": 0.002, "loss": 2.5501, "step": 272950 }, { "epoch": 0.5437970164477879, "grad_norm": 0.16557730734348297, "learning_rate": 0.002, "loss": 2.5301, "step": 272960 }, { "epoch": 0.5438169386714268, "grad_norm": 0.1555039882659912, "learning_rate": 0.002, "loss": 2.5686, "step": 272970 }, { "epoch": 0.5438368608950657, "grad_norm": 0.15924324095249176, "learning_rate": 0.002, "loss": 2.5578, "step": 272980 }, { "epoch": 0.5438567831187046, "grad_norm": 0.22408847510814667, "learning_rate": 0.002, "loss": 2.5559, "step": 272990 }, { "epoch": 0.5438767053423434, "grad_norm": 0.1616072803735733, "learning_rate": 0.002, "loss": 2.5536, "step": 273000 }, { "epoch": 0.5438966275659824, "grad_norm": 0.14314070343971252, "learning_rate": 0.002, "loss": 2.559, "step": 273010 }, { "epoch": 0.5439165497896213, "grad_norm": 0.1728648990392685, "learning_rate": 0.002, "loss": 2.5623, "step": 273020 }, { "epoch": 0.5439364720132602, "grad_norm": 0.21885231137275696, "learning_rate": 0.002, "loss": 2.5761, "step": 273030 }, { "epoch": 0.5439563942368991, "grad_norm": 0.15567198395729065, "learning_rate": 0.002, "loss": 2.5488, "step": 273040 }, { "epoch": 0.543976316460538, "grad_norm": 0.14635951817035675, "learning_rate": 0.002, "loss": 2.5619, "step": 273050 }, { "epoch": 0.543996238684177, "grad_norm": 0.17204229533672333, "learning_rate": 0.002, "loss": 2.5656, "step": 273060 }, { "epoch": 0.5440161609078159, "grad_norm": 0.15279240906238556, "learning_rate": 0.002, "loss": 2.5682, "step": 273070 }, { "epoch": 0.5440360831314548, "grad_norm": 0.14910103380680084, "learning_rate": 0.002, "loss": 2.5819, "step": 273080 }, { "epoch": 0.5440560053550937, "grad_norm": 0.17793495953083038, "learning_rate": 0.002, "loss": 2.5709, "step": 273090 }, { "epoch": 0.5440759275787326, "grad_norm": 0.16983410716056824, "learning_rate": 0.002, "loss": 2.5634, "step": 273100 }, { "epoch": 0.5440958498023716, "grad_norm": 0.13857696950435638, "learning_rate": 0.002, "loss": 2.5496, "step": 273110 }, { "epoch": 0.5441157720260105, "grad_norm": 0.23287619650363922, "learning_rate": 0.002, "loss": 2.5311, "step": 273120 }, { "epoch": 0.5441356942496494, "grad_norm": 0.16016893088817596, "learning_rate": 0.002, "loss": 2.5546, "step": 273130 }, { "epoch": 0.5441556164732883, "grad_norm": 0.17207176983356476, "learning_rate": 0.002, "loss": 2.54, "step": 273140 }, { "epoch": 0.5441755386969271, "grad_norm": 0.15981033444404602, "learning_rate": 0.002, "loss": 2.5526, "step": 273150 }, { "epoch": 0.5441954609205661, "grad_norm": 0.18098177015781403, "learning_rate": 0.002, "loss": 2.555, "step": 273160 }, { "epoch": 0.544215383144205, "grad_norm": 0.1628863513469696, "learning_rate": 0.002, "loss": 2.5687, "step": 273170 }, { "epoch": 0.5442353053678439, "grad_norm": 0.17048808932304382, "learning_rate": 0.002, "loss": 2.5611, "step": 273180 }, { "epoch": 0.5442552275914828, "grad_norm": 0.1484728455543518, "learning_rate": 0.002, "loss": 2.5486, "step": 273190 }, { "epoch": 0.5442751498151218, "grad_norm": 0.17831502854824066, "learning_rate": 0.002, "loss": 2.5629, "step": 273200 }, { "epoch": 0.5442950720387607, "grad_norm": 0.17905983328819275, "learning_rate": 0.002, "loss": 2.5591, "step": 273210 }, { "epoch": 0.5443149942623996, "grad_norm": 0.15161527693271637, "learning_rate": 0.002, "loss": 2.5761, "step": 273220 }, { "epoch": 0.5443349164860385, "grad_norm": 0.19842617213726044, "learning_rate": 0.002, "loss": 2.5542, "step": 273230 }, { "epoch": 0.5443548387096774, "grad_norm": 0.1770636886358261, "learning_rate": 0.002, "loss": 2.542, "step": 273240 }, { "epoch": 0.5443747609333164, "grad_norm": 0.13439911603927612, "learning_rate": 0.002, "loss": 2.5697, "step": 273250 }, { "epoch": 0.5443946831569553, "grad_norm": 0.17646870017051697, "learning_rate": 0.002, "loss": 2.5506, "step": 273260 }, { "epoch": 0.5444146053805942, "grad_norm": 0.14383259415626526, "learning_rate": 0.002, "loss": 2.5535, "step": 273270 }, { "epoch": 0.544434527604233, "grad_norm": 0.18190698325634003, "learning_rate": 0.002, "loss": 2.5607, "step": 273280 }, { "epoch": 0.544454449827872, "grad_norm": 0.14769889414310455, "learning_rate": 0.002, "loss": 2.5549, "step": 273290 }, { "epoch": 0.544474372051511, "grad_norm": 0.1629158854484558, "learning_rate": 0.002, "loss": 2.5495, "step": 273300 }, { "epoch": 0.5444942942751498, "grad_norm": 0.19864825904369354, "learning_rate": 0.002, "loss": 2.5604, "step": 273310 }, { "epoch": 0.5445142164987887, "grad_norm": 0.17614053189754486, "learning_rate": 0.002, "loss": 2.5481, "step": 273320 }, { "epoch": 0.5445341387224276, "grad_norm": 0.14956113696098328, "learning_rate": 0.002, "loss": 2.562, "step": 273330 }, { "epoch": 0.5445540609460665, "grad_norm": 0.164293572306633, "learning_rate": 0.002, "loss": 2.5615, "step": 273340 }, { "epoch": 0.5445739831697055, "grad_norm": 0.16409744322299957, "learning_rate": 0.002, "loss": 2.5745, "step": 273350 }, { "epoch": 0.5445939053933444, "grad_norm": 0.15505895018577576, "learning_rate": 0.002, "loss": 2.5525, "step": 273360 }, { "epoch": 0.5446138276169833, "grad_norm": 0.1713477373123169, "learning_rate": 0.002, "loss": 2.5489, "step": 273370 }, { "epoch": 0.5446337498406222, "grad_norm": 0.18612582981586456, "learning_rate": 0.002, "loss": 2.5576, "step": 273380 }, { "epoch": 0.5446536720642611, "grad_norm": 0.16845501959323883, "learning_rate": 0.002, "loss": 2.5727, "step": 273390 }, { "epoch": 0.5446735942879001, "grad_norm": 0.18143069744110107, "learning_rate": 0.002, "loss": 2.555, "step": 273400 }, { "epoch": 0.544693516511539, "grad_norm": 0.16858787834644318, "learning_rate": 0.002, "loss": 2.5621, "step": 273410 }, { "epoch": 0.5447134387351779, "grad_norm": 0.18842813372612, "learning_rate": 0.002, "loss": 2.5578, "step": 273420 }, { "epoch": 0.5447333609588167, "grad_norm": 0.16457128524780273, "learning_rate": 0.002, "loss": 2.555, "step": 273430 }, { "epoch": 0.5447532831824556, "grad_norm": 0.2244991809129715, "learning_rate": 0.002, "loss": 2.5527, "step": 273440 }, { "epoch": 0.5447732054060946, "grad_norm": 0.17426520586013794, "learning_rate": 0.002, "loss": 2.5503, "step": 273450 }, { "epoch": 0.5447931276297335, "grad_norm": 0.15271782875061035, "learning_rate": 0.002, "loss": 2.5611, "step": 273460 }, { "epoch": 0.5448130498533724, "grad_norm": 0.19307547807693481, "learning_rate": 0.002, "loss": 2.5585, "step": 273470 }, { "epoch": 0.5448329720770113, "grad_norm": 0.15209491550922394, "learning_rate": 0.002, "loss": 2.5421, "step": 273480 }, { "epoch": 0.5448528943006503, "grad_norm": 0.16115351021289825, "learning_rate": 0.002, "loss": 2.5701, "step": 273490 }, { "epoch": 0.5448728165242892, "grad_norm": 0.15173465013504028, "learning_rate": 0.002, "loss": 2.5527, "step": 273500 }, { "epoch": 0.5448927387479281, "grad_norm": 0.16650258004665375, "learning_rate": 0.002, "loss": 2.5565, "step": 273510 }, { "epoch": 0.544912660971567, "grad_norm": 0.1671595722436905, "learning_rate": 0.002, "loss": 2.5589, "step": 273520 }, { "epoch": 0.5449325831952059, "grad_norm": 0.17064660787582397, "learning_rate": 0.002, "loss": 2.5495, "step": 273530 }, { "epoch": 0.5449525054188449, "grad_norm": 0.1649128496646881, "learning_rate": 0.002, "loss": 2.5485, "step": 273540 }, { "epoch": 0.5449724276424838, "grad_norm": 0.17425087094306946, "learning_rate": 0.002, "loss": 2.5458, "step": 273550 }, { "epoch": 0.5449923498661227, "grad_norm": 0.18273955583572388, "learning_rate": 0.002, "loss": 2.5518, "step": 273560 }, { "epoch": 0.5450122720897616, "grad_norm": 0.14840492606163025, "learning_rate": 0.002, "loss": 2.5414, "step": 273570 }, { "epoch": 0.5450321943134004, "grad_norm": 0.16380853950977325, "learning_rate": 0.002, "loss": 2.5458, "step": 273580 }, { "epoch": 0.5450521165370394, "grad_norm": 0.1513754278421402, "learning_rate": 0.002, "loss": 2.5586, "step": 273590 }, { "epoch": 0.5450720387606783, "grad_norm": 0.16638652980327606, "learning_rate": 0.002, "loss": 2.5738, "step": 273600 }, { "epoch": 0.5450919609843172, "grad_norm": 0.20785480737686157, "learning_rate": 0.002, "loss": 2.5547, "step": 273610 }, { "epoch": 0.5451118832079561, "grad_norm": 0.18689754605293274, "learning_rate": 0.002, "loss": 2.543, "step": 273620 }, { "epoch": 0.545131805431595, "grad_norm": 0.144586443901062, "learning_rate": 0.002, "loss": 2.5653, "step": 273630 }, { "epoch": 0.545151727655234, "grad_norm": 0.1701623499393463, "learning_rate": 0.002, "loss": 2.5456, "step": 273640 }, { "epoch": 0.5451716498788729, "grad_norm": 0.17313018441200256, "learning_rate": 0.002, "loss": 2.5559, "step": 273650 }, { "epoch": 0.5451915721025118, "grad_norm": 0.17185041308403015, "learning_rate": 0.002, "loss": 2.5537, "step": 273660 }, { "epoch": 0.5452114943261507, "grad_norm": 0.1514991670846939, "learning_rate": 0.002, "loss": 2.5691, "step": 273670 }, { "epoch": 0.5452314165497896, "grad_norm": 0.1446840614080429, "learning_rate": 0.002, "loss": 2.5443, "step": 273680 }, { "epoch": 0.5452513387734286, "grad_norm": 0.18000291287899017, "learning_rate": 0.002, "loss": 2.5595, "step": 273690 }, { "epoch": 0.5452712609970675, "grad_norm": 0.19545917212963104, "learning_rate": 0.002, "loss": 2.5653, "step": 273700 }, { "epoch": 0.5452911832207064, "grad_norm": 0.15798336267471313, "learning_rate": 0.002, "loss": 2.5674, "step": 273710 }, { "epoch": 0.5453111054443452, "grad_norm": 0.18551959097385406, "learning_rate": 0.002, "loss": 2.5454, "step": 273720 }, { "epoch": 0.5453310276679841, "grad_norm": 0.18560710549354553, "learning_rate": 0.002, "loss": 2.5696, "step": 273730 }, { "epoch": 0.5453509498916231, "grad_norm": 0.15969792008399963, "learning_rate": 0.002, "loss": 2.555, "step": 273740 }, { "epoch": 0.545370872115262, "grad_norm": 0.21361784636974335, "learning_rate": 0.002, "loss": 2.5648, "step": 273750 }, { "epoch": 0.5453907943389009, "grad_norm": 0.18921686708927155, "learning_rate": 0.002, "loss": 2.559, "step": 273760 }, { "epoch": 0.5454107165625398, "grad_norm": 0.1537877917289734, "learning_rate": 0.002, "loss": 2.5539, "step": 273770 }, { "epoch": 0.5454306387861788, "grad_norm": 0.2093820869922638, "learning_rate": 0.002, "loss": 2.5611, "step": 273780 }, { "epoch": 0.5454505610098177, "grad_norm": 0.1711953580379486, "learning_rate": 0.002, "loss": 2.5691, "step": 273790 }, { "epoch": 0.5454704832334566, "grad_norm": 0.16282302141189575, "learning_rate": 0.002, "loss": 2.556, "step": 273800 }, { "epoch": 0.5454904054570955, "grad_norm": 0.17996960878372192, "learning_rate": 0.002, "loss": 2.5535, "step": 273810 }, { "epoch": 0.5455103276807344, "grad_norm": 0.15738779306411743, "learning_rate": 0.002, "loss": 2.5594, "step": 273820 }, { "epoch": 0.5455302499043734, "grad_norm": 0.14947962760925293, "learning_rate": 0.002, "loss": 2.5683, "step": 273830 }, { "epoch": 0.5455501721280123, "grad_norm": 0.17503149807453156, "learning_rate": 0.002, "loss": 2.5475, "step": 273840 }, { "epoch": 0.5455700943516512, "grad_norm": 0.14804436266422272, "learning_rate": 0.002, "loss": 2.5406, "step": 273850 }, { "epoch": 0.54559001657529, "grad_norm": 0.17620065808296204, "learning_rate": 0.002, "loss": 2.5435, "step": 273860 }, { "epoch": 0.5456099387989289, "grad_norm": 0.17152439057826996, "learning_rate": 0.002, "loss": 2.5518, "step": 273870 }, { "epoch": 0.5456298610225679, "grad_norm": 0.16534747183322906, "learning_rate": 0.002, "loss": 2.5634, "step": 273880 }, { "epoch": 0.5456497832462068, "grad_norm": 0.1777593344449997, "learning_rate": 0.002, "loss": 2.5677, "step": 273890 }, { "epoch": 0.5456697054698457, "grad_norm": 0.17798158526420593, "learning_rate": 0.002, "loss": 2.5553, "step": 273900 }, { "epoch": 0.5456896276934846, "grad_norm": 0.1438126415014267, "learning_rate": 0.002, "loss": 2.568, "step": 273910 }, { "epoch": 0.5457095499171235, "grad_norm": 0.1570010781288147, "learning_rate": 0.002, "loss": 2.5504, "step": 273920 }, { "epoch": 0.5457294721407625, "grad_norm": 0.17975488305091858, "learning_rate": 0.002, "loss": 2.5554, "step": 273930 }, { "epoch": 0.5457493943644014, "grad_norm": 0.1467185616493225, "learning_rate": 0.002, "loss": 2.5431, "step": 273940 }, { "epoch": 0.5457693165880403, "grad_norm": 0.16801181435585022, "learning_rate": 0.002, "loss": 2.5399, "step": 273950 }, { "epoch": 0.5457892388116792, "grad_norm": 0.1573498249053955, "learning_rate": 0.002, "loss": 2.5608, "step": 273960 }, { "epoch": 0.5458091610353181, "grad_norm": 0.21458250284194946, "learning_rate": 0.002, "loss": 2.5597, "step": 273970 }, { "epoch": 0.5458290832589571, "grad_norm": 0.17420773208141327, "learning_rate": 0.002, "loss": 2.5533, "step": 273980 }, { "epoch": 0.545849005482596, "grad_norm": 0.1552455872297287, "learning_rate": 0.002, "loss": 2.5538, "step": 273990 }, { "epoch": 0.5458689277062349, "grad_norm": 0.1872003972530365, "learning_rate": 0.002, "loss": 2.5711, "step": 274000 }, { "epoch": 0.5458888499298737, "grad_norm": 0.16964775323867798, "learning_rate": 0.002, "loss": 2.5666, "step": 274010 }, { "epoch": 0.5459087721535126, "grad_norm": 0.17438949644565582, "learning_rate": 0.002, "loss": 2.5686, "step": 274020 }, { "epoch": 0.5459286943771516, "grad_norm": 0.1505051702260971, "learning_rate": 0.002, "loss": 2.552, "step": 274030 }, { "epoch": 0.5459486166007905, "grad_norm": 0.16173763573169708, "learning_rate": 0.002, "loss": 2.5602, "step": 274040 }, { "epoch": 0.5459685388244294, "grad_norm": 0.23693567514419556, "learning_rate": 0.002, "loss": 2.5567, "step": 274050 }, { "epoch": 0.5459884610480683, "grad_norm": 0.165552020072937, "learning_rate": 0.002, "loss": 2.5682, "step": 274060 }, { "epoch": 0.5460083832717073, "grad_norm": 0.49702900648117065, "learning_rate": 0.002, "loss": 2.567, "step": 274070 }, { "epoch": 0.5460283054953462, "grad_norm": 0.16551828384399414, "learning_rate": 0.002, "loss": 2.5635, "step": 274080 }, { "epoch": 0.5460482277189851, "grad_norm": 0.17003969848155975, "learning_rate": 0.002, "loss": 2.5708, "step": 274090 }, { "epoch": 0.546068149942624, "grad_norm": 0.15785031020641327, "learning_rate": 0.002, "loss": 2.5434, "step": 274100 }, { "epoch": 0.5460880721662629, "grad_norm": 0.15390875935554504, "learning_rate": 0.002, "loss": 2.567, "step": 274110 }, { "epoch": 0.5461079943899019, "grad_norm": 0.23626293241977692, "learning_rate": 0.002, "loss": 2.56, "step": 274120 }, { "epoch": 0.5461279166135408, "grad_norm": 0.1676849126815796, "learning_rate": 0.002, "loss": 2.5564, "step": 274130 }, { "epoch": 0.5461478388371797, "grad_norm": 0.17179742455482483, "learning_rate": 0.002, "loss": 2.5486, "step": 274140 }, { "epoch": 0.5461677610608185, "grad_norm": 0.14659249782562256, "learning_rate": 0.002, "loss": 2.5561, "step": 274150 }, { "epoch": 0.5461876832844574, "grad_norm": 0.16969327628612518, "learning_rate": 0.002, "loss": 2.5544, "step": 274160 }, { "epoch": 0.5462076055080964, "grad_norm": 0.1542210578918457, "learning_rate": 0.002, "loss": 2.5704, "step": 274170 }, { "epoch": 0.5462275277317353, "grad_norm": 0.1872127503156662, "learning_rate": 0.002, "loss": 2.5509, "step": 274180 }, { "epoch": 0.5462474499553742, "grad_norm": 0.14714576303958893, "learning_rate": 0.002, "loss": 2.5712, "step": 274190 }, { "epoch": 0.5462673721790131, "grad_norm": 0.15744461119174957, "learning_rate": 0.002, "loss": 2.564, "step": 274200 }, { "epoch": 0.546287294402652, "grad_norm": 0.1759096384048462, "learning_rate": 0.002, "loss": 2.5486, "step": 274210 }, { "epoch": 0.546307216626291, "grad_norm": 0.14721865952014923, "learning_rate": 0.002, "loss": 2.5491, "step": 274220 }, { "epoch": 0.5463271388499299, "grad_norm": 0.18981604278087616, "learning_rate": 0.002, "loss": 2.5497, "step": 274230 }, { "epoch": 0.5463470610735688, "grad_norm": 0.15241752564907074, "learning_rate": 0.002, "loss": 2.5608, "step": 274240 }, { "epoch": 0.5463669832972077, "grad_norm": 0.16975806653499603, "learning_rate": 0.002, "loss": 2.5544, "step": 274250 }, { "epoch": 0.5463869055208466, "grad_norm": 0.1472437083721161, "learning_rate": 0.002, "loss": 2.5641, "step": 274260 }, { "epoch": 0.5464068277444856, "grad_norm": 0.20278646051883698, "learning_rate": 0.002, "loss": 2.5483, "step": 274270 }, { "epoch": 0.5464267499681245, "grad_norm": 0.1599499136209488, "learning_rate": 0.002, "loss": 2.5543, "step": 274280 }, { "epoch": 0.5464466721917633, "grad_norm": 0.14932464063167572, "learning_rate": 0.002, "loss": 2.5448, "step": 274290 }, { "epoch": 0.5464665944154022, "grad_norm": 0.20610612630844116, "learning_rate": 0.002, "loss": 2.5463, "step": 274300 }, { "epoch": 0.5464865166390411, "grad_norm": 0.19189214706420898, "learning_rate": 0.002, "loss": 2.5449, "step": 274310 }, { "epoch": 0.5465064388626801, "grad_norm": 0.18525364995002747, "learning_rate": 0.002, "loss": 2.5701, "step": 274320 }, { "epoch": 0.546526361086319, "grad_norm": 0.1674482524394989, "learning_rate": 0.002, "loss": 2.5639, "step": 274330 }, { "epoch": 0.5465462833099579, "grad_norm": 0.1395389586687088, "learning_rate": 0.002, "loss": 2.552, "step": 274340 }, { "epoch": 0.5465662055335968, "grad_norm": 0.18362745642662048, "learning_rate": 0.002, "loss": 2.5519, "step": 274350 }, { "epoch": 0.5465861277572358, "grad_norm": 0.1484677493572235, "learning_rate": 0.002, "loss": 2.5459, "step": 274360 }, { "epoch": 0.5466060499808747, "grad_norm": 0.15355323255062103, "learning_rate": 0.002, "loss": 2.557, "step": 274370 }, { "epoch": 0.5466259722045136, "grad_norm": 0.17735053598880768, "learning_rate": 0.002, "loss": 2.5527, "step": 274380 }, { "epoch": 0.5466458944281525, "grad_norm": 0.15123163163661957, "learning_rate": 0.002, "loss": 2.5582, "step": 274390 }, { "epoch": 0.5466658166517914, "grad_norm": 0.17720216512680054, "learning_rate": 0.002, "loss": 2.5653, "step": 274400 }, { "epoch": 0.5466857388754304, "grad_norm": 0.20220421254634857, "learning_rate": 0.002, "loss": 2.5579, "step": 274410 }, { "epoch": 0.5467056610990693, "grad_norm": 0.19479350745677948, "learning_rate": 0.002, "loss": 2.5375, "step": 274420 }, { "epoch": 0.5467255833227082, "grad_norm": 0.1530885100364685, "learning_rate": 0.002, "loss": 2.5624, "step": 274430 }, { "epoch": 0.546745505546347, "grad_norm": 0.16953279078006744, "learning_rate": 0.002, "loss": 2.555, "step": 274440 }, { "epoch": 0.5467654277699859, "grad_norm": 0.2018173784017563, "learning_rate": 0.002, "loss": 2.5594, "step": 274450 }, { "epoch": 0.5467853499936249, "grad_norm": 0.18084190785884857, "learning_rate": 0.002, "loss": 2.5522, "step": 274460 }, { "epoch": 0.5468052722172638, "grad_norm": 0.15392956137657166, "learning_rate": 0.002, "loss": 2.5559, "step": 274470 }, { "epoch": 0.5468251944409027, "grad_norm": 0.1394825130701065, "learning_rate": 0.002, "loss": 2.5439, "step": 274480 }, { "epoch": 0.5468451166645416, "grad_norm": 0.1641348898410797, "learning_rate": 0.002, "loss": 2.5633, "step": 274490 }, { "epoch": 0.5468650388881805, "grad_norm": 0.16629581153392792, "learning_rate": 0.002, "loss": 2.5573, "step": 274500 }, { "epoch": 0.5468849611118195, "grad_norm": 0.1506706327199936, "learning_rate": 0.002, "loss": 2.5601, "step": 274510 }, { "epoch": 0.5469048833354584, "grad_norm": 0.16158097982406616, "learning_rate": 0.002, "loss": 2.5465, "step": 274520 }, { "epoch": 0.5469248055590973, "grad_norm": 0.1864538937807083, "learning_rate": 0.002, "loss": 2.5702, "step": 274530 }, { "epoch": 0.5469447277827362, "grad_norm": 0.16004639863967896, "learning_rate": 0.002, "loss": 2.5616, "step": 274540 }, { "epoch": 0.5469646500063751, "grad_norm": 0.18133558332920074, "learning_rate": 0.002, "loss": 2.5531, "step": 274550 }, { "epoch": 0.5469845722300141, "grad_norm": 0.13856224715709686, "learning_rate": 0.002, "loss": 2.5648, "step": 274560 }, { "epoch": 0.547004494453653, "grad_norm": 0.1502457559108734, "learning_rate": 0.002, "loss": 2.5544, "step": 274570 }, { "epoch": 0.5470244166772918, "grad_norm": 0.17146863043308258, "learning_rate": 0.002, "loss": 2.5759, "step": 274580 }, { "epoch": 0.5470443389009307, "grad_norm": 0.16890059411525726, "learning_rate": 0.002, "loss": 2.5535, "step": 274590 }, { "epoch": 0.5470642611245696, "grad_norm": 0.17213907837867737, "learning_rate": 0.002, "loss": 2.5682, "step": 274600 }, { "epoch": 0.5470841833482086, "grad_norm": 0.19671571254730225, "learning_rate": 0.002, "loss": 2.5392, "step": 274610 }, { "epoch": 0.5471041055718475, "grad_norm": 0.18249908089637756, "learning_rate": 0.002, "loss": 2.5719, "step": 274620 }, { "epoch": 0.5471240277954864, "grad_norm": 0.18256966769695282, "learning_rate": 0.002, "loss": 2.56, "step": 274630 }, { "epoch": 0.5471439500191253, "grad_norm": 0.15690962970256805, "learning_rate": 0.002, "loss": 2.56, "step": 274640 }, { "epoch": 0.5471638722427642, "grad_norm": 0.16888263821601868, "learning_rate": 0.002, "loss": 2.5464, "step": 274650 }, { "epoch": 0.5471837944664032, "grad_norm": 0.1646553874015808, "learning_rate": 0.002, "loss": 2.5438, "step": 274660 }, { "epoch": 0.5472037166900421, "grad_norm": 0.1451653093099594, "learning_rate": 0.002, "loss": 2.5552, "step": 274670 }, { "epoch": 0.547223638913681, "grad_norm": 0.1514287143945694, "learning_rate": 0.002, "loss": 2.5426, "step": 274680 }, { "epoch": 0.5472435611373199, "grad_norm": 0.14501690864562988, "learning_rate": 0.002, "loss": 2.5608, "step": 274690 }, { "epoch": 0.5472634833609589, "grad_norm": 0.193023681640625, "learning_rate": 0.002, "loss": 2.5472, "step": 274700 }, { "epoch": 0.5472834055845978, "grad_norm": 0.16566486656665802, "learning_rate": 0.002, "loss": 2.5718, "step": 274710 }, { "epoch": 0.5473033278082367, "grad_norm": 0.18731701374053955, "learning_rate": 0.002, "loss": 2.5375, "step": 274720 }, { "epoch": 0.5473232500318755, "grad_norm": 0.20334097743034363, "learning_rate": 0.002, "loss": 2.5496, "step": 274730 }, { "epoch": 0.5473431722555144, "grad_norm": 0.16621564328670502, "learning_rate": 0.002, "loss": 2.5626, "step": 274740 }, { "epoch": 0.5473630944791534, "grad_norm": 0.1575613021850586, "learning_rate": 0.002, "loss": 2.5623, "step": 274750 }, { "epoch": 0.5473830167027923, "grad_norm": 0.18889784812927246, "learning_rate": 0.002, "loss": 2.5648, "step": 274760 }, { "epoch": 0.5474029389264312, "grad_norm": 0.17327281832695007, "learning_rate": 0.002, "loss": 2.5618, "step": 274770 }, { "epoch": 0.5474228611500701, "grad_norm": 0.14931003749370575, "learning_rate": 0.002, "loss": 2.5473, "step": 274780 }, { "epoch": 0.547442783373709, "grad_norm": 0.17616531252861023, "learning_rate": 0.002, "loss": 2.5468, "step": 274790 }, { "epoch": 0.547462705597348, "grad_norm": 0.18160390853881836, "learning_rate": 0.002, "loss": 2.5479, "step": 274800 }, { "epoch": 0.5474826278209869, "grad_norm": 0.17901015281677246, "learning_rate": 0.002, "loss": 2.562, "step": 274810 }, { "epoch": 0.5475025500446258, "grad_norm": 0.15345969796180725, "learning_rate": 0.002, "loss": 2.5453, "step": 274820 }, { "epoch": 0.5475224722682647, "grad_norm": 0.22566163539886475, "learning_rate": 0.002, "loss": 2.5538, "step": 274830 }, { "epoch": 0.5475423944919036, "grad_norm": 0.1556018441915512, "learning_rate": 0.002, "loss": 2.5534, "step": 274840 }, { "epoch": 0.5475623167155426, "grad_norm": 0.16477185487747192, "learning_rate": 0.002, "loss": 2.5605, "step": 274850 }, { "epoch": 0.5475822389391815, "grad_norm": 0.1659696251153946, "learning_rate": 0.002, "loss": 2.5383, "step": 274860 }, { "epoch": 0.5476021611628203, "grad_norm": 0.1471935212612152, "learning_rate": 0.002, "loss": 2.559, "step": 274870 }, { "epoch": 0.5476220833864592, "grad_norm": 0.17255066335201263, "learning_rate": 0.002, "loss": 2.5666, "step": 274880 }, { "epoch": 0.5476420056100981, "grad_norm": 0.16378062963485718, "learning_rate": 0.002, "loss": 2.5495, "step": 274890 }, { "epoch": 0.5476619278337371, "grad_norm": 0.1770256757736206, "learning_rate": 0.002, "loss": 2.5659, "step": 274900 }, { "epoch": 0.547681850057376, "grad_norm": 0.25220420956611633, "learning_rate": 0.002, "loss": 2.5542, "step": 274910 }, { "epoch": 0.5477017722810149, "grad_norm": 0.1856103539466858, "learning_rate": 0.002, "loss": 2.5531, "step": 274920 }, { "epoch": 0.5477216945046538, "grad_norm": 0.15755382180213928, "learning_rate": 0.002, "loss": 2.5478, "step": 274930 }, { "epoch": 0.5477416167282927, "grad_norm": 0.20541584491729736, "learning_rate": 0.002, "loss": 2.5587, "step": 274940 }, { "epoch": 0.5477615389519317, "grad_norm": 0.1640186756849289, "learning_rate": 0.002, "loss": 2.5423, "step": 274950 }, { "epoch": 0.5477814611755706, "grad_norm": 0.18545347452163696, "learning_rate": 0.002, "loss": 2.5444, "step": 274960 }, { "epoch": 0.5478013833992095, "grad_norm": 0.1614142209291458, "learning_rate": 0.002, "loss": 2.5607, "step": 274970 }, { "epoch": 0.5478213056228484, "grad_norm": 0.14252109825611115, "learning_rate": 0.002, "loss": 2.5508, "step": 274980 }, { "epoch": 0.5478412278464874, "grad_norm": 0.1657632738351822, "learning_rate": 0.002, "loss": 2.5545, "step": 274990 }, { "epoch": 0.5478611500701263, "grad_norm": 0.2867026925086975, "learning_rate": 0.002, "loss": 2.5551, "step": 275000 }, { "epoch": 0.5478810722937651, "grad_norm": 0.146869957447052, "learning_rate": 0.002, "loss": 2.5642, "step": 275010 }, { "epoch": 0.547900994517404, "grad_norm": 0.18410030007362366, "learning_rate": 0.002, "loss": 2.5563, "step": 275020 }, { "epoch": 0.5479209167410429, "grad_norm": 0.15091419219970703, "learning_rate": 0.002, "loss": 2.553, "step": 275030 }, { "epoch": 0.5479408389646819, "grad_norm": 0.17524978518486023, "learning_rate": 0.002, "loss": 2.5596, "step": 275040 }, { "epoch": 0.5479607611883208, "grad_norm": 0.16447708010673523, "learning_rate": 0.002, "loss": 2.5487, "step": 275050 }, { "epoch": 0.5479806834119597, "grad_norm": 0.16504605114459991, "learning_rate": 0.002, "loss": 2.5482, "step": 275060 }, { "epoch": 0.5480006056355986, "grad_norm": 0.16382662951946259, "learning_rate": 0.002, "loss": 2.5531, "step": 275070 }, { "epoch": 0.5480205278592375, "grad_norm": 0.18416531383991241, "learning_rate": 0.002, "loss": 2.5435, "step": 275080 }, { "epoch": 0.5480404500828765, "grad_norm": 0.19415104389190674, "learning_rate": 0.002, "loss": 2.5595, "step": 275090 }, { "epoch": 0.5480603723065154, "grad_norm": 0.16181237995624542, "learning_rate": 0.002, "loss": 2.5513, "step": 275100 }, { "epoch": 0.5480802945301543, "grad_norm": 0.1603105068206787, "learning_rate": 0.002, "loss": 2.5506, "step": 275110 }, { "epoch": 0.5481002167537932, "grad_norm": 0.18141399323940277, "learning_rate": 0.002, "loss": 2.5538, "step": 275120 }, { "epoch": 0.5481201389774321, "grad_norm": 0.14171244204044342, "learning_rate": 0.002, "loss": 2.5615, "step": 275130 }, { "epoch": 0.5481400612010711, "grad_norm": 0.15427906811237335, "learning_rate": 0.002, "loss": 2.5695, "step": 275140 }, { "epoch": 0.54815998342471, "grad_norm": 0.16593419015407562, "learning_rate": 0.002, "loss": 2.564, "step": 275150 }, { "epoch": 0.5481799056483488, "grad_norm": 0.15732090175151825, "learning_rate": 0.002, "loss": 2.5643, "step": 275160 }, { "epoch": 0.5481998278719877, "grad_norm": 0.21540524065494537, "learning_rate": 0.002, "loss": 2.5503, "step": 275170 }, { "epoch": 0.5482197500956266, "grad_norm": 0.18393683433532715, "learning_rate": 0.002, "loss": 2.5575, "step": 275180 }, { "epoch": 0.5482396723192656, "grad_norm": 0.13673105835914612, "learning_rate": 0.002, "loss": 2.5419, "step": 275190 }, { "epoch": 0.5482595945429045, "grad_norm": 0.21240226924419403, "learning_rate": 0.002, "loss": 2.5461, "step": 275200 }, { "epoch": 0.5482795167665434, "grad_norm": 0.15715432167053223, "learning_rate": 0.002, "loss": 2.552, "step": 275210 }, { "epoch": 0.5482994389901823, "grad_norm": 0.16742408275604248, "learning_rate": 0.002, "loss": 2.5809, "step": 275220 }, { "epoch": 0.5483193612138212, "grad_norm": 0.1575632095336914, "learning_rate": 0.002, "loss": 2.5622, "step": 275230 }, { "epoch": 0.5483392834374602, "grad_norm": 0.12994135916233063, "learning_rate": 0.002, "loss": 2.557, "step": 275240 }, { "epoch": 0.5483592056610991, "grad_norm": 0.20526959002017975, "learning_rate": 0.002, "loss": 2.5544, "step": 275250 }, { "epoch": 0.548379127884738, "grad_norm": 0.15373383462429047, "learning_rate": 0.002, "loss": 2.5521, "step": 275260 }, { "epoch": 0.5483990501083769, "grad_norm": 0.22787702083587646, "learning_rate": 0.002, "loss": 2.5653, "step": 275270 }, { "epoch": 0.5484189723320159, "grad_norm": 0.15657661855220795, "learning_rate": 0.002, "loss": 2.5456, "step": 275280 }, { "epoch": 0.5484388945556548, "grad_norm": 0.17231261730194092, "learning_rate": 0.002, "loss": 2.5509, "step": 275290 }, { "epoch": 0.5484588167792936, "grad_norm": 0.15960192680358887, "learning_rate": 0.002, "loss": 2.5444, "step": 275300 }, { "epoch": 0.5484787390029325, "grad_norm": 0.19001302123069763, "learning_rate": 0.002, "loss": 2.5628, "step": 275310 }, { "epoch": 0.5484986612265714, "grad_norm": 0.1461631953716278, "learning_rate": 0.002, "loss": 2.5722, "step": 275320 }, { "epoch": 0.5485185834502104, "grad_norm": 0.157191202044487, "learning_rate": 0.002, "loss": 2.5689, "step": 275330 }, { "epoch": 0.5485385056738493, "grad_norm": 0.1758105307817459, "learning_rate": 0.002, "loss": 2.5469, "step": 275340 }, { "epoch": 0.5485584278974882, "grad_norm": 0.19726116955280304, "learning_rate": 0.002, "loss": 2.5558, "step": 275350 }, { "epoch": 0.5485783501211271, "grad_norm": 0.16063068807125092, "learning_rate": 0.002, "loss": 2.5487, "step": 275360 }, { "epoch": 0.548598272344766, "grad_norm": 0.1445484459400177, "learning_rate": 0.002, "loss": 2.5437, "step": 275370 }, { "epoch": 0.548618194568405, "grad_norm": 0.18240834772586823, "learning_rate": 0.002, "loss": 2.5545, "step": 275380 }, { "epoch": 0.5486381167920439, "grad_norm": 0.16361136734485626, "learning_rate": 0.002, "loss": 2.5676, "step": 275390 }, { "epoch": 0.5486580390156828, "grad_norm": 0.17219023406505585, "learning_rate": 0.002, "loss": 2.5502, "step": 275400 }, { "epoch": 0.5486779612393217, "grad_norm": 0.1837363839149475, "learning_rate": 0.002, "loss": 2.5632, "step": 275410 }, { "epoch": 0.5486978834629606, "grad_norm": 0.15918205678462982, "learning_rate": 0.002, "loss": 2.5651, "step": 275420 }, { "epoch": 0.5487178056865996, "grad_norm": 0.18382206559181213, "learning_rate": 0.002, "loss": 2.5578, "step": 275430 }, { "epoch": 0.5487377279102384, "grad_norm": 0.16942764818668365, "learning_rate": 0.002, "loss": 2.5512, "step": 275440 }, { "epoch": 0.5487576501338773, "grad_norm": 0.17748790979385376, "learning_rate": 0.002, "loss": 2.5473, "step": 275450 }, { "epoch": 0.5487775723575162, "grad_norm": 0.17112106084823608, "learning_rate": 0.002, "loss": 2.5571, "step": 275460 }, { "epoch": 0.5487974945811551, "grad_norm": 0.17543183267116547, "learning_rate": 0.002, "loss": 2.5487, "step": 275470 }, { "epoch": 0.5488174168047941, "grad_norm": 0.1569359302520752, "learning_rate": 0.002, "loss": 2.5547, "step": 275480 }, { "epoch": 0.548837339028433, "grad_norm": 0.1575920134782791, "learning_rate": 0.002, "loss": 2.5549, "step": 275490 }, { "epoch": 0.5488572612520719, "grad_norm": 0.15080079436302185, "learning_rate": 0.002, "loss": 2.5661, "step": 275500 }, { "epoch": 0.5488771834757108, "grad_norm": 0.18330810964107513, "learning_rate": 0.002, "loss": 2.5656, "step": 275510 }, { "epoch": 0.5488971056993497, "grad_norm": 0.1617579311132431, "learning_rate": 0.002, "loss": 2.5684, "step": 275520 }, { "epoch": 0.5489170279229887, "grad_norm": 0.15000736713409424, "learning_rate": 0.002, "loss": 2.5614, "step": 275530 }, { "epoch": 0.5489369501466276, "grad_norm": 0.17746548354625702, "learning_rate": 0.002, "loss": 2.5578, "step": 275540 }, { "epoch": 0.5489568723702665, "grad_norm": 0.16455163061618805, "learning_rate": 0.002, "loss": 2.5644, "step": 275550 }, { "epoch": 0.5489767945939054, "grad_norm": 0.17098306119441986, "learning_rate": 0.002, "loss": 2.5553, "step": 275560 }, { "epoch": 0.5489967168175444, "grad_norm": 0.16320352256298065, "learning_rate": 0.002, "loss": 2.5616, "step": 275570 }, { "epoch": 0.5490166390411833, "grad_norm": 0.15856672823429108, "learning_rate": 0.002, "loss": 2.5743, "step": 275580 }, { "epoch": 0.5490365612648221, "grad_norm": 0.19706663489341736, "learning_rate": 0.002, "loss": 2.5748, "step": 275590 }, { "epoch": 0.549056483488461, "grad_norm": 0.1435202807188034, "learning_rate": 0.002, "loss": 2.5654, "step": 275600 }, { "epoch": 0.5490764057120999, "grad_norm": 0.1650170236825943, "learning_rate": 0.002, "loss": 2.5652, "step": 275610 }, { "epoch": 0.5490963279357389, "grad_norm": 0.20046603679656982, "learning_rate": 0.002, "loss": 2.5599, "step": 275620 }, { "epoch": 0.5491162501593778, "grad_norm": 0.13708600401878357, "learning_rate": 0.002, "loss": 2.5591, "step": 275630 }, { "epoch": 0.5491361723830167, "grad_norm": 0.1870511770248413, "learning_rate": 0.002, "loss": 2.5702, "step": 275640 }, { "epoch": 0.5491560946066556, "grad_norm": 0.15290315449237823, "learning_rate": 0.002, "loss": 2.5419, "step": 275650 }, { "epoch": 0.5491760168302945, "grad_norm": 0.1632610559463501, "learning_rate": 0.002, "loss": 2.5498, "step": 275660 }, { "epoch": 0.5491959390539335, "grad_norm": 0.16696487367153168, "learning_rate": 0.002, "loss": 2.5833, "step": 275670 }, { "epoch": 0.5492158612775724, "grad_norm": 0.6451578140258789, "learning_rate": 0.002, "loss": 2.5501, "step": 275680 }, { "epoch": 0.5492357835012113, "grad_norm": 0.496083527803421, "learning_rate": 0.002, "loss": 2.5686, "step": 275690 }, { "epoch": 0.5492557057248502, "grad_norm": 0.17467337846755981, "learning_rate": 0.002, "loss": 2.5696, "step": 275700 }, { "epoch": 0.549275627948489, "grad_norm": 0.14528490602970123, "learning_rate": 0.002, "loss": 2.5626, "step": 275710 }, { "epoch": 0.549295550172128, "grad_norm": 0.19589991867542267, "learning_rate": 0.002, "loss": 2.5531, "step": 275720 }, { "epoch": 0.549315472395767, "grad_norm": 0.17770040035247803, "learning_rate": 0.002, "loss": 2.551, "step": 275730 }, { "epoch": 0.5493353946194058, "grad_norm": 0.17607533931732178, "learning_rate": 0.002, "loss": 2.56, "step": 275740 }, { "epoch": 0.5493553168430447, "grad_norm": 0.17219142615795135, "learning_rate": 0.002, "loss": 2.5556, "step": 275750 }, { "epoch": 0.5493752390666836, "grad_norm": 0.18858498334884644, "learning_rate": 0.002, "loss": 2.5355, "step": 275760 }, { "epoch": 0.5493951612903226, "grad_norm": 0.16393287479877472, "learning_rate": 0.002, "loss": 2.5528, "step": 275770 }, { "epoch": 0.5494150835139615, "grad_norm": 0.20050829648971558, "learning_rate": 0.002, "loss": 2.5623, "step": 275780 }, { "epoch": 0.5494350057376004, "grad_norm": 0.15994003415107727, "learning_rate": 0.002, "loss": 2.5587, "step": 275790 }, { "epoch": 0.5494549279612393, "grad_norm": 0.2101794332265854, "learning_rate": 0.002, "loss": 2.5547, "step": 275800 }, { "epoch": 0.5494748501848782, "grad_norm": 0.17524851858615875, "learning_rate": 0.002, "loss": 2.557, "step": 275810 }, { "epoch": 0.5494947724085172, "grad_norm": 0.16773906350135803, "learning_rate": 0.002, "loss": 2.566, "step": 275820 }, { "epoch": 0.5495146946321561, "grad_norm": 0.18240515887737274, "learning_rate": 0.002, "loss": 2.5488, "step": 275830 }, { "epoch": 0.549534616855795, "grad_norm": 0.15484459698200226, "learning_rate": 0.002, "loss": 2.5588, "step": 275840 }, { "epoch": 0.5495545390794339, "grad_norm": 0.1917017251253128, "learning_rate": 0.002, "loss": 2.5484, "step": 275850 }, { "epoch": 0.5495744613030729, "grad_norm": 0.1806437373161316, "learning_rate": 0.002, "loss": 2.5711, "step": 275860 }, { "epoch": 0.5495943835267117, "grad_norm": 0.15553228557109833, "learning_rate": 0.002, "loss": 2.5333, "step": 275870 }, { "epoch": 0.5496143057503506, "grad_norm": 0.17233335971832275, "learning_rate": 0.002, "loss": 2.5501, "step": 275880 }, { "epoch": 0.5496342279739895, "grad_norm": 0.15481798350811005, "learning_rate": 0.002, "loss": 2.579, "step": 275890 }, { "epoch": 0.5496541501976284, "grad_norm": 0.1496017426252365, "learning_rate": 0.002, "loss": 2.5533, "step": 275900 }, { "epoch": 0.5496740724212674, "grad_norm": 0.14486457407474518, "learning_rate": 0.002, "loss": 2.5528, "step": 275910 }, { "epoch": 0.5496939946449063, "grad_norm": 0.19349226355552673, "learning_rate": 0.002, "loss": 2.5656, "step": 275920 }, { "epoch": 0.5497139168685452, "grad_norm": 0.17650140821933746, "learning_rate": 0.002, "loss": 2.5527, "step": 275930 }, { "epoch": 0.5497338390921841, "grad_norm": 0.1536870151758194, "learning_rate": 0.002, "loss": 2.5461, "step": 275940 }, { "epoch": 0.549753761315823, "grad_norm": 0.1548227220773697, "learning_rate": 0.002, "loss": 2.555, "step": 275950 }, { "epoch": 0.549773683539462, "grad_norm": 0.1550488919019699, "learning_rate": 0.002, "loss": 2.5587, "step": 275960 }, { "epoch": 0.5497936057631009, "grad_norm": 0.1582510620355606, "learning_rate": 0.002, "loss": 2.5612, "step": 275970 }, { "epoch": 0.5498135279867398, "grad_norm": 0.16864606738090515, "learning_rate": 0.002, "loss": 2.5622, "step": 275980 }, { "epoch": 0.5498334502103787, "grad_norm": 0.1749580204486847, "learning_rate": 0.002, "loss": 2.5489, "step": 275990 }, { "epoch": 0.5498533724340176, "grad_norm": 0.2262866348028183, "learning_rate": 0.002, "loss": 2.5558, "step": 276000 }, { "epoch": 0.5498732946576566, "grad_norm": 0.17130480706691742, "learning_rate": 0.002, "loss": 2.5589, "step": 276010 }, { "epoch": 0.5498932168812954, "grad_norm": 0.15136247873306274, "learning_rate": 0.002, "loss": 2.5371, "step": 276020 }, { "epoch": 0.5499131391049343, "grad_norm": 0.17173224687576294, "learning_rate": 0.002, "loss": 2.5613, "step": 276030 }, { "epoch": 0.5499330613285732, "grad_norm": 0.16531263291835785, "learning_rate": 0.002, "loss": 2.5586, "step": 276040 }, { "epoch": 0.5499529835522121, "grad_norm": 0.1687387079000473, "learning_rate": 0.002, "loss": 2.5476, "step": 276050 }, { "epoch": 0.5499729057758511, "grad_norm": 0.18664784729480743, "learning_rate": 0.002, "loss": 2.5679, "step": 276060 }, { "epoch": 0.54999282799949, "grad_norm": 0.2124350219964981, "learning_rate": 0.002, "loss": 2.5693, "step": 276070 }, { "epoch": 0.5500127502231289, "grad_norm": 0.1570795625448227, "learning_rate": 0.002, "loss": 2.5393, "step": 276080 }, { "epoch": 0.5500326724467678, "grad_norm": 0.162788987159729, "learning_rate": 0.002, "loss": 2.551, "step": 276090 }, { "epoch": 0.5500525946704067, "grad_norm": 0.17233037948608398, "learning_rate": 0.002, "loss": 2.552, "step": 276100 }, { "epoch": 0.5500725168940457, "grad_norm": 0.16086366772651672, "learning_rate": 0.002, "loss": 2.5416, "step": 276110 }, { "epoch": 0.5500924391176846, "grad_norm": 0.18676388263702393, "learning_rate": 0.002, "loss": 2.5654, "step": 276120 }, { "epoch": 0.5501123613413235, "grad_norm": 0.1697680652141571, "learning_rate": 0.002, "loss": 2.5794, "step": 276130 }, { "epoch": 0.5501322835649624, "grad_norm": 0.23677165806293488, "learning_rate": 0.002, "loss": 2.5344, "step": 276140 }, { "epoch": 0.5501522057886012, "grad_norm": 0.1537025421857834, "learning_rate": 0.002, "loss": 2.559, "step": 276150 }, { "epoch": 0.5501721280122402, "grad_norm": 0.17568334937095642, "learning_rate": 0.002, "loss": 2.5589, "step": 276160 }, { "epoch": 0.5501920502358791, "grad_norm": 0.14814090728759766, "learning_rate": 0.002, "loss": 2.5554, "step": 276170 }, { "epoch": 0.550211972459518, "grad_norm": 0.15047694742679596, "learning_rate": 0.002, "loss": 2.5599, "step": 276180 }, { "epoch": 0.5502318946831569, "grad_norm": 0.14622433483600616, "learning_rate": 0.002, "loss": 2.5664, "step": 276190 }, { "epoch": 0.5502518169067959, "grad_norm": 0.15846039354801178, "learning_rate": 0.002, "loss": 2.553, "step": 276200 }, { "epoch": 0.5502717391304348, "grad_norm": 0.158464252948761, "learning_rate": 0.002, "loss": 2.5544, "step": 276210 }, { "epoch": 0.5502916613540737, "grad_norm": 0.15604521334171295, "learning_rate": 0.002, "loss": 2.5565, "step": 276220 }, { "epoch": 0.5503115835777126, "grad_norm": 0.18838949501514435, "learning_rate": 0.002, "loss": 2.555, "step": 276230 }, { "epoch": 0.5503315058013515, "grad_norm": 0.15175414085388184, "learning_rate": 0.002, "loss": 2.5462, "step": 276240 }, { "epoch": 0.5503514280249905, "grad_norm": 0.1815522015094757, "learning_rate": 0.002, "loss": 2.5409, "step": 276250 }, { "epoch": 0.5503713502486294, "grad_norm": 0.1655861884355545, "learning_rate": 0.002, "loss": 2.559, "step": 276260 }, { "epoch": 0.5503912724722683, "grad_norm": 0.1479853242635727, "learning_rate": 0.002, "loss": 2.5522, "step": 276270 }, { "epoch": 0.5504111946959072, "grad_norm": 0.16259418427944183, "learning_rate": 0.002, "loss": 2.5464, "step": 276280 }, { "epoch": 0.550431116919546, "grad_norm": 0.1687374860048294, "learning_rate": 0.002, "loss": 2.5551, "step": 276290 }, { "epoch": 0.550451039143185, "grad_norm": 0.17063046991825104, "learning_rate": 0.002, "loss": 2.5446, "step": 276300 }, { "epoch": 0.5504709613668239, "grad_norm": 0.1603536605834961, "learning_rate": 0.002, "loss": 2.5679, "step": 276310 }, { "epoch": 0.5504908835904628, "grad_norm": 0.1792246699333191, "learning_rate": 0.002, "loss": 2.5542, "step": 276320 }, { "epoch": 0.5505108058141017, "grad_norm": 0.1667710691690445, "learning_rate": 0.002, "loss": 2.5627, "step": 276330 }, { "epoch": 0.5505307280377406, "grad_norm": 0.20489653944969177, "learning_rate": 0.002, "loss": 2.5623, "step": 276340 }, { "epoch": 0.5505506502613796, "grad_norm": 0.13824592530727386, "learning_rate": 0.002, "loss": 2.5648, "step": 276350 }, { "epoch": 0.5505705724850185, "grad_norm": 0.19243605434894562, "learning_rate": 0.002, "loss": 2.5485, "step": 276360 }, { "epoch": 0.5505904947086574, "grad_norm": 0.16495588421821594, "learning_rate": 0.002, "loss": 2.548, "step": 276370 }, { "epoch": 0.5506104169322963, "grad_norm": 0.16065719723701477, "learning_rate": 0.002, "loss": 2.5585, "step": 276380 }, { "epoch": 0.5506303391559352, "grad_norm": 0.138588547706604, "learning_rate": 0.002, "loss": 2.566, "step": 276390 }, { "epoch": 0.5506502613795742, "grad_norm": 0.1676717847585678, "learning_rate": 0.002, "loss": 2.558, "step": 276400 }, { "epoch": 0.5506701836032131, "grad_norm": 0.16411274671554565, "learning_rate": 0.002, "loss": 2.5664, "step": 276410 }, { "epoch": 0.550690105826852, "grad_norm": 0.1673680692911148, "learning_rate": 0.002, "loss": 2.548, "step": 276420 }, { "epoch": 0.5507100280504909, "grad_norm": 0.18470095098018646, "learning_rate": 0.002, "loss": 2.5559, "step": 276430 }, { "epoch": 0.5507299502741297, "grad_norm": 0.18401384353637695, "learning_rate": 0.002, "loss": 2.5565, "step": 276440 }, { "epoch": 0.5507498724977687, "grad_norm": 0.15396925806999207, "learning_rate": 0.002, "loss": 2.5556, "step": 276450 }, { "epoch": 0.5507697947214076, "grad_norm": 0.23160941898822784, "learning_rate": 0.002, "loss": 2.5564, "step": 276460 }, { "epoch": 0.5507897169450465, "grad_norm": 0.17391811311244965, "learning_rate": 0.002, "loss": 2.5584, "step": 276470 }, { "epoch": 0.5508096391686854, "grad_norm": 0.1780371069908142, "learning_rate": 0.002, "loss": 2.5629, "step": 276480 }, { "epoch": 0.5508295613923244, "grad_norm": 0.14689429104328156, "learning_rate": 0.002, "loss": 2.5705, "step": 276490 }, { "epoch": 0.5508494836159633, "grad_norm": 0.14368371665477753, "learning_rate": 0.002, "loss": 2.5483, "step": 276500 }, { "epoch": 0.5508694058396022, "grad_norm": 0.1629403531551361, "learning_rate": 0.002, "loss": 2.5714, "step": 276510 }, { "epoch": 0.5508893280632411, "grad_norm": 0.1971556842327118, "learning_rate": 0.002, "loss": 2.5492, "step": 276520 }, { "epoch": 0.55090925028688, "grad_norm": 0.18377861380577087, "learning_rate": 0.002, "loss": 2.5587, "step": 276530 }, { "epoch": 0.550929172510519, "grad_norm": 0.16865907609462738, "learning_rate": 0.002, "loss": 2.5625, "step": 276540 }, { "epoch": 0.5509490947341579, "grad_norm": 0.16103370487689972, "learning_rate": 0.002, "loss": 2.5404, "step": 276550 }, { "epoch": 0.5509690169577968, "grad_norm": 0.22482869029045105, "learning_rate": 0.002, "loss": 2.5633, "step": 276560 }, { "epoch": 0.5509889391814357, "grad_norm": 0.16599655151367188, "learning_rate": 0.002, "loss": 2.5682, "step": 276570 }, { "epoch": 0.5510088614050745, "grad_norm": 0.12979325652122498, "learning_rate": 0.002, "loss": 2.5465, "step": 276580 }, { "epoch": 0.5510287836287135, "grad_norm": 0.164467453956604, "learning_rate": 0.002, "loss": 2.5427, "step": 276590 }, { "epoch": 0.5510487058523524, "grad_norm": 0.1652517020702362, "learning_rate": 0.002, "loss": 2.5438, "step": 276600 }, { "epoch": 0.5510686280759913, "grad_norm": 0.17538954317569733, "learning_rate": 0.002, "loss": 2.5433, "step": 276610 }, { "epoch": 0.5510885502996302, "grad_norm": 0.17122390866279602, "learning_rate": 0.002, "loss": 2.5707, "step": 276620 }, { "epoch": 0.5511084725232691, "grad_norm": 0.17047014832496643, "learning_rate": 0.002, "loss": 2.5475, "step": 276630 }, { "epoch": 0.5511283947469081, "grad_norm": 0.16311822831630707, "learning_rate": 0.002, "loss": 2.5563, "step": 276640 }, { "epoch": 0.551148316970547, "grad_norm": 0.18136775493621826, "learning_rate": 0.002, "loss": 2.553, "step": 276650 }, { "epoch": 0.5511682391941859, "grad_norm": 0.1587352305650711, "learning_rate": 0.002, "loss": 2.5455, "step": 276660 }, { "epoch": 0.5511881614178248, "grad_norm": 0.14261791110038757, "learning_rate": 0.002, "loss": 2.5625, "step": 276670 }, { "epoch": 0.5512080836414637, "grad_norm": 0.18693464994430542, "learning_rate": 0.002, "loss": 2.5582, "step": 276680 }, { "epoch": 0.5512280058651027, "grad_norm": 0.14816346764564514, "learning_rate": 0.002, "loss": 2.5551, "step": 276690 }, { "epoch": 0.5512479280887416, "grad_norm": 0.15107060968875885, "learning_rate": 0.002, "loss": 2.5535, "step": 276700 }, { "epoch": 0.5512678503123805, "grad_norm": 0.19580325484275818, "learning_rate": 0.002, "loss": 2.5487, "step": 276710 }, { "epoch": 0.5512877725360193, "grad_norm": 0.17875351011753082, "learning_rate": 0.002, "loss": 2.5446, "step": 276720 }, { "epoch": 0.5513076947596582, "grad_norm": 0.18142184615135193, "learning_rate": 0.002, "loss": 2.554, "step": 276730 }, { "epoch": 0.5513276169832972, "grad_norm": 0.16889995336532593, "learning_rate": 0.002, "loss": 2.56, "step": 276740 }, { "epoch": 0.5513475392069361, "grad_norm": 0.1801965981721878, "learning_rate": 0.002, "loss": 2.5568, "step": 276750 }, { "epoch": 0.551367461430575, "grad_norm": 0.1455806940793991, "learning_rate": 0.002, "loss": 2.5508, "step": 276760 }, { "epoch": 0.5513873836542139, "grad_norm": 0.17740345001220703, "learning_rate": 0.002, "loss": 2.5653, "step": 276770 }, { "epoch": 0.5514073058778529, "grad_norm": 0.1592433899641037, "learning_rate": 0.002, "loss": 2.5421, "step": 276780 }, { "epoch": 0.5514272281014918, "grad_norm": 0.14963209629058838, "learning_rate": 0.002, "loss": 2.5511, "step": 276790 }, { "epoch": 0.5514471503251307, "grad_norm": 0.180593803524971, "learning_rate": 0.002, "loss": 2.5679, "step": 276800 }, { "epoch": 0.5514670725487696, "grad_norm": 0.18059514462947845, "learning_rate": 0.002, "loss": 2.5716, "step": 276810 }, { "epoch": 0.5514869947724085, "grad_norm": 0.17896664142608643, "learning_rate": 0.002, "loss": 2.5612, "step": 276820 }, { "epoch": 0.5515069169960475, "grad_norm": 0.19438610970973969, "learning_rate": 0.002, "loss": 2.5732, "step": 276830 }, { "epoch": 0.5515268392196864, "grad_norm": 0.1564955711364746, "learning_rate": 0.002, "loss": 2.5649, "step": 276840 }, { "epoch": 0.5515467614433253, "grad_norm": 0.1638285368680954, "learning_rate": 0.002, "loss": 2.5572, "step": 276850 }, { "epoch": 0.5515666836669642, "grad_norm": 0.1544361263513565, "learning_rate": 0.002, "loss": 2.563, "step": 276860 }, { "epoch": 0.551586605890603, "grad_norm": 0.1744280308485031, "learning_rate": 0.002, "loss": 2.5538, "step": 276870 }, { "epoch": 0.551606528114242, "grad_norm": 0.1447582095861435, "learning_rate": 0.002, "loss": 2.5529, "step": 276880 }, { "epoch": 0.5516264503378809, "grad_norm": 0.21529889106750488, "learning_rate": 0.002, "loss": 2.5451, "step": 276890 }, { "epoch": 0.5516463725615198, "grad_norm": 0.16587425768375397, "learning_rate": 0.002, "loss": 2.5683, "step": 276900 }, { "epoch": 0.5516662947851587, "grad_norm": 0.1712927371263504, "learning_rate": 0.002, "loss": 2.5661, "step": 276910 }, { "epoch": 0.5516862170087976, "grad_norm": 0.15624253451824188, "learning_rate": 0.002, "loss": 2.5615, "step": 276920 }, { "epoch": 0.5517061392324366, "grad_norm": 0.18582980334758759, "learning_rate": 0.002, "loss": 2.5616, "step": 276930 }, { "epoch": 0.5517260614560755, "grad_norm": 0.15816211700439453, "learning_rate": 0.002, "loss": 2.5585, "step": 276940 }, { "epoch": 0.5517459836797144, "grad_norm": 0.15581528842449188, "learning_rate": 0.002, "loss": 2.5452, "step": 276950 }, { "epoch": 0.5517659059033533, "grad_norm": 0.1783822625875473, "learning_rate": 0.002, "loss": 2.5639, "step": 276960 }, { "epoch": 0.5517858281269922, "grad_norm": 0.14748618006706238, "learning_rate": 0.002, "loss": 2.5433, "step": 276970 }, { "epoch": 0.5518057503506312, "grad_norm": 0.1788681000471115, "learning_rate": 0.002, "loss": 2.5486, "step": 276980 }, { "epoch": 0.5518256725742701, "grad_norm": 0.16829857230186462, "learning_rate": 0.002, "loss": 2.5649, "step": 276990 }, { "epoch": 0.551845594797909, "grad_norm": 0.168706014752388, "learning_rate": 0.002, "loss": 2.5594, "step": 277000 }, { "epoch": 0.5518655170215478, "grad_norm": 0.15280652046203613, "learning_rate": 0.002, "loss": 2.5692, "step": 277010 }, { "epoch": 0.5518854392451867, "grad_norm": 0.1675337255001068, "learning_rate": 0.002, "loss": 2.569, "step": 277020 }, { "epoch": 0.5519053614688257, "grad_norm": 0.15576529502868652, "learning_rate": 0.002, "loss": 2.568, "step": 277030 }, { "epoch": 0.5519252836924646, "grad_norm": 0.15718607604503632, "learning_rate": 0.002, "loss": 2.5568, "step": 277040 }, { "epoch": 0.5519452059161035, "grad_norm": 0.17071057856082916, "learning_rate": 0.002, "loss": 2.5557, "step": 277050 }, { "epoch": 0.5519651281397424, "grad_norm": 0.1517796516418457, "learning_rate": 0.002, "loss": 2.555, "step": 277060 }, { "epoch": 0.5519850503633814, "grad_norm": 0.19791167974472046, "learning_rate": 0.002, "loss": 2.5517, "step": 277070 }, { "epoch": 0.5520049725870203, "grad_norm": 0.24764449894428253, "learning_rate": 0.002, "loss": 2.5665, "step": 277080 }, { "epoch": 0.5520248948106592, "grad_norm": 0.2046065330505371, "learning_rate": 0.002, "loss": 2.5664, "step": 277090 }, { "epoch": 0.5520448170342981, "grad_norm": 0.1622489094734192, "learning_rate": 0.002, "loss": 2.5691, "step": 277100 }, { "epoch": 0.552064739257937, "grad_norm": 0.15462036430835724, "learning_rate": 0.002, "loss": 2.5612, "step": 277110 }, { "epoch": 0.552084661481576, "grad_norm": 0.18146690726280212, "learning_rate": 0.002, "loss": 2.5567, "step": 277120 }, { "epoch": 0.5521045837052149, "grad_norm": 0.15973922610282898, "learning_rate": 0.002, "loss": 2.5545, "step": 277130 }, { "epoch": 0.5521245059288538, "grad_norm": 0.17178982496261597, "learning_rate": 0.002, "loss": 2.5566, "step": 277140 }, { "epoch": 0.5521444281524927, "grad_norm": 0.15350861847400665, "learning_rate": 0.002, "loss": 2.553, "step": 277150 }, { "epoch": 0.5521643503761315, "grad_norm": 0.1869756430387497, "learning_rate": 0.002, "loss": 2.5635, "step": 277160 }, { "epoch": 0.5521842725997705, "grad_norm": 0.15080685913562775, "learning_rate": 0.002, "loss": 2.5567, "step": 277170 }, { "epoch": 0.5522041948234094, "grad_norm": 0.16023097932338715, "learning_rate": 0.002, "loss": 2.55, "step": 277180 }, { "epoch": 0.5522241170470483, "grad_norm": 0.16637812554836273, "learning_rate": 0.002, "loss": 2.5523, "step": 277190 }, { "epoch": 0.5522440392706872, "grad_norm": 0.23819853365421295, "learning_rate": 0.002, "loss": 2.5608, "step": 277200 }, { "epoch": 0.5522639614943261, "grad_norm": 0.16550591588020325, "learning_rate": 0.002, "loss": 2.5642, "step": 277210 }, { "epoch": 0.5522838837179651, "grad_norm": 0.18275395035743713, "learning_rate": 0.002, "loss": 2.5552, "step": 277220 }, { "epoch": 0.552303805941604, "grad_norm": 0.1694725900888443, "learning_rate": 0.002, "loss": 2.5682, "step": 277230 }, { "epoch": 0.5523237281652429, "grad_norm": 0.1924019157886505, "learning_rate": 0.002, "loss": 2.5496, "step": 277240 }, { "epoch": 0.5523436503888818, "grad_norm": 0.16108860075473785, "learning_rate": 0.002, "loss": 2.5489, "step": 277250 }, { "epoch": 0.5523635726125207, "grad_norm": 0.18738871812820435, "learning_rate": 0.002, "loss": 2.5358, "step": 277260 }, { "epoch": 0.5523834948361597, "grad_norm": 0.14572812616825104, "learning_rate": 0.002, "loss": 2.5575, "step": 277270 }, { "epoch": 0.5524034170597986, "grad_norm": 0.17322467267513275, "learning_rate": 0.002, "loss": 2.5571, "step": 277280 }, { "epoch": 0.5524233392834375, "grad_norm": 0.18739251792430878, "learning_rate": 0.002, "loss": 2.5749, "step": 277290 }, { "epoch": 0.5524432615070763, "grad_norm": 0.16892285645008087, "learning_rate": 0.002, "loss": 2.5595, "step": 277300 }, { "epoch": 0.5524631837307152, "grad_norm": 0.1706181913614273, "learning_rate": 0.002, "loss": 2.5588, "step": 277310 }, { "epoch": 0.5524831059543542, "grad_norm": 0.14882224798202515, "learning_rate": 0.002, "loss": 2.5525, "step": 277320 }, { "epoch": 0.5525030281779931, "grad_norm": 0.15847229957580566, "learning_rate": 0.002, "loss": 2.5505, "step": 277330 }, { "epoch": 0.552522950401632, "grad_norm": 0.1977054327726364, "learning_rate": 0.002, "loss": 2.5703, "step": 277340 }, { "epoch": 0.5525428726252709, "grad_norm": 0.16211915016174316, "learning_rate": 0.002, "loss": 2.5685, "step": 277350 }, { "epoch": 0.5525627948489099, "grad_norm": 0.17444084584712982, "learning_rate": 0.002, "loss": 2.556, "step": 277360 }, { "epoch": 0.5525827170725488, "grad_norm": 0.1575814187526703, "learning_rate": 0.002, "loss": 2.554, "step": 277370 }, { "epoch": 0.5526026392961877, "grad_norm": 0.20773513615131378, "learning_rate": 0.002, "loss": 2.5487, "step": 277380 }, { "epoch": 0.5526225615198266, "grad_norm": 0.1661074012517929, "learning_rate": 0.002, "loss": 2.5541, "step": 277390 }, { "epoch": 0.5526424837434655, "grad_norm": 0.14759936928749084, "learning_rate": 0.002, "loss": 2.5601, "step": 277400 }, { "epoch": 0.5526624059671045, "grad_norm": 0.1939546912908554, "learning_rate": 0.002, "loss": 2.547, "step": 277410 }, { "epoch": 0.5526823281907434, "grad_norm": 0.17154935002326965, "learning_rate": 0.002, "loss": 2.5615, "step": 277420 }, { "epoch": 0.5527022504143823, "grad_norm": 0.19038942456245422, "learning_rate": 0.002, "loss": 2.5662, "step": 277430 }, { "epoch": 0.5527221726380211, "grad_norm": 0.1598149836063385, "learning_rate": 0.002, "loss": 2.5525, "step": 277440 }, { "epoch": 0.55274209486166, "grad_norm": 0.1454102247953415, "learning_rate": 0.002, "loss": 2.5453, "step": 277450 }, { "epoch": 0.552762017085299, "grad_norm": 0.17209340631961823, "learning_rate": 0.002, "loss": 2.5641, "step": 277460 }, { "epoch": 0.5527819393089379, "grad_norm": 0.15940985083580017, "learning_rate": 0.002, "loss": 2.563, "step": 277470 }, { "epoch": 0.5528018615325768, "grad_norm": 0.18363381922245026, "learning_rate": 0.002, "loss": 2.5618, "step": 277480 }, { "epoch": 0.5528217837562157, "grad_norm": 0.2002255916595459, "learning_rate": 0.002, "loss": 2.5379, "step": 277490 }, { "epoch": 0.5528417059798546, "grad_norm": 0.15023154020309448, "learning_rate": 0.002, "loss": 2.5389, "step": 277500 }, { "epoch": 0.5528616282034936, "grad_norm": 0.17791028320789337, "learning_rate": 0.002, "loss": 2.5686, "step": 277510 }, { "epoch": 0.5528815504271325, "grad_norm": 0.19216464459896088, "learning_rate": 0.002, "loss": 2.5439, "step": 277520 }, { "epoch": 0.5529014726507714, "grad_norm": 0.2010483592748642, "learning_rate": 0.002, "loss": 2.5623, "step": 277530 }, { "epoch": 0.5529213948744103, "grad_norm": 0.20028714835643768, "learning_rate": 0.002, "loss": 2.5597, "step": 277540 }, { "epoch": 0.5529413170980492, "grad_norm": 0.1446782797574997, "learning_rate": 0.002, "loss": 2.5502, "step": 277550 }, { "epoch": 0.5529612393216882, "grad_norm": 0.15350405871868134, "learning_rate": 0.002, "loss": 2.5522, "step": 277560 }, { "epoch": 0.5529811615453271, "grad_norm": 0.16315455734729767, "learning_rate": 0.002, "loss": 2.5478, "step": 277570 }, { "epoch": 0.553001083768966, "grad_norm": 0.18758350610733032, "learning_rate": 0.002, "loss": 2.5743, "step": 277580 }, { "epoch": 0.5530210059926048, "grad_norm": 0.1628405898809433, "learning_rate": 0.002, "loss": 2.5532, "step": 277590 }, { "epoch": 0.5530409282162437, "grad_norm": 0.1793588250875473, "learning_rate": 0.002, "loss": 2.5525, "step": 277600 }, { "epoch": 0.5530608504398827, "grad_norm": 0.1612841933965683, "learning_rate": 0.002, "loss": 2.5581, "step": 277610 }, { "epoch": 0.5530807726635216, "grad_norm": 0.15918149054050446, "learning_rate": 0.002, "loss": 2.5589, "step": 277620 }, { "epoch": 0.5531006948871605, "grad_norm": 0.19015224277973175, "learning_rate": 0.002, "loss": 2.5485, "step": 277630 }, { "epoch": 0.5531206171107994, "grad_norm": 0.14703018963336945, "learning_rate": 0.002, "loss": 2.5549, "step": 277640 }, { "epoch": 0.5531405393344384, "grad_norm": 0.2007768303155899, "learning_rate": 0.002, "loss": 2.5502, "step": 277650 }, { "epoch": 0.5531604615580773, "grad_norm": 0.17097559571266174, "learning_rate": 0.002, "loss": 2.5644, "step": 277660 }, { "epoch": 0.5531803837817162, "grad_norm": 0.17012929916381836, "learning_rate": 0.002, "loss": 2.5524, "step": 277670 }, { "epoch": 0.5532003060053551, "grad_norm": 0.15872471034526825, "learning_rate": 0.002, "loss": 2.5546, "step": 277680 }, { "epoch": 0.553220228228994, "grad_norm": 0.17650920152664185, "learning_rate": 0.002, "loss": 2.5358, "step": 277690 }, { "epoch": 0.553240150452633, "grad_norm": 0.16788125038146973, "learning_rate": 0.002, "loss": 2.5616, "step": 277700 }, { "epoch": 0.5532600726762719, "grad_norm": 0.14092615246772766, "learning_rate": 0.002, "loss": 2.5484, "step": 277710 }, { "epoch": 0.5532799948999108, "grad_norm": 0.15451468527317047, "learning_rate": 0.002, "loss": 2.5597, "step": 277720 }, { "epoch": 0.5532999171235496, "grad_norm": 0.17661626636981964, "learning_rate": 0.002, "loss": 2.5583, "step": 277730 }, { "epoch": 0.5533198393471885, "grad_norm": 0.20190908014774323, "learning_rate": 0.002, "loss": 2.5628, "step": 277740 }, { "epoch": 0.5533397615708275, "grad_norm": 0.1423071175813675, "learning_rate": 0.002, "loss": 2.5549, "step": 277750 }, { "epoch": 0.5533596837944664, "grad_norm": 0.18701903522014618, "learning_rate": 0.002, "loss": 2.5632, "step": 277760 }, { "epoch": 0.5533796060181053, "grad_norm": 0.17634153366088867, "learning_rate": 0.002, "loss": 2.5634, "step": 277770 }, { "epoch": 0.5533995282417442, "grad_norm": 0.1842924952507019, "learning_rate": 0.002, "loss": 2.557, "step": 277780 }, { "epoch": 0.5534194504653831, "grad_norm": 0.18074996769428253, "learning_rate": 0.002, "loss": 2.5677, "step": 277790 }, { "epoch": 0.5534393726890221, "grad_norm": 0.16424471139907837, "learning_rate": 0.002, "loss": 2.5599, "step": 277800 }, { "epoch": 0.553459294912661, "grad_norm": 0.17323018610477448, "learning_rate": 0.002, "loss": 2.5518, "step": 277810 }, { "epoch": 0.5534792171362999, "grad_norm": 0.16751737892627716, "learning_rate": 0.002, "loss": 2.5734, "step": 277820 }, { "epoch": 0.5534991393599388, "grad_norm": 0.16008880734443665, "learning_rate": 0.002, "loss": 2.5534, "step": 277830 }, { "epoch": 0.5535190615835777, "grad_norm": 0.19569998979568481, "learning_rate": 0.002, "loss": 2.5482, "step": 277840 }, { "epoch": 0.5535389838072167, "grad_norm": 0.1697608381509781, "learning_rate": 0.002, "loss": 2.5516, "step": 277850 }, { "epoch": 0.5535589060308556, "grad_norm": 0.14918790757656097, "learning_rate": 0.002, "loss": 2.5514, "step": 277860 }, { "epoch": 0.5535788282544944, "grad_norm": 0.17270983755588531, "learning_rate": 0.002, "loss": 2.5588, "step": 277870 }, { "epoch": 0.5535987504781333, "grad_norm": 0.2087918221950531, "learning_rate": 0.002, "loss": 2.5482, "step": 277880 }, { "epoch": 0.5536186727017722, "grad_norm": 0.19016291201114655, "learning_rate": 0.002, "loss": 2.546, "step": 277890 }, { "epoch": 0.5536385949254112, "grad_norm": 0.17368590831756592, "learning_rate": 0.002, "loss": 2.5402, "step": 277900 }, { "epoch": 0.5536585171490501, "grad_norm": 0.17597432434558868, "learning_rate": 0.002, "loss": 2.5582, "step": 277910 }, { "epoch": 0.553678439372689, "grad_norm": 0.16321130096912384, "learning_rate": 0.002, "loss": 2.5628, "step": 277920 }, { "epoch": 0.5536983615963279, "grad_norm": 0.2271551936864853, "learning_rate": 0.002, "loss": 2.5543, "step": 277930 }, { "epoch": 0.5537182838199668, "grad_norm": 0.16093413531780243, "learning_rate": 0.002, "loss": 2.5437, "step": 277940 }, { "epoch": 0.5537382060436058, "grad_norm": 0.14822591841220856, "learning_rate": 0.002, "loss": 2.5438, "step": 277950 }, { "epoch": 0.5537581282672447, "grad_norm": 0.15686213970184326, "learning_rate": 0.002, "loss": 2.5672, "step": 277960 }, { "epoch": 0.5537780504908836, "grad_norm": 0.17114338278770447, "learning_rate": 0.002, "loss": 2.552, "step": 277970 }, { "epoch": 0.5537979727145225, "grad_norm": 0.16039539873600006, "learning_rate": 0.002, "loss": 2.5708, "step": 277980 }, { "epoch": 0.5538178949381615, "grad_norm": 0.21434076130390167, "learning_rate": 0.002, "loss": 2.5625, "step": 277990 }, { "epoch": 0.5538378171618004, "grad_norm": 0.15260089933872223, "learning_rate": 0.002, "loss": 2.5615, "step": 278000 }, { "epoch": 0.5538577393854393, "grad_norm": 0.1733294129371643, "learning_rate": 0.002, "loss": 2.5576, "step": 278010 }, { "epoch": 0.5538776616090781, "grad_norm": 0.17809832096099854, "learning_rate": 0.002, "loss": 2.556, "step": 278020 }, { "epoch": 0.553897583832717, "grad_norm": 0.17375285923480988, "learning_rate": 0.002, "loss": 2.5532, "step": 278030 }, { "epoch": 0.553917506056356, "grad_norm": 0.16461701691150665, "learning_rate": 0.002, "loss": 2.5615, "step": 278040 }, { "epoch": 0.5539374282799949, "grad_norm": 0.176151841878891, "learning_rate": 0.002, "loss": 2.5407, "step": 278050 }, { "epoch": 0.5539573505036338, "grad_norm": 0.17972710728645325, "learning_rate": 0.002, "loss": 2.5561, "step": 278060 }, { "epoch": 0.5539772727272727, "grad_norm": 0.171569362282753, "learning_rate": 0.002, "loss": 2.5684, "step": 278070 }, { "epoch": 0.5539971949509116, "grad_norm": 0.16652177274227142, "learning_rate": 0.002, "loss": 2.5649, "step": 278080 }, { "epoch": 0.5540171171745506, "grad_norm": 0.170796200633049, "learning_rate": 0.002, "loss": 2.5721, "step": 278090 }, { "epoch": 0.5540370393981895, "grad_norm": 0.14334414899349213, "learning_rate": 0.002, "loss": 2.5403, "step": 278100 }, { "epoch": 0.5540569616218284, "grad_norm": 0.1623578667640686, "learning_rate": 0.002, "loss": 2.554, "step": 278110 }, { "epoch": 0.5540768838454673, "grad_norm": 0.22214776277542114, "learning_rate": 0.002, "loss": 2.5539, "step": 278120 }, { "epoch": 0.5540968060691062, "grad_norm": 0.14156180620193481, "learning_rate": 0.002, "loss": 2.5588, "step": 278130 }, { "epoch": 0.5541167282927452, "grad_norm": 0.15734195709228516, "learning_rate": 0.002, "loss": 2.5597, "step": 278140 }, { "epoch": 0.554136650516384, "grad_norm": 0.19879555702209473, "learning_rate": 0.002, "loss": 2.5453, "step": 278150 }, { "epoch": 0.554156572740023, "grad_norm": 0.15557391941547394, "learning_rate": 0.002, "loss": 2.5531, "step": 278160 }, { "epoch": 0.5541764949636618, "grad_norm": 0.16870972514152527, "learning_rate": 0.002, "loss": 2.5746, "step": 278170 }, { "epoch": 0.5541964171873007, "grad_norm": 0.17866891622543335, "learning_rate": 0.002, "loss": 2.5639, "step": 278180 }, { "epoch": 0.5542163394109397, "grad_norm": 0.15380513668060303, "learning_rate": 0.002, "loss": 2.5788, "step": 278190 }, { "epoch": 0.5542362616345786, "grad_norm": 0.15107952058315277, "learning_rate": 0.002, "loss": 2.5619, "step": 278200 }, { "epoch": 0.5542561838582175, "grad_norm": 0.15967614948749542, "learning_rate": 0.002, "loss": 2.5658, "step": 278210 }, { "epoch": 0.5542761060818564, "grad_norm": 0.15205197036266327, "learning_rate": 0.002, "loss": 2.5585, "step": 278220 }, { "epoch": 0.5542960283054953, "grad_norm": 0.16961055994033813, "learning_rate": 0.002, "loss": 2.5522, "step": 278230 }, { "epoch": 0.5543159505291343, "grad_norm": 0.1529950648546219, "learning_rate": 0.002, "loss": 2.5515, "step": 278240 }, { "epoch": 0.5543358727527732, "grad_norm": 0.18818140029907227, "learning_rate": 0.002, "loss": 2.565, "step": 278250 }, { "epoch": 0.5543557949764121, "grad_norm": 0.17714744806289673, "learning_rate": 0.002, "loss": 2.5589, "step": 278260 }, { "epoch": 0.554375717200051, "grad_norm": 0.15514151751995087, "learning_rate": 0.002, "loss": 2.5464, "step": 278270 }, { "epoch": 0.55439563942369, "grad_norm": 0.1861717849969864, "learning_rate": 0.002, "loss": 2.5603, "step": 278280 }, { "epoch": 0.5544155616473289, "grad_norm": 0.18813209235668182, "learning_rate": 0.002, "loss": 2.5666, "step": 278290 }, { "epoch": 0.5544354838709677, "grad_norm": 0.1825890988111496, "learning_rate": 0.002, "loss": 2.5593, "step": 278300 }, { "epoch": 0.5544554060946066, "grad_norm": 0.16451889276504517, "learning_rate": 0.002, "loss": 2.5416, "step": 278310 }, { "epoch": 0.5544753283182455, "grad_norm": 0.16433538496494293, "learning_rate": 0.002, "loss": 2.5562, "step": 278320 }, { "epoch": 0.5544952505418845, "grad_norm": 0.19649384915828705, "learning_rate": 0.002, "loss": 2.5615, "step": 278330 }, { "epoch": 0.5545151727655234, "grad_norm": 0.1598125398159027, "learning_rate": 0.002, "loss": 2.5684, "step": 278340 }, { "epoch": 0.5545350949891623, "grad_norm": 0.1704121083021164, "learning_rate": 0.002, "loss": 2.5748, "step": 278350 }, { "epoch": 0.5545550172128012, "grad_norm": 0.1383194476366043, "learning_rate": 0.002, "loss": 2.5653, "step": 278360 }, { "epoch": 0.5545749394364401, "grad_norm": 0.2108791619539261, "learning_rate": 0.002, "loss": 2.561, "step": 278370 }, { "epoch": 0.5545948616600791, "grad_norm": 0.15305359661579132, "learning_rate": 0.002, "loss": 2.5582, "step": 278380 }, { "epoch": 0.554614783883718, "grad_norm": 0.1970968246459961, "learning_rate": 0.002, "loss": 2.5567, "step": 278390 }, { "epoch": 0.5546347061073569, "grad_norm": 0.1617959886789322, "learning_rate": 0.002, "loss": 2.5584, "step": 278400 }, { "epoch": 0.5546546283309958, "grad_norm": 0.14456841349601746, "learning_rate": 0.002, "loss": 2.5301, "step": 278410 }, { "epoch": 0.5546745505546347, "grad_norm": 0.17573019862174988, "learning_rate": 0.002, "loss": 2.5613, "step": 278420 }, { "epoch": 0.5546944727782737, "grad_norm": 0.15304479002952576, "learning_rate": 0.002, "loss": 2.5651, "step": 278430 }, { "epoch": 0.5547143950019126, "grad_norm": 0.16169898211956024, "learning_rate": 0.002, "loss": 2.552, "step": 278440 }, { "epoch": 0.5547343172255514, "grad_norm": 0.188339963555336, "learning_rate": 0.002, "loss": 2.5535, "step": 278450 }, { "epoch": 0.5547542394491903, "grad_norm": 0.17655500769615173, "learning_rate": 0.002, "loss": 2.5544, "step": 278460 }, { "epoch": 0.5547741616728292, "grad_norm": 0.16636550426483154, "learning_rate": 0.002, "loss": 2.5628, "step": 278470 }, { "epoch": 0.5547940838964682, "grad_norm": 0.16925188899040222, "learning_rate": 0.002, "loss": 2.5588, "step": 278480 }, { "epoch": 0.5548140061201071, "grad_norm": 0.34741121530532837, "learning_rate": 0.002, "loss": 2.5672, "step": 278490 }, { "epoch": 0.554833928343746, "grad_norm": 0.1534595489501953, "learning_rate": 0.002, "loss": 2.5586, "step": 278500 }, { "epoch": 0.5548538505673849, "grad_norm": 0.18276982009410858, "learning_rate": 0.002, "loss": 2.5695, "step": 278510 }, { "epoch": 0.5548737727910238, "grad_norm": 0.15707892179489136, "learning_rate": 0.002, "loss": 2.5472, "step": 278520 }, { "epoch": 0.5548936950146628, "grad_norm": 0.1635669469833374, "learning_rate": 0.002, "loss": 2.5388, "step": 278530 }, { "epoch": 0.5549136172383017, "grad_norm": 0.1476142406463623, "learning_rate": 0.002, "loss": 2.5492, "step": 278540 }, { "epoch": 0.5549335394619406, "grad_norm": 0.17913630604743958, "learning_rate": 0.002, "loss": 2.5586, "step": 278550 }, { "epoch": 0.5549534616855795, "grad_norm": 0.166945680975914, "learning_rate": 0.002, "loss": 2.561, "step": 278560 }, { "epoch": 0.5549733839092185, "grad_norm": 0.1622319221496582, "learning_rate": 0.002, "loss": 2.5605, "step": 278570 }, { "epoch": 0.5549933061328574, "grad_norm": 0.19230344891548157, "learning_rate": 0.002, "loss": 2.5448, "step": 278580 }, { "epoch": 0.5550132283564962, "grad_norm": 0.16427162289619446, "learning_rate": 0.002, "loss": 2.5633, "step": 278590 }, { "epoch": 0.5550331505801351, "grad_norm": 0.17562086880207062, "learning_rate": 0.002, "loss": 2.5428, "step": 278600 }, { "epoch": 0.555053072803774, "grad_norm": 0.18878600001335144, "learning_rate": 0.002, "loss": 2.5601, "step": 278610 }, { "epoch": 0.555072995027413, "grad_norm": 0.17605678737163544, "learning_rate": 0.002, "loss": 2.5633, "step": 278620 }, { "epoch": 0.5550929172510519, "grad_norm": 0.1757245808839798, "learning_rate": 0.002, "loss": 2.5588, "step": 278630 }, { "epoch": 0.5551128394746908, "grad_norm": 0.16212357580661774, "learning_rate": 0.002, "loss": 2.5653, "step": 278640 }, { "epoch": 0.5551327616983297, "grad_norm": 0.1582527607679367, "learning_rate": 0.002, "loss": 2.578, "step": 278650 }, { "epoch": 0.5551526839219686, "grad_norm": 0.17965292930603027, "learning_rate": 0.002, "loss": 2.5545, "step": 278660 }, { "epoch": 0.5551726061456076, "grad_norm": 0.1651231050491333, "learning_rate": 0.002, "loss": 2.5467, "step": 278670 }, { "epoch": 0.5551925283692465, "grad_norm": 0.20874729752540588, "learning_rate": 0.002, "loss": 2.5501, "step": 278680 }, { "epoch": 0.5552124505928854, "grad_norm": 0.16557036340236664, "learning_rate": 0.002, "loss": 2.5625, "step": 278690 }, { "epoch": 0.5552323728165243, "grad_norm": 0.13264556229114532, "learning_rate": 0.002, "loss": 2.5424, "step": 278700 }, { "epoch": 0.5552522950401632, "grad_norm": 0.14907532930374146, "learning_rate": 0.002, "loss": 2.5644, "step": 278710 }, { "epoch": 0.5552722172638022, "grad_norm": 0.14102299511432648, "learning_rate": 0.002, "loss": 2.5579, "step": 278720 }, { "epoch": 0.555292139487441, "grad_norm": 0.17949379980564117, "learning_rate": 0.002, "loss": 2.5718, "step": 278730 }, { "epoch": 0.5553120617110799, "grad_norm": 0.17753487825393677, "learning_rate": 0.002, "loss": 2.5781, "step": 278740 }, { "epoch": 0.5553319839347188, "grad_norm": 0.18682900071144104, "learning_rate": 0.002, "loss": 2.5604, "step": 278750 }, { "epoch": 0.5553519061583577, "grad_norm": 0.16335180401802063, "learning_rate": 0.002, "loss": 2.5572, "step": 278760 }, { "epoch": 0.5553718283819967, "grad_norm": 0.19501875340938568, "learning_rate": 0.002, "loss": 2.5715, "step": 278770 }, { "epoch": 0.5553917506056356, "grad_norm": 0.16797778010368347, "learning_rate": 0.002, "loss": 2.5573, "step": 278780 }, { "epoch": 0.5554116728292745, "grad_norm": 0.18414804339408875, "learning_rate": 0.002, "loss": 2.5573, "step": 278790 }, { "epoch": 0.5554315950529134, "grad_norm": 0.13842688500881195, "learning_rate": 0.002, "loss": 2.5642, "step": 278800 }, { "epoch": 0.5554515172765523, "grad_norm": 0.167174831032753, "learning_rate": 0.002, "loss": 2.5618, "step": 278810 }, { "epoch": 0.5554714395001913, "grad_norm": 0.17201745510101318, "learning_rate": 0.002, "loss": 2.5528, "step": 278820 }, { "epoch": 0.5554913617238302, "grad_norm": 0.1482696533203125, "learning_rate": 0.002, "loss": 2.5602, "step": 278830 }, { "epoch": 0.5555112839474691, "grad_norm": 0.18641754984855652, "learning_rate": 0.002, "loss": 2.5545, "step": 278840 }, { "epoch": 0.555531206171108, "grad_norm": 0.1792415976524353, "learning_rate": 0.002, "loss": 2.554, "step": 278850 }, { "epoch": 0.555551128394747, "grad_norm": 0.1589505523443222, "learning_rate": 0.002, "loss": 2.5585, "step": 278860 }, { "epoch": 0.5555710506183859, "grad_norm": 0.20280814170837402, "learning_rate": 0.002, "loss": 2.5463, "step": 278870 }, { "epoch": 0.5555909728420247, "grad_norm": 0.20601747930049896, "learning_rate": 0.002, "loss": 2.5571, "step": 278880 }, { "epoch": 0.5556108950656636, "grad_norm": 0.15053996443748474, "learning_rate": 0.002, "loss": 2.5676, "step": 278890 }, { "epoch": 0.5556308172893025, "grad_norm": 0.16139332950115204, "learning_rate": 0.002, "loss": 2.5541, "step": 278900 }, { "epoch": 0.5556507395129415, "grad_norm": 0.15850727260112762, "learning_rate": 0.002, "loss": 2.56, "step": 278910 }, { "epoch": 0.5556706617365804, "grad_norm": 0.17622898519039154, "learning_rate": 0.002, "loss": 2.5647, "step": 278920 }, { "epoch": 0.5556905839602193, "grad_norm": 0.16849260032176971, "learning_rate": 0.002, "loss": 2.5553, "step": 278930 }, { "epoch": 0.5557105061838582, "grad_norm": 0.1720755249261856, "learning_rate": 0.002, "loss": 2.569, "step": 278940 }, { "epoch": 0.5557304284074971, "grad_norm": 0.1704106628894806, "learning_rate": 0.002, "loss": 2.5619, "step": 278950 }, { "epoch": 0.5557503506311361, "grad_norm": 0.19020433723926544, "learning_rate": 0.002, "loss": 2.5453, "step": 278960 }, { "epoch": 0.555770272854775, "grad_norm": 0.15791331231594086, "learning_rate": 0.002, "loss": 2.5658, "step": 278970 }, { "epoch": 0.5557901950784139, "grad_norm": 0.18113496899604797, "learning_rate": 0.002, "loss": 2.5627, "step": 278980 }, { "epoch": 0.5558101173020528, "grad_norm": 0.15881338715553284, "learning_rate": 0.002, "loss": 2.5471, "step": 278990 }, { "epoch": 0.5558300395256917, "grad_norm": 0.1913062185049057, "learning_rate": 0.002, "loss": 2.548, "step": 279000 }, { "epoch": 0.5558499617493307, "grad_norm": 0.17111198604106903, "learning_rate": 0.002, "loss": 2.5572, "step": 279010 }, { "epoch": 0.5558698839729695, "grad_norm": 0.17081275582313538, "learning_rate": 0.002, "loss": 2.562, "step": 279020 }, { "epoch": 0.5558898061966084, "grad_norm": 0.16791653633117676, "learning_rate": 0.002, "loss": 2.5621, "step": 279030 }, { "epoch": 0.5559097284202473, "grad_norm": 0.15089832246303558, "learning_rate": 0.002, "loss": 2.5546, "step": 279040 }, { "epoch": 0.5559296506438862, "grad_norm": 0.14878112077713013, "learning_rate": 0.002, "loss": 2.5616, "step": 279050 }, { "epoch": 0.5559495728675252, "grad_norm": 0.1688721925020218, "learning_rate": 0.002, "loss": 2.554, "step": 279060 }, { "epoch": 0.5559694950911641, "grad_norm": 0.16258247196674347, "learning_rate": 0.002, "loss": 2.5629, "step": 279070 }, { "epoch": 0.555989417314803, "grad_norm": 0.1676369160413742, "learning_rate": 0.002, "loss": 2.5502, "step": 279080 }, { "epoch": 0.5560093395384419, "grad_norm": 0.1492299735546112, "learning_rate": 0.002, "loss": 2.5524, "step": 279090 }, { "epoch": 0.5560292617620808, "grad_norm": 0.17522789537906647, "learning_rate": 0.002, "loss": 2.5412, "step": 279100 }, { "epoch": 0.5560491839857198, "grad_norm": 0.15402522683143616, "learning_rate": 0.002, "loss": 2.5641, "step": 279110 }, { "epoch": 0.5560691062093587, "grad_norm": 0.18011507391929626, "learning_rate": 0.002, "loss": 2.5605, "step": 279120 }, { "epoch": 0.5560890284329976, "grad_norm": 0.20379051566123962, "learning_rate": 0.002, "loss": 2.5679, "step": 279130 }, { "epoch": 0.5561089506566365, "grad_norm": 0.15070052444934845, "learning_rate": 0.002, "loss": 2.5501, "step": 279140 }, { "epoch": 0.5561288728802755, "grad_norm": 0.19010643661022186, "learning_rate": 0.002, "loss": 2.5656, "step": 279150 }, { "epoch": 0.5561487951039144, "grad_norm": 0.20062722265720367, "learning_rate": 0.002, "loss": 2.5562, "step": 279160 }, { "epoch": 0.5561687173275532, "grad_norm": 0.16328057646751404, "learning_rate": 0.002, "loss": 2.5643, "step": 279170 }, { "epoch": 0.5561886395511921, "grad_norm": 0.1434282660484314, "learning_rate": 0.002, "loss": 2.5465, "step": 279180 }, { "epoch": 0.556208561774831, "grad_norm": 0.16492754220962524, "learning_rate": 0.002, "loss": 2.5605, "step": 279190 }, { "epoch": 0.55622848399847, "grad_norm": 0.23029567301273346, "learning_rate": 0.002, "loss": 2.5448, "step": 279200 }, { "epoch": 0.5562484062221089, "grad_norm": 0.17449325323104858, "learning_rate": 0.002, "loss": 2.5502, "step": 279210 }, { "epoch": 0.5562683284457478, "grad_norm": 0.16762757301330566, "learning_rate": 0.002, "loss": 2.5599, "step": 279220 }, { "epoch": 0.5562882506693867, "grad_norm": 0.1743840128183365, "learning_rate": 0.002, "loss": 2.5471, "step": 279230 }, { "epoch": 0.5563081728930256, "grad_norm": 0.16227391362190247, "learning_rate": 0.002, "loss": 2.5334, "step": 279240 }, { "epoch": 0.5563280951166646, "grad_norm": 0.1403837949037552, "learning_rate": 0.002, "loss": 2.5446, "step": 279250 }, { "epoch": 0.5563480173403035, "grad_norm": 0.17688043415546417, "learning_rate": 0.002, "loss": 2.5612, "step": 279260 }, { "epoch": 0.5563679395639424, "grad_norm": 0.1465388685464859, "learning_rate": 0.002, "loss": 2.5489, "step": 279270 }, { "epoch": 0.5563878617875813, "grad_norm": 0.18240158259868622, "learning_rate": 0.002, "loss": 2.5773, "step": 279280 }, { "epoch": 0.5564077840112202, "grad_norm": 0.15367397665977478, "learning_rate": 0.002, "loss": 2.5628, "step": 279290 }, { "epoch": 0.5564277062348592, "grad_norm": 0.16119132936000824, "learning_rate": 0.002, "loss": 2.558, "step": 279300 }, { "epoch": 0.556447628458498, "grad_norm": 0.16100287437438965, "learning_rate": 0.002, "loss": 2.5588, "step": 279310 }, { "epoch": 0.5564675506821369, "grad_norm": 0.20319657027721405, "learning_rate": 0.002, "loss": 2.5397, "step": 279320 }, { "epoch": 0.5564874729057758, "grad_norm": 0.14611035585403442, "learning_rate": 0.002, "loss": 2.5606, "step": 279330 }, { "epoch": 0.5565073951294147, "grad_norm": 0.17756609618663788, "learning_rate": 0.002, "loss": 2.5522, "step": 279340 }, { "epoch": 0.5565273173530537, "grad_norm": 0.14624185860157013, "learning_rate": 0.002, "loss": 2.5619, "step": 279350 }, { "epoch": 0.5565472395766926, "grad_norm": 0.16574689745903015, "learning_rate": 0.002, "loss": 2.5487, "step": 279360 }, { "epoch": 0.5565671618003315, "grad_norm": 0.18607869744300842, "learning_rate": 0.002, "loss": 2.551, "step": 279370 }, { "epoch": 0.5565870840239704, "grad_norm": 0.17256395518779755, "learning_rate": 0.002, "loss": 2.5679, "step": 279380 }, { "epoch": 0.5566070062476093, "grad_norm": 0.17964474856853485, "learning_rate": 0.002, "loss": 2.5613, "step": 279390 }, { "epoch": 0.5566269284712483, "grad_norm": 0.18153223395347595, "learning_rate": 0.002, "loss": 2.5733, "step": 279400 }, { "epoch": 0.5566468506948872, "grad_norm": 0.18175426125526428, "learning_rate": 0.002, "loss": 2.5538, "step": 279410 }, { "epoch": 0.5566667729185261, "grad_norm": 0.1527884155511856, "learning_rate": 0.002, "loss": 2.5462, "step": 279420 }, { "epoch": 0.556686695142165, "grad_norm": 0.15310218930244446, "learning_rate": 0.002, "loss": 2.5562, "step": 279430 }, { "epoch": 0.5567066173658038, "grad_norm": 0.15892814099788666, "learning_rate": 0.002, "loss": 2.5489, "step": 279440 }, { "epoch": 0.5567265395894428, "grad_norm": 0.1572495549917221, "learning_rate": 0.002, "loss": 2.5789, "step": 279450 }, { "epoch": 0.5567464618130817, "grad_norm": 0.17936255037784576, "learning_rate": 0.002, "loss": 2.5592, "step": 279460 }, { "epoch": 0.5567663840367206, "grad_norm": 0.16858331859111786, "learning_rate": 0.002, "loss": 2.535, "step": 279470 }, { "epoch": 0.5567863062603595, "grad_norm": 0.16503529250621796, "learning_rate": 0.002, "loss": 2.5476, "step": 279480 }, { "epoch": 0.5568062284839985, "grad_norm": 0.1675901710987091, "learning_rate": 0.002, "loss": 2.5643, "step": 279490 }, { "epoch": 0.5568261507076374, "grad_norm": 0.21148185431957245, "learning_rate": 0.002, "loss": 2.5524, "step": 279500 }, { "epoch": 0.5568460729312763, "grad_norm": 0.16512516140937805, "learning_rate": 0.002, "loss": 2.5596, "step": 279510 }, { "epoch": 0.5568659951549152, "grad_norm": 0.20174314081668854, "learning_rate": 0.002, "loss": 2.5495, "step": 279520 }, { "epoch": 0.5568859173785541, "grad_norm": 0.15798349678516388, "learning_rate": 0.002, "loss": 2.5472, "step": 279530 }, { "epoch": 0.5569058396021931, "grad_norm": 0.2125607430934906, "learning_rate": 0.002, "loss": 2.5439, "step": 279540 }, { "epoch": 0.556925761825832, "grad_norm": 0.16955630481243134, "learning_rate": 0.002, "loss": 2.5558, "step": 279550 }, { "epoch": 0.5569456840494709, "grad_norm": 0.1654619425535202, "learning_rate": 0.002, "loss": 2.563, "step": 279560 }, { "epoch": 0.5569656062731098, "grad_norm": 0.20954740047454834, "learning_rate": 0.002, "loss": 2.5498, "step": 279570 }, { "epoch": 0.5569855284967486, "grad_norm": 0.1454964131116867, "learning_rate": 0.002, "loss": 2.5617, "step": 279580 }, { "epoch": 0.5570054507203877, "grad_norm": 0.16841155290603638, "learning_rate": 0.002, "loss": 2.5622, "step": 279590 }, { "epoch": 0.5570253729440265, "grad_norm": 0.15527886152267456, "learning_rate": 0.002, "loss": 2.566, "step": 279600 }, { "epoch": 0.5570452951676654, "grad_norm": 0.2342015504837036, "learning_rate": 0.002, "loss": 2.5596, "step": 279610 }, { "epoch": 0.5570652173913043, "grad_norm": 0.16915693879127502, "learning_rate": 0.002, "loss": 2.5592, "step": 279620 }, { "epoch": 0.5570851396149432, "grad_norm": 0.14970654249191284, "learning_rate": 0.002, "loss": 2.5547, "step": 279630 }, { "epoch": 0.5571050618385822, "grad_norm": 0.16231098771095276, "learning_rate": 0.002, "loss": 2.547, "step": 279640 }, { "epoch": 0.5571249840622211, "grad_norm": 0.14500701427459717, "learning_rate": 0.002, "loss": 2.5426, "step": 279650 }, { "epoch": 0.55714490628586, "grad_norm": 0.17961125075817108, "learning_rate": 0.002, "loss": 2.5609, "step": 279660 }, { "epoch": 0.5571648285094989, "grad_norm": 0.1655241996049881, "learning_rate": 0.002, "loss": 2.556, "step": 279670 }, { "epoch": 0.5571847507331378, "grad_norm": 0.18410839140415192, "learning_rate": 0.002, "loss": 2.5481, "step": 279680 }, { "epoch": 0.5572046729567768, "grad_norm": 0.152387335896492, "learning_rate": 0.002, "loss": 2.544, "step": 279690 }, { "epoch": 0.5572245951804157, "grad_norm": 0.15537047386169434, "learning_rate": 0.002, "loss": 2.5517, "step": 279700 }, { "epoch": 0.5572445174040546, "grad_norm": 0.30767738819122314, "learning_rate": 0.002, "loss": 2.5501, "step": 279710 }, { "epoch": 0.5572644396276935, "grad_norm": 0.15348437428474426, "learning_rate": 0.002, "loss": 2.5589, "step": 279720 }, { "epoch": 0.5572843618513323, "grad_norm": 0.1940680593252182, "learning_rate": 0.002, "loss": 2.5579, "step": 279730 }, { "epoch": 0.5573042840749713, "grad_norm": 0.1567806899547577, "learning_rate": 0.002, "loss": 2.5506, "step": 279740 }, { "epoch": 0.5573242062986102, "grad_norm": 0.15572510659694672, "learning_rate": 0.002, "loss": 2.5524, "step": 279750 }, { "epoch": 0.5573441285222491, "grad_norm": 0.16230899095535278, "learning_rate": 0.002, "loss": 2.555, "step": 279760 }, { "epoch": 0.557364050745888, "grad_norm": 0.1627056896686554, "learning_rate": 0.002, "loss": 2.5487, "step": 279770 }, { "epoch": 0.557383972969527, "grad_norm": 0.18027707934379578, "learning_rate": 0.002, "loss": 2.5634, "step": 279780 }, { "epoch": 0.5574038951931659, "grad_norm": 0.15093016624450684, "learning_rate": 0.002, "loss": 2.5538, "step": 279790 }, { "epoch": 0.5574238174168048, "grad_norm": 0.19199882447719574, "learning_rate": 0.002, "loss": 2.5582, "step": 279800 }, { "epoch": 0.5574437396404437, "grad_norm": 0.18838034570217133, "learning_rate": 0.002, "loss": 2.5619, "step": 279810 }, { "epoch": 0.5574636618640826, "grad_norm": 0.18513123691082, "learning_rate": 0.002, "loss": 2.5437, "step": 279820 }, { "epoch": 0.5574835840877216, "grad_norm": 0.23964183032512665, "learning_rate": 0.002, "loss": 2.5703, "step": 279830 }, { "epoch": 0.5575035063113605, "grad_norm": 0.17558260262012482, "learning_rate": 0.002, "loss": 2.5351, "step": 279840 }, { "epoch": 0.5575234285349994, "grad_norm": 0.13408175110816956, "learning_rate": 0.002, "loss": 2.5598, "step": 279850 }, { "epoch": 0.5575433507586383, "grad_norm": 0.14448346197605133, "learning_rate": 0.002, "loss": 2.5589, "step": 279860 }, { "epoch": 0.5575632729822771, "grad_norm": 0.13421662151813507, "learning_rate": 0.002, "loss": 2.559, "step": 279870 }, { "epoch": 0.5575831952059161, "grad_norm": 0.1773608773946762, "learning_rate": 0.002, "loss": 2.5474, "step": 279880 }, { "epoch": 0.557603117429555, "grad_norm": 0.1589309424161911, "learning_rate": 0.002, "loss": 2.5504, "step": 279890 }, { "epoch": 0.5576230396531939, "grad_norm": 0.15869492292404175, "learning_rate": 0.002, "loss": 2.5615, "step": 279900 }, { "epoch": 0.5576429618768328, "grad_norm": 0.21699365973472595, "learning_rate": 0.002, "loss": 2.5518, "step": 279910 }, { "epoch": 0.5576628841004717, "grad_norm": 0.16443650424480438, "learning_rate": 0.002, "loss": 2.575, "step": 279920 }, { "epoch": 0.5576828063241107, "grad_norm": 0.16942359507083893, "learning_rate": 0.002, "loss": 2.5567, "step": 279930 }, { "epoch": 0.5577027285477496, "grad_norm": 0.1722634881734848, "learning_rate": 0.002, "loss": 2.5487, "step": 279940 }, { "epoch": 0.5577226507713885, "grad_norm": 0.1725851446390152, "learning_rate": 0.002, "loss": 2.5339, "step": 279950 }, { "epoch": 0.5577425729950274, "grad_norm": 0.15218566358089447, "learning_rate": 0.002, "loss": 2.5545, "step": 279960 }, { "epoch": 0.5577624952186663, "grad_norm": 0.184341162443161, "learning_rate": 0.002, "loss": 2.5544, "step": 279970 }, { "epoch": 0.5577824174423053, "grad_norm": 0.16900452971458435, "learning_rate": 0.002, "loss": 2.5446, "step": 279980 }, { "epoch": 0.5578023396659442, "grad_norm": 0.15441936254501343, "learning_rate": 0.002, "loss": 2.5584, "step": 279990 }, { "epoch": 0.5578222618895831, "grad_norm": 0.16836434602737427, "learning_rate": 0.002, "loss": 2.5564, "step": 280000 }, { "epoch": 0.557842184113222, "grad_norm": 0.1653878390789032, "learning_rate": 0.002, "loss": 2.5549, "step": 280010 }, { "epoch": 0.5578621063368608, "grad_norm": 0.15635448694229126, "learning_rate": 0.002, "loss": 2.5532, "step": 280020 }, { "epoch": 0.5578820285604998, "grad_norm": 0.19342568516731262, "learning_rate": 0.002, "loss": 2.5519, "step": 280030 }, { "epoch": 0.5579019507841387, "grad_norm": 0.17818498611450195, "learning_rate": 0.002, "loss": 2.5502, "step": 280040 }, { "epoch": 0.5579218730077776, "grad_norm": 0.17282696068286896, "learning_rate": 0.002, "loss": 2.5515, "step": 280050 }, { "epoch": 0.5579417952314165, "grad_norm": 0.15105457603931427, "learning_rate": 0.002, "loss": 2.574, "step": 280060 }, { "epoch": 0.5579617174550555, "grad_norm": 0.16233332455158234, "learning_rate": 0.002, "loss": 2.5576, "step": 280070 }, { "epoch": 0.5579816396786944, "grad_norm": 0.1584024578332901, "learning_rate": 0.002, "loss": 2.5673, "step": 280080 }, { "epoch": 0.5580015619023333, "grad_norm": 0.2040807157754898, "learning_rate": 0.002, "loss": 2.5522, "step": 280090 }, { "epoch": 0.5580214841259722, "grad_norm": 0.15122942626476288, "learning_rate": 0.002, "loss": 2.5524, "step": 280100 }, { "epoch": 0.5580414063496111, "grad_norm": 0.18954980373382568, "learning_rate": 0.002, "loss": 2.5479, "step": 280110 }, { "epoch": 0.5580613285732501, "grad_norm": 0.16166067123413086, "learning_rate": 0.002, "loss": 2.5377, "step": 280120 }, { "epoch": 0.558081250796889, "grad_norm": 0.15605473518371582, "learning_rate": 0.002, "loss": 2.5604, "step": 280130 }, { "epoch": 0.5581011730205279, "grad_norm": 0.17103537917137146, "learning_rate": 0.002, "loss": 2.5667, "step": 280140 }, { "epoch": 0.5581210952441668, "grad_norm": 0.15026479959487915, "learning_rate": 0.002, "loss": 2.5562, "step": 280150 }, { "epoch": 0.5581410174678056, "grad_norm": 0.14955779910087585, "learning_rate": 0.002, "loss": 2.56, "step": 280160 }, { "epoch": 0.5581609396914446, "grad_norm": 0.1648261547088623, "learning_rate": 0.002, "loss": 2.563, "step": 280170 }, { "epoch": 0.5581808619150835, "grad_norm": 0.1570419818162918, "learning_rate": 0.002, "loss": 2.543, "step": 280180 }, { "epoch": 0.5582007841387224, "grad_norm": 0.1873045265674591, "learning_rate": 0.002, "loss": 2.5509, "step": 280190 }, { "epoch": 0.5582207063623613, "grad_norm": 0.1917477250099182, "learning_rate": 0.002, "loss": 2.5358, "step": 280200 }, { "epoch": 0.5582406285860002, "grad_norm": 0.15251116454601288, "learning_rate": 0.002, "loss": 2.5601, "step": 280210 }, { "epoch": 0.5582605508096392, "grad_norm": 0.1781274974346161, "learning_rate": 0.002, "loss": 2.5539, "step": 280220 }, { "epoch": 0.5582804730332781, "grad_norm": 0.19808495044708252, "learning_rate": 0.002, "loss": 2.5636, "step": 280230 }, { "epoch": 0.558300395256917, "grad_norm": 0.1541343331336975, "learning_rate": 0.002, "loss": 2.5548, "step": 280240 }, { "epoch": 0.5583203174805559, "grad_norm": 0.1710202544927597, "learning_rate": 0.002, "loss": 2.5591, "step": 280250 }, { "epoch": 0.5583402397041948, "grad_norm": 0.16430002450942993, "learning_rate": 0.002, "loss": 2.5515, "step": 280260 }, { "epoch": 0.5583601619278338, "grad_norm": 0.1573198437690735, "learning_rate": 0.002, "loss": 2.5561, "step": 280270 }, { "epoch": 0.5583800841514727, "grad_norm": 0.13724996149539948, "learning_rate": 0.002, "loss": 2.5539, "step": 280280 }, { "epoch": 0.5584000063751116, "grad_norm": 0.15602844953536987, "learning_rate": 0.002, "loss": 2.5551, "step": 280290 }, { "epoch": 0.5584199285987504, "grad_norm": 0.18445640802383423, "learning_rate": 0.002, "loss": 2.5506, "step": 280300 }, { "epoch": 0.5584398508223893, "grad_norm": 0.18441766500473022, "learning_rate": 0.002, "loss": 2.5596, "step": 280310 }, { "epoch": 0.5584597730460283, "grad_norm": 0.15063469111919403, "learning_rate": 0.002, "loss": 2.5641, "step": 280320 }, { "epoch": 0.5584796952696672, "grad_norm": 0.17061302065849304, "learning_rate": 0.002, "loss": 2.5543, "step": 280330 }, { "epoch": 0.5584996174933061, "grad_norm": 0.17217682301998138, "learning_rate": 0.002, "loss": 2.5492, "step": 280340 }, { "epoch": 0.558519539716945, "grad_norm": 0.17470063269138336, "learning_rate": 0.002, "loss": 2.5535, "step": 280350 }, { "epoch": 0.558539461940584, "grad_norm": 0.1589987725019455, "learning_rate": 0.002, "loss": 2.549, "step": 280360 }, { "epoch": 0.5585593841642229, "grad_norm": 0.20943261682987213, "learning_rate": 0.002, "loss": 2.5617, "step": 280370 }, { "epoch": 0.5585793063878618, "grad_norm": 0.14271169900894165, "learning_rate": 0.002, "loss": 2.5636, "step": 280380 }, { "epoch": 0.5585992286115007, "grad_norm": 0.14086896181106567, "learning_rate": 0.002, "loss": 2.5607, "step": 280390 }, { "epoch": 0.5586191508351396, "grad_norm": 0.1528494954109192, "learning_rate": 0.002, "loss": 2.5771, "step": 280400 }, { "epoch": 0.5586390730587786, "grad_norm": 0.17435915768146515, "learning_rate": 0.002, "loss": 2.549, "step": 280410 }, { "epoch": 0.5586589952824175, "grad_norm": 0.1752709448337555, "learning_rate": 0.002, "loss": 2.5552, "step": 280420 }, { "epoch": 0.5586789175060564, "grad_norm": 0.1650189906358719, "learning_rate": 0.002, "loss": 2.5531, "step": 280430 }, { "epoch": 0.5586988397296953, "grad_norm": 0.1452111452817917, "learning_rate": 0.002, "loss": 2.5486, "step": 280440 }, { "epoch": 0.5587187619533341, "grad_norm": 0.20869991183280945, "learning_rate": 0.002, "loss": 2.5484, "step": 280450 }, { "epoch": 0.5587386841769731, "grad_norm": 0.21119239926338196, "learning_rate": 0.002, "loss": 2.5596, "step": 280460 }, { "epoch": 0.558758606400612, "grad_norm": 0.15203550457954407, "learning_rate": 0.002, "loss": 2.5503, "step": 280470 }, { "epoch": 0.5587785286242509, "grad_norm": 0.17734776437282562, "learning_rate": 0.002, "loss": 2.5604, "step": 280480 }, { "epoch": 0.5587984508478898, "grad_norm": 0.1614808291196823, "learning_rate": 0.002, "loss": 2.5321, "step": 280490 }, { "epoch": 0.5588183730715287, "grad_norm": 0.16648870706558228, "learning_rate": 0.002, "loss": 2.5563, "step": 280500 }, { "epoch": 0.5588382952951677, "grad_norm": 0.21518974006175995, "learning_rate": 0.002, "loss": 2.5646, "step": 280510 }, { "epoch": 0.5588582175188066, "grad_norm": 0.1583097279071808, "learning_rate": 0.002, "loss": 2.5379, "step": 280520 }, { "epoch": 0.5588781397424455, "grad_norm": 0.17961908876895905, "learning_rate": 0.002, "loss": 2.5557, "step": 280530 }, { "epoch": 0.5588980619660844, "grad_norm": 0.18115395307540894, "learning_rate": 0.002, "loss": 2.5524, "step": 280540 }, { "epoch": 0.5589179841897233, "grad_norm": 0.16777200996875763, "learning_rate": 0.002, "loss": 2.5535, "step": 280550 }, { "epoch": 0.5589379064133623, "grad_norm": 0.1867363154888153, "learning_rate": 0.002, "loss": 2.5596, "step": 280560 }, { "epoch": 0.5589578286370012, "grad_norm": 0.16074839234352112, "learning_rate": 0.002, "loss": 2.544, "step": 280570 }, { "epoch": 0.55897775086064, "grad_norm": 0.1756264716386795, "learning_rate": 0.002, "loss": 2.5288, "step": 280580 }, { "epoch": 0.558997673084279, "grad_norm": 0.15400739014148712, "learning_rate": 0.002, "loss": 2.573, "step": 280590 }, { "epoch": 0.5590175953079178, "grad_norm": 0.17339098453521729, "learning_rate": 0.002, "loss": 2.5518, "step": 280600 }, { "epoch": 0.5590375175315568, "grad_norm": 0.15824487805366516, "learning_rate": 0.002, "loss": 2.5655, "step": 280610 }, { "epoch": 0.5590574397551957, "grad_norm": 0.1597827672958374, "learning_rate": 0.002, "loss": 2.5647, "step": 280620 }, { "epoch": 0.5590773619788346, "grad_norm": 0.16977018117904663, "learning_rate": 0.002, "loss": 2.5396, "step": 280630 }, { "epoch": 0.5590972842024735, "grad_norm": 0.1511145830154419, "learning_rate": 0.002, "loss": 2.5596, "step": 280640 }, { "epoch": 0.5591172064261125, "grad_norm": 0.15175890922546387, "learning_rate": 0.002, "loss": 2.554, "step": 280650 }, { "epoch": 0.5591371286497514, "grad_norm": 0.1899767816066742, "learning_rate": 0.002, "loss": 2.5535, "step": 280660 }, { "epoch": 0.5591570508733903, "grad_norm": 0.16277946531772614, "learning_rate": 0.002, "loss": 2.5589, "step": 280670 }, { "epoch": 0.5591769730970292, "grad_norm": 0.16947631537914276, "learning_rate": 0.002, "loss": 2.5677, "step": 280680 }, { "epoch": 0.5591968953206681, "grad_norm": 0.165349081158638, "learning_rate": 0.002, "loss": 2.5489, "step": 280690 }, { "epoch": 0.5592168175443071, "grad_norm": 0.17297948896884918, "learning_rate": 0.002, "loss": 2.5581, "step": 280700 }, { "epoch": 0.559236739767946, "grad_norm": 0.18445084989070892, "learning_rate": 0.002, "loss": 2.5527, "step": 280710 }, { "epoch": 0.5592566619915849, "grad_norm": 0.16575197875499725, "learning_rate": 0.002, "loss": 2.5508, "step": 280720 }, { "epoch": 0.5592765842152237, "grad_norm": 0.19602657854557037, "learning_rate": 0.002, "loss": 2.5449, "step": 280730 }, { "epoch": 0.5592965064388626, "grad_norm": 0.17817388474941254, "learning_rate": 0.002, "loss": 2.5713, "step": 280740 }, { "epoch": 0.5593164286625016, "grad_norm": 0.15464459359645844, "learning_rate": 0.002, "loss": 2.5666, "step": 280750 }, { "epoch": 0.5593363508861405, "grad_norm": 0.19551034271717072, "learning_rate": 0.002, "loss": 2.549, "step": 280760 }, { "epoch": 0.5593562731097794, "grad_norm": 0.16187509894371033, "learning_rate": 0.002, "loss": 2.5384, "step": 280770 }, { "epoch": 0.5593761953334183, "grad_norm": 0.18832118809223175, "learning_rate": 0.002, "loss": 2.5511, "step": 280780 }, { "epoch": 0.5593961175570572, "grad_norm": 0.1639144867658615, "learning_rate": 0.002, "loss": 2.5409, "step": 280790 }, { "epoch": 0.5594160397806962, "grad_norm": 0.1821439415216446, "learning_rate": 0.002, "loss": 2.5455, "step": 280800 }, { "epoch": 0.5594359620043351, "grad_norm": 0.20147088170051575, "learning_rate": 0.002, "loss": 2.5564, "step": 280810 }, { "epoch": 0.559455884227974, "grad_norm": 0.17368705570697784, "learning_rate": 0.002, "loss": 2.5486, "step": 280820 }, { "epoch": 0.5594758064516129, "grad_norm": 0.14833064377307892, "learning_rate": 0.002, "loss": 2.551, "step": 280830 }, { "epoch": 0.5594957286752518, "grad_norm": 0.17830045521259308, "learning_rate": 0.002, "loss": 2.5483, "step": 280840 }, { "epoch": 0.5595156508988908, "grad_norm": 0.16816291213035583, "learning_rate": 0.002, "loss": 2.5467, "step": 280850 }, { "epoch": 0.5595355731225297, "grad_norm": 0.1657257080078125, "learning_rate": 0.002, "loss": 2.5677, "step": 280860 }, { "epoch": 0.5595554953461686, "grad_norm": 0.15767675638198853, "learning_rate": 0.002, "loss": 2.5423, "step": 280870 }, { "epoch": 0.5595754175698074, "grad_norm": 0.1620693802833557, "learning_rate": 0.002, "loss": 2.5603, "step": 280880 }, { "epoch": 0.5595953397934463, "grad_norm": 0.18400795757770538, "learning_rate": 0.002, "loss": 2.5657, "step": 280890 }, { "epoch": 0.5596152620170853, "grad_norm": 0.1808283030986786, "learning_rate": 0.002, "loss": 2.5532, "step": 280900 }, { "epoch": 0.5596351842407242, "grad_norm": 0.1747768372297287, "learning_rate": 0.002, "loss": 2.5556, "step": 280910 }, { "epoch": 0.5596551064643631, "grad_norm": 0.1845884621143341, "learning_rate": 0.002, "loss": 2.5643, "step": 280920 }, { "epoch": 0.559675028688002, "grad_norm": 0.15822447836399078, "learning_rate": 0.002, "loss": 2.5576, "step": 280930 }, { "epoch": 0.5596949509116409, "grad_norm": 0.14963185787200928, "learning_rate": 0.002, "loss": 2.5444, "step": 280940 }, { "epoch": 0.5597148731352799, "grad_norm": 0.13843874633312225, "learning_rate": 0.002, "loss": 2.5434, "step": 280950 }, { "epoch": 0.5597347953589188, "grad_norm": 0.15040120482444763, "learning_rate": 0.002, "loss": 2.5581, "step": 280960 }, { "epoch": 0.5597547175825577, "grad_norm": 0.15689115226268768, "learning_rate": 0.002, "loss": 2.5713, "step": 280970 }, { "epoch": 0.5597746398061966, "grad_norm": 0.1835218071937561, "learning_rate": 0.002, "loss": 2.5467, "step": 280980 }, { "epoch": 0.5597945620298356, "grad_norm": 0.14122514426708221, "learning_rate": 0.002, "loss": 2.5521, "step": 280990 }, { "epoch": 0.5598144842534745, "grad_norm": 0.17444142699241638, "learning_rate": 0.002, "loss": 2.5512, "step": 281000 }, { "epoch": 0.5598344064771134, "grad_norm": 0.15500183403491974, "learning_rate": 0.002, "loss": 2.5507, "step": 281010 }, { "epoch": 0.5598543287007522, "grad_norm": 0.15387405455112457, "learning_rate": 0.002, "loss": 2.5561, "step": 281020 }, { "epoch": 0.5598742509243911, "grad_norm": 0.1833701729774475, "learning_rate": 0.002, "loss": 2.5846, "step": 281030 }, { "epoch": 0.5598941731480301, "grad_norm": 0.1812383383512497, "learning_rate": 0.002, "loss": 2.5434, "step": 281040 }, { "epoch": 0.559914095371669, "grad_norm": 0.17639045417308807, "learning_rate": 0.002, "loss": 2.5366, "step": 281050 }, { "epoch": 0.5599340175953079, "grad_norm": 0.1936073899269104, "learning_rate": 0.002, "loss": 2.5476, "step": 281060 }, { "epoch": 0.5599539398189468, "grad_norm": 0.18116062879562378, "learning_rate": 0.002, "loss": 2.5341, "step": 281070 }, { "epoch": 0.5599738620425857, "grad_norm": 0.16384845972061157, "learning_rate": 0.002, "loss": 2.5514, "step": 281080 }, { "epoch": 0.5599937842662247, "grad_norm": 0.1716596931219101, "learning_rate": 0.002, "loss": 2.5497, "step": 281090 }, { "epoch": 0.5600137064898636, "grad_norm": 0.16055822372436523, "learning_rate": 0.002, "loss": 2.564, "step": 281100 }, { "epoch": 0.5600336287135025, "grad_norm": 0.17418767511844635, "learning_rate": 0.002, "loss": 2.5638, "step": 281110 }, { "epoch": 0.5600535509371414, "grad_norm": 0.17781834304332733, "learning_rate": 0.002, "loss": 2.5649, "step": 281120 }, { "epoch": 0.5600734731607803, "grad_norm": 0.1727989763021469, "learning_rate": 0.002, "loss": 2.5514, "step": 281130 }, { "epoch": 0.5600933953844193, "grad_norm": 0.17583215236663818, "learning_rate": 0.002, "loss": 2.5415, "step": 281140 }, { "epoch": 0.5601133176080582, "grad_norm": 0.18631446361541748, "learning_rate": 0.002, "loss": 2.5709, "step": 281150 }, { "epoch": 0.560133239831697, "grad_norm": 0.25423723459243774, "learning_rate": 0.002, "loss": 2.5492, "step": 281160 }, { "epoch": 0.5601531620553359, "grad_norm": 0.19362099468708038, "learning_rate": 0.002, "loss": 2.5521, "step": 281170 }, { "epoch": 0.5601730842789748, "grad_norm": 0.17003846168518066, "learning_rate": 0.002, "loss": 2.5629, "step": 281180 }, { "epoch": 0.5601930065026138, "grad_norm": 0.1570112258195877, "learning_rate": 0.002, "loss": 2.5651, "step": 281190 }, { "epoch": 0.5602129287262527, "grad_norm": 0.15236645936965942, "learning_rate": 0.002, "loss": 2.5566, "step": 281200 }, { "epoch": 0.5602328509498916, "grad_norm": 0.1526411771774292, "learning_rate": 0.002, "loss": 2.5514, "step": 281210 }, { "epoch": 0.5602527731735305, "grad_norm": 0.15487165749073029, "learning_rate": 0.002, "loss": 2.5605, "step": 281220 }, { "epoch": 0.5602726953971694, "grad_norm": 0.16589927673339844, "learning_rate": 0.002, "loss": 2.5592, "step": 281230 }, { "epoch": 0.5602926176208084, "grad_norm": 0.1597590297460556, "learning_rate": 0.002, "loss": 2.5653, "step": 281240 }, { "epoch": 0.5603125398444473, "grad_norm": 0.1646086424589157, "learning_rate": 0.002, "loss": 2.5688, "step": 281250 }, { "epoch": 0.5603324620680862, "grad_norm": 0.1426316648721695, "learning_rate": 0.002, "loss": 2.5581, "step": 281260 }, { "epoch": 0.5603523842917251, "grad_norm": 0.16089344024658203, "learning_rate": 0.002, "loss": 2.5585, "step": 281270 }, { "epoch": 0.5603723065153641, "grad_norm": 0.1744716912508011, "learning_rate": 0.002, "loss": 2.5673, "step": 281280 }, { "epoch": 0.560392228739003, "grad_norm": 0.2166438102722168, "learning_rate": 0.002, "loss": 2.5553, "step": 281290 }, { "epoch": 0.5604121509626419, "grad_norm": 0.16248708963394165, "learning_rate": 0.002, "loss": 2.5668, "step": 281300 }, { "epoch": 0.5604320731862807, "grad_norm": 0.16858811676502228, "learning_rate": 0.002, "loss": 2.5488, "step": 281310 }, { "epoch": 0.5604519954099196, "grad_norm": 0.19389905035495758, "learning_rate": 0.002, "loss": 2.5608, "step": 281320 }, { "epoch": 0.5604719176335586, "grad_norm": 0.1654077172279358, "learning_rate": 0.002, "loss": 2.5748, "step": 281330 }, { "epoch": 0.5604918398571975, "grad_norm": 0.17432838678359985, "learning_rate": 0.002, "loss": 2.5502, "step": 281340 }, { "epoch": 0.5605117620808364, "grad_norm": 0.170934796333313, "learning_rate": 0.002, "loss": 2.5533, "step": 281350 }, { "epoch": 0.5605316843044753, "grad_norm": 0.18489240109920502, "learning_rate": 0.002, "loss": 2.5487, "step": 281360 }, { "epoch": 0.5605516065281142, "grad_norm": 0.17380714416503906, "learning_rate": 0.002, "loss": 2.5516, "step": 281370 }, { "epoch": 0.5605715287517532, "grad_norm": 0.1762564480304718, "learning_rate": 0.002, "loss": 2.5558, "step": 281380 }, { "epoch": 0.5605914509753921, "grad_norm": 0.15108391642570496, "learning_rate": 0.002, "loss": 2.5517, "step": 281390 }, { "epoch": 0.560611373199031, "grad_norm": 0.23226282000541687, "learning_rate": 0.002, "loss": 2.5588, "step": 281400 }, { "epoch": 0.5606312954226699, "grad_norm": 0.18547043204307556, "learning_rate": 0.002, "loss": 2.5549, "step": 281410 }, { "epoch": 0.5606512176463088, "grad_norm": 0.1518411636352539, "learning_rate": 0.002, "loss": 2.5453, "step": 281420 }, { "epoch": 0.5606711398699478, "grad_norm": 0.16076761484146118, "learning_rate": 0.002, "loss": 2.5538, "step": 281430 }, { "epoch": 0.5606910620935867, "grad_norm": 0.1621823012828827, "learning_rate": 0.002, "loss": 2.5538, "step": 281440 }, { "epoch": 0.5607109843172255, "grad_norm": 0.17410463094711304, "learning_rate": 0.002, "loss": 2.5742, "step": 281450 }, { "epoch": 0.5607309065408644, "grad_norm": 0.15696555376052856, "learning_rate": 0.002, "loss": 2.567, "step": 281460 }, { "epoch": 0.5607508287645033, "grad_norm": 0.15433108806610107, "learning_rate": 0.002, "loss": 2.5472, "step": 281470 }, { "epoch": 0.5607707509881423, "grad_norm": 0.17823785543441772, "learning_rate": 0.002, "loss": 2.5521, "step": 281480 }, { "epoch": 0.5607906732117812, "grad_norm": 0.16862961649894714, "learning_rate": 0.002, "loss": 2.5553, "step": 281490 }, { "epoch": 0.5608105954354201, "grad_norm": 0.17894919216632843, "learning_rate": 0.002, "loss": 2.5684, "step": 281500 }, { "epoch": 0.560830517659059, "grad_norm": 0.15704241394996643, "learning_rate": 0.002, "loss": 2.5416, "step": 281510 }, { "epoch": 0.5608504398826979, "grad_norm": 0.17450910806655884, "learning_rate": 0.002, "loss": 2.5653, "step": 281520 }, { "epoch": 0.5608703621063369, "grad_norm": 0.15642841160297394, "learning_rate": 0.002, "loss": 2.556, "step": 281530 }, { "epoch": 0.5608902843299758, "grad_norm": 0.1810966432094574, "learning_rate": 0.002, "loss": 2.5495, "step": 281540 }, { "epoch": 0.5609102065536147, "grad_norm": 0.1664101779460907, "learning_rate": 0.002, "loss": 2.5485, "step": 281550 }, { "epoch": 0.5609301287772536, "grad_norm": 0.15114596486091614, "learning_rate": 0.002, "loss": 2.554, "step": 281560 }, { "epoch": 0.5609500510008926, "grad_norm": 0.18166041374206543, "learning_rate": 0.002, "loss": 2.5574, "step": 281570 }, { "epoch": 0.5609699732245315, "grad_norm": 0.18467245995998383, "learning_rate": 0.002, "loss": 2.5601, "step": 281580 }, { "epoch": 0.5609898954481704, "grad_norm": 0.15633752942085266, "learning_rate": 0.002, "loss": 2.556, "step": 281590 }, { "epoch": 0.5610098176718092, "grad_norm": 0.15273143351078033, "learning_rate": 0.002, "loss": 2.5435, "step": 281600 }, { "epoch": 0.5610297398954481, "grad_norm": 0.2056467980146408, "learning_rate": 0.002, "loss": 2.5519, "step": 281610 }, { "epoch": 0.5610496621190871, "grad_norm": 0.1790499985218048, "learning_rate": 0.002, "loss": 2.5664, "step": 281620 }, { "epoch": 0.561069584342726, "grad_norm": 0.1613079160451889, "learning_rate": 0.002, "loss": 2.5518, "step": 281630 }, { "epoch": 0.5610895065663649, "grad_norm": 0.19283278286457062, "learning_rate": 0.002, "loss": 2.5616, "step": 281640 }, { "epoch": 0.5611094287900038, "grad_norm": 0.15469324588775635, "learning_rate": 0.002, "loss": 2.547, "step": 281650 }, { "epoch": 0.5611293510136427, "grad_norm": 0.14994212985038757, "learning_rate": 0.002, "loss": 2.5547, "step": 281660 }, { "epoch": 0.5611492732372817, "grad_norm": 0.19417843222618103, "learning_rate": 0.002, "loss": 2.5453, "step": 281670 }, { "epoch": 0.5611691954609206, "grad_norm": 0.16503088176250458, "learning_rate": 0.002, "loss": 2.5603, "step": 281680 }, { "epoch": 0.5611891176845595, "grad_norm": 0.14534348249435425, "learning_rate": 0.002, "loss": 2.5534, "step": 281690 }, { "epoch": 0.5612090399081984, "grad_norm": 0.17736148834228516, "learning_rate": 0.002, "loss": 2.562, "step": 281700 }, { "epoch": 0.5612289621318373, "grad_norm": 0.17594604194164276, "learning_rate": 0.002, "loss": 2.5541, "step": 281710 }, { "epoch": 0.5612488843554763, "grad_norm": 0.1653086394071579, "learning_rate": 0.002, "loss": 2.5469, "step": 281720 }, { "epoch": 0.5612688065791152, "grad_norm": 0.1659621149301529, "learning_rate": 0.002, "loss": 2.5502, "step": 281730 }, { "epoch": 0.561288728802754, "grad_norm": 0.16059452295303345, "learning_rate": 0.002, "loss": 2.5538, "step": 281740 }, { "epoch": 0.5613086510263929, "grad_norm": 0.19663552939891815, "learning_rate": 0.002, "loss": 2.5567, "step": 281750 }, { "epoch": 0.5613285732500318, "grad_norm": 0.1640259325504303, "learning_rate": 0.002, "loss": 2.546, "step": 281760 }, { "epoch": 0.5613484954736708, "grad_norm": 0.13713675737380981, "learning_rate": 0.002, "loss": 2.5575, "step": 281770 }, { "epoch": 0.5613684176973097, "grad_norm": 0.1865672767162323, "learning_rate": 0.002, "loss": 2.5536, "step": 281780 }, { "epoch": 0.5613883399209486, "grad_norm": 0.1665443480014801, "learning_rate": 0.002, "loss": 2.5591, "step": 281790 }, { "epoch": 0.5614082621445875, "grad_norm": 0.18407875299453735, "learning_rate": 0.002, "loss": 2.568, "step": 281800 }, { "epoch": 0.5614281843682264, "grad_norm": 0.16560105979442596, "learning_rate": 0.002, "loss": 2.5639, "step": 281810 }, { "epoch": 0.5614481065918654, "grad_norm": 0.1680900752544403, "learning_rate": 0.002, "loss": 2.5511, "step": 281820 }, { "epoch": 0.5614680288155043, "grad_norm": 0.14388000965118408, "learning_rate": 0.002, "loss": 2.5496, "step": 281830 }, { "epoch": 0.5614879510391432, "grad_norm": 0.17207017540931702, "learning_rate": 0.002, "loss": 2.549, "step": 281840 }, { "epoch": 0.5615078732627821, "grad_norm": 0.16595670580863953, "learning_rate": 0.002, "loss": 2.5634, "step": 281850 }, { "epoch": 0.5615277954864211, "grad_norm": 0.1674862504005432, "learning_rate": 0.002, "loss": 2.5628, "step": 281860 }, { "epoch": 0.56154771771006, "grad_norm": 0.17042522132396698, "learning_rate": 0.002, "loss": 2.5626, "step": 281870 }, { "epoch": 0.5615676399336988, "grad_norm": 0.16736507415771484, "learning_rate": 0.002, "loss": 2.5578, "step": 281880 }, { "epoch": 0.5615875621573377, "grad_norm": 0.17866457998752594, "learning_rate": 0.002, "loss": 2.5637, "step": 281890 }, { "epoch": 0.5616074843809766, "grad_norm": 0.15502199530601501, "learning_rate": 0.002, "loss": 2.5497, "step": 281900 }, { "epoch": 0.5616274066046156, "grad_norm": 0.18529446423053741, "learning_rate": 0.002, "loss": 2.5562, "step": 281910 }, { "epoch": 0.5616473288282545, "grad_norm": 0.19842533767223358, "learning_rate": 0.002, "loss": 2.5584, "step": 281920 }, { "epoch": 0.5616672510518934, "grad_norm": 0.15986181795597076, "learning_rate": 0.002, "loss": 2.56, "step": 281930 }, { "epoch": 0.5616871732755323, "grad_norm": 0.14311115443706512, "learning_rate": 0.002, "loss": 2.566, "step": 281940 }, { "epoch": 0.5617070954991712, "grad_norm": 0.16268981993198395, "learning_rate": 0.002, "loss": 2.5539, "step": 281950 }, { "epoch": 0.5617270177228102, "grad_norm": 0.16333331167697906, "learning_rate": 0.002, "loss": 2.5557, "step": 281960 }, { "epoch": 0.5617469399464491, "grad_norm": 0.2233969271183014, "learning_rate": 0.002, "loss": 2.5511, "step": 281970 }, { "epoch": 0.561766862170088, "grad_norm": 0.15518561005592346, "learning_rate": 0.002, "loss": 2.5555, "step": 281980 }, { "epoch": 0.5617867843937269, "grad_norm": 0.1650666892528534, "learning_rate": 0.002, "loss": 2.5454, "step": 281990 }, { "epoch": 0.5618067066173658, "grad_norm": 0.17130449414253235, "learning_rate": 0.002, "loss": 2.5457, "step": 282000 }, { "epoch": 0.5618266288410048, "grad_norm": 0.19400206208229065, "learning_rate": 0.002, "loss": 2.5524, "step": 282010 }, { "epoch": 0.5618465510646437, "grad_norm": 0.17766791582107544, "learning_rate": 0.002, "loss": 2.5679, "step": 282020 }, { "epoch": 0.5618664732882825, "grad_norm": 0.1625215858221054, "learning_rate": 0.002, "loss": 2.5567, "step": 282030 }, { "epoch": 0.5618863955119214, "grad_norm": 0.16416393220424652, "learning_rate": 0.002, "loss": 2.5665, "step": 282040 }, { "epoch": 0.5619063177355603, "grad_norm": 0.14613153040409088, "learning_rate": 0.002, "loss": 2.5604, "step": 282050 }, { "epoch": 0.5619262399591993, "grad_norm": 0.1380552053451538, "learning_rate": 0.002, "loss": 2.5499, "step": 282060 }, { "epoch": 0.5619461621828382, "grad_norm": 0.16766177117824554, "learning_rate": 0.002, "loss": 2.5539, "step": 282070 }, { "epoch": 0.5619660844064771, "grad_norm": 0.17803627252578735, "learning_rate": 0.002, "loss": 2.5592, "step": 282080 }, { "epoch": 0.561986006630116, "grad_norm": 0.20808832347393036, "learning_rate": 0.002, "loss": 2.5583, "step": 282090 }, { "epoch": 0.5620059288537549, "grad_norm": 0.1487758606672287, "learning_rate": 0.002, "loss": 2.5549, "step": 282100 }, { "epoch": 0.5620258510773939, "grad_norm": 0.14896805584430695, "learning_rate": 0.002, "loss": 2.5482, "step": 282110 }, { "epoch": 0.5620457733010328, "grad_norm": 0.1528589427471161, "learning_rate": 0.002, "loss": 2.548, "step": 282120 }, { "epoch": 0.5620656955246717, "grad_norm": 0.20030178129673004, "learning_rate": 0.002, "loss": 2.5562, "step": 282130 }, { "epoch": 0.5620856177483106, "grad_norm": 0.17109054327011108, "learning_rate": 0.002, "loss": 2.5628, "step": 282140 }, { "epoch": 0.5621055399719496, "grad_norm": 0.15187469124794006, "learning_rate": 0.002, "loss": 2.5556, "step": 282150 }, { "epoch": 0.5621254621955885, "grad_norm": 0.15013621747493744, "learning_rate": 0.002, "loss": 2.5405, "step": 282160 }, { "epoch": 0.5621453844192273, "grad_norm": 0.24076004326343536, "learning_rate": 0.002, "loss": 2.5545, "step": 282170 }, { "epoch": 0.5621653066428662, "grad_norm": 0.1823010891675949, "learning_rate": 0.002, "loss": 2.5486, "step": 282180 }, { "epoch": 0.5621852288665051, "grad_norm": 0.15060539543628693, "learning_rate": 0.002, "loss": 2.5458, "step": 282190 }, { "epoch": 0.5622051510901441, "grad_norm": 0.18909259140491486, "learning_rate": 0.002, "loss": 2.5381, "step": 282200 }, { "epoch": 0.562225073313783, "grad_norm": 0.1630212813615799, "learning_rate": 0.002, "loss": 2.5728, "step": 282210 }, { "epoch": 0.5622449955374219, "grad_norm": 0.15556295216083527, "learning_rate": 0.002, "loss": 2.5407, "step": 282220 }, { "epoch": 0.5622649177610608, "grad_norm": 0.18135714530944824, "learning_rate": 0.002, "loss": 2.5556, "step": 282230 }, { "epoch": 0.5622848399846997, "grad_norm": 0.1489613950252533, "learning_rate": 0.002, "loss": 2.5491, "step": 282240 }, { "epoch": 0.5623047622083387, "grad_norm": 0.16735364496707916, "learning_rate": 0.002, "loss": 2.56, "step": 282250 }, { "epoch": 0.5623246844319776, "grad_norm": 0.16789427399635315, "learning_rate": 0.002, "loss": 2.5427, "step": 282260 }, { "epoch": 0.5623446066556165, "grad_norm": 0.19459697604179382, "learning_rate": 0.002, "loss": 2.5526, "step": 282270 }, { "epoch": 0.5623645288792554, "grad_norm": 0.1813332438468933, "learning_rate": 0.002, "loss": 2.5647, "step": 282280 }, { "epoch": 0.5623844511028943, "grad_norm": 0.16857442259788513, "learning_rate": 0.002, "loss": 2.5615, "step": 282290 }, { "epoch": 0.5624043733265333, "grad_norm": 0.18606677651405334, "learning_rate": 0.002, "loss": 2.5563, "step": 282300 }, { "epoch": 0.5624242955501721, "grad_norm": 0.16731001436710358, "learning_rate": 0.002, "loss": 2.5616, "step": 282310 }, { "epoch": 0.562444217773811, "grad_norm": 0.1946101188659668, "learning_rate": 0.002, "loss": 2.5559, "step": 282320 }, { "epoch": 0.5624641399974499, "grad_norm": 0.19589735567569733, "learning_rate": 0.002, "loss": 2.5652, "step": 282330 }, { "epoch": 0.5624840622210888, "grad_norm": 0.15560470521450043, "learning_rate": 0.002, "loss": 2.5567, "step": 282340 }, { "epoch": 0.5625039844447278, "grad_norm": 0.18352076411247253, "learning_rate": 0.002, "loss": 2.5589, "step": 282350 }, { "epoch": 0.5625239066683667, "grad_norm": 0.1926759034395218, "learning_rate": 0.002, "loss": 2.5643, "step": 282360 }, { "epoch": 0.5625438288920056, "grad_norm": 0.15273579955101013, "learning_rate": 0.002, "loss": 2.5657, "step": 282370 }, { "epoch": 0.5625637511156445, "grad_norm": 0.16645066440105438, "learning_rate": 0.002, "loss": 2.5577, "step": 282380 }, { "epoch": 0.5625836733392834, "grad_norm": 0.167138934135437, "learning_rate": 0.002, "loss": 2.5539, "step": 282390 }, { "epoch": 0.5626035955629224, "grad_norm": 0.18351340293884277, "learning_rate": 0.002, "loss": 2.5608, "step": 282400 }, { "epoch": 0.5626235177865613, "grad_norm": 0.1772533655166626, "learning_rate": 0.002, "loss": 2.5635, "step": 282410 }, { "epoch": 0.5626434400102002, "grad_norm": 0.17603425681591034, "learning_rate": 0.002, "loss": 2.5521, "step": 282420 }, { "epoch": 0.5626633622338391, "grad_norm": 0.16443108022212982, "learning_rate": 0.002, "loss": 2.5618, "step": 282430 }, { "epoch": 0.5626832844574781, "grad_norm": 0.1896844357252121, "learning_rate": 0.002, "loss": 2.554, "step": 282440 }, { "epoch": 0.562703206681117, "grad_norm": 0.15594777464866638, "learning_rate": 0.002, "loss": 2.5455, "step": 282450 }, { "epoch": 0.5627231289047558, "grad_norm": 0.15921010076999664, "learning_rate": 0.002, "loss": 2.5581, "step": 282460 }, { "epoch": 0.5627430511283947, "grad_norm": 0.21039709448814392, "learning_rate": 0.002, "loss": 2.5508, "step": 282470 }, { "epoch": 0.5627629733520336, "grad_norm": 0.16731567680835724, "learning_rate": 0.002, "loss": 2.5589, "step": 282480 }, { "epoch": 0.5627828955756726, "grad_norm": 0.16558009386062622, "learning_rate": 0.002, "loss": 2.5663, "step": 282490 }, { "epoch": 0.5628028177993115, "grad_norm": 0.16358928382396698, "learning_rate": 0.002, "loss": 2.5635, "step": 282500 }, { "epoch": 0.5628227400229504, "grad_norm": 0.1586926281452179, "learning_rate": 0.002, "loss": 2.5642, "step": 282510 }, { "epoch": 0.5628426622465893, "grad_norm": 0.22237813472747803, "learning_rate": 0.002, "loss": 2.5561, "step": 282520 }, { "epoch": 0.5628625844702282, "grad_norm": 0.17298001050949097, "learning_rate": 0.002, "loss": 2.5652, "step": 282530 }, { "epoch": 0.5628825066938672, "grad_norm": 0.19142717123031616, "learning_rate": 0.002, "loss": 2.5553, "step": 282540 }, { "epoch": 0.5629024289175061, "grad_norm": 0.1783573478460312, "learning_rate": 0.002, "loss": 2.5693, "step": 282550 }, { "epoch": 0.562922351141145, "grad_norm": 0.1870431751012802, "learning_rate": 0.002, "loss": 2.5613, "step": 282560 }, { "epoch": 0.5629422733647839, "grad_norm": 0.2182711958885193, "learning_rate": 0.002, "loss": 2.5685, "step": 282570 }, { "epoch": 0.5629621955884228, "grad_norm": 0.17402808368206024, "learning_rate": 0.002, "loss": 2.5508, "step": 282580 }, { "epoch": 0.5629821178120618, "grad_norm": 0.17035739123821259, "learning_rate": 0.002, "loss": 2.5621, "step": 282590 }, { "epoch": 0.5630020400357006, "grad_norm": 0.19372305274009705, "learning_rate": 0.002, "loss": 2.5564, "step": 282600 }, { "epoch": 0.5630219622593395, "grad_norm": 0.18900777399539948, "learning_rate": 0.002, "loss": 2.5546, "step": 282610 }, { "epoch": 0.5630418844829784, "grad_norm": 0.15060916543006897, "learning_rate": 0.002, "loss": 2.5359, "step": 282620 }, { "epoch": 0.5630618067066173, "grad_norm": 0.1951209306716919, "learning_rate": 0.002, "loss": 2.5695, "step": 282630 }, { "epoch": 0.5630817289302563, "grad_norm": 0.16738006472587585, "learning_rate": 0.002, "loss": 2.5556, "step": 282640 }, { "epoch": 0.5631016511538952, "grad_norm": 0.194317027926445, "learning_rate": 0.002, "loss": 2.5666, "step": 282650 }, { "epoch": 0.5631215733775341, "grad_norm": 0.17450518906116486, "learning_rate": 0.002, "loss": 2.5662, "step": 282660 }, { "epoch": 0.563141495601173, "grad_norm": 0.14944744110107422, "learning_rate": 0.002, "loss": 2.5571, "step": 282670 }, { "epoch": 0.5631614178248119, "grad_norm": 0.18533509969711304, "learning_rate": 0.002, "loss": 2.5585, "step": 282680 }, { "epoch": 0.5631813400484509, "grad_norm": 0.1598302274942398, "learning_rate": 0.002, "loss": 2.5616, "step": 282690 }, { "epoch": 0.5632012622720898, "grad_norm": 0.18547901511192322, "learning_rate": 0.002, "loss": 2.552, "step": 282700 }, { "epoch": 0.5632211844957287, "grad_norm": 0.13757464289665222, "learning_rate": 0.002, "loss": 2.549, "step": 282710 }, { "epoch": 0.5632411067193676, "grad_norm": 0.14410340785980225, "learning_rate": 0.002, "loss": 2.559, "step": 282720 }, { "epoch": 0.5632610289430064, "grad_norm": 0.16815480589866638, "learning_rate": 0.002, "loss": 2.5446, "step": 282730 }, { "epoch": 0.5632809511666454, "grad_norm": 0.28565600514411926, "learning_rate": 0.002, "loss": 2.5477, "step": 282740 }, { "epoch": 0.5633008733902843, "grad_norm": 0.16671006381511688, "learning_rate": 0.002, "loss": 2.5573, "step": 282750 }, { "epoch": 0.5633207956139232, "grad_norm": 0.15273459255695343, "learning_rate": 0.002, "loss": 2.5516, "step": 282760 }, { "epoch": 0.5633407178375621, "grad_norm": 0.16602259874343872, "learning_rate": 0.002, "loss": 2.5577, "step": 282770 }, { "epoch": 0.5633606400612011, "grad_norm": 0.1726268231868744, "learning_rate": 0.002, "loss": 2.5467, "step": 282780 }, { "epoch": 0.56338056228484, "grad_norm": 0.18647320568561554, "learning_rate": 0.002, "loss": 2.5715, "step": 282790 }, { "epoch": 0.5634004845084789, "grad_norm": 0.1544462889432907, "learning_rate": 0.002, "loss": 2.557, "step": 282800 }, { "epoch": 0.5634204067321178, "grad_norm": 0.16013504564762115, "learning_rate": 0.002, "loss": 2.5391, "step": 282810 }, { "epoch": 0.5634403289557567, "grad_norm": 0.16603846848011017, "learning_rate": 0.002, "loss": 2.5555, "step": 282820 }, { "epoch": 0.5634602511793957, "grad_norm": 0.18029484152793884, "learning_rate": 0.002, "loss": 2.5677, "step": 282830 }, { "epoch": 0.5634801734030346, "grad_norm": 0.17510128021240234, "learning_rate": 0.002, "loss": 2.5709, "step": 282840 }, { "epoch": 0.5635000956266735, "grad_norm": 0.19556452333927155, "learning_rate": 0.002, "loss": 2.5601, "step": 282850 }, { "epoch": 0.5635200178503124, "grad_norm": 0.16490045189857483, "learning_rate": 0.002, "loss": 2.5617, "step": 282860 }, { "epoch": 0.5635399400739513, "grad_norm": 0.1680375337600708, "learning_rate": 0.002, "loss": 2.5652, "step": 282870 }, { "epoch": 0.5635598622975903, "grad_norm": 0.16326895356178284, "learning_rate": 0.002, "loss": 2.5464, "step": 282880 }, { "epoch": 0.5635797845212291, "grad_norm": 0.16019590198993683, "learning_rate": 0.002, "loss": 2.558, "step": 282890 }, { "epoch": 0.563599706744868, "grad_norm": 0.16963328421115875, "learning_rate": 0.002, "loss": 2.5472, "step": 282900 }, { "epoch": 0.5636196289685069, "grad_norm": 0.1510888934135437, "learning_rate": 0.002, "loss": 2.5503, "step": 282910 }, { "epoch": 0.5636395511921458, "grad_norm": 0.1473078429698944, "learning_rate": 0.002, "loss": 2.5523, "step": 282920 }, { "epoch": 0.5636594734157848, "grad_norm": 0.18478117883205414, "learning_rate": 0.002, "loss": 2.5498, "step": 282930 }, { "epoch": 0.5636793956394237, "grad_norm": 0.21322554349899292, "learning_rate": 0.002, "loss": 2.5461, "step": 282940 }, { "epoch": 0.5636993178630626, "grad_norm": 0.15220989286899567, "learning_rate": 0.002, "loss": 2.563, "step": 282950 }, { "epoch": 0.5637192400867015, "grad_norm": 0.19036847352981567, "learning_rate": 0.002, "loss": 2.5429, "step": 282960 }, { "epoch": 0.5637391623103404, "grad_norm": 0.15894845128059387, "learning_rate": 0.002, "loss": 2.5562, "step": 282970 }, { "epoch": 0.5637590845339794, "grad_norm": 0.1846480667591095, "learning_rate": 0.002, "loss": 2.5699, "step": 282980 }, { "epoch": 0.5637790067576183, "grad_norm": 0.14198242127895355, "learning_rate": 0.002, "loss": 2.5612, "step": 282990 }, { "epoch": 0.5637989289812572, "grad_norm": 0.1445966511964798, "learning_rate": 0.002, "loss": 2.5697, "step": 283000 }, { "epoch": 0.563818851204896, "grad_norm": 0.18282625079154968, "learning_rate": 0.002, "loss": 2.5505, "step": 283010 }, { "epoch": 0.563838773428535, "grad_norm": 0.1648414433002472, "learning_rate": 0.002, "loss": 2.5391, "step": 283020 }, { "epoch": 0.563858695652174, "grad_norm": 0.1490781009197235, "learning_rate": 0.002, "loss": 2.5518, "step": 283030 }, { "epoch": 0.5638786178758128, "grad_norm": 0.18654128909111023, "learning_rate": 0.002, "loss": 2.5468, "step": 283040 }, { "epoch": 0.5638985400994517, "grad_norm": 0.1557871401309967, "learning_rate": 0.002, "loss": 2.5465, "step": 283050 }, { "epoch": 0.5639184623230906, "grad_norm": 0.16858968138694763, "learning_rate": 0.002, "loss": 2.5705, "step": 283060 }, { "epoch": 0.5639383845467296, "grad_norm": 0.1498718410730362, "learning_rate": 0.002, "loss": 2.5666, "step": 283070 }, { "epoch": 0.5639583067703685, "grad_norm": 0.23895321786403656, "learning_rate": 0.002, "loss": 2.5509, "step": 283080 }, { "epoch": 0.5639782289940074, "grad_norm": 0.14792539179325104, "learning_rate": 0.002, "loss": 2.542, "step": 283090 }, { "epoch": 0.5639981512176463, "grad_norm": 0.1842724084854126, "learning_rate": 0.002, "loss": 2.5549, "step": 283100 }, { "epoch": 0.5640180734412852, "grad_norm": 0.20306521654129028, "learning_rate": 0.002, "loss": 2.5528, "step": 283110 }, { "epoch": 0.5640379956649242, "grad_norm": 0.155024453997612, "learning_rate": 0.002, "loss": 2.5568, "step": 283120 }, { "epoch": 0.5640579178885631, "grad_norm": 0.14351944625377655, "learning_rate": 0.002, "loss": 2.5408, "step": 283130 }, { "epoch": 0.564077840112202, "grad_norm": 0.15395914018154144, "learning_rate": 0.002, "loss": 2.561, "step": 283140 }, { "epoch": 0.5640977623358409, "grad_norm": 2.1813535690307617, "learning_rate": 0.002, "loss": 2.5659, "step": 283150 }, { "epoch": 0.5641176845594797, "grad_norm": 0.17939431965351105, "learning_rate": 0.002, "loss": 2.5957, "step": 283160 }, { "epoch": 0.5641376067831188, "grad_norm": 0.13714811205863953, "learning_rate": 0.002, "loss": 2.5683, "step": 283170 }, { "epoch": 0.5641575290067576, "grad_norm": 0.12863019108772278, "learning_rate": 0.002, "loss": 2.5706, "step": 283180 }, { "epoch": 0.5641774512303965, "grad_norm": 0.12113964557647705, "learning_rate": 0.002, "loss": 2.5542, "step": 283190 }, { "epoch": 0.5641973734540354, "grad_norm": 0.17728401720523834, "learning_rate": 0.002, "loss": 2.5489, "step": 283200 }, { "epoch": 0.5642172956776743, "grad_norm": 0.16270707547664642, "learning_rate": 0.002, "loss": 2.5515, "step": 283210 }, { "epoch": 0.5642372179013133, "grad_norm": 0.16114026308059692, "learning_rate": 0.002, "loss": 2.5478, "step": 283220 }, { "epoch": 0.5642571401249522, "grad_norm": 0.15823104977607727, "learning_rate": 0.002, "loss": 2.556, "step": 283230 }, { "epoch": 0.5642770623485911, "grad_norm": 0.15101873874664307, "learning_rate": 0.002, "loss": 2.56, "step": 283240 }, { "epoch": 0.56429698457223, "grad_norm": 0.18374140560626984, "learning_rate": 0.002, "loss": 2.55, "step": 283250 }, { "epoch": 0.5643169067958689, "grad_norm": 0.16589848697185516, "learning_rate": 0.002, "loss": 2.5578, "step": 283260 }, { "epoch": 0.5643368290195079, "grad_norm": 0.1813521385192871, "learning_rate": 0.002, "loss": 2.5589, "step": 283270 }, { "epoch": 0.5643567512431468, "grad_norm": 0.1788429170846939, "learning_rate": 0.002, "loss": 2.561, "step": 283280 }, { "epoch": 0.5643766734667857, "grad_norm": 0.15670961141586304, "learning_rate": 0.002, "loss": 2.5484, "step": 283290 }, { "epoch": 0.5643965956904246, "grad_norm": 0.16881303489208221, "learning_rate": 0.002, "loss": 2.5549, "step": 283300 }, { "epoch": 0.5644165179140634, "grad_norm": 0.15554961562156677, "learning_rate": 0.002, "loss": 2.5585, "step": 283310 }, { "epoch": 0.5644364401377024, "grad_norm": 0.18360698223114014, "learning_rate": 0.002, "loss": 2.5529, "step": 283320 }, { "epoch": 0.5644563623613413, "grad_norm": 0.19006836414337158, "learning_rate": 0.002, "loss": 2.5622, "step": 283330 }, { "epoch": 0.5644762845849802, "grad_norm": 0.14933383464813232, "learning_rate": 0.002, "loss": 2.554, "step": 283340 }, { "epoch": 0.5644962068086191, "grad_norm": 0.182172030210495, "learning_rate": 0.002, "loss": 2.5572, "step": 283350 }, { "epoch": 0.5645161290322581, "grad_norm": 0.14318719506263733, "learning_rate": 0.002, "loss": 2.5538, "step": 283360 }, { "epoch": 0.564536051255897, "grad_norm": 0.17406608164310455, "learning_rate": 0.002, "loss": 2.5531, "step": 283370 }, { "epoch": 0.5645559734795359, "grad_norm": 0.18005837500095367, "learning_rate": 0.002, "loss": 2.5542, "step": 283380 }, { "epoch": 0.5645758957031748, "grad_norm": 0.16613224148750305, "learning_rate": 0.002, "loss": 2.5578, "step": 283390 }, { "epoch": 0.5645958179268137, "grad_norm": 0.16898851096630096, "learning_rate": 0.002, "loss": 2.5478, "step": 283400 }, { "epoch": 0.5646157401504527, "grad_norm": 0.17304769158363342, "learning_rate": 0.002, "loss": 2.5691, "step": 283410 }, { "epoch": 0.5646356623740916, "grad_norm": 0.15949679911136627, "learning_rate": 0.002, "loss": 2.5451, "step": 283420 }, { "epoch": 0.5646555845977305, "grad_norm": 0.14722004532814026, "learning_rate": 0.002, "loss": 2.5646, "step": 283430 }, { "epoch": 0.5646755068213694, "grad_norm": 0.1818704605102539, "learning_rate": 0.002, "loss": 2.5514, "step": 283440 }, { "epoch": 0.5646954290450082, "grad_norm": 0.17560555040836334, "learning_rate": 0.002, "loss": 2.5411, "step": 283450 }, { "epoch": 0.5647153512686472, "grad_norm": 0.13908490538597107, "learning_rate": 0.002, "loss": 2.5537, "step": 283460 }, { "epoch": 0.5647352734922861, "grad_norm": 0.14457917213439941, "learning_rate": 0.002, "loss": 2.5636, "step": 283470 }, { "epoch": 0.564755195715925, "grad_norm": 0.14869281649589539, "learning_rate": 0.002, "loss": 2.5495, "step": 283480 }, { "epoch": 0.5647751179395639, "grad_norm": 0.14204595983028412, "learning_rate": 0.002, "loss": 2.5568, "step": 283490 }, { "epoch": 0.5647950401632028, "grad_norm": 0.18689978122711182, "learning_rate": 0.002, "loss": 2.551, "step": 283500 }, { "epoch": 0.5648149623868418, "grad_norm": 0.15381182730197906, "learning_rate": 0.002, "loss": 2.5577, "step": 283510 }, { "epoch": 0.5648348846104807, "grad_norm": 0.16414493322372437, "learning_rate": 0.002, "loss": 2.5489, "step": 283520 }, { "epoch": 0.5648548068341196, "grad_norm": 0.15715661644935608, "learning_rate": 0.002, "loss": 2.5688, "step": 283530 }, { "epoch": 0.5648747290577585, "grad_norm": 0.1678289771080017, "learning_rate": 0.002, "loss": 2.5451, "step": 283540 }, { "epoch": 0.5648946512813974, "grad_norm": 0.18905076384544373, "learning_rate": 0.002, "loss": 2.55, "step": 283550 }, { "epoch": 0.5649145735050364, "grad_norm": 0.21109214425086975, "learning_rate": 0.002, "loss": 2.5648, "step": 283560 }, { "epoch": 0.5649344957286753, "grad_norm": 0.1740826964378357, "learning_rate": 0.002, "loss": 2.5689, "step": 283570 }, { "epoch": 0.5649544179523142, "grad_norm": 0.17729806900024414, "learning_rate": 0.002, "loss": 2.5381, "step": 283580 }, { "epoch": 0.564974340175953, "grad_norm": 0.14049406349658966, "learning_rate": 0.002, "loss": 2.574, "step": 283590 }, { "epoch": 0.5649942623995919, "grad_norm": 0.179377943277359, "learning_rate": 0.002, "loss": 2.5581, "step": 283600 }, { "epoch": 0.5650141846232309, "grad_norm": 0.16335143148899078, "learning_rate": 0.002, "loss": 2.5457, "step": 283610 }, { "epoch": 0.5650341068468698, "grad_norm": 0.16311760246753693, "learning_rate": 0.002, "loss": 2.5655, "step": 283620 }, { "epoch": 0.5650540290705087, "grad_norm": 0.20061179995536804, "learning_rate": 0.002, "loss": 2.5463, "step": 283630 }, { "epoch": 0.5650739512941476, "grad_norm": 0.14828838407993317, "learning_rate": 0.002, "loss": 2.5564, "step": 283640 }, { "epoch": 0.5650938735177866, "grad_norm": 0.1644706130027771, "learning_rate": 0.002, "loss": 2.5731, "step": 283650 }, { "epoch": 0.5651137957414255, "grad_norm": 0.13961729407310486, "learning_rate": 0.002, "loss": 2.5506, "step": 283660 }, { "epoch": 0.5651337179650644, "grad_norm": 0.1804085671901703, "learning_rate": 0.002, "loss": 2.5527, "step": 283670 }, { "epoch": 0.5651536401887033, "grad_norm": 0.17378589510917664, "learning_rate": 0.002, "loss": 2.5584, "step": 283680 }, { "epoch": 0.5651735624123422, "grad_norm": 0.2099379152059555, "learning_rate": 0.002, "loss": 2.566, "step": 283690 }, { "epoch": 0.5651934846359812, "grad_norm": 0.18145537376403809, "learning_rate": 0.002, "loss": 2.5563, "step": 283700 }, { "epoch": 0.5652134068596201, "grad_norm": 0.19394201040267944, "learning_rate": 0.002, "loss": 2.5594, "step": 283710 }, { "epoch": 0.565233329083259, "grad_norm": 0.17570942640304565, "learning_rate": 0.002, "loss": 2.5556, "step": 283720 }, { "epoch": 0.5652532513068979, "grad_norm": 0.14797385036945343, "learning_rate": 0.002, "loss": 2.5652, "step": 283730 }, { "epoch": 0.5652731735305367, "grad_norm": 0.1858651041984558, "learning_rate": 0.002, "loss": 2.5555, "step": 283740 }, { "epoch": 0.5652930957541757, "grad_norm": 0.1790008842945099, "learning_rate": 0.002, "loss": 2.5662, "step": 283750 }, { "epoch": 0.5653130179778146, "grad_norm": 0.16959552466869354, "learning_rate": 0.002, "loss": 2.562, "step": 283760 }, { "epoch": 0.5653329402014535, "grad_norm": 0.16525879502296448, "learning_rate": 0.002, "loss": 2.5576, "step": 283770 }, { "epoch": 0.5653528624250924, "grad_norm": 0.18626391887664795, "learning_rate": 0.002, "loss": 2.5454, "step": 283780 }, { "epoch": 0.5653727846487313, "grad_norm": 0.1665530502796173, "learning_rate": 0.002, "loss": 2.5483, "step": 283790 }, { "epoch": 0.5653927068723703, "grad_norm": 0.15591003000736237, "learning_rate": 0.002, "loss": 2.5387, "step": 283800 }, { "epoch": 0.5654126290960092, "grad_norm": 0.1764342188835144, "learning_rate": 0.002, "loss": 2.5634, "step": 283810 }, { "epoch": 0.5654325513196481, "grad_norm": 0.18441425263881683, "learning_rate": 0.002, "loss": 2.5561, "step": 283820 }, { "epoch": 0.565452473543287, "grad_norm": 0.15178550779819489, "learning_rate": 0.002, "loss": 2.5569, "step": 283830 }, { "epoch": 0.5654723957669259, "grad_norm": 0.15204453468322754, "learning_rate": 0.002, "loss": 2.5478, "step": 283840 }, { "epoch": 0.5654923179905649, "grad_norm": 0.15976440906524658, "learning_rate": 0.002, "loss": 2.559, "step": 283850 }, { "epoch": 0.5655122402142038, "grad_norm": 0.15404170751571655, "learning_rate": 0.002, "loss": 2.5597, "step": 283860 }, { "epoch": 0.5655321624378427, "grad_norm": 0.17847047746181488, "learning_rate": 0.002, "loss": 2.5536, "step": 283870 }, { "epoch": 0.5655520846614815, "grad_norm": 0.17347416281700134, "learning_rate": 0.002, "loss": 2.5657, "step": 283880 }, { "epoch": 0.5655720068851204, "grad_norm": 0.15582910180091858, "learning_rate": 0.002, "loss": 2.5704, "step": 283890 }, { "epoch": 0.5655919291087594, "grad_norm": 0.19225965440273285, "learning_rate": 0.002, "loss": 2.5567, "step": 283900 }, { "epoch": 0.5656118513323983, "grad_norm": 0.15754960477352142, "learning_rate": 0.002, "loss": 2.5588, "step": 283910 }, { "epoch": 0.5656317735560372, "grad_norm": 0.14502428472042084, "learning_rate": 0.002, "loss": 2.5624, "step": 283920 }, { "epoch": 0.5656516957796761, "grad_norm": 0.17361724376678467, "learning_rate": 0.002, "loss": 2.5596, "step": 283930 }, { "epoch": 0.5656716180033151, "grad_norm": 0.23156897723674774, "learning_rate": 0.002, "loss": 2.5599, "step": 283940 }, { "epoch": 0.565691540226954, "grad_norm": 0.19438330829143524, "learning_rate": 0.002, "loss": 2.5611, "step": 283950 }, { "epoch": 0.5657114624505929, "grad_norm": 0.15586861968040466, "learning_rate": 0.002, "loss": 2.5498, "step": 283960 }, { "epoch": 0.5657313846742318, "grad_norm": 0.21101020276546478, "learning_rate": 0.002, "loss": 2.5738, "step": 283970 }, { "epoch": 0.5657513068978707, "grad_norm": 0.15059898793697357, "learning_rate": 0.002, "loss": 2.5646, "step": 283980 }, { "epoch": 0.5657712291215097, "grad_norm": 0.17247924208641052, "learning_rate": 0.002, "loss": 2.5622, "step": 283990 }, { "epoch": 0.5657911513451486, "grad_norm": 0.16270464658737183, "learning_rate": 0.002, "loss": 2.5422, "step": 284000 }, { "epoch": 0.5658110735687875, "grad_norm": 0.15510980784893036, "learning_rate": 0.002, "loss": 2.5554, "step": 284010 }, { "epoch": 0.5658309957924264, "grad_norm": 0.1877962052822113, "learning_rate": 0.002, "loss": 2.5599, "step": 284020 }, { "epoch": 0.5658509180160652, "grad_norm": 0.15210486948490143, "learning_rate": 0.002, "loss": 2.5637, "step": 284030 }, { "epoch": 0.5658708402397042, "grad_norm": 0.17309540510177612, "learning_rate": 0.002, "loss": 2.5511, "step": 284040 }, { "epoch": 0.5658907624633431, "grad_norm": 0.18116934597492218, "learning_rate": 0.002, "loss": 2.5617, "step": 284050 }, { "epoch": 0.565910684686982, "grad_norm": 0.15533702075481415, "learning_rate": 0.002, "loss": 2.5538, "step": 284060 }, { "epoch": 0.5659306069106209, "grad_norm": 0.1532943993806839, "learning_rate": 0.002, "loss": 2.5572, "step": 284070 }, { "epoch": 0.5659505291342598, "grad_norm": 0.20667870342731476, "learning_rate": 0.002, "loss": 2.5501, "step": 284080 }, { "epoch": 0.5659704513578988, "grad_norm": 0.17120499908924103, "learning_rate": 0.002, "loss": 2.5685, "step": 284090 }, { "epoch": 0.5659903735815377, "grad_norm": 0.1651766449213028, "learning_rate": 0.002, "loss": 2.5685, "step": 284100 }, { "epoch": 0.5660102958051766, "grad_norm": 0.21130718290805817, "learning_rate": 0.002, "loss": 2.5614, "step": 284110 }, { "epoch": 0.5660302180288155, "grad_norm": 0.16898956894874573, "learning_rate": 0.002, "loss": 2.5583, "step": 284120 }, { "epoch": 0.5660501402524544, "grad_norm": 0.14884954690933228, "learning_rate": 0.002, "loss": 2.5507, "step": 284130 }, { "epoch": 0.5660700624760934, "grad_norm": 0.16562655568122864, "learning_rate": 0.002, "loss": 2.5558, "step": 284140 }, { "epoch": 0.5660899846997323, "grad_norm": 0.17271138727664948, "learning_rate": 0.002, "loss": 2.5646, "step": 284150 }, { "epoch": 0.5661099069233712, "grad_norm": 0.18077452480793, "learning_rate": 0.002, "loss": 2.5528, "step": 284160 }, { "epoch": 0.56612982914701, "grad_norm": 0.48888832330703735, "learning_rate": 0.002, "loss": 2.5526, "step": 284170 }, { "epoch": 0.5661497513706489, "grad_norm": 0.15555764734745026, "learning_rate": 0.002, "loss": 2.5515, "step": 284180 }, { "epoch": 0.5661696735942879, "grad_norm": 0.16833935678005219, "learning_rate": 0.002, "loss": 2.5476, "step": 284190 }, { "epoch": 0.5661895958179268, "grad_norm": 0.16928833723068237, "learning_rate": 0.002, "loss": 2.5634, "step": 284200 }, { "epoch": 0.5662095180415657, "grad_norm": 0.15525005757808685, "learning_rate": 0.002, "loss": 2.5498, "step": 284210 }, { "epoch": 0.5662294402652046, "grad_norm": 0.17100191116333008, "learning_rate": 0.002, "loss": 2.5601, "step": 284220 }, { "epoch": 0.5662493624888435, "grad_norm": 0.13694442808628082, "learning_rate": 0.002, "loss": 2.5578, "step": 284230 }, { "epoch": 0.5662692847124825, "grad_norm": 0.17022575438022614, "learning_rate": 0.002, "loss": 2.5578, "step": 284240 }, { "epoch": 0.5662892069361214, "grad_norm": 0.1702800840139389, "learning_rate": 0.002, "loss": 2.5613, "step": 284250 }, { "epoch": 0.5663091291597603, "grad_norm": 0.17530938982963562, "learning_rate": 0.002, "loss": 2.5586, "step": 284260 }, { "epoch": 0.5663290513833992, "grad_norm": 0.14994879066944122, "learning_rate": 0.002, "loss": 2.5515, "step": 284270 }, { "epoch": 0.5663489736070382, "grad_norm": 0.15694324672222137, "learning_rate": 0.002, "loss": 2.5694, "step": 284280 }, { "epoch": 0.5663688958306771, "grad_norm": 0.18334873020648956, "learning_rate": 0.002, "loss": 2.5635, "step": 284290 }, { "epoch": 0.566388818054316, "grad_norm": 0.19646413624286652, "learning_rate": 0.002, "loss": 2.5497, "step": 284300 }, { "epoch": 0.5664087402779548, "grad_norm": 0.17586718499660492, "learning_rate": 0.002, "loss": 2.5435, "step": 284310 }, { "epoch": 0.5664286625015937, "grad_norm": 0.15439409017562866, "learning_rate": 0.002, "loss": 2.5635, "step": 284320 }, { "epoch": 0.5664485847252327, "grad_norm": 0.1529441475868225, "learning_rate": 0.002, "loss": 2.5577, "step": 284330 }, { "epoch": 0.5664685069488716, "grad_norm": 0.15968291461467743, "learning_rate": 0.002, "loss": 2.5627, "step": 284340 }, { "epoch": 0.5664884291725105, "grad_norm": 0.15424402058124542, "learning_rate": 0.002, "loss": 2.5519, "step": 284350 }, { "epoch": 0.5665083513961494, "grad_norm": 0.15887565910816193, "learning_rate": 0.002, "loss": 2.538, "step": 284360 }, { "epoch": 0.5665282736197883, "grad_norm": 0.1901690661907196, "learning_rate": 0.002, "loss": 2.5553, "step": 284370 }, { "epoch": 0.5665481958434273, "grad_norm": 0.1637650430202484, "learning_rate": 0.002, "loss": 2.5508, "step": 284380 }, { "epoch": 0.5665681180670662, "grad_norm": 0.152215376496315, "learning_rate": 0.002, "loss": 2.5461, "step": 284390 }, { "epoch": 0.5665880402907051, "grad_norm": 0.14624008536338806, "learning_rate": 0.002, "loss": 2.5441, "step": 284400 }, { "epoch": 0.566607962514344, "grad_norm": 0.15111969411373138, "learning_rate": 0.002, "loss": 2.5499, "step": 284410 }, { "epoch": 0.5666278847379829, "grad_norm": 0.17475013434886932, "learning_rate": 0.002, "loss": 2.5628, "step": 284420 }, { "epoch": 0.5666478069616219, "grad_norm": 0.17243680357933044, "learning_rate": 0.002, "loss": 2.5578, "step": 284430 }, { "epoch": 0.5666677291852608, "grad_norm": 0.1499776542186737, "learning_rate": 0.002, "loss": 2.5611, "step": 284440 }, { "epoch": 0.5666876514088997, "grad_norm": 0.1724734902381897, "learning_rate": 0.002, "loss": 2.5623, "step": 284450 }, { "epoch": 0.5667075736325385, "grad_norm": 0.15691781044006348, "learning_rate": 0.002, "loss": 2.5441, "step": 284460 }, { "epoch": 0.5667274958561774, "grad_norm": 0.16467733681201935, "learning_rate": 0.002, "loss": 2.5597, "step": 284470 }, { "epoch": 0.5667474180798164, "grad_norm": 0.15279075503349304, "learning_rate": 0.002, "loss": 2.5501, "step": 284480 }, { "epoch": 0.5667673403034553, "grad_norm": 0.17768490314483643, "learning_rate": 0.002, "loss": 2.5246, "step": 284490 }, { "epoch": 0.5667872625270942, "grad_norm": 0.16076743602752686, "learning_rate": 0.002, "loss": 2.5544, "step": 284500 }, { "epoch": 0.5668071847507331, "grad_norm": 0.18455107510089874, "learning_rate": 0.002, "loss": 2.5564, "step": 284510 }, { "epoch": 0.566827106974372, "grad_norm": 0.1756094992160797, "learning_rate": 0.002, "loss": 2.5515, "step": 284520 }, { "epoch": 0.566847029198011, "grad_norm": 0.21347753703594208, "learning_rate": 0.002, "loss": 2.5531, "step": 284530 }, { "epoch": 0.5668669514216499, "grad_norm": 0.17700542509555817, "learning_rate": 0.002, "loss": 2.5596, "step": 284540 }, { "epoch": 0.5668868736452888, "grad_norm": 0.15775932371616364, "learning_rate": 0.002, "loss": 2.5763, "step": 284550 }, { "epoch": 0.5669067958689277, "grad_norm": 0.1648603230714798, "learning_rate": 0.002, "loss": 2.5537, "step": 284560 }, { "epoch": 0.5669267180925667, "grad_norm": 0.15375497937202454, "learning_rate": 0.002, "loss": 2.5599, "step": 284570 }, { "epoch": 0.5669466403162056, "grad_norm": 0.18578612804412842, "learning_rate": 0.002, "loss": 2.5658, "step": 284580 }, { "epoch": 0.5669665625398445, "grad_norm": 0.15067826211452484, "learning_rate": 0.002, "loss": 2.5524, "step": 284590 }, { "epoch": 0.5669864847634833, "grad_norm": 0.15286865830421448, "learning_rate": 0.002, "loss": 2.5672, "step": 284600 }, { "epoch": 0.5670064069871222, "grad_norm": 0.16335150599479675, "learning_rate": 0.002, "loss": 2.5647, "step": 284610 }, { "epoch": 0.5670263292107612, "grad_norm": 0.17355868220329285, "learning_rate": 0.002, "loss": 2.561, "step": 284620 }, { "epoch": 0.5670462514344001, "grad_norm": 0.16711169481277466, "learning_rate": 0.002, "loss": 2.5417, "step": 284630 }, { "epoch": 0.567066173658039, "grad_norm": 0.15252281725406647, "learning_rate": 0.002, "loss": 2.5499, "step": 284640 }, { "epoch": 0.5670860958816779, "grad_norm": 0.16615217924118042, "learning_rate": 0.002, "loss": 2.5513, "step": 284650 }, { "epoch": 0.5671060181053168, "grad_norm": 0.2091776430606842, "learning_rate": 0.002, "loss": 2.5632, "step": 284660 }, { "epoch": 0.5671259403289558, "grad_norm": 0.16366644203662872, "learning_rate": 0.002, "loss": 2.5719, "step": 284670 }, { "epoch": 0.5671458625525947, "grad_norm": 0.17691685259342194, "learning_rate": 0.002, "loss": 2.5656, "step": 284680 }, { "epoch": 0.5671657847762336, "grad_norm": 0.15784958004951477, "learning_rate": 0.002, "loss": 2.5535, "step": 284690 }, { "epoch": 0.5671857069998725, "grad_norm": 0.18792115151882172, "learning_rate": 0.002, "loss": 2.5603, "step": 284700 }, { "epoch": 0.5672056292235114, "grad_norm": 0.14929597079753876, "learning_rate": 0.002, "loss": 2.5583, "step": 284710 }, { "epoch": 0.5672255514471504, "grad_norm": 0.16608919203281403, "learning_rate": 0.002, "loss": 2.5679, "step": 284720 }, { "epoch": 0.5672454736707893, "grad_norm": 0.18004480004310608, "learning_rate": 0.002, "loss": 2.5522, "step": 284730 }, { "epoch": 0.5672653958944281, "grad_norm": 0.1360037922859192, "learning_rate": 0.002, "loss": 2.5514, "step": 284740 }, { "epoch": 0.567285318118067, "grad_norm": 0.16579550504684448, "learning_rate": 0.002, "loss": 2.5537, "step": 284750 }, { "epoch": 0.5673052403417059, "grad_norm": 0.18217800557613373, "learning_rate": 0.002, "loss": 2.5468, "step": 284760 }, { "epoch": 0.5673251625653449, "grad_norm": 0.15790607035160065, "learning_rate": 0.002, "loss": 2.5535, "step": 284770 }, { "epoch": 0.5673450847889838, "grad_norm": 0.18855085968971252, "learning_rate": 0.002, "loss": 2.5617, "step": 284780 }, { "epoch": 0.5673650070126227, "grad_norm": 0.12992197275161743, "learning_rate": 0.002, "loss": 2.539, "step": 284790 }, { "epoch": 0.5673849292362616, "grad_norm": 0.16702993214130402, "learning_rate": 0.002, "loss": 2.5604, "step": 284800 }, { "epoch": 0.5674048514599005, "grad_norm": 0.21329382061958313, "learning_rate": 0.002, "loss": 2.5641, "step": 284810 }, { "epoch": 0.5674247736835395, "grad_norm": 0.17045485973358154, "learning_rate": 0.002, "loss": 2.5585, "step": 284820 }, { "epoch": 0.5674446959071784, "grad_norm": 0.17321698367595673, "learning_rate": 0.002, "loss": 2.542, "step": 284830 }, { "epoch": 0.5674646181308173, "grad_norm": 0.16438435018062592, "learning_rate": 0.002, "loss": 2.5389, "step": 284840 }, { "epoch": 0.5674845403544562, "grad_norm": 0.13636445999145508, "learning_rate": 0.002, "loss": 2.5464, "step": 284850 }, { "epoch": 0.5675044625780952, "grad_norm": 0.15549436211585999, "learning_rate": 0.002, "loss": 2.5587, "step": 284860 }, { "epoch": 0.5675243848017341, "grad_norm": 0.12582872807979584, "learning_rate": 0.002, "loss": 2.5599, "step": 284870 }, { "epoch": 0.567544307025373, "grad_norm": 0.1871204674243927, "learning_rate": 0.002, "loss": 2.5421, "step": 284880 }, { "epoch": 0.5675642292490118, "grad_norm": 0.13392594456672668, "learning_rate": 0.002, "loss": 2.5445, "step": 284890 }, { "epoch": 0.5675841514726507, "grad_norm": 0.1520199030637741, "learning_rate": 0.002, "loss": 2.5726, "step": 284900 }, { "epoch": 0.5676040736962897, "grad_norm": 0.19968675076961517, "learning_rate": 0.002, "loss": 2.5592, "step": 284910 }, { "epoch": 0.5676239959199286, "grad_norm": 0.15142224729061127, "learning_rate": 0.002, "loss": 2.5408, "step": 284920 }, { "epoch": 0.5676439181435675, "grad_norm": 0.175834521651268, "learning_rate": 0.002, "loss": 2.5764, "step": 284930 }, { "epoch": 0.5676638403672064, "grad_norm": 0.15569950640201569, "learning_rate": 0.002, "loss": 2.5461, "step": 284940 }, { "epoch": 0.5676837625908453, "grad_norm": 0.26499953866004944, "learning_rate": 0.002, "loss": 2.5542, "step": 284950 }, { "epoch": 0.5677036848144843, "grad_norm": 0.17169152200222015, "learning_rate": 0.002, "loss": 2.5679, "step": 284960 }, { "epoch": 0.5677236070381232, "grad_norm": 0.1406167447566986, "learning_rate": 0.002, "loss": 2.5561, "step": 284970 }, { "epoch": 0.5677435292617621, "grad_norm": 0.17012502253055573, "learning_rate": 0.002, "loss": 2.5562, "step": 284980 }, { "epoch": 0.567763451485401, "grad_norm": 0.1840580254793167, "learning_rate": 0.002, "loss": 2.5453, "step": 284990 }, { "epoch": 0.5677833737090399, "grad_norm": 0.17747220396995544, "learning_rate": 0.002, "loss": 2.5643, "step": 285000 }, { "epoch": 0.5678032959326789, "grad_norm": 0.1809888333082199, "learning_rate": 0.002, "loss": 2.5762, "step": 285010 }, { "epoch": 0.5678232181563178, "grad_norm": 0.20832426846027374, "learning_rate": 0.002, "loss": 2.552, "step": 285020 }, { "epoch": 0.5678431403799566, "grad_norm": 0.18538400530815125, "learning_rate": 0.002, "loss": 2.5598, "step": 285030 }, { "epoch": 0.5678630626035955, "grad_norm": 0.16677670180797577, "learning_rate": 0.002, "loss": 2.5565, "step": 285040 }, { "epoch": 0.5678829848272344, "grad_norm": 0.1678788661956787, "learning_rate": 0.002, "loss": 2.5617, "step": 285050 }, { "epoch": 0.5679029070508734, "grad_norm": 0.16141900420188904, "learning_rate": 0.002, "loss": 2.5452, "step": 285060 }, { "epoch": 0.5679228292745123, "grad_norm": 0.16555598378181458, "learning_rate": 0.002, "loss": 2.5571, "step": 285070 }, { "epoch": 0.5679427514981512, "grad_norm": 0.2200794219970703, "learning_rate": 0.002, "loss": 2.555, "step": 285080 }, { "epoch": 0.5679626737217901, "grad_norm": 0.1939191073179245, "learning_rate": 0.002, "loss": 2.555, "step": 285090 }, { "epoch": 0.567982595945429, "grad_norm": 0.140213742852211, "learning_rate": 0.002, "loss": 2.5759, "step": 285100 }, { "epoch": 0.568002518169068, "grad_norm": 0.1958986073732376, "learning_rate": 0.002, "loss": 2.5527, "step": 285110 }, { "epoch": 0.5680224403927069, "grad_norm": 0.14209182560443878, "learning_rate": 0.002, "loss": 2.5516, "step": 285120 }, { "epoch": 0.5680423626163458, "grad_norm": 0.16692082583904266, "learning_rate": 0.002, "loss": 2.5565, "step": 285130 }, { "epoch": 0.5680622848399847, "grad_norm": 0.17596274614334106, "learning_rate": 0.002, "loss": 2.547, "step": 285140 }, { "epoch": 0.5680822070636237, "grad_norm": 0.18929459154605865, "learning_rate": 0.002, "loss": 2.5581, "step": 285150 }, { "epoch": 0.5681021292872626, "grad_norm": 0.21269944310188293, "learning_rate": 0.002, "loss": 2.553, "step": 285160 }, { "epoch": 0.5681220515109014, "grad_norm": 0.15750719606876373, "learning_rate": 0.002, "loss": 2.5625, "step": 285170 }, { "epoch": 0.5681419737345403, "grad_norm": 0.15442931652069092, "learning_rate": 0.002, "loss": 2.5701, "step": 285180 }, { "epoch": 0.5681618959581792, "grad_norm": 0.1678975373506546, "learning_rate": 0.002, "loss": 2.5705, "step": 285190 }, { "epoch": 0.5681818181818182, "grad_norm": 0.15386763215065002, "learning_rate": 0.002, "loss": 2.5769, "step": 285200 }, { "epoch": 0.5682017404054571, "grad_norm": 0.1703868955373764, "learning_rate": 0.002, "loss": 2.5545, "step": 285210 }, { "epoch": 0.568221662629096, "grad_norm": 0.2140687257051468, "learning_rate": 0.002, "loss": 2.5679, "step": 285220 }, { "epoch": 0.5682415848527349, "grad_norm": 0.16574108600616455, "learning_rate": 0.002, "loss": 2.5451, "step": 285230 }, { "epoch": 0.5682615070763738, "grad_norm": 0.1618204414844513, "learning_rate": 0.002, "loss": 2.5609, "step": 285240 }, { "epoch": 0.5682814293000128, "grad_norm": 0.18480506539344788, "learning_rate": 0.002, "loss": 2.5517, "step": 285250 }, { "epoch": 0.5683013515236517, "grad_norm": 0.19609735906124115, "learning_rate": 0.002, "loss": 2.5721, "step": 285260 }, { "epoch": 0.5683212737472906, "grad_norm": 0.15550942718982697, "learning_rate": 0.002, "loss": 2.5761, "step": 285270 }, { "epoch": 0.5683411959709295, "grad_norm": 0.18154595792293549, "learning_rate": 0.002, "loss": 2.5617, "step": 285280 }, { "epoch": 0.5683611181945684, "grad_norm": 0.20511360466480255, "learning_rate": 0.002, "loss": 2.5524, "step": 285290 }, { "epoch": 0.5683810404182074, "grad_norm": 0.17571499943733215, "learning_rate": 0.002, "loss": 2.5588, "step": 285300 }, { "epoch": 0.5684009626418463, "grad_norm": 0.1537976711988449, "learning_rate": 0.002, "loss": 2.5406, "step": 285310 }, { "epoch": 0.5684208848654851, "grad_norm": 0.18328864872455597, "learning_rate": 0.002, "loss": 2.5563, "step": 285320 }, { "epoch": 0.568440807089124, "grad_norm": 0.14941610395908356, "learning_rate": 0.002, "loss": 2.5538, "step": 285330 }, { "epoch": 0.5684607293127629, "grad_norm": 0.1691502183675766, "learning_rate": 0.002, "loss": 2.553, "step": 285340 }, { "epoch": 0.5684806515364019, "grad_norm": 0.15563078224658966, "learning_rate": 0.002, "loss": 2.543, "step": 285350 }, { "epoch": 0.5685005737600408, "grad_norm": 0.1699943244457245, "learning_rate": 0.002, "loss": 2.5571, "step": 285360 }, { "epoch": 0.5685204959836797, "grad_norm": 0.1873338371515274, "learning_rate": 0.002, "loss": 2.5597, "step": 285370 }, { "epoch": 0.5685404182073186, "grad_norm": 0.16564308106899261, "learning_rate": 0.002, "loss": 2.5631, "step": 285380 }, { "epoch": 0.5685603404309575, "grad_norm": 0.1930026412010193, "learning_rate": 0.002, "loss": 2.5617, "step": 285390 }, { "epoch": 0.5685802626545965, "grad_norm": 0.1478491723537445, "learning_rate": 0.002, "loss": 2.5523, "step": 285400 }, { "epoch": 0.5686001848782354, "grad_norm": 0.16065658628940582, "learning_rate": 0.002, "loss": 2.5506, "step": 285410 }, { "epoch": 0.5686201071018743, "grad_norm": 0.20081347227096558, "learning_rate": 0.002, "loss": 2.5732, "step": 285420 }, { "epoch": 0.5686400293255132, "grad_norm": 0.14899013936519623, "learning_rate": 0.002, "loss": 2.5523, "step": 285430 }, { "epoch": 0.5686599515491522, "grad_norm": 0.16593271493911743, "learning_rate": 0.002, "loss": 2.547, "step": 285440 }, { "epoch": 0.5686798737727911, "grad_norm": 0.19065624475479126, "learning_rate": 0.002, "loss": 2.5542, "step": 285450 }, { "epoch": 0.56869979599643, "grad_norm": 0.16800768673419952, "learning_rate": 0.002, "loss": 2.5579, "step": 285460 }, { "epoch": 0.5687197182200688, "grad_norm": 0.16664612293243408, "learning_rate": 0.002, "loss": 2.5495, "step": 285470 }, { "epoch": 0.5687396404437077, "grad_norm": 0.15637284517288208, "learning_rate": 0.002, "loss": 2.5711, "step": 285480 }, { "epoch": 0.5687595626673467, "grad_norm": 0.16310624778270721, "learning_rate": 0.002, "loss": 2.5588, "step": 285490 }, { "epoch": 0.5687794848909856, "grad_norm": 0.17018070816993713, "learning_rate": 0.002, "loss": 2.5573, "step": 285500 }, { "epoch": 0.5687994071146245, "grad_norm": 0.1676221340894699, "learning_rate": 0.002, "loss": 2.5649, "step": 285510 }, { "epoch": 0.5688193293382634, "grad_norm": 0.15229712426662445, "learning_rate": 0.002, "loss": 2.545, "step": 285520 }, { "epoch": 0.5688392515619023, "grad_norm": 0.14212733507156372, "learning_rate": 0.002, "loss": 2.5468, "step": 285530 }, { "epoch": 0.5688591737855413, "grad_norm": 0.18283595144748688, "learning_rate": 0.002, "loss": 2.559, "step": 285540 }, { "epoch": 0.5688790960091802, "grad_norm": 0.1457287073135376, "learning_rate": 0.002, "loss": 2.5552, "step": 285550 }, { "epoch": 0.5688990182328191, "grad_norm": 0.16109322011470795, "learning_rate": 0.002, "loss": 2.565, "step": 285560 }, { "epoch": 0.568918940456458, "grad_norm": 0.1941716969013214, "learning_rate": 0.002, "loss": 2.5677, "step": 285570 }, { "epoch": 0.5689388626800969, "grad_norm": 0.17803117632865906, "learning_rate": 0.002, "loss": 2.5538, "step": 285580 }, { "epoch": 0.5689587849037359, "grad_norm": 0.15921664237976074, "learning_rate": 0.002, "loss": 2.5503, "step": 285590 }, { "epoch": 0.5689787071273747, "grad_norm": 0.1939198225736618, "learning_rate": 0.002, "loss": 2.557, "step": 285600 }, { "epoch": 0.5689986293510136, "grad_norm": 0.1751219928264618, "learning_rate": 0.002, "loss": 2.5522, "step": 285610 }, { "epoch": 0.5690185515746525, "grad_norm": 0.14505328238010406, "learning_rate": 0.002, "loss": 2.5538, "step": 285620 }, { "epoch": 0.5690384737982914, "grad_norm": 0.18204668164253235, "learning_rate": 0.002, "loss": 2.5565, "step": 285630 }, { "epoch": 0.5690583960219304, "grad_norm": 0.1824674755334854, "learning_rate": 0.002, "loss": 2.5586, "step": 285640 }, { "epoch": 0.5690783182455693, "grad_norm": 0.15494659543037415, "learning_rate": 0.002, "loss": 2.5524, "step": 285650 }, { "epoch": 0.5690982404692082, "grad_norm": 0.19895048439502716, "learning_rate": 0.002, "loss": 2.5401, "step": 285660 }, { "epoch": 0.5691181626928471, "grad_norm": 0.1664915531873703, "learning_rate": 0.002, "loss": 2.5433, "step": 285670 }, { "epoch": 0.569138084916486, "grad_norm": 0.18980193138122559, "learning_rate": 0.002, "loss": 2.5577, "step": 285680 }, { "epoch": 0.569158007140125, "grad_norm": 0.17121882736682892, "learning_rate": 0.002, "loss": 2.563, "step": 285690 }, { "epoch": 0.5691779293637639, "grad_norm": 0.15006931126117706, "learning_rate": 0.002, "loss": 2.554, "step": 285700 }, { "epoch": 0.5691978515874028, "grad_norm": 0.15467111766338348, "learning_rate": 0.002, "loss": 2.5709, "step": 285710 }, { "epoch": 0.5692177738110417, "grad_norm": 0.14665870368480682, "learning_rate": 0.002, "loss": 2.5513, "step": 285720 }, { "epoch": 0.5692376960346806, "grad_norm": 0.14821019768714905, "learning_rate": 0.002, "loss": 2.5499, "step": 285730 }, { "epoch": 0.5692576182583196, "grad_norm": 0.21206875145435333, "learning_rate": 0.002, "loss": 2.5735, "step": 285740 }, { "epoch": 0.5692775404819584, "grad_norm": 0.1460818350315094, "learning_rate": 0.002, "loss": 2.5467, "step": 285750 }, { "epoch": 0.5692974627055973, "grad_norm": 0.1384621113538742, "learning_rate": 0.002, "loss": 2.5571, "step": 285760 }, { "epoch": 0.5693173849292362, "grad_norm": 0.20956262946128845, "learning_rate": 0.002, "loss": 2.5507, "step": 285770 }, { "epoch": 0.5693373071528752, "grad_norm": 0.15371742844581604, "learning_rate": 0.002, "loss": 2.5701, "step": 285780 }, { "epoch": 0.5693572293765141, "grad_norm": 0.14849573373794556, "learning_rate": 0.002, "loss": 2.5451, "step": 285790 }, { "epoch": 0.569377151600153, "grad_norm": 0.17995259165763855, "learning_rate": 0.002, "loss": 2.5629, "step": 285800 }, { "epoch": 0.5693970738237919, "grad_norm": 0.15844562649726868, "learning_rate": 0.002, "loss": 2.5503, "step": 285810 }, { "epoch": 0.5694169960474308, "grad_norm": 0.16416172683238983, "learning_rate": 0.002, "loss": 2.5338, "step": 285820 }, { "epoch": 0.5694369182710698, "grad_norm": 0.15350092947483063, "learning_rate": 0.002, "loss": 2.5571, "step": 285830 }, { "epoch": 0.5694568404947087, "grad_norm": 0.1430632472038269, "learning_rate": 0.002, "loss": 2.544, "step": 285840 }, { "epoch": 0.5694767627183476, "grad_norm": 0.18802490830421448, "learning_rate": 0.002, "loss": 2.5533, "step": 285850 }, { "epoch": 0.5694966849419865, "grad_norm": 0.17405669391155243, "learning_rate": 0.002, "loss": 2.5564, "step": 285860 }, { "epoch": 0.5695166071656254, "grad_norm": 0.16909334063529968, "learning_rate": 0.002, "loss": 2.5527, "step": 285870 }, { "epoch": 0.5695365293892644, "grad_norm": 0.15567171573638916, "learning_rate": 0.002, "loss": 2.5699, "step": 285880 }, { "epoch": 0.5695564516129032, "grad_norm": 0.1594105362892151, "learning_rate": 0.002, "loss": 2.5497, "step": 285890 }, { "epoch": 0.5695763738365421, "grad_norm": 0.16252446174621582, "learning_rate": 0.002, "loss": 2.5551, "step": 285900 }, { "epoch": 0.569596296060181, "grad_norm": 0.15723544359207153, "learning_rate": 0.002, "loss": 2.5465, "step": 285910 }, { "epoch": 0.5696162182838199, "grad_norm": 0.16398096084594727, "learning_rate": 0.002, "loss": 2.548, "step": 285920 }, { "epoch": 0.5696361405074589, "grad_norm": 0.1692308932542801, "learning_rate": 0.002, "loss": 2.5567, "step": 285930 }, { "epoch": 0.5696560627310978, "grad_norm": 0.17855709791183472, "learning_rate": 0.002, "loss": 2.557, "step": 285940 }, { "epoch": 0.5696759849547367, "grad_norm": 0.17340441048145294, "learning_rate": 0.002, "loss": 2.5571, "step": 285950 }, { "epoch": 0.5696959071783756, "grad_norm": 0.16985318064689636, "learning_rate": 0.002, "loss": 2.555, "step": 285960 }, { "epoch": 0.5697158294020145, "grad_norm": 0.15436656773090363, "learning_rate": 0.002, "loss": 2.5564, "step": 285970 }, { "epoch": 0.5697357516256535, "grad_norm": 0.1902434378862381, "learning_rate": 0.002, "loss": 2.5535, "step": 285980 }, { "epoch": 0.5697556738492924, "grad_norm": 0.1774214655160904, "learning_rate": 0.002, "loss": 2.5586, "step": 285990 }, { "epoch": 0.5697755960729313, "grad_norm": 0.20032846927642822, "learning_rate": 0.002, "loss": 2.545, "step": 286000 }, { "epoch": 0.5697955182965702, "grad_norm": 0.16605837643146515, "learning_rate": 0.002, "loss": 2.5654, "step": 286010 }, { "epoch": 0.569815440520209, "grad_norm": 0.15698255598545074, "learning_rate": 0.002, "loss": 2.5508, "step": 286020 }, { "epoch": 0.569835362743848, "grad_norm": 0.1805734634399414, "learning_rate": 0.002, "loss": 2.5789, "step": 286030 }, { "epoch": 0.5698552849674869, "grad_norm": 0.15125179290771484, "learning_rate": 0.002, "loss": 2.5499, "step": 286040 }, { "epoch": 0.5698752071911258, "grad_norm": 0.16578596830368042, "learning_rate": 0.002, "loss": 2.5532, "step": 286050 }, { "epoch": 0.5698951294147647, "grad_norm": 0.19981759786605835, "learning_rate": 0.002, "loss": 2.5457, "step": 286060 }, { "epoch": 0.5699150516384037, "grad_norm": 0.1429118663072586, "learning_rate": 0.002, "loss": 2.5693, "step": 286070 }, { "epoch": 0.5699349738620426, "grad_norm": 0.1728534698486328, "learning_rate": 0.002, "loss": 2.559, "step": 286080 }, { "epoch": 0.5699548960856815, "grad_norm": 0.17943577468395233, "learning_rate": 0.002, "loss": 2.5528, "step": 286090 }, { "epoch": 0.5699748183093204, "grad_norm": 0.16148439049720764, "learning_rate": 0.002, "loss": 2.5361, "step": 286100 }, { "epoch": 0.5699947405329593, "grad_norm": 0.1889689713716507, "learning_rate": 0.002, "loss": 2.5423, "step": 286110 }, { "epoch": 0.5700146627565983, "grad_norm": 0.18279312551021576, "learning_rate": 0.002, "loss": 2.5596, "step": 286120 }, { "epoch": 0.5700345849802372, "grad_norm": 0.1606483906507492, "learning_rate": 0.002, "loss": 2.5597, "step": 286130 }, { "epoch": 0.5700545072038761, "grad_norm": 0.15152214467525482, "learning_rate": 0.002, "loss": 2.559, "step": 286140 }, { "epoch": 0.570074429427515, "grad_norm": 0.17442730069160461, "learning_rate": 0.002, "loss": 2.5635, "step": 286150 }, { "epoch": 0.5700943516511539, "grad_norm": 0.19116434454917908, "learning_rate": 0.002, "loss": 2.5534, "step": 286160 }, { "epoch": 0.5701142738747929, "grad_norm": 0.19665801525115967, "learning_rate": 0.002, "loss": 2.5538, "step": 286170 }, { "epoch": 0.5701341960984317, "grad_norm": 0.17807327210903168, "learning_rate": 0.002, "loss": 2.5599, "step": 286180 }, { "epoch": 0.5701541183220706, "grad_norm": 0.18111194670200348, "learning_rate": 0.002, "loss": 2.5456, "step": 286190 }, { "epoch": 0.5701740405457095, "grad_norm": 0.17045727372169495, "learning_rate": 0.002, "loss": 2.5511, "step": 286200 }, { "epoch": 0.5701939627693484, "grad_norm": 0.1683029681444168, "learning_rate": 0.002, "loss": 2.5684, "step": 286210 }, { "epoch": 0.5702138849929874, "grad_norm": 0.15401849150657654, "learning_rate": 0.002, "loss": 2.5562, "step": 286220 }, { "epoch": 0.5702338072166263, "grad_norm": 0.1910286694765091, "learning_rate": 0.002, "loss": 2.5761, "step": 286230 }, { "epoch": 0.5702537294402652, "grad_norm": 0.15941981971263885, "learning_rate": 0.002, "loss": 2.5512, "step": 286240 }, { "epoch": 0.5702736516639041, "grad_norm": 0.14389725029468536, "learning_rate": 0.002, "loss": 2.5478, "step": 286250 }, { "epoch": 0.570293573887543, "grad_norm": 0.17537839710712433, "learning_rate": 0.002, "loss": 2.5688, "step": 286260 }, { "epoch": 0.570313496111182, "grad_norm": 0.20138157904148102, "learning_rate": 0.002, "loss": 2.5652, "step": 286270 }, { "epoch": 0.5703334183348209, "grad_norm": 0.19465328752994537, "learning_rate": 0.002, "loss": 2.5536, "step": 286280 }, { "epoch": 0.5703533405584598, "grad_norm": 0.17068496346473694, "learning_rate": 0.002, "loss": 2.5496, "step": 286290 }, { "epoch": 0.5703732627820987, "grad_norm": 0.16204489767551422, "learning_rate": 0.002, "loss": 2.5561, "step": 286300 }, { "epoch": 0.5703931850057375, "grad_norm": 0.1725187450647354, "learning_rate": 0.002, "loss": 2.5627, "step": 286310 }, { "epoch": 0.5704131072293765, "grad_norm": 0.17039142549037933, "learning_rate": 0.002, "loss": 2.5689, "step": 286320 }, { "epoch": 0.5704330294530154, "grad_norm": 0.17710073292255402, "learning_rate": 0.002, "loss": 2.5636, "step": 286330 }, { "epoch": 0.5704529516766543, "grad_norm": 0.171688973903656, "learning_rate": 0.002, "loss": 2.546, "step": 286340 }, { "epoch": 0.5704728739002932, "grad_norm": 0.21244734525680542, "learning_rate": 0.002, "loss": 2.5633, "step": 286350 }, { "epoch": 0.5704927961239322, "grad_norm": 0.14836181700229645, "learning_rate": 0.002, "loss": 2.57, "step": 286360 }, { "epoch": 0.5705127183475711, "grad_norm": 0.16196726262569427, "learning_rate": 0.002, "loss": 2.5462, "step": 286370 }, { "epoch": 0.57053264057121, "grad_norm": 0.153358593583107, "learning_rate": 0.002, "loss": 2.56, "step": 286380 }, { "epoch": 0.5705525627948489, "grad_norm": 0.14610745012760162, "learning_rate": 0.002, "loss": 2.5506, "step": 286390 }, { "epoch": 0.5705724850184878, "grad_norm": 0.16550986468791962, "learning_rate": 0.002, "loss": 2.5602, "step": 286400 }, { "epoch": 0.5705924072421268, "grad_norm": 0.18204765021800995, "learning_rate": 0.002, "loss": 2.5652, "step": 286410 }, { "epoch": 0.5706123294657657, "grad_norm": 0.1500181257724762, "learning_rate": 0.002, "loss": 2.549, "step": 286420 }, { "epoch": 0.5706322516894046, "grad_norm": 0.14549261331558228, "learning_rate": 0.002, "loss": 2.5565, "step": 286430 }, { "epoch": 0.5706521739130435, "grad_norm": 0.19967640936374664, "learning_rate": 0.002, "loss": 2.5631, "step": 286440 }, { "epoch": 0.5706720961366823, "grad_norm": 0.15912587940692902, "learning_rate": 0.002, "loss": 2.5498, "step": 286450 }, { "epoch": 0.5706920183603214, "grad_norm": 0.165174201130867, "learning_rate": 0.002, "loss": 2.5528, "step": 286460 }, { "epoch": 0.5707119405839602, "grad_norm": 0.1810227781534195, "learning_rate": 0.002, "loss": 2.5437, "step": 286470 }, { "epoch": 0.5707318628075991, "grad_norm": 0.18045854568481445, "learning_rate": 0.002, "loss": 2.5648, "step": 286480 }, { "epoch": 0.570751785031238, "grad_norm": 0.1890762746334076, "learning_rate": 0.002, "loss": 2.5443, "step": 286490 }, { "epoch": 0.5707717072548769, "grad_norm": 0.1713883876800537, "learning_rate": 0.002, "loss": 2.5599, "step": 286500 }, { "epoch": 0.5707916294785159, "grad_norm": 0.14635588228702545, "learning_rate": 0.002, "loss": 2.5579, "step": 286510 }, { "epoch": 0.5708115517021548, "grad_norm": 0.21729721128940582, "learning_rate": 0.002, "loss": 2.5488, "step": 286520 }, { "epoch": 0.5708314739257937, "grad_norm": 0.14704529941082, "learning_rate": 0.002, "loss": 2.5508, "step": 286530 }, { "epoch": 0.5708513961494326, "grad_norm": 0.15190578997135162, "learning_rate": 0.002, "loss": 2.5624, "step": 286540 }, { "epoch": 0.5708713183730715, "grad_norm": 0.1584852635860443, "learning_rate": 0.002, "loss": 2.5367, "step": 286550 }, { "epoch": 0.5708912405967105, "grad_norm": 0.14488697052001953, "learning_rate": 0.002, "loss": 2.5473, "step": 286560 }, { "epoch": 0.5709111628203494, "grad_norm": 0.193638414144516, "learning_rate": 0.002, "loss": 2.551, "step": 286570 }, { "epoch": 0.5709310850439883, "grad_norm": 0.16018332540988922, "learning_rate": 0.002, "loss": 2.5524, "step": 286580 }, { "epoch": 0.5709510072676272, "grad_norm": 0.16117510199546814, "learning_rate": 0.002, "loss": 2.5685, "step": 286590 }, { "epoch": 0.570970929491266, "grad_norm": 0.19902029633522034, "learning_rate": 0.002, "loss": 2.5481, "step": 286600 }, { "epoch": 0.570990851714905, "grad_norm": 0.149562269449234, "learning_rate": 0.002, "loss": 2.5621, "step": 286610 }, { "epoch": 0.5710107739385439, "grad_norm": 0.1741093546152115, "learning_rate": 0.002, "loss": 2.5444, "step": 286620 }, { "epoch": 0.5710306961621828, "grad_norm": 0.15007172524929047, "learning_rate": 0.002, "loss": 2.5668, "step": 286630 }, { "epoch": 0.5710506183858217, "grad_norm": 0.12834084033966064, "learning_rate": 0.002, "loss": 2.5758, "step": 286640 }, { "epoch": 0.5710705406094607, "grad_norm": 0.17075258493423462, "learning_rate": 0.002, "loss": 2.5624, "step": 286650 }, { "epoch": 0.5710904628330996, "grad_norm": 0.19724011421203613, "learning_rate": 0.002, "loss": 2.5582, "step": 286660 }, { "epoch": 0.5711103850567385, "grad_norm": 0.17041516304016113, "learning_rate": 0.002, "loss": 2.5656, "step": 286670 }, { "epoch": 0.5711303072803774, "grad_norm": 0.16076108813285828, "learning_rate": 0.002, "loss": 2.5462, "step": 286680 }, { "epoch": 0.5711502295040163, "grad_norm": 0.16070616245269775, "learning_rate": 0.002, "loss": 2.5598, "step": 286690 }, { "epoch": 0.5711701517276553, "grad_norm": 0.18480920791625977, "learning_rate": 0.002, "loss": 2.5617, "step": 286700 }, { "epoch": 0.5711900739512942, "grad_norm": 0.15395453572273254, "learning_rate": 0.002, "loss": 2.5738, "step": 286710 }, { "epoch": 0.5712099961749331, "grad_norm": 0.1470690369606018, "learning_rate": 0.002, "loss": 2.5379, "step": 286720 }, { "epoch": 0.571229918398572, "grad_norm": 0.1600320041179657, "learning_rate": 0.002, "loss": 2.5579, "step": 286730 }, { "epoch": 0.5712498406222108, "grad_norm": 0.1551777422428131, "learning_rate": 0.002, "loss": 2.5623, "step": 286740 }, { "epoch": 0.5712697628458498, "grad_norm": 0.1832665503025055, "learning_rate": 0.002, "loss": 2.5417, "step": 286750 }, { "epoch": 0.5712896850694887, "grad_norm": 0.14311811327934265, "learning_rate": 0.002, "loss": 2.5607, "step": 286760 }, { "epoch": 0.5713096072931276, "grad_norm": 0.17355258762836456, "learning_rate": 0.002, "loss": 2.5476, "step": 286770 }, { "epoch": 0.5713295295167665, "grad_norm": 0.17118947207927704, "learning_rate": 0.002, "loss": 2.561, "step": 286780 }, { "epoch": 0.5713494517404054, "grad_norm": 0.15903009474277496, "learning_rate": 0.002, "loss": 2.5533, "step": 286790 }, { "epoch": 0.5713693739640444, "grad_norm": 0.15064482390880585, "learning_rate": 0.002, "loss": 2.5486, "step": 286800 }, { "epoch": 0.5713892961876833, "grad_norm": 0.19959400594234467, "learning_rate": 0.002, "loss": 2.5468, "step": 286810 }, { "epoch": 0.5714092184113222, "grad_norm": 0.1511279195547104, "learning_rate": 0.002, "loss": 2.5645, "step": 286820 }, { "epoch": 0.5714291406349611, "grad_norm": 0.15390583872795105, "learning_rate": 0.002, "loss": 2.564, "step": 286830 }, { "epoch": 0.5714490628586, "grad_norm": 0.18811331689357758, "learning_rate": 0.002, "loss": 2.5526, "step": 286840 }, { "epoch": 0.571468985082239, "grad_norm": 0.17748326063156128, "learning_rate": 0.002, "loss": 2.5685, "step": 286850 }, { "epoch": 0.5714889073058779, "grad_norm": 0.15447896718978882, "learning_rate": 0.002, "loss": 2.5627, "step": 286860 }, { "epoch": 0.5715088295295168, "grad_norm": 0.1750335693359375, "learning_rate": 0.002, "loss": 2.5528, "step": 286870 }, { "epoch": 0.5715287517531557, "grad_norm": 0.2292363941669464, "learning_rate": 0.002, "loss": 2.5523, "step": 286880 }, { "epoch": 0.5715486739767945, "grad_norm": 0.1421421617269516, "learning_rate": 0.002, "loss": 2.5445, "step": 286890 }, { "epoch": 0.5715685962004335, "grad_norm": 0.16792413592338562, "learning_rate": 0.002, "loss": 2.5549, "step": 286900 }, { "epoch": 0.5715885184240724, "grad_norm": 0.16826526820659637, "learning_rate": 0.002, "loss": 2.5623, "step": 286910 }, { "epoch": 0.5716084406477113, "grad_norm": 0.1623212695121765, "learning_rate": 0.002, "loss": 2.5356, "step": 286920 }, { "epoch": 0.5716283628713502, "grad_norm": 0.16511623561382294, "learning_rate": 0.002, "loss": 2.5444, "step": 286930 }, { "epoch": 0.5716482850949892, "grad_norm": 0.17194341123104095, "learning_rate": 0.002, "loss": 2.5533, "step": 286940 }, { "epoch": 0.5716682073186281, "grad_norm": 0.1818186193704605, "learning_rate": 0.002, "loss": 2.552, "step": 286950 }, { "epoch": 0.571688129542267, "grad_norm": 0.16441255807876587, "learning_rate": 0.002, "loss": 2.5626, "step": 286960 }, { "epoch": 0.5717080517659059, "grad_norm": 0.1706872284412384, "learning_rate": 0.002, "loss": 2.5636, "step": 286970 }, { "epoch": 0.5717279739895448, "grad_norm": 0.17928189039230347, "learning_rate": 0.002, "loss": 2.5679, "step": 286980 }, { "epoch": 0.5717478962131838, "grad_norm": 0.23680315911769867, "learning_rate": 0.002, "loss": 2.5535, "step": 286990 }, { "epoch": 0.5717678184368227, "grad_norm": 0.14355042576789856, "learning_rate": 0.002, "loss": 2.5561, "step": 287000 }, { "epoch": 0.5717877406604616, "grad_norm": 0.18920835852622986, "learning_rate": 0.002, "loss": 2.5697, "step": 287010 }, { "epoch": 0.5718076628841005, "grad_norm": 0.18205150961875916, "learning_rate": 0.002, "loss": 2.5479, "step": 287020 }, { "epoch": 0.5718275851077393, "grad_norm": 0.14518746733665466, "learning_rate": 0.002, "loss": 2.55, "step": 287030 }, { "epoch": 0.5718475073313783, "grad_norm": 0.20596832036972046, "learning_rate": 0.002, "loss": 2.5505, "step": 287040 }, { "epoch": 0.5718674295550172, "grad_norm": 0.18636226654052734, "learning_rate": 0.002, "loss": 2.551, "step": 287050 }, { "epoch": 0.5718873517786561, "grad_norm": 0.17003031075000763, "learning_rate": 0.002, "loss": 2.5584, "step": 287060 }, { "epoch": 0.571907274002295, "grad_norm": 0.13816635310649872, "learning_rate": 0.002, "loss": 2.5486, "step": 287070 }, { "epoch": 0.5719271962259339, "grad_norm": 0.16319742798805237, "learning_rate": 0.002, "loss": 2.5591, "step": 287080 }, { "epoch": 0.5719471184495729, "grad_norm": 0.18501655757427216, "learning_rate": 0.002, "loss": 2.5642, "step": 287090 }, { "epoch": 0.5719670406732118, "grad_norm": 0.1676223874092102, "learning_rate": 0.002, "loss": 2.544, "step": 287100 }, { "epoch": 0.5719869628968507, "grad_norm": 0.15958848595619202, "learning_rate": 0.002, "loss": 2.5624, "step": 287110 }, { "epoch": 0.5720068851204896, "grad_norm": 0.1612997055053711, "learning_rate": 0.002, "loss": 2.5607, "step": 287120 }, { "epoch": 0.5720268073441285, "grad_norm": 0.1552952527999878, "learning_rate": 0.002, "loss": 2.5502, "step": 287130 }, { "epoch": 0.5720467295677675, "grad_norm": 0.20167729258537292, "learning_rate": 0.002, "loss": 2.5554, "step": 287140 }, { "epoch": 0.5720666517914064, "grad_norm": 0.17826834321022034, "learning_rate": 0.002, "loss": 2.5653, "step": 287150 }, { "epoch": 0.5720865740150453, "grad_norm": 0.14974862337112427, "learning_rate": 0.002, "loss": 2.5553, "step": 287160 }, { "epoch": 0.5721064962386841, "grad_norm": 0.17583201825618744, "learning_rate": 0.002, "loss": 2.5528, "step": 287170 }, { "epoch": 0.572126418462323, "grad_norm": 0.32170069217681885, "learning_rate": 0.002, "loss": 2.5594, "step": 287180 }, { "epoch": 0.572146340685962, "grad_norm": 0.1412992626428604, "learning_rate": 0.002, "loss": 2.5571, "step": 287190 }, { "epoch": 0.5721662629096009, "grad_norm": 0.14557965099811554, "learning_rate": 0.002, "loss": 2.55, "step": 287200 }, { "epoch": 0.5721861851332398, "grad_norm": 0.1495477855205536, "learning_rate": 0.002, "loss": 2.5465, "step": 287210 }, { "epoch": 0.5722061073568787, "grad_norm": 0.18784049153327942, "learning_rate": 0.002, "loss": 2.5581, "step": 287220 }, { "epoch": 0.5722260295805177, "grad_norm": 0.1571403592824936, "learning_rate": 0.002, "loss": 2.553, "step": 287230 }, { "epoch": 0.5722459518041566, "grad_norm": 0.1940322071313858, "learning_rate": 0.002, "loss": 2.5657, "step": 287240 }, { "epoch": 0.5722658740277955, "grad_norm": 0.15939567983150482, "learning_rate": 0.002, "loss": 2.5608, "step": 287250 }, { "epoch": 0.5722857962514344, "grad_norm": 0.1698661595582962, "learning_rate": 0.002, "loss": 2.5776, "step": 287260 }, { "epoch": 0.5723057184750733, "grad_norm": 0.13577963411808014, "learning_rate": 0.002, "loss": 2.5542, "step": 287270 }, { "epoch": 0.5723256406987123, "grad_norm": 0.1611279547214508, "learning_rate": 0.002, "loss": 2.5589, "step": 287280 }, { "epoch": 0.5723455629223512, "grad_norm": 0.1863035261631012, "learning_rate": 0.002, "loss": 2.5563, "step": 287290 }, { "epoch": 0.5723654851459901, "grad_norm": 0.16673162579536438, "learning_rate": 0.002, "loss": 2.5642, "step": 287300 }, { "epoch": 0.572385407369629, "grad_norm": 0.15720875561237335, "learning_rate": 0.002, "loss": 2.5588, "step": 287310 }, { "epoch": 0.5724053295932678, "grad_norm": 0.173602893948555, "learning_rate": 0.002, "loss": 2.5539, "step": 287320 }, { "epoch": 0.5724252518169068, "grad_norm": 0.16184788942337036, "learning_rate": 0.002, "loss": 2.5569, "step": 287330 }, { "epoch": 0.5724451740405457, "grad_norm": 0.15402325987815857, "learning_rate": 0.002, "loss": 2.5462, "step": 287340 }, { "epoch": 0.5724650962641846, "grad_norm": 0.1994282752275467, "learning_rate": 0.002, "loss": 2.5519, "step": 287350 }, { "epoch": 0.5724850184878235, "grad_norm": 0.15207719802856445, "learning_rate": 0.002, "loss": 2.5504, "step": 287360 }, { "epoch": 0.5725049407114624, "grad_norm": 0.16779279708862305, "learning_rate": 0.002, "loss": 2.5531, "step": 287370 }, { "epoch": 0.5725248629351014, "grad_norm": 0.13198977708816528, "learning_rate": 0.002, "loss": 2.5559, "step": 287380 }, { "epoch": 0.5725447851587403, "grad_norm": 0.22547397017478943, "learning_rate": 0.002, "loss": 2.5447, "step": 287390 }, { "epoch": 0.5725647073823792, "grad_norm": 0.151134192943573, "learning_rate": 0.002, "loss": 2.5567, "step": 287400 }, { "epoch": 0.5725846296060181, "grad_norm": 0.20372159779071808, "learning_rate": 0.002, "loss": 2.5627, "step": 287410 }, { "epoch": 0.572604551829657, "grad_norm": 0.18348002433776855, "learning_rate": 0.002, "loss": 2.5478, "step": 287420 }, { "epoch": 0.572624474053296, "grad_norm": 0.13813216984272003, "learning_rate": 0.002, "loss": 2.5611, "step": 287430 }, { "epoch": 0.5726443962769349, "grad_norm": 0.1823853999376297, "learning_rate": 0.002, "loss": 2.532, "step": 287440 }, { "epoch": 0.5726643185005738, "grad_norm": 0.18787018954753876, "learning_rate": 0.002, "loss": 2.5654, "step": 287450 }, { "epoch": 0.5726842407242126, "grad_norm": 0.15233153104782104, "learning_rate": 0.002, "loss": 2.5724, "step": 287460 }, { "epoch": 0.5727041629478515, "grad_norm": 0.17732737958431244, "learning_rate": 0.002, "loss": 2.5479, "step": 287470 }, { "epoch": 0.5727240851714905, "grad_norm": 0.16112001240253448, "learning_rate": 0.002, "loss": 2.5732, "step": 287480 }, { "epoch": 0.5727440073951294, "grad_norm": 0.17455659806728363, "learning_rate": 0.002, "loss": 2.5498, "step": 287490 }, { "epoch": 0.5727639296187683, "grad_norm": 0.1744440793991089, "learning_rate": 0.002, "loss": 2.5395, "step": 287500 }, { "epoch": 0.5727838518424072, "grad_norm": 0.13968487083911896, "learning_rate": 0.002, "loss": 2.563, "step": 287510 }, { "epoch": 0.5728037740660461, "grad_norm": 0.251171350479126, "learning_rate": 0.002, "loss": 2.5566, "step": 287520 }, { "epoch": 0.5728236962896851, "grad_norm": 0.17897847294807434, "learning_rate": 0.002, "loss": 2.563, "step": 287530 }, { "epoch": 0.572843618513324, "grad_norm": 0.16803686320781708, "learning_rate": 0.002, "loss": 2.5503, "step": 287540 }, { "epoch": 0.5728635407369629, "grad_norm": 0.14353200793266296, "learning_rate": 0.002, "loss": 2.5495, "step": 287550 }, { "epoch": 0.5728834629606018, "grad_norm": 0.1572297364473343, "learning_rate": 0.002, "loss": 2.547, "step": 287560 }, { "epoch": 0.5729033851842408, "grad_norm": 0.16160374879837036, "learning_rate": 0.002, "loss": 2.5404, "step": 287570 }, { "epoch": 0.5729233074078797, "grad_norm": 0.17019258439540863, "learning_rate": 0.002, "loss": 2.5597, "step": 287580 }, { "epoch": 0.5729432296315186, "grad_norm": 0.13899442553520203, "learning_rate": 0.002, "loss": 2.5526, "step": 287590 }, { "epoch": 0.5729631518551574, "grad_norm": 0.1580183357000351, "learning_rate": 0.002, "loss": 2.5662, "step": 287600 }, { "epoch": 0.5729830740787963, "grad_norm": 0.17111097276210785, "learning_rate": 0.002, "loss": 2.5515, "step": 287610 }, { "epoch": 0.5730029963024353, "grad_norm": 0.17300945520401, "learning_rate": 0.002, "loss": 2.5373, "step": 287620 }, { "epoch": 0.5730229185260742, "grad_norm": 0.1532260775566101, "learning_rate": 0.002, "loss": 2.5577, "step": 287630 }, { "epoch": 0.5730428407497131, "grad_norm": 0.16040201485157013, "learning_rate": 0.002, "loss": 2.5363, "step": 287640 }, { "epoch": 0.573062762973352, "grad_norm": 0.1576581448316574, "learning_rate": 0.002, "loss": 2.5608, "step": 287650 }, { "epoch": 0.5730826851969909, "grad_norm": 0.1850159764289856, "learning_rate": 0.002, "loss": 2.5465, "step": 287660 }, { "epoch": 0.5731026074206299, "grad_norm": 0.16426897048950195, "learning_rate": 0.002, "loss": 2.5544, "step": 287670 }, { "epoch": 0.5731225296442688, "grad_norm": 0.1643148958683014, "learning_rate": 0.002, "loss": 2.5498, "step": 287680 }, { "epoch": 0.5731424518679077, "grad_norm": 0.1636420488357544, "learning_rate": 0.002, "loss": 2.5483, "step": 287690 }, { "epoch": 0.5731623740915466, "grad_norm": 0.18235698342323303, "learning_rate": 0.002, "loss": 2.5602, "step": 287700 }, { "epoch": 0.5731822963151855, "grad_norm": 0.13539642095565796, "learning_rate": 0.002, "loss": 2.5573, "step": 287710 }, { "epoch": 0.5732022185388245, "grad_norm": 0.1903928816318512, "learning_rate": 0.002, "loss": 2.5639, "step": 287720 }, { "epoch": 0.5732221407624634, "grad_norm": 0.1564258337020874, "learning_rate": 0.002, "loss": 2.5502, "step": 287730 }, { "epoch": 0.5732420629861023, "grad_norm": 0.17975406348705292, "learning_rate": 0.002, "loss": 2.5639, "step": 287740 }, { "epoch": 0.5732619852097411, "grad_norm": 0.1531047523021698, "learning_rate": 0.002, "loss": 2.5501, "step": 287750 }, { "epoch": 0.57328190743338, "grad_norm": 0.21025370061397552, "learning_rate": 0.002, "loss": 2.5606, "step": 287760 }, { "epoch": 0.573301829657019, "grad_norm": 0.14822058379650116, "learning_rate": 0.002, "loss": 2.5594, "step": 287770 }, { "epoch": 0.5733217518806579, "grad_norm": 0.17112119495868683, "learning_rate": 0.002, "loss": 2.5593, "step": 287780 }, { "epoch": 0.5733416741042968, "grad_norm": 0.16826872527599335, "learning_rate": 0.002, "loss": 2.5678, "step": 287790 }, { "epoch": 0.5733615963279357, "grad_norm": 0.1682586818933487, "learning_rate": 0.002, "loss": 2.5566, "step": 287800 }, { "epoch": 0.5733815185515746, "grad_norm": 0.1447942852973938, "learning_rate": 0.002, "loss": 2.5539, "step": 287810 }, { "epoch": 0.5734014407752136, "grad_norm": 0.1563725471496582, "learning_rate": 0.002, "loss": 2.5642, "step": 287820 }, { "epoch": 0.5734213629988525, "grad_norm": 0.16859133541584015, "learning_rate": 0.002, "loss": 2.5627, "step": 287830 }, { "epoch": 0.5734412852224914, "grad_norm": 0.25135159492492676, "learning_rate": 0.002, "loss": 2.5606, "step": 287840 }, { "epoch": 0.5734612074461303, "grad_norm": 0.15476727485656738, "learning_rate": 0.002, "loss": 2.5478, "step": 287850 }, { "epoch": 0.5734811296697693, "grad_norm": 0.15716421604156494, "learning_rate": 0.002, "loss": 2.5528, "step": 287860 }, { "epoch": 0.5735010518934082, "grad_norm": 0.18546494841575623, "learning_rate": 0.002, "loss": 2.5427, "step": 287870 }, { "epoch": 0.573520974117047, "grad_norm": 0.17883890867233276, "learning_rate": 0.002, "loss": 2.5432, "step": 287880 }, { "epoch": 0.573540896340686, "grad_norm": 0.20374304056167603, "learning_rate": 0.002, "loss": 2.5507, "step": 287890 }, { "epoch": 0.5735608185643248, "grad_norm": 0.1837804764509201, "learning_rate": 0.002, "loss": 2.5556, "step": 287900 }, { "epoch": 0.5735807407879638, "grad_norm": 0.15060459077358246, "learning_rate": 0.002, "loss": 2.5678, "step": 287910 }, { "epoch": 0.5736006630116027, "grad_norm": 0.1551554948091507, "learning_rate": 0.002, "loss": 2.5594, "step": 287920 }, { "epoch": 0.5736205852352416, "grad_norm": 0.16092929244041443, "learning_rate": 0.002, "loss": 2.5637, "step": 287930 }, { "epoch": 0.5736405074588805, "grad_norm": 0.1651044636964798, "learning_rate": 0.002, "loss": 2.545, "step": 287940 }, { "epoch": 0.5736604296825194, "grad_norm": 0.17769645154476166, "learning_rate": 0.002, "loss": 2.54, "step": 287950 }, { "epoch": 0.5736803519061584, "grad_norm": 0.16757139563560486, "learning_rate": 0.002, "loss": 2.551, "step": 287960 }, { "epoch": 0.5737002741297973, "grad_norm": 0.18759584426879883, "learning_rate": 0.002, "loss": 2.57, "step": 287970 }, { "epoch": 0.5737201963534362, "grad_norm": 0.18548695743083954, "learning_rate": 0.002, "loss": 2.5584, "step": 287980 }, { "epoch": 0.5737401185770751, "grad_norm": 0.16103574633598328, "learning_rate": 0.002, "loss": 2.5585, "step": 287990 }, { "epoch": 0.573760040800714, "grad_norm": 0.17880526185035706, "learning_rate": 0.002, "loss": 2.5666, "step": 288000 }, { "epoch": 0.573779963024353, "grad_norm": 0.17482194304466248, "learning_rate": 0.002, "loss": 2.5527, "step": 288010 }, { "epoch": 0.5737998852479919, "grad_norm": 0.1614765226840973, "learning_rate": 0.002, "loss": 2.5638, "step": 288020 }, { "epoch": 0.5738198074716307, "grad_norm": 0.181975319981575, "learning_rate": 0.002, "loss": 2.5529, "step": 288030 }, { "epoch": 0.5738397296952696, "grad_norm": 0.15712711215019226, "learning_rate": 0.002, "loss": 2.5694, "step": 288040 }, { "epoch": 0.5738596519189085, "grad_norm": 0.16495847702026367, "learning_rate": 0.002, "loss": 2.5593, "step": 288050 }, { "epoch": 0.5738795741425475, "grad_norm": 0.29994308948516846, "learning_rate": 0.002, "loss": 2.5657, "step": 288060 }, { "epoch": 0.5738994963661864, "grad_norm": 0.14031413197517395, "learning_rate": 0.002, "loss": 2.5317, "step": 288070 }, { "epoch": 0.5739194185898253, "grad_norm": 0.16880300641059875, "learning_rate": 0.002, "loss": 2.564, "step": 288080 }, { "epoch": 0.5739393408134642, "grad_norm": 0.17684900760650635, "learning_rate": 0.002, "loss": 2.5477, "step": 288090 }, { "epoch": 0.5739592630371031, "grad_norm": 0.1459265649318695, "learning_rate": 0.002, "loss": 2.5593, "step": 288100 }, { "epoch": 0.5739791852607421, "grad_norm": 0.1701003611087799, "learning_rate": 0.002, "loss": 2.5482, "step": 288110 }, { "epoch": 0.573999107484381, "grad_norm": 0.166482612490654, "learning_rate": 0.002, "loss": 2.5525, "step": 288120 }, { "epoch": 0.5740190297080199, "grad_norm": 0.176458939909935, "learning_rate": 0.002, "loss": 2.5573, "step": 288130 }, { "epoch": 0.5740389519316588, "grad_norm": 0.15919813513755798, "learning_rate": 0.002, "loss": 2.584, "step": 288140 }, { "epoch": 0.5740588741552978, "grad_norm": 0.1808801293373108, "learning_rate": 0.002, "loss": 2.5629, "step": 288150 }, { "epoch": 0.5740787963789367, "grad_norm": 0.14550377428531647, "learning_rate": 0.002, "loss": 2.5601, "step": 288160 }, { "epoch": 0.5740987186025756, "grad_norm": 0.1564701795578003, "learning_rate": 0.002, "loss": 2.5573, "step": 288170 }, { "epoch": 0.5741186408262144, "grad_norm": 0.16252903640270233, "learning_rate": 0.002, "loss": 2.551, "step": 288180 }, { "epoch": 0.5741385630498533, "grad_norm": 0.2166324257850647, "learning_rate": 0.002, "loss": 2.5662, "step": 288190 }, { "epoch": 0.5741584852734923, "grad_norm": 0.1570025086402893, "learning_rate": 0.002, "loss": 2.5623, "step": 288200 }, { "epoch": 0.5741784074971312, "grad_norm": 0.13967667520046234, "learning_rate": 0.002, "loss": 2.5587, "step": 288210 }, { "epoch": 0.5741983297207701, "grad_norm": 0.16167475283145905, "learning_rate": 0.002, "loss": 2.5511, "step": 288220 }, { "epoch": 0.574218251944409, "grad_norm": 0.19414781033992767, "learning_rate": 0.002, "loss": 2.5518, "step": 288230 }, { "epoch": 0.5742381741680479, "grad_norm": 0.15799446403980255, "learning_rate": 0.002, "loss": 2.5618, "step": 288240 }, { "epoch": 0.5742580963916869, "grad_norm": 0.1796191781759262, "learning_rate": 0.002, "loss": 2.5503, "step": 288250 }, { "epoch": 0.5742780186153258, "grad_norm": 0.1833333969116211, "learning_rate": 0.002, "loss": 2.565, "step": 288260 }, { "epoch": 0.5742979408389647, "grad_norm": 0.16917991638183594, "learning_rate": 0.002, "loss": 2.5595, "step": 288270 }, { "epoch": 0.5743178630626036, "grad_norm": 0.15765409171581268, "learning_rate": 0.002, "loss": 2.5558, "step": 288280 }, { "epoch": 0.5743377852862425, "grad_norm": 0.16861550509929657, "learning_rate": 0.002, "loss": 2.553, "step": 288290 }, { "epoch": 0.5743577075098815, "grad_norm": 0.1790376603603363, "learning_rate": 0.002, "loss": 2.5437, "step": 288300 }, { "epoch": 0.5743776297335204, "grad_norm": 0.19372719526290894, "learning_rate": 0.002, "loss": 2.5701, "step": 288310 }, { "epoch": 0.5743975519571592, "grad_norm": 0.156797856092453, "learning_rate": 0.002, "loss": 2.575, "step": 288320 }, { "epoch": 0.5744174741807981, "grad_norm": 0.16829471290111542, "learning_rate": 0.002, "loss": 2.566, "step": 288330 }, { "epoch": 0.574437396404437, "grad_norm": 0.16540472209453583, "learning_rate": 0.002, "loss": 2.5612, "step": 288340 }, { "epoch": 0.574457318628076, "grad_norm": 0.17341575026512146, "learning_rate": 0.002, "loss": 2.5473, "step": 288350 }, { "epoch": 0.5744772408517149, "grad_norm": 0.1610838621854782, "learning_rate": 0.002, "loss": 2.5563, "step": 288360 }, { "epoch": 0.5744971630753538, "grad_norm": 0.14103233814239502, "learning_rate": 0.002, "loss": 2.5541, "step": 288370 }, { "epoch": 0.5745170852989927, "grad_norm": 0.16338492929935455, "learning_rate": 0.002, "loss": 2.5634, "step": 288380 }, { "epoch": 0.5745370075226316, "grad_norm": 0.1573777198791504, "learning_rate": 0.002, "loss": 2.534, "step": 288390 }, { "epoch": 0.5745569297462706, "grad_norm": 0.1724940985441208, "learning_rate": 0.002, "loss": 2.5605, "step": 288400 }, { "epoch": 0.5745768519699095, "grad_norm": 0.1562068611383438, "learning_rate": 0.002, "loss": 2.5503, "step": 288410 }, { "epoch": 0.5745967741935484, "grad_norm": 0.17270922660827637, "learning_rate": 0.002, "loss": 2.5591, "step": 288420 }, { "epoch": 0.5746166964171873, "grad_norm": 0.16927050054073334, "learning_rate": 0.002, "loss": 2.5513, "step": 288430 }, { "epoch": 0.5746366186408263, "grad_norm": 0.17486487329006195, "learning_rate": 0.002, "loss": 2.5431, "step": 288440 }, { "epoch": 0.5746565408644652, "grad_norm": 0.1669040024280548, "learning_rate": 0.002, "loss": 2.5643, "step": 288450 }, { "epoch": 0.574676463088104, "grad_norm": 0.1475541591644287, "learning_rate": 0.002, "loss": 2.552, "step": 288460 }, { "epoch": 0.5746963853117429, "grad_norm": 0.17978554964065552, "learning_rate": 0.002, "loss": 2.5488, "step": 288470 }, { "epoch": 0.5747163075353818, "grad_norm": 0.17819765210151672, "learning_rate": 0.002, "loss": 2.5575, "step": 288480 }, { "epoch": 0.5747362297590208, "grad_norm": 0.20387612283229828, "learning_rate": 0.002, "loss": 2.5626, "step": 288490 }, { "epoch": 0.5747561519826597, "grad_norm": 0.17407147586345673, "learning_rate": 0.002, "loss": 2.5486, "step": 288500 }, { "epoch": 0.5747760742062986, "grad_norm": 0.16892275214195251, "learning_rate": 0.002, "loss": 2.5405, "step": 288510 }, { "epoch": 0.5747959964299375, "grad_norm": 0.15902364253997803, "learning_rate": 0.002, "loss": 2.5549, "step": 288520 }, { "epoch": 0.5748159186535764, "grad_norm": 0.1397843360900879, "learning_rate": 0.002, "loss": 2.5762, "step": 288530 }, { "epoch": 0.5748358408772154, "grad_norm": 0.1702759712934494, "learning_rate": 0.002, "loss": 2.5447, "step": 288540 }, { "epoch": 0.5748557631008543, "grad_norm": 0.17455905675888062, "learning_rate": 0.002, "loss": 2.5576, "step": 288550 }, { "epoch": 0.5748756853244932, "grad_norm": 0.19059377908706665, "learning_rate": 0.002, "loss": 2.5685, "step": 288560 }, { "epoch": 0.5748956075481321, "grad_norm": 0.17516599595546722, "learning_rate": 0.002, "loss": 2.5346, "step": 288570 }, { "epoch": 0.574915529771771, "grad_norm": 0.17838704586029053, "learning_rate": 0.002, "loss": 2.5554, "step": 288580 }, { "epoch": 0.57493545199541, "grad_norm": 0.1479816734790802, "learning_rate": 0.002, "loss": 2.552, "step": 288590 }, { "epoch": 0.5749553742190489, "grad_norm": 0.17181819677352905, "learning_rate": 0.002, "loss": 2.5454, "step": 288600 }, { "epoch": 0.5749752964426877, "grad_norm": 0.15898212790489197, "learning_rate": 0.002, "loss": 2.5616, "step": 288610 }, { "epoch": 0.5749952186663266, "grad_norm": 0.18182024359703064, "learning_rate": 0.002, "loss": 2.5554, "step": 288620 }, { "epoch": 0.5750151408899655, "grad_norm": 0.18320471048355103, "learning_rate": 0.002, "loss": 2.5561, "step": 288630 }, { "epoch": 0.5750350631136045, "grad_norm": 0.1487899273633957, "learning_rate": 0.002, "loss": 2.5723, "step": 288640 }, { "epoch": 0.5750549853372434, "grad_norm": 0.15559136867523193, "learning_rate": 0.002, "loss": 2.547, "step": 288650 }, { "epoch": 0.5750749075608823, "grad_norm": 0.18455392122268677, "learning_rate": 0.002, "loss": 2.5688, "step": 288660 }, { "epoch": 0.5750948297845212, "grad_norm": 0.19804275035858154, "learning_rate": 0.002, "loss": 2.5617, "step": 288670 }, { "epoch": 0.5751147520081601, "grad_norm": 0.1766635626554489, "learning_rate": 0.002, "loss": 2.5555, "step": 288680 }, { "epoch": 0.5751346742317991, "grad_norm": 0.23855580389499664, "learning_rate": 0.002, "loss": 2.5639, "step": 288690 }, { "epoch": 0.575154596455438, "grad_norm": 0.151626318693161, "learning_rate": 0.002, "loss": 2.5619, "step": 288700 }, { "epoch": 0.5751745186790769, "grad_norm": 0.15310926735401154, "learning_rate": 0.002, "loss": 2.5755, "step": 288710 }, { "epoch": 0.5751944409027158, "grad_norm": 0.17549459636211395, "learning_rate": 0.002, "loss": 2.5508, "step": 288720 }, { "epoch": 0.5752143631263548, "grad_norm": 0.19191883504390717, "learning_rate": 0.002, "loss": 2.5573, "step": 288730 }, { "epoch": 0.5752342853499937, "grad_norm": 0.15785063803195953, "learning_rate": 0.002, "loss": 2.5501, "step": 288740 }, { "epoch": 0.5752542075736325, "grad_norm": 0.1561284065246582, "learning_rate": 0.002, "loss": 2.5514, "step": 288750 }, { "epoch": 0.5752741297972714, "grad_norm": 0.19587989151477814, "learning_rate": 0.002, "loss": 2.5602, "step": 288760 }, { "epoch": 0.5752940520209103, "grad_norm": 0.15916889905929565, "learning_rate": 0.002, "loss": 2.559, "step": 288770 }, { "epoch": 0.5753139742445493, "grad_norm": 0.17823578417301178, "learning_rate": 0.002, "loss": 2.5457, "step": 288780 }, { "epoch": 0.5753338964681882, "grad_norm": 1.4832515716552734, "learning_rate": 0.002, "loss": 2.5546, "step": 288790 }, { "epoch": 0.5753538186918271, "grad_norm": 0.14863170683383942, "learning_rate": 0.002, "loss": 2.5755, "step": 288800 }, { "epoch": 0.575373740915466, "grad_norm": 0.13958001136779785, "learning_rate": 0.002, "loss": 2.5644, "step": 288810 }, { "epoch": 0.5753936631391049, "grad_norm": 0.15389961004257202, "learning_rate": 0.002, "loss": 2.5569, "step": 288820 }, { "epoch": 0.5754135853627439, "grad_norm": 0.16480734944343567, "learning_rate": 0.002, "loss": 2.5635, "step": 288830 }, { "epoch": 0.5754335075863828, "grad_norm": 0.20863224565982819, "learning_rate": 0.002, "loss": 2.563, "step": 288840 }, { "epoch": 0.5754534298100217, "grad_norm": 0.18274514377117157, "learning_rate": 0.002, "loss": 2.5606, "step": 288850 }, { "epoch": 0.5754733520336606, "grad_norm": 0.15471526980400085, "learning_rate": 0.002, "loss": 2.5569, "step": 288860 }, { "epoch": 0.5754932742572995, "grad_norm": 0.16922974586486816, "learning_rate": 0.002, "loss": 2.5501, "step": 288870 }, { "epoch": 0.5755131964809385, "grad_norm": 0.2047516107559204, "learning_rate": 0.002, "loss": 2.5574, "step": 288880 }, { "epoch": 0.5755331187045774, "grad_norm": 0.14642935991287231, "learning_rate": 0.002, "loss": 2.5678, "step": 288890 }, { "epoch": 0.5755530409282162, "grad_norm": 0.20790262520313263, "learning_rate": 0.002, "loss": 2.566, "step": 288900 }, { "epoch": 0.5755729631518551, "grad_norm": 0.13454151153564453, "learning_rate": 0.002, "loss": 2.5566, "step": 288910 }, { "epoch": 0.575592885375494, "grad_norm": 0.17816966772079468, "learning_rate": 0.002, "loss": 2.5634, "step": 288920 }, { "epoch": 0.575612807599133, "grad_norm": 0.14149120450019836, "learning_rate": 0.002, "loss": 2.5539, "step": 288930 }, { "epoch": 0.5756327298227719, "grad_norm": 0.1852392703294754, "learning_rate": 0.002, "loss": 2.5461, "step": 288940 }, { "epoch": 0.5756526520464108, "grad_norm": 0.1675332933664322, "learning_rate": 0.002, "loss": 2.55, "step": 288950 }, { "epoch": 0.5756725742700497, "grad_norm": 0.18642011284828186, "learning_rate": 0.002, "loss": 2.552, "step": 288960 }, { "epoch": 0.5756924964936886, "grad_norm": 0.16071371734142303, "learning_rate": 0.002, "loss": 2.5622, "step": 288970 }, { "epoch": 0.5757124187173276, "grad_norm": 0.17463600635528564, "learning_rate": 0.002, "loss": 2.5499, "step": 288980 }, { "epoch": 0.5757323409409665, "grad_norm": 0.143631249666214, "learning_rate": 0.002, "loss": 2.5463, "step": 288990 }, { "epoch": 0.5757522631646054, "grad_norm": 0.15116974711418152, "learning_rate": 0.002, "loss": 2.548, "step": 289000 }, { "epoch": 0.5757721853882443, "grad_norm": 0.14685963094234467, "learning_rate": 0.002, "loss": 2.5549, "step": 289010 }, { "epoch": 0.5757921076118832, "grad_norm": 0.16808083653450012, "learning_rate": 0.002, "loss": 2.5574, "step": 289020 }, { "epoch": 0.5758120298355222, "grad_norm": 0.17035341262817383, "learning_rate": 0.002, "loss": 2.5756, "step": 289030 }, { "epoch": 0.575831952059161, "grad_norm": 0.17251239717006683, "learning_rate": 0.002, "loss": 2.5581, "step": 289040 }, { "epoch": 0.5758518742827999, "grad_norm": 0.17782673239707947, "learning_rate": 0.002, "loss": 2.5416, "step": 289050 }, { "epoch": 0.5758717965064388, "grad_norm": 0.14175301790237427, "learning_rate": 0.002, "loss": 2.5417, "step": 289060 }, { "epoch": 0.5758917187300778, "grad_norm": 0.17993180453777313, "learning_rate": 0.002, "loss": 2.5447, "step": 289070 }, { "epoch": 0.5759116409537167, "grad_norm": 0.16610407829284668, "learning_rate": 0.002, "loss": 2.573, "step": 289080 }, { "epoch": 0.5759315631773556, "grad_norm": 0.18444249033927917, "learning_rate": 0.002, "loss": 2.5587, "step": 289090 }, { "epoch": 0.5759514854009945, "grad_norm": 0.1987004280090332, "learning_rate": 0.002, "loss": 2.5624, "step": 289100 }, { "epoch": 0.5759714076246334, "grad_norm": 0.14415697753429413, "learning_rate": 0.002, "loss": 2.5567, "step": 289110 }, { "epoch": 0.5759913298482724, "grad_norm": 0.18069817125797272, "learning_rate": 0.002, "loss": 2.5581, "step": 289120 }, { "epoch": 0.5760112520719113, "grad_norm": 0.16314472258090973, "learning_rate": 0.002, "loss": 2.537, "step": 289130 }, { "epoch": 0.5760311742955502, "grad_norm": 0.15186402201652527, "learning_rate": 0.002, "loss": 2.5496, "step": 289140 }, { "epoch": 0.5760510965191891, "grad_norm": 0.16391055285930634, "learning_rate": 0.002, "loss": 2.5583, "step": 289150 }, { "epoch": 0.576071018742828, "grad_norm": 0.18121184408664703, "learning_rate": 0.002, "loss": 2.5431, "step": 289160 }, { "epoch": 0.576090940966467, "grad_norm": 0.17051906883716583, "learning_rate": 0.002, "loss": 2.547, "step": 289170 }, { "epoch": 0.5761108631901058, "grad_norm": 0.1789287030696869, "learning_rate": 0.002, "loss": 2.5622, "step": 289180 }, { "epoch": 0.5761307854137447, "grad_norm": 0.18197830021381378, "learning_rate": 0.002, "loss": 2.5651, "step": 289190 }, { "epoch": 0.5761507076373836, "grad_norm": 0.15261654555797577, "learning_rate": 0.002, "loss": 2.5623, "step": 289200 }, { "epoch": 0.5761706298610225, "grad_norm": 0.18267540633678436, "learning_rate": 0.002, "loss": 2.554, "step": 289210 }, { "epoch": 0.5761905520846615, "grad_norm": 0.2067803591489792, "learning_rate": 0.002, "loss": 2.5542, "step": 289220 }, { "epoch": 0.5762104743083004, "grad_norm": 0.14545534551143646, "learning_rate": 0.002, "loss": 2.5483, "step": 289230 }, { "epoch": 0.5762303965319393, "grad_norm": 0.175690695643425, "learning_rate": 0.002, "loss": 2.5442, "step": 289240 }, { "epoch": 0.5762503187555782, "grad_norm": 0.18619519472122192, "learning_rate": 0.002, "loss": 2.5628, "step": 289250 }, { "epoch": 0.5762702409792171, "grad_norm": 0.15129783749580383, "learning_rate": 0.002, "loss": 2.5562, "step": 289260 }, { "epoch": 0.5762901632028561, "grad_norm": 0.15376977622509003, "learning_rate": 0.002, "loss": 2.5494, "step": 289270 }, { "epoch": 0.576310085426495, "grad_norm": 0.14515481889247894, "learning_rate": 0.002, "loss": 2.5597, "step": 289280 }, { "epoch": 0.5763300076501339, "grad_norm": 0.1522955447435379, "learning_rate": 0.002, "loss": 2.5626, "step": 289290 }, { "epoch": 0.5763499298737728, "grad_norm": 0.17764325439929962, "learning_rate": 0.002, "loss": 2.5588, "step": 289300 }, { "epoch": 0.5763698520974117, "grad_norm": 0.13900159299373627, "learning_rate": 0.002, "loss": 2.559, "step": 289310 }, { "epoch": 0.5763897743210507, "grad_norm": 0.20516881346702576, "learning_rate": 0.002, "loss": 2.5609, "step": 289320 }, { "epoch": 0.5764096965446895, "grad_norm": 0.168929785490036, "learning_rate": 0.002, "loss": 2.5606, "step": 289330 }, { "epoch": 0.5764296187683284, "grad_norm": 0.1930958777666092, "learning_rate": 0.002, "loss": 2.5576, "step": 289340 }, { "epoch": 0.5764495409919673, "grad_norm": 0.19526469707489014, "learning_rate": 0.002, "loss": 2.5558, "step": 289350 }, { "epoch": 0.5764694632156063, "grad_norm": 0.18012763559818268, "learning_rate": 0.002, "loss": 2.5597, "step": 289360 }, { "epoch": 0.5764893854392452, "grad_norm": 0.1599578708410263, "learning_rate": 0.002, "loss": 2.5385, "step": 289370 }, { "epoch": 0.5765093076628841, "grad_norm": 0.19133004546165466, "learning_rate": 0.002, "loss": 2.5466, "step": 289380 }, { "epoch": 0.576529229886523, "grad_norm": 0.1672375351190567, "learning_rate": 0.002, "loss": 2.5615, "step": 289390 }, { "epoch": 0.5765491521101619, "grad_norm": 0.19941140711307526, "learning_rate": 0.002, "loss": 2.5667, "step": 289400 }, { "epoch": 0.5765690743338009, "grad_norm": 0.14943170547485352, "learning_rate": 0.002, "loss": 2.5578, "step": 289410 }, { "epoch": 0.5765889965574398, "grad_norm": 0.17685091495513916, "learning_rate": 0.002, "loss": 2.555, "step": 289420 }, { "epoch": 0.5766089187810787, "grad_norm": 0.14442791044712067, "learning_rate": 0.002, "loss": 2.5644, "step": 289430 }, { "epoch": 0.5766288410047176, "grad_norm": 0.1678408533334732, "learning_rate": 0.002, "loss": 2.5697, "step": 289440 }, { "epoch": 0.5766487632283565, "grad_norm": 0.20056450366973877, "learning_rate": 0.002, "loss": 2.5476, "step": 289450 }, { "epoch": 0.5766686854519955, "grad_norm": 0.1584227979183197, "learning_rate": 0.002, "loss": 2.5406, "step": 289460 }, { "epoch": 0.5766886076756343, "grad_norm": 0.16457906365394592, "learning_rate": 0.002, "loss": 2.559, "step": 289470 }, { "epoch": 0.5767085298992732, "grad_norm": 0.17966097593307495, "learning_rate": 0.002, "loss": 2.5567, "step": 289480 }, { "epoch": 0.5767284521229121, "grad_norm": 0.18616513907909393, "learning_rate": 0.002, "loss": 2.5281, "step": 289490 }, { "epoch": 0.576748374346551, "grad_norm": 0.15688732266426086, "learning_rate": 0.002, "loss": 2.5695, "step": 289500 }, { "epoch": 0.57676829657019, "grad_norm": 0.14473938941955566, "learning_rate": 0.002, "loss": 2.5667, "step": 289510 }, { "epoch": 0.5767882187938289, "grad_norm": 0.208915114402771, "learning_rate": 0.002, "loss": 2.579, "step": 289520 }, { "epoch": 0.5768081410174678, "grad_norm": 0.15142494440078735, "learning_rate": 0.002, "loss": 2.5542, "step": 289530 }, { "epoch": 0.5768280632411067, "grad_norm": 0.14425255358219147, "learning_rate": 0.002, "loss": 2.5565, "step": 289540 }, { "epoch": 0.5768479854647456, "grad_norm": 0.1925479620695114, "learning_rate": 0.002, "loss": 2.5494, "step": 289550 }, { "epoch": 0.5768679076883846, "grad_norm": 0.15695396065711975, "learning_rate": 0.002, "loss": 2.5556, "step": 289560 }, { "epoch": 0.5768878299120235, "grad_norm": 0.18458251655101776, "learning_rate": 0.002, "loss": 2.5657, "step": 289570 }, { "epoch": 0.5769077521356624, "grad_norm": 0.1748441904783249, "learning_rate": 0.002, "loss": 2.557, "step": 289580 }, { "epoch": 0.5769276743593013, "grad_norm": 0.16043518483638763, "learning_rate": 0.002, "loss": 2.5616, "step": 289590 }, { "epoch": 0.5769475965829401, "grad_norm": 0.18210695683956146, "learning_rate": 0.002, "loss": 2.5664, "step": 289600 }, { "epoch": 0.5769675188065791, "grad_norm": 0.18658986687660217, "learning_rate": 0.002, "loss": 2.5371, "step": 289610 }, { "epoch": 0.576987441030218, "grad_norm": 0.16451376676559448, "learning_rate": 0.002, "loss": 2.553, "step": 289620 }, { "epoch": 0.5770073632538569, "grad_norm": 0.15370456874370575, "learning_rate": 0.002, "loss": 2.5655, "step": 289630 }, { "epoch": 0.5770272854774958, "grad_norm": 0.1764030009508133, "learning_rate": 0.002, "loss": 2.5624, "step": 289640 }, { "epoch": 0.5770472077011348, "grad_norm": 0.16149163246154785, "learning_rate": 0.002, "loss": 2.5784, "step": 289650 }, { "epoch": 0.5770671299247737, "grad_norm": 0.17087295651435852, "learning_rate": 0.002, "loss": 2.5685, "step": 289660 }, { "epoch": 0.5770870521484126, "grad_norm": 0.18617558479309082, "learning_rate": 0.002, "loss": 2.5591, "step": 289670 }, { "epoch": 0.5771069743720515, "grad_norm": 0.14033061265945435, "learning_rate": 0.002, "loss": 2.5447, "step": 289680 }, { "epoch": 0.5771268965956904, "grad_norm": 0.14434516429901123, "learning_rate": 0.002, "loss": 2.5623, "step": 289690 }, { "epoch": 0.5771468188193294, "grad_norm": 0.124880850315094, "learning_rate": 0.002, "loss": 2.5605, "step": 289700 }, { "epoch": 0.5771667410429683, "grad_norm": 0.162417933344841, "learning_rate": 0.002, "loss": 2.5705, "step": 289710 }, { "epoch": 0.5771866632666072, "grad_norm": 0.1393222063779831, "learning_rate": 0.002, "loss": 2.5572, "step": 289720 }, { "epoch": 0.5772065854902461, "grad_norm": 0.15332598984241486, "learning_rate": 0.002, "loss": 2.544, "step": 289730 }, { "epoch": 0.577226507713885, "grad_norm": 0.2084275335073471, "learning_rate": 0.002, "loss": 2.5557, "step": 289740 }, { "epoch": 0.577246429937524, "grad_norm": 0.17036229372024536, "learning_rate": 0.002, "loss": 2.554, "step": 289750 }, { "epoch": 0.5772663521611628, "grad_norm": 0.1431809961795807, "learning_rate": 0.002, "loss": 2.5672, "step": 289760 }, { "epoch": 0.5772862743848017, "grad_norm": 0.1808396428823471, "learning_rate": 0.002, "loss": 2.5491, "step": 289770 }, { "epoch": 0.5773061966084406, "grad_norm": 0.18640035390853882, "learning_rate": 0.002, "loss": 2.5599, "step": 289780 }, { "epoch": 0.5773261188320795, "grad_norm": 0.16208162903785706, "learning_rate": 0.002, "loss": 2.5441, "step": 289790 }, { "epoch": 0.5773460410557185, "grad_norm": 0.20760928094387054, "learning_rate": 0.002, "loss": 2.5633, "step": 289800 }, { "epoch": 0.5773659632793574, "grad_norm": 0.15359927713871002, "learning_rate": 0.002, "loss": 2.5526, "step": 289810 }, { "epoch": 0.5773858855029963, "grad_norm": 0.15899907052516937, "learning_rate": 0.002, "loss": 2.5446, "step": 289820 }, { "epoch": 0.5774058077266352, "grad_norm": 0.20466569066047668, "learning_rate": 0.002, "loss": 2.5464, "step": 289830 }, { "epoch": 0.5774257299502741, "grad_norm": 0.16806870698928833, "learning_rate": 0.002, "loss": 2.5731, "step": 289840 }, { "epoch": 0.5774456521739131, "grad_norm": 0.2293078899383545, "learning_rate": 0.002, "loss": 2.5565, "step": 289850 }, { "epoch": 0.577465574397552, "grad_norm": 0.20181825757026672, "learning_rate": 0.002, "loss": 2.5586, "step": 289860 }, { "epoch": 0.5774854966211909, "grad_norm": 0.15995916724205017, "learning_rate": 0.002, "loss": 2.5514, "step": 289870 }, { "epoch": 0.5775054188448298, "grad_norm": 0.13026002049446106, "learning_rate": 0.002, "loss": 2.5473, "step": 289880 }, { "epoch": 0.5775253410684686, "grad_norm": 0.21607372164726257, "learning_rate": 0.002, "loss": 2.5692, "step": 289890 }, { "epoch": 0.5775452632921076, "grad_norm": 0.1443960815668106, "learning_rate": 0.002, "loss": 2.5655, "step": 289900 }, { "epoch": 0.5775651855157465, "grad_norm": 0.16017326712608337, "learning_rate": 0.002, "loss": 2.5554, "step": 289910 }, { "epoch": 0.5775851077393854, "grad_norm": 0.2051772177219391, "learning_rate": 0.002, "loss": 2.5542, "step": 289920 }, { "epoch": 0.5776050299630243, "grad_norm": 0.1528644561767578, "learning_rate": 0.002, "loss": 2.5631, "step": 289930 }, { "epoch": 0.5776249521866633, "grad_norm": 0.1410849690437317, "learning_rate": 0.002, "loss": 2.5568, "step": 289940 }, { "epoch": 0.5776448744103022, "grad_norm": 0.17238175868988037, "learning_rate": 0.002, "loss": 2.5518, "step": 289950 }, { "epoch": 0.5776647966339411, "grad_norm": 0.1537226140499115, "learning_rate": 0.002, "loss": 2.5538, "step": 289960 }, { "epoch": 0.57768471885758, "grad_norm": 0.1785811483860016, "learning_rate": 0.002, "loss": 2.5561, "step": 289970 }, { "epoch": 0.5777046410812189, "grad_norm": 0.14055344462394714, "learning_rate": 0.002, "loss": 2.5638, "step": 289980 }, { "epoch": 0.5777245633048579, "grad_norm": 0.15630272030830383, "learning_rate": 0.002, "loss": 2.5588, "step": 289990 }, { "epoch": 0.5777444855284968, "grad_norm": 0.1889037936925888, "learning_rate": 0.002, "loss": 2.57, "step": 290000 }, { "epoch": 0.5777644077521357, "grad_norm": 0.18927553296089172, "learning_rate": 0.002, "loss": 2.5682, "step": 290010 }, { "epoch": 0.5777843299757746, "grad_norm": 0.1804686188697815, "learning_rate": 0.002, "loss": 2.5698, "step": 290020 }, { "epoch": 0.5778042521994134, "grad_norm": 0.17239589989185333, "learning_rate": 0.002, "loss": 2.5715, "step": 290030 }, { "epoch": 0.5778241744230525, "grad_norm": 0.1485714465379715, "learning_rate": 0.002, "loss": 2.5612, "step": 290040 }, { "epoch": 0.5778440966466913, "grad_norm": 0.1682816445827484, "learning_rate": 0.002, "loss": 2.5621, "step": 290050 }, { "epoch": 0.5778640188703302, "grad_norm": 0.16861750185489655, "learning_rate": 0.002, "loss": 2.5496, "step": 290060 }, { "epoch": 0.5778839410939691, "grad_norm": 0.19722408056259155, "learning_rate": 0.002, "loss": 2.5519, "step": 290070 }, { "epoch": 0.577903863317608, "grad_norm": 0.18741123378276825, "learning_rate": 0.002, "loss": 2.5654, "step": 290080 }, { "epoch": 0.577923785541247, "grad_norm": 0.17022356390953064, "learning_rate": 0.002, "loss": 2.5718, "step": 290090 }, { "epoch": 0.5779437077648859, "grad_norm": 0.17309165000915527, "learning_rate": 0.002, "loss": 2.5599, "step": 290100 }, { "epoch": 0.5779636299885248, "grad_norm": 0.1381065547466278, "learning_rate": 0.002, "loss": 2.5547, "step": 290110 }, { "epoch": 0.5779835522121637, "grad_norm": 0.1616714596748352, "learning_rate": 0.002, "loss": 2.574, "step": 290120 }, { "epoch": 0.5780034744358026, "grad_norm": 0.20641887187957764, "learning_rate": 0.002, "loss": 2.5555, "step": 290130 }, { "epoch": 0.5780233966594416, "grad_norm": 0.16144564747810364, "learning_rate": 0.002, "loss": 2.5782, "step": 290140 }, { "epoch": 0.5780433188830805, "grad_norm": 0.1681162416934967, "learning_rate": 0.002, "loss": 2.5539, "step": 290150 }, { "epoch": 0.5780632411067194, "grad_norm": 0.14923633635044098, "learning_rate": 0.002, "loss": 2.5466, "step": 290160 }, { "epoch": 0.5780831633303583, "grad_norm": 0.20653623342514038, "learning_rate": 0.002, "loss": 2.5585, "step": 290170 }, { "epoch": 0.5781030855539971, "grad_norm": 0.15946736931800842, "learning_rate": 0.002, "loss": 2.539, "step": 290180 }, { "epoch": 0.5781230077776361, "grad_norm": 0.17023922502994537, "learning_rate": 0.002, "loss": 2.5613, "step": 290190 }, { "epoch": 0.578142930001275, "grad_norm": 0.18572652339935303, "learning_rate": 0.002, "loss": 2.561, "step": 290200 }, { "epoch": 0.5781628522249139, "grad_norm": 0.17130787670612335, "learning_rate": 0.002, "loss": 2.5498, "step": 290210 }, { "epoch": 0.5781827744485528, "grad_norm": 0.18210725486278534, "learning_rate": 0.002, "loss": 2.5631, "step": 290220 }, { "epoch": 0.5782026966721918, "grad_norm": 0.16259314119815826, "learning_rate": 0.002, "loss": 2.5501, "step": 290230 }, { "epoch": 0.5782226188958307, "grad_norm": 0.14324595034122467, "learning_rate": 0.002, "loss": 2.5409, "step": 290240 }, { "epoch": 0.5782425411194696, "grad_norm": 0.16845643520355225, "learning_rate": 0.002, "loss": 2.5639, "step": 290250 }, { "epoch": 0.5782624633431085, "grad_norm": 0.1375119686126709, "learning_rate": 0.002, "loss": 2.5419, "step": 290260 }, { "epoch": 0.5782823855667474, "grad_norm": 0.1671447455883026, "learning_rate": 0.002, "loss": 2.5572, "step": 290270 }, { "epoch": 0.5783023077903864, "grad_norm": 0.1648317128419876, "learning_rate": 0.002, "loss": 2.5585, "step": 290280 }, { "epoch": 0.5783222300140253, "grad_norm": 0.1598958522081375, "learning_rate": 0.002, "loss": 2.5592, "step": 290290 }, { "epoch": 0.5783421522376642, "grad_norm": 0.1671956479549408, "learning_rate": 0.002, "loss": 2.555, "step": 290300 }, { "epoch": 0.578362074461303, "grad_norm": 0.1639413684606552, "learning_rate": 0.002, "loss": 2.5601, "step": 290310 }, { "epoch": 0.578381996684942, "grad_norm": 0.16315153241157532, "learning_rate": 0.002, "loss": 2.5628, "step": 290320 }, { "epoch": 0.578401918908581, "grad_norm": 0.16809073090553284, "learning_rate": 0.002, "loss": 2.5564, "step": 290330 }, { "epoch": 0.5784218411322198, "grad_norm": 0.2013741284608841, "learning_rate": 0.002, "loss": 2.5487, "step": 290340 }, { "epoch": 0.5784417633558587, "grad_norm": 0.14738045632839203, "learning_rate": 0.002, "loss": 2.5539, "step": 290350 }, { "epoch": 0.5784616855794976, "grad_norm": 0.16366904973983765, "learning_rate": 0.002, "loss": 2.5681, "step": 290360 }, { "epoch": 0.5784816078031365, "grad_norm": 0.13799086213111877, "learning_rate": 0.002, "loss": 2.5541, "step": 290370 }, { "epoch": 0.5785015300267755, "grad_norm": 0.20401212573051453, "learning_rate": 0.002, "loss": 2.55, "step": 290380 }, { "epoch": 0.5785214522504144, "grad_norm": 0.1809007078409195, "learning_rate": 0.002, "loss": 2.5472, "step": 290390 }, { "epoch": 0.5785413744740533, "grad_norm": 0.16895711421966553, "learning_rate": 0.002, "loss": 2.5475, "step": 290400 }, { "epoch": 0.5785612966976922, "grad_norm": 0.18968699872493744, "learning_rate": 0.002, "loss": 2.5369, "step": 290410 }, { "epoch": 0.5785812189213311, "grad_norm": 0.16947560012340546, "learning_rate": 0.002, "loss": 2.5621, "step": 290420 }, { "epoch": 0.5786011411449701, "grad_norm": 0.15927629172801971, "learning_rate": 0.002, "loss": 2.5531, "step": 290430 }, { "epoch": 0.578621063368609, "grad_norm": 0.1801994889974594, "learning_rate": 0.002, "loss": 2.5504, "step": 290440 }, { "epoch": 0.5786409855922479, "grad_norm": 0.1482219398021698, "learning_rate": 0.002, "loss": 2.5605, "step": 290450 }, { "epoch": 0.5786609078158867, "grad_norm": 0.16838616132736206, "learning_rate": 0.002, "loss": 2.5539, "step": 290460 }, { "epoch": 0.5786808300395256, "grad_norm": 0.2194826304912567, "learning_rate": 0.002, "loss": 2.5692, "step": 290470 }, { "epoch": 0.5787007522631646, "grad_norm": 0.18267777562141418, "learning_rate": 0.002, "loss": 2.5515, "step": 290480 }, { "epoch": 0.5787206744868035, "grad_norm": 0.14382146298885345, "learning_rate": 0.002, "loss": 2.5491, "step": 290490 }, { "epoch": 0.5787405967104424, "grad_norm": 0.17089612782001495, "learning_rate": 0.002, "loss": 2.5662, "step": 290500 }, { "epoch": 0.5787605189340813, "grad_norm": 0.1478034406900406, "learning_rate": 0.002, "loss": 2.5585, "step": 290510 }, { "epoch": 0.5787804411577202, "grad_norm": 0.1631881445646286, "learning_rate": 0.002, "loss": 2.5405, "step": 290520 }, { "epoch": 0.5788003633813592, "grad_norm": 0.2129288613796234, "learning_rate": 0.002, "loss": 2.555, "step": 290530 }, { "epoch": 0.5788202856049981, "grad_norm": 0.15913265943527222, "learning_rate": 0.002, "loss": 2.561, "step": 290540 }, { "epoch": 0.578840207828637, "grad_norm": 0.14277654886245728, "learning_rate": 0.002, "loss": 2.5297, "step": 290550 }, { "epoch": 0.5788601300522759, "grad_norm": 0.179498091340065, "learning_rate": 0.002, "loss": 2.5568, "step": 290560 }, { "epoch": 0.5788800522759149, "grad_norm": 0.16946594417095184, "learning_rate": 0.002, "loss": 2.5408, "step": 290570 }, { "epoch": 0.5788999744995538, "grad_norm": 0.16575177013874054, "learning_rate": 0.002, "loss": 2.554, "step": 290580 }, { "epoch": 0.5789198967231927, "grad_norm": 0.22950167953968048, "learning_rate": 0.002, "loss": 2.5531, "step": 290590 }, { "epoch": 0.5789398189468316, "grad_norm": 0.15324591100215912, "learning_rate": 0.002, "loss": 2.5774, "step": 290600 }, { "epoch": 0.5789597411704704, "grad_norm": 0.19640517234802246, "learning_rate": 0.002, "loss": 2.5526, "step": 290610 }, { "epoch": 0.5789796633941094, "grad_norm": 0.21281872689723969, "learning_rate": 0.002, "loss": 2.5581, "step": 290620 }, { "epoch": 0.5789995856177483, "grad_norm": 0.14743590354919434, "learning_rate": 0.002, "loss": 2.5551, "step": 290630 }, { "epoch": 0.5790195078413872, "grad_norm": 0.15596483647823334, "learning_rate": 0.002, "loss": 2.5586, "step": 290640 }, { "epoch": 0.5790394300650261, "grad_norm": 0.1932588368654251, "learning_rate": 0.002, "loss": 2.5531, "step": 290650 }, { "epoch": 0.579059352288665, "grad_norm": 0.16906620562076569, "learning_rate": 0.002, "loss": 2.537, "step": 290660 }, { "epoch": 0.579079274512304, "grad_norm": 0.19099994003772736, "learning_rate": 0.002, "loss": 2.548, "step": 290670 }, { "epoch": 0.5790991967359429, "grad_norm": 0.16991795599460602, "learning_rate": 0.002, "loss": 2.5529, "step": 290680 }, { "epoch": 0.5791191189595818, "grad_norm": 0.13531658053398132, "learning_rate": 0.002, "loss": 2.5483, "step": 290690 }, { "epoch": 0.5791390411832207, "grad_norm": 0.15278612077236176, "learning_rate": 0.002, "loss": 2.5607, "step": 290700 }, { "epoch": 0.5791589634068596, "grad_norm": 0.16481642425060272, "learning_rate": 0.002, "loss": 2.5516, "step": 290710 }, { "epoch": 0.5791788856304986, "grad_norm": 0.18082278966903687, "learning_rate": 0.002, "loss": 2.5471, "step": 290720 }, { "epoch": 0.5791988078541375, "grad_norm": 0.1516120582818985, "learning_rate": 0.002, "loss": 2.567, "step": 290730 }, { "epoch": 0.5792187300777764, "grad_norm": 0.15725401043891907, "learning_rate": 0.002, "loss": 2.558, "step": 290740 }, { "epoch": 0.5792386523014152, "grad_norm": 0.13594701886177063, "learning_rate": 0.002, "loss": 2.5434, "step": 290750 }, { "epoch": 0.5792585745250541, "grad_norm": 0.18450318276882172, "learning_rate": 0.002, "loss": 2.5452, "step": 290760 }, { "epoch": 0.5792784967486931, "grad_norm": 0.1823136806488037, "learning_rate": 0.002, "loss": 2.5339, "step": 290770 }, { "epoch": 0.579298418972332, "grad_norm": 0.23181504011154175, "learning_rate": 0.002, "loss": 2.5599, "step": 290780 }, { "epoch": 0.5793183411959709, "grad_norm": 0.16182182729244232, "learning_rate": 0.002, "loss": 2.5552, "step": 290790 }, { "epoch": 0.5793382634196098, "grad_norm": 0.14950259029865265, "learning_rate": 0.002, "loss": 2.5645, "step": 290800 }, { "epoch": 0.5793581856432487, "grad_norm": 0.371004581451416, "learning_rate": 0.002, "loss": 2.5511, "step": 290810 }, { "epoch": 0.5793781078668877, "grad_norm": 0.16507677733898163, "learning_rate": 0.002, "loss": 2.5574, "step": 290820 }, { "epoch": 0.5793980300905266, "grad_norm": 0.15486398339271545, "learning_rate": 0.002, "loss": 2.548, "step": 290830 }, { "epoch": 0.5794179523141655, "grad_norm": 0.14587514102458954, "learning_rate": 0.002, "loss": 2.551, "step": 290840 }, { "epoch": 0.5794378745378044, "grad_norm": 0.21689146757125854, "learning_rate": 0.002, "loss": 2.576, "step": 290850 }, { "epoch": 0.5794577967614434, "grad_norm": 0.15688540041446686, "learning_rate": 0.002, "loss": 2.563, "step": 290860 }, { "epoch": 0.5794777189850823, "grad_norm": 0.1500590741634369, "learning_rate": 0.002, "loss": 2.55, "step": 290870 }, { "epoch": 0.5794976412087212, "grad_norm": 0.15253230929374695, "learning_rate": 0.002, "loss": 2.5633, "step": 290880 }, { "epoch": 0.57951756343236, "grad_norm": 0.17499348521232605, "learning_rate": 0.002, "loss": 2.5458, "step": 290890 }, { "epoch": 0.5795374856559989, "grad_norm": 0.13589408993721008, "learning_rate": 0.002, "loss": 2.5388, "step": 290900 }, { "epoch": 0.5795574078796379, "grad_norm": 0.19506540894508362, "learning_rate": 0.002, "loss": 2.5495, "step": 290910 }, { "epoch": 0.5795773301032768, "grad_norm": 0.1750747114419937, "learning_rate": 0.002, "loss": 2.5362, "step": 290920 }, { "epoch": 0.5795972523269157, "grad_norm": 0.1439191848039627, "learning_rate": 0.002, "loss": 2.5595, "step": 290930 }, { "epoch": 0.5796171745505546, "grad_norm": 0.20122994482517242, "learning_rate": 0.002, "loss": 2.544, "step": 290940 }, { "epoch": 0.5796370967741935, "grad_norm": 0.2015949785709381, "learning_rate": 0.002, "loss": 2.5559, "step": 290950 }, { "epoch": 0.5796570189978325, "grad_norm": 0.153817281126976, "learning_rate": 0.002, "loss": 2.5686, "step": 290960 }, { "epoch": 0.5796769412214714, "grad_norm": 0.15526805818080902, "learning_rate": 0.002, "loss": 2.5601, "step": 290970 }, { "epoch": 0.5796968634451103, "grad_norm": 0.20629820227622986, "learning_rate": 0.002, "loss": 2.5505, "step": 290980 }, { "epoch": 0.5797167856687492, "grad_norm": 0.17867888510227203, "learning_rate": 0.002, "loss": 2.5733, "step": 290990 }, { "epoch": 0.5797367078923881, "grad_norm": 0.17267583310604095, "learning_rate": 0.002, "loss": 2.5562, "step": 291000 }, { "epoch": 0.5797566301160271, "grad_norm": 0.15965786576271057, "learning_rate": 0.002, "loss": 2.5494, "step": 291010 }, { "epoch": 0.579776552339666, "grad_norm": 0.1490466445684433, "learning_rate": 0.002, "loss": 2.5592, "step": 291020 }, { "epoch": 0.5797964745633049, "grad_norm": 0.15512225031852722, "learning_rate": 0.002, "loss": 2.5575, "step": 291030 }, { "epoch": 0.5798163967869437, "grad_norm": 0.17878836393356323, "learning_rate": 0.002, "loss": 2.5625, "step": 291040 }, { "epoch": 0.5798363190105826, "grad_norm": 0.1679762452840805, "learning_rate": 0.002, "loss": 2.5594, "step": 291050 }, { "epoch": 0.5798562412342216, "grad_norm": 0.19833576679229736, "learning_rate": 0.002, "loss": 2.5487, "step": 291060 }, { "epoch": 0.5798761634578605, "grad_norm": 0.1773768961429596, "learning_rate": 0.002, "loss": 2.5631, "step": 291070 }, { "epoch": 0.5798960856814994, "grad_norm": 0.1550186723470688, "learning_rate": 0.002, "loss": 2.5477, "step": 291080 }, { "epoch": 0.5799160079051383, "grad_norm": 0.17551980912685394, "learning_rate": 0.002, "loss": 2.5561, "step": 291090 }, { "epoch": 0.5799359301287772, "grad_norm": 0.18076543509960175, "learning_rate": 0.002, "loss": 2.5592, "step": 291100 }, { "epoch": 0.5799558523524162, "grad_norm": 0.15735597908496857, "learning_rate": 0.002, "loss": 2.5532, "step": 291110 }, { "epoch": 0.5799757745760551, "grad_norm": 0.16338664293289185, "learning_rate": 0.002, "loss": 2.5574, "step": 291120 }, { "epoch": 0.579995696799694, "grad_norm": 0.20424433052539825, "learning_rate": 0.002, "loss": 2.5438, "step": 291130 }, { "epoch": 0.5800156190233329, "grad_norm": 0.16670110821723938, "learning_rate": 0.002, "loss": 2.5466, "step": 291140 }, { "epoch": 0.5800355412469719, "grad_norm": 0.13649678230285645, "learning_rate": 0.002, "loss": 2.5756, "step": 291150 }, { "epoch": 0.5800554634706108, "grad_norm": 0.20615731179714203, "learning_rate": 0.002, "loss": 2.5618, "step": 291160 }, { "epoch": 0.5800753856942497, "grad_norm": 0.17206457257270813, "learning_rate": 0.002, "loss": 2.5519, "step": 291170 }, { "epoch": 0.5800953079178885, "grad_norm": 0.14928698539733887, "learning_rate": 0.002, "loss": 2.5561, "step": 291180 }, { "epoch": 0.5801152301415274, "grad_norm": 0.158773735165596, "learning_rate": 0.002, "loss": 2.5659, "step": 291190 }, { "epoch": 0.5801351523651664, "grad_norm": 0.14423368871212006, "learning_rate": 0.002, "loss": 2.5531, "step": 291200 }, { "epoch": 0.5801550745888053, "grad_norm": 0.1888742744922638, "learning_rate": 0.002, "loss": 2.5556, "step": 291210 }, { "epoch": 0.5801749968124442, "grad_norm": 0.14703528583049774, "learning_rate": 0.002, "loss": 2.5617, "step": 291220 }, { "epoch": 0.5801949190360831, "grad_norm": 0.17088325321674347, "learning_rate": 0.002, "loss": 2.5546, "step": 291230 }, { "epoch": 0.580214841259722, "grad_norm": 0.1622968465089798, "learning_rate": 0.002, "loss": 2.5465, "step": 291240 }, { "epoch": 0.580234763483361, "grad_norm": 0.15924577414989471, "learning_rate": 0.002, "loss": 2.5617, "step": 291250 }, { "epoch": 0.5802546857069999, "grad_norm": 0.1818968951702118, "learning_rate": 0.002, "loss": 2.5672, "step": 291260 }, { "epoch": 0.5802746079306388, "grad_norm": 0.18657855689525604, "learning_rate": 0.002, "loss": 2.5606, "step": 291270 }, { "epoch": 0.5802945301542777, "grad_norm": 0.14592067897319794, "learning_rate": 0.002, "loss": 2.5747, "step": 291280 }, { "epoch": 0.5803144523779166, "grad_norm": 0.20522432029247284, "learning_rate": 0.002, "loss": 2.5618, "step": 291290 }, { "epoch": 0.5803343746015556, "grad_norm": 0.16628991067409515, "learning_rate": 0.002, "loss": 2.5503, "step": 291300 }, { "epoch": 0.5803542968251945, "grad_norm": 0.15992425382137299, "learning_rate": 0.002, "loss": 2.5781, "step": 291310 }, { "epoch": 0.5803742190488334, "grad_norm": 0.17539632320404053, "learning_rate": 0.002, "loss": 2.556, "step": 291320 }, { "epoch": 0.5803941412724722, "grad_norm": 0.1494234800338745, "learning_rate": 0.002, "loss": 2.5433, "step": 291330 }, { "epoch": 0.5804140634961111, "grad_norm": 0.16929662227630615, "learning_rate": 0.002, "loss": 2.5664, "step": 291340 }, { "epoch": 0.5804339857197501, "grad_norm": 0.14896440505981445, "learning_rate": 0.002, "loss": 2.5484, "step": 291350 }, { "epoch": 0.580453907943389, "grad_norm": 0.1797751784324646, "learning_rate": 0.002, "loss": 2.5453, "step": 291360 }, { "epoch": 0.5804738301670279, "grad_norm": 0.13631147146224976, "learning_rate": 0.002, "loss": 2.5346, "step": 291370 }, { "epoch": 0.5804937523906668, "grad_norm": 0.1610877364873886, "learning_rate": 0.002, "loss": 2.5436, "step": 291380 }, { "epoch": 0.5805136746143057, "grad_norm": 0.16858908534049988, "learning_rate": 0.002, "loss": 2.5625, "step": 291390 }, { "epoch": 0.5805335968379447, "grad_norm": 0.17654885351657867, "learning_rate": 0.002, "loss": 2.5597, "step": 291400 }, { "epoch": 0.5805535190615836, "grad_norm": 0.18113823235034943, "learning_rate": 0.002, "loss": 2.5726, "step": 291410 }, { "epoch": 0.5805734412852225, "grad_norm": 0.17315740883350372, "learning_rate": 0.002, "loss": 2.5605, "step": 291420 }, { "epoch": 0.5805933635088614, "grad_norm": 0.14116185903549194, "learning_rate": 0.002, "loss": 2.5605, "step": 291430 }, { "epoch": 0.5806132857325004, "grad_norm": 0.18279574811458588, "learning_rate": 0.002, "loss": 2.5561, "step": 291440 }, { "epoch": 0.5806332079561393, "grad_norm": 0.168477863073349, "learning_rate": 0.002, "loss": 2.5652, "step": 291450 }, { "epoch": 0.5806531301797782, "grad_norm": 0.1490745097398758, "learning_rate": 0.002, "loss": 2.5464, "step": 291460 }, { "epoch": 0.580673052403417, "grad_norm": 0.1568143367767334, "learning_rate": 0.002, "loss": 2.563, "step": 291470 }, { "epoch": 0.5806929746270559, "grad_norm": 0.16000787913799286, "learning_rate": 0.002, "loss": 2.5598, "step": 291480 }, { "epoch": 0.5807128968506949, "grad_norm": 0.15679658949375153, "learning_rate": 0.002, "loss": 2.5644, "step": 291490 }, { "epoch": 0.5807328190743338, "grad_norm": 0.15383760631084442, "learning_rate": 0.002, "loss": 2.5584, "step": 291500 }, { "epoch": 0.5807527412979727, "grad_norm": 0.16443690657615662, "learning_rate": 0.002, "loss": 2.5395, "step": 291510 }, { "epoch": 0.5807726635216116, "grad_norm": 0.22460420429706573, "learning_rate": 0.002, "loss": 2.5476, "step": 291520 }, { "epoch": 0.5807925857452505, "grad_norm": 0.1560511589050293, "learning_rate": 0.002, "loss": 2.5538, "step": 291530 }, { "epoch": 0.5808125079688895, "grad_norm": 0.150534987449646, "learning_rate": 0.002, "loss": 2.5542, "step": 291540 }, { "epoch": 0.5808324301925284, "grad_norm": 0.18064886331558228, "learning_rate": 0.002, "loss": 2.5318, "step": 291550 }, { "epoch": 0.5808523524161673, "grad_norm": 0.15540026128292084, "learning_rate": 0.002, "loss": 2.5643, "step": 291560 }, { "epoch": 0.5808722746398062, "grad_norm": 0.15757542848587036, "learning_rate": 0.002, "loss": 2.5674, "step": 291570 }, { "epoch": 0.5808921968634451, "grad_norm": 0.20547805726528168, "learning_rate": 0.002, "loss": 2.5481, "step": 291580 }, { "epoch": 0.5809121190870841, "grad_norm": 0.18520456552505493, "learning_rate": 0.002, "loss": 2.5718, "step": 291590 }, { "epoch": 0.580932041310723, "grad_norm": 0.14457088708877563, "learning_rate": 0.002, "loss": 2.5537, "step": 291600 }, { "epoch": 0.5809519635343618, "grad_norm": 0.14229033887386322, "learning_rate": 0.002, "loss": 2.5563, "step": 291610 }, { "epoch": 0.5809718857580007, "grad_norm": 0.2150493711233139, "learning_rate": 0.002, "loss": 2.5587, "step": 291620 }, { "epoch": 0.5809918079816396, "grad_norm": 0.14661145210266113, "learning_rate": 0.002, "loss": 2.5507, "step": 291630 }, { "epoch": 0.5810117302052786, "grad_norm": 0.1584927886724472, "learning_rate": 0.002, "loss": 2.5568, "step": 291640 }, { "epoch": 0.5810316524289175, "grad_norm": 0.13827085494995117, "learning_rate": 0.002, "loss": 2.5548, "step": 291650 }, { "epoch": 0.5810515746525564, "grad_norm": 0.19305630028247833, "learning_rate": 0.002, "loss": 2.5342, "step": 291660 }, { "epoch": 0.5810714968761953, "grad_norm": 0.17119865119457245, "learning_rate": 0.002, "loss": 2.5611, "step": 291670 }, { "epoch": 0.5810914190998342, "grad_norm": 0.17705968022346497, "learning_rate": 0.002, "loss": 2.5491, "step": 291680 }, { "epoch": 0.5811113413234732, "grad_norm": 0.24175377190113068, "learning_rate": 0.002, "loss": 2.5401, "step": 291690 }, { "epoch": 0.5811312635471121, "grad_norm": 0.15722428262233734, "learning_rate": 0.002, "loss": 2.5671, "step": 291700 }, { "epoch": 0.581151185770751, "grad_norm": 0.22332249581813812, "learning_rate": 0.002, "loss": 2.5588, "step": 291710 }, { "epoch": 0.5811711079943899, "grad_norm": 0.16562862694263458, "learning_rate": 0.002, "loss": 2.5575, "step": 291720 }, { "epoch": 0.5811910302180289, "grad_norm": 0.17672817409038544, "learning_rate": 0.002, "loss": 2.5659, "step": 291730 }, { "epoch": 0.5812109524416678, "grad_norm": 0.16906650364398956, "learning_rate": 0.002, "loss": 2.5584, "step": 291740 }, { "epoch": 0.5812308746653067, "grad_norm": 0.1762124001979828, "learning_rate": 0.002, "loss": 2.5598, "step": 291750 }, { "epoch": 0.5812507968889455, "grad_norm": 0.16385355591773987, "learning_rate": 0.002, "loss": 2.5469, "step": 291760 }, { "epoch": 0.5812707191125844, "grad_norm": 0.2238556295633316, "learning_rate": 0.002, "loss": 2.5525, "step": 291770 }, { "epoch": 0.5812906413362234, "grad_norm": 0.17852869629859924, "learning_rate": 0.002, "loss": 2.5703, "step": 291780 }, { "epoch": 0.5813105635598623, "grad_norm": 0.19904792308807373, "learning_rate": 0.002, "loss": 2.5626, "step": 291790 }, { "epoch": 0.5813304857835012, "grad_norm": 0.15515516698360443, "learning_rate": 0.002, "loss": 2.5427, "step": 291800 }, { "epoch": 0.5813504080071401, "grad_norm": 0.17302320897579193, "learning_rate": 0.002, "loss": 2.5493, "step": 291810 }, { "epoch": 0.581370330230779, "grad_norm": 0.18630918860435486, "learning_rate": 0.002, "loss": 2.5552, "step": 291820 }, { "epoch": 0.581390252454418, "grad_norm": 0.16483765840530396, "learning_rate": 0.002, "loss": 2.561, "step": 291830 }, { "epoch": 0.5814101746780569, "grad_norm": 0.17257949709892273, "learning_rate": 0.002, "loss": 2.5674, "step": 291840 }, { "epoch": 0.5814300969016958, "grad_norm": 0.16697543859481812, "learning_rate": 0.002, "loss": 2.548, "step": 291850 }, { "epoch": 0.5814500191253347, "grad_norm": 0.142265185713768, "learning_rate": 0.002, "loss": 2.5576, "step": 291860 }, { "epoch": 0.5814699413489736, "grad_norm": 0.1586042046546936, "learning_rate": 0.002, "loss": 2.5575, "step": 291870 }, { "epoch": 0.5814898635726126, "grad_norm": 0.14838582277297974, "learning_rate": 0.002, "loss": 2.5605, "step": 291880 }, { "epoch": 0.5815097857962515, "grad_norm": 0.18675872683525085, "learning_rate": 0.002, "loss": 2.546, "step": 291890 }, { "epoch": 0.5815297080198903, "grad_norm": 0.17734001576900482, "learning_rate": 0.002, "loss": 2.564, "step": 291900 }, { "epoch": 0.5815496302435292, "grad_norm": 0.1436994969844818, "learning_rate": 0.002, "loss": 2.5505, "step": 291910 }, { "epoch": 0.5815695524671681, "grad_norm": 0.19295912981033325, "learning_rate": 0.002, "loss": 2.5465, "step": 291920 }, { "epoch": 0.5815894746908071, "grad_norm": 0.17368872463703156, "learning_rate": 0.002, "loss": 2.5594, "step": 291930 }, { "epoch": 0.581609396914446, "grad_norm": 0.17139366269111633, "learning_rate": 0.002, "loss": 2.5474, "step": 291940 }, { "epoch": 0.5816293191380849, "grad_norm": 0.17358465492725372, "learning_rate": 0.002, "loss": 2.549, "step": 291950 }, { "epoch": 0.5816492413617238, "grad_norm": 0.15836197137832642, "learning_rate": 0.002, "loss": 2.5573, "step": 291960 }, { "epoch": 0.5816691635853627, "grad_norm": 0.1653682142496109, "learning_rate": 0.002, "loss": 2.5638, "step": 291970 }, { "epoch": 0.5816890858090017, "grad_norm": 0.14896070957183838, "learning_rate": 0.002, "loss": 2.5536, "step": 291980 }, { "epoch": 0.5817090080326406, "grad_norm": 0.18583400547504425, "learning_rate": 0.002, "loss": 2.5579, "step": 291990 }, { "epoch": 0.5817289302562795, "grad_norm": 0.15065331757068634, "learning_rate": 0.002, "loss": 2.5511, "step": 292000 }, { "epoch": 0.5817488524799184, "grad_norm": 0.15871821343898773, "learning_rate": 0.002, "loss": 2.5501, "step": 292010 }, { "epoch": 0.5817687747035574, "grad_norm": 0.15516303479671478, "learning_rate": 0.002, "loss": 2.5633, "step": 292020 }, { "epoch": 0.5817886969271963, "grad_norm": 0.17455658316612244, "learning_rate": 0.002, "loss": 2.5519, "step": 292030 }, { "epoch": 0.5818086191508351, "grad_norm": 0.1720786839723587, "learning_rate": 0.002, "loss": 2.5553, "step": 292040 }, { "epoch": 0.581828541374474, "grad_norm": 0.16039004921913147, "learning_rate": 0.002, "loss": 2.5532, "step": 292050 }, { "epoch": 0.5818484635981129, "grad_norm": 0.27968108654022217, "learning_rate": 0.002, "loss": 2.5724, "step": 292060 }, { "epoch": 0.5818683858217519, "grad_norm": 0.3003442883491516, "learning_rate": 0.002, "loss": 2.5544, "step": 292070 }, { "epoch": 0.5818883080453908, "grad_norm": 0.186587393283844, "learning_rate": 0.002, "loss": 2.5601, "step": 292080 }, { "epoch": 0.5819082302690297, "grad_norm": 0.1637853980064392, "learning_rate": 0.002, "loss": 2.5554, "step": 292090 }, { "epoch": 0.5819281524926686, "grad_norm": 0.16921299695968628, "learning_rate": 0.002, "loss": 2.5549, "step": 292100 }, { "epoch": 0.5819480747163075, "grad_norm": 0.18846838176250458, "learning_rate": 0.002, "loss": 2.5421, "step": 292110 }, { "epoch": 0.5819679969399465, "grad_norm": 0.14989925920963287, "learning_rate": 0.002, "loss": 2.5726, "step": 292120 }, { "epoch": 0.5819879191635854, "grad_norm": 0.153450608253479, "learning_rate": 0.002, "loss": 2.5699, "step": 292130 }, { "epoch": 0.5820078413872243, "grad_norm": 0.19529126584529877, "learning_rate": 0.002, "loss": 2.5564, "step": 292140 }, { "epoch": 0.5820277636108632, "grad_norm": 0.15763245522975922, "learning_rate": 0.002, "loss": 2.5625, "step": 292150 }, { "epoch": 0.5820476858345021, "grad_norm": 0.1708880066871643, "learning_rate": 0.002, "loss": 2.5576, "step": 292160 }, { "epoch": 0.5820676080581411, "grad_norm": 0.16826674342155457, "learning_rate": 0.002, "loss": 2.5593, "step": 292170 }, { "epoch": 0.58208753028178, "grad_norm": 0.1756744086742401, "learning_rate": 0.002, "loss": 2.5606, "step": 292180 }, { "epoch": 0.5821074525054188, "grad_norm": 0.1664198487997055, "learning_rate": 0.002, "loss": 2.5597, "step": 292190 }, { "epoch": 0.5821273747290577, "grad_norm": 0.17324376106262207, "learning_rate": 0.002, "loss": 2.5395, "step": 292200 }, { "epoch": 0.5821472969526966, "grad_norm": 0.15629175305366516, "learning_rate": 0.002, "loss": 2.5596, "step": 292210 }, { "epoch": 0.5821672191763356, "grad_norm": 0.17212267220020294, "learning_rate": 0.002, "loss": 2.5533, "step": 292220 }, { "epoch": 0.5821871413999745, "grad_norm": 0.1636030226945877, "learning_rate": 0.002, "loss": 2.5627, "step": 292230 }, { "epoch": 0.5822070636236134, "grad_norm": 0.2083669900894165, "learning_rate": 0.002, "loss": 2.5708, "step": 292240 }, { "epoch": 0.5822269858472523, "grad_norm": 0.1636439561843872, "learning_rate": 0.002, "loss": 2.5628, "step": 292250 }, { "epoch": 0.5822469080708912, "grad_norm": 0.17159965634346008, "learning_rate": 0.002, "loss": 2.5572, "step": 292260 }, { "epoch": 0.5822668302945302, "grad_norm": 0.14609725773334503, "learning_rate": 0.002, "loss": 2.5553, "step": 292270 }, { "epoch": 0.5822867525181691, "grad_norm": 0.14889837801456451, "learning_rate": 0.002, "loss": 2.5486, "step": 292280 }, { "epoch": 0.582306674741808, "grad_norm": 0.2060202807188034, "learning_rate": 0.002, "loss": 2.5618, "step": 292290 }, { "epoch": 0.5823265969654469, "grad_norm": 0.13998976349830627, "learning_rate": 0.002, "loss": 2.5473, "step": 292300 }, { "epoch": 0.5823465191890858, "grad_norm": 0.18919973075389862, "learning_rate": 0.002, "loss": 2.5566, "step": 292310 }, { "epoch": 0.5823664414127248, "grad_norm": 0.13154275715351105, "learning_rate": 0.002, "loss": 2.5446, "step": 292320 }, { "epoch": 0.5823863636363636, "grad_norm": 0.19168679416179657, "learning_rate": 0.002, "loss": 2.5592, "step": 292330 }, { "epoch": 0.5824062858600025, "grad_norm": 0.1420462727546692, "learning_rate": 0.002, "loss": 2.5557, "step": 292340 }, { "epoch": 0.5824262080836414, "grad_norm": 0.17481574416160583, "learning_rate": 0.002, "loss": 2.5506, "step": 292350 }, { "epoch": 0.5824461303072804, "grad_norm": 0.22985558211803436, "learning_rate": 0.002, "loss": 2.5564, "step": 292360 }, { "epoch": 0.5824660525309193, "grad_norm": 0.16818976402282715, "learning_rate": 0.002, "loss": 2.5556, "step": 292370 }, { "epoch": 0.5824859747545582, "grad_norm": 0.15966828167438507, "learning_rate": 0.002, "loss": 2.5366, "step": 292380 }, { "epoch": 0.5825058969781971, "grad_norm": 0.16972866654396057, "learning_rate": 0.002, "loss": 2.5651, "step": 292390 }, { "epoch": 0.582525819201836, "grad_norm": 0.16286058723926544, "learning_rate": 0.002, "loss": 2.5528, "step": 292400 }, { "epoch": 0.582545741425475, "grad_norm": 0.19590553641319275, "learning_rate": 0.002, "loss": 2.5471, "step": 292410 }, { "epoch": 0.5825656636491139, "grad_norm": 0.16472069919109344, "learning_rate": 0.002, "loss": 2.5598, "step": 292420 }, { "epoch": 0.5825855858727528, "grad_norm": 0.16144505143165588, "learning_rate": 0.002, "loss": 2.56, "step": 292430 }, { "epoch": 0.5826055080963917, "grad_norm": 0.17582519352436066, "learning_rate": 0.002, "loss": 2.5615, "step": 292440 }, { "epoch": 0.5826254303200306, "grad_norm": 0.19129927456378937, "learning_rate": 0.002, "loss": 2.5556, "step": 292450 }, { "epoch": 0.5826453525436696, "grad_norm": 0.15719854831695557, "learning_rate": 0.002, "loss": 2.5542, "step": 292460 }, { "epoch": 0.5826652747673084, "grad_norm": 0.14199477434158325, "learning_rate": 0.002, "loss": 2.5544, "step": 292470 }, { "epoch": 0.5826851969909473, "grad_norm": 0.14910246431827545, "learning_rate": 0.002, "loss": 2.5669, "step": 292480 }, { "epoch": 0.5827051192145862, "grad_norm": 0.15668253600597382, "learning_rate": 0.002, "loss": 2.5672, "step": 292490 }, { "epoch": 0.5827250414382251, "grad_norm": 0.17895440757274628, "learning_rate": 0.002, "loss": 2.5607, "step": 292500 }, { "epoch": 0.5827449636618641, "grad_norm": 0.17127883434295654, "learning_rate": 0.002, "loss": 2.5512, "step": 292510 }, { "epoch": 0.582764885885503, "grad_norm": 0.13848289847373962, "learning_rate": 0.002, "loss": 2.5695, "step": 292520 }, { "epoch": 0.5827848081091419, "grad_norm": 0.17131367325782776, "learning_rate": 0.002, "loss": 2.5512, "step": 292530 }, { "epoch": 0.5828047303327808, "grad_norm": 0.16450974345207214, "learning_rate": 0.002, "loss": 2.5599, "step": 292540 }, { "epoch": 0.5828246525564197, "grad_norm": 0.19157831370830536, "learning_rate": 0.002, "loss": 2.5486, "step": 292550 }, { "epoch": 0.5828445747800587, "grad_norm": 0.1725407838821411, "learning_rate": 0.002, "loss": 2.5585, "step": 292560 }, { "epoch": 0.5828644970036976, "grad_norm": 0.1697666347026825, "learning_rate": 0.002, "loss": 2.5578, "step": 292570 }, { "epoch": 0.5828844192273365, "grad_norm": 0.1633087396621704, "learning_rate": 0.002, "loss": 2.5531, "step": 292580 }, { "epoch": 0.5829043414509754, "grad_norm": 0.17383907735347748, "learning_rate": 0.002, "loss": 2.5417, "step": 292590 }, { "epoch": 0.5829242636746143, "grad_norm": 0.18612466752529144, "learning_rate": 0.002, "loss": 2.5486, "step": 292600 }, { "epoch": 0.5829441858982533, "grad_norm": 0.22474391758441925, "learning_rate": 0.002, "loss": 2.5447, "step": 292610 }, { "epoch": 0.5829641081218921, "grad_norm": 0.16468727588653564, "learning_rate": 0.002, "loss": 2.5388, "step": 292620 }, { "epoch": 0.582984030345531, "grad_norm": 0.1656142771244049, "learning_rate": 0.002, "loss": 2.5626, "step": 292630 }, { "epoch": 0.5830039525691699, "grad_norm": 0.1525004506111145, "learning_rate": 0.002, "loss": 2.5551, "step": 292640 }, { "epoch": 0.5830238747928089, "grad_norm": 0.17660754919052124, "learning_rate": 0.002, "loss": 2.5632, "step": 292650 }, { "epoch": 0.5830437970164478, "grad_norm": 0.18352624773979187, "learning_rate": 0.002, "loss": 2.5533, "step": 292660 }, { "epoch": 0.5830637192400867, "grad_norm": 0.16206900775432587, "learning_rate": 0.002, "loss": 2.5477, "step": 292670 }, { "epoch": 0.5830836414637256, "grad_norm": 0.18795904517173767, "learning_rate": 0.002, "loss": 2.5612, "step": 292680 }, { "epoch": 0.5831035636873645, "grad_norm": 0.17997853457927704, "learning_rate": 0.002, "loss": 2.5545, "step": 292690 }, { "epoch": 0.5831234859110035, "grad_norm": 0.16594982147216797, "learning_rate": 0.002, "loss": 2.5626, "step": 292700 }, { "epoch": 0.5831434081346424, "grad_norm": 0.1841261237859726, "learning_rate": 0.002, "loss": 2.557, "step": 292710 }, { "epoch": 0.5831633303582813, "grad_norm": 0.15462742745876312, "learning_rate": 0.002, "loss": 2.5413, "step": 292720 }, { "epoch": 0.5831832525819202, "grad_norm": 0.1596755087375641, "learning_rate": 0.002, "loss": 2.5529, "step": 292730 }, { "epoch": 0.583203174805559, "grad_norm": 0.18802011013031006, "learning_rate": 0.002, "loss": 2.5726, "step": 292740 }, { "epoch": 0.5832230970291981, "grad_norm": 0.18266761302947998, "learning_rate": 0.002, "loss": 2.5678, "step": 292750 }, { "epoch": 0.583243019252837, "grad_norm": 0.16332049667835236, "learning_rate": 0.002, "loss": 2.5571, "step": 292760 }, { "epoch": 0.5832629414764758, "grad_norm": 0.15222232043743134, "learning_rate": 0.002, "loss": 2.5529, "step": 292770 }, { "epoch": 0.5832828637001147, "grad_norm": 0.17990681529045105, "learning_rate": 0.002, "loss": 2.5536, "step": 292780 }, { "epoch": 0.5833027859237536, "grad_norm": 0.16326533257961273, "learning_rate": 0.002, "loss": 2.5491, "step": 292790 }, { "epoch": 0.5833227081473926, "grad_norm": 0.18618857860565186, "learning_rate": 0.002, "loss": 2.5446, "step": 292800 }, { "epoch": 0.5833426303710315, "grad_norm": 0.1509828120470047, "learning_rate": 0.002, "loss": 2.5533, "step": 292810 }, { "epoch": 0.5833625525946704, "grad_norm": 0.17757025361061096, "learning_rate": 0.002, "loss": 2.5599, "step": 292820 }, { "epoch": 0.5833824748183093, "grad_norm": 0.1703789085149765, "learning_rate": 0.002, "loss": 2.5497, "step": 292830 }, { "epoch": 0.5834023970419482, "grad_norm": 0.14204995334148407, "learning_rate": 0.002, "loss": 2.539, "step": 292840 }, { "epoch": 0.5834223192655872, "grad_norm": 0.18745321035385132, "learning_rate": 0.002, "loss": 2.5533, "step": 292850 }, { "epoch": 0.5834422414892261, "grad_norm": 0.1585714966058731, "learning_rate": 0.002, "loss": 2.5622, "step": 292860 }, { "epoch": 0.583462163712865, "grad_norm": 0.14587141573429108, "learning_rate": 0.002, "loss": 2.54, "step": 292870 }, { "epoch": 0.5834820859365039, "grad_norm": 0.2472033053636551, "learning_rate": 0.002, "loss": 2.5393, "step": 292880 }, { "epoch": 0.5835020081601427, "grad_norm": 0.14110969007015228, "learning_rate": 0.002, "loss": 2.5605, "step": 292890 }, { "epoch": 0.5835219303837818, "grad_norm": 0.1694953292608261, "learning_rate": 0.002, "loss": 2.548, "step": 292900 }, { "epoch": 0.5835418526074206, "grad_norm": 0.1710408627986908, "learning_rate": 0.002, "loss": 2.5545, "step": 292910 }, { "epoch": 0.5835617748310595, "grad_norm": 0.1625128835439682, "learning_rate": 0.002, "loss": 2.5641, "step": 292920 }, { "epoch": 0.5835816970546984, "grad_norm": 0.17130063474178314, "learning_rate": 0.002, "loss": 2.5538, "step": 292930 }, { "epoch": 0.5836016192783374, "grad_norm": 0.15183086693286896, "learning_rate": 0.002, "loss": 2.5695, "step": 292940 }, { "epoch": 0.5836215415019763, "grad_norm": 0.29504716396331787, "learning_rate": 0.002, "loss": 2.5508, "step": 292950 }, { "epoch": 0.5836414637256152, "grad_norm": 0.1452110856771469, "learning_rate": 0.002, "loss": 2.5629, "step": 292960 }, { "epoch": 0.5836613859492541, "grad_norm": 0.18297579884529114, "learning_rate": 0.002, "loss": 2.5604, "step": 292970 }, { "epoch": 0.583681308172893, "grad_norm": 0.18127760291099548, "learning_rate": 0.002, "loss": 2.5477, "step": 292980 }, { "epoch": 0.583701230396532, "grad_norm": 0.15487715601921082, "learning_rate": 0.002, "loss": 2.5432, "step": 292990 }, { "epoch": 0.5837211526201709, "grad_norm": 0.15548717975616455, "learning_rate": 0.002, "loss": 2.5561, "step": 293000 }, { "epoch": 0.5837410748438098, "grad_norm": 0.14197984337806702, "learning_rate": 0.002, "loss": 2.5566, "step": 293010 }, { "epoch": 0.5837609970674487, "grad_norm": 0.18328499794006348, "learning_rate": 0.002, "loss": 2.5612, "step": 293020 }, { "epoch": 0.5837809192910876, "grad_norm": 0.19455505907535553, "learning_rate": 0.002, "loss": 2.5528, "step": 293030 }, { "epoch": 0.5838008415147266, "grad_norm": 0.1768016815185547, "learning_rate": 0.002, "loss": 2.5588, "step": 293040 }, { "epoch": 0.5838207637383654, "grad_norm": 0.1602901816368103, "learning_rate": 0.002, "loss": 2.5556, "step": 293050 }, { "epoch": 0.5838406859620043, "grad_norm": 0.1779758185148239, "learning_rate": 0.002, "loss": 2.5665, "step": 293060 }, { "epoch": 0.5838606081856432, "grad_norm": 0.19016267359256744, "learning_rate": 0.002, "loss": 2.5482, "step": 293070 }, { "epoch": 0.5838805304092821, "grad_norm": 0.17030870914459229, "learning_rate": 0.002, "loss": 2.5605, "step": 293080 }, { "epoch": 0.5839004526329211, "grad_norm": 0.17005078494548798, "learning_rate": 0.002, "loss": 2.564, "step": 293090 }, { "epoch": 0.58392037485656, "grad_norm": 0.15857332944869995, "learning_rate": 0.002, "loss": 2.5546, "step": 293100 }, { "epoch": 0.5839402970801989, "grad_norm": 0.15018588304519653, "learning_rate": 0.002, "loss": 2.5711, "step": 293110 }, { "epoch": 0.5839602193038378, "grad_norm": 0.17093393206596375, "learning_rate": 0.002, "loss": 2.5427, "step": 293120 }, { "epoch": 0.5839801415274767, "grad_norm": 0.16444016993045807, "learning_rate": 0.002, "loss": 2.5573, "step": 293130 }, { "epoch": 0.5840000637511157, "grad_norm": 0.2119719237089157, "learning_rate": 0.002, "loss": 2.5472, "step": 293140 }, { "epoch": 0.5840199859747546, "grad_norm": 0.14548766613006592, "learning_rate": 0.002, "loss": 2.5498, "step": 293150 }, { "epoch": 0.5840399081983935, "grad_norm": 0.18780352175235748, "learning_rate": 0.002, "loss": 2.5564, "step": 293160 }, { "epoch": 0.5840598304220324, "grad_norm": 0.15450221300125122, "learning_rate": 0.002, "loss": 2.5477, "step": 293170 }, { "epoch": 0.5840797526456712, "grad_norm": 0.22264082729816437, "learning_rate": 0.002, "loss": 2.5534, "step": 293180 }, { "epoch": 0.5840996748693102, "grad_norm": 0.14136555790901184, "learning_rate": 0.002, "loss": 2.5585, "step": 293190 }, { "epoch": 0.5841195970929491, "grad_norm": 0.1462574601173401, "learning_rate": 0.002, "loss": 2.5457, "step": 293200 }, { "epoch": 0.584139519316588, "grad_norm": 0.1649979203939438, "learning_rate": 0.002, "loss": 2.5704, "step": 293210 }, { "epoch": 0.5841594415402269, "grad_norm": 0.16261647641658783, "learning_rate": 0.002, "loss": 2.5572, "step": 293220 }, { "epoch": 0.5841793637638659, "grad_norm": 0.17677593231201172, "learning_rate": 0.002, "loss": 2.5523, "step": 293230 }, { "epoch": 0.5841992859875048, "grad_norm": 0.1423226296901703, "learning_rate": 0.002, "loss": 2.5529, "step": 293240 }, { "epoch": 0.5842192082111437, "grad_norm": 0.19005335867404938, "learning_rate": 0.002, "loss": 2.556, "step": 293250 }, { "epoch": 0.5842391304347826, "grad_norm": 0.1496182084083557, "learning_rate": 0.002, "loss": 2.5589, "step": 293260 }, { "epoch": 0.5842590526584215, "grad_norm": 0.1949325054883957, "learning_rate": 0.002, "loss": 2.5451, "step": 293270 }, { "epoch": 0.5842789748820605, "grad_norm": 0.18043243885040283, "learning_rate": 0.002, "loss": 2.542, "step": 293280 }, { "epoch": 0.5842988971056994, "grad_norm": 0.18333256244659424, "learning_rate": 0.002, "loss": 2.5491, "step": 293290 }, { "epoch": 0.5843188193293383, "grad_norm": 0.1678411364555359, "learning_rate": 0.002, "loss": 2.5475, "step": 293300 }, { "epoch": 0.5843387415529772, "grad_norm": 0.1979486644268036, "learning_rate": 0.002, "loss": 2.5462, "step": 293310 }, { "epoch": 0.584358663776616, "grad_norm": 0.1622227430343628, "learning_rate": 0.002, "loss": 2.5472, "step": 293320 }, { "epoch": 0.584378586000255, "grad_norm": 0.1940252035856247, "learning_rate": 0.002, "loss": 2.5653, "step": 293330 }, { "epoch": 0.5843985082238939, "grad_norm": 0.1549386829137802, "learning_rate": 0.002, "loss": 2.5609, "step": 293340 }, { "epoch": 0.5844184304475328, "grad_norm": 0.14892767369747162, "learning_rate": 0.002, "loss": 2.5561, "step": 293350 }, { "epoch": 0.5844383526711717, "grad_norm": 0.17135880887508392, "learning_rate": 0.002, "loss": 2.569, "step": 293360 }, { "epoch": 0.5844582748948106, "grad_norm": 0.17812615633010864, "learning_rate": 0.002, "loss": 2.5584, "step": 293370 }, { "epoch": 0.5844781971184496, "grad_norm": 0.15980473160743713, "learning_rate": 0.002, "loss": 2.5514, "step": 293380 }, { "epoch": 0.5844981193420885, "grad_norm": 0.1506625860929489, "learning_rate": 0.002, "loss": 2.5582, "step": 293390 }, { "epoch": 0.5845180415657274, "grad_norm": 0.15680742263793945, "learning_rate": 0.002, "loss": 2.5683, "step": 293400 }, { "epoch": 0.5845379637893663, "grad_norm": 0.18834151327610016, "learning_rate": 0.002, "loss": 2.5522, "step": 293410 }, { "epoch": 0.5845578860130052, "grad_norm": 0.17051751911640167, "learning_rate": 0.002, "loss": 2.5501, "step": 293420 }, { "epoch": 0.5845778082366442, "grad_norm": 0.18186038732528687, "learning_rate": 0.002, "loss": 2.5548, "step": 293430 }, { "epoch": 0.5845977304602831, "grad_norm": 0.16780059039592743, "learning_rate": 0.002, "loss": 2.5605, "step": 293440 }, { "epoch": 0.584617652683922, "grad_norm": 0.14715956151485443, "learning_rate": 0.002, "loss": 2.5567, "step": 293450 }, { "epoch": 0.5846375749075609, "grad_norm": 0.1817338615655899, "learning_rate": 0.002, "loss": 2.5658, "step": 293460 }, { "epoch": 0.5846574971311997, "grad_norm": 0.15863485634326935, "learning_rate": 0.002, "loss": 2.5366, "step": 293470 }, { "epoch": 0.5846774193548387, "grad_norm": 0.17149080336093903, "learning_rate": 0.002, "loss": 2.5545, "step": 293480 }, { "epoch": 0.5846973415784776, "grad_norm": 0.16748766601085663, "learning_rate": 0.002, "loss": 2.5562, "step": 293490 }, { "epoch": 0.5847172638021165, "grad_norm": 0.1811574399471283, "learning_rate": 0.002, "loss": 2.5502, "step": 293500 }, { "epoch": 0.5847371860257554, "grad_norm": 0.17442971467971802, "learning_rate": 0.002, "loss": 2.5619, "step": 293510 }, { "epoch": 0.5847571082493944, "grad_norm": 0.15437215566635132, "learning_rate": 0.002, "loss": 2.5451, "step": 293520 }, { "epoch": 0.5847770304730333, "grad_norm": 0.16341719031333923, "learning_rate": 0.002, "loss": 2.549, "step": 293530 }, { "epoch": 0.5847969526966722, "grad_norm": 0.18077552318572998, "learning_rate": 0.002, "loss": 2.5509, "step": 293540 }, { "epoch": 0.5848168749203111, "grad_norm": 0.14548631012439728, "learning_rate": 0.002, "loss": 2.549, "step": 293550 }, { "epoch": 0.58483679714395, "grad_norm": 0.18934930860996246, "learning_rate": 0.002, "loss": 2.5545, "step": 293560 }, { "epoch": 0.584856719367589, "grad_norm": 0.13171464204788208, "learning_rate": 0.002, "loss": 2.5605, "step": 293570 }, { "epoch": 0.5848766415912279, "grad_norm": 0.17056356370449066, "learning_rate": 0.002, "loss": 2.5682, "step": 293580 }, { "epoch": 0.5848965638148668, "grad_norm": 0.21252703666687012, "learning_rate": 0.002, "loss": 2.5515, "step": 293590 }, { "epoch": 0.5849164860385057, "grad_norm": 0.1497868299484253, "learning_rate": 0.002, "loss": 2.567, "step": 293600 }, { "epoch": 0.5849364082621445, "grad_norm": 0.1602919101715088, "learning_rate": 0.002, "loss": 2.5522, "step": 293610 }, { "epoch": 0.5849563304857835, "grad_norm": 0.16978560388088226, "learning_rate": 0.002, "loss": 2.5422, "step": 293620 }, { "epoch": 0.5849762527094224, "grad_norm": 0.1769094616174698, "learning_rate": 0.002, "loss": 2.5644, "step": 293630 }, { "epoch": 0.5849961749330613, "grad_norm": 0.1658349186182022, "learning_rate": 0.002, "loss": 2.5521, "step": 293640 }, { "epoch": 0.5850160971567002, "grad_norm": 0.1765822172164917, "learning_rate": 0.002, "loss": 2.5584, "step": 293650 }, { "epoch": 0.5850360193803391, "grad_norm": 0.18091969192028046, "learning_rate": 0.002, "loss": 2.5616, "step": 293660 }, { "epoch": 0.5850559416039781, "grad_norm": 0.17340222001075745, "learning_rate": 0.002, "loss": 2.5539, "step": 293670 }, { "epoch": 0.585075863827617, "grad_norm": 0.17353938519954681, "learning_rate": 0.002, "loss": 2.5365, "step": 293680 }, { "epoch": 0.5850957860512559, "grad_norm": 0.1465996354818344, "learning_rate": 0.002, "loss": 2.5655, "step": 293690 }, { "epoch": 0.5851157082748948, "grad_norm": 0.1947689652442932, "learning_rate": 0.002, "loss": 2.561, "step": 293700 }, { "epoch": 0.5851356304985337, "grad_norm": 0.1776936948299408, "learning_rate": 0.002, "loss": 2.5616, "step": 293710 }, { "epoch": 0.5851555527221727, "grad_norm": 0.1582491546869278, "learning_rate": 0.002, "loss": 2.5656, "step": 293720 }, { "epoch": 0.5851754749458116, "grad_norm": 0.147243931889534, "learning_rate": 0.002, "loss": 2.549, "step": 293730 }, { "epoch": 0.5851953971694505, "grad_norm": 0.1881961226463318, "learning_rate": 0.002, "loss": 2.5587, "step": 293740 }, { "epoch": 0.5852153193930894, "grad_norm": 0.15614284574985504, "learning_rate": 0.002, "loss": 2.5593, "step": 293750 }, { "epoch": 0.5852352416167282, "grad_norm": 0.18670475482940674, "learning_rate": 0.002, "loss": 2.5492, "step": 293760 }, { "epoch": 0.5852551638403672, "grad_norm": 0.19193825125694275, "learning_rate": 0.002, "loss": 2.5411, "step": 293770 }, { "epoch": 0.5852750860640061, "grad_norm": 0.14258445799350739, "learning_rate": 0.002, "loss": 2.5664, "step": 293780 }, { "epoch": 0.585295008287645, "grad_norm": 0.1512601226568222, "learning_rate": 0.002, "loss": 2.5625, "step": 293790 }, { "epoch": 0.5853149305112839, "grad_norm": 0.19566041231155396, "learning_rate": 0.002, "loss": 2.5746, "step": 293800 }, { "epoch": 0.5853348527349228, "grad_norm": 0.16185607016086578, "learning_rate": 0.002, "loss": 2.5423, "step": 293810 }, { "epoch": 0.5853547749585618, "grad_norm": 0.16046828031539917, "learning_rate": 0.002, "loss": 2.5599, "step": 293820 }, { "epoch": 0.5853746971822007, "grad_norm": 0.15019391477108002, "learning_rate": 0.002, "loss": 2.5497, "step": 293830 }, { "epoch": 0.5853946194058396, "grad_norm": 0.21550658345222473, "learning_rate": 0.002, "loss": 2.5678, "step": 293840 }, { "epoch": 0.5854145416294785, "grad_norm": 0.1619943529367447, "learning_rate": 0.002, "loss": 2.564, "step": 293850 }, { "epoch": 0.5854344638531175, "grad_norm": 0.21168427169322968, "learning_rate": 0.002, "loss": 2.5561, "step": 293860 }, { "epoch": 0.5854543860767564, "grad_norm": 0.14833098649978638, "learning_rate": 0.002, "loss": 2.556, "step": 293870 }, { "epoch": 0.5854743083003953, "grad_norm": 0.1510903239250183, "learning_rate": 0.002, "loss": 2.55, "step": 293880 }, { "epoch": 0.5854942305240342, "grad_norm": 0.18763047456741333, "learning_rate": 0.002, "loss": 2.5625, "step": 293890 }, { "epoch": 0.585514152747673, "grad_norm": 0.16488976776599884, "learning_rate": 0.002, "loss": 2.5482, "step": 293900 }, { "epoch": 0.585534074971312, "grad_norm": 0.19477614760398865, "learning_rate": 0.002, "loss": 2.5613, "step": 293910 }, { "epoch": 0.5855539971949509, "grad_norm": 0.16111141443252563, "learning_rate": 0.002, "loss": 2.5659, "step": 293920 }, { "epoch": 0.5855739194185898, "grad_norm": 0.1525183469057083, "learning_rate": 0.002, "loss": 2.5505, "step": 293930 }, { "epoch": 0.5855938416422287, "grad_norm": 0.17668333649635315, "learning_rate": 0.002, "loss": 2.548, "step": 293940 }, { "epoch": 0.5856137638658676, "grad_norm": 0.17080388963222504, "learning_rate": 0.002, "loss": 2.5569, "step": 293950 }, { "epoch": 0.5856336860895066, "grad_norm": 0.1761157065629959, "learning_rate": 0.002, "loss": 2.561, "step": 293960 }, { "epoch": 0.5856536083131455, "grad_norm": 0.17444287240505219, "learning_rate": 0.002, "loss": 2.5586, "step": 293970 }, { "epoch": 0.5856735305367844, "grad_norm": 0.17353969812393188, "learning_rate": 0.002, "loss": 2.5479, "step": 293980 }, { "epoch": 0.5856934527604233, "grad_norm": 0.13644741475582123, "learning_rate": 0.002, "loss": 2.5593, "step": 293990 }, { "epoch": 0.5857133749840622, "grad_norm": 0.20659813284873962, "learning_rate": 0.002, "loss": 2.5612, "step": 294000 }, { "epoch": 0.5857332972077012, "grad_norm": 0.16768121719360352, "learning_rate": 0.002, "loss": 2.5666, "step": 294010 }, { "epoch": 0.5857532194313401, "grad_norm": 0.14910860359668732, "learning_rate": 0.002, "loss": 2.5684, "step": 294020 }, { "epoch": 0.585773141654979, "grad_norm": 0.18440578877925873, "learning_rate": 0.002, "loss": 2.5535, "step": 294030 }, { "epoch": 0.5857930638786178, "grad_norm": 0.1439274400472641, "learning_rate": 0.002, "loss": 2.5504, "step": 294040 }, { "epoch": 0.5858129861022567, "grad_norm": 0.14702998101711273, "learning_rate": 0.002, "loss": 2.5414, "step": 294050 }, { "epoch": 0.5858329083258957, "grad_norm": 0.21213959157466888, "learning_rate": 0.002, "loss": 2.5618, "step": 294060 }, { "epoch": 0.5858528305495346, "grad_norm": 0.1555112600326538, "learning_rate": 0.002, "loss": 2.5566, "step": 294070 }, { "epoch": 0.5858727527731735, "grad_norm": 0.16404671967029572, "learning_rate": 0.002, "loss": 2.5524, "step": 294080 }, { "epoch": 0.5858926749968124, "grad_norm": 0.15489274263381958, "learning_rate": 0.002, "loss": 2.564, "step": 294090 }, { "epoch": 0.5859125972204513, "grad_norm": 0.1730518490076065, "learning_rate": 0.002, "loss": 2.5506, "step": 294100 }, { "epoch": 0.5859325194440903, "grad_norm": 0.16218788921833038, "learning_rate": 0.002, "loss": 2.5774, "step": 294110 }, { "epoch": 0.5859524416677292, "grad_norm": 0.18573430180549622, "learning_rate": 0.002, "loss": 2.5573, "step": 294120 }, { "epoch": 0.5859723638913681, "grad_norm": 0.20825502276420593, "learning_rate": 0.002, "loss": 2.5428, "step": 294130 }, { "epoch": 0.585992286115007, "grad_norm": 0.16410240530967712, "learning_rate": 0.002, "loss": 2.5655, "step": 294140 }, { "epoch": 0.586012208338646, "grad_norm": 0.17990106344223022, "learning_rate": 0.002, "loss": 2.5588, "step": 294150 }, { "epoch": 0.5860321305622849, "grad_norm": 0.1647447645664215, "learning_rate": 0.002, "loss": 2.5403, "step": 294160 }, { "epoch": 0.5860520527859238, "grad_norm": 0.1784127950668335, "learning_rate": 0.002, "loss": 2.5495, "step": 294170 }, { "epoch": 0.5860719750095627, "grad_norm": 0.14385715126991272, "learning_rate": 0.002, "loss": 2.549, "step": 294180 }, { "epoch": 0.5860918972332015, "grad_norm": 0.15103299915790558, "learning_rate": 0.002, "loss": 2.5722, "step": 294190 }, { "epoch": 0.5861118194568405, "grad_norm": 0.17443999648094177, "learning_rate": 0.002, "loss": 2.5694, "step": 294200 }, { "epoch": 0.5861317416804794, "grad_norm": 0.2069498598575592, "learning_rate": 0.002, "loss": 2.5614, "step": 294210 }, { "epoch": 0.5861516639041183, "grad_norm": 0.18661849200725555, "learning_rate": 0.002, "loss": 2.5571, "step": 294220 }, { "epoch": 0.5861715861277572, "grad_norm": 0.13434389233589172, "learning_rate": 0.002, "loss": 2.5536, "step": 294230 }, { "epoch": 0.5861915083513961, "grad_norm": 0.2000839114189148, "learning_rate": 0.002, "loss": 2.5588, "step": 294240 }, { "epoch": 0.5862114305750351, "grad_norm": 0.1485411524772644, "learning_rate": 0.002, "loss": 2.5623, "step": 294250 }, { "epoch": 0.586231352798674, "grad_norm": 0.1529722511768341, "learning_rate": 0.002, "loss": 2.5583, "step": 294260 }, { "epoch": 0.5862512750223129, "grad_norm": 0.15464310348033905, "learning_rate": 0.002, "loss": 2.5566, "step": 294270 }, { "epoch": 0.5862711972459518, "grad_norm": 0.14341871440410614, "learning_rate": 0.002, "loss": 2.5429, "step": 294280 }, { "epoch": 0.5862911194695907, "grad_norm": 0.1864403486251831, "learning_rate": 0.002, "loss": 2.5475, "step": 294290 }, { "epoch": 0.5863110416932297, "grad_norm": 0.20495247840881348, "learning_rate": 0.002, "loss": 2.5766, "step": 294300 }, { "epoch": 0.5863309639168686, "grad_norm": 0.1661427915096283, "learning_rate": 0.002, "loss": 2.5711, "step": 294310 }, { "epoch": 0.5863508861405075, "grad_norm": 0.13947199285030365, "learning_rate": 0.002, "loss": 2.5505, "step": 294320 }, { "epoch": 0.5863708083641463, "grad_norm": 0.20752139389514923, "learning_rate": 0.002, "loss": 2.5388, "step": 294330 }, { "epoch": 0.5863907305877852, "grad_norm": 0.17871366441249847, "learning_rate": 0.002, "loss": 2.5545, "step": 294340 }, { "epoch": 0.5864106528114242, "grad_norm": 0.12048429995775223, "learning_rate": 0.002, "loss": 2.5588, "step": 294350 }, { "epoch": 0.5864305750350631, "grad_norm": 0.18286748230457306, "learning_rate": 0.002, "loss": 2.567, "step": 294360 }, { "epoch": 0.586450497258702, "grad_norm": 0.1458229273557663, "learning_rate": 0.002, "loss": 2.5502, "step": 294370 }, { "epoch": 0.5864704194823409, "grad_norm": 0.16136613488197327, "learning_rate": 0.002, "loss": 2.5577, "step": 294380 }, { "epoch": 0.5864903417059798, "grad_norm": 0.1607799530029297, "learning_rate": 0.002, "loss": 2.5692, "step": 294390 }, { "epoch": 0.5865102639296188, "grad_norm": 0.1771295815706253, "learning_rate": 0.002, "loss": 2.571, "step": 294400 }, { "epoch": 0.5865301861532577, "grad_norm": 0.18582646548748016, "learning_rate": 0.002, "loss": 2.5634, "step": 294410 }, { "epoch": 0.5865501083768966, "grad_norm": 0.1720215380191803, "learning_rate": 0.002, "loss": 2.5763, "step": 294420 }, { "epoch": 0.5865700306005355, "grad_norm": 0.18990598618984222, "learning_rate": 0.002, "loss": 2.5484, "step": 294430 }, { "epoch": 0.5865899528241745, "grad_norm": 0.16441938281059265, "learning_rate": 0.002, "loss": 2.564, "step": 294440 }, { "epoch": 0.5866098750478134, "grad_norm": 0.17629587650299072, "learning_rate": 0.002, "loss": 2.5447, "step": 294450 }, { "epoch": 0.5866297972714523, "grad_norm": 0.2066188007593155, "learning_rate": 0.002, "loss": 2.5662, "step": 294460 }, { "epoch": 0.5866497194950911, "grad_norm": 0.14475959539413452, "learning_rate": 0.002, "loss": 2.557, "step": 294470 }, { "epoch": 0.58666964171873, "grad_norm": 0.17280636727809906, "learning_rate": 0.002, "loss": 2.5515, "step": 294480 }, { "epoch": 0.586689563942369, "grad_norm": 0.1659756898880005, "learning_rate": 0.002, "loss": 2.5528, "step": 294490 }, { "epoch": 0.5867094861660079, "grad_norm": 0.17410163581371307, "learning_rate": 0.002, "loss": 2.5623, "step": 294500 }, { "epoch": 0.5867294083896468, "grad_norm": 0.1616571545600891, "learning_rate": 0.002, "loss": 2.5425, "step": 294510 }, { "epoch": 0.5867493306132857, "grad_norm": 0.16018261015415192, "learning_rate": 0.002, "loss": 2.5586, "step": 294520 }, { "epoch": 0.5867692528369246, "grad_norm": 0.19003422558307648, "learning_rate": 0.002, "loss": 2.5621, "step": 294530 }, { "epoch": 0.5867891750605636, "grad_norm": 0.15513600409030914, "learning_rate": 0.002, "loss": 2.5717, "step": 294540 }, { "epoch": 0.5868090972842025, "grad_norm": 0.17400896549224854, "learning_rate": 0.002, "loss": 2.5628, "step": 294550 }, { "epoch": 0.5868290195078414, "grad_norm": 0.21722017228603363, "learning_rate": 0.002, "loss": 2.5586, "step": 294560 }, { "epoch": 0.5868489417314803, "grad_norm": 0.15998563170433044, "learning_rate": 0.002, "loss": 2.5473, "step": 294570 }, { "epoch": 0.5868688639551192, "grad_norm": 0.161086767911911, "learning_rate": 0.002, "loss": 2.5383, "step": 294580 }, { "epoch": 0.5868887861787582, "grad_norm": 0.15469211339950562, "learning_rate": 0.002, "loss": 2.5408, "step": 294590 }, { "epoch": 0.5869087084023971, "grad_norm": 0.15244317054748535, "learning_rate": 0.002, "loss": 2.5334, "step": 294600 }, { "epoch": 0.586928630626036, "grad_norm": 0.17004334926605225, "learning_rate": 0.002, "loss": 2.5619, "step": 294610 }, { "epoch": 0.5869485528496748, "grad_norm": 0.15360097587108612, "learning_rate": 0.002, "loss": 2.5617, "step": 294620 }, { "epoch": 0.5869684750733137, "grad_norm": 0.1715124100446701, "learning_rate": 0.002, "loss": 2.5591, "step": 294630 }, { "epoch": 0.5869883972969527, "grad_norm": 0.15457744896411896, "learning_rate": 0.002, "loss": 2.5425, "step": 294640 }, { "epoch": 0.5870083195205916, "grad_norm": 0.1722102165222168, "learning_rate": 0.002, "loss": 2.5607, "step": 294650 }, { "epoch": 0.5870282417442305, "grad_norm": 0.18472696840763092, "learning_rate": 0.002, "loss": 2.5541, "step": 294660 }, { "epoch": 0.5870481639678694, "grad_norm": 0.1316348761320114, "learning_rate": 0.002, "loss": 2.5569, "step": 294670 }, { "epoch": 0.5870680861915083, "grad_norm": 0.19013531506061554, "learning_rate": 0.002, "loss": 2.5584, "step": 294680 }, { "epoch": 0.5870880084151473, "grad_norm": 0.16470180451869965, "learning_rate": 0.002, "loss": 2.5632, "step": 294690 }, { "epoch": 0.5871079306387862, "grad_norm": 0.14955230057239532, "learning_rate": 0.002, "loss": 2.551, "step": 294700 }, { "epoch": 0.5871278528624251, "grad_norm": 0.1678304821252823, "learning_rate": 0.002, "loss": 2.5404, "step": 294710 }, { "epoch": 0.587147775086064, "grad_norm": 0.15723583102226257, "learning_rate": 0.002, "loss": 2.5671, "step": 294720 }, { "epoch": 0.587167697309703, "grad_norm": 0.19405114650726318, "learning_rate": 0.002, "loss": 2.5678, "step": 294730 }, { "epoch": 0.5871876195333419, "grad_norm": 0.14437314867973328, "learning_rate": 0.002, "loss": 2.523, "step": 294740 }, { "epoch": 0.5872075417569808, "grad_norm": 0.15849706530570984, "learning_rate": 0.002, "loss": 2.5673, "step": 294750 }, { "epoch": 0.5872274639806196, "grad_norm": 0.16198688745498657, "learning_rate": 0.002, "loss": 2.5579, "step": 294760 }, { "epoch": 0.5872473862042585, "grad_norm": 0.17709313333034515, "learning_rate": 0.002, "loss": 2.5549, "step": 294770 }, { "epoch": 0.5872673084278975, "grad_norm": 0.17983543872833252, "learning_rate": 0.002, "loss": 2.566, "step": 294780 }, { "epoch": 0.5872872306515364, "grad_norm": 0.16422830522060394, "learning_rate": 0.002, "loss": 2.5491, "step": 294790 }, { "epoch": 0.5873071528751753, "grad_norm": 0.17950427532196045, "learning_rate": 0.002, "loss": 2.5707, "step": 294800 }, { "epoch": 0.5873270750988142, "grad_norm": 0.17900943756103516, "learning_rate": 0.002, "loss": 2.5456, "step": 294810 }, { "epoch": 0.5873469973224531, "grad_norm": 0.15254931151866913, "learning_rate": 0.002, "loss": 2.5537, "step": 294820 }, { "epoch": 0.5873669195460921, "grad_norm": 0.19395293295383453, "learning_rate": 0.002, "loss": 2.5639, "step": 294830 }, { "epoch": 0.587386841769731, "grad_norm": 0.16805511713027954, "learning_rate": 0.002, "loss": 2.555, "step": 294840 }, { "epoch": 0.5874067639933699, "grad_norm": 0.15904821455478668, "learning_rate": 0.002, "loss": 2.5442, "step": 294850 }, { "epoch": 0.5874266862170088, "grad_norm": 0.192759707570076, "learning_rate": 0.002, "loss": 2.5551, "step": 294860 }, { "epoch": 0.5874466084406477, "grad_norm": 0.1578710675239563, "learning_rate": 0.002, "loss": 2.5684, "step": 294870 }, { "epoch": 0.5874665306642867, "grad_norm": 0.35591185092926025, "learning_rate": 0.002, "loss": 2.5553, "step": 294880 }, { "epoch": 0.5874864528879256, "grad_norm": 0.15541040897369385, "learning_rate": 0.002, "loss": 2.5618, "step": 294890 }, { "epoch": 0.5875063751115644, "grad_norm": 0.19752247631549835, "learning_rate": 0.002, "loss": 2.5524, "step": 294900 }, { "epoch": 0.5875262973352033, "grad_norm": 0.1797444373369217, "learning_rate": 0.002, "loss": 2.5678, "step": 294910 }, { "epoch": 0.5875462195588422, "grad_norm": 0.13193851709365845, "learning_rate": 0.002, "loss": 2.5647, "step": 294920 }, { "epoch": 0.5875661417824812, "grad_norm": 0.20366300642490387, "learning_rate": 0.002, "loss": 2.5604, "step": 294930 }, { "epoch": 0.5875860640061201, "grad_norm": 0.1388874500989914, "learning_rate": 0.002, "loss": 2.5499, "step": 294940 }, { "epoch": 0.587605986229759, "grad_norm": 0.14360038936138153, "learning_rate": 0.002, "loss": 2.5545, "step": 294950 }, { "epoch": 0.5876259084533979, "grad_norm": 0.18907546997070312, "learning_rate": 0.002, "loss": 2.5595, "step": 294960 }, { "epoch": 0.5876458306770368, "grad_norm": 0.18052616715431213, "learning_rate": 0.002, "loss": 2.5561, "step": 294970 }, { "epoch": 0.5876657529006758, "grad_norm": 0.17384685575962067, "learning_rate": 0.002, "loss": 2.5558, "step": 294980 }, { "epoch": 0.5876856751243147, "grad_norm": 0.169564351439476, "learning_rate": 0.002, "loss": 2.5565, "step": 294990 }, { "epoch": 0.5877055973479536, "grad_norm": 0.16273856163024902, "learning_rate": 0.002, "loss": 2.5679, "step": 295000 }, { "epoch": 0.5877255195715925, "grad_norm": 0.15975922346115112, "learning_rate": 0.002, "loss": 2.5593, "step": 295010 }, { "epoch": 0.5877454417952315, "grad_norm": 0.13769222795963287, "learning_rate": 0.002, "loss": 2.5604, "step": 295020 }, { "epoch": 0.5877653640188704, "grad_norm": 0.13846464455127716, "learning_rate": 0.002, "loss": 2.5461, "step": 295030 }, { "epoch": 0.5877852862425093, "grad_norm": 0.16188238561153412, "learning_rate": 0.002, "loss": 2.5539, "step": 295040 }, { "epoch": 0.5878052084661481, "grad_norm": 0.18002790212631226, "learning_rate": 0.002, "loss": 2.5621, "step": 295050 }, { "epoch": 0.587825130689787, "grad_norm": 0.14381743967533112, "learning_rate": 0.002, "loss": 2.5622, "step": 295060 }, { "epoch": 0.587845052913426, "grad_norm": 0.19244413077831268, "learning_rate": 0.002, "loss": 2.5683, "step": 295070 }, { "epoch": 0.5878649751370649, "grad_norm": 0.17433810234069824, "learning_rate": 0.002, "loss": 2.5583, "step": 295080 }, { "epoch": 0.5878848973607038, "grad_norm": 0.19076372683048248, "learning_rate": 0.002, "loss": 2.5614, "step": 295090 }, { "epoch": 0.5879048195843427, "grad_norm": 0.16055500507354736, "learning_rate": 0.002, "loss": 2.5521, "step": 295100 }, { "epoch": 0.5879247418079816, "grad_norm": 0.15493227541446686, "learning_rate": 0.002, "loss": 2.5728, "step": 295110 }, { "epoch": 0.5879446640316206, "grad_norm": 0.16916517913341522, "learning_rate": 0.002, "loss": 2.5452, "step": 295120 }, { "epoch": 0.5879645862552595, "grad_norm": 0.17478936910629272, "learning_rate": 0.002, "loss": 2.5403, "step": 295130 }, { "epoch": 0.5879845084788984, "grad_norm": 0.14800962805747986, "learning_rate": 0.002, "loss": 2.5578, "step": 295140 }, { "epoch": 0.5880044307025373, "grad_norm": 0.17232009768486023, "learning_rate": 0.002, "loss": 2.5715, "step": 295150 }, { "epoch": 0.5880243529261762, "grad_norm": 0.1696469783782959, "learning_rate": 0.002, "loss": 2.5493, "step": 295160 }, { "epoch": 0.5880442751498152, "grad_norm": 0.16490301489830017, "learning_rate": 0.002, "loss": 2.5674, "step": 295170 }, { "epoch": 0.5880641973734541, "grad_norm": 0.1467268466949463, "learning_rate": 0.002, "loss": 2.5513, "step": 295180 }, { "epoch": 0.588084119597093, "grad_norm": 0.1630958467721939, "learning_rate": 0.002, "loss": 2.5474, "step": 295190 }, { "epoch": 0.5881040418207318, "grad_norm": 0.178868368268013, "learning_rate": 0.002, "loss": 2.556, "step": 295200 }, { "epoch": 0.5881239640443707, "grad_norm": 0.1963280737400055, "learning_rate": 0.002, "loss": 2.5509, "step": 295210 }, { "epoch": 0.5881438862680097, "grad_norm": 0.15025284886360168, "learning_rate": 0.002, "loss": 2.5482, "step": 295220 }, { "epoch": 0.5881638084916486, "grad_norm": 0.14324088394641876, "learning_rate": 0.002, "loss": 2.5479, "step": 295230 }, { "epoch": 0.5881837307152875, "grad_norm": 0.21095405519008636, "learning_rate": 0.002, "loss": 2.5532, "step": 295240 }, { "epoch": 0.5882036529389264, "grad_norm": 0.18799661099910736, "learning_rate": 0.002, "loss": 2.5516, "step": 295250 }, { "epoch": 0.5882235751625653, "grad_norm": 0.15363986790180206, "learning_rate": 0.002, "loss": 2.5545, "step": 295260 }, { "epoch": 0.5882434973862043, "grad_norm": 0.15075525641441345, "learning_rate": 0.002, "loss": 2.5517, "step": 295270 }, { "epoch": 0.5882634196098432, "grad_norm": 0.16793964803218842, "learning_rate": 0.002, "loss": 2.5421, "step": 295280 }, { "epoch": 0.5882833418334821, "grad_norm": 0.18697203695774078, "learning_rate": 0.002, "loss": 2.5558, "step": 295290 }, { "epoch": 0.588303264057121, "grad_norm": 0.15559688210487366, "learning_rate": 0.002, "loss": 2.5605, "step": 295300 }, { "epoch": 0.58832318628076, "grad_norm": 0.18928715586662292, "learning_rate": 0.002, "loss": 2.5486, "step": 295310 }, { "epoch": 0.5883431085043989, "grad_norm": 0.1686767041683197, "learning_rate": 0.002, "loss": 2.5564, "step": 295320 }, { "epoch": 0.5883630307280378, "grad_norm": 0.18325801193714142, "learning_rate": 0.002, "loss": 2.559, "step": 295330 }, { "epoch": 0.5883829529516766, "grad_norm": 0.15306958556175232, "learning_rate": 0.002, "loss": 2.5532, "step": 295340 }, { "epoch": 0.5884028751753155, "grad_norm": 0.1730383038520813, "learning_rate": 0.002, "loss": 2.5694, "step": 295350 }, { "epoch": 0.5884227973989545, "grad_norm": 0.16050879657268524, "learning_rate": 0.002, "loss": 2.5566, "step": 295360 }, { "epoch": 0.5884427196225934, "grad_norm": 0.20945769548416138, "learning_rate": 0.002, "loss": 2.5503, "step": 295370 }, { "epoch": 0.5884626418462323, "grad_norm": 0.14059458673000336, "learning_rate": 0.002, "loss": 2.558, "step": 295380 }, { "epoch": 0.5884825640698712, "grad_norm": 0.15726536512374878, "learning_rate": 0.002, "loss": 2.5705, "step": 295390 }, { "epoch": 0.5885024862935101, "grad_norm": 0.1649731695652008, "learning_rate": 0.002, "loss": 2.5583, "step": 295400 }, { "epoch": 0.5885224085171491, "grad_norm": 0.15414509177207947, "learning_rate": 0.002, "loss": 2.5626, "step": 295410 }, { "epoch": 0.588542330740788, "grad_norm": 0.15880143642425537, "learning_rate": 0.002, "loss": 2.5701, "step": 295420 }, { "epoch": 0.5885622529644269, "grad_norm": 0.17670689523220062, "learning_rate": 0.002, "loss": 2.566, "step": 295430 }, { "epoch": 0.5885821751880658, "grad_norm": 0.1602146178483963, "learning_rate": 0.002, "loss": 2.5512, "step": 295440 }, { "epoch": 0.5886020974117047, "grad_norm": 0.16181685030460358, "learning_rate": 0.002, "loss": 2.562, "step": 295450 }, { "epoch": 0.5886220196353437, "grad_norm": 0.15865923464298248, "learning_rate": 0.002, "loss": 2.542, "step": 295460 }, { "epoch": 0.5886419418589826, "grad_norm": 0.18210692703723907, "learning_rate": 0.002, "loss": 2.5457, "step": 295470 }, { "epoch": 0.5886618640826214, "grad_norm": 0.15151408314704895, "learning_rate": 0.002, "loss": 2.5561, "step": 295480 }, { "epoch": 0.5886817863062603, "grad_norm": 0.17131207883358002, "learning_rate": 0.002, "loss": 2.5638, "step": 295490 }, { "epoch": 0.5887017085298992, "grad_norm": 0.16359536349773407, "learning_rate": 0.002, "loss": 2.5498, "step": 295500 }, { "epoch": 0.5887216307535382, "grad_norm": 0.1620762050151825, "learning_rate": 0.002, "loss": 2.5479, "step": 295510 }, { "epoch": 0.5887415529771771, "grad_norm": 0.1831769198179245, "learning_rate": 0.002, "loss": 2.5551, "step": 295520 }, { "epoch": 0.588761475200816, "grad_norm": 0.18848484754562378, "learning_rate": 0.002, "loss": 2.5492, "step": 295530 }, { "epoch": 0.5887813974244549, "grad_norm": 0.15413491427898407, "learning_rate": 0.002, "loss": 2.5552, "step": 295540 }, { "epoch": 0.5888013196480938, "grad_norm": 0.16613362729549408, "learning_rate": 0.002, "loss": 2.5756, "step": 295550 }, { "epoch": 0.5888212418717328, "grad_norm": 0.19365864992141724, "learning_rate": 0.002, "loss": 2.5601, "step": 295560 }, { "epoch": 0.5888411640953717, "grad_norm": 0.1606748402118683, "learning_rate": 0.002, "loss": 2.5472, "step": 295570 }, { "epoch": 0.5888610863190106, "grad_norm": 0.18089784681797028, "learning_rate": 0.002, "loss": 2.5461, "step": 295580 }, { "epoch": 0.5888810085426495, "grad_norm": 0.16085873544216156, "learning_rate": 0.002, "loss": 2.5532, "step": 295590 }, { "epoch": 0.5889009307662884, "grad_norm": 0.15361353754997253, "learning_rate": 0.002, "loss": 2.5477, "step": 295600 }, { "epoch": 0.5889208529899274, "grad_norm": 0.17447011172771454, "learning_rate": 0.002, "loss": 2.5696, "step": 295610 }, { "epoch": 0.5889407752135662, "grad_norm": 0.17634877562522888, "learning_rate": 0.002, "loss": 2.551, "step": 295620 }, { "epoch": 0.5889606974372051, "grad_norm": 0.210472971200943, "learning_rate": 0.002, "loss": 2.5508, "step": 295630 }, { "epoch": 0.588980619660844, "grad_norm": 0.1710597723722458, "learning_rate": 0.002, "loss": 2.5505, "step": 295640 }, { "epoch": 0.589000541884483, "grad_norm": 0.16326451301574707, "learning_rate": 0.002, "loss": 2.5672, "step": 295650 }, { "epoch": 0.5890204641081219, "grad_norm": 0.15365956723690033, "learning_rate": 0.002, "loss": 2.5499, "step": 295660 }, { "epoch": 0.5890403863317608, "grad_norm": 0.18728211522102356, "learning_rate": 0.002, "loss": 2.562, "step": 295670 }, { "epoch": 0.5890603085553997, "grad_norm": 0.17220169305801392, "learning_rate": 0.002, "loss": 2.5612, "step": 295680 }, { "epoch": 0.5890802307790386, "grad_norm": 0.17156286537647247, "learning_rate": 0.002, "loss": 2.5675, "step": 295690 }, { "epoch": 0.5891001530026776, "grad_norm": 0.17134587466716766, "learning_rate": 0.002, "loss": 2.5485, "step": 295700 }, { "epoch": 0.5891200752263165, "grad_norm": 0.19570091366767883, "learning_rate": 0.002, "loss": 2.5511, "step": 295710 }, { "epoch": 0.5891399974499554, "grad_norm": 0.16669607162475586, "learning_rate": 0.002, "loss": 2.5677, "step": 295720 }, { "epoch": 0.5891599196735943, "grad_norm": 0.17360255122184753, "learning_rate": 0.002, "loss": 2.5405, "step": 295730 }, { "epoch": 0.5891798418972332, "grad_norm": 0.14529944956302643, "learning_rate": 0.002, "loss": 2.5633, "step": 295740 }, { "epoch": 0.5891997641208722, "grad_norm": 0.23826327919960022, "learning_rate": 0.002, "loss": 2.5653, "step": 295750 }, { "epoch": 0.589219686344511, "grad_norm": 0.19372732937335968, "learning_rate": 0.002, "loss": 2.5475, "step": 295760 }, { "epoch": 0.5892396085681499, "grad_norm": 0.16038157045841217, "learning_rate": 0.002, "loss": 2.5565, "step": 295770 }, { "epoch": 0.5892595307917888, "grad_norm": 0.1391245275735855, "learning_rate": 0.002, "loss": 2.5683, "step": 295780 }, { "epoch": 0.5892794530154277, "grad_norm": 0.1517147570848465, "learning_rate": 0.002, "loss": 2.557, "step": 295790 }, { "epoch": 0.5892993752390667, "grad_norm": 0.17369693517684937, "learning_rate": 0.002, "loss": 2.5472, "step": 295800 }, { "epoch": 0.5893192974627056, "grad_norm": 0.1781371533870697, "learning_rate": 0.002, "loss": 2.5656, "step": 295810 }, { "epoch": 0.5893392196863445, "grad_norm": 0.1942821741104126, "learning_rate": 0.002, "loss": 2.5471, "step": 295820 }, { "epoch": 0.5893591419099834, "grad_norm": 0.1460486799478531, "learning_rate": 0.002, "loss": 2.5642, "step": 295830 }, { "epoch": 0.5893790641336223, "grad_norm": 0.15325051546096802, "learning_rate": 0.002, "loss": 2.5496, "step": 295840 }, { "epoch": 0.5893989863572613, "grad_norm": 0.19152629375457764, "learning_rate": 0.002, "loss": 2.5722, "step": 295850 }, { "epoch": 0.5894189085809002, "grad_norm": 0.1576721966266632, "learning_rate": 0.002, "loss": 2.569, "step": 295860 }, { "epoch": 0.5894388308045391, "grad_norm": 0.17082108557224274, "learning_rate": 0.002, "loss": 2.554, "step": 295870 }, { "epoch": 0.589458753028178, "grad_norm": 0.1547440141439438, "learning_rate": 0.002, "loss": 2.5547, "step": 295880 }, { "epoch": 0.5894786752518169, "grad_norm": 0.19205015897750854, "learning_rate": 0.002, "loss": 2.5495, "step": 295890 }, { "epoch": 0.5894985974754559, "grad_norm": 0.14404024183750153, "learning_rate": 0.002, "loss": 2.5685, "step": 295900 }, { "epoch": 0.5895185196990947, "grad_norm": 0.17731419205665588, "learning_rate": 0.002, "loss": 2.5586, "step": 295910 }, { "epoch": 0.5895384419227336, "grad_norm": 0.20890265703201294, "learning_rate": 0.002, "loss": 2.5618, "step": 295920 }, { "epoch": 0.5895583641463725, "grad_norm": 0.20370988547801971, "learning_rate": 0.002, "loss": 2.5574, "step": 295930 }, { "epoch": 0.5895782863700115, "grad_norm": 0.15822964906692505, "learning_rate": 0.002, "loss": 2.5339, "step": 295940 }, { "epoch": 0.5895982085936504, "grad_norm": 0.157165065407753, "learning_rate": 0.002, "loss": 2.5516, "step": 295950 }, { "epoch": 0.5896181308172893, "grad_norm": 0.18818631768226624, "learning_rate": 0.002, "loss": 2.5425, "step": 295960 }, { "epoch": 0.5896380530409282, "grad_norm": 0.1655951887369156, "learning_rate": 0.002, "loss": 2.5258, "step": 295970 }, { "epoch": 0.5896579752645671, "grad_norm": 0.1478910744190216, "learning_rate": 0.002, "loss": 2.5636, "step": 295980 }, { "epoch": 0.5896778974882061, "grad_norm": 0.16217103600502014, "learning_rate": 0.002, "loss": 2.5609, "step": 295990 }, { "epoch": 0.589697819711845, "grad_norm": 0.17616544663906097, "learning_rate": 0.002, "loss": 2.5477, "step": 296000 }, { "epoch": 0.5897177419354839, "grad_norm": 0.1652982383966446, "learning_rate": 0.002, "loss": 2.5461, "step": 296010 }, { "epoch": 0.5897376641591228, "grad_norm": 0.21548397839069366, "learning_rate": 0.002, "loss": 2.5627, "step": 296020 }, { "epoch": 0.5897575863827617, "grad_norm": 0.1697302907705307, "learning_rate": 0.002, "loss": 2.5537, "step": 296030 }, { "epoch": 0.5897775086064007, "grad_norm": 0.1400667279958725, "learning_rate": 0.002, "loss": 2.5585, "step": 296040 }, { "epoch": 0.5897974308300395, "grad_norm": 0.2073093205690384, "learning_rate": 0.002, "loss": 2.5639, "step": 296050 }, { "epoch": 0.5898173530536784, "grad_norm": 0.19479262828826904, "learning_rate": 0.002, "loss": 2.5539, "step": 296060 }, { "epoch": 0.5898372752773173, "grad_norm": 0.16689303517341614, "learning_rate": 0.002, "loss": 2.5548, "step": 296070 }, { "epoch": 0.5898571975009562, "grad_norm": 0.14116442203521729, "learning_rate": 0.002, "loss": 2.5623, "step": 296080 }, { "epoch": 0.5898771197245952, "grad_norm": 0.1788661628961563, "learning_rate": 0.002, "loss": 2.54, "step": 296090 }, { "epoch": 0.5898970419482341, "grad_norm": 0.1625576764345169, "learning_rate": 0.002, "loss": 2.5449, "step": 296100 }, { "epoch": 0.589916964171873, "grad_norm": 0.14759956300258636, "learning_rate": 0.002, "loss": 2.5445, "step": 296110 }, { "epoch": 0.5899368863955119, "grad_norm": 0.21524420380592346, "learning_rate": 0.002, "loss": 2.5833, "step": 296120 }, { "epoch": 0.5899568086191508, "grad_norm": 0.16020195186138153, "learning_rate": 0.002, "loss": 2.5692, "step": 296130 }, { "epoch": 0.5899767308427898, "grad_norm": 0.17348584532737732, "learning_rate": 0.002, "loss": 2.5487, "step": 296140 }, { "epoch": 0.5899966530664287, "grad_norm": 0.16938208043575287, "learning_rate": 0.002, "loss": 2.5551, "step": 296150 }, { "epoch": 0.5900165752900676, "grad_norm": 0.1475636512041092, "learning_rate": 0.002, "loss": 2.5522, "step": 296160 }, { "epoch": 0.5900364975137065, "grad_norm": 0.18106548488140106, "learning_rate": 0.002, "loss": 2.5324, "step": 296170 }, { "epoch": 0.5900564197373454, "grad_norm": 0.2298673540353775, "learning_rate": 0.002, "loss": 2.559, "step": 296180 }, { "epoch": 0.5900763419609844, "grad_norm": 0.1649855226278305, "learning_rate": 0.002, "loss": 2.541, "step": 296190 }, { "epoch": 0.5900962641846232, "grad_norm": 0.18066300451755524, "learning_rate": 0.002, "loss": 2.5531, "step": 296200 }, { "epoch": 0.5901161864082621, "grad_norm": 0.15636521577835083, "learning_rate": 0.002, "loss": 2.5445, "step": 296210 }, { "epoch": 0.590136108631901, "grad_norm": 0.16199755668640137, "learning_rate": 0.002, "loss": 2.5474, "step": 296220 }, { "epoch": 0.59015603085554, "grad_norm": 0.2275354415178299, "learning_rate": 0.002, "loss": 2.5658, "step": 296230 }, { "epoch": 0.5901759530791789, "grad_norm": 0.16437222063541412, "learning_rate": 0.002, "loss": 2.5709, "step": 296240 }, { "epoch": 0.5901958753028178, "grad_norm": 0.15320774912834167, "learning_rate": 0.002, "loss": 2.5607, "step": 296250 }, { "epoch": 0.5902157975264567, "grad_norm": 0.15958650410175323, "learning_rate": 0.002, "loss": 2.5629, "step": 296260 }, { "epoch": 0.5902357197500956, "grad_norm": 0.1590263545513153, "learning_rate": 0.002, "loss": 2.5478, "step": 296270 }, { "epoch": 0.5902556419737346, "grad_norm": 0.16278398036956787, "learning_rate": 0.002, "loss": 2.5575, "step": 296280 }, { "epoch": 0.5902755641973735, "grad_norm": 0.19270096719264984, "learning_rate": 0.002, "loss": 2.5411, "step": 296290 }, { "epoch": 0.5902954864210124, "grad_norm": 0.16798053681850433, "learning_rate": 0.002, "loss": 2.5649, "step": 296300 }, { "epoch": 0.5903154086446513, "grad_norm": 0.1569257527589798, "learning_rate": 0.002, "loss": 2.5524, "step": 296310 }, { "epoch": 0.5903353308682902, "grad_norm": 0.19068902730941772, "learning_rate": 0.002, "loss": 2.5672, "step": 296320 }, { "epoch": 0.5903552530919292, "grad_norm": 0.12736834585666656, "learning_rate": 0.002, "loss": 2.5499, "step": 296330 }, { "epoch": 0.590375175315568, "grad_norm": 0.14915470778942108, "learning_rate": 0.002, "loss": 2.5483, "step": 296340 }, { "epoch": 0.5903950975392069, "grad_norm": 0.20806004106998444, "learning_rate": 0.002, "loss": 2.5505, "step": 296350 }, { "epoch": 0.5904150197628458, "grad_norm": 0.19791096448898315, "learning_rate": 0.002, "loss": 2.5441, "step": 296360 }, { "epoch": 0.5904349419864847, "grad_norm": 0.1702050268650055, "learning_rate": 0.002, "loss": 2.5751, "step": 296370 }, { "epoch": 0.5904548642101237, "grad_norm": 0.15695345401763916, "learning_rate": 0.002, "loss": 2.5633, "step": 296380 }, { "epoch": 0.5904747864337626, "grad_norm": 0.15471582114696503, "learning_rate": 0.002, "loss": 2.5612, "step": 296390 }, { "epoch": 0.5904947086574015, "grad_norm": 0.1873467117547989, "learning_rate": 0.002, "loss": 2.5468, "step": 296400 }, { "epoch": 0.5905146308810404, "grad_norm": 0.17275413870811462, "learning_rate": 0.002, "loss": 2.5514, "step": 296410 }, { "epoch": 0.5905345531046793, "grad_norm": 0.15029624104499817, "learning_rate": 0.002, "loss": 2.5561, "step": 296420 }, { "epoch": 0.5905544753283183, "grad_norm": 0.17293044924736023, "learning_rate": 0.002, "loss": 2.5355, "step": 296430 }, { "epoch": 0.5905743975519572, "grad_norm": 0.15659329295158386, "learning_rate": 0.002, "loss": 2.56, "step": 296440 }, { "epoch": 0.5905943197755961, "grad_norm": 0.1710749715566635, "learning_rate": 0.002, "loss": 2.5608, "step": 296450 }, { "epoch": 0.590614241999235, "grad_norm": 0.17699649930000305, "learning_rate": 0.002, "loss": 2.571, "step": 296460 }, { "epoch": 0.5906341642228738, "grad_norm": 0.19935031235218048, "learning_rate": 0.002, "loss": 2.5551, "step": 296470 }, { "epoch": 0.5906540864465128, "grad_norm": 0.18623463809490204, "learning_rate": 0.002, "loss": 2.5534, "step": 296480 }, { "epoch": 0.5906740086701517, "grad_norm": 0.1446717530488968, "learning_rate": 0.002, "loss": 2.5473, "step": 296490 }, { "epoch": 0.5906939308937906, "grad_norm": 0.14951202273368835, "learning_rate": 0.002, "loss": 2.5561, "step": 296500 }, { "epoch": 0.5907138531174295, "grad_norm": 0.17390884459018707, "learning_rate": 0.002, "loss": 2.5474, "step": 296510 }, { "epoch": 0.5907337753410685, "grad_norm": 0.19658680260181427, "learning_rate": 0.002, "loss": 2.5484, "step": 296520 }, { "epoch": 0.5907536975647074, "grad_norm": 0.1923038810491562, "learning_rate": 0.002, "loss": 2.5636, "step": 296530 }, { "epoch": 0.5907736197883463, "grad_norm": 0.1429646611213684, "learning_rate": 0.002, "loss": 2.5437, "step": 296540 }, { "epoch": 0.5907935420119852, "grad_norm": 0.1434108316898346, "learning_rate": 0.002, "loss": 2.5463, "step": 296550 }, { "epoch": 0.5908134642356241, "grad_norm": 0.16034866869449615, "learning_rate": 0.002, "loss": 2.5687, "step": 296560 }, { "epoch": 0.5908333864592631, "grad_norm": 0.17397135496139526, "learning_rate": 0.002, "loss": 2.5576, "step": 296570 }, { "epoch": 0.590853308682902, "grad_norm": 0.1589755415916443, "learning_rate": 0.002, "loss": 2.5568, "step": 296580 }, { "epoch": 0.5908732309065409, "grad_norm": 0.1917722076177597, "learning_rate": 0.002, "loss": 2.5733, "step": 296590 }, { "epoch": 0.5908931531301798, "grad_norm": 0.16921158134937286, "learning_rate": 0.002, "loss": 2.5592, "step": 296600 }, { "epoch": 0.5909130753538187, "grad_norm": 0.1936924010515213, "learning_rate": 0.002, "loss": 2.551, "step": 296610 }, { "epoch": 0.5909329975774577, "grad_norm": 0.1499132364988327, "learning_rate": 0.002, "loss": 2.5562, "step": 296620 }, { "epoch": 0.5909529198010965, "grad_norm": 0.15359432995319366, "learning_rate": 0.002, "loss": 2.5815, "step": 296630 }, { "epoch": 0.5909728420247354, "grad_norm": 0.16313353180885315, "learning_rate": 0.002, "loss": 2.5482, "step": 296640 }, { "epoch": 0.5909927642483743, "grad_norm": 0.149459570646286, "learning_rate": 0.002, "loss": 2.5523, "step": 296650 }, { "epoch": 0.5910126864720132, "grad_norm": 0.19228316843509674, "learning_rate": 0.002, "loss": 2.5719, "step": 296660 }, { "epoch": 0.5910326086956522, "grad_norm": 0.2170107215642929, "learning_rate": 0.002, "loss": 2.5529, "step": 296670 }, { "epoch": 0.5910525309192911, "grad_norm": 0.1838182508945465, "learning_rate": 0.002, "loss": 2.5461, "step": 296680 }, { "epoch": 0.59107245314293, "grad_norm": 0.18536390364170074, "learning_rate": 0.002, "loss": 2.5525, "step": 296690 }, { "epoch": 0.5910923753665689, "grad_norm": 0.17813897132873535, "learning_rate": 0.002, "loss": 2.5755, "step": 296700 }, { "epoch": 0.5911122975902078, "grad_norm": 0.19226650893688202, "learning_rate": 0.002, "loss": 2.5681, "step": 296710 }, { "epoch": 0.5911322198138468, "grad_norm": 0.18096031248569489, "learning_rate": 0.002, "loss": 2.5419, "step": 296720 }, { "epoch": 0.5911521420374857, "grad_norm": 0.15634028613567352, "learning_rate": 0.002, "loss": 2.5494, "step": 296730 }, { "epoch": 0.5911720642611246, "grad_norm": 0.1405101865530014, "learning_rate": 0.002, "loss": 2.5467, "step": 296740 }, { "epoch": 0.5911919864847635, "grad_norm": 0.1826961487531662, "learning_rate": 0.002, "loss": 2.5635, "step": 296750 }, { "epoch": 0.5912119087084023, "grad_norm": 0.14311078190803528, "learning_rate": 0.002, "loss": 2.5569, "step": 296760 }, { "epoch": 0.5912318309320413, "grad_norm": 0.16967225074768066, "learning_rate": 0.002, "loss": 2.5545, "step": 296770 }, { "epoch": 0.5912517531556802, "grad_norm": 0.1917492300271988, "learning_rate": 0.002, "loss": 2.5557, "step": 296780 }, { "epoch": 0.5912716753793191, "grad_norm": 0.18054044246673584, "learning_rate": 0.002, "loss": 2.5609, "step": 296790 }, { "epoch": 0.591291597602958, "grad_norm": 0.1779407560825348, "learning_rate": 0.002, "loss": 2.5644, "step": 296800 }, { "epoch": 0.591311519826597, "grad_norm": 0.1786029189825058, "learning_rate": 0.002, "loss": 2.5567, "step": 296810 }, { "epoch": 0.5913314420502359, "grad_norm": 0.1596044898033142, "learning_rate": 0.002, "loss": 2.5593, "step": 296820 }, { "epoch": 0.5913513642738748, "grad_norm": 0.17570297420024872, "learning_rate": 0.002, "loss": 2.5557, "step": 296830 }, { "epoch": 0.5913712864975137, "grad_norm": 0.13294245302677155, "learning_rate": 0.002, "loss": 2.5583, "step": 296840 }, { "epoch": 0.5913912087211526, "grad_norm": 0.16979028284549713, "learning_rate": 0.002, "loss": 2.5629, "step": 296850 }, { "epoch": 0.5914111309447916, "grad_norm": 0.18869417905807495, "learning_rate": 0.002, "loss": 2.5595, "step": 296860 }, { "epoch": 0.5914310531684305, "grad_norm": 0.1452418863773346, "learning_rate": 0.002, "loss": 2.5591, "step": 296870 }, { "epoch": 0.5914509753920694, "grad_norm": 0.15247230231761932, "learning_rate": 0.002, "loss": 2.5513, "step": 296880 }, { "epoch": 0.5914708976157083, "grad_norm": 0.14286962151527405, "learning_rate": 0.002, "loss": 2.5379, "step": 296890 }, { "epoch": 0.5914908198393471, "grad_norm": 0.21743616461753845, "learning_rate": 0.002, "loss": 2.5627, "step": 296900 }, { "epoch": 0.5915107420629862, "grad_norm": 0.15277649462223053, "learning_rate": 0.002, "loss": 2.5519, "step": 296910 }, { "epoch": 0.591530664286625, "grad_norm": 0.1473001092672348, "learning_rate": 0.002, "loss": 2.548, "step": 296920 }, { "epoch": 0.5915505865102639, "grad_norm": 0.1637621819972992, "learning_rate": 0.002, "loss": 2.5583, "step": 296930 }, { "epoch": 0.5915705087339028, "grad_norm": 0.14642490446567535, "learning_rate": 0.002, "loss": 2.5617, "step": 296940 }, { "epoch": 0.5915904309575417, "grad_norm": 0.19292613863945007, "learning_rate": 0.002, "loss": 2.5641, "step": 296950 }, { "epoch": 0.5916103531811807, "grad_norm": 0.19722577929496765, "learning_rate": 0.002, "loss": 2.5698, "step": 296960 }, { "epoch": 0.5916302754048196, "grad_norm": 0.16433443129062653, "learning_rate": 0.002, "loss": 2.5384, "step": 296970 }, { "epoch": 0.5916501976284585, "grad_norm": 0.176587775349617, "learning_rate": 0.002, "loss": 2.5497, "step": 296980 }, { "epoch": 0.5916701198520974, "grad_norm": 0.15662041306495667, "learning_rate": 0.002, "loss": 2.5612, "step": 296990 }, { "epoch": 0.5916900420757363, "grad_norm": 0.1643158495426178, "learning_rate": 0.002, "loss": 2.5595, "step": 297000 }, { "epoch": 0.5917099642993753, "grad_norm": 0.16714882850646973, "learning_rate": 0.002, "loss": 2.5371, "step": 297010 }, { "epoch": 0.5917298865230142, "grad_norm": 0.17723853886127472, "learning_rate": 0.002, "loss": 2.5532, "step": 297020 }, { "epoch": 0.5917498087466531, "grad_norm": 0.1803097277879715, "learning_rate": 0.002, "loss": 2.5694, "step": 297030 }, { "epoch": 0.591769730970292, "grad_norm": 0.21008126437664032, "learning_rate": 0.002, "loss": 2.5597, "step": 297040 }, { "epoch": 0.5917896531939308, "grad_norm": 0.13628749549388885, "learning_rate": 0.002, "loss": 2.5657, "step": 297050 }, { "epoch": 0.5918095754175698, "grad_norm": 0.1539224237203598, "learning_rate": 0.002, "loss": 2.5591, "step": 297060 }, { "epoch": 0.5918294976412087, "grad_norm": 0.17056947946548462, "learning_rate": 0.002, "loss": 2.5577, "step": 297070 }, { "epoch": 0.5918494198648476, "grad_norm": 0.1747661679983139, "learning_rate": 0.002, "loss": 2.5474, "step": 297080 }, { "epoch": 0.5918693420884865, "grad_norm": 0.15794996917247772, "learning_rate": 0.002, "loss": 2.5587, "step": 297090 }, { "epoch": 0.5918892643121254, "grad_norm": 0.15356698632240295, "learning_rate": 0.002, "loss": 2.5502, "step": 297100 }, { "epoch": 0.5919091865357644, "grad_norm": 0.21100983023643494, "learning_rate": 0.002, "loss": 2.5589, "step": 297110 }, { "epoch": 0.5919291087594033, "grad_norm": 0.17005033791065216, "learning_rate": 0.002, "loss": 2.5536, "step": 297120 }, { "epoch": 0.5919490309830422, "grad_norm": 0.145511656999588, "learning_rate": 0.002, "loss": 2.5426, "step": 297130 }, { "epoch": 0.5919689532066811, "grad_norm": 0.16173847019672394, "learning_rate": 0.002, "loss": 2.5558, "step": 297140 }, { "epoch": 0.5919888754303201, "grad_norm": 0.12869378924369812, "learning_rate": 0.002, "loss": 2.545, "step": 297150 }, { "epoch": 0.592008797653959, "grad_norm": 0.19204209744930267, "learning_rate": 0.002, "loss": 2.5554, "step": 297160 }, { "epoch": 0.5920287198775979, "grad_norm": 0.15144626796245575, "learning_rate": 0.002, "loss": 2.5652, "step": 297170 }, { "epoch": 0.5920486421012368, "grad_norm": 0.17529556155204773, "learning_rate": 0.002, "loss": 2.5645, "step": 297180 }, { "epoch": 0.5920685643248756, "grad_norm": 0.17696475982666016, "learning_rate": 0.002, "loss": 2.5584, "step": 297190 }, { "epoch": 0.5920884865485146, "grad_norm": 0.1629180908203125, "learning_rate": 0.002, "loss": 2.5384, "step": 297200 }, { "epoch": 0.5921084087721535, "grad_norm": 0.18703213334083557, "learning_rate": 0.002, "loss": 2.5467, "step": 297210 }, { "epoch": 0.5921283309957924, "grad_norm": 0.1583642214536667, "learning_rate": 0.002, "loss": 2.5502, "step": 297220 }, { "epoch": 0.5921482532194313, "grad_norm": 0.16738668084144592, "learning_rate": 0.002, "loss": 2.5625, "step": 297230 }, { "epoch": 0.5921681754430702, "grad_norm": 0.17909540235996246, "learning_rate": 0.002, "loss": 2.5627, "step": 297240 }, { "epoch": 0.5921880976667092, "grad_norm": 0.18483230471611023, "learning_rate": 0.002, "loss": 2.5559, "step": 297250 }, { "epoch": 0.5922080198903481, "grad_norm": 0.15970660746097565, "learning_rate": 0.002, "loss": 2.5352, "step": 297260 }, { "epoch": 0.592227942113987, "grad_norm": 0.17231179773807526, "learning_rate": 0.002, "loss": 2.5418, "step": 297270 }, { "epoch": 0.5922478643376259, "grad_norm": 0.17825137078762054, "learning_rate": 0.002, "loss": 2.5623, "step": 297280 }, { "epoch": 0.5922677865612648, "grad_norm": 0.18716640770435333, "learning_rate": 0.002, "loss": 2.5472, "step": 297290 }, { "epoch": 0.5922877087849038, "grad_norm": 0.1485663801431656, "learning_rate": 0.002, "loss": 2.5435, "step": 297300 }, { "epoch": 0.5923076310085427, "grad_norm": 0.15895743668079376, "learning_rate": 0.002, "loss": 2.5469, "step": 297310 }, { "epoch": 0.5923275532321816, "grad_norm": 0.16578522324562073, "learning_rate": 0.002, "loss": 2.5673, "step": 297320 }, { "epoch": 0.5923474754558204, "grad_norm": 0.23464861512184143, "learning_rate": 0.002, "loss": 2.5653, "step": 297330 }, { "epoch": 0.5923673976794593, "grad_norm": 0.16714873909950256, "learning_rate": 0.002, "loss": 2.5534, "step": 297340 }, { "epoch": 0.5923873199030983, "grad_norm": 0.16388340294361115, "learning_rate": 0.002, "loss": 2.5527, "step": 297350 }, { "epoch": 0.5924072421267372, "grad_norm": 0.16696789860725403, "learning_rate": 0.002, "loss": 2.5677, "step": 297360 }, { "epoch": 0.5924271643503761, "grad_norm": 0.18876715004444122, "learning_rate": 0.002, "loss": 2.5569, "step": 297370 }, { "epoch": 0.592447086574015, "grad_norm": 0.1598978340625763, "learning_rate": 0.002, "loss": 2.534, "step": 297380 }, { "epoch": 0.5924670087976539, "grad_norm": 0.14984463155269623, "learning_rate": 0.002, "loss": 2.5588, "step": 297390 }, { "epoch": 0.5924869310212929, "grad_norm": 0.19165323674678802, "learning_rate": 0.002, "loss": 2.5524, "step": 297400 }, { "epoch": 0.5925068532449318, "grad_norm": 0.1956305354833603, "learning_rate": 0.002, "loss": 2.543, "step": 297410 }, { "epoch": 0.5925267754685707, "grad_norm": 0.14343903958797455, "learning_rate": 0.002, "loss": 2.5631, "step": 297420 }, { "epoch": 0.5925466976922096, "grad_norm": 0.1997242122888565, "learning_rate": 0.002, "loss": 2.5499, "step": 297430 }, { "epoch": 0.5925666199158486, "grad_norm": 0.1461227387189865, "learning_rate": 0.002, "loss": 2.5431, "step": 297440 }, { "epoch": 0.5925865421394875, "grad_norm": 0.18124383687973022, "learning_rate": 0.002, "loss": 2.5521, "step": 297450 }, { "epoch": 0.5926064643631264, "grad_norm": 0.1546635627746582, "learning_rate": 0.002, "loss": 2.5498, "step": 297460 }, { "epoch": 0.5926263865867653, "grad_norm": 0.16415084898471832, "learning_rate": 0.002, "loss": 2.5728, "step": 297470 }, { "epoch": 0.5926463088104041, "grad_norm": 0.16581380367279053, "learning_rate": 0.002, "loss": 2.5483, "step": 297480 }, { "epoch": 0.5926662310340431, "grad_norm": 0.16645000874996185, "learning_rate": 0.002, "loss": 2.5498, "step": 297490 }, { "epoch": 0.592686153257682, "grad_norm": 0.17491918802261353, "learning_rate": 0.002, "loss": 2.5532, "step": 297500 }, { "epoch": 0.5927060754813209, "grad_norm": 0.14385254681110382, "learning_rate": 0.002, "loss": 2.5712, "step": 297510 }, { "epoch": 0.5927259977049598, "grad_norm": 0.18127234280109406, "learning_rate": 0.002, "loss": 2.5609, "step": 297520 }, { "epoch": 0.5927459199285987, "grad_norm": 0.1558331847190857, "learning_rate": 0.002, "loss": 2.5464, "step": 297530 }, { "epoch": 0.5927658421522377, "grad_norm": 0.15521374344825745, "learning_rate": 0.002, "loss": 2.5511, "step": 297540 }, { "epoch": 0.5927857643758766, "grad_norm": 0.18310554325580597, "learning_rate": 0.002, "loss": 2.5534, "step": 297550 }, { "epoch": 0.5928056865995155, "grad_norm": 0.18620304763317108, "learning_rate": 0.002, "loss": 2.551, "step": 297560 }, { "epoch": 0.5928256088231544, "grad_norm": 0.15881337225437164, "learning_rate": 0.002, "loss": 2.5454, "step": 297570 }, { "epoch": 0.5928455310467933, "grad_norm": 0.16208495199680328, "learning_rate": 0.002, "loss": 2.5637, "step": 297580 }, { "epoch": 0.5928654532704323, "grad_norm": 0.19742338359355927, "learning_rate": 0.002, "loss": 2.5566, "step": 297590 }, { "epoch": 0.5928853754940712, "grad_norm": 0.16632646322250366, "learning_rate": 0.002, "loss": 2.5498, "step": 297600 }, { "epoch": 0.5929052977177101, "grad_norm": 0.1831398755311966, "learning_rate": 0.002, "loss": 2.5477, "step": 297610 }, { "epoch": 0.592925219941349, "grad_norm": 0.17897489666938782, "learning_rate": 0.002, "loss": 2.5618, "step": 297620 }, { "epoch": 0.5929451421649878, "grad_norm": 0.16411879658699036, "learning_rate": 0.002, "loss": 2.5585, "step": 297630 }, { "epoch": 0.5929650643886268, "grad_norm": 0.16731387376785278, "learning_rate": 0.002, "loss": 2.5516, "step": 297640 }, { "epoch": 0.5929849866122657, "grad_norm": 0.1602899581193924, "learning_rate": 0.002, "loss": 2.5453, "step": 297650 }, { "epoch": 0.5930049088359046, "grad_norm": 0.1687026172876358, "learning_rate": 0.002, "loss": 2.5566, "step": 297660 }, { "epoch": 0.5930248310595435, "grad_norm": 0.17355051636695862, "learning_rate": 0.002, "loss": 2.5518, "step": 297670 }, { "epoch": 0.5930447532831824, "grad_norm": 0.1397360861301422, "learning_rate": 0.002, "loss": 2.5482, "step": 297680 }, { "epoch": 0.5930646755068214, "grad_norm": 0.14845922589302063, "learning_rate": 0.002, "loss": 2.5436, "step": 297690 }, { "epoch": 0.5930845977304603, "grad_norm": 0.21575337648391724, "learning_rate": 0.002, "loss": 2.5557, "step": 297700 }, { "epoch": 0.5931045199540992, "grad_norm": 0.20011107623577118, "learning_rate": 0.002, "loss": 2.5627, "step": 297710 }, { "epoch": 0.5931244421777381, "grad_norm": 0.14908882975578308, "learning_rate": 0.002, "loss": 2.553, "step": 297720 }, { "epoch": 0.5931443644013771, "grad_norm": 0.16285865008831024, "learning_rate": 0.002, "loss": 2.5588, "step": 297730 }, { "epoch": 0.593164286625016, "grad_norm": 0.17299090325832367, "learning_rate": 0.002, "loss": 2.5481, "step": 297740 }, { "epoch": 0.5931842088486549, "grad_norm": 0.18385249376296997, "learning_rate": 0.002, "loss": 2.5569, "step": 297750 }, { "epoch": 0.5932041310722937, "grad_norm": 0.14594630897045135, "learning_rate": 0.002, "loss": 2.5494, "step": 297760 }, { "epoch": 0.5932240532959326, "grad_norm": 0.14279691874980927, "learning_rate": 0.002, "loss": 2.5597, "step": 297770 }, { "epoch": 0.5932439755195716, "grad_norm": 0.23456107079982758, "learning_rate": 0.002, "loss": 2.5536, "step": 297780 }, { "epoch": 0.5932638977432105, "grad_norm": 0.16123594343662262, "learning_rate": 0.002, "loss": 2.5687, "step": 297790 }, { "epoch": 0.5932838199668494, "grad_norm": 0.19256684184074402, "learning_rate": 0.002, "loss": 2.5664, "step": 297800 }, { "epoch": 0.5933037421904883, "grad_norm": 0.1774996966123581, "learning_rate": 0.002, "loss": 2.5486, "step": 297810 }, { "epoch": 0.5933236644141272, "grad_norm": 0.180611714720726, "learning_rate": 0.002, "loss": 2.5556, "step": 297820 }, { "epoch": 0.5933435866377662, "grad_norm": 0.1459316909313202, "learning_rate": 0.002, "loss": 2.5603, "step": 297830 }, { "epoch": 0.5933635088614051, "grad_norm": 0.1676091104745865, "learning_rate": 0.002, "loss": 2.5522, "step": 297840 }, { "epoch": 0.593383431085044, "grad_norm": 0.15871867537498474, "learning_rate": 0.002, "loss": 2.5594, "step": 297850 }, { "epoch": 0.5934033533086829, "grad_norm": 0.16632573306560516, "learning_rate": 0.002, "loss": 2.5556, "step": 297860 }, { "epoch": 0.5934232755323218, "grad_norm": 0.1666138619184494, "learning_rate": 0.002, "loss": 2.543, "step": 297870 }, { "epoch": 0.5934431977559608, "grad_norm": 0.15213264524936676, "learning_rate": 0.002, "loss": 2.551, "step": 297880 }, { "epoch": 0.5934631199795997, "grad_norm": 0.22665336728096008, "learning_rate": 0.002, "loss": 2.5624, "step": 297890 }, { "epoch": 0.5934830422032386, "grad_norm": 0.1604202687740326, "learning_rate": 0.002, "loss": 2.55, "step": 297900 }, { "epoch": 0.5935029644268774, "grad_norm": 0.14937441051006317, "learning_rate": 0.002, "loss": 2.567, "step": 297910 }, { "epoch": 0.5935228866505163, "grad_norm": 0.1907322108745575, "learning_rate": 0.002, "loss": 2.5706, "step": 297920 }, { "epoch": 0.5935428088741553, "grad_norm": 0.1761842966079712, "learning_rate": 0.002, "loss": 2.5539, "step": 297930 }, { "epoch": 0.5935627310977942, "grad_norm": 0.1684737205505371, "learning_rate": 0.002, "loss": 2.5573, "step": 297940 }, { "epoch": 0.5935826533214331, "grad_norm": 0.17550316452980042, "learning_rate": 0.002, "loss": 2.5508, "step": 297950 }, { "epoch": 0.593602575545072, "grad_norm": 0.14392831921577454, "learning_rate": 0.002, "loss": 2.5646, "step": 297960 }, { "epoch": 0.5936224977687109, "grad_norm": 0.14841189980506897, "learning_rate": 0.002, "loss": 2.5462, "step": 297970 }, { "epoch": 0.5936424199923499, "grad_norm": 0.18764612078666687, "learning_rate": 0.002, "loss": 2.5534, "step": 297980 }, { "epoch": 0.5936623422159888, "grad_norm": 0.16398857533931732, "learning_rate": 0.002, "loss": 2.5551, "step": 297990 }, { "epoch": 0.5936822644396277, "grad_norm": 0.18460558354854584, "learning_rate": 0.002, "loss": 2.5456, "step": 298000 }, { "epoch": 0.5937021866632666, "grad_norm": 0.1965004950761795, "learning_rate": 0.002, "loss": 2.5559, "step": 298010 }, { "epoch": 0.5937221088869056, "grad_norm": 0.14656071364879608, "learning_rate": 0.002, "loss": 2.5719, "step": 298020 }, { "epoch": 0.5937420311105445, "grad_norm": 0.1451081484556198, "learning_rate": 0.002, "loss": 2.5583, "step": 298030 }, { "epoch": 0.5937619533341834, "grad_norm": 0.1629127562046051, "learning_rate": 0.002, "loss": 2.5609, "step": 298040 }, { "epoch": 0.5937818755578222, "grad_norm": 0.17011268436908722, "learning_rate": 0.002, "loss": 2.545, "step": 298050 }, { "epoch": 0.5938017977814611, "grad_norm": 0.1650024801492691, "learning_rate": 0.002, "loss": 2.567, "step": 298060 }, { "epoch": 0.5938217200051001, "grad_norm": 0.16486385464668274, "learning_rate": 0.002, "loss": 2.5634, "step": 298070 }, { "epoch": 0.593841642228739, "grad_norm": 0.40080103278160095, "learning_rate": 0.002, "loss": 2.568, "step": 298080 }, { "epoch": 0.5938615644523779, "grad_norm": 0.15762388706207275, "learning_rate": 0.002, "loss": 2.5629, "step": 298090 }, { "epoch": 0.5938814866760168, "grad_norm": 0.17176057398319244, "learning_rate": 0.002, "loss": 2.5696, "step": 298100 }, { "epoch": 0.5939014088996557, "grad_norm": 0.17568431794643402, "learning_rate": 0.002, "loss": 2.5605, "step": 298110 }, { "epoch": 0.5939213311232947, "grad_norm": 0.15001969039440155, "learning_rate": 0.002, "loss": 2.5462, "step": 298120 }, { "epoch": 0.5939412533469336, "grad_norm": 0.18364715576171875, "learning_rate": 0.002, "loss": 2.5653, "step": 298130 }, { "epoch": 0.5939611755705725, "grad_norm": 0.15614674985408783, "learning_rate": 0.002, "loss": 2.5519, "step": 298140 }, { "epoch": 0.5939810977942114, "grad_norm": 0.17963166534900665, "learning_rate": 0.002, "loss": 2.538, "step": 298150 }, { "epoch": 0.5940010200178503, "grad_norm": 0.16528286039829254, "learning_rate": 0.002, "loss": 2.5603, "step": 298160 }, { "epoch": 0.5940209422414893, "grad_norm": 0.146614670753479, "learning_rate": 0.002, "loss": 2.5416, "step": 298170 }, { "epoch": 0.5940408644651282, "grad_norm": 0.17122164368629456, "learning_rate": 0.002, "loss": 2.5695, "step": 298180 }, { "epoch": 0.594060786688767, "grad_norm": 0.1699783354997635, "learning_rate": 0.002, "loss": 2.5403, "step": 298190 }, { "epoch": 0.5940807089124059, "grad_norm": 0.13854601979255676, "learning_rate": 0.002, "loss": 2.5381, "step": 298200 }, { "epoch": 0.5941006311360448, "grad_norm": 0.2172841578722, "learning_rate": 0.002, "loss": 2.5634, "step": 298210 }, { "epoch": 0.5941205533596838, "grad_norm": 0.16981379687786102, "learning_rate": 0.002, "loss": 2.5698, "step": 298220 }, { "epoch": 0.5941404755833227, "grad_norm": 0.1573864072561264, "learning_rate": 0.002, "loss": 2.5551, "step": 298230 }, { "epoch": 0.5941603978069616, "grad_norm": 0.16685914993286133, "learning_rate": 0.002, "loss": 2.5614, "step": 298240 }, { "epoch": 0.5941803200306005, "grad_norm": 0.18591710925102234, "learning_rate": 0.002, "loss": 2.5528, "step": 298250 }, { "epoch": 0.5942002422542394, "grad_norm": 0.19474880397319794, "learning_rate": 0.002, "loss": 2.5513, "step": 298260 }, { "epoch": 0.5942201644778784, "grad_norm": 0.1782999187707901, "learning_rate": 0.002, "loss": 2.5547, "step": 298270 }, { "epoch": 0.5942400867015173, "grad_norm": 0.15419630706310272, "learning_rate": 0.002, "loss": 2.5473, "step": 298280 }, { "epoch": 0.5942600089251562, "grad_norm": 0.16527681052684784, "learning_rate": 0.002, "loss": 2.5422, "step": 298290 }, { "epoch": 0.5942799311487951, "grad_norm": 0.19675108790397644, "learning_rate": 0.002, "loss": 2.5465, "step": 298300 }, { "epoch": 0.5942998533724341, "grad_norm": 0.1456514447927475, "learning_rate": 0.002, "loss": 2.5492, "step": 298310 }, { "epoch": 0.594319775596073, "grad_norm": 0.16965632140636444, "learning_rate": 0.002, "loss": 2.5668, "step": 298320 }, { "epoch": 0.5943396978197119, "grad_norm": 0.1468365490436554, "learning_rate": 0.002, "loss": 2.549, "step": 298330 }, { "epoch": 0.5943596200433507, "grad_norm": 0.16316992044448853, "learning_rate": 0.002, "loss": 2.5559, "step": 298340 }, { "epoch": 0.5943795422669896, "grad_norm": 0.18811562657356262, "learning_rate": 0.002, "loss": 2.5589, "step": 298350 }, { "epoch": 0.5943994644906286, "grad_norm": 0.17967204749584198, "learning_rate": 0.002, "loss": 2.5549, "step": 298360 }, { "epoch": 0.5944193867142675, "grad_norm": 0.17018456757068634, "learning_rate": 0.002, "loss": 2.5568, "step": 298370 }, { "epoch": 0.5944393089379064, "grad_norm": 0.16662953794002533, "learning_rate": 0.002, "loss": 2.544, "step": 298380 }, { "epoch": 0.5944592311615453, "grad_norm": 0.2150202989578247, "learning_rate": 0.002, "loss": 2.5323, "step": 298390 }, { "epoch": 0.5944791533851842, "grad_norm": 0.15290120244026184, "learning_rate": 0.002, "loss": 2.5612, "step": 298400 }, { "epoch": 0.5944990756088232, "grad_norm": 0.20204883813858032, "learning_rate": 0.002, "loss": 2.551, "step": 298410 }, { "epoch": 0.5945189978324621, "grad_norm": 0.16718702018260956, "learning_rate": 0.002, "loss": 2.5547, "step": 298420 }, { "epoch": 0.594538920056101, "grad_norm": 0.15666568279266357, "learning_rate": 0.002, "loss": 2.5573, "step": 298430 }, { "epoch": 0.5945588422797399, "grad_norm": 0.18160368502140045, "learning_rate": 0.002, "loss": 2.5549, "step": 298440 }, { "epoch": 0.5945787645033788, "grad_norm": 0.15903319418430328, "learning_rate": 0.002, "loss": 2.5557, "step": 298450 }, { "epoch": 0.5945986867270178, "grad_norm": 0.15407061576843262, "learning_rate": 0.002, "loss": 2.5535, "step": 298460 }, { "epoch": 0.5946186089506567, "grad_norm": 0.23142804205417633, "learning_rate": 0.002, "loss": 2.5694, "step": 298470 }, { "epoch": 0.5946385311742955, "grad_norm": 0.15860159695148468, "learning_rate": 0.002, "loss": 2.5489, "step": 298480 }, { "epoch": 0.5946584533979344, "grad_norm": 0.16709479689598083, "learning_rate": 0.002, "loss": 2.5584, "step": 298490 }, { "epoch": 0.5946783756215733, "grad_norm": 0.20150068402290344, "learning_rate": 0.002, "loss": 2.569, "step": 298500 }, { "epoch": 0.5946982978452123, "grad_norm": 0.17173083126544952, "learning_rate": 0.002, "loss": 2.5402, "step": 298510 }, { "epoch": 0.5947182200688512, "grad_norm": 0.17374590039253235, "learning_rate": 0.002, "loss": 2.5487, "step": 298520 }, { "epoch": 0.5947381422924901, "grad_norm": 0.1618141233921051, "learning_rate": 0.002, "loss": 2.5455, "step": 298530 }, { "epoch": 0.594758064516129, "grad_norm": 0.15196184813976288, "learning_rate": 0.002, "loss": 2.5502, "step": 298540 }, { "epoch": 0.5947779867397679, "grad_norm": 0.2534109354019165, "learning_rate": 0.002, "loss": 2.559, "step": 298550 }, { "epoch": 0.5947979089634069, "grad_norm": 0.1384233683347702, "learning_rate": 0.002, "loss": 2.5459, "step": 298560 }, { "epoch": 0.5948178311870458, "grad_norm": 0.16390790045261383, "learning_rate": 0.002, "loss": 2.5531, "step": 298570 }, { "epoch": 0.5948377534106847, "grad_norm": 0.1637071818113327, "learning_rate": 0.002, "loss": 2.5665, "step": 298580 }, { "epoch": 0.5948576756343236, "grad_norm": 0.13158215582370758, "learning_rate": 0.002, "loss": 2.5515, "step": 298590 }, { "epoch": 0.5948775978579625, "grad_norm": 0.1403990387916565, "learning_rate": 0.002, "loss": 2.5652, "step": 298600 }, { "epoch": 0.5948975200816015, "grad_norm": 0.1514037847518921, "learning_rate": 0.002, "loss": 2.5393, "step": 298610 }, { "epoch": 0.5949174423052404, "grad_norm": 0.16701647639274597, "learning_rate": 0.002, "loss": 2.5509, "step": 298620 }, { "epoch": 0.5949373645288792, "grad_norm": 0.19829170405864716, "learning_rate": 0.002, "loss": 2.5592, "step": 298630 }, { "epoch": 0.5949572867525181, "grad_norm": 0.17906352877616882, "learning_rate": 0.002, "loss": 2.5409, "step": 298640 }, { "epoch": 0.5949772089761571, "grad_norm": 0.15900984406471252, "learning_rate": 0.002, "loss": 2.5492, "step": 298650 }, { "epoch": 0.594997131199796, "grad_norm": 0.17644158005714417, "learning_rate": 0.002, "loss": 2.5655, "step": 298660 }, { "epoch": 0.5950170534234349, "grad_norm": 0.20005103945732117, "learning_rate": 0.002, "loss": 2.5447, "step": 298670 }, { "epoch": 0.5950369756470738, "grad_norm": 0.18685461580753326, "learning_rate": 0.002, "loss": 2.537, "step": 298680 }, { "epoch": 0.5950568978707127, "grad_norm": 0.13977815210819244, "learning_rate": 0.002, "loss": 2.5572, "step": 298690 }, { "epoch": 0.5950768200943517, "grad_norm": 0.1593632847070694, "learning_rate": 0.002, "loss": 2.566, "step": 298700 }, { "epoch": 0.5950967423179906, "grad_norm": 0.17024505138397217, "learning_rate": 0.002, "loss": 2.5639, "step": 298710 }, { "epoch": 0.5951166645416295, "grad_norm": 0.16944648325443268, "learning_rate": 0.002, "loss": 2.5455, "step": 298720 }, { "epoch": 0.5951365867652684, "grad_norm": 0.16158321499824524, "learning_rate": 0.002, "loss": 2.5704, "step": 298730 }, { "epoch": 0.5951565089889073, "grad_norm": 0.16590546071529388, "learning_rate": 0.002, "loss": 2.5607, "step": 298740 }, { "epoch": 0.5951764312125463, "grad_norm": 0.1577165573835373, "learning_rate": 0.002, "loss": 2.5688, "step": 298750 }, { "epoch": 0.5951963534361852, "grad_norm": 0.17697674036026, "learning_rate": 0.002, "loss": 2.5493, "step": 298760 }, { "epoch": 0.595216275659824, "grad_norm": 0.16557742655277252, "learning_rate": 0.002, "loss": 2.5521, "step": 298770 }, { "epoch": 0.5952361978834629, "grad_norm": 0.2027914673089981, "learning_rate": 0.002, "loss": 2.5621, "step": 298780 }, { "epoch": 0.5952561201071018, "grad_norm": 0.13647136092185974, "learning_rate": 0.002, "loss": 2.5546, "step": 298790 }, { "epoch": 0.5952760423307408, "grad_norm": 0.17930051684379578, "learning_rate": 0.002, "loss": 2.5562, "step": 298800 }, { "epoch": 0.5952959645543797, "grad_norm": 0.1744794249534607, "learning_rate": 0.002, "loss": 2.5479, "step": 298810 }, { "epoch": 0.5953158867780186, "grad_norm": 0.17008815705776215, "learning_rate": 0.002, "loss": 2.5552, "step": 298820 }, { "epoch": 0.5953358090016575, "grad_norm": 0.23553186655044556, "learning_rate": 0.002, "loss": 2.5717, "step": 298830 }, { "epoch": 0.5953557312252964, "grad_norm": 0.18376798927783966, "learning_rate": 0.002, "loss": 2.5555, "step": 298840 }, { "epoch": 0.5953756534489354, "grad_norm": 0.1508946269750595, "learning_rate": 0.002, "loss": 2.57, "step": 298850 }, { "epoch": 0.5953955756725743, "grad_norm": 0.16962656378746033, "learning_rate": 0.002, "loss": 2.5642, "step": 298860 }, { "epoch": 0.5954154978962132, "grad_norm": 0.19010458886623383, "learning_rate": 0.002, "loss": 2.5493, "step": 298870 }, { "epoch": 0.5954354201198521, "grad_norm": 0.18622663617134094, "learning_rate": 0.002, "loss": 2.5524, "step": 298880 }, { "epoch": 0.595455342343491, "grad_norm": 0.18494124710559845, "learning_rate": 0.002, "loss": 2.5612, "step": 298890 }, { "epoch": 0.59547526456713, "grad_norm": 0.1388552188873291, "learning_rate": 0.002, "loss": 2.56, "step": 298900 }, { "epoch": 0.5954951867907688, "grad_norm": 0.1669209748506546, "learning_rate": 0.002, "loss": 2.5464, "step": 298910 }, { "epoch": 0.5955151090144077, "grad_norm": 0.1815953105688095, "learning_rate": 0.002, "loss": 2.5664, "step": 298920 }, { "epoch": 0.5955350312380466, "grad_norm": 0.1643056720495224, "learning_rate": 0.002, "loss": 2.5497, "step": 298930 }, { "epoch": 0.5955549534616856, "grad_norm": 0.16111257672309875, "learning_rate": 0.002, "loss": 2.5379, "step": 298940 }, { "epoch": 0.5955748756853245, "grad_norm": 0.1640271693468094, "learning_rate": 0.002, "loss": 2.5613, "step": 298950 }, { "epoch": 0.5955947979089634, "grad_norm": 0.1485884040594101, "learning_rate": 0.002, "loss": 2.5416, "step": 298960 }, { "epoch": 0.5956147201326023, "grad_norm": 0.2360706627368927, "learning_rate": 0.002, "loss": 2.5507, "step": 298970 }, { "epoch": 0.5956346423562412, "grad_norm": 0.146229088306427, "learning_rate": 0.002, "loss": 2.5629, "step": 298980 }, { "epoch": 0.5956545645798802, "grad_norm": 0.16704805195331573, "learning_rate": 0.002, "loss": 2.5533, "step": 298990 }, { "epoch": 0.5956744868035191, "grad_norm": 0.1789083033800125, "learning_rate": 0.002, "loss": 2.5665, "step": 299000 }, { "epoch": 0.595694409027158, "grad_norm": 0.17479705810546875, "learning_rate": 0.002, "loss": 2.566, "step": 299010 }, { "epoch": 0.5957143312507969, "grad_norm": 0.17076736688613892, "learning_rate": 0.002, "loss": 2.5661, "step": 299020 }, { "epoch": 0.5957342534744358, "grad_norm": 0.15658032894134521, "learning_rate": 0.002, "loss": 2.5452, "step": 299030 }, { "epoch": 0.5957541756980748, "grad_norm": 0.183815136551857, "learning_rate": 0.002, "loss": 2.5585, "step": 299040 }, { "epoch": 0.5957740979217137, "grad_norm": 0.1545921415090561, "learning_rate": 0.002, "loss": 2.5544, "step": 299050 }, { "epoch": 0.5957940201453525, "grad_norm": 0.20078741014003754, "learning_rate": 0.002, "loss": 2.5617, "step": 299060 }, { "epoch": 0.5958139423689914, "grad_norm": 0.17037639021873474, "learning_rate": 0.002, "loss": 2.5484, "step": 299070 }, { "epoch": 0.5958338645926303, "grad_norm": 0.17496569454669952, "learning_rate": 0.002, "loss": 2.5589, "step": 299080 }, { "epoch": 0.5958537868162693, "grad_norm": 0.16969610750675201, "learning_rate": 0.002, "loss": 2.5659, "step": 299090 }, { "epoch": 0.5958737090399082, "grad_norm": 0.20290420949459076, "learning_rate": 0.002, "loss": 2.5527, "step": 299100 }, { "epoch": 0.5958936312635471, "grad_norm": 0.17324726283550262, "learning_rate": 0.002, "loss": 2.5691, "step": 299110 }, { "epoch": 0.595913553487186, "grad_norm": 0.15456640720367432, "learning_rate": 0.002, "loss": 2.5604, "step": 299120 }, { "epoch": 0.5959334757108249, "grad_norm": 0.1316623091697693, "learning_rate": 0.002, "loss": 2.5451, "step": 299130 }, { "epoch": 0.5959533979344639, "grad_norm": 0.17252686619758606, "learning_rate": 0.002, "loss": 2.5483, "step": 299140 }, { "epoch": 0.5959733201581028, "grad_norm": 0.1748208999633789, "learning_rate": 0.002, "loss": 2.5555, "step": 299150 }, { "epoch": 0.5959932423817417, "grad_norm": 0.15484732389450073, "learning_rate": 0.002, "loss": 2.5612, "step": 299160 }, { "epoch": 0.5960131646053806, "grad_norm": 0.1739700734615326, "learning_rate": 0.002, "loss": 2.5551, "step": 299170 }, { "epoch": 0.5960330868290195, "grad_norm": 0.15901271998882294, "learning_rate": 0.002, "loss": 2.5511, "step": 299180 }, { "epoch": 0.5960530090526585, "grad_norm": 0.15116986632347107, "learning_rate": 0.002, "loss": 2.5502, "step": 299190 }, { "epoch": 0.5960729312762973, "grad_norm": 0.16731636226177216, "learning_rate": 0.002, "loss": 2.5693, "step": 299200 }, { "epoch": 0.5960928534999362, "grad_norm": 0.16124668717384338, "learning_rate": 0.002, "loss": 2.548, "step": 299210 }, { "epoch": 0.5961127757235751, "grad_norm": 0.17165108025074005, "learning_rate": 0.002, "loss": 2.5707, "step": 299220 }, { "epoch": 0.5961326979472141, "grad_norm": 0.16490647196769714, "learning_rate": 0.002, "loss": 2.5439, "step": 299230 }, { "epoch": 0.596152620170853, "grad_norm": 0.17968745529651642, "learning_rate": 0.002, "loss": 2.5578, "step": 299240 }, { "epoch": 0.5961725423944919, "grad_norm": 0.1311066895723343, "learning_rate": 0.002, "loss": 2.557, "step": 299250 }, { "epoch": 0.5961924646181308, "grad_norm": 0.17616543173789978, "learning_rate": 0.002, "loss": 2.5515, "step": 299260 }, { "epoch": 0.5962123868417697, "grad_norm": 0.1472073346376419, "learning_rate": 0.002, "loss": 2.5429, "step": 299270 }, { "epoch": 0.5962323090654087, "grad_norm": 0.20259852707386017, "learning_rate": 0.002, "loss": 2.552, "step": 299280 }, { "epoch": 0.5962522312890476, "grad_norm": 0.1648542582988739, "learning_rate": 0.002, "loss": 2.5594, "step": 299290 }, { "epoch": 0.5962721535126865, "grad_norm": 0.15790978074073792, "learning_rate": 0.002, "loss": 2.542, "step": 299300 }, { "epoch": 0.5962920757363254, "grad_norm": 0.13847480714321136, "learning_rate": 0.002, "loss": 2.5534, "step": 299310 }, { "epoch": 0.5963119979599643, "grad_norm": 0.15647876262664795, "learning_rate": 0.002, "loss": 2.5446, "step": 299320 }, { "epoch": 0.5963319201836033, "grad_norm": 0.17379505932331085, "learning_rate": 0.002, "loss": 2.5581, "step": 299330 }, { "epoch": 0.5963518424072421, "grad_norm": 0.1777539998292923, "learning_rate": 0.002, "loss": 2.5568, "step": 299340 }, { "epoch": 0.596371764630881, "grad_norm": 0.16840052604675293, "learning_rate": 0.002, "loss": 2.5669, "step": 299350 }, { "epoch": 0.5963916868545199, "grad_norm": 0.16350704431533813, "learning_rate": 0.002, "loss": 2.5553, "step": 299360 }, { "epoch": 0.5964116090781588, "grad_norm": 0.17950870096683502, "learning_rate": 0.002, "loss": 2.5634, "step": 299370 }, { "epoch": 0.5964315313017978, "grad_norm": 0.17045028507709503, "learning_rate": 0.002, "loss": 2.57, "step": 299380 }, { "epoch": 0.5964514535254367, "grad_norm": 0.1746090203523636, "learning_rate": 0.002, "loss": 2.5555, "step": 299390 }, { "epoch": 0.5964713757490756, "grad_norm": 0.19222907721996307, "learning_rate": 0.002, "loss": 2.5529, "step": 299400 }, { "epoch": 0.5964912979727145, "grad_norm": 0.17927300930023193, "learning_rate": 0.002, "loss": 2.5545, "step": 299410 }, { "epoch": 0.5965112201963534, "grad_norm": 0.14457330107688904, "learning_rate": 0.002, "loss": 2.5544, "step": 299420 }, { "epoch": 0.5965311424199924, "grad_norm": 0.17652851343154907, "learning_rate": 0.002, "loss": 2.5555, "step": 299430 }, { "epoch": 0.5965510646436313, "grad_norm": 0.18096336722373962, "learning_rate": 0.002, "loss": 2.5557, "step": 299440 }, { "epoch": 0.5965709868672702, "grad_norm": 0.18532776832580566, "learning_rate": 0.002, "loss": 2.537, "step": 299450 }, { "epoch": 0.5965909090909091, "grad_norm": 0.17338576912879944, "learning_rate": 0.002, "loss": 2.5514, "step": 299460 }, { "epoch": 0.596610831314548, "grad_norm": 0.1570945382118225, "learning_rate": 0.002, "loss": 2.5477, "step": 299470 }, { "epoch": 0.596630753538187, "grad_norm": 0.16264718770980835, "learning_rate": 0.002, "loss": 2.5716, "step": 299480 }, { "epoch": 0.5966506757618258, "grad_norm": 0.16387492418289185, "learning_rate": 0.002, "loss": 2.5609, "step": 299490 }, { "epoch": 0.5966705979854647, "grad_norm": 0.14997318387031555, "learning_rate": 0.002, "loss": 2.5518, "step": 299500 }, { "epoch": 0.5966905202091036, "grad_norm": 0.16807769238948822, "learning_rate": 0.002, "loss": 2.5559, "step": 299510 }, { "epoch": 0.5967104424327426, "grad_norm": 0.2620958387851715, "learning_rate": 0.002, "loss": 2.5649, "step": 299520 }, { "epoch": 0.5967303646563815, "grad_norm": 0.16990670561790466, "learning_rate": 0.002, "loss": 2.5474, "step": 299530 }, { "epoch": 0.5967502868800204, "grad_norm": 0.14184455573558807, "learning_rate": 0.002, "loss": 2.5366, "step": 299540 }, { "epoch": 0.5967702091036593, "grad_norm": 0.20991146564483643, "learning_rate": 0.002, "loss": 2.5572, "step": 299550 }, { "epoch": 0.5967901313272982, "grad_norm": 0.14802344143390656, "learning_rate": 0.002, "loss": 2.5609, "step": 299560 }, { "epoch": 0.5968100535509372, "grad_norm": 0.16956359148025513, "learning_rate": 0.002, "loss": 2.5562, "step": 299570 }, { "epoch": 0.5968299757745761, "grad_norm": 0.1618678867816925, "learning_rate": 0.002, "loss": 2.5501, "step": 299580 }, { "epoch": 0.596849897998215, "grad_norm": 0.17244167625904083, "learning_rate": 0.002, "loss": 2.5694, "step": 299590 }, { "epoch": 0.5968698202218539, "grad_norm": 0.18276745080947876, "learning_rate": 0.002, "loss": 2.5335, "step": 299600 }, { "epoch": 0.5968897424454928, "grad_norm": 0.1717352569103241, "learning_rate": 0.002, "loss": 2.5598, "step": 299610 }, { "epoch": 0.5969096646691318, "grad_norm": 0.1694488674402237, "learning_rate": 0.002, "loss": 2.5586, "step": 299620 }, { "epoch": 0.5969295868927706, "grad_norm": 0.18706758320331573, "learning_rate": 0.002, "loss": 2.5642, "step": 299630 }, { "epoch": 0.5969495091164095, "grad_norm": 0.20211546123027802, "learning_rate": 0.002, "loss": 2.5622, "step": 299640 }, { "epoch": 0.5969694313400484, "grad_norm": 0.15899315476417542, "learning_rate": 0.002, "loss": 2.5571, "step": 299650 }, { "epoch": 0.5969893535636873, "grad_norm": 0.1916200965642929, "learning_rate": 0.002, "loss": 2.5392, "step": 299660 }, { "epoch": 0.5970092757873263, "grad_norm": 0.1457984298467636, "learning_rate": 0.002, "loss": 2.5524, "step": 299670 }, { "epoch": 0.5970291980109652, "grad_norm": 0.15647679567337036, "learning_rate": 0.002, "loss": 2.55, "step": 299680 }, { "epoch": 0.5970491202346041, "grad_norm": 0.19101698696613312, "learning_rate": 0.002, "loss": 2.54, "step": 299690 }, { "epoch": 0.597069042458243, "grad_norm": 0.1540672779083252, "learning_rate": 0.002, "loss": 2.5545, "step": 299700 }, { "epoch": 0.5970889646818819, "grad_norm": 0.18195177614688873, "learning_rate": 0.002, "loss": 2.5548, "step": 299710 }, { "epoch": 0.5971088869055209, "grad_norm": 0.1528460681438446, "learning_rate": 0.002, "loss": 2.5583, "step": 299720 }, { "epoch": 0.5971288091291598, "grad_norm": 0.2014792561531067, "learning_rate": 0.002, "loss": 2.5526, "step": 299730 }, { "epoch": 0.5971487313527987, "grad_norm": 0.19594256579875946, "learning_rate": 0.002, "loss": 2.5474, "step": 299740 }, { "epoch": 0.5971686535764376, "grad_norm": 0.16356541216373444, "learning_rate": 0.002, "loss": 2.563, "step": 299750 }, { "epoch": 0.5971885758000764, "grad_norm": 0.1686466485261917, "learning_rate": 0.002, "loss": 2.5442, "step": 299760 }, { "epoch": 0.5972084980237155, "grad_norm": 0.1612422913312912, "learning_rate": 0.002, "loss": 2.5518, "step": 299770 }, { "epoch": 0.5972284202473543, "grad_norm": 0.15406031906604767, "learning_rate": 0.002, "loss": 2.5587, "step": 299780 }, { "epoch": 0.5972483424709932, "grad_norm": 0.1698395162820816, "learning_rate": 0.002, "loss": 2.5571, "step": 299790 }, { "epoch": 0.5972682646946321, "grad_norm": 0.15692460536956787, "learning_rate": 0.002, "loss": 2.562, "step": 299800 }, { "epoch": 0.5972881869182711, "grad_norm": 0.15048043429851532, "learning_rate": 0.002, "loss": 2.5637, "step": 299810 }, { "epoch": 0.59730810914191, "grad_norm": 0.1544683575630188, "learning_rate": 0.002, "loss": 2.5623, "step": 299820 }, { "epoch": 0.5973280313655489, "grad_norm": 0.201613649725914, "learning_rate": 0.002, "loss": 2.559, "step": 299830 }, { "epoch": 0.5973479535891878, "grad_norm": 0.1576094776391983, "learning_rate": 0.002, "loss": 2.5593, "step": 299840 }, { "epoch": 0.5973678758128267, "grad_norm": 0.21088765561580658, "learning_rate": 0.002, "loss": 2.5735, "step": 299850 }, { "epoch": 0.5973877980364657, "grad_norm": 0.14767412841320038, "learning_rate": 0.002, "loss": 2.5569, "step": 299860 }, { "epoch": 0.5974077202601046, "grad_norm": 0.14936865866184235, "learning_rate": 0.002, "loss": 2.5751, "step": 299870 }, { "epoch": 0.5974276424837435, "grad_norm": 0.23444484174251556, "learning_rate": 0.002, "loss": 2.566, "step": 299880 }, { "epoch": 0.5974475647073824, "grad_norm": 0.15673035383224487, "learning_rate": 0.002, "loss": 2.5728, "step": 299890 }, { "epoch": 0.5974674869310213, "grad_norm": 0.16379816830158234, "learning_rate": 0.002, "loss": 2.5422, "step": 299900 }, { "epoch": 0.5974874091546603, "grad_norm": 0.16815029084682465, "learning_rate": 0.002, "loss": 2.5523, "step": 299910 }, { "epoch": 0.5975073313782991, "grad_norm": 0.2095104306936264, "learning_rate": 0.002, "loss": 2.5584, "step": 299920 }, { "epoch": 0.597527253601938, "grad_norm": 0.2119447886943817, "learning_rate": 0.002, "loss": 2.5535, "step": 299930 }, { "epoch": 0.5975471758255769, "grad_norm": 0.1538229137659073, "learning_rate": 0.002, "loss": 2.5583, "step": 299940 }, { "epoch": 0.5975670980492158, "grad_norm": 0.1805386245250702, "learning_rate": 0.002, "loss": 2.5557, "step": 299950 }, { "epoch": 0.5975870202728548, "grad_norm": 0.15981730818748474, "learning_rate": 0.002, "loss": 2.5605, "step": 299960 }, { "epoch": 0.5976069424964937, "grad_norm": 0.1520117074251175, "learning_rate": 0.002, "loss": 2.5462, "step": 299970 }, { "epoch": 0.5976268647201326, "grad_norm": 0.1693187803030014, "learning_rate": 0.002, "loss": 2.5559, "step": 299980 }, { "epoch": 0.5976467869437715, "grad_norm": 0.20509429275989532, "learning_rate": 0.002, "loss": 2.567, "step": 299990 }, { "epoch": 0.5976667091674104, "grad_norm": 0.17190322279930115, "learning_rate": 0.002, "loss": 2.5819, "step": 300000 }, { "epoch": 0.5976866313910494, "grad_norm": 0.15030725300312042, "learning_rate": 0.002, "loss": 2.5648, "step": 300010 }, { "epoch": 0.5977065536146883, "grad_norm": 0.16159285604953766, "learning_rate": 0.002, "loss": 2.5573, "step": 300020 }, { "epoch": 0.5977264758383272, "grad_norm": 0.18733564019203186, "learning_rate": 0.002, "loss": 2.5594, "step": 300030 }, { "epoch": 0.5977463980619661, "grad_norm": 0.20266136527061462, "learning_rate": 0.002, "loss": 2.5628, "step": 300040 }, { "epoch": 0.597766320285605, "grad_norm": 0.14214830100536346, "learning_rate": 0.002, "loss": 2.5539, "step": 300050 }, { "epoch": 0.597786242509244, "grad_norm": 0.1516256034374237, "learning_rate": 0.002, "loss": 2.5424, "step": 300060 }, { "epoch": 0.5978061647328828, "grad_norm": 0.17351597547531128, "learning_rate": 0.002, "loss": 2.5544, "step": 300070 }, { "epoch": 0.5978260869565217, "grad_norm": 0.1808432787656784, "learning_rate": 0.002, "loss": 2.5595, "step": 300080 }, { "epoch": 0.5978460091801606, "grad_norm": 0.1595190167427063, "learning_rate": 0.002, "loss": 2.5589, "step": 300090 }, { "epoch": 0.5978659314037996, "grad_norm": 0.15741319954395294, "learning_rate": 0.002, "loss": 2.5661, "step": 300100 }, { "epoch": 0.5978858536274385, "grad_norm": 0.2040053904056549, "learning_rate": 0.002, "loss": 2.5528, "step": 300110 }, { "epoch": 0.5979057758510774, "grad_norm": 0.15805920958518982, "learning_rate": 0.002, "loss": 2.5442, "step": 300120 }, { "epoch": 0.5979256980747163, "grad_norm": 0.15170058608055115, "learning_rate": 0.002, "loss": 2.5566, "step": 300130 }, { "epoch": 0.5979456202983552, "grad_norm": 0.18060928583145142, "learning_rate": 0.002, "loss": 2.5463, "step": 300140 }, { "epoch": 0.5979655425219942, "grad_norm": 0.16492469608783722, "learning_rate": 0.002, "loss": 2.5514, "step": 300150 }, { "epoch": 0.5979854647456331, "grad_norm": 0.17326977849006653, "learning_rate": 0.002, "loss": 2.5661, "step": 300160 }, { "epoch": 0.598005386969272, "grad_norm": 0.16976948082447052, "learning_rate": 0.002, "loss": 2.5571, "step": 300170 }, { "epoch": 0.5980253091929109, "grad_norm": 0.15302631258964539, "learning_rate": 0.002, "loss": 2.5679, "step": 300180 }, { "epoch": 0.5980452314165497, "grad_norm": 0.18569692969322205, "learning_rate": 0.002, "loss": 2.5433, "step": 300190 }, { "epoch": 0.5980651536401888, "grad_norm": 0.16019290685653687, "learning_rate": 0.002, "loss": 2.5463, "step": 300200 }, { "epoch": 0.5980850758638276, "grad_norm": 0.1475725620985031, "learning_rate": 0.002, "loss": 2.5574, "step": 300210 }, { "epoch": 0.5981049980874665, "grad_norm": 0.15368005633354187, "learning_rate": 0.002, "loss": 2.5546, "step": 300220 }, { "epoch": 0.5981249203111054, "grad_norm": 0.15924187004566193, "learning_rate": 0.002, "loss": 2.5585, "step": 300230 }, { "epoch": 0.5981448425347443, "grad_norm": 0.146748349070549, "learning_rate": 0.002, "loss": 2.5601, "step": 300240 }, { "epoch": 0.5981647647583833, "grad_norm": 0.1549362689256668, "learning_rate": 0.002, "loss": 2.5558, "step": 300250 }, { "epoch": 0.5981846869820222, "grad_norm": 0.20672500133514404, "learning_rate": 0.002, "loss": 2.5436, "step": 300260 }, { "epoch": 0.5982046092056611, "grad_norm": 0.18108311295509338, "learning_rate": 0.002, "loss": 2.5467, "step": 300270 }, { "epoch": 0.5982245314293, "grad_norm": 0.16949033737182617, "learning_rate": 0.002, "loss": 2.536, "step": 300280 }, { "epoch": 0.5982444536529389, "grad_norm": 0.15328967571258545, "learning_rate": 0.002, "loss": 2.5509, "step": 300290 }, { "epoch": 0.5982643758765779, "grad_norm": 0.17137615382671356, "learning_rate": 0.002, "loss": 2.5568, "step": 300300 }, { "epoch": 0.5982842981002168, "grad_norm": 0.15919743478298187, "learning_rate": 0.002, "loss": 2.5595, "step": 300310 }, { "epoch": 0.5983042203238557, "grad_norm": 0.1591569185256958, "learning_rate": 0.002, "loss": 2.5589, "step": 300320 }, { "epoch": 0.5983241425474946, "grad_norm": 0.17126931250095367, "learning_rate": 0.002, "loss": 2.5568, "step": 300330 }, { "epoch": 0.5983440647711334, "grad_norm": 0.18504220247268677, "learning_rate": 0.002, "loss": 2.5563, "step": 300340 }, { "epoch": 0.5983639869947724, "grad_norm": 0.1662115752696991, "learning_rate": 0.002, "loss": 2.5497, "step": 300350 }, { "epoch": 0.5983839092184113, "grad_norm": 0.15092724561691284, "learning_rate": 0.002, "loss": 2.5501, "step": 300360 }, { "epoch": 0.5984038314420502, "grad_norm": 0.17256790399551392, "learning_rate": 0.002, "loss": 2.5465, "step": 300370 }, { "epoch": 0.5984237536656891, "grad_norm": 0.17047765851020813, "learning_rate": 0.002, "loss": 2.5581, "step": 300380 }, { "epoch": 0.598443675889328, "grad_norm": 0.163407564163208, "learning_rate": 0.002, "loss": 2.5701, "step": 300390 }, { "epoch": 0.598463598112967, "grad_norm": 0.15196490287780762, "learning_rate": 0.002, "loss": 2.5408, "step": 300400 }, { "epoch": 0.5984835203366059, "grad_norm": 0.18511633574962616, "learning_rate": 0.002, "loss": 2.5546, "step": 300410 }, { "epoch": 0.5985034425602448, "grad_norm": 0.16440071165561676, "learning_rate": 0.002, "loss": 2.5415, "step": 300420 }, { "epoch": 0.5985233647838837, "grad_norm": 0.16732749342918396, "learning_rate": 0.002, "loss": 2.5494, "step": 300430 }, { "epoch": 0.5985432870075227, "grad_norm": 0.16600993275642395, "learning_rate": 0.002, "loss": 2.5552, "step": 300440 }, { "epoch": 0.5985632092311616, "grad_norm": 0.166605606675148, "learning_rate": 0.002, "loss": 2.5732, "step": 300450 }, { "epoch": 0.5985831314548005, "grad_norm": 0.16034381091594696, "learning_rate": 0.002, "loss": 2.5511, "step": 300460 }, { "epoch": 0.5986030536784394, "grad_norm": 0.1626977175474167, "learning_rate": 0.002, "loss": 2.5615, "step": 300470 }, { "epoch": 0.5986229759020782, "grad_norm": 0.15729135274887085, "learning_rate": 0.002, "loss": 2.5653, "step": 300480 }, { "epoch": 0.5986428981257172, "grad_norm": 0.15729673206806183, "learning_rate": 0.002, "loss": 2.5578, "step": 300490 }, { "epoch": 0.5986628203493561, "grad_norm": 0.17999185621738434, "learning_rate": 0.002, "loss": 2.5442, "step": 300500 }, { "epoch": 0.598682742572995, "grad_norm": 0.14590011537075043, "learning_rate": 0.002, "loss": 2.5452, "step": 300510 }, { "epoch": 0.5987026647966339, "grad_norm": 0.19941969215869904, "learning_rate": 0.002, "loss": 2.5415, "step": 300520 }, { "epoch": 0.5987225870202728, "grad_norm": 0.16777372360229492, "learning_rate": 0.002, "loss": 2.5663, "step": 300530 }, { "epoch": 0.5987425092439118, "grad_norm": 0.17011038959026337, "learning_rate": 0.002, "loss": 2.5515, "step": 300540 }, { "epoch": 0.5987624314675507, "grad_norm": 0.18833178281784058, "learning_rate": 0.002, "loss": 2.5599, "step": 300550 }, { "epoch": 0.5987823536911896, "grad_norm": 0.18661396205425262, "learning_rate": 0.002, "loss": 2.5447, "step": 300560 }, { "epoch": 0.5988022759148285, "grad_norm": 0.1810036450624466, "learning_rate": 0.002, "loss": 2.5599, "step": 300570 }, { "epoch": 0.5988221981384674, "grad_norm": 0.13932184875011444, "learning_rate": 0.002, "loss": 2.5564, "step": 300580 }, { "epoch": 0.5988421203621064, "grad_norm": 0.17265073955059052, "learning_rate": 0.002, "loss": 2.5528, "step": 300590 }, { "epoch": 0.5988620425857453, "grad_norm": 0.15258029103279114, "learning_rate": 0.002, "loss": 2.5536, "step": 300600 }, { "epoch": 0.5988819648093842, "grad_norm": 0.1536087542772293, "learning_rate": 0.002, "loss": 2.5411, "step": 300610 }, { "epoch": 0.598901887033023, "grad_norm": 0.18476863205432892, "learning_rate": 0.002, "loss": 2.5694, "step": 300620 }, { "epoch": 0.5989218092566619, "grad_norm": 0.19186583161354065, "learning_rate": 0.002, "loss": 2.5584, "step": 300630 }, { "epoch": 0.5989417314803009, "grad_norm": 0.18320250511169434, "learning_rate": 0.002, "loss": 2.5432, "step": 300640 }, { "epoch": 0.5989616537039398, "grad_norm": 0.17695674300193787, "learning_rate": 0.002, "loss": 2.5832, "step": 300650 }, { "epoch": 0.5989815759275787, "grad_norm": 0.14161084592342377, "learning_rate": 0.002, "loss": 2.5588, "step": 300660 }, { "epoch": 0.5990014981512176, "grad_norm": 0.17711974680423737, "learning_rate": 0.002, "loss": 2.5645, "step": 300670 }, { "epoch": 0.5990214203748565, "grad_norm": 0.17311769723892212, "learning_rate": 0.002, "loss": 2.5454, "step": 300680 }, { "epoch": 0.5990413425984955, "grad_norm": 0.14492729306221008, "learning_rate": 0.002, "loss": 2.5625, "step": 300690 }, { "epoch": 0.5990612648221344, "grad_norm": 0.16362455487251282, "learning_rate": 0.002, "loss": 2.5744, "step": 300700 }, { "epoch": 0.5990811870457733, "grad_norm": 0.21745970845222473, "learning_rate": 0.002, "loss": 2.5634, "step": 300710 }, { "epoch": 0.5991011092694122, "grad_norm": 0.1602119505405426, "learning_rate": 0.002, "loss": 2.547, "step": 300720 }, { "epoch": 0.5991210314930512, "grad_norm": 0.15592196583747864, "learning_rate": 0.002, "loss": 2.5538, "step": 300730 }, { "epoch": 0.5991409537166901, "grad_norm": 0.17546866834163666, "learning_rate": 0.002, "loss": 2.5511, "step": 300740 }, { "epoch": 0.599160875940329, "grad_norm": 0.1829276829957962, "learning_rate": 0.002, "loss": 2.5575, "step": 300750 }, { "epoch": 0.5991807981639679, "grad_norm": 0.18161705136299133, "learning_rate": 0.002, "loss": 2.5667, "step": 300760 }, { "epoch": 0.5992007203876067, "grad_norm": 0.18724195659160614, "learning_rate": 0.002, "loss": 2.5509, "step": 300770 }, { "epoch": 0.5992206426112457, "grad_norm": 0.16115131974220276, "learning_rate": 0.002, "loss": 2.5434, "step": 300780 }, { "epoch": 0.5992405648348846, "grad_norm": 0.1737378090620041, "learning_rate": 0.002, "loss": 2.5624, "step": 300790 }, { "epoch": 0.5992604870585235, "grad_norm": 0.16556662321090698, "learning_rate": 0.002, "loss": 2.5585, "step": 300800 }, { "epoch": 0.5992804092821624, "grad_norm": 0.189313143491745, "learning_rate": 0.002, "loss": 2.5437, "step": 300810 }, { "epoch": 0.5993003315058013, "grad_norm": 0.16933368146419525, "learning_rate": 0.002, "loss": 2.5584, "step": 300820 }, { "epoch": 0.5993202537294403, "grad_norm": 0.16652634739875793, "learning_rate": 0.002, "loss": 2.571, "step": 300830 }, { "epoch": 0.5993401759530792, "grad_norm": 0.17245370149612427, "learning_rate": 0.002, "loss": 2.556, "step": 300840 }, { "epoch": 0.5993600981767181, "grad_norm": 0.17984095215797424, "learning_rate": 0.002, "loss": 2.553, "step": 300850 }, { "epoch": 0.599380020400357, "grad_norm": 0.1666453778743744, "learning_rate": 0.002, "loss": 2.569, "step": 300860 }, { "epoch": 0.5993999426239959, "grad_norm": 0.15860705077648163, "learning_rate": 0.002, "loss": 2.5435, "step": 300870 }, { "epoch": 0.5994198648476349, "grad_norm": 0.21802745759487152, "learning_rate": 0.002, "loss": 2.5622, "step": 300880 }, { "epoch": 0.5994397870712738, "grad_norm": 0.17875410616397858, "learning_rate": 0.002, "loss": 2.572, "step": 300890 }, { "epoch": 0.5994597092949127, "grad_norm": 0.16070939600467682, "learning_rate": 0.002, "loss": 2.5789, "step": 300900 }, { "epoch": 0.5994796315185515, "grad_norm": 0.149241641163826, "learning_rate": 0.002, "loss": 2.559, "step": 300910 }, { "epoch": 0.5994995537421904, "grad_norm": 0.18409892916679382, "learning_rate": 0.002, "loss": 2.5627, "step": 300920 }, { "epoch": 0.5995194759658294, "grad_norm": 0.21521878242492676, "learning_rate": 0.002, "loss": 2.5582, "step": 300930 }, { "epoch": 0.5995393981894683, "grad_norm": 0.14654618501663208, "learning_rate": 0.002, "loss": 2.5472, "step": 300940 }, { "epoch": 0.5995593204131072, "grad_norm": 0.15549255907535553, "learning_rate": 0.002, "loss": 2.5624, "step": 300950 }, { "epoch": 0.5995792426367461, "grad_norm": 0.14458461105823517, "learning_rate": 0.002, "loss": 2.5601, "step": 300960 }, { "epoch": 0.599599164860385, "grad_norm": 0.1730610728263855, "learning_rate": 0.002, "loss": 2.5647, "step": 300970 }, { "epoch": 0.599619087084024, "grad_norm": 0.1896371692419052, "learning_rate": 0.002, "loss": 2.5505, "step": 300980 }, { "epoch": 0.5996390093076629, "grad_norm": 0.22002065181732178, "learning_rate": 0.002, "loss": 2.5391, "step": 300990 }, { "epoch": 0.5996589315313018, "grad_norm": 0.1660614013671875, "learning_rate": 0.002, "loss": 2.5568, "step": 301000 }, { "epoch": 0.5996788537549407, "grad_norm": 0.17025986313819885, "learning_rate": 0.002, "loss": 2.5608, "step": 301010 }, { "epoch": 0.5996987759785797, "grad_norm": 0.18873752653598785, "learning_rate": 0.002, "loss": 2.5641, "step": 301020 }, { "epoch": 0.5997186982022186, "grad_norm": 0.16812793910503387, "learning_rate": 0.002, "loss": 2.5551, "step": 301030 }, { "epoch": 0.5997386204258575, "grad_norm": 0.15600042045116425, "learning_rate": 0.002, "loss": 2.5615, "step": 301040 }, { "epoch": 0.5997585426494964, "grad_norm": 0.17381754517555237, "learning_rate": 0.002, "loss": 2.5645, "step": 301050 }, { "epoch": 0.5997784648731352, "grad_norm": 0.18024501204490662, "learning_rate": 0.002, "loss": 2.5578, "step": 301060 }, { "epoch": 0.5997983870967742, "grad_norm": 0.1634833663702011, "learning_rate": 0.002, "loss": 2.5435, "step": 301070 }, { "epoch": 0.5998183093204131, "grad_norm": 0.14329396188259125, "learning_rate": 0.002, "loss": 2.5505, "step": 301080 }, { "epoch": 0.599838231544052, "grad_norm": 0.20275047421455383, "learning_rate": 0.002, "loss": 2.5554, "step": 301090 }, { "epoch": 0.5998581537676909, "grad_norm": 0.19915813207626343, "learning_rate": 0.002, "loss": 2.5527, "step": 301100 }, { "epoch": 0.5998780759913298, "grad_norm": 0.1762954443693161, "learning_rate": 0.002, "loss": 2.5595, "step": 301110 }, { "epoch": 0.5998979982149688, "grad_norm": 0.1511906087398529, "learning_rate": 0.002, "loss": 2.5407, "step": 301120 }, { "epoch": 0.5999179204386077, "grad_norm": 0.14923420548439026, "learning_rate": 0.002, "loss": 2.5559, "step": 301130 }, { "epoch": 0.5999378426622466, "grad_norm": 0.15283843874931335, "learning_rate": 0.002, "loss": 2.5478, "step": 301140 }, { "epoch": 0.5999577648858855, "grad_norm": 0.17567816376686096, "learning_rate": 0.002, "loss": 2.5555, "step": 301150 }, { "epoch": 0.5999776871095244, "grad_norm": 0.18130135536193848, "learning_rate": 0.002, "loss": 2.5454, "step": 301160 }, { "epoch": 0.5999976093331634, "grad_norm": 0.17754995822906494, "learning_rate": 0.002, "loss": 2.5472, "step": 301170 }, { "epoch": 0.6000175315568023, "grad_norm": 0.1641649454832077, "learning_rate": 0.002, "loss": 2.5643, "step": 301180 }, { "epoch": 0.6000374537804412, "grad_norm": 0.15891563892364502, "learning_rate": 0.002, "loss": 2.5592, "step": 301190 }, { "epoch": 0.60005737600408, "grad_norm": 0.15675415098667145, "learning_rate": 0.002, "loss": 2.5556, "step": 301200 }, { "epoch": 0.6000772982277189, "grad_norm": 0.20393510162830353, "learning_rate": 0.002, "loss": 2.5568, "step": 301210 }, { "epoch": 0.6000972204513579, "grad_norm": 0.14989899098873138, "learning_rate": 0.002, "loss": 2.5555, "step": 301220 }, { "epoch": 0.6001171426749968, "grad_norm": 0.179627925157547, "learning_rate": 0.002, "loss": 2.5452, "step": 301230 }, { "epoch": 0.6001370648986357, "grad_norm": 0.19493074715137482, "learning_rate": 0.002, "loss": 2.5448, "step": 301240 }, { "epoch": 0.6001569871222746, "grad_norm": 0.1724182665348053, "learning_rate": 0.002, "loss": 2.5605, "step": 301250 }, { "epoch": 0.6001769093459135, "grad_norm": 0.16965903341770172, "learning_rate": 0.002, "loss": 2.551, "step": 301260 }, { "epoch": 0.6001968315695525, "grad_norm": 0.15649670362472534, "learning_rate": 0.002, "loss": 2.5558, "step": 301270 }, { "epoch": 0.6002167537931914, "grad_norm": 0.19436447322368622, "learning_rate": 0.002, "loss": 2.5472, "step": 301280 }, { "epoch": 0.6002366760168303, "grad_norm": 0.17473603785037994, "learning_rate": 0.002, "loss": 2.5495, "step": 301290 }, { "epoch": 0.6002565982404692, "grad_norm": 0.1773335337638855, "learning_rate": 0.002, "loss": 2.5687, "step": 301300 }, { "epoch": 0.6002765204641082, "grad_norm": 0.16362619400024414, "learning_rate": 0.002, "loss": 2.5578, "step": 301310 }, { "epoch": 0.6002964426877471, "grad_norm": 0.15513299405574799, "learning_rate": 0.002, "loss": 2.556, "step": 301320 }, { "epoch": 0.600316364911386, "grad_norm": 0.21253854036331177, "learning_rate": 0.002, "loss": 2.5589, "step": 301330 }, { "epoch": 0.6003362871350248, "grad_norm": 0.16040854156017303, "learning_rate": 0.002, "loss": 2.5603, "step": 301340 }, { "epoch": 0.6003562093586637, "grad_norm": 0.15341094136238098, "learning_rate": 0.002, "loss": 2.5444, "step": 301350 }, { "epoch": 0.6003761315823027, "grad_norm": 0.17148590087890625, "learning_rate": 0.002, "loss": 2.5609, "step": 301360 }, { "epoch": 0.6003960538059416, "grad_norm": 0.14669504761695862, "learning_rate": 0.002, "loss": 2.5504, "step": 301370 }, { "epoch": 0.6004159760295805, "grad_norm": 0.18011581897735596, "learning_rate": 0.002, "loss": 2.5568, "step": 301380 }, { "epoch": 0.6004358982532194, "grad_norm": 0.14974047243595123, "learning_rate": 0.002, "loss": 2.5727, "step": 301390 }, { "epoch": 0.6004558204768583, "grad_norm": 0.20618635416030884, "learning_rate": 0.002, "loss": 2.5751, "step": 301400 }, { "epoch": 0.6004757427004973, "grad_norm": 0.16975823044776917, "learning_rate": 0.002, "loss": 2.5686, "step": 301410 }, { "epoch": 0.6004956649241362, "grad_norm": 0.20696668326854706, "learning_rate": 0.002, "loss": 2.562, "step": 301420 }, { "epoch": 0.6005155871477751, "grad_norm": 0.1494944989681244, "learning_rate": 0.002, "loss": 2.5383, "step": 301430 }, { "epoch": 0.600535509371414, "grad_norm": 0.15258803963661194, "learning_rate": 0.002, "loss": 2.5579, "step": 301440 }, { "epoch": 0.6005554315950529, "grad_norm": 0.18385964632034302, "learning_rate": 0.002, "loss": 2.5481, "step": 301450 }, { "epoch": 0.6005753538186919, "grad_norm": 0.18524852395057678, "learning_rate": 0.002, "loss": 2.554, "step": 301460 }, { "epoch": 0.6005952760423308, "grad_norm": 0.1800442934036255, "learning_rate": 0.002, "loss": 2.5522, "step": 301470 }, { "epoch": 0.6006151982659697, "grad_norm": 0.1611609309911728, "learning_rate": 0.002, "loss": 2.5474, "step": 301480 }, { "epoch": 0.6006351204896085, "grad_norm": 0.16174457967281342, "learning_rate": 0.002, "loss": 2.5571, "step": 301490 }, { "epoch": 0.6006550427132474, "grad_norm": 0.18504799902439117, "learning_rate": 0.002, "loss": 2.5618, "step": 301500 }, { "epoch": 0.6006749649368864, "grad_norm": 0.1817541867494583, "learning_rate": 0.002, "loss": 2.5717, "step": 301510 }, { "epoch": 0.6006948871605253, "grad_norm": 0.13728635013103485, "learning_rate": 0.002, "loss": 2.5741, "step": 301520 }, { "epoch": 0.6007148093841642, "grad_norm": 0.15360334515571594, "learning_rate": 0.002, "loss": 2.5406, "step": 301530 }, { "epoch": 0.6007347316078031, "grad_norm": 0.1668308824300766, "learning_rate": 0.002, "loss": 2.549, "step": 301540 }, { "epoch": 0.600754653831442, "grad_norm": 0.147466778755188, "learning_rate": 0.002, "loss": 2.5644, "step": 301550 }, { "epoch": 0.600774576055081, "grad_norm": 0.16708064079284668, "learning_rate": 0.002, "loss": 2.5586, "step": 301560 }, { "epoch": 0.6007944982787199, "grad_norm": 0.15004028379917145, "learning_rate": 0.002, "loss": 2.5434, "step": 301570 }, { "epoch": 0.6008144205023588, "grad_norm": 0.17324644327163696, "learning_rate": 0.002, "loss": 2.5509, "step": 301580 }, { "epoch": 0.6008343427259977, "grad_norm": 0.2017515003681183, "learning_rate": 0.002, "loss": 2.5695, "step": 301590 }, { "epoch": 0.6008542649496367, "grad_norm": 0.15577110648155212, "learning_rate": 0.002, "loss": 2.545, "step": 301600 }, { "epoch": 0.6008741871732756, "grad_norm": 0.1393953561782837, "learning_rate": 0.002, "loss": 2.5586, "step": 301610 }, { "epoch": 0.6008941093969145, "grad_norm": 0.18011032044887543, "learning_rate": 0.002, "loss": 2.5533, "step": 301620 }, { "epoch": 0.6009140316205533, "grad_norm": 0.15899237990379333, "learning_rate": 0.002, "loss": 2.5484, "step": 301630 }, { "epoch": 0.6009339538441922, "grad_norm": 0.15844812989234924, "learning_rate": 0.002, "loss": 2.5519, "step": 301640 }, { "epoch": 0.6009538760678312, "grad_norm": 0.16172868013381958, "learning_rate": 0.002, "loss": 2.5533, "step": 301650 }, { "epoch": 0.6009737982914701, "grad_norm": 0.17529501020908356, "learning_rate": 0.002, "loss": 2.566, "step": 301660 }, { "epoch": 0.600993720515109, "grad_norm": 0.16751191020011902, "learning_rate": 0.002, "loss": 2.5555, "step": 301670 }, { "epoch": 0.6010136427387479, "grad_norm": 0.1718350350856781, "learning_rate": 0.002, "loss": 2.5533, "step": 301680 }, { "epoch": 0.6010335649623868, "grad_norm": 0.19174516201019287, "learning_rate": 0.002, "loss": 2.5461, "step": 301690 }, { "epoch": 0.6010534871860258, "grad_norm": 0.15595270693302155, "learning_rate": 0.002, "loss": 2.5516, "step": 301700 }, { "epoch": 0.6010734094096647, "grad_norm": 0.19957907497882843, "learning_rate": 0.002, "loss": 2.5499, "step": 301710 }, { "epoch": 0.6010933316333036, "grad_norm": 0.15838894248008728, "learning_rate": 0.002, "loss": 2.5592, "step": 301720 }, { "epoch": 0.6011132538569425, "grad_norm": 0.17949563264846802, "learning_rate": 0.002, "loss": 2.5568, "step": 301730 }, { "epoch": 0.6011331760805814, "grad_norm": 0.1756603717803955, "learning_rate": 0.002, "loss": 2.5633, "step": 301740 }, { "epoch": 0.6011530983042204, "grad_norm": 0.15859876573085785, "learning_rate": 0.002, "loss": 2.5656, "step": 301750 }, { "epoch": 0.6011730205278593, "grad_norm": 0.1635836660861969, "learning_rate": 0.002, "loss": 2.5606, "step": 301760 }, { "epoch": 0.6011929427514981, "grad_norm": 0.14291594922542572, "learning_rate": 0.002, "loss": 2.5651, "step": 301770 }, { "epoch": 0.601212864975137, "grad_norm": 0.16535243391990662, "learning_rate": 0.002, "loss": 2.5508, "step": 301780 }, { "epoch": 0.6012327871987759, "grad_norm": 0.1337197721004486, "learning_rate": 0.002, "loss": 2.5543, "step": 301790 }, { "epoch": 0.6012527094224149, "grad_norm": 0.16254597902297974, "learning_rate": 0.002, "loss": 2.542, "step": 301800 }, { "epoch": 0.6012726316460538, "grad_norm": 0.14948898553848267, "learning_rate": 0.002, "loss": 2.5641, "step": 301810 }, { "epoch": 0.6012925538696927, "grad_norm": 0.19310015439987183, "learning_rate": 0.002, "loss": 2.5559, "step": 301820 }, { "epoch": 0.6013124760933316, "grad_norm": 0.1738523691892624, "learning_rate": 0.002, "loss": 2.5492, "step": 301830 }, { "epoch": 0.6013323983169705, "grad_norm": 0.15777762234210968, "learning_rate": 0.002, "loss": 2.5614, "step": 301840 }, { "epoch": 0.6013523205406095, "grad_norm": 0.2117673009634018, "learning_rate": 0.002, "loss": 2.5589, "step": 301850 }, { "epoch": 0.6013722427642484, "grad_norm": 0.1639072448015213, "learning_rate": 0.002, "loss": 2.5598, "step": 301860 }, { "epoch": 0.6013921649878873, "grad_norm": 0.15942184627056122, "learning_rate": 0.002, "loss": 2.5648, "step": 301870 }, { "epoch": 0.6014120872115262, "grad_norm": 0.14179804921150208, "learning_rate": 0.002, "loss": 2.557, "step": 301880 }, { "epoch": 0.6014320094351651, "grad_norm": 0.18897728621959686, "learning_rate": 0.002, "loss": 2.5592, "step": 301890 }, { "epoch": 0.6014519316588041, "grad_norm": 0.15321855247020721, "learning_rate": 0.002, "loss": 2.5484, "step": 301900 }, { "epoch": 0.601471853882443, "grad_norm": 0.18636628985404968, "learning_rate": 0.002, "loss": 2.567, "step": 301910 }, { "epoch": 0.6014917761060818, "grad_norm": 0.1383848339319229, "learning_rate": 0.002, "loss": 2.5433, "step": 301920 }, { "epoch": 0.6015116983297207, "grad_norm": 0.19193457067012787, "learning_rate": 0.002, "loss": 2.5744, "step": 301930 }, { "epoch": 0.6015316205533597, "grad_norm": 0.13611499965190887, "learning_rate": 0.002, "loss": 2.5597, "step": 301940 }, { "epoch": 0.6015515427769986, "grad_norm": 0.14583103358745575, "learning_rate": 0.002, "loss": 2.5596, "step": 301950 }, { "epoch": 0.6015714650006375, "grad_norm": 0.14723217487335205, "learning_rate": 0.002, "loss": 2.5568, "step": 301960 }, { "epoch": 0.6015913872242764, "grad_norm": 0.17025025188922882, "learning_rate": 0.002, "loss": 2.55, "step": 301970 }, { "epoch": 0.6016113094479153, "grad_norm": 0.16228464245796204, "learning_rate": 0.002, "loss": 2.5543, "step": 301980 }, { "epoch": 0.6016312316715543, "grad_norm": 0.1689474880695343, "learning_rate": 0.002, "loss": 2.5511, "step": 301990 }, { "epoch": 0.6016511538951932, "grad_norm": 0.16680949926376343, "learning_rate": 0.002, "loss": 2.5313, "step": 302000 }, { "epoch": 0.6016710761188321, "grad_norm": 0.16336366534233093, "learning_rate": 0.002, "loss": 2.556, "step": 302010 }, { "epoch": 0.601690998342471, "grad_norm": 0.15912380814552307, "learning_rate": 0.002, "loss": 2.5626, "step": 302020 }, { "epoch": 0.6017109205661099, "grad_norm": 0.1896572709083557, "learning_rate": 0.002, "loss": 2.5609, "step": 302030 }, { "epoch": 0.6017308427897489, "grad_norm": 0.16028694808483124, "learning_rate": 0.002, "loss": 2.5528, "step": 302040 }, { "epoch": 0.6017507650133878, "grad_norm": 0.15592028200626373, "learning_rate": 0.002, "loss": 2.5521, "step": 302050 }, { "epoch": 0.6017706872370266, "grad_norm": 0.1588851660490036, "learning_rate": 0.002, "loss": 2.5555, "step": 302060 }, { "epoch": 0.6017906094606655, "grad_norm": 0.1823195368051529, "learning_rate": 0.002, "loss": 2.5646, "step": 302070 }, { "epoch": 0.6018105316843044, "grad_norm": 0.1444692462682724, "learning_rate": 0.002, "loss": 2.5508, "step": 302080 }, { "epoch": 0.6018304539079434, "grad_norm": 0.17628213763237, "learning_rate": 0.002, "loss": 2.5513, "step": 302090 }, { "epoch": 0.6018503761315823, "grad_norm": 0.19071918725967407, "learning_rate": 0.002, "loss": 2.5661, "step": 302100 }, { "epoch": 0.6018702983552212, "grad_norm": 0.14984141290187836, "learning_rate": 0.002, "loss": 2.5345, "step": 302110 }, { "epoch": 0.6018902205788601, "grad_norm": 0.19561778008937836, "learning_rate": 0.002, "loss": 2.5574, "step": 302120 }, { "epoch": 0.601910142802499, "grad_norm": 0.16867175698280334, "learning_rate": 0.002, "loss": 2.5626, "step": 302130 }, { "epoch": 0.601930065026138, "grad_norm": 0.15411360561847687, "learning_rate": 0.002, "loss": 2.5599, "step": 302140 }, { "epoch": 0.6019499872497769, "grad_norm": 0.1625533401966095, "learning_rate": 0.002, "loss": 2.5467, "step": 302150 }, { "epoch": 0.6019699094734158, "grad_norm": 0.1598273068666458, "learning_rate": 0.002, "loss": 2.5385, "step": 302160 }, { "epoch": 0.6019898316970547, "grad_norm": 0.1591506451368332, "learning_rate": 0.002, "loss": 2.5429, "step": 302170 }, { "epoch": 0.6020097539206936, "grad_norm": 0.1438159942626953, "learning_rate": 0.002, "loss": 2.556, "step": 302180 }, { "epoch": 0.6020296761443326, "grad_norm": 0.16613048315048218, "learning_rate": 0.002, "loss": 2.5513, "step": 302190 }, { "epoch": 0.6020495983679715, "grad_norm": 0.16072113811969757, "learning_rate": 0.002, "loss": 2.5622, "step": 302200 }, { "epoch": 0.6020695205916103, "grad_norm": 0.1612776815891266, "learning_rate": 0.002, "loss": 2.5412, "step": 302210 }, { "epoch": 0.6020894428152492, "grad_norm": 0.17140677571296692, "learning_rate": 0.002, "loss": 2.5573, "step": 302220 }, { "epoch": 0.6021093650388882, "grad_norm": 0.1682703197002411, "learning_rate": 0.002, "loss": 2.5595, "step": 302230 }, { "epoch": 0.6021292872625271, "grad_norm": 0.15786346793174744, "learning_rate": 0.002, "loss": 2.5599, "step": 302240 }, { "epoch": 0.602149209486166, "grad_norm": 0.1441568285226822, "learning_rate": 0.002, "loss": 2.5517, "step": 302250 }, { "epoch": 0.6021691317098049, "grad_norm": 0.16498276591300964, "learning_rate": 0.002, "loss": 2.5501, "step": 302260 }, { "epoch": 0.6021890539334438, "grad_norm": 0.15410935878753662, "learning_rate": 0.002, "loss": 2.5639, "step": 302270 }, { "epoch": 0.6022089761570828, "grad_norm": 0.16588982939720154, "learning_rate": 0.002, "loss": 2.5571, "step": 302280 }, { "epoch": 0.6022288983807217, "grad_norm": 0.18553517758846283, "learning_rate": 0.002, "loss": 2.5451, "step": 302290 }, { "epoch": 0.6022488206043606, "grad_norm": 0.16208131611347198, "learning_rate": 0.002, "loss": 2.559, "step": 302300 }, { "epoch": 0.6022687428279995, "grad_norm": 0.20039872825145721, "learning_rate": 0.002, "loss": 2.5574, "step": 302310 }, { "epoch": 0.6022886650516384, "grad_norm": 0.15473556518554688, "learning_rate": 0.002, "loss": 2.5549, "step": 302320 }, { "epoch": 0.6023085872752774, "grad_norm": 0.17841491103172302, "learning_rate": 0.002, "loss": 2.5535, "step": 302330 }, { "epoch": 0.6023285094989163, "grad_norm": 0.18304336071014404, "learning_rate": 0.002, "loss": 2.5658, "step": 302340 }, { "epoch": 0.6023484317225551, "grad_norm": 0.16631749272346497, "learning_rate": 0.002, "loss": 2.5478, "step": 302350 }, { "epoch": 0.602368353946194, "grad_norm": 0.19650205969810486, "learning_rate": 0.002, "loss": 2.5671, "step": 302360 }, { "epoch": 0.6023882761698329, "grad_norm": 0.16250406205654144, "learning_rate": 0.002, "loss": 2.5487, "step": 302370 }, { "epoch": 0.6024081983934719, "grad_norm": 0.15773628652095795, "learning_rate": 0.002, "loss": 2.5635, "step": 302380 }, { "epoch": 0.6024281206171108, "grad_norm": 0.14101478457450867, "learning_rate": 0.002, "loss": 2.5535, "step": 302390 }, { "epoch": 0.6024480428407497, "grad_norm": 0.15980899333953857, "learning_rate": 0.002, "loss": 2.5704, "step": 302400 }, { "epoch": 0.6024679650643886, "grad_norm": 0.18092115223407745, "learning_rate": 0.002, "loss": 2.5672, "step": 302410 }, { "epoch": 0.6024878872880275, "grad_norm": 0.19782070815563202, "learning_rate": 0.002, "loss": 2.5538, "step": 302420 }, { "epoch": 0.6025078095116665, "grad_norm": 0.1480851024389267, "learning_rate": 0.002, "loss": 2.5583, "step": 302430 }, { "epoch": 0.6025277317353054, "grad_norm": 0.18578550219535828, "learning_rate": 0.002, "loss": 2.5581, "step": 302440 }, { "epoch": 0.6025476539589443, "grad_norm": 0.142999529838562, "learning_rate": 0.002, "loss": 2.5541, "step": 302450 }, { "epoch": 0.6025675761825832, "grad_norm": 0.1614035964012146, "learning_rate": 0.002, "loss": 2.5501, "step": 302460 }, { "epoch": 0.602587498406222, "grad_norm": 0.19431811571121216, "learning_rate": 0.002, "loss": 2.5409, "step": 302470 }, { "epoch": 0.6026074206298611, "grad_norm": 0.16077028214931488, "learning_rate": 0.002, "loss": 2.5483, "step": 302480 }, { "epoch": 0.6026273428535, "grad_norm": 0.15063920617103577, "learning_rate": 0.002, "loss": 2.5465, "step": 302490 }, { "epoch": 0.6026472650771388, "grad_norm": 0.21155676245689392, "learning_rate": 0.002, "loss": 2.5679, "step": 302500 }, { "epoch": 0.6026671873007777, "grad_norm": 0.1701548844575882, "learning_rate": 0.002, "loss": 2.5549, "step": 302510 }, { "epoch": 0.6026871095244167, "grad_norm": 0.15430854260921478, "learning_rate": 0.002, "loss": 2.5498, "step": 302520 }, { "epoch": 0.6027070317480556, "grad_norm": 0.18493883311748505, "learning_rate": 0.002, "loss": 2.5736, "step": 302530 }, { "epoch": 0.6027269539716945, "grad_norm": 0.16050498187541962, "learning_rate": 0.002, "loss": 2.5708, "step": 302540 }, { "epoch": 0.6027468761953334, "grad_norm": 0.15076705813407898, "learning_rate": 0.002, "loss": 2.5595, "step": 302550 }, { "epoch": 0.6027667984189723, "grad_norm": 0.17472873628139496, "learning_rate": 0.002, "loss": 2.5518, "step": 302560 }, { "epoch": 0.6027867206426113, "grad_norm": 0.15892980992794037, "learning_rate": 0.002, "loss": 2.5665, "step": 302570 }, { "epoch": 0.6028066428662502, "grad_norm": 0.2041492462158203, "learning_rate": 0.002, "loss": 2.5573, "step": 302580 }, { "epoch": 0.6028265650898891, "grad_norm": 0.17535285651683807, "learning_rate": 0.002, "loss": 2.5538, "step": 302590 }, { "epoch": 0.602846487313528, "grad_norm": 0.17063137888908386, "learning_rate": 0.002, "loss": 2.5609, "step": 302600 }, { "epoch": 0.6028664095371669, "grad_norm": 0.17022882401943207, "learning_rate": 0.002, "loss": 2.5429, "step": 302610 }, { "epoch": 0.6028863317608059, "grad_norm": 0.17292062938213348, "learning_rate": 0.002, "loss": 2.5584, "step": 302620 }, { "epoch": 0.6029062539844448, "grad_norm": 0.16910716891288757, "learning_rate": 0.002, "loss": 2.5512, "step": 302630 }, { "epoch": 0.6029261762080836, "grad_norm": 0.16271021962165833, "learning_rate": 0.002, "loss": 2.5484, "step": 302640 }, { "epoch": 0.6029460984317225, "grad_norm": 0.1962013691663742, "learning_rate": 0.002, "loss": 2.5523, "step": 302650 }, { "epoch": 0.6029660206553614, "grad_norm": 0.16490228474140167, "learning_rate": 0.002, "loss": 2.5564, "step": 302660 }, { "epoch": 0.6029859428790004, "grad_norm": 0.1701427549123764, "learning_rate": 0.002, "loss": 2.5478, "step": 302670 }, { "epoch": 0.6030058651026393, "grad_norm": 0.1684238463640213, "learning_rate": 0.002, "loss": 2.536, "step": 302680 }, { "epoch": 0.6030257873262782, "grad_norm": 0.1578964740037918, "learning_rate": 0.002, "loss": 2.5407, "step": 302690 }, { "epoch": 0.6030457095499171, "grad_norm": 0.16640818119049072, "learning_rate": 0.002, "loss": 2.5477, "step": 302700 }, { "epoch": 0.603065631773556, "grad_norm": 0.14404712617397308, "learning_rate": 0.002, "loss": 2.5449, "step": 302710 }, { "epoch": 0.603085553997195, "grad_norm": 0.17410506308078766, "learning_rate": 0.002, "loss": 2.552, "step": 302720 }, { "epoch": 0.6031054762208339, "grad_norm": 0.14694708585739136, "learning_rate": 0.002, "loss": 2.5616, "step": 302730 }, { "epoch": 0.6031253984444728, "grad_norm": 0.18495121598243713, "learning_rate": 0.002, "loss": 2.5706, "step": 302740 }, { "epoch": 0.6031453206681117, "grad_norm": 0.1612064391374588, "learning_rate": 0.002, "loss": 2.5612, "step": 302750 }, { "epoch": 0.6031652428917506, "grad_norm": 0.1901029348373413, "learning_rate": 0.002, "loss": 2.5697, "step": 302760 }, { "epoch": 0.6031851651153896, "grad_norm": 0.15638349950313568, "learning_rate": 0.002, "loss": 2.5494, "step": 302770 }, { "epoch": 0.6032050873390284, "grad_norm": 0.18226660788059235, "learning_rate": 0.002, "loss": 2.5486, "step": 302780 }, { "epoch": 0.6032250095626673, "grad_norm": 0.16209106147289276, "learning_rate": 0.002, "loss": 2.5585, "step": 302790 }, { "epoch": 0.6032449317863062, "grad_norm": 0.18717680871486664, "learning_rate": 0.002, "loss": 2.5414, "step": 302800 }, { "epoch": 0.6032648540099452, "grad_norm": 0.1849147081375122, "learning_rate": 0.002, "loss": 2.5719, "step": 302810 }, { "epoch": 0.6032847762335841, "grad_norm": 0.1395013928413391, "learning_rate": 0.002, "loss": 2.5521, "step": 302820 }, { "epoch": 0.603304698457223, "grad_norm": 0.1521519422531128, "learning_rate": 0.002, "loss": 2.5577, "step": 302830 }, { "epoch": 0.6033246206808619, "grad_norm": 0.15835562348365784, "learning_rate": 0.002, "loss": 2.5639, "step": 302840 }, { "epoch": 0.6033445429045008, "grad_norm": 0.15184225142002106, "learning_rate": 0.002, "loss": 2.5581, "step": 302850 }, { "epoch": 0.6033644651281398, "grad_norm": 0.1661219745874405, "learning_rate": 0.002, "loss": 2.5603, "step": 302860 }, { "epoch": 0.6033843873517787, "grad_norm": 0.18032607436180115, "learning_rate": 0.002, "loss": 2.5538, "step": 302870 }, { "epoch": 0.6034043095754176, "grad_norm": 0.14738816022872925, "learning_rate": 0.002, "loss": 2.5632, "step": 302880 }, { "epoch": 0.6034242317990565, "grad_norm": 0.16869746148586273, "learning_rate": 0.002, "loss": 2.5584, "step": 302890 }, { "epoch": 0.6034441540226954, "grad_norm": 0.21630220115184784, "learning_rate": 0.002, "loss": 2.5589, "step": 302900 }, { "epoch": 0.6034640762463344, "grad_norm": 0.18780577182769775, "learning_rate": 0.002, "loss": 2.5638, "step": 302910 }, { "epoch": 0.6034839984699732, "grad_norm": 0.15295244753360748, "learning_rate": 0.002, "loss": 2.5543, "step": 302920 }, { "epoch": 0.6035039206936121, "grad_norm": 0.17837190628051758, "learning_rate": 0.002, "loss": 2.5641, "step": 302930 }, { "epoch": 0.603523842917251, "grad_norm": 0.17164111137390137, "learning_rate": 0.002, "loss": 2.5555, "step": 302940 }, { "epoch": 0.6035437651408899, "grad_norm": 0.14191974699497223, "learning_rate": 0.002, "loss": 2.5522, "step": 302950 }, { "epoch": 0.6035636873645289, "grad_norm": 0.15415023267269135, "learning_rate": 0.002, "loss": 2.5412, "step": 302960 }, { "epoch": 0.6035836095881678, "grad_norm": 0.21103328466415405, "learning_rate": 0.002, "loss": 2.5493, "step": 302970 }, { "epoch": 0.6036035318118067, "grad_norm": 0.13520292937755585, "learning_rate": 0.002, "loss": 2.5559, "step": 302980 }, { "epoch": 0.6036234540354456, "grad_norm": 0.19563326239585876, "learning_rate": 0.002, "loss": 2.5584, "step": 302990 }, { "epoch": 0.6036433762590845, "grad_norm": 0.17345504462718964, "learning_rate": 0.002, "loss": 2.5504, "step": 303000 }, { "epoch": 0.6036632984827235, "grad_norm": 0.15327772498130798, "learning_rate": 0.002, "loss": 2.5555, "step": 303010 }, { "epoch": 0.6036832207063624, "grad_norm": 0.163053497672081, "learning_rate": 0.002, "loss": 2.5585, "step": 303020 }, { "epoch": 0.6037031429300013, "grad_norm": 0.16666430234909058, "learning_rate": 0.002, "loss": 2.5521, "step": 303030 }, { "epoch": 0.6037230651536402, "grad_norm": 0.1941719949245453, "learning_rate": 0.002, "loss": 2.5528, "step": 303040 }, { "epoch": 0.603742987377279, "grad_norm": 0.17925339937210083, "learning_rate": 0.002, "loss": 2.551, "step": 303050 }, { "epoch": 0.603762909600918, "grad_norm": 0.15048624575138092, "learning_rate": 0.002, "loss": 2.5525, "step": 303060 }, { "epoch": 0.6037828318245569, "grad_norm": 0.15943415462970734, "learning_rate": 0.002, "loss": 2.544, "step": 303070 }, { "epoch": 0.6038027540481958, "grad_norm": 0.13921195268630981, "learning_rate": 0.002, "loss": 2.5619, "step": 303080 }, { "epoch": 0.6038226762718347, "grad_norm": 0.18397681415081024, "learning_rate": 0.002, "loss": 2.5488, "step": 303090 }, { "epoch": 0.6038425984954737, "grad_norm": 0.15684011578559875, "learning_rate": 0.002, "loss": 2.5414, "step": 303100 }, { "epoch": 0.6038625207191126, "grad_norm": 0.14870378375053406, "learning_rate": 0.002, "loss": 2.551, "step": 303110 }, { "epoch": 0.6038824429427515, "grad_norm": 0.1699480563402176, "learning_rate": 0.002, "loss": 2.5691, "step": 303120 }, { "epoch": 0.6039023651663904, "grad_norm": 0.19445523619651794, "learning_rate": 0.002, "loss": 2.5597, "step": 303130 }, { "epoch": 0.6039222873900293, "grad_norm": 0.14644575119018555, "learning_rate": 0.002, "loss": 2.5489, "step": 303140 }, { "epoch": 0.6039422096136683, "grad_norm": 0.17992085218429565, "learning_rate": 0.002, "loss": 2.5681, "step": 303150 }, { "epoch": 0.6039621318373072, "grad_norm": 0.15905572474002838, "learning_rate": 0.002, "loss": 2.5589, "step": 303160 }, { "epoch": 0.6039820540609461, "grad_norm": 0.17232577502727509, "learning_rate": 0.002, "loss": 2.5502, "step": 303170 }, { "epoch": 0.604001976284585, "grad_norm": 0.16599194705486298, "learning_rate": 0.002, "loss": 2.5628, "step": 303180 }, { "epoch": 0.6040218985082239, "grad_norm": 0.20520010590553284, "learning_rate": 0.002, "loss": 2.5511, "step": 303190 }, { "epoch": 0.6040418207318629, "grad_norm": 0.1595698595046997, "learning_rate": 0.002, "loss": 2.5255, "step": 303200 }, { "epoch": 0.6040617429555017, "grad_norm": 0.1400841474533081, "learning_rate": 0.002, "loss": 2.5697, "step": 303210 }, { "epoch": 0.6040816651791406, "grad_norm": 0.19690369069576263, "learning_rate": 0.002, "loss": 2.5512, "step": 303220 }, { "epoch": 0.6041015874027795, "grad_norm": 0.18925607204437256, "learning_rate": 0.002, "loss": 2.5591, "step": 303230 }, { "epoch": 0.6041215096264184, "grad_norm": 0.15428848564624786, "learning_rate": 0.002, "loss": 2.5572, "step": 303240 }, { "epoch": 0.6041414318500574, "grad_norm": 0.15901164710521698, "learning_rate": 0.002, "loss": 2.5585, "step": 303250 }, { "epoch": 0.6041613540736963, "grad_norm": 0.17322856187820435, "learning_rate": 0.002, "loss": 2.5673, "step": 303260 }, { "epoch": 0.6041812762973352, "grad_norm": 0.15359678864479065, "learning_rate": 0.002, "loss": 2.5626, "step": 303270 }, { "epoch": 0.6042011985209741, "grad_norm": 0.14713101089000702, "learning_rate": 0.002, "loss": 2.5698, "step": 303280 }, { "epoch": 0.604221120744613, "grad_norm": 0.15165653824806213, "learning_rate": 0.002, "loss": 2.5599, "step": 303290 }, { "epoch": 0.604241042968252, "grad_norm": 0.15713050961494446, "learning_rate": 0.002, "loss": 2.5645, "step": 303300 }, { "epoch": 0.6042609651918909, "grad_norm": 0.14526988565921783, "learning_rate": 0.002, "loss": 2.5609, "step": 303310 }, { "epoch": 0.6042808874155298, "grad_norm": 0.1507762223482132, "learning_rate": 0.002, "loss": 2.5496, "step": 303320 }, { "epoch": 0.6043008096391687, "grad_norm": 0.15352202951908112, "learning_rate": 0.002, "loss": 2.5642, "step": 303330 }, { "epoch": 0.6043207318628075, "grad_norm": 0.154320627450943, "learning_rate": 0.002, "loss": 2.5681, "step": 303340 }, { "epoch": 0.6043406540864465, "grad_norm": 0.1473003476858139, "learning_rate": 0.002, "loss": 2.5666, "step": 303350 }, { "epoch": 0.6043605763100854, "grad_norm": 0.18844355642795563, "learning_rate": 0.002, "loss": 2.5616, "step": 303360 }, { "epoch": 0.6043804985337243, "grad_norm": 0.14250968396663666, "learning_rate": 0.002, "loss": 2.549, "step": 303370 }, { "epoch": 0.6044004207573632, "grad_norm": 0.1625708043575287, "learning_rate": 0.002, "loss": 2.5551, "step": 303380 }, { "epoch": 0.6044203429810021, "grad_norm": 0.15027591586112976, "learning_rate": 0.002, "loss": 2.5561, "step": 303390 }, { "epoch": 0.6044402652046411, "grad_norm": 0.1458738148212433, "learning_rate": 0.002, "loss": 2.5559, "step": 303400 }, { "epoch": 0.60446018742828, "grad_norm": 0.17551976442337036, "learning_rate": 0.002, "loss": 2.5522, "step": 303410 }, { "epoch": 0.6044801096519189, "grad_norm": 0.15890273451805115, "learning_rate": 0.002, "loss": 2.5537, "step": 303420 }, { "epoch": 0.6045000318755578, "grad_norm": 0.198329359292984, "learning_rate": 0.002, "loss": 2.5664, "step": 303430 }, { "epoch": 0.6045199540991968, "grad_norm": 0.15072885155677795, "learning_rate": 0.002, "loss": 2.5676, "step": 303440 }, { "epoch": 0.6045398763228357, "grad_norm": 0.1466173678636551, "learning_rate": 0.002, "loss": 2.5632, "step": 303450 }, { "epoch": 0.6045597985464746, "grad_norm": 0.1516270488500595, "learning_rate": 0.002, "loss": 2.5591, "step": 303460 }, { "epoch": 0.6045797207701135, "grad_norm": 0.1514076590538025, "learning_rate": 0.002, "loss": 2.5504, "step": 303470 }, { "epoch": 0.6045996429937524, "grad_norm": 0.16637447476387024, "learning_rate": 0.002, "loss": 2.5556, "step": 303480 }, { "epoch": 0.6046195652173914, "grad_norm": 0.15790851414203644, "learning_rate": 0.002, "loss": 2.5656, "step": 303490 }, { "epoch": 0.6046394874410302, "grad_norm": 0.2011549323797226, "learning_rate": 0.002, "loss": 2.5634, "step": 303500 }, { "epoch": 0.6046594096646691, "grad_norm": 0.15816570818424225, "learning_rate": 0.002, "loss": 2.5633, "step": 303510 }, { "epoch": 0.604679331888308, "grad_norm": 0.13825881481170654, "learning_rate": 0.002, "loss": 2.5398, "step": 303520 }, { "epoch": 0.6046992541119469, "grad_norm": 0.15640418231487274, "learning_rate": 0.002, "loss": 2.5446, "step": 303530 }, { "epoch": 0.6047191763355859, "grad_norm": 0.16079632937908173, "learning_rate": 0.002, "loss": 2.5725, "step": 303540 }, { "epoch": 0.6047390985592248, "grad_norm": 0.18475288152694702, "learning_rate": 0.002, "loss": 2.574, "step": 303550 }, { "epoch": 0.6047590207828637, "grad_norm": 0.17500720918178558, "learning_rate": 0.002, "loss": 2.554, "step": 303560 }, { "epoch": 0.6047789430065026, "grad_norm": 0.18435753881931305, "learning_rate": 0.002, "loss": 2.5397, "step": 303570 }, { "epoch": 0.6047988652301415, "grad_norm": 0.1567080169916153, "learning_rate": 0.002, "loss": 2.5377, "step": 303580 }, { "epoch": 0.6048187874537805, "grad_norm": 0.17303426563739777, "learning_rate": 0.002, "loss": 2.5464, "step": 303590 }, { "epoch": 0.6048387096774194, "grad_norm": 0.17080751061439514, "learning_rate": 0.002, "loss": 2.5476, "step": 303600 }, { "epoch": 0.6048586319010583, "grad_norm": 0.21363306045532227, "learning_rate": 0.002, "loss": 2.5664, "step": 303610 }, { "epoch": 0.6048785541246972, "grad_norm": 0.1806132048368454, "learning_rate": 0.002, "loss": 2.5513, "step": 303620 }, { "epoch": 0.604898476348336, "grad_norm": 0.22659176588058472, "learning_rate": 0.002, "loss": 2.5649, "step": 303630 }, { "epoch": 0.604918398571975, "grad_norm": 0.1486613005399704, "learning_rate": 0.002, "loss": 2.5729, "step": 303640 }, { "epoch": 0.6049383207956139, "grad_norm": 0.17720897495746613, "learning_rate": 0.002, "loss": 2.5576, "step": 303650 }, { "epoch": 0.6049582430192528, "grad_norm": 0.18817636370658875, "learning_rate": 0.002, "loss": 2.5517, "step": 303660 }, { "epoch": 0.6049781652428917, "grad_norm": 0.1352108269929886, "learning_rate": 0.002, "loss": 2.5639, "step": 303670 }, { "epoch": 0.6049980874665306, "grad_norm": 0.1772485077381134, "learning_rate": 0.002, "loss": 2.5725, "step": 303680 }, { "epoch": 0.6050180096901696, "grad_norm": 0.16509412229061127, "learning_rate": 0.002, "loss": 2.5498, "step": 303690 }, { "epoch": 0.6050379319138085, "grad_norm": 0.17635616660118103, "learning_rate": 0.002, "loss": 2.5509, "step": 303700 }, { "epoch": 0.6050578541374474, "grad_norm": 0.17142747342586517, "learning_rate": 0.002, "loss": 2.5521, "step": 303710 }, { "epoch": 0.6050777763610863, "grad_norm": 0.15880674123764038, "learning_rate": 0.002, "loss": 2.5596, "step": 303720 }, { "epoch": 0.6050976985847253, "grad_norm": 0.151389017701149, "learning_rate": 0.002, "loss": 2.5611, "step": 303730 }, { "epoch": 0.6051176208083642, "grad_norm": 0.17084673047065735, "learning_rate": 0.002, "loss": 2.5585, "step": 303740 }, { "epoch": 0.6051375430320031, "grad_norm": 0.16476748883724213, "learning_rate": 0.002, "loss": 2.5682, "step": 303750 }, { "epoch": 0.605157465255642, "grad_norm": 0.17310655117034912, "learning_rate": 0.002, "loss": 2.5531, "step": 303760 }, { "epoch": 0.6051773874792808, "grad_norm": 0.1560325175523758, "learning_rate": 0.002, "loss": 2.555, "step": 303770 }, { "epoch": 0.6051973097029199, "grad_norm": 0.1682071089744568, "learning_rate": 0.002, "loss": 2.553, "step": 303780 }, { "epoch": 0.6052172319265587, "grad_norm": 0.17553499341011047, "learning_rate": 0.002, "loss": 2.5686, "step": 303790 }, { "epoch": 0.6052371541501976, "grad_norm": 0.17335954308509827, "learning_rate": 0.002, "loss": 2.5551, "step": 303800 }, { "epoch": 0.6052570763738365, "grad_norm": 0.16349340975284576, "learning_rate": 0.002, "loss": 2.5539, "step": 303810 }, { "epoch": 0.6052769985974754, "grad_norm": 0.17280109226703644, "learning_rate": 0.002, "loss": 2.548, "step": 303820 }, { "epoch": 0.6052969208211144, "grad_norm": 0.1581880748271942, "learning_rate": 0.002, "loss": 2.5528, "step": 303830 }, { "epoch": 0.6053168430447533, "grad_norm": 0.1608557403087616, "learning_rate": 0.002, "loss": 2.5648, "step": 303840 }, { "epoch": 0.6053367652683922, "grad_norm": 0.18524514138698578, "learning_rate": 0.002, "loss": 2.5508, "step": 303850 }, { "epoch": 0.6053566874920311, "grad_norm": 0.15027841925621033, "learning_rate": 0.002, "loss": 2.5461, "step": 303860 }, { "epoch": 0.60537660971567, "grad_norm": 0.19624409079551697, "learning_rate": 0.002, "loss": 2.5613, "step": 303870 }, { "epoch": 0.605396531939309, "grad_norm": 0.17163315415382385, "learning_rate": 0.002, "loss": 2.5393, "step": 303880 }, { "epoch": 0.6054164541629479, "grad_norm": 0.18978284299373627, "learning_rate": 0.002, "loss": 2.5471, "step": 303890 }, { "epoch": 0.6054363763865868, "grad_norm": 0.21119342744350433, "learning_rate": 0.002, "loss": 2.5535, "step": 303900 }, { "epoch": 0.6054562986102257, "grad_norm": 0.15266259014606476, "learning_rate": 0.002, "loss": 2.5456, "step": 303910 }, { "epoch": 0.6054762208338645, "grad_norm": 0.15278089046478271, "learning_rate": 0.002, "loss": 2.5548, "step": 303920 }, { "epoch": 0.6054961430575035, "grad_norm": 0.18066519498825073, "learning_rate": 0.002, "loss": 2.5564, "step": 303930 }, { "epoch": 0.6055160652811424, "grad_norm": 0.19919930398464203, "learning_rate": 0.002, "loss": 2.5891, "step": 303940 }, { "epoch": 0.6055359875047813, "grad_norm": 0.1541571468114853, "learning_rate": 0.002, "loss": 2.5608, "step": 303950 }, { "epoch": 0.6055559097284202, "grad_norm": 0.14509499073028564, "learning_rate": 0.002, "loss": 2.5544, "step": 303960 }, { "epoch": 0.6055758319520591, "grad_norm": 0.15988567471504211, "learning_rate": 0.002, "loss": 2.5419, "step": 303970 }, { "epoch": 0.6055957541756981, "grad_norm": 0.18896791338920593, "learning_rate": 0.002, "loss": 2.5739, "step": 303980 }, { "epoch": 0.605615676399337, "grad_norm": 0.14785827696323395, "learning_rate": 0.002, "loss": 2.5574, "step": 303990 }, { "epoch": 0.6056355986229759, "grad_norm": 0.1923399120569229, "learning_rate": 0.002, "loss": 2.5544, "step": 304000 }, { "epoch": 0.6056555208466148, "grad_norm": 0.15808440744876862, "learning_rate": 0.002, "loss": 2.5692, "step": 304010 }, { "epoch": 0.6056754430702538, "grad_norm": 0.18930085003376007, "learning_rate": 0.002, "loss": 2.5551, "step": 304020 }, { "epoch": 0.6056953652938927, "grad_norm": 0.16687129437923431, "learning_rate": 0.002, "loss": 2.542, "step": 304030 }, { "epoch": 0.6057152875175316, "grad_norm": 0.16212061047554016, "learning_rate": 0.002, "loss": 2.5563, "step": 304040 }, { "epoch": 0.6057352097411705, "grad_norm": 0.1935945600271225, "learning_rate": 0.002, "loss": 2.5635, "step": 304050 }, { "epoch": 0.6057551319648093, "grad_norm": 0.14940427243709564, "learning_rate": 0.002, "loss": 2.5352, "step": 304060 }, { "epoch": 0.6057750541884483, "grad_norm": 0.15361137688159943, "learning_rate": 0.002, "loss": 2.5482, "step": 304070 }, { "epoch": 0.6057949764120872, "grad_norm": 0.1631348580121994, "learning_rate": 0.002, "loss": 2.5654, "step": 304080 }, { "epoch": 0.6058148986357261, "grad_norm": 0.1699964702129364, "learning_rate": 0.002, "loss": 2.5618, "step": 304090 }, { "epoch": 0.605834820859365, "grad_norm": 0.14842914044857025, "learning_rate": 0.002, "loss": 2.567, "step": 304100 }, { "epoch": 0.6058547430830039, "grad_norm": 0.15255731344223022, "learning_rate": 0.002, "loss": 2.5547, "step": 304110 }, { "epoch": 0.6058746653066429, "grad_norm": 0.21025298535823822, "learning_rate": 0.002, "loss": 2.5502, "step": 304120 }, { "epoch": 0.6058945875302818, "grad_norm": 0.15408532321453094, "learning_rate": 0.002, "loss": 2.5565, "step": 304130 }, { "epoch": 0.6059145097539207, "grad_norm": 0.17691892385482788, "learning_rate": 0.002, "loss": 2.5602, "step": 304140 }, { "epoch": 0.6059344319775596, "grad_norm": 0.1617371290922165, "learning_rate": 0.002, "loss": 2.557, "step": 304150 }, { "epoch": 0.6059543542011985, "grad_norm": 0.17063681781291962, "learning_rate": 0.002, "loss": 2.5629, "step": 304160 }, { "epoch": 0.6059742764248375, "grad_norm": 0.16248401999473572, "learning_rate": 0.002, "loss": 2.5579, "step": 304170 }, { "epoch": 0.6059941986484764, "grad_norm": 0.14678475260734558, "learning_rate": 0.002, "loss": 2.5492, "step": 304180 }, { "epoch": 0.6060141208721153, "grad_norm": 0.16944973170757294, "learning_rate": 0.002, "loss": 2.5664, "step": 304190 }, { "epoch": 0.6060340430957541, "grad_norm": 0.16251733899116516, "learning_rate": 0.002, "loss": 2.55, "step": 304200 }, { "epoch": 0.606053965319393, "grad_norm": 0.19883328676223755, "learning_rate": 0.002, "loss": 2.5586, "step": 304210 }, { "epoch": 0.606073887543032, "grad_norm": 0.1600322425365448, "learning_rate": 0.002, "loss": 2.5544, "step": 304220 }, { "epoch": 0.6060938097666709, "grad_norm": 0.16424906253814697, "learning_rate": 0.002, "loss": 2.5494, "step": 304230 }, { "epoch": 0.6061137319903098, "grad_norm": 0.16215157508850098, "learning_rate": 0.002, "loss": 2.538, "step": 304240 }, { "epoch": 0.6061336542139487, "grad_norm": 0.20075678825378418, "learning_rate": 0.002, "loss": 2.5776, "step": 304250 }, { "epoch": 0.6061535764375876, "grad_norm": 0.1392591893672943, "learning_rate": 0.002, "loss": 2.5575, "step": 304260 }, { "epoch": 0.6061734986612266, "grad_norm": 0.17234615981578827, "learning_rate": 0.002, "loss": 2.5509, "step": 304270 }, { "epoch": 0.6061934208848655, "grad_norm": 0.16256506741046906, "learning_rate": 0.002, "loss": 2.5614, "step": 304280 }, { "epoch": 0.6062133431085044, "grad_norm": 0.13973499834537506, "learning_rate": 0.002, "loss": 2.5674, "step": 304290 }, { "epoch": 0.6062332653321433, "grad_norm": 0.16229768097400665, "learning_rate": 0.002, "loss": 2.566, "step": 304300 }, { "epoch": 0.6062531875557823, "grad_norm": 0.2129209339618683, "learning_rate": 0.002, "loss": 2.5661, "step": 304310 }, { "epoch": 0.6062731097794212, "grad_norm": 0.17322556674480438, "learning_rate": 0.002, "loss": 2.5459, "step": 304320 }, { "epoch": 0.6062930320030601, "grad_norm": 0.20911744236946106, "learning_rate": 0.002, "loss": 2.5621, "step": 304330 }, { "epoch": 0.606312954226699, "grad_norm": 0.17795950174331665, "learning_rate": 0.002, "loss": 2.5565, "step": 304340 }, { "epoch": 0.6063328764503378, "grad_norm": 0.1712055802345276, "learning_rate": 0.002, "loss": 2.5552, "step": 304350 }, { "epoch": 0.6063527986739768, "grad_norm": 0.17974431812763214, "learning_rate": 0.002, "loss": 2.5512, "step": 304360 }, { "epoch": 0.6063727208976157, "grad_norm": 0.16340522468090057, "learning_rate": 0.002, "loss": 2.5478, "step": 304370 }, { "epoch": 0.6063926431212546, "grad_norm": 0.15694518387317657, "learning_rate": 0.002, "loss": 2.5721, "step": 304380 }, { "epoch": 0.6064125653448935, "grad_norm": 0.1731468290090561, "learning_rate": 0.002, "loss": 2.5524, "step": 304390 }, { "epoch": 0.6064324875685324, "grad_norm": 0.1861160397529602, "learning_rate": 0.002, "loss": 2.545, "step": 304400 }, { "epoch": 0.6064524097921714, "grad_norm": 0.17294949293136597, "learning_rate": 0.002, "loss": 2.5497, "step": 304410 }, { "epoch": 0.6064723320158103, "grad_norm": 0.14078779518604279, "learning_rate": 0.002, "loss": 2.5541, "step": 304420 }, { "epoch": 0.6064922542394492, "grad_norm": 0.15482482314109802, "learning_rate": 0.002, "loss": 2.5455, "step": 304430 }, { "epoch": 0.6065121764630881, "grad_norm": 0.15656860172748566, "learning_rate": 0.002, "loss": 2.5514, "step": 304440 }, { "epoch": 0.606532098686727, "grad_norm": 0.16565513610839844, "learning_rate": 0.002, "loss": 2.5601, "step": 304450 }, { "epoch": 0.606552020910366, "grad_norm": 0.19001156091690063, "learning_rate": 0.002, "loss": 2.5445, "step": 304460 }, { "epoch": 0.6065719431340049, "grad_norm": 0.15431497991085052, "learning_rate": 0.002, "loss": 2.5665, "step": 304470 }, { "epoch": 0.6065918653576438, "grad_norm": 0.15139931440353394, "learning_rate": 0.002, "loss": 2.5504, "step": 304480 }, { "epoch": 0.6066117875812826, "grad_norm": 0.16717912256717682, "learning_rate": 0.002, "loss": 2.5677, "step": 304490 }, { "epoch": 0.6066317098049215, "grad_norm": 0.17660988867282867, "learning_rate": 0.002, "loss": 2.5491, "step": 304500 }, { "epoch": 0.6066516320285605, "grad_norm": 0.19230850040912628, "learning_rate": 0.002, "loss": 2.5466, "step": 304510 }, { "epoch": 0.6066715542521994, "grad_norm": 0.14971201121807098, "learning_rate": 0.002, "loss": 2.5429, "step": 304520 }, { "epoch": 0.6066914764758383, "grad_norm": 0.15222536027431488, "learning_rate": 0.002, "loss": 2.5487, "step": 304530 }, { "epoch": 0.6067113986994772, "grad_norm": 0.15254613757133484, "learning_rate": 0.002, "loss": 2.544, "step": 304540 }, { "epoch": 0.6067313209231161, "grad_norm": 0.2044772058725357, "learning_rate": 0.002, "loss": 2.5553, "step": 304550 }, { "epoch": 0.6067512431467551, "grad_norm": 0.17527271807193756, "learning_rate": 0.002, "loss": 2.5559, "step": 304560 }, { "epoch": 0.606771165370394, "grad_norm": 0.1821158230304718, "learning_rate": 0.002, "loss": 2.5602, "step": 304570 }, { "epoch": 0.6067910875940329, "grad_norm": 0.19983655214309692, "learning_rate": 0.002, "loss": 2.5574, "step": 304580 }, { "epoch": 0.6068110098176718, "grad_norm": 0.16826710104942322, "learning_rate": 0.002, "loss": 2.5567, "step": 304590 }, { "epoch": 0.6068309320413108, "grad_norm": 0.15606406331062317, "learning_rate": 0.002, "loss": 2.5282, "step": 304600 }, { "epoch": 0.6068508542649497, "grad_norm": 0.19034187495708466, "learning_rate": 0.002, "loss": 2.5535, "step": 304610 }, { "epoch": 0.6068707764885886, "grad_norm": 0.18675851821899414, "learning_rate": 0.002, "loss": 2.5584, "step": 304620 }, { "epoch": 0.6068906987122274, "grad_norm": 0.13862919807434082, "learning_rate": 0.002, "loss": 2.5494, "step": 304630 }, { "epoch": 0.6069106209358663, "grad_norm": 0.15058675408363342, "learning_rate": 0.002, "loss": 2.5628, "step": 304640 }, { "epoch": 0.6069305431595053, "grad_norm": 0.14842496812343597, "learning_rate": 0.002, "loss": 2.5574, "step": 304650 }, { "epoch": 0.6069504653831442, "grad_norm": 0.16022524237632751, "learning_rate": 0.002, "loss": 2.5647, "step": 304660 }, { "epoch": 0.6069703876067831, "grad_norm": 0.16977955400943756, "learning_rate": 0.002, "loss": 2.5524, "step": 304670 }, { "epoch": 0.606990309830422, "grad_norm": 0.15555255115032196, "learning_rate": 0.002, "loss": 2.5541, "step": 304680 }, { "epoch": 0.6070102320540609, "grad_norm": 0.1614171266555786, "learning_rate": 0.002, "loss": 2.5451, "step": 304690 }, { "epoch": 0.6070301542776999, "grad_norm": 0.18246379494667053, "learning_rate": 0.002, "loss": 2.5521, "step": 304700 }, { "epoch": 0.6070500765013388, "grad_norm": 0.1593264788389206, "learning_rate": 0.002, "loss": 2.5661, "step": 304710 }, { "epoch": 0.6070699987249777, "grad_norm": 0.20029377937316895, "learning_rate": 0.002, "loss": 2.549, "step": 304720 }, { "epoch": 0.6070899209486166, "grad_norm": 0.13667087256908417, "learning_rate": 0.002, "loss": 2.5644, "step": 304730 }, { "epoch": 0.6071098431722555, "grad_norm": 0.18020781874656677, "learning_rate": 0.002, "loss": 2.5494, "step": 304740 }, { "epoch": 0.6071297653958945, "grad_norm": 0.16790929436683655, "learning_rate": 0.002, "loss": 2.5537, "step": 304750 }, { "epoch": 0.6071496876195334, "grad_norm": 0.16061468422412872, "learning_rate": 0.002, "loss": 2.5565, "step": 304760 }, { "epoch": 0.6071696098431723, "grad_norm": 0.13432103395462036, "learning_rate": 0.002, "loss": 2.558, "step": 304770 }, { "epoch": 0.6071895320668111, "grad_norm": 0.2016122192144394, "learning_rate": 0.002, "loss": 2.5491, "step": 304780 }, { "epoch": 0.60720945429045, "grad_norm": 0.1875040978193283, "learning_rate": 0.002, "loss": 2.5717, "step": 304790 }, { "epoch": 0.607229376514089, "grad_norm": 0.14993922412395477, "learning_rate": 0.002, "loss": 2.5478, "step": 304800 }, { "epoch": 0.6072492987377279, "grad_norm": 0.15958991646766663, "learning_rate": 0.002, "loss": 2.5472, "step": 304810 }, { "epoch": 0.6072692209613668, "grad_norm": 0.13721963763237, "learning_rate": 0.002, "loss": 2.5593, "step": 304820 }, { "epoch": 0.6072891431850057, "grad_norm": 0.17365840077400208, "learning_rate": 0.002, "loss": 2.5405, "step": 304830 }, { "epoch": 0.6073090654086446, "grad_norm": 0.1596367061138153, "learning_rate": 0.002, "loss": 2.5507, "step": 304840 }, { "epoch": 0.6073289876322836, "grad_norm": 0.18153689801692963, "learning_rate": 0.002, "loss": 2.5506, "step": 304850 }, { "epoch": 0.6073489098559225, "grad_norm": 0.17535358667373657, "learning_rate": 0.002, "loss": 2.5729, "step": 304860 }, { "epoch": 0.6073688320795614, "grad_norm": 0.2188200056552887, "learning_rate": 0.002, "loss": 2.5591, "step": 304870 }, { "epoch": 0.6073887543032003, "grad_norm": 0.37570786476135254, "learning_rate": 0.002, "loss": 2.5471, "step": 304880 }, { "epoch": 0.6074086765268393, "grad_norm": 0.1830122470855713, "learning_rate": 0.002, "loss": 2.5717, "step": 304890 }, { "epoch": 0.6074285987504782, "grad_norm": 0.2012016922235489, "learning_rate": 0.002, "loss": 2.5611, "step": 304900 }, { "epoch": 0.6074485209741171, "grad_norm": 0.170394167304039, "learning_rate": 0.002, "loss": 2.5543, "step": 304910 }, { "epoch": 0.607468443197756, "grad_norm": 0.15645043551921844, "learning_rate": 0.002, "loss": 2.5501, "step": 304920 }, { "epoch": 0.6074883654213948, "grad_norm": 0.18573522567749023, "learning_rate": 0.002, "loss": 2.5557, "step": 304930 }, { "epoch": 0.6075082876450338, "grad_norm": 0.15179461240768433, "learning_rate": 0.002, "loss": 2.558, "step": 304940 }, { "epoch": 0.6075282098686727, "grad_norm": 0.1547956019639969, "learning_rate": 0.002, "loss": 2.5554, "step": 304950 }, { "epoch": 0.6075481320923116, "grad_norm": 0.19773325324058533, "learning_rate": 0.002, "loss": 2.5574, "step": 304960 }, { "epoch": 0.6075680543159505, "grad_norm": 0.1394374668598175, "learning_rate": 0.002, "loss": 2.5512, "step": 304970 }, { "epoch": 0.6075879765395894, "grad_norm": 0.17284570634365082, "learning_rate": 0.002, "loss": 2.5592, "step": 304980 }, { "epoch": 0.6076078987632284, "grad_norm": 0.18062204122543335, "learning_rate": 0.002, "loss": 2.5418, "step": 304990 }, { "epoch": 0.6076278209868673, "grad_norm": 0.16607984900474548, "learning_rate": 0.002, "loss": 2.5566, "step": 305000 }, { "epoch": 0.6076477432105062, "grad_norm": 0.16625000536441803, "learning_rate": 0.002, "loss": 2.5609, "step": 305010 }, { "epoch": 0.6076676654341451, "grad_norm": 0.15401723980903625, "learning_rate": 0.002, "loss": 2.5601, "step": 305020 }, { "epoch": 0.607687587657784, "grad_norm": 0.14099830389022827, "learning_rate": 0.002, "loss": 2.5598, "step": 305030 }, { "epoch": 0.607707509881423, "grad_norm": 0.14979474246501923, "learning_rate": 0.002, "loss": 2.5476, "step": 305040 }, { "epoch": 0.6077274321050619, "grad_norm": 0.13961263000965118, "learning_rate": 0.002, "loss": 2.5573, "step": 305050 }, { "epoch": 0.6077473543287008, "grad_norm": 0.17172881960868835, "learning_rate": 0.002, "loss": 2.5495, "step": 305060 }, { "epoch": 0.6077672765523396, "grad_norm": 0.2051977813243866, "learning_rate": 0.002, "loss": 2.5773, "step": 305070 }, { "epoch": 0.6077871987759785, "grad_norm": 0.16870088875293732, "learning_rate": 0.002, "loss": 2.5486, "step": 305080 }, { "epoch": 0.6078071209996175, "grad_norm": 0.15535926818847656, "learning_rate": 0.002, "loss": 2.5517, "step": 305090 }, { "epoch": 0.6078270432232564, "grad_norm": 0.1692608892917633, "learning_rate": 0.002, "loss": 2.561, "step": 305100 }, { "epoch": 0.6078469654468953, "grad_norm": 0.1428820639848709, "learning_rate": 0.002, "loss": 2.5607, "step": 305110 }, { "epoch": 0.6078668876705342, "grad_norm": 0.16389483213424683, "learning_rate": 0.002, "loss": 2.5601, "step": 305120 }, { "epoch": 0.6078868098941731, "grad_norm": 0.2113884836435318, "learning_rate": 0.002, "loss": 2.5601, "step": 305130 }, { "epoch": 0.6079067321178121, "grad_norm": 0.2214646190404892, "learning_rate": 0.002, "loss": 2.5693, "step": 305140 }, { "epoch": 0.607926654341451, "grad_norm": 0.20302018523216248, "learning_rate": 0.002, "loss": 2.5643, "step": 305150 }, { "epoch": 0.6079465765650899, "grad_norm": 0.13882054388523102, "learning_rate": 0.002, "loss": 2.5527, "step": 305160 }, { "epoch": 0.6079664987887288, "grad_norm": 0.15595033764839172, "learning_rate": 0.002, "loss": 2.5505, "step": 305170 }, { "epoch": 0.6079864210123677, "grad_norm": 1.141027569770813, "learning_rate": 0.002, "loss": 2.5539, "step": 305180 }, { "epoch": 0.6080063432360067, "grad_norm": 0.1473051905632019, "learning_rate": 0.002, "loss": 2.5556, "step": 305190 }, { "epoch": 0.6080262654596456, "grad_norm": 0.19422195851802826, "learning_rate": 0.002, "loss": 2.5578, "step": 305200 }, { "epoch": 0.6080461876832844, "grad_norm": 0.14909976720809937, "learning_rate": 0.002, "loss": 2.5583, "step": 305210 }, { "epoch": 0.6080661099069233, "grad_norm": 0.17778438329696655, "learning_rate": 0.002, "loss": 2.565, "step": 305220 }, { "epoch": 0.6080860321305623, "grad_norm": 0.15998512506484985, "learning_rate": 0.002, "loss": 2.5655, "step": 305230 }, { "epoch": 0.6081059543542012, "grad_norm": 0.15376567840576172, "learning_rate": 0.002, "loss": 2.5674, "step": 305240 }, { "epoch": 0.6081258765778401, "grad_norm": 0.15009520947933197, "learning_rate": 0.002, "loss": 2.5628, "step": 305250 }, { "epoch": 0.608145798801479, "grad_norm": 0.16651618480682373, "learning_rate": 0.002, "loss": 2.552, "step": 305260 }, { "epoch": 0.6081657210251179, "grad_norm": 0.14864379167556763, "learning_rate": 0.002, "loss": 2.5539, "step": 305270 }, { "epoch": 0.6081856432487569, "grad_norm": 0.14869599044322968, "learning_rate": 0.002, "loss": 2.5362, "step": 305280 }, { "epoch": 0.6082055654723958, "grad_norm": 0.19413654506206512, "learning_rate": 0.002, "loss": 2.5701, "step": 305290 }, { "epoch": 0.6082254876960347, "grad_norm": 0.16244852542877197, "learning_rate": 0.002, "loss": 2.5638, "step": 305300 }, { "epoch": 0.6082454099196736, "grad_norm": 0.1708407700061798, "learning_rate": 0.002, "loss": 2.5431, "step": 305310 }, { "epoch": 0.6082653321433125, "grad_norm": 0.16222424805164337, "learning_rate": 0.002, "loss": 2.5534, "step": 305320 }, { "epoch": 0.6082852543669515, "grad_norm": 0.1648397296667099, "learning_rate": 0.002, "loss": 2.5474, "step": 305330 }, { "epoch": 0.6083051765905904, "grad_norm": 0.16492195427417755, "learning_rate": 0.002, "loss": 2.5603, "step": 305340 }, { "epoch": 0.6083250988142292, "grad_norm": 0.15539871156215668, "learning_rate": 0.002, "loss": 2.5718, "step": 305350 }, { "epoch": 0.6083450210378681, "grad_norm": 0.15968000888824463, "learning_rate": 0.002, "loss": 2.5591, "step": 305360 }, { "epoch": 0.608364943261507, "grad_norm": 0.16692084074020386, "learning_rate": 0.002, "loss": 2.5462, "step": 305370 }, { "epoch": 0.608384865485146, "grad_norm": 0.14978301525115967, "learning_rate": 0.002, "loss": 2.5448, "step": 305380 }, { "epoch": 0.6084047877087849, "grad_norm": 0.16935797035694122, "learning_rate": 0.002, "loss": 2.5585, "step": 305390 }, { "epoch": 0.6084247099324238, "grad_norm": 0.15754032135009766, "learning_rate": 0.002, "loss": 2.5644, "step": 305400 }, { "epoch": 0.6084446321560627, "grad_norm": 0.1420455127954483, "learning_rate": 0.002, "loss": 2.5518, "step": 305410 }, { "epoch": 0.6084645543797016, "grad_norm": 0.1755615919828415, "learning_rate": 0.002, "loss": 2.5642, "step": 305420 }, { "epoch": 0.6084844766033406, "grad_norm": 0.176735982298851, "learning_rate": 0.002, "loss": 2.5519, "step": 305430 }, { "epoch": 0.6085043988269795, "grad_norm": 0.17321191728115082, "learning_rate": 0.002, "loss": 2.5647, "step": 305440 }, { "epoch": 0.6085243210506184, "grad_norm": 0.18562570214271545, "learning_rate": 0.002, "loss": 2.5612, "step": 305450 }, { "epoch": 0.6085442432742573, "grad_norm": 0.18669895827770233, "learning_rate": 0.002, "loss": 2.5603, "step": 305460 }, { "epoch": 0.6085641654978962, "grad_norm": 0.15519820153713226, "learning_rate": 0.002, "loss": 2.5437, "step": 305470 }, { "epoch": 0.6085840877215352, "grad_norm": 0.1783882975578308, "learning_rate": 0.002, "loss": 2.5418, "step": 305480 }, { "epoch": 0.608604009945174, "grad_norm": 0.21625962853431702, "learning_rate": 0.002, "loss": 2.5484, "step": 305490 }, { "epoch": 0.6086239321688129, "grad_norm": 0.1484117954969406, "learning_rate": 0.002, "loss": 2.5604, "step": 305500 }, { "epoch": 0.6086438543924518, "grad_norm": 0.16464164853096008, "learning_rate": 0.002, "loss": 2.5511, "step": 305510 }, { "epoch": 0.6086637766160908, "grad_norm": 0.1722572147846222, "learning_rate": 0.002, "loss": 2.5565, "step": 305520 }, { "epoch": 0.6086836988397297, "grad_norm": 0.18771032989025116, "learning_rate": 0.002, "loss": 2.5621, "step": 305530 }, { "epoch": 0.6087036210633686, "grad_norm": 0.19749493896961212, "learning_rate": 0.002, "loss": 2.5617, "step": 305540 }, { "epoch": 0.6087235432870075, "grad_norm": 0.1508169025182724, "learning_rate": 0.002, "loss": 2.558, "step": 305550 }, { "epoch": 0.6087434655106464, "grad_norm": 0.15261825919151306, "learning_rate": 0.002, "loss": 2.5569, "step": 305560 }, { "epoch": 0.6087633877342854, "grad_norm": 0.16554082930088043, "learning_rate": 0.002, "loss": 2.5526, "step": 305570 }, { "epoch": 0.6087833099579243, "grad_norm": 0.16984927654266357, "learning_rate": 0.002, "loss": 2.5648, "step": 305580 }, { "epoch": 0.6088032321815632, "grad_norm": 0.1424379050731659, "learning_rate": 0.002, "loss": 2.5503, "step": 305590 }, { "epoch": 0.6088231544052021, "grad_norm": 0.1659061461687088, "learning_rate": 0.002, "loss": 2.5466, "step": 305600 }, { "epoch": 0.608843076628841, "grad_norm": 0.17522497475147247, "learning_rate": 0.002, "loss": 2.558, "step": 305610 }, { "epoch": 0.60886299885248, "grad_norm": 0.1668153554201126, "learning_rate": 0.002, "loss": 2.5639, "step": 305620 }, { "epoch": 0.6088829210761189, "grad_norm": 0.15935145318508148, "learning_rate": 0.002, "loss": 2.5638, "step": 305630 }, { "epoch": 0.6089028432997577, "grad_norm": 0.16237004101276398, "learning_rate": 0.002, "loss": 2.5463, "step": 305640 }, { "epoch": 0.6089227655233966, "grad_norm": 0.16245059669017792, "learning_rate": 0.002, "loss": 2.557, "step": 305650 }, { "epoch": 0.6089426877470355, "grad_norm": 0.15264436602592468, "learning_rate": 0.002, "loss": 2.5534, "step": 305660 }, { "epoch": 0.6089626099706745, "grad_norm": 0.15369291603565216, "learning_rate": 0.002, "loss": 2.5721, "step": 305670 }, { "epoch": 0.6089825321943134, "grad_norm": 0.175487220287323, "learning_rate": 0.002, "loss": 2.5644, "step": 305680 }, { "epoch": 0.6090024544179523, "grad_norm": 0.17113900184631348, "learning_rate": 0.002, "loss": 2.5636, "step": 305690 }, { "epoch": 0.6090223766415912, "grad_norm": 0.16136007010936737, "learning_rate": 0.002, "loss": 2.5543, "step": 305700 }, { "epoch": 0.6090422988652301, "grad_norm": 0.16074123978614807, "learning_rate": 0.002, "loss": 2.557, "step": 305710 }, { "epoch": 0.6090622210888691, "grad_norm": 0.18052393198013306, "learning_rate": 0.002, "loss": 2.5413, "step": 305720 }, { "epoch": 0.609082143312508, "grad_norm": 0.17836354672908783, "learning_rate": 0.002, "loss": 2.5653, "step": 305730 }, { "epoch": 0.6091020655361469, "grad_norm": 0.16124233603477478, "learning_rate": 0.002, "loss": 2.5646, "step": 305740 }, { "epoch": 0.6091219877597858, "grad_norm": 0.18595941364765167, "learning_rate": 0.002, "loss": 2.5522, "step": 305750 }, { "epoch": 0.6091419099834247, "grad_norm": 0.15495480597019196, "learning_rate": 0.002, "loss": 2.5627, "step": 305760 }, { "epoch": 0.6091618322070637, "grad_norm": 0.16420221328735352, "learning_rate": 0.002, "loss": 2.5584, "step": 305770 }, { "epoch": 0.6091817544307025, "grad_norm": 0.16871005296707153, "learning_rate": 0.002, "loss": 2.5593, "step": 305780 }, { "epoch": 0.6092016766543414, "grad_norm": 0.16299894452095032, "learning_rate": 0.002, "loss": 2.5606, "step": 305790 }, { "epoch": 0.6092215988779803, "grad_norm": 0.19532573223114014, "learning_rate": 0.002, "loss": 2.5521, "step": 305800 }, { "epoch": 0.6092415211016193, "grad_norm": 0.16232340037822723, "learning_rate": 0.002, "loss": 2.5491, "step": 305810 }, { "epoch": 0.6092614433252582, "grad_norm": 0.15945574641227722, "learning_rate": 0.002, "loss": 2.5663, "step": 305820 }, { "epoch": 0.6092813655488971, "grad_norm": 0.16305840015411377, "learning_rate": 0.002, "loss": 2.5516, "step": 305830 }, { "epoch": 0.609301287772536, "grad_norm": 0.1810733526945114, "learning_rate": 0.002, "loss": 2.5616, "step": 305840 }, { "epoch": 0.6093212099961749, "grad_norm": 0.1511743813753128, "learning_rate": 0.002, "loss": 2.5717, "step": 305850 }, { "epoch": 0.6093411322198139, "grad_norm": 0.1716236025094986, "learning_rate": 0.002, "loss": 2.5532, "step": 305860 }, { "epoch": 0.6093610544434528, "grad_norm": 0.15972183644771576, "learning_rate": 0.002, "loss": 2.5467, "step": 305870 }, { "epoch": 0.6093809766670917, "grad_norm": 0.17987124621868134, "learning_rate": 0.002, "loss": 2.5605, "step": 305880 }, { "epoch": 0.6094008988907306, "grad_norm": 0.20369122922420502, "learning_rate": 0.002, "loss": 2.5555, "step": 305890 }, { "epoch": 0.6094208211143695, "grad_norm": 0.1485292613506317, "learning_rate": 0.002, "loss": 2.5385, "step": 305900 }, { "epoch": 0.6094407433380085, "grad_norm": 0.17844994366168976, "learning_rate": 0.002, "loss": 2.56, "step": 305910 }, { "epoch": 0.6094606655616474, "grad_norm": 0.15405696630477905, "learning_rate": 0.002, "loss": 2.5481, "step": 305920 }, { "epoch": 0.6094805877852862, "grad_norm": 0.1573205441236496, "learning_rate": 0.002, "loss": 2.5452, "step": 305930 }, { "epoch": 0.6095005100089251, "grad_norm": 0.16348494589328766, "learning_rate": 0.002, "loss": 2.5537, "step": 305940 }, { "epoch": 0.609520432232564, "grad_norm": 0.1444447934627533, "learning_rate": 0.002, "loss": 2.5777, "step": 305950 }, { "epoch": 0.609540354456203, "grad_norm": 0.15575598180294037, "learning_rate": 0.002, "loss": 2.5495, "step": 305960 }, { "epoch": 0.6095602766798419, "grad_norm": 0.14994271099567413, "learning_rate": 0.002, "loss": 2.5555, "step": 305970 }, { "epoch": 0.6095801989034808, "grad_norm": 0.16788959503173828, "learning_rate": 0.002, "loss": 2.5595, "step": 305980 }, { "epoch": 0.6096001211271197, "grad_norm": 0.22531923651695251, "learning_rate": 0.002, "loss": 2.5554, "step": 305990 }, { "epoch": 0.6096200433507586, "grad_norm": 0.14976766705513, "learning_rate": 0.002, "loss": 2.5558, "step": 306000 }, { "epoch": 0.6096399655743976, "grad_norm": 0.18064089119434357, "learning_rate": 0.002, "loss": 2.553, "step": 306010 }, { "epoch": 0.6096598877980365, "grad_norm": 0.14713208377361298, "learning_rate": 0.002, "loss": 2.5438, "step": 306020 }, { "epoch": 0.6096798100216754, "grad_norm": 0.20722350478172302, "learning_rate": 0.002, "loss": 2.56, "step": 306030 }, { "epoch": 0.6096997322453143, "grad_norm": 0.1578393578529358, "learning_rate": 0.002, "loss": 2.552, "step": 306040 }, { "epoch": 0.6097196544689532, "grad_norm": 0.13983093202114105, "learning_rate": 0.002, "loss": 2.5598, "step": 306050 }, { "epoch": 0.6097395766925922, "grad_norm": 0.14458824694156647, "learning_rate": 0.002, "loss": 2.5457, "step": 306060 }, { "epoch": 0.609759498916231, "grad_norm": 0.16580675542354584, "learning_rate": 0.002, "loss": 2.5416, "step": 306070 }, { "epoch": 0.6097794211398699, "grad_norm": 0.15381401777267456, "learning_rate": 0.002, "loss": 2.5652, "step": 306080 }, { "epoch": 0.6097993433635088, "grad_norm": 0.18139833211898804, "learning_rate": 0.002, "loss": 2.5633, "step": 306090 }, { "epoch": 0.6098192655871478, "grad_norm": 0.15900278091430664, "learning_rate": 0.002, "loss": 2.551, "step": 306100 }, { "epoch": 0.6098391878107867, "grad_norm": 0.22045981884002686, "learning_rate": 0.002, "loss": 2.5684, "step": 306110 }, { "epoch": 0.6098591100344256, "grad_norm": 0.17220601439476013, "learning_rate": 0.002, "loss": 2.5473, "step": 306120 }, { "epoch": 0.6098790322580645, "grad_norm": 0.1719888150691986, "learning_rate": 0.002, "loss": 2.5469, "step": 306130 }, { "epoch": 0.6098989544817034, "grad_norm": 0.1624078005552292, "learning_rate": 0.002, "loss": 2.5575, "step": 306140 }, { "epoch": 0.6099188767053424, "grad_norm": 0.16218215227127075, "learning_rate": 0.002, "loss": 2.5423, "step": 306150 }, { "epoch": 0.6099387989289813, "grad_norm": 0.16141752898693085, "learning_rate": 0.002, "loss": 2.546, "step": 306160 }, { "epoch": 0.6099587211526202, "grad_norm": 0.17621564865112305, "learning_rate": 0.002, "loss": 2.5591, "step": 306170 }, { "epoch": 0.6099786433762591, "grad_norm": 0.17272812128067017, "learning_rate": 0.002, "loss": 2.5532, "step": 306180 }, { "epoch": 0.609998565599898, "grad_norm": 0.14924994111061096, "learning_rate": 0.002, "loss": 2.5432, "step": 306190 }, { "epoch": 0.610018487823537, "grad_norm": 0.15742935240268707, "learning_rate": 0.002, "loss": 2.5638, "step": 306200 }, { "epoch": 0.6100384100471758, "grad_norm": 0.15427517890930176, "learning_rate": 0.002, "loss": 2.5545, "step": 306210 }, { "epoch": 0.6100583322708147, "grad_norm": 0.14463254809379578, "learning_rate": 0.002, "loss": 2.5724, "step": 306220 }, { "epoch": 0.6100782544944536, "grad_norm": 0.17956793308258057, "learning_rate": 0.002, "loss": 2.5559, "step": 306230 }, { "epoch": 0.6100981767180925, "grad_norm": 0.1686168909072876, "learning_rate": 0.002, "loss": 2.5658, "step": 306240 }, { "epoch": 0.6101180989417315, "grad_norm": 0.15709921717643738, "learning_rate": 0.002, "loss": 2.5568, "step": 306250 }, { "epoch": 0.6101380211653704, "grad_norm": 0.18372532725334167, "learning_rate": 0.002, "loss": 2.5617, "step": 306260 }, { "epoch": 0.6101579433890093, "grad_norm": 0.15180256962776184, "learning_rate": 0.002, "loss": 2.5614, "step": 306270 }, { "epoch": 0.6101778656126482, "grad_norm": 0.16782420873641968, "learning_rate": 0.002, "loss": 2.5509, "step": 306280 }, { "epoch": 0.6101977878362871, "grad_norm": 0.1762673556804657, "learning_rate": 0.002, "loss": 2.5584, "step": 306290 }, { "epoch": 0.6102177100599261, "grad_norm": 0.1637965440750122, "learning_rate": 0.002, "loss": 2.5725, "step": 306300 }, { "epoch": 0.610237632283565, "grad_norm": 0.1744837462902069, "learning_rate": 0.002, "loss": 2.5574, "step": 306310 }, { "epoch": 0.6102575545072039, "grad_norm": 0.1525697112083435, "learning_rate": 0.002, "loss": 2.5507, "step": 306320 }, { "epoch": 0.6102774767308428, "grad_norm": 0.15380199253559113, "learning_rate": 0.002, "loss": 2.5798, "step": 306330 }, { "epoch": 0.6102973989544817, "grad_norm": 0.15728384256362915, "learning_rate": 0.002, "loss": 2.5679, "step": 306340 }, { "epoch": 0.6103173211781207, "grad_norm": 0.2058478146791458, "learning_rate": 0.002, "loss": 2.541, "step": 306350 }, { "epoch": 0.6103372434017595, "grad_norm": 0.16856877505779266, "learning_rate": 0.002, "loss": 2.5535, "step": 306360 }, { "epoch": 0.6103571656253984, "grad_norm": 0.13782386481761932, "learning_rate": 0.002, "loss": 2.5714, "step": 306370 }, { "epoch": 0.6103770878490373, "grad_norm": 0.16373690962791443, "learning_rate": 0.002, "loss": 2.5585, "step": 306380 }, { "epoch": 0.6103970100726763, "grad_norm": 0.17215712368488312, "learning_rate": 0.002, "loss": 2.5667, "step": 306390 }, { "epoch": 0.6104169322963152, "grad_norm": 0.15973128378391266, "learning_rate": 0.002, "loss": 2.5487, "step": 306400 }, { "epoch": 0.6104368545199541, "grad_norm": 0.15885038673877716, "learning_rate": 0.002, "loss": 2.5487, "step": 306410 }, { "epoch": 0.610456776743593, "grad_norm": 0.1657518893480301, "learning_rate": 0.002, "loss": 2.5627, "step": 306420 }, { "epoch": 0.6104766989672319, "grad_norm": 0.1752874106168747, "learning_rate": 0.002, "loss": 2.5536, "step": 306430 }, { "epoch": 0.6104966211908709, "grad_norm": 0.16671694815158844, "learning_rate": 0.002, "loss": 2.5581, "step": 306440 }, { "epoch": 0.6105165434145098, "grad_norm": 0.16616006195545197, "learning_rate": 0.002, "loss": 2.5636, "step": 306450 }, { "epoch": 0.6105364656381487, "grad_norm": 0.1923518031835556, "learning_rate": 0.002, "loss": 2.5661, "step": 306460 }, { "epoch": 0.6105563878617876, "grad_norm": 0.1821707785129547, "learning_rate": 0.002, "loss": 2.5792, "step": 306470 }, { "epoch": 0.6105763100854265, "grad_norm": 0.18315237760543823, "learning_rate": 0.002, "loss": 2.5539, "step": 306480 }, { "epoch": 0.6105962323090655, "grad_norm": 0.1554017812013626, "learning_rate": 0.002, "loss": 2.5332, "step": 306490 }, { "epoch": 0.6106161545327043, "grad_norm": 0.15425947308540344, "learning_rate": 0.002, "loss": 2.5694, "step": 306500 }, { "epoch": 0.6106360767563432, "grad_norm": 0.17404568195343018, "learning_rate": 0.002, "loss": 2.5488, "step": 306510 }, { "epoch": 0.6106559989799821, "grad_norm": 0.14375488460063934, "learning_rate": 0.002, "loss": 2.5691, "step": 306520 }, { "epoch": 0.610675921203621, "grad_norm": 0.1550530046224594, "learning_rate": 0.002, "loss": 2.5442, "step": 306530 }, { "epoch": 0.61069584342726, "grad_norm": 0.18439386785030365, "learning_rate": 0.002, "loss": 2.5513, "step": 306540 }, { "epoch": 0.6107157656508989, "grad_norm": 0.15034282207489014, "learning_rate": 0.002, "loss": 2.5635, "step": 306550 }, { "epoch": 0.6107356878745378, "grad_norm": 0.19355012476444244, "learning_rate": 0.002, "loss": 2.5726, "step": 306560 }, { "epoch": 0.6107556100981767, "grad_norm": 0.1666465550661087, "learning_rate": 0.002, "loss": 2.5674, "step": 306570 }, { "epoch": 0.6107755323218156, "grad_norm": 0.15959250926971436, "learning_rate": 0.002, "loss": 2.5458, "step": 306580 }, { "epoch": 0.6107954545454546, "grad_norm": 0.15807601809501648, "learning_rate": 0.002, "loss": 2.5511, "step": 306590 }, { "epoch": 0.6108153767690935, "grad_norm": 0.19012857973575592, "learning_rate": 0.002, "loss": 2.5584, "step": 306600 }, { "epoch": 0.6108352989927324, "grad_norm": 0.1717652976512909, "learning_rate": 0.002, "loss": 2.5537, "step": 306610 }, { "epoch": 0.6108552212163713, "grad_norm": 0.15102288126945496, "learning_rate": 0.002, "loss": 2.5685, "step": 306620 }, { "epoch": 0.6108751434400101, "grad_norm": 0.16490137577056885, "learning_rate": 0.002, "loss": 2.5633, "step": 306630 }, { "epoch": 0.6108950656636492, "grad_norm": 0.19409139454364777, "learning_rate": 0.002, "loss": 2.551, "step": 306640 }, { "epoch": 0.610914987887288, "grad_norm": 0.17455145716667175, "learning_rate": 0.002, "loss": 2.552, "step": 306650 }, { "epoch": 0.6109349101109269, "grad_norm": 0.1640169322490692, "learning_rate": 0.002, "loss": 2.5544, "step": 306660 }, { "epoch": 0.6109548323345658, "grad_norm": 0.1794511079788208, "learning_rate": 0.002, "loss": 2.5434, "step": 306670 }, { "epoch": 0.6109747545582047, "grad_norm": 0.1645750105381012, "learning_rate": 0.002, "loss": 2.5545, "step": 306680 }, { "epoch": 0.6109946767818437, "grad_norm": 0.15432049334049225, "learning_rate": 0.002, "loss": 2.5525, "step": 306690 }, { "epoch": 0.6110145990054826, "grad_norm": 0.16330555081367493, "learning_rate": 0.002, "loss": 2.5743, "step": 306700 }, { "epoch": 0.6110345212291215, "grad_norm": 0.17663605511188507, "learning_rate": 0.002, "loss": 2.5388, "step": 306710 }, { "epoch": 0.6110544434527604, "grad_norm": 0.14352811872959137, "learning_rate": 0.002, "loss": 2.5354, "step": 306720 }, { "epoch": 0.6110743656763994, "grad_norm": 0.14539824426174164, "learning_rate": 0.002, "loss": 2.5673, "step": 306730 }, { "epoch": 0.6110942879000383, "grad_norm": 0.17927202582359314, "learning_rate": 0.002, "loss": 2.5614, "step": 306740 }, { "epoch": 0.6111142101236772, "grad_norm": 0.14870987832546234, "learning_rate": 0.002, "loss": 2.554, "step": 306750 }, { "epoch": 0.6111341323473161, "grad_norm": 0.16441352665424347, "learning_rate": 0.002, "loss": 2.5579, "step": 306760 }, { "epoch": 0.611154054570955, "grad_norm": 0.18774545192718506, "learning_rate": 0.002, "loss": 2.5663, "step": 306770 }, { "epoch": 0.611173976794594, "grad_norm": 0.17639270424842834, "learning_rate": 0.002, "loss": 2.56, "step": 306780 }, { "epoch": 0.6111938990182328, "grad_norm": 0.17522503435611725, "learning_rate": 0.002, "loss": 2.5514, "step": 306790 }, { "epoch": 0.6112138212418717, "grad_norm": 0.15559029579162598, "learning_rate": 0.002, "loss": 2.5477, "step": 306800 }, { "epoch": 0.6112337434655106, "grad_norm": 0.1834390014410019, "learning_rate": 0.002, "loss": 2.5626, "step": 306810 }, { "epoch": 0.6112536656891495, "grad_norm": 0.19570650160312653, "learning_rate": 0.002, "loss": 2.5659, "step": 306820 }, { "epoch": 0.6112735879127885, "grad_norm": 0.13736242055892944, "learning_rate": 0.002, "loss": 2.5518, "step": 306830 }, { "epoch": 0.6112935101364274, "grad_norm": 0.15646904706954956, "learning_rate": 0.002, "loss": 2.5601, "step": 306840 }, { "epoch": 0.6113134323600663, "grad_norm": 0.17231447994709015, "learning_rate": 0.002, "loss": 2.5592, "step": 306850 }, { "epoch": 0.6113333545837052, "grad_norm": 0.16960720717906952, "learning_rate": 0.002, "loss": 2.5412, "step": 306860 }, { "epoch": 0.6113532768073441, "grad_norm": 0.18543021380901337, "learning_rate": 0.002, "loss": 2.5766, "step": 306870 }, { "epoch": 0.6113731990309831, "grad_norm": 0.17805194854736328, "learning_rate": 0.002, "loss": 2.5581, "step": 306880 }, { "epoch": 0.611393121254622, "grad_norm": 0.1659652590751648, "learning_rate": 0.002, "loss": 2.5554, "step": 306890 }, { "epoch": 0.6114130434782609, "grad_norm": 0.16726405918598175, "learning_rate": 0.002, "loss": 2.5415, "step": 306900 }, { "epoch": 0.6114329657018998, "grad_norm": 0.13040801882743835, "learning_rate": 0.002, "loss": 2.5529, "step": 306910 }, { "epoch": 0.6114528879255386, "grad_norm": 0.16402439773082733, "learning_rate": 0.002, "loss": 2.5497, "step": 306920 }, { "epoch": 0.6114728101491776, "grad_norm": 0.17274440824985504, "learning_rate": 0.002, "loss": 2.5558, "step": 306930 }, { "epoch": 0.6114927323728165, "grad_norm": 0.17580999433994293, "learning_rate": 0.002, "loss": 2.5487, "step": 306940 }, { "epoch": 0.6115126545964554, "grad_norm": 0.17627732455730438, "learning_rate": 0.002, "loss": 2.5448, "step": 306950 }, { "epoch": 0.6115325768200943, "grad_norm": 0.15729083120822906, "learning_rate": 0.002, "loss": 2.5504, "step": 306960 }, { "epoch": 0.6115524990437332, "grad_norm": 0.14623621106147766, "learning_rate": 0.002, "loss": 2.5696, "step": 306970 }, { "epoch": 0.6115724212673722, "grad_norm": 0.15902796387672424, "learning_rate": 0.002, "loss": 2.5549, "step": 306980 }, { "epoch": 0.6115923434910111, "grad_norm": 0.170985147356987, "learning_rate": 0.002, "loss": 2.5515, "step": 306990 }, { "epoch": 0.61161226571465, "grad_norm": 0.16992796957492828, "learning_rate": 0.002, "loss": 2.5586, "step": 307000 }, { "epoch": 0.6116321879382889, "grad_norm": 0.1446792483329773, "learning_rate": 0.002, "loss": 2.5647, "step": 307010 }, { "epoch": 0.6116521101619279, "grad_norm": 0.166469544172287, "learning_rate": 0.002, "loss": 2.5552, "step": 307020 }, { "epoch": 0.6116720323855668, "grad_norm": 0.23134438693523407, "learning_rate": 0.002, "loss": 2.5521, "step": 307030 }, { "epoch": 0.6116919546092057, "grad_norm": 0.1615980863571167, "learning_rate": 0.002, "loss": 2.5827, "step": 307040 }, { "epoch": 0.6117118768328446, "grad_norm": 0.14983803033828735, "learning_rate": 0.002, "loss": 2.556, "step": 307050 }, { "epoch": 0.6117317990564834, "grad_norm": 0.140728160738945, "learning_rate": 0.002, "loss": 2.5664, "step": 307060 }, { "epoch": 0.6117517212801225, "grad_norm": 0.1568419486284256, "learning_rate": 0.002, "loss": 2.5494, "step": 307070 }, { "epoch": 0.6117716435037613, "grad_norm": 0.19821284711360931, "learning_rate": 0.002, "loss": 2.5676, "step": 307080 }, { "epoch": 0.6117915657274002, "grad_norm": 0.17830342054367065, "learning_rate": 0.002, "loss": 2.564, "step": 307090 }, { "epoch": 0.6118114879510391, "grad_norm": 0.1514434516429901, "learning_rate": 0.002, "loss": 2.5513, "step": 307100 }, { "epoch": 0.611831410174678, "grad_norm": 0.1684001386165619, "learning_rate": 0.002, "loss": 2.5548, "step": 307110 }, { "epoch": 0.611851332398317, "grad_norm": 0.1468651294708252, "learning_rate": 0.002, "loss": 2.5473, "step": 307120 }, { "epoch": 0.6118712546219559, "grad_norm": 0.3819838762283325, "learning_rate": 0.002, "loss": 2.557, "step": 307130 }, { "epoch": 0.6118911768455948, "grad_norm": 0.1528909057378769, "learning_rate": 0.002, "loss": 2.5665, "step": 307140 }, { "epoch": 0.6119110990692337, "grad_norm": 0.18982410430908203, "learning_rate": 0.002, "loss": 2.5535, "step": 307150 }, { "epoch": 0.6119310212928726, "grad_norm": 0.13656491041183472, "learning_rate": 0.002, "loss": 2.5543, "step": 307160 }, { "epoch": 0.6119509435165116, "grad_norm": 0.18093979358673096, "learning_rate": 0.002, "loss": 2.5642, "step": 307170 }, { "epoch": 0.6119708657401505, "grad_norm": 0.18546007573604584, "learning_rate": 0.002, "loss": 2.5398, "step": 307180 }, { "epoch": 0.6119907879637894, "grad_norm": 0.15669409930706024, "learning_rate": 0.002, "loss": 2.5534, "step": 307190 }, { "epoch": 0.6120107101874283, "grad_norm": 0.18633799254894257, "learning_rate": 0.002, "loss": 2.5488, "step": 307200 }, { "epoch": 0.6120306324110671, "grad_norm": 0.20296970009803772, "learning_rate": 0.002, "loss": 2.5555, "step": 307210 }, { "epoch": 0.6120505546347061, "grad_norm": 0.1683645397424698, "learning_rate": 0.002, "loss": 2.574, "step": 307220 }, { "epoch": 0.612070476858345, "grad_norm": 0.15028423070907593, "learning_rate": 0.002, "loss": 2.5561, "step": 307230 }, { "epoch": 0.6120903990819839, "grad_norm": 0.17192144691944122, "learning_rate": 0.002, "loss": 2.563, "step": 307240 }, { "epoch": 0.6121103213056228, "grad_norm": 0.1516752988100052, "learning_rate": 0.002, "loss": 2.5576, "step": 307250 }, { "epoch": 0.6121302435292617, "grad_norm": 0.18157269060611725, "learning_rate": 0.002, "loss": 2.5599, "step": 307260 }, { "epoch": 0.6121501657529007, "grad_norm": 0.15005815029144287, "learning_rate": 0.002, "loss": 2.5496, "step": 307270 }, { "epoch": 0.6121700879765396, "grad_norm": 0.17026248574256897, "learning_rate": 0.002, "loss": 2.5423, "step": 307280 }, { "epoch": 0.6121900102001785, "grad_norm": 0.15539710223674774, "learning_rate": 0.002, "loss": 2.5556, "step": 307290 }, { "epoch": 0.6122099324238174, "grad_norm": 0.1558990478515625, "learning_rate": 0.002, "loss": 2.5496, "step": 307300 }, { "epoch": 0.6122298546474564, "grad_norm": 0.21197907626628876, "learning_rate": 0.002, "loss": 2.5563, "step": 307310 }, { "epoch": 0.6122497768710953, "grad_norm": 0.16008196771144867, "learning_rate": 0.002, "loss": 2.5619, "step": 307320 }, { "epoch": 0.6122696990947342, "grad_norm": 0.1621982306241989, "learning_rate": 0.002, "loss": 2.557, "step": 307330 }, { "epoch": 0.6122896213183731, "grad_norm": 0.1531728059053421, "learning_rate": 0.002, "loss": 2.5559, "step": 307340 }, { "epoch": 0.612309543542012, "grad_norm": 0.17363563179969788, "learning_rate": 0.002, "loss": 2.5613, "step": 307350 }, { "epoch": 0.612329465765651, "grad_norm": 0.16186706721782684, "learning_rate": 0.002, "loss": 2.5486, "step": 307360 }, { "epoch": 0.6123493879892898, "grad_norm": 0.19363854825496674, "learning_rate": 0.002, "loss": 2.5577, "step": 307370 }, { "epoch": 0.6123693102129287, "grad_norm": 0.16559113562107086, "learning_rate": 0.002, "loss": 2.5573, "step": 307380 }, { "epoch": 0.6123892324365676, "grad_norm": 0.18311166763305664, "learning_rate": 0.002, "loss": 2.5392, "step": 307390 }, { "epoch": 0.6124091546602065, "grad_norm": 0.14967647194862366, "learning_rate": 0.002, "loss": 2.56, "step": 307400 }, { "epoch": 0.6124290768838455, "grad_norm": 0.2823984622955322, "learning_rate": 0.002, "loss": 2.5683, "step": 307410 }, { "epoch": 0.6124489991074844, "grad_norm": 0.17234860360622406, "learning_rate": 0.002, "loss": 2.5621, "step": 307420 }, { "epoch": 0.6124689213311233, "grad_norm": 0.14458295702934265, "learning_rate": 0.002, "loss": 2.5519, "step": 307430 }, { "epoch": 0.6124888435547622, "grad_norm": 0.15418070554733276, "learning_rate": 0.002, "loss": 2.5612, "step": 307440 }, { "epoch": 0.6125087657784011, "grad_norm": 0.15462733805179596, "learning_rate": 0.002, "loss": 2.5518, "step": 307450 }, { "epoch": 0.6125286880020401, "grad_norm": 0.1723245531320572, "learning_rate": 0.002, "loss": 2.5582, "step": 307460 }, { "epoch": 0.612548610225679, "grad_norm": 0.16567184031009674, "learning_rate": 0.002, "loss": 2.5518, "step": 307470 }, { "epoch": 0.6125685324493179, "grad_norm": 0.17451836168766022, "learning_rate": 0.002, "loss": 2.5744, "step": 307480 }, { "epoch": 0.6125884546729568, "grad_norm": 0.1666245311498642, "learning_rate": 0.002, "loss": 2.5437, "step": 307490 }, { "epoch": 0.6126083768965956, "grad_norm": 0.15960100293159485, "learning_rate": 0.002, "loss": 2.5629, "step": 307500 }, { "epoch": 0.6126282991202346, "grad_norm": 0.16263563930988312, "learning_rate": 0.002, "loss": 2.5402, "step": 307510 }, { "epoch": 0.6126482213438735, "grad_norm": 0.15909937024116516, "learning_rate": 0.002, "loss": 2.5519, "step": 307520 }, { "epoch": 0.6126681435675124, "grad_norm": 0.17042158544063568, "learning_rate": 0.002, "loss": 2.5592, "step": 307530 }, { "epoch": 0.6126880657911513, "grad_norm": 0.16244658827781677, "learning_rate": 0.002, "loss": 2.5628, "step": 307540 }, { "epoch": 0.6127079880147902, "grad_norm": 0.15950125455856323, "learning_rate": 0.002, "loss": 2.556, "step": 307550 }, { "epoch": 0.6127279102384292, "grad_norm": 0.1944454163312912, "learning_rate": 0.002, "loss": 2.5585, "step": 307560 }, { "epoch": 0.6127478324620681, "grad_norm": 0.15209054946899414, "learning_rate": 0.002, "loss": 2.5569, "step": 307570 }, { "epoch": 0.612767754685707, "grad_norm": 0.1974761039018631, "learning_rate": 0.002, "loss": 2.568, "step": 307580 }, { "epoch": 0.6127876769093459, "grad_norm": 0.13967759907245636, "learning_rate": 0.002, "loss": 2.5594, "step": 307590 }, { "epoch": 0.6128075991329849, "grad_norm": 0.1470036506652832, "learning_rate": 0.002, "loss": 2.5616, "step": 307600 }, { "epoch": 0.6128275213566238, "grad_norm": 0.1589013785123825, "learning_rate": 0.002, "loss": 2.5458, "step": 307610 }, { "epoch": 0.6128474435802627, "grad_norm": 0.17272676527500153, "learning_rate": 0.002, "loss": 2.5497, "step": 307620 }, { "epoch": 0.6128673658039016, "grad_norm": 0.1733609139919281, "learning_rate": 0.002, "loss": 2.5717, "step": 307630 }, { "epoch": 0.6128872880275404, "grad_norm": 0.13880738615989685, "learning_rate": 0.002, "loss": 2.5567, "step": 307640 }, { "epoch": 0.6129072102511794, "grad_norm": 0.14539211988449097, "learning_rate": 0.002, "loss": 2.5417, "step": 307650 }, { "epoch": 0.6129271324748183, "grad_norm": 0.24737142026424408, "learning_rate": 0.002, "loss": 2.551, "step": 307660 }, { "epoch": 0.6129470546984572, "grad_norm": 0.16481125354766846, "learning_rate": 0.002, "loss": 2.5649, "step": 307670 }, { "epoch": 0.6129669769220961, "grad_norm": 0.1552559733390808, "learning_rate": 0.002, "loss": 2.5471, "step": 307680 }, { "epoch": 0.612986899145735, "grad_norm": 0.1696215122938156, "learning_rate": 0.002, "loss": 2.5606, "step": 307690 }, { "epoch": 0.613006821369374, "grad_norm": 0.1458607017993927, "learning_rate": 0.002, "loss": 2.5563, "step": 307700 }, { "epoch": 0.6130267435930129, "grad_norm": 0.15761348605155945, "learning_rate": 0.002, "loss": 2.5583, "step": 307710 }, { "epoch": 0.6130466658166518, "grad_norm": 0.16361474990844727, "learning_rate": 0.002, "loss": 2.5535, "step": 307720 }, { "epoch": 0.6130665880402907, "grad_norm": 0.1793060302734375, "learning_rate": 0.002, "loss": 2.5582, "step": 307730 }, { "epoch": 0.6130865102639296, "grad_norm": 0.1742348074913025, "learning_rate": 0.002, "loss": 2.5555, "step": 307740 }, { "epoch": 0.6131064324875686, "grad_norm": 0.1569472998380661, "learning_rate": 0.002, "loss": 2.5555, "step": 307750 }, { "epoch": 0.6131263547112075, "grad_norm": 0.15248623490333557, "learning_rate": 0.002, "loss": 2.5579, "step": 307760 }, { "epoch": 0.6131462769348464, "grad_norm": 0.1579587310552597, "learning_rate": 0.002, "loss": 2.5522, "step": 307770 }, { "epoch": 0.6131661991584852, "grad_norm": 0.16594485938549042, "learning_rate": 0.002, "loss": 2.5559, "step": 307780 }, { "epoch": 0.6131861213821241, "grad_norm": 0.14891716837882996, "learning_rate": 0.002, "loss": 2.5473, "step": 307790 }, { "epoch": 0.6132060436057631, "grad_norm": 0.16115200519561768, "learning_rate": 0.002, "loss": 2.5586, "step": 307800 }, { "epoch": 0.613225965829402, "grad_norm": 0.16780312359333038, "learning_rate": 0.002, "loss": 2.5406, "step": 307810 }, { "epoch": 0.6132458880530409, "grad_norm": 0.16509035229682922, "learning_rate": 0.002, "loss": 2.5393, "step": 307820 }, { "epoch": 0.6132658102766798, "grad_norm": 0.17864544689655304, "learning_rate": 0.002, "loss": 2.5452, "step": 307830 }, { "epoch": 0.6132857325003187, "grad_norm": 0.1519169956445694, "learning_rate": 0.002, "loss": 2.55, "step": 307840 }, { "epoch": 0.6133056547239577, "grad_norm": 0.14744777977466583, "learning_rate": 0.002, "loss": 2.5473, "step": 307850 }, { "epoch": 0.6133255769475966, "grad_norm": 0.17092908918857574, "learning_rate": 0.002, "loss": 2.554, "step": 307860 }, { "epoch": 0.6133454991712355, "grad_norm": 0.15851794183254242, "learning_rate": 0.002, "loss": 2.5649, "step": 307870 }, { "epoch": 0.6133654213948744, "grad_norm": 0.19290217757225037, "learning_rate": 0.002, "loss": 2.5588, "step": 307880 }, { "epoch": 0.6133853436185134, "grad_norm": 0.15145482122898102, "learning_rate": 0.002, "loss": 2.5451, "step": 307890 }, { "epoch": 0.6134052658421523, "grad_norm": 0.1695166528224945, "learning_rate": 0.002, "loss": 2.5589, "step": 307900 }, { "epoch": 0.6134251880657912, "grad_norm": 0.1780221313238144, "learning_rate": 0.002, "loss": 2.5495, "step": 307910 }, { "epoch": 0.61344511028943, "grad_norm": 0.17555780708789825, "learning_rate": 0.002, "loss": 2.5621, "step": 307920 }, { "epoch": 0.6134650325130689, "grad_norm": 0.1616918444633484, "learning_rate": 0.002, "loss": 2.5482, "step": 307930 }, { "epoch": 0.6134849547367079, "grad_norm": 0.14530473947525024, "learning_rate": 0.002, "loss": 2.5456, "step": 307940 }, { "epoch": 0.6135048769603468, "grad_norm": 0.18783016502857208, "learning_rate": 0.002, "loss": 2.5572, "step": 307950 }, { "epoch": 0.6135247991839857, "grad_norm": 0.1967892199754715, "learning_rate": 0.002, "loss": 2.5427, "step": 307960 }, { "epoch": 0.6135447214076246, "grad_norm": 0.15517394244670868, "learning_rate": 0.002, "loss": 2.5491, "step": 307970 }, { "epoch": 0.6135646436312635, "grad_norm": 0.1520608514547348, "learning_rate": 0.002, "loss": 2.5583, "step": 307980 }, { "epoch": 0.6135845658549025, "grad_norm": 0.1409468948841095, "learning_rate": 0.002, "loss": 2.5536, "step": 307990 }, { "epoch": 0.6136044880785414, "grad_norm": 0.18400967121124268, "learning_rate": 0.002, "loss": 2.5631, "step": 308000 }, { "epoch": 0.6136244103021803, "grad_norm": 0.13780789077281952, "learning_rate": 0.002, "loss": 2.5572, "step": 308010 }, { "epoch": 0.6136443325258192, "grad_norm": 0.1550699919462204, "learning_rate": 0.002, "loss": 2.5393, "step": 308020 }, { "epoch": 0.6136642547494581, "grad_norm": 0.25095075368881226, "learning_rate": 0.002, "loss": 2.5568, "step": 308030 }, { "epoch": 0.6136841769730971, "grad_norm": 0.16576698422431946, "learning_rate": 0.002, "loss": 2.5581, "step": 308040 }, { "epoch": 0.613704099196736, "grad_norm": 0.14321967959403992, "learning_rate": 0.002, "loss": 2.545, "step": 308050 }, { "epoch": 0.6137240214203749, "grad_norm": 0.15454816818237305, "learning_rate": 0.002, "loss": 2.5617, "step": 308060 }, { "epoch": 0.6137439436440137, "grad_norm": 0.13226664066314697, "learning_rate": 0.002, "loss": 2.5488, "step": 308070 }, { "epoch": 0.6137638658676526, "grad_norm": 0.18388631939888, "learning_rate": 0.002, "loss": 2.5641, "step": 308080 }, { "epoch": 0.6137837880912916, "grad_norm": 0.15094706416130066, "learning_rate": 0.002, "loss": 2.5584, "step": 308090 }, { "epoch": 0.6138037103149305, "grad_norm": 0.18002347648143768, "learning_rate": 0.002, "loss": 2.5681, "step": 308100 }, { "epoch": 0.6138236325385694, "grad_norm": 0.17205752432346344, "learning_rate": 0.002, "loss": 2.5441, "step": 308110 }, { "epoch": 0.6138435547622083, "grad_norm": 0.16615866124629974, "learning_rate": 0.002, "loss": 2.5639, "step": 308120 }, { "epoch": 0.6138634769858472, "grad_norm": 0.18116238713264465, "learning_rate": 0.002, "loss": 2.5519, "step": 308130 }, { "epoch": 0.6138833992094862, "grad_norm": 0.16605854034423828, "learning_rate": 0.002, "loss": 2.5539, "step": 308140 }, { "epoch": 0.6139033214331251, "grad_norm": 0.16798296570777893, "learning_rate": 0.002, "loss": 2.5534, "step": 308150 }, { "epoch": 0.613923243656764, "grad_norm": 0.19132280349731445, "learning_rate": 0.002, "loss": 2.551, "step": 308160 }, { "epoch": 0.6139431658804029, "grad_norm": 0.17819474637508392, "learning_rate": 0.002, "loss": 2.5435, "step": 308170 }, { "epoch": 0.6139630881040418, "grad_norm": 0.152363583445549, "learning_rate": 0.002, "loss": 2.5474, "step": 308180 }, { "epoch": 0.6139830103276808, "grad_norm": 0.15169428288936615, "learning_rate": 0.002, "loss": 2.5566, "step": 308190 }, { "epoch": 0.6140029325513197, "grad_norm": 0.18709059059619904, "learning_rate": 0.002, "loss": 2.5547, "step": 308200 }, { "epoch": 0.6140228547749585, "grad_norm": 0.15988366305828094, "learning_rate": 0.002, "loss": 2.5619, "step": 308210 }, { "epoch": 0.6140427769985974, "grad_norm": 0.1751609444618225, "learning_rate": 0.002, "loss": 2.5466, "step": 308220 }, { "epoch": 0.6140626992222364, "grad_norm": 0.17637518048286438, "learning_rate": 0.002, "loss": 2.5503, "step": 308230 }, { "epoch": 0.6140826214458753, "grad_norm": 0.1441250443458557, "learning_rate": 0.002, "loss": 2.5607, "step": 308240 }, { "epoch": 0.6141025436695142, "grad_norm": 0.15739957988262177, "learning_rate": 0.002, "loss": 2.5592, "step": 308250 }, { "epoch": 0.6141224658931531, "grad_norm": 0.17779186367988586, "learning_rate": 0.002, "loss": 2.5457, "step": 308260 }, { "epoch": 0.614142388116792, "grad_norm": 0.16797390580177307, "learning_rate": 0.002, "loss": 2.5558, "step": 308270 }, { "epoch": 0.614162310340431, "grad_norm": 0.1629939079284668, "learning_rate": 0.002, "loss": 2.5424, "step": 308280 }, { "epoch": 0.6141822325640699, "grad_norm": 0.15961910784244537, "learning_rate": 0.002, "loss": 2.5661, "step": 308290 }, { "epoch": 0.6142021547877088, "grad_norm": 0.14604952931404114, "learning_rate": 0.002, "loss": 2.5776, "step": 308300 }, { "epoch": 0.6142220770113477, "grad_norm": 0.16329306364059448, "learning_rate": 0.002, "loss": 2.5549, "step": 308310 }, { "epoch": 0.6142419992349866, "grad_norm": 0.16901588439941406, "learning_rate": 0.002, "loss": 2.5663, "step": 308320 }, { "epoch": 0.6142619214586256, "grad_norm": 0.21652087569236755, "learning_rate": 0.002, "loss": 2.5477, "step": 308330 }, { "epoch": 0.6142818436822645, "grad_norm": 0.17306236922740936, "learning_rate": 0.002, "loss": 2.5531, "step": 308340 }, { "epoch": 0.6143017659059034, "grad_norm": 0.16850829124450684, "learning_rate": 0.002, "loss": 2.5479, "step": 308350 }, { "epoch": 0.6143216881295422, "grad_norm": 0.16215285658836365, "learning_rate": 0.002, "loss": 2.5362, "step": 308360 }, { "epoch": 0.6143416103531811, "grad_norm": 0.19291354715824127, "learning_rate": 0.002, "loss": 2.5563, "step": 308370 }, { "epoch": 0.6143615325768201, "grad_norm": 0.1568240523338318, "learning_rate": 0.002, "loss": 2.5822, "step": 308380 }, { "epoch": 0.614381454800459, "grad_norm": 0.16523756086826324, "learning_rate": 0.002, "loss": 2.5558, "step": 308390 }, { "epoch": 0.6144013770240979, "grad_norm": 0.17921149730682373, "learning_rate": 0.002, "loss": 2.5428, "step": 308400 }, { "epoch": 0.6144212992477368, "grad_norm": 0.1538916975259781, "learning_rate": 0.002, "loss": 2.5569, "step": 308410 }, { "epoch": 0.6144412214713757, "grad_norm": 0.1424185037612915, "learning_rate": 0.002, "loss": 2.5524, "step": 308420 }, { "epoch": 0.6144611436950147, "grad_norm": 0.17644168436527252, "learning_rate": 0.002, "loss": 2.5501, "step": 308430 }, { "epoch": 0.6144810659186536, "grad_norm": 0.16520091891288757, "learning_rate": 0.002, "loss": 2.5503, "step": 308440 }, { "epoch": 0.6145009881422925, "grad_norm": 0.16902883350849152, "learning_rate": 0.002, "loss": 2.5744, "step": 308450 }, { "epoch": 0.6145209103659314, "grad_norm": 0.16484951972961426, "learning_rate": 0.002, "loss": 2.5582, "step": 308460 }, { "epoch": 0.6145408325895703, "grad_norm": 0.1451185941696167, "learning_rate": 0.002, "loss": 2.5611, "step": 308470 }, { "epoch": 0.6145607548132093, "grad_norm": 0.15486925840377808, "learning_rate": 0.002, "loss": 2.5519, "step": 308480 }, { "epoch": 0.6145806770368482, "grad_norm": 0.1467236429452896, "learning_rate": 0.002, "loss": 2.5536, "step": 308490 }, { "epoch": 0.614600599260487, "grad_norm": 0.1506585329771042, "learning_rate": 0.002, "loss": 2.5561, "step": 308500 }, { "epoch": 0.6146205214841259, "grad_norm": 0.18953421711921692, "learning_rate": 0.002, "loss": 2.5483, "step": 308510 }, { "epoch": 0.6146404437077649, "grad_norm": 0.1567264050245285, "learning_rate": 0.002, "loss": 2.5669, "step": 308520 }, { "epoch": 0.6146603659314038, "grad_norm": 0.20059697329998016, "learning_rate": 0.002, "loss": 2.5645, "step": 308530 }, { "epoch": 0.6146802881550427, "grad_norm": 0.1455877423286438, "learning_rate": 0.002, "loss": 2.5604, "step": 308540 }, { "epoch": 0.6147002103786816, "grad_norm": 0.16972953081130981, "learning_rate": 0.002, "loss": 2.553, "step": 308550 }, { "epoch": 0.6147201326023205, "grad_norm": 0.18926222622394562, "learning_rate": 0.002, "loss": 2.5528, "step": 308560 }, { "epoch": 0.6147400548259595, "grad_norm": 0.1671890765428543, "learning_rate": 0.002, "loss": 2.5506, "step": 308570 }, { "epoch": 0.6147599770495984, "grad_norm": 0.15428954362869263, "learning_rate": 0.002, "loss": 2.5563, "step": 308580 }, { "epoch": 0.6147798992732373, "grad_norm": 0.17869096994400024, "learning_rate": 0.002, "loss": 2.569, "step": 308590 }, { "epoch": 0.6147998214968762, "grad_norm": 0.16502809524536133, "learning_rate": 0.002, "loss": 2.5572, "step": 308600 }, { "epoch": 0.6148197437205151, "grad_norm": 0.1544218510389328, "learning_rate": 0.002, "loss": 2.5527, "step": 308610 }, { "epoch": 0.6148396659441541, "grad_norm": 0.1565283238887787, "learning_rate": 0.002, "loss": 2.5637, "step": 308620 }, { "epoch": 0.614859588167793, "grad_norm": 0.16994601488113403, "learning_rate": 0.002, "loss": 2.5566, "step": 308630 }, { "epoch": 0.6148795103914318, "grad_norm": 0.18593989312648773, "learning_rate": 0.002, "loss": 2.556, "step": 308640 }, { "epoch": 0.6148994326150707, "grad_norm": 0.15316510200500488, "learning_rate": 0.002, "loss": 2.5657, "step": 308650 }, { "epoch": 0.6149193548387096, "grad_norm": 0.171913743019104, "learning_rate": 0.002, "loss": 2.5427, "step": 308660 }, { "epoch": 0.6149392770623486, "grad_norm": 0.19627904891967773, "learning_rate": 0.002, "loss": 2.5507, "step": 308670 }, { "epoch": 0.6149591992859875, "grad_norm": 0.15229447185993195, "learning_rate": 0.002, "loss": 2.5797, "step": 308680 }, { "epoch": 0.6149791215096264, "grad_norm": 0.1751028150320053, "learning_rate": 0.002, "loss": 2.5591, "step": 308690 }, { "epoch": 0.6149990437332653, "grad_norm": 0.15973098576068878, "learning_rate": 0.002, "loss": 2.5535, "step": 308700 }, { "epoch": 0.6150189659569042, "grad_norm": 0.19028383493423462, "learning_rate": 0.002, "loss": 2.5521, "step": 308710 }, { "epoch": 0.6150388881805432, "grad_norm": 0.20154891908168793, "learning_rate": 0.002, "loss": 2.5597, "step": 308720 }, { "epoch": 0.6150588104041821, "grad_norm": 0.17012006044387817, "learning_rate": 0.002, "loss": 2.5391, "step": 308730 }, { "epoch": 0.615078732627821, "grad_norm": 0.16234436631202698, "learning_rate": 0.002, "loss": 2.5541, "step": 308740 }, { "epoch": 0.6150986548514599, "grad_norm": 0.1672687828540802, "learning_rate": 0.002, "loss": 2.5537, "step": 308750 }, { "epoch": 0.6151185770750988, "grad_norm": 0.16082285344600677, "learning_rate": 0.002, "loss": 2.5567, "step": 308760 }, { "epoch": 0.6151384992987378, "grad_norm": 0.15017889440059662, "learning_rate": 0.002, "loss": 2.5633, "step": 308770 }, { "epoch": 0.6151584215223767, "grad_norm": 0.1597783863544464, "learning_rate": 0.002, "loss": 2.5615, "step": 308780 }, { "epoch": 0.6151783437460155, "grad_norm": 0.1874731034040451, "learning_rate": 0.002, "loss": 2.565, "step": 308790 }, { "epoch": 0.6151982659696544, "grad_norm": 0.18561582267284393, "learning_rate": 0.002, "loss": 2.5611, "step": 308800 }, { "epoch": 0.6152181881932934, "grad_norm": 0.17394328117370605, "learning_rate": 0.002, "loss": 2.5583, "step": 308810 }, { "epoch": 0.6152381104169323, "grad_norm": 0.17752976715564728, "learning_rate": 0.002, "loss": 2.5531, "step": 308820 }, { "epoch": 0.6152580326405712, "grad_norm": 0.16736550629138947, "learning_rate": 0.002, "loss": 2.5575, "step": 308830 }, { "epoch": 0.6152779548642101, "grad_norm": 0.16673608124256134, "learning_rate": 0.002, "loss": 2.5458, "step": 308840 }, { "epoch": 0.615297877087849, "grad_norm": 0.17399102449417114, "learning_rate": 0.002, "loss": 2.555, "step": 308850 }, { "epoch": 0.615317799311488, "grad_norm": 0.166429802775383, "learning_rate": 0.002, "loss": 2.5492, "step": 308860 }, { "epoch": 0.6153377215351269, "grad_norm": 0.16921009123325348, "learning_rate": 0.002, "loss": 2.5518, "step": 308870 }, { "epoch": 0.6153576437587658, "grad_norm": 0.1651470810174942, "learning_rate": 0.002, "loss": 2.5481, "step": 308880 }, { "epoch": 0.6153775659824047, "grad_norm": 0.1947069764137268, "learning_rate": 0.002, "loss": 2.5419, "step": 308890 }, { "epoch": 0.6153974882060436, "grad_norm": 0.14504152536392212, "learning_rate": 0.002, "loss": 2.5584, "step": 308900 }, { "epoch": 0.6154174104296826, "grad_norm": 0.17421136796474457, "learning_rate": 0.002, "loss": 2.5558, "step": 308910 }, { "epoch": 0.6154373326533215, "grad_norm": 0.1942012906074524, "learning_rate": 0.002, "loss": 2.545, "step": 308920 }, { "epoch": 0.6154572548769603, "grad_norm": 0.17396360635757446, "learning_rate": 0.002, "loss": 2.5678, "step": 308930 }, { "epoch": 0.6154771771005992, "grad_norm": 0.17621424794197083, "learning_rate": 0.002, "loss": 2.5629, "step": 308940 }, { "epoch": 0.6154970993242381, "grad_norm": 0.18504181504249573, "learning_rate": 0.002, "loss": 2.559, "step": 308950 }, { "epoch": 0.6155170215478771, "grad_norm": 0.16087768971920013, "learning_rate": 0.002, "loss": 2.5599, "step": 308960 }, { "epoch": 0.615536943771516, "grad_norm": 0.1968996375799179, "learning_rate": 0.002, "loss": 2.5526, "step": 308970 }, { "epoch": 0.6155568659951549, "grad_norm": 0.16807642579078674, "learning_rate": 0.002, "loss": 2.5623, "step": 308980 }, { "epoch": 0.6155767882187938, "grad_norm": 0.16195401549339294, "learning_rate": 0.002, "loss": 2.5609, "step": 308990 }, { "epoch": 0.6155967104424327, "grad_norm": 0.22579924762248993, "learning_rate": 0.002, "loss": 2.5577, "step": 309000 }, { "epoch": 0.6156166326660717, "grad_norm": 0.16371013224124908, "learning_rate": 0.002, "loss": 2.5503, "step": 309010 }, { "epoch": 0.6156365548897106, "grad_norm": 0.184713214635849, "learning_rate": 0.002, "loss": 2.5623, "step": 309020 }, { "epoch": 0.6156564771133495, "grad_norm": 0.17804785072803497, "learning_rate": 0.002, "loss": 2.5484, "step": 309030 }, { "epoch": 0.6156763993369884, "grad_norm": 0.13536570966243744, "learning_rate": 0.002, "loss": 2.5495, "step": 309040 }, { "epoch": 0.6156963215606273, "grad_norm": 0.1643529236316681, "learning_rate": 0.002, "loss": 2.5433, "step": 309050 }, { "epoch": 0.6157162437842663, "grad_norm": 0.17539918422698975, "learning_rate": 0.002, "loss": 2.5588, "step": 309060 }, { "epoch": 0.6157361660079052, "grad_norm": 0.17653542757034302, "learning_rate": 0.002, "loss": 2.5659, "step": 309070 }, { "epoch": 0.615756088231544, "grad_norm": 0.17710888385772705, "learning_rate": 0.002, "loss": 2.5591, "step": 309080 }, { "epoch": 0.6157760104551829, "grad_norm": 0.15267729759216309, "learning_rate": 0.002, "loss": 2.558, "step": 309090 }, { "epoch": 0.6157959326788219, "grad_norm": 0.16516347229480743, "learning_rate": 0.002, "loss": 2.5676, "step": 309100 }, { "epoch": 0.6158158549024608, "grad_norm": 0.15012551844120026, "learning_rate": 0.002, "loss": 2.5455, "step": 309110 }, { "epoch": 0.6158357771260997, "grad_norm": 0.14354375004768372, "learning_rate": 0.002, "loss": 2.5474, "step": 309120 }, { "epoch": 0.6158556993497386, "grad_norm": 0.18438205122947693, "learning_rate": 0.002, "loss": 2.5585, "step": 309130 }, { "epoch": 0.6158756215733775, "grad_norm": 0.16310828924179077, "learning_rate": 0.002, "loss": 2.55, "step": 309140 }, { "epoch": 0.6158955437970165, "grad_norm": 0.15757527947425842, "learning_rate": 0.002, "loss": 2.5502, "step": 309150 }, { "epoch": 0.6159154660206554, "grad_norm": 0.17265728116035461, "learning_rate": 0.002, "loss": 2.5596, "step": 309160 }, { "epoch": 0.6159353882442943, "grad_norm": 0.18702657520771027, "learning_rate": 0.002, "loss": 2.5612, "step": 309170 }, { "epoch": 0.6159553104679332, "grad_norm": 0.24077710509300232, "learning_rate": 0.002, "loss": 2.5562, "step": 309180 }, { "epoch": 0.6159752326915721, "grad_norm": 0.16388805210590363, "learning_rate": 0.002, "loss": 2.5574, "step": 309190 }, { "epoch": 0.6159951549152111, "grad_norm": 0.16221527755260468, "learning_rate": 0.002, "loss": 2.5558, "step": 309200 }, { "epoch": 0.61601507713885, "grad_norm": 0.1552165448665619, "learning_rate": 0.002, "loss": 2.5507, "step": 309210 }, { "epoch": 0.6160349993624888, "grad_norm": 0.1681544929742813, "learning_rate": 0.002, "loss": 2.5666, "step": 309220 }, { "epoch": 0.6160549215861277, "grad_norm": 0.176787868142128, "learning_rate": 0.002, "loss": 2.5596, "step": 309230 }, { "epoch": 0.6160748438097666, "grad_norm": 0.16764923930168152, "learning_rate": 0.002, "loss": 2.5511, "step": 309240 }, { "epoch": 0.6160947660334056, "grad_norm": 0.15373091399669647, "learning_rate": 0.002, "loss": 2.5549, "step": 309250 }, { "epoch": 0.6161146882570445, "grad_norm": 0.15621119737625122, "learning_rate": 0.002, "loss": 2.5465, "step": 309260 }, { "epoch": 0.6161346104806834, "grad_norm": 0.18859852850437164, "learning_rate": 0.002, "loss": 2.5606, "step": 309270 }, { "epoch": 0.6161545327043223, "grad_norm": 0.15497858822345734, "learning_rate": 0.002, "loss": 2.5632, "step": 309280 }, { "epoch": 0.6161744549279612, "grad_norm": 0.16804848611354828, "learning_rate": 0.002, "loss": 2.5653, "step": 309290 }, { "epoch": 0.6161943771516002, "grad_norm": 0.16391345858573914, "learning_rate": 0.002, "loss": 2.5761, "step": 309300 }, { "epoch": 0.6162142993752391, "grad_norm": 0.15281446278095245, "learning_rate": 0.002, "loss": 2.5423, "step": 309310 }, { "epoch": 0.616234221598878, "grad_norm": 0.16035470366477966, "learning_rate": 0.002, "loss": 2.5525, "step": 309320 }, { "epoch": 0.6162541438225169, "grad_norm": 0.18385888636112213, "learning_rate": 0.002, "loss": 2.5599, "step": 309330 }, { "epoch": 0.6162740660461558, "grad_norm": 0.16068892180919647, "learning_rate": 0.002, "loss": 2.5508, "step": 309340 }, { "epoch": 0.6162939882697948, "grad_norm": 0.16432605683803558, "learning_rate": 0.002, "loss": 2.5508, "step": 309350 }, { "epoch": 0.6163139104934336, "grad_norm": 0.163162961602211, "learning_rate": 0.002, "loss": 2.564, "step": 309360 }, { "epoch": 0.6163338327170725, "grad_norm": 0.15252543985843658, "learning_rate": 0.002, "loss": 2.5477, "step": 309370 }, { "epoch": 0.6163537549407114, "grad_norm": 0.16390541195869446, "learning_rate": 0.002, "loss": 2.5585, "step": 309380 }, { "epoch": 0.6163736771643504, "grad_norm": 0.17856302857398987, "learning_rate": 0.002, "loss": 2.5469, "step": 309390 }, { "epoch": 0.6163935993879893, "grad_norm": 0.1334066241979599, "learning_rate": 0.002, "loss": 2.5648, "step": 309400 }, { "epoch": 0.6164135216116282, "grad_norm": 0.1536293774843216, "learning_rate": 0.002, "loss": 2.5616, "step": 309410 }, { "epoch": 0.6164334438352671, "grad_norm": 0.1582845151424408, "learning_rate": 0.002, "loss": 2.565, "step": 309420 }, { "epoch": 0.616453366058906, "grad_norm": 0.18724264204502106, "learning_rate": 0.002, "loss": 2.5392, "step": 309430 }, { "epoch": 0.616473288282545, "grad_norm": 0.17745265364646912, "learning_rate": 0.002, "loss": 2.5461, "step": 309440 }, { "epoch": 0.6164932105061839, "grad_norm": 0.17488990724086761, "learning_rate": 0.002, "loss": 2.5529, "step": 309450 }, { "epoch": 0.6165131327298228, "grad_norm": 0.1764737367630005, "learning_rate": 0.002, "loss": 2.5644, "step": 309460 }, { "epoch": 0.6165330549534617, "grad_norm": 0.1596357375383377, "learning_rate": 0.002, "loss": 2.5562, "step": 309470 }, { "epoch": 0.6165529771771006, "grad_norm": 0.14319145679473877, "learning_rate": 0.002, "loss": 2.5536, "step": 309480 }, { "epoch": 0.6165728994007396, "grad_norm": 0.17422227561473846, "learning_rate": 0.002, "loss": 2.5528, "step": 309490 }, { "epoch": 0.6165928216243785, "grad_norm": 0.16081856191158295, "learning_rate": 0.002, "loss": 2.5433, "step": 309500 }, { "epoch": 0.6166127438480173, "grad_norm": 0.1451907753944397, "learning_rate": 0.002, "loss": 2.5524, "step": 309510 }, { "epoch": 0.6166326660716562, "grad_norm": 0.21674436330795288, "learning_rate": 0.002, "loss": 2.5522, "step": 309520 }, { "epoch": 0.6166525882952951, "grad_norm": 0.17813695967197418, "learning_rate": 0.002, "loss": 2.5672, "step": 309530 }, { "epoch": 0.6166725105189341, "grad_norm": 0.1874653398990631, "learning_rate": 0.002, "loss": 2.5488, "step": 309540 }, { "epoch": 0.616692432742573, "grad_norm": 0.15052713453769684, "learning_rate": 0.002, "loss": 2.5454, "step": 309550 }, { "epoch": 0.6167123549662119, "grad_norm": 0.15701386332511902, "learning_rate": 0.002, "loss": 2.5355, "step": 309560 }, { "epoch": 0.6167322771898508, "grad_norm": 0.16472485661506653, "learning_rate": 0.002, "loss": 2.5505, "step": 309570 }, { "epoch": 0.6167521994134897, "grad_norm": 0.13067542016506195, "learning_rate": 0.002, "loss": 2.5582, "step": 309580 }, { "epoch": 0.6167721216371287, "grad_norm": 0.15568450093269348, "learning_rate": 0.002, "loss": 2.5589, "step": 309590 }, { "epoch": 0.6167920438607676, "grad_norm": 0.17134100198745728, "learning_rate": 0.002, "loss": 2.5567, "step": 309600 }, { "epoch": 0.6168119660844065, "grad_norm": 0.20266997814178467, "learning_rate": 0.002, "loss": 2.5626, "step": 309610 }, { "epoch": 0.6168318883080454, "grad_norm": 0.21895527839660645, "learning_rate": 0.002, "loss": 2.5547, "step": 309620 }, { "epoch": 0.6168518105316843, "grad_norm": 0.16595011949539185, "learning_rate": 0.002, "loss": 2.5394, "step": 309630 }, { "epoch": 0.6168717327553233, "grad_norm": 0.19844137132167816, "learning_rate": 0.002, "loss": 2.5572, "step": 309640 }, { "epoch": 0.6168916549789621, "grad_norm": 0.15717342495918274, "learning_rate": 0.002, "loss": 2.5535, "step": 309650 }, { "epoch": 0.616911577202601, "grad_norm": 0.2123871147632599, "learning_rate": 0.002, "loss": 2.5648, "step": 309660 }, { "epoch": 0.6169314994262399, "grad_norm": 0.16268689930438995, "learning_rate": 0.002, "loss": 2.554, "step": 309670 }, { "epoch": 0.6169514216498789, "grad_norm": 0.15170109272003174, "learning_rate": 0.002, "loss": 2.5436, "step": 309680 }, { "epoch": 0.6169713438735178, "grad_norm": 0.1684572994709015, "learning_rate": 0.002, "loss": 2.5581, "step": 309690 }, { "epoch": 0.6169912660971567, "grad_norm": 0.16439248621463776, "learning_rate": 0.002, "loss": 2.5484, "step": 309700 }, { "epoch": 0.6170111883207956, "grad_norm": 0.1634894460439682, "learning_rate": 0.002, "loss": 2.553, "step": 309710 }, { "epoch": 0.6170311105444345, "grad_norm": 0.1860722154378891, "learning_rate": 0.002, "loss": 2.57, "step": 309720 }, { "epoch": 0.6170510327680735, "grad_norm": 0.1660599559545517, "learning_rate": 0.002, "loss": 2.5466, "step": 309730 }, { "epoch": 0.6170709549917124, "grad_norm": 0.15200309455394745, "learning_rate": 0.002, "loss": 2.5405, "step": 309740 }, { "epoch": 0.6170908772153513, "grad_norm": 0.19890987873077393, "learning_rate": 0.002, "loss": 2.5552, "step": 309750 }, { "epoch": 0.6171107994389902, "grad_norm": 0.1524287611246109, "learning_rate": 0.002, "loss": 2.5561, "step": 309760 }, { "epoch": 0.6171307216626291, "grad_norm": 0.20241454243659973, "learning_rate": 0.002, "loss": 2.5526, "step": 309770 }, { "epoch": 0.6171506438862681, "grad_norm": 0.17813356220722198, "learning_rate": 0.002, "loss": 2.5603, "step": 309780 }, { "epoch": 0.617170566109907, "grad_norm": 0.1946832537651062, "learning_rate": 0.002, "loss": 2.557, "step": 309790 }, { "epoch": 0.6171904883335458, "grad_norm": 0.18103504180908203, "learning_rate": 0.002, "loss": 2.568, "step": 309800 }, { "epoch": 0.6172104105571847, "grad_norm": 0.14550650119781494, "learning_rate": 0.002, "loss": 2.56, "step": 309810 }, { "epoch": 0.6172303327808236, "grad_norm": 0.15578341484069824, "learning_rate": 0.002, "loss": 2.5695, "step": 309820 }, { "epoch": 0.6172502550044626, "grad_norm": 0.16242344677448273, "learning_rate": 0.002, "loss": 2.5556, "step": 309830 }, { "epoch": 0.6172701772281015, "grad_norm": 0.1604398787021637, "learning_rate": 0.002, "loss": 2.5475, "step": 309840 }, { "epoch": 0.6172900994517404, "grad_norm": 0.2152411788702011, "learning_rate": 0.002, "loss": 2.5495, "step": 309850 }, { "epoch": 0.6173100216753793, "grad_norm": 0.15844501554965973, "learning_rate": 0.002, "loss": 2.5562, "step": 309860 }, { "epoch": 0.6173299438990182, "grad_norm": 0.16323716938495636, "learning_rate": 0.002, "loss": 2.5481, "step": 309870 }, { "epoch": 0.6173498661226572, "grad_norm": 0.1725054681301117, "learning_rate": 0.002, "loss": 2.5739, "step": 309880 }, { "epoch": 0.6173697883462961, "grad_norm": 0.15702050924301147, "learning_rate": 0.002, "loss": 2.5564, "step": 309890 }, { "epoch": 0.617389710569935, "grad_norm": 0.18230105936527252, "learning_rate": 0.002, "loss": 2.5486, "step": 309900 }, { "epoch": 0.6174096327935739, "grad_norm": 0.1580405980348587, "learning_rate": 0.002, "loss": 2.557, "step": 309910 }, { "epoch": 0.6174295550172128, "grad_norm": 0.16846181452274323, "learning_rate": 0.002, "loss": 2.5644, "step": 309920 }, { "epoch": 0.6174494772408518, "grad_norm": 0.16066931188106537, "learning_rate": 0.002, "loss": 2.5471, "step": 309930 }, { "epoch": 0.6174693994644906, "grad_norm": 0.17190465331077576, "learning_rate": 0.002, "loss": 2.5663, "step": 309940 }, { "epoch": 0.6174893216881295, "grad_norm": 0.15968064963817596, "learning_rate": 0.002, "loss": 2.5508, "step": 309950 }, { "epoch": 0.6175092439117684, "grad_norm": 0.20078590512275696, "learning_rate": 0.002, "loss": 2.5498, "step": 309960 }, { "epoch": 0.6175291661354073, "grad_norm": 0.16764360666275024, "learning_rate": 0.002, "loss": 2.5448, "step": 309970 }, { "epoch": 0.6175490883590463, "grad_norm": 0.15938007831573486, "learning_rate": 0.002, "loss": 2.5511, "step": 309980 }, { "epoch": 0.6175690105826852, "grad_norm": 0.19925743341445923, "learning_rate": 0.002, "loss": 2.5478, "step": 309990 }, { "epoch": 0.6175889328063241, "grad_norm": 0.18303848803043365, "learning_rate": 0.002, "loss": 2.5642, "step": 310000 }, { "epoch": 0.617608855029963, "grad_norm": 0.15101851522922516, "learning_rate": 0.002, "loss": 2.5571, "step": 310010 }, { "epoch": 0.617628777253602, "grad_norm": 0.14716555178165436, "learning_rate": 0.002, "loss": 2.5558, "step": 310020 }, { "epoch": 0.6176486994772409, "grad_norm": 0.22025009989738464, "learning_rate": 0.002, "loss": 2.552, "step": 310030 }, { "epoch": 0.6176686217008798, "grad_norm": 0.17963318526744843, "learning_rate": 0.002, "loss": 2.5629, "step": 310040 }, { "epoch": 0.6176885439245187, "grad_norm": 0.1597166806459427, "learning_rate": 0.002, "loss": 2.5385, "step": 310050 }, { "epoch": 0.6177084661481576, "grad_norm": 0.14842447638511658, "learning_rate": 0.002, "loss": 2.5422, "step": 310060 }, { "epoch": 0.6177283883717966, "grad_norm": 0.1695898026227951, "learning_rate": 0.002, "loss": 2.5517, "step": 310070 }, { "epoch": 0.6177483105954354, "grad_norm": 0.152387335896492, "learning_rate": 0.002, "loss": 2.5488, "step": 310080 }, { "epoch": 0.6177682328190743, "grad_norm": 0.1799926459789276, "learning_rate": 0.002, "loss": 2.5575, "step": 310090 }, { "epoch": 0.6177881550427132, "grad_norm": 0.15729248523712158, "learning_rate": 0.002, "loss": 2.5507, "step": 310100 }, { "epoch": 0.6178080772663521, "grad_norm": 0.18521533906459808, "learning_rate": 0.002, "loss": 2.5458, "step": 310110 }, { "epoch": 0.6178279994899911, "grad_norm": 0.1551843285560608, "learning_rate": 0.002, "loss": 2.5513, "step": 310120 }, { "epoch": 0.61784792171363, "grad_norm": 0.2057429999113083, "learning_rate": 0.002, "loss": 2.5735, "step": 310130 }, { "epoch": 0.6178678439372689, "grad_norm": 0.1542748659849167, "learning_rate": 0.002, "loss": 2.5408, "step": 310140 }, { "epoch": 0.6178877661609078, "grad_norm": 0.14774677157402039, "learning_rate": 0.002, "loss": 2.5593, "step": 310150 }, { "epoch": 0.6179076883845467, "grad_norm": 0.19732536375522614, "learning_rate": 0.002, "loss": 2.5609, "step": 310160 }, { "epoch": 0.6179276106081857, "grad_norm": 0.18114197254180908, "learning_rate": 0.002, "loss": 2.57, "step": 310170 }, { "epoch": 0.6179475328318246, "grad_norm": 0.1552908718585968, "learning_rate": 0.002, "loss": 2.5712, "step": 310180 }, { "epoch": 0.6179674550554635, "grad_norm": 0.16018562018871307, "learning_rate": 0.002, "loss": 2.5509, "step": 310190 }, { "epoch": 0.6179873772791024, "grad_norm": 0.14774778485298157, "learning_rate": 0.002, "loss": 2.5737, "step": 310200 }, { "epoch": 0.6180072995027412, "grad_norm": 0.16554729640483856, "learning_rate": 0.002, "loss": 2.5558, "step": 310210 }, { "epoch": 0.6180272217263802, "grad_norm": 0.16039083898067474, "learning_rate": 0.002, "loss": 2.5564, "step": 310220 }, { "epoch": 0.6180471439500191, "grad_norm": 0.17381617426872253, "learning_rate": 0.002, "loss": 2.5545, "step": 310230 }, { "epoch": 0.618067066173658, "grad_norm": 0.162581667304039, "learning_rate": 0.002, "loss": 2.5523, "step": 310240 }, { "epoch": 0.6180869883972969, "grad_norm": 0.16035069525241852, "learning_rate": 0.002, "loss": 2.5699, "step": 310250 }, { "epoch": 0.6181069106209358, "grad_norm": 0.1800876259803772, "learning_rate": 0.002, "loss": 2.5564, "step": 310260 }, { "epoch": 0.6181268328445748, "grad_norm": 0.15760374069213867, "learning_rate": 0.002, "loss": 2.5753, "step": 310270 }, { "epoch": 0.6181467550682137, "grad_norm": 0.14687813818454742, "learning_rate": 0.002, "loss": 2.5528, "step": 310280 }, { "epoch": 0.6181666772918526, "grad_norm": 0.19694218039512634, "learning_rate": 0.002, "loss": 2.5601, "step": 310290 }, { "epoch": 0.6181865995154915, "grad_norm": 0.16143299639225006, "learning_rate": 0.002, "loss": 2.5587, "step": 310300 }, { "epoch": 0.6182065217391305, "grad_norm": 0.1433573067188263, "learning_rate": 0.002, "loss": 2.5458, "step": 310310 }, { "epoch": 0.6182264439627694, "grad_norm": 0.18764007091522217, "learning_rate": 0.002, "loss": 2.5633, "step": 310320 }, { "epoch": 0.6182463661864083, "grad_norm": 0.15072746574878693, "learning_rate": 0.002, "loss": 2.5385, "step": 310330 }, { "epoch": 0.6182662884100472, "grad_norm": 0.1894194334745407, "learning_rate": 0.002, "loss": 2.54, "step": 310340 }, { "epoch": 0.618286210633686, "grad_norm": 0.16827887296676636, "learning_rate": 0.002, "loss": 2.5665, "step": 310350 }, { "epoch": 0.618306132857325, "grad_norm": 0.17407545447349548, "learning_rate": 0.002, "loss": 2.5536, "step": 310360 }, { "epoch": 0.6183260550809639, "grad_norm": 0.19167225062847137, "learning_rate": 0.002, "loss": 2.5612, "step": 310370 }, { "epoch": 0.6183459773046028, "grad_norm": 0.18280820548534393, "learning_rate": 0.002, "loss": 2.5515, "step": 310380 }, { "epoch": 0.6183658995282417, "grad_norm": 0.14593812823295593, "learning_rate": 0.002, "loss": 2.5572, "step": 310390 }, { "epoch": 0.6183858217518806, "grad_norm": 0.17854566872119904, "learning_rate": 0.002, "loss": 2.5595, "step": 310400 }, { "epoch": 0.6184057439755196, "grad_norm": 0.15957790613174438, "learning_rate": 0.002, "loss": 2.5639, "step": 310410 }, { "epoch": 0.6184256661991585, "grad_norm": 0.17046906054019928, "learning_rate": 0.002, "loss": 2.5516, "step": 310420 }, { "epoch": 0.6184455884227974, "grad_norm": 0.19721007347106934, "learning_rate": 0.002, "loss": 2.5688, "step": 310430 }, { "epoch": 0.6184655106464363, "grad_norm": 0.16107656061649323, "learning_rate": 0.002, "loss": 2.5536, "step": 310440 }, { "epoch": 0.6184854328700752, "grad_norm": 0.1678190380334854, "learning_rate": 0.002, "loss": 2.5598, "step": 310450 }, { "epoch": 0.6185053550937142, "grad_norm": 0.36053016781806946, "learning_rate": 0.002, "loss": 2.5623, "step": 310460 }, { "epoch": 0.6185252773173531, "grad_norm": 0.20483070611953735, "learning_rate": 0.002, "loss": 2.5686, "step": 310470 }, { "epoch": 0.618545199540992, "grad_norm": 0.14227387309074402, "learning_rate": 0.002, "loss": 2.5533, "step": 310480 }, { "epoch": 0.6185651217646309, "grad_norm": 0.1840749830007553, "learning_rate": 0.002, "loss": 2.5542, "step": 310490 }, { "epoch": 0.6185850439882697, "grad_norm": 0.16093210875988007, "learning_rate": 0.002, "loss": 2.5479, "step": 310500 }, { "epoch": 0.6186049662119087, "grad_norm": 0.1722787469625473, "learning_rate": 0.002, "loss": 2.5554, "step": 310510 }, { "epoch": 0.6186248884355476, "grad_norm": 0.1741638481616974, "learning_rate": 0.002, "loss": 2.5582, "step": 310520 }, { "epoch": 0.6186448106591865, "grad_norm": 0.18180391192436218, "learning_rate": 0.002, "loss": 2.543, "step": 310530 }, { "epoch": 0.6186647328828254, "grad_norm": 0.18681460618972778, "learning_rate": 0.002, "loss": 2.5459, "step": 310540 }, { "epoch": 0.6186846551064643, "grad_norm": 0.1733623594045639, "learning_rate": 0.002, "loss": 2.5574, "step": 310550 }, { "epoch": 0.6187045773301033, "grad_norm": 0.15801328420639038, "learning_rate": 0.002, "loss": 2.561, "step": 310560 }, { "epoch": 0.6187244995537422, "grad_norm": 0.1505468785762787, "learning_rate": 0.002, "loss": 2.5547, "step": 310570 }, { "epoch": 0.6187444217773811, "grad_norm": 0.2111404687166214, "learning_rate": 0.002, "loss": 2.5626, "step": 310580 }, { "epoch": 0.61876434400102, "grad_norm": 0.16420957446098328, "learning_rate": 0.002, "loss": 2.5582, "step": 310590 }, { "epoch": 0.618784266224659, "grad_norm": 0.153874933719635, "learning_rate": 0.002, "loss": 2.5534, "step": 310600 }, { "epoch": 0.6188041884482979, "grad_norm": 0.1683189868927002, "learning_rate": 0.002, "loss": 2.5623, "step": 310610 }, { "epoch": 0.6188241106719368, "grad_norm": 0.1850832849740982, "learning_rate": 0.002, "loss": 2.561, "step": 310620 }, { "epoch": 0.6188440328955757, "grad_norm": 0.16534364223480225, "learning_rate": 0.002, "loss": 2.5649, "step": 310630 }, { "epoch": 0.6188639551192145, "grad_norm": 0.14625950157642365, "learning_rate": 0.002, "loss": 2.5526, "step": 310640 }, { "epoch": 0.6188838773428536, "grad_norm": 0.13994306325912476, "learning_rate": 0.002, "loss": 2.5673, "step": 310650 }, { "epoch": 0.6189037995664924, "grad_norm": 0.15074582397937775, "learning_rate": 0.002, "loss": 2.564, "step": 310660 }, { "epoch": 0.6189237217901313, "grad_norm": 0.16928930580615997, "learning_rate": 0.002, "loss": 2.548, "step": 310670 }, { "epoch": 0.6189436440137702, "grad_norm": 0.2030252069234848, "learning_rate": 0.002, "loss": 2.5349, "step": 310680 }, { "epoch": 0.6189635662374091, "grad_norm": 0.1607193946838379, "learning_rate": 0.002, "loss": 2.5492, "step": 310690 }, { "epoch": 0.6189834884610481, "grad_norm": 0.1670580953359604, "learning_rate": 0.002, "loss": 2.5479, "step": 310700 }, { "epoch": 0.619003410684687, "grad_norm": 0.16645702719688416, "learning_rate": 0.002, "loss": 2.5524, "step": 310710 }, { "epoch": 0.6190233329083259, "grad_norm": 0.17382512986660004, "learning_rate": 0.002, "loss": 2.5463, "step": 310720 }, { "epoch": 0.6190432551319648, "grad_norm": 0.17784669995307922, "learning_rate": 0.002, "loss": 2.5523, "step": 310730 }, { "epoch": 0.6190631773556037, "grad_norm": 0.1506344974040985, "learning_rate": 0.002, "loss": 2.534, "step": 310740 }, { "epoch": 0.6190830995792427, "grad_norm": 0.16486677527427673, "learning_rate": 0.002, "loss": 2.5607, "step": 310750 }, { "epoch": 0.6191030218028816, "grad_norm": 0.19819888472557068, "learning_rate": 0.002, "loss": 2.5686, "step": 310760 }, { "epoch": 0.6191229440265205, "grad_norm": 0.1415897160768509, "learning_rate": 0.002, "loss": 2.5636, "step": 310770 }, { "epoch": 0.6191428662501594, "grad_norm": 0.16796033084392548, "learning_rate": 0.002, "loss": 2.5567, "step": 310780 }, { "epoch": 0.6191627884737982, "grad_norm": 0.217366024851799, "learning_rate": 0.002, "loss": 2.5468, "step": 310790 }, { "epoch": 0.6191827106974372, "grad_norm": 0.16951613128185272, "learning_rate": 0.002, "loss": 2.5554, "step": 310800 }, { "epoch": 0.6192026329210761, "grad_norm": 0.18537524342536926, "learning_rate": 0.002, "loss": 2.5486, "step": 310810 }, { "epoch": 0.619222555144715, "grad_norm": 0.1561853587627411, "learning_rate": 0.002, "loss": 2.5523, "step": 310820 }, { "epoch": 0.6192424773683539, "grad_norm": 0.16788683831691742, "learning_rate": 0.002, "loss": 2.5739, "step": 310830 }, { "epoch": 0.6192623995919928, "grad_norm": 0.19550742208957672, "learning_rate": 0.002, "loss": 2.558, "step": 310840 }, { "epoch": 0.6192823218156318, "grad_norm": 0.15891510248184204, "learning_rate": 0.002, "loss": 2.5475, "step": 310850 }, { "epoch": 0.6193022440392707, "grad_norm": 0.16545017063617706, "learning_rate": 0.002, "loss": 2.5614, "step": 310860 }, { "epoch": 0.6193221662629096, "grad_norm": 0.17858530580997467, "learning_rate": 0.002, "loss": 2.57, "step": 310870 }, { "epoch": 0.6193420884865485, "grad_norm": 0.1686326116323471, "learning_rate": 0.002, "loss": 2.5518, "step": 310880 }, { "epoch": 0.6193620107101875, "grad_norm": 0.16398528218269348, "learning_rate": 0.002, "loss": 2.5602, "step": 310890 }, { "epoch": 0.6193819329338264, "grad_norm": 0.18954499065876007, "learning_rate": 0.002, "loss": 2.5679, "step": 310900 }, { "epoch": 0.6194018551574653, "grad_norm": 0.14254990220069885, "learning_rate": 0.002, "loss": 2.5418, "step": 310910 }, { "epoch": 0.6194217773811042, "grad_norm": 0.25071612000465393, "learning_rate": 0.002, "loss": 2.557, "step": 310920 }, { "epoch": 0.619441699604743, "grad_norm": 0.15074816346168518, "learning_rate": 0.002, "loss": 2.5546, "step": 310930 }, { "epoch": 0.619461621828382, "grad_norm": 0.15355412662029266, "learning_rate": 0.002, "loss": 2.5528, "step": 310940 }, { "epoch": 0.6194815440520209, "grad_norm": 0.1872645765542984, "learning_rate": 0.002, "loss": 2.5631, "step": 310950 }, { "epoch": 0.6195014662756598, "grad_norm": 0.15166181325912476, "learning_rate": 0.002, "loss": 2.5525, "step": 310960 }, { "epoch": 0.6195213884992987, "grad_norm": 0.17202910780906677, "learning_rate": 0.002, "loss": 2.543, "step": 310970 }, { "epoch": 0.6195413107229376, "grad_norm": 0.15732261538505554, "learning_rate": 0.002, "loss": 2.5559, "step": 310980 }, { "epoch": 0.6195612329465766, "grad_norm": 0.20179270207881927, "learning_rate": 0.002, "loss": 2.5487, "step": 310990 }, { "epoch": 0.6195811551702155, "grad_norm": 0.18618455529212952, "learning_rate": 0.002, "loss": 2.5318, "step": 311000 }, { "epoch": 0.6196010773938544, "grad_norm": 0.1706884652376175, "learning_rate": 0.002, "loss": 2.5572, "step": 311010 }, { "epoch": 0.6196209996174933, "grad_norm": 0.15032075345516205, "learning_rate": 0.002, "loss": 2.5572, "step": 311020 }, { "epoch": 0.6196409218411322, "grad_norm": 0.18178118765354156, "learning_rate": 0.002, "loss": 2.5647, "step": 311030 }, { "epoch": 0.6196608440647712, "grad_norm": 0.18328899145126343, "learning_rate": 0.002, "loss": 2.546, "step": 311040 }, { "epoch": 0.6196807662884101, "grad_norm": 0.17371778190135956, "learning_rate": 0.002, "loss": 2.5755, "step": 311050 }, { "epoch": 0.619700688512049, "grad_norm": 0.14469879865646362, "learning_rate": 0.002, "loss": 2.5522, "step": 311060 }, { "epoch": 0.6197206107356878, "grad_norm": 0.15698836743831635, "learning_rate": 0.002, "loss": 2.5514, "step": 311070 }, { "epoch": 0.6197405329593267, "grad_norm": 0.15426726639270782, "learning_rate": 0.002, "loss": 2.5569, "step": 311080 }, { "epoch": 0.6197604551829657, "grad_norm": 0.1721573770046234, "learning_rate": 0.002, "loss": 2.5673, "step": 311090 }, { "epoch": 0.6197803774066046, "grad_norm": 0.14609473943710327, "learning_rate": 0.002, "loss": 2.5638, "step": 311100 }, { "epoch": 0.6198002996302435, "grad_norm": 0.18272942304611206, "learning_rate": 0.002, "loss": 2.5546, "step": 311110 }, { "epoch": 0.6198202218538824, "grad_norm": 0.16980208456516266, "learning_rate": 0.002, "loss": 2.5399, "step": 311120 }, { "epoch": 0.6198401440775213, "grad_norm": 0.16456478834152222, "learning_rate": 0.002, "loss": 2.562, "step": 311130 }, { "epoch": 0.6198600663011603, "grad_norm": 0.15783214569091797, "learning_rate": 0.002, "loss": 2.5602, "step": 311140 }, { "epoch": 0.6198799885247992, "grad_norm": 0.15691949427127838, "learning_rate": 0.002, "loss": 2.5571, "step": 311150 }, { "epoch": 0.6198999107484381, "grad_norm": 0.15203353762626648, "learning_rate": 0.002, "loss": 2.5531, "step": 311160 }, { "epoch": 0.619919832972077, "grad_norm": 0.16948658227920532, "learning_rate": 0.002, "loss": 2.5394, "step": 311170 }, { "epoch": 0.619939755195716, "grad_norm": 0.1470271348953247, "learning_rate": 0.002, "loss": 2.5632, "step": 311180 }, { "epoch": 0.6199596774193549, "grad_norm": 0.18366195261478424, "learning_rate": 0.002, "loss": 2.5668, "step": 311190 }, { "epoch": 0.6199795996429938, "grad_norm": 0.17333830893039703, "learning_rate": 0.002, "loss": 2.5665, "step": 311200 }, { "epoch": 0.6199995218666327, "grad_norm": 0.15016783773899078, "learning_rate": 0.002, "loss": 2.5505, "step": 311210 }, { "epoch": 0.6200194440902715, "grad_norm": 0.1631910651922226, "learning_rate": 0.002, "loss": 2.5696, "step": 311220 }, { "epoch": 0.6200393663139105, "grad_norm": 0.17878121137619019, "learning_rate": 0.002, "loss": 2.5505, "step": 311230 }, { "epoch": 0.6200592885375494, "grad_norm": 0.1827496886253357, "learning_rate": 0.002, "loss": 2.5464, "step": 311240 }, { "epoch": 0.6200792107611883, "grad_norm": 0.14057643711566925, "learning_rate": 0.002, "loss": 2.548, "step": 311250 }, { "epoch": 0.6200991329848272, "grad_norm": 0.17273078858852386, "learning_rate": 0.002, "loss": 2.543, "step": 311260 }, { "epoch": 0.6201190552084661, "grad_norm": 0.13534528017044067, "learning_rate": 0.002, "loss": 2.5498, "step": 311270 }, { "epoch": 0.6201389774321051, "grad_norm": 0.15775273740291595, "learning_rate": 0.002, "loss": 2.5394, "step": 311280 }, { "epoch": 0.620158899655744, "grad_norm": 0.13758282363414764, "learning_rate": 0.002, "loss": 2.5676, "step": 311290 }, { "epoch": 0.6201788218793829, "grad_norm": 0.15323752164840698, "learning_rate": 0.002, "loss": 2.5567, "step": 311300 }, { "epoch": 0.6201987441030218, "grad_norm": 0.17043152451515198, "learning_rate": 0.002, "loss": 2.5627, "step": 311310 }, { "epoch": 0.6202186663266607, "grad_norm": 0.13739517331123352, "learning_rate": 0.002, "loss": 2.5455, "step": 311320 }, { "epoch": 0.6202385885502997, "grad_norm": 0.15641054511070251, "learning_rate": 0.002, "loss": 2.5432, "step": 311330 }, { "epoch": 0.6202585107739386, "grad_norm": 0.17054276168346405, "learning_rate": 0.002, "loss": 2.5611, "step": 311340 }, { "epoch": 0.6202784329975775, "grad_norm": 0.14365732669830322, "learning_rate": 0.002, "loss": 2.5584, "step": 311350 }, { "epoch": 0.6202983552212163, "grad_norm": 0.18879395723342896, "learning_rate": 0.002, "loss": 2.5675, "step": 311360 }, { "epoch": 0.6203182774448552, "grad_norm": 0.17875990271568298, "learning_rate": 0.002, "loss": 2.5663, "step": 311370 }, { "epoch": 0.6203381996684942, "grad_norm": 0.22650204598903656, "learning_rate": 0.002, "loss": 2.55, "step": 311380 }, { "epoch": 0.6203581218921331, "grad_norm": 0.15883466601371765, "learning_rate": 0.002, "loss": 2.5557, "step": 311390 }, { "epoch": 0.620378044115772, "grad_norm": 0.1463836133480072, "learning_rate": 0.002, "loss": 2.5473, "step": 311400 }, { "epoch": 0.6203979663394109, "grad_norm": 0.1687491536140442, "learning_rate": 0.002, "loss": 2.5501, "step": 311410 }, { "epoch": 0.6204178885630498, "grad_norm": 0.1501738578081131, "learning_rate": 0.002, "loss": 2.557, "step": 311420 }, { "epoch": 0.6204378107866888, "grad_norm": 0.1700057089328766, "learning_rate": 0.002, "loss": 2.5426, "step": 311430 }, { "epoch": 0.6204577330103277, "grad_norm": 0.15640808641910553, "learning_rate": 0.002, "loss": 2.5599, "step": 311440 }, { "epoch": 0.6204776552339666, "grad_norm": 0.15355812013149261, "learning_rate": 0.002, "loss": 2.5437, "step": 311450 }, { "epoch": 0.6204975774576055, "grad_norm": 0.16974174976348877, "learning_rate": 0.002, "loss": 2.5492, "step": 311460 }, { "epoch": 0.6205174996812444, "grad_norm": 0.188572958111763, "learning_rate": 0.002, "loss": 2.5452, "step": 311470 }, { "epoch": 0.6205374219048834, "grad_norm": 0.17974388599395752, "learning_rate": 0.002, "loss": 2.5683, "step": 311480 }, { "epoch": 0.6205573441285223, "grad_norm": 0.15658976137638092, "learning_rate": 0.002, "loss": 2.5612, "step": 311490 }, { "epoch": 0.6205772663521611, "grad_norm": 0.14402109384536743, "learning_rate": 0.002, "loss": 2.5619, "step": 311500 }, { "epoch": 0.6205971885758, "grad_norm": 0.16875015199184418, "learning_rate": 0.002, "loss": 2.558, "step": 311510 }, { "epoch": 0.620617110799439, "grad_norm": 0.15477333962917328, "learning_rate": 0.002, "loss": 2.5513, "step": 311520 }, { "epoch": 0.6206370330230779, "grad_norm": 0.15451514720916748, "learning_rate": 0.002, "loss": 2.5387, "step": 311530 }, { "epoch": 0.6206569552467168, "grad_norm": 0.16807681322097778, "learning_rate": 0.002, "loss": 2.5535, "step": 311540 }, { "epoch": 0.6206768774703557, "grad_norm": 0.15641091763973236, "learning_rate": 0.002, "loss": 2.5518, "step": 311550 }, { "epoch": 0.6206967996939946, "grad_norm": 0.15115182101726532, "learning_rate": 0.002, "loss": 2.5472, "step": 311560 }, { "epoch": 0.6207167219176336, "grad_norm": 0.1608433574438095, "learning_rate": 0.002, "loss": 2.5504, "step": 311570 }, { "epoch": 0.6207366441412725, "grad_norm": 0.14434809982776642, "learning_rate": 0.002, "loss": 2.5546, "step": 311580 }, { "epoch": 0.6207565663649114, "grad_norm": 0.17958715558052063, "learning_rate": 0.002, "loss": 2.5631, "step": 311590 }, { "epoch": 0.6207764885885503, "grad_norm": 0.1557782143354416, "learning_rate": 0.002, "loss": 2.5575, "step": 311600 }, { "epoch": 0.6207964108121892, "grad_norm": 0.22016426920890808, "learning_rate": 0.002, "loss": 2.5559, "step": 311610 }, { "epoch": 0.6208163330358282, "grad_norm": 0.18525175750255585, "learning_rate": 0.002, "loss": 2.5523, "step": 311620 }, { "epoch": 0.6208362552594671, "grad_norm": 0.14875385165214539, "learning_rate": 0.002, "loss": 2.5575, "step": 311630 }, { "epoch": 0.620856177483106, "grad_norm": 0.16575638949871063, "learning_rate": 0.002, "loss": 2.5517, "step": 311640 }, { "epoch": 0.6208760997067448, "grad_norm": 0.16369318962097168, "learning_rate": 0.002, "loss": 2.5502, "step": 311650 }, { "epoch": 0.6208960219303837, "grad_norm": 0.18212027847766876, "learning_rate": 0.002, "loss": 2.5572, "step": 311660 }, { "epoch": 0.6209159441540227, "grad_norm": 0.15430393815040588, "learning_rate": 0.002, "loss": 2.5571, "step": 311670 }, { "epoch": 0.6209358663776616, "grad_norm": 0.13837170600891113, "learning_rate": 0.002, "loss": 2.5418, "step": 311680 }, { "epoch": 0.6209557886013005, "grad_norm": 0.1663987934589386, "learning_rate": 0.002, "loss": 2.5578, "step": 311690 }, { "epoch": 0.6209757108249394, "grad_norm": 0.13905353844165802, "learning_rate": 0.002, "loss": 2.5426, "step": 311700 }, { "epoch": 0.6209956330485783, "grad_norm": 0.17352360486984253, "learning_rate": 0.002, "loss": 2.554, "step": 311710 }, { "epoch": 0.6210155552722173, "grad_norm": 0.15410861372947693, "learning_rate": 0.002, "loss": 2.5685, "step": 311720 }, { "epoch": 0.6210354774958562, "grad_norm": 0.1714601367712021, "learning_rate": 0.002, "loss": 2.5564, "step": 311730 }, { "epoch": 0.6210553997194951, "grad_norm": 0.19168581068515778, "learning_rate": 0.002, "loss": 2.5721, "step": 311740 }, { "epoch": 0.621075321943134, "grad_norm": 0.15419717133045197, "learning_rate": 0.002, "loss": 2.5584, "step": 311750 }, { "epoch": 0.6210952441667729, "grad_norm": 0.16436079144477844, "learning_rate": 0.002, "loss": 2.5401, "step": 311760 }, { "epoch": 0.6211151663904119, "grad_norm": 0.1572912633419037, "learning_rate": 0.002, "loss": 2.5466, "step": 311770 }, { "epoch": 0.6211350886140508, "grad_norm": 0.16354478895664215, "learning_rate": 0.002, "loss": 2.5599, "step": 311780 }, { "epoch": 0.6211550108376896, "grad_norm": 0.1454678624868393, "learning_rate": 0.002, "loss": 2.558, "step": 311790 }, { "epoch": 0.6211749330613285, "grad_norm": 0.15530090034008026, "learning_rate": 0.002, "loss": 2.5541, "step": 311800 }, { "epoch": 0.6211948552849675, "grad_norm": 0.17117738723754883, "learning_rate": 0.002, "loss": 2.5579, "step": 311810 }, { "epoch": 0.6212147775086064, "grad_norm": 0.18066297471523285, "learning_rate": 0.002, "loss": 2.5593, "step": 311820 }, { "epoch": 0.6212346997322453, "grad_norm": 0.17614859342575073, "learning_rate": 0.002, "loss": 2.5544, "step": 311830 }, { "epoch": 0.6212546219558842, "grad_norm": 0.17237375676631927, "learning_rate": 0.002, "loss": 2.5576, "step": 311840 }, { "epoch": 0.6212745441795231, "grad_norm": 0.16857177019119263, "learning_rate": 0.002, "loss": 2.568, "step": 311850 }, { "epoch": 0.6212944664031621, "grad_norm": 0.18105565011501312, "learning_rate": 0.002, "loss": 2.5484, "step": 311860 }, { "epoch": 0.621314388626801, "grad_norm": 0.21491897106170654, "learning_rate": 0.002, "loss": 2.547, "step": 311870 }, { "epoch": 0.6213343108504399, "grad_norm": 0.17450132966041565, "learning_rate": 0.002, "loss": 2.5614, "step": 311880 }, { "epoch": 0.6213542330740788, "grad_norm": 0.16247786581516266, "learning_rate": 0.002, "loss": 2.553, "step": 311890 }, { "epoch": 0.6213741552977177, "grad_norm": 0.1725800633430481, "learning_rate": 0.002, "loss": 2.5509, "step": 311900 }, { "epoch": 0.6213940775213567, "grad_norm": 0.18167635798454285, "learning_rate": 0.002, "loss": 2.5655, "step": 311910 }, { "epoch": 0.6214139997449956, "grad_norm": 0.15642602741718292, "learning_rate": 0.002, "loss": 2.5431, "step": 311920 }, { "epoch": 0.6214339219686345, "grad_norm": 0.12471616268157959, "learning_rate": 0.002, "loss": 2.5644, "step": 311930 }, { "epoch": 0.6214538441922733, "grad_norm": 0.188604936003685, "learning_rate": 0.002, "loss": 2.5683, "step": 311940 }, { "epoch": 0.6214737664159122, "grad_norm": 0.15221582353115082, "learning_rate": 0.002, "loss": 2.5579, "step": 311950 }, { "epoch": 0.6214936886395512, "grad_norm": 0.15550081431865692, "learning_rate": 0.002, "loss": 2.5397, "step": 311960 }, { "epoch": 0.6215136108631901, "grad_norm": 0.1837880164384842, "learning_rate": 0.002, "loss": 2.5563, "step": 311970 }, { "epoch": 0.621533533086829, "grad_norm": 0.15021973848342896, "learning_rate": 0.002, "loss": 2.564, "step": 311980 }, { "epoch": 0.6215534553104679, "grad_norm": 0.1600179374217987, "learning_rate": 0.002, "loss": 2.5564, "step": 311990 }, { "epoch": 0.6215733775341068, "grad_norm": 0.18915122747421265, "learning_rate": 0.002, "loss": 2.5488, "step": 312000 }, { "epoch": 0.6215932997577458, "grad_norm": 0.18237487971782684, "learning_rate": 0.002, "loss": 2.5485, "step": 312010 }, { "epoch": 0.6216132219813847, "grad_norm": 0.16908933222293854, "learning_rate": 0.002, "loss": 2.5461, "step": 312020 }, { "epoch": 0.6216331442050236, "grad_norm": 0.16412796080112457, "learning_rate": 0.002, "loss": 2.5501, "step": 312030 }, { "epoch": 0.6216530664286625, "grad_norm": 0.20424380898475647, "learning_rate": 0.002, "loss": 2.5566, "step": 312040 }, { "epoch": 0.6216729886523014, "grad_norm": 0.16345058381557465, "learning_rate": 0.002, "loss": 2.5638, "step": 312050 }, { "epoch": 0.6216929108759404, "grad_norm": 0.16709789633750916, "learning_rate": 0.002, "loss": 2.5724, "step": 312060 }, { "epoch": 0.6217128330995793, "grad_norm": 0.18717165291309357, "learning_rate": 0.002, "loss": 2.5554, "step": 312070 }, { "epoch": 0.6217327553232181, "grad_norm": 0.16293281316757202, "learning_rate": 0.002, "loss": 2.5467, "step": 312080 }, { "epoch": 0.621752677546857, "grad_norm": 0.24356724321842194, "learning_rate": 0.002, "loss": 2.5653, "step": 312090 }, { "epoch": 0.621772599770496, "grad_norm": 0.15703606605529785, "learning_rate": 0.002, "loss": 2.5583, "step": 312100 }, { "epoch": 0.6217925219941349, "grad_norm": 0.1805185228586197, "learning_rate": 0.002, "loss": 2.5756, "step": 312110 }, { "epoch": 0.6218124442177738, "grad_norm": 0.16508743166923523, "learning_rate": 0.002, "loss": 2.5694, "step": 312120 }, { "epoch": 0.6218323664414127, "grad_norm": 0.1668691635131836, "learning_rate": 0.002, "loss": 2.5445, "step": 312130 }, { "epoch": 0.6218522886650516, "grad_norm": 0.1695588082075119, "learning_rate": 0.002, "loss": 2.5578, "step": 312140 }, { "epoch": 0.6218722108886906, "grad_norm": 0.16045524179935455, "learning_rate": 0.002, "loss": 2.554, "step": 312150 }, { "epoch": 0.6218921331123295, "grad_norm": 0.14948366582393646, "learning_rate": 0.002, "loss": 2.5601, "step": 312160 }, { "epoch": 0.6219120553359684, "grad_norm": 0.15073740482330322, "learning_rate": 0.002, "loss": 2.5582, "step": 312170 }, { "epoch": 0.6219319775596073, "grad_norm": 0.15521176159381866, "learning_rate": 0.002, "loss": 2.571, "step": 312180 }, { "epoch": 0.6219518997832462, "grad_norm": 0.14575433731079102, "learning_rate": 0.002, "loss": 2.5455, "step": 312190 }, { "epoch": 0.6219718220068852, "grad_norm": 0.15166057646274567, "learning_rate": 0.002, "loss": 2.545, "step": 312200 }, { "epoch": 0.6219917442305241, "grad_norm": 0.16219905018806458, "learning_rate": 0.002, "loss": 2.5587, "step": 312210 }, { "epoch": 0.622011666454163, "grad_norm": 0.14592094719409943, "learning_rate": 0.002, "loss": 2.5554, "step": 312220 }, { "epoch": 0.6220315886778018, "grad_norm": 0.18824802339076996, "learning_rate": 0.002, "loss": 2.5423, "step": 312230 }, { "epoch": 0.6220515109014407, "grad_norm": 0.18781794607639313, "learning_rate": 0.002, "loss": 2.5574, "step": 312240 }, { "epoch": 0.6220714331250797, "grad_norm": 0.16454282402992249, "learning_rate": 0.002, "loss": 2.5522, "step": 312250 }, { "epoch": 0.6220913553487186, "grad_norm": 0.14626140892505646, "learning_rate": 0.002, "loss": 2.5625, "step": 312260 }, { "epoch": 0.6221112775723575, "grad_norm": 0.17454981803894043, "learning_rate": 0.002, "loss": 2.5589, "step": 312270 }, { "epoch": 0.6221311997959964, "grad_norm": 0.15258029103279114, "learning_rate": 0.002, "loss": 2.568, "step": 312280 }, { "epoch": 0.6221511220196353, "grad_norm": 0.15309551358222961, "learning_rate": 0.002, "loss": 2.5518, "step": 312290 }, { "epoch": 0.6221710442432743, "grad_norm": 0.14748285710811615, "learning_rate": 0.002, "loss": 2.5385, "step": 312300 }, { "epoch": 0.6221909664669132, "grad_norm": 0.25333693623542786, "learning_rate": 0.002, "loss": 2.5504, "step": 312310 }, { "epoch": 0.6222108886905521, "grad_norm": 0.17318196594715118, "learning_rate": 0.002, "loss": 2.5522, "step": 312320 }, { "epoch": 0.622230810914191, "grad_norm": 0.17069971561431885, "learning_rate": 0.002, "loss": 2.5587, "step": 312330 }, { "epoch": 0.6222507331378299, "grad_norm": 0.1702190339565277, "learning_rate": 0.002, "loss": 2.5587, "step": 312340 }, { "epoch": 0.6222706553614689, "grad_norm": 0.1587936282157898, "learning_rate": 0.002, "loss": 2.5592, "step": 312350 }, { "epoch": 0.6222905775851078, "grad_norm": 0.19365154206752777, "learning_rate": 0.002, "loss": 2.5377, "step": 312360 }, { "epoch": 0.6223104998087466, "grad_norm": 0.1563272774219513, "learning_rate": 0.002, "loss": 2.5449, "step": 312370 }, { "epoch": 0.6223304220323855, "grad_norm": 0.16899506747722626, "learning_rate": 0.002, "loss": 2.5412, "step": 312380 }, { "epoch": 0.6223503442560245, "grad_norm": 0.14239171147346497, "learning_rate": 0.002, "loss": 2.556, "step": 312390 }, { "epoch": 0.6223702664796634, "grad_norm": 0.14813904464244843, "learning_rate": 0.002, "loss": 2.5516, "step": 312400 }, { "epoch": 0.6223901887033023, "grad_norm": 0.1776694655418396, "learning_rate": 0.002, "loss": 2.5494, "step": 312410 }, { "epoch": 0.6224101109269412, "grad_norm": 0.1415557712316513, "learning_rate": 0.002, "loss": 2.5603, "step": 312420 }, { "epoch": 0.6224300331505801, "grad_norm": 0.20701126754283905, "learning_rate": 0.002, "loss": 2.5619, "step": 312430 }, { "epoch": 0.6224499553742191, "grad_norm": 0.16081538796424866, "learning_rate": 0.002, "loss": 2.5595, "step": 312440 }, { "epoch": 0.622469877597858, "grad_norm": 0.1994353085756302, "learning_rate": 0.002, "loss": 2.5538, "step": 312450 }, { "epoch": 0.6224897998214969, "grad_norm": 0.18751265108585358, "learning_rate": 0.002, "loss": 2.5623, "step": 312460 }, { "epoch": 0.6225097220451358, "grad_norm": 0.1619267463684082, "learning_rate": 0.002, "loss": 2.567, "step": 312470 }, { "epoch": 0.6225296442687747, "grad_norm": 0.20345523953437805, "learning_rate": 0.002, "loss": 2.5616, "step": 312480 }, { "epoch": 0.6225495664924137, "grad_norm": 0.16984932124614716, "learning_rate": 0.002, "loss": 2.5446, "step": 312490 }, { "epoch": 0.6225694887160526, "grad_norm": 0.15422287583351135, "learning_rate": 0.002, "loss": 2.5472, "step": 312500 }, { "epoch": 0.6225894109396914, "grad_norm": 0.19424404203891754, "learning_rate": 0.002, "loss": 2.5383, "step": 312510 }, { "epoch": 0.6226093331633303, "grad_norm": 0.17097865045070648, "learning_rate": 0.002, "loss": 2.5665, "step": 312520 }, { "epoch": 0.6226292553869692, "grad_norm": 0.15916255116462708, "learning_rate": 0.002, "loss": 2.5671, "step": 312530 }, { "epoch": 0.6226491776106082, "grad_norm": 0.18483370542526245, "learning_rate": 0.002, "loss": 2.553, "step": 312540 }, { "epoch": 0.6226690998342471, "grad_norm": 0.18129339814186096, "learning_rate": 0.002, "loss": 2.553, "step": 312550 }, { "epoch": 0.622689022057886, "grad_norm": 0.1741877794265747, "learning_rate": 0.002, "loss": 2.5755, "step": 312560 }, { "epoch": 0.6227089442815249, "grad_norm": 0.1544298678636551, "learning_rate": 0.002, "loss": 2.5413, "step": 312570 }, { "epoch": 0.6227288665051638, "grad_norm": 0.15952958166599274, "learning_rate": 0.002, "loss": 2.5469, "step": 312580 }, { "epoch": 0.6227487887288028, "grad_norm": 0.15368732810020447, "learning_rate": 0.002, "loss": 2.5607, "step": 312590 }, { "epoch": 0.6227687109524417, "grad_norm": 0.1377071738243103, "learning_rate": 0.002, "loss": 2.5448, "step": 312600 }, { "epoch": 0.6227886331760806, "grad_norm": 0.17066603899002075, "learning_rate": 0.002, "loss": 2.5518, "step": 312610 }, { "epoch": 0.6228085553997195, "grad_norm": 0.1752915233373642, "learning_rate": 0.002, "loss": 2.5606, "step": 312620 }, { "epoch": 0.6228284776233584, "grad_norm": 0.1678360104560852, "learning_rate": 0.002, "loss": 2.5511, "step": 312630 }, { "epoch": 0.6228483998469974, "grad_norm": 0.13922624289989471, "learning_rate": 0.002, "loss": 2.5472, "step": 312640 }, { "epoch": 0.6228683220706362, "grad_norm": 0.1872478425502777, "learning_rate": 0.002, "loss": 2.5564, "step": 312650 }, { "epoch": 0.6228882442942751, "grad_norm": 0.15898406505584717, "learning_rate": 0.002, "loss": 2.5481, "step": 312660 }, { "epoch": 0.622908166517914, "grad_norm": 0.15254691243171692, "learning_rate": 0.002, "loss": 2.5524, "step": 312670 }, { "epoch": 0.622928088741553, "grad_norm": 0.16872690618038177, "learning_rate": 0.002, "loss": 2.5473, "step": 312680 }, { "epoch": 0.6229480109651919, "grad_norm": 0.15262103080749512, "learning_rate": 0.002, "loss": 2.544, "step": 312690 }, { "epoch": 0.6229679331888308, "grad_norm": 0.1806594431400299, "learning_rate": 0.002, "loss": 2.5482, "step": 312700 }, { "epoch": 0.6229878554124697, "grad_norm": 0.1494131088256836, "learning_rate": 0.002, "loss": 2.5682, "step": 312710 }, { "epoch": 0.6230077776361086, "grad_norm": 0.14889584481716156, "learning_rate": 0.002, "loss": 2.5632, "step": 312720 }, { "epoch": 0.6230276998597476, "grad_norm": 0.20172548294067383, "learning_rate": 0.002, "loss": 2.5399, "step": 312730 }, { "epoch": 0.6230476220833865, "grad_norm": 0.17291252315044403, "learning_rate": 0.002, "loss": 2.5598, "step": 312740 }, { "epoch": 0.6230675443070254, "grad_norm": 0.17213650047779083, "learning_rate": 0.002, "loss": 2.559, "step": 312750 }, { "epoch": 0.6230874665306643, "grad_norm": 0.14205263555049896, "learning_rate": 0.002, "loss": 2.5499, "step": 312760 }, { "epoch": 0.6231073887543032, "grad_norm": 0.20558281242847443, "learning_rate": 0.002, "loss": 2.5464, "step": 312770 }, { "epoch": 0.6231273109779422, "grad_norm": 0.15330401062965393, "learning_rate": 0.002, "loss": 2.5584, "step": 312780 }, { "epoch": 0.623147233201581, "grad_norm": 0.14890599250793457, "learning_rate": 0.002, "loss": 2.5681, "step": 312790 }, { "epoch": 0.6231671554252199, "grad_norm": 0.16920842230319977, "learning_rate": 0.002, "loss": 2.5619, "step": 312800 }, { "epoch": 0.6231870776488588, "grad_norm": 0.18324048817157745, "learning_rate": 0.002, "loss": 2.5639, "step": 312810 }, { "epoch": 0.6232069998724977, "grad_norm": 0.1833716183900833, "learning_rate": 0.002, "loss": 2.5552, "step": 312820 }, { "epoch": 0.6232269220961367, "grad_norm": 0.14706569910049438, "learning_rate": 0.002, "loss": 2.5571, "step": 312830 }, { "epoch": 0.6232468443197756, "grad_norm": 0.15128061175346375, "learning_rate": 0.002, "loss": 2.5547, "step": 312840 }, { "epoch": 0.6232667665434145, "grad_norm": 0.15515024960041046, "learning_rate": 0.002, "loss": 2.5536, "step": 312850 }, { "epoch": 0.6232866887670534, "grad_norm": 0.15940730273723602, "learning_rate": 0.002, "loss": 2.542, "step": 312860 }, { "epoch": 0.6233066109906923, "grad_norm": 0.19676440954208374, "learning_rate": 0.002, "loss": 2.5515, "step": 312870 }, { "epoch": 0.6233265332143313, "grad_norm": 0.1665221005678177, "learning_rate": 0.002, "loss": 2.5588, "step": 312880 }, { "epoch": 0.6233464554379702, "grad_norm": 0.1442224532365799, "learning_rate": 0.002, "loss": 2.5476, "step": 312890 }, { "epoch": 0.6233663776616091, "grad_norm": 0.12901398539543152, "learning_rate": 0.002, "loss": 2.5583, "step": 312900 }, { "epoch": 0.623386299885248, "grad_norm": 0.19393163919448853, "learning_rate": 0.002, "loss": 2.5626, "step": 312910 }, { "epoch": 0.6234062221088869, "grad_norm": 0.1711089164018631, "learning_rate": 0.002, "loss": 2.5506, "step": 312920 }, { "epoch": 0.6234261443325259, "grad_norm": 0.1914144605398178, "learning_rate": 0.002, "loss": 2.5438, "step": 312930 }, { "epoch": 0.6234460665561647, "grad_norm": 0.1393219232559204, "learning_rate": 0.002, "loss": 2.5617, "step": 312940 }, { "epoch": 0.6234659887798036, "grad_norm": 0.1845369040966034, "learning_rate": 0.002, "loss": 2.5586, "step": 312950 }, { "epoch": 0.6234859110034425, "grad_norm": 0.1455504149198532, "learning_rate": 0.002, "loss": 2.5588, "step": 312960 }, { "epoch": 0.6235058332270814, "grad_norm": 0.18061457574367523, "learning_rate": 0.002, "loss": 2.5404, "step": 312970 }, { "epoch": 0.6235257554507204, "grad_norm": 0.16712191700935364, "learning_rate": 0.002, "loss": 2.5561, "step": 312980 }, { "epoch": 0.6235456776743593, "grad_norm": 0.17707517743110657, "learning_rate": 0.002, "loss": 2.5556, "step": 312990 }, { "epoch": 0.6235655998979982, "grad_norm": 0.1704978197813034, "learning_rate": 0.002, "loss": 2.5425, "step": 313000 }, { "epoch": 0.6235855221216371, "grad_norm": 0.14178192615509033, "learning_rate": 0.002, "loss": 2.5721, "step": 313010 }, { "epoch": 0.6236054443452761, "grad_norm": 0.19310253858566284, "learning_rate": 0.002, "loss": 2.559, "step": 313020 }, { "epoch": 0.623625366568915, "grad_norm": 0.17159508168697357, "learning_rate": 0.002, "loss": 2.5386, "step": 313030 }, { "epoch": 0.6236452887925539, "grad_norm": 0.17494633793830872, "learning_rate": 0.002, "loss": 2.5562, "step": 313040 }, { "epoch": 0.6236652110161928, "grad_norm": 0.17939552664756775, "learning_rate": 0.002, "loss": 2.5608, "step": 313050 }, { "epoch": 0.6236851332398317, "grad_norm": 0.14895480871200562, "learning_rate": 0.002, "loss": 2.5615, "step": 313060 }, { "epoch": 0.6237050554634707, "grad_norm": 0.15877936780452728, "learning_rate": 0.002, "loss": 2.5586, "step": 313070 }, { "epoch": 0.6237249776871095, "grad_norm": 0.17631395161151886, "learning_rate": 0.002, "loss": 2.5605, "step": 313080 }, { "epoch": 0.6237448999107484, "grad_norm": 0.1577390879392624, "learning_rate": 0.002, "loss": 2.5428, "step": 313090 }, { "epoch": 0.6237648221343873, "grad_norm": 0.16546739637851715, "learning_rate": 0.002, "loss": 2.5489, "step": 313100 }, { "epoch": 0.6237847443580262, "grad_norm": 0.16824188828468323, "learning_rate": 0.002, "loss": 2.5504, "step": 313110 }, { "epoch": 0.6238046665816652, "grad_norm": 0.14477379620075226, "learning_rate": 0.002, "loss": 2.5466, "step": 313120 }, { "epoch": 0.6238245888053041, "grad_norm": 0.19202451407909393, "learning_rate": 0.002, "loss": 2.563, "step": 313130 }, { "epoch": 0.623844511028943, "grad_norm": 0.15346398949623108, "learning_rate": 0.002, "loss": 2.552, "step": 313140 }, { "epoch": 0.6238644332525819, "grad_norm": 0.154349222779274, "learning_rate": 0.002, "loss": 2.5492, "step": 313150 }, { "epoch": 0.6238843554762208, "grad_norm": 0.155591681599617, "learning_rate": 0.002, "loss": 2.5582, "step": 313160 }, { "epoch": 0.6239042776998598, "grad_norm": 0.14169561862945557, "learning_rate": 0.002, "loss": 2.5529, "step": 313170 }, { "epoch": 0.6239241999234987, "grad_norm": 0.16718555986881256, "learning_rate": 0.002, "loss": 2.5663, "step": 313180 }, { "epoch": 0.6239441221471376, "grad_norm": 0.15741923451423645, "learning_rate": 0.002, "loss": 2.5466, "step": 313190 }, { "epoch": 0.6239640443707765, "grad_norm": 0.15186594426631927, "learning_rate": 0.002, "loss": 2.5453, "step": 313200 }, { "epoch": 0.6239839665944154, "grad_norm": 0.18262742459774017, "learning_rate": 0.002, "loss": 2.5539, "step": 313210 }, { "epoch": 0.6240038888180544, "grad_norm": 0.1673848181962967, "learning_rate": 0.002, "loss": 2.5639, "step": 313220 }, { "epoch": 0.6240238110416932, "grad_norm": 0.14426618814468384, "learning_rate": 0.002, "loss": 2.5488, "step": 313230 }, { "epoch": 0.6240437332653321, "grad_norm": 0.17384099960327148, "learning_rate": 0.002, "loss": 2.5495, "step": 313240 }, { "epoch": 0.624063655488971, "grad_norm": 0.21076403558254242, "learning_rate": 0.002, "loss": 2.5558, "step": 313250 }, { "epoch": 0.6240835777126099, "grad_norm": 0.1599520593881607, "learning_rate": 0.002, "loss": 2.5671, "step": 313260 }, { "epoch": 0.6241034999362489, "grad_norm": 0.20134617388248444, "learning_rate": 0.002, "loss": 2.5561, "step": 313270 }, { "epoch": 0.6241234221598878, "grad_norm": 0.15853530168533325, "learning_rate": 0.002, "loss": 2.5522, "step": 313280 }, { "epoch": 0.6241433443835267, "grad_norm": 0.15906164050102234, "learning_rate": 0.002, "loss": 2.5463, "step": 313290 }, { "epoch": 0.6241632666071656, "grad_norm": 0.15781180560588837, "learning_rate": 0.002, "loss": 2.5549, "step": 313300 }, { "epoch": 0.6241831888308046, "grad_norm": 0.15481628477573395, "learning_rate": 0.002, "loss": 2.553, "step": 313310 }, { "epoch": 0.6242031110544435, "grad_norm": 0.14469097554683685, "learning_rate": 0.002, "loss": 2.5576, "step": 313320 }, { "epoch": 0.6242230332780824, "grad_norm": 0.19853170216083527, "learning_rate": 0.002, "loss": 2.5682, "step": 313330 }, { "epoch": 0.6242429555017213, "grad_norm": 0.18607905507087708, "learning_rate": 0.002, "loss": 2.546, "step": 313340 }, { "epoch": 0.6242628777253602, "grad_norm": 0.1509890854358673, "learning_rate": 0.002, "loss": 2.5631, "step": 313350 }, { "epoch": 0.6242827999489992, "grad_norm": 0.2264419049024582, "learning_rate": 0.002, "loss": 2.5707, "step": 313360 }, { "epoch": 0.624302722172638, "grad_norm": 0.16610397398471832, "learning_rate": 0.002, "loss": 2.5623, "step": 313370 }, { "epoch": 0.6243226443962769, "grad_norm": 0.16514427959918976, "learning_rate": 0.002, "loss": 2.5636, "step": 313380 }, { "epoch": 0.6243425666199158, "grad_norm": 0.13034449517726898, "learning_rate": 0.002, "loss": 2.5717, "step": 313390 }, { "epoch": 0.6243624888435547, "grad_norm": 0.20152685046195984, "learning_rate": 0.002, "loss": 2.5685, "step": 313400 }, { "epoch": 0.6243824110671937, "grad_norm": 0.16574902832508087, "learning_rate": 0.002, "loss": 2.5615, "step": 313410 }, { "epoch": 0.6244023332908326, "grad_norm": 0.24233298003673553, "learning_rate": 0.002, "loss": 2.5493, "step": 313420 }, { "epoch": 0.6244222555144715, "grad_norm": 0.14994020760059357, "learning_rate": 0.002, "loss": 2.5628, "step": 313430 }, { "epoch": 0.6244421777381104, "grad_norm": 0.15908358991146088, "learning_rate": 0.002, "loss": 2.5636, "step": 313440 }, { "epoch": 0.6244620999617493, "grad_norm": 0.20945963263511658, "learning_rate": 0.002, "loss": 2.5421, "step": 313450 }, { "epoch": 0.6244820221853883, "grad_norm": 0.18068566918373108, "learning_rate": 0.002, "loss": 2.5515, "step": 313460 }, { "epoch": 0.6245019444090272, "grad_norm": 0.1627776026725769, "learning_rate": 0.002, "loss": 2.5635, "step": 313470 }, { "epoch": 0.6245218666326661, "grad_norm": 0.17824433743953705, "learning_rate": 0.002, "loss": 2.5576, "step": 313480 }, { "epoch": 0.624541788856305, "grad_norm": 0.1698598861694336, "learning_rate": 0.002, "loss": 2.5462, "step": 313490 }, { "epoch": 0.6245617110799438, "grad_norm": 0.19563597440719604, "learning_rate": 0.002, "loss": 2.5663, "step": 313500 }, { "epoch": 0.6245816333035829, "grad_norm": 0.17495225369930267, "learning_rate": 0.002, "loss": 2.5627, "step": 313510 }, { "epoch": 0.6246015555272217, "grad_norm": 0.160530686378479, "learning_rate": 0.002, "loss": 2.5498, "step": 313520 }, { "epoch": 0.6246214777508606, "grad_norm": 0.15235456824302673, "learning_rate": 0.002, "loss": 2.5528, "step": 313530 }, { "epoch": 0.6246413999744995, "grad_norm": 0.17073336243629456, "learning_rate": 0.002, "loss": 2.5649, "step": 313540 }, { "epoch": 0.6246613221981384, "grad_norm": 0.1357809454202652, "learning_rate": 0.002, "loss": 2.5458, "step": 313550 }, { "epoch": 0.6246812444217774, "grad_norm": 0.16654996573925018, "learning_rate": 0.002, "loss": 2.5594, "step": 313560 }, { "epoch": 0.6247011666454163, "grad_norm": 0.20189861953258514, "learning_rate": 0.002, "loss": 2.5457, "step": 313570 }, { "epoch": 0.6247210888690552, "grad_norm": 0.16963325440883636, "learning_rate": 0.002, "loss": 2.5356, "step": 313580 }, { "epoch": 0.6247410110926941, "grad_norm": 0.16550324857234955, "learning_rate": 0.002, "loss": 2.5619, "step": 313590 }, { "epoch": 0.6247609333163331, "grad_norm": 0.17796960473060608, "learning_rate": 0.002, "loss": 2.5732, "step": 313600 }, { "epoch": 0.624780855539972, "grad_norm": 0.1548951268196106, "learning_rate": 0.002, "loss": 2.5661, "step": 313610 }, { "epoch": 0.6248007777636109, "grad_norm": 0.1458529382944107, "learning_rate": 0.002, "loss": 2.5701, "step": 313620 }, { "epoch": 0.6248206999872498, "grad_norm": 0.1936025470495224, "learning_rate": 0.002, "loss": 2.5712, "step": 313630 }, { "epoch": 0.6248406222108887, "grad_norm": 0.1775331050157547, "learning_rate": 0.002, "loss": 2.5419, "step": 313640 }, { "epoch": 0.6248605444345277, "grad_norm": 0.17268702387809753, "learning_rate": 0.002, "loss": 2.5461, "step": 313650 }, { "epoch": 0.6248804666581665, "grad_norm": 0.15899233520030975, "learning_rate": 0.002, "loss": 2.5427, "step": 313660 }, { "epoch": 0.6249003888818054, "grad_norm": 0.16708528995513916, "learning_rate": 0.002, "loss": 2.5456, "step": 313670 }, { "epoch": 0.6249203111054443, "grad_norm": 0.16254781186580658, "learning_rate": 0.002, "loss": 2.5553, "step": 313680 }, { "epoch": 0.6249402333290832, "grad_norm": 0.1878020465373993, "learning_rate": 0.002, "loss": 2.5542, "step": 313690 }, { "epoch": 0.6249601555527222, "grad_norm": 0.15802280604839325, "learning_rate": 0.002, "loss": 2.5463, "step": 313700 }, { "epoch": 0.6249800777763611, "grad_norm": 0.1691918522119522, "learning_rate": 0.002, "loss": 2.5663, "step": 313710 }, { "epoch": 0.625, "grad_norm": 0.13482724130153656, "learning_rate": 0.002, "loss": 2.5467, "step": 313720 }, { "epoch": 0.6250199222236389, "grad_norm": 0.22824063897132874, "learning_rate": 0.002, "loss": 2.5626, "step": 313730 }, { "epoch": 0.6250398444472778, "grad_norm": 0.1925584226846695, "learning_rate": 0.002, "loss": 2.5496, "step": 313740 }, { "epoch": 0.6250597666709168, "grad_norm": 0.15033940970897675, "learning_rate": 0.002, "loss": 2.5582, "step": 313750 }, { "epoch": 0.6250796888945557, "grad_norm": 0.15844769775867462, "learning_rate": 0.002, "loss": 2.5678, "step": 313760 }, { "epoch": 0.6250996111181946, "grad_norm": 0.20360836386680603, "learning_rate": 0.002, "loss": 2.5593, "step": 313770 }, { "epoch": 0.6251195333418335, "grad_norm": 0.1932636946439743, "learning_rate": 0.002, "loss": 2.5709, "step": 313780 }, { "epoch": 0.6251394555654723, "grad_norm": 0.15637008845806122, "learning_rate": 0.002, "loss": 2.5678, "step": 313790 }, { "epoch": 0.6251593777891113, "grad_norm": 0.14501674473285675, "learning_rate": 0.002, "loss": 2.5456, "step": 313800 }, { "epoch": 0.6251793000127502, "grad_norm": 0.15136507153511047, "learning_rate": 0.002, "loss": 2.5627, "step": 313810 }, { "epoch": 0.6251992222363891, "grad_norm": 0.16298694908618927, "learning_rate": 0.002, "loss": 2.561, "step": 313820 }, { "epoch": 0.625219144460028, "grad_norm": 0.20722268521785736, "learning_rate": 0.002, "loss": 2.5655, "step": 313830 }, { "epoch": 0.6252390666836669, "grad_norm": 0.17166005074977875, "learning_rate": 0.002, "loss": 2.5585, "step": 313840 }, { "epoch": 0.6252589889073059, "grad_norm": 0.16462968289852142, "learning_rate": 0.002, "loss": 2.5455, "step": 313850 }, { "epoch": 0.6252789111309448, "grad_norm": 0.15529890358448029, "learning_rate": 0.002, "loss": 2.5586, "step": 313860 }, { "epoch": 0.6252988333545837, "grad_norm": 0.16049638390541077, "learning_rate": 0.002, "loss": 2.5744, "step": 313870 }, { "epoch": 0.6253187555782226, "grad_norm": 0.14558328688144684, "learning_rate": 0.002, "loss": 2.5536, "step": 313880 }, { "epoch": 0.6253386778018616, "grad_norm": 0.17441195249557495, "learning_rate": 0.002, "loss": 2.5594, "step": 313890 }, { "epoch": 0.6253586000255005, "grad_norm": 0.15510132908821106, "learning_rate": 0.002, "loss": 2.5451, "step": 313900 }, { "epoch": 0.6253785222491394, "grad_norm": 0.1841995269060135, "learning_rate": 0.002, "loss": 2.5648, "step": 313910 }, { "epoch": 0.6253984444727783, "grad_norm": 0.17437003552913666, "learning_rate": 0.002, "loss": 2.5412, "step": 313920 }, { "epoch": 0.6254183666964171, "grad_norm": 0.1411372721195221, "learning_rate": 0.002, "loss": 2.5476, "step": 313930 }, { "epoch": 0.6254382889200562, "grad_norm": 0.18681398034095764, "learning_rate": 0.002, "loss": 2.5445, "step": 313940 }, { "epoch": 0.625458211143695, "grad_norm": 0.16219355165958405, "learning_rate": 0.002, "loss": 2.536, "step": 313950 }, { "epoch": 0.6254781333673339, "grad_norm": 0.1825135499238968, "learning_rate": 0.002, "loss": 2.5463, "step": 313960 }, { "epoch": 0.6254980555909728, "grad_norm": 0.16048607230186462, "learning_rate": 0.002, "loss": 2.5509, "step": 313970 }, { "epoch": 0.6255179778146117, "grad_norm": 0.15755516290664673, "learning_rate": 0.002, "loss": 2.5559, "step": 313980 }, { "epoch": 0.6255379000382507, "grad_norm": 0.17986902594566345, "learning_rate": 0.002, "loss": 2.5567, "step": 313990 }, { "epoch": 0.6255578222618896, "grad_norm": 0.1488940715789795, "learning_rate": 0.002, "loss": 2.5605, "step": 314000 }, { "epoch": 0.6255777444855285, "grad_norm": 0.16581659018993378, "learning_rate": 0.002, "loss": 2.5615, "step": 314010 }, { "epoch": 0.6255976667091674, "grad_norm": 0.1800186187028885, "learning_rate": 0.002, "loss": 2.5436, "step": 314020 }, { "epoch": 0.6256175889328063, "grad_norm": 0.1863657534122467, "learning_rate": 0.002, "loss": 2.5519, "step": 314030 }, { "epoch": 0.6256375111564453, "grad_norm": 0.1468742936849594, "learning_rate": 0.002, "loss": 2.5475, "step": 314040 }, { "epoch": 0.6256574333800842, "grad_norm": 0.1704210489988327, "learning_rate": 0.002, "loss": 2.5566, "step": 314050 }, { "epoch": 0.6256773556037231, "grad_norm": 0.1751822978258133, "learning_rate": 0.002, "loss": 2.549, "step": 314060 }, { "epoch": 0.625697277827362, "grad_norm": 0.17188240587711334, "learning_rate": 0.002, "loss": 2.5419, "step": 314070 }, { "epoch": 0.6257172000510008, "grad_norm": 0.16596108675003052, "learning_rate": 0.002, "loss": 2.58, "step": 314080 }, { "epoch": 0.6257371222746398, "grad_norm": 0.18867680430412292, "learning_rate": 0.002, "loss": 2.5547, "step": 314090 }, { "epoch": 0.6257570444982787, "grad_norm": 0.1785440295934677, "learning_rate": 0.002, "loss": 2.559, "step": 314100 }, { "epoch": 0.6257769667219176, "grad_norm": 0.17494900524616241, "learning_rate": 0.002, "loss": 2.5517, "step": 314110 }, { "epoch": 0.6257968889455565, "grad_norm": 0.17667922377586365, "learning_rate": 0.002, "loss": 2.5523, "step": 314120 }, { "epoch": 0.6258168111691954, "grad_norm": 0.17144078016281128, "learning_rate": 0.002, "loss": 2.5576, "step": 314130 }, { "epoch": 0.6258367333928344, "grad_norm": 0.18014800548553467, "learning_rate": 0.002, "loss": 2.5445, "step": 314140 }, { "epoch": 0.6258566556164733, "grad_norm": 0.17722269892692566, "learning_rate": 0.002, "loss": 2.5415, "step": 314150 }, { "epoch": 0.6258765778401122, "grad_norm": 0.1492239385843277, "learning_rate": 0.002, "loss": 2.5474, "step": 314160 }, { "epoch": 0.6258965000637511, "grad_norm": 0.1669546216726303, "learning_rate": 0.002, "loss": 2.5535, "step": 314170 }, { "epoch": 0.6259164222873901, "grad_norm": 0.15344470739364624, "learning_rate": 0.002, "loss": 2.5671, "step": 314180 }, { "epoch": 0.625936344511029, "grad_norm": 0.15141376852989197, "learning_rate": 0.002, "loss": 2.5427, "step": 314190 }, { "epoch": 0.6259562667346679, "grad_norm": 0.16357454657554626, "learning_rate": 0.002, "loss": 2.5585, "step": 314200 }, { "epoch": 0.6259761889583068, "grad_norm": 0.17706440389156342, "learning_rate": 0.002, "loss": 2.5531, "step": 314210 }, { "epoch": 0.6259961111819456, "grad_norm": 0.15909218788146973, "learning_rate": 0.002, "loss": 2.5518, "step": 314220 }, { "epoch": 0.6260160334055846, "grad_norm": 0.18267220258712769, "learning_rate": 0.002, "loss": 2.5594, "step": 314230 }, { "epoch": 0.6260359556292235, "grad_norm": 0.17879872024059296, "learning_rate": 0.002, "loss": 2.5437, "step": 314240 }, { "epoch": 0.6260558778528624, "grad_norm": 0.17359979450702667, "learning_rate": 0.002, "loss": 2.5686, "step": 314250 }, { "epoch": 0.6260758000765013, "grad_norm": 0.19623695313930511, "learning_rate": 0.002, "loss": 2.5602, "step": 314260 }, { "epoch": 0.6260957223001402, "grad_norm": 0.18996207416057587, "learning_rate": 0.002, "loss": 2.5578, "step": 314270 }, { "epoch": 0.6261156445237792, "grad_norm": 0.1510525494813919, "learning_rate": 0.002, "loss": 2.5503, "step": 314280 }, { "epoch": 0.6261355667474181, "grad_norm": 0.14508435130119324, "learning_rate": 0.002, "loss": 2.553, "step": 314290 }, { "epoch": 0.626155488971057, "grad_norm": 0.15762607753276825, "learning_rate": 0.002, "loss": 2.5423, "step": 314300 }, { "epoch": 0.6261754111946959, "grad_norm": 0.1625468134880066, "learning_rate": 0.002, "loss": 2.5622, "step": 314310 }, { "epoch": 0.6261953334183348, "grad_norm": 0.15723682940006256, "learning_rate": 0.002, "loss": 2.5658, "step": 314320 }, { "epoch": 0.6262152556419738, "grad_norm": 0.15555639564990997, "learning_rate": 0.002, "loss": 2.5435, "step": 314330 }, { "epoch": 0.6262351778656127, "grad_norm": 0.16186603903770447, "learning_rate": 0.002, "loss": 2.5564, "step": 314340 }, { "epoch": 0.6262551000892516, "grad_norm": 0.17626355588436127, "learning_rate": 0.002, "loss": 2.557, "step": 314350 }, { "epoch": 0.6262750223128905, "grad_norm": 0.16219180822372437, "learning_rate": 0.002, "loss": 2.5508, "step": 314360 }, { "epoch": 0.6262949445365293, "grad_norm": 0.16035452485084534, "learning_rate": 0.002, "loss": 2.5513, "step": 314370 }, { "epoch": 0.6263148667601683, "grad_norm": 0.17209191620349884, "learning_rate": 0.002, "loss": 2.5685, "step": 314380 }, { "epoch": 0.6263347889838072, "grad_norm": 0.15903623402118683, "learning_rate": 0.002, "loss": 2.5447, "step": 314390 }, { "epoch": 0.6263547112074461, "grad_norm": 0.19775938987731934, "learning_rate": 0.002, "loss": 2.5549, "step": 314400 }, { "epoch": 0.626374633431085, "grad_norm": 0.1835029125213623, "learning_rate": 0.002, "loss": 2.5493, "step": 314410 }, { "epoch": 0.6263945556547239, "grad_norm": 0.16553975641727448, "learning_rate": 0.002, "loss": 2.5537, "step": 314420 }, { "epoch": 0.6264144778783629, "grad_norm": 0.13165433704853058, "learning_rate": 0.002, "loss": 2.554, "step": 314430 }, { "epoch": 0.6264344001020018, "grad_norm": 0.1692924201488495, "learning_rate": 0.002, "loss": 2.546, "step": 314440 }, { "epoch": 0.6264543223256407, "grad_norm": 0.15950089693069458, "learning_rate": 0.002, "loss": 2.5559, "step": 314450 }, { "epoch": 0.6264742445492796, "grad_norm": 0.16986919939517975, "learning_rate": 0.002, "loss": 2.564, "step": 314460 }, { "epoch": 0.6264941667729186, "grad_norm": 0.17012052237987518, "learning_rate": 0.002, "loss": 2.5546, "step": 314470 }, { "epoch": 0.6265140889965575, "grad_norm": 0.17732617259025574, "learning_rate": 0.002, "loss": 2.5641, "step": 314480 }, { "epoch": 0.6265340112201964, "grad_norm": 0.14819172024726868, "learning_rate": 0.002, "loss": 2.5495, "step": 314490 }, { "epoch": 0.6265539334438353, "grad_norm": 0.18479023873806, "learning_rate": 0.002, "loss": 2.5572, "step": 314500 }, { "epoch": 0.6265738556674741, "grad_norm": 0.16412213444709778, "learning_rate": 0.002, "loss": 2.5503, "step": 314510 }, { "epoch": 0.6265937778911131, "grad_norm": 0.17728163301944733, "learning_rate": 0.002, "loss": 2.5498, "step": 314520 }, { "epoch": 0.626613700114752, "grad_norm": 0.14798983931541443, "learning_rate": 0.002, "loss": 2.5616, "step": 314530 }, { "epoch": 0.6266336223383909, "grad_norm": 0.20783865451812744, "learning_rate": 0.002, "loss": 2.548, "step": 314540 }, { "epoch": 0.6266535445620298, "grad_norm": 0.1661413162946701, "learning_rate": 0.002, "loss": 2.5495, "step": 314550 }, { "epoch": 0.6266734667856687, "grad_norm": 0.18980973958969116, "learning_rate": 0.002, "loss": 2.5547, "step": 314560 }, { "epoch": 0.6266933890093077, "grad_norm": 0.17687296867370605, "learning_rate": 0.002, "loss": 2.5564, "step": 314570 }, { "epoch": 0.6267133112329466, "grad_norm": 0.1370435357093811, "learning_rate": 0.002, "loss": 2.5655, "step": 314580 }, { "epoch": 0.6267332334565855, "grad_norm": 0.18842200934886932, "learning_rate": 0.002, "loss": 2.5585, "step": 314590 }, { "epoch": 0.6267531556802244, "grad_norm": 0.1672748178243637, "learning_rate": 0.002, "loss": 2.5348, "step": 314600 }, { "epoch": 0.6267730779038633, "grad_norm": 0.14388497173786163, "learning_rate": 0.002, "loss": 2.5558, "step": 314610 }, { "epoch": 0.6267930001275023, "grad_norm": 0.15627190470695496, "learning_rate": 0.002, "loss": 2.5522, "step": 314620 }, { "epoch": 0.6268129223511412, "grad_norm": 0.1560620367527008, "learning_rate": 0.002, "loss": 2.5593, "step": 314630 }, { "epoch": 0.6268328445747801, "grad_norm": 0.1807345449924469, "learning_rate": 0.002, "loss": 2.5572, "step": 314640 }, { "epoch": 0.626852766798419, "grad_norm": 0.17785173654556274, "learning_rate": 0.002, "loss": 2.5497, "step": 314650 }, { "epoch": 0.6268726890220578, "grad_norm": 0.16930748522281647, "learning_rate": 0.002, "loss": 2.5594, "step": 314660 }, { "epoch": 0.6268926112456968, "grad_norm": 0.18334165215492249, "learning_rate": 0.002, "loss": 2.5543, "step": 314670 }, { "epoch": 0.6269125334693357, "grad_norm": 0.16994768381118774, "learning_rate": 0.002, "loss": 2.5493, "step": 314680 }, { "epoch": 0.6269324556929746, "grad_norm": 0.20968417823314667, "learning_rate": 0.002, "loss": 2.5645, "step": 314690 }, { "epoch": 0.6269523779166135, "grad_norm": 0.16536134481430054, "learning_rate": 0.002, "loss": 2.5612, "step": 314700 }, { "epoch": 0.6269723001402524, "grad_norm": 0.14799323678016663, "learning_rate": 0.002, "loss": 2.551, "step": 314710 }, { "epoch": 0.6269922223638914, "grad_norm": 0.17352145910263062, "learning_rate": 0.002, "loss": 2.5709, "step": 314720 }, { "epoch": 0.6270121445875303, "grad_norm": 0.15579187870025635, "learning_rate": 0.002, "loss": 2.5512, "step": 314730 }, { "epoch": 0.6270320668111692, "grad_norm": 0.17448976635932922, "learning_rate": 0.002, "loss": 2.5479, "step": 314740 }, { "epoch": 0.6270519890348081, "grad_norm": 0.19683456420898438, "learning_rate": 0.002, "loss": 2.5628, "step": 314750 }, { "epoch": 0.627071911258447, "grad_norm": 0.14803428947925568, "learning_rate": 0.002, "loss": 2.5582, "step": 314760 }, { "epoch": 0.627091833482086, "grad_norm": 0.14212295413017273, "learning_rate": 0.002, "loss": 2.5573, "step": 314770 }, { "epoch": 0.6271117557057249, "grad_norm": 0.15871433913707733, "learning_rate": 0.002, "loss": 2.5732, "step": 314780 }, { "epoch": 0.6271316779293638, "grad_norm": 0.18107961118221283, "learning_rate": 0.002, "loss": 2.5604, "step": 314790 }, { "epoch": 0.6271516001530026, "grad_norm": 0.15615718066692352, "learning_rate": 0.002, "loss": 2.5485, "step": 314800 }, { "epoch": 0.6271715223766416, "grad_norm": 0.16828300058841705, "learning_rate": 0.002, "loss": 2.5581, "step": 314810 }, { "epoch": 0.6271914446002805, "grad_norm": 0.16128359735012054, "learning_rate": 0.002, "loss": 2.5654, "step": 314820 }, { "epoch": 0.6272113668239194, "grad_norm": 0.17761670053005219, "learning_rate": 0.002, "loss": 2.5549, "step": 314830 }, { "epoch": 0.6272312890475583, "grad_norm": 0.15913265943527222, "learning_rate": 0.002, "loss": 2.5642, "step": 314840 }, { "epoch": 0.6272512112711972, "grad_norm": 0.1609826385974884, "learning_rate": 0.002, "loss": 2.5653, "step": 314850 }, { "epoch": 0.6272711334948362, "grad_norm": 0.14621037244796753, "learning_rate": 0.002, "loss": 2.5571, "step": 314860 }, { "epoch": 0.6272910557184751, "grad_norm": 0.15707778930664062, "learning_rate": 0.002, "loss": 2.5462, "step": 314870 }, { "epoch": 0.627310977942114, "grad_norm": 0.14827010035514832, "learning_rate": 0.002, "loss": 2.548, "step": 314880 }, { "epoch": 0.6273309001657529, "grad_norm": 0.16728781163692474, "learning_rate": 0.002, "loss": 2.5564, "step": 314890 }, { "epoch": 0.6273508223893918, "grad_norm": 0.20563195645809174, "learning_rate": 0.002, "loss": 2.5558, "step": 314900 }, { "epoch": 0.6273707446130308, "grad_norm": 0.16315196454524994, "learning_rate": 0.002, "loss": 2.5538, "step": 314910 }, { "epoch": 0.6273906668366697, "grad_norm": 0.1393636167049408, "learning_rate": 0.002, "loss": 2.5683, "step": 314920 }, { "epoch": 0.6274105890603086, "grad_norm": 0.16436339914798737, "learning_rate": 0.002, "loss": 2.5477, "step": 314930 }, { "epoch": 0.6274305112839474, "grad_norm": 0.17362044751644135, "learning_rate": 0.002, "loss": 2.563, "step": 314940 }, { "epoch": 0.6274504335075863, "grad_norm": 0.14842450618743896, "learning_rate": 0.002, "loss": 2.5534, "step": 314950 }, { "epoch": 0.6274703557312253, "grad_norm": 0.1692923903465271, "learning_rate": 0.002, "loss": 2.5642, "step": 314960 }, { "epoch": 0.6274902779548642, "grad_norm": 0.1484357714653015, "learning_rate": 0.002, "loss": 2.5551, "step": 314970 }, { "epoch": 0.6275102001785031, "grad_norm": 0.15498648583889008, "learning_rate": 0.002, "loss": 2.5475, "step": 314980 }, { "epoch": 0.627530122402142, "grad_norm": 0.15418070554733276, "learning_rate": 0.002, "loss": 2.5473, "step": 314990 }, { "epoch": 0.6275500446257809, "grad_norm": 0.1825043112039566, "learning_rate": 0.002, "loss": 2.5472, "step": 315000 }, { "epoch": 0.6275699668494199, "grad_norm": 0.16608411073684692, "learning_rate": 0.002, "loss": 2.5623, "step": 315010 }, { "epoch": 0.6275898890730588, "grad_norm": 0.1591726839542389, "learning_rate": 0.002, "loss": 2.5492, "step": 315020 }, { "epoch": 0.6276098112966977, "grad_norm": 0.21193811297416687, "learning_rate": 0.002, "loss": 2.5669, "step": 315030 }, { "epoch": 0.6276297335203366, "grad_norm": 0.1833723783493042, "learning_rate": 0.002, "loss": 2.5602, "step": 315040 }, { "epoch": 0.6276496557439755, "grad_norm": 0.19389848411083221, "learning_rate": 0.002, "loss": 2.5561, "step": 315050 }, { "epoch": 0.6276695779676145, "grad_norm": 0.194541797041893, "learning_rate": 0.002, "loss": 2.5643, "step": 315060 }, { "epoch": 0.6276895001912534, "grad_norm": 0.213900625705719, "learning_rate": 0.002, "loss": 2.5506, "step": 315070 }, { "epoch": 0.6277094224148922, "grad_norm": 0.1551811546087265, "learning_rate": 0.002, "loss": 2.5521, "step": 315080 }, { "epoch": 0.6277293446385311, "grad_norm": 0.15023040771484375, "learning_rate": 0.002, "loss": 2.5675, "step": 315090 }, { "epoch": 0.6277492668621701, "grad_norm": 0.1596493422985077, "learning_rate": 0.002, "loss": 2.5432, "step": 315100 }, { "epoch": 0.627769189085809, "grad_norm": 0.15480107069015503, "learning_rate": 0.002, "loss": 2.5554, "step": 315110 }, { "epoch": 0.6277891113094479, "grad_norm": 0.18909715116024017, "learning_rate": 0.002, "loss": 2.5637, "step": 315120 }, { "epoch": 0.6278090335330868, "grad_norm": 0.1634392887353897, "learning_rate": 0.002, "loss": 2.5612, "step": 315130 }, { "epoch": 0.6278289557567257, "grad_norm": 0.14897987246513367, "learning_rate": 0.002, "loss": 2.5544, "step": 315140 }, { "epoch": 0.6278488779803647, "grad_norm": 0.16525965929031372, "learning_rate": 0.002, "loss": 2.5503, "step": 315150 }, { "epoch": 0.6278688002040036, "grad_norm": 0.1829017847776413, "learning_rate": 0.002, "loss": 2.5498, "step": 315160 }, { "epoch": 0.6278887224276425, "grad_norm": 0.21347874402999878, "learning_rate": 0.002, "loss": 2.5504, "step": 315170 }, { "epoch": 0.6279086446512814, "grad_norm": 0.17141622304916382, "learning_rate": 0.002, "loss": 2.5638, "step": 315180 }, { "epoch": 0.6279285668749203, "grad_norm": 0.18733157217502594, "learning_rate": 0.002, "loss": 2.5687, "step": 315190 }, { "epoch": 0.6279484890985593, "grad_norm": 0.15991128981113434, "learning_rate": 0.002, "loss": 2.5673, "step": 315200 }, { "epoch": 0.6279684113221982, "grad_norm": 0.14979706704616547, "learning_rate": 0.002, "loss": 2.5623, "step": 315210 }, { "epoch": 0.627988333545837, "grad_norm": 0.20502175390720367, "learning_rate": 0.002, "loss": 2.5629, "step": 315220 }, { "epoch": 0.6280082557694759, "grad_norm": 0.175113245844841, "learning_rate": 0.002, "loss": 2.5644, "step": 315230 }, { "epoch": 0.6280281779931148, "grad_norm": 0.17661802470684052, "learning_rate": 0.002, "loss": 2.5474, "step": 315240 }, { "epoch": 0.6280481002167538, "grad_norm": 0.163275346159935, "learning_rate": 0.002, "loss": 2.5552, "step": 315250 }, { "epoch": 0.6280680224403927, "grad_norm": 0.1426352560520172, "learning_rate": 0.002, "loss": 2.558, "step": 315260 }, { "epoch": 0.6280879446640316, "grad_norm": 0.18051297962665558, "learning_rate": 0.002, "loss": 2.5562, "step": 315270 }, { "epoch": 0.6281078668876705, "grad_norm": 0.1611509472131729, "learning_rate": 0.002, "loss": 2.5565, "step": 315280 }, { "epoch": 0.6281277891113094, "grad_norm": 0.1791301816701889, "learning_rate": 0.002, "loss": 2.5588, "step": 315290 }, { "epoch": 0.6281477113349484, "grad_norm": 0.1706084907054901, "learning_rate": 0.002, "loss": 2.5477, "step": 315300 }, { "epoch": 0.6281676335585873, "grad_norm": 0.140258327126503, "learning_rate": 0.002, "loss": 2.5547, "step": 315310 }, { "epoch": 0.6281875557822262, "grad_norm": 0.1917395293712616, "learning_rate": 0.002, "loss": 2.5647, "step": 315320 }, { "epoch": 0.6282074780058651, "grad_norm": 0.17892485857009888, "learning_rate": 0.002, "loss": 2.5657, "step": 315330 }, { "epoch": 0.628227400229504, "grad_norm": 0.15386457741260529, "learning_rate": 0.002, "loss": 2.5477, "step": 315340 }, { "epoch": 0.628247322453143, "grad_norm": 0.1322798877954483, "learning_rate": 0.002, "loss": 2.5455, "step": 315350 }, { "epoch": 0.6282672446767819, "grad_norm": 0.2235407680273056, "learning_rate": 0.002, "loss": 2.5649, "step": 315360 }, { "epoch": 0.6282871669004207, "grad_norm": 0.17448504269123077, "learning_rate": 0.002, "loss": 2.5622, "step": 315370 }, { "epoch": 0.6283070891240596, "grad_norm": 0.16313375532627106, "learning_rate": 0.002, "loss": 2.5479, "step": 315380 }, { "epoch": 0.6283270113476986, "grad_norm": 0.15628087520599365, "learning_rate": 0.002, "loss": 2.5709, "step": 315390 }, { "epoch": 0.6283469335713375, "grad_norm": 0.14202821254730225, "learning_rate": 0.002, "loss": 2.5547, "step": 315400 }, { "epoch": 0.6283668557949764, "grad_norm": 0.1743556708097458, "learning_rate": 0.002, "loss": 2.5499, "step": 315410 }, { "epoch": 0.6283867780186153, "grad_norm": 0.15548394620418549, "learning_rate": 0.002, "loss": 2.5679, "step": 315420 }, { "epoch": 0.6284067002422542, "grad_norm": 0.13468852639198303, "learning_rate": 0.002, "loss": 2.5598, "step": 315430 }, { "epoch": 0.6284266224658932, "grad_norm": 0.1748518943786621, "learning_rate": 0.002, "loss": 2.5664, "step": 315440 }, { "epoch": 0.6284465446895321, "grad_norm": 0.14988446235656738, "learning_rate": 0.002, "loss": 2.5513, "step": 315450 }, { "epoch": 0.628466466913171, "grad_norm": 0.14896275103092194, "learning_rate": 0.002, "loss": 2.5466, "step": 315460 }, { "epoch": 0.6284863891368099, "grad_norm": 0.1772766262292862, "learning_rate": 0.002, "loss": 2.5596, "step": 315470 }, { "epoch": 0.6285063113604488, "grad_norm": 0.1641867309808731, "learning_rate": 0.002, "loss": 2.5544, "step": 315480 }, { "epoch": 0.6285262335840878, "grad_norm": 0.20475929975509644, "learning_rate": 0.002, "loss": 2.5622, "step": 315490 }, { "epoch": 0.6285461558077267, "grad_norm": 0.14258195459842682, "learning_rate": 0.002, "loss": 2.5554, "step": 315500 }, { "epoch": 0.6285660780313655, "grad_norm": 0.16007007658481598, "learning_rate": 0.002, "loss": 2.5704, "step": 315510 }, { "epoch": 0.6285860002550044, "grad_norm": 0.14757952094078064, "learning_rate": 0.002, "loss": 2.5479, "step": 315520 }, { "epoch": 0.6286059224786433, "grad_norm": 0.16907696425914764, "learning_rate": 0.002, "loss": 2.5557, "step": 315530 }, { "epoch": 0.6286258447022823, "grad_norm": 0.1684308648109436, "learning_rate": 0.002, "loss": 2.5691, "step": 315540 }, { "epoch": 0.6286457669259212, "grad_norm": 0.15264487266540527, "learning_rate": 0.002, "loss": 2.5444, "step": 315550 }, { "epoch": 0.6286656891495601, "grad_norm": 0.14229078590869904, "learning_rate": 0.002, "loss": 2.5598, "step": 315560 }, { "epoch": 0.628685611373199, "grad_norm": 0.1975083351135254, "learning_rate": 0.002, "loss": 2.5565, "step": 315570 }, { "epoch": 0.6287055335968379, "grad_norm": 0.1543530374765396, "learning_rate": 0.002, "loss": 2.556, "step": 315580 }, { "epoch": 0.6287254558204769, "grad_norm": 0.1912885308265686, "learning_rate": 0.002, "loss": 2.5617, "step": 315590 }, { "epoch": 0.6287453780441158, "grad_norm": 0.14402037858963013, "learning_rate": 0.002, "loss": 2.5643, "step": 315600 }, { "epoch": 0.6287653002677547, "grad_norm": 0.1725175976753235, "learning_rate": 0.002, "loss": 2.5536, "step": 315610 }, { "epoch": 0.6287852224913936, "grad_norm": 0.19114196300506592, "learning_rate": 0.002, "loss": 2.5537, "step": 315620 }, { "epoch": 0.6288051447150325, "grad_norm": 0.14370831847190857, "learning_rate": 0.002, "loss": 2.5605, "step": 315630 }, { "epoch": 0.6288250669386715, "grad_norm": 0.2406919300556183, "learning_rate": 0.002, "loss": 2.5354, "step": 315640 }, { "epoch": 0.6288449891623104, "grad_norm": 0.15439197421073914, "learning_rate": 0.002, "loss": 2.5463, "step": 315650 }, { "epoch": 0.6288649113859492, "grad_norm": 0.16980063915252686, "learning_rate": 0.002, "loss": 2.5578, "step": 315660 }, { "epoch": 0.6288848336095881, "grad_norm": 0.21591541171073914, "learning_rate": 0.002, "loss": 2.5656, "step": 315670 }, { "epoch": 0.6289047558332271, "grad_norm": 0.15534915030002594, "learning_rate": 0.002, "loss": 2.5656, "step": 315680 }, { "epoch": 0.628924678056866, "grad_norm": 0.16870738565921783, "learning_rate": 0.002, "loss": 2.5631, "step": 315690 }, { "epoch": 0.6289446002805049, "grad_norm": 0.1625189632177353, "learning_rate": 0.002, "loss": 2.5409, "step": 315700 }, { "epoch": 0.6289645225041438, "grad_norm": 0.14322416484355927, "learning_rate": 0.002, "loss": 2.5637, "step": 315710 }, { "epoch": 0.6289844447277827, "grad_norm": 0.1363697648048401, "learning_rate": 0.002, "loss": 2.5444, "step": 315720 }, { "epoch": 0.6290043669514217, "grad_norm": 0.17810820043087006, "learning_rate": 0.002, "loss": 2.5653, "step": 315730 }, { "epoch": 0.6290242891750606, "grad_norm": 0.16234663128852844, "learning_rate": 0.002, "loss": 2.562, "step": 315740 }, { "epoch": 0.6290442113986995, "grad_norm": 0.15133726596832275, "learning_rate": 0.002, "loss": 2.5379, "step": 315750 }, { "epoch": 0.6290641336223384, "grad_norm": 0.15237368643283844, "learning_rate": 0.002, "loss": 2.5516, "step": 315760 }, { "epoch": 0.6290840558459773, "grad_norm": 0.16091229021549225, "learning_rate": 0.002, "loss": 2.5715, "step": 315770 }, { "epoch": 0.6291039780696163, "grad_norm": 0.16581495106220245, "learning_rate": 0.002, "loss": 2.5695, "step": 315780 }, { "epoch": 0.6291239002932552, "grad_norm": 0.15711139142513275, "learning_rate": 0.002, "loss": 2.5677, "step": 315790 }, { "epoch": 0.629143822516894, "grad_norm": 0.18646399676799774, "learning_rate": 0.002, "loss": 2.5548, "step": 315800 }, { "epoch": 0.6291637447405329, "grad_norm": 0.176579087972641, "learning_rate": 0.002, "loss": 2.5541, "step": 315810 }, { "epoch": 0.6291836669641718, "grad_norm": 0.18250367045402527, "learning_rate": 0.002, "loss": 2.5568, "step": 315820 }, { "epoch": 0.6292035891878108, "grad_norm": 0.15171034634113312, "learning_rate": 0.002, "loss": 2.5447, "step": 315830 }, { "epoch": 0.6292235114114497, "grad_norm": 0.20416948199272156, "learning_rate": 0.002, "loss": 2.5701, "step": 315840 }, { "epoch": 0.6292434336350886, "grad_norm": 0.19820986688137054, "learning_rate": 0.002, "loss": 2.5751, "step": 315850 }, { "epoch": 0.6292633558587275, "grad_norm": 0.15972900390625, "learning_rate": 0.002, "loss": 2.5606, "step": 315860 }, { "epoch": 0.6292832780823664, "grad_norm": 0.13808587193489075, "learning_rate": 0.002, "loss": 2.555, "step": 315870 }, { "epoch": 0.6293032003060054, "grad_norm": 0.1417246311903, "learning_rate": 0.002, "loss": 2.5446, "step": 315880 }, { "epoch": 0.6293231225296443, "grad_norm": 0.21399502456188202, "learning_rate": 0.002, "loss": 2.5573, "step": 315890 }, { "epoch": 0.6293430447532832, "grad_norm": 0.1739853471517563, "learning_rate": 0.002, "loss": 2.5521, "step": 315900 }, { "epoch": 0.6293629669769221, "grad_norm": 0.13135167956352234, "learning_rate": 0.002, "loss": 2.5496, "step": 315910 }, { "epoch": 0.629382889200561, "grad_norm": 0.1481839120388031, "learning_rate": 0.002, "loss": 2.5532, "step": 315920 }, { "epoch": 0.6294028114242, "grad_norm": 0.16397953033447266, "learning_rate": 0.002, "loss": 2.5607, "step": 315930 }, { "epoch": 0.6294227336478389, "grad_norm": 0.16274262964725494, "learning_rate": 0.002, "loss": 2.543, "step": 315940 }, { "epoch": 0.6294426558714777, "grad_norm": 0.17309682071208954, "learning_rate": 0.002, "loss": 2.5593, "step": 315950 }, { "epoch": 0.6294625780951166, "grad_norm": 0.16339851915836334, "learning_rate": 0.002, "loss": 2.5455, "step": 315960 }, { "epoch": 0.6294825003187556, "grad_norm": 0.18922173976898193, "learning_rate": 0.002, "loss": 2.5519, "step": 315970 }, { "epoch": 0.6295024225423945, "grad_norm": 0.16862349212169647, "learning_rate": 0.002, "loss": 2.5587, "step": 315980 }, { "epoch": 0.6295223447660334, "grad_norm": 0.1540445238351822, "learning_rate": 0.002, "loss": 2.5506, "step": 315990 }, { "epoch": 0.6295422669896723, "grad_norm": 0.16948002576828003, "learning_rate": 0.002, "loss": 2.5599, "step": 316000 }, { "epoch": 0.6295621892133112, "grad_norm": 0.2562895715236664, "learning_rate": 0.002, "loss": 2.5526, "step": 316010 }, { "epoch": 0.6295821114369502, "grad_norm": 0.1860453188419342, "learning_rate": 0.002, "loss": 2.55, "step": 316020 }, { "epoch": 0.6296020336605891, "grad_norm": 0.14823897182941437, "learning_rate": 0.002, "loss": 2.5683, "step": 316030 }, { "epoch": 0.629621955884228, "grad_norm": 0.13304929435253143, "learning_rate": 0.002, "loss": 2.5613, "step": 316040 }, { "epoch": 0.6296418781078669, "grad_norm": 0.17282173037528992, "learning_rate": 0.002, "loss": 2.5605, "step": 316050 }, { "epoch": 0.6296618003315058, "grad_norm": 0.1595725268125534, "learning_rate": 0.002, "loss": 2.5471, "step": 316060 }, { "epoch": 0.6296817225551448, "grad_norm": 0.14589807391166687, "learning_rate": 0.002, "loss": 2.5557, "step": 316070 }, { "epoch": 0.6297016447787837, "grad_norm": 0.17389489710330963, "learning_rate": 0.002, "loss": 2.5546, "step": 316080 }, { "epoch": 0.6297215670024225, "grad_norm": 0.14613282680511475, "learning_rate": 0.002, "loss": 2.5374, "step": 316090 }, { "epoch": 0.6297414892260614, "grad_norm": 0.14410190284252167, "learning_rate": 0.002, "loss": 2.5445, "step": 316100 }, { "epoch": 0.6297614114497003, "grad_norm": 0.18406632542610168, "learning_rate": 0.002, "loss": 2.5563, "step": 316110 }, { "epoch": 0.6297813336733393, "grad_norm": 0.1505916565656662, "learning_rate": 0.002, "loss": 2.5651, "step": 316120 }, { "epoch": 0.6298012558969782, "grad_norm": 0.13476501405239105, "learning_rate": 0.002, "loss": 2.5413, "step": 316130 }, { "epoch": 0.6298211781206171, "grad_norm": 0.16288399696350098, "learning_rate": 0.002, "loss": 2.5569, "step": 316140 }, { "epoch": 0.629841100344256, "grad_norm": 0.16947601735591888, "learning_rate": 0.002, "loss": 2.5399, "step": 316150 }, { "epoch": 0.6298610225678949, "grad_norm": 0.15951025485992432, "learning_rate": 0.002, "loss": 2.5479, "step": 316160 }, { "epoch": 0.6298809447915339, "grad_norm": 0.1574380099773407, "learning_rate": 0.002, "loss": 2.5557, "step": 316170 }, { "epoch": 0.6299008670151728, "grad_norm": 0.22082199156284332, "learning_rate": 0.002, "loss": 2.5642, "step": 316180 }, { "epoch": 0.6299207892388117, "grad_norm": 0.1612519472837448, "learning_rate": 0.002, "loss": 2.5467, "step": 316190 }, { "epoch": 0.6299407114624506, "grad_norm": 0.1603684425354004, "learning_rate": 0.002, "loss": 2.5794, "step": 316200 }, { "epoch": 0.6299606336860895, "grad_norm": 0.19602356851100922, "learning_rate": 0.002, "loss": 2.5586, "step": 316210 }, { "epoch": 0.6299805559097285, "grad_norm": 0.1417107731103897, "learning_rate": 0.002, "loss": 2.5393, "step": 316220 }, { "epoch": 0.6300004781333673, "grad_norm": 0.19130417704582214, "learning_rate": 0.002, "loss": 2.5606, "step": 316230 }, { "epoch": 0.6300204003570062, "grad_norm": 0.1672886162996292, "learning_rate": 0.002, "loss": 2.5505, "step": 316240 }, { "epoch": 0.6300403225806451, "grad_norm": 0.18271245062351227, "learning_rate": 0.002, "loss": 2.5636, "step": 316250 }, { "epoch": 0.630060244804284, "grad_norm": 0.15085643529891968, "learning_rate": 0.002, "loss": 2.5481, "step": 316260 }, { "epoch": 0.630080167027923, "grad_norm": 0.16622474789619446, "learning_rate": 0.002, "loss": 2.5569, "step": 316270 }, { "epoch": 0.6301000892515619, "grad_norm": 0.15093515813350677, "learning_rate": 0.002, "loss": 2.5609, "step": 316280 }, { "epoch": 0.6301200114752008, "grad_norm": 0.16600574553012848, "learning_rate": 0.002, "loss": 2.5451, "step": 316290 }, { "epoch": 0.6301399336988397, "grad_norm": 0.18628092110157013, "learning_rate": 0.002, "loss": 2.5341, "step": 316300 }, { "epoch": 0.6301598559224787, "grad_norm": 0.14934101700782776, "learning_rate": 0.002, "loss": 2.5503, "step": 316310 }, { "epoch": 0.6301797781461176, "grad_norm": 0.14806337654590607, "learning_rate": 0.002, "loss": 2.5531, "step": 316320 }, { "epoch": 0.6301997003697565, "grad_norm": 0.1491093933582306, "learning_rate": 0.002, "loss": 2.5533, "step": 316330 }, { "epoch": 0.6302196225933954, "grad_norm": 0.15991829335689545, "learning_rate": 0.002, "loss": 2.5483, "step": 316340 }, { "epoch": 0.6302395448170343, "grad_norm": 0.18027977645397186, "learning_rate": 0.002, "loss": 2.5466, "step": 316350 }, { "epoch": 0.6302594670406733, "grad_norm": 0.1607358157634735, "learning_rate": 0.002, "loss": 2.5544, "step": 316360 }, { "epoch": 0.6302793892643122, "grad_norm": 0.18700627982616425, "learning_rate": 0.002, "loss": 2.5525, "step": 316370 }, { "epoch": 0.630299311487951, "grad_norm": 0.14674805104732513, "learning_rate": 0.002, "loss": 2.5589, "step": 316380 }, { "epoch": 0.6303192337115899, "grad_norm": 0.14364077150821686, "learning_rate": 0.002, "loss": 2.5482, "step": 316390 }, { "epoch": 0.6303391559352288, "grad_norm": 0.19459117949008942, "learning_rate": 0.002, "loss": 2.5565, "step": 316400 }, { "epoch": 0.6303590781588678, "grad_norm": 0.1586092859506607, "learning_rate": 0.002, "loss": 2.5602, "step": 316410 }, { "epoch": 0.6303790003825067, "grad_norm": 0.14968009293079376, "learning_rate": 0.002, "loss": 2.5662, "step": 316420 }, { "epoch": 0.6303989226061456, "grad_norm": 0.16726118326187134, "learning_rate": 0.002, "loss": 2.5544, "step": 316430 }, { "epoch": 0.6304188448297845, "grad_norm": 0.15502575039863586, "learning_rate": 0.002, "loss": 2.5676, "step": 316440 }, { "epoch": 0.6304387670534234, "grad_norm": 0.17725542187690735, "learning_rate": 0.002, "loss": 2.5559, "step": 316450 }, { "epoch": 0.6304586892770624, "grad_norm": 0.1581314653158188, "learning_rate": 0.002, "loss": 2.5564, "step": 316460 }, { "epoch": 0.6304786115007013, "grad_norm": 0.15279482305049896, "learning_rate": 0.002, "loss": 2.56, "step": 316470 }, { "epoch": 0.6304985337243402, "grad_norm": 0.2302018254995346, "learning_rate": 0.002, "loss": 2.5521, "step": 316480 }, { "epoch": 0.6305184559479791, "grad_norm": 0.14602932333946228, "learning_rate": 0.002, "loss": 2.5585, "step": 316490 }, { "epoch": 0.630538378171618, "grad_norm": 0.158576101064682, "learning_rate": 0.002, "loss": 2.562, "step": 316500 }, { "epoch": 0.630558300395257, "grad_norm": 0.16869419813156128, "learning_rate": 0.002, "loss": 2.5489, "step": 316510 }, { "epoch": 0.6305782226188958, "grad_norm": 0.1530185043811798, "learning_rate": 0.002, "loss": 2.5671, "step": 316520 }, { "epoch": 0.6305981448425347, "grad_norm": 0.14763006567955017, "learning_rate": 0.002, "loss": 2.554, "step": 316530 }, { "epoch": 0.6306180670661736, "grad_norm": 0.17014718055725098, "learning_rate": 0.002, "loss": 2.5679, "step": 316540 }, { "epoch": 0.6306379892898125, "grad_norm": 0.17686940729618073, "learning_rate": 0.002, "loss": 2.5616, "step": 316550 }, { "epoch": 0.6306579115134515, "grad_norm": 0.14469681680202484, "learning_rate": 0.002, "loss": 2.5642, "step": 316560 }, { "epoch": 0.6306778337370904, "grad_norm": 0.1499517261981964, "learning_rate": 0.002, "loss": 2.5416, "step": 316570 }, { "epoch": 0.6306977559607293, "grad_norm": 0.30178025364875793, "learning_rate": 0.002, "loss": 2.54, "step": 316580 }, { "epoch": 0.6307176781843682, "grad_norm": 0.17970944941043854, "learning_rate": 0.002, "loss": 2.5645, "step": 316590 }, { "epoch": 0.6307376004080072, "grad_norm": 0.15626196563243866, "learning_rate": 0.002, "loss": 2.5563, "step": 316600 }, { "epoch": 0.6307575226316461, "grad_norm": 0.15997883677482605, "learning_rate": 0.002, "loss": 2.5463, "step": 316610 }, { "epoch": 0.630777444855285, "grad_norm": 0.14964614808559418, "learning_rate": 0.002, "loss": 2.5567, "step": 316620 }, { "epoch": 0.6307973670789239, "grad_norm": 0.1563793420791626, "learning_rate": 0.002, "loss": 2.5429, "step": 316630 }, { "epoch": 0.6308172893025628, "grad_norm": 0.16954581439495087, "learning_rate": 0.002, "loss": 2.5677, "step": 316640 }, { "epoch": 0.6308372115262018, "grad_norm": 0.16535483300685883, "learning_rate": 0.002, "loss": 2.5456, "step": 316650 }, { "epoch": 0.6308571337498406, "grad_norm": 0.1776413917541504, "learning_rate": 0.002, "loss": 2.5497, "step": 316660 }, { "epoch": 0.6308770559734795, "grad_norm": 0.20830120146274567, "learning_rate": 0.002, "loss": 2.5676, "step": 316670 }, { "epoch": 0.6308969781971184, "grad_norm": 0.1448684185743332, "learning_rate": 0.002, "loss": 2.5428, "step": 316680 }, { "epoch": 0.6309169004207573, "grad_norm": 0.16624070703983307, "learning_rate": 0.002, "loss": 2.5632, "step": 316690 }, { "epoch": 0.6309368226443963, "grad_norm": 0.1591101735830307, "learning_rate": 0.002, "loss": 2.556, "step": 316700 }, { "epoch": 0.6309567448680352, "grad_norm": 0.14301499724388123, "learning_rate": 0.002, "loss": 2.5544, "step": 316710 }, { "epoch": 0.6309766670916741, "grad_norm": 0.1417694091796875, "learning_rate": 0.002, "loss": 2.5482, "step": 316720 }, { "epoch": 0.630996589315313, "grad_norm": 0.16618116199970245, "learning_rate": 0.002, "loss": 2.5684, "step": 316730 }, { "epoch": 0.6310165115389519, "grad_norm": 0.17177382111549377, "learning_rate": 0.002, "loss": 2.5574, "step": 316740 }, { "epoch": 0.6310364337625909, "grad_norm": 0.14614799618721008, "learning_rate": 0.002, "loss": 2.5591, "step": 316750 }, { "epoch": 0.6310563559862298, "grad_norm": 0.17465031147003174, "learning_rate": 0.002, "loss": 2.5704, "step": 316760 }, { "epoch": 0.6310762782098687, "grad_norm": 0.14044439792633057, "learning_rate": 0.002, "loss": 2.5475, "step": 316770 }, { "epoch": 0.6310962004335076, "grad_norm": 0.14484331011772156, "learning_rate": 0.002, "loss": 2.54, "step": 316780 }, { "epoch": 0.6311161226571464, "grad_norm": 0.1763303428888321, "learning_rate": 0.002, "loss": 2.5726, "step": 316790 }, { "epoch": 0.6311360448807855, "grad_norm": 0.18360017240047455, "learning_rate": 0.002, "loss": 2.5562, "step": 316800 }, { "epoch": 0.6311559671044243, "grad_norm": 0.15231935679912567, "learning_rate": 0.002, "loss": 2.5643, "step": 316810 }, { "epoch": 0.6311758893280632, "grad_norm": 0.16491740942001343, "learning_rate": 0.002, "loss": 2.5671, "step": 316820 }, { "epoch": 0.6311958115517021, "grad_norm": 0.15372228622436523, "learning_rate": 0.002, "loss": 2.5536, "step": 316830 }, { "epoch": 0.631215733775341, "grad_norm": 0.13403712213039398, "learning_rate": 0.002, "loss": 2.5536, "step": 316840 }, { "epoch": 0.63123565599898, "grad_norm": 0.14867357909679413, "learning_rate": 0.002, "loss": 2.5578, "step": 316850 }, { "epoch": 0.6312555782226189, "grad_norm": 0.2302185297012329, "learning_rate": 0.002, "loss": 2.5649, "step": 316860 }, { "epoch": 0.6312755004462578, "grad_norm": 0.16489621996879578, "learning_rate": 0.002, "loss": 2.5556, "step": 316870 }, { "epoch": 0.6312954226698967, "grad_norm": 0.17850448191165924, "learning_rate": 0.002, "loss": 2.5494, "step": 316880 }, { "epoch": 0.6313153448935357, "grad_norm": 0.15709929168224335, "learning_rate": 0.002, "loss": 2.5673, "step": 316890 }, { "epoch": 0.6313352671171746, "grad_norm": 0.13991624116897583, "learning_rate": 0.002, "loss": 2.5499, "step": 316900 }, { "epoch": 0.6313551893408135, "grad_norm": 0.1480851173400879, "learning_rate": 0.002, "loss": 2.5497, "step": 316910 }, { "epoch": 0.6313751115644524, "grad_norm": 0.1819084882736206, "learning_rate": 0.002, "loss": 2.5556, "step": 316920 }, { "epoch": 0.6313950337880913, "grad_norm": 0.16855713725090027, "learning_rate": 0.002, "loss": 2.5563, "step": 316930 }, { "epoch": 0.6314149560117303, "grad_norm": 0.16022071242332458, "learning_rate": 0.002, "loss": 2.5429, "step": 316940 }, { "epoch": 0.6314348782353691, "grad_norm": 0.13775284588336945, "learning_rate": 0.002, "loss": 2.5541, "step": 316950 }, { "epoch": 0.631454800459008, "grad_norm": 0.15554703772068024, "learning_rate": 0.002, "loss": 2.5578, "step": 316960 }, { "epoch": 0.6314747226826469, "grad_norm": 0.1499287486076355, "learning_rate": 0.002, "loss": 2.5582, "step": 316970 }, { "epoch": 0.6314946449062858, "grad_norm": 0.17038080096244812, "learning_rate": 0.002, "loss": 2.5524, "step": 316980 }, { "epoch": 0.6315145671299248, "grad_norm": 0.16690731048583984, "learning_rate": 0.002, "loss": 2.552, "step": 316990 }, { "epoch": 0.6315344893535637, "grad_norm": 0.20250721275806427, "learning_rate": 0.002, "loss": 2.5381, "step": 317000 }, { "epoch": 0.6315544115772026, "grad_norm": 0.1690925508737564, "learning_rate": 0.002, "loss": 2.5594, "step": 317010 }, { "epoch": 0.6315743338008415, "grad_norm": 0.1522817462682724, "learning_rate": 0.002, "loss": 2.5523, "step": 317020 }, { "epoch": 0.6315942560244804, "grad_norm": 0.16433148086071014, "learning_rate": 0.002, "loss": 2.5632, "step": 317030 }, { "epoch": 0.6316141782481194, "grad_norm": 0.1822647899389267, "learning_rate": 0.002, "loss": 2.5481, "step": 317040 }, { "epoch": 0.6316341004717583, "grad_norm": 0.16290916502475739, "learning_rate": 0.002, "loss": 2.5637, "step": 317050 }, { "epoch": 0.6316540226953972, "grad_norm": 0.15312127768993378, "learning_rate": 0.002, "loss": 2.5605, "step": 317060 }, { "epoch": 0.6316739449190361, "grad_norm": 0.1891428530216217, "learning_rate": 0.002, "loss": 2.5562, "step": 317070 }, { "epoch": 0.631693867142675, "grad_norm": 0.15015381574630737, "learning_rate": 0.002, "loss": 2.5501, "step": 317080 }, { "epoch": 0.631713789366314, "grad_norm": 0.1594291776418686, "learning_rate": 0.002, "loss": 2.5491, "step": 317090 }, { "epoch": 0.6317337115899528, "grad_norm": 0.1597600132226944, "learning_rate": 0.002, "loss": 2.5541, "step": 317100 }, { "epoch": 0.6317536338135917, "grad_norm": 0.19615238904953003, "learning_rate": 0.002, "loss": 2.5535, "step": 317110 }, { "epoch": 0.6317735560372306, "grad_norm": 0.13641679286956787, "learning_rate": 0.002, "loss": 2.5512, "step": 317120 }, { "epoch": 0.6317934782608695, "grad_norm": 0.1636113077402115, "learning_rate": 0.002, "loss": 2.5558, "step": 317130 }, { "epoch": 0.6318134004845085, "grad_norm": 0.15578021109104156, "learning_rate": 0.002, "loss": 2.56, "step": 317140 }, { "epoch": 0.6318333227081474, "grad_norm": 0.1657221019268036, "learning_rate": 0.002, "loss": 2.5597, "step": 317150 }, { "epoch": 0.6318532449317863, "grad_norm": 0.1571001410484314, "learning_rate": 0.002, "loss": 2.553, "step": 317160 }, { "epoch": 0.6318731671554252, "grad_norm": 0.14933602511882782, "learning_rate": 0.002, "loss": 2.5563, "step": 317170 }, { "epoch": 0.6318930893790642, "grad_norm": 0.207363098859787, "learning_rate": 0.002, "loss": 2.5591, "step": 317180 }, { "epoch": 0.6319130116027031, "grad_norm": 0.22218210995197296, "learning_rate": 0.002, "loss": 2.5501, "step": 317190 }, { "epoch": 0.631932933826342, "grad_norm": 0.14166919887065887, "learning_rate": 0.002, "loss": 2.5513, "step": 317200 }, { "epoch": 0.6319528560499809, "grad_norm": 0.18081875145435333, "learning_rate": 0.002, "loss": 2.5527, "step": 317210 }, { "epoch": 0.6319727782736198, "grad_norm": 0.1494060903787613, "learning_rate": 0.002, "loss": 2.5634, "step": 317220 }, { "epoch": 0.6319927004972588, "grad_norm": 0.14801160991191864, "learning_rate": 0.002, "loss": 2.5554, "step": 317230 }, { "epoch": 0.6320126227208976, "grad_norm": 0.17369161546230316, "learning_rate": 0.002, "loss": 2.5669, "step": 317240 }, { "epoch": 0.6320325449445365, "grad_norm": 0.19400960206985474, "learning_rate": 0.002, "loss": 2.554, "step": 317250 }, { "epoch": 0.6320524671681754, "grad_norm": 0.15373414754867554, "learning_rate": 0.002, "loss": 2.5439, "step": 317260 }, { "epoch": 0.6320723893918143, "grad_norm": 0.14934492111206055, "learning_rate": 0.002, "loss": 2.5494, "step": 317270 }, { "epoch": 0.6320923116154533, "grad_norm": 0.1559174507856369, "learning_rate": 0.002, "loss": 2.5761, "step": 317280 }, { "epoch": 0.6321122338390922, "grad_norm": 0.159613236784935, "learning_rate": 0.002, "loss": 2.5761, "step": 317290 }, { "epoch": 0.6321321560627311, "grad_norm": 0.1960824728012085, "learning_rate": 0.002, "loss": 2.5619, "step": 317300 }, { "epoch": 0.63215207828637, "grad_norm": 0.15671341121196747, "learning_rate": 0.002, "loss": 2.5598, "step": 317310 }, { "epoch": 0.6321720005100089, "grad_norm": 0.1919124871492386, "learning_rate": 0.002, "loss": 2.5641, "step": 317320 }, { "epoch": 0.6321919227336479, "grad_norm": 0.1551866978406906, "learning_rate": 0.002, "loss": 2.5516, "step": 317330 }, { "epoch": 0.6322118449572868, "grad_norm": 0.14114432036876678, "learning_rate": 0.002, "loss": 2.549, "step": 317340 }, { "epoch": 0.6322317671809257, "grad_norm": 0.19180047512054443, "learning_rate": 0.002, "loss": 2.5479, "step": 317350 }, { "epoch": 0.6322516894045646, "grad_norm": 0.1967693716287613, "learning_rate": 0.002, "loss": 2.5494, "step": 317360 }, { "epoch": 0.6322716116282034, "grad_norm": 0.1361936330795288, "learning_rate": 0.002, "loss": 2.5609, "step": 317370 }, { "epoch": 0.6322915338518424, "grad_norm": 0.1589256376028061, "learning_rate": 0.002, "loss": 2.5435, "step": 317380 }, { "epoch": 0.6323114560754813, "grad_norm": 0.2169857770204544, "learning_rate": 0.002, "loss": 2.5677, "step": 317390 }, { "epoch": 0.6323313782991202, "grad_norm": 0.15007658302783966, "learning_rate": 0.002, "loss": 2.5424, "step": 317400 }, { "epoch": 0.6323513005227591, "grad_norm": 0.15376196801662445, "learning_rate": 0.002, "loss": 2.5466, "step": 317410 }, { "epoch": 0.632371222746398, "grad_norm": 0.1830601990222931, "learning_rate": 0.002, "loss": 2.5497, "step": 317420 }, { "epoch": 0.632391144970037, "grad_norm": 0.19751763343811035, "learning_rate": 0.002, "loss": 2.554, "step": 317430 }, { "epoch": 0.6324110671936759, "grad_norm": 0.1578940898180008, "learning_rate": 0.002, "loss": 2.5685, "step": 317440 }, { "epoch": 0.6324309894173148, "grad_norm": 0.153659388422966, "learning_rate": 0.002, "loss": 2.5516, "step": 317450 }, { "epoch": 0.6324509116409537, "grad_norm": 0.21001556515693665, "learning_rate": 0.002, "loss": 2.5477, "step": 317460 }, { "epoch": 0.6324708338645927, "grad_norm": 0.1820835918188095, "learning_rate": 0.002, "loss": 2.5628, "step": 317470 }, { "epoch": 0.6324907560882316, "grad_norm": 0.1600004881620407, "learning_rate": 0.002, "loss": 2.5586, "step": 317480 }, { "epoch": 0.6325106783118705, "grad_norm": 0.17113757133483887, "learning_rate": 0.002, "loss": 2.5548, "step": 317490 }, { "epoch": 0.6325306005355094, "grad_norm": 0.19319330155849457, "learning_rate": 0.002, "loss": 2.5467, "step": 317500 }, { "epoch": 0.6325505227591482, "grad_norm": 0.14537933468818665, "learning_rate": 0.002, "loss": 2.5573, "step": 317510 }, { "epoch": 0.6325704449827872, "grad_norm": 0.16677646338939667, "learning_rate": 0.002, "loss": 2.5557, "step": 317520 }, { "epoch": 0.6325903672064261, "grad_norm": 0.1845211237668991, "learning_rate": 0.002, "loss": 2.5567, "step": 317530 }, { "epoch": 0.632610289430065, "grad_norm": 0.2180400937795639, "learning_rate": 0.002, "loss": 2.5547, "step": 317540 }, { "epoch": 0.6326302116537039, "grad_norm": 0.15849465131759644, "learning_rate": 0.002, "loss": 2.5536, "step": 317550 }, { "epoch": 0.6326501338773428, "grad_norm": 0.1577039211988449, "learning_rate": 0.002, "loss": 2.5588, "step": 317560 }, { "epoch": 0.6326700561009818, "grad_norm": 0.18028870224952698, "learning_rate": 0.002, "loss": 2.5467, "step": 317570 }, { "epoch": 0.6326899783246207, "grad_norm": 0.16341452300548553, "learning_rate": 0.002, "loss": 2.5566, "step": 317580 }, { "epoch": 0.6327099005482596, "grad_norm": 0.1581181436777115, "learning_rate": 0.002, "loss": 2.5592, "step": 317590 }, { "epoch": 0.6327298227718985, "grad_norm": 0.1652614325284958, "learning_rate": 0.002, "loss": 2.5532, "step": 317600 }, { "epoch": 0.6327497449955374, "grad_norm": 0.1803669035434723, "learning_rate": 0.002, "loss": 2.5592, "step": 317610 }, { "epoch": 0.6327696672191764, "grad_norm": 0.14776720106601715, "learning_rate": 0.002, "loss": 2.5606, "step": 317620 }, { "epoch": 0.6327895894428153, "grad_norm": 0.1731671541929245, "learning_rate": 0.002, "loss": 2.5488, "step": 317630 }, { "epoch": 0.6328095116664542, "grad_norm": 0.15442903339862823, "learning_rate": 0.002, "loss": 2.5455, "step": 317640 }, { "epoch": 0.632829433890093, "grad_norm": 0.20868027210235596, "learning_rate": 0.002, "loss": 2.5281, "step": 317650 }, { "epoch": 0.6328493561137319, "grad_norm": 0.1392768770456314, "learning_rate": 0.002, "loss": 2.548, "step": 317660 }, { "epoch": 0.6328692783373709, "grad_norm": 0.16962119936943054, "learning_rate": 0.002, "loss": 2.5661, "step": 317670 }, { "epoch": 0.6328892005610098, "grad_norm": 0.1485927850008011, "learning_rate": 0.002, "loss": 2.5638, "step": 317680 }, { "epoch": 0.6329091227846487, "grad_norm": 0.1748354732990265, "learning_rate": 0.002, "loss": 2.5609, "step": 317690 }, { "epoch": 0.6329290450082876, "grad_norm": 0.2661498785018921, "learning_rate": 0.002, "loss": 2.5503, "step": 317700 }, { "epoch": 0.6329489672319265, "grad_norm": 0.18983767926692963, "learning_rate": 0.002, "loss": 2.5573, "step": 317710 }, { "epoch": 0.6329688894555655, "grad_norm": 0.16275444626808167, "learning_rate": 0.002, "loss": 2.5567, "step": 317720 }, { "epoch": 0.6329888116792044, "grad_norm": 0.16417084634304047, "learning_rate": 0.002, "loss": 2.5599, "step": 317730 }, { "epoch": 0.6330087339028433, "grad_norm": 0.16474603116512299, "learning_rate": 0.002, "loss": 2.5598, "step": 317740 }, { "epoch": 0.6330286561264822, "grad_norm": 0.17485225200653076, "learning_rate": 0.002, "loss": 2.5564, "step": 317750 }, { "epoch": 0.6330485783501211, "grad_norm": 0.18343374133110046, "learning_rate": 0.002, "loss": 2.5557, "step": 317760 }, { "epoch": 0.6330685005737601, "grad_norm": 0.17151516675949097, "learning_rate": 0.002, "loss": 2.548, "step": 317770 }, { "epoch": 0.633088422797399, "grad_norm": 0.15099069476127625, "learning_rate": 0.002, "loss": 2.564, "step": 317780 }, { "epoch": 0.6331083450210379, "grad_norm": 0.2120562344789505, "learning_rate": 0.002, "loss": 2.5627, "step": 317790 }, { "epoch": 0.6331282672446767, "grad_norm": 0.17149151861667633, "learning_rate": 0.002, "loss": 2.5495, "step": 317800 }, { "epoch": 0.6331481894683157, "grad_norm": 0.13991332054138184, "learning_rate": 0.002, "loss": 2.5643, "step": 317810 }, { "epoch": 0.6331681116919546, "grad_norm": 0.15800824761390686, "learning_rate": 0.002, "loss": 2.5388, "step": 317820 }, { "epoch": 0.6331880339155935, "grad_norm": 0.2035064846277237, "learning_rate": 0.002, "loss": 2.5666, "step": 317830 }, { "epoch": 0.6332079561392324, "grad_norm": 0.17272621393203735, "learning_rate": 0.002, "loss": 2.563, "step": 317840 }, { "epoch": 0.6332278783628713, "grad_norm": 0.1633410006761551, "learning_rate": 0.002, "loss": 2.5656, "step": 317850 }, { "epoch": 0.6332478005865103, "grad_norm": 0.1711781769990921, "learning_rate": 0.002, "loss": 2.5597, "step": 317860 }, { "epoch": 0.6332677228101492, "grad_norm": 0.17876577377319336, "learning_rate": 0.002, "loss": 2.5627, "step": 317870 }, { "epoch": 0.6332876450337881, "grad_norm": 0.18677754700183868, "learning_rate": 0.002, "loss": 2.5434, "step": 317880 }, { "epoch": 0.633307567257427, "grad_norm": 0.18436329066753387, "learning_rate": 0.002, "loss": 2.5728, "step": 317890 }, { "epoch": 0.6333274894810659, "grad_norm": 0.1491738110780716, "learning_rate": 0.002, "loss": 2.5535, "step": 317900 }, { "epoch": 0.6333474117047049, "grad_norm": 0.20427356660366058, "learning_rate": 0.002, "loss": 2.5632, "step": 317910 }, { "epoch": 0.6333673339283438, "grad_norm": 0.18567967414855957, "learning_rate": 0.002, "loss": 2.5587, "step": 317920 }, { "epoch": 0.6333872561519827, "grad_norm": 0.1755577027797699, "learning_rate": 0.002, "loss": 2.5522, "step": 317930 }, { "epoch": 0.6334071783756215, "grad_norm": 0.1513335108757019, "learning_rate": 0.002, "loss": 2.5389, "step": 317940 }, { "epoch": 0.6334271005992604, "grad_norm": 0.1441003829240799, "learning_rate": 0.002, "loss": 2.5624, "step": 317950 }, { "epoch": 0.6334470228228994, "grad_norm": 0.16766145825386047, "learning_rate": 0.002, "loss": 2.5498, "step": 317960 }, { "epoch": 0.6334669450465383, "grad_norm": 0.16247473657131195, "learning_rate": 0.002, "loss": 2.5356, "step": 317970 }, { "epoch": 0.6334868672701772, "grad_norm": 0.1765831857919693, "learning_rate": 0.002, "loss": 2.5551, "step": 317980 }, { "epoch": 0.6335067894938161, "grad_norm": 0.19355298578739166, "learning_rate": 0.002, "loss": 2.5481, "step": 317990 }, { "epoch": 0.633526711717455, "grad_norm": 0.14976494014263153, "learning_rate": 0.002, "loss": 2.5523, "step": 318000 }, { "epoch": 0.633546633941094, "grad_norm": 0.15150396525859833, "learning_rate": 0.002, "loss": 2.5499, "step": 318010 }, { "epoch": 0.6335665561647329, "grad_norm": 0.16226741671562195, "learning_rate": 0.002, "loss": 2.5585, "step": 318020 }, { "epoch": 0.6335864783883718, "grad_norm": 0.17446096241474152, "learning_rate": 0.002, "loss": 2.5494, "step": 318030 }, { "epoch": 0.6336064006120107, "grad_norm": 0.13674962520599365, "learning_rate": 0.002, "loss": 2.5572, "step": 318040 }, { "epoch": 0.6336263228356496, "grad_norm": 0.19324040412902832, "learning_rate": 0.002, "loss": 2.5568, "step": 318050 }, { "epoch": 0.6336462450592886, "grad_norm": 0.14731542766094208, "learning_rate": 0.002, "loss": 2.5608, "step": 318060 }, { "epoch": 0.6336661672829275, "grad_norm": 0.1837262660264969, "learning_rate": 0.002, "loss": 2.5473, "step": 318070 }, { "epoch": 0.6336860895065664, "grad_norm": 0.16657967865467072, "learning_rate": 0.002, "loss": 2.5455, "step": 318080 }, { "epoch": 0.6337060117302052, "grad_norm": 0.15437006950378418, "learning_rate": 0.002, "loss": 2.565, "step": 318090 }, { "epoch": 0.6337259339538442, "grad_norm": 0.17575965821743011, "learning_rate": 0.002, "loss": 2.556, "step": 318100 }, { "epoch": 0.6337458561774831, "grad_norm": 0.16769006848335266, "learning_rate": 0.002, "loss": 2.5635, "step": 318110 }, { "epoch": 0.633765778401122, "grad_norm": 0.1606925129890442, "learning_rate": 0.002, "loss": 2.5524, "step": 318120 }, { "epoch": 0.6337857006247609, "grad_norm": 0.15513084828853607, "learning_rate": 0.002, "loss": 2.5397, "step": 318130 }, { "epoch": 0.6338056228483998, "grad_norm": 0.1782388985157013, "learning_rate": 0.002, "loss": 2.5563, "step": 318140 }, { "epoch": 0.6338255450720388, "grad_norm": 0.15746362507343292, "learning_rate": 0.002, "loss": 2.5492, "step": 318150 }, { "epoch": 0.6338454672956777, "grad_norm": 0.15978467464447021, "learning_rate": 0.002, "loss": 2.5666, "step": 318160 }, { "epoch": 0.6338653895193166, "grad_norm": 0.14669102430343628, "learning_rate": 0.002, "loss": 2.5595, "step": 318170 }, { "epoch": 0.6338853117429555, "grad_norm": 0.14950525760650635, "learning_rate": 0.002, "loss": 2.5506, "step": 318180 }, { "epoch": 0.6339052339665944, "grad_norm": 0.18966682255268097, "learning_rate": 0.002, "loss": 2.5557, "step": 318190 }, { "epoch": 0.6339251561902334, "grad_norm": 0.17584837973117828, "learning_rate": 0.002, "loss": 2.5535, "step": 318200 }, { "epoch": 0.6339450784138723, "grad_norm": 0.16669274866580963, "learning_rate": 0.002, "loss": 2.5594, "step": 318210 }, { "epoch": 0.6339650006375112, "grad_norm": 0.16659438610076904, "learning_rate": 0.002, "loss": 2.5511, "step": 318220 }, { "epoch": 0.63398492286115, "grad_norm": 0.13434740900993347, "learning_rate": 0.002, "loss": 2.5508, "step": 318230 }, { "epoch": 0.6340048450847889, "grad_norm": 0.14567971229553223, "learning_rate": 0.002, "loss": 2.5423, "step": 318240 }, { "epoch": 0.6340247673084279, "grad_norm": 0.156858429312706, "learning_rate": 0.002, "loss": 2.5381, "step": 318250 }, { "epoch": 0.6340446895320668, "grad_norm": 0.18911437690258026, "learning_rate": 0.002, "loss": 2.5572, "step": 318260 }, { "epoch": 0.6340646117557057, "grad_norm": 0.17410054802894592, "learning_rate": 0.002, "loss": 2.552, "step": 318270 }, { "epoch": 0.6340845339793446, "grad_norm": 0.13637635111808777, "learning_rate": 0.002, "loss": 2.5584, "step": 318280 }, { "epoch": 0.6341044562029835, "grad_norm": 0.1951073706150055, "learning_rate": 0.002, "loss": 2.5483, "step": 318290 }, { "epoch": 0.6341243784266225, "grad_norm": 0.1414157599210739, "learning_rate": 0.002, "loss": 2.5622, "step": 318300 }, { "epoch": 0.6341443006502614, "grad_norm": 0.16646742820739746, "learning_rate": 0.002, "loss": 2.5354, "step": 318310 }, { "epoch": 0.6341642228739003, "grad_norm": 0.1874421089887619, "learning_rate": 0.002, "loss": 2.5538, "step": 318320 }, { "epoch": 0.6341841450975392, "grad_norm": 0.1642265021800995, "learning_rate": 0.002, "loss": 2.5493, "step": 318330 }, { "epoch": 0.6342040673211781, "grad_norm": 0.15404552221298218, "learning_rate": 0.002, "loss": 2.5477, "step": 318340 }, { "epoch": 0.6342239895448171, "grad_norm": 0.15862596035003662, "learning_rate": 0.002, "loss": 2.5476, "step": 318350 }, { "epoch": 0.634243911768456, "grad_norm": 0.19932381808757782, "learning_rate": 0.002, "loss": 2.5567, "step": 318360 }, { "epoch": 0.6342638339920948, "grad_norm": 0.14836154878139496, "learning_rate": 0.002, "loss": 2.5522, "step": 318370 }, { "epoch": 0.6342837562157337, "grad_norm": 0.15082232654094696, "learning_rate": 0.002, "loss": 2.5474, "step": 318380 }, { "epoch": 0.6343036784393727, "grad_norm": 0.15865789353847504, "learning_rate": 0.002, "loss": 2.5525, "step": 318390 }, { "epoch": 0.6343236006630116, "grad_norm": 0.19431790709495544, "learning_rate": 0.002, "loss": 2.5558, "step": 318400 }, { "epoch": 0.6343435228866505, "grad_norm": 0.15969441831111908, "learning_rate": 0.002, "loss": 2.5573, "step": 318410 }, { "epoch": 0.6343634451102894, "grad_norm": 0.19221407175064087, "learning_rate": 0.002, "loss": 2.5584, "step": 318420 }, { "epoch": 0.6343833673339283, "grad_norm": 0.15404874086380005, "learning_rate": 0.002, "loss": 2.5583, "step": 318430 }, { "epoch": 0.6344032895575673, "grad_norm": 0.1533665806055069, "learning_rate": 0.002, "loss": 2.5652, "step": 318440 }, { "epoch": 0.6344232117812062, "grad_norm": 0.18208631873130798, "learning_rate": 0.002, "loss": 2.5507, "step": 318450 }, { "epoch": 0.6344431340048451, "grad_norm": 0.16102765500545502, "learning_rate": 0.002, "loss": 2.5518, "step": 318460 }, { "epoch": 0.634463056228484, "grad_norm": 0.18026649951934814, "learning_rate": 0.002, "loss": 2.5464, "step": 318470 }, { "epoch": 0.6344829784521229, "grad_norm": 0.17522881925106049, "learning_rate": 0.002, "loss": 2.5448, "step": 318480 }, { "epoch": 0.6345029006757619, "grad_norm": 0.192208394408226, "learning_rate": 0.002, "loss": 2.5471, "step": 318490 }, { "epoch": 0.6345228228994008, "grad_norm": 0.29662442207336426, "learning_rate": 0.002, "loss": 2.5643, "step": 318500 }, { "epoch": 0.6345427451230397, "grad_norm": 0.21614867448806763, "learning_rate": 0.002, "loss": 2.5354, "step": 318510 }, { "epoch": 0.6345626673466785, "grad_norm": 0.16444182395935059, "learning_rate": 0.002, "loss": 2.5616, "step": 318520 }, { "epoch": 0.6345825895703174, "grad_norm": 0.16562232375144958, "learning_rate": 0.002, "loss": 2.5501, "step": 318530 }, { "epoch": 0.6346025117939564, "grad_norm": 0.19917191565036774, "learning_rate": 0.002, "loss": 2.551, "step": 318540 }, { "epoch": 0.6346224340175953, "grad_norm": 0.17150507867336273, "learning_rate": 0.002, "loss": 2.5542, "step": 318550 }, { "epoch": 0.6346423562412342, "grad_norm": 0.12752538919448853, "learning_rate": 0.002, "loss": 2.5551, "step": 318560 }, { "epoch": 0.6346622784648731, "grad_norm": 0.17357343435287476, "learning_rate": 0.002, "loss": 2.5338, "step": 318570 }, { "epoch": 0.634682200688512, "grad_norm": 0.1668342798948288, "learning_rate": 0.002, "loss": 2.5655, "step": 318580 }, { "epoch": 0.634702122912151, "grad_norm": 0.22236773371696472, "learning_rate": 0.002, "loss": 2.5489, "step": 318590 }, { "epoch": 0.6347220451357899, "grad_norm": 0.17101424932479858, "learning_rate": 0.002, "loss": 2.5519, "step": 318600 }, { "epoch": 0.6347419673594288, "grad_norm": 0.14859424531459808, "learning_rate": 0.002, "loss": 2.5495, "step": 318610 }, { "epoch": 0.6347618895830677, "grad_norm": 0.1870090663433075, "learning_rate": 0.002, "loss": 2.5548, "step": 318620 }, { "epoch": 0.6347818118067066, "grad_norm": 0.17612214386463165, "learning_rate": 0.002, "loss": 2.5611, "step": 318630 }, { "epoch": 0.6348017340303456, "grad_norm": 0.15596483647823334, "learning_rate": 0.002, "loss": 2.5571, "step": 318640 }, { "epoch": 0.6348216562539845, "grad_norm": 0.20245592296123505, "learning_rate": 0.002, "loss": 2.5513, "step": 318650 }, { "epoch": 0.6348415784776233, "grad_norm": 0.13623793423175812, "learning_rate": 0.002, "loss": 2.5555, "step": 318660 }, { "epoch": 0.6348615007012622, "grad_norm": 0.20456069707870483, "learning_rate": 0.002, "loss": 2.5451, "step": 318670 }, { "epoch": 0.6348814229249012, "grad_norm": 0.17325124144554138, "learning_rate": 0.002, "loss": 2.5561, "step": 318680 }, { "epoch": 0.6349013451485401, "grad_norm": 0.19919730722904205, "learning_rate": 0.002, "loss": 2.5588, "step": 318690 }, { "epoch": 0.634921267372179, "grad_norm": 0.14591708779335022, "learning_rate": 0.002, "loss": 2.5672, "step": 318700 }, { "epoch": 0.6349411895958179, "grad_norm": 0.1812790334224701, "learning_rate": 0.002, "loss": 2.5607, "step": 318710 }, { "epoch": 0.6349611118194568, "grad_norm": 0.1687353551387787, "learning_rate": 0.002, "loss": 2.5661, "step": 318720 }, { "epoch": 0.6349810340430958, "grad_norm": 0.15037654340267181, "learning_rate": 0.002, "loss": 2.5457, "step": 318730 }, { "epoch": 0.6350009562667347, "grad_norm": 0.16338376700878143, "learning_rate": 0.002, "loss": 2.5623, "step": 318740 }, { "epoch": 0.6350208784903736, "grad_norm": 0.17787203192710876, "learning_rate": 0.002, "loss": 2.5665, "step": 318750 }, { "epoch": 0.6350408007140125, "grad_norm": 0.16684085130691528, "learning_rate": 0.002, "loss": 2.5442, "step": 318760 }, { "epoch": 0.6350607229376514, "grad_norm": 0.5627944469451904, "learning_rate": 0.002, "loss": 2.5548, "step": 318770 }, { "epoch": 0.6350806451612904, "grad_norm": 0.1519162654876709, "learning_rate": 0.002, "loss": 2.5656, "step": 318780 }, { "epoch": 0.6351005673849293, "grad_norm": 0.20595727860927582, "learning_rate": 0.002, "loss": 2.5613, "step": 318790 }, { "epoch": 0.6351204896085682, "grad_norm": 0.1666409969329834, "learning_rate": 0.002, "loss": 2.5512, "step": 318800 }, { "epoch": 0.635140411832207, "grad_norm": 0.1433897614479065, "learning_rate": 0.002, "loss": 2.5671, "step": 318810 }, { "epoch": 0.6351603340558459, "grad_norm": 0.17654789984226227, "learning_rate": 0.002, "loss": 2.5664, "step": 318820 }, { "epoch": 0.6351802562794849, "grad_norm": 0.13819417357444763, "learning_rate": 0.002, "loss": 2.5519, "step": 318830 }, { "epoch": 0.6352001785031238, "grad_norm": 0.15167640149593353, "learning_rate": 0.002, "loss": 2.5577, "step": 318840 }, { "epoch": 0.6352201007267627, "grad_norm": 0.16278639435768127, "learning_rate": 0.002, "loss": 2.5562, "step": 318850 }, { "epoch": 0.6352400229504016, "grad_norm": 0.17004457116127014, "learning_rate": 0.002, "loss": 2.5689, "step": 318860 }, { "epoch": 0.6352599451740405, "grad_norm": 0.16390268504619598, "learning_rate": 0.002, "loss": 2.5594, "step": 318870 }, { "epoch": 0.6352798673976795, "grad_norm": 0.16191145777702332, "learning_rate": 0.002, "loss": 2.5562, "step": 318880 }, { "epoch": 0.6352997896213184, "grad_norm": 0.1816210001707077, "learning_rate": 0.002, "loss": 2.5487, "step": 318890 }, { "epoch": 0.6353197118449573, "grad_norm": 0.15279126167297363, "learning_rate": 0.002, "loss": 2.5654, "step": 318900 }, { "epoch": 0.6353396340685962, "grad_norm": 0.16028621792793274, "learning_rate": 0.002, "loss": 2.5439, "step": 318910 }, { "epoch": 0.6353595562922351, "grad_norm": 0.15227921307086945, "learning_rate": 0.002, "loss": 2.5349, "step": 318920 }, { "epoch": 0.6353794785158741, "grad_norm": 0.1802700310945511, "learning_rate": 0.002, "loss": 2.5468, "step": 318930 }, { "epoch": 0.635399400739513, "grad_norm": 0.20622685551643372, "learning_rate": 0.002, "loss": 2.5609, "step": 318940 }, { "epoch": 0.6354193229631518, "grad_norm": 0.15862572193145752, "learning_rate": 0.002, "loss": 2.542, "step": 318950 }, { "epoch": 0.6354392451867907, "grad_norm": 0.16716399788856506, "learning_rate": 0.002, "loss": 2.5632, "step": 318960 }, { "epoch": 0.6354591674104297, "grad_norm": 0.17843395471572876, "learning_rate": 0.002, "loss": 2.55, "step": 318970 }, { "epoch": 0.6354790896340686, "grad_norm": 0.19492411613464355, "learning_rate": 0.002, "loss": 2.5664, "step": 318980 }, { "epoch": 0.6354990118577075, "grad_norm": 0.15527623891830444, "learning_rate": 0.002, "loss": 2.5525, "step": 318990 }, { "epoch": 0.6355189340813464, "grad_norm": 0.14532631635665894, "learning_rate": 0.002, "loss": 2.558, "step": 319000 }, { "epoch": 0.6355388563049853, "grad_norm": 0.21030578017234802, "learning_rate": 0.002, "loss": 2.5713, "step": 319010 }, { "epoch": 0.6355587785286243, "grad_norm": 0.17404727637767792, "learning_rate": 0.002, "loss": 2.5533, "step": 319020 }, { "epoch": 0.6355787007522632, "grad_norm": 0.18413570523262024, "learning_rate": 0.002, "loss": 2.5567, "step": 319030 }, { "epoch": 0.6355986229759021, "grad_norm": 0.209281325340271, "learning_rate": 0.002, "loss": 2.5662, "step": 319040 }, { "epoch": 0.635618545199541, "grad_norm": 0.16240966320037842, "learning_rate": 0.002, "loss": 2.5705, "step": 319050 }, { "epoch": 0.6356384674231799, "grad_norm": 0.16390427947044373, "learning_rate": 0.002, "loss": 2.5562, "step": 319060 }, { "epoch": 0.6356583896468189, "grad_norm": 0.16663192212581635, "learning_rate": 0.002, "loss": 2.5534, "step": 319070 }, { "epoch": 0.6356783118704578, "grad_norm": 0.1629406362771988, "learning_rate": 0.002, "loss": 2.5431, "step": 319080 }, { "epoch": 0.6356982340940966, "grad_norm": 0.15754573047161102, "learning_rate": 0.002, "loss": 2.5615, "step": 319090 }, { "epoch": 0.6357181563177355, "grad_norm": 0.15641164779663086, "learning_rate": 0.002, "loss": 2.5419, "step": 319100 }, { "epoch": 0.6357380785413744, "grad_norm": 0.17899975180625916, "learning_rate": 0.002, "loss": 2.5567, "step": 319110 }, { "epoch": 0.6357580007650134, "grad_norm": 0.1601792424917221, "learning_rate": 0.002, "loss": 2.5556, "step": 319120 }, { "epoch": 0.6357779229886523, "grad_norm": 0.14961129426956177, "learning_rate": 0.002, "loss": 2.5577, "step": 319130 }, { "epoch": 0.6357978452122912, "grad_norm": 0.17824916541576385, "learning_rate": 0.002, "loss": 2.568, "step": 319140 }, { "epoch": 0.6358177674359301, "grad_norm": 0.14652718603610992, "learning_rate": 0.002, "loss": 2.5621, "step": 319150 }, { "epoch": 0.635837689659569, "grad_norm": 0.1541803926229477, "learning_rate": 0.002, "loss": 2.5571, "step": 319160 }, { "epoch": 0.635857611883208, "grad_norm": 0.18174514174461365, "learning_rate": 0.002, "loss": 2.5549, "step": 319170 }, { "epoch": 0.6358775341068469, "grad_norm": 0.14582203328609467, "learning_rate": 0.002, "loss": 2.5591, "step": 319180 }, { "epoch": 0.6358974563304858, "grad_norm": 0.22596481442451477, "learning_rate": 0.002, "loss": 2.5567, "step": 319190 }, { "epoch": 0.6359173785541247, "grad_norm": 0.16609597206115723, "learning_rate": 0.002, "loss": 2.5537, "step": 319200 }, { "epoch": 0.6359373007777636, "grad_norm": 0.16067010164260864, "learning_rate": 0.002, "loss": 2.5374, "step": 319210 }, { "epoch": 0.6359572230014026, "grad_norm": 0.14126388728618622, "learning_rate": 0.002, "loss": 2.5521, "step": 319220 }, { "epoch": 0.6359771452250415, "grad_norm": 0.1599748134613037, "learning_rate": 0.002, "loss": 2.5441, "step": 319230 }, { "epoch": 0.6359970674486803, "grad_norm": 0.15932980179786682, "learning_rate": 0.002, "loss": 2.553, "step": 319240 }, { "epoch": 0.6360169896723192, "grad_norm": 0.15344050526618958, "learning_rate": 0.002, "loss": 2.5655, "step": 319250 }, { "epoch": 0.6360369118959582, "grad_norm": 0.17150455713272095, "learning_rate": 0.002, "loss": 2.5564, "step": 319260 }, { "epoch": 0.6360568341195971, "grad_norm": 0.1809627115726471, "learning_rate": 0.002, "loss": 2.5669, "step": 319270 }, { "epoch": 0.636076756343236, "grad_norm": 0.15704840421676636, "learning_rate": 0.002, "loss": 2.551, "step": 319280 }, { "epoch": 0.6360966785668749, "grad_norm": 0.16792884469032288, "learning_rate": 0.002, "loss": 2.571, "step": 319290 }, { "epoch": 0.6361166007905138, "grad_norm": 0.8263083696365356, "learning_rate": 0.002, "loss": 2.5616, "step": 319300 }, { "epoch": 0.6361365230141528, "grad_norm": 0.15226714313030243, "learning_rate": 0.002, "loss": 2.5535, "step": 319310 }, { "epoch": 0.6361564452377917, "grad_norm": 0.1643800437450409, "learning_rate": 0.002, "loss": 2.5586, "step": 319320 }, { "epoch": 0.6361763674614306, "grad_norm": 0.15976396203041077, "learning_rate": 0.002, "loss": 2.552, "step": 319330 }, { "epoch": 0.6361962896850695, "grad_norm": 0.16979455947875977, "learning_rate": 0.002, "loss": 2.5744, "step": 319340 }, { "epoch": 0.6362162119087084, "grad_norm": 0.14519107341766357, "learning_rate": 0.002, "loss": 2.5428, "step": 319350 }, { "epoch": 0.6362361341323474, "grad_norm": 0.1975298374891281, "learning_rate": 0.002, "loss": 2.5516, "step": 319360 }, { "epoch": 0.6362560563559863, "grad_norm": 0.1854935586452484, "learning_rate": 0.002, "loss": 2.5631, "step": 319370 }, { "epoch": 0.6362759785796251, "grad_norm": 0.15816058218479156, "learning_rate": 0.002, "loss": 2.5547, "step": 319380 }, { "epoch": 0.636295900803264, "grad_norm": 0.16854526102542877, "learning_rate": 0.002, "loss": 2.571, "step": 319390 }, { "epoch": 0.6363158230269029, "grad_norm": 0.18203873932361603, "learning_rate": 0.002, "loss": 2.5452, "step": 319400 }, { "epoch": 0.6363357452505419, "grad_norm": 0.16970010101795197, "learning_rate": 0.002, "loss": 2.5595, "step": 319410 }, { "epoch": 0.6363556674741808, "grad_norm": 0.16281458735466003, "learning_rate": 0.002, "loss": 2.5625, "step": 319420 }, { "epoch": 0.6363755896978197, "grad_norm": 0.16200153529644012, "learning_rate": 0.002, "loss": 2.5641, "step": 319430 }, { "epoch": 0.6363955119214586, "grad_norm": 0.15771780908107758, "learning_rate": 0.002, "loss": 2.5732, "step": 319440 }, { "epoch": 0.6364154341450975, "grad_norm": 0.16329002380371094, "learning_rate": 0.002, "loss": 2.5711, "step": 319450 }, { "epoch": 0.6364353563687365, "grad_norm": 0.17139168083667755, "learning_rate": 0.002, "loss": 2.5688, "step": 319460 }, { "epoch": 0.6364552785923754, "grad_norm": 0.157241553068161, "learning_rate": 0.002, "loss": 2.5562, "step": 319470 }, { "epoch": 0.6364752008160143, "grad_norm": 0.1933089643716812, "learning_rate": 0.002, "loss": 2.55, "step": 319480 }, { "epoch": 0.6364951230396532, "grad_norm": 0.1582377701997757, "learning_rate": 0.002, "loss": 2.5309, "step": 319490 }, { "epoch": 0.6365150452632921, "grad_norm": 0.16976161301136017, "learning_rate": 0.002, "loss": 2.5382, "step": 319500 }, { "epoch": 0.6365349674869311, "grad_norm": 0.14153185486793518, "learning_rate": 0.002, "loss": 2.5582, "step": 319510 }, { "epoch": 0.63655488971057, "grad_norm": 0.17921149730682373, "learning_rate": 0.002, "loss": 2.5695, "step": 319520 }, { "epoch": 0.6365748119342088, "grad_norm": 0.1771230548620224, "learning_rate": 0.002, "loss": 2.5518, "step": 319530 }, { "epoch": 0.6365947341578477, "grad_norm": 0.1446591168642044, "learning_rate": 0.002, "loss": 2.558, "step": 319540 }, { "epoch": 0.6366146563814866, "grad_norm": 0.1714268922805786, "learning_rate": 0.002, "loss": 2.5499, "step": 319550 }, { "epoch": 0.6366345786051256, "grad_norm": 0.19131386280059814, "learning_rate": 0.002, "loss": 2.5397, "step": 319560 }, { "epoch": 0.6366545008287645, "grad_norm": 0.17486515641212463, "learning_rate": 0.002, "loss": 2.564, "step": 319570 }, { "epoch": 0.6366744230524034, "grad_norm": 0.1915983408689499, "learning_rate": 0.002, "loss": 2.5679, "step": 319580 }, { "epoch": 0.6366943452760423, "grad_norm": 0.16821995377540588, "learning_rate": 0.002, "loss": 2.554, "step": 319590 }, { "epoch": 0.6367142674996813, "grad_norm": 0.12867848575115204, "learning_rate": 0.002, "loss": 2.5606, "step": 319600 }, { "epoch": 0.6367341897233202, "grad_norm": 0.15847797691822052, "learning_rate": 0.002, "loss": 2.5487, "step": 319610 }, { "epoch": 0.6367541119469591, "grad_norm": 0.1700381189584732, "learning_rate": 0.002, "loss": 2.5671, "step": 319620 }, { "epoch": 0.636774034170598, "grad_norm": 0.16188253462314606, "learning_rate": 0.002, "loss": 2.5623, "step": 319630 }, { "epoch": 0.6367939563942369, "grad_norm": 0.17629684507846832, "learning_rate": 0.002, "loss": 2.5464, "step": 319640 }, { "epoch": 0.6368138786178759, "grad_norm": 0.1593470424413681, "learning_rate": 0.002, "loss": 2.5286, "step": 319650 }, { "epoch": 0.6368338008415148, "grad_norm": 0.1752011924982071, "learning_rate": 0.002, "loss": 2.5514, "step": 319660 }, { "epoch": 0.6368537230651536, "grad_norm": 0.17890691757202148, "learning_rate": 0.002, "loss": 2.5675, "step": 319670 }, { "epoch": 0.6368736452887925, "grad_norm": 0.17442774772644043, "learning_rate": 0.002, "loss": 2.5446, "step": 319680 }, { "epoch": 0.6368935675124314, "grad_norm": 0.1914588063955307, "learning_rate": 0.002, "loss": 2.5618, "step": 319690 }, { "epoch": 0.6369134897360704, "grad_norm": 0.18418990075588226, "learning_rate": 0.002, "loss": 2.571, "step": 319700 }, { "epoch": 0.6369334119597093, "grad_norm": 0.15388041734695435, "learning_rate": 0.002, "loss": 2.544, "step": 319710 }, { "epoch": 0.6369533341833482, "grad_norm": 0.13960671424865723, "learning_rate": 0.002, "loss": 2.549, "step": 319720 }, { "epoch": 0.6369732564069871, "grad_norm": 0.15015821158885956, "learning_rate": 0.002, "loss": 2.5416, "step": 319730 }, { "epoch": 0.636993178630626, "grad_norm": 0.15952982008457184, "learning_rate": 0.002, "loss": 2.5544, "step": 319740 }, { "epoch": 0.637013100854265, "grad_norm": 0.18890273571014404, "learning_rate": 0.002, "loss": 2.5623, "step": 319750 }, { "epoch": 0.6370330230779039, "grad_norm": 0.16920705139636993, "learning_rate": 0.002, "loss": 2.5476, "step": 319760 }, { "epoch": 0.6370529453015428, "grad_norm": 0.16518865525722504, "learning_rate": 0.002, "loss": 2.5635, "step": 319770 }, { "epoch": 0.6370728675251817, "grad_norm": 0.16240577399730682, "learning_rate": 0.002, "loss": 2.5726, "step": 319780 }, { "epoch": 0.6370927897488206, "grad_norm": 0.16701553761959076, "learning_rate": 0.002, "loss": 2.5562, "step": 319790 }, { "epoch": 0.6371127119724596, "grad_norm": 0.16146758198738098, "learning_rate": 0.002, "loss": 2.5439, "step": 319800 }, { "epoch": 0.6371326341960984, "grad_norm": 0.16146495938301086, "learning_rate": 0.002, "loss": 2.549, "step": 319810 }, { "epoch": 0.6371525564197373, "grad_norm": 0.145870640873909, "learning_rate": 0.002, "loss": 2.5405, "step": 319820 }, { "epoch": 0.6371724786433762, "grad_norm": 0.15773363411426544, "learning_rate": 0.002, "loss": 2.5509, "step": 319830 }, { "epoch": 0.6371924008670151, "grad_norm": 0.21108968555927277, "learning_rate": 0.002, "loss": 2.567, "step": 319840 }, { "epoch": 0.6372123230906541, "grad_norm": 0.16289487481117249, "learning_rate": 0.002, "loss": 2.5326, "step": 319850 }, { "epoch": 0.637232245314293, "grad_norm": 0.15844550728797913, "learning_rate": 0.002, "loss": 2.5536, "step": 319860 }, { "epoch": 0.6372521675379319, "grad_norm": 0.16241157054901123, "learning_rate": 0.002, "loss": 2.5526, "step": 319870 }, { "epoch": 0.6372720897615708, "grad_norm": 0.1747090369462967, "learning_rate": 0.002, "loss": 2.5626, "step": 319880 }, { "epoch": 0.6372920119852098, "grad_norm": 0.17681291699409485, "learning_rate": 0.002, "loss": 2.5589, "step": 319890 }, { "epoch": 0.6373119342088487, "grad_norm": 0.1577553153038025, "learning_rate": 0.002, "loss": 2.5451, "step": 319900 }, { "epoch": 0.6373318564324876, "grad_norm": 0.2580205798149109, "learning_rate": 0.002, "loss": 2.5611, "step": 319910 }, { "epoch": 0.6373517786561265, "grad_norm": 0.1597091555595398, "learning_rate": 0.002, "loss": 2.5375, "step": 319920 }, { "epoch": 0.6373717008797654, "grad_norm": 0.1490756869316101, "learning_rate": 0.002, "loss": 2.5704, "step": 319930 }, { "epoch": 0.6373916231034044, "grad_norm": 0.1512693166732788, "learning_rate": 0.002, "loss": 2.5499, "step": 319940 }, { "epoch": 0.6374115453270432, "grad_norm": 0.1686876118183136, "learning_rate": 0.002, "loss": 2.5464, "step": 319950 }, { "epoch": 0.6374314675506821, "grad_norm": 0.15947316586971283, "learning_rate": 0.002, "loss": 2.5556, "step": 319960 }, { "epoch": 0.637451389774321, "grad_norm": 0.1607722043991089, "learning_rate": 0.002, "loss": 2.5516, "step": 319970 }, { "epoch": 0.6374713119979599, "grad_norm": 0.19491058588027954, "learning_rate": 0.002, "loss": 2.5782, "step": 319980 }, { "epoch": 0.6374912342215989, "grad_norm": 0.1620774120092392, "learning_rate": 0.002, "loss": 2.5597, "step": 319990 }, { "epoch": 0.6375111564452378, "grad_norm": 0.159882590174675, "learning_rate": 0.002, "loss": 2.5511, "step": 320000 }, { "epoch": 0.6375310786688767, "grad_norm": 0.1593199223279953, "learning_rate": 0.002, "loss": 2.5431, "step": 320010 }, { "epoch": 0.6375510008925156, "grad_norm": 0.18393336236476898, "learning_rate": 0.002, "loss": 2.5643, "step": 320020 }, { "epoch": 0.6375709231161545, "grad_norm": 0.13797827064990997, "learning_rate": 0.002, "loss": 2.5581, "step": 320030 }, { "epoch": 0.6375908453397935, "grad_norm": 0.19388094544410706, "learning_rate": 0.002, "loss": 2.5642, "step": 320040 }, { "epoch": 0.6376107675634324, "grad_norm": 0.16941143572330475, "learning_rate": 0.002, "loss": 2.5589, "step": 320050 }, { "epoch": 0.6376306897870713, "grad_norm": 0.22047357261180878, "learning_rate": 0.002, "loss": 2.541, "step": 320060 }, { "epoch": 0.6376506120107102, "grad_norm": 0.1350759118795395, "learning_rate": 0.002, "loss": 2.554, "step": 320070 }, { "epoch": 0.637670534234349, "grad_norm": 0.1894538551568985, "learning_rate": 0.002, "loss": 2.5633, "step": 320080 }, { "epoch": 0.637690456457988, "grad_norm": 0.14361615478992462, "learning_rate": 0.002, "loss": 2.5575, "step": 320090 }, { "epoch": 0.6377103786816269, "grad_norm": 0.17734268307685852, "learning_rate": 0.002, "loss": 2.5381, "step": 320100 }, { "epoch": 0.6377303009052658, "grad_norm": 0.15350155532360077, "learning_rate": 0.002, "loss": 2.5661, "step": 320110 }, { "epoch": 0.6377502231289047, "grad_norm": 0.1574832797050476, "learning_rate": 0.002, "loss": 2.5486, "step": 320120 }, { "epoch": 0.6377701453525436, "grad_norm": 0.16193720698356628, "learning_rate": 0.002, "loss": 2.5613, "step": 320130 }, { "epoch": 0.6377900675761826, "grad_norm": 0.17448154091835022, "learning_rate": 0.002, "loss": 2.5593, "step": 320140 }, { "epoch": 0.6378099897998215, "grad_norm": 0.18039613962173462, "learning_rate": 0.002, "loss": 2.5579, "step": 320150 }, { "epoch": 0.6378299120234604, "grad_norm": 0.15455971658229828, "learning_rate": 0.002, "loss": 2.5609, "step": 320160 }, { "epoch": 0.6378498342470993, "grad_norm": 0.15176238119602203, "learning_rate": 0.002, "loss": 2.5463, "step": 320170 }, { "epoch": 0.6378697564707383, "grad_norm": 0.1745324730873108, "learning_rate": 0.002, "loss": 2.567, "step": 320180 }, { "epoch": 0.6378896786943772, "grad_norm": 0.18034504354000092, "learning_rate": 0.002, "loss": 2.568, "step": 320190 }, { "epoch": 0.6379096009180161, "grad_norm": 0.13938041031360626, "learning_rate": 0.002, "loss": 2.5635, "step": 320200 }, { "epoch": 0.637929523141655, "grad_norm": 0.2253340482711792, "learning_rate": 0.002, "loss": 2.5537, "step": 320210 }, { "epoch": 0.6379494453652939, "grad_norm": 0.1917373687028885, "learning_rate": 0.002, "loss": 2.5637, "step": 320220 }, { "epoch": 0.6379693675889329, "grad_norm": 0.14422297477722168, "learning_rate": 0.002, "loss": 2.5669, "step": 320230 }, { "epoch": 0.6379892898125717, "grad_norm": 0.16390718519687653, "learning_rate": 0.002, "loss": 2.5535, "step": 320240 }, { "epoch": 0.6380092120362106, "grad_norm": 0.1537010669708252, "learning_rate": 0.002, "loss": 2.5483, "step": 320250 }, { "epoch": 0.6380291342598495, "grad_norm": 0.16246093809604645, "learning_rate": 0.002, "loss": 2.5641, "step": 320260 }, { "epoch": 0.6380490564834884, "grad_norm": 0.17368030548095703, "learning_rate": 0.002, "loss": 2.5532, "step": 320270 }, { "epoch": 0.6380689787071274, "grad_norm": 0.1447117179632187, "learning_rate": 0.002, "loss": 2.5476, "step": 320280 }, { "epoch": 0.6380889009307663, "grad_norm": 0.16143041849136353, "learning_rate": 0.002, "loss": 2.5412, "step": 320290 }, { "epoch": 0.6381088231544052, "grad_norm": 0.16833506524562836, "learning_rate": 0.002, "loss": 2.539, "step": 320300 }, { "epoch": 0.6381287453780441, "grad_norm": 0.2043910026550293, "learning_rate": 0.002, "loss": 2.5445, "step": 320310 }, { "epoch": 0.638148667601683, "grad_norm": 0.19448935985565186, "learning_rate": 0.002, "loss": 2.5482, "step": 320320 }, { "epoch": 0.638168589825322, "grad_norm": 0.17129993438720703, "learning_rate": 0.002, "loss": 2.5672, "step": 320330 }, { "epoch": 0.6381885120489609, "grad_norm": 0.14694862067699432, "learning_rate": 0.002, "loss": 2.542, "step": 320340 }, { "epoch": 0.6382084342725998, "grad_norm": 0.16865703463554382, "learning_rate": 0.002, "loss": 2.5449, "step": 320350 }, { "epoch": 0.6382283564962387, "grad_norm": 0.18670082092285156, "learning_rate": 0.002, "loss": 2.5537, "step": 320360 }, { "epoch": 0.6382482787198775, "grad_norm": 0.15167303383350372, "learning_rate": 0.002, "loss": 2.5515, "step": 320370 }, { "epoch": 0.6382682009435166, "grad_norm": 0.15444639325141907, "learning_rate": 0.002, "loss": 2.5595, "step": 320380 }, { "epoch": 0.6382881231671554, "grad_norm": 0.20433156192302704, "learning_rate": 0.002, "loss": 2.5422, "step": 320390 }, { "epoch": 0.6383080453907943, "grad_norm": 0.1520812064409256, "learning_rate": 0.002, "loss": 2.538, "step": 320400 }, { "epoch": 0.6383279676144332, "grad_norm": 0.17380410432815552, "learning_rate": 0.002, "loss": 2.5569, "step": 320410 }, { "epoch": 0.6383478898380721, "grad_norm": 0.16667059063911438, "learning_rate": 0.002, "loss": 2.5551, "step": 320420 }, { "epoch": 0.6383678120617111, "grad_norm": 0.204354390501976, "learning_rate": 0.002, "loss": 2.5521, "step": 320430 }, { "epoch": 0.63838773428535, "grad_norm": 0.16357408463954926, "learning_rate": 0.002, "loss": 2.5768, "step": 320440 }, { "epoch": 0.6384076565089889, "grad_norm": 0.18260791897773743, "learning_rate": 0.002, "loss": 2.5552, "step": 320450 }, { "epoch": 0.6384275787326278, "grad_norm": 0.16290150582790375, "learning_rate": 0.002, "loss": 2.5553, "step": 320460 }, { "epoch": 0.6384475009562668, "grad_norm": 0.16682210564613342, "learning_rate": 0.002, "loss": 2.5575, "step": 320470 }, { "epoch": 0.6384674231799057, "grad_norm": 0.1438043713569641, "learning_rate": 0.002, "loss": 2.5673, "step": 320480 }, { "epoch": 0.6384873454035446, "grad_norm": 0.1712970733642578, "learning_rate": 0.002, "loss": 2.5393, "step": 320490 }, { "epoch": 0.6385072676271835, "grad_norm": 0.13829095661640167, "learning_rate": 0.002, "loss": 2.5654, "step": 320500 }, { "epoch": 0.6385271898508224, "grad_norm": 0.1567583680152893, "learning_rate": 0.002, "loss": 2.5715, "step": 320510 }, { "epoch": 0.6385471120744614, "grad_norm": 0.16454297304153442, "learning_rate": 0.002, "loss": 2.5675, "step": 320520 }, { "epoch": 0.6385670342981002, "grad_norm": 0.15078099071979523, "learning_rate": 0.002, "loss": 2.5263, "step": 320530 }, { "epoch": 0.6385869565217391, "grad_norm": 0.18555785715579987, "learning_rate": 0.002, "loss": 2.5585, "step": 320540 }, { "epoch": 0.638606878745378, "grad_norm": 0.17665402591228485, "learning_rate": 0.002, "loss": 2.5599, "step": 320550 }, { "epoch": 0.6386268009690169, "grad_norm": 0.2043585479259491, "learning_rate": 0.002, "loss": 2.5685, "step": 320560 }, { "epoch": 0.6386467231926559, "grad_norm": 0.1841493546962738, "learning_rate": 0.002, "loss": 2.5514, "step": 320570 }, { "epoch": 0.6386666454162948, "grad_norm": 0.14446595311164856, "learning_rate": 0.002, "loss": 2.5382, "step": 320580 }, { "epoch": 0.6386865676399337, "grad_norm": 0.1744837611913681, "learning_rate": 0.002, "loss": 2.5366, "step": 320590 }, { "epoch": 0.6387064898635726, "grad_norm": 0.20880302786827087, "learning_rate": 0.002, "loss": 2.5371, "step": 320600 }, { "epoch": 0.6387264120872115, "grad_norm": 0.1642414927482605, "learning_rate": 0.002, "loss": 2.5686, "step": 320610 }, { "epoch": 0.6387463343108505, "grad_norm": 0.15058909356594086, "learning_rate": 0.002, "loss": 2.5413, "step": 320620 }, { "epoch": 0.6387662565344894, "grad_norm": 0.1739318072795868, "learning_rate": 0.002, "loss": 2.5528, "step": 320630 }, { "epoch": 0.6387861787581283, "grad_norm": 0.14907674491405487, "learning_rate": 0.002, "loss": 2.5449, "step": 320640 }, { "epoch": 0.6388061009817672, "grad_norm": 0.16679707169532776, "learning_rate": 0.002, "loss": 2.548, "step": 320650 }, { "epoch": 0.638826023205406, "grad_norm": 0.23727364838123322, "learning_rate": 0.002, "loss": 2.5702, "step": 320660 }, { "epoch": 0.638845945429045, "grad_norm": 0.17457662522792816, "learning_rate": 0.002, "loss": 2.5514, "step": 320670 }, { "epoch": 0.6388658676526839, "grad_norm": 0.13796886801719666, "learning_rate": 0.002, "loss": 2.545, "step": 320680 }, { "epoch": 0.6388857898763228, "grad_norm": 0.20027443766593933, "learning_rate": 0.002, "loss": 2.5677, "step": 320690 }, { "epoch": 0.6389057120999617, "grad_norm": 0.1580931842327118, "learning_rate": 0.002, "loss": 2.5556, "step": 320700 }, { "epoch": 0.6389256343236006, "grad_norm": 0.1804216504096985, "learning_rate": 0.002, "loss": 2.566, "step": 320710 }, { "epoch": 0.6389455565472396, "grad_norm": 0.17257238924503326, "learning_rate": 0.002, "loss": 2.5667, "step": 320720 }, { "epoch": 0.6389654787708785, "grad_norm": 0.17682579159736633, "learning_rate": 0.002, "loss": 2.5523, "step": 320730 }, { "epoch": 0.6389854009945174, "grad_norm": 0.160385861992836, "learning_rate": 0.002, "loss": 2.5492, "step": 320740 }, { "epoch": 0.6390053232181563, "grad_norm": 0.15465053915977478, "learning_rate": 0.002, "loss": 2.5683, "step": 320750 }, { "epoch": 0.6390252454417953, "grad_norm": 0.18601688742637634, "learning_rate": 0.002, "loss": 2.5596, "step": 320760 }, { "epoch": 0.6390451676654342, "grad_norm": 0.15963105857372284, "learning_rate": 0.002, "loss": 2.5437, "step": 320770 }, { "epoch": 0.6390650898890731, "grad_norm": 0.16659487783908844, "learning_rate": 0.002, "loss": 2.5448, "step": 320780 }, { "epoch": 0.639085012112712, "grad_norm": 0.16787701845169067, "learning_rate": 0.002, "loss": 2.5666, "step": 320790 }, { "epoch": 0.6391049343363508, "grad_norm": 0.16628849506378174, "learning_rate": 0.002, "loss": 2.5669, "step": 320800 }, { "epoch": 0.6391248565599899, "grad_norm": 0.16265414655208588, "learning_rate": 0.002, "loss": 2.5701, "step": 320810 }, { "epoch": 0.6391447787836287, "grad_norm": 0.2116764932870865, "learning_rate": 0.002, "loss": 2.5466, "step": 320820 }, { "epoch": 0.6391647010072676, "grad_norm": 0.20362861454486847, "learning_rate": 0.002, "loss": 2.5569, "step": 320830 }, { "epoch": 0.6391846232309065, "grad_norm": 0.16069301962852478, "learning_rate": 0.002, "loss": 2.5541, "step": 320840 }, { "epoch": 0.6392045454545454, "grad_norm": 0.24908524751663208, "learning_rate": 0.002, "loss": 2.5701, "step": 320850 }, { "epoch": 0.6392244676781844, "grad_norm": 0.18078073859214783, "learning_rate": 0.002, "loss": 2.551, "step": 320860 }, { "epoch": 0.6392443899018233, "grad_norm": 0.18456469476222992, "learning_rate": 0.002, "loss": 2.5622, "step": 320870 }, { "epoch": 0.6392643121254622, "grad_norm": 0.18641848862171173, "learning_rate": 0.002, "loss": 2.5377, "step": 320880 }, { "epoch": 0.6392842343491011, "grad_norm": 0.16566912829875946, "learning_rate": 0.002, "loss": 2.5502, "step": 320890 }, { "epoch": 0.63930415657274, "grad_norm": 0.14949949085712433, "learning_rate": 0.002, "loss": 2.5599, "step": 320900 }, { "epoch": 0.639324078796379, "grad_norm": 0.22603000700473785, "learning_rate": 0.002, "loss": 2.5665, "step": 320910 }, { "epoch": 0.6393440010200179, "grad_norm": 0.18344800174236298, "learning_rate": 0.002, "loss": 2.5546, "step": 320920 }, { "epoch": 0.6393639232436568, "grad_norm": 0.15587930381298065, "learning_rate": 0.002, "loss": 2.5591, "step": 320930 }, { "epoch": 0.6393838454672957, "grad_norm": 0.14034788310527802, "learning_rate": 0.002, "loss": 2.5344, "step": 320940 }, { "epoch": 0.6394037676909345, "grad_norm": 0.1907845288515091, "learning_rate": 0.002, "loss": 2.5445, "step": 320950 }, { "epoch": 0.6394236899145735, "grad_norm": 0.1954069435596466, "learning_rate": 0.002, "loss": 2.5489, "step": 320960 }, { "epoch": 0.6394436121382124, "grad_norm": 0.16980762779712677, "learning_rate": 0.002, "loss": 2.5823, "step": 320970 }, { "epoch": 0.6394635343618513, "grad_norm": 0.1872701793909073, "learning_rate": 0.002, "loss": 2.5595, "step": 320980 }, { "epoch": 0.6394834565854902, "grad_norm": 0.14861127734184265, "learning_rate": 0.002, "loss": 2.5622, "step": 320990 }, { "epoch": 0.6395033788091291, "grad_norm": 0.19625240564346313, "learning_rate": 0.002, "loss": 2.5495, "step": 321000 }, { "epoch": 0.6395233010327681, "grad_norm": 0.14982135593891144, "learning_rate": 0.002, "loss": 2.5733, "step": 321010 }, { "epoch": 0.639543223256407, "grad_norm": 0.1652946025133133, "learning_rate": 0.002, "loss": 2.563, "step": 321020 }, { "epoch": 0.6395631454800459, "grad_norm": 0.19911742210388184, "learning_rate": 0.002, "loss": 2.5564, "step": 321030 }, { "epoch": 0.6395830677036848, "grad_norm": 0.17591193318367004, "learning_rate": 0.002, "loss": 2.5563, "step": 321040 }, { "epoch": 0.6396029899273237, "grad_norm": 0.15804104506969452, "learning_rate": 0.002, "loss": 2.5602, "step": 321050 }, { "epoch": 0.6396229121509627, "grad_norm": 0.14553360641002655, "learning_rate": 0.002, "loss": 2.539, "step": 321060 }, { "epoch": 0.6396428343746016, "grad_norm": 0.1549185961484909, "learning_rate": 0.002, "loss": 2.5619, "step": 321070 }, { "epoch": 0.6396627565982405, "grad_norm": 0.20404353737831116, "learning_rate": 0.002, "loss": 2.5491, "step": 321080 }, { "epoch": 0.6396826788218793, "grad_norm": 0.15004342794418335, "learning_rate": 0.002, "loss": 2.565, "step": 321090 }, { "epoch": 0.6397026010455183, "grad_norm": 0.14708997309207916, "learning_rate": 0.002, "loss": 2.5569, "step": 321100 }, { "epoch": 0.6397225232691572, "grad_norm": 0.1574670523405075, "learning_rate": 0.002, "loss": 2.5506, "step": 321110 }, { "epoch": 0.6397424454927961, "grad_norm": 0.1935461014509201, "learning_rate": 0.002, "loss": 2.5665, "step": 321120 }, { "epoch": 0.639762367716435, "grad_norm": 0.13895130157470703, "learning_rate": 0.002, "loss": 2.5569, "step": 321130 }, { "epoch": 0.6397822899400739, "grad_norm": 0.16539600491523743, "learning_rate": 0.002, "loss": 2.5458, "step": 321140 }, { "epoch": 0.6398022121637129, "grad_norm": 0.15781621634960175, "learning_rate": 0.002, "loss": 2.5647, "step": 321150 }, { "epoch": 0.6398221343873518, "grad_norm": 0.15702156722545624, "learning_rate": 0.002, "loss": 2.5521, "step": 321160 }, { "epoch": 0.6398420566109907, "grad_norm": 0.14997056126594543, "learning_rate": 0.002, "loss": 2.5568, "step": 321170 }, { "epoch": 0.6398619788346296, "grad_norm": 0.1602170765399933, "learning_rate": 0.002, "loss": 2.5484, "step": 321180 }, { "epoch": 0.6398819010582685, "grad_norm": 0.17242324352264404, "learning_rate": 0.002, "loss": 2.5478, "step": 321190 }, { "epoch": 0.6399018232819075, "grad_norm": 0.15436004102230072, "learning_rate": 0.002, "loss": 2.5527, "step": 321200 }, { "epoch": 0.6399217455055464, "grad_norm": 0.18807372450828552, "learning_rate": 0.002, "loss": 2.5465, "step": 321210 }, { "epoch": 0.6399416677291853, "grad_norm": 0.1803736686706543, "learning_rate": 0.002, "loss": 2.552, "step": 321220 }, { "epoch": 0.6399615899528242, "grad_norm": 0.19068925082683563, "learning_rate": 0.002, "loss": 2.5294, "step": 321230 }, { "epoch": 0.639981512176463, "grad_norm": 0.18086862564086914, "learning_rate": 0.002, "loss": 2.5521, "step": 321240 }, { "epoch": 0.640001434400102, "grad_norm": 0.15659664571285248, "learning_rate": 0.002, "loss": 2.5492, "step": 321250 }, { "epoch": 0.6400213566237409, "grad_norm": 0.19928236305713654, "learning_rate": 0.002, "loss": 2.5628, "step": 321260 }, { "epoch": 0.6400412788473798, "grad_norm": 0.19988788664340973, "learning_rate": 0.002, "loss": 2.5456, "step": 321270 }, { "epoch": 0.6400612010710187, "grad_norm": 0.18665318191051483, "learning_rate": 0.002, "loss": 2.5615, "step": 321280 }, { "epoch": 0.6400811232946576, "grad_norm": 0.14698055386543274, "learning_rate": 0.002, "loss": 2.5609, "step": 321290 }, { "epoch": 0.6401010455182966, "grad_norm": 0.14479902386665344, "learning_rate": 0.002, "loss": 2.5519, "step": 321300 }, { "epoch": 0.6401209677419355, "grad_norm": 0.178983673453331, "learning_rate": 0.002, "loss": 2.5612, "step": 321310 }, { "epoch": 0.6401408899655744, "grad_norm": 0.16516974568367004, "learning_rate": 0.002, "loss": 2.5626, "step": 321320 }, { "epoch": 0.6401608121892133, "grad_norm": 0.19041641056537628, "learning_rate": 0.002, "loss": 2.5518, "step": 321330 }, { "epoch": 0.6401807344128522, "grad_norm": 0.1728266030550003, "learning_rate": 0.002, "loss": 2.5475, "step": 321340 }, { "epoch": 0.6402006566364912, "grad_norm": 0.1574026644229889, "learning_rate": 0.002, "loss": 2.5545, "step": 321350 }, { "epoch": 0.6402205788601301, "grad_norm": 0.17235425114631653, "learning_rate": 0.002, "loss": 2.5501, "step": 321360 }, { "epoch": 0.640240501083769, "grad_norm": 0.15308620035648346, "learning_rate": 0.002, "loss": 2.5651, "step": 321370 }, { "epoch": 0.6402604233074078, "grad_norm": 0.1564524918794632, "learning_rate": 0.002, "loss": 2.532, "step": 321380 }, { "epoch": 0.6402803455310468, "grad_norm": 0.16397936642169952, "learning_rate": 0.002, "loss": 2.5562, "step": 321390 }, { "epoch": 0.6403002677546857, "grad_norm": 0.16245761513710022, "learning_rate": 0.002, "loss": 2.5584, "step": 321400 }, { "epoch": 0.6403201899783246, "grad_norm": 0.1564456820487976, "learning_rate": 0.002, "loss": 2.545, "step": 321410 }, { "epoch": 0.6403401122019635, "grad_norm": 0.18658681213855743, "learning_rate": 0.002, "loss": 2.5739, "step": 321420 }, { "epoch": 0.6403600344256024, "grad_norm": 0.16018296778202057, "learning_rate": 0.002, "loss": 2.5454, "step": 321430 }, { "epoch": 0.6403799566492414, "grad_norm": 0.27300330996513367, "learning_rate": 0.002, "loss": 2.5288, "step": 321440 }, { "epoch": 0.6403998788728803, "grad_norm": 0.1928301453590393, "learning_rate": 0.002, "loss": 2.5444, "step": 321450 }, { "epoch": 0.6404198010965192, "grad_norm": 0.16825823485851288, "learning_rate": 0.002, "loss": 2.5519, "step": 321460 }, { "epoch": 0.6404397233201581, "grad_norm": 0.1573767513036728, "learning_rate": 0.002, "loss": 2.5551, "step": 321470 }, { "epoch": 0.640459645543797, "grad_norm": 0.14689305424690247, "learning_rate": 0.002, "loss": 2.5628, "step": 321480 }, { "epoch": 0.640479567767436, "grad_norm": 0.15125365555286407, "learning_rate": 0.002, "loss": 2.5677, "step": 321490 }, { "epoch": 0.6404994899910749, "grad_norm": 0.24362584948539734, "learning_rate": 0.002, "loss": 2.5426, "step": 321500 }, { "epoch": 0.6405194122147138, "grad_norm": 0.16249006986618042, "learning_rate": 0.002, "loss": 2.5623, "step": 321510 }, { "epoch": 0.6405393344383526, "grad_norm": 0.13403375446796417, "learning_rate": 0.002, "loss": 2.5502, "step": 321520 }, { "epoch": 0.6405592566619915, "grad_norm": 0.14560474455356598, "learning_rate": 0.002, "loss": 2.5556, "step": 321530 }, { "epoch": 0.6405791788856305, "grad_norm": 0.1436339169740677, "learning_rate": 0.002, "loss": 2.5714, "step": 321540 }, { "epoch": 0.6405991011092694, "grad_norm": 0.18157900869846344, "learning_rate": 0.002, "loss": 2.5653, "step": 321550 }, { "epoch": 0.6406190233329083, "grad_norm": 0.1709180623292923, "learning_rate": 0.002, "loss": 2.553, "step": 321560 }, { "epoch": 0.6406389455565472, "grad_norm": 0.16614976525306702, "learning_rate": 0.002, "loss": 2.5578, "step": 321570 }, { "epoch": 0.6406588677801861, "grad_norm": 0.20870070159435272, "learning_rate": 0.002, "loss": 2.5401, "step": 321580 }, { "epoch": 0.6406787900038251, "grad_norm": 0.151839479804039, "learning_rate": 0.002, "loss": 2.5569, "step": 321590 }, { "epoch": 0.640698712227464, "grad_norm": 0.14819300174713135, "learning_rate": 0.002, "loss": 2.5547, "step": 321600 }, { "epoch": 0.6407186344511029, "grad_norm": 0.14068736135959625, "learning_rate": 0.002, "loss": 2.5482, "step": 321610 }, { "epoch": 0.6407385566747418, "grad_norm": 0.14680737257003784, "learning_rate": 0.002, "loss": 2.5588, "step": 321620 }, { "epoch": 0.6407584788983807, "grad_norm": 0.16039973497390747, "learning_rate": 0.002, "loss": 2.5408, "step": 321630 }, { "epoch": 0.6407784011220197, "grad_norm": 0.14522922039031982, "learning_rate": 0.002, "loss": 2.5652, "step": 321640 }, { "epoch": 0.6407983233456586, "grad_norm": 0.18446367979049683, "learning_rate": 0.002, "loss": 2.5443, "step": 321650 }, { "epoch": 0.6408182455692975, "grad_norm": 0.16251240670681, "learning_rate": 0.002, "loss": 2.5471, "step": 321660 }, { "epoch": 0.6408381677929363, "grad_norm": 0.14871437847614288, "learning_rate": 0.002, "loss": 2.563, "step": 321670 }, { "epoch": 0.6408580900165753, "grad_norm": 0.2044888138771057, "learning_rate": 0.002, "loss": 2.5521, "step": 321680 }, { "epoch": 0.6408780122402142, "grad_norm": 0.168989360332489, "learning_rate": 0.002, "loss": 2.5484, "step": 321690 }, { "epoch": 0.6408979344638531, "grad_norm": 0.15753932297229767, "learning_rate": 0.002, "loss": 2.5552, "step": 321700 }, { "epoch": 0.640917856687492, "grad_norm": 0.14552822709083557, "learning_rate": 0.002, "loss": 2.5566, "step": 321710 }, { "epoch": 0.6409377789111309, "grad_norm": 0.13665266335010529, "learning_rate": 0.002, "loss": 2.5437, "step": 321720 }, { "epoch": 0.6409577011347699, "grad_norm": 0.21554836630821228, "learning_rate": 0.002, "loss": 2.5478, "step": 321730 }, { "epoch": 0.6409776233584088, "grad_norm": 0.16438831388950348, "learning_rate": 0.002, "loss": 2.5609, "step": 321740 }, { "epoch": 0.6409975455820477, "grad_norm": 0.19102492928504944, "learning_rate": 0.002, "loss": 2.5721, "step": 321750 }, { "epoch": 0.6410174678056866, "grad_norm": 0.17934304475784302, "learning_rate": 0.002, "loss": 2.5691, "step": 321760 }, { "epoch": 0.6410373900293255, "grad_norm": 0.14449656009674072, "learning_rate": 0.002, "loss": 2.5519, "step": 321770 }, { "epoch": 0.6410573122529645, "grad_norm": 0.17439676821231842, "learning_rate": 0.002, "loss": 2.5559, "step": 321780 }, { "epoch": 0.6410772344766034, "grad_norm": 0.140436589717865, "learning_rate": 0.002, "loss": 2.572, "step": 321790 }, { "epoch": 0.6410971567002423, "grad_norm": 0.15040837228298187, "learning_rate": 0.002, "loss": 2.5639, "step": 321800 }, { "epoch": 0.6411170789238811, "grad_norm": 0.1817106157541275, "learning_rate": 0.002, "loss": 2.5534, "step": 321810 }, { "epoch": 0.64113700114752, "grad_norm": 0.14000780880451202, "learning_rate": 0.002, "loss": 2.5453, "step": 321820 }, { "epoch": 0.641156923371159, "grad_norm": 0.15853853523731232, "learning_rate": 0.002, "loss": 2.57, "step": 321830 }, { "epoch": 0.6411768455947979, "grad_norm": 0.1500501185655594, "learning_rate": 0.002, "loss": 2.5339, "step": 321840 }, { "epoch": 0.6411967678184368, "grad_norm": 0.14026287198066711, "learning_rate": 0.002, "loss": 2.554, "step": 321850 }, { "epoch": 0.6412166900420757, "grad_norm": 0.20231284201145172, "learning_rate": 0.002, "loss": 2.5476, "step": 321860 }, { "epoch": 0.6412366122657146, "grad_norm": 0.15007054805755615, "learning_rate": 0.002, "loss": 2.551, "step": 321870 }, { "epoch": 0.6412565344893536, "grad_norm": 0.20306667685508728, "learning_rate": 0.002, "loss": 2.5597, "step": 321880 }, { "epoch": 0.6412764567129925, "grad_norm": 0.18619322776794434, "learning_rate": 0.002, "loss": 2.56, "step": 321890 }, { "epoch": 0.6412963789366314, "grad_norm": 0.15367616713047028, "learning_rate": 0.002, "loss": 2.5463, "step": 321900 }, { "epoch": 0.6413163011602703, "grad_norm": 0.16569475829601288, "learning_rate": 0.002, "loss": 2.5499, "step": 321910 }, { "epoch": 0.6413362233839092, "grad_norm": 0.1686169058084488, "learning_rate": 0.002, "loss": 2.5587, "step": 321920 }, { "epoch": 0.6413561456075482, "grad_norm": 0.16179904341697693, "learning_rate": 0.002, "loss": 2.5578, "step": 321930 }, { "epoch": 0.6413760678311871, "grad_norm": 0.14292070269584656, "learning_rate": 0.002, "loss": 2.5612, "step": 321940 }, { "epoch": 0.641395990054826, "grad_norm": 0.15940292179584503, "learning_rate": 0.002, "loss": 2.5482, "step": 321950 }, { "epoch": 0.6414159122784648, "grad_norm": 0.1510925590991974, "learning_rate": 0.002, "loss": 2.5438, "step": 321960 }, { "epoch": 0.6414358345021038, "grad_norm": 0.22563685476779938, "learning_rate": 0.002, "loss": 2.5744, "step": 321970 }, { "epoch": 0.6414557567257427, "grad_norm": 0.16007229685783386, "learning_rate": 0.002, "loss": 2.554, "step": 321980 }, { "epoch": 0.6414756789493816, "grad_norm": 0.15536098182201385, "learning_rate": 0.002, "loss": 2.5357, "step": 321990 }, { "epoch": 0.6414956011730205, "grad_norm": 0.1641324907541275, "learning_rate": 0.002, "loss": 2.553, "step": 322000 }, { "epoch": 0.6415155233966594, "grad_norm": 0.17999856173992157, "learning_rate": 0.002, "loss": 2.5648, "step": 322010 }, { "epoch": 0.6415354456202984, "grad_norm": 0.17226897180080414, "learning_rate": 0.002, "loss": 2.5652, "step": 322020 }, { "epoch": 0.6415553678439373, "grad_norm": 0.13506819307804108, "learning_rate": 0.002, "loss": 2.5395, "step": 322030 }, { "epoch": 0.6415752900675762, "grad_norm": 0.2093317210674286, "learning_rate": 0.002, "loss": 2.5539, "step": 322040 }, { "epoch": 0.6415952122912151, "grad_norm": 0.16368405520915985, "learning_rate": 0.002, "loss": 2.5663, "step": 322050 }, { "epoch": 0.641615134514854, "grad_norm": 0.16452749073505402, "learning_rate": 0.002, "loss": 2.5575, "step": 322060 }, { "epoch": 0.641635056738493, "grad_norm": 0.15014523267745972, "learning_rate": 0.002, "loss": 2.5501, "step": 322070 }, { "epoch": 0.6416549789621319, "grad_norm": 0.17210616171360016, "learning_rate": 0.002, "loss": 2.5498, "step": 322080 }, { "epoch": 0.6416749011857708, "grad_norm": 0.17050927877426147, "learning_rate": 0.002, "loss": 2.5442, "step": 322090 }, { "epoch": 0.6416948234094096, "grad_norm": 0.15701119601726532, "learning_rate": 0.002, "loss": 2.5525, "step": 322100 }, { "epoch": 0.6417147456330485, "grad_norm": 0.16389162838459015, "learning_rate": 0.002, "loss": 2.5626, "step": 322110 }, { "epoch": 0.6417346678566875, "grad_norm": 0.19063013792037964, "learning_rate": 0.002, "loss": 2.5463, "step": 322120 }, { "epoch": 0.6417545900803264, "grad_norm": 0.13683490455150604, "learning_rate": 0.002, "loss": 2.5585, "step": 322130 }, { "epoch": 0.6417745123039653, "grad_norm": 0.19845691323280334, "learning_rate": 0.002, "loss": 2.541, "step": 322140 }, { "epoch": 0.6417944345276042, "grad_norm": 0.15545520186424255, "learning_rate": 0.002, "loss": 2.5573, "step": 322150 }, { "epoch": 0.6418143567512431, "grad_norm": 0.15714971721172333, "learning_rate": 0.002, "loss": 2.5738, "step": 322160 }, { "epoch": 0.6418342789748821, "grad_norm": 0.18261203169822693, "learning_rate": 0.002, "loss": 2.552, "step": 322170 }, { "epoch": 0.641854201198521, "grad_norm": 0.163359597325325, "learning_rate": 0.002, "loss": 2.5717, "step": 322180 }, { "epoch": 0.6418741234221599, "grad_norm": 0.18131640553474426, "learning_rate": 0.002, "loss": 2.5415, "step": 322190 }, { "epoch": 0.6418940456457988, "grad_norm": 0.1712607592344284, "learning_rate": 0.002, "loss": 2.5389, "step": 322200 }, { "epoch": 0.6419139678694377, "grad_norm": 0.14849776029586792, "learning_rate": 0.002, "loss": 2.5503, "step": 322210 }, { "epoch": 0.6419338900930767, "grad_norm": 0.16433930397033691, "learning_rate": 0.002, "loss": 2.5429, "step": 322220 }, { "epoch": 0.6419538123167156, "grad_norm": 0.19504909217357635, "learning_rate": 0.002, "loss": 2.5513, "step": 322230 }, { "epoch": 0.6419737345403544, "grad_norm": 0.15566250681877136, "learning_rate": 0.002, "loss": 2.5576, "step": 322240 }, { "epoch": 0.6419936567639933, "grad_norm": 0.17956869304180145, "learning_rate": 0.002, "loss": 2.5383, "step": 322250 }, { "epoch": 0.6420135789876323, "grad_norm": 0.1532244086265564, "learning_rate": 0.002, "loss": 2.5517, "step": 322260 }, { "epoch": 0.6420335012112712, "grad_norm": 0.16217149794101715, "learning_rate": 0.002, "loss": 2.5541, "step": 322270 }, { "epoch": 0.6420534234349101, "grad_norm": 0.1732640117406845, "learning_rate": 0.002, "loss": 2.5439, "step": 322280 }, { "epoch": 0.642073345658549, "grad_norm": 0.16696423292160034, "learning_rate": 0.002, "loss": 2.566, "step": 322290 }, { "epoch": 0.6420932678821879, "grad_norm": 0.19698414206504822, "learning_rate": 0.002, "loss": 2.545, "step": 322300 }, { "epoch": 0.6421131901058269, "grad_norm": 0.15814469754695892, "learning_rate": 0.002, "loss": 2.5411, "step": 322310 }, { "epoch": 0.6421331123294658, "grad_norm": 0.15916459262371063, "learning_rate": 0.002, "loss": 2.5618, "step": 322320 }, { "epoch": 0.6421530345531047, "grad_norm": 0.16671501100063324, "learning_rate": 0.002, "loss": 2.5605, "step": 322330 }, { "epoch": 0.6421729567767436, "grad_norm": 0.15651766955852509, "learning_rate": 0.002, "loss": 2.5648, "step": 322340 }, { "epoch": 0.6421928790003825, "grad_norm": 0.20684392750263214, "learning_rate": 0.002, "loss": 2.5681, "step": 322350 }, { "epoch": 0.6422128012240215, "grad_norm": 0.14718565344810486, "learning_rate": 0.002, "loss": 2.5646, "step": 322360 }, { "epoch": 0.6422327234476604, "grad_norm": 0.16409452259540558, "learning_rate": 0.002, "loss": 2.5566, "step": 322370 }, { "epoch": 0.6422526456712992, "grad_norm": 0.1575763076543808, "learning_rate": 0.002, "loss": 2.5433, "step": 322380 }, { "epoch": 0.6422725678949381, "grad_norm": 0.1818760335445404, "learning_rate": 0.002, "loss": 2.545, "step": 322390 }, { "epoch": 0.642292490118577, "grad_norm": 0.1595354527235031, "learning_rate": 0.002, "loss": 2.5621, "step": 322400 }, { "epoch": 0.642312412342216, "grad_norm": 0.16411109268665314, "learning_rate": 0.002, "loss": 2.5577, "step": 322410 }, { "epoch": 0.6423323345658549, "grad_norm": 0.14072780311107635, "learning_rate": 0.002, "loss": 2.5638, "step": 322420 }, { "epoch": 0.6423522567894938, "grad_norm": 0.1536269187927246, "learning_rate": 0.002, "loss": 2.5547, "step": 322430 }, { "epoch": 0.6423721790131327, "grad_norm": 0.17351415753364563, "learning_rate": 0.002, "loss": 2.5497, "step": 322440 }, { "epoch": 0.6423921012367716, "grad_norm": 0.1838168501853943, "learning_rate": 0.002, "loss": 2.558, "step": 322450 }, { "epoch": 0.6424120234604106, "grad_norm": 0.20216308534145355, "learning_rate": 0.002, "loss": 2.5486, "step": 322460 }, { "epoch": 0.6424319456840495, "grad_norm": 0.13874341547489166, "learning_rate": 0.002, "loss": 2.5537, "step": 322470 }, { "epoch": 0.6424518679076884, "grad_norm": 0.16534128785133362, "learning_rate": 0.002, "loss": 2.5476, "step": 322480 }, { "epoch": 0.6424717901313273, "grad_norm": 0.16268441081047058, "learning_rate": 0.002, "loss": 2.5521, "step": 322490 }, { "epoch": 0.6424917123549662, "grad_norm": 0.18803653120994568, "learning_rate": 0.002, "loss": 2.5606, "step": 322500 }, { "epoch": 0.6425116345786052, "grad_norm": 0.15644292533397675, "learning_rate": 0.002, "loss": 2.5678, "step": 322510 }, { "epoch": 0.642531556802244, "grad_norm": 0.18109911680221558, "learning_rate": 0.002, "loss": 2.5485, "step": 322520 }, { "epoch": 0.6425514790258829, "grad_norm": 0.14254631102085114, "learning_rate": 0.002, "loss": 2.5418, "step": 322530 }, { "epoch": 0.6425714012495218, "grad_norm": 0.18538996577262878, "learning_rate": 0.002, "loss": 2.5623, "step": 322540 }, { "epoch": 0.6425913234731607, "grad_norm": 0.1547044962644577, "learning_rate": 0.002, "loss": 2.5538, "step": 322550 }, { "epoch": 0.6426112456967997, "grad_norm": 0.17067553102970123, "learning_rate": 0.002, "loss": 2.5627, "step": 322560 }, { "epoch": 0.6426311679204386, "grad_norm": 0.18117906153202057, "learning_rate": 0.002, "loss": 2.5626, "step": 322570 }, { "epoch": 0.6426510901440775, "grad_norm": 0.18971869349479675, "learning_rate": 0.002, "loss": 2.5475, "step": 322580 }, { "epoch": 0.6426710123677164, "grad_norm": 0.1626635491847992, "learning_rate": 0.002, "loss": 2.5636, "step": 322590 }, { "epoch": 0.6426909345913554, "grad_norm": 0.21515247225761414, "learning_rate": 0.002, "loss": 2.5438, "step": 322600 }, { "epoch": 0.6427108568149943, "grad_norm": 0.141759991645813, "learning_rate": 0.002, "loss": 2.5575, "step": 322610 }, { "epoch": 0.6427307790386332, "grad_norm": 0.15458615124225616, "learning_rate": 0.002, "loss": 2.5322, "step": 322620 }, { "epoch": 0.6427507012622721, "grad_norm": 0.14314088225364685, "learning_rate": 0.002, "loss": 2.5507, "step": 322630 }, { "epoch": 0.642770623485911, "grad_norm": 0.1642332822084427, "learning_rate": 0.002, "loss": 2.5454, "step": 322640 }, { "epoch": 0.64279054570955, "grad_norm": 0.16585157811641693, "learning_rate": 0.002, "loss": 2.5422, "step": 322650 }, { "epoch": 0.6428104679331889, "grad_norm": 0.22365957498550415, "learning_rate": 0.002, "loss": 2.5542, "step": 322660 }, { "epoch": 0.6428303901568277, "grad_norm": 0.1416959911584854, "learning_rate": 0.002, "loss": 2.5476, "step": 322670 }, { "epoch": 0.6428503123804666, "grad_norm": 0.18491612374782562, "learning_rate": 0.002, "loss": 2.5434, "step": 322680 }, { "epoch": 0.6428702346041055, "grad_norm": 0.15250813961029053, "learning_rate": 0.002, "loss": 2.5425, "step": 322690 }, { "epoch": 0.6428901568277445, "grad_norm": 0.15604953467845917, "learning_rate": 0.002, "loss": 2.5584, "step": 322700 }, { "epoch": 0.6429100790513834, "grad_norm": 0.1484019160270691, "learning_rate": 0.002, "loss": 2.5524, "step": 322710 }, { "epoch": 0.6429300012750223, "grad_norm": 0.16938473284244537, "learning_rate": 0.002, "loss": 2.5639, "step": 322720 }, { "epoch": 0.6429499234986612, "grad_norm": 0.1687333881855011, "learning_rate": 0.002, "loss": 2.5439, "step": 322730 }, { "epoch": 0.6429698457223001, "grad_norm": 0.18037942051887512, "learning_rate": 0.002, "loss": 2.5604, "step": 322740 }, { "epoch": 0.6429897679459391, "grad_norm": 0.17537692189216614, "learning_rate": 0.002, "loss": 2.5592, "step": 322750 }, { "epoch": 0.643009690169578, "grad_norm": 0.1641613394021988, "learning_rate": 0.002, "loss": 2.5557, "step": 322760 }, { "epoch": 0.6430296123932169, "grad_norm": 0.1553674191236496, "learning_rate": 0.002, "loss": 2.5631, "step": 322770 }, { "epoch": 0.6430495346168558, "grad_norm": 0.15114131569862366, "learning_rate": 0.002, "loss": 2.5583, "step": 322780 }, { "epoch": 0.6430694568404947, "grad_norm": 0.13757117092609406, "learning_rate": 0.002, "loss": 2.5429, "step": 322790 }, { "epoch": 0.6430893790641337, "grad_norm": 0.17169642448425293, "learning_rate": 0.002, "loss": 2.5395, "step": 322800 }, { "epoch": 0.6431093012877726, "grad_norm": 0.16975724697113037, "learning_rate": 0.002, "loss": 2.5464, "step": 322810 }, { "epoch": 0.6431292235114114, "grad_norm": 0.18504959344863892, "learning_rate": 0.002, "loss": 2.5613, "step": 322820 }, { "epoch": 0.6431491457350503, "grad_norm": 0.15544158220291138, "learning_rate": 0.002, "loss": 2.5496, "step": 322830 }, { "epoch": 0.6431690679586892, "grad_norm": 0.16061590611934662, "learning_rate": 0.002, "loss": 2.5536, "step": 322840 }, { "epoch": 0.6431889901823282, "grad_norm": 0.15899211168289185, "learning_rate": 0.002, "loss": 2.5689, "step": 322850 }, { "epoch": 0.6432089124059671, "grad_norm": 0.1609799563884735, "learning_rate": 0.002, "loss": 2.5437, "step": 322860 }, { "epoch": 0.643228834629606, "grad_norm": 0.14145837724208832, "learning_rate": 0.002, "loss": 2.5521, "step": 322870 }, { "epoch": 0.6432487568532449, "grad_norm": 0.17120414972305298, "learning_rate": 0.002, "loss": 2.5712, "step": 322880 }, { "epoch": 0.6432686790768839, "grad_norm": 0.15440921485424042, "learning_rate": 0.002, "loss": 2.5473, "step": 322890 }, { "epoch": 0.6432886013005228, "grad_norm": 0.2693881690502167, "learning_rate": 0.002, "loss": 2.5641, "step": 322900 }, { "epoch": 0.6433085235241617, "grad_norm": 0.16937048733234406, "learning_rate": 0.002, "loss": 2.5648, "step": 322910 }, { "epoch": 0.6433284457478006, "grad_norm": 0.16333238780498505, "learning_rate": 0.002, "loss": 2.539, "step": 322920 }, { "epoch": 0.6433483679714395, "grad_norm": 0.16861213743686676, "learning_rate": 0.002, "loss": 2.5515, "step": 322930 }, { "epoch": 0.6433682901950785, "grad_norm": 0.17790475487709045, "learning_rate": 0.002, "loss": 2.5616, "step": 322940 }, { "epoch": 0.6433882124187174, "grad_norm": 0.1693117618560791, "learning_rate": 0.002, "loss": 2.5533, "step": 322950 }, { "epoch": 0.6434081346423562, "grad_norm": 0.15329734981060028, "learning_rate": 0.002, "loss": 2.557, "step": 322960 }, { "epoch": 0.6434280568659951, "grad_norm": 0.15794476866722107, "learning_rate": 0.002, "loss": 2.5643, "step": 322970 }, { "epoch": 0.643447979089634, "grad_norm": 0.16349737346172333, "learning_rate": 0.002, "loss": 2.5684, "step": 322980 }, { "epoch": 0.643467901313273, "grad_norm": 0.1687316745519638, "learning_rate": 0.002, "loss": 2.5542, "step": 322990 }, { "epoch": 0.6434878235369119, "grad_norm": 0.16971446573734283, "learning_rate": 0.002, "loss": 2.5463, "step": 323000 }, { "epoch": 0.6435077457605508, "grad_norm": 0.1745414286851883, "learning_rate": 0.002, "loss": 2.5522, "step": 323010 }, { "epoch": 0.6435276679841897, "grad_norm": 0.18242968618869781, "learning_rate": 0.002, "loss": 2.5419, "step": 323020 }, { "epoch": 0.6435475902078286, "grad_norm": 0.14114521443843842, "learning_rate": 0.002, "loss": 2.5611, "step": 323030 }, { "epoch": 0.6435675124314676, "grad_norm": 0.1652563214302063, "learning_rate": 0.002, "loss": 2.5473, "step": 323040 }, { "epoch": 0.6435874346551065, "grad_norm": 0.17476533353328705, "learning_rate": 0.002, "loss": 2.5596, "step": 323050 }, { "epoch": 0.6436073568787454, "grad_norm": 0.16242673993110657, "learning_rate": 0.002, "loss": 2.5446, "step": 323060 }, { "epoch": 0.6436272791023843, "grad_norm": 0.1640046387910843, "learning_rate": 0.002, "loss": 2.5712, "step": 323070 }, { "epoch": 0.6436472013260232, "grad_norm": 0.14557220041751862, "learning_rate": 0.002, "loss": 2.5617, "step": 323080 }, { "epoch": 0.6436671235496622, "grad_norm": 0.17543597519397736, "learning_rate": 0.002, "loss": 2.5593, "step": 323090 }, { "epoch": 0.643687045773301, "grad_norm": 0.1554194837808609, "learning_rate": 0.002, "loss": 2.5373, "step": 323100 }, { "epoch": 0.6437069679969399, "grad_norm": 0.14620187878608704, "learning_rate": 0.002, "loss": 2.5684, "step": 323110 }, { "epoch": 0.6437268902205788, "grad_norm": 0.15558508038520813, "learning_rate": 0.002, "loss": 2.5512, "step": 323120 }, { "epoch": 0.6437468124442177, "grad_norm": 0.15414617955684662, "learning_rate": 0.002, "loss": 2.5506, "step": 323130 }, { "epoch": 0.6437667346678567, "grad_norm": 0.15299880504608154, "learning_rate": 0.002, "loss": 2.5548, "step": 323140 }, { "epoch": 0.6437866568914956, "grad_norm": 0.17251528799533844, "learning_rate": 0.002, "loss": 2.5603, "step": 323150 }, { "epoch": 0.6438065791151345, "grad_norm": 0.15448814630508423, "learning_rate": 0.002, "loss": 2.5782, "step": 323160 }, { "epoch": 0.6438265013387734, "grad_norm": 0.19897238910198212, "learning_rate": 0.002, "loss": 2.5472, "step": 323170 }, { "epoch": 0.6438464235624124, "grad_norm": 0.16493549942970276, "learning_rate": 0.002, "loss": 2.5478, "step": 323180 }, { "epoch": 0.6438663457860513, "grad_norm": 0.19244509935379028, "learning_rate": 0.002, "loss": 2.5516, "step": 323190 }, { "epoch": 0.6438862680096902, "grad_norm": 0.17685188353061676, "learning_rate": 0.002, "loss": 2.5677, "step": 323200 }, { "epoch": 0.6439061902333291, "grad_norm": 0.17089271545410156, "learning_rate": 0.002, "loss": 2.5394, "step": 323210 }, { "epoch": 0.643926112456968, "grad_norm": 0.20759548246860504, "learning_rate": 0.002, "loss": 2.542, "step": 323220 }, { "epoch": 0.643946034680607, "grad_norm": 0.15681783854961395, "learning_rate": 0.002, "loss": 2.5477, "step": 323230 }, { "epoch": 0.6439659569042459, "grad_norm": 0.1912621259689331, "learning_rate": 0.002, "loss": 2.5607, "step": 323240 }, { "epoch": 0.6439858791278847, "grad_norm": 0.16267329454421997, "learning_rate": 0.002, "loss": 2.5527, "step": 323250 }, { "epoch": 0.6440058013515236, "grad_norm": 0.16016130149364471, "learning_rate": 0.002, "loss": 2.5521, "step": 323260 }, { "epoch": 0.6440257235751625, "grad_norm": 0.17964163422584534, "learning_rate": 0.002, "loss": 2.5546, "step": 323270 }, { "epoch": 0.6440456457988015, "grad_norm": 0.18688710033893585, "learning_rate": 0.002, "loss": 2.5593, "step": 323280 }, { "epoch": 0.6440655680224404, "grad_norm": 0.1778809130191803, "learning_rate": 0.002, "loss": 2.5431, "step": 323290 }, { "epoch": 0.6440854902460793, "grad_norm": 0.1642014980316162, "learning_rate": 0.002, "loss": 2.5532, "step": 323300 }, { "epoch": 0.6441054124697182, "grad_norm": 0.15417519211769104, "learning_rate": 0.002, "loss": 2.5491, "step": 323310 }, { "epoch": 0.6441253346933571, "grad_norm": 0.20547838509082794, "learning_rate": 0.002, "loss": 2.5499, "step": 323320 }, { "epoch": 0.6441452569169961, "grad_norm": 0.17223280668258667, "learning_rate": 0.002, "loss": 2.5502, "step": 323330 }, { "epoch": 0.644165179140635, "grad_norm": 0.17657159268856049, "learning_rate": 0.002, "loss": 2.5634, "step": 323340 }, { "epoch": 0.6441851013642739, "grad_norm": 0.17123323678970337, "learning_rate": 0.002, "loss": 2.5594, "step": 323350 }, { "epoch": 0.6442050235879128, "grad_norm": 0.20421762764453888, "learning_rate": 0.002, "loss": 2.5478, "step": 323360 }, { "epoch": 0.6442249458115517, "grad_norm": 0.1534707099199295, "learning_rate": 0.002, "loss": 2.5495, "step": 323370 }, { "epoch": 0.6442448680351907, "grad_norm": 0.15505445003509521, "learning_rate": 0.002, "loss": 2.5443, "step": 323380 }, { "epoch": 0.6442647902588295, "grad_norm": 0.1636618822813034, "learning_rate": 0.002, "loss": 2.5493, "step": 323390 }, { "epoch": 0.6442847124824684, "grad_norm": 0.15880925953388214, "learning_rate": 0.002, "loss": 2.5544, "step": 323400 }, { "epoch": 0.6443046347061073, "grad_norm": 0.16828209161758423, "learning_rate": 0.002, "loss": 2.5486, "step": 323410 }, { "epoch": 0.6443245569297462, "grad_norm": 0.13725872337818146, "learning_rate": 0.002, "loss": 2.5544, "step": 323420 }, { "epoch": 0.6443444791533852, "grad_norm": 0.1586376279592514, "learning_rate": 0.002, "loss": 2.5514, "step": 323430 }, { "epoch": 0.6443644013770241, "grad_norm": 0.14952901005744934, "learning_rate": 0.002, "loss": 2.5653, "step": 323440 }, { "epoch": 0.644384323600663, "grad_norm": 0.21268142759799957, "learning_rate": 0.002, "loss": 2.5538, "step": 323450 }, { "epoch": 0.6444042458243019, "grad_norm": 0.16362249851226807, "learning_rate": 0.002, "loss": 2.5493, "step": 323460 }, { "epoch": 0.6444241680479409, "grad_norm": 0.19989325106143951, "learning_rate": 0.002, "loss": 2.5515, "step": 323470 }, { "epoch": 0.6444440902715798, "grad_norm": 0.18008488416671753, "learning_rate": 0.002, "loss": 2.5597, "step": 323480 }, { "epoch": 0.6444640124952187, "grad_norm": 0.16154290735721588, "learning_rate": 0.002, "loss": 2.5475, "step": 323490 }, { "epoch": 0.6444839347188576, "grad_norm": 0.1900184452533722, "learning_rate": 0.002, "loss": 2.5438, "step": 323500 }, { "epoch": 0.6445038569424965, "grad_norm": 0.1703679859638214, "learning_rate": 0.002, "loss": 2.5759, "step": 323510 }, { "epoch": 0.6445237791661355, "grad_norm": 0.16244876384735107, "learning_rate": 0.002, "loss": 2.5567, "step": 323520 }, { "epoch": 0.6445437013897743, "grad_norm": 0.13898977637290955, "learning_rate": 0.002, "loss": 2.5483, "step": 323530 }, { "epoch": 0.6445636236134132, "grad_norm": 0.15876081585884094, "learning_rate": 0.002, "loss": 2.5719, "step": 323540 }, { "epoch": 0.6445835458370521, "grad_norm": 0.15431126952171326, "learning_rate": 0.002, "loss": 2.5709, "step": 323550 }, { "epoch": 0.644603468060691, "grad_norm": 0.1373569369316101, "learning_rate": 0.002, "loss": 2.5628, "step": 323560 }, { "epoch": 0.64462339028433, "grad_norm": 0.1723356992006302, "learning_rate": 0.002, "loss": 2.5515, "step": 323570 }, { "epoch": 0.6446433125079689, "grad_norm": 0.15372566878795624, "learning_rate": 0.002, "loss": 2.5514, "step": 323580 }, { "epoch": 0.6446632347316078, "grad_norm": 0.1598004251718521, "learning_rate": 0.002, "loss": 2.5518, "step": 323590 }, { "epoch": 0.6446831569552467, "grad_norm": 0.1594536155462265, "learning_rate": 0.002, "loss": 2.5533, "step": 323600 }, { "epoch": 0.6447030791788856, "grad_norm": 0.19325055181980133, "learning_rate": 0.002, "loss": 2.5541, "step": 323610 }, { "epoch": 0.6447230014025246, "grad_norm": 0.16790904104709625, "learning_rate": 0.002, "loss": 2.5521, "step": 323620 }, { "epoch": 0.6447429236261635, "grad_norm": 0.16804973781108856, "learning_rate": 0.002, "loss": 2.544, "step": 323630 }, { "epoch": 0.6447628458498024, "grad_norm": 0.1529020220041275, "learning_rate": 0.002, "loss": 2.5457, "step": 323640 }, { "epoch": 0.6447827680734413, "grad_norm": 0.17726127803325653, "learning_rate": 0.002, "loss": 2.5596, "step": 323650 }, { "epoch": 0.6448026902970801, "grad_norm": 0.15825285017490387, "learning_rate": 0.002, "loss": 2.563, "step": 323660 }, { "epoch": 0.6448226125207192, "grad_norm": 0.1642163097858429, "learning_rate": 0.002, "loss": 2.5513, "step": 323670 }, { "epoch": 0.644842534744358, "grad_norm": 0.22374625504016876, "learning_rate": 0.002, "loss": 2.5548, "step": 323680 }, { "epoch": 0.6448624569679969, "grad_norm": 0.14766713976860046, "learning_rate": 0.002, "loss": 2.5661, "step": 323690 }, { "epoch": 0.6448823791916358, "grad_norm": 0.17441219091415405, "learning_rate": 0.002, "loss": 2.5589, "step": 323700 }, { "epoch": 0.6449023014152747, "grad_norm": 0.42953911423683167, "learning_rate": 0.002, "loss": 2.5467, "step": 323710 }, { "epoch": 0.6449222236389137, "grad_norm": 0.21403957903385162, "learning_rate": 0.002, "loss": 2.567, "step": 323720 }, { "epoch": 0.6449421458625526, "grad_norm": 0.15414762496948242, "learning_rate": 0.002, "loss": 2.5483, "step": 323730 }, { "epoch": 0.6449620680861915, "grad_norm": 0.18343567848205566, "learning_rate": 0.002, "loss": 2.5591, "step": 323740 }, { "epoch": 0.6449819903098304, "grad_norm": 0.17438726127147675, "learning_rate": 0.002, "loss": 2.5696, "step": 323750 }, { "epoch": 0.6450019125334694, "grad_norm": 0.15454533696174622, "learning_rate": 0.002, "loss": 2.5666, "step": 323760 }, { "epoch": 0.6450218347571083, "grad_norm": 0.17941103875637054, "learning_rate": 0.002, "loss": 2.5639, "step": 323770 }, { "epoch": 0.6450417569807472, "grad_norm": 0.16577893495559692, "learning_rate": 0.002, "loss": 2.5556, "step": 323780 }, { "epoch": 0.6450616792043861, "grad_norm": 0.1662575900554657, "learning_rate": 0.002, "loss": 2.5607, "step": 323790 }, { "epoch": 0.645081601428025, "grad_norm": 0.1451931744813919, "learning_rate": 0.002, "loss": 2.5566, "step": 323800 }, { "epoch": 0.645101523651664, "grad_norm": 0.20579709112644196, "learning_rate": 0.002, "loss": 2.5533, "step": 323810 }, { "epoch": 0.6451214458753028, "grad_norm": 0.15692579746246338, "learning_rate": 0.002, "loss": 2.5589, "step": 323820 }, { "epoch": 0.6451413680989417, "grad_norm": 0.15295526385307312, "learning_rate": 0.002, "loss": 2.564, "step": 323830 }, { "epoch": 0.6451612903225806, "grad_norm": 0.14345134794712067, "learning_rate": 0.002, "loss": 2.5596, "step": 323840 }, { "epoch": 0.6451812125462195, "grad_norm": 0.17592070996761322, "learning_rate": 0.002, "loss": 2.5651, "step": 323850 }, { "epoch": 0.6452011347698585, "grad_norm": 0.16177499294281006, "learning_rate": 0.002, "loss": 2.5492, "step": 323860 }, { "epoch": 0.6452210569934974, "grad_norm": 0.1461031585931778, "learning_rate": 0.002, "loss": 2.5459, "step": 323870 }, { "epoch": 0.6452409792171363, "grad_norm": 0.16716454923152924, "learning_rate": 0.002, "loss": 2.5591, "step": 323880 }, { "epoch": 0.6452609014407752, "grad_norm": 0.16135218739509583, "learning_rate": 0.002, "loss": 2.5431, "step": 323890 }, { "epoch": 0.6452808236644141, "grad_norm": 0.17885631322860718, "learning_rate": 0.002, "loss": 2.562, "step": 323900 }, { "epoch": 0.6453007458880531, "grad_norm": 0.19271080195903778, "learning_rate": 0.002, "loss": 2.5552, "step": 323910 }, { "epoch": 0.645320668111692, "grad_norm": 0.14662738144397736, "learning_rate": 0.002, "loss": 2.5646, "step": 323920 }, { "epoch": 0.6453405903353309, "grad_norm": 0.18390032649040222, "learning_rate": 0.002, "loss": 2.5598, "step": 323930 }, { "epoch": 0.6453605125589698, "grad_norm": 0.1362474411725998, "learning_rate": 0.002, "loss": 2.5621, "step": 323940 }, { "epoch": 0.6453804347826086, "grad_norm": 0.19503869116306305, "learning_rate": 0.002, "loss": 2.5445, "step": 323950 }, { "epoch": 0.6454003570062476, "grad_norm": 0.1448192149400711, "learning_rate": 0.002, "loss": 2.5498, "step": 323960 }, { "epoch": 0.6454202792298865, "grad_norm": 0.17107029259204865, "learning_rate": 0.002, "loss": 2.5562, "step": 323970 }, { "epoch": 0.6454402014535254, "grad_norm": 0.17162920534610748, "learning_rate": 0.002, "loss": 2.5628, "step": 323980 }, { "epoch": 0.6454601236771643, "grad_norm": 0.17456762492656708, "learning_rate": 0.002, "loss": 2.5498, "step": 323990 }, { "epoch": 0.6454800459008032, "grad_norm": 0.14277176558971405, "learning_rate": 0.002, "loss": 2.5575, "step": 324000 }, { "epoch": 0.6454999681244422, "grad_norm": 0.15327197313308716, "learning_rate": 0.002, "loss": 2.5521, "step": 324010 }, { "epoch": 0.6455198903480811, "grad_norm": 0.18331867456436157, "learning_rate": 0.002, "loss": 2.5534, "step": 324020 }, { "epoch": 0.64553981257172, "grad_norm": 0.14846046268939972, "learning_rate": 0.002, "loss": 2.5516, "step": 324030 }, { "epoch": 0.6455597347953589, "grad_norm": 0.17497900128364563, "learning_rate": 0.002, "loss": 2.5708, "step": 324040 }, { "epoch": 0.6455796570189979, "grad_norm": 0.1777762472629547, "learning_rate": 0.002, "loss": 2.5626, "step": 324050 }, { "epoch": 0.6455995792426368, "grad_norm": 0.18695828318595886, "learning_rate": 0.002, "loss": 2.5762, "step": 324060 }, { "epoch": 0.6456195014662757, "grad_norm": 0.17216187715530396, "learning_rate": 0.002, "loss": 2.5547, "step": 324070 }, { "epoch": 0.6456394236899146, "grad_norm": 0.15484614670276642, "learning_rate": 0.002, "loss": 2.5558, "step": 324080 }, { "epoch": 0.6456593459135535, "grad_norm": 0.1757601946592331, "learning_rate": 0.002, "loss": 2.5516, "step": 324090 }, { "epoch": 0.6456792681371925, "grad_norm": 0.16925466060638428, "learning_rate": 0.002, "loss": 2.5629, "step": 324100 }, { "epoch": 0.6456991903608313, "grad_norm": 0.15100400149822235, "learning_rate": 0.002, "loss": 2.5455, "step": 324110 }, { "epoch": 0.6457191125844702, "grad_norm": 0.1676623672246933, "learning_rate": 0.002, "loss": 2.5552, "step": 324120 }, { "epoch": 0.6457390348081091, "grad_norm": 0.15284940600395203, "learning_rate": 0.002, "loss": 2.5502, "step": 324130 }, { "epoch": 0.645758957031748, "grad_norm": 0.14548322558403015, "learning_rate": 0.002, "loss": 2.5626, "step": 324140 }, { "epoch": 0.645778879255387, "grad_norm": 0.2029336839914322, "learning_rate": 0.002, "loss": 2.5561, "step": 324150 }, { "epoch": 0.6457988014790259, "grad_norm": 0.1637389361858368, "learning_rate": 0.002, "loss": 2.5676, "step": 324160 }, { "epoch": 0.6458187237026648, "grad_norm": 0.13137193024158478, "learning_rate": 0.002, "loss": 2.5529, "step": 324170 }, { "epoch": 0.6458386459263037, "grad_norm": 0.18003584444522858, "learning_rate": 0.002, "loss": 2.5587, "step": 324180 }, { "epoch": 0.6458585681499426, "grad_norm": 0.1516217291355133, "learning_rate": 0.002, "loss": 2.5688, "step": 324190 }, { "epoch": 0.6458784903735816, "grad_norm": 0.142876535654068, "learning_rate": 0.002, "loss": 2.5449, "step": 324200 }, { "epoch": 0.6458984125972205, "grad_norm": 0.1693575084209442, "learning_rate": 0.002, "loss": 2.5316, "step": 324210 }, { "epoch": 0.6459183348208594, "grad_norm": 0.16235747933387756, "learning_rate": 0.002, "loss": 2.566, "step": 324220 }, { "epoch": 0.6459382570444983, "grad_norm": 0.16223646700382233, "learning_rate": 0.002, "loss": 2.5659, "step": 324230 }, { "epoch": 0.6459581792681371, "grad_norm": 0.1702481061220169, "learning_rate": 0.002, "loss": 2.5423, "step": 324240 }, { "epoch": 0.6459781014917761, "grad_norm": 0.16436536610126495, "learning_rate": 0.002, "loss": 2.5551, "step": 324250 }, { "epoch": 0.645998023715415, "grad_norm": 0.14020732045173645, "learning_rate": 0.002, "loss": 2.5678, "step": 324260 }, { "epoch": 0.6460179459390539, "grad_norm": 0.16707326471805573, "learning_rate": 0.002, "loss": 2.5582, "step": 324270 }, { "epoch": 0.6460378681626928, "grad_norm": 0.14523302018642426, "learning_rate": 0.002, "loss": 2.5567, "step": 324280 }, { "epoch": 0.6460577903863317, "grad_norm": 0.14734873175621033, "learning_rate": 0.002, "loss": 2.5485, "step": 324290 }, { "epoch": 0.6460777126099707, "grad_norm": 0.16699367761611938, "learning_rate": 0.002, "loss": 2.5614, "step": 324300 }, { "epoch": 0.6460976348336096, "grad_norm": 0.21172502636909485, "learning_rate": 0.002, "loss": 2.5595, "step": 324310 }, { "epoch": 0.6461175570572485, "grad_norm": 0.1723284125328064, "learning_rate": 0.002, "loss": 2.5609, "step": 324320 }, { "epoch": 0.6461374792808874, "grad_norm": 0.19002823531627655, "learning_rate": 0.002, "loss": 2.5403, "step": 324330 }, { "epoch": 0.6461574015045263, "grad_norm": 0.14645473659038544, "learning_rate": 0.002, "loss": 2.5655, "step": 324340 }, { "epoch": 0.6461773237281653, "grad_norm": 0.16977187991142273, "learning_rate": 0.002, "loss": 2.5474, "step": 324350 }, { "epoch": 0.6461972459518042, "grad_norm": 0.1972680538892746, "learning_rate": 0.002, "loss": 2.5373, "step": 324360 }, { "epoch": 0.6462171681754431, "grad_norm": 0.1708008050918579, "learning_rate": 0.002, "loss": 2.5619, "step": 324370 }, { "epoch": 0.646237090399082, "grad_norm": 0.15323758125305176, "learning_rate": 0.002, "loss": 2.558, "step": 324380 }, { "epoch": 0.646257012622721, "grad_norm": 0.1698087602853775, "learning_rate": 0.002, "loss": 2.5578, "step": 324390 }, { "epoch": 0.6462769348463598, "grad_norm": 0.1776273250579834, "learning_rate": 0.002, "loss": 2.5672, "step": 324400 }, { "epoch": 0.6462968570699987, "grad_norm": 0.15094996988773346, "learning_rate": 0.002, "loss": 2.5346, "step": 324410 }, { "epoch": 0.6463167792936376, "grad_norm": 0.15469568967819214, "learning_rate": 0.002, "loss": 2.5375, "step": 324420 }, { "epoch": 0.6463367015172765, "grad_norm": 0.15649065375328064, "learning_rate": 0.002, "loss": 2.563, "step": 324430 }, { "epoch": 0.6463566237409155, "grad_norm": 0.1718270629644394, "learning_rate": 0.002, "loss": 2.5532, "step": 324440 }, { "epoch": 0.6463765459645544, "grad_norm": 0.1805594265460968, "learning_rate": 0.002, "loss": 2.546, "step": 324450 }, { "epoch": 0.6463964681881933, "grad_norm": 0.16958557069301605, "learning_rate": 0.002, "loss": 2.5416, "step": 324460 }, { "epoch": 0.6464163904118322, "grad_norm": 0.14874401688575745, "learning_rate": 0.002, "loss": 2.5647, "step": 324470 }, { "epoch": 0.6464363126354711, "grad_norm": 0.20580169558525085, "learning_rate": 0.002, "loss": 2.5503, "step": 324480 }, { "epoch": 0.6464562348591101, "grad_norm": 0.22170205414295197, "learning_rate": 0.002, "loss": 2.5512, "step": 324490 }, { "epoch": 0.646476157082749, "grad_norm": 0.17220108211040497, "learning_rate": 0.002, "loss": 2.541, "step": 324500 }, { "epoch": 0.6464960793063879, "grad_norm": 0.14169836044311523, "learning_rate": 0.002, "loss": 2.5617, "step": 324510 }, { "epoch": 0.6465160015300268, "grad_norm": 0.14595261216163635, "learning_rate": 0.002, "loss": 2.5637, "step": 324520 }, { "epoch": 0.6465359237536656, "grad_norm": 0.18167947232723236, "learning_rate": 0.002, "loss": 2.5748, "step": 324530 }, { "epoch": 0.6465558459773046, "grad_norm": 0.14549189805984497, "learning_rate": 0.002, "loss": 2.5491, "step": 324540 }, { "epoch": 0.6465757682009435, "grad_norm": 0.1621144711971283, "learning_rate": 0.002, "loss": 2.5429, "step": 324550 }, { "epoch": 0.6465956904245824, "grad_norm": 0.2113199084997177, "learning_rate": 0.002, "loss": 2.5554, "step": 324560 }, { "epoch": 0.6466156126482213, "grad_norm": 0.15601906180381775, "learning_rate": 0.002, "loss": 2.5645, "step": 324570 }, { "epoch": 0.6466355348718602, "grad_norm": 0.18231318891048431, "learning_rate": 0.002, "loss": 2.5663, "step": 324580 }, { "epoch": 0.6466554570954992, "grad_norm": 0.15789318084716797, "learning_rate": 0.002, "loss": 2.5571, "step": 324590 }, { "epoch": 0.6466753793191381, "grad_norm": 0.20753788948059082, "learning_rate": 0.002, "loss": 2.555, "step": 324600 }, { "epoch": 0.646695301542777, "grad_norm": 0.1745254397392273, "learning_rate": 0.002, "loss": 2.5516, "step": 324610 }, { "epoch": 0.6467152237664159, "grad_norm": 0.17479437589645386, "learning_rate": 0.002, "loss": 2.5597, "step": 324620 }, { "epoch": 0.6467351459900548, "grad_norm": 0.1514943540096283, "learning_rate": 0.002, "loss": 2.542, "step": 324630 }, { "epoch": 0.6467550682136938, "grad_norm": 0.1867031455039978, "learning_rate": 0.002, "loss": 2.5531, "step": 324640 }, { "epoch": 0.6467749904373327, "grad_norm": 0.15143045783042908, "learning_rate": 0.002, "loss": 2.5736, "step": 324650 }, { "epoch": 0.6467949126609716, "grad_norm": 0.1827012002468109, "learning_rate": 0.002, "loss": 2.5603, "step": 324660 }, { "epoch": 0.6468148348846104, "grad_norm": 0.1738271564245224, "learning_rate": 0.002, "loss": 2.5596, "step": 324670 }, { "epoch": 0.6468347571082494, "grad_norm": 0.14242835342884064, "learning_rate": 0.002, "loss": 2.5496, "step": 324680 }, { "epoch": 0.6468546793318883, "grad_norm": 0.15922485291957855, "learning_rate": 0.002, "loss": 2.5582, "step": 324690 }, { "epoch": 0.6468746015555272, "grad_norm": 0.16456352174282074, "learning_rate": 0.002, "loss": 2.5554, "step": 324700 }, { "epoch": 0.6468945237791661, "grad_norm": 0.16621363162994385, "learning_rate": 0.002, "loss": 2.5486, "step": 324710 }, { "epoch": 0.646914446002805, "grad_norm": 0.1500779092311859, "learning_rate": 0.002, "loss": 2.5638, "step": 324720 }, { "epoch": 0.646934368226444, "grad_norm": 0.16090480983257294, "learning_rate": 0.002, "loss": 2.5693, "step": 324730 }, { "epoch": 0.6469542904500829, "grad_norm": 0.18133026361465454, "learning_rate": 0.002, "loss": 2.5543, "step": 324740 }, { "epoch": 0.6469742126737218, "grad_norm": 0.19411113858222961, "learning_rate": 0.002, "loss": 2.5511, "step": 324750 }, { "epoch": 0.6469941348973607, "grad_norm": 0.14871463179588318, "learning_rate": 0.002, "loss": 2.5582, "step": 324760 }, { "epoch": 0.6470140571209996, "grad_norm": 0.19287297129631042, "learning_rate": 0.002, "loss": 2.5428, "step": 324770 }, { "epoch": 0.6470339793446386, "grad_norm": 0.17211663722991943, "learning_rate": 0.002, "loss": 2.5468, "step": 324780 }, { "epoch": 0.6470539015682775, "grad_norm": 0.16040092706680298, "learning_rate": 0.002, "loss": 2.5705, "step": 324790 }, { "epoch": 0.6470738237919164, "grad_norm": 0.19980044662952423, "learning_rate": 0.002, "loss": 2.5525, "step": 324800 }, { "epoch": 0.6470937460155552, "grad_norm": 0.13601021468639374, "learning_rate": 0.002, "loss": 2.5519, "step": 324810 }, { "epoch": 0.6471136682391941, "grad_norm": 0.13130441308021545, "learning_rate": 0.002, "loss": 2.5594, "step": 324820 }, { "epoch": 0.6471335904628331, "grad_norm": 0.14690178632736206, "learning_rate": 0.002, "loss": 2.5363, "step": 324830 }, { "epoch": 0.647153512686472, "grad_norm": 0.1758437305688858, "learning_rate": 0.002, "loss": 2.5635, "step": 324840 }, { "epoch": 0.6471734349101109, "grad_norm": 0.18136245012283325, "learning_rate": 0.002, "loss": 2.5581, "step": 324850 }, { "epoch": 0.6471933571337498, "grad_norm": 0.1619349718093872, "learning_rate": 0.002, "loss": 2.5516, "step": 324860 }, { "epoch": 0.6472132793573887, "grad_norm": 0.1364859789609909, "learning_rate": 0.002, "loss": 2.5491, "step": 324870 }, { "epoch": 0.6472332015810277, "grad_norm": 0.1677870899438858, "learning_rate": 0.002, "loss": 2.5641, "step": 324880 }, { "epoch": 0.6472531238046666, "grad_norm": 0.16219483315944672, "learning_rate": 0.002, "loss": 2.5685, "step": 324890 }, { "epoch": 0.6472730460283055, "grad_norm": 0.1911384016275406, "learning_rate": 0.002, "loss": 2.554, "step": 324900 }, { "epoch": 0.6472929682519444, "grad_norm": 0.14264029264450073, "learning_rate": 0.002, "loss": 2.5583, "step": 324910 }, { "epoch": 0.6473128904755833, "grad_norm": 0.17518554627895355, "learning_rate": 0.002, "loss": 2.5487, "step": 324920 }, { "epoch": 0.6473328126992223, "grad_norm": 0.1501670777797699, "learning_rate": 0.002, "loss": 2.5435, "step": 324930 }, { "epoch": 0.6473527349228612, "grad_norm": 0.18335115909576416, "learning_rate": 0.002, "loss": 2.5478, "step": 324940 }, { "epoch": 0.6473726571465, "grad_norm": 0.14446543157100677, "learning_rate": 0.002, "loss": 2.5613, "step": 324950 }, { "epoch": 0.6473925793701389, "grad_norm": 0.1917114406824112, "learning_rate": 0.002, "loss": 2.5666, "step": 324960 }, { "epoch": 0.647412501593778, "grad_norm": 0.15731434524059296, "learning_rate": 0.002, "loss": 2.5541, "step": 324970 }, { "epoch": 0.6474324238174168, "grad_norm": 0.15886881947517395, "learning_rate": 0.002, "loss": 2.5665, "step": 324980 }, { "epoch": 0.6474523460410557, "grad_norm": 0.18500053882598877, "learning_rate": 0.002, "loss": 2.5491, "step": 324990 }, { "epoch": 0.6474722682646946, "grad_norm": 0.17985524237155914, "learning_rate": 0.002, "loss": 2.563, "step": 325000 }, { "epoch": 0.6474921904883335, "grad_norm": 0.14808423817157745, "learning_rate": 0.002, "loss": 2.5552, "step": 325010 }, { "epoch": 0.6475121127119725, "grad_norm": 0.15859448909759521, "learning_rate": 0.002, "loss": 2.547, "step": 325020 }, { "epoch": 0.6475320349356114, "grad_norm": 0.17190414667129517, "learning_rate": 0.002, "loss": 2.5671, "step": 325030 }, { "epoch": 0.6475519571592503, "grad_norm": 0.15304234623908997, "learning_rate": 0.002, "loss": 2.5425, "step": 325040 }, { "epoch": 0.6475718793828892, "grad_norm": 0.1797541230916977, "learning_rate": 0.002, "loss": 2.5384, "step": 325050 }, { "epoch": 0.6475918016065281, "grad_norm": 0.18420222401618958, "learning_rate": 0.002, "loss": 2.5496, "step": 325060 }, { "epoch": 0.6476117238301671, "grad_norm": 0.1371830850839615, "learning_rate": 0.002, "loss": 2.5684, "step": 325070 }, { "epoch": 0.647631646053806, "grad_norm": 0.1890815943479538, "learning_rate": 0.002, "loss": 2.5648, "step": 325080 }, { "epoch": 0.6476515682774449, "grad_norm": 0.14014287292957306, "learning_rate": 0.002, "loss": 2.5386, "step": 325090 }, { "epoch": 0.6476714905010837, "grad_norm": 0.14875930547714233, "learning_rate": 0.002, "loss": 2.5446, "step": 325100 }, { "epoch": 0.6476914127247226, "grad_norm": 0.18066900968551636, "learning_rate": 0.002, "loss": 2.5513, "step": 325110 }, { "epoch": 0.6477113349483616, "grad_norm": 0.15743520855903625, "learning_rate": 0.002, "loss": 2.543, "step": 325120 }, { "epoch": 0.6477312571720005, "grad_norm": 0.13883498311042786, "learning_rate": 0.002, "loss": 2.5383, "step": 325130 }, { "epoch": 0.6477511793956394, "grad_norm": 0.1861041635274887, "learning_rate": 0.002, "loss": 2.5539, "step": 325140 }, { "epoch": 0.6477711016192783, "grad_norm": 0.1752612143754959, "learning_rate": 0.002, "loss": 2.5442, "step": 325150 }, { "epoch": 0.6477910238429172, "grad_norm": 0.17114177346229553, "learning_rate": 0.002, "loss": 2.5593, "step": 325160 }, { "epoch": 0.6478109460665562, "grad_norm": 0.1390710473060608, "learning_rate": 0.002, "loss": 2.5654, "step": 325170 }, { "epoch": 0.6478308682901951, "grad_norm": 0.15700817108154297, "learning_rate": 0.002, "loss": 2.5379, "step": 325180 }, { "epoch": 0.647850790513834, "grad_norm": 0.17655400931835175, "learning_rate": 0.002, "loss": 2.5502, "step": 325190 }, { "epoch": 0.6478707127374729, "grad_norm": 0.18741075694561005, "learning_rate": 0.002, "loss": 2.5544, "step": 325200 }, { "epoch": 0.6478906349611118, "grad_norm": 0.1942024677991867, "learning_rate": 0.002, "loss": 2.5567, "step": 325210 }, { "epoch": 0.6479105571847508, "grad_norm": 0.16280902922153473, "learning_rate": 0.002, "loss": 2.5465, "step": 325220 }, { "epoch": 0.6479304794083897, "grad_norm": 0.14421342313289642, "learning_rate": 0.002, "loss": 2.5534, "step": 325230 }, { "epoch": 0.6479504016320285, "grad_norm": 0.17543023824691772, "learning_rate": 0.002, "loss": 2.5523, "step": 325240 }, { "epoch": 0.6479703238556674, "grad_norm": 0.15750768780708313, "learning_rate": 0.002, "loss": 2.5527, "step": 325250 }, { "epoch": 0.6479902460793064, "grad_norm": 0.2600339353084564, "learning_rate": 0.002, "loss": 2.5616, "step": 325260 }, { "epoch": 0.6480101683029453, "grad_norm": 0.14498978853225708, "learning_rate": 0.002, "loss": 2.5631, "step": 325270 }, { "epoch": 0.6480300905265842, "grad_norm": 0.13922682404518127, "learning_rate": 0.002, "loss": 2.5637, "step": 325280 }, { "epoch": 0.6480500127502231, "grad_norm": 0.1844555139541626, "learning_rate": 0.002, "loss": 2.5497, "step": 325290 }, { "epoch": 0.648069934973862, "grad_norm": 0.16430611908435822, "learning_rate": 0.002, "loss": 2.5402, "step": 325300 }, { "epoch": 0.648089857197501, "grad_norm": 0.1419590413570404, "learning_rate": 0.002, "loss": 2.5423, "step": 325310 }, { "epoch": 0.6481097794211399, "grad_norm": 0.18442760407924652, "learning_rate": 0.002, "loss": 2.5522, "step": 325320 }, { "epoch": 0.6481297016447788, "grad_norm": 0.16365939378738403, "learning_rate": 0.002, "loss": 2.5534, "step": 325330 }, { "epoch": 0.6481496238684177, "grad_norm": 0.16680313646793365, "learning_rate": 0.002, "loss": 2.5503, "step": 325340 }, { "epoch": 0.6481695460920566, "grad_norm": 0.1609431803226471, "learning_rate": 0.002, "loss": 2.5595, "step": 325350 }, { "epoch": 0.6481894683156956, "grad_norm": 0.17616555094718933, "learning_rate": 0.002, "loss": 2.5508, "step": 325360 }, { "epoch": 0.6482093905393345, "grad_norm": 0.1338859349489212, "learning_rate": 0.002, "loss": 2.5743, "step": 325370 }, { "epoch": 0.6482293127629734, "grad_norm": 0.18829499185085297, "learning_rate": 0.002, "loss": 2.5557, "step": 325380 }, { "epoch": 0.6482492349866122, "grad_norm": 0.15700112283229828, "learning_rate": 0.002, "loss": 2.5633, "step": 325390 }, { "epoch": 0.6482691572102511, "grad_norm": 0.15062668919563293, "learning_rate": 0.002, "loss": 2.5647, "step": 325400 }, { "epoch": 0.6482890794338901, "grad_norm": 0.19848965108394623, "learning_rate": 0.002, "loss": 2.5508, "step": 325410 }, { "epoch": 0.648309001657529, "grad_norm": 0.13780519366264343, "learning_rate": 0.002, "loss": 2.5632, "step": 325420 }, { "epoch": 0.6483289238811679, "grad_norm": 0.1809515506029129, "learning_rate": 0.002, "loss": 2.5602, "step": 325430 }, { "epoch": 0.6483488461048068, "grad_norm": 0.17487390339374542, "learning_rate": 0.002, "loss": 2.548, "step": 325440 }, { "epoch": 0.6483687683284457, "grad_norm": 0.17471669614315033, "learning_rate": 0.002, "loss": 2.5574, "step": 325450 }, { "epoch": 0.6483886905520847, "grad_norm": 0.1680048257112503, "learning_rate": 0.002, "loss": 2.5486, "step": 325460 }, { "epoch": 0.6484086127757236, "grad_norm": 0.1597190797328949, "learning_rate": 0.002, "loss": 2.5561, "step": 325470 }, { "epoch": 0.6484285349993625, "grad_norm": 0.15394112467765808, "learning_rate": 0.002, "loss": 2.5468, "step": 325480 }, { "epoch": 0.6484484572230014, "grad_norm": 0.16357001662254333, "learning_rate": 0.002, "loss": 2.5618, "step": 325490 }, { "epoch": 0.6484683794466403, "grad_norm": 0.1759870946407318, "learning_rate": 0.002, "loss": 2.5607, "step": 325500 }, { "epoch": 0.6484883016702793, "grad_norm": 0.1767052412033081, "learning_rate": 0.002, "loss": 2.5484, "step": 325510 }, { "epoch": 0.6485082238939182, "grad_norm": 0.15611059963703156, "learning_rate": 0.002, "loss": 2.5399, "step": 325520 }, { "epoch": 0.648528146117557, "grad_norm": 0.16549591720104218, "learning_rate": 0.002, "loss": 2.5545, "step": 325530 }, { "epoch": 0.6485480683411959, "grad_norm": 0.14742164313793182, "learning_rate": 0.002, "loss": 2.5537, "step": 325540 }, { "epoch": 0.6485679905648349, "grad_norm": 0.16134482622146606, "learning_rate": 0.002, "loss": 2.5595, "step": 325550 }, { "epoch": 0.6485879127884738, "grad_norm": 0.17391951382160187, "learning_rate": 0.002, "loss": 2.5616, "step": 325560 }, { "epoch": 0.6486078350121127, "grad_norm": 0.13907787203788757, "learning_rate": 0.002, "loss": 2.5496, "step": 325570 }, { "epoch": 0.6486277572357516, "grad_norm": 0.17487972974777222, "learning_rate": 0.002, "loss": 2.5538, "step": 325580 }, { "epoch": 0.6486476794593905, "grad_norm": 0.18067368865013123, "learning_rate": 0.002, "loss": 2.5423, "step": 325590 }, { "epoch": 0.6486676016830295, "grad_norm": 0.15159223973751068, "learning_rate": 0.002, "loss": 2.5589, "step": 325600 }, { "epoch": 0.6486875239066684, "grad_norm": 0.19493786990642548, "learning_rate": 0.002, "loss": 2.5495, "step": 325610 }, { "epoch": 0.6487074461303073, "grad_norm": 0.1437680572271347, "learning_rate": 0.002, "loss": 2.5583, "step": 325620 }, { "epoch": 0.6487273683539462, "grad_norm": 0.1483968049287796, "learning_rate": 0.002, "loss": 2.5482, "step": 325630 }, { "epoch": 0.6487472905775851, "grad_norm": 0.19704614579677582, "learning_rate": 0.002, "loss": 2.567, "step": 325640 }, { "epoch": 0.6487672128012241, "grad_norm": 0.15416117012500763, "learning_rate": 0.002, "loss": 2.5606, "step": 325650 }, { "epoch": 0.648787135024863, "grad_norm": 0.18688516318798065, "learning_rate": 0.002, "loss": 2.5567, "step": 325660 }, { "epoch": 0.6488070572485019, "grad_norm": 0.17586541175842285, "learning_rate": 0.002, "loss": 2.541, "step": 325670 }, { "epoch": 0.6488269794721407, "grad_norm": 0.17400124669075012, "learning_rate": 0.002, "loss": 2.5492, "step": 325680 }, { "epoch": 0.6488469016957796, "grad_norm": 0.18827873468399048, "learning_rate": 0.002, "loss": 2.5634, "step": 325690 }, { "epoch": 0.6488668239194186, "grad_norm": 0.15241274237632751, "learning_rate": 0.002, "loss": 2.5765, "step": 325700 }, { "epoch": 0.6488867461430575, "grad_norm": 0.20064635574817657, "learning_rate": 0.002, "loss": 2.5492, "step": 325710 }, { "epoch": 0.6489066683666964, "grad_norm": 0.15348772704601288, "learning_rate": 0.002, "loss": 2.546, "step": 325720 }, { "epoch": 0.6489265905903353, "grad_norm": 0.16454945504665375, "learning_rate": 0.002, "loss": 2.558, "step": 325730 }, { "epoch": 0.6489465128139742, "grad_norm": 0.14361730217933655, "learning_rate": 0.002, "loss": 2.5585, "step": 325740 }, { "epoch": 0.6489664350376132, "grad_norm": 0.15073543787002563, "learning_rate": 0.002, "loss": 2.5611, "step": 325750 }, { "epoch": 0.6489863572612521, "grad_norm": 0.17119669914245605, "learning_rate": 0.002, "loss": 2.5492, "step": 325760 }, { "epoch": 0.649006279484891, "grad_norm": 0.16829699277877808, "learning_rate": 0.002, "loss": 2.5606, "step": 325770 }, { "epoch": 0.6490262017085299, "grad_norm": 0.1847868412733078, "learning_rate": 0.002, "loss": 2.541, "step": 325780 }, { "epoch": 0.6490461239321688, "grad_norm": 0.1650596261024475, "learning_rate": 0.002, "loss": 2.5681, "step": 325790 }, { "epoch": 0.6490660461558078, "grad_norm": 0.18008457124233246, "learning_rate": 0.002, "loss": 2.5547, "step": 325800 }, { "epoch": 0.6490859683794467, "grad_norm": 0.14973042905330658, "learning_rate": 0.002, "loss": 2.5609, "step": 325810 }, { "epoch": 0.6491058906030855, "grad_norm": 0.14005440473556519, "learning_rate": 0.002, "loss": 2.543, "step": 325820 }, { "epoch": 0.6491258128267244, "grad_norm": 0.1498216688632965, "learning_rate": 0.002, "loss": 2.5521, "step": 325830 }, { "epoch": 0.6491457350503633, "grad_norm": 0.1490505188703537, "learning_rate": 0.002, "loss": 2.5676, "step": 325840 }, { "epoch": 0.6491656572740023, "grad_norm": 0.14892426133155823, "learning_rate": 0.002, "loss": 2.5565, "step": 325850 }, { "epoch": 0.6491855794976412, "grad_norm": 0.15122662484645844, "learning_rate": 0.002, "loss": 2.5517, "step": 325860 }, { "epoch": 0.6492055017212801, "grad_norm": 0.19202367961406708, "learning_rate": 0.002, "loss": 2.5432, "step": 325870 }, { "epoch": 0.649225423944919, "grad_norm": 0.15701618790626526, "learning_rate": 0.002, "loss": 2.5613, "step": 325880 }, { "epoch": 0.649245346168558, "grad_norm": 0.14292782545089722, "learning_rate": 0.002, "loss": 2.5511, "step": 325890 }, { "epoch": 0.6492652683921969, "grad_norm": 0.205121710896492, "learning_rate": 0.002, "loss": 2.5459, "step": 325900 }, { "epoch": 0.6492851906158358, "grad_norm": 0.15474943816661835, "learning_rate": 0.002, "loss": 2.5685, "step": 325910 }, { "epoch": 0.6493051128394747, "grad_norm": 0.15829771757125854, "learning_rate": 0.002, "loss": 2.5457, "step": 325920 }, { "epoch": 0.6493250350631136, "grad_norm": 0.15906786918640137, "learning_rate": 0.002, "loss": 2.5529, "step": 325930 }, { "epoch": 0.6493449572867526, "grad_norm": 0.18590117990970612, "learning_rate": 0.002, "loss": 2.5442, "step": 325940 }, { "epoch": 0.6493648795103915, "grad_norm": 0.17457826435565948, "learning_rate": 0.002, "loss": 2.5508, "step": 325950 }, { "epoch": 0.6493848017340303, "grad_norm": 0.13607877492904663, "learning_rate": 0.002, "loss": 2.5507, "step": 325960 }, { "epoch": 0.6494047239576692, "grad_norm": 0.18577811121940613, "learning_rate": 0.002, "loss": 2.5716, "step": 325970 }, { "epoch": 0.6494246461813081, "grad_norm": 0.16050201654434204, "learning_rate": 0.002, "loss": 2.556, "step": 325980 }, { "epoch": 0.6494445684049471, "grad_norm": 0.14776316285133362, "learning_rate": 0.002, "loss": 2.5419, "step": 325990 }, { "epoch": 0.649464490628586, "grad_norm": 0.17483659088611603, "learning_rate": 0.002, "loss": 2.5601, "step": 326000 }, { "epoch": 0.6494844128522249, "grad_norm": 0.18016597628593445, "learning_rate": 0.002, "loss": 2.5468, "step": 326010 }, { "epoch": 0.6495043350758638, "grad_norm": 0.13258783519268036, "learning_rate": 0.002, "loss": 2.5675, "step": 326020 }, { "epoch": 0.6495242572995027, "grad_norm": 0.1850847750902176, "learning_rate": 0.002, "loss": 2.5466, "step": 326030 }, { "epoch": 0.6495441795231417, "grad_norm": 0.18037602305412292, "learning_rate": 0.002, "loss": 2.5464, "step": 326040 }, { "epoch": 0.6495641017467806, "grad_norm": 0.1747846156358719, "learning_rate": 0.002, "loss": 2.5493, "step": 326050 }, { "epoch": 0.6495840239704195, "grad_norm": 0.15684774518013, "learning_rate": 0.002, "loss": 2.5663, "step": 326060 }, { "epoch": 0.6496039461940584, "grad_norm": 0.17070923745632172, "learning_rate": 0.002, "loss": 2.5608, "step": 326070 }, { "epoch": 0.6496238684176973, "grad_norm": 0.19253085553646088, "learning_rate": 0.002, "loss": 2.5665, "step": 326080 }, { "epoch": 0.6496437906413363, "grad_norm": 0.17052295804023743, "learning_rate": 0.002, "loss": 2.5513, "step": 326090 }, { "epoch": 0.6496637128649752, "grad_norm": 0.16024340689182281, "learning_rate": 0.002, "loss": 2.5516, "step": 326100 }, { "epoch": 0.649683635088614, "grad_norm": 0.13809576630592346, "learning_rate": 0.002, "loss": 2.5657, "step": 326110 }, { "epoch": 0.6497035573122529, "grad_norm": 0.16133855283260345, "learning_rate": 0.002, "loss": 2.5586, "step": 326120 }, { "epoch": 0.6497234795358918, "grad_norm": 0.16403460502624512, "learning_rate": 0.002, "loss": 2.5573, "step": 326130 }, { "epoch": 0.6497434017595308, "grad_norm": 0.16088494658470154, "learning_rate": 0.002, "loss": 2.5493, "step": 326140 }, { "epoch": 0.6497633239831697, "grad_norm": 0.20783479511737823, "learning_rate": 0.002, "loss": 2.5523, "step": 326150 }, { "epoch": 0.6497832462068086, "grad_norm": 0.16623583436012268, "learning_rate": 0.002, "loss": 2.5727, "step": 326160 }, { "epoch": 0.6498031684304475, "grad_norm": 0.17413330078125, "learning_rate": 0.002, "loss": 2.5506, "step": 326170 }, { "epoch": 0.6498230906540865, "grad_norm": 0.18331511318683624, "learning_rate": 0.002, "loss": 2.5613, "step": 326180 }, { "epoch": 0.6498430128777254, "grad_norm": 0.16249492764472961, "learning_rate": 0.002, "loss": 2.547, "step": 326190 }, { "epoch": 0.6498629351013643, "grad_norm": 0.15428180992603302, "learning_rate": 0.002, "loss": 2.5537, "step": 326200 }, { "epoch": 0.6498828573250032, "grad_norm": 0.1742965281009674, "learning_rate": 0.002, "loss": 2.5413, "step": 326210 }, { "epoch": 0.6499027795486421, "grad_norm": 0.17526964843273163, "learning_rate": 0.002, "loss": 2.5491, "step": 326220 }, { "epoch": 0.6499227017722811, "grad_norm": 0.15182217955589294, "learning_rate": 0.002, "loss": 2.5499, "step": 326230 }, { "epoch": 0.64994262399592, "grad_norm": 0.1697588413953781, "learning_rate": 0.002, "loss": 2.5644, "step": 326240 }, { "epoch": 0.6499625462195588, "grad_norm": 0.18605370819568634, "learning_rate": 0.002, "loss": 2.5573, "step": 326250 }, { "epoch": 0.6499824684431977, "grad_norm": 0.1554102599620819, "learning_rate": 0.002, "loss": 2.5485, "step": 326260 }, { "epoch": 0.6500023906668366, "grad_norm": 0.17865516245365143, "learning_rate": 0.002, "loss": 2.5503, "step": 326270 }, { "epoch": 0.6500223128904756, "grad_norm": 0.15437547862529755, "learning_rate": 0.002, "loss": 2.5587, "step": 326280 }, { "epoch": 0.6500422351141145, "grad_norm": 0.1720779836177826, "learning_rate": 0.002, "loss": 2.5558, "step": 326290 }, { "epoch": 0.6500621573377534, "grad_norm": 0.1781105250120163, "learning_rate": 0.002, "loss": 2.546, "step": 326300 }, { "epoch": 0.6500820795613923, "grad_norm": 0.1541992574930191, "learning_rate": 0.002, "loss": 2.5573, "step": 326310 }, { "epoch": 0.6501020017850312, "grad_norm": 0.14054644107818604, "learning_rate": 0.002, "loss": 2.5583, "step": 326320 }, { "epoch": 0.6501219240086702, "grad_norm": 0.18527963757514954, "learning_rate": 0.002, "loss": 2.5611, "step": 326330 }, { "epoch": 0.6501418462323091, "grad_norm": 0.1962832361459732, "learning_rate": 0.002, "loss": 2.5659, "step": 326340 }, { "epoch": 0.650161768455948, "grad_norm": 0.16078215837478638, "learning_rate": 0.002, "loss": 2.5511, "step": 326350 }, { "epoch": 0.6501816906795869, "grad_norm": 0.16067075729370117, "learning_rate": 0.002, "loss": 2.536, "step": 326360 }, { "epoch": 0.6502016129032258, "grad_norm": 0.15551845729351044, "learning_rate": 0.002, "loss": 2.568, "step": 326370 }, { "epoch": 0.6502215351268648, "grad_norm": 0.18717357516288757, "learning_rate": 0.002, "loss": 2.5614, "step": 326380 }, { "epoch": 0.6502414573505036, "grad_norm": 0.13826802372932434, "learning_rate": 0.002, "loss": 2.5481, "step": 326390 }, { "epoch": 0.6502613795741425, "grad_norm": 0.15492521226406097, "learning_rate": 0.002, "loss": 2.555, "step": 326400 }, { "epoch": 0.6502813017977814, "grad_norm": 0.14603067934513092, "learning_rate": 0.002, "loss": 2.5611, "step": 326410 }, { "epoch": 0.6503012240214203, "grad_norm": 0.15500180423259735, "learning_rate": 0.002, "loss": 2.5586, "step": 326420 }, { "epoch": 0.6503211462450593, "grad_norm": 0.19837406277656555, "learning_rate": 0.002, "loss": 2.5694, "step": 326430 }, { "epoch": 0.6503410684686982, "grad_norm": 0.22437511384487152, "learning_rate": 0.002, "loss": 2.5353, "step": 326440 }, { "epoch": 0.6503609906923371, "grad_norm": 0.14867565035820007, "learning_rate": 0.002, "loss": 2.5533, "step": 326450 }, { "epoch": 0.650380912915976, "grad_norm": 0.13958169519901276, "learning_rate": 0.002, "loss": 2.5632, "step": 326460 }, { "epoch": 0.650400835139615, "grad_norm": 0.17577046155929565, "learning_rate": 0.002, "loss": 2.5528, "step": 326470 }, { "epoch": 0.6504207573632539, "grad_norm": 0.1900573968887329, "learning_rate": 0.002, "loss": 2.5584, "step": 326480 }, { "epoch": 0.6504406795868928, "grad_norm": 0.19778583943843842, "learning_rate": 0.002, "loss": 2.5613, "step": 326490 }, { "epoch": 0.6504606018105317, "grad_norm": 0.26213502883911133, "learning_rate": 0.002, "loss": 2.5676, "step": 326500 }, { "epoch": 0.6504805240341706, "grad_norm": 0.19128908216953278, "learning_rate": 0.002, "loss": 2.5637, "step": 326510 }, { "epoch": 0.6505004462578096, "grad_norm": 0.1550477296113968, "learning_rate": 0.002, "loss": 2.5509, "step": 326520 }, { "epoch": 0.6505203684814485, "grad_norm": 0.14940659701824188, "learning_rate": 0.002, "loss": 2.5684, "step": 326530 }, { "epoch": 0.6505402907050873, "grad_norm": 0.1447899043560028, "learning_rate": 0.002, "loss": 2.5476, "step": 326540 }, { "epoch": 0.6505602129287262, "grad_norm": 0.1653635948896408, "learning_rate": 0.002, "loss": 2.5517, "step": 326550 }, { "epoch": 0.6505801351523651, "grad_norm": 0.1849474459886551, "learning_rate": 0.002, "loss": 2.5499, "step": 326560 }, { "epoch": 0.6506000573760041, "grad_norm": 0.1473798155784607, "learning_rate": 0.002, "loss": 2.5739, "step": 326570 }, { "epoch": 0.650619979599643, "grad_norm": 0.22126995027065277, "learning_rate": 0.002, "loss": 2.5344, "step": 326580 }, { "epoch": 0.6506399018232819, "grad_norm": 0.151650071144104, "learning_rate": 0.002, "loss": 2.5593, "step": 326590 }, { "epoch": 0.6506598240469208, "grad_norm": 0.1655876189470291, "learning_rate": 0.002, "loss": 2.5697, "step": 326600 }, { "epoch": 0.6506797462705597, "grad_norm": 0.15149712562561035, "learning_rate": 0.002, "loss": 2.5604, "step": 326610 }, { "epoch": 0.6506996684941987, "grad_norm": 0.1615833342075348, "learning_rate": 0.002, "loss": 2.5515, "step": 326620 }, { "epoch": 0.6507195907178376, "grad_norm": 0.17691101133823395, "learning_rate": 0.002, "loss": 2.5552, "step": 326630 }, { "epoch": 0.6507395129414765, "grad_norm": 0.15111114084720612, "learning_rate": 0.002, "loss": 2.5546, "step": 326640 }, { "epoch": 0.6507594351651154, "grad_norm": 0.18114608526229858, "learning_rate": 0.002, "loss": 2.5578, "step": 326650 }, { "epoch": 0.6507793573887543, "grad_norm": 0.14643321931362152, "learning_rate": 0.002, "loss": 2.5623, "step": 326660 }, { "epoch": 0.6507992796123933, "grad_norm": 0.16081073880195618, "learning_rate": 0.002, "loss": 2.5486, "step": 326670 }, { "epoch": 0.6508192018360321, "grad_norm": 0.17146749794483185, "learning_rate": 0.002, "loss": 2.5591, "step": 326680 }, { "epoch": 0.650839124059671, "grad_norm": 0.13934853672981262, "learning_rate": 0.002, "loss": 2.5545, "step": 326690 }, { "epoch": 0.6508590462833099, "grad_norm": 0.1785057783126831, "learning_rate": 0.002, "loss": 2.5521, "step": 326700 }, { "epoch": 0.6508789685069488, "grad_norm": 0.16323594748973846, "learning_rate": 0.002, "loss": 2.5434, "step": 326710 }, { "epoch": 0.6508988907305878, "grad_norm": 0.19252102077007294, "learning_rate": 0.002, "loss": 2.5598, "step": 326720 }, { "epoch": 0.6509188129542267, "grad_norm": 0.16574332118034363, "learning_rate": 0.002, "loss": 2.5565, "step": 326730 }, { "epoch": 0.6509387351778656, "grad_norm": 0.1527213603258133, "learning_rate": 0.002, "loss": 2.5491, "step": 326740 }, { "epoch": 0.6509586574015045, "grad_norm": 0.1463988721370697, "learning_rate": 0.002, "loss": 2.5563, "step": 326750 }, { "epoch": 0.6509785796251435, "grad_norm": 0.1738155037164688, "learning_rate": 0.002, "loss": 2.5527, "step": 326760 }, { "epoch": 0.6509985018487824, "grad_norm": 0.1464948058128357, "learning_rate": 0.002, "loss": 2.5548, "step": 326770 }, { "epoch": 0.6510184240724213, "grad_norm": 0.1679777055978775, "learning_rate": 0.002, "loss": 2.5457, "step": 326780 }, { "epoch": 0.6510383462960602, "grad_norm": 0.1564028412103653, "learning_rate": 0.002, "loss": 2.561, "step": 326790 }, { "epoch": 0.6510582685196991, "grad_norm": 0.14397217333316803, "learning_rate": 0.002, "loss": 2.5565, "step": 326800 }, { "epoch": 0.6510781907433381, "grad_norm": 0.15059472620487213, "learning_rate": 0.002, "loss": 2.5549, "step": 326810 }, { "epoch": 0.651098112966977, "grad_norm": 0.19546115398406982, "learning_rate": 0.002, "loss": 2.5539, "step": 326820 }, { "epoch": 0.6511180351906158, "grad_norm": 0.15093252062797546, "learning_rate": 0.002, "loss": 2.543, "step": 326830 }, { "epoch": 0.6511379574142547, "grad_norm": 0.16388313472270966, "learning_rate": 0.002, "loss": 2.5589, "step": 326840 }, { "epoch": 0.6511578796378936, "grad_norm": 0.18354447185993195, "learning_rate": 0.002, "loss": 2.5365, "step": 326850 }, { "epoch": 0.6511778018615326, "grad_norm": 0.1738491654396057, "learning_rate": 0.002, "loss": 2.5479, "step": 326860 }, { "epoch": 0.6511977240851715, "grad_norm": 0.20526158809661865, "learning_rate": 0.002, "loss": 2.5528, "step": 326870 }, { "epoch": 0.6512176463088104, "grad_norm": 0.15067815780639648, "learning_rate": 0.002, "loss": 2.5712, "step": 326880 }, { "epoch": 0.6512375685324493, "grad_norm": 0.1525188535451889, "learning_rate": 0.002, "loss": 2.5645, "step": 326890 }, { "epoch": 0.6512574907560882, "grad_norm": 0.14041157066822052, "learning_rate": 0.002, "loss": 2.5455, "step": 326900 }, { "epoch": 0.6512774129797272, "grad_norm": 0.1719469428062439, "learning_rate": 0.002, "loss": 2.5624, "step": 326910 }, { "epoch": 0.6512973352033661, "grad_norm": 0.1982683390378952, "learning_rate": 0.002, "loss": 2.5593, "step": 326920 }, { "epoch": 0.651317257427005, "grad_norm": 0.17335739731788635, "learning_rate": 0.002, "loss": 2.5702, "step": 326930 }, { "epoch": 0.6513371796506439, "grad_norm": 0.16753292083740234, "learning_rate": 0.002, "loss": 2.5634, "step": 326940 }, { "epoch": 0.6513571018742828, "grad_norm": 0.17934055626392365, "learning_rate": 0.002, "loss": 2.556, "step": 326950 }, { "epoch": 0.6513770240979218, "grad_norm": 0.17537781596183777, "learning_rate": 0.002, "loss": 2.5428, "step": 326960 }, { "epoch": 0.6513969463215606, "grad_norm": 0.16144631803035736, "learning_rate": 0.002, "loss": 2.5426, "step": 326970 }, { "epoch": 0.6514168685451995, "grad_norm": 0.15302176773548126, "learning_rate": 0.002, "loss": 2.5379, "step": 326980 }, { "epoch": 0.6514367907688384, "grad_norm": 0.1776486337184906, "learning_rate": 0.002, "loss": 2.5516, "step": 326990 }, { "epoch": 0.6514567129924773, "grad_norm": 0.1578769087791443, "learning_rate": 0.002, "loss": 2.5548, "step": 327000 }, { "epoch": 0.6514766352161163, "grad_norm": 0.20014970004558563, "learning_rate": 0.002, "loss": 2.5569, "step": 327010 }, { "epoch": 0.6514965574397552, "grad_norm": 0.1379760205745697, "learning_rate": 0.002, "loss": 2.5649, "step": 327020 }, { "epoch": 0.6515164796633941, "grad_norm": 0.1897488236427307, "learning_rate": 0.002, "loss": 2.5355, "step": 327030 }, { "epoch": 0.651536401887033, "grad_norm": 0.1793205440044403, "learning_rate": 0.002, "loss": 2.5536, "step": 327040 }, { "epoch": 0.651556324110672, "grad_norm": 0.14121922850608826, "learning_rate": 0.002, "loss": 2.5449, "step": 327050 }, { "epoch": 0.6515762463343109, "grad_norm": 0.18557099997997284, "learning_rate": 0.002, "loss": 2.5645, "step": 327060 }, { "epoch": 0.6515961685579498, "grad_norm": 0.16482840478420258, "learning_rate": 0.002, "loss": 2.546, "step": 327070 }, { "epoch": 0.6516160907815887, "grad_norm": 0.15263596177101135, "learning_rate": 0.002, "loss": 2.5469, "step": 327080 }, { "epoch": 0.6516360130052276, "grad_norm": 0.1754525750875473, "learning_rate": 0.002, "loss": 2.5481, "step": 327090 }, { "epoch": 0.6516559352288666, "grad_norm": 0.13449573516845703, "learning_rate": 0.002, "loss": 2.5476, "step": 327100 }, { "epoch": 0.6516758574525054, "grad_norm": 0.16767123341560364, "learning_rate": 0.002, "loss": 2.5547, "step": 327110 }, { "epoch": 0.6516957796761443, "grad_norm": 0.17404776811599731, "learning_rate": 0.002, "loss": 2.5664, "step": 327120 }, { "epoch": 0.6517157018997832, "grad_norm": 0.17496131360530853, "learning_rate": 0.002, "loss": 2.5509, "step": 327130 }, { "epoch": 0.6517356241234221, "grad_norm": 0.18569517135620117, "learning_rate": 0.002, "loss": 2.5454, "step": 327140 }, { "epoch": 0.6517555463470611, "grad_norm": 0.14024776220321655, "learning_rate": 0.002, "loss": 2.5407, "step": 327150 }, { "epoch": 0.6517754685707, "grad_norm": 0.14687539637088776, "learning_rate": 0.002, "loss": 2.5459, "step": 327160 }, { "epoch": 0.6517953907943389, "grad_norm": 0.1642712950706482, "learning_rate": 0.002, "loss": 2.5484, "step": 327170 }, { "epoch": 0.6518153130179778, "grad_norm": 0.1569611132144928, "learning_rate": 0.002, "loss": 2.5446, "step": 327180 }, { "epoch": 0.6518352352416167, "grad_norm": 0.1717546582221985, "learning_rate": 0.002, "loss": 2.5743, "step": 327190 }, { "epoch": 0.6518551574652557, "grad_norm": 0.19122323393821716, "learning_rate": 0.002, "loss": 2.5435, "step": 327200 }, { "epoch": 0.6518750796888946, "grad_norm": 0.17022018134593964, "learning_rate": 0.002, "loss": 2.5455, "step": 327210 }, { "epoch": 0.6518950019125335, "grad_norm": 0.15406908094882965, "learning_rate": 0.002, "loss": 2.5472, "step": 327220 }, { "epoch": 0.6519149241361724, "grad_norm": 0.1453094482421875, "learning_rate": 0.002, "loss": 2.5368, "step": 327230 }, { "epoch": 0.6519348463598112, "grad_norm": 0.18671588599681854, "learning_rate": 0.002, "loss": 2.557, "step": 327240 }, { "epoch": 0.6519547685834503, "grad_norm": 0.18049205839633942, "learning_rate": 0.002, "loss": 2.5608, "step": 327250 }, { "epoch": 0.6519746908070891, "grad_norm": 0.14719578623771667, "learning_rate": 0.002, "loss": 2.5685, "step": 327260 }, { "epoch": 0.651994613030728, "grad_norm": 0.15643057227134705, "learning_rate": 0.002, "loss": 2.5668, "step": 327270 }, { "epoch": 0.6520145352543669, "grad_norm": 0.1956581175327301, "learning_rate": 0.002, "loss": 2.5529, "step": 327280 }, { "epoch": 0.6520344574780058, "grad_norm": 0.1357211023569107, "learning_rate": 0.002, "loss": 2.5488, "step": 327290 }, { "epoch": 0.6520543797016448, "grad_norm": 0.14114655554294586, "learning_rate": 0.002, "loss": 2.5445, "step": 327300 }, { "epoch": 0.6520743019252837, "grad_norm": 0.1761477142572403, "learning_rate": 0.002, "loss": 2.5702, "step": 327310 }, { "epoch": 0.6520942241489226, "grad_norm": 0.1697276085615158, "learning_rate": 0.002, "loss": 2.5606, "step": 327320 }, { "epoch": 0.6521141463725615, "grad_norm": 0.1684352606534958, "learning_rate": 0.002, "loss": 2.5483, "step": 327330 }, { "epoch": 0.6521340685962004, "grad_norm": 0.1575985699892044, "learning_rate": 0.002, "loss": 2.5615, "step": 327340 }, { "epoch": 0.6521539908198394, "grad_norm": 0.1684028059244156, "learning_rate": 0.002, "loss": 2.5531, "step": 327350 }, { "epoch": 0.6521739130434783, "grad_norm": 0.14425219595432281, "learning_rate": 0.002, "loss": 2.5432, "step": 327360 }, { "epoch": 0.6521938352671172, "grad_norm": 0.16884392499923706, "learning_rate": 0.002, "loss": 2.54, "step": 327370 }, { "epoch": 0.652213757490756, "grad_norm": 0.19564670324325562, "learning_rate": 0.002, "loss": 2.5721, "step": 327380 }, { "epoch": 0.652233679714395, "grad_norm": 0.19791702926158905, "learning_rate": 0.002, "loss": 2.549, "step": 327390 }, { "epoch": 0.6522536019380339, "grad_norm": 0.16157986223697662, "learning_rate": 0.002, "loss": 2.5409, "step": 327400 }, { "epoch": 0.6522735241616728, "grad_norm": 0.13728387653827667, "learning_rate": 0.002, "loss": 2.5718, "step": 327410 }, { "epoch": 0.6522934463853117, "grad_norm": 0.24596725404262543, "learning_rate": 0.002, "loss": 2.5399, "step": 327420 }, { "epoch": 0.6523133686089506, "grad_norm": 0.16663385927677155, "learning_rate": 0.002, "loss": 2.572, "step": 327430 }, { "epoch": 0.6523332908325896, "grad_norm": 0.19037574529647827, "learning_rate": 0.002, "loss": 2.5568, "step": 327440 }, { "epoch": 0.6523532130562285, "grad_norm": 0.189508855342865, "learning_rate": 0.002, "loss": 2.554, "step": 327450 }, { "epoch": 0.6523731352798674, "grad_norm": 0.26718869805336, "learning_rate": 0.002, "loss": 2.5597, "step": 327460 }, { "epoch": 0.6523930575035063, "grad_norm": 0.14763787388801575, "learning_rate": 0.002, "loss": 2.558, "step": 327470 }, { "epoch": 0.6524129797271452, "grad_norm": 0.15851671993732452, "learning_rate": 0.002, "loss": 2.5576, "step": 327480 }, { "epoch": 0.6524329019507842, "grad_norm": 0.1487964540719986, "learning_rate": 0.002, "loss": 2.541, "step": 327490 }, { "epoch": 0.6524528241744231, "grad_norm": 0.18154411017894745, "learning_rate": 0.002, "loss": 2.549, "step": 327500 }, { "epoch": 0.652472746398062, "grad_norm": 0.16044579446315765, "learning_rate": 0.002, "loss": 2.5684, "step": 327510 }, { "epoch": 0.6524926686217009, "grad_norm": 0.16603690385818481, "learning_rate": 0.002, "loss": 2.5557, "step": 327520 }, { "epoch": 0.6525125908453397, "grad_norm": 0.16359463334083557, "learning_rate": 0.002, "loss": 2.5483, "step": 327530 }, { "epoch": 0.6525325130689787, "grad_norm": 0.1736806184053421, "learning_rate": 0.002, "loss": 2.548, "step": 327540 }, { "epoch": 0.6525524352926176, "grad_norm": 0.12539291381835938, "learning_rate": 0.002, "loss": 2.5554, "step": 327550 }, { "epoch": 0.6525723575162565, "grad_norm": 0.1759873479604721, "learning_rate": 0.002, "loss": 2.5534, "step": 327560 }, { "epoch": 0.6525922797398954, "grad_norm": 0.15133199095726013, "learning_rate": 0.002, "loss": 2.5594, "step": 327570 }, { "epoch": 0.6526122019635343, "grad_norm": 0.13317853212356567, "learning_rate": 0.002, "loss": 2.5526, "step": 327580 }, { "epoch": 0.6526321241871733, "grad_norm": 0.19403456151485443, "learning_rate": 0.002, "loss": 2.564, "step": 327590 }, { "epoch": 0.6526520464108122, "grad_norm": 0.16636943817138672, "learning_rate": 0.002, "loss": 2.5431, "step": 327600 }, { "epoch": 0.6526719686344511, "grad_norm": 0.14319850504398346, "learning_rate": 0.002, "loss": 2.5441, "step": 327610 }, { "epoch": 0.65269189085809, "grad_norm": 0.15398958325386047, "learning_rate": 0.002, "loss": 2.5466, "step": 327620 }, { "epoch": 0.6527118130817289, "grad_norm": 0.21518990397453308, "learning_rate": 0.002, "loss": 2.5566, "step": 327630 }, { "epoch": 0.6527317353053679, "grad_norm": 0.1479412168264389, "learning_rate": 0.002, "loss": 2.5557, "step": 327640 }, { "epoch": 0.6527516575290068, "grad_norm": 0.1813948154449463, "learning_rate": 0.002, "loss": 2.5556, "step": 327650 }, { "epoch": 0.6527715797526457, "grad_norm": 0.16371844708919525, "learning_rate": 0.002, "loss": 2.5698, "step": 327660 }, { "epoch": 0.6527915019762845, "grad_norm": 0.16551874577999115, "learning_rate": 0.002, "loss": 2.5605, "step": 327670 }, { "epoch": 0.6528114241999236, "grad_norm": 0.2029484063386917, "learning_rate": 0.002, "loss": 2.5625, "step": 327680 }, { "epoch": 0.6528313464235624, "grad_norm": 0.15594612061977386, "learning_rate": 0.002, "loss": 2.5613, "step": 327690 }, { "epoch": 0.6528512686472013, "grad_norm": 0.17158205807209015, "learning_rate": 0.002, "loss": 2.5645, "step": 327700 }, { "epoch": 0.6528711908708402, "grad_norm": 0.1784713715314865, "learning_rate": 0.002, "loss": 2.5705, "step": 327710 }, { "epoch": 0.6528911130944791, "grad_norm": 0.16362379491329193, "learning_rate": 0.002, "loss": 2.553, "step": 327720 }, { "epoch": 0.6529110353181181, "grad_norm": 0.17195463180541992, "learning_rate": 0.002, "loss": 2.5597, "step": 327730 }, { "epoch": 0.652930957541757, "grad_norm": 0.1430210918188095, "learning_rate": 0.002, "loss": 2.5785, "step": 327740 }, { "epoch": 0.6529508797653959, "grad_norm": 0.14689110219478607, "learning_rate": 0.002, "loss": 2.5551, "step": 327750 }, { "epoch": 0.6529708019890348, "grad_norm": 0.16341572999954224, "learning_rate": 0.002, "loss": 2.5465, "step": 327760 }, { "epoch": 0.6529907242126737, "grad_norm": 0.1686159372329712, "learning_rate": 0.002, "loss": 2.5509, "step": 327770 }, { "epoch": 0.6530106464363127, "grad_norm": 0.15995068848133087, "learning_rate": 0.002, "loss": 2.543, "step": 327780 }, { "epoch": 0.6530305686599516, "grad_norm": 0.21217474341392517, "learning_rate": 0.002, "loss": 2.555, "step": 327790 }, { "epoch": 0.6530504908835905, "grad_norm": 0.170924112200737, "learning_rate": 0.002, "loss": 2.5419, "step": 327800 }, { "epoch": 0.6530704131072294, "grad_norm": 0.1751430779695511, "learning_rate": 0.002, "loss": 2.5632, "step": 327810 }, { "epoch": 0.6530903353308682, "grad_norm": 0.16409079730510712, "learning_rate": 0.002, "loss": 2.5645, "step": 327820 }, { "epoch": 0.6531102575545072, "grad_norm": 0.16895458102226257, "learning_rate": 0.002, "loss": 2.5567, "step": 327830 }, { "epoch": 0.6531301797781461, "grad_norm": 0.1473604142665863, "learning_rate": 0.002, "loss": 2.5533, "step": 327840 }, { "epoch": 0.653150102001785, "grad_norm": 0.18817941844463348, "learning_rate": 0.002, "loss": 2.5642, "step": 327850 }, { "epoch": 0.6531700242254239, "grad_norm": 0.16814810037612915, "learning_rate": 0.002, "loss": 2.559, "step": 327860 }, { "epoch": 0.6531899464490628, "grad_norm": 0.1504342406988144, "learning_rate": 0.002, "loss": 2.5599, "step": 327870 }, { "epoch": 0.6532098686727018, "grad_norm": 0.13487206399440765, "learning_rate": 0.002, "loss": 2.5594, "step": 327880 }, { "epoch": 0.6532297908963407, "grad_norm": 0.19427038729190826, "learning_rate": 0.002, "loss": 2.5361, "step": 327890 }, { "epoch": 0.6532497131199796, "grad_norm": 0.17002668976783752, "learning_rate": 0.002, "loss": 2.5406, "step": 327900 }, { "epoch": 0.6532696353436185, "grad_norm": 0.15147051215171814, "learning_rate": 0.002, "loss": 2.5564, "step": 327910 }, { "epoch": 0.6532895575672574, "grad_norm": 0.13461314141750336, "learning_rate": 0.002, "loss": 2.5672, "step": 327920 }, { "epoch": 0.6533094797908964, "grad_norm": 0.14205273985862732, "learning_rate": 0.002, "loss": 2.5541, "step": 327930 }, { "epoch": 0.6533294020145353, "grad_norm": 0.1570335179567337, "learning_rate": 0.002, "loss": 2.5485, "step": 327940 }, { "epoch": 0.6533493242381742, "grad_norm": 0.17846496403217316, "learning_rate": 0.002, "loss": 2.5456, "step": 327950 }, { "epoch": 0.653369246461813, "grad_norm": 0.16008025407791138, "learning_rate": 0.002, "loss": 2.5384, "step": 327960 }, { "epoch": 0.653389168685452, "grad_norm": 0.1769915521144867, "learning_rate": 0.002, "loss": 2.5424, "step": 327970 }, { "epoch": 0.6534090909090909, "grad_norm": 0.1501728892326355, "learning_rate": 0.002, "loss": 2.5647, "step": 327980 }, { "epoch": 0.6534290131327298, "grad_norm": 0.1986437290906906, "learning_rate": 0.002, "loss": 2.5523, "step": 327990 }, { "epoch": 0.6534489353563687, "grad_norm": 0.18206632137298584, "learning_rate": 0.002, "loss": 2.5591, "step": 328000 }, { "epoch": 0.6534688575800076, "grad_norm": 0.14744141697883606, "learning_rate": 0.002, "loss": 2.5574, "step": 328010 }, { "epoch": 0.6534887798036466, "grad_norm": 0.22698761522769928, "learning_rate": 0.002, "loss": 2.5496, "step": 328020 }, { "epoch": 0.6535087020272855, "grad_norm": 0.187628835439682, "learning_rate": 0.002, "loss": 2.5591, "step": 328030 }, { "epoch": 0.6535286242509244, "grad_norm": 0.15425077080726624, "learning_rate": 0.002, "loss": 2.5636, "step": 328040 }, { "epoch": 0.6535485464745633, "grad_norm": 0.1744348108768463, "learning_rate": 0.002, "loss": 2.54, "step": 328050 }, { "epoch": 0.6535684686982022, "grad_norm": 0.16267934441566467, "learning_rate": 0.002, "loss": 2.558, "step": 328060 }, { "epoch": 0.6535883909218412, "grad_norm": 0.1569019854068756, "learning_rate": 0.002, "loss": 2.5535, "step": 328070 }, { "epoch": 0.6536083131454801, "grad_norm": 0.22129929065704346, "learning_rate": 0.002, "loss": 2.55, "step": 328080 }, { "epoch": 0.653628235369119, "grad_norm": 0.16771237552165985, "learning_rate": 0.002, "loss": 2.5634, "step": 328090 }, { "epoch": 0.6536481575927579, "grad_norm": 0.14638696610927582, "learning_rate": 0.002, "loss": 2.5623, "step": 328100 }, { "epoch": 0.6536680798163967, "grad_norm": 0.15840238332748413, "learning_rate": 0.002, "loss": 2.56, "step": 328110 }, { "epoch": 0.6536880020400357, "grad_norm": 0.16662105917930603, "learning_rate": 0.002, "loss": 2.5546, "step": 328120 }, { "epoch": 0.6537079242636746, "grad_norm": 0.16839908063411713, "learning_rate": 0.002, "loss": 2.55, "step": 328130 }, { "epoch": 0.6537278464873135, "grad_norm": 0.15449605882167816, "learning_rate": 0.002, "loss": 2.5557, "step": 328140 }, { "epoch": 0.6537477687109524, "grad_norm": 0.17627082765102386, "learning_rate": 0.002, "loss": 2.5488, "step": 328150 }, { "epoch": 0.6537676909345913, "grad_norm": 0.190765380859375, "learning_rate": 0.002, "loss": 2.5699, "step": 328160 }, { "epoch": 0.6537876131582303, "grad_norm": 0.16765403747558594, "learning_rate": 0.002, "loss": 2.5475, "step": 328170 }, { "epoch": 0.6538075353818692, "grad_norm": 0.18492892384529114, "learning_rate": 0.002, "loss": 2.5561, "step": 328180 }, { "epoch": 0.6538274576055081, "grad_norm": 0.15157712996006012, "learning_rate": 0.002, "loss": 2.555, "step": 328190 }, { "epoch": 0.653847379829147, "grad_norm": 0.17514453828334808, "learning_rate": 0.002, "loss": 2.5622, "step": 328200 }, { "epoch": 0.6538673020527859, "grad_norm": 0.17760248482227325, "learning_rate": 0.002, "loss": 2.5454, "step": 328210 }, { "epoch": 0.6538872242764249, "grad_norm": 0.15085624158382416, "learning_rate": 0.002, "loss": 2.5506, "step": 328220 }, { "epoch": 0.6539071465000638, "grad_norm": 0.18759815394878387, "learning_rate": 0.002, "loss": 2.5485, "step": 328230 }, { "epoch": 0.6539270687237027, "grad_norm": 0.17955231666564941, "learning_rate": 0.002, "loss": 2.5528, "step": 328240 }, { "epoch": 0.6539469909473415, "grad_norm": 0.14231009781360626, "learning_rate": 0.002, "loss": 2.5696, "step": 328250 }, { "epoch": 0.6539669131709805, "grad_norm": 0.17833749949932098, "learning_rate": 0.002, "loss": 2.5495, "step": 328260 }, { "epoch": 0.6539868353946194, "grad_norm": 0.19720865786075592, "learning_rate": 0.002, "loss": 2.5687, "step": 328270 }, { "epoch": 0.6540067576182583, "grad_norm": 0.1721823662519455, "learning_rate": 0.002, "loss": 2.5503, "step": 328280 }, { "epoch": 0.6540266798418972, "grad_norm": 0.14619934558868408, "learning_rate": 0.002, "loss": 2.5571, "step": 328290 }, { "epoch": 0.6540466020655361, "grad_norm": 0.15210705995559692, "learning_rate": 0.002, "loss": 2.56, "step": 328300 }, { "epoch": 0.6540665242891751, "grad_norm": 0.15626944601535797, "learning_rate": 0.002, "loss": 2.5552, "step": 328310 }, { "epoch": 0.654086446512814, "grad_norm": 0.15291504561901093, "learning_rate": 0.002, "loss": 2.5638, "step": 328320 }, { "epoch": 0.6541063687364529, "grad_norm": 0.18169960379600525, "learning_rate": 0.002, "loss": 2.5508, "step": 328330 }, { "epoch": 0.6541262909600918, "grad_norm": 0.15718311071395874, "learning_rate": 0.002, "loss": 2.5585, "step": 328340 }, { "epoch": 0.6541462131837307, "grad_norm": 0.1636030524969101, "learning_rate": 0.002, "loss": 2.5654, "step": 328350 }, { "epoch": 0.6541661354073697, "grad_norm": 0.14923971891403198, "learning_rate": 0.002, "loss": 2.5569, "step": 328360 }, { "epoch": 0.6541860576310086, "grad_norm": 0.14171114563941956, "learning_rate": 0.002, "loss": 2.5585, "step": 328370 }, { "epoch": 0.6542059798546475, "grad_norm": 0.19838055968284607, "learning_rate": 0.002, "loss": 2.5576, "step": 328380 }, { "epoch": 0.6542259020782863, "grad_norm": 0.15583820641040802, "learning_rate": 0.002, "loss": 2.5483, "step": 328390 }, { "epoch": 0.6542458243019252, "grad_norm": 0.1550903618335724, "learning_rate": 0.002, "loss": 2.5503, "step": 328400 }, { "epoch": 0.6542657465255642, "grad_norm": 0.1667381376028061, "learning_rate": 0.002, "loss": 2.5567, "step": 328410 }, { "epoch": 0.6542856687492031, "grad_norm": 0.166051983833313, "learning_rate": 0.002, "loss": 2.5477, "step": 328420 }, { "epoch": 0.654305590972842, "grad_norm": 0.2017636001110077, "learning_rate": 0.002, "loss": 2.5487, "step": 328430 }, { "epoch": 0.6543255131964809, "grad_norm": 0.1753411591053009, "learning_rate": 0.002, "loss": 2.5456, "step": 328440 }, { "epoch": 0.6543454354201198, "grad_norm": 0.13748043775558472, "learning_rate": 0.002, "loss": 2.5557, "step": 328450 }, { "epoch": 0.6543653576437588, "grad_norm": 0.14914533495903015, "learning_rate": 0.002, "loss": 2.5684, "step": 328460 }, { "epoch": 0.6543852798673977, "grad_norm": 0.15897053480148315, "learning_rate": 0.002, "loss": 2.5526, "step": 328470 }, { "epoch": 0.6544052020910366, "grad_norm": 0.1586655080318451, "learning_rate": 0.002, "loss": 2.5593, "step": 328480 }, { "epoch": 0.6544251243146755, "grad_norm": 0.17060275375843048, "learning_rate": 0.002, "loss": 2.5602, "step": 328490 }, { "epoch": 0.6544450465383144, "grad_norm": 0.16615325212478638, "learning_rate": 0.002, "loss": 2.5645, "step": 328500 }, { "epoch": 0.6544649687619534, "grad_norm": 0.14115853607654572, "learning_rate": 0.002, "loss": 2.5569, "step": 328510 }, { "epoch": 0.6544848909855923, "grad_norm": 0.14454104006290436, "learning_rate": 0.002, "loss": 2.5493, "step": 328520 }, { "epoch": 0.6545048132092312, "grad_norm": 0.20329709351062775, "learning_rate": 0.002, "loss": 2.5622, "step": 328530 }, { "epoch": 0.65452473543287, "grad_norm": 0.19245918095111847, "learning_rate": 0.002, "loss": 2.5634, "step": 328540 }, { "epoch": 0.654544657656509, "grad_norm": 0.15410736203193665, "learning_rate": 0.002, "loss": 2.5455, "step": 328550 }, { "epoch": 0.6545645798801479, "grad_norm": 0.1357056200504303, "learning_rate": 0.002, "loss": 2.5734, "step": 328560 }, { "epoch": 0.6545845021037868, "grad_norm": 0.1573779433965683, "learning_rate": 0.002, "loss": 2.5495, "step": 328570 }, { "epoch": 0.6546044243274257, "grad_norm": 0.156777024269104, "learning_rate": 0.002, "loss": 2.5378, "step": 328580 }, { "epoch": 0.6546243465510646, "grad_norm": 0.18384769558906555, "learning_rate": 0.002, "loss": 2.5701, "step": 328590 }, { "epoch": 0.6546442687747036, "grad_norm": 0.1356820911169052, "learning_rate": 0.002, "loss": 2.5468, "step": 328600 }, { "epoch": 0.6546641909983425, "grad_norm": 0.21349532902240753, "learning_rate": 0.002, "loss": 2.5517, "step": 328610 }, { "epoch": 0.6546841132219814, "grad_norm": 0.1472991704940796, "learning_rate": 0.002, "loss": 2.5537, "step": 328620 }, { "epoch": 0.6547040354456203, "grad_norm": 0.1699669063091278, "learning_rate": 0.002, "loss": 2.5467, "step": 328630 }, { "epoch": 0.6547239576692592, "grad_norm": 0.17602825164794922, "learning_rate": 0.002, "loss": 2.5669, "step": 328640 }, { "epoch": 0.6547438798928982, "grad_norm": 0.14249514043331146, "learning_rate": 0.002, "loss": 2.5342, "step": 328650 }, { "epoch": 0.6547638021165371, "grad_norm": 0.1665307581424713, "learning_rate": 0.002, "loss": 2.5593, "step": 328660 }, { "epoch": 0.654783724340176, "grad_norm": 0.1795656532049179, "learning_rate": 0.002, "loss": 2.5493, "step": 328670 }, { "epoch": 0.6548036465638148, "grad_norm": 0.1514996439218521, "learning_rate": 0.002, "loss": 2.5563, "step": 328680 }, { "epoch": 0.6548235687874537, "grad_norm": 0.16469350457191467, "learning_rate": 0.002, "loss": 2.5655, "step": 328690 }, { "epoch": 0.6548434910110927, "grad_norm": 0.16254620254039764, "learning_rate": 0.002, "loss": 2.5527, "step": 328700 }, { "epoch": 0.6548634132347316, "grad_norm": 0.16086594760417938, "learning_rate": 0.002, "loss": 2.5365, "step": 328710 }, { "epoch": 0.6548833354583705, "grad_norm": 0.1678950935602188, "learning_rate": 0.002, "loss": 2.5551, "step": 328720 }, { "epoch": 0.6549032576820094, "grad_norm": 0.17027492821216583, "learning_rate": 0.002, "loss": 2.5474, "step": 328730 }, { "epoch": 0.6549231799056483, "grad_norm": 0.14231200516223907, "learning_rate": 0.002, "loss": 2.5549, "step": 328740 }, { "epoch": 0.6549431021292873, "grad_norm": 0.14661063253879547, "learning_rate": 0.002, "loss": 2.5601, "step": 328750 }, { "epoch": 0.6549630243529262, "grad_norm": 0.17364346981048584, "learning_rate": 0.002, "loss": 2.5672, "step": 328760 }, { "epoch": 0.6549829465765651, "grad_norm": 0.15194903314113617, "learning_rate": 0.002, "loss": 2.5652, "step": 328770 }, { "epoch": 0.655002868800204, "grad_norm": 0.18418267369270325, "learning_rate": 0.002, "loss": 2.5558, "step": 328780 }, { "epoch": 0.6550227910238429, "grad_norm": 0.16994650661945343, "learning_rate": 0.002, "loss": 2.5508, "step": 328790 }, { "epoch": 0.6550427132474819, "grad_norm": 0.14877642691135406, "learning_rate": 0.002, "loss": 2.5557, "step": 328800 }, { "epoch": 0.6550626354711208, "grad_norm": 0.19144389033317566, "learning_rate": 0.002, "loss": 2.5331, "step": 328810 }, { "epoch": 0.6550825576947596, "grad_norm": 0.15933863818645477, "learning_rate": 0.002, "loss": 2.5326, "step": 328820 }, { "epoch": 0.6551024799183985, "grad_norm": 0.17909985780715942, "learning_rate": 0.002, "loss": 2.5664, "step": 328830 }, { "epoch": 0.6551224021420375, "grad_norm": 0.19649703800678253, "learning_rate": 0.002, "loss": 2.5726, "step": 328840 }, { "epoch": 0.6551423243656764, "grad_norm": 0.16243746876716614, "learning_rate": 0.002, "loss": 2.5576, "step": 328850 }, { "epoch": 0.6551622465893153, "grad_norm": 0.1999644786119461, "learning_rate": 0.002, "loss": 2.5587, "step": 328860 }, { "epoch": 0.6551821688129542, "grad_norm": 0.14525802433490753, "learning_rate": 0.002, "loss": 2.5586, "step": 328870 }, { "epoch": 0.6552020910365931, "grad_norm": 0.14947082102298737, "learning_rate": 0.002, "loss": 2.5589, "step": 328880 }, { "epoch": 0.6552220132602321, "grad_norm": 0.1844588667154312, "learning_rate": 0.002, "loss": 2.5601, "step": 328890 }, { "epoch": 0.655241935483871, "grad_norm": 0.1688719093799591, "learning_rate": 0.002, "loss": 2.5595, "step": 328900 }, { "epoch": 0.6552618577075099, "grad_norm": 0.1639818698167801, "learning_rate": 0.002, "loss": 2.536, "step": 328910 }, { "epoch": 0.6552817799311488, "grad_norm": 0.14177517592906952, "learning_rate": 0.002, "loss": 2.5482, "step": 328920 }, { "epoch": 0.6553017021547877, "grad_norm": 0.1709308922290802, "learning_rate": 0.002, "loss": 2.5458, "step": 328930 }, { "epoch": 0.6553216243784267, "grad_norm": 0.17543640732765198, "learning_rate": 0.002, "loss": 2.5546, "step": 328940 }, { "epoch": 0.6553415466020656, "grad_norm": 0.1530771106481552, "learning_rate": 0.002, "loss": 2.5462, "step": 328950 }, { "epoch": 0.6553614688257045, "grad_norm": 0.19486786425113678, "learning_rate": 0.002, "loss": 2.5422, "step": 328960 }, { "epoch": 0.6553813910493433, "grad_norm": 0.20498184859752655, "learning_rate": 0.002, "loss": 2.5529, "step": 328970 }, { "epoch": 0.6554013132729822, "grad_norm": 0.14213398098945618, "learning_rate": 0.002, "loss": 2.545, "step": 328980 }, { "epoch": 0.6554212354966212, "grad_norm": 0.15284843742847443, "learning_rate": 0.002, "loss": 2.566, "step": 328990 }, { "epoch": 0.6554411577202601, "grad_norm": 0.16865915060043335, "learning_rate": 0.002, "loss": 2.5502, "step": 329000 }, { "epoch": 0.655461079943899, "grad_norm": 0.1484140008687973, "learning_rate": 0.002, "loss": 2.5671, "step": 329010 }, { "epoch": 0.6554810021675379, "grad_norm": 0.16537606716156006, "learning_rate": 0.002, "loss": 2.5607, "step": 329020 }, { "epoch": 0.6555009243911768, "grad_norm": 0.18707987666130066, "learning_rate": 0.002, "loss": 2.5496, "step": 329030 }, { "epoch": 0.6555208466148158, "grad_norm": 0.1471916288137436, "learning_rate": 0.002, "loss": 2.5536, "step": 329040 }, { "epoch": 0.6555407688384547, "grad_norm": 0.18833360075950623, "learning_rate": 0.002, "loss": 2.5497, "step": 329050 }, { "epoch": 0.6555606910620936, "grad_norm": 0.2010933756828308, "learning_rate": 0.002, "loss": 2.5608, "step": 329060 }, { "epoch": 0.6555806132857325, "grad_norm": 0.1659422516822815, "learning_rate": 0.002, "loss": 2.5459, "step": 329070 }, { "epoch": 0.6556005355093714, "grad_norm": 0.18669043481349945, "learning_rate": 0.002, "loss": 2.5556, "step": 329080 }, { "epoch": 0.6556204577330104, "grad_norm": 0.16565299034118652, "learning_rate": 0.002, "loss": 2.5483, "step": 329090 }, { "epoch": 0.6556403799566493, "grad_norm": 0.1763719767332077, "learning_rate": 0.002, "loss": 2.5538, "step": 329100 }, { "epoch": 0.6556603021802881, "grad_norm": 0.1674288958311081, "learning_rate": 0.002, "loss": 2.5598, "step": 329110 }, { "epoch": 0.655680224403927, "grad_norm": 0.15711936354637146, "learning_rate": 0.002, "loss": 2.5584, "step": 329120 }, { "epoch": 0.6557001466275659, "grad_norm": 0.15512068569660187, "learning_rate": 0.002, "loss": 2.5461, "step": 329130 }, { "epoch": 0.6557200688512049, "grad_norm": 0.24295642971992493, "learning_rate": 0.002, "loss": 2.5603, "step": 329140 }, { "epoch": 0.6557399910748438, "grad_norm": 0.14954794943332672, "learning_rate": 0.002, "loss": 2.5462, "step": 329150 }, { "epoch": 0.6557599132984827, "grad_norm": 0.16824287176132202, "learning_rate": 0.002, "loss": 2.5492, "step": 329160 }, { "epoch": 0.6557798355221216, "grad_norm": 0.1840675175189972, "learning_rate": 0.002, "loss": 2.5441, "step": 329170 }, { "epoch": 0.6557997577457606, "grad_norm": 0.1577882319688797, "learning_rate": 0.002, "loss": 2.5438, "step": 329180 }, { "epoch": 0.6558196799693995, "grad_norm": 0.16827493906021118, "learning_rate": 0.002, "loss": 2.5558, "step": 329190 }, { "epoch": 0.6558396021930384, "grad_norm": 0.1445016860961914, "learning_rate": 0.002, "loss": 2.5607, "step": 329200 }, { "epoch": 0.6558595244166773, "grad_norm": 0.15227140486240387, "learning_rate": 0.002, "loss": 2.5535, "step": 329210 }, { "epoch": 0.6558794466403162, "grad_norm": 0.13667656481266022, "learning_rate": 0.002, "loss": 2.5406, "step": 329220 }, { "epoch": 0.6558993688639552, "grad_norm": 0.17547602951526642, "learning_rate": 0.002, "loss": 2.5565, "step": 329230 }, { "epoch": 0.6559192910875941, "grad_norm": 0.18042780458927155, "learning_rate": 0.002, "loss": 2.5348, "step": 329240 }, { "epoch": 0.655939213311233, "grad_norm": 0.16960881650447845, "learning_rate": 0.002, "loss": 2.5695, "step": 329250 }, { "epoch": 0.6559591355348718, "grad_norm": 0.16332635283470154, "learning_rate": 0.002, "loss": 2.5701, "step": 329260 }, { "epoch": 0.6559790577585107, "grad_norm": 0.16497349739074707, "learning_rate": 0.002, "loss": 2.5412, "step": 329270 }, { "epoch": 0.6559989799821497, "grad_norm": 0.14995962381362915, "learning_rate": 0.002, "loss": 2.5487, "step": 329280 }, { "epoch": 0.6560189022057886, "grad_norm": 0.16404664516448975, "learning_rate": 0.002, "loss": 2.555, "step": 329290 }, { "epoch": 0.6560388244294275, "grad_norm": 0.18644866347312927, "learning_rate": 0.002, "loss": 2.5441, "step": 329300 }, { "epoch": 0.6560587466530664, "grad_norm": 0.1347980946302414, "learning_rate": 0.002, "loss": 2.5631, "step": 329310 }, { "epoch": 0.6560786688767053, "grad_norm": 0.1480741798877716, "learning_rate": 0.002, "loss": 2.5507, "step": 329320 }, { "epoch": 0.6560985911003443, "grad_norm": 0.19935370981693268, "learning_rate": 0.002, "loss": 2.5606, "step": 329330 }, { "epoch": 0.6561185133239832, "grad_norm": 0.18997062742710114, "learning_rate": 0.002, "loss": 2.5709, "step": 329340 }, { "epoch": 0.6561384355476221, "grad_norm": 0.16157205402851105, "learning_rate": 0.002, "loss": 2.5679, "step": 329350 }, { "epoch": 0.656158357771261, "grad_norm": 0.1680838167667389, "learning_rate": 0.002, "loss": 2.5531, "step": 329360 }, { "epoch": 0.6561782799948999, "grad_norm": 0.14601120352745056, "learning_rate": 0.002, "loss": 2.5362, "step": 329370 }, { "epoch": 0.6561982022185389, "grad_norm": 0.21220283210277557, "learning_rate": 0.002, "loss": 2.5564, "step": 329380 }, { "epoch": 0.6562181244421778, "grad_norm": 0.16955971717834473, "learning_rate": 0.002, "loss": 2.5582, "step": 329390 }, { "epoch": 0.6562380466658166, "grad_norm": 0.15099520981311798, "learning_rate": 0.002, "loss": 2.5652, "step": 329400 }, { "epoch": 0.6562579688894555, "grad_norm": 0.19727332890033722, "learning_rate": 0.002, "loss": 2.5467, "step": 329410 }, { "epoch": 0.6562778911130944, "grad_norm": 0.13869887590408325, "learning_rate": 0.002, "loss": 2.5553, "step": 329420 }, { "epoch": 0.6562978133367334, "grad_norm": 0.17149341106414795, "learning_rate": 0.002, "loss": 2.546, "step": 329430 }, { "epoch": 0.6563177355603723, "grad_norm": 0.17586609721183777, "learning_rate": 0.002, "loss": 2.5471, "step": 329440 }, { "epoch": 0.6563376577840112, "grad_norm": 0.17142535746097565, "learning_rate": 0.002, "loss": 2.5567, "step": 329450 }, { "epoch": 0.6563575800076501, "grad_norm": 0.1390923261642456, "learning_rate": 0.002, "loss": 2.5584, "step": 329460 }, { "epoch": 0.6563775022312891, "grad_norm": 0.17879098653793335, "learning_rate": 0.002, "loss": 2.5677, "step": 329470 }, { "epoch": 0.656397424454928, "grad_norm": 0.16019701957702637, "learning_rate": 0.002, "loss": 2.5672, "step": 329480 }, { "epoch": 0.6564173466785669, "grad_norm": 0.1891467422246933, "learning_rate": 0.002, "loss": 2.5507, "step": 329490 }, { "epoch": 0.6564372689022058, "grad_norm": 0.14497269690036774, "learning_rate": 0.002, "loss": 2.55, "step": 329500 }, { "epoch": 0.6564571911258447, "grad_norm": 0.15895245969295502, "learning_rate": 0.002, "loss": 2.547, "step": 329510 }, { "epoch": 0.6564771133494837, "grad_norm": 0.15372948348522186, "learning_rate": 0.002, "loss": 2.546, "step": 329520 }, { "epoch": 0.6564970355731226, "grad_norm": 0.1928480565547943, "learning_rate": 0.002, "loss": 2.5758, "step": 329530 }, { "epoch": 0.6565169577967614, "grad_norm": 0.184108704328537, "learning_rate": 0.002, "loss": 2.5597, "step": 329540 }, { "epoch": 0.6565368800204003, "grad_norm": 0.15633660554885864, "learning_rate": 0.002, "loss": 2.544, "step": 329550 }, { "epoch": 0.6565568022440392, "grad_norm": 0.16576987504959106, "learning_rate": 0.002, "loss": 2.5562, "step": 329560 }, { "epoch": 0.6565767244676782, "grad_norm": 0.16823013126850128, "learning_rate": 0.002, "loss": 2.5695, "step": 329570 }, { "epoch": 0.6565966466913171, "grad_norm": 0.18707282841205597, "learning_rate": 0.002, "loss": 2.5589, "step": 329580 }, { "epoch": 0.656616568914956, "grad_norm": 0.15584397315979004, "learning_rate": 0.002, "loss": 2.5428, "step": 329590 }, { "epoch": 0.6566364911385949, "grad_norm": 0.17394056916236877, "learning_rate": 0.002, "loss": 2.5499, "step": 329600 }, { "epoch": 0.6566564133622338, "grad_norm": 0.15888993442058563, "learning_rate": 0.002, "loss": 2.5653, "step": 329610 }, { "epoch": 0.6566763355858728, "grad_norm": 0.17567454278469086, "learning_rate": 0.002, "loss": 2.5672, "step": 329620 }, { "epoch": 0.6566962578095117, "grad_norm": 0.16235145926475525, "learning_rate": 0.002, "loss": 2.5684, "step": 329630 }, { "epoch": 0.6567161800331506, "grad_norm": 0.16867280006408691, "learning_rate": 0.002, "loss": 2.5554, "step": 329640 }, { "epoch": 0.6567361022567895, "grad_norm": 0.14687326550483704, "learning_rate": 0.002, "loss": 2.5508, "step": 329650 }, { "epoch": 0.6567560244804284, "grad_norm": 0.1628476083278656, "learning_rate": 0.002, "loss": 2.552, "step": 329660 }, { "epoch": 0.6567759467040674, "grad_norm": 0.19079792499542236, "learning_rate": 0.002, "loss": 2.5673, "step": 329670 }, { "epoch": 0.6567958689277063, "grad_norm": 0.15974581241607666, "learning_rate": 0.002, "loss": 2.5399, "step": 329680 }, { "epoch": 0.6568157911513451, "grad_norm": 0.1879596710205078, "learning_rate": 0.002, "loss": 2.5531, "step": 329690 }, { "epoch": 0.656835713374984, "grad_norm": 0.1483461856842041, "learning_rate": 0.002, "loss": 2.5707, "step": 329700 }, { "epoch": 0.6568556355986229, "grad_norm": 0.18090082705020905, "learning_rate": 0.002, "loss": 2.5496, "step": 329710 }, { "epoch": 0.6568755578222619, "grad_norm": 0.1790027916431427, "learning_rate": 0.002, "loss": 2.5556, "step": 329720 }, { "epoch": 0.6568954800459008, "grad_norm": 0.15561343729496002, "learning_rate": 0.002, "loss": 2.5713, "step": 329730 }, { "epoch": 0.6569154022695397, "grad_norm": 0.17169342935085297, "learning_rate": 0.002, "loss": 2.5516, "step": 329740 }, { "epoch": 0.6569353244931786, "grad_norm": 0.18003617227077484, "learning_rate": 0.002, "loss": 2.5393, "step": 329750 }, { "epoch": 0.6569552467168176, "grad_norm": 0.16850657761096954, "learning_rate": 0.002, "loss": 2.5557, "step": 329760 }, { "epoch": 0.6569751689404565, "grad_norm": 0.17462106049060822, "learning_rate": 0.002, "loss": 2.547, "step": 329770 }, { "epoch": 0.6569950911640954, "grad_norm": 0.1328667402267456, "learning_rate": 0.002, "loss": 2.5657, "step": 329780 }, { "epoch": 0.6570150133877343, "grad_norm": 0.16649405658245087, "learning_rate": 0.002, "loss": 2.5614, "step": 329790 }, { "epoch": 0.6570349356113732, "grad_norm": 0.1715918332338333, "learning_rate": 0.002, "loss": 2.5635, "step": 329800 }, { "epoch": 0.6570548578350122, "grad_norm": 0.15186062455177307, "learning_rate": 0.002, "loss": 2.5648, "step": 329810 }, { "epoch": 0.657074780058651, "grad_norm": 0.1732073724269867, "learning_rate": 0.002, "loss": 2.5578, "step": 329820 }, { "epoch": 0.6570947022822899, "grad_norm": 0.17506982386112213, "learning_rate": 0.002, "loss": 2.5346, "step": 329830 }, { "epoch": 0.6571146245059288, "grad_norm": 0.2010197788476944, "learning_rate": 0.002, "loss": 2.5837, "step": 329840 }, { "epoch": 0.6571345467295677, "grad_norm": 0.15661916136741638, "learning_rate": 0.002, "loss": 2.5564, "step": 329850 }, { "epoch": 0.6571544689532067, "grad_norm": 0.1552806794643402, "learning_rate": 0.002, "loss": 2.5637, "step": 329860 }, { "epoch": 0.6571743911768456, "grad_norm": 0.13720190525054932, "learning_rate": 0.002, "loss": 2.5518, "step": 329870 }, { "epoch": 0.6571943134004845, "grad_norm": 0.1918007880449295, "learning_rate": 0.002, "loss": 2.5542, "step": 329880 }, { "epoch": 0.6572142356241234, "grad_norm": 0.15681536495685577, "learning_rate": 0.002, "loss": 2.557, "step": 329890 }, { "epoch": 0.6572341578477623, "grad_norm": 0.149499773979187, "learning_rate": 0.002, "loss": 2.5676, "step": 329900 }, { "epoch": 0.6572540800714013, "grad_norm": 0.15307271480560303, "learning_rate": 0.002, "loss": 2.5542, "step": 329910 }, { "epoch": 0.6572740022950402, "grad_norm": 0.16321395337581635, "learning_rate": 0.002, "loss": 2.5495, "step": 329920 }, { "epoch": 0.6572939245186791, "grad_norm": 0.1752544343471527, "learning_rate": 0.002, "loss": 2.549, "step": 329930 }, { "epoch": 0.657313846742318, "grad_norm": 0.16896554827690125, "learning_rate": 0.002, "loss": 2.5525, "step": 329940 }, { "epoch": 0.6573337689659569, "grad_norm": 0.1699094921350479, "learning_rate": 0.002, "loss": 2.5512, "step": 329950 }, { "epoch": 0.6573536911895959, "grad_norm": 0.15948636829853058, "learning_rate": 0.002, "loss": 2.5546, "step": 329960 }, { "epoch": 0.6573736134132347, "grad_norm": 0.18303006887435913, "learning_rate": 0.002, "loss": 2.5675, "step": 329970 }, { "epoch": 0.6573935356368736, "grad_norm": 0.16041335463523865, "learning_rate": 0.002, "loss": 2.5771, "step": 329980 }, { "epoch": 0.6574134578605125, "grad_norm": 0.18961653113365173, "learning_rate": 0.002, "loss": 2.5527, "step": 329990 }, { "epoch": 0.6574333800841514, "grad_norm": 0.1413533240556717, "learning_rate": 0.002, "loss": 2.5618, "step": 330000 }, { "epoch": 0.6574533023077904, "grad_norm": 0.19575081765651703, "learning_rate": 0.002, "loss": 2.5624, "step": 330010 }, { "epoch": 0.6574732245314293, "grad_norm": 0.15388253331184387, "learning_rate": 0.002, "loss": 2.5496, "step": 330020 }, { "epoch": 0.6574931467550682, "grad_norm": 0.15190111100673676, "learning_rate": 0.002, "loss": 2.5611, "step": 330030 }, { "epoch": 0.6575130689787071, "grad_norm": 0.17904123663902283, "learning_rate": 0.002, "loss": 2.5663, "step": 330040 }, { "epoch": 0.6575329912023461, "grad_norm": 0.16392067074775696, "learning_rate": 0.002, "loss": 2.5493, "step": 330050 }, { "epoch": 0.657552913425985, "grad_norm": 0.16628269851207733, "learning_rate": 0.002, "loss": 2.5459, "step": 330060 }, { "epoch": 0.6575728356496239, "grad_norm": 0.14941972494125366, "learning_rate": 0.002, "loss": 2.5524, "step": 330070 }, { "epoch": 0.6575927578732628, "grad_norm": 0.1780238300561905, "learning_rate": 0.002, "loss": 2.5649, "step": 330080 }, { "epoch": 0.6576126800969017, "grad_norm": 0.17417024075984955, "learning_rate": 0.002, "loss": 2.5558, "step": 330090 }, { "epoch": 0.6576326023205407, "grad_norm": 0.16649533808231354, "learning_rate": 0.002, "loss": 2.5483, "step": 330100 }, { "epoch": 0.6576525245441796, "grad_norm": 0.176791250705719, "learning_rate": 0.002, "loss": 2.5565, "step": 330110 }, { "epoch": 0.6576724467678184, "grad_norm": 0.15973706543445587, "learning_rate": 0.002, "loss": 2.5536, "step": 330120 }, { "epoch": 0.6576923689914573, "grad_norm": 0.1950971633195877, "learning_rate": 0.002, "loss": 2.5717, "step": 330130 }, { "epoch": 0.6577122912150962, "grad_norm": 0.155987948179245, "learning_rate": 0.002, "loss": 2.5433, "step": 330140 }, { "epoch": 0.6577322134387352, "grad_norm": 0.16156645119190216, "learning_rate": 0.002, "loss": 2.5464, "step": 330150 }, { "epoch": 0.6577521356623741, "grad_norm": 0.1715511530637741, "learning_rate": 0.002, "loss": 2.5624, "step": 330160 }, { "epoch": 0.657772057886013, "grad_norm": 0.15570656955242157, "learning_rate": 0.002, "loss": 2.5567, "step": 330170 }, { "epoch": 0.6577919801096519, "grad_norm": 0.17035973072052002, "learning_rate": 0.002, "loss": 2.5631, "step": 330180 }, { "epoch": 0.6578119023332908, "grad_norm": 0.13316433131694794, "learning_rate": 0.002, "loss": 2.5427, "step": 330190 }, { "epoch": 0.6578318245569298, "grad_norm": 0.17663690447807312, "learning_rate": 0.002, "loss": 2.547, "step": 330200 }, { "epoch": 0.6578517467805687, "grad_norm": 0.19753870368003845, "learning_rate": 0.002, "loss": 2.5506, "step": 330210 }, { "epoch": 0.6578716690042076, "grad_norm": 0.16515235602855682, "learning_rate": 0.002, "loss": 2.548, "step": 330220 }, { "epoch": 0.6578915912278465, "grad_norm": 0.17036858201026917, "learning_rate": 0.002, "loss": 2.5327, "step": 330230 }, { "epoch": 0.6579115134514854, "grad_norm": 0.13525962829589844, "learning_rate": 0.002, "loss": 2.5451, "step": 330240 }, { "epoch": 0.6579314356751244, "grad_norm": 0.25186917185783386, "learning_rate": 0.002, "loss": 2.5541, "step": 330250 }, { "epoch": 0.6579513578987632, "grad_norm": 0.1745641678571701, "learning_rate": 0.002, "loss": 2.5394, "step": 330260 }, { "epoch": 0.6579712801224021, "grad_norm": 0.14920389652252197, "learning_rate": 0.002, "loss": 2.563, "step": 330270 }, { "epoch": 0.657991202346041, "grad_norm": 0.15327097475528717, "learning_rate": 0.002, "loss": 2.56, "step": 330280 }, { "epoch": 0.6580111245696799, "grad_norm": 0.19444917142391205, "learning_rate": 0.002, "loss": 2.5401, "step": 330290 }, { "epoch": 0.6580310467933189, "grad_norm": 0.16129088401794434, "learning_rate": 0.002, "loss": 2.5659, "step": 330300 }, { "epoch": 0.6580509690169578, "grad_norm": 0.1648930162191391, "learning_rate": 0.002, "loss": 2.5633, "step": 330310 }, { "epoch": 0.6580708912405967, "grad_norm": 0.13430842757225037, "learning_rate": 0.002, "loss": 2.5591, "step": 330320 }, { "epoch": 0.6580908134642356, "grad_norm": 0.17153231799602509, "learning_rate": 0.002, "loss": 2.5621, "step": 330330 }, { "epoch": 0.6581107356878746, "grad_norm": 0.15464510023593903, "learning_rate": 0.002, "loss": 2.5631, "step": 330340 }, { "epoch": 0.6581306579115135, "grad_norm": 0.17069046199321747, "learning_rate": 0.002, "loss": 2.5512, "step": 330350 }, { "epoch": 0.6581505801351524, "grad_norm": 0.1309542953968048, "learning_rate": 0.002, "loss": 2.561, "step": 330360 }, { "epoch": 0.6581705023587913, "grad_norm": 0.16138985753059387, "learning_rate": 0.002, "loss": 2.5536, "step": 330370 }, { "epoch": 0.6581904245824302, "grad_norm": 0.17804023623466492, "learning_rate": 0.002, "loss": 2.5556, "step": 330380 }, { "epoch": 0.6582103468060692, "grad_norm": 0.18251459300518036, "learning_rate": 0.002, "loss": 2.553, "step": 330390 }, { "epoch": 0.658230269029708, "grad_norm": 0.1555980145931244, "learning_rate": 0.002, "loss": 2.5543, "step": 330400 }, { "epoch": 0.6582501912533469, "grad_norm": 0.15428878366947174, "learning_rate": 0.002, "loss": 2.5384, "step": 330410 }, { "epoch": 0.6582701134769858, "grad_norm": 0.17663715779781342, "learning_rate": 0.002, "loss": 2.5519, "step": 330420 }, { "epoch": 0.6582900357006247, "grad_norm": 0.16976962983608246, "learning_rate": 0.002, "loss": 2.558, "step": 330430 }, { "epoch": 0.6583099579242637, "grad_norm": 0.17218096554279327, "learning_rate": 0.002, "loss": 2.5355, "step": 330440 }, { "epoch": 0.6583298801479026, "grad_norm": 0.16714437305927277, "learning_rate": 0.002, "loss": 2.5525, "step": 330450 }, { "epoch": 0.6583498023715415, "grad_norm": 0.1598675698041916, "learning_rate": 0.002, "loss": 2.5594, "step": 330460 }, { "epoch": 0.6583697245951804, "grad_norm": 0.16125290095806122, "learning_rate": 0.002, "loss": 2.5531, "step": 330470 }, { "epoch": 0.6583896468188193, "grad_norm": 0.1742393523454666, "learning_rate": 0.002, "loss": 2.5645, "step": 330480 }, { "epoch": 0.6584095690424583, "grad_norm": 0.17161844670772552, "learning_rate": 0.002, "loss": 2.5591, "step": 330490 }, { "epoch": 0.6584294912660972, "grad_norm": 0.18637420237064362, "learning_rate": 0.002, "loss": 2.5551, "step": 330500 }, { "epoch": 0.6584494134897361, "grad_norm": 0.1528337150812149, "learning_rate": 0.002, "loss": 2.5502, "step": 330510 }, { "epoch": 0.658469335713375, "grad_norm": 0.17219313979148865, "learning_rate": 0.002, "loss": 2.5499, "step": 330520 }, { "epoch": 0.6584892579370138, "grad_norm": 0.16335856914520264, "learning_rate": 0.002, "loss": 2.5463, "step": 330530 }, { "epoch": 0.6585091801606529, "grad_norm": 0.1536124050617218, "learning_rate": 0.002, "loss": 2.5705, "step": 330540 }, { "epoch": 0.6585291023842917, "grad_norm": 0.15039102733135223, "learning_rate": 0.002, "loss": 2.5608, "step": 330550 }, { "epoch": 0.6585490246079306, "grad_norm": 0.21060696244239807, "learning_rate": 0.002, "loss": 2.5502, "step": 330560 }, { "epoch": 0.6585689468315695, "grad_norm": 0.17105264961719513, "learning_rate": 0.002, "loss": 2.5551, "step": 330570 }, { "epoch": 0.6585888690552084, "grad_norm": 0.15417039394378662, "learning_rate": 0.002, "loss": 2.5431, "step": 330580 }, { "epoch": 0.6586087912788474, "grad_norm": 0.17250527441501617, "learning_rate": 0.002, "loss": 2.562, "step": 330590 }, { "epoch": 0.6586287135024863, "grad_norm": 0.1646117866039276, "learning_rate": 0.002, "loss": 2.5526, "step": 330600 }, { "epoch": 0.6586486357261252, "grad_norm": 0.16843858361244202, "learning_rate": 0.002, "loss": 2.5616, "step": 330610 }, { "epoch": 0.6586685579497641, "grad_norm": 0.15900947153568268, "learning_rate": 0.002, "loss": 2.5598, "step": 330620 }, { "epoch": 0.658688480173403, "grad_norm": 0.1511613130569458, "learning_rate": 0.002, "loss": 2.5564, "step": 330630 }, { "epoch": 0.658708402397042, "grad_norm": 0.15938881039619446, "learning_rate": 0.002, "loss": 2.546, "step": 330640 }, { "epoch": 0.6587283246206809, "grad_norm": 0.17320790886878967, "learning_rate": 0.002, "loss": 2.5487, "step": 330650 }, { "epoch": 0.6587482468443198, "grad_norm": 0.21981334686279297, "learning_rate": 0.002, "loss": 2.5666, "step": 330660 }, { "epoch": 0.6587681690679587, "grad_norm": 0.15622578561306, "learning_rate": 0.002, "loss": 2.5489, "step": 330670 }, { "epoch": 0.6587880912915977, "grad_norm": 0.14745838940143585, "learning_rate": 0.002, "loss": 2.5639, "step": 330680 }, { "epoch": 0.6588080135152365, "grad_norm": 0.17623580992221832, "learning_rate": 0.002, "loss": 2.5546, "step": 330690 }, { "epoch": 0.6588279357388754, "grad_norm": 0.1975640058517456, "learning_rate": 0.002, "loss": 2.5557, "step": 330700 }, { "epoch": 0.6588478579625143, "grad_norm": 0.1671621948480606, "learning_rate": 0.002, "loss": 2.5537, "step": 330710 }, { "epoch": 0.6588677801861532, "grad_norm": 0.16470354795455933, "learning_rate": 0.002, "loss": 2.5652, "step": 330720 }, { "epoch": 0.6588877024097922, "grad_norm": 0.1659911572933197, "learning_rate": 0.002, "loss": 2.5534, "step": 330730 }, { "epoch": 0.6589076246334311, "grad_norm": 0.20398922264575958, "learning_rate": 0.002, "loss": 2.5685, "step": 330740 }, { "epoch": 0.65892754685707, "grad_norm": 0.15943261981010437, "learning_rate": 0.002, "loss": 2.5584, "step": 330750 }, { "epoch": 0.6589474690807089, "grad_norm": 0.15137682855129242, "learning_rate": 0.002, "loss": 2.5542, "step": 330760 }, { "epoch": 0.6589673913043478, "grad_norm": 0.16833190619945526, "learning_rate": 0.002, "loss": 2.5555, "step": 330770 }, { "epoch": 0.6589873135279868, "grad_norm": 0.15955153107643127, "learning_rate": 0.002, "loss": 2.5381, "step": 330780 }, { "epoch": 0.6590072357516257, "grad_norm": 0.16583369672298431, "learning_rate": 0.002, "loss": 2.5699, "step": 330790 }, { "epoch": 0.6590271579752646, "grad_norm": 0.14909645915031433, "learning_rate": 0.002, "loss": 2.5504, "step": 330800 }, { "epoch": 0.6590470801989035, "grad_norm": 0.16843917965888977, "learning_rate": 0.002, "loss": 2.5548, "step": 330810 }, { "epoch": 0.6590670024225423, "grad_norm": 0.14599202573299408, "learning_rate": 0.002, "loss": 2.5386, "step": 330820 }, { "epoch": 0.6590869246461813, "grad_norm": 0.20747637748718262, "learning_rate": 0.002, "loss": 2.5484, "step": 330830 }, { "epoch": 0.6591068468698202, "grad_norm": 0.1605871021747589, "learning_rate": 0.002, "loss": 2.5539, "step": 330840 }, { "epoch": 0.6591267690934591, "grad_norm": 0.17966312170028687, "learning_rate": 0.002, "loss": 2.5514, "step": 330850 }, { "epoch": 0.659146691317098, "grad_norm": 0.15863071382045746, "learning_rate": 0.002, "loss": 2.5658, "step": 330860 }, { "epoch": 0.6591666135407369, "grad_norm": 0.23603074252605438, "learning_rate": 0.002, "loss": 2.5591, "step": 330870 }, { "epoch": 0.6591865357643759, "grad_norm": 0.15866713225841522, "learning_rate": 0.002, "loss": 2.5599, "step": 330880 }, { "epoch": 0.6592064579880148, "grad_norm": 0.13802315294742584, "learning_rate": 0.002, "loss": 2.5658, "step": 330890 }, { "epoch": 0.6592263802116537, "grad_norm": 0.15883967280387878, "learning_rate": 0.002, "loss": 2.5654, "step": 330900 }, { "epoch": 0.6592463024352926, "grad_norm": 0.1712357997894287, "learning_rate": 0.002, "loss": 2.5359, "step": 330910 }, { "epoch": 0.6592662246589315, "grad_norm": 0.146736279129982, "learning_rate": 0.002, "loss": 2.547, "step": 330920 }, { "epoch": 0.6592861468825705, "grad_norm": 0.1491510570049286, "learning_rate": 0.002, "loss": 2.5486, "step": 330930 }, { "epoch": 0.6593060691062094, "grad_norm": 0.19920359551906586, "learning_rate": 0.002, "loss": 2.5439, "step": 330940 }, { "epoch": 0.6593259913298483, "grad_norm": 0.16881829500198364, "learning_rate": 0.002, "loss": 2.5517, "step": 330950 }, { "epoch": 0.6593459135534872, "grad_norm": 0.15416938066482544, "learning_rate": 0.002, "loss": 2.545, "step": 330960 }, { "epoch": 0.6593658357771262, "grad_norm": 0.17891384661197662, "learning_rate": 0.002, "loss": 2.549, "step": 330970 }, { "epoch": 0.659385758000765, "grad_norm": 0.15480178594589233, "learning_rate": 0.002, "loss": 2.5712, "step": 330980 }, { "epoch": 0.6594056802244039, "grad_norm": 0.17065642774105072, "learning_rate": 0.002, "loss": 2.5506, "step": 330990 }, { "epoch": 0.6594256024480428, "grad_norm": 0.1757195144891739, "learning_rate": 0.002, "loss": 2.5503, "step": 331000 }, { "epoch": 0.6594455246716817, "grad_norm": 0.14509886503219604, "learning_rate": 0.002, "loss": 2.5432, "step": 331010 }, { "epoch": 0.6594654468953207, "grad_norm": 0.15925005078315735, "learning_rate": 0.002, "loss": 2.5717, "step": 331020 }, { "epoch": 0.6594853691189596, "grad_norm": 0.22177055478096008, "learning_rate": 0.002, "loss": 2.5467, "step": 331030 }, { "epoch": 0.6595052913425985, "grad_norm": 0.169706791639328, "learning_rate": 0.002, "loss": 2.5614, "step": 331040 }, { "epoch": 0.6595252135662374, "grad_norm": 0.18435795605182648, "learning_rate": 0.002, "loss": 2.5519, "step": 331050 }, { "epoch": 0.6595451357898763, "grad_norm": 0.15501916408538818, "learning_rate": 0.002, "loss": 2.5688, "step": 331060 }, { "epoch": 0.6595650580135153, "grad_norm": 0.179579496383667, "learning_rate": 0.002, "loss": 2.553, "step": 331070 }, { "epoch": 0.6595849802371542, "grad_norm": 0.16119812428951263, "learning_rate": 0.002, "loss": 2.5627, "step": 331080 }, { "epoch": 0.6596049024607931, "grad_norm": 0.17580646276474, "learning_rate": 0.002, "loss": 2.5601, "step": 331090 }, { "epoch": 0.659624824684432, "grad_norm": 0.1579640954732895, "learning_rate": 0.002, "loss": 2.548, "step": 331100 }, { "epoch": 0.6596447469080708, "grad_norm": 0.15785446763038635, "learning_rate": 0.002, "loss": 2.5443, "step": 331110 }, { "epoch": 0.6596646691317098, "grad_norm": 0.164139524102211, "learning_rate": 0.002, "loss": 2.5614, "step": 331120 }, { "epoch": 0.6596845913553487, "grad_norm": 0.17661520838737488, "learning_rate": 0.002, "loss": 2.5493, "step": 331130 }, { "epoch": 0.6597045135789876, "grad_norm": 0.13576146960258484, "learning_rate": 0.002, "loss": 2.5612, "step": 331140 }, { "epoch": 0.6597244358026265, "grad_norm": 0.13869696855545044, "learning_rate": 0.002, "loss": 2.5504, "step": 331150 }, { "epoch": 0.6597443580262654, "grad_norm": 0.1547650694847107, "learning_rate": 0.002, "loss": 2.5356, "step": 331160 }, { "epoch": 0.6597642802499044, "grad_norm": 0.19163255393505096, "learning_rate": 0.002, "loss": 2.5422, "step": 331170 }, { "epoch": 0.6597842024735433, "grad_norm": 0.15603511035442352, "learning_rate": 0.002, "loss": 2.5599, "step": 331180 }, { "epoch": 0.6598041246971822, "grad_norm": 0.1569872349500656, "learning_rate": 0.002, "loss": 2.5437, "step": 331190 }, { "epoch": 0.6598240469208211, "grad_norm": 0.1748022586107254, "learning_rate": 0.002, "loss": 2.5565, "step": 331200 }, { "epoch": 0.65984396914446, "grad_norm": 0.16212844848632812, "learning_rate": 0.002, "loss": 2.5764, "step": 331210 }, { "epoch": 0.659863891368099, "grad_norm": 0.15946657955646515, "learning_rate": 0.002, "loss": 2.5649, "step": 331220 }, { "epoch": 0.6598838135917379, "grad_norm": 0.15464048087596893, "learning_rate": 0.002, "loss": 2.5481, "step": 331230 }, { "epoch": 0.6599037358153768, "grad_norm": 0.16174906492233276, "learning_rate": 0.002, "loss": 2.5551, "step": 331240 }, { "epoch": 0.6599236580390156, "grad_norm": 0.15707367658615112, "learning_rate": 0.002, "loss": 2.5406, "step": 331250 }, { "epoch": 0.6599435802626546, "grad_norm": 0.15120580792427063, "learning_rate": 0.002, "loss": 2.5461, "step": 331260 }, { "epoch": 0.6599635024862935, "grad_norm": 0.17845527827739716, "learning_rate": 0.002, "loss": 2.561, "step": 331270 }, { "epoch": 0.6599834247099324, "grad_norm": 0.1811748743057251, "learning_rate": 0.002, "loss": 2.5588, "step": 331280 }, { "epoch": 0.6600033469335713, "grad_norm": 0.15240125358104706, "learning_rate": 0.002, "loss": 2.5522, "step": 331290 }, { "epoch": 0.6600232691572102, "grad_norm": 0.2063310444355011, "learning_rate": 0.002, "loss": 2.5593, "step": 331300 }, { "epoch": 0.6600431913808492, "grad_norm": 0.19143635034561157, "learning_rate": 0.002, "loss": 2.576, "step": 331310 }, { "epoch": 0.6600631136044881, "grad_norm": 0.14506298303604126, "learning_rate": 0.002, "loss": 2.5502, "step": 331320 }, { "epoch": 0.660083035828127, "grad_norm": 0.15789258480072021, "learning_rate": 0.002, "loss": 2.5536, "step": 331330 }, { "epoch": 0.6601029580517659, "grad_norm": 0.18034066259860992, "learning_rate": 0.002, "loss": 2.564, "step": 331340 }, { "epoch": 0.6601228802754048, "grad_norm": 0.22059032320976257, "learning_rate": 0.002, "loss": 2.5718, "step": 331350 }, { "epoch": 0.6601428024990438, "grad_norm": 0.1480673849582672, "learning_rate": 0.002, "loss": 2.5685, "step": 331360 }, { "epoch": 0.6601627247226827, "grad_norm": 0.17359673976898193, "learning_rate": 0.002, "loss": 2.5642, "step": 331370 }, { "epoch": 0.6601826469463216, "grad_norm": 0.13718000054359436, "learning_rate": 0.002, "loss": 2.548, "step": 331380 }, { "epoch": 0.6602025691699605, "grad_norm": 0.1962593048810959, "learning_rate": 0.002, "loss": 2.565, "step": 331390 }, { "epoch": 0.6602224913935993, "grad_norm": 0.19353432953357697, "learning_rate": 0.002, "loss": 2.5527, "step": 331400 }, { "epoch": 0.6602424136172383, "grad_norm": 0.18408526480197906, "learning_rate": 0.002, "loss": 2.5465, "step": 331410 }, { "epoch": 0.6602623358408772, "grad_norm": 0.16603624820709229, "learning_rate": 0.002, "loss": 2.5429, "step": 331420 }, { "epoch": 0.6602822580645161, "grad_norm": 0.15725646913051605, "learning_rate": 0.002, "loss": 2.5641, "step": 331430 }, { "epoch": 0.660302180288155, "grad_norm": 0.17661598324775696, "learning_rate": 0.002, "loss": 2.5586, "step": 331440 }, { "epoch": 0.6603221025117939, "grad_norm": 0.16637901961803436, "learning_rate": 0.002, "loss": 2.5549, "step": 331450 }, { "epoch": 0.6603420247354329, "grad_norm": 0.1513851135969162, "learning_rate": 0.002, "loss": 2.5635, "step": 331460 }, { "epoch": 0.6603619469590718, "grad_norm": 0.16122092306613922, "learning_rate": 0.002, "loss": 2.5488, "step": 331470 }, { "epoch": 0.6603818691827107, "grad_norm": 0.15345452725887299, "learning_rate": 0.002, "loss": 2.538, "step": 331480 }, { "epoch": 0.6604017914063496, "grad_norm": 0.16042110323905945, "learning_rate": 0.002, "loss": 2.5625, "step": 331490 }, { "epoch": 0.6604217136299885, "grad_norm": 0.14292630553245544, "learning_rate": 0.002, "loss": 2.5462, "step": 331500 }, { "epoch": 0.6604416358536275, "grad_norm": 0.17594976723194122, "learning_rate": 0.002, "loss": 2.5677, "step": 331510 }, { "epoch": 0.6604615580772664, "grad_norm": 0.1808091104030609, "learning_rate": 0.002, "loss": 2.5437, "step": 331520 }, { "epoch": 0.6604814803009053, "grad_norm": 0.1747875064611435, "learning_rate": 0.002, "loss": 2.5688, "step": 331530 }, { "epoch": 0.6605014025245441, "grad_norm": 0.15238864719867706, "learning_rate": 0.002, "loss": 2.5533, "step": 331540 }, { "epoch": 0.6605213247481831, "grad_norm": 0.15235351026058197, "learning_rate": 0.002, "loss": 2.5497, "step": 331550 }, { "epoch": 0.660541246971822, "grad_norm": 0.13612128794193268, "learning_rate": 0.002, "loss": 2.5517, "step": 331560 }, { "epoch": 0.6605611691954609, "grad_norm": 0.17313218116760254, "learning_rate": 0.002, "loss": 2.5605, "step": 331570 }, { "epoch": 0.6605810914190998, "grad_norm": 0.15934200584888458, "learning_rate": 0.002, "loss": 2.5558, "step": 331580 }, { "epoch": 0.6606010136427387, "grad_norm": 0.194861501455307, "learning_rate": 0.002, "loss": 2.5654, "step": 331590 }, { "epoch": 0.6606209358663777, "grad_norm": 0.21267923712730408, "learning_rate": 0.002, "loss": 2.5537, "step": 331600 }, { "epoch": 0.6606408580900166, "grad_norm": 0.17495493590831757, "learning_rate": 0.002, "loss": 2.5677, "step": 331610 }, { "epoch": 0.6606607803136555, "grad_norm": 0.2058614194393158, "learning_rate": 0.002, "loss": 2.5443, "step": 331620 }, { "epoch": 0.6606807025372944, "grad_norm": 0.15703123807907104, "learning_rate": 0.002, "loss": 2.5541, "step": 331630 }, { "epoch": 0.6607006247609333, "grad_norm": 0.15325021743774414, "learning_rate": 0.002, "loss": 2.5356, "step": 331640 }, { "epoch": 0.6607205469845723, "grad_norm": 0.1458386331796646, "learning_rate": 0.002, "loss": 2.5688, "step": 331650 }, { "epoch": 0.6607404692082112, "grad_norm": 0.20679236948490143, "learning_rate": 0.002, "loss": 2.5497, "step": 331660 }, { "epoch": 0.6607603914318501, "grad_norm": 0.1524050384759903, "learning_rate": 0.002, "loss": 2.5516, "step": 331670 }, { "epoch": 0.660780313655489, "grad_norm": 0.2106601893901825, "learning_rate": 0.002, "loss": 2.5571, "step": 331680 }, { "epoch": 0.6608002358791278, "grad_norm": 0.16598278284072876, "learning_rate": 0.002, "loss": 2.5515, "step": 331690 }, { "epoch": 0.6608201581027668, "grad_norm": 0.18590718507766724, "learning_rate": 0.002, "loss": 2.5545, "step": 331700 }, { "epoch": 0.6608400803264057, "grad_norm": 0.15578356385231018, "learning_rate": 0.002, "loss": 2.5511, "step": 331710 }, { "epoch": 0.6608600025500446, "grad_norm": 0.16907748579978943, "learning_rate": 0.002, "loss": 2.5491, "step": 331720 }, { "epoch": 0.6608799247736835, "grad_norm": 0.18140114843845367, "learning_rate": 0.002, "loss": 2.5597, "step": 331730 }, { "epoch": 0.6608998469973224, "grad_norm": 0.20411090552806854, "learning_rate": 0.002, "loss": 2.569, "step": 331740 }, { "epoch": 0.6609197692209614, "grad_norm": 0.13098172843456268, "learning_rate": 0.002, "loss": 2.5609, "step": 331750 }, { "epoch": 0.6609396914446003, "grad_norm": 0.1692359894514084, "learning_rate": 0.002, "loss": 2.5776, "step": 331760 }, { "epoch": 0.6609596136682392, "grad_norm": 0.18162281811237335, "learning_rate": 0.002, "loss": 2.5582, "step": 331770 }, { "epoch": 0.6609795358918781, "grad_norm": 0.15969552099704742, "learning_rate": 0.002, "loss": 2.5659, "step": 331780 }, { "epoch": 0.660999458115517, "grad_norm": 0.19912566244602203, "learning_rate": 0.002, "loss": 2.555, "step": 331790 }, { "epoch": 0.661019380339156, "grad_norm": 0.15250959992408752, "learning_rate": 0.002, "loss": 2.5589, "step": 331800 }, { "epoch": 0.6610393025627949, "grad_norm": 0.1505664438009262, "learning_rate": 0.002, "loss": 2.5428, "step": 331810 }, { "epoch": 0.6610592247864338, "grad_norm": 0.1713564693927765, "learning_rate": 0.002, "loss": 2.5689, "step": 331820 }, { "epoch": 0.6610791470100726, "grad_norm": 0.18105798959732056, "learning_rate": 0.002, "loss": 2.5379, "step": 331830 }, { "epoch": 0.6610990692337116, "grad_norm": 0.15212886035442352, "learning_rate": 0.002, "loss": 2.5584, "step": 331840 }, { "epoch": 0.6611189914573505, "grad_norm": 0.5511860847473145, "learning_rate": 0.002, "loss": 2.5509, "step": 331850 }, { "epoch": 0.6611389136809894, "grad_norm": 0.18282143771648407, "learning_rate": 0.002, "loss": 2.5657, "step": 331860 }, { "epoch": 0.6611588359046283, "grad_norm": 0.1380741000175476, "learning_rate": 0.002, "loss": 2.5616, "step": 331870 }, { "epoch": 0.6611787581282672, "grad_norm": 0.21160770952701569, "learning_rate": 0.002, "loss": 2.5533, "step": 331880 }, { "epoch": 0.6611986803519062, "grad_norm": 0.1838851124048233, "learning_rate": 0.002, "loss": 2.5587, "step": 331890 }, { "epoch": 0.6612186025755451, "grad_norm": 0.13868539035320282, "learning_rate": 0.002, "loss": 2.5469, "step": 331900 }, { "epoch": 0.661238524799184, "grad_norm": 0.1503055989742279, "learning_rate": 0.002, "loss": 2.5555, "step": 331910 }, { "epoch": 0.6612584470228229, "grad_norm": 0.1602572351694107, "learning_rate": 0.002, "loss": 2.567, "step": 331920 }, { "epoch": 0.6612783692464618, "grad_norm": 0.198478102684021, "learning_rate": 0.002, "loss": 2.5703, "step": 331930 }, { "epoch": 0.6612982914701008, "grad_norm": 0.14738813042640686, "learning_rate": 0.002, "loss": 2.562, "step": 331940 }, { "epoch": 0.6613182136937397, "grad_norm": 0.1645241230726242, "learning_rate": 0.002, "loss": 2.5618, "step": 331950 }, { "epoch": 0.6613381359173786, "grad_norm": 0.14653605222702026, "learning_rate": 0.002, "loss": 2.5684, "step": 331960 }, { "epoch": 0.6613580581410174, "grad_norm": 0.14409604668617249, "learning_rate": 0.002, "loss": 2.5536, "step": 331970 }, { "epoch": 0.6613779803646563, "grad_norm": 0.1715683490037918, "learning_rate": 0.002, "loss": 2.5537, "step": 331980 }, { "epoch": 0.6613979025882953, "grad_norm": 0.1782952845096588, "learning_rate": 0.002, "loss": 2.5511, "step": 331990 }, { "epoch": 0.6614178248119342, "grad_norm": 0.1636781096458435, "learning_rate": 0.002, "loss": 2.5556, "step": 332000 }, { "epoch": 0.6614377470355731, "grad_norm": 0.16720275580883026, "learning_rate": 0.002, "loss": 2.547, "step": 332010 }, { "epoch": 0.661457669259212, "grad_norm": 0.16122332215309143, "learning_rate": 0.002, "loss": 2.5556, "step": 332020 }, { "epoch": 0.6614775914828509, "grad_norm": 0.15190517902374268, "learning_rate": 0.002, "loss": 2.5534, "step": 332030 }, { "epoch": 0.6614975137064899, "grad_norm": 0.16238564252853394, "learning_rate": 0.002, "loss": 2.5512, "step": 332040 }, { "epoch": 0.6615174359301288, "grad_norm": 0.18171468377113342, "learning_rate": 0.002, "loss": 2.5732, "step": 332050 }, { "epoch": 0.6615373581537677, "grad_norm": 0.18863020837306976, "learning_rate": 0.002, "loss": 2.5553, "step": 332060 }, { "epoch": 0.6615572803774066, "grad_norm": 0.13962742686271667, "learning_rate": 0.002, "loss": 2.5669, "step": 332070 }, { "epoch": 0.6615772026010455, "grad_norm": 0.22494690120220184, "learning_rate": 0.002, "loss": 2.5674, "step": 332080 }, { "epoch": 0.6615971248246845, "grad_norm": 0.1494603455066681, "learning_rate": 0.002, "loss": 2.5466, "step": 332090 }, { "epoch": 0.6616170470483234, "grad_norm": 0.17993636429309845, "learning_rate": 0.002, "loss": 2.5398, "step": 332100 }, { "epoch": 0.6616369692719622, "grad_norm": 0.2178785800933838, "learning_rate": 0.002, "loss": 2.5584, "step": 332110 }, { "epoch": 0.6616568914956011, "grad_norm": 0.1304345279932022, "learning_rate": 0.002, "loss": 2.5629, "step": 332120 }, { "epoch": 0.66167681371924, "grad_norm": 0.17457009851932526, "learning_rate": 0.002, "loss": 2.5557, "step": 332130 }, { "epoch": 0.661696735942879, "grad_norm": 0.1452951282262802, "learning_rate": 0.002, "loss": 2.556, "step": 332140 }, { "epoch": 0.6617166581665179, "grad_norm": 0.15008699893951416, "learning_rate": 0.002, "loss": 2.5422, "step": 332150 }, { "epoch": 0.6617365803901568, "grad_norm": 0.15194246172904968, "learning_rate": 0.002, "loss": 2.5602, "step": 332160 }, { "epoch": 0.6617565026137957, "grad_norm": 0.2218978852033615, "learning_rate": 0.002, "loss": 2.5485, "step": 332170 }, { "epoch": 0.6617764248374347, "grad_norm": 0.1900244951248169, "learning_rate": 0.002, "loss": 2.5534, "step": 332180 }, { "epoch": 0.6617963470610736, "grad_norm": 0.15033170580863953, "learning_rate": 0.002, "loss": 2.5531, "step": 332190 }, { "epoch": 0.6618162692847125, "grad_norm": 0.160668283700943, "learning_rate": 0.002, "loss": 2.5516, "step": 332200 }, { "epoch": 0.6618361915083514, "grad_norm": 0.19283007085323334, "learning_rate": 0.002, "loss": 2.5695, "step": 332210 }, { "epoch": 0.6618561137319903, "grad_norm": 0.16479893028736115, "learning_rate": 0.002, "loss": 2.5599, "step": 332220 }, { "epoch": 0.6618760359556293, "grad_norm": 0.15838149189949036, "learning_rate": 0.002, "loss": 2.5679, "step": 332230 }, { "epoch": 0.6618959581792682, "grad_norm": 0.16838613152503967, "learning_rate": 0.002, "loss": 2.5413, "step": 332240 }, { "epoch": 0.661915880402907, "grad_norm": 0.15572701394557953, "learning_rate": 0.002, "loss": 2.5587, "step": 332250 }, { "epoch": 0.6619358026265459, "grad_norm": 0.1734090894460678, "learning_rate": 0.002, "loss": 2.5643, "step": 332260 }, { "epoch": 0.6619557248501848, "grad_norm": 0.17001907527446747, "learning_rate": 0.002, "loss": 2.5507, "step": 332270 }, { "epoch": 0.6619756470738238, "grad_norm": 0.19087524712085724, "learning_rate": 0.002, "loss": 2.5537, "step": 332280 }, { "epoch": 0.6619955692974627, "grad_norm": 0.18425807356834412, "learning_rate": 0.002, "loss": 2.5636, "step": 332290 }, { "epoch": 0.6620154915211016, "grad_norm": 0.16758154332637787, "learning_rate": 0.002, "loss": 2.5488, "step": 332300 }, { "epoch": 0.6620354137447405, "grad_norm": 0.1618431806564331, "learning_rate": 0.002, "loss": 2.5524, "step": 332310 }, { "epoch": 0.6620553359683794, "grad_norm": 0.18734118342399597, "learning_rate": 0.002, "loss": 2.5487, "step": 332320 }, { "epoch": 0.6620752581920184, "grad_norm": 0.1744239181280136, "learning_rate": 0.002, "loss": 2.5608, "step": 332330 }, { "epoch": 0.6620951804156573, "grad_norm": 0.1418701410293579, "learning_rate": 0.002, "loss": 2.5505, "step": 332340 }, { "epoch": 0.6621151026392962, "grad_norm": 0.1742451936006546, "learning_rate": 0.002, "loss": 2.5535, "step": 332350 }, { "epoch": 0.6621350248629351, "grad_norm": 0.16631625592708588, "learning_rate": 0.002, "loss": 2.5566, "step": 332360 }, { "epoch": 0.662154947086574, "grad_norm": 0.172337606549263, "learning_rate": 0.002, "loss": 2.5533, "step": 332370 }, { "epoch": 0.662174869310213, "grad_norm": 0.1603258103132248, "learning_rate": 0.002, "loss": 2.5499, "step": 332380 }, { "epoch": 0.6621947915338519, "grad_norm": 0.15877389907836914, "learning_rate": 0.002, "loss": 2.5519, "step": 332390 }, { "epoch": 0.6622147137574907, "grad_norm": 0.1878526657819748, "learning_rate": 0.002, "loss": 2.5512, "step": 332400 }, { "epoch": 0.6622346359811296, "grad_norm": 0.1559855192899704, "learning_rate": 0.002, "loss": 2.5603, "step": 332410 }, { "epoch": 0.6622545582047685, "grad_norm": 0.18490582704544067, "learning_rate": 0.002, "loss": 2.5487, "step": 332420 }, { "epoch": 0.6622744804284075, "grad_norm": 0.14618848264217377, "learning_rate": 0.002, "loss": 2.5518, "step": 332430 }, { "epoch": 0.6622944026520464, "grad_norm": 0.1586492657661438, "learning_rate": 0.002, "loss": 2.5575, "step": 332440 }, { "epoch": 0.6623143248756853, "grad_norm": 0.16405372321605682, "learning_rate": 0.002, "loss": 2.5591, "step": 332450 }, { "epoch": 0.6623342470993242, "grad_norm": 0.19220101833343506, "learning_rate": 0.002, "loss": 2.5533, "step": 332460 }, { "epoch": 0.6623541693229632, "grad_norm": 0.20563103258609772, "learning_rate": 0.002, "loss": 2.55, "step": 332470 }, { "epoch": 0.6623740915466021, "grad_norm": 0.14812727272510529, "learning_rate": 0.002, "loss": 2.5596, "step": 332480 }, { "epoch": 0.662394013770241, "grad_norm": 0.1708097904920578, "learning_rate": 0.002, "loss": 2.5466, "step": 332490 }, { "epoch": 0.6624139359938799, "grad_norm": 0.1537240892648697, "learning_rate": 0.002, "loss": 2.5491, "step": 332500 }, { "epoch": 0.6624338582175188, "grad_norm": 0.1685309112071991, "learning_rate": 0.002, "loss": 2.5646, "step": 332510 }, { "epoch": 0.6624537804411578, "grad_norm": 0.1513640135526657, "learning_rate": 0.002, "loss": 2.557, "step": 332520 }, { "epoch": 0.6624737026647967, "grad_norm": 0.4206078350543976, "learning_rate": 0.002, "loss": 2.5433, "step": 332530 }, { "epoch": 0.6624936248884356, "grad_norm": 0.18273033201694489, "learning_rate": 0.002, "loss": 2.5612, "step": 332540 }, { "epoch": 0.6625135471120744, "grad_norm": 0.18857897818088531, "learning_rate": 0.002, "loss": 2.5498, "step": 332550 }, { "epoch": 0.6625334693357133, "grad_norm": 0.1359684318304062, "learning_rate": 0.002, "loss": 2.5574, "step": 332560 }, { "epoch": 0.6625533915593523, "grad_norm": 0.16179777681827545, "learning_rate": 0.002, "loss": 2.5635, "step": 332570 }, { "epoch": 0.6625733137829912, "grad_norm": 0.1521255522966385, "learning_rate": 0.002, "loss": 2.564, "step": 332580 }, { "epoch": 0.6625932360066301, "grad_norm": 0.176780566573143, "learning_rate": 0.002, "loss": 2.5502, "step": 332590 }, { "epoch": 0.662613158230269, "grad_norm": 0.14865130186080933, "learning_rate": 0.002, "loss": 2.5513, "step": 332600 }, { "epoch": 0.6626330804539079, "grad_norm": 0.16420936584472656, "learning_rate": 0.002, "loss": 2.5536, "step": 332610 }, { "epoch": 0.6626530026775469, "grad_norm": 0.176494300365448, "learning_rate": 0.002, "loss": 2.5718, "step": 332620 }, { "epoch": 0.6626729249011858, "grad_norm": 0.18244683742523193, "learning_rate": 0.002, "loss": 2.5478, "step": 332630 }, { "epoch": 0.6626928471248247, "grad_norm": 0.15719255805015564, "learning_rate": 0.002, "loss": 2.5414, "step": 332640 }, { "epoch": 0.6627127693484636, "grad_norm": 0.1539926379919052, "learning_rate": 0.002, "loss": 2.5451, "step": 332650 }, { "epoch": 0.6627326915721025, "grad_norm": 0.16009187698364258, "learning_rate": 0.002, "loss": 2.5495, "step": 332660 }, { "epoch": 0.6627526137957415, "grad_norm": 0.1898220032453537, "learning_rate": 0.002, "loss": 2.5571, "step": 332670 }, { "epoch": 0.6627725360193804, "grad_norm": 0.19101977348327637, "learning_rate": 0.002, "loss": 2.5521, "step": 332680 }, { "epoch": 0.6627924582430192, "grad_norm": 0.16687068343162537, "learning_rate": 0.002, "loss": 2.5625, "step": 332690 }, { "epoch": 0.6628123804666581, "grad_norm": 0.14833974838256836, "learning_rate": 0.002, "loss": 2.5472, "step": 332700 }, { "epoch": 0.662832302690297, "grad_norm": 0.17508254945278168, "learning_rate": 0.002, "loss": 2.5542, "step": 332710 }, { "epoch": 0.662852224913936, "grad_norm": 0.1547425091266632, "learning_rate": 0.002, "loss": 2.5678, "step": 332720 }, { "epoch": 0.6628721471375749, "grad_norm": 0.17109215259552002, "learning_rate": 0.002, "loss": 2.5599, "step": 332730 }, { "epoch": 0.6628920693612138, "grad_norm": 0.1911516934633255, "learning_rate": 0.002, "loss": 2.564, "step": 332740 }, { "epoch": 0.6629119915848527, "grad_norm": 0.17105600237846375, "learning_rate": 0.002, "loss": 2.5572, "step": 332750 }, { "epoch": 0.6629319138084917, "grad_norm": 0.1486363559961319, "learning_rate": 0.002, "loss": 2.5422, "step": 332760 }, { "epoch": 0.6629518360321306, "grad_norm": 0.1736108362674713, "learning_rate": 0.002, "loss": 2.5598, "step": 332770 }, { "epoch": 0.6629717582557695, "grad_norm": 0.17032583057880402, "learning_rate": 0.002, "loss": 2.5721, "step": 332780 }, { "epoch": 0.6629916804794084, "grad_norm": 0.16265897452831268, "learning_rate": 0.002, "loss": 2.5537, "step": 332790 }, { "epoch": 0.6630116027030473, "grad_norm": 0.19095082581043243, "learning_rate": 0.002, "loss": 2.548, "step": 332800 }, { "epoch": 0.6630315249266863, "grad_norm": 0.14117363095283508, "learning_rate": 0.002, "loss": 2.546, "step": 332810 }, { "epoch": 0.6630514471503252, "grad_norm": 0.1531951129436493, "learning_rate": 0.002, "loss": 2.5609, "step": 332820 }, { "epoch": 0.663071369373964, "grad_norm": 0.17308904230594635, "learning_rate": 0.002, "loss": 2.5584, "step": 332830 }, { "epoch": 0.6630912915976029, "grad_norm": 0.1731148511171341, "learning_rate": 0.002, "loss": 2.5598, "step": 332840 }, { "epoch": 0.6631112138212418, "grad_norm": 0.15635208785533905, "learning_rate": 0.002, "loss": 2.5679, "step": 332850 }, { "epoch": 0.6631311360448808, "grad_norm": 0.16351135075092316, "learning_rate": 0.002, "loss": 2.551, "step": 332860 }, { "epoch": 0.6631510582685197, "grad_norm": 0.14692322909832, "learning_rate": 0.002, "loss": 2.5506, "step": 332870 }, { "epoch": 0.6631709804921586, "grad_norm": 0.16845868527889252, "learning_rate": 0.002, "loss": 2.5469, "step": 332880 }, { "epoch": 0.6631909027157975, "grad_norm": 0.19129697978496552, "learning_rate": 0.002, "loss": 2.5532, "step": 332890 }, { "epoch": 0.6632108249394364, "grad_norm": 0.24175742268562317, "learning_rate": 0.002, "loss": 2.5589, "step": 332900 }, { "epoch": 0.6632307471630754, "grad_norm": 0.19121254980564117, "learning_rate": 0.002, "loss": 2.5578, "step": 332910 }, { "epoch": 0.6632506693867143, "grad_norm": 0.17769375443458557, "learning_rate": 0.002, "loss": 2.5515, "step": 332920 }, { "epoch": 0.6632705916103532, "grad_norm": 0.1872033029794693, "learning_rate": 0.002, "loss": 2.5554, "step": 332930 }, { "epoch": 0.6632905138339921, "grad_norm": 0.14610226452350616, "learning_rate": 0.002, "loss": 2.546, "step": 332940 }, { "epoch": 0.663310436057631, "grad_norm": 0.1501840502023697, "learning_rate": 0.002, "loss": 2.5553, "step": 332950 }, { "epoch": 0.66333035828127, "grad_norm": 0.15578529238700867, "learning_rate": 0.002, "loss": 2.5736, "step": 332960 }, { "epoch": 0.6633502805049089, "grad_norm": 0.19685764610767365, "learning_rate": 0.002, "loss": 2.555, "step": 332970 }, { "epoch": 0.6633702027285477, "grad_norm": 0.18478348851203918, "learning_rate": 0.002, "loss": 2.5452, "step": 332980 }, { "epoch": 0.6633901249521866, "grad_norm": 0.1640300750732422, "learning_rate": 0.002, "loss": 2.5456, "step": 332990 }, { "epoch": 0.6634100471758255, "grad_norm": 0.1650356650352478, "learning_rate": 0.002, "loss": 2.5706, "step": 333000 }, { "epoch": 0.6634299693994645, "grad_norm": 0.13508225977420807, "learning_rate": 0.002, "loss": 2.5426, "step": 333010 }, { "epoch": 0.6634498916231034, "grad_norm": 0.16326424479484558, "learning_rate": 0.002, "loss": 2.5704, "step": 333020 }, { "epoch": 0.6634698138467423, "grad_norm": 0.17107349634170532, "learning_rate": 0.002, "loss": 2.559, "step": 333030 }, { "epoch": 0.6634897360703812, "grad_norm": 0.15400759875774384, "learning_rate": 0.002, "loss": 2.5489, "step": 333040 }, { "epoch": 0.6635096582940202, "grad_norm": 0.15813374519348145, "learning_rate": 0.002, "loss": 2.5488, "step": 333050 }, { "epoch": 0.6635295805176591, "grad_norm": 0.16370920836925507, "learning_rate": 0.002, "loss": 2.5616, "step": 333060 }, { "epoch": 0.663549502741298, "grad_norm": 0.16916459798812866, "learning_rate": 0.002, "loss": 2.5588, "step": 333070 }, { "epoch": 0.6635694249649369, "grad_norm": 0.1431669294834137, "learning_rate": 0.002, "loss": 2.555, "step": 333080 }, { "epoch": 0.6635893471885758, "grad_norm": 0.15858136117458344, "learning_rate": 0.002, "loss": 2.5539, "step": 333090 }, { "epoch": 0.6636092694122148, "grad_norm": 0.17088183760643005, "learning_rate": 0.002, "loss": 2.5454, "step": 333100 }, { "epoch": 0.6636291916358537, "grad_norm": 0.15468105673789978, "learning_rate": 0.002, "loss": 2.564, "step": 333110 }, { "epoch": 0.6636491138594925, "grad_norm": 0.16286133229732513, "learning_rate": 0.002, "loss": 2.5647, "step": 333120 }, { "epoch": 0.6636690360831314, "grad_norm": 0.17332008481025696, "learning_rate": 0.002, "loss": 2.5525, "step": 333130 }, { "epoch": 0.6636889583067703, "grad_norm": 0.19021061062812805, "learning_rate": 0.002, "loss": 2.5571, "step": 333140 }, { "epoch": 0.6637088805304093, "grad_norm": 0.18917226791381836, "learning_rate": 0.002, "loss": 2.5531, "step": 333150 }, { "epoch": 0.6637288027540482, "grad_norm": 0.1793031245470047, "learning_rate": 0.002, "loss": 2.5632, "step": 333160 }, { "epoch": 0.6637487249776871, "grad_norm": 0.1481717824935913, "learning_rate": 0.002, "loss": 2.5433, "step": 333170 }, { "epoch": 0.663768647201326, "grad_norm": 0.1849568635225296, "learning_rate": 0.002, "loss": 2.5604, "step": 333180 }, { "epoch": 0.6637885694249649, "grad_norm": 0.17722386121749878, "learning_rate": 0.002, "loss": 2.5681, "step": 333190 }, { "epoch": 0.6638084916486039, "grad_norm": 0.13704755902290344, "learning_rate": 0.002, "loss": 2.5583, "step": 333200 }, { "epoch": 0.6638284138722428, "grad_norm": 0.2129584550857544, "learning_rate": 0.002, "loss": 2.554, "step": 333210 }, { "epoch": 0.6638483360958817, "grad_norm": 0.15073952078819275, "learning_rate": 0.002, "loss": 2.5482, "step": 333220 }, { "epoch": 0.6638682583195206, "grad_norm": 0.14515043795108795, "learning_rate": 0.002, "loss": 2.5504, "step": 333230 }, { "epoch": 0.6638881805431595, "grad_norm": 0.15426164865493774, "learning_rate": 0.002, "loss": 2.5608, "step": 333240 }, { "epoch": 0.6639081027667985, "grad_norm": 0.18046538531780243, "learning_rate": 0.002, "loss": 2.5412, "step": 333250 }, { "epoch": 0.6639280249904373, "grad_norm": 0.15041953325271606, "learning_rate": 0.002, "loss": 2.5494, "step": 333260 }, { "epoch": 0.6639479472140762, "grad_norm": 0.14490821957588196, "learning_rate": 0.002, "loss": 2.5733, "step": 333270 }, { "epoch": 0.6639678694377151, "grad_norm": 0.1497882753610611, "learning_rate": 0.002, "loss": 2.5566, "step": 333280 }, { "epoch": 0.663987791661354, "grad_norm": 0.17090775072574615, "learning_rate": 0.002, "loss": 2.5551, "step": 333290 }, { "epoch": 0.664007713884993, "grad_norm": 0.14140930771827698, "learning_rate": 0.002, "loss": 2.5564, "step": 333300 }, { "epoch": 0.6640276361086319, "grad_norm": 0.19197741150856018, "learning_rate": 0.002, "loss": 2.5579, "step": 333310 }, { "epoch": 0.6640475583322708, "grad_norm": 0.1918136030435562, "learning_rate": 0.002, "loss": 2.5481, "step": 333320 }, { "epoch": 0.6640674805559097, "grad_norm": 0.16332533955574036, "learning_rate": 0.002, "loss": 2.5718, "step": 333330 }, { "epoch": 0.6640874027795487, "grad_norm": 0.14112645387649536, "learning_rate": 0.002, "loss": 2.5607, "step": 333340 }, { "epoch": 0.6641073250031876, "grad_norm": 0.1490669697523117, "learning_rate": 0.002, "loss": 2.5611, "step": 333350 }, { "epoch": 0.6641272472268265, "grad_norm": 0.16009117662906647, "learning_rate": 0.002, "loss": 2.5622, "step": 333360 }, { "epoch": 0.6641471694504654, "grad_norm": 0.16234290599822998, "learning_rate": 0.002, "loss": 2.5529, "step": 333370 }, { "epoch": 0.6641670916741043, "grad_norm": 0.232954740524292, "learning_rate": 0.002, "loss": 2.5661, "step": 333380 }, { "epoch": 0.6641870138977433, "grad_norm": 0.17058110237121582, "learning_rate": 0.002, "loss": 2.5571, "step": 333390 }, { "epoch": 0.6642069361213822, "grad_norm": 0.1565236896276474, "learning_rate": 0.002, "loss": 2.557, "step": 333400 }, { "epoch": 0.664226858345021, "grad_norm": 0.16775007545948029, "learning_rate": 0.002, "loss": 2.5414, "step": 333410 }, { "epoch": 0.6642467805686599, "grad_norm": 0.15983276069164276, "learning_rate": 0.002, "loss": 2.5444, "step": 333420 }, { "epoch": 0.6642667027922988, "grad_norm": 0.225172758102417, "learning_rate": 0.002, "loss": 2.5652, "step": 333430 }, { "epoch": 0.6642866250159378, "grad_norm": 0.15571540594100952, "learning_rate": 0.002, "loss": 2.5548, "step": 333440 }, { "epoch": 0.6643065472395767, "grad_norm": 0.17013785243034363, "learning_rate": 0.002, "loss": 2.5558, "step": 333450 }, { "epoch": 0.6643264694632156, "grad_norm": 0.193781778216362, "learning_rate": 0.002, "loss": 2.5601, "step": 333460 }, { "epoch": 0.6643463916868545, "grad_norm": 0.16876167058944702, "learning_rate": 0.002, "loss": 2.5435, "step": 333470 }, { "epoch": 0.6643663139104934, "grad_norm": 0.17035812139511108, "learning_rate": 0.002, "loss": 2.5429, "step": 333480 }, { "epoch": 0.6643862361341324, "grad_norm": 0.1598397195339203, "learning_rate": 0.002, "loss": 2.5413, "step": 333490 }, { "epoch": 0.6644061583577713, "grad_norm": 0.16324929893016815, "learning_rate": 0.002, "loss": 2.5648, "step": 333500 }, { "epoch": 0.6644260805814102, "grad_norm": 0.16301070153713226, "learning_rate": 0.002, "loss": 2.5415, "step": 333510 }, { "epoch": 0.6644460028050491, "grad_norm": 0.15098939836025238, "learning_rate": 0.002, "loss": 2.5476, "step": 333520 }, { "epoch": 0.664465925028688, "grad_norm": 0.16613377630710602, "learning_rate": 0.002, "loss": 2.5589, "step": 333530 }, { "epoch": 0.664485847252327, "grad_norm": 0.1705811768770218, "learning_rate": 0.002, "loss": 2.5441, "step": 333540 }, { "epoch": 0.6645057694759658, "grad_norm": 0.16741833090782166, "learning_rate": 0.002, "loss": 2.5733, "step": 333550 }, { "epoch": 0.6645256916996047, "grad_norm": 0.14474640786647797, "learning_rate": 0.002, "loss": 2.5516, "step": 333560 }, { "epoch": 0.6645456139232436, "grad_norm": 0.1486075073480606, "learning_rate": 0.002, "loss": 2.5385, "step": 333570 }, { "epoch": 0.6645655361468825, "grad_norm": 0.19658741354942322, "learning_rate": 0.002, "loss": 2.5668, "step": 333580 }, { "epoch": 0.6645854583705215, "grad_norm": 0.17539814114570618, "learning_rate": 0.002, "loss": 2.5572, "step": 333590 }, { "epoch": 0.6646053805941604, "grad_norm": 0.15998788177967072, "learning_rate": 0.002, "loss": 2.5516, "step": 333600 }, { "epoch": 0.6646253028177993, "grad_norm": 0.16708727180957794, "learning_rate": 0.002, "loss": 2.5622, "step": 333610 }, { "epoch": 0.6646452250414382, "grad_norm": 0.15227195620536804, "learning_rate": 0.002, "loss": 2.5408, "step": 333620 }, { "epoch": 0.6646651472650772, "grad_norm": 0.19285948574543, "learning_rate": 0.002, "loss": 2.5535, "step": 333630 }, { "epoch": 0.6646850694887161, "grad_norm": 0.18296702206134796, "learning_rate": 0.002, "loss": 2.5553, "step": 333640 }, { "epoch": 0.664704991712355, "grad_norm": 0.1877990961074829, "learning_rate": 0.002, "loss": 2.5609, "step": 333650 }, { "epoch": 0.6647249139359939, "grad_norm": 0.18151447176933289, "learning_rate": 0.002, "loss": 2.5452, "step": 333660 }, { "epoch": 0.6647448361596328, "grad_norm": 0.16992618143558502, "learning_rate": 0.002, "loss": 2.5661, "step": 333670 }, { "epoch": 0.6647647583832718, "grad_norm": 0.14157380163669586, "learning_rate": 0.002, "loss": 2.5531, "step": 333680 }, { "epoch": 0.6647846806069106, "grad_norm": 0.14495089650154114, "learning_rate": 0.002, "loss": 2.5529, "step": 333690 }, { "epoch": 0.6648046028305495, "grad_norm": 0.20997533202171326, "learning_rate": 0.002, "loss": 2.5788, "step": 333700 }, { "epoch": 0.6648245250541884, "grad_norm": 0.15375322103500366, "learning_rate": 0.002, "loss": 2.562, "step": 333710 }, { "epoch": 0.6648444472778273, "grad_norm": 0.181953564286232, "learning_rate": 0.002, "loss": 2.546, "step": 333720 }, { "epoch": 0.6648643695014663, "grad_norm": 0.18511903285980225, "learning_rate": 0.002, "loss": 2.5655, "step": 333730 }, { "epoch": 0.6648842917251052, "grad_norm": 0.15288442373275757, "learning_rate": 0.002, "loss": 2.5512, "step": 333740 }, { "epoch": 0.6649042139487441, "grad_norm": 0.15933018922805786, "learning_rate": 0.002, "loss": 2.5527, "step": 333750 }, { "epoch": 0.664924136172383, "grad_norm": 0.1389302909374237, "learning_rate": 0.002, "loss": 2.5523, "step": 333760 }, { "epoch": 0.6649440583960219, "grad_norm": 0.16738153994083405, "learning_rate": 0.002, "loss": 2.5732, "step": 333770 }, { "epoch": 0.6649639806196609, "grad_norm": 0.19061045348644257, "learning_rate": 0.002, "loss": 2.5509, "step": 333780 }, { "epoch": 0.6649839028432998, "grad_norm": 0.15963837504386902, "learning_rate": 0.002, "loss": 2.5574, "step": 333790 }, { "epoch": 0.6650038250669387, "grad_norm": 0.15385626256465912, "learning_rate": 0.002, "loss": 2.554, "step": 333800 }, { "epoch": 0.6650237472905776, "grad_norm": 0.19298939406871796, "learning_rate": 0.002, "loss": 2.5453, "step": 333810 }, { "epoch": 0.6650436695142165, "grad_norm": 0.1634034961462021, "learning_rate": 0.002, "loss": 2.5601, "step": 333820 }, { "epoch": 0.6650635917378555, "grad_norm": 0.14097729325294495, "learning_rate": 0.002, "loss": 2.5397, "step": 333830 }, { "epoch": 0.6650835139614943, "grad_norm": 0.14029204845428467, "learning_rate": 0.002, "loss": 2.5538, "step": 333840 }, { "epoch": 0.6651034361851332, "grad_norm": 0.17808008193969727, "learning_rate": 0.002, "loss": 2.5584, "step": 333850 }, { "epoch": 0.6651233584087721, "grad_norm": 0.1853097379207611, "learning_rate": 0.002, "loss": 2.5491, "step": 333860 }, { "epoch": 0.665143280632411, "grad_norm": 0.1393008530139923, "learning_rate": 0.002, "loss": 2.5348, "step": 333870 }, { "epoch": 0.66516320285605, "grad_norm": 0.16872060298919678, "learning_rate": 0.002, "loss": 2.5547, "step": 333880 }, { "epoch": 0.6651831250796889, "grad_norm": 0.15661031007766724, "learning_rate": 0.002, "loss": 2.5594, "step": 333890 }, { "epoch": 0.6652030473033278, "grad_norm": 0.1500881016254425, "learning_rate": 0.002, "loss": 2.5536, "step": 333900 }, { "epoch": 0.6652229695269667, "grad_norm": 0.17750197649002075, "learning_rate": 0.002, "loss": 2.5522, "step": 333910 }, { "epoch": 0.6652428917506056, "grad_norm": 0.17459623515605927, "learning_rate": 0.002, "loss": 2.5762, "step": 333920 }, { "epoch": 0.6652628139742446, "grad_norm": 0.3025680184364319, "learning_rate": 0.002, "loss": 2.5611, "step": 333930 }, { "epoch": 0.6652827361978835, "grad_norm": 0.15910771489143372, "learning_rate": 0.002, "loss": 2.5474, "step": 333940 }, { "epoch": 0.6653026584215224, "grad_norm": 0.1638534814119339, "learning_rate": 0.002, "loss": 2.538, "step": 333950 }, { "epoch": 0.6653225806451613, "grad_norm": 0.15951357781887054, "learning_rate": 0.002, "loss": 2.5556, "step": 333960 }, { "epoch": 0.6653425028688003, "grad_norm": 0.1591169536113739, "learning_rate": 0.002, "loss": 2.5691, "step": 333970 }, { "epoch": 0.6653624250924391, "grad_norm": 0.1531582921743393, "learning_rate": 0.002, "loss": 2.5582, "step": 333980 }, { "epoch": 0.665382347316078, "grad_norm": 0.16248339414596558, "learning_rate": 0.002, "loss": 2.5501, "step": 333990 }, { "epoch": 0.6654022695397169, "grad_norm": 0.1856270432472229, "learning_rate": 0.002, "loss": 2.5692, "step": 334000 }, { "epoch": 0.6654221917633558, "grad_norm": 0.30209338665008545, "learning_rate": 0.002, "loss": 2.547, "step": 334010 }, { "epoch": 0.6654421139869948, "grad_norm": 0.15742528438568115, "learning_rate": 0.002, "loss": 2.5618, "step": 334020 }, { "epoch": 0.6654620362106337, "grad_norm": 0.18949994444847107, "learning_rate": 0.002, "loss": 2.5553, "step": 334030 }, { "epoch": 0.6654819584342726, "grad_norm": 0.15962783992290497, "learning_rate": 0.002, "loss": 2.5437, "step": 334040 }, { "epoch": 0.6655018806579115, "grad_norm": 0.19632716476917267, "learning_rate": 0.002, "loss": 2.559, "step": 334050 }, { "epoch": 0.6655218028815504, "grad_norm": 0.17791850864887238, "learning_rate": 0.002, "loss": 2.5832, "step": 334060 }, { "epoch": 0.6655417251051894, "grad_norm": 0.20001879334449768, "learning_rate": 0.002, "loss": 2.5488, "step": 334070 }, { "epoch": 0.6655616473288283, "grad_norm": 0.1583283543586731, "learning_rate": 0.002, "loss": 2.5572, "step": 334080 }, { "epoch": 0.6655815695524672, "grad_norm": 0.16637054085731506, "learning_rate": 0.002, "loss": 2.5473, "step": 334090 }, { "epoch": 0.6656014917761061, "grad_norm": 0.17620041966438293, "learning_rate": 0.002, "loss": 2.5473, "step": 334100 }, { "epoch": 0.665621413999745, "grad_norm": 0.1773362010717392, "learning_rate": 0.002, "loss": 2.5545, "step": 334110 }, { "epoch": 0.665641336223384, "grad_norm": 0.15553811192512512, "learning_rate": 0.002, "loss": 2.5673, "step": 334120 }, { "epoch": 0.6656612584470228, "grad_norm": 0.17826293408870697, "learning_rate": 0.002, "loss": 2.5495, "step": 334130 }, { "epoch": 0.6656811806706617, "grad_norm": 0.16180014610290527, "learning_rate": 0.002, "loss": 2.5548, "step": 334140 }, { "epoch": 0.6657011028943006, "grad_norm": 0.16836710274219513, "learning_rate": 0.002, "loss": 2.5637, "step": 334150 }, { "epoch": 0.6657210251179395, "grad_norm": 0.14304658770561218, "learning_rate": 0.002, "loss": 2.5441, "step": 334160 }, { "epoch": 0.6657409473415785, "grad_norm": 0.17611615359783173, "learning_rate": 0.002, "loss": 2.546, "step": 334170 }, { "epoch": 0.6657608695652174, "grad_norm": 0.16839750111103058, "learning_rate": 0.002, "loss": 2.5425, "step": 334180 }, { "epoch": 0.6657807917888563, "grad_norm": 0.1651601791381836, "learning_rate": 0.002, "loss": 2.5441, "step": 334190 }, { "epoch": 0.6658007140124952, "grad_norm": 0.16471382975578308, "learning_rate": 0.002, "loss": 2.5746, "step": 334200 }, { "epoch": 0.6658206362361341, "grad_norm": 0.18051104247570038, "learning_rate": 0.002, "loss": 2.5465, "step": 334210 }, { "epoch": 0.6658405584597731, "grad_norm": 0.5238372087478638, "learning_rate": 0.002, "loss": 2.5506, "step": 334220 }, { "epoch": 0.665860480683412, "grad_norm": 0.16095344722270966, "learning_rate": 0.002, "loss": 2.5628, "step": 334230 }, { "epoch": 0.6658804029070509, "grad_norm": 0.20815764367580414, "learning_rate": 0.002, "loss": 2.5637, "step": 334240 }, { "epoch": 0.6659003251306898, "grad_norm": 0.17869475483894348, "learning_rate": 0.002, "loss": 2.5589, "step": 334250 }, { "epoch": 0.6659202473543288, "grad_norm": 0.17606960237026215, "learning_rate": 0.002, "loss": 2.5614, "step": 334260 }, { "epoch": 0.6659401695779676, "grad_norm": 0.17895056307315826, "learning_rate": 0.002, "loss": 2.5511, "step": 334270 }, { "epoch": 0.6659600918016065, "grad_norm": 0.1349639594554901, "learning_rate": 0.002, "loss": 2.5485, "step": 334280 }, { "epoch": 0.6659800140252454, "grad_norm": 0.15923330187797546, "learning_rate": 0.002, "loss": 2.5373, "step": 334290 }, { "epoch": 0.6659999362488843, "grad_norm": 0.14578548073768616, "learning_rate": 0.002, "loss": 2.5687, "step": 334300 }, { "epoch": 0.6660198584725233, "grad_norm": 0.162955179810524, "learning_rate": 0.002, "loss": 2.5625, "step": 334310 }, { "epoch": 0.6660397806961622, "grad_norm": 0.18621701002120972, "learning_rate": 0.002, "loss": 2.5346, "step": 334320 }, { "epoch": 0.6660597029198011, "grad_norm": 0.19559088349342346, "learning_rate": 0.002, "loss": 2.5462, "step": 334330 }, { "epoch": 0.66607962514344, "grad_norm": 0.16054120659828186, "learning_rate": 0.002, "loss": 2.5627, "step": 334340 }, { "epoch": 0.6660995473670789, "grad_norm": 0.15509822964668274, "learning_rate": 0.002, "loss": 2.541, "step": 334350 }, { "epoch": 0.6661194695907179, "grad_norm": 0.17868351936340332, "learning_rate": 0.002, "loss": 2.5345, "step": 334360 }, { "epoch": 0.6661393918143568, "grad_norm": 0.17071934044361115, "learning_rate": 0.002, "loss": 2.5616, "step": 334370 }, { "epoch": 0.6661593140379957, "grad_norm": 0.1820157915353775, "learning_rate": 0.002, "loss": 2.547, "step": 334380 }, { "epoch": 0.6661792362616346, "grad_norm": 0.24780674278736115, "learning_rate": 0.002, "loss": 2.5604, "step": 334390 }, { "epoch": 0.6661991584852734, "grad_norm": 0.15078136324882507, "learning_rate": 0.002, "loss": 2.5623, "step": 334400 }, { "epoch": 0.6662190807089124, "grad_norm": 0.2170535922050476, "learning_rate": 0.002, "loss": 2.5638, "step": 334410 }, { "epoch": 0.6662390029325513, "grad_norm": 0.1644146740436554, "learning_rate": 0.002, "loss": 2.5733, "step": 334420 }, { "epoch": 0.6662589251561902, "grad_norm": 0.2560337781906128, "learning_rate": 0.002, "loss": 2.5648, "step": 334430 }, { "epoch": 0.6662788473798291, "grad_norm": 0.15224406123161316, "learning_rate": 0.002, "loss": 2.5756, "step": 334440 }, { "epoch": 0.666298769603468, "grad_norm": 0.1817573457956314, "learning_rate": 0.002, "loss": 2.545, "step": 334450 }, { "epoch": 0.666318691827107, "grad_norm": 0.17078350484371185, "learning_rate": 0.002, "loss": 2.5588, "step": 334460 }, { "epoch": 0.6663386140507459, "grad_norm": 0.1630171835422516, "learning_rate": 0.002, "loss": 2.551, "step": 334470 }, { "epoch": 0.6663585362743848, "grad_norm": 0.15874317288398743, "learning_rate": 0.002, "loss": 2.5528, "step": 334480 }, { "epoch": 0.6663784584980237, "grad_norm": 0.15182779729366302, "learning_rate": 0.002, "loss": 2.56, "step": 334490 }, { "epoch": 0.6663983807216626, "grad_norm": 0.16093796491622925, "learning_rate": 0.002, "loss": 2.5499, "step": 334500 }, { "epoch": 0.6664183029453016, "grad_norm": 0.17227116227149963, "learning_rate": 0.002, "loss": 2.5557, "step": 334510 }, { "epoch": 0.6664382251689405, "grad_norm": 0.17202183604240417, "learning_rate": 0.002, "loss": 2.5606, "step": 334520 }, { "epoch": 0.6664581473925794, "grad_norm": 0.17223092913627625, "learning_rate": 0.002, "loss": 2.5502, "step": 334530 }, { "epoch": 0.6664780696162182, "grad_norm": 0.14499522745609283, "learning_rate": 0.002, "loss": 2.5625, "step": 334540 }, { "epoch": 0.6664979918398573, "grad_norm": 0.1613398641347885, "learning_rate": 0.002, "loss": 2.5502, "step": 334550 }, { "epoch": 0.6665179140634961, "grad_norm": 0.17397688329219818, "learning_rate": 0.002, "loss": 2.5663, "step": 334560 }, { "epoch": 0.666537836287135, "grad_norm": 0.20686116814613342, "learning_rate": 0.002, "loss": 2.5473, "step": 334570 }, { "epoch": 0.6665577585107739, "grad_norm": 0.15712177753448486, "learning_rate": 0.002, "loss": 2.5529, "step": 334580 }, { "epoch": 0.6665776807344128, "grad_norm": 0.16868601739406586, "learning_rate": 0.002, "loss": 2.5588, "step": 334590 }, { "epoch": 0.6665976029580518, "grad_norm": 0.14001153409481049, "learning_rate": 0.002, "loss": 2.5696, "step": 334600 }, { "epoch": 0.6666175251816907, "grad_norm": 0.18918772041797638, "learning_rate": 0.002, "loss": 2.5492, "step": 334610 }, { "epoch": 0.6666374474053296, "grad_norm": 0.16704265773296356, "learning_rate": 0.002, "loss": 2.5522, "step": 334620 }, { "epoch": 0.6666573696289685, "grad_norm": 0.1860509067773819, "learning_rate": 0.002, "loss": 2.5514, "step": 334630 }, { "epoch": 0.6666772918526074, "grad_norm": 0.17627789080142975, "learning_rate": 0.002, "loss": 2.5728, "step": 334640 }, { "epoch": 0.6666972140762464, "grad_norm": 0.14017583429813385, "learning_rate": 0.002, "loss": 2.5499, "step": 334650 }, { "epoch": 0.6667171362998853, "grad_norm": 0.18901744484901428, "learning_rate": 0.002, "loss": 2.5471, "step": 334660 }, { "epoch": 0.6667370585235242, "grad_norm": 0.15792697668075562, "learning_rate": 0.002, "loss": 2.562, "step": 334670 }, { "epoch": 0.666756980747163, "grad_norm": 0.15252470970153809, "learning_rate": 0.002, "loss": 2.557, "step": 334680 }, { "epoch": 0.6667769029708019, "grad_norm": 0.1643076241016388, "learning_rate": 0.002, "loss": 2.5619, "step": 334690 }, { "epoch": 0.666796825194441, "grad_norm": 0.14370645582675934, "learning_rate": 0.002, "loss": 2.5425, "step": 334700 }, { "epoch": 0.6668167474180798, "grad_norm": 0.2157089114189148, "learning_rate": 0.002, "loss": 2.5538, "step": 334710 }, { "epoch": 0.6668366696417187, "grad_norm": 0.1494440883398056, "learning_rate": 0.002, "loss": 2.5509, "step": 334720 }, { "epoch": 0.6668565918653576, "grad_norm": 0.16660259664058685, "learning_rate": 0.002, "loss": 2.5585, "step": 334730 }, { "epoch": 0.6668765140889965, "grad_norm": 0.17825168371200562, "learning_rate": 0.002, "loss": 2.5513, "step": 334740 }, { "epoch": 0.6668964363126355, "grad_norm": 0.14177535474300385, "learning_rate": 0.002, "loss": 2.5458, "step": 334750 }, { "epoch": 0.6669163585362744, "grad_norm": 0.17347271740436554, "learning_rate": 0.002, "loss": 2.5455, "step": 334760 }, { "epoch": 0.6669362807599133, "grad_norm": 0.18711133301258087, "learning_rate": 0.002, "loss": 2.555, "step": 334770 }, { "epoch": 0.6669562029835522, "grad_norm": 0.1713515967130661, "learning_rate": 0.002, "loss": 2.5483, "step": 334780 }, { "epoch": 0.6669761252071911, "grad_norm": 0.15141154825687408, "learning_rate": 0.002, "loss": 2.5434, "step": 334790 }, { "epoch": 0.6669960474308301, "grad_norm": 0.19222116470336914, "learning_rate": 0.002, "loss": 2.5609, "step": 334800 }, { "epoch": 0.667015969654469, "grad_norm": 0.189959317445755, "learning_rate": 0.002, "loss": 2.5613, "step": 334810 }, { "epoch": 0.6670358918781079, "grad_norm": 0.17135097086429596, "learning_rate": 0.002, "loss": 2.5466, "step": 334820 }, { "epoch": 0.6670558141017467, "grad_norm": 0.18601639568805695, "learning_rate": 0.002, "loss": 2.547, "step": 334830 }, { "epoch": 0.6670757363253857, "grad_norm": 0.1733216792345047, "learning_rate": 0.002, "loss": 2.542, "step": 334840 }, { "epoch": 0.6670956585490246, "grad_norm": 0.1565805971622467, "learning_rate": 0.002, "loss": 2.5421, "step": 334850 }, { "epoch": 0.6671155807726635, "grad_norm": 0.19096821546554565, "learning_rate": 0.002, "loss": 2.5624, "step": 334860 }, { "epoch": 0.6671355029963024, "grad_norm": 0.1574063003063202, "learning_rate": 0.002, "loss": 2.5601, "step": 334870 }, { "epoch": 0.6671554252199413, "grad_norm": 0.17054973542690277, "learning_rate": 0.002, "loss": 2.548, "step": 334880 }, { "epoch": 0.6671753474435803, "grad_norm": 0.14276567101478577, "learning_rate": 0.002, "loss": 2.5443, "step": 334890 }, { "epoch": 0.6671952696672192, "grad_norm": 0.14840838313102722, "learning_rate": 0.002, "loss": 2.5618, "step": 334900 }, { "epoch": 0.6672151918908581, "grad_norm": 0.17375852167606354, "learning_rate": 0.002, "loss": 2.564, "step": 334910 }, { "epoch": 0.667235114114497, "grad_norm": 0.16793590784072876, "learning_rate": 0.002, "loss": 2.5333, "step": 334920 }, { "epoch": 0.6672550363381359, "grad_norm": 0.1682584434747696, "learning_rate": 0.002, "loss": 2.555, "step": 334930 }, { "epoch": 0.6672749585617749, "grad_norm": 0.15698431432247162, "learning_rate": 0.002, "loss": 2.5497, "step": 334940 }, { "epoch": 0.6672948807854138, "grad_norm": 0.16806572675704956, "learning_rate": 0.002, "loss": 2.5504, "step": 334950 }, { "epoch": 0.6673148030090527, "grad_norm": 0.16396498680114746, "learning_rate": 0.002, "loss": 2.5681, "step": 334960 }, { "epoch": 0.6673347252326916, "grad_norm": 0.16315142810344696, "learning_rate": 0.002, "loss": 2.5447, "step": 334970 }, { "epoch": 0.6673546474563304, "grad_norm": 0.1789286583662033, "learning_rate": 0.002, "loss": 2.5514, "step": 334980 }, { "epoch": 0.6673745696799694, "grad_norm": 0.17346327006816864, "learning_rate": 0.002, "loss": 2.5566, "step": 334990 }, { "epoch": 0.6673944919036083, "grad_norm": 0.15146954357624054, "learning_rate": 0.002, "loss": 2.5626, "step": 335000 }, { "epoch": 0.6674144141272472, "grad_norm": 0.20048196613788605, "learning_rate": 0.002, "loss": 2.5618, "step": 335010 }, { "epoch": 0.6674343363508861, "grad_norm": 0.15130437910556793, "learning_rate": 0.002, "loss": 2.55, "step": 335020 }, { "epoch": 0.667454258574525, "grad_norm": 0.14355988800525665, "learning_rate": 0.002, "loss": 2.5568, "step": 335030 }, { "epoch": 0.667474180798164, "grad_norm": 0.19081765413284302, "learning_rate": 0.002, "loss": 2.5517, "step": 335040 }, { "epoch": 0.6674941030218029, "grad_norm": 0.16407263278961182, "learning_rate": 0.002, "loss": 2.5519, "step": 335050 }, { "epoch": 0.6675140252454418, "grad_norm": 0.15493656694889069, "learning_rate": 0.002, "loss": 2.5454, "step": 335060 }, { "epoch": 0.6675339474690807, "grad_norm": 0.17187543213367462, "learning_rate": 0.002, "loss": 2.5609, "step": 335070 }, { "epoch": 0.6675538696927196, "grad_norm": 0.14135301113128662, "learning_rate": 0.002, "loss": 2.5557, "step": 335080 }, { "epoch": 0.6675737919163586, "grad_norm": 0.14838925004005432, "learning_rate": 0.002, "loss": 2.5417, "step": 335090 }, { "epoch": 0.6675937141399975, "grad_norm": 0.19611412286758423, "learning_rate": 0.002, "loss": 2.5514, "step": 335100 }, { "epoch": 0.6676136363636364, "grad_norm": 0.15988419950008392, "learning_rate": 0.002, "loss": 2.5429, "step": 335110 }, { "epoch": 0.6676335585872752, "grad_norm": 0.1502997726202011, "learning_rate": 0.002, "loss": 2.5616, "step": 335120 }, { "epoch": 0.6676534808109142, "grad_norm": 0.15963056683540344, "learning_rate": 0.002, "loss": 2.5516, "step": 335130 }, { "epoch": 0.6676734030345531, "grad_norm": 0.15793661773204803, "learning_rate": 0.002, "loss": 2.5513, "step": 335140 }, { "epoch": 0.667693325258192, "grad_norm": 0.18841294944286346, "learning_rate": 0.002, "loss": 2.5431, "step": 335150 }, { "epoch": 0.6677132474818309, "grad_norm": 0.16170692443847656, "learning_rate": 0.002, "loss": 2.547, "step": 335160 }, { "epoch": 0.6677331697054698, "grad_norm": 0.18485848605632782, "learning_rate": 0.002, "loss": 2.5552, "step": 335170 }, { "epoch": 0.6677530919291088, "grad_norm": 0.159719318151474, "learning_rate": 0.002, "loss": 2.5628, "step": 335180 }, { "epoch": 0.6677730141527477, "grad_norm": 0.1850338876247406, "learning_rate": 0.002, "loss": 2.5667, "step": 335190 }, { "epoch": 0.6677929363763866, "grad_norm": 0.1432572603225708, "learning_rate": 0.002, "loss": 2.5611, "step": 335200 }, { "epoch": 0.6678128586000255, "grad_norm": 0.16838683187961578, "learning_rate": 0.002, "loss": 2.5376, "step": 335210 }, { "epoch": 0.6678327808236644, "grad_norm": 0.15293897688388824, "learning_rate": 0.002, "loss": 2.5539, "step": 335220 }, { "epoch": 0.6678527030473034, "grad_norm": 0.16403566300868988, "learning_rate": 0.002, "loss": 2.5769, "step": 335230 }, { "epoch": 0.6678726252709423, "grad_norm": 0.16473282873630524, "learning_rate": 0.002, "loss": 2.565, "step": 335240 }, { "epoch": 0.6678925474945812, "grad_norm": 0.16015449166297913, "learning_rate": 0.002, "loss": 2.5663, "step": 335250 }, { "epoch": 0.66791246971822, "grad_norm": 0.19295281171798706, "learning_rate": 0.002, "loss": 2.5584, "step": 335260 }, { "epoch": 0.6679323919418589, "grad_norm": 0.16770239174365997, "learning_rate": 0.002, "loss": 2.5521, "step": 335270 }, { "epoch": 0.6679523141654979, "grad_norm": 0.14309199154376984, "learning_rate": 0.002, "loss": 2.5459, "step": 335280 }, { "epoch": 0.6679722363891368, "grad_norm": 0.1957346647977829, "learning_rate": 0.002, "loss": 2.555, "step": 335290 }, { "epoch": 0.6679921586127757, "grad_norm": 0.187333881855011, "learning_rate": 0.002, "loss": 2.5388, "step": 335300 }, { "epoch": 0.6680120808364146, "grad_norm": 0.17693769931793213, "learning_rate": 0.002, "loss": 2.5526, "step": 335310 }, { "epoch": 0.6680320030600535, "grad_norm": 0.1571832150220871, "learning_rate": 0.002, "loss": 2.5439, "step": 335320 }, { "epoch": 0.6680519252836925, "grad_norm": 0.18051333725452423, "learning_rate": 0.002, "loss": 2.5613, "step": 335330 }, { "epoch": 0.6680718475073314, "grad_norm": 0.1443437933921814, "learning_rate": 0.002, "loss": 2.5562, "step": 335340 }, { "epoch": 0.6680917697309703, "grad_norm": 0.15735980868339539, "learning_rate": 0.002, "loss": 2.5556, "step": 335350 }, { "epoch": 0.6681116919546092, "grad_norm": 0.1518602967262268, "learning_rate": 0.002, "loss": 2.5554, "step": 335360 }, { "epoch": 0.6681316141782481, "grad_norm": 0.15884223580360413, "learning_rate": 0.002, "loss": 2.5464, "step": 335370 }, { "epoch": 0.6681515364018871, "grad_norm": 0.18545570969581604, "learning_rate": 0.002, "loss": 2.56, "step": 335380 }, { "epoch": 0.668171458625526, "grad_norm": 0.17315341532230377, "learning_rate": 0.002, "loss": 2.5533, "step": 335390 }, { "epoch": 0.6681913808491649, "grad_norm": 0.19862034916877747, "learning_rate": 0.002, "loss": 2.5491, "step": 335400 }, { "epoch": 0.6682113030728037, "grad_norm": 0.14988498389720917, "learning_rate": 0.002, "loss": 2.5507, "step": 335410 }, { "epoch": 0.6682312252964426, "grad_norm": 0.16165274381637573, "learning_rate": 0.002, "loss": 2.5476, "step": 335420 }, { "epoch": 0.6682511475200816, "grad_norm": 0.18521592020988464, "learning_rate": 0.002, "loss": 2.5502, "step": 335430 }, { "epoch": 0.6682710697437205, "grad_norm": 0.14198806881904602, "learning_rate": 0.002, "loss": 2.5465, "step": 335440 }, { "epoch": 0.6682909919673594, "grad_norm": 0.19687816500663757, "learning_rate": 0.002, "loss": 2.5572, "step": 335450 }, { "epoch": 0.6683109141909983, "grad_norm": 0.15468919277191162, "learning_rate": 0.002, "loss": 2.5764, "step": 335460 }, { "epoch": 0.6683308364146373, "grad_norm": 0.15067850053310394, "learning_rate": 0.002, "loss": 2.5462, "step": 335470 }, { "epoch": 0.6683507586382762, "grad_norm": 0.20635591447353363, "learning_rate": 0.002, "loss": 2.5473, "step": 335480 }, { "epoch": 0.6683706808619151, "grad_norm": 0.19130943715572357, "learning_rate": 0.002, "loss": 2.5663, "step": 335490 }, { "epoch": 0.668390603085554, "grad_norm": 0.17029015719890594, "learning_rate": 0.002, "loss": 2.5436, "step": 335500 }, { "epoch": 0.6684105253091929, "grad_norm": 0.15846414864063263, "learning_rate": 0.002, "loss": 2.5547, "step": 335510 }, { "epoch": 0.6684304475328319, "grad_norm": 0.18782328069210052, "learning_rate": 0.002, "loss": 2.5588, "step": 335520 }, { "epoch": 0.6684503697564708, "grad_norm": 0.22000697255134583, "learning_rate": 0.002, "loss": 2.5601, "step": 335530 }, { "epoch": 0.6684702919801097, "grad_norm": 0.1432289034128189, "learning_rate": 0.002, "loss": 2.548, "step": 335540 }, { "epoch": 0.6684902142037485, "grad_norm": 0.19585371017456055, "learning_rate": 0.002, "loss": 2.5462, "step": 335550 }, { "epoch": 0.6685101364273874, "grad_norm": 0.16856449842453003, "learning_rate": 0.002, "loss": 2.5494, "step": 335560 }, { "epoch": 0.6685300586510264, "grad_norm": 0.1632799655199051, "learning_rate": 0.002, "loss": 2.5526, "step": 335570 }, { "epoch": 0.6685499808746653, "grad_norm": 0.14231285452842712, "learning_rate": 0.002, "loss": 2.5651, "step": 335580 }, { "epoch": 0.6685699030983042, "grad_norm": 0.1851816028356552, "learning_rate": 0.002, "loss": 2.5518, "step": 335590 }, { "epoch": 0.6685898253219431, "grad_norm": 0.19514669477939606, "learning_rate": 0.002, "loss": 2.545, "step": 335600 }, { "epoch": 0.668609747545582, "grad_norm": 0.13726168870925903, "learning_rate": 0.002, "loss": 2.5479, "step": 335610 }, { "epoch": 0.668629669769221, "grad_norm": 0.13825924694538116, "learning_rate": 0.002, "loss": 2.5708, "step": 335620 }, { "epoch": 0.6686495919928599, "grad_norm": 0.17476657032966614, "learning_rate": 0.002, "loss": 2.5764, "step": 335630 }, { "epoch": 0.6686695142164988, "grad_norm": 0.1472269594669342, "learning_rate": 0.002, "loss": 2.5446, "step": 335640 }, { "epoch": 0.6686894364401377, "grad_norm": 0.16105812788009644, "learning_rate": 0.002, "loss": 2.549, "step": 335650 }, { "epoch": 0.6687093586637766, "grad_norm": 0.18990623950958252, "learning_rate": 0.002, "loss": 2.5581, "step": 335660 }, { "epoch": 0.6687292808874156, "grad_norm": 0.20169124007225037, "learning_rate": 0.002, "loss": 2.5535, "step": 335670 }, { "epoch": 0.6687492031110545, "grad_norm": 0.14217345416545868, "learning_rate": 0.002, "loss": 2.537, "step": 335680 }, { "epoch": 0.6687691253346933, "grad_norm": 0.1712835133075714, "learning_rate": 0.002, "loss": 2.5471, "step": 335690 }, { "epoch": 0.6687890475583322, "grad_norm": 0.1652461290359497, "learning_rate": 0.002, "loss": 2.5371, "step": 335700 }, { "epoch": 0.6688089697819711, "grad_norm": 0.17802895605564117, "learning_rate": 0.002, "loss": 2.5528, "step": 335710 }, { "epoch": 0.6688288920056101, "grad_norm": 0.1475338190793991, "learning_rate": 0.002, "loss": 2.5485, "step": 335720 }, { "epoch": 0.668848814229249, "grad_norm": 0.15270593762397766, "learning_rate": 0.002, "loss": 2.557, "step": 335730 }, { "epoch": 0.6688687364528879, "grad_norm": 0.15007071197032928, "learning_rate": 0.002, "loss": 2.5612, "step": 335740 }, { "epoch": 0.6688886586765268, "grad_norm": 0.19991306960582733, "learning_rate": 0.002, "loss": 2.57, "step": 335750 }, { "epoch": 0.6689085809001658, "grad_norm": 0.15600408613681793, "learning_rate": 0.002, "loss": 2.5534, "step": 335760 }, { "epoch": 0.6689285031238047, "grad_norm": 0.14746718108654022, "learning_rate": 0.002, "loss": 2.5615, "step": 335770 }, { "epoch": 0.6689484253474436, "grad_norm": 0.1722537875175476, "learning_rate": 0.002, "loss": 2.5629, "step": 335780 }, { "epoch": 0.6689683475710825, "grad_norm": 0.16088709235191345, "learning_rate": 0.002, "loss": 2.556, "step": 335790 }, { "epoch": 0.6689882697947214, "grad_norm": 0.17635242640972137, "learning_rate": 0.002, "loss": 2.5545, "step": 335800 }, { "epoch": 0.6690081920183604, "grad_norm": 0.15221741795539856, "learning_rate": 0.002, "loss": 2.5473, "step": 335810 }, { "epoch": 0.6690281142419993, "grad_norm": 0.1870684176683426, "learning_rate": 0.002, "loss": 2.57, "step": 335820 }, { "epoch": 0.6690480364656382, "grad_norm": 0.1992844045162201, "learning_rate": 0.002, "loss": 2.5366, "step": 335830 }, { "epoch": 0.669067958689277, "grad_norm": 0.15165874361991882, "learning_rate": 0.002, "loss": 2.5669, "step": 335840 }, { "epoch": 0.6690878809129159, "grad_norm": 0.15399575233459473, "learning_rate": 0.002, "loss": 2.5444, "step": 335850 }, { "epoch": 0.6691078031365549, "grad_norm": 0.16941967606544495, "learning_rate": 0.002, "loss": 2.5596, "step": 335860 }, { "epoch": 0.6691277253601938, "grad_norm": 0.18303658068180084, "learning_rate": 0.002, "loss": 2.5792, "step": 335870 }, { "epoch": 0.6691476475838327, "grad_norm": 0.14560697972774506, "learning_rate": 0.002, "loss": 2.5497, "step": 335880 }, { "epoch": 0.6691675698074716, "grad_norm": 0.1596936136484146, "learning_rate": 0.002, "loss": 2.5642, "step": 335890 }, { "epoch": 0.6691874920311105, "grad_norm": 0.18839289247989655, "learning_rate": 0.002, "loss": 2.5502, "step": 335900 }, { "epoch": 0.6692074142547495, "grad_norm": 0.21444520354270935, "learning_rate": 0.002, "loss": 2.5581, "step": 335910 }, { "epoch": 0.6692273364783884, "grad_norm": 0.1422363817691803, "learning_rate": 0.002, "loss": 2.5522, "step": 335920 }, { "epoch": 0.6692472587020273, "grad_norm": 0.13683530688285828, "learning_rate": 0.002, "loss": 2.5656, "step": 335930 }, { "epoch": 0.6692671809256662, "grad_norm": 0.19128593802452087, "learning_rate": 0.002, "loss": 2.5562, "step": 335940 }, { "epoch": 0.6692871031493051, "grad_norm": 0.1545679271221161, "learning_rate": 0.002, "loss": 2.5566, "step": 335950 }, { "epoch": 0.6693070253729441, "grad_norm": 0.15131008625030518, "learning_rate": 0.002, "loss": 2.5429, "step": 335960 }, { "epoch": 0.669326947596583, "grad_norm": 0.15098924934864044, "learning_rate": 0.002, "loss": 2.5547, "step": 335970 }, { "epoch": 0.6693468698202218, "grad_norm": 0.1525484025478363, "learning_rate": 0.002, "loss": 2.5509, "step": 335980 }, { "epoch": 0.6693667920438607, "grad_norm": 0.16136156022548676, "learning_rate": 0.002, "loss": 2.5588, "step": 335990 }, { "epoch": 0.6693867142674996, "grad_norm": 0.1718069165945053, "learning_rate": 0.002, "loss": 2.5611, "step": 336000 }, { "epoch": 0.6694066364911386, "grad_norm": 0.16668348014354706, "learning_rate": 0.002, "loss": 2.5584, "step": 336010 }, { "epoch": 0.6694265587147775, "grad_norm": 0.167734757065773, "learning_rate": 0.002, "loss": 2.5634, "step": 336020 }, { "epoch": 0.6694464809384164, "grad_norm": 0.14761172235012054, "learning_rate": 0.002, "loss": 2.5603, "step": 336030 }, { "epoch": 0.6694664031620553, "grad_norm": 0.17902377247810364, "learning_rate": 0.002, "loss": 2.5556, "step": 336040 }, { "epoch": 0.6694863253856943, "grad_norm": 0.18392321467399597, "learning_rate": 0.002, "loss": 2.5612, "step": 336050 }, { "epoch": 0.6695062476093332, "grad_norm": 0.18080173432826996, "learning_rate": 0.002, "loss": 2.5544, "step": 336060 }, { "epoch": 0.6695261698329721, "grad_norm": 0.15938174724578857, "learning_rate": 0.002, "loss": 2.545, "step": 336070 }, { "epoch": 0.669546092056611, "grad_norm": 0.16192427277565002, "learning_rate": 0.002, "loss": 2.5388, "step": 336080 }, { "epoch": 0.6695660142802499, "grad_norm": 0.16454146802425385, "learning_rate": 0.002, "loss": 2.5608, "step": 336090 }, { "epoch": 0.6695859365038889, "grad_norm": 0.1620548516511917, "learning_rate": 0.002, "loss": 2.5443, "step": 336100 }, { "epoch": 0.6696058587275278, "grad_norm": 0.15398724377155304, "learning_rate": 0.002, "loss": 2.5555, "step": 336110 }, { "epoch": 0.6696257809511666, "grad_norm": 0.17729459702968597, "learning_rate": 0.002, "loss": 2.5525, "step": 336120 }, { "epoch": 0.6696457031748055, "grad_norm": 0.16442528367042542, "learning_rate": 0.002, "loss": 2.5502, "step": 336130 }, { "epoch": 0.6696656253984444, "grad_norm": 0.15582804381847382, "learning_rate": 0.002, "loss": 2.5496, "step": 336140 }, { "epoch": 0.6696855476220834, "grad_norm": 0.17336197197437286, "learning_rate": 0.002, "loss": 2.5609, "step": 336150 }, { "epoch": 0.6697054698457223, "grad_norm": 0.21033531427383423, "learning_rate": 0.002, "loss": 2.5559, "step": 336160 }, { "epoch": 0.6697253920693612, "grad_norm": 0.16149461269378662, "learning_rate": 0.002, "loss": 2.5524, "step": 336170 }, { "epoch": 0.6697453142930001, "grad_norm": 0.1954578012228012, "learning_rate": 0.002, "loss": 2.5518, "step": 336180 }, { "epoch": 0.669765236516639, "grad_norm": 0.1568726897239685, "learning_rate": 0.002, "loss": 2.5542, "step": 336190 }, { "epoch": 0.669785158740278, "grad_norm": 0.15999913215637207, "learning_rate": 0.002, "loss": 2.5666, "step": 336200 }, { "epoch": 0.6698050809639169, "grad_norm": 0.14704079926013947, "learning_rate": 0.002, "loss": 2.5546, "step": 336210 }, { "epoch": 0.6698250031875558, "grad_norm": 0.1890522986650467, "learning_rate": 0.002, "loss": 2.5519, "step": 336220 }, { "epoch": 0.6698449254111947, "grad_norm": 0.2038072943687439, "learning_rate": 0.002, "loss": 2.5417, "step": 336230 }, { "epoch": 0.6698648476348336, "grad_norm": 0.16068103909492493, "learning_rate": 0.002, "loss": 2.556, "step": 336240 }, { "epoch": 0.6698847698584726, "grad_norm": 0.17950521409511566, "learning_rate": 0.002, "loss": 2.5518, "step": 336250 }, { "epoch": 0.6699046920821115, "grad_norm": 0.1582689732313156, "learning_rate": 0.002, "loss": 2.5517, "step": 336260 }, { "epoch": 0.6699246143057503, "grad_norm": 0.180631622672081, "learning_rate": 0.002, "loss": 2.5597, "step": 336270 }, { "epoch": 0.6699445365293892, "grad_norm": 0.14715540409088135, "learning_rate": 0.002, "loss": 2.5503, "step": 336280 }, { "epoch": 0.6699644587530281, "grad_norm": 0.19014005362987518, "learning_rate": 0.002, "loss": 2.5344, "step": 336290 }, { "epoch": 0.6699843809766671, "grad_norm": 0.16749197244644165, "learning_rate": 0.002, "loss": 2.5673, "step": 336300 }, { "epoch": 0.670004303200306, "grad_norm": 0.18859981000423431, "learning_rate": 0.002, "loss": 2.5734, "step": 336310 }, { "epoch": 0.6700242254239449, "grad_norm": 0.17919853329658508, "learning_rate": 0.002, "loss": 2.5502, "step": 336320 }, { "epoch": 0.6700441476475838, "grad_norm": 0.1611078977584839, "learning_rate": 0.002, "loss": 2.5544, "step": 336330 }, { "epoch": 0.6700640698712228, "grad_norm": 0.14547012746334076, "learning_rate": 0.002, "loss": 2.5662, "step": 336340 }, { "epoch": 0.6700839920948617, "grad_norm": 0.16060228645801544, "learning_rate": 0.002, "loss": 2.5526, "step": 336350 }, { "epoch": 0.6701039143185006, "grad_norm": 0.18501806259155273, "learning_rate": 0.002, "loss": 2.5541, "step": 336360 }, { "epoch": 0.6701238365421395, "grad_norm": 0.14974023401737213, "learning_rate": 0.002, "loss": 2.5501, "step": 336370 }, { "epoch": 0.6701437587657784, "grad_norm": 0.15937690436840057, "learning_rate": 0.002, "loss": 2.5445, "step": 336380 }, { "epoch": 0.6701636809894174, "grad_norm": 0.17892788350582123, "learning_rate": 0.002, "loss": 2.5552, "step": 336390 }, { "epoch": 0.6701836032130563, "grad_norm": 0.1777641773223877, "learning_rate": 0.002, "loss": 2.536, "step": 336400 }, { "epoch": 0.6702035254366951, "grad_norm": 0.18604731559753418, "learning_rate": 0.002, "loss": 2.562, "step": 336410 }, { "epoch": 0.670223447660334, "grad_norm": 0.1473851352930069, "learning_rate": 0.002, "loss": 2.5637, "step": 336420 }, { "epoch": 0.6702433698839729, "grad_norm": 0.17652645707130432, "learning_rate": 0.002, "loss": 2.5526, "step": 336430 }, { "epoch": 0.6702632921076119, "grad_norm": 0.17882516980171204, "learning_rate": 0.002, "loss": 2.543, "step": 336440 }, { "epoch": 0.6702832143312508, "grad_norm": 0.18855953216552734, "learning_rate": 0.002, "loss": 2.5509, "step": 336450 }, { "epoch": 0.6703031365548897, "grad_norm": 0.16724422574043274, "learning_rate": 0.002, "loss": 2.5455, "step": 336460 }, { "epoch": 0.6703230587785286, "grad_norm": 0.17073240876197815, "learning_rate": 0.002, "loss": 2.5556, "step": 336470 }, { "epoch": 0.6703429810021675, "grad_norm": 0.1445722132921219, "learning_rate": 0.002, "loss": 2.561, "step": 336480 }, { "epoch": 0.6703629032258065, "grad_norm": 0.17180930078029633, "learning_rate": 0.002, "loss": 2.5586, "step": 336490 }, { "epoch": 0.6703828254494454, "grad_norm": 0.20201246440410614, "learning_rate": 0.002, "loss": 2.5379, "step": 336500 }, { "epoch": 0.6704027476730843, "grad_norm": 0.15887956321239471, "learning_rate": 0.002, "loss": 2.5453, "step": 336510 }, { "epoch": 0.6704226698967232, "grad_norm": 0.17527057230472565, "learning_rate": 0.002, "loss": 2.5647, "step": 336520 }, { "epoch": 0.6704425921203621, "grad_norm": 0.1819949895143509, "learning_rate": 0.002, "loss": 2.5614, "step": 336530 }, { "epoch": 0.6704625143440011, "grad_norm": 0.1741715669631958, "learning_rate": 0.002, "loss": 2.5502, "step": 336540 }, { "epoch": 0.67048243656764, "grad_norm": 0.15299715101718903, "learning_rate": 0.002, "loss": 2.5534, "step": 336550 }, { "epoch": 0.6705023587912788, "grad_norm": 0.19684945046901703, "learning_rate": 0.002, "loss": 2.5472, "step": 336560 }, { "epoch": 0.6705222810149177, "grad_norm": 0.13997501134872437, "learning_rate": 0.002, "loss": 2.5585, "step": 336570 }, { "epoch": 0.6705422032385566, "grad_norm": 0.16507919132709503, "learning_rate": 0.002, "loss": 2.5508, "step": 336580 }, { "epoch": 0.6705621254621956, "grad_norm": 0.13718093931674957, "learning_rate": 0.002, "loss": 2.5525, "step": 336590 }, { "epoch": 0.6705820476858345, "grad_norm": 0.18895356357097626, "learning_rate": 0.002, "loss": 2.572, "step": 336600 }, { "epoch": 0.6706019699094734, "grad_norm": 0.17833921313285828, "learning_rate": 0.002, "loss": 2.5536, "step": 336610 }, { "epoch": 0.6706218921331123, "grad_norm": 0.17273975908756256, "learning_rate": 0.002, "loss": 2.5615, "step": 336620 }, { "epoch": 0.6706418143567513, "grad_norm": 0.17388302087783813, "learning_rate": 0.002, "loss": 2.5483, "step": 336630 }, { "epoch": 0.6706617365803902, "grad_norm": 0.1999925971031189, "learning_rate": 0.002, "loss": 2.5361, "step": 336640 }, { "epoch": 0.6706816588040291, "grad_norm": 0.15696841478347778, "learning_rate": 0.002, "loss": 2.5355, "step": 336650 }, { "epoch": 0.670701581027668, "grad_norm": 0.23372460901737213, "learning_rate": 0.002, "loss": 2.5564, "step": 336660 }, { "epoch": 0.6707215032513069, "grad_norm": 0.17063535749912262, "learning_rate": 0.002, "loss": 2.5495, "step": 336670 }, { "epoch": 0.6707414254749459, "grad_norm": 0.16138207912445068, "learning_rate": 0.002, "loss": 2.5596, "step": 336680 }, { "epoch": 0.6707613476985848, "grad_norm": 0.1470225304365158, "learning_rate": 0.002, "loss": 2.5596, "step": 336690 }, { "epoch": 0.6707812699222236, "grad_norm": 0.18227601051330566, "learning_rate": 0.002, "loss": 2.5582, "step": 336700 }, { "epoch": 0.6708011921458625, "grad_norm": 0.1454949676990509, "learning_rate": 0.002, "loss": 2.5481, "step": 336710 }, { "epoch": 0.6708211143695014, "grad_norm": 0.18629081547260284, "learning_rate": 0.002, "loss": 2.558, "step": 336720 }, { "epoch": 0.6708410365931404, "grad_norm": 0.17387820780277252, "learning_rate": 0.002, "loss": 2.5409, "step": 336730 }, { "epoch": 0.6708609588167793, "grad_norm": 0.16896001994609833, "learning_rate": 0.002, "loss": 2.5604, "step": 336740 }, { "epoch": 0.6708808810404182, "grad_norm": 0.17901016771793365, "learning_rate": 0.002, "loss": 2.5568, "step": 336750 }, { "epoch": 0.6709008032640571, "grad_norm": 0.13437789678573608, "learning_rate": 0.002, "loss": 2.5535, "step": 336760 }, { "epoch": 0.670920725487696, "grad_norm": 0.18111371994018555, "learning_rate": 0.002, "loss": 2.5577, "step": 336770 }, { "epoch": 0.670940647711335, "grad_norm": 0.16591119766235352, "learning_rate": 0.002, "loss": 2.5602, "step": 336780 }, { "epoch": 0.6709605699349739, "grad_norm": 0.15993280708789825, "learning_rate": 0.002, "loss": 2.5578, "step": 336790 }, { "epoch": 0.6709804921586128, "grad_norm": 0.17217551171779633, "learning_rate": 0.002, "loss": 2.5535, "step": 336800 }, { "epoch": 0.6710004143822517, "grad_norm": 0.18446896970272064, "learning_rate": 0.002, "loss": 2.5747, "step": 336810 }, { "epoch": 0.6710203366058906, "grad_norm": 0.17980694770812988, "learning_rate": 0.002, "loss": 2.5546, "step": 336820 }, { "epoch": 0.6710402588295296, "grad_norm": 0.14287309348583221, "learning_rate": 0.002, "loss": 2.5652, "step": 336830 }, { "epoch": 0.6710601810531684, "grad_norm": 0.14893324673175812, "learning_rate": 0.002, "loss": 2.5562, "step": 336840 }, { "epoch": 0.6710801032768073, "grad_norm": 0.17564325034618378, "learning_rate": 0.002, "loss": 2.5663, "step": 336850 }, { "epoch": 0.6711000255004462, "grad_norm": 0.1864793300628662, "learning_rate": 0.002, "loss": 2.5578, "step": 336860 }, { "epoch": 0.6711199477240851, "grad_norm": 0.17825324833393097, "learning_rate": 0.002, "loss": 2.5479, "step": 336870 }, { "epoch": 0.6711398699477241, "grad_norm": 0.18083897233009338, "learning_rate": 0.002, "loss": 2.5563, "step": 336880 }, { "epoch": 0.671159792171363, "grad_norm": 0.18095500767230988, "learning_rate": 0.002, "loss": 2.5684, "step": 336890 }, { "epoch": 0.6711797143950019, "grad_norm": 0.1630224585533142, "learning_rate": 0.002, "loss": 2.5476, "step": 336900 }, { "epoch": 0.6711996366186408, "grad_norm": 0.16706682741641998, "learning_rate": 0.002, "loss": 2.5573, "step": 336910 }, { "epoch": 0.6712195588422798, "grad_norm": 0.15111930668354034, "learning_rate": 0.002, "loss": 2.553, "step": 336920 }, { "epoch": 0.6712394810659187, "grad_norm": 0.1561674326658249, "learning_rate": 0.002, "loss": 2.5651, "step": 336930 }, { "epoch": 0.6712594032895576, "grad_norm": 0.15716318786144257, "learning_rate": 0.002, "loss": 2.5537, "step": 336940 }, { "epoch": 0.6712793255131965, "grad_norm": 0.17918981611728668, "learning_rate": 0.002, "loss": 2.551, "step": 336950 }, { "epoch": 0.6712992477368354, "grad_norm": 0.15832126140594482, "learning_rate": 0.002, "loss": 2.5575, "step": 336960 }, { "epoch": 0.6713191699604744, "grad_norm": 0.14752353727817535, "learning_rate": 0.002, "loss": 2.5491, "step": 336970 }, { "epoch": 0.6713390921841133, "grad_norm": 0.16342078149318695, "learning_rate": 0.002, "loss": 2.557, "step": 336980 }, { "epoch": 0.6713590144077521, "grad_norm": 0.15680094063282013, "learning_rate": 0.002, "loss": 2.538, "step": 336990 }, { "epoch": 0.671378936631391, "grad_norm": 0.17196057736873627, "learning_rate": 0.002, "loss": 2.559, "step": 337000 }, { "epoch": 0.6713988588550299, "grad_norm": 0.1583506315946579, "learning_rate": 0.002, "loss": 2.5525, "step": 337010 }, { "epoch": 0.6714187810786689, "grad_norm": 0.15196958184242249, "learning_rate": 0.002, "loss": 2.5499, "step": 337020 }, { "epoch": 0.6714387033023078, "grad_norm": 0.17599734663963318, "learning_rate": 0.002, "loss": 2.5402, "step": 337030 }, { "epoch": 0.6714586255259467, "grad_norm": 0.1552693098783493, "learning_rate": 0.002, "loss": 2.5398, "step": 337040 }, { "epoch": 0.6714785477495856, "grad_norm": 0.15739738941192627, "learning_rate": 0.002, "loss": 2.5553, "step": 337050 }, { "epoch": 0.6714984699732245, "grad_norm": 0.1550569236278534, "learning_rate": 0.002, "loss": 2.5589, "step": 337060 }, { "epoch": 0.6715183921968635, "grad_norm": 0.18086428940296173, "learning_rate": 0.002, "loss": 2.5511, "step": 337070 }, { "epoch": 0.6715383144205024, "grad_norm": 0.16906282305717468, "learning_rate": 0.002, "loss": 2.5463, "step": 337080 }, { "epoch": 0.6715582366441413, "grad_norm": 0.2156551331281662, "learning_rate": 0.002, "loss": 2.5728, "step": 337090 }, { "epoch": 0.6715781588677802, "grad_norm": 0.1557990163564682, "learning_rate": 0.002, "loss": 2.5638, "step": 337100 }, { "epoch": 0.671598081091419, "grad_norm": 0.15103094279766083, "learning_rate": 0.002, "loss": 2.5487, "step": 337110 }, { "epoch": 0.671618003315058, "grad_norm": 0.1774456948041916, "learning_rate": 0.002, "loss": 2.5688, "step": 337120 }, { "epoch": 0.671637925538697, "grad_norm": 0.15938471257686615, "learning_rate": 0.002, "loss": 2.5456, "step": 337130 }, { "epoch": 0.6716578477623358, "grad_norm": 0.1531839221715927, "learning_rate": 0.002, "loss": 2.5428, "step": 337140 }, { "epoch": 0.6716777699859747, "grad_norm": 0.1683090180158615, "learning_rate": 0.002, "loss": 2.5684, "step": 337150 }, { "epoch": 0.6716976922096136, "grad_norm": 0.17293348908424377, "learning_rate": 0.002, "loss": 2.557, "step": 337160 }, { "epoch": 0.6717176144332526, "grad_norm": 0.16224157810211182, "learning_rate": 0.002, "loss": 2.5448, "step": 337170 }, { "epoch": 0.6717375366568915, "grad_norm": 0.17554982006549835, "learning_rate": 0.002, "loss": 2.5471, "step": 337180 }, { "epoch": 0.6717574588805304, "grad_norm": 0.16140490770339966, "learning_rate": 0.002, "loss": 2.552, "step": 337190 }, { "epoch": 0.6717773811041693, "grad_norm": 0.1669379472732544, "learning_rate": 0.002, "loss": 2.5603, "step": 337200 }, { "epoch": 0.6717973033278082, "grad_norm": 0.15522243082523346, "learning_rate": 0.002, "loss": 2.5488, "step": 337210 }, { "epoch": 0.6718172255514472, "grad_norm": 0.19173987209796906, "learning_rate": 0.002, "loss": 2.5719, "step": 337220 }, { "epoch": 0.6718371477750861, "grad_norm": 0.1510494202375412, "learning_rate": 0.002, "loss": 2.5468, "step": 337230 }, { "epoch": 0.671857069998725, "grad_norm": 0.17304418981075287, "learning_rate": 0.002, "loss": 2.5519, "step": 337240 }, { "epoch": 0.6718769922223639, "grad_norm": 0.1379803717136383, "learning_rate": 0.002, "loss": 2.5691, "step": 337250 }, { "epoch": 0.6718969144460029, "grad_norm": 0.1806858628988266, "learning_rate": 0.002, "loss": 2.5604, "step": 337260 }, { "epoch": 0.6719168366696417, "grad_norm": 0.1564260870218277, "learning_rate": 0.002, "loss": 2.5422, "step": 337270 }, { "epoch": 0.6719367588932806, "grad_norm": 0.17315594851970673, "learning_rate": 0.002, "loss": 2.5582, "step": 337280 }, { "epoch": 0.6719566811169195, "grad_norm": 0.14356090128421783, "learning_rate": 0.002, "loss": 2.558, "step": 337290 }, { "epoch": 0.6719766033405584, "grad_norm": 0.18318329751491547, "learning_rate": 0.002, "loss": 2.562, "step": 337300 }, { "epoch": 0.6719965255641974, "grad_norm": 0.1941763311624527, "learning_rate": 0.002, "loss": 2.5599, "step": 337310 }, { "epoch": 0.6720164477878363, "grad_norm": 0.15498948097229004, "learning_rate": 0.002, "loss": 2.5402, "step": 337320 }, { "epoch": 0.6720363700114752, "grad_norm": 0.13916391134262085, "learning_rate": 0.002, "loss": 2.5657, "step": 337330 }, { "epoch": 0.6720562922351141, "grad_norm": 0.152935191988945, "learning_rate": 0.002, "loss": 2.5474, "step": 337340 }, { "epoch": 0.672076214458753, "grad_norm": 0.18998487293720245, "learning_rate": 0.002, "loss": 2.5592, "step": 337350 }, { "epoch": 0.672096136682392, "grad_norm": 0.18970750272274017, "learning_rate": 0.002, "loss": 2.5605, "step": 337360 }, { "epoch": 0.6721160589060309, "grad_norm": 0.1697789579629898, "learning_rate": 0.002, "loss": 2.563, "step": 337370 }, { "epoch": 0.6721359811296698, "grad_norm": 0.16471439599990845, "learning_rate": 0.002, "loss": 2.5459, "step": 337380 }, { "epoch": 0.6721559033533087, "grad_norm": 0.1849239617586136, "learning_rate": 0.002, "loss": 2.5365, "step": 337390 }, { "epoch": 0.6721758255769475, "grad_norm": 0.1894063651561737, "learning_rate": 0.002, "loss": 2.5483, "step": 337400 }, { "epoch": 0.6721957478005866, "grad_norm": 0.1658887267112732, "learning_rate": 0.002, "loss": 2.5562, "step": 337410 }, { "epoch": 0.6722156700242254, "grad_norm": 0.14288640022277832, "learning_rate": 0.002, "loss": 2.5693, "step": 337420 }, { "epoch": 0.6722355922478643, "grad_norm": 0.1742362529039383, "learning_rate": 0.002, "loss": 2.557, "step": 337430 }, { "epoch": 0.6722555144715032, "grad_norm": 0.1812695860862732, "learning_rate": 0.002, "loss": 2.5596, "step": 337440 }, { "epoch": 0.6722754366951421, "grad_norm": 0.1789184957742691, "learning_rate": 0.002, "loss": 2.5388, "step": 337450 }, { "epoch": 0.6722953589187811, "grad_norm": 0.14421385526657104, "learning_rate": 0.002, "loss": 2.5503, "step": 337460 }, { "epoch": 0.67231528114242, "grad_norm": 0.1535646617412567, "learning_rate": 0.002, "loss": 2.5472, "step": 337470 }, { "epoch": 0.6723352033660589, "grad_norm": 0.17581182718276978, "learning_rate": 0.002, "loss": 2.5568, "step": 337480 }, { "epoch": 0.6723551255896978, "grad_norm": 0.14866231381893158, "learning_rate": 0.002, "loss": 2.5594, "step": 337490 }, { "epoch": 0.6723750478133367, "grad_norm": 0.15927907824516296, "learning_rate": 0.002, "loss": 2.56, "step": 337500 }, { "epoch": 0.6723949700369757, "grad_norm": 0.17878514528274536, "learning_rate": 0.002, "loss": 2.5646, "step": 337510 }, { "epoch": 0.6724148922606146, "grad_norm": 0.1834307312965393, "learning_rate": 0.002, "loss": 2.5627, "step": 337520 }, { "epoch": 0.6724348144842535, "grad_norm": 0.1604757457971573, "learning_rate": 0.002, "loss": 2.5532, "step": 337530 }, { "epoch": 0.6724547367078924, "grad_norm": 0.2111254781484604, "learning_rate": 0.002, "loss": 2.5648, "step": 337540 }, { "epoch": 0.6724746589315314, "grad_norm": 0.1652677059173584, "learning_rate": 0.002, "loss": 2.568, "step": 337550 }, { "epoch": 0.6724945811551702, "grad_norm": 0.16538497805595398, "learning_rate": 0.002, "loss": 2.5651, "step": 337560 }, { "epoch": 0.6725145033788091, "grad_norm": 0.17596982419490814, "learning_rate": 0.002, "loss": 2.558, "step": 337570 }, { "epoch": 0.672534425602448, "grad_norm": 0.14996826648712158, "learning_rate": 0.002, "loss": 2.5527, "step": 337580 }, { "epoch": 0.6725543478260869, "grad_norm": 0.17144103348255157, "learning_rate": 0.002, "loss": 2.5578, "step": 337590 }, { "epoch": 0.6725742700497259, "grad_norm": 0.19160518050193787, "learning_rate": 0.002, "loss": 2.5444, "step": 337600 }, { "epoch": 0.6725941922733648, "grad_norm": 0.1523260772228241, "learning_rate": 0.002, "loss": 2.5522, "step": 337610 }, { "epoch": 0.6726141144970037, "grad_norm": 0.16334232687950134, "learning_rate": 0.002, "loss": 2.5474, "step": 337620 }, { "epoch": 0.6726340367206426, "grad_norm": 0.18294548988342285, "learning_rate": 0.002, "loss": 2.5476, "step": 337630 }, { "epoch": 0.6726539589442815, "grad_norm": 0.1876126080751419, "learning_rate": 0.002, "loss": 2.5626, "step": 337640 }, { "epoch": 0.6726738811679205, "grad_norm": 0.17303211987018585, "learning_rate": 0.002, "loss": 2.5487, "step": 337650 }, { "epoch": 0.6726938033915594, "grad_norm": 0.1455433964729309, "learning_rate": 0.002, "loss": 2.5457, "step": 337660 }, { "epoch": 0.6727137256151983, "grad_norm": 0.16695533692836761, "learning_rate": 0.002, "loss": 2.5647, "step": 337670 }, { "epoch": 0.6727336478388372, "grad_norm": 0.1496986746788025, "learning_rate": 0.002, "loss": 2.5626, "step": 337680 }, { "epoch": 0.672753570062476, "grad_norm": 0.16043789684772491, "learning_rate": 0.002, "loss": 2.5579, "step": 337690 }, { "epoch": 0.672773492286115, "grad_norm": 0.15535703301429749, "learning_rate": 0.002, "loss": 2.5478, "step": 337700 }, { "epoch": 0.6727934145097539, "grad_norm": 0.15162639319896698, "learning_rate": 0.002, "loss": 2.5614, "step": 337710 }, { "epoch": 0.6728133367333928, "grad_norm": 0.17070555686950684, "learning_rate": 0.002, "loss": 2.5534, "step": 337720 }, { "epoch": 0.6728332589570317, "grad_norm": 0.16635863482952118, "learning_rate": 0.002, "loss": 2.5539, "step": 337730 }, { "epoch": 0.6728531811806706, "grad_norm": 0.18803671002388, "learning_rate": 0.002, "loss": 2.5539, "step": 337740 }, { "epoch": 0.6728731034043096, "grad_norm": 0.15913188457489014, "learning_rate": 0.002, "loss": 2.571, "step": 337750 }, { "epoch": 0.6728930256279485, "grad_norm": 0.17068374156951904, "learning_rate": 0.002, "loss": 2.5618, "step": 337760 }, { "epoch": 0.6729129478515874, "grad_norm": 0.21263501048088074, "learning_rate": 0.002, "loss": 2.5629, "step": 337770 }, { "epoch": 0.6729328700752263, "grad_norm": 0.16492173075675964, "learning_rate": 0.002, "loss": 2.5286, "step": 337780 }, { "epoch": 0.6729527922988652, "grad_norm": 0.15932393074035645, "learning_rate": 0.002, "loss": 2.565, "step": 337790 }, { "epoch": 0.6729727145225042, "grad_norm": 0.16833463311195374, "learning_rate": 0.002, "loss": 2.5548, "step": 337800 }, { "epoch": 0.6729926367461431, "grad_norm": 0.1910751909017563, "learning_rate": 0.002, "loss": 2.5546, "step": 337810 }, { "epoch": 0.673012558969782, "grad_norm": 0.17828616499900818, "learning_rate": 0.002, "loss": 2.5606, "step": 337820 }, { "epoch": 0.6730324811934209, "grad_norm": 0.20376306772232056, "learning_rate": 0.002, "loss": 2.5497, "step": 337830 }, { "epoch": 0.6730524034170599, "grad_norm": 0.1487378180027008, "learning_rate": 0.002, "loss": 2.5464, "step": 337840 }, { "epoch": 0.6730723256406987, "grad_norm": 0.15153548121452332, "learning_rate": 0.002, "loss": 2.5479, "step": 337850 }, { "epoch": 0.6730922478643376, "grad_norm": 0.20656810700893402, "learning_rate": 0.002, "loss": 2.5653, "step": 337860 }, { "epoch": 0.6731121700879765, "grad_norm": 0.20215009152889252, "learning_rate": 0.002, "loss": 2.5416, "step": 337870 }, { "epoch": 0.6731320923116154, "grad_norm": 0.14405913650989532, "learning_rate": 0.002, "loss": 2.5404, "step": 337880 }, { "epoch": 0.6731520145352544, "grad_norm": 0.18362770974636078, "learning_rate": 0.002, "loss": 2.5395, "step": 337890 }, { "epoch": 0.6731719367588933, "grad_norm": 0.20322130620479584, "learning_rate": 0.002, "loss": 2.5605, "step": 337900 }, { "epoch": 0.6731918589825322, "grad_norm": 0.17015881836414337, "learning_rate": 0.002, "loss": 2.5539, "step": 337910 }, { "epoch": 0.6732117812061711, "grad_norm": 0.19548553228378296, "learning_rate": 0.002, "loss": 2.5574, "step": 337920 }, { "epoch": 0.67323170342981, "grad_norm": 0.1561824530363083, "learning_rate": 0.002, "loss": 2.5642, "step": 337930 }, { "epoch": 0.673251625653449, "grad_norm": 0.135222926735878, "learning_rate": 0.002, "loss": 2.5657, "step": 337940 }, { "epoch": 0.6732715478770879, "grad_norm": 0.20585715770721436, "learning_rate": 0.002, "loss": 2.5583, "step": 337950 }, { "epoch": 0.6732914701007268, "grad_norm": 0.13996630907058716, "learning_rate": 0.002, "loss": 2.5482, "step": 337960 }, { "epoch": 0.6733113923243657, "grad_norm": 0.15821252763271332, "learning_rate": 0.002, "loss": 2.5379, "step": 337970 }, { "epoch": 0.6733313145480045, "grad_norm": 0.15642967820167542, "learning_rate": 0.002, "loss": 2.5628, "step": 337980 }, { "epoch": 0.6733512367716435, "grad_norm": 0.17813509702682495, "learning_rate": 0.002, "loss": 2.5421, "step": 337990 }, { "epoch": 0.6733711589952824, "grad_norm": 0.1508454829454422, "learning_rate": 0.002, "loss": 2.5394, "step": 338000 }, { "epoch": 0.6733910812189213, "grad_norm": 0.1683502495288849, "learning_rate": 0.002, "loss": 2.5502, "step": 338010 }, { "epoch": 0.6734110034425602, "grad_norm": 0.1459738165140152, "learning_rate": 0.002, "loss": 2.5711, "step": 338020 }, { "epoch": 0.6734309256661991, "grad_norm": 0.1791868358850479, "learning_rate": 0.002, "loss": 2.5587, "step": 338030 }, { "epoch": 0.6734508478898381, "grad_norm": 0.15575426816940308, "learning_rate": 0.002, "loss": 2.5461, "step": 338040 }, { "epoch": 0.673470770113477, "grad_norm": 0.15579655766487122, "learning_rate": 0.002, "loss": 2.548, "step": 338050 }, { "epoch": 0.6734906923371159, "grad_norm": 0.18426616489887238, "learning_rate": 0.002, "loss": 2.5528, "step": 338060 }, { "epoch": 0.6735106145607548, "grad_norm": 0.16652612388134003, "learning_rate": 0.002, "loss": 2.5468, "step": 338070 }, { "epoch": 0.6735305367843937, "grad_norm": 0.1699514091014862, "learning_rate": 0.002, "loss": 2.5667, "step": 338080 }, { "epoch": 0.6735504590080327, "grad_norm": 0.15381871163845062, "learning_rate": 0.002, "loss": 2.557, "step": 338090 }, { "epoch": 0.6735703812316716, "grad_norm": 0.15257461369037628, "learning_rate": 0.002, "loss": 2.5435, "step": 338100 }, { "epoch": 0.6735903034553105, "grad_norm": 0.18725918233394623, "learning_rate": 0.002, "loss": 2.5503, "step": 338110 }, { "epoch": 0.6736102256789493, "grad_norm": 0.15036232769489288, "learning_rate": 0.002, "loss": 2.5543, "step": 338120 }, { "epoch": 0.6736301479025883, "grad_norm": 0.19702406227588654, "learning_rate": 0.002, "loss": 2.5329, "step": 338130 }, { "epoch": 0.6736500701262272, "grad_norm": 0.17517095804214478, "learning_rate": 0.002, "loss": 2.5452, "step": 338140 }, { "epoch": 0.6736699923498661, "grad_norm": 0.16119220852851868, "learning_rate": 0.002, "loss": 2.5612, "step": 338150 }, { "epoch": 0.673689914573505, "grad_norm": 0.17102088034152985, "learning_rate": 0.002, "loss": 2.5547, "step": 338160 }, { "epoch": 0.6737098367971439, "grad_norm": 0.16640223562717438, "learning_rate": 0.002, "loss": 2.5422, "step": 338170 }, { "epoch": 0.6737297590207829, "grad_norm": 0.1619587391614914, "learning_rate": 0.002, "loss": 2.5539, "step": 338180 }, { "epoch": 0.6737496812444218, "grad_norm": 0.1621801108121872, "learning_rate": 0.002, "loss": 2.5511, "step": 338190 }, { "epoch": 0.6737696034680607, "grad_norm": 0.14396215975284576, "learning_rate": 0.002, "loss": 2.5426, "step": 338200 }, { "epoch": 0.6737895256916996, "grad_norm": 0.167883038520813, "learning_rate": 0.002, "loss": 2.5621, "step": 338210 }, { "epoch": 0.6738094479153385, "grad_norm": 0.17201685905456543, "learning_rate": 0.002, "loss": 2.5494, "step": 338220 }, { "epoch": 0.6738293701389775, "grad_norm": 0.15368250012397766, "learning_rate": 0.002, "loss": 2.5554, "step": 338230 }, { "epoch": 0.6738492923626164, "grad_norm": 0.16612233221530914, "learning_rate": 0.002, "loss": 2.5493, "step": 338240 }, { "epoch": 0.6738692145862553, "grad_norm": 0.1593225747346878, "learning_rate": 0.002, "loss": 2.572, "step": 338250 }, { "epoch": 0.6738891368098942, "grad_norm": 0.14467041194438934, "learning_rate": 0.002, "loss": 2.5519, "step": 338260 }, { "epoch": 0.673909059033533, "grad_norm": 0.19887417554855347, "learning_rate": 0.002, "loss": 2.5568, "step": 338270 }, { "epoch": 0.673928981257172, "grad_norm": 0.1571086347103119, "learning_rate": 0.002, "loss": 2.5624, "step": 338280 }, { "epoch": 0.6739489034808109, "grad_norm": 0.16483843326568604, "learning_rate": 0.002, "loss": 2.5507, "step": 338290 }, { "epoch": 0.6739688257044498, "grad_norm": 0.19405703246593475, "learning_rate": 0.002, "loss": 2.5532, "step": 338300 }, { "epoch": 0.6739887479280887, "grad_norm": 0.18204756081104279, "learning_rate": 0.002, "loss": 2.552, "step": 338310 }, { "epoch": 0.6740086701517276, "grad_norm": 0.19422318041324615, "learning_rate": 0.002, "loss": 2.547, "step": 338320 }, { "epoch": 0.6740285923753666, "grad_norm": 0.1694341003894806, "learning_rate": 0.002, "loss": 2.5711, "step": 338330 }, { "epoch": 0.6740485145990055, "grad_norm": 0.1697261780500412, "learning_rate": 0.002, "loss": 2.5482, "step": 338340 }, { "epoch": 0.6740684368226444, "grad_norm": 0.19385042786598206, "learning_rate": 0.002, "loss": 2.5575, "step": 338350 }, { "epoch": 0.6740883590462833, "grad_norm": 0.16790257394313812, "learning_rate": 0.002, "loss": 2.5447, "step": 338360 }, { "epoch": 0.6741082812699222, "grad_norm": 0.15621080994606018, "learning_rate": 0.002, "loss": 2.5458, "step": 338370 }, { "epoch": 0.6741282034935612, "grad_norm": 0.1623227745294571, "learning_rate": 0.002, "loss": 2.5567, "step": 338380 }, { "epoch": 0.6741481257172001, "grad_norm": 0.16974522173404694, "learning_rate": 0.002, "loss": 2.5583, "step": 338390 }, { "epoch": 0.674168047940839, "grad_norm": 0.15735535323619843, "learning_rate": 0.002, "loss": 2.5631, "step": 338400 }, { "epoch": 0.6741879701644778, "grad_norm": 0.17079658806324005, "learning_rate": 0.002, "loss": 2.5558, "step": 338410 }, { "epoch": 0.6742078923881168, "grad_norm": 0.16057029366493225, "learning_rate": 0.002, "loss": 2.5671, "step": 338420 }, { "epoch": 0.6742278146117557, "grad_norm": 0.17481495440006256, "learning_rate": 0.002, "loss": 2.5458, "step": 338430 }, { "epoch": 0.6742477368353946, "grad_norm": 0.13651694357395172, "learning_rate": 0.002, "loss": 2.5509, "step": 338440 }, { "epoch": 0.6742676590590335, "grad_norm": 0.21674717962741852, "learning_rate": 0.002, "loss": 2.5582, "step": 338450 }, { "epoch": 0.6742875812826724, "grad_norm": 0.17513011395931244, "learning_rate": 0.002, "loss": 2.5525, "step": 338460 }, { "epoch": 0.6743075035063114, "grad_norm": 0.20198018848896027, "learning_rate": 0.002, "loss": 2.5485, "step": 338470 }, { "epoch": 0.6743274257299503, "grad_norm": 0.16584734618663788, "learning_rate": 0.002, "loss": 2.5543, "step": 338480 }, { "epoch": 0.6743473479535892, "grad_norm": 0.20821624994277954, "learning_rate": 0.002, "loss": 2.5432, "step": 338490 }, { "epoch": 0.6743672701772281, "grad_norm": 0.18134742975234985, "learning_rate": 0.002, "loss": 2.5519, "step": 338500 }, { "epoch": 0.674387192400867, "grad_norm": 0.15694783627986908, "learning_rate": 0.002, "loss": 2.578, "step": 338510 }, { "epoch": 0.674407114624506, "grad_norm": 0.2193615436553955, "learning_rate": 0.002, "loss": 2.5503, "step": 338520 }, { "epoch": 0.6744270368481449, "grad_norm": 0.16168682277202606, "learning_rate": 0.002, "loss": 2.5421, "step": 338530 }, { "epoch": 0.6744469590717838, "grad_norm": 0.17905071377754211, "learning_rate": 0.002, "loss": 2.5491, "step": 338540 }, { "epoch": 0.6744668812954226, "grad_norm": 0.20192040503025055, "learning_rate": 0.002, "loss": 2.5532, "step": 338550 }, { "epoch": 0.6744868035190615, "grad_norm": 0.1525653898715973, "learning_rate": 0.002, "loss": 2.5572, "step": 338560 }, { "epoch": 0.6745067257427005, "grad_norm": 0.16062189638614655, "learning_rate": 0.002, "loss": 2.5549, "step": 338570 }, { "epoch": 0.6745266479663394, "grad_norm": 0.18369515240192413, "learning_rate": 0.002, "loss": 2.5595, "step": 338580 }, { "epoch": 0.6745465701899783, "grad_norm": 0.17207825183868408, "learning_rate": 0.002, "loss": 2.5531, "step": 338590 }, { "epoch": 0.6745664924136172, "grad_norm": 0.17219319939613342, "learning_rate": 0.002, "loss": 2.5564, "step": 338600 }, { "epoch": 0.6745864146372561, "grad_norm": 0.16509702801704407, "learning_rate": 0.002, "loss": 2.5508, "step": 338610 }, { "epoch": 0.6746063368608951, "grad_norm": 0.1690794974565506, "learning_rate": 0.002, "loss": 2.5617, "step": 338620 }, { "epoch": 0.674626259084534, "grad_norm": 0.20785504579544067, "learning_rate": 0.002, "loss": 2.543, "step": 338630 }, { "epoch": 0.6746461813081729, "grad_norm": 0.1467089205980301, "learning_rate": 0.002, "loss": 2.5434, "step": 338640 }, { "epoch": 0.6746661035318118, "grad_norm": 0.14237891137599945, "learning_rate": 0.002, "loss": 2.5563, "step": 338650 }, { "epoch": 0.6746860257554507, "grad_norm": 0.19383829832077026, "learning_rate": 0.002, "loss": 2.5574, "step": 338660 }, { "epoch": 0.6747059479790897, "grad_norm": 0.18725742399692535, "learning_rate": 0.002, "loss": 2.5587, "step": 338670 }, { "epoch": 0.6747258702027286, "grad_norm": 0.18865084648132324, "learning_rate": 0.002, "loss": 2.57, "step": 338680 }, { "epoch": 0.6747457924263675, "grad_norm": 0.1680552065372467, "learning_rate": 0.002, "loss": 2.5664, "step": 338690 }, { "epoch": 0.6747657146500063, "grad_norm": 0.16018317639827728, "learning_rate": 0.002, "loss": 2.5841, "step": 338700 }, { "epoch": 0.6747856368736452, "grad_norm": 0.18534325063228607, "learning_rate": 0.002, "loss": 2.5506, "step": 338710 }, { "epoch": 0.6748055590972842, "grad_norm": 0.19170577824115753, "learning_rate": 0.002, "loss": 2.5718, "step": 338720 }, { "epoch": 0.6748254813209231, "grad_norm": 0.14824169874191284, "learning_rate": 0.002, "loss": 2.5581, "step": 338730 }, { "epoch": 0.674845403544562, "grad_norm": 0.1463928520679474, "learning_rate": 0.002, "loss": 2.5444, "step": 338740 }, { "epoch": 0.6748653257682009, "grad_norm": 0.1634821742773056, "learning_rate": 0.002, "loss": 2.5557, "step": 338750 }, { "epoch": 0.6748852479918399, "grad_norm": 0.16378994286060333, "learning_rate": 0.002, "loss": 2.5535, "step": 338760 }, { "epoch": 0.6749051702154788, "grad_norm": 0.19212578237056732, "learning_rate": 0.002, "loss": 2.5334, "step": 338770 }, { "epoch": 0.6749250924391177, "grad_norm": 0.16569186747074127, "learning_rate": 0.002, "loss": 2.5366, "step": 338780 }, { "epoch": 0.6749450146627566, "grad_norm": 0.1549350619316101, "learning_rate": 0.002, "loss": 2.5507, "step": 338790 }, { "epoch": 0.6749649368863955, "grad_norm": 0.17608390748500824, "learning_rate": 0.002, "loss": 2.548, "step": 338800 }, { "epoch": 0.6749848591100345, "grad_norm": 0.16662277281284332, "learning_rate": 0.002, "loss": 2.562, "step": 338810 }, { "epoch": 0.6750047813336734, "grad_norm": 0.21721793711185455, "learning_rate": 0.002, "loss": 2.5511, "step": 338820 }, { "epoch": 0.6750247035573123, "grad_norm": 0.18856289982795715, "learning_rate": 0.002, "loss": 2.562, "step": 338830 }, { "epoch": 0.6750446257809511, "grad_norm": 0.16216632723808289, "learning_rate": 0.002, "loss": 2.5656, "step": 338840 }, { "epoch": 0.67506454800459, "grad_norm": 0.15171858668327332, "learning_rate": 0.002, "loss": 2.5411, "step": 338850 }, { "epoch": 0.675084470228229, "grad_norm": 0.17060349881649017, "learning_rate": 0.002, "loss": 2.5548, "step": 338860 }, { "epoch": 0.6751043924518679, "grad_norm": 0.18838338553905487, "learning_rate": 0.002, "loss": 2.5615, "step": 338870 }, { "epoch": 0.6751243146755068, "grad_norm": 0.1870652437210083, "learning_rate": 0.002, "loss": 2.5428, "step": 338880 }, { "epoch": 0.6751442368991457, "grad_norm": 0.16578994691371918, "learning_rate": 0.002, "loss": 2.5417, "step": 338890 }, { "epoch": 0.6751641591227846, "grad_norm": 0.18620392680168152, "learning_rate": 0.002, "loss": 2.5627, "step": 338900 }, { "epoch": 0.6751840813464236, "grad_norm": 0.1491491198539734, "learning_rate": 0.002, "loss": 2.5631, "step": 338910 }, { "epoch": 0.6752040035700625, "grad_norm": 0.1802884340286255, "learning_rate": 0.002, "loss": 2.5656, "step": 338920 }, { "epoch": 0.6752239257937014, "grad_norm": 0.17194825410842896, "learning_rate": 0.002, "loss": 2.567, "step": 338930 }, { "epoch": 0.6752438480173403, "grad_norm": 0.15829353034496307, "learning_rate": 0.002, "loss": 2.5498, "step": 338940 }, { "epoch": 0.6752637702409792, "grad_norm": 0.16487768292427063, "learning_rate": 0.002, "loss": 2.5553, "step": 338950 }, { "epoch": 0.6752836924646182, "grad_norm": 0.19699469208717346, "learning_rate": 0.002, "loss": 2.5611, "step": 338960 }, { "epoch": 0.6753036146882571, "grad_norm": 0.19860398769378662, "learning_rate": 0.002, "loss": 2.5465, "step": 338970 }, { "epoch": 0.675323536911896, "grad_norm": 0.1749037653207779, "learning_rate": 0.002, "loss": 2.5473, "step": 338980 }, { "epoch": 0.6753434591355348, "grad_norm": 0.17668497562408447, "learning_rate": 0.002, "loss": 2.5513, "step": 338990 }, { "epoch": 0.6753633813591737, "grad_norm": 0.13677437603473663, "learning_rate": 0.002, "loss": 2.5739, "step": 339000 }, { "epoch": 0.6753833035828127, "grad_norm": 0.17606690526008606, "learning_rate": 0.002, "loss": 2.5405, "step": 339010 }, { "epoch": 0.6754032258064516, "grad_norm": 0.15334196388721466, "learning_rate": 0.002, "loss": 2.5718, "step": 339020 }, { "epoch": 0.6754231480300905, "grad_norm": 0.14832636713981628, "learning_rate": 0.002, "loss": 2.5546, "step": 339030 }, { "epoch": 0.6754430702537294, "grad_norm": 0.18668696284294128, "learning_rate": 0.002, "loss": 2.5513, "step": 339040 }, { "epoch": 0.6754629924773684, "grad_norm": 0.1567697823047638, "learning_rate": 0.002, "loss": 2.5768, "step": 339050 }, { "epoch": 0.6754829147010073, "grad_norm": 0.2099577635526657, "learning_rate": 0.002, "loss": 2.5637, "step": 339060 }, { "epoch": 0.6755028369246462, "grad_norm": 0.1658785343170166, "learning_rate": 0.002, "loss": 2.551, "step": 339070 }, { "epoch": 0.6755227591482851, "grad_norm": 0.14366967976093292, "learning_rate": 0.002, "loss": 2.5695, "step": 339080 }, { "epoch": 0.675542681371924, "grad_norm": 0.15032733976840973, "learning_rate": 0.002, "loss": 2.5438, "step": 339090 }, { "epoch": 0.675562603595563, "grad_norm": 0.20946577191352844, "learning_rate": 0.002, "loss": 2.568, "step": 339100 }, { "epoch": 0.6755825258192019, "grad_norm": 0.23851992189884186, "learning_rate": 0.002, "loss": 2.5658, "step": 339110 }, { "epoch": 0.6756024480428408, "grad_norm": 0.18401561677455902, "learning_rate": 0.002, "loss": 2.5504, "step": 339120 }, { "epoch": 0.6756223702664796, "grad_norm": 0.15877151489257812, "learning_rate": 0.002, "loss": 2.5431, "step": 339130 }, { "epoch": 0.6756422924901185, "grad_norm": 0.1680344194173813, "learning_rate": 0.002, "loss": 2.5462, "step": 339140 }, { "epoch": 0.6756622147137575, "grad_norm": 0.16975341737270355, "learning_rate": 0.002, "loss": 2.5549, "step": 339150 }, { "epoch": 0.6756821369373964, "grad_norm": 0.13097770512104034, "learning_rate": 0.002, "loss": 2.5784, "step": 339160 }, { "epoch": 0.6757020591610353, "grad_norm": 0.1947396844625473, "learning_rate": 0.002, "loss": 2.5666, "step": 339170 }, { "epoch": 0.6757219813846742, "grad_norm": 0.1490350365638733, "learning_rate": 0.002, "loss": 2.551, "step": 339180 }, { "epoch": 0.6757419036083131, "grad_norm": 0.16165143251419067, "learning_rate": 0.002, "loss": 2.5621, "step": 339190 }, { "epoch": 0.6757618258319521, "grad_norm": 0.15962310135364532, "learning_rate": 0.002, "loss": 2.5474, "step": 339200 }, { "epoch": 0.675781748055591, "grad_norm": 0.15502287447452545, "learning_rate": 0.002, "loss": 2.5596, "step": 339210 }, { "epoch": 0.6758016702792299, "grad_norm": 0.16672955453395844, "learning_rate": 0.002, "loss": 2.5456, "step": 339220 }, { "epoch": 0.6758215925028688, "grad_norm": 0.1868383288383484, "learning_rate": 0.002, "loss": 2.5459, "step": 339230 }, { "epoch": 0.6758415147265077, "grad_norm": 0.1539730578660965, "learning_rate": 0.002, "loss": 2.5532, "step": 339240 }, { "epoch": 0.6758614369501467, "grad_norm": 0.15220391750335693, "learning_rate": 0.002, "loss": 2.5626, "step": 339250 }, { "epoch": 0.6758813591737856, "grad_norm": 0.1616775393486023, "learning_rate": 0.002, "loss": 2.5505, "step": 339260 }, { "epoch": 0.6759012813974244, "grad_norm": 0.17991642653942108, "learning_rate": 0.002, "loss": 2.554, "step": 339270 }, { "epoch": 0.6759212036210633, "grad_norm": 0.1969718039035797, "learning_rate": 0.002, "loss": 2.5608, "step": 339280 }, { "epoch": 0.6759411258447022, "grad_norm": 0.1347590982913971, "learning_rate": 0.002, "loss": 2.5486, "step": 339290 }, { "epoch": 0.6759610480683412, "grad_norm": 0.16340979933738708, "learning_rate": 0.002, "loss": 2.5521, "step": 339300 }, { "epoch": 0.6759809702919801, "grad_norm": 0.2512147128582001, "learning_rate": 0.002, "loss": 2.5714, "step": 339310 }, { "epoch": 0.676000892515619, "grad_norm": 0.2075694054365158, "learning_rate": 0.002, "loss": 2.5819, "step": 339320 }, { "epoch": 0.6760208147392579, "grad_norm": 0.16840052604675293, "learning_rate": 0.002, "loss": 2.542, "step": 339330 }, { "epoch": 0.6760407369628969, "grad_norm": 0.15353788435459137, "learning_rate": 0.002, "loss": 2.5485, "step": 339340 }, { "epoch": 0.6760606591865358, "grad_norm": 0.2096453756093979, "learning_rate": 0.002, "loss": 2.5582, "step": 339350 }, { "epoch": 0.6760805814101747, "grad_norm": 0.14989185333251953, "learning_rate": 0.002, "loss": 2.5647, "step": 339360 }, { "epoch": 0.6761005036338136, "grad_norm": 0.17012682557106018, "learning_rate": 0.002, "loss": 2.5522, "step": 339370 }, { "epoch": 0.6761204258574525, "grad_norm": 0.15332481265068054, "learning_rate": 0.002, "loss": 2.5599, "step": 339380 }, { "epoch": 0.6761403480810915, "grad_norm": 0.18337279558181763, "learning_rate": 0.002, "loss": 2.5523, "step": 339390 }, { "epoch": 0.6761602703047304, "grad_norm": 0.15505830943584442, "learning_rate": 0.002, "loss": 2.5663, "step": 339400 }, { "epoch": 0.6761801925283693, "grad_norm": 0.22713378071784973, "learning_rate": 0.002, "loss": 2.5646, "step": 339410 }, { "epoch": 0.6762001147520081, "grad_norm": 0.18784187734127045, "learning_rate": 0.002, "loss": 2.5515, "step": 339420 }, { "epoch": 0.676220036975647, "grad_norm": 0.13997912406921387, "learning_rate": 0.002, "loss": 2.5501, "step": 339430 }, { "epoch": 0.676239959199286, "grad_norm": 0.14776843786239624, "learning_rate": 0.002, "loss": 2.5661, "step": 339440 }, { "epoch": 0.6762598814229249, "grad_norm": 0.1652582436800003, "learning_rate": 0.002, "loss": 2.5606, "step": 339450 }, { "epoch": 0.6762798036465638, "grad_norm": 0.19515618681907654, "learning_rate": 0.002, "loss": 2.5498, "step": 339460 }, { "epoch": 0.6762997258702027, "grad_norm": 0.15036249160766602, "learning_rate": 0.002, "loss": 2.5466, "step": 339470 }, { "epoch": 0.6763196480938416, "grad_norm": 0.18237793445587158, "learning_rate": 0.002, "loss": 2.5404, "step": 339480 }, { "epoch": 0.6763395703174806, "grad_norm": 0.17095856368541718, "learning_rate": 0.002, "loss": 2.5641, "step": 339490 }, { "epoch": 0.6763594925411195, "grad_norm": 0.15386055409908295, "learning_rate": 0.002, "loss": 2.5688, "step": 339500 }, { "epoch": 0.6763794147647584, "grad_norm": 0.16452620923519135, "learning_rate": 0.002, "loss": 2.5526, "step": 339510 }, { "epoch": 0.6763993369883973, "grad_norm": 0.164123997092247, "learning_rate": 0.002, "loss": 2.5596, "step": 339520 }, { "epoch": 0.6764192592120362, "grad_norm": 0.1714594066143036, "learning_rate": 0.002, "loss": 2.5449, "step": 339530 }, { "epoch": 0.6764391814356752, "grad_norm": 0.19654399156570435, "learning_rate": 0.002, "loss": 2.5557, "step": 339540 }, { "epoch": 0.676459103659314, "grad_norm": 0.16812764108181, "learning_rate": 0.002, "loss": 2.5551, "step": 339550 }, { "epoch": 0.676479025882953, "grad_norm": 0.17313869297504425, "learning_rate": 0.002, "loss": 2.5597, "step": 339560 }, { "epoch": 0.6764989481065918, "grad_norm": 0.1858639419078827, "learning_rate": 0.002, "loss": 2.5538, "step": 339570 }, { "epoch": 0.6765188703302307, "grad_norm": 0.16419664025306702, "learning_rate": 0.002, "loss": 2.5522, "step": 339580 }, { "epoch": 0.6765387925538697, "grad_norm": 0.16768038272857666, "learning_rate": 0.002, "loss": 2.5637, "step": 339590 }, { "epoch": 0.6765587147775086, "grad_norm": 0.14976054430007935, "learning_rate": 0.002, "loss": 2.5511, "step": 339600 }, { "epoch": 0.6765786370011475, "grad_norm": 0.15752020478248596, "learning_rate": 0.002, "loss": 2.5684, "step": 339610 }, { "epoch": 0.6765985592247864, "grad_norm": 0.17956605553627014, "learning_rate": 0.002, "loss": 2.5581, "step": 339620 }, { "epoch": 0.6766184814484254, "grad_norm": 0.16334576904773712, "learning_rate": 0.002, "loss": 2.563, "step": 339630 }, { "epoch": 0.6766384036720643, "grad_norm": 0.17258650064468384, "learning_rate": 0.002, "loss": 2.5446, "step": 339640 }, { "epoch": 0.6766583258957032, "grad_norm": 0.19005298614501953, "learning_rate": 0.002, "loss": 2.5688, "step": 339650 }, { "epoch": 0.6766782481193421, "grad_norm": 0.15430180728435516, "learning_rate": 0.002, "loss": 2.5662, "step": 339660 }, { "epoch": 0.676698170342981, "grad_norm": 0.20390966534614563, "learning_rate": 0.002, "loss": 2.5451, "step": 339670 }, { "epoch": 0.67671809256662, "grad_norm": 0.16474078595638275, "learning_rate": 0.002, "loss": 2.5514, "step": 339680 }, { "epoch": 0.6767380147902589, "grad_norm": 0.1893368810415268, "learning_rate": 0.002, "loss": 2.5492, "step": 339690 }, { "epoch": 0.6767579370138977, "grad_norm": 0.16046245396137238, "learning_rate": 0.002, "loss": 2.5608, "step": 339700 }, { "epoch": 0.6767778592375366, "grad_norm": 0.16246503591537476, "learning_rate": 0.002, "loss": 2.5308, "step": 339710 }, { "epoch": 0.6767977814611755, "grad_norm": 0.1397416740655899, "learning_rate": 0.002, "loss": 2.5594, "step": 339720 }, { "epoch": 0.6768177036848145, "grad_norm": 0.162374809384346, "learning_rate": 0.002, "loss": 2.5536, "step": 339730 }, { "epoch": 0.6768376259084534, "grad_norm": 0.17090576887130737, "learning_rate": 0.002, "loss": 2.5531, "step": 339740 }, { "epoch": 0.6768575481320923, "grad_norm": 0.1707671880722046, "learning_rate": 0.002, "loss": 2.5628, "step": 339750 }, { "epoch": 0.6768774703557312, "grad_norm": 0.1887965053319931, "learning_rate": 0.002, "loss": 2.5468, "step": 339760 }, { "epoch": 0.6768973925793701, "grad_norm": 0.17182697355747223, "learning_rate": 0.002, "loss": 2.5533, "step": 339770 }, { "epoch": 0.6769173148030091, "grad_norm": 0.19816909730434418, "learning_rate": 0.002, "loss": 2.5478, "step": 339780 }, { "epoch": 0.676937237026648, "grad_norm": 0.1335514485836029, "learning_rate": 0.002, "loss": 2.543, "step": 339790 }, { "epoch": 0.6769571592502869, "grad_norm": 0.1567280888557434, "learning_rate": 0.002, "loss": 2.5473, "step": 339800 }, { "epoch": 0.6769770814739258, "grad_norm": 0.19266702234745026, "learning_rate": 0.002, "loss": 2.5465, "step": 339810 }, { "epoch": 0.6769970036975647, "grad_norm": 0.189906507730484, "learning_rate": 0.002, "loss": 2.5553, "step": 339820 }, { "epoch": 0.6770169259212037, "grad_norm": 0.17166882753372192, "learning_rate": 0.002, "loss": 2.5361, "step": 339830 }, { "epoch": 0.6770368481448426, "grad_norm": 0.17069929838180542, "learning_rate": 0.002, "loss": 2.5492, "step": 339840 }, { "epoch": 0.6770567703684814, "grad_norm": 0.1796986311674118, "learning_rate": 0.002, "loss": 2.5528, "step": 339850 }, { "epoch": 0.6770766925921203, "grad_norm": 0.15666437149047852, "learning_rate": 0.002, "loss": 2.5431, "step": 339860 }, { "epoch": 0.6770966148157592, "grad_norm": 0.21319982409477234, "learning_rate": 0.002, "loss": 2.5367, "step": 339870 }, { "epoch": 0.6771165370393982, "grad_norm": 0.1897115558385849, "learning_rate": 0.002, "loss": 2.5686, "step": 339880 }, { "epoch": 0.6771364592630371, "grad_norm": 0.1682463139295578, "learning_rate": 0.002, "loss": 2.5424, "step": 339890 }, { "epoch": 0.677156381486676, "grad_norm": 0.12973159551620483, "learning_rate": 0.002, "loss": 2.5488, "step": 339900 }, { "epoch": 0.6771763037103149, "grad_norm": 0.2482856661081314, "learning_rate": 0.002, "loss": 2.5641, "step": 339910 }, { "epoch": 0.6771962259339539, "grad_norm": 0.13884033262729645, "learning_rate": 0.002, "loss": 2.5333, "step": 339920 }, { "epoch": 0.6772161481575928, "grad_norm": 0.14907395839691162, "learning_rate": 0.002, "loss": 2.5651, "step": 339930 }, { "epoch": 0.6772360703812317, "grad_norm": 0.20560204982757568, "learning_rate": 0.002, "loss": 2.5639, "step": 339940 }, { "epoch": 0.6772559926048706, "grad_norm": 0.1638595312833786, "learning_rate": 0.002, "loss": 2.5431, "step": 339950 }, { "epoch": 0.6772759148285095, "grad_norm": 0.1568925529718399, "learning_rate": 0.002, "loss": 2.556, "step": 339960 }, { "epoch": 0.6772958370521485, "grad_norm": 0.1467411071062088, "learning_rate": 0.002, "loss": 2.5525, "step": 339970 }, { "epoch": 0.6773157592757874, "grad_norm": 0.16900184750556946, "learning_rate": 0.002, "loss": 2.5492, "step": 339980 }, { "epoch": 0.6773356814994262, "grad_norm": 0.18913054466247559, "learning_rate": 0.002, "loss": 2.5499, "step": 339990 }, { "epoch": 0.6773556037230651, "grad_norm": 0.1500677466392517, "learning_rate": 0.002, "loss": 2.5643, "step": 340000 }, { "epoch": 0.677375525946704, "grad_norm": 0.15219321846961975, "learning_rate": 0.002, "loss": 2.557, "step": 340010 }, { "epoch": 0.677395448170343, "grad_norm": 0.20003364980220795, "learning_rate": 0.002, "loss": 2.5675, "step": 340020 }, { "epoch": 0.6774153703939819, "grad_norm": 0.1574871838092804, "learning_rate": 0.002, "loss": 2.5667, "step": 340030 }, { "epoch": 0.6774352926176208, "grad_norm": 0.18290963768959045, "learning_rate": 0.002, "loss": 2.5451, "step": 340040 }, { "epoch": 0.6774552148412597, "grad_norm": 0.17735624313354492, "learning_rate": 0.002, "loss": 2.5551, "step": 340050 }, { "epoch": 0.6774751370648986, "grad_norm": 0.14381161332130432, "learning_rate": 0.002, "loss": 2.5511, "step": 340060 }, { "epoch": 0.6774950592885376, "grad_norm": 0.1392877995967865, "learning_rate": 0.002, "loss": 2.5441, "step": 340070 }, { "epoch": 0.6775149815121765, "grad_norm": 0.19310207664966583, "learning_rate": 0.002, "loss": 2.5446, "step": 340080 }, { "epoch": 0.6775349037358154, "grad_norm": 0.16355592012405396, "learning_rate": 0.002, "loss": 2.5555, "step": 340090 }, { "epoch": 0.6775548259594543, "grad_norm": 0.1679975688457489, "learning_rate": 0.002, "loss": 2.5486, "step": 340100 }, { "epoch": 0.6775747481830932, "grad_norm": 0.1465037316083908, "learning_rate": 0.002, "loss": 2.5672, "step": 340110 }, { "epoch": 0.6775946704067322, "grad_norm": 0.16884425282478333, "learning_rate": 0.002, "loss": 2.5468, "step": 340120 }, { "epoch": 0.677614592630371, "grad_norm": 0.16035126149654388, "learning_rate": 0.002, "loss": 2.5583, "step": 340130 }, { "epoch": 0.6776345148540099, "grad_norm": 0.1519736498594284, "learning_rate": 0.002, "loss": 2.5574, "step": 340140 }, { "epoch": 0.6776544370776488, "grad_norm": 0.1824312061071396, "learning_rate": 0.002, "loss": 2.5566, "step": 340150 }, { "epoch": 0.6776743593012877, "grad_norm": 0.1872626543045044, "learning_rate": 0.002, "loss": 2.5465, "step": 340160 }, { "epoch": 0.6776942815249267, "grad_norm": 0.1780114322900772, "learning_rate": 0.002, "loss": 2.5481, "step": 340170 }, { "epoch": 0.6777142037485656, "grad_norm": 0.157297283411026, "learning_rate": 0.002, "loss": 2.5532, "step": 340180 }, { "epoch": 0.6777341259722045, "grad_norm": 0.149314746260643, "learning_rate": 0.002, "loss": 2.5596, "step": 340190 }, { "epoch": 0.6777540481958434, "grad_norm": 0.14208893477916718, "learning_rate": 0.002, "loss": 2.5618, "step": 340200 }, { "epoch": 0.6777739704194823, "grad_norm": 0.19540758430957794, "learning_rate": 0.002, "loss": 2.5412, "step": 340210 }, { "epoch": 0.6777938926431213, "grad_norm": 0.1577766090631485, "learning_rate": 0.002, "loss": 2.5543, "step": 340220 }, { "epoch": 0.6778138148667602, "grad_norm": 0.13635636866092682, "learning_rate": 0.002, "loss": 2.5482, "step": 340230 }, { "epoch": 0.6778337370903991, "grad_norm": 0.20820465683937073, "learning_rate": 0.002, "loss": 2.5615, "step": 340240 }, { "epoch": 0.677853659314038, "grad_norm": 0.14828543365001678, "learning_rate": 0.002, "loss": 2.5563, "step": 340250 }, { "epoch": 0.677873581537677, "grad_norm": 0.15076880156993866, "learning_rate": 0.002, "loss": 2.5483, "step": 340260 }, { "epoch": 0.6778935037613159, "grad_norm": 0.2048855572938919, "learning_rate": 0.002, "loss": 2.5606, "step": 340270 }, { "epoch": 0.6779134259849547, "grad_norm": 0.15955562889575958, "learning_rate": 0.002, "loss": 2.5474, "step": 340280 }, { "epoch": 0.6779333482085936, "grad_norm": 0.15126144886016846, "learning_rate": 0.002, "loss": 2.5686, "step": 340290 }, { "epoch": 0.6779532704322325, "grad_norm": 0.20160391926765442, "learning_rate": 0.002, "loss": 2.5502, "step": 340300 }, { "epoch": 0.6779731926558715, "grad_norm": 0.14405666291713715, "learning_rate": 0.002, "loss": 2.5597, "step": 340310 }, { "epoch": 0.6779931148795104, "grad_norm": 0.15210334956645966, "learning_rate": 0.002, "loss": 2.549, "step": 340320 }, { "epoch": 0.6780130371031493, "grad_norm": 0.1748681664466858, "learning_rate": 0.002, "loss": 2.5492, "step": 340330 }, { "epoch": 0.6780329593267882, "grad_norm": 0.1906166672706604, "learning_rate": 0.002, "loss": 2.5543, "step": 340340 }, { "epoch": 0.6780528815504271, "grad_norm": 0.13816173374652863, "learning_rate": 0.002, "loss": 2.5621, "step": 340350 }, { "epoch": 0.6780728037740661, "grad_norm": 0.16802294552326202, "learning_rate": 0.002, "loss": 2.5631, "step": 340360 }, { "epoch": 0.678092725997705, "grad_norm": 0.2061186134815216, "learning_rate": 0.002, "loss": 2.5534, "step": 340370 }, { "epoch": 0.6781126482213439, "grad_norm": 0.18144553899765015, "learning_rate": 0.002, "loss": 2.5462, "step": 340380 }, { "epoch": 0.6781325704449828, "grad_norm": 0.1548236906528473, "learning_rate": 0.002, "loss": 2.535, "step": 340390 }, { "epoch": 0.6781524926686217, "grad_norm": 0.15973934531211853, "learning_rate": 0.002, "loss": 2.5609, "step": 340400 }, { "epoch": 0.6781724148922607, "grad_norm": 0.1392284333705902, "learning_rate": 0.002, "loss": 2.555, "step": 340410 }, { "epoch": 0.6781923371158995, "grad_norm": 0.18358951807022095, "learning_rate": 0.002, "loss": 2.5691, "step": 340420 }, { "epoch": 0.6782122593395384, "grad_norm": 0.16866999864578247, "learning_rate": 0.002, "loss": 2.5541, "step": 340430 }, { "epoch": 0.6782321815631773, "grad_norm": 0.25023701786994934, "learning_rate": 0.002, "loss": 2.556, "step": 340440 }, { "epoch": 0.6782521037868162, "grad_norm": 0.15913636982440948, "learning_rate": 0.002, "loss": 2.5677, "step": 340450 }, { "epoch": 0.6782720260104552, "grad_norm": 0.1394471377134323, "learning_rate": 0.002, "loss": 2.5569, "step": 340460 }, { "epoch": 0.6782919482340941, "grad_norm": 0.1540059894323349, "learning_rate": 0.002, "loss": 2.5698, "step": 340470 }, { "epoch": 0.678311870457733, "grad_norm": 0.19181187450885773, "learning_rate": 0.002, "loss": 2.5569, "step": 340480 }, { "epoch": 0.6783317926813719, "grad_norm": 0.18778419494628906, "learning_rate": 0.002, "loss": 2.5641, "step": 340490 }, { "epoch": 0.6783517149050108, "grad_norm": 0.15451891720294952, "learning_rate": 0.002, "loss": 2.5532, "step": 340500 }, { "epoch": 0.6783716371286498, "grad_norm": 0.1709272712469101, "learning_rate": 0.002, "loss": 2.553, "step": 340510 }, { "epoch": 0.6783915593522887, "grad_norm": 0.13322694599628448, "learning_rate": 0.002, "loss": 2.5495, "step": 340520 }, { "epoch": 0.6784114815759276, "grad_norm": 0.18321606516838074, "learning_rate": 0.002, "loss": 2.5521, "step": 340530 }, { "epoch": 0.6784314037995665, "grad_norm": 0.15713076293468475, "learning_rate": 0.002, "loss": 2.5513, "step": 340540 }, { "epoch": 0.6784513260232055, "grad_norm": 0.16129480302333832, "learning_rate": 0.002, "loss": 2.5441, "step": 340550 }, { "epoch": 0.6784712482468443, "grad_norm": 0.17811502516269684, "learning_rate": 0.002, "loss": 2.5641, "step": 340560 }, { "epoch": 0.6784911704704832, "grad_norm": 0.1723122000694275, "learning_rate": 0.002, "loss": 2.5455, "step": 340570 }, { "epoch": 0.6785110926941221, "grad_norm": 0.1584286242723465, "learning_rate": 0.002, "loss": 2.558, "step": 340580 }, { "epoch": 0.678531014917761, "grad_norm": 0.1476142406463623, "learning_rate": 0.002, "loss": 2.5462, "step": 340590 }, { "epoch": 0.6785509371414, "grad_norm": 0.1800171285867691, "learning_rate": 0.002, "loss": 2.5583, "step": 340600 }, { "epoch": 0.6785708593650389, "grad_norm": 0.1764286607503891, "learning_rate": 0.002, "loss": 2.5396, "step": 340610 }, { "epoch": 0.6785907815886778, "grad_norm": 0.16393747925758362, "learning_rate": 0.002, "loss": 2.5572, "step": 340620 }, { "epoch": 0.6786107038123167, "grad_norm": 0.1687614768743515, "learning_rate": 0.002, "loss": 2.5529, "step": 340630 }, { "epoch": 0.6786306260359556, "grad_norm": 0.19921007752418518, "learning_rate": 0.002, "loss": 2.5526, "step": 340640 }, { "epoch": 0.6786505482595946, "grad_norm": 0.16836921870708466, "learning_rate": 0.002, "loss": 2.5394, "step": 340650 }, { "epoch": 0.6786704704832335, "grad_norm": 0.1565876603126526, "learning_rate": 0.002, "loss": 2.5392, "step": 340660 }, { "epoch": 0.6786903927068724, "grad_norm": 0.17390963435173035, "learning_rate": 0.002, "loss": 2.5402, "step": 340670 }, { "epoch": 0.6787103149305113, "grad_norm": 0.197918102145195, "learning_rate": 0.002, "loss": 2.565, "step": 340680 }, { "epoch": 0.6787302371541502, "grad_norm": 0.19786162674427032, "learning_rate": 0.002, "loss": 2.5542, "step": 340690 }, { "epoch": 0.6787501593777892, "grad_norm": 0.21124032139778137, "learning_rate": 0.002, "loss": 2.5316, "step": 340700 }, { "epoch": 0.678770081601428, "grad_norm": 0.15330655872821808, "learning_rate": 0.002, "loss": 2.5581, "step": 340710 }, { "epoch": 0.6787900038250669, "grad_norm": 0.136412113904953, "learning_rate": 0.002, "loss": 2.5352, "step": 340720 }, { "epoch": 0.6788099260487058, "grad_norm": 0.19027079641819, "learning_rate": 0.002, "loss": 2.5485, "step": 340730 }, { "epoch": 0.6788298482723447, "grad_norm": 0.18479420244693756, "learning_rate": 0.002, "loss": 2.5534, "step": 340740 }, { "epoch": 0.6788497704959837, "grad_norm": 0.19110648334026337, "learning_rate": 0.002, "loss": 2.5492, "step": 340750 }, { "epoch": 0.6788696927196226, "grad_norm": 0.15073411166667938, "learning_rate": 0.002, "loss": 2.5516, "step": 340760 }, { "epoch": 0.6788896149432615, "grad_norm": 0.16543683409690857, "learning_rate": 0.002, "loss": 2.5556, "step": 340770 }, { "epoch": 0.6789095371669004, "grad_norm": 0.16403380036354065, "learning_rate": 0.002, "loss": 2.5481, "step": 340780 }, { "epoch": 0.6789294593905393, "grad_norm": 0.2508392632007599, "learning_rate": 0.002, "loss": 2.5609, "step": 340790 }, { "epoch": 0.6789493816141783, "grad_norm": 0.17813459038734436, "learning_rate": 0.002, "loss": 2.5481, "step": 340800 }, { "epoch": 0.6789693038378172, "grad_norm": 0.16866102814674377, "learning_rate": 0.002, "loss": 2.5556, "step": 340810 }, { "epoch": 0.6789892260614561, "grad_norm": 0.15058010816574097, "learning_rate": 0.002, "loss": 2.5558, "step": 340820 }, { "epoch": 0.679009148285095, "grad_norm": 0.17697767913341522, "learning_rate": 0.002, "loss": 2.5528, "step": 340830 }, { "epoch": 0.679029070508734, "grad_norm": 0.15929578244686127, "learning_rate": 0.002, "loss": 2.567, "step": 340840 }, { "epoch": 0.6790489927323728, "grad_norm": 0.15662439167499542, "learning_rate": 0.002, "loss": 2.5498, "step": 340850 }, { "epoch": 0.6790689149560117, "grad_norm": 0.15156002342700958, "learning_rate": 0.002, "loss": 2.5535, "step": 340860 }, { "epoch": 0.6790888371796506, "grad_norm": 0.15027962625026703, "learning_rate": 0.002, "loss": 2.5491, "step": 340870 }, { "epoch": 0.6791087594032895, "grad_norm": 0.16903342306613922, "learning_rate": 0.002, "loss": 2.5591, "step": 340880 }, { "epoch": 0.6791286816269285, "grad_norm": 0.17949655652046204, "learning_rate": 0.002, "loss": 2.577, "step": 340890 }, { "epoch": 0.6791486038505674, "grad_norm": 0.17584814131259918, "learning_rate": 0.002, "loss": 2.5542, "step": 340900 }, { "epoch": 0.6791685260742063, "grad_norm": 0.14303983747959137, "learning_rate": 0.002, "loss": 2.5608, "step": 340910 }, { "epoch": 0.6791884482978452, "grad_norm": 0.2098531275987625, "learning_rate": 0.002, "loss": 2.554, "step": 340920 }, { "epoch": 0.6792083705214841, "grad_norm": 0.1487358957529068, "learning_rate": 0.002, "loss": 2.5514, "step": 340930 }, { "epoch": 0.6792282927451231, "grad_norm": 0.1792755126953125, "learning_rate": 0.002, "loss": 2.5562, "step": 340940 }, { "epoch": 0.679248214968762, "grad_norm": 0.17258962988853455, "learning_rate": 0.002, "loss": 2.5563, "step": 340950 }, { "epoch": 0.6792681371924009, "grad_norm": 0.2518490254878998, "learning_rate": 0.002, "loss": 2.5558, "step": 340960 }, { "epoch": 0.6792880594160398, "grad_norm": 0.16556242108345032, "learning_rate": 0.002, "loss": 2.5605, "step": 340970 }, { "epoch": 0.6793079816396786, "grad_norm": 0.17970594763755798, "learning_rate": 0.002, "loss": 2.5624, "step": 340980 }, { "epoch": 0.6793279038633177, "grad_norm": 0.15857860445976257, "learning_rate": 0.002, "loss": 2.5523, "step": 340990 }, { "epoch": 0.6793478260869565, "grad_norm": 0.16365861892700195, "learning_rate": 0.002, "loss": 2.5659, "step": 341000 }, { "epoch": 0.6793677483105954, "grad_norm": 0.14947748184204102, "learning_rate": 0.002, "loss": 2.5509, "step": 341010 }, { "epoch": 0.6793876705342343, "grad_norm": 0.17272016406059265, "learning_rate": 0.002, "loss": 2.5509, "step": 341020 }, { "epoch": 0.6794075927578732, "grad_norm": 0.17001675069332123, "learning_rate": 0.002, "loss": 2.5685, "step": 341030 }, { "epoch": 0.6794275149815122, "grad_norm": 0.1695340871810913, "learning_rate": 0.002, "loss": 2.5568, "step": 341040 }, { "epoch": 0.6794474372051511, "grad_norm": 0.17370177805423737, "learning_rate": 0.002, "loss": 2.5587, "step": 341050 }, { "epoch": 0.67946735942879, "grad_norm": 0.17549511790275574, "learning_rate": 0.002, "loss": 2.5583, "step": 341060 }, { "epoch": 0.6794872816524289, "grad_norm": 0.1818607747554779, "learning_rate": 0.002, "loss": 2.5686, "step": 341070 }, { "epoch": 0.6795072038760678, "grad_norm": 0.1557535082101822, "learning_rate": 0.002, "loss": 2.5555, "step": 341080 }, { "epoch": 0.6795271260997068, "grad_norm": 0.1642664074897766, "learning_rate": 0.002, "loss": 2.5508, "step": 341090 }, { "epoch": 0.6795470483233457, "grad_norm": 0.19619609415531158, "learning_rate": 0.002, "loss": 2.5384, "step": 341100 }, { "epoch": 0.6795669705469846, "grad_norm": 0.16340823471546173, "learning_rate": 0.002, "loss": 2.5596, "step": 341110 }, { "epoch": 0.6795868927706235, "grad_norm": 0.14547394216060638, "learning_rate": 0.002, "loss": 2.5594, "step": 341120 }, { "epoch": 0.6796068149942625, "grad_norm": 0.1503802388906479, "learning_rate": 0.002, "loss": 2.5487, "step": 341130 }, { "epoch": 0.6796267372179013, "grad_norm": 0.15244370698928833, "learning_rate": 0.002, "loss": 2.5641, "step": 341140 }, { "epoch": 0.6796466594415402, "grad_norm": 0.16308797895908356, "learning_rate": 0.002, "loss": 2.5561, "step": 341150 }, { "epoch": 0.6796665816651791, "grad_norm": 0.16536639630794525, "learning_rate": 0.002, "loss": 2.563, "step": 341160 }, { "epoch": 0.679686503888818, "grad_norm": 0.1761939376592636, "learning_rate": 0.002, "loss": 2.5674, "step": 341170 }, { "epoch": 0.679706426112457, "grad_norm": 0.1617341935634613, "learning_rate": 0.002, "loss": 2.5545, "step": 341180 }, { "epoch": 0.6797263483360959, "grad_norm": 0.15494804084300995, "learning_rate": 0.002, "loss": 2.5449, "step": 341190 }, { "epoch": 0.6797462705597348, "grad_norm": 0.1810765117406845, "learning_rate": 0.002, "loss": 2.5619, "step": 341200 }, { "epoch": 0.6797661927833737, "grad_norm": 0.18877986073493958, "learning_rate": 0.002, "loss": 2.5402, "step": 341210 }, { "epoch": 0.6797861150070126, "grad_norm": 0.15950587391853333, "learning_rate": 0.002, "loss": 2.5601, "step": 341220 }, { "epoch": 0.6798060372306516, "grad_norm": 0.15588849782943726, "learning_rate": 0.002, "loss": 2.5477, "step": 341230 }, { "epoch": 0.6798259594542905, "grad_norm": 0.19351409375667572, "learning_rate": 0.002, "loss": 2.5484, "step": 341240 }, { "epoch": 0.6798458816779294, "grad_norm": 0.15030159056186676, "learning_rate": 0.002, "loss": 2.5508, "step": 341250 }, { "epoch": 0.6798658039015683, "grad_norm": 0.15619005262851715, "learning_rate": 0.002, "loss": 2.5604, "step": 341260 }, { "epoch": 0.6798857261252071, "grad_norm": 0.16318948566913605, "learning_rate": 0.002, "loss": 2.5589, "step": 341270 }, { "epoch": 0.6799056483488461, "grad_norm": 0.17369577288627625, "learning_rate": 0.002, "loss": 2.5497, "step": 341280 }, { "epoch": 0.679925570572485, "grad_norm": 0.18844486773014069, "learning_rate": 0.002, "loss": 2.5515, "step": 341290 }, { "epoch": 0.6799454927961239, "grad_norm": 0.20186354219913483, "learning_rate": 0.002, "loss": 2.5617, "step": 341300 }, { "epoch": 0.6799654150197628, "grad_norm": 0.1684459149837494, "learning_rate": 0.002, "loss": 2.5468, "step": 341310 }, { "epoch": 0.6799853372434017, "grad_norm": 0.18818335235118866, "learning_rate": 0.002, "loss": 2.5673, "step": 341320 }, { "epoch": 0.6800052594670407, "grad_norm": 0.18022064864635468, "learning_rate": 0.002, "loss": 2.5553, "step": 341330 }, { "epoch": 0.6800251816906796, "grad_norm": 0.1705285906791687, "learning_rate": 0.002, "loss": 2.5717, "step": 341340 }, { "epoch": 0.6800451039143185, "grad_norm": 0.16720037162303925, "learning_rate": 0.002, "loss": 2.5543, "step": 341350 }, { "epoch": 0.6800650261379574, "grad_norm": 0.15243233740329742, "learning_rate": 0.002, "loss": 2.5584, "step": 341360 }, { "epoch": 0.6800849483615963, "grad_norm": 0.14634396135807037, "learning_rate": 0.002, "loss": 2.5622, "step": 341370 }, { "epoch": 0.6801048705852353, "grad_norm": 0.15934652090072632, "learning_rate": 0.002, "loss": 2.5622, "step": 341380 }, { "epoch": 0.6801247928088742, "grad_norm": 0.17957741022109985, "learning_rate": 0.002, "loss": 2.5741, "step": 341390 }, { "epoch": 0.6801447150325131, "grad_norm": 0.1705465018749237, "learning_rate": 0.002, "loss": 2.5503, "step": 341400 }, { "epoch": 0.680164637256152, "grad_norm": 0.2023325115442276, "learning_rate": 0.002, "loss": 2.5614, "step": 341410 }, { "epoch": 0.680184559479791, "grad_norm": 0.16902433335781097, "learning_rate": 0.002, "loss": 2.5442, "step": 341420 }, { "epoch": 0.6802044817034298, "grad_norm": 0.17615503072738647, "learning_rate": 0.002, "loss": 2.5541, "step": 341430 }, { "epoch": 0.6802244039270687, "grad_norm": 0.15500572323799133, "learning_rate": 0.002, "loss": 2.5618, "step": 341440 }, { "epoch": 0.6802443261507076, "grad_norm": 0.14396749436855316, "learning_rate": 0.002, "loss": 2.5561, "step": 341450 }, { "epoch": 0.6802642483743465, "grad_norm": 0.2557750940322876, "learning_rate": 0.002, "loss": 2.5484, "step": 341460 }, { "epoch": 0.6802841705979855, "grad_norm": 0.18521735072135925, "learning_rate": 0.002, "loss": 2.5532, "step": 341470 }, { "epoch": 0.6803040928216244, "grad_norm": 0.14582453668117523, "learning_rate": 0.002, "loss": 2.5538, "step": 341480 }, { "epoch": 0.6803240150452633, "grad_norm": 0.15017546713352203, "learning_rate": 0.002, "loss": 2.5737, "step": 341490 }, { "epoch": 0.6803439372689022, "grad_norm": 0.15700404345989227, "learning_rate": 0.002, "loss": 2.5622, "step": 341500 }, { "epoch": 0.6803638594925411, "grad_norm": 0.16450035572052002, "learning_rate": 0.002, "loss": 2.5543, "step": 341510 }, { "epoch": 0.6803837817161801, "grad_norm": 0.1702497899532318, "learning_rate": 0.002, "loss": 2.5559, "step": 341520 }, { "epoch": 0.680403703939819, "grad_norm": 0.14130955934524536, "learning_rate": 0.002, "loss": 2.547, "step": 341530 }, { "epoch": 0.6804236261634579, "grad_norm": 0.16839931905269623, "learning_rate": 0.002, "loss": 2.5546, "step": 341540 }, { "epoch": 0.6804435483870968, "grad_norm": 0.19086000323295593, "learning_rate": 0.002, "loss": 2.5421, "step": 341550 }, { "epoch": 0.6804634706107356, "grad_norm": 0.16459393501281738, "learning_rate": 0.002, "loss": 2.5635, "step": 341560 }, { "epoch": 0.6804833928343746, "grad_norm": 0.17322488129138947, "learning_rate": 0.002, "loss": 2.5532, "step": 341570 }, { "epoch": 0.6805033150580135, "grad_norm": 0.195377379655838, "learning_rate": 0.002, "loss": 2.5527, "step": 341580 }, { "epoch": 0.6805232372816524, "grad_norm": 0.15419557690620422, "learning_rate": 0.002, "loss": 2.5486, "step": 341590 }, { "epoch": 0.6805431595052913, "grad_norm": 0.1599799245595932, "learning_rate": 0.002, "loss": 2.5618, "step": 341600 }, { "epoch": 0.6805630817289302, "grad_norm": 0.16742828488349915, "learning_rate": 0.002, "loss": 2.5465, "step": 341610 }, { "epoch": 0.6805830039525692, "grad_norm": 0.1878182590007782, "learning_rate": 0.002, "loss": 2.5568, "step": 341620 }, { "epoch": 0.6806029261762081, "grad_norm": 0.17525313794612885, "learning_rate": 0.002, "loss": 2.5678, "step": 341630 }, { "epoch": 0.680622848399847, "grad_norm": 0.16677995026111603, "learning_rate": 0.002, "loss": 2.5537, "step": 341640 }, { "epoch": 0.6806427706234859, "grad_norm": 0.1917247623205185, "learning_rate": 0.002, "loss": 2.5723, "step": 341650 }, { "epoch": 0.6806626928471248, "grad_norm": 0.15476371347904205, "learning_rate": 0.002, "loss": 2.5515, "step": 341660 }, { "epoch": 0.6806826150707638, "grad_norm": 0.15507136285305023, "learning_rate": 0.002, "loss": 2.5652, "step": 341670 }, { "epoch": 0.6807025372944027, "grad_norm": 0.1444002240896225, "learning_rate": 0.002, "loss": 2.5521, "step": 341680 }, { "epoch": 0.6807224595180416, "grad_norm": 0.17079930007457733, "learning_rate": 0.002, "loss": 2.5381, "step": 341690 }, { "epoch": 0.6807423817416804, "grad_norm": 0.14892327785491943, "learning_rate": 0.002, "loss": 2.5464, "step": 341700 }, { "epoch": 0.6807623039653194, "grad_norm": 0.21494567394256592, "learning_rate": 0.002, "loss": 2.5663, "step": 341710 }, { "epoch": 0.6807822261889583, "grad_norm": 0.1665690690279007, "learning_rate": 0.002, "loss": 2.5523, "step": 341720 }, { "epoch": 0.6808021484125972, "grad_norm": 0.1488906294107437, "learning_rate": 0.002, "loss": 2.5518, "step": 341730 }, { "epoch": 0.6808220706362361, "grad_norm": 0.1928623616695404, "learning_rate": 0.002, "loss": 2.573, "step": 341740 }, { "epoch": 0.680841992859875, "grad_norm": 0.1887270212173462, "learning_rate": 0.002, "loss": 2.5382, "step": 341750 }, { "epoch": 0.680861915083514, "grad_norm": 0.13109230995178223, "learning_rate": 0.002, "loss": 2.577, "step": 341760 }, { "epoch": 0.6808818373071529, "grad_norm": 0.1741134524345398, "learning_rate": 0.002, "loss": 2.5382, "step": 341770 }, { "epoch": 0.6809017595307918, "grad_norm": 0.20340391993522644, "learning_rate": 0.002, "loss": 2.5578, "step": 341780 }, { "epoch": 0.6809216817544307, "grad_norm": 0.19089695811271667, "learning_rate": 0.002, "loss": 2.5581, "step": 341790 }, { "epoch": 0.6809416039780696, "grad_norm": 0.1800081431865692, "learning_rate": 0.002, "loss": 2.544, "step": 341800 }, { "epoch": 0.6809615262017086, "grad_norm": 0.1792328953742981, "learning_rate": 0.002, "loss": 2.5576, "step": 341810 }, { "epoch": 0.6809814484253475, "grad_norm": 0.14780914783477783, "learning_rate": 0.002, "loss": 2.5527, "step": 341820 }, { "epoch": 0.6810013706489864, "grad_norm": 0.15251226723194122, "learning_rate": 0.002, "loss": 2.5548, "step": 341830 }, { "epoch": 0.6810212928726253, "grad_norm": 0.1928933560848236, "learning_rate": 0.002, "loss": 2.5642, "step": 341840 }, { "epoch": 0.6810412150962641, "grad_norm": 0.15742731094360352, "learning_rate": 0.002, "loss": 2.544, "step": 341850 }, { "epoch": 0.6810611373199031, "grad_norm": 0.1516977846622467, "learning_rate": 0.002, "loss": 2.5503, "step": 341860 }, { "epoch": 0.681081059543542, "grad_norm": 0.16714635491371155, "learning_rate": 0.002, "loss": 2.5396, "step": 341870 }, { "epoch": 0.6811009817671809, "grad_norm": 0.16435183584690094, "learning_rate": 0.002, "loss": 2.5732, "step": 341880 }, { "epoch": 0.6811209039908198, "grad_norm": 0.14845791459083557, "learning_rate": 0.002, "loss": 2.5545, "step": 341890 }, { "epoch": 0.6811408262144587, "grad_norm": 0.15780699253082275, "learning_rate": 0.002, "loss": 2.5588, "step": 341900 }, { "epoch": 0.6811607484380977, "grad_norm": 0.1759837567806244, "learning_rate": 0.002, "loss": 2.5658, "step": 341910 }, { "epoch": 0.6811806706617366, "grad_norm": 0.15097492933273315, "learning_rate": 0.002, "loss": 2.5514, "step": 341920 }, { "epoch": 0.6812005928853755, "grad_norm": 0.16868796944618225, "learning_rate": 0.002, "loss": 2.5646, "step": 341930 }, { "epoch": 0.6812205151090144, "grad_norm": 0.16514186561107635, "learning_rate": 0.002, "loss": 2.5578, "step": 341940 }, { "epoch": 0.6812404373326533, "grad_norm": 0.1417335718870163, "learning_rate": 0.002, "loss": 2.545, "step": 341950 }, { "epoch": 0.6812603595562923, "grad_norm": 0.1741231232881546, "learning_rate": 0.002, "loss": 2.5688, "step": 341960 }, { "epoch": 0.6812802817799312, "grad_norm": 0.1832219660282135, "learning_rate": 0.002, "loss": 2.5368, "step": 341970 }, { "epoch": 0.68130020400357, "grad_norm": 0.17166981101036072, "learning_rate": 0.002, "loss": 2.5711, "step": 341980 }, { "epoch": 0.6813201262272089, "grad_norm": 0.16078472137451172, "learning_rate": 0.002, "loss": 2.5427, "step": 341990 }, { "epoch": 0.6813400484508478, "grad_norm": 0.17642158269882202, "learning_rate": 0.002, "loss": 2.5498, "step": 342000 }, { "epoch": 0.6813599706744868, "grad_norm": 0.15474839508533478, "learning_rate": 0.002, "loss": 2.5583, "step": 342010 }, { "epoch": 0.6813798928981257, "grad_norm": 0.17284448444843292, "learning_rate": 0.002, "loss": 2.5499, "step": 342020 }, { "epoch": 0.6813998151217646, "grad_norm": 0.14930705726146698, "learning_rate": 0.002, "loss": 2.5769, "step": 342030 }, { "epoch": 0.6814197373454035, "grad_norm": 0.15567857027053833, "learning_rate": 0.002, "loss": 2.5549, "step": 342040 }, { "epoch": 0.6814396595690425, "grad_norm": 0.17559662461280823, "learning_rate": 0.002, "loss": 2.5464, "step": 342050 }, { "epoch": 0.6814595817926814, "grad_norm": 0.162511944770813, "learning_rate": 0.002, "loss": 2.5609, "step": 342060 }, { "epoch": 0.6814795040163203, "grad_norm": 0.18522340059280396, "learning_rate": 0.002, "loss": 2.5432, "step": 342070 }, { "epoch": 0.6814994262399592, "grad_norm": 0.14325769245624542, "learning_rate": 0.002, "loss": 2.5584, "step": 342080 }, { "epoch": 0.6815193484635981, "grad_norm": 0.554096519947052, "learning_rate": 0.002, "loss": 2.5435, "step": 342090 }, { "epoch": 0.6815392706872371, "grad_norm": 0.18197160959243774, "learning_rate": 0.002, "loss": 2.5425, "step": 342100 }, { "epoch": 0.681559192910876, "grad_norm": 0.13450591266155243, "learning_rate": 0.002, "loss": 2.5578, "step": 342110 }, { "epoch": 0.6815791151345149, "grad_norm": 0.1451447755098343, "learning_rate": 0.002, "loss": 2.5615, "step": 342120 }, { "epoch": 0.6815990373581537, "grad_norm": 0.17274209856987, "learning_rate": 0.002, "loss": 2.5454, "step": 342130 }, { "epoch": 0.6816189595817926, "grad_norm": 0.1574709266424179, "learning_rate": 0.002, "loss": 2.5483, "step": 342140 }, { "epoch": 0.6816388818054316, "grad_norm": 0.18761639297008514, "learning_rate": 0.002, "loss": 2.5642, "step": 342150 }, { "epoch": 0.6816588040290705, "grad_norm": 0.16825595498085022, "learning_rate": 0.002, "loss": 2.5551, "step": 342160 }, { "epoch": 0.6816787262527094, "grad_norm": 0.16821041703224182, "learning_rate": 0.002, "loss": 2.5499, "step": 342170 }, { "epoch": 0.6816986484763483, "grad_norm": 0.15401296317577362, "learning_rate": 0.002, "loss": 2.5592, "step": 342180 }, { "epoch": 0.6817185706999872, "grad_norm": 0.15643519163131714, "learning_rate": 0.002, "loss": 2.5417, "step": 342190 }, { "epoch": 0.6817384929236262, "grad_norm": 0.1885278820991516, "learning_rate": 0.002, "loss": 2.557, "step": 342200 }, { "epoch": 0.6817584151472651, "grad_norm": 0.16998808085918427, "learning_rate": 0.002, "loss": 2.5607, "step": 342210 }, { "epoch": 0.681778337370904, "grad_norm": 0.17538368701934814, "learning_rate": 0.002, "loss": 2.5555, "step": 342220 }, { "epoch": 0.6817982595945429, "grad_norm": 0.18367087841033936, "learning_rate": 0.002, "loss": 2.5742, "step": 342230 }, { "epoch": 0.6818181818181818, "grad_norm": 0.16234421730041504, "learning_rate": 0.002, "loss": 2.5414, "step": 342240 }, { "epoch": 0.6818381040418208, "grad_norm": 0.17163915932178497, "learning_rate": 0.002, "loss": 2.5458, "step": 342250 }, { "epoch": 0.6818580262654597, "grad_norm": 0.14931632578372955, "learning_rate": 0.002, "loss": 2.5782, "step": 342260 }, { "epoch": 0.6818779484890986, "grad_norm": 0.16856035590171814, "learning_rate": 0.002, "loss": 2.5512, "step": 342270 }, { "epoch": 0.6818978707127374, "grad_norm": 0.14674292504787445, "learning_rate": 0.002, "loss": 2.5461, "step": 342280 }, { "epoch": 0.6819177929363763, "grad_norm": 0.19205062091350555, "learning_rate": 0.002, "loss": 2.5736, "step": 342290 }, { "epoch": 0.6819377151600153, "grad_norm": 0.15128065645694733, "learning_rate": 0.002, "loss": 2.5446, "step": 342300 }, { "epoch": 0.6819576373836542, "grad_norm": 0.15026697516441345, "learning_rate": 0.002, "loss": 2.5486, "step": 342310 }, { "epoch": 0.6819775596072931, "grad_norm": 0.15050120651721954, "learning_rate": 0.002, "loss": 2.5678, "step": 342320 }, { "epoch": 0.681997481830932, "grad_norm": 0.18504102528095245, "learning_rate": 0.002, "loss": 2.5451, "step": 342330 }, { "epoch": 0.682017404054571, "grad_norm": 0.15467879176139832, "learning_rate": 0.002, "loss": 2.5477, "step": 342340 }, { "epoch": 0.6820373262782099, "grad_norm": 0.17600128054618835, "learning_rate": 0.002, "loss": 2.5655, "step": 342350 }, { "epoch": 0.6820572485018488, "grad_norm": 0.1747206449508667, "learning_rate": 0.002, "loss": 2.5654, "step": 342360 }, { "epoch": 0.6820771707254877, "grad_norm": 0.1792343407869339, "learning_rate": 0.002, "loss": 2.5558, "step": 342370 }, { "epoch": 0.6820970929491266, "grad_norm": 0.16793569922447205, "learning_rate": 0.002, "loss": 2.5477, "step": 342380 }, { "epoch": 0.6821170151727656, "grad_norm": 0.15360450744628906, "learning_rate": 0.002, "loss": 2.5626, "step": 342390 }, { "epoch": 0.6821369373964045, "grad_norm": 0.18181759119033813, "learning_rate": 0.002, "loss": 2.5494, "step": 342400 }, { "epoch": 0.6821568596200434, "grad_norm": 0.16041199862957, "learning_rate": 0.002, "loss": 2.5685, "step": 342410 }, { "epoch": 0.6821767818436822, "grad_norm": 0.16332705318927765, "learning_rate": 0.002, "loss": 2.5624, "step": 342420 }, { "epoch": 0.6821967040673211, "grad_norm": 0.18944238126277924, "learning_rate": 0.002, "loss": 2.5365, "step": 342430 }, { "epoch": 0.6822166262909601, "grad_norm": 0.17482224106788635, "learning_rate": 0.002, "loss": 2.5449, "step": 342440 }, { "epoch": 0.682236548514599, "grad_norm": 0.1667330116033554, "learning_rate": 0.002, "loss": 2.5571, "step": 342450 }, { "epoch": 0.6822564707382379, "grad_norm": 0.15636342763900757, "learning_rate": 0.002, "loss": 2.5489, "step": 342460 }, { "epoch": 0.6822763929618768, "grad_norm": 0.18037551641464233, "learning_rate": 0.002, "loss": 2.5701, "step": 342470 }, { "epoch": 0.6822963151855157, "grad_norm": 0.17922033369541168, "learning_rate": 0.002, "loss": 2.5366, "step": 342480 }, { "epoch": 0.6823162374091547, "grad_norm": 0.1496715247631073, "learning_rate": 0.002, "loss": 2.5704, "step": 342490 }, { "epoch": 0.6823361596327936, "grad_norm": 0.16555196046829224, "learning_rate": 0.002, "loss": 2.5423, "step": 342500 }, { "epoch": 0.6823560818564325, "grad_norm": 0.20508503913879395, "learning_rate": 0.002, "loss": 2.545, "step": 342510 }, { "epoch": 0.6823760040800714, "grad_norm": 0.21509885787963867, "learning_rate": 0.002, "loss": 2.5535, "step": 342520 }, { "epoch": 0.6823959263037103, "grad_norm": 0.14651058614253998, "learning_rate": 0.002, "loss": 2.5523, "step": 342530 }, { "epoch": 0.6824158485273493, "grad_norm": 0.1604645699262619, "learning_rate": 0.002, "loss": 2.5544, "step": 342540 }, { "epoch": 0.6824357707509882, "grad_norm": 0.20262114703655243, "learning_rate": 0.002, "loss": 2.5594, "step": 342550 }, { "epoch": 0.682455692974627, "grad_norm": 0.16256891191005707, "learning_rate": 0.002, "loss": 2.5713, "step": 342560 }, { "epoch": 0.6824756151982659, "grad_norm": 0.15688258409500122, "learning_rate": 0.002, "loss": 2.551, "step": 342570 }, { "epoch": 0.6824955374219048, "grad_norm": 0.16762866079807281, "learning_rate": 0.002, "loss": 2.5462, "step": 342580 }, { "epoch": 0.6825154596455438, "grad_norm": 0.15532724559307098, "learning_rate": 0.002, "loss": 2.5643, "step": 342590 }, { "epoch": 0.6825353818691827, "grad_norm": 0.1651889830827713, "learning_rate": 0.002, "loss": 2.557, "step": 342600 }, { "epoch": 0.6825553040928216, "grad_norm": 0.14833438396453857, "learning_rate": 0.002, "loss": 2.5398, "step": 342610 }, { "epoch": 0.6825752263164605, "grad_norm": 0.13804619014263153, "learning_rate": 0.002, "loss": 2.5497, "step": 342620 }, { "epoch": 0.6825951485400995, "grad_norm": 0.19195818901062012, "learning_rate": 0.002, "loss": 2.5623, "step": 342630 }, { "epoch": 0.6826150707637384, "grad_norm": 0.1799943596124649, "learning_rate": 0.002, "loss": 2.5542, "step": 342640 }, { "epoch": 0.6826349929873773, "grad_norm": 0.27192482352256775, "learning_rate": 0.002, "loss": 2.5467, "step": 342650 }, { "epoch": 0.6826549152110162, "grad_norm": 0.1624540239572525, "learning_rate": 0.002, "loss": 2.5487, "step": 342660 }, { "epoch": 0.6826748374346551, "grad_norm": 0.1940988004207611, "learning_rate": 0.002, "loss": 2.5446, "step": 342670 }, { "epoch": 0.6826947596582941, "grad_norm": 0.14675675332546234, "learning_rate": 0.002, "loss": 2.5517, "step": 342680 }, { "epoch": 0.682714681881933, "grad_norm": 0.16059710085391998, "learning_rate": 0.002, "loss": 2.5473, "step": 342690 }, { "epoch": 0.6827346041055719, "grad_norm": 0.17209701240062714, "learning_rate": 0.002, "loss": 2.5569, "step": 342700 }, { "epoch": 0.6827545263292107, "grad_norm": 0.18074922263622284, "learning_rate": 0.002, "loss": 2.5496, "step": 342710 }, { "epoch": 0.6827744485528496, "grad_norm": 0.1963481456041336, "learning_rate": 0.002, "loss": 2.5494, "step": 342720 }, { "epoch": 0.6827943707764886, "grad_norm": 0.164359450340271, "learning_rate": 0.002, "loss": 2.5391, "step": 342730 }, { "epoch": 0.6828142930001275, "grad_norm": 0.2533193528652191, "learning_rate": 0.002, "loss": 2.5491, "step": 342740 }, { "epoch": 0.6828342152237664, "grad_norm": 0.16662588715553284, "learning_rate": 0.002, "loss": 2.5473, "step": 342750 }, { "epoch": 0.6828541374474053, "grad_norm": 0.14349140226840973, "learning_rate": 0.002, "loss": 2.563, "step": 342760 }, { "epoch": 0.6828740596710442, "grad_norm": 0.16463953256607056, "learning_rate": 0.002, "loss": 2.5486, "step": 342770 }, { "epoch": 0.6828939818946832, "grad_norm": 0.16723418235778809, "learning_rate": 0.002, "loss": 2.5552, "step": 342780 }, { "epoch": 0.6829139041183221, "grad_norm": 0.15239223837852478, "learning_rate": 0.002, "loss": 2.5593, "step": 342790 }, { "epoch": 0.682933826341961, "grad_norm": 0.17743529379367828, "learning_rate": 0.002, "loss": 2.5585, "step": 342800 }, { "epoch": 0.6829537485655999, "grad_norm": 0.1759190857410431, "learning_rate": 0.002, "loss": 2.5553, "step": 342810 }, { "epoch": 0.6829736707892388, "grad_norm": 0.19342519342899323, "learning_rate": 0.002, "loss": 2.5565, "step": 342820 }, { "epoch": 0.6829935930128778, "grad_norm": 0.2005591243505478, "learning_rate": 0.002, "loss": 2.5466, "step": 342830 }, { "epoch": 0.6830135152365167, "grad_norm": 0.16800132393836975, "learning_rate": 0.002, "loss": 2.5572, "step": 342840 }, { "epoch": 0.6830334374601555, "grad_norm": 0.13571669161319733, "learning_rate": 0.002, "loss": 2.5644, "step": 342850 }, { "epoch": 0.6830533596837944, "grad_norm": 0.1492013782262802, "learning_rate": 0.002, "loss": 2.5491, "step": 342860 }, { "epoch": 0.6830732819074333, "grad_norm": 0.1757040023803711, "learning_rate": 0.002, "loss": 2.5477, "step": 342870 }, { "epoch": 0.6830932041310723, "grad_norm": 0.14389930665493011, "learning_rate": 0.002, "loss": 2.5478, "step": 342880 }, { "epoch": 0.6831131263547112, "grad_norm": 0.1648629605770111, "learning_rate": 0.002, "loss": 2.5585, "step": 342890 }, { "epoch": 0.6831330485783501, "grad_norm": 0.19048939645290375, "learning_rate": 0.002, "loss": 2.5611, "step": 342900 }, { "epoch": 0.683152970801989, "grad_norm": 0.16026128828525543, "learning_rate": 0.002, "loss": 2.5448, "step": 342910 }, { "epoch": 0.683172893025628, "grad_norm": 0.16959084570407867, "learning_rate": 0.002, "loss": 2.5511, "step": 342920 }, { "epoch": 0.6831928152492669, "grad_norm": 0.1670246124267578, "learning_rate": 0.002, "loss": 2.5591, "step": 342930 }, { "epoch": 0.6832127374729058, "grad_norm": 0.17916512489318848, "learning_rate": 0.002, "loss": 2.5635, "step": 342940 }, { "epoch": 0.6832326596965447, "grad_norm": 0.14964228868484497, "learning_rate": 0.002, "loss": 2.5504, "step": 342950 }, { "epoch": 0.6832525819201836, "grad_norm": 0.1703023463487625, "learning_rate": 0.002, "loss": 2.5538, "step": 342960 }, { "epoch": 0.6832725041438226, "grad_norm": 0.17654401063919067, "learning_rate": 0.002, "loss": 2.5552, "step": 342970 }, { "epoch": 0.6832924263674615, "grad_norm": 0.15274402499198914, "learning_rate": 0.002, "loss": 2.5611, "step": 342980 }, { "epoch": 0.6833123485911003, "grad_norm": 0.1886311173439026, "learning_rate": 0.002, "loss": 2.5592, "step": 342990 }, { "epoch": 0.6833322708147392, "grad_norm": 0.14785650372505188, "learning_rate": 0.002, "loss": 2.5462, "step": 343000 }, { "epoch": 0.6833521930383781, "grad_norm": 0.17139104008674622, "learning_rate": 0.002, "loss": 2.5635, "step": 343010 }, { "epoch": 0.6833721152620171, "grad_norm": 0.17657195031642914, "learning_rate": 0.002, "loss": 2.5568, "step": 343020 }, { "epoch": 0.683392037485656, "grad_norm": 0.15655747056007385, "learning_rate": 0.002, "loss": 2.5554, "step": 343030 }, { "epoch": 0.6834119597092949, "grad_norm": 0.15221403539180756, "learning_rate": 0.002, "loss": 2.5488, "step": 343040 }, { "epoch": 0.6834318819329338, "grad_norm": 0.179268017411232, "learning_rate": 0.002, "loss": 2.5504, "step": 343050 }, { "epoch": 0.6834518041565727, "grad_norm": 0.2142689973115921, "learning_rate": 0.002, "loss": 2.5593, "step": 343060 }, { "epoch": 0.6834717263802117, "grad_norm": 0.16281113028526306, "learning_rate": 0.002, "loss": 2.5595, "step": 343070 }, { "epoch": 0.6834916486038506, "grad_norm": 0.14637255668640137, "learning_rate": 0.002, "loss": 2.5593, "step": 343080 }, { "epoch": 0.6835115708274895, "grad_norm": 0.1624370664358139, "learning_rate": 0.002, "loss": 2.5497, "step": 343090 }, { "epoch": 0.6835314930511284, "grad_norm": 0.21136410534381866, "learning_rate": 0.002, "loss": 2.5482, "step": 343100 }, { "epoch": 0.6835514152747673, "grad_norm": 0.1649322807788849, "learning_rate": 0.002, "loss": 2.5571, "step": 343110 }, { "epoch": 0.6835713374984063, "grad_norm": 0.1588228940963745, "learning_rate": 0.002, "loss": 2.5404, "step": 343120 }, { "epoch": 0.6835912597220452, "grad_norm": 0.16388669610023499, "learning_rate": 0.002, "loss": 2.5532, "step": 343130 }, { "epoch": 0.683611181945684, "grad_norm": 0.15082883834838867, "learning_rate": 0.002, "loss": 2.5487, "step": 343140 }, { "epoch": 0.6836311041693229, "grad_norm": 0.21969208121299744, "learning_rate": 0.002, "loss": 2.5544, "step": 343150 }, { "epoch": 0.6836510263929618, "grad_norm": 0.14577719569206238, "learning_rate": 0.002, "loss": 2.5425, "step": 343160 }, { "epoch": 0.6836709486166008, "grad_norm": 0.16080695390701294, "learning_rate": 0.002, "loss": 2.5588, "step": 343170 }, { "epoch": 0.6836908708402397, "grad_norm": 0.15272682905197144, "learning_rate": 0.002, "loss": 2.547, "step": 343180 }, { "epoch": 0.6837107930638786, "grad_norm": 0.21597391366958618, "learning_rate": 0.002, "loss": 2.5537, "step": 343190 }, { "epoch": 0.6837307152875175, "grad_norm": 0.16395647823810577, "learning_rate": 0.002, "loss": 2.563, "step": 343200 }, { "epoch": 0.6837506375111565, "grad_norm": 0.14574933052062988, "learning_rate": 0.002, "loss": 2.5524, "step": 343210 }, { "epoch": 0.6837705597347954, "grad_norm": 0.1585923582315445, "learning_rate": 0.002, "loss": 2.5524, "step": 343220 }, { "epoch": 0.6837904819584343, "grad_norm": 0.16449420154094696, "learning_rate": 0.002, "loss": 2.5663, "step": 343230 }, { "epoch": 0.6838104041820732, "grad_norm": 0.17781122028827667, "learning_rate": 0.002, "loss": 2.5521, "step": 343240 }, { "epoch": 0.6838303264057121, "grad_norm": 0.18261505663394928, "learning_rate": 0.002, "loss": 2.5653, "step": 343250 }, { "epoch": 0.6838502486293511, "grad_norm": 0.17486268281936646, "learning_rate": 0.002, "loss": 2.5661, "step": 343260 }, { "epoch": 0.68387017085299, "grad_norm": 0.15926136076450348, "learning_rate": 0.002, "loss": 2.56, "step": 343270 }, { "epoch": 0.6838900930766288, "grad_norm": 0.1705324500799179, "learning_rate": 0.002, "loss": 2.571, "step": 343280 }, { "epoch": 0.6839100153002677, "grad_norm": 0.1613454520702362, "learning_rate": 0.002, "loss": 2.5388, "step": 343290 }, { "epoch": 0.6839299375239066, "grad_norm": 0.22582203149795532, "learning_rate": 0.002, "loss": 2.5686, "step": 343300 }, { "epoch": 0.6839498597475456, "grad_norm": 0.14861996471881866, "learning_rate": 0.002, "loss": 2.5642, "step": 343310 }, { "epoch": 0.6839697819711845, "grad_norm": 0.15753503143787384, "learning_rate": 0.002, "loss": 2.5399, "step": 343320 }, { "epoch": 0.6839897041948234, "grad_norm": 0.18962392210960388, "learning_rate": 0.002, "loss": 2.5562, "step": 343330 }, { "epoch": 0.6840096264184623, "grad_norm": 0.2047753632068634, "learning_rate": 0.002, "loss": 2.5637, "step": 343340 }, { "epoch": 0.6840295486421012, "grad_norm": 0.19187599420547485, "learning_rate": 0.002, "loss": 2.5549, "step": 343350 }, { "epoch": 0.6840494708657402, "grad_norm": 0.16096875071525574, "learning_rate": 0.002, "loss": 2.5575, "step": 343360 }, { "epoch": 0.6840693930893791, "grad_norm": 0.1638668179512024, "learning_rate": 0.002, "loss": 2.5641, "step": 343370 }, { "epoch": 0.684089315313018, "grad_norm": 0.17167197167873383, "learning_rate": 0.002, "loss": 2.5566, "step": 343380 }, { "epoch": 0.6841092375366569, "grad_norm": 0.2270241379737854, "learning_rate": 0.002, "loss": 2.5454, "step": 343390 }, { "epoch": 0.6841291597602958, "grad_norm": 0.212549090385437, "learning_rate": 0.002, "loss": 2.5655, "step": 343400 }, { "epoch": 0.6841490819839348, "grad_norm": 0.16656993329524994, "learning_rate": 0.002, "loss": 2.5541, "step": 343410 }, { "epoch": 0.6841690042075736, "grad_norm": 0.1490231305360794, "learning_rate": 0.002, "loss": 2.5406, "step": 343420 }, { "epoch": 0.6841889264312125, "grad_norm": 0.16993233561515808, "learning_rate": 0.002, "loss": 2.5413, "step": 343430 }, { "epoch": 0.6842088486548514, "grad_norm": 0.18234887719154358, "learning_rate": 0.002, "loss": 2.5498, "step": 343440 }, { "epoch": 0.6842287708784903, "grad_norm": 0.19956661760807037, "learning_rate": 0.002, "loss": 2.5508, "step": 343450 }, { "epoch": 0.6842486931021293, "grad_norm": 0.14246638119220734, "learning_rate": 0.002, "loss": 2.5586, "step": 343460 }, { "epoch": 0.6842686153257682, "grad_norm": 0.16361965239048004, "learning_rate": 0.002, "loss": 2.57, "step": 343470 }, { "epoch": 0.6842885375494071, "grad_norm": 0.2210775464773178, "learning_rate": 0.002, "loss": 2.5575, "step": 343480 }, { "epoch": 0.684308459773046, "grad_norm": 0.16163977980613708, "learning_rate": 0.002, "loss": 2.5585, "step": 343490 }, { "epoch": 0.6843283819966849, "grad_norm": 0.1848626285791397, "learning_rate": 0.002, "loss": 2.5574, "step": 343500 }, { "epoch": 0.6843483042203239, "grad_norm": 0.17214855551719666, "learning_rate": 0.002, "loss": 2.546, "step": 343510 }, { "epoch": 0.6843682264439628, "grad_norm": 0.14740575850009918, "learning_rate": 0.002, "loss": 2.5639, "step": 343520 }, { "epoch": 0.6843881486676017, "grad_norm": 0.18436306715011597, "learning_rate": 0.002, "loss": 2.5694, "step": 343530 }, { "epoch": 0.6844080708912406, "grad_norm": 0.15979574620723724, "learning_rate": 0.002, "loss": 2.5621, "step": 343540 }, { "epoch": 0.6844279931148796, "grad_norm": 0.17806263267993927, "learning_rate": 0.002, "loss": 2.5636, "step": 343550 }, { "epoch": 0.6844479153385185, "grad_norm": 0.198761448264122, "learning_rate": 0.002, "loss": 2.5448, "step": 343560 }, { "epoch": 0.6844678375621573, "grad_norm": 0.18999525904655457, "learning_rate": 0.002, "loss": 2.5622, "step": 343570 }, { "epoch": 0.6844877597857962, "grad_norm": 0.1586417555809021, "learning_rate": 0.002, "loss": 2.5481, "step": 343580 }, { "epoch": 0.6845076820094351, "grad_norm": 0.16601915657520294, "learning_rate": 0.002, "loss": 2.5493, "step": 343590 }, { "epoch": 0.6845276042330741, "grad_norm": 0.1855771839618683, "learning_rate": 0.002, "loss": 2.5521, "step": 343600 }, { "epoch": 0.684547526456713, "grad_norm": 0.16855637729167938, "learning_rate": 0.002, "loss": 2.5508, "step": 343610 }, { "epoch": 0.6845674486803519, "grad_norm": 0.1814156472682953, "learning_rate": 0.002, "loss": 2.5496, "step": 343620 }, { "epoch": 0.6845873709039908, "grad_norm": 0.1732996255159378, "learning_rate": 0.002, "loss": 2.558, "step": 343630 }, { "epoch": 0.6846072931276297, "grad_norm": 0.1462392956018448, "learning_rate": 0.002, "loss": 2.5674, "step": 343640 }, { "epoch": 0.6846272153512687, "grad_norm": 0.18453720211982727, "learning_rate": 0.002, "loss": 2.5594, "step": 343650 }, { "epoch": 0.6846471375749076, "grad_norm": 0.17158527672290802, "learning_rate": 0.002, "loss": 2.5499, "step": 343660 }, { "epoch": 0.6846670597985465, "grad_norm": 0.16129039227962494, "learning_rate": 0.002, "loss": 2.5694, "step": 343670 }, { "epoch": 0.6846869820221854, "grad_norm": 0.16897179186344147, "learning_rate": 0.002, "loss": 2.5438, "step": 343680 }, { "epoch": 0.6847069042458243, "grad_norm": 0.1754036545753479, "learning_rate": 0.002, "loss": 2.5569, "step": 343690 }, { "epoch": 0.6847268264694633, "grad_norm": 0.1637093871831894, "learning_rate": 0.002, "loss": 2.5492, "step": 343700 }, { "epoch": 0.6847467486931021, "grad_norm": 0.15467801690101624, "learning_rate": 0.002, "loss": 2.5507, "step": 343710 }, { "epoch": 0.684766670916741, "grad_norm": 0.17483076453208923, "learning_rate": 0.002, "loss": 2.5587, "step": 343720 }, { "epoch": 0.6847865931403799, "grad_norm": 0.16784480214118958, "learning_rate": 0.002, "loss": 2.5525, "step": 343730 }, { "epoch": 0.6848065153640188, "grad_norm": 0.20397643744945526, "learning_rate": 0.002, "loss": 2.5502, "step": 343740 }, { "epoch": 0.6848264375876578, "grad_norm": 0.15090061724185944, "learning_rate": 0.002, "loss": 2.5573, "step": 343750 }, { "epoch": 0.6848463598112967, "grad_norm": 0.1672801822423935, "learning_rate": 0.002, "loss": 2.5526, "step": 343760 }, { "epoch": 0.6848662820349356, "grad_norm": 0.19182243943214417, "learning_rate": 0.002, "loss": 2.5557, "step": 343770 }, { "epoch": 0.6848862042585745, "grad_norm": 0.16467483341693878, "learning_rate": 0.002, "loss": 2.5532, "step": 343780 }, { "epoch": 0.6849061264822134, "grad_norm": 0.18740470707416534, "learning_rate": 0.002, "loss": 2.5454, "step": 343790 }, { "epoch": 0.6849260487058524, "grad_norm": 0.1645578294992447, "learning_rate": 0.002, "loss": 2.5591, "step": 343800 }, { "epoch": 0.6849459709294913, "grad_norm": 0.15160977840423584, "learning_rate": 0.002, "loss": 2.5627, "step": 343810 }, { "epoch": 0.6849658931531302, "grad_norm": 0.15447135269641876, "learning_rate": 0.002, "loss": 2.5826, "step": 343820 }, { "epoch": 0.6849858153767691, "grad_norm": 0.15946373343467712, "learning_rate": 0.002, "loss": 2.5581, "step": 343830 }, { "epoch": 0.6850057376004081, "grad_norm": 0.16845709085464478, "learning_rate": 0.002, "loss": 2.5627, "step": 343840 }, { "epoch": 0.685025659824047, "grad_norm": 0.1539498120546341, "learning_rate": 0.002, "loss": 2.5525, "step": 343850 }, { "epoch": 0.6850455820476858, "grad_norm": 0.21280503273010254, "learning_rate": 0.002, "loss": 2.5593, "step": 343860 }, { "epoch": 0.6850655042713247, "grad_norm": 0.142876997590065, "learning_rate": 0.002, "loss": 2.5679, "step": 343870 }, { "epoch": 0.6850854264949636, "grad_norm": 0.20208020508289337, "learning_rate": 0.002, "loss": 2.5501, "step": 343880 }, { "epoch": 0.6851053487186026, "grad_norm": 0.16723953187465668, "learning_rate": 0.002, "loss": 2.5464, "step": 343890 }, { "epoch": 0.6851252709422415, "grad_norm": 0.15835720300674438, "learning_rate": 0.002, "loss": 2.5479, "step": 343900 }, { "epoch": 0.6851451931658804, "grad_norm": 0.19351550936698914, "learning_rate": 0.002, "loss": 2.5503, "step": 343910 }, { "epoch": 0.6851651153895193, "grad_norm": 0.13444602489471436, "learning_rate": 0.002, "loss": 2.5526, "step": 343920 }, { "epoch": 0.6851850376131582, "grad_norm": 0.1747642606496811, "learning_rate": 0.002, "loss": 2.5639, "step": 343930 }, { "epoch": 0.6852049598367972, "grad_norm": 0.1831493079662323, "learning_rate": 0.002, "loss": 2.5643, "step": 343940 }, { "epoch": 0.6852248820604361, "grad_norm": 0.1913822740316391, "learning_rate": 0.002, "loss": 2.5601, "step": 343950 }, { "epoch": 0.685244804284075, "grad_norm": 0.14237336814403534, "learning_rate": 0.002, "loss": 2.5608, "step": 343960 }, { "epoch": 0.6852647265077139, "grad_norm": 0.1407509595155716, "learning_rate": 0.002, "loss": 2.5539, "step": 343970 }, { "epoch": 0.6852846487313528, "grad_norm": 0.16909722983837128, "learning_rate": 0.002, "loss": 2.5644, "step": 343980 }, { "epoch": 0.6853045709549918, "grad_norm": 0.2156606912612915, "learning_rate": 0.002, "loss": 2.5579, "step": 343990 }, { "epoch": 0.6853244931786306, "grad_norm": 0.16310127079486847, "learning_rate": 0.002, "loss": 2.5452, "step": 344000 }, { "epoch": 0.6853444154022695, "grad_norm": 0.16501154005527496, "learning_rate": 0.002, "loss": 2.547, "step": 344010 }, { "epoch": 0.6853643376259084, "grad_norm": 0.2027876079082489, "learning_rate": 0.002, "loss": 2.5336, "step": 344020 }, { "epoch": 0.6853842598495473, "grad_norm": 0.14921835064888, "learning_rate": 0.002, "loss": 2.5495, "step": 344030 }, { "epoch": 0.6854041820731863, "grad_norm": 0.1621202677488327, "learning_rate": 0.002, "loss": 2.5441, "step": 344040 }, { "epoch": 0.6854241042968252, "grad_norm": 0.1532014161348343, "learning_rate": 0.002, "loss": 2.5563, "step": 344050 }, { "epoch": 0.6854440265204641, "grad_norm": 0.14803215861320496, "learning_rate": 0.002, "loss": 2.5453, "step": 344060 }, { "epoch": 0.685463948744103, "grad_norm": 0.1651284098625183, "learning_rate": 0.002, "loss": 2.549, "step": 344070 }, { "epoch": 0.6854838709677419, "grad_norm": 0.14786387979984283, "learning_rate": 0.002, "loss": 2.5568, "step": 344080 }, { "epoch": 0.6855037931913809, "grad_norm": 0.15482184290885925, "learning_rate": 0.002, "loss": 2.5306, "step": 344090 }, { "epoch": 0.6855237154150198, "grad_norm": 0.1741742193698883, "learning_rate": 0.002, "loss": 2.5691, "step": 344100 }, { "epoch": 0.6855436376386587, "grad_norm": 0.17330068349838257, "learning_rate": 0.002, "loss": 2.5475, "step": 344110 }, { "epoch": 0.6855635598622976, "grad_norm": 0.1808207929134369, "learning_rate": 0.002, "loss": 2.5505, "step": 344120 }, { "epoch": 0.6855834820859366, "grad_norm": 0.16800446808338165, "learning_rate": 0.002, "loss": 2.5579, "step": 344130 }, { "epoch": 0.6856034043095754, "grad_norm": 0.16099092364311218, "learning_rate": 0.002, "loss": 2.5538, "step": 344140 }, { "epoch": 0.6856233265332143, "grad_norm": 0.13914720714092255, "learning_rate": 0.002, "loss": 2.5566, "step": 344150 }, { "epoch": 0.6856432487568532, "grad_norm": 0.1805337518453598, "learning_rate": 0.002, "loss": 2.5465, "step": 344160 }, { "epoch": 0.6856631709804921, "grad_norm": 0.16234233975410461, "learning_rate": 0.002, "loss": 2.5628, "step": 344170 }, { "epoch": 0.6856830932041311, "grad_norm": 0.14992614090442657, "learning_rate": 0.002, "loss": 2.5405, "step": 344180 }, { "epoch": 0.68570301542777, "grad_norm": 0.18231907486915588, "learning_rate": 0.002, "loss": 2.5712, "step": 344190 }, { "epoch": 0.6857229376514089, "grad_norm": 0.19417482614517212, "learning_rate": 0.002, "loss": 2.5588, "step": 344200 }, { "epoch": 0.6857428598750478, "grad_norm": 0.18351241946220398, "learning_rate": 0.002, "loss": 2.5552, "step": 344210 }, { "epoch": 0.6857627820986867, "grad_norm": 0.15274427831172943, "learning_rate": 0.002, "loss": 2.5544, "step": 344220 }, { "epoch": 0.6857827043223257, "grad_norm": 0.1537393182516098, "learning_rate": 0.002, "loss": 2.5578, "step": 344230 }, { "epoch": 0.6858026265459646, "grad_norm": 0.15897604823112488, "learning_rate": 0.002, "loss": 2.5601, "step": 344240 }, { "epoch": 0.6858225487696035, "grad_norm": 0.1646631956100464, "learning_rate": 0.002, "loss": 2.5372, "step": 344250 }, { "epoch": 0.6858424709932424, "grad_norm": 0.13843922317028046, "learning_rate": 0.002, "loss": 2.5604, "step": 344260 }, { "epoch": 0.6858623932168812, "grad_norm": 0.18983377516269684, "learning_rate": 0.002, "loss": 2.5535, "step": 344270 }, { "epoch": 0.6858823154405203, "grad_norm": 0.16212329268455505, "learning_rate": 0.002, "loss": 2.5368, "step": 344280 }, { "epoch": 0.6859022376641591, "grad_norm": 0.1474471539258957, "learning_rate": 0.002, "loss": 2.5657, "step": 344290 }, { "epoch": 0.685922159887798, "grad_norm": 0.15064772963523865, "learning_rate": 0.002, "loss": 2.5598, "step": 344300 }, { "epoch": 0.6859420821114369, "grad_norm": 0.16543179750442505, "learning_rate": 0.002, "loss": 2.5516, "step": 344310 }, { "epoch": 0.6859620043350758, "grad_norm": 0.1591278612613678, "learning_rate": 0.002, "loss": 2.5472, "step": 344320 }, { "epoch": 0.6859819265587148, "grad_norm": 0.1520746797323227, "learning_rate": 0.002, "loss": 2.5738, "step": 344330 }, { "epoch": 0.6860018487823537, "grad_norm": 0.18136344850063324, "learning_rate": 0.002, "loss": 2.546, "step": 344340 }, { "epoch": 0.6860217710059926, "grad_norm": 0.1528911590576172, "learning_rate": 0.002, "loss": 2.5621, "step": 344350 }, { "epoch": 0.6860416932296315, "grad_norm": 0.16434837877750397, "learning_rate": 0.002, "loss": 2.5497, "step": 344360 }, { "epoch": 0.6860616154532704, "grad_norm": 0.15735237300395966, "learning_rate": 0.002, "loss": 2.5463, "step": 344370 }, { "epoch": 0.6860815376769094, "grad_norm": 0.1767885386943817, "learning_rate": 0.002, "loss": 2.5579, "step": 344380 }, { "epoch": 0.6861014599005483, "grad_norm": 0.1667499840259552, "learning_rate": 0.002, "loss": 2.5748, "step": 344390 }, { "epoch": 0.6861213821241872, "grad_norm": 0.16188018023967743, "learning_rate": 0.002, "loss": 2.5641, "step": 344400 }, { "epoch": 0.686141304347826, "grad_norm": 0.20650680363178253, "learning_rate": 0.002, "loss": 2.551, "step": 344410 }, { "epoch": 0.686161226571465, "grad_norm": 0.19806960225105286, "learning_rate": 0.002, "loss": 2.5362, "step": 344420 }, { "epoch": 0.686181148795104, "grad_norm": 0.13541507720947266, "learning_rate": 0.002, "loss": 2.5571, "step": 344430 }, { "epoch": 0.6862010710187428, "grad_norm": 0.19112348556518555, "learning_rate": 0.002, "loss": 2.5469, "step": 344440 }, { "epoch": 0.6862209932423817, "grad_norm": 0.14917409420013428, "learning_rate": 0.002, "loss": 2.5515, "step": 344450 }, { "epoch": 0.6862409154660206, "grad_norm": 0.19295617938041687, "learning_rate": 0.002, "loss": 2.5517, "step": 344460 }, { "epoch": 0.6862608376896596, "grad_norm": 0.15507328510284424, "learning_rate": 0.002, "loss": 2.5595, "step": 344470 }, { "epoch": 0.6862807599132985, "grad_norm": 0.16256441175937653, "learning_rate": 0.002, "loss": 2.5558, "step": 344480 }, { "epoch": 0.6863006821369374, "grad_norm": 0.18194392323493958, "learning_rate": 0.002, "loss": 2.5512, "step": 344490 }, { "epoch": 0.6863206043605763, "grad_norm": 0.1439017653465271, "learning_rate": 0.002, "loss": 2.5566, "step": 344500 }, { "epoch": 0.6863405265842152, "grad_norm": 0.14383229613304138, "learning_rate": 0.002, "loss": 2.5505, "step": 344510 }, { "epoch": 0.6863604488078542, "grad_norm": 0.2365293651819229, "learning_rate": 0.002, "loss": 2.5587, "step": 344520 }, { "epoch": 0.6863803710314931, "grad_norm": 0.16854658722877502, "learning_rate": 0.002, "loss": 2.5499, "step": 344530 }, { "epoch": 0.686400293255132, "grad_norm": 0.1537841260433197, "learning_rate": 0.002, "loss": 2.5612, "step": 344540 }, { "epoch": 0.6864202154787709, "grad_norm": 0.17560620605945587, "learning_rate": 0.002, "loss": 2.5589, "step": 344550 }, { "epoch": 0.6864401377024097, "grad_norm": 0.15874797105789185, "learning_rate": 0.002, "loss": 2.5529, "step": 344560 }, { "epoch": 0.6864600599260487, "grad_norm": 0.1303655505180359, "learning_rate": 0.002, "loss": 2.5568, "step": 344570 }, { "epoch": 0.6864799821496876, "grad_norm": 0.16191934049129486, "learning_rate": 0.002, "loss": 2.5558, "step": 344580 }, { "epoch": 0.6864999043733265, "grad_norm": 0.16748784482479095, "learning_rate": 0.002, "loss": 2.5566, "step": 344590 }, { "epoch": 0.6865198265969654, "grad_norm": 0.14869321882724762, "learning_rate": 0.002, "loss": 2.5556, "step": 344600 }, { "epoch": 0.6865397488206043, "grad_norm": 0.1535000503063202, "learning_rate": 0.002, "loss": 2.5572, "step": 344610 }, { "epoch": 0.6865596710442433, "grad_norm": 0.2161799818277359, "learning_rate": 0.002, "loss": 2.5821, "step": 344620 }, { "epoch": 0.6865795932678822, "grad_norm": 0.18877460062503815, "learning_rate": 0.002, "loss": 2.5593, "step": 344630 }, { "epoch": 0.6865995154915211, "grad_norm": 0.1434304565191269, "learning_rate": 0.002, "loss": 2.5504, "step": 344640 }, { "epoch": 0.68661943771516, "grad_norm": 0.16595889627933502, "learning_rate": 0.002, "loss": 2.5492, "step": 344650 }, { "epoch": 0.6866393599387989, "grad_norm": 0.16044116020202637, "learning_rate": 0.002, "loss": 2.5468, "step": 344660 }, { "epoch": 0.6866592821624379, "grad_norm": 0.16898508369922638, "learning_rate": 0.002, "loss": 2.5441, "step": 344670 }, { "epoch": 0.6866792043860768, "grad_norm": 0.18016307055950165, "learning_rate": 0.002, "loss": 2.5478, "step": 344680 }, { "epoch": 0.6866991266097157, "grad_norm": 0.1458960473537445, "learning_rate": 0.002, "loss": 2.5537, "step": 344690 }, { "epoch": 0.6867190488333546, "grad_norm": 0.1848841905593872, "learning_rate": 0.002, "loss": 2.5617, "step": 344700 }, { "epoch": 0.6867389710569936, "grad_norm": 0.16209127008914948, "learning_rate": 0.002, "loss": 2.558, "step": 344710 }, { "epoch": 0.6867588932806324, "grad_norm": 0.16275444626808167, "learning_rate": 0.002, "loss": 2.5609, "step": 344720 }, { "epoch": 0.6867788155042713, "grad_norm": 0.16305452585220337, "learning_rate": 0.002, "loss": 2.5495, "step": 344730 }, { "epoch": 0.6867987377279102, "grad_norm": 0.15841278433799744, "learning_rate": 0.002, "loss": 2.5466, "step": 344740 }, { "epoch": 0.6868186599515491, "grad_norm": 0.2322542667388916, "learning_rate": 0.002, "loss": 2.5651, "step": 344750 }, { "epoch": 0.6868385821751881, "grad_norm": 0.15557773411273956, "learning_rate": 0.002, "loss": 2.5532, "step": 344760 }, { "epoch": 0.686858504398827, "grad_norm": 0.16192321479320526, "learning_rate": 0.002, "loss": 2.5404, "step": 344770 }, { "epoch": 0.6868784266224659, "grad_norm": 0.1556234508752823, "learning_rate": 0.002, "loss": 2.5529, "step": 344780 }, { "epoch": 0.6868983488461048, "grad_norm": 0.1542501151561737, "learning_rate": 0.002, "loss": 2.544, "step": 344790 }, { "epoch": 0.6869182710697437, "grad_norm": 0.17484629154205322, "learning_rate": 0.002, "loss": 2.5506, "step": 344800 }, { "epoch": 0.6869381932933827, "grad_norm": 0.14458893239498138, "learning_rate": 0.002, "loss": 2.5509, "step": 344810 }, { "epoch": 0.6869581155170216, "grad_norm": 0.18992966413497925, "learning_rate": 0.002, "loss": 2.5471, "step": 344820 }, { "epoch": 0.6869780377406605, "grad_norm": 0.14367790520191193, "learning_rate": 0.002, "loss": 2.5461, "step": 344830 }, { "epoch": 0.6869979599642994, "grad_norm": 0.15188631415367126, "learning_rate": 0.002, "loss": 2.5492, "step": 344840 }, { "epoch": 0.6870178821879382, "grad_norm": 0.1949174404144287, "learning_rate": 0.002, "loss": 2.567, "step": 344850 }, { "epoch": 0.6870378044115772, "grad_norm": 0.14018696546554565, "learning_rate": 0.002, "loss": 2.5706, "step": 344860 }, { "epoch": 0.6870577266352161, "grad_norm": 0.16424579918384552, "learning_rate": 0.002, "loss": 2.5488, "step": 344870 }, { "epoch": 0.687077648858855, "grad_norm": 0.16047626733779907, "learning_rate": 0.002, "loss": 2.5676, "step": 344880 }, { "epoch": 0.6870975710824939, "grad_norm": 0.15954352915287018, "learning_rate": 0.002, "loss": 2.5513, "step": 344890 }, { "epoch": 0.6871174933061328, "grad_norm": 0.14647497236728668, "learning_rate": 0.002, "loss": 2.5421, "step": 344900 }, { "epoch": 0.6871374155297718, "grad_norm": 0.15871278941631317, "learning_rate": 0.002, "loss": 2.5713, "step": 344910 }, { "epoch": 0.6871573377534107, "grad_norm": 0.22231820225715637, "learning_rate": 0.002, "loss": 2.5377, "step": 344920 }, { "epoch": 0.6871772599770496, "grad_norm": 0.14184249937534332, "learning_rate": 0.002, "loss": 2.566, "step": 344930 }, { "epoch": 0.6871971822006885, "grad_norm": 0.13183796405792236, "learning_rate": 0.002, "loss": 2.5439, "step": 344940 }, { "epoch": 0.6872171044243274, "grad_norm": 0.20413292944431305, "learning_rate": 0.002, "loss": 2.5571, "step": 344950 }, { "epoch": 0.6872370266479664, "grad_norm": 0.16118913888931274, "learning_rate": 0.002, "loss": 2.56, "step": 344960 }, { "epoch": 0.6872569488716053, "grad_norm": 0.15171033143997192, "learning_rate": 0.002, "loss": 2.563, "step": 344970 }, { "epoch": 0.6872768710952442, "grad_norm": 0.16635742783546448, "learning_rate": 0.002, "loss": 2.5685, "step": 344980 }, { "epoch": 0.687296793318883, "grad_norm": 0.19609308242797852, "learning_rate": 0.002, "loss": 2.5429, "step": 344990 }, { "epoch": 0.6873167155425219, "grad_norm": 0.1674819439649582, "learning_rate": 0.002, "loss": 2.5377, "step": 345000 }, { "epoch": 0.6873366377661609, "grad_norm": 0.14740468561649323, "learning_rate": 0.002, "loss": 2.5618, "step": 345010 }, { "epoch": 0.6873565599897998, "grad_norm": 0.2733098864555359, "learning_rate": 0.002, "loss": 2.5566, "step": 345020 }, { "epoch": 0.6873764822134387, "grad_norm": 0.17606642842292786, "learning_rate": 0.002, "loss": 2.5473, "step": 345030 }, { "epoch": 0.6873964044370776, "grad_norm": 0.14818556606769562, "learning_rate": 0.002, "loss": 2.5738, "step": 345040 }, { "epoch": 0.6874163266607166, "grad_norm": 0.18387189507484436, "learning_rate": 0.002, "loss": 2.5716, "step": 345050 }, { "epoch": 0.6874362488843555, "grad_norm": 0.1604420393705368, "learning_rate": 0.002, "loss": 2.5488, "step": 345060 }, { "epoch": 0.6874561711079944, "grad_norm": 0.14738084375858307, "learning_rate": 0.002, "loss": 2.548, "step": 345070 }, { "epoch": 0.6874760933316333, "grad_norm": 0.16411660611629486, "learning_rate": 0.002, "loss": 2.5563, "step": 345080 }, { "epoch": 0.6874960155552722, "grad_norm": 0.17639045417308807, "learning_rate": 0.002, "loss": 2.5606, "step": 345090 }, { "epoch": 0.6875159377789112, "grad_norm": 0.17588190734386444, "learning_rate": 0.002, "loss": 2.5615, "step": 345100 }, { "epoch": 0.6875358600025501, "grad_norm": 0.1595199853181839, "learning_rate": 0.002, "loss": 2.5487, "step": 345110 }, { "epoch": 0.687555782226189, "grad_norm": 0.16646172106266022, "learning_rate": 0.002, "loss": 2.5693, "step": 345120 }, { "epoch": 0.6875757044498279, "grad_norm": 0.14915451407432556, "learning_rate": 0.002, "loss": 2.564, "step": 345130 }, { "epoch": 0.6875956266734667, "grad_norm": 0.15128763020038605, "learning_rate": 0.002, "loss": 2.5441, "step": 345140 }, { "epoch": 0.6876155488971057, "grad_norm": 0.14977170526981354, "learning_rate": 0.002, "loss": 2.5502, "step": 345150 }, { "epoch": 0.6876354711207446, "grad_norm": 0.17854611575603485, "learning_rate": 0.002, "loss": 2.5503, "step": 345160 }, { "epoch": 0.6876553933443835, "grad_norm": 0.1566237211227417, "learning_rate": 0.002, "loss": 2.5644, "step": 345170 }, { "epoch": 0.6876753155680224, "grad_norm": 0.15839113295078278, "learning_rate": 0.002, "loss": 2.5612, "step": 345180 }, { "epoch": 0.6876952377916613, "grad_norm": 0.16034488379955292, "learning_rate": 0.002, "loss": 2.5546, "step": 345190 }, { "epoch": 0.6877151600153003, "grad_norm": 0.16031700372695923, "learning_rate": 0.002, "loss": 2.5486, "step": 345200 }, { "epoch": 0.6877350822389392, "grad_norm": 0.14868468046188354, "learning_rate": 0.002, "loss": 2.5612, "step": 345210 }, { "epoch": 0.6877550044625781, "grad_norm": 0.1329822987318039, "learning_rate": 0.002, "loss": 2.5469, "step": 345220 }, { "epoch": 0.687774926686217, "grad_norm": 0.18588319420814514, "learning_rate": 0.002, "loss": 2.5556, "step": 345230 }, { "epoch": 0.6877948489098559, "grad_norm": 0.3001040816307068, "learning_rate": 0.002, "loss": 2.5584, "step": 345240 }, { "epoch": 0.6878147711334949, "grad_norm": 0.1576656699180603, "learning_rate": 0.002, "loss": 2.556, "step": 345250 }, { "epoch": 0.6878346933571338, "grad_norm": 0.1602666974067688, "learning_rate": 0.002, "loss": 2.5467, "step": 345260 }, { "epoch": 0.6878546155807727, "grad_norm": 0.14276500046253204, "learning_rate": 0.002, "loss": 2.5557, "step": 345270 }, { "epoch": 0.6878745378044115, "grad_norm": 0.14734430611133575, "learning_rate": 0.002, "loss": 2.5505, "step": 345280 }, { "epoch": 0.6878944600280504, "grad_norm": 0.169035404920578, "learning_rate": 0.002, "loss": 2.551, "step": 345290 }, { "epoch": 0.6879143822516894, "grad_norm": 0.15711601078510284, "learning_rate": 0.002, "loss": 2.5405, "step": 345300 }, { "epoch": 0.6879343044753283, "grad_norm": 0.16014261543750763, "learning_rate": 0.002, "loss": 2.5671, "step": 345310 }, { "epoch": 0.6879542266989672, "grad_norm": 0.15506143867969513, "learning_rate": 0.002, "loss": 2.549, "step": 345320 }, { "epoch": 0.6879741489226061, "grad_norm": 0.1973842829465866, "learning_rate": 0.002, "loss": 2.5616, "step": 345330 }, { "epoch": 0.6879940711462451, "grad_norm": 0.14684590697288513, "learning_rate": 0.002, "loss": 2.5567, "step": 345340 }, { "epoch": 0.688013993369884, "grad_norm": 0.18879349529743195, "learning_rate": 0.002, "loss": 2.547, "step": 345350 }, { "epoch": 0.6880339155935229, "grad_norm": 0.19718706607818604, "learning_rate": 0.002, "loss": 2.5566, "step": 345360 }, { "epoch": 0.6880538378171618, "grad_norm": 0.192352756857872, "learning_rate": 0.002, "loss": 2.5607, "step": 345370 }, { "epoch": 0.6880737600408007, "grad_norm": 0.17081862688064575, "learning_rate": 0.002, "loss": 2.5624, "step": 345380 }, { "epoch": 0.6880936822644397, "grad_norm": 0.147657573223114, "learning_rate": 0.002, "loss": 2.5559, "step": 345390 }, { "epoch": 0.6881136044880786, "grad_norm": 0.20796968042850494, "learning_rate": 0.002, "loss": 2.5487, "step": 345400 }, { "epoch": 0.6881335267117175, "grad_norm": 0.15705673396587372, "learning_rate": 0.002, "loss": 2.5376, "step": 345410 }, { "epoch": 0.6881534489353563, "grad_norm": 0.13088324666023254, "learning_rate": 0.002, "loss": 2.5633, "step": 345420 }, { "epoch": 0.6881733711589952, "grad_norm": 0.14631438255310059, "learning_rate": 0.002, "loss": 2.5557, "step": 345430 }, { "epoch": 0.6881932933826342, "grad_norm": 0.16582702100276947, "learning_rate": 0.002, "loss": 2.5467, "step": 345440 }, { "epoch": 0.6882132156062731, "grad_norm": 0.1779305636882782, "learning_rate": 0.002, "loss": 2.5566, "step": 345450 }, { "epoch": 0.688233137829912, "grad_norm": 0.15680281817913055, "learning_rate": 0.002, "loss": 2.5732, "step": 345460 }, { "epoch": 0.6882530600535509, "grad_norm": 0.15131667256355286, "learning_rate": 0.002, "loss": 2.5561, "step": 345470 }, { "epoch": 0.6882729822771898, "grad_norm": 0.1757410317659378, "learning_rate": 0.002, "loss": 2.5508, "step": 345480 }, { "epoch": 0.6882929045008288, "grad_norm": 0.17122793197631836, "learning_rate": 0.002, "loss": 2.5553, "step": 345490 }, { "epoch": 0.6883128267244677, "grad_norm": 0.18424713611602783, "learning_rate": 0.002, "loss": 2.557, "step": 345500 }, { "epoch": 0.6883327489481066, "grad_norm": 0.19035686552524567, "learning_rate": 0.002, "loss": 2.5695, "step": 345510 }, { "epoch": 0.6883526711717455, "grad_norm": 0.1618082970380783, "learning_rate": 0.002, "loss": 2.5492, "step": 345520 }, { "epoch": 0.6883725933953844, "grad_norm": 0.13983504474163055, "learning_rate": 0.002, "loss": 2.5646, "step": 345530 }, { "epoch": 0.6883925156190234, "grad_norm": 0.2495381087064743, "learning_rate": 0.002, "loss": 2.5482, "step": 345540 }, { "epoch": 0.6884124378426623, "grad_norm": 0.15269975364208221, "learning_rate": 0.002, "loss": 2.5708, "step": 345550 }, { "epoch": 0.6884323600663012, "grad_norm": 0.17686988413333893, "learning_rate": 0.002, "loss": 2.5565, "step": 345560 }, { "epoch": 0.68845228228994, "grad_norm": 0.1541040688753128, "learning_rate": 0.002, "loss": 2.5657, "step": 345570 }, { "epoch": 0.6884722045135789, "grad_norm": 0.16934427618980408, "learning_rate": 0.002, "loss": 2.5531, "step": 345580 }, { "epoch": 0.6884921267372179, "grad_norm": 0.17455026507377625, "learning_rate": 0.002, "loss": 2.543, "step": 345590 }, { "epoch": 0.6885120489608568, "grad_norm": 0.15236450731754303, "learning_rate": 0.002, "loss": 2.5606, "step": 345600 }, { "epoch": 0.6885319711844957, "grad_norm": 0.1703929305076599, "learning_rate": 0.002, "loss": 2.5595, "step": 345610 }, { "epoch": 0.6885518934081346, "grad_norm": 0.16503897309303284, "learning_rate": 0.002, "loss": 2.5616, "step": 345620 }, { "epoch": 0.6885718156317736, "grad_norm": 0.16076065599918365, "learning_rate": 0.002, "loss": 2.5565, "step": 345630 }, { "epoch": 0.6885917378554125, "grad_norm": 0.17121365666389465, "learning_rate": 0.002, "loss": 2.5597, "step": 345640 }, { "epoch": 0.6886116600790514, "grad_norm": 0.1849268674850464, "learning_rate": 0.002, "loss": 2.5524, "step": 345650 }, { "epoch": 0.6886315823026903, "grad_norm": 0.16809025406837463, "learning_rate": 0.002, "loss": 2.5552, "step": 345660 }, { "epoch": 0.6886515045263292, "grad_norm": 0.156111478805542, "learning_rate": 0.002, "loss": 2.5399, "step": 345670 }, { "epoch": 0.6886714267499682, "grad_norm": 0.2176673859357834, "learning_rate": 0.002, "loss": 2.5501, "step": 345680 }, { "epoch": 0.6886913489736071, "grad_norm": 0.17078086733818054, "learning_rate": 0.002, "loss": 2.5522, "step": 345690 }, { "epoch": 0.688711271197246, "grad_norm": 0.17852437496185303, "learning_rate": 0.002, "loss": 2.556, "step": 345700 }, { "epoch": 0.6887311934208848, "grad_norm": 0.19016417860984802, "learning_rate": 0.002, "loss": 2.5499, "step": 345710 }, { "epoch": 0.6887511156445237, "grad_norm": 0.15203768014907837, "learning_rate": 0.002, "loss": 2.5543, "step": 345720 }, { "epoch": 0.6887710378681627, "grad_norm": 0.19107094407081604, "learning_rate": 0.002, "loss": 2.5615, "step": 345730 }, { "epoch": 0.6887909600918016, "grad_norm": 0.16529244184494019, "learning_rate": 0.002, "loss": 2.5578, "step": 345740 }, { "epoch": 0.6888108823154405, "grad_norm": 0.18321876227855682, "learning_rate": 0.002, "loss": 2.549, "step": 345750 }, { "epoch": 0.6888308045390794, "grad_norm": 0.18153932690620422, "learning_rate": 0.002, "loss": 2.5445, "step": 345760 }, { "epoch": 0.6888507267627183, "grad_norm": 0.14315341413021088, "learning_rate": 0.002, "loss": 2.5513, "step": 345770 }, { "epoch": 0.6888706489863573, "grad_norm": 0.15932270884513855, "learning_rate": 0.002, "loss": 2.5559, "step": 345780 }, { "epoch": 0.6888905712099962, "grad_norm": 0.1816466599702835, "learning_rate": 0.002, "loss": 2.5486, "step": 345790 }, { "epoch": 0.6889104934336351, "grad_norm": 0.15909333527088165, "learning_rate": 0.002, "loss": 2.5559, "step": 345800 }, { "epoch": 0.688930415657274, "grad_norm": 0.1757115125656128, "learning_rate": 0.002, "loss": 2.5522, "step": 345810 }, { "epoch": 0.6889503378809129, "grad_norm": 0.16356879472732544, "learning_rate": 0.002, "loss": 2.5643, "step": 345820 }, { "epoch": 0.6889702601045519, "grad_norm": 0.17827782034873962, "learning_rate": 0.002, "loss": 2.552, "step": 345830 }, { "epoch": 0.6889901823281908, "grad_norm": 0.2006533145904541, "learning_rate": 0.002, "loss": 2.5591, "step": 345840 }, { "epoch": 0.6890101045518296, "grad_norm": 0.1694212108850479, "learning_rate": 0.002, "loss": 2.5424, "step": 345850 }, { "epoch": 0.6890300267754685, "grad_norm": 0.1772838532924652, "learning_rate": 0.002, "loss": 2.5618, "step": 345860 }, { "epoch": 0.6890499489991074, "grad_norm": 0.14439678192138672, "learning_rate": 0.002, "loss": 2.553, "step": 345870 }, { "epoch": 0.6890698712227464, "grad_norm": 0.16838134825229645, "learning_rate": 0.002, "loss": 2.537, "step": 345880 }, { "epoch": 0.6890897934463853, "grad_norm": 0.20906294882297516, "learning_rate": 0.002, "loss": 2.563, "step": 345890 }, { "epoch": 0.6891097156700242, "grad_norm": 0.17305780947208405, "learning_rate": 0.002, "loss": 2.5607, "step": 345900 }, { "epoch": 0.6891296378936631, "grad_norm": 0.18562424182891846, "learning_rate": 0.002, "loss": 2.545, "step": 345910 }, { "epoch": 0.6891495601173021, "grad_norm": 0.17606475949287415, "learning_rate": 0.002, "loss": 2.5644, "step": 345920 }, { "epoch": 0.689169482340941, "grad_norm": 0.1519518941640854, "learning_rate": 0.002, "loss": 2.5366, "step": 345930 }, { "epoch": 0.6891894045645799, "grad_norm": 0.15557236969470978, "learning_rate": 0.002, "loss": 2.5259, "step": 345940 }, { "epoch": 0.6892093267882188, "grad_norm": 0.15122032165527344, "learning_rate": 0.002, "loss": 2.5553, "step": 345950 }, { "epoch": 0.6892292490118577, "grad_norm": 0.15211661159992218, "learning_rate": 0.002, "loss": 2.5599, "step": 345960 }, { "epoch": 0.6892491712354967, "grad_norm": 0.1529170721769333, "learning_rate": 0.002, "loss": 2.5294, "step": 345970 }, { "epoch": 0.6892690934591356, "grad_norm": 0.21974751353263855, "learning_rate": 0.002, "loss": 2.561, "step": 345980 }, { "epoch": 0.6892890156827745, "grad_norm": 0.18299372494220734, "learning_rate": 0.002, "loss": 2.553, "step": 345990 }, { "epoch": 0.6893089379064133, "grad_norm": 0.1550329476594925, "learning_rate": 0.002, "loss": 2.5695, "step": 346000 }, { "epoch": 0.6893288601300522, "grad_norm": 0.16494835913181305, "learning_rate": 0.002, "loss": 2.5606, "step": 346010 }, { "epoch": 0.6893487823536912, "grad_norm": 0.15744704008102417, "learning_rate": 0.002, "loss": 2.5509, "step": 346020 }, { "epoch": 0.6893687045773301, "grad_norm": 0.17302840948104858, "learning_rate": 0.002, "loss": 2.5371, "step": 346030 }, { "epoch": 0.689388626800969, "grad_norm": 0.16794148087501526, "learning_rate": 0.002, "loss": 2.5599, "step": 346040 }, { "epoch": 0.6894085490246079, "grad_norm": 0.14514075219631195, "learning_rate": 0.002, "loss": 2.5726, "step": 346050 }, { "epoch": 0.6894284712482468, "grad_norm": 0.18612544238567352, "learning_rate": 0.002, "loss": 2.5473, "step": 346060 }, { "epoch": 0.6894483934718858, "grad_norm": 0.16673840582370758, "learning_rate": 0.002, "loss": 2.5481, "step": 346070 }, { "epoch": 0.6894683156955247, "grad_norm": 0.1732574999332428, "learning_rate": 0.002, "loss": 2.5429, "step": 346080 }, { "epoch": 0.6894882379191636, "grad_norm": 0.14133845269680023, "learning_rate": 0.002, "loss": 2.5433, "step": 346090 }, { "epoch": 0.6895081601428025, "grad_norm": 0.18244194984436035, "learning_rate": 0.002, "loss": 2.5505, "step": 346100 }, { "epoch": 0.6895280823664414, "grad_norm": 0.1482025533914566, "learning_rate": 0.002, "loss": 2.5535, "step": 346110 }, { "epoch": 0.6895480045900804, "grad_norm": 0.1704648733139038, "learning_rate": 0.002, "loss": 2.5556, "step": 346120 }, { "epoch": 0.6895679268137193, "grad_norm": 0.21477068960666656, "learning_rate": 0.002, "loss": 2.5582, "step": 346130 }, { "epoch": 0.6895878490373581, "grad_norm": 0.16125640273094177, "learning_rate": 0.002, "loss": 2.5625, "step": 346140 }, { "epoch": 0.689607771260997, "grad_norm": 0.16667574644088745, "learning_rate": 0.002, "loss": 2.5632, "step": 346150 }, { "epoch": 0.6896276934846359, "grad_norm": 0.15656675398349762, "learning_rate": 0.002, "loss": 2.5468, "step": 346160 }, { "epoch": 0.6896476157082749, "grad_norm": 0.15814489126205444, "learning_rate": 0.002, "loss": 2.5624, "step": 346170 }, { "epoch": 0.6896675379319138, "grad_norm": 0.18583516776561737, "learning_rate": 0.002, "loss": 2.5587, "step": 346180 }, { "epoch": 0.6896874601555527, "grad_norm": 0.14898782968521118, "learning_rate": 0.002, "loss": 2.5523, "step": 346190 }, { "epoch": 0.6897073823791916, "grad_norm": 0.16044415533542633, "learning_rate": 0.002, "loss": 2.5443, "step": 346200 }, { "epoch": 0.6897273046028306, "grad_norm": 0.1541457623243332, "learning_rate": 0.002, "loss": 2.5489, "step": 346210 }, { "epoch": 0.6897472268264695, "grad_norm": 0.18995670974254608, "learning_rate": 0.002, "loss": 2.5397, "step": 346220 }, { "epoch": 0.6897671490501084, "grad_norm": 0.15220250189304352, "learning_rate": 0.002, "loss": 2.5259, "step": 346230 }, { "epoch": 0.6897870712737473, "grad_norm": 0.1411256641149521, "learning_rate": 0.002, "loss": 2.5585, "step": 346240 }, { "epoch": 0.6898069934973862, "grad_norm": 0.17429876327514648, "learning_rate": 0.002, "loss": 2.543, "step": 346250 }, { "epoch": 0.6898269157210252, "grad_norm": 0.15160614252090454, "learning_rate": 0.002, "loss": 2.5657, "step": 346260 }, { "epoch": 0.6898468379446641, "grad_norm": 0.1896815001964569, "learning_rate": 0.002, "loss": 2.5447, "step": 346270 }, { "epoch": 0.689866760168303, "grad_norm": 0.18319974839687347, "learning_rate": 0.002, "loss": 2.5565, "step": 346280 }, { "epoch": 0.6898866823919418, "grad_norm": 0.17804311215877533, "learning_rate": 0.002, "loss": 2.5489, "step": 346290 }, { "epoch": 0.6899066046155807, "grad_norm": 0.1662537008523941, "learning_rate": 0.002, "loss": 2.5431, "step": 346300 }, { "epoch": 0.6899265268392197, "grad_norm": 0.16320616006851196, "learning_rate": 0.002, "loss": 2.5675, "step": 346310 }, { "epoch": 0.6899464490628586, "grad_norm": 0.18986965715885162, "learning_rate": 0.002, "loss": 2.556, "step": 346320 }, { "epoch": 0.6899663712864975, "grad_norm": 0.13083867728710175, "learning_rate": 0.002, "loss": 2.5613, "step": 346330 }, { "epoch": 0.6899862935101364, "grad_norm": 0.15760895609855652, "learning_rate": 0.002, "loss": 2.5575, "step": 346340 }, { "epoch": 0.6900062157337753, "grad_norm": 0.17827939987182617, "learning_rate": 0.002, "loss": 2.5494, "step": 346350 }, { "epoch": 0.6900261379574143, "grad_norm": 0.16640853881835938, "learning_rate": 0.002, "loss": 2.5699, "step": 346360 }, { "epoch": 0.6900460601810532, "grad_norm": 0.19229285418987274, "learning_rate": 0.002, "loss": 2.5504, "step": 346370 }, { "epoch": 0.6900659824046921, "grad_norm": 0.1716049611568451, "learning_rate": 0.002, "loss": 2.5503, "step": 346380 }, { "epoch": 0.690085904628331, "grad_norm": 0.15903915464878082, "learning_rate": 0.002, "loss": 2.5599, "step": 346390 }, { "epoch": 0.6901058268519699, "grad_norm": 0.16473175585269928, "learning_rate": 0.002, "loss": 2.5657, "step": 346400 }, { "epoch": 0.6901257490756089, "grad_norm": 0.16414007544517517, "learning_rate": 0.002, "loss": 2.5485, "step": 346410 }, { "epoch": 0.6901456712992478, "grad_norm": 0.17048275470733643, "learning_rate": 0.002, "loss": 2.5674, "step": 346420 }, { "epoch": 0.6901655935228866, "grad_norm": 0.187697634100914, "learning_rate": 0.002, "loss": 2.5431, "step": 346430 }, { "epoch": 0.6901855157465255, "grad_norm": 0.20133723318576813, "learning_rate": 0.002, "loss": 2.5532, "step": 346440 }, { "epoch": 0.6902054379701644, "grad_norm": 0.15244489908218384, "learning_rate": 0.002, "loss": 2.5334, "step": 346450 }, { "epoch": 0.6902253601938034, "grad_norm": 0.14117653667926788, "learning_rate": 0.002, "loss": 2.5461, "step": 346460 }, { "epoch": 0.6902452824174423, "grad_norm": 0.14857347309589386, "learning_rate": 0.002, "loss": 2.5483, "step": 346470 }, { "epoch": 0.6902652046410812, "grad_norm": 0.17803138494491577, "learning_rate": 0.002, "loss": 2.5471, "step": 346480 }, { "epoch": 0.6902851268647201, "grad_norm": 0.15040597319602966, "learning_rate": 0.002, "loss": 2.5468, "step": 346490 }, { "epoch": 0.6903050490883591, "grad_norm": 0.1628912091255188, "learning_rate": 0.002, "loss": 2.5392, "step": 346500 }, { "epoch": 0.690324971311998, "grad_norm": 0.18646839261054993, "learning_rate": 0.002, "loss": 2.5538, "step": 346510 }, { "epoch": 0.6903448935356369, "grad_norm": 0.17397430539131165, "learning_rate": 0.002, "loss": 2.5643, "step": 346520 }, { "epoch": 0.6903648157592758, "grad_norm": 0.14624528586864471, "learning_rate": 0.002, "loss": 2.5635, "step": 346530 }, { "epoch": 0.6903847379829147, "grad_norm": 0.21256428956985474, "learning_rate": 0.002, "loss": 2.555, "step": 346540 }, { "epoch": 0.6904046602065537, "grad_norm": 0.16762880980968475, "learning_rate": 0.002, "loss": 2.5593, "step": 346550 }, { "epoch": 0.6904245824301926, "grad_norm": 0.14554019272327423, "learning_rate": 0.002, "loss": 2.5615, "step": 346560 }, { "epoch": 0.6904445046538314, "grad_norm": 0.1516834795475006, "learning_rate": 0.002, "loss": 2.5622, "step": 346570 }, { "epoch": 0.6904644268774703, "grad_norm": 0.16664133965969086, "learning_rate": 0.002, "loss": 2.551, "step": 346580 }, { "epoch": 0.6904843491011092, "grad_norm": 0.1757417917251587, "learning_rate": 0.002, "loss": 2.5611, "step": 346590 }, { "epoch": 0.6905042713247482, "grad_norm": 0.1698194295167923, "learning_rate": 0.002, "loss": 2.5538, "step": 346600 }, { "epoch": 0.6905241935483871, "grad_norm": 0.1626550853252411, "learning_rate": 0.002, "loss": 2.5429, "step": 346610 }, { "epoch": 0.690544115772026, "grad_norm": 0.15499189496040344, "learning_rate": 0.002, "loss": 2.5628, "step": 346620 }, { "epoch": 0.6905640379956649, "grad_norm": 0.16712817549705505, "learning_rate": 0.002, "loss": 2.5691, "step": 346630 }, { "epoch": 0.6905839602193038, "grad_norm": 0.20713408291339874, "learning_rate": 0.002, "loss": 2.5458, "step": 346640 }, { "epoch": 0.6906038824429428, "grad_norm": 0.1647920459508896, "learning_rate": 0.002, "loss": 2.5527, "step": 346650 }, { "epoch": 0.6906238046665817, "grad_norm": 0.17293816804885864, "learning_rate": 0.002, "loss": 2.5508, "step": 346660 }, { "epoch": 0.6906437268902206, "grad_norm": 0.16189414262771606, "learning_rate": 0.002, "loss": 2.5681, "step": 346670 }, { "epoch": 0.6906636491138595, "grad_norm": 0.1804908961057663, "learning_rate": 0.002, "loss": 2.5554, "step": 346680 }, { "epoch": 0.6906835713374984, "grad_norm": 0.17209520936012268, "learning_rate": 0.002, "loss": 2.5427, "step": 346690 }, { "epoch": 0.6907034935611374, "grad_norm": 0.1457187384366989, "learning_rate": 0.002, "loss": 2.5579, "step": 346700 }, { "epoch": 0.6907234157847763, "grad_norm": 0.1565849334001541, "learning_rate": 0.002, "loss": 2.5444, "step": 346710 }, { "epoch": 0.6907433380084151, "grad_norm": 0.1727178692817688, "learning_rate": 0.002, "loss": 2.5591, "step": 346720 }, { "epoch": 0.690763260232054, "grad_norm": 0.1825285106897354, "learning_rate": 0.002, "loss": 2.5593, "step": 346730 }, { "epoch": 0.6907831824556929, "grad_norm": 0.2037978619337082, "learning_rate": 0.002, "loss": 2.5556, "step": 346740 }, { "epoch": 0.6908031046793319, "grad_norm": 0.17824393510818481, "learning_rate": 0.002, "loss": 2.5586, "step": 346750 }, { "epoch": 0.6908230269029708, "grad_norm": 0.15382800996303558, "learning_rate": 0.002, "loss": 2.5502, "step": 346760 }, { "epoch": 0.6908429491266097, "grad_norm": 0.1783118098974228, "learning_rate": 0.002, "loss": 2.5378, "step": 346770 }, { "epoch": 0.6908628713502486, "grad_norm": 0.14133888483047485, "learning_rate": 0.002, "loss": 2.5607, "step": 346780 }, { "epoch": 0.6908827935738875, "grad_norm": 0.1656501740217209, "learning_rate": 0.002, "loss": 2.5513, "step": 346790 }, { "epoch": 0.6909027157975265, "grad_norm": 0.17224012315273285, "learning_rate": 0.002, "loss": 2.5512, "step": 346800 }, { "epoch": 0.6909226380211654, "grad_norm": 0.19071803987026215, "learning_rate": 0.002, "loss": 2.5511, "step": 346810 }, { "epoch": 0.6909425602448043, "grad_norm": 0.19574841856956482, "learning_rate": 0.002, "loss": 2.5494, "step": 346820 }, { "epoch": 0.6909624824684432, "grad_norm": 0.18670786917209625, "learning_rate": 0.002, "loss": 2.5445, "step": 346830 }, { "epoch": 0.6909824046920822, "grad_norm": 0.18740305304527283, "learning_rate": 0.002, "loss": 2.551, "step": 346840 }, { "epoch": 0.691002326915721, "grad_norm": 0.18146148324012756, "learning_rate": 0.002, "loss": 2.5689, "step": 346850 }, { "epoch": 0.69102224913936, "grad_norm": 0.14847080409526825, "learning_rate": 0.002, "loss": 2.5664, "step": 346860 }, { "epoch": 0.6910421713629988, "grad_norm": 0.18325325846672058, "learning_rate": 0.002, "loss": 2.5672, "step": 346870 }, { "epoch": 0.6910620935866377, "grad_norm": 0.15126635134220123, "learning_rate": 0.002, "loss": 2.56, "step": 346880 }, { "epoch": 0.6910820158102767, "grad_norm": 0.14330598711967468, "learning_rate": 0.002, "loss": 2.5697, "step": 346890 }, { "epoch": 0.6911019380339156, "grad_norm": 0.15091350674629211, "learning_rate": 0.002, "loss": 2.5536, "step": 346900 }, { "epoch": 0.6911218602575545, "grad_norm": 0.177107036113739, "learning_rate": 0.002, "loss": 2.546, "step": 346910 }, { "epoch": 0.6911417824811934, "grad_norm": 0.17363637685775757, "learning_rate": 0.002, "loss": 2.5553, "step": 346920 }, { "epoch": 0.6911617047048323, "grad_norm": 0.17877694964408875, "learning_rate": 0.002, "loss": 2.5792, "step": 346930 }, { "epoch": 0.6911816269284713, "grad_norm": 0.21171551942825317, "learning_rate": 0.002, "loss": 2.5604, "step": 346940 }, { "epoch": 0.6912015491521102, "grad_norm": 0.1531815528869629, "learning_rate": 0.002, "loss": 2.5666, "step": 346950 }, { "epoch": 0.6912214713757491, "grad_norm": 0.173382967710495, "learning_rate": 0.002, "loss": 2.5617, "step": 346960 }, { "epoch": 0.691241393599388, "grad_norm": 0.16785088181495667, "learning_rate": 0.002, "loss": 2.5676, "step": 346970 }, { "epoch": 0.6912613158230269, "grad_norm": 0.171905517578125, "learning_rate": 0.002, "loss": 2.5533, "step": 346980 }, { "epoch": 0.6912812380466659, "grad_norm": 0.14433570206165314, "learning_rate": 0.002, "loss": 2.5524, "step": 346990 }, { "epoch": 0.6913011602703047, "grad_norm": 0.17017149925231934, "learning_rate": 0.002, "loss": 2.5579, "step": 347000 }, { "epoch": 0.6913210824939436, "grad_norm": 0.1846984177827835, "learning_rate": 0.002, "loss": 2.5642, "step": 347010 }, { "epoch": 0.6913410047175825, "grad_norm": 0.1768437623977661, "learning_rate": 0.002, "loss": 2.5572, "step": 347020 }, { "epoch": 0.6913609269412214, "grad_norm": 0.17872239649295807, "learning_rate": 0.002, "loss": 2.5625, "step": 347030 }, { "epoch": 0.6913808491648604, "grad_norm": 0.16746070981025696, "learning_rate": 0.002, "loss": 2.5532, "step": 347040 }, { "epoch": 0.6914007713884993, "grad_norm": 0.13933803141117096, "learning_rate": 0.002, "loss": 2.5511, "step": 347050 }, { "epoch": 0.6914206936121382, "grad_norm": 0.18153077363967896, "learning_rate": 0.002, "loss": 2.5541, "step": 347060 }, { "epoch": 0.6914406158357771, "grad_norm": 0.16800369322299957, "learning_rate": 0.002, "loss": 2.5652, "step": 347070 }, { "epoch": 0.691460538059416, "grad_norm": 0.13420410454273224, "learning_rate": 0.002, "loss": 2.5615, "step": 347080 }, { "epoch": 0.691480460283055, "grad_norm": 0.15812768042087555, "learning_rate": 0.002, "loss": 2.5507, "step": 347090 }, { "epoch": 0.6915003825066939, "grad_norm": 0.18308496475219727, "learning_rate": 0.002, "loss": 2.5419, "step": 347100 }, { "epoch": 0.6915203047303328, "grad_norm": 0.1513490527868271, "learning_rate": 0.002, "loss": 2.5444, "step": 347110 }, { "epoch": 0.6915402269539717, "grad_norm": 0.2137843668460846, "learning_rate": 0.002, "loss": 2.5685, "step": 347120 }, { "epoch": 0.6915601491776107, "grad_norm": 0.19913159310817719, "learning_rate": 0.002, "loss": 2.5548, "step": 347130 }, { "epoch": 0.6915800714012496, "grad_norm": 0.17805203795433044, "learning_rate": 0.002, "loss": 2.567, "step": 347140 }, { "epoch": 0.6915999936248884, "grad_norm": 0.1769789159297943, "learning_rate": 0.002, "loss": 2.5516, "step": 347150 }, { "epoch": 0.6916199158485273, "grad_norm": 0.17469975352287292, "learning_rate": 0.002, "loss": 2.5474, "step": 347160 }, { "epoch": 0.6916398380721662, "grad_norm": 0.17939446866512299, "learning_rate": 0.002, "loss": 2.5506, "step": 347170 }, { "epoch": 0.6916597602958052, "grad_norm": 0.15777935087680817, "learning_rate": 0.002, "loss": 2.5454, "step": 347180 }, { "epoch": 0.6916796825194441, "grad_norm": 0.1545371115207672, "learning_rate": 0.002, "loss": 2.5631, "step": 347190 }, { "epoch": 0.691699604743083, "grad_norm": 0.14938396215438843, "learning_rate": 0.002, "loss": 2.5518, "step": 347200 }, { "epoch": 0.6917195269667219, "grad_norm": 0.1842082440853119, "learning_rate": 0.002, "loss": 2.5564, "step": 347210 }, { "epoch": 0.6917394491903608, "grad_norm": 0.14258533716201782, "learning_rate": 0.002, "loss": 2.557, "step": 347220 }, { "epoch": 0.6917593714139998, "grad_norm": 0.16849759221076965, "learning_rate": 0.002, "loss": 2.5662, "step": 347230 }, { "epoch": 0.6917792936376387, "grad_norm": 0.16598078608512878, "learning_rate": 0.002, "loss": 2.5685, "step": 347240 }, { "epoch": 0.6917992158612776, "grad_norm": 0.14887738227844238, "learning_rate": 0.002, "loss": 2.5595, "step": 347250 }, { "epoch": 0.6918191380849165, "grad_norm": 0.17025873064994812, "learning_rate": 0.002, "loss": 2.5581, "step": 347260 }, { "epoch": 0.6918390603085554, "grad_norm": 0.17241576313972473, "learning_rate": 0.002, "loss": 2.5655, "step": 347270 }, { "epoch": 0.6918589825321944, "grad_norm": 0.1951335221529007, "learning_rate": 0.002, "loss": 2.5532, "step": 347280 }, { "epoch": 0.6918789047558332, "grad_norm": 0.15205033123493195, "learning_rate": 0.002, "loss": 2.559, "step": 347290 }, { "epoch": 0.6918988269794721, "grad_norm": 0.15836648643016815, "learning_rate": 0.002, "loss": 2.5446, "step": 347300 }, { "epoch": 0.691918749203111, "grad_norm": 0.17711125314235687, "learning_rate": 0.002, "loss": 2.5657, "step": 347310 }, { "epoch": 0.6919386714267499, "grad_norm": 0.16037799417972565, "learning_rate": 0.002, "loss": 2.554, "step": 347320 }, { "epoch": 0.6919585936503889, "grad_norm": 0.1697630137205124, "learning_rate": 0.002, "loss": 2.5552, "step": 347330 }, { "epoch": 0.6919785158740278, "grad_norm": 0.1726851612329483, "learning_rate": 0.002, "loss": 2.5568, "step": 347340 }, { "epoch": 0.6919984380976667, "grad_norm": 0.1809416562318802, "learning_rate": 0.002, "loss": 2.5585, "step": 347350 }, { "epoch": 0.6920183603213056, "grad_norm": 0.14583401381969452, "learning_rate": 0.002, "loss": 2.5538, "step": 347360 }, { "epoch": 0.6920382825449445, "grad_norm": 0.16265517473220825, "learning_rate": 0.002, "loss": 2.5631, "step": 347370 }, { "epoch": 0.6920582047685835, "grad_norm": 0.16653048992156982, "learning_rate": 0.002, "loss": 2.5577, "step": 347380 }, { "epoch": 0.6920781269922224, "grad_norm": 0.16226829588413239, "learning_rate": 0.002, "loss": 2.5635, "step": 347390 }, { "epoch": 0.6920980492158613, "grad_norm": 0.2117650955915451, "learning_rate": 0.002, "loss": 2.5595, "step": 347400 }, { "epoch": 0.6921179714395002, "grad_norm": 0.16073550283908844, "learning_rate": 0.002, "loss": 2.5371, "step": 347410 }, { "epoch": 0.6921378936631392, "grad_norm": 0.16847874224185944, "learning_rate": 0.002, "loss": 2.5587, "step": 347420 }, { "epoch": 0.692157815886778, "grad_norm": 0.149717777967453, "learning_rate": 0.002, "loss": 2.5474, "step": 347430 }, { "epoch": 0.6921777381104169, "grad_norm": 0.1739863008260727, "learning_rate": 0.002, "loss": 2.5479, "step": 347440 }, { "epoch": 0.6921976603340558, "grad_norm": 0.1721428483724594, "learning_rate": 0.002, "loss": 2.5548, "step": 347450 }, { "epoch": 0.6922175825576947, "grad_norm": 0.16600260138511658, "learning_rate": 0.002, "loss": 2.5522, "step": 347460 }, { "epoch": 0.6922375047813337, "grad_norm": 0.1483292132616043, "learning_rate": 0.002, "loss": 2.5498, "step": 347470 }, { "epoch": 0.6922574270049726, "grad_norm": 0.15452469885349274, "learning_rate": 0.002, "loss": 2.5567, "step": 347480 }, { "epoch": 0.6922773492286115, "grad_norm": 0.15038591623306274, "learning_rate": 0.002, "loss": 2.5585, "step": 347490 }, { "epoch": 0.6922972714522504, "grad_norm": 0.16056275367736816, "learning_rate": 0.002, "loss": 2.5605, "step": 347500 }, { "epoch": 0.6923171936758893, "grad_norm": 0.20526117086410522, "learning_rate": 0.002, "loss": 2.5374, "step": 347510 }, { "epoch": 0.6923371158995283, "grad_norm": 0.1717802733182907, "learning_rate": 0.002, "loss": 2.56, "step": 347520 }, { "epoch": 0.6923570381231672, "grad_norm": 0.14518506824970245, "learning_rate": 0.002, "loss": 2.554, "step": 347530 }, { "epoch": 0.6923769603468061, "grad_norm": 0.16150440275669098, "learning_rate": 0.002, "loss": 2.5597, "step": 347540 }, { "epoch": 0.692396882570445, "grad_norm": 0.15451674163341522, "learning_rate": 0.002, "loss": 2.564, "step": 347550 }, { "epoch": 0.6924168047940839, "grad_norm": 0.20002944767475128, "learning_rate": 0.002, "loss": 2.5637, "step": 347560 }, { "epoch": 0.6924367270177229, "grad_norm": 0.17206330597400665, "learning_rate": 0.002, "loss": 2.5565, "step": 347570 }, { "epoch": 0.6924566492413617, "grad_norm": 0.1432552933692932, "learning_rate": 0.002, "loss": 2.5491, "step": 347580 }, { "epoch": 0.6924765714650006, "grad_norm": 0.17278115451335907, "learning_rate": 0.002, "loss": 2.5554, "step": 347590 }, { "epoch": 0.6924964936886395, "grad_norm": 0.17904087901115417, "learning_rate": 0.002, "loss": 2.5541, "step": 347600 }, { "epoch": 0.6925164159122784, "grad_norm": 0.1763174831867218, "learning_rate": 0.002, "loss": 2.5667, "step": 347610 }, { "epoch": 0.6925363381359174, "grad_norm": 0.1615501195192337, "learning_rate": 0.002, "loss": 2.567, "step": 347620 }, { "epoch": 0.6925562603595563, "grad_norm": 0.1404702514410019, "learning_rate": 0.002, "loss": 2.5539, "step": 347630 }, { "epoch": 0.6925761825831952, "grad_norm": 0.16766922175884247, "learning_rate": 0.002, "loss": 2.5624, "step": 347640 }, { "epoch": 0.6925961048068341, "grad_norm": 0.1593180000782013, "learning_rate": 0.002, "loss": 2.5474, "step": 347650 }, { "epoch": 0.692616027030473, "grad_norm": 0.16677816212177277, "learning_rate": 0.002, "loss": 2.5679, "step": 347660 }, { "epoch": 0.692635949254112, "grad_norm": 0.14308546483516693, "learning_rate": 0.002, "loss": 2.5593, "step": 347670 }, { "epoch": 0.6926558714777509, "grad_norm": 0.19530096650123596, "learning_rate": 0.002, "loss": 2.5545, "step": 347680 }, { "epoch": 0.6926757937013898, "grad_norm": 0.15183573961257935, "learning_rate": 0.002, "loss": 2.5644, "step": 347690 }, { "epoch": 0.6926957159250287, "grad_norm": 0.1734495759010315, "learning_rate": 0.002, "loss": 2.5634, "step": 347700 }, { "epoch": 0.6927156381486677, "grad_norm": 0.14700227975845337, "learning_rate": 0.002, "loss": 2.5507, "step": 347710 }, { "epoch": 0.6927355603723065, "grad_norm": 0.16086497902870178, "learning_rate": 0.002, "loss": 2.579, "step": 347720 }, { "epoch": 0.6927554825959454, "grad_norm": 0.16953997313976288, "learning_rate": 0.002, "loss": 2.5574, "step": 347730 }, { "epoch": 0.6927754048195843, "grad_norm": 0.14566287398338318, "learning_rate": 0.002, "loss": 2.5548, "step": 347740 }, { "epoch": 0.6927953270432232, "grad_norm": 0.21985086798667908, "learning_rate": 0.002, "loss": 2.567, "step": 347750 }, { "epoch": 0.6928152492668622, "grad_norm": 0.1707753986120224, "learning_rate": 0.002, "loss": 2.5611, "step": 347760 }, { "epoch": 0.6928351714905011, "grad_norm": 0.15864920616149902, "learning_rate": 0.002, "loss": 2.5445, "step": 347770 }, { "epoch": 0.69285509371414, "grad_norm": 0.16667552292346954, "learning_rate": 0.002, "loss": 2.545, "step": 347780 }, { "epoch": 0.6928750159377789, "grad_norm": 0.1993720382452011, "learning_rate": 0.002, "loss": 2.5423, "step": 347790 }, { "epoch": 0.6928949381614178, "grad_norm": 0.1553695648908615, "learning_rate": 0.002, "loss": 2.5751, "step": 347800 }, { "epoch": 0.6929148603850568, "grad_norm": 0.19453459978103638, "learning_rate": 0.002, "loss": 2.5442, "step": 347810 }, { "epoch": 0.6929347826086957, "grad_norm": 0.150964617729187, "learning_rate": 0.002, "loss": 2.5571, "step": 347820 }, { "epoch": 0.6929547048323346, "grad_norm": 0.177743062376976, "learning_rate": 0.002, "loss": 2.5635, "step": 347830 }, { "epoch": 0.6929746270559735, "grad_norm": 0.21612024307250977, "learning_rate": 0.002, "loss": 2.5519, "step": 347840 }, { "epoch": 0.6929945492796123, "grad_norm": 0.14899186789989471, "learning_rate": 0.002, "loss": 2.5454, "step": 347850 }, { "epoch": 0.6930144715032514, "grad_norm": 0.18237556517124176, "learning_rate": 0.002, "loss": 2.5614, "step": 347860 }, { "epoch": 0.6930343937268902, "grad_norm": 0.1812726855278015, "learning_rate": 0.002, "loss": 2.5588, "step": 347870 }, { "epoch": 0.6930543159505291, "grad_norm": 0.1614299863576889, "learning_rate": 0.002, "loss": 2.5508, "step": 347880 }, { "epoch": 0.693074238174168, "grad_norm": 0.16301314532756805, "learning_rate": 0.002, "loss": 2.5556, "step": 347890 }, { "epoch": 0.6930941603978069, "grad_norm": 0.17021948099136353, "learning_rate": 0.002, "loss": 2.5393, "step": 347900 }, { "epoch": 0.6931140826214459, "grad_norm": 0.19927461445331573, "learning_rate": 0.002, "loss": 2.5529, "step": 347910 }, { "epoch": 0.6931340048450848, "grad_norm": 0.15033994615077972, "learning_rate": 0.002, "loss": 2.5331, "step": 347920 }, { "epoch": 0.6931539270687237, "grad_norm": 0.17422425746917725, "learning_rate": 0.002, "loss": 2.549, "step": 347930 }, { "epoch": 0.6931738492923626, "grad_norm": 0.17059960961341858, "learning_rate": 0.002, "loss": 2.5457, "step": 347940 }, { "epoch": 0.6931937715160015, "grad_norm": 0.15889663994312286, "learning_rate": 0.002, "loss": 2.5825, "step": 347950 }, { "epoch": 0.6932136937396405, "grad_norm": 0.16504070162773132, "learning_rate": 0.002, "loss": 2.5486, "step": 347960 }, { "epoch": 0.6932336159632794, "grad_norm": 0.15706004202365875, "learning_rate": 0.002, "loss": 2.5425, "step": 347970 }, { "epoch": 0.6932535381869183, "grad_norm": 0.16951537132263184, "learning_rate": 0.002, "loss": 2.5657, "step": 347980 }, { "epoch": 0.6932734604105572, "grad_norm": 0.16508275270462036, "learning_rate": 0.002, "loss": 2.563, "step": 347990 }, { "epoch": 0.6932933826341962, "grad_norm": 0.16168417036533356, "learning_rate": 0.002, "loss": 2.553, "step": 348000 }, { "epoch": 0.693313304857835, "grad_norm": 0.1628180593252182, "learning_rate": 0.002, "loss": 2.5725, "step": 348010 }, { "epoch": 0.6933332270814739, "grad_norm": 0.18224811553955078, "learning_rate": 0.002, "loss": 2.5673, "step": 348020 }, { "epoch": 0.6933531493051128, "grad_norm": 0.16614335775375366, "learning_rate": 0.002, "loss": 2.5597, "step": 348030 }, { "epoch": 0.6933730715287517, "grad_norm": 0.18209069967269897, "learning_rate": 0.002, "loss": 2.545, "step": 348040 }, { "epoch": 0.6933929937523907, "grad_norm": 0.15559887886047363, "learning_rate": 0.002, "loss": 2.5367, "step": 348050 }, { "epoch": 0.6934129159760296, "grad_norm": 0.16899634897708893, "learning_rate": 0.002, "loss": 2.5524, "step": 348060 }, { "epoch": 0.6934328381996685, "grad_norm": 0.21264125406742096, "learning_rate": 0.002, "loss": 2.5694, "step": 348070 }, { "epoch": 0.6934527604233074, "grad_norm": 0.1571715772151947, "learning_rate": 0.002, "loss": 2.5458, "step": 348080 }, { "epoch": 0.6934726826469463, "grad_norm": 0.16481544077396393, "learning_rate": 0.002, "loss": 2.5417, "step": 348090 }, { "epoch": 0.6934926048705853, "grad_norm": 0.17673294246196747, "learning_rate": 0.002, "loss": 2.5592, "step": 348100 }, { "epoch": 0.6935125270942242, "grad_norm": 0.1909676045179367, "learning_rate": 0.002, "loss": 2.5467, "step": 348110 }, { "epoch": 0.6935324493178631, "grad_norm": 0.16883188486099243, "learning_rate": 0.002, "loss": 2.5492, "step": 348120 }, { "epoch": 0.693552371541502, "grad_norm": 0.17084740102291107, "learning_rate": 0.002, "loss": 2.5655, "step": 348130 }, { "epoch": 0.6935722937651408, "grad_norm": 0.15482835471630096, "learning_rate": 0.002, "loss": 2.5627, "step": 348140 }, { "epoch": 0.6935922159887798, "grad_norm": 0.19678275287151337, "learning_rate": 0.002, "loss": 2.5622, "step": 348150 }, { "epoch": 0.6936121382124187, "grad_norm": 0.15543588995933533, "learning_rate": 0.002, "loss": 2.5614, "step": 348160 }, { "epoch": 0.6936320604360576, "grad_norm": 0.1756870150566101, "learning_rate": 0.002, "loss": 2.5538, "step": 348170 }, { "epoch": 0.6936519826596965, "grad_norm": 0.1624651551246643, "learning_rate": 0.002, "loss": 2.5563, "step": 348180 }, { "epoch": 0.6936719048833354, "grad_norm": 0.1567525714635849, "learning_rate": 0.002, "loss": 2.5507, "step": 348190 }, { "epoch": 0.6936918271069744, "grad_norm": 0.1655937284231186, "learning_rate": 0.002, "loss": 2.5429, "step": 348200 }, { "epoch": 0.6937117493306133, "grad_norm": 0.14788463711738586, "learning_rate": 0.002, "loss": 2.5609, "step": 348210 }, { "epoch": 0.6937316715542522, "grad_norm": 0.17324605584144592, "learning_rate": 0.002, "loss": 2.5509, "step": 348220 }, { "epoch": 0.6937515937778911, "grad_norm": 0.14246313273906708, "learning_rate": 0.002, "loss": 2.5505, "step": 348230 }, { "epoch": 0.69377151600153, "grad_norm": 0.167422354221344, "learning_rate": 0.002, "loss": 2.5434, "step": 348240 }, { "epoch": 0.693791438225169, "grad_norm": 0.15348975360393524, "learning_rate": 0.002, "loss": 2.5583, "step": 348250 }, { "epoch": 0.6938113604488079, "grad_norm": 0.17384475469589233, "learning_rate": 0.002, "loss": 2.5394, "step": 348260 }, { "epoch": 0.6938312826724468, "grad_norm": 0.18888171017169952, "learning_rate": 0.002, "loss": 2.5467, "step": 348270 }, { "epoch": 0.6938512048960856, "grad_norm": 0.15485897660255432, "learning_rate": 0.002, "loss": 2.5676, "step": 348280 }, { "epoch": 0.6938711271197245, "grad_norm": 0.20491661131381989, "learning_rate": 0.002, "loss": 2.5537, "step": 348290 }, { "epoch": 0.6938910493433635, "grad_norm": 0.1483697146177292, "learning_rate": 0.002, "loss": 2.555, "step": 348300 }, { "epoch": 0.6939109715670024, "grad_norm": 0.16692017018795013, "learning_rate": 0.002, "loss": 2.553, "step": 348310 }, { "epoch": 0.6939308937906413, "grad_norm": 0.16939100623130798, "learning_rate": 0.002, "loss": 2.5549, "step": 348320 }, { "epoch": 0.6939508160142802, "grad_norm": 0.1791706681251526, "learning_rate": 0.002, "loss": 2.5546, "step": 348330 }, { "epoch": 0.6939707382379192, "grad_norm": 0.21872882544994354, "learning_rate": 0.002, "loss": 2.5669, "step": 348340 }, { "epoch": 0.6939906604615581, "grad_norm": 0.16570904850959778, "learning_rate": 0.002, "loss": 2.5365, "step": 348350 }, { "epoch": 0.694010582685197, "grad_norm": 0.14851032197475433, "learning_rate": 0.002, "loss": 2.5691, "step": 348360 }, { "epoch": 0.6940305049088359, "grad_norm": 0.1627861112356186, "learning_rate": 0.002, "loss": 2.56, "step": 348370 }, { "epoch": 0.6940504271324748, "grad_norm": 0.21271692216396332, "learning_rate": 0.002, "loss": 2.5502, "step": 348380 }, { "epoch": 0.6940703493561138, "grad_norm": 0.15503576397895813, "learning_rate": 0.002, "loss": 2.5604, "step": 348390 }, { "epoch": 0.6940902715797527, "grad_norm": 0.17347632348537445, "learning_rate": 0.002, "loss": 2.5492, "step": 348400 }, { "epoch": 0.6941101938033916, "grad_norm": 0.1571870893239975, "learning_rate": 0.002, "loss": 2.5503, "step": 348410 }, { "epoch": 0.6941301160270305, "grad_norm": 0.19234688580036163, "learning_rate": 0.002, "loss": 2.5473, "step": 348420 }, { "epoch": 0.6941500382506693, "grad_norm": 0.13772520422935486, "learning_rate": 0.002, "loss": 2.5497, "step": 348430 }, { "epoch": 0.6941699604743083, "grad_norm": 0.17316047847270966, "learning_rate": 0.002, "loss": 2.5553, "step": 348440 }, { "epoch": 0.6941898826979472, "grad_norm": 0.1827620565891266, "learning_rate": 0.002, "loss": 2.5496, "step": 348450 }, { "epoch": 0.6942098049215861, "grad_norm": 0.18542467057704926, "learning_rate": 0.002, "loss": 2.5556, "step": 348460 }, { "epoch": 0.694229727145225, "grad_norm": 0.13256791234016418, "learning_rate": 0.002, "loss": 2.5502, "step": 348470 }, { "epoch": 0.6942496493688639, "grad_norm": 0.1602543443441391, "learning_rate": 0.002, "loss": 2.557, "step": 348480 }, { "epoch": 0.6942695715925029, "grad_norm": 0.2026156336069107, "learning_rate": 0.002, "loss": 2.5608, "step": 348490 }, { "epoch": 0.6942894938161418, "grad_norm": 0.16239461302757263, "learning_rate": 0.002, "loss": 2.5482, "step": 348500 }, { "epoch": 0.6943094160397807, "grad_norm": 0.18720288574695587, "learning_rate": 0.002, "loss": 2.5608, "step": 348510 }, { "epoch": 0.6943293382634196, "grad_norm": 0.16884957253932953, "learning_rate": 0.002, "loss": 2.5488, "step": 348520 }, { "epoch": 0.6943492604870585, "grad_norm": 0.15876446664333344, "learning_rate": 0.002, "loss": 2.5552, "step": 348530 }, { "epoch": 0.6943691827106975, "grad_norm": 0.21215170621871948, "learning_rate": 0.002, "loss": 2.5575, "step": 348540 }, { "epoch": 0.6943891049343364, "grad_norm": 0.1603657603263855, "learning_rate": 0.002, "loss": 2.5452, "step": 348550 }, { "epoch": 0.6944090271579753, "grad_norm": 0.16235728561878204, "learning_rate": 0.002, "loss": 2.553, "step": 348560 }, { "epoch": 0.6944289493816141, "grad_norm": 0.16051173210144043, "learning_rate": 0.002, "loss": 2.5696, "step": 348570 }, { "epoch": 0.694448871605253, "grad_norm": 0.16874848306179047, "learning_rate": 0.002, "loss": 2.5562, "step": 348580 }, { "epoch": 0.694468793828892, "grad_norm": 0.15251697599887848, "learning_rate": 0.002, "loss": 2.5622, "step": 348590 }, { "epoch": 0.6944887160525309, "grad_norm": 0.15601418912410736, "learning_rate": 0.002, "loss": 2.5539, "step": 348600 }, { "epoch": 0.6945086382761698, "grad_norm": 0.23619388043880463, "learning_rate": 0.002, "loss": 2.5633, "step": 348610 }, { "epoch": 0.6945285604998087, "grad_norm": 0.15504805743694305, "learning_rate": 0.002, "loss": 2.5718, "step": 348620 }, { "epoch": 0.6945484827234477, "grad_norm": 0.16374576091766357, "learning_rate": 0.002, "loss": 2.5549, "step": 348630 }, { "epoch": 0.6945684049470866, "grad_norm": 0.18250888586044312, "learning_rate": 0.002, "loss": 2.5475, "step": 348640 }, { "epoch": 0.6945883271707255, "grad_norm": 0.1884254515171051, "learning_rate": 0.002, "loss": 2.5551, "step": 348650 }, { "epoch": 0.6946082493943644, "grad_norm": 0.16850335896015167, "learning_rate": 0.002, "loss": 2.5623, "step": 348660 }, { "epoch": 0.6946281716180033, "grad_norm": 0.1595281958580017, "learning_rate": 0.002, "loss": 2.5563, "step": 348670 }, { "epoch": 0.6946480938416423, "grad_norm": 0.1631820648908615, "learning_rate": 0.002, "loss": 2.558, "step": 348680 }, { "epoch": 0.6946680160652812, "grad_norm": 0.17766758799552917, "learning_rate": 0.002, "loss": 2.5565, "step": 348690 }, { "epoch": 0.6946879382889201, "grad_norm": 0.19079731404781342, "learning_rate": 0.002, "loss": 2.5592, "step": 348700 }, { "epoch": 0.694707860512559, "grad_norm": 0.14248186349868774, "learning_rate": 0.002, "loss": 2.5522, "step": 348710 }, { "epoch": 0.6947277827361978, "grad_norm": 0.1512080281972885, "learning_rate": 0.002, "loss": 2.5609, "step": 348720 }, { "epoch": 0.6947477049598368, "grad_norm": 0.20951347053050995, "learning_rate": 0.002, "loss": 2.5463, "step": 348730 }, { "epoch": 0.6947676271834757, "grad_norm": 0.14275558292865753, "learning_rate": 0.002, "loss": 2.5572, "step": 348740 }, { "epoch": 0.6947875494071146, "grad_norm": 0.1695501208305359, "learning_rate": 0.002, "loss": 2.5597, "step": 348750 }, { "epoch": 0.6948074716307535, "grad_norm": 0.16849830746650696, "learning_rate": 0.002, "loss": 2.548, "step": 348760 }, { "epoch": 0.6948273938543924, "grad_norm": 0.23659498989582062, "learning_rate": 0.002, "loss": 2.5478, "step": 348770 }, { "epoch": 0.6948473160780314, "grad_norm": 0.1452983021736145, "learning_rate": 0.002, "loss": 2.5643, "step": 348780 }, { "epoch": 0.6948672383016703, "grad_norm": 0.13495419919490814, "learning_rate": 0.002, "loss": 2.5308, "step": 348790 }, { "epoch": 0.6948871605253092, "grad_norm": 0.18335574865341187, "learning_rate": 0.002, "loss": 2.5434, "step": 348800 }, { "epoch": 0.6949070827489481, "grad_norm": 0.13245870172977448, "learning_rate": 0.002, "loss": 2.5712, "step": 348810 }, { "epoch": 0.694927004972587, "grad_norm": 0.1763635277748108, "learning_rate": 0.002, "loss": 2.5656, "step": 348820 }, { "epoch": 0.694946927196226, "grad_norm": 0.18644896149635315, "learning_rate": 0.002, "loss": 2.5641, "step": 348830 }, { "epoch": 0.6949668494198649, "grad_norm": 0.1643611341714859, "learning_rate": 0.002, "loss": 2.5418, "step": 348840 }, { "epoch": 0.6949867716435038, "grad_norm": 0.17335094511508942, "learning_rate": 0.002, "loss": 2.5541, "step": 348850 }, { "epoch": 0.6950066938671426, "grad_norm": 0.19648881256580353, "learning_rate": 0.002, "loss": 2.5459, "step": 348860 }, { "epoch": 0.6950266160907815, "grad_norm": 0.17037808895111084, "learning_rate": 0.002, "loss": 2.553, "step": 348870 }, { "epoch": 0.6950465383144205, "grad_norm": 0.16318954527378082, "learning_rate": 0.002, "loss": 2.5621, "step": 348880 }, { "epoch": 0.6950664605380594, "grad_norm": 0.15485888719558716, "learning_rate": 0.002, "loss": 2.562, "step": 348890 }, { "epoch": 0.6950863827616983, "grad_norm": 0.21491345763206482, "learning_rate": 0.002, "loss": 2.5442, "step": 348900 }, { "epoch": 0.6951063049853372, "grad_norm": 0.16015417873859406, "learning_rate": 0.002, "loss": 2.5612, "step": 348910 }, { "epoch": 0.6951262272089762, "grad_norm": 0.15329888463020325, "learning_rate": 0.002, "loss": 2.5698, "step": 348920 }, { "epoch": 0.6951461494326151, "grad_norm": 0.20453287661075592, "learning_rate": 0.002, "loss": 2.5511, "step": 348930 }, { "epoch": 0.695166071656254, "grad_norm": 0.16641570627689362, "learning_rate": 0.002, "loss": 2.5631, "step": 348940 }, { "epoch": 0.6951859938798929, "grad_norm": 0.1648128181695938, "learning_rate": 0.002, "loss": 2.564, "step": 348950 }, { "epoch": 0.6952059161035318, "grad_norm": 0.1601470559835434, "learning_rate": 0.002, "loss": 2.5481, "step": 348960 }, { "epoch": 0.6952258383271708, "grad_norm": 0.14340460300445557, "learning_rate": 0.002, "loss": 2.5496, "step": 348970 }, { "epoch": 0.6952457605508097, "grad_norm": 0.17047160863876343, "learning_rate": 0.002, "loss": 2.5575, "step": 348980 }, { "epoch": 0.6952656827744486, "grad_norm": 0.15321628749370575, "learning_rate": 0.002, "loss": 2.5459, "step": 348990 }, { "epoch": 0.6952856049980874, "grad_norm": 0.17634835839271545, "learning_rate": 0.002, "loss": 2.5574, "step": 349000 }, { "epoch": 0.6953055272217263, "grad_norm": 0.177052840590477, "learning_rate": 0.002, "loss": 2.5568, "step": 349010 }, { "epoch": 0.6953254494453653, "grad_norm": 0.2180148810148239, "learning_rate": 0.002, "loss": 2.5446, "step": 349020 }, { "epoch": 0.6953453716690042, "grad_norm": 0.14456871151924133, "learning_rate": 0.002, "loss": 2.5646, "step": 349030 }, { "epoch": 0.6953652938926431, "grad_norm": 0.1816345900297165, "learning_rate": 0.002, "loss": 2.549, "step": 349040 }, { "epoch": 0.695385216116282, "grad_norm": 0.14419986307621002, "learning_rate": 0.002, "loss": 2.5514, "step": 349050 }, { "epoch": 0.6954051383399209, "grad_norm": 0.15718139708042145, "learning_rate": 0.002, "loss": 2.5585, "step": 349060 }, { "epoch": 0.6954250605635599, "grad_norm": 0.1849132627248764, "learning_rate": 0.002, "loss": 2.5665, "step": 349070 }, { "epoch": 0.6954449827871988, "grad_norm": 0.20963934063911438, "learning_rate": 0.002, "loss": 2.5569, "step": 349080 }, { "epoch": 0.6954649050108377, "grad_norm": 0.16820058226585388, "learning_rate": 0.002, "loss": 2.5572, "step": 349090 }, { "epoch": 0.6954848272344766, "grad_norm": 0.16163481771945953, "learning_rate": 0.002, "loss": 2.5577, "step": 349100 }, { "epoch": 0.6955047494581155, "grad_norm": 0.20099125802516937, "learning_rate": 0.002, "loss": 2.5636, "step": 349110 }, { "epoch": 0.6955246716817545, "grad_norm": 0.1828695833683014, "learning_rate": 0.002, "loss": 2.5616, "step": 349120 }, { "epoch": 0.6955445939053934, "grad_norm": 0.1746903657913208, "learning_rate": 0.002, "loss": 2.5569, "step": 349130 }, { "epoch": 0.6955645161290323, "grad_norm": 0.15782296657562256, "learning_rate": 0.002, "loss": 2.5653, "step": 349140 }, { "epoch": 0.6955844383526711, "grad_norm": 0.14468203485012054, "learning_rate": 0.002, "loss": 2.5511, "step": 349150 }, { "epoch": 0.69560436057631, "grad_norm": 0.16094540059566498, "learning_rate": 0.002, "loss": 2.5349, "step": 349160 }, { "epoch": 0.695624282799949, "grad_norm": 0.1821277141571045, "learning_rate": 0.002, "loss": 2.5598, "step": 349170 }, { "epoch": 0.6956442050235879, "grad_norm": 0.18614456057548523, "learning_rate": 0.002, "loss": 2.5482, "step": 349180 }, { "epoch": 0.6956641272472268, "grad_norm": 0.186176598072052, "learning_rate": 0.002, "loss": 2.5602, "step": 349190 }, { "epoch": 0.6956840494708657, "grad_norm": 0.18526789546012878, "learning_rate": 0.002, "loss": 2.5578, "step": 349200 }, { "epoch": 0.6957039716945047, "grad_norm": 0.15185824036598206, "learning_rate": 0.002, "loss": 2.5588, "step": 349210 }, { "epoch": 0.6957238939181436, "grad_norm": 0.16928789019584656, "learning_rate": 0.002, "loss": 2.5531, "step": 349220 }, { "epoch": 0.6957438161417825, "grad_norm": 0.18213649094104767, "learning_rate": 0.002, "loss": 2.5615, "step": 349230 }, { "epoch": 0.6957637383654214, "grad_norm": 0.14767593145370483, "learning_rate": 0.002, "loss": 2.5463, "step": 349240 }, { "epoch": 0.6957836605890603, "grad_norm": 0.17485663294792175, "learning_rate": 0.002, "loss": 2.5623, "step": 349250 }, { "epoch": 0.6958035828126993, "grad_norm": 0.15093082189559937, "learning_rate": 0.002, "loss": 2.545, "step": 349260 }, { "epoch": 0.6958235050363382, "grad_norm": 0.16433686017990112, "learning_rate": 0.002, "loss": 2.5575, "step": 349270 }, { "epoch": 0.695843427259977, "grad_norm": 0.1707078516483307, "learning_rate": 0.002, "loss": 2.5447, "step": 349280 }, { "epoch": 0.695863349483616, "grad_norm": 0.18238979578018188, "learning_rate": 0.002, "loss": 2.5683, "step": 349290 }, { "epoch": 0.6958832717072548, "grad_norm": 0.19146478176116943, "learning_rate": 0.002, "loss": 2.561, "step": 349300 }, { "epoch": 0.6959031939308938, "grad_norm": 0.16547124087810516, "learning_rate": 0.002, "loss": 2.5588, "step": 349310 }, { "epoch": 0.6959231161545327, "grad_norm": 0.1888476014137268, "learning_rate": 0.002, "loss": 2.5438, "step": 349320 }, { "epoch": 0.6959430383781716, "grad_norm": 0.14765682816505432, "learning_rate": 0.002, "loss": 2.5601, "step": 349330 }, { "epoch": 0.6959629606018105, "grad_norm": 0.1492682695388794, "learning_rate": 0.002, "loss": 2.5412, "step": 349340 }, { "epoch": 0.6959828828254494, "grad_norm": 0.24461539089679718, "learning_rate": 0.002, "loss": 2.5555, "step": 349350 }, { "epoch": 0.6960028050490884, "grad_norm": 0.14318619668483734, "learning_rate": 0.002, "loss": 2.5667, "step": 349360 }, { "epoch": 0.6960227272727273, "grad_norm": 0.16966237127780914, "learning_rate": 0.002, "loss": 2.5428, "step": 349370 }, { "epoch": 0.6960426494963662, "grad_norm": 0.14542661607265472, "learning_rate": 0.002, "loss": 2.5567, "step": 349380 }, { "epoch": 0.6960625717200051, "grad_norm": 0.16141551733016968, "learning_rate": 0.002, "loss": 2.5652, "step": 349390 }, { "epoch": 0.696082493943644, "grad_norm": 0.17724411189556122, "learning_rate": 0.002, "loss": 2.5578, "step": 349400 }, { "epoch": 0.696102416167283, "grad_norm": 0.1467602401971817, "learning_rate": 0.002, "loss": 2.546, "step": 349410 }, { "epoch": 0.6961223383909219, "grad_norm": 0.16205234825611115, "learning_rate": 0.002, "loss": 2.5679, "step": 349420 }, { "epoch": 0.6961422606145607, "grad_norm": 0.17234356701374054, "learning_rate": 0.002, "loss": 2.5553, "step": 349430 }, { "epoch": 0.6961621828381996, "grad_norm": 0.1929551064968109, "learning_rate": 0.002, "loss": 2.5555, "step": 349440 }, { "epoch": 0.6961821050618385, "grad_norm": 0.1516048014163971, "learning_rate": 0.002, "loss": 2.5629, "step": 349450 }, { "epoch": 0.6962020272854775, "grad_norm": 0.20144659280776978, "learning_rate": 0.002, "loss": 2.5565, "step": 349460 }, { "epoch": 0.6962219495091164, "grad_norm": 0.17210939526557922, "learning_rate": 0.002, "loss": 2.5547, "step": 349470 }, { "epoch": 0.6962418717327553, "grad_norm": 0.17931485176086426, "learning_rate": 0.002, "loss": 2.5709, "step": 349480 }, { "epoch": 0.6962617939563942, "grad_norm": 0.17207352817058563, "learning_rate": 0.002, "loss": 2.5589, "step": 349490 }, { "epoch": 0.6962817161800332, "grad_norm": 0.17689967155456543, "learning_rate": 0.002, "loss": 2.5598, "step": 349500 }, { "epoch": 0.6963016384036721, "grad_norm": 0.16336598992347717, "learning_rate": 0.002, "loss": 2.5663, "step": 349510 }, { "epoch": 0.696321560627311, "grad_norm": 0.18263794481754303, "learning_rate": 0.002, "loss": 2.5446, "step": 349520 }, { "epoch": 0.6963414828509499, "grad_norm": 0.15641872584819794, "learning_rate": 0.002, "loss": 2.5514, "step": 349530 }, { "epoch": 0.6963614050745888, "grad_norm": 0.19013744592666626, "learning_rate": 0.002, "loss": 2.5553, "step": 349540 }, { "epoch": 0.6963813272982278, "grad_norm": 0.17328135669231415, "learning_rate": 0.002, "loss": 2.5682, "step": 349550 }, { "epoch": 0.6964012495218667, "grad_norm": 0.1880042850971222, "learning_rate": 0.002, "loss": 2.5682, "step": 349560 }, { "epoch": 0.6964211717455056, "grad_norm": 0.17120887339115143, "learning_rate": 0.002, "loss": 2.5618, "step": 349570 }, { "epoch": 0.6964410939691444, "grad_norm": 0.17448560893535614, "learning_rate": 0.002, "loss": 2.5543, "step": 349580 }, { "epoch": 0.6964610161927833, "grad_norm": 0.15495465695858002, "learning_rate": 0.002, "loss": 2.5324, "step": 349590 }, { "epoch": 0.6964809384164223, "grad_norm": 0.1680152416229248, "learning_rate": 0.002, "loss": 2.5459, "step": 349600 }, { "epoch": 0.6965008606400612, "grad_norm": 0.2182176560163498, "learning_rate": 0.002, "loss": 2.5544, "step": 349610 }, { "epoch": 0.6965207828637001, "grad_norm": 0.17981503903865814, "learning_rate": 0.002, "loss": 2.5578, "step": 349620 }, { "epoch": 0.696540705087339, "grad_norm": 0.13568924367427826, "learning_rate": 0.002, "loss": 2.5699, "step": 349630 }, { "epoch": 0.6965606273109779, "grad_norm": 0.15986749529838562, "learning_rate": 0.002, "loss": 2.5629, "step": 349640 }, { "epoch": 0.6965805495346169, "grad_norm": 0.1651007980108261, "learning_rate": 0.002, "loss": 2.5695, "step": 349650 }, { "epoch": 0.6966004717582558, "grad_norm": 0.16122643649578094, "learning_rate": 0.002, "loss": 2.5599, "step": 349660 }, { "epoch": 0.6966203939818947, "grad_norm": 0.1393604278564453, "learning_rate": 0.002, "loss": 2.5534, "step": 349670 }, { "epoch": 0.6966403162055336, "grad_norm": 0.1419226974248886, "learning_rate": 0.002, "loss": 2.5549, "step": 349680 }, { "epoch": 0.6966602384291725, "grad_norm": 0.16260677576065063, "learning_rate": 0.002, "loss": 2.5566, "step": 349690 }, { "epoch": 0.6966801606528115, "grad_norm": 0.14882205426692963, "learning_rate": 0.002, "loss": 2.5407, "step": 349700 }, { "epoch": 0.6967000828764504, "grad_norm": 0.18568867444992065, "learning_rate": 0.002, "loss": 2.5506, "step": 349710 }, { "epoch": 0.6967200051000892, "grad_norm": 0.1644599735736847, "learning_rate": 0.002, "loss": 2.5616, "step": 349720 }, { "epoch": 0.6967399273237281, "grad_norm": 0.16995982825756073, "learning_rate": 0.002, "loss": 2.5634, "step": 349730 }, { "epoch": 0.696759849547367, "grad_norm": 0.1777978092432022, "learning_rate": 0.002, "loss": 2.5361, "step": 349740 }, { "epoch": 0.696779771771006, "grad_norm": 0.1832057684659958, "learning_rate": 0.002, "loss": 2.5536, "step": 349750 }, { "epoch": 0.6967996939946449, "grad_norm": 0.15759925544261932, "learning_rate": 0.002, "loss": 2.5535, "step": 349760 }, { "epoch": 0.6968196162182838, "grad_norm": 0.1849820762872696, "learning_rate": 0.002, "loss": 2.5409, "step": 349770 }, { "epoch": 0.6968395384419227, "grad_norm": 0.17450346052646637, "learning_rate": 0.002, "loss": 2.5484, "step": 349780 }, { "epoch": 0.6968594606655616, "grad_norm": 0.15410694479942322, "learning_rate": 0.002, "loss": 2.558, "step": 349790 }, { "epoch": 0.6968793828892006, "grad_norm": 0.17239081859588623, "learning_rate": 0.002, "loss": 2.5641, "step": 349800 }, { "epoch": 0.6968993051128395, "grad_norm": 0.1628047227859497, "learning_rate": 0.002, "loss": 2.5501, "step": 349810 }, { "epoch": 0.6969192273364784, "grad_norm": 0.15693579614162445, "learning_rate": 0.002, "loss": 2.5568, "step": 349820 }, { "epoch": 0.6969391495601173, "grad_norm": 0.18990856409072876, "learning_rate": 0.002, "loss": 2.5629, "step": 349830 }, { "epoch": 0.6969590717837563, "grad_norm": 0.16510652005672455, "learning_rate": 0.002, "loss": 2.5505, "step": 349840 }, { "epoch": 0.6969789940073952, "grad_norm": 0.15463308990001678, "learning_rate": 0.002, "loss": 2.576, "step": 349850 }, { "epoch": 0.696998916231034, "grad_norm": 0.18653085827827454, "learning_rate": 0.002, "loss": 2.5576, "step": 349860 }, { "epoch": 0.6970188384546729, "grad_norm": 0.15310591459274292, "learning_rate": 0.002, "loss": 2.5447, "step": 349870 }, { "epoch": 0.6970387606783118, "grad_norm": 0.17563189566135406, "learning_rate": 0.002, "loss": 2.546, "step": 349880 }, { "epoch": 0.6970586829019508, "grad_norm": 0.16191910207271576, "learning_rate": 0.002, "loss": 2.5525, "step": 349890 }, { "epoch": 0.6970786051255897, "grad_norm": 0.15564315021038055, "learning_rate": 0.002, "loss": 2.5614, "step": 349900 }, { "epoch": 0.6970985273492286, "grad_norm": 0.16679422557353973, "learning_rate": 0.002, "loss": 2.5499, "step": 349910 }, { "epoch": 0.6971184495728675, "grad_norm": 0.1567021757364273, "learning_rate": 0.002, "loss": 2.5562, "step": 349920 }, { "epoch": 0.6971383717965064, "grad_norm": 0.18413744866847992, "learning_rate": 0.002, "loss": 2.5642, "step": 349930 }, { "epoch": 0.6971582940201454, "grad_norm": 0.1663791984319687, "learning_rate": 0.002, "loss": 2.5546, "step": 349940 }, { "epoch": 0.6971782162437843, "grad_norm": 0.1955738514661789, "learning_rate": 0.002, "loss": 2.5475, "step": 349950 }, { "epoch": 0.6971981384674232, "grad_norm": 0.18305020034313202, "learning_rate": 0.002, "loss": 2.5674, "step": 349960 }, { "epoch": 0.6972180606910621, "grad_norm": 0.14742280542850494, "learning_rate": 0.002, "loss": 2.5535, "step": 349970 }, { "epoch": 0.697237982914701, "grad_norm": 0.1540450155735016, "learning_rate": 0.002, "loss": 2.5634, "step": 349980 }, { "epoch": 0.69725790513834, "grad_norm": 0.15896405279636383, "learning_rate": 0.002, "loss": 2.5538, "step": 349990 }, { "epoch": 0.6972778273619789, "grad_norm": 0.22918732464313507, "learning_rate": 0.002, "loss": 2.5724, "step": 350000 }, { "epoch": 0.6972977495856177, "grad_norm": 0.1782345324754715, "learning_rate": 0.002, "loss": 2.5532, "step": 350010 }, { "epoch": 0.6973176718092566, "grad_norm": 0.13744063675403595, "learning_rate": 0.002, "loss": 2.5692, "step": 350020 }, { "epoch": 0.6973375940328955, "grad_norm": 0.1658572554588318, "learning_rate": 0.002, "loss": 2.5518, "step": 350030 }, { "epoch": 0.6973575162565345, "grad_norm": 0.1531880497932434, "learning_rate": 0.002, "loss": 2.5504, "step": 350040 }, { "epoch": 0.6973774384801734, "grad_norm": 0.198338121175766, "learning_rate": 0.002, "loss": 2.5655, "step": 350050 }, { "epoch": 0.6973973607038123, "grad_norm": 0.152738556265831, "learning_rate": 0.002, "loss": 2.5575, "step": 350060 }, { "epoch": 0.6974172829274512, "grad_norm": 0.16052861511707306, "learning_rate": 0.002, "loss": 2.5559, "step": 350070 }, { "epoch": 0.6974372051510901, "grad_norm": 0.19299402832984924, "learning_rate": 0.002, "loss": 2.5553, "step": 350080 }, { "epoch": 0.6974571273747291, "grad_norm": 0.1889379471540451, "learning_rate": 0.002, "loss": 2.5447, "step": 350090 }, { "epoch": 0.697477049598368, "grad_norm": 0.18913531303405762, "learning_rate": 0.002, "loss": 2.5474, "step": 350100 }, { "epoch": 0.6974969718220069, "grad_norm": 0.15184231102466583, "learning_rate": 0.002, "loss": 2.554, "step": 350110 }, { "epoch": 0.6975168940456458, "grad_norm": 0.19239866733551025, "learning_rate": 0.002, "loss": 2.5682, "step": 350120 }, { "epoch": 0.6975368162692848, "grad_norm": 0.15515494346618652, "learning_rate": 0.002, "loss": 2.5517, "step": 350130 }, { "epoch": 0.6975567384929237, "grad_norm": 0.1898423582315445, "learning_rate": 0.002, "loss": 2.5579, "step": 350140 }, { "epoch": 0.6975766607165625, "grad_norm": 0.15531198680400848, "learning_rate": 0.002, "loss": 2.5573, "step": 350150 }, { "epoch": 0.6975965829402014, "grad_norm": 0.16364742815494537, "learning_rate": 0.002, "loss": 2.5488, "step": 350160 }, { "epoch": 0.6976165051638403, "grad_norm": 0.1852923035621643, "learning_rate": 0.002, "loss": 2.5445, "step": 350170 }, { "epoch": 0.6976364273874793, "grad_norm": 0.15080884099006653, "learning_rate": 0.002, "loss": 2.5541, "step": 350180 }, { "epoch": 0.6976563496111182, "grad_norm": 0.1559951901435852, "learning_rate": 0.002, "loss": 2.5695, "step": 350190 }, { "epoch": 0.6976762718347571, "grad_norm": 0.18127240240573883, "learning_rate": 0.002, "loss": 2.5651, "step": 350200 }, { "epoch": 0.697696194058396, "grad_norm": 0.14461033046245575, "learning_rate": 0.002, "loss": 2.5476, "step": 350210 }, { "epoch": 0.6977161162820349, "grad_norm": 0.15722346305847168, "learning_rate": 0.002, "loss": 2.5456, "step": 350220 }, { "epoch": 0.6977360385056739, "grad_norm": 0.17772004008293152, "learning_rate": 0.002, "loss": 2.5571, "step": 350230 }, { "epoch": 0.6977559607293128, "grad_norm": 0.1589048206806183, "learning_rate": 0.002, "loss": 2.5572, "step": 350240 }, { "epoch": 0.6977758829529517, "grad_norm": 0.1716071367263794, "learning_rate": 0.002, "loss": 2.5587, "step": 350250 }, { "epoch": 0.6977958051765906, "grad_norm": 0.16126100718975067, "learning_rate": 0.002, "loss": 2.5382, "step": 350260 }, { "epoch": 0.6978157274002295, "grad_norm": 0.184049591422081, "learning_rate": 0.002, "loss": 2.5507, "step": 350270 }, { "epoch": 0.6978356496238685, "grad_norm": 0.2021227777004242, "learning_rate": 0.002, "loss": 2.5391, "step": 350280 }, { "epoch": 0.6978555718475073, "grad_norm": 0.17528045177459717, "learning_rate": 0.002, "loss": 2.5631, "step": 350290 }, { "epoch": 0.6978754940711462, "grad_norm": 0.1956421285867691, "learning_rate": 0.002, "loss": 2.5689, "step": 350300 }, { "epoch": 0.6978954162947851, "grad_norm": 0.18154051899909973, "learning_rate": 0.002, "loss": 2.5705, "step": 350310 }, { "epoch": 0.697915338518424, "grad_norm": 0.15856638550758362, "learning_rate": 0.002, "loss": 2.5489, "step": 350320 }, { "epoch": 0.697935260742063, "grad_norm": 0.16127154231071472, "learning_rate": 0.002, "loss": 2.5573, "step": 350330 }, { "epoch": 0.6979551829657019, "grad_norm": 0.16875287890434265, "learning_rate": 0.002, "loss": 2.5652, "step": 350340 }, { "epoch": 0.6979751051893408, "grad_norm": 0.18047896027565002, "learning_rate": 0.002, "loss": 2.5659, "step": 350350 }, { "epoch": 0.6979950274129797, "grad_norm": 0.16226255893707275, "learning_rate": 0.002, "loss": 2.5667, "step": 350360 }, { "epoch": 0.6980149496366186, "grad_norm": 0.1907666027545929, "learning_rate": 0.002, "loss": 2.5702, "step": 350370 }, { "epoch": 0.6980348718602576, "grad_norm": 0.16102154552936554, "learning_rate": 0.002, "loss": 2.563, "step": 350380 }, { "epoch": 0.6980547940838965, "grad_norm": 0.19458484649658203, "learning_rate": 0.002, "loss": 2.568, "step": 350390 }, { "epoch": 0.6980747163075354, "grad_norm": 0.15561550855636597, "learning_rate": 0.002, "loss": 2.5341, "step": 350400 }, { "epoch": 0.6980946385311743, "grad_norm": 0.15048037469387054, "learning_rate": 0.002, "loss": 2.5429, "step": 350410 }, { "epoch": 0.6981145607548133, "grad_norm": 0.15694262087345123, "learning_rate": 0.002, "loss": 2.557, "step": 350420 }, { "epoch": 0.6981344829784522, "grad_norm": 0.18332552909851074, "learning_rate": 0.002, "loss": 2.5444, "step": 350430 }, { "epoch": 0.698154405202091, "grad_norm": 0.16098040342330933, "learning_rate": 0.002, "loss": 2.5601, "step": 350440 }, { "epoch": 0.6981743274257299, "grad_norm": 0.1496974676847458, "learning_rate": 0.002, "loss": 2.5513, "step": 350450 }, { "epoch": 0.6981942496493688, "grad_norm": 0.19188688695430756, "learning_rate": 0.002, "loss": 2.5531, "step": 350460 }, { "epoch": 0.6982141718730078, "grad_norm": 0.16253963112831116, "learning_rate": 0.002, "loss": 2.5482, "step": 350470 }, { "epoch": 0.6982340940966467, "grad_norm": 0.18886233866214752, "learning_rate": 0.002, "loss": 2.5591, "step": 350480 }, { "epoch": 0.6982540163202856, "grad_norm": 0.14020872116088867, "learning_rate": 0.002, "loss": 2.5633, "step": 350490 }, { "epoch": 0.6982739385439245, "grad_norm": 0.14038804173469543, "learning_rate": 0.002, "loss": 2.5428, "step": 350500 }, { "epoch": 0.6982938607675634, "grad_norm": 0.17404745519161224, "learning_rate": 0.002, "loss": 2.544, "step": 350510 }, { "epoch": 0.6983137829912024, "grad_norm": 0.18118984997272491, "learning_rate": 0.002, "loss": 2.5651, "step": 350520 }, { "epoch": 0.6983337052148413, "grad_norm": 0.1367628127336502, "learning_rate": 0.002, "loss": 2.5465, "step": 350530 }, { "epoch": 0.6983536274384802, "grad_norm": 0.2007615715265274, "learning_rate": 0.002, "loss": 2.5428, "step": 350540 }, { "epoch": 0.6983735496621191, "grad_norm": 0.14942039549350739, "learning_rate": 0.002, "loss": 2.5515, "step": 350550 }, { "epoch": 0.698393471885758, "grad_norm": 0.13494449853897095, "learning_rate": 0.002, "loss": 2.5573, "step": 350560 }, { "epoch": 0.698413394109397, "grad_norm": 0.18364329636096954, "learning_rate": 0.002, "loss": 2.5447, "step": 350570 }, { "epoch": 0.6984333163330358, "grad_norm": 0.15858644247055054, "learning_rate": 0.002, "loss": 2.5689, "step": 350580 }, { "epoch": 0.6984532385566747, "grad_norm": 0.1884603351354599, "learning_rate": 0.002, "loss": 2.5446, "step": 350590 }, { "epoch": 0.6984731607803136, "grad_norm": 0.1554211974143982, "learning_rate": 0.002, "loss": 2.5572, "step": 350600 }, { "epoch": 0.6984930830039525, "grad_norm": 0.1570131778717041, "learning_rate": 0.002, "loss": 2.5591, "step": 350610 }, { "epoch": 0.6985130052275915, "grad_norm": 0.15361610054969788, "learning_rate": 0.002, "loss": 2.5504, "step": 350620 }, { "epoch": 0.6985329274512304, "grad_norm": 0.17965322732925415, "learning_rate": 0.002, "loss": 2.5621, "step": 350630 }, { "epoch": 0.6985528496748693, "grad_norm": 0.15176570415496826, "learning_rate": 0.002, "loss": 2.5588, "step": 350640 }, { "epoch": 0.6985727718985082, "grad_norm": 0.1688966453075409, "learning_rate": 0.002, "loss": 2.5553, "step": 350650 }, { "epoch": 0.6985926941221471, "grad_norm": 0.16345496475696564, "learning_rate": 0.002, "loss": 2.5535, "step": 350660 }, { "epoch": 0.6986126163457861, "grad_norm": 0.14988446235656738, "learning_rate": 0.002, "loss": 2.5541, "step": 350670 }, { "epoch": 0.698632538569425, "grad_norm": 0.186315655708313, "learning_rate": 0.002, "loss": 2.5529, "step": 350680 }, { "epoch": 0.6986524607930639, "grad_norm": 0.1664322465658188, "learning_rate": 0.002, "loss": 2.5587, "step": 350690 }, { "epoch": 0.6986723830167028, "grad_norm": 0.15137970447540283, "learning_rate": 0.002, "loss": 2.5609, "step": 350700 }, { "epoch": 0.6986923052403418, "grad_norm": 0.16148105263710022, "learning_rate": 0.002, "loss": 2.5633, "step": 350710 }, { "epoch": 0.6987122274639807, "grad_norm": 0.14434802532196045, "learning_rate": 0.002, "loss": 2.5549, "step": 350720 }, { "epoch": 0.6987321496876195, "grad_norm": 0.2242746353149414, "learning_rate": 0.002, "loss": 2.5562, "step": 350730 }, { "epoch": 0.6987520719112584, "grad_norm": 0.1682109534740448, "learning_rate": 0.002, "loss": 2.5398, "step": 350740 }, { "epoch": 0.6987719941348973, "grad_norm": 0.17262132465839386, "learning_rate": 0.002, "loss": 2.5312, "step": 350750 }, { "epoch": 0.6987919163585363, "grad_norm": 0.1686093807220459, "learning_rate": 0.002, "loss": 2.5433, "step": 350760 }, { "epoch": 0.6988118385821752, "grad_norm": 0.19373787939548492, "learning_rate": 0.002, "loss": 2.5553, "step": 350770 }, { "epoch": 0.6988317608058141, "grad_norm": 0.1428380310535431, "learning_rate": 0.002, "loss": 2.55, "step": 350780 }, { "epoch": 0.698851683029453, "grad_norm": 0.19463208317756653, "learning_rate": 0.002, "loss": 2.5594, "step": 350790 }, { "epoch": 0.6988716052530919, "grad_norm": 0.17005705833435059, "learning_rate": 0.002, "loss": 2.5541, "step": 350800 }, { "epoch": 0.6988915274767309, "grad_norm": 0.15821202099323273, "learning_rate": 0.002, "loss": 2.5515, "step": 350810 }, { "epoch": 0.6989114497003698, "grad_norm": 0.1593119204044342, "learning_rate": 0.002, "loss": 2.5595, "step": 350820 }, { "epoch": 0.6989313719240087, "grad_norm": 0.17785726487636566, "learning_rate": 0.002, "loss": 2.5523, "step": 350830 }, { "epoch": 0.6989512941476476, "grad_norm": 0.16679830849170685, "learning_rate": 0.002, "loss": 2.5535, "step": 350840 }, { "epoch": 0.6989712163712865, "grad_norm": 0.14110367000102997, "learning_rate": 0.002, "loss": 2.5546, "step": 350850 }, { "epoch": 0.6989911385949255, "grad_norm": 0.17446547746658325, "learning_rate": 0.002, "loss": 2.5534, "step": 350860 }, { "epoch": 0.6990110608185643, "grad_norm": 0.19387905299663544, "learning_rate": 0.002, "loss": 2.5615, "step": 350870 }, { "epoch": 0.6990309830422032, "grad_norm": 0.15126284956932068, "learning_rate": 0.002, "loss": 2.5582, "step": 350880 }, { "epoch": 0.6990509052658421, "grad_norm": 0.1985238790512085, "learning_rate": 0.002, "loss": 2.5374, "step": 350890 }, { "epoch": 0.699070827489481, "grad_norm": 0.14660194516181946, "learning_rate": 0.002, "loss": 2.5615, "step": 350900 }, { "epoch": 0.69909074971312, "grad_norm": 0.17332598567008972, "learning_rate": 0.002, "loss": 2.5526, "step": 350910 }, { "epoch": 0.6991106719367589, "grad_norm": 0.1637391746044159, "learning_rate": 0.002, "loss": 2.5506, "step": 350920 }, { "epoch": 0.6991305941603978, "grad_norm": 0.16943155229091644, "learning_rate": 0.002, "loss": 2.5515, "step": 350930 }, { "epoch": 0.6991505163840367, "grad_norm": 0.21828137338161469, "learning_rate": 0.002, "loss": 2.5511, "step": 350940 }, { "epoch": 0.6991704386076756, "grad_norm": 0.17554207146167755, "learning_rate": 0.002, "loss": 2.5396, "step": 350950 }, { "epoch": 0.6991903608313146, "grad_norm": 0.1370970606803894, "learning_rate": 0.002, "loss": 2.5647, "step": 350960 }, { "epoch": 0.6992102830549535, "grad_norm": 0.15717299282550812, "learning_rate": 0.002, "loss": 2.5561, "step": 350970 }, { "epoch": 0.6992302052785924, "grad_norm": 0.17968305945396423, "learning_rate": 0.002, "loss": 2.5346, "step": 350980 }, { "epoch": 0.6992501275022313, "grad_norm": 0.15721313655376434, "learning_rate": 0.002, "loss": 2.5532, "step": 350990 }, { "epoch": 0.6992700497258703, "grad_norm": 0.15663327276706696, "learning_rate": 0.002, "loss": 2.5502, "step": 351000 }, { "epoch": 0.6992899719495091, "grad_norm": 0.1500711590051651, "learning_rate": 0.002, "loss": 2.565, "step": 351010 }, { "epoch": 0.699309894173148, "grad_norm": 0.18173882365226746, "learning_rate": 0.002, "loss": 2.5647, "step": 351020 }, { "epoch": 0.6993298163967869, "grad_norm": 0.17815224826335907, "learning_rate": 0.002, "loss": 2.5612, "step": 351030 }, { "epoch": 0.6993497386204258, "grad_norm": 0.5192729234695435, "learning_rate": 0.002, "loss": 2.5614, "step": 351040 }, { "epoch": 0.6993696608440648, "grad_norm": 0.15841364860534668, "learning_rate": 0.002, "loss": 2.5568, "step": 351050 }, { "epoch": 0.6993895830677037, "grad_norm": 0.14533455669879913, "learning_rate": 0.002, "loss": 2.5671, "step": 351060 }, { "epoch": 0.6994095052913426, "grad_norm": 0.14045213162899017, "learning_rate": 0.002, "loss": 2.5452, "step": 351070 }, { "epoch": 0.6994294275149815, "grad_norm": 0.15265437960624695, "learning_rate": 0.002, "loss": 2.5524, "step": 351080 }, { "epoch": 0.6994493497386204, "grad_norm": 0.1455685794353485, "learning_rate": 0.002, "loss": 2.546, "step": 351090 }, { "epoch": 0.6994692719622594, "grad_norm": 0.19929173588752747, "learning_rate": 0.002, "loss": 2.5634, "step": 351100 }, { "epoch": 0.6994891941858983, "grad_norm": 0.1524747908115387, "learning_rate": 0.002, "loss": 2.5556, "step": 351110 }, { "epoch": 0.6995091164095372, "grad_norm": 0.20109452307224274, "learning_rate": 0.002, "loss": 2.5496, "step": 351120 }, { "epoch": 0.6995290386331761, "grad_norm": 0.17747296392917633, "learning_rate": 0.002, "loss": 2.5605, "step": 351130 }, { "epoch": 0.699548960856815, "grad_norm": 0.13560692965984344, "learning_rate": 0.002, "loss": 2.551, "step": 351140 }, { "epoch": 0.699568883080454, "grad_norm": 0.3129032254219055, "learning_rate": 0.002, "loss": 2.5635, "step": 351150 }, { "epoch": 0.6995888053040928, "grad_norm": 0.1524847149848938, "learning_rate": 0.002, "loss": 2.5709, "step": 351160 }, { "epoch": 0.6996087275277317, "grad_norm": 0.15652665495872498, "learning_rate": 0.002, "loss": 2.5578, "step": 351170 }, { "epoch": 0.6996286497513706, "grad_norm": 0.16196082532405853, "learning_rate": 0.002, "loss": 2.561, "step": 351180 }, { "epoch": 0.6996485719750095, "grad_norm": 0.1495143324136734, "learning_rate": 0.002, "loss": 2.5304, "step": 351190 }, { "epoch": 0.6996684941986485, "grad_norm": 0.15081743896007538, "learning_rate": 0.002, "loss": 2.5486, "step": 351200 }, { "epoch": 0.6996884164222874, "grad_norm": 0.1760508418083191, "learning_rate": 0.002, "loss": 2.568, "step": 351210 }, { "epoch": 0.6997083386459263, "grad_norm": 0.15268994867801666, "learning_rate": 0.002, "loss": 2.5574, "step": 351220 }, { "epoch": 0.6997282608695652, "grad_norm": 0.1660694181919098, "learning_rate": 0.002, "loss": 2.5652, "step": 351230 }, { "epoch": 0.6997481830932041, "grad_norm": 0.17315533757209778, "learning_rate": 0.002, "loss": 2.5412, "step": 351240 }, { "epoch": 0.6997681053168431, "grad_norm": 0.15275131165981293, "learning_rate": 0.002, "loss": 2.5601, "step": 351250 }, { "epoch": 0.699788027540482, "grad_norm": 0.16869166493415833, "learning_rate": 0.002, "loss": 2.5636, "step": 351260 }, { "epoch": 0.6998079497641209, "grad_norm": 0.13899089395999908, "learning_rate": 0.002, "loss": 2.5508, "step": 351270 }, { "epoch": 0.6998278719877598, "grad_norm": 0.19940392673015594, "learning_rate": 0.002, "loss": 2.5458, "step": 351280 }, { "epoch": 0.6998477942113988, "grad_norm": 0.15356025099754333, "learning_rate": 0.002, "loss": 2.5598, "step": 351290 }, { "epoch": 0.6998677164350376, "grad_norm": 0.16744425892829895, "learning_rate": 0.002, "loss": 2.5554, "step": 351300 }, { "epoch": 0.6998876386586765, "grad_norm": 0.1590312123298645, "learning_rate": 0.002, "loss": 2.5398, "step": 351310 }, { "epoch": 0.6999075608823154, "grad_norm": 0.17512263357639313, "learning_rate": 0.002, "loss": 2.5666, "step": 351320 }, { "epoch": 0.6999274831059543, "grad_norm": 0.17416171729564667, "learning_rate": 0.002, "loss": 2.5517, "step": 351330 }, { "epoch": 0.6999474053295933, "grad_norm": 0.1841118186712265, "learning_rate": 0.002, "loss": 2.5458, "step": 351340 }, { "epoch": 0.6999673275532322, "grad_norm": 0.14509865641593933, "learning_rate": 0.002, "loss": 2.5645, "step": 351350 }, { "epoch": 0.6999872497768711, "grad_norm": 0.17483274638652802, "learning_rate": 0.002, "loss": 2.5508, "step": 351360 }, { "epoch": 0.70000717200051, "grad_norm": 0.17986828088760376, "learning_rate": 0.002, "loss": 2.5661, "step": 351370 }, { "epoch": 0.7000270942241489, "grad_norm": 0.14624620974063873, "learning_rate": 0.002, "loss": 2.56, "step": 351380 }, { "epoch": 0.7000470164477879, "grad_norm": 0.16547036170959473, "learning_rate": 0.002, "loss": 2.545, "step": 351390 }, { "epoch": 0.7000669386714268, "grad_norm": 0.1576707810163498, "learning_rate": 0.002, "loss": 2.5413, "step": 351400 }, { "epoch": 0.7000868608950657, "grad_norm": 0.16736909747123718, "learning_rate": 0.002, "loss": 2.5683, "step": 351410 }, { "epoch": 0.7001067831187046, "grad_norm": 0.18082109093666077, "learning_rate": 0.002, "loss": 2.5459, "step": 351420 }, { "epoch": 0.7001267053423434, "grad_norm": 0.14882126450538635, "learning_rate": 0.002, "loss": 2.5614, "step": 351430 }, { "epoch": 0.7001466275659824, "grad_norm": 0.17156782746315002, "learning_rate": 0.002, "loss": 2.561, "step": 351440 }, { "epoch": 0.7001665497896213, "grad_norm": 0.15202151238918304, "learning_rate": 0.002, "loss": 2.5505, "step": 351450 }, { "epoch": 0.7001864720132602, "grad_norm": 0.1622401773929596, "learning_rate": 0.002, "loss": 2.5583, "step": 351460 }, { "epoch": 0.7002063942368991, "grad_norm": 0.16856437921524048, "learning_rate": 0.002, "loss": 2.5619, "step": 351470 }, { "epoch": 0.700226316460538, "grad_norm": 0.15669885277748108, "learning_rate": 0.002, "loss": 2.5707, "step": 351480 }, { "epoch": 0.700246238684177, "grad_norm": 0.17623798549175262, "learning_rate": 0.002, "loss": 2.5558, "step": 351490 }, { "epoch": 0.7002661609078159, "grad_norm": 0.2569740116596222, "learning_rate": 0.002, "loss": 2.5554, "step": 351500 }, { "epoch": 0.7002860831314548, "grad_norm": 0.14797887206077576, "learning_rate": 0.002, "loss": 2.5738, "step": 351510 }, { "epoch": 0.7003060053550937, "grad_norm": 0.1598977893590927, "learning_rate": 0.002, "loss": 2.562, "step": 351520 }, { "epoch": 0.7003259275787326, "grad_norm": 0.17179757356643677, "learning_rate": 0.002, "loss": 2.5366, "step": 351530 }, { "epoch": 0.7003458498023716, "grad_norm": 0.1850055754184723, "learning_rate": 0.002, "loss": 2.5532, "step": 351540 }, { "epoch": 0.7003657720260105, "grad_norm": 0.1587604284286499, "learning_rate": 0.002, "loss": 2.5647, "step": 351550 }, { "epoch": 0.7003856942496494, "grad_norm": 0.15380433201789856, "learning_rate": 0.002, "loss": 2.5425, "step": 351560 }, { "epoch": 0.7004056164732883, "grad_norm": 0.1767437607049942, "learning_rate": 0.002, "loss": 2.5419, "step": 351570 }, { "epoch": 0.7004255386969271, "grad_norm": 0.15227745473384857, "learning_rate": 0.002, "loss": 2.5417, "step": 351580 }, { "epoch": 0.7004454609205661, "grad_norm": 0.17973890900611877, "learning_rate": 0.002, "loss": 2.5591, "step": 351590 }, { "epoch": 0.700465383144205, "grad_norm": 0.14876972138881683, "learning_rate": 0.002, "loss": 2.5501, "step": 351600 }, { "epoch": 0.7004853053678439, "grad_norm": 0.16518713533878326, "learning_rate": 0.002, "loss": 2.5519, "step": 351610 }, { "epoch": 0.7005052275914828, "grad_norm": 0.15992183983325958, "learning_rate": 0.002, "loss": 2.5452, "step": 351620 }, { "epoch": 0.7005251498151218, "grad_norm": 0.1668238788843155, "learning_rate": 0.002, "loss": 2.5597, "step": 351630 }, { "epoch": 0.7005450720387607, "grad_norm": 0.1502836048603058, "learning_rate": 0.002, "loss": 2.5463, "step": 351640 }, { "epoch": 0.7005649942623996, "grad_norm": 0.15496082603931427, "learning_rate": 0.002, "loss": 2.5522, "step": 351650 }, { "epoch": 0.7005849164860385, "grad_norm": 0.14284925162792206, "learning_rate": 0.002, "loss": 2.5493, "step": 351660 }, { "epoch": 0.7006048387096774, "grad_norm": 0.16022002696990967, "learning_rate": 0.002, "loss": 2.5522, "step": 351670 }, { "epoch": 0.7006247609333164, "grad_norm": 0.17246665060520172, "learning_rate": 0.002, "loss": 2.5557, "step": 351680 }, { "epoch": 0.7006446831569553, "grad_norm": 0.1561869978904724, "learning_rate": 0.002, "loss": 2.5475, "step": 351690 }, { "epoch": 0.7006646053805942, "grad_norm": 0.19360709190368652, "learning_rate": 0.002, "loss": 2.5478, "step": 351700 }, { "epoch": 0.700684527604233, "grad_norm": 0.15685179829597473, "learning_rate": 0.002, "loss": 2.5707, "step": 351710 }, { "epoch": 0.700704449827872, "grad_norm": 0.16549482941627502, "learning_rate": 0.002, "loss": 2.5495, "step": 351720 }, { "epoch": 0.700724372051511, "grad_norm": 0.16671723127365112, "learning_rate": 0.002, "loss": 2.5687, "step": 351730 }, { "epoch": 0.7007442942751498, "grad_norm": 0.15854215621948242, "learning_rate": 0.002, "loss": 2.5521, "step": 351740 }, { "epoch": 0.7007642164987887, "grad_norm": 0.16740916669368744, "learning_rate": 0.002, "loss": 2.5479, "step": 351750 }, { "epoch": 0.7007841387224276, "grad_norm": 0.16916093230247498, "learning_rate": 0.002, "loss": 2.555, "step": 351760 }, { "epoch": 0.7008040609460665, "grad_norm": 0.14257963001728058, "learning_rate": 0.002, "loss": 2.5499, "step": 351770 }, { "epoch": 0.7008239831697055, "grad_norm": 0.15220730006694794, "learning_rate": 0.002, "loss": 2.5371, "step": 351780 }, { "epoch": 0.7008439053933444, "grad_norm": 0.20185235142707825, "learning_rate": 0.002, "loss": 2.5467, "step": 351790 }, { "epoch": 0.7008638276169833, "grad_norm": 0.18007633090019226, "learning_rate": 0.002, "loss": 2.551, "step": 351800 }, { "epoch": 0.7008837498406222, "grad_norm": 0.17539183795452118, "learning_rate": 0.002, "loss": 2.5507, "step": 351810 }, { "epoch": 0.7009036720642611, "grad_norm": 0.16551391780376434, "learning_rate": 0.002, "loss": 2.539, "step": 351820 }, { "epoch": 0.7009235942879001, "grad_norm": 0.1722310185432434, "learning_rate": 0.002, "loss": 2.5525, "step": 351830 }, { "epoch": 0.700943516511539, "grad_norm": 0.15742464363574982, "learning_rate": 0.002, "loss": 2.5501, "step": 351840 }, { "epoch": 0.7009634387351779, "grad_norm": 0.19148729741573334, "learning_rate": 0.002, "loss": 2.5621, "step": 351850 }, { "epoch": 0.7009833609588167, "grad_norm": 0.14550678431987762, "learning_rate": 0.002, "loss": 2.5576, "step": 351860 }, { "epoch": 0.7010032831824556, "grad_norm": 0.15984946489334106, "learning_rate": 0.002, "loss": 2.5518, "step": 351870 }, { "epoch": 0.7010232054060946, "grad_norm": 0.1548708975315094, "learning_rate": 0.002, "loss": 2.5506, "step": 351880 }, { "epoch": 0.7010431276297335, "grad_norm": 0.16267094016075134, "learning_rate": 0.002, "loss": 2.5497, "step": 351890 }, { "epoch": 0.7010630498533724, "grad_norm": 0.17450928688049316, "learning_rate": 0.002, "loss": 2.5447, "step": 351900 }, { "epoch": 0.7010829720770113, "grad_norm": 0.18740347027778625, "learning_rate": 0.002, "loss": 2.5687, "step": 351910 }, { "epoch": 0.7011028943006503, "grad_norm": 0.17382606863975525, "learning_rate": 0.002, "loss": 2.5656, "step": 351920 }, { "epoch": 0.7011228165242892, "grad_norm": 0.1871158480644226, "learning_rate": 0.002, "loss": 2.5683, "step": 351930 }, { "epoch": 0.7011427387479281, "grad_norm": 0.1706027388572693, "learning_rate": 0.002, "loss": 2.555, "step": 351940 }, { "epoch": 0.701162660971567, "grad_norm": 0.13692106306552887, "learning_rate": 0.002, "loss": 2.5535, "step": 351950 }, { "epoch": 0.7011825831952059, "grad_norm": 0.18067815899848938, "learning_rate": 0.002, "loss": 2.5535, "step": 351960 }, { "epoch": 0.7012025054188449, "grad_norm": 0.1836090087890625, "learning_rate": 0.002, "loss": 2.552, "step": 351970 }, { "epoch": 0.7012224276424838, "grad_norm": 0.18772251904010773, "learning_rate": 0.002, "loss": 2.5399, "step": 351980 }, { "epoch": 0.7012423498661227, "grad_norm": 0.20273371040821075, "learning_rate": 0.002, "loss": 2.5487, "step": 351990 }, { "epoch": 0.7012622720897616, "grad_norm": 0.17854782938957214, "learning_rate": 0.002, "loss": 2.5626, "step": 352000 }, { "epoch": 0.7012821943134004, "grad_norm": 0.16822466254234314, "learning_rate": 0.002, "loss": 2.5489, "step": 352010 }, { "epoch": 0.7013021165370394, "grad_norm": 0.1571943163871765, "learning_rate": 0.002, "loss": 2.5355, "step": 352020 }, { "epoch": 0.7013220387606783, "grad_norm": 0.13135063648223877, "learning_rate": 0.002, "loss": 2.5666, "step": 352030 }, { "epoch": 0.7013419609843172, "grad_norm": 0.1673131138086319, "learning_rate": 0.002, "loss": 2.5552, "step": 352040 }, { "epoch": 0.7013618832079561, "grad_norm": 0.13681820034980774, "learning_rate": 0.002, "loss": 2.5541, "step": 352050 }, { "epoch": 0.701381805431595, "grad_norm": 0.15910199284553528, "learning_rate": 0.002, "loss": 2.5538, "step": 352060 }, { "epoch": 0.701401727655234, "grad_norm": 0.1548076868057251, "learning_rate": 0.002, "loss": 2.5291, "step": 352070 }, { "epoch": 0.7014216498788729, "grad_norm": 0.1418767124414444, "learning_rate": 0.002, "loss": 2.5545, "step": 352080 }, { "epoch": 0.7014415721025118, "grad_norm": 0.25540363788604736, "learning_rate": 0.002, "loss": 2.5573, "step": 352090 }, { "epoch": 0.7014614943261507, "grad_norm": 0.15342219173908234, "learning_rate": 0.002, "loss": 2.5444, "step": 352100 }, { "epoch": 0.7014814165497896, "grad_norm": 0.19752004742622375, "learning_rate": 0.002, "loss": 2.5695, "step": 352110 }, { "epoch": 0.7015013387734286, "grad_norm": 0.16619203984737396, "learning_rate": 0.002, "loss": 2.5527, "step": 352120 }, { "epoch": 0.7015212609970675, "grad_norm": 0.17231345176696777, "learning_rate": 0.002, "loss": 2.5526, "step": 352130 }, { "epoch": 0.7015411832207064, "grad_norm": 0.149753600358963, "learning_rate": 0.002, "loss": 2.5549, "step": 352140 }, { "epoch": 0.7015611054443452, "grad_norm": 0.18389546871185303, "learning_rate": 0.002, "loss": 2.5622, "step": 352150 }, { "epoch": 0.7015810276679841, "grad_norm": 0.1755496710538864, "learning_rate": 0.002, "loss": 2.5518, "step": 352160 }, { "epoch": 0.7016009498916231, "grad_norm": 0.16168411076068878, "learning_rate": 0.002, "loss": 2.5496, "step": 352170 }, { "epoch": 0.701620872115262, "grad_norm": 0.18319803476333618, "learning_rate": 0.002, "loss": 2.5553, "step": 352180 }, { "epoch": 0.7016407943389009, "grad_norm": 0.18219393491744995, "learning_rate": 0.002, "loss": 2.538, "step": 352190 }, { "epoch": 0.7016607165625398, "grad_norm": 0.15290944278240204, "learning_rate": 0.002, "loss": 2.5609, "step": 352200 }, { "epoch": 0.7016806387861788, "grad_norm": 0.19067783653736115, "learning_rate": 0.002, "loss": 2.5628, "step": 352210 }, { "epoch": 0.7017005610098177, "grad_norm": 0.17141062021255493, "learning_rate": 0.002, "loss": 2.552, "step": 352220 }, { "epoch": 0.7017204832334566, "grad_norm": 0.16441336274147034, "learning_rate": 0.002, "loss": 2.5579, "step": 352230 }, { "epoch": 0.7017404054570955, "grad_norm": 0.1730218380689621, "learning_rate": 0.002, "loss": 2.5699, "step": 352240 }, { "epoch": 0.7017603276807344, "grad_norm": 0.1601564586162567, "learning_rate": 0.002, "loss": 2.5502, "step": 352250 }, { "epoch": 0.7017802499043734, "grad_norm": 0.15245236456394196, "learning_rate": 0.002, "loss": 2.5655, "step": 352260 }, { "epoch": 0.7018001721280123, "grad_norm": 0.17617645859718323, "learning_rate": 0.002, "loss": 2.5528, "step": 352270 }, { "epoch": 0.7018200943516512, "grad_norm": 0.15819114446640015, "learning_rate": 0.002, "loss": 2.556, "step": 352280 }, { "epoch": 0.70184001657529, "grad_norm": 0.15665575861930847, "learning_rate": 0.002, "loss": 2.5529, "step": 352290 }, { "epoch": 0.7018599387989289, "grad_norm": 0.19198572635650635, "learning_rate": 0.002, "loss": 2.5521, "step": 352300 }, { "epoch": 0.7018798610225679, "grad_norm": 0.1888633817434311, "learning_rate": 0.002, "loss": 2.5559, "step": 352310 }, { "epoch": 0.7018997832462068, "grad_norm": 0.15677060186862946, "learning_rate": 0.002, "loss": 2.5686, "step": 352320 }, { "epoch": 0.7019197054698457, "grad_norm": 0.237426295876503, "learning_rate": 0.002, "loss": 2.5492, "step": 352330 }, { "epoch": 0.7019396276934846, "grad_norm": 0.1414838284254074, "learning_rate": 0.002, "loss": 2.5537, "step": 352340 }, { "epoch": 0.7019595499171235, "grad_norm": 0.15922850370407104, "learning_rate": 0.002, "loss": 2.5539, "step": 352350 }, { "epoch": 0.7019794721407625, "grad_norm": 0.15540540218353271, "learning_rate": 0.002, "loss": 2.5526, "step": 352360 }, { "epoch": 0.7019993943644014, "grad_norm": 0.20352937281131744, "learning_rate": 0.002, "loss": 2.5583, "step": 352370 }, { "epoch": 0.7020193165880403, "grad_norm": 0.1512589305639267, "learning_rate": 0.002, "loss": 2.5585, "step": 352380 }, { "epoch": 0.7020392388116792, "grad_norm": 0.15098290145397186, "learning_rate": 0.002, "loss": 2.5527, "step": 352390 }, { "epoch": 0.7020591610353181, "grad_norm": 0.16853298246860504, "learning_rate": 0.002, "loss": 2.5562, "step": 352400 }, { "epoch": 0.7020790832589571, "grad_norm": 0.14709767699241638, "learning_rate": 0.002, "loss": 2.5567, "step": 352410 }, { "epoch": 0.702099005482596, "grad_norm": 0.15399497747421265, "learning_rate": 0.002, "loss": 2.5315, "step": 352420 }, { "epoch": 0.7021189277062349, "grad_norm": 0.1545344591140747, "learning_rate": 0.002, "loss": 2.5701, "step": 352430 }, { "epoch": 0.7021388499298737, "grad_norm": 0.18146179616451263, "learning_rate": 0.002, "loss": 2.5436, "step": 352440 }, { "epoch": 0.7021587721535126, "grad_norm": 0.15106739103794098, "learning_rate": 0.002, "loss": 2.5542, "step": 352450 }, { "epoch": 0.7021786943771516, "grad_norm": 0.1538810431957245, "learning_rate": 0.002, "loss": 2.5567, "step": 352460 }, { "epoch": 0.7021986166007905, "grad_norm": 0.19842565059661865, "learning_rate": 0.002, "loss": 2.5497, "step": 352470 }, { "epoch": 0.7022185388244294, "grad_norm": 0.17582185566425323, "learning_rate": 0.002, "loss": 2.5617, "step": 352480 }, { "epoch": 0.7022384610480683, "grad_norm": 0.22271393239498138, "learning_rate": 0.002, "loss": 2.5536, "step": 352490 }, { "epoch": 0.7022583832717073, "grad_norm": 0.15716241300106049, "learning_rate": 0.002, "loss": 2.5492, "step": 352500 }, { "epoch": 0.7022783054953462, "grad_norm": 0.14365530014038086, "learning_rate": 0.002, "loss": 2.5576, "step": 352510 }, { "epoch": 0.7022982277189851, "grad_norm": 0.15388134121894836, "learning_rate": 0.002, "loss": 2.5554, "step": 352520 }, { "epoch": 0.702318149942624, "grad_norm": 0.16581851243972778, "learning_rate": 0.002, "loss": 2.5523, "step": 352530 }, { "epoch": 0.7023380721662629, "grad_norm": 0.21633610129356384, "learning_rate": 0.002, "loss": 2.5526, "step": 352540 }, { "epoch": 0.7023579943899019, "grad_norm": 0.1653934121131897, "learning_rate": 0.002, "loss": 2.5452, "step": 352550 }, { "epoch": 0.7023779166135408, "grad_norm": 0.15036387741565704, "learning_rate": 0.002, "loss": 2.5641, "step": 352560 }, { "epoch": 0.7023978388371797, "grad_norm": 0.17315977811813354, "learning_rate": 0.002, "loss": 2.5591, "step": 352570 }, { "epoch": 0.7024177610608185, "grad_norm": 0.14342878758907318, "learning_rate": 0.002, "loss": 2.5407, "step": 352580 }, { "epoch": 0.7024376832844574, "grad_norm": 0.19747257232666016, "learning_rate": 0.002, "loss": 2.5669, "step": 352590 }, { "epoch": 0.7024576055080964, "grad_norm": 0.26089733839035034, "learning_rate": 0.002, "loss": 2.5495, "step": 352600 }, { "epoch": 0.7024775277317353, "grad_norm": 0.16572663187980652, "learning_rate": 0.002, "loss": 2.553, "step": 352610 }, { "epoch": 0.7024974499553742, "grad_norm": 0.17953559756278992, "learning_rate": 0.002, "loss": 2.5586, "step": 352620 }, { "epoch": 0.7025173721790131, "grad_norm": 0.1515614539384842, "learning_rate": 0.002, "loss": 2.5359, "step": 352630 }, { "epoch": 0.702537294402652, "grad_norm": 0.19514307379722595, "learning_rate": 0.002, "loss": 2.5445, "step": 352640 }, { "epoch": 0.702557216626291, "grad_norm": 0.15495440363883972, "learning_rate": 0.002, "loss": 2.5508, "step": 352650 }, { "epoch": 0.7025771388499299, "grad_norm": 0.15973122417926788, "learning_rate": 0.002, "loss": 2.5437, "step": 352660 }, { "epoch": 0.7025970610735688, "grad_norm": 0.1518315076828003, "learning_rate": 0.002, "loss": 2.5588, "step": 352670 }, { "epoch": 0.7026169832972077, "grad_norm": 0.15382511913776398, "learning_rate": 0.002, "loss": 2.5516, "step": 352680 }, { "epoch": 0.7026369055208466, "grad_norm": 0.1703038513660431, "learning_rate": 0.002, "loss": 2.5523, "step": 352690 }, { "epoch": 0.7026568277444856, "grad_norm": 0.1489214152097702, "learning_rate": 0.002, "loss": 2.5547, "step": 352700 }, { "epoch": 0.7026767499681245, "grad_norm": 0.14547526836395264, "learning_rate": 0.002, "loss": 2.554, "step": 352710 }, { "epoch": 0.7026966721917633, "grad_norm": 0.16882939636707306, "learning_rate": 0.002, "loss": 2.548, "step": 352720 }, { "epoch": 0.7027165944154022, "grad_norm": 0.1619335114955902, "learning_rate": 0.002, "loss": 2.5562, "step": 352730 }, { "epoch": 0.7027365166390411, "grad_norm": 0.20673350989818573, "learning_rate": 0.002, "loss": 2.5562, "step": 352740 }, { "epoch": 0.7027564388626801, "grad_norm": 0.1515827476978302, "learning_rate": 0.002, "loss": 2.5493, "step": 352750 }, { "epoch": 0.702776361086319, "grad_norm": 0.15410766005516052, "learning_rate": 0.002, "loss": 2.542, "step": 352760 }, { "epoch": 0.7027962833099579, "grad_norm": 0.17537821829319, "learning_rate": 0.002, "loss": 2.5513, "step": 352770 }, { "epoch": 0.7028162055335968, "grad_norm": 0.18467728793621063, "learning_rate": 0.002, "loss": 2.5504, "step": 352780 }, { "epoch": 0.7028361277572358, "grad_norm": 0.1635507196187973, "learning_rate": 0.002, "loss": 2.5549, "step": 352790 }, { "epoch": 0.7028560499808747, "grad_norm": 0.15415330231189728, "learning_rate": 0.002, "loss": 2.5494, "step": 352800 }, { "epoch": 0.7028759722045136, "grad_norm": 0.17214679718017578, "learning_rate": 0.002, "loss": 2.5553, "step": 352810 }, { "epoch": 0.7028958944281525, "grad_norm": 0.16554543375968933, "learning_rate": 0.002, "loss": 2.5473, "step": 352820 }, { "epoch": 0.7029158166517914, "grad_norm": 0.2006424516439438, "learning_rate": 0.002, "loss": 2.5427, "step": 352830 }, { "epoch": 0.7029357388754304, "grad_norm": 0.17600303888320923, "learning_rate": 0.002, "loss": 2.5625, "step": 352840 }, { "epoch": 0.7029556610990693, "grad_norm": 0.16343554854393005, "learning_rate": 0.002, "loss": 2.5604, "step": 352850 }, { "epoch": 0.7029755833227082, "grad_norm": 0.13840758800506592, "learning_rate": 0.002, "loss": 2.5625, "step": 352860 }, { "epoch": 0.702995505546347, "grad_norm": 0.17237962782382965, "learning_rate": 0.002, "loss": 2.5487, "step": 352870 }, { "epoch": 0.7030154277699859, "grad_norm": 0.14432448148727417, "learning_rate": 0.002, "loss": 2.5626, "step": 352880 }, { "epoch": 0.7030353499936249, "grad_norm": 0.17054572701454163, "learning_rate": 0.002, "loss": 2.5702, "step": 352890 }, { "epoch": 0.7030552722172638, "grad_norm": 0.15278752148151398, "learning_rate": 0.002, "loss": 2.5575, "step": 352900 }, { "epoch": 0.7030751944409027, "grad_norm": 0.1786317676305771, "learning_rate": 0.002, "loss": 2.5452, "step": 352910 }, { "epoch": 0.7030951166645416, "grad_norm": 0.15334978699684143, "learning_rate": 0.002, "loss": 2.5699, "step": 352920 }, { "epoch": 0.7031150388881805, "grad_norm": 0.15018197894096375, "learning_rate": 0.002, "loss": 2.5588, "step": 352930 }, { "epoch": 0.7031349611118195, "grad_norm": 0.17870470881462097, "learning_rate": 0.002, "loss": 2.5598, "step": 352940 }, { "epoch": 0.7031548833354584, "grad_norm": 0.16396717727184296, "learning_rate": 0.002, "loss": 2.5484, "step": 352950 }, { "epoch": 0.7031748055590973, "grad_norm": 0.18344183266162872, "learning_rate": 0.002, "loss": 2.5553, "step": 352960 }, { "epoch": 0.7031947277827362, "grad_norm": 0.16446900367736816, "learning_rate": 0.002, "loss": 2.5648, "step": 352970 }, { "epoch": 0.7032146500063751, "grad_norm": 0.14646445214748383, "learning_rate": 0.002, "loss": 2.5356, "step": 352980 }, { "epoch": 0.7032345722300141, "grad_norm": 0.20114253461360931, "learning_rate": 0.002, "loss": 2.5675, "step": 352990 }, { "epoch": 0.703254494453653, "grad_norm": 0.18722601234912872, "learning_rate": 0.002, "loss": 2.5519, "step": 353000 }, { "epoch": 0.7032744166772918, "grad_norm": 0.16132855415344238, "learning_rate": 0.002, "loss": 2.5512, "step": 353010 }, { "epoch": 0.7032943389009307, "grad_norm": 0.14521977305412292, "learning_rate": 0.002, "loss": 2.5599, "step": 353020 }, { "epoch": 0.7033142611245696, "grad_norm": 0.18226781487464905, "learning_rate": 0.002, "loss": 2.5587, "step": 353030 }, { "epoch": 0.7033341833482086, "grad_norm": 0.19413447380065918, "learning_rate": 0.002, "loss": 2.5446, "step": 353040 }, { "epoch": 0.7033541055718475, "grad_norm": 0.14704419672489166, "learning_rate": 0.002, "loss": 2.5523, "step": 353050 }, { "epoch": 0.7033740277954864, "grad_norm": 0.16869543492794037, "learning_rate": 0.002, "loss": 2.5539, "step": 353060 }, { "epoch": 0.7033939500191253, "grad_norm": 0.20887604355812073, "learning_rate": 0.002, "loss": 2.5625, "step": 353070 }, { "epoch": 0.7034138722427642, "grad_norm": 0.16198325157165527, "learning_rate": 0.002, "loss": 2.5405, "step": 353080 }, { "epoch": 0.7034337944664032, "grad_norm": 0.16340219974517822, "learning_rate": 0.002, "loss": 2.5663, "step": 353090 }, { "epoch": 0.7034537166900421, "grad_norm": 0.14201733469963074, "learning_rate": 0.002, "loss": 2.57, "step": 353100 }, { "epoch": 0.703473638913681, "grad_norm": 0.16149108111858368, "learning_rate": 0.002, "loss": 2.5694, "step": 353110 }, { "epoch": 0.7034935611373199, "grad_norm": 0.1772400140762329, "learning_rate": 0.002, "loss": 2.5521, "step": 353120 }, { "epoch": 0.7035134833609589, "grad_norm": 0.15133430063724518, "learning_rate": 0.002, "loss": 2.5659, "step": 353130 }, { "epoch": 0.7035334055845978, "grad_norm": 0.17655359208583832, "learning_rate": 0.002, "loss": 2.5612, "step": 353140 }, { "epoch": 0.7035533278082367, "grad_norm": 0.14635024964809418, "learning_rate": 0.002, "loss": 2.5545, "step": 353150 }, { "epoch": 0.7035732500318755, "grad_norm": 0.16882100701332092, "learning_rate": 0.002, "loss": 2.5563, "step": 353160 }, { "epoch": 0.7035931722555144, "grad_norm": 0.17503929138183594, "learning_rate": 0.002, "loss": 2.5564, "step": 353170 }, { "epoch": 0.7036130944791534, "grad_norm": 0.13846154510974884, "learning_rate": 0.002, "loss": 2.549, "step": 353180 }, { "epoch": 0.7036330167027923, "grad_norm": 0.14894837141036987, "learning_rate": 0.002, "loss": 2.5564, "step": 353190 }, { "epoch": 0.7036529389264312, "grad_norm": 0.18118001520633698, "learning_rate": 0.002, "loss": 2.561, "step": 353200 }, { "epoch": 0.7036728611500701, "grad_norm": 0.14690494537353516, "learning_rate": 0.002, "loss": 2.5669, "step": 353210 }, { "epoch": 0.703692783373709, "grad_norm": 0.1512560248374939, "learning_rate": 0.002, "loss": 2.547, "step": 353220 }, { "epoch": 0.703712705597348, "grad_norm": 0.15800456702709198, "learning_rate": 0.002, "loss": 2.5335, "step": 353230 }, { "epoch": 0.7037326278209869, "grad_norm": 0.16619502007961273, "learning_rate": 0.002, "loss": 2.5649, "step": 353240 }, { "epoch": 0.7037525500446258, "grad_norm": 0.1585816740989685, "learning_rate": 0.002, "loss": 2.5626, "step": 353250 }, { "epoch": 0.7037724722682647, "grad_norm": 0.15871655941009521, "learning_rate": 0.002, "loss": 2.5576, "step": 353260 }, { "epoch": 0.7037923944919036, "grad_norm": 0.20946569740772247, "learning_rate": 0.002, "loss": 2.5567, "step": 353270 }, { "epoch": 0.7038123167155426, "grad_norm": 0.15623198449611664, "learning_rate": 0.002, "loss": 2.5569, "step": 353280 }, { "epoch": 0.7038322389391815, "grad_norm": 0.1686481237411499, "learning_rate": 0.002, "loss": 2.5587, "step": 353290 }, { "epoch": 0.7038521611628203, "grad_norm": 0.1448371857404709, "learning_rate": 0.002, "loss": 2.5372, "step": 353300 }, { "epoch": 0.7038720833864592, "grad_norm": 0.22809937596321106, "learning_rate": 0.002, "loss": 2.5454, "step": 353310 }, { "epoch": 0.7038920056100981, "grad_norm": 0.1880768984556198, "learning_rate": 0.002, "loss": 2.5322, "step": 353320 }, { "epoch": 0.7039119278337371, "grad_norm": 0.16758812963962555, "learning_rate": 0.002, "loss": 2.5494, "step": 353330 }, { "epoch": 0.703931850057376, "grad_norm": 0.1690618246793747, "learning_rate": 0.002, "loss": 2.538, "step": 353340 }, { "epoch": 0.7039517722810149, "grad_norm": 0.1654902845621109, "learning_rate": 0.002, "loss": 2.557, "step": 353350 }, { "epoch": 0.7039716945046538, "grad_norm": 0.15870250761508942, "learning_rate": 0.002, "loss": 2.5596, "step": 353360 }, { "epoch": 0.7039916167282927, "grad_norm": 0.16570593416690826, "learning_rate": 0.002, "loss": 2.5653, "step": 353370 }, { "epoch": 0.7040115389519317, "grad_norm": 0.1626155823469162, "learning_rate": 0.002, "loss": 2.5503, "step": 353380 }, { "epoch": 0.7040314611755706, "grad_norm": 0.15892423689365387, "learning_rate": 0.002, "loss": 2.557, "step": 353390 }, { "epoch": 0.7040513833992095, "grad_norm": 0.2076214998960495, "learning_rate": 0.002, "loss": 2.5497, "step": 353400 }, { "epoch": 0.7040713056228484, "grad_norm": 0.1550489217042923, "learning_rate": 0.002, "loss": 2.5484, "step": 353410 }, { "epoch": 0.7040912278464874, "grad_norm": 0.1900399774312973, "learning_rate": 0.002, "loss": 2.5504, "step": 353420 }, { "epoch": 0.7041111500701263, "grad_norm": 0.18336008489131927, "learning_rate": 0.002, "loss": 2.5623, "step": 353430 }, { "epoch": 0.7041310722937651, "grad_norm": 0.1994425356388092, "learning_rate": 0.002, "loss": 2.5495, "step": 353440 }, { "epoch": 0.704150994517404, "grad_norm": 0.14091014862060547, "learning_rate": 0.002, "loss": 2.5528, "step": 353450 }, { "epoch": 0.7041709167410429, "grad_norm": 0.15058492124080658, "learning_rate": 0.002, "loss": 2.5473, "step": 353460 }, { "epoch": 0.7041908389646819, "grad_norm": 0.16089418530464172, "learning_rate": 0.002, "loss": 2.5334, "step": 353470 }, { "epoch": 0.7042107611883208, "grad_norm": 0.14320804178714752, "learning_rate": 0.002, "loss": 2.5545, "step": 353480 }, { "epoch": 0.7042306834119597, "grad_norm": 0.16538609564304352, "learning_rate": 0.002, "loss": 2.5478, "step": 353490 }, { "epoch": 0.7042506056355986, "grad_norm": 0.16569575667381287, "learning_rate": 0.002, "loss": 2.554, "step": 353500 }, { "epoch": 0.7042705278592375, "grad_norm": 0.1734936386346817, "learning_rate": 0.002, "loss": 2.5634, "step": 353510 }, { "epoch": 0.7042904500828765, "grad_norm": 0.14190059900283813, "learning_rate": 0.002, "loss": 2.5389, "step": 353520 }, { "epoch": 0.7043103723065154, "grad_norm": 0.1773284524679184, "learning_rate": 0.002, "loss": 2.5491, "step": 353530 }, { "epoch": 0.7043302945301543, "grad_norm": 0.16549278795719147, "learning_rate": 0.002, "loss": 2.5526, "step": 353540 }, { "epoch": 0.7043502167537932, "grad_norm": 0.18093839287757874, "learning_rate": 0.002, "loss": 2.5732, "step": 353550 }, { "epoch": 0.7043701389774321, "grad_norm": 0.14925506711006165, "learning_rate": 0.002, "loss": 2.5551, "step": 353560 }, { "epoch": 0.7043900612010711, "grad_norm": 0.17715544998645782, "learning_rate": 0.002, "loss": 2.5738, "step": 353570 }, { "epoch": 0.70440998342471, "grad_norm": 0.16219715774059296, "learning_rate": 0.002, "loss": 2.55, "step": 353580 }, { "epoch": 0.7044299056483488, "grad_norm": 0.15348486602306366, "learning_rate": 0.002, "loss": 2.5449, "step": 353590 }, { "epoch": 0.7044498278719877, "grad_norm": 0.17029598355293274, "learning_rate": 0.002, "loss": 2.5585, "step": 353600 }, { "epoch": 0.7044697500956266, "grad_norm": 0.1981486976146698, "learning_rate": 0.002, "loss": 2.5673, "step": 353610 }, { "epoch": 0.7044896723192656, "grad_norm": 0.155137300491333, "learning_rate": 0.002, "loss": 2.5469, "step": 353620 }, { "epoch": 0.7045095945429045, "grad_norm": 0.19488105177879333, "learning_rate": 0.002, "loss": 2.5593, "step": 353630 }, { "epoch": 0.7045295167665434, "grad_norm": 0.1386948823928833, "learning_rate": 0.002, "loss": 2.5569, "step": 353640 }, { "epoch": 0.7045494389901823, "grad_norm": 0.15713711082935333, "learning_rate": 0.002, "loss": 2.5624, "step": 353650 }, { "epoch": 0.7045693612138212, "grad_norm": 0.18548980355262756, "learning_rate": 0.002, "loss": 2.5585, "step": 353660 }, { "epoch": 0.7045892834374602, "grad_norm": 0.20812785625457764, "learning_rate": 0.002, "loss": 2.5534, "step": 353670 }, { "epoch": 0.7046092056610991, "grad_norm": 0.1448761373758316, "learning_rate": 0.002, "loss": 2.5644, "step": 353680 }, { "epoch": 0.704629127884738, "grad_norm": 0.15075261890888214, "learning_rate": 0.002, "loss": 2.5539, "step": 353690 }, { "epoch": 0.7046490501083769, "grad_norm": 0.16877727210521698, "learning_rate": 0.002, "loss": 2.5437, "step": 353700 }, { "epoch": 0.7046689723320159, "grad_norm": 0.30890893936157227, "learning_rate": 0.002, "loss": 2.5376, "step": 353710 }, { "epoch": 0.7046888945556548, "grad_norm": 0.17361225187778473, "learning_rate": 0.002, "loss": 2.5425, "step": 353720 }, { "epoch": 0.7047088167792936, "grad_norm": 0.18938365578651428, "learning_rate": 0.002, "loss": 2.558, "step": 353730 }, { "epoch": 0.7047287390029325, "grad_norm": 0.146750807762146, "learning_rate": 0.002, "loss": 2.564, "step": 353740 }, { "epoch": 0.7047486612265714, "grad_norm": 0.14805658161640167, "learning_rate": 0.002, "loss": 2.5587, "step": 353750 }, { "epoch": 0.7047685834502104, "grad_norm": 0.18630890548229218, "learning_rate": 0.002, "loss": 2.5726, "step": 353760 }, { "epoch": 0.7047885056738493, "grad_norm": 0.19143997132778168, "learning_rate": 0.002, "loss": 2.5437, "step": 353770 }, { "epoch": 0.7048084278974882, "grad_norm": 0.17479825019836426, "learning_rate": 0.002, "loss": 2.5515, "step": 353780 }, { "epoch": 0.7048283501211271, "grad_norm": 0.22331207990646362, "learning_rate": 0.002, "loss": 2.5599, "step": 353790 }, { "epoch": 0.704848272344766, "grad_norm": 0.1662723869085312, "learning_rate": 0.002, "loss": 2.563, "step": 353800 }, { "epoch": 0.704868194568405, "grad_norm": 0.1660783588886261, "learning_rate": 0.002, "loss": 2.5686, "step": 353810 }, { "epoch": 0.7048881167920439, "grad_norm": 0.16630707681179047, "learning_rate": 0.002, "loss": 2.559, "step": 353820 }, { "epoch": 0.7049080390156828, "grad_norm": 0.16231466829776764, "learning_rate": 0.002, "loss": 2.5591, "step": 353830 }, { "epoch": 0.7049279612393217, "grad_norm": 0.15883533656597137, "learning_rate": 0.002, "loss": 2.5751, "step": 353840 }, { "epoch": 0.7049478834629606, "grad_norm": 0.16324344277381897, "learning_rate": 0.002, "loss": 2.5606, "step": 353850 }, { "epoch": 0.7049678056865996, "grad_norm": 0.1510259509086609, "learning_rate": 0.002, "loss": 2.5608, "step": 353860 }, { "epoch": 0.7049877279102384, "grad_norm": 0.17169377207756042, "learning_rate": 0.002, "loss": 2.5646, "step": 353870 }, { "epoch": 0.7050076501338773, "grad_norm": 0.16896702349185944, "learning_rate": 0.002, "loss": 2.564, "step": 353880 }, { "epoch": 0.7050275723575162, "grad_norm": 0.15219657123088837, "learning_rate": 0.002, "loss": 2.5532, "step": 353890 }, { "epoch": 0.7050474945811551, "grad_norm": 0.163397416472435, "learning_rate": 0.002, "loss": 2.5675, "step": 353900 }, { "epoch": 0.7050674168047941, "grad_norm": 0.19412948191165924, "learning_rate": 0.002, "loss": 2.546, "step": 353910 }, { "epoch": 0.705087339028433, "grad_norm": 0.17613370716571808, "learning_rate": 0.002, "loss": 2.5609, "step": 353920 }, { "epoch": 0.7051072612520719, "grad_norm": 0.15318188071250916, "learning_rate": 0.002, "loss": 2.5593, "step": 353930 }, { "epoch": 0.7051271834757108, "grad_norm": 0.1684877574443817, "learning_rate": 0.002, "loss": 2.5565, "step": 353940 }, { "epoch": 0.7051471056993497, "grad_norm": 0.15498663485050201, "learning_rate": 0.002, "loss": 2.5408, "step": 353950 }, { "epoch": 0.7051670279229887, "grad_norm": 0.18227843940258026, "learning_rate": 0.002, "loss": 2.5587, "step": 353960 }, { "epoch": 0.7051869501466276, "grad_norm": 0.1521657407283783, "learning_rate": 0.002, "loss": 2.551, "step": 353970 }, { "epoch": 0.7052068723702665, "grad_norm": 0.1637151688337326, "learning_rate": 0.002, "loss": 2.5599, "step": 353980 }, { "epoch": 0.7052267945939054, "grad_norm": 0.17707425355911255, "learning_rate": 0.002, "loss": 2.5686, "step": 353990 }, { "epoch": 0.7052467168175444, "grad_norm": 0.17090879380702972, "learning_rate": 0.002, "loss": 2.5484, "step": 354000 }, { "epoch": 0.7052666390411833, "grad_norm": 0.17591704428195953, "learning_rate": 0.002, "loss": 2.5604, "step": 354010 }, { "epoch": 0.7052865612648221, "grad_norm": 0.1739719957113266, "learning_rate": 0.002, "loss": 2.554, "step": 354020 }, { "epoch": 0.705306483488461, "grad_norm": 0.16477622091770172, "learning_rate": 0.002, "loss": 2.5552, "step": 354030 }, { "epoch": 0.7053264057120999, "grad_norm": 0.16374613344669342, "learning_rate": 0.002, "loss": 2.5618, "step": 354040 }, { "epoch": 0.7053463279357389, "grad_norm": 0.179876908659935, "learning_rate": 0.002, "loss": 2.5331, "step": 354050 }, { "epoch": 0.7053662501593778, "grad_norm": 0.15977632999420166, "learning_rate": 0.002, "loss": 2.566, "step": 354060 }, { "epoch": 0.7053861723830167, "grad_norm": 0.16728007793426514, "learning_rate": 0.002, "loss": 2.5546, "step": 354070 }, { "epoch": 0.7054060946066556, "grad_norm": 0.19360122084617615, "learning_rate": 0.002, "loss": 2.5645, "step": 354080 }, { "epoch": 0.7054260168302945, "grad_norm": 0.162397563457489, "learning_rate": 0.002, "loss": 2.5428, "step": 354090 }, { "epoch": 0.7054459390539335, "grad_norm": 0.14897967875003815, "learning_rate": 0.002, "loss": 2.5469, "step": 354100 }, { "epoch": 0.7054658612775724, "grad_norm": 0.1719750463962555, "learning_rate": 0.002, "loss": 2.5526, "step": 354110 }, { "epoch": 0.7054857835012113, "grad_norm": 0.20426346361637115, "learning_rate": 0.002, "loss": 2.55, "step": 354120 }, { "epoch": 0.7055057057248502, "grad_norm": 0.16849231719970703, "learning_rate": 0.002, "loss": 2.5648, "step": 354130 }, { "epoch": 0.705525627948489, "grad_norm": 0.18244719505310059, "learning_rate": 0.002, "loss": 2.5539, "step": 354140 }, { "epoch": 0.705545550172128, "grad_norm": 0.15699951350688934, "learning_rate": 0.002, "loss": 2.5623, "step": 354150 }, { "epoch": 0.705565472395767, "grad_norm": 0.14508916437625885, "learning_rate": 0.002, "loss": 2.5587, "step": 354160 }, { "epoch": 0.7055853946194058, "grad_norm": 0.15810821950435638, "learning_rate": 0.002, "loss": 2.5638, "step": 354170 }, { "epoch": 0.7056053168430447, "grad_norm": 0.1944848895072937, "learning_rate": 0.002, "loss": 2.5525, "step": 354180 }, { "epoch": 0.7056252390666836, "grad_norm": 0.1529632955789566, "learning_rate": 0.002, "loss": 2.554, "step": 354190 }, { "epoch": 0.7056451612903226, "grad_norm": 0.1602572798728943, "learning_rate": 0.002, "loss": 2.558, "step": 354200 }, { "epoch": 0.7056650835139615, "grad_norm": 0.17830456793308258, "learning_rate": 0.002, "loss": 2.5497, "step": 354210 }, { "epoch": 0.7056850057376004, "grad_norm": 0.17801691591739655, "learning_rate": 0.002, "loss": 2.5494, "step": 354220 }, { "epoch": 0.7057049279612393, "grad_norm": 0.17532621324062347, "learning_rate": 0.002, "loss": 2.5624, "step": 354230 }, { "epoch": 0.7057248501848782, "grad_norm": 0.17369438707828522, "learning_rate": 0.002, "loss": 2.5396, "step": 354240 }, { "epoch": 0.7057447724085172, "grad_norm": 0.15445342659950256, "learning_rate": 0.002, "loss": 2.5597, "step": 354250 }, { "epoch": 0.7057646946321561, "grad_norm": 0.227538600564003, "learning_rate": 0.002, "loss": 2.5447, "step": 354260 }, { "epoch": 0.705784616855795, "grad_norm": 0.16812250018119812, "learning_rate": 0.002, "loss": 2.5669, "step": 354270 }, { "epoch": 0.7058045390794339, "grad_norm": 0.1533202826976776, "learning_rate": 0.002, "loss": 2.5555, "step": 354280 }, { "epoch": 0.7058244613030729, "grad_norm": 0.15243613719940186, "learning_rate": 0.002, "loss": 2.5548, "step": 354290 }, { "epoch": 0.7058443835267117, "grad_norm": 0.20311498641967773, "learning_rate": 0.002, "loss": 2.5675, "step": 354300 }, { "epoch": 0.7058643057503506, "grad_norm": 0.16443032026290894, "learning_rate": 0.002, "loss": 2.551, "step": 354310 }, { "epoch": 0.7058842279739895, "grad_norm": 0.15005241334438324, "learning_rate": 0.002, "loss": 2.5594, "step": 354320 }, { "epoch": 0.7059041501976284, "grad_norm": 0.18759186565876007, "learning_rate": 0.002, "loss": 2.547, "step": 354330 }, { "epoch": 0.7059240724212674, "grad_norm": 0.1568450629711151, "learning_rate": 0.002, "loss": 2.545, "step": 354340 }, { "epoch": 0.7059439946449063, "grad_norm": 0.16427123546600342, "learning_rate": 0.002, "loss": 2.5556, "step": 354350 }, { "epoch": 0.7059639168685452, "grad_norm": 0.19473496079444885, "learning_rate": 0.002, "loss": 2.5544, "step": 354360 }, { "epoch": 0.7059838390921841, "grad_norm": 0.16217416524887085, "learning_rate": 0.002, "loss": 2.5613, "step": 354370 }, { "epoch": 0.706003761315823, "grad_norm": 0.17305268347263336, "learning_rate": 0.002, "loss": 2.5621, "step": 354380 }, { "epoch": 0.706023683539462, "grad_norm": 0.138584703207016, "learning_rate": 0.002, "loss": 2.5654, "step": 354390 }, { "epoch": 0.7060436057631009, "grad_norm": 0.18129310011863708, "learning_rate": 0.002, "loss": 2.5519, "step": 354400 }, { "epoch": 0.7060635279867398, "grad_norm": 0.160623699426651, "learning_rate": 0.002, "loss": 2.5465, "step": 354410 }, { "epoch": 0.7060834502103787, "grad_norm": 0.14236146211624146, "learning_rate": 0.002, "loss": 2.5574, "step": 354420 }, { "epoch": 0.7061033724340176, "grad_norm": 0.1648155301809311, "learning_rate": 0.002, "loss": 2.552, "step": 354430 }, { "epoch": 0.7061232946576566, "grad_norm": 0.16296152770519257, "learning_rate": 0.002, "loss": 2.5514, "step": 354440 }, { "epoch": 0.7061432168812954, "grad_norm": 0.1504903882741928, "learning_rate": 0.002, "loss": 2.5529, "step": 354450 }, { "epoch": 0.7061631391049343, "grad_norm": 0.16708676517009735, "learning_rate": 0.002, "loss": 2.5678, "step": 354460 }, { "epoch": 0.7061830613285732, "grad_norm": 0.16481128334999084, "learning_rate": 0.002, "loss": 2.5391, "step": 354470 }, { "epoch": 0.7062029835522121, "grad_norm": 0.1484825611114502, "learning_rate": 0.002, "loss": 2.5515, "step": 354480 }, { "epoch": 0.7062229057758511, "grad_norm": 0.19223062694072723, "learning_rate": 0.002, "loss": 2.5607, "step": 354490 }, { "epoch": 0.70624282799949, "grad_norm": 0.15786033868789673, "learning_rate": 0.002, "loss": 2.5491, "step": 354500 }, { "epoch": 0.7062627502231289, "grad_norm": 0.17909851670265198, "learning_rate": 0.002, "loss": 2.5441, "step": 354510 }, { "epoch": 0.7062826724467678, "grad_norm": 0.16384382545948029, "learning_rate": 0.002, "loss": 2.5641, "step": 354520 }, { "epoch": 0.7063025946704067, "grad_norm": 0.18422919511795044, "learning_rate": 0.002, "loss": 2.5658, "step": 354530 }, { "epoch": 0.7063225168940457, "grad_norm": 0.17059029638767242, "learning_rate": 0.002, "loss": 2.5752, "step": 354540 }, { "epoch": 0.7063424391176846, "grad_norm": 0.16635653376579285, "learning_rate": 0.002, "loss": 2.5745, "step": 354550 }, { "epoch": 0.7063623613413235, "grad_norm": 0.14467892050743103, "learning_rate": 0.002, "loss": 2.5504, "step": 354560 }, { "epoch": 0.7063822835649624, "grad_norm": 0.19944490492343903, "learning_rate": 0.002, "loss": 2.5674, "step": 354570 }, { "epoch": 0.7064022057886012, "grad_norm": 0.18019194900989532, "learning_rate": 0.002, "loss": 2.5512, "step": 354580 }, { "epoch": 0.7064221280122402, "grad_norm": 0.16167818009853363, "learning_rate": 0.002, "loss": 2.5444, "step": 354590 }, { "epoch": 0.7064420502358791, "grad_norm": 0.16884294152259827, "learning_rate": 0.002, "loss": 2.5539, "step": 354600 }, { "epoch": 0.706461972459518, "grad_norm": 0.1911555677652359, "learning_rate": 0.002, "loss": 2.5594, "step": 354610 }, { "epoch": 0.7064818946831569, "grad_norm": 0.15789249539375305, "learning_rate": 0.002, "loss": 2.549, "step": 354620 }, { "epoch": 0.7065018169067959, "grad_norm": 0.17543500661849976, "learning_rate": 0.002, "loss": 2.565, "step": 354630 }, { "epoch": 0.7065217391304348, "grad_norm": 0.1956559270620346, "learning_rate": 0.002, "loss": 2.5661, "step": 354640 }, { "epoch": 0.7065416613540737, "grad_norm": 0.1649208813905716, "learning_rate": 0.002, "loss": 2.5607, "step": 354650 }, { "epoch": 0.7065615835777126, "grad_norm": 0.14824365079402924, "learning_rate": 0.002, "loss": 2.5533, "step": 354660 }, { "epoch": 0.7065815058013515, "grad_norm": 0.15825943648815155, "learning_rate": 0.002, "loss": 2.5429, "step": 354670 }, { "epoch": 0.7066014280249905, "grad_norm": 0.1559411883354187, "learning_rate": 0.002, "loss": 2.5434, "step": 354680 }, { "epoch": 0.7066213502486294, "grad_norm": 0.16755475103855133, "learning_rate": 0.002, "loss": 2.5521, "step": 354690 }, { "epoch": 0.7066412724722683, "grad_norm": 0.164476677775383, "learning_rate": 0.002, "loss": 2.5477, "step": 354700 }, { "epoch": 0.7066611946959072, "grad_norm": 0.15167520940303802, "learning_rate": 0.002, "loss": 2.5796, "step": 354710 }, { "epoch": 0.706681116919546, "grad_norm": 0.15590476989746094, "learning_rate": 0.002, "loss": 2.553, "step": 354720 }, { "epoch": 0.706701039143185, "grad_norm": 0.151667058467865, "learning_rate": 0.002, "loss": 2.5508, "step": 354730 }, { "epoch": 0.7067209613668239, "grad_norm": 0.18832598626613617, "learning_rate": 0.002, "loss": 2.5556, "step": 354740 }, { "epoch": 0.7067408835904628, "grad_norm": 0.14822542667388916, "learning_rate": 0.002, "loss": 2.5574, "step": 354750 }, { "epoch": 0.7067608058141017, "grad_norm": 0.16292159259319305, "learning_rate": 0.002, "loss": 2.5259, "step": 354760 }, { "epoch": 0.7067807280377406, "grad_norm": 0.14652325212955475, "learning_rate": 0.002, "loss": 2.5323, "step": 354770 }, { "epoch": 0.7068006502613796, "grad_norm": 0.15085402131080627, "learning_rate": 0.002, "loss": 2.5496, "step": 354780 }, { "epoch": 0.7068205724850185, "grad_norm": 0.1536671370267868, "learning_rate": 0.002, "loss": 2.5673, "step": 354790 }, { "epoch": 0.7068404947086574, "grad_norm": 0.14408567547798157, "learning_rate": 0.002, "loss": 2.5549, "step": 354800 }, { "epoch": 0.7068604169322963, "grad_norm": 0.2399265319108963, "learning_rate": 0.002, "loss": 2.5396, "step": 354810 }, { "epoch": 0.7068803391559352, "grad_norm": 0.1534164845943451, "learning_rate": 0.002, "loss": 2.5547, "step": 354820 }, { "epoch": 0.7069002613795742, "grad_norm": 0.16558417677879333, "learning_rate": 0.002, "loss": 2.5628, "step": 354830 }, { "epoch": 0.7069201836032131, "grad_norm": 0.1716783195734024, "learning_rate": 0.002, "loss": 2.5646, "step": 354840 }, { "epoch": 0.706940105826852, "grad_norm": 0.13168880343437195, "learning_rate": 0.002, "loss": 2.5491, "step": 354850 }, { "epoch": 0.7069600280504909, "grad_norm": 0.1587284952402115, "learning_rate": 0.002, "loss": 2.5506, "step": 354860 }, { "epoch": 0.7069799502741297, "grad_norm": 0.1507638841867447, "learning_rate": 0.002, "loss": 2.5525, "step": 354870 }, { "epoch": 0.7069998724977687, "grad_norm": 0.15862546861171722, "learning_rate": 0.002, "loss": 2.5611, "step": 354880 }, { "epoch": 0.7070197947214076, "grad_norm": 0.14044737815856934, "learning_rate": 0.002, "loss": 2.5449, "step": 354890 }, { "epoch": 0.7070397169450465, "grad_norm": 0.20006270706653595, "learning_rate": 0.002, "loss": 2.5566, "step": 354900 }, { "epoch": 0.7070596391686854, "grad_norm": 0.16270983219146729, "learning_rate": 0.002, "loss": 2.563, "step": 354910 }, { "epoch": 0.7070795613923244, "grad_norm": 0.18582135438919067, "learning_rate": 0.002, "loss": 2.5714, "step": 354920 }, { "epoch": 0.7070994836159633, "grad_norm": 0.1552537977695465, "learning_rate": 0.002, "loss": 2.5626, "step": 354930 }, { "epoch": 0.7071194058396022, "grad_norm": 0.13379745185375214, "learning_rate": 0.002, "loss": 2.5681, "step": 354940 }, { "epoch": 0.7071393280632411, "grad_norm": 0.16950610280036926, "learning_rate": 0.002, "loss": 2.5582, "step": 354950 }, { "epoch": 0.70715925028688, "grad_norm": 0.19006583094596863, "learning_rate": 0.002, "loss": 2.5498, "step": 354960 }, { "epoch": 0.707179172510519, "grad_norm": 0.17035247385501862, "learning_rate": 0.002, "loss": 2.5545, "step": 354970 }, { "epoch": 0.7071990947341579, "grad_norm": 0.17920753359794617, "learning_rate": 0.002, "loss": 2.5696, "step": 354980 }, { "epoch": 0.7072190169577968, "grad_norm": 0.18829986453056335, "learning_rate": 0.002, "loss": 2.5545, "step": 354990 }, { "epoch": 0.7072389391814357, "grad_norm": 0.15563026070594788, "learning_rate": 0.002, "loss": 2.5603, "step": 355000 }, { "epoch": 0.7072588614050745, "grad_norm": 0.17717602849006653, "learning_rate": 0.002, "loss": 2.5379, "step": 355010 }, { "epoch": 0.7072787836287135, "grad_norm": 0.17092058062553406, "learning_rate": 0.002, "loss": 2.5612, "step": 355020 }, { "epoch": 0.7072987058523524, "grad_norm": 0.17526032030582428, "learning_rate": 0.002, "loss": 2.5593, "step": 355030 }, { "epoch": 0.7073186280759913, "grad_norm": 0.17980200052261353, "learning_rate": 0.002, "loss": 2.5603, "step": 355040 }, { "epoch": 0.7073385502996302, "grad_norm": 0.1869756132364273, "learning_rate": 0.002, "loss": 2.5458, "step": 355050 }, { "epoch": 0.7073584725232691, "grad_norm": 0.15785834193229675, "learning_rate": 0.002, "loss": 2.5498, "step": 355060 }, { "epoch": 0.7073783947469081, "grad_norm": 0.16392013430595398, "learning_rate": 0.002, "loss": 2.5678, "step": 355070 }, { "epoch": 0.707398316970547, "grad_norm": 0.19355721771717072, "learning_rate": 0.002, "loss": 2.5742, "step": 355080 }, { "epoch": 0.7074182391941859, "grad_norm": 0.15545886754989624, "learning_rate": 0.002, "loss": 2.5515, "step": 355090 }, { "epoch": 0.7074381614178248, "grad_norm": 0.15775100886821747, "learning_rate": 0.002, "loss": 2.5644, "step": 355100 }, { "epoch": 0.7074580836414637, "grad_norm": 0.15286533534526825, "learning_rate": 0.002, "loss": 2.5564, "step": 355110 }, { "epoch": 0.7074780058651027, "grad_norm": 0.21659685671329498, "learning_rate": 0.002, "loss": 2.5614, "step": 355120 }, { "epoch": 0.7074979280887416, "grad_norm": 0.1471935659646988, "learning_rate": 0.002, "loss": 2.5562, "step": 355130 }, { "epoch": 0.7075178503123805, "grad_norm": 0.14043760299682617, "learning_rate": 0.002, "loss": 2.5621, "step": 355140 }, { "epoch": 0.7075377725360193, "grad_norm": 0.16756966710090637, "learning_rate": 0.002, "loss": 2.5519, "step": 355150 }, { "epoch": 0.7075576947596582, "grad_norm": 0.14905883371829987, "learning_rate": 0.002, "loss": 2.5491, "step": 355160 }, { "epoch": 0.7075776169832972, "grad_norm": 0.1515073925256729, "learning_rate": 0.002, "loss": 2.5603, "step": 355170 }, { "epoch": 0.7075975392069361, "grad_norm": 0.1706787347793579, "learning_rate": 0.002, "loss": 2.5722, "step": 355180 }, { "epoch": 0.707617461430575, "grad_norm": 0.13666300475597382, "learning_rate": 0.002, "loss": 2.5476, "step": 355190 }, { "epoch": 0.7076373836542139, "grad_norm": 0.20024743676185608, "learning_rate": 0.002, "loss": 2.552, "step": 355200 }, { "epoch": 0.7076573058778529, "grad_norm": 0.1795138716697693, "learning_rate": 0.002, "loss": 2.5554, "step": 355210 }, { "epoch": 0.7076772281014918, "grad_norm": 0.14039136469364166, "learning_rate": 0.002, "loss": 2.5516, "step": 355220 }, { "epoch": 0.7076971503251307, "grad_norm": 0.18455006182193756, "learning_rate": 0.002, "loss": 2.5491, "step": 355230 }, { "epoch": 0.7077170725487696, "grad_norm": 0.17771829664707184, "learning_rate": 0.002, "loss": 2.5603, "step": 355240 }, { "epoch": 0.7077369947724085, "grad_norm": 0.19150690734386444, "learning_rate": 0.002, "loss": 2.5558, "step": 355250 }, { "epoch": 0.7077569169960475, "grad_norm": 0.17173048853874207, "learning_rate": 0.002, "loss": 2.5514, "step": 355260 }, { "epoch": 0.7077768392196864, "grad_norm": 0.14734283089637756, "learning_rate": 0.002, "loss": 2.5495, "step": 355270 }, { "epoch": 0.7077967614433253, "grad_norm": 0.1935788094997406, "learning_rate": 0.002, "loss": 2.5627, "step": 355280 }, { "epoch": 0.7078166836669642, "grad_norm": 0.14689287543296814, "learning_rate": 0.002, "loss": 2.5494, "step": 355290 }, { "epoch": 0.707836605890603, "grad_norm": 0.17491622269153595, "learning_rate": 0.002, "loss": 2.5527, "step": 355300 }, { "epoch": 0.707856528114242, "grad_norm": 0.16483542323112488, "learning_rate": 0.002, "loss": 2.5528, "step": 355310 }, { "epoch": 0.7078764503378809, "grad_norm": 0.18419061601161957, "learning_rate": 0.002, "loss": 2.5678, "step": 355320 }, { "epoch": 0.7078963725615198, "grad_norm": 0.1694309264421463, "learning_rate": 0.002, "loss": 2.5339, "step": 355330 }, { "epoch": 0.7079162947851587, "grad_norm": 0.15864427387714386, "learning_rate": 0.002, "loss": 2.5523, "step": 355340 }, { "epoch": 0.7079362170087976, "grad_norm": 0.1552717089653015, "learning_rate": 0.002, "loss": 2.5532, "step": 355350 }, { "epoch": 0.7079561392324366, "grad_norm": 0.165871262550354, "learning_rate": 0.002, "loss": 2.5638, "step": 355360 }, { "epoch": 0.7079760614560755, "grad_norm": 0.16094756126403809, "learning_rate": 0.002, "loss": 2.5554, "step": 355370 }, { "epoch": 0.7079959836797144, "grad_norm": 0.17379532754421234, "learning_rate": 0.002, "loss": 2.551, "step": 355380 }, { "epoch": 0.7080159059033533, "grad_norm": 0.15874697268009186, "learning_rate": 0.002, "loss": 2.5684, "step": 355390 }, { "epoch": 0.7080358281269922, "grad_norm": 0.15731625258922577, "learning_rate": 0.002, "loss": 2.5452, "step": 355400 }, { "epoch": 0.7080557503506312, "grad_norm": 0.15533143281936646, "learning_rate": 0.002, "loss": 2.554, "step": 355410 }, { "epoch": 0.7080756725742701, "grad_norm": 0.17326460778713226, "learning_rate": 0.002, "loss": 2.561, "step": 355420 }, { "epoch": 0.708095594797909, "grad_norm": 0.1408829540014267, "learning_rate": 0.002, "loss": 2.5447, "step": 355430 }, { "epoch": 0.7081155170215478, "grad_norm": 0.1736825406551361, "learning_rate": 0.002, "loss": 2.552, "step": 355440 }, { "epoch": 0.7081354392451867, "grad_norm": 0.18487732112407684, "learning_rate": 0.002, "loss": 2.5464, "step": 355450 }, { "epoch": 0.7081553614688257, "grad_norm": 0.15433204174041748, "learning_rate": 0.002, "loss": 2.5531, "step": 355460 }, { "epoch": 0.7081752836924646, "grad_norm": 0.16250303387641907, "learning_rate": 0.002, "loss": 2.5709, "step": 355470 }, { "epoch": 0.7081952059161035, "grad_norm": 0.16826514899730682, "learning_rate": 0.002, "loss": 2.561, "step": 355480 }, { "epoch": 0.7082151281397424, "grad_norm": 0.1543385237455368, "learning_rate": 0.002, "loss": 2.5402, "step": 355490 }, { "epoch": 0.7082350503633814, "grad_norm": 0.19128619134426117, "learning_rate": 0.002, "loss": 2.5358, "step": 355500 }, { "epoch": 0.7082549725870203, "grad_norm": 0.1477750986814499, "learning_rate": 0.002, "loss": 2.5684, "step": 355510 }, { "epoch": 0.7082748948106592, "grad_norm": 0.1820080578327179, "learning_rate": 0.002, "loss": 2.5516, "step": 355520 }, { "epoch": 0.7082948170342981, "grad_norm": 0.15818189084529877, "learning_rate": 0.002, "loss": 2.5482, "step": 355530 }, { "epoch": 0.708314739257937, "grad_norm": 0.15818288922309875, "learning_rate": 0.002, "loss": 2.5591, "step": 355540 }, { "epoch": 0.708334661481576, "grad_norm": 0.1843128651380539, "learning_rate": 0.002, "loss": 2.5623, "step": 355550 }, { "epoch": 0.7083545837052149, "grad_norm": 0.1701258271932602, "learning_rate": 0.002, "loss": 2.5576, "step": 355560 }, { "epoch": 0.7083745059288538, "grad_norm": 0.1535477340221405, "learning_rate": 0.002, "loss": 2.5636, "step": 355570 }, { "epoch": 0.7083944281524927, "grad_norm": 0.17574404180049896, "learning_rate": 0.002, "loss": 2.5538, "step": 355580 }, { "epoch": 0.7084143503761315, "grad_norm": 0.16869132220745087, "learning_rate": 0.002, "loss": 2.5486, "step": 355590 }, { "epoch": 0.7084342725997705, "grad_norm": 0.14771351218223572, "learning_rate": 0.002, "loss": 2.5636, "step": 355600 }, { "epoch": 0.7084541948234094, "grad_norm": 0.15972323715686798, "learning_rate": 0.002, "loss": 2.5565, "step": 355610 }, { "epoch": 0.7084741170470483, "grad_norm": 0.1574072241783142, "learning_rate": 0.002, "loss": 2.5652, "step": 355620 }, { "epoch": 0.7084940392706872, "grad_norm": 0.1424369364976883, "learning_rate": 0.002, "loss": 2.5545, "step": 355630 }, { "epoch": 0.7085139614943261, "grad_norm": 0.21666477620601654, "learning_rate": 0.002, "loss": 2.5574, "step": 355640 }, { "epoch": 0.7085338837179651, "grad_norm": 0.17707066237926483, "learning_rate": 0.002, "loss": 2.5527, "step": 355650 }, { "epoch": 0.708553805941604, "grad_norm": 0.16556723415851593, "learning_rate": 0.002, "loss": 2.5697, "step": 355660 }, { "epoch": 0.7085737281652429, "grad_norm": 0.15262408554553986, "learning_rate": 0.002, "loss": 2.5347, "step": 355670 }, { "epoch": 0.7085936503888818, "grad_norm": 0.13848212361335754, "learning_rate": 0.002, "loss": 2.5568, "step": 355680 }, { "epoch": 0.7086135726125207, "grad_norm": 0.2017737329006195, "learning_rate": 0.002, "loss": 2.5661, "step": 355690 }, { "epoch": 0.7086334948361597, "grad_norm": 0.17192046344280243, "learning_rate": 0.002, "loss": 2.5732, "step": 355700 }, { "epoch": 0.7086534170597986, "grad_norm": 0.1513616144657135, "learning_rate": 0.002, "loss": 2.5759, "step": 355710 }, { "epoch": 0.7086733392834375, "grad_norm": 0.18683172762393951, "learning_rate": 0.002, "loss": 2.5601, "step": 355720 }, { "epoch": 0.7086932615070763, "grad_norm": 0.1956353783607483, "learning_rate": 0.002, "loss": 2.5421, "step": 355730 }, { "epoch": 0.7087131837307152, "grad_norm": 0.16692675650119781, "learning_rate": 0.002, "loss": 2.5588, "step": 355740 }, { "epoch": 0.7087331059543542, "grad_norm": 0.19734948873519897, "learning_rate": 0.002, "loss": 2.5331, "step": 355750 }, { "epoch": 0.7087530281779931, "grad_norm": 0.16803376376628876, "learning_rate": 0.002, "loss": 2.5634, "step": 355760 }, { "epoch": 0.708772950401632, "grad_norm": 0.14521005749702454, "learning_rate": 0.002, "loss": 2.5597, "step": 355770 }, { "epoch": 0.7087928726252709, "grad_norm": 0.1802109181880951, "learning_rate": 0.002, "loss": 2.5627, "step": 355780 }, { "epoch": 0.7088127948489099, "grad_norm": 0.15236124396324158, "learning_rate": 0.002, "loss": 2.5523, "step": 355790 }, { "epoch": 0.7088327170725488, "grad_norm": 0.16497227549552917, "learning_rate": 0.002, "loss": 2.5735, "step": 355800 }, { "epoch": 0.7088526392961877, "grad_norm": 0.1439979523420334, "learning_rate": 0.002, "loss": 2.5478, "step": 355810 }, { "epoch": 0.7088725615198266, "grad_norm": 0.16157938539981842, "learning_rate": 0.002, "loss": 2.5462, "step": 355820 }, { "epoch": 0.7088924837434655, "grad_norm": 0.1629021018743515, "learning_rate": 0.002, "loss": 2.551, "step": 355830 }, { "epoch": 0.7089124059671045, "grad_norm": 0.138754203915596, "learning_rate": 0.002, "loss": 2.5469, "step": 355840 }, { "epoch": 0.7089323281907434, "grad_norm": 0.15694788098335266, "learning_rate": 0.002, "loss": 2.5549, "step": 355850 }, { "epoch": 0.7089522504143823, "grad_norm": 0.16101938486099243, "learning_rate": 0.002, "loss": 2.5514, "step": 355860 }, { "epoch": 0.7089721726380211, "grad_norm": 0.16132377088069916, "learning_rate": 0.002, "loss": 2.5552, "step": 355870 }, { "epoch": 0.70899209486166, "grad_norm": 0.15887464582920074, "learning_rate": 0.002, "loss": 2.5453, "step": 355880 }, { "epoch": 0.709012017085299, "grad_norm": 0.1742851287126541, "learning_rate": 0.002, "loss": 2.5546, "step": 355890 }, { "epoch": 0.7090319393089379, "grad_norm": 0.14630939066410065, "learning_rate": 0.002, "loss": 2.5598, "step": 355900 }, { "epoch": 0.7090518615325768, "grad_norm": 0.21880310773849487, "learning_rate": 0.002, "loss": 2.5367, "step": 355910 }, { "epoch": 0.7090717837562157, "grad_norm": 0.17092148959636688, "learning_rate": 0.002, "loss": 2.5651, "step": 355920 }, { "epoch": 0.7090917059798546, "grad_norm": 0.17678441107273102, "learning_rate": 0.002, "loss": 2.5677, "step": 355930 }, { "epoch": 0.7091116282034936, "grad_norm": 0.16514655947685242, "learning_rate": 0.002, "loss": 2.5477, "step": 355940 }, { "epoch": 0.7091315504271325, "grad_norm": 0.14818857610225677, "learning_rate": 0.002, "loss": 2.552, "step": 355950 }, { "epoch": 0.7091514726507714, "grad_norm": 0.15409857034683228, "learning_rate": 0.002, "loss": 2.5516, "step": 355960 }, { "epoch": 0.7091713948744103, "grad_norm": 0.17695635557174683, "learning_rate": 0.002, "loss": 2.5779, "step": 355970 }, { "epoch": 0.7091913170980492, "grad_norm": 0.1546722799539566, "learning_rate": 0.002, "loss": 2.5553, "step": 355980 }, { "epoch": 0.7092112393216882, "grad_norm": 0.15090788900852203, "learning_rate": 0.002, "loss": 2.5461, "step": 355990 }, { "epoch": 0.7092311615453271, "grad_norm": 0.2116939276456833, "learning_rate": 0.002, "loss": 2.5626, "step": 356000 }, { "epoch": 0.709251083768966, "grad_norm": 0.15091589093208313, "learning_rate": 0.002, "loss": 2.5512, "step": 356010 }, { "epoch": 0.7092710059926048, "grad_norm": 0.18889300525188446, "learning_rate": 0.002, "loss": 2.5505, "step": 356020 }, { "epoch": 0.7092909282162437, "grad_norm": 0.20717556774616241, "learning_rate": 0.002, "loss": 2.5534, "step": 356030 }, { "epoch": 0.7093108504398827, "grad_norm": 0.14835812151432037, "learning_rate": 0.002, "loss": 2.5304, "step": 356040 }, { "epoch": 0.7093307726635216, "grad_norm": 0.1729315221309662, "learning_rate": 0.002, "loss": 2.5476, "step": 356050 }, { "epoch": 0.7093506948871605, "grad_norm": 0.18758977949619293, "learning_rate": 0.002, "loss": 2.5625, "step": 356060 }, { "epoch": 0.7093706171107994, "grad_norm": 0.17761805653572083, "learning_rate": 0.002, "loss": 2.5588, "step": 356070 }, { "epoch": 0.7093905393344384, "grad_norm": 0.17707379162311554, "learning_rate": 0.002, "loss": 2.5307, "step": 356080 }, { "epoch": 0.7094104615580773, "grad_norm": 0.16551542282104492, "learning_rate": 0.002, "loss": 2.5424, "step": 356090 }, { "epoch": 0.7094303837817162, "grad_norm": 0.15890322625637054, "learning_rate": 0.002, "loss": 2.5613, "step": 356100 }, { "epoch": 0.7094503060053551, "grad_norm": 0.1756158322095871, "learning_rate": 0.002, "loss": 2.5581, "step": 356110 }, { "epoch": 0.709470228228994, "grad_norm": 0.15616853535175323, "learning_rate": 0.002, "loss": 2.5608, "step": 356120 }, { "epoch": 0.709490150452633, "grad_norm": 0.15370292961597443, "learning_rate": 0.002, "loss": 2.5574, "step": 356130 }, { "epoch": 0.7095100726762719, "grad_norm": 0.1630203127861023, "learning_rate": 0.002, "loss": 2.556, "step": 356140 }, { "epoch": 0.7095299948999108, "grad_norm": 0.15009190142154694, "learning_rate": 0.002, "loss": 2.5517, "step": 356150 }, { "epoch": 0.7095499171235496, "grad_norm": 0.15843114256858826, "learning_rate": 0.002, "loss": 2.5558, "step": 356160 }, { "epoch": 0.7095698393471885, "grad_norm": 0.14696775376796722, "learning_rate": 0.002, "loss": 2.5537, "step": 356170 }, { "epoch": 0.7095897615708275, "grad_norm": 0.152384415268898, "learning_rate": 0.002, "loss": 2.5571, "step": 356180 }, { "epoch": 0.7096096837944664, "grad_norm": 0.20767606794834137, "learning_rate": 0.002, "loss": 2.5468, "step": 356190 }, { "epoch": 0.7096296060181053, "grad_norm": 0.1612020879983902, "learning_rate": 0.002, "loss": 2.5371, "step": 356200 }, { "epoch": 0.7096495282417442, "grad_norm": 0.1801023781299591, "learning_rate": 0.002, "loss": 2.5632, "step": 356210 }, { "epoch": 0.7096694504653831, "grad_norm": 0.17589233815670013, "learning_rate": 0.002, "loss": 2.5508, "step": 356220 }, { "epoch": 0.7096893726890221, "grad_norm": 0.16007447242736816, "learning_rate": 0.002, "loss": 2.5363, "step": 356230 }, { "epoch": 0.709709294912661, "grad_norm": 0.18895861506462097, "learning_rate": 0.002, "loss": 2.5525, "step": 356240 }, { "epoch": 0.7097292171362999, "grad_norm": 0.14023078978061676, "learning_rate": 0.002, "loss": 2.5548, "step": 356250 }, { "epoch": 0.7097491393599388, "grad_norm": 0.16057416796684265, "learning_rate": 0.002, "loss": 2.5495, "step": 356260 }, { "epoch": 0.7097690615835777, "grad_norm": 0.1767161637544632, "learning_rate": 0.002, "loss": 2.5593, "step": 356270 }, { "epoch": 0.7097889838072167, "grad_norm": 0.16470061242580414, "learning_rate": 0.002, "loss": 2.5551, "step": 356280 }, { "epoch": 0.7098089060308556, "grad_norm": 0.18305613100528717, "learning_rate": 0.002, "loss": 2.5581, "step": 356290 }, { "epoch": 0.7098288282544944, "grad_norm": 0.1621125340461731, "learning_rate": 0.002, "loss": 2.5699, "step": 356300 }, { "epoch": 0.7098487504781333, "grad_norm": 0.19868707656860352, "learning_rate": 0.002, "loss": 2.5461, "step": 356310 }, { "epoch": 0.7098686727017722, "grad_norm": 0.14638875424861908, "learning_rate": 0.002, "loss": 2.5475, "step": 356320 }, { "epoch": 0.7098885949254112, "grad_norm": 0.179634690284729, "learning_rate": 0.002, "loss": 2.5607, "step": 356330 }, { "epoch": 0.7099085171490501, "grad_norm": 0.14061471819877625, "learning_rate": 0.002, "loss": 2.5564, "step": 356340 }, { "epoch": 0.709928439372689, "grad_norm": 0.16343943774700165, "learning_rate": 0.002, "loss": 2.5631, "step": 356350 }, { "epoch": 0.7099483615963279, "grad_norm": 0.20071066915988922, "learning_rate": 0.002, "loss": 2.5575, "step": 356360 }, { "epoch": 0.7099682838199668, "grad_norm": 0.15902359783649445, "learning_rate": 0.002, "loss": 2.5546, "step": 356370 }, { "epoch": 0.7099882060436058, "grad_norm": 0.14945979416370392, "learning_rate": 0.002, "loss": 2.569, "step": 356380 }, { "epoch": 0.7100081282672447, "grad_norm": 0.14418131113052368, "learning_rate": 0.002, "loss": 2.5487, "step": 356390 }, { "epoch": 0.7100280504908836, "grad_norm": 0.20953448116779327, "learning_rate": 0.002, "loss": 2.5541, "step": 356400 }, { "epoch": 0.7100479727145225, "grad_norm": 0.1770174503326416, "learning_rate": 0.002, "loss": 2.5474, "step": 356410 }, { "epoch": 0.7100678949381615, "grad_norm": 0.1429666429758072, "learning_rate": 0.002, "loss": 2.5487, "step": 356420 }, { "epoch": 0.7100878171618004, "grad_norm": 0.1852891892194748, "learning_rate": 0.002, "loss": 2.5652, "step": 356430 }, { "epoch": 0.7101077393854393, "grad_norm": 0.18024112284183502, "learning_rate": 0.002, "loss": 2.5667, "step": 356440 }, { "epoch": 0.7101276616090781, "grad_norm": 0.16265766322612762, "learning_rate": 0.002, "loss": 2.5502, "step": 356450 }, { "epoch": 0.710147583832717, "grad_norm": 0.17031291127204895, "learning_rate": 0.002, "loss": 2.5553, "step": 356460 }, { "epoch": 0.710167506056356, "grad_norm": 0.15356217324733734, "learning_rate": 0.002, "loss": 2.5489, "step": 356470 }, { "epoch": 0.7101874282799949, "grad_norm": 0.17509271204471588, "learning_rate": 0.002, "loss": 2.5598, "step": 356480 }, { "epoch": 0.7102073505036338, "grad_norm": 0.16580183804035187, "learning_rate": 0.002, "loss": 2.5499, "step": 356490 }, { "epoch": 0.7102272727272727, "grad_norm": 0.15058031678199768, "learning_rate": 0.002, "loss": 2.5672, "step": 356500 }, { "epoch": 0.7102471949509116, "grad_norm": 0.19476234912872314, "learning_rate": 0.002, "loss": 2.5458, "step": 356510 }, { "epoch": 0.7102671171745506, "grad_norm": 0.17067478597164154, "learning_rate": 0.002, "loss": 2.5545, "step": 356520 }, { "epoch": 0.7102870393981895, "grad_norm": 0.17787019908428192, "learning_rate": 0.002, "loss": 2.5707, "step": 356530 }, { "epoch": 0.7103069616218284, "grad_norm": 0.15055227279663086, "learning_rate": 0.002, "loss": 2.5526, "step": 356540 }, { "epoch": 0.7103268838454673, "grad_norm": 0.17455673217773438, "learning_rate": 0.002, "loss": 2.5476, "step": 356550 }, { "epoch": 0.7103468060691062, "grad_norm": 0.14781604707241058, "learning_rate": 0.002, "loss": 2.5554, "step": 356560 }, { "epoch": 0.7103667282927452, "grad_norm": 0.17302754521369934, "learning_rate": 0.002, "loss": 2.5602, "step": 356570 }, { "epoch": 0.710386650516384, "grad_norm": 0.18610453605651855, "learning_rate": 0.002, "loss": 2.5466, "step": 356580 }, { "epoch": 0.710406572740023, "grad_norm": 0.1879977136850357, "learning_rate": 0.002, "loss": 2.5461, "step": 356590 }, { "epoch": 0.7104264949636618, "grad_norm": 0.16187439858913422, "learning_rate": 0.002, "loss": 2.5541, "step": 356600 }, { "epoch": 0.7104464171873007, "grad_norm": 0.21002380549907684, "learning_rate": 0.002, "loss": 2.5487, "step": 356610 }, { "epoch": 0.7104663394109397, "grad_norm": 0.17432793974876404, "learning_rate": 0.002, "loss": 2.5585, "step": 356620 }, { "epoch": 0.7104862616345786, "grad_norm": 0.1405283510684967, "learning_rate": 0.002, "loss": 2.5621, "step": 356630 }, { "epoch": 0.7105061838582175, "grad_norm": 0.17392753064632416, "learning_rate": 0.002, "loss": 2.5667, "step": 356640 }, { "epoch": 0.7105261060818564, "grad_norm": 0.1296796053647995, "learning_rate": 0.002, "loss": 2.5432, "step": 356650 }, { "epoch": 0.7105460283054953, "grad_norm": 0.1430756151676178, "learning_rate": 0.002, "loss": 2.5594, "step": 356660 }, { "epoch": 0.7105659505291343, "grad_norm": 0.19022144377231598, "learning_rate": 0.002, "loss": 2.5533, "step": 356670 }, { "epoch": 0.7105858727527732, "grad_norm": 0.16740889847278595, "learning_rate": 0.002, "loss": 2.5524, "step": 356680 }, { "epoch": 0.7106057949764121, "grad_norm": 0.17546728253364563, "learning_rate": 0.002, "loss": 2.5592, "step": 356690 }, { "epoch": 0.710625717200051, "grad_norm": 0.16127827763557434, "learning_rate": 0.002, "loss": 2.5544, "step": 356700 }, { "epoch": 0.71064563942369, "grad_norm": 0.17702141404151917, "learning_rate": 0.002, "loss": 2.5498, "step": 356710 }, { "epoch": 0.7106655616473289, "grad_norm": 0.17298297584056854, "learning_rate": 0.002, "loss": 2.5466, "step": 356720 }, { "epoch": 0.7106854838709677, "grad_norm": 0.2575921416282654, "learning_rate": 0.002, "loss": 2.5443, "step": 356730 }, { "epoch": 0.7107054060946066, "grad_norm": 0.18376412987709045, "learning_rate": 0.002, "loss": 2.5602, "step": 356740 }, { "epoch": 0.7107253283182455, "grad_norm": 0.16907744109630585, "learning_rate": 0.002, "loss": 2.5343, "step": 356750 }, { "epoch": 0.7107452505418845, "grad_norm": 0.15198840200901031, "learning_rate": 0.002, "loss": 2.5502, "step": 356760 }, { "epoch": 0.7107651727655234, "grad_norm": 0.1620088368654251, "learning_rate": 0.002, "loss": 2.5623, "step": 356770 }, { "epoch": 0.7107850949891623, "grad_norm": 0.13848939538002014, "learning_rate": 0.002, "loss": 2.5516, "step": 356780 }, { "epoch": 0.7108050172128012, "grad_norm": 0.1555074006319046, "learning_rate": 0.002, "loss": 2.5569, "step": 356790 }, { "epoch": 0.7108249394364401, "grad_norm": 0.14798009395599365, "learning_rate": 0.002, "loss": 2.5668, "step": 356800 }, { "epoch": 0.7108448616600791, "grad_norm": 0.13450846076011658, "learning_rate": 0.002, "loss": 2.5384, "step": 356810 }, { "epoch": 0.710864783883718, "grad_norm": 0.15990765392780304, "learning_rate": 0.002, "loss": 2.5621, "step": 356820 }, { "epoch": 0.7108847061073569, "grad_norm": 0.16996046900749207, "learning_rate": 0.002, "loss": 2.5534, "step": 356830 }, { "epoch": 0.7109046283309958, "grad_norm": 0.1544705480337143, "learning_rate": 0.002, "loss": 2.5553, "step": 356840 }, { "epoch": 0.7109245505546347, "grad_norm": 0.17758046090602875, "learning_rate": 0.002, "loss": 2.5549, "step": 356850 }, { "epoch": 0.7109444727782737, "grad_norm": 0.173304945230484, "learning_rate": 0.002, "loss": 2.5612, "step": 356860 }, { "epoch": 0.7109643950019126, "grad_norm": 0.14423222839832306, "learning_rate": 0.002, "loss": 2.5374, "step": 356870 }, { "epoch": 0.7109843172255514, "grad_norm": 0.15923722088336945, "learning_rate": 0.002, "loss": 2.5405, "step": 356880 }, { "epoch": 0.7110042394491903, "grad_norm": 0.1968873143196106, "learning_rate": 0.002, "loss": 2.5447, "step": 356890 }, { "epoch": 0.7110241616728292, "grad_norm": 0.1595393270254135, "learning_rate": 0.002, "loss": 2.5464, "step": 356900 }, { "epoch": 0.7110440838964682, "grad_norm": 0.20742058753967285, "learning_rate": 0.002, "loss": 2.5442, "step": 356910 }, { "epoch": 0.7110640061201071, "grad_norm": 0.1956663578748703, "learning_rate": 0.002, "loss": 2.5585, "step": 356920 }, { "epoch": 0.711083928343746, "grad_norm": 0.18079325556755066, "learning_rate": 0.002, "loss": 2.5488, "step": 356930 }, { "epoch": 0.7111038505673849, "grad_norm": 0.16340433061122894, "learning_rate": 0.002, "loss": 2.5674, "step": 356940 }, { "epoch": 0.7111237727910238, "grad_norm": 0.1702137589454651, "learning_rate": 0.002, "loss": 2.5403, "step": 356950 }, { "epoch": 0.7111436950146628, "grad_norm": 0.16741859912872314, "learning_rate": 0.002, "loss": 2.5651, "step": 356960 }, { "epoch": 0.7111636172383017, "grad_norm": 0.15171003341674805, "learning_rate": 0.002, "loss": 2.5581, "step": 356970 }, { "epoch": 0.7111835394619406, "grad_norm": 0.15786436200141907, "learning_rate": 0.002, "loss": 2.5431, "step": 356980 }, { "epoch": 0.7112034616855795, "grad_norm": 0.18305917084217072, "learning_rate": 0.002, "loss": 2.5635, "step": 356990 }, { "epoch": 0.7112233839092185, "grad_norm": 0.18948844075202942, "learning_rate": 0.002, "loss": 2.5485, "step": 357000 }, { "epoch": 0.7112433061328574, "grad_norm": 0.1586613655090332, "learning_rate": 0.002, "loss": 2.5497, "step": 357010 }, { "epoch": 0.7112632283564962, "grad_norm": 0.16906161606311798, "learning_rate": 0.002, "loss": 2.558, "step": 357020 }, { "epoch": 0.7112831505801351, "grad_norm": 0.16187460720539093, "learning_rate": 0.002, "loss": 2.5417, "step": 357030 }, { "epoch": 0.711303072803774, "grad_norm": 0.1785610318183899, "learning_rate": 0.002, "loss": 2.5456, "step": 357040 }, { "epoch": 0.711322995027413, "grad_norm": 0.16761156916618347, "learning_rate": 0.002, "loss": 2.558, "step": 357050 }, { "epoch": 0.7113429172510519, "grad_norm": 0.14527499675750732, "learning_rate": 0.002, "loss": 2.5576, "step": 357060 }, { "epoch": 0.7113628394746908, "grad_norm": 0.20207135379314423, "learning_rate": 0.002, "loss": 2.5494, "step": 357070 }, { "epoch": 0.7113827616983297, "grad_norm": 0.17547112703323364, "learning_rate": 0.002, "loss": 2.5537, "step": 357080 }, { "epoch": 0.7114026839219686, "grad_norm": 0.14305542409420013, "learning_rate": 0.002, "loss": 2.5535, "step": 357090 }, { "epoch": 0.7114226061456076, "grad_norm": 0.17466139793395996, "learning_rate": 0.002, "loss": 2.5446, "step": 357100 }, { "epoch": 0.7114425283692465, "grad_norm": 0.1495627760887146, "learning_rate": 0.002, "loss": 2.5495, "step": 357110 }, { "epoch": 0.7114624505928854, "grad_norm": 0.16302591562271118, "learning_rate": 0.002, "loss": 2.5616, "step": 357120 }, { "epoch": 0.7114823728165243, "grad_norm": 0.140116885304451, "learning_rate": 0.002, "loss": 2.5701, "step": 357130 }, { "epoch": 0.7115022950401632, "grad_norm": 0.1639382690191269, "learning_rate": 0.002, "loss": 2.5503, "step": 357140 }, { "epoch": 0.7115222172638022, "grad_norm": 0.1498776227235794, "learning_rate": 0.002, "loss": 2.5695, "step": 357150 }, { "epoch": 0.711542139487441, "grad_norm": 0.19106633961200714, "learning_rate": 0.002, "loss": 2.5497, "step": 357160 }, { "epoch": 0.7115620617110799, "grad_norm": 0.13187329471111298, "learning_rate": 0.002, "loss": 2.5397, "step": 357170 }, { "epoch": 0.7115819839347188, "grad_norm": 0.21464881300926208, "learning_rate": 0.002, "loss": 2.5471, "step": 357180 }, { "epoch": 0.7116019061583577, "grad_norm": 0.16334682703018188, "learning_rate": 0.002, "loss": 2.5597, "step": 357190 }, { "epoch": 0.7116218283819967, "grad_norm": 0.1514769345521927, "learning_rate": 0.002, "loss": 2.543, "step": 357200 }, { "epoch": 0.7116417506056356, "grad_norm": 0.13039067387580872, "learning_rate": 0.002, "loss": 2.5372, "step": 357210 }, { "epoch": 0.7116616728292745, "grad_norm": 0.2222760021686554, "learning_rate": 0.002, "loss": 2.5441, "step": 357220 }, { "epoch": 0.7116815950529134, "grad_norm": 0.15017686784267426, "learning_rate": 0.002, "loss": 2.5526, "step": 357230 }, { "epoch": 0.7117015172765523, "grad_norm": 0.1567668914794922, "learning_rate": 0.002, "loss": 2.5515, "step": 357240 }, { "epoch": 0.7117214395001913, "grad_norm": 0.16681192815303802, "learning_rate": 0.002, "loss": 2.5509, "step": 357250 }, { "epoch": 0.7117413617238302, "grad_norm": 0.2074110358953476, "learning_rate": 0.002, "loss": 2.5505, "step": 357260 }, { "epoch": 0.7117612839474691, "grad_norm": 0.18367348611354828, "learning_rate": 0.002, "loss": 2.5557, "step": 357270 }, { "epoch": 0.711781206171108, "grad_norm": 0.16422611474990845, "learning_rate": 0.002, "loss": 2.56, "step": 357280 }, { "epoch": 0.711801128394747, "grad_norm": 0.16143758594989777, "learning_rate": 0.002, "loss": 2.5482, "step": 357290 }, { "epoch": 0.7118210506183859, "grad_norm": 0.18726344406604767, "learning_rate": 0.002, "loss": 2.5691, "step": 357300 }, { "epoch": 0.7118409728420247, "grad_norm": 0.2150609940290451, "learning_rate": 0.002, "loss": 2.5588, "step": 357310 }, { "epoch": 0.7118608950656636, "grad_norm": 0.1491941213607788, "learning_rate": 0.002, "loss": 2.5457, "step": 357320 }, { "epoch": 0.7118808172893025, "grad_norm": 0.14423391222953796, "learning_rate": 0.002, "loss": 2.5451, "step": 357330 }, { "epoch": 0.7119007395129415, "grad_norm": 0.17326970398426056, "learning_rate": 0.002, "loss": 2.5459, "step": 357340 }, { "epoch": 0.7119206617365804, "grad_norm": 0.186028391122818, "learning_rate": 0.002, "loss": 2.555, "step": 357350 }, { "epoch": 0.7119405839602193, "grad_norm": 0.1529187262058258, "learning_rate": 0.002, "loss": 2.5597, "step": 357360 }, { "epoch": 0.7119605061838582, "grad_norm": 0.1473853439092636, "learning_rate": 0.002, "loss": 2.5512, "step": 357370 }, { "epoch": 0.7119804284074971, "grad_norm": 0.13630905747413635, "learning_rate": 0.002, "loss": 2.5526, "step": 357380 }, { "epoch": 0.7120003506311361, "grad_norm": 0.2233893871307373, "learning_rate": 0.002, "loss": 2.5522, "step": 357390 }, { "epoch": 0.712020272854775, "grad_norm": 0.21204376220703125, "learning_rate": 0.002, "loss": 2.5379, "step": 357400 }, { "epoch": 0.7120401950784139, "grad_norm": 0.1713603287935257, "learning_rate": 0.002, "loss": 2.5624, "step": 357410 }, { "epoch": 0.7120601173020528, "grad_norm": 0.1940683275461197, "learning_rate": 0.002, "loss": 2.5649, "step": 357420 }, { "epoch": 0.7120800395256917, "grad_norm": 0.18020761013031006, "learning_rate": 0.002, "loss": 2.5578, "step": 357430 }, { "epoch": 0.7120999617493307, "grad_norm": 0.16789107024669647, "learning_rate": 0.002, "loss": 2.542, "step": 357440 }, { "epoch": 0.7121198839729695, "grad_norm": 0.16667187213897705, "learning_rate": 0.002, "loss": 2.5407, "step": 357450 }, { "epoch": 0.7121398061966084, "grad_norm": 0.1652347892522812, "learning_rate": 0.002, "loss": 2.569, "step": 357460 }, { "epoch": 0.7121597284202473, "grad_norm": 0.15784971415996552, "learning_rate": 0.002, "loss": 2.5529, "step": 357470 }, { "epoch": 0.7121796506438862, "grad_norm": 0.1491444706916809, "learning_rate": 0.002, "loss": 2.5442, "step": 357480 }, { "epoch": 0.7121995728675252, "grad_norm": 0.17184695601463318, "learning_rate": 0.002, "loss": 2.5651, "step": 357490 }, { "epoch": 0.7122194950911641, "grad_norm": 0.1760156899690628, "learning_rate": 0.002, "loss": 2.5445, "step": 357500 }, { "epoch": 0.712239417314803, "grad_norm": 0.22245413064956665, "learning_rate": 0.002, "loss": 2.5517, "step": 357510 }, { "epoch": 0.7122593395384419, "grad_norm": 0.16160522401332855, "learning_rate": 0.002, "loss": 2.5576, "step": 357520 }, { "epoch": 0.7122792617620808, "grad_norm": 0.1603049337863922, "learning_rate": 0.002, "loss": 2.5535, "step": 357530 }, { "epoch": 0.7122991839857198, "grad_norm": 0.1665521264076233, "learning_rate": 0.002, "loss": 2.5519, "step": 357540 }, { "epoch": 0.7123191062093587, "grad_norm": 0.12856769561767578, "learning_rate": 0.002, "loss": 2.5497, "step": 357550 }, { "epoch": 0.7123390284329976, "grad_norm": 0.18111243844032288, "learning_rate": 0.002, "loss": 2.5546, "step": 357560 }, { "epoch": 0.7123589506566365, "grad_norm": 0.14454297721385956, "learning_rate": 0.002, "loss": 2.5546, "step": 357570 }, { "epoch": 0.7123788728802755, "grad_norm": 0.1699957251548767, "learning_rate": 0.002, "loss": 2.5491, "step": 357580 }, { "epoch": 0.7123987951039144, "grad_norm": 0.16252481937408447, "learning_rate": 0.002, "loss": 2.5422, "step": 357590 }, { "epoch": 0.7124187173275532, "grad_norm": 0.22469568252563477, "learning_rate": 0.002, "loss": 2.5597, "step": 357600 }, { "epoch": 0.7124386395511921, "grad_norm": 0.20210348069667816, "learning_rate": 0.002, "loss": 2.5562, "step": 357610 }, { "epoch": 0.712458561774831, "grad_norm": 0.15254031121730804, "learning_rate": 0.002, "loss": 2.5559, "step": 357620 }, { "epoch": 0.71247848399847, "grad_norm": 0.17605619132518768, "learning_rate": 0.002, "loss": 2.5574, "step": 357630 }, { "epoch": 0.7124984062221089, "grad_norm": 0.15684492886066437, "learning_rate": 0.002, "loss": 2.5596, "step": 357640 }, { "epoch": 0.7125183284457478, "grad_norm": 0.15564773976802826, "learning_rate": 0.002, "loss": 2.547, "step": 357650 }, { "epoch": 0.7125382506693867, "grad_norm": 0.1608753502368927, "learning_rate": 0.002, "loss": 2.5373, "step": 357660 }, { "epoch": 0.7125581728930256, "grad_norm": 0.14886336028575897, "learning_rate": 0.002, "loss": 2.5558, "step": 357670 }, { "epoch": 0.7125780951166646, "grad_norm": 0.20330460369586945, "learning_rate": 0.002, "loss": 2.5492, "step": 357680 }, { "epoch": 0.7125980173403035, "grad_norm": 0.166458398103714, "learning_rate": 0.002, "loss": 2.5549, "step": 357690 }, { "epoch": 0.7126179395639424, "grad_norm": 0.1435166746377945, "learning_rate": 0.002, "loss": 2.5618, "step": 357700 }, { "epoch": 0.7126378617875813, "grad_norm": 0.17203223705291748, "learning_rate": 0.002, "loss": 2.5503, "step": 357710 }, { "epoch": 0.7126577840112202, "grad_norm": 0.2174784392118454, "learning_rate": 0.002, "loss": 2.5571, "step": 357720 }, { "epoch": 0.7126777062348592, "grad_norm": 0.1536487340927124, "learning_rate": 0.002, "loss": 2.5421, "step": 357730 }, { "epoch": 0.712697628458498, "grad_norm": 0.13790932297706604, "learning_rate": 0.002, "loss": 2.541, "step": 357740 }, { "epoch": 0.7127175506821369, "grad_norm": 0.1665833294391632, "learning_rate": 0.002, "loss": 2.5589, "step": 357750 }, { "epoch": 0.7127374729057758, "grad_norm": 0.15325963497161865, "learning_rate": 0.002, "loss": 2.5654, "step": 357760 }, { "epoch": 0.7127573951294147, "grad_norm": 0.16413205862045288, "learning_rate": 0.002, "loss": 2.5494, "step": 357770 }, { "epoch": 0.7127773173530537, "grad_norm": 0.1991676241159439, "learning_rate": 0.002, "loss": 2.5396, "step": 357780 }, { "epoch": 0.7127972395766926, "grad_norm": 0.16016775369644165, "learning_rate": 0.002, "loss": 2.5404, "step": 357790 }, { "epoch": 0.7128171618003315, "grad_norm": 0.1616130769252777, "learning_rate": 0.002, "loss": 2.5614, "step": 357800 }, { "epoch": 0.7128370840239704, "grad_norm": 0.16040544211864471, "learning_rate": 0.002, "loss": 2.5588, "step": 357810 }, { "epoch": 0.7128570062476093, "grad_norm": 0.15775340795516968, "learning_rate": 0.002, "loss": 2.5533, "step": 357820 }, { "epoch": 0.7128769284712483, "grad_norm": 0.17133985459804535, "learning_rate": 0.002, "loss": 2.5464, "step": 357830 }, { "epoch": 0.7128968506948872, "grad_norm": 0.1494489312171936, "learning_rate": 0.002, "loss": 2.5525, "step": 357840 }, { "epoch": 0.7129167729185261, "grad_norm": 0.16359859704971313, "learning_rate": 0.002, "loss": 2.5562, "step": 357850 }, { "epoch": 0.712936695142165, "grad_norm": 0.2252657562494278, "learning_rate": 0.002, "loss": 2.5619, "step": 357860 }, { "epoch": 0.7129566173658038, "grad_norm": 0.18669643998146057, "learning_rate": 0.002, "loss": 2.5604, "step": 357870 }, { "epoch": 0.7129765395894428, "grad_norm": 0.15200649201869965, "learning_rate": 0.002, "loss": 2.5472, "step": 357880 }, { "epoch": 0.7129964618130817, "grad_norm": 0.15145722031593323, "learning_rate": 0.002, "loss": 2.5332, "step": 357890 }, { "epoch": 0.7130163840367206, "grad_norm": 0.1582396775484085, "learning_rate": 0.002, "loss": 2.5576, "step": 357900 }, { "epoch": 0.7130363062603595, "grad_norm": 0.16655054688453674, "learning_rate": 0.002, "loss": 2.5581, "step": 357910 }, { "epoch": 0.7130562284839985, "grad_norm": 0.16111433506011963, "learning_rate": 0.002, "loss": 2.5394, "step": 357920 }, { "epoch": 0.7130761507076374, "grad_norm": 0.17189471423625946, "learning_rate": 0.002, "loss": 2.5511, "step": 357930 }, { "epoch": 0.7130960729312763, "grad_norm": 0.2989984154701233, "learning_rate": 0.002, "loss": 2.5489, "step": 357940 }, { "epoch": 0.7131159951549152, "grad_norm": 0.17815767228603363, "learning_rate": 0.002, "loss": 2.5563, "step": 357950 }, { "epoch": 0.7131359173785541, "grad_norm": 0.16451206803321838, "learning_rate": 0.002, "loss": 2.5587, "step": 357960 }, { "epoch": 0.7131558396021931, "grad_norm": 0.17724256217479706, "learning_rate": 0.002, "loss": 2.5578, "step": 357970 }, { "epoch": 0.713175761825832, "grad_norm": 0.1566087305545807, "learning_rate": 0.002, "loss": 2.5345, "step": 357980 }, { "epoch": 0.7131956840494709, "grad_norm": 0.16626891493797302, "learning_rate": 0.002, "loss": 2.5502, "step": 357990 }, { "epoch": 0.7132156062731098, "grad_norm": 0.1871318817138672, "learning_rate": 0.002, "loss": 2.5506, "step": 358000 }, { "epoch": 0.7132355284967486, "grad_norm": 0.17537079751491547, "learning_rate": 0.002, "loss": 2.5682, "step": 358010 }, { "epoch": 0.7132554507203877, "grad_norm": 0.16206052899360657, "learning_rate": 0.002, "loss": 2.549, "step": 358020 }, { "epoch": 0.7132753729440265, "grad_norm": 0.13651423156261444, "learning_rate": 0.002, "loss": 2.5538, "step": 358030 }, { "epoch": 0.7132952951676654, "grad_norm": 0.16328445076942444, "learning_rate": 0.002, "loss": 2.5406, "step": 358040 }, { "epoch": 0.7133152173913043, "grad_norm": 0.16930823028087616, "learning_rate": 0.002, "loss": 2.555, "step": 358050 }, { "epoch": 0.7133351396149432, "grad_norm": 0.2123313844203949, "learning_rate": 0.002, "loss": 2.5616, "step": 358060 }, { "epoch": 0.7133550618385822, "grad_norm": 0.2099432647228241, "learning_rate": 0.002, "loss": 2.543, "step": 358070 }, { "epoch": 0.7133749840622211, "grad_norm": 0.2475215047597885, "learning_rate": 0.002, "loss": 2.5775, "step": 358080 }, { "epoch": 0.71339490628586, "grad_norm": 0.1758769005537033, "learning_rate": 0.002, "loss": 2.5628, "step": 358090 }, { "epoch": 0.7134148285094989, "grad_norm": 0.15499532222747803, "learning_rate": 0.002, "loss": 2.5702, "step": 358100 }, { "epoch": 0.7134347507331378, "grad_norm": 0.21694843471050262, "learning_rate": 0.002, "loss": 2.572, "step": 358110 }, { "epoch": 0.7134546729567768, "grad_norm": 0.1610974371433258, "learning_rate": 0.002, "loss": 2.5456, "step": 358120 }, { "epoch": 0.7134745951804157, "grad_norm": 0.15359292924404144, "learning_rate": 0.002, "loss": 2.5586, "step": 358130 }, { "epoch": 0.7134945174040546, "grad_norm": 0.19337981939315796, "learning_rate": 0.002, "loss": 2.5611, "step": 358140 }, { "epoch": 0.7135144396276935, "grad_norm": 0.18171162903308868, "learning_rate": 0.002, "loss": 2.5541, "step": 358150 }, { "epoch": 0.7135343618513323, "grad_norm": 0.14997084438800812, "learning_rate": 0.002, "loss": 2.5417, "step": 358160 }, { "epoch": 0.7135542840749713, "grad_norm": 0.18685218691825867, "learning_rate": 0.002, "loss": 2.5406, "step": 358170 }, { "epoch": 0.7135742062986102, "grad_norm": 0.16741567850112915, "learning_rate": 0.002, "loss": 2.5519, "step": 358180 }, { "epoch": 0.7135941285222491, "grad_norm": 0.17979994416236877, "learning_rate": 0.002, "loss": 2.5539, "step": 358190 }, { "epoch": 0.713614050745888, "grad_norm": 0.17396365106105804, "learning_rate": 0.002, "loss": 2.5441, "step": 358200 }, { "epoch": 0.713633972969527, "grad_norm": 0.21524210274219513, "learning_rate": 0.002, "loss": 2.5611, "step": 358210 }, { "epoch": 0.7136538951931659, "grad_norm": 0.16468453407287598, "learning_rate": 0.002, "loss": 2.5559, "step": 358220 }, { "epoch": 0.7136738174168048, "grad_norm": 0.16121892631053925, "learning_rate": 0.002, "loss": 2.5541, "step": 358230 }, { "epoch": 0.7136937396404437, "grad_norm": 0.1457400918006897, "learning_rate": 0.002, "loss": 2.5482, "step": 358240 }, { "epoch": 0.7137136618640826, "grad_norm": 0.15350060164928436, "learning_rate": 0.002, "loss": 2.5527, "step": 358250 }, { "epoch": 0.7137335840877216, "grad_norm": 0.1878027766942978, "learning_rate": 0.002, "loss": 2.5681, "step": 358260 }, { "epoch": 0.7137535063113605, "grad_norm": 0.20435799658298492, "learning_rate": 0.002, "loss": 2.55, "step": 358270 }, { "epoch": 0.7137734285349994, "grad_norm": 0.170481875538826, "learning_rate": 0.002, "loss": 2.5629, "step": 358280 }, { "epoch": 0.7137933507586383, "grad_norm": 0.1549878567457199, "learning_rate": 0.002, "loss": 2.5524, "step": 358290 }, { "epoch": 0.7138132729822771, "grad_norm": 0.1710585504770279, "learning_rate": 0.002, "loss": 2.5721, "step": 358300 }, { "epoch": 0.7138331952059161, "grad_norm": 0.1522163599729538, "learning_rate": 0.002, "loss": 2.572, "step": 358310 }, { "epoch": 0.713853117429555, "grad_norm": 0.1733926385641098, "learning_rate": 0.002, "loss": 2.5612, "step": 358320 }, { "epoch": 0.7138730396531939, "grad_norm": 0.16838790476322174, "learning_rate": 0.002, "loss": 2.5687, "step": 358330 }, { "epoch": 0.7138929618768328, "grad_norm": 0.18268966674804688, "learning_rate": 0.002, "loss": 2.5746, "step": 358340 }, { "epoch": 0.7139128841004717, "grad_norm": 0.18379724025726318, "learning_rate": 0.002, "loss": 2.5426, "step": 358350 }, { "epoch": 0.7139328063241107, "grad_norm": 0.15009278059005737, "learning_rate": 0.002, "loss": 2.5547, "step": 358360 }, { "epoch": 0.7139527285477496, "grad_norm": 0.209206223487854, "learning_rate": 0.002, "loss": 2.5447, "step": 358370 }, { "epoch": 0.7139726507713885, "grad_norm": 0.15581564605236053, "learning_rate": 0.002, "loss": 2.5548, "step": 358380 }, { "epoch": 0.7139925729950274, "grad_norm": 0.16676364839076996, "learning_rate": 0.002, "loss": 2.5557, "step": 358390 }, { "epoch": 0.7140124952186663, "grad_norm": 0.1953951120376587, "learning_rate": 0.002, "loss": 2.5516, "step": 358400 }, { "epoch": 0.7140324174423053, "grad_norm": 0.16702815890312195, "learning_rate": 0.002, "loss": 2.5608, "step": 358410 }, { "epoch": 0.7140523396659442, "grad_norm": 0.18733705580234528, "learning_rate": 0.002, "loss": 2.5515, "step": 358420 }, { "epoch": 0.7140722618895831, "grad_norm": 0.15297013521194458, "learning_rate": 0.002, "loss": 2.5618, "step": 358430 }, { "epoch": 0.714092184113222, "grad_norm": 0.1985168308019638, "learning_rate": 0.002, "loss": 2.5565, "step": 358440 }, { "epoch": 0.7141121063368608, "grad_norm": 0.14830873906612396, "learning_rate": 0.002, "loss": 2.55, "step": 358450 }, { "epoch": 0.7141320285604998, "grad_norm": 0.13263314962387085, "learning_rate": 0.002, "loss": 2.5527, "step": 358460 }, { "epoch": 0.7141519507841387, "grad_norm": 0.16777941584587097, "learning_rate": 0.002, "loss": 2.5582, "step": 358470 }, { "epoch": 0.7141718730077776, "grad_norm": 0.18054409325122833, "learning_rate": 0.002, "loss": 2.5597, "step": 358480 }, { "epoch": 0.7141917952314165, "grad_norm": 0.13660064339637756, "learning_rate": 0.002, "loss": 2.5611, "step": 358490 }, { "epoch": 0.7142117174550555, "grad_norm": 0.1661725789308548, "learning_rate": 0.002, "loss": 2.5457, "step": 358500 }, { "epoch": 0.7142316396786944, "grad_norm": 0.18706001341342926, "learning_rate": 0.002, "loss": 2.5673, "step": 358510 }, { "epoch": 0.7142515619023333, "grad_norm": 0.18531203269958496, "learning_rate": 0.002, "loss": 2.5464, "step": 358520 }, { "epoch": 0.7142714841259722, "grad_norm": 0.19928526878356934, "learning_rate": 0.002, "loss": 2.5614, "step": 358530 }, { "epoch": 0.7142914063496111, "grad_norm": 0.1606828272342682, "learning_rate": 0.002, "loss": 2.5556, "step": 358540 }, { "epoch": 0.7143113285732501, "grad_norm": 0.1454392522573471, "learning_rate": 0.002, "loss": 2.5742, "step": 358550 }, { "epoch": 0.714331250796889, "grad_norm": 0.15569862723350525, "learning_rate": 0.002, "loss": 2.5502, "step": 358560 }, { "epoch": 0.7143511730205279, "grad_norm": 0.14371633529663086, "learning_rate": 0.002, "loss": 2.5593, "step": 358570 }, { "epoch": 0.7143710952441668, "grad_norm": 0.16417601704597473, "learning_rate": 0.002, "loss": 2.5667, "step": 358580 }, { "epoch": 0.7143910174678056, "grad_norm": 0.35166287422180176, "learning_rate": 0.002, "loss": 2.5721, "step": 358590 }, { "epoch": 0.7144109396914446, "grad_norm": 0.15769077837467194, "learning_rate": 0.002, "loss": 2.5538, "step": 358600 }, { "epoch": 0.7144308619150835, "grad_norm": 0.16960999369621277, "learning_rate": 0.002, "loss": 2.54, "step": 358610 }, { "epoch": 0.7144507841387224, "grad_norm": 0.15068575739860535, "learning_rate": 0.002, "loss": 2.5442, "step": 358620 }, { "epoch": 0.7144707063623613, "grad_norm": 0.15474452078342438, "learning_rate": 0.002, "loss": 2.5497, "step": 358630 }, { "epoch": 0.7144906285860002, "grad_norm": 0.1717957854270935, "learning_rate": 0.002, "loss": 2.5584, "step": 358640 }, { "epoch": 0.7145105508096392, "grad_norm": 0.14783771336078644, "learning_rate": 0.002, "loss": 2.5508, "step": 358650 }, { "epoch": 0.7145304730332781, "grad_norm": 0.16878065466880798, "learning_rate": 0.002, "loss": 2.5644, "step": 358660 }, { "epoch": 0.714550395256917, "grad_norm": 0.1883334368467331, "learning_rate": 0.002, "loss": 2.5663, "step": 358670 }, { "epoch": 0.7145703174805559, "grad_norm": 0.14677096903324127, "learning_rate": 0.002, "loss": 2.551, "step": 358680 }, { "epoch": 0.7145902397041948, "grad_norm": 0.230960875749588, "learning_rate": 0.002, "loss": 2.5601, "step": 358690 }, { "epoch": 0.7146101619278338, "grad_norm": 0.14946337044239044, "learning_rate": 0.002, "loss": 2.5472, "step": 358700 }, { "epoch": 0.7146300841514727, "grad_norm": 0.1690576672554016, "learning_rate": 0.002, "loss": 2.5412, "step": 358710 }, { "epoch": 0.7146500063751116, "grad_norm": 0.1813058704137802, "learning_rate": 0.002, "loss": 2.5593, "step": 358720 }, { "epoch": 0.7146699285987504, "grad_norm": 0.16517531871795654, "learning_rate": 0.002, "loss": 2.5552, "step": 358730 }, { "epoch": 0.7146898508223893, "grad_norm": 0.16811268031597137, "learning_rate": 0.002, "loss": 2.5575, "step": 358740 }, { "epoch": 0.7147097730460283, "grad_norm": 0.175123929977417, "learning_rate": 0.002, "loss": 2.5553, "step": 358750 }, { "epoch": 0.7147296952696672, "grad_norm": 0.16052822768688202, "learning_rate": 0.002, "loss": 2.5718, "step": 358760 }, { "epoch": 0.7147496174933061, "grad_norm": 0.15461812913417816, "learning_rate": 0.002, "loss": 2.5562, "step": 358770 }, { "epoch": 0.714769539716945, "grad_norm": 0.1465650051832199, "learning_rate": 0.002, "loss": 2.5457, "step": 358780 }, { "epoch": 0.714789461940584, "grad_norm": 0.15322741866111755, "learning_rate": 0.002, "loss": 2.5725, "step": 358790 }, { "epoch": 0.7148093841642229, "grad_norm": 0.16090841591358185, "learning_rate": 0.002, "loss": 2.5411, "step": 358800 }, { "epoch": 0.7148293063878618, "grad_norm": 0.18681985139846802, "learning_rate": 0.002, "loss": 2.5524, "step": 358810 }, { "epoch": 0.7148492286115007, "grad_norm": 0.18320691585540771, "learning_rate": 0.002, "loss": 2.5673, "step": 358820 }, { "epoch": 0.7148691508351396, "grad_norm": 0.16972507536411285, "learning_rate": 0.002, "loss": 2.5551, "step": 358830 }, { "epoch": 0.7148890730587786, "grad_norm": 0.1762121617794037, "learning_rate": 0.002, "loss": 2.5477, "step": 358840 }, { "epoch": 0.7149089952824175, "grad_norm": 0.14924106001853943, "learning_rate": 0.002, "loss": 2.5529, "step": 358850 }, { "epoch": 0.7149289175060564, "grad_norm": 0.13709254562854767, "learning_rate": 0.002, "loss": 2.5561, "step": 358860 }, { "epoch": 0.7149488397296953, "grad_norm": 0.1965562254190445, "learning_rate": 0.002, "loss": 2.5591, "step": 358870 }, { "epoch": 0.7149687619533341, "grad_norm": 0.16751061379909515, "learning_rate": 0.002, "loss": 2.5585, "step": 358880 }, { "epoch": 0.7149886841769731, "grad_norm": 0.1675652116537094, "learning_rate": 0.002, "loss": 2.5507, "step": 358890 }, { "epoch": 0.715008606400612, "grad_norm": 0.18974587321281433, "learning_rate": 0.002, "loss": 2.5578, "step": 358900 }, { "epoch": 0.7150285286242509, "grad_norm": 0.15570831298828125, "learning_rate": 0.002, "loss": 2.5448, "step": 358910 }, { "epoch": 0.7150484508478898, "grad_norm": 0.15889465808868408, "learning_rate": 0.002, "loss": 2.5766, "step": 358920 }, { "epoch": 0.7150683730715287, "grad_norm": 0.16479361057281494, "learning_rate": 0.002, "loss": 2.5444, "step": 358930 }, { "epoch": 0.7150882952951677, "grad_norm": 0.1537550389766693, "learning_rate": 0.002, "loss": 2.5595, "step": 358940 }, { "epoch": 0.7151082175188066, "grad_norm": 0.17843309044837952, "learning_rate": 0.002, "loss": 2.5344, "step": 358950 }, { "epoch": 0.7151281397424455, "grad_norm": 0.14092281460762024, "learning_rate": 0.002, "loss": 2.5554, "step": 358960 }, { "epoch": 0.7151480619660844, "grad_norm": 0.1753024160861969, "learning_rate": 0.002, "loss": 2.5529, "step": 358970 }, { "epoch": 0.7151679841897233, "grad_norm": 0.1516364961862564, "learning_rate": 0.002, "loss": 2.5551, "step": 358980 }, { "epoch": 0.7151879064133623, "grad_norm": 0.16057716310024261, "learning_rate": 0.002, "loss": 2.542, "step": 358990 }, { "epoch": 0.7152078286370012, "grad_norm": 0.18127068877220154, "learning_rate": 0.002, "loss": 2.5529, "step": 359000 }, { "epoch": 0.71522775086064, "grad_norm": 0.134046271443367, "learning_rate": 0.002, "loss": 2.5434, "step": 359010 }, { "epoch": 0.715247673084279, "grad_norm": 0.15210339426994324, "learning_rate": 0.002, "loss": 2.561, "step": 359020 }, { "epoch": 0.7152675953079178, "grad_norm": 0.18562999367713928, "learning_rate": 0.002, "loss": 2.5634, "step": 359030 }, { "epoch": 0.7152875175315568, "grad_norm": 0.14959943294525146, "learning_rate": 0.002, "loss": 2.5555, "step": 359040 }, { "epoch": 0.7153074397551957, "grad_norm": 0.19341041147708893, "learning_rate": 0.002, "loss": 2.5607, "step": 359050 }, { "epoch": 0.7153273619788346, "grad_norm": 0.1507357805967331, "learning_rate": 0.002, "loss": 2.5614, "step": 359060 }, { "epoch": 0.7153472842024735, "grad_norm": 0.22377096116542816, "learning_rate": 0.002, "loss": 2.551, "step": 359070 }, { "epoch": 0.7153672064261125, "grad_norm": 0.17379647493362427, "learning_rate": 0.002, "loss": 2.5455, "step": 359080 }, { "epoch": 0.7153871286497514, "grad_norm": 0.16767260432243347, "learning_rate": 0.002, "loss": 2.5468, "step": 359090 }, { "epoch": 0.7154070508733903, "grad_norm": 0.1544046252965927, "learning_rate": 0.002, "loss": 2.5713, "step": 359100 }, { "epoch": 0.7154269730970292, "grad_norm": 0.1889180839061737, "learning_rate": 0.002, "loss": 2.5426, "step": 359110 }, { "epoch": 0.7154468953206681, "grad_norm": 0.14245015382766724, "learning_rate": 0.002, "loss": 2.5414, "step": 359120 }, { "epoch": 0.7154668175443071, "grad_norm": 0.15135174989700317, "learning_rate": 0.002, "loss": 2.561, "step": 359130 }, { "epoch": 0.715486739767946, "grad_norm": 0.16134054958820343, "learning_rate": 0.002, "loss": 2.5719, "step": 359140 }, { "epoch": 0.7155066619915849, "grad_norm": 0.1676892638206482, "learning_rate": 0.002, "loss": 2.5465, "step": 359150 }, { "epoch": 0.7155265842152237, "grad_norm": 0.17228403687477112, "learning_rate": 0.002, "loss": 2.5635, "step": 359160 }, { "epoch": 0.7155465064388626, "grad_norm": 0.167617067694664, "learning_rate": 0.002, "loss": 2.5634, "step": 359170 }, { "epoch": 0.7155664286625016, "grad_norm": 0.15259812772274017, "learning_rate": 0.002, "loss": 2.5541, "step": 359180 }, { "epoch": 0.7155863508861405, "grad_norm": 0.18803533911705017, "learning_rate": 0.002, "loss": 2.5519, "step": 359190 }, { "epoch": 0.7156062731097794, "grad_norm": 0.15604697167873383, "learning_rate": 0.002, "loss": 2.5588, "step": 359200 }, { "epoch": 0.7156261953334183, "grad_norm": 0.17466387152671814, "learning_rate": 0.002, "loss": 2.5487, "step": 359210 }, { "epoch": 0.7156461175570572, "grad_norm": 0.16792066395282745, "learning_rate": 0.002, "loss": 2.5535, "step": 359220 }, { "epoch": 0.7156660397806962, "grad_norm": 0.15875567495822906, "learning_rate": 0.002, "loss": 2.5378, "step": 359230 }, { "epoch": 0.7156859620043351, "grad_norm": 0.19629530608654022, "learning_rate": 0.002, "loss": 2.5421, "step": 359240 }, { "epoch": 0.715705884227974, "grad_norm": 0.16244319081306458, "learning_rate": 0.002, "loss": 2.5424, "step": 359250 }, { "epoch": 0.7157258064516129, "grad_norm": 0.1796930730342865, "learning_rate": 0.002, "loss": 2.5687, "step": 359260 }, { "epoch": 0.7157457286752518, "grad_norm": 0.15246543288230896, "learning_rate": 0.002, "loss": 2.5408, "step": 359270 }, { "epoch": 0.7157656508988908, "grad_norm": 0.15261971950531006, "learning_rate": 0.002, "loss": 2.5408, "step": 359280 }, { "epoch": 0.7157855731225297, "grad_norm": 0.15775081515312195, "learning_rate": 0.002, "loss": 2.5577, "step": 359290 }, { "epoch": 0.7158054953461686, "grad_norm": 0.16605989634990692, "learning_rate": 0.002, "loss": 2.5531, "step": 359300 }, { "epoch": 0.7158254175698074, "grad_norm": 0.15391984581947327, "learning_rate": 0.002, "loss": 2.5589, "step": 359310 }, { "epoch": 0.7158453397934463, "grad_norm": 0.17156392335891724, "learning_rate": 0.002, "loss": 2.5413, "step": 359320 }, { "epoch": 0.7158652620170853, "grad_norm": 0.13817423582077026, "learning_rate": 0.002, "loss": 2.5355, "step": 359330 }, { "epoch": 0.7158851842407242, "grad_norm": 0.19007031619548798, "learning_rate": 0.002, "loss": 2.5544, "step": 359340 }, { "epoch": 0.7159051064643631, "grad_norm": 0.1965571939945221, "learning_rate": 0.002, "loss": 2.5618, "step": 359350 }, { "epoch": 0.715925028688002, "grad_norm": 0.15639346837997437, "learning_rate": 0.002, "loss": 2.5567, "step": 359360 }, { "epoch": 0.7159449509116409, "grad_norm": 0.1557990461587906, "learning_rate": 0.002, "loss": 2.5531, "step": 359370 }, { "epoch": 0.7159648731352799, "grad_norm": 0.1472676396369934, "learning_rate": 0.002, "loss": 2.565, "step": 359380 }, { "epoch": 0.7159847953589188, "grad_norm": 0.17219872772693634, "learning_rate": 0.002, "loss": 2.5502, "step": 359390 }, { "epoch": 0.7160047175825577, "grad_norm": 0.18638479709625244, "learning_rate": 0.002, "loss": 2.5509, "step": 359400 }, { "epoch": 0.7160246398061966, "grad_norm": 0.17280110716819763, "learning_rate": 0.002, "loss": 2.5512, "step": 359410 }, { "epoch": 0.7160445620298356, "grad_norm": 0.15296649932861328, "learning_rate": 0.002, "loss": 2.5589, "step": 359420 }, { "epoch": 0.7160644842534745, "grad_norm": 0.1394958794116974, "learning_rate": 0.002, "loss": 2.5322, "step": 359430 }, { "epoch": 0.7160844064771134, "grad_norm": 0.15825684368610382, "learning_rate": 0.002, "loss": 2.562, "step": 359440 }, { "epoch": 0.7161043287007522, "grad_norm": 0.15940451622009277, "learning_rate": 0.002, "loss": 2.5487, "step": 359450 }, { "epoch": 0.7161242509243911, "grad_norm": 0.2036527842283249, "learning_rate": 0.002, "loss": 2.5548, "step": 359460 }, { "epoch": 0.7161441731480301, "grad_norm": 0.14376963675022125, "learning_rate": 0.002, "loss": 2.5586, "step": 359470 }, { "epoch": 0.716164095371669, "grad_norm": 0.21498872339725494, "learning_rate": 0.002, "loss": 2.5473, "step": 359480 }, { "epoch": 0.7161840175953079, "grad_norm": 0.15875652432441711, "learning_rate": 0.002, "loss": 2.5526, "step": 359490 }, { "epoch": 0.7162039398189468, "grad_norm": 0.1546841710805893, "learning_rate": 0.002, "loss": 2.5473, "step": 359500 }, { "epoch": 0.7162238620425857, "grad_norm": 0.20867431163787842, "learning_rate": 0.002, "loss": 2.5617, "step": 359510 }, { "epoch": 0.7162437842662247, "grad_norm": 0.19072848558425903, "learning_rate": 0.002, "loss": 2.5657, "step": 359520 }, { "epoch": 0.7162637064898636, "grad_norm": 0.20550163090229034, "learning_rate": 0.002, "loss": 2.5675, "step": 359530 }, { "epoch": 0.7162836287135025, "grad_norm": 0.1923670470714569, "learning_rate": 0.002, "loss": 2.5614, "step": 359540 }, { "epoch": 0.7163035509371414, "grad_norm": 0.16559132933616638, "learning_rate": 0.002, "loss": 2.5441, "step": 359550 }, { "epoch": 0.7163234731607803, "grad_norm": 0.15171866118907928, "learning_rate": 0.002, "loss": 2.5565, "step": 359560 }, { "epoch": 0.7163433953844193, "grad_norm": 0.18788225948810577, "learning_rate": 0.002, "loss": 2.5652, "step": 359570 }, { "epoch": 0.7163633176080582, "grad_norm": 0.15095515549182892, "learning_rate": 0.002, "loss": 2.5662, "step": 359580 }, { "epoch": 0.716383239831697, "grad_norm": 0.15079054236412048, "learning_rate": 0.002, "loss": 2.5707, "step": 359590 }, { "epoch": 0.7164031620553359, "grad_norm": 0.15627767145633698, "learning_rate": 0.002, "loss": 2.5386, "step": 359600 }, { "epoch": 0.7164230842789748, "grad_norm": 0.171737402677536, "learning_rate": 0.002, "loss": 2.5657, "step": 359610 }, { "epoch": 0.7164430065026138, "grad_norm": 0.16661278903484344, "learning_rate": 0.002, "loss": 2.5531, "step": 359620 }, { "epoch": 0.7164629287262527, "grad_norm": 0.17115257680416107, "learning_rate": 0.002, "loss": 2.5448, "step": 359630 }, { "epoch": 0.7164828509498916, "grad_norm": 0.17486461997032166, "learning_rate": 0.002, "loss": 2.5598, "step": 359640 }, { "epoch": 0.7165027731735305, "grad_norm": 0.18074637651443481, "learning_rate": 0.002, "loss": 2.5486, "step": 359650 }, { "epoch": 0.7165226953971694, "grad_norm": 0.17556338012218475, "learning_rate": 0.002, "loss": 2.5434, "step": 359660 }, { "epoch": 0.7165426176208084, "grad_norm": 0.18945708870887756, "learning_rate": 0.002, "loss": 2.5555, "step": 359670 }, { "epoch": 0.7165625398444473, "grad_norm": 0.14902450144290924, "learning_rate": 0.002, "loss": 2.5544, "step": 359680 }, { "epoch": 0.7165824620680862, "grad_norm": 0.1722189337015152, "learning_rate": 0.002, "loss": 2.5751, "step": 359690 }, { "epoch": 0.7166023842917251, "grad_norm": 0.1685449481010437, "learning_rate": 0.002, "loss": 2.5485, "step": 359700 }, { "epoch": 0.7166223065153641, "grad_norm": 0.14581432938575745, "learning_rate": 0.002, "loss": 2.5485, "step": 359710 }, { "epoch": 0.716642228739003, "grad_norm": 0.19630014896392822, "learning_rate": 0.002, "loss": 2.563, "step": 359720 }, { "epoch": 0.7166621509626419, "grad_norm": 0.16302812099456787, "learning_rate": 0.002, "loss": 2.5487, "step": 359730 }, { "epoch": 0.7166820731862807, "grad_norm": 0.17959511280059814, "learning_rate": 0.002, "loss": 2.5631, "step": 359740 }, { "epoch": 0.7167019954099196, "grad_norm": 0.14420197904109955, "learning_rate": 0.002, "loss": 2.555, "step": 359750 }, { "epoch": 0.7167219176335586, "grad_norm": 0.16897937655448914, "learning_rate": 0.002, "loss": 2.5501, "step": 359760 }, { "epoch": 0.7167418398571975, "grad_norm": 0.1636967957019806, "learning_rate": 0.002, "loss": 2.5445, "step": 359770 }, { "epoch": 0.7167617620808364, "grad_norm": 0.16699101030826569, "learning_rate": 0.002, "loss": 2.5646, "step": 359780 }, { "epoch": 0.7167816843044753, "grad_norm": 0.15833482146263123, "learning_rate": 0.002, "loss": 2.5559, "step": 359790 }, { "epoch": 0.7168016065281142, "grad_norm": 0.18405267596244812, "learning_rate": 0.002, "loss": 2.5808, "step": 359800 }, { "epoch": 0.7168215287517532, "grad_norm": 0.14834405481815338, "learning_rate": 0.002, "loss": 2.553, "step": 359810 }, { "epoch": 0.7168414509753921, "grad_norm": 0.1630258560180664, "learning_rate": 0.002, "loss": 2.5301, "step": 359820 }, { "epoch": 0.716861373199031, "grad_norm": 0.17504245042800903, "learning_rate": 0.002, "loss": 2.5536, "step": 359830 }, { "epoch": 0.7168812954226699, "grad_norm": 0.20863012969493866, "learning_rate": 0.002, "loss": 2.5609, "step": 359840 }, { "epoch": 0.7169012176463088, "grad_norm": 0.17185798287391663, "learning_rate": 0.002, "loss": 2.5578, "step": 359850 }, { "epoch": 0.7169211398699478, "grad_norm": 0.13868515193462372, "learning_rate": 0.002, "loss": 2.5564, "step": 359860 }, { "epoch": 0.7169410620935867, "grad_norm": 0.15668722987174988, "learning_rate": 0.002, "loss": 2.5554, "step": 359870 }, { "epoch": 0.7169609843172255, "grad_norm": 0.16068615019321442, "learning_rate": 0.002, "loss": 2.5453, "step": 359880 }, { "epoch": 0.7169809065408644, "grad_norm": 0.1459122598171234, "learning_rate": 0.002, "loss": 2.5528, "step": 359890 }, { "epoch": 0.7170008287645033, "grad_norm": 0.20811663568019867, "learning_rate": 0.002, "loss": 2.5583, "step": 359900 }, { "epoch": 0.7170207509881423, "grad_norm": 0.15787634253501892, "learning_rate": 0.002, "loss": 2.5544, "step": 359910 }, { "epoch": 0.7170406732117812, "grad_norm": 0.14425204694271088, "learning_rate": 0.002, "loss": 2.5295, "step": 359920 }, { "epoch": 0.7170605954354201, "grad_norm": 0.16185367107391357, "learning_rate": 0.002, "loss": 2.5555, "step": 359930 }, { "epoch": 0.717080517659059, "grad_norm": 0.1747380644083023, "learning_rate": 0.002, "loss": 2.561, "step": 359940 }, { "epoch": 0.7171004398826979, "grad_norm": 0.1919078528881073, "learning_rate": 0.002, "loss": 2.5528, "step": 359950 }, { "epoch": 0.7171203621063369, "grad_norm": 0.1894952803850174, "learning_rate": 0.002, "loss": 2.5567, "step": 359960 }, { "epoch": 0.7171402843299758, "grad_norm": 0.18623383343219757, "learning_rate": 0.002, "loss": 2.5554, "step": 359970 }, { "epoch": 0.7171602065536147, "grad_norm": 0.16642747819423676, "learning_rate": 0.002, "loss": 2.5441, "step": 359980 }, { "epoch": 0.7171801287772536, "grad_norm": 0.19746524095535278, "learning_rate": 0.002, "loss": 2.5509, "step": 359990 }, { "epoch": 0.7172000510008926, "grad_norm": 0.1494186967611313, "learning_rate": 0.002, "loss": 2.5463, "step": 360000 }, { "epoch": 0.7172199732245315, "grad_norm": 0.18083906173706055, "learning_rate": 0.002, "loss": 2.5528, "step": 360010 }, { "epoch": 0.7172398954481704, "grad_norm": 0.15873359143733978, "learning_rate": 0.002, "loss": 2.5523, "step": 360020 }, { "epoch": 0.7172598176718092, "grad_norm": 0.15006369352340698, "learning_rate": 0.002, "loss": 2.5561, "step": 360030 }, { "epoch": 0.7172797398954481, "grad_norm": 0.1528049260377884, "learning_rate": 0.002, "loss": 2.5602, "step": 360040 }, { "epoch": 0.7172996621190871, "grad_norm": 0.15750159323215485, "learning_rate": 0.002, "loss": 2.5477, "step": 360050 }, { "epoch": 0.717319584342726, "grad_norm": 0.21079616248607635, "learning_rate": 0.002, "loss": 2.5523, "step": 360060 }, { "epoch": 0.7173395065663649, "grad_norm": 0.1671651154756546, "learning_rate": 0.002, "loss": 2.5485, "step": 360070 }, { "epoch": 0.7173594287900038, "grad_norm": 0.15259616076946259, "learning_rate": 0.002, "loss": 2.5494, "step": 360080 }, { "epoch": 0.7173793510136427, "grad_norm": 0.145721435546875, "learning_rate": 0.002, "loss": 2.5589, "step": 360090 }, { "epoch": 0.7173992732372817, "grad_norm": 0.1913938671350479, "learning_rate": 0.002, "loss": 2.5458, "step": 360100 }, { "epoch": 0.7174191954609206, "grad_norm": 0.14893469214439392, "learning_rate": 0.002, "loss": 2.5608, "step": 360110 }, { "epoch": 0.7174391176845595, "grad_norm": 0.16404196619987488, "learning_rate": 0.002, "loss": 2.5636, "step": 360120 }, { "epoch": 0.7174590399081984, "grad_norm": 0.16866299510002136, "learning_rate": 0.002, "loss": 2.5606, "step": 360130 }, { "epoch": 0.7174789621318373, "grad_norm": 0.14022420346736908, "learning_rate": 0.002, "loss": 2.5533, "step": 360140 }, { "epoch": 0.7174988843554763, "grad_norm": 0.15857842564582825, "learning_rate": 0.002, "loss": 2.5527, "step": 360150 }, { "epoch": 0.7175188065791152, "grad_norm": 0.15439681708812714, "learning_rate": 0.002, "loss": 2.546, "step": 360160 }, { "epoch": 0.717538728802754, "grad_norm": 0.16561159491539001, "learning_rate": 0.002, "loss": 2.5462, "step": 360170 }, { "epoch": 0.7175586510263929, "grad_norm": 0.16821478307247162, "learning_rate": 0.002, "loss": 2.5608, "step": 360180 }, { "epoch": 0.7175785732500318, "grad_norm": 0.20620600879192352, "learning_rate": 0.002, "loss": 2.5335, "step": 360190 }, { "epoch": 0.7175984954736708, "grad_norm": 0.16672411561012268, "learning_rate": 0.002, "loss": 2.5572, "step": 360200 }, { "epoch": 0.7176184176973097, "grad_norm": 0.20694531500339508, "learning_rate": 0.002, "loss": 2.559, "step": 360210 }, { "epoch": 0.7176383399209486, "grad_norm": 0.16563986241817474, "learning_rate": 0.002, "loss": 2.543, "step": 360220 }, { "epoch": 0.7176582621445875, "grad_norm": 0.1822270154953003, "learning_rate": 0.002, "loss": 2.559, "step": 360230 }, { "epoch": 0.7176781843682264, "grad_norm": 0.13732919096946716, "learning_rate": 0.002, "loss": 2.5493, "step": 360240 }, { "epoch": 0.7176981065918654, "grad_norm": 0.1632830649614334, "learning_rate": 0.002, "loss": 2.5531, "step": 360250 }, { "epoch": 0.7177180288155043, "grad_norm": 0.14622031152248383, "learning_rate": 0.002, "loss": 2.5639, "step": 360260 }, { "epoch": 0.7177379510391432, "grad_norm": 0.15170876681804657, "learning_rate": 0.002, "loss": 2.5383, "step": 360270 }, { "epoch": 0.7177578732627821, "grad_norm": 0.1769706904888153, "learning_rate": 0.002, "loss": 2.559, "step": 360280 }, { "epoch": 0.7177777954864211, "grad_norm": 0.1963663399219513, "learning_rate": 0.002, "loss": 2.544, "step": 360290 }, { "epoch": 0.71779771771006, "grad_norm": 0.1612057089805603, "learning_rate": 0.002, "loss": 2.5456, "step": 360300 }, { "epoch": 0.7178176399336988, "grad_norm": 0.17026574909687042, "learning_rate": 0.002, "loss": 2.5691, "step": 360310 }, { "epoch": 0.7178375621573377, "grad_norm": 0.13901756703853607, "learning_rate": 0.002, "loss": 2.5492, "step": 360320 }, { "epoch": 0.7178574843809766, "grad_norm": 0.15055780112743378, "learning_rate": 0.002, "loss": 2.5468, "step": 360330 }, { "epoch": 0.7178774066046156, "grad_norm": 0.17118827998638153, "learning_rate": 0.002, "loss": 2.556, "step": 360340 }, { "epoch": 0.7178973288282545, "grad_norm": 0.1711847484111786, "learning_rate": 0.002, "loss": 2.5626, "step": 360350 }, { "epoch": 0.7179172510518934, "grad_norm": 0.17325392365455627, "learning_rate": 0.002, "loss": 2.5541, "step": 360360 }, { "epoch": 0.7179371732755323, "grad_norm": 0.17358116805553436, "learning_rate": 0.002, "loss": 2.5641, "step": 360370 }, { "epoch": 0.7179570954991712, "grad_norm": 0.1406172811985016, "learning_rate": 0.002, "loss": 2.5713, "step": 360380 }, { "epoch": 0.7179770177228102, "grad_norm": 0.16408711671829224, "learning_rate": 0.002, "loss": 2.5435, "step": 360390 }, { "epoch": 0.7179969399464491, "grad_norm": 0.18189193308353424, "learning_rate": 0.002, "loss": 2.5449, "step": 360400 }, { "epoch": 0.718016862170088, "grad_norm": 0.1720493733882904, "learning_rate": 0.002, "loss": 2.5535, "step": 360410 }, { "epoch": 0.7180367843937269, "grad_norm": 0.19752857089042664, "learning_rate": 0.002, "loss": 2.552, "step": 360420 }, { "epoch": 0.7180567066173658, "grad_norm": 0.16842412948608398, "learning_rate": 0.002, "loss": 2.5479, "step": 360430 }, { "epoch": 0.7180766288410048, "grad_norm": 0.15422579646110535, "learning_rate": 0.002, "loss": 2.5703, "step": 360440 }, { "epoch": 0.7180965510646437, "grad_norm": 0.20113436877727509, "learning_rate": 0.002, "loss": 2.5461, "step": 360450 }, { "epoch": 0.7181164732882825, "grad_norm": 0.16276973485946655, "learning_rate": 0.002, "loss": 2.548, "step": 360460 }, { "epoch": 0.7181363955119214, "grad_norm": 0.1601768583059311, "learning_rate": 0.002, "loss": 2.56, "step": 360470 }, { "epoch": 0.7181563177355603, "grad_norm": 0.17962972819805145, "learning_rate": 0.002, "loss": 2.5627, "step": 360480 }, { "epoch": 0.7181762399591993, "grad_norm": 0.1643291860818863, "learning_rate": 0.002, "loss": 2.5588, "step": 360490 }, { "epoch": 0.7181961621828382, "grad_norm": 0.18250194191932678, "learning_rate": 0.002, "loss": 2.5416, "step": 360500 }, { "epoch": 0.7182160844064771, "grad_norm": 0.17437893152236938, "learning_rate": 0.002, "loss": 2.5476, "step": 360510 }, { "epoch": 0.718236006630116, "grad_norm": 0.14800113439559937, "learning_rate": 0.002, "loss": 2.545, "step": 360520 }, { "epoch": 0.7182559288537549, "grad_norm": 0.1317509114742279, "learning_rate": 0.002, "loss": 2.545, "step": 360530 }, { "epoch": 0.7182758510773939, "grad_norm": 0.1510079950094223, "learning_rate": 0.002, "loss": 2.552, "step": 360540 }, { "epoch": 0.7182957733010328, "grad_norm": 0.1835266798734665, "learning_rate": 0.002, "loss": 2.5705, "step": 360550 }, { "epoch": 0.7183156955246717, "grad_norm": 0.1785876750946045, "learning_rate": 0.002, "loss": 2.5639, "step": 360560 }, { "epoch": 0.7183356177483106, "grad_norm": 0.17162556946277618, "learning_rate": 0.002, "loss": 2.5655, "step": 360570 }, { "epoch": 0.7183555399719496, "grad_norm": 0.1819305717945099, "learning_rate": 0.002, "loss": 2.5441, "step": 360580 }, { "epoch": 0.7183754621955885, "grad_norm": 0.16495868563652039, "learning_rate": 0.002, "loss": 2.5593, "step": 360590 }, { "epoch": 0.7183953844192273, "grad_norm": 0.1864491105079651, "learning_rate": 0.002, "loss": 2.5639, "step": 360600 }, { "epoch": 0.7184153066428662, "grad_norm": 0.16140982508659363, "learning_rate": 0.002, "loss": 2.5583, "step": 360610 }, { "epoch": 0.7184352288665051, "grad_norm": 0.16760113835334778, "learning_rate": 0.002, "loss": 2.5554, "step": 360620 }, { "epoch": 0.7184551510901441, "grad_norm": 0.15930119156837463, "learning_rate": 0.002, "loss": 2.5399, "step": 360630 }, { "epoch": 0.718475073313783, "grad_norm": 0.4897890090942383, "learning_rate": 0.002, "loss": 2.5475, "step": 360640 }, { "epoch": 0.7184949955374219, "grad_norm": 0.1771060973405838, "learning_rate": 0.002, "loss": 2.552, "step": 360650 }, { "epoch": 0.7185149177610608, "grad_norm": 0.14461712539196014, "learning_rate": 0.002, "loss": 2.5563, "step": 360660 }, { "epoch": 0.7185348399846997, "grad_norm": 0.1553047001361847, "learning_rate": 0.002, "loss": 2.5524, "step": 360670 }, { "epoch": 0.7185547622083387, "grad_norm": 0.1460757851600647, "learning_rate": 0.002, "loss": 2.5496, "step": 360680 }, { "epoch": 0.7185746844319776, "grad_norm": 0.16043171286582947, "learning_rate": 0.002, "loss": 2.5607, "step": 360690 }, { "epoch": 0.7185946066556165, "grad_norm": 0.17657877504825592, "learning_rate": 0.002, "loss": 2.5688, "step": 360700 }, { "epoch": 0.7186145288792554, "grad_norm": 0.42375603318214417, "learning_rate": 0.002, "loss": 2.5554, "step": 360710 }, { "epoch": 0.7186344511028943, "grad_norm": 0.15779826045036316, "learning_rate": 0.002, "loss": 2.5482, "step": 360720 }, { "epoch": 0.7186543733265333, "grad_norm": 0.17539125680923462, "learning_rate": 0.002, "loss": 2.5629, "step": 360730 }, { "epoch": 0.7186742955501721, "grad_norm": 0.15130414068698883, "learning_rate": 0.002, "loss": 2.5496, "step": 360740 }, { "epoch": 0.718694217773811, "grad_norm": 0.14878736436367035, "learning_rate": 0.002, "loss": 2.5667, "step": 360750 }, { "epoch": 0.7187141399974499, "grad_norm": 0.13909289240837097, "learning_rate": 0.002, "loss": 2.5389, "step": 360760 }, { "epoch": 0.7187340622210888, "grad_norm": 0.16215334832668304, "learning_rate": 0.002, "loss": 2.5526, "step": 360770 }, { "epoch": 0.7187539844447278, "grad_norm": 0.16830754280090332, "learning_rate": 0.002, "loss": 2.5576, "step": 360780 }, { "epoch": 0.7187739066683667, "grad_norm": 0.2102094292640686, "learning_rate": 0.002, "loss": 2.5424, "step": 360790 }, { "epoch": 0.7187938288920056, "grad_norm": 0.1676434427499771, "learning_rate": 0.002, "loss": 2.558, "step": 360800 }, { "epoch": 0.7188137511156445, "grad_norm": 0.1715104579925537, "learning_rate": 0.002, "loss": 2.5315, "step": 360810 }, { "epoch": 0.7188336733392834, "grad_norm": 0.1887713521718979, "learning_rate": 0.002, "loss": 2.5604, "step": 360820 }, { "epoch": 0.7188535955629224, "grad_norm": 0.1847234070301056, "learning_rate": 0.002, "loss": 2.5514, "step": 360830 }, { "epoch": 0.7188735177865613, "grad_norm": 0.1546800434589386, "learning_rate": 0.002, "loss": 2.5682, "step": 360840 }, { "epoch": 0.7188934400102002, "grad_norm": 0.14155735075473785, "learning_rate": 0.002, "loss": 2.5545, "step": 360850 }, { "epoch": 0.7189133622338391, "grad_norm": 0.15357179939746857, "learning_rate": 0.002, "loss": 2.5502, "step": 360860 }, { "epoch": 0.7189332844574781, "grad_norm": 0.1856369823217392, "learning_rate": 0.002, "loss": 2.5542, "step": 360870 }, { "epoch": 0.718953206681117, "grad_norm": 0.1388665735721588, "learning_rate": 0.002, "loss": 2.5578, "step": 360880 }, { "epoch": 0.7189731289047558, "grad_norm": 0.14649686217308044, "learning_rate": 0.002, "loss": 2.5391, "step": 360890 }, { "epoch": 0.7189930511283947, "grad_norm": 0.175508514046669, "learning_rate": 0.002, "loss": 2.5538, "step": 360900 }, { "epoch": 0.7190129733520336, "grad_norm": 0.20035482943058014, "learning_rate": 0.002, "loss": 2.5498, "step": 360910 }, { "epoch": 0.7190328955756726, "grad_norm": 0.15240779519081116, "learning_rate": 0.002, "loss": 2.5517, "step": 360920 }, { "epoch": 0.7190528177993115, "grad_norm": 0.14045019447803497, "learning_rate": 0.002, "loss": 2.5593, "step": 360930 }, { "epoch": 0.7190727400229504, "grad_norm": 0.17273849248886108, "learning_rate": 0.002, "loss": 2.547, "step": 360940 }, { "epoch": 0.7190926622465893, "grad_norm": 0.15338508784770966, "learning_rate": 0.002, "loss": 2.5436, "step": 360950 }, { "epoch": 0.7191125844702282, "grad_norm": 0.19120573997497559, "learning_rate": 0.002, "loss": 2.5577, "step": 360960 }, { "epoch": 0.7191325066938672, "grad_norm": 0.2037070393562317, "learning_rate": 0.002, "loss": 2.5642, "step": 360970 }, { "epoch": 0.7191524289175061, "grad_norm": 0.14291059970855713, "learning_rate": 0.002, "loss": 2.5476, "step": 360980 }, { "epoch": 0.719172351141145, "grad_norm": 0.16450056433677673, "learning_rate": 0.002, "loss": 2.5407, "step": 360990 }, { "epoch": 0.7191922733647839, "grad_norm": 0.13892817497253418, "learning_rate": 0.002, "loss": 2.5447, "step": 361000 }, { "epoch": 0.7192121955884228, "grad_norm": 0.1761041283607483, "learning_rate": 0.002, "loss": 2.5611, "step": 361010 }, { "epoch": 0.7192321178120618, "grad_norm": 0.1760401427745819, "learning_rate": 0.002, "loss": 2.5597, "step": 361020 }, { "epoch": 0.7192520400357006, "grad_norm": 0.19363048672676086, "learning_rate": 0.002, "loss": 2.551, "step": 361030 }, { "epoch": 0.7192719622593395, "grad_norm": 0.1605348289012909, "learning_rate": 0.002, "loss": 2.5416, "step": 361040 }, { "epoch": 0.7192918844829784, "grad_norm": 0.16176067292690277, "learning_rate": 0.002, "loss": 2.5578, "step": 361050 }, { "epoch": 0.7193118067066173, "grad_norm": 0.16563531756401062, "learning_rate": 0.002, "loss": 2.539, "step": 361060 }, { "epoch": 0.7193317289302563, "grad_norm": 0.20154541730880737, "learning_rate": 0.002, "loss": 2.5664, "step": 361070 }, { "epoch": 0.7193516511538952, "grad_norm": 0.14321297407150269, "learning_rate": 0.002, "loss": 2.5556, "step": 361080 }, { "epoch": 0.7193715733775341, "grad_norm": 0.1687166392803192, "learning_rate": 0.002, "loss": 2.5425, "step": 361090 }, { "epoch": 0.719391495601173, "grad_norm": 0.17428146302700043, "learning_rate": 0.002, "loss": 2.5755, "step": 361100 }, { "epoch": 0.7194114178248119, "grad_norm": 0.16971924901008606, "learning_rate": 0.002, "loss": 2.547, "step": 361110 }, { "epoch": 0.7194313400484509, "grad_norm": 0.20559242367744446, "learning_rate": 0.002, "loss": 2.5626, "step": 361120 }, { "epoch": 0.7194512622720898, "grad_norm": 0.16334067285060883, "learning_rate": 0.002, "loss": 2.5469, "step": 361130 }, { "epoch": 0.7194711844957287, "grad_norm": 0.21520523726940155, "learning_rate": 0.002, "loss": 2.541, "step": 361140 }, { "epoch": 0.7194911067193676, "grad_norm": 0.1572893261909485, "learning_rate": 0.002, "loss": 2.5476, "step": 361150 }, { "epoch": 0.7195110289430064, "grad_norm": 0.15629783272743225, "learning_rate": 0.002, "loss": 2.5595, "step": 361160 }, { "epoch": 0.7195309511666454, "grad_norm": 0.2002161145210266, "learning_rate": 0.002, "loss": 2.5494, "step": 361170 }, { "epoch": 0.7195508733902843, "grad_norm": 0.1656271517276764, "learning_rate": 0.002, "loss": 2.5592, "step": 361180 }, { "epoch": 0.7195707956139232, "grad_norm": 0.14508439600467682, "learning_rate": 0.002, "loss": 2.5712, "step": 361190 }, { "epoch": 0.7195907178375621, "grad_norm": 0.20002628862857819, "learning_rate": 0.002, "loss": 2.5613, "step": 361200 }, { "epoch": 0.7196106400612011, "grad_norm": 0.1453678160905838, "learning_rate": 0.002, "loss": 2.5442, "step": 361210 }, { "epoch": 0.71963056228484, "grad_norm": 0.17744477093219757, "learning_rate": 0.002, "loss": 2.5497, "step": 361220 }, { "epoch": 0.7196504845084789, "grad_norm": 0.18360590934753418, "learning_rate": 0.002, "loss": 2.5495, "step": 361230 }, { "epoch": 0.7196704067321178, "grad_norm": 0.159806489944458, "learning_rate": 0.002, "loss": 2.5538, "step": 361240 }, { "epoch": 0.7196903289557567, "grad_norm": 0.16003966331481934, "learning_rate": 0.002, "loss": 2.5547, "step": 361250 }, { "epoch": 0.7197102511793957, "grad_norm": 0.16530372202396393, "learning_rate": 0.002, "loss": 2.5447, "step": 361260 }, { "epoch": 0.7197301734030346, "grad_norm": 0.16005510091781616, "learning_rate": 0.002, "loss": 2.5446, "step": 361270 }, { "epoch": 0.7197500956266735, "grad_norm": 0.15389806032180786, "learning_rate": 0.002, "loss": 2.5618, "step": 361280 }, { "epoch": 0.7197700178503124, "grad_norm": 0.17306840419769287, "learning_rate": 0.002, "loss": 2.5682, "step": 361290 }, { "epoch": 0.7197899400739513, "grad_norm": 0.16422410309314728, "learning_rate": 0.002, "loss": 2.5482, "step": 361300 }, { "epoch": 0.7198098622975903, "grad_norm": 0.16005079448223114, "learning_rate": 0.002, "loss": 2.5272, "step": 361310 }, { "epoch": 0.7198297845212291, "grad_norm": 0.1771281659603119, "learning_rate": 0.002, "loss": 2.5362, "step": 361320 }, { "epoch": 0.719849706744868, "grad_norm": 0.15887361764907837, "learning_rate": 0.002, "loss": 2.5637, "step": 361330 }, { "epoch": 0.7198696289685069, "grad_norm": 0.14014370739459991, "learning_rate": 0.002, "loss": 2.5442, "step": 361340 }, { "epoch": 0.7198895511921458, "grad_norm": 0.15605811774730682, "learning_rate": 0.002, "loss": 2.5552, "step": 361350 }, { "epoch": 0.7199094734157848, "grad_norm": 0.21748897433280945, "learning_rate": 0.002, "loss": 2.5501, "step": 361360 }, { "epoch": 0.7199293956394237, "grad_norm": 0.16922031342983246, "learning_rate": 0.002, "loss": 2.5574, "step": 361370 }, { "epoch": 0.7199493178630626, "grad_norm": 0.17409801483154297, "learning_rate": 0.002, "loss": 2.5648, "step": 361380 }, { "epoch": 0.7199692400867015, "grad_norm": 0.15390270948410034, "learning_rate": 0.002, "loss": 2.5636, "step": 361390 }, { "epoch": 0.7199891623103404, "grad_norm": 0.17216156423091888, "learning_rate": 0.002, "loss": 2.5562, "step": 361400 }, { "epoch": 0.7200090845339794, "grad_norm": 0.18125903606414795, "learning_rate": 0.002, "loss": 2.5601, "step": 361410 }, { "epoch": 0.7200290067576183, "grad_norm": 0.17028333246707916, "learning_rate": 0.002, "loss": 2.5629, "step": 361420 }, { "epoch": 0.7200489289812572, "grad_norm": 0.17738878726959229, "learning_rate": 0.002, "loss": 2.5558, "step": 361430 }, { "epoch": 0.720068851204896, "grad_norm": 0.14237788319587708, "learning_rate": 0.002, "loss": 2.5627, "step": 361440 }, { "epoch": 0.720088773428535, "grad_norm": 0.22023524343967438, "learning_rate": 0.002, "loss": 2.577, "step": 361450 }, { "epoch": 0.720108695652174, "grad_norm": 0.1700102537870407, "learning_rate": 0.002, "loss": 2.5595, "step": 361460 }, { "epoch": 0.7201286178758128, "grad_norm": 0.16480213403701782, "learning_rate": 0.002, "loss": 2.5692, "step": 361470 }, { "epoch": 0.7201485400994517, "grad_norm": 0.1829737275838852, "learning_rate": 0.002, "loss": 2.5614, "step": 361480 }, { "epoch": 0.7201684623230906, "grad_norm": 0.17394855618476868, "learning_rate": 0.002, "loss": 2.5536, "step": 361490 }, { "epoch": 0.7201883845467296, "grad_norm": 0.16476024687290192, "learning_rate": 0.002, "loss": 2.5418, "step": 361500 }, { "epoch": 0.7202083067703685, "grad_norm": 0.2072133868932724, "learning_rate": 0.002, "loss": 2.5533, "step": 361510 }, { "epoch": 0.7202282289940074, "grad_norm": 0.1749485731124878, "learning_rate": 0.002, "loss": 2.5631, "step": 361520 }, { "epoch": 0.7202481512176463, "grad_norm": 0.1718592494726181, "learning_rate": 0.002, "loss": 2.5593, "step": 361530 }, { "epoch": 0.7202680734412852, "grad_norm": 0.13838335871696472, "learning_rate": 0.002, "loss": 2.559, "step": 361540 }, { "epoch": 0.7202879956649242, "grad_norm": 0.15964429080486298, "learning_rate": 0.002, "loss": 2.5555, "step": 361550 }, { "epoch": 0.7203079178885631, "grad_norm": 0.18652893602848053, "learning_rate": 0.002, "loss": 2.5567, "step": 361560 }, { "epoch": 0.720327840112202, "grad_norm": 0.2003404200077057, "learning_rate": 0.002, "loss": 2.5703, "step": 361570 }, { "epoch": 0.7203477623358409, "grad_norm": 0.17178574204444885, "learning_rate": 0.002, "loss": 2.5535, "step": 361580 }, { "epoch": 0.7203676845594797, "grad_norm": 0.1564473658800125, "learning_rate": 0.002, "loss": 2.5476, "step": 361590 }, { "epoch": 0.7203876067831188, "grad_norm": 0.17588382959365845, "learning_rate": 0.002, "loss": 2.5413, "step": 361600 }, { "epoch": 0.7204075290067576, "grad_norm": 0.14823102951049805, "learning_rate": 0.002, "loss": 2.5629, "step": 361610 }, { "epoch": 0.7204274512303965, "grad_norm": 0.1684931069612503, "learning_rate": 0.002, "loss": 2.5372, "step": 361620 }, { "epoch": 0.7204473734540354, "grad_norm": 0.17598697543144226, "learning_rate": 0.002, "loss": 2.5523, "step": 361630 }, { "epoch": 0.7204672956776743, "grad_norm": 0.17345485091209412, "learning_rate": 0.002, "loss": 2.5427, "step": 361640 }, { "epoch": 0.7204872179013133, "grad_norm": 0.17057637870311737, "learning_rate": 0.002, "loss": 2.5487, "step": 361650 }, { "epoch": 0.7205071401249522, "grad_norm": 0.18346786499023438, "learning_rate": 0.002, "loss": 2.5733, "step": 361660 }, { "epoch": 0.7205270623485911, "grad_norm": 0.16467903554439545, "learning_rate": 0.002, "loss": 2.5588, "step": 361670 }, { "epoch": 0.72054698457223, "grad_norm": 0.1630982905626297, "learning_rate": 0.002, "loss": 2.5533, "step": 361680 }, { "epoch": 0.7205669067958689, "grad_norm": 0.1665959507226944, "learning_rate": 0.002, "loss": 2.5512, "step": 361690 }, { "epoch": 0.7205868290195079, "grad_norm": 0.20365053415298462, "learning_rate": 0.002, "loss": 2.5666, "step": 361700 }, { "epoch": 0.7206067512431468, "grad_norm": 0.14004148542881012, "learning_rate": 0.002, "loss": 2.5621, "step": 361710 }, { "epoch": 0.7206266734667857, "grad_norm": 0.18051765859127045, "learning_rate": 0.002, "loss": 2.5583, "step": 361720 }, { "epoch": 0.7206465956904246, "grad_norm": 0.1456153392791748, "learning_rate": 0.002, "loss": 2.5447, "step": 361730 }, { "epoch": 0.7206665179140634, "grad_norm": 0.19099988043308258, "learning_rate": 0.002, "loss": 2.5555, "step": 361740 }, { "epoch": 0.7206864401377024, "grad_norm": 0.14061670005321503, "learning_rate": 0.002, "loss": 2.5583, "step": 361750 }, { "epoch": 0.7207063623613413, "grad_norm": 0.18696627020835876, "learning_rate": 0.002, "loss": 2.542, "step": 361760 }, { "epoch": 0.7207262845849802, "grad_norm": 0.1574745625257492, "learning_rate": 0.002, "loss": 2.5614, "step": 361770 }, { "epoch": 0.7207462068086191, "grad_norm": 0.1658904254436493, "learning_rate": 0.002, "loss": 2.5443, "step": 361780 }, { "epoch": 0.7207661290322581, "grad_norm": 0.1433197557926178, "learning_rate": 0.002, "loss": 2.5552, "step": 361790 }, { "epoch": 0.720786051255897, "grad_norm": 0.1351899355649948, "learning_rate": 0.002, "loss": 2.5482, "step": 361800 }, { "epoch": 0.7208059734795359, "grad_norm": 0.2105812281370163, "learning_rate": 0.002, "loss": 2.5578, "step": 361810 }, { "epoch": 0.7208258957031748, "grad_norm": 0.184633269906044, "learning_rate": 0.002, "loss": 2.564, "step": 361820 }, { "epoch": 0.7208458179268137, "grad_norm": 0.1667567938566208, "learning_rate": 0.002, "loss": 2.5577, "step": 361830 }, { "epoch": 0.7208657401504527, "grad_norm": 0.16590280830860138, "learning_rate": 0.002, "loss": 2.5541, "step": 361840 }, { "epoch": 0.7208856623740916, "grad_norm": 0.1589968502521515, "learning_rate": 0.002, "loss": 2.5507, "step": 361850 }, { "epoch": 0.7209055845977305, "grad_norm": 0.17332801222801208, "learning_rate": 0.002, "loss": 2.5602, "step": 361860 }, { "epoch": 0.7209255068213694, "grad_norm": 0.15374444425106049, "learning_rate": 0.002, "loss": 2.5605, "step": 361870 }, { "epoch": 0.7209454290450082, "grad_norm": 0.16491158306598663, "learning_rate": 0.002, "loss": 2.555, "step": 361880 }, { "epoch": 0.7209653512686472, "grad_norm": 0.15531478822231293, "learning_rate": 0.002, "loss": 2.5726, "step": 361890 }, { "epoch": 0.7209852734922861, "grad_norm": 0.1380760818719864, "learning_rate": 0.002, "loss": 2.555, "step": 361900 }, { "epoch": 0.721005195715925, "grad_norm": 0.1671202927827835, "learning_rate": 0.002, "loss": 2.5422, "step": 361910 }, { "epoch": 0.7210251179395639, "grad_norm": 0.16341105103492737, "learning_rate": 0.002, "loss": 2.5592, "step": 361920 }, { "epoch": 0.7210450401632028, "grad_norm": 0.1641426384449005, "learning_rate": 0.002, "loss": 2.5435, "step": 361930 }, { "epoch": 0.7210649623868418, "grad_norm": 0.16530656814575195, "learning_rate": 0.002, "loss": 2.5567, "step": 361940 }, { "epoch": 0.7210848846104807, "grad_norm": 0.1990266889333725, "learning_rate": 0.002, "loss": 2.5531, "step": 361950 }, { "epoch": 0.7211048068341196, "grad_norm": 0.1482403576374054, "learning_rate": 0.002, "loss": 2.5604, "step": 361960 }, { "epoch": 0.7211247290577585, "grad_norm": 0.17783239483833313, "learning_rate": 0.002, "loss": 2.5574, "step": 361970 }, { "epoch": 0.7211446512813974, "grad_norm": 0.1488378494977951, "learning_rate": 0.002, "loss": 2.5485, "step": 361980 }, { "epoch": 0.7211645735050364, "grad_norm": 0.17872190475463867, "learning_rate": 0.002, "loss": 2.5543, "step": 361990 }, { "epoch": 0.7211844957286753, "grad_norm": 0.184197798371315, "learning_rate": 0.002, "loss": 2.5583, "step": 362000 }, { "epoch": 0.7212044179523142, "grad_norm": 0.13384918868541718, "learning_rate": 0.002, "loss": 2.5576, "step": 362010 }, { "epoch": 0.721224340175953, "grad_norm": 0.16409273445606232, "learning_rate": 0.002, "loss": 2.5529, "step": 362020 }, { "epoch": 0.7212442623995919, "grad_norm": 0.16592629253864288, "learning_rate": 0.002, "loss": 2.5511, "step": 362030 }, { "epoch": 0.7212641846232309, "grad_norm": 0.20447032153606415, "learning_rate": 0.002, "loss": 2.5429, "step": 362040 }, { "epoch": 0.7212841068468698, "grad_norm": 0.19047680497169495, "learning_rate": 0.002, "loss": 2.5558, "step": 362050 }, { "epoch": 0.7213040290705087, "grad_norm": 0.17249122262001038, "learning_rate": 0.002, "loss": 2.5685, "step": 362060 }, { "epoch": 0.7213239512941476, "grad_norm": 0.14779241383075714, "learning_rate": 0.002, "loss": 2.559, "step": 362070 }, { "epoch": 0.7213438735177866, "grad_norm": 0.15533246099948883, "learning_rate": 0.002, "loss": 2.5657, "step": 362080 }, { "epoch": 0.7213637957414255, "grad_norm": 0.1679154485464096, "learning_rate": 0.002, "loss": 2.5527, "step": 362090 }, { "epoch": 0.7213837179650644, "grad_norm": 0.1541193723678589, "learning_rate": 0.002, "loss": 2.5571, "step": 362100 }, { "epoch": 0.7214036401887033, "grad_norm": 0.13816553354263306, "learning_rate": 0.002, "loss": 2.5502, "step": 362110 }, { "epoch": 0.7214235624123422, "grad_norm": 0.1789935976266861, "learning_rate": 0.002, "loss": 2.5573, "step": 362120 }, { "epoch": 0.7214434846359812, "grad_norm": 0.16914071142673492, "learning_rate": 0.002, "loss": 2.5465, "step": 362130 }, { "epoch": 0.7214634068596201, "grad_norm": 0.20612141489982605, "learning_rate": 0.002, "loss": 2.5765, "step": 362140 }, { "epoch": 0.721483329083259, "grad_norm": 0.1663997620344162, "learning_rate": 0.002, "loss": 2.5541, "step": 362150 }, { "epoch": 0.7215032513068979, "grad_norm": 0.15802821516990662, "learning_rate": 0.002, "loss": 2.5385, "step": 362160 }, { "epoch": 0.7215231735305367, "grad_norm": 0.16982124745845795, "learning_rate": 0.002, "loss": 2.5573, "step": 362170 }, { "epoch": 0.7215430957541757, "grad_norm": 0.16488119959831238, "learning_rate": 0.002, "loss": 2.5534, "step": 362180 }, { "epoch": 0.7215630179778146, "grad_norm": 0.19712720811367035, "learning_rate": 0.002, "loss": 2.5411, "step": 362190 }, { "epoch": 0.7215829402014535, "grad_norm": 0.18929997086524963, "learning_rate": 0.002, "loss": 2.556, "step": 362200 }, { "epoch": 0.7216028624250924, "grad_norm": 0.15317806601524353, "learning_rate": 0.002, "loss": 2.5597, "step": 362210 }, { "epoch": 0.7216227846487313, "grad_norm": 0.17320826649665833, "learning_rate": 0.002, "loss": 2.5853, "step": 362220 }, { "epoch": 0.7216427068723703, "grad_norm": 0.13887919485569, "learning_rate": 0.002, "loss": 2.552, "step": 362230 }, { "epoch": 0.7216626290960092, "grad_norm": 0.1644507497549057, "learning_rate": 0.002, "loss": 2.5595, "step": 362240 }, { "epoch": 0.7216825513196481, "grad_norm": 0.1539364755153656, "learning_rate": 0.002, "loss": 2.5813, "step": 362250 }, { "epoch": 0.721702473543287, "grad_norm": 0.16992579400539398, "learning_rate": 0.002, "loss": 2.5551, "step": 362260 }, { "epoch": 0.7217223957669259, "grad_norm": 0.16042955219745636, "learning_rate": 0.002, "loss": 2.5563, "step": 362270 }, { "epoch": 0.7217423179905649, "grad_norm": 0.15508826076984406, "learning_rate": 0.002, "loss": 2.5452, "step": 362280 }, { "epoch": 0.7217622402142038, "grad_norm": 0.16413719952106476, "learning_rate": 0.002, "loss": 2.5593, "step": 362290 }, { "epoch": 0.7217821624378427, "grad_norm": 0.15560109913349152, "learning_rate": 0.002, "loss": 2.5627, "step": 362300 }, { "epoch": 0.7218020846614815, "grad_norm": 0.16707487404346466, "learning_rate": 0.002, "loss": 2.5607, "step": 362310 }, { "epoch": 0.7218220068851204, "grad_norm": 0.17728008329868317, "learning_rate": 0.002, "loss": 2.5693, "step": 362320 }, { "epoch": 0.7218419291087594, "grad_norm": 0.17641031742095947, "learning_rate": 0.002, "loss": 2.5609, "step": 362330 }, { "epoch": 0.7218618513323983, "grad_norm": 0.16275948286056519, "learning_rate": 0.002, "loss": 2.5496, "step": 362340 }, { "epoch": 0.7218817735560372, "grad_norm": 0.1608923375606537, "learning_rate": 0.002, "loss": 2.5481, "step": 362350 }, { "epoch": 0.7219016957796761, "grad_norm": 0.17270447313785553, "learning_rate": 0.002, "loss": 2.5426, "step": 362360 }, { "epoch": 0.7219216180033151, "grad_norm": 0.17689043283462524, "learning_rate": 0.002, "loss": 2.5488, "step": 362370 }, { "epoch": 0.721941540226954, "grad_norm": 0.15074068307876587, "learning_rate": 0.002, "loss": 2.5486, "step": 362380 }, { "epoch": 0.7219614624505929, "grad_norm": 0.13972900807857513, "learning_rate": 0.002, "loss": 2.5473, "step": 362390 }, { "epoch": 0.7219813846742318, "grad_norm": 0.23074871301651, "learning_rate": 0.002, "loss": 2.5579, "step": 362400 }, { "epoch": 0.7220013068978707, "grad_norm": 0.1680905669927597, "learning_rate": 0.002, "loss": 2.5654, "step": 362410 }, { "epoch": 0.7220212291215097, "grad_norm": 0.16124504804611206, "learning_rate": 0.002, "loss": 2.5574, "step": 362420 }, { "epoch": 0.7220411513451486, "grad_norm": 0.14815658330917358, "learning_rate": 0.002, "loss": 2.5639, "step": 362430 }, { "epoch": 0.7220610735687875, "grad_norm": 0.16499295830726624, "learning_rate": 0.002, "loss": 2.5604, "step": 362440 }, { "epoch": 0.7220809957924264, "grad_norm": 0.19306179881095886, "learning_rate": 0.002, "loss": 2.5857, "step": 362450 }, { "epoch": 0.7221009180160652, "grad_norm": 0.15736667811870575, "learning_rate": 0.002, "loss": 2.5515, "step": 362460 }, { "epoch": 0.7221208402397042, "grad_norm": 0.1753985583782196, "learning_rate": 0.002, "loss": 2.56, "step": 362470 }, { "epoch": 0.7221407624633431, "grad_norm": 0.16980580985546112, "learning_rate": 0.002, "loss": 2.5546, "step": 362480 }, { "epoch": 0.722160684686982, "grad_norm": 0.19577713310718536, "learning_rate": 0.002, "loss": 2.5649, "step": 362490 }, { "epoch": 0.7221806069106209, "grad_norm": 0.1567123532295227, "learning_rate": 0.002, "loss": 2.5602, "step": 362500 }, { "epoch": 0.7222005291342598, "grad_norm": 0.19270795583724976, "learning_rate": 0.002, "loss": 2.5715, "step": 362510 }, { "epoch": 0.7222204513578988, "grad_norm": 0.18345795571804047, "learning_rate": 0.002, "loss": 2.5406, "step": 362520 }, { "epoch": 0.7222403735815377, "grad_norm": 0.16810482740402222, "learning_rate": 0.002, "loss": 2.5562, "step": 362530 }, { "epoch": 0.7222602958051766, "grad_norm": 0.16635999083518982, "learning_rate": 0.002, "loss": 2.548, "step": 362540 }, { "epoch": 0.7222802180288155, "grad_norm": 0.17486438155174255, "learning_rate": 0.002, "loss": 2.5485, "step": 362550 }, { "epoch": 0.7223001402524544, "grad_norm": 0.18072432279586792, "learning_rate": 0.002, "loss": 2.556, "step": 362560 }, { "epoch": 0.7223200624760934, "grad_norm": 0.17707346379756927, "learning_rate": 0.002, "loss": 2.5594, "step": 362570 }, { "epoch": 0.7223399846997323, "grad_norm": 0.1541743129491806, "learning_rate": 0.002, "loss": 2.5508, "step": 362580 }, { "epoch": 0.7223599069233712, "grad_norm": 0.18167486786842346, "learning_rate": 0.002, "loss": 2.5482, "step": 362590 }, { "epoch": 0.72237982914701, "grad_norm": 0.19798676669597626, "learning_rate": 0.002, "loss": 2.5655, "step": 362600 }, { "epoch": 0.7223997513706489, "grad_norm": 0.14126135408878326, "learning_rate": 0.002, "loss": 2.5548, "step": 362610 }, { "epoch": 0.7224196735942879, "grad_norm": 0.1625860035419464, "learning_rate": 0.002, "loss": 2.5528, "step": 362620 }, { "epoch": 0.7224395958179268, "grad_norm": 0.18134063482284546, "learning_rate": 0.002, "loss": 2.5603, "step": 362630 }, { "epoch": 0.7224595180415657, "grad_norm": 0.19169995188713074, "learning_rate": 0.002, "loss": 2.5549, "step": 362640 }, { "epoch": 0.7224794402652046, "grad_norm": 0.16624745726585388, "learning_rate": 0.002, "loss": 2.5435, "step": 362650 }, { "epoch": 0.7224993624888435, "grad_norm": 0.1678922176361084, "learning_rate": 0.002, "loss": 2.559, "step": 362660 }, { "epoch": 0.7225192847124825, "grad_norm": 0.16921831667423248, "learning_rate": 0.002, "loss": 2.5631, "step": 362670 }, { "epoch": 0.7225392069361214, "grad_norm": 0.15729562938213348, "learning_rate": 0.002, "loss": 2.5376, "step": 362680 }, { "epoch": 0.7225591291597603, "grad_norm": 0.21924962103366852, "learning_rate": 0.002, "loss": 2.5456, "step": 362690 }, { "epoch": 0.7225790513833992, "grad_norm": 0.16733697056770325, "learning_rate": 0.002, "loss": 2.5341, "step": 362700 }, { "epoch": 0.7225989736070382, "grad_norm": 0.1541808396577835, "learning_rate": 0.002, "loss": 2.5508, "step": 362710 }, { "epoch": 0.7226188958306771, "grad_norm": 0.16661155223846436, "learning_rate": 0.002, "loss": 2.5586, "step": 362720 }, { "epoch": 0.722638818054316, "grad_norm": 0.1575106829404831, "learning_rate": 0.002, "loss": 2.5567, "step": 362730 }, { "epoch": 0.7226587402779548, "grad_norm": 0.1635090559720993, "learning_rate": 0.002, "loss": 2.5741, "step": 362740 }, { "epoch": 0.7226786625015937, "grad_norm": 0.1585700362920761, "learning_rate": 0.002, "loss": 2.5463, "step": 362750 }, { "epoch": 0.7226985847252327, "grad_norm": 0.15186648070812225, "learning_rate": 0.002, "loss": 2.5427, "step": 362760 }, { "epoch": 0.7227185069488716, "grad_norm": 0.21884553134441376, "learning_rate": 0.002, "loss": 2.547, "step": 362770 }, { "epoch": 0.7227384291725105, "grad_norm": 0.14828762412071228, "learning_rate": 0.002, "loss": 2.542, "step": 362780 }, { "epoch": 0.7227583513961494, "grad_norm": 0.15786078572273254, "learning_rate": 0.002, "loss": 2.5517, "step": 362790 }, { "epoch": 0.7227782736197883, "grad_norm": 0.1680930256843567, "learning_rate": 0.002, "loss": 2.551, "step": 362800 }, { "epoch": 0.7227981958434273, "grad_norm": 0.1997540146112442, "learning_rate": 0.002, "loss": 2.5502, "step": 362810 }, { "epoch": 0.7228181180670662, "grad_norm": 0.15430745482444763, "learning_rate": 0.002, "loss": 2.5482, "step": 362820 }, { "epoch": 0.7228380402907051, "grad_norm": 0.15791632235050201, "learning_rate": 0.002, "loss": 2.5536, "step": 362830 }, { "epoch": 0.722857962514344, "grad_norm": 0.17665541172027588, "learning_rate": 0.002, "loss": 2.5418, "step": 362840 }, { "epoch": 0.7228778847379829, "grad_norm": 0.1446801722049713, "learning_rate": 0.002, "loss": 2.5585, "step": 362850 }, { "epoch": 0.7228978069616219, "grad_norm": 0.15911364555358887, "learning_rate": 0.002, "loss": 2.551, "step": 362860 }, { "epoch": 0.7229177291852608, "grad_norm": 0.15665659308433533, "learning_rate": 0.002, "loss": 2.5456, "step": 362870 }, { "epoch": 0.7229376514088997, "grad_norm": 0.16432270407676697, "learning_rate": 0.002, "loss": 2.5527, "step": 362880 }, { "epoch": 0.7229575736325385, "grad_norm": 0.1552523821592331, "learning_rate": 0.002, "loss": 2.5523, "step": 362890 }, { "epoch": 0.7229774958561774, "grad_norm": 0.1714528501033783, "learning_rate": 0.002, "loss": 2.5515, "step": 362900 }, { "epoch": 0.7229974180798164, "grad_norm": 0.16639256477355957, "learning_rate": 0.002, "loss": 2.5497, "step": 362910 }, { "epoch": 0.7230173403034553, "grad_norm": 0.21952834725379944, "learning_rate": 0.002, "loss": 2.5509, "step": 362920 }, { "epoch": 0.7230372625270942, "grad_norm": 0.14549314975738525, "learning_rate": 0.002, "loss": 2.5589, "step": 362930 }, { "epoch": 0.7230571847507331, "grad_norm": 0.1632377803325653, "learning_rate": 0.002, "loss": 2.5559, "step": 362940 }, { "epoch": 0.723077106974372, "grad_norm": 0.18611693382263184, "learning_rate": 0.002, "loss": 2.557, "step": 362950 }, { "epoch": 0.723097029198011, "grad_norm": 0.1723397821187973, "learning_rate": 0.002, "loss": 2.5563, "step": 362960 }, { "epoch": 0.7231169514216499, "grad_norm": 0.18912145495414734, "learning_rate": 0.002, "loss": 2.5547, "step": 362970 }, { "epoch": 0.7231368736452888, "grad_norm": 0.1514587551355362, "learning_rate": 0.002, "loss": 2.5613, "step": 362980 }, { "epoch": 0.7231567958689277, "grad_norm": 0.1483612209558487, "learning_rate": 0.002, "loss": 2.5659, "step": 362990 }, { "epoch": 0.7231767180925667, "grad_norm": 0.17651750147342682, "learning_rate": 0.002, "loss": 2.5604, "step": 363000 }, { "epoch": 0.7231966403162056, "grad_norm": 0.15452446043491364, "learning_rate": 0.002, "loss": 2.5484, "step": 363010 }, { "epoch": 0.7232165625398445, "grad_norm": 0.17339758574962616, "learning_rate": 0.002, "loss": 2.5713, "step": 363020 }, { "epoch": 0.7232364847634833, "grad_norm": 0.21223188936710358, "learning_rate": 0.002, "loss": 2.5608, "step": 363030 }, { "epoch": 0.7232564069871222, "grad_norm": 0.15699166059494019, "learning_rate": 0.002, "loss": 2.5705, "step": 363040 }, { "epoch": 0.7232763292107612, "grad_norm": 0.1771329790353775, "learning_rate": 0.002, "loss": 2.5323, "step": 363050 }, { "epoch": 0.7232962514344001, "grad_norm": 0.17234742641448975, "learning_rate": 0.002, "loss": 2.5567, "step": 363060 }, { "epoch": 0.723316173658039, "grad_norm": 0.1514551043510437, "learning_rate": 0.002, "loss": 2.5579, "step": 363070 }, { "epoch": 0.7233360958816779, "grad_norm": 0.19123682379722595, "learning_rate": 0.002, "loss": 2.5623, "step": 363080 }, { "epoch": 0.7233560181053168, "grad_norm": 0.19378484785556793, "learning_rate": 0.002, "loss": 2.5522, "step": 363090 }, { "epoch": 0.7233759403289558, "grad_norm": 0.1485036462545395, "learning_rate": 0.002, "loss": 2.557, "step": 363100 }, { "epoch": 0.7233958625525947, "grad_norm": 0.16856934130191803, "learning_rate": 0.002, "loss": 2.5466, "step": 363110 }, { "epoch": 0.7234157847762336, "grad_norm": 0.14116251468658447, "learning_rate": 0.002, "loss": 2.559, "step": 363120 }, { "epoch": 0.7234357069998725, "grad_norm": 0.1836063116788864, "learning_rate": 0.002, "loss": 2.5525, "step": 363130 }, { "epoch": 0.7234556292235114, "grad_norm": 0.18727850914001465, "learning_rate": 0.002, "loss": 2.551, "step": 363140 }, { "epoch": 0.7234755514471504, "grad_norm": 0.1869572103023529, "learning_rate": 0.002, "loss": 2.5625, "step": 363150 }, { "epoch": 0.7234954736707893, "grad_norm": 0.16672521829605103, "learning_rate": 0.002, "loss": 2.5421, "step": 363160 }, { "epoch": 0.7235153958944281, "grad_norm": 0.16869086027145386, "learning_rate": 0.002, "loss": 2.5487, "step": 363170 }, { "epoch": 0.723535318118067, "grad_norm": 0.15630871057510376, "learning_rate": 0.002, "loss": 2.5553, "step": 363180 }, { "epoch": 0.7235552403417059, "grad_norm": 0.17189538478851318, "learning_rate": 0.002, "loss": 2.5502, "step": 363190 }, { "epoch": 0.7235751625653449, "grad_norm": 0.19418762624263763, "learning_rate": 0.002, "loss": 2.5752, "step": 363200 }, { "epoch": 0.7235950847889838, "grad_norm": 0.16098272800445557, "learning_rate": 0.002, "loss": 2.5515, "step": 363210 }, { "epoch": 0.7236150070126227, "grad_norm": 0.1566007435321808, "learning_rate": 0.002, "loss": 2.5493, "step": 363220 }, { "epoch": 0.7236349292362616, "grad_norm": 0.18009255826473236, "learning_rate": 0.002, "loss": 2.58, "step": 363230 }, { "epoch": 0.7236548514599005, "grad_norm": 0.15500342845916748, "learning_rate": 0.002, "loss": 2.5655, "step": 363240 }, { "epoch": 0.7236747736835395, "grad_norm": 0.1798877865076065, "learning_rate": 0.002, "loss": 2.5545, "step": 363250 }, { "epoch": 0.7236946959071784, "grad_norm": 0.161727637052536, "learning_rate": 0.002, "loss": 2.5435, "step": 363260 }, { "epoch": 0.7237146181308173, "grad_norm": 0.16560593247413635, "learning_rate": 0.002, "loss": 2.5709, "step": 363270 }, { "epoch": 0.7237345403544562, "grad_norm": 0.21120457351207733, "learning_rate": 0.002, "loss": 2.5645, "step": 363280 }, { "epoch": 0.7237544625780952, "grad_norm": 0.1892206072807312, "learning_rate": 0.002, "loss": 2.5738, "step": 363290 }, { "epoch": 0.7237743848017341, "grad_norm": 0.16336911916732788, "learning_rate": 0.002, "loss": 2.5568, "step": 363300 }, { "epoch": 0.723794307025373, "grad_norm": 0.17342613637447357, "learning_rate": 0.002, "loss": 2.5586, "step": 363310 }, { "epoch": 0.7238142292490118, "grad_norm": 0.1766633689403534, "learning_rate": 0.002, "loss": 2.552, "step": 363320 }, { "epoch": 0.7238341514726507, "grad_norm": 0.13491763174533844, "learning_rate": 0.002, "loss": 2.563, "step": 363330 }, { "epoch": 0.7238540736962897, "grad_norm": 0.17143338918685913, "learning_rate": 0.002, "loss": 2.5422, "step": 363340 }, { "epoch": 0.7238739959199286, "grad_norm": 0.16327838599681854, "learning_rate": 0.002, "loss": 2.5488, "step": 363350 }, { "epoch": 0.7238939181435675, "grad_norm": 0.173413947224617, "learning_rate": 0.002, "loss": 2.5412, "step": 363360 }, { "epoch": 0.7239138403672064, "grad_norm": 0.18130581080913544, "learning_rate": 0.002, "loss": 2.5605, "step": 363370 }, { "epoch": 0.7239337625908453, "grad_norm": 0.14605242013931274, "learning_rate": 0.002, "loss": 2.5674, "step": 363380 }, { "epoch": 0.7239536848144843, "grad_norm": 0.1627468764781952, "learning_rate": 0.002, "loss": 2.5419, "step": 363390 }, { "epoch": 0.7239736070381232, "grad_norm": 0.18728479743003845, "learning_rate": 0.002, "loss": 2.543, "step": 363400 }, { "epoch": 0.7239935292617621, "grad_norm": 0.1809224784374237, "learning_rate": 0.002, "loss": 2.5436, "step": 363410 }, { "epoch": 0.724013451485401, "grad_norm": 0.150976300239563, "learning_rate": 0.002, "loss": 2.5574, "step": 363420 }, { "epoch": 0.7240333737090399, "grad_norm": 0.1548086255788803, "learning_rate": 0.002, "loss": 2.5466, "step": 363430 }, { "epoch": 0.7240532959326789, "grad_norm": 0.18571782112121582, "learning_rate": 0.002, "loss": 2.5635, "step": 363440 }, { "epoch": 0.7240732181563178, "grad_norm": 0.18709583580493927, "learning_rate": 0.002, "loss": 2.5615, "step": 363450 }, { "epoch": 0.7240931403799566, "grad_norm": 0.16292667388916016, "learning_rate": 0.002, "loss": 2.555, "step": 363460 }, { "epoch": 0.7241130626035955, "grad_norm": 0.16468751430511475, "learning_rate": 0.002, "loss": 2.568, "step": 363470 }, { "epoch": 0.7241329848272344, "grad_norm": 0.14375784993171692, "learning_rate": 0.002, "loss": 2.5515, "step": 363480 }, { "epoch": 0.7241529070508734, "grad_norm": 0.17874352633953094, "learning_rate": 0.002, "loss": 2.5492, "step": 363490 }, { "epoch": 0.7241728292745123, "grad_norm": 0.17477375268936157, "learning_rate": 0.002, "loss": 2.5361, "step": 363500 }, { "epoch": 0.7241927514981512, "grad_norm": 0.1913316398859024, "learning_rate": 0.002, "loss": 2.543, "step": 363510 }, { "epoch": 0.7242126737217901, "grad_norm": 0.1678164154291153, "learning_rate": 0.002, "loss": 2.5427, "step": 363520 }, { "epoch": 0.724232595945429, "grad_norm": 0.13872049748897552, "learning_rate": 0.002, "loss": 2.5545, "step": 363530 }, { "epoch": 0.724252518169068, "grad_norm": 0.17440973222255707, "learning_rate": 0.002, "loss": 2.5564, "step": 363540 }, { "epoch": 0.7242724403927069, "grad_norm": 0.1692911833524704, "learning_rate": 0.002, "loss": 2.5513, "step": 363550 }, { "epoch": 0.7242923626163458, "grad_norm": 0.16665154695510864, "learning_rate": 0.002, "loss": 2.5408, "step": 363560 }, { "epoch": 0.7243122848399847, "grad_norm": 0.18521054089069366, "learning_rate": 0.002, "loss": 2.5518, "step": 363570 }, { "epoch": 0.7243322070636237, "grad_norm": 0.17331601679325104, "learning_rate": 0.002, "loss": 2.5695, "step": 363580 }, { "epoch": 0.7243521292872626, "grad_norm": 0.1884414106607437, "learning_rate": 0.002, "loss": 2.5513, "step": 363590 }, { "epoch": 0.7243720515109014, "grad_norm": 0.1407487839460373, "learning_rate": 0.002, "loss": 2.5439, "step": 363600 }, { "epoch": 0.7243919737345403, "grad_norm": 0.15502332150936127, "learning_rate": 0.002, "loss": 2.5475, "step": 363610 }, { "epoch": 0.7244118959581792, "grad_norm": 0.13596978783607483, "learning_rate": 0.002, "loss": 2.5483, "step": 363620 }, { "epoch": 0.7244318181818182, "grad_norm": 0.15359529852867126, "learning_rate": 0.002, "loss": 2.5523, "step": 363630 }, { "epoch": 0.7244517404054571, "grad_norm": 0.23996581137180328, "learning_rate": 0.002, "loss": 2.5747, "step": 363640 }, { "epoch": 0.724471662629096, "grad_norm": 0.15502603352069855, "learning_rate": 0.002, "loss": 2.5632, "step": 363650 }, { "epoch": 0.7244915848527349, "grad_norm": 0.1667986810207367, "learning_rate": 0.002, "loss": 2.547, "step": 363660 }, { "epoch": 0.7245115070763738, "grad_norm": 0.13499198853969574, "learning_rate": 0.002, "loss": 2.5457, "step": 363670 }, { "epoch": 0.7245314293000128, "grad_norm": 0.17867036163806915, "learning_rate": 0.002, "loss": 2.5574, "step": 363680 }, { "epoch": 0.7245513515236517, "grad_norm": 0.16025027632713318, "learning_rate": 0.002, "loss": 2.5429, "step": 363690 }, { "epoch": 0.7245712737472906, "grad_norm": 0.16520507633686066, "learning_rate": 0.002, "loss": 2.5573, "step": 363700 }, { "epoch": 0.7245911959709295, "grad_norm": 0.15493397414684296, "learning_rate": 0.002, "loss": 2.5476, "step": 363710 }, { "epoch": 0.7246111181945684, "grad_norm": 0.172109916806221, "learning_rate": 0.002, "loss": 2.5533, "step": 363720 }, { "epoch": 0.7246310404182074, "grad_norm": 0.16630983352661133, "learning_rate": 0.002, "loss": 2.5487, "step": 363730 }, { "epoch": 0.7246509626418463, "grad_norm": 0.19038636982440948, "learning_rate": 0.002, "loss": 2.5502, "step": 363740 }, { "epoch": 0.7246708848654851, "grad_norm": 0.15419089794158936, "learning_rate": 0.002, "loss": 2.5703, "step": 363750 }, { "epoch": 0.724690807089124, "grad_norm": 0.13419736921787262, "learning_rate": 0.002, "loss": 2.5445, "step": 363760 }, { "epoch": 0.7247107293127629, "grad_norm": 0.18317550420761108, "learning_rate": 0.002, "loss": 2.5552, "step": 363770 }, { "epoch": 0.7247306515364019, "grad_norm": 0.16474443674087524, "learning_rate": 0.002, "loss": 2.5542, "step": 363780 }, { "epoch": 0.7247505737600408, "grad_norm": 0.18069308996200562, "learning_rate": 0.002, "loss": 2.5636, "step": 363790 }, { "epoch": 0.7247704959836797, "grad_norm": 0.1747128814458847, "learning_rate": 0.002, "loss": 2.5517, "step": 363800 }, { "epoch": 0.7247904182073186, "grad_norm": 0.16767965257167816, "learning_rate": 0.002, "loss": 2.5728, "step": 363810 }, { "epoch": 0.7248103404309575, "grad_norm": 0.19000472128391266, "learning_rate": 0.002, "loss": 2.558, "step": 363820 }, { "epoch": 0.7248302626545965, "grad_norm": 0.15623462200164795, "learning_rate": 0.002, "loss": 2.5587, "step": 363830 }, { "epoch": 0.7248501848782354, "grad_norm": 0.16798347234725952, "learning_rate": 0.002, "loss": 2.5491, "step": 363840 }, { "epoch": 0.7248701071018743, "grad_norm": 0.1577479988336563, "learning_rate": 0.002, "loss": 2.5497, "step": 363850 }, { "epoch": 0.7248900293255132, "grad_norm": 0.2078857421875, "learning_rate": 0.002, "loss": 2.5493, "step": 363860 }, { "epoch": 0.7249099515491522, "grad_norm": 0.1608673632144928, "learning_rate": 0.002, "loss": 2.5566, "step": 363870 }, { "epoch": 0.7249298737727911, "grad_norm": 0.14254264533519745, "learning_rate": 0.002, "loss": 2.5703, "step": 363880 }, { "epoch": 0.72494979599643, "grad_norm": 0.1604350060224533, "learning_rate": 0.002, "loss": 2.5577, "step": 363890 }, { "epoch": 0.7249697182200688, "grad_norm": 0.16271357238292694, "learning_rate": 0.002, "loss": 2.5556, "step": 363900 }, { "epoch": 0.7249896404437077, "grad_norm": 0.15711528062820435, "learning_rate": 0.002, "loss": 2.5619, "step": 363910 }, { "epoch": 0.7250095626673467, "grad_norm": 0.17551273107528687, "learning_rate": 0.002, "loss": 2.5383, "step": 363920 }, { "epoch": 0.7250294848909856, "grad_norm": 0.16292811930179596, "learning_rate": 0.002, "loss": 2.5505, "step": 363930 }, { "epoch": 0.7250494071146245, "grad_norm": 0.17259903252124786, "learning_rate": 0.002, "loss": 2.558, "step": 363940 }, { "epoch": 0.7250693293382634, "grad_norm": 0.21169917285442352, "learning_rate": 0.002, "loss": 2.5556, "step": 363950 }, { "epoch": 0.7250892515619023, "grad_norm": 0.15589220821857452, "learning_rate": 0.002, "loss": 2.5361, "step": 363960 }, { "epoch": 0.7251091737855413, "grad_norm": 0.14542657136917114, "learning_rate": 0.002, "loss": 2.53, "step": 363970 }, { "epoch": 0.7251290960091802, "grad_norm": 0.16299766302108765, "learning_rate": 0.002, "loss": 2.5459, "step": 363980 }, { "epoch": 0.7251490182328191, "grad_norm": 0.18717098236083984, "learning_rate": 0.002, "loss": 2.5638, "step": 363990 }, { "epoch": 0.725168940456458, "grad_norm": 0.162079319357872, "learning_rate": 0.002, "loss": 2.5483, "step": 364000 }, { "epoch": 0.7251888626800969, "grad_norm": 0.14017757773399353, "learning_rate": 0.002, "loss": 2.5681, "step": 364010 }, { "epoch": 0.7252087849037359, "grad_norm": 0.15884941816329956, "learning_rate": 0.002, "loss": 2.5573, "step": 364020 }, { "epoch": 0.7252287071273747, "grad_norm": 0.16354642808437347, "learning_rate": 0.002, "loss": 2.5629, "step": 364030 }, { "epoch": 0.7252486293510136, "grad_norm": 0.14543074369430542, "learning_rate": 0.002, "loss": 2.5468, "step": 364040 }, { "epoch": 0.7252685515746525, "grad_norm": 0.16652901470661163, "learning_rate": 0.002, "loss": 2.535, "step": 364050 }, { "epoch": 0.7252884737982914, "grad_norm": 0.20322473347187042, "learning_rate": 0.002, "loss": 2.5509, "step": 364060 }, { "epoch": 0.7253083960219304, "grad_norm": 0.1775873601436615, "learning_rate": 0.002, "loss": 2.5648, "step": 364070 }, { "epoch": 0.7253283182455693, "grad_norm": 0.16724301874637604, "learning_rate": 0.002, "loss": 2.5431, "step": 364080 }, { "epoch": 0.7253482404692082, "grad_norm": 0.1455642282962799, "learning_rate": 0.002, "loss": 2.5366, "step": 364090 }, { "epoch": 0.7253681626928471, "grad_norm": 0.17472460865974426, "learning_rate": 0.002, "loss": 2.5576, "step": 364100 }, { "epoch": 0.725388084916486, "grad_norm": 0.1713751256465912, "learning_rate": 0.002, "loss": 2.5621, "step": 364110 }, { "epoch": 0.725408007140125, "grad_norm": 0.16315655410289764, "learning_rate": 0.002, "loss": 2.5471, "step": 364120 }, { "epoch": 0.7254279293637639, "grad_norm": 0.1378389149904251, "learning_rate": 0.002, "loss": 2.5588, "step": 364130 }, { "epoch": 0.7254478515874028, "grad_norm": 0.17318691313266754, "learning_rate": 0.002, "loss": 2.5534, "step": 364140 }, { "epoch": 0.7254677738110417, "grad_norm": 0.14427053928375244, "learning_rate": 0.002, "loss": 2.5636, "step": 364150 }, { "epoch": 0.7254876960346806, "grad_norm": 0.1861361563205719, "learning_rate": 0.002, "loss": 2.5586, "step": 364160 }, { "epoch": 0.7255076182583196, "grad_norm": 0.1794152706861496, "learning_rate": 0.002, "loss": 2.5426, "step": 364170 }, { "epoch": 0.7255275404819584, "grad_norm": 0.15901018679141998, "learning_rate": 0.002, "loss": 2.5483, "step": 364180 }, { "epoch": 0.7255474627055973, "grad_norm": 0.16114071011543274, "learning_rate": 0.002, "loss": 2.5658, "step": 364190 }, { "epoch": 0.7255673849292362, "grad_norm": 0.18624641001224518, "learning_rate": 0.002, "loss": 2.5569, "step": 364200 }, { "epoch": 0.7255873071528752, "grad_norm": 0.1656372845172882, "learning_rate": 0.002, "loss": 2.5607, "step": 364210 }, { "epoch": 0.7256072293765141, "grad_norm": 0.16167138516902924, "learning_rate": 0.002, "loss": 2.5415, "step": 364220 }, { "epoch": 0.725627151600153, "grad_norm": 0.1427515149116516, "learning_rate": 0.002, "loss": 2.5469, "step": 364230 }, { "epoch": 0.7256470738237919, "grad_norm": 0.16723762452602386, "learning_rate": 0.002, "loss": 2.5735, "step": 364240 }, { "epoch": 0.7256669960474308, "grad_norm": 0.15638014674186707, "learning_rate": 0.002, "loss": 2.5442, "step": 364250 }, { "epoch": 0.7256869182710698, "grad_norm": 0.15456415712833405, "learning_rate": 0.002, "loss": 2.558, "step": 364260 }, { "epoch": 0.7257068404947087, "grad_norm": 0.16546384990215302, "learning_rate": 0.002, "loss": 2.5409, "step": 364270 }, { "epoch": 0.7257267627183476, "grad_norm": 0.17461137473583221, "learning_rate": 0.002, "loss": 2.5697, "step": 364280 }, { "epoch": 0.7257466849419865, "grad_norm": 0.14511792361736298, "learning_rate": 0.002, "loss": 2.5673, "step": 364290 }, { "epoch": 0.7257666071656254, "grad_norm": 0.19086794555187225, "learning_rate": 0.002, "loss": 2.5725, "step": 364300 }, { "epoch": 0.7257865293892644, "grad_norm": 0.18921959400177002, "learning_rate": 0.002, "loss": 2.5503, "step": 364310 }, { "epoch": 0.7258064516129032, "grad_norm": 0.19742326438426971, "learning_rate": 0.002, "loss": 2.5665, "step": 364320 }, { "epoch": 0.7258263738365421, "grad_norm": 0.16568998992443085, "learning_rate": 0.002, "loss": 2.5476, "step": 364330 }, { "epoch": 0.725846296060181, "grad_norm": 0.17952455580234528, "learning_rate": 0.002, "loss": 2.5505, "step": 364340 }, { "epoch": 0.7258662182838199, "grad_norm": 0.16821129620075226, "learning_rate": 0.002, "loss": 2.5512, "step": 364350 }, { "epoch": 0.7258861405074589, "grad_norm": 0.16888192296028137, "learning_rate": 0.002, "loss": 2.5519, "step": 364360 }, { "epoch": 0.7259060627310978, "grad_norm": 0.24077855050563812, "learning_rate": 0.002, "loss": 2.5756, "step": 364370 }, { "epoch": 0.7259259849547367, "grad_norm": 0.1615356206893921, "learning_rate": 0.002, "loss": 2.5833, "step": 364380 }, { "epoch": 0.7259459071783756, "grad_norm": 0.159219428896904, "learning_rate": 0.002, "loss": 2.5535, "step": 364390 }, { "epoch": 0.7259658294020145, "grad_norm": 0.1731986701488495, "learning_rate": 0.002, "loss": 2.5341, "step": 364400 }, { "epoch": 0.7259857516256535, "grad_norm": 0.178147554397583, "learning_rate": 0.002, "loss": 2.5448, "step": 364410 }, { "epoch": 0.7260056738492924, "grad_norm": 0.13772052526474, "learning_rate": 0.002, "loss": 2.5529, "step": 364420 }, { "epoch": 0.7260255960729313, "grad_norm": 0.13708090782165527, "learning_rate": 0.002, "loss": 2.561, "step": 364430 }, { "epoch": 0.7260455182965702, "grad_norm": 0.16296885907649994, "learning_rate": 0.002, "loss": 2.5488, "step": 364440 }, { "epoch": 0.726065440520209, "grad_norm": 0.1416803002357483, "learning_rate": 0.002, "loss": 2.5566, "step": 364450 }, { "epoch": 0.726085362743848, "grad_norm": 0.1426842361688614, "learning_rate": 0.002, "loss": 2.5429, "step": 364460 }, { "epoch": 0.7261052849674869, "grad_norm": 0.16552649438381195, "learning_rate": 0.002, "loss": 2.5572, "step": 364470 }, { "epoch": 0.7261252071911258, "grad_norm": 0.205797016620636, "learning_rate": 0.002, "loss": 2.556, "step": 364480 }, { "epoch": 0.7261451294147647, "grad_norm": 0.1526312530040741, "learning_rate": 0.002, "loss": 2.5513, "step": 364490 }, { "epoch": 0.7261650516384037, "grad_norm": 0.15972769260406494, "learning_rate": 0.002, "loss": 2.5571, "step": 364500 }, { "epoch": 0.7261849738620426, "grad_norm": 0.1877029687166214, "learning_rate": 0.002, "loss": 2.5563, "step": 364510 }, { "epoch": 0.7262048960856815, "grad_norm": 0.19955086708068848, "learning_rate": 0.002, "loss": 2.5448, "step": 364520 }, { "epoch": 0.7262248183093204, "grad_norm": 0.13676100969314575, "learning_rate": 0.002, "loss": 2.5361, "step": 364530 }, { "epoch": 0.7262447405329593, "grad_norm": 0.19548776745796204, "learning_rate": 0.002, "loss": 2.553, "step": 364540 }, { "epoch": 0.7262646627565983, "grad_norm": 0.17076338827610016, "learning_rate": 0.002, "loss": 2.5646, "step": 364550 }, { "epoch": 0.7262845849802372, "grad_norm": 0.15126948058605194, "learning_rate": 0.002, "loss": 2.5659, "step": 364560 }, { "epoch": 0.7263045072038761, "grad_norm": 0.16657915711402893, "learning_rate": 0.002, "loss": 2.5492, "step": 364570 }, { "epoch": 0.726324429427515, "grad_norm": 0.14989472925662994, "learning_rate": 0.002, "loss": 2.5469, "step": 364580 }, { "epoch": 0.7263443516511539, "grad_norm": 0.15568816661834717, "learning_rate": 0.002, "loss": 2.5715, "step": 364590 }, { "epoch": 0.7263642738747929, "grad_norm": 0.13614784181118011, "learning_rate": 0.002, "loss": 2.5659, "step": 364600 }, { "epoch": 0.7263841960984317, "grad_norm": 0.1754833161830902, "learning_rate": 0.002, "loss": 2.5508, "step": 364610 }, { "epoch": 0.7264041183220706, "grad_norm": 0.14416015148162842, "learning_rate": 0.002, "loss": 2.5569, "step": 364620 }, { "epoch": 0.7264240405457095, "grad_norm": 0.1495923399925232, "learning_rate": 0.002, "loss": 2.5575, "step": 364630 }, { "epoch": 0.7264439627693484, "grad_norm": 0.17380832135677338, "learning_rate": 0.002, "loss": 2.555, "step": 364640 }, { "epoch": 0.7264638849929874, "grad_norm": 0.20252326130867004, "learning_rate": 0.002, "loss": 2.552, "step": 364650 }, { "epoch": 0.7264838072166263, "grad_norm": 0.16809053719043732, "learning_rate": 0.002, "loss": 2.5518, "step": 364660 }, { "epoch": 0.7265037294402652, "grad_norm": 0.15036144852638245, "learning_rate": 0.002, "loss": 2.546, "step": 364670 }, { "epoch": 0.7265236516639041, "grad_norm": 0.1606331616640091, "learning_rate": 0.002, "loss": 2.5591, "step": 364680 }, { "epoch": 0.726543573887543, "grad_norm": 0.16792084276676178, "learning_rate": 0.002, "loss": 2.5679, "step": 364690 }, { "epoch": 0.726563496111182, "grad_norm": 0.14312744140625, "learning_rate": 0.002, "loss": 2.5653, "step": 364700 }, { "epoch": 0.7265834183348209, "grad_norm": 0.18917986750602722, "learning_rate": 0.002, "loss": 2.5696, "step": 364710 }, { "epoch": 0.7266033405584598, "grad_norm": 0.16457195580005646, "learning_rate": 0.002, "loss": 2.5415, "step": 364720 }, { "epoch": 0.7266232627820987, "grad_norm": 0.18495628237724304, "learning_rate": 0.002, "loss": 2.5516, "step": 364730 }, { "epoch": 0.7266431850057375, "grad_norm": 0.18971505761146545, "learning_rate": 0.002, "loss": 2.5524, "step": 364740 }, { "epoch": 0.7266631072293765, "grad_norm": 0.1542465090751648, "learning_rate": 0.002, "loss": 2.5538, "step": 364750 }, { "epoch": 0.7266830294530154, "grad_norm": 0.1903523951768875, "learning_rate": 0.002, "loss": 2.5509, "step": 364760 }, { "epoch": 0.7267029516766543, "grad_norm": 0.17175039649009705, "learning_rate": 0.002, "loss": 2.5351, "step": 364770 }, { "epoch": 0.7267228739002932, "grad_norm": 0.17550620436668396, "learning_rate": 0.002, "loss": 2.5485, "step": 364780 }, { "epoch": 0.7267427961239322, "grad_norm": 0.18604891002178192, "learning_rate": 0.002, "loss": 2.5537, "step": 364790 }, { "epoch": 0.7267627183475711, "grad_norm": 0.16815321147441864, "learning_rate": 0.002, "loss": 2.5601, "step": 364800 }, { "epoch": 0.72678264057121, "grad_norm": 0.16542714834213257, "learning_rate": 0.002, "loss": 2.5463, "step": 364810 }, { "epoch": 0.7268025627948489, "grad_norm": 0.1550218164920807, "learning_rate": 0.002, "loss": 2.5651, "step": 364820 }, { "epoch": 0.7268224850184878, "grad_norm": 0.15526634454727173, "learning_rate": 0.002, "loss": 2.5553, "step": 364830 }, { "epoch": 0.7268424072421268, "grad_norm": 0.19043153524398804, "learning_rate": 0.002, "loss": 2.5496, "step": 364840 }, { "epoch": 0.7268623294657657, "grad_norm": 0.15787257254123688, "learning_rate": 0.002, "loss": 2.5568, "step": 364850 }, { "epoch": 0.7268822516894046, "grad_norm": 0.17210212349891663, "learning_rate": 0.002, "loss": 2.5571, "step": 364860 }, { "epoch": 0.7269021739130435, "grad_norm": 0.16609036922454834, "learning_rate": 0.002, "loss": 2.5487, "step": 364870 }, { "epoch": 0.7269220961366823, "grad_norm": 0.2514649033546448, "learning_rate": 0.002, "loss": 2.5761, "step": 364880 }, { "epoch": 0.7269420183603214, "grad_norm": 0.16769710183143616, "learning_rate": 0.002, "loss": 2.5656, "step": 364890 }, { "epoch": 0.7269619405839602, "grad_norm": 0.16617299616336823, "learning_rate": 0.002, "loss": 2.5673, "step": 364900 }, { "epoch": 0.7269818628075991, "grad_norm": 0.14788220822811127, "learning_rate": 0.002, "loss": 2.5732, "step": 364910 }, { "epoch": 0.727001785031238, "grad_norm": 0.17190401256084442, "learning_rate": 0.002, "loss": 2.551, "step": 364920 }, { "epoch": 0.7270217072548769, "grad_norm": 0.15525095164775848, "learning_rate": 0.002, "loss": 2.5603, "step": 364930 }, { "epoch": 0.7270416294785159, "grad_norm": 0.16711445152759552, "learning_rate": 0.002, "loss": 2.5626, "step": 364940 }, { "epoch": 0.7270615517021548, "grad_norm": 0.13741444051265717, "learning_rate": 0.002, "loss": 2.5425, "step": 364950 }, { "epoch": 0.7270814739257937, "grad_norm": 0.16629663109779358, "learning_rate": 0.002, "loss": 2.5543, "step": 364960 }, { "epoch": 0.7271013961494326, "grad_norm": 0.15849778056144714, "learning_rate": 0.002, "loss": 2.5591, "step": 364970 }, { "epoch": 0.7271213183730715, "grad_norm": 0.1380533128976822, "learning_rate": 0.002, "loss": 2.5464, "step": 364980 }, { "epoch": 0.7271412405967105, "grad_norm": 0.15965615212917328, "learning_rate": 0.002, "loss": 2.5612, "step": 364990 }, { "epoch": 0.7271611628203494, "grad_norm": 0.19827573001384735, "learning_rate": 0.002, "loss": 2.5503, "step": 365000 }, { "epoch": 0.7271810850439883, "grad_norm": 0.1588471680879593, "learning_rate": 0.002, "loss": 2.5429, "step": 365010 }, { "epoch": 0.7272010072676272, "grad_norm": 0.1972067505121231, "learning_rate": 0.002, "loss": 2.5516, "step": 365020 }, { "epoch": 0.727220929491266, "grad_norm": 0.15975476801395416, "learning_rate": 0.002, "loss": 2.5457, "step": 365030 }, { "epoch": 0.727240851714905, "grad_norm": 0.18931086361408234, "learning_rate": 0.002, "loss": 2.56, "step": 365040 }, { "epoch": 0.7272607739385439, "grad_norm": 0.17877641320228577, "learning_rate": 0.002, "loss": 2.5529, "step": 365050 }, { "epoch": 0.7272806961621828, "grad_norm": 0.18687786161899567, "learning_rate": 0.002, "loss": 2.558, "step": 365060 }, { "epoch": 0.7273006183858217, "grad_norm": 0.17967575788497925, "learning_rate": 0.002, "loss": 2.5675, "step": 365070 }, { "epoch": 0.7273205406094607, "grad_norm": 0.15431635081768036, "learning_rate": 0.002, "loss": 2.5647, "step": 365080 }, { "epoch": 0.7273404628330996, "grad_norm": 0.1489294022321701, "learning_rate": 0.002, "loss": 2.5584, "step": 365090 }, { "epoch": 0.7273603850567385, "grad_norm": 0.15673689544200897, "learning_rate": 0.002, "loss": 2.5437, "step": 365100 }, { "epoch": 0.7273803072803774, "grad_norm": 0.18141871690750122, "learning_rate": 0.002, "loss": 2.571, "step": 365110 }, { "epoch": 0.7274002295040163, "grad_norm": 0.15457434952259064, "learning_rate": 0.002, "loss": 2.544, "step": 365120 }, { "epoch": 0.7274201517276553, "grad_norm": 0.15603090822696686, "learning_rate": 0.002, "loss": 2.5511, "step": 365130 }, { "epoch": 0.7274400739512942, "grad_norm": 0.16834498941898346, "learning_rate": 0.002, "loss": 2.55, "step": 365140 }, { "epoch": 0.7274599961749331, "grad_norm": 0.196956604719162, "learning_rate": 0.002, "loss": 2.564, "step": 365150 }, { "epoch": 0.727479918398572, "grad_norm": 0.15364371240139008, "learning_rate": 0.002, "loss": 2.5575, "step": 365160 }, { "epoch": 0.7274998406222108, "grad_norm": 0.1440013200044632, "learning_rate": 0.002, "loss": 2.5598, "step": 365170 }, { "epoch": 0.7275197628458498, "grad_norm": 0.1551314741373062, "learning_rate": 0.002, "loss": 2.5477, "step": 365180 }, { "epoch": 0.7275396850694887, "grad_norm": 0.15364503860473633, "learning_rate": 0.002, "loss": 2.5551, "step": 365190 }, { "epoch": 0.7275596072931276, "grad_norm": 0.24600625038146973, "learning_rate": 0.002, "loss": 2.561, "step": 365200 }, { "epoch": 0.7275795295167665, "grad_norm": 0.16494035720825195, "learning_rate": 0.002, "loss": 2.5537, "step": 365210 }, { "epoch": 0.7275994517404054, "grad_norm": 0.18040594458580017, "learning_rate": 0.002, "loss": 2.5598, "step": 365220 }, { "epoch": 0.7276193739640444, "grad_norm": 0.1567019671201706, "learning_rate": 0.002, "loss": 2.5595, "step": 365230 }, { "epoch": 0.7276392961876833, "grad_norm": 0.17514237761497498, "learning_rate": 0.002, "loss": 2.558, "step": 365240 }, { "epoch": 0.7276592184113222, "grad_norm": 0.16752827167510986, "learning_rate": 0.002, "loss": 2.5617, "step": 365250 }, { "epoch": 0.7276791406349611, "grad_norm": 0.16002428531646729, "learning_rate": 0.002, "loss": 2.5661, "step": 365260 }, { "epoch": 0.7276990628586, "grad_norm": 0.17287932336330414, "learning_rate": 0.002, "loss": 2.5745, "step": 365270 }, { "epoch": 0.727718985082239, "grad_norm": 0.13850723206996918, "learning_rate": 0.002, "loss": 2.5552, "step": 365280 }, { "epoch": 0.7277389073058779, "grad_norm": 0.162076935172081, "learning_rate": 0.002, "loss": 2.5646, "step": 365290 }, { "epoch": 0.7277588295295168, "grad_norm": 0.21681740880012512, "learning_rate": 0.002, "loss": 2.5484, "step": 365300 }, { "epoch": 0.7277787517531557, "grad_norm": 0.16430476307868958, "learning_rate": 0.002, "loss": 2.559, "step": 365310 }, { "epoch": 0.7277986739767945, "grad_norm": 0.1595371514558792, "learning_rate": 0.002, "loss": 2.5504, "step": 365320 }, { "epoch": 0.7278185962004335, "grad_norm": 0.17813685536384583, "learning_rate": 0.002, "loss": 2.5637, "step": 365330 }, { "epoch": 0.7278385184240724, "grad_norm": 0.15688170492649078, "learning_rate": 0.002, "loss": 2.5582, "step": 365340 }, { "epoch": 0.7278584406477113, "grad_norm": 0.1631699651479721, "learning_rate": 0.002, "loss": 2.556, "step": 365350 }, { "epoch": 0.7278783628713502, "grad_norm": 0.17853938043117523, "learning_rate": 0.002, "loss": 2.5677, "step": 365360 }, { "epoch": 0.7278982850949892, "grad_norm": 0.17418169975280762, "learning_rate": 0.002, "loss": 2.5591, "step": 365370 }, { "epoch": 0.7279182073186281, "grad_norm": 0.14345471560955048, "learning_rate": 0.002, "loss": 2.5442, "step": 365380 }, { "epoch": 0.727938129542267, "grad_norm": 0.15905357897281647, "learning_rate": 0.002, "loss": 2.569, "step": 365390 }, { "epoch": 0.7279580517659059, "grad_norm": 0.16898353397846222, "learning_rate": 0.002, "loss": 2.5369, "step": 365400 }, { "epoch": 0.7279779739895448, "grad_norm": 0.18073824048042297, "learning_rate": 0.002, "loss": 2.5515, "step": 365410 }, { "epoch": 0.7279978962131838, "grad_norm": 0.16411516070365906, "learning_rate": 0.002, "loss": 2.5453, "step": 365420 }, { "epoch": 0.7280178184368227, "grad_norm": 0.19637852907180786, "learning_rate": 0.002, "loss": 2.5503, "step": 365430 }, { "epoch": 0.7280377406604616, "grad_norm": 0.21715618669986725, "learning_rate": 0.002, "loss": 2.5486, "step": 365440 }, { "epoch": 0.7280576628841005, "grad_norm": 0.17054571211338043, "learning_rate": 0.002, "loss": 2.5677, "step": 365450 }, { "epoch": 0.7280775851077393, "grad_norm": 0.15298187732696533, "learning_rate": 0.002, "loss": 2.552, "step": 365460 }, { "epoch": 0.7280975073313783, "grad_norm": 0.16518913209438324, "learning_rate": 0.002, "loss": 2.5447, "step": 365470 }, { "epoch": 0.7281174295550172, "grad_norm": 0.1804618388414383, "learning_rate": 0.002, "loss": 2.5445, "step": 365480 }, { "epoch": 0.7281373517786561, "grad_norm": 0.18416191637516022, "learning_rate": 0.002, "loss": 2.5559, "step": 365490 }, { "epoch": 0.728157274002295, "grad_norm": 0.1508062183856964, "learning_rate": 0.002, "loss": 2.5429, "step": 365500 }, { "epoch": 0.7281771962259339, "grad_norm": 0.1739867478609085, "learning_rate": 0.002, "loss": 2.555, "step": 365510 }, { "epoch": 0.7281971184495729, "grad_norm": 0.14790791273117065, "learning_rate": 0.002, "loss": 2.5558, "step": 365520 }, { "epoch": 0.7282170406732118, "grad_norm": 0.15474699437618256, "learning_rate": 0.002, "loss": 2.5667, "step": 365530 }, { "epoch": 0.7282369628968507, "grad_norm": 0.190948024392128, "learning_rate": 0.002, "loss": 2.5596, "step": 365540 }, { "epoch": 0.7282568851204896, "grad_norm": 0.17459529638290405, "learning_rate": 0.002, "loss": 2.5503, "step": 365550 }, { "epoch": 0.7282768073441285, "grad_norm": 0.20350061357021332, "learning_rate": 0.002, "loss": 2.5704, "step": 365560 }, { "epoch": 0.7282967295677675, "grad_norm": 0.1679912507534027, "learning_rate": 0.002, "loss": 2.5515, "step": 365570 }, { "epoch": 0.7283166517914064, "grad_norm": 0.17484691739082336, "learning_rate": 0.002, "loss": 2.5537, "step": 365580 }, { "epoch": 0.7283365740150453, "grad_norm": 0.1695159375667572, "learning_rate": 0.002, "loss": 2.5487, "step": 365590 }, { "epoch": 0.7283564962386841, "grad_norm": 0.14517930150032043, "learning_rate": 0.002, "loss": 2.5663, "step": 365600 }, { "epoch": 0.728376418462323, "grad_norm": 0.15228821337223053, "learning_rate": 0.002, "loss": 2.5734, "step": 365610 }, { "epoch": 0.728396340685962, "grad_norm": 0.17204971611499786, "learning_rate": 0.002, "loss": 2.5573, "step": 365620 }, { "epoch": 0.7284162629096009, "grad_norm": 0.17329147458076477, "learning_rate": 0.002, "loss": 2.5601, "step": 365630 }, { "epoch": 0.7284361851332398, "grad_norm": 0.18819133937358856, "learning_rate": 0.002, "loss": 2.5594, "step": 365640 }, { "epoch": 0.7284561073568787, "grad_norm": 0.15077823400497437, "learning_rate": 0.002, "loss": 2.5526, "step": 365650 }, { "epoch": 0.7284760295805177, "grad_norm": 0.18672038614749908, "learning_rate": 0.002, "loss": 2.5581, "step": 365660 }, { "epoch": 0.7284959518041566, "grad_norm": 0.1725289523601532, "learning_rate": 0.002, "loss": 2.5562, "step": 365670 }, { "epoch": 0.7285158740277955, "grad_norm": 0.1551130712032318, "learning_rate": 0.002, "loss": 2.5631, "step": 365680 }, { "epoch": 0.7285357962514344, "grad_norm": 0.16900916397571564, "learning_rate": 0.002, "loss": 2.5551, "step": 365690 }, { "epoch": 0.7285557184750733, "grad_norm": 0.15111415088176727, "learning_rate": 0.002, "loss": 2.5712, "step": 365700 }, { "epoch": 0.7285756406987123, "grad_norm": 0.159269779920578, "learning_rate": 0.002, "loss": 2.5494, "step": 365710 }, { "epoch": 0.7285955629223512, "grad_norm": 0.15087886154651642, "learning_rate": 0.002, "loss": 2.5581, "step": 365720 }, { "epoch": 0.7286154851459901, "grad_norm": 0.17154501378536224, "learning_rate": 0.002, "loss": 2.5752, "step": 365730 }, { "epoch": 0.728635407369629, "grad_norm": 0.16582533717155457, "learning_rate": 0.002, "loss": 2.5587, "step": 365740 }, { "epoch": 0.7286553295932678, "grad_norm": 0.17621251940727234, "learning_rate": 0.002, "loss": 2.5586, "step": 365750 }, { "epoch": 0.7286752518169068, "grad_norm": 0.15722645819187164, "learning_rate": 0.002, "loss": 2.5565, "step": 365760 }, { "epoch": 0.7286951740405457, "grad_norm": 0.15396283566951752, "learning_rate": 0.002, "loss": 2.5634, "step": 365770 }, { "epoch": 0.7287150962641846, "grad_norm": 0.15847036242485046, "learning_rate": 0.002, "loss": 2.5607, "step": 365780 }, { "epoch": 0.7287350184878235, "grad_norm": 0.15863975882530212, "learning_rate": 0.002, "loss": 2.5513, "step": 365790 }, { "epoch": 0.7287549407114624, "grad_norm": 0.17095090448856354, "learning_rate": 0.002, "loss": 2.5676, "step": 365800 }, { "epoch": 0.7287748629351014, "grad_norm": 0.153629332780838, "learning_rate": 0.002, "loss": 2.5604, "step": 365810 }, { "epoch": 0.7287947851587403, "grad_norm": 0.17911343276500702, "learning_rate": 0.002, "loss": 2.5391, "step": 365820 }, { "epoch": 0.7288147073823792, "grad_norm": 0.16953811049461365, "learning_rate": 0.002, "loss": 2.5682, "step": 365830 }, { "epoch": 0.7288346296060181, "grad_norm": 0.18944188952445984, "learning_rate": 0.002, "loss": 2.5559, "step": 365840 }, { "epoch": 0.728854551829657, "grad_norm": 0.1572411209344864, "learning_rate": 0.002, "loss": 2.5424, "step": 365850 }, { "epoch": 0.728874474053296, "grad_norm": 0.1553112417459488, "learning_rate": 0.002, "loss": 2.5469, "step": 365860 }, { "epoch": 0.7288943962769349, "grad_norm": 0.1579718142747879, "learning_rate": 0.002, "loss": 2.5441, "step": 365870 }, { "epoch": 0.7289143185005738, "grad_norm": 0.1761210709810257, "learning_rate": 0.002, "loss": 2.5648, "step": 365880 }, { "epoch": 0.7289342407242126, "grad_norm": 0.18600031733512878, "learning_rate": 0.002, "loss": 2.5552, "step": 365890 }, { "epoch": 0.7289541629478515, "grad_norm": 0.17143628001213074, "learning_rate": 0.002, "loss": 2.5595, "step": 365900 }, { "epoch": 0.7289740851714905, "grad_norm": 0.1484501212835312, "learning_rate": 0.002, "loss": 2.5579, "step": 365910 }, { "epoch": 0.7289940073951294, "grad_norm": 0.1759568154811859, "learning_rate": 0.002, "loss": 2.5408, "step": 365920 }, { "epoch": 0.7290139296187683, "grad_norm": 0.1843569278717041, "learning_rate": 0.002, "loss": 2.5504, "step": 365930 }, { "epoch": 0.7290338518424072, "grad_norm": 0.19228918850421906, "learning_rate": 0.002, "loss": 2.5299, "step": 365940 }, { "epoch": 0.7290537740660461, "grad_norm": 0.16583354771137238, "learning_rate": 0.002, "loss": 2.5391, "step": 365950 }, { "epoch": 0.7290736962896851, "grad_norm": 0.15968342125415802, "learning_rate": 0.002, "loss": 2.5558, "step": 365960 }, { "epoch": 0.729093618513324, "grad_norm": 0.16166424751281738, "learning_rate": 0.002, "loss": 2.5534, "step": 365970 }, { "epoch": 0.7291135407369629, "grad_norm": 0.16905447840690613, "learning_rate": 0.002, "loss": 2.5505, "step": 365980 }, { "epoch": 0.7291334629606018, "grad_norm": 0.18022046983242035, "learning_rate": 0.002, "loss": 2.5565, "step": 365990 }, { "epoch": 0.7291533851842408, "grad_norm": 0.16396677494049072, "learning_rate": 0.002, "loss": 2.5647, "step": 366000 }, { "epoch": 0.7291733074078797, "grad_norm": 0.18065489828586578, "learning_rate": 0.002, "loss": 2.5572, "step": 366010 }, { "epoch": 0.7291932296315186, "grad_norm": 0.15812119841575623, "learning_rate": 0.002, "loss": 2.5391, "step": 366020 }, { "epoch": 0.7292131518551574, "grad_norm": 0.1945641040802002, "learning_rate": 0.002, "loss": 2.5506, "step": 366030 }, { "epoch": 0.7292330740787963, "grad_norm": 0.16997864842414856, "learning_rate": 0.002, "loss": 2.5583, "step": 366040 }, { "epoch": 0.7292529963024353, "grad_norm": 0.1606503427028656, "learning_rate": 0.002, "loss": 2.561, "step": 366050 }, { "epoch": 0.7292729185260742, "grad_norm": 0.14847436547279358, "learning_rate": 0.002, "loss": 2.5569, "step": 366060 }, { "epoch": 0.7292928407497131, "grad_norm": 0.182343527674675, "learning_rate": 0.002, "loss": 2.5587, "step": 366070 }, { "epoch": 0.729312762973352, "grad_norm": 0.16904664039611816, "learning_rate": 0.002, "loss": 2.5648, "step": 366080 }, { "epoch": 0.7293326851969909, "grad_norm": 0.1801881641149521, "learning_rate": 0.002, "loss": 2.5558, "step": 366090 }, { "epoch": 0.7293526074206299, "grad_norm": 0.1464005410671234, "learning_rate": 0.002, "loss": 2.5418, "step": 366100 }, { "epoch": 0.7293725296442688, "grad_norm": 0.15402410924434662, "learning_rate": 0.002, "loss": 2.5498, "step": 366110 }, { "epoch": 0.7293924518679077, "grad_norm": 0.14777669310569763, "learning_rate": 0.002, "loss": 2.5682, "step": 366120 }, { "epoch": 0.7294123740915466, "grad_norm": 0.22429145872592926, "learning_rate": 0.002, "loss": 2.5673, "step": 366130 }, { "epoch": 0.7294322963151855, "grad_norm": 0.17005780339241028, "learning_rate": 0.002, "loss": 2.5531, "step": 366140 }, { "epoch": 0.7294522185388245, "grad_norm": 0.16529789566993713, "learning_rate": 0.002, "loss": 2.5591, "step": 366150 }, { "epoch": 0.7294721407624634, "grad_norm": 0.1676652878522873, "learning_rate": 0.002, "loss": 2.5536, "step": 366160 }, { "epoch": 0.7294920629861023, "grad_norm": 0.15748292207717896, "learning_rate": 0.002, "loss": 2.5426, "step": 366170 }, { "epoch": 0.7295119852097411, "grad_norm": 0.16066190600395203, "learning_rate": 0.002, "loss": 2.5605, "step": 366180 }, { "epoch": 0.72953190743338, "grad_norm": 0.184393972158432, "learning_rate": 0.002, "loss": 2.5547, "step": 366190 }, { "epoch": 0.729551829657019, "grad_norm": 0.17109991610050201, "learning_rate": 0.002, "loss": 2.5347, "step": 366200 }, { "epoch": 0.7295717518806579, "grad_norm": 0.2022901028394699, "learning_rate": 0.002, "loss": 2.5615, "step": 366210 }, { "epoch": 0.7295916741042968, "grad_norm": 0.1617588996887207, "learning_rate": 0.002, "loss": 2.5564, "step": 366220 }, { "epoch": 0.7296115963279357, "grad_norm": 0.1564456671476364, "learning_rate": 0.002, "loss": 2.5521, "step": 366230 }, { "epoch": 0.7296315185515746, "grad_norm": 0.15963444113731384, "learning_rate": 0.002, "loss": 2.5459, "step": 366240 }, { "epoch": 0.7296514407752136, "grad_norm": 0.17430222034454346, "learning_rate": 0.002, "loss": 2.5493, "step": 366250 }, { "epoch": 0.7296713629988525, "grad_norm": 0.16987478733062744, "learning_rate": 0.002, "loss": 2.5575, "step": 366260 }, { "epoch": 0.7296912852224914, "grad_norm": 0.18516819179058075, "learning_rate": 0.002, "loss": 2.564, "step": 366270 }, { "epoch": 0.7297112074461303, "grad_norm": 0.16167517006397247, "learning_rate": 0.002, "loss": 2.5646, "step": 366280 }, { "epoch": 0.7297311296697693, "grad_norm": 0.1408943235874176, "learning_rate": 0.002, "loss": 2.5577, "step": 366290 }, { "epoch": 0.7297510518934082, "grad_norm": 0.17628945410251617, "learning_rate": 0.002, "loss": 2.5597, "step": 366300 }, { "epoch": 0.729770974117047, "grad_norm": 0.21064884960651398, "learning_rate": 0.002, "loss": 2.5619, "step": 366310 }, { "epoch": 0.729790896340686, "grad_norm": 0.16534879803657532, "learning_rate": 0.002, "loss": 2.5537, "step": 366320 }, { "epoch": 0.7298108185643248, "grad_norm": 0.15791258215904236, "learning_rate": 0.002, "loss": 2.5593, "step": 366330 }, { "epoch": 0.7298307407879638, "grad_norm": 0.1770210713148117, "learning_rate": 0.002, "loss": 2.5505, "step": 366340 }, { "epoch": 0.7298506630116027, "grad_norm": 0.17593996226787567, "learning_rate": 0.002, "loss": 2.5583, "step": 366350 }, { "epoch": 0.7298705852352416, "grad_norm": 0.17065489292144775, "learning_rate": 0.002, "loss": 2.552, "step": 366360 }, { "epoch": 0.7298905074588805, "grad_norm": 0.15368272364139557, "learning_rate": 0.002, "loss": 2.5329, "step": 366370 }, { "epoch": 0.7299104296825194, "grad_norm": 0.21191753447055817, "learning_rate": 0.002, "loss": 2.5635, "step": 366380 }, { "epoch": 0.7299303519061584, "grad_norm": 0.17618055641651154, "learning_rate": 0.002, "loss": 2.553, "step": 366390 }, { "epoch": 0.7299502741297973, "grad_norm": 0.16047000885009766, "learning_rate": 0.002, "loss": 2.5641, "step": 366400 }, { "epoch": 0.7299701963534362, "grad_norm": 0.20567123591899872, "learning_rate": 0.002, "loss": 2.5565, "step": 366410 }, { "epoch": 0.7299901185770751, "grad_norm": 0.1595878005027771, "learning_rate": 0.002, "loss": 2.5692, "step": 366420 }, { "epoch": 0.730010040800714, "grad_norm": 0.22447030246257782, "learning_rate": 0.002, "loss": 2.5439, "step": 366430 }, { "epoch": 0.730029963024353, "grad_norm": 0.16477139294147491, "learning_rate": 0.002, "loss": 2.5644, "step": 366440 }, { "epoch": 0.7300498852479919, "grad_norm": 0.15620464086532593, "learning_rate": 0.002, "loss": 2.5554, "step": 366450 }, { "epoch": 0.7300698074716307, "grad_norm": 0.16112875938415527, "learning_rate": 0.002, "loss": 2.5591, "step": 366460 }, { "epoch": 0.7300897296952696, "grad_norm": 0.1448189616203308, "learning_rate": 0.002, "loss": 2.5514, "step": 366470 }, { "epoch": 0.7301096519189085, "grad_norm": 0.18506261706352234, "learning_rate": 0.002, "loss": 2.5568, "step": 366480 }, { "epoch": 0.7301295741425475, "grad_norm": 0.17466852068901062, "learning_rate": 0.002, "loss": 2.5596, "step": 366490 }, { "epoch": 0.7301494963661864, "grad_norm": 0.16368545591831207, "learning_rate": 0.002, "loss": 2.5612, "step": 366500 }, { "epoch": 0.7301694185898253, "grad_norm": 0.1558731496334076, "learning_rate": 0.002, "loss": 2.5376, "step": 366510 }, { "epoch": 0.7301893408134642, "grad_norm": 0.1428251564502716, "learning_rate": 0.002, "loss": 2.5456, "step": 366520 }, { "epoch": 0.7302092630371031, "grad_norm": 0.19567114114761353, "learning_rate": 0.002, "loss": 2.5684, "step": 366530 }, { "epoch": 0.7302291852607421, "grad_norm": 0.17721055448055267, "learning_rate": 0.002, "loss": 2.5526, "step": 366540 }, { "epoch": 0.730249107484381, "grad_norm": 0.15307046473026276, "learning_rate": 0.002, "loss": 2.5586, "step": 366550 }, { "epoch": 0.7302690297080199, "grad_norm": 0.16805528104305267, "learning_rate": 0.002, "loss": 2.5407, "step": 366560 }, { "epoch": 0.7302889519316588, "grad_norm": 0.17796988785266876, "learning_rate": 0.002, "loss": 2.5581, "step": 366570 }, { "epoch": 0.7303088741552978, "grad_norm": 0.16798600554466248, "learning_rate": 0.002, "loss": 2.5601, "step": 366580 }, { "epoch": 0.7303287963789367, "grad_norm": 0.19055092334747314, "learning_rate": 0.002, "loss": 2.5441, "step": 366590 }, { "epoch": 0.7303487186025756, "grad_norm": 0.14344476163387299, "learning_rate": 0.002, "loss": 2.5605, "step": 366600 }, { "epoch": 0.7303686408262144, "grad_norm": 0.16286131739616394, "learning_rate": 0.002, "loss": 2.562, "step": 366610 }, { "epoch": 0.7303885630498533, "grad_norm": 0.15692712366580963, "learning_rate": 0.002, "loss": 2.5479, "step": 366620 }, { "epoch": 0.7304084852734923, "grad_norm": 0.23929505050182343, "learning_rate": 0.002, "loss": 2.5602, "step": 366630 }, { "epoch": 0.7304284074971312, "grad_norm": 0.16097334027290344, "learning_rate": 0.002, "loss": 2.5664, "step": 366640 }, { "epoch": 0.7304483297207701, "grad_norm": 0.15287502110004425, "learning_rate": 0.002, "loss": 2.5527, "step": 366650 }, { "epoch": 0.730468251944409, "grad_norm": 0.15999704599380493, "learning_rate": 0.002, "loss": 2.5678, "step": 366660 }, { "epoch": 0.7304881741680479, "grad_norm": 0.15615789592266083, "learning_rate": 0.002, "loss": 2.5442, "step": 366670 }, { "epoch": 0.7305080963916869, "grad_norm": 0.14928540587425232, "learning_rate": 0.002, "loss": 2.5646, "step": 366680 }, { "epoch": 0.7305280186153258, "grad_norm": 0.1772543340921402, "learning_rate": 0.002, "loss": 2.5566, "step": 366690 }, { "epoch": 0.7305479408389647, "grad_norm": 0.1566527634859085, "learning_rate": 0.002, "loss": 2.5535, "step": 366700 }, { "epoch": 0.7305678630626036, "grad_norm": 0.1423967480659485, "learning_rate": 0.002, "loss": 2.551, "step": 366710 }, { "epoch": 0.7305877852862425, "grad_norm": 0.22414281964302063, "learning_rate": 0.002, "loss": 2.532, "step": 366720 }, { "epoch": 0.7306077075098815, "grad_norm": 0.1886889934539795, "learning_rate": 0.002, "loss": 2.5589, "step": 366730 }, { "epoch": 0.7306276297335204, "grad_norm": 0.19104909896850586, "learning_rate": 0.002, "loss": 2.5567, "step": 366740 }, { "epoch": 0.7306475519571592, "grad_norm": 0.1513453722000122, "learning_rate": 0.002, "loss": 2.5464, "step": 366750 }, { "epoch": 0.7306674741807981, "grad_norm": 0.16484051942825317, "learning_rate": 0.002, "loss": 2.5654, "step": 366760 }, { "epoch": 0.730687396404437, "grad_norm": 0.14263872802257538, "learning_rate": 0.002, "loss": 2.538, "step": 366770 }, { "epoch": 0.730707318628076, "grad_norm": 0.1508723348379135, "learning_rate": 0.002, "loss": 2.5475, "step": 366780 }, { "epoch": 0.7307272408517149, "grad_norm": 0.17265714704990387, "learning_rate": 0.002, "loss": 2.5492, "step": 366790 }, { "epoch": 0.7307471630753538, "grad_norm": 0.15646423399448395, "learning_rate": 0.002, "loss": 2.5592, "step": 366800 }, { "epoch": 0.7307670852989927, "grad_norm": 0.18636752665042877, "learning_rate": 0.002, "loss": 2.5464, "step": 366810 }, { "epoch": 0.7307870075226316, "grad_norm": 0.14874689280986786, "learning_rate": 0.002, "loss": 2.5695, "step": 366820 }, { "epoch": 0.7308069297462706, "grad_norm": 0.21584434807300568, "learning_rate": 0.002, "loss": 2.5482, "step": 366830 }, { "epoch": 0.7308268519699095, "grad_norm": 0.16102388501167297, "learning_rate": 0.002, "loss": 2.5626, "step": 366840 }, { "epoch": 0.7308467741935484, "grad_norm": 0.16735711693763733, "learning_rate": 0.002, "loss": 2.56, "step": 366850 }, { "epoch": 0.7308666964171873, "grad_norm": 0.14895768463611603, "learning_rate": 0.002, "loss": 2.5511, "step": 366860 }, { "epoch": 0.7308866186408263, "grad_norm": 0.1774412989616394, "learning_rate": 0.002, "loss": 2.5529, "step": 366870 }, { "epoch": 0.7309065408644652, "grad_norm": 0.1655842661857605, "learning_rate": 0.002, "loss": 2.5503, "step": 366880 }, { "epoch": 0.730926463088104, "grad_norm": 0.1582886129617691, "learning_rate": 0.002, "loss": 2.5487, "step": 366890 }, { "epoch": 0.7309463853117429, "grad_norm": 0.21203453838825226, "learning_rate": 0.002, "loss": 2.5761, "step": 366900 }, { "epoch": 0.7309663075353818, "grad_norm": 0.14562749862670898, "learning_rate": 0.002, "loss": 2.5551, "step": 366910 }, { "epoch": 0.7309862297590208, "grad_norm": 0.17590899765491486, "learning_rate": 0.002, "loss": 2.5759, "step": 366920 }, { "epoch": 0.7310061519826597, "grad_norm": 0.15040482580661774, "learning_rate": 0.002, "loss": 2.5483, "step": 366930 }, { "epoch": 0.7310260742062986, "grad_norm": 0.1865973323583603, "learning_rate": 0.002, "loss": 2.5506, "step": 366940 }, { "epoch": 0.7310459964299375, "grad_norm": 0.13733519613742828, "learning_rate": 0.002, "loss": 2.5642, "step": 366950 }, { "epoch": 0.7310659186535764, "grad_norm": 0.17181278765201569, "learning_rate": 0.002, "loss": 2.5348, "step": 366960 }, { "epoch": 0.7310858408772154, "grad_norm": 0.17342939972877502, "learning_rate": 0.002, "loss": 2.5494, "step": 366970 }, { "epoch": 0.7311057631008543, "grad_norm": 0.17549465596675873, "learning_rate": 0.002, "loss": 2.5494, "step": 366980 }, { "epoch": 0.7311256853244932, "grad_norm": 0.1601213663816452, "learning_rate": 0.002, "loss": 2.5587, "step": 366990 }, { "epoch": 0.7311456075481321, "grad_norm": 0.14781911671161652, "learning_rate": 0.002, "loss": 2.5779, "step": 367000 }, { "epoch": 0.731165529771771, "grad_norm": 0.17632313072681427, "learning_rate": 0.002, "loss": 2.5435, "step": 367010 }, { "epoch": 0.73118545199541, "grad_norm": 0.16455672681331635, "learning_rate": 0.002, "loss": 2.5443, "step": 367020 }, { "epoch": 0.7312053742190489, "grad_norm": 0.18416127562522888, "learning_rate": 0.002, "loss": 2.5606, "step": 367030 }, { "epoch": 0.7312252964426877, "grad_norm": 0.16470758616924286, "learning_rate": 0.002, "loss": 2.5546, "step": 367040 }, { "epoch": 0.7312452186663266, "grad_norm": 0.19218705594539642, "learning_rate": 0.002, "loss": 2.564, "step": 367050 }, { "epoch": 0.7312651408899655, "grad_norm": 0.19405098259449005, "learning_rate": 0.002, "loss": 2.5548, "step": 367060 }, { "epoch": 0.7312850631136045, "grad_norm": 0.16272138059139252, "learning_rate": 0.002, "loss": 2.5664, "step": 367070 }, { "epoch": 0.7313049853372434, "grad_norm": 0.16641755402088165, "learning_rate": 0.002, "loss": 2.5574, "step": 367080 }, { "epoch": 0.7313249075608823, "grad_norm": 0.18043410778045654, "learning_rate": 0.002, "loss": 2.5468, "step": 367090 }, { "epoch": 0.7313448297845212, "grad_norm": 0.1540464609861374, "learning_rate": 0.002, "loss": 2.5498, "step": 367100 }, { "epoch": 0.7313647520081601, "grad_norm": 0.16962358355522156, "learning_rate": 0.002, "loss": 2.5537, "step": 367110 }, { "epoch": 0.7313846742317991, "grad_norm": 0.1459970623254776, "learning_rate": 0.002, "loss": 2.5494, "step": 367120 }, { "epoch": 0.731404596455438, "grad_norm": 0.1703362613916397, "learning_rate": 0.002, "loss": 2.5583, "step": 367130 }, { "epoch": 0.7314245186790769, "grad_norm": 0.14790421724319458, "learning_rate": 0.002, "loss": 2.5524, "step": 367140 }, { "epoch": 0.7314444409027158, "grad_norm": 0.17483435571193695, "learning_rate": 0.002, "loss": 2.5524, "step": 367150 }, { "epoch": 0.7314643631263548, "grad_norm": 0.15389534831047058, "learning_rate": 0.002, "loss": 2.5645, "step": 367160 }, { "epoch": 0.7314842853499937, "grad_norm": 0.16771459579467773, "learning_rate": 0.002, "loss": 2.558, "step": 367170 }, { "epoch": 0.7315042075736325, "grad_norm": 0.161503866314888, "learning_rate": 0.002, "loss": 2.5512, "step": 367180 }, { "epoch": 0.7315241297972714, "grad_norm": 0.16730372607707977, "learning_rate": 0.002, "loss": 2.5411, "step": 367190 }, { "epoch": 0.7315440520209103, "grad_norm": 0.18328607082366943, "learning_rate": 0.002, "loss": 2.5452, "step": 367200 }, { "epoch": 0.7315639742445493, "grad_norm": 0.1395748257637024, "learning_rate": 0.002, "loss": 2.5662, "step": 367210 }, { "epoch": 0.7315838964681882, "grad_norm": 0.155768021941185, "learning_rate": 0.002, "loss": 2.5493, "step": 367220 }, { "epoch": 0.7316038186918271, "grad_norm": 0.1611945629119873, "learning_rate": 0.002, "loss": 2.5532, "step": 367230 }, { "epoch": 0.731623740915466, "grad_norm": 0.15050391852855682, "learning_rate": 0.002, "loss": 2.557, "step": 367240 }, { "epoch": 0.7316436631391049, "grad_norm": 0.163781076669693, "learning_rate": 0.002, "loss": 2.5577, "step": 367250 }, { "epoch": 0.7316635853627439, "grad_norm": 0.15808600187301636, "learning_rate": 0.002, "loss": 2.5589, "step": 367260 }, { "epoch": 0.7316835075863828, "grad_norm": 0.15584580600261688, "learning_rate": 0.002, "loss": 2.5607, "step": 367270 }, { "epoch": 0.7317034298100217, "grad_norm": 0.15095584094524384, "learning_rate": 0.002, "loss": 2.5591, "step": 367280 }, { "epoch": 0.7317233520336606, "grad_norm": 0.14341752231121063, "learning_rate": 0.002, "loss": 2.5435, "step": 367290 }, { "epoch": 0.7317432742572995, "grad_norm": 0.1593450903892517, "learning_rate": 0.002, "loss": 2.5719, "step": 367300 }, { "epoch": 0.7317631964809385, "grad_norm": 0.165078267455101, "learning_rate": 0.002, "loss": 2.5532, "step": 367310 }, { "epoch": 0.7317831187045774, "grad_norm": 0.1563478261232376, "learning_rate": 0.002, "loss": 2.5668, "step": 367320 }, { "epoch": 0.7318030409282162, "grad_norm": 0.14536529779434204, "learning_rate": 0.002, "loss": 2.5547, "step": 367330 }, { "epoch": 0.7318229631518551, "grad_norm": 0.1707291305065155, "learning_rate": 0.002, "loss": 2.5498, "step": 367340 }, { "epoch": 0.731842885375494, "grad_norm": 0.1629674732685089, "learning_rate": 0.002, "loss": 2.5564, "step": 367350 }, { "epoch": 0.731862807599133, "grad_norm": 0.18351350724697113, "learning_rate": 0.002, "loss": 2.5626, "step": 367360 }, { "epoch": 0.7318827298227719, "grad_norm": 0.20063729584217072, "learning_rate": 0.002, "loss": 2.5431, "step": 367370 }, { "epoch": 0.7319026520464108, "grad_norm": 0.19300958514213562, "learning_rate": 0.002, "loss": 2.5574, "step": 367380 }, { "epoch": 0.7319225742700497, "grad_norm": 0.15043391287326813, "learning_rate": 0.002, "loss": 2.5656, "step": 367390 }, { "epoch": 0.7319424964936886, "grad_norm": 0.19183184206485748, "learning_rate": 0.002, "loss": 2.5502, "step": 367400 }, { "epoch": 0.7319624187173276, "grad_norm": 0.17599612474441528, "learning_rate": 0.002, "loss": 2.5603, "step": 367410 }, { "epoch": 0.7319823409409665, "grad_norm": 0.14900866150856018, "learning_rate": 0.002, "loss": 2.5504, "step": 367420 }, { "epoch": 0.7320022631646054, "grad_norm": 0.17072825133800507, "learning_rate": 0.002, "loss": 2.55, "step": 367430 }, { "epoch": 0.7320221853882443, "grad_norm": 0.1714002937078476, "learning_rate": 0.002, "loss": 2.5578, "step": 367440 }, { "epoch": 0.7320421076118832, "grad_norm": 0.18060164153575897, "learning_rate": 0.002, "loss": 2.56, "step": 367450 }, { "epoch": 0.7320620298355222, "grad_norm": 0.16123172640800476, "learning_rate": 0.002, "loss": 2.5643, "step": 367460 }, { "epoch": 0.732081952059161, "grad_norm": 0.15594317018985748, "learning_rate": 0.002, "loss": 2.5526, "step": 367470 }, { "epoch": 0.7321018742827999, "grad_norm": 0.16359475255012512, "learning_rate": 0.002, "loss": 2.566, "step": 367480 }, { "epoch": 0.7321217965064388, "grad_norm": 0.16981929540634155, "learning_rate": 0.002, "loss": 2.5486, "step": 367490 }, { "epoch": 0.7321417187300778, "grad_norm": 0.17983342707157135, "learning_rate": 0.002, "loss": 2.5389, "step": 367500 }, { "epoch": 0.7321616409537167, "grad_norm": 0.18437328934669495, "learning_rate": 0.002, "loss": 2.5369, "step": 367510 }, { "epoch": 0.7321815631773556, "grad_norm": 0.157290518283844, "learning_rate": 0.002, "loss": 2.5589, "step": 367520 }, { "epoch": 0.7322014854009945, "grad_norm": 0.1774880588054657, "learning_rate": 0.002, "loss": 2.5485, "step": 367530 }, { "epoch": 0.7322214076246334, "grad_norm": 0.15871776640415192, "learning_rate": 0.002, "loss": 2.5666, "step": 367540 }, { "epoch": 0.7322413298482724, "grad_norm": 0.1689990758895874, "learning_rate": 0.002, "loss": 2.5315, "step": 367550 }, { "epoch": 0.7322612520719113, "grad_norm": 0.17232592403888702, "learning_rate": 0.002, "loss": 2.544, "step": 367560 }, { "epoch": 0.7322811742955502, "grad_norm": 0.17185242474079132, "learning_rate": 0.002, "loss": 2.5588, "step": 367570 }, { "epoch": 0.7323010965191891, "grad_norm": 0.14482809603214264, "learning_rate": 0.002, "loss": 2.552, "step": 367580 }, { "epoch": 0.732321018742828, "grad_norm": 0.14914332330226898, "learning_rate": 0.002, "loss": 2.5634, "step": 367590 }, { "epoch": 0.732340940966467, "grad_norm": 0.17465923726558685, "learning_rate": 0.002, "loss": 2.5561, "step": 367600 }, { "epoch": 0.7323608631901058, "grad_norm": 0.15529930591583252, "learning_rate": 0.002, "loss": 2.5623, "step": 367610 }, { "epoch": 0.7323807854137447, "grad_norm": 0.17426608502864838, "learning_rate": 0.002, "loss": 2.5537, "step": 367620 }, { "epoch": 0.7324007076373836, "grad_norm": 0.1766263097524643, "learning_rate": 0.002, "loss": 2.5488, "step": 367630 }, { "epoch": 0.7324206298610225, "grad_norm": 0.18499058485031128, "learning_rate": 0.002, "loss": 2.5642, "step": 367640 }, { "epoch": 0.7324405520846615, "grad_norm": 0.17479830980300903, "learning_rate": 0.002, "loss": 2.5572, "step": 367650 }, { "epoch": 0.7324604743083004, "grad_norm": 0.1573180854320526, "learning_rate": 0.002, "loss": 2.5624, "step": 367660 }, { "epoch": 0.7324803965319393, "grad_norm": 0.14528261125087738, "learning_rate": 0.002, "loss": 2.5438, "step": 367670 }, { "epoch": 0.7325003187555782, "grad_norm": 0.22058100998401642, "learning_rate": 0.002, "loss": 2.5654, "step": 367680 }, { "epoch": 0.7325202409792171, "grad_norm": 0.17073415219783783, "learning_rate": 0.002, "loss": 2.5597, "step": 367690 }, { "epoch": 0.7325401632028561, "grad_norm": 0.18915441632270813, "learning_rate": 0.002, "loss": 2.5422, "step": 367700 }, { "epoch": 0.732560085426495, "grad_norm": 0.18635456264019012, "learning_rate": 0.002, "loss": 2.5714, "step": 367710 }, { "epoch": 0.7325800076501339, "grad_norm": 0.14145134389400482, "learning_rate": 0.002, "loss": 2.5502, "step": 367720 }, { "epoch": 0.7325999298737728, "grad_norm": 0.18243585526943207, "learning_rate": 0.002, "loss": 2.562, "step": 367730 }, { "epoch": 0.7326198520974117, "grad_norm": 0.16704779863357544, "learning_rate": 0.002, "loss": 2.5547, "step": 367740 }, { "epoch": 0.7326397743210507, "grad_norm": 0.15583164989948273, "learning_rate": 0.002, "loss": 2.5467, "step": 367750 }, { "epoch": 0.7326596965446895, "grad_norm": 0.12578392028808594, "learning_rate": 0.002, "loss": 2.5636, "step": 367760 }, { "epoch": 0.7326796187683284, "grad_norm": 0.1884355992078781, "learning_rate": 0.002, "loss": 2.5489, "step": 367770 }, { "epoch": 0.7326995409919673, "grad_norm": 0.1695052683353424, "learning_rate": 0.002, "loss": 2.5585, "step": 367780 }, { "epoch": 0.7327194632156063, "grad_norm": 0.14726372063159943, "learning_rate": 0.002, "loss": 2.5397, "step": 367790 }, { "epoch": 0.7327393854392452, "grad_norm": 0.16456107795238495, "learning_rate": 0.002, "loss": 2.5556, "step": 367800 }, { "epoch": 0.7327593076628841, "grad_norm": 0.19926685094833374, "learning_rate": 0.002, "loss": 2.5552, "step": 367810 }, { "epoch": 0.732779229886523, "grad_norm": 0.19135262072086334, "learning_rate": 0.002, "loss": 2.5588, "step": 367820 }, { "epoch": 0.7327991521101619, "grad_norm": 0.15813389420509338, "learning_rate": 0.002, "loss": 2.542, "step": 367830 }, { "epoch": 0.7328190743338009, "grad_norm": 0.1589999496936798, "learning_rate": 0.002, "loss": 2.57, "step": 367840 }, { "epoch": 0.7328389965574398, "grad_norm": 0.1388707160949707, "learning_rate": 0.002, "loss": 2.5499, "step": 367850 }, { "epoch": 0.7328589187810787, "grad_norm": 0.14440692961215973, "learning_rate": 0.002, "loss": 2.5526, "step": 367860 }, { "epoch": 0.7328788410047176, "grad_norm": 0.29336515069007874, "learning_rate": 0.002, "loss": 2.5599, "step": 367870 }, { "epoch": 0.7328987632283565, "grad_norm": 0.16978082060813904, "learning_rate": 0.002, "loss": 2.5562, "step": 367880 }, { "epoch": 0.7329186854519955, "grad_norm": 0.1580200046300888, "learning_rate": 0.002, "loss": 2.5398, "step": 367890 }, { "epoch": 0.7329386076756343, "grad_norm": 0.18147669732570648, "learning_rate": 0.002, "loss": 2.5511, "step": 367900 }, { "epoch": 0.7329585298992732, "grad_norm": 0.18296615779399872, "learning_rate": 0.002, "loss": 2.5482, "step": 367910 }, { "epoch": 0.7329784521229121, "grad_norm": 0.16400989890098572, "learning_rate": 0.002, "loss": 2.5646, "step": 367920 }, { "epoch": 0.732998374346551, "grad_norm": 0.2001257836818695, "learning_rate": 0.002, "loss": 2.551, "step": 367930 }, { "epoch": 0.73301829657019, "grad_norm": 0.14829012751579285, "learning_rate": 0.002, "loss": 2.5688, "step": 367940 }, { "epoch": 0.7330382187938289, "grad_norm": 0.16408713161945343, "learning_rate": 0.002, "loss": 2.5625, "step": 367950 }, { "epoch": 0.7330581410174678, "grad_norm": 0.14353246986865997, "learning_rate": 0.002, "loss": 2.5423, "step": 367960 }, { "epoch": 0.7330780632411067, "grad_norm": 0.18631984293460846, "learning_rate": 0.002, "loss": 2.5527, "step": 367970 }, { "epoch": 0.7330979854647456, "grad_norm": 0.21009112894535065, "learning_rate": 0.002, "loss": 2.5512, "step": 367980 }, { "epoch": 0.7331179076883846, "grad_norm": 0.1543230563402176, "learning_rate": 0.002, "loss": 2.5429, "step": 367990 }, { "epoch": 0.7331378299120235, "grad_norm": 0.17374704778194427, "learning_rate": 0.002, "loss": 2.5693, "step": 368000 }, { "epoch": 0.7331577521356624, "grad_norm": 0.19362393021583557, "learning_rate": 0.002, "loss": 2.5486, "step": 368010 }, { "epoch": 0.7331776743593013, "grad_norm": 0.18441413342952728, "learning_rate": 0.002, "loss": 2.5562, "step": 368020 }, { "epoch": 0.7331975965829401, "grad_norm": 0.1578187346458435, "learning_rate": 0.002, "loss": 2.5611, "step": 368030 }, { "epoch": 0.7332175188065791, "grad_norm": 0.18203739821910858, "learning_rate": 0.002, "loss": 2.5675, "step": 368040 }, { "epoch": 0.733237441030218, "grad_norm": 0.1679697185754776, "learning_rate": 0.002, "loss": 2.5638, "step": 368050 }, { "epoch": 0.7332573632538569, "grad_norm": 0.17142526805400848, "learning_rate": 0.002, "loss": 2.5452, "step": 368060 }, { "epoch": 0.7332772854774958, "grad_norm": 0.15681001543998718, "learning_rate": 0.002, "loss": 2.5466, "step": 368070 }, { "epoch": 0.7332972077011348, "grad_norm": 0.15047766268253326, "learning_rate": 0.002, "loss": 2.548, "step": 368080 }, { "epoch": 0.7333171299247737, "grad_norm": 0.15488305687904358, "learning_rate": 0.002, "loss": 2.5536, "step": 368090 }, { "epoch": 0.7333370521484126, "grad_norm": 0.20115138590335846, "learning_rate": 0.002, "loss": 2.5556, "step": 368100 }, { "epoch": 0.7333569743720515, "grad_norm": 0.14516456425189972, "learning_rate": 0.002, "loss": 2.5337, "step": 368110 }, { "epoch": 0.7333768965956904, "grad_norm": 0.15843205153942108, "learning_rate": 0.002, "loss": 2.5532, "step": 368120 }, { "epoch": 0.7333968188193294, "grad_norm": 0.17730501294136047, "learning_rate": 0.002, "loss": 2.5698, "step": 368130 }, { "epoch": 0.7334167410429683, "grad_norm": 0.22457727789878845, "learning_rate": 0.002, "loss": 2.5635, "step": 368140 }, { "epoch": 0.7334366632666072, "grad_norm": 0.17273709177970886, "learning_rate": 0.002, "loss": 2.559, "step": 368150 }, { "epoch": 0.7334565854902461, "grad_norm": 0.16216516494750977, "learning_rate": 0.002, "loss": 2.546, "step": 368160 }, { "epoch": 0.733476507713885, "grad_norm": 0.1854797750711441, "learning_rate": 0.002, "loss": 2.5572, "step": 368170 }, { "epoch": 0.733496429937524, "grad_norm": 0.2313755601644516, "learning_rate": 0.002, "loss": 2.557, "step": 368180 }, { "epoch": 0.7335163521611628, "grad_norm": 0.177621528506279, "learning_rate": 0.002, "loss": 2.5623, "step": 368190 }, { "epoch": 0.7335362743848017, "grad_norm": 0.17159660160541534, "learning_rate": 0.002, "loss": 2.5656, "step": 368200 }, { "epoch": 0.7335561966084406, "grad_norm": 0.16872243583202362, "learning_rate": 0.002, "loss": 2.5552, "step": 368210 }, { "epoch": 0.7335761188320795, "grad_norm": 0.1813434660434723, "learning_rate": 0.002, "loss": 2.5556, "step": 368220 }, { "epoch": 0.7335960410557185, "grad_norm": 0.14218753576278687, "learning_rate": 0.002, "loss": 2.5417, "step": 368230 }, { "epoch": 0.7336159632793574, "grad_norm": 0.24882447719573975, "learning_rate": 0.002, "loss": 2.5457, "step": 368240 }, { "epoch": 0.7336358855029963, "grad_norm": 0.14771856367588043, "learning_rate": 0.002, "loss": 2.5498, "step": 368250 }, { "epoch": 0.7336558077266352, "grad_norm": 0.14964985847473145, "learning_rate": 0.002, "loss": 2.5456, "step": 368260 }, { "epoch": 0.7336757299502741, "grad_norm": 0.15420415997505188, "learning_rate": 0.002, "loss": 2.5644, "step": 368270 }, { "epoch": 0.7336956521739131, "grad_norm": 0.17810887098312378, "learning_rate": 0.002, "loss": 2.5546, "step": 368280 }, { "epoch": 0.733715574397552, "grad_norm": 0.15239381790161133, "learning_rate": 0.002, "loss": 2.5434, "step": 368290 }, { "epoch": 0.7337354966211909, "grad_norm": 0.16056132316589355, "learning_rate": 0.002, "loss": 2.5336, "step": 368300 }, { "epoch": 0.7337554188448298, "grad_norm": 0.18297114968299866, "learning_rate": 0.002, "loss": 2.5425, "step": 368310 }, { "epoch": 0.7337753410684686, "grad_norm": 0.19127275049686432, "learning_rate": 0.002, "loss": 2.5436, "step": 368320 }, { "epoch": 0.7337952632921076, "grad_norm": 0.16507895290851593, "learning_rate": 0.002, "loss": 2.5616, "step": 368330 }, { "epoch": 0.7338151855157465, "grad_norm": 0.13761678338050842, "learning_rate": 0.002, "loss": 2.5381, "step": 368340 }, { "epoch": 0.7338351077393854, "grad_norm": 0.19256198406219482, "learning_rate": 0.002, "loss": 2.5642, "step": 368350 }, { "epoch": 0.7338550299630243, "grad_norm": 0.17346836626529694, "learning_rate": 0.002, "loss": 2.5439, "step": 368360 }, { "epoch": 0.7338749521866633, "grad_norm": 0.181808203458786, "learning_rate": 0.002, "loss": 2.5691, "step": 368370 }, { "epoch": 0.7338948744103022, "grad_norm": 0.13911043107509613, "learning_rate": 0.002, "loss": 2.5581, "step": 368380 }, { "epoch": 0.7339147966339411, "grad_norm": 0.22233794629573822, "learning_rate": 0.002, "loss": 2.5424, "step": 368390 }, { "epoch": 0.73393471885758, "grad_norm": 0.14730355143547058, "learning_rate": 0.002, "loss": 2.5572, "step": 368400 }, { "epoch": 0.7339546410812189, "grad_norm": 0.17715424299240112, "learning_rate": 0.002, "loss": 2.5522, "step": 368410 }, { "epoch": 0.7339745633048579, "grad_norm": 0.14569702744483948, "learning_rate": 0.002, "loss": 2.5623, "step": 368420 }, { "epoch": 0.7339944855284968, "grad_norm": 0.15641731023788452, "learning_rate": 0.002, "loss": 2.5416, "step": 368430 }, { "epoch": 0.7340144077521357, "grad_norm": 0.16746126115322113, "learning_rate": 0.002, "loss": 2.5644, "step": 368440 }, { "epoch": 0.7340343299757746, "grad_norm": 0.15213459730148315, "learning_rate": 0.002, "loss": 2.5404, "step": 368450 }, { "epoch": 0.7340542521994134, "grad_norm": 0.16945582628250122, "learning_rate": 0.002, "loss": 2.5542, "step": 368460 }, { "epoch": 0.7340741744230525, "grad_norm": 0.16125541925430298, "learning_rate": 0.002, "loss": 2.5576, "step": 368470 }, { "epoch": 0.7340940966466913, "grad_norm": 0.161969855427742, "learning_rate": 0.002, "loss": 2.5418, "step": 368480 }, { "epoch": 0.7341140188703302, "grad_norm": 0.15458855032920837, "learning_rate": 0.002, "loss": 2.5452, "step": 368490 }, { "epoch": 0.7341339410939691, "grad_norm": 0.1716517210006714, "learning_rate": 0.002, "loss": 2.5543, "step": 368500 }, { "epoch": 0.734153863317608, "grad_norm": 0.18310263752937317, "learning_rate": 0.002, "loss": 2.5681, "step": 368510 }, { "epoch": 0.734173785541247, "grad_norm": 0.16179566085338593, "learning_rate": 0.002, "loss": 2.5786, "step": 368520 }, { "epoch": 0.7341937077648859, "grad_norm": 0.14631161093711853, "learning_rate": 0.002, "loss": 2.5393, "step": 368530 }, { "epoch": 0.7342136299885248, "grad_norm": 0.16357405483722687, "learning_rate": 0.002, "loss": 2.5619, "step": 368540 }, { "epoch": 0.7342335522121637, "grad_norm": 0.17214876413345337, "learning_rate": 0.002, "loss": 2.555, "step": 368550 }, { "epoch": 0.7342534744358026, "grad_norm": 0.17872944474220276, "learning_rate": 0.002, "loss": 2.5424, "step": 368560 }, { "epoch": 0.7342733966594416, "grad_norm": 0.14704813063144684, "learning_rate": 0.002, "loss": 2.5503, "step": 368570 }, { "epoch": 0.7342933188830805, "grad_norm": 0.18155540525913239, "learning_rate": 0.002, "loss": 2.5709, "step": 368580 }, { "epoch": 0.7343132411067194, "grad_norm": 0.23810066282749176, "learning_rate": 0.002, "loss": 2.5584, "step": 368590 }, { "epoch": 0.7343331633303583, "grad_norm": 0.1431899070739746, "learning_rate": 0.002, "loss": 2.5641, "step": 368600 }, { "epoch": 0.7343530855539971, "grad_norm": 0.16060104966163635, "learning_rate": 0.002, "loss": 2.5564, "step": 368610 }, { "epoch": 0.7343730077776361, "grad_norm": 0.14055775105953217, "learning_rate": 0.002, "loss": 2.5504, "step": 368620 }, { "epoch": 0.734392930001275, "grad_norm": 0.1688421070575714, "learning_rate": 0.002, "loss": 2.5353, "step": 368630 }, { "epoch": 0.7344128522249139, "grad_norm": 0.15523359179496765, "learning_rate": 0.002, "loss": 2.5553, "step": 368640 }, { "epoch": 0.7344327744485528, "grad_norm": 0.17672725021839142, "learning_rate": 0.002, "loss": 2.5631, "step": 368650 }, { "epoch": 0.7344526966721918, "grad_norm": 0.17477816343307495, "learning_rate": 0.002, "loss": 2.5618, "step": 368660 }, { "epoch": 0.7344726188958307, "grad_norm": 0.13887527585029602, "learning_rate": 0.002, "loss": 2.5677, "step": 368670 }, { "epoch": 0.7344925411194696, "grad_norm": 0.1953841596841812, "learning_rate": 0.002, "loss": 2.55, "step": 368680 }, { "epoch": 0.7345124633431085, "grad_norm": 0.17134177684783936, "learning_rate": 0.002, "loss": 2.5573, "step": 368690 }, { "epoch": 0.7345323855667474, "grad_norm": 0.1349010020494461, "learning_rate": 0.002, "loss": 2.5504, "step": 368700 }, { "epoch": 0.7345523077903864, "grad_norm": 0.14381398260593414, "learning_rate": 0.002, "loss": 2.5808, "step": 368710 }, { "epoch": 0.7345722300140253, "grad_norm": 0.20540104806423187, "learning_rate": 0.002, "loss": 2.5503, "step": 368720 }, { "epoch": 0.7345921522376642, "grad_norm": 0.1481892466545105, "learning_rate": 0.002, "loss": 2.5571, "step": 368730 }, { "epoch": 0.734612074461303, "grad_norm": 0.16038794815540314, "learning_rate": 0.002, "loss": 2.5582, "step": 368740 }, { "epoch": 0.734631996684942, "grad_norm": 0.16329915821552277, "learning_rate": 0.002, "loss": 2.5338, "step": 368750 }, { "epoch": 0.734651918908581, "grad_norm": 0.1590849757194519, "learning_rate": 0.002, "loss": 2.5512, "step": 368760 }, { "epoch": 0.7346718411322198, "grad_norm": 0.17732898890972137, "learning_rate": 0.002, "loss": 2.5587, "step": 368770 }, { "epoch": 0.7346917633558587, "grad_norm": 0.15917520225048065, "learning_rate": 0.002, "loss": 2.5554, "step": 368780 }, { "epoch": 0.7347116855794976, "grad_norm": 0.1450023055076599, "learning_rate": 0.002, "loss": 2.5531, "step": 368790 }, { "epoch": 0.7347316078031365, "grad_norm": 0.16485247015953064, "learning_rate": 0.002, "loss": 2.5598, "step": 368800 }, { "epoch": 0.7347515300267755, "grad_norm": 0.19664379954338074, "learning_rate": 0.002, "loss": 2.5377, "step": 368810 }, { "epoch": 0.7347714522504144, "grad_norm": 0.14908941090106964, "learning_rate": 0.002, "loss": 2.58, "step": 368820 }, { "epoch": 0.7347913744740533, "grad_norm": 0.17851197719573975, "learning_rate": 0.002, "loss": 2.5628, "step": 368830 }, { "epoch": 0.7348112966976922, "grad_norm": 0.15252110362052917, "learning_rate": 0.002, "loss": 2.5398, "step": 368840 }, { "epoch": 0.7348312189213311, "grad_norm": 0.18179191648960114, "learning_rate": 0.002, "loss": 2.5573, "step": 368850 }, { "epoch": 0.7348511411449701, "grad_norm": 0.16927529871463776, "learning_rate": 0.002, "loss": 2.5362, "step": 368860 }, { "epoch": 0.734871063368609, "grad_norm": 0.17593708634376526, "learning_rate": 0.002, "loss": 2.5561, "step": 368870 }, { "epoch": 0.7348909855922479, "grad_norm": 0.17062999308109283, "learning_rate": 0.002, "loss": 2.5463, "step": 368880 }, { "epoch": 0.7349109078158867, "grad_norm": 0.17984691262245178, "learning_rate": 0.002, "loss": 2.5602, "step": 368890 }, { "epoch": 0.7349308300395256, "grad_norm": 0.14808784425258636, "learning_rate": 0.002, "loss": 2.5587, "step": 368900 }, { "epoch": 0.7349507522631646, "grad_norm": 0.1516263484954834, "learning_rate": 0.002, "loss": 2.5595, "step": 368910 }, { "epoch": 0.7349706744868035, "grad_norm": 0.1671275943517685, "learning_rate": 0.002, "loss": 2.5445, "step": 368920 }, { "epoch": 0.7349905967104424, "grad_norm": 0.1608739048242569, "learning_rate": 0.002, "loss": 2.5573, "step": 368930 }, { "epoch": 0.7350105189340813, "grad_norm": 0.17226260900497437, "learning_rate": 0.002, "loss": 2.5444, "step": 368940 }, { "epoch": 0.7350304411577202, "grad_norm": 0.16697248816490173, "learning_rate": 0.002, "loss": 2.5712, "step": 368950 }, { "epoch": 0.7350503633813592, "grad_norm": 0.16324064135551453, "learning_rate": 0.002, "loss": 2.5631, "step": 368960 }, { "epoch": 0.7350702856049981, "grad_norm": 0.1537771224975586, "learning_rate": 0.002, "loss": 2.5583, "step": 368970 }, { "epoch": 0.735090207828637, "grad_norm": 0.17370131611824036, "learning_rate": 0.002, "loss": 2.5704, "step": 368980 }, { "epoch": 0.7351101300522759, "grad_norm": 0.18960566818714142, "learning_rate": 0.002, "loss": 2.5451, "step": 368990 }, { "epoch": 0.7351300522759149, "grad_norm": 0.15597409009933472, "learning_rate": 0.002, "loss": 2.5538, "step": 369000 }, { "epoch": 0.7351499744995538, "grad_norm": 0.19373005628585815, "learning_rate": 0.002, "loss": 2.5411, "step": 369010 }, { "epoch": 0.7351698967231927, "grad_norm": 0.14782558381557465, "learning_rate": 0.002, "loss": 2.5672, "step": 369020 }, { "epoch": 0.7351898189468316, "grad_norm": 0.2017868012189865, "learning_rate": 0.002, "loss": 2.5503, "step": 369030 }, { "epoch": 0.7352097411704704, "grad_norm": 0.1809367686510086, "learning_rate": 0.002, "loss": 2.5444, "step": 369040 }, { "epoch": 0.7352296633941094, "grad_norm": 0.18204635381698608, "learning_rate": 0.002, "loss": 2.5578, "step": 369050 }, { "epoch": 0.7352495856177483, "grad_norm": 0.18367765843868256, "learning_rate": 0.002, "loss": 2.5539, "step": 369060 }, { "epoch": 0.7352695078413872, "grad_norm": 0.15137693285942078, "learning_rate": 0.002, "loss": 2.5514, "step": 369070 }, { "epoch": 0.7352894300650261, "grad_norm": 0.3221847712993622, "learning_rate": 0.002, "loss": 2.5525, "step": 369080 }, { "epoch": 0.735309352288665, "grad_norm": 0.18210537731647491, "learning_rate": 0.002, "loss": 2.5583, "step": 369090 }, { "epoch": 0.735329274512304, "grad_norm": 0.169067844748497, "learning_rate": 0.002, "loss": 2.5593, "step": 369100 }, { "epoch": 0.7353491967359429, "grad_norm": 0.1858157217502594, "learning_rate": 0.002, "loss": 2.5542, "step": 369110 }, { "epoch": 0.7353691189595818, "grad_norm": 0.1492600440979004, "learning_rate": 0.002, "loss": 2.5751, "step": 369120 }, { "epoch": 0.7353890411832207, "grad_norm": 0.18225599825382233, "learning_rate": 0.002, "loss": 2.5625, "step": 369130 }, { "epoch": 0.7354089634068596, "grad_norm": 0.1703716218471527, "learning_rate": 0.002, "loss": 2.5642, "step": 369140 }, { "epoch": 0.7354288856304986, "grad_norm": 0.18794552981853485, "learning_rate": 0.002, "loss": 2.5518, "step": 369150 }, { "epoch": 0.7354488078541375, "grad_norm": 0.16532154381275177, "learning_rate": 0.002, "loss": 2.5648, "step": 369160 }, { "epoch": 0.7354687300777764, "grad_norm": 0.14827048778533936, "learning_rate": 0.002, "loss": 2.5509, "step": 369170 }, { "epoch": 0.7354886523014152, "grad_norm": 0.17551076412200928, "learning_rate": 0.002, "loss": 2.5477, "step": 369180 }, { "epoch": 0.7355085745250541, "grad_norm": 0.18849286437034607, "learning_rate": 0.002, "loss": 2.5469, "step": 369190 }, { "epoch": 0.7355284967486931, "grad_norm": 0.1434195637702942, "learning_rate": 0.002, "loss": 2.5413, "step": 369200 }, { "epoch": 0.735548418972332, "grad_norm": 0.2078678011894226, "learning_rate": 0.002, "loss": 2.5534, "step": 369210 }, { "epoch": 0.7355683411959709, "grad_norm": 0.17963460087776184, "learning_rate": 0.002, "loss": 2.5372, "step": 369220 }, { "epoch": 0.7355882634196098, "grad_norm": 0.15484820306301117, "learning_rate": 0.002, "loss": 2.5479, "step": 369230 }, { "epoch": 0.7356081856432487, "grad_norm": 0.19431520998477936, "learning_rate": 0.002, "loss": 2.5455, "step": 369240 }, { "epoch": 0.7356281078668877, "grad_norm": 0.1511641889810562, "learning_rate": 0.002, "loss": 2.5541, "step": 369250 }, { "epoch": 0.7356480300905266, "grad_norm": 0.17037972807884216, "learning_rate": 0.002, "loss": 2.5482, "step": 369260 }, { "epoch": 0.7356679523141655, "grad_norm": 0.18173719942569733, "learning_rate": 0.002, "loss": 2.542, "step": 369270 }, { "epoch": 0.7356878745378044, "grad_norm": 0.17365418374538422, "learning_rate": 0.002, "loss": 2.5735, "step": 369280 }, { "epoch": 0.7357077967614434, "grad_norm": 0.14967632293701172, "learning_rate": 0.002, "loss": 2.5479, "step": 369290 }, { "epoch": 0.7357277189850823, "grad_norm": 0.15621256828308105, "learning_rate": 0.002, "loss": 2.5479, "step": 369300 }, { "epoch": 0.7357476412087212, "grad_norm": 0.16934624314308167, "learning_rate": 0.002, "loss": 2.5614, "step": 369310 }, { "epoch": 0.73576756343236, "grad_norm": 0.17395856976509094, "learning_rate": 0.002, "loss": 2.5629, "step": 369320 }, { "epoch": 0.7357874856559989, "grad_norm": 0.16193202137947083, "learning_rate": 0.002, "loss": 2.5527, "step": 369330 }, { "epoch": 0.7358074078796379, "grad_norm": 0.15346001088619232, "learning_rate": 0.002, "loss": 2.5635, "step": 369340 }, { "epoch": 0.7358273301032768, "grad_norm": 0.15405979752540588, "learning_rate": 0.002, "loss": 2.5473, "step": 369350 }, { "epoch": 0.7358472523269157, "grad_norm": 0.15901842713356018, "learning_rate": 0.002, "loss": 2.5585, "step": 369360 }, { "epoch": 0.7358671745505546, "grad_norm": 0.3564392328262329, "learning_rate": 0.002, "loss": 2.5349, "step": 369370 }, { "epoch": 0.7358870967741935, "grad_norm": 0.2076522260904312, "learning_rate": 0.002, "loss": 2.5803, "step": 369380 }, { "epoch": 0.7359070189978325, "grad_norm": 0.15203611552715302, "learning_rate": 0.002, "loss": 2.5502, "step": 369390 }, { "epoch": 0.7359269412214714, "grad_norm": 0.15076498687267303, "learning_rate": 0.002, "loss": 2.5736, "step": 369400 }, { "epoch": 0.7359468634451103, "grad_norm": 0.14138410985469818, "learning_rate": 0.002, "loss": 2.5512, "step": 369410 }, { "epoch": 0.7359667856687492, "grad_norm": 0.1626216322183609, "learning_rate": 0.002, "loss": 2.5734, "step": 369420 }, { "epoch": 0.7359867078923881, "grad_norm": 0.2185801863670349, "learning_rate": 0.002, "loss": 2.5517, "step": 369430 }, { "epoch": 0.7360066301160271, "grad_norm": 0.24585913121700287, "learning_rate": 0.002, "loss": 2.5602, "step": 369440 }, { "epoch": 0.736026552339666, "grad_norm": 0.14612218737602234, "learning_rate": 0.002, "loss": 2.563, "step": 369450 }, { "epoch": 0.7360464745633049, "grad_norm": 0.14855937659740448, "learning_rate": 0.002, "loss": 2.5534, "step": 369460 }, { "epoch": 0.7360663967869437, "grad_norm": 0.15046077966690063, "learning_rate": 0.002, "loss": 2.5604, "step": 369470 }, { "epoch": 0.7360863190105826, "grad_norm": 0.1854565590620041, "learning_rate": 0.002, "loss": 2.5479, "step": 369480 }, { "epoch": 0.7361062412342216, "grad_norm": 0.16699014604091644, "learning_rate": 0.002, "loss": 2.5422, "step": 369490 }, { "epoch": 0.7361261634578605, "grad_norm": 0.14623698592185974, "learning_rate": 0.002, "loss": 2.5758, "step": 369500 }, { "epoch": 0.7361460856814994, "grad_norm": 0.17208611965179443, "learning_rate": 0.002, "loss": 2.5711, "step": 369510 }, { "epoch": 0.7361660079051383, "grad_norm": 0.1508258730173111, "learning_rate": 0.002, "loss": 2.5444, "step": 369520 }, { "epoch": 0.7361859301287772, "grad_norm": 0.163711816072464, "learning_rate": 0.002, "loss": 2.5608, "step": 369530 }, { "epoch": 0.7362058523524162, "grad_norm": 0.16350232064723969, "learning_rate": 0.002, "loss": 2.5593, "step": 369540 }, { "epoch": 0.7362257745760551, "grad_norm": 0.15707218647003174, "learning_rate": 0.002, "loss": 2.5573, "step": 369550 }, { "epoch": 0.736245696799694, "grad_norm": 0.1637895703315735, "learning_rate": 0.002, "loss": 2.5475, "step": 369560 }, { "epoch": 0.7362656190233329, "grad_norm": 0.18582922220230103, "learning_rate": 0.002, "loss": 2.5388, "step": 369570 }, { "epoch": 0.7362855412469719, "grad_norm": 0.14902685582637787, "learning_rate": 0.002, "loss": 2.5666, "step": 369580 }, { "epoch": 0.7363054634706108, "grad_norm": 0.1626802682876587, "learning_rate": 0.002, "loss": 2.5454, "step": 369590 }, { "epoch": 0.7363253856942497, "grad_norm": 0.14686310291290283, "learning_rate": 0.002, "loss": 2.5537, "step": 369600 }, { "epoch": 0.7363453079178885, "grad_norm": 0.18248988687992096, "learning_rate": 0.002, "loss": 2.5457, "step": 369610 }, { "epoch": 0.7363652301415274, "grad_norm": 0.1964259147644043, "learning_rate": 0.002, "loss": 2.5442, "step": 369620 }, { "epoch": 0.7363851523651664, "grad_norm": 0.16237834095954895, "learning_rate": 0.002, "loss": 2.5416, "step": 369630 }, { "epoch": 0.7364050745888053, "grad_norm": 0.14561577141284943, "learning_rate": 0.002, "loss": 2.5469, "step": 369640 }, { "epoch": 0.7364249968124442, "grad_norm": 0.17612408101558685, "learning_rate": 0.002, "loss": 2.5694, "step": 369650 }, { "epoch": 0.7364449190360831, "grad_norm": 0.1746208667755127, "learning_rate": 0.002, "loss": 2.563, "step": 369660 }, { "epoch": 0.736464841259722, "grad_norm": 0.17475415766239166, "learning_rate": 0.002, "loss": 2.5593, "step": 369670 }, { "epoch": 0.736484763483361, "grad_norm": 0.14842724800109863, "learning_rate": 0.002, "loss": 2.5534, "step": 369680 }, { "epoch": 0.7365046857069999, "grad_norm": 0.1846335232257843, "learning_rate": 0.002, "loss": 2.559, "step": 369690 }, { "epoch": 0.7365246079306388, "grad_norm": 0.20423589646816254, "learning_rate": 0.002, "loss": 2.5511, "step": 369700 }, { "epoch": 0.7365445301542777, "grad_norm": 0.17087140679359436, "learning_rate": 0.002, "loss": 2.561, "step": 369710 }, { "epoch": 0.7365644523779166, "grad_norm": 0.15757712721824646, "learning_rate": 0.002, "loss": 2.5476, "step": 369720 }, { "epoch": 0.7365843746015556, "grad_norm": 0.17288225889205933, "learning_rate": 0.002, "loss": 2.562, "step": 369730 }, { "epoch": 0.7366042968251945, "grad_norm": 0.19928738474845886, "learning_rate": 0.002, "loss": 2.5614, "step": 369740 }, { "epoch": 0.7366242190488334, "grad_norm": 0.140142023563385, "learning_rate": 0.002, "loss": 2.553, "step": 369750 }, { "epoch": 0.7366441412724722, "grad_norm": 0.1803322434425354, "learning_rate": 0.002, "loss": 2.5383, "step": 369760 }, { "epoch": 0.7366640634961111, "grad_norm": 0.1603088229894638, "learning_rate": 0.002, "loss": 2.5555, "step": 369770 }, { "epoch": 0.7366839857197501, "grad_norm": 0.1747511625289917, "learning_rate": 0.002, "loss": 2.5576, "step": 369780 }, { "epoch": 0.736703907943389, "grad_norm": 0.1722380518913269, "learning_rate": 0.002, "loss": 2.567, "step": 369790 }, { "epoch": 0.7367238301670279, "grad_norm": 0.17086347937583923, "learning_rate": 0.002, "loss": 2.5379, "step": 369800 }, { "epoch": 0.7367437523906668, "grad_norm": 0.1775922030210495, "learning_rate": 0.002, "loss": 2.5426, "step": 369810 }, { "epoch": 0.7367636746143057, "grad_norm": 0.1683858036994934, "learning_rate": 0.002, "loss": 2.5604, "step": 369820 }, { "epoch": 0.7367835968379447, "grad_norm": 0.14689500629901886, "learning_rate": 0.002, "loss": 2.5445, "step": 369830 }, { "epoch": 0.7368035190615836, "grad_norm": 0.15851008892059326, "learning_rate": 0.002, "loss": 2.5524, "step": 369840 }, { "epoch": 0.7368234412852225, "grad_norm": 0.1476321816444397, "learning_rate": 0.002, "loss": 2.5716, "step": 369850 }, { "epoch": 0.7368433635088614, "grad_norm": 0.21701094508171082, "learning_rate": 0.002, "loss": 2.5577, "step": 369860 }, { "epoch": 0.7368632857325004, "grad_norm": 0.14666929841041565, "learning_rate": 0.002, "loss": 2.5509, "step": 369870 }, { "epoch": 0.7368832079561393, "grad_norm": 0.22029772400856018, "learning_rate": 0.002, "loss": 2.5508, "step": 369880 }, { "epoch": 0.7369031301797782, "grad_norm": 0.1727314293384552, "learning_rate": 0.002, "loss": 2.5405, "step": 369890 }, { "epoch": 0.736923052403417, "grad_norm": 0.164171501994133, "learning_rate": 0.002, "loss": 2.5562, "step": 369900 }, { "epoch": 0.7369429746270559, "grad_norm": 0.15736152231693268, "learning_rate": 0.002, "loss": 2.5552, "step": 369910 }, { "epoch": 0.7369628968506949, "grad_norm": 0.19452637434005737, "learning_rate": 0.002, "loss": 2.5645, "step": 369920 }, { "epoch": 0.7369828190743338, "grad_norm": 0.15814539790153503, "learning_rate": 0.002, "loss": 2.5506, "step": 369930 }, { "epoch": 0.7370027412979727, "grad_norm": 0.1692761331796646, "learning_rate": 0.002, "loss": 2.5619, "step": 369940 }, { "epoch": 0.7370226635216116, "grad_norm": 0.20111477375030518, "learning_rate": 0.002, "loss": 2.5553, "step": 369950 }, { "epoch": 0.7370425857452505, "grad_norm": 0.14141882956027985, "learning_rate": 0.002, "loss": 2.5507, "step": 369960 }, { "epoch": 0.7370625079688895, "grad_norm": 0.16604532301425934, "learning_rate": 0.002, "loss": 2.5649, "step": 369970 }, { "epoch": 0.7370824301925284, "grad_norm": 0.17958003282546997, "learning_rate": 0.002, "loss": 2.5529, "step": 369980 }, { "epoch": 0.7371023524161673, "grad_norm": 0.1962190568447113, "learning_rate": 0.002, "loss": 2.5421, "step": 369990 }, { "epoch": 0.7371222746398062, "grad_norm": 0.16366882622241974, "learning_rate": 0.002, "loss": 2.5527, "step": 370000 }, { "epoch": 0.7371421968634451, "grad_norm": 0.16459760069847107, "learning_rate": 0.002, "loss": 2.5462, "step": 370010 }, { "epoch": 0.7371621190870841, "grad_norm": 0.1462215930223465, "learning_rate": 0.002, "loss": 2.5599, "step": 370020 }, { "epoch": 0.737182041310723, "grad_norm": 0.17494219541549683, "learning_rate": 0.002, "loss": 2.5724, "step": 370030 }, { "epoch": 0.7372019635343618, "grad_norm": 0.17503520846366882, "learning_rate": 0.002, "loss": 2.5519, "step": 370040 }, { "epoch": 0.7372218857580007, "grad_norm": 0.1592695713043213, "learning_rate": 0.002, "loss": 2.5432, "step": 370050 }, { "epoch": 0.7372418079816396, "grad_norm": 0.17908091843128204, "learning_rate": 0.002, "loss": 2.5535, "step": 370060 }, { "epoch": 0.7372617302052786, "grad_norm": 0.18390940129756927, "learning_rate": 0.002, "loss": 2.5438, "step": 370070 }, { "epoch": 0.7372816524289175, "grad_norm": 0.1900888830423355, "learning_rate": 0.002, "loss": 2.5568, "step": 370080 }, { "epoch": 0.7373015746525564, "grad_norm": 0.15616784989833832, "learning_rate": 0.002, "loss": 2.5519, "step": 370090 }, { "epoch": 0.7373214968761953, "grad_norm": 0.19607189297676086, "learning_rate": 0.002, "loss": 2.5655, "step": 370100 }, { "epoch": 0.7373414190998342, "grad_norm": 0.16537150740623474, "learning_rate": 0.002, "loss": 2.5484, "step": 370110 }, { "epoch": 0.7373613413234732, "grad_norm": 0.16046613454818726, "learning_rate": 0.002, "loss": 2.5507, "step": 370120 }, { "epoch": 0.7373812635471121, "grad_norm": 0.16535305976867676, "learning_rate": 0.002, "loss": 2.5642, "step": 370130 }, { "epoch": 0.737401185770751, "grad_norm": 0.1740453988313675, "learning_rate": 0.002, "loss": 2.5542, "step": 370140 }, { "epoch": 0.7374211079943899, "grad_norm": 0.1655372828245163, "learning_rate": 0.002, "loss": 2.5452, "step": 370150 }, { "epoch": 0.7374410302180289, "grad_norm": 0.1682860553264618, "learning_rate": 0.002, "loss": 2.5531, "step": 370160 }, { "epoch": 0.7374609524416678, "grad_norm": 0.15706610679626465, "learning_rate": 0.002, "loss": 2.5531, "step": 370170 }, { "epoch": 0.7374808746653067, "grad_norm": 0.15808460116386414, "learning_rate": 0.002, "loss": 2.5608, "step": 370180 }, { "epoch": 0.7375007968889455, "grad_norm": 0.17675010859966278, "learning_rate": 0.002, "loss": 2.5505, "step": 370190 }, { "epoch": 0.7375207191125844, "grad_norm": 0.18689444661140442, "learning_rate": 0.002, "loss": 2.5512, "step": 370200 }, { "epoch": 0.7375406413362234, "grad_norm": 0.15587589144706726, "learning_rate": 0.002, "loss": 2.5477, "step": 370210 }, { "epoch": 0.7375605635598623, "grad_norm": 0.1634584665298462, "learning_rate": 0.002, "loss": 2.5493, "step": 370220 }, { "epoch": 0.7375804857835012, "grad_norm": 0.1803419440984726, "learning_rate": 0.002, "loss": 2.5625, "step": 370230 }, { "epoch": 0.7376004080071401, "grad_norm": 0.15590114891529083, "learning_rate": 0.002, "loss": 2.5665, "step": 370240 }, { "epoch": 0.737620330230779, "grad_norm": 0.1864897608757019, "learning_rate": 0.002, "loss": 2.5512, "step": 370250 }, { "epoch": 0.737640252454418, "grad_norm": 0.16447849571704865, "learning_rate": 0.002, "loss": 2.5595, "step": 370260 }, { "epoch": 0.7376601746780569, "grad_norm": 0.18725624680519104, "learning_rate": 0.002, "loss": 2.5796, "step": 370270 }, { "epoch": 0.7376800969016958, "grad_norm": 0.16682280600070953, "learning_rate": 0.002, "loss": 2.542, "step": 370280 }, { "epoch": 0.7377000191253347, "grad_norm": 0.16100367903709412, "learning_rate": 0.002, "loss": 2.5495, "step": 370290 }, { "epoch": 0.7377199413489736, "grad_norm": 0.17510248720645905, "learning_rate": 0.002, "loss": 2.5517, "step": 370300 }, { "epoch": 0.7377398635726126, "grad_norm": 0.1796419471502304, "learning_rate": 0.002, "loss": 2.5443, "step": 370310 }, { "epoch": 0.7377597857962515, "grad_norm": 0.16155147552490234, "learning_rate": 0.002, "loss": 2.5574, "step": 370320 }, { "epoch": 0.7377797080198903, "grad_norm": 0.19586937129497528, "learning_rate": 0.002, "loss": 2.5501, "step": 370330 }, { "epoch": 0.7377996302435292, "grad_norm": 0.18784917891025543, "learning_rate": 0.002, "loss": 2.5678, "step": 370340 }, { "epoch": 0.7378195524671681, "grad_norm": 0.1725143939256668, "learning_rate": 0.002, "loss": 2.567, "step": 370350 }, { "epoch": 0.7378394746908071, "grad_norm": 0.19584257900714874, "learning_rate": 0.002, "loss": 2.5504, "step": 370360 }, { "epoch": 0.737859396914446, "grad_norm": 0.15495266020298004, "learning_rate": 0.002, "loss": 2.5558, "step": 370370 }, { "epoch": 0.7378793191380849, "grad_norm": 0.13742780685424805, "learning_rate": 0.002, "loss": 2.5719, "step": 370380 }, { "epoch": 0.7378992413617238, "grad_norm": 0.18546342849731445, "learning_rate": 0.002, "loss": 2.5596, "step": 370390 }, { "epoch": 0.7379191635853627, "grad_norm": 0.17328502237796783, "learning_rate": 0.002, "loss": 2.5502, "step": 370400 }, { "epoch": 0.7379390858090017, "grad_norm": 0.18021613359451294, "learning_rate": 0.002, "loss": 2.5413, "step": 370410 }, { "epoch": 0.7379590080326406, "grad_norm": 0.17758962512016296, "learning_rate": 0.002, "loss": 2.5338, "step": 370420 }, { "epoch": 0.7379789302562795, "grad_norm": 0.15815147757530212, "learning_rate": 0.002, "loss": 2.5466, "step": 370430 }, { "epoch": 0.7379988524799184, "grad_norm": 0.22152039408683777, "learning_rate": 0.002, "loss": 2.5637, "step": 370440 }, { "epoch": 0.7380187747035574, "grad_norm": 0.18956606090068817, "learning_rate": 0.002, "loss": 2.5637, "step": 370450 }, { "epoch": 0.7380386969271963, "grad_norm": 0.16520898044109344, "learning_rate": 0.002, "loss": 2.5433, "step": 370460 }, { "epoch": 0.7380586191508351, "grad_norm": 0.14975211024284363, "learning_rate": 0.002, "loss": 2.5409, "step": 370470 }, { "epoch": 0.738078541374474, "grad_norm": 0.14856648445129395, "learning_rate": 0.002, "loss": 2.5421, "step": 370480 }, { "epoch": 0.7380984635981129, "grad_norm": 0.20260918140411377, "learning_rate": 0.002, "loss": 2.5608, "step": 370490 }, { "epoch": 0.7381183858217519, "grad_norm": 0.18691644072532654, "learning_rate": 0.002, "loss": 2.5431, "step": 370500 }, { "epoch": 0.7381383080453908, "grad_norm": 0.17546406388282776, "learning_rate": 0.002, "loss": 2.5477, "step": 370510 }, { "epoch": 0.7381582302690297, "grad_norm": 0.15853725373744965, "learning_rate": 0.002, "loss": 2.5529, "step": 370520 }, { "epoch": 0.7381781524926686, "grad_norm": 0.17921310663223267, "learning_rate": 0.002, "loss": 2.5586, "step": 370530 }, { "epoch": 0.7381980747163075, "grad_norm": 0.18878309428691864, "learning_rate": 0.002, "loss": 2.5595, "step": 370540 }, { "epoch": 0.7382179969399465, "grad_norm": 0.1579335629940033, "learning_rate": 0.002, "loss": 2.5468, "step": 370550 }, { "epoch": 0.7382379191635854, "grad_norm": 0.16874264180660248, "learning_rate": 0.002, "loss": 2.5568, "step": 370560 }, { "epoch": 0.7382578413872243, "grad_norm": 0.21377623081207275, "learning_rate": 0.002, "loss": 2.5554, "step": 370570 }, { "epoch": 0.7382777636108632, "grad_norm": 0.15584640204906464, "learning_rate": 0.002, "loss": 2.5659, "step": 370580 }, { "epoch": 0.7382976858345021, "grad_norm": 0.18781189620494843, "learning_rate": 0.002, "loss": 2.5565, "step": 370590 }, { "epoch": 0.7383176080581411, "grad_norm": 0.17543692886829376, "learning_rate": 0.002, "loss": 2.5502, "step": 370600 }, { "epoch": 0.73833753028178, "grad_norm": 0.15949180722236633, "learning_rate": 0.002, "loss": 2.5535, "step": 370610 }, { "epoch": 0.7383574525054188, "grad_norm": 0.16620495915412903, "learning_rate": 0.002, "loss": 2.5569, "step": 370620 }, { "epoch": 0.7383773747290577, "grad_norm": 0.17036598920822144, "learning_rate": 0.002, "loss": 2.5274, "step": 370630 }, { "epoch": 0.7383972969526966, "grad_norm": 0.1745859980583191, "learning_rate": 0.002, "loss": 2.564, "step": 370640 }, { "epoch": 0.7384172191763356, "grad_norm": 0.14934362471103668, "learning_rate": 0.002, "loss": 2.5595, "step": 370650 }, { "epoch": 0.7384371413999745, "grad_norm": 0.1742229461669922, "learning_rate": 0.002, "loss": 2.5453, "step": 370660 }, { "epoch": 0.7384570636236134, "grad_norm": 0.15685582160949707, "learning_rate": 0.002, "loss": 2.5645, "step": 370670 }, { "epoch": 0.7384769858472523, "grad_norm": 0.14734016358852386, "learning_rate": 0.002, "loss": 2.5467, "step": 370680 }, { "epoch": 0.7384969080708912, "grad_norm": 0.1656741052865982, "learning_rate": 0.002, "loss": 2.5685, "step": 370690 }, { "epoch": 0.7385168302945302, "grad_norm": 0.24912548065185547, "learning_rate": 0.002, "loss": 2.5572, "step": 370700 }, { "epoch": 0.7385367525181691, "grad_norm": 0.18277326226234436, "learning_rate": 0.002, "loss": 2.5587, "step": 370710 }, { "epoch": 0.738556674741808, "grad_norm": 0.15895499289035797, "learning_rate": 0.002, "loss": 2.5436, "step": 370720 }, { "epoch": 0.7385765969654469, "grad_norm": 0.18720056116580963, "learning_rate": 0.002, "loss": 2.5617, "step": 370730 }, { "epoch": 0.7385965191890858, "grad_norm": 0.14808908104896545, "learning_rate": 0.002, "loss": 2.5451, "step": 370740 }, { "epoch": 0.7386164414127248, "grad_norm": 0.1601935625076294, "learning_rate": 0.002, "loss": 2.5632, "step": 370750 }, { "epoch": 0.7386363636363636, "grad_norm": 0.15812866389751434, "learning_rate": 0.002, "loss": 2.5559, "step": 370760 }, { "epoch": 0.7386562858600025, "grad_norm": 0.15617501735687256, "learning_rate": 0.002, "loss": 2.5542, "step": 370770 }, { "epoch": 0.7386762080836414, "grad_norm": 0.16481414437294006, "learning_rate": 0.002, "loss": 2.5657, "step": 370780 }, { "epoch": 0.7386961303072804, "grad_norm": 0.13917505741119385, "learning_rate": 0.002, "loss": 2.5632, "step": 370790 }, { "epoch": 0.7387160525309193, "grad_norm": 0.19166353344917297, "learning_rate": 0.002, "loss": 2.5561, "step": 370800 }, { "epoch": 0.7387359747545582, "grad_norm": 0.17332753539085388, "learning_rate": 0.002, "loss": 2.5625, "step": 370810 }, { "epoch": 0.7387558969781971, "grad_norm": 0.17234516143798828, "learning_rate": 0.002, "loss": 2.5632, "step": 370820 }, { "epoch": 0.738775819201836, "grad_norm": 0.15810877084732056, "learning_rate": 0.002, "loss": 2.5557, "step": 370830 }, { "epoch": 0.738795741425475, "grad_norm": 0.17737241089344025, "learning_rate": 0.002, "loss": 2.561, "step": 370840 }, { "epoch": 0.7388156636491139, "grad_norm": 0.1841994673013687, "learning_rate": 0.002, "loss": 2.569, "step": 370850 }, { "epoch": 0.7388355858727528, "grad_norm": 0.1652173101902008, "learning_rate": 0.002, "loss": 2.5392, "step": 370860 }, { "epoch": 0.7388555080963917, "grad_norm": 0.14265470206737518, "learning_rate": 0.002, "loss": 2.5585, "step": 370870 }, { "epoch": 0.7388754303200306, "grad_norm": 0.2391866147518158, "learning_rate": 0.002, "loss": 2.5617, "step": 370880 }, { "epoch": 0.7388953525436696, "grad_norm": 0.1555364429950714, "learning_rate": 0.002, "loss": 2.5493, "step": 370890 }, { "epoch": 0.7389152747673084, "grad_norm": 0.15281157195568085, "learning_rate": 0.002, "loss": 2.5563, "step": 370900 }, { "epoch": 0.7389351969909473, "grad_norm": 0.13837192952632904, "learning_rate": 0.002, "loss": 2.5538, "step": 370910 }, { "epoch": 0.7389551192145862, "grad_norm": 0.16889554262161255, "learning_rate": 0.002, "loss": 2.5456, "step": 370920 }, { "epoch": 0.7389750414382251, "grad_norm": 0.17879876494407654, "learning_rate": 0.002, "loss": 2.5449, "step": 370930 }, { "epoch": 0.7389949636618641, "grad_norm": 0.16965900361537933, "learning_rate": 0.002, "loss": 2.5523, "step": 370940 }, { "epoch": 0.739014885885503, "grad_norm": 0.1654188185930252, "learning_rate": 0.002, "loss": 2.5433, "step": 370950 }, { "epoch": 0.7390348081091419, "grad_norm": 0.16107451915740967, "learning_rate": 0.002, "loss": 2.5483, "step": 370960 }, { "epoch": 0.7390547303327808, "grad_norm": 0.1578936129808426, "learning_rate": 0.002, "loss": 2.5572, "step": 370970 }, { "epoch": 0.7390746525564197, "grad_norm": 0.16123707592487335, "learning_rate": 0.002, "loss": 2.54, "step": 370980 }, { "epoch": 0.7390945747800587, "grad_norm": 0.1626994013786316, "learning_rate": 0.002, "loss": 2.5414, "step": 370990 }, { "epoch": 0.7391144970036976, "grad_norm": 0.1651945561170578, "learning_rate": 0.002, "loss": 2.5609, "step": 371000 }, { "epoch": 0.7391344192273365, "grad_norm": 0.1634703427553177, "learning_rate": 0.002, "loss": 2.5409, "step": 371010 }, { "epoch": 0.7391543414509754, "grad_norm": 0.3254729211330414, "learning_rate": 0.002, "loss": 2.5292, "step": 371020 }, { "epoch": 0.7391742636746143, "grad_norm": 0.16266095638275146, "learning_rate": 0.002, "loss": 2.5404, "step": 371030 }, { "epoch": 0.7391941858982533, "grad_norm": 0.1791534125804901, "learning_rate": 0.002, "loss": 2.5597, "step": 371040 }, { "epoch": 0.7392141081218921, "grad_norm": 0.16772018373012543, "learning_rate": 0.002, "loss": 2.5478, "step": 371050 }, { "epoch": 0.739234030345531, "grad_norm": 0.17758874595165253, "learning_rate": 0.002, "loss": 2.5402, "step": 371060 }, { "epoch": 0.7392539525691699, "grad_norm": 0.15492449700832367, "learning_rate": 0.002, "loss": 2.5597, "step": 371070 }, { "epoch": 0.7392738747928089, "grad_norm": 0.18073081970214844, "learning_rate": 0.002, "loss": 2.5565, "step": 371080 }, { "epoch": 0.7392937970164478, "grad_norm": 0.14926835894584656, "learning_rate": 0.002, "loss": 2.5567, "step": 371090 }, { "epoch": 0.7393137192400867, "grad_norm": 0.17029747366905212, "learning_rate": 0.002, "loss": 2.5447, "step": 371100 }, { "epoch": 0.7393336414637256, "grad_norm": 0.19370971620082855, "learning_rate": 0.002, "loss": 2.5355, "step": 371110 }, { "epoch": 0.7393535636873645, "grad_norm": 0.17404115200042725, "learning_rate": 0.002, "loss": 2.5561, "step": 371120 }, { "epoch": 0.7393734859110035, "grad_norm": 0.16180658340454102, "learning_rate": 0.002, "loss": 2.5417, "step": 371130 }, { "epoch": 0.7393934081346424, "grad_norm": 0.15000203251838684, "learning_rate": 0.002, "loss": 2.5595, "step": 371140 }, { "epoch": 0.7394133303582813, "grad_norm": 0.16257546842098236, "learning_rate": 0.002, "loss": 2.5498, "step": 371150 }, { "epoch": 0.7394332525819202, "grad_norm": 0.1728001981973648, "learning_rate": 0.002, "loss": 2.5602, "step": 371160 }, { "epoch": 0.739453174805559, "grad_norm": 0.15745076537132263, "learning_rate": 0.002, "loss": 2.5556, "step": 371170 }, { "epoch": 0.7394730970291981, "grad_norm": 0.17178159952163696, "learning_rate": 0.002, "loss": 2.5519, "step": 371180 }, { "epoch": 0.739493019252837, "grad_norm": 0.14986878633499146, "learning_rate": 0.002, "loss": 2.5455, "step": 371190 }, { "epoch": 0.7395129414764758, "grad_norm": 0.17194347083568573, "learning_rate": 0.002, "loss": 2.559, "step": 371200 }, { "epoch": 0.7395328637001147, "grad_norm": 0.17030078172683716, "learning_rate": 0.002, "loss": 2.5643, "step": 371210 }, { "epoch": 0.7395527859237536, "grad_norm": 0.1517452746629715, "learning_rate": 0.002, "loss": 2.5628, "step": 371220 }, { "epoch": 0.7395727081473926, "grad_norm": 0.20867452025413513, "learning_rate": 0.002, "loss": 2.5487, "step": 371230 }, { "epoch": 0.7395926303710315, "grad_norm": 0.14650070667266846, "learning_rate": 0.002, "loss": 2.5472, "step": 371240 }, { "epoch": 0.7396125525946704, "grad_norm": 0.1852307915687561, "learning_rate": 0.002, "loss": 2.5583, "step": 371250 }, { "epoch": 0.7396324748183093, "grad_norm": 0.1490703523159027, "learning_rate": 0.002, "loss": 2.5686, "step": 371260 }, { "epoch": 0.7396523970419482, "grad_norm": 0.24695545434951782, "learning_rate": 0.002, "loss": 2.5668, "step": 371270 }, { "epoch": 0.7396723192655872, "grad_norm": 0.16561518609523773, "learning_rate": 0.002, "loss": 2.5325, "step": 371280 }, { "epoch": 0.7396922414892261, "grad_norm": 0.15065321326255798, "learning_rate": 0.002, "loss": 2.5514, "step": 371290 }, { "epoch": 0.739712163712865, "grad_norm": 0.1447412222623825, "learning_rate": 0.002, "loss": 2.5494, "step": 371300 }, { "epoch": 0.7397320859365039, "grad_norm": 0.17128825187683105, "learning_rate": 0.002, "loss": 2.5381, "step": 371310 }, { "epoch": 0.7397520081601427, "grad_norm": 0.17944586277008057, "learning_rate": 0.002, "loss": 2.5578, "step": 371320 }, { "epoch": 0.7397719303837818, "grad_norm": 0.1477765291929245, "learning_rate": 0.002, "loss": 2.5423, "step": 371330 }, { "epoch": 0.7397918526074206, "grad_norm": 0.16839103400707245, "learning_rate": 0.002, "loss": 2.5517, "step": 371340 }, { "epoch": 0.7398117748310595, "grad_norm": 0.1746349334716797, "learning_rate": 0.002, "loss": 2.5601, "step": 371350 }, { "epoch": 0.7398316970546984, "grad_norm": 0.1706879585981369, "learning_rate": 0.002, "loss": 2.5418, "step": 371360 }, { "epoch": 0.7398516192783374, "grad_norm": 0.17923521995544434, "learning_rate": 0.002, "loss": 2.5532, "step": 371370 }, { "epoch": 0.7398715415019763, "grad_norm": 0.15546953678131104, "learning_rate": 0.002, "loss": 2.5559, "step": 371380 }, { "epoch": 0.7398914637256152, "grad_norm": 0.1672903448343277, "learning_rate": 0.002, "loss": 2.5493, "step": 371390 }, { "epoch": 0.7399113859492541, "grad_norm": 0.20695823431015015, "learning_rate": 0.002, "loss": 2.5596, "step": 371400 }, { "epoch": 0.739931308172893, "grad_norm": 0.13808377087116241, "learning_rate": 0.002, "loss": 2.5579, "step": 371410 }, { "epoch": 0.739951230396532, "grad_norm": 0.18058238923549652, "learning_rate": 0.002, "loss": 2.5562, "step": 371420 }, { "epoch": 0.7399711526201709, "grad_norm": 0.1681603193283081, "learning_rate": 0.002, "loss": 2.5433, "step": 371430 }, { "epoch": 0.7399910748438098, "grad_norm": 0.13729631900787354, "learning_rate": 0.002, "loss": 2.5624, "step": 371440 }, { "epoch": 0.7400109970674487, "grad_norm": 0.15520121157169342, "learning_rate": 0.002, "loss": 2.5524, "step": 371450 }, { "epoch": 0.7400309192910876, "grad_norm": 0.20499815046787262, "learning_rate": 0.002, "loss": 2.5471, "step": 371460 }, { "epoch": 0.7400508415147266, "grad_norm": 0.17797476053237915, "learning_rate": 0.002, "loss": 2.5569, "step": 371470 }, { "epoch": 0.7400707637383654, "grad_norm": 0.17332243919372559, "learning_rate": 0.002, "loss": 2.5432, "step": 371480 }, { "epoch": 0.7400906859620043, "grad_norm": 0.17232298851013184, "learning_rate": 0.002, "loss": 2.5358, "step": 371490 }, { "epoch": 0.7401106081856432, "grad_norm": 0.1639462411403656, "learning_rate": 0.002, "loss": 2.5581, "step": 371500 }, { "epoch": 0.7401305304092821, "grad_norm": 0.15545614063739777, "learning_rate": 0.002, "loss": 2.557, "step": 371510 }, { "epoch": 0.7401504526329211, "grad_norm": 0.1565868854522705, "learning_rate": 0.002, "loss": 2.5517, "step": 371520 }, { "epoch": 0.74017037485656, "grad_norm": 0.17103193700313568, "learning_rate": 0.002, "loss": 2.5685, "step": 371530 }, { "epoch": 0.7401902970801989, "grad_norm": 0.14182507991790771, "learning_rate": 0.002, "loss": 2.5626, "step": 371540 }, { "epoch": 0.7402102193038378, "grad_norm": 0.1714300513267517, "learning_rate": 0.002, "loss": 2.5626, "step": 371550 }, { "epoch": 0.7402301415274767, "grad_norm": 0.17413939535617828, "learning_rate": 0.002, "loss": 2.5778, "step": 371560 }, { "epoch": 0.7402500637511157, "grad_norm": 0.16599516570568085, "learning_rate": 0.002, "loss": 2.5646, "step": 371570 }, { "epoch": 0.7402699859747546, "grad_norm": 0.15310488641262054, "learning_rate": 0.002, "loss": 2.5694, "step": 371580 }, { "epoch": 0.7402899081983935, "grad_norm": 0.15441927313804626, "learning_rate": 0.002, "loss": 2.5498, "step": 371590 }, { "epoch": 0.7403098304220324, "grad_norm": 0.18110495805740356, "learning_rate": 0.002, "loss": 2.5667, "step": 371600 }, { "epoch": 0.7403297526456712, "grad_norm": 0.15482468903064728, "learning_rate": 0.002, "loss": 2.5412, "step": 371610 }, { "epoch": 0.7403496748693102, "grad_norm": 0.1830456405878067, "learning_rate": 0.002, "loss": 2.5619, "step": 371620 }, { "epoch": 0.7403695970929491, "grad_norm": 0.15198656916618347, "learning_rate": 0.002, "loss": 2.5717, "step": 371630 }, { "epoch": 0.740389519316588, "grad_norm": 0.150051087141037, "learning_rate": 0.002, "loss": 2.5531, "step": 371640 }, { "epoch": 0.7404094415402269, "grad_norm": 0.21817563474178314, "learning_rate": 0.002, "loss": 2.5467, "step": 371650 }, { "epoch": 0.7404293637638659, "grad_norm": 0.1453782618045807, "learning_rate": 0.002, "loss": 2.5625, "step": 371660 }, { "epoch": 0.7404492859875048, "grad_norm": 0.1564658284187317, "learning_rate": 0.002, "loss": 2.5691, "step": 371670 }, { "epoch": 0.7404692082111437, "grad_norm": 0.18612290918827057, "learning_rate": 0.002, "loss": 2.5527, "step": 371680 }, { "epoch": 0.7404891304347826, "grad_norm": 0.19011124968528748, "learning_rate": 0.002, "loss": 2.5611, "step": 371690 }, { "epoch": 0.7405090526584215, "grad_norm": 0.16089452803134918, "learning_rate": 0.002, "loss": 2.5614, "step": 371700 }, { "epoch": 0.7405289748820605, "grad_norm": 0.14172828197479248, "learning_rate": 0.002, "loss": 2.5465, "step": 371710 }, { "epoch": 0.7405488971056994, "grad_norm": 0.16784465312957764, "learning_rate": 0.002, "loss": 2.563, "step": 371720 }, { "epoch": 0.7405688193293383, "grad_norm": 0.17492148280143738, "learning_rate": 0.002, "loss": 2.5554, "step": 371730 }, { "epoch": 0.7405887415529772, "grad_norm": 0.18557555973529816, "learning_rate": 0.002, "loss": 2.5537, "step": 371740 }, { "epoch": 0.740608663776616, "grad_norm": 0.17593026161193848, "learning_rate": 0.002, "loss": 2.5502, "step": 371750 }, { "epoch": 0.740628586000255, "grad_norm": 0.17121002078056335, "learning_rate": 0.002, "loss": 2.5484, "step": 371760 }, { "epoch": 0.7406485082238939, "grad_norm": 0.1991499662399292, "learning_rate": 0.002, "loss": 2.5521, "step": 371770 }, { "epoch": 0.7406684304475328, "grad_norm": 0.16276146471500397, "learning_rate": 0.002, "loss": 2.5539, "step": 371780 }, { "epoch": 0.7406883526711717, "grad_norm": 0.152939110994339, "learning_rate": 0.002, "loss": 2.5573, "step": 371790 }, { "epoch": 0.7407082748948106, "grad_norm": 0.21282050013542175, "learning_rate": 0.002, "loss": 2.5592, "step": 371800 }, { "epoch": 0.7407281971184496, "grad_norm": 0.18179315328598022, "learning_rate": 0.002, "loss": 2.5623, "step": 371810 }, { "epoch": 0.7407481193420885, "grad_norm": 0.1808338165283203, "learning_rate": 0.002, "loss": 2.5571, "step": 371820 }, { "epoch": 0.7407680415657274, "grad_norm": 0.16311059892177582, "learning_rate": 0.002, "loss": 2.5608, "step": 371830 }, { "epoch": 0.7407879637893663, "grad_norm": 0.1940000206232071, "learning_rate": 0.002, "loss": 2.5578, "step": 371840 }, { "epoch": 0.7408078860130052, "grad_norm": 0.1759316772222519, "learning_rate": 0.002, "loss": 2.5528, "step": 371850 }, { "epoch": 0.7408278082366442, "grad_norm": 0.1946471929550171, "learning_rate": 0.002, "loss": 2.5516, "step": 371860 }, { "epoch": 0.7408477304602831, "grad_norm": 0.1528838872909546, "learning_rate": 0.002, "loss": 2.5595, "step": 371870 }, { "epoch": 0.740867652683922, "grad_norm": 0.14117136597633362, "learning_rate": 0.002, "loss": 2.5587, "step": 371880 }, { "epoch": 0.7408875749075609, "grad_norm": 0.15786774456501007, "learning_rate": 0.002, "loss": 2.5461, "step": 371890 }, { "epoch": 0.7409074971311997, "grad_norm": 0.216132253408432, "learning_rate": 0.002, "loss": 2.5542, "step": 371900 }, { "epoch": 0.7409274193548387, "grad_norm": 0.15671268105506897, "learning_rate": 0.002, "loss": 2.548, "step": 371910 }, { "epoch": 0.7409473415784776, "grad_norm": 0.1746198982000351, "learning_rate": 0.002, "loss": 2.5604, "step": 371920 }, { "epoch": 0.7409672638021165, "grad_norm": 0.1816578507423401, "learning_rate": 0.002, "loss": 2.5677, "step": 371930 }, { "epoch": 0.7409871860257554, "grad_norm": 0.16158269345760345, "learning_rate": 0.002, "loss": 2.5502, "step": 371940 }, { "epoch": 0.7410071082493944, "grad_norm": 0.13943570852279663, "learning_rate": 0.002, "loss": 2.5402, "step": 371950 }, { "epoch": 0.7410270304730333, "grad_norm": 0.19324733316898346, "learning_rate": 0.002, "loss": 2.5677, "step": 371960 }, { "epoch": 0.7410469526966722, "grad_norm": 0.17923876643180847, "learning_rate": 0.002, "loss": 2.5557, "step": 371970 }, { "epoch": 0.7410668749203111, "grad_norm": 0.17241932451725006, "learning_rate": 0.002, "loss": 2.553, "step": 371980 }, { "epoch": 0.74108679714395, "grad_norm": 0.18227887153625488, "learning_rate": 0.002, "loss": 2.5302, "step": 371990 }, { "epoch": 0.741106719367589, "grad_norm": 0.16289938986301422, "learning_rate": 0.002, "loss": 2.5418, "step": 372000 }, { "epoch": 0.7411266415912279, "grad_norm": 0.1761476993560791, "learning_rate": 0.002, "loss": 2.5591, "step": 372010 }, { "epoch": 0.7411465638148668, "grad_norm": 0.200505331158638, "learning_rate": 0.002, "loss": 2.5564, "step": 372020 }, { "epoch": 0.7411664860385057, "grad_norm": 0.168381929397583, "learning_rate": 0.002, "loss": 2.5642, "step": 372030 }, { "epoch": 0.7411864082621445, "grad_norm": 0.172062948346138, "learning_rate": 0.002, "loss": 2.5459, "step": 372040 }, { "epoch": 0.7412063304857835, "grad_norm": 0.18883094191551208, "learning_rate": 0.002, "loss": 2.5562, "step": 372050 }, { "epoch": 0.7412262527094224, "grad_norm": 0.1579388529062271, "learning_rate": 0.002, "loss": 2.5676, "step": 372060 }, { "epoch": 0.7412461749330613, "grad_norm": 0.16127507388591766, "learning_rate": 0.002, "loss": 2.5451, "step": 372070 }, { "epoch": 0.7412660971567002, "grad_norm": 0.15454882383346558, "learning_rate": 0.002, "loss": 2.5486, "step": 372080 }, { "epoch": 0.7412860193803391, "grad_norm": 0.16335169970989227, "learning_rate": 0.002, "loss": 2.5549, "step": 372090 }, { "epoch": 0.7413059416039781, "grad_norm": 0.16737626492977142, "learning_rate": 0.002, "loss": 2.5658, "step": 372100 }, { "epoch": 0.741325863827617, "grad_norm": 0.17136813700199127, "learning_rate": 0.002, "loss": 2.552, "step": 372110 }, { "epoch": 0.7413457860512559, "grad_norm": 0.1513003557920456, "learning_rate": 0.002, "loss": 2.5533, "step": 372120 }, { "epoch": 0.7413657082748948, "grad_norm": 0.14156828820705414, "learning_rate": 0.002, "loss": 2.5596, "step": 372130 }, { "epoch": 0.7413856304985337, "grad_norm": 0.1696673333644867, "learning_rate": 0.002, "loss": 2.5391, "step": 372140 }, { "epoch": 0.7414055527221727, "grad_norm": 0.15624447166919708, "learning_rate": 0.002, "loss": 2.5357, "step": 372150 }, { "epoch": 0.7414254749458116, "grad_norm": 0.1678861677646637, "learning_rate": 0.002, "loss": 2.5466, "step": 372160 }, { "epoch": 0.7414453971694505, "grad_norm": 0.15330855548381805, "learning_rate": 0.002, "loss": 2.5542, "step": 372170 }, { "epoch": 0.7414653193930894, "grad_norm": 0.2013867050409317, "learning_rate": 0.002, "loss": 2.553, "step": 372180 }, { "epoch": 0.7414852416167282, "grad_norm": 0.14229844510555267, "learning_rate": 0.002, "loss": 2.5643, "step": 372190 }, { "epoch": 0.7415051638403672, "grad_norm": 0.19145749509334564, "learning_rate": 0.002, "loss": 2.5732, "step": 372200 }, { "epoch": 0.7415250860640061, "grad_norm": 0.14647173881530762, "learning_rate": 0.002, "loss": 2.5445, "step": 372210 }, { "epoch": 0.741545008287645, "grad_norm": 0.16099292039871216, "learning_rate": 0.002, "loss": 2.5471, "step": 372220 }, { "epoch": 0.7415649305112839, "grad_norm": 0.16242413222789764, "learning_rate": 0.002, "loss": 2.5555, "step": 372230 }, { "epoch": 0.7415848527349228, "grad_norm": 0.19784018397331238, "learning_rate": 0.002, "loss": 2.5426, "step": 372240 }, { "epoch": 0.7416047749585618, "grad_norm": 0.16744552552700043, "learning_rate": 0.002, "loss": 2.5513, "step": 372250 }, { "epoch": 0.7416246971822007, "grad_norm": 0.13292482495307922, "learning_rate": 0.002, "loss": 2.564, "step": 372260 }, { "epoch": 0.7416446194058396, "grad_norm": 0.17369158565998077, "learning_rate": 0.002, "loss": 2.5561, "step": 372270 }, { "epoch": 0.7416645416294785, "grad_norm": 0.1385006308555603, "learning_rate": 0.002, "loss": 2.5454, "step": 372280 }, { "epoch": 0.7416844638531175, "grad_norm": 0.1580030620098114, "learning_rate": 0.002, "loss": 2.5565, "step": 372290 }, { "epoch": 0.7417043860767564, "grad_norm": 0.1561848670244217, "learning_rate": 0.002, "loss": 2.5546, "step": 372300 }, { "epoch": 0.7417243083003953, "grad_norm": 0.15619730949401855, "learning_rate": 0.002, "loss": 2.5517, "step": 372310 }, { "epoch": 0.7417442305240342, "grad_norm": 0.15099337697029114, "learning_rate": 0.002, "loss": 2.5545, "step": 372320 }, { "epoch": 0.741764152747673, "grad_norm": 0.157582625746727, "learning_rate": 0.002, "loss": 2.5492, "step": 372330 }, { "epoch": 0.741784074971312, "grad_norm": 0.16935914754867554, "learning_rate": 0.002, "loss": 2.5647, "step": 372340 }, { "epoch": 0.7418039971949509, "grad_norm": 0.1503516286611557, "learning_rate": 0.002, "loss": 2.5553, "step": 372350 }, { "epoch": 0.7418239194185898, "grad_norm": 0.16008178889751434, "learning_rate": 0.002, "loss": 2.5469, "step": 372360 }, { "epoch": 0.7418438416422287, "grad_norm": 0.20136147737503052, "learning_rate": 0.002, "loss": 2.5534, "step": 372370 }, { "epoch": 0.7418637638658676, "grad_norm": 0.16121971607208252, "learning_rate": 0.002, "loss": 2.5554, "step": 372380 }, { "epoch": 0.7418836860895066, "grad_norm": 0.2298726588487625, "learning_rate": 0.002, "loss": 2.5626, "step": 372390 }, { "epoch": 0.7419036083131455, "grad_norm": 0.1694350689649582, "learning_rate": 0.002, "loss": 2.5612, "step": 372400 }, { "epoch": 0.7419235305367844, "grad_norm": 0.15434251725673676, "learning_rate": 0.002, "loss": 2.5527, "step": 372410 }, { "epoch": 0.7419434527604233, "grad_norm": 0.20251627266407013, "learning_rate": 0.002, "loss": 2.555, "step": 372420 }, { "epoch": 0.7419633749840622, "grad_norm": 0.15847893059253693, "learning_rate": 0.002, "loss": 2.5552, "step": 372430 }, { "epoch": 0.7419832972077012, "grad_norm": 0.1575513780117035, "learning_rate": 0.002, "loss": 2.5515, "step": 372440 }, { "epoch": 0.7420032194313401, "grad_norm": 0.16110213100910187, "learning_rate": 0.002, "loss": 2.5535, "step": 372450 }, { "epoch": 0.742023141654979, "grad_norm": 0.15237009525299072, "learning_rate": 0.002, "loss": 2.5436, "step": 372460 }, { "epoch": 0.7420430638786178, "grad_norm": 0.16100233793258667, "learning_rate": 0.002, "loss": 2.5557, "step": 372470 }, { "epoch": 0.7420629861022567, "grad_norm": 0.16536112129688263, "learning_rate": 0.002, "loss": 2.5339, "step": 372480 }, { "epoch": 0.7420829083258957, "grad_norm": 0.17869482934474945, "learning_rate": 0.002, "loss": 2.5534, "step": 372490 }, { "epoch": 0.7421028305495346, "grad_norm": 0.1430872678756714, "learning_rate": 0.002, "loss": 2.5597, "step": 372500 }, { "epoch": 0.7421227527731735, "grad_norm": 0.18899644911289215, "learning_rate": 0.002, "loss": 2.5584, "step": 372510 }, { "epoch": 0.7421426749968124, "grad_norm": 0.20844663679599762, "learning_rate": 0.002, "loss": 2.5537, "step": 372520 }, { "epoch": 0.7421625972204513, "grad_norm": 0.17557014524936676, "learning_rate": 0.002, "loss": 2.5565, "step": 372530 }, { "epoch": 0.7421825194440903, "grad_norm": 0.14994311332702637, "learning_rate": 0.002, "loss": 2.5457, "step": 372540 }, { "epoch": 0.7422024416677292, "grad_norm": 0.16857944428920746, "learning_rate": 0.002, "loss": 2.5484, "step": 372550 }, { "epoch": 0.7422223638913681, "grad_norm": 0.20020659267902374, "learning_rate": 0.002, "loss": 2.5508, "step": 372560 }, { "epoch": 0.742242286115007, "grad_norm": 0.16765734553337097, "learning_rate": 0.002, "loss": 2.5566, "step": 372570 }, { "epoch": 0.742262208338646, "grad_norm": 0.14471715688705444, "learning_rate": 0.002, "loss": 2.5462, "step": 372580 }, { "epoch": 0.7422821305622849, "grad_norm": 0.1552150398492813, "learning_rate": 0.002, "loss": 2.5581, "step": 372590 }, { "epoch": 0.7423020527859238, "grad_norm": 0.1780693531036377, "learning_rate": 0.002, "loss": 2.5513, "step": 372600 }, { "epoch": 0.7423219750095627, "grad_norm": 0.182081937789917, "learning_rate": 0.002, "loss": 2.5465, "step": 372610 }, { "epoch": 0.7423418972332015, "grad_norm": 0.17606255412101746, "learning_rate": 0.002, "loss": 2.5401, "step": 372620 }, { "epoch": 0.7423618194568405, "grad_norm": 0.16123902797698975, "learning_rate": 0.002, "loss": 2.5716, "step": 372630 }, { "epoch": 0.7423817416804794, "grad_norm": 0.17508219182491302, "learning_rate": 0.002, "loss": 2.5544, "step": 372640 }, { "epoch": 0.7424016639041183, "grad_norm": 0.14610596001148224, "learning_rate": 0.002, "loss": 2.5611, "step": 372650 }, { "epoch": 0.7424215861277572, "grad_norm": 0.1429731696844101, "learning_rate": 0.002, "loss": 2.5451, "step": 372660 }, { "epoch": 0.7424415083513961, "grad_norm": 0.1693098247051239, "learning_rate": 0.002, "loss": 2.559, "step": 372670 }, { "epoch": 0.7424614305750351, "grad_norm": 0.20053227245807648, "learning_rate": 0.002, "loss": 2.5683, "step": 372680 }, { "epoch": 0.742481352798674, "grad_norm": 0.15577851235866547, "learning_rate": 0.002, "loss": 2.5639, "step": 372690 }, { "epoch": 0.7425012750223129, "grad_norm": 0.18714909255504608, "learning_rate": 0.002, "loss": 2.5581, "step": 372700 }, { "epoch": 0.7425211972459518, "grad_norm": 0.16246552765369415, "learning_rate": 0.002, "loss": 2.5566, "step": 372710 }, { "epoch": 0.7425411194695907, "grad_norm": 0.15960143506526947, "learning_rate": 0.002, "loss": 2.5652, "step": 372720 }, { "epoch": 0.7425610416932297, "grad_norm": 0.19090934097766876, "learning_rate": 0.002, "loss": 2.557, "step": 372730 }, { "epoch": 0.7425809639168686, "grad_norm": 0.16300269961357117, "learning_rate": 0.002, "loss": 2.551, "step": 372740 }, { "epoch": 0.7426008861405075, "grad_norm": 0.16242599487304688, "learning_rate": 0.002, "loss": 2.5649, "step": 372750 }, { "epoch": 0.7426208083641463, "grad_norm": 0.156644806265831, "learning_rate": 0.002, "loss": 2.5666, "step": 372760 }, { "epoch": 0.7426407305877852, "grad_norm": 0.1790734976530075, "learning_rate": 0.002, "loss": 2.5535, "step": 372770 }, { "epoch": 0.7426606528114242, "grad_norm": 0.16345641016960144, "learning_rate": 0.002, "loss": 2.5587, "step": 372780 }, { "epoch": 0.7426805750350631, "grad_norm": 0.16575121879577637, "learning_rate": 0.002, "loss": 2.5395, "step": 372790 }, { "epoch": 0.742700497258702, "grad_norm": 0.17596818506717682, "learning_rate": 0.002, "loss": 2.5612, "step": 372800 }, { "epoch": 0.7427204194823409, "grad_norm": 0.14116545021533966, "learning_rate": 0.002, "loss": 2.5567, "step": 372810 }, { "epoch": 0.7427403417059798, "grad_norm": 0.16080565750598907, "learning_rate": 0.002, "loss": 2.5473, "step": 372820 }, { "epoch": 0.7427602639296188, "grad_norm": 0.20509879291057587, "learning_rate": 0.002, "loss": 2.5414, "step": 372830 }, { "epoch": 0.7427801861532577, "grad_norm": 0.1296335607767105, "learning_rate": 0.002, "loss": 2.5429, "step": 372840 }, { "epoch": 0.7428001083768966, "grad_norm": 0.17503495514392853, "learning_rate": 0.002, "loss": 2.5504, "step": 372850 }, { "epoch": 0.7428200306005355, "grad_norm": 0.1522599458694458, "learning_rate": 0.002, "loss": 2.5649, "step": 372860 }, { "epoch": 0.7428399528241745, "grad_norm": 0.1668054461479187, "learning_rate": 0.002, "loss": 2.5695, "step": 372870 }, { "epoch": 0.7428598750478134, "grad_norm": 0.2133040428161621, "learning_rate": 0.002, "loss": 2.5506, "step": 372880 }, { "epoch": 0.7428797972714523, "grad_norm": 0.16319772601127625, "learning_rate": 0.002, "loss": 2.572, "step": 372890 }, { "epoch": 0.7428997194950911, "grad_norm": 0.1404043287038803, "learning_rate": 0.002, "loss": 2.5487, "step": 372900 }, { "epoch": 0.74291964171873, "grad_norm": 0.20864146947860718, "learning_rate": 0.002, "loss": 2.5477, "step": 372910 }, { "epoch": 0.742939563942369, "grad_norm": 0.15836398303508759, "learning_rate": 0.002, "loss": 2.5465, "step": 372920 }, { "epoch": 0.7429594861660079, "grad_norm": 0.18341778218746185, "learning_rate": 0.002, "loss": 2.5434, "step": 372930 }, { "epoch": 0.7429794083896468, "grad_norm": 0.16412147879600525, "learning_rate": 0.002, "loss": 2.5541, "step": 372940 }, { "epoch": 0.7429993306132857, "grad_norm": 0.187821164727211, "learning_rate": 0.002, "loss": 2.5484, "step": 372950 }, { "epoch": 0.7430192528369246, "grad_norm": 0.21252918243408203, "learning_rate": 0.002, "loss": 2.5556, "step": 372960 }, { "epoch": 0.7430391750605636, "grad_norm": 0.1353023797273636, "learning_rate": 0.002, "loss": 2.5523, "step": 372970 }, { "epoch": 0.7430590972842025, "grad_norm": 0.22175928950309753, "learning_rate": 0.002, "loss": 2.5611, "step": 372980 }, { "epoch": 0.7430790195078414, "grad_norm": 0.1608588844537735, "learning_rate": 0.002, "loss": 2.5475, "step": 372990 }, { "epoch": 0.7430989417314803, "grad_norm": 0.16046267747879028, "learning_rate": 0.002, "loss": 2.5567, "step": 373000 }, { "epoch": 0.7431188639551192, "grad_norm": 0.18598484992980957, "learning_rate": 0.002, "loss": 2.5547, "step": 373010 }, { "epoch": 0.7431387861787582, "grad_norm": 0.1728239357471466, "learning_rate": 0.002, "loss": 2.5504, "step": 373020 }, { "epoch": 0.7431587084023971, "grad_norm": 0.17169371247291565, "learning_rate": 0.002, "loss": 2.5568, "step": 373030 }, { "epoch": 0.743178630626036, "grad_norm": 0.17028163373470306, "learning_rate": 0.002, "loss": 2.5676, "step": 373040 }, { "epoch": 0.7431985528496748, "grad_norm": 0.18512853980064392, "learning_rate": 0.002, "loss": 2.5419, "step": 373050 }, { "epoch": 0.7432184750733137, "grad_norm": 0.16342793405056, "learning_rate": 0.002, "loss": 2.5619, "step": 373060 }, { "epoch": 0.7432383972969527, "grad_norm": 0.15855716168880463, "learning_rate": 0.002, "loss": 2.549, "step": 373070 }, { "epoch": 0.7432583195205916, "grad_norm": 0.15094344317913055, "learning_rate": 0.002, "loss": 2.5499, "step": 373080 }, { "epoch": 0.7432782417442305, "grad_norm": 0.16126379370689392, "learning_rate": 0.002, "loss": 2.5455, "step": 373090 }, { "epoch": 0.7432981639678694, "grad_norm": 0.15887047350406647, "learning_rate": 0.002, "loss": 2.5465, "step": 373100 }, { "epoch": 0.7433180861915083, "grad_norm": 0.15607044100761414, "learning_rate": 0.002, "loss": 2.5399, "step": 373110 }, { "epoch": 0.7433380084151473, "grad_norm": 0.16410337388515472, "learning_rate": 0.002, "loss": 2.5519, "step": 373120 }, { "epoch": 0.7433579306387862, "grad_norm": 0.17998813092708588, "learning_rate": 0.002, "loss": 2.5477, "step": 373130 }, { "epoch": 0.7433778528624251, "grad_norm": 0.16276372969150543, "learning_rate": 0.002, "loss": 2.5448, "step": 373140 }, { "epoch": 0.743397775086064, "grad_norm": 0.17820769548416138, "learning_rate": 0.002, "loss": 2.5414, "step": 373150 }, { "epoch": 0.743417697309703, "grad_norm": 0.17707648873329163, "learning_rate": 0.002, "loss": 2.5587, "step": 373160 }, { "epoch": 0.7434376195333419, "grad_norm": 0.15440364181995392, "learning_rate": 0.002, "loss": 2.5485, "step": 373170 }, { "epoch": 0.7434575417569808, "grad_norm": 0.1541580855846405, "learning_rate": 0.002, "loss": 2.5572, "step": 373180 }, { "epoch": 0.7434774639806196, "grad_norm": 0.20593541860580444, "learning_rate": 0.002, "loss": 2.5533, "step": 373190 }, { "epoch": 0.7434973862042585, "grad_norm": 0.14345696568489075, "learning_rate": 0.002, "loss": 2.5494, "step": 373200 }, { "epoch": 0.7435173084278975, "grad_norm": 0.16532516479492188, "learning_rate": 0.002, "loss": 2.5528, "step": 373210 }, { "epoch": 0.7435372306515364, "grad_norm": 0.15342971682548523, "learning_rate": 0.002, "loss": 2.5608, "step": 373220 }, { "epoch": 0.7435571528751753, "grad_norm": 0.16732540726661682, "learning_rate": 0.002, "loss": 2.5569, "step": 373230 }, { "epoch": 0.7435770750988142, "grad_norm": 0.18601033091545105, "learning_rate": 0.002, "loss": 2.5656, "step": 373240 }, { "epoch": 0.7435969973224531, "grad_norm": 0.17446798086166382, "learning_rate": 0.002, "loss": 2.569, "step": 373250 }, { "epoch": 0.7436169195460921, "grad_norm": 0.15109436213970184, "learning_rate": 0.002, "loss": 2.5552, "step": 373260 }, { "epoch": 0.743636841769731, "grad_norm": 0.1540907919406891, "learning_rate": 0.002, "loss": 2.5634, "step": 373270 }, { "epoch": 0.7436567639933699, "grad_norm": 0.16265560686588287, "learning_rate": 0.002, "loss": 2.5499, "step": 373280 }, { "epoch": 0.7436766862170088, "grad_norm": 0.14558623731136322, "learning_rate": 0.002, "loss": 2.5427, "step": 373290 }, { "epoch": 0.7436966084406477, "grad_norm": 0.2337726354598999, "learning_rate": 0.002, "loss": 2.5592, "step": 373300 }, { "epoch": 0.7437165306642867, "grad_norm": 0.16743139922618866, "learning_rate": 0.002, "loss": 2.5645, "step": 373310 }, { "epoch": 0.7437364528879256, "grad_norm": 0.17179051041603088, "learning_rate": 0.002, "loss": 2.555, "step": 373320 }, { "epoch": 0.7437563751115644, "grad_norm": 0.1790853887796402, "learning_rate": 0.002, "loss": 2.551, "step": 373330 }, { "epoch": 0.7437762973352033, "grad_norm": 0.16567300260066986, "learning_rate": 0.002, "loss": 2.5467, "step": 373340 }, { "epoch": 0.7437962195588422, "grad_norm": 0.14982593059539795, "learning_rate": 0.002, "loss": 2.5597, "step": 373350 }, { "epoch": 0.7438161417824812, "grad_norm": 0.18965400755405426, "learning_rate": 0.002, "loss": 2.5447, "step": 373360 }, { "epoch": 0.7438360640061201, "grad_norm": 0.18652760982513428, "learning_rate": 0.002, "loss": 2.5705, "step": 373370 }, { "epoch": 0.743855986229759, "grad_norm": 0.1521216481924057, "learning_rate": 0.002, "loss": 2.5377, "step": 373380 }, { "epoch": 0.7438759084533979, "grad_norm": 0.16397128999233246, "learning_rate": 0.002, "loss": 2.5708, "step": 373390 }, { "epoch": 0.7438958306770368, "grad_norm": 0.16136665642261505, "learning_rate": 0.002, "loss": 2.5439, "step": 373400 }, { "epoch": 0.7439157529006758, "grad_norm": 0.16477803885936737, "learning_rate": 0.002, "loss": 2.5651, "step": 373410 }, { "epoch": 0.7439356751243147, "grad_norm": 0.1515156775712967, "learning_rate": 0.002, "loss": 2.5546, "step": 373420 }, { "epoch": 0.7439555973479536, "grad_norm": 0.15192429721355438, "learning_rate": 0.002, "loss": 2.5499, "step": 373430 }, { "epoch": 0.7439755195715925, "grad_norm": 0.14966465532779694, "learning_rate": 0.002, "loss": 2.5691, "step": 373440 }, { "epoch": 0.7439954417952315, "grad_norm": 0.15482355654239655, "learning_rate": 0.002, "loss": 2.5409, "step": 373450 }, { "epoch": 0.7440153640188704, "grad_norm": 0.16884373128414154, "learning_rate": 0.002, "loss": 2.5606, "step": 373460 }, { "epoch": 0.7440352862425093, "grad_norm": 0.14647690951824188, "learning_rate": 0.002, "loss": 2.5395, "step": 373470 }, { "epoch": 0.7440552084661481, "grad_norm": 0.15854521095752716, "learning_rate": 0.002, "loss": 2.5528, "step": 373480 }, { "epoch": 0.744075130689787, "grad_norm": 0.1724579930305481, "learning_rate": 0.002, "loss": 2.5515, "step": 373490 }, { "epoch": 0.744095052913426, "grad_norm": 0.14871560037136078, "learning_rate": 0.002, "loss": 2.5519, "step": 373500 }, { "epoch": 0.7441149751370649, "grad_norm": 0.17450453341007233, "learning_rate": 0.002, "loss": 2.5611, "step": 373510 }, { "epoch": 0.7441348973607038, "grad_norm": 0.16859589517116547, "learning_rate": 0.002, "loss": 2.5502, "step": 373520 }, { "epoch": 0.7441548195843427, "grad_norm": 0.15394993126392365, "learning_rate": 0.002, "loss": 2.5578, "step": 373530 }, { "epoch": 0.7441747418079816, "grad_norm": 0.17718911170959473, "learning_rate": 0.002, "loss": 2.5741, "step": 373540 }, { "epoch": 0.7441946640316206, "grad_norm": 0.16724294424057007, "learning_rate": 0.002, "loss": 2.5433, "step": 373550 }, { "epoch": 0.7442145862552595, "grad_norm": 0.318566232919693, "learning_rate": 0.002, "loss": 2.5512, "step": 373560 }, { "epoch": 0.7442345084788984, "grad_norm": 0.15001438558101654, "learning_rate": 0.002, "loss": 2.5564, "step": 373570 }, { "epoch": 0.7442544307025373, "grad_norm": 0.17393948137760162, "learning_rate": 0.002, "loss": 2.5492, "step": 373580 }, { "epoch": 0.7442743529261762, "grad_norm": 0.17926359176635742, "learning_rate": 0.002, "loss": 2.5449, "step": 373590 }, { "epoch": 0.7442942751498152, "grad_norm": 0.19361893832683563, "learning_rate": 0.002, "loss": 2.5643, "step": 373600 }, { "epoch": 0.7443141973734541, "grad_norm": 0.15058816969394684, "learning_rate": 0.002, "loss": 2.5582, "step": 373610 }, { "epoch": 0.744334119597093, "grad_norm": 0.1592465192079544, "learning_rate": 0.002, "loss": 2.5352, "step": 373620 }, { "epoch": 0.7443540418207318, "grad_norm": 0.1676124930381775, "learning_rate": 0.002, "loss": 2.5649, "step": 373630 }, { "epoch": 0.7443739640443707, "grad_norm": 0.16719618439674377, "learning_rate": 0.002, "loss": 2.5687, "step": 373640 }, { "epoch": 0.7443938862680097, "grad_norm": 0.1773216873407364, "learning_rate": 0.002, "loss": 2.5474, "step": 373650 }, { "epoch": 0.7444138084916486, "grad_norm": 0.17114494740962982, "learning_rate": 0.002, "loss": 2.5514, "step": 373660 }, { "epoch": 0.7444337307152875, "grad_norm": 0.15226761996746063, "learning_rate": 0.002, "loss": 2.5595, "step": 373670 }, { "epoch": 0.7444536529389264, "grad_norm": 0.17537149786949158, "learning_rate": 0.002, "loss": 2.554, "step": 373680 }, { "epoch": 0.7444735751625653, "grad_norm": 0.17707499861717224, "learning_rate": 0.002, "loss": 2.556, "step": 373690 }, { "epoch": 0.7444934973862043, "grad_norm": 0.1706763654947281, "learning_rate": 0.002, "loss": 2.5651, "step": 373700 }, { "epoch": 0.7445134196098432, "grad_norm": 0.192195326089859, "learning_rate": 0.002, "loss": 2.5551, "step": 373710 }, { "epoch": 0.7445333418334821, "grad_norm": 0.1574721485376358, "learning_rate": 0.002, "loss": 2.5463, "step": 373720 }, { "epoch": 0.744553264057121, "grad_norm": 0.21781891584396362, "learning_rate": 0.002, "loss": 2.553, "step": 373730 }, { "epoch": 0.74457318628076, "grad_norm": 0.1850765347480774, "learning_rate": 0.002, "loss": 2.5604, "step": 373740 }, { "epoch": 0.7445931085043989, "grad_norm": 0.1563909351825714, "learning_rate": 0.002, "loss": 2.5529, "step": 373750 }, { "epoch": 0.7446130307280378, "grad_norm": 0.16009235382080078, "learning_rate": 0.002, "loss": 2.5658, "step": 373760 }, { "epoch": 0.7446329529516766, "grad_norm": 0.19502247869968414, "learning_rate": 0.002, "loss": 2.5349, "step": 373770 }, { "epoch": 0.7446528751753155, "grad_norm": 0.16259431838989258, "learning_rate": 0.002, "loss": 2.5463, "step": 373780 }, { "epoch": 0.7446727973989545, "grad_norm": 0.15072989463806152, "learning_rate": 0.002, "loss": 2.5472, "step": 373790 }, { "epoch": 0.7446927196225934, "grad_norm": 0.19818246364593506, "learning_rate": 0.002, "loss": 2.5588, "step": 373800 }, { "epoch": 0.7447126418462323, "grad_norm": 0.17507703602313995, "learning_rate": 0.002, "loss": 2.5505, "step": 373810 }, { "epoch": 0.7447325640698712, "grad_norm": 0.1474446803331375, "learning_rate": 0.002, "loss": 2.554, "step": 373820 }, { "epoch": 0.7447524862935101, "grad_norm": 0.16621555387973785, "learning_rate": 0.002, "loss": 2.5657, "step": 373830 }, { "epoch": 0.7447724085171491, "grad_norm": 0.18373875319957733, "learning_rate": 0.002, "loss": 2.55, "step": 373840 }, { "epoch": 0.744792330740788, "grad_norm": 0.1585358828306198, "learning_rate": 0.002, "loss": 2.5431, "step": 373850 }, { "epoch": 0.7448122529644269, "grad_norm": 0.16076798737049103, "learning_rate": 0.002, "loss": 2.5566, "step": 373860 }, { "epoch": 0.7448321751880658, "grad_norm": 0.17574137449264526, "learning_rate": 0.002, "loss": 2.559, "step": 373870 }, { "epoch": 0.7448520974117047, "grad_norm": 0.1920383870601654, "learning_rate": 0.002, "loss": 2.5523, "step": 373880 }, { "epoch": 0.7448720196353437, "grad_norm": 0.17218197882175446, "learning_rate": 0.002, "loss": 2.5566, "step": 373890 }, { "epoch": 0.7448919418589826, "grad_norm": 0.15581752359867096, "learning_rate": 0.002, "loss": 2.5502, "step": 373900 }, { "epoch": 0.7449118640826214, "grad_norm": 0.18899115920066833, "learning_rate": 0.002, "loss": 2.5426, "step": 373910 }, { "epoch": 0.7449317863062603, "grad_norm": 0.19004394114017487, "learning_rate": 0.002, "loss": 2.5495, "step": 373920 }, { "epoch": 0.7449517085298992, "grad_norm": 0.18140852451324463, "learning_rate": 0.002, "loss": 2.5485, "step": 373930 }, { "epoch": 0.7449716307535382, "grad_norm": 0.1696334183216095, "learning_rate": 0.002, "loss": 2.5611, "step": 373940 }, { "epoch": 0.7449915529771771, "grad_norm": 0.21111276745796204, "learning_rate": 0.002, "loss": 2.5795, "step": 373950 }, { "epoch": 0.745011475200816, "grad_norm": 0.1687506139278412, "learning_rate": 0.002, "loss": 2.5465, "step": 373960 }, { "epoch": 0.7450313974244549, "grad_norm": 0.18652133643627167, "learning_rate": 0.002, "loss": 2.5618, "step": 373970 }, { "epoch": 0.7450513196480938, "grad_norm": 0.15486742556095123, "learning_rate": 0.002, "loss": 2.5626, "step": 373980 }, { "epoch": 0.7450712418717328, "grad_norm": 0.1577288955450058, "learning_rate": 0.002, "loss": 2.5495, "step": 373990 }, { "epoch": 0.7450911640953717, "grad_norm": 0.1456364393234253, "learning_rate": 0.002, "loss": 2.5542, "step": 374000 }, { "epoch": 0.7451110863190106, "grad_norm": 0.133406862616539, "learning_rate": 0.002, "loss": 2.5442, "step": 374010 }, { "epoch": 0.7451310085426495, "grad_norm": 0.21142315864562988, "learning_rate": 0.002, "loss": 2.5508, "step": 374020 }, { "epoch": 0.7451509307662884, "grad_norm": 0.16632887721061707, "learning_rate": 0.002, "loss": 2.5589, "step": 374030 }, { "epoch": 0.7451708529899274, "grad_norm": 0.18250258266925812, "learning_rate": 0.002, "loss": 2.5544, "step": 374040 }, { "epoch": 0.7451907752135662, "grad_norm": 0.16240382194519043, "learning_rate": 0.002, "loss": 2.5599, "step": 374050 }, { "epoch": 0.7452106974372051, "grad_norm": 0.15329869091510773, "learning_rate": 0.002, "loss": 2.5633, "step": 374060 }, { "epoch": 0.745230619660844, "grad_norm": 0.1722443401813507, "learning_rate": 0.002, "loss": 2.5567, "step": 374070 }, { "epoch": 0.745250541884483, "grad_norm": 0.17568209767341614, "learning_rate": 0.002, "loss": 2.5585, "step": 374080 }, { "epoch": 0.7452704641081219, "grad_norm": 0.19635915756225586, "learning_rate": 0.002, "loss": 2.5636, "step": 374090 }, { "epoch": 0.7452903863317608, "grad_norm": 0.15369197726249695, "learning_rate": 0.002, "loss": 2.5613, "step": 374100 }, { "epoch": 0.7453103085553997, "grad_norm": 0.16991537809371948, "learning_rate": 0.002, "loss": 2.5662, "step": 374110 }, { "epoch": 0.7453302307790386, "grad_norm": 0.15573853254318237, "learning_rate": 0.002, "loss": 2.5544, "step": 374120 }, { "epoch": 0.7453501530026776, "grad_norm": 0.2023661583662033, "learning_rate": 0.002, "loss": 2.5541, "step": 374130 }, { "epoch": 0.7453700752263165, "grad_norm": 0.17995718121528625, "learning_rate": 0.002, "loss": 2.5532, "step": 374140 }, { "epoch": 0.7453899974499554, "grad_norm": 0.21667936444282532, "learning_rate": 0.002, "loss": 2.5535, "step": 374150 }, { "epoch": 0.7454099196735943, "grad_norm": 0.1716565191745758, "learning_rate": 0.002, "loss": 2.5572, "step": 374160 }, { "epoch": 0.7454298418972332, "grad_norm": 0.1716178059577942, "learning_rate": 0.002, "loss": 2.5505, "step": 374170 }, { "epoch": 0.7454497641208722, "grad_norm": 0.17605920135974884, "learning_rate": 0.002, "loss": 2.5626, "step": 374180 }, { "epoch": 0.745469686344511, "grad_norm": 0.15910334885120392, "learning_rate": 0.002, "loss": 2.5641, "step": 374190 }, { "epoch": 0.7454896085681499, "grad_norm": 0.15649889409542084, "learning_rate": 0.002, "loss": 2.5671, "step": 374200 }, { "epoch": 0.7455095307917888, "grad_norm": 0.1479223370552063, "learning_rate": 0.002, "loss": 2.5447, "step": 374210 }, { "epoch": 0.7455294530154277, "grad_norm": 0.15880940854549408, "learning_rate": 0.002, "loss": 2.5651, "step": 374220 }, { "epoch": 0.7455493752390667, "grad_norm": 0.19546356797218323, "learning_rate": 0.002, "loss": 2.548, "step": 374230 }, { "epoch": 0.7455692974627056, "grad_norm": 0.18507173657417297, "learning_rate": 0.002, "loss": 2.5514, "step": 374240 }, { "epoch": 0.7455892196863445, "grad_norm": 0.15523508191108704, "learning_rate": 0.002, "loss": 2.55, "step": 374250 }, { "epoch": 0.7456091419099834, "grad_norm": 0.16580095887184143, "learning_rate": 0.002, "loss": 2.5424, "step": 374260 }, { "epoch": 0.7456290641336223, "grad_norm": 0.16314437985420227, "learning_rate": 0.002, "loss": 2.562, "step": 374270 }, { "epoch": 0.7456489863572613, "grad_norm": 0.16616928577423096, "learning_rate": 0.002, "loss": 2.5339, "step": 374280 }, { "epoch": 0.7456689085809002, "grad_norm": 0.14219248294830322, "learning_rate": 0.002, "loss": 2.557, "step": 374290 }, { "epoch": 0.7456888308045391, "grad_norm": 0.1532927006483078, "learning_rate": 0.002, "loss": 2.5639, "step": 374300 }, { "epoch": 0.745708753028178, "grad_norm": 0.17394278943538666, "learning_rate": 0.002, "loss": 2.5521, "step": 374310 }, { "epoch": 0.7457286752518169, "grad_norm": 0.20664794743061066, "learning_rate": 0.002, "loss": 2.5548, "step": 374320 }, { "epoch": 0.7457485974754559, "grad_norm": 0.18383163213729858, "learning_rate": 0.002, "loss": 2.571, "step": 374330 }, { "epoch": 0.7457685196990947, "grad_norm": 0.1683766096830368, "learning_rate": 0.002, "loss": 2.5562, "step": 374340 }, { "epoch": 0.7457884419227336, "grad_norm": 0.21163786947727203, "learning_rate": 0.002, "loss": 2.5626, "step": 374350 }, { "epoch": 0.7458083641463725, "grad_norm": 0.15652331709861755, "learning_rate": 0.002, "loss": 2.551, "step": 374360 }, { "epoch": 0.7458282863700115, "grad_norm": 0.16101938486099243, "learning_rate": 0.002, "loss": 2.5507, "step": 374370 }, { "epoch": 0.7458482085936504, "grad_norm": 0.17570596933364868, "learning_rate": 0.002, "loss": 2.5512, "step": 374380 }, { "epoch": 0.7458681308172893, "grad_norm": 0.13385368883609772, "learning_rate": 0.002, "loss": 2.5544, "step": 374390 }, { "epoch": 0.7458880530409282, "grad_norm": 0.14832912385463715, "learning_rate": 0.002, "loss": 2.555, "step": 374400 }, { "epoch": 0.7459079752645671, "grad_norm": 0.14775589108467102, "learning_rate": 0.002, "loss": 2.5402, "step": 374410 }, { "epoch": 0.7459278974882061, "grad_norm": 0.16958275437355042, "learning_rate": 0.002, "loss": 2.5551, "step": 374420 }, { "epoch": 0.745947819711845, "grad_norm": 0.1761002391576767, "learning_rate": 0.002, "loss": 2.5612, "step": 374430 }, { "epoch": 0.7459677419354839, "grad_norm": 0.1669187843799591, "learning_rate": 0.002, "loss": 2.558, "step": 374440 }, { "epoch": 0.7459876641591228, "grad_norm": 0.24000689387321472, "learning_rate": 0.002, "loss": 2.5488, "step": 374450 }, { "epoch": 0.7460075863827617, "grad_norm": 0.15075884759426117, "learning_rate": 0.002, "loss": 2.5587, "step": 374460 }, { "epoch": 0.7460275086064007, "grad_norm": 0.17816723883152008, "learning_rate": 0.002, "loss": 2.5682, "step": 374470 }, { "epoch": 0.7460474308300395, "grad_norm": 0.2013188749551773, "learning_rate": 0.002, "loss": 2.5504, "step": 374480 }, { "epoch": 0.7460673530536784, "grad_norm": 0.15019828081130981, "learning_rate": 0.002, "loss": 2.5536, "step": 374490 }, { "epoch": 0.7460872752773173, "grad_norm": 0.14914964139461517, "learning_rate": 0.002, "loss": 2.5566, "step": 374500 }, { "epoch": 0.7461071975009562, "grad_norm": 0.18217502534389496, "learning_rate": 0.002, "loss": 2.5543, "step": 374510 }, { "epoch": 0.7461271197245952, "grad_norm": 0.1962321549654007, "learning_rate": 0.002, "loss": 2.5633, "step": 374520 }, { "epoch": 0.7461470419482341, "grad_norm": 0.15262053906917572, "learning_rate": 0.002, "loss": 2.5502, "step": 374530 }, { "epoch": 0.746166964171873, "grad_norm": 0.14578059315681458, "learning_rate": 0.002, "loss": 2.5531, "step": 374540 }, { "epoch": 0.7461868863955119, "grad_norm": 0.19103454053401947, "learning_rate": 0.002, "loss": 2.5587, "step": 374550 }, { "epoch": 0.7462068086191508, "grad_norm": 0.23457638919353485, "learning_rate": 0.002, "loss": 2.563, "step": 374560 }, { "epoch": 0.7462267308427898, "grad_norm": 0.16839872300624847, "learning_rate": 0.002, "loss": 2.5606, "step": 374570 }, { "epoch": 0.7462466530664287, "grad_norm": 0.18227171897888184, "learning_rate": 0.002, "loss": 2.5414, "step": 374580 }, { "epoch": 0.7462665752900676, "grad_norm": 0.236235573887825, "learning_rate": 0.002, "loss": 2.5634, "step": 374590 }, { "epoch": 0.7462864975137065, "grad_norm": 0.1410650759935379, "learning_rate": 0.002, "loss": 2.551, "step": 374600 }, { "epoch": 0.7463064197373454, "grad_norm": 0.16391028463840485, "learning_rate": 0.002, "loss": 2.5426, "step": 374610 }, { "epoch": 0.7463263419609844, "grad_norm": 0.18675680458545685, "learning_rate": 0.002, "loss": 2.5729, "step": 374620 }, { "epoch": 0.7463462641846232, "grad_norm": 0.1775771528482437, "learning_rate": 0.002, "loss": 2.5673, "step": 374630 }, { "epoch": 0.7463661864082621, "grad_norm": 0.19692283868789673, "learning_rate": 0.002, "loss": 2.5545, "step": 374640 }, { "epoch": 0.746386108631901, "grad_norm": 0.19487713277339935, "learning_rate": 0.002, "loss": 2.5664, "step": 374650 }, { "epoch": 0.74640603085554, "grad_norm": 0.15360115468502045, "learning_rate": 0.002, "loss": 2.5495, "step": 374660 }, { "epoch": 0.7464259530791789, "grad_norm": 0.1575567126274109, "learning_rate": 0.002, "loss": 2.5526, "step": 374670 }, { "epoch": 0.7464458753028178, "grad_norm": 0.1783868670463562, "learning_rate": 0.002, "loss": 2.5476, "step": 374680 }, { "epoch": 0.7464657975264567, "grad_norm": 0.16755017638206482, "learning_rate": 0.002, "loss": 2.5348, "step": 374690 }, { "epoch": 0.7464857197500956, "grad_norm": 0.16610132157802582, "learning_rate": 0.002, "loss": 2.5525, "step": 374700 }, { "epoch": 0.7465056419737346, "grad_norm": 0.17674005031585693, "learning_rate": 0.002, "loss": 2.5622, "step": 374710 }, { "epoch": 0.7465255641973735, "grad_norm": 0.1510060727596283, "learning_rate": 0.002, "loss": 2.5717, "step": 374720 }, { "epoch": 0.7465454864210124, "grad_norm": 0.16610309481620789, "learning_rate": 0.002, "loss": 2.5521, "step": 374730 }, { "epoch": 0.7465654086446513, "grad_norm": 0.16532500088214874, "learning_rate": 0.002, "loss": 2.5533, "step": 374740 }, { "epoch": 0.7465853308682902, "grad_norm": 0.15930961072444916, "learning_rate": 0.002, "loss": 2.5463, "step": 374750 }, { "epoch": 0.7466052530919292, "grad_norm": 0.14708280563354492, "learning_rate": 0.002, "loss": 2.5647, "step": 374760 }, { "epoch": 0.746625175315568, "grad_norm": 0.18729080259799957, "learning_rate": 0.002, "loss": 2.5543, "step": 374770 }, { "epoch": 0.7466450975392069, "grad_norm": 0.16746094822883606, "learning_rate": 0.002, "loss": 2.5416, "step": 374780 }, { "epoch": 0.7466650197628458, "grad_norm": 0.1851889193058014, "learning_rate": 0.002, "loss": 2.5628, "step": 374790 }, { "epoch": 0.7466849419864847, "grad_norm": 0.1618792861700058, "learning_rate": 0.002, "loss": 2.5493, "step": 374800 }, { "epoch": 0.7467048642101237, "grad_norm": 0.18720115721225739, "learning_rate": 0.002, "loss": 2.5686, "step": 374810 }, { "epoch": 0.7467247864337626, "grad_norm": 0.1554233729839325, "learning_rate": 0.002, "loss": 2.5432, "step": 374820 }, { "epoch": 0.7467447086574015, "grad_norm": 0.15980929136276245, "learning_rate": 0.002, "loss": 2.5485, "step": 374830 }, { "epoch": 0.7467646308810404, "grad_norm": 0.1839955449104309, "learning_rate": 0.002, "loss": 2.556, "step": 374840 }, { "epoch": 0.7467845531046793, "grad_norm": 0.16079553961753845, "learning_rate": 0.002, "loss": 2.5368, "step": 374850 }, { "epoch": 0.7468044753283183, "grad_norm": 0.16860558092594147, "learning_rate": 0.002, "loss": 2.5566, "step": 374860 }, { "epoch": 0.7468243975519572, "grad_norm": 0.1510361284017563, "learning_rate": 0.002, "loss": 2.5501, "step": 374870 }, { "epoch": 0.7468443197755961, "grad_norm": 0.21395368874073029, "learning_rate": 0.002, "loss": 2.5629, "step": 374880 }, { "epoch": 0.746864241999235, "grad_norm": 0.15930825471878052, "learning_rate": 0.002, "loss": 2.5741, "step": 374890 }, { "epoch": 0.7468841642228738, "grad_norm": 0.14680084586143494, "learning_rate": 0.002, "loss": 2.5489, "step": 374900 }, { "epoch": 0.7469040864465128, "grad_norm": 0.19382183253765106, "learning_rate": 0.002, "loss": 2.5589, "step": 374910 }, { "epoch": 0.7469240086701517, "grad_norm": 0.16623690724372864, "learning_rate": 0.002, "loss": 2.5485, "step": 374920 }, { "epoch": 0.7469439308937906, "grad_norm": 0.14153268933296204, "learning_rate": 0.002, "loss": 2.5624, "step": 374930 }, { "epoch": 0.7469638531174295, "grad_norm": 0.18038931488990784, "learning_rate": 0.002, "loss": 2.5404, "step": 374940 }, { "epoch": 0.7469837753410685, "grad_norm": 0.14364241063594818, "learning_rate": 0.002, "loss": 2.5522, "step": 374950 }, { "epoch": 0.7470036975647074, "grad_norm": 0.7536690831184387, "learning_rate": 0.002, "loss": 2.5548, "step": 374960 }, { "epoch": 0.7470236197883463, "grad_norm": 0.18685293197631836, "learning_rate": 0.002, "loss": 2.5656, "step": 374970 }, { "epoch": 0.7470435420119852, "grad_norm": 0.14052440226078033, "learning_rate": 0.002, "loss": 2.551, "step": 374980 }, { "epoch": 0.7470634642356241, "grad_norm": 0.15441830456256866, "learning_rate": 0.002, "loss": 2.5643, "step": 374990 }, { "epoch": 0.7470833864592631, "grad_norm": 0.15629151463508606, "learning_rate": 0.002, "loss": 2.5675, "step": 375000 }, { "epoch": 0.747103308682902, "grad_norm": 0.17036589980125427, "learning_rate": 0.002, "loss": 2.5592, "step": 375010 }, { "epoch": 0.7471232309065409, "grad_norm": 0.14855767786502838, "learning_rate": 0.002, "loss": 2.5556, "step": 375020 }, { "epoch": 0.7471431531301798, "grad_norm": 0.15921379625797272, "learning_rate": 0.002, "loss": 2.5421, "step": 375030 }, { "epoch": 0.7471630753538187, "grad_norm": 0.15252353250980377, "learning_rate": 0.002, "loss": 2.5667, "step": 375040 }, { "epoch": 0.7471829975774577, "grad_norm": 0.16029496490955353, "learning_rate": 0.002, "loss": 2.5632, "step": 375050 }, { "epoch": 0.7472029198010965, "grad_norm": 0.1519049108028412, "learning_rate": 0.002, "loss": 2.5481, "step": 375060 }, { "epoch": 0.7472228420247354, "grad_norm": 0.1645854264497757, "learning_rate": 0.002, "loss": 2.5651, "step": 375070 }, { "epoch": 0.7472427642483743, "grad_norm": 0.1425803303718567, "learning_rate": 0.002, "loss": 2.5663, "step": 375080 }, { "epoch": 0.7472626864720132, "grad_norm": 0.13916108012199402, "learning_rate": 0.002, "loss": 2.5377, "step": 375090 }, { "epoch": 0.7472826086956522, "grad_norm": 0.2049422711133957, "learning_rate": 0.002, "loss": 2.5369, "step": 375100 }, { "epoch": 0.7473025309192911, "grad_norm": 0.15998922288417816, "learning_rate": 0.002, "loss": 2.5577, "step": 375110 }, { "epoch": 0.74732245314293, "grad_norm": 0.15052105486392975, "learning_rate": 0.002, "loss": 2.5416, "step": 375120 }, { "epoch": 0.7473423753665689, "grad_norm": 0.1708051860332489, "learning_rate": 0.002, "loss": 2.554, "step": 375130 }, { "epoch": 0.7473622975902078, "grad_norm": 0.17342273890972137, "learning_rate": 0.002, "loss": 2.5596, "step": 375140 }, { "epoch": 0.7473822198138468, "grad_norm": 0.15646937489509583, "learning_rate": 0.002, "loss": 2.5522, "step": 375150 }, { "epoch": 0.7474021420374857, "grad_norm": 0.19294436275959015, "learning_rate": 0.002, "loss": 2.5192, "step": 375160 }, { "epoch": 0.7474220642611246, "grad_norm": 0.16283074021339417, "learning_rate": 0.002, "loss": 2.5527, "step": 375170 }, { "epoch": 0.7474419864847635, "grad_norm": 0.15819436311721802, "learning_rate": 0.002, "loss": 2.5607, "step": 375180 }, { "epoch": 0.7474619087084023, "grad_norm": 0.16796645522117615, "learning_rate": 0.002, "loss": 2.5552, "step": 375190 }, { "epoch": 0.7474818309320413, "grad_norm": 0.15957973897457123, "learning_rate": 0.002, "loss": 2.5488, "step": 375200 }, { "epoch": 0.7475017531556802, "grad_norm": 0.15444472432136536, "learning_rate": 0.002, "loss": 2.5404, "step": 375210 }, { "epoch": 0.7475216753793191, "grad_norm": 0.1535819172859192, "learning_rate": 0.002, "loss": 2.545, "step": 375220 }, { "epoch": 0.747541597602958, "grad_norm": 0.1560751348733902, "learning_rate": 0.002, "loss": 2.5506, "step": 375230 }, { "epoch": 0.747561519826597, "grad_norm": 0.18095943331718445, "learning_rate": 0.002, "loss": 2.5401, "step": 375240 }, { "epoch": 0.7475814420502359, "grad_norm": 0.15742817521095276, "learning_rate": 0.002, "loss": 2.5508, "step": 375250 }, { "epoch": 0.7476013642738748, "grad_norm": 0.1690627783536911, "learning_rate": 0.002, "loss": 2.5649, "step": 375260 }, { "epoch": 0.7476212864975137, "grad_norm": 0.15693798661231995, "learning_rate": 0.002, "loss": 2.5395, "step": 375270 }, { "epoch": 0.7476412087211526, "grad_norm": 0.16639173030853271, "learning_rate": 0.002, "loss": 2.5624, "step": 375280 }, { "epoch": 0.7476611309447916, "grad_norm": 0.1473836898803711, "learning_rate": 0.002, "loss": 2.5665, "step": 375290 }, { "epoch": 0.7476810531684305, "grad_norm": 0.15407510101795197, "learning_rate": 0.002, "loss": 2.5466, "step": 375300 }, { "epoch": 0.7477009753920694, "grad_norm": 0.18127526342868805, "learning_rate": 0.002, "loss": 2.5585, "step": 375310 }, { "epoch": 0.7477208976157083, "grad_norm": 0.16651926934719086, "learning_rate": 0.002, "loss": 2.5536, "step": 375320 }, { "epoch": 0.7477408198393471, "grad_norm": 0.16045412421226501, "learning_rate": 0.002, "loss": 2.5583, "step": 375330 }, { "epoch": 0.7477607420629862, "grad_norm": 0.16685786843299866, "learning_rate": 0.002, "loss": 2.5622, "step": 375340 }, { "epoch": 0.747780664286625, "grad_norm": 0.16322122514247894, "learning_rate": 0.002, "loss": 2.5614, "step": 375350 }, { "epoch": 0.7478005865102639, "grad_norm": 0.1464080959558487, "learning_rate": 0.002, "loss": 2.5782, "step": 375360 }, { "epoch": 0.7478205087339028, "grad_norm": 0.17625853419303894, "learning_rate": 0.002, "loss": 2.5565, "step": 375370 }, { "epoch": 0.7478404309575417, "grad_norm": 0.15456990897655487, "learning_rate": 0.002, "loss": 2.5594, "step": 375380 }, { "epoch": 0.7478603531811807, "grad_norm": 0.19803990423679352, "learning_rate": 0.002, "loss": 2.5619, "step": 375390 }, { "epoch": 0.7478802754048196, "grad_norm": 0.1756231039762497, "learning_rate": 0.002, "loss": 2.5297, "step": 375400 }, { "epoch": 0.7479001976284585, "grad_norm": 0.18419967591762543, "learning_rate": 0.002, "loss": 2.5556, "step": 375410 }, { "epoch": 0.7479201198520974, "grad_norm": 0.1516282707452774, "learning_rate": 0.002, "loss": 2.5534, "step": 375420 }, { "epoch": 0.7479400420757363, "grad_norm": 0.1495574414730072, "learning_rate": 0.002, "loss": 2.5604, "step": 375430 }, { "epoch": 0.7479599642993753, "grad_norm": 0.16128572821617126, "learning_rate": 0.002, "loss": 2.5384, "step": 375440 }, { "epoch": 0.7479798865230142, "grad_norm": 0.16247674822807312, "learning_rate": 0.002, "loss": 2.5674, "step": 375450 }, { "epoch": 0.7479998087466531, "grad_norm": 0.19788353145122528, "learning_rate": 0.002, "loss": 2.5552, "step": 375460 }, { "epoch": 0.748019730970292, "grad_norm": 0.14306072890758514, "learning_rate": 0.002, "loss": 2.5582, "step": 375470 }, { "epoch": 0.7480396531939308, "grad_norm": 0.16900552809238434, "learning_rate": 0.002, "loss": 2.5553, "step": 375480 }, { "epoch": 0.7480595754175698, "grad_norm": 0.16372445225715637, "learning_rate": 0.002, "loss": 2.5514, "step": 375490 }, { "epoch": 0.7480794976412087, "grad_norm": 0.1453859508037567, "learning_rate": 0.002, "loss": 2.5673, "step": 375500 }, { "epoch": 0.7480994198648476, "grad_norm": 0.17549671232700348, "learning_rate": 0.002, "loss": 2.5508, "step": 375510 }, { "epoch": 0.7481193420884865, "grad_norm": 0.14350947737693787, "learning_rate": 0.002, "loss": 2.5548, "step": 375520 }, { "epoch": 0.7481392643121254, "grad_norm": 0.18865172564983368, "learning_rate": 0.002, "loss": 2.5488, "step": 375530 }, { "epoch": 0.7481591865357644, "grad_norm": 0.1698717623949051, "learning_rate": 0.002, "loss": 2.5541, "step": 375540 }, { "epoch": 0.7481791087594033, "grad_norm": 0.1715376377105713, "learning_rate": 0.002, "loss": 2.5598, "step": 375550 }, { "epoch": 0.7481990309830422, "grad_norm": 0.1663486808538437, "learning_rate": 0.002, "loss": 2.5526, "step": 375560 }, { "epoch": 0.7482189532066811, "grad_norm": 0.19111834466457367, "learning_rate": 0.002, "loss": 2.5533, "step": 375570 }, { "epoch": 0.7482388754303201, "grad_norm": 0.1672610193490982, "learning_rate": 0.002, "loss": 2.5472, "step": 375580 }, { "epoch": 0.748258797653959, "grad_norm": 0.21326938271522522, "learning_rate": 0.002, "loss": 2.5551, "step": 375590 }, { "epoch": 0.7482787198775979, "grad_norm": 0.16660813987255096, "learning_rate": 0.002, "loss": 2.5407, "step": 375600 }, { "epoch": 0.7482986421012368, "grad_norm": 0.14436015486717224, "learning_rate": 0.002, "loss": 2.5498, "step": 375610 }, { "epoch": 0.7483185643248756, "grad_norm": 0.1557707041501999, "learning_rate": 0.002, "loss": 2.5566, "step": 375620 }, { "epoch": 0.7483384865485146, "grad_norm": 0.19780507683753967, "learning_rate": 0.002, "loss": 2.5624, "step": 375630 }, { "epoch": 0.7483584087721535, "grad_norm": 0.1469258815050125, "learning_rate": 0.002, "loss": 2.5609, "step": 375640 }, { "epoch": 0.7483783309957924, "grad_norm": 0.16555367410182953, "learning_rate": 0.002, "loss": 2.5529, "step": 375650 }, { "epoch": 0.7483982532194313, "grad_norm": 0.16511015594005585, "learning_rate": 0.002, "loss": 2.5349, "step": 375660 }, { "epoch": 0.7484181754430702, "grad_norm": 0.18187546730041504, "learning_rate": 0.002, "loss": 2.5471, "step": 375670 }, { "epoch": 0.7484380976667092, "grad_norm": 0.14881640672683716, "learning_rate": 0.002, "loss": 2.5564, "step": 375680 }, { "epoch": 0.7484580198903481, "grad_norm": 0.16565938293933868, "learning_rate": 0.002, "loss": 2.5544, "step": 375690 }, { "epoch": 0.748477942113987, "grad_norm": 0.19363921880722046, "learning_rate": 0.002, "loss": 2.557, "step": 375700 }, { "epoch": 0.7484978643376259, "grad_norm": 0.1499408781528473, "learning_rate": 0.002, "loss": 2.5404, "step": 375710 }, { "epoch": 0.7485177865612648, "grad_norm": 0.17076578736305237, "learning_rate": 0.002, "loss": 2.5745, "step": 375720 }, { "epoch": 0.7485377087849038, "grad_norm": 0.18132787942886353, "learning_rate": 0.002, "loss": 2.5576, "step": 375730 }, { "epoch": 0.7485576310085427, "grad_norm": 0.20955324172973633, "learning_rate": 0.002, "loss": 2.5487, "step": 375740 }, { "epoch": 0.7485775532321816, "grad_norm": 0.1500805914402008, "learning_rate": 0.002, "loss": 2.5493, "step": 375750 }, { "epoch": 0.7485974754558204, "grad_norm": 0.19388878345489502, "learning_rate": 0.002, "loss": 2.54, "step": 375760 }, { "epoch": 0.7486173976794593, "grad_norm": 0.19496910274028778, "learning_rate": 0.002, "loss": 2.5642, "step": 375770 }, { "epoch": 0.7486373199030983, "grad_norm": 0.15606537461280823, "learning_rate": 0.002, "loss": 2.5716, "step": 375780 }, { "epoch": 0.7486572421267372, "grad_norm": 0.20236682891845703, "learning_rate": 0.002, "loss": 2.5506, "step": 375790 }, { "epoch": 0.7486771643503761, "grad_norm": 0.1539115011692047, "learning_rate": 0.002, "loss": 2.5437, "step": 375800 }, { "epoch": 0.748697086574015, "grad_norm": 0.1674240529537201, "learning_rate": 0.002, "loss": 2.5546, "step": 375810 }, { "epoch": 0.7487170087976539, "grad_norm": 0.1542864441871643, "learning_rate": 0.002, "loss": 2.5557, "step": 375820 }, { "epoch": 0.7487369310212929, "grad_norm": 0.1570959836244583, "learning_rate": 0.002, "loss": 2.5485, "step": 375830 }, { "epoch": 0.7487568532449318, "grad_norm": 0.19685344398021698, "learning_rate": 0.002, "loss": 2.5639, "step": 375840 }, { "epoch": 0.7487767754685707, "grad_norm": 0.20281648635864258, "learning_rate": 0.002, "loss": 2.5522, "step": 375850 }, { "epoch": 0.7487966976922096, "grad_norm": 0.1602959781885147, "learning_rate": 0.002, "loss": 2.5461, "step": 375860 }, { "epoch": 0.7488166199158486, "grad_norm": 0.15200036764144897, "learning_rate": 0.002, "loss": 2.572, "step": 375870 }, { "epoch": 0.7488365421394875, "grad_norm": 0.1714077889919281, "learning_rate": 0.002, "loss": 2.5585, "step": 375880 }, { "epoch": 0.7488564643631264, "grad_norm": 0.17168743908405304, "learning_rate": 0.002, "loss": 2.5571, "step": 375890 }, { "epoch": 0.7488763865867653, "grad_norm": 0.14123626053333282, "learning_rate": 0.002, "loss": 2.5473, "step": 375900 }, { "epoch": 0.7488963088104041, "grad_norm": 0.14959301054477692, "learning_rate": 0.002, "loss": 2.5585, "step": 375910 }, { "epoch": 0.7489162310340431, "grad_norm": 0.17103825509548187, "learning_rate": 0.002, "loss": 2.5558, "step": 375920 }, { "epoch": 0.748936153257682, "grad_norm": 0.14547409117221832, "learning_rate": 0.002, "loss": 2.5506, "step": 375930 }, { "epoch": 0.7489560754813209, "grad_norm": 0.15974991023540497, "learning_rate": 0.002, "loss": 2.5398, "step": 375940 }, { "epoch": 0.7489759977049598, "grad_norm": 0.1950410157442093, "learning_rate": 0.002, "loss": 2.5468, "step": 375950 }, { "epoch": 0.7489959199285987, "grad_norm": 0.173061802983284, "learning_rate": 0.002, "loss": 2.5617, "step": 375960 }, { "epoch": 0.7490158421522377, "grad_norm": 0.20049941539764404, "learning_rate": 0.002, "loss": 2.5537, "step": 375970 }, { "epoch": 0.7490357643758766, "grad_norm": 0.172401562333107, "learning_rate": 0.002, "loss": 2.5548, "step": 375980 }, { "epoch": 0.7490556865995155, "grad_norm": 0.16714751720428467, "learning_rate": 0.002, "loss": 2.5511, "step": 375990 }, { "epoch": 0.7490756088231544, "grad_norm": 0.15457521378993988, "learning_rate": 0.002, "loss": 2.5647, "step": 376000 }, { "epoch": 0.7490955310467933, "grad_norm": 0.35270315408706665, "learning_rate": 0.002, "loss": 2.5529, "step": 376010 }, { "epoch": 0.7491154532704323, "grad_norm": 0.15466904640197754, "learning_rate": 0.002, "loss": 2.5733, "step": 376020 }, { "epoch": 0.7491353754940712, "grad_norm": 0.16676542162895203, "learning_rate": 0.002, "loss": 2.5564, "step": 376030 }, { "epoch": 0.7491552977177101, "grad_norm": 0.161665141582489, "learning_rate": 0.002, "loss": 2.5577, "step": 376040 }, { "epoch": 0.749175219941349, "grad_norm": 0.15508794784545898, "learning_rate": 0.002, "loss": 2.5597, "step": 376050 }, { "epoch": 0.7491951421649878, "grad_norm": 0.19238188862800598, "learning_rate": 0.002, "loss": 2.5488, "step": 376060 }, { "epoch": 0.7492150643886268, "grad_norm": 0.14541272819042206, "learning_rate": 0.002, "loss": 2.5513, "step": 376070 }, { "epoch": 0.7492349866122657, "grad_norm": 0.15297967195510864, "learning_rate": 0.002, "loss": 2.5517, "step": 376080 }, { "epoch": 0.7492549088359046, "grad_norm": 0.172171950340271, "learning_rate": 0.002, "loss": 2.5571, "step": 376090 }, { "epoch": 0.7492748310595435, "grad_norm": 0.17428648471832275, "learning_rate": 0.002, "loss": 2.5695, "step": 376100 }, { "epoch": 0.7492947532831824, "grad_norm": 0.18824520707130432, "learning_rate": 0.002, "loss": 2.552, "step": 376110 }, { "epoch": 0.7493146755068214, "grad_norm": 0.17171171307563782, "learning_rate": 0.002, "loss": 2.5533, "step": 376120 }, { "epoch": 0.7493345977304603, "grad_norm": 0.24772848188877106, "learning_rate": 0.002, "loss": 2.5714, "step": 376130 }, { "epoch": 0.7493545199540992, "grad_norm": 0.16036278009414673, "learning_rate": 0.002, "loss": 2.5645, "step": 376140 }, { "epoch": 0.7493744421777381, "grad_norm": 0.160329207777977, "learning_rate": 0.002, "loss": 2.5576, "step": 376150 }, { "epoch": 0.7493943644013771, "grad_norm": 0.1465034782886505, "learning_rate": 0.002, "loss": 2.5394, "step": 376160 }, { "epoch": 0.749414286625016, "grad_norm": 0.17385412752628326, "learning_rate": 0.002, "loss": 2.5647, "step": 376170 }, { "epoch": 0.7494342088486549, "grad_norm": 0.15982165932655334, "learning_rate": 0.002, "loss": 2.5556, "step": 376180 }, { "epoch": 0.7494541310722937, "grad_norm": 0.17175698280334473, "learning_rate": 0.002, "loss": 2.5211, "step": 376190 }, { "epoch": 0.7494740532959326, "grad_norm": 0.17268800735473633, "learning_rate": 0.002, "loss": 2.5593, "step": 376200 }, { "epoch": 0.7494939755195716, "grad_norm": 0.15229998528957367, "learning_rate": 0.002, "loss": 2.5552, "step": 376210 }, { "epoch": 0.7495138977432105, "grad_norm": 0.15250258147716522, "learning_rate": 0.002, "loss": 2.5531, "step": 376220 }, { "epoch": 0.7495338199668494, "grad_norm": 0.14150722324848175, "learning_rate": 0.002, "loss": 2.5471, "step": 376230 }, { "epoch": 0.7495537421904883, "grad_norm": 0.1858731508255005, "learning_rate": 0.002, "loss": 2.5414, "step": 376240 }, { "epoch": 0.7495736644141272, "grad_norm": 0.17123261094093323, "learning_rate": 0.002, "loss": 2.558, "step": 376250 }, { "epoch": 0.7495935866377662, "grad_norm": 0.22138793766498566, "learning_rate": 0.002, "loss": 2.5548, "step": 376260 }, { "epoch": 0.7496135088614051, "grad_norm": 0.14938659965991974, "learning_rate": 0.002, "loss": 2.5582, "step": 376270 }, { "epoch": 0.749633431085044, "grad_norm": 0.15053944289684296, "learning_rate": 0.002, "loss": 2.5526, "step": 376280 }, { "epoch": 0.7496533533086829, "grad_norm": 0.17028851807117462, "learning_rate": 0.002, "loss": 2.5574, "step": 376290 }, { "epoch": 0.7496732755323218, "grad_norm": 0.18814364075660706, "learning_rate": 0.002, "loss": 2.5527, "step": 376300 }, { "epoch": 0.7496931977559608, "grad_norm": 0.1506037414073944, "learning_rate": 0.002, "loss": 2.5598, "step": 376310 }, { "epoch": 0.7497131199795997, "grad_norm": 0.17502108216285706, "learning_rate": 0.002, "loss": 2.5508, "step": 376320 }, { "epoch": 0.7497330422032386, "grad_norm": 0.20376890897750854, "learning_rate": 0.002, "loss": 2.5613, "step": 376330 }, { "epoch": 0.7497529644268774, "grad_norm": 0.19316533207893372, "learning_rate": 0.002, "loss": 2.5658, "step": 376340 }, { "epoch": 0.7497728866505163, "grad_norm": 0.17849141359329224, "learning_rate": 0.002, "loss": 2.5667, "step": 376350 }, { "epoch": 0.7497928088741553, "grad_norm": 0.14683383703231812, "learning_rate": 0.002, "loss": 2.5677, "step": 376360 }, { "epoch": 0.7498127310977942, "grad_norm": 0.1866942048072815, "learning_rate": 0.002, "loss": 2.5485, "step": 376370 }, { "epoch": 0.7498326533214331, "grad_norm": 0.16886171698570251, "learning_rate": 0.002, "loss": 2.5732, "step": 376380 }, { "epoch": 0.749852575545072, "grad_norm": 0.14375346899032593, "learning_rate": 0.002, "loss": 2.5497, "step": 376390 }, { "epoch": 0.7498724977687109, "grad_norm": 0.13854384422302246, "learning_rate": 0.002, "loss": 2.5569, "step": 376400 }, { "epoch": 0.7498924199923499, "grad_norm": 0.15346916019916534, "learning_rate": 0.002, "loss": 2.5621, "step": 376410 }, { "epoch": 0.7499123422159888, "grad_norm": 0.162155881524086, "learning_rate": 0.002, "loss": 2.5694, "step": 376420 }, { "epoch": 0.7499322644396277, "grad_norm": 0.14860494434833527, "learning_rate": 0.002, "loss": 2.5586, "step": 376430 }, { "epoch": 0.7499521866632666, "grad_norm": 0.17784664034843445, "learning_rate": 0.002, "loss": 2.558, "step": 376440 }, { "epoch": 0.7499721088869056, "grad_norm": 0.14275576174259186, "learning_rate": 0.002, "loss": 2.5488, "step": 376450 }, { "epoch": 0.7499920311105445, "grad_norm": 0.17604143917560577, "learning_rate": 0.002, "loss": 2.5582, "step": 376460 }, { "epoch": 0.7500119533341834, "grad_norm": 0.14021696150302887, "learning_rate": 0.002, "loss": 2.5422, "step": 376470 }, { "epoch": 0.7500318755578222, "grad_norm": 0.17697398364543915, "learning_rate": 0.002, "loss": 2.5577, "step": 376480 }, { "epoch": 0.7500517977814611, "grad_norm": 0.15720775723457336, "learning_rate": 0.002, "loss": 2.5511, "step": 376490 }, { "epoch": 0.7500717200051001, "grad_norm": 0.1697523593902588, "learning_rate": 0.002, "loss": 2.5583, "step": 376500 }, { "epoch": 0.750091642228739, "grad_norm": 0.16951413452625275, "learning_rate": 0.002, "loss": 2.5476, "step": 376510 }, { "epoch": 0.7501115644523779, "grad_norm": 0.14972025156021118, "learning_rate": 0.002, "loss": 2.5575, "step": 376520 }, { "epoch": 0.7501314866760168, "grad_norm": 0.20098260045051575, "learning_rate": 0.002, "loss": 2.5529, "step": 376530 }, { "epoch": 0.7501514088996557, "grad_norm": 0.41190069913864136, "learning_rate": 0.002, "loss": 2.5421, "step": 376540 }, { "epoch": 0.7501713311232947, "grad_norm": 0.1403564065694809, "learning_rate": 0.002, "loss": 2.547, "step": 376550 }, { "epoch": 0.7501912533469336, "grad_norm": 0.16573114693164825, "learning_rate": 0.002, "loss": 2.5478, "step": 376560 }, { "epoch": 0.7502111755705725, "grad_norm": 0.15457582473754883, "learning_rate": 0.002, "loss": 2.5382, "step": 376570 }, { "epoch": 0.7502310977942114, "grad_norm": 0.16741806268692017, "learning_rate": 0.002, "loss": 2.5412, "step": 376580 }, { "epoch": 0.7502510200178503, "grad_norm": 0.15503615140914917, "learning_rate": 0.002, "loss": 2.5433, "step": 376590 }, { "epoch": 0.7502709422414893, "grad_norm": 0.25934621691703796, "learning_rate": 0.002, "loss": 2.5541, "step": 376600 }, { "epoch": 0.7502908644651282, "grad_norm": 0.14550091326236725, "learning_rate": 0.002, "loss": 2.54, "step": 376610 }, { "epoch": 0.750310786688767, "grad_norm": 0.13811856508255005, "learning_rate": 0.002, "loss": 2.5515, "step": 376620 }, { "epoch": 0.7503307089124059, "grad_norm": 0.1510998159646988, "learning_rate": 0.002, "loss": 2.5456, "step": 376630 }, { "epoch": 0.7503506311360448, "grad_norm": 0.17588481307029724, "learning_rate": 0.002, "loss": 2.5763, "step": 376640 }, { "epoch": 0.7503705533596838, "grad_norm": 0.1518196016550064, "learning_rate": 0.002, "loss": 2.5572, "step": 376650 }, { "epoch": 0.7503904755833227, "grad_norm": 0.1872890442609787, "learning_rate": 0.002, "loss": 2.5511, "step": 376660 }, { "epoch": 0.7504103978069616, "grad_norm": 0.17489571869373322, "learning_rate": 0.002, "loss": 2.5509, "step": 376670 }, { "epoch": 0.7504303200306005, "grad_norm": 0.15469208359718323, "learning_rate": 0.002, "loss": 2.5606, "step": 376680 }, { "epoch": 0.7504502422542394, "grad_norm": 0.1542646288871765, "learning_rate": 0.002, "loss": 2.5679, "step": 376690 }, { "epoch": 0.7504701644778784, "grad_norm": 0.17466893792152405, "learning_rate": 0.002, "loss": 2.5571, "step": 376700 }, { "epoch": 0.7504900867015173, "grad_norm": 0.21017983555793762, "learning_rate": 0.002, "loss": 2.5562, "step": 376710 }, { "epoch": 0.7505100089251562, "grad_norm": 0.16265763342380524, "learning_rate": 0.002, "loss": 2.5454, "step": 376720 }, { "epoch": 0.7505299311487951, "grad_norm": 0.15094159543514252, "learning_rate": 0.002, "loss": 2.5552, "step": 376730 }, { "epoch": 0.7505498533724341, "grad_norm": 0.1951347291469574, "learning_rate": 0.002, "loss": 2.5434, "step": 376740 }, { "epoch": 0.750569775596073, "grad_norm": 0.18733903765678406, "learning_rate": 0.002, "loss": 2.5518, "step": 376750 }, { "epoch": 0.7505896978197119, "grad_norm": 0.14798320829868317, "learning_rate": 0.002, "loss": 2.5718, "step": 376760 }, { "epoch": 0.7506096200433507, "grad_norm": 0.1563737988471985, "learning_rate": 0.002, "loss": 2.5561, "step": 376770 }, { "epoch": 0.7506295422669896, "grad_norm": 0.15720918774604797, "learning_rate": 0.002, "loss": 2.5482, "step": 376780 }, { "epoch": 0.7506494644906286, "grad_norm": 0.16044706106185913, "learning_rate": 0.002, "loss": 2.5524, "step": 376790 }, { "epoch": 0.7506693867142675, "grad_norm": 0.15647666156291962, "learning_rate": 0.002, "loss": 2.5415, "step": 376800 }, { "epoch": 0.7506893089379064, "grad_norm": 0.16918188333511353, "learning_rate": 0.002, "loss": 2.5664, "step": 376810 }, { "epoch": 0.7507092311615453, "grad_norm": 0.15968802571296692, "learning_rate": 0.002, "loss": 2.5575, "step": 376820 }, { "epoch": 0.7507291533851842, "grad_norm": 0.1620483249425888, "learning_rate": 0.002, "loss": 2.5603, "step": 376830 }, { "epoch": 0.7507490756088232, "grad_norm": 0.18217363953590393, "learning_rate": 0.002, "loss": 2.5653, "step": 376840 }, { "epoch": 0.7507689978324621, "grad_norm": 0.17721733450889587, "learning_rate": 0.002, "loss": 2.5443, "step": 376850 }, { "epoch": 0.750788920056101, "grad_norm": 0.1509728729724884, "learning_rate": 0.002, "loss": 2.5545, "step": 376860 }, { "epoch": 0.7508088422797399, "grad_norm": 0.1730840802192688, "learning_rate": 0.002, "loss": 2.554, "step": 376870 }, { "epoch": 0.7508287645033788, "grad_norm": 0.15057988464832306, "learning_rate": 0.002, "loss": 2.5411, "step": 376880 }, { "epoch": 0.7508486867270178, "grad_norm": 0.18805353343486786, "learning_rate": 0.002, "loss": 2.5541, "step": 376890 }, { "epoch": 0.7508686089506567, "grad_norm": 0.20014888048171997, "learning_rate": 0.002, "loss": 2.5485, "step": 376900 }, { "epoch": 0.7508885311742955, "grad_norm": 0.17594459652900696, "learning_rate": 0.002, "loss": 2.559, "step": 376910 }, { "epoch": 0.7509084533979344, "grad_norm": 0.18276259303092957, "learning_rate": 0.002, "loss": 2.5528, "step": 376920 }, { "epoch": 0.7509283756215733, "grad_norm": 0.15901660919189453, "learning_rate": 0.002, "loss": 2.5571, "step": 376930 }, { "epoch": 0.7509482978452123, "grad_norm": 0.18259483575820923, "learning_rate": 0.002, "loss": 2.5604, "step": 376940 }, { "epoch": 0.7509682200688512, "grad_norm": 0.1542658805847168, "learning_rate": 0.002, "loss": 2.5475, "step": 376950 }, { "epoch": 0.7509881422924901, "grad_norm": 0.15800514817237854, "learning_rate": 0.002, "loss": 2.5451, "step": 376960 }, { "epoch": 0.751008064516129, "grad_norm": 0.18290388584136963, "learning_rate": 0.002, "loss": 2.5651, "step": 376970 }, { "epoch": 0.7510279867397679, "grad_norm": 0.15910425782203674, "learning_rate": 0.002, "loss": 2.5519, "step": 376980 }, { "epoch": 0.7510479089634069, "grad_norm": 0.19867585599422455, "learning_rate": 0.002, "loss": 2.5403, "step": 376990 }, { "epoch": 0.7510678311870458, "grad_norm": 0.1637653261423111, "learning_rate": 0.002, "loss": 2.556, "step": 377000 }, { "epoch": 0.7510877534106847, "grad_norm": 0.14665894210338593, "learning_rate": 0.002, "loss": 2.5526, "step": 377010 }, { "epoch": 0.7511076756343236, "grad_norm": 0.4141463041305542, "learning_rate": 0.002, "loss": 2.5664, "step": 377020 }, { "epoch": 0.7511275978579625, "grad_norm": 0.16325582563877106, "learning_rate": 0.002, "loss": 2.5678, "step": 377030 }, { "epoch": 0.7511475200816015, "grad_norm": 0.13370804488658905, "learning_rate": 0.002, "loss": 2.5373, "step": 377040 }, { "epoch": 0.7511674423052404, "grad_norm": 0.18824411928653717, "learning_rate": 0.002, "loss": 2.5389, "step": 377050 }, { "epoch": 0.7511873645288792, "grad_norm": 0.15979595482349396, "learning_rate": 0.002, "loss": 2.55, "step": 377060 }, { "epoch": 0.7512072867525181, "grad_norm": 0.14656034111976624, "learning_rate": 0.002, "loss": 2.5616, "step": 377070 }, { "epoch": 0.7512272089761571, "grad_norm": 0.17949295043945312, "learning_rate": 0.002, "loss": 2.5528, "step": 377080 }, { "epoch": 0.751247131199796, "grad_norm": 0.15679773688316345, "learning_rate": 0.002, "loss": 2.538, "step": 377090 }, { "epoch": 0.7512670534234349, "grad_norm": 0.13944756984710693, "learning_rate": 0.002, "loss": 2.549, "step": 377100 }, { "epoch": 0.7512869756470738, "grad_norm": 0.19516220688819885, "learning_rate": 0.002, "loss": 2.5419, "step": 377110 }, { "epoch": 0.7513068978707127, "grad_norm": 0.15819434821605682, "learning_rate": 0.002, "loss": 2.5466, "step": 377120 }, { "epoch": 0.7513268200943517, "grad_norm": 0.16417783498764038, "learning_rate": 0.002, "loss": 2.5547, "step": 377130 }, { "epoch": 0.7513467423179906, "grad_norm": 0.16920141875743866, "learning_rate": 0.002, "loss": 2.5586, "step": 377140 }, { "epoch": 0.7513666645416295, "grad_norm": 0.15933331847190857, "learning_rate": 0.002, "loss": 2.5675, "step": 377150 }, { "epoch": 0.7513865867652684, "grad_norm": 0.17097580432891846, "learning_rate": 0.002, "loss": 2.5616, "step": 377160 }, { "epoch": 0.7514065089889073, "grad_norm": 0.1743813455104828, "learning_rate": 0.002, "loss": 2.5587, "step": 377170 }, { "epoch": 0.7514264312125463, "grad_norm": 0.17952245473861694, "learning_rate": 0.002, "loss": 2.5708, "step": 377180 }, { "epoch": 0.7514463534361852, "grad_norm": 0.15468713641166687, "learning_rate": 0.002, "loss": 2.5518, "step": 377190 }, { "epoch": 0.751466275659824, "grad_norm": 0.141525000333786, "learning_rate": 0.002, "loss": 2.5483, "step": 377200 }, { "epoch": 0.7514861978834629, "grad_norm": 0.18890446424484253, "learning_rate": 0.002, "loss": 2.5516, "step": 377210 }, { "epoch": 0.7515061201071018, "grad_norm": 0.1490914225578308, "learning_rate": 0.002, "loss": 2.5481, "step": 377220 }, { "epoch": 0.7515260423307408, "grad_norm": 0.15528680384159088, "learning_rate": 0.002, "loss": 2.5462, "step": 377230 }, { "epoch": 0.7515459645543797, "grad_norm": 0.17721913754940033, "learning_rate": 0.002, "loss": 2.5453, "step": 377240 }, { "epoch": 0.7515658867780186, "grad_norm": 0.18527436256408691, "learning_rate": 0.002, "loss": 2.559, "step": 377250 }, { "epoch": 0.7515858090016575, "grad_norm": 0.1715461015701294, "learning_rate": 0.002, "loss": 2.5612, "step": 377260 }, { "epoch": 0.7516057312252964, "grad_norm": 0.15036581456661224, "learning_rate": 0.002, "loss": 2.5497, "step": 377270 }, { "epoch": 0.7516256534489354, "grad_norm": 0.16062495112419128, "learning_rate": 0.002, "loss": 2.5591, "step": 377280 }, { "epoch": 0.7516455756725743, "grad_norm": 0.14609301090240479, "learning_rate": 0.002, "loss": 2.5396, "step": 377290 }, { "epoch": 0.7516654978962132, "grad_norm": 0.16682538390159607, "learning_rate": 0.002, "loss": 2.5496, "step": 377300 }, { "epoch": 0.7516854201198521, "grad_norm": 0.20657820999622345, "learning_rate": 0.002, "loss": 2.552, "step": 377310 }, { "epoch": 0.751705342343491, "grad_norm": 0.1431986391544342, "learning_rate": 0.002, "loss": 2.5601, "step": 377320 }, { "epoch": 0.75172526456713, "grad_norm": 0.15715868771076202, "learning_rate": 0.002, "loss": 2.5463, "step": 377330 }, { "epoch": 0.7517451867907688, "grad_norm": 0.1762477308511734, "learning_rate": 0.002, "loss": 2.5428, "step": 377340 }, { "epoch": 0.7517651090144077, "grad_norm": 0.155330628156662, "learning_rate": 0.002, "loss": 2.5456, "step": 377350 }, { "epoch": 0.7517850312380466, "grad_norm": 0.2145216017961502, "learning_rate": 0.002, "loss": 2.5567, "step": 377360 }, { "epoch": 0.7518049534616856, "grad_norm": 0.1649121195077896, "learning_rate": 0.002, "loss": 2.5652, "step": 377370 }, { "epoch": 0.7518248756853245, "grad_norm": 0.1389816850423813, "learning_rate": 0.002, "loss": 2.5543, "step": 377380 }, { "epoch": 0.7518447979089634, "grad_norm": 0.14342953264713287, "learning_rate": 0.002, "loss": 2.5505, "step": 377390 }, { "epoch": 0.7518647201326023, "grad_norm": 0.16940222680568695, "learning_rate": 0.002, "loss": 2.5624, "step": 377400 }, { "epoch": 0.7518846423562412, "grad_norm": 0.20465883612632751, "learning_rate": 0.002, "loss": 2.5598, "step": 377410 }, { "epoch": 0.7519045645798802, "grad_norm": 0.1722247451543808, "learning_rate": 0.002, "loss": 2.5791, "step": 377420 }, { "epoch": 0.7519244868035191, "grad_norm": 0.15095041692256927, "learning_rate": 0.002, "loss": 2.5457, "step": 377430 }, { "epoch": 0.751944409027158, "grad_norm": 0.17190049588680267, "learning_rate": 0.002, "loss": 2.5525, "step": 377440 }, { "epoch": 0.7519643312507969, "grad_norm": 0.19970116019248962, "learning_rate": 0.002, "loss": 2.5592, "step": 377450 }, { "epoch": 0.7519842534744358, "grad_norm": 0.1366012990474701, "learning_rate": 0.002, "loss": 2.5465, "step": 377460 }, { "epoch": 0.7520041756980748, "grad_norm": 0.14865286648273468, "learning_rate": 0.002, "loss": 2.5601, "step": 377470 }, { "epoch": 0.7520240979217137, "grad_norm": 0.15411852300167084, "learning_rate": 0.002, "loss": 2.5588, "step": 377480 }, { "epoch": 0.7520440201453525, "grad_norm": 0.1647561490535736, "learning_rate": 0.002, "loss": 2.5531, "step": 377490 }, { "epoch": 0.7520639423689914, "grad_norm": 0.1693916618824005, "learning_rate": 0.002, "loss": 2.561, "step": 377500 }, { "epoch": 0.7520838645926303, "grad_norm": 0.14371038973331451, "learning_rate": 0.002, "loss": 2.5633, "step": 377510 }, { "epoch": 0.7521037868162693, "grad_norm": 0.18023280799388885, "learning_rate": 0.002, "loss": 2.5418, "step": 377520 }, { "epoch": 0.7521237090399082, "grad_norm": 0.15853197872638702, "learning_rate": 0.002, "loss": 2.5722, "step": 377530 }, { "epoch": 0.7521436312635471, "grad_norm": 0.15093538165092468, "learning_rate": 0.002, "loss": 2.5427, "step": 377540 }, { "epoch": 0.752163553487186, "grad_norm": 0.1491752713918686, "learning_rate": 0.002, "loss": 2.5531, "step": 377550 }, { "epoch": 0.7521834757108249, "grad_norm": 0.1453448385000229, "learning_rate": 0.002, "loss": 2.548, "step": 377560 }, { "epoch": 0.7522033979344639, "grad_norm": 0.18396054208278656, "learning_rate": 0.002, "loss": 2.5442, "step": 377570 }, { "epoch": 0.7522233201581028, "grad_norm": 0.1554652750492096, "learning_rate": 0.002, "loss": 2.5505, "step": 377580 }, { "epoch": 0.7522432423817417, "grad_norm": 0.15311013162136078, "learning_rate": 0.002, "loss": 2.5532, "step": 377590 }, { "epoch": 0.7522631646053806, "grad_norm": 0.1678386628627777, "learning_rate": 0.002, "loss": 2.5393, "step": 377600 }, { "epoch": 0.7522830868290195, "grad_norm": 0.1638116091489792, "learning_rate": 0.002, "loss": 2.5438, "step": 377610 }, { "epoch": 0.7523030090526585, "grad_norm": 0.15736235678195953, "learning_rate": 0.002, "loss": 2.5515, "step": 377620 }, { "epoch": 0.7523229312762973, "grad_norm": 0.14231657981872559, "learning_rate": 0.002, "loss": 2.5573, "step": 377630 }, { "epoch": 0.7523428534999362, "grad_norm": 0.20314890146255493, "learning_rate": 0.002, "loss": 2.5444, "step": 377640 }, { "epoch": 0.7523627757235751, "grad_norm": 0.16825945675373077, "learning_rate": 0.002, "loss": 2.5539, "step": 377650 }, { "epoch": 0.7523826979472141, "grad_norm": 0.14741197228431702, "learning_rate": 0.002, "loss": 2.5642, "step": 377660 }, { "epoch": 0.752402620170853, "grad_norm": 0.1853698343038559, "learning_rate": 0.002, "loss": 2.5504, "step": 377670 }, { "epoch": 0.7524225423944919, "grad_norm": 0.1621646136045456, "learning_rate": 0.002, "loss": 2.5501, "step": 377680 }, { "epoch": 0.7524424646181308, "grad_norm": 0.1647462099790573, "learning_rate": 0.002, "loss": 2.5397, "step": 377690 }, { "epoch": 0.7524623868417697, "grad_norm": 0.169899582862854, "learning_rate": 0.002, "loss": 2.5448, "step": 377700 }, { "epoch": 0.7524823090654087, "grad_norm": 0.18951766192913055, "learning_rate": 0.002, "loss": 2.5399, "step": 377710 }, { "epoch": 0.7525022312890476, "grad_norm": 0.16012191772460938, "learning_rate": 0.002, "loss": 2.5621, "step": 377720 }, { "epoch": 0.7525221535126865, "grad_norm": 0.18116705119609833, "learning_rate": 0.002, "loss": 2.5519, "step": 377730 }, { "epoch": 0.7525420757363254, "grad_norm": 0.1945320963859558, "learning_rate": 0.002, "loss": 2.5626, "step": 377740 }, { "epoch": 0.7525619979599643, "grad_norm": 0.14629705250263214, "learning_rate": 0.002, "loss": 2.5515, "step": 377750 }, { "epoch": 0.7525819201836033, "grad_norm": 0.14623147249221802, "learning_rate": 0.002, "loss": 2.5669, "step": 377760 }, { "epoch": 0.7526018424072421, "grad_norm": 0.2138354480266571, "learning_rate": 0.002, "loss": 2.5428, "step": 377770 }, { "epoch": 0.752621764630881, "grad_norm": 0.1779087334871292, "learning_rate": 0.002, "loss": 2.5579, "step": 377780 }, { "epoch": 0.7526416868545199, "grad_norm": 0.1529884785413742, "learning_rate": 0.002, "loss": 2.548, "step": 377790 }, { "epoch": 0.7526616090781588, "grad_norm": 0.1641337126493454, "learning_rate": 0.002, "loss": 2.5432, "step": 377800 }, { "epoch": 0.7526815313017978, "grad_norm": 0.16374680399894714, "learning_rate": 0.002, "loss": 2.5471, "step": 377810 }, { "epoch": 0.7527014535254367, "grad_norm": 0.14766626060009003, "learning_rate": 0.002, "loss": 2.5507, "step": 377820 }, { "epoch": 0.7527213757490756, "grad_norm": 0.17298689484596252, "learning_rate": 0.002, "loss": 2.5529, "step": 377830 }, { "epoch": 0.7527412979727145, "grad_norm": 0.17142292857170105, "learning_rate": 0.002, "loss": 2.5616, "step": 377840 }, { "epoch": 0.7527612201963534, "grad_norm": 0.1590338796377182, "learning_rate": 0.002, "loss": 2.5485, "step": 377850 }, { "epoch": 0.7527811424199924, "grad_norm": 0.16333569586277008, "learning_rate": 0.002, "loss": 2.5516, "step": 377860 }, { "epoch": 0.7528010646436313, "grad_norm": 0.1542879343032837, "learning_rate": 0.002, "loss": 2.5498, "step": 377870 }, { "epoch": 0.7528209868672702, "grad_norm": 0.15028579533100128, "learning_rate": 0.002, "loss": 2.5398, "step": 377880 }, { "epoch": 0.7528409090909091, "grad_norm": 0.17641885578632355, "learning_rate": 0.002, "loss": 2.5368, "step": 377890 }, { "epoch": 0.752860831314548, "grad_norm": 0.15530049800872803, "learning_rate": 0.002, "loss": 2.5611, "step": 377900 }, { "epoch": 0.752880753538187, "grad_norm": 0.13072967529296875, "learning_rate": 0.002, "loss": 2.5428, "step": 377910 }, { "epoch": 0.7529006757618258, "grad_norm": 0.14965558052062988, "learning_rate": 0.002, "loss": 2.562, "step": 377920 }, { "epoch": 0.7529205979854647, "grad_norm": 0.20103977620601654, "learning_rate": 0.002, "loss": 2.567, "step": 377930 }, { "epoch": 0.7529405202091036, "grad_norm": 0.1543739289045334, "learning_rate": 0.002, "loss": 2.5484, "step": 377940 }, { "epoch": 0.7529604424327426, "grad_norm": 0.15792424976825714, "learning_rate": 0.002, "loss": 2.5493, "step": 377950 }, { "epoch": 0.7529803646563815, "grad_norm": 0.1543775200843811, "learning_rate": 0.002, "loss": 2.5723, "step": 377960 }, { "epoch": 0.7530002868800204, "grad_norm": 0.16671133041381836, "learning_rate": 0.002, "loss": 2.5525, "step": 377970 }, { "epoch": 0.7530202091036593, "grad_norm": 0.1596480906009674, "learning_rate": 0.002, "loss": 2.5576, "step": 377980 }, { "epoch": 0.7530401313272982, "grad_norm": 0.1644425392150879, "learning_rate": 0.002, "loss": 2.5601, "step": 377990 }, { "epoch": 0.7530600535509372, "grad_norm": 0.15250994265079498, "learning_rate": 0.002, "loss": 2.5552, "step": 378000 }, { "epoch": 0.7530799757745761, "grad_norm": 0.18381023406982422, "learning_rate": 0.002, "loss": 2.5613, "step": 378010 }, { "epoch": 0.753099897998215, "grad_norm": 0.14673233032226562, "learning_rate": 0.002, "loss": 2.5542, "step": 378020 }, { "epoch": 0.7531198202218539, "grad_norm": 0.14119534194469452, "learning_rate": 0.002, "loss": 2.5584, "step": 378030 }, { "epoch": 0.7531397424454928, "grad_norm": 0.16047261655330658, "learning_rate": 0.002, "loss": 2.5514, "step": 378040 }, { "epoch": 0.7531596646691318, "grad_norm": 0.20491337776184082, "learning_rate": 0.002, "loss": 2.5443, "step": 378050 }, { "epoch": 0.7531795868927706, "grad_norm": 0.1481260359287262, "learning_rate": 0.002, "loss": 2.5634, "step": 378060 }, { "epoch": 0.7531995091164095, "grad_norm": 0.13568009436130524, "learning_rate": 0.002, "loss": 2.5524, "step": 378070 }, { "epoch": 0.7532194313400484, "grad_norm": 0.18719714879989624, "learning_rate": 0.002, "loss": 2.5643, "step": 378080 }, { "epoch": 0.7532393535636873, "grad_norm": 0.1639028936624527, "learning_rate": 0.002, "loss": 2.5446, "step": 378090 }, { "epoch": 0.7532592757873263, "grad_norm": 0.1599999964237213, "learning_rate": 0.002, "loss": 2.5446, "step": 378100 }, { "epoch": 0.7532791980109652, "grad_norm": 0.1747179478406906, "learning_rate": 0.002, "loss": 2.561, "step": 378110 }, { "epoch": 0.7532991202346041, "grad_norm": 0.1813706010580063, "learning_rate": 0.002, "loss": 2.5534, "step": 378120 }, { "epoch": 0.753319042458243, "grad_norm": 0.15981324017047882, "learning_rate": 0.002, "loss": 2.5626, "step": 378130 }, { "epoch": 0.7533389646818819, "grad_norm": 0.17091237008571625, "learning_rate": 0.002, "loss": 2.5519, "step": 378140 }, { "epoch": 0.7533588869055209, "grad_norm": 0.16158427298069, "learning_rate": 0.002, "loss": 2.5429, "step": 378150 }, { "epoch": 0.7533788091291598, "grad_norm": 0.19806621968746185, "learning_rate": 0.002, "loss": 2.5407, "step": 378160 }, { "epoch": 0.7533987313527987, "grad_norm": 0.1571761518716812, "learning_rate": 0.002, "loss": 2.5572, "step": 378170 }, { "epoch": 0.7534186535764376, "grad_norm": 0.1543751358985901, "learning_rate": 0.002, "loss": 2.5478, "step": 378180 }, { "epoch": 0.7534385758000764, "grad_norm": 0.160430446267128, "learning_rate": 0.002, "loss": 2.5532, "step": 378190 }, { "epoch": 0.7534584980237155, "grad_norm": 0.1807955950498581, "learning_rate": 0.002, "loss": 2.56, "step": 378200 }, { "epoch": 0.7534784202473543, "grad_norm": 0.15038283169269562, "learning_rate": 0.002, "loss": 2.5495, "step": 378210 }, { "epoch": 0.7534983424709932, "grad_norm": 0.1469138264656067, "learning_rate": 0.002, "loss": 2.5531, "step": 378220 }, { "epoch": 0.7535182646946321, "grad_norm": 0.16683173179626465, "learning_rate": 0.002, "loss": 2.5492, "step": 378230 }, { "epoch": 0.7535381869182711, "grad_norm": 0.1753186285495758, "learning_rate": 0.002, "loss": 2.5444, "step": 378240 }, { "epoch": 0.75355810914191, "grad_norm": 0.15860982239246368, "learning_rate": 0.002, "loss": 2.5582, "step": 378250 }, { "epoch": 0.7535780313655489, "grad_norm": 0.17024455964565277, "learning_rate": 0.002, "loss": 2.5547, "step": 378260 }, { "epoch": 0.7535979535891878, "grad_norm": 0.1651442050933838, "learning_rate": 0.002, "loss": 2.5461, "step": 378270 }, { "epoch": 0.7536178758128267, "grad_norm": 0.18808379769325256, "learning_rate": 0.002, "loss": 2.562, "step": 378280 }, { "epoch": 0.7536377980364657, "grad_norm": 0.16619868576526642, "learning_rate": 0.002, "loss": 2.5483, "step": 378290 }, { "epoch": 0.7536577202601046, "grad_norm": 0.16555050015449524, "learning_rate": 0.002, "loss": 2.5602, "step": 378300 }, { "epoch": 0.7536776424837435, "grad_norm": 0.16048485040664673, "learning_rate": 0.002, "loss": 2.5506, "step": 378310 }, { "epoch": 0.7536975647073824, "grad_norm": 0.1670294553041458, "learning_rate": 0.002, "loss": 2.5477, "step": 378320 }, { "epoch": 0.7537174869310213, "grad_norm": 0.1379077434539795, "learning_rate": 0.002, "loss": 2.5435, "step": 378330 }, { "epoch": 0.7537374091546603, "grad_norm": 0.21451213955879211, "learning_rate": 0.002, "loss": 2.5693, "step": 378340 }, { "epoch": 0.7537573313782991, "grad_norm": 0.2271403819322586, "learning_rate": 0.002, "loss": 2.5585, "step": 378350 }, { "epoch": 0.753777253601938, "grad_norm": 0.17237994074821472, "learning_rate": 0.002, "loss": 2.5622, "step": 378360 }, { "epoch": 0.7537971758255769, "grad_norm": 0.14289505779743195, "learning_rate": 0.002, "loss": 2.5599, "step": 378370 }, { "epoch": 0.7538170980492158, "grad_norm": 0.19488807022571564, "learning_rate": 0.002, "loss": 2.5573, "step": 378380 }, { "epoch": 0.7538370202728548, "grad_norm": 0.17294242978096008, "learning_rate": 0.002, "loss": 2.5429, "step": 378390 }, { "epoch": 0.7538569424964937, "grad_norm": 0.1686772108078003, "learning_rate": 0.002, "loss": 2.5524, "step": 378400 }, { "epoch": 0.7538768647201326, "grad_norm": 0.17128539085388184, "learning_rate": 0.002, "loss": 2.5583, "step": 378410 }, { "epoch": 0.7538967869437715, "grad_norm": 0.16943156719207764, "learning_rate": 0.002, "loss": 2.5642, "step": 378420 }, { "epoch": 0.7539167091674104, "grad_norm": 0.1575007289648056, "learning_rate": 0.002, "loss": 2.5606, "step": 378430 }, { "epoch": 0.7539366313910494, "grad_norm": 0.20598120987415314, "learning_rate": 0.002, "loss": 2.5551, "step": 378440 }, { "epoch": 0.7539565536146883, "grad_norm": 0.15204179286956787, "learning_rate": 0.002, "loss": 2.5397, "step": 378450 }, { "epoch": 0.7539764758383272, "grad_norm": 0.16776911914348602, "learning_rate": 0.002, "loss": 2.5588, "step": 378460 }, { "epoch": 0.7539963980619661, "grad_norm": 0.15740753710269928, "learning_rate": 0.002, "loss": 2.558, "step": 378470 }, { "epoch": 0.754016320285605, "grad_norm": 0.1570778638124466, "learning_rate": 0.002, "loss": 2.55, "step": 378480 }, { "epoch": 0.754036242509244, "grad_norm": 0.18187353014945984, "learning_rate": 0.002, "loss": 2.5601, "step": 378490 }, { "epoch": 0.7540561647328828, "grad_norm": 0.14020265638828278, "learning_rate": 0.002, "loss": 2.5514, "step": 378500 }, { "epoch": 0.7540760869565217, "grad_norm": 0.19318756461143494, "learning_rate": 0.002, "loss": 2.5504, "step": 378510 }, { "epoch": 0.7540960091801606, "grad_norm": 0.14052951335906982, "learning_rate": 0.002, "loss": 2.5512, "step": 378520 }, { "epoch": 0.7541159314037996, "grad_norm": 0.1700415164232254, "learning_rate": 0.002, "loss": 2.5552, "step": 378530 }, { "epoch": 0.7541358536274385, "grad_norm": 0.1699608862400055, "learning_rate": 0.002, "loss": 2.5653, "step": 378540 }, { "epoch": 0.7541557758510774, "grad_norm": 0.17644256353378296, "learning_rate": 0.002, "loss": 2.5564, "step": 378550 }, { "epoch": 0.7541756980747163, "grad_norm": 0.1822700947523117, "learning_rate": 0.002, "loss": 2.5456, "step": 378560 }, { "epoch": 0.7541956202983552, "grad_norm": 0.16221405565738678, "learning_rate": 0.002, "loss": 2.5529, "step": 378570 }, { "epoch": 0.7542155425219942, "grad_norm": 0.16334453225135803, "learning_rate": 0.002, "loss": 2.5547, "step": 378580 }, { "epoch": 0.7542354647456331, "grad_norm": 0.16810525953769684, "learning_rate": 0.002, "loss": 2.5542, "step": 378590 }, { "epoch": 0.754255386969272, "grad_norm": 0.1522928923368454, "learning_rate": 0.002, "loss": 2.5523, "step": 378600 }, { "epoch": 0.7542753091929109, "grad_norm": 0.1881783902645111, "learning_rate": 0.002, "loss": 2.5501, "step": 378610 }, { "epoch": 0.7542952314165497, "grad_norm": 0.16414298117160797, "learning_rate": 0.002, "loss": 2.5366, "step": 378620 }, { "epoch": 0.7543151536401888, "grad_norm": 0.15954507887363434, "learning_rate": 0.002, "loss": 2.5632, "step": 378630 }, { "epoch": 0.7543350758638276, "grad_norm": 0.15002115070819855, "learning_rate": 0.002, "loss": 2.5553, "step": 378640 }, { "epoch": 0.7543549980874665, "grad_norm": 0.17004874348640442, "learning_rate": 0.002, "loss": 2.548, "step": 378650 }, { "epoch": 0.7543749203111054, "grad_norm": 0.1521579772233963, "learning_rate": 0.002, "loss": 2.5483, "step": 378660 }, { "epoch": 0.7543948425347443, "grad_norm": 0.16323916614055634, "learning_rate": 0.002, "loss": 2.5735, "step": 378670 }, { "epoch": 0.7544147647583833, "grad_norm": 0.15410223603248596, "learning_rate": 0.002, "loss": 2.5499, "step": 378680 }, { "epoch": 0.7544346869820222, "grad_norm": 0.15238434076309204, "learning_rate": 0.002, "loss": 2.5288, "step": 378690 }, { "epoch": 0.7544546092056611, "grad_norm": 0.174372598528862, "learning_rate": 0.002, "loss": 2.5643, "step": 378700 }, { "epoch": 0.7544745314293, "grad_norm": 0.14204604923725128, "learning_rate": 0.002, "loss": 2.5564, "step": 378710 }, { "epoch": 0.7544944536529389, "grad_norm": 0.15086008608341217, "learning_rate": 0.002, "loss": 2.5478, "step": 378720 }, { "epoch": 0.7545143758765779, "grad_norm": 0.16678375005722046, "learning_rate": 0.002, "loss": 2.5697, "step": 378730 }, { "epoch": 0.7545342981002168, "grad_norm": 0.17085902392864227, "learning_rate": 0.002, "loss": 2.5492, "step": 378740 }, { "epoch": 0.7545542203238557, "grad_norm": 0.1614009588956833, "learning_rate": 0.002, "loss": 2.5554, "step": 378750 }, { "epoch": 0.7545741425474946, "grad_norm": 0.15385834872722626, "learning_rate": 0.002, "loss": 2.5564, "step": 378760 }, { "epoch": 0.7545940647711334, "grad_norm": 0.16794873774051666, "learning_rate": 0.002, "loss": 2.5369, "step": 378770 }, { "epoch": 0.7546139869947724, "grad_norm": 0.17588379979133606, "learning_rate": 0.002, "loss": 2.5538, "step": 378780 }, { "epoch": 0.7546339092184113, "grad_norm": 0.13346828520298004, "learning_rate": 0.002, "loss": 2.5335, "step": 378790 }, { "epoch": 0.7546538314420502, "grad_norm": 0.1906900256872177, "learning_rate": 0.002, "loss": 2.5492, "step": 378800 }, { "epoch": 0.7546737536656891, "grad_norm": 0.18824313580989838, "learning_rate": 0.002, "loss": 2.5435, "step": 378810 }, { "epoch": 0.754693675889328, "grad_norm": 0.14224159717559814, "learning_rate": 0.002, "loss": 2.557, "step": 378820 }, { "epoch": 0.754713598112967, "grad_norm": 0.16337427496910095, "learning_rate": 0.002, "loss": 2.5662, "step": 378830 }, { "epoch": 0.7547335203366059, "grad_norm": 0.14412353932857513, "learning_rate": 0.002, "loss": 2.5518, "step": 378840 }, { "epoch": 0.7547534425602448, "grad_norm": 0.18416237831115723, "learning_rate": 0.002, "loss": 2.5493, "step": 378850 }, { "epoch": 0.7547733647838837, "grad_norm": 0.15375454723834991, "learning_rate": 0.002, "loss": 2.5593, "step": 378860 }, { "epoch": 0.7547932870075227, "grad_norm": 0.1821860671043396, "learning_rate": 0.002, "loss": 2.5458, "step": 378870 }, { "epoch": 0.7548132092311616, "grad_norm": 0.1449919044971466, "learning_rate": 0.002, "loss": 2.5556, "step": 378880 }, { "epoch": 0.7548331314548005, "grad_norm": 0.20592869818210602, "learning_rate": 0.002, "loss": 2.541, "step": 378890 }, { "epoch": 0.7548530536784394, "grad_norm": 0.14430657029151917, "learning_rate": 0.002, "loss": 2.5531, "step": 378900 }, { "epoch": 0.7548729759020782, "grad_norm": 0.15399722754955292, "learning_rate": 0.002, "loss": 2.5649, "step": 378910 }, { "epoch": 0.7548928981257172, "grad_norm": 0.16523626446723938, "learning_rate": 0.002, "loss": 2.542, "step": 378920 }, { "epoch": 0.7549128203493561, "grad_norm": 0.1506102979183197, "learning_rate": 0.002, "loss": 2.5592, "step": 378930 }, { "epoch": 0.754932742572995, "grad_norm": 0.18097567558288574, "learning_rate": 0.002, "loss": 2.5558, "step": 378940 }, { "epoch": 0.7549526647966339, "grad_norm": 0.16772319376468658, "learning_rate": 0.002, "loss": 2.5672, "step": 378950 }, { "epoch": 0.7549725870202728, "grad_norm": 0.15119841694831848, "learning_rate": 0.002, "loss": 2.5578, "step": 378960 }, { "epoch": 0.7549925092439118, "grad_norm": 0.14189185202121735, "learning_rate": 0.002, "loss": 2.565, "step": 378970 }, { "epoch": 0.7550124314675507, "grad_norm": 0.15093711018562317, "learning_rate": 0.002, "loss": 2.5396, "step": 378980 }, { "epoch": 0.7550323536911896, "grad_norm": 0.1726623922586441, "learning_rate": 0.002, "loss": 2.56, "step": 378990 }, { "epoch": 0.7550522759148285, "grad_norm": 0.16694237291812897, "learning_rate": 0.002, "loss": 2.5523, "step": 379000 }, { "epoch": 0.7550721981384674, "grad_norm": 0.1810939759016037, "learning_rate": 0.002, "loss": 2.5515, "step": 379010 }, { "epoch": 0.7550921203621064, "grad_norm": 0.266801118850708, "learning_rate": 0.002, "loss": 2.5519, "step": 379020 }, { "epoch": 0.7551120425857453, "grad_norm": 0.2019360512495041, "learning_rate": 0.002, "loss": 2.5548, "step": 379030 }, { "epoch": 0.7551319648093842, "grad_norm": 0.16264115273952484, "learning_rate": 0.002, "loss": 2.5552, "step": 379040 }, { "epoch": 0.755151887033023, "grad_norm": 0.1659257858991623, "learning_rate": 0.002, "loss": 2.5654, "step": 379050 }, { "epoch": 0.7551718092566619, "grad_norm": 0.13764482736587524, "learning_rate": 0.002, "loss": 2.5534, "step": 379060 }, { "epoch": 0.7551917314803009, "grad_norm": 0.14699576795101166, "learning_rate": 0.002, "loss": 2.5529, "step": 379070 }, { "epoch": 0.7552116537039398, "grad_norm": 0.13604417443275452, "learning_rate": 0.002, "loss": 2.5665, "step": 379080 }, { "epoch": 0.7552315759275787, "grad_norm": 0.21556030213832855, "learning_rate": 0.002, "loss": 2.5631, "step": 379090 }, { "epoch": 0.7552514981512176, "grad_norm": 0.14466390013694763, "learning_rate": 0.002, "loss": 2.5553, "step": 379100 }, { "epoch": 0.7552714203748565, "grad_norm": 0.15267717838287354, "learning_rate": 0.002, "loss": 2.545, "step": 379110 }, { "epoch": 0.7552913425984955, "grad_norm": 0.13804584741592407, "learning_rate": 0.002, "loss": 2.5695, "step": 379120 }, { "epoch": 0.7553112648221344, "grad_norm": 0.17614485323429108, "learning_rate": 0.002, "loss": 2.5607, "step": 379130 }, { "epoch": 0.7553311870457733, "grad_norm": 0.14245834946632385, "learning_rate": 0.002, "loss": 2.543, "step": 379140 }, { "epoch": 0.7553511092694122, "grad_norm": 0.16374103724956512, "learning_rate": 0.002, "loss": 2.5443, "step": 379150 }, { "epoch": 0.7553710314930512, "grad_norm": 0.17038364708423615, "learning_rate": 0.002, "loss": 2.5585, "step": 379160 }, { "epoch": 0.7553909537166901, "grad_norm": 0.16696754097938538, "learning_rate": 0.002, "loss": 2.5408, "step": 379170 }, { "epoch": 0.755410875940329, "grad_norm": 0.16846659779548645, "learning_rate": 0.002, "loss": 2.5299, "step": 379180 }, { "epoch": 0.7554307981639679, "grad_norm": 0.16439971327781677, "learning_rate": 0.002, "loss": 2.5539, "step": 379190 }, { "epoch": 0.7554507203876067, "grad_norm": 0.14642493426799774, "learning_rate": 0.002, "loss": 2.5373, "step": 379200 }, { "epoch": 0.7554706426112457, "grad_norm": 0.18697577714920044, "learning_rate": 0.002, "loss": 2.5576, "step": 379210 }, { "epoch": 0.7554905648348846, "grad_norm": 0.17981566488742828, "learning_rate": 0.002, "loss": 2.5557, "step": 379220 }, { "epoch": 0.7555104870585235, "grad_norm": 0.15478704869747162, "learning_rate": 0.002, "loss": 2.5593, "step": 379230 }, { "epoch": 0.7555304092821624, "grad_norm": 0.15764741599559784, "learning_rate": 0.002, "loss": 2.5712, "step": 379240 }, { "epoch": 0.7555503315058013, "grad_norm": 0.1614643931388855, "learning_rate": 0.002, "loss": 2.5683, "step": 379250 }, { "epoch": 0.7555702537294403, "grad_norm": 0.16979187726974487, "learning_rate": 0.002, "loss": 2.5704, "step": 379260 }, { "epoch": 0.7555901759530792, "grad_norm": 0.1944737732410431, "learning_rate": 0.002, "loss": 2.5699, "step": 379270 }, { "epoch": 0.7556100981767181, "grad_norm": 0.16289955377578735, "learning_rate": 0.002, "loss": 2.5468, "step": 379280 }, { "epoch": 0.755630020400357, "grad_norm": 0.1960143893957138, "learning_rate": 0.002, "loss": 2.5468, "step": 379290 }, { "epoch": 0.7556499426239959, "grad_norm": 0.14604206383228302, "learning_rate": 0.002, "loss": 2.5601, "step": 379300 }, { "epoch": 0.7556698648476349, "grad_norm": 0.1810968518257141, "learning_rate": 0.002, "loss": 2.5461, "step": 379310 }, { "epoch": 0.7556897870712738, "grad_norm": 0.16254863142967224, "learning_rate": 0.002, "loss": 2.5538, "step": 379320 }, { "epoch": 0.7557097092949127, "grad_norm": 0.17668533325195312, "learning_rate": 0.002, "loss": 2.5608, "step": 379330 }, { "epoch": 0.7557296315185515, "grad_norm": 0.17830342054367065, "learning_rate": 0.002, "loss": 2.5428, "step": 379340 }, { "epoch": 0.7557495537421904, "grad_norm": 0.1782982349395752, "learning_rate": 0.002, "loss": 2.5732, "step": 379350 }, { "epoch": 0.7557694759658294, "grad_norm": 0.14069390296936035, "learning_rate": 0.002, "loss": 2.573, "step": 379360 }, { "epoch": 0.7557893981894683, "grad_norm": 0.18334943056106567, "learning_rate": 0.002, "loss": 2.5434, "step": 379370 }, { "epoch": 0.7558093204131072, "grad_norm": 0.16777926683425903, "learning_rate": 0.002, "loss": 2.5598, "step": 379380 }, { "epoch": 0.7558292426367461, "grad_norm": 0.1651909351348877, "learning_rate": 0.002, "loss": 2.5457, "step": 379390 }, { "epoch": 0.755849164860385, "grad_norm": 0.167683407664299, "learning_rate": 0.002, "loss": 2.5388, "step": 379400 }, { "epoch": 0.755869087084024, "grad_norm": 0.1787596195936203, "learning_rate": 0.002, "loss": 2.5388, "step": 379410 }, { "epoch": 0.7558890093076629, "grad_norm": 0.16417653858661652, "learning_rate": 0.002, "loss": 2.5338, "step": 379420 }, { "epoch": 0.7559089315313018, "grad_norm": 0.18469743430614471, "learning_rate": 0.002, "loss": 2.554, "step": 379430 }, { "epoch": 0.7559288537549407, "grad_norm": 0.18163429200649261, "learning_rate": 0.002, "loss": 2.5547, "step": 379440 }, { "epoch": 0.7559487759785797, "grad_norm": 0.1507771760225296, "learning_rate": 0.002, "loss": 2.5458, "step": 379450 }, { "epoch": 0.7559686982022186, "grad_norm": 0.18485017120838165, "learning_rate": 0.002, "loss": 2.5536, "step": 379460 }, { "epoch": 0.7559886204258575, "grad_norm": 0.1691247820854187, "learning_rate": 0.002, "loss": 2.56, "step": 379470 }, { "epoch": 0.7560085426494964, "grad_norm": 0.16102832555770874, "learning_rate": 0.002, "loss": 2.55, "step": 379480 }, { "epoch": 0.7560284648731352, "grad_norm": 0.174394428730011, "learning_rate": 0.002, "loss": 2.5453, "step": 379490 }, { "epoch": 0.7560483870967742, "grad_norm": 0.16978687047958374, "learning_rate": 0.002, "loss": 2.5507, "step": 379500 }, { "epoch": 0.7560683093204131, "grad_norm": 0.1583884060382843, "learning_rate": 0.002, "loss": 2.5562, "step": 379510 }, { "epoch": 0.756088231544052, "grad_norm": 0.15689383447170258, "learning_rate": 0.002, "loss": 2.5565, "step": 379520 }, { "epoch": 0.7561081537676909, "grad_norm": 0.16922415792942047, "learning_rate": 0.002, "loss": 2.5491, "step": 379530 }, { "epoch": 0.7561280759913298, "grad_norm": 0.17251306772232056, "learning_rate": 0.002, "loss": 2.5522, "step": 379540 }, { "epoch": 0.7561479982149688, "grad_norm": 0.17889884114265442, "learning_rate": 0.002, "loss": 2.5373, "step": 379550 }, { "epoch": 0.7561679204386077, "grad_norm": 0.18343086540699005, "learning_rate": 0.002, "loss": 2.5666, "step": 379560 }, { "epoch": 0.7561878426622466, "grad_norm": 0.1647200584411621, "learning_rate": 0.002, "loss": 2.5503, "step": 379570 }, { "epoch": 0.7562077648858855, "grad_norm": 0.16407178342342377, "learning_rate": 0.002, "loss": 2.5569, "step": 379580 }, { "epoch": 0.7562276871095244, "grad_norm": 0.17544735968112946, "learning_rate": 0.002, "loss": 2.5608, "step": 379590 }, { "epoch": 0.7562476093331634, "grad_norm": 0.13851840794086456, "learning_rate": 0.002, "loss": 2.556, "step": 379600 }, { "epoch": 0.7562675315568023, "grad_norm": 0.14971305429935455, "learning_rate": 0.002, "loss": 2.5578, "step": 379610 }, { "epoch": 0.7562874537804412, "grad_norm": 0.19733554124832153, "learning_rate": 0.002, "loss": 2.5703, "step": 379620 }, { "epoch": 0.75630737600408, "grad_norm": 0.15465599298477173, "learning_rate": 0.002, "loss": 2.5467, "step": 379630 }, { "epoch": 0.7563272982277189, "grad_norm": 0.179335817694664, "learning_rate": 0.002, "loss": 2.5416, "step": 379640 }, { "epoch": 0.7563472204513579, "grad_norm": 0.17090685665607452, "learning_rate": 0.002, "loss": 2.549, "step": 379650 }, { "epoch": 0.7563671426749968, "grad_norm": 0.1583327203989029, "learning_rate": 0.002, "loss": 2.5666, "step": 379660 }, { "epoch": 0.7563870648986357, "grad_norm": 0.16204452514648438, "learning_rate": 0.002, "loss": 2.5306, "step": 379670 }, { "epoch": 0.7564069871222746, "grad_norm": 0.16446606814861298, "learning_rate": 0.002, "loss": 2.548, "step": 379680 }, { "epoch": 0.7564269093459135, "grad_norm": 0.149882510304451, "learning_rate": 0.002, "loss": 2.5536, "step": 379690 }, { "epoch": 0.7564468315695525, "grad_norm": 0.1640254110097885, "learning_rate": 0.002, "loss": 2.5503, "step": 379700 }, { "epoch": 0.7564667537931914, "grad_norm": 0.18483921885490417, "learning_rate": 0.002, "loss": 2.5529, "step": 379710 }, { "epoch": 0.7564866760168303, "grad_norm": 0.19907823204994202, "learning_rate": 0.002, "loss": 2.5602, "step": 379720 }, { "epoch": 0.7565065982404692, "grad_norm": 0.16173993051052094, "learning_rate": 0.002, "loss": 2.5553, "step": 379730 }, { "epoch": 0.7565265204641082, "grad_norm": 0.16154152154922485, "learning_rate": 0.002, "loss": 2.5681, "step": 379740 }, { "epoch": 0.7565464426877471, "grad_norm": 0.15572500228881836, "learning_rate": 0.002, "loss": 2.5489, "step": 379750 }, { "epoch": 0.756566364911386, "grad_norm": 0.18970359861850739, "learning_rate": 0.002, "loss": 2.5634, "step": 379760 }, { "epoch": 0.7565862871350248, "grad_norm": 0.16983243823051453, "learning_rate": 0.002, "loss": 2.548, "step": 379770 }, { "epoch": 0.7566062093586637, "grad_norm": 0.13761377334594727, "learning_rate": 0.002, "loss": 2.5588, "step": 379780 }, { "epoch": 0.7566261315823027, "grad_norm": 0.1704823076725006, "learning_rate": 0.002, "loss": 2.5472, "step": 379790 }, { "epoch": 0.7566460538059416, "grad_norm": 0.16800229251384735, "learning_rate": 0.002, "loss": 2.5519, "step": 379800 }, { "epoch": 0.7566659760295805, "grad_norm": 0.17313504219055176, "learning_rate": 0.002, "loss": 2.5629, "step": 379810 }, { "epoch": 0.7566858982532194, "grad_norm": 0.1573392003774643, "learning_rate": 0.002, "loss": 2.5617, "step": 379820 }, { "epoch": 0.7567058204768583, "grad_norm": 0.23760122060775757, "learning_rate": 0.002, "loss": 2.5501, "step": 379830 }, { "epoch": 0.7567257427004973, "grad_norm": 0.17332088947296143, "learning_rate": 0.002, "loss": 2.5491, "step": 379840 }, { "epoch": 0.7567456649241362, "grad_norm": 0.1876550316810608, "learning_rate": 0.002, "loss": 2.5509, "step": 379850 }, { "epoch": 0.7567655871477751, "grad_norm": 0.1588282287120819, "learning_rate": 0.002, "loss": 2.5575, "step": 379860 }, { "epoch": 0.756785509371414, "grad_norm": 0.17807942628860474, "learning_rate": 0.002, "loss": 2.5674, "step": 379870 }, { "epoch": 0.7568054315950529, "grad_norm": 0.16326193511486053, "learning_rate": 0.002, "loss": 2.5586, "step": 379880 }, { "epoch": 0.7568253538186919, "grad_norm": 0.1714177131652832, "learning_rate": 0.002, "loss": 2.5722, "step": 379890 }, { "epoch": 0.7568452760423308, "grad_norm": 0.1684613674879074, "learning_rate": 0.002, "loss": 2.5547, "step": 379900 }, { "epoch": 0.7568651982659697, "grad_norm": 0.19937774538993835, "learning_rate": 0.002, "loss": 2.5527, "step": 379910 }, { "epoch": 0.7568851204896085, "grad_norm": 0.16603267192840576, "learning_rate": 0.002, "loss": 2.5547, "step": 379920 }, { "epoch": 0.7569050427132474, "grad_norm": 0.1443759948015213, "learning_rate": 0.002, "loss": 2.5546, "step": 379930 }, { "epoch": 0.7569249649368864, "grad_norm": 0.2050207108259201, "learning_rate": 0.002, "loss": 2.5539, "step": 379940 }, { "epoch": 0.7569448871605253, "grad_norm": 0.16799497604370117, "learning_rate": 0.002, "loss": 2.5599, "step": 379950 }, { "epoch": 0.7569648093841642, "grad_norm": 0.17505741119384766, "learning_rate": 0.002, "loss": 2.5546, "step": 379960 }, { "epoch": 0.7569847316078031, "grad_norm": 0.14767992496490479, "learning_rate": 0.002, "loss": 2.5486, "step": 379970 }, { "epoch": 0.757004653831442, "grad_norm": 0.1377810686826706, "learning_rate": 0.002, "loss": 2.5496, "step": 379980 }, { "epoch": 0.757024576055081, "grad_norm": 0.18642671406269073, "learning_rate": 0.002, "loss": 2.5665, "step": 379990 }, { "epoch": 0.7570444982787199, "grad_norm": 0.172836035490036, "learning_rate": 0.002, "loss": 2.55, "step": 380000 }, { "epoch": 0.7570644205023588, "grad_norm": 0.153257817029953, "learning_rate": 0.002, "loss": 2.5497, "step": 380010 }, { "epoch": 0.7570843427259977, "grad_norm": 0.24519972503185272, "learning_rate": 0.002, "loss": 2.5492, "step": 380020 }, { "epoch": 0.7571042649496367, "grad_norm": 0.14897164702415466, "learning_rate": 0.002, "loss": 2.5575, "step": 380030 }, { "epoch": 0.7571241871732756, "grad_norm": 0.1579451858997345, "learning_rate": 0.002, "loss": 2.5349, "step": 380040 }, { "epoch": 0.7571441093969145, "grad_norm": 0.17146621644496918, "learning_rate": 0.002, "loss": 2.5573, "step": 380050 }, { "epoch": 0.7571640316205533, "grad_norm": 0.1892847865819931, "learning_rate": 0.002, "loss": 2.5534, "step": 380060 }, { "epoch": 0.7571839538441922, "grad_norm": 0.1519077569246292, "learning_rate": 0.002, "loss": 2.5476, "step": 380070 }, { "epoch": 0.7572038760678312, "grad_norm": 0.17025789618492126, "learning_rate": 0.002, "loss": 2.5502, "step": 380080 }, { "epoch": 0.7572237982914701, "grad_norm": 0.1721723973751068, "learning_rate": 0.002, "loss": 2.539, "step": 380090 }, { "epoch": 0.757243720515109, "grad_norm": 0.15937282145023346, "learning_rate": 0.002, "loss": 2.5426, "step": 380100 }, { "epoch": 0.7572636427387479, "grad_norm": 0.18996945023536682, "learning_rate": 0.002, "loss": 2.5483, "step": 380110 }, { "epoch": 0.7572835649623868, "grad_norm": 0.1571091115474701, "learning_rate": 0.002, "loss": 2.5581, "step": 380120 }, { "epoch": 0.7573034871860258, "grad_norm": 0.15792404115200043, "learning_rate": 0.002, "loss": 2.56, "step": 380130 }, { "epoch": 0.7573234094096647, "grad_norm": 0.15521752834320068, "learning_rate": 0.002, "loss": 2.5503, "step": 380140 }, { "epoch": 0.7573433316333036, "grad_norm": 0.17940035462379456, "learning_rate": 0.002, "loss": 2.5625, "step": 380150 }, { "epoch": 0.7573632538569425, "grad_norm": 0.17650459706783295, "learning_rate": 0.002, "loss": 2.5512, "step": 380160 }, { "epoch": 0.7573831760805814, "grad_norm": 0.16028572618961334, "learning_rate": 0.002, "loss": 2.5571, "step": 380170 }, { "epoch": 0.7574030983042204, "grad_norm": 0.1494506299495697, "learning_rate": 0.002, "loss": 2.5455, "step": 380180 }, { "epoch": 0.7574230205278593, "grad_norm": 0.15393690764904022, "learning_rate": 0.002, "loss": 2.5571, "step": 380190 }, { "epoch": 0.7574429427514981, "grad_norm": 0.1792823076248169, "learning_rate": 0.002, "loss": 2.5505, "step": 380200 }, { "epoch": 0.757462864975137, "grad_norm": 0.15964728593826294, "learning_rate": 0.002, "loss": 2.5259, "step": 380210 }, { "epoch": 0.7574827871987759, "grad_norm": 0.15690506994724274, "learning_rate": 0.002, "loss": 2.547, "step": 380220 }, { "epoch": 0.7575027094224149, "grad_norm": 0.20741671323776245, "learning_rate": 0.002, "loss": 2.5396, "step": 380230 }, { "epoch": 0.7575226316460538, "grad_norm": 0.17194180190563202, "learning_rate": 0.002, "loss": 2.5624, "step": 380240 }, { "epoch": 0.7575425538696927, "grad_norm": 0.20433714985847473, "learning_rate": 0.002, "loss": 2.5483, "step": 380250 }, { "epoch": 0.7575624760933316, "grad_norm": 0.14438645541667938, "learning_rate": 0.002, "loss": 2.5657, "step": 380260 }, { "epoch": 0.7575823983169705, "grad_norm": 0.15362539887428284, "learning_rate": 0.002, "loss": 2.5552, "step": 380270 }, { "epoch": 0.7576023205406095, "grad_norm": 0.14209270477294922, "learning_rate": 0.002, "loss": 2.5627, "step": 380280 }, { "epoch": 0.7576222427642484, "grad_norm": 0.1766948103904724, "learning_rate": 0.002, "loss": 2.5589, "step": 380290 }, { "epoch": 0.7576421649878873, "grad_norm": 0.1660282015800476, "learning_rate": 0.002, "loss": 2.5587, "step": 380300 }, { "epoch": 0.7576620872115262, "grad_norm": 0.164915069937706, "learning_rate": 0.002, "loss": 2.5556, "step": 380310 }, { "epoch": 0.7576820094351651, "grad_norm": 0.1552526354789734, "learning_rate": 0.002, "loss": 2.5463, "step": 380320 }, { "epoch": 0.7577019316588041, "grad_norm": 0.17691831290721893, "learning_rate": 0.002, "loss": 2.5711, "step": 380330 }, { "epoch": 0.757721853882443, "grad_norm": 0.17738807201385498, "learning_rate": 0.002, "loss": 2.5599, "step": 380340 }, { "epoch": 0.7577417761060818, "grad_norm": 0.1456904113292694, "learning_rate": 0.002, "loss": 2.5503, "step": 380350 }, { "epoch": 0.7577616983297207, "grad_norm": 0.1762586236000061, "learning_rate": 0.002, "loss": 2.5599, "step": 380360 }, { "epoch": 0.7577816205533597, "grad_norm": 0.15384246408939362, "learning_rate": 0.002, "loss": 2.5562, "step": 380370 }, { "epoch": 0.7578015427769986, "grad_norm": 0.17291660606861115, "learning_rate": 0.002, "loss": 2.5553, "step": 380380 }, { "epoch": 0.7578214650006375, "grad_norm": 0.1675748974084854, "learning_rate": 0.002, "loss": 2.5532, "step": 380390 }, { "epoch": 0.7578413872242764, "grad_norm": 0.17047680914402008, "learning_rate": 0.002, "loss": 2.5273, "step": 380400 }, { "epoch": 0.7578613094479153, "grad_norm": 0.192280575633049, "learning_rate": 0.002, "loss": 2.5521, "step": 380410 }, { "epoch": 0.7578812316715543, "grad_norm": 0.13108167052268982, "learning_rate": 0.002, "loss": 2.5511, "step": 380420 }, { "epoch": 0.7579011538951932, "grad_norm": 0.1646409034729004, "learning_rate": 0.002, "loss": 2.5644, "step": 380430 }, { "epoch": 0.7579210761188321, "grad_norm": 0.1600094437599182, "learning_rate": 0.002, "loss": 2.5556, "step": 380440 }, { "epoch": 0.757940998342471, "grad_norm": 0.13446560502052307, "learning_rate": 0.002, "loss": 2.5603, "step": 380450 }, { "epoch": 0.7579609205661099, "grad_norm": 0.18076303601264954, "learning_rate": 0.002, "loss": 2.5355, "step": 380460 }, { "epoch": 0.7579808427897489, "grad_norm": 0.13927674293518066, "learning_rate": 0.002, "loss": 2.5613, "step": 380470 }, { "epoch": 0.7580007650133878, "grad_norm": 0.1787630170583725, "learning_rate": 0.002, "loss": 2.5492, "step": 380480 }, { "epoch": 0.7580206872370266, "grad_norm": 0.19354137778282166, "learning_rate": 0.002, "loss": 2.5583, "step": 380490 }, { "epoch": 0.7580406094606655, "grad_norm": 0.15999412536621094, "learning_rate": 0.002, "loss": 2.5662, "step": 380500 }, { "epoch": 0.7580605316843044, "grad_norm": 0.17730440199375153, "learning_rate": 0.002, "loss": 2.5463, "step": 380510 }, { "epoch": 0.7580804539079434, "grad_norm": 0.15786123275756836, "learning_rate": 0.002, "loss": 2.5493, "step": 380520 }, { "epoch": 0.7581003761315823, "grad_norm": 0.16324713826179504, "learning_rate": 0.002, "loss": 2.5639, "step": 380530 }, { "epoch": 0.7581202983552212, "grad_norm": 0.16148634254932404, "learning_rate": 0.002, "loss": 2.5637, "step": 380540 }, { "epoch": 0.7581402205788601, "grad_norm": 0.17743059992790222, "learning_rate": 0.002, "loss": 2.5647, "step": 380550 }, { "epoch": 0.758160142802499, "grad_norm": 0.19565340876579285, "learning_rate": 0.002, "loss": 2.5425, "step": 380560 }, { "epoch": 0.758180065026138, "grad_norm": 0.14830340445041656, "learning_rate": 0.002, "loss": 2.5517, "step": 380570 }, { "epoch": 0.7581999872497769, "grad_norm": 0.15805600583553314, "learning_rate": 0.002, "loss": 2.5486, "step": 380580 }, { "epoch": 0.7582199094734158, "grad_norm": 0.17307353019714355, "learning_rate": 0.002, "loss": 2.5583, "step": 380590 }, { "epoch": 0.7582398316970547, "grad_norm": 0.17381593585014343, "learning_rate": 0.002, "loss": 2.5439, "step": 380600 }, { "epoch": 0.7582597539206936, "grad_norm": 0.15895508229732513, "learning_rate": 0.002, "loss": 2.5624, "step": 380610 }, { "epoch": 0.7582796761443326, "grad_norm": 0.15403760969638824, "learning_rate": 0.002, "loss": 2.5573, "step": 380620 }, { "epoch": 0.7582995983679715, "grad_norm": 0.14195571839809418, "learning_rate": 0.002, "loss": 2.5598, "step": 380630 }, { "epoch": 0.7583195205916103, "grad_norm": 0.1944984644651413, "learning_rate": 0.002, "loss": 2.5463, "step": 380640 }, { "epoch": 0.7583394428152492, "grad_norm": 0.143406480550766, "learning_rate": 0.002, "loss": 2.5549, "step": 380650 }, { "epoch": 0.7583593650388882, "grad_norm": 0.18407690525054932, "learning_rate": 0.002, "loss": 2.5723, "step": 380660 }, { "epoch": 0.7583792872625271, "grad_norm": 0.17305582761764526, "learning_rate": 0.002, "loss": 2.5602, "step": 380670 }, { "epoch": 0.758399209486166, "grad_norm": 0.18417207896709442, "learning_rate": 0.002, "loss": 2.5543, "step": 380680 }, { "epoch": 0.7584191317098049, "grad_norm": 0.16391824185848236, "learning_rate": 0.002, "loss": 2.5587, "step": 380690 }, { "epoch": 0.7584390539334438, "grad_norm": 0.18428808450698853, "learning_rate": 0.002, "loss": 2.5495, "step": 380700 }, { "epoch": 0.7584589761570828, "grad_norm": 0.16326020658016205, "learning_rate": 0.002, "loss": 2.554, "step": 380710 }, { "epoch": 0.7584788983807217, "grad_norm": 0.19323483109474182, "learning_rate": 0.002, "loss": 2.5504, "step": 380720 }, { "epoch": 0.7584988206043606, "grad_norm": 0.1924670785665512, "learning_rate": 0.002, "loss": 2.5554, "step": 380730 }, { "epoch": 0.7585187428279995, "grad_norm": 0.15345029532909393, "learning_rate": 0.002, "loss": 2.5647, "step": 380740 }, { "epoch": 0.7585386650516384, "grad_norm": 0.15979766845703125, "learning_rate": 0.002, "loss": 2.5526, "step": 380750 }, { "epoch": 0.7585585872752774, "grad_norm": 0.19018910825252533, "learning_rate": 0.002, "loss": 2.5563, "step": 380760 }, { "epoch": 0.7585785094989163, "grad_norm": 0.18237543106079102, "learning_rate": 0.002, "loss": 2.5506, "step": 380770 }, { "epoch": 0.7585984317225551, "grad_norm": 0.17545343935489655, "learning_rate": 0.002, "loss": 2.5526, "step": 380780 }, { "epoch": 0.758618353946194, "grad_norm": 0.1787368357181549, "learning_rate": 0.002, "loss": 2.5406, "step": 380790 }, { "epoch": 0.7586382761698329, "grad_norm": 0.17290546000003815, "learning_rate": 0.002, "loss": 2.5427, "step": 380800 }, { "epoch": 0.7586581983934719, "grad_norm": 0.15558728575706482, "learning_rate": 0.002, "loss": 2.5605, "step": 380810 }, { "epoch": 0.7586781206171108, "grad_norm": 0.16169512271881104, "learning_rate": 0.002, "loss": 2.5591, "step": 380820 }, { "epoch": 0.7586980428407497, "grad_norm": 0.21008311212062836, "learning_rate": 0.002, "loss": 2.5492, "step": 380830 }, { "epoch": 0.7587179650643886, "grad_norm": 0.18358689546585083, "learning_rate": 0.002, "loss": 2.5578, "step": 380840 }, { "epoch": 0.7587378872880275, "grad_norm": 0.15928927063941956, "learning_rate": 0.002, "loss": 2.5556, "step": 380850 }, { "epoch": 0.7587578095116665, "grad_norm": 0.16683341562747955, "learning_rate": 0.002, "loss": 2.5555, "step": 380860 }, { "epoch": 0.7587777317353054, "grad_norm": 0.14042404294013977, "learning_rate": 0.002, "loss": 2.5635, "step": 380870 }, { "epoch": 0.7587976539589443, "grad_norm": 0.1676757037639618, "learning_rate": 0.002, "loss": 2.5561, "step": 380880 }, { "epoch": 0.7588175761825832, "grad_norm": 0.13892611861228943, "learning_rate": 0.002, "loss": 2.5459, "step": 380890 }, { "epoch": 0.758837498406222, "grad_norm": 0.13659147918224335, "learning_rate": 0.002, "loss": 2.5703, "step": 380900 }, { "epoch": 0.7588574206298611, "grad_norm": 0.17229366302490234, "learning_rate": 0.002, "loss": 2.5537, "step": 380910 }, { "epoch": 0.7588773428535, "grad_norm": 0.18090049922466278, "learning_rate": 0.002, "loss": 2.5434, "step": 380920 }, { "epoch": 0.7588972650771388, "grad_norm": 0.18708425760269165, "learning_rate": 0.002, "loss": 2.5688, "step": 380930 }, { "epoch": 0.7589171873007777, "grad_norm": 0.17110379040241241, "learning_rate": 0.002, "loss": 2.5499, "step": 380940 }, { "epoch": 0.7589371095244167, "grad_norm": 0.19284172356128693, "learning_rate": 0.002, "loss": 2.5517, "step": 380950 }, { "epoch": 0.7589570317480556, "grad_norm": 0.1743183583021164, "learning_rate": 0.002, "loss": 2.5545, "step": 380960 }, { "epoch": 0.7589769539716945, "grad_norm": 0.15870802104473114, "learning_rate": 0.002, "loss": 2.5455, "step": 380970 }, { "epoch": 0.7589968761953334, "grad_norm": 0.16505636274814606, "learning_rate": 0.002, "loss": 2.5594, "step": 380980 }, { "epoch": 0.7590167984189723, "grad_norm": 0.1674983650445938, "learning_rate": 0.002, "loss": 2.5505, "step": 380990 }, { "epoch": 0.7590367206426113, "grad_norm": 0.155597522854805, "learning_rate": 0.002, "loss": 2.5593, "step": 381000 }, { "epoch": 0.7590566428662502, "grad_norm": 0.18069691956043243, "learning_rate": 0.002, "loss": 2.5552, "step": 381010 }, { "epoch": 0.7590765650898891, "grad_norm": 0.15133675932884216, "learning_rate": 0.002, "loss": 2.5545, "step": 381020 }, { "epoch": 0.759096487313528, "grad_norm": 0.1689232587814331, "learning_rate": 0.002, "loss": 2.5612, "step": 381030 }, { "epoch": 0.7591164095371669, "grad_norm": 0.15903857350349426, "learning_rate": 0.002, "loss": 2.5612, "step": 381040 }, { "epoch": 0.7591363317608059, "grad_norm": 0.14960896968841553, "learning_rate": 0.002, "loss": 2.5573, "step": 381050 }, { "epoch": 0.7591562539844448, "grad_norm": 0.16740676760673523, "learning_rate": 0.002, "loss": 2.5375, "step": 381060 }, { "epoch": 0.7591761762080836, "grad_norm": 0.201837420463562, "learning_rate": 0.002, "loss": 2.5554, "step": 381070 }, { "epoch": 0.7591960984317225, "grad_norm": 0.14965608716011047, "learning_rate": 0.002, "loss": 2.5666, "step": 381080 }, { "epoch": 0.7592160206553614, "grad_norm": 0.17157192528247833, "learning_rate": 0.002, "loss": 2.5692, "step": 381090 }, { "epoch": 0.7592359428790004, "grad_norm": 0.19286026060581207, "learning_rate": 0.002, "loss": 2.5532, "step": 381100 }, { "epoch": 0.7592558651026393, "grad_norm": 0.1559055894613266, "learning_rate": 0.002, "loss": 2.5814, "step": 381110 }, { "epoch": 0.7592757873262782, "grad_norm": 0.13274896144866943, "learning_rate": 0.002, "loss": 2.5652, "step": 381120 }, { "epoch": 0.7592957095499171, "grad_norm": 0.1534903198480606, "learning_rate": 0.002, "loss": 2.5473, "step": 381130 }, { "epoch": 0.759315631773556, "grad_norm": 0.20973002910614014, "learning_rate": 0.002, "loss": 2.5701, "step": 381140 }, { "epoch": 0.759335553997195, "grad_norm": 0.16154427826404572, "learning_rate": 0.002, "loss": 2.5596, "step": 381150 }, { "epoch": 0.7593554762208339, "grad_norm": 0.1450817584991455, "learning_rate": 0.002, "loss": 2.5343, "step": 381160 }, { "epoch": 0.7593753984444728, "grad_norm": 0.1556103378534317, "learning_rate": 0.002, "loss": 2.558, "step": 381170 }, { "epoch": 0.7593953206681117, "grad_norm": 0.1859055459499359, "learning_rate": 0.002, "loss": 2.5535, "step": 381180 }, { "epoch": 0.7594152428917506, "grad_norm": 0.16568623483181, "learning_rate": 0.002, "loss": 2.5609, "step": 381190 }, { "epoch": 0.7594351651153896, "grad_norm": 0.15319940447807312, "learning_rate": 0.002, "loss": 2.5486, "step": 381200 }, { "epoch": 0.7594550873390284, "grad_norm": 0.19336964190006256, "learning_rate": 0.002, "loss": 2.5375, "step": 381210 }, { "epoch": 0.7594750095626673, "grad_norm": 0.16207703948020935, "learning_rate": 0.002, "loss": 2.5369, "step": 381220 }, { "epoch": 0.7594949317863062, "grad_norm": 0.13955393433570862, "learning_rate": 0.002, "loss": 2.5528, "step": 381230 }, { "epoch": 0.7595148540099452, "grad_norm": 0.13491977751255035, "learning_rate": 0.002, "loss": 2.5607, "step": 381240 }, { "epoch": 0.7595347762335841, "grad_norm": 0.1781609058380127, "learning_rate": 0.002, "loss": 2.5734, "step": 381250 }, { "epoch": 0.759554698457223, "grad_norm": 0.18627670407295227, "learning_rate": 0.002, "loss": 2.569, "step": 381260 }, { "epoch": 0.7595746206808619, "grad_norm": 0.196842759847641, "learning_rate": 0.002, "loss": 2.5663, "step": 381270 }, { "epoch": 0.7595945429045008, "grad_norm": 0.1731484830379486, "learning_rate": 0.002, "loss": 2.5609, "step": 381280 }, { "epoch": 0.7596144651281398, "grad_norm": 0.14930638670921326, "learning_rate": 0.002, "loss": 2.5434, "step": 381290 }, { "epoch": 0.7596343873517787, "grad_norm": 0.1801566183567047, "learning_rate": 0.002, "loss": 2.572, "step": 381300 }, { "epoch": 0.7596543095754176, "grad_norm": 0.1599494218826294, "learning_rate": 0.002, "loss": 2.5606, "step": 381310 }, { "epoch": 0.7596742317990565, "grad_norm": 0.17663514614105225, "learning_rate": 0.002, "loss": 2.5568, "step": 381320 }, { "epoch": 0.7596941540226954, "grad_norm": 0.18913941085338593, "learning_rate": 0.002, "loss": 2.5362, "step": 381330 }, { "epoch": 0.7597140762463344, "grad_norm": 0.17739105224609375, "learning_rate": 0.002, "loss": 2.5536, "step": 381340 }, { "epoch": 0.7597339984699732, "grad_norm": 0.16461166739463806, "learning_rate": 0.002, "loss": 2.5372, "step": 381350 }, { "epoch": 0.7597539206936121, "grad_norm": 0.15925846993923187, "learning_rate": 0.002, "loss": 2.5639, "step": 381360 }, { "epoch": 0.759773842917251, "grad_norm": 0.16694174706935883, "learning_rate": 0.002, "loss": 2.5523, "step": 381370 }, { "epoch": 0.7597937651408899, "grad_norm": 0.2012442797422409, "learning_rate": 0.002, "loss": 2.5444, "step": 381380 }, { "epoch": 0.7598136873645289, "grad_norm": 0.16873811185359955, "learning_rate": 0.002, "loss": 2.5578, "step": 381390 }, { "epoch": 0.7598336095881678, "grad_norm": 0.16680431365966797, "learning_rate": 0.002, "loss": 2.5456, "step": 381400 }, { "epoch": 0.7598535318118067, "grad_norm": 0.15426650643348694, "learning_rate": 0.002, "loss": 2.5604, "step": 381410 }, { "epoch": 0.7598734540354456, "grad_norm": 0.21662494540214539, "learning_rate": 0.002, "loss": 2.5564, "step": 381420 }, { "epoch": 0.7598933762590845, "grad_norm": 0.1500391662120819, "learning_rate": 0.002, "loss": 2.5487, "step": 381430 }, { "epoch": 0.7599132984827235, "grad_norm": 0.14496853947639465, "learning_rate": 0.002, "loss": 2.5628, "step": 381440 }, { "epoch": 0.7599332207063624, "grad_norm": 0.16056758165359497, "learning_rate": 0.002, "loss": 2.5569, "step": 381450 }, { "epoch": 0.7599531429300013, "grad_norm": 0.14484477043151855, "learning_rate": 0.002, "loss": 2.5438, "step": 381460 }, { "epoch": 0.7599730651536402, "grad_norm": 0.16620342433452606, "learning_rate": 0.002, "loss": 2.558, "step": 381470 }, { "epoch": 0.759992987377279, "grad_norm": 0.15665623545646667, "learning_rate": 0.002, "loss": 2.5641, "step": 381480 }, { "epoch": 0.760012909600918, "grad_norm": 0.15993256866931915, "learning_rate": 0.002, "loss": 2.5416, "step": 381490 }, { "epoch": 0.7600328318245569, "grad_norm": 0.16013149917125702, "learning_rate": 0.002, "loss": 2.5566, "step": 381500 }, { "epoch": 0.7600527540481958, "grad_norm": 0.1511934995651245, "learning_rate": 0.002, "loss": 2.5469, "step": 381510 }, { "epoch": 0.7600726762718347, "grad_norm": 0.16086538136005402, "learning_rate": 0.002, "loss": 2.5605, "step": 381520 }, { "epoch": 0.7600925984954737, "grad_norm": 0.1565108299255371, "learning_rate": 0.002, "loss": 2.561, "step": 381530 }, { "epoch": 0.7601125207191126, "grad_norm": 0.21800082921981812, "learning_rate": 0.002, "loss": 2.5632, "step": 381540 }, { "epoch": 0.7601324429427515, "grad_norm": 0.14648272097110748, "learning_rate": 0.002, "loss": 2.5582, "step": 381550 }, { "epoch": 0.7601523651663904, "grad_norm": 0.15596114099025726, "learning_rate": 0.002, "loss": 2.547, "step": 381560 }, { "epoch": 0.7601722873900293, "grad_norm": 0.17614565789699554, "learning_rate": 0.002, "loss": 2.5595, "step": 381570 }, { "epoch": 0.7601922096136683, "grad_norm": 0.15686555206775665, "learning_rate": 0.002, "loss": 2.5548, "step": 381580 }, { "epoch": 0.7602121318373072, "grad_norm": 0.18494310975074768, "learning_rate": 0.002, "loss": 2.5504, "step": 381590 }, { "epoch": 0.7602320540609461, "grad_norm": 0.1959693729877472, "learning_rate": 0.002, "loss": 2.5445, "step": 381600 }, { "epoch": 0.760251976284585, "grad_norm": 0.14831826090812683, "learning_rate": 0.002, "loss": 2.5501, "step": 381610 }, { "epoch": 0.7602718985082239, "grad_norm": 0.1628059446811676, "learning_rate": 0.002, "loss": 2.5432, "step": 381620 }, { "epoch": 0.7602918207318629, "grad_norm": 0.15223586559295654, "learning_rate": 0.002, "loss": 2.5479, "step": 381630 }, { "epoch": 0.7603117429555017, "grad_norm": 0.17307148873806, "learning_rate": 0.002, "loss": 2.5419, "step": 381640 }, { "epoch": 0.7603316651791406, "grad_norm": 0.1652238368988037, "learning_rate": 0.002, "loss": 2.5572, "step": 381650 }, { "epoch": 0.7603515874027795, "grad_norm": 0.18217600882053375, "learning_rate": 0.002, "loss": 2.5629, "step": 381660 }, { "epoch": 0.7603715096264184, "grad_norm": 0.1639654040336609, "learning_rate": 0.002, "loss": 2.5739, "step": 381670 }, { "epoch": 0.7603914318500574, "grad_norm": 0.1678123027086258, "learning_rate": 0.002, "loss": 2.5447, "step": 381680 }, { "epoch": 0.7604113540736963, "grad_norm": 0.16408711671829224, "learning_rate": 0.002, "loss": 2.5569, "step": 381690 }, { "epoch": 0.7604312762973352, "grad_norm": 0.1890115737915039, "learning_rate": 0.002, "loss": 2.5674, "step": 381700 }, { "epoch": 0.7604511985209741, "grad_norm": 0.17069485783576965, "learning_rate": 0.002, "loss": 2.5665, "step": 381710 }, { "epoch": 0.760471120744613, "grad_norm": 0.18645399808883667, "learning_rate": 0.002, "loss": 2.5494, "step": 381720 }, { "epoch": 0.760491042968252, "grad_norm": 0.14290693402290344, "learning_rate": 0.002, "loss": 2.563, "step": 381730 }, { "epoch": 0.7605109651918909, "grad_norm": 0.14082644879817963, "learning_rate": 0.002, "loss": 2.5454, "step": 381740 }, { "epoch": 0.7605308874155298, "grad_norm": 0.16458925604820251, "learning_rate": 0.002, "loss": 2.5519, "step": 381750 }, { "epoch": 0.7605508096391687, "grad_norm": 0.17131395637989044, "learning_rate": 0.002, "loss": 2.5619, "step": 381760 }, { "epoch": 0.7605707318628075, "grad_norm": 0.20824599266052246, "learning_rate": 0.002, "loss": 2.5595, "step": 381770 }, { "epoch": 0.7605906540864465, "grad_norm": 0.15907704830169678, "learning_rate": 0.002, "loss": 2.5427, "step": 381780 }, { "epoch": 0.7606105763100854, "grad_norm": 0.1692178100347519, "learning_rate": 0.002, "loss": 2.5394, "step": 381790 }, { "epoch": 0.7606304985337243, "grad_norm": 0.1749786138534546, "learning_rate": 0.002, "loss": 2.549, "step": 381800 }, { "epoch": 0.7606504207573632, "grad_norm": 0.17944398522377014, "learning_rate": 0.002, "loss": 2.5516, "step": 381810 }, { "epoch": 0.7606703429810021, "grad_norm": 0.1678420752286911, "learning_rate": 0.002, "loss": 2.5604, "step": 381820 }, { "epoch": 0.7606902652046411, "grad_norm": 0.1746119260787964, "learning_rate": 0.002, "loss": 2.5508, "step": 381830 }, { "epoch": 0.76071018742828, "grad_norm": 0.18130341172218323, "learning_rate": 0.002, "loss": 2.556, "step": 381840 }, { "epoch": 0.7607301096519189, "grad_norm": 0.16150200366973877, "learning_rate": 0.002, "loss": 2.553, "step": 381850 }, { "epoch": 0.7607500318755578, "grad_norm": 0.47030940651893616, "learning_rate": 0.002, "loss": 2.5676, "step": 381860 }, { "epoch": 0.7607699540991968, "grad_norm": 0.1688089668750763, "learning_rate": 0.002, "loss": 2.5455, "step": 381870 }, { "epoch": 0.7607898763228357, "grad_norm": 0.14826326072216034, "learning_rate": 0.002, "loss": 2.5721, "step": 381880 }, { "epoch": 0.7608097985464746, "grad_norm": 0.15506789088249207, "learning_rate": 0.002, "loss": 2.5507, "step": 381890 }, { "epoch": 0.7608297207701135, "grad_norm": 0.19554276764392853, "learning_rate": 0.002, "loss": 2.5529, "step": 381900 }, { "epoch": 0.7608496429937524, "grad_norm": 0.15503960847854614, "learning_rate": 0.002, "loss": 2.5474, "step": 381910 }, { "epoch": 0.7608695652173914, "grad_norm": 0.16025511920452118, "learning_rate": 0.002, "loss": 2.544, "step": 381920 }, { "epoch": 0.7608894874410302, "grad_norm": 0.15400567650794983, "learning_rate": 0.002, "loss": 2.5652, "step": 381930 }, { "epoch": 0.7609094096646691, "grad_norm": 0.16740228235721588, "learning_rate": 0.002, "loss": 2.5501, "step": 381940 }, { "epoch": 0.760929331888308, "grad_norm": 0.17739008367061615, "learning_rate": 0.002, "loss": 2.5502, "step": 381950 }, { "epoch": 0.7609492541119469, "grad_norm": 0.14960937201976776, "learning_rate": 0.002, "loss": 2.5556, "step": 381960 }, { "epoch": 0.7609691763355859, "grad_norm": 0.19765380024909973, "learning_rate": 0.002, "loss": 2.5591, "step": 381970 }, { "epoch": 0.7609890985592248, "grad_norm": 0.16472779214382172, "learning_rate": 0.002, "loss": 2.5563, "step": 381980 }, { "epoch": 0.7610090207828637, "grad_norm": 0.15576285123825073, "learning_rate": 0.002, "loss": 2.5429, "step": 381990 }, { "epoch": 0.7610289430065026, "grad_norm": 0.16389738023281097, "learning_rate": 0.002, "loss": 2.5433, "step": 382000 }, { "epoch": 0.7610488652301415, "grad_norm": 0.13929390907287598, "learning_rate": 0.002, "loss": 2.5419, "step": 382010 }, { "epoch": 0.7610687874537805, "grad_norm": 0.16579149663448334, "learning_rate": 0.002, "loss": 2.5445, "step": 382020 }, { "epoch": 0.7610887096774194, "grad_norm": 0.17860379815101624, "learning_rate": 0.002, "loss": 2.5437, "step": 382030 }, { "epoch": 0.7611086319010583, "grad_norm": 0.16553421318531036, "learning_rate": 0.002, "loss": 2.5558, "step": 382040 }, { "epoch": 0.7611285541246972, "grad_norm": 0.18440277874469757, "learning_rate": 0.002, "loss": 2.5568, "step": 382050 }, { "epoch": 0.761148476348336, "grad_norm": 0.16963721811771393, "learning_rate": 0.002, "loss": 2.5624, "step": 382060 }, { "epoch": 0.761168398571975, "grad_norm": 0.1410137116909027, "learning_rate": 0.002, "loss": 2.5424, "step": 382070 }, { "epoch": 0.7611883207956139, "grad_norm": 0.1815117746591568, "learning_rate": 0.002, "loss": 2.5384, "step": 382080 }, { "epoch": 0.7612082430192528, "grad_norm": 0.15544457733631134, "learning_rate": 0.002, "loss": 2.5678, "step": 382090 }, { "epoch": 0.7612281652428917, "grad_norm": 0.1782359778881073, "learning_rate": 0.002, "loss": 2.5761, "step": 382100 }, { "epoch": 0.7612480874665306, "grad_norm": 0.16566577553749084, "learning_rate": 0.002, "loss": 2.5612, "step": 382110 }, { "epoch": 0.7612680096901696, "grad_norm": 0.16774265468120575, "learning_rate": 0.002, "loss": 2.5527, "step": 382120 }, { "epoch": 0.7612879319138085, "grad_norm": 0.16546635329723358, "learning_rate": 0.002, "loss": 2.5432, "step": 382130 }, { "epoch": 0.7613078541374474, "grad_norm": 0.17128753662109375, "learning_rate": 0.002, "loss": 2.5592, "step": 382140 }, { "epoch": 0.7613277763610863, "grad_norm": 0.141081303358078, "learning_rate": 0.002, "loss": 2.5401, "step": 382150 }, { "epoch": 0.7613476985847253, "grad_norm": 0.21345974504947662, "learning_rate": 0.002, "loss": 2.5532, "step": 382160 }, { "epoch": 0.7613676208083642, "grad_norm": 0.17112736403942108, "learning_rate": 0.002, "loss": 2.5405, "step": 382170 }, { "epoch": 0.7613875430320031, "grad_norm": 0.13252703845500946, "learning_rate": 0.002, "loss": 2.5445, "step": 382180 }, { "epoch": 0.761407465255642, "grad_norm": 0.1599547117948532, "learning_rate": 0.002, "loss": 2.5517, "step": 382190 }, { "epoch": 0.7614273874792808, "grad_norm": 0.20753222703933716, "learning_rate": 0.002, "loss": 2.5659, "step": 382200 }, { "epoch": 0.7614473097029199, "grad_norm": 0.16020488739013672, "learning_rate": 0.002, "loss": 2.5554, "step": 382210 }, { "epoch": 0.7614672319265587, "grad_norm": 0.1754440814256668, "learning_rate": 0.002, "loss": 2.5616, "step": 382220 }, { "epoch": 0.7614871541501976, "grad_norm": 0.15027596056461334, "learning_rate": 0.002, "loss": 2.555, "step": 382230 }, { "epoch": 0.7615070763738365, "grad_norm": 0.14532716572284698, "learning_rate": 0.002, "loss": 2.5607, "step": 382240 }, { "epoch": 0.7615269985974754, "grad_norm": 0.18410228192806244, "learning_rate": 0.002, "loss": 2.5482, "step": 382250 }, { "epoch": 0.7615469208211144, "grad_norm": 0.20363257825374603, "learning_rate": 0.002, "loss": 2.5763, "step": 382260 }, { "epoch": 0.7615668430447533, "grad_norm": 0.1595325618982315, "learning_rate": 0.002, "loss": 2.5752, "step": 382270 }, { "epoch": 0.7615867652683922, "grad_norm": 0.1717149019241333, "learning_rate": 0.002, "loss": 2.5334, "step": 382280 }, { "epoch": 0.7616066874920311, "grad_norm": 0.14754608273506165, "learning_rate": 0.002, "loss": 2.5559, "step": 382290 }, { "epoch": 0.76162660971567, "grad_norm": 0.17496785521507263, "learning_rate": 0.002, "loss": 2.5487, "step": 382300 }, { "epoch": 0.761646531939309, "grad_norm": 0.15994741022586823, "learning_rate": 0.002, "loss": 2.5607, "step": 382310 }, { "epoch": 0.7616664541629479, "grad_norm": 0.15179631114006042, "learning_rate": 0.002, "loss": 2.5525, "step": 382320 }, { "epoch": 0.7616863763865868, "grad_norm": 0.16994139552116394, "learning_rate": 0.002, "loss": 2.5745, "step": 382330 }, { "epoch": 0.7617062986102257, "grad_norm": 0.1857772022485733, "learning_rate": 0.002, "loss": 2.5348, "step": 382340 }, { "epoch": 0.7617262208338645, "grad_norm": 0.15334530174732208, "learning_rate": 0.002, "loss": 2.5681, "step": 382350 }, { "epoch": 0.7617461430575035, "grad_norm": 0.20707418024539948, "learning_rate": 0.002, "loss": 2.5575, "step": 382360 }, { "epoch": 0.7617660652811424, "grad_norm": 0.13664260506629944, "learning_rate": 0.002, "loss": 2.553, "step": 382370 }, { "epoch": 0.7617859875047813, "grad_norm": 0.16963815689086914, "learning_rate": 0.002, "loss": 2.5506, "step": 382380 }, { "epoch": 0.7618059097284202, "grad_norm": 0.14945170283317566, "learning_rate": 0.002, "loss": 2.5476, "step": 382390 }, { "epoch": 0.7618258319520591, "grad_norm": 0.18236036598682404, "learning_rate": 0.002, "loss": 2.5473, "step": 382400 }, { "epoch": 0.7618457541756981, "grad_norm": 0.16409017145633698, "learning_rate": 0.002, "loss": 2.5473, "step": 382410 }, { "epoch": 0.761865676399337, "grad_norm": 0.16723036766052246, "learning_rate": 0.002, "loss": 2.542, "step": 382420 }, { "epoch": 0.7618855986229759, "grad_norm": 0.15471981465816498, "learning_rate": 0.002, "loss": 2.5626, "step": 382430 }, { "epoch": 0.7619055208466148, "grad_norm": 0.21271933615207672, "learning_rate": 0.002, "loss": 2.5397, "step": 382440 }, { "epoch": 0.7619254430702538, "grad_norm": 0.17036953568458557, "learning_rate": 0.002, "loss": 2.5509, "step": 382450 }, { "epoch": 0.7619453652938927, "grad_norm": 0.1561252325773239, "learning_rate": 0.002, "loss": 2.5406, "step": 382460 }, { "epoch": 0.7619652875175316, "grad_norm": 0.1609359234571457, "learning_rate": 0.002, "loss": 2.5465, "step": 382470 }, { "epoch": 0.7619852097411705, "grad_norm": 0.17722579836845398, "learning_rate": 0.002, "loss": 2.5626, "step": 382480 }, { "epoch": 0.7620051319648093, "grad_norm": 0.1514328569173813, "learning_rate": 0.002, "loss": 2.5475, "step": 382490 }, { "epoch": 0.7620250541884483, "grad_norm": 0.16804634034633636, "learning_rate": 0.002, "loss": 2.5589, "step": 382500 }, { "epoch": 0.7620449764120872, "grad_norm": 0.19476726651191711, "learning_rate": 0.002, "loss": 2.5667, "step": 382510 }, { "epoch": 0.7620648986357261, "grad_norm": 0.16318339109420776, "learning_rate": 0.002, "loss": 2.5509, "step": 382520 }, { "epoch": 0.762084820859365, "grad_norm": 0.16094370186328888, "learning_rate": 0.002, "loss": 2.547, "step": 382530 }, { "epoch": 0.7621047430830039, "grad_norm": 0.15861929953098297, "learning_rate": 0.002, "loss": 2.5557, "step": 382540 }, { "epoch": 0.7621246653066429, "grad_norm": 0.17819873988628387, "learning_rate": 0.002, "loss": 2.5642, "step": 382550 }, { "epoch": 0.7621445875302818, "grad_norm": 0.16188576817512512, "learning_rate": 0.002, "loss": 2.5481, "step": 382560 }, { "epoch": 0.7621645097539207, "grad_norm": 0.14745377004146576, "learning_rate": 0.002, "loss": 2.5513, "step": 382570 }, { "epoch": 0.7621844319775596, "grad_norm": 0.16853933036327362, "learning_rate": 0.002, "loss": 2.558, "step": 382580 }, { "epoch": 0.7622043542011985, "grad_norm": 0.18060478568077087, "learning_rate": 0.002, "loss": 2.5659, "step": 382590 }, { "epoch": 0.7622242764248375, "grad_norm": 0.15664950013160706, "learning_rate": 0.002, "loss": 2.542, "step": 382600 }, { "epoch": 0.7622441986484764, "grad_norm": 0.15030457079410553, "learning_rate": 0.002, "loss": 2.5632, "step": 382610 }, { "epoch": 0.7622641208721153, "grad_norm": 0.1802894026041031, "learning_rate": 0.002, "loss": 2.5594, "step": 382620 }, { "epoch": 0.7622840430957541, "grad_norm": 0.15055038034915924, "learning_rate": 0.002, "loss": 2.5592, "step": 382630 }, { "epoch": 0.762303965319393, "grad_norm": 0.16289782524108887, "learning_rate": 0.002, "loss": 2.5494, "step": 382640 }, { "epoch": 0.762323887543032, "grad_norm": 0.22673292458057404, "learning_rate": 0.002, "loss": 2.5419, "step": 382650 }, { "epoch": 0.7623438097666709, "grad_norm": 0.1833781599998474, "learning_rate": 0.002, "loss": 2.551, "step": 382660 }, { "epoch": 0.7623637319903098, "grad_norm": 0.16377344727516174, "learning_rate": 0.002, "loss": 2.5555, "step": 382670 }, { "epoch": 0.7623836542139487, "grad_norm": 0.15124815702438354, "learning_rate": 0.002, "loss": 2.5338, "step": 382680 }, { "epoch": 0.7624035764375876, "grad_norm": 0.17907506227493286, "learning_rate": 0.002, "loss": 2.5533, "step": 382690 }, { "epoch": 0.7624234986612266, "grad_norm": 0.2212950885295868, "learning_rate": 0.002, "loss": 2.5503, "step": 382700 }, { "epoch": 0.7624434208848655, "grad_norm": 0.16470277309417725, "learning_rate": 0.002, "loss": 2.5513, "step": 382710 }, { "epoch": 0.7624633431085044, "grad_norm": 0.14861778914928436, "learning_rate": 0.002, "loss": 2.5614, "step": 382720 }, { "epoch": 0.7624832653321433, "grad_norm": 0.1714198887348175, "learning_rate": 0.002, "loss": 2.5593, "step": 382730 }, { "epoch": 0.7625031875557823, "grad_norm": 0.19161373376846313, "learning_rate": 0.002, "loss": 2.552, "step": 382740 }, { "epoch": 0.7625231097794212, "grad_norm": 0.18214459717273712, "learning_rate": 0.002, "loss": 2.5552, "step": 382750 }, { "epoch": 0.7625430320030601, "grad_norm": 0.15200275182724, "learning_rate": 0.002, "loss": 2.5545, "step": 382760 }, { "epoch": 0.762562954226699, "grad_norm": 0.15889118611812592, "learning_rate": 0.002, "loss": 2.5473, "step": 382770 }, { "epoch": 0.7625828764503378, "grad_norm": 0.16886566579341888, "learning_rate": 0.002, "loss": 2.5709, "step": 382780 }, { "epoch": 0.7626027986739768, "grad_norm": 0.15551891922950745, "learning_rate": 0.002, "loss": 2.5382, "step": 382790 }, { "epoch": 0.7626227208976157, "grad_norm": 0.18641361594200134, "learning_rate": 0.002, "loss": 2.5599, "step": 382800 }, { "epoch": 0.7626426431212546, "grad_norm": 0.16434015333652496, "learning_rate": 0.002, "loss": 2.5609, "step": 382810 }, { "epoch": 0.7626625653448935, "grad_norm": 0.19319763779640198, "learning_rate": 0.002, "loss": 2.5405, "step": 382820 }, { "epoch": 0.7626824875685324, "grad_norm": 0.15755872428417206, "learning_rate": 0.002, "loss": 2.5432, "step": 382830 }, { "epoch": 0.7627024097921714, "grad_norm": 0.19004763662815094, "learning_rate": 0.002, "loss": 2.5651, "step": 382840 }, { "epoch": 0.7627223320158103, "grad_norm": 0.13163472712039948, "learning_rate": 0.002, "loss": 2.5631, "step": 382850 }, { "epoch": 0.7627422542394492, "grad_norm": 0.15186215937137604, "learning_rate": 0.002, "loss": 2.5626, "step": 382860 }, { "epoch": 0.7627621764630881, "grad_norm": 0.14785686135292053, "learning_rate": 0.002, "loss": 2.5423, "step": 382870 }, { "epoch": 0.762782098686727, "grad_norm": 0.1487726867198944, "learning_rate": 0.002, "loss": 2.5476, "step": 382880 }, { "epoch": 0.762802020910366, "grad_norm": 0.13973060250282288, "learning_rate": 0.002, "loss": 2.5512, "step": 382890 }, { "epoch": 0.7628219431340049, "grad_norm": 0.18655283749103546, "learning_rate": 0.002, "loss": 2.5418, "step": 382900 }, { "epoch": 0.7628418653576438, "grad_norm": 0.14917196333408356, "learning_rate": 0.002, "loss": 2.5466, "step": 382910 }, { "epoch": 0.7628617875812826, "grad_norm": 0.16795650124549866, "learning_rate": 0.002, "loss": 2.5543, "step": 382920 }, { "epoch": 0.7628817098049215, "grad_norm": 0.16018535196781158, "learning_rate": 0.002, "loss": 2.5361, "step": 382930 }, { "epoch": 0.7629016320285605, "grad_norm": 0.235252246260643, "learning_rate": 0.002, "loss": 2.5394, "step": 382940 }, { "epoch": 0.7629215542521994, "grad_norm": 0.16753706336021423, "learning_rate": 0.002, "loss": 2.5689, "step": 382950 }, { "epoch": 0.7629414764758383, "grad_norm": 0.15835154056549072, "learning_rate": 0.002, "loss": 2.5607, "step": 382960 }, { "epoch": 0.7629613986994772, "grad_norm": 0.18359485268592834, "learning_rate": 0.002, "loss": 2.5581, "step": 382970 }, { "epoch": 0.7629813209231161, "grad_norm": 0.16441801190376282, "learning_rate": 0.002, "loss": 2.5506, "step": 382980 }, { "epoch": 0.7630012431467551, "grad_norm": 0.14765940606594086, "learning_rate": 0.002, "loss": 2.5375, "step": 382990 }, { "epoch": 0.763021165370394, "grad_norm": 0.14854592084884644, "learning_rate": 0.002, "loss": 2.5539, "step": 383000 }, { "epoch": 0.7630410875940329, "grad_norm": 0.1886139214038849, "learning_rate": 0.002, "loss": 2.5531, "step": 383010 }, { "epoch": 0.7630610098176718, "grad_norm": 0.15261323750019073, "learning_rate": 0.002, "loss": 2.5572, "step": 383020 }, { "epoch": 0.7630809320413108, "grad_norm": 0.15129782259464264, "learning_rate": 0.002, "loss": 2.5523, "step": 383030 }, { "epoch": 0.7631008542649497, "grad_norm": 0.14508967101573944, "learning_rate": 0.002, "loss": 2.5571, "step": 383040 }, { "epoch": 0.7631207764885886, "grad_norm": 0.1810465306043625, "learning_rate": 0.002, "loss": 2.557, "step": 383050 }, { "epoch": 0.7631406987122274, "grad_norm": 0.14357125759124756, "learning_rate": 0.002, "loss": 2.5475, "step": 383060 }, { "epoch": 0.7631606209358663, "grad_norm": 0.17043396830558777, "learning_rate": 0.002, "loss": 2.548, "step": 383070 }, { "epoch": 0.7631805431595053, "grad_norm": 0.22428563237190247, "learning_rate": 0.002, "loss": 2.5523, "step": 383080 }, { "epoch": 0.7632004653831442, "grad_norm": 0.1746281236410141, "learning_rate": 0.002, "loss": 2.5559, "step": 383090 }, { "epoch": 0.7632203876067831, "grad_norm": 0.16785664856433868, "learning_rate": 0.002, "loss": 2.5593, "step": 383100 }, { "epoch": 0.763240309830422, "grad_norm": 0.15724565088748932, "learning_rate": 0.002, "loss": 2.5632, "step": 383110 }, { "epoch": 0.7632602320540609, "grad_norm": 0.14510193467140198, "learning_rate": 0.002, "loss": 2.5464, "step": 383120 }, { "epoch": 0.7632801542776999, "grad_norm": 0.15754464268684387, "learning_rate": 0.002, "loss": 2.5587, "step": 383130 }, { "epoch": 0.7633000765013388, "grad_norm": 0.1374751627445221, "learning_rate": 0.002, "loss": 2.5592, "step": 383140 }, { "epoch": 0.7633199987249777, "grad_norm": 0.18119312822818756, "learning_rate": 0.002, "loss": 2.5507, "step": 383150 }, { "epoch": 0.7633399209486166, "grad_norm": 0.1350966989994049, "learning_rate": 0.002, "loss": 2.5481, "step": 383160 }, { "epoch": 0.7633598431722555, "grad_norm": 0.17349198460578918, "learning_rate": 0.002, "loss": 2.5414, "step": 383170 }, { "epoch": 0.7633797653958945, "grad_norm": 0.13391615450382233, "learning_rate": 0.002, "loss": 2.547, "step": 383180 }, { "epoch": 0.7633996876195334, "grad_norm": 0.15444006025791168, "learning_rate": 0.002, "loss": 2.5481, "step": 383190 }, { "epoch": 0.7634196098431723, "grad_norm": 0.17643488943576813, "learning_rate": 0.002, "loss": 2.5632, "step": 383200 }, { "epoch": 0.7634395320668111, "grad_norm": 0.15434162318706512, "learning_rate": 0.002, "loss": 2.5429, "step": 383210 }, { "epoch": 0.76345945429045, "grad_norm": 0.14229492843151093, "learning_rate": 0.002, "loss": 2.552, "step": 383220 }, { "epoch": 0.763479376514089, "grad_norm": 0.195338174700737, "learning_rate": 0.002, "loss": 2.5528, "step": 383230 }, { "epoch": 0.7634992987377279, "grad_norm": 0.204460009932518, "learning_rate": 0.002, "loss": 2.5549, "step": 383240 }, { "epoch": 0.7635192209613668, "grad_norm": 0.18438148498535156, "learning_rate": 0.002, "loss": 2.5474, "step": 383250 }, { "epoch": 0.7635391431850057, "grad_norm": 0.17753903567790985, "learning_rate": 0.002, "loss": 2.5572, "step": 383260 }, { "epoch": 0.7635590654086446, "grad_norm": 0.13974788784980774, "learning_rate": 0.002, "loss": 2.5584, "step": 383270 }, { "epoch": 0.7635789876322836, "grad_norm": 0.1744440197944641, "learning_rate": 0.002, "loss": 2.5605, "step": 383280 }, { "epoch": 0.7635989098559225, "grad_norm": 0.17720642685890198, "learning_rate": 0.002, "loss": 2.5544, "step": 383290 }, { "epoch": 0.7636188320795614, "grad_norm": 0.2587242126464844, "learning_rate": 0.002, "loss": 2.5559, "step": 383300 }, { "epoch": 0.7636387543032003, "grad_norm": 0.16798202693462372, "learning_rate": 0.002, "loss": 2.5543, "step": 383310 }, { "epoch": 0.7636586765268393, "grad_norm": 0.18451489508152008, "learning_rate": 0.002, "loss": 2.5561, "step": 383320 }, { "epoch": 0.7636785987504782, "grad_norm": 0.16311180591583252, "learning_rate": 0.002, "loss": 2.562, "step": 383330 }, { "epoch": 0.7636985209741171, "grad_norm": 0.17408259212970734, "learning_rate": 0.002, "loss": 2.5621, "step": 383340 }, { "epoch": 0.763718443197756, "grad_norm": 0.1707737147808075, "learning_rate": 0.002, "loss": 2.5543, "step": 383350 }, { "epoch": 0.7637383654213948, "grad_norm": 0.16006721556186676, "learning_rate": 0.002, "loss": 2.5617, "step": 383360 }, { "epoch": 0.7637582876450338, "grad_norm": 0.1643855720758438, "learning_rate": 0.002, "loss": 2.5486, "step": 383370 }, { "epoch": 0.7637782098686727, "grad_norm": 0.15754054486751556, "learning_rate": 0.002, "loss": 2.5527, "step": 383380 }, { "epoch": 0.7637981320923116, "grad_norm": 0.13734424114227295, "learning_rate": 0.002, "loss": 2.5589, "step": 383390 }, { "epoch": 0.7638180543159505, "grad_norm": 0.15543940663337708, "learning_rate": 0.002, "loss": 2.5709, "step": 383400 }, { "epoch": 0.7638379765395894, "grad_norm": 0.16141490638256073, "learning_rate": 0.002, "loss": 2.5561, "step": 383410 }, { "epoch": 0.7638578987632284, "grad_norm": 0.16677509248256683, "learning_rate": 0.002, "loss": 2.5557, "step": 383420 }, { "epoch": 0.7638778209868673, "grad_norm": 0.14017125964164734, "learning_rate": 0.002, "loss": 2.5475, "step": 383430 }, { "epoch": 0.7638977432105062, "grad_norm": 0.14330022037029266, "learning_rate": 0.002, "loss": 2.5348, "step": 383440 }, { "epoch": 0.7639176654341451, "grad_norm": 0.1894620954990387, "learning_rate": 0.002, "loss": 2.5679, "step": 383450 }, { "epoch": 0.763937587657784, "grad_norm": 0.2017667144536972, "learning_rate": 0.002, "loss": 2.5526, "step": 383460 }, { "epoch": 0.763957509881423, "grad_norm": 0.14389951527118683, "learning_rate": 0.002, "loss": 2.5517, "step": 383470 }, { "epoch": 0.7639774321050619, "grad_norm": 0.1682891547679901, "learning_rate": 0.002, "loss": 2.5506, "step": 383480 }, { "epoch": 0.7639973543287008, "grad_norm": 0.15942905843257904, "learning_rate": 0.002, "loss": 2.5599, "step": 383490 }, { "epoch": 0.7640172765523396, "grad_norm": 0.14348796010017395, "learning_rate": 0.002, "loss": 2.5567, "step": 383500 }, { "epoch": 0.7640371987759785, "grad_norm": 0.19569312036037445, "learning_rate": 0.002, "loss": 2.5455, "step": 383510 }, { "epoch": 0.7640571209996175, "grad_norm": 0.19406451284885406, "learning_rate": 0.002, "loss": 2.5664, "step": 383520 }, { "epoch": 0.7640770432232564, "grad_norm": 0.16402503848075867, "learning_rate": 0.002, "loss": 2.5369, "step": 383530 }, { "epoch": 0.7640969654468953, "grad_norm": 0.15401889383792877, "learning_rate": 0.002, "loss": 2.5481, "step": 383540 }, { "epoch": 0.7641168876705342, "grad_norm": 0.1746157854795456, "learning_rate": 0.002, "loss": 2.5536, "step": 383550 }, { "epoch": 0.7641368098941731, "grad_norm": 0.16950169205665588, "learning_rate": 0.002, "loss": 2.5405, "step": 383560 }, { "epoch": 0.7641567321178121, "grad_norm": 0.15185582637786865, "learning_rate": 0.002, "loss": 2.5598, "step": 383570 }, { "epoch": 0.764176654341451, "grad_norm": 0.16289828717708588, "learning_rate": 0.002, "loss": 2.571, "step": 383580 }, { "epoch": 0.7641965765650899, "grad_norm": 0.16465865075588226, "learning_rate": 0.002, "loss": 2.5622, "step": 383590 }, { "epoch": 0.7642164987887288, "grad_norm": 0.16324323415756226, "learning_rate": 0.002, "loss": 2.5721, "step": 383600 }, { "epoch": 0.7642364210123677, "grad_norm": 0.14990383386611938, "learning_rate": 0.002, "loss": 2.5564, "step": 383610 }, { "epoch": 0.7642563432360067, "grad_norm": 0.17510725557804108, "learning_rate": 0.002, "loss": 2.5465, "step": 383620 }, { "epoch": 0.7642762654596456, "grad_norm": 0.14087852835655212, "learning_rate": 0.002, "loss": 2.5366, "step": 383630 }, { "epoch": 0.7642961876832844, "grad_norm": 0.1363346129655838, "learning_rate": 0.002, "loss": 2.5578, "step": 383640 }, { "epoch": 0.7643161099069233, "grad_norm": 0.16631099581718445, "learning_rate": 0.002, "loss": 2.5605, "step": 383650 }, { "epoch": 0.7643360321305623, "grad_norm": 0.14361712336540222, "learning_rate": 0.002, "loss": 2.5645, "step": 383660 }, { "epoch": 0.7643559543542012, "grad_norm": 0.1899920254945755, "learning_rate": 0.002, "loss": 2.5385, "step": 383670 }, { "epoch": 0.7643758765778401, "grad_norm": 0.15699516236782074, "learning_rate": 0.002, "loss": 2.5626, "step": 383680 }, { "epoch": 0.764395798801479, "grad_norm": 0.14894495904445648, "learning_rate": 0.002, "loss": 2.5503, "step": 383690 }, { "epoch": 0.7644157210251179, "grad_norm": 0.16399680078029633, "learning_rate": 0.002, "loss": 2.5428, "step": 383700 }, { "epoch": 0.7644356432487569, "grad_norm": 0.16218878328800201, "learning_rate": 0.002, "loss": 2.5418, "step": 383710 }, { "epoch": 0.7644555654723958, "grad_norm": 0.16500739753246307, "learning_rate": 0.002, "loss": 2.5462, "step": 383720 }, { "epoch": 0.7644754876960347, "grad_norm": 0.2010354846715927, "learning_rate": 0.002, "loss": 2.5619, "step": 383730 }, { "epoch": 0.7644954099196736, "grad_norm": 0.15569555759429932, "learning_rate": 0.002, "loss": 2.5564, "step": 383740 }, { "epoch": 0.7645153321433125, "grad_norm": 0.1815395951271057, "learning_rate": 0.002, "loss": 2.5537, "step": 383750 }, { "epoch": 0.7645352543669515, "grad_norm": 0.15822933614253998, "learning_rate": 0.002, "loss": 2.5643, "step": 383760 }, { "epoch": 0.7645551765905904, "grad_norm": 0.13968928158283234, "learning_rate": 0.002, "loss": 2.5611, "step": 383770 }, { "epoch": 0.7645750988142292, "grad_norm": 0.1728416532278061, "learning_rate": 0.002, "loss": 2.5557, "step": 383780 }, { "epoch": 0.7645950210378681, "grad_norm": 0.1692904531955719, "learning_rate": 0.002, "loss": 2.5489, "step": 383790 }, { "epoch": 0.764614943261507, "grad_norm": 0.15598338842391968, "learning_rate": 0.002, "loss": 2.5539, "step": 383800 }, { "epoch": 0.764634865485146, "grad_norm": 0.17593804001808167, "learning_rate": 0.002, "loss": 2.5614, "step": 383810 }, { "epoch": 0.7646547877087849, "grad_norm": 0.13755996525287628, "learning_rate": 0.002, "loss": 2.5737, "step": 383820 }, { "epoch": 0.7646747099324238, "grad_norm": 0.19384756684303284, "learning_rate": 0.002, "loss": 2.5533, "step": 383830 }, { "epoch": 0.7646946321560627, "grad_norm": 0.1758088320493698, "learning_rate": 0.002, "loss": 2.5663, "step": 383840 }, { "epoch": 0.7647145543797016, "grad_norm": 0.1682644784450531, "learning_rate": 0.002, "loss": 2.5564, "step": 383850 }, { "epoch": 0.7647344766033406, "grad_norm": 0.14207538962364197, "learning_rate": 0.002, "loss": 2.5532, "step": 383860 }, { "epoch": 0.7647543988269795, "grad_norm": 0.15337345004081726, "learning_rate": 0.002, "loss": 2.5633, "step": 383870 }, { "epoch": 0.7647743210506184, "grad_norm": 0.21010670065879822, "learning_rate": 0.002, "loss": 2.5607, "step": 383880 }, { "epoch": 0.7647942432742573, "grad_norm": 0.13819876313209534, "learning_rate": 0.002, "loss": 2.5557, "step": 383890 }, { "epoch": 0.7648141654978962, "grad_norm": 0.19877977669239044, "learning_rate": 0.002, "loss": 2.558, "step": 383900 }, { "epoch": 0.7648340877215352, "grad_norm": 0.15958258509635925, "learning_rate": 0.002, "loss": 2.5685, "step": 383910 }, { "epoch": 0.764854009945174, "grad_norm": 0.1653914749622345, "learning_rate": 0.002, "loss": 2.5469, "step": 383920 }, { "epoch": 0.7648739321688129, "grad_norm": 0.1666194051504135, "learning_rate": 0.002, "loss": 2.5512, "step": 383930 }, { "epoch": 0.7648938543924518, "grad_norm": 0.15570944547653198, "learning_rate": 0.002, "loss": 2.5615, "step": 383940 }, { "epoch": 0.7649137766160908, "grad_norm": 0.15435120463371277, "learning_rate": 0.002, "loss": 2.5444, "step": 383950 }, { "epoch": 0.7649336988397297, "grad_norm": 0.16507050395011902, "learning_rate": 0.002, "loss": 2.563, "step": 383960 }, { "epoch": 0.7649536210633686, "grad_norm": 0.13463708758354187, "learning_rate": 0.002, "loss": 2.5491, "step": 383970 }, { "epoch": 0.7649735432870075, "grad_norm": 0.15333984792232513, "learning_rate": 0.002, "loss": 2.5611, "step": 383980 }, { "epoch": 0.7649934655106464, "grad_norm": 0.15405596792697906, "learning_rate": 0.002, "loss": 2.5568, "step": 383990 }, { "epoch": 0.7650133877342854, "grad_norm": 0.13665764033794403, "learning_rate": 0.002, "loss": 2.5442, "step": 384000 }, { "epoch": 0.7650333099579243, "grad_norm": 0.1568135768175125, "learning_rate": 0.002, "loss": 2.5586, "step": 384010 }, { "epoch": 0.7650532321815632, "grad_norm": 0.15870486199855804, "learning_rate": 0.002, "loss": 2.5473, "step": 384020 }, { "epoch": 0.7650731544052021, "grad_norm": 0.16687965393066406, "learning_rate": 0.002, "loss": 2.5453, "step": 384030 }, { "epoch": 0.765093076628841, "grad_norm": 0.1521155685186386, "learning_rate": 0.002, "loss": 2.5536, "step": 384040 }, { "epoch": 0.76511299885248, "grad_norm": 0.1644107848405838, "learning_rate": 0.002, "loss": 2.551, "step": 384050 }, { "epoch": 0.7651329210761189, "grad_norm": 0.16031084954738617, "learning_rate": 0.002, "loss": 2.5578, "step": 384060 }, { "epoch": 0.7651528432997577, "grad_norm": 0.20565181970596313, "learning_rate": 0.002, "loss": 2.5465, "step": 384070 }, { "epoch": 0.7651727655233966, "grad_norm": 0.17392109334468842, "learning_rate": 0.002, "loss": 2.5603, "step": 384080 }, { "epoch": 0.7651926877470355, "grad_norm": 0.15617001056671143, "learning_rate": 0.002, "loss": 2.542, "step": 384090 }, { "epoch": 0.7652126099706745, "grad_norm": 0.17975623905658722, "learning_rate": 0.002, "loss": 2.5572, "step": 384100 }, { "epoch": 0.7652325321943134, "grad_norm": 0.16048374772071838, "learning_rate": 0.002, "loss": 2.5584, "step": 384110 }, { "epoch": 0.7652524544179523, "grad_norm": 0.18138709664344788, "learning_rate": 0.002, "loss": 2.5382, "step": 384120 }, { "epoch": 0.7652723766415912, "grad_norm": 0.18773815035820007, "learning_rate": 0.002, "loss": 2.5523, "step": 384130 }, { "epoch": 0.7652922988652301, "grad_norm": 0.14735841751098633, "learning_rate": 0.002, "loss": 2.5409, "step": 384140 }, { "epoch": 0.7653122210888691, "grad_norm": 0.17787019908428192, "learning_rate": 0.002, "loss": 2.5536, "step": 384150 }, { "epoch": 0.765332143312508, "grad_norm": 0.16259685158729553, "learning_rate": 0.002, "loss": 2.5574, "step": 384160 }, { "epoch": 0.7653520655361469, "grad_norm": 0.1633799821138382, "learning_rate": 0.002, "loss": 2.574, "step": 384170 }, { "epoch": 0.7653719877597858, "grad_norm": 0.16514255106449127, "learning_rate": 0.002, "loss": 2.5418, "step": 384180 }, { "epoch": 0.7653919099834247, "grad_norm": 0.18710225820541382, "learning_rate": 0.002, "loss": 2.5459, "step": 384190 }, { "epoch": 0.7654118322070637, "grad_norm": 0.18721774220466614, "learning_rate": 0.002, "loss": 2.5483, "step": 384200 }, { "epoch": 0.7654317544307025, "grad_norm": 0.16155485808849335, "learning_rate": 0.002, "loss": 2.5555, "step": 384210 }, { "epoch": 0.7654516766543414, "grad_norm": 0.15068307518959045, "learning_rate": 0.002, "loss": 2.5554, "step": 384220 }, { "epoch": 0.7654715988779803, "grad_norm": 0.15926244854927063, "learning_rate": 0.002, "loss": 2.5577, "step": 384230 }, { "epoch": 0.7654915211016193, "grad_norm": 0.14417479932308197, "learning_rate": 0.002, "loss": 2.5526, "step": 384240 }, { "epoch": 0.7655114433252582, "grad_norm": 0.19326740503311157, "learning_rate": 0.002, "loss": 2.5565, "step": 384250 }, { "epoch": 0.7655313655488971, "grad_norm": 0.16285327076911926, "learning_rate": 0.002, "loss": 2.5576, "step": 384260 }, { "epoch": 0.765551287772536, "grad_norm": 0.14276497066020966, "learning_rate": 0.002, "loss": 2.5537, "step": 384270 }, { "epoch": 0.7655712099961749, "grad_norm": 0.17398175597190857, "learning_rate": 0.002, "loss": 2.562, "step": 384280 }, { "epoch": 0.7655911322198139, "grad_norm": 0.1431802213191986, "learning_rate": 0.002, "loss": 2.5534, "step": 384290 }, { "epoch": 0.7656110544434528, "grad_norm": 0.15854111313819885, "learning_rate": 0.002, "loss": 2.5413, "step": 384300 }, { "epoch": 0.7656309766670917, "grad_norm": 0.16066581010818481, "learning_rate": 0.002, "loss": 2.5619, "step": 384310 }, { "epoch": 0.7656508988907306, "grad_norm": 0.1739177107810974, "learning_rate": 0.002, "loss": 2.5604, "step": 384320 }, { "epoch": 0.7656708211143695, "grad_norm": 0.15345945954322815, "learning_rate": 0.002, "loss": 2.532, "step": 384330 }, { "epoch": 0.7656907433380085, "grad_norm": 0.17144010961055756, "learning_rate": 0.002, "loss": 2.5524, "step": 384340 }, { "epoch": 0.7657106655616474, "grad_norm": 0.18019920587539673, "learning_rate": 0.002, "loss": 2.5455, "step": 384350 }, { "epoch": 0.7657305877852862, "grad_norm": 0.14906004071235657, "learning_rate": 0.002, "loss": 2.5649, "step": 384360 }, { "epoch": 0.7657505100089251, "grad_norm": 0.20293165743350983, "learning_rate": 0.002, "loss": 2.5609, "step": 384370 }, { "epoch": 0.765770432232564, "grad_norm": 0.14892840385437012, "learning_rate": 0.002, "loss": 2.5569, "step": 384380 }, { "epoch": 0.765790354456203, "grad_norm": 0.16818086802959442, "learning_rate": 0.002, "loss": 2.5515, "step": 384390 }, { "epoch": 0.7658102766798419, "grad_norm": 0.14146171510219574, "learning_rate": 0.002, "loss": 2.5494, "step": 384400 }, { "epoch": 0.7658301989034808, "grad_norm": 0.21127483248710632, "learning_rate": 0.002, "loss": 2.5538, "step": 384410 }, { "epoch": 0.7658501211271197, "grad_norm": 0.167976975440979, "learning_rate": 0.002, "loss": 2.5792, "step": 384420 }, { "epoch": 0.7658700433507586, "grad_norm": 0.17768439650535583, "learning_rate": 0.002, "loss": 2.5531, "step": 384430 }, { "epoch": 0.7658899655743976, "grad_norm": 0.13221603631973267, "learning_rate": 0.002, "loss": 2.5317, "step": 384440 }, { "epoch": 0.7659098877980365, "grad_norm": 0.15343791246414185, "learning_rate": 0.002, "loss": 2.5466, "step": 384450 }, { "epoch": 0.7659298100216754, "grad_norm": 0.1492394655942917, "learning_rate": 0.002, "loss": 2.5533, "step": 384460 }, { "epoch": 0.7659497322453143, "grad_norm": 0.19401510059833527, "learning_rate": 0.002, "loss": 2.5587, "step": 384470 }, { "epoch": 0.7659696544689532, "grad_norm": 0.15807372331619263, "learning_rate": 0.002, "loss": 2.5657, "step": 384480 }, { "epoch": 0.7659895766925922, "grad_norm": 0.1536630541086197, "learning_rate": 0.002, "loss": 2.5707, "step": 384490 }, { "epoch": 0.766009498916231, "grad_norm": 0.17082872986793518, "learning_rate": 0.002, "loss": 2.5602, "step": 384500 }, { "epoch": 0.7660294211398699, "grad_norm": 0.14709068834781647, "learning_rate": 0.002, "loss": 2.5476, "step": 384510 }, { "epoch": 0.7660493433635088, "grad_norm": 0.1941942274570465, "learning_rate": 0.002, "loss": 2.5659, "step": 384520 }, { "epoch": 0.7660692655871478, "grad_norm": 0.1571694165468216, "learning_rate": 0.002, "loss": 2.5457, "step": 384530 }, { "epoch": 0.7660891878107867, "grad_norm": 0.17893467843532562, "learning_rate": 0.002, "loss": 2.5668, "step": 384540 }, { "epoch": 0.7661091100344256, "grad_norm": 0.17933930456638336, "learning_rate": 0.002, "loss": 2.56, "step": 384550 }, { "epoch": 0.7661290322580645, "grad_norm": 0.16767065227031708, "learning_rate": 0.002, "loss": 2.542, "step": 384560 }, { "epoch": 0.7661489544817034, "grad_norm": 0.1485760509967804, "learning_rate": 0.002, "loss": 2.5673, "step": 384570 }, { "epoch": 0.7661688767053424, "grad_norm": 0.1923641413450241, "learning_rate": 0.002, "loss": 2.561, "step": 384580 }, { "epoch": 0.7661887989289813, "grad_norm": 0.14487113058567047, "learning_rate": 0.002, "loss": 2.5572, "step": 384590 }, { "epoch": 0.7662087211526202, "grad_norm": 0.1663231998682022, "learning_rate": 0.002, "loss": 2.5511, "step": 384600 }, { "epoch": 0.7662286433762591, "grad_norm": 0.15048032999038696, "learning_rate": 0.002, "loss": 2.5659, "step": 384610 }, { "epoch": 0.766248565599898, "grad_norm": 0.1398782730102539, "learning_rate": 0.002, "loss": 2.5577, "step": 384620 }, { "epoch": 0.766268487823537, "grad_norm": 0.1758858561515808, "learning_rate": 0.002, "loss": 2.5646, "step": 384630 }, { "epoch": 0.7662884100471758, "grad_norm": 0.1465308964252472, "learning_rate": 0.002, "loss": 2.5534, "step": 384640 }, { "epoch": 0.7663083322708147, "grad_norm": 0.14019906520843506, "learning_rate": 0.002, "loss": 2.5537, "step": 384650 }, { "epoch": 0.7663282544944536, "grad_norm": 0.15701864659786224, "learning_rate": 0.002, "loss": 2.5419, "step": 384660 }, { "epoch": 0.7663481767180925, "grad_norm": 0.18521372973918915, "learning_rate": 0.002, "loss": 2.5521, "step": 384670 }, { "epoch": 0.7663680989417315, "grad_norm": 0.16428810358047485, "learning_rate": 0.002, "loss": 2.5508, "step": 384680 }, { "epoch": 0.7663880211653704, "grad_norm": 0.15900877118110657, "learning_rate": 0.002, "loss": 2.5501, "step": 384690 }, { "epoch": 0.7664079433890093, "grad_norm": 0.15394830703735352, "learning_rate": 0.002, "loss": 2.5465, "step": 384700 }, { "epoch": 0.7664278656126482, "grad_norm": 0.14526543021202087, "learning_rate": 0.002, "loss": 2.5497, "step": 384710 }, { "epoch": 0.7664477878362871, "grad_norm": 0.16346047818660736, "learning_rate": 0.002, "loss": 2.5426, "step": 384720 }, { "epoch": 0.7664677100599261, "grad_norm": 0.17225825786590576, "learning_rate": 0.002, "loss": 2.5488, "step": 384730 }, { "epoch": 0.766487632283565, "grad_norm": 0.15095645189285278, "learning_rate": 0.002, "loss": 2.5634, "step": 384740 }, { "epoch": 0.7665075545072039, "grad_norm": 0.151543527841568, "learning_rate": 0.002, "loss": 2.5565, "step": 384750 }, { "epoch": 0.7665274767308428, "grad_norm": 0.16118033230304718, "learning_rate": 0.002, "loss": 2.5505, "step": 384760 }, { "epoch": 0.7665473989544817, "grad_norm": 0.14705288410186768, "learning_rate": 0.002, "loss": 2.5757, "step": 384770 }, { "epoch": 0.7665673211781207, "grad_norm": 0.17277535796165466, "learning_rate": 0.002, "loss": 2.5512, "step": 384780 }, { "epoch": 0.7665872434017595, "grad_norm": 0.16242314875125885, "learning_rate": 0.002, "loss": 2.5547, "step": 384790 }, { "epoch": 0.7666071656253984, "grad_norm": 0.14909477531909943, "learning_rate": 0.002, "loss": 2.559, "step": 384800 }, { "epoch": 0.7666270878490373, "grad_norm": 0.16922248899936676, "learning_rate": 0.002, "loss": 2.5573, "step": 384810 }, { "epoch": 0.7666470100726763, "grad_norm": 0.18210013210773468, "learning_rate": 0.002, "loss": 2.5544, "step": 384820 }, { "epoch": 0.7666669322963152, "grad_norm": 0.1479005664587021, "learning_rate": 0.002, "loss": 2.5575, "step": 384830 }, { "epoch": 0.7666868545199541, "grad_norm": 0.17176514863967896, "learning_rate": 0.002, "loss": 2.5424, "step": 384840 }, { "epoch": 0.766706776743593, "grad_norm": 0.13677489757537842, "learning_rate": 0.002, "loss": 2.5517, "step": 384850 }, { "epoch": 0.7667266989672319, "grad_norm": 0.2046133577823639, "learning_rate": 0.002, "loss": 2.5641, "step": 384860 }, { "epoch": 0.7667466211908709, "grad_norm": 0.16163505613803864, "learning_rate": 0.002, "loss": 2.5514, "step": 384870 }, { "epoch": 0.7667665434145098, "grad_norm": 0.165251687169075, "learning_rate": 0.002, "loss": 2.5494, "step": 384880 }, { "epoch": 0.7667864656381487, "grad_norm": 0.15535040199756622, "learning_rate": 0.002, "loss": 2.5495, "step": 384890 }, { "epoch": 0.7668063878617876, "grad_norm": 0.149103045463562, "learning_rate": 0.002, "loss": 2.5577, "step": 384900 }, { "epoch": 0.7668263100854265, "grad_norm": 0.20431052148342133, "learning_rate": 0.002, "loss": 2.557, "step": 384910 }, { "epoch": 0.7668462323090655, "grad_norm": 0.14487384259700775, "learning_rate": 0.002, "loss": 2.5366, "step": 384920 }, { "epoch": 0.7668661545327043, "grad_norm": 0.17026473581790924, "learning_rate": 0.002, "loss": 2.5455, "step": 384930 }, { "epoch": 0.7668860767563432, "grad_norm": 0.15508751571178436, "learning_rate": 0.002, "loss": 2.5569, "step": 384940 }, { "epoch": 0.7669059989799821, "grad_norm": 0.14589840173721313, "learning_rate": 0.002, "loss": 2.5522, "step": 384950 }, { "epoch": 0.766925921203621, "grad_norm": 0.1575625240802765, "learning_rate": 0.002, "loss": 2.5531, "step": 384960 }, { "epoch": 0.76694584342726, "grad_norm": 0.14626632630825043, "learning_rate": 0.002, "loss": 2.5371, "step": 384970 }, { "epoch": 0.7669657656508989, "grad_norm": 0.17483581602573395, "learning_rate": 0.002, "loss": 2.5543, "step": 384980 }, { "epoch": 0.7669856878745378, "grad_norm": 0.15742461383342743, "learning_rate": 0.002, "loss": 2.5706, "step": 384990 }, { "epoch": 0.7670056100981767, "grad_norm": 0.15252649784088135, "learning_rate": 0.002, "loss": 2.5551, "step": 385000 }, { "epoch": 0.7670255323218156, "grad_norm": 0.17912957072257996, "learning_rate": 0.002, "loss": 2.5737, "step": 385010 }, { "epoch": 0.7670454545454546, "grad_norm": 0.16253912448883057, "learning_rate": 0.002, "loss": 2.5519, "step": 385020 }, { "epoch": 0.7670653767690935, "grad_norm": 0.1614065170288086, "learning_rate": 0.002, "loss": 2.5466, "step": 385030 }, { "epoch": 0.7670852989927324, "grad_norm": 0.1807451844215393, "learning_rate": 0.002, "loss": 2.5553, "step": 385040 }, { "epoch": 0.7671052212163713, "grad_norm": 0.19695083796977997, "learning_rate": 0.002, "loss": 2.5739, "step": 385050 }, { "epoch": 0.7671251434400101, "grad_norm": 0.180726557970047, "learning_rate": 0.002, "loss": 2.5622, "step": 385060 }, { "epoch": 0.7671450656636492, "grad_norm": 0.1706138402223587, "learning_rate": 0.002, "loss": 2.5583, "step": 385070 }, { "epoch": 0.767164987887288, "grad_norm": 0.1572166383266449, "learning_rate": 0.002, "loss": 2.5503, "step": 385080 }, { "epoch": 0.7671849101109269, "grad_norm": 0.19468727707862854, "learning_rate": 0.002, "loss": 2.5409, "step": 385090 }, { "epoch": 0.7672048323345658, "grad_norm": 0.17415392398834229, "learning_rate": 0.002, "loss": 2.555, "step": 385100 }, { "epoch": 0.7672247545582047, "grad_norm": 0.148086816072464, "learning_rate": 0.002, "loss": 2.5382, "step": 385110 }, { "epoch": 0.7672446767818437, "grad_norm": 0.1629854142665863, "learning_rate": 0.002, "loss": 2.5507, "step": 385120 }, { "epoch": 0.7672645990054826, "grad_norm": 0.15216021239757538, "learning_rate": 0.002, "loss": 2.5756, "step": 385130 }, { "epoch": 0.7672845212291215, "grad_norm": 0.15826058387756348, "learning_rate": 0.002, "loss": 2.5517, "step": 385140 }, { "epoch": 0.7673044434527604, "grad_norm": 0.15220394730567932, "learning_rate": 0.002, "loss": 2.5565, "step": 385150 }, { "epoch": 0.7673243656763994, "grad_norm": 0.16064265370368958, "learning_rate": 0.002, "loss": 2.5444, "step": 385160 }, { "epoch": 0.7673442879000383, "grad_norm": 0.1461126059293747, "learning_rate": 0.002, "loss": 2.5495, "step": 385170 }, { "epoch": 0.7673642101236772, "grad_norm": 0.18028850853443146, "learning_rate": 0.002, "loss": 2.5571, "step": 385180 }, { "epoch": 0.7673841323473161, "grad_norm": 0.1458163857460022, "learning_rate": 0.002, "loss": 2.5606, "step": 385190 }, { "epoch": 0.767404054570955, "grad_norm": 0.1481180340051651, "learning_rate": 0.002, "loss": 2.5485, "step": 385200 }, { "epoch": 0.767423976794594, "grad_norm": 0.19719648361206055, "learning_rate": 0.002, "loss": 2.5634, "step": 385210 }, { "epoch": 0.7674438990182328, "grad_norm": 0.16785582900047302, "learning_rate": 0.002, "loss": 2.555, "step": 385220 }, { "epoch": 0.7674638212418717, "grad_norm": 0.19816415011882782, "learning_rate": 0.002, "loss": 2.5394, "step": 385230 }, { "epoch": 0.7674837434655106, "grad_norm": 0.1875314712524414, "learning_rate": 0.002, "loss": 2.5309, "step": 385240 }, { "epoch": 0.7675036656891495, "grad_norm": 0.1823383867740631, "learning_rate": 0.002, "loss": 2.5509, "step": 385250 }, { "epoch": 0.7675235879127885, "grad_norm": 0.15061430633068085, "learning_rate": 0.002, "loss": 2.5527, "step": 385260 }, { "epoch": 0.7675435101364274, "grad_norm": 0.18858632445335388, "learning_rate": 0.002, "loss": 2.551, "step": 385270 }, { "epoch": 0.7675634323600663, "grad_norm": 0.16563235223293304, "learning_rate": 0.002, "loss": 2.5454, "step": 385280 }, { "epoch": 0.7675833545837052, "grad_norm": 0.1383451372385025, "learning_rate": 0.002, "loss": 2.5558, "step": 385290 }, { "epoch": 0.7676032768073441, "grad_norm": 0.1567487120628357, "learning_rate": 0.002, "loss": 2.5471, "step": 385300 }, { "epoch": 0.7676231990309831, "grad_norm": 0.16073809564113617, "learning_rate": 0.002, "loss": 2.5513, "step": 385310 }, { "epoch": 0.767643121254622, "grad_norm": 0.14870499074459076, "learning_rate": 0.002, "loss": 2.5428, "step": 385320 }, { "epoch": 0.7676630434782609, "grad_norm": 0.24314860999584198, "learning_rate": 0.002, "loss": 2.5469, "step": 385330 }, { "epoch": 0.7676829657018998, "grad_norm": 0.16179916262626648, "learning_rate": 0.002, "loss": 2.5523, "step": 385340 }, { "epoch": 0.7677028879255386, "grad_norm": 0.13319438695907593, "learning_rate": 0.002, "loss": 2.5573, "step": 385350 }, { "epoch": 0.7677228101491776, "grad_norm": 0.20245151221752167, "learning_rate": 0.002, "loss": 2.5443, "step": 385360 }, { "epoch": 0.7677427323728165, "grad_norm": 0.16478829085826874, "learning_rate": 0.002, "loss": 2.5511, "step": 385370 }, { "epoch": 0.7677626545964554, "grad_norm": 0.14356079697608948, "learning_rate": 0.002, "loss": 2.5543, "step": 385380 }, { "epoch": 0.7677825768200943, "grad_norm": 0.16520573198795319, "learning_rate": 0.002, "loss": 2.5643, "step": 385390 }, { "epoch": 0.7678024990437332, "grad_norm": 0.15630879998207092, "learning_rate": 0.002, "loss": 2.5594, "step": 385400 }, { "epoch": 0.7678224212673722, "grad_norm": 0.165143221616745, "learning_rate": 0.002, "loss": 2.5519, "step": 385410 }, { "epoch": 0.7678423434910111, "grad_norm": 0.21200108528137207, "learning_rate": 0.002, "loss": 2.559, "step": 385420 }, { "epoch": 0.76786226571465, "grad_norm": 0.1455334573984146, "learning_rate": 0.002, "loss": 2.5602, "step": 385430 }, { "epoch": 0.7678821879382889, "grad_norm": 0.2117370367050171, "learning_rate": 0.002, "loss": 2.5643, "step": 385440 }, { "epoch": 0.7679021101619279, "grad_norm": 0.16474927961826324, "learning_rate": 0.002, "loss": 2.551, "step": 385450 }, { "epoch": 0.7679220323855668, "grad_norm": 0.17901623249053955, "learning_rate": 0.002, "loss": 2.5581, "step": 385460 }, { "epoch": 0.7679419546092057, "grad_norm": 0.15625621378421783, "learning_rate": 0.002, "loss": 2.5527, "step": 385470 }, { "epoch": 0.7679618768328446, "grad_norm": 0.14090223610401154, "learning_rate": 0.002, "loss": 2.5599, "step": 385480 }, { "epoch": 0.7679817990564834, "grad_norm": 0.26650872826576233, "learning_rate": 0.002, "loss": 2.5587, "step": 385490 }, { "epoch": 0.7680017212801225, "grad_norm": 0.16176986694335938, "learning_rate": 0.002, "loss": 2.5458, "step": 385500 }, { "epoch": 0.7680216435037613, "grad_norm": 0.187707781791687, "learning_rate": 0.002, "loss": 2.5429, "step": 385510 }, { "epoch": 0.7680415657274002, "grad_norm": 0.14574982225894928, "learning_rate": 0.002, "loss": 2.5514, "step": 385520 }, { "epoch": 0.7680614879510391, "grad_norm": 0.15926994383335114, "learning_rate": 0.002, "loss": 2.5532, "step": 385530 }, { "epoch": 0.768081410174678, "grad_norm": 0.17430976033210754, "learning_rate": 0.002, "loss": 2.545, "step": 385540 }, { "epoch": 0.768101332398317, "grad_norm": 0.16120408475399017, "learning_rate": 0.002, "loss": 2.5501, "step": 385550 }, { "epoch": 0.7681212546219559, "grad_norm": 0.1698564738035202, "learning_rate": 0.002, "loss": 2.5396, "step": 385560 }, { "epoch": 0.7681411768455948, "grad_norm": 0.15849314630031586, "learning_rate": 0.002, "loss": 2.542, "step": 385570 }, { "epoch": 0.7681610990692337, "grad_norm": 0.16718034446239471, "learning_rate": 0.002, "loss": 2.5628, "step": 385580 }, { "epoch": 0.7681810212928726, "grad_norm": 0.16872341930866241, "learning_rate": 0.002, "loss": 2.5517, "step": 385590 }, { "epoch": 0.7682009435165116, "grad_norm": 0.17814235389232635, "learning_rate": 0.002, "loss": 2.5736, "step": 385600 }, { "epoch": 0.7682208657401505, "grad_norm": 0.1645747572183609, "learning_rate": 0.002, "loss": 2.5479, "step": 385610 }, { "epoch": 0.7682407879637894, "grad_norm": 0.15560513734817505, "learning_rate": 0.002, "loss": 2.5595, "step": 385620 }, { "epoch": 0.7682607101874283, "grad_norm": 0.16275149583816528, "learning_rate": 0.002, "loss": 2.5366, "step": 385630 }, { "epoch": 0.7682806324110671, "grad_norm": 0.15608148276805878, "learning_rate": 0.002, "loss": 2.5723, "step": 385640 }, { "epoch": 0.7683005546347061, "grad_norm": 0.14756150543689728, "learning_rate": 0.002, "loss": 2.552, "step": 385650 }, { "epoch": 0.768320476858345, "grad_norm": 0.15456199645996094, "learning_rate": 0.002, "loss": 2.5484, "step": 385660 }, { "epoch": 0.7683403990819839, "grad_norm": 0.17636847496032715, "learning_rate": 0.002, "loss": 2.5644, "step": 385670 }, { "epoch": 0.7683603213056228, "grad_norm": 0.15908007323741913, "learning_rate": 0.002, "loss": 2.5666, "step": 385680 }, { "epoch": 0.7683802435292617, "grad_norm": 0.16550230979919434, "learning_rate": 0.002, "loss": 2.5496, "step": 385690 }, { "epoch": 0.7684001657529007, "grad_norm": 0.1369214653968811, "learning_rate": 0.002, "loss": 2.5653, "step": 385700 }, { "epoch": 0.7684200879765396, "grad_norm": 0.18419238924980164, "learning_rate": 0.002, "loss": 2.5399, "step": 385710 }, { "epoch": 0.7684400102001785, "grad_norm": 0.17669890820980072, "learning_rate": 0.002, "loss": 2.5606, "step": 385720 }, { "epoch": 0.7684599324238174, "grad_norm": 0.15160009264945984, "learning_rate": 0.002, "loss": 2.5528, "step": 385730 }, { "epoch": 0.7684798546474564, "grad_norm": 0.17559431493282318, "learning_rate": 0.002, "loss": 2.5522, "step": 385740 }, { "epoch": 0.7684997768710953, "grad_norm": 0.15052513778209686, "learning_rate": 0.002, "loss": 2.5597, "step": 385750 }, { "epoch": 0.7685196990947342, "grad_norm": 0.16576920449733734, "learning_rate": 0.002, "loss": 2.5525, "step": 385760 }, { "epoch": 0.7685396213183731, "grad_norm": 0.16942834854125977, "learning_rate": 0.002, "loss": 2.5567, "step": 385770 }, { "epoch": 0.768559543542012, "grad_norm": 0.17684559524059296, "learning_rate": 0.002, "loss": 2.5538, "step": 385780 }, { "epoch": 0.768579465765651, "grad_norm": 0.16416384279727936, "learning_rate": 0.002, "loss": 2.5582, "step": 385790 }, { "epoch": 0.7685993879892898, "grad_norm": 0.1780635565519333, "learning_rate": 0.002, "loss": 2.5602, "step": 385800 }, { "epoch": 0.7686193102129287, "grad_norm": 0.1508522629737854, "learning_rate": 0.002, "loss": 2.5523, "step": 385810 }, { "epoch": 0.7686392324365676, "grad_norm": 0.15301215648651123, "learning_rate": 0.002, "loss": 2.5502, "step": 385820 }, { "epoch": 0.7686591546602065, "grad_norm": 0.16834650933742523, "learning_rate": 0.002, "loss": 2.5684, "step": 385830 }, { "epoch": 0.7686790768838455, "grad_norm": 0.13365596532821655, "learning_rate": 0.002, "loss": 2.5562, "step": 385840 }, { "epoch": 0.7686989991074844, "grad_norm": 0.1980174034833908, "learning_rate": 0.002, "loss": 2.5582, "step": 385850 }, { "epoch": 0.7687189213311233, "grad_norm": 0.17781953513622284, "learning_rate": 0.002, "loss": 2.5643, "step": 385860 }, { "epoch": 0.7687388435547622, "grad_norm": 0.16174522042274475, "learning_rate": 0.002, "loss": 2.5558, "step": 385870 }, { "epoch": 0.7687587657784011, "grad_norm": 0.15085160732269287, "learning_rate": 0.002, "loss": 2.5335, "step": 385880 }, { "epoch": 0.7687786880020401, "grad_norm": 0.13989734649658203, "learning_rate": 0.002, "loss": 2.5426, "step": 385890 }, { "epoch": 0.768798610225679, "grad_norm": 0.18708549439907074, "learning_rate": 0.002, "loss": 2.5535, "step": 385900 }, { "epoch": 0.7688185324493179, "grad_norm": 0.1472209393978119, "learning_rate": 0.002, "loss": 2.5512, "step": 385910 }, { "epoch": 0.7688384546729568, "grad_norm": 0.1709863692522049, "learning_rate": 0.002, "loss": 2.5433, "step": 385920 }, { "epoch": 0.7688583768965956, "grad_norm": 0.1519746631383896, "learning_rate": 0.002, "loss": 2.5679, "step": 385930 }, { "epoch": 0.7688782991202346, "grad_norm": 0.17843212187290192, "learning_rate": 0.002, "loss": 2.5413, "step": 385940 }, { "epoch": 0.7688982213438735, "grad_norm": 0.16309742629528046, "learning_rate": 0.002, "loss": 2.5634, "step": 385950 }, { "epoch": 0.7689181435675124, "grad_norm": 0.17199277877807617, "learning_rate": 0.002, "loss": 2.5396, "step": 385960 }, { "epoch": 0.7689380657911513, "grad_norm": 0.17406943440437317, "learning_rate": 0.002, "loss": 2.5527, "step": 385970 }, { "epoch": 0.7689579880147902, "grad_norm": 0.1421719789505005, "learning_rate": 0.002, "loss": 2.5519, "step": 385980 }, { "epoch": 0.7689779102384292, "grad_norm": 0.16242492198944092, "learning_rate": 0.002, "loss": 2.5586, "step": 385990 }, { "epoch": 0.7689978324620681, "grad_norm": 0.12306774407625198, "learning_rate": 0.002, "loss": 2.56, "step": 386000 }, { "epoch": 0.769017754685707, "grad_norm": 0.13375036418437958, "learning_rate": 0.002, "loss": 2.5565, "step": 386010 }, { "epoch": 0.7690376769093459, "grad_norm": 0.18409320712089539, "learning_rate": 0.002, "loss": 2.5552, "step": 386020 }, { "epoch": 0.7690575991329849, "grad_norm": 0.15452446043491364, "learning_rate": 0.002, "loss": 2.5416, "step": 386030 }, { "epoch": 0.7690775213566238, "grad_norm": 0.1535695344209671, "learning_rate": 0.002, "loss": 2.5388, "step": 386040 }, { "epoch": 0.7690974435802627, "grad_norm": 0.15992538630962372, "learning_rate": 0.002, "loss": 2.5689, "step": 386050 }, { "epoch": 0.7691173658039016, "grad_norm": 0.16817225515842438, "learning_rate": 0.002, "loss": 2.5582, "step": 386060 }, { "epoch": 0.7691372880275404, "grad_norm": 0.14491212368011475, "learning_rate": 0.002, "loss": 2.5567, "step": 386070 }, { "epoch": 0.7691572102511794, "grad_norm": 0.14226070046424866, "learning_rate": 0.002, "loss": 2.5679, "step": 386080 }, { "epoch": 0.7691771324748183, "grad_norm": 0.17641431093215942, "learning_rate": 0.002, "loss": 2.5507, "step": 386090 }, { "epoch": 0.7691970546984572, "grad_norm": 0.1530170887708664, "learning_rate": 0.002, "loss": 2.5521, "step": 386100 }, { "epoch": 0.7692169769220961, "grad_norm": 0.1540059596300125, "learning_rate": 0.002, "loss": 2.5656, "step": 386110 }, { "epoch": 0.769236899145735, "grad_norm": 0.16983848810195923, "learning_rate": 0.002, "loss": 2.5483, "step": 386120 }, { "epoch": 0.769256821369374, "grad_norm": 0.16223806142807007, "learning_rate": 0.002, "loss": 2.5459, "step": 386130 }, { "epoch": 0.7692767435930129, "grad_norm": 0.17423461377620697, "learning_rate": 0.002, "loss": 2.5411, "step": 386140 }, { "epoch": 0.7692966658166518, "grad_norm": 0.15696310997009277, "learning_rate": 0.002, "loss": 2.5526, "step": 386150 }, { "epoch": 0.7693165880402907, "grad_norm": 0.17924286425113678, "learning_rate": 0.002, "loss": 2.5502, "step": 386160 }, { "epoch": 0.7693365102639296, "grad_norm": 0.1680767983198166, "learning_rate": 0.002, "loss": 2.5454, "step": 386170 }, { "epoch": 0.7693564324875686, "grad_norm": 0.15603430569171906, "learning_rate": 0.002, "loss": 2.5516, "step": 386180 }, { "epoch": 0.7693763547112075, "grad_norm": 0.15661033987998962, "learning_rate": 0.002, "loss": 2.5414, "step": 386190 }, { "epoch": 0.7693962769348464, "grad_norm": 0.14187201857566833, "learning_rate": 0.002, "loss": 2.5389, "step": 386200 }, { "epoch": 0.7694161991584852, "grad_norm": 0.1549515724182129, "learning_rate": 0.002, "loss": 2.5457, "step": 386210 }, { "epoch": 0.7694361213821241, "grad_norm": 0.14786957204341888, "learning_rate": 0.002, "loss": 2.558, "step": 386220 }, { "epoch": 0.7694560436057631, "grad_norm": 0.2030339539051056, "learning_rate": 0.002, "loss": 2.5643, "step": 386230 }, { "epoch": 0.769475965829402, "grad_norm": 0.20877359807491302, "learning_rate": 0.002, "loss": 2.5653, "step": 386240 }, { "epoch": 0.7694958880530409, "grad_norm": 0.14564643800258636, "learning_rate": 0.002, "loss": 2.5488, "step": 386250 }, { "epoch": 0.7695158102766798, "grad_norm": 0.15058676898479462, "learning_rate": 0.002, "loss": 2.544, "step": 386260 }, { "epoch": 0.7695357325003187, "grad_norm": 0.1684746891260147, "learning_rate": 0.002, "loss": 2.5594, "step": 386270 }, { "epoch": 0.7695556547239577, "grad_norm": 0.1576535403728485, "learning_rate": 0.002, "loss": 2.5537, "step": 386280 }, { "epoch": 0.7695755769475966, "grad_norm": 0.16768598556518555, "learning_rate": 0.002, "loss": 2.5466, "step": 386290 }, { "epoch": 0.7695954991712355, "grad_norm": 0.21732306480407715, "learning_rate": 0.002, "loss": 2.558, "step": 386300 }, { "epoch": 0.7696154213948744, "grad_norm": 0.150776669383049, "learning_rate": 0.002, "loss": 2.5502, "step": 386310 }, { "epoch": 0.7696353436185134, "grad_norm": 0.1663365364074707, "learning_rate": 0.002, "loss": 2.5515, "step": 386320 }, { "epoch": 0.7696552658421523, "grad_norm": 0.16757382452487946, "learning_rate": 0.002, "loss": 2.5573, "step": 386330 }, { "epoch": 0.7696751880657912, "grad_norm": 0.1728920042514801, "learning_rate": 0.002, "loss": 2.5564, "step": 386340 }, { "epoch": 0.76969511028943, "grad_norm": 0.12693563103675842, "learning_rate": 0.002, "loss": 2.5562, "step": 386350 }, { "epoch": 0.7697150325130689, "grad_norm": 0.14323389530181885, "learning_rate": 0.002, "loss": 2.5448, "step": 386360 }, { "epoch": 0.7697349547367079, "grad_norm": 0.2131558507680893, "learning_rate": 0.002, "loss": 2.5532, "step": 386370 }, { "epoch": 0.7697548769603468, "grad_norm": 0.17902371287345886, "learning_rate": 0.002, "loss": 2.5624, "step": 386380 }, { "epoch": 0.7697747991839857, "grad_norm": 0.1718461662530899, "learning_rate": 0.002, "loss": 2.5534, "step": 386390 }, { "epoch": 0.7697947214076246, "grad_norm": 0.17377643287181854, "learning_rate": 0.002, "loss": 2.5705, "step": 386400 }, { "epoch": 0.7698146436312635, "grad_norm": 0.14854726195335388, "learning_rate": 0.002, "loss": 2.5468, "step": 386410 }, { "epoch": 0.7698345658549025, "grad_norm": 0.16987821459770203, "learning_rate": 0.002, "loss": 2.5576, "step": 386420 }, { "epoch": 0.7698544880785414, "grad_norm": 0.2004273533821106, "learning_rate": 0.002, "loss": 2.5293, "step": 386430 }, { "epoch": 0.7698744103021803, "grad_norm": 0.1389647126197815, "learning_rate": 0.002, "loss": 2.5491, "step": 386440 }, { "epoch": 0.7698943325258192, "grad_norm": 0.1490211933851242, "learning_rate": 0.002, "loss": 2.5523, "step": 386450 }, { "epoch": 0.7699142547494581, "grad_norm": 0.14879216253757477, "learning_rate": 0.002, "loss": 2.5473, "step": 386460 }, { "epoch": 0.7699341769730971, "grad_norm": 0.18083469569683075, "learning_rate": 0.002, "loss": 2.5353, "step": 386470 }, { "epoch": 0.769954099196736, "grad_norm": 0.19811920821666718, "learning_rate": 0.002, "loss": 2.5575, "step": 386480 }, { "epoch": 0.7699740214203749, "grad_norm": 0.16944962739944458, "learning_rate": 0.002, "loss": 2.5498, "step": 386490 }, { "epoch": 0.7699939436440137, "grad_norm": 0.16092832386493683, "learning_rate": 0.002, "loss": 2.5551, "step": 386500 }, { "epoch": 0.7700138658676526, "grad_norm": 0.1757969856262207, "learning_rate": 0.002, "loss": 2.5588, "step": 386510 }, { "epoch": 0.7700337880912916, "grad_norm": 0.14845995604991913, "learning_rate": 0.002, "loss": 2.5581, "step": 386520 }, { "epoch": 0.7700537103149305, "grad_norm": 0.1580626219511032, "learning_rate": 0.002, "loss": 2.5517, "step": 386530 }, { "epoch": 0.7700736325385694, "grad_norm": 0.14942839741706848, "learning_rate": 0.002, "loss": 2.5399, "step": 386540 }, { "epoch": 0.7700935547622083, "grad_norm": 0.16792401671409607, "learning_rate": 0.002, "loss": 2.5521, "step": 386550 }, { "epoch": 0.7701134769858472, "grad_norm": 0.1419026255607605, "learning_rate": 0.002, "loss": 2.5573, "step": 386560 }, { "epoch": 0.7701333992094862, "grad_norm": 0.18349990248680115, "learning_rate": 0.002, "loss": 2.548, "step": 386570 }, { "epoch": 0.7701533214331251, "grad_norm": 0.1572282612323761, "learning_rate": 0.002, "loss": 2.5402, "step": 386580 }, { "epoch": 0.770173243656764, "grad_norm": 0.13943296670913696, "learning_rate": 0.002, "loss": 2.5497, "step": 386590 }, { "epoch": 0.7701931658804029, "grad_norm": 0.15064845979213715, "learning_rate": 0.002, "loss": 2.5493, "step": 386600 }, { "epoch": 0.7702130881040418, "grad_norm": 0.1786559373140335, "learning_rate": 0.002, "loss": 2.5491, "step": 386610 }, { "epoch": 0.7702330103276808, "grad_norm": 0.1794300079345703, "learning_rate": 0.002, "loss": 2.5462, "step": 386620 }, { "epoch": 0.7702529325513197, "grad_norm": 0.14626628160476685, "learning_rate": 0.002, "loss": 2.5488, "step": 386630 }, { "epoch": 0.7702728547749585, "grad_norm": 0.1504722684621811, "learning_rate": 0.002, "loss": 2.5543, "step": 386640 }, { "epoch": 0.7702927769985974, "grad_norm": 0.16224870085716248, "learning_rate": 0.002, "loss": 2.5343, "step": 386650 }, { "epoch": 0.7703126992222364, "grad_norm": 0.17713582515716553, "learning_rate": 0.002, "loss": 2.5653, "step": 386660 }, { "epoch": 0.7703326214458753, "grad_norm": 0.15393736958503723, "learning_rate": 0.002, "loss": 2.5407, "step": 386670 }, { "epoch": 0.7703525436695142, "grad_norm": 0.14847035706043243, "learning_rate": 0.002, "loss": 2.5483, "step": 386680 }, { "epoch": 0.7703724658931531, "grad_norm": 0.14966194331645966, "learning_rate": 0.002, "loss": 2.5602, "step": 386690 }, { "epoch": 0.770392388116792, "grad_norm": 0.14192429184913635, "learning_rate": 0.002, "loss": 2.544, "step": 386700 }, { "epoch": 0.770412310340431, "grad_norm": 0.13392066955566406, "learning_rate": 0.002, "loss": 2.5401, "step": 386710 }, { "epoch": 0.7704322325640699, "grad_norm": 0.19088482856750488, "learning_rate": 0.002, "loss": 2.5582, "step": 386720 }, { "epoch": 0.7704521547877088, "grad_norm": 0.13757196068763733, "learning_rate": 0.002, "loss": 2.5443, "step": 386730 }, { "epoch": 0.7704720770113477, "grad_norm": 0.1667586863040924, "learning_rate": 0.002, "loss": 2.561, "step": 386740 }, { "epoch": 0.7704919992349866, "grad_norm": 0.15651097893714905, "learning_rate": 0.002, "loss": 2.5461, "step": 386750 }, { "epoch": 0.7705119214586256, "grad_norm": 0.3044902980327606, "learning_rate": 0.002, "loss": 2.5478, "step": 386760 }, { "epoch": 0.7705318436822645, "grad_norm": 0.17618079483509064, "learning_rate": 0.002, "loss": 2.5631, "step": 386770 }, { "epoch": 0.7705517659059034, "grad_norm": 0.13097678124904633, "learning_rate": 0.002, "loss": 2.5433, "step": 386780 }, { "epoch": 0.7705716881295422, "grad_norm": 0.15583153069019318, "learning_rate": 0.002, "loss": 2.5387, "step": 386790 }, { "epoch": 0.7705916103531811, "grad_norm": 0.14331531524658203, "learning_rate": 0.002, "loss": 2.5542, "step": 386800 }, { "epoch": 0.7706115325768201, "grad_norm": 0.3746321499347687, "learning_rate": 0.002, "loss": 2.5481, "step": 386810 }, { "epoch": 0.770631454800459, "grad_norm": 0.16025055944919586, "learning_rate": 0.002, "loss": 2.5568, "step": 386820 }, { "epoch": 0.7706513770240979, "grad_norm": 0.1474044919013977, "learning_rate": 0.002, "loss": 2.5523, "step": 386830 }, { "epoch": 0.7706712992477368, "grad_norm": 0.1569989174604416, "learning_rate": 0.002, "loss": 2.5505, "step": 386840 }, { "epoch": 0.7706912214713757, "grad_norm": 0.17838534712791443, "learning_rate": 0.002, "loss": 2.5435, "step": 386850 }, { "epoch": 0.7707111436950147, "grad_norm": 0.16238395869731903, "learning_rate": 0.002, "loss": 2.5583, "step": 386860 }, { "epoch": 0.7707310659186536, "grad_norm": 0.190401092171669, "learning_rate": 0.002, "loss": 2.5668, "step": 386870 }, { "epoch": 0.7707509881422925, "grad_norm": 0.15836890041828156, "learning_rate": 0.002, "loss": 2.5729, "step": 386880 }, { "epoch": 0.7707709103659314, "grad_norm": 0.14817310869693756, "learning_rate": 0.002, "loss": 2.5404, "step": 386890 }, { "epoch": 0.7707908325895703, "grad_norm": 0.15452231466770172, "learning_rate": 0.002, "loss": 2.5609, "step": 386900 }, { "epoch": 0.7708107548132093, "grad_norm": 0.13176874816417694, "learning_rate": 0.002, "loss": 2.5395, "step": 386910 }, { "epoch": 0.7708306770368482, "grad_norm": 0.16827373206615448, "learning_rate": 0.002, "loss": 2.5586, "step": 386920 }, { "epoch": 0.770850599260487, "grad_norm": 0.183242529630661, "learning_rate": 0.002, "loss": 2.5606, "step": 386930 }, { "epoch": 0.7708705214841259, "grad_norm": 0.16880083084106445, "learning_rate": 0.002, "loss": 2.5591, "step": 386940 }, { "epoch": 0.7708904437077649, "grad_norm": 0.19129927456378937, "learning_rate": 0.002, "loss": 2.5567, "step": 386950 }, { "epoch": 0.7709103659314038, "grad_norm": 0.15553341805934906, "learning_rate": 0.002, "loss": 2.5574, "step": 386960 }, { "epoch": 0.7709302881550427, "grad_norm": 0.1527814269065857, "learning_rate": 0.002, "loss": 2.5555, "step": 386970 }, { "epoch": 0.7709502103786816, "grad_norm": 0.1623174101114273, "learning_rate": 0.002, "loss": 2.5512, "step": 386980 }, { "epoch": 0.7709701326023205, "grad_norm": 0.15950334072113037, "learning_rate": 0.002, "loss": 2.5549, "step": 386990 }, { "epoch": 0.7709900548259595, "grad_norm": 0.17268718779087067, "learning_rate": 0.002, "loss": 2.5506, "step": 387000 }, { "epoch": 0.7710099770495984, "grad_norm": 0.19555124640464783, "learning_rate": 0.002, "loss": 2.55, "step": 387010 }, { "epoch": 0.7710298992732373, "grad_norm": 0.1884436458349228, "learning_rate": 0.002, "loss": 2.5563, "step": 387020 }, { "epoch": 0.7710498214968762, "grad_norm": 0.13266541063785553, "learning_rate": 0.002, "loss": 2.5542, "step": 387030 }, { "epoch": 0.7710697437205151, "grad_norm": 0.15366843342781067, "learning_rate": 0.002, "loss": 2.5542, "step": 387040 }, { "epoch": 0.7710896659441541, "grad_norm": 0.15821336209774017, "learning_rate": 0.002, "loss": 2.5545, "step": 387050 }, { "epoch": 0.771109588167793, "grad_norm": 0.14925727248191833, "learning_rate": 0.002, "loss": 2.5516, "step": 387060 }, { "epoch": 0.7711295103914318, "grad_norm": 0.15409772098064423, "learning_rate": 0.002, "loss": 2.5552, "step": 387070 }, { "epoch": 0.7711494326150707, "grad_norm": 0.15886875987052917, "learning_rate": 0.002, "loss": 2.56, "step": 387080 }, { "epoch": 0.7711693548387096, "grad_norm": 0.15075723826885223, "learning_rate": 0.002, "loss": 2.5582, "step": 387090 }, { "epoch": 0.7711892770623486, "grad_norm": 0.18644993007183075, "learning_rate": 0.002, "loss": 2.5526, "step": 387100 }, { "epoch": 0.7712091992859875, "grad_norm": 0.1600043624639511, "learning_rate": 0.002, "loss": 2.5425, "step": 387110 }, { "epoch": 0.7712291215096264, "grad_norm": 0.1394386738538742, "learning_rate": 0.002, "loss": 2.5545, "step": 387120 }, { "epoch": 0.7712490437332653, "grad_norm": 0.146823912858963, "learning_rate": 0.002, "loss": 2.5453, "step": 387130 }, { "epoch": 0.7712689659569042, "grad_norm": 0.17075970768928528, "learning_rate": 0.002, "loss": 2.5529, "step": 387140 }, { "epoch": 0.7712888881805432, "grad_norm": 0.18716266751289368, "learning_rate": 0.002, "loss": 2.5482, "step": 387150 }, { "epoch": 0.7713088104041821, "grad_norm": 0.14758403599262238, "learning_rate": 0.002, "loss": 2.5642, "step": 387160 }, { "epoch": 0.771328732627821, "grad_norm": 0.17009712755680084, "learning_rate": 0.002, "loss": 2.5516, "step": 387170 }, { "epoch": 0.7713486548514599, "grad_norm": 0.18614831566810608, "learning_rate": 0.002, "loss": 2.5541, "step": 387180 }, { "epoch": 0.7713685770750988, "grad_norm": 0.16098231077194214, "learning_rate": 0.002, "loss": 2.5588, "step": 387190 }, { "epoch": 0.7713884992987378, "grad_norm": 0.1456606686115265, "learning_rate": 0.002, "loss": 2.5583, "step": 387200 }, { "epoch": 0.7714084215223767, "grad_norm": 0.18849442899227142, "learning_rate": 0.002, "loss": 2.5518, "step": 387210 }, { "epoch": 0.7714283437460155, "grad_norm": 0.15858952701091766, "learning_rate": 0.002, "loss": 2.5717, "step": 387220 }, { "epoch": 0.7714482659696544, "grad_norm": 0.16836056113243103, "learning_rate": 0.002, "loss": 2.5622, "step": 387230 }, { "epoch": 0.7714681881932934, "grad_norm": 0.16074571013450623, "learning_rate": 0.002, "loss": 2.5538, "step": 387240 }, { "epoch": 0.7714881104169323, "grad_norm": 0.15750649571418762, "learning_rate": 0.002, "loss": 2.5597, "step": 387250 }, { "epoch": 0.7715080326405712, "grad_norm": 0.14115437865257263, "learning_rate": 0.002, "loss": 2.5483, "step": 387260 }, { "epoch": 0.7715279548642101, "grad_norm": 0.13093596696853638, "learning_rate": 0.002, "loss": 2.5622, "step": 387270 }, { "epoch": 0.771547877087849, "grad_norm": 0.22581684589385986, "learning_rate": 0.002, "loss": 2.5682, "step": 387280 }, { "epoch": 0.771567799311488, "grad_norm": 0.14223943650722504, "learning_rate": 0.002, "loss": 2.5665, "step": 387290 }, { "epoch": 0.7715877215351269, "grad_norm": 0.20904169976711273, "learning_rate": 0.002, "loss": 2.5338, "step": 387300 }, { "epoch": 0.7716076437587658, "grad_norm": 0.15420663356781006, "learning_rate": 0.002, "loss": 2.5602, "step": 387310 }, { "epoch": 0.7716275659824047, "grad_norm": 0.1602664440870285, "learning_rate": 0.002, "loss": 2.5422, "step": 387320 }, { "epoch": 0.7716474882060436, "grad_norm": 0.15965299308300018, "learning_rate": 0.002, "loss": 2.5315, "step": 387330 }, { "epoch": 0.7716674104296826, "grad_norm": 0.1935303956270218, "learning_rate": 0.002, "loss": 2.5354, "step": 387340 }, { "epoch": 0.7716873326533215, "grad_norm": 0.13399222493171692, "learning_rate": 0.002, "loss": 2.5631, "step": 387350 }, { "epoch": 0.7717072548769603, "grad_norm": 0.16189667582511902, "learning_rate": 0.002, "loss": 2.5468, "step": 387360 }, { "epoch": 0.7717271771005992, "grad_norm": 0.175684854388237, "learning_rate": 0.002, "loss": 2.5483, "step": 387370 }, { "epoch": 0.7717470993242381, "grad_norm": 0.13745912909507751, "learning_rate": 0.002, "loss": 2.5501, "step": 387380 }, { "epoch": 0.7717670215478771, "grad_norm": 0.1427498161792755, "learning_rate": 0.002, "loss": 2.5599, "step": 387390 }, { "epoch": 0.771786943771516, "grad_norm": 0.15460319817066193, "learning_rate": 0.002, "loss": 2.5582, "step": 387400 }, { "epoch": 0.7718068659951549, "grad_norm": 0.15621469914913177, "learning_rate": 0.002, "loss": 2.5573, "step": 387410 }, { "epoch": 0.7718267882187938, "grad_norm": 0.1730184704065323, "learning_rate": 0.002, "loss": 2.5675, "step": 387420 }, { "epoch": 0.7718467104424327, "grad_norm": 0.20552368462085724, "learning_rate": 0.002, "loss": 2.5531, "step": 387430 }, { "epoch": 0.7718666326660717, "grad_norm": 0.14736701548099518, "learning_rate": 0.002, "loss": 2.5529, "step": 387440 }, { "epoch": 0.7718865548897106, "grad_norm": 0.1630738377571106, "learning_rate": 0.002, "loss": 2.559, "step": 387450 }, { "epoch": 0.7719064771133495, "grad_norm": 0.1333831250667572, "learning_rate": 0.002, "loss": 2.55, "step": 387460 }, { "epoch": 0.7719263993369884, "grad_norm": 0.17074483633041382, "learning_rate": 0.002, "loss": 2.5418, "step": 387470 }, { "epoch": 0.7719463215606273, "grad_norm": 0.1520897001028061, "learning_rate": 0.002, "loss": 2.5449, "step": 387480 }, { "epoch": 0.7719662437842663, "grad_norm": 0.15478035807609558, "learning_rate": 0.002, "loss": 2.5498, "step": 387490 }, { "epoch": 0.7719861660079052, "grad_norm": 0.15142063796520233, "learning_rate": 0.002, "loss": 2.5572, "step": 387500 }, { "epoch": 0.772006088231544, "grad_norm": 0.1759500652551651, "learning_rate": 0.002, "loss": 2.5438, "step": 387510 }, { "epoch": 0.7720260104551829, "grad_norm": 0.1307004690170288, "learning_rate": 0.002, "loss": 2.5605, "step": 387520 }, { "epoch": 0.7720459326788219, "grad_norm": 0.2014337182044983, "learning_rate": 0.002, "loss": 2.5493, "step": 387530 }, { "epoch": 0.7720658549024608, "grad_norm": 0.13820159435272217, "learning_rate": 0.002, "loss": 2.5469, "step": 387540 }, { "epoch": 0.7720857771260997, "grad_norm": 0.15007895231246948, "learning_rate": 0.002, "loss": 2.5478, "step": 387550 }, { "epoch": 0.7721056993497386, "grad_norm": 0.1556287556886673, "learning_rate": 0.002, "loss": 2.5586, "step": 387560 }, { "epoch": 0.7721256215733775, "grad_norm": 0.14194869995117188, "learning_rate": 0.002, "loss": 2.5545, "step": 387570 }, { "epoch": 0.7721455437970165, "grad_norm": 0.1584223508834839, "learning_rate": 0.002, "loss": 2.551, "step": 387580 }, { "epoch": 0.7721654660206554, "grad_norm": 0.1354532390832901, "learning_rate": 0.002, "loss": 2.5483, "step": 387590 }, { "epoch": 0.7721853882442943, "grad_norm": 0.16865861415863037, "learning_rate": 0.002, "loss": 2.5354, "step": 387600 }, { "epoch": 0.7722053104679332, "grad_norm": 0.15629719197750092, "learning_rate": 0.002, "loss": 2.5489, "step": 387610 }, { "epoch": 0.7722252326915721, "grad_norm": 0.18291616439819336, "learning_rate": 0.002, "loss": 2.5658, "step": 387620 }, { "epoch": 0.7722451549152111, "grad_norm": 0.17685124278068542, "learning_rate": 0.002, "loss": 2.5722, "step": 387630 }, { "epoch": 0.77226507713885, "grad_norm": 0.1805170476436615, "learning_rate": 0.002, "loss": 2.5687, "step": 387640 }, { "epoch": 0.7722849993624888, "grad_norm": 0.14957596361637115, "learning_rate": 0.002, "loss": 2.5543, "step": 387650 }, { "epoch": 0.7723049215861277, "grad_norm": 0.14932429790496826, "learning_rate": 0.002, "loss": 2.5428, "step": 387660 }, { "epoch": 0.7723248438097666, "grad_norm": 0.17003995180130005, "learning_rate": 0.002, "loss": 2.5382, "step": 387670 }, { "epoch": 0.7723447660334056, "grad_norm": 0.15232418477535248, "learning_rate": 0.002, "loss": 2.5472, "step": 387680 }, { "epoch": 0.7723646882570445, "grad_norm": 0.15854088962078094, "learning_rate": 0.002, "loss": 2.553, "step": 387690 }, { "epoch": 0.7723846104806834, "grad_norm": 0.15196998417377472, "learning_rate": 0.002, "loss": 2.5678, "step": 387700 }, { "epoch": 0.7724045327043223, "grad_norm": 0.15366409718990326, "learning_rate": 0.002, "loss": 2.5441, "step": 387710 }, { "epoch": 0.7724244549279612, "grad_norm": 0.17464730143547058, "learning_rate": 0.002, "loss": 2.5526, "step": 387720 }, { "epoch": 0.7724443771516002, "grad_norm": 0.15772229433059692, "learning_rate": 0.002, "loss": 2.5543, "step": 387730 }, { "epoch": 0.7724642993752391, "grad_norm": 0.15259714424610138, "learning_rate": 0.002, "loss": 2.5531, "step": 387740 }, { "epoch": 0.772484221598878, "grad_norm": 0.17815381288528442, "learning_rate": 0.002, "loss": 2.5507, "step": 387750 }, { "epoch": 0.7725041438225169, "grad_norm": 0.13366739451885223, "learning_rate": 0.002, "loss": 2.559, "step": 387760 }, { "epoch": 0.7725240660461558, "grad_norm": 0.18669693171977997, "learning_rate": 0.002, "loss": 2.5583, "step": 387770 }, { "epoch": 0.7725439882697948, "grad_norm": 0.15841306746006012, "learning_rate": 0.002, "loss": 2.5435, "step": 387780 }, { "epoch": 0.7725639104934336, "grad_norm": 0.12972834706306458, "learning_rate": 0.002, "loss": 2.5463, "step": 387790 }, { "epoch": 0.7725838327170725, "grad_norm": 0.20366021990776062, "learning_rate": 0.002, "loss": 2.5625, "step": 387800 }, { "epoch": 0.7726037549407114, "grad_norm": 0.15638169646263123, "learning_rate": 0.002, "loss": 2.5528, "step": 387810 }, { "epoch": 0.7726236771643504, "grad_norm": 0.1806359589099884, "learning_rate": 0.002, "loss": 2.557, "step": 387820 }, { "epoch": 0.7726435993879893, "grad_norm": 0.14506204426288605, "learning_rate": 0.002, "loss": 2.564, "step": 387830 }, { "epoch": 0.7726635216116282, "grad_norm": 0.21872034668922424, "learning_rate": 0.002, "loss": 2.5551, "step": 387840 }, { "epoch": 0.7726834438352671, "grad_norm": 0.15569615364074707, "learning_rate": 0.002, "loss": 2.5548, "step": 387850 }, { "epoch": 0.772703366058906, "grad_norm": 0.18891046941280365, "learning_rate": 0.002, "loss": 2.5514, "step": 387860 }, { "epoch": 0.772723288282545, "grad_norm": 0.17062169313430786, "learning_rate": 0.002, "loss": 2.5578, "step": 387870 }, { "epoch": 0.7727432105061839, "grad_norm": 0.15904198586940765, "learning_rate": 0.002, "loss": 2.5438, "step": 387880 }, { "epoch": 0.7727631327298228, "grad_norm": 0.17012356221675873, "learning_rate": 0.002, "loss": 2.557, "step": 387890 }, { "epoch": 0.7727830549534617, "grad_norm": 0.18329517543315887, "learning_rate": 0.002, "loss": 2.5442, "step": 387900 }, { "epoch": 0.7728029771771006, "grad_norm": 0.14125926792621613, "learning_rate": 0.002, "loss": 2.5479, "step": 387910 }, { "epoch": 0.7728228994007396, "grad_norm": 0.18295273184776306, "learning_rate": 0.002, "loss": 2.5628, "step": 387920 }, { "epoch": 0.7728428216243785, "grad_norm": 0.1422005146741867, "learning_rate": 0.002, "loss": 2.538, "step": 387930 }, { "epoch": 0.7728627438480173, "grad_norm": 0.1608463078737259, "learning_rate": 0.002, "loss": 2.5563, "step": 387940 }, { "epoch": 0.7728826660716562, "grad_norm": 0.1570500135421753, "learning_rate": 0.002, "loss": 2.5532, "step": 387950 }, { "epoch": 0.7729025882952951, "grad_norm": 0.1773119568824768, "learning_rate": 0.002, "loss": 2.5442, "step": 387960 }, { "epoch": 0.7729225105189341, "grad_norm": 0.18307499587535858, "learning_rate": 0.002, "loss": 2.5473, "step": 387970 }, { "epoch": 0.772942432742573, "grad_norm": 0.17252615094184875, "learning_rate": 0.002, "loss": 2.544, "step": 387980 }, { "epoch": 0.7729623549662119, "grad_norm": 0.1563977301120758, "learning_rate": 0.002, "loss": 2.5229, "step": 387990 }, { "epoch": 0.7729822771898508, "grad_norm": 0.14493884146213531, "learning_rate": 0.002, "loss": 2.5443, "step": 388000 }, { "epoch": 0.7730021994134897, "grad_norm": 0.14174804091453552, "learning_rate": 0.002, "loss": 2.5485, "step": 388010 }, { "epoch": 0.7730221216371287, "grad_norm": 0.17913149297237396, "learning_rate": 0.002, "loss": 2.5527, "step": 388020 }, { "epoch": 0.7730420438607676, "grad_norm": 0.13716715574264526, "learning_rate": 0.002, "loss": 2.56, "step": 388030 }, { "epoch": 0.7730619660844065, "grad_norm": 0.14378570020198822, "learning_rate": 0.002, "loss": 2.5338, "step": 388040 }, { "epoch": 0.7730818883080454, "grad_norm": 0.18213774263858795, "learning_rate": 0.002, "loss": 2.5521, "step": 388050 }, { "epoch": 0.7731018105316843, "grad_norm": 0.22511842846870422, "learning_rate": 0.002, "loss": 2.5368, "step": 388060 }, { "epoch": 0.7731217327553233, "grad_norm": 0.14498837292194366, "learning_rate": 0.002, "loss": 2.5641, "step": 388070 }, { "epoch": 0.7731416549789621, "grad_norm": 0.21376889944076538, "learning_rate": 0.002, "loss": 2.5651, "step": 388080 }, { "epoch": 0.773161577202601, "grad_norm": 0.18090341985225677, "learning_rate": 0.002, "loss": 2.5587, "step": 388090 }, { "epoch": 0.7731814994262399, "grad_norm": 0.17829124629497528, "learning_rate": 0.002, "loss": 2.5569, "step": 388100 }, { "epoch": 0.7732014216498789, "grad_norm": 0.12845569849014282, "learning_rate": 0.002, "loss": 2.5567, "step": 388110 }, { "epoch": 0.7732213438735178, "grad_norm": 0.17459452152252197, "learning_rate": 0.002, "loss": 2.5632, "step": 388120 }, { "epoch": 0.7732412660971567, "grad_norm": 0.17299577593803406, "learning_rate": 0.002, "loss": 2.5601, "step": 388130 }, { "epoch": 0.7732611883207956, "grad_norm": 0.14227241277694702, "learning_rate": 0.002, "loss": 2.563, "step": 388140 }, { "epoch": 0.7732811105444345, "grad_norm": 0.150864839553833, "learning_rate": 0.002, "loss": 2.5501, "step": 388150 }, { "epoch": 0.7733010327680735, "grad_norm": 0.20711612701416016, "learning_rate": 0.002, "loss": 2.5577, "step": 388160 }, { "epoch": 0.7733209549917124, "grad_norm": 0.16017568111419678, "learning_rate": 0.002, "loss": 2.5515, "step": 388170 }, { "epoch": 0.7733408772153513, "grad_norm": 0.15582048892974854, "learning_rate": 0.002, "loss": 2.5458, "step": 388180 }, { "epoch": 0.7733607994389902, "grad_norm": 0.14341816306114197, "learning_rate": 0.002, "loss": 2.5479, "step": 388190 }, { "epoch": 0.7733807216626291, "grad_norm": 0.19857141375541687, "learning_rate": 0.002, "loss": 2.5568, "step": 388200 }, { "epoch": 0.7734006438862681, "grad_norm": 0.14925944805145264, "learning_rate": 0.002, "loss": 2.5594, "step": 388210 }, { "epoch": 0.773420566109907, "grad_norm": 0.18127012252807617, "learning_rate": 0.002, "loss": 2.5477, "step": 388220 }, { "epoch": 0.7734404883335458, "grad_norm": 0.13755162060260773, "learning_rate": 0.002, "loss": 2.5421, "step": 388230 }, { "epoch": 0.7734604105571847, "grad_norm": 0.16109023988246918, "learning_rate": 0.002, "loss": 2.5559, "step": 388240 }, { "epoch": 0.7734803327808236, "grad_norm": 0.17666828632354736, "learning_rate": 0.002, "loss": 2.5565, "step": 388250 }, { "epoch": 0.7735002550044626, "grad_norm": 0.18046481907367706, "learning_rate": 0.002, "loss": 2.5505, "step": 388260 }, { "epoch": 0.7735201772281015, "grad_norm": 0.15541036427021027, "learning_rate": 0.002, "loss": 2.5544, "step": 388270 }, { "epoch": 0.7735400994517404, "grad_norm": 0.18028362095355988, "learning_rate": 0.002, "loss": 2.5525, "step": 388280 }, { "epoch": 0.7735600216753793, "grad_norm": 0.1989205777645111, "learning_rate": 0.002, "loss": 2.5513, "step": 388290 }, { "epoch": 0.7735799438990182, "grad_norm": 0.16939927637577057, "learning_rate": 0.002, "loss": 2.5514, "step": 388300 }, { "epoch": 0.7735998661226572, "grad_norm": 0.18347737193107605, "learning_rate": 0.002, "loss": 2.5449, "step": 388310 }, { "epoch": 0.7736197883462961, "grad_norm": 0.14620445668697357, "learning_rate": 0.002, "loss": 2.5363, "step": 388320 }, { "epoch": 0.773639710569935, "grad_norm": 0.1963014304637909, "learning_rate": 0.002, "loss": 2.5313, "step": 388330 }, { "epoch": 0.7736596327935739, "grad_norm": 0.14367760717868805, "learning_rate": 0.002, "loss": 2.5559, "step": 388340 }, { "epoch": 0.7736795550172128, "grad_norm": 0.16984829306602478, "learning_rate": 0.002, "loss": 2.5598, "step": 388350 }, { "epoch": 0.7736994772408518, "grad_norm": 0.1486360728740692, "learning_rate": 0.002, "loss": 2.5505, "step": 388360 }, { "epoch": 0.7737193994644906, "grad_norm": 0.14955788850784302, "learning_rate": 0.002, "loss": 2.5461, "step": 388370 }, { "epoch": 0.7737393216881295, "grad_norm": 0.1861918568611145, "learning_rate": 0.002, "loss": 2.5556, "step": 388380 }, { "epoch": 0.7737592439117684, "grad_norm": 0.2101098895072937, "learning_rate": 0.002, "loss": 2.5448, "step": 388390 }, { "epoch": 0.7737791661354073, "grad_norm": 0.2122935950756073, "learning_rate": 0.002, "loss": 2.5613, "step": 388400 }, { "epoch": 0.7737990883590463, "grad_norm": 0.1533169150352478, "learning_rate": 0.002, "loss": 2.5549, "step": 388410 }, { "epoch": 0.7738190105826852, "grad_norm": 0.18745160102844238, "learning_rate": 0.002, "loss": 2.5579, "step": 388420 }, { "epoch": 0.7738389328063241, "grad_norm": 0.1865682750940323, "learning_rate": 0.002, "loss": 2.5492, "step": 388430 }, { "epoch": 0.773858855029963, "grad_norm": 0.13623222708702087, "learning_rate": 0.002, "loss": 2.5522, "step": 388440 }, { "epoch": 0.773878777253602, "grad_norm": 0.16977998614311218, "learning_rate": 0.002, "loss": 2.5484, "step": 388450 }, { "epoch": 0.7738986994772409, "grad_norm": 0.1633482724428177, "learning_rate": 0.002, "loss": 2.5442, "step": 388460 }, { "epoch": 0.7739186217008798, "grad_norm": 0.16193655133247375, "learning_rate": 0.002, "loss": 2.5432, "step": 388470 }, { "epoch": 0.7739385439245187, "grad_norm": 0.14150381088256836, "learning_rate": 0.002, "loss": 2.5593, "step": 388480 }, { "epoch": 0.7739584661481576, "grad_norm": 0.17272017896175385, "learning_rate": 0.002, "loss": 2.5668, "step": 388490 }, { "epoch": 0.7739783883717966, "grad_norm": 0.17185015976428986, "learning_rate": 0.002, "loss": 2.5643, "step": 388500 }, { "epoch": 0.7739983105954354, "grad_norm": 0.15403316915035248, "learning_rate": 0.002, "loss": 2.5613, "step": 388510 }, { "epoch": 0.7740182328190743, "grad_norm": 0.15026327967643738, "learning_rate": 0.002, "loss": 2.5518, "step": 388520 }, { "epoch": 0.7740381550427132, "grad_norm": 0.21457421779632568, "learning_rate": 0.002, "loss": 2.5533, "step": 388530 }, { "epoch": 0.7740580772663521, "grad_norm": 0.1411966234445572, "learning_rate": 0.002, "loss": 2.5578, "step": 388540 }, { "epoch": 0.7740779994899911, "grad_norm": 0.15267007052898407, "learning_rate": 0.002, "loss": 2.5487, "step": 388550 }, { "epoch": 0.77409792171363, "grad_norm": 0.2159283608198166, "learning_rate": 0.002, "loss": 2.5563, "step": 388560 }, { "epoch": 0.7741178439372689, "grad_norm": 0.1460971236228943, "learning_rate": 0.002, "loss": 2.5411, "step": 388570 }, { "epoch": 0.7741377661609078, "grad_norm": 0.14293712377548218, "learning_rate": 0.002, "loss": 2.5669, "step": 388580 }, { "epoch": 0.7741576883845467, "grad_norm": 0.17819678783416748, "learning_rate": 0.002, "loss": 2.551, "step": 388590 }, { "epoch": 0.7741776106081857, "grad_norm": 0.16235189139842987, "learning_rate": 0.002, "loss": 2.5464, "step": 388600 }, { "epoch": 0.7741975328318246, "grad_norm": 0.15931497514247894, "learning_rate": 0.002, "loss": 2.5674, "step": 388610 }, { "epoch": 0.7742174550554635, "grad_norm": 0.14066585898399353, "learning_rate": 0.002, "loss": 2.5567, "step": 388620 }, { "epoch": 0.7742373772791024, "grad_norm": 0.169039785861969, "learning_rate": 0.002, "loss": 2.5443, "step": 388630 }, { "epoch": 0.7742572995027412, "grad_norm": 0.13549190759658813, "learning_rate": 0.002, "loss": 2.5582, "step": 388640 }, { "epoch": 0.7742772217263802, "grad_norm": 0.1527860164642334, "learning_rate": 0.002, "loss": 2.5636, "step": 388650 }, { "epoch": 0.7742971439500191, "grad_norm": 0.13845263421535492, "learning_rate": 0.002, "loss": 2.5507, "step": 388660 }, { "epoch": 0.774317066173658, "grad_norm": 0.19099117815494537, "learning_rate": 0.002, "loss": 2.5485, "step": 388670 }, { "epoch": 0.7743369883972969, "grad_norm": 0.1630963534116745, "learning_rate": 0.002, "loss": 2.5483, "step": 388680 }, { "epoch": 0.7743569106209358, "grad_norm": 0.13426810503005981, "learning_rate": 0.002, "loss": 2.5662, "step": 388690 }, { "epoch": 0.7743768328445748, "grad_norm": 0.1974768489599228, "learning_rate": 0.002, "loss": 2.5572, "step": 388700 }, { "epoch": 0.7743967550682137, "grad_norm": 0.1443740725517273, "learning_rate": 0.002, "loss": 2.553, "step": 388710 }, { "epoch": 0.7744166772918526, "grad_norm": 0.15029333531856537, "learning_rate": 0.002, "loss": 2.5414, "step": 388720 }, { "epoch": 0.7744365995154915, "grad_norm": 0.16719788312911987, "learning_rate": 0.002, "loss": 2.5522, "step": 388730 }, { "epoch": 0.7744565217391305, "grad_norm": 0.16579435765743256, "learning_rate": 0.002, "loss": 2.5472, "step": 388740 }, { "epoch": 0.7744764439627694, "grad_norm": 0.23200643062591553, "learning_rate": 0.002, "loss": 2.5527, "step": 388750 }, { "epoch": 0.7744963661864083, "grad_norm": 0.142223060131073, "learning_rate": 0.002, "loss": 2.5563, "step": 388760 }, { "epoch": 0.7745162884100472, "grad_norm": 0.17489294707775116, "learning_rate": 0.002, "loss": 2.5633, "step": 388770 }, { "epoch": 0.774536210633686, "grad_norm": 0.17586548626422882, "learning_rate": 0.002, "loss": 2.5518, "step": 388780 }, { "epoch": 0.774556132857325, "grad_norm": 0.1727527529001236, "learning_rate": 0.002, "loss": 2.5714, "step": 388790 }, { "epoch": 0.7745760550809639, "grad_norm": 0.18452699482440948, "learning_rate": 0.002, "loss": 2.5508, "step": 388800 }, { "epoch": 0.7745959773046028, "grad_norm": 0.15106704831123352, "learning_rate": 0.002, "loss": 2.5661, "step": 388810 }, { "epoch": 0.7746158995282417, "grad_norm": 0.18860475718975067, "learning_rate": 0.002, "loss": 2.5495, "step": 388820 }, { "epoch": 0.7746358217518806, "grad_norm": 0.16201643645763397, "learning_rate": 0.002, "loss": 2.5529, "step": 388830 }, { "epoch": 0.7746557439755196, "grad_norm": 0.1645575910806656, "learning_rate": 0.002, "loss": 2.5527, "step": 388840 }, { "epoch": 0.7746756661991585, "grad_norm": 0.18590739369392395, "learning_rate": 0.002, "loss": 2.5513, "step": 388850 }, { "epoch": 0.7746955884227974, "grad_norm": 0.1573992222547531, "learning_rate": 0.002, "loss": 2.5476, "step": 388860 }, { "epoch": 0.7747155106464363, "grad_norm": 0.16334952414035797, "learning_rate": 0.002, "loss": 2.5599, "step": 388870 }, { "epoch": 0.7747354328700752, "grad_norm": 0.19778801500797272, "learning_rate": 0.002, "loss": 2.5513, "step": 388880 }, { "epoch": 0.7747553550937142, "grad_norm": 0.16413120925426483, "learning_rate": 0.002, "loss": 2.5512, "step": 388890 }, { "epoch": 0.7747752773173531, "grad_norm": 0.16242238879203796, "learning_rate": 0.002, "loss": 2.558, "step": 388900 }, { "epoch": 0.774795199540992, "grad_norm": 0.16301557421684265, "learning_rate": 0.002, "loss": 2.546, "step": 388910 }, { "epoch": 0.7748151217646309, "grad_norm": 0.16612601280212402, "learning_rate": 0.002, "loss": 2.5491, "step": 388920 }, { "epoch": 0.7748350439882697, "grad_norm": 0.15696410834789276, "learning_rate": 0.002, "loss": 2.5524, "step": 388930 }, { "epoch": 0.7748549662119087, "grad_norm": 0.1521151065826416, "learning_rate": 0.002, "loss": 2.553, "step": 388940 }, { "epoch": 0.7748748884355476, "grad_norm": 0.1759445071220398, "learning_rate": 0.002, "loss": 2.5414, "step": 388950 }, { "epoch": 0.7748948106591865, "grad_norm": 0.1502940058708191, "learning_rate": 0.002, "loss": 2.5492, "step": 388960 }, { "epoch": 0.7749147328828254, "grad_norm": 0.18711508810520172, "learning_rate": 0.002, "loss": 2.5309, "step": 388970 }, { "epoch": 0.7749346551064643, "grad_norm": 0.1572716385126114, "learning_rate": 0.002, "loss": 2.5603, "step": 388980 }, { "epoch": 0.7749545773301033, "grad_norm": 0.1779867261648178, "learning_rate": 0.002, "loss": 2.564, "step": 388990 }, { "epoch": 0.7749744995537422, "grad_norm": 0.1405279040336609, "learning_rate": 0.002, "loss": 2.5496, "step": 389000 }, { "epoch": 0.7749944217773811, "grad_norm": 0.1815812736749649, "learning_rate": 0.002, "loss": 2.5573, "step": 389010 }, { "epoch": 0.77501434400102, "grad_norm": 0.1642809957265854, "learning_rate": 0.002, "loss": 2.5562, "step": 389020 }, { "epoch": 0.775034266224659, "grad_norm": 0.15741069614887238, "learning_rate": 0.002, "loss": 2.5544, "step": 389030 }, { "epoch": 0.7750541884482979, "grad_norm": 0.15107335150241852, "learning_rate": 0.002, "loss": 2.5587, "step": 389040 }, { "epoch": 0.7750741106719368, "grad_norm": 0.13929221034049988, "learning_rate": 0.002, "loss": 2.5493, "step": 389050 }, { "epoch": 0.7750940328955757, "grad_norm": 0.16382978856563568, "learning_rate": 0.002, "loss": 2.5557, "step": 389060 }, { "epoch": 0.7751139551192145, "grad_norm": 0.1939697563648224, "learning_rate": 0.002, "loss": 2.5532, "step": 389070 }, { "epoch": 0.7751338773428536, "grad_norm": 0.1448875069618225, "learning_rate": 0.002, "loss": 2.5525, "step": 389080 }, { "epoch": 0.7751537995664924, "grad_norm": 0.12783464789390564, "learning_rate": 0.002, "loss": 2.5661, "step": 389090 }, { "epoch": 0.7751737217901313, "grad_norm": 0.21824368834495544, "learning_rate": 0.002, "loss": 2.55, "step": 389100 }, { "epoch": 0.7751936440137702, "grad_norm": 0.14152558147907257, "learning_rate": 0.002, "loss": 2.5625, "step": 389110 }, { "epoch": 0.7752135662374091, "grad_norm": 0.16490444540977478, "learning_rate": 0.002, "loss": 2.5428, "step": 389120 }, { "epoch": 0.7752334884610481, "grad_norm": 0.14234140515327454, "learning_rate": 0.002, "loss": 2.5492, "step": 389130 }, { "epoch": 0.775253410684687, "grad_norm": 0.14237920939922333, "learning_rate": 0.002, "loss": 2.5539, "step": 389140 }, { "epoch": 0.7752733329083259, "grad_norm": 0.16288596391677856, "learning_rate": 0.002, "loss": 2.5609, "step": 389150 }, { "epoch": 0.7752932551319648, "grad_norm": 0.1457333117723465, "learning_rate": 0.002, "loss": 2.5561, "step": 389160 }, { "epoch": 0.7753131773556037, "grad_norm": 0.16530264914035797, "learning_rate": 0.002, "loss": 2.5609, "step": 389170 }, { "epoch": 0.7753330995792427, "grad_norm": 0.14793828129768372, "learning_rate": 0.002, "loss": 2.5561, "step": 389180 }, { "epoch": 0.7753530218028816, "grad_norm": 0.16129766404628754, "learning_rate": 0.002, "loss": 2.5643, "step": 389190 }, { "epoch": 0.7753729440265205, "grad_norm": 0.1404312700033188, "learning_rate": 0.002, "loss": 2.5549, "step": 389200 }, { "epoch": 0.7753928662501594, "grad_norm": 0.20761018991470337, "learning_rate": 0.002, "loss": 2.5495, "step": 389210 }, { "epoch": 0.7754127884737982, "grad_norm": 0.17087264358997345, "learning_rate": 0.002, "loss": 2.5413, "step": 389220 }, { "epoch": 0.7754327106974372, "grad_norm": 0.1724645048379898, "learning_rate": 0.002, "loss": 2.5505, "step": 389230 }, { "epoch": 0.7754526329210761, "grad_norm": 0.1388053447008133, "learning_rate": 0.002, "loss": 2.5642, "step": 389240 }, { "epoch": 0.775472555144715, "grad_norm": 0.17906944453716278, "learning_rate": 0.002, "loss": 2.5358, "step": 389250 }, { "epoch": 0.7754924773683539, "grad_norm": 0.16158494353294373, "learning_rate": 0.002, "loss": 2.5546, "step": 389260 }, { "epoch": 0.7755123995919928, "grad_norm": 0.13452567160129547, "learning_rate": 0.002, "loss": 2.5557, "step": 389270 }, { "epoch": 0.7755323218156318, "grad_norm": 0.16127164661884308, "learning_rate": 0.002, "loss": 2.554, "step": 389280 }, { "epoch": 0.7755522440392707, "grad_norm": 0.15927574038505554, "learning_rate": 0.002, "loss": 2.547, "step": 389290 }, { "epoch": 0.7755721662629096, "grad_norm": 0.20853446424007416, "learning_rate": 0.002, "loss": 2.5507, "step": 389300 }, { "epoch": 0.7755920884865485, "grad_norm": 0.14103981852531433, "learning_rate": 0.002, "loss": 2.5467, "step": 389310 }, { "epoch": 0.7756120107101875, "grad_norm": 0.1598326563835144, "learning_rate": 0.002, "loss": 2.5644, "step": 389320 }, { "epoch": 0.7756319329338264, "grad_norm": 0.15167871117591858, "learning_rate": 0.002, "loss": 2.5635, "step": 389330 }, { "epoch": 0.7756518551574653, "grad_norm": 0.15294747054576874, "learning_rate": 0.002, "loss": 2.5538, "step": 389340 }, { "epoch": 0.7756717773811042, "grad_norm": 0.1808353066444397, "learning_rate": 0.002, "loss": 2.5575, "step": 389350 }, { "epoch": 0.775691699604743, "grad_norm": 0.17738759517669678, "learning_rate": 0.002, "loss": 2.5483, "step": 389360 }, { "epoch": 0.775711621828382, "grad_norm": 0.14767973124980927, "learning_rate": 0.002, "loss": 2.556, "step": 389370 }, { "epoch": 0.7757315440520209, "grad_norm": 0.1709279716014862, "learning_rate": 0.002, "loss": 2.5448, "step": 389380 }, { "epoch": 0.7757514662756598, "grad_norm": 0.21187850832939148, "learning_rate": 0.002, "loss": 2.5477, "step": 389390 }, { "epoch": 0.7757713884992987, "grad_norm": 0.14796939492225647, "learning_rate": 0.002, "loss": 2.5581, "step": 389400 }, { "epoch": 0.7757913107229376, "grad_norm": 0.1812940537929535, "learning_rate": 0.002, "loss": 2.5487, "step": 389410 }, { "epoch": 0.7758112329465766, "grad_norm": 0.3459240794181824, "learning_rate": 0.002, "loss": 2.5607, "step": 389420 }, { "epoch": 0.7758311551702155, "grad_norm": 0.14496676623821259, "learning_rate": 0.002, "loss": 2.5456, "step": 389430 }, { "epoch": 0.7758510773938544, "grad_norm": 0.1939968764781952, "learning_rate": 0.002, "loss": 2.5645, "step": 389440 }, { "epoch": 0.7758709996174933, "grad_norm": 0.13875259459018707, "learning_rate": 0.002, "loss": 2.5599, "step": 389450 }, { "epoch": 0.7758909218411322, "grad_norm": 0.18587252497673035, "learning_rate": 0.002, "loss": 2.5535, "step": 389460 }, { "epoch": 0.7759108440647712, "grad_norm": 0.1330302655696869, "learning_rate": 0.002, "loss": 2.5577, "step": 389470 }, { "epoch": 0.7759307662884101, "grad_norm": 0.13852044939994812, "learning_rate": 0.002, "loss": 2.5563, "step": 389480 }, { "epoch": 0.775950688512049, "grad_norm": 0.15273667871952057, "learning_rate": 0.002, "loss": 2.5462, "step": 389490 }, { "epoch": 0.7759706107356878, "grad_norm": 0.18903912603855133, "learning_rate": 0.002, "loss": 2.558, "step": 389500 }, { "epoch": 0.7759905329593267, "grad_norm": 0.14308175444602966, "learning_rate": 0.002, "loss": 2.5753, "step": 389510 }, { "epoch": 0.7760104551829657, "grad_norm": 0.15900175273418427, "learning_rate": 0.002, "loss": 2.5401, "step": 389520 }, { "epoch": 0.7760303774066046, "grad_norm": 0.18904121220111847, "learning_rate": 0.002, "loss": 2.5528, "step": 389530 }, { "epoch": 0.7760502996302435, "grad_norm": 0.15962405502796173, "learning_rate": 0.002, "loss": 2.5543, "step": 389540 }, { "epoch": 0.7760702218538824, "grad_norm": 0.1774468868970871, "learning_rate": 0.002, "loss": 2.5565, "step": 389550 }, { "epoch": 0.7760901440775213, "grad_norm": 0.18022242188453674, "learning_rate": 0.002, "loss": 2.5426, "step": 389560 }, { "epoch": 0.7761100663011603, "grad_norm": 0.1575556993484497, "learning_rate": 0.002, "loss": 2.561, "step": 389570 }, { "epoch": 0.7761299885247992, "grad_norm": 0.149744912981987, "learning_rate": 0.002, "loss": 2.5347, "step": 389580 }, { "epoch": 0.7761499107484381, "grad_norm": 0.14875911176204681, "learning_rate": 0.002, "loss": 2.5461, "step": 389590 }, { "epoch": 0.776169832972077, "grad_norm": 0.1560271680355072, "learning_rate": 0.002, "loss": 2.538, "step": 389600 }, { "epoch": 0.776189755195716, "grad_norm": 0.14998044073581696, "learning_rate": 0.002, "loss": 2.5493, "step": 389610 }, { "epoch": 0.7762096774193549, "grad_norm": 0.15663254261016846, "learning_rate": 0.002, "loss": 2.5509, "step": 389620 }, { "epoch": 0.7762295996429938, "grad_norm": 0.14988811314105988, "learning_rate": 0.002, "loss": 2.5548, "step": 389630 }, { "epoch": 0.7762495218666327, "grad_norm": 0.1840924769639969, "learning_rate": 0.002, "loss": 2.5566, "step": 389640 }, { "epoch": 0.7762694440902715, "grad_norm": 0.1665061116218567, "learning_rate": 0.002, "loss": 2.5573, "step": 389650 }, { "epoch": 0.7762893663139105, "grad_norm": 0.14851392805576324, "learning_rate": 0.002, "loss": 2.5602, "step": 389660 }, { "epoch": 0.7763092885375494, "grad_norm": 0.1495884358882904, "learning_rate": 0.002, "loss": 2.5574, "step": 389670 }, { "epoch": 0.7763292107611883, "grad_norm": 0.13618896901607513, "learning_rate": 0.002, "loss": 2.5558, "step": 389680 }, { "epoch": 0.7763491329848272, "grad_norm": 0.17412568628787994, "learning_rate": 0.002, "loss": 2.5602, "step": 389690 }, { "epoch": 0.7763690552084661, "grad_norm": 0.16613368690013885, "learning_rate": 0.002, "loss": 2.5538, "step": 389700 }, { "epoch": 0.7763889774321051, "grad_norm": 0.1353127658367157, "learning_rate": 0.002, "loss": 2.5458, "step": 389710 }, { "epoch": 0.776408899655744, "grad_norm": 0.17096088826656342, "learning_rate": 0.002, "loss": 2.5602, "step": 389720 }, { "epoch": 0.7764288218793829, "grad_norm": 0.13915026187896729, "learning_rate": 0.002, "loss": 2.5692, "step": 389730 }, { "epoch": 0.7764487441030218, "grad_norm": 0.15783777832984924, "learning_rate": 0.002, "loss": 2.555, "step": 389740 }, { "epoch": 0.7764686663266607, "grad_norm": 0.18217790126800537, "learning_rate": 0.002, "loss": 2.5578, "step": 389750 }, { "epoch": 0.7764885885502997, "grad_norm": 0.17170944809913635, "learning_rate": 0.002, "loss": 2.5696, "step": 389760 }, { "epoch": 0.7765085107739386, "grad_norm": 0.34416064620018005, "learning_rate": 0.002, "loss": 2.565, "step": 389770 }, { "epoch": 0.7765284329975775, "grad_norm": 0.17855104804039001, "learning_rate": 0.002, "loss": 2.5473, "step": 389780 }, { "epoch": 0.7765483552212163, "grad_norm": 0.15503981709480286, "learning_rate": 0.002, "loss": 2.5471, "step": 389790 }, { "epoch": 0.7765682774448552, "grad_norm": 0.1670527160167694, "learning_rate": 0.002, "loss": 2.5639, "step": 389800 }, { "epoch": 0.7765881996684942, "grad_norm": 0.14407004415988922, "learning_rate": 0.002, "loss": 2.5417, "step": 389810 }, { "epoch": 0.7766081218921331, "grad_norm": 0.16823376715183258, "learning_rate": 0.002, "loss": 2.5626, "step": 389820 }, { "epoch": 0.776628044115772, "grad_norm": 0.18463167548179626, "learning_rate": 0.002, "loss": 2.5671, "step": 389830 }, { "epoch": 0.7766479663394109, "grad_norm": 0.12777704000473022, "learning_rate": 0.002, "loss": 2.5563, "step": 389840 }, { "epoch": 0.7766678885630498, "grad_norm": 0.1691291183233261, "learning_rate": 0.002, "loss": 2.5572, "step": 389850 }, { "epoch": 0.7766878107866888, "grad_norm": 0.1714124232530594, "learning_rate": 0.002, "loss": 2.5518, "step": 389860 }, { "epoch": 0.7767077330103277, "grad_norm": 0.19956271350383759, "learning_rate": 0.002, "loss": 2.5373, "step": 389870 }, { "epoch": 0.7767276552339666, "grad_norm": 0.15722022950649261, "learning_rate": 0.002, "loss": 2.5564, "step": 389880 }, { "epoch": 0.7767475774576055, "grad_norm": 0.1713487058877945, "learning_rate": 0.002, "loss": 2.5467, "step": 389890 }, { "epoch": 0.7767674996812444, "grad_norm": 0.14012129604816437, "learning_rate": 0.002, "loss": 2.5418, "step": 389900 }, { "epoch": 0.7767874219048834, "grad_norm": 0.1612202376127243, "learning_rate": 0.002, "loss": 2.5491, "step": 389910 }, { "epoch": 0.7768073441285223, "grad_norm": 0.1698722094297409, "learning_rate": 0.002, "loss": 2.5296, "step": 389920 }, { "epoch": 0.7768272663521611, "grad_norm": 0.16233892738819122, "learning_rate": 0.002, "loss": 2.5534, "step": 389930 }, { "epoch": 0.7768471885758, "grad_norm": 0.15787319839000702, "learning_rate": 0.002, "loss": 2.5599, "step": 389940 }, { "epoch": 0.776867110799439, "grad_norm": 0.1432814747095108, "learning_rate": 0.002, "loss": 2.5528, "step": 389950 }, { "epoch": 0.7768870330230779, "grad_norm": 0.1902891993522644, "learning_rate": 0.002, "loss": 2.5571, "step": 389960 }, { "epoch": 0.7769069552467168, "grad_norm": 0.16824990510940552, "learning_rate": 0.002, "loss": 2.5442, "step": 389970 }, { "epoch": 0.7769268774703557, "grad_norm": 0.14506568014621735, "learning_rate": 0.002, "loss": 2.548, "step": 389980 }, { "epoch": 0.7769467996939946, "grad_norm": 0.12986601889133453, "learning_rate": 0.002, "loss": 2.5516, "step": 389990 }, { "epoch": 0.7769667219176336, "grad_norm": 0.15669603645801544, "learning_rate": 0.002, "loss": 2.545, "step": 390000 }, { "epoch": 0.7769866441412725, "grad_norm": 0.16797147691249847, "learning_rate": 0.002, "loss": 2.5559, "step": 390010 }, { "epoch": 0.7770065663649114, "grad_norm": 0.17541545629501343, "learning_rate": 0.002, "loss": 2.5531, "step": 390020 }, { "epoch": 0.7770264885885503, "grad_norm": 0.13169068098068237, "learning_rate": 0.002, "loss": 2.5453, "step": 390030 }, { "epoch": 0.7770464108121892, "grad_norm": 0.14936865866184235, "learning_rate": 0.002, "loss": 2.5423, "step": 390040 }, { "epoch": 0.7770663330358282, "grad_norm": 0.19889813661575317, "learning_rate": 0.002, "loss": 2.5658, "step": 390050 }, { "epoch": 0.7770862552594671, "grad_norm": 0.1627640575170517, "learning_rate": 0.002, "loss": 2.5507, "step": 390060 }, { "epoch": 0.777106177483106, "grad_norm": 0.17390109598636627, "learning_rate": 0.002, "loss": 2.5624, "step": 390070 }, { "epoch": 0.7771260997067448, "grad_norm": 0.1817525029182434, "learning_rate": 0.002, "loss": 2.549, "step": 390080 }, { "epoch": 0.7771460219303837, "grad_norm": 0.20090550184249878, "learning_rate": 0.002, "loss": 2.5556, "step": 390090 }, { "epoch": 0.7771659441540227, "grad_norm": 0.17424292862415314, "learning_rate": 0.002, "loss": 2.5495, "step": 390100 }, { "epoch": 0.7771858663776616, "grad_norm": 0.297615110874176, "learning_rate": 0.002, "loss": 2.5416, "step": 390110 }, { "epoch": 0.7772057886013005, "grad_norm": 0.1704493910074234, "learning_rate": 0.002, "loss": 2.5589, "step": 390120 }, { "epoch": 0.7772257108249394, "grad_norm": 0.18495328724384308, "learning_rate": 0.002, "loss": 2.5557, "step": 390130 }, { "epoch": 0.7772456330485783, "grad_norm": 0.15784835815429688, "learning_rate": 0.002, "loss": 2.5578, "step": 390140 }, { "epoch": 0.7772655552722173, "grad_norm": 0.16598933935165405, "learning_rate": 0.002, "loss": 2.5521, "step": 390150 }, { "epoch": 0.7772854774958562, "grad_norm": 0.24676719307899475, "learning_rate": 0.002, "loss": 2.5467, "step": 390160 }, { "epoch": 0.7773053997194951, "grad_norm": 0.17118100821971893, "learning_rate": 0.002, "loss": 2.5435, "step": 390170 }, { "epoch": 0.777325321943134, "grad_norm": 0.1599809229373932, "learning_rate": 0.002, "loss": 2.5578, "step": 390180 }, { "epoch": 0.7773452441667729, "grad_norm": 0.16560913622379303, "learning_rate": 0.002, "loss": 2.5562, "step": 390190 }, { "epoch": 0.7773651663904119, "grad_norm": 0.14296668767929077, "learning_rate": 0.002, "loss": 2.5622, "step": 390200 }, { "epoch": 0.7773850886140508, "grad_norm": 0.15192386507987976, "learning_rate": 0.002, "loss": 2.5524, "step": 390210 }, { "epoch": 0.7774050108376896, "grad_norm": 0.1402623951435089, "learning_rate": 0.002, "loss": 2.5584, "step": 390220 }, { "epoch": 0.7774249330613285, "grad_norm": 0.16041165590286255, "learning_rate": 0.002, "loss": 2.5492, "step": 390230 }, { "epoch": 0.7774448552849675, "grad_norm": 0.14315330982208252, "learning_rate": 0.002, "loss": 2.5668, "step": 390240 }, { "epoch": 0.7774647775086064, "grad_norm": 0.21174442768096924, "learning_rate": 0.002, "loss": 2.5539, "step": 390250 }, { "epoch": 0.7774846997322453, "grad_norm": 0.16879624128341675, "learning_rate": 0.002, "loss": 2.5512, "step": 390260 }, { "epoch": 0.7775046219558842, "grad_norm": 0.1402091234922409, "learning_rate": 0.002, "loss": 2.5618, "step": 390270 }, { "epoch": 0.7775245441795231, "grad_norm": 0.17063473165035248, "learning_rate": 0.002, "loss": 2.5491, "step": 390280 }, { "epoch": 0.7775444664031621, "grad_norm": 0.14935651421546936, "learning_rate": 0.002, "loss": 2.5675, "step": 390290 }, { "epoch": 0.777564388626801, "grad_norm": 0.18224167823791504, "learning_rate": 0.002, "loss": 2.538, "step": 390300 }, { "epoch": 0.7775843108504399, "grad_norm": 0.14506980776786804, "learning_rate": 0.002, "loss": 2.5468, "step": 390310 }, { "epoch": 0.7776042330740788, "grad_norm": 0.1626867949962616, "learning_rate": 0.002, "loss": 2.567, "step": 390320 }, { "epoch": 0.7776241552977177, "grad_norm": 0.2278677225112915, "learning_rate": 0.002, "loss": 2.561, "step": 390330 }, { "epoch": 0.7776440775213567, "grad_norm": 0.14485515654087067, "learning_rate": 0.002, "loss": 2.5535, "step": 390340 }, { "epoch": 0.7776639997449956, "grad_norm": 0.14847983419895172, "learning_rate": 0.002, "loss": 2.5608, "step": 390350 }, { "epoch": 0.7776839219686345, "grad_norm": 0.17961882054805756, "learning_rate": 0.002, "loss": 2.5565, "step": 390360 }, { "epoch": 0.7777038441922733, "grad_norm": 0.19107910990715027, "learning_rate": 0.002, "loss": 2.5593, "step": 390370 }, { "epoch": 0.7777237664159122, "grad_norm": 0.19353313744068146, "learning_rate": 0.002, "loss": 2.5575, "step": 390380 }, { "epoch": 0.7777436886395512, "grad_norm": 0.1506287008523941, "learning_rate": 0.002, "loss": 2.5674, "step": 390390 }, { "epoch": 0.7777636108631901, "grad_norm": 0.14296026527881622, "learning_rate": 0.002, "loss": 2.5544, "step": 390400 }, { "epoch": 0.777783533086829, "grad_norm": 0.15996311604976654, "learning_rate": 0.002, "loss": 2.5691, "step": 390410 }, { "epoch": 0.7778034553104679, "grad_norm": 0.15965545177459717, "learning_rate": 0.002, "loss": 2.5696, "step": 390420 }, { "epoch": 0.7778233775341068, "grad_norm": 0.14646857976913452, "learning_rate": 0.002, "loss": 2.5425, "step": 390430 }, { "epoch": 0.7778432997577458, "grad_norm": 0.18636007606983185, "learning_rate": 0.002, "loss": 2.5503, "step": 390440 }, { "epoch": 0.7778632219813847, "grad_norm": 0.14353729784488678, "learning_rate": 0.002, "loss": 2.5515, "step": 390450 }, { "epoch": 0.7778831442050236, "grad_norm": 0.1570117175579071, "learning_rate": 0.002, "loss": 2.5606, "step": 390460 }, { "epoch": 0.7779030664286625, "grad_norm": 0.14977630972862244, "learning_rate": 0.002, "loss": 2.5492, "step": 390470 }, { "epoch": 0.7779229886523014, "grad_norm": 0.19453124701976776, "learning_rate": 0.002, "loss": 2.5465, "step": 390480 }, { "epoch": 0.7779429108759404, "grad_norm": 0.13625061511993408, "learning_rate": 0.002, "loss": 2.5465, "step": 390490 }, { "epoch": 0.7779628330995793, "grad_norm": 0.16699844598770142, "learning_rate": 0.002, "loss": 2.5517, "step": 390500 }, { "epoch": 0.7779827553232181, "grad_norm": 0.17357656359672546, "learning_rate": 0.002, "loss": 2.5505, "step": 390510 }, { "epoch": 0.778002677546857, "grad_norm": 0.14370529353618622, "learning_rate": 0.002, "loss": 2.5456, "step": 390520 }, { "epoch": 0.778022599770496, "grad_norm": 0.1554376482963562, "learning_rate": 0.002, "loss": 2.5433, "step": 390530 }, { "epoch": 0.7780425219941349, "grad_norm": 0.15304505825042725, "learning_rate": 0.002, "loss": 2.554, "step": 390540 }, { "epoch": 0.7780624442177738, "grad_norm": 0.126238614320755, "learning_rate": 0.002, "loss": 2.5623, "step": 390550 }, { "epoch": 0.7780823664414127, "grad_norm": 0.16958390176296234, "learning_rate": 0.002, "loss": 2.5313, "step": 390560 }, { "epoch": 0.7781022886650516, "grad_norm": 0.18171334266662598, "learning_rate": 0.002, "loss": 2.5512, "step": 390570 }, { "epoch": 0.7781222108886906, "grad_norm": 0.16077758371829987, "learning_rate": 0.002, "loss": 2.5642, "step": 390580 }, { "epoch": 0.7781421331123295, "grad_norm": 0.1498328596353531, "learning_rate": 0.002, "loss": 2.5484, "step": 390590 }, { "epoch": 0.7781620553359684, "grad_norm": 0.16585798561573029, "learning_rate": 0.002, "loss": 2.5585, "step": 390600 }, { "epoch": 0.7781819775596073, "grad_norm": 0.18356822431087494, "learning_rate": 0.002, "loss": 2.5526, "step": 390610 }, { "epoch": 0.7782018997832462, "grad_norm": 0.18058332800865173, "learning_rate": 0.002, "loss": 2.5486, "step": 390620 }, { "epoch": 0.7782218220068852, "grad_norm": 0.15210209786891937, "learning_rate": 0.002, "loss": 2.5431, "step": 390630 }, { "epoch": 0.7782417442305241, "grad_norm": 0.15908591449260712, "learning_rate": 0.002, "loss": 2.5584, "step": 390640 }, { "epoch": 0.778261666454163, "grad_norm": 0.1911786049604416, "learning_rate": 0.002, "loss": 2.555, "step": 390650 }, { "epoch": 0.7782815886778018, "grad_norm": 0.16136719286441803, "learning_rate": 0.002, "loss": 2.5362, "step": 390660 }, { "epoch": 0.7783015109014407, "grad_norm": 0.14688773453235626, "learning_rate": 0.002, "loss": 2.5532, "step": 390670 }, { "epoch": 0.7783214331250797, "grad_norm": 0.1641056388616562, "learning_rate": 0.002, "loss": 2.5487, "step": 390680 }, { "epoch": 0.7783413553487186, "grad_norm": 0.1650383323431015, "learning_rate": 0.002, "loss": 2.5393, "step": 390690 }, { "epoch": 0.7783612775723575, "grad_norm": 0.18181779980659485, "learning_rate": 0.002, "loss": 2.5596, "step": 390700 }, { "epoch": 0.7783811997959964, "grad_norm": 0.14149264991283417, "learning_rate": 0.002, "loss": 2.542, "step": 390710 }, { "epoch": 0.7784011220196353, "grad_norm": 0.1954256147146225, "learning_rate": 0.002, "loss": 2.5393, "step": 390720 }, { "epoch": 0.7784210442432743, "grad_norm": 0.1691003441810608, "learning_rate": 0.002, "loss": 2.5599, "step": 390730 }, { "epoch": 0.7784409664669132, "grad_norm": 0.1385614573955536, "learning_rate": 0.002, "loss": 2.5475, "step": 390740 }, { "epoch": 0.7784608886905521, "grad_norm": 0.15127456188201904, "learning_rate": 0.002, "loss": 2.5606, "step": 390750 }, { "epoch": 0.778480810914191, "grad_norm": 0.16564881801605225, "learning_rate": 0.002, "loss": 2.5596, "step": 390760 }, { "epoch": 0.7785007331378299, "grad_norm": 0.17030759155750275, "learning_rate": 0.002, "loss": 2.5521, "step": 390770 }, { "epoch": 0.7785206553614689, "grad_norm": 0.18846434354782104, "learning_rate": 0.002, "loss": 2.5604, "step": 390780 }, { "epoch": 0.7785405775851078, "grad_norm": 0.15222850441932678, "learning_rate": 0.002, "loss": 2.5632, "step": 390790 }, { "epoch": 0.7785604998087466, "grad_norm": 0.22314772009849548, "learning_rate": 0.002, "loss": 2.5492, "step": 390800 }, { "epoch": 0.7785804220323855, "grad_norm": 0.14522609114646912, "learning_rate": 0.002, "loss": 2.5517, "step": 390810 }, { "epoch": 0.7786003442560245, "grad_norm": 0.17791295051574707, "learning_rate": 0.002, "loss": 2.5577, "step": 390820 }, { "epoch": 0.7786202664796634, "grad_norm": 0.14652252197265625, "learning_rate": 0.002, "loss": 2.5518, "step": 390830 }, { "epoch": 0.7786401887033023, "grad_norm": 0.17321662604808807, "learning_rate": 0.002, "loss": 2.5732, "step": 390840 }, { "epoch": 0.7786601109269412, "grad_norm": 0.14347423613071442, "learning_rate": 0.002, "loss": 2.5352, "step": 390850 }, { "epoch": 0.7786800331505801, "grad_norm": 0.14031681418418884, "learning_rate": 0.002, "loss": 2.5487, "step": 390860 }, { "epoch": 0.7786999553742191, "grad_norm": 0.19766674935817719, "learning_rate": 0.002, "loss": 2.5437, "step": 390870 }, { "epoch": 0.778719877597858, "grad_norm": 0.14731667935848236, "learning_rate": 0.002, "loss": 2.5508, "step": 390880 }, { "epoch": 0.7787397998214969, "grad_norm": 0.13418138027191162, "learning_rate": 0.002, "loss": 2.5542, "step": 390890 }, { "epoch": 0.7787597220451358, "grad_norm": 0.14250998198986053, "learning_rate": 0.002, "loss": 2.5577, "step": 390900 }, { "epoch": 0.7787796442687747, "grad_norm": 0.1766006052494049, "learning_rate": 0.002, "loss": 2.5453, "step": 390910 }, { "epoch": 0.7787995664924137, "grad_norm": 0.2540803551673889, "learning_rate": 0.002, "loss": 2.5469, "step": 390920 }, { "epoch": 0.7788194887160526, "grad_norm": 0.1637624353170395, "learning_rate": 0.002, "loss": 2.56, "step": 390930 }, { "epoch": 0.7788394109396914, "grad_norm": 0.16708646714687347, "learning_rate": 0.002, "loss": 2.5473, "step": 390940 }, { "epoch": 0.7788593331633303, "grad_norm": 0.15758678317070007, "learning_rate": 0.002, "loss": 2.5715, "step": 390950 }, { "epoch": 0.7788792553869692, "grad_norm": 0.15279583632946014, "learning_rate": 0.002, "loss": 2.5594, "step": 390960 }, { "epoch": 0.7788991776106082, "grad_norm": 0.14488163590431213, "learning_rate": 0.002, "loss": 2.5586, "step": 390970 }, { "epoch": 0.7789190998342471, "grad_norm": 0.16970553994178772, "learning_rate": 0.002, "loss": 2.555, "step": 390980 }, { "epoch": 0.778939022057886, "grad_norm": 0.23075750470161438, "learning_rate": 0.002, "loss": 2.5545, "step": 390990 }, { "epoch": 0.7789589442815249, "grad_norm": 0.1440809816122055, "learning_rate": 0.002, "loss": 2.5436, "step": 391000 }, { "epoch": 0.7789788665051638, "grad_norm": 0.18687139451503754, "learning_rate": 0.002, "loss": 2.5461, "step": 391010 }, { "epoch": 0.7789987887288028, "grad_norm": 0.15548449754714966, "learning_rate": 0.002, "loss": 2.5518, "step": 391020 }, { "epoch": 0.7790187109524417, "grad_norm": 0.16584685444831848, "learning_rate": 0.002, "loss": 2.5518, "step": 391030 }, { "epoch": 0.7790386331760806, "grad_norm": 0.1597181260585785, "learning_rate": 0.002, "loss": 2.5419, "step": 391040 }, { "epoch": 0.7790585553997195, "grad_norm": 0.15212640166282654, "learning_rate": 0.002, "loss": 2.5594, "step": 391050 }, { "epoch": 0.7790784776233584, "grad_norm": 0.14058256149291992, "learning_rate": 0.002, "loss": 2.5469, "step": 391060 }, { "epoch": 0.7790983998469974, "grad_norm": 0.2087450921535492, "learning_rate": 0.002, "loss": 2.551, "step": 391070 }, { "epoch": 0.7791183220706362, "grad_norm": 0.17714394629001617, "learning_rate": 0.002, "loss": 2.5499, "step": 391080 }, { "epoch": 0.7791382442942751, "grad_norm": 0.1435505598783493, "learning_rate": 0.002, "loss": 2.5527, "step": 391090 }, { "epoch": 0.779158166517914, "grad_norm": 0.13195647299289703, "learning_rate": 0.002, "loss": 2.5643, "step": 391100 }, { "epoch": 0.779178088741553, "grad_norm": 0.19320350885391235, "learning_rate": 0.002, "loss": 2.5425, "step": 391110 }, { "epoch": 0.7791980109651919, "grad_norm": 0.18830887973308563, "learning_rate": 0.002, "loss": 2.5522, "step": 391120 }, { "epoch": 0.7792179331888308, "grad_norm": 0.2043824940919876, "learning_rate": 0.002, "loss": 2.5427, "step": 391130 }, { "epoch": 0.7792378554124697, "grad_norm": 0.14567503333091736, "learning_rate": 0.002, "loss": 2.5653, "step": 391140 }, { "epoch": 0.7792577776361086, "grad_norm": 0.15643739700317383, "learning_rate": 0.002, "loss": 2.5682, "step": 391150 }, { "epoch": 0.7792776998597476, "grad_norm": 0.14287641644477844, "learning_rate": 0.002, "loss": 2.5698, "step": 391160 }, { "epoch": 0.7792976220833865, "grad_norm": 0.16307994723320007, "learning_rate": 0.002, "loss": 2.5553, "step": 391170 }, { "epoch": 0.7793175443070254, "grad_norm": 0.14692479372024536, "learning_rate": 0.002, "loss": 2.534, "step": 391180 }, { "epoch": 0.7793374665306643, "grad_norm": 0.19063496589660645, "learning_rate": 0.002, "loss": 2.562, "step": 391190 }, { "epoch": 0.7793573887543032, "grad_norm": 0.14499497413635254, "learning_rate": 0.002, "loss": 2.5549, "step": 391200 }, { "epoch": 0.7793773109779422, "grad_norm": 0.13915401697158813, "learning_rate": 0.002, "loss": 2.5478, "step": 391210 }, { "epoch": 0.779397233201581, "grad_norm": 0.18600523471832275, "learning_rate": 0.002, "loss": 2.5484, "step": 391220 }, { "epoch": 0.7794171554252199, "grad_norm": 0.18667660653591156, "learning_rate": 0.002, "loss": 2.5664, "step": 391230 }, { "epoch": 0.7794370776488588, "grad_norm": 0.17208769917488098, "learning_rate": 0.002, "loss": 2.564, "step": 391240 }, { "epoch": 0.7794569998724977, "grad_norm": 0.15298959612846375, "learning_rate": 0.002, "loss": 2.5525, "step": 391250 }, { "epoch": 0.7794769220961367, "grad_norm": 0.14715202152729034, "learning_rate": 0.002, "loss": 2.5494, "step": 391260 }, { "epoch": 0.7794968443197756, "grad_norm": 0.14102688431739807, "learning_rate": 0.002, "loss": 2.5515, "step": 391270 }, { "epoch": 0.7795167665434145, "grad_norm": 0.1602606624364853, "learning_rate": 0.002, "loss": 2.5467, "step": 391280 }, { "epoch": 0.7795366887670534, "grad_norm": 0.1398639678955078, "learning_rate": 0.002, "loss": 2.5502, "step": 391290 }, { "epoch": 0.7795566109906923, "grad_norm": 0.1420358121395111, "learning_rate": 0.002, "loss": 2.5667, "step": 391300 }, { "epoch": 0.7795765332143313, "grad_norm": 0.16212525963783264, "learning_rate": 0.002, "loss": 2.54, "step": 391310 }, { "epoch": 0.7795964554379702, "grad_norm": 0.17418272793293, "learning_rate": 0.002, "loss": 2.5556, "step": 391320 }, { "epoch": 0.7796163776616091, "grad_norm": 0.13957908749580383, "learning_rate": 0.002, "loss": 2.5462, "step": 391330 }, { "epoch": 0.779636299885248, "grad_norm": 0.18709257245063782, "learning_rate": 0.002, "loss": 2.5512, "step": 391340 }, { "epoch": 0.7796562221088869, "grad_norm": 0.15461091697216034, "learning_rate": 0.002, "loss": 2.5632, "step": 391350 }, { "epoch": 0.7796761443325259, "grad_norm": 0.1660902500152588, "learning_rate": 0.002, "loss": 2.5588, "step": 391360 }, { "epoch": 0.7796960665561647, "grad_norm": 0.15278743207454681, "learning_rate": 0.002, "loss": 2.5623, "step": 391370 }, { "epoch": 0.7797159887798036, "grad_norm": 0.14871037006378174, "learning_rate": 0.002, "loss": 2.5549, "step": 391380 }, { "epoch": 0.7797359110034425, "grad_norm": 0.1712120771408081, "learning_rate": 0.002, "loss": 2.5506, "step": 391390 }, { "epoch": 0.7797558332270814, "grad_norm": 0.20154856145381927, "learning_rate": 0.002, "loss": 2.547, "step": 391400 }, { "epoch": 0.7797757554507204, "grad_norm": 0.16635218262672424, "learning_rate": 0.002, "loss": 2.5648, "step": 391410 }, { "epoch": 0.7797956776743593, "grad_norm": 0.1479913741350174, "learning_rate": 0.002, "loss": 2.5521, "step": 391420 }, { "epoch": 0.7798155998979982, "grad_norm": 0.18145830929279327, "learning_rate": 0.002, "loss": 2.5633, "step": 391430 }, { "epoch": 0.7798355221216371, "grad_norm": 0.12762977182865143, "learning_rate": 0.002, "loss": 2.5571, "step": 391440 }, { "epoch": 0.7798554443452761, "grad_norm": 0.18991826474666595, "learning_rate": 0.002, "loss": 2.5346, "step": 391450 }, { "epoch": 0.779875366568915, "grad_norm": 0.16279034316539764, "learning_rate": 0.002, "loss": 2.5579, "step": 391460 }, { "epoch": 0.7798952887925539, "grad_norm": 0.15635493397712708, "learning_rate": 0.002, "loss": 2.5498, "step": 391470 }, { "epoch": 0.7799152110161928, "grad_norm": 0.14367015659809113, "learning_rate": 0.002, "loss": 2.5433, "step": 391480 }, { "epoch": 0.7799351332398317, "grad_norm": 0.16416329145431519, "learning_rate": 0.002, "loss": 2.5511, "step": 391490 }, { "epoch": 0.7799550554634707, "grad_norm": 0.14011172950267792, "learning_rate": 0.002, "loss": 2.5512, "step": 391500 }, { "epoch": 0.7799749776871095, "grad_norm": 0.22751973569393158, "learning_rate": 0.002, "loss": 2.562, "step": 391510 }, { "epoch": 0.7799948999107484, "grad_norm": 0.14440198242664337, "learning_rate": 0.002, "loss": 2.5597, "step": 391520 }, { "epoch": 0.7800148221343873, "grad_norm": 0.151497945189476, "learning_rate": 0.002, "loss": 2.5564, "step": 391530 }, { "epoch": 0.7800347443580262, "grad_norm": 0.13722223043441772, "learning_rate": 0.002, "loss": 2.5645, "step": 391540 }, { "epoch": 0.7800546665816652, "grad_norm": 0.19508489966392517, "learning_rate": 0.002, "loss": 2.5625, "step": 391550 }, { "epoch": 0.7800745888053041, "grad_norm": 0.14523166418075562, "learning_rate": 0.002, "loss": 2.5583, "step": 391560 }, { "epoch": 0.780094511028943, "grad_norm": 0.13383761048316956, "learning_rate": 0.002, "loss": 2.5614, "step": 391570 }, { "epoch": 0.7801144332525819, "grad_norm": 0.15934684872627258, "learning_rate": 0.002, "loss": 2.5385, "step": 391580 }, { "epoch": 0.7801343554762208, "grad_norm": 0.15640497207641602, "learning_rate": 0.002, "loss": 2.569, "step": 391590 }, { "epoch": 0.7801542776998598, "grad_norm": 0.16048523783683777, "learning_rate": 0.002, "loss": 2.5499, "step": 391600 }, { "epoch": 0.7801741999234987, "grad_norm": 0.1491297334432602, "learning_rate": 0.002, "loss": 2.542, "step": 391610 }, { "epoch": 0.7801941221471376, "grad_norm": 0.18027545511722565, "learning_rate": 0.002, "loss": 2.5657, "step": 391620 }, { "epoch": 0.7802140443707765, "grad_norm": 0.15482331812381744, "learning_rate": 0.002, "loss": 2.5672, "step": 391630 }, { "epoch": 0.7802339665944154, "grad_norm": 0.1810058206319809, "learning_rate": 0.002, "loss": 2.5607, "step": 391640 }, { "epoch": 0.7802538888180544, "grad_norm": 0.18387123942375183, "learning_rate": 0.002, "loss": 2.553, "step": 391650 }, { "epoch": 0.7802738110416932, "grad_norm": 0.15680548548698425, "learning_rate": 0.002, "loss": 2.5547, "step": 391660 }, { "epoch": 0.7802937332653321, "grad_norm": 0.18555167317390442, "learning_rate": 0.002, "loss": 2.5456, "step": 391670 }, { "epoch": 0.780313655488971, "grad_norm": 0.14476729929447174, "learning_rate": 0.002, "loss": 2.548, "step": 391680 }, { "epoch": 0.7803335777126099, "grad_norm": 0.15826702117919922, "learning_rate": 0.002, "loss": 2.5748, "step": 391690 }, { "epoch": 0.7803534999362489, "grad_norm": 0.16749843955039978, "learning_rate": 0.002, "loss": 2.5542, "step": 391700 }, { "epoch": 0.7803734221598878, "grad_norm": 0.14332257211208344, "learning_rate": 0.002, "loss": 2.5468, "step": 391710 }, { "epoch": 0.7803933443835267, "grad_norm": 0.16626527905464172, "learning_rate": 0.002, "loss": 2.5324, "step": 391720 }, { "epoch": 0.7804132666071656, "grad_norm": 0.16121616959571838, "learning_rate": 0.002, "loss": 2.5645, "step": 391730 }, { "epoch": 0.7804331888308046, "grad_norm": 0.1627034842967987, "learning_rate": 0.002, "loss": 2.541, "step": 391740 }, { "epoch": 0.7804531110544435, "grad_norm": 0.14184243977069855, "learning_rate": 0.002, "loss": 2.5445, "step": 391750 }, { "epoch": 0.7804730332780824, "grad_norm": 0.19453325867652893, "learning_rate": 0.002, "loss": 2.5383, "step": 391760 }, { "epoch": 0.7804929555017213, "grad_norm": 0.14294767379760742, "learning_rate": 0.002, "loss": 2.5472, "step": 391770 }, { "epoch": 0.7805128777253602, "grad_norm": 0.14270895719528198, "learning_rate": 0.002, "loss": 2.5615, "step": 391780 }, { "epoch": 0.7805327999489992, "grad_norm": 0.1449468731880188, "learning_rate": 0.002, "loss": 2.5548, "step": 391790 }, { "epoch": 0.780552722172638, "grad_norm": 0.16226132214069366, "learning_rate": 0.002, "loss": 2.5531, "step": 391800 }, { "epoch": 0.7805726443962769, "grad_norm": 0.15187321603298187, "learning_rate": 0.002, "loss": 2.5537, "step": 391810 }, { "epoch": 0.7805925666199158, "grad_norm": 0.17146247625350952, "learning_rate": 0.002, "loss": 2.572, "step": 391820 }, { "epoch": 0.7806124888435547, "grad_norm": 0.16221091151237488, "learning_rate": 0.002, "loss": 2.5321, "step": 391830 }, { "epoch": 0.7806324110671937, "grad_norm": 0.17690545320510864, "learning_rate": 0.002, "loss": 2.5583, "step": 391840 }, { "epoch": 0.7806523332908326, "grad_norm": 0.14927799999713898, "learning_rate": 0.002, "loss": 2.5626, "step": 391850 }, { "epoch": 0.7806722555144715, "grad_norm": 0.15329284965991974, "learning_rate": 0.002, "loss": 2.5391, "step": 391860 }, { "epoch": 0.7806921777381104, "grad_norm": 0.19032928347587585, "learning_rate": 0.002, "loss": 2.549, "step": 391870 }, { "epoch": 0.7807120999617493, "grad_norm": 0.1625310182571411, "learning_rate": 0.002, "loss": 2.5565, "step": 391880 }, { "epoch": 0.7807320221853883, "grad_norm": 0.1442636251449585, "learning_rate": 0.002, "loss": 2.5494, "step": 391890 }, { "epoch": 0.7807519444090272, "grad_norm": 0.20566490292549133, "learning_rate": 0.002, "loss": 2.5578, "step": 391900 }, { "epoch": 0.7807718666326661, "grad_norm": 0.1492408961057663, "learning_rate": 0.002, "loss": 2.5419, "step": 391910 }, { "epoch": 0.780791788856305, "grad_norm": 0.1552167534828186, "learning_rate": 0.002, "loss": 2.5442, "step": 391920 }, { "epoch": 0.7808117110799438, "grad_norm": 0.1373281180858612, "learning_rate": 0.002, "loss": 2.5478, "step": 391930 }, { "epoch": 0.7808316333035829, "grad_norm": 0.17250734567642212, "learning_rate": 0.002, "loss": 2.5536, "step": 391940 }, { "epoch": 0.7808515555272217, "grad_norm": 0.166198268532753, "learning_rate": 0.002, "loss": 2.5402, "step": 391950 }, { "epoch": 0.7808714777508606, "grad_norm": 0.16154025495052338, "learning_rate": 0.002, "loss": 2.5441, "step": 391960 }, { "epoch": 0.7808913999744995, "grad_norm": 0.1471966803073883, "learning_rate": 0.002, "loss": 2.5572, "step": 391970 }, { "epoch": 0.7809113221981384, "grad_norm": 0.20908546447753906, "learning_rate": 0.002, "loss": 2.5482, "step": 391980 }, { "epoch": 0.7809312444217774, "grad_norm": 0.15360042452812195, "learning_rate": 0.002, "loss": 2.5597, "step": 391990 }, { "epoch": 0.7809511666454163, "grad_norm": 0.1609315723180771, "learning_rate": 0.002, "loss": 2.5395, "step": 392000 }, { "epoch": 0.7809710888690552, "grad_norm": 0.17762941122055054, "learning_rate": 0.002, "loss": 2.551, "step": 392010 }, { "epoch": 0.7809910110926941, "grad_norm": 0.1388128399848938, "learning_rate": 0.002, "loss": 2.5596, "step": 392020 }, { "epoch": 0.7810109333163331, "grad_norm": 0.16436706483364105, "learning_rate": 0.002, "loss": 2.5624, "step": 392030 }, { "epoch": 0.781030855539972, "grad_norm": 0.16206449270248413, "learning_rate": 0.002, "loss": 2.5423, "step": 392040 }, { "epoch": 0.7810507777636109, "grad_norm": 0.158903107047081, "learning_rate": 0.002, "loss": 2.5512, "step": 392050 }, { "epoch": 0.7810706999872498, "grad_norm": 0.16056126356124878, "learning_rate": 0.002, "loss": 2.5574, "step": 392060 }, { "epoch": 0.7810906222108887, "grad_norm": 0.1477016806602478, "learning_rate": 0.002, "loss": 2.5634, "step": 392070 }, { "epoch": 0.7811105444345277, "grad_norm": 0.1373499631881714, "learning_rate": 0.002, "loss": 2.5726, "step": 392080 }, { "epoch": 0.7811304666581665, "grad_norm": 0.22601434588432312, "learning_rate": 0.002, "loss": 2.5475, "step": 392090 }, { "epoch": 0.7811503888818054, "grad_norm": 0.1460592895746231, "learning_rate": 0.002, "loss": 2.5567, "step": 392100 }, { "epoch": 0.7811703111054443, "grad_norm": 0.136668398976326, "learning_rate": 0.002, "loss": 2.5511, "step": 392110 }, { "epoch": 0.7811902333290832, "grad_norm": 0.16214314103126526, "learning_rate": 0.002, "loss": 2.5611, "step": 392120 }, { "epoch": 0.7812101555527222, "grad_norm": 0.14978256821632385, "learning_rate": 0.002, "loss": 2.5422, "step": 392130 }, { "epoch": 0.7812300777763611, "grad_norm": 0.18487712740898132, "learning_rate": 0.002, "loss": 2.5552, "step": 392140 }, { "epoch": 0.78125, "grad_norm": 0.16280266642570496, "learning_rate": 0.002, "loss": 2.553, "step": 392150 }, { "epoch": 0.7812699222236389, "grad_norm": 0.15717945992946625, "learning_rate": 0.002, "loss": 2.5451, "step": 392160 }, { "epoch": 0.7812898444472778, "grad_norm": 0.16399899125099182, "learning_rate": 0.002, "loss": 2.5492, "step": 392170 }, { "epoch": 0.7813097666709168, "grad_norm": 0.18129804730415344, "learning_rate": 0.002, "loss": 2.5527, "step": 392180 }, { "epoch": 0.7813296888945557, "grad_norm": 0.1794963926076889, "learning_rate": 0.002, "loss": 2.5344, "step": 392190 }, { "epoch": 0.7813496111181946, "grad_norm": 0.14376403391361237, "learning_rate": 0.002, "loss": 2.5456, "step": 392200 }, { "epoch": 0.7813695333418335, "grad_norm": 0.15258006751537323, "learning_rate": 0.002, "loss": 2.5437, "step": 392210 }, { "epoch": 0.7813894555654723, "grad_norm": 0.1548834592103958, "learning_rate": 0.002, "loss": 2.5558, "step": 392220 }, { "epoch": 0.7814093777891113, "grad_norm": 0.33078038692474365, "learning_rate": 0.002, "loss": 2.5552, "step": 392230 }, { "epoch": 0.7814293000127502, "grad_norm": 0.15923623740673065, "learning_rate": 0.002, "loss": 2.5481, "step": 392240 }, { "epoch": 0.7814492222363891, "grad_norm": 0.15763244032859802, "learning_rate": 0.002, "loss": 2.5557, "step": 392250 }, { "epoch": 0.781469144460028, "grad_norm": 0.16319730877876282, "learning_rate": 0.002, "loss": 2.559, "step": 392260 }, { "epoch": 0.7814890666836669, "grad_norm": 0.14825408160686493, "learning_rate": 0.002, "loss": 2.5535, "step": 392270 }, { "epoch": 0.7815089889073059, "grad_norm": 0.17653992772102356, "learning_rate": 0.002, "loss": 2.5636, "step": 392280 }, { "epoch": 0.7815289111309448, "grad_norm": 0.1554795652627945, "learning_rate": 0.002, "loss": 2.5441, "step": 392290 }, { "epoch": 0.7815488333545837, "grad_norm": 0.14340618252754211, "learning_rate": 0.002, "loss": 2.5678, "step": 392300 }, { "epoch": 0.7815687555782226, "grad_norm": 0.16664189100265503, "learning_rate": 0.002, "loss": 2.5457, "step": 392310 }, { "epoch": 0.7815886778018616, "grad_norm": 0.16456736624240875, "learning_rate": 0.002, "loss": 2.5551, "step": 392320 }, { "epoch": 0.7816086000255005, "grad_norm": 0.16165585815906525, "learning_rate": 0.002, "loss": 2.54, "step": 392330 }, { "epoch": 0.7816285222491394, "grad_norm": 0.18183869123458862, "learning_rate": 0.002, "loss": 2.5532, "step": 392340 }, { "epoch": 0.7816484444727783, "grad_norm": 0.15187428891658783, "learning_rate": 0.002, "loss": 2.5383, "step": 392350 }, { "epoch": 0.7816683666964171, "grad_norm": 0.14150471985340118, "learning_rate": 0.002, "loss": 2.5537, "step": 392360 }, { "epoch": 0.7816882889200562, "grad_norm": 0.17855684459209442, "learning_rate": 0.002, "loss": 2.5746, "step": 392370 }, { "epoch": 0.781708211143695, "grad_norm": 0.14890660345554352, "learning_rate": 0.002, "loss": 2.5633, "step": 392380 }, { "epoch": 0.7817281333673339, "grad_norm": 0.15703092515468597, "learning_rate": 0.002, "loss": 2.5585, "step": 392390 }, { "epoch": 0.7817480555909728, "grad_norm": 0.1588326096534729, "learning_rate": 0.002, "loss": 2.5521, "step": 392400 }, { "epoch": 0.7817679778146117, "grad_norm": 0.1408776044845581, "learning_rate": 0.002, "loss": 2.5495, "step": 392410 }, { "epoch": 0.7817879000382507, "grad_norm": 0.16506467759609222, "learning_rate": 0.002, "loss": 2.5639, "step": 392420 }, { "epoch": 0.7818078222618896, "grad_norm": 0.16767075657844543, "learning_rate": 0.002, "loss": 2.5557, "step": 392430 }, { "epoch": 0.7818277444855285, "grad_norm": 0.14115235209465027, "learning_rate": 0.002, "loss": 2.545, "step": 392440 }, { "epoch": 0.7818476667091674, "grad_norm": 0.1752655804157257, "learning_rate": 0.002, "loss": 2.5653, "step": 392450 }, { "epoch": 0.7818675889328063, "grad_norm": 0.1763676106929779, "learning_rate": 0.002, "loss": 2.5477, "step": 392460 }, { "epoch": 0.7818875111564453, "grad_norm": 0.20374168455600739, "learning_rate": 0.002, "loss": 2.5532, "step": 392470 }, { "epoch": 0.7819074333800842, "grad_norm": 0.146153062582016, "learning_rate": 0.002, "loss": 2.5491, "step": 392480 }, { "epoch": 0.7819273556037231, "grad_norm": 0.1430726796388626, "learning_rate": 0.002, "loss": 2.5416, "step": 392490 }, { "epoch": 0.781947277827362, "grad_norm": 0.22268515825271606, "learning_rate": 0.002, "loss": 2.5493, "step": 392500 }, { "epoch": 0.7819672000510008, "grad_norm": 0.13493870198726654, "learning_rate": 0.002, "loss": 2.5648, "step": 392510 }, { "epoch": 0.7819871222746398, "grad_norm": 0.15520302951335907, "learning_rate": 0.002, "loss": 2.5565, "step": 392520 }, { "epoch": 0.7820070444982787, "grad_norm": 0.16330914199352264, "learning_rate": 0.002, "loss": 2.5484, "step": 392530 }, { "epoch": 0.7820269667219176, "grad_norm": 0.15445542335510254, "learning_rate": 0.002, "loss": 2.5439, "step": 392540 }, { "epoch": 0.7820468889455565, "grad_norm": 0.1459636688232422, "learning_rate": 0.002, "loss": 2.5577, "step": 392550 }, { "epoch": 0.7820668111691954, "grad_norm": 0.13770747184753418, "learning_rate": 0.002, "loss": 2.5462, "step": 392560 }, { "epoch": 0.7820867333928344, "grad_norm": 0.1465488076210022, "learning_rate": 0.002, "loss": 2.5595, "step": 392570 }, { "epoch": 0.7821066556164733, "grad_norm": 0.13737350702285767, "learning_rate": 0.002, "loss": 2.5496, "step": 392580 }, { "epoch": 0.7821265778401122, "grad_norm": 0.1299162656068802, "learning_rate": 0.002, "loss": 2.5652, "step": 392590 }, { "epoch": 0.7821465000637511, "grad_norm": 0.1668517291545868, "learning_rate": 0.002, "loss": 2.5609, "step": 392600 }, { "epoch": 0.7821664222873901, "grad_norm": 0.17054608464241028, "learning_rate": 0.002, "loss": 2.5468, "step": 392610 }, { "epoch": 0.782186344511029, "grad_norm": 0.17145097255706787, "learning_rate": 0.002, "loss": 2.5513, "step": 392620 }, { "epoch": 0.7822062667346679, "grad_norm": 0.14593224227428436, "learning_rate": 0.002, "loss": 2.5475, "step": 392630 }, { "epoch": 0.7822261889583068, "grad_norm": 0.15672306716442108, "learning_rate": 0.002, "loss": 2.5747, "step": 392640 }, { "epoch": 0.7822461111819456, "grad_norm": 0.14017589390277863, "learning_rate": 0.002, "loss": 2.5608, "step": 392650 }, { "epoch": 0.7822660334055846, "grad_norm": 0.1512807011604309, "learning_rate": 0.002, "loss": 2.543, "step": 392660 }, { "epoch": 0.7822859556292235, "grad_norm": 0.17190547287464142, "learning_rate": 0.002, "loss": 2.5592, "step": 392670 }, { "epoch": 0.7823058778528624, "grad_norm": 0.16164609789848328, "learning_rate": 0.002, "loss": 2.5548, "step": 392680 }, { "epoch": 0.7823258000765013, "grad_norm": 0.1390998363494873, "learning_rate": 0.002, "loss": 2.5531, "step": 392690 }, { "epoch": 0.7823457223001402, "grad_norm": 0.1536491960287094, "learning_rate": 0.002, "loss": 2.5429, "step": 392700 }, { "epoch": 0.7823656445237792, "grad_norm": 0.1717049479484558, "learning_rate": 0.002, "loss": 2.5567, "step": 392710 }, { "epoch": 0.7823855667474181, "grad_norm": 0.1702919453382492, "learning_rate": 0.002, "loss": 2.5594, "step": 392720 }, { "epoch": 0.782405488971057, "grad_norm": 0.17969152331352234, "learning_rate": 0.002, "loss": 2.5496, "step": 392730 }, { "epoch": 0.7824254111946959, "grad_norm": 0.1398181915283203, "learning_rate": 0.002, "loss": 2.5498, "step": 392740 }, { "epoch": 0.7824453334183348, "grad_norm": 0.14092449843883514, "learning_rate": 0.002, "loss": 2.5515, "step": 392750 }, { "epoch": 0.7824652556419738, "grad_norm": 0.16238126158714294, "learning_rate": 0.002, "loss": 2.5599, "step": 392760 }, { "epoch": 0.7824851778656127, "grad_norm": 0.14434821903705597, "learning_rate": 0.002, "loss": 2.5623, "step": 392770 }, { "epoch": 0.7825051000892516, "grad_norm": 0.15197215974330902, "learning_rate": 0.002, "loss": 2.5519, "step": 392780 }, { "epoch": 0.7825250223128905, "grad_norm": 0.13833291828632355, "learning_rate": 0.002, "loss": 2.5503, "step": 392790 }, { "epoch": 0.7825449445365293, "grad_norm": 0.1453019678592682, "learning_rate": 0.002, "loss": 2.549, "step": 392800 }, { "epoch": 0.7825648667601683, "grad_norm": 0.16619795560836792, "learning_rate": 0.002, "loss": 2.5678, "step": 392810 }, { "epoch": 0.7825847889838072, "grad_norm": 0.1581697016954422, "learning_rate": 0.002, "loss": 2.5475, "step": 392820 }, { "epoch": 0.7826047112074461, "grad_norm": 0.15754345059394836, "learning_rate": 0.002, "loss": 2.5545, "step": 392830 }, { "epoch": 0.782624633431085, "grad_norm": 0.13732914626598358, "learning_rate": 0.002, "loss": 2.554, "step": 392840 }, { "epoch": 0.7826445556547239, "grad_norm": 0.16300761699676514, "learning_rate": 0.002, "loss": 2.5424, "step": 392850 }, { "epoch": 0.7826644778783629, "grad_norm": 0.1573328822851181, "learning_rate": 0.002, "loss": 2.5419, "step": 392860 }, { "epoch": 0.7826844001020018, "grad_norm": 0.18246407806873322, "learning_rate": 0.002, "loss": 2.5564, "step": 392870 }, { "epoch": 0.7827043223256407, "grad_norm": 0.17409150302410126, "learning_rate": 0.002, "loss": 2.551, "step": 392880 }, { "epoch": 0.7827242445492796, "grad_norm": 0.15016110241413116, "learning_rate": 0.002, "loss": 2.5502, "step": 392890 }, { "epoch": 0.7827441667729186, "grad_norm": 0.1805947721004486, "learning_rate": 0.002, "loss": 2.5571, "step": 392900 }, { "epoch": 0.7827640889965575, "grad_norm": 0.15151210129261017, "learning_rate": 0.002, "loss": 2.5497, "step": 392910 }, { "epoch": 0.7827840112201964, "grad_norm": 0.14050504565238953, "learning_rate": 0.002, "loss": 2.5602, "step": 392920 }, { "epoch": 0.7828039334438353, "grad_norm": 0.16456075012683868, "learning_rate": 0.002, "loss": 2.5598, "step": 392930 }, { "epoch": 0.7828238556674741, "grad_norm": 0.15828849375247955, "learning_rate": 0.002, "loss": 2.541, "step": 392940 }, { "epoch": 0.7828437778911131, "grad_norm": 0.1420297622680664, "learning_rate": 0.002, "loss": 2.545, "step": 392950 }, { "epoch": 0.782863700114752, "grad_norm": 0.12362965196371078, "learning_rate": 0.002, "loss": 2.5555, "step": 392960 }, { "epoch": 0.7828836223383909, "grad_norm": 0.17318780720233917, "learning_rate": 0.002, "loss": 2.5662, "step": 392970 }, { "epoch": 0.7829035445620298, "grad_norm": 0.1453435868024826, "learning_rate": 0.002, "loss": 2.5545, "step": 392980 }, { "epoch": 0.7829234667856687, "grad_norm": 0.1675262451171875, "learning_rate": 0.002, "loss": 2.546, "step": 392990 }, { "epoch": 0.7829433890093077, "grad_norm": 0.1417459100484848, "learning_rate": 0.002, "loss": 2.5392, "step": 393000 }, { "epoch": 0.7829633112329466, "grad_norm": 0.2299368977546692, "learning_rate": 0.002, "loss": 2.564, "step": 393010 }, { "epoch": 0.7829832334565855, "grad_norm": 0.1513030230998993, "learning_rate": 0.002, "loss": 2.5459, "step": 393020 }, { "epoch": 0.7830031556802244, "grad_norm": 0.1671762615442276, "learning_rate": 0.002, "loss": 2.5659, "step": 393030 }, { "epoch": 0.7830230779038633, "grad_norm": 0.18475602567195892, "learning_rate": 0.002, "loss": 2.5734, "step": 393040 }, { "epoch": 0.7830430001275023, "grad_norm": 0.14057593047618866, "learning_rate": 0.002, "loss": 2.5628, "step": 393050 }, { "epoch": 0.7830629223511412, "grad_norm": 0.1358911395072937, "learning_rate": 0.002, "loss": 2.5625, "step": 393060 }, { "epoch": 0.7830828445747801, "grad_norm": 0.18212221562862396, "learning_rate": 0.002, "loss": 2.5504, "step": 393070 }, { "epoch": 0.783102766798419, "grad_norm": 0.16041916608810425, "learning_rate": 0.002, "loss": 2.54, "step": 393080 }, { "epoch": 0.7831226890220578, "grad_norm": 0.1648666113615036, "learning_rate": 0.002, "loss": 2.5571, "step": 393090 }, { "epoch": 0.7831426112456968, "grad_norm": 0.15596117079257965, "learning_rate": 0.002, "loss": 2.5573, "step": 393100 }, { "epoch": 0.7831625334693357, "grad_norm": 0.15510930120944977, "learning_rate": 0.002, "loss": 2.5542, "step": 393110 }, { "epoch": 0.7831824556929746, "grad_norm": 0.15498465299606323, "learning_rate": 0.002, "loss": 2.5438, "step": 393120 }, { "epoch": 0.7832023779166135, "grad_norm": 0.15452425181865692, "learning_rate": 0.002, "loss": 2.5467, "step": 393130 }, { "epoch": 0.7832223001402524, "grad_norm": 0.1661411076784134, "learning_rate": 0.002, "loss": 2.5528, "step": 393140 }, { "epoch": 0.7832422223638914, "grad_norm": 0.14483413100242615, "learning_rate": 0.002, "loss": 2.5709, "step": 393150 }, { "epoch": 0.7832621445875303, "grad_norm": 0.12435586005449295, "learning_rate": 0.002, "loss": 2.5601, "step": 393160 }, { "epoch": 0.7832820668111692, "grad_norm": 0.13292109966278076, "learning_rate": 0.002, "loss": 2.5548, "step": 393170 }, { "epoch": 0.7833019890348081, "grad_norm": 0.19999219477176666, "learning_rate": 0.002, "loss": 2.5733, "step": 393180 }, { "epoch": 0.783321911258447, "grad_norm": 0.151274174451828, "learning_rate": 0.002, "loss": 2.5516, "step": 393190 }, { "epoch": 0.783341833482086, "grad_norm": 0.1762719452381134, "learning_rate": 0.002, "loss": 2.5413, "step": 393200 }, { "epoch": 0.7833617557057249, "grad_norm": 0.1619146764278412, "learning_rate": 0.002, "loss": 2.5544, "step": 393210 }, { "epoch": 0.7833816779293638, "grad_norm": 0.1621410846710205, "learning_rate": 0.002, "loss": 2.5562, "step": 393220 }, { "epoch": 0.7834016001530026, "grad_norm": 0.17784447968006134, "learning_rate": 0.002, "loss": 2.5603, "step": 393230 }, { "epoch": 0.7834215223766416, "grad_norm": 0.14807634055614471, "learning_rate": 0.002, "loss": 2.5494, "step": 393240 }, { "epoch": 0.7834414446002805, "grad_norm": 0.17651142179965973, "learning_rate": 0.002, "loss": 2.5607, "step": 393250 }, { "epoch": 0.7834613668239194, "grad_norm": 0.1645732969045639, "learning_rate": 0.002, "loss": 2.5387, "step": 393260 }, { "epoch": 0.7834812890475583, "grad_norm": 0.15545734763145447, "learning_rate": 0.002, "loss": 2.5557, "step": 393270 }, { "epoch": 0.7835012112711972, "grad_norm": 0.19148312509059906, "learning_rate": 0.002, "loss": 2.5418, "step": 393280 }, { "epoch": 0.7835211334948362, "grad_norm": 0.1723354458808899, "learning_rate": 0.002, "loss": 2.5369, "step": 393290 }, { "epoch": 0.7835410557184751, "grad_norm": 0.14710812270641327, "learning_rate": 0.002, "loss": 2.5548, "step": 393300 }, { "epoch": 0.783560977942114, "grad_norm": 0.13270370662212372, "learning_rate": 0.002, "loss": 2.5462, "step": 393310 }, { "epoch": 0.7835809001657529, "grad_norm": 0.1904861032962799, "learning_rate": 0.002, "loss": 2.5365, "step": 393320 }, { "epoch": 0.7836008223893918, "grad_norm": 0.15049679577350616, "learning_rate": 0.002, "loss": 2.5427, "step": 393330 }, { "epoch": 0.7836207446130308, "grad_norm": 0.18301382660865784, "learning_rate": 0.002, "loss": 2.5531, "step": 393340 }, { "epoch": 0.7836406668366697, "grad_norm": 0.15880711376667023, "learning_rate": 0.002, "loss": 2.5541, "step": 393350 }, { "epoch": 0.7836605890603086, "grad_norm": 0.19765923917293549, "learning_rate": 0.002, "loss": 2.5465, "step": 393360 }, { "epoch": 0.7836805112839474, "grad_norm": 0.16343069076538086, "learning_rate": 0.002, "loss": 2.5589, "step": 393370 }, { "epoch": 0.7837004335075863, "grad_norm": 0.1436392068862915, "learning_rate": 0.002, "loss": 2.5598, "step": 393380 }, { "epoch": 0.7837203557312253, "grad_norm": 0.14198040962219238, "learning_rate": 0.002, "loss": 2.5608, "step": 393390 }, { "epoch": 0.7837402779548642, "grad_norm": 0.1690729558467865, "learning_rate": 0.002, "loss": 2.5542, "step": 393400 }, { "epoch": 0.7837602001785031, "grad_norm": 0.16185542941093445, "learning_rate": 0.002, "loss": 2.5525, "step": 393410 }, { "epoch": 0.783780122402142, "grad_norm": 0.1495954841375351, "learning_rate": 0.002, "loss": 2.5459, "step": 393420 }, { "epoch": 0.7838000446257809, "grad_norm": 0.14122819900512695, "learning_rate": 0.002, "loss": 2.5595, "step": 393430 }, { "epoch": 0.7838199668494199, "grad_norm": 0.18356581032276154, "learning_rate": 0.002, "loss": 2.5544, "step": 393440 }, { "epoch": 0.7838398890730588, "grad_norm": 0.15852496027946472, "learning_rate": 0.002, "loss": 2.5595, "step": 393450 }, { "epoch": 0.7838598112966977, "grad_norm": 0.16639608144760132, "learning_rate": 0.002, "loss": 2.5488, "step": 393460 }, { "epoch": 0.7838797335203366, "grad_norm": 0.1445513665676117, "learning_rate": 0.002, "loss": 2.5296, "step": 393470 }, { "epoch": 0.7838996557439755, "grad_norm": 0.16893546283245087, "learning_rate": 0.002, "loss": 2.5425, "step": 393480 }, { "epoch": 0.7839195779676145, "grad_norm": 0.14676617085933685, "learning_rate": 0.002, "loss": 2.5381, "step": 393490 }, { "epoch": 0.7839395001912534, "grad_norm": 0.15936702489852905, "learning_rate": 0.002, "loss": 2.5468, "step": 393500 }, { "epoch": 0.7839594224148922, "grad_norm": 0.13931693136692047, "learning_rate": 0.002, "loss": 2.5505, "step": 393510 }, { "epoch": 0.7839793446385311, "grad_norm": 0.14109736680984497, "learning_rate": 0.002, "loss": 2.5545, "step": 393520 }, { "epoch": 0.7839992668621701, "grad_norm": 0.1730852723121643, "learning_rate": 0.002, "loss": 2.5498, "step": 393530 }, { "epoch": 0.784019189085809, "grad_norm": 0.13753069937229156, "learning_rate": 0.002, "loss": 2.5601, "step": 393540 }, { "epoch": 0.7840391113094479, "grad_norm": 0.15705139935016632, "learning_rate": 0.002, "loss": 2.5445, "step": 393550 }, { "epoch": 0.7840590335330868, "grad_norm": 0.14806990325450897, "learning_rate": 0.002, "loss": 2.5564, "step": 393560 }, { "epoch": 0.7840789557567257, "grad_norm": 0.13878293335437775, "learning_rate": 0.002, "loss": 2.5638, "step": 393570 }, { "epoch": 0.7840988779803647, "grad_norm": 0.14352621138095856, "learning_rate": 0.002, "loss": 2.5632, "step": 393580 }, { "epoch": 0.7841188002040036, "grad_norm": 0.15054547786712646, "learning_rate": 0.002, "loss": 2.5678, "step": 393590 }, { "epoch": 0.7841387224276425, "grad_norm": 0.1348053514957428, "learning_rate": 0.002, "loss": 2.5437, "step": 393600 }, { "epoch": 0.7841586446512814, "grad_norm": 0.1499543935060501, "learning_rate": 0.002, "loss": 2.561, "step": 393610 }, { "epoch": 0.7841785668749203, "grad_norm": 0.14305315911769867, "learning_rate": 0.002, "loss": 2.5475, "step": 393620 }, { "epoch": 0.7841984890985593, "grad_norm": 0.1400093287229538, "learning_rate": 0.002, "loss": 2.556, "step": 393630 }, { "epoch": 0.7842184113221982, "grad_norm": 0.14467056095600128, "learning_rate": 0.002, "loss": 2.5589, "step": 393640 }, { "epoch": 0.784238333545837, "grad_norm": 0.16027459502220154, "learning_rate": 0.002, "loss": 2.5583, "step": 393650 }, { "epoch": 0.7842582557694759, "grad_norm": 0.18520896136760712, "learning_rate": 0.002, "loss": 2.5494, "step": 393660 }, { "epoch": 0.7842781779931148, "grad_norm": 0.16752800345420837, "learning_rate": 0.002, "loss": 2.5432, "step": 393670 }, { "epoch": 0.7842981002167538, "grad_norm": 0.16519983112812042, "learning_rate": 0.002, "loss": 2.5617, "step": 393680 }, { "epoch": 0.7843180224403927, "grad_norm": 0.15618322789669037, "learning_rate": 0.002, "loss": 2.5549, "step": 393690 }, { "epoch": 0.7843379446640316, "grad_norm": 0.1877194195985794, "learning_rate": 0.002, "loss": 2.5629, "step": 393700 }, { "epoch": 0.7843578668876705, "grad_norm": 0.176323801279068, "learning_rate": 0.002, "loss": 2.5504, "step": 393710 }, { "epoch": 0.7843777891113094, "grad_norm": 0.14280498027801514, "learning_rate": 0.002, "loss": 2.5535, "step": 393720 }, { "epoch": 0.7843977113349484, "grad_norm": 0.13946297764778137, "learning_rate": 0.002, "loss": 2.5715, "step": 393730 }, { "epoch": 0.7844176335585873, "grad_norm": 0.1722002625465393, "learning_rate": 0.002, "loss": 2.5476, "step": 393740 }, { "epoch": 0.7844375557822262, "grad_norm": 0.13923461735248566, "learning_rate": 0.002, "loss": 2.5478, "step": 393750 }, { "epoch": 0.7844574780058651, "grad_norm": 0.18914499878883362, "learning_rate": 0.002, "loss": 2.5535, "step": 393760 }, { "epoch": 0.784477400229504, "grad_norm": 0.1365383416414261, "learning_rate": 0.002, "loss": 2.5509, "step": 393770 }, { "epoch": 0.784497322453143, "grad_norm": 0.15824051201343536, "learning_rate": 0.002, "loss": 2.5462, "step": 393780 }, { "epoch": 0.7845172446767819, "grad_norm": 0.19722451269626617, "learning_rate": 0.002, "loss": 2.5455, "step": 393790 }, { "epoch": 0.7845371669004207, "grad_norm": 0.14398689568042755, "learning_rate": 0.002, "loss": 2.5576, "step": 393800 }, { "epoch": 0.7845570891240596, "grad_norm": 0.13914692401885986, "learning_rate": 0.002, "loss": 2.5456, "step": 393810 }, { "epoch": 0.7845770113476986, "grad_norm": 0.13900108635425568, "learning_rate": 0.002, "loss": 2.553, "step": 393820 }, { "epoch": 0.7845969335713375, "grad_norm": 0.16189061105251312, "learning_rate": 0.002, "loss": 2.5525, "step": 393830 }, { "epoch": 0.7846168557949764, "grad_norm": 0.16196051239967346, "learning_rate": 0.002, "loss": 2.5522, "step": 393840 }, { "epoch": 0.7846367780186153, "grad_norm": 0.178965762257576, "learning_rate": 0.002, "loss": 2.5658, "step": 393850 }, { "epoch": 0.7846567002422542, "grad_norm": 0.16857647895812988, "learning_rate": 0.002, "loss": 2.566, "step": 393860 }, { "epoch": 0.7846766224658932, "grad_norm": 0.14936955273151398, "learning_rate": 0.002, "loss": 2.542, "step": 393870 }, { "epoch": 0.7846965446895321, "grad_norm": 0.1480192244052887, "learning_rate": 0.002, "loss": 2.545, "step": 393880 }, { "epoch": 0.784716466913171, "grad_norm": 0.15058855712413788, "learning_rate": 0.002, "loss": 2.5721, "step": 393890 }, { "epoch": 0.7847363891368099, "grad_norm": 0.1642736792564392, "learning_rate": 0.002, "loss": 2.5599, "step": 393900 }, { "epoch": 0.7847563113604488, "grad_norm": 0.1338527798652649, "learning_rate": 0.002, "loss": 2.5299, "step": 393910 }, { "epoch": 0.7847762335840878, "grad_norm": 0.1931818276643753, "learning_rate": 0.002, "loss": 2.5487, "step": 393920 }, { "epoch": 0.7847961558077267, "grad_norm": 0.3890244960784912, "learning_rate": 0.002, "loss": 2.559, "step": 393930 }, { "epoch": 0.7848160780313655, "grad_norm": 0.15950702130794525, "learning_rate": 0.002, "loss": 2.5618, "step": 393940 }, { "epoch": 0.7848360002550044, "grad_norm": 0.1460169553756714, "learning_rate": 0.002, "loss": 2.5435, "step": 393950 }, { "epoch": 0.7848559224786433, "grad_norm": 0.13427864015102386, "learning_rate": 0.002, "loss": 2.5356, "step": 393960 }, { "epoch": 0.7848758447022823, "grad_norm": 0.16153256595134735, "learning_rate": 0.002, "loss": 2.5486, "step": 393970 }, { "epoch": 0.7848957669259212, "grad_norm": 0.14629510045051575, "learning_rate": 0.002, "loss": 2.5595, "step": 393980 }, { "epoch": 0.7849156891495601, "grad_norm": 0.14125776290893555, "learning_rate": 0.002, "loss": 2.5603, "step": 393990 }, { "epoch": 0.784935611373199, "grad_norm": 0.18263843655586243, "learning_rate": 0.002, "loss": 2.5577, "step": 394000 }, { "epoch": 0.7849555335968379, "grad_norm": 0.1517936885356903, "learning_rate": 0.002, "loss": 2.5506, "step": 394010 }, { "epoch": 0.7849754558204769, "grad_norm": 0.1958589106798172, "learning_rate": 0.002, "loss": 2.5623, "step": 394020 }, { "epoch": 0.7849953780441158, "grad_norm": 0.18602029979228973, "learning_rate": 0.002, "loss": 2.5432, "step": 394030 }, { "epoch": 0.7850153002677547, "grad_norm": 0.15676438808441162, "learning_rate": 0.002, "loss": 2.5274, "step": 394040 }, { "epoch": 0.7850352224913936, "grad_norm": 0.19096802175045013, "learning_rate": 0.002, "loss": 2.5695, "step": 394050 }, { "epoch": 0.7850551447150325, "grad_norm": 0.16113002598285675, "learning_rate": 0.002, "loss": 2.5572, "step": 394060 }, { "epoch": 0.7850750669386715, "grad_norm": 0.18590152263641357, "learning_rate": 0.002, "loss": 2.5573, "step": 394070 }, { "epoch": 0.7850949891623104, "grad_norm": 0.16556981205940247, "learning_rate": 0.002, "loss": 2.5551, "step": 394080 }, { "epoch": 0.7851149113859492, "grad_norm": 0.14887066185474396, "learning_rate": 0.002, "loss": 2.5636, "step": 394090 }, { "epoch": 0.7851348336095881, "grad_norm": 0.15650777518749237, "learning_rate": 0.002, "loss": 2.546, "step": 394100 }, { "epoch": 0.7851547558332271, "grad_norm": 0.15712541341781616, "learning_rate": 0.002, "loss": 2.5648, "step": 394110 }, { "epoch": 0.785174678056866, "grad_norm": 0.1509152501821518, "learning_rate": 0.002, "loss": 2.5578, "step": 394120 }, { "epoch": 0.7851946002805049, "grad_norm": 0.14588452875614166, "learning_rate": 0.002, "loss": 2.5612, "step": 394130 }, { "epoch": 0.7852145225041438, "grad_norm": 0.19453531503677368, "learning_rate": 0.002, "loss": 2.5537, "step": 394140 }, { "epoch": 0.7852344447277827, "grad_norm": 0.15881669521331787, "learning_rate": 0.002, "loss": 2.5584, "step": 394150 }, { "epoch": 0.7852543669514217, "grad_norm": 0.1417708843946457, "learning_rate": 0.002, "loss": 2.5513, "step": 394160 }, { "epoch": 0.7852742891750606, "grad_norm": 0.19208106398582458, "learning_rate": 0.002, "loss": 2.5493, "step": 394170 }, { "epoch": 0.7852942113986995, "grad_norm": 0.13651707768440247, "learning_rate": 0.002, "loss": 2.5543, "step": 394180 }, { "epoch": 0.7853141336223384, "grad_norm": 0.1446738988161087, "learning_rate": 0.002, "loss": 2.5483, "step": 394190 }, { "epoch": 0.7853340558459773, "grad_norm": 0.137604221701622, "learning_rate": 0.002, "loss": 2.5627, "step": 394200 }, { "epoch": 0.7853539780696163, "grad_norm": 0.15272021293640137, "learning_rate": 0.002, "loss": 2.5496, "step": 394210 }, { "epoch": 0.7853739002932552, "grad_norm": 0.15206649899482727, "learning_rate": 0.002, "loss": 2.5519, "step": 394220 }, { "epoch": 0.785393822516894, "grad_norm": 0.15708598494529724, "learning_rate": 0.002, "loss": 2.5486, "step": 394230 }, { "epoch": 0.7854137447405329, "grad_norm": 0.1422751545906067, "learning_rate": 0.002, "loss": 2.5527, "step": 394240 }, { "epoch": 0.7854336669641718, "grad_norm": 0.14865441620349884, "learning_rate": 0.002, "loss": 2.5592, "step": 394250 }, { "epoch": 0.7854535891878108, "grad_norm": 0.15468259155750275, "learning_rate": 0.002, "loss": 2.5544, "step": 394260 }, { "epoch": 0.7854735114114497, "grad_norm": 0.17121195793151855, "learning_rate": 0.002, "loss": 2.5396, "step": 394270 }, { "epoch": 0.7854934336350886, "grad_norm": 0.16911351680755615, "learning_rate": 0.002, "loss": 2.5466, "step": 394280 }, { "epoch": 0.7855133558587275, "grad_norm": 0.20737233757972717, "learning_rate": 0.002, "loss": 2.5569, "step": 394290 }, { "epoch": 0.7855332780823664, "grad_norm": 0.14873002469539642, "learning_rate": 0.002, "loss": 2.5522, "step": 394300 }, { "epoch": 0.7855532003060054, "grad_norm": 0.15918120741844177, "learning_rate": 0.002, "loss": 2.5626, "step": 394310 }, { "epoch": 0.7855731225296443, "grad_norm": 0.1676035225391388, "learning_rate": 0.002, "loss": 2.5309, "step": 394320 }, { "epoch": 0.7855930447532832, "grad_norm": 0.1504976749420166, "learning_rate": 0.002, "loss": 2.5649, "step": 394330 }, { "epoch": 0.7856129669769221, "grad_norm": 0.15246546268463135, "learning_rate": 0.002, "loss": 2.5528, "step": 394340 }, { "epoch": 0.785632889200561, "grad_norm": 0.15333540737628937, "learning_rate": 0.002, "loss": 2.5496, "step": 394350 }, { "epoch": 0.7856528114242, "grad_norm": 0.1508112996816635, "learning_rate": 0.002, "loss": 2.5439, "step": 394360 }, { "epoch": 0.7856727336478389, "grad_norm": 0.14214354753494263, "learning_rate": 0.002, "loss": 2.5561, "step": 394370 }, { "epoch": 0.7856926558714777, "grad_norm": 0.17606337368488312, "learning_rate": 0.002, "loss": 2.548, "step": 394380 }, { "epoch": 0.7857125780951166, "grad_norm": 0.1624486744403839, "learning_rate": 0.002, "loss": 2.5513, "step": 394390 }, { "epoch": 0.7857325003187556, "grad_norm": 0.13631081581115723, "learning_rate": 0.002, "loss": 2.557, "step": 394400 }, { "epoch": 0.7857524225423945, "grad_norm": 0.1820162683725357, "learning_rate": 0.002, "loss": 2.5371, "step": 394410 }, { "epoch": 0.7857723447660334, "grad_norm": 0.1607726812362671, "learning_rate": 0.002, "loss": 2.5511, "step": 394420 }, { "epoch": 0.7857922669896723, "grad_norm": 0.14763084053993225, "learning_rate": 0.002, "loss": 2.5744, "step": 394430 }, { "epoch": 0.7858121892133112, "grad_norm": 0.15123829245567322, "learning_rate": 0.002, "loss": 2.5397, "step": 394440 }, { "epoch": 0.7858321114369502, "grad_norm": 0.1629299521446228, "learning_rate": 0.002, "loss": 2.5732, "step": 394450 }, { "epoch": 0.7858520336605891, "grad_norm": 0.11980453133583069, "learning_rate": 0.002, "loss": 2.554, "step": 394460 }, { "epoch": 0.785871955884228, "grad_norm": 0.1336052119731903, "learning_rate": 0.002, "loss": 2.5303, "step": 394470 }, { "epoch": 0.7858918781078669, "grad_norm": 0.17296108603477478, "learning_rate": 0.002, "loss": 2.5526, "step": 394480 }, { "epoch": 0.7859118003315058, "grad_norm": 0.15294906497001648, "learning_rate": 0.002, "loss": 2.5412, "step": 394490 }, { "epoch": 0.7859317225551448, "grad_norm": 0.1795111745595932, "learning_rate": 0.002, "loss": 2.5584, "step": 394500 }, { "epoch": 0.7859516447787837, "grad_norm": 0.1458691954612732, "learning_rate": 0.002, "loss": 2.5521, "step": 394510 }, { "epoch": 0.7859715670024225, "grad_norm": 0.20608453452587128, "learning_rate": 0.002, "loss": 2.5524, "step": 394520 }, { "epoch": 0.7859914892260614, "grad_norm": 0.16768313944339752, "learning_rate": 0.002, "loss": 2.5551, "step": 394530 }, { "epoch": 0.7860114114497003, "grad_norm": 0.15237779915332794, "learning_rate": 0.002, "loss": 2.5743, "step": 394540 }, { "epoch": 0.7860313336733393, "grad_norm": 0.1363067626953125, "learning_rate": 0.002, "loss": 2.5483, "step": 394550 }, { "epoch": 0.7860512558969782, "grad_norm": 0.14701949059963226, "learning_rate": 0.002, "loss": 2.5531, "step": 394560 }, { "epoch": 0.7860711781206171, "grad_norm": 0.1518075317144394, "learning_rate": 0.002, "loss": 2.5542, "step": 394570 }, { "epoch": 0.786091100344256, "grad_norm": 0.14527173340320587, "learning_rate": 0.002, "loss": 2.5488, "step": 394580 }, { "epoch": 0.7861110225678949, "grad_norm": 0.18996134400367737, "learning_rate": 0.002, "loss": 2.5565, "step": 394590 }, { "epoch": 0.7861309447915339, "grad_norm": 0.15133632719516754, "learning_rate": 0.002, "loss": 2.5643, "step": 394600 }, { "epoch": 0.7861508670151728, "grad_norm": 0.1540561318397522, "learning_rate": 0.002, "loss": 2.5365, "step": 394610 }, { "epoch": 0.7861707892388117, "grad_norm": 0.16128413379192352, "learning_rate": 0.002, "loss": 2.5534, "step": 394620 }, { "epoch": 0.7861907114624506, "grad_norm": 0.1563389003276825, "learning_rate": 0.002, "loss": 2.5448, "step": 394630 }, { "epoch": 0.7862106336860895, "grad_norm": 0.14683961868286133, "learning_rate": 0.002, "loss": 2.5623, "step": 394640 }, { "epoch": 0.7862305559097285, "grad_norm": 0.15226934850215912, "learning_rate": 0.002, "loss": 2.5533, "step": 394650 }, { "epoch": 0.7862504781333673, "grad_norm": 0.19078849256038666, "learning_rate": 0.002, "loss": 2.5608, "step": 394660 }, { "epoch": 0.7862704003570062, "grad_norm": 0.12636959552764893, "learning_rate": 0.002, "loss": 2.5659, "step": 394670 }, { "epoch": 0.7862903225806451, "grad_norm": 0.19179178774356842, "learning_rate": 0.002, "loss": 2.5662, "step": 394680 }, { "epoch": 0.786310244804284, "grad_norm": 0.15741629898548126, "learning_rate": 0.002, "loss": 2.5464, "step": 394690 }, { "epoch": 0.786330167027923, "grad_norm": 0.15439613163471222, "learning_rate": 0.002, "loss": 2.5464, "step": 394700 }, { "epoch": 0.7863500892515619, "grad_norm": 0.14748728275299072, "learning_rate": 0.002, "loss": 2.562, "step": 394710 }, { "epoch": 0.7863700114752008, "grad_norm": 0.19257257878780365, "learning_rate": 0.002, "loss": 2.5592, "step": 394720 }, { "epoch": 0.7863899336988397, "grad_norm": 0.1482865959405899, "learning_rate": 0.002, "loss": 2.5508, "step": 394730 }, { "epoch": 0.7864098559224787, "grad_norm": 0.15937845408916473, "learning_rate": 0.002, "loss": 2.5438, "step": 394740 }, { "epoch": 0.7864297781461176, "grad_norm": 0.13363490998744965, "learning_rate": 0.002, "loss": 2.5496, "step": 394750 }, { "epoch": 0.7864497003697565, "grad_norm": 0.1445426493883133, "learning_rate": 0.002, "loss": 2.5564, "step": 394760 }, { "epoch": 0.7864696225933954, "grad_norm": 0.18903444707393646, "learning_rate": 0.002, "loss": 2.5441, "step": 394770 }, { "epoch": 0.7864895448170343, "grad_norm": 0.16960225999355316, "learning_rate": 0.002, "loss": 2.5544, "step": 394780 }, { "epoch": 0.7865094670406733, "grad_norm": 0.1914454996585846, "learning_rate": 0.002, "loss": 2.571, "step": 394790 }, { "epoch": 0.7865293892643122, "grad_norm": 0.14650246500968933, "learning_rate": 0.002, "loss": 2.562, "step": 394800 }, { "epoch": 0.786549311487951, "grad_norm": 0.16618427634239197, "learning_rate": 0.002, "loss": 2.5583, "step": 394810 }, { "epoch": 0.7865692337115899, "grad_norm": 0.12119318544864655, "learning_rate": 0.002, "loss": 2.5477, "step": 394820 }, { "epoch": 0.7865891559352288, "grad_norm": 0.15977546572685242, "learning_rate": 0.002, "loss": 2.555, "step": 394830 }, { "epoch": 0.7866090781588678, "grad_norm": 0.16040122509002686, "learning_rate": 0.002, "loss": 2.545, "step": 394840 }, { "epoch": 0.7866290003825067, "grad_norm": 0.1510363519191742, "learning_rate": 0.002, "loss": 2.5457, "step": 394850 }, { "epoch": 0.7866489226061456, "grad_norm": 0.15382950007915497, "learning_rate": 0.002, "loss": 2.5327, "step": 394860 }, { "epoch": 0.7866688448297845, "grad_norm": 0.16986489295959473, "learning_rate": 0.002, "loss": 2.5611, "step": 394870 }, { "epoch": 0.7866887670534234, "grad_norm": 0.13536708056926727, "learning_rate": 0.002, "loss": 2.554, "step": 394880 }, { "epoch": 0.7867086892770624, "grad_norm": 0.1368628591299057, "learning_rate": 0.002, "loss": 2.5489, "step": 394890 }, { "epoch": 0.7867286115007013, "grad_norm": 0.171883687376976, "learning_rate": 0.002, "loss": 2.5516, "step": 394900 }, { "epoch": 0.7867485337243402, "grad_norm": 0.14357496798038483, "learning_rate": 0.002, "loss": 2.5409, "step": 394910 }, { "epoch": 0.7867684559479791, "grad_norm": 0.20421701669692993, "learning_rate": 0.002, "loss": 2.5563, "step": 394920 }, { "epoch": 0.786788378171618, "grad_norm": 0.16692548990249634, "learning_rate": 0.002, "loss": 2.5674, "step": 394930 }, { "epoch": 0.786808300395257, "grad_norm": 0.16667087376117706, "learning_rate": 0.002, "loss": 2.5636, "step": 394940 }, { "epoch": 0.7868282226188958, "grad_norm": 0.16826820373535156, "learning_rate": 0.002, "loss": 2.5586, "step": 394950 }, { "epoch": 0.7868481448425347, "grad_norm": 0.15751570463180542, "learning_rate": 0.002, "loss": 2.5664, "step": 394960 }, { "epoch": 0.7868680670661736, "grad_norm": 0.16586072742938995, "learning_rate": 0.002, "loss": 2.5388, "step": 394970 }, { "epoch": 0.7868879892898125, "grad_norm": 0.14504331350326538, "learning_rate": 0.002, "loss": 2.5504, "step": 394980 }, { "epoch": 0.7869079115134515, "grad_norm": 0.18528932332992554, "learning_rate": 0.002, "loss": 2.5565, "step": 394990 }, { "epoch": 0.7869278337370904, "grad_norm": 0.14925937354564667, "learning_rate": 0.002, "loss": 2.5512, "step": 395000 }, { "epoch": 0.7869477559607293, "grad_norm": 0.15093225240707397, "learning_rate": 0.002, "loss": 2.5514, "step": 395010 }, { "epoch": 0.7869676781843682, "grad_norm": 0.16288672387599945, "learning_rate": 0.002, "loss": 2.5601, "step": 395020 }, { "epoch": 0.7869876004080072, "grad_norm": 0.1518414169549942, "learning_rate": 0.002, "loss": 2.5545, "step": 395030 }, { "epoch": 0.7870075226316461, "grad_norm": 0.17944149672985077, "learning_rate": 0.002, "loss": 2.5596, "step": 395040 }, { "epoch": 0.787027444855285, "grad_norm": 0.15872840583324432, "learning_rate": 0.002, "loss": 2.5555, "step": 395050 }, { "epoch": 0.7870473670789239, "grad_norm": 0.1431957483291626, "learning_rate": 0.002, "loss": 2.5535, "step": 395060 }, { "epoch": 0.7870672893025628, "grad_norm": 0.1838349550962448, "learning_rate": 0.002, "loss": 2.5541, "step": 395070 }, { "epoch": 0.7870872115262018, "grad_norm": 0.17142169177532196, "learning_rate": 0.002, "loss": 2.5579, "step": 395080 }, { "epoch": 0.7871071337498406, "grad_norm": 0.16641424596309662, "learning_rate": 0.002, "loss": 2.5542, "step": 395090 }, { "epoch": 0.7871270559734795, "grad_norm": 0.13859879970550537, "learning_rate": 0.002, "loss": 2.5537, "step": 395100 }, { "epoch": 0.7871469781971184, "grad_norm": 0.2001027762889862, "learning_rate": 0.002, "loss": 2.5592, "step": 395110 }, { "epoch": 0.7871669004207573, "grad_norm": 0.14256460964679718, "learning_rate": 0.002, "loss": 2.5562, "step": 395120 }, { "epoch": 0.7871868226443963, "grad_norm": 0.15852278470993042, "learning_rate": 0.002, "loss": 2.5596, "step": 395130 }, { "epoch": 0.7872067448680352, "grad_norm": 0.2307162582874298, "learning_rate": 0.002, "loss": 2.5571, "step": 395140 }, { "epoch": 0.7872266670916741, "grad_norm": 0.1363769769668579, "learning_rate": 0.002, "loss": 2.5702, "step": 395150 }, { "epoch": 0.787246589315313, "grad_norm": 0.13054457306861877, "learning_rate": 0.002, "loss": 2.5566, "step": 395160 }, { "epoch": 0.7872665115389519, "grad_norm": 0.15962980687618256, "learning_rate": 0.002, "loss": 2.5454, "step": 395170 }, { "epoch": 0.7872864337625909, "grad_norm": 0.22280894219875336, "learning_rate": 0.002, "loss": 2.5453, "step": 395180 }, { "epoch": 0.7873063559862298, "grad_norm": 0.15743805468082428, "learning_rate": 0.002, "loss": 2.5543, "step": 395190 }, { "epoch": 0.7873262782098687, "grad_norm": 0.16911523044109344, "learning_rate": 0.002, "loss": 2.5526, "step": 395200 }, { "epoch": 0.7873462004335076, "grad_norm": 0.17060482501983643, "learning_rate": 0.002, "loss": 2.5556, "step": 395210 }, { "epoch": 0.7873661226571464, "grad_norm": 0.16132241487503052, "learning_rate": 0.002, "loss": 2.5599, "step": 395220 }, { "epoch": 0.7873860448807855, "grad_norm": 0.16358911991119385, "learning_rate": 0.002, "loss": 2.56, "step": 395230 }, { "epoch": 0.7874059671044243, "grad_norm": 0.16205188632011414, "learning_rate": 0.002, "loss": 2.5508, "step": 395240 }, { "epoch": 0.7874258893280632, "grad_norm": 0.15147444605827332, "learning_rate": 0.002, "loss": 2.552, "step": 395250 }, { "epoch": 0.7874458115517021, "grad_norm": 0.14801417291164398, "learning_rate": 0.002, "loss": 2.5554, "step": 395260 }, { "epoch": 0.787465733775341, "grad_norm": 0.1886453479528427, "learning_rate": 0.002, "loss": 2.5521, "step": 395270 }, { "epoch": 0.78748565599898, "grad_norm": 0.17698976397514343, "learning_rate": 0.002, "loss": 2.5628, "step": 395280 }, { "epoch": 0.7875055782226189, "grad_norm": 0.13006159663200378, "learning_rate": 0.002, "loss": 2.5506, "step": 395290 }, { "epoch": 0.7875255004462578, "grad_norm": 0.18342535197734833, "learning_rate": 0.002, "loss": 2.5811, "step": 395300 }, { "epoch": 0.7875454226698967, "grad_norm": 0.14254625141620636, "learning_rate": 0.002, "loss": 2.5664, "step": 395310 }, { "epoch": 0.7875653448935357, "grad_norm": 0.1378501057624817, "learning_rate": 0.002, "loss": 2.5477, "step": 395320 }, { "epoch": 0.7875852671171746, "grad_norm": 0.17711907625198364, "learning_rate": 0.002, "loss": 2.5503, "step": 395330 }, { "epoch": 0.7876051893408135, "grad_norm": 0.1537594199180603, "learning_rate": 0.002, "loss": 2.5469, "step": 395340 }, { "epoch": 0.7876251115644524, "grad_norm": 0.1404428780078888, "learning_rate": 0.002, "loss": 2.5525, "step": 395350 }, { "epoch": 0.7876450337880913, "grad_norm": 0.14419429004192352, "learning_rate": 0.002, "loss": 2.5598, "step": 395360 }, { "epoch": 0.7876649560117303, "grad_norm": 0.17412595450878143, "learning_rate": 0.002, "loss": 2.5452, "step": 395370 }, { "epoch": 0.7876848782353691, "grad_norm": 0.14923249185085297, "learning_rate": 0.002, "loss": 2.5443, "step": 395380 }, { "epoch": 0.787704800459008, "grad_norm": 0.18631331622600555, "learning_rate": 0.002, "loss": 2.5429, "step": 395390 }, { "epoch": 0.7877247226826469, "grad_norm": 0.15098871290683746, "learning_rate": 0.002, "loss": 2.5612, "step": 395400 }, { "epoch": 0.7877446449062858, "grad_norm": 0.15078237652778625, "learning_rate": 0.002, "loss": 2.5523, "step": 395410 }, { "epoch": 0.7877645671299248, "grad_norm": 0.16424375772476196, "learning_rate": 0.002, "loss": 2.5631, "step": 395420 }, { "epoch": 0.7877844893535637, "grad_norm": 0.15787693858146667, "learning_rate": 0.002, "loss": 2.5495, "step": 395430 }, { "epoch": 0.7878044115772026, "grad_norm": 0.15603254735469818, "learning_rate": 0.002, "loss": 2.5391, "step": 395440 }, { "epoch": 0.7878243338008415, "grad_norm": 0.16012565791606903, "learning_rate": 0.002, "loss": 2.5509, "step": 395450 }, { "epoch": 0.7878442560244804, "grad_norm": 0.16941426694393158, "learning_rate": 0.002, "loss": 2.5357, "step": 395460 }, { "epoch": 0.7878641782481194, "grad_norm": 0.16748172044754028, "learning_rate": 0.002, "loss": 2.5593, "step": 395470 }, { "epoch": 0.7878841004717583, "grad_norm": 0.15241757035255432, "learning_rate": 0.002, "loss": 2.5551, "step": 395480 }, { "epoch": 0.7879040226953972, "grad_norm": 0.1501617133617401, "learning_rate": 0.002, "loss": 2.5568, "step": 395490 }, { "epoch": 0.7879239449190361, "grad_norm": 0.1564236432313919, "learning_rate": 0.002, "loss": 2.5577, "step": 395500 }, { "epoch": 0.787943867142675, "grad_norm": 0.1485571563243866, "learning_rate": 0.002, "loss": 2.5682, "step": 395510 }, { "epoch": 0.787963789366314, "grad_norm": 0.21816477179527283, "learning_rate": 0.002, "loss": 2.5531, "step": 395520 }, { "epoch": 0.7879837115899528, "grad_norm": 0.1385393887758255, "learning_rate": 0.002, "loss": 2.5418, "step": 395530 }, { "epoch": 0.7880036338135917, "grad_norm": 0.16852128505706787, "learning_rate": 0.002, "loss": 2.5401, "step": 395540 }, { "epoch": 0.7880235560372306, "grad_norm": 0.15992644429206848, "learning_rate": 0.002, "loss": 2.5613, "step": 395550 }, { "epoch": 0.7880434782608695, "grad_norm": 0.17356662452220917, "learning_rate": 0.002, "loss": 2.5575, "step": 395560 }, { "epoch": 0.7880634004845085, "grad_norm": 0.14999140799045563, "learning_rate": 0.002, "loss": 2.5507, "step": 395570 }, { "epoch": 0.7880833227081474, "grad_norm": 0.155324786901474, "learning_rate": 0.002, "loss": 2.5477, "step": 395580 }, { "epoch": 0.7881032449317863, "grad_norm": 0.15177883207798004, "learning_rate": 0.002, "loss": 2.5462, "step": 395590 }, { "epoch": 0.7881231671554252, "grad_norm": 0.15943720936775208, "learning_rate": 0.002, "loss": 2.5715, "step": 395600 }, { "epoch": 0.7881430893790642, "grad_norm": 0.14745189249515533, "learning_rate": 0.002, "loss": 2.5526, "step": 395610 }, { "epoch": 0.7881630116027031, "grad_norm": 0.14448557794094086, "learning_rate": 0.002, "loss": 2.5422, "step": 395620 }, { "epoch": 0.788182933826342, "grad_norm": 0.16695360839366913, "learning_rate": 0.002, "loss": 2.5529, "step": 395630 }, { "epoch": 0.7882028560499809, "grad_norm": 0.14340129494667053, "learning_rate": 0.002, "loss": 2.5429, "step": 395640 }, { "epoch": 0.7882227782736198, "grad_norm": 0.14570970833301544, "learning_rate": 0.002, "loss": 2.543, "step": 395650 }, { "epoch": 0.7882427004972588, "grad_norm": 0.14374707639217377, "learning_rate": 0.002, "loss": 2.5592, "step": 395660 }, { "epoch": 0.7882626227208976, "grad_norm": 0.33164218068122864, "learning_rate": 0.002, "loss": 2.5647, "step": 395670 }, { "epoch": 0.7882825449445365, "grad_norm": 0.14546439051628113, "learning_rate": 0.002, "loss": 2.5695, "step": 395680 }, { "epoch": 0.7883024671681754, "grad_norm": 0.17809012532234192, "learning_rate": 0.002, "loss": 2.5574, "step": 395690 }, { "epoch": 0.7883223893918143, "grad_norm": 0.13535331189632416, "learning_rate": 0.002, "loss": 2.5525, "step": 395700 }, { "epoch": 0.7883423116154533, "grad_norm": 0.15788449347019196, "learning_rate": 0.002, "loss": 2.5573, "step": 395710 }, { "epoch": 0.7883622338390922, "grad_norm": 0.17327941954135895, "learning_rate": 0.002, "loss": 2.5526, "step": 395720 }, { "epoch": 0.7883821560627311, "grad_norm": 0.13476890325546265, "learning_rate": 0.002, "loss": 2.5653, "step": 395730 }, { "epoch": 0.78840207828637, "grad_norm": 0.1551697999238968, "learning_rate": 0.002, "loss": 2.55, "step": 395740 }, { "epoch": 0.7884220005100089, "grad_norm": 0.16623173654079437, "learning_rate": 0.002, "loss": 2.5505, "step": 395750 }, { "epoch": 0.7884419227336479, "grad_norm": 0.16662712395191193, "learning_rate": 0.002, "loss": 2.5657, "step": 395760 }, { "epoch": 0.7884618449572868, "grad_norm": 0.1395234763622284, "learning_rate": 0.002, "loss": 2.5674, "step": 395770 }, { "epoch": 0.7884817671809257, "grad_norm": 0.1398976445198059, "learning_rate": 0.002, "loss": 2.5438, "step": 395780 }, { "epoch": 0.7885016894045646, "grad_norm": 0.17137709259986877, "learning_rate": 0.002, "loss": 2.5384, "step": 395790 }, { "epoch": 0.7885216116282034, "grad_norm": 0.16773299872875214, "learning_rate": 0.002, "loss": 2.5719, "step": 395800 }, { "epoch": 0.7885415338518424, "grad_norm": 0.20438143610954285, "learning_rate": 0.002, "loss": 2.5592, "step": 395810 }, { "epoch": 0.7885614560754813, "grad_norm": 0.2081301361322403, "learning_rate": 0.002, "loss": 2.5575, "step": 395820 }, { "epoch": 0.7885813782991202, "grad_norm": 0.16549035906791687, "learning_rate": 0.002, "loss": 2.5521, "step": 395830 }, { "epoch": 0.7886013005227591, "grad_norm": 0.16236118972301483, "learning_rate": 0.002, "loss": 2.5641, "step": 395840 }, { "epoch": 0.788621222746398, "grad_norm": 0.15420648455619812, "learning_rate": 0.002, "loss": 2.5524, "step": 395850 }, { "epoch": 0.788641144970037, "grad_norm": 0.1531127542257309, "learning_rate": 0.002, "loss": 2.5589, "step": 395860 }, { "epoch": 0.7886610671936759, "grad_norm": 0.1683526486158371, "learning_rate": 0.002, "loss": 2.5581, "step": 395870 }, { "epoch": 0.7886809894173148, "grad_norm": 0.12831848859786987, "learning_rate": 0.002, "loss": 2.5507, "step": 395880 }, { "epoch": 0.7887009116409537, "grad_norm": 0.13473263382911682, "learning_rate": 0.002, "loss": 2.5535, "step": 395890 }, { "epoch": 0.7887208338645927, "grad_norm": 0.1622989922761917, "learning_rate": 0.002, "loss": 2.5512, "step": 395900 }, { "epoch": 0.7887407560882316, "grad_norm": 0.1948370635509491, "learning_rate": 0.002, "loss": 2.5378, "step": 395910 }, { "epoch": 0.7887606783118705, "grad_norm": 0.15610109269618988, "learning_rate": 0.002, "loss": 2.5549, "step": 395920 }, { "epoch": 0.7887806005355094, "grad_norm": 0.14728309214115143, "learning_rate": 0.002, "loss": 2.5553, "step": 395930 }, { "epoch": 0.7888005227591482, "grad_norm": 0.15063261985778809, "learning_rate": 0.002, "loss": 2.5395, "step": 395940 }, { "epoch": 0.7888204449827872, "grad_norm": 0.1672741174697876, "learning_rate": 0.002, "loss": 2.5464, "step": 395950 }, { "epoch": 0.7888403672064261, "grad_norm": 0.15890246629714966, "learning_rate": 0.002, "loss": 2.5705, "step": 395960 }, { "epoch": 0.788860289430065, "grad_norm": 0.1727258414030075, "learning_rate": 0.002, "loss": 2.5753, "step": 395970 }, { "epoch": 0.7888802116537039, "grad_norm": 0.14805227518081665, "learning_rate": 0.002, "loss": 2.5515, "step": 395980 }, { "epoch": 0.7889001338773428, "grad_norm": 0.18718552589416504, "learning_rate": 0.002, "loss": 2.554, "step": 395990 }, { "epoch": 0.7889200561009818, "grad_norm": 0.16267463564872742, "learning_rate": 0.002, "loss": 2.5483, "step": 396000 }, { "epoch": 0.7889399783246207, "grad_norm": 0.14952519536018372, "learning_rate": 0.002, "loss": 2.5445, "step": 396010 }, { "epoch": 0.7889599005482596, "grad_norm": 0.14925289154052734, "learning_rate": 0.002, "loss": 2.5527, "step": 396020 }, { "epoch": 0.7889798227718985, "grad_norm": 0.17210248112678528, "learning_rate": 0.002, "loss": 2.5724, "step": 396030 }, { "epoch": 0.7889997449955374, "grad_norm": 0.1409744918346405, "learning_rate": 0.002, "loss": 2.5438, "step": 396040 }, { "epoch": 0.7890196672191764, "grad_norm": 0.14264504611492157, "learning_rate": 0.002, "loss": 2.5543, "step": 396050 }, { "epoch": 0.7890395894428153, "grad_norm": 0.18655821681022644, "learning_rate": 0.002, "loss": 2.5526, "step": 396060 }, { "epoch": 0.7890595116664542, "grad_norm": 0.15385648608207703, "learning_rate": 0.002, "loss": 2.5556, "step": 396070 }, { "epoch": 0.789079433890093, "grad_norm": 0.17625947296619415, "learning_rate": 0.002, "loss": 2.5579, "step": 396080 }, { "epoch": 0.7890993561137319, "grad_norm": 0.15774588286876678, "learning_rate": 0.002, "loss": 2.5617, "step": 396090 }, { "epoch": 0.7891192783373709, "grad_norm": 0.1639818549156189, "learning_rate": 0.002, "loss": 2.5526, "step": 396100 }, { "epoch": 0.7891392005610098, "grad_norm": 0.1454273760318756, "learning_rate": 0.002, "loss": 2.5501, "step": 396110 }, { "epoch": 0.7891591227846487, "grad_norm": 0.132364884018898, "learning_rate": 0.002, "loss": 2.5542, "step": 396120 }, { "epoch": 0.7891790450082876, "grad_norm": 0.18050292134284973, "learning_rate": 0.002, "loss": 2.5553, "step": 396130 }, { "epoch": 0.7891989672319265, "grad_norm": 0.15748389065265656, "learning_rate": 0.002, "loss": 2.5304, "step": 396140 }, { "epoch": 0.7892188894555655, "grad_norm": 0.1576608568429947, "learning_rate": 0.002, "loss": 2.5694, "step": 396150 }, { "epoch": 0.7892388116792044, "grad_norm": 0.19928406178951263, "learning_rate": 0.002, "loss": 2.5616, "step": 396160 }, { "epoch": 0.7892587339028433, "grad_norm": 0.1411881148815155, "learning_rate": 0.002, "loss": 2.5581, "step": 396170 }, { "epoch": 0.7892786561264822, "grad_norm": 0.3810367286205292, "learning_rate": 0.002, "loss": 2.5455, "step": 396180 }, { "epoch": 0.7892985783501211, "grad_norm": 0.12846539914608002, "learning_rate": 0.002, "loss": 2.5475, "step": 396190 }, { "epoch": 0.7893185005737601, "grad_norm": 0.19633512198925018, "learning_rate": 0.002, "loss": 2.5559, "step": 396200 }, { "epoch": 0.789338422797399, "grad_norm": 0.136583611369133, "learning_rate": 0.002, "loss": 2.5539, "step": 396210 }, { "epoch": 0.7893583450210379, "grad_norm": 0.15652751922607422, "learning_rate": 0.002, "loss": 2.5546, "step": 396220 }, { "epoch": 0.7893782672446767, "grad_norm": 0.1406177282333374, "learning_rate": 0.002, "loss": 2.5698, "step": 396230 }, { "epoch": 0.7893981894683157, "grad_norm": 0.15226386487483978, "learning_rate": 0.002, "loss": 2.5527, "step": 396240 }, { "epoch": 0.7894181116919546, "grad_norm": 0.15874478220939636, "learning_rate": 0.002, "loss": 2.558, "step": 396250 }, { "epoch": 0.7894380339155935, "grad_norm": 0.13812050223350525, "learning_rate": 0.002, "loss": 2.5395, "step": 396260 }, { "epoch": 0.7894579561392324, "grad_norm": 0.18125474452972412, "learning_rate": 0.002, "loss": 2.5471, "step": 396270 }, { "epoch": 0.7894778783628713, "grad_norm": 0.16154001653194427, "learning_rate": 0.002, "loss": 2.5576, "step": 396280 }, { "epoch": 0.7894978005865103, "grad_norm": 0.13596497476100922, "learning_rate": 0.002, "loss": 2.5409, "step": 396290 }, { "epoch": 0.7895177228101492, "grad_norm": 0.14929719269275665, "learning_rate": 0.002, "loss": 2.5361, "step": 396300 }, { "epoch": 0.7895376450337881, "grad_norm": 0.12769544124603271, "learning_rate": 0.002, "loss": 2.5404, "step": 396310 }, { "epoch": 0.789557567257427, "grad_norm": 0.17416095733642578, "learning_rate": 0.002, "loss": 2.5482, "step": 396320 }, { "epoch": 0.7895774894810659, "grad_norm": 0.17727316915988922, "learning_rate": 0.002, "loss": 2.551, "step": 396330 }, { "epoch": 0.7895974117047049, "grad_norm": 0.13317705690860748, "learning_rate": 0.002, "loss": 2.5621, "step": 396340 }, { "epoch": 0.7896173339283438, "grad_norm": 0.18595169484615326, "learning_rate": 0.002, "loss": 2.5535, "step": 396350 }, { "epoch": 0.7896372561519827, "grad_norm": 0.14795775711536407, "learning_rate": 0.002, "loss": 2.569, "step": 396360 }, { "epoch": 0.7896571783756215, "grad_norm": 0.1503961682319641, "learning_rate": 0.002, "loss": 2.5601, "step": 396370 }, { "epoch": 0.7896771005992604, "grad_norm": 0.17618921399116516, "learning_rate": 0.002, "loss": 2.5726, "step": 396380 }, { "epoch": 0.7896970228228994, "grad_norm": 0.1433936208486557, "learning_rate": 0.002, "loss": 2.5654, "step": 396390 }, { "epoch": 0.7897169450465383, "grad_norm": 0.14161965250968933, "learning_rate": 0.002, "loss": 2.5683, "step": 396400 }, { "epoch": 0.7897368672701772, "grad_norm": 0.1566929817199707, "learning_rate": 0.002, "loss": 2.5365, "step": 396410 }, { "epoch": 0.7897567894938161, "grad_norm": 0.13191166520118713, "learning_rate": 0.002, "loss": 2.5535, "step": 396420 }, { "epoch": 0.789776711717455, "grad_norm": 0.14992284774780273, "learning_rate": 0.002, "loss": 2.5708, "step": 396430 }, { "epoch": 0.789796633941094, "grad_norm": 0.12843891978263855, "learning_rate": 0.002, "loss": 2.5493, "step": 396440 }, { "epoch": 0.7898165561647329, "grad_norm": 0.1422305852174759, "learning_rate": 0.002, "loss": 2.5493, "step": 396450 }, { "epoch": 0.7898364783883718, "grad_norm": 0.15944240987300873, "learning_rate": 0.002, "loss": 2.5663, "step": 396460 }, { "epoch": 0.7898564006120107, "grad_norm": 0.16901308298110962, "learning_rate": 0.002, "loss": 2.5477, "step": 396470 }, { "epoch": 0.7898763228356496, "grad_norm": 0.13507115840911865, "learning_rate": 0.002, "loss": 2.5603, "step": 396480 }, { "epoch": 0.7898962450592886, "grad_norm": 0.17128370702266693, "learning_rate": 0.002, "loss": 2.5624, "step": 396490 }, { "epoch": 0.7899161672829275, "grad_norm": 0.19320113956928253, "learning_rate": 0.002, "loss": 2.5616, "step": 396500 }, { "epoch": 0.7899360895065664, "grad_norm": 0.16641421616077423, "learning_rate": 0.002, "loss": 2.5574, "step": 396510 }, { "epoch": 0.7899560117302052, "grad_norm": 0.13308916985988617, "learning_rate": 0.002, "loss": 2.5555, "step": 396520 }, { "epoch": 0.7899759339538442, "grad_norm": 0.18290722370147705, "learning_rate": 0.002, "loss": 2.562, "step": 396530 }, { "epoch": 0.7899958561774831, "grad_norm": 0.16787266731262207, "learning_rate": 0.002, "loss": 2.5529, "step": 396540 }, { "epoch": 0.790015778401122, "grad_norm": 0.1432856172323227, "learning_rate": 0.002, "loss": 2.5596, "step": 396550 }, { "epoch": 0.7900357006247609, "grad_norm": 0.16315893828868866, "learning_rate": 0.002, "loss": 2.5619, "step": 396560 }, { "epoch": 0.7900556228483998, "grad_norm": 0.14910200238227844, "learning_rate": 0.002, "loss": 2.5584, "step": 396570 }, { "epoch": 0.7900755450720388, "grad_norm": 0.16048485040664673, "learning_rate": 0.002, "loss": 2.5521, "step": 396580 }, { "epoch": 0.7900954672956777, "grad_norm": 0.1549631953239441, "learning_rate": 0.002, "loss": 2.5546, "step": 396590 }, { "epoch": 0.7901153895193166, "grad_norm": 0.1575993150472641, "learning_rate": 0.002, "loss": 2.5548, "step": 396600 }, { "epoch": 0.7901353117429555, "grad_norm": 0.1771039366722107, "learning_rate": 0.002, "loss": 2.5481, "step": 396610 }, { "epoch": 0.7901552339665944, "grad_norm": 0.1745615154504776, "learning_rate": 0.002, "loss": 2.5626, "step": 396620 }, { "epoch": 0.7901751561902334, "grad_norm": 0.1935308873653412, "learning_rate": 0.002, "loss": 2.5623, "step": 396630 }, { "epoch": 0.7901950784138723, "grad_norm": 0.15907511115074158, "learning_rate": 0.002, "loss": 2.5364, "step": 396640 }, { "epoch": 0.7902150006375112, "grad_norm": 0.1309560090303421, "learning_rate": 0.002, "loss": 2.5593, "step": 396650 }, { "epoch": 0.79023492286115, "grad_norm": 0.16026580333709717, "learning_rate": 0.002, "loss": 2.5421, "step": 396660 }, { "epoch": 0.7902548450847889, "grad_norm": 0.19390380382537842, "learning_rate": 0.002, "loss": 2.5497, "step": 396670 }, { "epoch": 0.7902747673084279, "grad_norm": 0.15960298478603363, "learning_rate": 0.002, "loss": 2.5483, "step": 396680 }, { "epoch": 0.7902946895320668, "grad_norm": 0.3265155851840973, "learning_rate": 0.002, "loss": 2.5484, "step": 396690 }, { "epoch": 0.7903146117557057, "grad_norm": 0.16538287699222565, "learning_rate": 0.002, "loss": 2.5562, "step": 396700 }, { "epoch": 0.7903345339793446, "grad_norm": 0.16185548901557922, "learning_rate": 0.002, "loss": 2.5464, "step": 396710 }, { "epoch": 0.7903544562029835, "grad_norm": 0.1735069900751114, "learning_rate": 0.002, "loss": 2.5688, "step": 396720 }, { "epoch": 0.7903743784266225, "grad_norm": 0.14247740805149078, "learning_rate": 0.002, "loss": 2.5529, "step": 396730 }, { "epoch": 0.7903943006502614, "grad_norm": 0.14891275763511658, "learning_rate": 0.002, "loss": 2.5571, "step": 396740 }, { "epoch": 0.7904142228739003, "grad_norm": 0.1729489266872406, "learning_rate": 0.002, "loss": 2.5429, "step": 396750 }, { "epoch": 0.7904341450975392, "grad_norm": 0.18172116577625275, "learning_rate": 0.002, "loss": 2.5496, "step": 396760 }, { "epoch": 0.7904540673211781, "grad_norm": 0.14601175487041473, "learning_rate": 0.002, "loss": 2.5574, "step": 396770 }, { "epoch": 0.7904739895448171, "grad_norm": 0.14400635659694672, "learning_rate": 0.002, "loss": 2.5572, "step": 396780 }, { "epoch": 0.790493911768456, "grad_norm": 0.1523069143295288, "learning_rate": 0.002, "loss": 2.5309, "step": 396790 }, { "epoch": 0.7905138339920948, "grad_norm": 0.15383456647396088, "learning_rate": 0.002, "loss": 2.5599, "step": 396800 }, { "epoch": 0.7905337562157337, "grad_norm": 0.18890708684921265, "learning_rate": 0.002, "loss": 2.5501, "step": 396810 }, { "epoch": 0.7905536784393727, "grad_norm": 0.1448512226343155, "learning_rate": 0.002, "loss": 2.5404, "step": 396820 }, { "epoch": 0.7905736006630116, "grad_norm": 0.1446453332901001, "learning_rate": 0.002, "loss": 2.5578, "step": 396830 }, { "epoch": 0.7905935228866505, "grad_norm": 0.14022013545036316, "learning_rate": 0.002, "loss": 2.5427, "step": 396840 }, { "epoch": 0.7906134451102894, "grad_norm": 0.198075532913208, "learning_rate": 0.002, "loss": 2.5406, "step": 396850 }, { "epoch": 0.7906333673339283, "grad_norm": 0.16024638712406158, "learning_rate": 0.002, "loss": 2.5507, "step": 396860 }, { "epoch": 0.7906532895575673, "grad_norm": 0.14429661631584167, "learning_rate": 0.002, "loss": 2.5596, "step": 396870 }, { "epoch": 0.7906732117812062, "grad_norm": 0.16320346295833588, "learning_rate": 0.002, "loss": 2.5457, "step": 396880 }, { "epoch": 0.7906931340048451, "grad_norm": 0.1431180089712143, "learning_rate": 0.002, "loss": 2.5576, "step": 396890 }, { "epoch": 0.790713056228484, "grad_norm": 0.17474663257598877, "learning_rate": 0.002, "loss": 2.536, "step": 396900 }, { "epoch": 0.7907329784521229, "grad_norm": 0.18843023478984833, "learning_rate": 0.002, "loss": 2.5522, "step": 396910 }, { "epoch": 0.7907529006757619, "grad_norm": 0.16673624515533447, "learning_rate": 0.002, "loss": 2.5529, "step": 396920 }, { "epoch": 0.7907728228994008, "grad_norm": 0.1457408219575882, "learning_rate": 0.002, "loss": 2.5471, "step": 396930 }, { "epoch": 0.7907927451230397, "grad_norm": 0.14038804173469543, "learning_rate": 0.002, "loss": 2.5485, "step": 396940 }, { "epoch": 0.7908126673466785, "grad_norm": 0.17995242774486542, "learning_rate": 0.002, "loss": 2.5506, "step": 396950 }, { "epoch": 0.7908325895703174, "grad_norm": 0.1664080023765564, "learning_rate": 0.002, "loss": 2.5619, "step": 396960 }, { "epoch": 0.7908525117939564, "grad_norm": 0.13128411769866943, "learning_rate": 0.002, "loss": 2.5398, "step": 396970 }, { "epoch": 0.7908724340175953, "grad_norm": 0.15691639482975006, "learning_rate": 0.002, "loss": 2.5602, "step": 396980 }, { "epoch": 0.7908923562412342, "grad_norm": 0.18452687561511993, "learning_rate": 0.002, "loss": 2.561, "step": 396990 }, { "epoch": 0.7909122784648731, "grad_norm": 0.31763365864753723, "learning_rate": 0.002, "loss": 2.5531, "step": 397000 }, { "epoch": 0.790932200688512, "grad_norm": 0.13605691492557526, "learning_rate": 0.002, "loss": 2.5513, "step": 397010 }, { "epoch": 0.790952122912151, "grad_norm": 0.13165804743766785, "learning_rate": 0.002, "loss": 2.545, "step": 397020 }, { "epoch": 0.7909720451357899, "grad_norm": 0.15170028805732727, "learning_rate": 0.002, "loss": 2.5357, "step": 397030 }, { "epoch": 0.7909919673594288, "grad_norm": 0.1437763124704361, "learning_rate": 0.002, "loss": 2.5523, "step": 397040 }, { "epoch": 0.7910118895830677, "grad_norm": 0.17823311686515808, "learning_rate": 0.002, "loss": 2.5591, "step": 397050 }, { "epoch": 0.7910318118067066, "grad_norm": 0.15585580468177795, "learning_rate": 0.002, "loss": 2.5557, "step": 397060 }, { "epoch": 0.7910517340303456, "grad_norm": 0.204165980219841, "learning_rate": 0.002, "loss": 2.558, "step": 397070 }, { "epoch": 0.7910716562539845, "grad_norm": 0.13282012939453125, "learning_rate": 0.002, "loss": 2.5575, "step": 397080 }, { "epoch": 0.7910915784776233, "grad_norm": 0.14470908045768738, "learning_rate": 0.002, "loss": 2.5659, "step": 397090 }, { "epoch": 0.7911115007012622, "grad_norm": 0.1493416130542755, "learning_rate": 0.002, "loss": 2.5595, "step": 397100 }, { "epoch": 0.7911314229249012, "grad_norm": 0.16019324958324432, "learning_rate": 0.002, "loss": 2.5576, "step": 397110 }, { "epoch": 0.7911513451485401, "grad_norm": 0.15048454701900482, "learning_rate": 0.002, "loss": 2.5593, "step": 397120 }, { "epoch": 0.791171267372179, "grad_norm": 0.17852415144443512, "learning_rate": 0.002, "loss": 2.551, "step": 397130 }, { "epoch": 0.7911911895958179, "grad_norm": 0.14311274886131287, "learning_rate": 0.002, "loss": 2.5495, "step": 397140 }, { "epoch": 0.7912111118194568, "grad_norm": 0.14930348098278046, "learning_rate": 0.002, "loss": 2.5556, "step": 397150 }, { "epoch": 0.7912310340430958, "grad_norm": 0.17081819474697113, "learning_rate": 0.002, "loss": 2.5512, "step": 397160 }, { "epoch": 0.7912509562667347, "grad_norm": 0.15618988871574402, "learning_rate": 0.002, "loss": 2.5648, "step": 397170 }, { "epoch": 0.7912708784903736, "grad_norm": 0.19178354740142822, "learning_rate": 0.002, "loss": 2.5699, "step": 397180 }, { "epoch": 0.7912908007140125, "grad_norm": 0.14498639106750488, "learning_rate": 0.002, "loss": 2.537, "step": 397190 }, { "epoch": 0.7913107229376514, "grad_norm": 0.14200183749198914, "learning_rate": 0.002, "loss": 2.5429, "step": 397200 }, { "epoch": 0.7913306451612904, "grad_norm": 0.18380682170391083, "learning_rate": 0.002, "loss": 2.5529, "step": 397210 }, { "epoch": 0.7913505673849293, "grad_norm": 0.136840358376503, "learning_rate": 0.002, "loss": 2.5612, "step": 397220 }, { "epoch": 0.7913704896085682, "grad_norm": 0.17987562716007233, "learning_rate": 0.002, "loss": 2.5439, "step": 397230 }, { "epoch": 0.791390411832207, "grad_norm": 0.16122694313526154, "learning_rate": 0.002, "loss": 2.5468, "step": 397240 }, { "epoch": 0.7914103340558459, "grad_norm": 0.1671791523694992, "learning_rate": 0.002, "loss": 2.548, "step": 397250 }, { "epoch": 0.7914302562794849, "grad_norm": 0.14007782936096191, "learning_rate": 0.002, "loss": 2.5328, "step": 397260 }, { "epoch": 0.7914501785031238, "grad_norm": 0.14726215600967407, "learning_rate": 0.002, "loss": 2.5654, "step": 397270 }, { "epoch": 0.7914701007267627, "grad_norm": 0.16309799253940582, "learning_rate": 0.002, "loss": 2.5482, "step": 397280 }, { "epoch": 0.7914900229504016, "grad_norm": 0.15771923959255219, "learning_rate": 0.002, "loss": 2.549, "step": 397290 }, { "epoch": 0.7915099451740405, "grad_norm": 0.14719754457473755, "learning_rate": 0.002, "loss": 2.5505, "step": 397300 }, { "epoch": 0.7915298673976795, "grad_norm": 0.14752651751041412, "learning_rate": 0.002, "loss": 2.5675, "step": 397310 }, { "epoch": 0.7915497896213184, "grad_norm": 0.1509653776884079, "learning_rate": 0.002, "loss": 2.5631, "step": 397320 }, { "epoch": 0.7915697118449573, "grad_norm": 0.18229617178440094, "learning_rate": 0.002, "loss": 2.5502, "step": 397330 }, { "epoch": 0.7915896340685962, "grad_norm": 0.1515735387802124, "learning_rate": 0.002, "loss": 2.5589, "step": 397340 }, { "epoch": 0.7916095562922351, "grad_norm": 0.18785309791564941, "learning_rate": 0.002, "loss": 2.5651, "step": 397350 }, { "epoch": 0.7916294785158741, "grad_norm": 0.14676959812641144, "learning_rate": 0.002, "loss": 2.5406, "step": 397360 }, { "epoch": 0.791649400739513, "grad_norm": 0.15544916689395905, "learning_rate": 0.002, "loss": 2.5723, "step": 397370 }, { "epoch": 0.7916693229631518, "grad_norm": 0.15904544293880463, "learning_rate": 0.002, "loss": 2.5576, "step": 397380 }, { "epoch": 0.7916892451867907, "grad_norm": 0.14237916469573975, "learning_rate": 0.002, "loss": 2.5476, "step": 397390 }, { "epoch": 0.7917091674104297, "grad_norm": 0.1487995684146881, "learning_rate": 0.002, "loss": 2.5623, "step": 397400 }, { "epoch": 0.7917290896340686, "grad_norm": 0.14295698702335358, "learning_rate": 0.002, "loss": 2.5362, "step": 397410 }, { "epoch": 0.7917490118577075, "grad_norm": 0.17456012964248657, "learning_rate": 0.002, "loss": 2.5464, "step": 397420 }, { "epoch": 0.7917689340813464, "grad_norm": 0.1534840613603592, "learning_rate": 0.002, "loss": 2.5585, "step": 397430 }, { "epoch": 0.7917888563049853, "grad_norm": 0.1592777520418167, "learning_rate": 0.002, "loss": 2.5599, "step": 397440 }, { "epoch": 0.7918087785286243, "grad_norm": 0.13724850118160248, "learning_rate": 0.002, "loss": 2.5583, "step": 397450 }, { "epoch": 0.7918287007522632, "grad_norm": 0.1379803568124771, "learning_rate": 0.002, "loss": 2.5357, "step": 397460 }, { "epoch": 0.7918486229759021, "grad_norm": 0.1693049818277359, "learning_rate": 0.002, "loss": 2.5404, "step": 397470 }, { "epoch": 0.791868545199541, "grad_norm": 0.14660517871379852, "learning_rate": 0.002, "loss": 2.5586, "step": 397480 }, { "epoch": 0.7918884674231799, "grad_norm": 0.18422141671180725, "learning_rate": 0.002, "loss": 2.5433, "step": 397490 }, { "epoch": 0.7919083896468189, "grad_norm": 0.14394406974315643, "learning_rate": 0.002, "loss": 2.5577, "step": 397500 }, { "epoch": 0.7919283118704578, "grad_norm": 0.16225998103618622, "learning_rate": 0.002, "loss": 2.5656, "step": 397510 }, { "epoch": 0.7919482340940966, "grad_norm": 0.1505575031042099, "learning_rate": 0.002, "loss": 2.5588, "step": 397520 }, { "epoch": 0.7919681563177355, "grad_norm": 0.1785726398229599, "learning_rate": 0.002, "loss": 2.5522, "step": 397530 }, { "epoch": 0.7919880785413744, "grad_norm": 0.15722520649433136, "learning_rate": 0.002, "loss": 2.5443, "step": 397540 }, { "epoch": 0.7920080007650134, "grad_norm": 0.1315908432006836, "learning_rate": 0.002, "loss": 2.5571, "step": 397550 }, { "epoch": 0.7920279229886523, "grad_norm": 0.163784921169281, "learning_rate": 0.002, "loss": 2.5494, "step": 397560 }, { "epoch": 0.7920478452122912, "grad_norm": 0.1527293473482132, "learning_rate": 0.002, "loss": 2.5509, "step": 397570 }, { "epoch": 0.7920677674359301, "grad_norm": 0.1541651338338852, "learning_rate": 0.002, "loss": 2.5546, "step": 397580 }, { "epoch": 0.792087689659569, "grad_norm": 0.13387632369995117, "learning_rate": 0.002, "loss": 2.5611, "step": 397590 }, { "epoch": 0.792107611883208, "grad_norm": 0.15911294519901276, "learning_rate": 0.002, "loss": 2.5591, "step": 397600 }, { "epoch": 0.7921275341068469, "grad_norm": 0.13309891521930695, "learning_rate": 0.002, "loss": 2.5568, "step": 397610 }, { "epoch": 0.7921474563304858, "grad_norm": 0.16667455434799194, "learning_rate": 0.002, "loss": 2.565, "step": 397620 }, { "epoch": 0.7921673785541247, "grad_norm": 0.15594901144504547, "learning_rate": 0.002, "loss": 2.5382, "step": 397630 }, { "epoch": 0.7921873007777636, "grad_norm": 0.15679776668548584, "learning_rate": 0.002, "loss": 2.5584, "step": 397640 }, { "epoch": 0.7922072230014026, "grad_norm": 0.13464559614658356, "learning_rate": 0.002, "loss": 2.5575, "step": 397650 }, { "epoch": 0.7922271452250415, "grad_norm": 0.1877468228340149, "learning_rate": 0.002, "loss": 2.5423, "step": 397660 }, { "epoch": 0.7922470674486803, "grad_norm": 0.15537111461162567, "learning_rate": 0.002, "loss": 2.5569, "step": 397670 }, { "epoch": 0.7922669896723192, "grad_norm": 0.12443943321704865, "learning_rate": 0.002, "loss": 2.5596, "step": 397680 }, { "epoch": 0.7922869118959582, "grad_norm": 0.17087115347385406, "learning_rate": 0.002, "loss": 2.5515, "step": 397690 }, { "epoch": 0.7923068341195971, "grad_norm": 0.14541810750961304, "learning_rate": 0.002, "loss": 2.5541, "step": 397700 }, { "epoch": 0.792326756343236, "grad_norm": 0.1691599041223526, "learning_rate": 0.002, "loss": 2.5512, "step": 397710 }, { "epoch": 0.7923466785668749, "grad_norm": 0.16859960556030273, "learning_rate": 0.002, "loss": 2.5657, "step": 397720 }, { "epoch": 0.7923666007905138, "grad_norm": 0.14148962497711182, "learning_rate": 0.002, "loss": 2.5597, "step": 397730 }, { "epoch": 0.7923865230141528, "grad_norm": 0.1840188056230545, "learning_rate": 0.002, "loss": 2.5349, "step": 397740 }, { "epoch": 0.7924064452377917, "grad_norm": 0.1574764996767044, "learning_rate": 0.002, "loss": 2.5562, "step": 397750 }, { "epoch": 0.7924263674614306, "grad_norm": 0.14349465072155, "learning_rate": 0.002, "loss": 2.5495, "step": 397760 }, { "epoch": 0.7924462896850695, "grad_norm": 0.15808330476284027, "learning_rate": 0.002, "loss": 2.5551, "step": 397770 }, { "epoch": 0.7924662119087084, "grad_norm": 0.15946586430072784, "learning_rate": 0.002, "loss": 2.5475, "step": 397780 }, { "epoch": 0.7924861341323474, "grad_norm": 0.18616607785224915, "learning_rate": 0.002, "loss": 2.5523, "step": 397790 }, { "epoch": 0.7925060563559863, "grad_norm": 0.16263243556022644, "learning_rate": 0.002, "loss": 2.5464, "step": 397800 }, { "epoch": 0.7925259785796251, "grad_norm": 0.15500447154045105, "learning_rate": 0.002, "loss": 2.5636, "step": 397810 }, { "epoch": 0.792545900803264, "grad_norm": 0.14078889787197113, "learning_rate": 0.002, "loss": 2.5573, "step": 397820 }, { "epoch": 0.7925658230269029, "grad_norm": 0.14689749479293823, "learning_rate": 0.002, "loss": 2.5555, "step": 397830 }, { "epoch": 0.7925857452505419, "grad_norm": 0.16618914902210236, "learning_rate": 0.002, "loss": 2.5521, "step": 397840 }, { "epoch": 0.7926056674741808, "grad_norm": 0.15873983502388, "learning_rate": 0.002, "loss": 2.5581, "step": 397850 }, { "epoch": 0.7926255896978197, "grad_norm": 0.17256629467010498, "learning_rate": 0.002, "loss": 2.5255, "step": 397860 }, { "epoch": 0.7926455119214586, "grad_norm": 0.1390005350112915, "learning_rate": 0.002, "loss": 2.5582, "step": 397870 }, { "epoch": 0.7926654341450975, "grad_norm": 0.18279337882995605, "learning_rate": 0.002, "loss": 2.5567, "step": 397880 }, { "epoch": 0.7926853563687365, "grad_norm": 0.1717832237482071, "learning_rate": 0.002, "loss": 2.5448, "step": 397890 }, { "epoch": 0.7927052785923754, "grad_norm": 0.1725427210330963, "learning_rate": 0.002, "loss": 2.5608, "step": 397900 }, { "epoch": 0.7927252008160143, "grad_norm": 0.14868082106113434, "learning_rate": 0.002, "loss": 2.5555, "step": 397910 }, { "epoch": 0.7927451230396532, "grad_norm": 0.13929986953735352, "learning_rate": 0.002, "loss": 2.5636, "step": 397920 }, { "epoch": 0.7927650452632921, "grad_norm": 0.17223438620567322, "learning_rate": 0.002, "loss": 2.5543, "step": 397930 }, { "epoch": 0.7927849674869311, "grad_norm": 0.13886871933937073, "learning_rate": 0.002, "loss": 2.5377, "step": 397940 }, { "epoch": 0.79280488971057, "grad_norm": 0.1481318473815918, "learning_rate": 0.002, "loss": 2.5591, "step": 397950 }, { "epoch": 0.7928248119342088, "grad_norm": 0.1378866732120514, "learning_rate": 0.002, "loss": 2.5581, "step": 397960 }, { "epoch": 0.7928447341578477, "grad_norm": 0.1513793170452118, "learning_rate": 0.002, "loss": 2.5413, "step": 397970 }, { "epoch": 0.7928646563814866, "grad_norm": 0.15994086861610413, "learning_rate": 0.002, "loss": 2.5705, "step": 397980 }, { "epoch": 0.7928845786051256, "grad_norm": 0.18800008296966553, "learning_rate": 0.002, "loss": 2.5467, "step": 397990 }, { "epoch": 0.7929045008287645, "grad_norm": 0.12751853466033936, "learning_rate": 0.002, "loss": 2.5624, "step": 398000 }, { "epoch": 0.7929244230524034, "grad_norm": 0.14857614040374756, "learning_rate": 0.002, "loss": 2.5537, "step": 398010 }, { "epoch": 0.7929443452760423, "grad_norm": 0.17244675755500793, "learning_rate": 0.002, "loss": 2.5499, "step": 398020 }, { "epoch": 0.7929642674996813, "grad_norm": 0.178907111287117, "learning_rate": 0.002, "loss": 2.5431, "step": 398030 }, { "epoch": 0.7929841897233202, "grad_norm": 0.13364769518375397, "learning_rate": 0.002, "loss": 2.5432, "step": 398040 }, { "epoch": 0.7930041119469591, "grad_norm": 0.15062583982944489, "learning_rate": 0.002, "loss": 2.561, "step": 398050 }, { "epoch": 0.793024034170598, "grad_norm": 0.1629362404346466, "learning_rate": 0.002, "loss": 2.5455, "step": 398060 }, { "epoch": 0.7930439563942369, "grad_norm": 0.14270812273025513, "learning_rate": 0.002, "loss": 2.5411, "step": 398070 }, { "epoch": 0.7930638786178759, "grad_norm": 0.18950308859348297, "learning_rate": 0.002, "loss": 2.5492, "step": 398080 }, { "epoch": 0.7930838008415148, "grad_norm": 0.16198953986167908, "learning_rate": 0.002, "loss": 2.544, "step": 398090 }, { "epoch": 0.7931037230651536, "grad_norm": 0.17830908298492432, "learning_rate": 0.002, "loss": 2.5596, "step": 398100 }, { "epoch": 0.7931236452887925, "grad_norm": 0.16310325264930725, "learning_rate": 0.002, "loss": 2.5582, "step": 398110 }, { "epoch": 0.7931435675124314, "grad_norm": 0.14147712290287018, "learning_rate": 0.002, "loss": 2.5393, "step": 398120 }, { "epoch": 0.7931634897360704, "grad_norm": 0.15055686235427856, "learning_rate": 0.002, "loss": 2.5453, "step": 398130 }, { "epoch": 0.7931834119597093, "grad_norm": 0.14710171520709991, "learning_rate": 0.002, "loss": 2.5503, "step": 398140 }, { "epoch": 0.7932033341833482, "grad_norm": 0.16996143758296967, "learning_rate": 0.002, "loss": 2.541, "step": 398150 }, { "epoch": 0.7932232564069871, "grad_norm": 0.12851513922214508, "learning_rate": 0.002, "loss": 2.5502, "step": 398160 }, { "epoch": 0.793243178630626, "grad_norm": 0.16968445479869843, "learning_rate": 0.002, "loss": 2.5641, "step": 398170 }, { "epoch": 0.793263100854265, "grad_norm": 0.14891672134399414, "learning_rate": 0.002, "loss": 2.5548, "step": 398180 }, { "epoch": 0.7932830230779039, "grad_norm": 0.16071926057338715, "learning_rate": 0.002, "loss": 2.553, "step": 398190 }, { "epoch": 0.7933029453015428, "grad_norm": 0.16130176186561584, "learning_rate": 0.002, "loss": 2.5414, "step": 398200 }, { "epoch": 0.7933228675251817, "grad_norm": 0.17110180854797363, "learning_rate": 0.002, "loss": 2.5581, "step": 398210 }, { "epoch": 0.7933427897488206, "grad_norm": 0.16075241565704346, "learning_rate": 0.002, "loss": 2.559, "step": 398220 }, { "epoch": 0.7933627119724596, "grad_norm": 0.14254805445671082, "learning_rate": 0.002, "loss": 2.548, "step": 398230 }, { "epoch": 0.7933826341960984, "grad_norm": 0.15294784307479858, "learning_rate": 0.002, "loss": 2.5424, "step": 398240 }, { "epoch": 0.7934025564197373, "grad_norm": 0.14810556173324585, "learning_rate": 0.002, "loss": 2.5601, "step": 398250 }, { "epoch": 0.7934224786433762, "grad_norm": 0.15437617897987366, "learning_rate": 0.002, "loss": 2.5596, "step": 398260 }, { "epoch": 0.7934424008670151, "grad_norm": 0.15702134370803833, "learning_rate": 0.002, "loss": 2.5469, "step": 398270 }, { "epoch": 0.7934623230906541, "grad_norm": 0.17742928862571716, "learning_rate": 0.002, "loss": 2.5485, "step": 398280 }, { "epoch": 0.793482245314293, "grad_norm": 0.1426640748977661, "learning_rate": 0.002, "loss": 2.5644, "step": 398290 }, { "epoch": 0.7935021675379319, "grad_norm": 0.22640399634838104, "learning_rate": 0.002, "loss": 2.5477, "step": 398300 }, { "epoch": 0.7935220897615708, "grad_norm": 0.17556703090667725, "learning_rate": 0.002, "loss": 2.5544, "step": 398310 }, { "epoch": 0.7935420119852098, "grad_norm": 0.13226830959320068, "learning_rate": 0.002, "loss": 2.5454, "step": 398320 }, { "epoch": 0.7935619342088487, "grad_norm": 0.15127962827682495, "learning_rate": 0.002, "loss": 2.5499, "step": 398330 }, { "epoch": 0.7935818564324876, "grad_norm": 0.1541300266981125, "learning_rate": 0.002, "loss": 2.535, "step": 398340 }, { "epoch": 0.7936017786561265, "grad_norm": 0.14656470715999603, "learning_rate": 0.002, "loss": 2.5593, "step": 398350 }, { "epoch": 0.7936217008797654, "grad_norm": 0.1382184475660324, "learning_rate": 0.002, "loss": 2.5526, "step": 398360 }, { "epoch": 0.7936416231034044, "grad_norm": 0.1350875198841095, "learning_rate": 0.002, "loss": 2.5391, "step": 398370 }, { "epoch": 0.7936615453270432, "grad_norm": 0.19260025024414062, "learning_rate": 0.002, "loss": 2.5456, "step": 398380 }, { "epoch": 0.7936814675506821, "grad_norm": 0.14644142985343933, "learning_rate": 0.002, "loss": 2.554, "step": 398390 }, { "epoch": 0.793701389774321, "grad_norm": 0.19949685037136078, "learning_rate": 0.002, "loss": 2.5581, "step": 398400 }, { "epoch": 0.7937213119979599, "grad_norm": 0.1327487975358963, "learning_rate": 0.002, "loss": 2.5446, "step": 398410 }, { "epoch": 0.7937412342215989, "grad_norm": 0.2077057957649231, "learning_rate": 0.002, "loss": 2.5515, "step": 398420 }, { "epoch": 0.7937611564452378, "grad_norm": 0.12959644198417664, "learning_rate": 0.002, "loss": 2.5396, "step": 398430 }, { "epoch": 0.7937810786688767, "grad_norm": 0.14392830431461334, "learning_rate": 0.002, "loss": 2.5514, "step": 398440 }, { "epoch": 0.7938010008925156, "grad_norm": 0.1579146385192871, "learning_rate": 0.002, "loss": 2.5429, "step": 398450 }, { "epoch": 0.7938209231161545, "grad_norm": 0.15667878091335297, "learning_rate": 0.002, "loss": 2.5509, "step": 398460 }, { "epoch": 0.7938408453397935, "grad_norm": 0.18642888963222504, "learning_rate": 0.002, "loss": 2.5523, "step": 398470 }, { "epoch": 0.7938607675634324, "grad_norm": 0.13929183781147003, "learning_rate": 0.002, "loss": 2.5423, "step": 398480 }, { "epoch": 0.7938806897870713, "grad_norm": 0.14877712726593018, "learning_rate": 0.002, "loss": 2.5545, "step": 398490 }, { "epoch": 0.7939006120107102, "grad_norm": 0.21640369296073914, "learning_rate": 0.002, "loss": 2.5514, "step": 398500 }, { "epoch": 0.793920534234349, "grad_norm": 0.15035855770111084, "learning_rate": 0.002, "loss": 2.5558, "step": 398510 }, { "epoch": 0.793940456457988, "grad_norm": 0.16824281215667725, "learning_rate": 0.002, "loss": 2.5528, "step": 398520 }, { "epoch": 0.7939603786816269, "grad_norm": 0.1771359145641327, "learning_rate": 0.002, "loss": 2.5549, "step": 398530 }, { "epoch": 0.7939803009052658, "grad_norm": 0.1397968977689743, "learning_rate": 0.002, "loss": 2.5566, "step": 398540 }, { "epoch": 0.7940002231289047, "grad_norm": 0.1634826809167862, "learning_rate": 0.002, "loss": 2.5621, "step": 398550 }, { "epoch": 0.7940201453525436, "grad_norm": 0.17132468521595, "learning_rate": 0.002, "loss": 2.552, "step": 398560 }, { "epoch": 0.7940400675761826, "grad_norm": 0.14804300665855408, "learning_rate": 0.002, "loss": 2.542, "step": 398570 }, { "epoch": 0.7940599897998215, "grad_norm": 0.1384858787059784, "learning_rate": 0.002, "loss": 2.5553, "step": 398580 }, { "epoch": 0.7940799120234604, "grad_norm": 0.14366509020328522, "learning_rate": 0.002, "loss": 2.5502, "step": 398590 }, { "epoch": 0.7940998342470993, "grad_norm": 0.22290199995040894, "learning_rate": 0.002, "loss": 2.5602, "step": 398600 }, { "epoch": 0.7941197564707383, "grad_norm": 0.1778952181339264, "learning_rate": 0.002, "loss": 2.5591, "step": 398610 }, { "epoch": 0.7941396786943772, "grad_norm": 0.138075590133667, "learning_rate": 0.002, "loss": 2.5465, "step": 398620 }, { "epoch": 0.7941596009180161, "grad_norm": 0.16269387304782867, "learning_rate": 0.002, "loss": 2.5574, "step": 398630 }, { "epoch": 0.794179523141655, "grad_norm": 0.14755317568778992, "learning_rate": 0.002, "loss": 2.5593, "step": 398640 }, { "epoch": 0.7941994453652939, "grad_norm": 0.13973794877529144, "learning_rate": 0.002, "loss": 2.5489, "step": 398650 }, { "epoch": 0.7942193675889329, "grad_norm": 0.2087421864271164, "learning_rate": 0.002, "loss": 2.5506, "step": 398660 }, { "epoch": 0.7942392898125717, "grad_norm": 0.14182762801647186, "learning_rate": 0.002, "loss": 2.5533, "step": 398670 }, { "epoch": 0.7942592120362106, "grad_norm": 0.15744620561599731, "learning_rate": 0.002, "loss": 2.5371, "step": 398680 }, { "epoch": 0.7942791342598495, "grad_norm": 0.1272016018629074, "learning_rate": 0.002, "loss": 2.5431, "step": 398690 }, { "epoch": 0.7942990564834884, "grad_norm": 0.18531256914138794, "learning_rate": 0.002, "loss": 2.5524, "step": 398700 }, { "epoch": 0.7943189787071274, "grad_norm": 0.15515866875648499, "learning_rate": 0.002, "loss": 2.5575, "step": 398710 }, { "epoch": 0.7943389009307663, "grad_norm": 0.18428963422775269, "learning_rate": 0.002, "loss": 2.5554, "step": 398720 }, { "epoch": 0.7943588231544052, "grad_norm": 0.16328372061252594, "learning_rate": 0.002, "loss": 2.5523, "step": 398730 }, { "epoch": 0.7943787453780441, "grad_norm": 0.18164750933647156, "learning_rate": 0.002, "loss": 2.5463, "step": 398740 }, { "epoch": 0.794398667601683, "grad_norm": 0.1669938564300537, "learning_rate": 0.002, "loss": 2.5559, "step": 398750 }, { "epoch": 0.794418589825322, "grad_norm": 0.12718817591667175, "learning_rate": 0.002, "loss": 2.5443, "step": 398760 }, { "epoch": 0.7944385120489609, "grad_norm": 0.16848692297935486, "learning_rate": 0.002, "loss": 2.5484, "step": 398770 }, { "epoch": 0.7944584342725998, "grad_norm": 0.15602046251296997, "learning_rate": 0.002, "loss": 2.5547, "step": 398780 }, { "epoch": 0.7944783564962387, "grad_norm": 0.1509755402803421, "learning_rate": 0.002, "loss": 2.5389, "step": 398790 }, { "epoch": 0.7944982787198775, "grad_norm": 0.14406199753284454, "learning_rate": 0.002, "loss": 2.5487, "step": 398800 }, { "epoch": 0.7945182009435166, "grad_norm": 0.15143722295761108, "learning_rate": 0.002, "loss": 2.574, "step": 398810 }, { "epoch": 0.7945381231671554, "grad_norm": 0.16005933284759521, "learning_rate": 0.002, "loss": 2.5558, "step": 398820 }, { "epoch": 0.7945580453907943, "grad_norm": 0.16174937784671783, "learning_rate": 0.002, "loss": 2.5636, "step": 398830 }, { "epoch": 0.7945779676144332, "grad_norm": 0.13833989202976227, "learning_rate": 0.002, "loss": 2.553, "step": 398840 }, { "epoch": 0.7945978898380721, "grad_norm": 0.17625562846660614, "learning_rate": 0.002, "loss": 2.5572, "step": 398850 }, { "epoch": 0.7946178120617111, "grad_norm": 0.14126527309417725, "learning_rate": 0.002, "loss": 2.5575, "step": 398860 }, { "epoch": 0.79463773428535, "grad_norm": 0.15113788843154907, "learning_rate": 0.002, "loss": 2.5562, "step": 398870 }, { "epoch": 0.7946576565089889, "grad_norm": 0.14272204041481018, "learning_rate": 0.002, "loss": 2.5685, "step": 398880 }, { "epoch": 0.7946775787326278, "grad_norm": 0.15216462314128876, "learning_rate": 0.002, "loss": 2.554, "step": 398890 }, { "epoch": 0.7946975009562668, "grad_norm": 0.14072285592556, "learning_rate": 0.002, "loss": 2.5557, "step": 398900 }, { "epoch": 0.7947174231799057, "grad_norm": 0.16886155307292938, "learning_rate": 0.002, "loss": 2.5601, "step": 398910 }, { "epoch": 0.7947373454035446, "grad_norm": 0.1457933485507965, "learning_rate": 0.002, "loss": 2.5406, "step": 398920 }, { "epoch": 0.7947572676271835, "grad_norm": 0.13580773770809174, "learning_rate": 0.002, "loss": 2.5637, "step": 398930 }, { "epoch": 0.7947771898508224, "grad_norm": 0.15365411341190338, "learning_rate": 0.002, "loss": 2.5559, "step": 398940 }, { "epoch": 0.7947971120744614, "grad_norm": 0.14395375549793243, "learning_rate": 0.002, "loss": 2.5475, "step": 398950 }, { "epoch": 0.7948170342981002, "grad_norm": 0.16970671713352203, "learning_rate": 0.002, "loss": 2.5465, "step": 398960 }, { "epoch": 0.7948369565217391, "grad_norm": 0.1368710994720459, "learning_rate": 0.002, "loss": 2.5472, "step": 398970 }, { "epoch": 0.794856878745378, "grad_norm": 0.13258615136146545, "learning_rate": 0.002, "loss": 2.5582, "step": 398980 }, { "epoch": 0.7948768009690169, "grad_norm": 0.16293296217918396, "learning_rate": 0.002, "loss": 2.5525, "step": 398990 }, { "epoch": 0.7948967231926559, "grad_norm": 0.2255130410194397, "learning_rate": 0.002, "loss": 2.5599, "step": 399000 }, { "epoch": 0.7949166454162948, "grad_norm": 0.13103103637695312, "learning_rate": 0.002, "loss": 2.5466, "step": 399010 }, { "epoch": 0.7949365676399337, "grad_norm": 0.14197106659412384, "learning_rate": 0.002, "loss": 2.5682, "step": 399020 }, { "epoch": 0.7949564898635726, "grad_norm": 0.13699796795845032, "learning_rate": 0.002, "loss": 2.5542, "step": 399030 }, { "epoch": 0.7949764120872115, "grad_norm": 0.16931189596652985, "learning_rate": 0.002, "loss": 2.5514, "step": 399040 }, { "epoch": 0.7949963343108505, "grad_norm": 0.14700423181056976, "learning_rate": 0.002, "loss": 2.5427, "step": 399050 }, { "epoch": 0.7950162565344894, "grad_norm": 0.17986872792243958, "learning_rate": 0.002, "loss": 2.5585, "step": 399060 }, { "epoch": 0.7950361787581283, "grad_norm": 0.13853250443935394, "learning_rate": 0.002, "loss": 2.5491, "step": 399070 }, { "epoch": 0.7950561009817672, "grad_norm": 0.13835632801055908, "learning_rate": 0.002, "loss": 2.5509, "step": 399080 }, { "epoch": 0.795076023205406, "grad_norm": 0.18255969882011414, "learning_rate": 0.002, "loss": 2.5496, "step": 399090 }, { "epoch": 0.795095945429045, "grad_norm": 0.14699172973632812, "learning_rate": 0.002, "loss": 2.5589, "step": 399100 }, { "epoch": 0.7951158676526839, "grad_norm": 0.17864103615283966, "learning_rate": 0.002, "loss": 2.5447, "step": 399110 }, { "epoch": 0.7951357898763228, "grad_norm": 0.1529688686132431, "learning_rate": 0.002, "loss": 2.5531, "step": 399120 }, { "epoch": 0.7951557120999617, "grad_norm": 0.13731348514556885, "learning_rate": 0.002, "loss": 2.5678, "step": 399130 }, { "epoch": 0.7951756343236006, "grad_norm": 0.14764688909053802, "learning_rate": 0.002, "loss": 2.5466, "step": 399140 }, { "epoch": 0.7951955565472396, "grad_norm": 0.18429039418697357, "learning_rate": 0.002, "loss": 2.5526, "step": 399150 }, { "epoch": 0.7952154787708785, "grad_norm": 0.14726099371910095, "learning_rate": 0.002, "loss": 2.5617, "step": 399160 }, { "epoch": 0.7952354009945174, "grad_norm": 0.14886978268623352, "learning_rate": 0.002, "loss": 2.5383, "step": 399170 }, { "epoch": 0.7952553232181563, "grad_norm": 0.14863452315330505, "learning_rate": 0.002, "loss": 2.5316, "step": 399180 }, { "epoch": 0.7952752454417953, "grad_norm": 0.19460588693618774, "learning_rate": 0.002, "loss": 2.5699, "step": 399190 }, { "epoch": 0.7952951676654342, "grad_norm": 0.13809621334075928, "learning_rate": 0.002, "loss": 2.5542, "step": 399200 }, { "epoch": 0.7953150898890731, "grad_norm": 0.13616667687892914, "learning_rate": 0.002, "loss": 2.5456, "step": 399210 }, { "epoch": 0.795335012112712, "grad_norm": 0.16509142518043518, "learning_rate": 0.002, "loss": 2.5454, "step": 399220 }, { "epoch": 0.7953549343363508, "grad_norm": 0.1419239044189453, "learning_rate": 0.002, "loss": 2.5456, "step": 399230 }, { "epoch": 0.7953748565599899, "grad_norm": 0.1693224310874939, "learning_rate": 0.002, "loss": 2.5533, "step": 399240 }, { "epoch": 0.7953947787836287, "grad_norm": 0.16401295363903046, "learning_rate": 0.002, "loss": 2.5549, "step": 399250 }, { "epoch": 0.7954147010072676, "grad_norm": 0.1730494648218155, "learning_rate": 0.002, "loss": 2.5424, "step": 399260 }, { "epoch": 0.7954346232309065, "grad_norm": 0.14373382925987244, "learning_rate": 0.002, "loss": 2.555, "step": 399270 }, { "epoch": 0.7954545454545454, "grad_norm": 0.15197385847568512, "learning_rate": 0.002, "loss": 2.549, "step": 399280 }, { "epoch": 0.7954744676781844, "grad_norm": 0.17275606095790863, "learning_rate": 0.002, "loss": 2.5628, "step": 399290 }, { "epoch": 0.7954943899018233, "grad_norm": 0.15038669109344482, "learning_rate": 0.002, "loss": 2.5465, "step": 399300 }, { "epoch": 0.7955143121254622, "grad_norm": 0.16874706745147705, "learning_rate": 0.002, "loss": 2.5434, "step": 399310 }, { "epoch": 0.7955342343491011, "grad_norm": 0.16755402088165283, "learning_rate": 0.002, "loss": 2.5685, "step": 399320 }, { "epoch": 0.79555415657274, "grad_norm": 0.17023035883903503, "learning_rate": 0.002, "loss": 2.5523, "step": 399330 }, { "epoch": 0.795574078796379, "grad_norm": 0.13680866360664368, "learning_rate": 0.002, "loss": 2.5656, "step": 399340 }, { "epoch": 0.7955940010200179, "grad_norm": 0.2031531184911728, "learning_rate": 0.002, "loss": 2.5626, "step": 399350 }, { "epoch": 0.7956139232436568, "grad_norm": 0.1862054467201233, "learning_rate": 0.002, "loss": 2.5413, "step": 399360 }, { "epoch": 0.7956338454672957, "grad_norm": 0.13630639016628265, "learning_rate": 0.002, "loss": 2.5591, "step": 399370 }, { "epoch": 0.7956537676909345, "grad_norm": 0.1332029104232788, "learning_rate": 0.002, "loss": 2.5517, "step": 399380 }, { "epoch": 0.7956736899145735, "grad_norm": 0.17288480699062347, "learning_rate": 0.002, "loss": 2.5434, "step": 399390 }, { "epoch": 0.7956936121382124, "grad_norm": 0.14659607410430908, "learning_rate": 0.002, "loss": 2.5493, "step": 399400 }, { "epoch": 0.7957135343618513, "grad_norm": 0.14992475509643555, "learning_rate": 0.002, "loss": 2.5609, "step": 399410 }, { "epoch": 0.7957334565854902, "grad_norm": 0.13141728937625885, "learning_rate": 0.002, "loss": 2.5414, "step": 399420 }, { "epoch": 0.7957533788091291, "grad_norm": 0.17786532640457153, "learning_rate": 0.002, "loss": 2.5557, "step": 399430 }, { "epoch": 0.7957733010327681, "grad_norm": 0.12902943789958954, "learning_rate": 0.002, "loss": 2.534, "step": 399440 }, { "epoch": 0.795793223256407, "grad_norm": 0.1683853417634964, "learning_rate": 0.002, "loss": 2.5628, "step": 399450 }, { "epoch": 0.7958131454800459, "grad_norm": 0.17234934866428375, "learning_rate": 0.002, "loss": 2.5684, "step": 399460 }, { "epoch": 0.7958330677036848, "grad_norm": 0.14910933375358582, "learning_rate": 0.002, "loss": 2.5553, "step": 399470 }, { "epoch": 0.7958529899273237, "grad_norm": 0.1469014286994934, "learning_rate": 0.002, "loss": 2.5585, "step": 399480 }, { "epoch": 0.7958729121509627, "grad_norm": 0.15901850163936615, "learning_rate": 0.002, "loss": 2.5511, "step": 399490 }, { "epoch": 0.7958928343746016, "grad_norm": 0.12169796973466873, "learning_rate": 0.002, "loss": 2.5572, "step": 399500 }, { "epoch": 0.7959127565982405, "grad_norm": 0.14482416212558746, "learning_rate": 0.002, "loss": 2.547, "step": 399510 }, { "epoch": 0.7959326788218793, "grad_norm": 0.15895159542560577, "learning_rate": 0.002, "loss": 2.5543, "step": 399520 }, { "epoch": 0.7959526010455183, "grad_norm": 0.17179572582244873, "learning_rate": 0.002, "loss": 2.5579, "step": 399530 }, { "epoch": 0.7959725232691572, "grad_norm": 0.1528366506099701, "learning_rate": 0.002, "loss": 2.5606, "step": 399540 }, { "epoch": 0.7959924454927961, "grad_norm": 0.1430068165063858, "learning_rate": 0.002, "loss": 2.557, "step": 399550 }, { "epoch": 0.796012367716435, "grad_norm": 0.15148137509822845, "learning_rate": 0.002, "loss": 2.5548, "step": 399560 }, { "epoch": 0.7960322899400739, "grad_norm": 0.1361013948917389, "learning_rate": 0.002, "loss": 2.5421, "step": 399570 }, { "epoch": 0.7960522121637129, "grad_norm": 0.1407388299703598, "learning_rate": 0.002, "loss": 2.5447, "step": 399580 }, { "epoch": 0.7960721343873518, "grad_norm": 0.14324814081192017, "learning_rate": 0.002, "loss": 2.5632, "step": 399590 }, { "epoch": 0.7960920566109907, "grad_norm": 0.19308140873908997, "learning_rate": 0.002, "loss": 2.54, "step": 399600 }, { "epoch": 0.7961119788346296, "grad_norm": 0.172729954123497, "learning_rate": 0.002, "loss": 2.5567, "step": 399610 }, { "epoch": 0.7961319010582685, "grad_norm": 0.14055880904197693, "learning_rate": 0.002, "loss": 2.5722, "step": 399620 }, { "epoch": 0.7961518232819075, "grad_norm": 0.16139726340770721, "learning_rate": 0.002, "loss": 2.5587, "step": 399630 }, { "epoch": 0.7961717455055464, "grad_norm": 0.16203106939792633, "learning_rate": 0.002, "loss": 2.5488, "step": 399640 }, { "epoch": 0.7961916677291853, "grad_norm": 0.15789194405078888, "learning_rate": 0.002, "loss": 2.5677, "step": 399650 }, { "epoch": 0.7962115899528242, "grad_norm": 0.13672997057437897, "learning_rate": 0.002, "loss": 2.5388, "step": 399660 }, { "epoch": 0.796231512176463, "grad_norm": 0.18844006955623627, "learning_rate": 0.002, "loss": 2.5485, "step": 399670 }, { "epoch": 0.796251434400102, "grad_norm": 0.16063690185546875, "learning_rate": 0.002, "loss": 2.5561, "step": 399680 }, { "epoch": 0.7962713566237409, "grad_norm": 0.14182499051094055, "learning_rate": 0.002, "loss": 2.5614, "step": 399690 }, { "epoch": 0.7962912788473798, "grad_norm": 0.13605773448944092, "learning_rate": 0.002, "loss": 2.5523, "step": 399700 }, { "epoch": 0.7963112010710187, "grad_norm": 0.16863545775413513, "learning_rate": 0.002, "loss": 2.5393, "step": 399710 }, { "epoch": 0.7963311232946576, "grad_norm": 0.17502595484256744, "learning_rate": 0.002, "loss": 2.5589, "step": 399720 }, { "epoch": 0.7963510455182966, "grad_norm": 0.1482219249010086, "learning_rate": 0.002, "loss": 2.5594, "step": 399730 }, { "epoch": 0.7963709677419355, "grad_norm": 0.1677902340888977, "learning_rate": 0.002, "loss": 2.5434, "step": 399740 }, { "epoch": 0.7963908899655744, "grad_norm": 0.1414656639099121, "learning_rate": 0.002, "loss": 2.5566, "step": 399750 }, { "epoch": 0.7964108121892133, "grad_norm": 0.14147160947322845, "learning_rate": 0.002, "loss": 2.5567, "step": 399760 }, { "epoch": 0.7964307344128522, "grad_norm": 0.1555997133255005, "learning_rate": 0.002, "loss": 2.5625, "step": 399770 }, { "epoch": 0.7964506566364912, "grad_norm": 0.16929790377616882, "learning_rate": 0.002, "loss": 2.5581, "step": 399780 }, { "epoch": 0.7964705788601301, "grad_norm": 0.1461833268404007, "learning_rate": 0.002, "loss": 2.5526, "step": 399790 }, { "epoch": 0.796490501083769, "grad_norm": 0.15342871844768524, "learning_rate": 0.002, "loss": 2.5645, "step": 399800 }, { "epoch": 0.7965104233074078, "grad_norm": 0.137991264462471, "learning_rate": 0.002, "loss": 2.5592, "step": 399810 }, { "epoch": 0.7965303455310468, "grad_norm": 0.14148655533790588, "learning_rate": 0.002, "loss": 2.5594, "step": 399820 }, { "epoch": 0.7965502677546857, "grad_norm": 0.1586882323026657, "learning_rate": 0.002, "loss": 2.5694, "step": 399830 }, { "epoch": 0.7965701899783246, "grad_norm": 0.14725683629512787, "learning_rate": 0.002, "loss": 2.5559, "step": 399840 }, { "epoch": 0.7965901122019635, "grad_norm": 0.1468372792005539, "learning_rate": 0.002, "loss": 2.5444, "step": 399850 }, { "epoch": 0.7966100344256024, "grad_norm": 0.16992856562137604, "learning_rate": 0.002, "loss": 2.5585, "step": 399860 }, { "epoch": 0.7966299566492414, "grad_norm": 0.1656477451324463, "learning_rate": 0.002, "loss": 2.5587, "step": 399870 }, { "epoch": 0.7966498788728803, "grad_norm": 0.1687692105770111, "learning_rate": 0.002, "loss": 2.5336, "step": 399880 }, { "epoch": 0.7966698010965192, "grad_norm": 0.14747755229473114, "learning_rate": 0.002, "loss": 2.5607, "step": 399890 }, { "epoch": 0.7966897233201581, "grad_norm": 0.14409281313419342, "learning_rate": 0.002, "loss": 2.5456, "step": 399900 }, { "epoch": 0.796709645543797, "grad_norm": 0.15367071330547333, "learning_rate": 0.002, "loss": 2.5546, "step": 399910 }, { "epoch": 0.796729567767436, "grad_norm": 0.16992349922657013, "learning_rate": 0.002, "loss": 2.5506, "step": 399920 }, { "epoch": 0.7967494899910749, "grad_norm": 0.15919922292232513, "learning_rate": 0.002, "loss": 2.5662, "step": 399930 }, { "epoch": 0.7967694122147138, "grad_norm": 0.14944875240325928, "learning_rate": 0.002, "loss": 2.5492, "step": 399940 }, { "epoch": 0.7967893344383526, "grad_norm": 0.1631845235824585, "learning_rate": 0.002, "loss": 2.5761, "step": 399950 }, { "epoch": 0.7968092566619915, "grad_norm": 0.15198133885860443, "learning_rate": 0.002, "loss": 2.5563, "step": 399960 }, { "epoch": 0.7968291788856305, "grad_norm": 0.14805825054645538, "learning_rate": 0.002, "loss": 2.5486, "step": 399970 }, { "epoch": 0.7968491011092694, "grad_norm": 0.14401569962501526, "learning_rate": 0.002, "loss": 2.5614, "step": 399980 }, { "epoch": 0.7968690233329083, "grad_norm": 0.13406141102313995, "learning_rate": 0.002, "loss": 2.5563, "step": 399990 }, { "epoch": 0.7968889455565472, "grad_norm": 0.16786833107471466, "learning_rate": 0.002, "loss": 2.5524, "step": 400000 }, { "epoch": 0.7969088677801861, "grad_norm": 0.15789537131786346, "learning_rate": 0.00198, "loss": 2.5606, "step": 400010 }, { "epoch": 0.7969287900038251, "grad_norm": 0.13598322868347168, "learning_rate": 0.001971715728752538, "loss": 2.5468, "step": 400020 }, { "epoch": 0.796948712227464, "grad_norm": 0.16228272020816803, "learning_rate": 0.0019653589838486227, "loss": 2.557, "step": 400030 }, { "epoch": 0.7969686344511029, "grad_norm": 0.1845237910747528, "learning_rate": 0.00196, "loss": 2.5553, "step": 400040 }, { "epoch": 0.7969885566747418, "grad_norm": 0.14702513813972473, "learning_rate": 0.001955278640450004, "loss": 2.5627, "step": 400050 }, { "epoch": 0.7970084788983807, "grad_norm": 0.14810208976268768, "learning_rate": 0.0019510102051443366, "loss": 2.5501, "step": 400060 }, { "epoch": 0.7970284011220197, "grad_norm": 0.14273054897785187, "learning_rate": 0.0019470849737787082, "loss": 2.5567, "step": 400070 }, { "epoch": 0.7970483233456586, "grad_norm": 0.13485367596149445, "learning_rate": 0.0019434314575050762, "loss": 2.539, "step": 400080 }, { "epoch": 0.7970682455692975, "grad_norm": 0.16581478714942932, "learning_rate": 0.0019399999999999999, "loss": 2.5393, "step": 400090 }, { "epoch": 0.7970881677929363, "grad_norm": 0.177420973777771, "learning_rate": 0.0019367544467966324, "loss": 2.5512, "step": 400100 }, { "epoch": 0.7971080900165753, "grad_norm": 0.1475823074579239, "learning_rate": 0.0019336675041928919, "loss": 2.5582, "step": 400110 }, { "epoch": 0.7971280122402142, "grad_norm": 0.139571875333786, "learning_rate": 0.001930717967697245, "loss": 2.551, "step": 400120 }, { "epoch": 0.7971479344638531, "grad_norm": 0.14178411662578583, "learning_rate": 0.00192788897449072, "loss": 2.5623, "step": 400130 }, { "epoch": 0.797167856687492, "grad_norm": 0.1453656256198883, "learning_rate": 0.0019251668522645212, "loss": 2.5461, "step": 400140 }, { "epoch": 0.7971877789111309, "grad_norm": 0.1680845320224762, "learning_rate": 0.0019225403330758518, "loss": 2.5533, "step": 400150 }, { "epoch": 0.7972077011347699, "grad_norm": 0.12718060612678528, "learning_rate": 0.00192, "loss": 2.5306, "step": 400160 }, { "epoch": 0.7972276233584088, "grad_norm": 0.14337702095508575, "learning_rate": 0.0019175378874876467, "loss": 2.5436, "step": 400170 }, { "epoch": 0.7972475455820477, "grad_norm": 0.15974104404449463, "learning_rate": 0.0019151471862576144, "loss": 2.5632, "step": 400180 }, { "epoch": 0.7972674678056866, "grad_norm": 0.158884659409523, "learning_rate": 0.0019128220211291867, "loss": 2.5481, "step": 400190 }, { "epoch": 0.7972873900293255, "grad_norm": 0.1431550234556198, "learning_rate": 0.0019105572809000086, "loss": 2.5593, "step": 400200 }, { "epoch": 0.7973073122529645, "grad_norm": 0.15382954478263855, "learning_rate": 0.0019083484861008833, "loss": 2.5641, "step": 400210 }, { "epoch": 0.7973272344766034, "grad_norm": 0.16017603874206543, "learning_rate": 0.0019061916848035314, "loss": 2.54, "step": 400220 }, { "epoch": 0.7973471567002423, "grad_norm": 0.1577632576227188, "learning_rate": 0.0019040833695337456, "loss": 2.54, "step": 400230 }, { "epoch": 0.7973670789238811, "grad_norm": 0.20268318057060242, "learning_rate": 0.001902020410288673, "loss": 2.5476, "step": 400240 }, { "epoch": 0.79738700114752, "grad_norm": 0.16080786287784576, "learning_rate": 0.0019, "loss": 2.5383, "step": 400250 }, { "epoch": 0.797406923371159, "grad_norm": 0.18139733374118805, "learning_rate": 0.0018980196097281444, "loss": 2.5537, "step": 400260 }, { "epoch": 0.7974268455947979, "grad_norm": 0.1279629021883011, "learning_rate": 0.0018960769515458673, "loss": 2.5477, "step": 400270 }, { "epoch": 0.7974467678184368, "grad_norm": 0.1450481116771698, "learning_rate": 0.0018941699475574164, "loss": 2.5494, "step": 400280 }, { "epoch": 0.7974666900420757, "grad_norm": 0.1620795875787735, "learning_rate": 0.00189229670385731, "loss": 2.5581, "step": 400290 }, { "epoch": 0.7974866122657146, "grad_norm": 0.16198201477527618, "learning_rate": 0.0018904554884989668, "loss": 2.558, "step": 400300 }, { "epoch": 0.7975065344893536, "grad_norm": 0.13751396536827087, "learning_rate": 0.0018886447127433998, "loss": 2.5355, "step": 400310 }, { "epoch": 0.7975264567129925, "grad_norm": 0.14582569897174835, "learning_rate": 0.0018868629150101525, "loss": 2.541, "step": 400320 }, { "epoch": 0.7975463789366314, "grad_norm": 0.16046074032783508, "learning_rate": 0.0018851087470692395, "loss": 2.5404, "step": 400330 }, { "epoch": 0.7975663011602703, "grad_norm": 0.14361894130706787, "learning_rate": 0.001883380962103094, "loss": 2.5292, "step": 400340 }, { "epoch": 0.7975862233839092, "grad_norm": 0.17577555775642395, "learning_rate": 0.0018816784043380076, "loss": 2.5584, "step": 400350 }, { "epoch": 0.7976061456075482, "grad_norm": 0.14137482643127441, "learning_rate": 0.00188, "loss": 2.5462, "step": 400360 }, { "epoch": 0.7976260678311871, "grad_norm": 0.1400216668844223, "learning_rate": 0.0018783447493940356, "loss": 2.5305, "step": 400370 }, { "epoch": 0.797645990054826, "grad_norm": 0.16553784906864166, "learning_rate": 0.0018767117199406204, "loss": 2.5279, "step": 400380 }, { "epoch": 0.7976659122784648, "grad_norm": 0.1466386318206787, "learning_rate": 0.0018751000400320321, "loss": 2.5324, "step": 400390 }, { "epoch": 0.7976858345021038, "grad_norm": 0.14347292482852936, "learning_rate": 0.001873508893593265, "loss": 2.5436, "step": 400400 }, { "epoch": 0.7977057567257427, "grad_norm": 0.13633215427398682, "learning_rate": 0.001871937515251343, "loss": 2.5529, "step": 400410 }, { "epoch": 0.7977256789493816, "grad_norm": 0.14621829986572266, "learning_rate": 0.0018703851860318427, "loss": 2.5493, "step": 400420 }, { "epoch": 0.7977456011730205, "grad_norm": 0.15457147359848022, "learning_rate": 0.00186885122951396, "loss": 2.5509, "step": 400430 }, { "epoch": 0.7977655233966594, "grad_norm": 0.16809315979480743, "learning_rate": 0.0018673350083857842, "loss": 2.5365, "step": 400440 }, { "epoch": 0.7977854456202984, "grad_norm": 0.13516990840435028, "learning_rate": 0.0018658359213500127, "loss": 2.5456, "step": 400450 }, { "epoch": 0.7978053678439373, "grad_norm": 0.13053426146507263, "learning_rate": 0.0018643534003374947, "loss": 2.5463, "step": 400460 }, { "epoch": 0.7978252900675762, "grad_norm": 0.14450643956661224, "learning_rate": 0.0018628869079919791, "loss": 2.5442, "step": 400470 }, { "epoch": 0.7978452122912151, "grad_norm": 0.15514034032821655, "learning_rate": 0.0018614359353944898, "loss": 2.5407, "step": 400480 }, { "epoch": 0.797865134514854, "grad_norm": 0.2021617740392685, "learning_rate": 0.00186, "loss": 2.5455, "step": 400490 }, { "epoch": 0.797885056738493, "grad_norm": 0.1605004519224167, "learning_rate": 0.0018585786437626906, "loss": 2.535, "step": 400500 }, { "epoch": 0.7979049789621319, "grad_norm": 0.14689035713672638, "learning_rate": 0.001857171431429143, "loss": 2.5633, "step": 400510 }, { "epoch": 0.7979249011857708, "grad_norm": 0.13243351876735687, "learning_rate": 0.0018557779489814404, "loss": 2.5547, "step": 400520 }, { "epoch": 0.7979448234094096, "grad_norm": 0.19309458136558533, "learning_rate": 0.0018543978022143898, "loss": 2.5455, "step": 400530 }, { "epoch": 0.7979647456330485, "grad_norm": 0.15435795485973358, "learning_rate": 0.0018530306154330095, "loss": 2.5445, "step": 400540 }, { "epoch": 0.7979846678566875, "grad_norm": 0.13495010137557983, "learning_rate": 0.0018516760302580869, "loss": 2.5529, "step": 400550 }, { "epoch": 0.7980045900803264, "grad_norm": 0.15542714297771454, "learning_rate": 0.0018503337045290423, "loss": 2.5447, "step": 400560 }, { "epoch": 0.7980245123039653, "grad_norm": 0.17557378113269806, "learning_rate": 0.001849003311294585, "loss": 2.5394, "step": 400570 }, { "epoch": 0.7980444345276042, "grad_norm": 0.14723631739616394, "learning_rate": 0.001847684537882722, "loss": 2.5476, "step": 400580 }, { "epoch": 0.7980643567512431, "grad_norm": 0.17215780913829803, "learning_rate": 0.0018463770850426278, "loss": 2.5655, "step": 400590 }, { "epoch": 0.7980842789748821, "grad_norm": 0.1328696757555008, "learning_rate": 0.0018450806661517035, "loss": 2.5622, "step": 400600 }, { "epoch": 0.798104201198521, "grad_norm": 0.15025146305561066, "learning_rate": 0.001843795006481867, "loss": 2.5508, "step": 400610 }, { "epoch": 0.7981241234221599, "grad_norm": 0.15790019929409027, "learning_rate": 0.0018425198425197637, "loss": 2.5375, "step": 400620 }, { "epoch": 0.7981440456457988, "grad_norm": 0.14588844776153564, "learning_rate": 0.0018412549213361244, "loss": 2.554, "step": 400630 }, { "epoch": 0.7981639678694377, "grad_norm": 0.14258281886577606, "learning_rate": 0.00184, "loss": 2.5381, "step": 400640 }, { "epoch": 0.7981838900930767, "grad_norm": 0.14186303317546844, "learning_rate": 0.001838754845034029, "loss": 2.535, "step": 400650 }, { "epoch": 0.7982038123167156, "grad_norm": 0.1652912199497223, "learning_rate": 0.0018375192319072808, "loss": 2.5353, "step": 400660 }, { "epoch": 0.7982237345403544, "grad_norm": 0.15252353250980377, "learning_rate": 0.0018362929445625512, "loss": 2.5383, "step": 400670 }, { "epoch": 0.7982436567639933, "grad_norm": 0.16481387615203857, "learning_rate": 0.0018350757749752936, "loss": 2.5509, "step": 400680 }, { "epoch": 0.7982635789876323, "grad_norm": 0.15426945686340332, "learning_rate": 0.0018338675227416387, "loss": 2.5174, "step": 400690 }, { "epoch": 0.7982835012112712, "grad_norm": 0.17859479784965515, "learning_rate": 0.001832667994693185, "loss": 2.5351, "step": 400700 }, { "epoch": 0.7983034234349101, "grad_norm": 0.15767379105091095, "learning_rate": 0.0018314770045364727, "loss": 2.5333, "step": 400710 }, { "epoch": 0.798323345658549, "grad_norm": 0.18488699197769165, "learning_rate": 0.0018302943725152286, "loss": 2.5452, "step": 400720 }, { "epoch": 0.7983432678821879, "grad_norm": 0.15157672762870789, "learning_rate": 0.0018291199250936494, "loss": 2.5383, "step": 400730 }, { "epoch": 0.7983631901058269, "grad_norm": 0.14131927490234375, "learning_rate": 0.0018279534946591474, "loss": 2.5386, "step": 400740 }, { "epoch": 0.7983831123294658, "grad_norm": 0.15071697533130646, "learning_rate": 0.0018267949192431123, "loss": 2.5514, "step": 400750 }, { "epoch": 0.7984030345531047, "grad_norm": 0.17703130841255188, "learning_rate": 0.0018256440422583732, "loss": 2.5453, "step": 400760 }, { "epoch": 0.7984229567767436, "grad_norm": 0.16537870466709137, "learning_rate": 0.0018245007122521576, "loss": 2.5477, "step": 400770 }, { "epoch": 0.7984428790003825, "grad_norm": 0.13924117386341095, "learning_rate": 0.0018233647826734433, "loss": 2.5487, "step": 400780 }, { "epoch": 0.7984628012240215, "grad_norm": 0.1596071422100067, "learning_rate": 0.0018222361116536883, "loss": 2.5432, "step": 400790 }, { "epoch": 0.7984827234476604, "grad_norm": 0.1434679478406906, "learning_rate": 0.0018211145618000169, "loss": 2.5381, "step": 400800 }, { "epoch": 0.7985026456712992, "grad_norm": 0.14060911536216736, "learning_rate": 0.00182, "loss": 2.5443, "step": 400810 }, { "epoch": 0.7985225678949381, "grad_norm": 0.13200603425502777, "learning_rate": 0.0018188922972372516, "loss": 2.5403, "step": 400820 }, { "epoch": 0.798542490118577, "grad_norm": 0.15856467187404633, "learning_rate": 0.001817791328417114, "loss": 2.5432, "step": 400830 }, { "epoch": 0.798562412342216, "grad_norm": 0.15332576632499695, "learning_rate": 0.0018166969722017666, "loss": 2.5446, "step": 400840 }, { "epoch": 0.7985823345658549, "grad_norm": 0.13354821503162384, "learning_rate": 0.0018156091108541424, "loss": 2.5374, "step": 400850 }, { "epoch": 0.7986022567894938, "grad_norm": 0.1335950493812561, "learning_rate": 0.0018145276300900858, "loss": 2.5312, "step": 400860 }, { "epoch": 0.7986221790131327, "grad_norm": 0.14411376416683197, "learning_rate": 0.0018134524189382239, "loss": 2.5482, "step": 400870 }, { "epoch": 0.7986421012367716, "grad_norm": 0.15172173082828522, "learning_rate": 0.001812383369607063, "loss": 2.5496, "step": 400880 }, { "epoch": 0.7986620234604106, "grad_norm": 0.13944454491138458, "learning_rate": 0.001811320377358868, "loss": 2.5565, "step": 400890 }, { "epoch": 0.7986819456840495, "grad_norm": 0.1584743857383728, "learning_rate": 0.0018102633403898974, "loss": 2.539, "step": 400900 }, { "epoch": 0.7987018679076884, "grad_norm": 0.1650189459323883, "learning_rate": 0.001809212159716611, "loss": 2.54, "step": 400910 }, { "epoch": 0.7987217901313273, "grad_norm": 0.13850072026252747, "learning_rate": 0.0018081667390674912, "loss": 2.5542, "step": 400920 }, { "epoch": 0.7987417123549662, "grad_norm": 0.1416229009628296, "learning_rate": 0.001807126984780141, "loss": 2.5443, "step": 400930 }, { "epoch": 0.7987616345786052, "grad_norm": 0.15036022663116455, "learning_rate": 0.0018060928057033467, "loss": 2.5391, "step": 400940 }, { "epoch": 0.798781556802244, "grad_norm": 0.13617442548274994, "learning_rate": 0.001805064113103821, "loss": 2.5489, "step": 400950 }, { "epoch": 0.7988014790258829, "grad_norm": 0.13908250629901886, "learning_rate": 0.0018040408205773457, "loss": 2.5464, "step": 400960 }, { "epoch": 0.7988214012495218, "grad_norm": 0.15261605381965637, "learning_rate": 0.001803022843964078, "loss": 2.5506, "step": 400970 }, { "epoch": 0.7988413234731607, "grad_norm": 0.12970580160617828, "learning_rate": 0.0018020101012677667, "loss": 2.5272, "step": 400980 }, { "epoch": 0.7988612456967997, "grad_norm": 0.15447448194026947, "learning_rate": 0.001801002512578676, "loss": 2.5453, "step": 400990 }, { "epoch": 0.7988811679204386, "grad_norm": 0.13538798689842224, "learning_rate": 0.0018000000000000002, "loss": 2.5583, "step": 401000 }, { "epoch": 0.7989010901440775, "grad_norm": 0.15896864235401154, "learning_rate": 0.001799002487577582, "loss": 2.5469, "step": 401010 }, { "epoch": 0.7989210123677164, "grad_norm": 0.13655439019203186, "learning_rate": 0.0017980099012327585, "loss": 2.537, "step": 401020 }, { "epoch": 0.7989409345913554, "grad_norm": 0.13717475533485413, "learning_rate": 0.0017970221686981556, "loss": 2.5358, "step": 401030 }, { "epoch": 0.7989608568149943, "grad_norm": 0.1330132782459259, "learning_rate": 0.0017960392194562886, "loss": 2.5372, "step": 401040 }, { "epoch": 0.7989807790386332, "grad_norm": 0.1492437869310379, "learning_rate": 0.001795060984680808, "loss": 2.5435, "step": 401050 }, { "epoch": 0.7990007012622721, "grad_norm": 0.15421202778816223, "learning_rate": 0.00179408739718026, "loss": 2.5328, "step": 401060 }, { "epoch": 0.799020623485911, "grad_norm": 0.15678873658180237, "learning_rate": 0.0017931183913442279, "loss": 2.5346, "step": 401070 }, { "epoch": 0.79904054570955, "grad_norm": 0.1687782257795334, "learning_rate": 0.0017921539030917347, "loss": 2.5327, "step": 401080 }, { "epoch": 0.7990604679331889, "grad_norm": 0.14441625773906708, "learning_rate": 0.0017911938698217892, "loss": 2.5431, "step": 401090 }, { "epoch": 0.7990803901568277, "grad_norm": 0.17602191865444183, "learning_rate": 0.0017902382303659696, "loss": 2.5413, "step": 401100 }, { "epoch": 0.7991003123804666, "grad_norm": 0.16073787212371826, "learning_rate": 0.0017892869249429453, "loss": 2.5546, "step": 401110 }, { "epoch": 0.7991202346041055, "grad_norm": 0.14446689188480377, "learning_rate": 0.0017883398951148329, "loss": 2.5411, "step": 401120 }, { "epoch": 0.7991401568277445, "grad_norm": 0.14582058787345886, "learning_rate": 0.0017873970837453072, "loss": 2.5337, "step": 401130 }, { "epoch": 0.7991600790513834, "grad_norm": 0.13882097601890564, "learning_rate": 0.0017864584349593737, "loss": 2.5481, "step": 401140 }, { "epoch": 0.7991800012750223, "grad_norm": 0.13192863762378693, "learning_rate": 0.0017855238941047278, "loss": 2.5328, "step": 401150 }, { "epoch": 0.7991999234986612, "grad_norm": 0.14616937935352325, "learning_rate": 0.00178459340771462, "loss": 2.5355, "step": 401160 }, { "epoch": 0.7992198457223001, "grad_norm": 0.14970307052135468, "learning_rate": 0.0017836669234721607, "loss": 2.5204, "step": 401170 }, { "epoch": 0.7992397679459391, "grad_norm": 0.12260400503873825, "learning_rate": 0.0017827443901759956, "loss": 2.5513, "step": 401180 }, { "epoch": 0.799259690169578, "grad_norm": 0.13582450151443481, "learning_rate": 0.001781825757707286, "loss": 2.526, "step": 401190 }, { "epoch": 0.7992796123932169, "grad_norm": 0.12570679187774658, "learning_rate": 0.0017809109769979336, "loss": 2.5361, "step": 401200 }, { "epoch": 0.7992995346168558, "grad_norm": 0.16246548295021057, "learning_rate": 0.0017800000000000001, "loss": 2.5446, "step": 401210 }, { "epoch": 0.7993194568404947, "grad_norm": 0.16696251928806305, "learning_rate": 0.0017790927796562548, "loss": 2.5347, "step": 401220 }, { "epoch": 0.7993393790641337, "grad_norm": 0.16296562552452087, "learning_rate": 0.0017781892698718116, "loss": 2.544, "step": 401230 }, { "epoch": 0.7993593012877726, "grad_norm": 0.14863982796669006, "learning_rate": 0.0017772894254867993, "loss": 2.5268, "step": 401240 }, { "epoch": 0.7993792235114114, "grad_norm": 0.17042332887649536, "learning_rate": 0.001776393202250021, "loss": 2.5307, "step": 401250 }, { "epoch": 0.7993991457350503, "grad_norm": 0.1283605396747589, "learning_rate": 0.0017755005567935637, "loss": 2.5473, "step": 401260 }, { "epoch": 0.7994190679586892, "grad_norm": 0.15434125065803528, "learning_rate": 0.0017746114466083071, "loss": 2.5254, "step": 401270 }, { "epoch": 0.7994389901823282, "grad_norm": 0.14236855506896973, "learning_rate": 0.0017737258300203047, "loss": 2.5466, "step": 401280 }, { "epoch": 0.7994589124059671, "grad_norm": 0.14882232248783112, "learning_rate": 0.0017728436661679891, "loss": 2.5423, "step": 401290 }, { "epoch": 0.799478834629606, "grad_norm": 0.15653221309185028, "learning_rate": 0.0017719649149801724, "loss": 2.5272, "step": 401300 }, { "epoch": 0.7994987568532449, "grad_norm": 0.1496780514717102, "learning_rate": 0.001771089537154808, "loss": 2.5412, "step": 401310 }, { "epoch": 0.7995186790768839, "grad_norm": 0.1771424412727356, "learning_rate": 0.0017702174941384788, "loss": 2.5196, "step": 401320 }, { "epoch": 0.7995386013005228, "grad_norm": 0.1535225212574005, "learning_rate": 0.0017693487481065843, "loss": 2.5365, "step": 401330 }, { "epoch": 0.7995585235241617, "grad_norm": 0.16176797449588776, "learning_rate": 0.0017684832619441954, "loss": 2.5302, "step": 401340 }, { "epoch": 0.7995784457478006, "grad_norm": 0.179540753364563, "learning_rate": 0.001767620999227555, "loss": 2.5349, "step": 401350 }, { "epoch": 0.7995983679714395, "grad_norm": 0.13177332282066345, "learning_rate": 0.001766761924206188, "loss": 2.5522, "step": 401360 }, { "epoch": 0.7996182901950785, "grad_norm": 0.15665198862552643, "learning_rate": 0.0017659060017856075, "loss": 2.5369, "step": 401370 }, { "epoch": 0.7996382124187174, "grad_norm": 0.15654389560222626, "learning_rate": 0.0017650531975105855, "loss": 2.5348, "step": 401380 }, { "epoch": 0.7996581346423562, "grad_norm": 0.13034333288669586, "learning_rate": 0.0017642034775489682, "loss": 2.5358, "step": 401390 }, { "epoch": 0.7996780568659951, "grad_norm": 0.14865317940711975, "learning_rate": 0.0017633568086760155, "loss": 2.5316, "step": 401400 }, { "epoch": 0.799697979089634, "grad_norm": 0.15597380697727203, "learning_rate": 0.0017625131582592417, "loss": 2.534, "step": 401410 }, { "epoch": 0.799717901313273, "grad_norm": 0.17521335184574127, "learning_rate": 0.0017616724942437403, "loss": 2.5299, "step": 401420 }, { "epoch": 0.7997378235369119, "grad_norm": 0.13144978880882263, "learning_rate": 0.001760834785137972, "loss": 2.5368, "step": 401430 }, { "epoch": 0.7997577457605508, "grad_norm": 0.1882655918598175, "learning_rate": 0.00176, "loss": 2.5423, "step": 401440 }, { "epoch": 0.7997776679841897, "grad_norm": 0.12882596254348755, "learning_rate": 0.0017591681084241542, "loss": 2.5509, "step": 401450 }, { "epoch": 0.7997975902078286, "grad_norm": 0.14456254243850708, "learning_rate": 0.0017583390805281085, "loss": 2.5334, "step": 401460 }, { "epoch": 0.7998175124314676, "grad_norm": 0.1316775679588318, "learning_rate": 0.0017575128869403572, "loss": 2.5259, "step": 401470 }, { "epoch": 0.7998374346551065, "grad_norm": 0.14819608628749847, "learning_rate": 0.0017566894987880713, "loss": 2.5244, "step": 401480 }, { "epoch": 0.7998573568787454, "grad_norm": 0.15569163858890533, "learning_rate": 0.001755868887685326, "loss": 2.5481, "step": 401490 }, { "epoch": 0.7998772791023843, "grad_norm": 0.157561257481575, "learning_rate": 0.0017550510257216823, "loss": 2.5406, "step": 401500 }, { "epoch": 0.7998972013260232, "grad_norm": 0.14439615607261658, "learning_rate": 0.0017542358854511098, "loss": 2.5347, "step": 401510 }, { "epoch": 0.7999171235496622, "grad_norm": 0.16386990249156952, "learning_rate": 0.001753423439881241, "loss": 2.5451, "step": 401520 }, { "epoch": 0.799937045773301, "grad_norm": 0.13921675086021423, "learning_rate": 0.0017526136624629403, "loss": 2.5527, "step": 401530 }, { "epoch": 0.7999569679969399, "grad_norm": 0.13225138187408447, "learning_rate": 0.001751806527080183, "loss": 2.5389, "step": 401540 }, { "epoch": 0.7999768902205788, "grad_norm": 0.1349523812532425, "learning_rate": 0.0017510020080402254, "loss": 2.5403, "step": 401550 }, { "epoch": 0.7999968124442177, "grad_norm": 0.15527960658073425, "learning_rate": 0.001750200080064064, "loss": 2.5516, "step": 401560 }, { "epoch": 0.8000167346678567, "grad_norm": 0.17492568492889404, "learning_rate": 0.0017494007182771668, "loss": 2.5319, "step": 401570 }, { "epoch": 0.8000366568914956, "grad_norm": 0.1342025250196457, "learning_rate": 0.0017486038982004693, "loss": 2.5463, "step": 401580 }, { "epoch": 0.8000565791151345, "grad_norm": 0.14898772537708282, "learning_rate": 0.0017478095957416302, "loss": 2.5509, "step": 401590 }, { "epoch": 0.8000765013387734, "grad_norm": 0.1458273082971573, "learning_rate": 0.0017470177871865297, "loss": 2.5323, "step": 401600 }, { "epoch": 0.8000964235624124, "grad_norm": 0.13050445914268494, "learning_rate": 0.0017462284491910098, "loss": 2.5391, "step": 401610 }, { "epoch": 0.8001163457860513, "grad_norm": 0.1501179188489914, "learning_rate": 0.001745441558772843, "loss": 2.5398, "step": 401620 }, { "epoch": 0.8001362680096902, "grad_norm": 0.21882370114326477, "learning_rate": 0.0017446570933039262, "loss": 2.5322, "step": 401630 }, { "epoch": 0.8001561902333291, "grad_norm": 0.1473979502916336, "learning_rate": 0.001743875030502686, "loss": 2.5498, "step": 401640 }, { "epoch": 0.800176112456968, "grad_norm": 0.14563342928886414, "learning_rate": 0.0017430953484266973, "loss": 2.5415, "step": 401650 }, { "epoch": 0.800196034680607, "grad_norm": 0.14951932430267334, "learning_rate": 0.0017423180254654976, "loss": 2.5357, "step": 401660 }, { "epoch": 0.8002159569042459, "grad_norm": 0.16206128895282745, "learning_rate": 0.0017415430403335984, "loss": 2.5337, "step": 401670 }, { "epoch": 0.8002358791278847, "grad_norm": 0.16541750729084015, "learning_rate": 0.0017407703720636856, "loss": 2.5228, "step": 401680 }, { "epoch": 0.8002558013515236, "grad_norm": 0.14230790734291077, "learning_rate": 0.00174, "loss": 2.5352, "step": 401690 }, { "epoch": 0.8002757235751625, "grad_norm": 0.12667883932590485, "learning_rate": 0.001739231903791894, "loss": 2.5512, "step": 401700 }, { "epoch": 0.8002956457988015, "grad_norm": 0.13645388185977936, "learning_rate": 0.0017384660633875595, "loss": 2.5447, "step": 401710 }, { "epoch": 0.8003155680224404, "grad_norm": 0.17229868471622467, "learning_rate": 0.00173770245902792, "loss": 2.5407, "step": 401720 }, { "epoch": 0.8003354902460793, "grad_norm": 0.15294069051742554, "learning_rate": 0.0017369410712406818, "loss": 2.5419, "step": 401730 }, { "epoch": 0.8003554124697182, "grad_norm": 0.15157760679721832, "learning_rate": 0.0017361818808345416, "loss": 2.5514, "step": 401740 }, { "epoch": 0.8003753346933571, "grad_norm": 0.1522708237171173, "learning_rate": 0.001735424868893541, "loss": 2.5349, "step": 401750 }, { "epoch": 0.8003952569169961, "grad_norm": 0.1341230720281601, "learning_rate": 0.001734670016771568, "loss": 2.5317, "step": 401760 }, { "epoch": 0.800415179140635, "grad_norm": 0.15723739564418793, "learning_rate": 0.0017339173060869986, "loss": 2.5513, "step": 401770 }, { "epoch": 0.8004351013642739, "grad_norm": 0.14140133559703827, "learning_rate": 0.0017331667187174733, "loss": 2.5299, "step": 401780 }, { "epoch": 0.8004550235879128, "grad_norm": 0.13402147591114044, "learning_rate": 0.001732418236794807, "loss": 2.5373, "step": 401790 }, { "epoch": 0.8004749458115517, "grad_norm": 0.15473029017448425, "learning_rate": 0.0017316718427000254, "loss": 2.5138, "step": 401800 }, { "epoch": 0.8004948680351907, "grad_norm": 0.18117530643939972, "learning_rate": 0.001730927519058526, "loss": 2.5268, "step": 401810 }, { "epoch": 0.8005147902588295, "grad_norm": 0.14250631630420685, "learning_rate": 0.001730185248735359, "loss": 2.5408, "step": 401820 }, { "epoch": 0.8005347124824684, "grad_norm": 0.14143986999988556, "learning_rate": 0.0017294450148306263, "loss": 2.5352, "step": 401830 }, { "epoch": 0.8005546347061073, "grad_norm": 0.16062667965888977, "learning_rate": 0.0017287068006749894, "loss": 2.5432, "step": 401840 }, { "epoch": 0.8005745569297462, "grad_norm": 0.16532418131828308, "learning_rate": 0.0017279705898252912, "loss": 2.5375, "step": 401850 }, { "epoch": 0.8005944791533852, "grad_norm": 0.153151735663414, "learning_rate": 0.001727236366060283, "loss": 2.5237, "step": 401860 }, { "epoch": 0.8006144013770241, "grad_norm": 0.18004557490348816, "learning_rate": 0.0017265041133764532, "loss": 2.5188, "step": 401870 }, { "epoch": 0.800634323600663, "grad_norm": 0.13437007367610931, "learning_rate": 0.0017257738159839582, "loss": 2.5312, "step": 401880 }, { "epoch": 0.8006542458243019, "grad_norm": 0.15343116223812103, "learning_rate": 0.0017250454583026497, "loss": 2.5364, "step": 401890 }, { "epoch": 0.8006741680479409, "grad_norm": 0.1631826013326645, "learning_rate": 0.0017243190249581954, "loss": 2.5463, "step": 401900 }, { "epoch": 0.8006940902715798, "grad_norm": 0.14278410375118256, "learning_rate": 0.0017235945007782949, "loss": 2.5435, "step": 401910 }, { "epoch": 0.8007140124952187, "grad_norm": 0.14094369113445282, "learning_rate": 0.0017228718707889797, "loss": 2.5342, "step": 401920 }, { "epoch": 0.8007339347188576, "grad_norm": 0.12963469326496124, "learning_rate": 0.001722151120211004, "loss": 2.5134, "step": 401930 }, { "epoch": 0.8007538569424965, "grad_norm": 0.17431625723838806, "learning_rate": 0.0017214322344563176, "loss": 2.5373, "step": 401940 }, { "epoch": 0.8007737791661355, "grad_norm": 0.13259509205818176, "learning_rate": 0.0017207151991246214, "loss": 2.5316, "step": 401950 }, { "epoch": 0.8007937013897743, "grad_norm": 0.1550055891275406, "learning_rate": 0.00172, "loss": 2.5261, "step": 401960 }, { "epoch": 0.8008136236134132, "grad_norm": 0.15188059210777283, "learning_rate": 0.001719286623047636, "loss": 2.541, "step": 401970 }, { "epoch": 0.8008335458370521, "grad_norm": 0.1421087682247162, "learning_rate": 0.0017185750544105943, "loss": 2.5655, "step": 401980 }, { "epoch": 0.800853468060691, "grad_norm": 0.12941332161426544, "learning_rate": 0.0017178652804066823, "loss": 2.5223, "step": 401990 }, { "epoch": 0.80087339028433, "grad_norm": 0.14994363486766815, "learning_rate": 0.001717157287525381, "loss": 2.5482, "step": 402000 }, { "epoch": 0.8008933125079689, "grad_norm": 0.1720682978630066, "learning_rate": 0.0017164510624248434, "loss": 2.5463, "step": 402010 }, { "epoch": 0.8009132347316078, "grad_norm": 0.1337049901485443, "learning_rate": 0.001715746591928962, "loss": 2.5355, "step": 402020 }, { "epoch": 0.8009331569552467, "grad_norm": 0.16231103241443634, "learning_rate": 0.0017150438630245, "loss": 2.5375, "step": 402030 }, { "epoch": 0.8009530791788856, "grad_norm": 0.1403045654296875, "learning_rate": 0.001714342862858286, "loss": 2.542, "step": 402040 }, { "epoch": 0.8009730014025246, "grad_norm": 0.17849485576152802, "learning_rate": 0.001713643578734473, "loss": 2.5425, "step": 402050 }, { "epoch": 0.8009929236261635, "grad_norm": 0.1569921374320984, "learning_rate": 0.0017129459981118536, "loss": 2.5246, "step": 402060 }, { "epoch": 0.8010128458498024, "grad_norm": 0.12845833599567413, "learning_rate": 0.001712250108601237, "loss": 2.526, "step": 402070 }, { "epoch": 0.8010327680734413, "grad_norm": 0.1717187613248825, "learning_rate": 0.001711555897962881, "loss": 2.5355, "step": 402080 }, { "epoch": 0.8010526902970801, "grad_norm": 0.16106043756008148, "learning_rate": 0.001710863354103981, "loss": 2.5404, "step": 402090 }, { "epoch": 0.8010726125207192, "grad_norm": 0.1351327747106552, "learning_rate": 0.0017101724650762113, "loss": 2.5307, "step": 402100 }, { "epoch": 0.801092534744358, "grad_norm": 0.13319051265716553, "learning_rate": 0.001709483219073321, "loss": 2.5362, "step": 402110 }, { "epoch": 0.8011124569679969, "grad_norm": 0.1529339700937271, "learning_rate": 0.0017087956044287793, "loss": 2.5578, "step": 402120 }, { "epoch": 0.8011323791916358, "grad_norm": 0.16086801886558533, "learning_rate": 0.0017081096096134717, "loss": 2.5312, "step": 402130 }, { "epoch": 0.8011523014152747, "grad_norm": 0.16800400614738464, "learning_rate": 0.0017074252232334441, "loss": 2.5102, "step": 402140 }, { "epoch": 0.8011722236389137, "grad_norm": 0.14595285058021545, "learning_rate": 0.0017067424340276964, "loss": 2.533, "step": 402150 }, { "epoch": 0.8011921458625526, "grad_norm": 0.13986697793006897, "learning_rate": 0.0017060612308660186, "loss": 2.5291, "step": 402160 }, { "epoch": 0.8012120680861915, "grad_norm": 0.14504873752593994, "learning_rate": 0.0017053816027468754, "loss": 2.5089, "step": 402170 }, { "epoch": 0.8012319903098304, "grad_norm": 0.14121846854686737, "learning_rate": 0.001704703538795332, "loss": 2.5341, "step": 402180 }, { "epoch": 0.8012519125334694, "grad_norm": 0.15115804970264435, "learning_rate": 0.0017040270282610251, "loss": 2.5228, "step": 402190 }, { "epoch": 0.8012718347571083, "grad_norm": 0.16881948709487915, "learning_rate": 0.0017033520605161737, "loss": 2.5378, "step": 402200 }, { "epoch": 0.8012917569807472, "grad_norm": 0.1427966058254242, "learning_rate": 0.00170267862505363, "loss": 2.5316, "step": 402210 }, { "epoch": 0.8013116792043861, "grad_norm": 0.16176961362361908, "learning_rate": 0.0017020067114849732, "loss": 2.5312, "step": 402220 }, { "epoch": 0.801331601428025, "grad_norm": 0.14361554384231567, "learning_rate": 0.0017013363095386386, "loss": 2.5504, "step": 402230 }, { "epoch": 0.801351523651664, "grad_norm": 0.14880821108818054, "learning_rate": 0.0017006674090580846, "loss": 2.5275, "step": 402240 }, { "epoch": 0.8013714458753028, "grad_norm": 0.17867246270179749, "learning_rate": 0.0017, "loss": 2.5286, "step": 402250 }, { "epoch": 0.8013913680989417, "grad_norm": 0.13100917637348175, "learning_rate": 0.001699334072432542, "loss": 2.5234, "step": 402260 }, { "epoch": 0.8014112903225806, "grad_norm": 0.15298214554786682, "learning_rate": 0.0016986696165336129, "loss": 2.5304, "step": 402270 }, { "epoch": 0.8014312125462195, "grad_norm": 0.19547361135482788, "learning_rate": 0.0016980066225891698, "loss": 2.548, "step": 402280 }, { "epoch": 0.8014511347698585, "grad_norm": 0.17275279760360718, "learning_rate": 0.001697345080991569, "loss": 2.5426, "step": 402290 }, { "epoch": 0.8014710569934974, "grad_norm": 0.16716307401657104, "learning_rate": 0.001696684982237938, "loss": 2.5396, "step": 402300 }, { "epoch": 0.8014909792171363, "grad_norm": 0.15004205703735352, "learning_rate": 0.0016960263169285867, "loss": 2.5424, "step": 402310 }, { "epoch": 0.8015109014407752, "grad_norm": 0.16144095361232758, "learning_rate": 0.0016953690757654436, "loss": 2.5252, "step": 402320 }, { "epoch": 0.8015308236644141, "grad_norm": 0.14849233627319336, "learning_rate": 0.001694713249550525, "loss": 2.5223, "step": 402330 }, { "epoch": 0.8015507458880531, "grad_norm": 0.14049968123435974, "learning_rate": 0.001694058829184433, "loss": 2.5253, "step": 402340 }, { "epoch": 0.801570668111692, "grad_norm": 0.13806650042533875, "learning_rate": 0.0016934058056648823, "loss": 2.5437, "step": 402350 }, { "epoch": 0.8015905903353309, "grad_norm": 0.16161535680294037, "learning_rate": 0.0016927541700852559, "loss": 2.5413, "step": 402360 }, { "epoch": 0.8016105125589698, "grad_norm": 0.1429503858089447, "learning_rate": 0.001692103913633187, "loss": 2.5367, "step": 402370 }, { "epoch": 0.8016304347826086, "grad_norm": 0.1478048712015152, "learning_rate": 0.0016914550275891698, "loss": 2.5282, "step": 402380 }, { "epoch": 0.8016503570062476, "grad_norm": 0.14441794157028198, "learning_rate": 0.001690807503325194, "loss": 2.5269, "step": 402390 }, { "epoch": 0.8016702792298865, "grad_norm": 0.16119807958602905, "learning_rate": 0.0016901613323034067, "loss": 2.5436, "step": 402400 }, { "epoch": 0.8016902014535254, "grad_norm": 0.12576423585414886, "learning_rate": 0.0016895165060747998, "loss": 2.5264, "step": 402410 }, { "epoch": 0.8017101236771643, "grad_norm": 0.13793736696243286, "learning_rate": 0.0016888730162779191, "loss": 2.5362, "step": 402420 }, { "epoch": 0.8017300459008032, "grad_norm": 0.13417935371398926, "learning_rate": 0.0016882308546376022, "loss": 2.5108, "step": 402430 }, { "epoch": 0.8017499681244422, "grad_norm": 0.15839731693267822, "learning_rate": 0.001687590012963734, "loss": 2.5457, "step": 402440 }, { "epoch": 0.8017698903480811, "grad_norm": 0.17364297807216644, "learning_rate": 0.0016869504831500295, "loss": 2.5279, "step": 402450 }, { "epoch": 0.80178981257172, "grad_norm": 0.16134828329086304, "learning_rate": 0.0016863122571728376, "loss": 2.5246, "step": 402460 }, { "epoch": 0.8018097347953589, "grad_norm": 0.14324882626533508, "learning_rate": 0.0016856753270899658, "loss": 2.5251, "step": 402470 }, { "epoch": 0.8018296570189979, "grad_norm": 0.14254216849803925, "learning_rate": 0.0016850396850395274, "loss": 2.514, "step": 402480 }, { "epoch": 0.8018495792426368, "grad_norm": 0.15445278584957123, "learning_rate": 0.00168440532323881, "loss": 2.5351, "step": 402490 }, { "epoch": 0.8018695014662757, "grad_norm": 0.1506587564945221, "learning_rate": 0.001683772233983162, "loss": 2.5394, "step": 402500 }, { "epoch": 0.8018894236899146, "grad_norm": 0.17640970647335052, "learning_rate": 0.0016831404096449027, "loss": 2.5325, "step": 402510 }, { "epoch": 0.8019093459135535, "grad_norm": 0.14197233319282532, "learning_rate": 0.0016825098426722493, "loss": 2.524, "step": 402520 }, { "epoch": 0.8019292681371925, "grad_norm": 0.13371457159519196, "learning_rate": 0.0016818805255882628, "loss": 2.5313, "step": 402530 }, { "epoch": 0.8019491903608313, "grad_norm": 0.13191550970077515, "learning_rate": 0.0016812524509898154, "loss": 2.534, "step": 402540 }, { "epoch": 0.8019691125844702, "grad_norm": 0.18451520800590515, "learning_rate": 0.0016806256115465738, "loss": 2.5354, "step": 402550 }, { "epoch": 0.8019890348081091, "grad_norm": 0.15914537012577057, "learning_rate": 0.00168, "loss": 2.5441, "step": 402560 }, { "epoch": 0.802008957031748, "grad_norm": 0.14049740135669708, "learning_rate": 0.001679375609162372, "loss": 2.541, "step": 402570 }, { "epoch": 0.802028879255387, "grad_norm": 0.13531331717967987, "learning_rate": 0.0016787524319158199, "loss": 2.5172, "step": 402580 }, { "epoch": 0.8020488014790259, "grad_norm": 0.13383722305297852, "learning_rate": 0.0016781304612113784, "loss": 2.5279, "step": 402590 }, { "epoch": 0.8020687237026648, "grad_norm": 0.14016568660736084, "learning_rate": 0.001677509690068058, "loss": 2.5168, "step": 402600 }, { "epoch": 0.8020886459263037, "grad_norm": 0.17788636684417725, "learning_rate": 0.0016768901115719298, "loss": 2.5317, "step": 402610 }, { "epoch": 0.8021085681499426, "grad_norm": 0.12910716235637665, "learning_rate": 0.0016762717188752272, "loss": 2.539, "step": 402620 }, { "epoch": 0.8021284903735816, "grad_norm": 0.14596694707870483, "learning_rate": 0.001675654505195463, "loss": 2.5263, "step": 402630 }, { "epoch": 0.8021484125972205, "grad_norm": 0.15516126155853271, "learning_rate": 0.0016750384638145616, "loss": 2.5298, "step": 402640 }, { "epoch": 0.8021683348208594, "grad_norm": 0.1356307417154312, "learning_rate": 0.001674423588078006, "loss": 2.5269, "step": 402650 }, { "epoch": 0.8021882570444983, "grad_norm": 0.16610170900821686, "learning_rate": 0.0016738098713939983, "loss": 2.5316, "step": 402660 }, { "epoch": 0.8022081792681371, "grad_norm": 0.13332274556159973, "learning_rate": 0.0016731973072326362, "loss": 2.5234, "step": 402670 }, { "epoch": 0.8022281014917761, "grad_norm": 0.13253328204154968, "learning_rate": 0.001672585889125102, "loss": 2.5334, "step": 402680 }, { "epoch": 0.802248023715415, "grad_norm": 0.15514519810676575, "learning_rate": 0.0016719756106628654, "loss": 2.541, "step": 402690 }, { "epoch": 0.8022679459390539, "grad_norm": 0.14285671710968018, "learning_rate": 0.0016713664654969002, "loss": 2.5228, "step": 402700 }, { "epoch": 0.8022878681626928, "grad_norm": 0.1704518347978592, "learning_rate": 0.0016707584473369134, "loss": 2.533, "step": 402710 }, { "epoch": 0.8023077903863317, "grad_norm": 0.13324551284313202, "learning_rate": 0.0016701515499505872, "loss": 2.5394, "step": 402720 }, { "epoch": 0.8023277126099707, "grad_norm": 0.12126784026622772, "learning_rate": 0.001669545767162834, "loss": 2.5279, "step": 402730 }, { "epoch": 0.8023476348336096, "grad_norm": 0.1611231565475464, "learning_rate": 0.001668941092855063, "loss": 2.538, "step": 402740 }, { "epoch": 0.8023675570572485, "grad_norm": 0.1578601896762848, "learning_rate": 0.00166833752096446, "loss": 2.5218, "step": 402750 }, { "epoch": 0.8023874792808874, "grad_norm": 0.1502915471792221, "learning_rate": 0.0016677350454832771, "loss": 2.5352, "step": 402760 }, { "epoch": 0.8024074015045263, "grad_norm": 0.18301719427108765, "learning_rate": 0.0016671336604581353, "loss": 2.5356, "step": 402770 }, { "epoch": 0.8024273237281653, "grad_norm": 0.14327266812324524, "learning_rate": 0.0016665333599893387, "loss": 2.5437, "step": 402780 }, { "epoch": 0.8024472459518042, "grad_norm": 0.14590750634670258, "learning_rate": 0.0016659341382301988, "loss": 2.5275, "step": 402790 }, { "epoch": 0.8024671681754431, "grad_norm": 0.15848501026630402, "learning_rate": 0.0016653359893863697, "loss": 2.5299, "step": 402800 }, { "epoch": 0.802487090399082, "grad_norm": 0.13859164714813232, "learning_rate": 0.0016647389077151958, "loss": 2.5338, "step": 402810 }, { "epoch": 0.802507012622721, "grad_norm": 0.13838495314121246, "learning_rate": 0.0016641428875250669, "loss": 2.5459, "step": 402820 }, { "epoch": 0.8025269348463598, "grad_norm": 0.1670478880405426, "learning_rate": 0.0016635479231747856, "loss": 2.5368, "step": 402830 }, { "epoch": 0.8025468570699987, "grad_norm": 0.14116454124450684, "learning_rate": 0.0016629540090729456, "loss": 2.5403, "step": 402840 }, { "epoch": 0.8025667792936376, "grad_norm": 0.15022329986095428, "learning_rate": 0.0016623611396773175, "loss": 2.5451, "step": 402850 }, { "epoch": 0.8025867015172765, "grad_norm": 0.14137853682041168, "learning_rate": 0.0016617693094942447, "loss": 2.5217, "step": 402860 }, { "epoch": 0.8026066237409155, "grad_norm": 0.1546240895986557, "learning_rate": 0.0016611785130780517, "loss": 2.5245, "step": 402870 }, { "epoch": 0.8026265459645544, "grad_norm": 0.1673281192779541, "learning_rate": 0.0016605887450304572, "loss": 2.5212, "step": 402880 }, { "epoch": 0.8026464681881933, "grad_norm": 0.14107786118984222, "learning_rate": 0.0016600000000000002, "loss": 2.5389, "step": 402890 }, { "epoch": 0.8026663904118322, "grad_norm": 0.15075941383838654, "learning_rate": 0.0016594122726814719, "loss": 2.5317, "step": 402900 }, { "epoch": 0.8026863126354711, "grad_norm": 0.16000549495220184, "learning_rate": 0.0016588255578153604, "loss": 2.5286, "step": 402910 }, { "epoch": 0.8027062348591101, "grad_norm": 0.1277739703655243, "learning_rate": 0.0016582398501872988, "loss": 2.5435, "step": 402920 }, { "epoch": 0.802726157082749, "grad_norm": 0.16477538645267487, "learning_rate": 0.0016576551446275264, "loss": 2.5358, "step": 402930 }, { "epoch": 0.8027460793063879, "grad_norm": 0.16010457277297974, "learning_rate": 0.0016570714360103552, "loss": 2.5286, "step": 402940 }, { "epoch": 0.8027660015300268, "grad_norm": 0.14471934735774994, "learning_rate": 0.0016564887192536467, "loss": 2.5091, "step": 402950 }, { "epoch": 0.8027859237536656, "grad_norm": 0.13071641325950623, "learning_rate": 0.001655906989318295, "loss": 2.5298, "step": 402960 }, { "epoch": 0.8028058459773046, "grad_norm": 0.16512586176395416, "learning_rate": 0.0016553262412077184, "loss": 2.536, "step": 402970 }, { "epoch": 0.8028257682009435, "grad_norm": 0.1718374341726303, "learning_rate": 0.0016547464699673588, "loss": 2.5297, "step": 402980 }, { "epoch": 0.8028456904245824, "grad_norm": 0.13852041959762573, "learning_rate": 0.0016541676706841883, "loss": 2.5314, "step": 402990 }, { "epoch": 0.8028656126482213, "grad_norm": 0.1697070449590683, "learning_rate": 0.0016535898384862247, "loss": 2.5338, "step": 403000 }, { "epoch": 0.8028855348718602, "grad_norm": 0.15010619163513184, "learning_rate": 0.0016530129685420505, "loss": 2.5254, "step": 403010 }, { "epoch": 0.8029054570954992, "grad_norm": 0.14444294571876526, "learning_rate": 0.0016524370560603448, "loss": 2.5255, "step": 403020 }, { "epoch": 0.8029253793191381, "grad_norm": 0.14538806676864624, "learning_rate": 0.0016518620962894157, "loss": 2.5396, "step": 403030 }, { "epoch": 0.802945301542777, "grad_norm": 0.1391831636428833, "learning_rate": 0.0016512880845167461, "loss": 2.5263, "step": 403040 }, { "epoch": 0.8029652237664159, "grad_norm": 0.1538299322128296, "learning_rate": 0.0016507150160685405, "loss": 2.5397, "step": 403050 }, { "epoch": 0.8029851459900548, "grad_norm": 0.14871077239513397, "learning_rate": 0.001650142886309282, "loss": 2.5305, "step": 403060 }, { "epoch": 0.8030050682136938, "grad_norm": 0.13450203835964203, "learning_rate": 0.0016495716906412955, "loss": 2.5305, "step": 403070 }, { "epoch": 0.8030249904373327, "grad_norm": 0.14711534976959229, "learning_rate": 0.0016490014245043151, "loss": 2.533, "step": 403080 }, { "epoch": 0.8030449126609716, "grad_norm": 0.1460445523262024, "learning_rate": 0.0016484320833750612, "loss": 2.5345, "step": 403090 }, { "epoch": 0.8030648348846104, "grad_norm": 0.1512516736984253, "learning_rate": 0.0016478636627668198, "loss": 2.5069, "step": 403100 }, { "epoch": 0.8030847571082494, "grad_norm": 0.1352475881576538, "learning_rate": 0.001647296158229032, "loss": 2.5248, "step": 403110 }, { "epoch": 0.8031046793318883, "grad_norm": 0.17596356570720673, "learning_rate": 0.001646729565346886, "loss": 2.5251, "step": 403120 }, { "epoch": 0.8031246015555272, "grad_norm": 0.13853493332862854, "learning_rate": 0.0016461638797409172, "loss": 2.5291, "step": 403130 }, { "epoch": 0.8031445237791661, "grad_norm": 0.15469352900981903, "learning_rate": 0.001645599097066613, "loss": 2.5215, "step": 403140 }, { "epoch": 0.803164446002805, "grad_norm": 0.16490240395069122, "learning_rate": 0.001645035213014023, "loss": 2.5304, "step": 403150 }, { "epoch": 0.803184368226444, "grad_norm": 0.1657698005437851, "learning_rate": 0.0016444722233073766, "loss": 2.5197, "step": 403160 }, { "epoch": 0.8032042904500829, "grad_norm": 0.15518340468406677, "learning_rate": 0.001643910123704703, "loss": 2.5274, "step": 403170 }, { "epoch": 0.8032242126737218, "grad_norm": 0.17400361597537994, "learning_rate": 0.00164334890999746, "loss": 2.5267, "step": 403180 }, { "epoch": 0.8032441348973607, "grad_norm": 0.1319066286087036, "learning_rate": 0.001642788578010165, "loss": 2.521, "step": 403190 }, { "epoch": 0.8032640571209996, "grad_norm": 0.1508115977048874, "learning_rate": 0.0016422291236000337, "loss": 2.5364, "step": 403200 }, { "epoch": 0.8032839793446386, "grad_norm": 0.1605699062347412, "learning_rate": 0.0016416705426566217, "loss": 2.5344, "step": 403210 }, { "epoch": 0.8033039015682775, "grad_norm": 0.14557401835918427, "learning_rate": 0.0016411128311014727, "loss": 2.5283, "step": 403220 }, { "epoch": 0.8033238237919164, "grad_norm": 0.1355041265487671, "learning_rate": 0.0016405559848877713, "loss": 2.5243, "step": 403230 }, { "epoch": 0.8033437460155552, "grad_norm": 0.1446717381477356, "learning_rate": 0.0016400000000000002, "loss": 2.5271, "step": 403240 }, { "epoch": 0.8033636682391941, "grad_norm": 0.15572960674762726, "learning_rate": 0.001639444872453601, "loss": 2.5253, "step": 403250 }, { "epoch": 0.8033835904628331, "grad_norm": 0.12836164236068726, "learning_rate": 0.0016388905982946443, "loss": 2.5309, "step": 403260 }, { "epoch": 0.803403512686472, "grad_norm": 0.17437605559825897, "learning_rate": 0.0016383371735994973, "loss": 2.5147, "step": 403270 }, { "epoch": 0.8034234349101109, "grad_norm": 0.14367792010307312, "learning_rate": 0.0016377845944745032, "loss": 2.5151, "step": 403280 }, { "epoch": 0.8034433571337498, "grad_norm": 0.14968270063400269, "learning_rate": 0.001637232857055659, "loss": 2.5429, "step": 403290 }, { "epoch": 0.8034632793573887, "grad_norm": 0.1526433229446411, "learning_rate": 0.001636681957508301, "loss": 2.5287, "step": 403300 }, { "epoch": 0.8034832015810277, "grad_norm": 0.15250800549983978, "learning_rate": 0.001636131892026795, "loss": 2.5234, "step": 403310 }, { "epoch": 0.8035031238046666, "grad_norm": 0.14007118344306946, "learning_rate": 0.001635582656834228, "loss": 2.5339, "step": 403320 }, { "epoch": 0.8035230460283055, "grad_norm": 0.12906257808208466, "learning_rate": 0.0016350342481821069, "loss": 2.5253, "step": 403330 }, { "epoch": 0.8035429682519444, "grad_norm": 0.1685645431280136, "learning_rate": 0.0016344866623500588, "loss": 2.5295, "step": 403340 }, { "epoch": 0.8035628904755833, "grad_norm": 0.14088298380374908, "learning_rate": 0.0016339398956455376, "loss": 2.5375, "step": 403350 }, { "epoch": 0.8035828126992223, "grad_norm": 0.13845936954021454, "learning_rate": 0.001633393944403533, "loss": 2.5218, "step": 403360 }, { "epoch": 0.8036027349228612, "grad_norm": 0.16271446645259857, "learning_rate": 0.0016328488049862835, "loss": 2.5227, "step": 403370 }, { "epoch": 0.8036226571465, "grad_norm": 0.1815074235200882, "learning_rate": 0.0016323044737829953, "loss": 2.5398, "step": 403380 }, { "epoch": 0.8036425793701389, "grad_norm": 0.1403111219406128, "learning_rate": 0.0016317609472095607, "loss": 2.5259, "step": 403390 }, { "epoch": 0.803662501593778, "grad_norm": 0.14476259052753448, "learning_rate": 0.0016312182217082846, "loss": 2.5201, "step": 403400 }, { "epoch": 0.8036824238174168, "grad_norm": 0.15692508220672607, "learning_rate": 0.0016306762937476123, "loss": 2.5502, "step": 403410 }, { "epoch": 0.8037023460410557, "grad_norm": 0.1436474323272705, "learning_rate": 0.0016301351598218615, "loss": 2.5393, "step": 403420 }, { "epoch": 0.8037222682646946, "grad_norm": 0.17181064188480377, "learning_rate": 0.0016295948164509572, "loss": 2.5289, "step": 403430 }, { "epoch": 0.8037421904883335, "grad_norm": 0.14593243598937988, "learning_rate": 0.0016290552601801718, "loss": 2.5334, "step": 403440 }, { "epoch": 0.8037621127119725, "grad_norm": 0.157896026968956, "learning_rate": 0.0016285164875798658, "loss": 2.5234, "step": 403450 }, { "epoch": 0.8037820349356114, "grad_norm": 0.1707024723291397, "learning_rate": 0.0016279784952452346, "loss": 2.5411, "step": 403460 }, { "epoch": 0.8038019571592503, "grad_norm": 0.15895825624465942, "learning_rate": 0.0016274412797960569, "loss": 2.5337, "step": 403470 }, { "epoch": 0.8038218793828892, "grad_norm": 0.13471488654613495, "learning_rate": 0.0016269048378764475, "loss": 2.5404, "step": 403480 }, { "epoch": 0.8038418016065281, "grad_norm": 0.1501074731349945, "learning_rate": 0.001626369166154612, "loss": 2.5393, "step": 403490 }, { "epoch": 0.8038617238301671, "grad_norm": 0.19196119904518127, "learning_rate": 0.0016258342613226057, "loss": 2.5309, "step": 403500 }, { "epoch": 0.803881646053806, "grad_norm": 0.19949044287204742, "learning_rate": 0.0016253001200960961, "loss": 2.5289, "step": 403510 }, { "epoch": 0.8039015682774449, "grad_norm": 0.16289934515953064, "learning_rate": 0.0016247667392141256, "loss": 2.5267, "step": 403520 }, { "epoch": 0.8039214905010837, "grad_norm": 0.12825970351696014, "learning_rate": 0.0016242341154388813, "loss": 2.5221, "step": 403530 }, { "epoch": 0.8039414127247226, "grad_norm": 0.1434536725282669, "learning_rate": 0.0016237022455554645, "loss": 2.5246, "step": 403540 }, { "epoch": 0.8039613349483616, "grad_norm": 0.14164011180400848, "learning_rate": 0.0016231711263716647, "loss": 2.5366, "step": 403550 }, { "epoch": 0.8039812571720005, "grad_norm": 0.13447947800159454, "learning_rate": 0.001622640754717736, "loss": 2.5322, "step": 403560 }, { "epoch": 0.8040011793956394, "grad_norm": 0.15188899636268616, "learning_rate": 0.0016221111274461763, "loss": 2.5363, "step": 403570 }, { "epoch": 0.8040211016192783, "grad_norm": 0.15280763804912567, "learning_rate": 0.00162158224143151, "loss": 2.5155, "step": 403580 }, { "epoch": 0.8040410238429172, "grad_norm": 0.14735892415046692, "learning_rate": 0.0016210540935700716, "loss": 2.5208, "step": 403590 }, { "epoch": 0.8040609460665562, "grad_norm": 0.13610464334487915, "learning_rate": 0.0016205266807797945, "loss": 2.523, "step": 403600 }, { "epoch": 0.8040808682901951, "grad_norm": 0.1666685789823532, "learning_rate": 0.0016200000000000001, "loss": 2.5226, "step": 403610 }, { "epoch": 0.804100790513834, "grad_norm": 0.15002472698688507, "learning_rate": 0.001619474048191191, "loss": 2.5221, "step": 403620 }, { "epoch": 0.8041207127374729, "grad_norm": 0.1521066427230835, "learning_rate": 0.001618948822334847, "loss": 2.5148, "step": 403630 }, { "epoch": 0.8041406349611118, "grad_norm": 0.1426207274198532, "learning_rate": 0.0016184243194332217, "loss": 2.525, "step": 403640 }, { "epoch": 0.8041605571847508, "grad_norm": 0.14745667576789856, "learning_rate": 0.001617900536509144, "loss": 2.5444, "step": 403650 }, { "epoch": 0.8041804794083897, "grad_norm": 0.19155694544315338, "learning_rate": 0.0016173774706058203, "loss": 2.5261, "step": 403660 }, { "epoch": 0.8042004016320285, "grad_norm": 0.13771191239356995, "learning_rate": 0.0016168551187866395, "loss": 2.5338, "step": 403670 }, { "epoch": 0.8042203238556674, "grad_norm": 0.15744702517986298, "learning_rate": 0.0016163334781349825, "loss": 2.5367, "step": 403680 }, { "epoch": 0.8042402460793064, "grad_norm": 0.16421563923358917, "learning_rate": 0.001615812545754029, "loss": 2.5218, "step": 403690 }, { "epoch": 0.8042601683029453, "grad_norm": 0.1365087330341339, "learning_rate": 0.0016152923187665732, "loss": 2.522, "step": 403700 }, { "epoch": 0.8042800905265842, "grad_norm": 0.15052145719528198, "learning_rate": 0.0016147727943148357, "loss": 2.5295, "step": 403710 }, { "epoch": 0.8043000127502231, "grad_norm": 0.1319112330675125, "learning_rate": 0.0016142539695602818, "loss": 2.5294, "step": 403720 }, { "epoch": 0.804319934973862, "grad_norm": 0.1522659808397293, "learning_rate": 0.0016137358416834406, "loss": 2.5356, "step": 403730 }, { "epoch": 0.804339857197501, "grad_norm": 0.14807388186454773, "learning_rate": 0.0016132184078837257, "loss": 2.5183, "step": 403740 }, { "epoch": 0.8043597794211399, "grad_norm": 0.14250729978084564, "learning_rate": 0.0016127016653792582, "loss": 2.5262, "step": 403750 }, { "epoch": 0.8043797016447788, "grad_norm": 0.13924705982208252, "learning_rate": 0.0016121856114066936, "loss": 2.5242, "step": 403760 }, { "epoch": 0.8043996238684177, "grad_norm": 0.1428615301847458, "learning_rate": 0.0016116702432210478, "loss": 2.5309, "step": 403770 }, { "epoch": 0.8044195460920566, "grad_norm": 0.1384151726961136, "learning_rate": 0.0016111555580955283, "loss": 2.5311, "step": 403780 }, { "epoch": 0.8044394683156956, "grad_norm": 0.15889878571033478, "learning_rate": 0.0016106415533213643, "loss": 2.5236, "step": 403790 }, { "epoch": 0.8044593905393345, "grad_norm": 0.16636373102664948, "learning_rate": 0.0016101282262076415, "loss": 2.525, "step": 403800 }, { "epoch": 0.8044793127629734, "grad_norm": 0.14169369637966156, "learning_rate": 0.0016096155740811374, "loss": 2.5308, "step": 403810 }, { "epoch": 0.8044992349866122, "grad_norm": 0.17583999037742615, "learning_rate": 0.0016091035942861587, "loss": 2.5158, "step": 403820 }, { "epoch": 0.8045191572102511, "grad_norm": 0.15812978148460388, "learning_rate": 0.0016085922841843814, "loss": 2.5164, "step": 403830 }, { "epoch": 0.8045390794338901, "grad_norm": 0.13438650965690613, "learning_rate": 0.0016080816411546915, "loss": 2.5322, "step": 403840 }, { "epoch": 0.804559001657529, "grad_norm": 0.1568228155374527, "learning_rate": 0.0016075716625930283, "loss": 2.5334, "step": 403850 }, { "epoch": 0.8045789238811679, "grad_norm": 0.17228251695632935, "learning_rate": 0.00160706234591223, "loss": 2.5317, "step": 403860 }, { "epoch": 0.8045988461048068, "grad_norm": 0.13613492250442505, "learning_rate": 0.00160655368854188, "loss": 2.5255, "step": 403870 }, { "epoch": 0.8046187683284457, "grad_norm": 0.13971181213855743, "learning_rate": 0.0016060456879281558, "loss": 2.5236, "step": 403880 }, { "epoch": 0.8046386905520847, "grad_norm": 0.15176191926002502, "learning_rate": 0.0016055383415336797, "loss": 2.5151, "step": 403890 }, { "epoch": 0.8046586127757236, "grad_norm": 0.1343846619129181, "learning_rate": 0.00160503164683737, "loss": 2.5362, "step": 403900 }, { "epoch": 0.8046785349993625, "grad_norm": 0.15300697088241577, "learning_rate": 0.0016045256013342965, "loss": 2.5256, "step": 403910 }, { "epoch": 0.8046984572230014, "grad_norm": 0.17532192170619965, "learning_rate": 0.0016040202025355335, "loss": 2.5301, "step": 403920 }, { "epoch": 0.8047183794466403, "grad_norm": 0.14286136627197266, "learning_rate": 0.00160351544796802, "loss": 2.529, "step": 403930 }, { "epoch": 0.8047383016702793, "grad_norm": 0.16147813200950623, "learning_rate": 0.0016030113351744157, "loss": 2.5289, "step": 403940 }, { "epoch": 0.8047582238939182, "grad_norm": 0.15806658565998077, "learning_rate": 0.0016025078617129642, "loss": 2.5245, "step": 403950 }, { "epoch": 0.804778146117557, "grad_norm": 0.14497427642345428, "learning_rate": 0.001602005025157352, "loss": 2.5236, "step": 403960 }, { "epoch": 0.8047980683411959, "grad_norm": 0.15869663655757904, "learning_rate": 0.0016015028230965745, "loss": 2.5324, "step": 403970 }, { "epoch": 0.8048179905648349, "grad_norm": 0.16916143894195557, "learning_rate": 0.0016010012531348, "loss": 2.5204, "step": 403980 }, { "epoch": 0.8048379127884738, "grad_norm": 0.13573934137821198, "learning_rate": 0.0016005003128912365, "loss": 2.5242, "step": 403990 }, { "epoch": 0.8048578350121127, "grad_norm": 0.15861737728118896, "learning_rate": 0.0016, "loss": 2.5446, "step": 404000 }, { "epoch": 0.8048777572357516, "grad_norm": 0.15019427239894867, "learning_rate": 0.0015995003121099844, "loss": 2.5213, "step": 404010 }, { "epoch": 0.8048976794593905, "grad_norm": 0.14431500434875488, "learning_rate": 0.0015990012468847317, "loss": 2.5355, "step": 404020 }, { "epoch": 0.8049176016830295, "grad_norm": 0.15749326348304749, "learning_rate": 0.0015985028020023054, "loss": 2.5328, "step": 404030 }, { "epoch": 0.8049375239066684, "grad_norm": 0.14044104516506195, "learning_rate": 0.0015980049751551644, "loss": 2.5266, "step": 404040 }, { "epoch": 0.8049574461303073, "grad_norm": 0.17160534858703613, "learning_rate": 0.001597507764050038, "loss": 2.5203, "step": 404050 }, { "epoch": 0.8049773683539462, "grad_norm": 0.13911117613315582, "learning_rate": 0.0015970111664078024, "loss": 2.5242, "step": 404060 }, { "epoch": 0.8049972905775851, "grad_norm": 0.1468229442834854, "learning_rate": 0.0015965151799633597, "loss": 2.5318, "step": 404070 }, { "epoch": 0.8050172128012241, "grad_norm": 0.16870072484016418, "learning_rate": 0.0015960198024655167, "loss": 2.5141, "step": 404080 }, { "epoch": 0.805037135024863, "grad_norm": 0.1362670361995697, "learning_rate": 0.0015955250316768664, "loss": 2.5231, "step": 404090 }, { "epoch": 0.8050570572485019, "grad_norm": 0.16453035175800323, "learning_rate": 0.0015950308653736682, "loss": 2.5422, "step": 404100 }, { "epoch": 0.8050769794721407, "grad_norm": 0.1502913385629654, "learning_rate": 0.0015945373013457342, "loss": 2.5357, "step": 404110 }, { "epoch": 0.8050969016957796, "grad_norm": 0.14375732839107513, "learning_rate": 0.0015940443373963112, "loss": 2.5255, "step": 404120 }, { "epoch": 0.8051168239194186, "grad_norm": 0.14213675260543823, "learning_rate": 0.0015935519713419684, "loss": 2.5376, "step": 404130 }, { "epoch": 0.8051367461430575, "grad_norm": 0.15136271715164185, "learning_rate": 0.0015930602010124839, "loss": 2.5363, "step": 404140 }, { "epoch": 0.8051566683666964, "grad_norm": 0.14599590003490448, "learning_rate": 0.0015925690242507328, "loss": 2.5345, "step": 404150 }, { "epoch": 0.8051765905903353, "grad_norm": 0.1669621765613556, "learning_rate": 0.0015920784389125773, "loss": 2.5265, "step": 404160 }, { "epoch": 0.8051965128139742, "grad_norm": 0.17581488192081451, "learning_rate": 0.0015915884428667573, "loss": 2.5249, "step": 404170 }, { "epoch": 0.8052164350376132, "grad_norm": 0.21804410219192505, "learning_rate": 0.0015910990339947826, "loss": 2.5249, "step": 404180 }, { "epoch": 0.8052363572612521, "grad_norm": 0.16918198764324188, "learning_rate": 0.0015906102101908256, "loss": 2.5333, "step": 404190 }, { "epoch": 0.805256279484891, "grad_norm": 0.1384584754705429, "learning_rate": 0.001590121969361616, "loss": 2.5353, "step": 404200 }, { "epoch": 0.8052762017085299, "grad_norm": 0.1343391239643097, "learning_rate": 0.0015896343094263363, "loss": 2.5166, "step": 404210 }, { "epoch": 0.8052961239321688, "grad_norm": 0.1529293656349182, "learning_rate": 0.0015891472283165173, "loss": 2.5216, "step": 404220 }, { "epoch": 0.8053160461558078, "grad_norm": 0.18074943125247955, "learning_rate": 0.0015886607239759373, "loss": 2.5196, "step": 404230 }, { "epoch": 0.8053359683794467, "grad_norm": 0.14372162520885468, "learning_rate": 0.00158817479436052, "loss": 2.5286, "step": 404240 }, { "epoch": 0.8053558906030855, "grad_norm": 0.17478899657726288, "learning_rate": 0.001587689437438234, "loss": 2.5352, "step": 404250 }, { "epoch": 0.8053758128267244, "grad_norm": 0.15047205984592438, "learning_rate": 0.001587204651188994, "loss": 2.5339, "step": 404260 }, { "epoch": 0.8053957350503633, "grad_norm": 0.12702669203281403, "learning_rate": 0.0015867204336045635, "loss": 2.5274, "step": 404270 }, { "epoch": 0.8054156572740023, "grad_norm": 0.13275332748889923, "learning_rate": 0.0015862367826884561, "loss": 2.5263, "step": 404280 }, { "epoch": 0.8054355794976412, "grad_norm": 0.1862144023180008, "learning_rate": 0.0015857536964558406, "loss": 2.5157, "step": 404290 }, { "epoch": 0.8054555017212801, "grad_norm": 0.13594868779182434, "learning_rate": 0.0015852711729334456, "loss": 2.5216, "step": 404300 }, { "epoch": 0.805475423944919, "grad_norm": 0.13344383239746094, "learning_rate": 0.001584789210159466, "loss": 2.5487, "step": 404310 }, { "epoch": 0.805495346168558, "grad_norm": 0.16984346508979797, "learning_rate": 0.0015843078061834695, "loss": 2.5451, "step": 404320 }, { "epoch": 0.8055152683921969, "grad_norm": 0.15196679532527924, "learning_rate": 0.001583826959066304, "loss": 2.5245, "step": 404330 }, { "epoch": 0.8055351906158358, "grad_norm": 0.18904724717140198, "learning_rate": 0.001583346666880007, "loss": 2.5155, "step": 404340 }, { "epoch": 0.8055551128394747, "grad_norm": 0.13617631793022156, "learning_rate": 0.0015828669277077158, "loss": 2.5356, "step": 404350 }, { "epoch": 0.8055750350631136, "grad_norm": 0.19810739159584045, "learning_rate": 0.001582387739643578, "loss": 2.5295, "step": 404360 }, { "epoch": 0.8055949572867526, "grad_norm": 0.16844980418682098, "learning_rate": 0.0015819091007926627, "loss": 2.5139, "step": 404370 }, { "epoch": 0.8056148795103915, "grad_norm": 0.1379992663860321, "learning_rate": 0.001581431009270873, "loss": 2.5312, "step": 404380 }, { "epoch": 0.8056348017340303, "grad_norm": 0.14723914861679077, "learning_rate": 0.0015809534632048607, "loss": 2.5311, "step": 404390 }, { "epoch": 0.8056547239576692, "grad_norm": 0.14358307421207428, "learning_rate": 0.0015804764607319393, "loss": 2.5361, "step": 404400 }, { "epoch": 0.8056746461813081, "grad_norm": 0.14563529193401337, "learning_rate": 0.00158, "loss": 2.5209, "step": 404410 }, { "epoch": 0.8056945684049471, "grad_norm": 0.13736052811145782, "learning_rate": 0.0015795240791674273, "loss": 2.5334, "step": 404420 }, { "epoch": 0.805714490628586, "grad_norm": 0.14029397070407867, "learning_rate": 0.0015790486964030164, "loss": 2.5236, "step": 404430 }, { "epoch": 0.8057344128522249, "grad_norm": 0.15002787113189697, "learning_rate": 0.0015785738498858906, "loss": 2.5264, "step": 404440 }, { "epoch": 0.8057543350758638, "grad_norm": 0.15306220948696136, "learning_rate": 0.0015780995378054203, "loss": 2.5307, "step": 404450 }, { "epoch": 0.8057742572995027, "grad_norm": 0.1507221758365631, "learning_rate": 0.0015776257583611426, "loss": 2.5145, "step": 404460 }, { "epoch": 0.8057941795231417, "grad_norm": 0.14715124666690826, "learning_rate": 0.0015771525097626805, "loss": 2.5296, "step": 404470 }, { "epoch": 0.8058141017467806, "grad_norm": 0.14571057260036469, "learning_rate": 0.0015766797902296657, "loss": 2.5238, "step": 404480 }, { "epoch": 0.8058340239704195, "grad_norm": 0.15060877799987793, "learning_rate": 0.0015762075979916582, "loss": 2.5319, "step": 404490 }, { "epoch": 0.8058539461940584, "grad_norm": 0.15207520127296448, "learning_rate": 0.0015757359312880714, "loss": 2.5324, "step": 404500 }, { "epoch": 0.8058738684176973, "grad_norm": 0.16044652462005615, "learning_rate": 0.001575264788368094, "loss": 2.5184, "step": 404510 }, { "epoch": 0.8058937906413363, "grad_norm": 0.16415245831012726, "learning_rate": 0.001574794167490614, "loss": 2.5207, "step": 404520 }, { "epoch": 0.8059137128649752, "grad_norm": 0.14860884845256805, "learning_rate": 0.0015743240669241448, "loss": 2.5216, "step": 404530 }, { "epoch": 0.805933635088614, "grad_norm": 0.1493440419435501, "learning_rate": 0.0015738544849467498, "loss": 2.5258, "step": 404540 }, { "epoch": 0.8059535573122529, "grad_norm": 0.13825571537017822, "learning_rate": 0.0015733854198459692, "loss": 2.5241, "step": 404550 }, { "epoch": 0.8059734795358918, "grad_norm": 0.18653543293476105, "learning_rate": 0.0015729168699187474, "loss": 2.5261, "step": 404560 }, { "epoch": 0.8059934017595308, "grad_norm": 0.17432361841201782, "learning_rate": 0.0015724488334713609, "loss": 2.5329, "step": 404570 }, { "epoch": 0.8060133239831697, "grad_norm": 0.13334448635578156, "learning_rate": 0.0015719813088193463, "loss": 2.5202, "step": 404580 }, { "epoch": 0.8060332462068086, "grad_norm": 0.16499777138233185, "learning_rate": 0.001571514294287429, "loss": 2.5279, "step": 404590 }, { "epoch": 0.8060531684304475, "grad_norm": 0.16274775564670563, "learning_rate": 0.0015710477882094555, "loss": 2.5315, "step": 404600 }, { "epoch": 0.8060730906540865, "grad_norm": 0.13896845281124115, "learning_rate": 0.0015705817889283223, "loss": 2.5317, "step": 404610 }, { "epoch": 0.8060930128777254, "grad_norm": 0.1453430950641632, "learning_rate": 0.0015701162947959065, "loss": 2.5453, "step": 404620 }, { "epoch": 0.8061129351013643, "grad_norm": 0.16606010496616364, "learning_rate": 0.0015696513041729998, "loss": 2.5201, "step": 404630 }, { "epoch": 0.8061328573250032, "grad_norm": 0.1473844349384308, "learning_rate": 0.0015691868154292397, "loss": 2.5219, "step": 404640 }, { "epoch": 0.8061527795486421, "grad_norm": 0.1557721048593521, "learning_rate": 0.0015687228269430436, "loss": 2.54, "step": 404650 }, { "epoch": 0.8061727017722811, "grad_norm": 0.17044945061206818, "learning_rate": 0.0015682593371015418, "loss": 2.5344, "step": 404660 }, { "epoch": 0.80619262399592, "grad_norm": 0.13964295387268066, "learning_rate": 0.0015677963443005139, "loss": 2.5182, "step": 404670 }, { "epoch": 0.8062125462195588, "grad_norm": 0.1531052440404892, "learning_rate": 0.0015673338469443213, "loss": 2.5177, "step": 404680 }, { "epoch": 0.8062324684431977, "grad_norm": 0.1545148640871048, "learning_rate": 0.0015668718434458458, "loss": 2.5366, "step": 404690 }, { "epoch": 0.8062523906668366, "grad_norm": 0.16214096546173096, "learning_rate": 0.001566410332226424, "loss": 2.5277, "step": 404700 }, { "epoch": 0.8062723128904756, "grad_norm": 0.17652638256549835, "learning_rate": 0.0015659493117157859, "loss": 2.5303, "step": 404710 }, { "epoch": 0.8062922351141145, "grad_norm": 0.14900349080562592, "learning_rate": 0.0015654887803519912, "loss": 2.5403, "step": 404720 }, { "epoch": 0.8063121573377534, "grad_norm": 0.1448216587305069, "learning_rate": 0.001565028736581369, "loss": 2.5313, "step": 404730 }, { "epoch": 0.8063320795613923, "grad_norm": 0.15683117508888245, "learning_rate": 0.0015645691788584552, "loss": 2.5147, "step": 404740 }, { "epoch": 0.8063520017850312, "grad_norm": 0.1495533585548401, "learning_rate": 0.0015641101056459328, "loss": 2.5287, "step": 404750 }, { "epoch": 0.8063719240086702, "grad_norm": 0.17584402859210968, "learning_rate": 0.0015636515154145714, "loss": 2.5227, "step": 404760 }, { "epoch": 0.8063918462323091, "grad_norm": 0.15908460319042206, "learning_rate": 0.001563193406643169, "loss": 2.513, "step": 404770 }, { "epoch": 0.806411768455948, "grad_norm": 0.1398417204618454, "learning_rate": 0.001562735777818491, "loss": 2.5124, "step": 404780 }, { "epoch": 0.8064316906795869, "grad_norm": 0.16115862131118774, "learning_rate": 0.0015622786274352143, "loss": 2.5139, "step": 404790 }, { "epoch": 0.8064516129032258, "grad_norm": 0.14552059769630432, "learning_rate": 0.001561821953995867, "loss": 2.5244, "step": 404800 }, { "epoch": 0.8064715351268648, "grad_norm": 0.15204565227031708, "learning_rate": 0.0015613657560107738, "loss": 2.5188, "step": 404810 }, { "epoch": 0.8064914573505036, "grad_norm": 0.15722933411598206, "learning_rate": 0.001560910031997997, "loss": 2.5246, "step": 404820 }, { "epoch": 0.8065113795741425, "grad_norm": 0.15105696022510529, "learning_rate": 0.001560454780483282, "loss": 2.5351, "step": 404830 }, { "epoch": 0.8065313017977814, "grad_norm": 0.15387944877147675, "learning_rate": 0.0015600000000000002, "loss": 2.5335, "step": 404840 }, { "epoch": 0.8065512240214203, "grad_norm": 0.14905111491680145, "learning_rate": 0.001559545689089095, "loss": 2.5191, "step": 404850 }, { "epoch": 0.8065711462450593, "grad_norm": 0.12595100700855255, "learning_rate": 0.001559091846299028, "loss": 2.5362, "step": 404860 }, { "epoch": 0.8065910684686982, "grad_norm": 0.1413847804069519, "learning_rate": 0.0015586384701857217, "loss": 2.5262, "step": 404870 }, { "epoch": 0.8066109906923371, "grad_norm": 0.15717296302318573, "learning_rate": 0.0015581855593125096, "loss": 2.5181, "step": 404880 }, { "epoch": 0.806630912915976, "grad_norm": 0.1546061933040619, "learning_rate": 0.0015577331122500805, "loss": 2.5216, "step": 404890 }, { "epoch": 0.806650835139615, "grad_norm": 0.15254074335098267, "learning_rate": 0.0015572811275764268, "loss": 2.5282, "step": 404900 }, { "epoch": 0.8066707573632539, "grad_norm": 0.15371611714363098, "learning_rate": 0.0015568296038767932, "loss": 2.524, "step": 404910 }, { "epoch": 0.8066906795868928, "grad_norm": 0.16548311710357666, "learning_rate": 0.0015563785397436235, "loss": 2.5257, "step": 404920 }, { "epoch": 0.8067106018105317, "grad_norm": 0.1513560712337494, "learning_rate": 0.0015559279337765098, "loss": 2.52, "step": 404930 }, { "epoch": 0.8067305240341706, "grad_norm": 0.15448595583438873, "learning_rate": 0.0015554777845821425, "loss": 2.5508, "step": 404940 }, { "epoch": 0.8067504462578096, "grad_norm": 0.14928005635738373, "learning_rate": 0.0015550280907742603, "loss": 2.5253, "step": 404950 }, { "epoch": 0.8067703684814485, "grad_norm": 0.14366421103477478, "learning_rate": 0.0015545788509735983, "loss": 2.5313, "step": 404960 }, { "epoch": 0.8067902907050873, "grad_norm": 0.1381356120109558, "learning_rate": 0.001554130063807841, "loss": 2.5265, "step": 404970 }, { "epoch": 0.8068102129287262, "grad_norm": 0.18639621138572693, "learning_rate": 0.001553681727911572, "loss": 2.5253, "step": 404980 }, { "epoch": 0.8068301351523651, "grad_norm": 0.14262939989566803, "learning_rate": 0.0015532338419262267, "loss": 2.5255, "step": 404990 }, { "epoch": 0.8068500573760041, "grad_norm": 0.14426636695861816, "learning_rate": 0.0015527864045000422, "loss": 2.5336, "step": 405000 }, { "epoch": 0.806869979599643, "grad_norm": 0.15819764137268066, "learning_rate": 0.0015523394142880123, "loss": 2.5192, "step": 405010 }, { "epoch": 0.8068899018232819, "grad_norm": 0.15210379660129547, "learning_rate": 0.0015518928699518385, "loss": 2.5299, "step": 405020 }, { "epoch": 0.8069098240469208, "grad_norm": 0.12520070374011993, "learning_rate": 0.001551446770159884, "loss": 2.5298, "step": 405030 }, { "epoch": 0.8069297462705597, "grad_norm": 0.14333483576774597, "learning_rate": 0.001551001113587127, "loss": 2.5055, "step": 405040 }, { "epoch": 0.8069496684941987, "grad_norm": 0.13570982217788696, "learning_rate": 0.0015505558989151154, "loss": 2.5493, "step": 405050 }, { "epoch": 0.8069695907178376, "grad_norm": 0.17876489460468292, "learning_rate": 0.0015501111248319205, "loss": 2.5053, "step": 405060 }, { "epoch": 0.8069895129414765, "grad_norm": 0.1779024302959442, "learning_rate": 0.001549666790032092, "loss": 2.5209, "step": 405070 }, { "epoch": 0.8070094351651154, "grad_norm": 0.1700783520936966, "learning_rate": 0.0015492228932166144, "loss": 2.5232, "step": 405080 }, { "epoch": 0.8070293573887543, "grad_norm": 0.13282260298728943, "learning_rate": 0.0015487794330928607, "loss": 2.5027, "step": 405090 }, { "epoch": 0.8070492796123933, "grad_norm": 0.14405451714992523, "learning_rate": 0.0015483364083745514, "loss": 2.5249, "step": 405100 }, { "epoch": 0.8070692018360321, "grad_norm": 0.14529778063297272, "learning_rate": 0.0015478938177817074, "loss": 2.5317, "step": 405110 }, { "epoch": 0.807089124059671, "grad_norm": 0.16622446477413177, "learning_rate": 0.0015474516600406097, "loss": 2.5179, "step": 405120 }, { "epoch": 0.8071090462833099, "grad_norm": 0.14031995832920074, "learning_rate": 0.001547009933883755, "loss": 2.5063, "step": 405130 }, { "epoch": 0.8071289685069488, "grad_norm": 0.14721274375915527, "learning_rate": 0.0015465686380498147, "loss": 2.5296, "step": 405140 }, { "epoch": 0.8071488907305878, "grad_norm": 0.1752372533082962, "learning_rate": 0.0015461277712835913, "loss": 2.5359, "step": 405150 }, { "epoch": 0.8071688129542267, "grad_norm": 0.1386176496744156, "learning_rate": 0.0015456873323359782, "loss": 2.5285, "step": 405160 }, { "epoch": 0.8071887351778656, "grad_norm": 0.16739681363105774, "learning_rate": 0.001545247319963917, "loss": 2.5343, "step": 405170 }, { "epoch": 0.8072086574015045, "grad_norm": 0.14393192529678345, "learning_rate": 0.0015448077329303582, "loss": 2.5203, "step": 405180 }, { "epoch": 0.8072285796251435, "grad_norm": 0.16046687960624695, "learning_rate": 0.0015443685700042193, "loss": 2.5333, "step": 405190 }, { "epoch": 0.8072485018487824, "grad_norm": 0.14269909262657166, "learning_rate": 0.001543929829960345, "loss": 2.5174, "step": 405200 }, { "epoch": 0.8072684240724213, "grad_norm": 0.13366413116455078, "learning_rate": 0.001543491511579467, "loss": 2.524, "step": 405210 }, { "epoch": 0.8072883462960602, "grad_norm": 0.1536739021539688, "learning_rate": 0.0015430536136481657, "loss": 2.5169, "step": 405220 }, { "epoch": 0.8073082685196991, "grad_norm": 0.13915008306503296, "learning_rate": 0.0015426161349588292, "loss": 2.5211, "step": 405230 }, { "epoch": 0.8073281907433381, "grad_norm": 0.17403431236743927, "learning_rate": 0.0015421790743096163, "loss": 2.5217, "step": 405240 }, { "epoch": 0.807348112966977, "grad_norm": 0.14719608426094055, "learning_rate": 0.001541742430504416, "loss": 2.5223, "step": 405250 }, { "epoch": 0.8073680351906158, "grad_norm": 0.13940811157226562, "learning_rate": 0.0015413062023528115, "loss": 2.5225, "step": 405260 }, { "epoch": 0.8073879574142547, "grad_norm": 0.13722939789295197, "learning_rate": 0.0015408703886700401, "loss": 2.5115, "step": 405270 }, { "epoch": 0.8074078796378936, "grad_norm": 0.15839678049087524, "learning_rate": 0.0015404349882769577, "loss": 2.5355, "step": 405280 }, { "epoch": 0.8074278018615326, "grad_norm": 0.1649620234966278, "learning_rate": 0.0015400000000000001, "loss": 2.5311, "step": 405290 }, { "epoch": 0.8074477240851715, "grad_norm": 0.16379845142364502, "learning_rate": 0.0015395654226711467, "loss": 2.5126, "step": 405300 }, { "epoch": 0.8074676463088104, "grad_norm": 0.14117339253425598, "learning_rate": 0.0015391312551278835, "loss": 2.5221, "step": 405310 }, { "epoch": 0.8074875685324493, "grad_norm": 0.1539071649312973, "learning_rate": 0.0015386974962131683, "loss": 2.5289, "step": 405320 }, { "epoch": 0.8075074907560882, "grad_norm": 0.13863873481750488, "learning_rate": 0.0015382641447753923, "loss": 2.5185, "step": 405330 }, { "epoch": 0.8075274129797272, "grad_norm": 0.13526935875415802, "learning_rate": 0.0015378311996683462, "loss": 2.533, "step": 405340 }, { "epoch": 0.8075473352033661, "grad_norm": 0.15889368951320648, "learning_rate": 0.0015373986597511848, "loss": 2.5432, "step": 405350 }, { "epoch": 0.807567257427005, "grad_norm": 0.16591043770313263, "learning_rate": 0.001536966523888391, "loss": 2.5333, "step": 405360 }, { "epoch": 0.8075871796506439, "grad_norm": 0.16340656578540802, "learning_rate": 0.0015365347909497415, "loss": 2.5207, "step": 405370 }, { "epoch": 0.8076071018742828, "grad_norm": 0.14483694732189178, "learning_rate": 0.001536103459810272, "loss": 2.534, "step": 405380 }, { "epoch": 0.8076270240979218, "grad_norm": 0.1565070003271103, "learning_rate": 0.001535672529350244, "loss": 2.5231, "step": 405390 }, { "epoch": 0.8076469463215606, "grad_norm": 0.14457125961780548, "learning_rate": 0.00153524199845511, "loss": 2.5256, "step": 405400 }, { "epoch": 0.8076668685451995, "grad_norm": 0.16656456887722015, "learning_rate": 0.0015348118660154797, "loss": 2.5303, "step": 405410 }, { "epoch": 0.8076867907688384, "grad_norm": 0.14919495582580566, "learning_rate": 0.0015343821309270872, "loss": 2.5246, "step": 405420 }, { "epoch": 0.8077067129924773, "grad_norm": 0.1491183489561081, "learning_rate": 0.0015339527920907583, "loss": 2.5326, "step": 405430 }, { "epoch": 0.8077266352161163, "grad_norm": 0.1472022384405136, "learning_rate": 0.0015335238484123761, "loss": 2.5275, "step": 405440 }, { "epoch": 0.8077465574397552, "grad_norm": 0.16623903810977936, "learning_rate": 0.00153309529880285, "loss": 2.5205, "step": 405450 }, { "epoch": 0.8077664796633941, "grad_norm": 0.16462968289852142, "learning_rate": 0.001532667142178083, "loss": 2.5169, "step": 405460 }, { "epoch": 0.807786401887033, "grad_norm": 0.17243967950344086, "learning_rate": 0.00153223937745894, "loss": 2.5266, "step": 405470 }, { "epoch": 0.807806324110672, "grad_norm": 0.13517554104328156, "learning_rate": 0.0015318120035712152, "loss": 2.5431, "step": 405480 }, { "epoch": 0.8078262463343109, "grad_norm": 0.1499304622411728, "learning_rate": 0.0015313850194456006, "loss": 2.5331, "step": 405490 }, { "epoch": 0.8078461685579498, "grad_norm": 0.13176684081554413, "learning_rate": 0.0015309584240176572, "loss": 2.512, "step": 405500 }, { "epoch": 0.8078660907815887, "grad_norm": 0.1530952900648117, "learning_rate": 0.0015305322162277799, "loss": 2.5159, "step": 405510 }, { "epoch": 0.8078860130052276, "grad_norm": 0.1601148098707199, "learning_rate": 0.001530106395021171, "loss": 2.5205, "step": 405520 }, { "epoch": 0.8079059352288666, "grad_norm": 0.16111819446086884, "learning_rate": 0.0015296809593478061, "loss": 2.5219, "step": 405530 }, { "epoch": 0.8079258574525054, "grad_norm": 0.13422541320323944, "learning_rate": 0.001529255908162407, "loss": 2.5191, "step": 405540 }, { "epoch": 0.8079457796761443, "grad_norm": 0.17997071146965027, "learning_rate": 0.0015288312404244102, "loss": 2.5256, "step": 405550 }, { "epoch": 0.8079657018997832, "grad_norm": 0.1621672362089157, "learning_rate": 0.0015284069550979362, "loss": 2.518, "step": 405560 }, { "epoch": 0.8079856241234221, "grad_norm": 0.14823749661445618, "learning_rate": 0.0015279830511517622, "loss": 2.5231, "step": 405570 }, { "epoch": 0.8080055463470611, "grad_norm": 0.1530391424894333, "learning_rate": 0.0015275595275592913, "loss": 2.5119, "step": 405580 }, { "epoch": 0.8080254685707, "grad_norm": 0.13742245733737946, "learning_rate": 0.0015271363832985245, "loss": 2.5236, "step": 405590 }, { "epoch": 0.8080453907943389, "grad_norm": 0.1794224977493286, "learning_rate": 0.001526713617352031, "loss": 2.521, "step": 405600 }, { "epoch": 0.8080653130179778, "grad_norm": 0.15097495913505554, "learning_rate": 0.0015262912287069197, "loss": 2.5312, "step": 405610 }, { "epoch": 0.8080852352416167, "grad_norm": 0.15401999652385712, "learning_rate": 0.0015258692163548121, "loss": 2.5207, "step": 405620 }, { "epoch": 0.8081051574652557, "grad_norm": 0.13267263770103455, "learning_rate": 0.0015254475792918132, "loss": 2.5269, "step": 405630 }, { "epoch": 0.8081250796888946, "grad_norm": 0.13014505803585052, "learning_rate": 0.0015250263165184834, "loss": 2.5139, "step": 405640 }, { "epoch": 0.8081450019125335, "grad_norm": 0.18090347945690155, "learning_rate": 0.0015246054270398116, "loss": 2.5126, "step": 405650 }, { "epoch": 0.8081649241361724, "grad_norm": 0.18106521666049957, "learning_rate": 0.0015241849098651871, "loss": 2.5286, "step": 405660 }, { "epoch": 0.8081848463598112, "grad_norm": 0.1616157591342926, "learning_rate": 0.0015237647640083739, "loss": 2.5267, "step": 405670 }, { "epoch": 0.8082047685834503, "grad_norm": 0.14229744672775269, "learning_rate": 0.0015233449884874805, "loss": 2.5254, "step": 405680 }, { "epoch": 0.8082246908070891, "grad_norm": 0.14616569876670837, "learning_rate": 0.0015229255823249375, "loss": 2.5435, "step": 405690 }, { "epoch": 0.808244613030728, "grad_norm": 0.19235071539878845, "learning_rate": 0.001522506544547467, "loss": 2.5263, "step": 405700 }, { "epoch": 0.8082645352543669, "grad_norm": 0.14887522161006927, "learning_rate": 0.0015220878741860591, "loss": 2.5125, "step": 405710 }, { "epoch": 0.8082844574780058, "grad_norm": 0.14623574912548065, "learning_rate": 0.0015216695702759439, "loss": 2.5336, "step": 405720 }, { "epoch": 0.8083043797016448, "grad_norm": 0.1589062362909317, "learning_rate": 0.001521251631856567, "loss": 2.5328, "step": 405730 }, { "epoch": 0.8083243019252837, "grad_norm": 0.1555103063583374, "learning_rate": 0.0015208340579715624, "loss": 2.5147, "step": 405740 }, { "epoch": 0.8083442241489226, "grad_norm": 0.14954765141010284, "learning_rate": 0.001520416847668728, "loss": 2.5153, "step": 405750 }, { "epoch": 0.8083641463725615, "grad_norm": 0.24124273657798767, "learning_rate": 0.00152, "loss": 2.5282, "step": 405760 }, { "epoch": 0.8083840685962004, "grad_norm": 0.14841607213020325, "learning_rate": 0.0015195835140214276, "loss": 2.5146, "step": 405770 }, { "epoch": 0.8084039908198394, "grad_norm": 0.14596320688724518, "learning_rate": 0.001519167388793148, "loss": 2.5221, "step": 405780 }, { "epoch": 0.8084239130434783, "grad_norm": 0.19645261764526367, "learning_rate": 0.0015187516233793614, "loss": 2.5235, "step": 405790 }, { "epoch": 0.8084438352671172, "grad_norm": 0.13135820627212524, "learning_rate": 0.001518336216848308, "loss": 2.5244, "step": 405800 }, { "epoch": 0.808463757490756, "grad_norm": 0.15145520865917206, "learning_rate": 0.001517921168272242, "loss": 2.53, "step": 405810 }, { "epoch": 0.808483679714395, "grad_norm": 0.1378210335969925, "learning_rate": 0.0015175064767274074, "loss": 2.5199, "step": 405820 }, { "epoch": 0.8085036019380339, "grad_norm": 0.17966905236244202, "learning_rate": 0.0015170921412940147, "loss": 2.5189, "step": 405830 }, { "epoch": 0.8085235241616728, "grad_norm": 0.16239047050476074, "learning_rate": 0.001516678161056217, "loss": 2.5087, "step": 405840 }, { "epoch": 0.8085434463853117, "grad_norm": 0.14358296990394592, "learning_rate": 0.0015162645351020872, "loss": 2.511, "step": 405850 }, { "epoch": 0.8085633686089506, "grad_norm": 0.1564370095729828, "learning_rate": 0.0015158512625235918, "loss": 2.5167, "step": 405860 }, { "epoch": 0.8085832908325896, "grad_norm": 0.15484106540679932, "learning_rate": 0.0015154383424165713, "loss": 2.526, "step": 405870 }, { "epoch": 0.8086032130562285, "grad_norm": 0.15546078979969025, "learning_rate": 0.0015150257738807142, "loss": 2.5104, "step": 405880 }, { "epoch": 0.8086231352798674, "grad_norm": 0.18981584906578064, "learning_rate": 0.0015146135560195361, "loss": 2.5275, "step": 405890 }, { "epoch": 0.8086430575035063, "grad_norm": 0.1329488605260849, "learning_rate": 0.0015142016879403552, "loss": 2.5244, "step": 405900 }, { "epoch": 0.8086629797271452, "grad_norm": 0.17071513831615448, "learning_rate": 0.0015137901687542713, "loss": 2.5179, "step": 405910 }, { "epoch": 0.8086829019507842, "grad_norm": 0.1672687530517578, "learning_rate": 0.0015133789975761425, "loss": 2.5337, "step": 405920 }, { "epoch": 0.8087028241744231, "grad_norm": 0.14356397092342377, "learning_rate": 0.0015129681735245634, "loss": 2.52, "step": 405930 }, { "epoch": 0.808722746398062, "grad_norm": 0.1309930384159088, "learning_rate": 0.0015125576957218426, "loss": 2.5008, "step": 405940 }, { "epoch": 0.8087426686217009, "grad_norm": 0.1525850147008896, "learning_rate": 0.0015121475632939815, "loss": 2.5117, "step": 405950 }, { "epoch": 0.8087625908453397, "grad_norm": 0.13608254492282867, "learning_rate": 0.0015117377753706518, "loss": 2.5172, "step": 405960 }, { "epoch": 0.8087825130689787, "grad_norm": 0.15202686190605164, "learning_rate": 0.0015113283310851753, "loss": 2.523, "step": 405970 }, { "epoch": 0.8088024352926176, "grad_norm": 0.16987162828445435, "learning_rate": 0.0015109192295745006, "loss": 2.5152, "step": 405980 }, { "epoch": 0.8088223575162565, "grad_norm": 0.1522826999425888, "learning_rate": 0.0015105104699791835, "loss": 2.5224, "step": 405990 }, { "epoch": 0.8088422797398954, "grad_norm": 0.16502727568149567, "learning_rate": 0.0015101020514433645, "loss": 2.5348, "step": 406000 }, { "epoch": 0.8088622019635343, "grad_norm": 0.169258251786232, "learning_rate": 0.0015096939731147497, "loss": 2.5203, "step": 406010 }, { "epoch": 0.8088821241871733, "grad_norm": 0.1449711173772812, "learning_rate": 0.0015092862341445881, "loss": 2.5339, "step": 406020 }, { "epoch": 0.8089020464108122, "grad_norm": 0.129799947142601, "learning_rate": 0.0015088788336876532, "loss": 2.5045, "step": 406030 }, { "epoch": 0.8089219686344511, "grad_norm": 0.14321978390216827, "learning_rate": 0.0015084717709022198, "loss": 2.5189, "step": 406040 }, { "epoch": 0.80894189085809, "grad_norm": 0.1384783834218979, "learning_rate": 0.0015080650449500463, "loss": 2.5137, "step": 406050 }, { "epoch": 0.8089618130817289, "grad_norm": 0.15002942085266113, "learning_rate": 0.0015076586549963532, "loss": 2.5262, "step": 406060 }, { "epoch": 0.8089817353053679, "grad_norm": 0.15933191776275635, "learning_rate": 0.0015072526002098032, "loss": 2.5279, "step": 406070 }, { "epoch": 0.8090016575290068, "grad_norm": 0.15870977938175201, "learning_rate": 0.0015068468797624819, "loss": 2.5225, "step": 406080 }, { "epoch": 0.8090215797526457, "grad_norm": 0.1423741728067398, "learning_rate": 0.0015064414928298772, "loss": 2.5139, "step": 406090 }, { "epoch": 0.8090415019762845, "grad_norm": 0.14929114282131195, "learning_rate": 0.0015060364385908614, "loss": 2.513, "step": 406100 }, { "epoch": 0.8090614241999236, "grad_norm": 0.1420724242925644, "learning_rate": 0.0015056317162276689, "loss": 2.5201, "step": 406110 }, { "epoch": 0.8090813464235624, "grad_norm": 0.1490645706653595, "learning_rate": 0.0015052273249258807, "loss": 2.512, "step": 406120 }, { "epoch": 0.8091012686472013, "grad_norm": 0.15468940138816833, "learning_rate": 0.0015048232638744021, "loss": 2.5206, "step": 406130 }, { "epoch": 0.8091211908708402, "grad_norm": 0.15857037901878357, "learning_rate": 0.0015044195322654452, "loss": 2.5278, "step": 406140 }, { "epoch": 0.8091411130944791, "grad_norm": 0.15421396493911743, "learning_rate": 0.0015040161292945102, "loss": 2.5368, "step": 406150 }, { "epoch": 0.8091610353181181, "grad_norm": 0.13370683789253235, "learning_rate": 0.0015036130541603656, "loss": 2.5266, "step": 406160 }, { "epoch": 0.809180957541757, "grad_norm": 0.17055553197860718, "learning_rate": 0.0015032103060650312, "loss": 2.5211, "step": 406170 }, { "epoch": 0.8092008797653959, "grad_norm": 0.14788246154785156, "learning_rate": 0.001502807884213758, "loss": 2.5329, "step": 406180 }, { "epoch": 0.8092208019890348, "grad_norm": 0.1279459148645401, "learning_rate": 0.001502405787815011, "loss": 2.5257, "step": 406190 }, { "epoch": 0.8092407242126737, "grad_norm": 0.15908844769001007, "learning_rate": 0.0015020040160804507, "loss": 2.515, "step": 406200 }, { "epoch": 0.8092606464363127, "grad_norm": 0.14598949253559113, "learning_rate": 0.0015016025682249156, "loss": 2.5344, "step": 406210 }, { "epoch": 0.8092805686599516, "grad_norm": 0.1560278683900833, "learning_rate": 0.0015012014434664029, "loss": 2.5209, "step": 406220 }, { "epoch": 0.8093004908835905, "grad_norm": 0.1661209762096405, "learning_rate": 0.0015008006410260526, "loss": 2.5109, "step": 406230 }, { "epoch": 0.8093204131072294, "grad_norm": 0.14738202095031738, "learning_rate": 0.001500400160128128, "loss": 2.5252, "step": 406240 }, { "epoch": 0.8093403353308682, "grad_norm": 0.14838267862796783, "learning_rate": 0.0015, "loss": 2.5316, "step": 406250 }, { "epoch": 0.8093602575545072, "grad_norm": 0.18667949736118317, "learning_rate": 0.001499600159872128, "loss": 2.5282, "step": 406260 }, { "epoch": 0.8093801797781461, "grad_norm": 0.15787729620933533, "learning_rate": 0.0014992006389780434, "loss": 2.5202, "step": 406270 }, { "epoch": 0.809400102001785, "grad_norm": 0.16745099425315857, "learning_rate": 0.0014988014365543334, "loss": 2.5247, "step": 406280 }, { "epoch": 0.8094200242254239, "grad_norm": 0.13821086287498474, "learning_rate": 0.0014984025518406218, "loss": 2.5132, "step": 406290 }, { "epoch": 0.8094399464490628, "grad_norm": 0.13288161158561707, "learning_rate": 0.0014980039840795547, "loss": 2.5098, "step": 406300 }, { "epoch": 0.8094598686727018, "grad_norm": 0.13780973851680756, "learning_rate": 0.0014976057325167811, "loss": 2.5227, "step": 406310 }, { "epoch": 0.8094797908963407, "grad_norm": 0.1297537237405777, "learning_rate": 0.0014972077964009384, "loss": 2.5182, "step": 406320 }, { "epoch": 0.8094997131199796, "grad_norm": 0.14667584002017975, "learning_rate": 0.0014968101749836351, "loss": 2.5145, "step": 406330 }, { "epoch": 0.8095196353436185, "grad_norm": 0.16383539140224457, "learning_rate": 0.0014964128675194332, "loss": 2.5211, "step": 406340 }, { "epoch": 0.8095395575672574, "grad_norm": 0.13295769691467285, "learning_rate": 0.0014960158732658338, "loss": 2.5338, "step": 406350 }, { "epoch": 0.8095594797908964, "grad_norm": 0.15100811421871185, "learning_rate": 0.0014956191914832604, "loss": 2.5218, "step": 406360 }, { "epoch": 0.8095794020145353, "grad_norm": 0.1401727944612503, "learning_rate": 0.0014952228214350413, "loss": 2.5162, "step": 406370 }, { "epoch": 0.8095993242381742, "grad_norm": 0.1824701875448227, "learning_rate": 0.0014948267623873966, "loss": 2.5344, "step": 406380 }, { "epoch": 0.809619246461813, "grad_norm": 0.14474736154079437, "learning_rate": 0.0014944310136094186, "loss": 2.5284, "step": 406390 }, { "epoch": 0.809639168685452, "grad_norm": 0.15539000928401947, "learning_rate": 0.0014940355743730594, "loss": 2.5163, "step": 406400 }, { "epoch": 0.8096590909090909, "grad_norm": 0.1528388410806656, "learning_rate": 0.0014936404439531135, "loss": 2.5329, "step": 406410 }, { "epoch": 0.8096790131327298, "grad_norm": 0.18642409145832062, "learning_rate": 0.001493245621627203, "loss": 2.5146, "step": 406420 }, { "epoch": 0.8096989353563687, "grad_norm": 0.14324317872524261, "learning_rate": 0.0014928511066757614, "loss": 2.521, "step": 406430 }, { "epoch": 0.8097188575800076, "grad_norm": 0.13232925534248352, "learning_rate": 0.0014924568983820192, "loss": 2.5253, "step": 406440 }, { "epoch": 0.8097387798036466, "grad_norm": 0.1410466879606247, "learning_rate": 0.0014920629960319882, "loss": 2.5177, "step": 406450 }, { "epoch": 0.8097587020272855, "grad_norm": 0.1367514431476593, "learning_rate": 0.0014916693989144468, "loss": 2.5279, "step": 406460 }, { "epoch": 0.8097786242509244, "grad_norm": 0.16532893478870392, "learning_rate": 0.001491276106320924, "loss": 2.5167, "step": 406470 }, { "epoch": 0.8097985464745633, "grad_norm": 0.14396481215953827, "learning_rate": 0.0014908831175456858, "loss": 2.5232, "step": 406480 }, { "epoch": 0.8098184686982022, "grad_norm": 0.14559920132160187, "learning_rate": 0.0014904904318857202, "loss": 2.5378, "step": 406490 }, { "epoch": 0.8098383909218412, "grad_norm": 0.15056177973747253, "learning_rate": 0.0014900980486407215, "loss": 2.5255, "step": 406500 }, { "epoch": 0.8098583131454801, "grad_norm": 0.15216940641403198, "learning_rate": 0.0014897059671130771, "loss": 2.5386, "step": 406510 }, { "epoch": 0.809878235369119, "grad_norm": 0.15851502120494843, "learning_rate": 0.0014893141866078518, "loss": 2.5272, "step": 406520 }, { "epoch": 0.8098981575927579, "grad_norm": 0.16973945498466492, "learning_rate": 0.0014889227064327746, "loss": 2.5058, "step": 406530 }, { "epoch": 0.8099180798163967, "grad_norm": 0.14633134007453918, "learning_rate": 0.0014885315258982234, "loss": 2.5128, "step": 406540 }, { "epoch": 0.8099380020400357, "grad_norm": 0.17269651591777802, "learning_rate": 0.001488140644317211, "loss": 2.5327, "step": 406550 }, { "epoch": 0.8099579242636746, "grad_norm": 0.13309189677238464, "learning_rate": 0.0014877500610053721, "loss": 2.5163, "step": 406560 }, { "epoch": 0.8099778464873135, "grad_norm": 0.16278810799121857, "learning_rate": 0.0014873597752809483, "loss": 2.5138, "step": 406570 }, { "epoch": 0.8099977687109524, "grad_norm": 0.15599770843982697, "learning_rate": 0.0014869697864647736, "loss": 2.5138, "step": 406580 }, { "epoch": 0.8100176909345913, "grad_norm": 0.1701647788286209, "learning_rate": 0.0014865800938802627, "loss": 2.5292, "step": 406590 }, { "epoch": 0.8100376131582303, "grad_norm": 0.14205420017242432, "learning_rate": 0.0014861906968533947, "loss": 2.5231, "step": 406600 }, { "epoch": 0.8100575353818692, "grad_norm": 0.14001254737377167, "learning_rate": 0.0014858015947127024, "loss": 2.5098, "step": 406610 }, { "epoch": 0.8100774576055081, "grad_norm": 0.1415794938802719, "learning_rate": 0.0014854127867892557, "loss": 2.5226, "step": 406620 }, { "epoch": 0.810097379829147, "grad_norm": 0.16022507846355438, "learning_rate": 0.0014850242724166508, "loss": 2.5216, "step": 406630 }, { "epoch": 0.8101173020527859, "grad_norm": 0.129188671708107, "learning_rate": 0.001484636050930995, "loss": 2.5276, "step": 406640 }, { "epoch": 0.8101372242764249, "grad_norm": 0.16384637355804443, "learning_rate": 0.001484248121670895, "loss": 2.5222, "step": 406650 }, { "epoch": 0.8101571465000638, "grad_norm": 0.1556120663881302, "learning_rate": 0.0014838604839774426, "loss": 2.5166, "step": 406660 }, { "epoch": 0.8101770687237027, "grad_norm": 0.14601074159145355, "learning_rate": 0.0014834731371942019, "loss": 2.5231, "step": 406670 }, { "epoch": 0.8101969909473415, "grad_norm": 0.1529528796672821, "learning_rate": 0.0014830860806671967, "loss": 2.5357, "step": 406680 }, { "epoch": 0.8102169131709805, "grad_norm": 0.1631389707326889, "learning_rate": 0.0014826993137448973, "loss": 2.5218, "step": 406690 }, { "epoch": 0.8102368353946194, "grad_norm": 0.15807010233402252, "learning_rate": 0.0014823128357782087, "loss": 2.5104, "step": 406700 }, { "epoch": 0.8102567576182583, "grad_norm": 0.1755693107843399, "learning_rate": 0.0014819266461204553, "loss": 2.5211, "step": 406710 }, { "epoch": 0.8102766798418972, "grad_norm": 0.1374850571155548, "learning_rate": 0.0014815407441273712, "loss": 2.5202, "step": 406720 }, { "epoch": 0.8102966020655361, "grad_norm": 0.1479365974664688, "learning_rate": 0.0014811551291570861, "loss": 2.51, "step": 406730 }, { "epoch": 0.8103165242891751, "grad_norm": 0.14495916664600372, "learning_rate": 0.0014807698005701133, "loss": 2.5251, "step": 406740 }, { "epoch": 0.810336446512814, "grad_norm": 0.1507270634174347, "learning_rate": 0.0014803847577293367, "loss": 2.5122, "step": 406750 }, { "epoch": 0.8103563687364529, "grad_norm": 0.1658211499452591, "learning_rate": 0.00148, "loss": 2.518, "step": 406760 }, { "epoch": 0.8103762909600918, "grad_norm": 0.16517218947410583, "learning_rate": 0.0014796155267496928, "loss": 2.5354, "step": 406770 }, { "epoch": 0.8103962131837307, "grad_norm": 0.17263761162757874, "learning_rate": 0.0014792313373483386, "loss": 2.525, "step": 406780 }, { "epoch": 0.8104161354073697, "grad_norm": 0.13403569161891937, "learning_rate": 0.0014788474311681849, "loss": 2.518, "step": 406790 }, { "epoch": 0.8104360576310086, "grad_norm": 0.14351162314414978, "learning_rate": 0.001478463807583788, "loss": 2.5146, "step": 406800 }, { "epoch": 0.8104559798546475, "grad_norm": 0.14483694732189178, "learning_rate": 0.0014780804659720044, "loss": 2.5149, "step": 406810 }, { "epoch": 0.8104759020782863, "grad_norm": 0.1523008793592453, "learning_rate": 0.001477697405711976, "loss": 2.5125, "step": 406820 }, { "epoch": 0.8104958243019252, "grad_norm": 0.14660140872001648, "learning_rate": 0.0014773146261851209, "loss": 2.5182, "step": 406830 }, { "epoch": 0.8105157465255642, "grad_norm": 0.2150389850139618, "learning_rate": 0.0014769321267751193, "loss": 2.5164, "step": 406840 }, { "epoch": 0.8105356687492031, "grad_norm": 0.1353447437286377, "learning_rate": 0.001476549906867904, "loss": 2.5164, "step": 406850 }, { "epoch": 0.810555590972842, "grad_norm": 0.16487453877925873, "learning_rate": 0.0014761679658516482, "loss": 2.5297, "step": 406860 }, { "epoch": 0.8105755131964809, "grad_norm": 0.13933642208576202, "learning_rate": 0.0014757863031167537, "loss": 2.5108, "step": 406870 }, { "epoch": 0.8105954354201198, "grad_norm": 0.15097549557685852, "learning_rate": 0.00147540491805584, "loss": 2.5109, "step": 406880 }, { "epoch": 0.8106153576437588, "grad_norm": 0.20919422805309296, "learning_rate": 0.0014750238100637325, "loss": 2.5217, "step": 406890 }, { "epoch": 0.8106352798673977, "grad_norm": 0.15981999039649963, "learning_rate": 0.0014746429785374522, "loss": 2.5045, "step": 406900 }, { "epoch": 0.8106552020910366, "grad_norm": 0.1497834324836731, "learning_rate": 0.0014742624228762033, "loss": 2.5154, "step": 406910 }, { "epoch": 0.8106751243146755, "grad_norm": 0.16234222054481506, "learning_rate": 0.0014738821424813637, "loss": 2.5305, "step": 406920 }, { "epoch": 0.8106950465383144, "grad_norm": 0.1518496423959732, "learning_rate": 0.0014735021367564727, "loss": 2.5206, "step": 406930 }, { "epoch": 0.8107149687619534, "grad_norm": 0.1549660712480545, "learning_rate": 0.0014731224051072202, "loss": 2.5202, "step": 406940 }, { "epoch": 0.8107348909855923, "grad_norm": 0.16809600591659546, "learning_rate": 0.0014727429469414373, "loss": 2.5188, "step": 406950 }, { "epoch": 0.8107548132092312, "grad_norm": 0.16411066055297852, "learning_rate": 0.0014723637616690832, "loss": 2.5125, "step": 406960 }, { "epoch": 0.81077473543287, "grad_norm": 0.14397893846035004, "learning_rate": 0.0014719848487022365, "loss": 2.5249, "step": 406970 }, { "epoch": 0.810794657656509, "grad_norm": 0.13493257761001587, "learning_rate": 0.0014716062074550838, "loss": 2.5228, "step": 406980 }, { "epoch": 0.8108145798801479, "grad_norm": 0.14782357215881348, "learning_rate": 0.0014712278373439087, "loss": 2.5187, "step": 406990 }, { "epoch": 0.8108345021037868, "grad_norm": 0.14180609583854675, "learning_rate": 0.001470849737787082, "loss": 2.5142, "step": 407000 }, { "epoch": 0.8108544243274257, "grad_norm": 0.15011848509311676, "learning_rate": 0.001470471908205051, "loss": 2.5308, "step": 407010 }, { "epoch": 0.8108743465510646, "grad_norm": 0.1458420753479004, "learning_rate": 0.0014700943480203293, "loss": 2.5095, "step": 407020 }, { "epoch": 0.8108942687747036, "grad_norm": 0.13804399967193604, "learning_rate": 0.001469717056657486, "loss": 2.5113, "step": 407030 }, { "epoch": 0.8109141909983425, "grad_norm": 0.14278219640254974, "learning_rate": 0.0014693400335431361, "loss": 2.5245, "step": 407040 }, { "epoch": 0.8109341132219814, "grad_norm": 0.15795984864234924, "learning_rate": 0.00146896327810593, "loss": 2.5125, "step": 407050 }, { "epoch": 0.8109540354456203, "grad_norm": 0.14065095782279968, "learning_rate": 0.001468586789776543, "loss": 2.5181, "step": 407060 }, { "epoch": 0.8109739576692592, "grad_norm": 0.14271879196166992, "learning_rate": 0.0014682105679876666, "loss": 2.5212, "step": 407070 }, { "epoch": 0.8109938798928982, "grad_norm": 0.14829783141613007, "learning_rate": 0.0014678346121739972, "loss": 2.536, "step": 407080 }, { "epoch": 0.8110138021165371, "grad_norm": 0.17397890985012054, "learning_rate": 0.001467458921772226, "loss": 2.5208, "step": 407090 }, { "epoch": 0.811033724340176, "grad_norm": 0.1531408429145813, "learning_rate": 0.0014670834962210312, "loss": 2.5195, "step": 407100 }, { "epoch": 0.8110536465638148, "grad_norm": 0.16193027794361115, "learning_rate": 0.001466708334961065, "loss": 2.5307, "step": 407110 }, { "epoch": 0.8110735687874537, "grad_norm": 0.14310304820537567, "learning_rate": 0.0014663334374349466, "loss": 2.5031, "step": 407120 }, { "epoch": 0.8110934910110927, "grad_norm": 0.13968421518802643, "learning_rate": 0.0014659588030872524, "loss": 2.5187, "step": 407130 }, { "epoch": 0.8111134132347316, "grad_norm": 0.1845170259475708, "learning_rate": 0.0014655844313645045, "loss": 2.536, "step": 407140 }, { "epoch": 0.8111333354583705, "grad_norm": 0.13902057707309723, "learning_rate": 0.0014652103217151625, "loss": 2.5189, "step": 407150 }, { "epoch": 0.8111532576820094, "grad_norm": 0.1473570019006729, "learning_rate": 0.0014648364735896139, "loss": 2.5351, "step": 407160 }, { "epoch": 0.8111731799056483, "grad_norm": 0.15756238996982574, "learning_rate": 0.0014644628864401645, "loss": 2.5154, "step": 407170 }, { "epoch": 0.8111931021292873, "grad_norm": 0.15228617191314697, "learning_rate": 0.0014640895597210295, "loss": 2.5452, "step": 407180 }, { "epoch": 0.8112130243529262, "grad_norm": 0.14835578203201294, "learning_rate": 0.0014637164928883231, "loss": 2.5202, "step": 407190 }, { "epoch": 0.8112329465765651, "grad_norm": 0.18762871623039246, "learning_rate": 0.0014633436854000505, "loss": 2.5336, "step": 407200 }, { "epoch": 0.811252868800204, "grad_norm": 0.14979691803455353, "learning_rate": 0.001462971136716098, "loss": 2.5171, "step": 407210 }, { "epoch": 0.8112727910238429, "grad_norm": 0.1426854282617569, "learning_rate": 0.0014625988462982238, "loss": 2.5219, "step": 407220 }, { "epoch": 0.8112927132474819, "grad_norm": 0.14382293820381165, "learning_rate": 0.0014622268136100498, "loss": 2.521, "step": 407230 }, { "epoch": 0.8113126354711208, "grad_norm": 0.14891131222248077, "learning_rate": 0.0014618550381170516, "loss": 2.5338, "step": 407240 }, { "epoch": 0.8113325576947596, "grad_norm": 0.13899262249469757, "learning_rate": 0.0014614835192865497, "loss": 2.523, "step": 407250 }, { "epoch": 0.8113524799183985, "grad_norm": 0.16405652463436127, "learning_rate": 0.0014611122565877008, "loss": 2.5239, "step": 407260 }, { "epoch": 0.8113724021420375, "grad_norm": 0.18179526925086975, "learning_rate": 0.0014607412494914893, "loss": 2.5216, "step": 407270 }, { "epoch": 0.8113923243656764, "grad_norm": 0.13966132700443268, "learning_rate": 0.0014603704974707184, "loss": 2.5137, "step": 407280 }, { "epoch": 0.8114122465893153, "grad_norm": 0.1377115100622177, "learning_rate": 0.00146, "loss": 2.5404, "step": 407290 }, { "epoch": 0.8114321688129542, "grad_norm": 0.12645302712917328, "learning_rate": 0.0014596297565557484, "loss": 2.5185, "step": 407300 }, { "epoch": 0.8114520910365931, "grad_norm": 0.1768086850643158, "learning_rate": 0.001459259766616169, "loss": 2.5149, "step": 407310 }, { "epoch": 0.8114720132602321, "grad_norm": 0.16100512444972992, "learning_rate": 0.0014588900296612529, "loss": 2.539, "step": 407320 }, { "epoch": 0.811491935483871, "grad_norm": 0.14016126096248627, "learning_rate": 0.0014585205451727648, "loss": 2.5208, "step": 407330 }, { "epoch": 0.8115118577075099, "grad_norm": 0.135379821062088, "learning_rate": 0.0014581513126342375, "loss": 2.5247, "step": 407340 }, { "epoch": 0.8115317799311488, "grad_norm": 0.1605091243982315, "learning_rate": 0.0014577823315309617, "loss": 2.5122, "step": 407350 }, { "epoch": 0.8115517021547877, "grad_norm": 0.12863264977931976, "learning_rate": 0.0014574136013499786, "loss": 2.5263, "step": 407360 }, { "epoch": 0.8115716243784267, "grad_norm": 0.142963245511055, "learning_rate": 0.0014570451215800708, "loss": 2.5238, "step": 407370 }, { "epoch": 0.8115915466020656, "grad_norm": 0.14703279733657837, "learning_rate": 0.001456676891711755, "loss": 2.5098, "step": 407380 }, { "epoch": 0.8116114688257045, "grad_norm": 0.17332226037979126, "learning_rate": 0.0014563089112372726, "loss": 2.5208, "step": 407390 }, { "epoch": 0.8116313910493433, "grad_norm": 0.1662967950105667, "learning_rate": 0.001455941179650582, "loss": 2.5158, "step": 407400 }, { "epoch": 0.8116513132729822, "grad_norm": 0.13819092512130737, "learning_rate": 0.001455573696447352, "loss": 2.5304, "step": 407410 }, { "epoch": 0.8116712354966212, "grad_norm": 0.15184181928634644, "learning_rate": 0.0014552064611249506, "loss": 2.5302, "step": 407420 }, { "epoch": 0.8116911577202601, "grad_norm": 0.18527117371559143, "learning_rate": 0.0014548394731824397, "loss": 2.517, "step": 407430 }, { "epoch": 0.811711079943899, "grad_norm": 0.1482732594013214, "learning_rate": 0.001454472732120566, "loss": 2.5282, "step": 407440 }, { "epoch": 0.8117310021675379, "grad_norm": 0.17671777307987213, "learning_rate": 0.0014541062374417528, "loss": 2.5204, "step": 407450 }, { "epoch": 0.8117509243911768, "grad_norm": 0.1430862694978714, "learning_rate": 0.0014537399886500935, "loss": 2.5073, "step": 407460 }, { "epoch": 0.8117708466148158, "grad_norm": 0.1538131833076477, "learning_rate": 0.001453373985251342, "loss": 2.515, "step": 407470 }, { "epoch": 0.8117907688384547, "grad_norm": 0.15565170347690582, "learning_rate": 0.0014530082267529064, "loss": 2.5218, "step": 407480 }, { "epoch": 0.8118106910620936, "grad_norm": 0.14260238409042358, "learning_rate": 0.0014526427126638397, "loss": 2.5276, "step": 407490 }, { "epoch": 0.8118306132857325, "grad_norm": 0.14695404469966888, "learning_rate": 0.001452277442494834, "loss": 2.5126, "step": 407500 }, { "epoch": 0.8118505355093714, "grad_norm": 0.15058688819408417, "learning_rate": 0.0014519124157582111, "loss": 2.5282, "step": 407510 }, { "epoch": 0.8118704577330104, "grad_norm": 0.15498259663581848, "learning_rate": 0.0014515476319679164, "loss": 2.5306, "step": 407520 }, { "epoch": 0.8118903799566493, "grad_norm": 0.22217845916748047, "learning_rate": 0.0014511830906395102, "loss": 2.5417, "step": 407530 }, { "epoch": 0.8119103021802881, "grad_norm": 0.1347903460264206, "learning_rate": 0.001450818791290161, "loss": 2.5238, "step": 407540 }, { "epoch": 0.811930224403927, "grad_norm": 0.17673085629940033, "learning_rate": 0.0014504547334386368, "loss": 2.52, "step": 407550 }, { "epoch": 0.8119501466275659, "grad_norm": 0.1381736844778061, "learning_rate": 0.001450090916605299, "loss": 2.504, "step": 407560 }, { "epoch": 0.8119700688512049, "grad_norm": 0.16022783517837524, "learning_rate": 0.0014497273403120958, "loss": 2.527, "step": 407570 }, { "epoch": 0.8119899910748438, "grad_norm": 0.17167986929416656, "learning_rate": 0.0014493640040825516, "loss": 2.5091, "step": 407580 }, { "epoch": 0.8120099132984827, "grad_norm": 0.15783536434173584, "learning_rate": 0.0014490009074417636, "loss": 2.5311, "step": 407590 }, { "epoch": 0.8120298355221216, "grad_norm": 0.1882094293832779, "learning_rate": 0.001448638049916391, "loss": 2.5064, "step": 407600 }, { "epoch": 0.8120497577457606, "grad_norm": 0.15039491653442383, "learning_rate": 0.0014482754310346513, "loss": 2.5113, "step": 407610 }, { "epoch": 0.8120696799693995, "grad_norm": 0.1588602513074875, "learning_rate": 0.0014479130503263096, "loss": 2.5159, "step": 407620 }, { "epoch": 0.8120896021930384, "grad_norm": 0.3366965353488922, "learning_rate": 0.0014475509073226746, "loss": 2.5181, "step": 407630 }, { "epoch": 0.8121095244166773, "grad_norm": 0.15549369156360626, "learning_rate": 0.00144718900155659, "loss": 2.5216, "step": 407640 }, { "epoch": 0.8121294466403162, "grad_norm": 0.1512380838394165, "learning_rate": 0.0014468273325624267, "loss": 2.514, "step": 407650 }, { "epoch": 0.8121493688639552, "grad_norm": 0.15253545343875885, "learning_rate": 0.001446465899876078, "loss": 2.5085, "step": 407660 }, { "epoch": 0.8121692910875941, "grad_norm": 0.14100901782512665, "learning_rate": 0.0014461047030349507, "loss": 2.5143, "step": 407670 }, { "epoch": 0.812189213311233, "grad_norm": 0.1772787868976593, "learning_rate": 0.0014457437415779594, "loss": 2.5138, "step": 407680 }, { "epoch": 0.8122091355348718, "grad_norm": 0.18140584230422974, "learning_rate": 0.0014453830150455182, "loss": 2.5235, "step": 407690 }, { "epoch": 0.8122290577585107, "grad_norm": 0.13956791162490845, "learning_rate": 0.0014450225229795358, "loss": 2.5279, "step": 407700 }, { "epoch": 0.8122489799821497, "grad_norm": 0.14864154160022736, "learning_rate": 0.0014446622649234074, "loss": 2.51, "step": 407710 }, { "epoch": 0.8122689022057886, "grad_norm": 0.16603823006153107, "learning_rate": 0.0014443022404220078, "loss": 2.5015, "step": 407720 }, { "epoch": 0.8122888244294275, "grad_norm": 0.12771089375019073, "learning_rate": 0.0014439424490216863, "loss": 2.5189, "step": 407730 }, { "epoch": 0.8123087466530664, "grad_norm": 0.15632081031799316, "learning_rate": 0.0014435828902702576, "loss": 2.5147, "step": 407740 }, { "epoch": 0.8123286688767053, "grad_norm": 0.156829372048378, "learning_rate": 0.0014432235637169978, "loss": 2.5198, "step": 407750 }, { "epoch": 0.8123485911003443, "grad_norm": 0.16445019841194153, "learning_rate": 0.0014428644689126351, "loss": 2.5213, "step": 407760 }, { "epoch": 0.8123685133239832, "grad_norm": 0.14248181879520416, "learning_rate": 0.0014425056054093459, "loss": 2.5283, "step": 407770 }, { "epoch": 0.8123884355476221, "grad_norm": 0.16393814980983734, "learning_rate": 0.001442146972760746, "loss": 2.5244, "step": 407780 }, { "epoch": 0.812408357771261, "grad_norm": 0.1550055742263794, "learning_rate": 0.0014417885705218856, "loss": 2.5224, "step": 407790 }, { "epoch": 0.8124282799948999, "grad_norm": 0.1462465226650238, "learning_rate": 0.0014414303982492424, "loss": 2.5197, "step": 407800 }, { "epoch": 0.8124482022185389, "grad_norm": 0.15021172165870667, "learning_rate": 0.0014410724555007151, "loss": 2.5275, "step": 407810 }, { "epoch": 0.8124681244421778, "grad_norm": 0.1542309820652008, "learning_rate": 0.0014407147418356175, "loss": 2.5209, "step": 407820 }, { "epoch": 0.8124880466658166, "grad_norm": 0.15865401923656464, "learning_rate": 0.001440357256814671, "loss": 2.5126, "step": 407830 }, { "epoch": 0.8125079688894555, "grad_norm": 0.16211937367916107, "learning_rate": 0.0014399999999999999, "loss": 2.5113, "step": 407840 }, { "epoch": 0.8125278911130944, "grad_norm": 0.15336057543754578, "learning_rate": 0.001439642970955124, "loss": 2.518, "step": 407850 }, { "epoch": 0.8125478133367334, "grad_norm": 0.1637704223394394, "learning_rate": 0.0014392861692449526, "loss": 2.5121, "step": 407860 }, { "epoch": 0.8125677355603723, "grad_norm": 0.17872874438762665, "learning_rate": 0.0014389295944357786, "loss": 2.5162, "step": 407870 }, { "epoch": 0.8125876577840112, "grad_norm": 0.1500461846590042, "learning_rate": 0.001438573246095272, "loss": 2.512, "step": 407880 }, { "epoch": 0.8126075800076501, "grad_norm": 0.1451253592967987, "learning_rate": 0.0014382171237924744, "loss": 2.5176, "step": 407890 }, { "epoch": 0.8126275022312891, "grad_norm": 0.15199369192123413, "learning_rate": 0.0014378612270977923, "loss": 2.5122, "step": 407900 }, { "epoch": 0.812647424454928, "grad_norm": 0.15651477873325348, "learning_rate": 0.0014375055555829906, "loss": 2.5164, "step": 407910 }, { "epoch": 0.8126673466785669, "grad_norm": 0.13517826795578003, "learning_rate": 0.0014371501088211885, "loss": 2.5093, "step": 407920 }, { "epoch": 0.8126872689022058, "grad_norm": 0.1890072226524353, "learning_rate": 0.001436794886386851, "loss": 2.5202, "step": 407930 }, { "epoch": 0.8127071911258447, "grad_norm": 0.1812620759010315, "learning_rate": 0.001436439887855785, "loss": 2.5092, "step": 407940 }, { "epoch": 0.8127271133494837, "grad_norm": 0.1562664806842804, "learning_rate": 0.0014360851128051326, "loss": 2.5207, "step": 407950 }, { "epoch": 0.8127470355731226, "grad_norm": 0.16949467360973358, "learning_rate": 0.0014357305608133646, "loss": 2.5274, "step": 407960 }, { "epoch": 0.8127669577967614, "grad_norm": 0.157281756401062, "learning_rate": 0.0014353762314602758, "loss": 2.525, "step": 407970 }, { "epoch": 0.8127868800204003, "grad_norm": 0.16333527863025665, "learning_rate": 0.0014350221243269786, "loss": 2.5181, "step": 407980 }, { "epoch": 0.8128068022440392, "grad_norm": 0.15738001465797424, "learning_rate": 0.0014346682389958971, "loss": 2.5095, "step": 407990 }, { "epoch": 0.8128267244676782, "grad_norm": 0.1632472574710846, "learning_rate": 0.0014343145750507621, "loss": 2.5166, "step": 408000 }, { "epoch": 0.8128466466913171, "grad_norm": 0.17587843537330627, "learning_rate": 0.0014339611320766039, "loss": 2.5064, "step": 408010 }, { "epoch": 0.812866568914956, "grad_norm": 0.15527336299419403, "learning_rate": 0.0014336079096597483, "loss": 2.5118, "step": 408020 }, { "epoch": 0.8128864911385949, "grad_norm": 0.17058733105659485, "learning_rate": 0.0014332549073878098, "loss": 2.5151, "step": 408030 }, { "epoch": 0.8129064133622338, "grad_norm": 0.14682914316654205, "learning_rate": 0.001432902124849687, "loss": 2.5172, "step": 408040 }, { "epoch": 0.8129263355858728, "grad_norm": 0.14575038850307465, "learning_rate": 0.0014325495616355557, "loss": 2.5238, "step": 408050 }, { "epoch": 0.8129462578095117, "grad_norm": 0.14471130073070526, "learning_rate": 0.0014321972173368645, "loss": 2.5127, "step": 408060 }, { "epoch": 0.8129661800331506, "grad_norm": 0.13214242458343506, "learning_rate": 0.0014318450915463284, "loss": 2.5263, "step": 408070 }, { "epoch": 0.8129861022567895, "grad_norm": 0.17159277200698853, "learning_rate": 0.0014314931838579242, "loss": 2.5091, "step": 408080 }, { "epoch": 0.8130060244804284, "grad_norm": 0.16168329119682312, "learning_rate": 0.0014311414938668846, "loss": 2.5302, "step": 408090 }, { "epoch": 0.8130259467040674, "grad_norm": 0.16154010593891144, "learning_rate": 0.0014307900211696918, "loss": 2.5041, "step": 408100 }, { "epoch": 0.8130458689277063, "grad_norm": 0.14812277257442474, "learning_rate": 0.0014304387653640744, "loss": 2.5093, "step": 408110 }, { "epoch": 0.8130657911513451, "grad_norm": 0.14271241426467896, "learning_rate": 0.0014300877260489998, "loss": 2.5104, "step": 408120 }, { "epoch": 0.813085713374984, "grad_norm": 0.1723180115222931, "learning_rate": 0.0014297369028246699, "loss": 2.5277, "step": 408130 }, { "epoch": 0.8131056355986229, "grad_norm": 0.1553167849779129, "learning_rate": 0.0014293862952925159, "loss": 2.5099, "step": 408140 }, { "epoch": 0.8131255578222619, "grad_norm": 0.14095109701156616, "learning_rate": 0.0014290359030551922, "loss": 2.5172, "step": 408150 }, { "epoch": 0.8131454800459008, "grad_norm": 0.17907528579235077, "learning_rate": 0.001428685725716572, "loss": 2.5235, "step": 408160 }, { "epoch": 0.8131654022695397, "grad_norm": 0.1364750862121582, "learning_rate": 0.001428335762881742, "loss": 2.5181, "step": 408170 }, { "epoch": 0.8131853244931786, "grad_norm": 0.14384686946868896, "learning_rate": 0.0014279860141569963, "loss": 2.5093, "step": 408180 }, { "epoch": 0.8132052467168176, "grad_norm": 0.19229358434677124, "learning_rate": 0.0014276364791498326, "loss": 2.5142, "step": 408190 }, { "epoch": 0.8132251689404565, "grad_norm": 0.14585936069488525, "learning_rate": 0.0014272871574689458, "loss": 2.5205, "step": 408200 }, { "epoch": 0.8132450911640954, "grad_norm": 0.1474335938692093, "learning_rate": 0.0014269380487242239, "loss": 2.5146, "step": 408210 }, { "epoch": 0.8132650133877343, "grad_norm": 0.17093217372894287, "learning_rate": 0.0014265891525267421, "loss": 2.5266, "step": 408220 }, { "epoch": 0.8132849356113732, "grad_norm": 0.18397311866283417, "learning_rate": 0.001426240468488758, "loss": 2.5099, "step": 408230 }, { "epoch": 0.8133048578350122, "grad_norm": 0.15776380896568298, "learning_rate": 0.0014258919962237071, "loss": 2.5339, "step": 408240 }, { "epoch": 0.813324780058651, "grad_norm": 0.1328119933605194, "learning_rate": 0.0014255437353461972, "loss": 2.5204, "step": 408250 }, { "epoch": 0.8133447022822899, "grad_norm": 0.15014426410198212, "learning_rate": 0.0014251956854720034, "loss": 2.5084, "step": 408260 }, { "epoch": 0.8133646245059288, "grad_norm": 0.15402476489543915, "learning_rate": 0.0014248478462180639, "loss": 2.5119, "step": 408270 }, { "epoch": 0.8133845467295677, "grad_norm": 0.1568363457918167, "learning_rate": 0.0014245002172024737, "loss": 2.5266, "step": 408280 }, { "epoch": 0.8134044689532067, "grad_norm": 0.15324784815311432, "learning_rate": 0.0014241527980444813, "loss": 2.5274, "step": 408290 }, { "epoch": 0.8134243911768456, "grad_norm": 0.14814412593841553, "learning_rate": 0.0014238055883644828, "loss": 2.5215, "step": 408300 }, { "epoch": 0.8134443134004845, "grad_norm": 0.1535797268152237, "learning_rate": 0.0014234585877840173, "loss": 2.518, "step": 408310 }, { "epoch": 0.8134642356241234, "grad_norm": 0.2889454662799835, "learning_rate": 0.0014231117959257618, "loss": 2.5132, "step": 408320 }, { "epoch": 0.8134841578477623, "grad_norm": 0.15083953738212585, "learning_rate": 0.0014227652124135276, "loss": 2.4995, "step": 408330 }, { "epoch": 0.8135040800714013, "grad_norm": 0.15846022963523865, "learning_rate": 0.001422418836872254, "loss": 2.5196, "step": 408340 }, { "epoch": 0.8135240022950402, "grad_norm": 0.15387655794620514, "learning_rate": 0.0014220726689280045, "loss": 2.5255, "step": 408350 }, { "epoch": 0.8135439245186791, "grad_norm": 0.15479212999343872, "learning_rate": 0.0014217267082079618, "loss": 2.5222, "step": 408360 }, { "epoch": 0.813563846742318, "grad_norm": 0.157337486743927, "learning_rate": 0.0014213809543404227, "loss": 2.5228, "step": 408370 }, { "epoch": 0.8135837689659569, "grad_norm": 0.1335190087556839, "learning_rate": 0.0014210354069547951, "loss": 2.5258, "step": 408380 }, { "epoch": 0.8136036911895959, "grad_norm": 0.1437472701072693, "learning_rate": 0.0014206900656815904, "loss": 2.5198, "step": 408390 }, { "epoch": 0.8136236134132347, "grad_norm": 0.1669107973575592, "learning_rate": 0.0014203449301524226, "loss": 2.517, "step": 408400 }, { "epoch": 0.8136435356368736, "grad_norm": 0.15432189404964447, "learning_rate": 0.00142, "loss": 2.5155, "step": 408410 }, { "epoch": 0.8136634578605125, "grad_norm": 0.15968064963817596, "learning_rate": 0.0014196552748581238, "loss": 2.5227, "step": 408420 }, { "epoch": 0.8136833800841514, "grad_norm": 0.1499885469675064, "learning_rate": 0.001419310754361681, "loss": 2.5242, "step": 408430 }, { "epoch": 0.8137033023077904, "grad_norm": 0.1568160057067871, "learning_rate": 0.001418966438146642, "loss": 2.5139, "step": 408440 }, { "epoch": 0.8137232245314293, "grad_norm": 0.17886275053024292, "learning_rate": 0.0014186223258500546, "loss": 2.5055, "step": 408450 }, { "epoch": 0.8137431467550682, "grad_norm": 0.16317707300186157, "learning_rate": 0.0014182784171100408, "loss": 2.5203, "step": 408460 }, { "epoch": 0.8137630689787071, "grad_norm": 0.16045545041561127, "learning_rate": 0.00141793471156579, "loss": 2.5281, "step": 408470 }, { "epoch": 0.8137829912023461, "grad_norm": 0.1729706972837448, "learning_rate": 0.0014175912088575586, "loss": 2.5188, "step": 408480 }, { "epoch": 0.813802913425985, "grad_norm": 0.16396178305149078, "learning_rate": 0.0014172479086266615, "loss": 2.5245, "step": 408490 }, { "epoch": 0.8138228356496239, "grad_norm": 0.16950452327728271, "learning_rate": 0.0014169048105154701, "loss": 2.5118, "step": 408500 }, { "epoch": 0.8138427578732628, "grad_norm": 0.14689764380455017, "learning_rate": 0.0014165619141674073, "loss": 2.5339, "step": 408510 }, { "epoch": 0.8138626800969017, "grad_norm": 0.15073075890541077, "learning_rate": 0.0014162192192269433, "loss": 2.5117, "step": 408520 }, { "epoch": 0.8138826023205407, "grad_norm": 0.14230911433696747, "learning_rate": 0.0014158767253395906, "loss": 2.5206, "step": 408530 }, { "epoch": 0.8139025245441796, "grad_norm": 0.14483734965324402, "learning_rate": 0.0014155344321519017, "loss": 2.5158, "step": 408540 }, { "epoch": 0.8139224467678184, "grad_norm": 0.1554025262594223, "learning_rate": 0.0014151923393114624, "loss": 2.5118, "step": 408550 }, { "epoch": 0.8139423689914573, "grad_norm": 0.15288269519805908, "learning_rate": 0.0014148504464668882, "loss": 2.5136, "step": 408560 }, { "epoch": 0.8139622912150962, "grad_norm": 0.1516689956188202, "learning_rate": 0.0014145087532678221, "loss": 2.5146, "step": 408570 }, { "epoch": 0.8139822134387352, "grad_norm": 0.14393413066864014, "learning_rate": 0.0014141672593649278, "loss": 2.5272, "step": 408580 }, { "epoch": 0.8140021356623741, "grad_norm": 0.14681227505207062, "learning_rate": 0.0014138259644098862, "loss": 2.5106, "step": 408590 }, { "epoch": 0.814022057886013, "grad_norm": 0.14728142321109772, "learning_rate": 0.001413484868055393, "loss": 2.5393, "step": 408600 }, { "epoch": 0.8140419801096519, "grad_norm": 0.18513166904449463, "learning_rate": 0.0014131439699551515, "loss": 2.5223, "step": 408610 }, { "epoch": 0.8140619023332908, "grad_norm": 0.1366259604692459, "learning_rate": 0.001412803269763872, "loss": 2.5086, "step": 408620 }, { "epoch": 0.8140818245569298, "grad_norm": 0.14779207110404968, "learning_rate": 0.0014124627671372647, "loss": 2.5139, "step": 408630 }, { "epoch": 0.8141017467805687, "grad_norm": 0.1747850477695465, "learning_rate": 0.0014121224617320372, "loss": 2.5028, "step": 408640 }, { "epoch": 0.8141216690042076, "grad_norm": 0.15691840648651123, "learning_rate": 0.0014117823532058904, "loss": 2.5332, "step": 408650 }, { "epoch": 0.8141415912278465, "grad_norm": 0.16548174619674683, "learning_rate": 0.0014114424412175136, "loss": 2.5194, "step": 408660 }, { "epoch": 0.8141615134514854, "grad_norm": 0.14229927957057953, "learning_rate": 0.0014111027254265819, "loss": 2.5131, "step": 408670 }, { "epoch": 0.8141814356751244, "grad_norm": 0.19101500511169434, "learning_rate": 0.0014107632054937507, "loss": 2.5273, "step": 408680 }, { "epoch": 0.8142013578987632, "grad_norm": 0.1555853635072708, "learning_rate": 0.0014104238810806532, "loss": 2.5028, "step": 408690 }, { "epoch": 0.8142212801224021, "grad_norm": 0.18559375405311584, "learning_rate": 0.001410084751849895, "loss": 2.5177, "step": 408700 }, { "epoch": 0.814241202346041, "grad_norm": 0.14080491662025452, "learning_rate": 0.0014097458174650516, "loss": 2.5338, "step": 408710 }, { "epoch": 0.8142611245696799, "grad_norm": 0.1569576859474182, "learning_rate": 0.001409407077590664, "loss": 2.5232, "step": 408720 }, { "epoch": 0.8142810467933189, "grad_norm": 0.16590915620326996, "learning_rate": 0.001409068531892234, "loss": 2.5189, "step": 408730 }, { "epoch": 0.8143009690169578, "grad_norm": 0.17389467358589172, "learning_rate": 0.0014087301800362207, "loss": 2.5095, "step": 408740 }, { "epoch": 0.8143208912405967, "grad_norm": 0.1593240201473236, "learning_rate": 0.0014083920216900383, "loss": 2.4979, "step": 408750 }, { "epoch": 0.8143408134642356, "grad_norm": 0.16272461414337158, "learning_rate": 0.0014080540565220502, "loss": 2.5247, "step": 408760 }, { "epoch": 0.8143607356878746, "grad_norm": 0.1475263386964798, "learning_rate": 0.001407716284201566, "loss": 2.5253, "step": 408770 }, { "epoch": 0.8143806579115135, "grad_norm": 0.14530935883522034, "learning_rate": 0.001407378704398838, "loss": 2.5001, "step": 408780 }, { "epoch": 0.8144005801351524, "grad_norm": 0.17371158301830292, "learning_rate": 0.0014070413167850565, "loss": 2.514, "step": 408790 }, { "epoch": 0.8144205023587913, "grad_norm": 0.13983680307865143, "learning_rate": 0.001406704121032347, "loss": 2.5057, "step": 408800 }, { "epoch": 0.8144404245824302, "grad_norm": 0.14687873423099518, "learning_rate": 0.001406367116813767, "loss": 2.5185, "step": 408810 }, { "epoch": 0.8144603468060692, "grad_norm": 0.19047807157039642, "learning_rate": 0.0014060303038033002, "loss": 2.5156, "step": 408820 }, { "epoch": 0.814480269029708, "grad_norm": 0.1659405529499054, "learning_rate": 0.001405693681675855, "loss": 2.5181, "step": 408830 }, { "epoch": 0.8145001912533469, "grad_norm": 0.1660519540309906, "learning_rate": 0.0014053572501072599, "loss": 2.5218, "step": 408840 }, { "epoch": 0.8145201134769858, "grad_norm": 0.13702338933944702, "learning_rate": 0.0014050210087742593, "loss": 2.4994, "step": 408850 }, { "epoch": 0.8145400357006247, "grad_norm": 0.14767107367515564, "learning_rate": 0.0014046849573545112, "loss": 2.5104, "step": 408860 }, { "epoch": 0.8145599579242637, "grad_norm": 0.14213927090168, "learning_rate": 0.001404349095526583, "loss": 2.5174, "step": 408870 }, { "epoch": 0.8145798801479026, "grad_norm": 0.15455803275108337, "learning_rate": 0.0014040134229699464, "loss": 2.519, "step": 408880 }, { "epoch": 0.8145998023715415, "grad_norm": 0.1260426938533783, "learning_rate": 0.001403677939364977, "loss": 2.5016, "step": 408890 }, { "epoch": 0.8146197245951804, "grad_norm": 0.1627751588821411, "learning_rate": 0.0014033426443929482, "loss": 2.5044, "step": 408900 }, { "epoch": 0.8146396468188193, "grad_norm": 0.1701558232307434, "learning_rate": 0.001403007537736028, "loss": 2.5133, "step": 408910 }, { "epoch": 0.8146595690424583, "grad_norm": 0.17715857923030853, "learning_rate": 0.0014026726190772768, "loss": 2.5108, "step": 408920 }, { "epoch": 0.8146794912660972, "grad_norm": 0.13515037298202515, "learning_rate": 0.0014023378881006425, "loss": 2.5297, "step": 408930 }, { "epoch": 0.8146994134897361, "grad_norm": 0.17000365257263184, "learning_rate": 0.0014020033444909579, "loss": 2.5096, "step": 408940 }, { "epoch": 0.814719335713375, "grad_norm": 0.17064623534679413, "learning_rate": 0.0014016689879339363, "loss": 2.5076, "step": 408950 }, { "epoch": 0.8147392579370138, "grad_norm": 0.16777941584587097, "learning_rate": 0.0014013348181161694, "loss": 2.5156, "step": 408960 }, { "epoch": 0.8147591801606529, "grad_norm": 0.16505584120750427, "learning_rate": 0.0014010008347251227, "loss": 2.5219, "step": 408970 }, { "epoch": 0.8147791023842917, "grad_norm": 0.2116514891386032, "learning_rate": 0.001400667037449132, "loss": 2.5208, "step": 408980 }, { "epoch": 0.8147990246079306, "grad_norm": 0.167787566781044, "learning_rate": 0.0014003334259774021, "loss": 2.5478, "step": 408990 }, { "epoch": 0.8148189468315695, "grad_norm": 0.16565757989883423, "learning_rate": 0.0014, "loss": 2.5239, "step": 409000 }, { "epoch": 0.8148388690552084, "grad_norm": 0.15051861107349396, "learning_rate": 0.0013996667592078547, "loss": 2.5182, "step": 409010 }, { "epoch": 0.8148587912788474, "grad_norm": 0.149265855550766, "learning_rate": 0.0013993337032927517, "loss": 2.5064, "step": 409020 }, { "epoch": 0.8148787135024863, "grad_norm": 0.14699728786945343, "learning_rate": 0.001399000831947331, "loss": 2.5217, "step": 409030 }, { "epoch": 0.8148986357261252, "grad_norm": 0.14264048635959625, "learning_rate": 0.0013986681448650837, "loss": 2.5124, "step": 409040 }, { "epoch": 0.8149185579497641, "grad_norm": 0.14310644567012787, "learning_rate": 0.001398335641740347, "loss": 2.5164, "step": 409050 }, { "epoch": 0.814938480173403, "grad_norm": 0.22057794034481049, "learning_rate": 0.0013980033222683037, "loss": 2.5084, "step": 409060 }, { "epoch": 0.814958402397042, "grad_norm": 0.16550879180431366, "learning_rate": 0.0013976711861449762, "loss": 2.5184, "step": 409070 }, { "epoch": 0.8149783246206809, "grad_norm": 0.16342993080615997, "learning_rate": 0.0013973392330672255, "loss": 2.5206, "step": 409080 }, { "epoch": 0.8149982468443198, "grad_norm": 0.15155692398548126, "learning_rate": 0.0013970074627327467, "loss": 2.5138, "step": 409090 }, { "epoch": 0.8150181690679587, "grad_norm": 0.15259084105491638, "learning_rate": 0.0013966758748400658, "loss": 2.5008, "step": 409100 }, { "epoch": 0.8150380912915977, "grad_norm": 0.15259793400764465, "learning_rate": 0.0013963444690885374, "loss": 2.526, "step": 409110 }, { "epoch": 0.8150580135152365, "grad_norm": 0.13657014071941376, "learning_rate": 0.00139601324517834, "loss": 2.5198, "step": 409120 }, { "epoch": 0.8150779357388754, "grad_norm": 0.1325484663248062, "learning_rate": 0.0013956822028104749, "loss": 2.5228, "step": 409130 }, { "epoch": 0.8150978579625143, "grad_norm": 0.13468322157859802, "learning_rate": 0.0013953513416867609, "loss": 2.5229, "step": 409140 }, { "epoch": 0.8151177801861532, "grad_norm": 0.13934268057346344, "learning_rate": 0.001395020661509833, "loss": 2.5149, "step": 409150 }, { "epoch": 0.8151377024097922, "grad_norm": 0.1920861154794693, "learning_rate": 0.0013946901619831377, "loss": 2.515, "step": 409160 }, { "epoch": 0.8151576246334311, "grad_norm": 0.4079817533493042, "learning_rate": 0.0013943598428109313, "loss": 2.5164, "step": 409170 }, { "epoch": 0.81517754685707, "grad_norm": 0.1678249090909958, "learning_rate": 0.0013940297036982754, "loss": 2.5296, "step": 409180 }, { "epoch": 0.8151974690807089, "grad_norm": 0.19244442880153656, "learning_rate": 0.0013936997443510352, "loss": 2.5285, "step": 409190 }, { "epoch": 0.8152173913043478, "grad_norm": 0.146364226937294, "learning_rate": 0.0013933699644758759, "loss": 2.5051, "step": 409200 }, { "epoch": 0.8152373135279868, "grad_norm": 0.1446896493434906, "learning_rate": 0.0013930403637802593, "loss": 2.5259, "step": 409210 }, { "epoch": 0.8152572357516257, "grad_norm": 0.1662910133600235, "learning_rate": 0.0013927109419724411, "loss": 2.5236, "step": 409220 }, { "epoch": 0.8152771579752646, "grad_norm": 0.16051895916461945, "learning_rate": 0.0013923816987614678, "loss": 2.5133, "step": 409230 }, { "epoch": 0.8152970801989035, "grad_norm": 0.17005205154418945, "learning_rate": 0.0013920526338571735, "loss": 2.5067, "step": 409240 }, { "epoch": 0.8153170024225423, "grad_norm": 0.12569394707679749, "learning_rate": 0.001391723746970178, "loss": 2.5169, "step": 409250 }, { "epoch": 0.8153369246461813, "grad_norm": 0.1596638262271881, "learning_rate": 0.0013913950378118824, "loss": 2.5081, "step": 409260 }, { "epoch": 0.8153568468698202, "grad_norm": 0.16171814501285553, "learning_rate": 0.0013910665060944667, "loss": 2.5109, "step": 409270 }, { "epoch": 0.8153767690934591, "grad_norm": 0.162149116396904, "learning_rate": 0.0013907381515308873, "loss": 2.5234, "step": 409280 }, { "epoch": 0.815396691317098, "grad_norm": 0.15521930158138275, "learning_rate": 0.0013904099738348732, "loss": 2.5056, "step": 409290 }, { "epoch": 0.8154166135407369, "grad_norm": 0.15521886944770813, "learning_rate": 0.0013900819727209238, "loss": 2.5144, "step": 409300 }, { "epoch": 0.8154365357643759, "grad_norm": 0.14585299789905548, "learning_rate": 0.0013897541479043058, "loss": 2.5198, "step": 409310 }, { "epoch": 0.8154564579880148, "grad_norm": 0.1630743443965912, "learning_rate": 0.0013894264991010502, "loss": 2.5127, "step": 409320 }, { "epoch": 0.8154763802116537, "grad_norm": 0.14788611233234406, "learning_rate": 0.0013890990260279495, "loss": 2.5133, "step": 409330 }, { "epoch": 0.8154963024352926, "grad_norm": 0.15278233587741852, "learning_rate": 0.0013887717284025549, "loss": 2.5184, "step": 409340 }, { "epoch": 0.8155162246589315, "grad_norm": 0.15266171097755432, "learning_rate": 0.0013884446059431736, "loss": 2.5084, "step": 409350 }, { "epoch": 0.8155361468825705, "grad_norm": 0.17537660896778107, "learning_rate": 0.0013881176583688658, "loss": 2.5148, "step": 409360 }, { "epoch": 0.8155560691062094, "grad_norm": 0.15118490159511566, "learning_rate": 0.0013877908853994411, "loss": 2.5245, "step": 409370 }, { "epoch": 0.8155759913298483, "grad_norm": 0.15718936920166016, "learning_rate": 0.0013874642867554578, "loss": 2.5122, "step": 409380 }, { "epoch": 0.8155959135534872, "grad_norm": 0.15465694665908813, "learning_rate": 0.0013871378621582175, "loss": 2.5029, "step": 409390 }, { "epoch": 0.8156158357771262, "grad_norm": 0.14837044477462769, "learning_rate": 0.0013868116113297643, "loss": 2.5138, "step": 409400 }, { "epoch": 0.815635758000765, "grad_norm": 0.16134458780288696, "learning_rate": 0.0013864855339928814, "loss": 2.5297, "step": 409410 }, { "epoch": 0.8156556802244039, "grad_norm": 0.16235563158988953, "learning_rate": 0.0013861596298710879, "loss": 2.5194, "step": 409420 }, { "epoch": 0.8156756024480428, "grad_norm": 0.15675179660320282, "learning_rate": 0.0013858338986886365, "loss": 2.5047, "step": 409430 }, { "epoch": 0.8156955246716817, "grad_norm": 0.1464935839176178, "learning_rate": 0.0013855083401705115, "loss": 2.5169, "step": 409440 }, { "epoch": 0.8157154468953207, "grad_norm": 0.16055943071842194, "learning_rate": 0.001385182954042424, "loss": 2.5219, "step": 409450 }, { "epoch": 0.8157353691189596, "grad_norm": 0.14947295188903809, "learning_rate": 0.001384857740030812, "loss": 2.5142, "step": 409460 }, { "epoch": 0.8157552913425985, "grad_norm": 0.15197360515594482, "learning_rate": 0.0013845326978628352, "loss": 2.5078, "step": 409470 }, { "epoch": 0.8157752135662374, "grad_norm": 0.13585804402828217, "learning_rate": 0.001384207827266374, "loss": 2.514, "step": 409480 }, { "epoch": 0.8157951357898763, "grad_norm": 0.15575550496578217, "learning_rate": 0.0013838831279700254, "loss": 2.5035, "step": 409490 }, { "epoch": 0.8158150580135153, "grad_norm": 0.14382657408714294, "learning_rate": 0.0013835585997031023, "loss": 2.5108, "step": 409500 }, { "epoch": 0.8158349802371542, "grad_norm": 0.15735174715518951, "learning_rate": 0.001383234242195629, "loss": 2.5228, "step": 409510 }, { "epoch": 0.8158549024607931, "grad_norm": 0.15249855816364288, "learning_rate": 0.0013829100551783395, "loss": 2.5168, "step": 409520 }, { "epoch": 0.815874824684432, "grad_norm": 0.19733268022537231, "learning_rate": 0.0013825860383826747, "loss": 2.5192, "step": 409530 }, { "epoch": 0.8158947469080708, "grad_norm": 0.1501566767692566, "learning_rate": 0.00138226219154078, "loss": 2.5062, "step": 409540 }, { "epoch": 0.8159146691317098, "grad_norm": 0.14700771868228912, "learning_rate": 0.0013819385143855024, "loss": 2.5266, "step": 409550 }, { "epoch": 0.8159345913553487, "grad_norm": 0.13951122760772705, "learning_rate": 0.0013816150066503878, "loss": 2.5114, "step": 409560 }, { "epoch": 0.8159545135789876, "grad_norm": 0.17311178147792816, "learning_rate": 0.0013812916680696792, "loss": 2.5144, "step": 409570 }, { "epoch": 0.8159744358026265, "grad_norm": 0.15688954293727875, "learning_rate": 0.0013809684983783134, "loss": 2.511, "step": 409580 }, { "epoch": 0.8159943580262654, "grad_norm": 0.1738351583480835, "learning_rate": 0.001380645497311919, "loss": 2.529, "step": 409590 }, { "epoch": 0.8160142802499044, "grad_norm": 0.13754774630069733, "learning_rate": 0.0013803226646068132, "loss": 2.5117, "step": 409600 }, { "epoch": 0.8160342024735433, "grad_norm": 0.12562990188598633, "learning_rate": 0.00138, "loss": 2.5105, "step": 409610 }, { "epoch": 0.8160541246971822, "grad_norm": 0.15840600430965424, "learning_rate": 0.0013796775032291673, "loss": 2.5008, "step": 409620 }, { "epoch": 0.8160740469208211, "grad_norm": 0.17711742222309113, "learning_rate": 0.001379355174032684, "loss": 2.5252, "step": 409630 }, { "epoch": 0.81609396914446, "grad_norm": 0.15792372822761536, "learning_rate": 0.001379033012149599, "loss": 2.5226, "step": 409640 }, { "epoch": 0.816113891368099, "grad_norm": 0.15500323474407196, "learning_rate": 0.0013787110173196374, "loss": 2.5006, "step": 409650 }, { "epoch": 0.8161338135917379, "grad_norm": 0.1528485119342804, "learning_rate": 0.0013783891892831979, "loss": 2.5162, "step": 409660 }, { "epoch": 0.8161537358153768, "grad_norm": 0.1648803949356079, "learning_rate": 0.0013780675277813518, "loss": 2.525, "step": 409670 }, { "epoch": 0.8161736580390156, "grad_norm": 0.1450159102678299, "learning_rate": 0.0013777460325558382, "loss": 2.504, "step": 409680 }, { "epoch": 0.8161935802626546, "grad_norm": 0.14055751264095306, "learning_rate": 0.0013774247033490647, "loss": 2.52, "step": 409690 }, { "epoch": 0.8162135024862935, "grad_norm": 0.15564167499542236, "learning_rate": 0.0013771035399041025, "loss": 2.5295, "step": 409700 }, { "epoch": 0.8162334247099324, "grad_norm": 0.16617384552955627, "learning_rate": 0.0013767825419646847, "loss": 2.5208, "step": 409710 }, { "epoch": 0.8162533469335713, "grad_norm": 0.1610332578420639, "learning_rate": 0.0013764617092752044, "loss": 2.5235, "step": 409720 }, { "epoch": 0.8162732691572102, "grad_norm": 0.18171046674251556, "learning_rate": 0.001376141041580711, "loss": 2.5124, "step": 409730 }, { "epoch": 0.8162931913808492, "grad_norm": 0.18144631385803223, "learning_rate": 0.0013758205386269107, "loss": 2.5051, "step": 409740 }, { "epoch": 0.8163131136044881, "grad_norm": 0.15379735827445984, "learning_rate": 0.0013755002001601601, "loss": 2.5044, "step": 409750 }, { "epoch": 0.816333035828127, "grad_norm": 0.15668995678424835, "learning_rate": 0.0013751800259274676, "loss": 2.5199, "step": 409760 }, { "epoch": 0.8163529580517659, "grad_norm": 0.14658842980861664, "learning_rate": 0.0013748600156764886, "loss": 2.525, "step": 409770 }, { "epoch": 0.8163728802754048, "grad_norm": 0.15121151506900787, "learning_rate": 0.0013745401691555243, "loss": 2.5112, "step": 409780 }, { "epoch": 0.8163928024990438, "grad_norm": 0.13808593153953552, "learning_rate": 0.0013742204861135194, "loss": 2.5117, "step": 409790 }, { "epoch": 0.8164127247226827, "grad_norm": 0.1671813428401947, "learning_rate": 0.001373900966300059, "loss": 2.5148, "step": 409800 }, { "epoch": 0.8164326469463216, "grad_norm": 0.16481776535511017, "learning_rate": 0.001373581609465367, "loss": 2.5088, "step": 409810 }, { "epoch": 0.8164525691699605, "grad_norm": 0.13760897517204285, "learning_rate": 0.0013732624153603042, "loss": 2.5232, "step": 409820 }, { "epoch": 0.8164724913935993, "grad_norm": 0.14562414586544037, "learning_rate": 0.0013729433837363647, "loss": 2.5292, "step": 409830 }, { "epoch": 0.8164924136172383, "grad_norm": 0.1402241736650467, "learning_rate": 0.001372624514345675, "loss": 2.5182, "step": 409840 }, { "epoch": 0.8165123358408772, "grad_norm": 0.18437901139259338, "learning_rate": 0.0013723058069409913, "loss": 2.519, "step": 409850 }, { "epoch": 0.8165322580645161, "grad_norm": 0.1751425862312317, "learning_rate": 0.0013719872612756967, "loss": 2.502, "step": 409860 }, { "epoch": 0.816552180288155, "grad_norm": 0.14348438382148743, "learning_rate": 0.0013716688771037997, "loss": 2.526, "step": 409870 }, { "epoch": 0.8165721025117939, "grad_norm": 0.16724911332130432, "learning_rate": 0.0013713506541799317, "loss": 2.505, "step": 409880 }, { "epoch": 0.8165920247354329, "grad_norm": 0.17632155120372772, "learning_rate": 0.0013710325922593445, "loss": 2.5262, "step": 409890 }, { "epoch": 0.8166119469590718, "grad_norm": 0.1467697024345398, "learning_rate": 0.0013707146910979092, "loss": 2.5212, "step": 409900 }, { "epoch": 0.8166318691827107, "grad_norm": 0.13715922832489014, "learning_rate": 0.0013703969504521123, "loss": 2.5193, "step": 409910 }, { "epoch": 0.8166517914063496, "grad_norm": 0.14869748055934906, "learning_rate": 0.001370079370079055, "loss": 2.5126, "step": 409920 }, { "epoch": 0.8166717136299885, "grad_norm": 0.14923830330371857, "learning_rate": 0.0013697619497364509, "loss": 2.527, "step": 409930 }, { "epoch": 0.8166916358536275, "grad_norm": 0.13944683969020844, "learning_rate": 0.0013694446891826223, "loss": 2.518, "step": 409940 }, { "epoch": 0.8167115580772664, "grad_norm": 0.15119558572769165, "learning_rate": 0.0013691275881764998, "loss": 2.5161, "step": 409950 }, { "epoch": 0.8167314803009053, "grad_norm": 0.17707866430282593, "learning_rate": 0.00136881064647762, "loss": 2.5264, "step": 409960 }, { "epoch": 0.8167514025245441, "grad_norm": 0.1715049296617508, "learning_rate": 0.0013684938638461222, "loss": 2.5166, "step": 409970 }, { "epoch": 0.8167713247481831, "grad_norm": 0.17041411995887756, "learning_rate": 0.0013681772400427475, "loss": 2.5143, "step": 409980 }, { "epoch": 0.816791246971822, "grad_norm": 0.14835944771766663, "learning_rate": 0.0013678607748288357, "loss": 2.497, "step": 409990 }, { "epoch": 0.8168111691954609, "grad_norm": 0.14882521331310272, "learning_rate": 0.001367544467966324, "loss": 2.5068, "step": 410000 }, { "epoch": 0.8168310914190998, "grad_norm": 0.1819552630186081, "learning_rate": 0.001367228319217745, "loss": 2.5187, "step": 410010 }, { "epoch": 0.8168510136427387, "grad_norm": 0.1394505500793457, "learning_rate": 0.0013669123283462235, "loss": 2.5102, "step": 410020 }, { "epoch": 0.8168709358663777, "grad_norm": 0.18464235961437225, "learning_rate": 0.0013665964951154754, "loss": 2.5256, "step": 410030 }, { "epoch": 0.8168908580900166, "grad_norm": 0.13954275846481323, "learning_rate": 0.0013662808192898056, "loss": 2.5116, "step": 410040 }, { "epoch": 0.8169107803136555, "grad_norm": 0.1424463391304016, "learning_rate": 0.0013659653006341057, "loss": 2.5258, "step": 410050 }, { "epoch": 0.8169307025372944, "grad_norm": 0.1534266322851181, "learning_rate": 0.0013656499389138519, "loss": 2.5161, "step": 410060 }, { "epoch": 0.8169506247609333, "grad_norm": 0.16821469366550446, "learning_rate": 0.0013653347338951028, "loss": 2.5015, "step": 410070 }, { "epoch": 0.8169705469845723, "grad_norm": 0.14566555619239807, "learning_rate": 0.0013650196853444983, "loss": 2.5142, "step": 410080 }, { "epoch": 0.8169904692082112, "grad_norm": 0.1749100387096405, "learning_rate": 0.0013647047930292564, "loss": 2.5173, "step": 410090 }, { "epoch": 0.8170103914318501, "grad_norm": 0.16434597969055176, "learning_rate": 0.0013643900567171718, "loss": 2.5241, "step": 410100 }, { "epoch": 0.817030313655489, "grad_norm": 0.15028266608715057, "learning_rate": 0.0013640754761766144, "loss": 2.5328, "step": 410110 }, { "epoch": 0.8170502358791278, "grad_norm": 0.15962202847003937, "learning_rate": 0.0013637610511765255, "loss": 2.5302, "step": 410120 }, { "epoch": 0.8170701581027668, "grad_norm": 0.15358564257621765, "learning_rate": 0.001363446781486418, "loss": 2.5127, "step": 410130 }, { "epoch": 0.8170900803264057, "grad_norm": 0.15354222059249878, "learning_rate": 0.0013631326668763738, "loss": 2.5135, "step": 410140 }, { "epoch": 0.8171100025500446, "grad_norm": 0.13807448744773865, "learning_rate": 0.0013628187071170403, "loss": 2.5145, "step": 410150 }, { "epoch": 0.8171299247736835, "grad_norm": 0.15895792841911316, "learning_rate": 0.001362504901979631, "loss": 2.5123, "step": 410160 }, { "epoch": 0.8171498469973224, "grad_norm": 0.14278973639011383, "learning_rate": 0.0013621912512359212, "loss": 2.5248, "step": 410170 }, { "epoch": 0.8171697692209614, "grad_norm": 0.1425129473209381, "learning_rate": 0.0013618777546582472, "loss": 2.5204, "step": 410180 }, { "epoch": 0.8171896914446003, "grad_norm": 0.14807486534118652, "learning_rate": 0.001361564412019505, "loss": 2.5183, "step": 410190 }, { "epoch": 0.8172096136682392, "grad_norm": 0.18739686906337738, "learning_rate": 0.0013612512230931474, "loss": 2.5203, "step": 410200 }, { "epoch": 0.8172295358918781, "grad_norm": 0.17471416294574738, "learning_rate": 0.001360938187653182, "loss": 2.5122, "step": 410210 }, { "epoch": 0.817249458115517, "grad_norm": 0.1566162109375, "learning_rate": 0.0013606253054741688, "loss": 2.5222, "step": 410220 }, { "epoch": 0.817269380339156, "grad_norm": 0.16563178598880768, "learning_rate": 0.001360312576331221, "loss": 2.5235, "step": 410230 }, { "epoch": 0.8172893025627949, "grad_norm": 0.15731097757816315, "learning_rate": 0.0013599999999999999, "loss": 2.5077, "step": 410240 }, { "epoch": 0.8173092247864338, "grad_norm": 0.14801102876663208, "learning_rate": 0.0013596875762567151, "loss": 2.5223, "step": 410250 }, { "epoch": 0.8173291470100726, "grad_norm": 0.1723209023475647, "learning_rate": 0.0013593753048781214, "loss": 2.5111, "step": 410260 }, { "epoch": 0.8173490692337116, "grad_norm": 0.15272608399391174, "learning_rate": 0.0013590631856415173, "loss": 2.5161, "step": 410270 }, { "epoch": 0.8173689914573505, "grad_norm": 0.15814170241355896, "learning_rate": 0.0013587512183247442, "loss": 2.5092, "step": 410280 }, { "epoch": 0.8173889136809894, "grad_norm": 0.16831068694591522, "learning_rate": 0.0013584394027061823, "loss": 2.5167, "step": 410290 }, { "epoch": 0.8174088359046283, "grad_norm": 0.1453341692686081, "learning_rate": 0.0013581277385647517, "loss": 2.5045, "step": 410300 }, { "epoch": 0.8174287581282672, "grad_norm": 0.1534247100353241, "learning_rate": 0.0013578162256799072, "loss": 2.5041, "step": 410310 }, { "epoch": 0.8174486803519062, "grad_norm": 0.14559011161327362, "learning_rate": 0.0013575048638316397, "loss": 2.506, "step": 410320 }, { "epoch": 0.8174686025755451, "grad_norm": 0.15096251666545868, "learning_rate": 0.0013571936528004721, "loss": 2.524, "step": 410330 }, { "epoch": 0.817488524799184, "grad_norm": 0.19094426929950714, "learning_rate": 0.001356882592367459, "loss": 2.5118, "step": 410340 }, { "epoch": 0.8175084470228229, "grad_norm": 0.16753698885440826, "learning_rate": 0.0013565716823141837, "loss": 2.5117, "step": 410350 }, { "epoch": 0.8175283692464618, "grad_norm": 0.1604902297258377, "learning_rate": 0.0013562609224227566, "loss": 2.5174, "step": 410360 }, { "epoch": 0.8175482914701008, "grad_norm": 0.1293615847826004, "learning_rate": 0.0013559503124758153, "loss": 2.5087, "step": 410370 }, { "epoch": 0.8175682136937397, "grad_norm": 0.13994421064853668, "learning_rate": 0.0013556398522565195, "loss": 2.5092, "step": 410380 }, { "epoch": 0.8175881359173786, "grad_norm": 0.1419992744922638, "learning_rate": 0.001355329541548552, "loss": 2.5279, "step": 410390 }, { "epoch": 0.8176080581410174, "grad_norm": 0.17049624025821686, "learning_rate": 0.001355019380136116, "loss": 2.5029, "step": 410400 }, { "epoch": 0.8176279803646563, "grad_norm": 0.12519124150276184, "learning_rate": 0.0013547093678039329, "loss": 2.5283, "step": 410410 }, { "epoch": 0.8176479025882953, "grad_norm": 0.15546540915966034, "learning_rate": 0.0013543995043372413, "loss": 2.51, "step": 410420 }, { "epoch": 0.8176678248119342, "grad_norm": 0.16582518815994263, "learning_rate": 0.0013540897895217942, "loss": 2.5094, "step": 410430 }, { "epoch": 0.8176877470355731, "grad_norm": 0.1535637378692627, "learning_rate": 0.0013537802231438595, "loss": 2.5203, "step": 410440 }, { "epoch": 0.817707669259212, "grad_norm": 0.14259925484657288, "learning_rate": 0.0013534708049902156, "loss": 2.5115, "step": 410450 }, { "epoch": 0.8177275914828509, "grad_norm": 0.2971309423446655, "learning_rate": 0.0013531615348481507, "loss": 2.5048, "step": 410460 }, { "epoch": 0.8177475137064899, "grad_norm": 0.17370058596134186, "learning_rate": 0.0013528524125054626, "loss": 2.5337, "step": 410470 }, { "epoch": 0.8177674359301288, "grad_norm": 0.16531234979629517, "learning_rate": 0.0013525434377504543, "loss": 2.5323, "step": 410480 }, { "epoch": 0.8177873581537677, "grad_norm": 0.16585326194763184, "learning_rate": 0.001352234610371934, "loss": 2.5202, "step": 410490 }, { "epoch": 0.8178072803774066, "grad_norm": 0.1557302176952362, "learning_rate": 0.001351925930159214, "loss": 2.515, "step": 410500 }, { "epoch": 0.8178272026010455, "grad_norm": 0.13355334103107452, "learning_rate": 0.001351617396902107, "loss": 2.5259, "step": 410510 }, { "epoch": 0.8178471248246845, "grad_norm": 0.13372744619846344, "learning_rate": 0.0013513090103909257, "loss": 2.5328, "step": 410520 }, { "epoch": 0.8178670470483234, "grad_norm": 0.1544795036315918, "learning_rate": 0.001351000770416482, "loss": 2.5242, "step": 410530 }, { "epoch": 0.8178869692719622, "grad_norm": 0.15381799638271332, "learning_rate": 0.001350692676770083, "loss": 2.4982, "step": 410540 }, { "epoch": 0.8179068914956011, "grad_norm": 0.16036446392536163, "learning_rate": 0.0013503847292435315, "loss": 2.5199, "step": 410550 }, { "epoch": 0.81792681371924, "grad_norm": 0.16559071838855743, "learning_rate": 0.0013500769276291232, "loss": 2.5112, "step": 410560 }, { "epoch": 0.817946735942879, "grad_norm": 0.15142957866191864, "learning_rate": 0.0013497692717196457, "loss": 2.5164, "step": 410570 }, { "epoch": 0.8179666581665179, "grad_norm": 0.1728896200656891, "learning_rate": 0.001349461761308376, "loss": 2.5238, "step": 410580 }, { "epoch": 0.8179865803901568, "grad_norm": 0.16045616567134857, "learning_rate": 0.0013491543961890809, "loss": 2.5142, "step": 410590 }, { "epoch": 0.8180065026137957, "grad_norm": 0.17549967765808105, "learning_rate": 0.0013488471761560117, "loss": 2.5234, "step": 410600 }, { "epoch": 0.8180264248374347, "grad_norm": 0.15463997423648834, "learning_rate": 0.001348540101003907, "loss": 2.5134, "step": 410610 }, { "epoch": 0.8180463470610736, "grad_norm": 0.15373702347278595, "learning_rate": 0.001348233170527987, "loss": 2.5195, "step": 410620 }, { "epoch": 0.8180662692847125, "grad_norm": 0.1632845401763916, "learning_rate": 0.0013479263845239558, "loss": 2.5101, "step": 410630 }, { "epoch": 0.8180861915083514, "grad_norm": 0.160064235329628, "learning_rate": 0.0013476197427879965, "loss": 2.4988, "step": 410640 }, { "epoch": 0.8181061137319903, "grad_norm": 0.1339864581823349, "learning_rate": 0.0013473132451167712, "loss": 2.5021, "step": 410650 }, { "epoch": 0.8181260359556293, "grad_norm": 0.15464802086353302, "learning_rate": 0.0013470068913074198, "loss": 2.5119, "step": 410660 }, { "epoch": 0.8181459581792682, "grad_norm": 0.17171907424926758, "learning_rate": 0.0013467006811575572, "loss": 2.506, "step": 410670 }, { "epoch": 0.818165880402907, "grad_norm": 0.18236809968948364, "learning_rate": 0.0013463946144652724, "loss": 2.5077, "step": 410680 }, { "epoch": 0.8181858026265459, "grad_norm": 0.14502480626106262, "learning_rate": 0.0013460886910291274, "loss": 2.5135, "step": 410690 }, { "epoch": 0.8182057248501848, "grad_norm": 0.17861098051071167, "learning_rate": 0.001345782910648155, "loss": 2.517, "step": 410700 }, { "epoch": 0.8182256470738238, "grad_norm": 0.16084221005439758, "learning_rate": 0.0013454772731218572, "loss": 2.5224, "step": 410710 }, { "epoch": 0.8182455692974627, "grad_norm": 0.1356726735830307, "learning_rate": 0.001345171778250204, "loss": 2.5161, "step": 410720 }, { "epoch": 0.8182654915211016, "grad_norm": 0.14979740977287292, "learning_rate": 0.0013448664258336321, "loss": 2.5192, "step": 410730 }, { "epoch": 0.8182854137447405, "grad_norm": 0.15295709669589996, "learning_rate": 0.0013445612156730426, "loss": 2.5099, "step": 410740 }, { "epoch": 0.8183053359683794, "grad_norm": 0.18301452696323395, "learning_rate": 0.0013442561475698002, "loss": 2.507, "step": 410750 }, { "epoch": 0.8183252581920184, "grad_norm": 0.1609399914741516, "learning_rate": 0.001343951221325731, "loss": 2.5167, "step": 410760 }, { "epoch": 0.8183451804156573, "grad_norm": 0.1528598964214325, "learning_rate": 0.0013436464367431226, "loss": 2.5155, "step": 410770 }, { "epoch": 0.8183651026392962, "grad_norm": 0.15554237365722656, "learning_rate": 0.0013433417936247198, "loss": 2.5182, "step": 410780 }, { "epoch": 0.8183850248629351, "grad_norm": 0.17934903502464294, "learning_rate": 0.0013430372917737263, "loss": 2.4995, "step": 410790 }, { "epoch": 0.818404947086574, "grad_norm": 0.20151886343955994, "learning_rate": 0.0013427329309938006, "loss": 2.516, "step": 410800 }, { "epoch": 0.818424869310213, "grad_norm": 0.14858591556549072, "learning_rate": 0.001342428711089056, "loss": 2.5229, "step": 410810 }, { "epoch": 0.8184447915338519, "grad_norm": 0.1572282612323761, "learning_rate": 0.001342124631864059, "loss": 2.5126, "step": 410820 }, { "epoch": 0.8184647137574907, "grad_norm": 0.17646561563014984, "learning_rate": 0.0013418206931238266, "loss": 2.5144, "step": 410830 }, { "epoch": 0.8184846359811296, "grad_norm": 0.16106389462947845, "learning_rate": 0.001341516894673827, "loss": 2.5282, "step": 410840 }, { "epoch": 0.8185045582047685, "grad_norm": 0.17196692526340485, "learning_rate": 0.001341213236319976, "loss": 2.5169, "step": 410850 }, { "epoch": 0.8185244804284075, "grad_norm": 0.15362262725830078, "learning_rate": 0.0013409097178686369, "loss": 2.5159, "step": 410860 }, { "epoch": 0.8185444026520464, "grad_norm": 0.1499544233083725, "learning_rate": 0.0013406063391266185, "loss": 2.5075, "step": 410870 }, { "epoch": 0.8185643248756853, "grad_norm": 0.17420628666877747, "learning_rate": 0.0013403030999011745, "loss": 2.5185, "step": 410880 }, { "epoch": 0.8185842470993242, "grad_norm": 0.14357997477054596, "learning_rate": 0.0013399999999999998, "loss": 2.5206, "step": 410890 }, { "epoch": 0.8186041693229632, "grad_norm": 0.17990469932556152, "learning_rate": 0.0013396970392312328, "loss": 2.5037, "step": 410900 }, { "epoch": 0.8186240915466021, "grad_norm": 0.17254933714866638, "learning_rate": 0.0013393942174034503, "loss": 2.5143, "step": 410910 }, { "epoch": 0.818644013770241, "grad_norm": 0.1397666037082672, "learning_rate": 0.0013390915343256678, "loss": 2.5107, "step": 410920 }, { "epoch": 0.8186639359938799, "grad_norm": 0.20612284541130066, "learning_rate": 0.0013387889898073383, "loss": 2.4903, "step": 410930 }, { "epoch": 0.8186838582175188, "grad_norm": 0.16941528022289276, "learning_rate": 0.001338486583658351, "loss": 2.5121, "step": 410940 }, { "epoch": 0.8187037804411578, "grad_norm": 0.14051015675067902, "learning_rate": 0.0013381843156890282, "loss": 2.5123, "step": 410950 }, { "epoch": 0.8187237026647967, "grad_norm": 0.14702552556991577, "learning_rate": 0.001337882185710126, "loss": 2.5191, "step": 410960 }, { "epoch": 0.8187436248884356, "grad_norm": 0.1469460129737854, "learning_rate": 0.0013375801935328322, "loss": 2.5182, "step": 410970 }, { "epoch": 0.8187635471120744, "grad_norm": 0.1991729885339737, "learning_rate": 0.0013372783389687644, "loss": 2.5013, "step": 410980 }, { "epoch": 0.8187834693357133, "grad_norm": 0.15260258316993713, "learning_rate": 0.001336976621829969, "loss": 2.5218, "step": 410990 }, { "epoch": 0.8188033915593523, "grad_norm": 0.1589353233575821, "learning_rate": 0.00133667504192892, "loss": 2.5282, "step": 411000 }, { "epoch": 0.8188233137829912, "grad_norm": 0.16008511185646057, "learning_rate": 0.0013363735990785178, "loss": 2.5096, "step": 411010 }, { "epoch": 0.8188432360066301, "grad_norm": 0.1590580940246582, "learning_rate": 0.0013360722930920868, "loss": 2.5163, "step": 411020 }, { "epoch": 0.818863158230269, "grad_norm": 0.16348616778850555, "learning_rate": 0.0013357711237833754, "loss": 2.5111, "step": 411030 }, { "epoch": 0.8188830804539079, "grad_norm": 0.18114745616912842, "learning_rate": 0.001335470090966554, "loss": 2.5065, "step": 411040 }, { "epoch": 0.8189030026775469, "grad_norm": 0.1250077784061432, "learning_rate": 0.0013351691944562135, "loss": 2.5191, "step": 411050 }, { "epoch": 0.8189229249011858, "grad_norm": 0.1485835164785385, "learning_rate": 0.0013348684340673644, "loss": 2.5079, "step": 411060 }, { "epoch": 0.8189428471248247, "grad_norm": 0.14470183849334717, "learning_rate": 0.001334567809615435, "loss": 2.5117, "step": 411070 }, { "epoch": 0.8189627693484636, "grad_norm": 0.13984942436218262, "learning_rate": 0.0013342673209162706, "loss": 2.5173, "step": 411080 }, { "epoch": 0.8189826915721025, "grad_norm": 0.15152136981487274, "learning_rate": 0.0013339669677861314, "loss": 2.5245, "step": 411090 }, { "epoch": 0.8190026137957415, "grad_norm": 0.19820694625377655, "learning_rate": 0.0013336667500416929, "loss": 2.5059, "step": 411100 }, { "epoch": 0.8190225360193804, "grad_norm": 0.1452031135559082, "learning_rate": 0.0013333666675000417, "loss": 2.5127, "step": 411110 }, { "epoch": 0.8190424582430192, "grad_norm": 0.18303081393241882, "learning_rate": 0.0013330667199786773, "loss": 2.522, "step": 411120 }, { "epoch": 0.8190623804666581, "grad_norm": 0.14237573742866516, "learning_rate": 0.0013327669072955089, "loss": 2.5142, "step": 411130 }, { "epoch": 0.819082302690297, "grad_norm": 0.13982248306274414, "learning_rate": 0.0013324672292688545, "loss": 2.5098, "step": 411140 }, { "epoch": 0.819102224913936, "grad_norm": 0.15904830396175385, "learning_rate": 0.00133216768571744, "loss": 2.5254, "step": 411150 }, { "epoch": 0.8191221471375749, "grad_norm": 0.14658932387828827, "learning_rate": 0.0013318682764603973, "loss": 2.5204, "step": 411160 }, { "epoch": 0.8191420693612138, "grad_norm": 0.15314032137393951, "learning_rate": 0.001331569001317264, "loss": 2.5233, "step": 411170 }, { "epoch": 0.8191619915848527, "grad_norm": 0.17049236595630646, "learning_rate": 0.0013312698601079807, "loss": 2.5249, "step": 411180 }, { "epoch": 0.8191819138084917, "grad_norm": 0.165738046169281, "learning_rate": 0.0013309708526528907, "loss": 2.5128, "step": 411190 }, { "epoch": 0.8192018360321306, "grad_norm": 0.15897206962108612, "learning_rate": 0.0013306719787727396, "loss": 2.5189, "step": 411200 }, { "epoch": 0.8192217582557695, "grad_norm": 0.17085948586463928, "learning_rate": 0.0013303732382886717, "loss": 2.5137, "step": 411210 }, { "epoch": 0.8192416804794084, "grad_norm": 0.17608390748500824, "learning_rate": 0.001330074631022231, "loss": 2.493, "step": 411220 }, { "epoch": 0.8192616027030473, "grad_norm": 0.17103023827075958, "learning_rate": 0.0013297761567953583, "loss": 2.5167, "step": 411230 }, { "epoch": 0.8192815249266863, "grad_norm": 0.16015690565109253, "learning_rate": 0.0013294778154303915, "loss": 2.5046, "step": 411240 }, { "epoch": 0.8193014471503252, "grad_norm": 0.16723226010799408, "learning_rate": 0.001329179606750063, "loss": 2.5222, "step": 411250 }, { "epoch": 0.819321369373964, "grad_norm": 0.15738916397094727, "learning_rate": 0.0013288815305774993, "loss": 2.504, "step": 411260 }, { "epoch": 0.8193412915976029, "grad_norm": 0.13581129908561707, "learning_rate": 0.0013285835867362192, "loss": 2.5215, "step": 411270 }, { "epoch": 0.8193612138212418, "grad_norm": 0.17616543173789978, "learning_rate": 0.0013282857750501337, "loss": 2.5209, "step": 411280 }, { "epoch": 0.8193811360448808, "grad_norm": 0.182284414768219, "learning_rate": 0.0013279880953435423, "loss": 2.5166, "step": 411290 }, { "epoch": 0.8194010582685197, "grad_norm": 0.17587517201900482, "learning_rate": 0.0013276905474411357, "loss": 2.5149, "step": 411300 }, { "epoch": 0.8194209804921586, "grad_norm": 0.16225773096084595, "learning_rate": 0.0013273931311679906, "loss": 2.5105, "step": 411310 }, { "epoch": 0.8194409027157975, "grad_norm": 0.13855814933776855, "learning_rate": 0.0013270958463495713, "loss": 2.5014, "step": 411320 }, { "epoch": 0.8194608249394364, "grad_norm": 0.13926072418689728, "learning_rate": 0.0013267986928117267, "loss": 2.5106, "step": 411330 }, { "epoch": 0.8194807471630754, "grad_norm": 0.1655479073524475, "learning_rate": 0.0013265016703806905, "loss": 2.512, "step": 411340 }, { "epoch": 0.8195006693867143, "grad_norm": 0.16342268884181976, "learning_rate": 0.0013262047788830792, "loss": 2.4931, "step": 411350 }, { "epoch": 0.8195205916103532, "grad_norm": 0.13711607456207275, "learning_rate": 0.0013259080181458912, "loss": 2.514, "step": 411360 }, { "epoch": 0.8195405138339921, "grad_norm": 0.14116208255290985, "learning_rate": 0.0013256113879965056, "loss": 2.5117, "step": 411370 }, { "epoch": 0.819560436057631, "grad_norm": 0.1916487067937851, "learning_rate": 0.00132531488826268, "loss": 2.5194, "step": 411380 }, { "epoch": 0.81958035828127, "grad_norm": 0.191172793507576, "learning_rate": 0.001325018518772552, "loss": 2.5077, "step": 411390 }, { "epoch": 0.8196002805049089, "grad_norm": 0.14569099247455597, "learning_rate": 0.0013247222793546347, "loss": 2.5095, "step": 411400 }, { "epoch": 0.8196202027285477, "grad_norm": 0.15674856305122375, "learning_rate": 0.0013244261698378186, "loss": 2.5165, "step": 411410 }, { "epoch": 0.8196401249521866, "grad_norm": 0.1871228665113449, "learning_rate": 0.001324130190051368, "loss": 2.5064, "step": 411420 }, { "epoch": 0.8196600471758255, "grad_norm": 0.15279395878314972, "learning_rate": 0.0013238343398249213, "loss": 2.5095, "step": 411430 }, { "epoch": 0.8196799693994645, "grad_norm": 0.1372290402650833, "learning_rate": 0.0013235386189884896, "loss": 2.5158, "step": 411440 }, { "epoch": 0.8196998916231034, "grad_norm": 0.14185182750225067, "learning_rate": 0.0013232430273724548, "loss": 2.5203, "step": 411450 }, { "epoch": 0.8197198138467423, "grad_norm": 0.14722216129302979, "learning_rate": 0.0013229475648075697, "loss": 2.5031, "step": 411460 }, { "epoch": 0.8197397360703812, "grad_norm": 0.16573026776313782, "learning_rate": 0.001322652231124956, "loss": 2.5061, "step": 411470 }, { "epoch": 0.8197596582940202, "grad_norm": 0.18067367374897003, "learning_rate": 0.0013223570261561032, "loss": 2.5161, "step": 411480 }, { "epoch": 0.8197795805176591, "grad_norm": 0.18858671188354492, "learning_rate": 0.0013220619497328683, "loss": 2.5038, "step": 411490 }, { "epoch": 0.819799502741298, "grad_norm": 0.17810244858264923, "learning_rate": 0.0013217670016874732, "loss": 2.5148, "step": 411500 }, { "epoch": 0.8198194249649369, "grad_norm": 0.1418730914592743, "learning_rate": 0.001321472181852505, "loss": 2.5338, "step": 411510 }, { "epoch": 0.8198393471885758, "grad_norm": 0.13230662047863007, "learning_rate": 0.0013211774900609144, "loss": 2.5122, "step": 411520 }, { "epoch": 0.8198592694122148, "grad_norm": 0.15530946850776672, "learning_rate": 0.0013208829261460143, "loss": 2.5188, "step": 411530 }, { "epoch": 0.8198791916358537, "grad_norm": 0.17818883061408997, "learning_rate": 0.0013205884899414788, "loss": 2.5127, "step": 411540 }, { "epoch": 0.8198991138594925, "grad_norm": 0.14182718098163605, "learning_rate": 0.0013202941812813429, "loss": 2.5237, "step": 411550 }, { "epoch": 0.8199190360831314, "grad_norm": 0.17532682418823242, "learning_rate": 0.00132, "loss": 2.5034, "step": 411560 }, { "epoch": 0.8199389583067703, "grad_norm": 0.13993634283542633, "learning_rate": 0.0013197059459322022, "loss": 2.4989, "step": 411570 }, { "epoch": 0.8199588805304093, "grad_norm": 0.16361920535564423, "learning_rate": 0.0013194120189130578, "loss": 2.5109, "step": 411580 }, { "epoch": 0.8199788027540482, "grad_norm": 0.14612196385860443, "learning_rate": 0.001319118218778032, "loss": 2.515, "step": 411590 }, { "epoch": 0.8199987249776871, "grad_norm": 0.16782110929489136, "learning_rate": 0.001318824545362944, "loss": 2.5148, "step": 411600 }, { "epoch": 0.820018647201326, "grad_norm": 0.158275306224823, "learning_rate": 0.001318530998503967, "loss": 2.5115, "step": 411610 }, { "epoch": 0.8200385694249649, "grad_norm": 0.14027011394500732, "learning_rate": 0.0013182375780376274, "loss": 2.5018, "step": 411620 }, { "epoch": 0.8200584916486039, "grad_norm": 0.1728304773569107, "learning_rate": 0.0013179442838008027, "loss": 2.5023, "step": 411630 }, { "epoch": 0.8200784138722428, "grad_norm": 0.16708973050117493, "learning_rate": 0.0013176511156307207, "loss": 2.5218, "step": 411640 }, { "epoch": 0.8200983360958817, "grad_norm": 0.1470176726579666, "learning_rate": 0.0013173580733649595, "loss": 2.5269, "step": 411650 }, { "epoch": 0.8201182583195206, "grad_norm": 0.14670780301094055, "learning_rate": 0.001317065156841445, "loss": 2.5139, "step": 411660 }, { "epoch": 0.8201381805431595, "grad_norm": 0.1625676304101944, "learning_rate": 0.0013167723658984512, "loss": 2.5229, "step": 411670 }, { "epoch": 0.8201581027667985, "grad_norm": 0.15876851975917816, "learning_rate": 0.0013164797003745977, "loss": 2.5053, "step": 411680 }, { "epoch": 0.8201780249904373, "grad_norm": 0.1390938013792038, "learning_rate": 0.0013161871601088495, "loss": 2.5201, "step": 411690 }, { "epoch": 0.8201979472140762, "grad_norm": 0.14768163859844208, "learning_rate": 0.0013158947449405172, "loss": 2.5163, "step": 411700 }, { "epoch": 0.8202178694377151, "grad_norm": 0.16805662214756012, "learning_rate": 0.001315602454709253, "loss": 2.5186, "step": 411710 }, { "epoch": 0.820237791661354, "grad_norm": 0.14613117277622223, "learning_rate": 0.0013153102892550525, "loss": 2.526, "step": 411720 }, { "epoch": 0.820257713884993, "grad_norm": 0.15478770434856415, "learning_rate": 0.0013150182484182515, "loss": 2.508, "step": 411730 }, { "epoch": 0.8202776361086319, "grad_norm": 0.18088573217391968, "learning_rate": 0.0013147263320395274, "loss": 2.5279, "step": 411740 }, { "epoch": 0.8202975583322708, "grad_norm": 0.17090779542922974, "learning_rate": 0.0013144345399598955, "loss": 2.5053, "step": 411750 }, { "epoch": 0.8203174805559097, "grad_norm": 0.16960208117961884, "learning_rate": 0.0013141428720207103, "loss": 2.4958, "step": 411760 }, { "epoch": 0.8203374027795487, "grad_norm": 0.15192168951034546, "learning_rate": 0.0013138513280636624, "loss": 2.5178, "step": 411770 }, { "epoch": 0.8203573250031876, "grad_norm": 0.17015211284160614, "learning_rate": 0.0013135599079307796, "loss": 2.5087, "step": 411780 }, { "epoch": 0.8203772472268265, "grad_norm": 0.15667486190795898, "learning_rate": 0.0013132686114644243, "loss": 2.517, "step": 411790 }, { "epoch": 0.8203971694504654, "grad_norm": 0.1452755481004715, "learning_rate": 0.0013129774385072933, "loss": 2.5202, "step": 411800 }, { "epoch": 0.8204170916741043, "grad_norm": 0.14336450397968292, "learning_rate": 0.0013126863889024168, "loss": 2.5105, "step": 411810 }, { "epoch": 0.8204370138977433, "grad_norm": 0.152309387922287, "learning_rate": 0.0013123954624931567, "loss": 2.5159, "step": 411820 }, { "epoch": 0.8204569361213822, "grad_norm": 0.16726133227348328, "learning_rate": 0.0013121046591232065, "loss": 2.5146, "step": 411830 }, { "epoch": 0.820476858345021, "grad_norm": 0.1751108169555664, "learning_rate": 0.00131181397863659, "loss": 2.5052, "step": 411840 }, { "epoch": 0.8204967805686599, "grad_norm": 0.157670259475708, "learning_rate": 0.0013115234208776597, "loss": 2.5134, "step": 411850 }, { "epoch": 0.8205167027922988, "grad_norm": 0.17679466307163239, "learning_rate": 0.0013112329856910976, "loss": 2.5023, "step": 411860 }, { "epoch": 0.8205366250159378, "grad_norm": 0.14145201444625854, "learning_rate": 0.0013109426729219114, "loss": 2.5083, "step": 411870 }, { "epoch": 0.8205565472395767, "grad_norm": 0.13934284448623657, "learning_rate": 0.0013106524824154366, "loss": 2.5231, "step": 411880 }, { "epoch": 0.8205764694632156, "grad_norm": 0.14628715813159943, "learning_rate": 0.0013103624140173335, "loss": 2.5138, "step": 411890 }, { "epoch": 0.8205963916868545, "grad_norm": 0.14785560965538025, "learning_rate": 0.0013100724675735864, "loss": 2.5328, "step": 411900 }, { "epoch": 0.8206163139104934, "grad_norm": 0.1510958969593048, "learning_rate": 0.0013097826429305042, "loss": 2.5102, "step": 411910 }, { "epoch": 0.8206362361341324, "grad_norm": 0.13904815912246704, "learning_rate": 0.0013094929399347173, "loss": 2.5362, "step": 411920 }, { "epoch": 0.8206561583577713, "grad_norm": 0.17727211117744446, "learning_rate": 0.0013092033584331784, "loss": 2.5224, "step": 411930 }, { "epoch": 0.8206760805814102, "grad_norm": 0.13968078792095184, "learning_rate": 0.0013089138982731604, "loss": 2.5156, "step": 411940 }, { "epoch": 0.8206960028050491, "grad_norm": 0.13724379241466522, "learning_rate": 0.001308624559302256, "loss": 2.5155, "step": 411950 }, { "epoch": 0.820715925028688, "grad_norm": 0.14535756409168243, "learning_rate": 0.0013083353413683768, "loss": 2.4918, "step": 411960 }, { "epoch": 0.820735847252327, "grad_norm": 0.17495374381542206, "learning_rate": 0.0013080462443197524, "loss": 2.5079, "step": 411970 }, { "epoch": 0.8207557694759658, "grad_norm": 0.15916310250759125, "learning_rate": 0.0013077572680049287, "loss": 2.5058, "step": 411980 }, { "epoch": 0.8207756916996047, "grad_norm": 0.14373621344566345, "learning_rate": 0.0013074684122727684, "loss": 2.5088, "step": 411990 }, { "epoch": 0.8207956139232436, "grad_norm": 0.15482689440250397, "learning_rate": 0.0013071796769724491, "loss": 2.5056, "step": 412000 }, { "epoch": 0.8208155361468825, "grad_norm": 0.16156049072742462, "learning_rate": 0.0013068910619534618, "loss": 2.5033, "step": 412010 }, { "epoch": 0.8208354583705215, "grad_norm": 0.15555799007415771, "learning_rate": 0.0013066025670656115, "loss": 2.5086, "step": 412020 }, { "epoch": 0.8208553805941604, "grad_norm": 0.15313071012496948, "learning_rate": 0.0013063141921590149, "loss": 2.5083, "step": 412030 }, { "epoch": 0.8208753028177993, "grad_norm": 0.15053287148475647, "learning_rate": 0.001306025937084101, "loss": 2.4955, "step": 412040 }, { "epoch": 0.8208952250414382, "grad_norm": 0.16710343956947327, "learning_rate": 0.0013057378016916089, "loss": 2.5162, "step": 412050 }, { "epoch": 0.8209151472650772, "grad_norm": 0.1445341259241104, "learning_rate": 0.0013054497858325865, "loss": 2.5054, "step": 412060 }, { "epoch": 0.8209350694887161, "grad_norm": 0.14048102498054504, "learning_rate": 0.0013051618893583916, "loss": 2.5148, "step": 412070 }, { "epoch": 0.820954991712355, "grad_norm": 0.17343978583812714, "learning_rate": 0.0013048741121206894, "loss": 2.4967, "step": 412080 }, { "epoch": 0.8209749139359939, "grad_norm": 0.1691148728132248, "learning_rate": 0.0013045864539714515, "loss": 2.5105, "step": 412090 }, { "epoch": 0.8209948361596328, "grad_norm": 0.14837421476840973, "learning_rate": 0.0013042989147629567, "loss": 2.5248, "step": 412100 }, { "epoch": 0.8210147583832718, "grad_norm": 0.14441163837909698, "learning_rate": 0.0013040114943477874, "loss": 2.5177, "step": 412110 }, { "epoch": 0.8210346806069106, "grad_norm": 0.13560253381729126, "learning_rate": 0.0013037241925788314, "loss": 2.5072, "step": 412120 }, { "epoch": 0.8210546028305495, "grad_norm": 0.16694004833698273, "learning_rate": 0.0013034370093092803, "loss": 2.5156, "step": 412130 }, { "epoch": 0.8210745250541884, "grad_norm": 0.19011290371418, "learning_rate": 0.0013031499443926261, "loss": 2.5117, "step": 412140 }, { "epoch": 0.8210944472778273, "grad_norm": 0.16851817071437836, "learning_rate": 0.0013028629976826651, "loss": 2.5074, "step": 412150 }, { "epoch": 0.8211143695014663, "grad_norm": 0.1422683596611023, "learning_rate": 0.0013025761690334922, "loss": 2.5158, "step": 412160 }, { "epoch": 0.8211342917251052, "grad_norm": 0.15976737439632416, "learning_rate": 0.0013022894582995037, "loss": 2.5041, "step": 412170 }, { "epoch": 0.8211542139487441, "grad_norm": 0.1614580750465393, "learning_rate": 0.001302002865335394, "loss": 2.4917, "step": 412180 }, { "epoch": 0.821174136172383, "grad_norm": 0.15689434111118317, "learning_rate": 0.0013017163899961563, "loss": 2.5158, "step": 412190 }, { "epoch": 0.8211940583960219, "grad_norm": 0.16571900248527527, "learning_rate": 0.0013014300321370809, "loss": 2.5112, "step": 412200 }, { "epoch": 0.8212139806196609, "grad_norm": 0.15365096926689148, "learning_rate": 0.001301143791613754, "loss": 2.5066, "step": 412210 }, { "epoch": 0.8212339028432998, "grad_norm": 0.15304654836654663, "learning_rate": 0.0013008576682820587, "loss": 2.5052, "step": 412220 }, { "epoch": 0.8212538250669387, "grad_norm": 0.15677915513515472, "learning_rate": 0.0013005716619981715, "loss": 2.5148, "step": 412230 }, { "epoch": 0.8212737472905776, "grad_norm": 0.16957154870033264, "learning_rate": 0.001300285772618564, "loss": 2.53, "step": 412240 }, { "epoch": 0.8212936695142165, "grad_norm": 0.14294001460075378, "learning_rate": 0.0013000000000000002, "loss": 2.5125, "step": 412250 }, { "epoch": 0.8213135917378555, "grad_norm": 0.16163045167922974, "learning_rate": 0.0012997143439995363, "loss": 2.5209, "step": 412260 }, { "epoch": 0.8213335139614943, "grad_norm": 0.15834195911884308, "learning_rate": 0.0012994288044745202, "loss": 2.5148, "step": 412270 }, { "epoch": 0.8213534361851332, "grad_norm": 0.14612609148025513, "learning_rate": 0.0012991433812825908, "loss": 2.5171, "step": 412280 }, { "epoch": 0.8213733584087721, "grad_norm": 0.1619909554719925, "learning_rate": 0.001298858074281676, "loss": 2.5136, "step": 412290 }, { "epoch": 0.821393280632411, "grad_norm": 0.1674439013004303, "learning_rate": 0.0012985728833299927, "loss": 2.5083, "step": 412300 }, { "epoch": 0.82141320285605, "grad_norm": 0.1702425330877304, "learning_rate": 0.0012982878082860468, "loss": 2.5155, "step": 412310 }, { "epoch": 0.8214331250796889, "grad_norm": 0.16812613606452942, "learning_rate": 0.0012980028490086305, "loss": 2.5031, "step": 412320 }, { "epoch": 0.8214530473033278, "grad_norm": 0.1849328577518463, "learning_rate": 0.0012977180053568224, "loss": 2.5163, "step": 412330 }, { "epoch": 0.8214729695269667, "grad_norm": 0.14192502200603485, "learning_rate": 0.0012974332771899882, "loss": 2.5242, "step": 412340 }, { "epoch": 0.8214928917506056, "grad_norm": 0.1428094506263733, "learning_rate": 0.0012971486643677769, "loss": 2.5062, "step": 412350 }, { "epoch": 0.8215128139742446, "grad_norm": 0.18215803802013397, "learning_rate": 0.0012968641667501222, "loss": 2.5102, "step": 412360 }, { "epoch": 0.8215327361978835, "grad_norm": 0.1612088829278946, "learning_rate": 0.0012965797841972412, "loss": 2.5088, "step": 412370 }, { "epoch": 0.8215526584215224, "grad_norm": 0.16261611878871918, "learning_rate": 0.0012962955165696328, "loss": 2.5039, "step": 412380 }, { "epoch": 0.8215725806451613, "grad_norm": 0.16427423059940338, "learning_rate": 0.0012960113637280784, "loss": 2.5142, "step": 412390 }, { "epoch": 0.8215925028688003, "grad_norm": 0.18784494698047638, "learning_rate": 0.0012957273255336397, "loss": 2.514, "step": 412400 }, { "epoch": 0.8216124250924391, "grad_norm": 0.1501462757587433, "learning_rate": 0.0012954434018476583, "loss": 2.5155, "step": 412410 }, { "epoch": 0.821632347316078, "grad_norm": 0.1383163034915924, "learning_rate": 0.001295159592531756, "loss": 2.5049, "step": 412420 }, { "epoch": 0.8216522695397169, "grad_norm": 0.16615410149097443, "learning_rate": 0.0012948758974478322, "loss": 2.497, "step": 412430 }, { "epoch": 0.8216721917633558, "grad_norm": 0.16719754040241241, "learning_rate": 0.001294592316458064, "loss": 2.5081, "step": 412440 }, { "epoch": 0.8216921139869948, "grad_norm": 0.1529553383588791, "learning_rate": 0.001294308849424906, "loss": 2.5165, "step": 412450 }, { "epoch": 0.8217120362106337, "grad_norm": 0.1867208033800125, "learning_rate": 0.0012940254962110881, "loss": 2.5167, "step": 412460 }, { "epoch": 0.8217319584342726, "grad_norm": 0.13839948177337646, "learning_rate": 0.001293742256679617, "loss": 2.5106, "step": 412470 }, { "epoch": 0.8217518806579115, "grad_norm": 0.15261439979076385, "learning_rate": 0.0012934591306937723, "loss": 2.5027, "step": 412480 }, { "epoch": 0.8217718028815504, "grad_norm": 0.1519058346748352, "learning_rate": 0.0012931761181171084, "loss": 2.5162, "step": 412490 }, { "epoch": 0.8217917251051894, "grad_norm": 0.16060975193977356, "learning_rate": 0.0012928932188134526, "loss": 2.4934, "step": 412500 }, { "epoch": 0.8218116473288283, "grad_norm": 0.15865951776504517, "learning_rate": 0.0012926104326469044, "loss": 2.5139, "step": 412510 }, { "epoch": 0.8218315695524672, "grad_norm": 0.1511809527873993, "learning_rate": 0.0012923277594818346, "loss": 2.5134, "step": 412520 }, { "epoch": 0.8218514917761061, "grad_norm": 0.14641383290290833, "learning_rate": 0.0012920451991828856, "loss": 2.5199, "step": 412530 }, { "epoch": 0.821871413999745, "grad_norm": 0.1543818563222885, "learning_rate": 0.001291762751614969, "loss": 2.5141, "step": 412540 }, { "epoch": 0.821891336223384, "grad_norm": 0.1640031635761261, "learning_rate": 0.0012914804166432661, "loss": 2.5111, "step": 412550 }, { "epoch": 0.8219112584470228, "grad_norm": 0.15029720962047577, "learning_rate": 0.0012911981941332261, "loss": 2.5224, "step": 412560 }, { "epoch": 0.8219311806706617, "grad_norm": 0.16910125315189362, "learning_rate": 0.0012909160839505668, "loss": 2.5154, "step": 412570 }, { "epoch": 0.8219511028943006, "grad_norm": 0.15760347247123718, "learning_rate": 0.001290634085961272, "loss": 2.4975, "step": 412580 }, { "epoch": 0.8219710251179395, "grad_norm": 0.14865320920944214, "learning_rate": 0.001290352200031593, "loss": 2.5033, "step": 412590 }, { "epoch": 0.8219909473415785, "grad_norm": 0.1614609658718109, "learning_rate": 0.0012900704260280463, "loss": 2.5003, "step": 412600 }, { "epoch": 0.8220108695652174, "grad_norm": 0.1416332721710205, "learning_rate": 0.001289788763817412, "loss": 2.5042, "step": 412610 }, { "epoch": 0.8220307917888563, "grad_norm": 0.138368621468544, "learning_rate": 0.0012895072132667355, "loss": 2.5193, "step": 412620 }, { "epoch": 0.8220507140124952, "grad_norm": 0.16200315952301025, "learning_rate": 0.0012892257742433254, "loss": 2.5158, "step": 412630 }, { "epoch": 0.8220706362361341, "grad_norm": 0.20657801628112793, "learning_rate": 0.0012889444466147528, "loss": 2.5092, "step": 412640 }, { "epoch": 0.8220905584597731, "grad_norm": 0.1290082186460495, "learning_rate": 0.0012886632302488504, "loss": 2.5112, "step": 412650 }, { "epoch": 0.822110480683412, "grad_norm": 0.1763136386871338, "learning_rate": 0.0012883821250137123, "loss": 2.5107, "step": 412660 }, { "epoch": 0.8221304029070509, "grad_norm": 0.16483384370803833, "learning_rate": 0.0012881011307776923, "loss": 2.5201, "step": 412670 }, { "epoch": 0.8221503251306898, "grad_norm": 0.15674996376037598, "learning_rate": 0.0012878202474094057, "loss": 2.5167, "step": 412680 }, { "epoch": 0.8221702473543288, "grad_norm": 0.14435376226902008, "learning_rate": 0.001287539474777725, "loss": 2.4982, "step": 412690 }, { "epoch": 0.8221901695779676, "grad_norm": 0.16207849979400635, "learning_rate": 0.0012872588127517816, "loss": 2.5113, "step": 412700 }, { "epoch": 0.8222100918016065, "grad_norm": 0.15445710718631744, "learning_rate": 0.0012869782612009643, "loss": 2.517, "step": 412710 }, { "epoch": 0.8222300140252454, "grad_norm": 0.16071121394634247, "learning_rate": 0.0012866978199949197, "loss": 2.5164, "step": 412720 }, { "epoch": 0.8222499362488843, "grad_norm": 0.15038983523845673, "learning_rate": 0.0012864174890035492, "loss": 2.5023, "step": 412730 }, { "epoch": 0.8222698584725233, "grad_norm": 0.1555032581090927, "learning_rate": 0.00128613726809701, "loss": 2.5028, "step": 412740 }, { "epoch": 0.8222897806961622, "grad_norm": 0.17055535316467285, "learning_rate": 0.0012858571571457151, "loss": 2.5124, "step": 412750 }, { "epoch": 0.8223097029198011, "grad_norm": 0.1831890493631363, "learning_rate": 0.0012855771560203299, "loss": 2.5164, "step": 412760 }, { "epoch": 0.82232962514344, "grad_norm": 0.14484068751335144, "learning_rate": 0.0012852972645917746, "loss": 2.5127, "step": 412770 }, { "epoch": 0.8223495473670789, "grad_norm": 0.15041400492191315, "learning_rate": 0.0012850174827312212, "loss": 2.5096, "step": 412780 }, { "epoch": 0.8223694695907179, "grad_norm": 0.16640444099903107, "learning_rate": 0.0012847378103100933, "loss": 2.5154, "step": 412790 }, { "epoch": 0.8223893918143568, "grad_norm": 0.16988180577754974, "learning_rate": 0.0012844582472000675, "loss": 2.512, "step": 412800 }, { "epoch": 0.8224093140379957, "grad_norm": 0.1628853976726532, "learning_rate": 0.001284178793273069, "loss": 2.5226, "step": 412810 }, { "epoch": 0.8224292362616346, "grad_norm": 0.18733203411102295, "learning_rate": 0.0012838994484012738, "loss": 2.502, "step": 412820 }, { "epoch": 0.8224491584852734, "grad_norm": 0.17950253188610077, "learning_rate": 0.0012836202124571075, "loss": 2.5068, "step": 412830 }, { "epoch": 0.8224690807089124, "grad_norm": 0.1461244374513626, "learning_rate": 0.0012833410853132431, "loss": 2.5061, "step": 412840 }, { "epoch": 0.8224890029325513, "grad_norm": 0.1470220983028412, "learning_rate": 0.0012830620668426032, "loss": 2.5132, "step": 412850 }, { "epoch": 0.8225089251561902, "grad_norm": 0.15693610906600952, "learning_rate": 0.001282783156918356, "loss": 2.5223, "step": 412860 }, { "epoch": 0.8225288473798291, "grad_norm": 0.19010788202285767, "learning_rate": 0.001282504355413916, "loss": 2.5055, "step": 412870 }, { "epoch": 0.822548769603468, "grad_norm": 0.1557391732931137, "learning_rate": 0.0012822256622029456, "loss": 2.4902, "step": 412880 }, { "epoch": 0.822568691827107, "grad_norm": 0.15251846611499786, "learning_rate": 0.0012819470771593504, "loss": 2.5211, "step": 412890 }, { "epoch": 0.8225886140507459, "grad_norm": 0.15235723555088043, "learning_rate": 0.001281668600157281, "loss": 2.4886, "step": 412900 }, { "epoch": 0.8226085362743848, "grad_norm": 0.15389034152030945, "learning_rate": 0.001281390231071133, "loss": 2.5032, "step": 412910 }, { "epoch": 0.8226284584980237, "grad_norm": 0.17000678181648254, "learning_rate": 0.001281111969775543, "loss": 2.5064, "step": 412920 }, { "epoch": 0.8226483807216626, "grad_norm": 0.17820751667022705, "learning_rate": 0.001280833816145392, "loss": 2.4997, "step": 412930 }, { "epoch": 0.8226683029453016, "grad_norm": 0.13728228211402893, "learning_rate": 0.0012805557700558022, "loss": 2.5107, "step": 412940 }, { "epoch": 0.8226882251689405, "grad_norm": 0.15067443251609802, "learning_rate": 0.0012802778313821368, "loss": 2.5121, "step": 412950 }, { "epoch": 0.8227081473925794, "grad_norm": 0.16732719540596008, "learning_rate": 0.00128, "loss": 2.5071, "step": 412960 }, { "epoch": 0.8227280696162182, "grad_norm": 0.17158761620521545, "learning_rate": 0.0012797222757852356, "loss": 2.524, "step": 412970 }, { "epoch": 0.8227479918398573, "grad_norm": 0.1522710770368576, "learning_rate": 0.0012794446586139273, "loss": 2.525, "step": 412980 }, { "epoch": 0.8227679140634961, "grad_norm": 0.15463757514953613, "learning_rate": 0.001279167148362396, "loss": 2.5168, "step": 412990 }, { "epoch": 0.822787836287135, "grad_norm": 0.15183773636817932, "learning_rate": 0.0012788897449072022, "loss": 2.5145, "step": 413000 }, { "epoch": 0.8228077585107739, "grad_norm": 0.16867558658123016, "learning_rate": 0.0012786124481251426, "loss": 2.5125, "step": 413010 }, { "epoch": 0.8228276807344128, "grad_norm": 0.15596850216388702, "learning_rate": 0.0012783352578932515, "loss": 2.5169, "step": 413020 }, { "epoch": 0.8228476029580518, "grad_norm": 0.1484583020210266, "learning_rate": 0.0012780581740887982, "loss": 2.5224, "step": 413030 }, { "epoch": 0.8228675251816907, "grad_norm": 0.1518860161304474, "learning_rate": 0.0012777811965892884, "loss": 2.5221, "step": 413040 }, { "epoch": 0.8228874474053296, "grad_norm": 0.14796289801597595, "learning_rate": 0.0012775043252724622, "loss": 2.5158, "step": 413050 }, { "epoch": 0.8229073696289685, "grad_norm": 0.1622398942708969, "learning_rate": 0.0012772275600162939, "loss": 2.4986, "step": 413060 }, { "epoch": 0.8229272918526074, "grad_norm": 0.1670529991388321, "learning_rate": 0.0012769509006989913, "loss": 2.5083, "step": 413070 }, { "epoch": 0.8229472140762464, "grad_norm": 0.15138556063175201, "learning_rate": 0.001276674347198995, "loss": 2.5066, "step": 413080 }, { "epoch": 0.8229671362998853, "grad_norm": 0.13993680477142334, "learning_rate": 0.0012763978993949783, "loss": 2.5039, "step": 413090 }, { "epoch": 0.8229870585235242, "grad_norm": 0.19579946994781494, "learning_rate": 0.0012761215571658459, "loss": 2.5037, "step": 413100 }, { "epoch": 0.823006980747163, "grad_norm": 0.5085973739624023, "learning_rate": 0.0012758453203907329, "loss": 2.5134, "step": 413110 }, { "epoch": 0.8230269029708019, "grad_norm": 0.16958819329738617, "learning_rate": 0.0012755691889490067, "loss": 2.4977, "step": 413120 }, { "epoch": 0.823046825194441, "grad_norm": 0.18015426397323608, "learning_rate": 0.0012752931627202627, "loss": 2.5167, "step": 413130 }, { "epoch": 0.8230667474180798, "grad_norm": 0.1557008922100067, "learning_rate": 0.0012750172415843257, "loss": 2.5197, "step": 413140 }, { "epoch": 0.8230866696417187, "grad_norm": 0.1531970500946045, "learning_rate": 0.0012747414254212501, "loss": 2.5097, "step": 413150 }, { "epoch": 0.8231065918653576, "grad_norm": 0.18406645953655243, "learning_rate": 0.0012744657141113178, "loss": 2.5067, "step": 413160 }, { "epoch": 0.8231265140889965, "grad_norm": 0.13979560136795044, "learning_rate": 0.0012741901075350376, "loss": 2.5122, "step": 413170 }, { "epoch": 0.8231464363126355, "grad_norm": 0.15983018279075623, "learning_rate": 0.001273914605573146, "loss": 2.5111, "step": 413180 }, { "epoch": 0.8231663585362744, "grad_norm": 0.18708057701587677, "learning_rate": 0.001273639208106605, "loss": 2.5057, "step": 413190 }, { "epoch": 0.8231862807599133, "grad_norm": 0.1632193773984909, "learning_rate": 0.0012733639150166018, "loss": 2.5054, "step": 413200 }, { "epoch": 0.8232062029835522, "grad_norm": 0.1385328620672226, "learning_rate": 0.0012730887261845501, "loss": 2.5156, "step": 413210 }, { "epoch": 0.8232261252071911, "grad_norm": 0.15129856765270233, "learning_rate": 0.0012728136414920863, "loss": 2.5125, "step": 413220 }, { "epoch": 0.8232460474308301, "grad_norm": 0.16463583707809448, "learning_rate": 0.0012725386608210716, "loss": 2.5172, "step": 413230 }, { "epoch": 0.823265969654469, "grad_norm": 0.1645229309797287, "learning_rate": 0.00127226378405359, "loss": 2.5104, "step": 413240 }, { "epoch": 0.8232858918781079, "grad_norm": 0.15770083665847778, "learning_rate": 0.0012719890110719483, "loss": 2.5108, "step": 413250 }, { "epoch": 0.8233058141017467, "grad_norm": 0.17767778038978577, "learning_rate": 0.001271714341758675, "loss": 2.4954, "step": 413260 }, { "epoch": 0.8233257363253857, "grad_norm": 0.16678231954574585, "learning_rate": 0.0012714397759965206, "loss": 2.5013, "step": 413270 }, { "epoch": 0.8233456585490246, "grad_norm": 0.15601103007793427, "learning_rate": 0.001271165313668456, "loss": 2.5064, "step": 413280 }, { "epoch": 0.8233655807726635, "grad_norm": 0.1883591264486313, "learning_rate": 0.0012708909546576726, "loss": 2.5129, "step": 413290 }, { "epoch": 0.8233855029963024, "grad_norm": 0.16219288110733032, "learning_rate": 0.0012706166988475812, "loss": 2.4981, "step": 413300 }, { "epoch": 0.8234054252199413, "grad_norm": 0.1695711612701416, "learning_rate": 0.001270342546121812, "loss": 2.5176, "step": 413310 }, { "epoch": 0.8234253474435803, "grad_norm": 0.16450157761573792, "learning_rate": 0.0012700684963642135, "loss": 2.5167, "step": 413320 }, { "epoch": 0.8234452696672192, "grad_norm": 0.1781466156244278, "learning_rate": 0.0012697945494588529, "loss": 2.492, "step": 413330 }, { "epoch": 0.8234651918908581, "grad_norm": 0.15395423769950867, "learning_rate": 0.0012695207052900131, "loss": 2.4989, "step": 413340 }, { "epoch": 0.823485114114497, "grad_norm": 0.1502373367547989, "learning_rate": 0.0012692469637421958, "loss": 2.5027, "step": 413350 }, { "epoch": 0.8235050363381359, "grad_norm": 0.1567055583000183, "learning_rate": 0.0012689733247001173, "loss": 2.5056, "step": 413360 }, { "epoch": 0.8235249585617749, "grad_norm": 0.15016862750053406, "learning_rate": 0.001268699788048711, "loss": 2.499, "step": 413370 }, { "epoch": 0.8235448807854138, "grad_norm": 0.16027507185935974, "learning_rate": 0.0012684263536731249, "loss": 2.5009, "step": 413380 }, { "epoch": 0.8235648030090527, "grad_norm": 0.14910420775413513, "learning_rate": 0.0012681530214587204, "loss": 2.5145, "step": 413390 }, { "epoch": 0.8235847252326916, "grad_norm": 0.16509975492954254, "learning_rate": 0.001267879791291075, "loss": 2.4982, "step": 413400 }, { "epoch": 0.8236046474563304, "grad_norm": 0.14972028136253357, "learning_rate": 0.0012676066630559779, "loss": 2.5057, "step": 413410 }, { "epoch": 0.8236245696799694, "grad_norm": 0.1515323519706726, "learning_rate": 0.001267333636639432, "loss": 2.5073, "step": 413420 }, { "epoch": 0.8236444919036083, "grad_norm": 0.15220381319522858, "learning_rate": 0.0012670607119276522, "loss": 2.5012, "step": 413430 }, { "epoch": 0.8236644141272472, "grad_norm": 0.17994114756584167, "learning_rate": 0.0012667878888070656, "loss": 2.5085, "step": 413440 }, { "epoch": 0.8236843363508861, "grad_norm": 0.14945517480373383, "learning_rate": 0.0012665151671643101, "loss": 2.5001, "step": 413450 }, { "epoch": 0.823704258574525, "grad_norm": 0.14912045001983643, "learning_rate": 0.0012662425468862343, "loss": 2.5137, "step": 413460 }, { "epoch": 0.823724180798164, "grad_norm": 0.12898802757263184, "learning_rate": 0.0012659700278598973, "loss": 2.5066, "step": 413470 }, { "epoch": 0.8237441030218029, "grad_norm": 0.16712242364883423, "learning_rate": 0.0012656976099725671, "loss": 2.5229, "step": 413480 }, { "epoch": 0.8237640252454418, "grad_norm": 0.15942738950252533, "learning_rate": 0.0012654252931117217, "loss": 2.5236, "step": 413490 }, { "epoch": 0.8237839474690807, "grad_norm": 0.18304914236068726, "learning_rate": 0.0012651530771650465, "loss": 2.522, "step": 413500 }, { "epoch": 0.8238038696927196, "grad_norm": 0.13614243268966675, "learning_rate": 0.0012648809620204359, "loss": 2.5202, "step": 413510 }, { "epoch": 0.8238237919163586, "grad_norm": 0.14630284905433655, "learning_rate": 0.0012646089475659905, "loss": 2.5087, "step": 413520 }, { "epoch": 0.8238437141399975, "grad_norm": 0.1448027491569519, "learning_rate": 0.001264337033690019, "loss": 2.4958, "step": 413530 }, { "epoch": 0.8238636363636364, "grad_norm": 0.15956580638885498, "learning_rate": 0.001264065220281036, "loss": 2.4947, "step": 413540 }, { "epoch": 0.8238835585872752, "grad_norm": 0.16714854538440704, "learning_rate": 0.0012637935072277616, "loss": 2.5018, "step": 413550 }, { "epoch": 0.8239034808109142, "grad_norm": 0.14546902477741241, "learning_rate": 0.0012635218944191213, "loss": 2.5034, "step": 413560 }, { "epoch": 0.8239234030345531, "grad_norm": 0.1418486088514328, "learning_rate": 0.0012632503817442456, "loss": 2.4977, "step": 413570 }, { "epoch": 0.823943325258192, "grad_norm": 0.15770268440246582, "learning_rate": 0.001262978969092469, "loss": 2.5117, "step": 413580 }, { "epoch": 0.8239632474818309, "grad_norm": 0.17726272344589233, "learning_rate": 0.0012627076563533298, "loss": 2.5105, "step": 413590 }, { "epoch": 0.8239831697054698, "grad_norm": 0.1605202704668045, "learning_rate": 0.001262436443416569, "loss": 2.529, "step": 413600 }, { "epoch": 0.8240030919291088, "grad_norm": 0.1433667689561844, "learning_rate": 0.0012621653301721314, "loss": 2.5071, "step": 413610 }, { "epoch": 0.8240230141527477, "grad_norm": 0.1877664178609848, "learning_rate": 0.0012618943165101629, "loss": 2.5102, "step": 413620 }, { "epoch": 0.8240429363763866, "grad_norm": 0.1452469825744629, "learning_rate": 0.0012616234023210106, "loss": 2.5178, "step": 413630 }, { "epoch": 0.8240628586000255, "grad_norm": 0.17541450262069702, "learning_rate": 0.0012613525874952244, "loss": 2.5221, "step": 413640 }, { "epoch": 0.8240827808236644, "grad_norm": 0.1502661556005478, "learning_rate": 0.0012610818719235534, "loss": 2.5067, "step": 413650 }, { "epoch": 0.8241027030473034, "grad_norm": 0.14890283346176147, "learning_rate": 0.0012608112554969469, "loss": 2.5193, "step": 413660 }, { "epoch": 0.8241226252709423, "grad_norm": 0.15962734818458557, "learning_rate": 0.0012605407381065538, "loss": 2.5039, "step": 413670 }, { "epoch": 0.8241425474945812, "grad_norm": 0.16524282097816467, "learning_rate": 0.001260270319643723, "loss": 2.5124, "step": 413680 }, { "epoch": 0.82416246971822, "grad_norm": 0.15706481039524078, "learning_rate": 0.00126, "loss": 2.5038, "step": 413690 }, { "epoch": 0.8241823919418589, "grad_norm": 0.1778867244720459, "learning_rate": 0.00125972977906713, "loss": 2.5071, "step": 413700 }, { "epoch": 0.8242023141654979, "grad_norm": 0.15296052396297455, "learning_rate": 0.0012594596567370552, "loss": 2.5071, "step": 413710 }, { "epoch": 0.8242222363891368, "grad_norm": 0.16211557388305664, "learning_rate": 0.0012591896329019147, "loss": 2.4932, "step": 413720 }, { "epoch": 0.8242421586127757, "grad_norm": 0.16545197367668152, "learning_rate": 0.001258919707454044, "loss": 2.52, "step": 413730 }, { "epoch": 0.8242620808364146, "grad_norm": 0.15187831223011017, "learning_rate": 0.0012586498802859745, "loss": 2.5177, "step": 413740 }, { "epoch": 0.8242820030600535, "grad_norm": 0.14515943825244904, "learning_rate": 0.0012583801512904339, "loss": 2.496, "step": 413750 }, { "epoch": 0.8243019252836925, "grad_norm": 0.14882086217403412, "learning_rate": 0.0012581105203603436, "loss": 2.5013, "step": 413760 }, { "epoch": 0.8243218475073314, "grad_norm": 0.15446609258651733, "learning_rate": 0.0012578409873888212, "loss": 2.5294, "step": 413770 }, { "epoch": 0.8243417697309703, "grad_norm": 0.17030476033687592, "learning_rate": 0.0012575715522691767, "loss": 2.517, "step": 413780 }, { "epoch": 0.8243616919546092, "grad_norm": 0.14597667753696442, "learning_rate": 0.0012573022148949144, "loss": 2.4995, "step": 413790 }, { "epoch": 0.8243816141782481, "grad_norm": 0.16240133345127106, "learning_rate": 0.0012570329751597316, "loss": 2.508, "step": 413800 }, { "epoch": 0.8244015364018871, "grad_norm": 0.1548568159341812, "learning_rate": 0.0012567638329575182, "loss": 2.5151, "step": 413810 }, { "epoch": 0.824421458625526, "grad_norm": 0.17116202414035797, "learning_rate": 0.001256494788182356, "loss": 2.5182, "step": 413820 }, { "epoch": 0.8244413808491649, "grad_norm": 0.140827476978302, "learning_rate": 0.0012562258407285182, "loss": 2.5102, "step": 413830 }, { "epoch": 0.8244613030728037, "grad_norm": 0.1617780178785324, "learning_rate": 0.001255956990490469, "loss": 2.5156, "step": 413840 }, { "epoch": 0.8244812252964426, "grad_norm": 0.1607723832130432, "learning_rate": 0.0012556882373628642, "loss": 2.5154, "step": 413850 }, { "epoch": 0.8245011475200816, "grad_norm": 0.13740935921669006, "learning_rate": 0.0012554195812405488, "loss": 2.4958, "step": 413860 }, { "epoch": 0.8245210697437205, "grad_norm": 0.19117698073387146, "learning_rate": 0.001255151022018557, "loss": 2.4935, "step": 413870 }, { "epoch": 0.8245409919673594, "grad_norm": 0.16104832291603088, "learning_rate": 0.0012548825595921139, "loss": 2.5145, "step": 413880 }, { "epoch": 0.8245609141909983, "grad_norm": 0.15557102859020233, "learning_rate": 0.001254614193856631, "loss": 2.5074, "step": 413890 }, { "epoch": 0.8245808364146373, "grad_norm": 0.15127088129520416, "learning_rate": 0.00125434592470771, "loss": 2.512, "step": 413900 }, { "epoch": 0.8246007586382762, "grad_norm": 0.17887642979621887, "learning_rate": 0.0012540777520411394, "loss": 2.5211, "step": 413910 }, { "epoch": 0.8246206808619151, "grad_norm": 0.16448283195495605, "learning_rate": 0.0012538096757528949, "loss": 2.5088, "step": 413920 }, { "epoch": 0.824640603085554, "grad_norm": 0.16730749607086182, "learning_rate": 0.001253541695739139, "loss": 2.5047, "step": 413930 }, { "epoch": 0.8246605253091929, "grad_norm": 0.14173835515975952, "learning_rate": 0.0012532738118962213, "loss": 2.5088, "step": 413940 }, { "epoch": 0.8246804475328319, "grad_norm": 0.14547781646251678, "learning_rate": 0.0012530060241206762, "loss": 2.4971, "step": 413950 }, { "epoch": 0.8247003697564708, "grad_norm": 0.1709316223859787, "learning_rate": 0.0012527383323092237, "loss": 2.5096, "step": 413960 }, { "epoch": 0.8247202919801097, "grad_norm": 0.16008424758911133, "learning_rate": 0.0012524707363587696, "loss": 2.5148, "step": 413970 }, { "epoch": 0.8247402142037485, "grad_norm": 0.1664358377456665, "learning_rate": 0.0012522032361664034, "loss": 2.5168, "step": 413980 }, { "epoch": 0.8247601364273874, "grad_norm": 0.1542132943868637, "learning_rate": 0.0012519358316293982, "loss": 2.5031, "step": 413990 }, { "epoch": 0.8247800586510264, "grad_norm": 0.16327309608459473, "learning_rate": 0.0012516685226452117, "loss": 2.4998, "step": 414000 }, { "epoch": 0.8247999808746653, "grad_norm": 0.18121758103370667, "learning_rate": 0.0012514013091114839, "loss": 2.5136, "step": 414010 }, { "epoch": 0.8248199030983042, "grad_norm": 0.15226641297340393, "learning_rate": 0.0012511341909260379, "loss": 2.5118, "step": 414020 }, { "epoch": 0.8248398253219431, "grad_norm": 0.1545374095439911, "learning_rate": 0.0012508671679868782, "loss": 2.5108, "step": 414030 }, { "epoch": 0.824859747545582, "grad_norm": 0.1849561184644699, "learning_rate": 0.0012506002401921922, "loss": 2.5067, "step": 414040 }, { "epoch": 0.824879669769221, "grad_norm": 0.18084800243377686, "learning_rate": 0.0012503334074403477, "loss": 2.507, "step": 414050 }, { "epoch": 0.8248995919928599, "grad_norm": 0.15512435138225555, "learning_rate": 0.001250066669629893, "loss": 2.5056, "step": 414060 }, { "epoch": 0.8249195142164988, "grad_norm": 0.15900787711143494, "learning_rate": 0.001249800026659558, "loss": 2.5289, "step": 414070 }, { "epoch": 0.8249394364401377, "grad_norm": 0.14586001634597778, "learning_rate": 0.0012495334784282512, "loss": 2.5052, "step": 414080 }, { "epoch": 0.8249593586637766, "grad_norm": 0.15580563247203827, "learning_rate": 0.0012492670248350616, "loss": 2.4955, "step": 414090 }, { "epoch": 0.8249792808874156, "grad_norm": 0.15695954859256744, "learning_rate": 0.0012490006657792565, "loss": 2.5049, "step": 414100 }, { "epoch": 0.8249992031110545, "grad_norm": 0.15193527936935425, "learning_rate": 0.0012487344011602821, "loss": 2.5204, "step": 414110 }, { "epoch": 0.8250191253346933, "grad_norm": 0.17531779408454895, "learning_rate": 0.0012484682308777626, "loss": 2.5079, "step": 414120 }, { "epoch": 0.8250390475583322, "grad_norm": 0.15580441057682037, "learning_rate": 0.0012482021548315, "loss": 2.4961, "step": 414130 }, { "epoch": 0.8250589697819711, "grad_norm": 0.18018652498722076, "learning_rate": 0.0012479361729214734, "loss": 2.5115, "step": 414140 }, { "epoch": 0.8250788920056101, "grad_norm": 0.16520561277866364, "learning_rate": 0.001247670285047839, "loss": 2.4993, "step": 414150 }, { "epoch": 0.825098814229249, "grad_norm": 0.18245981633663177, "learning_rate": 0.0012474044911109288, "loss": 2.5033, "step": 414160 }, { "epoch": 0.8251187364528879, "grad_norm": 0.14600522816181183, "learning_rate": 0.0012471387910112518, "loss": 2.5102, "step": 414170 }, { "epoch": 0.8251386586765268, "grad_norm": 0.15236154198646545, "learning_rate": 0.0012468731846494907, "loss": 2.5129, "step": 414180 }, { "epoch": 0.8251585809001658, "grad_norm": 0.1461741030216217, "learning_rate": 0.0012466076719265056, "loss": 2.5169, "step": 414190 }, { "epoch": 0.8251785031238047, "grad_norm": 0.15750819444656372, "learning_rate": 0.0012463422527433292, "loss": 2.5041, "step": 414200 }, { "epoch": 0.8251984253474436, "grad_norm": 0.1475946009159088, "learning_rate": 0.0012460769270011697, "loss": 2.5101, "step": 414210 }, { "epoch": 0.8252183475710825, "grad_norm": 0.19959242641925812, "learning_rate": 0.001245811694601408, "loss": 2.4994, "step": 414220 }, { "epoch": 0.8252382697947214, "grad_norm": 0.16281607747077942, "learning_rate": 0.0012455465554455994, "loss": 2.5038, "step": 414230 }, { "epoch": 0.8252581920183604, "grad_norm": 0.16944323480129242, "learning_rate": 0.0012452815094354717, "loss": 2.498, "step": 414240 }, { "epoch": 0.8252781142419993, "grad_norm": 0.16766750812530518, "learning_rate": 0.0012450165564729253, "loss": 2.51, "step": 414250 }, { "epoch": 0.8252980364656382, "grad_norm": 0.16653551161289215, "learning_rate": 0.001244751696460032, "loss": 2.4984, "step": 414260 }, { "epoch": 0.825317958689277, "grad_norm": 0.15366452932357788, "learning_rate": 0.0012444869292990359, "loss": 2.5012, "step": 414270 }, { "epoch": 0.8253378809129159, "grad_norm": 0.17018499970436096, "learning_rate": 0.0012442222548923528, "loss": 2.5135, "step": 414280 }, { "epoch": 0.8253578031365549, "grad_norm": 0.17821088433265686, "learning_rate": 0.001243957673142568, "loss": 2.514, "step": 414290 }, { "epoch": 0.8253777253601938, "grad_norm": 0.2232891470193863, "learning_rate": 0.0012436931839524385, "loss": 2.4997, "step": 414300 }, { "epoch": 0.8253976475838327, "grad_norm": 0.17046014964580536, "learning_rate": 0.0012434287872248903, "loss": 2.506, "step": 414310 }, { "epoch": 0.8254175698074716, "grad_norm": 0.16547904908657074, "learning_rate": 0.00124316448286302, "loss": 2.523, "step": 414320 }, { "epoch": 0.8254374920311105, "grad_norm": 0.16680362820625305, "learning_rate": 0.001242900270770092, "loss": 2.5033, "step": 414330 }, { "epoch": 0.8254574142547495, "grad_norm": 0.15597479045391083, "learning_rate": 0.0012426361508495404, "loss": 2.5223, "step": 414340 }, { "epoch": 0.8254773364783884, "grad_norm": 0.38804149627685547, "learning_rate": 0.0012423721230049676, "loss": 2.497, "step": 414350 }, { "epoch": 0.8254972587020273, "grad_norm": 0.16402363777160645, "learning_rate": 0.0012421081871401435, "loss": 2.5097, "step": 414360 }, { "epoch": 0.8255171809256662, "grad_norm": 0.15028981864452362, "learning_rate": 0.0012418443431590053, "loss": 2.4966, "step": 414370 }, { "epoch": 0.8255371031493051, "grad_norm": 0.15676875412464142, "learning_rate": 0.0012415805909656583, "loss": 2.5272, "step": 414380 }, { "epoch": 0.8255570253729441, "grad_norm": 0.32985833287239075, "learning_rate": 0.0012413169304643736, "loss": 2.5109, "step": 414390 }, { "epoch": 0.825576947596583, "grad_norm": 0.16769161820411682, "learning_rate": 0.001241053361559589, "loss": 2.511, "step": 414400 }, { "epoch": 0.8255968698202218, "grad_norm": 0.15856237709522247, "learning_rate": 0.0012407898841559077, "loss": 2.495, "step": 414410 }, { "epoch": 0.8256167920438607, "grad_norm": 0.1390889286994934, "learning_rate": 0.001240526498158099, "loss": 2.5055, "step": 414420 }, { "epoch": 0.8256367142674996, "grad_norm": 0.14931556582450867, "learning_rate": 0.001240263203471097, "loss": 2.4944, "step": 414430 }, { "epoch": 0.8256566364911386, "grad_norm": 0.1722390204668045, "learning_rate": 0.00124, "loss": 2.5264, "step": 414440 }, { "epoch": 0.8256765587147775, "grad_norm": 0.18006296455860138, "learning_rate": 0.0012397368876500715, "loss": 2.4928, "step": 414450 }, { "epoch": 0.8256964809384164, "grad_norm": 0.15361599624156952, "learning_rate": 0.0012394738663267383, "loss": 2.501, "step": 414460 }, { "epoch": 0.8257164031620553, "grad_norm": 0.1596936136484146, "learning_rate": 0.0012392109359355907, "loss": 2.5094, "step": 414470 }, { "epoch": 0.8257363253856943, "grad_norm": 0.15353858470916748, "learning_rate": 0.001238948096382382, "loss": 2.5087, "step": 414480 }, { "epoch": 0.8257562476093332, "grad_norm": 0.16430214047431946, "learning_rate": 0.001238685347573029, "loss": 2.5068, "step": 414490 }, { "epoch": 0.8257761698329721, "grad_norm": 0.17067886888980865, "learning_rate": 0.0012384226894136092, "loss": 2.5069, "step": 414500 }, { "epoch": 0.825796092056611, "grad_norm": 0.14354334771633148, "learning_rate": 0.0012381601218103637, "loss": 2.5279, "step": 414510 }, { "epoch": 0.8258160142802499, "grad_norm": 0.1699235886335373, "learning_rate": 0.0012378976446696939, "loss": 2.4967, "step": 414520 }, { "epoch": 0.8258359365038889, "grad_norm": 0.1434345841407776, "learning_rate": 0.0012376352578981633, "loss": 2.5151, "step": 414530 }, { "epoch": 0.8258558587275278, "grad_norm": 0.18160122632980347, "learning_rate": 0.0012373729614024952, "loss": 2.5082, "step": 414540 }, { "epoch": 0.8258757809511666, "grad_norm": 0.15458326041698456, "learning_rate": 0.001237110755089574, "loss": 2.5118, "step": 414550 }, { "epoch": 0.8258957031748055, "grad_norm": 0.14572569727897644, "learning_rate": 0.0012368486388664435, "loss": 2.5071, "step": 414560 }, { "epoch": 0.8259156253984444, "grad_norm": 0.1544044464826584, "learning_rate": 0.0012365866126403074, "loss": 2.5067, "step": 414570 }, { "epoch": 0.8259355476220834, "grad_norm": 0.17004787921905518, "learning_rate": 0.0012363246763185285, "loss": 2.503, "step": 414580 }, { "epoch": 0.8259554698457223, "grad_norm": 0.19173429906368256, "learning_rate": 0.001236062829808629, "loss": 2.526, "step": 414590 }, { "epoch": 0.8259753920693612, "grad_norm": 0.1660272777080536, "learning_rate": 0.001235801073018288, "loss": 2.5187, "step": 414600 }, { "epoch": 0.8259953142930001, "grad_norm": 0.14800062775611877, "learning_rate": 0.0012355394058553442, "loss": 2.5123, "step": 414610 }, { "epoch": 0.826015236516639, "grad_norm": 0.15086491405963898, "learning_rate": 0.0012352778282277935, "loss": 2.5197, "step": 414620 }, { "epoch": 0.826035158740278, "grad_norm": 0.18514133989810944, "learning_rate": 0.001235016340043789, "loss": 2.5147, "step": 414630 }, { "epoch": 0.8260550809639169, "grad_norm": 0.16010147333145142, "learning_rate": 0.0012347549412116403, "loss": 2.517, "step": 414640 }, { "epoch": 0.8260750031875558, "grad_norm": 0.19384117424488068, "learning_rate": 0.0012344936316398146, "loss": 2.506, "step": 414650 }, { "epoch": 0.8260949254111947, "grad_norm": 0.17194999754428864, "learning_rate": 0.001234232411236934, "loss": 2.5002, "step": 414660 }, { "epoch": 0.8261148476348336, "grad_norm": 0.1594114750623703, "learning_rate": 0.0012339712799117777, "loss": 2.5196, "step": 414670 }, { "epoch": 0.8261347698584726, "grad_norm": 0.1532030999660492, "learning_rate": 0.0012337102375732792, "loss": 2.5022, "step": 414680 }, { "epoch": 0.8261546920821115, "grad_norm": 0.15254619717597961, "learning_rate": 0.001233449284130528, "loss": 2.4996, "step": 414690 }, { "epoch": 0.8261746143057503, "grad_norm": 0.16012920439243317, "learning_rate": 0.0012331884194927674, "loss": 2.5077, "step": 414700 }, { "epoch": 0.8261945365293892, "grad_norm": 0.16086246073246002, "learning_rate": 0.0012329276435693957, "loss": 2.504, "step": 414710 }, { "epoch": 0.8262144587530281, "grad_norm": 0.1751800924539566, "learning_rate": 0.001232666956269965, "loss": 2.5115, "step": 414720 }, { "epoch": 0.8262343809766671, "grad_norm": 0.15652473270893097, "learning_rate": 0.0012324063575041807, "loss": 2.4919, "step": 414730 }, { "epoch": 0.826254303200306, "grad_norm": 0.19311313331127167, "learning_rate": 0.0012321458471819013, "loss": 2.5076, "step": 414740 }, { "epoch": 0.8262742254239449, "grad_norm": 0.1554311215877533, "learning_rate": 0.001231885425213139, "loss": 2.5007, "step": 414750 }, { "epoch": 0.8262941476475838, "grad_norm": 0.1475185602903366, "learning_rate": 0.0012316250915080582, "loss": 2.5054, "step": 414760 }, { "epoch": 0.8263140698712228, "grad_norm": 0.1696791797876358, "learning_rate": 0.0012313648459769747, "loss": 2.5036, "step": 414770 }, { "epoch": 0.8263339920948617, "grad_norm": 0.15692369639873505, "learning_rate": 0.0012311046885303564, "loss": 2.5023, "step": 414780 }, { "epoch": 0.8263539143185006, "grad_norm": 0.15474487841129303, "learning_rate": 0.0012308446190788236, "loss": 2.5071, "step": 414790 }, { "epoch": 0.8263738365421395, "grad_norm": 0.1529700607061386, "learning_rate": 0.0012305846375331461, "loss": 2.5005, "step": 414800 }, { "epoch": 0.8263937587657784, "grad_norm": 0.15367738902568817, "learning_rate": 0.0012303247438042457, "loss": 2.5038, "step": 414810 }, { "epoch": 0.8264136809894174, "grad_norm": 0.15901990234851837, "learning_rate": 0.001230064937803194, "loss": 2.5111, "step": 414820 }, { "epoch": 0.8264336032130563, "grad_norm": 0.15142589807510376, "learning_rate": 0.0012298052194412118, "loss": 2.4928, "step": 414830 }, { "epoch": 0.8264535254366951, "grad_norm": 0.15420852601528168, "learning_rate": 0.0012295455886296711, "loss": 2.5007, "step": 414840 }, { "epoch": 0.826473447660334, "grad_norm": 0.180233433842659, "learning_rate": 0.0012292860452800922, "loss": 2.5053, "step": 414850 }, { "epoch": 0.8264933698839729, "grad_norm": 0.1699274182319641, "learning_rate": 0.0012290265893041448, "loss": 2.4855, "step": 414860 }, { "epoch": 0.8265132921076119, "grad_norm": 0.13849523663520813, "learning_rate": 0.0012287672206136465, "loss": 2.498, "step": 414870 }, { "epoch": 0.8265332143312508, "grad_norm": 0.144430473446846, "learning_rate": 0.0012285079391205636, "loss": 2.5073, "step": 414880 }, { "epoch": 0.8265531365548897, "grad_norm": 0.16405370831489563, "learning_rate": 0.0012282487447370105, "loss": 2.5068, "step": 414890 }, { "epoch": 0.8265730587785286, "grad_norm": 0.184962198138237, "learning_rate": 0.0012279896373752486, "loss": 2.5023, "step": 414900 }, { "epoch": 0.8265929810021675, "grad_norm": 0.16529791057109833, "learning_rate": 0.001227730616947687, "loss": 2.5032, "step": 414910 }, { "epoch": 0.8266129032258065, "grad_norm": 0.16849052906036377, "learning_rate": 0.0012274716833668813, "loss": 2.5046, "step": 414920 }, { "epoch": 0.8266328254494454, "grad_norm": 0.1965417116880417, "learning_rate": 0.0012272128365455337, "loss": 2.5112, "step": 414930 }, { "epoch": 0.8266527476730843, "grad_norm": 0.16507233679294586, "learning_rate": 0.0012269540763964924, "loss": 2.5116, "step": 414940 }, { "epoch": 0.8266726698967232, "grad_norm": 0.19312763214111328, "learning_rate": 0.0012266954028327518, "loss": 2.5159, "step": 414950 }, { "epoch": 0.8266925921203621, "grad_norm": 0.17542436718940735, "learning_rate": 0.0012264368157674514, "loss": 2.5003, "step": 414960 }, { "epoch": 0.8267125143440011, "grad_norm": 0.14832721650600433, "learning_rate": 0.0012261783151138758, "loss": 2.4952, "step": 414970 }, { "epoch": 0.82673243656764, "grad_norm": 0.1618369221687317, "learning_rate": 0.001225919900785455, "loss": 2.4791, "step": 414980 }, { "epoch": 0.8267523587912788, "grad_norm": 0.14174285531044006, "learning_rate": 0.0012256615726957624, "loss": 2.5025, "step": 414990 }, { "epoch": 0.8267722810149177, "grad_norm": 0.16050510108470917, "learning_rate": 0.0012254033307585166, "loss": 2.5171, "step": 415000 }, { "epoch": 0.8267922032385566, "grad_norm": 0.14089541137218475, "learning_rate": 0.0012251451748875794, "loss": 2.5101, "step": 415010 }, { "epoch": 0.8268121254621956, "grad_norm": 0.16388443112373352, "learning_rate": 0.0012248871049969558, "loss": 2.5067, "step": 415020 }, { "epoch": 0.8268320476858345, "grad_norm": 0.15276354551315308, "learning_rate": 0.001224629121000795, "loss": 2.5111, "step": 415030 }, { "epoch": 0.8268519699094734, "grad_norm": 0.15780635178089142, "learning_rate": 0.0012243712228133875, "loss": 2.5049, "step": 415040 }, { "epoch": 0.8268718921331123, "grad_norm": 0.16128313541412354, "learning_rate": 0.0012241134103491672, "loss": 2.5061, "step": 415050 }, { "epoch": 0.8268918143567513, "grad_norm": 0.15473626554012299, "learning_rate": 0.0012238556835227098, "loss": 2.4991, "step": 415060 }, { "epoch": 0.8269117365803902, "grad_norm": 0.14959891140460968, "learning_rate": 0.0012235980422487332, "loss": 2.509, "step": 415070 }, { "epoch": 0.8269316588040291, "grad_norm": 0.1533479392528534, "learning_rate": 0.001223340486442096, "loss": 2.51, "step": 415080 }, { "epoch": 0.826951581027668, "grad_norm": 0.16409049928188324, "learning_rate": 0.0012230830160177987, "loss": 2.5015, "step": 415090 }, { "epoch": 0.8269715032513069, "grad_norm": 0.17058001458644867, "learning_rate": 0.001222825630890982, "loss": 2.5128, "step": 415100 }, { "epoch": 0.8269914254749459, "grad_norm": 0.15731202065944672, "learning_rate": 0.0012225683309769276, "loss": 2.5109, "step": 415110 }, { "epoch": 0.8270113476985848, "grad_norm": 0.16395451128482819, "learning_rate": 0.0012223111161910568, "loss": 2.4971, "step": 415120 }, { "epoch": 0.8270312699222236, "grad_norm": 0.15439294278621674, "learning_rate": 0.001222053986448931, "loss": 2.5042, "step": 415130 }, { "epoch": 0.8270511921458625, "grad_norm": 0.17338532209396362, "learning_rate": 0.001221796941666251, "loss": 2.5092, "step": 415140 }, { "epoch": 0.8270711143695014, "grad_norm": 0.154598206281662, "learning_rate": 0.0012215399817588576, "loss": 2.5107, "step": 415150 }, { "epoch": 0.8270910365931404, "grad_norm": 0.18447095155715942, "learning_rate": 0.0012212831066427286, "loss": 2.5005, "step": 415160 }, { "epoch": 0.8271109588167793, "grad_norm": 0.14491914212703705, "learning_rate": 0.0012210263162339822, "loss": 2.4963, "step": 415170 }, { "epoch": 0.8271308810404182, "grad_norm": 0.19584892690181732, "learning_rate": 0.001220769610448874, "loss": 2.4963, "step": 415180 }, { "epoch": 0.8271508032640571, "grad_norm": 0.1530306488275528, "learning_rate": 0.001220512989203797, "loss": 2.5052, "step": 415190 }, { "epoch": 0.827170725487696, "grad_norm": 0.18141578137874603, "learning_rate": 0.001220256452415283, "loss": 2.5144, "step": 415200 }, { "epoch": 0.827190647711335, "grad_norm": 0.19474634528160095, "learning_rate": 0.00122, "loss": 2.5144, "step": 415210 }, { "epoch": 0.8272105699349739, "grad_norm": 0.17003828287124634, "learning_rate": 0.0012197436318747536, "loss": 2.5174, "step": 415220 }, { "epoch": 0.8272304921586128, "grad_norm": 0.15982280671596527, "learning_rate": 0.0012194873479564859, "loss": 2.5046, "step": 415230 }, { "epoch": 0.8272504143822517, "grad_norm": 0.1705809235572815, "learning_rate": 0.0012192311481622746, "loss": 2.511, "step": 415240 }, { "epoch": 0.8272703366058906, "grad_norm": 0.15820589661598206, "learning_rate": 0.0012189750324093347, "loss": 2.5037, "step": 415250 }, { "epoch": 0.8272902588295296, "grad_norm": 0.14973647892475128, "learning_rate": 0.0012187190006150157, "loss": 2.4983, "step": 415260 }, { "epoch": 0.8273101810531684, "grad_norm": 0.15392082929611206, "learning_rate": 0.0012184630526968032, "loss": 2.5126, "step": 415270 }, { "epoch": 0.8273301032768073, "grad_norm": 0.15476229786872864, "learning_rate": 0.0012182071885723173, "loss": 2.497, "step": 415280 }, { "epoch": 0.8273500255004462, "grad_norm": 0.21410813927650452, "learning_rate": 0.001217951408159314, "loss": 2.5209, "step": 415290 }, { "epoch": 0.8273699477240851, "grad_norm": 0.17232929170131683, "learning_rate": 0.001217695711375682, "loss": 2.5122, "step": 415300 }, { "epoch": 0.8273898699477241, "grad_norm": 0.2100236862897873, "learning_rate": 0.0012174400981394458, "loss": 2.5181, "step": 415310 }, { "epoch": 0.827409792171363, "grad_norm": 0.16211433708667755, "learning_rate": 0.0012171845683687627, "loss": 2.4969, "step": 415320 }, { "epoch": 0.8274297143950019, "grad_norm": 0.1566064953804016, "learning_rate": 0.0012169291219819242, "loss": 2.5042, "step": 415330 }, { "epoch": 0.8274496366186408, "grad_norm": 0.1531820297241211, "learning_rate": 0.0012166737588973544, "loss": 2.5085, "step": 415340 }, { "epoch": 0.8274695588422798, "grad_norm": 0.15379618108272552, "learning_rate": 0.0012164184790336107, "loss": 2.505, "step": 415350 }, { "epoch": 0.8274894810659187, "grad_norm": 0.18452806770801544, "learning_rate": 0.001216163282309383, "loss": 2.5147, "step": 415360 }, { "epoch": 0.8275094032895576, "grad_norm": 0.15390001237392426, "learning_rate": 0.0012159081686434936, "loss": 2.4995, "step": 415370 }, { "epoch": 0.8275293255131965, "grad_norm": 0.16196370124816895, "learning_rate": 0.001215653137954897, "loss": 2.4988, "step": 415380 }, { "epoch": 0.8275492477368354, "grad_norm": 0.17419670522212982, "learning_rate": 0.0012153981901626787, "loss": 2.4996, "step": 415390 }, { "epoch": 0.8275691699604744, "grad_norm": 0.15702974796295166, "learning_rate": 0.0012151433251860566, "loss": 2.4902, "step": 415400 }, { "epoch": 0.8275890921841133, "grad_norm": 0.15721552073955536, "learning_rate": 0.0012148885429443792, "loss": 2.5132, "step": 415410 }, { "epoch": 0.8276090144077521, "grad_norm": 0.1658782809972763, "learning_rate": 0.0012146338433571256, "loss": 2.5138, "step": 415420 }, { "epoch": 0.827628936631391, "grad_norm": 0.17684689164161682, "learning_rate": 0.0012143792263439059, "loss": 2.5192, "step": 415430 }, { "epoch": 0.8276488588550299, "grad_norm": 0.22774545848369598, "learning_rate": 0.00121412469182446, "loss": 2.5079, "step": 415440 }, { "epoch": 0.8276687810786689, "grad_norm": 0.17149215936660767, "learning_rate": 0.0012138702397186583, "loss": 2.5157, "step": 415450 }, { "epoch": 0.8276887033023078, "grad_norm": 0.14235036075115204, "learning_rate": 0.0012136158699465002, "loss": 2.5132, "step": 415460 }, { "epoch": 0.8277086255259467, "grad_norm": 0.16036739945411682, "learning_rate": 0.0012133615824281145, "loss": 2.5011, "step": 415470 }, { "epoch": 0.8277285477495856, "grad_norm": 0.19377417862415314, "learning_rate": 0.00121310737708376, "loss": 2.4957, "step": 415480 }, { "epoch": 0.8277484699732245, "grad_norm": 0.14320020377635956, "learning_rate": 0.0012128532538338227, "loss": 2.5099, "step": 415490 }, { "epoch": 0.8277683921968635, "grad_norm": 0.18393585085868835, "learning_rate": 0.001212599212598819, "loss": 2.5067, "step": 415500 }, { "epoch": 0.8277883144205024, "grad_norm": 0.1605662852525711, "learning_rate": 0.0012123452532993913, "loss": 2.5164, "step": 415510 }, { "epoch": 0.8278082366441413, "grad_norm": 0.17844681441783905, "learning_rate": 0.0012120913758563118, "loss": 2.484, "step": 415520 }, { "epoch": 0.8278281588677802, "grad_norm": 0.166663259267807, "learning_rate": 0.0012118375801904789, "loss": 2.5137, "step": 415530 }, { "epoch": 0.827848081091419, "grad_norm": 0.15791092813014984, "learning_rate": 0.0012115838662229191, "loss": 2.5113, "step": 415540 }, { "epoch": 0.827868003315058, "grad_norm": 0.1848752200603485, "learning_rate": 0.0012113302338747861, "loss": 2.5075, "step": 415550 }, { "epoch": 0.827887925538697, "grad_norm": 0.16261784732341766, "learning_rate": 0.0012110766830673594, "loss": 2.5174, "step": 415560 }, { "epoch": 0.8279078477623358, "grad_norm": 0.15531283617019653, "learning_rate": 0.001210823213722046, "loss": 2.5113, "step": 415570 }, { "epoch": 0.8279277699859747, "grad_norm": 0.14711050689220428, "learning_rate": 0.001210569825760378, "loss": 2.5174, "step": 415580 }, { "epoch": 0.8279476922096136, "grad_norm": 0.15960188210010529, "learning_rate": 0.0012103165191040147, "loss": 2.4886, "step": 415590 }, { "epoch": 0.8279676144332526, "grad_norm": 0.15672531723976135, "learning_rate": 0.00121006329367474, "loss": 2.5038, "step": 415600 }, { "epoch": 0.8279875366568915, "grad_norm": 0.1648426353931427, "learning_rate": 0.0012098101493944636, "loss": 2.5026, "step": 415610 }, { "epoch": 0.8280074588805304, "grad_norm": 0.17784667015075684, "learning_rate": 0.0012095570861852198, "loss": 2.4928, "step": 415620 }, { "epoch": 0.8280273811041693, "grad_norm": 0.15600961446762085, "learning_rate": 0.0012093041039691684, "loss": 2.4958, "step": 415630 }, { "epoch": 0.8280473033278082, "grad_norm": 0.13808582723140717, "learning_rate": 0.0012090512026685925, "loss": 2.5083, "step": 415640 }, { "epoch": 0.8280672255514472, "grad_norm": 0.15496328473091125, "learning_rate": 0.0012087983822059007, "loss": 2.508, "step": 415650 }, { "epoch": 0.8280871477750861, "grad_norm": 0.17834872007369995, "learning_rate": 0.001208545642503625, "loss": 2.5043, "step": 415660 }, { "epoch": 0.828107069998725, "grad_norm": 0.15165185928344727, "learning_rate": 0.0012082929834844206, "loss": 2.5062, "step": 415670 }, { "epoch": 0.8281269922223639, "grad_norm": 0.5688374638557434, "learning_rate": 0.001208040405071067, "loss": 2.4961, "step": 415680 }, { "epoch": 0.8281469144460029, "grad_norm": 0.1681707799434662, "learning_rate": 0.0012077879071864658, "loss": 2.5104, "step": 415690 }, { "epoch": 0.8281668366696417, "grad_norm": 0.15745705366134644, "learning_rate": 0.0012075354897536422, "loss": 2.5155, "step": 415700 }, { "epoch": 0.8281867588932806, "grad_norm": 0.1878305822610855, "learning_rate": 0.0012072831526957436, "loss": 2.5069, "step": 415710 }, { "epoch": 0.8282066811169195, "grad_norm": 0.16299015283584595, "learning_rate": 0.0012070308959360395, "loss": 2.4936, "step": 415720 }, { "epoch": 0.8282266033405584, "grad_norm": 0.14324726164340973, "learning_rate": 0.0012067787193979225, "loss": 2.4933, "step": 415730 }, { "epoch": 0.8282465255641974, "grad_norm": 0.15222787857055664, "learning_rate": 0.0012065266230049051, "loss": 2.4916, "step": 415740 }, { "epoch": 0.8282664477878363, "grad_norm": 0.16714251041412354, "learning_rate": 0.0012062746066806226, "loss": 2.5205, "step": 415750 }, { "epoch": 0.8282863700114752, "grad_norm": 0.17448747158050537, "learning_rate": 0.0012060226703488317, "loss": 2.5078, "step": 415760 }, { "epoch": 0.8283062922351141, "grad_norm": 0.15993599593639374, "learning_rate": 0.0012057708139334088, "loss": 2.5017, "step": 415770 }, { "epoch": 0.828326214458753, "grad_norm": 0.154878169298172, "learning_rate": 0.0012055190373583518, "loss": 2.4984, "step": 415780 }, { "epoch": 0.828346136682392, "grad_norm": 0.1481126844882965, "learning_rate": 0.0012052673405477789, "loss": 2.519, "step": 415790 }, { "epoch": 0.8283660589060309, "grad_norm": 0.15679457783699036, "learning_rate": 0.0012050157234259286, "loss": 2.5034, "step": 415800 }, { "epoch": 0.8283859811296698, "grad_norm": 0.1542724370956421, "learning_rate": 0.0012047641859171583, "loss": 2.5209, "step": 415810 }, { "epoch": 0.8284059033533087, "grad_norm": 0.16007433831691742, "learning_rate": 0.0012045127279459464, "loss": 2.4969, "step": 415820 }, { "epoch": 0.8284258255769475, "grad_norm": 0.15970222651958466, "learning_rate": 0.0012042613494368895, "loss": 2.5001, "step": 415830 }, { "epoch": 0.8284457478005866, "grad_norm": 0.15941070020198822, "learning_rate": 0.001204010050314704, "loss": 2.5047, "step": 415840 }, { "epoch": 0.8284656700242254, "grad_norm": 0.14153754711151123, "learning_rate": 0.0012037588305042247, "loss": 2.5026, "step": 415850 }, { "epoch": 0.8284855922478643, "grad_norm": 0.17550228536128998, "learning_rate": 0.0012035076899304048, "loss": 2.5023, "step": 415860 }, { "epoch": 0.8285055144715032, "grad_norm": 0.1760772168636322, "learning_rate": 0.0012032566285183164, "loss": 2.4982, "step": 415870 }, { "epoch": 0.8285254366951421, "grad_norm": 0.18901483714580536, "learning_rate": 0.001203005646193149, "loss": 2.5027, "step": 415880 }, { "epoch": 0.8285453589187811, "grad_norm": 0.15965771675109863, "learning_rate": 0.00120275474288021, "loss": 2.5003, "step": 415890 }, { "epoch": 0.82856528114242, "grad_norm": 0.18023422360420227, "learning_rate": 0.0012025039185049246, "loss": 2.4985, "step": 415900 }, { "epoch": 0.8285852033660589, "grad_norm": 0.1586858034133911, "learning_rate": 0.001202253172992835, "loss": 2.511, "step": 415910 }, { "epoch": 0.8286051255896978, "grad_norm": 0.17146289348602295, "learning_rate": 0.0012020025062695998, "loss": 2.5065, "step": 415920 }, { "epoch": 0.8286250478133367, "grad_norm": 0.14610795676708221, "learning_rate": 0.001201751918260996, "loss": 2.4971, "step": 415930 }, { "epoch": 0.8286449700369757, "grad_norm": 0.4488724172115326, "learning_rate": 0.001201501408892915, "loss": 2.513, "step": 415940 }, { "epoch": 0.8286648922606146, "grad_norm": 0.15246015787124634, "learning_rate": 0.0012012509780913656, "loss": 2.4989, "step": 415950 }, { "epoch": 0.8286848144842535, "grad_norm": 0.1575099378824234, "learning_rate": 0.001201000625782473, "loss": 2.4943, "step": 415960 }, { "epoch": 0.8287047367078924, "grad_norm": 0.13671867549419403, "learning_rate": 0.0012007503518924767, "loss": 2.4992, "step": 415970 }, { "epoch": 0.8287246589315314, "grad_norm": 0.1701522171497345, "learning_rate": 0.0012005001563477327, "loss": 2.5071, "step": 415980 }, { "epoch": 0.8287445811551702, "grad_norm": 0.1885617971420288, "learning_rate": 0.001200250039074712, "loss": 2.5031, "step": 415990 }, { "epoch": 0.8287645033788091, "grad_norm": 0.15902812778949738, "learning_rate": 0.0012, "loss": 2.5212, "step": 416000 }, { "epoch": 0.828784425602448, "grad_norm": 0.15901122987270355, "learning_rate": 0.0011997500390502978, "loss": 2.5083, "step": 416010 }, { "epoch": 0.8288043478260869, "grad_norm": 0.15786109864711761, "learning_rate": 0.0011995001561524198, "loss": 2.4995, "step": 416020 }, { "epoch": 0.8288242700497259, "grad_norm": 0.14915403723716736, "learning_rate": 0.001199250351233296, "loss": 2.5202, "step": 416030 }, { "epoch": 0.8288441922733648, "grad_norm": 0.15201154351234436, "learning_rate": 0.0011990006242199684, "loss": 2.4959, "step": 416040 }, { "epoch": 0.8288641144970037, "grad_norm": 0.15504780411720276, "learning_rate": 0.0011987509750395948, "loss": 2.4988, "step": 416050 }, { "epoch": 0.8288840367206426, "grad_norm": 0.15184861421585083, "learning_rate": 0.0011985014036194448, "loss": 2.5086, "step": 416060 }, { "epoch": 0.8289039589442815, "grad_norm": 0.19867271184921265, "learning_rate": 0.0011982519098869022, "loss": 2.5013, "step": 416070 }, { "epoch": 0.8289238811679205, "grad_norm": 0.16209350526332855, "learning_rate": 0.0011980024937694631, "loss": 2.5041, "step": 416080 }, { "epoch": 0.8289438033915594, "grad_norm": 0.1660800725221634, "learning_rate": 0.001197753155194737, "loss": 2.5123, "step": 416090 }, { "epoch": 0.8289637256151983, "grad_norm": 0.14981868863105774, "learning_rate": 0.0011975038940904448, "loss": 2.4899, "step": 416100 }, { "epoch": 0.8289836478388372, "grad_norm": 0.17291061580181122, "learning_rate": 0.0011972547103844208, "loss": 2.4898, "step": 416110 }, { "epoch": 0.829003570062476, "grad_norm": 0.17707835137844086, "learning_rate": 0.0011970056040046108, "loss": 2.5011, "step": 416120 }, { "epoch": 0.829023492286115, "grad_norm": 0.16384181380271912, "learning_rate": 0.001196756574879072, "loss": 2.4926, "step": 416130 }, { "epoch": 0.8290434145097539, "grad_norm": 0.18127265572547913, "learning_rate": 0.0011965076229359735, "loss": 2.4923, "step": 416140 }, { "epoch": 0.8290633367333928, "grad_norm": 0.1573583334684372, "learning_rate": 0.0011962587481035953, "loss": 2.5061, "step": 416150 }, { "epoch": 0.8290832589570317, "grad_norm": 0.1451445072889328, "learning_rate": 0.0011960099503103287, "loss": 2.5063, "step": 416160 }, { "epoch": 0.8291031811806706, "grad_norm": 0.15804481506347656, "learning_rate": 0.001195761229484676, "loss": 2.5104, "step": 416170 }, { "epoch": 0.8291231034043096, "grad_norm": 0.16875815391540527, "learning_rate": 0.0011955125855552494, "loss": 2.5145, "step": 416180 }, { "epoch": 0.8291430256279485, "grad_norm": 0.16901014745235443, "learning_rate": 0.0011952640184507716, "loss": 2.5088, "step": 416190 }, { "epoch": 0.8291629478515874, "grad_norm": 0.17304444313049316, "learning_rate": 0.0011950155281000758, "loss": 2.5053, "step": 416200 }, { "epoch": 0.8291828700752263, "grad_norm": 0.18555738031864166, "learning_rate": 0.0011947671144321042, "loss": 2.5078, "step": 416210 }, { "epoch": 0.8292027922988652, "grad_norm": 0.15341131389141083, "learning_rate": 0.0011945187773759094, "loss": 2.495, "step": 416220 }, { "epoch": 0.8292227145225042, "grad_norm": 0.222602978348732, "learning_rate": 0.0011942705168606525, "loss": 2.4966, "step": 416230 }, { "epoch": 0.8292426367461431, "grad_norm": 0.171828955411911, "learning_rate": 0.0011940223328156048, "loss": 2.5108, "step": 416240 }, { "epoch": 0.829262558969782, "grad_norm": 0.15291234850883484, "learning_rate": 0.0011937742251701452, "loss": 2.5086, "step": 416250 }, { "epoch": 0.8292824811934209, "grad_norm": 0.1745070517063141, "learning_rate": 0.001193526193853762, "loss": 2.5063, "step": 416260 }, { "epoch": 0.8293024034170599, "grad_norm": 0.16326077282428741, "learning_rate": 0.0011932782387960516, "loss": 2.4981, "step": 416270 }, { "epoch": 0.8293223256406987, "grad_norm": 0.14547082781791687, "learning_rate": 0.0011930303599267194, "loss": 2.5098, "step": 416280 }, { "epoch": 0.8293422478643376, "grad_norm": 0.1436719000339508, "learning_rate": 0.0011927825571755775, "loss": 2.4991, "step": 416290 }, { "epoch": 0.8293621700879765, "grad_norm": 0.1690998077392578, "learning_rate": 0.001192534830472546, "loss": 2.4879, "step": 416300 }, { "epoch": 0.8293820923116154, "grad_norm": 0.1609696000814438, "learning_rate": 0.0011922871797476532, "loss": 2.5051, "step": 416310 }, { "epoch": 0.8294020145352544, "grad_norm": 0.17579172551631927, "learning_rate": 0.0011920396049310339, "loss": 2.4992, "step": 416320 }, { "epoch": 0.8294219367588933, "grad_norm": 0.16966703534126282, "learning_rate": 0.00119179210595293, "loss": 2.5077, "step": 416330 }, { "epoch": 0.8294418589825322, "grad_norm": 0.1915077269077301, "learning_rate": 0.0011915446827436905, "loss": 2.513, "step": 416340 }, { "epoch": 0.8294617812061711, "grad_norm": 0.1676548421382904, "learning_rate": 0.0011912973352337709, "loss": 2.5165, "step": 416350 }, { "epoch": 0.82948170342981, "grad_norm": 0.15527424216270447, "learning_rate": 0.0011910500633537328, "loss": 2.4791, "step": 416360 }, { "epoch": 0.829501625653449, "grad_norm": 0.19299525022506714, "learning_rate": 0.0011908028670342436, "loss": 2.4918, "step": 416370 }, { "epoch": 0.8295215478770879, "grad_norm": 0.18224173784255981, "learning_rate": 0.0011905557462060777, "loss": 2.5149, "step": 416380 }, { "epoch": 0.8295414701007268, "grad_norm": 0.1751825213432312, "learning_rate": 0.0011903087008001136, "loss": 2.5007, "step": 416390 }, { "epoch": 0.8295613923243657, "grad_norm": 0.18872350454330444, "learning_rate": 0.0011900617307473364, "loss": 2.498, "step": 416400 }, { "epoch": 0.8295813145480045, "grad_norm": 0.1522427797317505, "learning_rate": 0.0011898148359788363, "loss": 2.4996, "step": 416410 }, { "epoch": 0.8296012367716435, "grad_norm": 0.1619197279214859, "learning_rate": 0.0011895680164258076, "loss": 2.5027, "step": 416420 }, { "epoch": 0.8296211589952824, "grad_norm": 0.16091591119766235, "learning_rate": 0.0011893212720195502, "loss": 2.5072, "step": 416430 }, { "epoch": 0.8296410812189213, "grad_norm": 0.17199894785881042, "learning_rate": 0.0011890746026914686, "loss": 2.499, "step": 416440 }, { "epoch": 0.8296610034425602, "grad_norm": 0.17201894521713257, "learning_rate": 0.0011888280083730703, "loss": 2.505, "step": 416450 }, { "epoch": 0.8296809256661991, "grad_norm": 0.1793392151594162, "learning_rate": 0.0011885814889959683, "loss": 2.5084, "step": 416460 }, { "epoch": 0.8297008478898381, "grad_norm": 0.16989557445049286, "learning_rate": 0.001188335044491879, "loss": 2.5104, "step": 416470 }, { "epoch": 0.829720770113477, "grad_norm": 0.184933140873909, "learning_rate": 0.0011880886747926225, "loss": 2.5005, "step": 416480 }, { "epoch": 0.8297406923371159, "grad_norm": 0.17317929863929749, "learning_rate": 0.001187842379830122, "loss": 2.5232, "step": 416490 }, { "epoch": 0.8297606145607548, "grad_norm": 0.16527162492275238, "learning_rate": 0.001187596159536404, "loss": 2.5116, "step": 416500 }, { "epoch": 0.8297805367843937, "grad_norm": 0.14614899456501007, "learning_rate": 0.0011873500138435981, "loss": 2.5203, "step": 416510 }, { "epoch": 0.8298004590080327, "grad_norm": 0.1609712839126587, "learning_rate": 0.0011871039426839369, "loss": 2.4954, "step": 416520 }, { "epoch": 0.8298203812316716, "grad_norm": 0.17121680080890656, "learning_rate": 0.0011868579459897552, "loss": 2.5089, "step": 416530 }, { "epoch": 0.8298403034553105, "grad_norm": 0.15893901884555817, "learning_rate": 0.00118661202369349, "loss": 2.5071, "step": 416540 }, { "epoch": 0.8298602256789493, "grad_norm": 0.16354753077030182, "learning_rate": 0.0011863661757276806, "loss": 2.5039, "step": 416550 }, { "epoch": 0.8298801479025883, "grad_norm": 0.17595277726650238, "learning_rate": 0.001186120402024968, "loss": 2.5081, "step": 416560 }, { "epoch": 0.8299000701262272, "grad_norm": 0.16972263157367706, "learning_rate": 0.0011858747025180953, "loss": 2.5103, "step": 416570 }, { "epoch": 0.8299199923498661, "grad_norm": 0.1639769822359085, "learning_rate": 0.0011856290771399068, "loss": 2.5027, "step": 416580 }, { "epoch": 0.829939914573505, "grad_norm": 0.153351828455925, "learning_rate": 0.001185383525823348, "loss": 2.5071, "step": 416590 }, { "epoch": 0.8299598367971439, "grad_norm": 0.14368936419487, "learning_rate": 0.0011851380485014653, "loss": 2.505, "step": 416600 }, { "epoch": 0.8299797590207829, "grad_norm": 0.15648333728313446, "learning_rate": 0.0011848926451074066, "loss": 2.5004, "step": 416610 }, { "epoch": 0.8299996812444218, "grad_norm": 0.16296067833900452, "learning_rate": 0.0011846473155744197, "loss": 2.5018, "step": 416620 }, { "epoch": 0.8300196034680607, "grad_norm": 0.17241539061069489, "learning_rate": 0.0011844020598358526, "loss": 2.5011, "step": 416630 }, { "epoch": 0.8300395256916996, "grad_norm": 0.17338930070400238, "learning_rate": 0.0011841568778251546, "loss": 2.5055, "step": 416640 }, { "epoch": 0.8300594479153385, "grad_norm": 0.14475074410438538, "learning_rate": 0.0011839117694758734, "loss": 2.5088, "step": 416650 }, { "epoch": 0.8300793701389775, "grad_norm": 0.16443674266338348, "learning_rate": 0.001183666734721658, "loss": 2.4951, "step": 416660 }, { "epoch": 0.8300992923626164, "grad_norm": 0.16018779575824738, "learning_rate": 0.001183421773496256, "loss": 2.5022, "step": 416670 }, { "epoch": 0.8301192145862553, "grad_norm": 0.1414119452238083, "learning_rate": 0.0011831768857335145, "loss": 2.5189, "step": 416680 }, { "epoch": 0.8301391368098942, "grad_norm": 0.16557282209396362, "learning_rate": 0.0011829320713673801, "loss": 2.4964, "step": 416690 }, { "epoch": 0.830159059033533, "grad_norm": 0.1574593335390091, "learning_rate": 0.0011826873303318979, "loss": 2.5003, "step": 416700 }, { "epoch": 0.830178981257172, "grad_norm": 0.17067183554172516, "learning_rate": 0.0011824426625612122, "loss": 2.5066, "step": 416710 }, { "epoch": 0.8301989034808109, "grad_norm": 0.17185409367084503, "learning_rate": 0.0011821980679895651, "loss": 2.5269, "step": 416720 }, { "epoch": 0.8302188257044498, "grad_norm": 0.14342282712459564, "learning_rate": 0.0011819535465512976, "loss": 2.5058, "step": 416730 }, { "epoch": 0.8302387479280887, "grad_norm": 0.15867941081523895, "learning_rate": 0.0011817090981808486, "loss": 2.5074, "step": 416740 }, { "epoch": 0.8302586701517276, "grad_norm": 0.18463090062141418, "learning_rate": 0.001181464722812755, "loss": 2.5261, "step": 416750 }, { "epoch": 0.8302785923753666, "grad_norm": 0.18852393329143524, "learning_rate": 0.0011812204203816513, "loss": 2.5152, "step": 416760 }, { "epoch": 0.8302985145990055, "grad_norm": 0.1494060456752777, "learning_rate": 0.0011809761908222695, "loss": 2.5032, "step": 416770 }, { "epoch": 0.8303184368226444, "grad_norm": 0.1984427124261856, "learning_rate": 0.0011807320340694383, "loss": 2.5097, "step": 416780 }, { "epoch": 0.8303383590462833, "grad_norm": 0.1548834890127182, "learning_rate": 0.001180487950058085, "loss": 2.5041, "step": 416790 }, { "epoch": 0.8303582812699222, "grad_norm": 0.1610426902770996, "learning_rate": 0.001180243938723232, "loss": 2.499, "step": 416800 }, { "epoch": 0.8303782034935612, "grad_norm": 0.17015232145786285, "learning_rate": 0.0011800000000000003, "loss": 2.5105, "step": 416810 }, { "epoch": 0.8303981257172001, "grad_norm": 0.17195744812488556, "learning_rate": 0.0011797561338236049, "loss": 2.5077, "step": 416820 }, { "epoch": 0.830418047940839, "grad_norm": 0.152188241481781, "learning_rate": 0.0011795123401293593, "loss": 2.516, "step": 416830 }, { "epoch": 0.8304379701644778, "grad_norm": 0.14898669719696045, "learning_rate": 0.0011792686188526725, "loss": 2.5053, "step": 416840 }, { "epoch": 0.8304578923881168, "grad_norm": 0.15727348625659943, "learning_rate": 0.0011790249699290482, "loss": 2.4999, "step": 416850 }, { "epoch": 0.8304778146117557, "grad_norm": 0.1665448397397995, "learning_rate": 0.0011787813932940877, "loss": 2.5138, "step": 416860 }, { "epoch": 0.8304977368353946, "grad_norm": 0.16429275274276733, "learning_rate": 0.0011785378888834858, "loss": 2.4964, "step": 416870 }, { "epoch": 0.8305176590590335, "grad_norm": 0.16985465586185455, "learning_rate": 0.0011782944566330344, "loss": 2.5004, "step": 416880 }, { "epoch": 0.8305375812826724, "grad_norm": 0.15067803859710693, "learning_rate": 0.0011780510964786192, "loss": 2.4997, "step": 416890 }, { "epoch": 0.8305575035063114, "grad_norm": 0.14080071449279785, "learning_rate": 0.0011778078083562213, "loss": 2.4961, "step": 416900 }, { "epoch": 0.8305774257299503, "grad_norm": 0.17241574823856354, "learning_rate": 0.0011775645922019165, "loss": 2.5074, "step": 416910 }, { "epoch": 0.8305973479535892, "grad_norm": 0.18013694882392883, "learning_rate": 0.0011773214479518749, "loss": 2.5122, "step": 416920 }, { "epoch": 0.8306172701772281, "grad_norm": 0.16315235197544098, "learning_rate": 0.0011770783755423607, "loss": 2.5037, "step": 416930 }, { "epoch": 0.830637192400867, "grad_norm": 0.14874565601348877, "learning_rate": 0.0011768353749097328, "loss": 2.5056, "step": 416940 }, { "epoch": 0.830657114624506, "grad_norm": 0.15751099586486816, "learning_rate": 0.001176592445990444, "loss": 2.5045, "step": 416950 }, { "epoch": 0.8306770368481449, "grad_norm": 0.17556075751781464, "learning_rate": 0.00117634958872104, "loss": 2.4984, "step": 416960 }, { "epoch": 0.8306969590717838, "grad_norm": 0.18099616467952728, "learning_rate": 0.0011761068030381608, "loss": 2.4984, "step": 416970 }, { "epoch": 0.8307168812954226, "grad_norm": 0.1733371466398239, "learning_rate": 0.0011758640888785395, "loss": 2.4936, "step": 416980 }, { "epoch": 0.8307368035190615, "grad_norm": 0.1448121964931488, "learning_rate": 0.0011756214461790023, "loss": 2.5165, "step": 416990 }, { "epoch": 0.8307567257427005, "grad_norm": 0.17026948928833008, "learning_rate": 0.001175378874876468, "loss": 2.5111, "step": 417000 }, { "epoch": 0.8307766479663394, "grad_norm": 0.17142559587955475, "learning_rate": 0.0011751363749079489, "loss": 2.5199, "step": 417010 }, { "epoch": 0.8307965701899783, "grad_norm": 0.15534673631191254, "learning_rate": 0.0011748939462105494, "loss": 2.4998, "step": 417020 }, { "epoch": 0.8308164924136172, "grad_norm": 0.15800216794013977, "learning_rate": 0.0011746515887214662, "loss": 2.4996, "step": 417030 }, { "epoch": 0.8308364146372561, "grad_norm": 0.15679626166820526, "learning_rate": 0.0011744093023779883, "loss": 2.4923, "step": 417040 }, { "epoch": 0.8308563368608951, "grad_norm": 0.17134833335876465, "learning_rate": 0.001174167087117497, "loss": 2.5122, "step": 417050 }, { "epoch": 0.830876259084534, "grad_norm": 0.15529818832874298, "learning_rate": 0.0011739249428774645, "loss": 2.4989, "step": 417060 }, { "epoch": 0.8308961813081729, "grad_norm": 0.15157140791416168, "learning_rate": 0.001173682869595456, "loss": 2.5007, "step": 417070 }, { "epoch": 0.8309161035318118, "grad_norm": 0.1579800397157669, "learning_rate": 0.001173440867209127, "loss": 2.5107, "step": 417080 }, { "epoch": 0.8309360257554507, "grad_norm": 0.18941032886505127, "learning_rate": 0.0011731989356562245, "loss": 2.5023, "step": 417090 }, { "epoch": 0.8309559479790897, "grad_norm": 0.1600729078054428, "learning_rate": 0.0011729570748745869, "loss": 2.4853, "step": 417100 }, { "epoch": 0.8309758702027286, "grad_norm": 0.18967077136039734, "learning_rate": 0.0011727152848021425, "loss": 2.4911, "step": 417110 }, { "epoch": 0.8309957924263675, "grad_norm": 0.17228032648563385, "learning_rate": 0.001172473565376912, "loss": 2.4984, "step": 417120 }, { "epoch": 0.8310157146500063, "grad_norm": 0.1483227163553238, "learning_rate": 0.001172231916537005, "loss": 2.4953, "step": 417130 }, { "epoch": 0.8310356368736452, "grad_norm": 0.17679952085018158, "learning_rate": 0.0011719903382206218, "loss": 2.4948, "step": 417140 }, { "epoch": 0.8310555590972842, "grad_norm": 0.16191822290420532, "learning_rate": 0.0011717488303660537, "loss": 2.5122, "step": 417150 }, { "epoch": 0.8310754813209231, "grad_norm": 0.189957857131958, "learning_rate": 0.0011715073929116809, "loss": 2.4975, "step": 417160 }, { "epoch": 0.831095403544562, "grad_norm": 0.16955822706222534, "learning_rate": 0.0011712660257959734, "loss": 2.5043, "step": 417170 }, { "epoch": 0.8311153257682009, "grad_norm": 0.17466549575328827, "learning_rate": 0.0011710247289574917, "loss": 2.5069, "step": 417180 }, { "epoch": 0.8311352479918399, "grad_norm": 0.14790457487106323, "learning_rate": 0.001170783502334885, "loss": 2.5095, "step": 417190 }, { "epoch": 0.8311551702154788, "grad_norm": 0.16529643535614014, "learning_rate": 0.0011705423458668914, "loss": 2.5114, "step": 417200 }, { "epoch": 0.8311750924391177, "grad_norm": 0.17430399358272552, "learning_rate": 0.0011703012594923385, "loss": 2.4989, "step": 417210 }, { "epoch": 0.8311950146627566, "grad_norm": 0.1434645801782608, "learning_rate": 0.001170060243150143, "loss": 2.5187, "step": 417220 }, { "epoch": 0.8312149368863955, "grad_norm": 0.16276367008686066, "learning_rate": 0.0011698192967793096, "loss": 2.5028, "step": 417230 }, { "epoch": 0.8312348591100345, "grad_norm": 0.17884919047355652, "learning_rate": 0.0011695784203189321, "loss": 2.5102, "step": 417240 }, { "epoch": 0.8312547813336734, "grad_norm": 0.14250153303146362, "learning_rate": 0.0011693376137081927, "loss": 2.5078, "step": 417250 }, { "epoch": 0.8312747035573123, "grad_norm": 0.1804332286119461, "learning_rate": 0.0011690968768863605, "loss": 2.51, "step": 417260 }, { "epoch": 0.8312946257809511, "grad_norm": 0.15777376294136047, "learning_rate": 0.0011688562097927943, "loss": 2.4907, "step": 417270 }, { "epoch": 0.83131454800459, "grad_norm": 0.15857544541358948, "learning_rate": 0.0011686156123669389, "loss": 2.5131, "step": 417280 }, { "epoch": 0.831334470228229, "grad_norm": 0.14149542152881622, "learning_rate": 0.0011683750845483286, "loss": 2.5041, "step": 417290 }, { "epoch": 0.8313543924518679, "grad_norm": 0.1798512488603592, "learning_rate": 0.0011681346262765833, "loss": 2.5054, "step": 417300 }, { "epoch": 0.8313743146755068, "grad_norm": 0.159232035279274, "learning_rate": 0.0011678942374914111, "loss": 2.5063, "step": 417310 }, { "epoch": 0.8313942368991457, "grad_norm": 0.1611430048942566, "learning_rate": 0.0011676539181326075, "loss": 2.5027, "step": 417320 }, { "epoch": 0.8314141591227846, "grad_norm": 0.15808707475662231, "learning_rate": 0.0011674136681400543, "loss": 2.491, "step": 417330 }, { "epoch": 0.8314340813464236, "grad_norm": 0.15394340455532074, "learning_rate": 0.0011671734874537194, "loss": 2.5076, "step": 417340 }, { "epoch": 0.8314540035700625, "grad_norm": 0.15468356013298035, "learning_rate": 0.001166933376013659, "loss": 2.4903, "step": 417350 }, { "epoch": 0.8314739257937014, "grad_norm": 0.15345247089862823, "learning_rate": 0.0011666933337600137, "loss": 2.4943, "step": 417360 }, { "epoch": 0.8314938480173403, "grad_norm": 0.15287448465824127, "learning_rate": 0.0011664533606330118, "loss": 2.498, "step": 417370 }, { "epoch": 0.8315137702409792, "grad_norm": 0.1650836318731308, "learning_rate": 0.0011662134565729666, "loss": 2.4925, "step": 417380 }, { "epoch": 0.8315336924646182, "grad_norm": 0.1517283022403717, "learning_rate": 0.0011659736215202782, "loss": 2.5017, "step": 417390 }, { "epoch": 0.8315536146882571, "grad_norm": 0.18089736998081207, "learning_rate": 0.0011657338554154318, "loss": 2.5081, "step": 417400 }, { "epoch": 0.831573536911896, "grad_norm": 0.16265869140625, "learning_rate": 0.0011654941581989973, "loss": 2.4963, "step": 417410 }, { "epoch": 0.8315934591355348, "grad_norm": 0.20105423033237457, "learning_rate": 0.001165254529811632, "loss": 2.5174, "step": 417420 }, { "epoch": 0.8316133813591737, "grad_norm": 0.17052516341209412, "learning_rate": 0.001165014970194076, "loss": 2.5074, "step": 417430 }, { "epoch": 0.8316333035828127, "grad_norm": 0.1639917641878128, "learning_rate": 0.0011647754792871558, "loss": 2.5016, "step": 417440 }, { "epoch": 0.8316532258064516, "grad_norm": 0.16614732146263123, "learning_rate": 0.001164536057031783, "loss": 2.4911, "step": 417450 }, { "epoch": 0.8316731480300905, "grad_norm": 0.1573503613471985, "learning_rate": 0.001164296703368953, "loss": 2.5126, "step": 417460 }, { "epoch": 0.8316930702537294, "grad_norm": 0.15278024971485138, "learning_rate": 0.0011640574182397454, "loss": 2.5049, "step": 417470 }, { "epoch": 0.8317129924773684, "grad_norm": 0.14848490059375763, "learning_rate": 0.001163818201585325, "loss": 2.5101, "step": 417480 }, { "epoch": 0.8317329147010073, "grad_norm": 0.16050924360752106, "learning_rate": 0.0011635790533469407, "loss": 2.4908, "step": 417490 }, { "epoch": 0.8317528369246462, "grad_norm": 0.175228551030159, "learning_rate": 0.0011633399734659244, "loss": 2.4904, "step": 417500 }, { "epoch": 0.8317727591482851, "grad_norm": 0.21021881699562073, "learning_rate": 0.001163100961883693, "loss": 2.5094, "step": 417510 }, { "epoch": 0.831792681371924, "grad_norm": 0.19743579626083374, "learning_rate": 0.001162862018541746, "loss": 2.5117, "step": 417520 }, { "epoch": 0.831812603595563, "grad_norm": 0.18607263267040253, "learning_rate": 0.001162623143381667, "loss": 2.496, "step": 417530 }, { "epoch": 0.8318325258192019, "grad_norm": 0.15201976895332336, "learning_rate": 0.0011623843363451232, "loss": 2.5029, "step": 417540 }, { "epoch": 0.8318524480428408, "grad_norm": 0.15635281801223755, "learning_rate": 0.0011621455973738635, "loss": 2.5079, "step": 417550 }, { "epoch": 0.8318723702664796, "grad_norm": 0.21324418485164642, "learning_rate": 0.0011619069264097216, "loss": 2.5046, "step": 417560 }, { "epoch": 0.8318922924901185, "grad_norm": 0.15970034897327423, "learning_rate": 0.0011616683233946126, "loss": 2.5116, "step": 417570 }, { "epoch": 0.8319122147137575, "grad_norm": 0.1439165472984314, "learning_rate": 0.0011614297882705346, "loss": 2.5138, "step": 417580 }, { "epoch": 0.8319321369373964, "grad_norm": 0.16147387027740479, "learning_rate": 0.0011611913209795693, "loss": 2.5175, "step": 417590 }, { "epoch": 0.8319520591610353, "grad_norm": 0.166578009724617, "learning_rate": 0.0011609529214638788, "loss": 2.5062, "step": 417600 }, { "epoch": 0.8319719813846742, "grad_norm": 0.16128867864608765, "learning_rate": 0.0011607145896657086, "loss": 2.5043, "step": 417610 }, { "epoch": 0.8319919036083131, "grad_norm": 0.16234590113162994, "learning_rate": 0.0011604763255273858, "loss": 2.512, "step": 417620 }, { "epoch": 0.8320118258319521, "grad_norm": 0.17479926347732544, "learning_rate": 0.0011602381289913192, "loss": 2.4881, "step": 417630 }, { "epoch": 0.832031748055591, "grad_norm": 0.18300610780715942, "learning_rate": 0.0011600000000000002, "loss": 2.499, "step": 417640 }, { "epoch": 0.8320516702792299, "grad_norm": 0.15797074139118195, "learning_rate": 0.001159761938496, "loss": 2.5157, "step": 417650 }, { "epoch": 0.8320715925028688, "grad_norm": 0.15708434581756592, "learning_rate": 0.0011595239444219722, "loss": 2.4938, "step": 417660 }, { "epoch": 0.8320915147265077, "grad_norm": 0.19068747758865356, "learning_rate": 0.0011592860177206518, "loss": 2.5126, "step": 417670 }, { "epoch": 0.8321114369501467, "grad_norm": 0.16521821916103363, "learning_rate": 0.0011590481583348545, "loss": 2.4987, "step": 417680 }, { "epoch": 0.8321313591737856, "grad_norm": 0.1665377914905548, "learning_rate": 0.0011588103662074764, "loss": 2.502, "step": 417690 }, { "epoch": 0.8321512813974244, "grad_norm": 0.16250728070735931, "learning_rate": 0.001158572641281495, "loss": 2.5087, "step": 417700 }, { "epoch": 0.8321712036210633, "grad_norm": 0.1621679961681366, "learning_rate": 0.0011583349834999675, "loss": 2.4972, "step": 417710 }, { "epoch": 0.8321911258447022, "grad_norm": 0.1848878711462021, "learning_rate": 0.0011580973928060326, "loss": 2.5152, "step": 417720 }, { "epoch": 0.8322110480683412, "grad_norm": 0.14295826852321625, "learning_rate": 0.001157859869142908, "loss": 2.5019, "step": 417730 }, { "epoch": 0.8322309702919801, "grad_norm": 0.14684441685676575, "learning_rate": 0.0011576224124538925, "loss": 2.4886, "step": 417740 }, { "epoch": 0.832250892515619, "grad_norm": 0.14687971770763397, "learning_rate": 0.0011573850226823642, "loss": 2.5016, "step": 417750 }, { "epoch": 0.8322708147392579, "grad_norm": 0.1904318630695343, "learning_rate": 0.001157147699771781, "loss": 2.5047, "step": 417760 }, { "epoch": 0.8322907369628969, "grad_norm": 0.1491335779428482, "learning_rate": 0.0011569104436656803, "loss": 2.5009, "step": 417770 }, { "epoch": 0.8323106591865358, "grad_norm": 0.159675732254982, "learning_rate": 0.0011566732543076795, "loss": 2.5086, "step": 417780 }, { "epoch": 0.8323305814101747, "grad_norm": 0.15300118923187256, "learning_rate": 0.0011564361316414745, "loss": 2.5101, "step": 417790 }, { "epoch": 0.8323505036338136, "grad_norm": 0.1713639795780182, "learning_rate": 0.0011561990756108406, "loss": 2.5029, "step": 417800 }, { "epoch": 0.8323704258574525, "grad_norm": 0.17435480654239655, "learning_rate": 0.0011559620861596323, "loss": 2.5132, "step": 417810 }, { "epoch": 0.8323903480810915, "grad_norm": 0.18782760202884674, "learning_rate": 0.0011557251632317827, "loss": 2.4969, "step": 417820 }, { "epoch": 0.8324102703047304, "grad_norm": 0.14737871289253235, "learning_rate": 0.0011554883067713035, "loss": 2.4975, "step": 417830 }, { "epoch": 0.8324301925283693, "grad_norm": 0.16552796959877014, "learning_rate": 0.0011552515167222851, "loss": 2.4995, "step": 417840 }, { "epoch": 0.8324501147520081, "grad_norm": 0.16791366040706635, "learning_rate": 0.0011550147930288956, "loss": 2.4977, "step": 417850 }, { "epoch": 0.832470036975647, "grad_norm": 0.16512323915958405, "learning_rate": 0.0011547781356353825, "loss": 2.5014, "step": 417860 }, { "epoch": 0.832489959199286, "grad_norm": 0.1698048859834671, "learning_rate": 0.00115454154448607, "loss": 2.4939, "step": 417870 }, { "epoch": 0.8325098814229249, "grad_norm": 0.15513408184051514, "learning_rate": 0.0011543050195253611, "loss": 2.501, "step": 417880 }, { "epoch": 0.8325298036465638, "grad_norm": 0.1490403264760971, "learning_rate": 0.001154068560697736, "loss": 2.5028, "step": 417890 }, { "epoch": 0.8325497258702027, "grad_norm": 0.15075597167015076, "learning_rate": 0.0011538321679477527, "loss": 2.4976, "step": 417900 }, { "epoch": 0.8325696480938416, "grad_norm": 0.1715918779373169, "learning_rate": 0.0011535958412200468, "loss": 2.4941, "step": 417910 }, { "epoch": 0.8325895703174806, "grad_norm": 0.16923922300338745, "learning_rate": 0.001153359580459331, "loss": 2.5166, "step": 417920 }, { "epoch": 0.8326094925411195, "grad_norm": 0.17612890899181366, "learning_rate": 0.0011531233856103948, "loss": 2.5019, "step": 417930 }, { "epoch": 0.8326294147647584, "grad_norm": 0.14989055693149567, "learning_rate": 0.0011528872566181052, "loss": 2.5078, "step": 417940 }, { "epoch": 0.8326493369883973, "grad_norm": 0.17315682768821716, "learning_rate": 0.0011526511934274058, "loss": 2.5146, "step": 417950 }, { "epoch": 0.8326692592120362, "grad_norm": 0.18272094428539276, "learning_rate": 0.0011524151959833163, "loss": 2.516, "step": 417960 }, { "epoch": 0.8326891814356752, "grad_norm": 0.18220283091068268, "learning_rate": 0.0011521792642309344, "loss": 2.505, "step": 417970 }, { "epoch": 0.832709103659314, "grad_norm": 0.1764015108346939, "learning_rate": 0.0011519433981154324, "loss": 2.5013, "step": 417980 }, { "epoch": 0.832729025882953, "grad_norm": 0.15973567962646484, "learning_rate": 0.00115170759758206, "loss": 2.4862, "step": 417990 }, { "epoch": 0.8327489481065918, "grad_norm": 0.14118267595767975, "learning_rate": 0.001151471862576143, "loss": 2.4883, "step": 418000 }, { "epoch": 0.8327688703302307, "grad_norm": 0.15817256271839142, "learning_rate": 0.0011512361930430823, "loss": 2.5017, "step": 418010 }, { "epoch": 0.8327887925538697, "grad_norm": 0.1956692934036255, "learning_rate": 0.001151000588928355, "loss": 2.4983, "step": 418020 }, { "epoch": 0.8328087147775086, "grad_norm": 0.1554996520280838, "learning_rate": 0.0011507650501775143, "loss": 2.4961, "step": 418030 }, { "epoch": 0.8328286370011475, "grad_norm": 0.1628435254096985, "learning_rate": 0.0011505295767361878, "loss": 2.4988, "step": 418040 }, { "epoch": 0.8328485592247864, "grad_norm": 0.16145910322666168, "learning_rate": 0.0011502941685500798, "loss": 2.483, "step": 418050 }, { "epoch": 0.8328684814484254, "grad_norm": 0.16322147846221924, "learning_rate": 0.0011500588255649688, "loss": 2.5027, "step": 418060 }, { "epoch": 0.8328884036720643, "grad_norm": 0.1569071263074875, "learning_rate": 0.0011498235477267087, "loss": 2.4909, "step": 418070 }, { "epoch": 0.8329083258957032, "grad_norm": 0.17062833905220032, "learning_rate": 0.001149588334981228, "loss": 2.5182, "step": 418080 }, { "epoch": 0.8329282481193421, "grad_norm": 0.14250123500823975, "learning_rate": 0.0011493531872745306, "loss": 2.4942, "step": 418090 }, { "epoch": 0.832948170342981, "grad_norm": 0.1838841289281845, "learning_rate": 0.0011491181045526942, "loss": 2.5026, "step": 418100 }, { "epoch": 0.83296809256662, "grad_norm": 0.17244774103164673, "learning_rate": 0.0011488830867618716, "loss": 2.501, "step": 418110 }, { "epoch": 0.8329880147902589, "grad_norm": 0.16339056193828583, "learning_rate": 0.0011486481338482894, "loss": 2.4869, "step": 418120 }, { "epoch": 0.8330079370138977, "grad_norm": 0.16998571157455444, "learning_rate": 0.0011484132457582495, "loss": 2.4991, "step": 418130 }, { "epoch": 0.8330278592375366, "grad_norm": 0.1546918749809265, "learning_rate": 0.001148178422438126, "loss": 2.4918, "step": 418140 }, { "epoch": 0.8330477814611755, "grad_norm": 0.17780190706253052, "learning_rate": 0.0011479436638343683, "loss": 2.5133, "step": 418150 }, { "epoch": 0.8330677036848145, "grad_norm": 0.16715508699417114, "learning_rate": 0.0011477089698934994, "loss": 2.5059, "step": 418160 }, { "epoch": 0.8330876259084534, "grad_norm": 0.1420063078403473, "learning_rate": 0.0011474743405621153, "loss": 2.5212, "step": 418170 }, { "epoch": 0.8331075481320923, "grad_norm": 0.15758836269378662, "learning_rate": 0.001147239775786886, "loss": 2.5133, "step": 418180 }, { "epoch": 0.8331274703557312, "grad_norm": 0.16518132388591766, "learning_rate": 0.0011470052755145552, "loss": 2.5047, "step": 418190 }, { "epoch": 0.8331473925793701, "grad_norm": 0.1571129858493805, "learning_rate": 0.0011467708396919382, "loss": 2.5128, "step": 418200 }, { "epoch": 0.8331673148030091, "grad_norm": 0.20052261650562286, "learning_rate": 0.0011465364682659255, "loss": 2.5112, "step": 418210 }, { "epoch": 0.833187237026648, "grad_norm": 0.15994292497634888, "learning_rate": 0.001146302161183478, "loss": 2.4992, "step": 418220 }, { "epoch": 0.8332071592502869, "grad_norm": 0.1571117490530014, "learning_rate": 0.0011460679183916322, "loss": 2.5113, "step": 418230 }, { "epoch": 0.8332270814739258, "grad_norm": 0.1670428216457367, "learning_rate": 0.001145833739837495, "loss": 2.4985, "step": 418240 }, { "epoch": 0.8332470036975647, "grad_norm": 0.13623611629009247, "learning_rate": 0.0011455996254682468, "loss": 2.4954, "step": 418250 }, { "epoch": 0.8332669259212037, "grad_norm": 0.19676832854747772, "learning_rate": 0.00114536557523114, "loss": 2.4895, "step": 418260 }, { "epoch": 0.8332868481448426, "grad_norm": 0.15223851799964905, "learning_rate": 0.0011451315890734994, "loss": 2.5008, "step": 418270 }, { "epoch": 0.8333067703684814, "grad_norm": 0.16106297075748444, "learning_rate": 0.001144897666942722, "loss": 2.498, "step": 418280 }, { "epoch": 0.8333266925921203, "grad_norm": 0.17498570680618286, "learning_rate": 0.0011446638087862762, "loss": 2.5078, "step": 418290 }, { "epoch": 0.8333466148157592, "grad_norm": 0.14973735809326172, "learning_rate": 0.0011444300145517024, "loss": 2.488, "step": 418300 }, { "epoch": 0.8333665370393982, "grad_norm": 0.18735229969024658, "learning_rate": 0.0011441962841866134, "loss": 2.5012, "step": 418310 }, { "epoch": 0.8333864592630371, "grad_norm": 0.17660503089427948, "learning_rate": 0.001143962617638692, "loss": 2.5017, "step": 418320 }, { "epoch": 0.833406381486676, "grad_norm": 0.15867644548416138, "learning_rate": 0.0011437290148556942, "loss": 2.4915, "step": 418330 }, { "epoch": 0.8334263037103149, "grad_norm": 0.1686123162508011, "learning_rate": 0.0011434954757854458, "loss": 2.497, "step": 418340 }, { "epoch": 0.8334462259339539, "grad_norm": 0.16092054545879364, "learning_rate": 0.0011432620003758442, "loss": 2.5069, "step": 418350 }, { "epoch": 0.8334661481575928, "grad_norm": 0.14804911613464355, "learning_rate": 0.0011430285885748578, "loss": 2.4852, "step": 418360 }, { "epoch": 0.8334860703812317, "grad_norm": 0.16443724930286407, "learning_rate": 0.0011427952403305264, "loss": 2.4939, "step": 418370 }, { "epoch": 0.8335059926048706, "grad_norm": 0.18328973650932312, "learning_rate": 0.0011425619555909595, "loss": 2.4982, "step": 418380 }, { "epoch": 0.8335259148285095, "grad_norm": 0.16852204501628876, "learning_rate": 0.0011423287343043374, "loss": 2.5096, "step": 418390 }, { "epoch": 0.8335458370521485, "grad_norm": 0.22071026265621185, "learning_rate": 0.0011420955764189114, "loss": 2.5088, "step": 418400 }, { "epoch": 0.8335657592757874, "grad_norm": 0.1530482918024063, "learning_rate": 0.0011418624818830025, "loss": 2.5225, "step": 418410 }, { "epoch": 0.8335856814994262, "grad_norm": 0.17423875629901886, "learning_rate": 0.0011416294506450028, "loss": 2.505, "step": 418420 }, { "epoch": 0.8336056037230651, "grad_norm": 0.16464665532112122, "learning_rate": 0.0011413964826533726, "loss": 2.4902, "step": 418430 }, { "epoch": 0.833625525946704, "grad_norm": 0.16727592051029205, "learning_rate": 0.0011411635778566444, "loss": 2.4808, "step": 418440 }, { "epoch": 0.833645448170343, "grad_norm": 0.17338724434375763, "learning_rate": 0.0011409307362034186, "loss": 2.5021, "step": 418450 }, { "epoch": 0.8336653703939819, "grad_norm": 0.19493994116783142, "learning_rate": 0.0011406979576423666, "loss": 2.5214, "step": 418460 }, { "epoch": 0.8336852926176208, "grad_norm": 0.14660145342350006, "learning_rate": 0.001140465242122228, "loss": 2.5213, "step": 418470 }, { "epoch": 0.8337052148412597, "grad_norm": 0.1616959571838379, "learning_rate": 0.0011402325895918128, "loss": 2.5036, "step": 418480 }, { "epoch": 0.8337251370648986, "grad_norm": 0.15342216193675995, "learning_rate": 0.0011400000000000002, "loss": 2.5166, "step": 418490 }, { "epoch": 0.8337450592885376, "grad_norm": 0.1690918356180191, "learning_rate": 0.0011397674732957373, "loss": 2.4955, "step": 418500 }, { "epoch": 0.8337649815121765, "grad_norm": 0.15700992941856384, "learning_rate": 0.0011395350094280418, "loss": 2.502, "step": 418510 }, { "epoch": 0.8337849037358154, "grad_norm": 0.1442607343196869, "learning_rate": 0.0011393026083459997, "loss": 2.4999, "step": 418520 }, { "epoch": 0.8338048259594543, "grad_norm": 0.16218030452728271, "learning_rate": 0.0011390702699987647, "loss": 2.5019, "step": 418530 }, { "epoch": 0.8338247481830932, "grad_norm": 0.15477946400642395, "learning_rate": 0.0011388379943355607, "loss": 2.4893, "step": 418540 }, { "epoch": 0.8338446704067322, "grad_norm": 0.16074085235595703, "learning_rate": 0.001138605781305679, "loss": 2.4977, "step": 418550 }, { "epoch": 0.833864592630371, "grad_norm": 0.1539228856563568, "learning_rate": 0.0011383736308584794, "loss": 2.4898, "step": 418560 }, { "epoch": 0.8338845148540099, "grad_norm": 0.2848284840583801, "learning_rate": 0.0011381415429433904, "loss": 2.5075, "step": 418570 }, { "epoch": 0.8339044370776488, "grad_norm": 0.16135528683662415, "learning_rate": 0.0011379095175099079, "loss": 2.5087, "step": 418580 }, { "epoch": 0.8339243593012877, "grad_norm": 0.17414157092571259, "learning_rate": 0.001137677554507596, "loss": 2.4915, "step": 418590 }, { "epoch": 0.8339442815249267, "grad_norm": 0.14807693660259247, "learning_rate": 0.0011374456538860872, "loss": 2.511, "step": 418600 }, { "epoch": 0.8339642037485656, "grad_norm": 0.17090417444705963, "learning_rate": 0.0011372138155950804, "loss": 2.5002, "step": 418610 }, { "epoch": 0.8339841259722045, "grad_norm": 0.18019422888755798, "learning_rate": 0.0011369820395843433, "loss": 2.5022, "step": 418620 }, { "epoch": 0.8340040481958434, "grad_norm": 0.15635545551776886, "learning_rate": 0.0011367503258037104, "loss": 2.4858, "step": 418630 }, { "epoch": 0.8340239704194823, "grad_norm": 0.1668543815612793, "learning_rate": 0.0011365186742030838, "loss": 2.5054, "step": 418640 }, { "epoch": 0.8340438926431213, "grad_norm": 0.1771882176399231, "learning_rate": 0.0011362870847324326, "loss": 2.4998, "step": 418650 }, { "epoch": 0.8340638148667602, "grad_norm": 0.14597614109516144, "learning_rate": 0.001136055557341793, "loss": 2.4949, "step": 418660 }, { "epoch": 0.8340837370903991, "grad_norm": 0.18590064346790314, "learning_rate": 0.001135824091981268, "loss": 2.5112, "step": 418670 }, { "epoch": 0.834103659314038, "grad_norm": 0.15306884050369263, "learning_rate": 0.0011355926886010277, "loss": 2.5081, "step": 418680 }, { "epoch": 0.834123581537677, "grad_norm": 0.15811000764369965, "learning_rate": 0.0011353613471513087, "loss": 2.4887, "step": 418690 }, { "epoch": 0.8341435037613159, "grad_norm": 0.2145085632801056, "learning_rate": 0.0011351300675824139, "loss": 2.4868, "step": 418700 }, { "epoch": 0.8341634259849547, "grad_norm": 0.16548772156238556, "learning_rate": 0.0011348988498447133, "loss": 2.5033, "step": 418710 }, { "epoch": 0.8341833482085936, "grad_norm": 0.1611783504486084, "learning_rate": 0.0011346676938886425, "loss": 2.517, "step": 418720 }, { "epoch": 0.8342032704322325, "grad_norm": 0.1594877690076828, "learning_rate": 0.0011344365996647041, "loss": 2.5101, "step": 418730 }, { "epoch": 0.8342231926558715, "grad_norm": 0.17245382070541382, "learning_rate": 0.0011342055671234655, "loss": 2.5151, "step": 418740 }, { "epoch": 0.8342431148795104, "grad_norm": 0.1567108929157257, "learning_rate": 0.0011339745962155613, "loss": 2.5074, "step": 418750 }, { "epoch": 0.8342630371031493, "grad_norm": 0.1545310616493225, "learning_rate": 0.0011337436868916915, "loss": 2.5039, "step": 418760 }, { "epoch": 0.8342829593267882, "grad_norm": 0.15614871680736542, "learning_rate": 0.0011335128391026213, "loss": 2.5105, "step": 418770 }, { "epoch": 0.8343028815504271, "grad_norm": 0.18008743226528168, "learning_rate": 0.001133282052799182, "loss": 2.5009, "step": 418780 }, { "epoch": 0.8343228037740661, "grad_norm": 0.15020541846752167, "learning_rate": 0.0011330513279322703, "loss": 2.49, "step": 418790 }, { "epoch": 0.834342725997705, "grad_norm": 0.1822979599237442, "learning_rate": 0.001132820664452848, "loss": 2.5086, "step": 418800 }, { "epoch": 0.8343626482213439, "grad_norm": 0.1772497594356537, "learning_rate": 0.0011325900623119422, "loss": 2.5189, "step": 418810 }, { "epoch": 0.8343825704449828, "grad_norm": 0.18800631165504456, "learning_rate": 0.0011323595214606454, "loss": 2.4975, "step": 418820 }, { "epoch": 0.8344024926686217, "grad_norm": 0.15115733444690704, "learning_rate": 0.0011321290418501147, "loss": 2.4972, "step": 418830 }, { "epoch": 0.8344224148922607, "grad_norm": 0.18627773225307465, "learning_rate": 0.0011318986234315719, "loss": 2.5006, "step": 418840 }, { "epoch": 0.8344423371158995, "grad_norm": 0.16536059975624084, "learning_rate": 0.0011316682661563035, "loss": 2.5123, "step": 418850 }, { "epoch": 0.8344622593395384, "grad_norm": 0.15705984830856323, "learning_rate": 0.0011314379699756615, "loss": 2.5035, "step": 418860 }, { "epoch": 0.8344821815631773, "grad_norm": 0.17479823529720306, "learning_rate": 0.0011312077348410612, "loss": 2.5095, "step": 418870 }, { "epoch": 0.8345021037868162, "grad_norm": 0.15454894304275513, "learning_rate": 0.0011309775607039826, "loss": 2.4884, "step": 418880 }, { "epoch": 0.8345220260104552, "grad_norm": 0.1524612307548523, "learning_rate": 0.001130747447515971, "loss": 2.5001, "step": 418890 }, { "epoch": 0.8345419482340941, "grad_norm": 0.1578390747308731, "learning_rate": 0.0011305173952286336, "loss": 2.4968, "step": 418900 }, { "epoch": 0.834561870457733, "grad_norm": 0.15393008291721344, "learning_rate": 0.001130287403793644, "loss": 2.4989, "step": 418910 }, { "epoch": 0.8345817926813719, "grad_norm": 0.1765017956495285, "learning_rate": 0.0011300574731627382, "loss": 2.5165, "step": 418920 }, { "epoch": 0.8346017149050108, "grad_norm": 0.1515943557024002, "learning_rate": 0.0011298276032877164, "loss": 2.5084, "step": 418930 }, { "epoch": 0.8346216371286498, "grad_norm": 0.14674551784992218, "learning_rate": 0.0011295977941204423, "loss": 2.4936, "step": 418940 }, { "epoch": 0.8346415593522887, "grad_norm": 0.14819879829883575, "learning_rate": 0.0011293680456128433, "loss": 2.5116, "step": 418950 }, { "epoch": 0.8346614815759276, "grad_norm": 0.18788132071495056, "learning_rate": 0.0011291383577169106, "loss": 2.4921, "step": 418960 }, { "epoch": 0.8346814037995665, "grad_norm": 0.1755901724100113, "learning_rate": 0.0011289087303846972, "loss": 2.4865, "step": 418970 }, { "epoch": 0.8347013260232055, "grad_norm": 0.1593828946352005, "learning_rate": 0.0011286791635683215, "loss": 2.4901, "step": 418980 }, { "epoch": 0.8347212482468443, "grad_norm": 0.15567012131214142, "learning_rate": 0.001128449657219963, "loss": 2.486, "step": 418990 }, { "epoch": 0.8347411704704832, "grad_norm": 0.14688925445079803, "learning_rate": 0.0011282202112918654, "loss": 2.5076, "step": 419000 }, { "epoch": 0.8347610926941221, "grad_norm": 0.16445297002792358, "learning_rate": 0.0011279908257363345, "loss": 2.4795, "step": 419010 }, { "epoch": 0.834781014917761, "grad_norm": 0.17390567064285278, "learning_rate": 0.0011277615005057392, "loss": 2.5082, "step": 419020 }, { "epoch": 0.8348009371414, "grad_norm": 0.16648364067077637, "learning_rate": 0.0011275322355525103, "loss": 2.4915, "step": 419030 }, { "epoch": 0.8348208593650389, "grad_norm": 0.15876753628253937, "learning_rate": 0.0011273030308291427, "loss": 2.4938, "step": 419040 }, { "epoch": 0.8348407815886778, "grad_norm": 0.1736726015806198, "learning_rate": 0.0011270738862881923, "loss": 2.4931, "step": 419050 }, { "epoch": 0.8348607038123167, "grad_norm": 0.18092657625675201, "learning_rate": 0.0011268448018822773, "loss": 2.5065, "step": 419060 }, { "epoch": 0.8348806260359556, "grad_norm": 0.161252960562706, "learning_rate": 0.001126615777564078, "loss": 2.4983, "step": 419070 }, { "epoch": 0.8349005482595946, "grad_norm": 0.17505928874015808, "learning_rate": 0.001126386813286338, "loss": 2.5014, "step": 419080 }, { "epoch": 0.8349204704832335, "grad_norm": 0.16264738142490387, "learning_rate": 0.001126157909001861, "loss": 2.5215, "step": 419090 }, { "epoch": 0.8349403927068724, "grad_norm": 0.14974063634872437, "learning_rate": 0.0011259290646635138, "loss": 2.4935, "step": 419100 }, { "epoch": 0.8349603149305113, "grad_norm": 0.18297940492630005, "learning_rate": 0.0011257002802242243, "loss": 2.5051, "step": 419110 }, { "epoch": 0.8349802371541502, "grad_norm": 0.1678498089313507, "learning_rate": 0.001125471555636982, "loss": 2.5049, "step": 419120 }, { "epoch": 0.8350001593777892, "grad_norm": 0.17860935628414154, "learning_rate": 0.0011252428908548385, "loss": 2.485, "step": 419130 }, { "epoch": 0.835020081601428, "grad_norm": 0.15960076451301575, "learning_rate": 0.0011250142858309057, "loss": 2.5071, "step": 419140 }, { "epoch": 0.8350400038250669, "grad_norm": 0.1435483694076538, "learning_rate": 0.0011247857405183575, "loss": 2.4864, "step": 419150 }, { "epoch": 0.8350599260487058, "grad_norm": 0.16752147674560547, "learning_rate": 0.0011245572548704286, "loss": 2.4995, "step": 419160 }, { "epoch": 0.8350798482723447, "grad_norm": 0.19800859689712524, "learning_rate": 0.0011243288288404144, "loss": 2.5058, "step": 419170 }, { "epoch": 0.8350997704959837, "grad_norm": 0.1613755226135254, "learning_rate": 0.0011241004623816725, "loss": 2.4867, "step": 419180 }, { "epoch": 0.8351196927196226, "grad_norm": 0.1586718112230301, "learning_rate": 0.0011238721554476198, "loss": 2.5072, "step": 419190 }, { "epoch": 0.8351396149432615, "grad_norm": 0.1489235758781433, "learning_rate": 0.0011236439079917342, "loss": 2.5003, "step": 419200 }, { "epoch": 0.8351595371669004, "grad_norm": 0.18022112548351288, "learning_rate": 0.001123415719967555, "loss": 2.5058, "step": 419210 }, { "epoch": 0.8351794593905393, "grad_norm": 0.15984123945236206, "learning_rate": 0.001123187591328681, "loss": 2.5113, "step": 419220 }, { "epoch": 0.8351993816141783, "grad_norm": 0.3497752845287323, "learning_rate": 0.001122959522028772, "loss": 2.5137, "step": 419230 }, { "epoch": 0.8352193038378172, "grad_norm": 0.1662687361240387, "learning_rate": 0.0011227315120215478, "loss": 2.5052, "step": 419240 }, { "epoch": 0.8352392260614561, "grad_norm": 0.15040819346904755, "learning_rate": 0.0011225035612607876, "loss": 2.4912, "step": 419250 }, { "epoch": 0.835259148285095, "grad_norm": 0.14675231277942657, "learning_rate": 0.0011222756697003324, "loss": 2.501, "step": 419260 }, { "epoch": 0.835279070508734, "grad_norm": 0.1611698567867279, "learning_rate": 0.0011220478372940812, "loss": 2.5011, "step": 419270 }, { "epoch": 0.8352989927323728, "grad_norm": 0.16462016105651855, "learning_rate": 0.0011218200639959942, "loss": 2.5054, "step": 419280 }, { "epoch": 0.8353189149560117, "grad_norm": 0.15947063267230988, "learning_rate": 0.0011215923497600898, "loss": 2.4958, "step": 419290 }, { "epoch": 0.8353388371796506, "grad_norm": 0.184757262468338, "learning_rate": 0.001121364694540448, "loss": 2.4903, "step": 419300 }, { "epoch": 0.8353587594032895, "grad_norm": 0.15622372925281525, "learning_rate": 0.001121137098291207, "loss": 2.5118, "step": 419310 }, { "epoch": 0.8353786816269285, "grad_norm": 0.17857691645622253, "learning_rate": 0.0011209095609665636, "loss": 2.4945, "step": 419320 }, { "epoch": 0.8353986038505674, "grad_norm": 0.15086598694324493, "learning_rate": 0.0011206820825207756, "loss": 2.5026, "step": 419330 }, { "epoch": 0.8354185260742063, "grad_norm": 0.16061298549175262, "learning_rate": 0.001120454662908159, "loss": 2.5071, "step": 419340 }, { "epoch": 0.8354384482978452, "grad_norm": 0.174332857131958, "learning_rate": 0.0011202273020830891, "loss": 2.4929, "step": 419350 }, { "epoch": 0.8354583705214841, "grad_norm": 0.1669045388698578, "learning_rate": 0.0011200000000000001, "loss": 2.486, "step": 419360 }, { "epoch": 0.8354782927451231, "grad_norm": 0.15146879851818085, "learning_rate": 0.0011197727566133846, "loss": 2.4996, "step": 419370 }, { "epoch": 0.835498214968762, "grad_norm": 0.15423035621643066, "learning_rate": 0.0011195455718777945, "loss": 2.4783, "step": 419380 }, { "epoch": 0.8355181371924009, "grad_norm": 0.16332754492759705, "learning_rate": 0.0011193184457478403, "loss": 2.506, "step": 419390 }, { "epoch": 0.8355380594160398, "grad_norm": 0.18440988659858704, "learning_rate": 0.0011190913781781904, "loss": 2.4908, "step": 419400 }, { "epoch": 0.8355579816396786, "grad_norm": 0.15988950431346893, "learning_rate": 0.0011188643691235724, "loss": 2.5122, "step": 419410 }, { "epoch": 0.8355779038633177, "grad_norm": 0.17793910205364227, "learning_rate": 0.0011186374185387718, "loss": 2.504, "step": 419420 }, { "epoch": 0.8355978260869565, "grad_norm": 0.16605757176876068, "learning_rate": 0.0011184105263786323, "loss": 2.5015, "step": 419430 }, { "epoch": 0.8356177483105954, "grad_norm": 0.15153466165065765, "learning_rate": 0.001118183692598056, "loss": 2.5022, "step": 419440 }, { "epoch": 0.8356376705342343, "grad_norm": 0.18640108406543732, "learning_rate": 0.0011179569171520024, "loss": 2.4952, "step": 419450 }, { "epoch": 0.8356575927578732, "grad_norm": 0.1691805124282837, "learning_rate": 0.001117730199995489, "loss": 2.4963, "step": 419460 }, { "epoch": 0.8356775149815122, "grad_norm": 0.17371805012226105, "learning_rate": 0.0011175035410835917, "loss": 2.4986, "step": 419470 }, { "epoch": 0.8356974372051511, "grad_norm": 0.15342743694782257, "learning_rate": 0.0011172769403714436, "loss": 2.4973, "step": 419480 }, { "epoch": 0.83571735942879, "grad_norm": 0.18850144743919373, "learning_rate": 0.0011170503978142354, "loss": 2.5062, "step": 419490 }, { "epoch": 0.8357372816524289, "grad_norm": 0.17328202724456787, "learning_rate": 0.0011168239133672153, "loss": 2.4795, "step": 419500 }, { "epoch": 0.8357572038760678, "grad_norm": 0.1745021790266037, "learning_rate": 0.001116597486985689, "loss": 2.517, "step": 419510 }, { "epoch": 0.8357771260997068, "grad_norm": 0.16167095303535461, "learning_rate": 0.0011163711186250192, "loss": 2.5145, "step": 419520 }, { "epoch": 0.8357970483233457, "grad_norm": 0.15454745292663574, "learning_rate": 0.0011161448082406258, "loss": 2.5067, "step": 419530 }, { "epoch": 0.8358169705469846, "grad_norm": 0.1621994972229004, "learning_rate": 0.0011159185557879862, "loss": 2.5036, "step": 419540 }, { "epoch": 0.8358368927706235, "grad_norm": 0.16552527248859406, "learning_rate": 0.0011156923612226342, "loss": 2.5099, "step": 419550 }, { "epoch": 0.8358568149942625, "grad_norm": 0.16117964684963226, "learning_rate": 0.0011154662245001607, "loss": 2.484, "step": 419560 }, { "epoch": 0.8358767372179013, "grad_norm": 0.19000953435897827, "learning_rate": 0.0011152401455762134, "loss": 2.4954, "step": 419570 }, { "epoch": 0.8358966594415402, "grad_norm": 0.1753184199333191, "learning_rate": 0.0011150141244064967, "loss": 2.4915, "step": 419580 }, { "epoch": 0.8359165816651791, "grad_norm": 0.1613403558731079, "learning_rate": 0.001114788160946771, "loss": 2.493, "step": 419590 }, { "epoch": 0.835936503888818, "grad_norm": 0.16978807747364044, "learning_rate": 0.0011145622551528539, "loss": 2.4943, "step": 419600 }, { "epoch": 0.835956426112457, "grad_norm": 0.15134690701961517, "learning_rate": 0.0011143364069806188, "loss": 2.499, "step": 419610 }, { "epoch": 0.8359763483360959, "grad_norm": 0.1492578238248825, "learning_rate": 0.001114110616385996, "loss": 2.5026, "step": 419620 }, { "epoch": 0.8359962705597348, "grad_norm": 0.15347544848918915, "learning_rate": 0.001113884883324971, "loss": 2.5006, "step": 419630 }, { "epoch": 0.8360161927833737, "grad_norm": 0.16237571835517883, "learning_rate": 0.0011136592077535866, "loss": 2.501, "step": 419640 }, { "epoch": 0.8360361150070126, "grad_norm": 0.15874873101711273, "learning_rate": 0.00111343358962794, "loss": 2.4955, "step": 419650 }, { "epoch": 0.8360560372306516, "grad_norm": 0.16190491616725922, "learning_rate": 0.0011132080289041855, "loss": 2.514, "step": 419660 }, { "epoch": 0.8360759594542905, "grad_norm": 0.1841159164905548, "learning_rate": 0.0011129825255385326, "loss": 2.4981, "step": 419670 }, { "epoch": 0.8360958816779294, "grad_norm": 0.1871269792318344, "learning_rate": 0.0011127570794872469, "loss": 2.483, "step": 419680 }, { "epoch": 0.8361158039015683, "grad_norm": 0.17132262885570526, "learning_rate": 0.0011125316907066485, "loss": 2.4928, "step": 419690 }, { "epoch": 0.8361357261252071, "grad_norm": 0.1660955548286438, "learning_rate": 0.0011123063591531141, "loss": 2.508, "step": 419700 }, { "epoch": 0.8361556483488461, "grad_norm": 0.18209435045719147, "learning_rate": 0.0011120810847830755, "loss": 2.4953, "step": 419710 }, { "epoch": 0.836175570572485, "grad_norm": 0.16593623161315918, "learning_rate": 0.0011118558675530196, "loss": 2.5107, "step": 419720 }, { "epoch": 0.8361954927961239, "grad_norm": 0.17405329644680023, "learning_rate": 0.0011116307074194877, "loss": 2.4836, "step": 419730 }, { "epoch": 0.8362154150197628, "grad_norm": 0.1805039346218109, "learning_rate": 0.0011114056043390775, "loss": 2.4918, "step": 419740 }, { "epoch": 0.8362353372434017, "grad_norm": 0.15366403758525848, "learning_rate": 0.001111180558268441, "loss": 2.4948, "step": 419750 }, { "epoch": 0.8362552594670407, "grad_norm": 0.1667601466178894, "learning_rate": 0.0011109555691642852, "loss": 2.499, "step": 419760 }, { "epoch": 0.8362751816906796, "grad_norm": 0.17468954622745514, "learning_rate": 0.0011107306369833715, "loss": 2.4902, "step": 419770 }, { "epoch": 0.8362951039143185, "grad_norm": 0.1442538946866989, "learning_rate": 0.0011105057616825165, "loss": 2.4907, "step": 419780 }, { "epoch": 0.8363150261379574, "grad_norm": 0.15146446228027344, "learning_rate": 0.0011102809432185913, "loss": 2.4984, "step": 419790 }, { "epoch": 0.8363349483615963, "grad_norm": 0.16930830478668213, "learning_rate": 0.0011100561815485205, "loss": 2.4938, "step": 419800 }, { "epoch": 0.8363548705852353, "grad_norm": 0.16444075107574463, "learning_rate": 0.0011098314766292846, "loss": 2.4983, "step": 419810 }, { "epoch": 0.8363747928088742, "grad_norm": 0.1740850806236267, "learning_rate": 0.0011096068284179176, "loss": 2.4984, "step": 419820 }, { "epoch": 0.8363947150325131, "grad_norm": 0.16278937458992004, "learning_rate": 0.0011093822368715073, "loss": 2.4971, "step": 419830 }, { "epoch": 0.836414637256152, "grad_norm": 0.18012863397598267, "learning_rate": 0.0011091577019471965, "loss": 2.5121, "step": 419840 }, { "epoch": 0.836434559479791, "grad_norm": 0.15265004336833954, "learning_rate": 0.0011089332236021814, "loss": 2.4987, "step": 419850 }, { "epoch": 0.8364544817034298, "grad_norm": 0.1564292311668396, "learning_rate": 0.0011087088017937123, "loss": 2.4943, "step": 419860 }, { "epoch": 0.8364744039270687, "grad_norm": 0.17670215666294098, "learning_rate": 0.0011084844364790932, "loss": 2.4966, "step": 419870 }, { "epoch": 0.8364943261507076, "grad_norm": 0.16317789256572723, "learning_rate": 0.001108260127615682, "loss": 2.4943, "step": 419880 }, { "epoch": 0.8365142483743465, "grad_norm": 0.16881190240383148, "learning_rate": 0.0011080358751608896, "loss": 2.5117, "step": 419890 }, { "epoch": 0.8365341705979855, "grad_norm": 0.16618387401103973, "learning_rate": 0.0011078116790721815, "loss": 2.4771, "step": 419900 }, { "epoch": 0.8365540928216244, "grad_norm": 0.15897315740585327, "learning_rate": 0.0011075875393070757, "loss": 2.5043, "step": 419910 }, { "epoch": 0.8365740150452633, "grad_norm": 0.17270813882350922, "learning_rate": 0.001107363455823144, "loss": 2.4956, "step": 419920 }, { "epoch": 0.8365939372689022, "grad_norm": 0.15602219104766846, "learning_rate": 0.0011071394285780114, "loss": 2.4969, "step": 419930 }, { "epoch": 0.8366138594925411, "grad_norm": 0.15528294444084167, "learning_rate": 0.0011069154575293555, "loss": 2.4978, "step": 419940 }, { "epoch": 0.8366337817161801, "grad_norm": 0.16294965147972107, "learning_rate": 0.0011066915426349083, "loss": 2.491, "step": 419950 }, { "epoch": 0.836653703939819, "grad_norm": 0.21122971177101135, "learning_rate": 0.001106467683852453, "loss": 2.4951, "step": 419960 }, { "epoch": 0.8366736261634579, "grad_norm": 0.17117829620838165, "learning_rate": 0.001106243881139827, "loss": 2.5043, "step": 419970 }, { "epoch": 0.8366935483870968, "grad_norm": 0.17787200212478638, "learning_rate": 0.0011060201344549197, "loss": 2.5016, "step": 419980 }, { "epoch": 0.8367134706107356, "grad_norm": 0.1750144362449646, "learning_rate": 0.0011057964437556737, "loss": 2.5061, "step": 419990 }, { "epoch": 0.8367333928343746, "grad_norm": 0.1768816113471985, "learning_rate": 0.0011055728090000843, "loss": 2.5051, "step": 420000 }, { "epoch": 0.8367533150580135, "grad_norm": 0.18050217628479004, "learning_rate": 0.0011053492301461986, "loss": 2.5114, "step": 420010 }, { "epoch": 0.8367732372816524, "grad_norm": 0.16112853586673737, "learning_rate": 0.0011051257071521164, "loss": 2.5113, "step": 420020 }, { "epoch": 0.8367931595052913, "grad_norm": 0.14340966939926147, "learning_rate": 0.0011049022399759903, "loss": 2.5091, "step": 420030 }, { "epoch": 0.8368130817289302, "grad_norm": 0.1689361333847046, "learning_rate": 0.0011046788285760246, "loss": 2.5092, "step": 420040 }, { "epoch": 0.8368330039525692, "grad_norm": 0.16682304441928864, "learning_rate": 0.0011044554729104754, "loss": 2.5031, "step": 420050 }, { "epoch": 0.8368529261762081, "grad_norm": 0.1575222909450531, "learning_rate": 0.0011042321729376524, "loss": 2.5101, "step": 420060 }, { "epoch": 0.836872848399847, "grad_norm": 0.1634014993906021, "learning_rate": 0.0011040089286159151, "loss": 2.4922, "step": 420070 }, { "epoch": 0.8368927706234859, "grad_norm": 0.1459985226392746, "learning_rate": 0.0011037857399036769, "loss": 2.5096, "step": 420080 }, { "epoch": 0.8369126928471248, "grad_norm": 0.1695406138896942, "learning_rate": 0.0011035626067594012, "loss": 2.49, "step": 420090 }, { "epoch": 0.8369326150707638, "grad_norm": 0.14914655685424805, "learning_rate": 0.0011033395291416043, "loss": 2.4924, "step": 420100 }, { "epoch": 0.8369525372944027, "grad_norm": 0.16005611419677734, "learning_rate": 0.0011031165070088536, "loss": 2.498, "step": 420110 }, { "epoch": 0.8369724595180416, "grad_norm": 0.17357276380062103, "learning_rate": 0.001102893540319768, "loss": 2.5058, "step": 420120 }, { "epoch": 0.8369923817416804, "grad_norm": 0.1587935835123062, "learning_rate": 0.0011026706290330178, "loss": 2.5125, "step": 420130 }, { "epoch": 0.8370123039653194, "grad_norm": 0.17656210064888, "learning_rate": 0.001102447773107325, "loss": 2.5017, "step": 420140 }, { "epoch": 0.8370322261889583, "grad_norm": 0.163879856467247, "learning_rate": 0.0011022249725014623, "loss": 2.4901, "step": 420150 }, { "epoch": 0.8370521484125972, "grad_norm": 0.16968970000743866, "learning_rate": 0.0011020022271742542, "loss": 2.505, "step": 420160 }, { "epoch": 0.8370720706362361, "grad_norm": 0.16234704852104187, "learning_rate": 0.0011017795370845752, "loss": 2.4845, "step": 420170 }, { "epoch": 0.837091992859875, "grad_norm": 0.16229505836963654, "learning_rate": 0.0011015569021913519, "loss": 2.5057, "step": 420180 }, { "epoch": 0.837111915083514, "grad_norm": 0.1738448441028595, "learning_rate": 0.0011013343224535613, "loss": 2.4866, "step": 420190 }, { "epoch": 0.8371318373071529, "grad_norm": 0.16437695920467377, "learning_rate": 0.0011011117978302308, "loss": 2.5048, "step": 420200 }, { "epoch": 0.8371517595307918, "grad_norm": 0.14746958017349243, "learning_rate": 0.0011008893282804392, "loss": 2.5113, "step": 420210 }, { "epoch": 0.8371716817544307, "grad_norm": 0.16532078385353088, "learning_rate": 0.0011006669137633154, "loss": 2.5029, "step": 420220 }, { "epoch": 0.8371916039780696, "grad_norm": 0.17726068198680878, "learning_rate": 0.0011004445542380393, "loss": 2.5076, "step": 420230 }, { "epoch": 0.8372115262017086, "grad_norm": 0.15393424034118652, "learning_rate": 0.0011002222496638407, "loss": 2.5008, "step": 420240 }, { "epoch": 0.8372314484253475, "grad_norm": 0.1576584130525589, "learning_rate": 0.0011, "loss": 2.4942, "step": 420250 }, { "epoch": 0.8372513706489864, "grad_norm": 0.17598898708820343, "learning_rate": 0.0010997778052058483, "loss": 2.5068, "step": 420260 }, { "epoch": 0.8372712928726253, "grad_norm": 0.17009474337100983, "learning_rate": 0.0010995556652407656, "loss": 2.5066, "step": 420270 }, { "epoch": 0.8372912150962641, "grad_norm": 0.1645357608795166, "learning_rate": 0.0010993335800641838, "loss": 2.4891, "step": 420280 }, { "epoch": 0.8373111373199031, "grad_norm": 0.14807648956775665, "learning_rate": 0.0010991115496355834, "loss": 2.4989, "step": 420290 }, { "epoch": 0.837331059543542, "grad_norm": 0.1673106998205185, "learning_rate": 0.0010988895739144954, "loss": 2.5088, "step": 420300 }, { "epoch": 0.8373509817671809, "grad_norm": 0.14872372150421143, "learning_rate": 0.0010986676528605003, "loss": 2.5005, "step": 420310 }, { "epoch": 0.8373709039908198, "grad_norm": 0.15971170365810394, "learning_rate": 0.0010984457864332284, "loss": 2.5017, "step": 420320 }, { "epoch": 0.8373908262144587, "grad_norm": 0.2009911835193634, "learning_rate": 0.0010982239745923603, "loss": 2.4978, "step": 420330 }, { "epoch": 0.8374107484380977, "grad_norm": 0.1548869013786316, "learning_rate": 0.0010980022172976255, "loss": 2.5042, "step": 420340 }, { "epoch": 0.8374306706617366, "grad_norm": 0.17568299174308777, "learning_rate": 0.0010977805145088034, "loss": 2.4935, "step": 420350 }, { "epoch": 0.8374505928853755, "grad_norm": 0.1817784458398819, "learning_rate": 0.0010975588661857217, "loss": 2.495, "step": 420360 }, { "epoch": 0.8374705151090144, "grad_norm": 0.16730071604251862, "learning_rate": 0.0010973372722882595, "loss": 2.4974, "step": 420370 }, { "epoch": 0.8374904373326533, "grad_norm": 0.17555423080921173, "learning_rate": 0.001097115732776343, "loss": 2.5273, "step": 420380 }, { "epoch": 0.8375103595562923, "grad_norm": 0.1639835089445114, "learning_rate": 0.001096894247609949, "loss": 2.4868, "step": 420390 }, { "epoch": 0.8375302817799312, "grad_norm": 0.14257986843585968, "learning_rate": 0.001096672816749103, "loss": 2.4876, "step": 420400 }, { "epoch": 0.83755020400357, "grad_norm": 0.1781865656375885, "learning_rate": 0.0010964514401538789, "loss": 2.5201, "step": 420410 }, { "epoch": 0.8375701262272089, "grad_norm": 0.1661914438009262, "learning_rate": 0.0010962301177843998, "loss": 2.4827, "step": 420420 }, { "epoch": 0.8375900484508478, "grad_norm": 0.15119437873363495, "learning_rate": 0.0010960088496008381, "loss": 2.4965, "step": 420430 }, { "epoch": 0.8376099706744868, "grad_norm": 0.16528470814228058, "learning_rate": 0.0010957876355634148, "loss": 2.4996, "step": 420440 }, { "epoch": 0.8376298928981257, "grad_norm": 0.16076529026031494, "learning_rate": 0.001095566475632399, "loss": 2.4882, "step": 420450 }, { "epoch": 0.8376498151217646, "grad_norm": 0.1639702320098877, "learning_rate": 0.0010953453697681087, "loss": 2.4832, "step": 420460 }, { "epoch": 0.8376697373454035, "grad_norm": 0.14770352840423584, "learning_rate": 0.0010951243179309106, "loss": 2.4931, "step": 420470 }, { "epoch": 0.8376896595690425, "grad_norm": 0.17656829953193665, "learning_rate": 0.0010949033200812193, "loss": 2.5081, "step": 420480 }, { "epoch": 0.8377095817926814, "grad_norm": 0.1568860411643982, "learning_rate": 0.001094682376179498, "loss": 2.5106, "step": 420490 }, { "epoch": 0.8377295040163203, "grad_norm": 0.15384738147258759, "learning_rate": 0.0010944614861862583, "loss": 2.5077, "step": 420500 }, { "epoch": 0.8377494262399592, "grad_norm": 0.17462469637393951, "learning_rate": 0.00109424065006206, "loss": 2.5089, "step": 420510 }, { "epoch": 0.8377693484635981, "grad_norm": 0.16899117827415466, "learning_rate": 0.00109401986776751, "loss": 2.4962, "step": 420520 }, { "epoch": 0.8377892706872371, "grad_norm": 0.16148388385772705, "learning_rate": 0.0010937991392632647, "loss": 2.4989, "step": 420530 }, { "epoch": 0.837809192910876, "grad_norm": 0.17938749492168427, "learning_rate": 0.0010935784645100272, "loss": 2.5023, "step": 420540 }, { "epoch": 0.8378291151345149, "grad_norm": 0.17446857690811157, "learning_rate": 0.001093357843468549, "loss": 2.4816, "step": 420550 }, { "epoch": 0.8378490373581537, "grad_norm": 0.15482380986213684, "learning_rate": 0.0010931372760996294, "loss": 2.5011, "step": 420560 }, { "epoch": 0.8378689595817926, "grad_norm": 0.17740945518016815, "learning_rate": 0.0010929167623641147, "loss": 2.4974, "step": 420570 }, { "epoch": 0.8378888818054316, "grad_norm": 0.1710691899061203, "learning_rate": 0.0010926963022228997, "loss": 2.5078, "step": 420580 }, { "epoch": 0.8379088040290705, "grad_norm": 0.17348821461200714, "learning_rate": 0.0010924758956369258, "loss": 2.5041, "step": 420590 }, { "epoch": 0.8379287262527094, "grad_norm": 0.14122940599918365, "learning_rate": 0.0010922555425671825, "loss": 2.5136, "step": 420600 }, { "epoch": 0.8379486484763483, "grad_norm": 0.16992755234241486, "learning_rate": 0.0010920352429747066, "loss": 2.5046, "step": 420610 }, { "epoch": 0.8379685706999872, "grad_norm": 0.1672450304031372, "learning_rate": 0.0010918149968205818, "loss": 2.4907, "step": 420620 }, { "epoch": 0.8379884929236262, "grad_norm": 0.15571893751621246, "learning_rate": 0.0010915948040659388, "loss": 2.4884, "step": 420630 }, { "epoch": 0.8380084151472651, "grad_norm": 0.1660158336162567, "learning_rate": 0.0010913746646719561, "loss": 2.5052, "step": 420640 }, { "epoch": 0.838028337370904, "grad_norm": 0.18060365319252014, "learning_rate": 0.0010911545785998592, "loss": 2.4937, "step": 420650 }, { "epoch": 0.8380482595945429, "grad_norm": 0.17364747822284698, "learning_rate": 0.001090934545810919, "loss": 2.503, "step": 420660 }, { "epoch": 0.8380681818181818, "grad_norm": 0.18030676245689392, "learning_rate": 0.0010907145662664556, "loss": 2.5087, "step": 420670 }, { "epoch": 0.8380881040418208, "grad_norm": 0.17192348837852478, "learning_rate": 0.0010904946399278343, "loss": 2.4901, "step": 420680 }, { "epoch": 0.8381080262654597, "grad_norm": 0.19158858060836792, "learning_rate": 0.0010902747667564671, "loss": 2.4888, "step": 420690 }, { "epoch": 0.8381279484890986, "grad_norm": 0.16983698308467865, "learning_rate": 0.001090054946713814, "loss": 2.4891, "step": 420700 }, { "epoch": 0.8381478707127374, "grad_norm": 0.16221605241298676, "learning_rate": 0.0010898351797613798, "loss": 2.4997, "step": 420710 }, { "epoch": 0.8381677929363763, "grad_norm": 0.18629175424575806, "learning_rate": 0.0010896154658607167, "loss": 2.4981, "step": 420720 }, { "epoch": 0.8381877151600153, "grad_norm": 0.1371159851551056, "learning_rate": 0.0010893958049734232, "loss": 2.5057, "step": 420730 }, { "epoch": 0.8382076373836542, "grad_norm": 0.1725771278142929, "learning_rate": 0.001089176197061144, "loss": 2.5019, "step": 420740 }, { "epoch": 0.8382275596072931, "grad_norm": 0.1558554768562317, "learning_rate": 0.0010889566420855701, "loss": 2.5008, "step": 420750 }, { "epoch": 0.838247481830932, "grad_norm": 0.1651686728000641, "learning_rate": 0.0010887371400084386, "loss": 2.4909, "step": 420760 }, { "epoch": 0.838267404054571, "grad_norm": 0.15699264407157898, "learning_rate": 0.001088517690791533, "loss": 2.4908, "step": 420770 }, { "epoch": 0.8382873262782099, "grad_norm": 0.16151493787765503, "learning_rate": 0.0010882982943966814, "loss": 2.511, "step": 420780 }, { "epoch": 0.8383072485018488, "grad_norm": 0.17820248007774353, "learning_rate": 0.0010880789507857602, "loss": 2.4859, "step": 420790 }, { "epoch": 0.8383271707254877, "grad_norm": 0.17211605608463287, "learning_rate": 0.0010878596599206895, "loss": 2.4825, "step": 420800 }, { "epoch": 0.8383470929491266, "grad_norm": 0.14656126499176025, "learning_rate": 0.0010876404217634366, "loss": 2.4836, "step": 420810 }, { "epoch": 0.8383670151727656, "grad_norm": 0.15903812646865845, "learning_rate": 0.0010874212362760134, "loss": 2.4816, "step": 420820 }, { "epoch": 0.8383869373964045, "grad_norm": 0.16991709172725677, "learning_rate": 0.001087202103420478, "loss": 2.4988, "step": 420830 }, { "epoch": 0.8384068596200434, "grad_norm": 0.1543756127357483, "learning_rate": 0.0010869830231589339, "loss": 2.509, "step": 420840 }, { "epoch": 0.8384267818436822, "grad_norm": 0.15204402804374695, "learning_rate": 0.0010867639954535303, "loss": 2.5154, "step": 420850 }, { "epoch": 0.8384467040673211, "grad_norm": 0.15570992231369019, "learning_rate": 0.001086545020266461, "loss": 2.4853, "step": 420860 }, { "epoch": 0.8384666262909601, "grad_norm": 0.2237449735403061, "learning_rate": 0.0010863260975599665, "loss": 2.4977, "step": 420870 }, { "epoch": 0.838486548514599, "grad_norm": 0.1656871885061264, "learning_rate": 0.001086107227296331, "loss": 2.5157, "step": 420880 }, { "epoch": 0.8385064707382379, "grad_norm": 0.17236541211605072, "learning_rate": 0.001085888409437885, "loss": 2.4968, "step": 420890 }, { "epoch": 0.8385263929618768, "grad_norm": 0.1922597587108612, "learning_rate": 0.001085669643947003, "loss": 2.5016, "step": 420900 }, { "epoch": 0.8385463151855157, "grad_norm": 0.15692946314811707, "learning_rate": 0.0010854509307861057, "loss": 2.4979, "step": 420910 }, { "epoch": 0.8385662374091547, "grad_norm": 0.1966712772846222, "learning_rate": 0.0010852322699176585, "loss": 2.4913, "step": 420920 }, { "epoch": 0.8385861596327936, "grad_norm": 0.16906292736530304, "learning_rate": 0.0010850136613041702, "loss": 2.5085, "step": 420930 }, { "epoch": 0.8386060818564325, "grad_norm": 0.18047486245632172, "learning_rate": 0.001084795104908196, "loss": 2.4949, "step": 420940 }, { "epoch": 0.8386260040800714, "grad_norm": 0.1695374846458435, "learning_rate": 0.0010845766006923354, "loss": 2.4932, "step": 420950 }, { "epoch": 0.8386459263037103, "grad_norm": 0.16412128508090973, "learning_rate": 0.0010843581486192323, "loss": 2.4954, "step": 420960 }, { "epoch": 0.8386658485273493, "grad_norm": 0.18200188875198364, "learning_rate": 0.0010841397486515752, "loss": 2.4975, "step": 420970 }, { "epoch": 0.8386857707509882, "grad_norm": 0.18445609509944916, "learning_rate": 0.001083921400752097, "loss": 2.5049, "step": 420980 }, { "epoch": 0.838705692974627, "grad_norm": 0.152959942817688, "learning_rate": 0.0010837031048835754, "loss": 2.4959, "step": 420990 }, { "epoch": 0.8387256151982659, "grad_norm": 0.1775761842727661, "learning_rate": 0.0010834848610088322, "loss": 2.4938, "step": 421000 }, { "epoch": 0.8387455374219048, "grad_norm": 0.1712588667869568, "learning_rate": 0.0010832666690907327, "loss": 2.5056, "step": 421010 }, { "epoch": 0.8387654596455438, "grad_norm": 0.1784202605485916, "learning_rate": 0.0010830485290921881, "loss": 2.4894, "step": 421020 }, { "epoch": 0.8387853818691827, "grad_norm": 0.18494504690170288, "learning_rate": 0.001082830440976152, "loss": 2.4859, "step": 421030 }, { "epoch": 0.8388053040928216, "grad_norm": 0.158297598361969, "learning_rate": 0.001082612404705623, "loss": 2.5036, "step": 421040 }, { "epoch": 0.8388252263164605, "grad_norm": 0.1872245967388153, "learning_rate": 0.001082394420243643, "loss": 2.5033, "step": 421050 }, { "epoch": 0.8388451485400995, "grad_norm": 0.1720411330461502, "learning_rate": 0.0010821764875532987, "loss": 2.512, "step": 421060 }, { "epoch": 0.8388650707637384, "grad_norm": 0.1547027975320816, "learning_rate": 0.0010819586065977198, "loss": 2.5045, "step": 421070 }, { "epoch": 0.8388849929873773, "grad_norm": 0.17229415476322174, "learning_rate": 0.0010817407773400802, "loss": 2.502, "step": 421080 }, { "epoch": 0.8389049152110162, "grad_norm": 0.13678134977817535, "learning_rate": 0.0010815229997435973, "loss": 2.4891, "step": 421090 }, { "epoch": 0.8389248374346551, "grad_norm": 0.19045376777648926, "learning_rate": 0.0010813052737715318, "loss": 2.4989, "step": 421100 }, { "epoch": 0.8389447596582941, "grad_norm": 0.1794947385787964, "learning_rate": 0.0010810875993871884, "loss": 2.5006, "step": 421110 }, { "epoch": 0.838964681881933, "grad_norm": 0.17680084705352783, "learning_rate": 0.0010808699765539154, "loss": 2.498, "step": 421120 }, { "epoch": 0.8389846041055719, "grad_norm": 0.16614586114883423, "learning_rate": 0.0010806524052351038, "loss": 2.4845, "step": 421130 }, { "epoch": 0.8390045263292107, "grad_norm": 0.16594454646110535, "learning_rate": 0.0010804348853941882, "loss": 2.4923, "step": 421140 }, { "epoch": 0.8390244485528496, "grad_norm": 0.167612224817276, "learning_rate": 0.0010802174169946466, "loss": 2.5011, "step": 421150 }, { "epoch": 0.8390443707764886, "grad_norm": 0.15617072582244873, "learning_rate": 0.00108, "loss": 2.5063, "step": 421160 }, { "epoch": 0.8390642930001275, "grad_norm": 0.16843347251415253, "learning_rate": 0.0010797826343738129, "loss": 2.509, "step": 421170 }, { "epoch": 0.8390842152237664, "grad_norm": 0.18212905526161194, "learning_rate": 0.001079565320079692, "loss": 2.4919, "step": 421180 }, { "epoch": 0.8391041374474053, "grad_norm": 0.16680598258972168, "learning_rate": 0.0010793480570812878, "loss": 2.4964, "step": 421190 }, { "epoch": 0.8391240596710442, "grad_norm": 0.1726182997226715, "learning_rate": 0.001079130845342293, "loss": 2.4975, "step": 421200 }, { "epoch": 0.8391439818946832, "grad_norm": 0.15571993589401245, "learning_rate": 0.0010789136848264435, "loss": 2.4914, "step": 421210 }, { "epoch": 0.8391639041183221, "grad_norm": 0.18087035417556763, "learning_rate": 0.0010786965754975184, "loss": 2.4917, "step": 421220 }, { "epoch": 0.839183826341961, "grad_norm": 0.16769501566886902, "learning_rate": 0.0010784795173193382, "loss": 2.5011, "step": 421230 }, { "epoch": 0.8392037485655999, "grad_norm": 0.1592089831829071, "learning_rate": 0.001078262510255767, "loss": 2.5065, "step": 421240 }, { "epoch": 0.8392236707892388, "grad_norm": 0.15967364609241486, "learning_rate": 0.0010780455542707113, "loss": 2.482, "step": 421250 }, { "epoch": 0.8392435930128778, "grad_norm": 0.1580633521080017, "learning_rate": 0.0010778286493281198, "loss": 2.5086, "step": 421260 }, { "epoch": 0.8392635152365167, "grad_norm": 0.1672946661710739, "learning_rate": 0.0010776117953919834, "loss": 2.4984, "step": 421270 }, { "epoch": 0.8392834374601555, "grad_norm": 0.16511568427085876, "learning_rate": 0.0010773949924263364, "loss": 2.5131, "step": 421280 }, { "epoch": 0.8393033596837944, "grad_norm": 0.1665404587984085, "learning_rate": 0.0010771782403952539, "loss": 2.4955, "step": 421290 }, { "epoch": 0.8393232819074333, "grad_norm": 0.16939038038253784, "learning_rate": 0.0010769615392628537, "loss": 2.4871, "step": 421300 }, { "epoch": 0.8393432041310723, "grad_norm": 0.16492542624473572, "learning_rate": 0.0010767448889932967, "loss": 2.4921, "step": 421310 }, { "epoch": 0.8393631263547112, "grad_norm": 0.2257681041955948, "learning_rate": 0.0010765282895507844, "loss": 2.4924, "step": 421320 }, { "epoch": 0.8393830485783501, "grad_norm": 0.1831596940755844, "learning_rate": 0.0010763117408995608, "loss": 2.5048, "step": 421330 }, { "epoch": 0.839402970801989, "grad_norm": 0.17585575580596924, "learning_rate": 0.0010760952430039121, "loss": 2.4889, "step": 421340 }, { "epoch": 0.839422893025628, "grad_norm": 0.16164445877075195, "learning_rate": 0.0010758787958281663, "loss": 2.4983, "step": 421350 }, { "epoch": 0.8394428152492669, "grad_norm": 0.19638291001319885, "learning_rate": 0.0010756623993366924, "loss": 2.4936, "step": 421360 }, { "epoch": 0.8394627374729058, "grad_norm": 0.18244117498397827, "learning_rate": 0.0010754460534939025, "loss": 2.4983, "step": 421370 }, { "epoch": 0.8394826596965447, "grad_norm": 0.15414360165596008, "learning_rate": 0.0010752297582642487, "loss": 2.5096, "step": 421380 }, { "epoch": 0.8395025819201836, "grad_norm": 0.16161704063415527, "learning_rate": 0.001075013513612226, "loss": 2.4849, "step": 421390 }, { "epoch": 0.8395225041438226, "grad_norm": 0.16227075457572937, "learning_rate": 0.0010747973195023696, "loss": 2.503, "step": 421400 }, { "epoch": 0.8395424263674615, "grad_norm": 0.16977766156196594, "learning_rate": 0.001074581175899258, "loss": 2.4853, "step": 421410 }, { "epoch": 0.8395623485911003, "grad_norm": 0.17227040231227875, "learning_rate": 0.001074365082767509, "loss": 2.5001, "step": 421420 }, { "epoch": 0.8395822708147392, "grad_norm": 0.16743330657482147, "learning_rate": 0.0010741490400717835, "loss": 2.507, "step": 421430 }, { "epoch": 0.8396021930383781, "grad_norm": 0.16285833716392517, "learning_rate": 0.0010739330477767819, "loss": 2.4861, "step": 421440 }, { "epoch": 0.8396221152620171, "grad_norm": 0.19517697393894196, "learning_rate": 0.0010737171058472473, "loss": 2.48, "step": 421450 }, { "epoch": 0.839642037485656, "grad_norm": 0.18360130488872528, "learning_rate": 0.0010735012142479624, "loss": 2.4884, "step": 421460 }, { "epoch": 0.8396619597092949, "grad_norm": 0.18022723495960236, "learning_rate": 0.0010732853729437527, "loss": 2.4959, "step": 421470 }, { "epoch": 0.8396818819329338, "grad_norm": 0.1805436760187149, "learning_rate": 0.0010730695818994827, "loss": 2.4911, "step": 421480 }, { "epoch": 0.8397018041565727, "grad_norm": 0.15044213831424713, "learning_rate": 0.0010728538410800592, "loss": 2.5, "step": 421490 }, { "epoch": 0.8397217263802117, "grad_norm": 0.1739618182182312, "learning_rate": 0.0010726381504504296, "loss": 2.4853, "step": 421500 }, { "epoch": 0.8397416486038506, "grad_norm": 0.16690438985824585, "learning_rate": 0.0010724225099755815, "loss": 2.5029, "step": 421510 }, { "epoch": 0.8397615708274895, "grad_norm": 0.18907767534255981, "learning_rate": 0.0010722069196205438, "loss": 2.5091, "step": 421520 }, { "epoch": 0.8397814930511284, "grad_norm": 0.17382368445396423, "learning_rate": 0.0010719913793503855, "loss": 2.4881, "step": 421530 }, { "epoch": 0.8398014152747673, "grad_norm": 0.16067548096179962, "learning_rate": 0.0010717758891302168, "loss": 2.5019, "step": 421540 }, { "epoch": 0.8398213374984063, "grad_norm": 0.14655914902687073, "learning_rate": 0.0010715604489251872, "loss": 2.4989, "step": 421550 }, { "epoch": 0.8398412597220452, "grad_norm": 0.15235233306884766, "learning_rate": 0.001071345058700488, "loss": 2.5053, "step": 421560 }, { "epoch": 0.839861181945684, "grad_norm": 0.1542920619249344, "learning_rate": 0.0010711297184213505, "loss": 2.4964, "step": 421570 }, { "epoch": 0.8398811041693229, "grad_norm": 0.15947163105010986, "learning_rate": 0.0010709144280530451, "loss": 2.4944, "step": 421580 }, { "epoch": 0.8399010263929618, "grad_norm": 0.1651822328567505, "learning_rate": 0.0010706991875608844, "loss": 2.4975, "step": 421590 }, { "epoch": 0.8399209486166008, "grad_norm": 0.15190333127975464, "learning_rate": 0.00107048399691022, "loss": 2.5122, "step": 421600 }, { "epoch": 0.8399408708402397, "grad_norm": 0.19124893844127655, "learning_rate": 0.0010702688560664433, "loss": 2.4952, "step": 421610 }, { "epoch": 0.8399607930638786, "grad_norm": 0.174396350979805, "learning_rate": 0.0010700537649949865, "loss": 2.5053, "step": 421620 }, { "epoch": 0.8399807152875175, "grad_norm": 0.15273265540599823, "learning_rate": 0.0010698387236613212, "loss": 2.4917, "step": 421630 }, { "epoch": 0.8400006375111565, "grad_norm": 0.16878551244735718, "learning_rate": 0.0010696237320309595, "loss": 2.4942, "step": 421640 }, { "epoch": 0.8400205597347954, "grad_norm": 0.15990999341011047, "learning_rate": 0.0010694087900694528, "loss": 2.5116, "step": 421650 }, { "epoch": 0.8400404819584343, "grad_norm": 0.1525391936302185, "learning_rate": 0.0010691938977423923, "loss": 2.4891, "step": 421660 }, { "epoch": 0.8400604041820732, "grad_norm": 0.16839347779750824, "learning_rate": 0.0010689790550154094, "loss": 2.4945, "step": 421670 }, { "epoch": 0.8400803264057121, "grad_norm": 0.17866232991218567, "learning_rate": 0.0010687642618541747, "loss": 2.4992, "step": 421680 }, { "epoch": 0.8401002486293511, "grad_norm": 0.17100580036640167, "learning_rate": 0.0010685495182243985, "loss": 2.5033, "step": 421690 }, { "epoch": 0.84012017085299, "grad_norm": 0.17098814249038696, "learning_rate": 0.001068334824091831, "loss": 2.4903, "step": 421700 }, { "epoch": 0.8401400930766288, "grad_norm": 0.16459563374519348, "learning_rate": 0.0010681201794222606, "loss": 2.4861, "step": 421710 }, { "epoch": 0.8401600153002677, "grad_norm": 0.18847115337848663, "learning_rate": 0.0010679055841815166, "loss": 2.4966, "step": 421720 }, { "epoch": 0.8401799375239066, "grad_norm": 0.17412780225276947, "learning_rate": 0.0010676910383354667, "loss": 2.4978, "step": 421730 }, { "epoch": 0.8401998597475456, "grad_norm": 0.16403013467788696, "learning_rate": 0.0010674765418500188, "loss": 2.4905, "step": 421740 }, { "epoch": 0.8402197819711845, "grad_norm": 0.16140471398830414, "learning_rate": 0.0010672620946911185, "loss": 2.4771, "step": 421750 }, { "epoch": 0.8402397041948234, "grad_norm": 0.2038239985704422, "learning_rate": 0.001067047696824752, "loss": 2.4787, "step": 421760 }, { "epoch": 0.8402596264184623, "grad_norm": 0.15934088826179504, "learning_rate": 0.0010668333482169437, "loss": 2.5035, "step": 421770 }, { "epoch": 0.8402795486421012, "grad_norm": 0.18489232659339905, "learning_rate": 0.0010666190488337573, "loss": 2.5112, "step": 421780 }, { "epoch": 0.8402994708657402, "grad_norm": 0.18052470684051514, "learning_rate": 0.0010664047986412954, "loss": 2.4982, "step": 421790 }, { "epoch": 0.8403193930893791, "grad_norm": 0.1565270721912384, "learning_rate": 0.0010661905976056998, "loss": 2.4929, "step": 421800 }, { "epoch": 0.840339315313018, "grad_norm": 0.16400550305843353, "learning_rate": 0.0010659764456931507, "loss": 2.4955, "step": 421810 }, { "epoch": 0.8403592375366569, "grad_norm": 0.15702512860298157, "learning_rate": 0.001065762342869867, "loss": 2.4897, "step": 421820 }, { "epoch": 0.8403791597602958, "grad_norm": 0.15994727611541748, "learning_rate": 0.0010655482891021068, "loss": 2.4826, "step": 421830 }, { "epoch": 0.8403990819839348, "grad_norm": 0.1907452493906021, "learning_rate": 0.0010653342843561662, "loss": 2.4978, "step": 421840 }, { "epoch": 0.8404190042075736, "grad_norm": 0.17777588963508606, "learning_rate": 0.0010651203285983806, "loss": 2.5101, "step": 421850 }, { "epoch": 0.8404389264312125, "grad_norm": 0.16422274708747864, "learning_rate": 0.0010649064217951234, "loss": 2.4922, "step": 421860 }, { "epoch": 0.8404588486548514, "grad_norm": 0.17719489336013794, "learning_rate": 0.0010646925639128063, "loss": 2.4783, "step": 421870 }, { "epoch": 0.8404787708784903, "grad_norm": 0.18134838342666626, "learning_rate": 0.00106447875491788, "loss": 2.4975, "step": 421880 }, { "epoch": 0.8404986931021293, "grad_norm": 0.18304727971553802, "learning_rate": 0.0010642649947768333, "loss": 2.495, "step": 421890 }, { "epoch": 0.8405186153257682, "grad_norm": 0.1489657759666443, "learning_rate": 0.0010640512834561929, "loss": 2.4981, "step": 421900 }, { "epoch": 0.8405385375494071, "grad_norm": 0.15570999681949615, "learning_rate": 0.001063837620922524, "loss": 2.4932, "step": 421910 }, { "epoch": 0.840558459773046, "grad_norm": 0.18075338006019592, "learning_rate": 0.00106362400714243, "loss": 2.4925, "step": 421920 }, { "epoch": 0.8405783819966849, "grad_norm": 0.14936530590057373, "learning_rate": 0.0010634104420825525, "loss": 2.4777, "step": 421930 }, { "epoch": 0.8405983042203239, "grad_norm": 0.17220576107501984, "learning_rate": 0.0010631969257095704, "loss": 2.4842, "step": 421940 }, { "epoch": 0.8406182264439628, "grad_norm": 0.17253994941711426, "learning_rate": 0.0010629834579902017, "loss": 2.5042, "step": 421950 }, { "epoch": 0.8406381486676017, "grad_norm": 0.15065276622772217, "learning_rate": 0.0010627700388912014, "loss": 2.5, "step": 421960 }, { "epoch": 0.8406580708912406, "grad_norm": 0.17345155775547028, "learning_rate": 0.0010625566683793628, "loss": 2.5023, "step": 421970 }, { "epoch": 0.8406779931148796, "grad_norm": 0.14992551505565643, "learning_rate": 0.0010623433464215166, "loss": 2.4999, "step": 421980 }, { "epoch": 0.8406979153385185, "grad_norm": 0.16671329736709595, "learning_rate": 0.0010621300729845315, "loss": 2.493, "step": 421990 }, { "epoch": 0.8407178375621573, "grad_norm": 0.17223873734474182, "learning_rate": 0.001061916848035314, "loss": 2.483, "step": 422000 }, { "epoch": 0.8407377597857962, "grad_norm": 0.1791755110025406, "learning_rate": 0.001061703671540808, "loss": 2.493, "step": 422010 }, { "epoch": 0.8407576820094351, "grad_norm": 0.15508082509040833, "learning_rate": 0.0010614905434679946, "loss": 2.487, "step": 422020 }, { "epoch": 0.8407776042330741, "grad_norm": 0.1722489446401596, "learning_rate": 0.001061277463783893, "loss": 2.4891, "step": 422030 }, { "epoch": 0.840797526456713, "grad_norm": 0.17928171157836914, "learning_rate": 0.0010610644324555599, "loss": 2.497, "step": 422040 }, { "epoch": 0.8408174486803519, "grad_norm": 0.1694246530532837, "learning_rate": 0.0010608514494500883, "loss": 2.4903, "step": 422050 }, { "epoch": 0.8408373709039908, "grad_norm": 0.16818538308143616, "learning_rate": 0.0010606385147346098, "loss": 2.4985, "step": 422060 }, { "epoch": 0.8408572931276297, "grad_norm": 0.1725328266620636, "learning_rate": 0.0010604256282762924, "loss": 2.4901, "step": 422070 }, { "epoch": 0.8408772153512687, "grad_norm": 0.1763852834701538, "learning_rate": 0.0010602127900423417, "loss": 2.4967, "step": 422080 }, { "epoch": 0.8408971375749076, "grad_norm": 0.15725059807300568, "learning_rate": 0.0010600000000000002, "loss": 2.4859, "step": 422090 }, { "epoch": 0.8409170597985465, "grad_norm": 0.19288527965545654, "learning_rate": 0.0010597872581165472, "loss": 2.5099, "step": 422100 }, { "epoch": 0.8409369820221854, "grad_norm": 0.16199789941310883, "learning_rate": 0.0010595745643593002, "loss": 2.5007, "step": 422110 }, { "epoch": 0.8409569042458243, "grad_norm": 0.17033889889717102, "learning_rate": 0.0010593619186956122, "loss": 2.4879, "step": 422120 }, { "epoch": 0.8409768264694633, "grad_norm": 0.17037957906723022, "learning_rate": 0.001059149321092874, "loss": 2.4978, "step": 422130 }, { "epoch": 0.8409967486931021, "grad_norm": 0.19077835977077484, "learning_rate": 0.0010589367715185128, "loss": 2.4849, "step": 422140 }, { "epoch": 0.841016670916741, "grad_norm": 0.16126932203769684, "learning_rate": 0.0010587242699399927, "loss": 2.5051, "step": 422150 }, { "epoch": 0.8410365931403799, "grad_norm": 0.16355054080486298, "learning_rate": 0.0010585118163248144, "loss": 2.4989, "step": 422160 }, { "epoch": 0.8410565153640188, "grad_norm": 0.16926924884319305, "learning_rate": 0.0010582994106405157, "loss": 2.4869, "step": 422170 }, { "epoch": 0.8410764375876578, "grad_norm": 0.16820450127124786, "learning_rate": 0.0010580870528546706, "loss": 2.4972, "step": 422180 }, { "epoch": 0.8410963598112967, "grad_norm": 0.15476472675800323, "learning_rate": 0.00105787474293489, "loss": 2.5012, "step": 422190 }, { "epoch": 0.8411162820349356, "grad_norm": 0.1582009345293045, "learning_rate": 0.0010576624808488204, "loss": 2.4903, "step": 422200 }, { "epoch": 0.8411362042585745, "grad_norm": 0.16226078569889069, "learning_rate": 0.0010574502665641457, "loss": 2.4917, "step": 422210 }, { "epoch": 0.8411561264822134, "grad_norm": 0.15993362665176392, "learning_rate": 0.001057238100048586, "loss": 2.4927, "step": 422220 }, { "epoch": 0.8411760487058524, "grad_norm": 0.1657189428806305, "learning_rate": 0.0010570259812698972, "loss": 2.4809, "step": 422230 }, { "epoch": 0.8411959709294913, "grad_norm": 0.1657545566558838, "learning_rate": 0.0010568139101958724, "loss": 2.5074, "step": 422240 }, { "epoch": 0.8412158931531302, "grad_norm": 0.16113407909870148, "learning_rate": 0.0010566018867943397, "loss": 2.49, "step": 422250 }, { "epoch": 0.8412358153767691, "grad_norm": 0.16102229058742523, "learning_rate": 0.001056389911033164, "loss": 2.4856, "step": 422260 }, { "epoch": 0.8412557376004081, "grad_norm": 0.1625012904405594, "learning_rate": 0.0010561779828802468, "loss": 2.4966, "step": 422270 }, { "epoch": 0.841275659824047, "grad_norm": 0.14953841269016266, "learning_rate": 0.0010559661023035244, "loss": 2.5028, "step": 422280 }, { "epoch": 0.8412955820476858, "grad_norm": 0.16349247097969055, "learning_rate": 0.0010557542692709698, "loss": 2.4939, "step": 422290 }, { "epoch": 0.8413155042713247, "grad_norm": 0.16416491568088531, "learning_rate": 0.0010555424837505926, "loss": 2.4962, "step": 422300 }, { "epoch": 0.8413354264949636, "grad_norm": 0.16641221940517426, "learning_rate": 0.0010553307457104367, "loss": 2.5117, "step": 422310 }, { "epoch": 0.8413553487186026, "grad_norm": 0.2076888233423233, "learning_rate": 0.0010551190551185824, "loss": 2.4858, "step": 422320 }, { "epoch": 0.8413752709422415, "grad_norm": 0.15774300694465637, "learning_rate": 0.0010549074119431472, "loss": 2.5019, "step": 422330 }, { "epoch": 0.8413951931658804, "grad_norm": 0.19374527037143707, "learning_rate": 0.001054695816152282, "loss": 2.5003, "step": 422340 }, { "epoch": 0.8414151153895193, "grad_norm": 0.17403610050678253, "learning_rate": 0.001054484267714175, "loss": 2.5017, "step": 422350 }, { "epoch": 0.8414350376131582, "grad_norm": 0.22485437989234924, "learning_rate": 0.001054272766597049, "loss": 2.5046, "step": 422360 }, { "epoch": 0.8414549598367972, "grad_norm": 0.17342005670070648, "learning_rate": 0.0010540613127691626, "loss": 2.4955, "step": 422370 }, { "epoch": 0.8414748820604361, "grad_norm": 0.1747836470603943, "learning_rate": 0.0010538499061988104, "loss": 2.5022, "step": 422380 }, { "epoch": 0.841494804284075, "grad_norm": 0.17220689356327057, "learning_rate": 0.0010536385468543218, "loss": 2.5048, "step": 422390 }, { "epoch": 0.8415147265077139, "grad_norm": 0.20881105959415436, "learning_rate": 0.0010534272347040615, "loss": 2.5082, "step": 422400 }, { "epoch": 0.8415346487313528, "grad_norm": 0.15386125445365906, "learning_rate": 0.0010532159697164301, "loss": 2.4979, "step": 422410 }, { "epoch": 0.8415545709549918, "grad_norm": 0.14779521524906158, "learning_rate": 0.001053004751859863, "loss": 2.4988, "step": 422420 }, { "epoch": 0.8415744931786306, "grad_norm": 0.15803635120391846, "learning_rate": 0.0010527935811028306, "loss": 2.5061, "step": 422430 }, { "epoch": 0.8415944154022695, "grad_norm": 0.20224961638450623, "learning_rate": 0.0010525824574138393, "loss": 2.4889, "step": 422440 }, { "epoch": 0.8416143376259084, "grad_norm": 0.19783428311347961, "learning_rate": 0.0010523713807614294, "loss": 2.4952, "step": 422450 }, { "epoch": 0.8416342598495473, "grad_norm": 0.16447867453098297, "learning_rate": 0.0010521603511141771, "loss": 2.4994, "step": 422460 }, { "epoch": 0.8416541820731863, "grad_norm": 0.16849440336227417, "learning_rate": 0.0010519493684406935, "loss": 2.4917, "step": 422470 }, { "epoch": 0.8416741042968252, "grad_norm": 0.18694056570529938, "learning_rate": 0.0010517384327096242, "loss": 2.4961, "step": 422480 }, { "epoch": 0.8416940265204641, "grad_norm": 0.16346414387226105, "learning_rate": 0.00105152754388965, "loss": 2.4939, "step": 422490 }, { "epoch": 0.841713948744103, "grad_norm": 0.18996946513652802, "learning_rate": 0.0010513167019494862, "loss": 2.4925, "step": 422500 }, { "epoch": 0.8417338709677419, "grad_norm": 0.18232427537441254, "learning_rate": 0.0010511059068578832, "loss": 2.4966, "step": 422510 }, { "epoch": 0.8417537931913809, "grad_norm": 0.17913010716438293, "learning_rate": 0.0010508951585836263, "loss": 2.4988, "step": 422520 }, { "epoch": 0.8417737154150198, "grad_norm": 0.18802937865257263, "learning_rate": 0.0010506844570955345, "loss": 2.4992, "step": 422530 }, { "epoch": 0.8417936376386587, "grad_norm": 0.16633659601211548, "learning_rate": 0.0010504738023624624, "loss": 2.5067, "step": 422540 }, { "epoch": 0.8418135598622976, "grad_norm": 0.17152749001979828, "learning_rate": 0.0010502631943532987, "loss": 2.4783, "step": 422550 }, { "epoch": 0.8418334820859366, "grad_norm": 0.24852807819843292, "learning_rate": 0.0010500526330369667, "loss": 2.4961, "step": 422560 }, { "epoch": 0.8418534043095754, "grad_norm": 0.19337241351604462, "learning_rate": 0.001049842118382424, "loss": 2.5122, "step": 422570 }, { "epoch": 0.8418733265332143, "grad_norm": 0.2394830286502838, "learning_rate": 0.0010496316503586622, "loss": 2.5119, "step": 422580 }, { "epoch": 0.8418932487568532, "grad_norm": 0.1857350468635559, "learning_rate": 0.0010494212289347085, "loss": 2.5026, "step": 422590 }, { "epoch": 0.8419131709804921, "grad_norm": 0.1547689437866211, "learning_rate": 0.001049210854079623, "loss": 2.5133, "step": 422600 }, { "epoch": 0.8419330932041311, "grad_norm": 0.1662198156118393, "learning_rate": 0.0010490005257625008, "loss": 2.4951, "step": 422610 }, { "epoch": 0.84195301542777, "grad_norm": 0.16196347773075104, "learning_rate": 0.0010487902439524708, "loss": 2.5019, "step": 422620 }, { "epoch": 0.8419729376514089, "grad_norm": 0.17570042610168457, "learning_rate": 0.0010485800086186964, "loss": 2.488, "step": 422630 }, { "epoch": 0.8419928598750478, "grad_norm": 0.15895305573940277, "learning_rate": 0.0010483698197303745, "loss": 2.4889, "step": 422640 }, { "epoch": 0.8420127820986867, "grad_norm": 0.16936850547790527, "learning_rate": 0.0010481596772567365, "loss": 2.49, "step": 422650 }, { "epoch": 0.8420327043223257, "grad_norm": 0.1893978863954544, "learning_rate": 0.0010479495811670477, "loss": 2.4906, "step": 422660 }, { "epoch": 0.8420526265459646, "grad_norm": 0.19424350559711456, "learning_rate": 0.001047739531430607, "loss": 2.4842, "step": 422670 }, { "epoch": 0.8420725487696035, "grad_norm": 0.155365452170372, "learning_rate": 0.0010475295280167475, "loss": 2.4972, "step": 422680 }, { "epoch": 0.8420924709932424, "grad_norm": 0.18439477682113647, "learning_rate": 0.0010473195708948359, "loss": 2.4936, "step": 422690 }, { "epoch": 0.8421123932168812, "grad_norm": 0.16795186698436737, "learning_rate": 0.0010471096600342725, "loss": 2.4695, "step": 422700 }, { "epoch": 0.8421323154405203, "grad_norm": 0.16259311139583588, "learning_rate": 0.0010468997954044916, "loss": 2.496, "step": 422710 }, { "epoch": 0.8421522376641591, "grad_norm": 0.1753101944923401, "learning_rate": 0.0010466899769749612, "loss": 2.4972, "step": 422720 }, { "epoch": 0.842172159887798, "grad_norm": 0.1536707580089569, "learning_rate": 0.0010464802047151828, "loss": 2.497, "step": 422730 }, { "epoch": 0.8421920821114369, "grad_norm": 0.1674579679965973, "learning_rate": 0.0010462704785946908, "loss": 2.5016, "step": 422740 }, { "epoch": 0.8422120043350758, "grad_norm": 0.16285286843776703, "learning_rate": 0.0010460607985830544, "loss": 2.4992, "step": 422750 }, { "epoch": 0.8422319265587148, "grad_norm": 0.1936897486448288, "learning_rate": 0.0010458511646498749, "loss": 2.4754, "step": 422760 }, { "epoch": 0.8422518487823537, "grad_norm": 0.18217550218105316, "learning_rate": 0.001045641576764788, "loss": 2.4979, "step": 422770 }, { "epoch": 0.8422717710059926, "grad_norm": 0.1734781116247177, "learning_rate": 0.001045432034897462, "loss": 2.4889, "step": 422780 }, { "epoch": 0.8422916932296315, "grad_norm": 0.16894444823265076, "learning_rate": 0.0010452225390175994, "loss": 2.4926, "step": 422790 }, { "epoch": 0.8423116154532704, "grad_norm": 0.17465418577194214, "learning_rate": 0.0010450130890949341, "loss": 2.4865, "step": 422800 }, { "epoch": 0.8423315376769094, "grad_norm": 0.16583412885665894, "learning_rate": 0.0010448036850992358, "loss": 2.4912, "step": 422810 }, { "epoch": 0.8423514599005483, "grad_norm": 0.16284134984016418, "learning_rate": 0.0010445943270003051, "loss": 2.5167, "step": 422820 }, { "epoch": 0.8423713821241872, "grad_norm": 0.19473999738693237, "learning_rate": 0.0010443850147679767, "loss": 2.4983, "step": 422830 }, { "epoch": 0.842391304347826, "grad_norm": 0.1662989854812622, "learning_rate": 0.0010441757483721184, "loss": 2.5088, "step": 422840 }, { "epoch": 0.842411226571465, "grad_norm": 0.1891588419675827, "learning_rate": 0.0010439665277826304, "loss": 2.5002, "step": 422850 }, { "epoch": 0.842431148795104, "grad_norm": 0.15203127264976501, "learning_rate": 0.0010437573529694464, "loss": 2.4994, "step": 422860 }, { "epoch": 0.8424510710187428, "grad_norm": 0.15723104774951935, "learning_rate": 0.0010435482239025324, "loss": 2.4932, "step": 422870 }, { "epoch": 0.8424709932423817, "grad_norm": 0.1669987440109253, "learning_rate": 0.0010433391405518882, "loss": 2.507, "step": 422880 }, { "epoch": 0.8424909154660206, "grad_norm": 0.1709228754043579, "learning_rate": 0.0010431301028875452, "loss": 2.5087, "step": 422890 }, { "epoch": 0.8425108376896596, "grad_norm": 0.16609007120132446, "learning_rate": 0.001042921110879568, "loss": 2.5013, "step": 422900 }, { "epoch": 0.8425307599132985, "grad_norm": 0.1564311981201172, "learning_rate": 0.0010427121644980546, "loss": 2.4904, "step": 422910 }, { "epoch": 0.8425506821369374, "grad_norm": 0.172862246632576, "learning_rate": 0.0010425032637131342, "loss": 2.4795, "step": 422920 }, { "epoch": 0.8425706043605763, "grad_norm": 0.17752274870872498, "learning_rate": 0.0010422944084949697, "loss": 2.5007, "step": 422930 }, { "epoch": 0.8425905265842152, "grad_norm": 0.16155888140201569, "learning_rate": 0.0010420855988137563, "loss": 2.4907, "step": 422940 }, { "epoch": 0.8426104488078542, "grad_norm": 0.19672903418540955, "learning_rate": 0.0010418768346397212, "loss": 2.4878, "step": 422950 }, { "epoch": 0.8426303710314931, "grad_norm": 0.16470831632614136, "learning_rate": 0.001041668115943125, "loss": 2.5017, "step": 422960 }, { "epoch": 0.842650293255132, "grad_norm": 0.6385697722434998, "learning_rate": 0.001041459442694259, "loss": 2.4987, "step": 422970 }, { "epoch": 0.8426702154787709, "grad_norm": 0.16323362290859222, "learning_rate": 0.0010412508148634494, "loss": 2.5039, "step": 422980 }, { "epoch": 0.8426901377024097, "grad_norm": 0.15830625593662262, "learning_rate": 0.0010410422324210519, "loss": 2.4906, "step": 422990 }, { "epoch": 0.8427100599260487, "grad_norm": 0.15800100564956665, "learning_rate": 0.0010408336953374561, "loss": 2.4991, "step": 423000 }, { "epoch": 0.8427299821496876, "grad_norm": 0.1605672985315323, "learning_rate": 0.0010406252035830837, "loss": 2.4856, "step": 423010 }, { "epoch": 0.8427499043733265, "grad_norm": 0.17204320430755615, "learning_rate": 0.0010404167571283875, "loss": 2.4781, "step": 423020 }, { "epoch": 0.8427698265969654, "grad_norm": 0.16866856813430786, "learning_rate": 0.0010402083559438537, "loss": 2.4977, "step": 423030 }, { "epoch": 0.8427897488206043, "grad_norm": 0.20156817138195038, "learning_rate": 0.0010400000000000001, "loss": 2.4964, "step": 423040 }, { "epoch": 0.8428096710442433, "grad_norm": 0.16594122350215912, "learning_rate": 0.0010397916892673758, "loss": 2.4939, "step": 423050 }, { "epoch": 0.8428295932678822, "grad_norm": 0.16415977478027344, "learning_rate": 0.0010395834237165624, "loss": 2.4884, "step": 423060 }, { "epoch": 0.8428495154915211, "grad_norm": 0.1513100564479828, "learning_rate": 0.0010393752033181739, "loss": 2.4856, "step": 423070 }, { "epoch": 0.84286943771516, "grad_norm": 0.1960822194814682, "learning_rate": 0.0010391670280428548, "loss": 2.4933, "step": 423080 }, { "epoch": 0.8428893599387989, "grad_norm": 0.16322985291481018, "learning_rate": 0.001038958897861283, "loss": 2.5076, "step": 423090 }, { "epoch": 0.8429092821624379, "grad_norm": 0.1843281239271164, "learning_rate": 0.0010387508127441666, "loss": 2.5111, "step": 423100 }, { "epoch": 0.8429292043860768, "grad_norm": 0.15717177093029022, "learning_rate": 0.0010385427726622466, "loss": 2.4863, "step": 423110 }, { "epoch": 0.8429491266097157, "grad_norm": 0.15300123393535614, "learning_rate": 0.0010383347775862954, "loss": 2.4999, "step": 423120 }, { "epoch": 0.8429690488333546, "grad_norm": 0.16945084929466248, "learning_rate": 0.0010381268274871163, "loss": 2.5085, "step": 423130 }, { "epoch": 0.8429889710569936, "grad_norm": 0.23421631753444672, "learning_rate": 0.0010379189223355446, "loss": 2.5034, "step": 423140 }, { "epoch": 0.8430088932806324, "grad_norm": 0.1712341457605362, "learning_rate": 0.0010377110621024475, "loss": 2.5084, "step": 423150 }, { "epoch": 0.8430288155042713, "grad_norm": 0.17643697559833527, "learning_rate": 0.0010375032467587228, "loss": 2.5023, "step": 423160 }, { "epoch": 0.8430487377279102, "grad_norm": 0.17657315731048584, "learning_rate": 0.0010372954762753008, "loss": 2.5026, "step": 423170 }, { "epoch": 0.8430686599515491, "grad_norm": 0.18744339048862457, "learning_rate": 0.001037087750623142, "loss": 2.5075, "step": 423180 }, { "epoch": 0.8430885821751881, "grad_norm": 0.16555500030517578, "learning_rate": 0.0010368800697732397, "loss": 2.5104, "step": 423190 }, { "epoch": 0.843108504398827, "grad_norm": 0.1628972589969635, "learning_rate": 0.0010366724336966161, "loss": 2.5016, "step": 423200 }, { "epoch": 0.8431284266224659, "grad_norm": 0.15061038732528687, "learning_rate": 0.0010364648423643278, "loss": 2.4993, "step": 423210 }, { "epoch": 0.8431483488461048, "grad_norm": 0.16279514133930206, "learning_rate": 0.0010362572957474594, "loss": 2.4886, "step": 423220 }, { "epoch": 0.8431682710697437, "grad_norm": 0.18842999637126923, "learning_rate": 0.0010360497938171286, "loss": 2.5083, "step": 423230 }, { "epoch": 0.8431881932933827, "grad_norm": 0.17440015077590942, "learning_rate": 0.001035842336544484, "loss": 2.4861, "step": 423240 }, { "epoch": 0.8432081155170216, "grad_norm": 0.18871329724788666, "learning_rate": 0.0010356349239007043, "loss": 2.5022, "step": 423250 }, { "epoch": 0.8432280377406605, "grad_norm": 0.1704091727733612, "learning_rate": 0.0010354275558570004, "loss": 2.5113, "step": 423260 }, { "epoch": 0.8432479599642994, "grad_norm": 0.17222537100315094, "learning_rate": 0.0010352202323846132, "loss": 2.5129, "step": 423270 }, { "epoch": 0.8432678821879382, "grad_norm": 0.15871524810791016, "learning_rate": 0.0010350129534548145, "loss": 2.5072, "step": 423280 }, { "epoch": 0.8432878044115772, "grad_norm": 0.17550192773342133, "learning_rate": 0.0010348057190389078, "loss": 2.4938, "step": 423290 }, { "epoch": 0.8433077266352161, "grad_norm": 0.15896670520305634, "learning_rate": 0.0010345985291082265, "loss": 2.4964, "step": 423300 }, { "epoch": 0.843327648858855, "grad_norm": 0.1544639766216278, "learning_rate": 0.001034391383634135, "loss": 2.4781, "step": 423310 }, { "epoch": 0.8433475710824939, "grad_norm": 0.163480743765831, "learning_rate": 0.0010341842825880292, "loss": 2.4905, "step": 423320 }, { "epoch": 0.8433674933061328, "grad_norm": 0.190342515707016, "learning_rate": 0.0010339772259413342, "loss": 2.4978, "step": 423330 }, { "epoch": 0.8433874155297718, "grad_norm": 0.16879694163799286, "learning_rate": 0.0010337702136655072, "loss": 2.4966, "step": 423340 }, { "epoch": 0.8434073377534107, "grad_norm": 0.1663384586572647, "learning_rate": 0.0010335632457320343, "loss": 2.4927, "step": 423350 }, { "epoch": 0.8434272599770496, "grad_norm": 0.16312269866466522, "learning_rate": 0.0010333563221124342, "loss": 2.4814, "step": 423360 }, { "epoch": 0.8434471822006885, "grad_norm": 0.16531340777873993, "learning_rate": 0.0010331494427782543, "loss": 2.4763, "step": 423370 }, { "epoch": 0.8434671044243274, "grad_norm": 0.15381407737731934, "learning_rate": 0.0010329426077010733, "loss": 2.5041, "step": 423380 }, { "epoch": 0.8434870266479664, "grad_norm": 0.1672775000333786, "learning_rate": 0.0010327358168525003, "loss": 2.5031, "step": 423390 }, { "epoch": 0.8435069488716053, "grad_norm": 0.16436903178691864, "learning_rate": 0.001032529070204174, "loss": 2.4775, "step": 423400 }, { "epoch": 0.8435268710952442, "grad_norm": 0.1549130529165268, "learning_rate": 0.0010323223677277645, "loss": 2.4944, "step": 423410 }, { "epoch": 0.843546793318883, "grad_norm": 0.16252346336841583, "learning_rate": 0.001032115709394971, "loss": 2.4987, "step": 423420 }, { "epoch": 0.8435667155425219, "grad_norm": 0.14731861650943756, "learning_rate": 0.0010319090951775242, "loss": 2.4979, "step": 423430 }, { "epoch": 0.8435866377661609, "grad_norm": 0.18168425559997559, "learning_rate": 0.0010317025250471837, "loss": 2.499, "step": 423440 }, { "epoch": 0.8436065599897998, "grad_norm": 0.17027662694454193, "learning_rate": 0.00103149599897574, "loss": 2.4981, "step": 423450 }, { "epoch": 0.8436264822134387, "grad_norm": 0.16104240715503693, "learning_rate": 0.0010312895169350131, "loss": 2.4885, "step": 423460 }, { "epoch": 0.8436464044370776, "grad_norm": 0.16682462394237518, "learning_rate": 0.0010310830788968542, "loss": 2.5029, "step": 423470 }, { "epoch": 0.8436663266607166, "grad_norm": 0.3205091953277588, "learning_rate": 0.0010308766848331425, "loss": 2.4926, "step": 423480 }, { "epoch": 0.8436862488843555, "grad_norm": 0.17707359790802002, "learning_rate": 0.0010306703347157894, "loss": 2.4833, "step": 423490 }, { "epoch": 0.8437061711079944, "grad_norm": 0.17326126992702484, "learning_rate": 0.0010304640285167341, "loss": 2.4968, "step": 423500 }, { "epoch": 0.8437260933316333, "grad_norm": 0.17419086396694183, "learning_rate": 0.0010302577662079475, "loss": 2.4971, "step": 423510 }, { "epoch": 0.8437460155552722, "grad_norm": 0.16242708265781403, "learning_rate": 0.0010300515477614288, "loss": 2.489, "step": 423520 }, { "epoch": 0.8437659377789112, "grad_norm": 0.17789648473262787, "learning_rate": 0.0010298453731492076, "loss": 2.4949, "step": 423530 }, { "epoch": 0.8437858600025501, "grad_norm": 0.17792831361293793, "learning_rate": 0.0010296392423433437, "loss": 2.4909, "step": 423540 }, { "epoch": 0.843805782226189, "grad_norm": 0.1655425727367401, "learning_rate": 0.0010294331553159257, "loss": 2.4924, "step": 423550 }, { "epoch": 0.8438257044498279, "grad_norm": 0.1983819305896759, "learning_rate": 0.0010292271120390724, "loss": 2.4988, "step": 423560 }, { "epoch": 0.8438456266734667, "grad_norm": 0.15612776577472687, "learning_rate": 0.0010290211124849315, "loss": 2.5038, "step": 423570 }, { "epoch": 0.8438655488971057, "grad_norm": 0.1479535549879074, "learning_rate": 0.0010288151566256812, "loss": 2.474, "step": 423580 }, { "epoch": 0.8438854711207446, "grad_norm": 0.15078505873680115, "learning_rate": 0.001028609244433529, "loss": 2.4944, "step": 423590 }, { "epoch": 0.8439053933443835, "grad_norm": 0.16688506305217743, "learning_rate": 0.0010284033758807105, "loss": 2.4625, "step": 423600 }, { "epoch": 0.8439253155680224, "grad_norm": 0.15564732253551483, "learning_rate": 0.0010281975509394928, "loss": 2.4857, "step": 423610 }, { "epoch": 0.8439452377916613, "grad_norm": 0.17132288217544556, "learning_rate": 0.0010279917695821706, "loss": 2.4915, "step": 423620 }, { "epoch": 0.8439651600153003, "grad_norm": 0.1711675226688385, "learning_rate": 0.0010277860317810692, "loss": 2.493, "step": 423630 }, { "epoch": 0.8439850822389392, "grad_norm": 0.16324849426746368, "learning_rate": 0.0010275803375085425, "loss": 2.4823, "step": 423640 }, { "epoch": 0.8440050044625781, "grad_norm": 0.16100835800170898, "learning_rate": 0.0010273746867369737, "loss": 2.5041, "step": 423650 }, { "epoch": 0.844024926686217, "grad_norm": 0.18395154178142548, "learning_rate": 0.0010271690794387753, "loss": 2.4854, "step": 423660 }, { "epoch": 0.8440448489098559, "grad_norm": 0.15123796463012695, "learning_rate": 0.0010269635155863888, "loss": 2.4832, "step": 423670 }, { "epoch": 0.8440647711334949, "grad_norm": 0.15708112716674805, "learning_rate": 0.001026757995152285, "loss": 2.5066, "step": 423680 }, { "epoch": 0.8440846933571338, "grad_norm": 0.17655256390571594, "learning_rate": 0.0010265525181089635, "loss": 2.4981, "step": 423690 }, { "epoch": 0.8441046155807727, "grad_norm": 0.16263091564178467, "learning_rate": 0.0010263470844289531, "loss": 2.4836, "step": 423700 }, { "epoch": 0.8441245378044115, "grad_norm": 0.20904923975467682, "learning_rate": 0.001026141694084812, "loss": 2.5011, "step": 423710 }, { "epoch": 0.8441444600280504, "grad_norm": 0.17239795625209808, "learning_rate": 0.0010259363470491265, "loss": 2.4936, "step": 423720 }, { "epoch": 0.8441643822516894, "grad_norm": 0.19257444143295288, "learning_rate": 0.0010257310432945119, "loss": 2.5114, "step": 423730 }, { "epoch": 0.8441843044753283, "grad_norm": 0.150425523519516, "learning_rate": 0.0010255257827936136, "loss": 2.4971, "step": 423740 }, { "epoch": 0.8442042266989672, "grad_norm": 0.18472318351268768, "learning_rate": 0.001025320565519104, "loss": 2.4785, "step": 423750 }, { "epoch": 0.8442241489226061, "grad_norm": 0.1685618758201599, "learning_rate": 0.0010251153914436848, "loss": 2.495, "step": 423760 }, { "epoch": 0.8442440711462451, "grad_norm": 0.16394475102424622, "learning_rate": 0.0010249102605400875, "loss": 2.487, "step": 423770 }, { "epoch": 0.844263993369884, "grad_norm": 0.17848123610019684, "learning_rate": 0.0010247051727810712, "loss": 2.5102, "step": 423780 }, { "epoch": 0.8442839155935229, "grad_norm": 0.20304253697395325, "learning_rate": 0.0010245001281394242, "loss": 2.4989, "step": 423790 }, { "epoch": 0.8443038378171618, "grad_norm": 0.16638857126235962, "learning_rate": 0.0010242951265879627, "loss": 2.4962, "step": 423800 }, { "epoch": 0.8443237600408007, "grad_norm": 0.1872539520263672, "learning_rate": 0.0010240901680995318, "loss": 2.4788, "step": 423810 }, { "epoch": 0.8443436822644397, "grad_norm": 0.1609676629304886, "learning_rate": 0.0010238852526470057, "loss": 2.5037, "step": 423820 }, { "epoch": 0.8443636044880786, "grad_norm": 0.1637631356716156, "learning_rate": 0.001023680380203286, "loss": 2.4836, "step": 423830 }, { "epoch": 0.8443835267117175, "grad_norm": 0.20390121638774872, "learning_rate": 0.0010234755507413037, "loss": 2.4931, "step": 423840 }, { "epoch": 0.8444034489353563, "grad_norm": 0.16464018821716309, "learning_rate": 0.0010232707642340176, "loss": 2.4994, "step": 423850 }, { "epoch": 0.8444233711589952, "grad_norm": 0.1842787265777588, "learning_rate": 0.001023066020654415, "loss": 2.5026, "step": 423860 }, { "epoch": 0.8444432933826342, "grad_norm": 0.17186632752418518, "learning_rate": 0.0010228613199755113, "loss": 2.5018, "step": 423870 }, { "epoch": 0.8444632156062731, "grad_norm": 0.1674444079399109, "learning_rate": 0.0010226566621703505, "loss": 2.5039, "step": 423880 }, { "epoch": 0.844483137829912, "grad_norm": 0.1973322629928589, "learning_rate": 0.001022452047212005, "loss": 2.4931, "step": 423890 }, { "epoch": 0.8445030600535509, "grad_norm": 0.15109211206436157, "learning_rate": 0.0010222474750735749, "loss": 2.4942, "step": 423900 }, { "epoch": 0.8445229822771898, "grad_norm": 0.15300336480140686, "learning_rate": 0.001022042945728188, "loss": 2.4934, "step": 423910 }, { "epoch": 0.8445429045008288, "grad_norm": 0.14840592443943024, "learning_rate": 0.0010218384591490013, "loss": 2.4828, "step": 423920 }, { "epoch": 0.8445628267244677, "grad_norm": 0.18901748955249786, "learning_rate": 0.0010216340153091994, "loss": 2.4926, "step": 423930 }, { "epoch": 0.8445827489481066, "grad_norm": 0.1692054718732834, "learning_rate": 0.0010214296141819945, "loss": 2.509, "step": 423940 }, { "epoch": 0.8446026711717455, "grad_norm": 0.18253803253173828, "learning_rate": 0.0010212252557406276, "loss": 2.4947, "step": 423950 }, { "epoch": 0.8446225933953844, "grad_norm": 0.18295156955718994, "learning_rate": 0.0010210209399583667, "loss": 2.5044, "step": 423960 }, { "epoch": 0.8446425156190234, "grad_norm": 0.17276257276535034, "learning_rate": 0.0010208166668085083, "loss": 2.5115, "step": 423970 }, { "epoch": 0.8446624378426623, "grad_norm": 0.1938982903957367, "learning_rate": 0.0010206124362643766, "loss": 2.4892, "step": 423980 }, { "epoch": 0.8446823600663012, "grad_norm": 0.15084154903888702, "learning_rate": 0.0010204082482993236, "loss": 2.4939, "step": 423990 }, { "epoch": 0.84470228228994, "grad_norm": 0.1703859269618988, "learning_rate": 0.001020204102886729, "loss": 2.4998, "step": 424000 }, { "epoch": 0.8447222045135789, "grad_norm": 0.17014536261558533, "learning_rate": 0.00102, "loss": 2.4891, "step": 424010 }, { "epoch": 0.8447421267372179, "grad_norm": 0.14922992885112762, "learning_rate": 0.0010197959396125724, "loss": 2.4955, "step": 424020 }, { "epoch": 0.8447620489608568, "grad_norm": 0.155168354511261, "learning_rate": 0.0010195919216979083, "loss": 2.4976, "step": 424030 }, { "epoch": 0.8447819711844957, "grad_norm": 0.16683878004550934, "learning_rate": 0.0010193879462294991, "loss": 2.4907, "step": 424040 }, { "epoch": 0.8448018934081346, "grad_norm": 0.25085797905921936, "learning_rate": 0.0010191840131808618, "loss": 2.4827, "step": 424050 }, { "epoch": 0.8448218156317736, "grad_norm": 0.17677761614322662, "learning_rate": 0.001018980122525542, "loss": 2.4838, "step": 424060 }, { "epoch": 0.8448417378554125, "grad_norm": 0.1762179136276245, "learning_rate": 0.0010187762742371138, "loss": 2.4899, "step": 424070 }, { "epoch": 0.8448616600790514, "grad_norm": 0.1657116562128067, "learning_rate": 0.0010185724682891762, "loss": 2.4974, "step": 424080 }, { "epoch": 0.8448815823026903, "grad_norm": 0.16187871992588043, "learning_rate": 0.001018368704655358, "loss": 2.511, "step": 424090 }, { "epoch": 0.8449015045263292, "grad_norm": 0.16449812054634094, "learning_rate": 0.0010181649833093138, "loss": 2.4919, "step": 424100 }, { "epoch": 0.8449214267499682, "grad_norm": 0.1740816831588745, "learning_rate": 0.0010179613042247265, "loss": 2.5032, "step": 424110 }, { "epoch": 0.8449413489736071, "grad_norm": 0.16885459423065186, "learning_rate": 0.0010177576673753063, "loss": 2.5045, "step": 424120 }, { "epoch": 0.844961271197246, "grad_norm": 0.1610127091407776, "learning_rate": 0.0010175540727347893, "loss": 2.5138, "step": 424130 }, { "epoch": 0.8449811934208848, "grad_norm": 0.16151784360408783, "learning_rate": 0.0010173505202769402, "loss": 2.481, "step": 424140 }, { "epoch": 0.8450011156445237, "grad_norm": 0.1812966763973236, "learning_rate": 0.001017147009975551, "loss": 2.5018, "step": 424150 }, { "epoch": 0.8450210378681627, "grad_norm": 0.15284833312034607, "learning_rate": 0.0010169435418044394, "loss": 2.5013, "step": 424160 }, { "epoch": 0.8450409600918016, "grad_norm": 0.4180237650871277, "learning_rate": 0.0010167401157374517, "loss": 2.4943, "step": 424170 }, { "epoch": 0.8450608823154405, "grad_norm": 0.17150355875492096, "learning_rate": 0.0010165367317484604, "loss": 2.5082, "step": 424180 }, { "epoch": 0.8450808045390794, "grad_norm": 0.17866812646389008, "learning_rate": 0.001016333389811365, "loss": 2.5102, "step": 424190 }, { "epoch": 0.8451007267627183, "grad_norm": 0.1713184416294098, "learning_rate": 0.0010161300899000926, "loss": 2.4934, "step": 424200 }, { "epoch": 0.8451206489863573, "grad_norm": 0.16374462842941284, "learning_rate": 0.0010159268319885966, "loss": 2.4976, "step": 424210 }, { "epoch": 0.8451405712099962, "grad_norm": 0.1763007640838623, "learning_rate": 0.0010157236160508573, "loss": 2.4966, "step": 424220 }, { "epoch": 0.8451604934336351, "grad_norm": 0.17952632904052734, "learning_rate": 0.0010155204420608825, "loss": 2.4876, "step": 424230 }, { "epoch": 0.845180415657274, "grad_norm": 0.19473502039909363, "learning_rate": 0.0010153173099927062, "loss": 2.5003, "step": 424240 }, { "epoch": 0.8452003378809129, "grad_norm": 0.1788078248500824, "learning_rate": 0.0010151142198203894, "loss": 2.5039, "step": 424250 }, { "epoch": 0.8452202601045519, "grad_norm": 0.17500998079776764, "learning_rate": 0.00101491117151802, "loss": 2.4896, "step": 424260 }, { "epoch": 0.8452401823281908, "grad_norm": 0.17980092763900757, "learning_rate": 0.0010147081650597118, "loss": 2.4806, "step": 424270 }, { "epoch": 0.8452601045518296, "grad_norm": 0.20116549730300903, "learning_rate": 0.0010145052004196064, "loss": 2.4952, "step": 424280 }, { "epoch": 0.8452800267754685, "grad_norm": 0.18247011303901672, "learning_rate": 0.0010143022775718716, "loss": 2.4994, "step": 424290 }, { "epoch": 0.8452999489991074, "grad_norm": 0.16565878689289093, "learning_rate": 0.001014099396490701, "loss": 2.4801, "step": 424300 }, { "epoch": 0.8453198712227464, "grad_norm": 0.17145763337612152, "learning_rate": 0.0010138965571503161, "loss": 2.5131, "step": 424310 }, { "epoch": 0.8453397934463853, "grad_norm": 0.16693681478500366, "learning_rate": 0.0010136937595249637, "loss": 2.5043, "step": 424320 }, { "epoch": 0.8453597156700242, "grad_norm": 0.16859334707260132, "learning_rate": 0.0010134910035889183, "loss": 2.4911, "step": 424330 }, { "epoch": 0.8453796378936631, "grad_norm": 0.15890514850616455, "learning_rate": 0.0010132882893164792, "loss": 2.4931, "step": 424340 }, { "epoch": 0.8453995601173021, "grad_norm": 0.17547111213207245, "learning_rate": 0.0010130856166819737, "loss": 2.498, "step": 424350 }, { "epoch": 0.845419482340941, "grad_norm": 0.17123878002166748, "learning_rate": 0.0010128829856597547, "loss": 2.4897, "step": 424360 }, { "epoch": 0.8454394045645799, "grad_norm": 0.18609915673732758, "learning_rate": 0.0010126803962242014, "loss": 2.4941, "step": 424370 }, { "epoch": 0.8454593267882188, "grad_norm": 0.17971955239772797, "learning_rate": 0.0010124778483497194, "loss": 2.4909, "step": 424380 }, { "epoch": 0.8454792490118577, "grad_norm": 0.17686595022678375, "learning_rate": 0.0010122753420107405, "loss": 2.4911, "step": 424390 }, { "epoch": 0.8454991712354967, "grad_norm": 0.16382743418216705, "learning_rate": 0.0010120728771817224, "loss": 2.4991, "step": 424400 }, { "epoch": 0.8455190934591356, "grad_norm": 0.19874043762683868, "learning_rate": 0.00101187045383715, "loss": 2.4832, "step": 424410 }, { "epoch": 0.8455390156827745, "grad_norm": 0.16962909698486328, "learning_rate": 0.001011668071951533, "loss": 2.4906, "step": 424420 }, { "epoch": 0.8455589379064133, "grad_norm": 0.16011986136436462, "learning_rate": 0.0010114657314994084, "loss": 2.493, "step": 424430 }, { "epoch": 0.8455788601300522, "grad_norm": 0.1857658326625824, "learning_rate": 0.0010112634324553381, "loss": 2.4906, "step": 424440 }, { "epoch": 0.8455987823536912, "grad_norm": 0.18932676315307617, "learning_rate": 0.0010110611747939106, "loss": 2.5059, "step": 424450 }, { "epoch": 0.8456187045773301, "grad_norm": 0.16544762253761292, "learning_rate": 0.001010858958489741, "loss": 2.4858, "step": 424460 }, { "epoch": 0.845638626800969, "grad_norm": 0.15710940957069397, "learning_rate": 0.0010106567835174692, "loss": 2.5037, "step": 424470 }, { "epoch": 0.8456585490246079, "grad_norm": 0.17104743421077728, "learning_rate": 0.0010104546498517614, "loss": 2.5008, "step": 424480 }, { "epoch": 0.8456784712482468, "grad_norm": 0.19331468641757965, "learning_rate": 0.0010102525574673103, "loss": 2.4906, "step": 424490 }, { "epoch": 0.8456983934718858, "grad_norm": 0.16986311972141266, "learning_rate": 0.0010100505063388335, "loss": 2.4822, "step": 424500 }, { "epoch": 0.8457183156955247, "grad_norm": 0.17564919590950012, "learning_rate": 0.0010098484964410747, "loss": 2.4894, "step": 424510 }, { "epoch": 0.8457382379191636, "grad_norm": 0.17488518357276917, "learning_rate": 0.0010096465277488042, "loss": 2.4904, "step": 424520 }, { "epoch": 0.8457581601428025, "grad_norm": 0.18179339170455933, "learning_rate": 0.0010094446002368168, "loss": 2.4913, "step": 424530 }, { "epoch": 0.8457780823664414, "grad_norm": 0.1798262894153595, "learning_rate": 0.001009242713879933, "loss": 2.5082, "step": 424540 }, { "epoch": 0.8457980045900804, "grad_norm": 0.15767884254455566, "learning_rate": 0.0010090408686530003, "loss": 2.4869, "step": 424550 }, { "epoch": 0.8458179268137193, "grad_norm": 0.16172833740711212, "learning_rate": 0.0010088390645308905, "loss": 2.4862, "step": 424560 }, { "epoch": 0.8458378490373581, "grad_norm": 0.1734984815120697, "learning_rate": 0.0010086373014885014, "loss": 2.4947, "step": 424570 }, { "epoch": 0.845857771260997, "grad_norm": 0.17374946177005768, "learning_rate": 0.0010084355795007566, "loss": 2.4843, "step": 424580 }, { "epoch": 0.8458776934846359, "grad_norm": 0.19298696517944336, "learning_rate": 0.001008233898542605, "loss": 2.4922, "step": 424590 }, { "epoch": 0.8458976157082749, "grad_norm": 0.16208891570568085, "learning_rate": 0.0010080322585890205, "loss": 2.4853, "step": 424600 }, { "epoch": 0.8459175379319138, "grad_norm": 0.19318337738513947, "learning_rate": 0.001007830659615003, "loss": 2.4926, "step": 424610 }, { "epoch": 0.8459374601555527, "grad_norm": 0.16837352514266968, "learning_rate": 0.0010076291015955778, "loss": 2.4904, "step": 424620 }, { "epoch": 0.8459573823791916, "grad_norm": 0.17770995199680328, "learning_rate": 0.0010074275845057955, "loss": 2.4786, "step": 424630 }, { "epoch": 0.8459773046028306, "grad_norm": 0.18907077610492706, "learning_rate": 0.0010072261083207315, "loss": 2.5052, "step": 424640 }, { "epoch": 0.8459972268264695, "grad_norm": 0.16153281927108765, "learning_rate": 0.0010070246730154873, "loss": 2.5296, "step": 424650 }, { "epoch": 0.8460171490501084, "grad_norm": 0.17936336994171143, "learning_rate": 0.001006823278565189, "loss": 2.4996, "step": 424660 }, { "epoch": 0.8460370712737473, "grad_norm": 0.17822706699371338, "learning_rate": 0.0010066219249449885, "loss": 2.4969, "step": 424670 }, { "epoch": 0.8460569934973862, "grad_norm": 0.1924443244934082, "learning_rate": 0.0010064206121300626, "loss": 2.4918, "step": 424680 }, { "epoch": 0.8460769157210252, "grad_norm": 0.16147898137569427, "learning_rate": 0.0010062193400956123, "loss": 2.4923, "step": 424690 }, { "epoch": 0.8460968379446641, "grad_norm": 0.17562445998191833, "learning_rate": 0.001006018108816866, "loss": 2.4835, "step": 424700 }, { "epoch": 0.846116760168303, "grad_norm": 0.1735588163137436, "learning_rate": 0.0010058169182690746, "loss": 2.4973, "step": 424710 }, { "epoch": 0.8461366823919418, "grad_norm": 0.18025396764278412, "learning_rate": 0.0010056157684275157, "loss": 2.4697, "step": 424720 }, { "epoch": 0.8461566046155807, "grad_norm": 0.18000654876232147, "learning_rate": 0.0010054146592674916, "loss": 2.4961, "step": 424730 }, { "epoch": 0.8461765268392197, "grad_norm": 0.175640270113945, "learning_rate": 0.001005213590764329, "loss": 2.4925, "step": 424740 }, { "epoch": 0.8461964490628586, "grad_norm": 0.18561996519565582, "learning_rate": 0.0010050125628933801, "loss": 2.4954, "step": 424750 }, { "epoch": 0.8462163712864975, "grad_norm": 0.1782521903514862, "learning_rate": 0.0010048115756300217, "loss": 2.508, "step": 424760 }, { "epoch": 0.8462362935101364, "grad_norm": 0.15728388726711273, "learning_rate": 0.0010046106289496558, "loss": 2.4888, "step": 424770 }, { "epoch": 0.8462562157337753, "grad_norm": 0.17045418918132782, "learning_rate": 0.0010044097228277087, "loss": 2.4792, "step": 424780 }, { "epoch": 0.8462761379574143, "grad_norm": 0.16859903931617737, "learning_rate": 0.0010042088572396319, "loss": 2.4849, "step": 424790 }, { "epoch": 0.8462960601810532, "grad_norm": 0.1591850072145462, "learning_rate": 0.0010040080321609014, "loss": 2.4903, "step": 424800 }, { "epoch": 0.8463159824046921, "grad_norm": 0.16737207770347595, "learning_rate": 0.0010038072475670183, "loss": 2.4866, "step": 424810 }, { "epoch": 0.846335904628331, "grad_norm": 0.17134103178977966, "learning_rate": 0.001003606503433508, "loss": 2.5017, "step": 424820 }, { "epoch": 0.8463558268519699, "grad_norm": 0.1671193242073059, "learning_rate": 0.0010034057997359205, "loss": 2.4949, "step": 424830 }, { "epoch": 0.8463757490756089, "grad_norm": 0.158605694770813, "learning_rate": 0.0010032051364498309, "loss": 2.4906, "step": 424840 }, { "epoch": 0.8463956712992478, "grad_norm": 0.14949369430541992, "learning_rate": 0.0010030045135508386, "loss": 2.4981, "step": 424850 }, { "epoch": 0.8464155935228866, "grad_norm": 0.16837313771247864, "learning_rate": 0.0010028039310145673, "loss": 2.4862, "step": 424860 }, { "epoch": 0.8464355157465255, "grad_norm": 0.1651041954755783, "learning_rate": 0.0010026033888166652, "loss": 2.4854, "step": 424870 }, { "epoch": 0.8464554379701644, "grad_norm": 0.16472238302230835, "learning_rate": 0.001002402886932806, "loss": 2.4887, "step": 424880 }, { "epoch": 0.8464753601938034, "grad_norm": 0.16792874038219452, "learning_rate": 0.0010022024253386861, "loss": 2.4823, "step": 424890 }, { "epoch": 0.8464952824174423, "grad_norm": 0.15265105664730072, "learning_rate": 0.0010020020040100279, "loss": 2.5048, "step": 424900 }, { "epoch": 0.8465152046410812, "grad_norm": 0.1728520691394806, "learning_rate": 0.0010018016229225775, "loss": 2.4917, "step": 424910 }, { "epoch": 0.8465351268647201, "grad_norm": 0.16073772311210632, "learning_rate": 0.0010016012820521052, "loss": 2.498, "step": 424920 }, { "epoch": 0.8465550490883591, "grad_norm": 0.16406014561653137, "learning_rate": 0.0010014009813744055, "loss": 2.4828, "step": 424930 }, { "epoch": 0.846574971311998, "grad_norm": 0.15545131266117096, "learning_rate": 0.0010012007208652983, "loss": 2.5047, "step": 424940 }, { "epoch": 0.8465948935356369, "grad_norm": 0.18569763004779816, "learning_rate": 0.0010010005005006257, "loss": 2.4907, "step": 424950 }, { "epoch": 0.8466148157592758, "grad_norm": 0.1754862517118454, "learning_rate": 0.0010008003202562562, "loss": 2.5014, "step": 424960 }, { "epoch": 0.8466347379829147, "grad_norm": 0.18939320743083954, "learning_rate": 0.0010006001801080812, "loss": 2.4794, "step": 424970 }, { "epoch": 0.8466546602065537, "grad_norm": 0.15828590095043182, "learning_rate": 0.0010004000800320162, "loss": 2.4864, "step": 424980 }, { "epoch": 0.8466745824301926, "grad_norm": 0.1636822670698166, "learning_rate": 0.0010002000200040012, "loss": 2.4921, "step": 424990 }, { "epoch": 0.8466945046538314, "grad_norm": 0.16296346485614777, "learning_rate": 0.001, "loss": 2.4963, "step": 425000 }, { "epoch": 0.8467144268774703, "grad_norm": 0.15898311138153076, "learning_rate": 0.000999800019996001, "loss": 2.4935, "step": 425010 }, { "epoch": 0.8467343491011092, "grad_norm": 0.18379293382167816, "learning_rate": 0.000999600079968016, "loss": 2.4841, "step": 425020 }, { "epoch": 0.8467542713247482, "grad_norm": 0.17226895689964294, "learning_rate": 0.000999400179892081, "loss": 2.4994, "step": 425030 }, { "epoch": 0.8467741935483871, "grad_norm": 0.19274359941482544, "learning_rate": 0.0009992003197442556, "loss": 2.4833, "step": 425040 }, { "epoch": 0.846794115772026, "grad_norm": 0.17044995725154877, "learning_rate": 0.0009990004995006241, "loss": 2.4928, "step": 425050 }, { "epoch": 0.8468140379956649, "grad_norm": 0.1651201844215393, "learning_rate": 0.0009988007191372938, "loss": 2.4968, "step": 425060 }, { "epoch": 0.8468339602193038, "grad_norm": 0.1486617475748062, "learning_rate": 0.0009986009786303964, "loss": 2.4971, "step": 425070 }, { "epoch": 0.8468538824429428, "grad_norm": 0.1717904657125473, "learning_rate": 0.0009984012779560869, "loss": 2.4898, "step": 425080 }, { "epoch": 0.8468738046665817, "grad_norm": 0.15735217928886414, "learning_rate": 0.0009982016170905447, "loss": 2.495, "step": 425090 }, { "epoch": 0.8468937268902206, "grad_norm": 0.17562100291252136, "learning_rate": 0.0009980019960099723, "loss": 2.4982, "step": 425100 }, { "epoch": 0.8469136491138595, "grad_norm": 0.1931219846010208, "learning_rate": 0.0009978024146905962, "loss": 2.499, "step": 425110 }, { "epoch": 0.8469335713374984, "grad_norm": 0.18083824217319489, "learning_rate": 0.0009976028731086665, "loss": 2.4911, "step": 425120 }, { "epoch": 0.8469534935611374, "grad_norm": 0.1956861913204193, "learning_rate": 0.0009974033712404572, "loss": 2.4942, "step": 425130 }, { "epoch": 0.8469734157847763, "grad_norm": 0.15335488319396973, "learning_rate": 0.000997203909062266, "loss": 2.4858, "step": 425140 }, { "epoch": 0.8469933380084151, "grad_norm": 0.19948190450668335, "learning_rate": 0.0009970044865504134, "loss": 2.4825, "step": 425150 }, { "epoch": 0.847013260232054, "grad_norm": 0.17652353644371033, "learning_rate": 0.0009968051036812438, "loss": 2.4918, "step": 425160 }, { "epoch": 0.8470331824556929, "grad_norm": 0.17326189577579498, "learning_rate": 0.0009966057604311256, "loss": 2.4922, "step": 425170 }, { "epoch": 0.8470531046793319, "grad_norm": 0.17512880265712738, "learning_rate": 0.0009964064567764496, "loss": 2.4922, "step": 425180 }, { "epoch": 0.8470730269029708, "grad_norm": 0.18605579435825348, "learning_rate": 0.0009962071926936315, "loss": 2.4894, "step": 425190 }, { "epoch": 0.8470929491266097, "grad_norm": 0.1588774025440216, "learning_rate": 0.0009960079681591094, "loss": 2.4879, "step": 425200 }, { "epoch": 0.8471128713502486, "grad_norm": 0.16370068490505219, "learning_rate": 0.0009958087831493448, "loss": 2.4889, "step": 425210 }, { "epoch": 0.8471327935738875, "grad_norm": 0.16769476234912872, "learning_rate": 0.0009956096376408225, "loss": 2.4863, "step": 425220 }, { "epoch": 0.8471527157975265, "grad_norm": 0.16626083850860596, "learning_rate": 0.0009954105316100513, "loss": 2.4889, "step": 425230 }, { "epoch": 0.8471726380211654, "grad_norm": 0.17894548177719116, "learning_rate": 0.0009952114650335624, "loss": 2.4868, "step": 425240 }, { "epoch": 0.8471925602448043, "grad_norm": 0.17121641337871552, "learning_rate": 0.000995012437887911, "loss": 2.4924, "step": 425250 }, { "epoch": 0.8472124824684432, "grad_norm": 0.17494848370552063, "learning_rate": 0.0009948134501496751, "loss": 2.4878, "step": 425260 }, { "epoch": 0.8472324046920822, "grad_norm": 0.17847993969917297, "learning_rate": 0.0009946145017954554, "loss": 2.5104, "step": 425270 }, { "epoch": 0.847252326915721, "grad_norm": 0.17218011617660522, "learning_rate": 0.0009944155928018773, "loss": 2.4766, "step": 425280 }, { "epoch": 0.84727224913936, "grad_norm": 0.164812371134758, "learning_rate": 0.0009942167231455875, "loss": 2.502, "step": 425290 }, { "epoch": 0.8472921713629988, "grad_norm": 0.16051749885082245, "learning_rate": 0.0009940178928032567, "loss": 2.4871, "step": 425300 }, { "epoch": 0.8473120935866377, "grad_norm": 0.1700105220079422, "learning_rate": 0.0009938191017515787, "loss": 2.4711, "step": 425310 }, { "epoch": 0.8473320158102767, "grad_norm": 0.16758178174495697, "learning_rate": 0.00099362034996727, "loss": 2.4886, "step": 425320 }, { "epoch": 0.8473519380339156, "grad_norm": 0.18707667291164398, "learning_rate": 0.0009934216374270703, "loss": 2.4819, "step": 425330 }, { "epoch": 0.8473718602575545, "grad_norm": 0.16270211338996887, "learning_rate": 0.0009932229641077424, "loss": 2.4911, "step": 425340 }, { "epoch": 0.8473917824811934, "grad_norm": 0.198701411485672, "learning_rate": 0.0009930243299860718, "loss": 2.4844, "step": 425350 }, { "epoch": 0.8474117047048323, "grad_norm": 0.14787934720516205, "learning_rate": 0.0009928257350388663, "loss": 2.4829, "step": 425360 }, { "epoch": 0.8474316269284713, "grad_norm": 0.17203816771507263, "learning_rate": 0.0009926271792429578, "loss": 2.4713, "step": 425370 }, { "epoch": 0.8474515491521102, "grad_norm": 0.21232037246227264, "learning_rate": 0.0009924286625752, "loss": 2.5008, "step": 425380 }, { "epoch": 0.8474714713757491, "grad_norm": 0.16259069740772247, "learning_rate": 0.0009922301850124702, "loss": 2.4878, "step": 425390 }, { "epoch": 0.847491393599388, "grad_norm": 0.20358312129974365, "learning_rate": 0.0009920317465316676, "loss": 2.4791, "step": 425400 }, { "epoch": 0.8475113158230269, "grad_norm": 0.1995570808649063, "learning_rate": 0.0009918333471097152, "loss": 2.5005, "step": 425410 }, { "epoch": 0.8475312380466659, "grad_norm": 0.17310285568237305, "learning_rate": 0.0009916349867235576, "loss": 2.5064, "step": 425420 }, { "epoch": 0.8475511602703047, "grad_norm": 0.16067130863666534, "learning_rate": 0.0009914366653501626, "loss": 2.4796, "step": 425430 }, { "epoch": 0.8475710824939436, "grad_norm": 0.16730137169361115, "learning_rate": 0.0009912383829665207, "loss": 2.482, "step": 425440 }, { "epoch": 0.8475910047175825, "grad_norm": 0.15950186550617218, "learning_rate": 0.000991040139549645, "loss": 2.4867, "step": 425450 }, { "epoch": 0.8476109269412214, "grad_norm": 0.15679968893527985, "learning_rate": 0.000990841935076571, "loss": 2.507, "step": 425460 }, { "epoch": 0.8476308491648604, "grad_norm": 0.16235055029392242, "learning_rate": 0.0009906437695243566, "loss": 2.4895, "step": 425470 }, { "epoch": 0.8476507713884993, "grad_norm": 0.17396312952041626, "learning_rate": 0.000990445642870083, "loss": 2.4816, "step": 425480 }, { "epoch": 0.8476706936121382, "grad_norm": 0.18234483897686005, "learning_rate": 0.000990247555090853, "loss": 2.4821, "step": 425490 }, { "epoch": 0.8476906158357771, "grad_norm": 0.17263765633106232, "learning_rate": 0.000990049506163792, "loss": 2.4955, "step": 425500 }, { "epoch": 0.847710538059416, "grad_norm": 0.16755199432373047, "learning_rate": 0.0009898514960660488, "loss": 2.4969, "step": 425510 }, { "epoch": 0.847730460283055, "grad_norm": 0.1927979290485382, "learning_rate": 0.000989653524774793, "loss": 2.4788, "step": 425520 }, { "epoch": 0.8477503825066939, "grad_norm": 0.18026915192604065, "learning_rate": 0.0009894555922672175, "loss": 2.478, "step": 425530 }, { "epoch": 0.8477703047303328, "grad_norm": 0.15686848759651184, "learning_rate": 0.0009892576985205377, "loss": 2.5001, "step": 425540 }, { "epoch": 0.8477902269539717, "grad_norm": 0.16468952596187592, "learning_rate": 0.000989059843511991, "loss": 2.5009, "step": 425550 }, { "epoch": 0.8478101491776107, "grad_norm": 0.1441439390182495, "learning_rate": 0.0009888620272188369, "loss": 2.485, "step": 425560 }, { "epoch": 0.8478300714012496, "grad_norm": 0.1898316591978073, "learning_rate": 0.0009886642496183574, "loss": 2.4951, "step": 425570 }, { "epoch": 0.8478499936248884, "grad_norm": 0.16884848475456238, "learning_rate": 0.0009884665106878565, "loss": 2.4923, "step": 425580 }, { "epoch": 0.8478699158485273, "grad_norm": 0.15392926335334778, "learning_rate": 0.000988268810404661, "loss": 2.4967, "step": 425590 }, { "epoch": 0.8478898380721662, "grad_norm": 0.1793208122253418, "learning_rate": 0.0009880711487461186, "loss": 2.4946, "step": 425600 }, { "epoch": 0.8479097602958052, "grad_norm": 0.1780160516500473, "learning_rate": 0.0009878735256896004, "loss": 2.494, "step": 425610 }, { "epoch": 0.8479296825194441, "grad_norm": 0.16484273970127106, "learning_rate": 0.000987675941212499, "loss": 2.4975, "step": 425620 }, { "epoch": 0.847949604743083, "grad_norm": 0.1743893027305603, "learning_rate": 0.0009874783952922288, "loss": 2.495, "step": 425630 }, { "epoch": 0.8479695269667219, "grad_norm": 0.1697520762681961, "learning_rate": 0.000987280887906227, "loss": 2.5032, "step": 425640 }, { "epoch": 0.8479894491903608, "grad_norm": 0.18543961644172668, "learning_rate": 0.000987083419031952, "loss": 2.4821, "step": 425650 }, { "epoch": 0.8480093714139998, "grad_norm": 0.16008219122886658, "learning_rate": 0.000986885988646885, "loss": 2.5035, "step": 425660 }, { "epoch": 0.8480292936376387, "grad_norm": 0.17598433792591095, "learning_rate": 0.000986688596728528, "loss": 2.4867, "step": 425670 }, { "epoch": 0.8480492158612776, "grad_norm": 0.19825828075408936, "learning_rate": 0.000986491243254406, "loss": 2.4943, "step": 425680 }, { "epoch": 0.8480691380849165, "grad_norm": 0.1633259505033493, "learning_rate": 0.0009862939282020652, "loss": 2.5128, "step": 425690 }, { "epoch": 0.8480890603085554, "grad_norm": 0.1501120775938034, "learning_rate": 0.000986096651549074, "loss": 2.5134, "step": 425700 }, { "epoch": 0.8481089825321944, "grad_norm": 0.1938427984714508, "learning_rate": 0.0009858994132730225, "loss": 2.4827, "step": 425710 }, { "epoch": 0.8481289047558332, "grad_norm": 0.15203936398029327, "learning_rate": 0.0009857022133515228, "loss": 2.4837, "step": 425720 }, { "epoch": 0.8481488269794721, "grad_norm": 0.18570004403591156, "learning_rate": 0.0009855050517622083, "loss": 2.4851, "step": 425730 }, { "epoch": 0.848168749203111, "grad_norm": 0.16619543731212616, "learning_rate": 0.0009853079284827342, "loss": 2.4833, "step": 425740 }, { "epoch": 0.8481886714267499, "grad_norm": 0.18298469483852386, "learning_rate": 0.000985110843490778, "loss": 2.4951, "step": 425750 }, { "epoch": 0.8482085936503889, "grad_norm": 0.1684761941432953, "learning_rate": 0.0009849137967640384, "loss": 2.4852, "step": 425760 }, { "epoch": 0.8482285158740278, "grad_norm": 0.1607615351676941, "learning_rate": 0.0009847167882802356, "loss": 2.4877, "step": 425770 }, { "epoch": 0.8482484380976667, "grad_norm": 0.16184070706367493, "learning_rate": 0.0009845198180171118, "loss": 2.5015, "step": 425780 }, { "epoch": 0.8482683603213056, "grad_norm": 0.19303283095359802, "learning_rate": 0.0009843228859524303, "loss": 2.4905, "step": 425790 }, { "epoch": 0.8482882825449445, "grad_norm": 0.1617332547903061, "learning_rate": 0.0009841259920639765, "loss": 2.4922, "step": 425800 }, { "epoch": 0.8483082047685835, "grad_norm": 0.17977815866470337, "learning_rate": 0.0009839291363295572, "loss": 2.4822, "step": 425810 }, { "epoch": 0.8483281269922224, "grad_norm": 0.1606600284576416, "learning_rate": 0.000983732318727, "loss": 2.5076, "step": 425820 }, { "epoch": 0.8483480492158613, "grad_norm": 0.18963539600372314, "learning_rate": 0.0009835355392341552, "loss": 2.4817, "step": 425830 }, { "epoch": 0.8483679714395002, "grad_norm": 0.21766221523284912, "learning_rate": 0.0009833387978288933, "loss": 2.4947, "step": 425840 }, { "epoch": 0.8483878936631392, "grad_norm": 0.16184014081954956, "learning_rate": 0.0009831420944891073, "loss": 2.4858, "step": 425850 }, { "epoch": 0.848407815886778, "grad_norm": 0.18834492564201355, "learning_rate": 0.0009829454291927105, "loss": 2.5021, "step": 425860 }, { "epoch": 0.8484277381104169, "grad_norm": 0.16220539808273315, "learning_rate": 0.0009827488019176385, "loss": 2.478, "step": 425870 }, { "epoch": 0.8484476603340558, "grad_norm": 0.19130706787109375, "learning_rate": 0.0009825522126418477, "loss": 2.4999, "step": 425880 }, { "epoch": 0.8484675825576947, "grad_norm": 0.1637106090784073, "learning_rate": 0.0009823556613433157, "loss": 2.4862, "step": 425890 }, { "epoch": 0.8484875047813337, "grad_norm": 0.17480573058128357, "learning_rate": 0.0009821591480000422, "loss": 2.48, "step": 425900 }, { "epoch": 0.8485074270049726, "grad_norm": 0.1607401818037033, "learning_rate": 0.000981962672590047, "loss": 2.4904, "step": 425910 }, { "epoch": 0.8485273492286115, "grad_norm": 0.16061259806156158, "learning_rate": 0.0009817662350913717, "loss": 2.4982, "step": 425920 }, { "epoch": 0.8485472714522504, "grad_norm": 0.36648550629615784, "learning_rate": 0.0009815698354820788, "loss": 2.4922, "step": 425930 }, { "epoch": 0.8485671936758893, "grad_norm": 0.16271230578422546, "learning_rate": 0.0009813734737402523, "loss": 2.4855, "step": 425940 }, { "epoch": 0.8485871158995283, "grad_norm": 0.18631282448768616, "learning_rate": 0.0009811771498439976, "loss": 2.4964, "step": 425950 }, { "epoch": 0.8486070381231672, "grad_norm": 0.20439842343330383, "learning_rate": 0.0009809808637714404, "loss": 2.4846, "step": 425960 }, { "epoch": 0.8486269603468061, "grad_norm": 0.1688450127840042, "learning_rate": 0.0009807846155007274, "loss": 2.5026, "step": 425970 }, { "epoch": 0.848646882570445, "grad_norm": 0.15831191837787628, "learning_rate": 0.0009805884050100274, "loss": 2.5029, "step": 425980 }, { "epoch": 0.8486668047940839, "grad_norm": 0.17921338975429535, "learning_rate": 0.000980392232277529, "loss": 2.4888, "step": 425990 }, { "epoch": 0.8486867270177229, "grad_norm": 0.2039492428302765, "learning_rate": 0.000980196097281443, "loss": 2.4862, "step": 426000 }, { "epoch": 0.8487066492413617, "grad_norm": 0.17582044005393982, "learning_rate": 0.00098, "loss": 2.4963, "step": 426010 }, { "epoch": 0.8487265714650006, "grad_norm": 0.16314376890659332, "learning_rate": 0.000979803940411452, "loss": 2.4923, "step": 426020 }, { "epoch": 0.8487464936886395, "grad_norm": 0.18010929226875305, "learning_rate": 0.0009796079184940722, "loss": 2.4786, "step": 426030 }, { "epoch": 0.8487664159122784, "grad_norm": 0.19320233166217804, "learning_rate": 0.000979411934226154, "loss": 2.493, "step": 426040 }, { "epoch": 0.8487863381359174, "grad_norm": 0.19163358211517334, "learning_rate": 0.0009792159875860124, "loss": 2.4878, "step": 426050 }, { "epoch": 0.8488062603595563, "grad_norm": 0.18898166716098785, "learning_rate": 0.0009790200785519826, "loss": 2.4975, "step": 426060 }, { "epoch": 0.8488261825831952, "grad_norm": 0.16448922455310822, "learning_rate": 0.0009788242071024206, "loss": 2.4931, "step": 426070 }, { "epoch": 0.8488461048068341, "grad_norm": 0.1710217446088791, "learning_rate": 0.0009786283732157038, "loss": 2.4886, "step": 426080 }, { "epoch": 0.848866027030473, "grad_norm": 0.17307282984256744, "learning_rate": 0.0009784325768702293, "loss": 2.4835, "step": 426090 }, { "epoch": 0.848885949254112, "grad_norm": 0.17667153477668762, "learning_rate": 0.0009782368180444158, "loss": 2.4917, "step": 426100 }, { "epoch": 0.8489058714777509, "grad_norm": 0.1615779846906662, "learning_rate": 0.0009780410967167026, "loss": 2.5053, "step": 426110 }, { "epoch": 0.8489257937013898, "grad_norm": 0.16318467259407043, "learning_rate": 0.000977845412865549, "loss": 2.4903, "step": 426120 }, { "epoch": 0.8489457159250287, "grad_norm": 0.16180144250392914, "learning_rate": 0.0009776497664694356, "loss": 2.4939, "step": 426130 }, { "epoch": 0.8489656381486677, "grad_norm": 0.18114310503005981, "learning_rate": 0.0009774541575068628, "loss": 2.5076, "step": 426140 }, { "epoch": 0.8489855603723065, "grad_norm": 0.20027467608451843, "learning_rate": 0.0009772585859563524, "loss": 2.4879, "step": 426150 }, { "epoch": 0.8490054825959454, "grad_norm": 0.18501119315624237, "learning_rate": 0.0009770630517964463, "loss": 2.505, "step": 426160 }, { "epoch": 0.8490254048195843, "grad_norm": 0.17607098817825317, "learning_rate": 0.0009768675550057072, "loss": 2.4904, "step": 426170 }, { "epoch": 0.8490453270432232, "grad_norm": 0.15471813082695007, "learning_rate": 0.0009766720955627175, "loss": 2.5002, "step": 426180 }, { "epoch": 0.8490652492668622, "grad_norm": 0.1859438270330429, "learning_rate": 0.000976476673446081, "loss": 2.487, "step": 426190 }, { "epoch": 0.8490851714905011, "grad_norm": 0.17427697777748108, "learning_rate": 0.0009762812886344219, "loss": 2.5026, "step": 426200 }, { "epoch": 0.84910509371414, "grad_norm": 0.166532963514328, "learning_rate": 0.0009760859411063836, "loss": 2.4933, "step": 426210 }, { "epoch": 0.8491250159377789, "grad_norm": 0.16922400891780853, "learning_rate": 0.0009758906308406315, "loss": 2.4835, "step": 426220 }, { "epoch": 0.8491449381614178, "grad_norm": 0.16433373093605042, "learning_rate": 0.0009756953578158498, "loss": 2.4949, "step": 426230 }, { "epoch": 0.8491648603850568, "grad_norm": 0.20721188187599182, "learning_rate": 0.0009755001220107442, "loss": 2.4798, "step": 426240 }, { "epoch": 0.8491847826086957, "grad_norm": 0.15745969116687775, "learning_rate": 0.00097530492340404, "loss": 2.4858, "step": 426250 }, { "epoch": 0.8492047048323346, "grad_norm": 0.1820092350244522, "learning_rate": 0.0009751097619744836, "loss": 2.4858, "step": 426260 }, { "epoch": 0.8492246270559735, "grad_norm": 0.17056185007095337, "learning_rate": 0.00097491463770084, "loss": 2.5095, "step": 426270 }, { "epoch": 0.8492445492796123, "grad_norm": 0.17307624220848083, "learning_rate": 0.0009747195505618964, "loss": 2.4757, "step": 426280 }, { "epoch": 0.8492644715032514, "grad_norm": 0.18304455280303955, "learning_rate": 0.0009745245005364585, "loss": 2.4918, "step": 426290 }, { "epoch": 0.8492843937268902, "grad_norm": 0.18136784434318542, "learning_rate": 0.0009743294876033532, "loss": 2.4782, "step": 426300 }, { "epoch": 0.8493043159505291, "grad_norm": 0.16102229058742523, "learning_rate": 0.0009741345117414273, "loss": 2.4744, "step": 426310 }, { "epoch": 0.849324238174168, "grad_norm": 0.19051247835159302, "learning_rate": 0.0009739395729295473, "loss": 2.5027, "step": 426320 }, { "epoch": 0.8493441603978069, "grad_norm": 0.165945902466774, "learning_rate": 0.0009737446711466002, "loss": 2.4769, "step": 426330 }, { "epoch": 0.8493640826214459, "grad_norm": 0.1892751157283783, "learning_rate": 0.0009735498063714927, "loss": 2.4932, "step": 426340 }, { "epoch": 0.8493840048450848, "grad_norm": 0.16821882128715515, "learning_rate": 0.0009733549785831522, "loss": 2.4923, "step": 426350 }, { "epoch": 0.8494039270687237, "grad_norm": 0.1760510802268982, "learning_rate": 0.0009731601877605252, "loss": 2.4853, "step": 426360 }, { "epoch": 0.8494238492923626, "grad_norm": 0.18671078979969025, "learning_rate": 0.0009729654338825786, "loss": 2.505, "step": 426370 }, { "epoch": 0.8494437715160015, "grad_norm": 0.18819324672222137, "learning_rate": 0.0009727707169282996, "loss": 2.4906, "step": 426380 }, { "epoch": 0.8494636937396405, "grad_norm": 0.1782270073890686, "learning_rate": 0.0009725760368766943, "loss": 2.4933, "step": 426390 }, { "epoch": 0.8494836159632794, "grad_norm": 0.19748537242412567, "learning_rate": 0.0009723813937067895, "loss": 2.4942, "step": 426400 }, { "epoch": 0.8495035381869183, "grad_norm": 0.1675967127084732, "learning_rate": 0.0009721867873976322, "loss": 2.4888, "step": 426410 }, { "epoch": 0.8495234604105572, "grad_norm": 0.17771534621715546, "learning_rate": 0.0009719922179282883, "loss": 2.4966, "step": 426420 }, { "epoch": 0.8495433826341962, "grad_norm": 0.16859649121761322, "learning_rate": 0.0009717976852778438, "loss": 2.4876, "step": 426430 }, { "epoch": 0.849563304857835, "grad_norm": 0.15423156321048737, "learning_rate": 0.0009716031894254048, "loss": 2.4802, "step": 426440 }, { "epoch": 0.8495832270814739, "grad_norm": 0.16913846135139465, "learning_rate": 0.0009714087303500967, "loss": 2.4984, "step": 426450 }, { "epoch": 0.8496031493051128, "grad_norm": 0.18923260271549225, "learning_rate": 0.0009712143080310651, "loss": 2.4949, "step": 426460 }, { "epoch": 0.8496230715287517, "grad_norm": 0.17133831977844238, "learning_rate": 0.0009710199224474751, "loss": 2.4929, "step": 426470 }, { "epoch": 0.8496429937523907, "grad_norm": 0.15816514194011688, "learning_rate": 0.0009708255735785114, "loss": 2.4783, "step": 426480 }, { "epoch": 0.8496629159760296, "grad_norm": 0.15699727833271027, "learning_rate": 0.0009706312614033783, "loss": 2.4861, "step": 426490 }, { "epoch": 0.8496828381996685, "grad_norm": 0.15402238070964813, "learning_rate": 0.0009704369859013, "loss": 2.499, "step": 426500 }, { "epoch": 0.8497027604233074, "grad_norm": 0.16205857694149017, "learning_rate": 0.0009702427470515198, "loss": 2.4909, "step": 426510 }, { "epoch": 0.8497226826469463, "grad_norm": 0.18515026569366455, "learning_rate": 0.0009700485448333014, "loss": 2.4744, "step": 426520 }, { "epoch": 0.8497426048705853, "grad_norm": 0.18757373094558716, "learning_rate": 0.0009698543792259272, "loss": 2.4951, "step": 426530 }, { "epoch": 0.8497625270942242, "grad_norm": 0.17342348396778107, "learning_rate": 0.0009696602502086993, "loss": 2.4925, "step": 426540 }, { "epoch": 0.8497824493178631, "grad_norm": 0.16764084994792938, "learning_rate": 0.0009694661577609398, "loss": 2.4995, "step": 426550 }, { "epoch": 0.849802371541502, "grad_norm": 0.17862027883529663, "learning_rate": 0.0009692721018619898, "loss": 2.4952, "step": 426560 }, { "epoch": 0.8498222937651408, "grad_norm": 0.1849038004875183, "learning_rate": 0.0009690780824912102, "loss": 2.4958, "step": 426570 }, { "epoch": 0.8498422159887798, "grad_norm": 0.16005003452301025, "learning_rate": 0.0009688840996279808, "loss": 2.4899, "step": 426580 }, { "epoch": 0.8498621382124187, "grad_norm": 0.15844540297985077, "learning_rate": 0.000968690153251701, "loss": 2.5033, "step": 426590 }, { "epoch": 0.8498820604360576, "grad_norm": 0.1808193027973175, "learning_rate": 0.0009684962433417899, "loss": 2.4898, "step": 426600 }, { "epoch": 0.8499019826596965, "grad_norm": 0.15806470811367035, "learning_rate": 0.0009683023698776856, "loss": 2.4999, "step": 426610 }, { "epoch": 0.8499219048833354, "grad_norm": 0.17374545335769653, "learning_rate": 0.0009681085328388455, "loss": 2.4905, "step": 426620 }, { "epoch": 0.8499418271069744, "grad_norm": 0.18012620508670807, "learning_rate": 0.0009679147322047465, "loss": 2.4918, "step": 426630 }, { "epoch": 0.8499617493306133, "grad_norm": 0.1578352302312851, "learning_rate": 0.0009677209679548848, "loss": 2.4761, "step": 426640 }, { "epoch": 0.8499816715542522, "grad_norm": 0.16814209520816803, "learning_rate": 0.0009675272400687755, "loss": 2.4904, "step": 426650 }, { "epoch": 0.8500015937778911, "grad_norm": 0.1815982460975647, "learning_rate": 0.0009673335485259531, "loss": 2.4873, "step": 426660 }, { "epoch": 0.85002151600153, "grad_norm": 0.17714513838291168, "learning_rate": 0.0009671398933059714, "loss": 2.4968, "step": 426670 }, { "epoch": 0.850041438225169, "grad_norm": 0.16171523928642273, "learning_rate": 0.0009669462743884035, "loss": 2.4986, "step": 426680 }, { "epoch": 0.8500613604488079, "grad_norm": 0.18871267139911652, "learning_rate": 0.0009667526917528409, "loss": 2.4834, "step": 426690 }, { "epoch": 0.8500812826724468, "grad_norm": 0.17674243450164795, "learning_rate": 0.0009665591453788949, "loss": 2.486, "step": 426700 }, { "epoch": 0.8501012048960856, "grad_norm": 0.16745154559612274, "learning_rate": 0.000966365635246196, "loss": 2.4825, "step": 426710 }, { "epoch": 0.8501211271197245, "grad_norm": 0.16343359649181366, "learning_rate": 0.0009661721613343933, "loss": 2.4892, "step": 426720 }, { "epoch": 0.8501410493433635, "grad_norm": 0.16226190328598022, "learning_rate": 0.0009659787236231549, "loss": 2.4895, "step": 426730 }, { "epoch": 0.8501609715670024, "grad_norm": 0.18585790693759918, "learning_rate": 0.0009657853220921684, "loss": 2.4969, "step": 426740 }, { "epoch": 0.8501808937906413, "grad_norm": 0.1870187371969223, "learning_rate": 0.0009655919567211399, "loss": 2.5096, "step": 426750 }, { "epoch": 0.8502008160142802, "grad_norm": 0.16599296033382416, "learning_rate": 0.0009653986274897951, "loss": 2.5036, "step": 426760 }, { "epoch": 0.8502207382379192, "grad_norm": 0.19691473245620728, "learning_rate": 0.0009652053343778777, "loss": 2.486, "step": 426770 }, { "epoch": 0.8502406604615581, "grad_norm": 0.18940387666225433, "learning_rate": 0.0009650120773651512, "loss": 2.4887, "step": 426780 }, { "epoch": 0.850260582685197, "grad_norm": 0.17082209885120392, "learning_rate": 0.0009648188564313974, "loss": 2.4895, "step": 426790 }, { "epoch": 0.8502805049088359, "grad_norm": 0.17596164345741272, "learning_rate": 0.0009646256715564172, "loss": 2.491, "step": 426800 }, { "epoch": 0.8503004271324748, "grad_norm": 0.19353727996349335, "learning_rate": 0.0009644325227200305, "loss": 2.4877, "step": 426810 }, { "epoch": 0.8503203493561138, "grad_norm": 0.19668693840503693, "learning_rate": 0.0009642394099020759, "loss": 2.4813, "step": 426820 }, { "epoch": 0.8503402715797527, "grad_norm": 0.19850629568099976, "learning_rate": 0.0009640463330824105, "loss": 2.5043, "step": 426830 }, { "epoch": 0.8503601938033916, "grad_norm": 0.17500659823417664, "learning_rate": 0.0009638532922409105, "loss": 2.4891, "step": 426840 }, { "epoch": 0.8503801160270305, "grad_norm": 0.19939163327217102, "learning_rate": 0.0009636602873574707, "loss": 2.506, "step": 426850 }, { "epoch": 0.8504000382506693, "grad_norm": 0.19878007471561432, "learning_rate": 0.0009634673184120048, "loss": 2.4649, "step": 426860 }, { "epoch": 0.8504199604743083, "grad_norm": 0.17148667573928833, "learning_rate": 0.0009632743853844452, "loss": 2.4909, "step": 426870 }, { "epoch": 0.8504398826979472, "grad_norm": 0.1763867884874344, "learning_rate": 0.0009630814882547425, "loss": 2.482, "step": 426880 }, { "epoch": 0.8504598049215861, "grad_norm": 0.15771862864494324, "learning_rate": 0.0009628886270028663, "loss": 2.4798, "step": 426890 }, { "epoch": 0.850479727145225, "grad_norm": 0.15379847586154938, "learning_rate": 0.0009626958016088048, "loss": 2.4822, "step": 426900 }, { "epoch": 0.8504996493688639, "grad_norm": 0.1802392601966858, "learning_rate": 0.0009625030120525651, "loss": 2.4921, "step": 426910 }, { "epoch": 0.8505195715925029, "grad_norm": 0.16184104979038239, "learning_rate": 0.0009623102583141723, "loss": 2.4765, "step": 426920 }, { "epoch": 0.8505394938161418, "grad_norm": 0.18399809300899506, "learning_rate": 0.0009621175403736704, "loss": 2.4878, "step": 426930 }, { "epoch": 0.8505594160397807, "grad_norm": 0.23038284480571747, "learning_rate": 0.0009619248582111217, "loss": 2.4892, "step": 426940 }, { "epoch": 0.8505793382634196, "grad_norm": 0.18811045587062836, "learning_rate": 0.0009617322118066072, "loss": 2.4892, "step": 426950 }, { "epoch": 0.8505992604870585, "grad_norm": 0.17108234763145447, "learning_rate": 0.0009615396011402264, "loss": 2.4818, "step": 426960 }, { "epoch": 0.8506191827106975, "grad_norm": 0.17599335312843323, "learning_rate": 0.0009613470261920971, "loss": 2.5015, "step": 426970 }, { "epoch": 0.8506391049343364, "grad_norm": 0.19087155163288116, "learning_rate": 0.0009611544869423559, "loss": 2.4914, "step": 426980 }, { "epoch": 0.8506590271579753, "grad_norm": 0.18362218141555786, "learning_rate": 0.0009609619833711569, "loss": 2.5063, "step": 426990 }, { "epoch": 0.8506789493816141, "grad_norm": 0.16841718554496765, "learning_rate": 0.0009607695154586736, "loss": 2.4916, "step": 427000 }, { "epoch": 0.850698871605253, "grad_norm": 0.17084550857543945, "learning_rate": 0.0009605770831850973, "loss": 2.4965, "step": 427010 }, { "epoch": 0.850718793828892, "grad_norm": 0.16559939086437225, "learning_rate": 0.000960384686530638, "loss": 2.4737, "step": 427020 }, { "epoch": 0.8507387160525309, "grad_norm": 0.1974034309387207, "learning_rate": 0.0009601923254755234, "loss": 2.4972, "step": 427030 }, { "epoch": 0.8507586382761698, "grad_norm": 0.18152377009391785, "learning_rate": 0.00096, "loss": 2.4978, "step": 427040 }, { "epoch": 0.8507785604998087, "grad_norm": 0.17843568325042725, "learning_rate": 0.0009598077100843325, "loss": 2.4914, "step": 427050 }, { "epoch": 0.8507984827234477, "grad_norm": 0.18557067215442657, "learning_rate": 0.0009596154557088037, "loss": 2.4792, "step": 427060 }, { "epoch": 0.8508184049470866, "grad_norm": 0.170054629445076, "learning_rate": 0.0009594232368537147, "loss": 2.4924, "step": 427070 }, { "epoch": 0.8508383271707255, "grad_norm": 0.17135624587535858, "learning_rate": 0.0009592310534993851, "loss": 2.4851, "step": 427080 }, { "epoch": 0.8508582493943644, "grad_norm": 0.20910842716693878, "learning_rate": 0.0009590389056261517, "loss": 2.4811, "step": 427090 }, { "epoch": 0.8508781716180033, "grad_norm": 0.19649644196033478, "learning_rate": 0.0009588467932143703, "loss": 2.4988, "step": 427100 }, { "epoch": 0.8508980938416423, "grad_norm": 0.1741069257259369, "learning_rate": 0.000958654716244415, "loss": 2.4935, "step": 427110 }, { "epoch": 0.8509180160652812, "grad_norm": 0.16559679806232452, "learning_rate": 0.0009584626746966771, "loss": 2.5228, "step": 427120 }, { "epoch": 0.8509379382889201, "grad_norm": 0.16562069952487946, "learning_rate": 0.0009582706685515668, "loss": 2.4865, "step": 427130 }, { "epoch": 0.850957860512559, "grad_norm": 0.17457683384418488, "learning_rate": 0.0009580786977895115, "loss": 2.4746, "step": 427140 }, { "epoch": 0.8509777827361978, "grad_norm": 0.17927272617816925, "learning_rate": 0.0009578867623909577, "loss": 2.4868, "step": 427150 }, { "epoch": 0.8509977049598368, "grad_norm": 0.18304133415222168, "learning_rate": 0.0009576948623363692, "loss": 2.475, "step": 427160 }, { "epoch": 0.8510176271834757, "grad_norm": 0.16907665133476257, "learning_rate": 0.0009575029976062281, "loss": 2.4912, "step": 427170 }, { "epoch": 0.8510375494071146, "grad_norm": 0.16667303442955017, "learning_rate": 0.0009573111681810341, "loss": 2.4847, "step": 427180 }, { "epoch": 0.8510574716307535, "grad_norm": 0.2007875144481659, "learning_rate": 0.0009571193740413049, "loss": 2.49, "step": 427190 }, { "epoch": 0.8510773938543924, "grad_norm": 0.16791073977947235, "learning_rate": 0.000956927615167576, "loss": 2.4969, "step": 427200 }, { "epoch": 0.8510973160780314, "grad_norm": 0.17085614800453186, "learning_rate": 0.0009567358915404018, "loss": 2.4962, "step": 427210 }, { "epoch": 0.8511172383016703, "grad_norm": 0.16294841468334198, "learning_rate": 0.0009565442031403535, "loss": 2.4744, "step": 427220 }, { "epoch": 0.8511371605253092, "grad_norm": 0.17173053324222565, "learning_rate": 0.0009563525499480201, "loss": 2.5006, "step": 427230 }, { "epoch": 0.8511570827489481, "grad_norm": 0.1843913048505783, "learning_rate": 0.000956160931944009, "loss": 2.4915, "step": 427240 }, { "epoch": 0.851177004972587, "grad_norm": 0.16863925755023956, "learning_rate": 0.000955969349108945, "loss": 2.4937, "step": 427250 }, { "epoch": 0.851196927196226, "grad_norm": 0.178557351231575, "learning_rate": 0.0009557778014234711, "loss": 2.5, "step": 427260 }, { "epoch": 0.8512168494198649, "grad_norm": 0.19298800826072693, "learning_rate": 0.0009555862888682474, "loss": 2.4924, "step": 427270 }, { "epoch": 0.8512367716435038, "grad_norm": 0.17160868644714355, "learning_rate": 0.0009553948114239524, "loss": 2.4945, "step": 427280 }, { "epoch": 0.8512566938671426, "grad_norm": 0.17869634926319122, "learning_rate": 0.0009552033690712821, "loss": 2.4946, "step": 427290 }, { "epoch": 0.8512766160907815, "grad_norm": 0.16548854112625122, "learning_rate": 0.0009550119617909494, "loss": 2.4966, "step": 427300 }, { "epoch": 0.8512965383144205, "grad_norm": 0.205746591091156, "learning_rate": 0.0009548205895636864, "loss": 2.5002, "step": 427310 }, { "epoch": 0.8513164605380594, "grad_norm": 0.15866701304912567, "learning_rate": 0.0009546292523702416, "loss": 2.499, "step": 427320 }, { "epoch": 0.8513363827616983, "grad_norm": 0.1711042821407318, "learning_rate": 0.0009544379501913816, "loss": 2.4968, "step": 427330 }, { "epoch": 0.8513563049853372, "grad_norm": 0.21805240213871002, "learning_rate": 0.0009542466830078904, "loss": 2.4941, "step": 427340 }, { "epoch": 0.8513762272089762, "grad_norm": 0.15472356975078583, "learning_rate": 0.0009540554508005694, "loss": 2.4907, "step": 427350 }, { "epoch": 0.8513961494326151, "grad_norm": 0.16730299592018127, "learning_rate": 0.0009538642535502384, "loss": 2.4936, "step": 427360 }, { "epoch": 0.851416071656254, "grad_norm": 0.16251008212566376, "learning_rate": 0.0009536730912377336, "loss": 2.4852, "step": 427370 }, { "epoch": 0.8514359938798929, "grad_norm": 0.15121184289455414, "learning_rate": 0.0009534819638439096, "loss": 2.4864, "step": 427380 }, { "epoch": 0.8514559161035318, "grad_norm": 0.17018838226795197, "learning_rate": 0.000953290871349638, "loss": 2.4967, "step": 427390 }, { "epoch": 0.8514758383271708, "grad_norm": 0.1550029069185257, "learning_rate": 0.000953099813735808, "loss": 2.4959, "step": 427400 }, { "epoch": 0.8514957605508097, "grad_norm": 0.17700521647930145, "learning_rate": 0.0009529087909833261, "loss": 2.4846, "step": 427410 }, { "epoch": 0.8515156827744486, "grad_norm": 0.18961523473262787, "learning_rate": 0.0009527178030731163, "loss": 2.4963, "step": 427420 }, { "epoch": 0.8515356049980874, "grad_norm": 0.1805706024169922, "learning_rate": 0.0009525268499861202, "loss": 2.4952, "step": 427430 }, { "epoch": 0.8515555272217263, "grad_norm": 0.16322322189807892, "learning_rate": 0.0009523359317032964, "loss": 2.4921, "step": 427440 }, { "epoch": 0.8515754494453653, "grad_norm": 0.1736258864402771, "learning_rate": 0.0009521450482056211, "loss": 2.4844, "step": 427450 }, { "epoch": 0.8515953716690042, "grad_norm": 0.18128666281700134, "learning_rate": 0.0009519541994740879, "loss": 2.4759, "step": 427460 }, { "epoch": 0.8516152938926431, "grad_norm": 0.18671494722366333, "learning_rate": 0.0009517633854897072, "loss": 2.4972, "step": 427470 }, { "epoch": 0.851635216116282, "grad_norm": 0.16788950562477112, "learning_rate": 0.0009515726062335075, "loss": 2.4809, "step": 427480 }, { "epoch": 0.8516551383399209, "grad_norm": 0.19847247004508972, "learning_rate": 0.0009513818616865337, "loss": 2.4896, "step": 427490 }, { "epoch": 0.8516750605635599, "grad_norm": 0.18593713641166687, "learning_rate": 0.0009511911518298484, "loss": 2.5017, "step": 427500 }, { "epoch": 0.8516949827871988, "grad_norm": 0.16292692720890045, "learning_rate": 0.0009510004766445315, "loss": 2.497, "step": 427510 }, { "epoch": 0.8517149050108377, "grad_norm": 0.1559799611568451, "learning_rate": 0.0009508098361116799, "loss": 2.4982, "step": 427520 }, { "epoch": 0.8517348272344766, "grad_norm": 0.19561204314231873, "learning_rate": 0.0009506192302124077, "loss": 2.4934, "step": 427530 }, { "epoch": 0.8517547494581155, "grad_norm": 0.1704104244709015, "learning_rate": 0.0009504286589278459, "loss": 2.4889, "step": 427540 }, { "epoch": 0.8517746716817545, "grad_norm": 0.18943920731544495, "learning_rate": 0.0009502381222391432, "loss": 2.4966, "step": 427550 }, { "epoch": 0.8517945939053934, "grad_norm": 0.17588846385478973, "learning_rate": 0.0009500476201274651, "loss": 2.4848, "step": 427560 }, { "epoch": 0.8518145161290323, "grad_norm": 0.18619152903556824, "learning_rate": 0.0009498571525739938, "loss": 2.49, "step": 427570 }, { "epoch": 0.8518344383526711, "grad_norm": 0.1690993309020996, "learning_rate": 0.0009496667195599294, "loss": 2.4927, "step": 427580 }, { "epoch": 0.85185436057631, "grad_norm": 0.1900196224451065, "learning_rate": 0.0009494763210664883, "loss": 2.5091, "step": 427590 }, { "epoch": 0.851874282799949, "grad_norm": 0.2043789029121399, "learning_rate": 0.0009492859570749042, "loss": 2.4897, "step": 427600 }, { "epoch": 0.8518942050235879, "grad_norm": 0.1543407440185547, "learning_rate": 0.000949095627566428, "loss": 2.4838, "step": 427610 }, { "epoch": 0.8519141272472268, "grad_norm": 0.16692163050174713, "learning_rate": 0.0009489053325223269, "loss": 2.4833, "step": 427620 }, { "epoch": 0.8519340494708657, "grad_norm": 0.17450617253780365, "learning_rate": 0.0009487150719238862, "loss": 2.4817, "step": 427630 }, { "epoch": 0.8519539716945047, "grad_norm": 0.22258685529232025, "learning_rate": 0.0009485248457524068, "loss": 2.4974, "step": 427640 }, { "epoch": 0.8519738939181436, "grad_norm": 0.17631298303604126, "learning_rate": 0.0009483346539892075, "loss": 2.4606, "step": 427650 }, { "epoch": 0.8519938161417825, "grad_norm": 0.27481940388679504, "learning_rate": 0.0009481444966156237, "loss": 2.4839, "step": 427660 }, { "epoch": 0.8520137383654214, "grad_norm": 0.20340731739997864, "learning_rate": 0.0009479543736130072, "loss": 2.4865, "step": 427670 }, { "epoch": 0.8520336605890603, "grad_norm": 0.18002811074256897, "learning_rate": 0.0009477642849627277, "loss": 2.4991, "step": 427680 }, { "epoch": 0.8520535828126993, "grad_norm": 0.17228421568870544, "learning_rate": 0.0009475742306461705, "loss": 2.4665, "step": 427690 }, { "epoch": 0.8520735050363382, "grad_norm": 0.17872454226016998, "learning_rate": 0.0009473842106447385, "loss": 2.4763, "step": 427700 }, { "epoch": 0.852093427259977, "grad_norm": 0.1566232591867447, "learning_rate": 0.0009471942249398515, "loss": 2.4842, "step": 427710 }, { "epoch": 0.852113349483616, "grad_norm": 0.16379588842391968, "learning_rate": 0.0009470042735129454, "loss": 2.492, "step": 427720 }, { "epoch": 0.8521332717072548, "grad_norm": 0.15261925756931305, "learning_rate": 0.0009468143563454732, "loss": 2.4875, "step": 427730 }, { "epoch": 0.8521531939308938, "grad_norm": 0.15785157680511475, "learning_rate": 0.0009466244734189046, "loss": 2.5053, "step": 427740 }, { "epoch": 0.8521731161545327, "grad_norm": 0.17596867680549622, "learning_rate": 0.0009464346247147261, "loss": 2.485, "step": 427750 }, { "epoch": 0.8521930383781716, "grad_norm": 0.17690329253673553, "learning_rate": 0.0009462448102144407, "loss": 2.4875, "step": 427760 }, { "epoch": 0.8522129606018105, "grad_norm": 0.18338771164417267, "learning_rate": 0.0009460550298995683, "loss": 2.4798, "step": 427770 }, { "epoch": 0.8522328828254494, "grad_norm": 0.18200771510601044, "learning_rate": 0.0009458652837516451, "loss": 2.4739, "step": 427780 }, { "epoch": 0.8522528050490884, "grad_norm": 0.15991947054862976, "learning_rate": 0.0009456755717522239, "loss": 2.4993, "step": 427790 }, { "epoch": 0.8522727272727273, "grad_norm": 0.17616130411624908, "learning_rate": 0.0009454858938828745, "loss": 2.4877, "step": 427800 }, { "epoch": 0.8522926494963662, "grad_norm": 0.17102108895778656, "learning_rate": 0.0009452962501251831, "loss": 2.4918, "step": 427810 }, { "epoch": 0.8523125717200051, "grad_norm": 0.19490943849086761, "learning_rate": 0.0009451066404607525, "loss": 2.4903, "step": 427820 }, { "epoch": 0.852332493943644, "grad_norm": 0.1673220843076706, "learning_rate": 0.0009449170648712018, "loss": 2.4928, "step": 427830 }, { "epoch": 0.852352416167283, "grad_norm": 0.17442017793655396, "learning_rate": 0.0009447275233381664, "loss": 2.4853, "step": 427840 }, { "epoch": 0.8523723383909219, "grad_norm": 0.18403498828411102, "learning_rate": 0.000944538015843299, "loss": 2.5021, "step": 427850 }, { "epoch": 0.8523922606145607, "grad_norm": 0.1561926305294037, "learning_rate": 0.0009443485423682683, "loss": 2.4875, "step": 427860 }, { "epoch": 0.8524121828381996, "grad_norm": 0.1715698391199112, "learning_rate": 0.0009441591028947592, "loss": 2.4906, "step": 427870 }, { "epoch": 0.8524321050618385, "grad_norm": 0.1730613261461258, "learning_rate": 0.0009439696974044732, "loss": 2.4733, "step": 427880 }, { "epoch": 0.8524520272854775, "grad_norm": 0.16504338383674622, "learning_rate": 0.0009437803258791287, "loss": 2.4964, "step": 427890 }, { "epoch": 0.8524719495091164, "grad_norm": 0.1770544946193695, "learning_rate": 0.0009435909883004594, "loss": 2.4886, "step": 427900 }, { "epoch": 0.8524918717327553, "grad_norm": 0.16926701366901398, "learning_rate": 0.0009434016846502167, "loss": 2.4851, "step": 427910 }, { "epoch": 0.8525117939563942, "grad_norm": 0.1695130169391632, "learning_rate": 0.0009432124149101676, "loss": 2.4934, "step": 427920 }, { "epoch": 0.8525317161800332, "grad_norm": 0.1838758885860443, "learning_rate": 0.0009430231790620951, "loss": 2.5062, "step": 427930 }, { "epoch": 0.8525516384036721, "grad_norm": 0.16044186055660248, "learning_rate": 0.0009428339770877991, "loss": 2.5072, "step": 427940 }, { "epoch": 0.852571560627311, "grad_norm": 0.19388598203659058, "learning_rate": 0.0009426448089690958, "loss": 2.502, "step": 427950 }, { "epoch": 0.8525914828509499, "grad_norm": 0.17253001034259796, "learning_rate": 0.0009424556746878173, "loss": 2.4786, "step": 427960 }, { "epoch": 0.8526114050745888, "grad_norm": 0.1539583057165146, "learning_rate": 0.0009422665742258118, "loss": 2.4796, "step": 427970 }, { "epoch": 0.8526313272982278, "grad_norm": 0.1679103672504425, "learning_rate": 0.0009420775075649445, "loss": 2.4862, "step": 427980 }, { "epoch": 0.8526512495218667, "grad_norm": 0.17614002525806427, "learning_rate": 0.0009418884746870962, "loss": 2.4942, "step": 427990 }, { "epoch": 0.8526711717455056, "grad_norm": 0.47868749499320984, "learning_rate": 0.0009416994755741637, "loss": 2.4845, "step": 428000 }, { "epoch": 0.8526910939691444, "grad_norm": 0.16113920509815216, "learning_rate": 0.0009415105102080608, "loss": 2.4916, "step": 428010 }, { "epoch": 0.8527110161927833, "grad_norm": 0.18016162514686584, "learning_rate": 0.0009413215785707164, "loss": 2.5002, "step": 428020 }, { "epoch": 0.8527309384164223, "grad_norm": 0.16189435124397278, "learning_rate": 0.0009411326806440761, "loss": 2.4967, "step": 428030 }, { "epoch": 0.8527508606400612, "grad_norm": 0.16095156967639923, "learning_rate": 0.0009409438164101018, "loss": 2.4864, "step": 428040 }, { "epoch": 0.8527707828637001, "grad_norm": 0.15004204213619232, "learning_rate": 0.0009407549858507712, "loss": 2.4971, "step": 428050 }, { "epoch": 0.852790705087339, "grad_norm": 0.16426178812980652, "learning_rate": 0.0009405661889480779, "loss": 2.47, "step": 428060 }, { "epoch": 0.8528106273109779, "grad_norm": 0.17248618602752686, "learning_rate": 0.0009403774256840316, "loss": 2.4787, "step": 428070 }, { "epoch": 0.8528305495346169, "grad_norm": 0.205800399184227, "learning_rate": 0.0009401886960406585, "loss": 2.4928, "step": 428080 }, { "epoch": 0.8528504717582558, "grad_norm": 0.17116746306419373, "learning_rate": 0.00094, "loss": 2.4982, "step": 428090 }, { "epoch": 0.8528703939818947, "grad_norm": 0.17900042235851288, "learning_rate": 0.0009398113375441142, "loss": 2.4634, "step": 428100 }, { "epoch": 0.8528903162055336, "grad_norm": 0.17830023169517517, "learning_rate": 0.000939622708655075, "loss": 2.4898, "step": 428110 }, { "epoch": 0.8529102384291725, "grad_norm": 0.9229720234870911, "learning_rate": 0.0009394341133149717, "loss": 2.499, "step": 428120 }, { "epoch": 0.8529301606528115, "grad_norm": 0.1655762940645218, "learning_rate": 0.0009392455515059106, "loss": 2.493, "step": 428130 }, { "epoch": 0.8529500828764504, "grad_norm": 0.16973429918289185, "learning_rate": 0.0009390570232100127, "loss": 2.4926, "step": 428140 }, { "epoch": 0.8529700051000892, "grad_norm": 0.20428530871868134, "learning_rate": 0.0009388685284094154, "loss": 2.4903, "step": 428150 }, { "epoch": 0.8529899273237281, "grad_norm": 0.19060982763767242, "learning_rate": 0.0009386800670862721, "loss": 2.4773, "step": 428160 }, { "epoch": 0.853009849547367, "grad_norm": 0.18434594571590424, "learning_rate": 0.000938491639222752, "loss": 2.483, "step": 428170 }, { "epoch": 0.853029771771006, "grad_norm": 0.15587347745895386, "learning_rate": 0.0009383032448010403, "loss": 2.4926, "step": 428180 }, { "epoch": 0.8530496939946449, "grad_norm": 0.1698594093322754, "learning_rate": 0.0009381148838033374, "loss": 2.4791, "step": 428190 }, { "epoch": 0.8530696162182838, "grad_norm": 0.18077489733695984, "learning_rate": 0.0009379265562118597, "loss": 2.4763, "step": 428200 }, { "epoch": 0.8530895384419227, "grad_norm": 0.1898813098669052, "learning_rate": 0.0009377382620088399, "loss": 2.4958, "step": 428210 }, { "epoch": 0.8531094606655616, "grad_norm": 0.16502611339092255, "learning_rate": 0.0009375500011765259, "loss": 2.4904, "step": 428220 }, { "epoch": 0.8531293828892006, "grad_norm": 0.17238442599773407, "learning_rate": 0.0009373617736971816, "loss": 2.4869, "step": 428230 }, { "epoch": 0.8531493051128395, "grad_norm": 0.1813613474369049, "learning_rate": 0.0009371735795530862, "loss": 2.4905, "step": 428240 }, { "epoch": 0.8531692273364784, "grad_norm": 0.15807858109474182, "learning_rate": 0.0009369854187265352, "loss": 2.4911, "step": 428250 }, { "epoch": 0.8531891495601173, "grad_norm": 0.18153506517410278, "learning_rate": 0.000936797291199839, "loss": 2.477, "step": 428260 }, { "epoch": 0.8532090717837563, "grad_norm": 0.18686716258525848, "learning_rate": 0.0009366091969553246, "loss": 2.4825, "step": 428270 }, { "epoch": 0.8532289940073952, "grad_norm": 0.16275466978549957, "learning_rate": 0.0009364211359753336, "loss": 2.475, "step": 428280 }, { "epoch": 0.853248916231034, "grad_norm": 0.16259117424488068, "learning_rate": 0.000936233108242224, "loss": 2.4922, "step": 428290 }, { "epoch": 0.8532688384546729, "grad_norm": 0.1775559037923813, "learning_rate": 0.0009360451137383692, "loss": 2.4796, "step": 428300 }, { "epoch": 0.8532887606783118, "grad_norm": 0.16092485189437866, "learning_rate": 0.0009358571524461577, "loss": 2.4928, "step": 428310 }, { "epoch": 0.8533086829019508, "grad_norm": 0.20076026022434235, "learning_rate": 0.0009356692243479943, "loss": 2.499, "step": 428320 }, { "epoch": 0.8533286051255897, "grad_norm": 0.17751580476760864, "learning_rate": 0.0009354813294262989, "loss": 2.4787, "step": 428330 }, { "epoch": 0.8533485273492286, "grad_norm": 0.1800525039434433, "learning_rate": 0.0009352934676635068, "loss": 2.4878, "step": 428340 }, { "epoch": 0.8533684495728675, "grad_norm": 0.16966816782951355, "learning_rate": 0.0009351056390420691, "loss": 2.4758, "step": 428350 }, { "epoch": 0.8533883717965064, "grad_norm": 0.17332597076892853, "learning_rate": 0.0009349178435444521, "loss": 2.4956, "step": 428360 }, { "epoch": 0.8534082940201454, "grad_norm": 0.16436518728733063, "learning_rate": 0.0009347300811531381, "loss": 2.4796, "step": 428370 }, { "epoch": 0.8534282162437843, "grad_norm": 0.16914232075214386, "learning_rate": 0.0009345423518506237, "loss": 2.4865, "step": 428380 }, { "epoch": 0.8534481384674232, "grad_norm": 0.16159436106681824, "learning_rate": 0.0009343546556194224, "loss": 2.4926, "step": 428390 }, { "epoch": 0.8534680606910621, "grad_norm": 0.17046916484832764, "learning_rate": 0.000934166992442062, "loss": 2.496, "step": 428400 }, { "epoch": 0.853487982914701, "grad_norm": 0.1950443834066391, "learning_rate": 0.0009339793623010858, "loss": 2.4719, "step": 428410 }, { "epoch": 0.85350790513834, "grad_norm": 0.176376074552536, "learning_rate": 0.0009337917651790528, "loss": 2.487, "step": 428420 }, { "epoch": 0.8535278273619789, "grad_norm": 0.23663808405399323, "learning_rate": 0.0009336042010585375, "loss": 2.4992, "step": 428430 }, { "epoch": 0.8535477495856177, "grad_norm": 0.17607851326465607, "learning_rate": 0.0009334166699221293, "loss": 2.4849, "step": 428440 }, { "epoch": 0.8535676718092566, "grad_norm": 0.1711922585964203, "learning_rate": 0.0009332291717524331, "loss": 2.4921, "step": 428450 }, { "epoch": 0.8535875940328955, "grad_norm": 0.18471206724643707, "learning_rate": 0.0009330417065320689, "loss": 2.479, "step": 428460 }, { "epoch": 0.8536075162565345, "grad_norm": 0.16943351924419403, "learning_rate": 0.000932854274243672, "loss": 2.4848, "step": 428470 }, { "epoch": 0.8536274384801734, "grad_norm": 0.18008184432983398, "learning_rate": 0.0009326668748698934, "loss": 2.4777, "step": 428480 }, { "epoch": 0.8536473607038123, "grad_norm": 0.186637744307518, "learning_rate": 0.0009324795083933986, "loss": 2.4836, "step": 428490 }, { "epoch": 0.8536672829274512, "grad_norm": 0.18966633081436157, "learning_rate": 0.0009322921747968689, "loss": 2.4896, "step": 428500 }, { "epoch": 0.8536872051510901, "grad_norm": 0.19198761880397797, "learning_rate": 0.0009321048740630005, "loss": 2.4854, "step": 428510 }, { "epoch": 0.8537071273747291, "grad_norm": 0.15162517130374908, "learning_rate": 0.0009319176061745049, "loss": 2.4875, "step": 428520 }, { "epoch": 0.853727049598368, "grad_norm": 0.1583823561668396, "learning_rate": 0.0009317303711141087, "loss": 2.4716, "step": 428530 }, { "epoch": 0.8537469718220069, "grad_norm": 0.17181414365768433, "learning_rate": 0.0009315431688645535, "loss": 2.5035, "step": 428540 }, { "epoch": 0.8537668940456458, "grad_norm": 0.20217643678188324, "learning_rate": 0.0009313559994085963, "loss": 2.4842, "step": 428550 }, { "epoch": 0.8537868162692848, "grad_norm": 0.16668599843978882, "learning_rate": 0.0009311688627290089, "loss": 2.4935, "step": 428560 }, { "epoch": 0.8538067384929237, "grad_norm": 0.16280238330364227, "learning_rate": 0.0009309817588085787, "loss": 2.4821, "step": 428570 }, { "epoch": 0.8538266607165625, "grad_norm": 0.17700856924057007, "learning_rate": 0.0009307946876301072, "loss": 2.5103, "step": 428580 }, { "epoch": 0.8538465829402014, "grad_norm": 0.18105106055736542, "learning_rate": 0.0009306076491764119, "loss": 2.4989, "step": 428590 }, { "epoch": 0.8538665051638403, "grad_norm": 0.17463809251785278, "learning_rate": 0.000930420643430325, "loss": 2.4895, "step": 428600 }, { "epoch": 0.8538864273874793, "grad_norm": 0.18017853796482086, "learning_rate": 0.0009302336703746934, "loss": 2.4829, "step": 428610 }, { "epoch": 0.8539063496111182, "grad_norm": 0.18906870484352112, "learning_rate": 0.0009300467299923796, "loss": 2.4861, "step": 428620 }, { "epoch": 0.8539262718347571, "grad_norm": 0.1711692214012146, "learning_rate": 0.0009298598222662604, "loss": 2.4824, "step": 428630 }, { "epoch": 0.853946194058396, "grad_norm": 0.1751202493906021, "learning_rate": 0.0009296729471792278, "loss": 2.4923, "step": 428640 }, { "epoch": 0.8539661162820349, "grad_norm": 0.20336471498012543, "learning_rate": 0.0009294861047141891, "loss": 2.4864, "step": 428650 }, { "epoch": 0.8539860385056739, "grad_norm": 0.17152313888072968, "learning_rate": 0.0009292992948540662, "loss": 2.4818, "step": 428660 }, { "epoch": 0.8540059607293128, "grad_norm": 0.1749878078699112, "learning_rate": 0.0009291125175817956, "loss": 2.4973, "step": 428670 }, { "epoch": 0.8540258829529517, "grad_norm": 0.18068090081214905, "learning_rate": 0.0009289257728803293, "loss": 2.4869, "step": 428680 }, { "epoch": 0.8540458051765906, "grad_norm": 0.1982819139957428, "learning_rate": 0.0009287390607326336, "loss": 2.4916, "step": 428690 }, { "epoch": 0.8540657274002295, "grad_norm": 0.16958147287368774, "learning_rate": 0.0009285523811216902, "loss": 2.4923, "step": 428700 }, { "epoch": 0.8540856496238685, "grad_norm": 0.16951310634613037, "learning_rate": 0.0009283657340304949, "loss": 2.501, "step": 428710 }, { "epoch": 0.8541055718475073, "grad_norm": 0.1608472615480423, "learning_rate": 0.000928179119442059, "loss": 2.4956, "step": 428720 }, { "epoch": 0.8541254940711462, "grad_norm": 0.15508615970611572, "learning_rate": 0.0009279925373394082, "loss": 2.4936, "step": 428730 }, { "epoch": 0.8541454162947851, "grad_norm": 0.1710125207901001, "learning_rate": 0.0009278059877055833, "loss": 2.4884, "step": 428740 }, { "epoch": 0.854165338518424, "grad_norm": 0.20348849892616272, "learning_rate": 0.0009276194705236391, "loss": 2.4904, "step": 428750 }, { "epoch": 0.854185260742063, "grad_norm": 0.16708354651927948, "learning_rate": 0.0009274329857766461, "loss": 2.4752, "step": 428760 }, { "epoch": 0.8542051829657019, "grad_norm": 0.17129525542259216, "learning_rate": 0.0009272465334476892, "loss": 2.4863, "step": 428770 }, { "epoch": 0.8542251051893408, "grad_norm": 0.1862604320049286, "learning_rate": 0.0009270601135198673, "loss": 2.4866, "step": 428780 }, { "epoch": 0.8542450274129797, "grad_norm": 0.18664170801639557, "learning_rate": 0.000926873725976295, "loss": 2.4871, "step": 428790 }, { "epoch": 0.8542649496366186, "grad_norm": 0.19459733366966248, "learning_rate": 0.000926687370800101, "loss": 2.4887, "step": 428800 }, { "epoch": 0.8542848718602576, "grad_norm": 0.16690999269485474, "learning_rate": 0.0009265010479744287, "loss": 2.495, "step": 428810 }, { "epoch": 0.8543047940838965, "grad_norm": 0.18971556425094604, "learning_rate": 0.0009263147574824362, "loss": 2.4944, "step": 428820 }, { "epoch": 0.8543247163075354, "grad_norm": 0.1586054265499115, "learning_rate": 0.000926128499307296, "loss": 2.4868, "step": 428830 }, { "epoch": 0.8543446385311743, "grad_norm": 0.177381232380867, "learning_rate": 0.0009259422734321959, "loss": 2.4842, "step": 428840 }, { "epoch": 0.8543645607548133, "grad_norm": 0.16140124201774597, "learning_rate": 0.0009257560798403373, "loss": 2.4858, "step": 428850 }, { "epoch": 0.8543844829784522, "grad_norm": 0.1788461059331894, "learning_rate": 0.0009255699185149366, "loss": 2.4935, "step": 428860 }, { "epoch": 0.854404405202091, "grad_norm": 0.1646813452243805, "learning_rate": 0.0009253837894392249, "loss": 2.5047, "step": 428870 }, { "epoch": 0.8544243274257299, "grad_norm": 0.17126010358333588, "learning_rate": 0.0009251976925964479, "loss": 2.4893, "step": 428880 }, { "epoch": 0.8544442496493688, "grad_norm": 0.1832411140203476, "learning_rate": 0.000925011627969865, "loss": 2.5009, "step": 428890 }, { "epoch": 0.8544641718730078, "grad_norm": 0.183136448264122, "learning_rate": 0.0009248255955427512, "loss": 2.4857, "step": 428900 }, { "epoch": 0.8544840940966467, "grad_norm": 0.18900655210018158, "learning_rate": 0.0009246395952983948, "loss": 2.4731, "step": 428910 }, { "epoch": 0.8545040163202856, "grad_norm": 0.15735085308551788, "learning_rate": 0.0009244536272200999, "loss": 2.486, "step": 428920 }, { "epoch": 0.8545239385439245, "grad_norm": 0.16509826481342316, "learning_rate": 0.0009242676912911838, "loss": 2.4926, "step": 428930 }, { "epoch": 0.8545438607675634, "grad_norm": 0.1793270856142044, "learning_rate": 0.0009240817874949787, "loss": 2.4889, "step": 428940 }, { "epoch": 0.8545637829912024, "grad_norm": 0.16713117063045502, "learning_rate": 0.0009238959158148317, "loss": 2.4834, "step": 428950 }, { "epoch": 0.8545837052148413, "grad_norm": 0.16976633667945862, "learning_rate": 0.0009237100762341031, "loss": 2.4979, "step": 428960 }, { "epoch": 0.8546036274384802, "grad_norm": 0.1592726707458496, "learning_rate": 0.000923524268736169, "loss": 2.4859, "step": 428970 }, { "epoch": 0.8546235496621191, "grad_norm": 0.2082885205745697, "learning_rate": 0.0009233384933044185, "loss": 2.4801, "step": 428980 }, { "epoch": 0.854643471885758, "grad_norm": 0.1810307502746582, "learning_rate": 0.0009231527499222556, "loss": 2.4919, "step": 428990 }, { "epoch": 0.854663394109397, "grad_norm": 0.19941651821136475, "learning_rate": 0.0009229670385730993, "loss": 2.5052, "step": 429000 }, { "epoch": 0.8546833163330358, "grad_norm": 0.1802702397108078, "learning_rate": 0.0009227813592403815, "loss": 2.4772, "step": 429010 }, { "epoch": 0.8547032385566747, "grad_norm": 0.22204262018203735, "learning_rate": 0.0009225957119075494, "loss": 2.4933, "step": 429020 }, { "epoch": 0.8547231607803136, "grad_norm": 0.1685098111629486, "learning_rate": 0.0009224100965580644, "loss": 2.5005, "step": 429030 }, { "epoch": 0.8547430830039525, "grad_norm": 0.1995270848274231, "learning_rate": 0.0009222245131754017, "loss": 2.4911, "step": 429040 }, { "epoch": 0.8547630052275915, "grad_norm": 0.1891176849603653, "learning_rate": 0.0009220389617430509, "loss": 2.4741, "step": 429050 }, { "epoch": 0.8547829274512304, "grad_norm": 0.1624685376882553, "learning_rate": 0.0009218534422445155, "loss": 2.4965, "step": 429060 }, { "epoch": 0.8548028496748693, "grad_norm": 0.19421754777431488, "learning_rate": 0.0009216679546633143, "loss": 2.5018, "step": 429070 }, { "epoch": 0.8548227718985082, "grad_norm": 0.18920958042144775, "learning_rate": 0.0009214824989829789, "loss": 2.4899, "step": 429080 }, { "epoch": 0.8548426941221471, "grad_norm": 0.17783035337924957, "learning_rate": 0.000921297075187056, "loss": 2.4774, "step": 429090 }, { "epoch": 0.8548626163457861, "grad_norm": 0.16921676695346832, "learning_rate": 0.0009211116832591057, "loss": 2.4904, "step": 429100 }, { "epoch": 0.854882538569425, "grad_norm": 0.1547461301088333, "learning_rate": 0.0009209263231827032, "loss": 2.4733, "step": 429110 }, { "epoch": 0.8549024607930639, "grad_norm": 0.18204212188720703, "learning_rate": 0.0009207409949414367, "loss": 2.4761, "step": 429120 }, { "epoch": 0.8549223830167028, "grad_norm": 0.17001239955425262, "learning_rate": 0.0009205556985189091, "loss": 2.4901, "step": 429130 }, { "epoch": 0.8549423052403418, "grad_norm": 0.1963324397802353, "learning_rate": 0.0009203704338987378, "loss": 2.4887, "step": 429140 }, { "epoch": 0.8549622274639807, "grad_norm": 0.16558219492435455, "learning_rate": 0.000920185201064553, "loss": 2.5, "step": 429150 }, { "epoch": 0.8549821496876195, "grad_norm": 0.16809403896331787, "learning_rate": 0.0009199999999999999, "loss": 2.4879, "step": 429160 }, { "epoch": 0.8550020719112584, "grad_norm": 0.1958526223897934, "learning_rate": 0.000919814830688738, "loss": 2.4743, "step": 429170 }, { "epoch": 0.8550219941348973, "grad_norm": 0.22561819851398468, "learning_rate": 0.0009196296931144397, "loss": 2.4814, "step": 429180 }, { "epoch": 0.8550419163585363, "grad_norm": 0.1980932205915451, "learning_rate": 0.0009194445872607921, "loss": 2.4871, "step": 429190 }, { "epoch": 0.8550618385821752, "grad_norm": 0.17196299135684967, "learning_rate": 0.0009192595131114965, "loss": 2.4772, "step": 429200 }, { "epoch": 0.8550817608058141, "grad_norm": 0.18293990194797516, "learning_rate": 0.0009190744706502672, "loss": 2.4835, "step": 429210 }, { "epoch": 0.855101683029453, "grad_norm": 0.18413861095905304, "learning_rate": 0.0009188894598608338, "loss": 2.4816, "step": 429220 }, { "epoch": 0.8551216052530919, "grad_norm": 0.1923416554927826, "learning_rate": 0.0009187044807269385, "loss": 2.5019, "step": 429230 }, { "epoch": 0.8551415274767309, "grad_norm": 0.1760616898536682, "learning_rate": 0.0009185195332323382, "loss": 2.5045, "step": 429240 }, { "epoch": 0.8551614497003698, "grad_norm": 0.1920831948518753, "learning_rate": 0.0009183346173608034, "loss": 2.474, "step": 429250 }, { "epoch": 0.8551813719240087, "grad_norm": 0.16693271696567535, "learning_rate": 0.0009181497330961184, "loss": 2.487, "step": 429260 }, { "epoch": 0.8552012941476476, "grad_norm": 0.17581705749034882, "learning_rate": 0.0009179648804220817, "loss": 2.4994, "step": 429270 }, { "epoch": 0.8552212163712865, "grad_norm": 0.1861657202243805, "learning_rate": 0.0009177800593225054, "loss": 2.4695, "step": 429280 }, { "epoch": 0.8552411385949255, "grad_norm": 0.18706022202968597, "learning_rate": 0.0009175952697812154, "loss": 2.4938, "step": 429290 }, { "epoch": 0.8552610608185643, "grad_norm": 0.17061476409435272, "learning_rate": 0.0009174105117820514, "loss": 2.4625, "step": 429300 }, { "epoch": 0.8552809830422032, "grad_norm": 0.1709502786397934, "learning_rate": 0.0009172257853088668, "loss": 2.481, "step": 429310 }, { "epoch": 0.8553009052658421, "grad_norm": 0.16767361760139465, "learning_rate": 0.0009170410903455293, "loss": 2.4792, "step": 429320 }, { "epoch": 0.855320827489481, "grad_norm": 0.15226256847381592, "learning_rate": 0.0009168564268759197, "loss": 2.493, "step": 429330 }, { "epoch": 0.85534074971312, "grad_norm": 0.17799536883831024, "learning_rate": 0.0009166717948839327, "loss": 2.4836, "step": 429340 }, { "epoch": 0.8553606719367589, "grad_norm": 0.1912429928779602, "learning_rate": 0.0009164871943534769, "loss": 2.4964, "step": 429350 }, { "epoch": 0.8553805941603978, "grad_norm": 0.1687937080860138, "learning_rate": 0.0009163026252684747, "loss": 2.4866, "step": 429360 }, { "epoch": 0.8554005163840367, "grad_norm": 0.16060778498649597, "learning_rate": 0.0009161180876128619, "loss": 2.4669, "step": 429370 }, { "epoch": 0.8554204386076756, "grad_norm": 0.1667313277721405, "learning_rate": 0.0009159335813705878, "loss": 2.4831, "step": 429380 }, { "epoch": 0.8554403608313146, "grad_norm": 0.17617365717887878, "learning_rate": 0.000915749106525616, "loss": 2.4989, "step": 429390 }, { "epoch": 0.8554602830549535, "grad_norm": 0.17735005915164948, "learning_rate": 0.0009155646630619234, "loss": 2.4944, "step": 429400 }, { "epoch": 0.8554802052785924, "grad_norm": 0.15917950868606567, "learning_rate": 0.0009153802509635001, "loss": 2.4774, "step": 429410 }, { "epoch": 0.8555001275022313, "grad_norm": 0.18107931315898895, "learning_rate": 0.0009151958702143506, "loss": 2.5028, "step": 429420 }, { "epoch": 0.8555200497258703, "grad_norm": 0.17587798833847046, "learning_rate": 0.0009150115207984926, "loss": 2.4795, "step": 429430 }, { "epoch": 0.8555399719495091, "grad_norm": 0.18907375633716583, "learning_rate": 0.0009148272026999571, "loss": 2.4883, "step": 429440 }, { "epoch": 0.855559894173148, "grad_norm": 0.16694247722625732, "learning_rate": 0.0009146429159027892, "loss": 2.4918, "step": 429450 }, { "epoch": 0.8555798163967869, "grad_norm": 0.17300453782081604, "learning_rate": 0.0009144586603910472, "loss": 2.4884, "step": 429460 }, { "epoch": 0.8555997386204258, "grad_norm": 0.23766884207725525, "learning_rate": 0.0009142744361488029, "loss": 2.4935, "step": 429470 }, { "epoch": 0.8556196608440648, "grad_norm": 0.16685368120670319, "learning_rate": 0.0009140902431601419, "loss": 2.4949, "step": 429480 }, { "epoch": 0.8556395830677037, "grad_norm": 0.17725887894630432, "learning_rate": 0.0009139060814091629, "loss": 2.4834, "step": 429490 }, { "epoch": 0.8556595052913426, "grad_norm": 0.18405839800834656, "learning_rate": 0.0009137219508799785, "loss": 2.4859, "step": 429500 }, { "epoch": 0.8556794275149815, "grad_norm": 0.16121532022953033, "learning_rate": 0.0009135378515567145, "loss": 2.4871, "step": 429510 }, { "epoch": 0.8556993497386204, "grad_norm": 0.1722608357667923, "learning_rate": 0.00091335378342351, "loss": 2.4836, "step": 429520 }, { "epoch": 0.8557192719622594, "grad_norm": 0.17154167592525482, "learning_rate": 0.0009131697464645182, "loss": 2.4966, "step": 429530 }, { "epoch": 0.8557391941858983, "grad_norm": 0.18487755954265594, "learning_rate": 0.0009129857406639046, "loss": 2.5034, "step": 429540 }, { "epoch": 0.8557591164095372, "grad_norm": 0.17320941388607025, "learning_rate": 0.0009128017660058494, "loss": 2.4744, "step": 429550 }, { "epoch": 0.8557790386331761, "grad_norm": 0.17329873144626617, "learning_rate": 0.0009126178224745452, "loss": 2.4841, "step": 429560 }, { "epoch": 0.855798960856815, "grad_norm": 0.1752997785806656, "learning_rate": 0.000912433910054198, "loss": 2.4725, "step": 429570 }, { "epoch": 0.855818883080454, "grad_norm": 0.23834256827831268, "learning_rate": 0.0009122500287290282, "loss": 2.4883, "step": 429580 }, { "epoch": 0.8558388053040928, "grad_norm": 0.1657237857580185, "learning_rate": 0.0009120661784832682, "loss": 2.4687, "step": 429590 }, { "epoch": 0.8558587275277317, "grad_norm": 0.16867539286613464, "learning_rate": 0.0009118823593011645, "loss": 2.4805, "step": 429600 }, { "epoch": 0.8558786497513706, "grad_norm": 0.1766015887260437, "learning_rate": 0.0009116985711669767, "loss": 2.4828, "step": 429610 }, { "epoch": 0.8558985719750095, "grad_norm": 0.1779145747423172, "learning_rate": 0.0009115148140649778, "loss": 2.4767, "step": 429620 }, { "epoch": 0.8559184941986485, "grad_norm": 0.16336889564990997, "learning_rate": 0.0009113310879794536, "loss": 2.4789, "step": 429630 }, { "epoch": 0.8559384164222874, "grad_norm": 0.16910576820373535, "learning_rate": 0.000911147392894704, "loss": 2.5026, "step": 429640 }, { "epoch": 0.8559583386459263, "grad_norm": 0.19497530162334442, "learning_rate": 0.0009109637287950414, "loss": 2.4821, "step": 429650 }, { "epoch": 0.8559782608695652, "grad_norm": 0.1830938160419464, "learning_rate": 0.0009107800956647918, "loss": 2.4979, "step": 429660 }, { "epoch": 0.8559981830932041, "grad_norm": 0.1826339215040207, "learning_rate": 0.0009105964934882942, "loss": 2.4817, "step": 429670 }, { "epoch": 0.8560181053168431, "grad_norm": 0.18584243953227997, "learning_rate": 0.0009104129222499011, "loss": 2.4865, "step": 429680 }, { "epoch": 0.856038027540482, "grad_norm": 0.16575315594673157, "learning_rate": 0.0009102293819339777, "loss": 2.4832, "step": 429690 }, { "epoch": 0.8560579497641209, "grad_norm": 0.1822475790977478, "learning_rate": 0.000910045872524903, "loss": 2.487, "step": 429700 }, { "epoch": 0.8560778719877598, "grad_norm": 0.19272011518478394, "learning_rate": 0.0009098623940070685, "loss": 2.4872, "step": 429710 }, { "epoch": 0.8560977942113988, "grad_norm": 0.16312666237354279, "learning_rate": 0.0009096789463648792, "loss": 2.4911, "step": 429720 }, { "epoch": 0.8561177164350376, "grad_norm": 0.1641789823770523, "learning_rate": 0.0009094955295827532, "loss": 2.4766, "step": 429730 }, { "epoch": 0.8561376386586765, "grad_norm": 0.17674149572849274, "learning_rate": 0.0009093121436451215, "loss": 2.4822, "step": 429740 }, { "epoch": 0.8561575608823154, "grad_norm": 0.16721342504024506, "learning_rate": 0.0009091287885364286, "loss": 2.4719, "step": 429750 }, { "epoch": 0.8561774831059543, "grad_norm": 0.17043954133987427, "learning_rate": 0.0009089454642411316, "loss": 2.4905, "step": 429760 }, { "epoch": 0.8561974053295933, "grad_norm": 0.1930563896894455, "learning_rate": 0.0009087621707437008, "loss": 2.4737, "step": 429770 }, { "epoch": 0.8562173275532322, "grad_norm": 0.1784306764602661, "learning_rate": 0.0009085789080286198, "loss": 2.4799, "step": 429780 }, { "epoch": 0.8562372497768711, "grad_norm": 0.17327652871608734, "learning_rate": 0.0009083956760803849, "loss": 2.478, "step": 429790 }, { "epoch": 0.85625717200051, "grad_norm": 0.17682604491710663, "learning_rate": 0.0009082124748835057, "loss": 2.4857, "step": 429800 }, { "epoch": 0.8562770942241489, "grad_norm": 0.1944049894809723, "learning_rate": 0.0009080293044225043, "loss": 2.4927, "step": 429810 }, { "epoch": 0.8562970164477879, "grad_norm": 0.16744385659694672, "learning_rate": 0.0009078461646819162, "loss": 2.4688, "step": 429820 }, { "epoch": 0.8563169386714268, "grad_norm": 0.17371860146522522, "learning_rate": 0.0009076630556462902, "loss": 2.4817, "step": 429830 }, { "epoch": 0.8563368608950657, "grad_norm": 0.1775260865688324, "learning_rate": 0.000907479977300187, "loss": 2.4832, "step": 429840 }, { "epoch": 0.8563567831187046, "grad_norm": 0.17564614117145538, "learning_rate": 0.0009072969296281812, "loss": 2.4871, "step": 429850 }, { "epoch": 0.8563767053423434, "grad_norm": 0.18302038311958313, "learning_rate": 0.00090711391261486, "loss": 2.4774, "step": 429860 }, { "epoch": 0.8563966275659824, "grad_norm": 0.17575888335704803, "learning_rate": 0.0009069309262448233, "loss": 2.4903, "step": 429870 }, { "epoch": 0.8564165497896213, "grad_norm": 0.15749144554138184, "learning_rate": 0.0009067479705026842, "loss": 2.4855, "step": 429880 }, { "epoch": 0.8564364720132602, "grad_norm": 0.16605402529239655, "learning_rate": 0.0009065650453730685, "loss": 2.4983, "step": 429890 }, { "epoch": 0.8564563942368991, "grad_norm": 0.17049892246723175, "learning_rate": 0.0009063821508406147, "loss": 2.4805, "step": 429900 }, { "epoch": 0.856476316460538, "grad_norm": 0.18014401197433472, "learning_rate": 0.0009061992868899747, "loss": 2.4819, "step": 429910 }, { "epoch": 0.856496238684177, "grad_norm": 0.15692582726478577, "learning_rate": 0.0009060164535058126, "loss": 2.4881, "step": 429920 }, { "epoch": 0.8565161609078159, "grad_norm": 0.18206587433815002, "learning_rate": 0.0009058336506728056, "loss": 2.4828, "step": 429930 }, { "epoch": 0.8565360831314548, "grad_norm": 0.18349653482437134, "learning_rate": 0.0009056508783756438, "loss": 2.4603, "step": 429940 }, { "epoch": 0.8565560053550937, "grad_norm": 0.18106135725975037, "learning_rate": 0.0009054681365990298, "loss": 2.4922, "step": 429950 }, { "epoch": 0.8565759275787326, "grad_norm": 0.18742692470550537, "learning_rate": 0.0009052854253276795, "loss": 2.477, "step": 429960 }, { "epoch": 0.8565958498023716, "grad_norm": 0.17987792193889618, "learning_rate": 0.0009051027445463205, "loss": 2.4901, "step": 429970 }, { "epoch": 0.8566157720260105, "grad_norm": 0.1712563931941986, "learning_rate": 0.0009049200942396942, "loss": 2.4867, "step": 429980 }, { "epoch": 0.8566356942496494, "grad_norm": 0.16554208099842072, "learning_rate": 0.0009047374743925547, "loss": 2.4895, "step": 429990 }, { "epoch": 0.8566556164732883, "grad_norm": 0.1739846020936966, "learning_rate": 0.0009045548849896678, "loss": 2.4887, "step": 430000 }, { "epoch": 0.8566755386969271, "grad_norm": 0.15681256353855133, "learning_rate": 0.0009043723260158129, "loss": 2.4845, "step": 430010 }, { "epoch": 0.8566954609205661, "grad_norm": 0.16573551297187805, "learning_rate": 0.000904189797455782, "loss": 2.4869, "step": 430020 }, { "epoch": 0.856715383144205, "grad_norm": 0.1780211329460144, "learning_rate": 0.0009040072992943795, "loss": 2.4766, "step": 430030 }, { "epoch": 0.8567353053678439, "grad_norm": 0.158983513712883, "learning_rate": 0.0009038248315164224, "loss": 2.4707, "step": 430040 }, { "epoch": 0.8567552275914828, "grad_norm": 0.19725483655929565, "learning_rate": 0.0009036423941067405, "loss": 2.4853, "step": 430050 }, { "epoch": 0.8567751498151218, "grad_norm": 0.19329674541950226, "learning_rate": 0.0009034599870501761, "loss": 2.4719, "step": 430060 }, { "epoch": 0.8567950720387607, "grad_norm": 0.16473405063152313, "learning_rate": 0.0009032776103315843, "loss": 2.4758, "step": 430070 }, { "epoch": 0.8568149942623996, "grad_norm": 0.1902613788843155, "learning_rate": 0.000903095263935833, "loss": 2.4924, "step": 430080 }, { "epoch": 0.8568349164860385, "grad_norm": 0.17700903117656708, "learning_rate": 0.0009029129478478017, "loss": 2.4889, "step": 430090 }, { "epoch": 0.8568548387096774, "grad_norm": 0.18201427161693573, "learning_rate": 0.0009027306620523839, "loss": 2.4812, "step": 430100 }, { "epoch": 0.8568747609333164, "grad_norm": 0.18885765969753265, "learning_rate": 0.0009025484065344842, "loss": 2.4926, "step": 430110 }, { "epoch": 0.8568946831569553, "grad_norm": 0.15452547371387482, "learning_rate": 0.0009023661812790205, "loss": 2.4766, "step": 430120 }, { "epoch": 0.8569146053805942, "grad_norm": 0.19219006597995758, "learning_rate": 0.0009021839862709233, "loss": 2.4825, "step": 430130 }, { "epoch": 0.856934527604233, "grad_norm": 0.16506581008434296, "learning_rate": 0.0009020018214951355, "loss": 2.4772, "step": 430140 }, { "epoch": 0.856954449827872, "grad_norm": 0.20598775148391724, "learning_rate": 0.0009018196869366124, "loss": 2.4884, "step": 430150 }, { "epoch": 0.856974372051511, "grad_norm": 0.16464678943157196, "learning_rate": 0.0009016375825803216, "loss": 2.4803, "step": 430160 }, { "epoch": 0.8569942942751498, "grad_norm": 0.17978787422180176, "learning_rate": 0.0009014555084112432, "loss": 2.4747, "step": 430170 }, { "epoch": 0.8570142164987887, "grad_norm": 0.16087374091148376, "learning_rate": 0.0009012734644143703, "loss": 2.481, "step": 430180 }, { "epoch": 0.8570341387224276, "grad_norm": 0.1717057079076767, "learning_rate": 0.0009010914505747077, "loss": 2.4911, "step": 430190 }, { "epoch": 0.8570540609460665, "grad_norm": 0.1806596964597702, "learning_rate": 0.000900909466877273, "loss": 2.4864, "step": 430200 }, { "epoch": 0.8570739831697055, "grad_norm": 0.1750420331954956, "learning_rate": 0.0009007275133070964, "loss": 2.491, "step": 430210 }, { "epoch": 0.8570939053933444, "grad_norm": 0.17202720046043396, "learning_rate": 0.0009005455898492198, "loss": 2.4815, "step": 430220 }, { "epoch": 0.8571138276169833, "grad_norm": 0.16959771513938904, "learning_rate": 0.000900363696488698, "loss": 2.4933, "step": 430230 }, { "epoch": 0.8571337498406222, "grad_norm": 0.17964188754558563, "learning_rate": 0.0009001818332105984, "loss": 2.4771, "step": 430240 }, { "epoch": 0.8571536720642611, "grad_norm": 0.18320873379707336, "learning_rate": 0.0009, "loss": 2.4896, "step": 430250 }, { "epoch": 0.8571735942879001, "grad_norm": 0.2159167230129242, "learning_rate": 0.0008998181968419948, "loss": 2.4977, "step": 430260 }, { "epoch": 0.857193516511539, "grad_norm": 0.17564786970615387, "learning_rate": 0.0008996364237216863, "loss": 2.4833, "step": 430270 }, { "epoch": 0.8572134387351779, "grad_norm": 0.170196995139122, "learning_rate": 0.0008994546806241917, "loss": 2.4837, "step": 430280 }, { "epoch": 0.8572333609588167, "grad_norm": 0.1992051750421524, "learning_rate": 0.0008992729675346389, "loss": 2.472, "step": 430290 }, { "epoch": 0.8572532831824556, "grad_norm": 0.17175868153572083, "learning_rate": 0.0008990912844381694, "loss": 2.4716, "step": 430300 }, { "epoch": 0.8572732054060946, "grad_norm": 0.1999960094690323, "learning_rate": 0.0008989096313199356, "loss": 2.4861, "step": 430310 }, { "epoch": 0.8572931276297335, "grad_norm": 0.16209296882152557, "learning_rate": 0.0008987280081651037, "loss": 2.492, "step": 430320 }, { "epoch": 0.8573130498533724, "grad_norm": 0.17649289965629578, "learning_rate": 0.0008985464149588509, "loss": 2.4978, "step": 430330 }, { "epoch": 0.8573329720770113, "grad_norm": 0.1539132446050644, "learning_rate": 0.0008983648516863672, "loss": 2.4821, "step": 430340 }, { "epoch": 0.8573528943006503, "grad_norm": 0.18182732164859772, "learning_rate": 0.0008981833183328545, "loss": 2.4913, "step": 430350 }, { "epoch": 0.8573728165242892, "grad_norm": 0.1670544445514679, "learning_rate": 0.0008980018148835271, "loss": 2.4781, "step": 430360 }, { "epoch": 0.8573927387479281, "grad_norm": 0.2032182663679123, "learning_rate": 0.0008978203413236116, "loss": 2.48, "step": 430370 }, { "epoch": 0.857412660971567, "grad_norm": 0.17332616448402405, "learning_rate": 0.0008976388976383465, "loss": 2.4769, "step": 430380 }, { "epoch": 0.8574325831952059, "grad_norm": 0.18300101161003113, "learning_rate": 0.0008974574838129823, "loss": 2.4907, "step": 430390 }, { "epoch": 0.8574525054188449, "grad_norm": 0.16488873958587646, "learning_rate": 0.0008972760998327823, "loss": 2.4855, "step": 430400 }, { "epoch": 0.8574724276424838, "grad_norm": 0.17362836003303528, "learning_rate": 0.0008970947456830212, "loss": 2.4837, "step": 430410 }, { "epoch": 0.8574923498661227, "grad_norm": 0.15410593152046204, "learning_rate": 0.0008969134213489858, "loss": 2.4813, "step": 430420 }, { "epoch": 0.8575122720897616, "grad_norm": 0.17907071113586426, "learning_rate": 0.0008967321268159758, "loss": 2.4724, "step": 430430 }, { "epoch": 0.8575321943134004, "grad_norm": 0.2146998643875122, "learning_rate": 0.0008965508620693024, "loss": 2.4815, "step": 430440 }, { "epoch": 0.8575521165370394, "grad_norm": 0.16668324172496796, "learning_rate": 0.0008963696270942884, "loss": 2.4919, "step": 430450 }, { "epoch": 0.8575720387606783, "grad_norm": 0.19114360213279724, "learning_rate": 0.0008961884218762697, "loss": 2.4962, "step": 430460 }, { "epoch": 0.8575919609843172, "grad_norm": 0.18700091540813446, "learning_rate": 0.0008960072464005935, "loss": 2.4814, "step": 430470 }, { "epoch": 0.8576118832079561, "grad_norm": 0.20049938559532166, "learning_rate": 0.0008958261006526191, "loss": 2.4944, "step": 430480 }, { "epoch": 0.857631805431595, "grad_norm": 0.18459108471870422, "learning_rate": 0.0008956449846177181, "loss": 2.4969, "step": 430490 }, { "epoch": 0.857651727655234, "grad_norm": 0.1854393035173416, "learning_rate": 0.0008954638982812739, "loss": 2.489, "step": 430500 }, { "epoch": 0.8576716498788729, "grad_norm": 0.16791683435440063, "learning_rate": 0.000895282841628682, "loss": 2.4759, "step": 430510 }, { "epoch": 0.8576915721025118, "grad_norm": 0.1913970410823822, "learning_rate": 0.0008951018146453494, "loss": 2.5062, "step": 430520 }, { "epoch": 0.8577114943261507, "grad_norm": 0.17347365617752075, "learning_rate": 0.0008949208173166957, "loss": 2.4888, "step": 430530 }, { "epoch": 0.8577314165497896, "grad_norm": 0.17145861685276031, "learning_rate": 0.000894739849628152, "loss": 2.4958, "step": 430540 }, { "epoch": 0.8577513387734286, "grad_norm": 0.19069530069828033, "learning_rate": 0.0008945589115651617, "loss": 2.4623, "step": 430550 }, { "epoch": 0.8577712609970675, "grad_norm": 0.24288736283779144, "learning_rate": 0.0008943780031131799, "loss": 2.486, "step": 430560 }, { "epoch": 0.8577911832207064, "grad_norm": 0.17170719802379608, "learning_rate": 0.0008941971242576732, "loss": 2.4837, "step": 430570 }, { "epoch": 0.8578111054443452, "grad_norm": 0.2501211166381836, "learning_rate": 0.0008940162749841207, "loss": 2.489, "step": 430580 }, { "epoch": 0.8578310276679841, "grad_norm": 0.18560442328453064, "learning_rate": 0.0008938354552780135, "loss": 2.4776, "step": 430590 }, { "epoch": 0.8578509498916231, "grad_norm": 0.15662522614002228, "learning_rate": 0.0008936546651248537, "loss": 2.499, "step": 430600 }, { "epoch": 0.857870872115262, "grad_norm": 0.1772615760564804, "learning_rate": 0.0008934739045101559, "loss": 2.4732, "step": 430610 }, { "epoch": 0.8578907943389009, "grad_norm": 0.19283439218997955, "learning_rate": 0.0008932931734194463, "loss": 2.4851, "step": 430620 }, { "epoch": 0.8579107165625398, "grad_norm": 0.17564955353736877, "learning_rate": 0.0008931124718382631, "loss": 2.4857, "step": 430630 }, { "epoch": 0.8579306387861788, "grad_norm": 0.17306694388389587, "learning_rate": 0.0008929317997521562, "loss": 2.4776, "step": 430640 }, { "epoch": 0.8579505610098177, "grad_norm": 0.16126161813735962, "learning_rate": 0.0008927511571466873, "loss": 2.4716, "step": 430650 }, { "epoch": 0.8579704832334566, "grad_norm": 0.19988228380680084, "learning_rate": 0.0008925705440074299, "loss": 2.4767, "step": 430660 }, { "epoch": 0.8579904054570955, "grad_norm": 0.17603699862957, "learning_rate": 0.000892389960319969, "loss": 2.4776, "step": 430670 }, { "epoch": 0.8580103276807344, "grad_norm": 0.20323406159877777, "learning_rate": 0.0008922094060699017, "loss": 2.4907, "step": 430680 }, { "epoch": 0.8580302499043734, "grad_norm": 0.17278984189033508, "learning_rate": 0.0008920288812428367, "loss": 2.4713, "step": 430690 }, { "epoch": 0.8580501721280123, "grad_norm": 0.19227179884910583, "learning_rate": 0.0008918483858243947, "loss": 2.4825, "step": 430700 }, { "epoch": 0.8580700943516512, "grad_norm": 0.1814562827348709, "learning_rate": 0.0008916679198002071, "loss": 2.499, "step": 430710 }, { "epoch": 0.85809001657529, "grad_norm": 0.1677551418542862, "learning_rate": 0.0008914874831559185, "loss": 2.4805, "step": 430720 }, { "epoch": 0.8581099387989289, "grad_norm": 0.18468286097049713, "learning_rate": 0.0008913070758771841, "loss": 2.48, "step": 430730 }, { "epoch": 0.8581298610225679, "grad_norm": 0.17168790102005005, "learning_rate": 0.000891126697949671, "loss": 2.4804, "step": 430740 }, { "epoch": 0.8581497832462068, "grad_norm": 0.1627034842967987, "learning_rate": 0.0008909463493590584, "loss": 2.4807, "step": 430750 }, { "epoch": 0.8581697054698457, "grad_norm": 0.18949837982654572, "learning_rate": 0.0008907660300910363, "loss": 2.4726, "step": 430760 }, { "epoch": 0.8581896276934846, "grad_norm": 0.17128720879554749, "learning_rate": 0.0008905857401313071, "loss": 2.4971, "step": 430770 }, { "epoch": 0.8582095499171235, "grad_norm": 0.18092800676822662, "learning_rate": 0.0008904054794655842, "loss": 2.4698, "step": 430780 }, { "epoch": 0.8582294721407625, "grad_norm": 0.16776351630687714, "learning_rate": 0.0008902252480795933, "loss": 2.484, "step": 430790 }, { "epoch": 0.8582493943644014, "grad_norm": 0.18261565268039703, "learning_rate": 0.0008900450459590714, "loss": 2.469, "step": 430800 }, { "epoch": 0.8582693165880403, "grad_norm": 0.1619436889886856, "learning_rate": 0.0008898648730897667, "loss": 2.4796, "step": 430810 }, { "epoch": 0.8582892388116792, "grad_norm": 0.16075262427330017, "learning_rate": 0.0008896847294574393, "loss": 2.4738, "step": 430820 }, { "epoch": 0.8583091610353181, "grad_norm": 0.2008861005306244, "learning_rate": 0.0008895046150478608, "loss": 2.4796, "step": 430830 }, { "epoch": 0.8583290832589571, "grad_norm": 0.16405512392520905, "learning_rate": 0.0008893245298468145, "loss": 2.4714, "step": 430840 }, { "epoch": 0.858349005482596, "grad_norm": 0.18343837559223175, "learning_rate": 0.0008891444738400947, "loss": 2.4787, "step": 430850 }, { "epoch": 0.8583689277062349, "grad_norm": 0.17827238142490387, "learning_rate": 0.0008889644470135081, "loss": 2.4738, "step": 430860 }, { "epoch": 0.8583888499298737, "grad_norm": 0.17109279334545135, "learning_rate": 0.0008887844493528721, "loss": 2.4812, "step": 430870 }, { "epoch": 0.8584087721535126, "grad_norm": 0.14844460785388947, "learning_rate": 0.0008886044808440157, "loss": 2.486, "step": 430880 }, { "epoch": 0.8584286943771516, "grad_norm": 0.17169630527496338, "learning_rate": 0.0008884245414727797, "loss": 2.5012, "step": 430890 }, { "epoch": 0.8584486166007905, "grad_norm": 0.43179628252983093, "learning_rate": 0.0008882446312250163, "loss": 2.4755, "step": 430900 }, { "epoch": 0.8584685388244294, "grad_norm": 0.18411707878112793, "learning_rate": 0.000888064750086589, "loss": 2.488, "step": 430910 }, { "epoch": 0.8584884610480683, "grad_norm": 0.16277331113815308, "learning_rate": 0.0008878848980433724, "loss": 2.4758, "step": 430920 }, { "epoch": 0.8585083832717073, "grad_norm": 0.19611023366451263, "learning_rate": 0.0008877050750812534, "loss": 2.4821, "step": 430930 }, { "epoch": 0.8585283054953462, "grad_norm": 0.17514747381210327, "learning_rate": 0.0008875252811861296, "loss": 2.4845, "step": 430940 }, { "epoch": 0.8585482277189851, "grad_norm": 0.17063502967357635, "learning_rate": 0.0008873455163439101, "loss": 2.486, "step": 430950 }, { "epoch": 0.858568149942624, "grad_norm": 0.18884538114070892, "learning_rate": 0.0008871657805405156, "loss": 2.4777, "step": 430960 }, { "epoch": 0.8585880721662629, "grad_norm": 0.18773208558559418, "learning_rate": 0.000886986073761878, "loss": 2.4791, "step": 430970 }, { "epoch": 0.8586079943899019, "grad_norm": 0.18767346441745758, "learning_rate": 0.0008868063959939403, "loss": 2.4841, "step": 430980 }, { "epoch": 0.8586279166135408, "grad_norm": 0.19588561356067657, "learning_rate": 0.0008866267472226577, "loss": 2.4778, "step": 430990 }, { "epoch": 0.8586478388371797, "grad_norm": 0.1935880184173584, "learning_rate": 0.0008864471274339956, "loss": 2.4719, "step": 431000 }, { "epoch": 0.8586677610608185, "grad_norm": 0.1650254726409912, "learning_rate": 0.0008862675366139317, "loss": 2.4801, "step": 431010 }, { "epoch": 0.8586876832844574, "grad_norm": 0.16666698455810547, "learning_rate": 0.0008860879747484545, "loss": 2.4882, "step": 431020 }, { "epoch": 0.8587076055080964, "grad_norm": 0.17900805175304413, "learning_rate": 0.0008859084418235637, "loss": 2.4833, "step": 431030 }, { "epoch": 0.8587275277317353, "grad_norm": 0.18754447996616364, "learning_rate": 0.0008857289378252705, "loss": 2.4928, "step": 431040 }, { "epoch": 0.8587474499553742, "grad_norm": 0.21252331137657166, "learning_rate": 0.0008855494627395974, "loss": 2.5083, "step": 431050 }, { "epoch": 0.8587673721790131, "grad_norm": 0.1786474883556366, "learning_rate": 0.0008853700165525782, "loss": 2.4862, "step": 431060 }, { "epoch": 0.858787294402652, "grad_norm": 0.18619468808174133, "learning_rate": 0.0008851905992502576, "loss": 2.472, "step": 431070 }, { "epoch": 0.858807216626291, "grad_norm": 0.21559444069862366, "learning_rate": 0.0008850112108186916, "loss": 2.4967, "step": 431080 }, { "epoch": 0.8588271388499299, "grad_norm": 0.1913028508424759, "learning_rate": 0.0008848318512439481, "loss": 2.4926, "step": 431090 }, { "epoch": 0.8588470610735688, "grad_norm": 0.1898999810218811, "learning_rate": 0.0008846525205121052, "loss": 2.4839, "step": 431100 }, { "epoch": 0.8588669832972077, "grad_norm": 0.17358915507793427, "learning_rate": 0.0008844732186092528, "loss": 2.5002, "step": 431110 }, { "epoch": 0.8588869055208466, "grad_norm": 0.1737835705280304, "learning_rate": 0.0008842939455214918, "loss": 2.4753, "step": 431120 }, { "epoch": 0.8589068277444856, "grad_norm": 0.18818725645542145, "learning_rate": 0.0008841147012349342, "loss": 2.4642, "step": 431130 }, { "epoch": 0.8589267499681245, "grad_norm": 0.1568625569343567, "learning_rate": 0.0008839354857357036, "loss": 2.4924, "step": 431140 }, { "epoch": 0.8589466721917633, "grad_norm": 0.18042483925819397, "learning_rate": 0.000883756299009934, "loss": 2.4777, "step": 431150 }, { "epoch": 0.8589665944154022, "grad_norm": 0.18021610379219055, "learning_rate": 0.0008835771410437709, "loss": 2.4764, "step": 431160 }, { "epoch": 0.8589865166390411, "grad_norm": 0.1637008786201477, "learning_rate": 0.0008833980118233714, "loss": 2.497, "step": 431170 }, { "epoch": 0.8590064388626801, "grad_norm": 0.19008411467075348, "learning_rate": 0.0008832189113349027, "loss": 2.4713, "step": 431180 }, { "epoch": 0.859026361086319, "grad_norm": 0.18673576414585114, "learning_rate": 0.0008830398395645438, "loss": 2.4849, "step": 431190 }, { "epoch": 0.8590462833099579, "grad_norm": 0.1742667853832245, "learning_rate": 0.0008828607964984847, "loss": 2.4769, "step": 431200 }, { "epoch": 0.8590662055335968, "grad_norm": 0.18811090290546417, "learning_rate": 0.0008826817821229263, "loss": 2.493, "step": 431210 }, { "epoch": 0.8590861277572358, "grad_norm": 0.16650156676769257, "learning_rate": 0.0008825027964240806, "loss": 2.484, "step": 431220 }, { "epoch": 0.8591060499808747, "grad_norm": 0.18260067701339722, "learning_rate": 0.0008823238393881705, "loss": 2.4889, "step": 431230 }, { "epoch": 0.8591259722045136, "grad_norm": 0.17201916873455048, "learning_rate": 0.0008821449110014304, "loss": 2.4821, "step": 431240 }, { "epoch": 0.8591458944281525, "grad_norm": 0.16600361466407776, "learning_rate": 0.0008819660112501051, "loss": 2.4965, "step": 431250 }, { "epoch": 0.8591658166517914, "grad_norm": 0.1831584870815277, "learning_rate": 0.0008817871401204509, "loss": 2.4918, "step": 431260 }, { "epoch": 0.8591857388754304, "grad_norm": 0.252658873796463, "learning_rate": 0.0008816082975987349, "loss": 2.5012, "step": 431270 }, { "epoch": 0.8592056610990693, "grad_norm": 0.1939336508512497, "learning_rate": 0.0008814294836712349, "loss": 2.4692, "step": 431280 }, { "epoch": 0.8592255833227082, "grad_norm": 0.19201123714447021, "learning_rate": 0.0008812506983242403, "loss": 2.4901, "step": 431290 }, { "epoch": 0.859245505546347, "grad_norm": 0.170222669839859, "learning_rate": 0.0008810719415440509, "loss": 2.4834, "step": 431300 }, { "epoch": 0.8592654277699859, "grad_norm": 0.18247251212596893, "learning_rate": 0.0008808932133169776, "loss": 2.4899, "step": 431310 }, { "epoch": 0.8592853499936249, "grad_norm": 0.1700877696275711, "learning_rate": 0.0008807145136293422, "loss": 2.4993, "step": 431320 }, { "epoch": 0.8593052722172638, "grad_norm": 0.18148212134838104, "learning_rate": 0.0008805358424674776, "loss": 2.4942, "step": 431330 }, { "epoch": 0.8593251944409027, "grad_norm": 0.16936707496643066, "learning_rate": 0.0008803571998177276, "loss": 2.49, "step": 431340 }, { "epoch": 0.8593451166645416, "grad_norm": 0.1919940710067749, "learning_rate": 0.0008801785856664466, "loss": 2.4922, "step": 431350 }, { "epoch": 0.8593650388881805, "grad_norm": 0.15979430079460144, "learning_rate": 0.0008800000000000001, "loss": 2.4824, "step": 431360 }, { "epoch": 0.8593849611118195, "grad_norm": 0.19158487021923065, "learning_rate": 0.0008798214428047644, "loss": 2.4906, "step": 431370 }, { "epoch": 0.8594048833354584, "grad_norm": 0.19470666348934174, "learning_rate": 0.0008796429140671265, "loss": 2.4783, "step": 431380 }, { "epoch": 0.8594248055590973, "grad_norm": 0.18234902620315552, "learning_rate": 0.0008794644137734848, "loss": 2.4973, "step": 431390 }, { "epoch": 0.8594447277827362, "grad_norm": 0.17606069147586823, "learning_rate": 0.0008792859419102481, "loss": 2.4821, "step": 431400 }, { "epoch": 0.8594646500063751, "grad_norm": 0.18182364106178284, "learning_rate": 0.0008791074984638358, "loss": 2.4935, "step": 431410 }, { "epoch": 0.8594845722300141, "grad_norm": 0.1775273233652115, "learning_rate": 0.0008789290834206786, "loss": 2.4547, "step": 431420 }, { "epoch": 0.859504494453653, "grad_norm": 0.1741396188735962, "learning_rate": 0.0008787506967672177, "loss": 2.496, "step": 431430 }, { "epoch": 0.8595244166772918, "grad_norm": 0.15719006955623627, "learning_rate": 0.0008785723384899051, "loss": 2.484, "step": 431440 }, { "epoch": 0.8595443389009307, "grad_norm": 0.16766807436943054, "learning_rate": 0.0008783940085752038, "loss": 2.4794, "step": 431450 }, { "epoch": 0.8595642611245696, "grad_norm": 0.16363763809204102, "learning_rate": 0.0008782157070095873, "loss": 2.4898, "step": 431460 }, { "epoch": 0.8595841833482086, "grad_norm": 0.1839616596698761, "learning_rate": 0.0008780374337795401, "loss": 2.4821, "step": 431470 }, { "epoch": 0.8596041055718475, "grad_norm": 0.18532414734363556, "learning_rate": 0.000877859188871557, "loss": 2.4847, "step": 431480 }, { "epoch": 0.8596240277954864, "grad_norm": 0.17738263309001923, "learning_rate": 0.0008776809722721439, "loss": 2.4846, "step": 431490 }, { "epoch": 0.8596439500191253, "grad_norm": 0.17207275331020355, "learning_rate": 0.0008775027839678176, "loss": 2.4953, "step": 431500 }, { "epoch": 0.8596638722427642, "grad_norm": 0.1780313104391098, "learning_rate": 0.000877324623945105, "loss": 2.485, "step": 431510 }, { "epoch": 0.8596837944664032, "grad_norm": 0.1944180727005005, "learning_rate": 0.000877146492190544, "loss": 2.4898, "step": 431520 }, { "epoch": 0.8597037166900421, "grad_norm": 0.17284448444843292, "learning_rate": 0.0008769683886906834, "loss": 2.4806, "step": 431530 }, { "epoch": 0.859723638913681, "grad_norm": 0.1966474950313568, "learning_rate": 0.0008767903134320823, "loss": 2.49, "step": 431540 }, { "epoch": 0.8597435611373199, "grad_norm": 0.1957629770040512, "learning_rate": 0.0008766122664013105, "loss": 2.4797, "step": 431550 }, { "epoch": 0.8597634833609589, "grad_norm": 0.18055808544158936, "learning_rate": 0.0008764342475849489, "loss": 2.496, "step": 431560 }, { "epoch": 0.8597834055845978, "grad_norm": 0.17424450814723969, "learning_rate": 0.0008762562569695885, "loss": 2.4824, "step": 431570 }, { "epoch": 0.8598033278082367, "grad_norm": 0.17610730230808258, "learning_rate": 0.0008760782945418307, "loss": 2.4927, "step": 431580 }, { "epoch": 0.8598232500318755, "grad_norm": 0.1667863130569458, "learning_rate": 0.0008759003602882882, "loss": 2.4814, "step": 431590 }, { "epoch": 0.8598431722555144, "grad_norm": 0.1814669966697693, "learning_rate": 0.0008757224541955844, "loss": 2.4891, "step": 431600 }, { "epoch": 0.8598630944791534, "grad_norm": 0.202549546957016, "learning_rate": 0.0008755445762503522, "loss": 2.4748, "step": 431610 }, { "epoch": 0.8598830167027923, "grad_norm": 0.1993432343006134, "learning_rate": 0.0008753667264392362, "loss": 2.486, "step": 431620 }, { "epoch": 0.8599029389264312, "grad_norm": 0.16709966957569122, "learning_rate": 0.0008751889047488908, "loss": 2.4814, "step": 431630 }, { "epoch": 0.8599228611500701, "grad_norm": 0.20509395003318787, "learning_rate": 0.0008750111111659813, "loss": 2.4859, "step": 431640 }, { "epoch": 0.859942783373709, "grad_norm": 0.20038893818855286, "learning_rate": 0.0008748333456771838, "loss": 2.4922, "step": 431650 }, { "epoch": 0.859962705597348, "grad_norm": 0.16377192735671997, "learning_rate": 0.0008746556082691842, "loss": 2.4734, "step": 431660 }, { "epoch": 0.8599826278209869, "grad_norm": 0.17454363405704498, "learning_rate": 0.0008744778989286795, "loss": 2.477, "step": 431670 }, { "epoch": 0.8600025500446258, "grad_norm": 0.16420488059520721, "learning_rate": 0.0008743002176423768, "loss": 2.4851, "step": 431680 }, { "epoch": 0.8600224722682647, "grad_norm": 0.19094902276992798, "learning_rate": 0.0008741225643969944, "loss": 2.4835, "step": 431690 }, { "epoch": 0.8600423944919036, "grad_norm": 0.18400903046131134, "learning_rate": 0.0008739449391792602, "loss": 2.4742, "step": 431700 }, { "epoch": 0.8600623167155426, "grad_norm": 0.16340269148349762, "learning_rate": 0.0008737673419759131, "loss": 2.4703, "step": 431710 }, { "epoch": 0.8600822389391815, "grad_norm": 0.19227850437164307, "learning_rate": 0.0008735897727737022, "loss": 2.4866, "step": 431720 }, { "epoch": 0.8601021611628203, "grad_norm": 0.16312147676944733, "learning_rate": 0.0008734122315593871, "loss": 2.4792, "step": 431730 }, { "epoch": 0.8601220833864592, "grad_norm": 0.19318413734436035, "learning_rate": 0.000873234718319738, "loss": 2.4815, "step": 431740 }, { "epoch": 0.8601420056100981, "grad_norm": 0.18651528656482697, "learning_rate": 0.0008730572330415356, "loss": 2.5017, "step": 431750 }, { "epoch": 0.8601619278337371, "grad_norm": 0.18166480958461761, "learning_rate": 0.0008728797757115703, "loss": 2.4897, "step": 431760 }, { "epoch": 0.860181850057376, "grad_norm": 0.16385692358016968, "learning_rate": 0.0008727023463166439, "loss": 2.4917, "step": 431770 }, { "epoch": 0.8602017722810149, "grad_norm": 0.193761944770813, "learning_rate": 0.0008725249448435677, "loss": 2.4772, "step": 431780 }, { "epoch": 0.8602216945046538, "grad_norm": 0.17368829250335693, "learning_rate": 0.0008723475712791639, "loss": 2.481, "step": 431790 }, { "epoch": 0.8602416167282927, "grad_norm": 0.17211590707302094, "learning_rate": 0.0008721702256102653, "loss": 2.4639, "step": 431800 }, { "epoch": 0.8602615389519317, "grad_norm": 0.1805865615606308, "learning_rate": 0.0008719929078237141, "loss": 2.472, "step": 431810 }, { "epoch": 0.8602814611755706, "grad_norm": 0.2396164834499359, "learning_rate": 0.0008718156179063636, "loss": 2.4893, "step": 431820 }, { "epoch": 0.8603013833992095, "grad_norm": 0.18435345590114594, "learning_rate": 0.0008716383558450776, "loss": 2.4752, "step": 431830 }, { "epoch": 0.8603213056228484, "grad_norm": 0.16297559440135956, "learning_rate": 0.0008714611216267292, "loss": 2.4839, "step": 431840 }, { "epoch": 0.8603412278464874, "grad_norm": 0.16973142325878143, "learning_rate": 0.0008712839152382031, "loss": 2.4842, "step": 431850 }, { "epoch": 0.8603611500701263, "grad_norm": 0.17161811888217926, "learning_rate": 0.0008711067366663932, "loss": 2.4827, "step": 431860 }, { "epoch": 0.8603810722937651, "grad_norm": 0.18606004118919373, "learning_rate": 0.0008709295858982045, "loss": 2.4701, "step": 431870 }, { "epoch": 0.860400994517404, "grad_norm": 0.18685603141784668, "learning_rate": 0.0008707524629205516, "loss": 2.4817, "step": 431880 }, { "epoch": 0.8604209167410429, "grad_norm": 0.1551486998796463, "learning_rate": 0.0008705753677203601, "loss": 2.4822, "step": 431890 }, { "epoch": 0.8604408389646819, "grad_norm": 0.18995529413223267, "learning_rate": 0.000870398300284565, "loss": 2.4817, "step": 431900 }, { "epoch": 0.8604607611883208, "grad_norm": 0.24071986973285675, "learning_rate": 0.000870221260600112, "loss": 2.4918, "step": 431910 }, { "epoch": 0.8604806834119597, "grad_norm": 0.2193005532026291, "learning_rate": 0.0008700442486539573, "loss": 2.5042, "step": 431920 }, { "epoch": 0.8605006056355986, "grad_norm": 0.18487267196178436, "learning_rate": 0.0008698672644330668, "loss": 2.4776, "step": 431930 }, { "epoch": 0.8605205278592375, "grad_norm": 0.17108243703842163, "learning_rate": 0.0008696903079244168, "loss": 2.4851, "step": 431940 }, { "epoch": 0.8605404500828765, "grad_norm": 0.17775267362594604, "learning_rate": 0.0008695133791149937, "loss": 2.4893, "step": 431950 }, { "epoch": 0.8605603723065154, "grad_norm": 0.18067823350429535, "learning_rate": 0.0008693364779917945, "loss": 2.4849, "step": 431960 }, { "epoch": 0.8605802945301543, "grad_norm": 0.16851890087127686, "learning_rate": 0.0008691596045418258, "loss": 2.4801, "step": 431970 }, { "epoch": 0.8606002167537932, "grad_norm": 0.16079774498939514, "learning_rate": 0.0008689827587521047, "loss": 2.4727, "step": 431980 }, { "epoch": 0.8606201389774321, "grad_norm": 0.17816133797168732, "learning_rate": 0.0008688059406096583, "loss": 2.4827, "step": 431990 }, { "epoch": 0.8606400612010711, "grad_norm": 0.18678659200668335, "learning_rate": 0.000868629150101524, "loss": 2.4704, "step": 432000 }, { "epoch": 0.86065998342471, "grad_norm": 0.17722466588020325, "learning_rate": 0.0008684523872147492, "loss": 2.4726, "step": 432010 }, { "epoch": 0.8606799056483488, "grad_norm": 0.1680871844291687, "learning_rate": 0.0008682756519363914, "loss": 2.4765, "step": 432020 }, { "epoch": 0.8606998278719877, "grad_norm": 0.20491015911102295, "learning_rate": 0.0008680989442535183, "loss": 2.4971, "step": 432030 }, { "epoch": 0.8607197500956266, "grad_norm": 0.17958861589431763, "learning_rate": 0.0008679222641532076, "loss": 2.4918, "step": 432040 }, { "epoch": 0.8607396723192656, "grad_norm": 0.19436399638652802, "learning_rate": 0.000867745611622547, "loss": 2.4742, "step": 432050 }, { "epoch": 0.8607595945429045, "grad_norm": 0.16996563971042633, "learning_rate": 0.0008675689866486347, "loss": 2.4873, "step": 432060 }, { "epoch": 0.8607795167665434, "grad_norm": 0.18702495098114014, "learning_rate": 0.0008673923892185784, "loss": 2.4874, "step": 432070 }, { "epoch": 0.8607994389901823, "grad_norm": 0.17675593495368958, "learning_rate": 0.0008672158193194963, "loss": 2.4697, "step": 432080 }, { "epoch": 0.8608193612138212, "grad_norm": 0.21098670363426208, "learning_rate": 0.0008670392769385163, "loss": 2.4661, "step": 432090 }, { "epoch": 0.8608392834374602, "grad_norm": 0.18408241868019104, "learning_rate": 0.0008668627620627765, "loss": 2.4725, "step": 432100 }, { "epoch": 0.8608592056610991, "grad_norm": 0.1705162227153778, "learning_rate": 0.0008666862746794248, "loss": 2.4878, "step": 432110 }, { "epoch": 0.860879127884738, "grad_norm": 0.15918418765068054, "learning_rate": 0.0008665098147756196, "loss": 2.4791, "step": 432120 }, { "epoch": 0.8608990501083769, "grad_norm": 0.19506962597370148, "learning_rate": 0.0008663333823385291, "loss": 2.4757, "step": 432130 }, { "epoch": 0.8609189723320159, "grad_norm": 0.1606840044260025, "learning_rate": 0.0008661569773553307, "loss": 2.482, "step": 432140 }, { "epoch": 0.8609388945556548, "grad_norm": 0.21441839635372162, "learning_rate": 0.0008659805998132131, "loss": 2.4781, "step": 432150 }, { "epoch": 0.8609588167792936, "grad_norm": 0.16166329383850098, "learning_rate": 0.0008658042496993741, "loss": 2.4925, "step": 432160 }, { "epoch": 0.8609787390029325, "grad_norm": 0.17121891677379608, "learning_rate": 0.0008656279270010214, "loss": 2.4691, "step": 432170 }, { "epoch": 0.8609986612265714, "grad_norm": 0.20565663278102875, "learning_rate": 0.0008654516317053734, "loss": 2.4998, "step": 432180 }, { "epoch": 0.8610185834502104, "grad_norm": 0.18980495631694794, "learning_rate": 0.0008652753637996574, "loss": 2.4934, "step": 432190 }, { "epoch": 0.8610385056738493, "grad_norm": 0.17270494997501373, "learning_rate": 0.0008650991232711115, "loss": 2.4698, "step": 432200 }, { "epoch": 0.8610584278974882, "grad_norm": 0.20462289452552795, "learning_rate": 0.0008649229101069831, "loss": 2.4636, "step": 432210 }, { "epoch": 0.8610783501211271, "grad_norm": 0.15961630642414093, "learning_rate": 0.00086474672429453, "loss": 2.4749, "step": 432220 }, { "epoch": 0.861098272344766, "grad_norm": 0.1705755889415741, "learning_rate": 0.0008645705658210194, "loss": 2.4666, "step": 432230 }, { "epoch": 0.861118194568405, "grad_norm": 0.1795147955417633, "learning_rate": 0.0008643944346737286, "loss": 2.4848, "step": 432240 }, { "epoch": 0.8611381167920439, "grad_norm": 0.20915597677230835, "learning_rate": 0.0008642183308399454, "loss": 2.4827, "step": 432250 }, { "epoch": 0.8611580390156828, "grad_norm": 0.15978795289993286, "learning_rate": 0.0008640422543069659, "loss": 2.4863, "step": 432260 }, { "epoch": 0.8611779612393217, "grad_norm": 0.18911796808242798, "learning_rate": 0.0008638662050620975, "loss": 2.4674, "step": 432270 }, { "epoch": 0.8611978834629606, "grad_norm": 0.181573748588562, "learning_rate": 0.0008636901830926567, "loss": 2.4851, "step": 432280 }, { "epoch": 0.8612178056865996, "grad_norm": 0.17331530153751373, "learning_rate": 0.0008635141883859702, "loss": 2.4605, "step": 432290 }, { "epoch": 0.8612377279102384, "grad_norm": 0.18621070683002472, "learning_rate": 0.0008633382209293744, "loss": 2.4773, "step": 432300 }, { "epoch": 0.8612576501338773, "grad_norm": 0.19387614727020264, "learning_rate": 0.0008631622807102151, "loss": 2.4882, "step": 432310 }, { "epoch": 0.8612775723575162, "grad_norm": 0.20082710683345795, "learning_rate": 0.0008629863677158485, "loss": 2.4794, "step": 432320 }, { "epoch": 0.8612974945811551, "grad_norm": 0.19207029044628143, "learning_rate": 0.0008628104819336402, "loss": 2.4893, "step": 432330 }, { "epoch": 0.8613174168047941, "grad_norm": 0.17344273626804352, "learning_rate": 0.0008626346233509654, "loss": 2.4793, "step": 432340 }, { "epoch": 0.861337339028433, "grad_norm": 0.17479588091373444, "learning_rate": 0.00086245879195521, "loss": 2.474, "step": 432350 }, { "epoch": 0.8613572612520719, "grad_norm": 0.19514834880828857, "learning_rate": 0.0008622829877337688, "loss": 2.484, "step": 432360 }, { "epoch": 0.8613771834757108, "grad_norm": 0.21731090545654297, "learning_rate": 0.000862107210674046, "loss": 2.4844, "step": 432370 }, { "epoch": 0.8613971056993497, "grad_norm": 0.2054028958082199, "learning_rate": 0.0008619314607634565, "loss": 2.4826, "step": 432380 }, { "epoch": 0.8614170279229887, "grad_norm": 0.19375629723072052, "learning_rate": 0.0008617557379894243, "loss": 2.4893, "step": 432390 }, { "epoch": 0.8614369501466276, "grad_norm": 0.19535589218139648, "learning_rate": 0.0008615800423393833, "loss": 2.4761, "step": 432400 }, { "epoch": 0.8614568723702665, "grad_norm": 0.1732669621706009, "learning_rate": 0.0008614043738007773, "loss": 2.4821, "step": 432410 }, { "epoch": 0.8614767945939054, "grad_norm": 0.1719726026058197, "learning_rate": 0.0008612287323610593, "loss": 2.4863, "step": 432420 }, { "epoch": 0.8614967168175444, "grad_norm": 0.17131192982196808, "learning_rate": 0.0008610531180076921, "loss": 2.4824, "step": 432430 }, { "epoch": 0.8615166390411833, "grad_norm": 0.17616555094718933, "learning_rate": 0.0008608775307281486, "loss": 2.4662, "step": 432440 }, { "epoch": 0.8615365612648221, "grad_norm": 0.19009631872177124, "learning_rate": 0.0008607019705099108, "loss": 2.4978, "step": 432450 }, { "epoch": 0.861556483488461, "grad_norm": 0.1740851253271103, "learning_rate": 0.0008605264373404708, "loss": 2.4852, "step": 432460 }, { "epoch": 0.8615764057120999, "grad_norm": 0.16520115733146667, "learning_rate": 0.0008603509312073298, "loss": 2.4625, "step": 432470 }, { "epoch": 0.8615963279357389, "grad_norm": 0.17601972818374634, "learning_rate": 0.0008601754520979994, "loss": 2.478, "step": 432480 }, { "epoch": 0.8616162501593778, "grad_norm": 0.19199584424495697, "learning_rate": 0.0008599999999999999, "loss": 2.4819, "step": 432490 }, { "epoch": 0.8616361723830167, "grad_norm": 0.17327916622161865, "learning_rate": 0.000859824574900862, "loss": 2.4783, "step": 432500 }, { "epoch": 0.8616560946066556, "grad_norm": 0.192365363240242, "learning_rate": 0.0008596491767881254, "loss": 2.4752, "step": 432510 }, { "epoch": 0.8616760168302945, "grad_norm": 0.1739257276058197, "learning_rate": 0.0008594738056493398, "loss": 2.4866, "step": 432520 }, { "epoch": 0.8616959390539335, "grad_norm": 0.16659465432167053, "learning_rate": 0.0008592984614720642, "loss": 2.4864, "step": 432530 }, { "epoch": 0.8617158612775724, "grad_norm": 0.18418513238430023, "learning_rate": 0.0008591231442438672, "loss": 2.501, "step": 432540 }, { "epoch": 0.8617357835012113, "grad_norm": 0.17883627116680145, "learning_rate": 0.000858947853952327, "loss": 2.4924, "step": 432550 }, { "epoch": 0.8617557057248502, "grad_norm": 0.196421280503273, "learning_rate": 0.0008587725905850317, "loss": 2.4866, "step": 432560 }, { "epoch": 0.861775627948489, "grad_norm": 0.15568473935127258, "learning_rate": 0.000858597354129578, "loss": 2.4895, "step": 432570 }, { "epoch": 0.861795550172128, "grad_norm": 0.15942837297916412, "learning_rate": 0.0008584221445735731, "loss": 2.4781, "step": 432580 }, { "epoch": 0.861815472395767, "grad_norm": 0.1800929605960846, "learning_rate": 0.0008582469619046331, "loss": 2.4853, "step": 432590 }, { "epoch": 0.8618353946194058, "grad_norm": 0.18626904487609863, "learning_rate": 0.0008580718061103842, "loss": 2.4883, "step": 432600 }, { "epoch": 0.8618553168430447, "grad_norm": 0.17130959033966064, "learning_rate": 0.0008578966771784613, "loss": 2.4911, "step": 432610 }, { "epoch": 0.8618752390666836, "grad_norm": 0.17548340559005737, "learning_rate": 0.0008577215750965092, "loss": 2.497, "step": 432620 }, { "epoch": 0.8618951612903226, "grad_norm": 0.19621041417121887, "learning_rate": 0.0008575464998521823, "loss": 2.4858, "step": 432630 }, { "epoch": 0.8619150835139615, "grad_norm": 0.1840434968471527, "learning_rate": 0.0008573714514331439, "loss": 2.4818, "step": 432640 }, { "epoch": 0.8619350057376004, "grad_norm": 0.16023163497447968, "learning_rate": 0.0008571964298270678, "loss": 2.4813, "step": 432650 }, { "epoch": 0.8619549279612393, "grad_norm": 0.19114737212657928, "learning_rate": 0.0008570214350216361, "loss": 2.4692, "step": 432660 }, { "epoch": 0.8619748501848782, "grad_norm": 0.18516762554645538, "learning_rate": 0.000856846467004541, "loss": 2.4841, "step": 432670 }, { "epoch": 0.8619947724085172, "grad_norm": 0.19071534276008606, "learning_rate": 0.0008566715257634838, "loss": 2.4784, "step": 432680 }, { "epoch": 0.8620146946321561, "grad_norm": 0.1801196187734604, "learning_rate": 0.0008564966112861754, "loss": 2.4693, "step": 432690 }, { "epoch": 0.862034616855795, "grad_norm": 0.18470194935798645, "learning_rate": 0.0008563217235603363, "loss": 2.4737, "step": 432700 }, { "epoch": 0.8620545390794339, "grad_norm": 0.15848825871944427, "learning_rate": 0.0008561468625736956, "loss": 2.485, "step": 432710 }, { "epoch": 0.8620744613030729, "grad_norm": 0.1708982139825821, "learning_rate": 0.0008559720283139926, "loss": 2.4701, "step": 432720 }, { "epoch": 0.8620943835267117, "grad_norm": 0.1637512594461441, "learning_rate": 0.0008557972207689757, "loss": 2.4769, "step": 432730 }, { "epoch": 0.8621143057503506, "grad_norm": 0.16580335795879364, "learning_rate": 0.0008556224399264025, "loss": 2.4803, "step": 432740 }, { "epoch": 0.8621342279739895, "grad_norm": 0.17615020275115967, "learning_rate": 0.0008554476857740403, "loss": 2.4884, "step": 432750 }, { "epoch": 0.8621541501976284, "grad_norm": 0.1582322120666504, "learning_rate": 0.0008552729582996652, "loss": 2.5058, "step": 432760 }, { "epoch": 0.8621740724212674, "grad_norm": 0.18424266576766968, "learning_rate": 0.0008550982574910631, "loss": 2.4938, "step": 432770 }, { "epoch": 0.8621939946449063, "grad_norm": 0.16191183030605316, "learning_rate": 0.0008549235833360292, "loss": 2.4764, "step": 432780 }, { "epoch": 0.8622139168685452, "grad_norm": 0.16399772465229034, "learning_rate": 0.0008547489358223674, "loss": 2.4834, "step": 432790 }, { "epoch": 0.8622338390921841, "grad_norm": 0.1972200870513916, "learning_rate": 0.0008545743149378917, "loss": 2.4718, "step": 432800 }, { "epoch": 0.862253761315823, "grad_norm": 0.1812339872121811, "learning_rate": 0.0008543997206704251, "loss": 2.4797, "step": 432810 }, { "epoch": 0.862273683539462, "grad_norm": 0.19865228235721588, "learning_rate": 0.0008542251530077998, "loss": 2.4754, "step": 432820 }, { "epoch": 0.8622936057631009, "grad_norm": 0.1769222617149353, "learning_rate": 0.000854050611937857, "loss": 2.4698, "step": 432830 }, { "epoch": 0.8623135279867398, "grad_norm": 0.21075224876403809, "learning_rate": 0.0008538760974484476, "loss": 2.49, "step": 432840 }, { "epoch": 0.8623334502103787, "grad_norm": 0.16998706758022308, "learning_rate": 0.0008537016095274319, "loss": 2.4839, "step": 432850 }, { "epoch": 0.8623533724340176, "grad_norm": 0.15676383674144745, "learning_rate": 0.0008535271481626789, "loss": 2.4818, "step": 432860 }, { "epoch": 0.8623732946576566, "grad_norm": 0.17506232857704163, "learning_rate": 0.0008533527133420671, "loss": 2.4821, "step": 432870 }, { "epoch": 0.8623932168812954, "grad_norm": 0.17518897354602814, "learning_rate": 0.000853178305053484, "loss": 2.4718, "step": 432880 }, { "epoch": 0.8624131391049343, "grad_norm": 0.18552745878696442, "learning_rate": 0.0008530039232848266, "loss": 2.4796, "step": 432890 }, { "epoch": 0.8624330613285732, "grad_norm": 0.16277313232421875, "learning_rate": 0.000852829568024001, "loss": 2.4763, "step": 432900 }, { "epoch": 0.8624529835522121, "grad_norm": 0.16864147782325745, "learning_rate": 0.0008526552392589226, "loss": 2.5068, "step": 432910 }, { "epoch": 0.8624729057758511, "grad_norm": 0.18568305671215057, "learning_rate": 0.0008524809369775159, "loss": 2.4934, "step": 432920 }, { "epoch": 0.86249282799949, "grad_norm": 0.1687958687543869, "learning_rate": 0.0008523066611677142, "loss": 2.4769, "step": 432930 }, { "epoch": 0.8625127502231289, "grad_norm": 0.18432043492794037, "learning_rate": 0.0008521324118174605, "loss": 2.4719, "step": 432940 }, { "epoch": 0.8625326724467678, "grad_norm": 0.18176120519638062, "learning_rate": 0.0008519581889147068, "loss": 2.4858, "step": 432950 }, { "epoch": 0.8625525946704067, "grad_norm": 0.1962219774723053, "learning_rate": 0.000851783992447414, "loss": 2.4837, "step": 432960 }, { "epoch": 0.8625725168940457, "grad_norm": 0.18150751292705536, "learning_rate": 0.0008516098224035526, "loss": 2.4785, "step": 432970 }, { "epoch": 0.8625924391176846, "grad_norm": 0.17652098834514618, "learning_rate": 0.0008514356787711017, "loss": 2.4731, "step": 432980 }, { "epoch": 0.8626123613413235, "grad_norm": 0.15925486385822296, "learning_rate": 0.0008512615615380496, "loss": 2.4723, "step": 432990 }, { "epoch": 0.8626322835649624, "grad_norm": 0.18611329793930054, "learning_rate": 0.0008510874706923943, "loss": 2.4834, "step": 433000 }, { "epoch": 0.8626522057886012, "grad_norm": 0.17582197487354279, "learning_rate": 0.000850913406222142, "loss": 2.4754, "step": 433010 }, { "epoch": 0.8626721280122402, "grad_norm": 0.16061915457248688, "learning_rate": 0.0008507393681153086, "loss": 2.4817, "step": 433020 }, { "epoch": 0.8626920502358791, "grad_norm": 0.18774016201496124, "learning_rate": 0.000850565356359919, "loss": 2.4871, "step": 433030 }, { "epoch": 0.862711972459518, "grad_norm": 0.19637039303779602, "learning_rate": 0.0008503913709440068, "loss": 2.4818, "step": 433040 }, { "epoch": 0.8627318946831569, "grad_norm": 0.17186400294303894, "learning_rate": 0.0008502174118556152, "loss": 2.4889, "step": 433050 }, { "epoch": 0.8627518169067959, "grad_norm": 0.17439769208431244, "learning_rate": 0.000850043479082796, "loss": 2.4878, "step": 433060 }, { "epoch": 0.8627717391304348, "grad_norm": 0.18800088763237, "learning_rate": 0.0008498695726136101, "loss": 2.4859, "step": 433070 }, { "epoch": 0.8627916613540737, "grad_norm": 0.17517372965812683, "learning_rate": 0.0008496956924361277, "loss": 2.4833, "step": 433080 }, { "epoch": 0.8628115835777126, "grad_norm": 0.17380103468894958, "learning_rate": 0.0008495218385384276, "loss": 2.4758, "step": 433090 }, { "epoch": 0.8628315058013515, "grad_norm": 0.17788006365299225, "learning_rate": 0.000849348010908598, "loss": 2.488, "step": 433100 }, { "epoch": 0.8628514280249905, "grad_norm": 0.16515405476093292, "learning_rate": 0.0008491742095347359, "loss": 2.481, "step": 433110 }, { "epoch": 0.8628713502486294, "grad_norm": 0.17686191201210022, "learning_rate": 0.0008490004344049474, "loss": 2.476, "step": 433120 }, { "epoch": 0.8628912724722683, "grad_norm": 0.1748190075159073, "learning_rate": 0.0008488266855073472, "loss": 2.4883, "step": 433130 }, { "epoch": 0.8629111946959072, "grad_norm": 0.1939217448234558, "learning_rate": 0.0008486529628300597, "loss": 2.4891, "step": 433140 }, { "epoch": 0.862931116919546, "grad_norm": 0.17906922101974487, "learning_rate": 0.0008484792663612176, "loss": 2.4773, "step": 433150 }, { "epoch": 0.862951039143185, "grad_norm": 0.1739749163389206, "learning_rate": 0.0008483055960889625, "loss": 2.4772, "step": 433160 }, { "epoch": 0.8629709613668239, "grad_norm": 0.17432901263237, "learning_rate": 0.0008481319520014457, "loss": 2.4994, "step": 433170 }, { "epoch": 0.8629908835904628, "grad_norm": 0.17451490461826324, "learning_rate": 0.0008479583340868267, "loss": 2.4733, "step": 433180 }, { "epoch": 0.8630108058141017, "grad_norm": 0.1742187887430191, "learning_rate": 0.000847784742333274, "loss": 2.4902, "step": 433190 }, { "epoch": 0.8630307280377406, "grad_norm": 0.19309979677200317, "learning_rate": 0.0008476111767289654, "loss": 2.4894, "step": 433200 }, { "epoch": 0.8630506502613796, "grad_norm": 0.1837921142578125, "learning_rate": 0.0008474376372620873, "loss": 2.4719, "step": 433210 }, { "epoch": 0.8630705724850185, "grad_norm": 0.21658676862716675, "learning_rate": 0.000847264123920835, "loss": 2.4837, "step": 433220 }, { "epoch": 0.8630904947086574, "grad_norm": 0.1852256953716278, "learning_rate": 0.000847090636693413, "loss": 2.498, "step": 433230 }, { "epoch": 0.8631104169322963, "grad_norm": 0.19616864621639252, "learning_rate": 0.000846917175568034, "loss": 2.4607, "step": 433240 }, { "epoch": 0.8631303391559352, "grad_norm": 0.17688201367855072, "learning_rate": 0.0008467437405329203, "loss": 2.4967, "step": 433250 }, { "epoch": 0.8631502613795742, "grad_norm": 0.1847601979970932, "learning_rate": 0.0008465703315763029, "loss": 2.4885, "step": 433260 }, { "epoch": 0.8631701836032131, "grad_norm": 0.18348746001720428, "learning_rate": 0.0008463969486864209, "loss": 2.4762, "step": 433270 }, { "epoch": 0.863190105826852, "grad_norm": 0.17694327235221863, "learning_rate": 0.0008462235918515235, "loss": 2.4831, "step": 433280 }, { "epoch": 0.8632100280504909, "grad_norm": 0.17254674434661865, "learning_rate": 0.0008460502610598675, "loss": 2.4931, "step": 433290 }, { "epoch": 0.8632299502741297, "grad_norm": 0.1769307404756546, "learning_rate": 0.0008458769562997193, "loss": 2.4954, "step": 433300 }, { "epoch": 0.8632498724977687, "grad_norm": 0.17677655816078186, "learning_rate": 0.0008457036775593538, "loss": 2.4741, "step": 433310 }, { "epoch": 0.8632697947214076, "grad_norm": 0.18728816509246826, "learning_rate": 0.000845530424827055, "loss": 2.4805, "step": 433320 }, { "epoch": 0.8632897169450465, "grad_norm": 0.2029736340045929, "learning_rate": 0.0008453571980911153, "loss": 2.4766, "step": 433330 }, { "epoch": 0.8633096391686854, "grad_norm": 0.16505371034145355, "learning_rate": 0.000845183997339836, "loss": 2.4796, "step": 433340 }, { "epoch": 0.8633295613923244, "grad_norm": 0.18263915181159973, "learning_rate": 0.0008450108225615271, "loss": 2.4802, "step": 433350 }, { "epoch": 0.8633494836159633, "grad_norm": 0.17793814837932587, "learning_rate": 0.0008448376737445079, "loss": 2.4829, "step": 433360 }, { "epoch": 0.8633694058396022, "grad_norm": 0.16513946652412415, "learning_rate": 0.0008446645508771056, "loss": 2.4824, "step": 433370 }, { "epoch": 0.8633893280632411, "grad_norm": 0.16862595081329346, "learning_rate": 0.0008444914539476569, "loss": 2.4676, "step": 433380 }, { "epoch": 0.86340925028688, "grad_norm": 0.18740972876548767, "learning_rate": 0.0008443183829445067, "loss": 2.4674, "step": 433390 }, { "epoch": 0.863429172510519, "grad_norm": 0.1605880856513977, "learning_rate": 0.0008441453378560089, "loss": 2.4771, "step": 433400 }, { "epoch": 0.8634490947341579, "grad_norm": 0.17668233811855316, "learning_rate": 0.000843972318670526, "loss": 2.4699, "step": 433410 }, { "epoch": 0.8634690169577968, "grad_norm": 0.16525018215179443, "learning_rate": 0.0008437993253764294, "loss": 2.4914, "step": 433420 }, { "epoch": 0.8634889391814357, "grad_norm": 0.1909908503293991, "learning_rate": 0.0008436263579620989, "loss": 2.4801, "step": 433430 }, { "epoch": 0.8635088614050745, "grad_norm": 0.1784379929304123, "learning_rate": 0.0008434534164159233, "loss": 2.4792, "step": 433440 }, { "epoch": 0.8635287836287135, "grad_norm": 0.18731923401355743, "learning_rate": 0.0008432805007262996, "loss": 2.4842, "step": 433450 }, { "epoch": 0.8635487058523524, "grad_norm": 0.17433848977088928, "learning_rate": 0.0008431076108816342, "loss": 2.4796, "step": 433460 }, { "epoch": 0.8635686280759913, "grad_norm": 0.1756761521100998, "learning_rate": 0.0008429347468703419, "loss": 2.4904, "step": 433470 }, { "epoch": 0.8635885502996302, "grad_norm": 0.1850949376821518, "learning_rate": 0.0008427619086808453, "loss": 2.4746, "step": 433480 }, { "epoch": 0.8636084725232691, "grad_norm": 0.19221967458724976, "learning_rate": 0.0008425890963015772, "loss": 2.4858, "step": 433490 }, { "epoch": 0.8636283947469081, "grad_norm": 0.18799914419651031, "learning_rate": 0.0008424163097209774, "loss": 2.4855, "step": 433500 }, { "epoch": 0.863648316970547, "grad_norm": 0.2213045358657837, "learning_rate": 0.0008422435489274958, "loss": 2.489, "step": 433510 }, { "epoch": 0.8636682391941859, "grad_norm": 0.2027241587638855, "learning_rate": 0.0008420708139095899, "loss": 2.4906, "step": 433520 }, { "epoch": 0.8636881614178248, "grad_norm": 0.16755910217761993, "learning_rate": 0.0008418981046557259, "loss": 2.4855, "step": 433530 }, { "epoch": 0.8637080836414637, "grad_norm": 0.16920064389705658, "learning_rate": 0.0008417254211543795, "loss": 2.4711, "step": 433540 }, { "epoch": 0.8637280058651027, "grad_norm": 0.1730061173439026, "learning_rate": 0.0008415527633940336, "loss": 2.4766, "step": 433550 }, { "epoch": 0.8637479280887416, "grad_norm": 0.17795053124427795, "learning_rate": 0.0008413801313631808, "loss": 2.4805, "step": 433560 }, { "epoch": 0.8637678503123805, "grad_norm": 0.20315714180469513, "learning_rate": 0.0008412075250503222, "loss": 2.4814, "step": 433570 }, { "epoch": 0.8637877725360193, "grad_norm": 0.1716310828924179, "learning_rate": 0.0008410349444439664, "loss": 2.4829, "step": 433580 }, { "epoch": 0.8638076947596582, "grad_norm": 0.16806888580322266, "learning_rate": 0.0008408623895326319, "loss": 2.485, "step": 433590 }, { "epoch": 0.8638276169832972, "grad_norm": 0.1999502182006836, "learning_rate": 0.000840689860304845, "loss": 2.4716, "step": 433600 }, { "epoch": 0.8638475392069361, "grad_norm": 0.1694258451461792, "learning_rate": 0.0008405173567491405, "loss": 2.4867, "step": 433610 }, { "epoch": 0.863867461430575, "grad_norm": 0.17900973558425903, "learning_rate": 0.0008403448788540621, "loss": 2.4662, "step": 433620 }, { "epoch": 0.8638873836542139, "grad_norm": 0.17346841096878052, "learning_rate": 0.0008401724266081618, "loss": 2.4889, "step": 433630 }, { "epoch": 0.8639073058778529, "grad_norm": 0.17833499610424042, "learning_rate": 0.0008400000000000001, "loss": 2.4779, "step": 433640 }, { "epoch": 0.8639272281014918, "grad_norm": 0.1986810714006424, "learning_rate": 0.0008398275990181459, "loss": 2.4713, "step": 433650 }, { "epoch": 0.8639471503251307, "grad_norm": 0.171680748462677, "learning_rate": 0.0008396552236511769, "loss": 2.4825, "step": 433660 }, { "epoch": 0.8639670725487696, "grad_norm": 0.1854582279920578, "learning_rate": 0.0008394828738876795, "loss": 2.4802, "step": 433670 }, { "epoch": 0.8639869947724085, "grad_norm": 0.18249645829200745, "learning_rate": 0.0008393105497162475, "loss": 2.4742, "step": 433680 }, { "epoch": 0.8640069169960475, "grad_norm": 0.1797194927930832, "learning_rate": 0.0008391382511254839, "loss": 2.463, "step": 433690 }, { "epoch": 0.8640268392196864, "grad_norm": 0.17542746663093567, "learning_rate": 0.0008389659781040007, "loss": 2.483, "step": 433700 }, { "epoch": 0.8640467614433253, "grad_norm": 0.17688150703907013, "learning_rate": 0.0008387937306404172, "loss": 2.4837, "step": 433710 }, { "epoch": 0.8640666836669642, "grad_norm": 0.16840402781963348, "learning_rate": 0.0008386215087233619, "loss": 2.4916, "step": 433720 }, { "epoch": 0.864086605890603, "grad_norm": 0.16115723550319672, "learning_rate": 0.0008384493123414718, "loss": 2.4819, "step": 433730 }, { "epoch": 0.864106528114242, "grad_norm": 0.1701059490442276, "learning_rate": 0.0008382771414833916, "loss": 2.4845, "step": 433740 }, { "epoch": 0.8641264503378809, "grad_norm": 0.14908848702907562, "learning_rate": 0.0008381049961377749, "loss": 2.4725, "step": 433750 }, { "epoch": 0.8641463725615198, "grad_norm": 0.1784757375717163, "learning_rate": 0.000837932876293284, "loss": 2.4873, "step": 433760 }, { "epoch": 0.8641662947851587, "grad_norm": 0.16600415110588074, "learning_rate": 0.0008377607819385891, "loss": 2.4835, "step": 433770 }, { "epoch": 0.8641862170087976, "grad_norm": 0.18100082874298096, "learning_rate": 0.0008375887130623687, "loss": 2.4755, "step": 433780 }, { "epoch": 0.8642061392324366, "grad_norm": 0.17674389481544495, "learning_rate": 0.0008374166696533106, "loss": 2.4915, "step": 433790 }, { "epoch": 0.8642260614560755, "grad_norm": 0.1843775063753128, "learning_rate": 0.0008372446517001094, "loss": 2.4606, "step": 433800 }, { "epoch": 0.8642459836797144, "grad_norm": 0.1892305314540863, "learning_rate": 0.0008370726591914694, "loss": 2.4828, "step": 433810 }, { "epoch": 0.8642659059033533, "grad_norm": 0.18377354741096497, "learning_rate": 0.000836900692116103, "loss": 2.483, "step": 433820 }, { "epoch": 0.8642858281269922, "grad_norm": 0.1879427433013916, "learning_rate": 0.0008367287504627308, "loss": 2.4968, "step": 433830 }, { "epoch": 0.8643057503506312, "grad_norm": 0.18908922374248505, "learning_rate": 0.0008365568342200812, "loss": 2.4775, "step": 433840 }, { "epoch": 0.8643256725742701, "grad_norm": 0.1693822145462036, "learning_rate": 0.0008363849433768915, "loss": 2.4645, "step": 433850 }, { "epoch": 0.864345594797909, "grad_norm": 0.1845707893371582, "learning_rate": 0.0008362130779219075, "loss": 2.4704, "step": 433860 }, { "epoch": 0.8643655170215478, "grad_norm": 0.18587452173233032, "learning_rate": 0.0008360412378438831, "loss": 2.4748, "step": 433870 }, { "epoch": 0.8643854392451867, "grad_norm": 0.17915664613246918, "learning_rate": 0.0008358694231315802, "loss": 2.4859, "step": 433880 }, { "epoch": 0.8644053614688257, "grad_norm": 0.17958666384220123, "learning_rate": 0.0008356976337737692, "loss": 2.4908, "step": 433890 }, { "epoch": 0.8644252836924646, "grad_norm": 0.18647655844688416, "learning_rate": 0.000835525869759229, "loss": 2.4864, "step": 433900 }, { "epoch": 0.8644452059161035, "grad_norm": 0.1794973462820053, "learning_rate": 0.0008353541310767465, "loss": 2.4917, "step": 433910 }, { "epoch": 0.8644651281397424, "grad_norm": 0.17375409603118896, "learning_rate": 0.000835182417715117, "loss": 2.4771, "step": 433920 }, { "epoch": 0.8644850503633814, "grad_norm": 0.1660362035036087, "learning_rate": 0.0008350107296631442, "loss": 2.4684, "step": 433930 }, { "epoch": 0.8645049725870203, "grad_norm": 0.19490186870098114, "learning_rate": 0.0008348390669096393, "loss": 2.4716, "step": 433940 }, { "epoch": 0.8645248948106592, "grad_norm": 0.16428683698177338, "learning_rate": 0.0008346674294434226, "loss": 2.4823, "step": 433950 }, { "epoch": 0.8645448170342981, "grad_norm": 0.18995392322540283, "learning_rate": 0.0008344958172533228, "loss": 2.4749, "step": 433960 }, { "epoch": 0.864564739257937, "grad_norm": 0.17368842661380768, "learning_rate": 0.0008343242303281757, "loss": 2.4812, "step": 433970 }, { "epoch": 0.864584661481576, "grad_norm": 0.1685485690832138, "learning_rate": 0.0008341526686568264, "loss": 2.482, "step": 433980 }, { "epoch": 0.8646045837052149, "grad_norm": 0.15630784630775452, "learning_rate": 0.0008339811322281275, "loss": 2.4683, "step": 433990 }, { "epoch": 0.8646245059288538, "grad_norm": 0.1659374088048935, "learning_rate": 0.0008338096210309398, "loss": 2.4867, "step": 434000 }, { "epoch": 0.8646444281524927, "grad_norm": 0.19319376349449158, "learning_rate": 0.0008336381350541333, "loss": 2.4773, "step": 434010 }, { "epoch": 0.8646643503761315, "grad_norm": 0.17264623939990997, "learning_rate": 0.0008334666742865851, "loss": 2.4744, "step": 434020 }, { "epoch": 0.8646842725997705, "grad_norm": 0.1827518194913864, "learning_rate": 0.0008332952387171809, "loss": 2.4831, "step": 434030 }, { "epoch": 0.8647041948234094, "grad_norm": 0.16804872453212738, "learning_rate": 0.0008331238283348143, "loss": 2.4923, "step": 434040 }, { "epoch": 0.8647241170470483, "grad_norm": 0.19617536664009094, "learning_rate": 0.0008329524431283873, "loss": 2.5051, "step": 434050 }, { "epoch": 0.8647440392706872, "grad_norm": 0.17805758118629456, "learning_rate": 0.0008327810830868101, "loss": 2.4923, "step": 434060 }, { "epoch": 0.8647639614943261, "grad_norm": 0.1712428331375122, "learning_rate": 0.0008326097481990009, "loss": 2.4872, "step": 434070 }, { "epoch": 0.8647838837179651, "grad_norm": 0.17341747879981995, "learning_rate": 0.0008324384384538862, "loss": 2.4682, "step": 434080 }, { "epoch": 0.864803805941604, "grad_norm": 0.1741722971200943, "learning_rate": 0.0008322671538404001, "loss": 2.4751, "step": 434090 }, { "epoch": 0.8648237281652429, "grad_norm": 0.2006082385778427, "learning_rate": 0.0008320958943474854, "loss": 2.4685, "step": 434100 }, { "epoch": 0.8648436503888818, "grad_norm": 0.19077500700950623, "learning_rate": 0.0008319246599640929, "loss": 2.4603, "step": 434110 }, { "epoch": 0.8648635726125207, "grad_norm": 0.16350512206554413, "learning_rate": 0.0008317534506791813, "loss": 2.4862, "step": 434120 }, { "epoch": 0.8648834948361597, "grad_norm": 0.17572332918643951, "learning_rate": 0.0008315822664817176, "loss": 2.4653, "step": 434130 }, { "epoch": 0.8649034170597986, "grad_norm": 0.1950281411409378, "learning_rate": 0.0008314111073606767, "loss": 2.4895, "step": 434140 }, { "epoch": 0.8649233392834375, "grad_norm": 0.16786321997642517, "learning_rate": 0.0008312399733050415, "loss": 2.4704, "step": 434150 }, { "epoch": 0.8649432615070763, "grad_norm": 0.17793720960617065, "learning_rate": 0.0008310688643038034, "loss": 2.4771, "step": 434160 }, { "epoch": 0.8649631837307152, "grad_norm": 0.15677133202552795, "learning_rate": 0.0008308977803459614, "loss": 2.4661, "step": 434170 }, { "epoch": 0.8649831059543542, "grad_norm": 0.19872283935546875, "learning_rate": 0.0008307267214205228, "loss": 2.4747, "step": 434180 }, { "epoch": 0.8650030281779931, "grad_norm": 0.17961403727531433, "learning_rate": 0.0008305556875165026, "loss": 2.4746, "step": 434190 }, { "epoch": 0.865022950401632, "grad_norm": 0.1708395630121231, "learning_rate": 0.0008303846786229243, "loss": 2.4894, "step": 434200 }, { "epoch": 0.8650428726252709, "grad_norm": 0.1815253496170044, "learning_rate": 0.0008302136947288193, "loss": 2.4624, "step": 434210 }, { "epoch": 0.8650627948489099, "grad_norm": 0.1570218801498413, "learning_rate": 0.0008300427358232268, "loss": 2.4644, "step": 434220 }, { "epoch": 0.8650827170725488, "grad_norm": 0.17178891599178314, "learning_rate": 0.0008298718018951941, "loss": 2.4684, "step": 434230 }, { "epoch": 0.8651026392961877, "grad_norm": 0.21362952888011932, "learning_rate": 0.0008297008929337766, "loss": 2.4816, "step": 434240 }, { "epoch": 0.8651225615198266, "grad_norm": 0.1770113706588745, "learning_rate": 0.0008295300089280375, "loss": 2.4621, "step": 434250 }, { "epoch": 0.8651424837434655, "grad_norm": 0.19697648286819458, "learning_rate": 0.0008293591498670483, "loss": 2.484, "step": 434260 }, { "epoch": 0.8651624059671045, "grad_norm": 0.18216899037361145, "learning_rate": 0.000829188315739888, "loss": 2.4738, "step": 434270 }, { "epoch": 0.8651823281907434, "grad_norm": 0.1935456544160843, "learning_rate": 0.0008290175065356442, "loss": 2.4724, "step": 434280 }, { "epoch": 0.8652022504143823, "grad_norm": 0.17695365846157074, "learning_rate": 0.0008288467222434119, "loss": 2.4634, "step": 434290 }, { "epoch": 0.8652221726380211, "grad_norm": 0.19433824717998505, "learning_rate": 0.0008286759628522942, "loss": 2.4587, "step": 434300 }, { "epoch": 0.86524209486166, "grad_norm": 0.1847711056470871, "learning_rate": 0.0008285052283514023, "loss": 2.4661, "step": 434310 }, { "epoch": 0.865262017085299, "grad_norm": 0.17738278210163116, "learning_rate": 0.0008283345187298552, "loss": 2.4696, "step": 434320 }, { "epoch": 0.8652819393089379, "grad_norm": 0.19071410596370697, "learning_rate": 0.0008281638339767798, "loss": 2.4789, "step": 434330 }, { "epoch": 0.8653018615325768, "grad_norm": 0.17382171750068665, "learning_rate": 0.0008279931740813112, "loss": 2.491, "step": 434340 }, { "epoch": 0.8653217837562157, "grad_norm": 0.16389119625091553, "learning_rate": 0.0008278225390325918, "loss": 2.4749, "step": 434350 }, { "epoch": 0.8653417059798546, "grad_norm": 0.1846337765455246, "learning_rate": 0.0008276519288197724, "loss": 2.4606, "step": 434360 }, { "epoch": 0.8653616282034936, "grad_norm": 0.1600310504436493, "learning_rate": 0.0008274813434320117, "loss": 2.4727, "step": 434370 }, { "epoch": 0.8653815504271325, "grad_norm": 0.17699971795082092, "learning_rate": 0.0008273107828584762, "loss": 2.4679, "step": 434380 }, { "epoch": 0.8654014726507714, "grad_norm": 0.21074555814266205, "learning_rate": 0.00082714024708834, "loss": 2.4808, "step": 434390 }, { "epoch": 0.8654213948744103, "grad_norm": 0.17514967918395996, "learning_rate": 0.0008269697361107857, "loss": 2.4812, "step": 434400 }, { "epoch": 0.8654413170980492, "grad_norm": 0.19033150374889374, "learning_rate": 0.0008267992499150028, "loss": 2.4626, "step": 434410 }, { "epoch": 0.8654612393216882, "grad_norm": 0.193288654088974, "learning_rate": 0.0008266287884901897, "loss": 2.4887, "step": 434420 }, { "epoch": 0.8654811615453271, "grad_norm": 0.1682114452123642, "learning_rate": 0.000826458351825552, "loss": 2.4802, "step": 434430 }, { "epoch": 0.865501083768966, "grad_norm": 0.17143559455871582, "learning_rate": 0.0008262879399103031, "loss": 2.4745, "step": 434440 }, { "epoch": 0.8655210059926048, "grad_norm": 0.2032233476638794, "learning_rate": 0.0008261175527336651, "loss": 2.4792, "step": 434450 }, { "epoch": 0.8655409282162437, "grad_norm": 0.1891588568687439, "learning_rate": 0.0008259471902848663, "loss": 2.4874, "step": 434460 }, { "epoch": 0.8655608504398827, "grad_norm": 0.16777755320072174, "learning_rate": 0.0008257768525531444, "loss": 2.4803, "step": 434470 }, { "epoch": 0.8655807726635216, "grad_norm": 0.1809605062007904, "learning_rate": 0.0008256065395277441, "loss": 2.4777, "step": 434480 }, { "epoch": 0.8656006948871605, "grad_norm": 0.1693057417869568, "learning_rate": 0.0008254362511979181, "loss": 2.4857, "step": 434490 }, { "epoch": 0.8656206171107994, "grad_norm": 0.20315924286842346, "learning_rate": 0.000825265987552927, "loss": 2.4772, "step": 434500 }, { "epoch": 0.8656405393344384, "grad_norm": 0.17828452587127686, "learning_rate": 0.0008250957485820388, "loss": 2.4824, "step": 434510 }, { "epoch": 0.8656604615580773, "grad_norm": 0.1907786726951599, "learning_rate": 0.0008249255342745295, "loss": 2.4915, "step": 434520 }, { "epoch": 0.8656803837817162, "grad_norm": 0.16921992599964142, "learning_rate": 0.0008247553446196832, "loss": 2.4834, "step": 434530 }, { "epoch": 0.8657003060053551, "grad_norm": 0.1707833707332611, "learning_rate": 0.0008245851796067909, "loss": 2.4657, "step": 434540 }, { "epoch": 0.865720228228994, "grad_norm": 0.18580834567546844, "learning_rate": 0.0008244150392251524, "loss": 2.4844, "step": 434550 }, { "epoch": 0.865740150452633, "grad_norm": 0.18108634650707245, "learning_rate": 0.0008242449234640745, "loss": 2.4848, "step": 434560 }, { "epoch": 0.8657600726762719, "grad_norm": 0.19209639728069305, "learning_rate": 0.000824074832312872, "loss": 2.4813, "step": 434570 }, { "epoch": 0.8657799948999108, "grad_norm": 0.18656648695468903, "learning_rate": 0.0008239047657608676, "loss": 2.4832, "step": 434580 }, { "epoch": 0.8657999171235496, "grad_norm": 0.18208086490631104, "learning_rate": 0.0008237347237973911, "loss": 2.4766, "step": 434590 }, { "epoch": 0.8658198393471885, "grad_norm": 0.19056981801986694, "learning_rate": 0.0008235647064117806, "loss": 2.4753, "step": 434600 }, { "epoch": 0.8658397615708275, "grad_norm": 0.19353532791137695, "learning_rate": 0.0008233947135933817, "loss": 2.4938, "step": 434610 }, { "epoch": 0.8658596837944664, "grad_norm": 0.1664697676897049, "learning_rate": 0.000823224745331548, "loss": 2.4809, "step": 434620 }, { "epoch": 0.8658796060181053, "grad_norm": 0.1659325510263443, "learning_rate": 0.0008230548016156403, "loss": 2.4801, "step": 434630 }, { "epoch": 0.8658995282417442, "grad_norm": 0.20248502492904663, "learning_rate": 0.0008228848824350272, "loss": 2.4872, "step": 434640 }, { "epoch": 0.8659194504653831, "grad_norm": 0.1804409623146057, "learning_rate": 0.0008227149877790852, "loss": 2.4753, "step": 434650 }, { "epoch": 0.8659393726890221, "grad_norm": 0.1716252863407135, "learning_rate": 0.0008225451176371979, "loss": 2.4841, "step": 434660 }, { "epoch": 0.865959294912661, "grad_norm": 0.19121302664279938, "learning_rate": 0.0008223752719987576, "loss": 2.4719, "step": 434670 }, { "epoch": 0.8659792171362999, "grad_norm": 0.16800439357757568, "learning_rate": 0.0008222054508531635, "loss": 2.464, "step": 434680 }, { "epoch": 0.8659991393599388, "grad_norm": 0.18854568898677826, "learning_rate": 0.0008220356541898224, "loss": 2.493, "step": 434690 }, { "epoch": 0.8660190615835777, "grad_norm": 0.20514114201068878, "learning_rate": 0.0008218658819981489, "loss": 2.4689, "step": 434700 }, { "epoch": 0.8660389838072167, "grad_norm": 0.18713605403900146, "learning_rate": 0.0008216961342675651, "loss": 2.4732, "step": 434710 }, { "epoch": 0.8660589060308556, "grad_norm": 0.18080371618270874, "learning_rate": 0.0008215264109875011, "loss": 2.4766, "step": 434720 }, { "epoch": 0.8660788282544944, "grad_norm": 0.1564732939004898, "learning_rate": 0.0008213567121473944, "loss": 2.4706, "step": 434730 }, { "epoch": 0.8660987504781333, "grad_norm": 0.19186672568321228, "learning_rate": 0.0008211870377366899, "loss": 2.4791, "step": 434740 }, { "epoch": 0.8661186727017722, "grad_norm": 0.1639876663684845, "learning_rate": 0.0008210173877448403, "loss": 2.4867, "step": 434750 }, { "epoch": 0.8661385949254112, "grad_norm": 0.17157725989818573, "learning_rate": 0.000820847762161306, "loss": 2.4676, "step": 434760 }, { "epoch": 0.8661585171490501, "grad_norm": 0.1752360314130783, "learning_rate": 0.0008206781609755544, "loss": 2.4906, "step": 434770 }, { "epoch": 0.866178439372689, "grad_norm": 0.19318652153015137, "learning_rate": 0.0008205085841770614, "loss": 2.4861, "step": 434780 }, { "epoch": 0.8661983615963279, "grad_norm": 0.18820244073867798, "learning_rate": 0.0008203390317553099, "loss": 2.4771, "step": 434790 }, { "epoch": 0.8662182838199668, "grad_norm": 0.1758875995874405, "learning_rate": 0.00082016950369979, "loss": 2.492, "step": 434800 }, { "epoch": 0.8662382060436058, "grad_norm": 0.16711020469665527, "learning_rate": 0.0008200000000000001, "loss": 2.4851, "step": 434810 }, { "epoch": 0.8662581282672447, "grad_norm": 0.1694469153881073, "learning_rate": 0.0008198305206454455, "loss": 2.5011, "step": 434820 }, { "epoch": 0.8662780504908836, "grad_norm": 0.1749519556760788, "learning_rate": 0.0008196610656256398, "loss": 2.4846, "step": 434830 }, { "epoch": 0.8662979727145225, "grad_norm": 0.20306028425693512, "learning_rate": 0.0008194916349301036, "loss": 2.476, "step": 434840 }, { "epoch": 0.8663178949381615, "grad_norm": 0.1599062830209732, "learning_rate": 0.0008193222285483648, "loss": 2.4755, "step": 434850 }, { "epoch": 0.8663378171618004, "grad_norm": 0.17951735854148865, "learning_rate": 0.0008191528464699591, "loss": 2.463, "step": 434860 }, { "epoch": 0.8663577393854393, "grad_norm": 0.17604243755340576, "learning_rate": 0.0008189834886844299, "loss": 2.4819, "step": 434870 }, { "epoch": 0.8663776616090781, "grad_norm": 0.18554450571537018, "learning_rate": 0.0008188141551813279, "loss": 2.4868, "step": 434880 }, { "epoch": 0.866397583832717, "grad_norm": 0.16011790931224823, "learning_rate": 0.0008186448459502113, "loss": 2.4786, "step": 434890 }, { "epoch": 0.866417506056356, "grad_norm": 0.15987086296081543, "learning_rate": 0.0008184755609806458, "loss": 2.4834, "step": 434900 }, { "epoch": 0.8664374282799949, "grad_norm": 0.18574978411197662, "learning_rate": 0.0008183063002622042, "loss": 2.4831, "step": 434910 }, { "epoch": 0.8664573505036338, "grad_norm": 0.1685452163219452, "learning_rate": 0.0008181370637844674, "loss": 2.4651, "step": 434920 }, { "epoch": 0.8664772727272727, "grad_norm": 0.1756986826658249, "learning_rate": 0.0008179678515370235, "loss": 2.4765, "step": 434930 }, { "epoch": 0.8664971949509116, "grad_norm": 0.20014184713363647, "learning_rate": 0.0008177986635094681, "loss": 2.4782, "step": 434940 }, { "epoch": 0.8665171171745506, "grad_norm": 0.17168794572353363, "learning_rate": 0.000817629499691404, "loss": 2.4846, "step": 434950 }, { "epoch": 0.8665370393981895, "grad_norm": 0.22398100793361664, "learning_rate": 0.0008174603600724413, "loss": 2.4733, "step": 434960 }, { "epoch": 0.8665569616218284, "grad_norm": 0.17524442076683044, "learning_rate": 0.0008172912446421985, "loss": 2.4898, "step": 434970 }, { "epoch": 0.8665768838454673, "grad_norm": 0.18283908069133759, "learning_rate": 0.0008171221533903003, "loss": 2.4892, "step": 434980 }, { "epoch": 0.8665968060691062, "grad_norm": 0.19491910934448242, "learning_rate": 0.0008169530863063798, "loss": 2.4775, "step": 434990 }, { "epoch": 0.8666167282927452, "grad_norm": 0.17712239921092987, "learning_rate": 0.0008167840433800769, "loss": 2.4749, "step": 435000 }, { "epoch": 0.866636650516384, "grad_norm": 0.18419380486011505, "learning_rate": 0.0008166150246010389, "loss": 2.4622, "step": 435010 }, { "epoch": 0.866656572740023, "grad_norm": 0.21520014107227325, "learning_rate": 0.0008164460299589207, "loss": 2.4825, "step": 435020 }, { "epoch": 0.8666764949636618, "grad_norm": 0.1937699317932129, "learning_rate": 0.0008162770594433848, "loss": 2.4874, "step": 435030 }, { "epoch": 0.8666964171873007, "grad_norm": 0.16736409068107605, "learning_rate": 0.0008161081130441006, "loss": 2.485, "step": 435040 }, { "epoch": 0.8667163394109397, "grad_norm": 0.19295908510684967, "learning_rate": 0.0008159391907507454, "loss": 2.4839, "step": 435050 }, { "epoch": 0.8667362616345786, "grad_norm": 0.16827115416526794, "learning_rate": 0.000815770292553003, "loss": 2.4811, "step": 435060 }, { "epoch": 0.8667561838582175, "grad_norm": 0.1669827103614807, "learning_rate": 0.0008156014184405656, "loss": 2.4838, "step": 435070 }, { "epoch": 0.8667761060818564, "grad_norm": 0.22817839682102203, "learning_rate": 0.0008154325684031323, "loss": 2.4713, "step": 435080 }, { "epoch": 0.8667960283054953, "grad_norm": 0.1782103180885315, "learning_rate": 0.0008152637424304092, "loss": 2.4807, "step": 435090 }, { "epoch": 0.8668159505291343, "grad_norm": 0.16638605296611786, "learning_rate": 0.0008150949405121102, "loss": 2.49, "step": 435100 }, { "epoch": 0.8668358727527732, "grad_norm": 0.193051278591156, "learning_rate": 0.000814926162637956, "loss": 2.4937, "step": 435110 }, { "epoch": 0.8668557949764121, "grad_norm": 0.1759132742881775, "learning_rate": 0.0008147574087976757, "loss": 2.4723, "step": 435120 }, { "epoch": 0.866875717200051, "grad_norm": 0.16992172598838806, "learning_rate": 0.0008145886789810044, "loss": 2.4738, "step": 435130 }, { "epoch": 0.86689563942369, "grad_norm": 0.18376941978931427, "learning_rate": 0.0008144199731776856, "loss": 2.4928, "step": 435140 }, { "epoch": 0.8669155616473289, "grad_norm": 0.17697350680828094, "learning_rate": 0.0008142512913774691, "loss": 2.4723, "step": 435150 }, { "epoch": 0.8669354838709677, "grad_norm": 0.21272721886634827, "learning_rate": 0.0008140826335701125, "loss": 2.4867, "step": 435160 }, { "epoch": 0.8669554060946066, "grad_norm": 0.17616520822048187, "learning_rate": 0.0008139139997453811, "loss": 2.485, "step": 435170 }, { "epoch": 0.8669753283182455, "grad_norm": 0.17645929753780365, "learning_rate": 0.0008137453898930467, "loss": 2.4657, "step": 435180 }, { "epoch": 0.8669952505418845, "grad_norm": 0.18815170228481293, "learning_rate": 0.0008135768040028888, "loss": 2.4793, "step": 435190 }, { "epoch": 0.8670151727655234, "grad_norm": 0.2346722036600113, "learning_rate": 0.000813408242064694, "loss": 2.4838, "step": 435200 }, { "epoch": 0.8670350949891623, "grad_norm": 0.1927759200334549, "learning_rate": 0.0008132397040682562, "loss": 2.494, "step": 435210 }, { "epoch": 0.8670550172128012, "grad_norm": 0.20367582142353058, "learning_rate": 0.0008130711900033769, "loss": 2.4856, "step": 435220 }, { "epoch": 0.8670749394364401, "grad_norm": 0.17392142117023468, "learning_rate": 0.0008129026998598641, "loss": 2.491, "step": 435230 }, { "epoch": 0.8670948616600791, "grad_norm": 0.19216036796569824, "learning_rate": 0.0008127342336275336, "loss": 2.4743, "step": 435240 }, { "epoch": 0.867114783883718, "grad_norm": 0.1777123510837555, "learning_rate": 0.0008125657912962084, "loss": 2.4712, "step": 435250 }, { "epoch": 0.8671347061073569, "grad_norm": 0.18314045667648315, "learning_rate": 0.0008123973728557182, "loss": 2.4754, "step": 435260 }, { "epoch": 0.8671546283309958, "grad_norm": 0.16371102631092072, "learning_rate": 0.0008122289782959007, "loss": 2.4704, "step": 435270 }, { "epoch": 0.8671745505546347, "grad_norm": 0.1587846875190735, "learning_rate": 0.0008120606076066002, "loss": 2.4687, "step": 435280 }, { "epoch": 0.8671944727782737, "grad_norm": 0.17906801402568817, "learning_rate": 0.0008118922607776684, "loss": 2.4729, "step": 435290 }, { "epoch": 0.8672143950019126, "grad_norm": 0.1854005604982376, "learning_rate": 0.0008117239377989642, "loss": 2.476, "step": 435300 }, { "epoch": 0.8672343172255514, "grad_norm": 0.17665351927280426, "learning_rate": 0.0008115556386603535, "loss": 2.4788, "step": 435310 }, { "epoch": 0.8672542394491903, "grad_norm": 0.17880307137966156, "learning_rate": 0.0008113873633517099, "loss": 2.487, "step": 435320 }, { "epoch": 0.8672741616728292, "grad_norm": 0.18928003311157227, "learning_rate": 0.0008112191118629135, "loss": 2.4688, "step": 435330 }, { "epoch": 0.8672940838964682, "grad_norm": 0.19179311394691467, "learning_rate": 0.000811050884183852, "loss": 2.4786, "step": 435340 }, { "epoch": 0.8673140061201071, "grad_norm": 0.18232974410057068, "learning_rate": 0.0008108826803044201, "loss": 2.4768, "step": 435350 }, { "epoch": 0.867333928343746, "grad_norm": 0.17147985100746155, "learning_rate": 0.0008107145002145196, "loss": 2.4646, "step": 435360 }, { "epoch": 0.8673538505673849, "grad_norm": 0.1703101396560669, "learning_rate": 0.0008105463439040594, "loss": 2.4816, "step": 435370 }, { "epoch": 0.8673737727910238, "grad_norm": 0.2189209759235382, "learning_rate": 0.0008103782113629558, "loss": 2.4785, "step": 435380 }, { "epoch": 0.8673936950146628, "grad_norm": 0.18601520359516144, "learning_rate": 0.0008102101025811322, "loss": 2.4605, "step": 435390 }, { "epoch": 0.8674136172383017, "grad_norm": 0.17110659182071686, "learning_rate": 0.0008100420175485188, "loss": 2.4767, "step": 435400 }, { "epoch": 0.8674335394619406, "grad_norm": 0.17732961475849152, "learning_rate": 0.0008098739562550528, "loss": 2.4842, "step": 435410 }, { "epoch": 0.8674534616855795, "grad_norm": 0.2009115070104599, "learning_rate": 0.0008097059186906792, "loss": 2.488, "step": 435420 }, { "epoch": 0.8674733839092185, "grad_norm": 0.19195546209812164, "learning_rate": 0.0008095379048453495, "loss": 2.471, "step": 435430 }, { "epoch": 0.8674933061328574, "grad_norm": 0.1785440444946289, "learning_rate": 0.0008093699147090228, "loss": 2.4758, "step": 435440 }, { "epoch": 0.8675132283564962, "grad_norm": 0.2003220021724701, "learning_rate": 0.0008092019482716644, "loss": 2.4724, "step": 435450 }, { "epoch": 0.8675331505801351, "grad_norm": 0.17181122303009033, "learning_rate": 0.0008090340055232475, "loss": 2.4554, "step": 435460 }, { "epoch": 0.867553072803774, "grad_norm": 0.1785402148962021, "learning_rate": 0.0008088660864537522, "loss": 2.4747, "step": 435470 }, { "epoch": 0.867572995027413, "grad_norm": 0.1745413988828659, "learning_rate": 0.0008086981910531656, "loss": 2.4906, "step": 435480 }, { "epoch": 0.8675929172510519, "grad_norm": 0.20506823062896729, "learning_rate": 0.0008085303193114817, "loss": 2.4847, "step": 435490 }, { "epoch": 0.8676128394746908, "grad_norm": 0.16464929282665253, "learning_rate": 0.0008083624712187016, "loss": 2.4762, "step": 435500 }, { "epoch": 0.8676327616983297, "grad_norm": 0.1922905445098877, "learning_rate": 0.0008081946467648336, "loss": 2.4798, "step": 435510 }, { "epoch": 0.8676526839219686, "grad_norm": 0.18800348043441772, "learning_rate": 0.0008080268459398928, "loss": 2.4625, "step": 435520 }, { "epoch": 0.8676726061456076, "grad_norm": 0.16969209909439087, "learning_rate": 0.0008078590687339018, "loss": 2.4796, "step": 435530 }, { "epoch": 0.8676925283692465, "grad_norm": 0.17531131207942963, "learning_rate": 0.0008076913151368896, "loss": 2.4852, "step": 435540 }, { "epoch": 0.8677124505928854, "grad_norm": 0.20177015662193298, "learning_rate": 0.0008075235851388927, "loss": 2.484, "step": 435550 }, { "epoch": 0.8677323728165243, "grad_norm": 0.19798588752746582, "learning_rate": 0.000807355878729954, "loss": 2.4743, "step": 435560 }, { "epoch": 0.8677522950401632, "grad_norm": 0.17818696796894073, "learning_rate": 0.0008071881959001244, "loss": 2.4643, "step": 435570 }, { "epoch": 0.8677722172638022, "grad_norm": 0.1921335756778717, "learning_rate": 0.0008070205366394609, "loss": 2.4779, "step": 435580 }, { "epoch": 0.867792139487441, "grad_norm": 0.18437163531780243, "learning_rate": 0.0008068529009380277, "loss": 2.481, "step": 435590 }, { "epoch": 0.8678120617110799, "grad_norm": 0.1735006421804428, "learning_rate": 0.0008066852887858961, "loss": 2.4819, "step": 435600 }, { "epoch": 0.8678319839347188, "grad_norm": 0.19843290746212006, "learning_rate": 0.0008065177001731445, "loss": 2.4686, "step": 435610 }, { "epoch": 0.8678519061583577, "grad_norm": 0.177099347114563, "learning_rate": 0.0008063501350898581, "loss": 2.4766, "step": 435620 }, { "epoch": 0.8678718283819967, "grad_norm": 0.20486341416835785, "learning_rate": 0.0008061825935261289, "loss": 2.4763, "step": 435630 }, { "epoch": 0.8678917506056356, "grad_norm": 0.17676734924316406, "learning_rate": 0.0008060150754720561, "loss": 2.4732, "step": 435640 }, { "epoch": 0.8679116728292745, "grad_norm": 0.20182310044765472, "learning_rate": 0.0008058475809177457, "loss": 2.4869, "step": 435650 }, { "epoch": 0.8679315950529134, "grad_norm": 0.19660192728042603, "learning_rate": 0.0008056801098533108, "loss": 2.4668, "step": 435660 }, { "epoch": 0.8679515172765523, "grad_norm": 0.17572809755802155, "learning_rate": 0.0008055126622688711, "loss": 2.4872, "step": 435670 }, { "epoch": 0.8679714395001913, "grad_norm": 0.18454498052597046, "learning_rate": 0.0008053452381545536, "loss": 2.4872, "step": 435680 }, { "epoch": 0.8679913617238302, "grad_norm": 0.1873282939195633, "learning_rate": 0.0008051778375004924, "loss": 2.4988, "step": 435690 }, { "epoch": 0.8680112839474691, "grad_norm": 0.17717429995536804, "learning_rate": 0.0008050104602968276, "loss": 2.4827, "step": 435700 }, { "epoch": 0.868031206171108, "grad_norm": 0.19051219522953033, "learning_rate": 0.0008048431065337072, "loss": 2.4734, "step": 435710 }, { "epoch": 0.868051128394747, "grad_norm": 0.17133529484272003, "learning_rate": 0.0008046757762012852, "loss": 2.4923, "step": 435720 }, { "epoch": 0.8680710506183859, "grad_norm": 0.19897125661373138, "learning_rate": 0.0008045084692897235, "loss": 2.4745, "step": 435730 }, { "epoch": 0.8680909728420247, "grad_norm": 0.19426210224628448, "learning_rate": 0.0008043411857891902, "loss": 2.4656, "step": 435740 }, { "epoch": 0.8681108950656636, "grad_norm": 0.18045444786548615, "learning_rate": 0.0008041739256898601, "loss": 2.4744, "step": 435750 }, { "epoch": 0.8681308172893025, "grad_norm": 0.1707787960767746, "learning_rate": 0.0008040066889819158, "loss": 2.4618, "step": 435760 }, { "epoch": 0.8681507395129415, "grad_norm": 0.2079138308763504, "learning_rate": 0.0008038394756555456, "loss": 2.4673, "step": 435770 }, { "epoch": 0.8681706617365804, "grad_norm": 0.20965750515460968, "learning_rate": 0.0008036722857009455, "loss": 2.4811, "step": 435780 }, { "epoch": 0.8681905839602193, "grad_norm": 0.20526428520679474, "learning_rate": 0.0008035051191083183, "loss": 2.4829, "step": 435790 }, { "epoch": 0.8682105061838582, "grad_norm": 0.17907944321632385, "learning_rate": 0.0008033379758678727, "loss": 2.4825, "step": 435800 }, { "epoch": 0.8682304284074971, "grad_norm": 0.2492135763168335, "learning_rate": 0.0008031708559698257, "loss": 2.4886, "step": 435810 }, { "epoch": 0.8682503506311361, "grad_norm": 0.17386768758296967, "learning_rate": 0.0008030037594043998, "loss": 2.4844, "step": 435820 }, { "epoch": 0.868270272854775, "grad_norm": 0.19615794718265533, "learning_rate": 0.0008028366861618253, "loss": 2.4872, "step": 435830 }, { "epoch": 0.8682901950784139, "grad_norm": 0.18818950653076172, "learning_rate": 0.0008026696362323387, "loss": 2.4653, "step": 435840 }, { "epoch": 0.8683101173020528, "grad_norm": 0.1671384572982788, "learning_rate": 0.0008025026096061839, "loss": 2.4561, "step": 435850 }, { "epoch": 0.8683300395256917, "grad_norm": 0.15854966640472412, "learning_rate": 0.0008023356062736107, "loss": 2.4778, "step": 435860 }, { "epoch": 0.8683499617493307, "grad_norm": 0.18603533506393433, "learning_rate": 0.0008021686262248764, "loss": 2.4721, "step": 435870 }, { "epoch": 0.8683698839729695, "grad_norm": 0.17123223841190338, "learning_rate": 0.000802001669450245, "loss": 2.4746, "step": 435880 }, { "epoch": 0.8683898061966084, "grad_norm": 0.17058154940605164, "learning_rate": 0.0008018347359399873, "loss": 2.4723, "step": 435890 }, { "epoch": 0.8684097284202473, "grad_norm": 0.16285128891468048, "learning_rate": 0.0008016678256843806, "loss": 2.4517, "step": 435900 }, { "epoch": 0.8684296506438862, "grad_norm": 0.16541454195976257, "learning_rate": 0.0008015009386737093, "loss": 2.4705, "step": 435910 }, { "epoch": 0.8684495728675252, "grad_norm": 0.17973443865776062, "learning_rate": 0.0008013340748982642, "loss": 2.4784, "step": 435920 }, { "epoch": 0.8684694950911641, "grad_norm": 0.16358859837055206, "learning_rate": 0.0008011672343483433, "loss": 2.4909, "step": 435930 }, { "epoch": 0.868489417314803, "grad_norm": 0.18347588181495667, "learning_rate": 0.0008010004170142511, "loss": 2.4832, "step": 435940 }, { "epoch": 0.8685093395384419, "grad_norm": 0.1808967888355255, "learning_rate": 0.0008008336228862987, "loss": 2.4775, "step": 435950 }, { "epoch": 0.8685292617620808, "grad_norm": 0.17639288306236267, "learning_rate": 0.0008006668519548041, "loss": 2.4773, "step": 435960 }, { "epoch": 0.8685491839857198, "grad_norm": 0.16534006595611572, "learning_rate": 0.0008005001042100922, "loss": 2.4695, "step": 435970 }, { "epoch": 0.8685691062093587, "grad_norm": 0.20476286113262177, "learning_rate": 0.0008003333796424941, "loss": 2.4641, "step": 435980 }, { "epoch": 0.8685890284329976, "grad_norm": 0.19545595347881317, "learning_rate": 0.0008001666782423484, "loss": 2.4634, "step": 435990 }, { "epoch": 0.8686089506566365, "grad_norm": 0.19167588651180267, "learning_rate": 0.0008, "loss": 2.4752, "step": 436000 }, { "epoch": 0.8686288728802755, "grad_norm": 0.17068591713905334, "learning_rate": 0.0007998333449058002, "loss": 2.4647, "step": 436010 }, { "epoch": 0.8686487951039144, "grad_norm": 0.18188373744487762, "learning_rate": 0.0007996667129501074, "loss": 2.4849, "step": 436020 }, { "epoch": 0.8686687173275532, "grad_norm": 0.19871006906032562, "learning_rate": 0.0007995001041232866, "loss": 2.4634, "step": 436030 }, { "epoch": 0.8686886395511921, "grad_norm": 0.19095689058303833, "learning_rate": 0.0007993335184157093, "loss": 2.4693, "step": 436040 }, { "epoch": 0.868708561774831, "grad_norm": 0.17268981039524078, "learning_rate": 0.0007991669558177541, "loss": 2.4716, "step": 436050 }, { "epoch": 0.86872848399847, "grad_norm": 0.18378378450870514, "learning_rate": 0.0007990004163198057, "loss": 2.4888, "step": 436060 }, { "epoch": 0.8687484062221089, "grad_norm": 0.16897399723529816, "learning_rate": 0.0007988338999122561, "loss": 2.4833, "step": 436070 }, { "epoch": 0.8687683284457478, "grad_norm": 0.18778398633003235, "learning_rate": 0.0007986674065855035, "loss": 2.4802, "step": 436080 }, { "epoch": 0.8687882506693867, "grad_norm": 0.17110133171081543, "learning_rate": 0.0007985009363299529, "loss": 2.4809, "step": 436090 }, { "epoch": 0.8688081728930256, "grad_norm": 0.17580492794513702, "learning_rate": 0.0007983344891360158, "loss": 2.4847, "step": 436100 }, { "epoch": 0.8688280951166646, "grad_norm": 0.18188297748565674, "learning_rate": 0.0007981680649941107, "loss": 2.4691, "step": 436110 }, { "epoch": 0.8688480173403035, "grad_norm": 0.17731529474258423, "learning_rate": 0.0007980016638946623, "loss": 2.4808, "step": 436120 }, { "epoch": 0.8688679395639424, "grad_norm": 0.16379094123840332, "learning_rate": 0.0007978352858281026, "loss": 2.4606, "step": 436130 }, { "epoch": 0.8688878617875813, "grad_norm": 0.1884004771709442, "learning_rate": 0.0007976689307848691, "loss": 2.4899, "step": 436140 }, { "epoch": 0.8689077840112202, "grad_norm": 0.23214960098266602, "learning_rate": 0.000797502598755407, "loss": 2.4635, "step": 436150 }, { "epoch": 0.8689277062348592, "grad_norm": 0.1692793220281601, "learning_rate": 0.0007973362897301674, "loss": 2.4777, "step": 436160 }, { "epoch": 0.868947628458498, "grad_norm": 0.16153937578201294, "learning_rate": 0.0007971700036996084, "loss": 2.4729, "step": 436170 }, { "epoch": 0.8689675506821369, "grad_norm": 0.1651514768600464, "learning_rate": 0.0007970037406541948, "loss": 2.4466, "step": 436180 }, { "epoch": 0.8689874729057758, "grad_norm": 0.1708892285823822, "learning_rate": 0.0007968375005843973, "loss": 2.4742, "step": 436190 }, { "epoch": 0.8690073951294147, "grad_norm": 0.19262701272964478, "learning_rate": 0.0007966712834806942, "loss": 2.4701, "step": 436200 }, { "epoch": 0.8690273173530537, "grad_norm": 0.191444993019104, "learning_rate": 0.0007965050893335695, "loss": 2.4707, "step": 436210 }, { "epoch": 0.8690472395766926, "grad_norm": 0.17915736138820648, "learning_rate": 0.0007963389181335137, "loss": 2.4766, "step": 436220 }, { "epoch": 0.8690671618003315, "grad_norm": 0.18233488500118256, "learning_rate": 0.000796172769871025, "loss": 2.4796, "step": 436230 }, { "epoch": 0.8690870840239704, "grad_norm": 0.2030743807554245, "learning_rate": 0.0007960066445366072, "loss": 2.4587, "step": 436240 }, { "epoch": 0.8691070062476093, "grad_norm": 0.18317607045173645, "learning_rate": 0.0007958405421207704, "loss": 2.4674, "step": 436250 }, { "epoch": 0.8691269284712483, "grad_norm": 0.17863790690898895, "learning_rate": 0.0007956744626140322, "loss": 2.4815, "step": 436260 }, { "epoch": 0.8691468506948872, "grad_norm": 0.17605839669704437, "learning_rate": 0.0007955084060069162, "loss": 2.4541, "step": 436270 }, { "epoch": 0.8691667729185261, "grad_norm": 0.1910417526960373, "learning_rate": 0.0007953423722899522, "loss": 2.4716, "step": 436280 }, { "epoch": 0.869186695142165, "grad_norm": 0.18085770308971405, "learning_rate": 0.0007951763614536773, "loss": 2.4871, "step": 436290 }, { "epoch": 0.8692066173658038, "grad_norm": 0.1801871359348297, "learning_rate": 0.0007950103734886345, "loss": 2.4706, "step": 436300 }, { "epoch": 0.8692265395894428, "grad_norm": 0.19016262888908386, "learning_rate": 0.0007948444083853736, "loss": 2.4847, "step": 436310 }, { "epoch": 0.8692464618130817, "grad_norm": 0.17374926805496216, "learning_rate": 0.0007946784661344509, "loss": 2.4881, "step": 436320 }, { "epoch": 0.8692663840367206, "grad_norm": 0.18572762608528137, "learning_rate": 0.0007945125467264289, "loss": 2.4853, "step": 436330 }, { "epoch": 0.8692863062603595, "grad_norm": 0.17514093220233917, "learning_rate": 0.0007943466501518772, "loss": 2.4776, "step": 436340 }, { "epoch": 0.8693062284839985, "grad_norm": 0.176726832985878, "learning_rate": 0.0007941807764013713, "loss": 2.4817, "step": 436350 }, { "epoch": 0.8693261507076374, "grad_norm": 0.17816825211048126, "learning_rate": 0.0007940149254654931, "loss": 2.478, "step": 436360 }, { "epoch": 0.8693460729312763, "grad_norm": 0.16659337282180786, "learning_rate": 0.0007938490973348318, "loss": 2.4899, "step": 436370 }, { "epoch": 0.8693659951549152, "grad_norm": 0.175531804561615, "learning_rate": 0.0007936832919999824, "loss": 2.4701, "step": 436380 }, { "epoch": 0.8693859173785541, "grad_norm": 0.16667765378952026, "learning_rate": 0.0007935175094515461, "loss": 2.4679, "step": 436390 }, { "epoch": 0.8694058396021931, "grad_norm": 0.17400547862052917, "learning_rate": 0.0007933517496801314, "loss": 2.481, "step": 436400 }, { "epoch": 0.869425761825832, "grad_norm": 0.17066839337348938, "learning_rate": 0.0007931860126763528, "loss": 2.4749, "step": 436410 }, { "epoch": 0.8694456840494709, "grad_norm": 0.1889006644487381, "learning_rate": 0.000793020298430831, "loss": 2.4774, "step": 436420 }, { "epoch": 0.8694656062731098, "grad_norm": 0.18237003684043884, "learning_rate": 0.0007928546069341937, "loss": 2.4775, "step": 436430 }, { "epoch": 0.8694855284967486, "grad_norm": 0.20538212358951569, "learning_rate": 0.0007926889381770746, "loss": 2.474, "step": 436440 }, { "epoch": 0.8695054507203877, "grad_norm": 0.20437777042388916, "learning_rate": 0.0007925232921501135, "loss": 2.4822, "step": 436450 }, { "epoch": 0.8695253729440265, "grad_norm": 0.17767713963985443, "learning_rate": 0.0007923576688439578, "loss": 2.4774, "step": 436460 }, { "epoch": 0.8695452951676654, "grad_norm": 0.19203928112983704, "learning_rate": 0.00079219206824926, "loss": 2.4773, "step": 436470 }, { "epoch": 0.8695652173913043, "grad_norm": 0.18131281435489655, "learning_rate": 0.0007920264903566801, "loss": 2.4679, "step": 436480 }, { "epoch": 0.8695851396149432, "grad_norm": 0.16566483676433563, "learning_rate": 0.0007918609351568835, "loss": 2.473, "step": 436490 }, { "epoch": 0.8696050618385822, "grad_norm": 0.17927198112010956, "learning_rate": 0.0007916954026405428, "loss": 2.4675, "step": 436500 }, { "epoch": 0.8696249840622211, "grad_norm": 0.16496607661247253, "learning_rate": 0.0007915298927983366, "loss": 2.4647, "step": 436510 }, { "epoch": 0.86964490628586, "grad_norm": 0.18269598484039307, "learning_rate": 0.0007913644056209497, "loss": 2.4779, "step": 436520 }, { "epoch": 0.8696648285094989, "grad_norm": 0.19585339725017548, "learning_rate": 0.0007911989410990741, "loss": 2.4803, "step": 436530 }, { "epoch": 0.8696847507331378, "grad_norm": 0.19819381833076477, "learning_rate": 0.000791033499223407, "loss": 2.4747, "step": 436540 }, { "epoch": 0.8697046729567768, "grad_norm": 0.15244732797145844, "learning_rate": 0.0007908680799846528, "loss": 2.4777, "step": 436550 }, { "epoch": 0.8697245951804157, "grad_norm": 0.18925988674163818, "learning_rate": 0.0007907026833735221, "loss": 2.4757, "step": 436560 }, { "epoch": 0.8697445174040546, "grad_norm": 0.1765671819448471, "learning_rate": 0.0007905373093807315, "loss": 2.4807, "step": 436570 }, { "epoch": 0.8697644396276935, "grad_norm": 0.18054182827472687, "learning_rate": 0.0007903719579970047, "loss": 2.4777, "step": 436580 }, { "epoch": 0.8697843618513323, "grad_norm": 0.18113726377487183, "learning_rate": 0.000790206629213071, "loss": 2.486, "step": 436590 }, { "epoch": 0.8698042840749713, "grad_norm": 0.2263946235179901, "learning_rate": 0.0007900413230196661, "loss": 2.4676, "step": 436600 }, { "epoch": 0.8698242062986102, "grad_norm": 0.17834243178367615, "learning_rate": 0.0007898760394075324, "loss": 2.4829, "step": 436610 }, { "epoch": 0.8698441285222491, "grad_norm": 0.17605197429656982, "learning_rate": 0.0007897107783674185, "loss": 2.4682, "step": 436620 }, { "epoch": 0.869864050745888, "grad_norm": 0.1762782335281372, "learning_rate": 0.000789545539890079, "loss": 2.4828, "step": 436630 }, { "epoch": 0.869883972969527, "grad_norm": 0.1806858628988266, "learning_rate": 0.0007893803239662756, "loss": 2.477, "step": 436640 }, { "epoch": 0.8699038951931659, "grad_norm": 0.17838628590106964, "learning_rate": 0.0007892151305867752, "loss": 2.4705, "step": 436650 }, { "epoch": 0.8699238174168048, "grad_norm": 0.1670668125152588, "learning_rate": 0.0007890499597423517, "loss": 2.4894, "step": 436660 }, { "epoch": 0.8699437396404437, "grad_norm": 0.18526549637317657, "learning_rate": 0.0007888848114237852, "loss": 2.4709, "step": 436670 }, { "epoch": 0.8699636618640826, "grad_norm": 0.188874289393425, "learning_rate": 0.0007887196856218624, "loss": 2.4761, "step": 436680 }, { "epoch": 0.8699835840877216, "grad_norm": 0.1894499808549881, "learning_rate": 0.0007885545823273755, "loss": 2.4603, "step": 436690 }, { "epoch": 0.8700035063113605, "grad_norm": 0.18878766894340515, "learning_rate": 0.0007883895015311233, "loss": 2.4709, "step": 436700 }, { "epoch": 0.8700234285349994, "grad_norm": 0.17617188394069672, "learning_rate": 0.0007882244432239112, "loss": 2.4881, "step": 436710 }, { "epoch": 0.8700433507586383, "grad_norm": 0.17042696475982666, "learning_rate": 0.0007880594073965505, "loss": 2.4672, "step": 436720 }, { "epoch": 0.8700632729822771, "grad_norm": 0.20741309225559235, "learning_rate": 0.0007878943940398593, "loss": 2.4726, "step": 436730 }, { "epoch": 0.8700831952059161, "grad_norm": 0.1895836889743805, "learning_rate": 0.0007877294031446609, "loss": 2.4655, "step": 436740 }, { "epoch": 0.870103117429555, "grad_norm": 0.18983827531337738, "learning_rate": 0.0007875644347017859, "loss": 2.4702, "step": 436750 }, { "epoch": 0.8701230396531939, "grad_norm": 0.17618852853775024, "learning_rate": 0.0007873994887020706, "loss": 2.4762, "step": 436760 }, { "epoch": 0.8701429618768328, "grad_norm": 0.18020814657211304, "learning_rate": 0.0007872345651363575, "loss": 2.4769, "step": 436770 }, { "epoch": 0.8701628841004717, "grad_norm": 0.22066156566143036, "learning_rate": 0.0007870696639954955, "loss": 2.4747, "step": 436780 }, { "epoch": 0.8701828063241107, "grad_norm": 0.20417281985282898, "learning_rate": 0.0007869047852703399, "loss": 2.4918, "step": 436790 }, { "epoch": 0.8702027285477496, "grad_norm": 0.18530024588108063, "learning_rate": 0.0007867399289517518, "loss": 2.4634, "step": 436800 }, { "epoch": 0.8702226507713885, "grad_norm": 0.18354558944702148, "learning_rate": 0.000786575095030599, "loss": 2.4832, "step": 436810 }, { "epoch": 0.8702425729950274, "grad_norm": 0.19849510490894318, "learning_rate": 0.0007864102834977547, "loss": 2.4867, "step": 436820 }, { "epoch": 0.8702624952186663, "grad_norm": 0.17728428542613983, "learning_rate": 0.0007862454943440993, "loss": 2.4873, "step": 436830 }, { "epoch": 0.8702824174423053, "grad_norm": 0.20007985830307007, "learning_rate": 0.0007860807275605186, "loss": 2.4788, "step": 436840 }, { "epoch": 0.8703023396659442, "grad_norm": 0.17829062044620514, "learning_rate": 0.0007859159831379051, "loss": 2.4756, "step": 436850 }, { "epoch": 0.8703222618895831, "grad_norm": 0.17015552520751953, "learning_rate": 0.0007857512610671569, "loss": 2.465, "step": 436860 }, { "epoch": 0.870342184113222, "grad_norm": 0.19259561598300934, "learning_rate": 0.0007855865613391788, "loss": 2.4696, "step": 436870 }, { "epoch": 0.8703621063368608, "grad_norm": 0.1957891434431076, "learning_rate": 0.0007854218839448819, "loss": 2.4844, "step": 436880 }, { "epoch": 0.8703820285604998, "grad_norm": 0.19518610835075378, "learning_rate": 0.000785257228875183, "loss": 2.4744, "step": 436890 }, { "epoch": 0.8704019507841387, "grad_norm": 0.18415473401546478, "learning_rate": 0.0007850925961210047, "loss": 2.4723, "step": 436900 }, { "epoch": 0.8704218730077776, "grad_norm": 0.20798109471797943, "learning_rate": 0.0007849279856732772, "loss": 2.4898, "step": 436910 }, { "epoch": 0.8704417952314165, "grad_norm": 0.2200450450181961, "learning_rate": 0.0007847633975229351, "loss": 2.464, "step": 436920 }, { "epoch": 0.8704617174550555, "grad_norm": 0.1757732331752777, "learning_rate": 0.0007845988316609203, "loss": 2.4743, "step": 436930 }, { "epoch": 0.8704816396786944, "grad_norm": 0.19906578958034515, "learning_rate": 0.0007844342880781805, "loss": 2.4847, "step": 436940 }, { "epoch": 0.8705015619023333, "grad_norm": 0.16130895912647247, "learning_rate": 0.0007842697667656693, "loss": 2.481, "step": 436950 }, { "epoch": 0.8705214841259722, "grad_norm": 0.16575410962104797, "learning_rate": 0.000784105267714347, "loss": 2.4768, "step": 436960 }, { "epoch": 0.8705414063496111, "grad_norm": 0.16649435460567474, "learning_rate": 0.0007839407909151793, "loss": 2.4762, "step": 436970 }, { "epoch": 0.8705613285732501, "grad_norm": 0.16089975833892822, "learning_rate": 0.0007837763363591382, "loss": 2.4807, "step": 436980 }, { "epoch": 0.870581250796889, "grad_norm": 0.16841192543506622, "learning_rate": 0.0007836119040372025, "loss": 2.4846, "step": 436990 }, { "epoch": 0.8706011730205279, "grad_norm": 0.16355948150157928, "learning_rate": 0.0007834474939403562, "loss": 2.4678, "step": 437000 }, { "epoch": 0.8706210952441668, "grad_norm": 0.1887141764163971, "learning_rate": 0.0007832831060595896, "loss": 2.4789, "step": 437010 }, { "epoch": 0.8706410174678056, "grad_norm": 0.1840386539697647, "learning_rate": 0.0007831187403858995, "loss": 2.4887, "step": 437020 }, { "epoch": 0.8706609396914446, "grad_norm": 0.17919225990772247, "learning_rate": 0.0007829543969102884, "loss": 2.4774, "step": 437030 }, { "epoch": 0.8706808619150835, "grad_norm": 0.1904146373271942, "learning_rate": 0.000782790075623765, "loss": 2.4802, "step": 437040 }, { "epoch": 0.8707007841387224, "grad_norm": 0.20286624133586884, "learning_rate": 0.0007826257765173439, "loss": 2.4762, "step": 437050 }, { "epoch": 0.8707207063623613, "grad_norm": 0.17825140058994293, "learning_rate": 0.000782461499582046, "loss": 2.4645, "step": 437060 }, { "epoch": 0.8707406285860002, "grad_norm": 0.1713242083787918, "learning_rate": 0.0007822972448088986, "loss": 2.4666, "step": 437070 }, { "epoch": 0.8707605508096392, "grad_norm": 0.17782293260097504, "learning_rate": 0.0007821330121889336, "loss": 2.4726, "step": 437080 }, { "epoch": 0.8707804730332781, "grad_norm": 0.1907271146774292, "learning_rate": 0.0007819688017131909, "loss": 2.4837, "step": 437090 }, { "epoch": 0.870800395256917, "grad_norm": 0.17730072140693665, "learning_rate": 0.0007818046133727152, "loss": 2.4658, "step": 437100 }, { "epoch": 0.8708203174805559, "grad_norm": 0.17462068796157837, "learning_rate": 0.0007816404471585575, "loss": 2.4644, "step": 437110 }, { "epoch": 0.8708402397041948, "grad_norm": 0.17912255227565765, "learning_rate": 0.0007814763030617746, "loss": 2.4743, "step": 437120 }, { "epoch": 0.8708601619278338, "grad_norm": 0.18590426445007324, "learning_rate": 0.0007813121810734301, "loss": 2.4744, "step": 437130 }, { "epoch": 0.8708800841514727, "grad_norm": 0.1781928539276123, "learning_rate": 0.0007811480811845927, "loss": 2.472, "step": 437140 }, { "epoch": 0.8709000063751116, "grad_norm": 0.18742048740386963, "learning_rate": 0.0007809840033863378, "loss": 2.4646, "step": 437150 }, { "epoch": 0.8709199285987504, "grad_norm": 0.1890387386083603, "learning_rate": 0.0007808199476697464, "loss": 2.4871, "step": 437160 }, { "epoch": 0.8709398508223893, "grad_norm": 0.1768869012594223, "learning_rate": 0.0007806559140259055, "loss": 2.4771, "step": 437170 }, { "epoch": 0.8709597730460283, "grad_norm": 0.1707269847393036, "learning_rate": 0.0007804919024459083, "loss": 2.469, "step": 437180 }, { "epoch": 0.8709796952696672, "grad_norm": 0.15951187908649445, "learning_rate": 0.0007803279129208541, "loss": 2.484, "step": 437190 }, { "epoch": 0.8709996174933061, "grad_norm": 0.19618485867977142, "learning_rate": 0.0007801639454418475, "loss": 2.4872, "step": 437200 }, { "epoch": 0.871019539716945, "grad_norm": 0.20216509699821472, "learning_rate": 0.0007800000000000001, "loss": 2.4704, "step": 437210 }, { "epoch": 0.871039461940584, "grad_norm": 0.19367557764053345, "learning_rate": 0.0007798360765864285, "loss": 2.4853, "step": 437220 }, { "epoch": 0.8710593841642229, "grad_norm": 0.1791018843650818, "learning_rate": 0.000779672175192256, "loss": 2.4852, "step": 437230 }, { "epoch": 0.8710793063878618, "grad_norm": 0.16923663020133972, "learning_rate": 0.0007795082958086115, "loss": 2.4708, "step": 437240 }, { "epoch": 0.8710992286115007, "grad_norm": 0.1631775051355362, "learning_rate": 0.0007793444384266297, "loss": 2.4774, "step": 437250 }, { "epoch": 0.8711191508351396, "grad_norm": 0.1886972039937973, "learning_rate": 0.0007791806030374518, "loss": 2.4757, "step": 437260 }, { "epoch": 0.8711390730587786, "grad_norm": 0.1943555623292923, "learning_rate": 0.0007790167896322244, "loss": 2.4788, "step": 437270 }, { "epoch": 0.8711589952824175, "grad_norm": 0.22415168583393097, "learning_rate": 0.0007788529982021002, "loss": 2.4766, "step": 437280 }, { "epoch": 0.8711789175060564, "grad_norm": 0.18408143520355225, "learning_rate": 0.0007786892287382379, "loss": 2.4685, "step": 437290 }, { "epoch": 0.8711988397296953, "grad_norm": 0.17224130034446716, "learning_rate": 0.0007785254812318023, "loss": 2.4642, "step": 437300 }, { "epoch": 0.8712187619533341, "grad_norm": 0.1588156819343567, "learning_rate": 0.0007783617556739639, "loss": 2.4855, "step": 437310 }, { "epoch": 0.8712386841769731, "grad_norm": 0.17532502114772797, "learning_rate": 0.0007781980520558989, "loss": 2.4694, "step": 437320 }, { "epoch": 0.871258606400612, "grad_norm": 0.17435967922210693, "learning_rate": 0.0007780343703687898, "loss": 2.4646, "step": 437330 }, { "epoch": 0.8712785286242509, "grad_norm": 0.18006835877895355, "learning_rate": 0.000777870710603825, "loss": 2.4786, "step": 437340 }, { "epoch": 0.8712984508478898, "grad_norm": 0.19998496770858765, "learning_rate": 0.0007777070727521982, "loss": 2.4774, "step": 437350 }, { "epoch": 0.8713183730715287, "grad_norm": 0.16636347770690918, "learning_rate": 0.00077754345680511, "loss": 2.4873, "step": 437360 }, { "epoch": 0.8713382952951677, "grad_norm": 0.17497454583644867, "learning_rate": 0.000777379862753766, "loss": 2.4662, "step": 437370 }, { "epoch": 0.8713582175188066, "grad_norm": 0.18861916661262512, "learning_rate": 0.0007772162905893783, "loss": 2.4761, "step": 437380 }, { "epoch": 0.8713781397424455, "grad_norm": 0.18594245612621307, "learning_rate": 0.0007770527403031642, "loss": 2.464, "step": 437390 }, { "epoch": 0.8713980619660844, "grad_norm": 0.2043626457452774, "learning_rate": 0.0007768892118863476, "loss": 2.4784, "step": 437400 }, { "epoch": 0.8714179841897233, "grad_norm": 0.17899253964424133, "learning_rate": 0.0007767257053301577, "loss": 2.4771, "step": 437410 }, { "epoch": 0.8714379064133623, "grad_norm": 0.20770150423049927, "learning_rate": 0.0007765622206258303, "loss": 2.4641, "step": 437420 }, { "epoch": 0.8714578286370012, "grad_norm": 0.16878798604011536, "learning_rate": 0.0007763987577646057, "loss": 2.4779, "step": 437430 }, { "epoch": 0.87147775086064, "grad_norm": 0.16640852391719818, "learning_rate": 0.0007762353167377316, "loss": 2.4723, "step": 437440 }, { "epoch": 0.871497673084279, "grad_norm": 0.2095128446817398, "learning_rate": 0.0007760718975364607, "loss": 2.4676, "step": 437450 }, { "epoch": 0.8715175953079178, "grad_norm": 0.18900740146636963, "learning_rate": 0.0007759085001520516, "loss": 2.4645, "step": 437460 }, { "epoch": 0.8715375175315568, "grad_norm": 0.1727815866470337, "learning_rate": 0.0007757451245757687, "loss": 2.4561, "step": 437470 }, { "epoch": 0.8715574397551957, "grad_norm": 0.19020472466945648, "learning_rate": 0.0007755817707988826, "loss": 2.4871, "step": 437480 }, { "epoch": 0.8715773619788346, "grad_norm": 0.17575478553771973, "learning_rate": 0.0007754184388126693, "loss": 2.4825, "step": 437490 }, { "epoch": 0.8715972842024735, "grad_norm": 0.18132157623767853, "learning_rate": 0.0007752551286084111, "loss": 2.4789, "step": 437500 }, { "epoch": 0.8716172064261125, "grad_norm": 0.17329570651054382, "learning_rate": 0.0007750918401773952, "loss": 2.5003, "step": 437510 }, { "epoch": 0.8716371286497514, "grad_norm": 0.21767768263816833, "learning_rate": 0.0007749285735109158, "loss": 2.4662, "step": 437520 }, { "epoch": 0.8716570508733903, "grad_norm": 0.1958187222480774, "learning_rate": 0.0007747653286002718, "loss": 2.4842, "step": 437530 }, { "epoch": 0.8716769730970292, "grad_norm": 0.20155787467956543, "learning_rate": 0.0007746021054367687, "loss": 2.4897, "step": 437540 }, { "epoch": 0.8716968953206681, "grad_norm": 0.17682170867919922, "learning_rate": 0.0007744389040117177, "loss": 2.4741, "step": 437550 }, { "epoch": 0.8717168175443071, "grad_norm": 0.20310832560062408, "learning_rate": 0.0007742757243164351, "loss": 2.4795, "step": 437560 }, { "epoch": 0.871736739767946, "grad_norm": 0.18917852640151978, "learning_rate": 0.0007741125663422437, "loss": 2.4652, "step": 437570 }, { "epoch": 0.8717566619915849, "grad_norm": 0.1777392029762268, "learning_rate": 0.0007739494300804717, "loss": 2.4633, "step": 437580 }, { "epoch": 0.8717765842152237, "grad_norm": 0.17206308245658875, "learning_rate": 0.0007737863155224534, "loss": 2.4769, "step": 437590 }, { "epoch": 0.8717965064388626, "grad_norm": 0.1697855144739151, "learning_rate": 0.0007736232226595288, "loss": 2.4741, "step": 437600 }, { "epoch": 0.8718164286625016, "grad_norm": 0.20360134541988373, "learning_rate": 0.000773460151483043, "loss": 2.4742, "step": 437610 }, { "epoch": 0.8718363508861405, "grad_norm": 0.1854439228773117, "learning_rate": 0.0007732971019843478, "loss": 2.477, "step": 437620 }, { "epoch": 0.8718562731097794, "grad_norm": 0.18922735750675201, "learning_rate": 0.0007731340741547999, "loss": 2.4702, "step": 437630 }, { "epoch": 0.8718761953334183, "grad_norm": 0.1739203929901123, "learning_rate": 0.0007729710679857626, "loss": 2.4789, "step": 437640 }, { "epoch": 0.8718961175570572, "grad_norm": 0.19216570258140564, "learning_rate": 0.0007728080834686045, "loss": 2.4744, "step": 437650 }, { "epoch": 0.8719160397806962, "grad_norm": 0.19334006309509277, "learning_rate": 0.0007726451205946993, "loss": 2.4778, "step": 437660 }, { "epoch": 0.8719359620043351, "grad_norm": 0.17513683438301086, "learning_rate": 0.000772482179355428, "loss": 2.48, "step": 437670 }, { "epoch": 0.871955884227974, "grad_norm": 0.1993885636329651, "learning_rate": 0.0007723192597421755, "loss": 2.4658, "step": 437680 }, { "epoch": 0.8719758064516129, "grad_norm": 0.1923469454050064, "learning_rate": 0.0007721563617463338, "loss": 2.4792, "step": 437690 }, { "epoch": 0.8719957286752518, "grad_norm": 0.1765817105770111, "learning_rate": 0.0007719934853592998, "loss": 2.4821, "step": 437700 }, { "epoch": 0.8720156508988908, "grad_norm": 0.17231105268001556, "learning_rate": 0.0007718306305724768, "loss": 2.4563, "step": 437710 }, { "epoch": 0.8720355731225297, "grad_norm": 0.19872696697711945, "learning_rate": 0.000771667797377273, "loss": 2.4658, "step": 437720 }, { "epoch": 0.8720554953461686, "grad_norm": 0.16644027829170227, "learning_rate": 0.0007715049857651029, "loss": 2.4747, "step": 437730 }, { "epoch": 0.8720754175698074, "grad_norm": 0.1877848505973816, "learning_rate": 0.0007713421957273865, "loss": 2.4566, "step": 437740 }, { "epoch": 0.8720953397934463, "grad_norm": 0.3608156442642212, "learning_rate": 0.0007711794272555493, "loss": 2.4762, "step": 437750 }, { "epoch": 0.8721152620170853, "grad_norm": 0.1839742809534073, "learning_rate": 0.0007710166803410228, "loss": 2.4733, "step": 437760 }, { "epoch": 0.8721351842407242, "grad_norm": 0.21853679418563843, "learning_rate": 0.0007708539549752438, "loss": 2.4923, "step": 437770 }, { "epoch": 0.8721551064643631, "grad_norm": 0.1912522315979004, "learning_rate": 0.0007706912511496553, "loss": 2.4709, "step": 437780 }, { "epoch": 0.872175028688002, "grad_norm": 0.17677518725395203, "learning_rate": 0.0007705285688557053, "loss": 2.4581, "step": 437790 }, { "epoch": 0.8721949509116409, "grad_norm": 0.1683482527732849, "learning_rate": 0.0007703659080848482, "loss": 2.4812, "step": 437800 }, { "epoch": 0.8722148731352799, "grad_norm": 0.18129533529281616, "learning_rate": 0.0007702032688285434, "loss": 2.4867, "step": 437810 }, { "epoch": 0.8722347953589188, "grad_norm": 0.17380394041538239, "learning_rate": 0.0007700406510782562, "loss": 2.4765, "step": 437820 }, { "epoch": 0.8722547175825577, "grad_norm": 0.19088959693908691, "learning_rate": 0.0007698780548254575, "loss": 2.4838, "step": 437830 }, { "epoch": 0.8722746398061966, "grad_norm": 0.18467514216899872, "learning_rate": 0.0007697154800616242, "loss": 2.4723, "step": 437840 }, { "epoch": 0.8722945620298356, "grad_norm": 0.17335110902786255, "learning_rate": 0.000769552926778238, "loss": 2.4894, "step": 437850 }, { "epoch": 0.8723144842534745, "grad_norm": 0.17698003351688385, "learning_rate": 0.0007693903949667874, "loss": 2.4777, "step": 437860 }, { "epoch": 0.8723344064771134, "grad_norm": 0.18306776881217957, "learning_rate": 0.000769227884618765, "loss": 2.4659, "step": 437870 }, { "epoch": 0.8723543287007522, "grad_norm": 0.18534399569034576, "learning_rate": 0.0007690653957256705, "loss": 2.4879, "step": 437880 }, { "epoch": 0.8723742509243911, "grad_norm": 0.19089661538600922, "learning_rate": 0.0007689029282790086, "loss": 2.4661, "step": 437890 }, { "epoch": 0.8723941731480301, "grad_norm": 0.19271136820316315, "learning_rate": 0.0007687404822702892, "loss": 2.4735, "step": 437900 }, { "epoch": 0.872414095371669, "grad_norm": 0.20442429184913635, "learning_rate": 0.0007685780576910284, "loss": 2.484, "step": 437910 }, { "epoch": 0.8724340175953079, "grad_norm": 0.16807906329631805, "learning_rate": 0.0007684156545327478, "loss": 2.4802, "step": 437920 }, { "epoch": 0.8724539398189468, "grad_norm": 0.1870943158864975, "learning_rate": 0.0007682532727869742, "loss": 2.4819, "step": 437930 }, { "epoch": 0.8724738620425857, "grad_norm": 0.20305423438549042, "learning_rate": 0.0007680909124452404, "loss": 2.4928, "step": 437940 }, { "epoch": 0.8724937842662247, "grad_norm": 0.21466489136219025, "learning_rate": 0.0007679285734990849, "loss": 2.4888, "step": 437950 }, { "epoch": 0.8725137064898636, "grad_norm": 0.16044719517230988, "learning_rate": 0.0007677662559400509, "loss": 2.4696, "step": 437960 }, { "epoch": 0.8725336287135025, "grad_norm": 0.16165322065353394, "learning_rate": 0.0007676039597596884, "loss": 2.4613, "step": 437970 }, { "epoch": 0.8725535509371414, "grad_norm": 0.19890521466732025, "learning_rate": 0.0007674416849495518, "loss": 2.4762, "step": 437980 }, { "epoch": 0.8725734731607803, "grad_norm": 0.19680377840995789, "learning_rate": 0.0007672794315012019, "loss": 2.4569, "step": 437990 }, { "epoch": 0.8725933953844193, "grad_norm": 0.1784905046224594, "learning_rate": 0.0007671171994062048, "loss": 2.4682, "step": 438000 }, { "epoch": 0.8726133176080582, "grad_norm": 0.1788853257894516, "learning_rate": 0.0007669549886561318, "loss": 2.4733, "step": 438010 }, { "epoch": 0.872633239831697, "grad_norm": 0.1844499111175537, "learning_rate": 0.0007667927992425606, "loss": 2.4811, "step": 438020 }, { "epoch": 0.8726531620553359, "grad_norm": 0.19485506415367126, "learning_rate": 0.000766630631157073, "loss": 2.4654, "step": 438030 }, { "epoch": 0.8726730842789748, "grad_norm": 0.18952150642871857, "learning_rate": 0.000766468484391258, "loss": 2.4813, "step": 438040 }, { "epoch": 0.8726930065026138, "grad_norm": 0.22026565670967102, "learning_rate": 0.0007663063589367091, "loss": 2.4727, "step": 438050 }, { "epoch": 0.8727129287262527, "grad_norm": 0.1644229292869568, "learning_rate": 0.0007661442547850255, "loss": 2.4549, "step": 438060 }, { "epoch": 0.8727328509498916, "grad_norm": 0.1815803349018097, "learning_rate": 0.0007659821719278122, "loss": 2.4754, "step": 438070 }, { "epoch": 0.8727527731735305, "grad_norm": 0.1704997718334198, "learning_rate": 0.0007658201103566788, "loss": 2.463, "step": 438080 }, { "epoch": 0.8727726953971694, "grad_norm": 0.19051162898540497, "learning_rate": 0.0007656580700632421, "loss": 2.4796, "step": 438090 }, { "epoch": 0.8727926176208084, "grad_norm": 0.2203468233346939, "learning_rate": 0.0007654960510391229, "loss": 2.4743, "step": 438100 }, { "epoch": 0.8728125398444473, "grad_norm": 0.17387759685516357, "learning_rate": 0.0007653340532759479, "loss": 2.4724, "step": 438110 }, { "epoch": 0.8728324620680862, "grad_norm": 0.213627889752388, "learning_rate": 0.0007651720767653497, "loss": 2.4706, "step": 438120 }, { "epoch": 0.8728523842917251, "grad_norm": 0.19479703903198242, "learning_rate": 0.0007650101214989655, "loss": 2.46, "step": 438130 }, { "epoch": 0.8728723065153641, "grad_norm": 0.1743924617767334, "learning_rate": 0.0007648481874684393, "loss": 2.4817, "step": 438140 }, { "epoch": 0.872892228739003, "grad_norm": 0.1745162308216095, "learning_rate": 0.0007646862746654193, "loss": 2.4667, "step": 438150 }, { "epoch": 0.8729121509626419, "grad_norm": 0.18559850752353668, "learning_rate": 0.00076452438308156, "loss": 2.4489, "step": 438160 }, { "epoch": 0.8729320731862807, "grad_norm": 0.23189885914325714, "learning_rate": 0.000764362512708521, "loss": 2.4664, "step": 438170 }, { "epoch": 0.8729519954099196, "grad_norm": 0.1930808424949646, "learning_rate": 0.0007642006635379674, "loss": 2.4556, "step": 438180 }, { "epoch": 0.8729719176335586, "grad_norm": 0.15963418781757355, "learning_rate": 0.0007640388355615699, "loss": 2.482, "step": 438190 }, { "epoch": 0.8729918398571975, "grad_norm": 0.17993664741516113, "learning_rate": 0.0007638770287710046, "loss": 2.4858, "step": 438200 }, { "epoch": 0.8730117620808364, "grad_norm": 0.1872062385082245, "learning_rate": 0.0007637152431579529, "loss": 2.4731, "step": 438210 }, { "epoch": 0.8730316843044753, "grad_norm": 0.18788385391235352, "learning_rate": 0.0007635534787141014, "loss": 2.474, "step": 438220 }, { "epoch": 0.8730516065281142, "grad_norm": 0.1691267192363739, "learning_rate": 0.000763391735431143, "loss": 2.4688, "step": 438230 }, { "epoch": 0.8730715287517532, "grad_norm": 0.20595291256904602, "learning_rate": 0.0007632300133007755, "loss": 2.4787, "step": 438240 }, { "epoch": 0.8730914509753921, "grad_norm": 0.1886252909898758, "learning_rate": 0.0007630683123147018, "loss": 2.4744, "step": 438250 }, { "epoch": 0.873111373199031, "grad_norm": 0.17819029092788696, "learning_rate": 0.0007629066324646307, "loss": 2.4782, "step": 438260 }, { "epoch": 0.8731312954226699, "grad_norm": 0.16979102790355682, "learning_rate": 0.0007627449737422765, "loss": 2.4683, "step": 438270 }, { "epoch": 0.8731512176463088, "grad_norm": 0.20211094617843628, "learning_rate": 0.0007625833361393586, "loss": 2.4635, "step": 438280 }, { "epoch": 0.8731711398699478, "grad_norm": 0.17338870465755463, "learning_rate": 0.0007624217196476013, "loss": 2.4653, "step": 438290 }, { "epoch": 0.8731910620935867, "grad_norm": 0.19179730117321014, "learning_rate": 0.000762260124258736, "loss": 2.4726, "step": 438300 }, { "epoch": 0.8732109843172255, "grad_norm": 0.19920948147773743, "learning_rate": 0.0007620985499644975, "loss": 2.4633, "step": 438310 }, { "epoch": 0.8732309065408644, "grad_norm": 0.1827521175146103, "learning_rate": 0.0007619369967566272, "loss": 2.4771, "step": 438320 }, { "epoch": 0.8732508287645033, "grad_norm": 0.17289918661117554, "learning_rate": 0.0007617754646268715, "loss": 2.4812, "step": 438330 }, { "epoch": 0.8732707509881423, "grad_norm": 0.1666225790977478, "learning_rate": 0.0007616139535669824, "loss": 2.4955, "step": 438340 }, { "epoch": 0.8732906732117812, "grad_norm": 0.1745985746383667, "learning_rate": 0.0007614524635687172, "loss": 2.474, "step": 438350 }, { "epoch": 0.8733105954354201, "grad_norm": 0.20458151400089264, "learning_rate": 0.0007612909946238382, "loss": 2.4817, "step": 438360 }, { "epoch": 0.873330517659059, "grad_norm": 0.16925132274627686, "learning_rate": 0.0007611295467241137, "loss": 2.4616, "step": 438370 }, { "epoch": 0.8733504398826979, "grad_norm": 0.21054022014141083, "learning_rate": 0.0007609681198613169, "loss": 2.4614, "step": 438380 }, { "epoch": 0.8733703621063369, "grad_norm": 0.17540821433067322, "learning_rate": 0.0007608067140272266, "loss": 2.4915, "step": 438390 }, { "epoch": 0.8733902843299758, "grad_norm": 0.1670169234275818, "learning_rate": 0.0007606453292136266, "loss": 2.4591, "step": 438400 }, { "epoch": 0.8734102065536147, "grad_norm": 0.1915545016527176, "learning_rate": 0.0007604839654123066, "loss": 2.4681, "step": 438410 }, { "epoch": 0.8734301287772536, "grad_norm": 0.17835035920143127, "learning_rate": 0.0007603226226150612, "loss": 2.4756, "step": 438420 }, { "epoch": 0.8734500510008926, "grad_norm": 0.17673595249652863, "learning_rate": 0.0007601613008136906, "loss": 2.4611, "step": 438430 }, { "epoch": 0.8734699732245315, "grad_norm": 0.1846230924129486, "learning_rate": 0.00076, "loss": 2.4681, "step": 438440 }, { "epoch": 0.8734898954481704, "grad_norm": 0.20697763562202454, "learning_rate": 0.0007598387201658004, "loss": 2.4703, "step": 438450 }, { "epoch": 0.8735098176718092, "grad_norm": 0.2002323716878891, "learning_rate": 0.0007596774613029078, "loss": 2.4834, "step": 438460 }, { "epoch": 0.8735297398954481, "grad_norm": 0.18121986091136932, "learning_rate": 0.0007595162234031434, "loss": 2.476, "step": 438470 }, { "epoch": 0.8735496621190871, "grad_norm": 0.19307288527488708, "learning_rate": 0.0007593550064583344, "loss": 2.471, "step": 438480 }, { "epoch": 0.873569584342726, "grad_norm": 0.1614970564842224, "learning_rate": 0.000759193810460312, "loss": 2.4611, "step": 438490 }, { "epoch": 0.8735895065663649, "grad_norm": 0.18219366669654846, "learning_rate": 0.0007590326354009143, "loss": 2.4546, "step": 438500 }, { "epoch": 0.8736094287900038, "grad_norm": 0.19820310175418854, "learning_rate": 0.0007588714812719837, "loss": 2.4683, "step": 438510 }, { "epoch": 0.8736293510136427, "grad_norm": 0.19832703471183777, "learning_rate": 0.0007587103480653679, "loss": 2.4847, "step": 438520 }, { "epoch": 0.8736492732372817, "grad_norm": 0.18006199598312378, "learning_rate": 0.0007585492357729205, "loss": 2.4839, "step": 438530 }, { "epoch": 0.8736691954609206, "grad_norm": 0.1770399659872055, "learning_rate": 0.0007583881443864995, "loss": 2.4745, "step": 438540 }, { "epoch": 0.8736891176845595, "grad_norm": 0.20063400268554688, "learning_rate": 0.0007582270738979691, "loss": 2.4778, "step": 438550 }, { "epoch": 0.8737090399081984, "grad_norm": 0.18673183023929596, "learning_rate": 0.0007580660242991981, "loss": 2.4666, "step": 438560 }, { "epoch": 0.8737289621318373, "grad_norm": 0.1702776551246643, "learning_rate": 0.0007579049955820609, "loss": 2.4851, "step": 438570 }, { "epoch": 0.8737488843554763, "grad_norm": 0.1762433499097824, "learning_rate": 0.0007577439877384373, "loss": 2.4704, "step": 438580 }, { "epoch": 0.8737688065791152, "grad_norm": 0.1833111196756363, "learning_rate": 0.0007575830007602118, "loss": 2.4685, "step": 438590 }, { "epoch": 0.873788728802754, "grad_norm": 0.19167645275592804, "learning_rate": 0.0007574220346392746, "loss": 2.4877, "step": 438600 }, { "epoch": 0.8738086510263929, "grad_norm": 0.1906130611896515, "learning_rate": 0.0007572610893675214, "loss": 2.4806, "step": 438610 }, { "epoch": 0.8738285732500318, "grad_norm": 0.1776309758424759, "learning_rate": 0.0007571001649368523, "loss": 2.4722, "step": 438620 }, { "epoch": 0.8738484954736708, "grad_norm": 0.1953170895576477, "learning_rate": 0.0007569392613391735, "loss": 2.4686, "step": 438630 }, { "epoch": 0.8738684176973097, "grad_norm": 0.22724296152591705, "learning_rate": 0.0007567783785663957, "loss": 2.4744, "step": 438640 }, { "epoch": 0.8738883399209486, "grad_norm": 0.17978183925151825, "learning_rate": 0.0007566175166104356, "loss": 2.4712, "step": 438650 }, { "epoch": 0.8739082621445875, "grad_norm": 0.17748884856700897, "learning_rate": 0.000756456675463215, "loss": 2.4698, "step": 438660 }, { "epoch": 0.8739281843682264, "grad_norm": 0.18235251307487488, "learning_rate": 0.0007562958551166601, "loss": 2.4636, "step": 438670 }, { "epoch": 0.8739481065918654, "grad_norm": 0.18180778622627258, "learning_rate": 0.0007561350555627031, "loss": 2.4829, "step": 438680 }, { "epoch": 0.8739680288155043, "grad_norm": 0.18637771904468536, "learning_rate": 0.0007559742767932811, "loss": 2.4752, "step": 438690 }, { "epoch": 0.8739879510391432, "grad_norm": 0.16722813248634338, "learning_rate": 0.0007558135188003368, "loss": 2.4618, "step": 438700 }, { "epoch": 0.8740078732627821, "grad_norm": 0.2200881540775299, "learning_rate": 0.0007556527815758176, "loss": 2.4587, "step": 438710 }, { "epoch": 0.8740277954864211, "grad_norm": 0.17906348407268524, "learning_rate": 0.0007554920651116763, "loss": 2.4864, "step": 438720 }, { "epoch": 0.87404771771006, "grad_norm": 0.17339077591896057, "learning_rate": 0.0007553313693998711, "loss": 2.4603, "step": 438730 }, { "epoch": 0.8740676399336988, "grad_norm": 0.1699337363243103, "learning_rate": 0.0007551706944323651, "loss": 2.4713, "step": 438740 }, { "epoch": 0.8740875621573377, "grad_norm": 0.18684858083724976, "learning_rate": 0.0007550100402011268, "loss": 2.4718, "step": 438750 }, { "epoch": 0.8741074843809766, "grad_norm": 0.18544204533100128, "learning_rate": 0.0007548494066981295, "loss": 2.4712, "step": 438760 }, { "epoch": 0.8741274066046156, "grad_norm": 0.19196073710918427, "learning_rate": 0.0007546887939153524, "loss": 2.4621, "step": 438770 }, { "epoch": 0.8741473288282545, "grad_norm": 0.16419059038162231, "learning_rate": 0.0007545282018447788, "loss": 2.4759, "step": 438780 }, { "epoch": 0.8741672510518934, "grad_norm": 0.2000611424446106, "learning_rate": 0.0007543676304783984, "loss": 2.4724, "step": 438790 }, { "epoch": 0.8741871732755323, "grad_norm": 0.17993131279945374, "learning_rate": 0.000754207079808205, "loss": 2.4662, "step": 438800 }, { "epoch": 0.8742070954991712, "grad_norm": 0.18577007949352264, "learning_rate": 0.0007540465498261983, "loss": 2.4813, "step": 438810 }, { "epoch": 0.8742270177228102, "grad_norm": 0.16438798606395721, "learning_rate": 0.0007538860405243828, "loss": 2.4591, "step": 438820 }, { "epoch": 0.8742469399464491, "grad_norm": 0.17574141919612885, "learning_rate": 0.0007537255518947683, "loss": 2.4902, "step": 438830 }, { "epoch": 0.874266862170088, "grad_norm": 0.19815559685230255, "learning_rate": 0.0007535650839293693, "loss": 2.4791, "step": 438840 }, { "epoch": 0.8742867843937269, "grad_norm": 0.16624461114406586, "learning_rate": 0.0007534046366202063, "loss": 2.471, "step": 438850 }, { "epoch": 0.8743067066173658, "grad_norm": 0.18936480581760406, "learning_rate": 0.000753244209959304, "loss": 2.4823, "step": 438860 }, { "epoch": 0.8743266288410048, "grad_norm": 0.16473166644573212, "learning_rate": 0.000753083803938693, "loss": 2.4699, "step": 438870 }, { "epoch": 0.8743465510646437, "grad_norm": 0.18728113174438477, "learning_rate": 0.0007529234185504084, "loss": 2.478, "step": 438880 }, { "epoch": 0.8743664732882825, "grad_norm": 0.1653159260749817, "learning_rate": 0.0007527630537864909, "loss": 2.4733, "step": 438890 }, { "epoch": 0.8743863955119214, "grad_norm": 0.1953730434179306, "learning_rate": 0.000752602709638986, "loss": 2.4699, "step": 438900 }, { "epoch": 0.8744063177355603, "grad_norm": 0.17274779081344604, "learning_rate": 0.0007524423860999445, "loss": 2.4715, "step": 438910 }, { "epoch": 0.8744262399591993, "grad_norm": 0.1630379855632782, "learning_rate": 0.0007522820831614224, "loss": 2.4646, "step": 438920 }, { "epoch": 0.8744461621828382, "grad_norm": 0.19815364480018616, "learning_rate": 0.0007521218008154802, "loss": 2.4754, "step": 438930 }, { "epoch": 0.8744660844064771, "grad_norm": 0.18932853639125824, "learning_rate": 0.0007519615390541845, "loss": 2.4667, "step": 438940 }, { "epoch": 0.874486006630116, "grad_norm": 0.20887675881385803, "learning_rate": 0.000751801297869606, "loss": 2.4758, "step": 438950 }, { "epoch": 0.8745059288537549, "grad_norm": 0.18295559287071228, "learning_rate": 0.0007516410772538211, "loss": 2.4742, "step": 438960 }, { "epoch": 0.8745258510773939, "grad_norm": 0.1899382323026657, "learning_rate": 0.0007514808771989113, "loss": 2.472, "step": 438970 }, { "epoch": 0.8745457733010328, "grad_norm": 0.16977402567863464, "learning_rate": 0.0007513206976969627, "loss": 2.4781, "step": 438980 }, { "epoch": 0.8745656955246717, "grad_norm": 0.1797223836183548, "learning_rate": 0.0007511605387400668, "loss": 2.4787, "step": 438990 }, { "epoch": 0.8745856177483106, "grad_norm": 0.18742643296718597, "learning_rate": 0.0007510004003203203, "loss": 2.4626, "step": 439000 }, { "epoch": 0.8746055399719496, "grad_norm": 0.18221065402030945, "learning_rate": 0.0007508402824298249, "loss": 2.4663, "step": 439010 }, { "epoch": 0.8746254621955885, "grad_norm": 0.17797087132930756, "learning_rate": 0.0007506801850606868, "loss": 2.4518, "step": 439020 }, { "epoch": 0.8746453844192273, "grad_norm": 0.17157810926437378, "learning_rate": 0.0007505201082050179, "loss": 2.4817, "step": 439030 }, { "epoch": 0.8746653066428662, "grad_norm": 0.19621127843856812, "learning_rate": 0.0007503600518549352, "loss": 2.4763, "step": 439040 }, { "epoch": 0.8746852288665051, "grad_norm": 0.18640044331550598, "learning_rate": 0.0007502000160025606, "loss": 2.4616, "step": 439050 }, { "epoch": 0.8747051510901441, "grad_norm": 0.1688242405653, "learning_rate": 0.0007500400006400206, "loss": 2.4759, "step": 439060 }, { "epoch": 0.874725073313783, "grad_norm": 0.20948997139930725, "learning_rate": 0.000749880005759447, "loss": 2.4597, "step": 439070 }, { "epoch": 0.8747449955374219, "grad_norm": 0.16743484139442444, "learning_rate": 0.0007497200313529772, "loss": 2.4614, "step": 439080 }, { "epoch": 0.8747649177610608, "grad_norm": 0.182662695646286, "learning_rate": 0.0007495600774127531, "loss": 2.4772, "step": 439090 }, { "epoch": 0.8747848399846997, "grad_norm": 0.17260043323040009, "learning_rate": 0.0007494001439309214, "loss": 2.4681, "step": 439100 }, { "epoch": 0.8748047622083387, "grad_norm": 0.1701900064945221, "learning_rate": 0.0007492402308996345, "loss": 2.4678, "step": 439110 }, { "epoch": 0.8748246844319776, "grad_norm": 0.20301680266857147, "learning_rate": 0.0007490803383110489, "loss": 2.4743, "step": 439120 }, { "epoch": 0.8748446066556165, "grad_norm": 0.19158321619033813, "learning_rate": 0.000748920466157327, "loss": 2.4526, "step": 439130 }, { "epoch": 0.8748645288792554, "grad_norm": 0.2214977741241455, "learning_rate": 0.0007487606144306358, "loss": 2.478, "step": 439140 }, { "epoch": 0.8748844511028943, "grad_norm": 0.1733400821685791, "learning_rate": 0.0007486007831231474, "loss": 2.4708, "step": 439150 }, { "epoch": 0.8749043733265333, "grad_norm": 0.19829769432544708, "learning_rate": 0.0007484409722270386, "loss": 2.4741, "step": 439160 }, { "epoch": 0.8749242955501721, "grad_norm": 0.16904088854789734, "learning_rate": 0.0007482811817344919, "loss": 2.4573, "step": 439170 }, { "epoch": 0.874944217773811, "grad_norm": 0.16707536578178406, "learning_rate": 0.000748121411637694, "loss": 2.462, "step": 439180 }, { "epoch": 0.8749641399974499, "grad_norm": 0.1759539097547531, "learning_rate": 0.0007479616619288369, "loss": 2.4589, "step": 439190 }, { "epoch": 0.8749840622210888, "grad_norm": 0.18863815069198608, "learning_rate": 0.0007478019326001178, "loss": 2.4665, "step": 439200 }, { "epoch": 0.8750039844447278, "grad_norm": 0.19417229294776917, "learning_rate": 0.0007476422236437386, "loss": 2.4749, "step": 439210 }, { "epoch": 0.8750239066683667, "grad_norm": 0.1811130791902542, "learning_rate": 0.0007474825350519059, "loss": 2.4593, "step": 439220 }, { "epoch": 0.8750438288920056, "grad_norm": 0.18561671674251556, "learning_rate": 0.0007473228668168321, "loss": 2.4627, "step": 439230 }, { "epoch": 0.8750637511156445, "grad_norm": 0.1892828494310379, "learning_rate": 0.000747163218930734, "loss": 2.4875, "step": 439240 }, { "epoch": 0.8750836733392834, "grad_norm": 0.2024318128824234, "learning_rate": 0.0007470035913858333, "loss": 2.4787, "step": 439250 }, { "epoch": 0.8751035955629224, "grad_norm": 0.18965719640254974, "learning_rate": 0.0007468439841743567, "loss": 2.4675, "step": 439260 }, { "epoch": 0.8751235177865613, "grad_norm": 0.17572154104709625, "learning_rate": 0.0007466843972885362, "loss": 2.469, "step": 439270 }, { "epoch": 0.8751434400102002, "grad_norm": 0.20216652750968933, "learning_rate": 0.0007465248307206083, "loss": 2.4665, "step": 439280 }, { "epoch": 0.8751633622338391, "grad_norm": 0.17666880786418915, "learning_rate": 0.0007463652844628143, "loss": 2.4769, "step": 439290 }, { "epoch": 0.8751832844574781, "grad_norm": 0.2097259908914566, "learning_rate": 0.0007462057585074015, "loss": 2.4722, "step": 439300 }, { "epoch": 0.875203206681117, "grad_norm": 0.22021551430225372, "learning_rate": 0.0007460462528466212, "loss": 2.4791, "step": 439310 }, { "epoch": 0.8752231289047558, "grad_norm": 0.17580506205558777, "learning_rate": 0.0007458867674727294, "loss": 2.4698, "step": 439320 }, { "epoch": 0.8752430511283947, "grad_norm": 0.16847054660320282, "learning_rate": 0.0007457273023779878, "loss": 2.4869, "step": 439330 }, { "epoch": 0.8752629733520336, "grad_norm": 0.1772819459438324, "learning_rate": 0.0007455678575546623, "loss": 2.4715, "step": 439340 }, { "epoch": 0.8752828955756726, "grad_norm": 0.1985260397195816, "learning_rate": 0.0007454084329950244, "loss": 2.4671, "step": 439350 }, { "epoch": 0.8753028177993115, "grad_norm": 0.18500186502933502, "learning_rate": 0.0007452490286913502, "loss": 2.4759, "step": 439360 }, { "epoch": 0.8753227400229504, "grad_norm": 0.1864231824874878, "learning_rate": 0.0007450896446359206, "loss": 2.4782, "step": 439370 }, { "epoch": 0.8753426622465893, "grad_norm": 0.17950420081615448, "learning_rate": 0.0007449302808210215, "loss": 2.463, "step": 439380 }, { "epoch": 0.8753625844702282, "grad_norm": 0.1933954805135727, "learning_rate": 0.0007447709372389437, "loss": 2.4786, "step": 439390 }, { "epoch": 0.8753825066938672, "grad_norm": 0.1854044497013092, "learning_rate": 0.0007446116138819827, "loss": 2.4703, "step": 439400 }, { "epoch": 0.8754024289175061, "grad_norm": 0.17948147654533386, "learning_rate": 0.0007444523107424394, "loss": 2.473, "step": 439410 }, { "epoch": 0.875422351141145, "grad_norm": 0.17085354030132294, "learning_rate": 0.000744293027812619, "loss": 2.4701, "step": 439420 }, { "epoch": 0.8754422733647839, "grad_norm": 0.18790583312511444, "learning_rate": 0.0007441337650848321, "loss": 2.469, "step": 439430 }, { "epoch": 0.8754621955884228, "grad_norm": 0.17712000012397766, "learning_rate": 0.0007439745225513934, "loss": 2.4792, "step": 439440 }, { "epoch": 0.8754821178120618, "grad_norm": 0.19839298725128174, "learning_rate": 0.0007438153002046235, "loss": 2.4683, "step": 439450 }, { "epoch": 0.8755020400357006, "grad_norm": 0.15976352989673615, "learning_rate": 0.0007436560980368472, "loss": 2.4713, "step": 439460 }, { "epoch": 0.8755219622593395, "grad_norm": 0.21717554330825806, "learning_rate": 0.0007434969160403944, "loss": 2.4742, "step": 439470 }, { "epoch": 0.8755418844829784, "grad_norm": 0.18292337656021118, "learning_rate": 0.0007433377542075994, "loss": 2.4686, "step": 439480 }, { "epoch": 0.8755618067066173, "grad_norm": 0.1924850195646286, "learning_rate": 0.000743178612530802, "loss": 2.4617, "step": 439490 }, { "epoch": 0.8755817289302563, "grad_norm": 0.17960505187511444, "learning_rate": 0.0007430194910023465, "loss": 2.4656, "step": 439500 }, { "epoch": 0.8756016511538952, "grad_norm": 0.1751403510570526, "learning_rate": 0.0007428603896145823, "loss": 2.4804, "step": 439510 }, { "epoch": 0.8756215733775341, "grad_norm": 0.17498302459716797, "learning_rate": 0.000742701308359863, "loss": 2.4663, "step": 439520 }, { "epoch": 0.875641495601173, "grad_norm": 0.18377311527729034, "learning_rate": 0.0007425422472305481, "loss": 2.4828, "step": 439530 }, { "epoch": 0.8756614178248119, "grad_norm": 0.16202062368392944, "learning_rate": 0.000742383206219001, "loss": 2.4633, "step": 439540 }, { "epoch": 0.8756813400484509, "grad_norm": 0.17644646763801575, "learning_rate": 0.0007422241853175899, "loss": 2.4795, "step": 439550 }, { "epoch": 0.8757012622720898, "grad_norm": 0.17641973495483398, "learning_rate": 0.0007420651845186889, "loss": 2.4754, "step": 439560 }, { "epoch": 0.8757211844957287, "grad_norm": 0.17737789452075958, "learning_rate": 0.0007419062038146758, "loss": 2.4679, "step": 439570 }, { "epoch": 0.8757411067193676, "grad_norm": 0.19078220427036285, "learning_rate": 0.0007417472431979338, "loss": 2.4651, "step": 439580 }, { "epoch": 0.8757610289430064, "grad_norm": 0.17712073028087616, "learning_rate": 0.0007415883026608503, "loss": 2.4716, "step": 439590 }, { "epoch": 0.8757809511666454, "grad_norm": 0.1813737452030182, "learning_rate": 0.0007414293821958182, "loss": 2.4713, "step": 439600 }, { "epoch": 0.8758008733902843, "grad_norm": 0.19820503890514374, "learning_rate": 0.0007412704817952349, "loss": 2.4661, "step": 439610 }, { "epoch": 0.8758207956139232, "grad_norm": 0.1955489069223404, "learning_rate": 0.0007411116014515027, "loss": 2.4709, "step": 439620 }, { "epoch": 0.8758407178375621, "grad_norm": 0.18676768243312836, "learning_rate": 0.0007409527411570287, "loss": 2.473, "step": 439630 }, { "epoch": 0.8758606400612011, "grad_norm": 0.17654749751091003, "learning_rate": 0.0007407939009042246, "loss": 2.4673, "step": 439640 }, { "epoch": 0.87588056228484, "grad_norm": 0.18404430150985718, "learning_rate": 0.0007406350806855067, "loss": 2.4874, "step": 439650 }, { "epoch": 0.8759004845084789, "grad_norm": 0.1846814900636673, "learning_rate": 0.000740476280493297, "loss": 2.4748, "step": 439660 }, { "epoch": 0.8759204067321178, "grad_norm": 0.18191905319690704, "learning_rate": 0.0007403175003200211, "loss": 2.4517, "step": 439670 }, { "epoch": 0.8759403289557567, "grad_norm": 0.1906312257051468, "learning_rate": 0.0007401587401581102, "loss": 2.4679, "step": 439680 }, { "epoch": 0.8759602511793957, "grad_norm": 0.17780832946300507, "learning_rate": 0.00074, "loss": 2.4736, "step": 439690 }, { "epoch": 0.8759801734030346, "grad_norm": 0.21048210561275482, "learning_rate": 0.0007398412798381308, "loss": 2.4554, "step": 439700 }, { "epoch": 0.8760000956266735, "grad_norm": 0.17912739515304565, "learning_rate": 0.000739682579664948, "loss": 2.4851, "step": 439710 }, { "epoch": 0.8760200178503124, "grad_norm": 0.17690825462341309, "learning_rate": 0.0007395238994729016, "loss": 2.4549, "step": 439720 }, { "epoch": 0.8760399400739513, "grad_norm": 0.17231716215610504, "learning_rate": 0.0007393652392544461, "loss": 2.4879, "step": 439730 }, { "epoch": 0.8760598622975903, "grad_norm": 0.1771118938922882, "learning_rate": 0.0007392065990020412, "loss": 2.4821, "step": 439740 }, { "epoch": 0.8760797845212291, "grad_norm": 0.1857648640871048, "learning_rate": 0.0007390479787081508, "loss": 2.4682, "step": 439750 }, { "epoch": 0.876099706744868, "grad_norm": 0.167835995554924, "learning_rate": 0.0007388893783652444, "loss": 2.4629, "step": 439760 }, { "epoch": 0.8761196289685069, "grad_norm": 0.17772270739078522, "learning_rate": 0.0007387307979657952, "loss": 2.4769, "step": 439770 }, { "epoch": 0.8761395511921458, "grad_norm": 0.19446653127670288, "learning_rate": 0.0007385722375022817, "loss": 2.4734, "step": 439780 }, { "epoch": 0.8761594734157848, "grad_norm": 0.1761370152235031, "learning_rate": 0.0007384136969671874, "loss": 2.4533, "step": 439790 }, { "epoch": 0.8761793956394237, "grad_norm": 0.6937363743782043, "learning_rate": 0.0007382551763529996, "loss": 2.4532, "step": 439800 }, { "epoch": 0.8761993178630626, "grad_norm": 0.18737199902534485, "learning_rate": 0.0007380966756522114, "loss": 2.4647, "step": 439810 }, { "epoch": 0.8762192400867015, "grad_norm": 0.19928669929504395, "learning_rate": 0.0007379381948573201, "loss": 2.4661, "step": 439820 }, { "epoch": 0.8762391623103404, "grad_norm": 0.1883266568183899, "learning_rate": 0.0007377797339608272, "loss": 2.4807, "step": 439830 }, { "epoch": 0.8762590845339794, "grad_norm": 0.2087736427783966, "learning_rate": 0.00073762129295524, "loss": 2.4721, "step": 439840 }, { "epoch": 0.8762790067576183, "grad_norm": 0.18326927721500397, "learning_rate": 0.0007374628718330696, "loss": 2.4652, "step": 439850 }, { "epoch": 0.8762989289812572, "grad_norm": 0.20018108189105988, "learning_rate": 0.0007373044705868321, "loss": 2.4762, "step": 439860 }, { "epoch": 0.876318851204896, "grad_norm": 0.19236619770526886, "learning_rate": 0.0007371460892090487, "loss": 2.4734, "step": 439870 }, { "epoch": 0.876338773428535, "grad_norm": 0.16599074006080627, "learning_rate": 0.0007369877276922445, "loss": 2.4674, "step": 439880 }, { "epoch": 0.876358695652174, "grad_norm": 0.17581138014793396, "learning_rate": 0.0007368293860289498, "loss": 2.4879, "step": 439890 }, { "epoch": 0.8763786178758128, "grad_norm": 0.20122548937797546, "learning_rate": 0.0007366710642116994, "loss": 2.4713, "step": 439900 }, { "epoch": 0.8763985400994517, "grad_norm": 0.19133122265338898, "learning_rate": 0.000736512762233033, "loss": 2.4675, "step": 439910 }, { "epoch": 0.8764184623230906, "grad_norm": 0.18568481504917145, "learning_rate": 0.0007363544800854948, "loss": 2.4716, "step": 439920 }, { "epoch": 0.8764383845467296, "grad_norm": 0.17904944717884064, "learning_rate": 0.0007361962177616338, "loss": 2.4742, "step": 439930 }, { "epoch": 0.8764583067703685, "grad_norm": 0.1783183366060257, "learning_rate": 0.0007360379752540031, "loss": 2.4618, "step": 439940 }, { "epoch": 0.8764782289940074, "grad_norm": 0.18046124279499054, "learning_rate": 0.0007358797525551612, "loss": 2.462, "step": 439950 }, { "epoch": 0.8764981512176463, "grad_norm": 0.19548776745796204, "learning_rate": 0.0007357215496576712, "loss": 2.4676, "step": 439960 }, { "epoch": 0.8765180734412852, "grad_norm": 0.20147453248500824, "learning_rate": 0.0007355633665541006, "loss": 2.4626, "step": 439970 }, { "epoch": 0.8765379956649242, "grad_norm": 0.19646506011486053, "learning_rate": 0.0007354052032370211, "loss": 2.4645, "step": 439980 }, { "epoch": 0.8765579178885631, "grad_norm": 0.19524334371089935, "learning_rate": 0.0007352470596990099, "loss": 2.4557, "step": 439990 }, { "epoch": 0.876577840112202, "grad_norm": 0.17996470630168915, "learning_rate": 0.0007350889359326483, "loss": 2.4476, "step": 440000 }, { "epoch": 0.8765977623358409, "grad_norm": 0.20440584421157837, "learning_rate": 0.0007349308319305226, "loss": 2.481, "step": 440010 }, { "epoch": 0.8766176845594797, "grad_norm": 0.1817394345998764, "learning_rate": 0.000734772747685223, "loss": 2.4848, "step": 440020 }, { "epoch": 0.8766376067831188, "grad_norm": 0.1811668425798416, "learning_rate": 0.0007346146831893457, "loss": 2.4668, "step": 440030 }, { "epoch": 0.8766575290067576, "grad_norm": 0.16300739347934723, "learning_rate": 0.00073445663843549, "loss": 2.4801, "step": 440040 }, { "epoch": 0.8766774512303965, "grad_norm": 0.18321357667446136, "learning_rate": 0.0007342986134162608, "loss": 2.4685, "step": 440050 }, { "epoch": 0.8766973734540354, "grad_norm": 0.22426727414131165, "learning_rate": 0.0007341406081242672, "loss": 2.4801, "step": 440060 }, { "epoch": 0.8767172956776743, "grad_norm": 0.1899750977754593, "learning_rate": 0.0007339826225521231, "loss": 2.4608, "step": 440070 }, { "epoch": 0.8767372179013133, "grad_norm": 0.20040032267570496, "learning_rate": 0.000733824656692447, "loss": 2.4669, "step": 440080 }, { "epoch": 0.8767571401249522, "grad_norm": 0.1834864318370819, "learning_rate": 0.0007336667105378616, "loss": 2.4705, "step": 440090 }, { "epoch": 0.8767770623485911, "grad_norm": 0.19103321433067322, "learning_rate": 0.0007335087840809949, "loss": 2.4801, "step": 440100 }, { "epoch": 0.87679698457223, "grad_norm": 0.18356694281101227, "learning_rate": 0.000733350877314479, "loss": 2.483, "step": 440110 }, { "epoch": 0.8768169067958689, "grad_norm": 0.18501950800418854, "learning_rate": 0.0007331929902309509, "loss": 2.4699, "step": 440120 }, { "epoch": 0.8768368290195079, "grad_norm": 0.906545102596283, "learning_rate": 0.0007330351228230516, "loss": 2.4733, "step": 440130 }, { "epoch": 0.8768567512431468, "grad_norm": 0.18454915285110474, "learning_rate": 0.0007328772750834273, "loss": 2.472, "step": 440140 }, { "epoch": 0.8768766734667857, "grad_norm": 0.18267585337162018, "learning_rate": 0.000732719447004729, "loss": 2.4813, "step": 440150 }, { "epoch": 0.8768965956904246, "grad_norm": 0.20022733509540558, "learning_rate": 0.0007325616385796112, "loss": 2.468, "step": 440160 }, { "epoch": 0.8769165179140634, "grad_norm": 0.23129068315029144, "learning_rate": 0.0007324038498007342, "loss": 2.4687, "step": 440170 }, { "epoch": 0.8769364401377024, "grad_norm": 0.20497477054595947, "learning_rate": 0.0007322460806607616, "loss": 2.4703, "step": 440180 }, { "epoch": 0.8769563623613413, "grad_norm": 0.190741166472435, "learning_rate": 0.000732088331152363, "loss": 2.4864, "step": 440190 }, { "epoch": 0.8769762845849802, "grad_norm": 0.1695951521396637, "learning_rate": 0.0007319306012682114, "loss": 2.4753, "step": 440200 }, { "epoch": 0.8769962068086191, "grad_norm": 0.1743655651807785, "learning_rate": 0.0007317728910009849, "loss": 2.4823, "step": 440210 }, { "epoch": 0.8770161290322581, "grad_norm": 0.17109672725200653, "learning_rate": 0.0007316152003433658, "loss": 2.4778, "step": 440220 }, { "epoch": 0.877036051255897, "grad_norm": 0.19004061818122864, "learning_rate": 0.0007314575292880417, "loss": 2.4619, "step": 440230 }, { "epoch": 0.8770559734795359, "grad_norm": 0.18771733343601227, "learning_rate": 0.0007312998778277036, "loss": 2.4708, "step": 440240 }, { "epoch": 0.8770758957031748, "grad_norm": 0.19178399443626404, "learning_rate": 0.0007311422459550478, "loss": 2.4784, "step": 440250 }, { "epoch": 0.8770958179268137, "grad_norm": 0.1743803173303604, "learning_rate": 0.0007309846336627754, "loss": 2.4735, "step": 440260 }, { "epoch": 0.8771157401504527, "grad_norm": 0.18317997455596924, "learning_rate": 0.0007308270409435916, "loss": 2.4689, "step": 440270 }, { "epoch": 0.8771356623740916, "grad_norm": 0.19577166438102722, "learning_rate": 0.0007306694677902055, "loss": 2.4642, "step": 440280 }, { "epoch": 0.8771555845977305, "grad_norm": 0.18466177582740784, "learning_rate": 0.000730511914195332, "loss": 2.4764, "step": 440290 }, { "epoch": 0.8771755068213694, "grad_norm": 0.22281910479068756, "learning_rate": 0.0007303543801516896, "loss": 2.4693, "step": 440300 }, { "epoch": 0.8771954290450082, "grad_norm": 0.1794433444738388, "learning_rate": 0.0007301968656520019, "loss": 2.4749, "step": 440310 }, { "epoch": 0.8772153512686472, "grad_norm": 0.20079071819782257, "learning_rate": 0.0007300393706889965, "loss": 2.4622, "step": 440320 }, { "epoch": 0.8772352734922861, "grad_norm": 0.16987769305706024, "learning_rate": 0.0007298818952554059, "loss": 2.4873, "step": 440330 }, { "epoch": 0.877255195715925, "grad_norm": 0.1861128807067871, "learning_rate": 0.0007297244393439665, "loss": 2.4705, "step": 440340 }, { "epoch": 0.8772751179395639, "grad_norm": 0.2065155804157257, "learning_rate": 0.0007295670029474202, "loss": 2.4745, "step": 440350 }, { "epoch": 0.8772950401632028, "grad_norm": 0.1757560521364212, "learning_rate": 0.0007294095860585128, "loss": 2.4687, "step": 440360 }, { "epoch": 0.8773149623868418, "grad_norm": 0.201727032661438, "learning_rate": 0.0007292521886699943, "loss": 2.4685, "step": 440370 }, { "epoch": 0.8773348846104807, "grad_norm": 0.17758668959140778, "learning_rate": 0.0007290948107746195, "loss": 2.4723, "step": 440380 }, { "epoch": 0.8773548068341196, "grad_norm": 0.19471365213394165, "learning_rate": 0.0007289374523651482, "loss": 2.4634, "step": 440390 }, { "epoch": 0.8773747290577585, "grad_norm": 0.17755359411239624, "learning_rate": 0.0007287801134343437, "loss": 2.4849, "step": 440400 }, { "epoch": 0.8773946512813974, "grad_norm": 0.17365646362304688, "learning_rate": 0.0007286227939749746, "loss": 2.4661, "step": 440410 }, { "epoch": 0.8774145735050364, "grad_norm": 0.18641585111618042, "learning_rate": 0.0007284654939798134, "loss": 2.4696, "step": 440420 }, { "epoch": 0.8774344957286753, "grad_norm": 0.18645381927490234, "learning_rate": 0.0007283082134416374, "loss": 2.4586, "step": 440430 }, { "epoch": 0.8774544179523142, "grad_norm": 0.17815862596035004, "learning_rate": 0.0007281509523532284, "loss": 2.4707, "step": 440440 }, { "epoch": 0.877474340175953, "grad_norm": 0.19948342442512512, "learning_rate": 0.0007279937107073722, "loss": 2.4756, "step": 440450 }, { "epoch": 0.8774942623995919, "grad_norm": 0.1825738400220871, "learning_rate": 0.00072783648849686, "loss": 2.4813, "step": 440460 }, { "epoch": 0.8775141846232309, "grad_norm": 0.21827760338783264, "learning_rate": 0.0007276792857144863, "loss": 2.4697, "step": 440470 }, { "epoch": 0.8775341068468698, "grad_norm": 0.1792750358581543, "learning_rate": 0.0007275221023530507, "loss": 2.4752, "step": 440480 }, { "epoch": 0.8775540290705087, "grad_norm": 0.18383917212486267, "learning_rate": 0.0007273649384053573, "loss": 2.4609, "step": 440490 }, { "epoch": 0.8775739512941476, "grad_norm": 0.18664321303367615, "learning_rate": 0.0007272077938642145, "loss": 2.4823, "step": 440500 }, { "epoch": 0.8775938735177866, "grad_norm": 0.1762142926454544, "learning_rate": 0.000727050668722435, "loss": 2.4811, "step": 440510 }, { "epoch": 0.8776137957414255, "grad_norm": 0.18202531337738037, "learning_rate": 0.0007268935629728361, "loss": 2.4643, "step": 440520 }, { "epoch": 0.8776337179650644, "grad_norm": 0.19783590734004974, "learning_rate": 0.0007267364766082396, "loss": 2.4691, "step": 440530 }, { "epoch": 0.8776536401887033, "grad_norm": 0.19159065186977386, "learning_rate": 0.0007265794096214715, "loss": 2.4639, "step": 440540 }, { "epoch": 0.8776735624123422, "grad_norm": 0.18722012639045715, "learning_rate": 0.0007264223620053625, "loss": 2.469, "step": 440550 }, { "epoch": 0.8776934846359812, "grad_norm": 0.17407724261283875, "learning_rate": 0.0007262653337527474, "loss": 2.4711, "step": 440560 }, { "epoch": 0.8777134068596201, "grad_norm": 0.17470039427280426, "learning_rate": 0.0007261083248564657, "loss": 2.474, "step": 440570 }, { "epoch": 0.877733329083259, "grad_norm": 0.17349620163440704, "learning_rate": 0.0007259513353093612, "loss": 2.4702, "step": 440580 }, { "epoch": 0.8777532513068979, "grad_norm": 0.19670365750789642, "learning_rate": 0.000725794365104282, "loss": 2.4679, "step": 440590 }, { "epoch": 0.8777731735305367, "grad_norm": 0.18995928764343262, "learning_rate": 0.0007256374142340807, "loss": 2.4566, "step": 440600 }, { "epoch": 0.8777930957541757, "grad_norm": 0.17991334199905396, "learning_rate": 0.0007254804826916144, "loss": 2.4574, "step": 440610 }, { "epoch": 0.8778130179778146, "grad_norm": 0.20301713049411774, "learning_rate": 0.0007253235704697447, "loss": 2.4787, "step": 440620 }, { "epoch": 0.8778329402014535, "grad_norm": 0.18616876006126404, "learning_rate": 0.0007251666775613371, "loss": 2.4612, "step": 440630 }, { "epoch": 0.8778528624250924, "grad_norm": 0.18534252047538757, "learning_rate": 0.0007250098039592619, "loss": 2.4741, "step": 440640 }, { "epoch": 0.8778727846487313, "grad_norm": 0.17890280485153198, "learning_rate": 0.0007248529496563936, "loss": 2.4695, "step": 440650 }, { "epoch": 0.8778927068723703, "grad_norm": 0.17845943570137024, "learning_rate": 0.0007246961146456113, "loss": 2.4702, "step": 440660 }, { "epoch": 0.8779126290960092, "grad_norm": 0.1984855830669403, "learning_rate": 0.000724539298919798, "loss": 2.4823, "step": 440670 }, { "epoch": 0.8779325513196481, "grad_norm": 0.1924934834241867, "learning_rate": 0.000724382502471842, "loss": 2.4668, "step": 440680 }, { "epoch": 0.877952473543287, "grad_norm": 0.20016047358512878, "learning_rate": 0.0007242257252946351, "loss": 2.4575, "step": 440690 }, { "epoch": 0.8779723957669259, "grad_norm": 0.21896719932556152, "learning_rate": 0.0007240689673810736, "loss": 2.4627, "step": 440700 }, { "epoch": 0.8779923179905649, "grad_norm": 0.1973377913236618, "learning_rate": 0.0007239122287240583, "loss": 2.4782, "step": 440710 }, { "epoch": 0.8780122402142038, "grad_norm": 0.45463451743125916, "learning_rate": 0.0007237555093164947, "loss": 2.4693, "step": 440720 }, { "epoch": 0.8780321624378427, "grad_norm": 0.1942722648382187, "learning_rate": 0.000723598809151292, "loss": 2.486, "step": 440730 }, { "epoch": 0.8780520846614815, "grad_norm": 0.16667921841144562, "learning_rate": 0.0007234421282213641, "loss": 2.4834, "step": 440740 }, { "epoch": 0.8780720068851204, "grad_norm": 0.193106546998024, "learning_rate": 0.0007232854665196295, "loss": 2.4721, "step": 440750 }, { "epoch": 0.8780919291087594, "grad_norm": 0.16087648272514343, "learning_rate": 0.0007231288240390106, "loss": 2.4574, "step": 440760 }, { "epoch": 0.8781118513323983, "grad_norm": 0.1875259429216385, "learning_rate": 0.0007229722007724342, "loss": 2.4758, "step": 440770 }, { "epoch": 0.8781317735560372, "grad_norm": 0.18289758265018463, "learning_rate": 0.0007228155967128318, "loss": 2.4766, "step": 440780 }, { "epoch": 0.8781516957796761, "grad_norm": 0.1985923945903778, "learning_rate": 0.0007226590118531389, "loss": 2.4715, "step": 440790 }, { "epoch": 0.8781716180033151, "grad_norm": 0.18341384828090668, "learning_rate": 0.000722502446186295, "loss": 2.4687, "step": 440800 }, { "epoch": 0.878191540226954, "grad_norm": 0.17167480289936066, "learning_rate": 0.000722345899705245, "loss": 2.4618, "step": 440810 }, { "epoch": 0.8782114624505929, "grad_norm": 0.15391960740089417, "learning_rate": 0.0007221893724029369, "loss": 2.4565, "step": 440820 }, { "epoch": 0.8782313846742318, "grad_norm": 0.1887100785970688, "learning_rate": 0.0007220328642723242, "loss": 2.4716, "step": 440830 }, { "epoch": 0.8782513068978707, "grad_norm": 0.18233932554721832, "learning_rate": 0.0007218763753063636, "loss": 2.4888, "step": 440840 }, { "epoch": 0.8782712291215097, "grad_norm": 0.2132144421339035, "learning_rate": 0.0007217199054980166, "loss": 2.4587, "step": 440850 }, { "epoch": 0.8782911513451486, "grad_norm": 0.16841977834701538, "learning_rate": 0.000721563454840249, "loss": 2.4677, "step": 440860 }, { "epoch": 0.8783110735687875, "grad_norm": 0.20759715139865875, "learning_rate": 0.0007214070233260313, "loss": 2.4697, "step": 440870 }, { "epoch": 0.8783309957924264, "grad_norm": 0.17475254833698273, "learning_rate": 0.0007212506109483377, "loss": 2.4768, "step": 440880 }, { "epoch": 0.8783509180160652, "grad_norm": 0.17362025380134583, "learning_rate": 0.0007210942177001467, "loss": 2.4706, "step": 440890 }, { "epoch": 0.8783708402397042, "grad_norm": 0.19398541748523712, "learning_rate": 0.0007209378435744416, "loss": 2.4751, "step": 440900 }, { "epoch": 0.8783907624633431, "grad_norm": 0.20428362488746643, "learning_rate": 0.0007207814885642095, "loss": 2.4631, "step": 440910 }, { "epoch": 0.878410684686982, "grad_norm": 0.17308355867862701, "learning_rate": 0.0007206251526624418, "loss": 2.4776, "step": 440920 }, { "epoch": 0.8784306069106209, "grad_norm": 0.20392712950706482, "learning_rate": 0.000720468835862135, "loss": 2.4649, "step": 440930 }, { "epoch": 0.8784505291342598, "grad_norm": 0.19748523831367493, "learning_rate": 0.0007203125381562887, "loss": 2.4474, "step": 440940 }, { "epoch": 0.8784704513578988, "grad_norm": 0.19587260484695435, "learning_rate": 0.0007201562595379077, "loss": 2.4712, "step": 440950 }, { "epoch": 0.8784903735815377, "grad_norm": 0.17184700071811676, "learning_rate": 0.0007199999999999999, "loss": 2.4609, "step": 440960 }, { "epoch": 0.8785102958051766, "grad_norm": 0.1815931648015976, "learning_rate": 0.0007198437595355791, "loss": 2.4514, "step": 440970 }, { "epoch": 0.8785302180288155, "grad_norm": 0.20091047883033752, "learning_rate": 0.0007196875381376621, "loss": 2.4684, "step": 440980 }, { "epoch": 0.8785501402524544, "grad_norm": 0.18553167581558228, "learning_rate": 0.0007195313357992708, "loss": 2.4539, "step": 440990 }, { "epoch": 0.8785700624760934, "grad_norm": 0.1802625209093094, "learning_rate": 0.0007193751525134302, "loss": 2.4719, "step": 441000 }, { "epoch": 0.8785899846997323, "grad_norm": 0.2265816330909729, "learning_rate": 0.0007192189882731707, "loss": 2.4708, "step": 441010 }, { "epoch": 0.8786099069233712, "grad_norm": 0.1774926632642746, "learning_rate": 0.0007190628430715267, "loss": 2.4777, "step": 441020 }, { "epoch": 0.87862982914701, "grad_norm": 0.18118730187416077, "learning_rate": 0.0007189067169015365, "loss": 2.4621, "step": 441030 }, { "epoch": 0.8786497513706489, "grad_norm": 0.17588657140731812, "learning_rate": 0.0007187506097562427, "loss": 2.4754, "step": 441040 }, { "epoch": 0.8786696735942879, "grad_norm": 0.1936110258102417, "learning_rate": 0.0007185945216286924, "loss": 2.4743, "step": 441050 }, { "epoch": 0.8786895958179268, "grad_norm": 0.17632195353507996, "learning_rate": 0.0007184384525119365, "loss": 2.4684, "step": 441060 }, { "epoch": 0.8787095180415657, "grad_norm": 0.1735101193189621, "learning_rate": 0.0007182824023990309, "loss": 2.4652, "step": 441070 }, { "epoch": 0.8787294402652046, "grad_norm": 0.17835713922977448, "learning_rate": 0.0007181263712830348, "loss": 2.4779, "step": 441080 }, { "epoch": 0.8787493624888435, "grad_norm": 0.17591632902622223, "learning_rate": 0.0007179703591570124, "loss": 2.479, "step": 441090 }, { "epoch": 0.8787692847124825, "grad_norm": 0.1734897941350937, "learning_rate": 0.0007178143660140315, "loss": 2.4711, "step": 441100 }, { "epoch": 0.8787892069361214, "grad_norm": 0.19923004508018494, "learning_rate": 0.0007176583918471647, "loss": 2.4776, "step": 441110 }, { "epoch": 0.8788091291597603, "grad_norm": 0.18386124074459076, "learning_rate": 0.0007175024366494882, "loss": 2.4568, "step": 441120 }, { "epoch": 0.8788290513833992, "grad_norm": 0.18880152702331543, "learning_rate": 0.0007173465004140831, "loss": 2.4788, "step": 441130 }, { "epoch": 0.8788489736070382, "grad_norm": 0.22519893944263458, "learning_rate": 0.0007171905831340339, "loss": 2.4619, "step": 441140 }, { "epoch": 0.8788688958306771, "grad_norm": 0.18997377157211304, "learning_rate": 0.00071703468480243, "loss": 2.4723, "step": 441150 }, { "epoch": 0.878888818054316, "grad_norm": 0.2059745341539383, "learning_rate": 0.0007168788054123647, "loss": 2.4698, "step": 441160 }, { "epoch": 0.8789087402779548, "grad_norm": 0.19213126599788666, "learning_rate": 0.0007167229449569357, "loss": 2.466, "step": 441170 }, { "epoch": 0.8789286625015937, "grad_norm": 0.17779409885406494, "learning_rate": 0.0007165671034292442, "loss": 2.4675, "step": 441180 }, { "epoch": 0.8789485847252327, "grad_norm": 0.18923182785511017, "learning_rate": 0.0007164112808223968, "loss": 2.4704, "step": 441190 }, { "epoch": 0.8789685069488716, "grad_norm": 0.17248475551605225, "learning_rate": 0.0007162554771295031, "loss": 2.4825, "step": 441200 }, { "epoch": 0.8789884291725105, "grad_norm": 0.1992751955986023, "learning_rate": 0.0007160996923436774, "loss": 2.4672, "step": 441210 }, { "epoch": 0.8790083513961494, "grad_norm": 0.1972547471523285, "learning_rate": 0.0007159439264580384, "loss": 2.465, "step": 441220 }, { "epoch": 0.8790282736197883, "grad_norm": 0.16517360508441925, "learning_rate": 0.0007157881794657082, "loss": 2.4584, "step": 441230 }, { "epoch": 0.8790481958434273, "grad_norm": 0.1812276393175125, "learning_rate": 0.0007156324513598142, "loss": 2.4673, "step": 441240 }, { "epoch": 0.8790681180670662, "grad_norm": 0.18333861231803894, "learning_rate": 0.0007154767421334871, "loss": 2.4645, "step": 441250 }, { "epoch": 0.8790880402907051, "grad_norm": 0.18574713170528412, "learning_rate": 0.0007153210517798621, "loss": 2.4761, "step": 441260 }, { "epoch": 0.879107962514344, "grad_norm": 0.18547797203063965, "learning_rate": 0.0007151653802920781, "loss": 2.4629, "step": 441270 }, { "epoch": 0.8791278847379829, "grad_norm": 0.1709592640399933, "learning_rate": 0.0007150097276632792, "loss": 2.4719, "step": 441280 }, { "epoch": 0.8791478069616219, "grad_norm": 0.20701663196086884, "learning_rate": 0.0007148540938866124, "loss": 2.4749, "step": 441290 }, { "epoch": 0.8791677291852608, "grad_norm": 0.16680815815925598, "learning_rate": 0.0007146984789552298, "loss": 2.4678, "step": 441300 }, { "epoch": 0.8791876514088997, "grad_norm": 0.1842605322599411, "learning_rate": 0.000714542882862287, "loss": 2.4632, "step": 441310 }, { "epoch": 0.8792075736325385, "grad_norm": 0.18928435444831848, "learning_rate": 0.0007143873056009442, "loss": 2.4582, "step": 441320 }, { "epoch": 0.8792274958561774, "grad_norm": 0.21771733462810516, "learning_rate": 0.0007142317471643656, "loss": 2.4558, "step": 441330 }, { "epoch": 0.8792474180798164, "grad_norm": 0.19986112415790558, "learning_rate": 0.0007140762075457192, "loss": 2.4656, "step": 441340 }, { "epoch": 0.8792673403034553, "grad_norm": 0.19921091198921204, "learning_rate": 0.000713920686738178, "loss": 2.464, "step": 441350 }, { "epoch": 0.8792872625270942, "grad_norm": 0.2306683510541916, "learning_rate": 0.0007137651847349178, "loss": 2.4652, "step": 441360 }, { "epoch": 0.8793071847507331, "grad_norm": 0.18617917597293854, "learning_rate": 0.0007136097015291199, "loss": 2.4768, "step": 441370 }, { "epoch": 0.879327106974372, "grad_norm": 0.2011549472808838, "learning_rate": 0.0007134542371139687, "loss": 2.455, "step": 441380 }, { "epoch": 0.879347029198011, "grad_norm": 0.1962113231420517, "learning_rate": 0.0007132987914826534, "loss": 2.4672, "step": 441390 }, { "epoch": 0.8793669514216499, "grad_norm": 0.17137303948402405, "learning_rate": 0.0007131433646283672, "loss": 2.4727, "step": 441400 }, { "epoch": 0.8793868736452888, "grad_norm": 0.2719401717185974, "learning_rate": 0.0007129879565443064, "loss": 2.4542, "step": 441410 }, { "epoch": 0.8794067958689277, "grad_norm": 0.1745845526456833, "learning_rate": 0.000712832567223673, "loss": 2.4762, "step": 441420 }, { "epoch": 0.8794267180925667, "grad_norm": 0.18163511157035828, "learning_rate": 0.0007126771966596724, "loss": 2.4586, "step": 441430 }, { "epoch": 0.8794466403162056, "grad_norm": 0.18464244902133942, "learning_rate": 0.0007125218448455136, "loss": 2.4706, "step": 441440 }, { "epoch": 0.8794665625398445, "grad_norm": 0.20627887547016144, "learning_rate": 0.0007123665117744103, "loss": 2.4755, "step": 441450 }, { "epoch": 0.8794864847634833, "grad_norm": 0.1912967562675476, "learning_rate": 0.0007122111974395802, "loss": 2.4639, "step": 441460 }, { "epoch": 0.8795064069871222, "grad_norm": 0.18515464663505554, "learning_rate": 0.0007120559018342449, "loss": 2.4606, "step": 441470 }, { "epoch": 0.8795263292107612, "grad_norm": 0.19768793880939484, "learning_rate": 0.0007119006249516304, "loss": 2.4687, "step": 441480 }, { "epoch": 0.8795462514344001, "grad_norm": 0.21278244256973267, "learning_rate": 0.0007117453667849667, "loss": 2.4618, "step": 441490 }, { "epoch": 0.879566173658039, "grad_norm": 0.1880493015050888, "learning_rate": 0.0007115901273274874, "loss": 2.4672, "step": 441500 }, { "epoch": 0.8795860958816779, "grad_norm": 0.19190511107444763, "learning_rate": 0.0007114349065724308, "loss": 2.4828, "step": 441510 }, { "epoch": 0.8796060181053168, "grad_norm": 0.1943606287240982, "learning_rate": 0.000711279704513039, "loss": 2.4676, "step": 441520 }, { "epoch": 0.8796259403289558, "grad_norm": 0.18366549909114838, "learning_rate": 0.0007111245211425581, "loss": 2.4613, "step": 441530 }, { "epoch": 0.8796458625525947, "grad_norm": 0.21104812622070312, "learning_rate": 0.0007109693564542385, "loss": 2.474, "step": 441540 }, { "epoch": 0.8796657847762336, "grad_norm": 0.19270750880241394, "learning_rate": 0.0007108142104413344, "loss": 2.4652, "step": 441550 }, { "epoch": 0.8796857069998725, "grad_norm": 0.1663019210100174, "learning_rate": 0.0007106590830971043, "loss": 2.4657, "step": 441560 }, { "epoch": 0.8797056292235114, "grad_norm": 0.18274231255054474, "learning_rate": 0.0007105039744148103, "loss": 2.4731, "step": 441570 }, { "epoch": 0.8797255514471504, "grad_norm": 0.17201142013072968, "learning_rate": 0.0007103488843877195, "loss": 2.4656, "step": 441580 }, { "epoch": 0.8797454736707893, "grad_norm": 0.20835836231708527, "learning_rate": 0.0007101938130091017, "loss": 2.4528, "step": 441590 }, { "epoch": 0.8797653958944281, "grad_norm": 0.18388058245182037, "learning_rate": 0.0007100387602722321, "loss": 2.4564, "step": 441600 }, { "epoch": 0.879785318118067, "grad_norm": 0.1924087554216385, "learning_rate": 0.000709883726170389, "loss": 2.4634, "step": 441610 }, { "epoch": 0.8798052403417059, "grad_norm": 0.20157591998577118, "learning_rate": 0.0007097287106968551, "loss": 2.4875, "step": 441620 }, { "epoch": 0.8798251625653449, "grad_norm": 0.19815020263195038, "learning_rate": 0.000709573713844917, "loss": 2.4728, "step": 441630 }, { "epoch": 0.8798450847889838, "grad_norm": 0.1779060959815979, "learning_rate": 0.0007094187356078658, "loss": 2.4737, "step": 441640 }, { "epoch": 0.8798650070126227, "grad_norm": 0.18758772313594818, "learning_rate": 0.0007092637759789957, "loss": 2.4776, "step": 441650 }, { "epoch": 0.8798849292362616, "grad_norm": 0.20340096950531006, "learning_rate": 0.000709108834951606, "loss": 2.4843, "step": 441660 }, { "epoch": 0.8799048514599005, "grad_norm": 0.18890507519245148, "learning_rate": 0.0007089539125189992, "loss": 2.471, "step": 441670 }, { "epoch": 0.8799247736835395, "grad_norm": 0.1930919736623764, "learning_rate": 0.0007087990086744822, "loss": 2.4637, "step": 441680 }, { "epoch": 0.8799446959071784, "grad_norm": 0.18103085458278656, "learning_rate": 0.0007086441234113659, "loss": 2.4695, "step": 441690 }, { "epoch": 0.8799646181308173, "grad_norm": 0.1764920949935913, "learning_rate": 0.0007084892567229648, "loss": 2.4646, "step": 441700 }, { "epoch": 0.8799845403544562, "grad_norm": 0.18701918423175812, "learning_rate": 0.000708334408602598, "loss": 2.4595, "step": 441710 }, { "epoch": 0.8800044625780952, "grad_norm": 0.17691709101200104, "learning_rate": 0.0007081795790435885, "loss": 2.4677, "step": 441720 }, { "epoch": 0.8800243848017341, "grad_norm": 0.23992247879505157, "learning_rate": 0.0007080247680392631, "loss": 2.4738, "step": 441730 }, { "epoch": 0.880044307025373, "grad_norm": 0.2180006504058838, "learning_rate": 0.0007078699755829525, "loss": 2.4734, "step": 441740 }, { "epoch": 0.8800642292490118, "grad_norm": 0.20636926591396332, "learning_rate": 0.0007077152016679915, "loss": 2.4656, "step": 441750 }, { "epoch": 0.8800841514726507, "grad_norm": 0.19280490279197693, "learning_rate": 0.000707560446287719, "loss": 2.4526, "step": 441760 }, { "epoch": 0.8801040736962897, "grad_norm": 0.19620740413665771, "learning_rate": 0.000707405709435478, "loss": 2.4695, "step": 441770 }, { "epoch": 0.8801239959199286, "grad_norm": 0.1755906492471695, "learning_rate": 0.000707250991104615, "loss": 2.482, "step": 441780 }, { "epoch": 0.8801439181435675, "grad_norm": 0.20418262481689453, "learning_rate": 0.0007070962912884811, "loss": 2.473, "step": 441790 }, { "epoch": 0.8801638403672064, "grad_norm": 0.20271486043930054, "learning_rate": 0.0007069416099804309, "loss": 2.4652, "step": 441800 }, { "epoch": 0.8801837625908453, "grad_norm": 0.17099584639072418, "learning_rate": 0.000706786947173823, "loss": 2.4721, "step": 441810 }, { "epoch": 0.8802036848144843, "grad_norm": 0.17408102750778198, "learning_rate": 0.0007066323028620205, "loss": 2.4522, "step": 441820 }, { "epoch": 0.8802236070381232, "grad_norm": 0.17877404391765594, "learning_rate": 0.0007064776770383898, "loss": 2.4664, "step": 441830 }, { "epoch": 0.8802435292617621, "grad_norm": 0.19068042933940887, "learning_rate": 0.0007063230696963017, "loss": 2.4727, "step": 441840 }, { "epoch": 0.880263451485401, "grad_norm": 0.18158641457557678, "learning_rate": 0.0007061684808291307, "loss": 2.4736, "step": 441850 }, { "epoch": 0.8802833737090399, "grad_norm": 0.18082964420318604, "learning_rate": 0.000706013910430255, "loss": 2.477, "step": 441860 }, { "epoch": 0.8803032959326789, "grad_norm": 0.1761755645275116, "learning_rate": 0.0007058593584930577, "loss": 2.4746, "step": 441870 }, { "epoch": 0.8803232181563178, "grad_norm": 0.1763119250535965, "learning_rate": 0.0007057048250109251, "loss": 2.4701, "step": 441880 }, { "epoch": 0.8803431403799566, "grad_norm": 0.18361583352088928, "learning_rate": 0.0007055503099772475, "loss": 2.4553, "step": 441890 }, { "epoch": 0.8803630626035955, "grad_norm": 0.18550355732440948, "learning_rate": 0.0007053958133854195, "loss": 2.475, "step": 441900 }, { "epoch": 0.8803829848272344, "grad_norm": 0.1960555762052536, "learning_rate": 0.0007052413352288389, "loss": 2.4646, "step": 441910 }, { "epoch": 0.8804029070508734, "grad_norm": 0.183974027633667, "learning_rate": 0.0007050868755009084, "loss": 2.4648, "step": 441920 }, { "epoch": 0.8804228292745123, "grad_norm": 0.17566756904125214, "learning_rate": 0.000704932434195034, "loss": 2.4619, "step": 441930 }, { "epoch": 0.8804427514981512, "grad_norm": 0.1839698702096939, "learning_rate": 0.0007047780113046258, "loss": 2.4667, "step": 441940 }, { "epoch": 0.8804626737217901, "grad_norm": 0.17701007425785065, "learning_rate": 0.000704623606823098, "loss": 2.4786, "step": 441950 }, { "epoch": 0.880482595945429, "grad_norm": 0.20013858377933502, "learning_rate": 0.0007044692207438683, "loss": 2.4524, "step": 441960 }, { "epoch": 0.880502518169068, "grad_norm": 0.17634226381778717, "learning_rate": 0.0007043148530603587, "loss": 2.4689, "step": 441970 }, { "epoch": 0.8805224403927069, "grad_norm": 0.2114974409341812, "learning_rate": 0.0007041605037659951, "loss": 2.4692, "step": 441980 }, { "epoch": 0.8805423626163458, "grad_norm": 0.19008664786815643, "learning_rate": 0.0007040061728542068, "loss": 2.467, "step": 441990 }, { "epoch": 0.8805622848399847, "grad_norm": 0.17619770765304565, "learning_rate": 0.0007038518603184281, "loss": 2.4832, "step": 442000 }, { "epoch": 0.8805822070636237, "grad_norm": 0.1891339123249054, "learning_rate": 0.0007036975661520959, "loss": 2.4581, "step": 442010 }, { "epoch": 0.8806021292872626, "grad_norm": 0.1882983148097992, "learning_rate": 0.0007035432903486519, "loss": 2.4566, "step": 442020 }, { "epoch": 0.8806220515109014, "grad_norm": 0.20892725884914398, "learning_rate": 0.0007033890329015415, "loss": 2.4628, "step": 442030 }, { "epoch": 0.8806419737345403, "grad_norm": 0.18667960166931152, "learning_rate": 0.0007032347938042138, "loss": 2.4648, "step": 442040 }, { "epoch": 0.8806618959581792, "grad_norm": 0.1798824816942215, "learning_rate": 0.0007030805730501219, "loss": 2.4609, "step": 442050 }, { "epoch": 0.8806818181818182, "grad_norm": 0.18117979168891907, "learning_rate": 0.0007029263706327232, "loss": 2.4563, "step": 442060 }, { "epoch": 0.8807017404054571, "grad_norm": 0.1988905966281891, "learning_rate": 0.0007027721865454781, "loss": 2.4724, "step": 442070 }, { "epoch": 0.880721662629096, "grad_norm": 0.1818040907382965, "learning_rate": 0.0007026180207818515, "loss": 2.4669, "step": 442080 }, { "epoch": 0.8807415848527349, "grad_norm": 0.18673290312290192, "learning_rate": 0.0007024638733353126, "loss": 2.4631, "step": 442090 }, { "epoch": 0.8807615070763738, "grad_norm": 0.1875915676355362, "learning_rate": 0.0007023097441993334, "loss": 2.4658, "step": 442100 }, { "epoch": 0.8807814293000128, "grad_norm": 0.19146113097667694, "learning_rate": 0.0007021556333673902, "loss": 2.4988, "step": 442110 }, { "epoch": 0.8808013515236517, "grad_norm": 0.17600932717323303, "learning_rate": 0.0007020015408329639, "loss": 2.4768, "step": 442120 }, { "epoch": 0.8808212737472906, "grad_norm": 0.2061227411031723, "learning_rate": 0.0007018474665895383, "loss": 2.4727, "step": 442130 }, { "epoch": 0.8808411959709295, "grad_norm": 0.20916667580604553, "learning_rate": 0.0007016934106306014, "loss": 2.463, "step": 442140 }, { "epoch": 0.8808611181945684, "grad_norm": 0.18126708269119263, "learning_rate": 0.0007015393729496455, "loss": 2.4646, "step": 442150 }, { "epoch": 0.8808810404182074, "grad_norm": 0.18388135731220245, "learning_rate": 0.000701385353540166, "loss": 2.459, "step": 442160 }, { "epoch": 0.8809009626418463, "grad_norm": 0.18423782289028168, "learning_rate": 0.0007012313523956624, "loss": 2.4697, "step": 442170 }, { "epoch": 0.8809208848654851, "grad_norm": 0.21257701516151428, "learning_rate": 0.0007010773695096386, "loss": 2.4674, "step": 442180 }, { "epoch": 0.880940807089124, "grad_norm": 0.19850732386112213, "learning_rate": 0.0007009234048756017, "loss": 2.4586, "step": 442190 }, { "epoch": 0.8809607293127629, "grad_norm": 0.18683253228664398, "learning_rate": 0.0007007694584870627, "loss": 2.4513, "step": 442200 }, { "epoch": 0.8809806515364019, "grad_norm": 0.1833878755569458, "learning_rate": 0.0007006155303375373, "loss": 2.4721, "step": 442210 }, { "epoch": 0.8810005737600408, "grad_norm": 0.1960173100233078, "learning_rate": 0.0007004616204205434, "loss": 2.4785, "step": 442220 }, { "epoch": 0.8810204959836797, "grad_norm": 0.18375016748905182, "learning_rate": 0.0007003077287296043, "loss": 2.4741, "step": 442230 }, { "epoch": 0.8810404182073186, "grad_norm": 0.22448429465293884, "learning_rate": 0.0007001538552582463, "loss": 2.4817, "step": 442240 }, { "epoch": 0.8810603404309575, "grad_norm": 0.18052691221237183, "learning_rate": 0.0007, "loss": 2.4704, "step": 442250 }, { "epoch": 0.8810802626545965, "grad_norm": 0.19742177426815033, "learning_rate": 0.0006998461629483994, "loss": 2.4851, "step": 442260 }, { "epoch": 0.8811001848782354, "grad_norm": 0.21189270913600922, "learning_rate": 0.0006996923440969825, "loss": 2.4704, "step": 442270 }, { "epoch": 0.8811201071018743, "grad_norm": 0.18905192613601685, "learning_rate": 0.0006995385434392914, "loss": 2.4723, "step": 442280 }, { "epoch": 0.8811400293255132, "grad_norm": 0.18372975289821625, "learning_rate": 0.0006993847609688713, "loss": 2.4655, "step": 442290 }, { "epoch": 0.8811599515491522, "grad_norm": 0.2024952918291092, "learning_rate": 0.000699230996679272, "loss": 2.4738, "step": 442300 }, { "epoch": 0.8811798737727911, "grad_norm": 0.199290931224823, "learning_rate": 0.0006990772505640468, "loss": 2.4777, "step": 442310 }, { "epoch": 0.88119979599643, "grad_norm": 0.1686742603778839, "learning_rate": 0.0006989235226167526, "loss": 2.4496, "step": 442320 }, { "epoch": 0.8812197182200688, "grad_norm": 0.1718815565109253, "learning_rate": 0.0006987698128309504, "loss": 2.4648, "step": 442330 }, { "epoch": 0.8812396404437077, "grad_norm": 0.19269658625125885, "learning_rate": 0.0006986161212002049, "loss": 2.4636, "step": 442340 }, { "epoch": 0.8812595626673467, "grad_norm": 0.1883983165025711, "learning_rate": 0.0006984624477180846, "loss": 2.4634, "step": 442350 }, { "epoch": 0.8812794848909856, "grad_norm": 0.18834300339221954, "learning_rate": 0.0006983087923781617, "loss": 2.4641, "step": 442360 }, { "epoch": 0.8812994071146245, "grad_norm": 0.19982464611530304, "learning_rate": 0.0006981551551740122, "loss": 2.4836, "step": 442370 }, { "epoch": 0.8813193293382634, "grad_norm": 0.17416001856327057, "learning_rate": 0.0006980015360992164, "loss": 2.4752, "step": 442380 }, { "epoch": 0.8813392515619023, "grad_norm": 0.21737819910049438, "learning_rate": 0.0006978479351473577, "loss": 2.4637, "step": 442390 }, { "epoch": 0.8813591737855413, "grad_norm": 0.18056799471378326, "learning_rate": 0.0006976943523120236, "loss": 2.4646, "step": 442400 }, { "epoch": 0.8813790960091802, "grad_norm": 0.18826769292354584, "learning_rate": 0.0006975407875868052, "loss": 2.4688, "step": 442410 }, { "epoch": 0.8813990182328191, "grad_norm": 0.19096244871616364, "learning_rate": 0.0006973872409652975, "loss": 2.4708, "step": 442420 }, { "epoch": 0.881418940456458, "grad_norm": 0.1967892199754715, "learning_rate": 0.0006972337124410995, "loss": 2.4602, "step": 442430 }, { "epoch": 0.8814388626800969, "grad_norm": 0.18207843601703644, "learning_rate": 0.0006970802020078136, "loss": 2.4706, "step": 442440 }, { "epoch": 0.8814587849037359, "grad_norm": 0.19030612707138062, "learning_rate": 0.0006969267096590461, "loss": 2.4676, "step": 442450 }, { "epoch": 0.8814787071273747, "grad_norm": 0.19465596973896027, "learning_rate": 0.0006967732353884074, "loss": 2.4817, "step": 442460 }, { "epoch": 0.8814986293510136, "grad_norm": 0.19330807030200958, "learning_rate": 0.0006966197791895105, "loss": 2.4619, "step": 442470 }, { "epoch": 0.8815185515746525, "grad_norm": 0.20674552023410797, "learning_rate": 0.0006964663410559741, "loss": 2.4669, "step": 442480 }, { "epoch": 0.8815384737982914, "grad_norm": 0.17838861048221588, "learning_rate": 0.000696312920981419, "loss": 2.4615, "step": 442490 }, { "epoch": 0.8815583960219304, "grad_norm": 0.18521836400032043, "learning_rate": 0.0006961595189594702, "loss": 2.4625, "step": 442500 }, { "epoch": 0.8815783182455693, "grad_norm": 0.19925376772880554, "learning_rate": 0.000696006134983757, "loss": 2.4532, "step": 442510 }, { "epoch": 0.8815982404692082, "grad_norm": 0.16845005750656128, "learning_rate": 0.0006958527690479115, "loss": 2.4563, "step": 442520 }, { "epoch": 0.8816181626928471, "grad_norm": 0.18114374577999115, "learning_rate": 0.0006956994211455705, "loss": 2.4724, "step": 442530 }, { "epoch": 0.881638084916486, "grad_norm": 0.17316575348377228, "learning_rate": 0.0006955460912703738, "loss": 2.4619, "step": 442540 }, { "epoch": 0.881658007140125, "grad_norm": 0.2048971801996231, "learning_rate": 0.0006953927794159653, "loss": 2.4666, "step": 442550 }, { "epoch": 0.8816779293637639, "grad_norm": 0.21814635396003723, "learning_rate": 0.0006952394855759927, "loss": 2.4637, "step": 442560 }, { "epoch": 0.8816978515874028, "grad_norm": 0.19769474864006042, "learning_rate": 0.0006950862097441073, "loss": 2.4714, "step": 442570 }, { "epoch": 0.8817177738110417, "grad_norm": 0.19265785813331604, "learning_rate": 0.000694932951913964, "loss": 2.4612, "step": 442580 }, { "epoch": 0.8817376960346806, "grad_norm": 0.1795954704284668, "learning_rate": 0.0006947797120792214, "loss": 2.4613, "step": 442590 }, { "epoch": 0.8817576182583196, "grad_norm": 0.2191489040851593, "learning_rate": 0.0006946264902335424, "loss": 2.4568, "step": 442600 }, { "epoch": 0.8817775404819584, "grad_norm": 0.18097440898418427, "learning_rate": 0.000694473286370593, "loss": 2.4637, "step": 442610 }, { "epoch": 0.8817974627055973, "grad_norm": 0.18838457763195038, "learning_rate": 0.0006943201004840428, "loss": 2.4727, "step": 442620 }, { "epoch": 0.8818173849292362, "grad_norm": 0.18211786448955536, "learning_rate": 0.0006941669325675659, "loss": 2.4813, "step": 442630 }, { "epoch": 0.8818373071528752, "grad_norm": 0.20431408286094666, "learning_rate": 0.0006940137826148394, "loss": 2.4767, "step": 442640 }, { "epoch": 0.8818572293765141, "grad_norm": 0.1949954628944397, "learning_rate": 0.0006938606506195444, "loss": 2.4699, "step": 442650 }, { "epoch": 0.881877151600153, "grad_norm": 0.17234861850738525, "learning_rate": 0.0006937075365753655, "loss": 2.4551, "step": 442660 }, { "epoch": 0.8818970738237919, "grad_norm": 0.1939409226179123, "learning_rate": 0.0006935544404759913, "loss": 2.4666, "step": 442670 }, { "epoch": 0.8819169960474308, "grad_norm": 0.1967208832502365, "learning_rate": 0.0006934013623151141, "loss": 2.4705, "step": 442680 }, { "epoch": 0.8819369182710698, "grad_norm": 0.18079958856105804, "learning_rate": 0.0006932483020864293, "loss": 2.4871, "step": 442690 }, { "epoch": 0.8819568404947087, "grad_norm": 0.18607039749622345, "learning_rate": 0.0006930952597836369, "loss": 2.4665, "step": 442700 }, { "epoch": 0.8819767627183476, "grad_norm": 0.1778435856103897, "learning_rate": 0.0006929422354004396, "loss": 2.4702, "step": 442710 }, { "epoch": 0.8819966849419865, "grad_norm": 0.17517201602458954, "learning_rate": 0.0006927892289305446, "loss": 2.4701, "step": 442720 }, { "epoch": 0.8820166071656254, "grad_norm": 0.1896967589855194, "learning_rate": 0.0006926362403676627, "loss": 2.4668, "step": 442730 }, { "epoch": 0.8820365293892644, "grad_norm": 0.18578119575977325, "learning_rate": 0.0006924832697055076, "loss": 2.4777, "step": 442740 }, { "epoch": 0.8820564516129032, "grad_norm": 0.21578830480575562, "learning_rate": 0.000692330316937798, "loss": 2.4513, "step": 442750 }, { "epoch": 0.8820763738365421, "grad_norm": 0.18247368931770325, "learning_rate": 0.0006921773820582549, "loss": 2.4531, "step": 442760 }, { "epoch": 0.882096296060181, "grad_norm": 0.16083243489265442, "learning_rate": 0.0006920244650606036, "loss": 2.4691, "step": 442770 }, { "epoch": 0.8821162182838199, "grad_norm": 0.17124119400978088, "learning_rate": 0.0006918715659385735, "loss": 2.4555, "step": 442780 }, { "epoch": 0.8821361405074589, "grad_norm": 0.18517987430095673, "learning_rate": 0.0006917186846858968, "loss": 2.476, "step": 442790 }, { "epoch": 0.8821560627310978, "grad_norm": 0.18157394230365753, "learning_rate": 0.0006915658212963098, "loss": 2.4475, "step": 442800 }, { "epoch": 0.8821759849547367, "grad_norm": 0.19032904505729675, "learning_rate": 0.0006914129757635528, "loss": 2.4717, "step": 442810 }, { "epoch": 0.8821959071783756, "grad_norm": 0.17962785065174103, "learning_rate": 0.000691260148081369, "loss": 2.4515, "step": 442820 }, { "epoch": 0.8822158294020145, "grad_norm": 0.18446221947669983, "learning_rate": 0.0006911073382435061, "loss": 2.4609, "step": 442830 }, { "epoch": 0.8822357516256535, "grad_norm": 0.1892208307981491, "learning_rate": 0.0006909545462437143, "loss": 2.4547, "step": 442840 }, { "epoch": 0.8822556738492924, "grad_norm": 0.18802045285701752, "learning_rate": 0.0006908017720757487, "loss": 2.478, "step": 442850 }, { "epoch": 0.8822755960729313, "grad_norm": 0.288224458694458, "learning_rate": 0.0006906490157333674, "loss": 2.4711, "step": 442860 }, { "epoch": 0.8822955182965702, "grad_norm": 0.19940559566020966, "learning_rate": 0.0006904962772103318, "loss": 2.4759, "step": 442870 }, { "epoch": 0.882315440520209, "grad_norm": 0.20462939143180847, "learning_rate": 0.0006903435565004079, "loss": 2.4599, "step": 442880 }, { "epoch": 0.882335362743848, "grad_norm": 0.17841120064258575, "learning_rate": 0.0006901908535973647, "loss": 2.4736, "step": 442890 }, { "epoch": 0.8823552849674869, "grad_norm": 0.19600513577461243, "learning_rate": 0.0006900381684949748, "loss": 2.465, "step": 442900 }, { "epoch": 0.8823752071911258, "grad_norm": 0.18234336376190186, "learning_rate": 0.0006898855011870146, "loss": 2.4695, "step": 442910 }, { "epoch": 0.8823951294147647, "grad_norm": 0.19159384071826935, "learning_rate": 0.000689732851667264, "loss": 2.4713, "step": 442920 }, { "epoch": 0.8824150516384037, "grad_norm": 0.20416295528411865, "learning_rate": 0.0006895802199295068, "loss": 2.487, "step": 442930 }, { "epoch": 0.8824349738620426, "grad_norm": 0.19962389767169952, "learning_rate": 0.00068942760596753, "loss": 2.4619, "step": 442940 }, { "epoch": 0.8824548960856815, "grad_norm": 0.18135401606559753, "learning_rate": 0.0006892750097751245, "loss": 2.4705, "step": 442950 }, { "epoch": 0.8824748183093204, "grad_norm": 0.18686695396900177, "learning_rate": 0.0006891224313460848, "loss": 2.4728, "step": 442960 }, { "epoch": 0.8824947405329593, "grad_norm": 0.19691091775894165, "learning_rate": 0.0006889698706742092, "loss": 2.4653, "step": 442970 }, { "epoch": 0.8825146627565983, "grad_norm": 0.1760360449552536, "learning_rate": 0.000688817327753299, "loss": 2.4905, "step": 442980 }, { "epoch": 0.8825345849802372, "grad_norm": 0.17805039882659912, "learning_rate": 0.0006886648025771595, "loss": 2.4593, "step": 442990 }, { "epoch": 0.8825545072038761, "grad_norm": 0.17350973188877106, "learning_rate": 0.0006885122951395999, "loss": 2.4759, "step": 443000 }, { "epoch": 0.882574429427515, "grad_norm": 0.18330468237400055, "learning_rate": 0.0006883598054344324, "loss": 2.4495, "step": 443010 }, { "epoch": 0.8825943516511539, "grad_norm": 0.16877681016921997, "learning_rate": 0.0006882073334554732, "loss": 2.4594, "step": 443020 }, { "epoch": 0.8826142738747929, "grad_norm": 0.1802217811346054, "learning_rate": 0.000688054879196542, "loss": 2.4611, "step": 443030 }, { "epoch": 0.8826341960984317, "grad_norm": 0.20565184950828552, "learning_rate": 0.0006879024426514619, "loss": 2.4701, "step": 443040 }, { "epoch": 0.8826541183220706, "grad_norm": 0.19169311225414276, "learning_rate": 0.0006877500238140602, "loss": 2.4715, "step": 443050 }, { "epoch": 0.8826740405457095, "grad_norm": 0.18840450048446655, "learning_rate": 0.0006875976226781666, "loss": 2.4747, "step": 443060 }, { "epoch": 0.8826939627693484, "grad_norm": 0.18273185193538666, "learning_rate": 0.0006874452392376156, "loss": 2.4681, "step": 443070 }, { "epoch": 0.8827138849929874, "grad_norm": 0.163893461227417, "learning_rate": 0.000687292873486245, "loss": 2.4785, "step": 443080 }, { "epoch": 0.8827338072166263, "grad_norm": 0.2082049399614334, "learning_rate": 0.0006871405254178953, "loss": 2.4669, "step": 443090 }, { "epoch": 0.8827537294402652, "grad_norm": 0.5071864128112793, "learning_rate": 0.0006869881950264118, "loss": 2.4702, "step": 443100 }, { "epoch": 0.8827736516639041, "grad_norm": 0.6867355704307556, "learning_rate": 0.0006868358823056426, "loss": 2.4627, "step": 443110 }, { "epoch": 0.882793573887543, "grad_norm": 0.19496721029281616, "learning_rate": 0.0006866835872494397, "loss": 2.4812, "step": 443120 }, { "epoch": 0.882813496111182, "grad_norm": 0.196377694606781, "learning_rate": 0.0006865313098516585, "loss": 2.4695, "step": 443130 }, { "epoch": 0.8828334183348209, "grad_norm": 0.16754473745822906, "learning_rate": 0.000686379050106158, "loss": 2.4691, "step": 443140 }, { "epoch": 0.8828533405584598, "grad_norm": 0.1946757286787033, "learning_rate": 0.0006862268080068006, "loss": 2.4591, "step": 443150 }, { "epoch": 0.8828732627820987, "grad_norm": 0.21373215317726135, "learning_rate": 0.0006860745835474527, "loss": 2.4611, "step": 443160 }, { "epoch": 0.8828931850057375, "grad_norm": 0.1793307065963745, "learning_rate": 0.0006859223767219837, "loss": 2.481, "step": 443170 }, { "epoch": 0.8829131072293765, "grad_norm": 0.18743759393692017, "learning_rate": 0.000685770187524267, "loss": 2.4688, "step": 443180 }, { "epoch": 0.8829330294530154, "grad_norm": 0.19103965163230896, "learning_rate": 0.0006856180159481796, "loss": 2.4556, "step": 443190 }, { "epoch": 0.8829529516766543, "grad_norm": 0.18353113532066345, "learning_rate": 0.0006854658619876013, "loss": 2.4814, "step": 443200 }, { "epoch": 0.8829728739002932, "grad_norm": 0.21537162363529205, "learning_rate": 0.0006853137256364164, "loss": 2.4696, "step": 443210 }, { "epoch": 0.8829927961239322, "grad_norm": 0.2011507749557495, "learning_rate": 0.000685161606888512, "loss": 2.4625, "step": 443220 }, { "epoch": 0.8830127183475711, "grad_norm": 0.17591212689876556, "learning_rate": 0.0006850095057377792, "loss": 2.4748, "step": 443230 }, { "epoch": 0.88303264057121, "grad_norm": 0.20018649101257324, "learning_rate": 0.0006848574221781123, "loss": 2.4848, "step": 443240 }, { "epoch": 0.8830525627948489, "grad_norm": 0.1801280975341797, "learning_rate": 0.0006847053562034096, "loss": 2.4702, "step": 443250 }, { "epoch": 0.8830724850184878, "grad_norm": 0.16966822743415833, "learning_rate": 0.0006845533078075723, "loss": 2.4658, "step": 443260 }, { "epoch": 0.8830924072421268, "grad_norm": 0.182905375957489, "learning_rate": 0.0006844012769845054, "loss": 2.4845, "step": 443270 }, { "epoch": 0.8831123294657657, "grad_norm": 0.18211863934993744, "learning_rate": 0.000684249263728118, "loss": 2.4537, "step": 443280 }, { "epoch": 0.8831322516894046, "grad_norm": 0.18591995537281036, "learning_rate": 0.0006840972680323214, "loss": 2.4612, "step": 443290 }, { "epoch": 0.8831521739130435, "grad_norm": 0.19009797275066376, "learning_rate": 0.0006839452898910319, "loss": 2.4627, "step": 443300 }, { "epoch": 0.8831720961366823, "grad_norm": 0.19285953044891357, "learning_rate": 0.0006837933292981684, "loss": 2.4561, "step": 443310 }, { "epoch": 0.8831920183603214, "grad_norm": 0.18267688155174255, "learning_rate": 0.0006836413862476534, "loss": 2.4668, "step": 443320 }, { "epoch": 0.8832119405839602, "grad_norm": 0.18177345395088196, "learning_rate": 0.0006834894607334128, "loss": 2.4784, "step": 443330 }, { "epoch": 0.8832318628075991, "grad_norm": 0.18269631266593933, "learning_rate": 0.0006833375527493768, "loss": 2.4589, "step": 443340 }, { "epoch": 0.883251785031238, "grad_norm": 0.1767500787973404, "learning_rate": 0.0006831856622894784, "loss": 2.4455, "step": 443350 }, { "epoch": 0.8832717072548769, "grad_norm": 0.17439080774784088, "learning_rate": 0.0006830337893476537, "loss": 2.4689, "step": 443360 }, { "epoch": 0.8832916294785159, "grad_norm": 0.18933358788490295, "learning_rate": 0.0006828819339178435, "loss": 2.478, "step": 443370 }, { "epoch": 0.8833115517021548, "grad_norm": 0.1801949292421341, "learning_rate": 0.0006827300959939911, "loss": 2.4643, "step": 443380 }, { "epoch": 0.8833314739257937, "grad_norm": 0.219045028090477, "learning_rate": 0.0006825782755700435, "loss": 2.464, "step": 443390 }, { "epoch": 0.8833513961494326, "grad_norm": 0.22310076653957367, "learning_rate": 0.0006824264726399516, "loss": 2.4716, "step": 443400 }, { "epoch": 0.8833713183730715, "grad_norm": 0.17524878680706024, "learning_rate": 0.0006822746871976695, "loss": 2.4717, "step": 443410 }, { "epoch": 0.8833912405967105, "grad_norm": 0.16466526687145233, "learning_rate": 0.0006821229192371543, "loss": 2.4695, "step": 443420 }, { "epoch": 0.8834111628203494, "grad_norm": 0.18890967965126038, "learning_rate": 0.0006819711687523675, "loss": 2.4755, "step": 443430 }, { "epoch": 0.8834310850439883, "grad_norm": 0.17626787722110748, "learning_rate": 0.0006818194357372736, "loss": 2.4642, "step": 443440 }, { "epoch": 0.8834510072676272, "grad_norm": 0.19926324486732483, "learning_rate": 0.0006816677201858404, "loss": 2.4724, "step": 443450 }, { "epoch": 0.883470929491266, "grad_norm": 0.16317811608314514, "learning_rate": 0.0006815160220920393, "loss": 2.4687, "step": 443460 }, { "epoch": 0.883490851714905, "grad_norm": 0.1890982985496521, "learning_rate": 0.0006813643414498453, "loss": 2.4819, "step": 443470 }, { "epoch": 0.8835107739385439, "grad_norm": 0.18824592232704163, "learning_rate": 0.000681212678253237, "loss": 2.4509, "step": 443480 }, { "epoch": 0.8835306961621828, "grad_norm": 0.1823011338710785, "learning_rate": 0.0006810610324961961, "loss": 2.4557, "step": 443490 }, { "epoch": 0.8835506183858217, "grad_norm": 0.21608375012874603, "learning_rate": 0.0006809094041727082, "loss": 2.4802, "step": 443500 }, { "epoch": 0.8835705406094607, "grad_norm": 0.18210600316524506, "learning_rate": 0.0006807577932767615, "loss": 2.4673, "step": 443510 }, { "epoch": 0.8835904628330996, "grad_norm": 0.18487949669361115, "learning_rate": 0.0006806061998023488, "loss": 2.465, "step": 443520 }, { "epoch": 0.8836103850567385, "grad_norm": 0.17067471146583557, "learning_rate": 0.0006804546237434652, "loss": 2.4645, "step": 443530 }, { "epoch": 0.8836303072803774, "grad_norm": 0.16073687374591827, "learning_rate": 0.0006803030650941102, "loss": 2.4649, "step": 443540 }, { "epoch": 0.8836502295040163, "grad_norm": 0.20481832325458527, "learning_rate": 0.0006801515238482867, "loss": 2.4623, "step": 443550 }, { "epoch": 0.8836701517276553, "grad_norm": 0.17821359634399414, "learning_rate": 0.0006799999999999999, "loss": 2.4669, "step": 443560 }, { "epoch": 0.8836900739512942, "grad_norm": 0.2180960476398468, "learning_rate": 0.00067984849354326, "loss": 2.4721, "step": 443570 }, { "epoch": 0.8837099961749331, "grad_norm": 0.1799663007259369, "learning_rate": 0.0006796970044720797, "loss": 2.4731, "step": 443580 }, { "epoch": 0.883729918398572, "grad_norm": 0.20381604135036469, "learning_rate": 0.0006795455327804749, "loss": 2.4659, "step": 443590 }, { "epoch": 0.8837498406222108, "grad_norm": 0.17884163558483124, "learning_rate": 0.0006793940784624657, "loss": 2.4813, "step": 443600 }, { "epoch": 0.8837697628458498, "grad_norm": 0.1795414388179779, "learning_rate": 0.0006792426415120756, "loss": 2.4645, "step": 443610 }, { "epoch": 0.8837896850694887, "grad_norm": 0.1706998199224472, "learning_rate": 0.0006790912219233305, "loss": 2.4578, "step": 443620 }, { "epoch": 0.8838096072931276, "grad_norm": 0.19854791462421417, "learning_rate": 0.000678939819690261, "loss": 2.4604, "step": 443630 }, { "epoch": 0.8838295295167665, "grad_norm": 0.19579915702342987, "learning_rate": 0.0006787884348069005, "loss": 2.4502, "step": 443640 }, { "epoch": 0.8838494517404054, "grad_norm": 0.21304839849472046, "learning_rate": 0.0006786370672672857, "loss": 2.4522, "step": 443650 }, { "epoch": 0.8838693739640444, "grad_norm": 0.20285004377365112, "learning_rate": 0.0006784857170654568, "loss": 2.4726, "step": 443660 }, { "epoch": 0.8838892961876833, "grad_norm": 0.18684430420398712, "learning_rate": 0.0006783343841954577, "loss": 2.4599, "step": 443670 }, { "epoch": 0.8839092184113222, "grad_norm": 0.16887658834457397, "learning_rate": 0.0006781830686513355, "loss": 2.453, "step": 443680 }, { "epoch": 0.8839291406349611, "grad_norm": 0.1901799887418747, "learning_rate": 0.0006780317704271409, "loss": 2.4722, "step": 443690 }, { "epoch": 0.8839490628586, "grad_norm": 0.18239884078502655, "learning_rate": 0.0006778804895169273, "loss": 2.4623, "step": 443700 }, { "epoch": 0.883968985082239, "grad_norm": 0.18431973457336426, "learning_rate": 0.0006777292259147525, "loss": 2.4598, "step": 443710 }, { "epoch": 0.8839889073058779, "grad_norm": 0.19253019988536835, "learning_rate": 0.0006775779796146769, "loss": 2.4639, "step": 443720 }, { "epoch": 0.8840088295295168, "grad_norm": 0.1908632218837738, "learning_rate": 0.0006774267506107648, "loss": 2.4677, "step": 443730 }, { "epoch": 0.8840287517531557, "grad_norm": 0.19349688291549683, "learning_rate": 0.0006772755388970839, "loss": 2.4753, "step": 443740 }, { "epoch": 0.8840486739767945, "grad_norm": 0.2346254140138626, "learning_rate": 0.0006771243444677047, "loss": 2.4694, "step": 443750 }, { "epoch": 0.8840685962004335, "grad_norm": 0.19788578152656555, "learning_rate": 0.0006769731673167019, "loss": 2.4555, "step": 443760 }, { "epoch": 0.8840885184240724, "grad_norm": 0.2210610955953598, "learning_rate": 0.0006768220074381528, "loss": 2.4726, "step": 443770 }, { "epoch": 0.8841084406477113, "grad_norm": 0.1981685310602188, "learning_rate": 0.0006766708648261386, "loss": 2.4747, "step": 443780 }, { "epoch": 0.8841283628713502, "grad_norm": 0.17761091887950897, "learning_rate": 0.0006765197394747438, "loss": 2.4703, "step": 443790 }, { "epoch": 0.8841482850949892, "grad_norm": 0.1847204566001892, "learning_rate": 0.0006763686313780562, "loss": 2.48, "step": 443800 }, { "epoch": 0.8841682073186281, "grad_norm": 0.18720819056034088, "learning_rate": 0.0006762175405301672, "loss": 2.4593, "step": 443810 }, { "epoch": 0.884188129542267, "grad_norm": 0.19615265727043152, "learning_rate": 0.0006760664669251709, "loss": 2.4606, "step": 443820 }, { "epoch": 0.8842080517659059, "grad_norm": 0.20080134272575378, "learning_rate": 0.0006759154105571654, "loss": 2.4629, "step": 443830 }, { "epoch": 0.8842279739895448, "grad_norm": 0.18559934198856354, "learning_rate": 0.0006757643714202521, "loss": 2.4466, "step": 443840 }, { "epoch": 0.8842478962131838, "grad_norm": 0.18684791028499603, "learning_rate": 0.0006756133495085356, "loss": 2.4426, "step": 443850 }, { "epoch": 0.8842678184368227, "grad_norm": 0.22234687209129333, "learning_rate": 0.0006754623448161242, "loss": 2.4653, "step": 443860 }, { "epoch": 0.8842877406604616, "grad_norm": 0.19157588481903076, "learning_rate": 0.0006753113573371288, "loss": 2.4568, "step": 443870 }, { "epoch": 0.8843076628841005, "grad_norm": 0.17481939494609833, "learning_rate": 0.0006751603870656644, "loss": 2.4679, "step": 443880 }, { "epoch": 0.8843275851077393, "grad_norm": 0.19082823395729065, "learning_rate": 0.0006750094339958492, "loss": 2.4695, "step": 443890 }, { "epoch": 0.8843475073313783, "grad_norm": 0.17520050704479218, "learning_rate": 0.0006748584981218044, "loss": 2.4659, "step": 443900 }, { "epoch": 0.8843674295550172, "grad_norm": 0.1873093694448471, "learning_rate": 0.000674707579437655, "loss": 2.463, "step": 443910 }, { "epoch": 0.8843873517786561, "grad_norm": 0.1869436800479889, "learning_rate": 0.0006745566779375287, "loss": 2.4708, "step": 443920 }, { "epoch": 0.884407274002295, "grad_norm": 0.19310182332992554, "learning_rate": 0.0006744057936155574, "loss": 2.4511, "step": 443930 }, { "epoch": 0.8844271962259339, "grad_norm": 0.21531075239181519, "learning_rate": 0.0006742549264658759, "loss": 2.4604, "step": 443940 }, { "epoch": 0.8844471184495729, "grad_norm": 0.19732527434825897, "learning_rate": 0.0006741040764826222, "loss": 2.4551, "step": 443950 }, { "epoch": 0.8844670406732118, "grad_norm": 0.20328843593597412, "learning_rate": 0.0006739532436599381, "loss": 2.4681, "step": 443960 }, { "epoch": 0.8844869628968507, "grad_norm": 0.20400568842887878, "learning_rate": 0.0006738024279919676, "loss": 2.4631, "step": 443970 }, { "epoch": 0.8845068851204896, "grad_norm": 0.1736270636320114, "learning_rate": 0.0006736516294728598, "loss": 2.4597, "step": 443980 }, { "epoch": 0.8845268073441285, "grad_norm": 0.1898803412914276, "learning_rate": 0.0006735008480967657, "loss": 2.4657, "step": 443990 }, { "epoch": 0.8845467295677675, "grad_norm": 0.1818918138742447, "learning_rate": 0.0006733500838578401, "loss": 2.4711, "step": 444000 }, { "epoch": 0.8845666517914064, "grad_norm": 0.17888742685317993, "learning_rate": 0.0006731993367502412, "loss": 2.4691, "step": 444010 }, { "epoch": 0.8845865740150453, "grad_norm": 0.18102070689201355, "learning_rate": 0.0006730486067681303, "loss": 2.4717, "step": 444020 }, { "epoch": 0.8846064962386841, "grad_norm": 0.18521884083747864, "learning_rate": 0.0006728978939056725, "loss": 2.4663, "step": 444030 }, { "epoch": 0.884626418462323, "grad_norm": 0.1952625811100006, "learning_rate": 0.0006727471981570354, "loss": 2.4599, "step": 444040 }, { "epoch": 0.884646340685962, "grad_norm": 0.1857222467660904, "learning_rate": 0.0006725965195163905, "loss": 2.4503, "step": 444050 }, { "epoch": 0.8846662629096009, "grad_norm": 0.1928844153881073, "learning_rate": 0.0006724458579779129, "loss": 2.4633, "step": 444060 }, { "epoch": 0.8846861851332398, "grad_norm": 0.1894223541021347, "learning_rate": 0.00067229521353578, "loss": 2.448, "step": 444070 }, { "epoch": 0.8847061073568787, "grad_norm": 0.3280019760131836, "learning_rate": 0.0006721445861841735, "loss": 2.4738, "step": 444080 }, { "epoch": 0.8847260295805177, "grad_norm": 0.1713348776102066, "learning_rate": 0.0006719939759172777, "loss": 2.4725, "step": 444090 }, { "epoch": 0.8847459518041566, "grad_norm": 0.18979693949222565, "learning_rate": 0.0006718433827292807, "loss": 2.453, "step": 444100 }, { "epoch": 0.8847658740277955, "grad_norm": 0.20125381648540497, "learning_rate": 0.0006716928066143735, "loss": 2.4817, "step": 444110 }, { "epoch": 0.8847857962514344, "grad_norm": 0.18447771668434143, "learning_rate": 0.0006715422475667508, "loss": 2.4611, "step": 444120 }, { "epoch": 0.8848057184750733, "grad_norm": 0.20636673271656036, "learning_rate": 0.0006713917055806102, "loss": 2.4646, "step": 444130 }, { "epoch": 0.8848256406987123, "grad_norm": 0.1887342780828476, "learning_rate": 0.0006712411806501529, "loss": 2.4708, "step": 444140 }, { "epoch": 0.8848455629223512, "grad_norm": 0.1813468188047409, "learning_rate": 0.0006710906727695829, "loss": 2.4679, "step": 444150 }, { "epoch": 0.8848654851459901, "grad_norm": 0.18682944774627686, "learning_rate": 0.000670940181933108, "loss": 2.4657, "step": 444160 }, { "epoch": 0.884885407369629, "grad_norm": 0.18812969326972961, "learning_rate": 0.0006707897081349392, "loss": 2.4653, "step": 444170 }, { "epoch": 0.8849053295932678, "grad_norm": 0.182874858379364, "learning_rate": 0.0006706392513692907, "loss": 2.4651, "step": 444180 }, { "epoch": 0.8849252518169068, "grad_norm": 0.1859389841556549, "learning_rate": 0.0006704888116303798, "loss": 2.4707, "step": 444190 }, { "epoch": 0.8849451740405457, "grad_norm": 0.1682816445827484, "learning_rate": 0.000670338388912427, "loss": 2.4694, "step": 444200 }, { "epoch": 0.8849650962641846, "grad_norm": 0.1820203810930252, "learning_rate": 0.0006701879832096569, "loss": 2.4705, "step": 444210 }, { "epoch": 0.8849850184878235, "grad_norm": 0.16904261708259583, "learning_rate": 0.0006700375945162962, "loss": 2.4436, "step": 444220 }, { "epoch": 0.8850049407114624, "grad_norm": 0.18605826795101166, "learning_rate": 0.0006698872228265755, "loss": 2.4547, "step": 444230 }, { "epoch": 0.8850248629351014, "grad_norm": 0.20156213641166687, "learning_rate": 0.0006697368681347287, "loss": 2.4724, "step": 444240 }, { "epoch": 0.8850447851587403, "grad_norm": 0.17628037929534912, "learning_rate": 0.0006695865304349928, "loss": 2.4584, "step": 444250 }, { "epoch": 0.8850647073823792, "grad_norm": 0.18988829851150513, "learning_rate": 0.0006694362097216083, "loss": 2.474, "step": 444260 }, { "epoch": 0.8850846296060181, "grad_norm": 0.19477884471416473, "learning_rate": 0.0006692859059888183, "loss": 2.4707, "step": 444270 }, { "epoch": 0.885104551829657, "grad_norm": 0.19693982601165771, "learning_rate": 0.00066913561923087, "loss": 2.4677, "step": 444280 }, { "epoch": 0.885124474053296, "grad_norm": 0.17076082527637482, "learning_rate": 0.0006689853494420131, "loss": 2.4856, "step": 444290 }, { "epoch": 0.8851443962769349, "grad_norm": 0.21218684315681458, "learning_rate": 0.0006688350966165012, "loss": 2.4625, "step": 444300 }, { "epoch": 0.8851643185005738, "grad_norm": 0.24565762281417847, "learning_rate": 0.0006686848607485905, "loss": 2.4543, "step": 444310 }, { "epoch": 0.8851842407242126, "grad_norm": 0.1783066689968109, "learning_rate": 0.0006685346418325411, "loss": 2.4607, "step": 444320 }, { "epoch": 0.8852041629478515, "grad_norm": 0.18578021228313446, "learning_rate": 0.0006683844398626156, "loss": 2.4709, "step": 444330 }, { "epoch": 0.8852240851714905, "grad_norm": 0.18970432877540588, "learning_rate": 0.0006682342548330806, "loss": 2.4573, "step": 444340 }, { "epoch": 0.8852440073951294, "grad_norm": 0.16821520030498505, "learning_rate": 0.0006680840867382056, "loss": 2.4645, "step": 444350 }, { "epoch": 0.8852639296187683, "grad_norm": 0.1767561286687851, "learning_rate": 0.0006679339355722631, "loss": 2.4689, "step": 444360 }, { "epoch": 0.8852838518424072, "grad_norm": 0.1691332310438156, "learning_rate": 0.000667783801329529, "loss": 2.4604, "step": 444370 }, { "epoch": 0.8853037740660461, "grad_norm": 0.1950913667678833, "learning_rate": 0.0006676336840042827, "loss": 2.4691, "step": 444380 }, { "epoch": 0.8853236962896851, "grad_norm": 0.19883699715137482, "learning_rate": 0.0006674835835908062, "loss": 2.4467, "step": 444390 }, { "epoch": 0.885343618513324, "grad_norm": 0.17896834015846252, "learning_rate": 0.0006673335000833856, "loss": 2.4606, "step": 444400 }, { "epoch": 0.8853635407369629, "grad_norm": 0.19504722952842712, "learning_rate": 0.0006671834334763091, "loss": 2.4462, "step": 444410 }, { "epoch": 0.8853834629606018, "grad_norm": 0.22131475806236267, "learning_rate": 0.0006670333837638694, "loss": 2.4617, "step": 444420 }, { "epoch": 0.8854033851842408, "grad_norm": 0.17724665999412537, "learning_rate": 0.0006668833509403614, "loss": 2.4615, "step": 444430 }, { "epoch": 0.8854233074078797, "grad_norm": 0.18487392365932465, "learning_rate": 0.0006667333350000833, "loss": 2.4684, "step": 444440 }, { "epoch": 0.8854432296315186, "grad_norm": 0.17127616703510284, "learning_rate": 0.0006665833359373372, "loss": 2.4604, "step": 444450 }, { "epoch": 0.8854631518551574, "grad_norm": 0.1795104742050171, "learning_rate": 0.000666433353746428, "loss": 2.4559, "step": 444460 }, { "epoch": 0.8854830740787963, "grad_norm": 0.1976245790719986, "learning_rate": 0.0006662833884216633, "loss": 2.46, "step": 444470 }, { "epoch": 0.8855029963024353, "grad_norm": 0.2001321166753769, "learning_rate": 0.0006661334399573547, "loss": 2.471, "step": 444480 }, { "epoch": 0.8855229185260742, "grad_norm": 0.17944350838661194, "learning_rate": 0.0006659835083478165, "loss": 2.4598, "step": 444490 }, { "epoch": 0.8855428407497131, "grad_norm": 0.20915818214416504, "learning_rate": 0.0006658335935873668, "loss": 2.4561, "step": 444500 }, { "epoch": 0.885562762973352, "grad_norm": 0.22741979360580444, "learning_rate": 0.0006656836956703258, "loss": 2.4703, "step": 444510 }, { "epoch": 0.8855826851969909, "grad_norm": 0.22114858031272888, "learning_rate": 0.000665533814591018, "loss": 2.4599, "step": 444520 }, { "epoch": 0.8856026074206299, "grad_norm": 0.1842406988143921, "learning_rate": 0.0006653839503437702, "loss": 2.4701, "step": 444530 }, { "epoch": 0.8856225296442688, "grad_norm": 0.18173399567604065, "learning_rate": 0.0006652341029229131, "loss": 2.4558, "step": 444540 }, { "epoch": 0.8856424518679077, "grad_norm": 0.18389691412448883, "learning_rate": 0.0006650842723227806, "loss": 2.4792, "step": 444550 }, { "epoch": 0.8856623740915466, "grad_norm": 0.1794508695602417, "learning_rate": 0.000664934458537709, "loss": 2.4508, "step": 444560 }, { "epoch": 0.8856822963151855, "grad_norm": 0.20036590099334717, "learning_rate": 0.0006647846615620385, "loss": 2.4601, "step": 444570 }, { "epoch": 0.8857022185388245, "grad_norm": 0.19942349195480347, "learning_rate": 0.0006646348813901121, "loss": 2.4783, "step": 444580 }, { "epoch": 0.8857221407624634, "grad_norm": 0.20390969514846802, "learning_rate": 0.000664485118016276, "loss": 2.458, "step": 444590 }, { "epoch": 0.8857420629861023, "grad_norm": 0.16656921803951263, "learning_rate": 0.0006643353714348801, "loss": 2.4651, "step": 444600 }, { "epoch": 0.8857619852097411, "grad_norm": 0.20067906379699707, "learning_rate": 0.0006641856416402765, "loss": 2.4585, "step": 444610 }, { "epoch": 0.88578190743338, "grad_norm": 0.17546935379505157, "learning_rate": 0.0006640359286268212, "loss": 2.4722, "step": 444620 }, { "epoch": 0.885801829657019, "grad_norm": 0.2028653919696808, "learning_rate": 0.0006638862323888733, "loss": 2.4821, "step": 444630 }, { "epoch": 0.8858217518806579, "grad_norm": 0.28036949038505554, "learning_rate": 0.0006637365529207948, "loss": 2.462, "step": 444640 }, { "epoch": 0.8858416741042968, "grad_norm": 0.19342966377735138, "learning_rate": 0.0006635868902169508, "loss": 2.4575, "step": 444650 }, { "epoch": 0.8858615963279357, "grad_norm": 0.2062821090221405, "learning_rate": 0.0006634372442717103, "loss": 2.4532, "step": 444660 }, { "epoch": 0.8858815185515746, "grad_norm": 0.18181028962135315, "learning_rate": 0.0006632876150794442, "loss": 2.4718, "step": 444670 }, { "epoch": 0.8859014407752136, "grad_norm": 0.19190841913223267, "learning_rate": 0.0006631380026345278, "loss": 2.4605, "step": 444680 }, { "epoch": 0.8859213629988525, "grad_norm": 0.17308741807937622, "learning_rate": 0.0006629884069313385, "loss": 2.4616, "step": 444690 }, { "epoch": 0.8859412852224914, "grad_norm": 0.18728633224964142, "learning_rate": 0.0006628388279642576, "loss": 2.453, "step": 444700 }, { "epoch": 0.8859612074461303, "grad_norm": 0.18487797677516937, "learning_rate": 0.0006626892657276695, "loss": 2.4538, "step": 444710 }, { "epoch": 0.8859811296697693, "grad_norm": 0.19725032150745392, "learning_rate": 0.0006625397202159611, "loss": 2.4496, "step": 444720 }, { "epoch": 0.8860010518934082, "grad_norm": 0.21230420470237732, "learning_rate": 0.0006623901914235229, "loss": 2.4748, "step": 444730 }, { "epoch": 0.886020974117047, "grad_norm": 0.2032557874917984, "learning_rate": 0.0006622406793447485, "loss": 2.4723, "step": 444740 }, { "epoch": 0.886040896340686, "grad_norm": 0.18390965461730957, "learning_rate": 0.0006620911839740349, "loss": 2.4638, "step": 444750 }, { "epoch": 0.8860608185643248, "grad_norm": 0.19103169441223145, "learning_rate": 0.0006619417053057814, "loss": 2.463, "step": 444760 }, { "epoch": 0.8860807407879638, "grad_norm": 0.2029920071363449, "learning_rate": 0.0006617922433343917, "loss": 2.4549, "step": 444770 }, { "epoch": 0.8861006630116027, "grad_norm": 0.17338094115257263, "learning_rate": 0.0006616427980542714, "loss": 2.4619, "step": 444780 }, { "epoch": 0.8861205852352416, "grad_norm": 0.1739243119955063, "learning_rate": 0.0006614933694598298, "loss": 2.4672, "step": 444790 }, { "epoch": 0.8861405074588805, "grad_norm": 0.21802809834480286, "learning_rate": 0.0006613439575454791, "loss": 2.4704, "step": 444800 }, { "epoch": 0.8861604296825194, "grad_norm": 0.23939335346221924, "learning_rate": 0.000661194562305635, "loss": 2.4493, "step": 444810 }, { "epoch": 0.8861803519061584, "grad_norm": 0.1844341903924942, "learning_rate": 0.0006610451837347162, "loss": 2.4541, "step": 444820 }, { "epoch": 0.8862002741297973, "grad_norm": 0.23902112245559692, "learning_rate": 0.0006608958218271441, "loss": 2.4657, "step": 444830 }, { "epoch": 0.8862201963534362, "grad_norm": 0.18013647198677063, "learning_rate": 0.0006607464765773435, "loss": 2.4642, "step": 444840 }, { "epoch": 0.8862401185770751, "grad_norm": 0.16046789288520813, "learning_rate": 0.0006605971479797424, "loss": 2.4705, "step": 444850 }, { "epoch": 0.886260040800714, "grad_norm": 0.18460164964199066, "learning_rate": 0.000660447836028772, "loss": 2.4617, "step": 444860 }, { "epoch": 0.886279963024353, "grad_norm": 0.15995074808597565, "learning_rate": 0.000660298540718866, "loss": 2.4395, "step": 444870 }, { "epoch": 0.8862998852479919, "grad_norm": 0.19997788965702057, "learning_rate": 0.000660149262044462, "loss": 2.4654, "step": 444880 }, { "epoch": 0.8863198074716307, "grad_norm": 0.1839929074048996, "learning_rate": 0.0006599999999999999, "loss": 2.462, "step": 444890 }, { "epoch": 0.8863397296952696, "grad_norm": 0.19171525537967682, "learning_rate": 0.0006598507545799237, "loss": 2.4645, "step": 444900 }, { "epoch": 0.8863596519189085, "grad_norm": 0.1728035807609558, "learning_rate": 0.0006597015257786793, "loss": 2.4534, "step": 444910 }, { "epoch": 0.8863795741425475, "grad_norm": 0.17760513722896576, "learning_rate": 0.0006595523135907169, "loss": 2.4666, "step": 444920 }, { "epoch": 0.8863994963661864, "grad_norm": 0.18605174124240875, "learning_rate": 0.0006594031180104887, "loss": 2.4693, "step": 444930 }, { "epoch": 0.8864194185898253, "grad_norm": 0.1887996643781662, "learning_rate": 0.0006592539390324506, "loss": 2.4536, "step": 444940 }, { "epoch": 0.8864393408134642, "grad_norm": 0.1701599508523941, "learning_rate": 0.0006591047766510614, "loss": 2.47, "step": 444950 }, { "epoch": 0.8864592630371031, "grad_norm": 0.17061953246593475, "learning_rate": 0.0006589556308607831, "loss": 2.4562, "step": 444960 }, { "epoch": 0.8864791852607421, "grad_norm": 0.1677349954843521, "learning_rate": 0.0006588065016560809, "loss": 2.4609, "step": 444970 }, { "epoch": 0.886499107484381, "grad_norm": 0.19062526524066925, "learning_rate": 0.0006586573890314227, "loss": 2.4719, "step": 444980 }, { "epoch": 0.8865190297080199, "grad_norm": 0.18889355659484863, "learning_rate": 0.0006585082929812798, "loss": 2.4597, "step": 444990 }, { "epoch": 0.8865389519316588, "grad_norm": 0.18544740974903107, "learning_rate": 0.0006583592135001262, "loss": 2.4497, "step": 445000 }, { "epoch": 0.8865588741552978, "grad_norm": 0.2223215252161026, "learning_rate": 0.0006582101505824394, "loss": 2.4491, "step": 445010 }, { "epoch": 0.8865787963789367, "grad_norm": 0.18664103746414185, "learning_rate": 0.0006580611042226998, "loss": 2.4597, "step": 445020 }, { "epoch": 0.8865987186025756, "grad_norm": 0.2010280340909958, "learning_rate": 0.0006579120744153906, "loss": 2.4471, "step": 445030 }, { "epoch": 0.8866186408262144, "grad_norm": 0.21511287987232208, "learning_rate": 0.0006577630611549985, "loss": 2.4602, "step": 445040 }, { "epoch": 0.8866385630498533, "grad_norm": 0.2027350664138794, "learning_rate": 0.0006576140644360133, "loss": 2.4705, "step": 445050 }, { "epoch": 0.8866584852734923, "grad_norm": 0.17597457766532898, "learning_rate": 0.0006574650842529271, "loss": 2.4536, "step": 445060 }, { "epoch": 0.8866784074971312, "grad_norm": 0.17356520891189575, "learning_rate": 0.0006573161206002361, "loss": 2.4496, "step": 445070 }, { "epoch": 0.8866983297207701, "grad_norm": 0.20752248167991638, "learning_rate": 0.0006571671734724385, "loss": 2.4565, "step": 445080 }, { "epoch": 0.886718251944409, "grad_norm": 0.1906382143497467, "learning_rate": 0.0006570182428640366, "loss": 2.4641, "step": 445090 }, { "epoch": 0.8867381741680479, "grad_norm": 0.19558584690093994, "learning_rate": 0.0006568693287695348, "loss": 2.4698, "step": 445100 }, { "epoch": 0.8867580963916869, "grad_norm": 0.17651256918907166, "learning_rate": 0.0006567204311834411, "loss": 2.4699, "step": 445110 }, { "epoch": 0.8867780186153258, "grad_norm": 0.17517875134944916, "learning_rate": 0.0006565715501002669, "loss": 2.4541, "step": 445120 }, { "epoch": 0.8867979408389647, "grad_norm": 0.2052982747554779, "learning_rate": 0.0006564226855145254, "loss": 2.4592, "step": 445130 }, { "epoch": 0.8868178630626036, "grad_norm": 0.1801413595676422, "learning_rate": 0.000656273837420734, "loss": 2.4636, "step": 445140 }, { "epoch": 0.8868377852862425, "grad_norm": 0.3133048117160797, "learning_rate": 0.0006561250058134127, "loss": 2.4682, "step": 445150 }, { "epoch": 0.8868577075098815, "grad_norm": 0.2227884829044342, "learning_rate": 0.0006559761906870847, "loss": 2.4625, "step": 445160 }, { "epoch": 0.8868776297335204, "grad_norm": 0.19788143038749695, "learning_rate": 0.0006558273920362758, "loss": 2.454, "step": 445170 }, { "epoch": 0.8868975519571592, "grad_norm": 0.1914181262254715, "learning_rate": 0.0006556786098555152, "loss": 2.4848, "step": 445180 }, { "epoch": 0.8869174741807981, "grad_norm": 0.18399806320667267, "learning_rate": 0.0006555298441393353, "loss": 2.4731, "step": 445190 }, { "epoch": 0.886937396404437, "grad_norm": 0.1916961818933487, "learning_rate": 0.0006553810948822711, "loss": 2.4514, "step": 445200 }, { "epoch": 0.886957318628076, "grad_norm": 0.1923648566007614, "learning_rate": 0.000655232362078861, "loss": 2.4742, "step": 445210 }, { "epoch": 0.8869772408517149, "grad_norm": 0.17816044390201569, "learning_rate": 0.0006550836457236458, "loss": 2.4618, "step": 445220 }, { "epoch": 0.8869971630753538, "grad_norm": 0.17296475172042847, "learning_rate": 0.0006549349458111702, "loss": 2.4768, "step": 445230 }, { "epoch": 0.8870170852989927, "grad_norm": 0.17645661532878876, "learning_rate": 0.000654786262335981, "loss": 2.4554, "step": 445240 }, { "epoch": 0.8870370075226316, "grad_norm": 0.1740129441022873, "learning_rate": 0.0006546375952926289, "loss": 2.4701, "step": 445250 }, { "epoch": 0.8870569297462706, "grad_norm": 0.1876365840435028, "learning_rate": 0.0006544889446756672, "loss": 2.4644, "step": 445260 }, { "epoch": 0.8870768519699095, "grad_norm": 0.22382178902626038, "learning_rate": 0.0006543403104796518, "loss": 2.4653, "step": 445270 }, { "epoch": 0.8870967741935484, "grad_norm": 0.169656440615654, "learning_rate": 0.0006541916926991422, "loss": 2.4453, "step": 445280 }, { "epoch": 0.8871166964171873, "grad_norm": 0.19908826053142548, "learning_rate": 0.0006540430913287008, "loss": 2.4808, "step": 445290 }, { "epoch": 0.8871366186408263, "grad_norm": 0.19665513932704926, "learning_rate": 0.0006538945063628927, "loss": 2.4574, "step": 445300 }, { "epoch": 0.8871565408644652, "grad_norm": 0.18922385573387146, "learning_rate": 0.0006537459377962864, "loss": 2.4529, "step": 445310 }, { "epoch": 0.887176463088104, "grad_norm": 0.20013107359409332, "learning_rate": 0.0006535973856234534, "loss": 2.4636, "step": 445320 }, { "epoch": 0.8871963853117429, "grad_norm": 0.21121588349342346, "learning_rate": 0.0006534488498389675, "loss": 2.4511, "step": 445330 }, { "epoch": 0.8872163075353818, "grad_norm": 0.1881568282842636, "learning_rate": 0.0006533003304374058, "loss": 2.4576, "step": 445340 }, { "epoch": 0.8872362297590208, "grad_norm": 0.18921849131584167, "learning_rate": 0.0006531518274133495, "loss": 2.4639, "step": 445350 }, { "epoch": 0.8872561519826597, "grad_norm": 0.17998848855495453, "learning_rate": 0.0006530033407613811, "loss": 2.4665, "step": 445360 }, { "epoch": 0.8872760742062986, "grad_norm": 0.20008811354637146, "learning_rate": 0.0006528548704760871, "loss": 2.4676, "step": 445370 }, { "epoch": 0.8872959964299375, "grad_norm": 0.22404980659484863, "learning_rate": 0.0006527064165520569, "loss": 2.4637, "step": 445380 }, { "epoch": 0.8873159186535764, "grad_norm": 0.19542196393013, "learning_rate": 0.0006525579789838822, "loss": 2.4658, "step": 445390 }, { "epoch": 0.8873358408772154, "grad_norm": 0.20756351947784424, "learning_rate": 0.0006524095577661586, "loss": 2.4608, "step": 445400 }, { "epoch": 0.8873557631008543, "grad_norm": 0.1761431246995926, "learning_rate": 0.0006522611528934843, "loss": 2.4693, "step": 445410 }, { "epoch": 0.8873756853244932, "grad_norm": 0.17555595934391022, "learning_rate": 0.0006521127643604603, "loss": 2.4809, "step": 445420 }, { "epoch": 0.8873956075481321, "grad_norm": 0.1797633171081543, "learning_rate": 0.0006519643921616907, "loss": 2.4685, "step": 445430 }, { "epoch": 0.887415529771771, "grad_norm": 0.18345028162002563, "learning_rate": 0.0006518160362917827, "loss": 2.4652, "step": 445440 }, { "epoch": 0.88743545199541, "grad_norm": 0.2097838819026947, "learning_rate": 0.0006516676967453461, "loss": 2.4629, "step": 445450 }, { "epoch": 0.8874553742190489, "grad_norm": 0.182045578956604, "learning_rate": 0.0006515193735169942, "loss": 2.4398, "step": 445460 }, { "epoch": 0.8874752964426877, "grad_norm": 0.18576321005821228, "learning_rate": 0.0006513710666013428, "loss": 2.4742, "step": 445470 }, { "epoch": 0.8874952186663266, "grad_norm": 0.18244723975658417, "learning_rate": 0.0006512227759930108, "loss": 2.4814, "step": 445480 }, { "epoch": 0.8875151408899655, "grad_norm": 0.1777283102273941, "learning_rate": 0.0006510745016866202, "loss": 2.4676, "step": 445490 }, { "epoch": 0.8875350631136045, "grad_norm": 0.2011980563402176, "learning_rate": 0.0006509262436767958, "loss": 2.4557, "step": 445500 }, { "epoch": 0.8875549853372434, "grad_norm": 0.21331417560577393, "learning_rate": 0.0006507780019581655, "loss": 2.4689, "step": 445510 }, { "epoch": 0.8875749075608823, "grad_norm": 0.19180884957313538, "learning_rate": 0.00065062977652536, "loss": 2.4879, "step": 445520 }, { "epoch": 0.8875948297845212, "grad_norm": 0.2062310129404068, "learning_rate": 0.0006504815673730129, "loss": 2.4576, "step": 445530 }, { "epoch": 0.8876147520081601, "grad_norm": 0.20935845375061035, "learning_rate": 0.0006503333744957609, "loss": 2.4614, "step": 445540 }, { "epoch": 0.8876346742317991, "grad_norm": 0.1829637885093689, "learning_rate": 0.0006501851978882436, "loss": 2.4793, "step": 445550 }, { "epoch": 0.887654596455438, "grad_norm": 0.21597205102443695, "learning_rate": 0.0006500370375451036, "loss": 2.4644, "step": 445560 }, { "epoch": 0.8876745186790769, "grad_norm": 0.1914876401424408, "learning_rate": 0.0006498888934609863, "loss": 2.4662, "step": 445570 }, { "epoch": 0.8876944409027158, "grad_norm": 0.16594627499580383, "learning_rate": 0.0006497407656305401, "loss": 2.4654, "step": 445580 }, { "epoch": 0.8877143631263548, "grad_norm": 0.1876029372215271, "learning_rate": 0.0006495926540484163, "loss": 2.4638, "step": 445590 }, { "epoch": 0.8877342853499937, "grad_norm": 0.1788298487663269, "learning_rate": 0.0006494445587092695, "loss": 2.4529, "step": 445600 }, { "epoch": 0.8877542075736325, "grad_norm": 0.18934395909309387, "learning_rate": 0.0006492964796077566, "loss": 2.4547, "step": 445610 }, { "epoch": 0.8877741297972714, "grad_norm": 0.18878966569900513, "learning_rate": 0.0006491484167385376, "loss": 2.4741, "step": 445620 }, { "epoch": 0.8877940520209103, "grad_norm": 0.2053736299276352, "learning_rate": 0.0006490003700962756, "loss": 2.4656, "step": 445630 }, { "epoch": 0.8878139742445493, "grad_norm": 0.17743375897407532, "learning_rate": 0.0006488523396756371, "loss": 2.4571, "step": 445640 }, { "epoch": 0.8878338964681882, "grad_norm": 0.1915721893310547, "learning_rate": 0.0006487043254712906, "loss": 2.4532, "step": 445650 }, { "epoch": 0.8878538186918271, "grad_norm": 0.17919211089611053, "learning_rate": 0.0006485563274779079, "loss": 2.4701, "step": 445660 }, { "epoch": 0.887873740915466, "grad_norm": 0.1787567138671875, "learning_rate": 0.0006484083456901637, "loss": 2.4898, "step": 445670 }, { "epoch": 0.8878936631391049, "grad_norm": 0.18704645335674286, "learning_rate": 0.0006482603801027361, "loss": 2.4663, "step": 445680 }, { "epoch": 0.8879135853627439, "grad_norm": 0.17178800702095032, "learning_rate": 0.000648112430710305, "loss": 2.478, "step": 445690 }, { "epoch": 0.8879335075863828, "grad_norm": 0.1886122077703476, "learning_rate": 0.0006479644975075543, "loss": 2.4625, "step": 445700 }, { "epoch": 0.8879534298100217, "grad_norm": 0.2287289798259735, "learning_rate": 0.0006478165804891705, "loss": 2.4577, "step": 445710 }, { "epoch": 0.8879733520336606, "grad_norm": 0.18309453129768372, "learning_rate": 0.0006476686796498426, "loss": 2.4656, "step": 445720 }, { "epoch": 0.8879932742572995, "grad_norm": 0.17124202847480774, "learning_rate": 0.0006475207949842629, "loss": 2.4624, "step": 445730 }, { "epoch": 0.8880131964809385, "grad_norm": 0.18350841104984283, "learning_rate": 0.0006473729264871266, "loss": 2.4458, "step": 445740 }, { "epoch": 0.8880331187045774, "grad_norm": 0.1732555627822876, "learning_rate": 0.0006472250741531316, "loss": 2.455, "step": 445750 }, { "epoch": 0.8880530409282162, "grad_norm": 0.1801813393831253, "learning_rate": 0.000647077237976979, "loss": 2.4574, "step": 445760 }, { "epoch": 0.8880729631518551, "grad_norm": 0.19300103187561035, "learning_rate": 0.0006469294179533722, "loss": 2.4625, "step": 445770 }, { "epoch": 0.888092885375494, "grad_norm": 0.21868038177490234, "learning_rate": 0.0006467816140770182, "loss": 2.4655, "step": 445780 }, { "epoch": 0.888112807599133, "grad_norm": 0.22540801763534546, "learning_rate": 0.0006466338263426266, "loss": 2.4641, "step": 445790 }, { "epoch": 0.8881327298227719, "grad_norm": 0.2032075971364975, "learning_rate": 0.0006464860547449096, "loss": 2.4599, "step": 445800 }, { "epoch": 0.8881526520464108, "grad_norm": 0.183964341878891, "learning_rate": 0.0006463382992785827, "loss": 2.4735, "step": 445810 }, { "epoch": 0.8881725742700497, "grad_norm": 0.19792741537094116, "learning_rate": 0.0006461905599383642, "loss": 2.451, "step": 445820 }, { "epoch": 0.8881924964936886, "grad_norm": 0.18710584938526154, "learning_rate": 0.000646042836718975, "loss": 2.4634, "step": 445830 }, { "epoch": 0.8882124187173276, "grad_norm": 0.20142804086208344, "learning_rate": 0.0006458951296151394, "loss": 2.4648, "step": 445840 }, { "epoch": 0.8882323409409665, "grad_norm": 0.17065036296844482, "learning_rate": 0.000645747438621584, "loss": 2.4744, "step": 445850 }, { "epoch": 0.8882522631646054, "grad_norm": 0.2352098822593689, "learning_rate": 0.0006455997637330389, "loss": 2.4432, "step": 445860 }, { "epoch": 0.8882721853882443, "grad_norm": 0.23513321578502655, "learning_rate": 0.0006454521049442364, "loss": 2.476, "step": 445870 }, { "epoch": 0.8882921076118832, "grad_norm": 0.1947913020849228, "learning_rate": 0.000645304462249912, "loss": 2.4649, "step": 445880 }, { "epoch": 0.8883120298355222, "grad_norm": 0.18451634049415588, "learning_rate": 0.0006451568356448044, "loss": 2.4372, "step": 445890 }, { "epoch": 0.888331952059161, "grad_norm": 0.1999838501214981, "learning_rate": 0.0006450092251236541, "loss": 2.4505, "step": 445900 }, { "epoch": 0.8883518742827999, "grad_norm": 0.1764480173587799, "learning_rate": 0.0006448616306812061, "loss": 2.4692, "step": 445910 }, { "epoch": 0.8883717965064388, "grad_norm": 0.18790248036384583, "learning_rate": 0.0006447140523122068, "loss": 2.4576, "step": 445920 }, { "epoch": 0.8883917187300778, "grad_norm": 0.20682379603385925, "learning_rate": 0.0006445664900114061, "loss": 2.4682, "step": 445930 }, { "epoch": 0.8884116409537167, "grad_norm": 0.773055911064148, "learning_rate": 0.0006444189437735566, "loss": 2.4606, "step": 445940 }, { "epoch": 0.8884315631773556, "grad_norm": 0.2109546959400177, "learning_rate": 0.000644271413593414, "loss": 2.4704, "step": 445950 }, { "epoch": 0.8884514854009945, "grad_norm": 0.2067871391773224, "learning_rate": 0.0006441238994657366, "loss": 2.4668, "step": 445960 }, { "epoch": 0.8884714076246334, "grad_norm": 0.1668989658355713, "learning_rate": 0.0006439764013852856, "loss": 2.4687, "step": 445970 }, { "epoch": 0.8884913298482724, "grad_norm": 0.18586599826812744, "learning_rate": 0.0006438289193468252, "loss": 2.4804, "step": 445980 }, { "epoch": 0.8885112520719113, "grad_norm": 0.19088000059127808, "learning_rate": 0.0006436814533451223, "loss": 2.4549, "step": 445990 }, { "epoch": 0.8885311742955502, "grad_norm": 0.18041399121284485, "learning_rate": 0.0006435340033749463, "loss": 2.4648, "step": 446000 }, { "epoch": 0.8885510965191891, "grad_norm": 0.21724198758602142, "learning_rate": 0.0006433865694310704, "loss": 2.4529, "step": 446010 }, { "epoch": 0.888571018742828, "grad_norm": 0.20008137822151184, "learning_rate": 0.0006432391515082696, "loss": 2.4704, "step": 446020 }, { "epoch": 0.888590940966467, "grad_norm": 0.18744976818561554, "learning_rate": 0.0006430917496013227, "loss": 2.4608, "step": 446030 }, { "epoch": 0.8886108631901058, "grad_norm": 0.19840438663959503, "learning_rate": 0.0006429443637050101, "loss": 2.4584, "step": 446040 }, { "epoch": 0.8886307854137447, "grad_norm": 0.1732749193906784, "learning_rate": 0.0006427969938141162, "loss": 2.4578, "step": 446050 }, { "epoch": 0.8886507076373836, "grad_norm": 0.20560982823371887, "learning_rate": 0.0006426496399234279, "loss": 2.4721, "step": 446060 }, { "epoch": 0.8886706298610225, "grad_norm": 0.18904078006744385, "learning_rate": 0.0006425023020277345, "loss": 2.4552, "step": 446070 }, { "epoch": 0.8886905520846615, "grad_norm": 0.17699351906776428, "learning_rate": 0.0006423549801218287, "loss": 2.4705, "step": 446080 }, { "epoch": 0.8887104743083004, "grad_norm": 0.19426894187927246, "learning_rate": 0.0006422076742005059, "loss": 2.4659, "step": 446090 }, { "epoch": 0.8887303965319393, "grad_norm": 0.17252425849437714, "learning_rate": 0.0006420603842585635, "loss": 2.4416, "step": 446100 }, { "epoch": 0.8887503187555782, "grad_norm": 0.18663649260997772, "learning_rate": 0.0006419131102908031, "loss": 2.4512, "step": 446110 }, { "epoch": 0.8887702409792171, "grad_norm": 0.1802007406949997, "learning_rate": 0.0006417658522920285, "loss": 2.4661, "step": 446120 }, { "epoch": 0.8887901632028561, "grad_norm": 0.18029169738292694, "learning_rate": 0.0006416186102570456, "loss": 2.4635, "step": 446130 }, { "epoch": 0.888810085426495, "grad_norm": 0.1897183358669281, "learning_rate": 0.0006414713841806645, "loss": 2.448, "step": 446140 }, { "epoch": 0.8888300076501339, "grad_norm": 0.18086859583854675, "learning_rate": 0.0006413241740576967, "loss": 2.4522, "step": 446150 }, { "epoch": 0.8888499298737728, "grad_norm": 0.18416345119476318, "learning_rate": 0.0006411769798829577, "loss": 2.4606, "step": 446160 }, { "epoch": 0.8888698520974117, "grad_norm": 0.1779724359512329, "learning_rate": 0.000641029801651265, "loss": 2.4713, "step": 446170 }, { "epoch": 0.8888897743210507, "grad_norm": 0.17644797265529633, "learning_rate": 0.0006408826393574394, "loss": 2.4773, "step": 446180 }, { "epoch": 0.8889096965446895, "grad_norm": 0.20382124185562134, "learning_rate": 0.0006407354929963043, "loss": 2.4626, "step": 446190 }, { "epoch": 0.8889296187683284, "grad_norm": 0.20778292417526245, "learning_rate": 0.0006405883625626856, "loss": 2.4664, "step": 446200 }, { "epoch": 0.8889495409919673, "grad_norm": 0.18319937586784363, "learning_rate": 0.0006404412480514128, "loss": 2.433, "step": 446210 }, { "epoch": 0.8889694632156063, "grad_norm": 0.1796889305114746, "learning_rate": 0.0006402941494573173, "loss": 2.4633, "step": 446220 }, { "epoch": 0.8889893854392452, "grad_norm": 0.187499538064003, "learning_rate": 0.0006401470667752341, "loss": 2.4664, "step": 446230 }, { "epoch": 0.8890093076628841, "grad_norm": 0.19794975221157074, "learning_rate": 0.0006400000000000002, "loss": 2.4555, "step": 446240 }, { "epoch": 0.889029229886523, "grad_norm": 0.4853894114494324, "learning_rate": 0.0006398529491264557, "loss": 2.4687, "step": 446250 }, { "epoch": 0.8890491521101619, "grad_norm": 0.1904306411743164, "learning_rate": 0.0006397059141494439, "loss": 2.4646, "step": 446260 }, { "epoch": 0.8890690743338009, "grad_norm": 0.18667075037956238, "learning_rate": 0.0006395588950638105, "loss": 2.4682, "step": 446270 }, { "epoch": 0.8890889965574398, "grad_norm": 0.17816106975078583, "learning_rate": 0.000639411891864404, "loss": 2.4716, "step": 446280 }, { "epoch": 0.8891089187810787, "grad_norm": 0.19219470024108887, "learning_rate": 0.0006392649045460759, "loss": 2.4632, "step": 446290 }, { "epoch": 0.8891288410047176, "grad_norm": 0.18060173094272614, "learning_rate": 0.0006391179331036801, "loss": 2.464, "step": 446300 }, { "epoch": 0.8891487632283565, "grad_norm": 0.19015827775001526, "learning_rate": 0.0006389709775320733, "loss": 2.4666, "step": 446310 }, { "epoch": 0.8891686854519955, "grad_norm": 0.19997817277908325, "learning_rate": 0.0006388240378261157, "loss": 2.4583, "step": 446320 }, { "epoch": 0.8891886076756343, "grad_norm": 0.19596868753433228, "learning_rate": 0.0006386771139806693, "loss": 2.4667, "step": 446330 }, { "epoch": 0.8892085298992732, "grad_norm": 0.18838311731815338, "learning_rate": 0.0006385302059905993, "loss": 2.4704, "step": 446340 }, { "epoch": 0.8892284521229121, "grad_norm": 0.1953156590461731, "learning_rate": 0.000638383313850774, "loss": 2.4735, "step": 446350 }, { "epoch": 0.889248374346551, "grad_norm": 0.19335702061653137, "learning_rate": 0.0006382364375560639, "loss": 2.4592, "step": 446360 }, { "epoch": 0.88926829657019, "grad_norm": 0.18482737243175507, "learning_rate": 0.0006380895771013426, "loss": 2.4687, "step": 446370 }, { "epoch": 0.8892882187938289, "grad_norm": 0.21192407608032227, "learning_rate": 0.0006379427324814863, "loss": 2.4655, "step": 446380 }, { "epoch": 0.8893081410174678, "grad_norm": 0.20421859622001648, "learning_rate": 0.0006377959036913743, "loss": 2.4654, "step": 446390 }, { "epoch": 0.8893280632411067, "grad_norm": 0.17835953831672668, "learning_rate": 0.0006376490907258879, "loss": 2.4426, "step": 446400 }, { "epoch": 0.8893479854647456, "grad_norm": 0.19332611560821533, "learning_rate": 0.0006375022935799122, "loss": 2.4659, "step": 446410 }, { "epoch": 0.8893679076883846, "grad_norm": 0.19756950438022614, "learning_rate": 0.0006373555122483341, "loss": 2.4605, "step": 446420 }, { "epoch": 0.8893878299120235, "grad_norm": 0.17150896787643433, "learning_rate": 0.0006372087467260438, "loss": 2.4464, "step": 446430 }, { "epoch": 0.8894077521356624, "grad_norm": 0.2324734777212143, "learning_rate": 0.0006370619970079343, "loss": 2.4638, "step": 446440 }, { "epoch": 0.8894276743593013, "grad_norm": 0.18703791499137878, "learning_rate": 0.000636915263088901, "loss": 2.4661, "step": 446450 }, { "epoch": 0.8894475965829401, "grad_norm": 0.1847105771303177, "learning_rate": 0.0006367685449638421, "loss": 2.4631, "step": 446460 }, { "epoch": 0.8894675188065791, "grad_norm": 0.17757216095924377, "learning_rate": 0.0006366218426276589, "loss": 2.445, "step": 446470 }, { "epoch": 0.889487441030218, "grad_norm": 0.1818854659795761, "learning_rate": 0.0006364751560752551, "loss": 2.4591, "step": 446480 }, { "epoch": 0.8895073632538569, "grad_norm": 0.20672746002674103, "learning_rate": 0.0006363284853015372, "loss": 2.4737, "step": 446490 }, { "epoch": 0.8895272854774958, "grad_norm": 0.2114885300397873, "learning_rate": 0.0006361818303014144, "loss": 2.4665, "step": 446500 }, { "epoch": 0.8895472077011348, "grad_norm": 0.18868480622768402, "learning_rate": 0.0006360351910697988, "loss": 2.4843, "step": 446510 }, { "epoch": 0.8895671299247737, "grad_norm": 0.19724364578723907, "learning_rate": 0.0006358885676016053, "loss": 2.4597, "step": 446520 }, { "epoch": 0.8895870521484126, "grad_norm": 0.1947201043367386, "learning_rate": 0.0006357419598917514, "loss": 2.468, "step": 446530 }, { "epoch": 0.8896069743720515, "grad_norm": 0.1863144338130951, "learning_rate": 0.0006355953679351569, "loss": 2.4726, "step": 446540 }, { "epoch": 0.8896268965956904, "grad_norm": 0.18180979788303375, "learning_rate": 0.0006354487917267451, "loss": 2.4544, "step": 446550 }, { "epoch": 0.8896468188193294, "grad_norm": 0.19771501421928406, "learning_rate": 0.0006353022312614416, "loss": 2.4538, "step": 446560 }, { "epoch": 0.8896667410429683, "grad_norm": 0.18876396119594574, "learning_rate": 0.0006351556865341748, "loss": 2.4549, "step": 446570 }, { "epoch": 0.8896866632666072, "grad_norm": 0.18998277187347412, "learning_rate": 0.0006350091575398757, "loss": 2.4321, "step": 446580 }, { "epoch": 0.8897065854902461, "grad_norm": 0.1924561709165573, "learning_rate": 0.0006348626442734784, "loss": 2.4488, "step": 446590 }, { "epoch": 0.889726507713885, "grad_norm": 0.1836174875497818, "learning_rate": 0.0006347161467299189, "loss": 2.4439, "step": 446600 }, { "epoch": 0.889746429937524, "grad_norm": 0.16997703909873962, "learning_rate": 0.0006345696649041373, "loss": 2.4685, "step": 446610 }, { "epoch": 0.8897663521611628, "grad_norm": 0.2146146595478058, "learning_rate": 0.0006344231987910749, "loss": 2.4683, "step": 446620 }, { "epoch": 0.8897862743848017, "grad_norm": 0.17508333921432495, "learning_rate": 0.0006342767483856768, "loss": 2.4518, "step": 446630 }, { "epoch": 0.8898061966084406, "grad_norm": 0.1988273411989212, "learning_rate": 0.0006341303136828902, "loss": 2.4622, "step": 446640 }, { "epoch": 0.8898261188320795, "grad_norm": 0.2118610143661499, "learning_rate": 0.0006339838946776653, "loss": 2.475, "step": 446650 }, { "epoch": 0.8898460410557185, "grad_norm": 0.1817605048418045, "learning_rate": 0.0006338374913649547, "loss": 2.4665, "step": 446660 }, { "epoch": 0.8898659632793574, "grad_norm": 0.20409782230854034, "learning_rate": 0.0006336911037397144, "loss": 2.4742, "step": 446670 }, { "epoch": 0.8898858855029963, "grad_norm": 0.1708504855632782, "learning_rate": 0.0006335447317969023, "loss": 2.4471, "step": 446680 }, { "epoch": 0.8899058077266352, "grad_norm": 0.2057061493396759, "learning_rate": 0.0006333983755314791, "loss": 2.4754, "step": 446690 }, { "epoch": 0.8899257299502741, "grad_norm": 0.21135912835597992, "learning_rate": 0.000633252034938409, "loss": 2.4639, "step": 446700 }, { "epoch": 0.8899456521739131, "grad_norm": 0.17203199863433838, "learning_rate": 0.0006331057100126578, "loss": 2.4751, "step": 446710 }, { "epoch": 0.889965574397552, "grad_norm": 0.2100498080253601, "learning_rate": 0.000632959400749195, "loss": 2.4502, "step": 446720 }, { "epoch": 0.8899854966211909, "grad_norm": 0.19146081805229187, "learning_rate": 0.0006328131071429919, "loss": 2.453, "step": 446730 }, { "epoch": 0.8900054188448298, "grad_norm": 0.18794788420200348, "learning_rate": 0.0006326668291890232, "loss": 2.4541, "step": 446740 }, { "epoch": 0.8900253410684686, "grad_norm": 0.1939481645822525, "learning_rate": 0.0006325205668822655, "loss": 2.4715, "step": 446750 }, { "epoch": 0.8900452632921076, "grad_norm": 0.19024553894996643, "learning_rate": 0.0006323743202176993, "loss": 2.4653, "step": 446760 }, { "epoch": 0.8900651855157465, "grad_norm": 0.18247757852077484, "learning_rate": 0.0006322280891903065, "loss": 2.4659, "step": 446770 }, { "epoch": 0.8900851077393854, "grad_norm": 0.19639171659946442, "learning_rate": 0.0006320818737950725, "loss": 2.4653, "step": 446780 }, { "epoch": 0.8901050299630243, "grad_norm": 0.18429015576839447, "learning_rate": 0.0006319356740269851, "loss": 2.4662, "step": 446790 }, { "epoch": 0.8901249521866633, "grad_norm": 0.1933794915676117, "learning_rate": 0.0006317894898810344, "loss": 2.4595, "step": 446800 }, { "epoch": 0.8901448744103022, "grad_norm": 0.19730709493160248, "learning_rate": 0.0006316433213522141, "loss": 2.465, "step": 446810 }, { "epoch": 0.8901647966339411, "grad_norm": 0.2001035362482071, "learning_rate": 0.0006314971684355197, "loss": 2.4728, "step": 446820 }, { "epoch": 0.89018471885758, "grad_norm": 0.2091955840587616, "learning_rate": 0.0006313510311259501, "loss": 2.4567, "step": 446830 }, { "epoch": 0.8902046410812189, "grad_norm": 0.18472225964069366, "learning_rate": 0.0006312049094185061, "loss": 2.4629, "step": 446840 }, { "epoch": 0.8902245633048579, "grad_norm": 0.20021747052669525, "learning_rate": 0.0006310588033081917, "loss": 2.4688, "step": 446850 }, { "epoch": 0.8902444855284968, "grad_norm": 0.178959921002388, "learning_rate": 0.0006309127127900136, "loss": 2.4508, "step": 446860 }, { "epoch": 0.8902644077521357, "grad_norm": 0.1960902065038681, "learning_rate": 0.0006307666378589807, "loss": 2.4586, "step": 446870 }, { "epoch": 0.8902843299757746, "grad_norm": 0.21215996146202087, "learning_rate": 0.0006306205785101049, "loss": 2.4647, "step": 446880 }, { "epoch": 0.8903042521994134, "grad_norm": 0.20443497598171234, "learning_rate": 0.0006304745347384007, "loss": 2.4627, "step": 446890 }, { "epoch": 0.8903241744230525, "grad_norm": 0.19650046527385712, "learning_rate": 0.0006303285065388855, "loss": 2.4702, "step": 446900 }, { "epoch": 0.8903440966466913, "grad_norm": 0.20451508462429047, "learning_rate": 0.0006301824939065788, "loss": 2.4712, "step": 446910 }, { "epoch": 0.8903640188703302, "grad_norm": 0.20882301032543182, "learning_rate": 0.0006300364968365033, "loss": 2.4576, "step": 446920 }, { "epoch": 0.8903839410939691, "grad_norm": 0.17441189289093018, "learning_rate": 0.000629890515323684, "loss": 2.4697, "step": 446930 }, { "epoch": 0.890403863317608, "grad_norm": 0.17744924128055573, "learning_rate": 0.0006297445493631488, "loss": 2.4755, "step": 446940 }, { "epoch": 0.890423785541247, "grad_norm": 0.1741221845149994, "learning_rate": 0.0006295985989499283, "loss": 2.4554, "step": 446950 }, { "epoch": 0.8904437077648859, "grad_norm": 0.21515806019306183, "learning_rate": 0.0006294526640790547, "loss": 2.4646, "step": 446960 }, { "epoch": 0.8904636299885248, "grad_norm": 0.17930202186107635, "learning_rate": 0.000629306744745565, "loss": 2.464, "step": 446970 }, { "epoch": 0.8904835522121637, "grad_norm": 0.20299255847930908, "learning_rate": 0.0006291608409444965, "loss": 2.4523, "step": 446980 }, { "epoch": 0.8905034744358026, "grad_norm": 0.17894963920116425, "learning_rate": 0.0006290149526708909, "loss": 2.4679, "step": 446990 }, { "epoch": 0.8905233966594416, "grad_norm": 0.20679762959480286, "learning_rate": 0.0006288690799197912, "loss": 2.4705, "step": 447000 }, { "epoch": 0.8905433188830805, "grad_norm": 0.17298568785190582, "learning_rate": 0.0006287232226862441, "loss": 2.4613, "step": 447010 }, { "epoch": 0.8905632411067194, "grad_norm": 0.18060559034347534, "learning_rate": 0.0006285773809652985, "loss": 2.4494, "step": 447020 }, { "epoch": 0.8905831633303583, "grad_norm": 0.20957264304161072, "learning_rate": 0.0006284315547520059, "loss": 2.4393, "step": 447030 }, { "epoch": 0.8906030855539971, "grad_norm": 0.20606033504009247, "learning_rate": 0.0006282857440414203, "loss": 2.4659, "step": 447040 }, { "epoch": 0.8906230077776361, "grad_norm": 0.18025796115398407, "learning_rate": 0.0006281399488285986, "loss": 2.4706, "step": 447050 }, { "epoch": 0.890642930001275, "grad_norm": 0.19312125444412231, "learning_rate": 0.0006279941691086004, "loss": 2.4487, "step": 447060 }, { "epoch": 0.8906628522249139, "grad_norm": 0.2055753767490387, "learning_rate": 0.0006278484048764874, "loss": 2.4623, "step": 447070 }, { "epoch": 0.8906827744485528, "grad_norm": 0.19317474961280823, "learning_rate": 0.0006277026561273247, "loss": 2.4633, "step": 447080 }, { "epoch": 0.8907026966721918, "grad_norm": 0.19491924345493317, "learning_rate": 0.0006275569228561791, "loss": 2.4517, "step": 447090 }, { "epoch": 0.8907226188958307, "grad_norm": 0.22534269094467163, "learning_rate": 0.0006274112050581209, "loss": 2.4576, "step": 447100 }, { "epoch": 0.8907425411194696, "grad_norm": 0.2104899138212204, "learning_rate": 0.0006272655027282223, "loss": 2.4469, "step": 447110 }, { "epoch": 0.8907624633431085, "grad_norm": 0.18502745032310486, "learning_rate": 0.000627119815861559, "loss": 2.4632, "step": 447120 }, { "epoch": 0.8907823855667474, "grad_norm": 0.2138977348804474, "learning_rate": 0.0006269741444532079, "loss": 2.4536, "step": 447130 }, { "epoch": 0.8908023077903864, "grad_norm": 0.1948000192642212, "learning_rate": 0.0006268284884982503, "loss": 2.4643, "step": 447140 }, { "epoch": 0.8908222300140253, "grad_norm": 0.18026717007160187, "learning_rate": 0.0006266828479917685, "loss": 2.4599, "step": 447150 }, { "epoch": 0.8908421522376642, "grad_norm": 0.18386802077293396, "learning_rate": 0.0006265372229288484, "loss": 2.4553, "step": 447160 }, { "epoch": 0.890862074461303, "grad_norm": 0.21917714178562164, "learning_rate": 0.0006263916133045779, "loss": 2.4592, "step": 447170 }, { "epoch": 0.890881996684942, "grad_norm": 0.1994839310646057, "learning_rate": 0.0006262460191140482, "loss": 2.4673, "step": 447180 }, { "epoch": 0.890901918908581, "grad_norm": 0.1809796839952469, "learning_rate": 0.0006261004403523524, "loss": 2.4604, "step": 447190 }, { "epoch": 0.8909218411322198, "grad_norm": 0.19010931253433228, "learning_rate": 0.0006259548770145867, "loss": 2.4657, "step": 447200 }, { "epoch": 0.8909417633558587, "grad_norm": 0.18594041466712952, "learning_rate": 0.0006258093290958493, "loss": 2.4618, "step": 447210 }, { "epoch": 0.8909616855794976, "grad_norm": 0.17380979657173157, "learning_rate": 0.0006256637965912417, "loss": 2.4596, "step": 447220 }, { "epoch": 0.8909816078031365, "grad_norm": 0.18506014347076416, "learning_rate": 0.0006255182794958676, "loss": 2.4601, "step": 447230 }, { "epoch": 0.8910015300267755, "grad_norm": 0.19453611969947815, "learning_rate": 0.0006253727778048335, "loss": 2.4598, "step": 447240 }, { "epoch": 0.8910214522504144, "grad_norm": 0.2058108001947403, "learning_rate": 0.000625227291513248, "loss": 2.4599, "step": 447250 }, { "epoch": 0.8910413744740533, "grad_norm": 0.1884804517030716, "learning_rate": 0.000625081820616223, "loss": 2.4536, "step": 447260 }, { "epoch": 0.8910612966976922, "grad_norm": 0.22586029767990112, "learning_rate": 0.0006249363651088724, "loss": 2.4441, "step": 447270 }, { "epoch": 0.8910812189213311, "grad_norm": 0.18267013132572174, "learning_rate": 0.0006247909249863133, "loss": 2.4747, "step": 447280 }, { "epoch": 0.8911011411449701, "grad_norm": 0.18380434811115265, "learning_rate": 0.0006246455002436644, "loss": 2.4571, "step": 447290 }, { "epoch": 0.891121063368609, "grad_norm": 0.19895826280117035, "learning_rate": 0.0006245000908760481, "loss": 2.4565, "step": 447300 }, { "epoch": 0.8911409855922479, "grad_norm": 0.19620133936405182, "learning_rate": 0.0006243546968785885, "loss": 2.4607, "step": 447310 }, { "epoch": 0.8911609078158867, "grad_norm": 0.18231186270713806, "learning_rate": 0.0006242093182464128, "loss": 2.4653, "step": 447320 }, { "epoch": 0.8911808300395256, "grad_norm": 0.19305358827114105, "learning_rate": 0.0006240639549746507, "loss": 2.4704, "step": 447330 }, { "epoch": 0.8912007522631646, "grad_norm": 0.20077191293239594, "learning_rate": 0.0006239186070584342, "loss": 2.4545, "step": 447340 }, { "epoch": 0.8912206744868035, "grad_norm": 0.17988470196723938, "learning_rate": 0.0006237732744928981, "loss": 2.4555, "step": 447350 }, { "epoch": 0.8912405967104424, "grad_norm": 0.1850925236940384, "learning_rate": 0.0006236279572731797, "loss": 2.4618, "step": 447360 }, { "epoch": 0.8912605189340813, "grad_norm": 0.1955961138010025, "learning_rate": 0.0006234826553944188, "loss": 2.4635, "step": 447370 }, { "epoch": 0.8912804411577202, "grad_norm": 0.16880668699741364, "learning_rate": 0.0006233373688517583, "loss": 2.4701, "step": 447380 }, { "epoch": 0.8913003633813592, "grad_norm": 0.2088666707277298, "learning_rate": 0.0006231920976403427, "loss": 2.4606, "step": 447390 }, { "epoch": 0.8913202856049981, "grad_norm": 0.19291545450687408, "learning_rate": 0.0006230468417553197, "loss": 2.4464, "step": 447400 }, { "epoch": 0.891340207828637, "grad_norm": 0.17293426394462585, "learning_rate": 0.0006229016011918394, "loss": 2.4456, "step": 447410 }, { "epoch": 0.8913601300522759, "grad_norm": 0.18632924556732178, "learning_rate": 0.0006227563759450545, "loss": 2.4585, "step": 447420 }, { "epoch": 0.8913800522759149, "grad_norm": 0.17675067484378815, "learning_rate": 0.0006226111660101204, "loss": 2.4534, "step": 447430 }, { "epoch": 0.8913999744995538, "grad_norm": 0.18589669466018677, "learning_rate": 0.000622465971382195, "loss": 2.4683, "step": 447440 }, { "epoch": 0.8914198967231927, "grad_norm": 0.48208650946617126, "learning_rate": 0.0006223207920564382, "loss": 2.4549, "step": 447450 }, { "epoch": 0.8914398189468316, "grad_norm": 0.18878072500228882, "learning_rate": 0.000622175628028013, "loss": 2.4634, "step": 447460 }, { "epoch": 0.8914597411704704, "grad_norm": 0.19303302466869354, "learning_rate": 0.0006220304792920855, "loss": 2.4638, "step": 447470 }, { "epoch": 0.8914796633941094, "grad_norm": 0.19402720034122467, "learning_rate": 0.0006218853458438227, "loss": 2.4589, "step": 447480 }, { "epoch": 0.8914995856177483, "grad_norm": 0.1755528450012207, "learning_rate": 0.0006217402276783959, "loss": 2.4645, "step": 447490 }, { "epoch": 0.8915195078413872, "grad_norm": 0.1966404914855957, "learning_rate": 0.0006215951247909779, "loss": 2.461, "step": 447500 }, { "epoch": 0.8915394300650261, "grad_norm": 0.1884276419878006, "learning_rate": 0.0006214500371767442, "loss": 2.459, "step": 447510 }, { "epoch": 0.891559352288665, "grad_norm": 0.19660115242004395, "learning_rate": 0.0006213049648308731, "loss": 2.4632, "step": 447520 }, { "epoch": 0.891579274512304, "grad_norm": 0.1940881460905075, "learning_rate": 0.0006211599077485452, "loss": 2.4642, "step": 447530 }, { "epoch": 0.8915991967359429, "grad_norm": 0.16979140043258667, "learning_rate": 0.0006210148659249442, "loss": 2.4551, "step": 447540 }, { "epoch": 0.8916191189595818, "grad_norm": 0.1738772690296173, "learning_rate": 0.0006208698393552552, "loss": 2.4477, "step": 447550 }, { "epoch": 0.8916390411832207, "grad_norm": 0.18614190816879272, "learning_rate": 0.0006207248280346667, "loss": 2.4754, "step": 447560 }, { "epoch": 0.8916589634068596, "grad_norm": 0.1835886836051941, "learning_rate": 0.0006205798319583695, "loss": 2.4571, "step": 447570 }, { "epoch": 0.8916788856304986, "grad_norm": 0.16117185354232788, "learning_rate": 0.0006204348511215571, "loss": 2.4485, "step": 447580 }, { "epoch": 0.8916988078541375, "grad_norm": 0.18318040668964386, "learning_rate": 0.0006202898855194255, "loss": 2.4509, "step": 447590 }, { "epoch": 0.8917187300777764, "grad_norm": 0.20097190141677856, "learning_rate": 0.0006201449351471729, "loss": 2.4561, "step": 447600 }, { "epoch": 0.8917386523014152, "grad_norm": 0.19276899099349976, "learning_rate": 0.0006199999999999999, "loss": 2.4725, "step": 447610 }, { "epoch": 0.8917585745250541, "grad_norm": 0.17999427020549774, "learning_rate": 0.0006198550800731107, "loss": 2.4479, "step": 447620 }, { "epoch": 0.8917784967486931, "grad_norm": 0.21446654200553894, "learning_rate": 0.0006197101753617105, "loss": 2.4677, "step": 447630 }, { "epoch": 0.891798418972332, "grad_norm": 0.1942809373140335, "learning_rate": 0.0006195652858610081, "loss": 2.4475, "step": 447640 }, { "epoch": 0.8918183411959709, "grad_norm": 0.1932443529367447, "learning_rate": 0.0006194204115662148, "loss": 2.4556, "step": 447650 }, { "epoch": 0.8918382634196098, "grad_norm": 0.19480158388614655, "learning_rate": 0.0006192755524725435, "loss": 2.4636, "step": 447660 }, { "epoch": 0.8918581856432487, "grad_norm": 0.2411307394504547, "learning_rate": 0.0006191307085752105, "loss": 2.4658, "step": 447670 }, { "epoch": 0.8918781078668877, "grad_norm": 0.19616352021694183, "learning_rate": 0.0006189858798694345, "loss": 2.46, "step": 447680 }, { "epoch": 0.8918980300905266, "grad_norm": 0.18922024965286255, "learning_rate": 0.0006188410663504363, "loss": 2.4542, "step": 447690 }, { "epoch": 0.8919179523141655, "grad_norm": 0.18640577793121338, "learning_rate": 0.0006186962680134394, "loss": 2.4578, "step": 447700 }, { "epoch": 0.8919378745378044, "grad_norm": 0.21984535455703735, "learning_rate": 0.00061855148485367, "loss": 2.4438, "step": 447710 }, { "epoch": 0.8919577967614434, "grad_norm": 0.19555416703224182, "learning_rate": 0.0006184067168663565, "loss": 2.462, "step": 447720 }, { "epoch": 0.8919777189850823, "grad_norm": 0.1885530799627304, "learning_rate": 0.00061826196404673, "loss": 2.4567, "step": 447730 }, { "epoch": 0.8919976412087212, "grad_norm": 0.21018731594085693, "learning_rate": 0.0006181172263900241, "loss": 2.4627, "step": 447740 }, { "epoch": 0.89201756343236, "grad_norm": 0.19047848880290985, "learning_rate": 0.0006179725038914747, "loss": 2.4688, "step": 447750 }, { "epoch": 0.8920374856559989, "grad_norm": 0.20531602203845978, "learning_rate": 0.0006178277965463204, "loss": 2.4504, "step": 447760 }, { "epoch": 0.8920574078796379, "grad_norm": 0.22575093805789948, "learning_rate": 0.0006176831043498021, "loss": 2.4725, "step": 447770 }, { "epoch": 0.8920773301032768, "grad_norm": 0.2050808221101761, "learning_rate": 0.0006175384272971636, "loss": 2.4653, "step": 447780 }, { "epoch": 0.8920972523269157, "grad_norm": 0.23882891237735748, "learning_rate": 0.0006173937653836506, "loss": 2.4603, "step": 447790 }, { "epoch": 0.8921171745505546, "grad_norm": 0.18326173722743988, "learning_rate": 0.0006172491186045117, "loss": 2.4662, "step": 447800 }, { "epoch": 0.8921370967741935, "grad_norm": 0.18490293622016907, "learning_rate": 0.0006171044869549977, "loss": 2.4585, "step": 447810 }, { "epoch": 0.8921570189978325, "grad_norm": 0.18257208168506622, "learning_rate": 0.0006169598704303623, "loss": 2.4497, "step": 447820 }, { "epoch": 0.8921769412214714, "grad_norm": 0.17048780620098114, "learning_rate": 0.0006168152690258615, "loss": 2.4569, "step": 447830 }, { "epoch": 0.8921968634451103, "grad_norm": 0.20184487104415894, "learning_rate": 0.0006166706827367535, "loss": 2.4683, "step": 447840 }, { "epoch": 0.8922167856687492, "grad_norm": 0.23248222470283508, "learning_rate": 0.0006165261115582992, "loss": 2.4558, "step": 447850 }, { "epoch": 0.8922367078923881, "grad_norm": 0.18367207050323486, "learning_rate": 0.0006163815554857617, "loss": 2.4575, "step": 447860 }, { "epoch": 0.8922566301160271, "grad_norm": 0.18652766942977905, "learning_rate": 0.0006162370145144076, "loss": 2.4627, "step": 447870 }, { "epoch": 0.892276552339666, "grad_norm": 0.2014719545841217, "learning_rate": 0.0006160924886395046, "loss": 2.4454, "step": 447880 }, { "epoch": 0.8922964745633049, "grad_norm": 0.1918644905090332, "learning_rate": 0.0006159479778563235, "loss": 2.4659, "step": 447890 }, { "epoch": 0.8923163967869437, "grad_norm": 0.18696846067905426, "learning_rate": 0.0006158034821601379, "loss": 2.4577, "step": 447900 }, { "epoch": 0.8923363190105826, "grad_norm": 0.1990634799003601, "learning_rate": 0.0006156590015462231, "loss": 2.4625, "step": 447910 }, { "epoch": 0.8923562412342216, "grad_norm": 0.1921103298664093, "learning_rate": 0.0006155145360098575, "loss": 2.4556, "step": 447920 }, { "epoch": 0.8923761634578605, "grad_norm": 0.1899355947971344, "learning_rate": 0.0006153700855463218, "loss": 2.4635, "step": 447930 }, { "epoch": 0.8923960856814994, "grad_norm": 0.1956111192703247, "learning_rate": 0.0006152256501508992, "loss": 2.4568, "step": 447940 }, { "epoch": 0.8924160079051383, "grad_norm": 0.17895598709583282, "learning_rate": 0.000615081229818875, "loss": 2.4507, "step": 447950 }, { "epoch": 0.8924359301287772, "grad_norm": 0.19254915416240692, "learning_rate": 0.0006149368245455372, "loss": 2.4594, "step": 447960 }, { "epoch": 0.8924558523524162, "grad_norm": 0.19654597342014313, "learning_rate": 0.0006147924343261766, "loss": 2.4528, "step": 447970 }, { "epoch": 0.8924757745760551, "grad_norm": 0.20494778454303741, "learning_rate": 0.0006146480591560857, "loss": 2.4698, "step": 447980 }, { "epoch": 0.892495696799694, "grad_norm": 0.19937664270401, "learning_rate": 0.0006145036990305604, "loss": 2.4641, "step": 447990 }, { "epoch": 0.8925156190233329, "grad_norm": 0.20213207602500916, "learning_rate": 0.0006143593539448982, "loss": 2.4642, "step": 448000 }, { "epoch": 0.8925355412469719, "grad_norm": 0.19691269099712372, "learning_rate": 0.0006142150238943994, "loss": 2.4724, "step": 448010 }, { "epoch": 0.8925554634706108, "grad_norm": 0.20089593529701233, "learning_rate": 0.0006140707088743668, "loss": 2.4616, "step": 448020 }, { "epoch": 0.8925753856942497, "grad_norm": 0.17058397829532623, "learning_rate": 0.0006139264088801058, "loss": 2.4545, "step": 448030 }, { "epoch": 0.8925953079178885, "grad_norm": 0.1918603479862213, "learning_rate": 0.0006137821239069235, "loss": 2.4757, "step": 448040 }, { "epoch": 0.8926152301415274, "grad_norm": 0.26990678906440735, "learning_rate": 0.0006136378539501304, "loss": 2.4473, "step": 448050 }, { "epoch": 0.8926351523651664, "grad_norm": 0.19336597621440887, "learning_rate": 0.0006134935990050387, "loss": 2.468, "step": 448060 }, { "epoch": 0.8926550745888053, "grad_norm": 0.183689147233963, "learning_rate": 0.0006133493590669639, "loss": 2.4623, "step": 448070 }, { "epoch": 0.8926749968124442, "grad_norm": 0.19165007770061493, "learning_rate": 0.0006132051341312229, "loss": 2.4647, "step": 448080 }, { "epoch": 0.8926949190360831, "grad_norm": 0.18097245693206787, "learning_rate": 0.0006130609241931353, "loss": 2.4575, "step": 448090 }, { "epoch": 0.892714841259722, "grad_norm": 0.18913601338863373, "learning_rate": 0.0006129167292480238, "loss": 2.4468, "step": 448100 }, { "epoch": 0.892734763483361, "grad_norm": 0.17889487743377686, "learning_rate": 0.000612772549291213, "loss": 2.4482, "step": 448110 }, { "epoch": 0.8927546857069999, "grad_norm": 0.19444821774959564, "learning_rate": 0.00061262838431803, "loss": 2.4628, "step": 448120 }, { "epoch": 0.8927746079306388, "grad_norm": 0.18148604035377502, "learning_rate": 0.0006124842343238042, "loss": 2.4605, "step": 448130 }, { "epoch": 0.8927945301542777, "grad_norm": 0.1906617134809494, "learning_rate": 0.0006123400993038677, "loss": 2.4599, "step": 448140 }, { "epoch": 0.8928144523779166, "grad_norm": 0.1888696402311325, "learning_rate": 0.0006121959792535547, "loss": 2.4494, "step": 448150 }, { "epoch": 0.8928343746015556, "grad_norm": 0.1830560863018036, "learning_rate": 0.0006120518741682022, "loss": 2.4573, "step": 448160 }, { "epoch": 0.8928542968251945, "grad_norm": 0.17985039949417114, "learning_rate": 0.0006119077840431493, "loss": 2.451, "step": 448170 }, { "epoch": 0.8928742190488334, "grad_norm": 0.18265748023986816, "learning_rate": 0.0006117637088737378, "loss": 2.4584, "step": 448180 }, { "epoch": 0.8928941412724722, "grad_norm": 0.18749594688415527, "learning_rate": 0.0006116196486553119, "loss": 2.4566, "step": 448190 }, { "epoch": 0.8929140634961111, "grad_norm": 0.18510526418685913, "learning_rate": 0.0006114756033832174, "loss": 2.4607, "step": 448200 }, { "epoch": 0.8929339857197501, "grad_norm": 0.18249772489070892, "learning_rate": 0.000611331573052804, "loss": 2.4541, "step": 448210 }, { "epoch": 0.892953907943389, "grad_norm": 0.19805629551410675, "learning_rate": 0.0006111875576594223, "loss": 2.4613, "step": 448220 }, { "epoch": 0.8929738301670279, "grad_norm": 0.18389792740345, "learning_rate": 0.0006110435571984267, "loss": 2.4616, "step": 448230 }, { "epoch": 0.8929937523906668, "grad_norm": 0.18615971505641937, "learning_rate": 0.000610899571665173, "loss": 2.4572, "step": 448240 }, { "epoch": 0.8930136746143057, "grad_norm": 0.19235126674175262, "learning_rate": 0.0006107556010550195, "loss": 2.4625, "step": 448250 }, { "epoch": 0.8930335968379447, "grad_norm": 0.1882283240556717, "learning_rate": 0.0006106116453633275, "loss": 2.4524, "step": 448260 }, { "epoch": 0.8930535190615836, "grad_norm": 0.19624608755111694, "learning_rate": 0.0006104677045854601, "loss": 2.4431, "step": 448270 }, { "epoch": 0.8930734412852225, "grad_norm": 0.20393522083759308, "learning_rate": 0.0006103237787167833, "loss": 2.4537, "step": 448280 }, { "epoch": 0.8930933635088614, "grad_norm": 0.1927250325679779, "learning_rate": 0.0006101798677526648, "loss": 2.4599, "step": 448290 }, { "epoch": 0.8931132857325004, "grad_norm": 0.2158317267894745, "learning_rate": 0.0006100359716884758, "loss": 2.4537, "step": 448300 }, { "epoch": 0.8931332079561393, "grad_norm": 0.1791650801897049, "learning_rate": 0.0006098920905195886, "loss": 2.4639, "step": 448310 }, { "epoch": 0.8931531301797782, "grad_norm": 0.189030259847641, "learning_rate": 0.0006097482242413785, "loss": 2.4683, "step": 448320 }, { "epoch": 0.893173052403417, "grad_norm": 0.21079707145690918, "learning_rate": 0.000609604372849224, "loss": 2.4552, "step": 448330 }, { "epoch": 0.8931929746270559, "grad_norm": 0.21604950726032257, "learning_rate": 0.0006094605363385044, "loss": 2.4598, "step": 448340 }, { "epoch": 0.8932128968506949, "grad_norm": 0.18629232048988342, "learning_rate": 0.0006093167147046026, "loss": 2.4512, "step": 448350 }, { "epoch": 0.8932328190743338, "grad_norm": 0.1886754333972931, "learning_rate": 0.0006091729079429033, "loss": 2.4548, "step": 448360 }, { "epoch": 0.8932527412979727, "grad_norm": 0.181244358420372, "learning_rate": 0.0006090291160487937, "loss": 2.4771, "step": 448370 }, { "epoch": 0.8932726635216116, "grad_norm": 0.18873651325702667, "learning_rate": 0.0006088853390176639, "loss": 2.4678, "step": 448380 }, { "epoch": 0.8932925857452505, "grad_norm": 0.21283063292503357, "learning_rate": 0.0006087415768449056, "loss": 2.4564, "step": 448390 }, { "epoch": 0.8933125079688895, "grad_norm": 0.19282962381839752, "learning_rate": 0.0006085978295259131, "loss": 2.461, "step": 448400 }, { "epoch": 0.8933324301925284, "grad_norm": 0.18577326834201813, "learning_rate": 0.0006084540970560835, "loss": 2.4523, "step": 448410 }, { "epoch": 0.8933523524161673, "grad_norm": 0.18304957449436188, "learning_rate": 0.0006083103794308158, "loss": 2.4676, "step": 448420 }, { "epoch": 0.8933722746398062, "grad_norm": 0.19202737510204315, "learning_rate": 0.0006081666766455116, "loss": 2.4618, "step": 448430 }, { "epoch": 0.8933921968634451, "grad_norm": 0.2011571228504181, "learning_rate": 0.0006080229886955748, "loss": 2.4698, "step": 448440 }, { "epoch": 0.8934121190870841, "grad_norm": 0.19925183057785034, "learning_rate": 0.0006078793155764118, "loss": 2.4655, "step": 448450 }, { "epoch": 0.893432041310723, "grad_norm": 0.17740470170974731, "learning_rate": 0.0006077356572834309, "loss": 2.4602, "step": 448460 }, { "epoch": 0.8934519635343618, "grad_norm": 0.19162777066230774, "learning_rate": 0.0006075920138120436, "loss": 2.463, "step": 448470 }, { "epoch": 0.8934718857580007, "grad_norm": 0.18897022306919098, "learning_rate": 0.000607448385157663, "loss": 2.4567, "step": 448480 }, { "epoch": 0.8934918079816396, "grad_norm": 0.18661221861839294, "learning_rate": 0.0006073047713157053, "loss": 2.4543, "step": 448490 }, { "epoch": 0.8935117302052786, "grad_norm": 0.1826183795928955, "learning_rate": 0.0006071611722815881, "loss": 2.4512, "step": 448500 }, { "epoch": 0.8935316524289175, "grad_norm": 0.21173237264156342, "learning_rate": 0.0006070175880507321, "loss": 2.4573, "step": 448510 }, { "epoch": 0.8935515746525564, "grad_norm": 0.20576702058315277, "learning_rate": 0.0006068740186185602, "loss": 2.4557, "step": 448520 }, { "epoch": 0.8935714968761953, "grad_norm": 0.20958401262760162, "learning_rate": 0.0006067304639804973, "loss": 2.4564, "step": 448530 }, { "epoch": 0.8935914190998342, "grad_norm": 0.17025606334209442, "learning_rate": 0.0006065869241319715, "loss": 2.4578, "step": 448540 }, { "epoch": 0.8936113413234732, "grad_norm": 0.19203396141529083, "learning_rate": 0.0006064433990684123, "loss": 2.4524, "step": 448550 }, { "epoch": 0.8936312635471121, "grad_norm": 0.20396709442138672, "learning_rate": 0.0006062998887852525, "loss": 2.4647, "step": 448560 }, { "epoch": 0.893651185770751, "grad_norm": 0.24800141155719757, "learning_rate": 0.0006061563932779259, "loss": 2.4628, "step": 448570 }, { "epoch": 0.8936711079943899, "grad_norm": 0.17771954834461212, "learning_rate": 0.00060601291254187, "loss": 2.4536, "step": 448580 }, { "epoch": 0.8936910302180289, "grad_norm": 0.20745524764060974, "learning_rate": 0.0006058694465725243, "loss": 2.4553, "step": 448590 }, { "epoch": 0.8937109524416678, "grad_norm": 0.18208765983581543, "learning_rate": 0.00060572599536533, "loss": 2.4654, "step": 448600 }, { "epoch": 0.8937308746653067, "grad_norm": 0.19779072701931, "learning_rate": 0.0006055825589157313, "loss": 2.4586, "step": 448610 }, { "epoch": 0.8937507968889455, "grad_norm": 0.20657382905483246, "learning_rate": 0.0006054391372191745, "loss": 2.4544, "step": 448620 }, { "epoch": 0.8937707191125844, "grad_norm": 0.18388110399246216, "learning_rate": 0.0006052957302711086, "loss": 2.4547, "step": 448630 }, { "epoch": 0.8937906413362234, "grad_norm": 0.18894968926906586, "learning_rate": 0.0006051523380669845, "loss": 2.4563, "step": 448640 }, { "epoch": 0.8938105635598623, "grad_norm": 0.19171981513500214, "learning_rate": 0.0006050089606022553, "loss": 2.471, "step": 448650 }, { "epoch": 0.8938304857835012, "grad_norm": 0.18629373610019684, "learning_rate": 0.0006048655978723772, "loss": 2.4605, "step": 448660 }, { "epoch": 0.8938504080071401, "grad_norm": 0.18943244218826294, "learning_rate": 0.0006047222498728076, "loss": 2.4571, "step": 448670 }, { "epoch": 0.893870330230779, "grad_norm": 0.18507324159145355, "learning_rate": 0.0006045789165990075, "loss": 2.4463, "step": 448680 }, { "epoch": 0.893890252454418, "grad_norm": 0.1758149415254593, "learning_rate": 0.0006044355980464391, "loss": 2.4611, "step": 448690 }, { "epoch": 0.8939101746780569, "grad_norm": 0.18198175728321075, "learning_rate": 0.000604292294210568, "loss": 2.4506, "step": 448700 }, { "epoch": 0.8939300969016958, "grad_norm": 0.19338424503803253, "learning_rate": 0.000604149005086861, "loss": 2.4784, "step": 448710 }, { "epoch": 0.8939500191253347, "grad_norm": 0.1925092488527298, "learning_rate": 0.0006040057306707882, "loss": 2.4577, "step": 448720 }, { "epoch": 0.8939699413489736, "grad_norm": 0.17376457154750824, "learning_rate": 0.0006038624709578215, "loss": 2.4569, "step": 448730 }, { "epoch": 0.8939898635726126, "grad_norm": 0.1990678459405899, "learning_rate": 0.0006037192259434353, "loss": 2.4419, "step": 448740 }, { "epoch": 0.8940097857962515, "grad_norm": 0.19275806844234467, "learning_rate": 0.000603575995623106, "loss": 2.4577, "step": 448750 }, { "epoch": 0.8940297080198903, "grad_norm": 0.21649228036403656, "learning_rate": 0.0006034327799923127, "loss": 2.4603, "step": 448760 }, { "epoch": 0.8940496302435292, "grad_norm": 0.21994365751743317, "learning_rate": 0.0006032895790465369, "loss": 2.4557, "step": 448770 }, { "epoch": 0.8940695524671681, "grad_norm": 0.17761090397834778, "learning_rate": 0.0006031463927812622, "loss": 2.4569, "step": 448780 }, { "epoch": 0.8940894746908071, "grad_norm": 0.22056105732917786, "learning_rate": 0.0006030032211919743, "loss": 2.4596, "step": 448790 }, { "epoch": 0.894109396914446, "grad_norm": 0.19075675308704376, "learning_rate": 0.0006028600642741615, "loss": 2.4553, "step": 448800 }, { "epoch": 0.8941293191380849, "grad_norm": 0.22294098138809204, "learning_rate": 0.0006027169220233147, "loss": 2.4576, "step": 448810 }, { "epoch": 0.8941492413617238, "grad_norm": 0.33511635661125183, "learning_rate": 0.0006025737944349261, "loss": 2.4628, "step": 448820 }, { "epoch": 0.8941691635853627, "grad_norm": 0.204468235373497, "learning_rate": 0.0006024306815044916, "loss": 2.4581, "step": 448830 }, { "epoch": 0.8941890858090017, "grad_norm": 0.17869651317596436, "learning_rate": 0.0006022875832275081, "loss": 2.4668, "step": 448840 }, { "epoch": 0.8942090080326406, "grad_norm": 0.1918356865644455, "learning_rate": 0.0006021444995994758, "loss": 2.4702, "step": 448850 }, { "epoch": 0.8942289302562795, "grad_norm": 0.20236597955226898, "learning_rate": 0.0006020014306158965, "loss": 2.4346, "step": 448860 }, { "epoch": 0.8942488524799184, "grad_norm": 0.2097017467021942, "learning_rate": 0.0006018583762722747, "loss": 2.4605, "step": 448870 }, { "epoch": 0.8942687747035574, "grad_norm": 0.21001717448234558, "learning_rate": 0.0006017153365641172, "loss": 2.4518, "step": 448880 }, { "epoch": 0.8942886969271963, "grad_norm": 0.2105998545885086, "learning_rate": 0.0006015723114869329, "loss": 2.4806, "step": 448890 }, { "epoch": 0.8943086191508351, "grad_norm": 0.1887788325548172, "learning_rate": 0.0006014293010362331, "loss": 2.4722, "step": 448900 }, { "epoch": 0.894328541374474, "grad_norm": 0.18715478479862213, "learning_rate": 0.000601286305207531, "loss": 2.4605, "step": 448910 }, { "epoch": 0.8943484635981129, "grad_norm": 0.18954549729824066, "learning_rate": 0.000601143323996343, "loss": 2.4626, "step": 448920 }, { "epoch": 0.8943683858217519, "grad_norm": 0.22713209688663483, "learning_rate": 0.0006010003573981873, "loss": 2.4536, "step": 448930 }, { "epoch": 0.8943883080453908, "grad_norm": 0.19621524214744568, "learning_rate": 0.0006008574054085838, "loss": 2.452, "step": 448940 }, { "epoch": 0.8944082302690297, "grad_norm": 0.20553109049797058, "learning_rate": 0.0006007144680230557, "loss": 2.4501, "step": 448950 }, { "epoch": 0.8944281524926686, "grad_norm": 0.2154255360364914, "learning_rate": 0.0006005715452371278, "loss": 2.454, "step": 448960 }, { "epoch": 0.8944480747163075, "grad_norm": 0.19442160427570343, "learning_rate": 0.0006004286370463277, "loss": 2.4664, "step": 448970 }, { "epoch": 0.8944679969399465, "grad_norm": 0.1833454668521881, "learning_rate": 0.0006002857434461846, "loss": 2.4535, "step": 448980 }, { "epoch": 0.8944879191635854, "grad_norm": 0.20260335505008698, "learning_rate": 0.0006001428644322307, "loss": 2.4545, "step": 448990 }, { "epoch": 0.8945078413872243, "grad_norm": 0.1737385094165802, "learning_rate": 0.0006000000000000001, "loss": 2.4479, "step": 449000 }, { "epoch": 0.8945277636108632, "grad_norm": 0.17821073532104492, "learning_rate": 0.000599857150145029, "loss": 2.4537, "step": 449010 }, { "epoch": 0.8945476858345021, "grad_norm": 0.18995031714439392, "learning_rate": 0.0005997143148628563, "loss": 2.4446, "step": 449020 }, { "epoch": 0.8945676080581411, "grad_norm": 0.18292385339736938, "learning_rate": 0.0005995714941490229, "loss": 2.4542, "step": 449030 }, { "epoch": 0.89458753028178, "grad_norm": 0.18486429750919342, "learning_rate": 0.0005994286879990723, "loss": 2.4604, "step": 449040 }, { "epoch": 0.8946074525054188, "grad_norm": 0.21492192149162292, "learning_rate": 0.0005992858964085497, "loss": 2.4602, "step": 449050 }, { "epoch": 0.8946273747290577, "grad_norm": 0.20593716204166412, "learning_rate": 0.0005991431193730032, "loss": 2.4419, "step": 449060 }, { "epoch": 0.8946472969526966, "grad_norm": 0.18082672357559204, "learning_rate": 0.0005990003568879827, "loss": 2.4499, "step": 449070 }, { "epoch": 0.8946672191763356, "grad_norm": 0.1953558623790741, "learning_rate": 0.0005988576089490406, "loss": 2.4506, "step": 449080 }, { "epoch": 0.8946871413999745, "grad_norm": 0.19776567816734314, "learning_rate": 0.0005987148755517314, "loss": 2.4783, "step": 449090 }, { "epoch": 0.8947070636236134, "grad_norm": 0.19442591071128845, "learning_rate": 0.0005985721566916121, "loss": 2.4586, "step": 449100 }, { "epoch": 0.8947269858472523, "grad_norm": 0.18298213183879852, "learning_rate": 0.0005984294523642415, "loss": 2.4427, "step": 449110 }, { "epoch": 0.8947469080708912, "grad_norm": 0.19548791646957397, "learning_rate": 0.0005982867625651813, "loss": 2.4617, "step": 449120 }, { "epoch": 0.8947668302945302, "grad_norm": 0.19817501306533813, "learning_rate": 0.0005981440872899954, "loss": 2.4436, "step": 449130 }, { "epoch": 0.8947867525181691, "grad_norm": 0.1850573867559433, "learning_rate": 0.0005980014265342492, "loss": 2.4452, "step": 449140 }, { "epoch": 0.894806674741808, "grad_norm": 0.180106520652771, "learning_rate": 0.0005978587802935112, "loss": 2.4542, "step": 449150 }, { "epoch": 0.8948265969654469, "grad_norm": 0.19961075484752655, "learning_rate": 0.0005977161485633518, "loss": 2.4581, "step": 449160 }, { "epoch": 0.8948465191890858, "grad_norm": 0.17606493830680847, "learning_rate": 0.0005975735313393433, "loss": 2.454, "step": 449170 }, { "epoch": 0.8948664414127248, "grad_norm": 0.18505191802978516, "learning_rate": 0.000597430928617061, "loss": 2.4615, "step": 449180 }, { "epoch": 0.8948863636363636, "grad_norm": 0.2146182507276535, "learning_rate": 0.0005972883403920819, "loss": 2.4686, "step": 449190 }, { "epoch": 0.8949062858600025, "grad_norm": 0.18560639023780823, "learning_rate": 0.0005971457666599856, "loss": 2.4596, "step": 449200 }, { "epoch": 0.8949262080836414, "grad_norm": 0.1991565078496933, "learning_rate": 0.0005970032074163534, "loss": 2.4561, "step": 449210 }, { "epoch": 0.8949461303072804, "grad_norm": 0.19533422589302063, "learning_rate": 0.0005968606626567695, "loss": 2.4581, "step": 449220 }, { "epoch": 0.8949660525309193, "grad_norm": 0.18019652366638184, "learning_rate": 0.00059671813237682, "loss": 2.4777, "step": 449230 }, { "epoch": 0.8949859747545582, "grad_norm": 0.19260163605213165, "learning_rate": 0.0005965756165720932, "loss": 2.4555, "step": 449240 }, { "epoch": 0.8950058969781971, "grad_norm": 0.20820888876914978, "learning_rate": 0.0005964331152381802, "loss": 2.4546, "step": 449250 }, { "epoch": 0.895025819201836, "grad_norm": 0.18252414464950562, "learning_rate": 0.0005962906283706731, "loss": 2.4512, "step": 449260 }, { "epoch": 0.895045741425475, "grad_norm": 0.1789625585079193, "learning_rate": 0.0005961481559651673, "loss": 2.4679, "step": 449270 }, { "epoch": 0.8950656636491139, "grad_norm": 0.18120892345905304, "learning_rate": 0.0005960056980172606, "loss": 2.4484, "step": 449280 }, { "epoch": 0.8950855858727528, "grad_norm": 0.18534018099308014, "learning_rate": 0.000595863254522552, "loss": 2.4532, "step": 449290 }, { "epoch": 0.8951055080963917, "grad_norm": 0.18978074193000793, "learning_rate": 0.0005957208254766433, "loss": 2.4624, "step": 449300 }, { "epoch": 0.8951254303200306, "grad_norm": 0.1928979456424713, "learning_rate": 0.000595578410875139, "loss": 2.4515, "step": 449310 }, { "epoch": 0.8951453525436696, "grad_norm": 0.1945439726114273, "learning_rate": 0.000595436010713645, "loss": 2.4621, "step": 449320 }, { "epoch": 0.8951652747673084, "grad_norm": 0.19545570015907288, "learning_rate": 0.0005952936249877699, "loss": 2.4493, "step": 449330 }, { "epoch": 0.8951851969909473, "grad_norm": 0.18800051510334015, "learning_rate": 0.0005951512536931245, "loss": 2.4563, "step": 449340 }, { "epoch": 0.8952051192145862, "grad_norm": 0.19510671496391296, "learning_rate": 0.0005950088968253215, "loss": 2.4605, "step": 449350 }, { "epoch": 0.8952250414382251, "grad_norm": 0.1980319768190384, "learning_rate": 0.0005948665543799763, "loss": 2.4727, "step": 449360 }, { "epoch": 0.8952449636618641, "grad_norm": 0.21941153705120087, "learning_rate": 0.0005947242263527061, "loss": 2.4399, "step": 449370 }, { "epoch": 0.895264885885503, "grad_norm": 0.18744412064552307, "learning_rate": 0.0005945819127391308, "loss": 2.4669, "step": 449380 }, { "epoch": 0.8952848081091419, "grad_norm": 0.194815531373024, "learning_rate": 0.000594439613534872, "loss": 2.4504, "step": 449390 }, { "epoch": 0.8953047303327808, "grad_norm": 0.18544119596481323, "learning_rate": 0.0005942973287355538, "loss": 2.4652, "step": 449400 }, { "epoch": 0.8953246525564197, "grad_norm": 0.18922565877437592, "learning_rate": 0.0005941550583368023, "loss": 2.4533, "step": 449410 }, { "epoch": 0.8953445747800587, "grad_norm": 0.20042191445827484, "learning_rate": 0.0005940128023342461, "loss": 2.4601, "step": 449420 }, { "epoch": 0.8953644970036976, "grad_norm": 0.20084725320339203, "learning_rate": 0.0005938705607235157, "loss": 2.4598, "step": 449430 }, { "epoch": 0.8953844192273365, "grad_norm": 0.19874194264411926, "learning_rate": 0.0005937283335002443, "loss": 2.4628, "step": 449440 }, { "epoch": 0.8954043414509754, "grad_norm": 0.18423785269260406, "learning_rate": 0.0005935861206600668, "loss": 2.4675, "step": 449450 }, { "epoch": 0.8954242636746143, "grad_norm": 0.2155013531446457, "learning_rate": 0.0005934439221986208, "loss": 2.4577, "step": 449460 }, { "epoch": 0.8954441858982533, "grad_norm": 0.21352139115333557, "learning_rate": 0.000593301738111545, "loss": 2.4611, "step": 449470 }, { "epoch": 0.8954641081218921, "grad_norm": 0.19655705988407135, "learning_rate": 0.0005931595683944822, "loss": 2.454, "step": 449480 }, { "epoch": 0.895484030345531, "grad_norm": 0.19482475519180298, "learning_rate": 0.0005930174130430755, "loss": 2.4555, "step": 449490 }, { "epoch": 0.8955039525691699, "grad_norm": 0.19034305214881897, "learning_rate": 0.0005928752720529711, "loss": 2.4453, "step": 449500 }, { "epoch": 0.8955238747928089, "grad_norm": 0.19612567126750946, "learning_rate": 0.0005927331454198177, "loss": 2.4707, "step": 449510 }, { "epoch": 0.8955437970164478, "grad_norm": 0.2047751247882843, "learning_rate": 0.0005925910331392657, "loss": 2.4391, "step": 449520 }, { "epoch": 0.8955637192400867, "grad_norm": 0.20980095863342285, "learning_rate": 0.0005924489352069673, "loss": 2.45, "step": 449530 }, { "epoch": 0.8955836414637256, "grad_norm": 0.18049387633800507, "learning_rate": 0.0005923068516185781, "loss": 2.4563, "step": 449540 }, { "epoch": 0.8956035636873645, "grad_norm": 0.23087657988071442, "learning_rate": 0.0005921647823697547, "loss": 2.4553, "step": 449550 }, { "epoch": 0.8956234859110035, "grad_norm": 0.18545816838741302, "learning_rate": 0.0005920227274561567, "loss": 2.4328, "step": 449560 }, { "epoch": 0.8956434081346424, "grad_norm": 0.19148635864257812, "learning_rate": 0.0005918806868734454, "loss": 2.4599, "step": 449570 }, { "epoch": 0.8956633303582813, "grad_norm": 0.19348609447479248, "learning_rate": 0.0005917386606172844, "loss": 2.4648, "step": 449580 }, { "epoch": 0.8956832525819202, "grad_norm": 0.2194896638393402, "learning_rate": 0.0005915966486833397, "loss": 2.4528, "step": 449590 }, { "epoch": 0.895703174805559, "grad_norm": 0.19675958156585693, "learning_rate": 0.0005914546510672793, "loss": 2.4358, "step": 449600 }, { "epoch": 0.8957230970291981, "grad_norm": 0.17850205302238464, "learning_rate": 0.0005913126677647734, "loss": 2.4554, "step": 449610 }, { "epoch": 0.895743019252837, "grad_norm": 0.22711332142353058, "learning_rate": 0.0005911706987714942, "loss": 2.4463, "step": 449620 }, { "epoch": 0.8957629414764758, "grad_norm": 0.23447123169898987, "learning_rate": 0.0005910287440831166, "loss": 2.4496, "step": 449630 }, { "epoch": 0.8957828637001147, "grad_norm": 0.19919444620609283, "learning_rate": 0.000590886803695317, "loss": 2.4552, "step": 449640 }, { "epoch": 0.8958027859237536, "grad_norm": 0.19616828858852386, "learning_rate": 0.0005907448776037747, "loss": 2.4615, "step": 449650 }, { "epoch": 0.8958227081473926, "grad_norm": 0.18247830867767334, "learning_rate": 0.0005906029658041705, "loss": 2.4673, "step": 449660 }, { "epoch": 0.8958426303710315, "grad_norm": 0.2032465636730194, "learning_rate": 0.000590461068292188, "loss": 2.4624, "step": 449670 }, { "epoch": 0.8958625525946704, "grad_norm": 0.207025408744812, "learning_rate": 0.0005903191850635123, "loss": 2.471, "step": 449680 }, { "epoch": 0.8958824748183093, "grad_norm": 0.19557571411132812, "learning_rate": 0.0005901773161138313, "loss": 2.4519, "step": 449690 }, { "epoch": 0.8959023970419482, "grad_norm": 0.2012237161397934, "learning_rate": 0.0005900354614388346, "loss": 2.4571, "step": 449700 }, { "epoch": 0.8959223192655872, "grad_norm": 0.19410617649555206, "learning_rate": 0.0005898936210342143, "loss": 2.4662, "step": 449710 }, { "epoch": 0.8959422414892261, "grad_norm": 0.20512990653514862, "learning_rate": 0.0005897517948956646, "loss": 2.4621, "step": 449720 }, { "epoch": 0.895962163712865, "grad_norm": 0.19458523392677307, "learning_rate": 0.0005896099830188815, "loss": 2.4666, "step": 449730 }, { "epoch": 0.8959820859365039, "grad_norm": 0.20252814888954163, "learning_rate": 0.0005894681853995635, "loss": 2.4511, "step": 449740 }, { "epoch": 0.8960020081601427, "grad_norm": 0.19587181508541107, "learning_rate": 0.0005893264020334115, "loss": 2.4543, "step": 449750 }, { "epoch": 0.8960219303837818, "grad_norm": 0.17627650499343872, "learning_rate": 0.0005891846329161281, "loss": 2.4456, "step": 449760 }, { "epoch": 0.8960418526074206, "grad_norm": 0.2861989438533783, "learning_rate": 0.0005890428780434185, "loss": 2.4527, "step": 449770 }, { "epoch": 0.8960617748310595, "grad_norm": 0.19048720598220825, "learning_rate": 0.0005889011374109891, "loss": 2.4385, "step": 449780 }, { "epoch": 0.8960816970546984, "grad_norm": 0.1946696639060974, "learning_rate": 0.00058875941101455, "loss": 2.4523, "step": 449790 }, { "epoch": 0.8961016192783374, "grad_norm": 0.19752000272274017, "learning_rate": 0.0005886176988498119, "loss": 2.4653, "step": 449800 }, { "epoch": 0.8961215415019763, "grad_norm": 0.22214117646217346, "learning_rate": 0.0005884760009124889, "loss": 2.4796, "step": 449810 }, { "epoch": 0.8961414637256152, "grad_norm": 0.17158547043800354, "learning_rate": 0.0005883343171982964, "loss": 2.4592, "step": 449820 }, { "epoch": 0.8961613859492541, "grad_norm": 0.1891641914844513, "learning_rate": 0.0005881926477029524, "loss": 2.455, "step": 449830 }, { "epoch": 0.896181308172893, "grad_norm": 0.20695480704307556, "learning_rate": 0.0005880509924221767, "loss": 2.4496, "step": 449840 }, { "epoch": 0.896201230396532, "grad_norm": 0.2006603628396988, "learning_rate": 0.0005879093513516917, "loss": 2.4582, "step": 449850 }, { "epoch": 0.8962211526201709, "grad_norm": 0.19424107670783997, "learning_rate": 0.0005877677244872217, "loss": 2.4567, "step": 449860 }, { "epoch": 0.8962410748438098, "grad_norm": 0.20408056676387787, "learning_rate": 0.000587626111824493, "loss": 2.4595, "step": 449870 }, { "epoch": 0.8962609970674487, "grad_norm": 0.2044089287519455, "learning_rate": 0.0005874845133592339, "loss": 2.4538, "step": 449880 }, { "epoch": 0.8962809192910876, "grad_norm": 0.18089495599269867, "learning_rate": 0.0005873429290871759, "loss": 2.4512, "step": 449890 }, { "epoch": 0.8963008415147266, "grad_norm": 0.18792228400707245, "learning_rate": 0.0005872013590040513, "loss": 2.4523, "step": 449900 }, { "epoch": 0.8963207637383654, "grad_norm": 0.1858002245426178, "learning_rate": 0.000587059803105595, "loss": 2.4528, "step": 449910 }, { "epoch": 0.8963406859620043, "grad_norm": 0.2294221818447113, "learning_rate": 0.0005869182613875446, "loss": 2.4478, "step": 449920 }, { "epoch": 0.8963606081856432, "grad_norm": 0.1901184320449829, "learning_rate": 0.0005867767338456389, "loss": 2.4341, "step": 449930 }, { "epoch": 0.8963805304092821, "grad_norm": 0.2037976086139679, "learning_rate": 0.0005866352204756198, "loss": 2.4545, "step": 449940 }, { "epoch": 0.8964004526329211, "grad_norm": 0.19599807262420654, "learning_rate": 0.0005864937212732304, "loss": 2.4582, "step": 449950 }, { "epoch": 0.89642037485656, "grad_norm": 0.18916381895542145, "learning_rate": 0.0005863522362342166, "loss": 2.4367, "step": 449960 }, { "epoch": 0.8964402970801989, "grad_norm": 0.18032389879226685, "learning_rate": 0.0005862107653543262, "loss": 2.4544, "step": 449970 }, { "epoch": 0.8964602193038378, "grad_norm": 0.20874778926372528, "learning_rate": 0.000586069308629309, "loss": 2.4722, "step": 449980 }, { "epoch": 0.8964801415274767, "grad_norm": 0.18461467325687408, "learning_rate": 0.0005859278660549172, "loss": 2.4613, "step": 449990 }, { "epoch": 0.8965000637511157, "grad_norm": 0.20701640844345093, "learning_rate": 0.0005857864376269049, "loss": 2.4644, "step": 450000 }, { "epoch": 0.8965199859747546, "grad_norm": 0.23410438001155853, "learning_rate": 0.0005856450233410284, "loss": 2.4576, "step": 450010 }, { "epoch": 0.8965399081983935, "grad_norm": 0.1864277422428131, "learning_rate": 0.0005855036231930461, "loss": 2.4554, "step": 450020 }, { "epoch": 0.8965598304220324, "grad_norm": 0.19412103295326233, "learning_rate": 0.0005853622371787186, "loss": 2.4641, "step": 450030 }, { "epoch": 0.8965797526456712, "grad_norm": 0.18610645830631256, "learning_rate": 0.0005852208652938085, "loss": 2.4369, "step": 450040 }, { "epoch": 0.8965996748693102, "grad_norm": 0.19009439647197723, "learning_rate": 0.0005850795075340807, "loss": 2.4577, "step": 450050 }, { "epoch": 0.8966195970929491, "grad_norm": 0.20017972588539124, "learning_rate": 0.0005849381638953018, "loss": 2.4625, "step": 450060 }, { "epoch": 0.896639519316588, "grad_norm": 0.21201471984386444, "learning_rate": 0.0005847968343732408, "loss": 2.4681, "step": 450070 }, { "epoch": 0.8966594415402269, "grad_norm": 0.17373351752758026, "learning_rate": 0.0005846555189636694, "loss": 2.47, "step": 450080 }, { "epoch": 0.8966793637638659, "grad_norm": 0.20514973998069763, "learning_rate": 0.0005845142176623603, "loss": 2.462, "step": 450090 }, { "epoch": 0.8966992859875048, "grad_norm": 0.18783041834831238, "learning_rate": 0.0005843729304650889, "loss": 2.4671, "step": 450100 }, { "epoch": 0.8967192082111437, "grad_norm": 0.17983950674533844, "learning_rate": 0.0005842316573676327, "loss": 2.4343, "step": 450110 }, { "epoch": 0.8967391304347826, "grad_norm": 0.19596920907497406, "learning_rate": 0.0005840903983657715, "loss": 2.4613, "step": 450120 }, { "epoch": 0.8967590526584215, "grad_norm": 0.17772796750068665, "learning_rate": 0.0005839491534552865, "loss": 2.4804, "step": 450130 }, { "epoch": 0.8967789748820605, "grad_norm": 0.1892510950565338, "learning_rate": 0.0005838079226319617, "loss": 2.4593, "step": 450140 }, { "epoch": 0.8967988971056994, "grad_norm": 0.19912897050380707, "learning_rate": 0.000583666705891583, "loss": 2.4471, "step": 450150 }, { "epoch": 0.8968188193293383, "grad_norm": 0.2012328952550888, "learning_rate": 0.0005835255032299381, "loss": 2.4627, "step": 450160 }, { "epoch": 0.8968387415529772, "grad_norm": 0.17686332762241364, "learning_rate": 0.0005833843146428175, "loss": 2.4547, "step": 450170 }, { "epoch": 0.896858663776616, "grad_norm": 0.201583132147789, "learning_rate": 0.000583243140126013, "loss": 2.4391, "step": 450180 }, { "epoch": 0.896878586000255, "grad_norm": 0.19314642250537872, "learning_rate": 0.000583101979675319, "loss": 2.4579, "step": 450190 }, { "epoch": 0.8968985082238939, "grad_norm": 0.18378955125808716, "learning_rate": 0.0005829608332865319, "loss": 2.4661, "step": 450200 }, { "epoch": 0.8969184304475328, "grad_norm": 0.1941070407629013, "learning_rate": 0.0005828197009554501, "loss": 2.4614, "step": 450210 }, { "epoch": 0.8969383526711717, "grad_norm": 0.19545836746692657, "learning_rate": 0.000582678582677874, "loss": 2.4531, "step": 450220 }, { "epoch": 0.8969582748948106, "grad_norm": 0.2013760358095169, "learning_rate": 0.0005825374784496065, "loss": 2.4528, "step": 450230 }, { "epoch": 0.8969781971184496, "grad_norm": 0.19372576475143433, "learning_rate": 0.000582396388266452, "loss": 2.4631, "step": 450240 }, { "epoch": 0.8969981193420885, "grad_norm": 0.19249212741851807, "learning_rate": 0.0005822553121242176, "loss": 2.4683, "step": 450250 }, { "epoch": 0.8970180415657274, "grad_norm": 0.18002116680145264, "learning_rate": 0.0005821142500187118, "loss": 2.4675, "step": 450260 }, { "epoch": 0.8970379637893663, "grad_norm": 0.18286144733428955, "learning_rate": 0.000581973201945746, "loss": 2.4396, "step": 450270 }, { "epoch": 0.8970578860130052, "grad_norm": 0.20485462248325348, "learning_rate": 0.0005818321679011331, "loss": 2.4673, "step": 450280 }, { "epoch": 0.8970778082366442, "grad_norm": 0.18899758160114288, "learning_rate": 0.0005816911478806881, "loss": 2.4768, "step": 450290 }, { "epoch": 0.8970977304602831, "grad_norm": 0.17099718749523163, "learning_rate": 0.0005815501418802285, "loss": 2.4608, "step": 450300 }, { "epoch": 0.897117652683922, "grad_norm": 0.1747099608182907, "learning_rate": 0.0005814091498955733, "loss": 2.4675, "step": 450310 }, { "epoch": 0.8971375749075609, "grad_norm": 0.18595029413700104, "learning_rate": 0.0005812681719225441, "loss": 2.4616, "step": 450320 }, { "epoch": 0.8971574971311997, "grad_norm": 0.1894521862268448, "learning_rate": 0.0005811272079569642, "loss": 2.448, "step": 450330 }, { "epoch": 0.8971774193548387, "grad_norm": 0.18729770183563232, "learning_rate": 0.0005809862579946592, "loss": 2.45, "step": 450340 }, { "epoch": 0.8971973415784776, "grad_norm": 0.20195844769477844, "learning_rate": 0.0005808453220314567, "loss": 2.4448, "step": 450350 }, { "epoch": 0.8972172638021165, "grad_norm": 0.19401606917381287, "learning_rate": 0.0005807044000631863, "loss": 2.4439, "step": 450360 }, { "epoch": 0.8972371860257554, "grad_norm": 0.17976869642734528, "learning_rate": 0.0005805634920856797, "loss": 2.4654, "step": 450370 }, { "epoch": 0.8972571082493944, "grad_norm": 0.21491703391075134, "learning_rate": 0.000580422598094771, "loss": 2.4442, "step": 450380 }, { "epoch": 0.8972770304730333, "grad_norm": 0.1817806512117386, "learning_rate": 0.0005802817180862958, "loss": 2.4574, "step": 450390 }, { "epoch": 0.8972969526966722, "grad_norm": 0.21866677701473236, "learning_rate": 0.0005801408520560923, "loss": 2.4524, "step": 450400 }, { "epoch": 0.8973168749203111, "grad_norm": 0.18182925879955292, "learning_rate": 0.0005800000000000001, "loss": 2.4512, "step": 450410 }, { "epoch": 0.89733679714395, "grad_norm": 0.18246255815029144, "learning_rate": 0.0005798591619138616, "loss": 2.4663, "step": 450420 }, { "epoch": 0.897356719367589, "grad_norm": 0.19667106866836548, "learning_rate": 0.0005797183377935207, "loss": 2.4434, "step": 450430 }, { "epoch": 0.8973766415912279, "grad_norm": 0.19261594116687775, "learning_rate": 0.0005795775276348238, "loss": 2.4473, "step": 450440 }, { "epoch": 0.8973965638148668, "grad_norm": 0.19791488349437714, "learning_rate": 0.0005794367314336191, "loss": 2.4557, "step": 450450 }, { "epoch": 0.8974164860385057, "grad_norm": 0.20586960017681122, "learning_rate": 0.0005792959491857565, "loss": 2.4589, "step": 450460 }, { "epoch": 0.8974364082621445, "grad_norm": 0.21224777400493622, "learning_rate": 0.0005791551808870892, "loss": 2.4441, "step": 450470 }, { "epoch": 0.8974563304857835, "grad_norm": 0.2023608237504959, "learning_rate": 0.0005790144265334712, "loss": 2.4508, "step": 450480 }, { "epoch": 0.8974762527094224, "grad_norm": 0.18152210116386414, "learning_rate": 0.0005788736861207587, "loss": 2.437, "step": 450490 }, { "epoch": 0.8974961749330613, "grad_norm": 0.1714843511581421, "learning_rate": 0.0005787329596448105, "loss": 2.4626, "step": 450500 }, { "epoch": 0.8975160971567002, "grad_norm": 0.17674657702445984, "learning_rate": 0.000578592247101487, "loss": 2.4533, "step": 450510 }, { "epoch": 0.8975360193803391, "grad_norm": 0.2033287137746811, "learning_rate": 0.0005784515484866513, "loss": 2.4513, "step": 450520 }, { "epoch": 0.8975559416039781, "grad_norm": 0.2007274329662323, "learning_rate": 0.0005783108637961674, "loss": 2.4409, "step": 450530 }, { "epoch": 0.897575863827617, "grad_norm": 0.17972423136234283, "learning_rate": 0.0005781701930259023, "loss": 2.4331, "step": 450540 }, { "epoch": 0.8975957860512559, "grad_norm": 0.22641921043395996, "learning_rate": 0.0005780295361717249, "loss": 2.4662, "step": 450550 }, { "epoch": 0.8976157082748948, "grad_norm": 0.18772536516189575, "learning_rate": 0.0005778888932295057, "loss": 2.4629, "step": 450560 }, { "epoch": 0.8976356304985337, "grad_norm": 0.19304482638835907, "learning_rate": 0.0005777482641951179, "loss": 2.4395, "step": 450570 }, { "epoch": 0.8976555527221727, "grad_norm": 0.17798694968223572, "learning_rate": 0.0005776076490644362, "loss": 2.4778, "step": 450580 }, { "epoch": 0.8976754749458116, "grad_norm": 0.17982329428195953, "learning_rate": 0.0005774670478333375, "loss": 2.453, "step": 450590 }, { "epoch": 0.8976953971694505, "grad_norm": 0.19532346725463867, "learning_rate": 0.0005773264604977009, "loss": 2.4607, "step": 450600 }, { "epoch": 0.8977153193930894, "grad_norm": 0.21607455611228943, "learning_rate": 0.000577185887053407, "loss": 2.4536, "step": 450610 }, { "epoch": 0.8977352416167282, "grad_norm": 0.19056640565395355, "learning_rate": 0.0005770453274963394, "loss": 2.4565, "step": 450620 }, { "epoch": 0.8977551638403672, "grad_norm": 0.18906991183757782, "learning_rate": 0.0005769047818223827, "loss": 2.456, "step": 450630 }, { "epoch": 0.8977750860640061, "grad_norm": 0.19680774211883545, "learning_rate": 0.0005767642500274243, "loss": 2.4487, "step": 450640 }, { "epoch": 0.897795008287645, "grad_norm": 0.21629372239112854, "learning_rate": 0.0005766237321073533, "loss": 2.4407, "step": 450650 }, { "epoch": 0.8978149305112839, "grad_norm": 0.19398118555545807, "learning_rate": 0.0005764832280580603, "loss": 2.4643, "step": 450660 }, { "epoch": 0.8978348527349228, "grad_norm": 0.22105374932289124, "learning_rate": 0.0005763427378754393, "loss": 2.4613, "step": 450670 }, { "epoch": 0.8978547749585618, "grad_norm": 0.18197517096996307, "learning_rate": 0.000576202261555385, "loss": 2.4504, "step": 450680 }, { "epoch": 0.8978746971822007, "grad_norm": 0.18838419020175934, "learning_rate": 0.0005760617990937948, "loss": 2.4537, "step": 450690 }, { "epoch": 0.8978946194058396, "grad_norm": 0.19798241555690765, "learning_rate": 0.0005759213504865681, "loss": 2.4534, "step": 450700 }, { "epoch": 0.8979145416294785, "grad_norm": 0.19582200050354004, "learning_rate": 0.0005757809157296059, "loss": 2.4525, "step": 450710 }, { "epoch": 0.8979344638531175, "grad_norm": 0.1927594542503357, "learning_rate": 0.0005756404948188117, "loss": 2.4523, "step": 450720 }, { "epoch": 0.8979543860767564, "grad_norm": 0.1974450796842575, "learning_rate": 0.0005755000877500905, "loss": 2.4589, "step": 450730 }, { "epoch": 0.8979743083003953, "grad_norm": 0.20104533433914185, "learning_rate": 0.00057535969451935, "loss": 2.4615, "step": 450740 }, { "epoch": 0.8979942305240342, "grad_norm": 0.18897204101085663, "learning_rate": 0.0005752193151224994, "loss": 2.4682, "step": 450750 }, { "epoch": 0.898014152747673, "grad_norm": 0.20858514308929443, "learning_rate": 0.0005750789495554498, "loss": 2.4594, "step": 450760 }, { "epoch": 0.898034074971312, "grad_norm": 0.19269147515296936, "learning_rate": 0.0005749385978141153, "loss": 2.4554, "step": 450770 }, { "epoch": 0.8980539971949509, "grad_norm": 0.17957141995429993, "learning_rate": 0.0005747982598944106, "loss": 2.4527, "step": 450780 }, { "epoch": 0.8980739194185898, "grad_norm": 0.1980765014886856, "learning_rate": 0.0005746579357922535, "loss": 2.442, "step": 450790 }, { "epoch": 0.8980938416422287, "grad_norm": 0.217368021607399, "learning_rate": 0.0005745176255035631, "loss": 2.4463, "step": 450800 }, { "epoch": 0.8981137638658676, "grad_norm": 0.19982244074344635, "learning_rate": 0.0005743773290242611, "loss": 2.4622, "step": 450810 }, { "epoch": 0.8981336860895066, "grad_norm": 0.19049212336540222, "learning_rate": 0.0005742370463502709, "loss": 2.4424, "step": 450820 }, { "epoch": 0.8981536083131455, "grad_norm": 0.18477670848369598, "learning_rate": 0.0005740967774775177, "loss": 2.4492, "step": 450830 }, { "epoch": 0.8981735305367844, "grad_norm": 0.18565575778484344, "learning_rate": 0.0005739565224019289, "loss": 2.4623, "step": 450840 }, { "epoch": 0.8981934527604233, "grad_norm": 0.2033805549144745, "learning_rate": 0.0005738162811194345, "loss": 2.4617, "step": 450850 }, { "epoch": 0.8982133749840622, "grad_norm": 0.187417671084404, "learning_rate": 0.0005736760536259653, "loss": 2.4539, "step": 450860 }, { "epoch": 0.8982332972077012, "grad_norm": 0.18725886940956116, "learning_rate": 0.000573535839917455, "loss": 2.4777, "step": 450870 }, { "epoch": 0.8982532194313401, "grad_norm": 0.19888316094875336, "learning_rate": 0.0005733956399898392, "loss": 2.4529, "step": 450880 }, { "epoch": 0.898273141654979, "grad_norm": 0.26543331146240234, "learning_rate": 0.0005732554538390553, "loss": 2.457, "step": 450890 }, { "epoch": 0.8982930638786178, "grad_norm": 0.1749354600906372, "learning_rate": 0.0005731152814610425, "loss": 2.4402, "step": 450900 }, { "epoch": 0.8983129861022567, "grad_norm": 0.2017371952533722, "learning_rate": 0.0005729751228517423, "loss": 2.4643, "step": 450910 }, { "epoch": 0.8983329083258957, "grad_norm": 0.2189386785030365, "learning_rate": 0.0005728349780070983, "loss": 2.444, "step": 450920 }, { "epoch": 0.8983528305495346, "grad_norm": 0.21731823682785034, "learning_rate": 0.0005726948469230556, "loss": 2.4569, "step": 450930 }, { "epoch": 0.8983727527731735, "grad_norm": 0.20966406166553497, "learning_rate": 0.0005725547295955618, "loss": 2.4437, "step": 450940 }, { "epoch": 0.8983926749968124, "grad_norm": 0.188908189535141, "learning_rate": 0.0005724146260205663, "loss": 2.4476, "step": 450950 }, { "epoch": 0.8984125972204513, "grad_norm": 0.18650291860103607, "learning_rate": 0.0005722745361940203, "loss": 2.4441, "step": 450960 }, { "epoch": 0.8984325194440903, "grad_norm": 0.20454083383083344, "learning_rate": 0.0005721344601118772, "loss": 2.4548, "step": 450970 }, { "epoch": 0.8984524416677292, "grad_norm": 0.20694221556186676, "learning_rate": 0.0005719943977700927, "loss": 2.4447, "step": 450980 }, { "epoch": 0.8984723638913681, "grad_norm": 0.19126836955547333, "learning_rate": 0.000571854349164624, "loss": 2.4617, "step": 450990 }, { "epoch": 0.898492286115007, "grad_norm": 0.18278874456882477, "learning_rate": 0.0005717143142914301, "loss": 2.4438, "step": 451000 }, { "epoch": 0.898512208338646, "grad_norm": 0.17783701419830322, "learning_rate": 0.0005715742931464724, "loss": 2.4369, "step": 451010 }, { "epoch": 0.8985321305622849, "grad_norm": 0.18772132694721222, "learning_rate": 0.0005714342857257145, "loss": 2.4505, "step": 451020 }, { "epoch": 0.8985520527859238, "grad_norm": 0.19157752394676208, "learning_rate": 0.0005712942920251212, "loss": 2.4574, "step": 451030 }, { "epoch": 0.8985719750095627, "grad_norm": 0.20694327354431152, "learning_rate": 0.00057115431204066, "loss": 2.4561, "step": 451040 }, { "epoch": 0.8985918972332015, "grad_norm": 0.21721379458904266, "learning_rate": 0.0005710143457683, "loss": 2.4445, "step": 451050 }, { "epoch": 0.8986118194568405, "grad_norm": 0.17436595261096954, "learning_rate": 0.0005708743932040124, "loss": 2.4748, "step": 451060 }, { "epoch": 0.8986317416804794, "grad_norm": 0.18574689328670502, "learning_rate": 0.0005707344543437702, "loss": 2.4482, "step": 451070 }, { "epoch": 0.8986516639041183, "grad_norm": 0.2151842713356018, "learning_rate": 0.0005705945291835491, "loss": 2.4674, "step": 451080 }, { "epoch": 0.8986715861277572, "grad_norm": 0.18244877457618713, "learning_rate": 0.0005704546177193255, "loss": 2.4643, "step": 451090 }, { "epoch": 0.8986915083513961, "grad_norm": 0.18569415807724, "learning_rate": 0.0005703147199470787, "loss": 2.4446, "step": 451100 }, { "epoch": 0.8987114305750351, "grad_norm": 0.20409466326236725, "learning_rate": 0.0005701748358627898, "loss": 2.4402, "step": 451110 }, { "epoch": 0.898731352798674, "grad_norm": 0.18366314470767975, "learning_rate": 0.000570034965462442, "loss": 2.4594, "step": 451120 }, { "epoch": 0.8987512750223129, "grad_norm": 0.198212668299675, "learning_rate": 0.0005698951087420197, "loss": 2.4604, "step": 451130 }, { "epoch": 0.8987711972459518, "grad_norm": 0.18951359391212463, "learning_rate": 0.0005697552656975102, "loss": 2.4465, "step": 451140 }, { "epoch": 0.8987911194695907, "grad_norm": 0.1978580206632614, "learning_rate": 0.0005696154363249022, "loss": 2.442, "step": 451150 }, { "epoch": 0.8988110416932297, "grad_norm": 0.2195516675710678, "learning_rate": 0.0005694756206201866, "loss": 2.4621, "step": 451160 }, { "epoch": 0.8988309639168686, "grad_norm": 0.18755914270877838, "learning_rate": 0.0005693358185793564, "loss": 2.4552, "step": 451170 }, { "epoch": 0.8988508861405075, "grad_norm": 0.18765120208263397, "learning_rate": 0.0005691960301984063, "loss": 2.465, "step": 451180 }, { "epoch": 0.8988708083641463, "grad_norm": 0.20982930064201355, "learning_rate": 0.0005690562554733328, "loss": 2.4695, "step": 451190 }, { "epoch": 0.8988907305877852, "grad_norm": 0.20890912413597107, "learning_rate": 0.0005689164944001346, "loss": 2.4569, "step": 451200 }, { "epoch": 0.8989106528114242, "grad_norm": 0.187676802277565, "learning_rate": 0.0005687767469748124, "loss": 2.4465, "step": 451210 }, { "epoch": 0.8989305750350631, "grad_norm": 0.1865345686674118, "learning_rate": 0.0005686370131933689, "loss": 2.457, "step": 451220 }, { "epoch": 0.898950497258702, "grad_norm": 0.1927390843629837, "learning_rate": 0.0005684972930518085, "loss": 2.4548, "step": 451230 }, { "epoch": 0.8989704194823409, "grad_norm": 0.20030896365642548, "learning_rate": 0.0005683575865461376, "loss": 2.4711, "step": 451240 }, { "epoch": 0.8989903417059798, "grad_norm": 0.2106715738773346, "learning_rate": 0.0005682178936723647, "loss": 2.4518, "step": 451250 }, { "epoch": 0.8990102639296188, "grad_norm": 0.19887448847293854, "learning_rate": 0.0005680782144265002, "loss": 2.4626, "step": 451260 }, { "epoch": 0.8990301861532577, "grad_norm": 0.1831067055463791, "learning_rate": 0.0005679385488045563, "loss": 2.4426, "step": 451270 }, { "epoch": 0.8990501083768966, "grad_norm": 0.1858729124069214, "learning_rate": 0.0005677988968025474, "loss": 2.4566, "step": 451280 }, { "epoch": 0.8990700306005355, "grad_norm": 0.17507916688919067, "learning_rate": 0.0005676592584164897, "loss": 2.4528, "step": 451290 }, { "epoch": 0.8990899528241745, "grad_norm": 0.1826171875, "learning_rate": 0.0005675196336424013, "loss": 2.4545, "step": 451300 }, { "epoch": 0.8991098750478134, "grad_norm": 0.2175087183713913, "learning_rate": 0.0005673800224763024, "loss": 2.4496, "step": 451310 }, { "epoch": 0.8991297972714523, "grad_norm": 0.19759540259838104, "learning_rate": 0.0005672404249142147, "loss": 2.4497, "step": 451320 }, { "epoch": 0.8991497194950911, "grad_norm": 0.22000139951705933, "learning_rate": 0.0005671008409521626, "loss": 2.4658, "step": 451330 }, { "epoch": 0.89916964171873, "grad_norm": 0.19496189057826996, "learning_rate": 0.0005669612705861716, "loss": 2.4481, "step": 451340 }, { "epoch": 0.899189563942369, "grad_norm": 0.1772204339504242, "learning_rate": 0.0005668217138122697, "loss": 2.4563, "step": 451350 }, { "epoch": 0.8992094861660079, "grad_norm": 0.19874250888824463, "learning_rate": 0.0005666821706264867, "loss": 2.4434, "step": 451360 }, { "epoch": 0.8992294083896468, "grad_norm": 0.1842503547668457, "learning_rate": 0.0005665426410248542, "loss": 2.4624, "step": 451370 }, { "epoch": 0.8992493306132857, "grad_norm": 0.19719642400741577, "learning_rate": 0.0005664031250034059, "loss": 2.4531, "step": 451380 }, { "epoch": 0.8992692528369246, "grad_norm": 0.2097540944814682, "learning_rate": 0.0005662636225581774, "loss": 2.4446, "step": 451390 }, { "epoch": 0.8992891750605636, "grad_norm": 0.2355320006608963, "learning_rate": 0.0005661241336852063, "loss": 2.4503, "step": 451400 }, { "epoch": 0.8993090972842025, "grad_norm": 0.1993074268102646, "learning_rate": 0.0005659846583805319, "loss": 2.4505, "step": 451410 }, { "epoch": 0.8993290195078414, "grad_norm": 0.1956937313079834, "learning_rate": 0.0005658451966401954, "loss": 2.4639, "step": 451420 }, { "epoch": 0.8993489417314803, "grad_norm": 0.19398047029972076, "learning_rate": 0.0005657057484602402, "loss": 2.4511, "step": 451430 }, { "epoch": 0.8993688639551192, "grad_norm": 0.18594932556152344, "learning_rate": 0.0005655663138367115, "loss": 2.4653, "step": 451440 }, { "epoch": 0.8993887861787582, "grad_norm": 0.2130926102399826, "learning_rate": 0.0005654268927656563, "loss": 2.4596, "step": 451450 }, { "epoch": 0.8994087084023971, "grad_norm": 0.19360363483428955, "learning_rate": 0.0005652874852431238, "loss": 2.4732, "step": 451460 }, { "epoch": 0.899428630626036, "grad_norm": 0.1971202939748764, "learning_rate": 0.0005651480912651647, "loss": 2.4472, "step": 451470 }, { "epoch": 0.8994485528496748, "grad_norm": 0.19820819795131683, "learning_rate": 0.0005650087108278321, "loss": 2.4622, "step": 451480 }, { "epoch": 0.8994684750733137, "grad_norm": 0.19447539746761322, "learning_rate": 0.0005648693439271811, "loss": 2.4522, "step": 451490 }, { "epoch": 0.8994883972969527, "grad_norm": 0.18800917267799377, "learning_rate": 0.0005647299905592676, "loss": 2.4411, "step": 451500 }, { "epoch": 0.8995083195205916, "grad_norm": 0.18486498296260834, "learning_rate": 0.0005645906507201508, "loss": 2.4557, "step": 451510 }, { "epoch": 0.8995282417442305, "grad_norm": 0.2147798091173172, "learning_rate": 0.0005644513244058911, "loss": 2.4534, "step": 451520 }, { "epoch": 0.8995481639678694, "grad_norm": 0.18444961309432983, "learning_rate": 0.0005643120116125511, "loss": 2.4623, "step": 451530 }, { "epoch": 0.8995680861915083, "grad_norm": 0.21053563058376312, "learning_rate": 0.0005641727123361949, "loss": 2.4677, "step": 451540 }, { "epoch": 0.8995880084151473, "grad_norm": 0.19920064508914948, "learning_rate": 0.0005640334265728886, "loss": 2.461, "step": 451550 }, { "epoch": 0.8996079306387862, "grad_norm": 0.21947957575321198, "learning_rate": 0.0005638941543187008, "loss": 2.4466, "step": 451560 }, { "epoch": 0.8996278528624251, "grad_norm": 0.18988928198814392, "learning_rate": 0.0005637548955697012, "loss": 2.4587, "step": 451570 }, { "epoch": 0.899647775086064, "grad_norm": 0.1869768351316452, "learning_rate": 0.000563615650321962, "loss": 2.4553, "step": 451580 }, { "epoch": 0.899667697309703, "grad_norm": 0.2000301033258438, "learning_rate": 0.0005634764185715571, "loss": 2.4574, "step": 451590 }, { "epoch": 0.8996876195333419, "grad_norm": 0.2085288017988205, "learning_rate": 0.0005633372003145623, "loss": 2.457, "step": 451600 }, { "epoch": 0.8997075417569808, "grad_norm": 0.20412315428256989, "learning_rate": 0.0005631979955470551, "loss": 2.4372, "step": 451610 }, { "epoch": 0.8997274639806196, "grad_norm": 0.21524256467819214, "learning_rate": 0.0005630588042651155, "loss": 2.4459, "step": 451620 }, { "epoch": 0.8997473862042585, "grad_norm": 0.18696652352809906, "learning_rate": 0.0005629196264648244, "loss": 2.4517, "step": 451630 }, { "epoch": 0.8997673084278975, "grad_norm": 0.19121624529361725, "learning_rate": 0.0005627804621422657, "loss": 2.4638, "step": 451640 }, { "epoch": 0.8997872306515364, "grad_norm": 0.19348399341106415, "learning_rate": 0.0005626413112935241, "loss": 2.4615, "step": 451650 }, { "epoch": 0.8998071528751753, "grad_norm": 0.20057262480258942, "learning_rate": 0.0005625021739146874, "loss": 2.4548, "step": 451660 }, { "epoch": 0.8998270750988142, "grad_norm": 0.22579902410507202, "learning_rate": 0.0005623630500018442, "loss": 2.4634, "step": 451670 }, { "epoch": 0.8998469973224531, "grad_norm": 0.21222475171089172, "learning_rate": 0.0005622239395510857, "loss": 2.4366, "step": 451680 }, { "epoch": 0.8998669195460921, "grad_norm": 0.1886952519416809, "learning_rate": 0.0005620848425585048, "loss": 2.4462, "step": 451690 }, { "epoch": 0.899886841769731, "grad_norm": 0.19261983036994934, "learning_rate": 0.000561945759020196, "loss": 2.4622, "step": 451700 }, { "epoch": 0.8999067639933699, "grad_norm": 0.17774201929569244, "learning_rate": 0.0005618066889322562, "loss": 2.439, "step": 451710 }, { "epoch": 0.8999266862170088, "grad_norm": 0.22564832866191864, "learning_rate": 0.0005616676322907839, "loss": 2.4434, "step": 451720 }, { "epoch": 0.8999466084406477, "grad_norm": 0.1929023116827011, "learning_rate": 0.0005615285890918791, "loss": 2.4528, "step": 451730 }, { "epoch": 0.8999665306642867, "grad_norm": 0.20327380299568176, "learning_rate": 0.0005613895593316445, "loss": 2.4557, "step": 451740 }, { "epoch": 0.8999864528879256, "grad_norm": 0.21863925457000732, "learning_rate": 0.0005612505430061843, "loss": 2.448, "step": 451750 }, { "epoch": 0.9000063751115644, "grad_norm": 0.18261504173278809, "learning_rate": 0.0005611115401116043, "loss": 2.4452, "step": 451760 }, { "epoch": 0.9000262973352033, "grad_norm": 0.1923189014196396, "learning_rate": 0.0005609725506440122, "loss": 2.4653, "step": 451770 }, { "epoch": 0.9000462195588422, "grad_norm": 0.18680353462696075, "learning_rate": 0.0005608335745995183, "loss": 2.4372, "step": 451780 }, { "epoch": 0.9000661417824812, "grad_norm": 0.17182467877864838, "learning_rate": 0.0005606946119742342, "loss": 2.4493, "step": 451790 }, { "epoch": 0.9000860640061201, "grad_norm": 0.1908724457025528, "learning_rate": 0.0005605556627642734, "loss": 2.462, "step": 451800 }, { "epoch": 0.900105986229759, "grad_norm": 0.2071850746870041, "learning_rate": 0.0005604167269657514, "loss": 2.434, "step": 451810 }, { "epoch": 0.9001259084533979, "grad_norm": 0.20451146364212036, "learning_rate": 0.0005602778045747854, "loss": 2.446, "step": 451820 }, { "epoch": 0.9001458306770368, "grad_norm": 0.19302622973918915, "learning_rate": 0.0005601388955874946, "loss": 2.4545, "step": 451830 }, { "epoch": 0.9001657529006758, "grad_norm": 0.20317789912223816, "learning_rate": 0.0005600000000000001, "loss": 2.4704, "step": 451840 }, { "epoch": 0.9001856751243147, "grad_norm": 0.21801325678825378, "learning_rate": 0.0005598611178084247, "loss": 2.4484, "step": 451850 }, { "epoch": 0.9002055973479536, "grad_norm": 0.19955936074256897, "learning_rate": 0.0005597222490088936, "loss": 2.4635, "step": 451860 }, { "epoch": 0.9002255195715925, "grad_norm": 0.2089611142873764, "learning_rate": 0.0005595833935975327, "loss": 2.4592, "step": 451870 }, { "epoch": 0.9002454417952315, "grad_norm": 0.19915688037872314, "learning_rate": 0.0005594445515704713, "loss": 2.4541, "step": 451880 }, { "epoch": 0.9002653640188704, "grad_norm": 0.19175240397453308, "learning_rate": 0.0005593057229238397, "loss": 2.4518, "step": 451890 }, { "epoch": 0.9002852862425093, "grad_norm": 0.2020522803068161, "learning_rate": 0.00055916690765377, "loss": 2.4446, "step": 451900 }, { "epoch": 0.9003052084661481, "grad_norm": 0.20026147365570068, "learning_rate": 0.0005590281057563962, "loss": 2.4614, "step": 451910 }, { "epoch": 0.900325130689787, "grad_norm": 0.21585597097873688, "learning_rate": 0.0005588893172278544, "loss": 2.4567, "step": 451920 }, { "epoch": 0.900345052913426, "grad_norm": 0.19411544501781464, "learning_rate": 0.0005587505420642825, "loss": 2.4513, "step": 451930 }, { "epoch": 0.9003649751370649, "grad_norm": 0.20152220129966736, "learning_rate": 0.00055861178026182, "loss": 2.4659, "step": 451940 }, { "epoch": 0.9003848973607038, "grad_norm": 0.19657984375953674, "learning_rate": 0.0005584730318166087, "loss": 2.4517, "step": 451950 }, { "epoch": 0.9004048195843427, "grad_norm": 0.1947202980518341, "learning_rate": 0.0005583342967247922, "loss": 2.4441, "step": 451960 }, { "epoch": 0.9004247418079816, "grad_norm": 0.17761097848415375, "learning_rate": 0.0005581955749825152, "loss": 2.4646, "step": 451970 }, { "epoch": 0.9004446640316206, "grad_norm": 0.19164258241653442, "learning_rate": 0.0005580568665859253, "loss": 2.4481, "step": 451980 }, { "epoch": 0.9004645862552595, "grad_norm": 0.18174391984939575, "learning_rate": 0.0005579181715311714, "loss": 2.4526, "step": 451990 }, { "epoch": 0.9004845084788984, "grad_norm": 0.19785037636756897, "learning_rate": 0.0005577794898144042, "loss": 2.4558, "step": 452000 }, { "epoch": 0.9005044307025373, "grad_norm": 0.2174323946237564, "learning_rate": 0.0005576408214317767, "loss": 2.4434, "step": 452010 }, { "epoch": 0.9005243529261762, "grad_norm": 0.23530153930187225, "learning_rate": 0.0005575021663794431, "loss": 2.4434, "step": 452020 }, { "epoch": 0.9005442751498152, "grad_norm": 0.17477528750896454, "learning_rate": 0.00055736352465356, "loss": 2.4655, "step": 452030 }, { "epoch": 0.9005641973734541, "grad_norm": 0.19927386939525604, "learning_rate": 0.0005572248962502853, "loss": 2.4391, "step": 452040 }, { "epoch": 0.900584119597093, "grad_norm": 0.18382486701011658, "learning_rate": 0.0005570862811657795, "loss": 2.4608, "step": 452050 }, { "epoch": 0.9006040418207318, "grad_norm": 0.19665896892547607, "learning_rate": 0.0005569476793962042, "loss": 2.4489, "step": 452060 }, { "epoch": 0.9006239640443707, "grad_norm": 0.3223107159137726, "learning_rate": 0.0005568090909377235, "loss": 2.4368, "step": 452070 }, { "epoch": 0.9006438862680097, "grad_norm": 0.19223153591156006, "learning_rate": 0.0005566705157865026, "loss": 2.4554, "step": 452080 }, { "epoch": 0.9006638084916486, "grad_norm": 0.2016596794128418, "learning_rate": 0.0005565319539387095, "loss": 2.4604, "step": 452090 }, { "epoch": 0.9006837307152875, "grad_norm": 0.20837055146694183, "learning_rate": 0.0005563934053905129, "loss": 2.4569, "step": 452100 }, { "epoch": 0.9007036529389264, "grad_norm": 0.20427067577838898, "learning_rate": 0.0005562548701380843, "loss": 2.4581, "step": 452110 }, { "epoch": 0.9007235751625653, "grad_norm": 0.19529719650745392, "learning_rate": 0.0005561163481775964, "loss": 2.4431, "step": 452120 }, { "epoch": 0.9007434973862043, "grad_norm": 0.19504962861537933, "learning_rate": 0.0005559778395052242, "loss": 2.4509, "step": 452130 }, { "epoch": 0.9007634196098432, "grad_norm": 0.1762164831161499, "learning_rate": 0.0005558393441171443, "loss": 2.4507, "step": 452140 }, { "epoch": 0.9007833418334821, "grad_norm": 0.1923043131828308, "learning_rate": 0.0005557008620095352, "loss": 2.452, "step": 452150 }, { "epoch": 0.900803264057121, "grad_norm": 0.20624177157878876, "learning_rate": 0.0005555623931785769, "loss": 2.4559, "step": 452160 }, { "epoch": 0.90082318628076, "grad_norm": 0.18906237185001373, "learning_rate": 0.000555423937620452, "loss": 2.4601, "step": 452170 }, { "epoch": 0.9008431085043989, "grad_norm": 0.18926797807216644, "learning_rate": 0.000555285495331344, "loss": 2.436, "step": 452180 }, { "epoch": 0.9008630307280378, "grad_norm": 0.204231396317482, "learning_rate": 0.0005551470663074389, "loss": 2.4649, "step": 452190 }, { "epoch": 0.9008829529516766, "grad_norm": 0.21435490250587463, "learning_rate": 0.0005550086505449245, "loss": 2.4579, "step": 452200 }, { "epoch": 0.9009028751753155, "grad_norm": 0.19120974838733673, "learning_rate": 0.0005548702480399901, "loss": 2.4495, "step": 452210 }, { "epoch": 0.9009227973989545, "grad_norm": 0.18114329874515533, "learning_rate": 0.0005547318587888265, "loss": 2.456, "step": 452220 }, { "epoch": 0.9009427196225934, "grad_norm": 0.18280069530010223, "learning_rate": 0.0005545934827876277, "loss": 2.4508, "step": 452230 }, { "epoch": 0.9009626418462323, "grad_norm": 0.18815423548221588, "learning_rate": 0.0005544551200325878, "loss": 2.46, "step": 452240 }, { "epoch": 0.9009825640698712, "grad_norm": 0.17746247351169586, "learning_rate": 0.000554316770519904, "loss": 2.4595, "step": 452250 }, { "epoch": 0.9010024862935101, "grad_norm": 0.17943242192268372, "learning_rate": 0.0005541784342457748, "loss": 2.4643, "step": 452260 }, { "epoch": 0.9010224085171491, "grad_norm": 0.19319483637809753, "learning_rate": 0.0005540401112064, "loss": 2.4382, "step": 452270 }, { "epoch": 0.901042330740788, "grad_norm": 0.20252849161624908, "learning_rate": 0.0005539018013979825, "loss": 2.4562, "step": 452280 }, { "epoch": 0.9010622529644269, "grad_norm": 0.1914633810520172, "learning_rate": 0.0005537635048167259, "loss": 2.4437, "step": 452290 }, { "epoch": 0.9010821751880658, "grad_norm": 0.20146282017230988, "learning_rate": 0.0005536252214588364, "loss": 2.4477, "step": 452300 }, { "epoch": 0.9011020974117047, "grad_norm": 0.22270934283733368, "learning_rate": 0.0005534869513205212, "loss": 2.4324, "step": 452310 }, { "epoch": 0.9011220196353437, "grad_norm": 0.2239583432674408, "learning_rate": 0.00055334869439799, "loss": 2.4561, "step": 452320 }, { "epoch": 0.9011419418589826, "grad_norm": 0.2065618485212326, "learning_rate": 0.000553210450687454, "loss": 2.4427, "step": 452330 }, { "epoch": 0.9011618640826214, "grad_norm": 0.188986137509346, "learning_rate": 0.0005530722201851261, "loss": 2.448, "step": 452340 }, { "epoch": 0.9011817863062603, "grad_norm": 0.20678411424160004, "learning_rate": 0.0005529340028872216, "loss": 2.4322, "step": 452350 }, { "epoch": 0.9012017085298992, "grad_norm": 0.19355225563049316, "learning_rate": 0.0005527957987899565, "loss": 2.4398, "step": 452360 }, { "epoch": 0.9012216307535382, "grad_norm": 0.19014465808868408, "learning_rate": 0.0005526576078895498, "loss": 2.4522, "step": 452370 }, { "epoch": 0.9012415529771771, "grad_norm": 0.19799426198005676, "learning_rate": 0.0005525194301822216, "loss": 2.4398, "step": 452380 }, { "epoch": 0.901261475200816, "grad_norm": 0.19054029881954193, "learning_rate": 0.0005523812656641942, "loss": 2.4554, "step": 452390 }, { "epoch": 0.9012813974244549, "grad_norm": 0.1941710263490677, "learning_rate": 0.0005522431143316913, "loss": 2.45, "step": 452400 }, { "epoch": 0.9013013196480938, "grad_norm": 0.2077140510082245, "learning_rate": 0.0005521049761809387, "loss": 2.4348, "step": 452410 }, { "epoch": 0.9013212418717328, "grad_norm": 0.19870884716510773, "learning_rate": 0.0005519668512081639, "loss": 2.4437, "step": 452420 }, { "epoch": 0.9013411640953717, "grad_norm": 0.18802261352539062, "learning_rate": 0.000551828739409596, "loss": 2.4474, "step": 452430 }, { "epoch": 0.9013610863190106, "grad_norm": 0.2018231898546219, "learning_rate": 0.0005516906407814662, "loss": 2.4473, "step": 452440 }, { "epoch": 0.9013810085426495, "grad_norm": 0.18908433616161346, "learning_rate": 0.0005515525553200076, "loss": 2.4505, "step": 452450 }, { "epoch": 0.9014009307662884, "grad_norm": 0.20336385071277618, "learning_rate": 0.0005514144830214545, "loss": 2.4481, "step": 452460 }, { "epoch": 0.9014208529899274, "grad_norm": 0.19191570580005646, "learning_rate": 0.0005512764238820436, "loss": 2.4439, "step": 452470 }, { "epoch": 0.9014407752135662, "grad_norm": 0.19277247786521912, "learning_rate": 0.0005511383778980133, "loss": 2.4462, "step": 452480 }, { "epoch": 0.9014606974372051, "grad_norm": 0.21722044050693512, "learning_rate": 0.0005510003450656036, "loss": 2.4637, "step": 452490 }, { "epoch": 0.901480619660844, "grad_norm": 0.20063616335391998, "learning_rate": 0.000550862325381056, "loss": 2.453, "step": 452500 }, { "epoch": 0.901500541884483, "grad_norm": 0.22274640202522278, "learning_rate": 0.0005507243188406148, "loss": 2.4458, "step": 452510 }, { "epoch": 0.9015204641081219, "grad_norm": 0.17864449322223663, "learning_rate": 0.000550586325440525, "loss": 2.4506, "step": 452520 }, { "epoch": 0.9015403863317608, "grad_norm": 0.2165374755859375, "learning_rate": 0.0005504483451770337, "loss": 2.4431, "step": 452530 }, { "epoch": 0.9015603085553997, "grad_norm": 0.20690466463565826, "learning_rate": 0.0005503103780463902, "loss": 2.4561, "step": 452540 }, { "epoch": 0.9015802307790386, "grad_norm": 0.1904328316450119, "learning_rate": 0.0005501724240448454, "loss": 2.46, "step": 452550 }, { "epoch": 0.9016001530026776, "grad_norm": 0.2060011774301529, "learning_rate": 0.0005500344831686515, "loss": 2.4325, "step": 452560 }, { "epoch": 0.9016200752263165, "grad_norm": 0.19246108829975128, "learning_rate": 0.000549896555414063, "loss": 2.4387, "step": 452570 }, { "epoch": 0.9016399974499554, "grad_norm": 0.1845373660326004, "learning_rate": 0.0005497586407773358, "loss": 2.4476, "step": 452580 }, { "epoch": 0.9016599196735943, "grad_norm": 0.19468159973621368, "learning_rate": 0.0005496207392547285, "loss": 2.4472, "step": 452590 }, { "epoch": 0.9016798418972332, "grad_norm": 0.17481249570846558, "learning_rate": 0.0005494828508425003, "loss": 2.4494, "step": 452600 }, { "epoch": 0.9016997641208722, "grad_norm": 0.19245645403862, "learning_rate": 0.0005493449755369129, "loss": 2.4526, "step": 452610 }, { "epoch": 0.901719686344511, "grad_norm": 0.20744094252586365, "learning_rate": 0.0005492071133342294, "loss": 2.454, "step": 452620 }, { "epoch": 0.9017396085681499, "grad_norm": 0.19863386452198029, "learning_rate": 0.0005490692642307147, "loss": 2.4553, "step": 452630 }, { "epoch": 0.9017595307917888, "grad_norm": 0.19435572624206543, "learning_rate": 0.0005489314282226356, "loss": 2.457, "step": 452640 }, { "epoch": 0.9017794530154277, "grad_norm": 0.20040126144886017, "learning_rate": 0.0005487936053062612, "loss": 2.4576, "step": 452650 }, { "epoch": 0.9017993752390667, "grad_norm": 0.1982489675283432, "learning_rate": 0.0005486557954778613, "loss": 2.465, "step": 452660 }, { "epoch": 0.9018192974627056, "grad_norm": 0.20028004050254822, "learning_rate": 0.0005485179987337081, "loss": 2.4513, "step": 452670 }, { "epoch": 0.9018392196863445, "grad_norm": 0.1987629383802414, "learning_rate": 0.0005483802150700756, "loss": 2.4405, "step": 452680 }, { "epoch": 0.9018591419099834, "grad_norm": 0.20308689773082733, "learning_rate": 0.0005482424444832395, "loss": 2.4516, "step": 452690 }, { "epoch": 0.9018790641336223, "grad_norm": 0.186989888548851, "learning_rate": 0.0005481046869694771, "loss": 2.459, "step": 452700 }, { "epoch": 0.9018989863572613, "grad_norm": 0.1863551288843155, "learning_rate": 0.0005479669425250677, "loss": 2.4516, "step": 452710 }, { "epoch": 0.9019189085809002, "grad_norm": 0.17743593454360962, "learning_rate": 0.0005478292111462922, "loss": 2.4453, "step": 452720 }, { "epoch": 0.9019388308045391, "grad_norm": 0.18519648909568787, "learning_rate": 0.0005476914928294334, "loss": 2.4332, "step": 452730 }, { "epoch": 0.901958753028178, "grad_norm": 0.21317917108535767, "learning_rate": 0.0005475537875707754, "loss": 2.4461, "step": 452740 }, { "epoch": 0.9019786752518169, "grad_norm": 0.21354132890701294, "learning_rate": 0.0005474160953666049, "loss": 2.4637, "step": 452750 }, { "epoch": 0.9019985974754559, "grad_norm": 0.1913042962551117, "learning_rate": 0.0005472784162132098, "loss": 2.4642, "step": 452760 }, { "epoch": 0.9020185196990947, "grad_norm": 0.18473368883132935, "learning_rate": 0.0005471407501068799, "loss": 2.4511, "step": 452770 }, { "epoch": 0.9020384419227336, "grad_norm": 0.19225738942623138, "learning_rate": 0.0005470030970439061, "loss": 2.4479, "step": 452780 }, { "epoch": 0.9020583641463725, "grad_norm": 0.17813290655612946, "learning_rate": 0.0005468654570205826, "loss": 2.4351, "step": 452790 }, { "epoch": 0.9020782863700115, "grad_norm": 0.18408986926078796, "learning_rate": 0.000546727830033204, "loss": 2.4429, "step": 452800 }, { "epoch": 0.9020982085936504, "grad_norm": 0.18621857464313507, "learning_rate": 0.0005465902160780669, "loss": 2.4579, "step": 452810 }, { "epoch": 0.9021181308172893, "grad_norm": 0.1944374442100525, "learning_rate": 0.0005464526151514701, "loss": 2.4442, "step": 452820 }, { "epoch": 0.9021380530409282, "grad_norm": 0.18716147541999817, "learning_rate": 0.0005463150272497139, "loss": 2.4651, "step": 452830 }, { "epoch": 0.9021579752645671, "grad_norm": 0.20897431671619415, "learning_rate": 0.0005461774523691001, "loss": 2.4544, "step": 452840 }, { "epoch": 0.9021778974882061, "grad_norm": 0.18286672234535217, "learning_rate": 0.0005460398905059327, "loss": 2.4516, "step": 452850 }, { "epoch": 0.902197819711845, "grad_norm": 0.1913984715938568, "learning_rate": 0.0005459023416565172, "loss": 2.4626, "step": 452860 }, { "epoch": 0.9022177419354839, "grad_norm": 0.1947515308856964, "learning_rate": 0.000545764805817161, "loss": 2.4376, "step": 452870 }, { "epoch": 0.9022376641591228, "grad_norm": 0.20115159451961517, "learning_rate": 0.0005456272829841727, "loss": 2.4453, "step": 452880 }, { "epoch": 0.9022575863827617, "grad_norm": 0.1954306811094284, "learning_rate": 0.0005454897731538633, "loss": 2.4709, "step": 452890 }, { "epoch": 0.9022775086064007, "grad_norm": 0.20128917694091797, "learning_rate": 0.0005453522763225456, "loss": 2.4431, "step": 452900 }, { "epoch": 0.9022974308300395, "grad_norm": 0.19593164324760437, "learning_rate": 0.0005452147924865333, "loss": 2.4393, "step": 452910 }, { "epoch": 0.9023173530536784, "grad_norm": 0.19373027980327606, "learning_rate": 0.0005450773216421432, "loss": 2.4653, "step": 452920 }, { "epoch": 0.9023372752773173, "grad_norm": 0.19191722571849823, "learning_rate": 0.0005449398637856922, "loss": 2.4701, "step": 452930 }, { "epoch": 0.9023571975009562, "grad_norm": 0.17822827398777008, "learning_rate": 0.0005448024189135004, "loss": 2.4523, "step": 452940 }, { "epoch": 0.9023771197245952, "grad_norm": 0.18758755922317505, "learning_rate": 0.0005446649870218885, "loss": 2.4502, "step": 452950 }, { "epoch": 0.9023970419482341, "grad_norm": 0.19720175862312317, "learning_rate": 0.00054452756810718, "loss": 2.4468, "step": 452960 }, { "epoch": 0.902416964171873, "grad_norm": 0.18572843074798584, "learning_rate": 0.000544390162165699, "loss": 2.4416, "step": 452970 }, { "epoch": 0.9024368863955119, "grad_norm": 0.19136445224285126, "learning_rate": 0.0005442527691937724, "loss": 2.4511, "step": 452980 }, { "epoch": 0.9024568086191508, "grad_norm": 0.19963745772838593, "learning_rate": 0.0005441153891877282, "loss": 2.4462, "step": 452990 }, { "epoch": 0.9024767308427898, "grad_norm": 0.18669170141220093, "learning_rate": 0.0005439780221438964, "loss": 2.4324, "step": 453000 }, { "epoch": 0.9024966530664287, "grad_norm": 0.21889157593250275, "learning_rate": 0.0005438406680586083, "loss": 2.4517, "step": 453010 }, { "epoch": 0.9025165752900676, "grad_norm": 0.1963021606206894, "learning_rate": 0.0005437033269281977, "loss": 2.447, "step": 453020 }, { "epoch": 0.9025364975137065, "grad_norm": 0.22221742570400238, "learning_rate": 0.0005435659987489994, "loss": 2.4591, "step": 453030 }, { "epoch": 0.9025564197373454, "grad_norm": 0.17573091387748718, "learning_rate": 0.0005434286835173501, "loss": 2.4531, "step": 453040 }, { "epoch": 0.9025763419609844, "grad_norm": 0.1994946002960205, "learning_rate": 0.0005432913812295886, "loss": 2.4436, "step": 453050 }, { "epoch": 0.9025962641846232, "grad_norm": 0.19068659842014313, "learning_rate": 0.000543154091882055, "loss": 2.4382, "step": 453060 }, { "epoch": 0.9026161864082621, "grad_norm": 0.2178780883550644, "learning_rate": 0.0005430168154710915, "loss": 2.4444, "step": 453070 }, { "epoch": 0.902636108631901, "grad_norm": 0.20937052369117737, "learning_rate": 0.0005428795519930411, "loss": 2.4592, "step": 453080 }, { "epoch": 0.90265603085554, "grad_norm": 0.1917959451675415, "learning_rate": 0.0005427423014442503, "loss": 2.4482, "step": 453090 }, { "epoch": 0.9026759530791789, "grad_norm": 0.2036132663488388, "learning_rate": 0.0005426050638210656, "loss": 2.4421, "step": 453100 }, { "epoch": 0.9026958753028178, "grad_norm": 0.18830563127994537, "learning_rate": 0.0005424678391198362, "loss": 2.4491, "step": 453110 }, { "epoch": 0.9027157975264567, "grad_norm": 0.20123113691806793, "learning_rate": 0.0005423306273369122, "loss": 2.4478, "step": 453120 }, { "epoch": 0.9027357197500956, "grad_norm": 0.18674300611019135, "learning_rate": 0.0005421934284686463, "loss": 2.4403, "step": 453130 }, { "epoch": 0.9027556419737346, "grad_norm": 0.19695225358009338, "learning_rate": 0.0005420562425113924, "loss": 2.4522, "step": 453140 }, { "epoch": 0.9027755641973735, "grad_norm": 0.205143541097641, "learning_rate": 0.0005419190694615062, "loss": 2.4374, "step": 453150 }, { "epoch": 0.9027954864210124, "grad_norm": 0.19849178194999695, "learning_rate": 0.0005417819093153453, "loss": 2.4614, "step": 453160 }, { "epoch": 0.9028154086446513, "grad_norm": 0.20603728294372559, "learning_rate": 0.0005416447620692688, "loss": 2.4529, "step": 453170 }, { "epoch": 0.9028353308682902, "grad_norm": 0.2014337033033371, "learning_rate": 0.0005415076277196373, "loss": 2.4545, "step": 453180 }, { "epoch": 0.9028552530919292, "grad_norm": 0.21374821662902832, "learning_rate": 0.0005413705062628138, "loss": 2.4664, "step": 453190 }, { "epoch": 0.902875175315568, "grad_norm": 0.20320133864879608, "learning_rate": 0.0005412333976951624, "loss": 2.4484, "step": 453200 }, { "epoch": 0.9028950975392069, "grad_norm": 0.1887580305337906, "learning_rate": 0.0005410963020130493, "loss": 2.4558, "step": 453210 }, { "epoch": 0.9029150197628458, "grad_norm": 0.2084531933069229, "learning_rate": 0.0005409592192128418, "loss": 2.4356, "step": 453220 }, { "epoch": 0.9029349419864847, "grad_norm": 0.1891615241765976, "learning_rate": 0.0005408221492909098, "loss": 2.4489, "step": 453230 }, { "epoch": 0.9029548642101237, "grad_norm": 0.2058693766593933, "learning_rate": 0.0005406850922436241, "loss": 2.4565, "step": 453240 }, { "epoch": 0.9029747864337626, "grad_norm": 0.19203034043312073, "learning_rate": 0.0005405480480673577, "loss": 2.4537, "step": 453250 }, { "epoch": 0.9029947086574015, "grad_norm": 0.23965246975421906, "learning_rate": 0.0005404110167584848, "loss": 2.4515, "step": 453260 }, { "epoch": 0.9030146308810404, "grad_norm": 0.20037107169628143, "learning_rate": 0.0005402739983133822, "loss": 2.4534, "step": 453270 }, { "epoch": 0.9030345531046793, "grad_norm": 0.20489515364170074, "learning_rate": 0.0005401369927284272, "loss": 2.447, "step": 453280 }, { "epoch": 0.9030544753283183, "grad_norm": 0.1949852705001831, "learning_rate": 0.00054, "loss": 2.45, "step": 453290 }, { "epoch": 0.9030743975519572, "grad_norm": 0.19945259392261505, "learning_rate": 0.0005398630201244816, "loss": 2.4519, "step": 453300 }, { "epoch": 0.9030943197755961, "grad_norm": 0.17577600479125977, "learning_rate": 0.0005397260530982551, "loss": 2.4377, "step": 453310 }, { "epoch": 0.903114241999235, "grad_norm": 0.1926383674144745, "learning_rate": 0.0005395890989177052, "loss": 2.4506, "step": 453320 }, { "epoch": 0.9031341642228738, "grad_norm": 0.27101290225982666, "learning_rate": 0.0005394521575792186, "loss": 2.442, "step": 453330 }, { "epoch": 0.9031540864465128, "grad_norm": 0.19756680727005005, "learning_rate": 0.000539315229079183, "loss": 2.4701, "step": 453340 }, { "epoch": 0.9031740086701517, "grad_norm": 0.22247153520584106, "learning_rate": 0.0005391783134139883, "loss": 2.4373, "step": 453350 }, { "epoch": 0.9031939308937906, "grad_norm": 0.21741124987602234, "learning_rate": 0.0005390414105800261, "loss": 2.4547, "step": 453360 }, { "epoch": 0.9032138531174295, "grad_norm": 0.19727779924869537, "learning_rate": 0.0005389045205736895, "loss": 2.4487, "step": 453370 }, { "epoch": 0.9032337753410685, "grad_norm": 0.19532273709774017, "learning_rate": 0.0005387676433913735, "loss": 2.4336, "step": 453380 }, { "epoch": 0.9032536975647074, "grad_norm": 0.1793842911720276, "learning_rate": 0.0005386307790294746, "loss": 2.4465, "step": 453390 }, { "epoch": 0.9032736197883463, "grad_norm": 0.20658858120441437, "learning_rate": 0.0005384939274843912, "loss": 2.4442, "step": 453400 }, { "epoch": 0.9032935420119852, "grad_norm": 0.1933632791042328, "learning_rate": 0.0005383570887525229, "loss": 2.4585, "step": 453410 }, { "epoch": 0.9033134642356241, "grad_norm": 0.1960306316614151, "learning_rate": 0.0005382202628302717, "loss": 2.4373, "step": 453420 }, { "epoch": 0.9033333864592631, "grad_norm": 0.17264126241207123, "learning_rate": 0.0005380834497140406, "loss": 2.4508, "step": 453430 }, { "epoch": 0.903353308682902, "grad_norm": 0.20614968240261078, "learning_rate": 0.0005379466494002347, "loss": 2.4381, "step": 453440 }, { "epoch": 0.9033732309065409, "grad_norm": 0.18688805401325226, "learning_rate": 0.000537809861885261, "loss": 2.4563, "step": 453450 }, { "epoch": 0.9033931531301798, "grad_norm": 0.17512165009975433, "learning_rate": 0.0005376730871655271, "loss": 2.4447, "step": 453460 }, { "epoch": 0.9034130753538187, "grad_norm": 0.20087505877017975, "learning_rate": 0.0005375363252374439, "loss": 2.4521, "step": 453470 }, { "epoch": 0.9034329975774577, "grad_norm": 0.1885552704334259, "learning_rate": 0.0005373995760974222, "loss": 2.4629, "step": 453480 }, { "epoch": 0.9034529198010965, "grad_norm": 0.2045174539089203, "learning_rate": 0.0005372628397418761, "loss": 2.4544, "step": 453490 }, { "epoch": 0.9034728420247354, "grad_norm": 0.19947871565818787, "learning_rate": 0.0005371261161672206, "loss": 2.4598, "step": 453500 }, { "epoch": 0.9034927642483743, "grad_norm": 0.18947643041610718, "learning_rate": 0.0005369894053698723, "loss": 2.4598, "step": 453510 }, { "epoch": 0.9035126864720132, "grad_norm": 0.17337870597839355, "learning_rate": 0.0005368527073462495, "loss": 2.4406, "step": 453520 }, { "epoch": 0.9035326086956522, "grad_norm": 0.22517329454421997, "learning_rate": 0.0005367160220927723, "loss": 2.4443, "step": 453530 }, { "epoch": 0.9035525309192911, "grad_norm": 0.20247307419776917, "learning_rate": 0.0005365793496058626, "loss": 2.4533, "step": 453540 }, { "epoch": 0.90357245314293, "grad_norm": 0.19773685932159424, "learning_rate": 0.0005364426898819439, "loss": 2.4575, "step": 453550 }, { "epoch": 0.9035923753665689, "grad_norm": 0.21474848687648773, "learning_rate": 0.0005363060429174412, "loss": 2.4454, "step": 453560 }, { "epoch": 0.9036122975902078, "grad_norm": 0.19312743842601776, "learning_rate": 0.000536169408708781, "loss": 2.4568, "step": 453570 }, { "epoch": 0.9036322198138468, "grad_norm": 0.2126132994890213, "learning_rate": 0.000536032787252392, "loss": 2.465, "step": 453580 }, { "epoch": 0.9036521420374857, "grad_norm": 0.2016073763370514, "learning_rate": 0.0005358961785447043, "loss": 2.4546, "step": 453590 }, { "epoch": 0.9036720642611246, "grad_norm": 0.22534842789173126, "learning_rate": 0.0005357595825821497, "loss": 2.4496, "step": 453600 }, { "epoch": 0.9036919864847635, "grad_norm": 0.18039803206920624, "learning_rate": 0.0005356229993611617, "loss": 2.4468, "step": 453610 }, { "epoch": 0.9037119087084023, "grad_norm": 0.23904050886631012, "learning_rate": 0.0005354864288781753, "loss": 2.4555, "step": 453620 }, { "epoch": 0.9037318309320413, "grad_norm": 0.17676705121994019, "learning_rate": 0.0005353498711296272, "loss": 2.4544, "step": 453630 }, { "epoch": 0.9037517531556802, "grad_norm": 0.20502249896526337, "learning_rate": 0.0005352133261119556, "loss": 2.4352, "step": 453640 }, { "epoch": 0.9037716753793191, "grad_norm": 0.18895378708839417, "learning_rate": 0.000535076793821601, "loss": 2.4453, "step": 453650 }, { "epoch": 0.903791597602958, "grad_norm": 0.17814190685749054, "learning_rate": 0.0005349402742550051, "loss": 2.4517, "step": 453660 }, { "epoch": 0.903811519826597, "grad_norm": 0.20547311007976532, "learning_rate": 0.000534803767408611, "loss": 2.4447, "step": 453670 }, { "epoch": 0.9038314420502359, "grad_norm": 0.19692350924015045, "learning_rate": 0.0005346672732788638, "loss": 2.44, "step": 453680 }, { "epoch": 0.9038513642738748, "grad_norm": 0.1884315311908722, "learning_rate": 0.0005345307918622104, "loss": 2.4614, "step": 453690 }, { "epoch": 0.9038712864975137, "grad_norm": 0.21436917781829834, "learning_rate": 0.0005343943231550991, "loss": 2.457, "step": 453700 }, { "epoch": 0.9038912087211526, "grad_norm": 0.1973056197166443, "learning_rate": 0.0005342578671539799, "loss": 2.4388, "step": 453710 }, { "epoch": 0.9039111309447916, "grad_norm": 0.19913290441036224, "learning_rate": 0.0005341214238553045, "loss": 2.4594, "step": 453720 }, { "epoch": 0.9039310531684305, "grad_norm": 0.19986072182655334, "learning_rate": 0.000533984993255526, "loss": 2.4597, "step": 453730 }, { "epoch": 0.9039509753920694, "grad_norm": 0.18327172100543976, "learning_rate": 0.0005338485753510997, "loss": 2.4414, "step": 453740 }, { "epoch": 0.9039708976157083, "grad_norm": 0.19700206816196442, "learning_rate": 0.000533712170138482, "loss": 2.4538, "step": 453750 }, { "epoch": 0.9039908198393471, "grad_norm": 0.22310170531272888, "learning_rate": 0.0005335757776141312, "loss": 2.4505, "step": 453760 }, { "epoch": 0.9040107420629862, "grad_norm": 0.18286839127540588, "learning_rate": 0.0005334393977745074, "loss": 2.4549, "step": 453770 }, { "epoch": 0.904030664286625, "grad_norm": 0.19790226221084595, "learning_rate": 0.0005333030306160718, "loss": 2.4412, "step": 453780 }, { "epoch": 0.9040505865102639, "grad_norm": 0.19104881584644318, "learning_rate": 0.0005331666761352876, "loss": 2.4516, "step": 453790 }, { "epoch": 0.9040705087339028, "grad_norm": 0.5311588048934937, "learning_rate": 0.00053303033432862, "loss": 2.4554, "step": 453800 }, { "epoch": 0.9040904309575417, "grad_norm": 0.21958398818969727, "learning_rate": 0.0005328940051925355, "loss": 2.463, "step": 453810 }, { "epoch": 0.9041103531811807, "grad_norm": 0.1866549551486969, "learning_rate": 0.000532757688723502, "loss": 2.4477, "step": 453820 }, { "epoch": 0.9041302754048196, "grad_norm": 0.20636777579784393, "learning_rate": 0.0005326213849179892, "loss": 2.4394, "step": 453830 }, { "epoch": 0.9041501976284585, "grad_norm": 0.21158921718597412, "learning_rate": 0.0005324850937724688, "loss": 2.4466, "step": 453840 }, { "epoch": 0.9041701198520974, "grad_norm": 0.18905502557754517, "learning_rate": 0.0005323488152834135, "loss": 2.4373, "step": 453850 }, { "epoch": 0.9041900420757363, "grad_norm": 0.19821858406066895, "learning_rate": 0.0005322125494472982, "loss": 2.4552, "step": 453860 }, { "epoch": 0.9042099642993753, "grad_norm": 0.20136351883411407, "learning_rate": 0.0005320762962605994, "loss": 2.4442, "step": 453870 }, { "epoch": 0.9042298865230142, "grad_norm": 0.21619364619255066, "learning_rate": 0.0005319400557197946, "loss": 2.4496, "step": 453880 }, { "epoch": 0.9042498087466531, "grad_norm": 0.18713781237602234, "learning_rate": 0.0005318038278213635, "loss": 2.4367, "step": 453890 }, { "epoch": 0.904269730970292, "grad_norm": 0.20438934862613678, "learning_rate": 0.0005316676125617879, "loss": 2.4436, "step": 453900 }, { "epoch": 0.9042896531939308, "grad_norm": 0.18321551382541656, "learning_rate": 0.0005315314099375499, "loss": 2.4443, "step": 453910 }, { "epoch": 0.9043095754175698, "grad_norm": 0.2564208209514618, "learning_rate": 0.0005313952199451344, "loss": 2.4585, "step": 453920 }, { "epoch": 0.9043294976412087, "grad_norm": 0.2123778909444809, "learning_rate": 0.0005312590425810275, "loss": 2.455, "step": 453930 }, { "epoch": 0.9043494198648476, "grad_norm": 0.1846250593662262, "learning_rate": 0.0005311228778417167, "loss": 2.4589, "step": 453940 }, { "epoch": 0.9043693420884865, "grad_norm": 0.1933811902999878, "learning_rate": 0.0005309867257236918, "loss": 2.4545, "step": 453950 }, { "epoch": 0.9043892643121254, "grad_norm": 0.20462791621685028, "learning_rate": 0.0005308505862234434, "loss": 2.4436, "step": 453960 }, { "epoch": 0.9044091865357644, "grad_norm": 0.1918487548828125, "learning_rate": 0.0005307144593374644, "loss": 2.4461, "step": 453970 }, { "epoch": 0.9044291087594033, "grad_norm": 0.19980503618717194, "learning_rate": 0.0005305783450622488, "loss": 2.4451, "step": 453980 }, { "epoch": 0.9044490309830422, "grad_norm": 0.18693146109580994, "learning_rate": 0.0005304422433942924, "loss": 2.4397, "step": 453990 }, { "epoch": 0.9044689532066811, "grad_norm": 0.2036394625902176, "learning_rate": 0.0005303061543300931, "loss": 2.452, "step": 454000 }, { "epoch": 0.9044888754303201, "grad_norm": 0.20311740040779114, "learning_rate": 0.0005301700778661498, "loss": 2.4567, "step": 454010 }, { "epoch": 0.904508797653959, "grad_norm": 0.18842275440692902, "learning_rate": 0.0005300340139989632, "loss": 2.4423, "step": 454020 }, { "epoch": 0.9045287198775979, "grad_norm": 0.21050390601158142, "learning_rate": 0.000529897962725036, "loss": 2.4582, "step": 454030 }, { "epoch": 0.9045486421012368, "grad_norm": 0.18442414700984955, "learning_rate": 0.0005297619240408715, "loss": 2.4286, "step": 454040 }, { "epoch": 0.9045685643248756, "grad_norm": 0.19504685699939728, "learning_rate": 0.000529625897942976, "loss": 2.4392, "step": 454050 }, { "epoch": 0.9045884865485146, "grad_norm": 0.1769477128982544, "learning_rate": 0.0005294898844278562, "loss": 2.4483, "step": 454060 }, { "epoch": 0.9046084087721535, "grad_norm": 0.18435658514499664, "learning_rate": 0.0005293538834920211, "loss": 2.4539, "step": 454070 }, { "epoch": 0.9046283309957924, "grad_norm": 0.18552574515342712, "learning_rate": 0.0005292178951319812, "loss": 2.4384, "step": 454080 }, { "epoch": 0.9046482532194313, "grad_norm": 0.20443472266197205, "learning_rate": 0.0005290819193442484, "loss": 2.4567, "step": 454090 }, { "epoch": 0.9046681754430702, "grad_norm": 0.21175579726696014, "learning_rate": 0.0005289459561253367, "loss": 2.4644, "step": 454100 }, { "epoch": 0.9046880976667092, "grad_norm": 0.1859738975763321, "learning_rate": 0.0005288100054717608, "loss": 2.4583, "step": 454110 }, { "epoch": 0.9047080198903481, "grad_norm": 0.20519576966762543, "learning_rate": 0.0005286740673800383, "loss": 2.4569, "step": 454120 }, { "epoch": 0.904727942113987, "grad_norm": 0.1844080239534378, "learning_rate": 0.0005285381418466872, "loss": 2.4426, "step": 454130 }, { "epoch": 0.9047478643376259, "grad_norm": 0.1674385517835617, "learning_rate": 0.0005284022288682277, "loss": 2.4427, "step": 454140 }, { "epoch": 0.9047677865612648, "grad_norm": 0.20798903703689575, "learning_rate": 0.0005282663284411817, "loss": 2.439, "step": 454150 }, { "epoch": 0.9047877087849038, "grad_norm": 0.18858946859836578, "learning_rate": 0.0005281304405620722, "loss": 2.4513, "step": 454160 }, { "epoch": 0.9048076310085427, "grad_norm": 0.18183709681034088, "learning_rate": 0.0005279945652274243, "loss": 2.4427, "step": 454170 }, { "epoch": 0.9048275532321816, "grad_norm": 0.2019704431295395, "learning_rate": 0.0005278587024337644, "loss": 2.4673, "step": 454180 }, { "epoch": 0.9048474754558204, "grad_norm": 0.19872550666332245, "learning_rate": 0.0005277228521776207, "loss": 2.4524, "step": 454190 }, { "epoch": 0.9048673976794593, "grad_norm": 0.22691771388053894, "learning_rate": 0.0005275870144555231, "loss": 2.4313, "step": 454200 }, { "epoch": 0.9048873199030983, "grad_norm": 0.19048401713371277, "learning_rate": 0.0005274511892640025, "loss": 2.4432, "step": 454210 }, { "epoch": 0.9049072421267372, "grad_norm": 0.1939495950937271, "learning_rate": 0.0005273153765995926, "loss": 2.4533, "step": 454220 }, { "epoch": 0.9049271643503761, "grad_norm": 0.20750167965888977, "learning_rate": 0.000527179576458827, "loss": 2.4563, "step": 454230 }, { "epoch": 0.904947086574015, "grad_norm": 0.19203320145606995, "learning_rate": 0.0005270437888382425, "loss": 2.449, "step": 454240 }, { "epoch": 0.9049670087976539, "grad_norm": 0.20454145967960358, "learning_rate": 0.0005269080137343765, "loss": 2.452, "step": 454250 }, { "epoch": 0.9049869310212929, "grad_norm": 0.20474664866924286, "learning_rate": 0.0005267722511437684, "loss": 2.4452, "step": 454260 }, { "epoch": 0.9050068532449318, "grad_norm": 0.20471800863742828, "learning_rate": 0.0005266365010629591, "loss": 2.4451, "step": 454270 }, { "epoch": 0.9050267754685707, "grad_norm": 0.20335471630096436, "learning_rate": 0.0005265007634884911, "loss": 2.4519, "step": 454280 }, { "epoch": 0.9050466976922096, "grad_norm": 0.2091483175754547, "learning_rate": 0.0005263650384169083, "loss": 2.439, "step": 454290 }, { "epoch": 0.9050666199158486, "grad_norm": 0.19036038219928741, "learning_rate": 0.0005262293258447568, "loss": 2.4638, "step": 454300 }, { "epoch": 0.9050865421394875, "grad_norm": 0.20241357386112213, "learning_rate": 0.0005260936257685836, "loss": 2.4525, "step": 454310 }, { "epoch": 0.9051064643631264, "grad_norm": 0.21710795164108276, "learning_rate": 0.0005259579381849378, "loss": 2.4457, "step": 454320 }, { "epoch": 0.9051263865867653, "grad_norm": 0.2378474771976471, "learning_rate": 0.0005258222630903695, "loss": 2.435, "step": 454330 }, { "epoch": 0.9051463088104041, "grad_norm": 0.21101517975330353, "learning_rate": 0.0005256866004814309, "loss": 2.4498, "step": 454340 }, { "epoch": 0.9051662310340431, "grad_norm": 0.2154051810503006, "learning_rate": 0.0005255509503546758, "loss": 2.4645, "step": 454350 }, { "epoch": 0.905186153257682, "grad_norm": 0.20115378499031067, "learning_rate": 0.0005254153127066592, "loss": 2.4433, "step": 454360 }, { "epoch": 0.9052060754813209, "grad_norm": 0.1917172372341156, "learning_rate": 0.0005252796875339378, "loss": 2.4524, "step": 454370 }, { "epoch": 0.9052259977049598, "grad_norm": 0.20200969278812408, "learning_rate": 0.0005251440748330705, "loss": 2.45, "step": 454380 }, { "epoch": 0.9052459199285987, "grad_norm": 0.18487466871738434, "learning_rate": 0.0005250084746006165, "loss": 2.4533, "step": 454390 }, { "epoch": 0.9052658421522377, "grad_norm": 0.21138040721416473, "learning_rate": 0.0005248728868331381, "loss": 2.4499, "step": 454400 }, { "epoch": 0.9052857643758766, "grad_norm": 0.1920831799507141, "learning_rate": 0.0005247373115271979, "loss": 2.4544, "step": 454410 }, { "epoch": 0.9053056865995155, "grad_norm": 0.18648435175418854, "learning_rate": 0.0005246017486793609, "loss": 2.4527, "step": 454420 }, { "epoch": 0.9053256088231544, "grad_norm": 0.19703873991966248, "learning_rate": 0.0005244661982861931, "loss": 2.4488, "step": 454430 }, { "epoch": 0.9053455310467933, "grad_norm": 0.19908340275287628, "learning_rate": 0.0005243306603442626, "loss": 2.4528, "step": 454440 }, { "epoch": 0.9053654532704323, "grad_norm": 0.21156643331050873, "learning_rate": 0.0005241951348501388, "loss": 2.4532, "step": 454450 }, { "epoch": 0.9053853754940712, "grad_norm": 0.2007552832365036, "learning_rate": 0.0005240596218003928, "loss": 2.4507, "step": 454460 }, { "epoch": 0.9054052977177101, "grad_norm": 0.22262519598007202, "learning_rate": 0.0005239241211915968, "loss": 2.4598, "step": 454470 }, { "epoch": 0.905425219941349, "grad_norm": 0.23596283793449402, "learning_rate": 0.0005237886330203255, "loss": 2.4334, "step": 454480 }, { "epoch": 0.9054451421649878, "grad_norm": 0.2210589200258255, "learning_rate": 0.0005236531572831537, "loss": 2.441, "step": 454490 }, { "epoch": 0.9054650643886268, "grad_norm": 0.1893415004014969, "learning_rate": 0.0005235176939766599, "loss": 2.4489, "step": 454500 }, { "epoch": 0.9054849866122657, "grad_norm": 0.21360482275485992, "learning_rate": 0.0005233822430974223, "loss": 2.4568, "step": 454510 }, { "epoch": 0.9055049088359046, "grad_norm": 0.191374272108078, "learning_rate": 0.0005232468046420213, "loss": 2.4407, "step": 454520 }, { "epoch": 0.9055248310595435, "grad_norm": 0.2058160901069641, "learning_rate": 0.0005231113786070394, "loss": 2.4565, "step": 454530 }, { "epoch": 0.9055447532831824, "grad_norm": 0.20108474791049957, "learning_rate": 0.0005229759649890595, "loss": 2.4375, "step": 454540 }, { "epoch": 0.9055646755068214, "grad_norm": 0.1896834373474121, "learning_rate": 0.0005228405637846672, "loss": 2.4579, "step": 454550 }, { "epoch": 0.9055845977304603, "grad_norm": 0.2041582465171814, "learning_rate": 0.000522705174990449, "loss": 2.4491, "step": 454560 }, { "epoch": 0.9056045199540992, "grad_norm": 0.19359886646270752, "learning_rate": 0.0005225697986029934, "loss": 2.4489, "step": 454570 }, { "epoch": 0.9056244421777381, "grad_norm": 0.19743601977825165, "learning_rate": 0.0005224344346188898, "loss": 2.4494, "step": 454580 }, { "epoch": 0.9056443644013771, "grad_norm": 0.19943535327911377, "learning_rate": 0.0005222990830347299, "loss": 2.4432, "step": 454590 }, { "epoch": 0.905664286625016, "grad_norm": 0.19803178310394287, "learning_rate": 0.0005221637438471067, "loss": 2.443, "step": 454600 }, { "epoch": 0.9056842088486549, "grad_norm": 0.19027765095233917, "learning_rate": 0.0005220284170526146, "loss": 2.4313, "step": 454610 }, { "epoch": 0.9057041310722937, "grad_norm": 0.17352208495140076, "learning_rate": 0.0005218931026478497, "loss": 2.4506, "step": 454620 }, { "epoch": 0.9057240532959326, "grad_norm": 0.2087937593460083, "learning_rate": 0.0005217578006294097, "loss": 2.4593, "step": 454630 }, { "epoch": 0.9057439755195716, "grad_norm": 0.21347211301326752, "learning_rate": 0.0005216225109938937, "loss": 2.4582, "step": 454640 }, { "epoch": 0.9057638977432105, "grad_norm": 0.2234891951084137, "learning_rate": 0.0005214872337379025, "loss": 2.4423, "step": 454650 }, { "epoch": 0.9057838199668494, "grad_norm": 0.19379840791225433, "learning_rate": 0.0005213519688580382, "loss": 2.4522, "step": 454660 }, { "epoch": 0.9058037421904883, "grad_norm": 0.20575907826423645, "learning_rate": 0.0005212167163509049, "loss": 2.4492, "step": 454670 }, { "epoch": 0.9058236644141272, "grad_norm": 0.19851060211658478, "learning_rate": 0.000521081476213108, "loss": 2.4542, "step": 454680 }, { "epoch": 0.9058435866377662, "grad_norm": 0.19368059933185577, "learning_rate": 0.0005209462484412541, "loss": 2.4483, "step": 454690 }, { "epoch": 0.9058635088614051, "grad_norm": 0.20441953837871552, "learning_rate": 0.0005208110330319522, "loss": 2.4485, "step": 454700 }, { "epoch": 0.905883431085044, "grad_norm": 0.1816813200712204, "learning_rate": 0.0005206758299818122, "loss": 2.44, "step": 454710 }, { "epoch": 0.9059033533086829, "grad_norm": 0.1986788511276245, "learning_rate": 0.0005205406392874457, "loss": 2.4447, "step": 454720 }, { "epoch": 0.9059232755323218, "grad_norm": 0.16975639760494232, "learning_rate": 0.0005204054609454655, "loss": 2.4404, "step": 454730 }, { "epoch": 0.9059431977559608, "grad_norm": 0.17731261253356934, "learning_rate": 0.0005202702949524869, "loss": 2.4476, "step": 454740 }, { "epoch": 0.9059631199795997, "grad_norm": 0.19143378734588623, "learning_rate": 0.0005201351413051258, "loss": 2.442, "step": 454750 }, { "epoch": 0.9059830422032386, "grad_norm": 0.18318895995616913, "learning_rate": 0.0005200000000000001, "loss": 2.4434, "step": 454760 }, { "epoch": 0.9060029644268774, "grad_norm": 0.20723968744277954, "learning_rate": 0.0005198648710337291, "loss": 2.4402, "step": 454770 }, { "epoch": 0.9060228866505163, "grad_norm": 0.1847505420446396, "learning_rate": 0.0005197297544029334, "loss": 2.4531, "step": 454780 }, { "epoch": 0.9060428088741553, "grad_norm": 0.2060687243938446, "learning_rate": 0.0005195946501042357, "loss": 2.4474, "step": 454790 }, { "epoch": 0.9060627310977942, "grad_norm": 0.2221786230802536, "learning_rate": 0.0005194595581342603, "loss": 2.4462, "step": 454800 }, { "epoch": 0.9060826533214331, "grad_norm": 0.459344744682312, "learning_rate": 0.0005193244784896321, "loss": 2.4683, "step": 454810 }, { "epoch": 0.906102575545072, "grad_norm": 0.19742423295974731, "learning_rate": 0.0005191894111669784, "loss": 2.4451, "step": 454820 }, { "epoch": 0.9061224977687109, "grad_norm": 0.20058152079582214, "learning_rate": 0.000519054356162928, "loss": 2.4627, "step": 454830 }, { "epoch": 0.9061424199923499, "grad_norm": 0.18786607682704926, "learning_rate": 0.0005189193134741106, "loss": 2.4527, "step": 454840 }, { "epoch": 0.9061623422159888, "grad_norm": 0.20956259965896606, "learning_rate": 0.0005187842830971581, "loss": 2.4516, "step": 454850 }, { "epoch": 0.9061822644396277, "grad_norm": 0.21213430166244507, "learning_rate": 0.0005186492650287036, "loss": 2.4334, "step": 454860 }, { "epoch": 0.9062021866632666, "grad_norm": 0.18738675117492676, "learning_rate": 0.000518514259265382, "loss": 2.4266, "step": 454870 }, { "epoch": 0.9062221088869056, "grad_norm": 0.30286136269569397, "learning_rate": 0.0005183792658038294, "loss": 2.4553, "step": 454880 }, { "epoch": 0.9062420311105445, "grad_norm": 0.2085082232952118, "learning_rate": 0.0005182442846406834, "loss": 2.4503, "step": 454890 }, { "epoch": 0.9062619533341834, "grad_norm": 0.1878749430179596, "learning_rate": 0.0005181093157725835, "loss": 2.4551, "step": 454900 }, { "epoch": 0.9062818755578222, "grad_norm": 0.19422918558120728, "learning_rate": 0.000517974359196171, "loss": 2.4526, "step": 454910 }, { "epoch": 0.9063017977814611, "grad_norm": 0.21191062033176422, "learning_rate": 0.0005178394149080876, "loss": 2.4524, "step": 454920 }, { "epoch": 0.9063217200051001, "grad_norm": 0.22223792970180511, "learning_rate": 0.0005177044829049775, "loss": 2.4544, "step": 454930 }, { "epoch": 0.906341642228739, "grad_norm": 0.18410168588161469, "learning_rate": 0.0005175695631834862, "loss": 2.4398, "step": 454940 }, { "epoch": 0.9063615644523779, "grad_norm": 0.18989616632461548, "learning_rate": 0.0005174346557402605, "loss": 2.4574, "step": 454950 }, { "epoch": 0.9063814866760168, "grad_norm": 0.19851915538311005, "learning_rate": 0.000517299760571949, "loss": 2.4463, "step": 454960 }, { "epoch": 0.9064014088996557, "grad_norm": 0.1869146078824997, "learning_rate": 0.0005171648776752016, "loss": 2.4488, "step": 454970 }, { "epoch": 0.9064213311232947, "grad_norm": 0.19021815061569214, "learning_rate": 0.00051703000704667, "loss": 2.4462, "step": 454980 }, { "epoch": 0.9064412533469336, "grad_norm": 0.200156569480896, "learning_rate": 0.000516895148683007, "loss": 2.4382, "step": 454990 }, { "epoch": 0.9064611755705725, "grad_norm": 0.21380086243152618, "learning_rate": 0.0005167603025808674, "loss": 2.4383, "step": 455000 }, { "epoch": 0.9064810977942114, "grad_norm": 0.20754389464855194, "learning_rate": 0.0005166254687369072, "loss": 2.4564, "step": 455010 }, { "epoch": 0.9065010200178503, "grad_norm": 0.223995178937912, "learning_rate": 0.0005164906471477841, "loss": 2.4376, "step": 455020 }, { "epoch": 0.9065209422414893, "grad_norm": 0.2024393081665039, "learning_rate": 0.0005163558378101574, "loss": 2.4364, "step": 455030 }, { "epoch": 0.9065408644651282, "grad_norm": 0.20849615335464478, "learning_rate": 0.0005162210407206873, "loss": 2.435, "step": 455040 }, { "epoch": 0.906560786688767, "grad_norm": 0.2872566878795624, "learning_rate": 0.0005160862558760364, "loss": 2.4483, "step": 455050 }, { "epoch": 0.9065807089124059, "grad_norm": 0.19490739703178406, "learning_rate": 0.0005159514832728682, "loss": 2.4637, "step": 455060 }, { "epoch": 0.9066006311360448, "grad_norm": 0.19933216273784637, "learning_rate": 0.0005158167229078478, "loss": 2.4513, "step": 455070 }, { "epoch": 0.9066205533596838, "grad_norm": 0.20075324177742004, "learning_rate": 0.0005156819747776422, "loss": 2.4494, "step": 455080 }, { "epoch": 0.9066404755833227, "grad_norm": 0.18549026548862457, "learning_rate": 0.0005155472388789196, "loss": 2.4538, "step": 455090 }, { "epoch": 0.9066603978069616, "grad_norm": 0.19893187284469604, "learning_rate": 0.0005154125152083491, "loss": 2.4722, "step": 455100 }, { "epoch": 0.9066803200306005, "grad_norm": 0.17912663519382477, "learning_rate": 0.0005152778037626029, "loss": 2.451, "step": 455110 }, { "epoch": 0.9067002422542394, "grad_norm": 0.20499947667121887, "learning_rate": 0.0005151431045383533, "loss": 2.4332, "step": 455120 }, { "epoch": 0.9067201644778784, "grad_norm": 0.24805162847042084, "learning_rate": 0.0005150084175322744, "loss": 2.4469, "step": 455130 }, { "epoch": 0.9067400867015173, "grad_norm": 0.23159396648406982, "learning_rate": 0.0005148737427410422, "loss": 2.4488, "step": 455140 }, { "epoch": 0.9067600089251562, "grad_norm": 0.1996622383594513, "learning_rate": 0.0005147390801613341, "loss": 2.4436, "step": 455150 }, { "epoch": 0.9067799311487951, "grad_norm": 0.22402584552764893, "learning_rate": 0.0005146044297898287, "loss": 2.4589, "step": 455160 }, { "epoch": 0.9067998533724341, "grad_norm": 0.19065937399864197, "learning_rate": 0.0005144697916232064, "loss": 2.4456, "step": 455170 }, { "epoch": 0.906819775596073, "grad_norm": 0.21457721292972565, "learning_rate": 0.000514335165658149, "loss": 2.4374, "step": 455180 }, { "epoch": 0.9068396978197119, "grad_norm": 0.19952723383903503, "learning_rate": 0.0005142005518913395, "loss": 2.4534, "step": 455190 }, { "epoch": 0.9068596200433507, "grad_norm": 0.20522180199623108, "learning_rate": 0.0005140659503194632, "loss": 2.4409, "step": 455200 }, { "epoch": 0.9068795422669896, "grad_norm": 0.20588137209415436, "learning_rate": 0.000513931360939206, "loss": 2.4413, "step": 455210 }, { "epoch": 0.9068994644906286, "grad_norm": 0.21223300695419312, "learning_rate": 0.0005137967837472562, "loss": 2.4481, "step": 455220 }, { "epoch": 0.9069193867142675, "grad_norm": 0.1899217814207077, "learning_rate": 0.0005136622187403026, "loss": 2.4401, "step": 455230 }, { "epoch": 0.9069393089379064, "grad_norm": 0.19925090670585632, "learning_rate": 0.0005135276659150363, "loss": 2.4483, "step": 455240 }, { "epoch": 0.9069592311615453, "grad_norm": 0.20414035022258759, "learning_rate": 0.0005133931252681494, "loss": 2.4494, "step": 455250 }, { "epoch": 0.9069791533851842, "grad_norm": 0.19019834697246552, "learning_rate": 0.0005132585967963359, "loss": 2.436, "step": 455260 }, { "epoch": 0.9069990756088232, "grad_norm": 0.18349947035312653, "learning_rate": 0.0005131240804962911, "loss": 2.4629, "step": 455270 }, { "epoch": 0.9070189978324621, "grad_norm": 0.20997631549835205, "learning_rate": 0.0005129895763647116, "loss": 2.4399, "step": 455280 }, { "epoch": 0.907038920056101, "grad_norm": 0.19497334957122803, "learning_rate": 0.0005128550843982957, "loss": 2.4519, "step": 455290 }, { "epoch": 0.9070588422797399, "grad_norm": 0.19419819116592407, "learning_rate": 0.0005127206045937434, "loss": 2.4516, "step": 455300 }, { "epoch": 0.9070787645033788, "grad_norm": 0.19341111183166504, "learning_rate": 0.0005125861369477557, "loss": 2.4419, "step": 455310 }, { "epoch": 0.9070986867270178, "grad_norm": 0.20110256969928741, "learning_rate": 0.0005124516814570359, "loss": 2.4563, "step": 455320 }, { "epoch": 0.9071186089506567, "grad_norm": 0.21819192171096802, "learning_rate": 0.0005123172381182874, "loss": 2.4537, "step": 455330 }, { "epoch": 0.9071385311742955, "grad_norm": 0.19472447037696838, "learning_rate": 0.0005121828069282169, "loss": 2.4593, "step": 455340 }, { "epoch": 0.9071584533979344, "grad_norm": 0.18554726243019104, "learning_rate": 0.0005120483878835307, "loss": 2.4525, "step": 455350 }, { "epoch": 0.9071783756215733, "grad_norm": 0.19801482558250427, "learning_rate": 0.0005119139809809381, "loss": 2.4423, "step": 455360 }, { "epoch": 0.9071982978452123, "grad_norm": 0.18485766649246216, "learning_rate": 0.000511779586217149, "loss": 2.4558, "step": 455370 }, { "epoch": 0.9072182200688512, "grad_norm": 0.19890211522579193, "learning_rate": 0.0005116452035888755, "loss": 2.4369, "step": 455380 }, { "epoch": 0.9072381422924901, "grad_norm": 0.18300624191761017, "learning_rate": 0.0005115108330928305, "loss": 2.4297, "step": 455390 }, { "epoch": 0.907258064516129, "grad_norm": 0.18664664030075073, "learning_rate": 0.0005113764747257284, "loss": 2.4457, "step": 455400 }, { "epoch": 0.9072779867397679, "grad_norm": 0.2520425319671631, "learning_rate": 0.0005112421284842857, "loss": 2.4474, "step": 455410 }, { "epoch": 0.9072979089634069, "grad_norm": 0.21581798791885376, "learning_rate": 0.00051110779436522, "loss": 2.4378, "step": 455420 }, { "epoch": 0.9073178311870458, "grad_norm": 0.21068258583545685, "learning_rate": 0.0005109734723652504, "loss": 2.4533, "step": 455430 }, { "epoch": 0.9073377534106847, "grad_norm": 0.19740921258926392, "learning_rate": 0.0005108391624810973, "loss": 2.4396, "step": 455440 }, { "epoch": 0.9073576756343236, "grad_norm": 0.20372110605239868, "learning_rate": 0.0005107048647094828, "loss": 2.4489, "step": 455450 }, { "epoch": 0.9073775978579625, "grad_norm": 0.1975979506969452, "learning_rate": 0.0005105705790471307, "loss": 2.4467, "step": 455460 }, { "epoch": 0.9073975200816015, "grad_norm": 0.20735104382038116, "learning_rate": 0.0005104363054907655, "loss": 2.4372, "step": 455470 }, { "epoch": 0.9074174423052404, "grad_norm": 0.17066513001918793, "learning_rate": 0.0005103020440371143, "loss": 2.442, "step": 455480 }, { "epoch": 0.9074373645288792, "grad_norm": 0.19406268000602722, "learning_rate": 0.0005101677946829046, "loss": 2.4424, "step": 455490 }, { "epoch": 0.9074572867525181, "grad_norm": 0.1998881846666336, "learning_rate": 0.000510033557424866, "loss": 2.4539, "step": 455500 }, { "epoch": 0.9074772089761571, "grad_norm": 0.19744274020195007, "learning_rate": 0.0005098993322597294, "loss": 2.4607, "step": 455510 }, { "epoch": 0.907497131199796, "grad_norm": 0.1779138296842575, "learning_rate": 0.0005097651191842274, "loss": 2.4461, "step": 455520 }, { "epoch": 0.9075170534234349, "grad_norm": 0.1925908476114273, "learning_rate": 0.0005096309181950936, "loss": 2.4447, "step": 455530 }, { "epoch": 0.9075369756470738, "grad_norm": 0.19528865814208984, "learning_rate": 0.0005094967292890633, "loss": 2.4697, "step": 455540 }, { "epoch": 0.9075568978707127, "grad_norm": 0.19221898913383484, "learning_rate": 0.0005093625524628734, "loss": 2.4311, "step": 455550 }, { "epoch": 0.9075768200943517, "grad_norm": 0.19315434992313385, "learning_rate": 0.0005092283877132622, "loss": 2.4382, "step": 455560 }, { "epoch": 0.9075967423179906, "grad_norm": 0.19877706468105316, "learning_rate": 0.0005090942350369691, "loss": 2.446, "step": 455570 }, { "epoch": 0.9076166645416295, "grad_norm": 0.20046871900558472, "learning_rate": 0.0005089600944307359, "loss": 2.4556, "step": 455580 }, { "epoch": 0.9076365867652684, "grad_norm": 0.20461054146289825, "learning_rate": 0.0005088259658913048, "loss": 2.4459, "step": 455590 }, { "epoch": 0.9076565089889073, "grad_norm": 0.1903323531150818, "learning_rate": 0.00050869184941542, "loss": 2.4579, "step": 455600 }, { "epoch": 0.9076764312125463, "grad_norm": 0.19689354300498962, "learning_rate": 0.0005085577449998273, "loss": 2.4696, "step": 455610 }, { "epoch": 0.9076963534361852, "grad_norm": 0.20984701812267303, "learning_rate": 0.0005084236526412735, "loss": 2.4331, "step": 455620 }, { "epoch": 0.907716275659824, "grad_norm": 0.19021384418010712, "learning_rate": 0.0005082895723365073, "loss": 2.4414, "step": 455630 }, { "epoch": 0.9077361978834629, "grad_norm": 0.18058112263679504, "learning_rate": 0.0005081555040822788, "loss": 2.4474, "step": 455640 }, { "epoch": 0.9077561201071018, "grad_norm": 0.19666944444179535, "learning_rate": 0.0005080214478753389, "loss": 2.4564, "step": 455650 }, { "epoch": 0.9077760423307408, "grad_norm": 0.20851299166679382, "learning_rate": 0.000507887403712441, "loss": 2.4516, "step": 455660 }, { "epoch": 0.9077959645543797, "grad_norm": 0.20824553072452545, "learning_rate": 0.0005077533715903393, "loss": 2.4479, "step": 455670 }, { "epoch": 0.9078158867780186, "grad_norm": 0.1849086582660675, "learning_rate": 0.0005076193515057896, "loss": 2.4529, "step": 455680 }, { "epoch": 0.9078358090016575, "grad_norm": 0.19237300753593445, "learning_rate": 0.0005074853434555493, "loss": 2.4395, "step": 455690 }, { "epoch": 0.9078557312252964, "grad_norm": 0.19277356564998627, "learning_rate": 0.0005073513474363767, "loss": 2.4501, "step": 455700 }, { "epoch": 0.9078756534489354, "grad_norm": 0.18934959173202515, "learning_rate": 0.0005072173634450326, "loss": 2.4529, "step": 455710 }, { "epoch": 0.9078955756725743, "grad_norm": 0.21116162836551666, "learning_rate": 0.0005070833914782781, "loss": 2.4499, "step": 455720 }, { "epoch": 0.9079154978962132, "grad_norm": 0.20493677258491516, "learning_rate": 0.0005069494315328766, "loss": 2.4494, "step": 455730 }, { "epoch": 0.9079354201198521, "grad_norm": 0.2057669311761856, "learning_rate": 0.0005068154836055927, "loss": 2.4537, "step": 455740 }, { "epoch": 0.907955342343491, "grad_norm": 0.2004365473985672, "learning_rate": 0.000506681547693192, "loss": 2.4512, "step": 455750 }, { "epoch": 0.90797526456713, "grad_norm": 0.20927584171295166, "learning_rate": 0.0005065476237924425, "loss": 2.4281, "step": 455760 }, { "epoch": 0.9079951867907688, "grad_norm": 0.18989339470863342, "learning_rate": 0.0005064137119001127, "loss": 2.4404, "step": 455770 }, { "epoch": 0.9080151090144077, "grad_norm": 0.18887054920196533, "learning_rate": 0.0005062798120129728, "loss": 2.4367, "step": 455780 }, { "epoch": 0.9080350312380466, "grad_norm": 0.5101431012153625, "learning_rate": 0.0005061459241277951, "loss": 2.432, "step": 455790 }, { "epoch": 0.9080549534616856, "grad_norm": 0.194184347987175, "learning_rate": 0.0005060120482413521, "loss": 2.4565, "step": 455800 }, { "epoch": 0.9080748756853245, "grad_norm": 0.19419199228286743, "learning_rate": 0.0005058781843504192, "loss": 2.453, "step": 455810 }, { "epoch": 0.9080947979089634, "grad_norm": 0.19204646348953247, "learning_rate": 0.0005057443324517721, "loss": 2.4421, "step": 455820 }, { "epoch": 0.9081147201326023, "grad_norm": 0.21863429248332977, "learning_rate": 0.0005056104925421888, "loss": 2.4556, "step": 455830 }, { "epoch": 0.9081346423562412, "grad_norm": 0.21223986148834229, "learning_rate": 0.0005054766646184477, "loss": 2.4453, "step": 455840 }, { "epoch": 0.9081545645798802, "grad_norm": 0.20749114453792572, "learning_rate": 0.0005053428486773297, "loss": 2.445, "step": 455850 }, { "epoch": 0.9081744868035191, "grad_norm": 0.19657933712005615, "learning_rate": 0.0005052090447156164, "loss": 2.4487, "step": 455860 }, { "epoch": 0.908194409027158, "grad_norm": 0.20628619194030762, "learning_rate": 0.0005050752527300914, "loss": 2.4393, "step": 455870 }, { "epoch": 0.9082143312507969, "grad_norm": 0.1842554360628128, "learning_rate": 0.0005049414727175394, "loss": 2.4636, "step": 455880 }, { "epoch": 0.9082342534744358, "grad_norm": 0.19151897728443146, "learning_rate": 0.0005048077046747465, "loss": 2.4487, "step": 455890 }, { "epoch": 0.9082541756980748, "grad_norm": 0.20849889516830444, "learning_rate": 0.0005046739485985004, "loss": 2.4485, "step": 455900 }, { "epoch": 0.9082740979217137, "grad_norm": 0.6942219138145447, "learning_rate": 0.0005045402044855903, "loss": 2.4578, "step": 455910 }, { "epoch": 0.9082940201453525, "grad_norm": 0.2210046648979187, "learning_rate": 0.0005044064723328065, "loss": 2.4467, "step": 455920 }, { "epoch": 0.9083139423689914, "grad_norm": 0.2087530493736267, "learning_rate": 0.0005042727521369412, "loss": 2.4442, "step": 455930 }, { "epoch": 0.9083338645926303, "grad_norm": 0.20477503538131714, "learning_rate": 0.0005041390438947877, "loss": 2.4441, "step": 455940 }, { "epoch": 0.9083537868162693, "grad_norm": 0.20665882527828217, "learning_rate": 0.0005040053476031408, "loss": 2.4256, "step": 455950 }, { "epoch": 0.9083737090399082, "grad_norm": 0.20350240170955658, "learning_rate": 0.0005038716632587965, "loss": 2.4512, "step": 455960 }, { "epoch": 0.9083936312635471, "grad_norm": 0.18071259558200836, "learning_rate": 0.0005037379908585528, "loss": 2.4485, "step": 455970 }, { "epoch": 0.908413553487186, "grad_norm": 0.17120754718780518, "learning_rate": 0.000503604330399209, "loss": 2.446, "step": 455980 }, { "epoch": 0.9084334757108249, "grad_norm": 0.18771860003471375, "learning_rate": 0.0005034706818775652, "loss": 2.4515, "step": 455990 }, { "epoch": 0.9084533979344639, "grad_norm": 0.21000796556472778, "learning_rate": 0.0005033370452904234, "loss": 2.4482, "step": 456000 }, { "epoch": 0.9084733201581028, "grad_norm": 0.18567243218421936, "learning_rate": 0.0005032034206345872, "loss": 2.4379, "step": 456010 }, { "epoch": 0.9084932423817417, "grad_norm": 0.1885126233100891, "learning_rate": 0.0005030698079068616, "loss": 2.4431, "step": 456020 }, { "epoch": 0.9085131646053806, "grad_norm": 0.18478184938430786, "learning_rate": 0.0005029362071040526, "loss": 2.4484, "step": 456030 }, { "epoch": 0.9085330868290195, "grad_norm": 0.24290014803409576, "learning_rate": 0.0005028026182229679, "loss": 2.4208, "step": 456040 }, { "epoch": 0.9085530090526585, "grad_norm": 0.2295360565185547, "learning_rate": 0.0005026690412604167, "loss": 2.4484, "step": 456050 }, { "epoch": 0.9085729312762973, "grad_norm": 0.22304163873195648, "learning_rate": 0.0005025354762132093, "loss": 2.4578, "step": 456060 }, { "epoch": 0.9085928534999362, "grad_norm": 0.19837726652622223, "learning_rate": 0.0005024019230781578, "loss": 2.4408, "step": 456070 }, { "epoch": 0.9086127757235751, "grad_norm": 0.19741985201835632, "learning_rate": 0.0005022683818520757, "loss": 2.4505, "step": 456080 }, { "epoch": 0.9086326979472141, "grad_norm": 0.20775729417800903, "learning_rate": 0.0005021348525317775, "loss": 2.4505, "step": 456090 }, { "epoch": 0.908652620170853, "grad_norm": 0.202039897441864, "learning_rate": 0.0005020013351140796, "loss": 2.4585, "step": 456100 }, { "epoch": 0.9086725423944919, "grad_norm": 0.22946327924728394, "learning_rate": 0.0005018678295957996, "loss": 2.4413, "step": 456110 }, { "epoch": 0.9086924646181308, "grad_norm": 0.2100880742073059, "learning_rate": 0.0005017343359737565, "loss": 2.4341, "step": 456120 }, { "epoch": 0.9087123868417697, "grad_norm": 0.19673091173171997, "learning_rate": 0.000501600854244771, "loss": 2.4527, "step": 456130 }, { "epoch": 0.9087323090654087, "grad_norm": 0.18386802077293396, "learning_rate": 0.0005014673844056648, "loss": 2.445, "step": 456140 }, { "epoch": 0.9087522312890476, "grad_norm": 0.196580171585083, "learning_rate": 0.0005013339264532609, "loss": 2.4439, "step": 456150 }, { "epoch": 0.9087721535126865, "grad_norm": 0.18716727197170258, "learning_rate": 0.0005012004803843843, "loss": 2.4446, "step": 456160 }, { "epoch": 0.9087920757363254, "grad_norm": 0.18198123574256897, "learning_rate": 0.0005010670461958613, "loss": 2.4681, "step": 456170 }, { "epoch": 0.9088119979599643, "grad_norm": 0.24877074360847473, "learning_rate": 0.0005009336238845192, "loss": 2.4304, "step": 456180 }, { "epoch": 0.9088319201836033, "grad_norm": 0.19734607636928558, "learning_rate": 0.0005008002134471871, "loss": 2.4311, "step": 456190 }, { "epoch": 0.9088518424072421, "grad_norm": 0.18983043730258942, "learning_rate": 0.000500666814880695, "loss": 2.4333, "step": 456200 }, { "epoch": 0.908871764630881, "grad_norm": 0.2106146663427353, "learning_rate": 0.000500533428181875, "loss": 2.4612, "step": 456210 }, { "epoch": 0.9088916868545199, "grad_norm": 0.20865477621555328, "learning_rate": 0.0005004000533475603, "loss": 2.453, "step": 456220 }, { "epoch": 0.9089116090781588, "grad_norm": 0.1904890537261963, "learning_rate": 0.0005002666903745854, "loss": 2.4356, "step": 456230 }, { "epoch": 0.9089315313017978, "grad_norm": 0.20854078233242035, "learning_rate": 0.000500133339259786, "loss": 2.448, "step": 456240 }, { "epoch": 0.9089514535254367, "grad_norm": 0.20848818123340607, "learning_rate": 0.0005, "loss": 2.4503, "step": 456250 }, { "epoch": 0.9089713757490756, "grad_norm": 0.2014981210231781, "learning_rate": 0.000499866672592066, "loss": 2.4353, "step": 456260 }, { "epoch": 0.9089912979727145, "grad_norm": 0.20699387788772583, "learning_rate": 0.0004997333570328239, "loss": 2.4375, "step": 456270 }, { "epoch": 0.9090112201963534, "grad_norm": 0.1922094225883484, "learning_rate": 0.0004996000533191158, "loss": 2.4452, "step": 456280 }, { "epoch": 0.9090311424199924, "grad_norm": 0.21400675177574158, "learning_rate": 0.0004994667614477846, "loss": 2.4568, "step": 456290 }, { "epoch": 0.9090510646436313, "grad_norm": 0.282990038394928, "learning_rate": 0.0004993334814156745, "loss": 2.4578, "step": 456300 }, { "epoch": 0.9090709868672702, "grad_norm": 0.22079722583293915, "learning_rate": 0.0004992002132196314, "loss": 2.4381, "step": 456310 }, { "epoch": 0.9090909090909091, "grad_norm": 0.19879096746444702, "learning_rate": 0.0004990669568565025, "loss": 2.4185, "step": 456320 }, { "epoch": 0.909110831314548, "grad_norm": 0.18813058733940125, "learning_rate": 0.0004989337123231366, "loss": 2.446, "step": 456330 }, { "epoch": 0.909130753538187, "grad_norm": 0.18096096813678741, "learning_rate": 0.0004988004796163836, "loss": 2.4536, "step": 456340 }, { "epoch": 0.9091506757618258, "grad_norm": 0.1957252323627472, "learning_rate": 0.0004986672587330949, "loss": 2.4511, "step": 456350 }, { "epoch": 0.9091705979854647, "grad_norm": 0.19277578592300415, "learning_rate": 0.0004985340496701232, "loss": 2.4375, "step": 456360 }, { "epoch": 0.9091905202091036, "grad_norm": 0.2150311917066574, "learning_rate": 0.0004984008524243228, "loss": 2.436, "step": 456370 }, { "epoch": 0.9092104424327426, "grad_norm": 0.20555515587329865, "learning_rate": 0.0004982676669925497, "loss": 2.45, "step": 456380 }, { "epoch": 0.9092303646563815, "grad_norm": 0.22676898539066315, "learning_rate": 0.0004981344933716603, "loss": 2.455, "step": 456390 }, { "epoch": 0.9092502868800204, "grad_norm": 0.2021336853504181, "learning_rate": 0.000498001331558513, "loss": 2.4536, "step": 456400 }, { "epoch": 0.9092702091036593, "grad_norm": 0.18970942497253418, "learning_rate": 0.0004978681815499679, "loss": 2.4373, "step": 456410 }, { "epoch": 0.9092901313272982, "grad_norm": 0.20828741788864136, "learning_rate": 0.0004977350433428863, "loss": 2.4468, "step": 456420 }, { "epoch": 0.9093100535509372, "grad_norm": 0.20498502254486084, "learning_rate": 0.0004976019169341302, "loss": 2.4442, "step": 456430 }, { "epoch": 0.9093299757745761, "grad_norm": 0.1935032159090042, "learning_rate": 0.000497468802320564, "loss": 2.4482, "step": 456440 }, { "epoch": 0.909349897998215, "grad_norm": 0.2346823364496231, "learning_rate": 0.0004973356994990532, "loss": 2.4404, "step": 456450 }, { "epoch": 0.9093698202218539, "grad_norm": 0.23443259298801422, "learning_rate": 0.000497202608466464, "loss": 2.4402, "step": 456460 }, { "epoch": 0.9093897424454928, "grad_norm": 0.1981040984392166, "learning_rate": 0.0004970695292196648, "loss": 2.44, "step": 456470 }, { "epoch": 0.9094096646691318, "grad_norm": 0.18361340463161469, "learning_rate": 0.0004969364617555252, "loss": 2.4348, "step": 456480 }, { "epoch": 0.9094295868927706, "grad_norm": 0.1899949461221695, "learning_rate": 0.0004968034060709159, "loss": 2.4606, "step": 456490 }, { "epoch": 0.9094495091164095, "grad_norm": 0.21587294340133667, "learning_rate": 0.0004966703621627094, "loss": 2.4535, "step": 456500 }, { "epoch": 0.9094694313400484, "grad_norm": 0.20438504219055176, "learning_rate": 0.0004965373300277789, "loss": 2.4393, "step": 456510 }, { "epoch": 0.9094893535636873, "grad_norm": 0.19158031046390533, "learning_rate": 0.0004964043096629998, "loss": 2.4334, "step": 456520 }, { "epoch": 0.9095092757873263, "grad_norm": 0.21856240928173065, "learning_rate": 0.0004962713010652486, "loss": 2.432, "step": 456530 }, { "epoch": 0.9095291980109652, "grad_norm": 0.19665728509426117, "learning_rate": 0.0004961383042314031, "loss": 2.4459, "step": 456540 }, { "epoch": 0.9095491202346041, "grad_norm": 0.19098366796970367, "learning_rate": 0.0004960053191583422, "loss": 2.4574, "step": 456550 }, { "epoch": 0.909569042458243, "grad_norm": 0.20208312571048737, "learning_rate": 0.0004958723458429468, "loss": 2.4582, "step": 456560 }, { "epoch": 0.9095889646818819, "grad_norm": 0.19607411324977875, "learning_rate": 0.0004957393842820986, "loss": 2.4551, "step": 456570 }, { "epoch": 0.9096088869055209, "grad_norm": 0.19822873175144196, "learning_rate": 0.0004956064344726809, "loss": 2.4472, "step": 456580 }, { "epoch": 0.9096288091291598, "grad_norm": 0.20407478511333466, "learning_rate": 0.0004954734964115788, "loss": 2.453, "step": 456590 }, { "epoch": 0.9096487313527987, "grad_norm": 0.19272871315479279, "learning_rate": 0.000495340570095678, "loss": 2.4438, "step": 456600 }, { "epoch": 0.9096686535764376, "grad_norm": 0.19873815774917603, "learning_rate": 0.0004952076555218656, "loss": 2.449, "step": 456610 }, { "epoch": 0.9096885758000764, "grad_norm": 0.22774598002433777, "learning_rate": 0.0004950747526870312, "loss": 2.4504, "step": 456620 }, { "epoch": 0.9097084980237155, "grad_norm": 0.19387906789779663, "learning_rate": 0.0004949418615880647, "loss": 2.4467, "step": 456630 }, { "epoch": 0.9097284202473543, "grad_norm": 0.18057872354984283, "learning_rate": 0.0004948089822218576, "loss": 2.4121, "step": 456640 }, { "epoch": 0.9097483424709932, "grad_norm": 0.20722758769989014, "learning_rate": 0.0004946761145853029, "loss": 2.4411, "step": 456650 }, { "epoch": 0.9097682646946321, "grad_norm": 0.2032184898853302, "learning_rate": 0.0004945432586752949, "loss": 2.438, "step": 456660 }, { "epoch": 0.9097881869182711, "grad_norm": 0.21136152744293213, "learning_rate": 0.0004944104144887294, "loss": 2.4307, "step": 456670 }, { "epoch": 0.90980810914191, "grad_norm": 0.20149579644203186, "learning_rate": 0.0004942775820225032, "loss": 2.4485, "step": 456680 }, { "epoch": 0.9098280313655489, "grad_norm": 0.18820388615131378, "learning_rate": 0.0004941447612735148, "loss": 2.4466, "step": 456690 }, { "epoch": 0.9098479535891878, "grad_norm": 0.20934219658374786, "learning_rate": 0.0004940119522386642, "loss": 2.4371, "step": 456700 }, { "epoch": 0.9098678758128267, "grad_norm": 0.2084777057170868, "learning_rate": 0.000493879154914852, "loss": 2.4499, "step": 456710 }, { "epoch": 0.9098877980364657, "grad_norm": 0.20136025547981262, "learning_rate": 0.0004937463692989815, "loss": 2.4461, "step": 456720 }, { "epoch": 0.9099077202601046, "grad_norm": 0.22235547006130219, "learning_rate": 0.0004936135953879561, "loss": 2.4496, "step": 456730 }, { "epoch": 0.9099276424837435, "grad_norm": 0.19039349257946014, "learning_rate": 0.0004934808331786813, "loss": 2.4511, "step": 456740 }, { "epoch": 0.9099475647073824, "grad_norm": 0.20442914962768555, "learning_rate": 0.0004933480826680636, "loss": 2.4458, "step": 456750 }, { "epoch": 0.9099674869310213, "grad_norm": 0.21554796397686005, "learning_rate": 0.0004932153438530109, "loss": 2.4515, "step": 456760 }, { "epoch": 0.9099874091546603, "grad_norm": 0.1841897964477539, "learning_rate": 0.0004930826167304327, "loss": 2.451, "step": 456770 }, { "epoch": 0.9100073313782991, "grad_norm": 0.21675194799900055, "learning_rate": 0.0004929499012972397, "loss": 2.4452, "step": 456780 }, { "epoch": 0.910027253601938, "grad_norm": 0.23348021507263184, "learning_rate": 0.0004928171975503437, "loss": 2.4585, "step": 456790 }, { "epoch": 0.9100471758255769, "grad_norm": 0.20150533318519592, "learning_rate": 0.0004926845054866582, "loss": 2.4463, "step": 456800 }, { "epoch": 0.9100670980492158, "grad_norm": 0.17517217993736267, "learning_rate": 0.0004925518251030983, "loss": 2.4539, "step": 456810 }, { "epoch": 0.9100870202728548, "grad_norm": 0.22428759932518005, "learning_rate": 0.0004924191563965798, "loss": 2.4561, "step": 456820 }, { "epoch": 0.9101069424964937, "grad_norm": 0.1949317753314972, "learning_rate": 0.0004922864993640204, "loss": 2.4417, "step": 456830 }, { "epoch": 0.9101268647201326, "grad_norm": 0.20076411962509155, "learning_rate": 0.000492153854002339, "loss": 2.4456, "step": 456840 }, { "epoch": 0.9101467869437715, "grad_norm": 0.19879640638828278, "learning_rate": 0.0004920212203084553, "loss": 2.4594, "step": 456850 }, { "epoch": 0.9101667091674104, "grad_norm": 0.20188701152801514, "learning_rate": 0.0004918885982792916, "loss": 2.4439, "step": 456860 }, { "epoch": 0.9101866313910494, "grad_norm": 0.19413423538208008, "learning_rate": 0.0004917559879117704, "loss": 2.4458, "step": 456870 }, { "epoch": 0.9102065536146883, "grad_norm": 0.21087364852428436, "learning_rate": 0.0004916233892028159, "loss": 2.431, "step": 456880 }, { "epoch": 0.9102264758383272, "grad_norm": 0.2161530703306198, "learning_rate": 0.0004914908021493538, "loss": 2.4328, "step": 456890 }, { "epoch": 0.9102463980619661, "grad_norm": 0.22402949631214142, "learning_rate": 0.0004913582267483112, "loss": 2.439, "step": 456900 }, { "epoch": 0.910266320285605, "grad_norm": 0.2003614753484726, "learning_rate": 0.0004912256629966163, "loss": 2.4449, "step": 456910 }, { "epoch": 0.910286242509244, "grad_norm": 0.19521501660346985, "learning_rate": 0.0004910931108911986, "loss": 2.4395, "step": 456920 }, { "epoch": 0.9103061647328828, "grad_norm": 0.19664403796195984, "learning_rate": 0.0004909605704289897, "loss": 2.4507, "step": 456930 }, { "epoch": 0.9103260869565217, "grad_norm": 0.19821931421756744, "learning_rate": 0.0004908280416069215, "loss": 2.4342, "step": 456940 }, { "epoch": 0.9103460091801606, "grad_norm": 0.22202761471271515, "learning_rate": 0.0004906955244219275, "loss": 2.4421, "step": 456950 }, { "epoch": 0.9103659314037996, "grad_norm": 0.1866215318441391, "learning_rate": 0.0004905630188709434, "loss": 2.4545, "step": 456960 }, { "epoch": 0.9103858536274385, "grad_norm": 0.22344394028186798, "learning_rate": 0.0004904305249509052, "loss": 2.4471, "step": 456970 }, { "epoch": 0.9104057758510774, "grad_norm": 0.21917982399463654, "learning_rate": 0.0004902980426587505, "loss": 2.4542, "step": 456980 }, { "epoch": 0.9104256980747163, "grad_norm": 0.21404410898685455, "learning_rate": 0.0004901655719914186, "loss": 2.4343, "step": 456990 }, { "epoch": 0.9104456202983552, "grad_norm": 0.19368083775043488, "learning_rate": 0.0004900331129458502, "loss": 2.4593, "step": 457000 }, { "epoch": 0.9104655425219942, "grad_norm": 0.19397316873073578, "learning_rate": 0.0004899006655189864, "loss": 2.4526, "step": 457010 }, { "epoch": 0.9104854647456331, "grad_norm": 0.1863994151353836, "learning_rate": 0.000489768229707771, "loss": 2.45, "step": 457020 }, { "epoch": 0.910505386969272, "grad_norm": 0.20291295647621155, "learning_rate": 0.0004896358055091481, "loss": 2.4475, "step": 457030 }, { "epoch": 0.9105253091929109, "grad_norm": 0.18848532438278198, "learning_rate": 0.0004895033929200634, "loss": 2.4443, "step": 457040 }, { "epoch": 0.9105452314165497, "grad_norm": 0.20200079679489136, "learning_rate": 0.0004893709919374645, "loss": 2.4512, "step": 457050 }, { "epoch": 0.9105651536401888, "grad_norm": 0.21304376423358917, "learning_rate": 0.0004892386025582993, "loss": 2.4443, "step": 457060 }, { "epoch": 0.9105850758638276, "grad_norm": 0.20110194385051727, "learning_rate": 0.0004891062247795181, "loss": 2.4372, "step": 457070 }, { "epoch": 0.9106049980874665, "grad_norm": 0.19373875856399536, "learning_rate": 0.0004889738585980718, "loss": 2.4419, "step": 457080 }, { "epoch": 0.9106249203111054, "grad_norm": 0.20282219350337982, "learning_rate": 0.0004888415040109129, "loss": 2.4533, "step": 457090 }, { "epoch": 0.9106448425347443, "grad_norm": 0.18160170316696167, "learning_rate": 0.0004887091610149952, "loss": 2.4265, "step": 457100 }, { "epoch": 0.9106647647583833, "grad_norm": 0.20966015756130219, "learning_rate": 0.0004885768296072737, "loss": 2.4474, "step": 457110 }, { "epoch": 0.9106846869820222, "grad_norm": 0.19751198589801788, "learning_rate": 0.0004884445097847052, "loss": 2.4262, "step": 457120 }, { "epoch": 0.9107046092056611, "grad_norm": 0.20351524651050568, "learning_rate": 0.0004883122015442474, "loss": 2.4517, "step": 457130 }, { "epoch": 0.9107245314293, "grad_norm": 0.18975646793842316, "learning_rate": 0.00048817990488285944, "loss": 2.4526, "step": 457140 }, { "epoch": 0.9107444536529389, "grad_norm": 0.19501197338104248, "learning_rate": 0.00048804761979750165, "loss": 2.4243, "step": 457150 }, { "epoch": 0.9107643758765779, "grad_norm": 0.19973090291023254, "learning_rate": 0.000487915346285136, "loss": 2.4403, "step": 457160 }, { "epoch": 0.9107842981002168, "grad_norm": 0.23004667460918427, "learning_rate": 0.00048778308434272577, "loss": 2.4497, "step": 457170 }, { "epoch": 0.9108042203238557, "grad_norm": 0.2187279313802719, "learning_rate": 0.00048765083396723496, "loss": 2.4495, "step": 457180 }, { "epoch": 0.9108241425474946, "grad_norm": 0.2056492120027542, "learning_rate": 0.0004875185951556298, "loss": 2.4493, "step": 457190 }, { "epoch": 0.9108440647711334, "grad_norm": 0.28386810421943665, "learning_rate": 0.0004873863679048771, "loss": 2.4428, "step": 457200 }, { "epoch": 0.9108639869947724, "grad_norm": 0.20719113945960999, "learning_rate": 0.00048725415221194537, "loss": 2.4393, "step": 457210 }, { "epoch": 0.9108839092184113, "grad_norm": 0.19248147308826447, "learning_rate": 0.0004871219480738045, "loss": 2.4344, "step": 457220 }, { "epoch": 0.9109038314420502, "grad_norm": 0.18774184584617615, "learning_rate": 0.0004869897554874256, "loss": 2.4447, "step": 457230 }, { "epoch": 0.9109237536656891, "grad_norm": 0.20814503729343414, "learning_rate": 0.000486857574449781, "loss": 2.4379, "step": 457240 }, { "epoch": 0.910943675889328, "grad_norm": 0.2067018300294876, "learning_rate": 0.00048672540495784447, "loss": 2.442, "step": 457250 }, { "epoch": 0.910963598112967, "grad_norm": 0.22973749041557312, "learning_rate": 0.000486593247008591, "loss": 2.4507, "step": 457260 }, { "epoch": 0.9109835203366059, "grad_norm": 0.20736679434776306, "learning_rate": 0.00048646110059899695, "loss": 2.4343, "step": 457270 }, { "epoch": 0.9110034425602448, "grad_norm": 0.1922300159931183, "learning_rate": 0.0004863289657260399, "loss": 2.4409, "step": 457280 }, { "epoch": 0.9110233647838837, "grad_norm": 0.18948994576931, "learning_rate": 0.00048619684238669934, "loss": 2.4416, "step": 457290 }, { "epoch": 0.9110432870075227, "grad_norm": 0.19804257154464722, "learning_rate": 0.0004860647305779551, "loss": 2.4361, "step": 457300 }, { "epoch": 0.9110632092311616, "grad_norm": 0.20523189008235931, "learning_rate": 0.0004859326302967888, "loss": 2.4407, "step": 457310 }, { "epoch": 0.9110831314548005, "grad_norm": 0.19792954623699188, "learning_rate": 0.00048580054154018405, "loss": 2.4533, "step": 457320 }, { "epoch": 0.9111030536784394, "grad_norm": 0.21013613045215607, "learning_rate": 0.0004856684643051246, "loss": 2.4573, "step": 457330 }, { "epoch": 0.9111229759020782, "grad_norm": 0.1956992745399475, "learning_rate": 0.0004855363985885959, "loss": 2.445, "step": 457340 }, { "epoch": 0.9111428981257172, "grad_norm": 0.20122645795345306, "learning_rate": 0.0004854043443875853, "loss": 2.4494, "step": 457350 }, { "epoch": 0.9111628203493561, "grad_norm": 0.20255939662456512, "learning_rate": 0.000485272301699081, "loss": 2.4453, "step": 457360 }, { "epoch": 0.911182742572995, "grad_norm": 0.1882673054933548, "learning_rate": 0.0004851402705200723, "loss": 2.454, "step": 457370 }, { "epoch": 0.9112026647966339, "grad_norm": 0.22774411737918854, "learning_rate": 0.00048500825084754997, "loss": 2.4476, "step": 457380 }, { "epoch": 0.9112225870202728, "grad_norm": 0.2150030881166458, "learning_rate": 0.0004848762426785065, "loss": 2.4382, "step": 457390 }, { "epoch": 0.9112425092439118, "grad_norm": 0.21442218124866486, "learning_rate": 0.00048474424600993514, "loss": 2.4347, "step": 457400 }, { "epoch": 0.9112624314675507, "grad_norm": 0.2621428966522217, "learning_rate": 0.0004846122608388306, "loss": 2.4381, "step": 457410 }, { "epoch": 0.9112823536911896, "grad_norm": 0.2124066948890686, "learning_rate": 0.0004844802871621894, "loss": 2.4509, "step": 457420 }, { "epoch": 0.9113022759148285, "grad_norm": 0.2001834213733673, "learning_rate": 0.0004843483249770084, "loss": 2.4444, "step": 457430 }, { "epoch": 0.9113221981384674, "grad_norm": 0.21292448043823242, "learning_rate": 0.00048421637428028676, "loss": 2.4453, "step": 457440 }, { "epoch": 0.9113421203621064, "grad_norm": 0.19470269978046417, "learning_rate": 0.0004840844350690241, "loss": 2.4496, "step": 457450 }, { "epoch": 0.9113620425857453, "grad_norm": 0.2397078275680542, "learning_rate": 0.00048395250734022176, "loss": 2.4356, "step": 457460 }, { "epoch": 0.9113819648093842, "grad_norm": 0.2104749232530594, "learning_rate": 0.0004838205910908828, "loss": 2.4356, "step": 457470 }, { "epoch": 0.911401887033023, "grad_norm": 0.20174291729927063, "learning_rate": 0.0004836886863180108, "loss": 2.4489, "step": 457480 }, { "epoch": 0.9114218092566619, "grad_norm": 0.20775499939918518, "learning_rate": 0.0004835567930186109, "loss": 2.4512, "step": 457490 }, { "epoch": 0.9114417314803009, "grad_norm": 0.21215899288654327, "learning_rate": 0.00048342491118969, "loss": 2.4332, "step": 457500 }, { "epoch": 0.9114616537039398, "grad_norm": 0.20545272529125214, "learning_rate": 0.0004832930408282554, "loss": 2.4235, "step": 457510 }, { "epoch": 0.9114815759275787, "grad_norm": 0.19743332266807556, "learning_rate": 0.00048316118193131666, "loss": 2.4423, "step": 457520 }, { "epoch": 0.9115014981512176, "grad_norm": 0.19023464620113373, "learning_rate": 0.0004830293344958843, "loss": 2.4372, "step": 457530 }, { "epoch": 0.9115214203748565, "grad_norm": 0.19394612312316895, "learning_rate": 0.0004828974985189696, "loss": 2.4363, "step": 457540 }, { "epoch": 0.9115413425984955, "grad_norm": 0.19607539474964142, "learning_rate": 0.00048276567399758584, "loss": 2.4519, "step": 457550 }, { "epoch": 0.9115612648221344, "grad_norm": 0.18673649430274963, "learning_rate": 0.0004826338609287473, "loss": 2.4423, "step": 457560 }, { "epoch": 0.9115811870457733, "grad_norm": 0.2197742909193039, "learning_rate": 0.00048250205930946977, "loss": 2.4571, "step": 457570 }, { "epoch": 0.9116011092694122, "grad_norm": 0.2206367701292038, "learning_rate": 0.00048237026913677, "loss": 2.4387, "step": 457580 }, { "epoch": 0.9116210314930512, "grad_norm": 0.19474568963050842, "learning_rate": 0.0004822384904076662, "loss": 2.4282, "step": 457590 }, { "epoch": 0.9116409537166901, "grad_norm": 0.21844874322414398, "learning_rate": 0.00048210672311917803, "loss": 2.4374, "step": 457600 }, { "epoch": 0.911660875940329, "grad_norm": 0.21794117987155914, "learning_rate": 0.000481974967268326, "loss": 2.4564, "step": 457610 }, { "epoch": 0.9116807981639679, "grad_norm": 0.19543153047561646, "learning_rate": 0.00048184322285213234, "loss": 2.4574, "step": 457620 }, { "epoch": 0.9117007203876067, "grad_norm": 0.19414758682250977, "learning_rate": 0.0004817114898676207, "loss": 2.4383, "step": 457630 }, { "epoch": 0.9117206426112457, "grad_norm": 0.19513925909996033, "learning_rate": 0.0004815797683118155, "loss": 2.4352, "step": 457640 }, { "epoch": 0.9117405648348846, "grad_norm": 0.2224668562412262, "learning_rate": 0.00048144805818174267, "loss": 2.4534, "step": 457650 }, { "epoch": 0.9117604870585235, "grad_norm": 0.20684902369976044, "learning_rate": 0.00048131635947442963, "loss": 2.4345, "step": 457660 }, { "epoch": 0.9117804092821624, "grad_norm": 0.1961706578731537, "learning_rate": 0.0004811846721869049, "loss": 2.4582, "step": 457670 }, { "epoch": 0.9118003315058013, "grad_norm": 0.19484220445156097, "learning_rate": 0.00048105299631619804, "loss": 2.4544, "step": 457680 }, { "epoch": 0.9118202537294403, "grad_norm": 0.19189044833183289, "learning_rate": 0.0004809213318593406, "loss": 2.4448, "step": 457690 }, { "epoch": 0.9118401759530792, "grad_norm": 0.19381354749202728, "learning_rate": 0.00048078967881336453, "loss": 2.4494, "step": 457700 }, { "epoch": 0.9118600981767181, "grad_norm": 0.1940164715051651, "learning_rate": 0.000480658037175304, "loss": 2.4379, "step": 457710 }, { "epoch": 0.911880020400357, "grad_norm": 0.20409059524536133, "learning_rate": 0.00048052640694219376, "loss": 2.4617, "step": 457720 }, { "epoch": 0.9118999426239959, "grad_norm": 0.20628570020198822, "learning_rate": 0.00048039478811107, "loss": 2.4386, "step": 457730 }, { "epoch": 0.9119198648476349, "grad_norm": 0.19612397253513336, "learning_rate": 0.00048026318067897035, "loss": 2.4381, "step": 457740 }, { "epoch": 0.9119397870712738, "grad_norm": 0.19107384979724884, "learning_rate": 0.0004801315846429337, "loss": 2.4524, "step": 457750 }, { "epoch": 0.9119597092949127, "grad_norm": 0.1811482310295105, "learning_rate": 0.00048, "loss": 2.435, "step": 457760 }, { "epoch": 0.9119796315185515, "grad_norm": 0.2101098597049713, "learning_rate": 0.00047986842674721086, "loss": 2.4377, "step": 457770 }, { "epoch": 0.9119995537421904, "grad_norm": 0.2043110430240631, "learning_rate": 0.00047973686488160874, "loss": 2.445, "step": 457780 }, { "epoch": 0.9120194759658294, "grad_norm": 0.19807815551757812, "learning_rate": 0.0004796053144002377, "loss": 2.4496, "step": 457790 }, { "epoch": 0.9120393981894683, "grad_norm": 0.19081075489521027, "learning_rate": 0.000479473775300143, "loss": 2.4458, "step": 457800 }, { "epoch": 0.9120593204131072, "grad_norm": 0.2054743468761444, "learning_rate": 0.00047934224757837106, "loss": 2.4513, "step": 457810 }, { "epoch": 0.9120792426367461, "grad_norm": 0.20463019609451294, "learning_rate": 0.00047921073123196977, "loss": 2.4374, "step": 457820 }, { "epoch": 0.912099164860385, "grad_norm": 0.2055499255657196, "learning_rate": 0.0004790792262579882, "loss": 2.4412, "step": 457830 }, { "epoch": 0.912119087084024, "grad_norm": 0.20219741761684418, "learning_rate": 0.0004789477326534766, "loss": 2.4454, "step": 457840 }, { "epoch": 0.9121390093076629, "grad_norm": 0.20509734749794006, "learning_rate": 0.0004788162504154865, "loss": 2.4476, "step": 457850 }, { "epoch": 0.9121589315313018, "grad_norm": 0.19372260570526123, "learning_rate": 0.0004786847795410709, "loss": 2.4539, "step": 457860 }, { "epoch": 0.9121788537549407, "grad_norm": 0.21722908318042755, "learning_rate": 0.000478553320027284, "loss": 2.4519, "step": 457870 }, { "epoch": 0.9121987759785797, "grad_norm": 0.20370925962924957, "learning_rate": 0.00047842187187118124, "loss": 2.441, "step": 457880 }, { "epoch": 0.9122186982022186, "grad_norm": 0.19697874784469604, "learning_rate": 0.0004782904350698194, "loss": 2.4394, "step": 457890 }, { "epoch": 0.9122386204258575, "grad_norm": 0.1902419477701187, "learning_rate": 0.00047815900962025617, "loss": 2.45, "step": 457900 }, { "epoch": 0.9122585426494964, "grad_norm": 0.21278759837150574, "learning_rate": 0.000478027595519551, "loss": 2.4521, "step": 457910 }, { "epoch": 0.9122784648731352, "grad_norm": 0.19867154955863953, "learning_rate": 0.00047789619276476406, "loss": 2.4361, "step": 457920 }, { "epoch": 0.9122983870967742, "grad_norm": 0.18820059299468994, "learning_rate": 0.0004777648013529579, "loss": 2.4532, "step": 457930 }, { "epoch": 0.9123183093204131, "grad_norm": 0.19729086756706238, "learning_rate": 0.0004776334212811948, "loss": 2.4409, "step": 457940 }, { "epoch": 0.912338231544052, "grad_norm": 0.2070324867963791, "learning_rate": 0.0004775020525465394, "loss": 2.4486, "step": 457950 }, { "epoch": 0.9123581537676909, "grad_norm": 0.20726525783538818, "learning_rate": 0.0004773706951460575, "loss": 2.4316, "step": 457960 }, { "epoch": 0.9123780759913298, "grad_norm": 0.17500914633274078, "learning_rate": 0.00047723934907681565, "loss": 2.4352, "step": 457970 }, { "epoch": 0.9123979982149688, "grad_norm": 0.18628083169460297, "learning_rate": 0.00047710801433588216, "loss": 2.4528, "step": 457980 }, { "epoch": 0.9124179204386077, "grad_norm": 0.18239270150661469, "learning_rate": 0.00047697669092032616, "loss": 2.4463, "step": 457990 }, { "epoch": 0.9124378426622466, "grad_norm": 0.22823466360569, "learning_rate": 0.00047684537882721845, "loss": 2.4465, "step": 458000 }, { "epoch": 0.9124577648858855, "grad_norm": 0.20135943591594696, "learning_rate": 0.000476714078053631, "loss": 2.4187, "step": 458010 }, { "epoch": 0.9124776871095244, "grad_norm": 0.2005084753036499, "learning_rate": 0.00047658278859663653, "loss": 2.4514, "step": 458020 }, { "epoch": 0.9124976093331634, "grad_norm": 0.20663833618164062, "learning_rate": 0.00047645151045331023, "loss": 2.4155, "step": 458030 }, { "epoch": 0.9125175315568023, "grad_norm": 0.20747555792331696, "learning_rate": 0.00047632024362072724, "loss": 2.4439, "step": 458040 }, { "epoch": 0.9125374537804412, "grad_norm": 0.20143572986125946, "learning_rate": 0.0004761889880959647, "loss": 2.4306, "step": 458050 }, { "epoch": 0.91255737600408, "grad_norm": 0.2114853858947754, "learning_rate": 0.0004760577438761009, "loss": 2.418, "step": 458060 }, { "epoch": 0.9125772982277189, "grad_norm": 0.18714694678783417, "learning_rate": 0.0004759265109582149, "loss": 2.4356, "step": 458070 }, { "epoch": 0.9125972204513579, "grad_norm": 0.19627171754837036, "learning_rate": 0.000475795289339388, "loss": 2.4462, "step": 458080 }, { "epoch": 0.9126171426749968, "grad_norm": 0.19350622594356537, "learning_rate": 0.0004756640790167017, "loss": 2.4592, "step": 458090 }, { "epoch": 0.9126370648986357, "grad_norm": 0.2146250605583191, "learning_rate": 0.00047553287998723957, "loss": 2.4434, "step": 458100 }, { "epoch": 0.9126569871222746, "grad_norm": 0.19253788888454437, "learning_rate": 0.0004754016922480859, "loss": 2.4343, "step": 458110 }, { "epoch": 0.9126769093459135, "grad_norm": 0.2077011615037918, "learning_rate": 0.0004752705157963266, "loss": 2.4592, "step": 458120 }, { "epoch": 0.9126968315695525, "grad_norm": 0.21903108060359955, "learning_rate": 0.00047513935062904844, "loss": 2.4393, "step": 458130 }, { "epoch": 0.9127167537931914, "grad_norm": 0.19984140992164612, "learning_rate": 0.00047500819674333995, "loss": 2.4392, "step": 458140 }, { "epoch": 0.9127366760168303, "grad_norm": 0.1983356773853302, "learning_rate": 0.00047487705413629035, "loss": 2.4494, "step": 458150 }, { "epoch": 0.9127565982404692, "grad_norm": 0.23893988132476807, "learning_rate": 0.0004747459228049904, "loss": 2.4292, "step": 458160 }, { "epoch": 0.9127765204641082, "grad_norm": 0.20754127204418182, "learning_rate": 0.00047461480274653243, "loss": 2.4404, "step": 458170 }, { "epoch": 0.9127964426877471, "grad_norm": 0.2011164277791977, "learning_rate": 0.0004744836939580095, "loss": 2.4554, "step": 458180 }, { "epoch": 0.912816364911386, "grad_norm": 0.21468497812747955, "learning_rate": 0.00047435259643651605, "loss": 2.446, "step": 458190 }, { "epoch": 0.9128362871350248, "grad_norm": 0.22039546072483063, "learning_rate": 0.00047422151017914804, "loss": 2.4458, "step": 458200 }, { "epoch": 0.9128562093586637, "grad_norm": 0.21094626188278198, "learning_rate": 0.0004740904351830022, "loss": 2.4281, "step": 458210 }, { "epoch": 0.9128761315823027, "grad_norm": 0.22182440757751465, "learning_rate": 0.0004739593714451766, "loss": 2.4356, "step": 458220 }, { "epoch": 0.9128960538059416, "grad_norm": 0.2009890377521515, "learning_rate": 0.00047382831896277143, "loss": 2.4358, "step": 458230 }, { "epoch": 0.9129159760295805, "grad_norm": 0.3286466896533966, "learning_rate": 0.00047369727773288696, "loss": 2.4469, "step": 458240 }, { "epoch": 0.9129358982532194, "grad_norm": 0.19737082719802856, "learning_rate": 0.00047356624775262524, "loss": 2.4295, "step": 458250 }, { "epoch": 0.9129558204768583, "grad_norm": 0.2086038440465927, "learning_rate": 0.0004734352290190893, "loss": 2.4513, "step": 458260 }, { "epoch": 0.9129757427004973, "grad_norm": 0.20594942569732666, "learning_rate": 0.0004733042215293841, "loss": 2.4391, "step": 458270 }, { "epoch": 0.9129956649241362, "grad_norm": 0.18465347588062286, "learning_rate": 0.00047317322528061493, "loss": 2.4363, "step": 458280 }, { "epoch": 0.9130155871477751, "grad_norm": 0.20537807047367096, "learning_rate": 0.00047304224026988885, "loss": 2.4375, "step": 458290 }, { "epoch": 0.913035509371414, "grad_norm": 0.20322185754776, "learning_rate": 0.00047291126649431404, "loss": 2.4417, "step": 458300 }, { "epoch": 0.9130554315950529, "grad_norm": 0.21495603024959564, "learning_rate": 0.0004727803039510001, "loss": 2.4354, "step": 458310 }, { "epoch": 0.9130753538186919, "grad_norm": 0.1852308064699173, "learning_rate": 0.0004726493526370572, "loss": 2.4338, "step": 458320 }, { "epoch": 0.9130952760423308, "grad_norm": 0.19125770032405853, "learning_rate": 0.00047251841254959806, "loss": 2.4435, "step": 458330 }, { "epoch": 0.9131151982659697, "grad_norm": 0.21513023972511292, "learning_rate": 0.00047238748368573513, "loss": 2.4348, "step": 458340 }, { "epoch": 0.9131351204896085, "grad_norm": 0.19230008125305176, "learning_rate": 0.00047225656604258325, "loss": 2.4531, "step": 458350 }, { "epoch": 0.9131550427132474, "grad_norm": 0.22201943397521973, "learning_rate": 0.0004721256596172576, "loss": 2.4328, "step": 458360 }, { "epoch": 0.9131749649368864, "grad_norm": 0.21055254340171814, "learning_rate": 0.0004719947644068754, "loss": 2.4345, "step": 458370 }, { "epoch": 0.9131948871605253, "grad_norm": 0.19470779597759247, "learning_rate": 0.00047186388040855465, "loss": 2.4544, "step": 458380 }, { "epoch": 0.9132148093841642, "grad_norm": 0.22374507784843445, "learning_rate": 0.0004717330076194146, "loss": 2.4383, "step": 458390 }, { "epoch": 0.9132347316078031, "grad_norm": 0.2208961695432663, "learning_rate": 0.000471602146036576, "loss": 2.4379, "step": 458400 }, { "epoch": 0.913254653831442, "grad_norm": 0.2110244333744049, "learning_rate": 0.00047147129565716054, "loss": 2.4551, "step": 458410 }, { "epoch": 0.913274576055081, "grad_norm": 0.20975421369075775, "learning_rate": 0.0004713404564782908, "loss": 2.4422, "step": 458420 }, { "epoch": 0.9132944982787199, "grad_norm": 0.19373305141925812, "learning_rate": 0.0004712096284970917, "loss": 2.4351, "step": 458430 }, { "epoch": 0.9133144205023588, "grad_norm": 0.2049371749162674, "learning_rate": 0.00047107881171068856, "loss": 2.4334, "step": 458440 }, { "epoch": 0.9133343427259977, "grad_norm": 0.21966470777988434, "learning_rate": 0.0004709480061162079, "loss": 2.437, "step": 458450 }, { "epoch": 0.9133542649496367, "grad_norm": 0.21507367491722107, "learning_rate": 0.00047081721171077787, "loss": 2.4373, "step": 458460 }, { "epoch": 0.9133741871732756, "grad_norm": 0.1902637630701065, "learning_rate": 0.0004706864284915275, "loss": 2.4238, "step": 458470 }, { "epoch": 0.9133941093969145, "grad_norm": 0.20826628804206848, "learning_rate": 0.00047055565645558704, "loss": 2.4479, "step": 458480 }, { "epoch": 0.9134140316205533, "grad_norm": 0.1907881200313568, "learning_rate": 0.0004704248956000885, "loss": 2.4542, "step": 458490 }, { "epoch": 0.9134339538441922, "grad_norm": 0.2319958209991455, "learning_rate": 0.00047029414592216455, "loss": 2.4512, "step": 458500 }, { "epoch": 0.9134538760678312, "grad_norm": 0.19046275317668915, "learning_rate": 0.0004701634074189494, "loss": 2.4346, "step": 458510 }, { "epoch": 0.9134737982914701, "grad_norm": 0.19690720736980438, "learning_rate": 0.00047003268008757784, "loss": 2.4425, "step": 458520 }, { "epoch": 0.913493720515109, "grad_norm": 0.20937827229499817, "learning_rate": 0.0004699019639251869, "loss": 2.4499, "step": 458530 }, { "epoch": 0.9135136427387479, "grad_norm": 0.215205579996109, "learning_rate": 0.00046977125892891424, "loss": 2.4446, "step": 458540 }, { "epoch": 0.9135335649623868, "grad_norm": 0.21731406450271606, "learning_rate": 0.0004696405650958988, "loss": 2.4412, "step": 458550 }, { "epoch": 0.9135534871860258, "grad_norm": 0.20191554725170135, "learning_rate": 0.0004695098824232806, "loss": 2.451, "step": 458560 }, { "epoch": 0.9135734094096647, "grad_norm": 0.18671418726444244, "learning_rate": 0.00046937921090820157, "loss": 2.4478, "step": 458570 }, { "epoch": 0.9135933316333036, "grad_norm": 0.1832067221403122, "learning_rate": 0.0004692485505478037, "loss": 2.4273, "step": 458580 }, { "epoch": 0.9136132538569425, "grad_norm": 0.21103094518184662, "learning_rate": 0.0004691179013392313, "loss": 2.4395, "step": 458590 }, { "epoch": 0.9136331760805814, "grad_norm": 0.19647887349128723, "learning_rate": 0.00046898726327962903, "loss": 2.4471, "step": 458600 }, { "epoch": 0.9136530983042204, "grad_norm": 0.1995116025209427, "learning_rate": 0.0004688566363661435, "loss": 2.4356, "step": 458610 }, { "epoch": 0.9136730205278593, "grad_norm": 0.21503275632858276, "learning_rate": 0.0004687260205959221, "loss": 2.4559, "step": 458620 }, { "epoch": 0.9136929427514981, "grad_norm": 0.19449760019779205, "learning_rate": 0.00046859541596611364, "loss": 2.4137, "step": 458630 }, { "epoch": 0.913712864975137, "grad_norm": 0.21361209452152252, "learning_rate": 0.0004684648224738681, "loss": 2.4467, "step": 458640 }, { "epoch": 0.9137327871987759, "grad_norm": 0.22929830849170685, "learning_rate": 0.00046833424011633664, "loss": 2.4404, "step": 458650 }, { "epoch": 0.9137527094224149, "grad_norm": 0.20074187219142914, "learning_rate": 0.00046820366889067146, "loss": 2.4543, "step": 458660 }, { "epoch": 0.9137726316460538, "grad_norm": 0.2100244164466858, "learning_rate": 0.000468073108794026, "loss": 2.4313, "step": 458670 }, { "epoch": 0.9137925538696927, "grad_norm": 0.19087886810302734, "learning_rate": 0.0004679425598235554, "loss": 2.4311, "step": 458680 }, { "epoch": 0.9138124760933316, "grad_norm": 0.19874687492847443, "learning_rate": 0.0004678120219764157, "loss": 2.4577, "step": 458690 }, { "epoch": 0.9138323983169705, "grad_norm": 0.19488784670829773, "learning_rate": 0.0004676814952497637, "loss": 2.4397, "step": 458700 }, { "epoch": 0.9138523205406095, "grad_norm": 0.22600524127483368, "learning_rate": 0.0004675509796407582, "loss": 2.4511, "step": 458710 }, { "epoch": 0.9138722427642484, "grad_norm": 0.2019926905632019, "learning_rate": 0.0004674204751465585, "loss": 2.4394, "step": 458720 }, { "epoch": 0.9138921649878873, "grad_norm": 0.19588488340377808, "learning_rate": 0.0004672899817643259, "loss": 2.4363, "step": 458730 }, { "epoch": 0.9139120872115262, "grad_norm": 0.1933189183473587, "learning_rate": 0.0004671594994912223, "loss": 2.4425, "step": 458740 }, { "epoch": 0.9139320094351651, "grad_norm": 0.21530857682228088, "learning_rate": 0.00046702902832441073, "loss": 2.439, "step": 458750 }, { "epoch": 0.9139519316588041, "grad_norm": 0.19950319826602936, "learning_rate": 0.00046689856826105604, "loss": 2.4442, "step": 458760 }, { "epoch": 0.913971853882443, "grad_norm": 0.18924580514431, "learning_rate": 0.0004667681192983235, "loss": 2.4406, "step": 458770 }, { "epoch": 0.9139917761060818, "grad_norm": 0.21129336953163147, "learning_rate": 0.00046663768143338015, "loss": 2.4376, "step": 458780 }, { "epoch": 0.9140116983297207, "grad_norm": 0.20006826519966125, "learning_rate": 0.0004665072546633944, "loss": 2.4392, "step": 458790 }, { "epoch": 0.9140316205533597, "grad_norm": 0.20316526293754578, "learning_rate": 0.00046637683898553497, "loss": 2.4347, "step": 458800 }, { "epoch": 0.9140515427769986, "grad_norm": 0.20450246334075928, "learning_rate": 0.0004662464343969728, "loss": 2.4443, "step": 458810 }, { "epoch": 0.9140714650006375, "grad_norm": 0.1971508264541626, "learning_rate": 0.00046611604089487926, "loss": 2.4416, "step": 458820 }, { "epoch": 0.9140913872242764, "grad_norm": 0.19788548350334167, "learning_rate": 0.0004659856584764273, "loss": 2.4398, "step": 458830 }, { "epoch": 0.9141113094479153, "grad_norm": 0.2238151729106903, "learning_rate": 0.0004658552871387913, "loss": 2.4427, "step": 458840 }, { "epoch": 0.9141312316715543, "grad_norm": 0.21474167704582214, "learning_rate": 0.00046572492687914656, "loss": 2.4386, "step": 458850 }, { "epoch": 0.9141511538951932, "grad_norm": 0.19387191534042358, "learning_rate": 0.0004655945776946693, "loss": 2.4472, "step": 458860 }, { "epoch": 0.9141710761188321, "grad_norm": 0.19997312128543854, "learning_rate": 0.00046546423958253747, "loss": 2.4602, "step": 458870 }, { "epoch": 0.914190998342471, "grad_norm": 0.2034490704536438, "learning_rate": 0.00046533391253992986, "loss": 2.4407, "step": 458880 }, { "epoch": 0.9142109205661099, "grad_norm": 0.22290009260177612, "learning_rate": 0.00046520359656402645, "loss": 2.4521, "step": 458890 }, { "epoch": 0.9142308427897489, "grad_norm": 0.20831067860126495, "learning_rate": 0.0004650732916520086, "loss": 2.4476, "step": 458900 }, { "epoch": 0.9142507650133878, "grad_norm": 0.23218831419944763, "learning_rate": 0.0004649429978010589, "loss": 2.4314, "step": 458910 }, { "epoch": 0.9142706872370266, "grad_norm": 0.19221092760562897, "learning_rate": 0.000464812715008361, "loss": 2.4488, "step": 458920 }, { "epoch": 0.9142906094606655, "grad_norm": 0.21444402635097504, "learning_rate": 0.00046468244327109963, "loss": 2.4228, "step": 458930 }, { "epoch": 0.9143105316843044, "grad_norm": 0.21687613427639008, "learning_rate": 0.00046455218258646116, "loss": 2.4383, "step": 458940 }, { "epoch": 0.9143304539079434, "grad_norm": 0.20766475796699524, "learning_rate": 0.0004644219329516326, "loss": 2.436, "step": 458950 }, { "epoch": 0.9143503761315823, "grad_norm": 0.22202812135219574, "learning_rate": 0.0004642916943638027, "loss": 2.4448, "step": 458960 }, { "epoch": 0.9143702983552212, "grad_norm": 0.20369727909564972, "learning_rate": 0.0004641614668201608, "loss": 2.4377, "step": 458970 }, { "epoch": 0.9143902205788601, "grad_norm": 0.1983931064605713, "learning_rate": 0.0004640312503178978, "loss": 2.4444, "step": 458980 }, { "epoch": 0.914410142802499, "grad_norm": 0.21202492713928223, "learning_rate": 0.00046390104485420603, "loss": 2.4568, "step": 458990 }, { "epoch": 0.914430065026138, "grad_norm": 0.19492845237255096, "learning_rate": 0.0004637708504262783, "loss": 2.4276, "step": 459000 }, { "epoch": 0.9144499872497769, "grad_norm": 0.19280636310577393, "learning_rate": 0.00046364066703130934, "loss": 2.444, "step": 459010 }, { "epoch": 0.9144699094734158, "grad_norm": 0.21696382761001587, "learning_rate": 0.00046351049466649476, "loss": 2.4447, "step": 459020 }, { "epoch": 0.9144898316970547, "grad_norm": 0.19630736112594604, "learning_rate": 0.00046338033332903093, "loss": 2.4361, "step": 459030 }, { "epoch": 0.9145097539206936, "grad_norm": 0.25358301401138306, "learning_rate": 0.0004632501830161162, "loss": 2.4336, "step": 459040 }, { "epoch": 0.9145296761443326, "grad_norm": 0.19500550627708435, "learning_rate": 0.00046312004372494985, "loss": 2.4512, "step": 459050 }, { "epoch": 0.9145495983679715, "grad_norm": 0.20808055996894836, "learning_rate": 0.0004629899154527319, "loss": 2.424, "step": 459060 }, { "epoch": 0.9145695205916103, "grad_norm": 0.20274502038955688, "learning_rate": 0.00046285979819666424, "loss": 2.4528, "step": 459070 }, { "epoch": 0.9145894428152492, "grad_norm": 0.19736871123313904, "learning_rate": 0.0004627296919539492, "loss": 2.4424, "step": 459080 }, { "epoch": 0.9146093650388882, "grad_norm": 0.19002817571163177, "learning_rate": 0.00046259959672179086, "loss": 2.4448, "step": 459090 }, { "epoch": 0.9146292872625271, "grad_norm": 0.22167405486106873, "learning_rate": 0.00046246951249739453, "loss": 2.4555, "step": 459100 }, { "epoch": 0.914649209486166, "grad_norm": 0.20469199120998383, "learning_rate": 0.0004623394392779661, "loss": 2.4393, "step": 459110 }, { "epoch": 0.9146691317098049, "grad_norm": 0.2035001516342163, "learning_rate": 0.0004622093770607132, "loss": 2.4479, "step": 459120 }, { "epoch": 0.9146890539334438, "grad_norm": 0.3775191605091095, "learning_rate": 0.0004620793258428444, "loss": 2.4399, "step": 459130 }, { "epoch": 0.9147089761570828, "grad_norm": 0.20168720185756683, "learning_rate": 0.0004619492856215697, "loss": 2.4241, "step": 459140 }, { "epoch": 0.9147288983807217, "grad_norm": 0.2291465550661087, "learning_rate": 0.0004618192563940997, "loss": 2.4514, "step": 459150 }, { "epoch": 0.9147488206043606, "grad_norm": 0.19216729700565338, "learning_rate": 0.0004616892381576472, "loss": 2.4348, "step": 459160 }, { "epoch": 0.9147687428279995, "grad_norm": 0.19567053020000458, "learning_rate": 0.000461559230909425, "loss": 2.4495, "step": 459170 }, { "epoch": 0.9147886650516384, "grad_norm": 0.2135598063468933, "learning_rate": 0.0004614292346466478, "loss": 2.4362, "step": 459180 }, { "epoch": 0.9148085872752774, "grad_norm": 0.20047524571418762, "learning_rate": 0.00046129924936653136, "loss": 2.4352, "step": 459190 }, { "epoch": 0.9148285094989163, "grad_norm": 0.20963814854621887, "learning_rate": 0.0004611692750662924, "loss": 2.4465, "step": 459200 }, { "epoch": 0.9148484317225551, "grad_norm": 0.23961478471755981, "learning_rate": 0.00046103931174314925, "loss": 2.4405, "step": 459210 }, { "epoch": 0.914868353946194, "grad_norm": 0.18924668431282043, "learning_rate": 0.0004609093593943208, "loss": 2.4174, "step": 459220 }, { "epoch": 0.9148882761698329, "grad_norm": 0.19874173402786255, "learning_rate": 0.0004607794180170277, "loss": 2.4485, "step": 459230 }, { "epoch": 0.9149081983934719, "grad_norm": 0.19925332069396973, "learning_rate": 0.00046064948760849144, "loss": 2.4431, "step": 459240 }, { "epoch": 0.9149281206171108, "grad_norm": 0.19469767808914185, "learning_rate": 0.00046051956816593466, "loss": 2.4556, "step": 459250 }, { "epoch": 0.9149480428407497, "grad_norm": 0.196563258767128, "learning_rate": 0.0004603896596865815, "loss": 2.4516, "step": 459260 }, { "epoch": 0.9149679650643886, "grad_norm": 0.20747177302837372, "learning_rate": 0.000460259762167657, "loss": 2.4374, "step": 459270 }, { "epoch": 0.9149878872880275, "grad_norm": 0.250046044588089, "learning_rate": 0.0004601298756063874, "loss": 2.4303, "step": 459280 }, { "epoch": 0.9150078095116665, "grad_norm": 0.19421401619911194, "learning_rate": 0.00045999999999999996, "loss": 2.4394, "step": 459290 }, { "epoch": 0.9150277317353054, "grad_norm": 0.19494904577732086, "learning_rate": 0.00045987013534572353, "loss": 2.434, "step": 459300 }, { "epoch": 0.9150476539589443, "grad_norm": 0.2032286375761032, "learning_rate": 0.0004597402816407878, "loss": 2.4436, "step": 459310 }, { "epoch": 0.9150675761825832, "grad_norm": 0.20761410892009735, "learning_rate": 0.0004596104388824236, "loss": 2.448, "step": 459320 }, { "epoch": 0.915087498406222, "grad_norm": 0.19052740931510925, "learning_rate": 0.000459480607067863, "loss": 2.4548, "step": 459330 }, { "epoch": 0.9151074206298611, "grad_norm": 0.19104516506195068, "learning_rate": 0.00045935078619433936, "loss": 2.4567, "step": 459340 }, { "epoch": 0.9151273428535, "grad_norm": 0.22277846932411194, "learning_rate": 0.00045922097625908734, "loss": 2.4498, "step": 459350 }, { "epoch": 0.9151472650771388, "grad_norm": 0.23890703916549683, "learning_rate": 0.00045909117725934225, "loss": 2.4465, "step": 459360 }, { "epoch": 0.9151671873007777, "grad_norm": 0.2236303836107254, "learning_rate": 0.00045896138919234076, "loss": 2.4433, "step": 459370 }, { "epoch": 0.9151871095244167, "grad_norm": 0.21876311302185059, "learning_rate": 0.0004588316120553213, "loss": 2.435, "step": 459380 }, { "epoch": 0.9152070317480556, "grad_norm": 0.19485591351985931, "learning_rate": 0.0004587018458455223, "loss": 2.4361, "step": 459390 }, { "epoch": 0.9152269539716945, "grad_norm": 0.19688980281352997, "learning_rate": 0.00045857209056018465, "loss": 2.4307, "step": 459400 }, { "epoch": 0.9152468761953334, "grad_norm": 0.20628833770751953, "learning_rate": 0.0004584423461965492, "loss": 2.4368, "step": 459410 }, { "epoch": 0.9152667984189723, "grad_norm": 0.19912102818489075, "learning_rate": 0.00045831261275185885, "loss": 2.4309, "step": 459420 }, { "epoch": 0.9152867206426113, "grad_norm": 0.2140037715435028, "learning_rate": 0.0004581828902233571, "loss": 2.4412, "step": 459430 }, { "epoch": 0.9153066428662502, "grad_norm": 0.21069267392158508, "learning_rate": 0.00045805317860828933, "loss": 2.4442, "step": 459440 }, { "epoch": 0.9153265650898891, "grad_norm": 0.22699928283691406, "learning_rate": 0.0004579234779039012, "loss": 2.4314, "step": 459450 }, { "epoch": 0.915346487313528, "grad_norm": 0.19839581847190857, "learning_rate": 0.0004577937881074399, "loss": 2.4365, "step": 459460 }, { "epoch": 0.9153664095371669, "grad_norm": 0.19178442656993866, "learning_rate": 0.00045766410921615396, "loss": 2.4525, "step": 459470 }, { "epoch": 0.9153863317608059, "grad_norm": 0.20461973547935486, "learning_rate": 0.0004575344412272928, "loss": 2.4365, "step": 459480 }, { "epoch": 0.9154062539844448, "grad_norm": 0.21073074638843536, "learning_rate": 0.00045740478413810705, "loss": 2.4336, "step": 459490 }, { "epoch": 0.9154261762080836, "grad_norm": 0.20630459487438202, "learning_rate": 0.00045727513794584885, "loss": 2.4394, "step": 459500 }, { "epoch": 0.9154460984317225, "grad_norm": 0.19866254925727844, "learning_rate": 0.0004571455026477709, "loss": 2.4329, "step": 459510 }, { "epoch": 0.9154660206553614, "grad_norm": 0.2016671597957611, "learning_rate": 0.00045701587824112735, "loss": 2.4506, "step": 459520 }, { "epoch": 0.9154859428790004, "grad_norm": 0.21586160361766815, "learning_rate": 0.0004568862647231733, "loss": 2.4359, "step": 459530 }, { "epoch": 0.9155058651026393, "grad_norm": 0.19585545361042023, "learning_rate": 0.00045675666209116586, "loss": 2.4446, "step": 459540 }, { "epoch": 0.9155257873262782, "grad_norm": 0.21883679926395416, "learning_rate": 0.00045662707034236207, "loss": 2.4424, "step": 459550 }, { "epoch": 0.9155457095499171, "grad_norm": 0.21268028020858765, "learning_rate": 0.00045649748947402104, "loss": 2.4401, "step": 459560 }, { "epoch": 0.915565631773556, "grad_norm": 0.17975369095802307, "learning_rate": 0.0004563679194834023, "loss": 2.4563, "step": 459570 }, { "epoch": 0.915585553997195, "grad_norm": 0.22056955099105835, "learning_rate": 0.0004562383603677671, "loss": 2.4444, "step": 459580 }, { "epoch": 0.9156054762208339, "grad_norm": 0.22118230164051056, "learning_rate": 0.0004561088121243777, "loss": 2.4533, "step": 459590 }, { "epoch": 0.9156253984444728, "grad_norm": 0.2136945277452469, "learning_rate": 0.0004559792747504974, "loss": 2.4577, "step": 459600 }, { "epoch": 0.9156453206681117, "grad_norm": 0.20262910425662994, "learning_rate": 0.0004558497482433908, "loss": 2.4575, "step": 459610 }, { "epoch": 0.9156652428917506, "grad_norm": 0.21487899124622345, "learning_rate": 0.0004557202326003231, "loss": 2.4435, "step": 459620 }, { "epoch": 0.9156851651153896, "grad_norm": 0.21430860459804535, "learning_rate": 0.00045559072781856157, "loss": 2.4173, "step": 459630 }, { "epoch": 0.9157050873390284, "grad_norm": 0.1908351182937622, "learning_rate": 0.0004554612338953741, "loss": 2.449, "step": 459640 }, { "epoch": 0.9157250095626673, "grad_norm": 0.21394498646259308, "learning_rate": 0.0004553317508280297, "loss": 2.4297, "step": 459650 }, { "epoch": 0.9157449317863062, "grad_norm": 0.23006710410118103, "learning_rate": 0.0004552022786137986, "loss": 2.4465, "step": 459660 }, { "epoch": 0.9157648540099452, "grad_norm": 0.23251736164093018, "learning_rate": 0.00045507281724995206, "loss": 2.4437, "step": 459670 }, { "epoch": 0.9157847762335841, "grad_norm": 0.20984050631523132, "learning_rate": 0.0004549433667337628, "loss": 2.4265, "step": 459680 }, { "epoch": 0.915804698457223, "grad_norm": 0.18542662262916565, "learning_rate": 0.0004548139270625042, "loss": 2.428, "step": 459690 }, { "epoch": 0.9158246206808619, "grad_norm": 0.21022248268127441, "learning_rate": 0.00045468449823345147, "loss": 2.4431, "step": 459700 }, { "epoch": 0.9158445429045008, "grad_norm": 0.9458552002906799, "learning_rate": 0.00045455508024388005, "loss": 2.4584, "step": 459710 }, { "epoch": 0.9158644651281398, "grad_norm": 0.22194011509418488, "learning_rate": 0.0004544256730910674, "loss": 2.4403, "step": 459720 }, { "epoch": 0.9158843873517787, "grad_norm": 0.195490300655365, "learning_rate": 0.0004542962767722916, "loss": 2.432, "step": 459730 }, { "epoch": 0.9159043095754176, "grad_norm": 0.1953786015510559, "learning_rate": 0.000454166891284832, "loss": 2.4548, "step": 459740 }, { "epoch": 0.9159242317990565, "grad_norm": 0.20796847343444824, "learning_rate": 0.0004540375166259694, "loss": 2.4338, "step": 459750 }, { "epoch": 0.9159441540226954, "grad_norm": 0.18329104781150818, "learning_rate": 0.0004539081527929849, "loss": 2.4392, "step": 459760 }, { "epoch": 0.9159640762463344, "grad_norm": 0.21526333689689636, "learning_rate": 0.00045377879978316174, "loss": 2.4284, "step": 459770 }, { "epoch": 0.9159839984699732, "grad_norm": 0.21021677553653717, "learning_rate": 0.00045364945759378354, "loss": 2.4527, "step": 459780 }, { "epoch": 0.9160039206936121, "grad_norm": 0.4500496983528137, "learning_rate": 0.00045352012622213535, "loss": 2.4291, "step": 459790 }, { "epoch": 0.916023842917251, "grad_norm": 0.23187746107578278, "learning_rate": 0.0004533908056655036, "loss": 2.4399, "step": 459800 }, { "epoch": 0.9160437651408899, "grad_norm": 0.22677116096019745, "learning_rate": 0.0004532614959211754, "loss": 2.4457, "step": 459810 }, { "epoch": 0.9160636873645289, "grad_norm": 0.21593716740608215, "learning_rate": 0.0004531321969864395, "loss": 2.4261, "step": 459820 }, { "epoch": 0.9160836095881678, "grad_norm": 0.19857697188854218, "learning_rate": 0.00045300290885858473, "loss": 2.4492, "step": 459830 }, { "epoch": 0.9161035318118067, "grad_norm": 0.2081221640110016, "learning_rate": 0.0004528736315349027, "loss": 2.4459, "step": 459840 }, { "epoch": 0.9161234540354456, "grad_norm": 0.20611940324306488, "learning_rate": 0.0004527443650126848, "loss": 2.4496, "step": 459850 }, { "epoch": 0.9161433762590845, "grad_norm": 0.18216203153133392, "learning_rate": 0.0004526151092892241, "loss": 2.4477, "step": 459860 }, { "epoch": 0.9161632984827235, "grad_norm": 0.18692168593406677, "learning_rate": 0.00045248586436181463, "loss": 2.4396, "step": 459870 }, { "epoch": 0.9161832207063624, "grad_norm": 0.18847164511680603, "learning_rate": 0.00045235663022775177, "loss": 2.4417, "step": 459880 }, { "epoch": 0.9162031429300013, "grad_norm": 0.20369629561901093, "learning_rate": 0.0004522274068843317, "loss": 2.4345, "step": 459890 }, { "epoch": 0.9162230651536402, "grad_norm": 0.2136576771736145, "learning_rate": 0.0004520981943288522, "loss": 2.438, "step": 459900 }, { "epoch": 0.916242987377279, "grad_norm": 0.20735225081443787, "learning_rate": 0.00045196899255861166, "loss": 2.4257, "step": 459910 }, { "epoch": 0.916262909600918, "grad_norm": 0.22240197658538818, "learning_rate": 0.00045183980157090974, "loss": 2.4296, "step": 459920 }, { "epoch": 0.9162828318245569, "grad_norm": 0.20963740348815918, "learning_rate": 0.0004517106213630475, "loss": 2.4341, "step": 459930 }, { "epoch": 0.9163027540481958, "grad_norm": 0.17627841234207153, "learning_rate": 0.0004515814519323271, "loss": 2.4493, "step": 459940 }, { "epoch": 0.9163226762718347, "grad_norm": 0.2633558213710785, "learning_rate": 0.0004514522932760514, "loss": 2.4585, "step": 459950 }, { "epoch": 0.9163425984954737, "grad_norm": 0.2175201177597046, "learning_rate": 0.0004513231453915247, "loss": 2.4493, "step": 459960 }, { "epoch": 0.9163625207191126, "grad_norm": 0.1925150454044342, "learning_rate": 0.0004511940082760526, "loss": 2.4487, "step": 459970 }, { "epoch": 0.9163824429427515, "grad_norm": 0.19235289096832275, "learning_rate": 0.0004510648819269414, "loss": 2.4321, "step": 459980 }, { "epoch": 0.9164023651663904, "grad_norm": 0.20605146884918213, "learning_rate": 0.0004509357663414988, "loss": 2.4384, "step": 459990 }, { "epoch": 0.9164222873900293, "grad_norm": 0.18736548721790314, "learning_rate": 0.0004508066615170332, "loss": 2.4234, "step": 460000 }, { "epoch": 0.9164422096136683, "grad_norm": 0.20901387929916382, "learning_rate": 0.000450677567450855, "loss": 2.4432, "step": 460010 }, { "epoch": 0.9164621318373072, "grad_norm": 0.2386356145143509, "learning_rate": 0.00045054848414027496, "loss": 2.4407, "step": 460020 }, { "epoch": 0.9164820540609461, "grad_norm": 0.23384906351566315, "learning_rate": 0.00045041941158260505, "loss": 2.4493, "step": 460030 }, { "epoch": 0.916501976284585, "grad_norm": 0.19040299952030182, "learning_rate": 0.0004502903497751587, "loss": 2.439, "step": 460040 }, { "epoch": 0.9165218985082239, "grad_norm": 0.20296615362167358, "learning_rate": 0.00045016129871525016, "loss": 2.427, "step": 460050 }, { "epoch": 0.9165418207318629, "grad_norm": 0.2058628499507904, "learning_rate": 0.000450032258400195, "loss": 2.4398, "step": 460060 }, { "epoch": 0.9165617429555017, "grad_norm": 0.1985773742198944, "learning_rate": 0.0004499032288273097, "loss": 2.4448, "step": 460070 }, { "epoch": 0.9165816651791406, "grad_norm": 0.19878889620304108, "learning_rate": 0.00044977420999391197, "loss": 2.432, "step": 460080 }, { "epoch": 0.9166015874027795, "grad_norm": 0.20245002210140228, "learning_rate": 0.0004496452018973205, "loss": 2.4401, "step": 460090 }, { "epoch": 0.9166215096264184, "grad_norm": 0.21438390016555786, "learning_rate": 0.00044951620453485553, "loss": 2.4383, "step": 460100 }, { "epoch": 0.9166414318500574, "grad_norm": 0.18283149600028992, "learning_rate": 0.00044938721790383786, "loss": 2.4502, "step": 460110 }, { "epoch": 0.9166613540736963, "grad_norm": 0.1924271583557129, "learning_rate": 0.00044925824200158984, "loss": 2.4384, "step": 460120 }, { "epoch": 0.9166812762973352, "grad_norm": 0.2127283215522766, "learning_rate": 0.00044912927682543447, "loss": 2.4379, "step": 460130 }, { "epoch": 0.9167011985209741, "grad_norm": 0.1884683519601822, "learning_rate": 0.00044900032237269616, "loss": 2.443, "step": 460140 }, { "epoch": 0.916721120744613, "grad_norm": 0.2121000438928604, "learning_rate": 0.0004488713786407008, "loss": 2.458, "step": 460150 }, { "epoch": 0.916741042968252, "grad_norm": 0.1995004564523697, "learning_rate": 0.0004487424456267748, "loss": 2.4257, "step": 460160 }, { "epoch": 0.9167609651918909, "grad_norm": 0.18726706504821777, "learning_rate": 0.0004486135233282456, "loss": 2.4541, "step": 460170 }, { "epoch": 0.9167808874155298, "grad_norm": 0.21381360292434692, "learning_rate": 0.00044848461174244237, "loss": 2.4462, "step": 460180 }, { "epoch": 0.9168008096391687, "grad_norm": 0.21425753831863403, "learning_rate": 0.00044835571086669493, "loss": 2.4442, "step": 460190 }, { "epoch": 0.9168207318628075, "grad_norm": 0.20042963325977325, "learning_rate": 0.0004482268206983342, "loss": 2.449, "step": 460200 }, { "epoch": 0.9168406540864465, "grad_norm": 0.19906653463840485, "learning_rate": 0.0004480979412346926, "loss": 2.435, "step": 460210 }, { "epoch": 0.9168605763100854, "grad_norm": 0.191689133644104, "learning_rate": 0.00044796907247310314, "loss": 2.4466, "step": 460220 }, { "epoch": 0.9168804985337243, "grad_norm": 0.2193782776594162, "learning_rate": 0.0004478402144109004, "loss": 2.4351, "step": 460230 }, { "epoch": 0.9169004207573632, "grad_norm": 0.19917964935302734, "learning_rate": 0.0004477113670454196, "loss": 2.4444, "step": 460240 }, { "epoch": 0.9169203429810021, "grad_norm": 0.1970478743314743, "learning_rate": 0.00044758253037399757, "loss": 2.4428, "step": 460250 }, { "epoch": 0.9169402652046411, "grad_norm": 0.21134749054908752, "learning_rate": 0.0004474537043939719, "loss": 2.4411, "step": 460260 }, { "epoch": 0.91696018742828, "grad_norm": 0.19588948786258698, "learning_rate": 0.00044732488910268154, "loss": 2.4346, "step": 460270 }, { "epoch": 0.9169801096519189, "grad_norm": 0.19716070592403412, "learning_rate": 0.0004471960844974663, "loss": 2.451, "step": 460280 }, { "epoch": 0.9170000318755578, "grad_norm": 0.19576486945152283, "learning_rate": 0.000447067290575667, "loss": 2.4367, "step": 460290 }, { "epoch": 0.9170199540991968, "grad_norm": 0.21879616379737854, "learning_rate": 0.00044693850733462594, "loss": 2.4353, "step": 460300 }, { "epoch": 0.9170398763228357, "grad_norm": 0.20943482220172882, "learning_rate": 0.0004468097347716862, "loss": 2.4432, "step": 460310 }, { "epoch": 0.9170597985464746, "grad_norm": 0.19653558731079102, "learning_rate": 0.00044668097288419207, "loss": 2.4467, "step": 460320 }, { "epoch": 0.9170797207701135, "grad_norm": 0.18919937312602997, "learning_rate": 0.00044655222166948926, "loss": 2.4332, "step": 460330 }, { "epoch": 0.9170996429937524, "grad_norm": 0.2093185931444168, "learning_rate": 0.0004464234811249237, "loss": 2.4523, "step": 460340 }, { "epoch": 0.9171195652173914, "grad_norm": 0.2286442071199417, "learning_rate": 0.0004462947512478437, "loss": 2.4413, "step": 460350 }, { "epoch": 0.9171394874410302, "grad_norm": 0.197861447930336, "learning_rate": 0.0004461660320355974, "loss": 2.431, "step": 460360 }, { "epoch": 0.9171594096646691, "grad_norm": 0.2040375918149948, "learning_rate": 0.00044603732348553484, "loss": 2.4294, "step": 460370 }, { "epoch": 0.917179331888308, "grad_norm": 0.23612096905708313, "learning_rate": 0.0004459086255950071, "loss": 2.4533, "step": 460380 }, { "epoch": 0.9171992541119469, "grad_norm": 0.20629695057868958, "learning_rate": 0.00044577993836136587, "loss": 2.4205, "step": 460390 }, { "epoch": 0.9172191763355859, "grad_norm": 0.19492517411708832, "learning_rate": 0.00044565126178196414, "loss": 2.4509, "step": 460400 }, { "epoch": 0.9172390985592248, "grad_norm": 0.19379408657550812, "learning_rate": 0.00044552259585415666, "loss": 2.4296, "step": 460410 }, { "epoch": 0.9172590207828637, "grad_norm": 0.21548748016357422, "learning_rate": 0.000445393940575298, "loss": 2.4425, "step": 460420 }, { "epoch": 0.9172789430065026, "grad_norm": 0.20761679112911224, "learning_rate": 0.0004452652959427452, "loss": 2.4477, "step": 460430 }, { "epoch": 0.9172988652301415, "grad_norm": 0.1967933475971222, "learning_rate": 0.00044513666195385507, "loss": 2.4369, "step": 460440 }, { "epoch": 0.9173187874537805, "grad_norm": 0.20730064809322357, "learning_rate": 0.00044500803860598694, "loss": 2.4467, "step": 460450 }, { "epoch": 0.9173387096774194, "grad_norm": 0.22065956890583038, "learning_rate": 0.0004448794258964999, "loss": 2.447, "step": 460460 }, { "epoch": 0.9173586319010583, "grad_norm": 0.23392996191978455, "learning_rate": 0.0004447508238227547, "loss": 2.4452, "step": 460470 }, { "epoch": 0.9173785541246972, "grad_norm": 0.20531603693962097, "learning_rate": 0.00044462223238211365, "loss": 2.4498, "step": 460480 }, { "epoch": 0.917398476348336, "grad_norm": 0.22263488173484802, "learning_rate": 0.0004444936515719391, "loss": 2.4328, "step": 460490 }, { "epoch": 0.917418398571975, "grad_norm": 0.21496625244617462, "learning_rate": 0.0004443650813895954, "loss": 2.4359, "step": 460500 }, { "epoch": 0.9174383207956139, "grad_norm": 0.2056083232164383, "learning_rate": 0.0004442365218324478, "loss": 2.4438, "step": 460510 }, { "epoch": 0.9174582430192528, "grad_norm": 0.2229824811220169, "learning_rate": 0.0004441079728978621, "loss": 2.4433, "step": 460520 }, { "epoch": 0.9174781652428917, "grad_norm": 0.19447265565395355, "learning_rate": 0.0004439794345832058, "loss": 2.4374, "step": 460530 }, { "epoch": 0.9174980874665306, "grad_norm": 0.20098084211349487, "learning_rate": 0.00044385090688584715, "loss": 2.4308, "step": 460540 }, { "epoch": 0.9175180096901696, "grad_norm": 0.21137045323848724, "learning_rate": 0.0004437223898031559, "loss": 2.4338, "step": 460550 }, { "epoch": 0.9175379319138085, "grad_norm": 0.2163456380367279, "learning_rate": 0.0004435938833325024, "loss": 2.4472, "step": 460560 }, { "epoch": 0.9175578541374474, "grad_norm": 0.19678451120853424, "learning_rate": 0.0004434653874712582, "loss": 2.4375, "step": 460570 }, { "epoch": 0.9175777763610863, "grad_norm": 0.2077728509902954, "learning_rate": 0.00044333690221679635, "loss": 2.4469, "step": 460580 }, { "epoch": 0.9175976985847253, "grad_norm": 0.21621012687683105, "learning_rate": 0.00044320842756649025, "loss": 2.4475, "step": 460590 }, { "epoch": 0.9176176208083642, "grad_norm": 0.205530047416687, "learning_rate": 0.00044307996351771497, "loss": 2.4483, "step": 460600 }, { "epoch": 0.9176375430320031, "grad_norm": 0.19577853381633759, "learning_rate": 0.00044295151006784627, "loss": 2.4434, "step": 460610 }, { "epoch": 0.917657465255642, "grad_norm": 0.2098258137702942, "learning_rate": 0.0004428230672142617, "loss": 2.4449, "step": 460620 }, { "epoch": 0.9176773874792808, "grad_norm": 0.21588291227817535, "learning_rate": 0.0004426946349543388, "loss": 2.4312, "step": 460630 }, { "epoch": 0.9176973097029199, "grad_norm": 0.20187608897686005, "learning_rate": 0.0004425662132854571, "loss": 2.4519, "step": 460640 }, { "epoch": 0.9177172319265587, "grad_norm": 0.19343231618404388, "learning_rate": 0.0004424378022049971, "loss": 2.4508, "step": 460650 }, { "epoch": 0.9177371541501976, "grad_norm": 0.2050241380929947, "learning_rate": 0.00044230940171033974, "loss": 2.4363, "step": 460660 }, { "epoch": 0.9177570763738365, "grad_norm": 0.21762655675411224, "learning_rate": 0.0004421810117988676, "loss": 2.4474, "step": 460670 }, { "epoch": 0.9177769985974754, "grad_norm": 0.20531953871250153, "learning_rate": 0.00044205263246796436, "loss": 2.4537, "step": 460680 }, { "epoch": 0.9177969208211144, "grad_norm": 0.20405003428459167, "learning_rate": 0.00044192426371501446, "loss": 2.4541, "step": 460690 }, { "epoch": 0.9178168430447533, "grad_norm": 0.22065165638923645, "learning_rate": 0.00044179590553740367, "loss": 2.4455, "step": 460700 }, { "epoch": 0.9178367652683922, "grad_norm": 0.20428875088691711, "learning_rate": 0.0004416675579325187, "loss": 2.4351, "step": 460710 }, { "epoch": 0.9178566874920311, "grad_norm": 0.20740921795368195, "learning_rate": 0.00044153922089774754, "loss": 2.4291, "step": 460720 }, { "epoch": 0.91787660971567, "grad_norm": 0.21491853892803192, "learning_rate": 0.0004414108944304789, "loss": 2.4373, "step": 460730 }, { "epoch": 0.917896531939309, "grad_norm": 0.20810681581497192, "learning_rate": 0.00044128257852810274, "loss": 2.4606, "step": 460740 }, { "epoch": 0.9179164541629479, "grad_norm": 0.21647244691848755, "learning_rate": 0.0004411542731880105, "loss": 2.4533, "step": 460750 }, { "epoch": 0.9179363763865868, "grad_norm": 0.2184622883796692, "learning_rate": 0.0004410259784075938, "loss": 2.4433, "step": 460760 }, { "epoch": 0.9179562986102257, "grad_norm": 0.21798545122146606, "learning_rate": 0.0004408976941842462, "loss": 2.4327, "step": 460770 }, { "epoch": 0.9179762208338645, "grad_norm": 0.20676493644714355, "learning_rate": 0.00044076942051536204, "loss": 2.4478, "step": 460780 }, { "epoch": 0.9179961430575035, "grad_norm": 0.2051374614238739, "learning_rate": 0.0004406411573983364, "loss": 2.4467, "step": 460790 }, { "epoch": 0.9180160652811424, "grad_norm": 0.20931458473205566, "learning_rate": 0.00044051290483056583, "loss": 2.4321, "step": 460800 }, { "epoch": 0.9180359875047813, "grad_norm": 0.2688330113887787, "learning_rate": 0.0004403846628094479, "loss": 2.4421, "step": 460810 }, { "epoch": 0.9180559097284202, "grad_norm": 0.24365590512752533, "learning_rate": 0.0004402564313323809, "loss": 2.4261, "step": 460820 }, { "epoch": 0.9180758319520591, "grad_norm": 0.20138826966285706, "learning_rate": 0.00044012821039676476, "loss": 2.4533, "step": 460830 }, { "epoch": 0.9180957541756981, "grad_norm": 0.20425952970981598, "learning_rate": 0.00043999999999999996, "loss": 2.4301, "step": 460840 }, { "epoch": 0.918115676399337, "grad_norm": 0.19596895575523376, "learning_rate": 0.00043987180013948856, "loss": 2.4343, "step": 460850 }, { "epoch": 0.9181355986229759, "grad_norm": 0.19847264885902405, "learning_rate": 0.00043974361081263315, "loss": 2.4397, "step": 460860 }, { "epoch": 0.9181555208466148, "grad_norm": 0.19668065011501312, "learning_rate": 0.00043961543201683775, "loss": 2.4425, "step": 460870 }, { "epoch": 0.9181754430702538, "grad_norm": 0.2264663577079773, "learning_rate": 0.00043948726374950733, "loss": 2.4448, "step": 460880 }, { "epoch": 0.9181953652938927, "grad_norm": 0.20343899726867676, "learning_rate": 0.0004393591060080477, "loss": 2.4475, "step": 460890 }, { "epoch": 0.9182152875175316, "grad_norm": 0.22112995386123657, "learning_rate": 0.0004392309587898664, "loss": 2.4325, "step": 460900 }, { "epoch": 0.9182352097411705, "grad_norm": 0.21552911400794983, "learning_rate": 0.0004391028220923712, "loss": 2.4448, "step": 460910 }, { "epoch": 0.9182551319648093, "grad_norm": 0.21477872133255005, "learning_rate": 0.00043897469591297147, "loss": 2.4331, "step": 460920 }, { "epoch": 0.9182750541884483, "grad_norm": 0.19719800353050232, "learning_rate": 0.0004388465802490775, "loss": 2.4262, "step": 460930 }, { "epoch": 0.9182949764120872, "grad_norm": 0.1885470300912857, "learning_rate": 0.0004387184750981006, "loss": 2.4407, "step": 460940 }, { "epoch": 0.9183148986357261, "grad_norm": 0.20786793529987335, "learning_rate": 0.00043859038045745334, "loss": 2.4455, "step": 460950 }, { "epoch": 0.918334820859365, "grad_norm": 0.2005770057439804, "learning_rate": 0.00043846229632454924, "loss": 2.4436, "step": 460960 }, { "epoch": 0.9183547430830039, "grad_norm": 0.19691303372383118, "learning_rate": 0.0004383342226968026, "loss": 2.4356, "step": 460970 }, { "epoch": 0.9183746653066429, "grad_norm": 0.1890733242034912, "learning_rate": 0.0004382061595716289, "loss": 2.4273, "step": 460980 }, { "epoch": 0.9183945875302818, "grad_norm": 0.21634545922279358, "learning_rate": 0.00043807810694644525, "loss": 2.4281, "step": 460990 }, { "epoch": 0.9184145097539207, "grad_norm": 0.23267488181591034, "learning_rate": 0.00043795006481866914, "loss": 2.4397, "step": 461000 }, { "epoch": 0.9184344319775596, "grad_norm": 0.19247792661190033, "learning_rate": 0.00043782203318571924, "loss": 2.4189, "step": 461010 }, { "epoch": 0.9184543542011985, "grad_norm": 0.2148926705121994, "learning_rate": 0.0004376940120450157, "loss": 2.4337, "step": 461020 }, { "epoch": 0.9184742764248375, "grad_norm": 0.20372779667377472, "learning_rate": 0.00043756600139397886, "loss": 2.4247, "step": 461030 }, { "epoch": 0.9184941986484764, "grad_norm": 0.2069309502840042, "learning_rate": 0.00043743800123003117, "loss": 2.4215, "step": 461040 }, { "epoch": 0.9185141208721153, "grad_norm": 0.19434905052185059, "learning_rate": 0.00043731001155059544, "loss": 2.4466, "step": 461050 }, { "epoch": 0.9185340430957541, "grad_norm": 0.20997057855129242, "learning_rate": 0.0004371820323530957, "loss": 2.4282, "step": 461060 }, { "epoch": 0.918553965319393, "grad_norm": 0.20450665056705475, "learning_rate": 0.00043705406363495736, "loss": 2.4484, "step": 461070 }, { "epoch": 0.918573887543032, "grad_norm": 0.21957561373710632, "learning_rate": 0.00043692610539360625, "loss": 2.4247, "step": 461080 }, { "epoch": 0.9185938097666709, "grad_norm": 0.20875678956508636, "learning_rate": 0.00043679815762646945, "loss": 2.4431, "step": 461090 }, { "epoch": 0.9186137319903098, "grad_norm": 0.20035986602306366, "learning_rate": 0.0004366702203309758, "loss": 2.4351, "step": 461100 }, { "epoch": 0.9186336542139487, "grad_norm": 0.21625789999961853, "learning_rate": 0.000436542293504554, "loss": 2.4414, "step": 461110 }, { "epoch": 0.9186535764375876, "grad_norm": 0.20531761646270752, "learning_rate": 0.00043641437714463495, "loss": 2.438, "step": 461120 }, { "epoch": 0.9186734986612266, "grad_norm": 0.2029721885919571, "learning_rate": 0.00043628647124864983, "loss": 2.4383, "step": 461130 }, { "epoch": 0.9186934208848655, "grad_norm": 0.21569401025772095, "learning_rate": 0.00043615857581403097, "loss": 2.4409, "step": 461140 }, { "epoch": 0.9187133431085044, "grad_norm": 0.2091200351715088, "learning_rate": 0.0004360306908382121, "loss": 2.4372, "step": 461150 }, { "epoch": 0.9187332653321433, "grad_norm": 0.18993645906448364, "learning_rate": 0.0004359028163186278, "loss": 2.4308, "step": 461160 }, { "epoch": 0.9187531875557823, "grad_norm": 0.1875019520521164, "learning_rate": 0.0004357749522527137, "loss": 2.4515, "step": 461170 }, { "epoch": 0.9187731097794212, "grad_norm": 0.20781812071800232, "learning_rate": 0.00043564709863790643, "loss": 2.4424, "step": 461180 }, { "epoch": 0.9187930320030601, "grad_norm": 0.20718564093112946, "learning_rate": 0.0004355192554716436, "loss": 2.4405, "step": 461190 }, { "epoch": 0.918812954226699, "grad_norm": 0.2105482667684555, "learning_rate": 0.0004353914227513642, "loss": 2.4473, "step": 461200 }, { "epoch": 0.9188328764503378, "grad_norm": 0.22316160798072815, "learning_rate": 0.0004352636004745081, "loss": 2.4417, "step": 461210 }, { "epoch": 0.9188527986739768, "grad_norm": 0.22286175191402435, "learning_rate": 0.0004351357886385159, "loss": 2.4491, "step": 461220 }, { "epoch": 0.9188727208976157, "grad_norm": 0.19726626574993134, "learning_rate": 0.00043500798724082946, "loss": 2.4204, "step": 461230 }, { "epoch": 0.9188926431212546, "grad_norm": 0.2042127400636673, "learning_rate": 0.00043488019627889175, "loss": 2.4343, "step": 461240 }, { "epoch": 0.9189125653448935, "grad_norm": 0.21482235193252563, "learning_rate": 0.00043475241575014725, "loss": 2.4339, "step": 461250 }, { "epoch": 0.9189324875685324, "grad_norm": 0.22094206511974335, "learning_rate": 0.0004346246456520404, "loss": 2.4543, "step": 461260 }, { "epoch": 0.9189524097921714, "grad_norm": 0.21627600491046906, "learning_rate": 0.00043449688598201755, "loss": 2.4341, "step": 461270 }, { "epoch": 0.9189723320158103, "grad_norm": 0.19869036972522736, "learning_rate": 0.00043436913673752575, "loss": 2.4392, "step": 461280 }, { "epoch": 0.9189922542394492, "grad_norm": 0.19411146640777588, "learning_rate": 0.0004342413979160134, "loss": 2.4467, "step": 461290 }, { "epoch": 0.9190121764630881, "grad_norm": 0.2155729979276657, "learning_rate": 0.00043411366951492925, "loss": 2.4426, "step": 461300 }, { "epoch": 0.919032098686727, "grad_norm": 0.22141669690608978, "learning_rate": 0.0004339859515317239, "loss": 2.444, "step": 461310 }, { "epoch": 0.919052020910366, "grad_norm": 0.20421694219112396, "learning_rate": 0.00043385824396384855, "loss": 2.4389, "step": 461320 }, { "epoch": 0.9190719431340049, "grad_norm": 0.2052329033613205, "learning_rate": 0.00043373054680875557, "loss": 2.4565, "step": 461330 }, { "epoch": 0.9190918653576438, "grad_norm": 0.2002236694097519, "learning_rate": 0.00043360286006389814, "loss": 2.4288, "step": 461340 }, { "epoch": 0.9191117875812826, "grad_norm": 0.23449918627738953, "learning_rate": 0.0004334751837267308, "loss": 2.4186, "step": 461350 }, { "epoch": 0.9191317098049215, "grad_norm": 0.21180717647075653, "learning_rate": 0.0004333475177947088, "loss": 2.4434, "step": 461360 }, { "epoch": 0.9191516320285605, "grad_norm": 0.20605476200580597, "learning_rate": 0.000433219862265289, "loss": 2.4352, "step": 461370 }, { "epoch": 0.9191715542521994, "grad_norm": 0.20884598791599274, "learning_rate": 0.0004330922171359286, "loss": 2.4269, "step": 461380 }, { "epoch": 0.9191914764758383, "grad_norm": 0.1972038596868515, "learning_rate": 0.0004329645824040862, "loss": 2.4298, "step": 461390 }, { "epoch": 0.9192113986994772, "grad_norm": 0.3516885042190552, "learning_rate": 0.00043283695806722134, "loss": 2.4432, "step": 461400 }, { "epoch": 0.9192313209231161, "grad_norm": 0.22633439302444458, "learning_rate": 0.00043270934412279496, "loss": 2.4505, "step": 461410 }, { "epoch": 0.9192512431467551, "grad_norm": 0.2156495302915573, "learning_rate": 0.00043258174056826814, "loss": 2.4502, "step": 461420 }, { "epoch": 0.919271165370394, "grad_norm": 0.21648509800434113, "learning_rate": 0.0004324541474011039, "loss": 2.4181, "step": 461430 }, { "epoch": 0.9192910875940329, "grad_norm": 0.21278637647628784, "learning_rate": 0.0004323265646187662, "loss": 2.424, "step": 461440 }, { "epoch": 0.9193110098176718, "grad_norm": 0.1999731957912445, "learning_rate": 0.00043219899221871904, "loss": 2.4255, "step": 461450 }, { "epoch": 0.9193309320413108, "grad_norm": 0.20285965502262115, "learning_rate": 0.000432071430198429, "loss": 2.4262, "step": 461460 }, { "epoch": 0.9193508542649497, "grad_norm": 0.21244439482688904, "learning_rate": 0.0004319438785553624, "loss": 2.439, "step": 461470 }, { "epoch": 0.9193707764885886, "grad_norm": 0.194444477558136, "learning_rate": 0.0004318163372869872, "loss": 2.4096, "step": 461480 }, { "epoch": 0.9193906987122274, "grad_norm": 0.2192068248987198, "learning_rate": 0.00043168880639077247, "loss": 2.4304, "step": 461490 }, { "epoch": 0.9194106209358663, "grad_norm": 0.2162078619003296, "learning_rate": 0.00043156128586418775, "loss": 2.4427, "step": 461500 }, { "epoch": 0.9194305431595053, "grad_norm": 0.20269453525543213, "learning_rate": 0.0004314337757047042, "loss": 2.4357, "step": 461510 }, { "epoch": 0.9194504653831442, "grad_norm": 0.20260688662528992, "learning_rate": 0.0004313062759097939, "loss": 2.4426, "step": 461520 }, { "epoch": 0.9194703876067831, "grad_norm": 0.20399115979671478, "learning_rate": 0.0004311787864769294, "loss": 2.4379, "step": 461530 }, { "epoch": 0.919490309830422, "grad_norm": 0.2014700025320053, "learning_rate": 0.0004310513074035851, "loss": 2.4386, "step": 461540 }, { "epoch": 0.9195102320540609, "grad_norm": 0.19614961743354797, "learning_rate": 0.0004309238386872356, "loss": 2.4496, "step": 461550 }, { "epoch": 0.9195301542776999, "grad_norm": 0.23894838988780975, "learning_rate": 0.0004307963803253574, "loss": 2.4336, "step": 461560 }, { "epoch": 0.9195500765013388, "grad_norm": 0.20166224241256714, "learning_rate": 0.0004306689323154276, "loss": 2.4453, "step": 461570 }, { "epoch": 0.9195699987249777, "grad_norm": 0.20767800509929657, "learning_rate": 0.000430541494654924, "loss": 2.443, "step": 461580 }, { "epoch": 0.9195899209486166, "grad_norm": 0.21713775396347046, "learning_rate": 0.00043041406734132593, "loss": 2.4451, "step": 461590 }, { "epoch": 0.9196098431722555, "grad_norm": 0.1967509388923645, "learning_rate": 0.00043028665037211325, "loss": 2.4418, "step": 461600 }, { "epoch": 0.9196297653958945, "grad_norm": 0.21795445680618286, "learning_rate": 0.0004301592437447677, "loss": 2.4457, "step": 461610 }, { "epoch": 0.9196496876195334, "grad_norm": 0.2256329506635666, "learning_rate": 0.0004300318474567708, "loss": 2.4501, "step": 461620 }, { "epoch": 0.9196696098431723, "grad_norm": 0.24972446262836456, "learning_rate": 0.00042990446150560646, "loss": 2.4544, "step": 461630 }, { "epoch": 0.9196895320668111, "grad_norm": 0.19553053379058838, "learning_rate": 0.0004297770858887584, "loss": 2.4496, "step": 461640 }, { "epoch": 0.91970945429045, "grad_norm": 0.22178316116333008, "learning_rate": 0.0004296497206037118, "loss": 2.4301, "step": 461650 }, { "epoch": 0.919729376514089, "grad_norm": 0.19884221255779266, "learning_rate": 0.00042952236564795343, "loss": 2.4441, "step": 461660 }, { "epoch": 0.9197492987377279, "grad_norm": 0.24555560946464539, "learning_rate": 0.00042939502101897053, "loss": 2.4293, "step": 461670 }, { "epoch": 0.9197692209613668, "grad_norm": 0.19866472482681274, "learning_rate": 0.0004292676867142511, "loss": 2.4297, "step": 461680 }, { "epoch": 0.9197891431850057, "grad_norm": 0.19815681874752045, "learning_rate": 0.0004291403627312846, "loss": 2.4173, "step": 461690 }, { "epoch": 0.9198090654086446, "grad_norm": 0.19887804985046387, "learning_rate": 0.00042901304906756144, "loss": 2.44, "step": 461700 }, { "epoch": 0.9198289876322836, "grad_norm": 0.2038460522890091, "learning_rate": 0.000428885745720573, "loss": 2.422, "step": 461710 }, { "epoch": 0.9198489098559225, "grad_norm": 0.19758659601211548, "learning_rate": 0.00042875845268781166, "loss": 2.4204, "step": 461720 }, { "epoch": 0.9198688320795614, "grad_norm": 0.2120504230260849, "learning_rate": 0.00042863116996677065, "loss": 2.4527, "step": 461730 }, { "epoch": 0.9198887543032003, "grad_norm": 0.2326524406671524, "learning_rate": 0.0004285038975549447, "loss": 2.4495, "step": 461740 }, { "epoch": 0.9199086765268393, "grad_norm": 0.21056795120239258, "learning_rate": 0.00042837663544982883, "loss": 2.4379, "step": 461750 }, { "epoch": 0.9199285987504782, "grad_norm": 0.2043052762746811, "learning_rate": 0.00042824938364891983, "loss": 2.4256, "step": 461760 }, { "epoch": 0.9199485209741171, "grad_norm": 0.1909409910440445, "learning_rate": 0.00042812214214971525, "loss": 2.4361, "step": 461770 }, { "epoch": 0.919968443197756, "grad_norm": 0.20744864642620087, "learning_rate": 0.0004279949109497132, "loss": 2.4418, "step": 461780 }, { "epoch": 0.9199883654213948, "grad_norm": 0.21534910798072815, "learning_rate": 0.00042786769004641336, "loss": 2.4474, "step": 461790 }, { "epoch": 0.9200082876450338, "grad_norm": 0.20888404548168182, "learning_rate": 0.0004277404794373163, "loss": 2.439, "step": 461800 }, { "epoch": 0.9200282098686727, "grad_norm": 0.19903549551963806, "learning_rate": 0.00042761327911992344, "loss": 2.4429, "step": 461810 }, { "epoch": 0.9200481320923116, "grad_norm": 0.197305828332901, "learning_rate": 0.0004274860890917371, "loss": 2.4328, "step": 461820 }, { "epoch": 0.9200680543159505, "grad_norm": 0.2081567943096161, "learning_rate": 0.00042735890935026124, "loss": 2.4397, "step": 461830 }, { "epoch": 0.9200879765395894, "grad_norm": 0.20865806937217712, "learning_rate": 0.00042723173989300014, "loss": 2.4439, "step": 461840 }, { "epoch": 0.9201078987632284, "grad_norm": 0.2167249619960785, "learning_rate": 0.0004271045807174592, "loss": 2.4276, "step": 461850 }, { "epoch": 0.9201278209868673, "grad_norm": 0.20659290254116058, "learning_rate": 0.00042697743182114524, "loss": 2.4353, "step": 461860 }, { "epoch": 0.9201477432105062, "grad_norm": 0.19120602309703827, "learning_rate": 0.00042685029320156567, "loss": 2.428, "step": 461870 }, { "epoch": 0.9201676654341451, "grad_norm": 0.20741227269172668, "learning_rate": 0.0004267231648562291, "loss": 2.4427, "step": 461880 }, { "epoch": 0.920187587657784, "grad_norm": 0.20188285410404205, "learning_rate": 0.0004265960467826453, "loss": 2.4218, "step": 461890 }, { "epoch": 0.920207509881423, "grad_norm": 0.195818230509758, "learning_rate": 0.0004264689389783245, "loss": 2.419, "step": 461900 }, { "epoch": 0.9202274321050619, "grad_norm": 0.1878478080034256, "learning_rate": 0.0004263418414407785, "loss": 2.4525, "step": 461910 }, { "epoch": 0.9202473543287008, "grad_norm": 0.2089276909828186, "learning_rate": 0.0004262147541675199, "loss": 2.446, "step": 461920 }, { "epoch": 0.9202672765523396, "grad_norm": 0.20573711395263672, "learning_rate": 0.00042608767715606225, "loss": 2.4249, "step": 461930 }, { "epoch": 0.9202871987759785, "grad_norm": 0.19901172816753387, "learning_rate": 0.0004259606104039202, "loss": 2.4427, "step": 461940 }, { "epoch": 0.9203071209996175, "grad_norm": 0.20439428091049194, "learning_rate": 0.0004258335539086091, "loss": 2.4236, "step": 461950 }, { "epoch": 0.9203270432232564, "grad_norm": 0.21362653374671936, "learning_rate": 0.00042570650766764586, "loss": 2.4274, "step": 461960 }, { "epoch": 0.9203469654468953, "grad_norm": 0.203421488404274, "learning_rate": 0.00042557947167854793, "loss": 2.4214, "step": 461970 }, { "epoch": 0.9203668876705342, "grad_norm": 0.20171257853507996, "learning_rate": 0.00042545244593883403, "loss": 2.4244, "step": 461980 }, { "epoch": 0.9203868098941731, "grad_norm": 0.20267550647258759, "learning_rate": 0.0004253254304460239, "loss": 2.4527, "step": 461990 }, { "epoch": 0.9204067321178121, "grad_norm": 0.2223014533519745, "learning_rate": 0.00042519842519763774, "loss": 2.4383, "step": 462000 }, { "epoch": 0.920426654341451, "grad_norm": 0.22175480425357819, "learning_rate": 0.00042507143019119756, "loss": 2.4325, "step": 462010 }, { "epoch": 0.9204465765650899, "grad_norm": 0.22078044712543488, "learning_rate": 0.00042494444542422573, "loss": 2.4283, "step": 462020 }, { "epoch": 0.9204664987887288, "grad_norm": 0.1949082314968109, "learning_rate": 0.0004248174708942458, "loss": 2.4335, "step": 462030 }, { "epoch": 0.9204864210123677, "grad_norm": 0.2049214392900467, "learning_rate": 0.0004246905065987827, "loss": 2.4441, "step": 462040 }, { "epoch": 0.9205063432360067, "grad_norm": 0.1896800845861435, "learning_rate": 0.00042456355253536174, "loss": 2.4258, "step": 462050 }, { "epoch": 0.9205262654596456, "grad_norm": 0.20121364295482635, "learning_rate": 0.0004244366087015095, "loss": 2.4463, "step": 462060 }, { "epoch": 0.9205461876832844, "grad_norm": 0.20112726092338562, "learning_rate": 0.0004243096750947539, "loss": 2.4322, "step": 462070 }, { "epoch": 0.9205661099069233, "grad_norm": 0.21219560503959656, "learning_rate": 0.00042418275171262335, "loss": 2.4263, "step": 462080 }, { "epoch": 0.9205860321305623, "grad_norm": 0.19648823142051697, "learning_rate": 0.0004240558385526472, "loss": 2.4302, "step": 462090 }, { "epoch": 0.9206059543542012, "grad_norm": 0.2244247943162918, "learning_rate": 0.00042392893561235637, "loss": 2.441, "step": 462100 }, { "epoch": 0.9206258765778401, "grad_norm": 0.208896666765213, "learning_rate": 0.0004238020428892824, "loss": 2.4544, "step": 462110 }, { "epoch": 0.920645798801479, "grad_norm": 0.23627834022045135, "learning_rate": 0.00042367516038095763, "loss": 2.4384, "step": 462120 }, { "epoch": 0.9206657210251179, "grad_norm": 0.21420849859714508, "learning_rate": 0.0004235482880849157, "loss": 2.4228, "step": 462130 }, { "epoch": 0.9206856432487569, "grad_norm": 0.20263345539569855, "learning_rate": 0.0004234214259986913, "loss": 2.4309, "step": 462140 }, { "epoch": 0.9207055654723958, "grad_norm": 0.19927796721458435, "learning_rate": 0.00042329457411982, "loss": 2.4513, "step": 462150 }, { "epoch": 0.9207254876960347, "grad_norm": 0.2373095601797104, "learning_rate": 0.00042316773244583804, "loss": 2.4307, "step": 462160 }, { "epoch": 0.9207454099196736, "grad_norm": 0.2210044115781784, "learning_rate": 0.00042304090097428326, "loss": 2.4369, "step": 462170 }, { "epoch": 0.9207653321433125, "grad_norm": 0.2513303756713867, "learning_rate": 0.00042291407970269424, "loss": 2.4347, "step": 462180 }, { "epoch": 0.9207852543669515, "grad_norm": 0.20723989605903625, "learning_rate": 0.00042278726862861007, "loss": 2.4406, "step": 462190 }, { "epoch": 0.9208051765905904, "grad_norm": 0.21826645731925964, "learning_rate": 0.00042266046774957176, "loss": 2.4219, "step": 462200 }, { "epoch": 0.9208250988142292, "grad_norm": 0.19926658272743225, "learning_rate": 0.0004225336770631203, "loss": 2.4413, "step": 462210 }, { "epoch": 0.9208450210378681, "grad_norm": 0.1971622109413147, "learning_rate": 0.0004224068965667987, "loss": 2.4373, "step": 462220 }, { "epoch": 0.920864943261507, "grad_norm": 0.19992011785507202, "learning_rate": 0.0004222801262581497, "loss": 2.4405, "step": 462230 }, { "epoch": 0.920884865485146, "grad_norm": 0.2099832147359848, "learning_rate": 0.00042215336613471855, "loss": 2.4301, "step": 462240 }, { "epoch": 0.9209047877087849, "grad_norm": 0.19161568582057953, "learning_rate": 0.00042202661619405004, "loss": 2.4359, "step": 462250 }, { "epoch": 0.9209247099324238, "grad_norm": 0.20054756104946136, "learning_rate": 0.00042189987643369075, "loss": 2.4529, "step": 462260 }, { "epoch": 0.9209446321560627, "grad_norm": 0.20939990878105164, "learning_rate": 0.0004217731468511885, "loss": 2.4331, "step": 462270 }, { "epoch": 0.9209645543797016, "grad_norm": 0.233613058924675, "learning_rate": 0.00042164642744409143, "loss": 2.4328, "step": 462280 }, { "epoch": 0.9209844766033406, "grad_norm": 0.2014949470758438, "learning_rate": 0.0004215197182099486, "loss": 2.4433, "step": 462290 }, { "epoch": 0.9210043988269795, "grad_norm": 0.21079467236995697, "learning_rate": 0.00042139301914631067, "loss": 2.4365, "step": 462300 }, { "epoch": 0.9210243210506184, "grad_norm": 0.21118871867656708, "learning_rate": 0.0004212663302507291, "loss": 2.4176, "step": 462310 }, { "epoch": 0.9210442432742573, "grad_norm": 0.2104686051607132, "learning_rate": 0.000421139651520756, "loss": 2.4459, "step": 462320 }, { "epoch": 0.9210641654978962, "grad_norm": 0.19685660302639008, "learning_rate": 0.00042101298295394465, "loss": 2.4405, "step": 462330 }, { "epoch": 0.9210840877215352, "grad_norm": 0.2051784247159958, "learning_rate": 0.00042088632454784935, "loss": 2.4273, "step": 462340 }, { "epoch": 0.921104009945174, "grad_norm": 0.21657168865203857, "learning_rate": 0.0004207596763000254, "loss": 2.4351, "step": 462350 }, { "epoch": 0.9211239321688129, "grad_norm": 0.19479921460151672, "learning_rate": 0.00042063303820802925, "loss": 2.4343, "step": 462360 }, { "epoch": 0.9211438543924518, "grad_norm": 0.22589074075222015, "learning_rate": 0.00042050641026941806, "loss": 2.4282, "step": 462370 }, { "epoch": 0.9211637766160908, "grad_norm": 0.22366677224636078, "learning_rate": 0.0004203797924817496, "loss": 2.4338, "step": 462380 }, { "epoch": 0.9211836988397297, "grad_norm": 0.22557887434959412, "learning_rate": 0.00042025318484258367, "loss": 2.4364, "step": 462390 }, { "epoch": 0.9212036210633686, "grad_norm": 0.21753647923469543, "learning_rate": 0.0004201265873494802, "loss": 2.4516, "step": 462400 }, { "epoch": 0.9212235432870075, "grad_norm": 0.1922234743833542, "learning_rate": 0.00041999999999999996, "loss": 2.4323, "step": 462410 }, { "epoch": 0.9212434655106464, "grad_norm": 0.20951834321022034, "learning_rate": 0.0004198734227917056, "loss": 2.4359, "step": 462420 }, { "epoch": 0.9212633877342854, "grad_norm": 0.22774936258792877, "learning_rate": 0.00041974685572216, "loss": 2.4332, "step": 462430 }, { "epoch": 0.9212833099579243, "grad_norm": 0.2017119973897934, "learning_rate": 0.00041962029878892725, "loss": 2.4187, "step": 462440 }, { "epoch": 0.9213032321815632, "grad_norm": 0.21650514006614685, "learning_rate": 0.00041949375198957207, "loss": 2.4433, "step": 462450 }, { "epoch": 0.9213231544052021, "grad_norm": 0.2230631709098816, "learning_rate": 0.000419367215321661, "loss": 2.4372, "step": 462460 }, { "epoch": 0.921343076628841, "grad_norm": 0.19808329641819, "learning_rate": 0.0004192406887827609, "loss": 2.4374, "step": 462470 }, { "epoch": 0.92136299885248, "grad_norm": 0.20227791368961334, "learning_rate": 0.0004191141723704397, "loss": 2.4384, "step": 462480 }, { "epoch": 0.9213829210761189, "grad_norm": 0.19839617609977722, "learning_rate": 0.0004189876660822662, "loss": 2.4502, "step": 462490 }, { "epoch": 0.9214028432997577, "grad_norm": 0.28368067741394043, "learning_rate": 0.00041886116991581024, "loss": 2.4382, "step": 462500 }, { "epoch": 0.9214227655233966, "grad_norm": 0.19872140884399414, "learning_rate": 0.0004187346838686432, "loss": 2.44, "step": 462510 }, { "epoch": 0.9214426877470355, "grad_norm": 0.2065197378396988, "learning_rate": 0.0004186082079383364, "loss": 2.4434, "step": 462520 }, { "epoch": 0.9214626099706745, "grad_norm": 0.2003067284822464, "learning_rate": 0.00041848174212246294, "loss": 2.4341, "step": 462530 }, { "epoch": 0.9214825321943134, "grad_norm": 0.20985755324363708, "learning_rate": 0.00041835528641859644, "loss": 2.4205, "step": 462540 }, { "epoch": 0.9215024544179523, "grad_norm": 0.2123686969280243, "learning_rate": 0.000418228840824312, "loss": 2.4405, "step": 462550 }, { "epoch": 0.9215223766415912, "grad_norm": 0.19328472018241882, "learning_rate": 0.00041810240533718493, "loss": 2.4439, "step": 462560 }, { "epoch": 0.9215422988652301, "grad_norm": 0.19628693163394928, "learning_rate": 0.0004179759799547922, "loss": 2.4466, "step": 462570 }, { "epoch": 0.9215622210888691, "grad_norm": 0.19946354627609253, "learning_rate": 0.00041784956467471137, "loss": 2.4572, "step": 462580 }, { "epoch": 0.921582143312508, "grad_norm": 0.2043151706457138, "learning_rate": 0.0004177231594945212, "loss": 2.4369, "step": 462590 }, { "epoch": 0.9216020655361469, "grad_norm": 0.2312050312757492, "learning_rate": 0.0004175967644118015, "loss": 2.4262, "step": 462600 }, { "epoch": 0.9216219877597858, "grad_norm": 0.20580096542835236, "learning_rate": 0.00041747037942413233, "loss": 2.4382, "step": 462610 }, { "epoch": 0.9216419099834247, "grad_norm": 0.22139067947864532, "learning_rate": 0.00041734400452909546, "loss": 2.4276, "step": 462620 }, { "epoch": 0.9216618322070637, "grad_norm": 0.20762768387794495, "learning_rate": 0.0004172176397242735, "loss": 2.4407, "step": 462630 }, { "epoch": 0.9216817544307025, "grad_norm": 0.22737739980220795, "learning_rate": 0.0004170912850072497, "loss": 2.4391, "step": 462640 }, { "epoch": 0.9217016766543414, "grad_norm": 0.19356583058834076, "learning_rate": 0.00041696494037560884, "loss": 2.4271, "step": 462650 }, { "epoch": 0.9217215988779803, "grad_norm": 0.1963675171136856, "learning_rate": 0.00041683860582693576, "loss": 2.423, "step": 462660 }, { "epoch": 0.9217415211016193, "grad_norm": 0.21555723249912262, "learning_rate": 0.00041671228135881756, "loss": 2.4301, "step": 462670 }, { "epoch": 0.9217614433252582, "grad_norm": 0.23148386180400848, "learning_rate": 0.000416585966968841, "loss": 2.444, "step": 462680 }, { "epoch": 0.9217813655488971, "grad_norm": 0.19185999035835266, "learning_rate": 0.00041645966265459465, "loss": 2.4307, "step": 462690 }, { "epoch": 0.921801287772536, "grad_norm": 0.21689389646053314, "learning_rate": 0.00041633336841366764, "loss": 2.4474, "step": 462700 }, { "epoch": 0.9218212099961749, "grad_norm": 0.2090746909379959, "learning_rate": 0.0004162070842436503, "loss": 2.4362, "step": 462710 }, { "epoch": 0.9218411322198139, "grad_norm": 0.21837523579597473, "learning_rate": 0.0004160808101421336, "loss": 2.447, "step": 462720 }, { "epoch": 0.9218610544434528, "grad_norm": 0.20269155502319336, "learning_rate": 0.0004159545461067098, "loss": 2.4429, "step": 462730 }, { "epoch": 0.9218809766670917, "grad_norm": 0.21797415614128113, "learning_rate": 0.00041582829213497184, "loss": 2.4352, "step": 462740 }, { "epoch": 0.9219008988907306, "grad_norm": 0.1962893158197403, "learning_rate": 0.00041570204822451417, "loss": 2.4335, "step": 462750 }, { "epoch": 0.9219208211143695, "grad_norm": 0.2369939088821411, "learning_rate": 0.00041557581437293113, "loss": 2.4364, "step": 462760 }, { "epoch": 0.9219407433380085, "grad_norm": 0.20731674134731293, "learning_rate": 0.0004154495905778195, "loss": 2.4413, "step": 462770 }, { "epoch": 0.9219606655616474, "grad_norm": 0.21124742925167084, "learning_rate": 0.0004153233768367755, "loss": 2.4337, "step": 462780 }, { "epoch": 0.9219805877852862, "grad_norm": 0.21880792081356049, "learning_rate": 0.0004151971731473976, "loss": 2.4502, "step": 462790 }, { "epoch": 0.9220005100089251, "grad_norm": 0.20662790536880493, "learning_rate": 0.000415070979507284, "loss": 2.4282, "step": 462800 }, { "epoch": 0.922020432232564, "grad_norm": 0.20446935296058655, "learning_rate": 0.00041494479591403514, "loss": 2.4475, "step": 462810 }, { "epoch": 0.922040354456203, "grad_norm": 0.20189352333545685, "learning_rate": 0.00041481862236525125, "loss": 2.431, "step": 462820 }, { "epoch": 0.9220602766798419, "grad_norm": 0.204117551445961, "learning_rate": 0.0004146924588585344, "loss": 2.4437, "step": 462830 }, { "epoch": 0.9220801989034808, "grad_norm": 0.20974402129650116, "learning_rate": 0.000414566305391487, "loss": 2.4282, "step": 462840 }, { "epoch": 0.9221001211271197, "grad_norm": 0.21794047951698303, "learning_rate": 0.0004144401619617126, "loss": 2.4227, "step": 462850 }, { "epoch": 0.9221200433507586, "grad_norm": 0.20113562047481537, "learning_rate": 0.0004143140285668161, "loss": 2.4257, "step": 462860 }, { "epoch": 0.9221399655743976, "grad_norm": 0.20309187471866608, "learning_rate": 0.00041418790520440286, "loss": 2.4421, "step": 462870 }, { "epoch": 0.9221598877980365, "grad_norm": 0.2708934545516968, "learning_rate": 0.00041406179187207927, "loss": 2.4449, "step": 462880 }, { "epoch": 0.9221798100216754, "grad_norm": 0.2295302003622055, "learning_rate": 0.00041393568856745277, "loss": 2.437, "step": 462890 }, { "epoch": 0.9221997322453143, "grad_norm": 0.22245875000953674, "learning_rate": 0.0004138095952881318, "loss": 2.436, "step": 462900 }, { "epoch": 0.9222196544689532, "grad_norm": 0.18590040504932404, "learning_rate": 0.0004136835120317257, "loss": 2.442, "step": 462910 }, { "epoch": 0.9222395766925922, "grad_norm": 0.22687487304210663, "learning_rate": 0.0004135574387958447, "loss": 2.4375, "step": 462920 }, { "epoch": 0.922259498916231, "grad_norm": 0.20872895419597626, "learning_rate": 0.00041343137557810005, "loss": 2.4314, "step": 462930 }, { "epoch": 0.9222794211398699, "grad_norm": 0.2089260369539261, "learning_rate": 0.0004133053223761039, "loss": 2.4358, "step": 462940 }, { "epoch": 0.9222993433635088, "grad_norm": 0.22666791081428528, "learning_rate": 0.0004131792791874691, "loss": 2.433, "step": 462950 }, { "epoch": 0.9223192655871478, "grad_norm": 0.21206627786159515, "learning_rate": 0.0004130532460098102, "loss": 2.4267, "step": 462960 }, { "epoch": 0.9223391878107867, "grad_norm": 0.19218827784061432, "learning_rate": 0.0004129272228407419, "loss": 2.4189, "step": 462970 }, { "epoch": 0.9223591100344256, "grad_norm": 0.2090914249420166, "learning_rate": 0.0004128012096778804, "loss": 2.4407, "step": 462980 }, { "epoch": 0.9223790322580645, "grad_norm": 0.1917424499988556, "learning_rate": 0.00041267520651884216, "loss": 2.4369, "step": 462990 }, { "epoch": 0.9223989544817034, "grad_norm": 0.21582235395908356, "learning_rate": 0.0004125492133612456, "loss": 2.4356, "step": 463000 }, { "epoch": 0.9224188767053424, "grad_norm": 0.20042715966701508, "learning_rate": 0.00041242323020270936, "loss": 2.426, "step": 463010 }, { "epoch": 0.9224387989289813, "grad_norm": 0.19439540803432465, "learning_rate": 0.00041229725704085274, "loss": 2.4167, "step": 463020 }, { "epoch": 0.9224587211526202, "grad_norm": 0.21304123103618622, "learning_rate": 0.00041217129387329687, "loss": 2.431, "step": 463030 }, { "epoch": 0.9224786433762591, "grad_norm": 0.19982703030109406, "learning_rate": 0.00041204534069766343, "loss": 2.4378, "step": 463040 }, { "epoch": 0.922498565599898, "grad_norm": 0.1922471970319748, "learning_rate": 0.0004119193975115747, "loss": 2.4339, "step": 463050 }, { "epoch": 0.922518487823537, "grad_norm": 0.22855789959430695, "learning_rate": 0.00041179346431265417, "loss": 2.4301, "step": 463060 }, { "epoch": 0.9225384100471758, "grad_norm": 0.26264381408691406, "learning_rate": 0.0004116675410985269, "loss": 2.4222, "step": 463070 }, { "epoch": 0.9225583322708147, "grad_norm": 0.2147914320230484, "learning_rate": 0.0004115416278668176, "loss": 2.4421, "step": 463080 }, { "epoch": 0.9225782544944536, "grad_norm": 0.21026399731636047, "learning_rate": 0.00041141572461515286, "loss": 2.4334, "step": 463090 }, { "epoch": 0.9225981767180925, "grad_norm": 0.21372227370738983, "learning_rate": 0.00041128983134115993, "loss": 2.4333, "step": 463100 }, { "epoch": 0.9226180989417315, "grad_norm": 0.19555966556072235, "learning_rate": 0.00041116394804246716, "loss": 2.4336, "step": 463110 }, { "epoch": 0.9226380211653704, "grad_norm": 0.20951984822750092, "learning_rate": 0.00041103807471670376, "loss": 2.4256, "step": 463120 }, { "epoch": 0.9226579433890093, "grad_norm": 0.184479221701622, "learning_rate": 0.00041091221136149936, "loss": 2.434, "step": 463130 }, { "epoch": 0.9226778656126482, "grad_norm": 0.1849042773246765, "learning_rate": 0.00041078635797448573, "loss": 2.4387, "step": 463140 }, { "epoch": 0.9226977878362871, "grad_norm": 0.21169604361057281, "learning_rate": 0.0004106605145532942, "loss": 2.4304, "step": 463150 }, { "epoch": 0.9227177100599261, "grad_norm": 0.23481489717960358, "learning_rate": 0.0004105346810955577, "loss": 2.4396, "step": 463160 }, { "epoch": 0.922737632283565, "grad_norm": 0.1901261806488037, "learning_rate": 0.0004104088575989107, "loss": 2.4405, "step": 463170 }, { "epoch": 0.9227575545072039, "grad_norm": 0.21704940497875214, "learning_rate": 0.0004102830440609875, "loss": 2.4349, "step": 463180 }, { "epoch": 0.9227774767308428, "grad_norm": 0.21098986268043518, "learning_rate": 0.00041015724047942405, "loss": 2.4382, "step": 463190 }, { "epoch": 0.9227973989544817, "grad_norm": 0.23374265432357788, "learning_rate": 0.00041003144685185667, "loss": 2.4398, "step": 463200 }, { "epoch": 0.9228173211781207, "grad_norm": 0.20060919225215912, "learning_rate": 0.0004099056631759235, "loss": 2.4313, "step": 463210 }, { "epoch": 0.9228372434017595, "grad_norm": 0.2037961333990097, "learning_rate": 0.0004097798894492624, "loss": 2.4248, "step": 463220 }, { "epoch": 0.9228571656253984, "grad_norm": 0.21480000019073486, "learning_rate": 0.00040965412566951344, "loss": 2.4298, "step": 463230 }, { "epoch": 0.9228770878490373, "grad_norm": 0.19559882581233978, "learning_rate": 0.0004095283718343166, "loss": 2.4549, "step": 463240 }, { "epoch": 0.9228970100726763, "grad_norm": 0.2015884667634964, "learning_rate": 0.00040940262794131343, "loss": 2.4345, "step": 463250 }, { "epoch": 0.9229169322963152, "grad_norm": 0.2206926941871643, "learning_rate": 0.0004092768939881459, "loss": 2.4373, "step": 463260 }, { "epoch": 0.9229368545199541, "grad_norm": 0.21680814027786255, "learning_rate": 0.0004091511699724577, "loss": 2.4371, "step": 463270 }, { "epoch": 0.922956776743593, "grad_norm": 0.20016829669475555, "learning_rate": 0.0004090254558918927, "loss": 2.4364, "step": 463280 }, { "epoch": 0.9229766989672319, "grad_norm": 0.20272226631641388, "learning_rate": 0.0004088997517440958, "loss": 2.419, "step": 463290 }, { "epoch": 0.9229966211908709, "grad_norm": 0.21249815821647644, "learning_rate": 0.00040877405752671314, "loss": 2.4383, "step": 463300 }, { "epoch": 0.9230165434145098, "grad_norm": 0.21751415729522705, "learning_rate": 0.0004086483732373916, "loss": 2.4475, "step": 463310 }, { "epoch": 0.9230364656381487, "grad_norm": 0.24054840207099915, "learning_rate": 0.00040852269887377914, "loss": 2.4375, "step": 463320 }, { "epoch": 0.9230563878617876, "grad_norm": 0.21695710718631744, "learning_rate": 0.0004083970344335244, "loss": 2.4368, "step": 463330 }, { "epoch": 0.9230763100854265, "grad_norm": 0.19569361209869385, "learning_rate": 0.000408271379914277, "loss": 2.4418, "step": 463340 }, { "epoch": 0.9230962323090655, "grad_norm": 0.20955954492092133, "learning_rate": 0.0004081457353136877, "loss": 2.4428, "step": 463350 }, { "epoch": 0.9231161545327043, "grad_norm": 0.2156054675579071, "learning_rate": 0.00040802010062940795, "loss": 2.4491, "step": 463360 }, { "epoch": 0.9231360767563432, "grad_norm": 0.24187874794006348, "learning_rate": 0.0004078944758590906, "loss": 2.4322, "step": 463370 }, { "epoch": 0.9231559989799821, "grad_norm": 0.22262518107891083, "learning_rate": 0.0004077688610003885, "loss": 2.4216, "step": 463380 }, { "epoch": 0.923175921203621, "grad_norm": 0.20083458721637726, "learning_rate": 0.00040764325605095644, "loss": 2.4229, "step": 463390 }, { "epoch": 0.92319584342726, "grad_norm": 0.23751278221607208, "learning_rate": 0.00040751766100844945, "loss": 2.4376, "step": 463400 }, { "epoch": 0.9232157656508989, "grad_norm": 0.2163679301738739, "learning_rate": 0.00040739207587052364, "loss": 2.4338, "step": 463410 }, { "epoch": 0.9232356878745378, "grad_norm": 0.21040432155132294, "learning_rate": 0.0004072665006348364, "loss": 2.4344, "step": 463420 }, { "epoch": 0.9232556100981767, "grad_norm": 0.24275152385234833, "learning_rate": 0.0004071409352990454, "loss": 2.4381, "step": 463430 }, { "epoch": 0.9232755323218156, "grad_norm": 0.20592179894447327, "learning_rate": 0.00040701537986080985, "loss": 2.4365, "step": 463440 }, { "epoch": 0.9232954545454546, "grad_norm": 0.19647692143917084, "learning_rate": 0.00040688983431778957, "loss": 2.4323, "step": 463450 }, { "epoch": 0.9233153767690935, "grad_norm": 0.22254987061023712, "learning_rate": 0.0004067642986676452, "loss": 2.4331, "step": 463460 }, { "epoch": 0.9233352989927324, "grad_norm": 0.2143164873123169, "learning_rate": 0.00040663877290803897, "loss": 2.4259, "step": 463470 }, { "epoch": 0.9233552212163713, "grad_norm": 0.20838616788387299, "learning_rate": 0.00040651325703663276, "loss": 2.4313, "step": 463480 }, { "epoch": 0.9233751434400101, "grad_norm": 0.20422981679439545, "learning_rate": 0.0004063877510510909, "loss": 2.4361, "step": 463490 }, { "epoch": 0.9233950656636492, "grad_norm": 0.2128792107105255, "learning_rate": 0.0004062622549490773, "loss": 2.4521, "step": 463500 }, { "epoch": 0.923414987887288, "grad_norm": 0.21490029990673065, "learning_rate": 0.00040613676872825757, "loss": 2.4284, "step": 463510 }, { "epoch": 0.9234349101109269, "grad_norm": 0.2117621749639511, "learning_rate": 0.00040601129238629797, "loss": 2.4432, "step": 463520 }, { "epoch": 0.9234548323345658, "grad_norm": 0.21669666469097137, "learning_rate": 0.00040588582592086597, "loss": 2.4319, "step": 463530 }, { "epoch": 0.9234747545582047, "grad_norm": 0.2148352414369583, "learning_rate": 0.00040576036932962945, "loss": 2.4364, "step": 463540 }, { "epoch": 0.9234946767818437, "grad_norm": 0.2072460949420929, "learning_rate": 0.0004056349226102574, "loss": 2.4359, "step": 463550 }, { "epoch": 0.9235145990054826, "grad_norm": 0.20698221027851105, "learning_rate": 0.00040550948576042003, "loss": 2.4385, "step": 463560 }, { "epoch": 0.9235345212291215, "grad_norm": 0.19744598865509033, "learning_rate": 0.0004053840587777884, "loss": 2.4433, "step": 463570 }, { "epoch": 0.9235544434527604, "grad_norm": 0.21220171451568604, "learning_rate": 0.0004052586416600339, "loss": 2.4543, "step": 463580 }, { "epoch": 0.9235743656763994, "grad_norm": 0.20326967537403107, "learning_rate": 0.00040513323440482976, "loss": 2.424, "step": 463590 }, { "epoch": 0.9235942879000383, "grad_norm": 0.208375483751297, "learning_rate": 0.0004050078370098491, "loss": 2.4388, "step": 463600 }, { "epoch": 0.9236142101236772, "grad_norm": 0.20705634355545044, "learning_rate": 0.00040488244947276697, "loss": 2.4216, "step": 463610 }, { "epoch": 0.9236341323473161, "grad_norm": 0.21246406435966492, "learning_rate": 0.00040475707179125854, "loss": 2.4409, "step": 463620 }, { "epoch": 0.923654054570955, "grad_norm": 0.21838457882404327, "learning_rate": 0.00040463170396300033, "loss": 2.4264, "step": 463630 }, { "epoch": 0.923673976794594, "grad_norm": 0.22052237391471863, "learning_rate": 0.0004045063459856697, "loss": 2.4353, "step": 463640 }, { "epoch": 0.9236938990182328, "grad_norm": 0.20324918627738953, "learning_rate": 0.0004043809978569446, "loss": 2.4506, "step": 463650 }, { "epoch": 0.9237138212418717, "grad_norm": 0.22829866409301758, "learning_rate": 0.0004042556595745044, "loss": 2.4349, "step": 463660 }, { "epoch": 0.9237337434655106, "grad_norm": 0.21439126133918762, "learning_rate": 0.00040413033113602913, "loss": 2.4336, "step": 463670 }, { "epoch": 0.9237536656891495, "grad_norm": 0.2235914021730423, "learning_rate": 0.00040400501253919964, "loss": 2.4198, "step": 463680 }, { "epoch": 0.9237735879127885, "grad_norm": 0.2088329941034317, "learning_rate": 0.0004038797037816979, "loss": 2.429, "step": 463690 }, { "epoch": 0.9237935101364274, "grad_norm": 0.20968204736709595, "learning_rate": 0.0004037544048612067, "loss": 2.4305, "step": 463700 }, { "epoch": 0.9238134323600663, "grad_norm": 0.19534054398536682, "learning_rate": 0.00040362911577540973, "loss": 2.442, "step": 463710 }, { "epoch": 0.9238333545837052, "grad_norm": 0.2208183854818344, "learning_rate": 0.00040350383652199164, "loss": 2.4306, "step": 463720 }, { "epoch": 0.9238532768073441, "grad_norm": 0.20978887379169464, "learning_rate": 0.0004033785670986376, "loss": 2.4407, "step": 463730 }, { "epoch": 0.9238731990309831, "grad_norm": 0.2383158951997757, "learning_rate": 0.0004032533075030342, "loss": 2.4273, "step": 463740 }, { "epoch": 0.923893121254622, "grad_norm": 0.23846060037612915, "learning_rate": 0.0004031280577328689, "loss": 2.4448, "step": 463750 }, { "epoch": 0.9239130434782609, "grad_norm": 0.21939119696617126, "learning_rate": 0.0004030028177858298, "loss": 2.438, "step": 463760 }, { "epoch": 0.9239329657018998, "grad_norm": 0.21827460825443268, "learning_rate": 0.0004028775876596058, "loss": 2.431, "step": 463770 }, { "epoch": 0.9239528879255386, "grad_norm": 0.2168954312801361, "learning_rate": 0.00040275236735188734, "loss": 2.4357, "step": 463780 }, { "epoch": 0.9239728101491776, "grad_norm": 0.2348303496837616, "learning_rate": 0.0004026271568603652, "loss": 2.4211, "step": 463790 }, { "epoch": 0.9239927323728165, "grad_norm": 0.20327314734458923, "learning_rate": 0.00040250195618273145, "loss": 2.4191, "step": 463800 }, { "epoch": 0.9240126545964554, "grad_norm": 0.21851155161857605, "learning_rate": 0.00040237676531667833, "loss": 2.4342, "step": 463810 }, { "epoch": 0.9240325768200943, "grad_norm": 0.1967075914144516, "learning_rate": 0.00040225158425989996, "loss": 2.4251, "step": 463820 }, { "epoch": 0.9240524990437332, "grad_norm": 0.2062535285949707, "learning_rate": 0.0004021264130100906, "loss": 2.4391, "step": 463830 }, { "epoch": 0.9240724212673722, "grad_norm": 0.19419921934604645, "learning_rate": 0.0004020012515649458, "loss": 2.4375, "step": 463840 }, { "epoch": 0.9240923434910111, "grad_norm": 0.20909909904003143, "learning_rate": 0.00040187609992216203, "loss": 2.4298, "step": 463850 }, { "epoch": 0.92411226571465, "grad_norm": 0.19383187592029572, "learning_rate": 0.0004017509580794363, "loss": 2.4277, "step": 463860 }, { "epoch": 0.9241321879382889, "grad_norm": 0.20186956226825714, "learning_rate": 0.000401625826034467, "loss": 2.4255, "step": 463870 }, { "epoch": 0.9241521101619279, "grad_norm": 1.148751139640808, "learning_rate": 0.00040150070378495317, "loss": 2.4342, "step": 463880 }, { "epoch": 0.9241720323855668, "grad_norm": 0.2010319083929062, "learning_rate": 0.0004013755913285948, "loss": 2.44, "step": 463890 }, { "epoch": 0.9241919546092057, "grad_norm": 0.20354793965816498, "learning_rate": 0.00040125048866309254, "loss": 2.4389, "step": 463900 }, { "epoch": 0.9242118768328446, "grad_norm": 0.22082456946372986, "learning_rate": 0.0004011253957861485, "loss": 2.4281, "step": 463910 }, { "epoch": 0.9242317990564834, "grad_norm": 0.1781143844127655, "learning_rate": 0.0004010003126954653, "loss": 2.4285, "step": 463920 }, { "epoch": 0.9242517212801225, "grad_norm": 0.20741312205791473, "learning_rate": 0.00040087523938874627, "loss": 2.4436, "step": 463930 }, { "epoch": 0.9242716435037613, "grad_norm": 0.18793605268001556, "learning_rate": 0.0004007501758636958, "loss": 2.4379, "step": 463940 }, { "epoch": 0.9242915657274002, "grad_norm": 0.21240107715129852, "learning_rate": 0.00040062512211801947, "loss": 2.434, "step": 463950 }, { "epoch": 0.9243114879510391, "grad_norm": 0.19999508559703827, "learning_rate": 0.0004005000781494237, "loss": 2.4342, "step": 463960 }, { "epoch": 0.924331410174678, "grad_norm": 0.2597944736480713, "learning_rate": 0.0004003750439556151, "loss": 2.427, "step": 463970 }, { "epoch": 0.924351332398317, "grad_norm": 0.1901301145553589, "learning_rate": 0.0004002500195343024, "loss": 2.4338, "step": 463980 }, { "epoch": 0.9243712546219559, "grad_norm": 0.22465413808822632, "learning_rate": 0.00040012500488319393, "loss": 2.4362, "step": 463990 }, { "epoch": 0.9243911768455948, "grad_norm": 0.18538691103458405, "learning_rate": 0.0003999999999999999, "loss": 2.4362, "step": 464000 }, { "epoch": 0.9244110990692337, "grad_norm": 0.20059384405612946, "learning_rate": 0.0003998750048824311, "loss": 2.4295, "step": 464010 }, { "epoch": 0.9244310212928726, "grad_norm": 0.19721239805221558, "learning_rate": 0.00039975001952819887, "loss": 2.4303, "step": 464020 }, { "epoch": 0.9244509435165116, "grad_norm": 0.22462022304534912, "learning_rate": 0.00039962504393501597, "loss": 2.4341, "step": 464030 }, { "epoch": 0.9244708657401505, "grad_norm": 0.21126629412174225, "learning_rate": 0.0003995000781005955, "loss": 2.4412, "step": 464040 }, { "epoch": 0.9244907879637894, "grad_norm": 0.19885660707950592, "learning_rate": 0.000399375122022652, "loss": 2.4335, "step": 464050 }, { "epoch": 0.9245107101874283, "grad_norm": 0.20190134644508362, "learning_rate": 0.0003992501756989009, "loss": 2.4273, "step": 464060 }, { "epoch": 0.9245306324110671, "grad_norm": 0.20704424381256104, "learning_rate": 0.0003991252391270577, "loss": 2.4269, "step": 464070 }, { "epoch": 0.9245505546347061, "grad_norm": 0.19616147875785828, "learning_rate": 0.00039900031230484, "loss": 2.4331, "step": 464080 }, { "epoch": 0.924570476858345, "grad_norm": 0.21522670984268188, "learning_rate": 0.0003988753952299653, "loss": 2.4325, "step": 464090 }, { "epoch": 0.9245903990819839, "grad_norm": 0.22253333032131195, "learning_rate": 0.0003987504879001524, "loss": 2.4285, "step": 464100 }, { "epoch": 0.9246103213056228, "grad_norm": 0.2105974406003952, "learning_rate": 0.0003986255903131211, "loss": 2.4373, "step": 464110 }, { "epoch": 0.9246302435292617, "grad_norm": 0.21872685849666595, "learning_rate": 0.0003985007024665919, "loss": 2.4417, "step": 464120 }, { "epoch": 0.9246501657529007, "grad_norm": 0.2239266186952591, "learning_rate": 0.000398375824358286, "loss": 2.4444, "step": 464130 }, { "epoch": 0.9246700879765396, "grad_norm": 0.2022373229265213, "learning_rate": 0.000398250955985926, "loss": 2.431, "step": 464140 }, { "epoch": 0.9246900102001785, "grad_norm": 0.203194260597229, "learning_rate": 0.00039812609734723515, "loss": 2.426, "step": 464150 }, { "epoch": 0.9247099324238174, "grad_norm": 0.20517006516456604, "learning_rate": 0.0003980012484399371, "loss": 2.4534, "step": 464160 }, { "epoch": 0.9247298546474564, "grad_norm": 0.20891594886779785, "learning_rate": 0.00039787640926175727, "loss": 2.4326, "step": 464170 }, { "epoch": 0.9247497768710953, "grad_norm": 0.21204274892807007, "learning_rate": 0.0003977515798104214, "loss": 2.4301, "step": 464180 }, { "epoch": 0.9247696990947342, "grad_norm": 0.20821474492549896, "learning_rate": 0.00039762676008365627, "loss": 2.4505, "step": 464190 }, { "epoch": 0.9247896213183731, "grad_norm": 0.21005691587924957, "learning_rate": 0.00039750195007918967, "loss": 2.426, "step": 464200 }, { "epoch": 0.924809543542012, "grad_norm": 0.19104857742786407, "learning_rate": 0.0003973771497947498, "loss": 2.4213, "step": 464210 }, { "epoch": 0.924829465765651, "grad_norm": 0.20390017330646515, "learning_rate": 0.00039725235922806614, "loss": 2.412, "step": 464220 }, { "epoch": 0.9248493879892898, "grad_norm": 0.2054957151412964, "learning_rate": 0.00039712757837686907, "loss": 2.4391, "step": 464230 }, { "epoch": 0.9248693102129287, "grad_norm": 0.19825536012649536, "learning_rate": 0.0003970028072388898, "loss": 2.4345, "step": 464240 }, { "epoch": 0.9248892324365676, "grad_norm": 0.22566737234592438, "learning_rate": 0.00039687804581186036, "loss": 2.4173, "step": 464250 }, { "epoch": 0.9249091546602065, "grad_norm": 0.2252407670021057, "learning_rate": 0.00039675329409351366, "loss": 2.4252, "step": 464260 }, { "epoch": 0.9249290768838455, "grad_norm": 0.21722938120365143, "learning_rate": 0.0003966285520815833, "loss": 2.4195, "step": 464270 }, { "epoch": 0.9249489991074844, "grad_norm": 0.19908833503723145, "learning_rate": 0.00039650381977380443, "loss": 2.434, "step": 464280 }, { "epoch": 0.9249689213311233, "grad_norm": 0.1933380365371704, "learning_rate": 0.0003963790971679122, "loss": 2.4313, "step": 464290 }, { "epoch": 0.9249888435547622, "grad_norm": 0.2052333652973175, "learning_rate": 0.0003962543842616435, "loss": 2.4345, "step": 464300 }, { "epoch": 0.9250087657784011, "grad_norm": 0.2122112661600113, "learning_rate": 0.0003961296810527355, "loss": 2.4329, "step": 464310 }, { "epoch": 0.9250286880020401, "grad_norm": 0.2220783829689026, "learning_rate": 0.0003960049875389262, "loss": 2.4313, "step": 464320 }, { "epoch": 0.925048610225679, "grad_norm": 0.2043585330247879, "learning_rate": 0.000395880303717955, "loss": 2.4278, "step": 464330 }, { "epoch": 0.9250685324493179, "grad_norm": 0.2144356518983841, "learning_rate": 0.00039575562958756196, "loss": 2.4276, "step": 464340 }, { "epoch": 0.9250884546729568, "grad_norm": 0.20433250069618225, "learning_rate": 0.00039563096514548746, "loss": 2.4266, "step": 464350 }, { "epoch": 0.9251083768965956, "grad_norm": 0.223281130194664, "learning_rate": 0.00039550631038947384, "loss": 2.4337, "step": 464360 }, { "epoch": 0.9251282991202346, "grad_norm": 0.19710233807563782, "learning_rate": 0.0003953816653172631, "loss": 2.4496, "step": 464370 }, { "epoch": 0.9251482213438735, "grad_norm": 0.20499533414840698, "learning_rate": 0.00039525702992659916, "loss": 2.4487, "step": 464380 }, { "epoch": 0.9251681435675124, "grad_norm": 0.19674064218997955, "learning_rate": 0.0003951324042152262, "loss": 2.4213, "step": 464390 }, { "epoch": 0.9251880657911513, "grad_norm": 0.18873393535614014, "learning_rate": 0.00039500778818088954, "loss": 2.4333, "step": 464400 }, { "epoch": 0.9252079880147902, "grad_norm": 0.2184934765100479, "learning_rate": 0.00039488318182133545, "loss": 2.4232, "step": 464410 }, { "epoch": 0.9252279102384292, "grad_norm": 0.2400134652853012, "learning_rate": 0.00039475858513431073, "loss": 2.4631, "step": 464420 }, { "epoch": 0.9252478324620681, "grad_norm": 0.19427330791950226, "learning_rate": 0.0003946339981175633, "loss": 2.4341, "step": 464430 }, { "epoch": 0.925267754685707, "grad_norm": 0.2138531357049942, "learning_rate": 0.0003945094207688418, "loss": 2.4343, "step": 464440 }, { "epoch": 0.9252876769093459, "grad_norm": 0.20618213713169098, "learning_rate": 0.000394384853085896, "loss": 2.4127, "step": 464450 }, { "epoch": 0.9253075991329849, "grad_norm": 0.19905947148799896, "learning_rate": 0.0003942602950664762, "loss": 2.43, "step": 464460 }, { "epoch": 0.9253275213566238, "grad_norm": 0.23691979050636292, "learning_rate": 0.00039413574670833394, "loss": 2.4301, "step": 464470 }, { "epoch": 0.9253474435802627, "grad_norm": 0.20683468878269196, "learning_rate": 0.00039401120800922155, "loss": 2.4283, "step": 464480 }, { "epoch": 0.9253673658039016, "grad_norm": 0.19995389878749847, "learning_rate": 0.0003938866789668918, "loss": 2.4235, "step": 464490 }, { "epoch": 0.9253872880275404, "grad_norm": 0.19637425243854523, "learning_rate": 0.000393762159579099, "loss": 2.4258, "step": 464500 }, { "epoch": 0.9254072102511794, "grad_norm": 0.20884114503860474, "learning_rate": 0.0003936376498435976, "loss": 2.4291, "step": 464510 }, { "epoch": 0.9254271324748183, "grad_norm": 0.21051235496997833, "learning_rate": 0.0003935131497581439, "loss": 2.4358, "step": 464520 }, { "epoch": 0.9254470546984572, "grad_norm": 0.1921892911195755, "learning_rate": 0.0003933886593204938, "loss": 2.4282, "step": 464530 }, { "epoch": 0.9254669769220961, "grad_norm": 0.21058732271194458, "learning_rate": 0.00039326417852840543, "loss": 2.4378, "step": 464540 }, { "epoch": 0.925486899145735, "grad_norm": 0.19726842641830444, "learning_rate": 0.00039313970737963656, "loss": 2.4296, "step": 464550 }, { "epoch": 0.925506821369374, "grad_norm": 0.19792714715003967, "learning_rate": 0.0003930152458719467, "loss": 2.4313, "step": 464560 }, { "epoch": 0.9255267435930129, "grad_norm": 0.22471006214618683, "learning_rate": 0.0003928907940030957, "loss": 2.4275, "step": 464570 }, { "epoch": 0.9255466658166518, "grad_norm": 0.20540021359920502, "learning_rate": 0.0003927663517708446, "loss": 2.4218, "step": 464580 }, { "epoch": 0.9255665880402907, "grad_norm": 0.20920614898204803, "learning_rate": 0.00039264191917295536, "loss": 2.4334, "step": 464590 }, { "epoch": 0.9255865102639296, "grad_norm": 0.21188434958457947, "learning_rate": 0.00039251749620719046, "loss": 2.4383, "step": 464600 }, { "epoch": 0.9256064324875686, "grad_norm": 0.19823534786701202, "learning_rate": 0.0003923930828713134, "loss": 2.4297, "step": 464610 }, { "epoch": 0.9256263547112075, "grad_norm": 0.22228270769119263, "learning_rate": 0.0003922686791630885, "loss": 2.4444, "step": 464620 }, { "epoch": 0.9256462769348464, "grad_norm": 0.2180401086807251, "learning_rate": 0.0003921442850802812, "loss": 2.4248, "step": 464630 }, { "epoch": 0.9256661991584852, "grad_norm": 0.22029513120651245, "learning_rate": 0.0003920199006206575, "loss": 2.4454, "step": 464640 }, { "epoch": 0.9256861213821241, "grad_norm": 0.19742825627326965, "learning_rate": 0.00039189552578198453, "loss": 2.422, "step": 464650 }, { "epoch": 0.9257060436057631, "grad_norm": 0.21198885142803192, "learning_rate": 0.00039177116056203, "loss": 2.4347, "step": 464660 }, { "epoch": 0.925725965829402, "grad_norm": 0.21600760519504547, "learning_rate": 0.00039164680495856264, "loss": 2.4516, "step": 464670 }, { "epoch": 0.9257458880530409, "grad_norm": 0.20039209723472595, "learning_rate": 0.000391522458969352, "loss": 2.433, "step": 464680 }, { "epoch": 0.9257658102766798, "grad_norm": 0.2547244727611542, "learning_rate": 0.00039139812259216856, "loss": 2.4277, "step": 464690 }, { "epoch": 0.9257857325003187, "grad_norm": 0.20144641399383545, "learning_rate": 0.0003912737958247836, "loss": 2.4388, "step": 464700 }, { "epoch": 0.9258056547239577, "grad_norm": 0.19987952709197998, "learning_rate": 0.0003911494786649694, "loss": 2.4366, "step": 464710 }, { "epoch": 0.9258255769475966, "grad_norm": 0.21333009004592896, "learning_rate": 0.00039102517111049863, "loss": 2.4216, "step": 464720 }, { "epoch": 0.9258454991712355, "grad_norm": 0.19970549643039703, "learning_rate": 0.0003909008731591457, "loss": 2.4164, "step": 464730 }, { "epoch": 0.9258654213948744, "grad_norm": 0.21181318163871765, "learning_rate": 0.00039077658480868484, "loss": 2.4276, "step": 464740 }, { "epoch": 0.9258853436185134, "grad_norm": 0.19816064834594727, "learning_rate": 0.0003906523060568918, "loss": 2.4295, "step": 464750 }, { "epoch": 0.9259052658421523, "grad_norm": 0.22084422409534454, "learning_rate": 0.00039052803690154336, "loss": 2.429, "step": 464760 }, { "epoch": 0.9259251880657912, "grad_norm": 0.22456717491149902, "learning_rate": 0.0003904037773404163, "loss": 2.4442, "step": 464770 }, { "epoch": 0.92594511028943, "grad_norm": 0.2286006212234497, "learning_rate": 0.00039027952737128916, "loss": 2.4269, "step": 464780 }, { "epoch": 0.9259650325130689, "grad_norm": 0.19270074367523193, "learning_rate": 0.0003901552869919409, "loss": 2.4284, "step": 464790 }, { "epoch": 0.9259849547367079, "grad_norm": 0.21793638169765472, "learning_rate": 0.00039003105620015146, "loss": 2.4336, "step": 464800 }, { "epoch": 0.9260048769603468, "grad_norm": 0.2176855206489563, "learning_rate": 0.0003899068349937014, "loss": 2.439, "step": 464810 }, { "epoch": 0.9260247991839857, "grad_norm": 0.20925579965114594, "learning_rate": 0.0003897826233703725, "loss": 2.4258, "step": 464820 }, { "epoch": 0.9260447214076246, "grad_norm": 0.23196908831596375, "learning_rate": 0.00038965842132794706, "loss": 2.4338, "step": 464830 }, { "epoch": 0.9260646436312635, "grad_norm": 0.19313961267471313, "learning_rate": 0.0003895342288642083, "loss": 2.4371, "step": 464840 }, { "epoch": 0.9260845658549025, "grad_norm": 0.18811598420143127, "learning_rate": 0.00038941004597694076, "loss": 2.4219, "step": 464850 }, { "epoch": 0.9261044880785414, "grad_norm": 0.21298564970493317, "learning_rate": 0.00038928587266392925, "loss": 2.4493, "step": 464860 }, { "epoch": 0.9261244103021803, "grad_norm": 0.21393319964408875, "learning_rate": 0.00038916170892295957, "loss": 2.4391, "step": 464870 }, { "epoch": 0.9261443325258192, "grad_norm": 0.21008285880088806, "learning_rate": 0.00038903755475181855, "loss": 2.4268, "step": 464880 }, { "epoch": 0.9261642547494581, "grad_norm": 0.2122572511434555, "learning_rate": 0.00038891341014829365, "loss": 2.4365, "step": 464890 }, { "epoch": 0.9261841769730971, "grad_norm": 0.25517550110816956, "learning_rate": 0.0003887892751101736, "loss": 2.4492, "step": 464900 }, { "epoch": 0.926204099196736, "grad_norm": 0.1901441216468811, "learning_rate": 0.00038866514963524734, "loss": 2.4341, "step": 464910 }, { "epoch": 0.9262240214203749, "grad_norm": 0.20598161220550537, "learning_rate": 0.0003885410337213051, "loss": 2.4384, "step": 464920 }, { "epoch": 0.9262439436440137, "grad_norm": 0.22253043949604034, "learning_rate": 0.00038841692736613797, "loss": 2.4304, "step": 464930 }, { "epoch": 0.9262638658676526, "grad_norm": 0.21371275186538696, "learning_rate": 0.00038829283056753775, "loss": 2.4318, "step": 464940 }, { "epoch": 0.9262837880912916, "grad_norm": 0.20885688066482544, "learning_rate": 0.000388168743323297, "loss": 2.4283, "step": 464950 }, { "epoch": 0.9263037103149305, "grad_norm": 0.1890670359134674, "learning_rate": 0.0003880446656312093, "loss": 2.4266, "step": 464960 }, { "epoch": 0.9263236325385694, "grad_norm": 0.20783929526805878, "learning_rate": 0.00038792059748906895, "loss": 2.4242, "step": 464970 }, { "epoch": 0.9263435547622083, "grad_norm": 0.221775084733963, "learning_rate": 0.0003877965388946716, "loss": 2.4347, "step": 464980 }, { "epoch": 0.9263634769858472, "grad_norm": 0.2195870727300644, "learning_rate": 0.00038767248984581304, "loss": 2.4396, "step": 464990 }, { "epoch": 0.9263833992094862, "grad_norm": 0.22431480884552002, "learning_rate": 0.00038754845034028994, "loss": 2.4372, "step": 465000 }, { "epoch": 0.9264033214331251, "grad_norm": 0.20115458965301514, "learning_rate": 0.0003874244203759005, "loss": 2.4204, "step": 465010 }, { "epoch": 0.926423243656764, "grad_norm": 0.21372926235198975, "learning_rate": 0.0003873003999504434, "loss": 2.4309, "step": 465020 }, { "epoch": 0.9264431658804029, "grad_norm": 0.22456489503383636, "learning_rate": 0.0003871763890617177, "loss": 2.4324, "step": 465030 }, { "epoch": 0.9264630881040418, "grad_norm": 0.20730435848236084, "learning_rate": 0.00038705238770752383, "loss": 2.4351, "step": 465040 }, { "epoch": 0.9264830103276808, "grad_norm": 0.20081549882888794, "learning_rate": 0.0003869283958856631, "loss": 2.4123, "step": 465050 }, { "epoch": 0.9265029325513197, "grad_norm": 0.19821010529994965, "learning_rate": 0.00038680441359393745, "loss": 2.4399, "step": 465060 }, { "epoch": 0.9265228547749585, "grad_norm": 0.21424752473831177, "learning_rate": 0.00038668044083014987, "loss": 2.4418, "step": 465070 }, { "epoch": 0.9265427769985974, "grad_norm": 0.2026827037334442, "learning_rate": 0.00038655647759210354, "loss": 2.4415, "step": 465080 }, { "epoch": 0.9265626992222364, "grad_norm": 0.21391551196575165, "learning_rate": 0.00038643252387760365, "loss": 2.4269, "step": 465090 }, { "epoch": 0.9265826214458753, "grad_norm": 0.19984550774097443, "learning_rate": 0.0003863085796844552, "loss": 2.4342, "step": 465100 }, { "epoch": 0.9266025436695142, "grad_norm": 0.19436661899089813, "learning_rate": 0.00038618464501046466, "loss": 2.4377, "step": 465110 }, { "epoch": 0.9266224658931531, "grad_norm": 0.210469588637352, "learning_rate": 0.00038606071985343893, "loss": 2.4314, "step": 465120 }, { "epoch": 0.926642388116792, "grad_norm": 0.20127569139003754, "learning_rate": 0.00038593680421118574, "loss": 2.4239, "step": 465130 }, { "epoch": 0.926662310340431, "grad_norm": 0.20170556008815765, "learning_rate": 0.0003858128980815143, "loss": 2.4316, "step": 465140 }, { "epoch": 0.9266822325640699, "grad_norm": 0.20991352200508118, "learning_rate": 0.00038568900146223386, "loss": 2.4191, "step": 465150 }, { "epoch": 0.9267021547877088, "grad_norm": 0.21165122091770172, "learning_rate": 0.00038556511435115494, "loss": 2.4441, "step": 465160 }, { "epoch": 0.9267220770113477, "grad_norm": 0.2097775638103485, "learning_rate": 0.00038544123674608865, "loss": 2.4373, "step": 465170 }, { "epoch": 0.9267419992349866, "grad_norm": 0.2148319035768509, "learning_rate": 0.0003853173686448472, "loss": 2.4285, "step": 465180 }, { "epoch": 0.9267619214586256, "grad_norm": 0.1971156895160675, "learning_rate": 0.0003851935100452437, "loss": 2.4336, "step": 465190 }, { "epoch": 0.9267818436822645, "grad_norm": 0.202992781996727, "learning_rate": 0.0003850696609450921, "loss": 2.4274, "step": 465200 }, { "epoch": 0.9268017659059034, "grad_norm": 0.20564281940460205, "learning_rate": 0.0003849458213422066, "loss": 2.4424, "step": 465210 }, { "epoch": 0.9268216881295422, "grad_norm": 0.21992404758930206, "learning_rate": 0.00038482199123440264, "loss": 2.4205, "step": 465220 }, { "epoch": 0.9268416103531811, "grad_norm": 0.2263452410697937, "learning_rate": 0.000384698170619497, "loss": 2.4435, "step": 465230 }, { "epoch": 0.9268615325768201, "grad_norm": 0.21665449440479279, "learning_rate": 0.0003845743594953064, "loss": 2.4195, "step": 465240 }, { "epoch": 0.926881454800459, "grad_norm": 0.2365863025188446, "learning_rate": 0.0003844505578596489, "loss": 2.4221, "step": 465250 }, { "epoch": 0.9269013770240979, "grad_norm": 0.31078585982322693, "learning_rate": 0.0003843267657103433, "loss": 2.425, "step": 465260 }, { "epoch": 0.9269212992477368, "grad_norm": 0.22459407150745392, "learning_rate": 0.0003842029830452094, "loss": 2.4309, "step": 465270 }, { "epoch": 0.9269412214713757, "grad_norm": 0.21933482587337494, "learning_rate": 0.00038407920986206756, "loss": 2.4501, "step": 465280 }, { "epoch": 0.9269611436950147, "grad_norm": 0.22143764793872833, "learning_rate": 0.00038395544615873914, "loss": 2.4271, "step": 465290 }, { "epoch": 0.9269810659186536, "grad_norm": 0.21351240575313568, "learning_rate": 0.00038383169193304624, "loss": 2.4272, "step": 465300 }, { "epoch": 0.9270009881422925, "grad_norm": 0.2122313380241394, "learning_rate": 0.000383707947182812, "loss": 2.4321, "step": 465310 }, { "epoch": 0.9270209103659314, "grad_norm": 0.20410658419132233, "learning_rate": 0.0003835842119058599, "loss": 2.4243, "step": 465320 }, { "epoch": 0.9270408325895703, "grad_norm": 0.21123720705509186, "learning_rate": 0.000383460486100015, "loss": 2.4188, "step": 465330 }, { "epoch": 0.9270607548132093, "grad_norm": 0.2150617241859436, "learning_rate": 0.0003833367697631025, "loss": 2.4434, "step": 465340 }, { "epoch": 0.9270806770368482, "grad_norm": 0.202577605843544, "learning_rate": 0.00038321306289294886, "loss": 2.413, "step": 465350 }, { "epoch": 0.927100599260487, "grad_norm": 0.21016745269298553, "learning_rate": 0.00038308936548738106, "loss": 2.4351, "step": 465360 }, { "epoch": 0.9271205214841259, "grad_norm": 0.22633980214595795, "learning_rate": 0.0003829656775442274, "loss": 2.4242, "step": 465370 }, { "epoch": 0.9271404437077649, "grad_norm": 0.2208424210548401, "learning_rate": 0.00038284199906131613, "loss": 2.4254, "step": 465380 }, { "epoch": 0.9271603659314038, "grad_norm": 0.21510347723960876, "learning_rate": 0.0003827183300364776, "loss": 2.4388, "step": 465390 }, { "epoch": 0.9271802881550427, "grad_norm": 0.22785085439682007, "learning_rate": 0.0003825946704675416, "loss": 2.4176, "step": 465400 }, { "epoch": 0.9272002103786816, "grad_norm": 0.1844281256198883, "learning_rate": 0.00038247102035234004, "loss": 2.4421, "step": 465410 }, { "epoch": 0.9272201326023205, "grad_norm": 0.19066019356250763, "learning_rate": 0.0003823473796887047, "loss": 2.4451, "step": 465420 }, { "epoch": 0.9272400548259595, "grad_norm": 0.21047408878803253, "learning_rate": 0.0003822237484744684, "loss": 2.4283, "step": 465430 }, { "epoch": 0.9272599770495984, "grad_norm": 0.23101019859313965, "learning_rate": 0.0003821001267074653, "loss": 2.4315, "step": 465440 }, { "epoch": 0.9272798992732373, "grad_norm": 0.19876708090305328, "learning_rate": 0.00038197651438552985, "loss": 2.423, "step": 465450 }, { "epoch": 0.9272998214968762, "grad_norm": 0.23407994210720062, "learning_rate": 0.00038185291150649725, "loss": 2.4423, "step": 465460 }, { "epoch": 0.9273197437205151, "grad_norm": 0.2690671384334564, "learning_rate": 0.00038172931806820397, "loss": 2.4371, "step": 465470 }, { "epoch": 0.9273396659441541, "grad_norm": 0.23408310115337372, "learning_rate": 0.0003816057340684873, "loss": 2.423, "step": 465480 }, { "epoch": 0.927359588167793, "grad_norm": 0.2279948741197586, "learning_rate": 0.0003814821595051847, "loss": 2.4338, "step": 465490 }, { "epoch": 0.9273795103914318, "grad_norm": 0.2065771520137787, "learning_rate": 0.00038135859437613554, "loss": 2.4278, "step": 465500 }, { "epoch": 0.9273994326150707, "grad_norm": 0.20260316133499146, "learning_rate": 0.00038123503867917873, "loss": 2.4305, "step": 465510 }, { "epoch": 0.9274193548387096, "grad_norm": 0.20229080319404602, "learning_rate": 0.00038111149241215505, "loss": 2.4325, "step": 465520 }, { "epoch": 0.9274392770623486, "grad_norm": 0.22141985595226288, "learning_rate": 0.0003809879555729057, "loss": 2.4159, "step": 465530 }, { "epoch": 0.9274591992859875, "grad_norm": 0.2129228413105011, "learning_rate": 0.0003808644281592724, "loss": 2.4332, "step": 465540 }, { "epoch": 0.9274791215096264, "grad_norm": 0.19762346148490906, "learning_rate": 0.0003807409101690984, "loss": 2.426, "step": 465550 }, { "epoch": 0.9274990437332653, "grad_norm": 0.19922703504562378, "learning_rate": 0.0003806174016002273, "loss": 2.4298, "step": 465560 }, { "epoch": 0.9275189659569042, "grad_norm": 0.2094452679157257, "learning_rate": 0.0003804939024505034, "loss": 2.4221, "step": 465570 }, { "epoch": 0.9275388881805432, "grad_norm": 0.2075936198234558, "learning_rate": 0.00038037041271777206, "loss": 2.421, "step": 465580 }, { "epoch": 0.9275588104041821, "grad_norm": 0.20259074866771698, "learning_rate": 0.0003802469323998796, "loss": 2.4528, "step": 465590 }, { "epoch": 0.927578732627821, "grad_norm": 0.2264549434185028, "learning_rate": 0.00038012346149467294, "loss": 2.4516, "step": 465600 }, { "epoch": 0.9275986548514599, "grad_norm": 0.21659806370735168, "learning_rate": 0.0003799999999999999, "loss": 2.4497, "step": 465610 }, { "epoch": 0.9276185770750988, "grad_norm": 0.23954008519649506, "learning_rate": 0.00037987654791370916, "loss": 2.4369, "step": 465620 }, { "epoch": 0.9276384992987378, "grad_norm": 0.20989680290222168, "learning_rate": 0.0003797531052336498, "loss": 2.4192, "step": 465630 }, { "epoch": 0.9276584215223767, "grad_norm": 0.1992124319076538, "learning_rate": 0.00037962967195767264, "loss": 2.4436, "step": 465640 }, { "epoch": 0.9276783437460155, "grad_norm": 0.2172149121761322, "learning_rate": 0.00037950624808362823, "loss": 2.4275, "step": 465650 }, { "epoch": 0.9276982659696544, "grad_norm": 0.20530077815055847, "learning_rate": 0.00037938283360936854, "loss": 2.4183, "step": 465660 }, { "epoch": 0.9277181881932934, "grad_norm": 0.2322235405445099, "learning_rate": 0.0003792594285327464, "loss": 2.4336, "step": 465670 }, { "epoch": 0.9277381104169323, "grad_norm": 0.19481413066387177, "learning_rate": 0.0003791360328516151, "loss": 2.4422, "step": 465680 }, { "epoch": 0.9277580326405712, "grad_norm": 0.21009744703769684, "learning_rate": 0.0003790126465638295, "loss": 2.4287, "step": 465690 }, { "epoch": 0.9277779548642101, "grad_norm": 0.20663343369960785, "learning_rate": 0.00037888926966724436, "loss": 2.4315, "step": 465700 }, { "epoch": 0.927797877087849, "grad_norm": 0.20270809531211853, "learning_rate": 0.00037876590215971584, "loss": 2.4258, "step": 465710 }, { "epoch": 0.927817799311488, "grad_norm": 0.22365689277648926, "learning_rate": 0.0003786425440391006, "loss": 2.4208, "step": 465720 }, { "epoch": 0.9278377215351269, "grad_norm": 0.20464543998241425, "learning_rate": 0.00037851919530325606, "loss": 2.4357, "step": 465730 }, { "epoch": 0.9278576437587658, "grad_norm": 0.20625190436840057, "learning_rate": 0.0003783958559500411, "loss": 2.4467, "step": 465740 }, { "epoch": 0.9278775659824047, "grad_norm": 0.21997007727622986, "learning_rate": 0.0003782725259773145, "loss": 2.4171, "step": 465750 }, { "epoch": 0.9278974882060436, "grad_norm": 0.23396766185760498, "learning_rate": 0.0003781492053829367, "loss": 2.4227, "step": 465760 }, { "epoch": 0.9279174104296826, "grad_norm": 0.20806558430194855, "learning_rate": 0.0003780258941647683, "loss": 2.4344, "step": 465770 }, { "epoch": 0.9279373326533215, "grad_norm": 0.2016381323337555, "learning_rate": 0.0003779025923206707, "loss": 2.4378, "step": 465780 }, { "epoch": 0.9279572548769603, "grad_norm": 0.20144180953502655, "learning_rate": 0.000377779299848507, "loss": 2.4099, "step": 465790 }, { "epoch": 0.9279771771005992, "grad_norm": 0.20215988159179688, "learning_rate": 0.00037765601674614026, "loss": 2.4338, "step": 465800 }, { "epoch": 0.9279970993242381, "grad_norm": 0.2095857709646225, "learning_rate": 0.00037753274301143436, "loss": 2.43, "step": 465810 }, { "epoch": 0.9280170215478771, "grad_norm": 0.28868308663368225, "learning_rate": 0.0003774094786422546, "loss": 2.4318, "step": 465820 }, { "epoch": 0.928036943771516, "grad_norm": 0.22195743024349213, "learning_rate": 0.00037728622363646645, "loss": 2.4333, "step": 465830 }, { "epoch": 0.9280568659951549, "grad_norm": 0.21811914443969727, "learning_rate": 0.00037716297799193657, "loss": 2.4342, "step": 465840 }, { "epoch": 0.9280767882187938, "grad_norm": 0.20465582609176636, "learning_rate": 0.0003770397417065321, "loss": 2.4151, "step": 465850 }, { "epoch": 0.9280967104424327, "grad_norm": 0.19043004512786865, "learning_rate": 0.0003769165147781215, "loss": 2.425, "step": 465860 }, { "epoch": 0.9281166326660717, "grad_norm": 0.21996474266052246, "learning_rate": 0.00037679329720457356, "loss": 2.4501, "step": 465870 }, { "epoch": 0.9281365548897106, "grad_norm": 0.22073325514793396, "learning_rate": 0.000376670088983758, "loss": 2.426, "step": 465880 }, { "epoch": 0.9281564771133495, "grad_norm": 0.23024605214595795, "learning_rate": 0.0003765468901135456, "loss": 2.4283, "step": 465890 }, { "epoch": 0.9281763993369884, "grad_norm": 0.19816164672374725, "learning_rate": 0.00037642370059180763, "loss": 2.4314, "step": 465900 }, { "epoch": 0.9281963215606273, "grad_norm": 0.21541310846805573, "learning_rate": 0.0003763005204164165, "loss": 2.4149, "step": 465910 }, { "epoch": 0.9282162437842663, "grad_norm": 0.21869002282619476, "learning_rate": 0.000376177349585245, "loss": 2.4236, "step": 465920 }, { "epoch": 0.9282361660079052, "grad_norm": 0.20304764807224274, "learning_rate": 0.00037605418809616675, "loss": 2.4353, "step": 465930 }, { "epoch": 0.928256088231544, "grad_norm": 0.21396851539611816, "learning_rate": 0.0003759310359470569, "loss": 2.4231, "step": 465940 }, { "epoch": 0.9282760104551829, "grad_norm": 0.22051173448562622, "learning_rate": 0.00037580789313579045, "loss": 2.4162, "step": 465950 }, { "epoch": 0.9282959326788219, "grad_norm": 0.26629000902175903, "learning_rate": 0.0003756847596602437, "loss": 2.4309, "step": 465960 }, { "epoch": 0.9283158549024608, "grad_norm": 0.19282734394073486, "learning_rate": 0.00037556163551829403, "loss": 2.4225, "step": 465970 }, { "epoch": 0.9283357771260997, "grad_norm": 0.22395820915699005, "learning_rate": 0.0003754385207078188, "loss": 2.4333, "step": 465980 }, { "epoch": 0.9283556993497386, "grad_norm": 0.2241693139076233, "learning_rate": 0.00037531541522669686, "loss": 2.4354, "step": 465990 }, { "epoch": 0.9283756215733775, "grad_norm": 0.19828027486801147, "learning_rate": 0.00037519231907280794, "loss": 2.4394, "step": 466000 }, { "epoch": 0.9283955437970165, "grad_norm": 0.1913343071937561, "learning_rate": 0.0003750692322440319, "loss": 2.4333, "step": 466010 }, { "epoch": 0.9284154660206554, "grad_norm": 0.23036928474903107, "learning_rate": 0.00037494615473825, "loss": 2.4345, "step": 466020 }, { "epoch": 0.9284353882442943, "grad_norm": 0.22089651226997375, "learning_rate": 0.00037482308655334396, "loss": 2.4272, "step": 466030 }, { "epoch": 0.9284553104679332, "grad_norm": 0.2286529839038849, "learning_rate": 0.00037470002768719656, "loss": 2.4222, "step": 466040 }, { "epoch": 0.9284752326915721, "grad_norm": 0.20843005180358887, "learning_rate": 0.00037457697813769107, "loss": 2.4293, "step": 466050 }, { "epoch": 0.9284951549152111, "grad_norm": 0.20587590336799622, "learning_rate": 0.0003744539379027121, "loss": 2.4253, "step": 466060 }, { "epoch": 0.92851507713885, "grad_norm": 0.19615454971790314, "learning_rate": 0.0003743309069801444, "loss": 2.4234, "step": 466070 }, { "epoch": 0.9285349993624888, "grad_norm": 0.21846766769886017, "learning_rate": 0.0003742078853678739, "loss": 2.4113, "step": 466080 }, { "epoch": 0.9285549215861277, "grad_norm": 0.20270663499832153, "learning_rate": 0.00037408487306378734, "loss": 2.4338, "step": 466090 }, { "epoch": 0.9285748438097666, "grad_norm": 0.19054868817329407, "learning_rate": 0.0003739618700657723, "loss": 2.4321, "step": 466100 }, { "epoch": 0.9285947660334056, "grad_norm": 0.20177671313285828, "learning_rate": 0.0003738388763717169, "loss": 2.4312, "step": 466110 }, { "epoch": 0.9286146882570445, "grad_norm": 0.21324825286865234, "learning_rate": 0.00037371589197951027, "loss": 2.4207, "step": 466120 }, { "epoch": 0.9286346104806834, "grad_norm": 0.2002904862165451, "learning_rate": 0.000373592916887042, "loss": 2.4347, "step": 466130 }, { "epoch": 0.9286545327043223, "grad_norm": 0.22556477785110474, "learning_rate": 0.0003734699510922033, "loss": 2.4165, "step": 466140 }, { "epoch": 0.9286744549279612, "grad_norm": 0.20391938090324402, "learning_rate": 0.0003733469945928849, "loss": 2.4387, "step": 466150 }, { "epoch": 0.9286943771516002, "grad_norm": 0.2395697832107544, "learning_rate": 0.00037322404738697967, "loss": 2.4306, "step": 466160 }, { "epoch": 0.9287142993752391, "grad_norm": 0.2335222363471985, "learning_rate": 0.0003731011094723804, "loss": 2.441, "step": 466170 }, { "epoch": 0.928734221598878, "grad_norm": 0.2190578132867813, "learning_rate": 0.0003729781808469808, "loss": 2.4297, "step": 466180 }, { "epoch": 0.9287541438225169, "grad_norm": 0.22820650041103363, "learning_rate": 0.0003728552615086758, "loss": 2.4499, "step": 466190 }, { "epoch": 0.9287740660461558, "grad_norm": 0.2214152067899704, "learning_rate": 0.00037273235145536066, "loss": 2.4301, "step": 466200 }, { "epoch": 0.9287939882697948, "grad_norm": 0.21900896728038788, "learning_rate": 0.0003726094506849316, "loss": 2.4333, "step": 466210 }, { "epoch": 0.9288139104934336, "grad_norm": 0.2174740880727768, "learning_rate": 0.0003724865591952857, "loss": 2.4104, "step": 466220 }, { "epoch": 0.9288338327170725, "grad_norm": 0.21060414612293243, "learning_rate": 0.00037236367698432083, "loss": 2.4378, "step": 466230 }, { "epoch": 0.9288537549407114, "grad_norm": 0.21995382010936737, "learning_rate": 0.00037224080404993565, "loss": 2.4271, "step": 466240 }, { "epoch": 0.9288736771643504, "grad_norm": 0.20178717374801636, "learning_rate": 0.00037211794039002945, "loss": 2.4293, "step": 466250 }, { "epoch": 0.9288935993879893, "grad_norm": 0.2085949033498764, "learning_rate": 0.00037199508600250234, "loss": 2.4216, "step": 466260 }, { "epoch": 0.9289135216116282, "grad_norm": 0.21606911718845367, "learning_rate": 0.0003718722408852555, "loss": 2.4446, "step": 466270 }, { "epoch": 0.9289334438352671, "grad_norm": 0.24213890731334686, "learning_rate": 0.00037174940503619045, "loss": 2.4329, "step": 466280 }, { "epoch": 0.928953366058906, "grad_norm": 0.22332453727722168, "learning_rate": 0.0003716265784532098, "loss": 2.4359, "step": 466290 }, { "epoch": 0.928973288282545, "grad_norm": 0.21190010011196136, "learning_rate": 0.00037150376113421736, "loss": 2.4297, "step": 466300 }, { "epoch": 0.9289932105061839, "grad_norm": 0.233079731464386, "learning_rate": 0.00037138095307711683, "loss": 2.4207, "step": 466310 }, { "epoch": 0.9290131327298228, "grad_norm": 0.21651597321033478, "learning_rate": 0.00037125815427981344, "loss": 2.4288, "step": 466320 }, { "epoch": 0.9290330549534617, "grad_norm": 0.22474640607833862, "learning_rate": 0.0003711353647402127, "loss": 2.4264, "step": 466330 }, { "epoch": 0.9290529771771006, "grad_norm": 0.22314204275608063, "learning_rate": 0.0003710125844562211, "loss": 2.4372, "step": 466340 }, { "epoch": 0.9290728994007396, "grad_norm": 0.21456503868103027, "learning_rate": 0.00037088981342574636, "loss": 2.4206, "step": 466350 }, { "epoch": 0.9290928216243785, "grad_norm": 0.2079564929008484, "learning_rate": 0.0003707670516466959, "loss": 2.4249, "step": 466360 }, { "epoch": 0.9291127438480173, "grad_norm": 0.20560196042060852, "learning_rate": 0.0003706442991169792, "loss": 2.4247, "step": 466370 }, { "epoch": 0.9291326660716562, "grad_norm": 0.19717960059642792, "learning_rate": 0.00037052155583450566, "loss": 2.4264, "step": 466380 }, { "epoch": 0.9291525882952951, "grad_norm": 0.20963259041309357, "learning_rate": 0.0003703988217971859, "loss": 2.4309, "step": 466390 }, { "epoch": 0.9291725105189341, "grad_norm": 0.2006016969680786, "learning_rate": 0.00037027609700293086, "loss": 2.4241, "step": 466400 }, { "epoch": 0.929192432742573, "grad_norm": 0.2209652066230774, "learning_rate": 0.00037015338144965313, "loss": 2.4295, "step": 466410 }, { "epoch": 0.9292123549662119, "grad_norm": 0.21133239567279816, "learning_rate": 0.0003700306751352649, "loss": 2.4282, "step": 466420 }, { "epoch": 0.9292322771898508, "grad_norm": 0.24208898842334747, "learning_rate": 0.0003699079780576804, "loss": 2.4477, "step": 466430 }, { "epoch": 0.9292521994134897, "grad_norm": 0.23073674738407135, "learning_rate": 0.0003697852902148136, "loss": 2.4285, "step": 466440 }, { "epoch": 0.9292721216371287, "grad_norm": 0.20153909921646118, "learning_rate": 0.0003696626116045796, "loss": 2.4296, "step": 466450 }, { "epoch": 0.9292920438607676, "grad_norm": 0.20897963643074036, "learning_rate": 0.0003695399422248946, "loss": 2.4396, "step": 466460 }, { "epoch": 0.9293119660844065, "grad_norm": 0.21135546267032623, "learning_rate": 0.00036941728207367543, "loss": 2.4474, "step": 466470 }, { "epoch": 0.9293318883080454, "grad_norm": 0.22123152017593384, "learning_rate": 0.00036929463114883946, "loss": 2.4472, "step": 466480 }, { "epoch": 0.9293518105316843, "grad_norm": 0.21550938487052917, "learning_rate": 0.00036917198944830476, "loss": 2.4364, "step": 466490 }, { "epoch": 0.9293717327553233, "grad_norm": 0.229400172829628, "learning_rate": 0.0003690493569699909, "loss": 2.4227, "step": 466500 }, { "epoch": 0.9293916549789621, "grad_norm": 0.20281334221363068, "learning_rate": 0.00036892673371181765, "loss": 2.431, "step": 466510 }, { "epoch": 0.929411577202601, "grad_norm": 0.19010324776172638, "learning_rate": 0.00036880411967170535, "loss": 2.4273, "step": 466520 }, { "epoch": 0.9294314994262399, "grad_norm": 0.22753562033176422, "learning_rate": 0.00036868151484757575, "loss": 2.4352, "step": 466530 }, { "epoch": 0.9294514216498789, "grad_norm": 0.212712362408638, "learning_rate": 0.00036855891923735106, "loss": 2.4184, "step": 466540 }, { "epoch": 0.9294713438735178, "grad_norm": 0.22926272451877594, "learning_rate": 0.0003684363328389542, "loss": 2.431, "step": 466550 }, { "epoch": 0.9294912660971567, "grad_norm": 0.18206347525119781, "learning_rate": 0.00036831375565030887, "loss": 2.4306, "step": 466560 }, { "epoch": 0.9295111883207956, "grad_norm": 0.20325680077075958, "learning_rate": 0.0003681911876693398, "loss": 2.4402, "step": 466570 }, { "epoch": 0.9295311105444345, "grad_norm": 0.22757621109485626, "learning_rate": 0.0003680686288939723, "loss": 2.4159, "step": 466580 }, { "epoch": 0.9295510327680735, "grad_norm": 0.20142315328121185, "learning_rate": 0.0003679460793221321, "loss": 2.4285, "step": 466590 }, { "epoch": 0.9295709549917124, "grad_norm": 0.20999205112457275, "learning_rate": 0.0003678235389517468, "loss": 2.4243, "step": 466600 }, { "epoch": 0.9295908772153513, "grad_norm": 0.23756995797157288, "learning_rate": 0.00036770100778074367, "loss": 2.4339, "step": 466610 }, { "epoch": 0.9296107994389902, "grad_norm": 0.19020406901836395, "learning_rate": 0.000367578485807051, "loss": 2.426, "step": 466620 }, { "epoch": 0.9296307216626291, "grad_norm": 0.22633738815784454, "learning_rate": 0.00036745597302859843, "loss": 2.4332, "step": 466630 }, { "epoch": 0.9296506438862681, "grad_norm": 0.20160625874996185, "learning_rate": 0.00036733346944331594, "loss": 2.4359, "step": 466640 }, { "epoch": 0.929670566109907, "grad_norm": 0.20536485314369202, "learning_rate": 0.0003672109750491339, "loss": 2.4252, "step": 466650 }, { "epoch": 0.9296904883335458, "grad_norm": 0.22151878476142883, "learning_rate": 0.00036708848984398434, "loss": 2.4239, "step": 466660 }, { "epoch": 0.9297104105571847, "grad_norm": 0.2077740877866745, "learning_rate": 0.0003669660138257993, "loss": 2.4266, "step": 466670 }, { "epoch": 0.9297303327808236, "grad_norm": 0.20060725510120392, "learning_rate": 0.0003668435469925118, "loss": 2.4261, "step": 466680 }, { "epoch": 0.9297502550044626, "grad_norm": 0.22552616894245148, "learning_rate": 0.0003667210893420561, "loss": 2.4178, "step": 466690 }, { "epoch": 0.9297701772281015, "grad_norm": 0.21504834294319153, "learning_rate": 0.0003665986408723665, "loss": 2.4065, "step": 466700 }, { "epoch": 0.9297900994517404, "grad_norm": 0.21309597790241241, "learning_rate": 0.0003664762015813787, "loss": 2.4162, "step": 466710 }, { "epoch": 0.9298100216753793, "grad_norm": 0.19833987951278687, "learning_rate": 0.0003663537714670291, "loss": 2.4382, "step": 466720 }, { "epoch": 0.9298299438990182, "grad_norm": 0.21713189780712128, "learning_rate": 0.0003662313505272541, "loss": 2.4306, "step": 466730 }, { "epoch": 0.9298498661226572, "grad_norm": 0.2113897055387497, "learning_rate": 0.00036610893875999186, "loss": 2.4187, "step": 466740 }, { "epoch": 0.9298697883462961, "grad_norm": 0.21943047642707825, "learning_rate": 0.0003659865361631809, "loss": 2.4213, "step": 466750 }, { "epoch": 0.929889710569935, "grad_norm": 0.21321739256381989, "learning_rate": 0.0003658641427347602, "loss": 2.4296, "step": 466760 }, { "epoch": 0.9299096327935739, "grad_norm": 0.19550104439258575, "learning_rate": 0.00036574175847267035, "loss": 2.4365, "step": 466770 }, { "epoch": 0.9299295550172128, "grad_norm": 0.2324112504720688, "learning_rate": 0.0003656193833748518, "loss": 2.4194, "step": 466780 }, { "epoch": 0.9299494772408518, "grad_norm": 0.20682960748672485, "learning_rate": 0.000365497017439246, "loss": 2.4275, "step": 466790 }, { "epoch": 0.9299693994644906, "grad_norm": 0.21514536440372467, "learning_rate": 0.000365374660663796, "loss": 2.4391, "step": 466800 }, { "epoch": 0.9299893216881295, "grad_norm": 0.21169044077396393, "learning_rate": 0.00036525231304644444, "loss": 2.4286, "step": 466810 }, { "epoch": 0.9300092439117684, "grad_norm": 0.2026451826095581, "learning_rate": 0.0003651299745851353, "loss": 2.4119, "step": 466820 }, { "epoch": 0.9300291661354073, "grad_norm": 0.22813156247138977, "learning_rate": 0.0003650076452778135, "loss": 2.4269, "step": 466830 }, { "epoch": 0.9300490883590463, "grad_norm": 0.21216805279254913, "learning_rate": 0.00036488532512242444, "loss": 2.4213, "step": 466840 }, { "epoch": 0.9300690105826852, "grad_norm": 0.2156534194946289, "learning_rate": 0.00036476301411691406, "loss": 2.4377, "step": 466850 }, { "epoch": 0.9300889328063241, "grad_norm": 0.24698233604431152, "learning_rate": 0.0003646407122592297, "loss": 2.434, "step": 466860 }, { "epoch": 0.930108855029963, "grad_norm": 0.21267804503440857, "learning_rate": 0.0003645184195473188, "loss": 2.4371, "step": 466870 }, { "epoch": 0.930128777253602, "grad_norm": 0.23376335203647614, "learning_rate": 0.00036439613597913037, "loss": 2.4106, "step": 466880 }, { "epoch": 0.9301486994772409, "grad_norm": 0.2157755345106125, "learning_rate": 0.0003642738615526131, "loss": 2.4241, "step": 466890 }, { "epoch": 0.9301686217008798, "grad_norm": 0.20471902191638947, "learning_rate": 0.0003641515962657176, "loss": 2.4314, "step": 466900 }, { "epoch": 0.9301885439245187, "grad_norm": 0.1981128603219986, "learning_rate": 0.00036402934011639434, "loss": 2.4125, "step": 466910 }, { "epoch": 0.9302084661481576, "grad_norm": 0.20391887426376343, "learning_rate": 0.0003639070931025952, "loss": 2.4232, "step": 466920 }, { "epoch": 0.9302283883717966, "grad_norm": 0.20959696173667908, "learning_rate": 0.0003637848552222725, "loss": 2.4364, "step": 466930 }, { "epoch": 0.9302483105954354, "grad_norm": 0.19680187106132507, "learning_rate": 0.0003636626264733791, "loss": 2.4415, "step": 466940 }, { "epoch": 0.9302682328190743, "grad_norm": 0.2234765589237213, "learning_rate": 0.00036354040685386926, "loss": 2.435, "step": 466950 }, { "epoch": 0.9302881550427132, "grad_norm": 0.2154797613620758, "learning_rate": 0.00036341819636169736, "loss": 2.4217, "step": 466960 }, { "epoch": 0.9303080772663521, "grad_norm": 0.22891728579998016, "learning_rate": 0.00036329599499481893, "loss": 2.4423, "step": 466970 }, { "epoch": 0.9303279994899911, "grad_norm": 0.21155911684036255, "learning_rate": 0.0003631738027511902, "loss": 2.4342, "step": 466980 }, { "epoch": 0.93034792171363, "grad_norm": 0.185174822807312, "learning_rate": 0.00036305161962876787, "loss": 2.425, "step": 466990 }, { "epoch": 0.9303678439372689, "grad_norm": 0.18817178905010223, "learning_rate": 0.0003629294456255099, "loss": 2.4146, "step": 467000 }, { "epoch": 0.9303877661609078, "grad_norm": 0.21817399561405182, "learning_rate": 0.000362807280739375, "loss": 2.4375, "step": 467010 }, { "epoch": 0.9304076883845467, "grad_norm": 0.2370581477880478, "learning_rate": 0.00036268512496832184, "loss": 2.4394, "step": 467020 }, { "epoch": 0.9304276106081857, "grad_norm": 0.23864144086837769, "learning_rate": 0.0003625629783103106, "loss": 2.4389, "step": 467030 }, { "epoch": 0.9304475328318246, "grad_norm": 0.2027616798877716, "learning_rate": 0.0003624408407633024, "loss": 2.4199, "step": 467040 }, { "epoch": 0.9304674550554635, "grad_norm": 0.21921372413635254, "learning_rate": 0.0003623187123252583, "loss": 2.4183, "step": 467050 }, { "epoch": 0.9304873772791024, "grad_norm": 0.2106579691171646, "learning_rate": 0.0003621965929941409, "loss": 2.4348, "step": 467060 }, { "epoch": 0.9305072995027412, "grad_norm": 0.22388695180416107, "learning_rate": 0.000362074482767913, "loss": 2.4363, "step": 467070 }, { "epoch": 0.9305272217263802, "grad_norm": 0.21752828359603882, "learning_rate": 0.0003619523816445385, "loss": 2.4213, "step": 467080 }, { "epoch": 0.9305471439500191, "grad_norm": 0.22083447873592377, "learning_rate": 0.00036183028962198183, "loss": 2.4195, "step": 467090 }, { "epoch": 0.930567066173658, "grad_norm": 0.1906072199344635, "learning_rate": 0.0003617082066982085, "loss": 2.4341, "step": 467100 }, { "epoch": 0.9305869883972969, "grad_norm": 0.22582148015499115, "learning_rate": 0.0003615861328711845, "loss": 2.4274, "step": 467110 }, { "epoch": 0.9306069106209358, "grad_norm": 0.2073034942150116, "learning_rate": 0.0003614640681388766, "loss": 2.4302, "step": 467120 }, { "epoch": 0.9306268328445748, "grad_norm": 0.20862096548080444, "learning_rate": 0.00036134201249925257, "loss": 2.4281, "step": 467130 }, { "epoch": 0.9306467550682137, "grad_norm": 0.21540698409080505, "learning_rate": 0.0003612199659502804, "loss": 2.4252, "step": 467140 }, { "epoch": 0.9306666772918526, "grad_norm": 0.20939627289772034, "learning_rate": 0.00036109792848992964, "loss": 2.4236, "step": 467150 }, { "epoch": 0.9306865995154915, "grad_norm": 0.20564553141593933, "learning_rate": 0.0003609759001161699, "loss": 2.4219, "step": 467160 }, { "epoch": 0.9307065217391305, "grad_norm": 0.19177784025669098, "learning_rate": 0.0003608538808269717, "loss": 2.4183, "step": 467170 }, { "epoch": 0.9307264439627694, "grad_norm": 0.19788582623004913, "learning_rate": 0.0003607318706203064, "loss": 2.4117, "step": 467180 }, { "epoch": 0.9307463661864083, "grad_norm": 0.24174658954143524, "learning_rate": 0.0003606098694941462, "loss": 2.4255, "step": 467190 }, { "epoch": 0.9307662884100472, "grad_norm": 0.21979811787605286, "learning_rate": 0.00036048787744646417, "loss": 2.4215, "step": 467200 }, { "epoch": 0.930786210633686, "grad_norm": 0.22226482629776, "learning_rate": 0.0003603658944752337, "loss": 2.4244, "step": 467210 }, { "epoch": 0.930806132857325, "grad_norm": 0.20411254465579987, "learning_rate": 0.00036024392057842935, "loss": 2.4201, "step": 467220 }, { "epoch": 0.9308260550809639, "grad_norm": 0.20361760258674622, "learning_rate": 0.0003601219557540263, "loss": 2.4422, "step": 467230 }, { "epoch": 0.9308459773046028, "grad_norm": 0.21267448365688324, "learning_rate": 0.00036000000000000013, "loss": 2.426, "step": 467240 }, { "epoch": 0.9308658995282417, "grad_norm": 0.1981126368045807, "learning_rate": 0.00035987805331432756, "loss": 2.4484, "step": 467250 }, { "epoch": 0.9308858217518806, "grad_norm": 0.23946315050125122, "learning_rate": 0.000359756115694986, "loss": 2.4309, "step": 467260 }, { "epoch": 0.9309057439755196, "grad_norm": 0.21486155688762665, "learning_rate": 0.000359634187139954, "loss": 2.4353, "step": 467270 }, { "epoch": 0.9309256661991585, "grad_norm": 0.2146676629781723, "learning_rate": 0.00035951226764720977, "loss": 2.4387, "step": 467280 }, { "epoch": 0.9309455884227974, "grad_norm": 0.2094220519065857, "learning_rate": 0.00035939035721473345, "loss": 2.4167, "step": 467290 }, { "epoch": 0.9309655106464363, "grad_norm": 0.19678433239459991, "learning_rate": 0.00035926845584050525, "loss": 2.4204, "step": 467300 }, { "epoch": 0.9309854328700752, "grad_norm": 0.2103509157896042, "learning_rate": 0.0003591465635225064, "loss": 2.4309, "step": 467310 }, { "epoch": 0.9310053550937142, "grad_norm": 0.22802923619747162, "learning_rate": 0.0003590246802587187, "loss": 2.4395, "step": 467320 }, { "epoch": 0.9310252773173531, "grad_norm": 0.21178027987480164, "learning_rate": 0.0003589028060471251, "loss": 2.4191, "step": 467330 }, { "epoch": 0.931045199540992, "grad_norm": 0.20118945837020874, "learning_rate": 0.00035878094088570854, "loss": 2.4227, "step": 467340 }, { "epoch": 0.9310651217646309, "grad_norm": 0.19861780107021332, "learning_rate": 0.0003586590847724536, "loss": 2.4207, "step": 467350 }, { "epoch": 0.9310850439882697, "grad_norm": 0.20313917100429535, "learning_rate": 0.00035853723770534485, "loss": 2.4473, "step": 467360 }, { "epoch": 0.9311049662119087, "grad_norm": 0.23142880201339722, "learning_rate": 0.00035841539968236804, "loss": 2.4051, "step": 467370 }, { "epoch": 0.9311248884355476, "grad_norm": 0.21729055047035217, "learning_rate": 0.00035829357070150936, "loss": 2.4211, "step": 467380 }, { "epoch": 0.9311448106591865, "grad_norm": 0.2323661893606186, "learning_rate": 0.00035817175076075625, "loss": 2.4214, "step": 467390 }, { "epoch": 0.9311647328828254, "grad_norm": 0.20704074203968048, "learning_rate": 0.00035804993985809655, "loss": 2.4351, "step": 467400 }, { "epoch": 0.9311846551064643, "grad_norm": 0.20253613591194153, "learning_rate": 0.0003579281379915189, "loss": 2.4307, "step": 467410 }, { "epoch": 0.9312045773301033, "grad_norm": 0.204005166888237, "learning_rate": 0.00035780634515901255, "loss": 2.418, "step": 467420 }, { "epoch": 0.9312244995537422, "grad_norm": 0.206811785697937, "learning_rate": 0.00035768456135856774, "loss": 2.4441, "step": 467430 }, { "epoch": 0.9312444217773811, "grad_norm": 0.2050262689590454, "learning_rate": 0.0003575627865881752, "loss": 2.4188, "step": 467440 }, { "epoch": 0.93126434400102, "grad_norm": 0.2165483832359314, "learning_rate": 0.00035744102084582676, "loss": 2.4215, "step": 467450 }, { "epoch": 0.931284266224659, "grad_norm": 0.2085740864276886, "learning_rate": 0.0003573192641295144, "loss": 2.4379, "step": 467460 }, { "epoch": 0.9313041884482979, "grad_norm": 0.20969128608703613, "learning_rate": 0.00035719751643723165, "loss": 2.4309, "step": 467470 }, { "epoch": 0.9313241106719368, "grad_norm": 0.22816862165927887, "learning_rate": 0.00035707577776697197, "loss": 2.4268, "step": 467480 }, { "epoch": 0.9313440328955757, "grad_norm": 0.21425677835941315, "learning_rate": 0.00035695404811673015, "loss": 2.4284, "step": 467490 }, { "epoch": 0.9313639551192145, "grad_norm": 0.20922698080539703, "learning_rate": 0.00035683232748450155, "loss": 2.4246, "step": 467500 }, { "epoch": 0.9313838773428536, "grad_norm": 0.19500510394573212, "learning_rate": 0.00035671061586828225, "loss": 2.4207, "step": 467510 }, { "epoch": 0.9314037995664924, "grad_norm": 0.19155673682689667, "learning_rate": 0.0003565889132660689, "loss": 2.43, "step": 467520 }, { "epoch": 0.9314237217901313, "grad_norm": 0.20839481055736542, "learning_rate": 0.0003564672196758594, "loss": 2.4364, "step": 467530 }, { "epoch": 0.9314436440137702, "grad_norm": 0.218321293592453, "learning_rate": 0.0003563455350956515, "loss": 2.4287, "step": 467540 }, { "epoch": 0.9314635662374091, "grad_norm": 0.21696974337100983, "learning_rate": 0.00035622385952344484, "loss": 2.4275, "step": 467550 }, { "epoch": 0.9314834884610481, "grad_norm": 0.20965996384620667, "learning_rate": 0.0003561021929572386, "loss": 2.4246, "step": 467560 }, { "epoch": 0.931503410684687, "grad_norm": 0.2005995661020279, "learning_rate": 0.00035598053539503385, "loss": 2.4189, "step": 467570 }, { "epoch": 0.9315233329083259, "grad_norm": 0.20885179936885834, "learning_rate": 0.00035585888683483134, "loss": 2.4302, "step": 467580 }, { "epoch": 0.9315432551319648, "grad_norm": 0.22171951830387115, "learning_rate": 0.0003557372472746334, "loss": 2.4312, "step": 467590 }, { "epoch": 0.9315631773556037, "grad_norm": 0.2178008109331131, "learning_rate": 0.0003556156167124427, "loss": 2.4053, "step": 467600 }, { "epoch": 0.9315830995792427, "grad_norm": 0.21408918499946594, "learning_rate": 0.0003554939951462628, "loss": 2.4213, "step": 467610 }, { "epoch": 0.9316030218028816, "grad_norm": 0.6433497667312622, "learning_rate": 0.00035537238257409774, "loss": 2.4266, "step": 467620 }, { "epoch": 0.9316229440265205, "grad_norm": 0.19916149973869324, "learning_rate": 0.0003552507789939525, "loss": 2.4073, "step": 467630 }, { "epoch": 0.9316428662501594, "grad_norm": 0.20469783246517181, "learning_rate": 0.0003551291844038329, "loss": 2.4289, "step": 467640 }, { "epoch": 0.9316627884737982, "grad_norm": 0.2145426869392395, "learning_rate": 0.0003550075988017452, "loss": 2.4374, "step": 467650 }, { "epoch": 0.9316827106974372, "grad_norm": 0.20719699561595917, "learning_rate": 0.00035488602218569664, "loss": 2.4293, "step": 467660 }, { "epoch": 0.9317026329210761, "grad_norm": 0.21487650275230408, "learning_rate": 0.00035476445455369523, "loss": 2.4254, "step": 467670 }, { "epoch": 0.931722555144715, "grad_norm": 0.2432635873556137, "learning_rate": 0.0003546428959037495, "loss": 2.4404, "step": 467680 }, { "epoch": 0.9317424773683539, "grad_norm": 0.21384747326374054, "learning_rate": 0.0003545213462338688, "loss": 2.4263, "step": 467690 }, { "epoch": 0.9317623995919928, "grad_norm": 0.21353931725025177, "learning_rate": 0.000354399805542063, "loss": 2.4161, "step": 467700 }, { "epoch": 0.9317823218156318, "grad_norm": 0.21593362092971802, "learning_rate": 0.00035427827382634324, "loss": 2.432, "step": 467710 }, { "epoch": 0.9318022440392707, "grad_norm": 0.20977286994457245, "learning_rate": 0.0003541567510847212, "loss": 2.4247, "step": 467720 }, { "epoch": 0.9318221662629096, "grad_norm": 0.21511539816856384, "learning_rate": 0.00035403523731520914, "loss": 2.4202, "step": 467730 }, { "epoch": 0.9318420884865485, "grad_norm": 0.223983034491539, "learning_rate": 0.00035391373251581973, "loss": 2.4202, "step": 467740 }, { "epoch": 0.9318620107101875, "grad_norm": 0.22593140602111816, "learning_rate": 0.00035379223668456714, "loss": 2.4327, "step": 467750 }, { "epoch": 0.9318819329338264, "grad_norm": 0.19725921750068665, "learning_rate": 0.0003536707498194658, "loss": 2.4334, "step": 467760 }, { "epoch": 0.9319018551574653, "grad_norm": 0.21732032299041748, "learning_rate": 0.0003535492719185309, "loss": 2.4297, "step": 467770 }, { "epoch": 0.9319217773811042, "grad_norm": 0.20013362169265747, "learning_rate": 0.0003534278029797784, "loss": 2.4215, "step": 467780 }, { "epoch": 0.931941699604743, "grad_norm": 0.22033582627773285, "learning_rate": 0.00035330634300122487, "loss": 2.4306, "step": 467790 }, { "epoch": 0.931961621828382, "grad_norm": 0.21531374752521515, "learning_rate": 0.0003531848919808878, "loss": 2.4191, "step": 467800 }, { "epoch": 0.9319815440520209, "grad_norm": 0.20357970893383026, "learning_rate": 0.0003530634499167855, "loss": 2.4317, "step": 467810 }, { "epoch": 0.9320014662756598, "grad_norm": 0.23837564885616302, "learning_rate": 0.00035294201680693705, "loss": 2.4156, "step": 467820 }, { "epoch": 0.9320213884992987, "grad_norm": 0.21137577295303345, "learning_rate": 0.0003528205926493617, "loss": 2.4326, "step": 467830 }, { "epoch": 0.9320413107229376, "grad_norm": 0.2074478715658188, "learning_rate": 0.00035269917744208, "loss": 2.4298, "step": 467840 }, { "epoch": 0.9320612329465766, "grad_norm": 0.22132691740989685, "learning_rate": 0.00035257777118311283, "loss": 2.4278, "step": 467850 }, { "epoch": 0.9320811551702155, "grad_norm": 0.19239282608032227, "learning_rate": 0.0003524563738704822, "loss": 2.4313, "step": 467860 }, { "epoch": 0.9321010773938544, "grad_norm": 0.3934701383113861, "learning_rate": 0.00035233498550221086, "loss": 2.4147, "step": 467870 }, { "epoch": 0.9321209996174933, "grad_norm": 0.2173759490251541, "learning_rate": 0.0003522136060763217, "loss": 2.4421, "step": 467880 }, { "epoch": 0.9321409218411322, "grad_norm": 0.220638245344162, "learning_rate": 0.0003520922355908387, "loss": 2.4319, "step": 467890 }, { "epoch": 0.9321608440647712, "grad_norm": 0.23044465482234955, "learning_rate": 0.0003519708740437868, "loss": 2.4197, "step": 467900 }, { "epoch": 0.9321807662884101, "grad_norm": 0.2218685895204544, "learning_rate": 0.0003518495214331914, "loss": 2.4285, "step": 467910 }, { "epoch": 0.932200688512049, "grad_norm": 0.23149283230304718, "learning_rate": 0.0003517281777570789, "loss": 2.4408, "step": 467920 }, { "epoch": 0.9322206107356878, "grad_norm": 0.21460384130477905, "learning_rate": 0.0003516068430134758, "loss": 2.4178, "step": 467930 }, { "epoch": 0.9322405329593267, "grad_norm": 0.21648263931274414, "learning_rate": 0.0003514855172004099, "loss": 2.4111, "step": 467940 }, { "epoch": 0.9322604551829657, "grad_norm": 0.20769262313842773, "learning_rate": 0.00035136420031590964, "loss": 2.408, "step": 467950 }, { "epoch": 0.9322803774066046, "grad_norm": 0.21033261716365814, "learning_rate": 0.0003512428923580042, "loss": 2.4165, "step": 467960 }, { "epoch": 0.9323002996302435, "grad_norm": 0.21417184174060822, "learning_rate": 0.00035112159332472315, "loss": 2.433, "step": 467970 }, { "epoch": 0.9323202218538824, "grad_norm": 0.22698727250099182, "learning_rate": 0.00035100030321409713, "loss": 2.4364, "step": 467980 }, { "epoch": 0.9323401440775213, "grad_norm": 0.20658107101917267, "learning_rate": 0.00035087902202415736, "loss": 2.4424, "step": 467990 }, { "epoch": 0.9323600663011603, "grad_norm": 0.26082393527030945, "learning_rate": 0.0003507577497529357, "loss": 2.4328, "step": 468000 }, { "epoch": 0.9323799885247992, "grad_norm": 0.21928654611110687, "learning_rate": 0.0003506364863984652, "loss": 2.4226, "step": 468010 }, { "epoch": 0.9323999107484381, "grad_norm": 0.1927662491798401, "learning_rate": 0.0003505152319587792, "loss": 2.4317, "step": 468020 }, { "epoch": 0.932419832972077, "grad_norm": 0.2043161541223526, "learning_rate": 0.0003503939864319117, "loss": 2.4089, "step": 468030 }, { "epoch": 0.932439755195716, "grad_norm": 0.2313447743654251, "learning_rate": 0.0003502727498158975, "loss": 2.4106, "step": 468040 }, { "epoch": 0.9324596774193549, "grad_norm": 0.22986896336078644, "learning_rate": 0.00035015152210877255, "loss": 2.4267, "step": 468050 }, { "epoch": 0.9324795996429938, "grad_norm": 0.20370015501976013, "learning_rate": 0.000350030303308573, "loss": 2.4328, "step": 468060 }, { "epoch": 0.9324995218666327, "grad_norm": 0.2173384577035904, "learning_rate": 0.0003499090934133355, "loss": 2.4196, "step": 468070 }, { "epoch": 0.9325194440902715, "grad_norm": 0.22347421944141388, "learning_rate": 0.00034978789242109867, "loss": 2.435, "step": 468080 }, { "epoch": 0.9325393663139105, "grad_norm": 0.21665796637535095, "learning_rate": 0.00034966670032990013, "loss": 2.4402, "step": 468090 }, { "epoch": 0.9325592885375494, "grad_norm": 0.2085593342781067, "learning_rate": 0.00034954551713777926, "loss": 2.4234, "step": 468100 }, { "epoch": 0.9325792107611883, "grad_norm": 0.19867826998233795, "learning_rate": 0.00034942434284277656, "loss": 2.439, "step": 468110 }, { "epoch": 0.9325991329848272, "grad_norm": 0.2539226710796356, "learning_rate": 0.00034930317744293207, "loss": 2.4202, "step": 468120 }, { "epoch": 0.9326190552084661, "grad_norm": 0.23429863154888153, "learning_rate": 0.0003491820209362875, "loss": 2.4292, "step": 468130 }, { "epoch": 0.9326389774321051, "grad_norm": 0.1998261958360672, "learning_rate": 0.0003490608733208847, "loss": 2.4116, "step": 468140 }, { "epoch": 0.932658899655744, "grad_norm": 0.20637521147727966, "learning_rate": 0.00034893973459476646, "loss": 2.4144, "step": 468150 }, { "epoch": 0.9326788218793829, "grad_norm": 0.19783912599086761, "learning_rate": 0.0003488186047559765, "loss": 2.4266, "step": 468160 }, { "epoch": 0.9326987441030218, "grad_norm": 0.21472811698913574, "learning_rate": 0.00034869748380255897, "loss": 2.439, "step": 468170 }, { "epoch": 0.9327186663266607, "grad_norm": 0.20298658311367035, "learning_rate": 0.00034857637173255876, "loss": 2.425, "step": 468180 }, { "epoch": 0.9327385885502997, "grad_norm": 0.2051638513803482, "learning_rate": 0.00034845526854402167, "loss": 2.4302, "step": 468190 }, { "epoch": 0.9327585107739386, "grad_norm": 0.19949664175510406, "learning_rate": 0.0003483341742349937, "loss": 2.4335, "step": 468200 }, { "epoch": 0.9327784329975775, "grad_norm": 0.21994587779045105, "learning_rate": 0.0003482130888035224, "loss": 2.4179, "step": 468210 }, { "epoch": 0.9327983552212163, "grad_norm": 0.20744749903678894, "learning_rate": 0.0003480920122476554, "loss": 2.4205, "step": 468220 }, { "epoch": 0.9328182774448552, "grad_norm": 0.1901818811893463, "learning_rate": 0.00034797094456544134, "loss": 2.4313, "step": 468230 }, { "epoch": 0.9328381996684942, "grad_norm": 0.21004067361354828, "learning_rate": 0.00034784988575492927, "loss": 2.4148, "step": 468240 }, { "epoch": 0.9328581218921331, "grad_norm": 0.22077934443950653, "learning_rate": 0.00034772883581416945, "loss": 2.4266, "step": 468250 }, { "epoch": 0.932878044115772, "grad_norm": 0.20132441818714142, "learning_rate": 0.0003476077947412122, "loss": 2.4339, "step": 468260 }, { "epoch": 0.9328979663394109, "grad_norm": 0.19953399896621704, "learning_rate": 0.00034748676253410914, "loss": 2.4357, "step": 468270 }, { "epoch": 0.9329178885630498, "grad_norm": 0.21682430803775787, "learning_rate": 0.0003473657391909122, "loss": 2.4342, "step": 468280 }, { "epoch": 0.9329378107866888, "grad_norm": 0.24584119021892548, "learning_rate": 0.0003472447247096744, "loss": 2.4399, "step": 468290 }, { "epoch": 0.9329577330103277, "grad_norm": 0.2191980630159378, "learning_rate": 0.00034712371908844907, "loss": 2.4386, "step": 468300 }, { "epoch": 0.9329776552339666, "grad_norm": 0.22002138197422028, "learning_rate": 0.00034700272232529074, "loss": 2.414, "step": 468310 }, { "epoch": 0.9329975774576055, "grad_norm": 0.23746638000011444, "learning_rate": 0.00034688173441825397, "loss": 2.4127, "step": 468320 }, { "epoch": 0.9330174996812444, "grad_norm": 0.20812875032424927, "learning_rate": 0.00034676075536539485, "loss": 2.4196, "step": 468330 }, { "epoch": 0.9330374219048834, "grad_norm": 0.2137971818447113, "learning_rate": 0.00034663978516476934, "loss": 2.4278, "step": 468340 }, { "epoch": 0.9330573441285223, "grad_norm": 0.21808673441410065, "learning_rate": 0.0003465188238144348, "loss": 2.428, "step": 468350 }, { "epoch": 0.9330772663521611, "grad_norm": 0.20671817660331726, "learning_rate": 0.0003463978713124489, "loss": 2.4266, "step": 468360 }, { "epoch": 0.9330971885758, "grad_norm": 0.2062802016735077, "learning_rate": 0.00034627692765687044, "loss": 2.4187, "step": 468370 }, { "epoch": 0.933117110799439, "grad_norm": 0.23584413528442383, "learning_rate": 0.00034615599284575825, "loss": 2.4194, "step": 468380 }, { "epoch": 0.9331370330230779, "grad_norm": 0.22129440307617188, "learning_rate": 0.00034603506687717233, "loss": 2.4178, "step": 468390 }, { "epoch": 0.9331569552467168, "grad_norm": 0.22504673898220062, "learning_rate": 0.00034591414974917336, "loss": 2.4187, "step": 468400 }, { "epoch": 0.9331768774703557, "grad_norm": 0.20745405554771423, "learning_rate": 0.00034579324145982284, "loss": 2.4408, "step": 468410 }, { "epoch": 0.9331967996939946, "grad_norm": 0.21614761650562286, "learning_rate": 0.00034567234200718255, "loss": 2.4061, "step": 468420 }, { "epoch": 0.9332167219176336, "grad_norm": 0.21033211052417755, "learning_rate": 0.00034555145138931567, "loss": 2.4135, "step": 468430 }, { "epoch": 0.9332366441412725, "grad_norm": 0.20241722464561462, "learning_rate": 0.0003454305696042852, "loss": 2.4198, "step": 468440 }, { "epoch": 0.9332565663649114, "grad_norm": 0.21787463128566742, "learning_rate": 0.00034530969665015566, "loss": 2.4182, "step": 468450 }, { "epoch": 0.9332764885885503, "grad_norm": 0.22803668677806854, "learning_rate": 0.00034518883252499167, "loss": 2.4038, "step": 468460 }, { "epoch": 0.9332964108121892, "grad_norm": 0.22990351915359497, "learning_rate": 0.000345067977226859, "loss": 2.4176, "step": 468470 }, { "epoch": 0.9333163330358282, "grad_norm": 0.21808505058288574, "learning_rate": 0.0003449471307538239, "loss": 2.4185, "step": 468480 }, { "epoch": 0.9333362552594671, "grad_norm": 0.21616461873054504, "learning_rate": 0.0003448262931039534, "loss": 2.4162, "step": 468490 }, { "epoch": 0.933356177483106, "grad_norm": 0.2004321813583374, "learning_rate": 0.00034470546427531513, "loss": 2.4105, "step": 468500 }, { "epoch": 0.9333760997067448, "grad_norm": 0.20578190684318542, "learning_rate": 0.0003445846442659775, "loss": 2.4288, "step": 468510 }, { "epoch": 0.9333960219303837, "grad_norm": 0.22294127941131592, "learning_rate": 0.00034446383307400977, "loss": 2.4263, "step": 468520 }, { "epoch": 0.9334159441540227, "grad_norm": 0.2063082605600357, "learning_rate": 0.00034434303069748177, "loss": 2.421, "step": 468530 }, { "epoch": 0.9334358663776616, "grad_norm": 0.21107061207294464, "learning_rate": 0.00034422223713446363, "loss": 2.4271, "step": 468540 }, { "epoch": 0.9334557886013005, "grad_norm": 0.2177913635969162, "learning_rate": 0.0003441014523830273, "loss": 2.4183, "step": 468550 }, { "epoch": 0.9334757108249394, "grad_norm": 0.20932313799858093, "learning_rate": 0.00034398067644124386, "loss": 2.4343, "step": 468560 }, { "epoch": 0.9334956330485783, "grad_norm": 0.19882792234420776, "learning_rate": 0.0003438599093071868, "loss": 2.435, "step": 468570 }, { "epoch": 0.9335155552722173, "grad_norm": 0.19978491961956024, "learning_rate": 0.0003437391509789287, "loss": 2.4388, "step": 468580 }, { "epoch": 0.9335354774958562, "grad_norm": 0.21871957182884216, "learning_rate": 0.00034361840145454405, "loss": 2.4257, "step": 468590 }, { "epoch": 0.9335553997194951, "grad_norm": 0.22576382756233215, "learning_rate": 0.00034349766073210744, "loss": 2.4182, "step": 468600 }, { "epoch": 0.933575321943134, "grad_norm": 0.21604226529598236, "learning_rate": 0.0003433769288096944, "loss": 2.4354, "step": 468610 }, { "epoch": 0.9335952441667729, "grad_norm": 0.21136917173862457, "learning_rate": 0.00034325620568538117, "loss": 2.4159, "step": 468620 }, { "epoch": 0.9336151663904119, "grad_norm": 0.2144293636083603, "learning_rate": 0.00034313549135724444, "loss": 2.4349, "step": 468630 }, { "epoch": 0.9336350886140508, "grad_norm": 0.25427237153053284, "learning_rate": 0.00034301478582336167, "loss": 2.4123, "step": 468640 }, { "epoch": 0.9336550108376896, "grad_norm": 0.23287202417850494, "learning_rate": 0.0003428940890818113, "loss": 2.4143, "step": 468650 }, { "epoch": 0.9336749330613285, "grad_norm": 0.1952780932188034, "learning_rate": 0.00034277340113067224, "loss": 2.4323, "step": 468660 }, { "epoch": 0.9336948552849675, "grad_norm": 0.24696983397006989, "learning_rate": 0.000342652721968024, "loss": 2.4234, "step": 468670 }, { "epoch": 0.9337147775086064, "grad_norm": 0.22183020412921906, "learning_rate": 0.00034253205159194723, "loss": 2.4075, "step": 468680 }, { "epoch": 0.9337346997322453, "grad_norm": 0.23260033130645752, "learning_rate": 0.0003424113900005226, "loss": 2.4224, "step": 468690 }, { "epoch": 0.9337546219558842, "grad_norm": 0.21600794792175293, "learning_rate": 0.0003422907371918318, "loss": 2.4177, "step": 468700 }, { "epoch": 0.9337745441795231, "grad_norm": 0.22145159542560577, "learning_rate": 0.00034217009316395776, "loss": 2.4215, "step": 468710 }, { "epoch": 0.9337944664031621, "grad_norm": 0.19378508627414703, "learning_rate": 0.0003420494579149835, "loss": 2.4351, "step": 468720 }, { "epoch": 0.933814388626801, "grad_norm": 0.2163695991039276, "learning_rate": 0.0003419288314429927, "loss": 2.4086, "step": 468730 }, { "epoch": 0.9338343108504399, "grad_norm": 0.2027459591627121, "learning_rate": 0.00034180821374606965, "loss": 2.4244, "step": 468740 }, { "epoch": 0.9338542330740788, "grad_norm": 0.22320425510406494, "learning_rate": 0.0003416876048223001, "loss": 2.4176, "step": 468750 }, { "epoch": 0.9338741552977177, "grad_norm": 0.2284315526485443, "learning_rate": 0.00034156700466976965, "loss": 2.421, "step": 468760 }, { "epoch": 0.9338940775213567, "grad_norm": 0.2139403373003006, "learning_rate": 0.0003414464132865649, "loss": 2.4187, "step": 468770 }, { "epoch": 0.9339139997449956, "grad_norm": 0.24192574620246887, "learning_rate": 0.0003413258306707734, "loss": 2.4127, "step": 468780 }, { "epoch": 0.9339339219686345, "grad_norm": 0.20034582912921906, "learning_rate": 0.0003412052568204831, "loss": 2.4443, "step": 468790 }, { "epoch": 0.9339538441922733, "grad_norm": 0.21760062873363495, "learning_rate": 0.0003410846917337824, "loss": 2.4195, "step": 468800 }, { "epoch": 0.9339737664159122, "grad_norm": 0.2089947909116745, "learning_rate": 0.00034096413540876094, "loss": 2.4318, "step": 468810 }, { "epoch": 0.9339936886395512, "grad_norm": 0.23104865849018097, "learning_rate": 0.000340843587843509, "loss": 2.435, "step": 468820 }, { "epoch": 0.9340136108631901, "grad_norm": 0.21881896257400513, "learning_rate": 0.00034072304903611704, "loss": 2.4133, "step": 468830 }, { "epoch": 0.934033533086829, "grad_norm": 0.21540567278862, "learning_rate": 0.00034060251898467686, "loss": 2.4302, "step": 468840 }, { "epoch": 0.9340534553104679, "grad_norm": 0.21018607914447784, "learning_rate": 0.0003404819976872804, "loss": 2.4355, "step": 468850 }, { "epoch": 0.9340733775341068, "grad_norm": 0.43189021944999695, "learning_rate": 0.0003403614851420205, "loss": 2.4386, "step": 468860 }, { "epoch": 0.9340932997577458, "grad_norm": 0.20325212180614471, "learning_rate": 0.0003402409813469909, "loss": 2.4414, "step": 468870 }, { "epoch": 0.9341132219813847, "grad_norm": 0.19114620983600616, "learning_rate": 0.0003401204863002858, "loss": 2.4145, "step": 468880 }, { "epoch": 0.9341331442050236, "grad_norm": 0.21337701380252838, "learning_rate": 0.0003400000000000001, "loss": 2.4361, "step": 468890 }, { "epoch": 0.9341530664286625, "grad_norm": 0.2130245715379715, "learning_rate": 0.0003398795224442295, "loss": 2.4288, "step": 468900 }, { "epoch": 0.9341729886523014, "grad_norm": 0.20837832987308502, "learning_rate": 0.00033975905363106993, "loss": 2.4371, "step": 468910 }, { "epoch": 0.9341929108759404, "grad_norm": 0.2174045890569687, "learning_rate": 0.00033963859355861926, "loss": 2.4327, "step": 468920 }, { "epoch": 0.9342128330995793, "grad_norm": 0.22027720510959625, "learning_rate": 0.00033951814222497444, "loss": 2.4181, "step": 468930 }, { "epoch": 0.9342327553232181, "grad_norm": 0.2826063930988312, "learning_rate": 0.0003393976996282342, "loss": 2.4303, "step": 468940 }, { "epoch": 0.934252677546857, "grad_norm": 0.21415923535823822, "learning_rate": 0.00033927726576649776, "loss": 2.4407, "step": 468950 }, { "epoch": 0.934272599770496, "grad_norm": 0.22546325623989105, "learning_rate": 0.00033915684063786446, "loss": 2.4259, "step": 468960 }, { "epoch": 0.9342925219941349, "grad_norm": 0.21346038579940796, "learning_rate": 0.0003390364242404351, "loss": 2.4286, "step": 468970 }, { "epoch": 0.9343124442177738, "grad_norm": 0.20939598977565765, "learning_rate": 0.0003389160165723106, "loss": 2.4138, "step": 468980 }, { "epoch": 0.9343323664414127, "grad_norm": 0.21595768630504608, "learning_rate": 0.0003387956176315932, "loss": 2.4246, "step": 468990 }, { "epoch": 0.9343522886650516, "grad_norm": 0.22123707830905914, "learning_rate": 0.0003386752274163851, "loss": 2.4111, "step": 469000 }, { "epoch": 0.9343722108886906, "grad_norm": 0.2213280200958252, "learning_rate": 0.00033855484592478933, "loss": 2.4341, "step": 469010 }, { "epoch": 0.9343921331123295, "grad_norm": 0.21006116271018982, "learning_rate": 0.00033843447315491047, "loss": 2.4214, "step": 469020 }, { "epoch": 0.9344120553359684, "grad_norm": 0.2115011364221573, "learning_rate": 0.00033831410910485247, "loss": 2.4226, "step": 469030 }, { "epoch": 0.9344319775596073, "grad_norm": 0.21450649201869965, "learning_rate": 0.00033819375377272087, "loss": 2.4173, "step": 469040 }, { "epoch": 0.9344518997832462, "grad_norm": 0.2159159630537033, "learning_rate": 0.0003380734071566218, "loss": 2.4277, "step": 469050 }, { "epoch": 0.9344718220068852, "grad_norm": 0.22697874903678894, "learning_rate": 0.0003379530692546615, "loss": 2.4066, "step": 469060 }, { "epoch": 0.9344917442305241, "grad_norm": 0.21295665204524994, "learning_rate": 0.0003378327400649477, "loss": 2.4296, "step": 469070 }, { "epoch": 0.934511666454163, "grad_norm": 0.1915714591741562, "learning_rate": 0.00033771241958558806, "loss": 2.4232, "step": 469080 }, { "epoch": 0.9345315886778018, "grad_norm": 0.22541601955890656, "learning_rate": 0.00033759210781469175, "loss": 2.409, "step": 469090 }, { "epoch": 0.9345515109014407, "grad_norm": 0.23332418501377106, "learning_rate": 0.0003374718047503675, "loss": 2.4391, "step": 469100 }, { "epoch": 0.9345714331250797, "grad_norm": 0.21852053701877594, "learning_rate": 0.0003373515103907261, "loss": 2.427, "step": 469110 }, { "epoch": 0.9345913553487186, "grad_norm": 0.21029026806354523, "learning_rate": 0.0003372312247338778, "loss": 2.4265, "step": 469120 }, { "epoch": 0.9346112775723575, "grad_norm": 0.21762043237686157, "learning_rate": 0.00033711094777793415, "loss": 2.4114, "step": 469130 }, { "epoch": 0.9346311997959964, "grad_norm": 0.22972747683525085, "learning_rate": 0.0003369906795210076, "loss": 2.4197, "step": 469140 }, { "epoch": 0.9346511220196353, "grad_norm": 0.221077561378479, "learning_rate": 0.0003368704199612107, "loss": 2.4347, "step": 469150 }, { "epoch": 0.9346710442432743, "grad_norm": 0.22081629931926727, "learning_rate": 0.0003367501690966568, "loss": 2.4241, "step": 469160 }, { "epoch": 0.9346909664669132, "grad_norm": 0.21982860565185547, "learning_rate": 0.00033662992692546023, "loss": 2.4173, "step": 469170 }, { "epoch": 0.9347108886905521, "grad_norm": 0.23794128000736237, "learning_rate": 0.0003365096934457359, "loss": 2.4115, "step": 469180 }, { "epoch": 0.934730810914191, "grad_norm": 0.25707778334617615, "learning_rate": 0.00033638946865559906, "loss": 2.4151, "step": 469190 }, { "epoch": 0.9347507331378299, "grad_norm": 0.20892684161663055, "learning_rate": 0.0003362692525531663, "loss": 2.4235, "step": 469200 }, { "epoch": 0.9347706553614689, "grad_norm": 0.28741681575775146, "learning_rate": 0.0003361490451365543, "loss": 2.4317, "step": 469210 }, { "epoch": 0.9347905775851078, "grad_norm": 0.22292904555797577, "learning_rate": 0.0003360288464038808, "loss": 2.4244, "step": 469220 }, { "epoch": 0.9348104998087466, "grad_norm": 0.21806585788726807, "learning_rate": 0.00033590865635326384, "loss": 2.4225, "step": 469230 }, { "epoch": 0.9348304220323855, "grad_norm": 0.21639752388000488, "learning_rate": 0.0003357884749828224, "loss": 2.4397, "step": 469240 }, { "epoch": 0.9348503442560245, "grad_norm": 0.5954965353012085, "learning_rate": 0.0003356683022906761, "loss": 2.417, "step": 469250 }, { "epoch": 0.9348702664796634, "grad_norm": 0.2121686041355133, "learning_rate": 0.00033554813827494544, "loss": 2.4201, "step": 469260 }, { "epoch": 0.9348901887033023, "grad_norm": 0.23699462413787842, "learning_rate": 0.0003354279829337512, "loss": 2.4127, "step": 469270 }, { "epoch": 0.9349101109269412, "grad_norm": 0.2009991854429245, "learning_rate": 0.0003353078362652151, "loss": 2.4206, "step": 469280 }, { "epoch": 0.9349300331505801, "grad_norm": 0.21009691059589386, "learning_rate": 0.00033518769826745934, "loss": 2.4322, "step": 469290 }, { "epoch": 0.9349499553742191, "grad_norm": 0.2287721037864685, "learning_rate": 0.00033506756893860715, "loss": 2.4255, "step": 469300 }, { "epoch": 0.934969877597858, "grad_norm": 0.229308620095253, "learning_rate": 0.0003349474482767818, "loss": 2.429, "step": 469310 }, { "epoch": 0.9349897998214969, "grad_norm": 0.20616406202316284, "learning_rate": 0.0003348273362801082, "loss": 2.436, "step": 469320 }, { "epoch": 0.9350097220451358, "grad_norm": 0.21110281348228455, "learning_rate": 0.0003347072329467109, "loss": 2.4266, "step": 469330 }, { "epoch": 0.9350296442687747, "grad_norm": 0.21277184784412384, "learning_rate": 0.00033458713827471586, "loss": 2.4171, "step": 469340 }, { "epoch": 0.9350495664924137, "grad_norm": 0.22385378181934357, "learning_rate": 0.0003344670522622495, "loss": 2.4284, "step": 469350 }, { "epoch": 0.9350694887160526, "grad_norm": 0.22007541358470917, "learning_rate": 0.0003343469749074388, "loss": 2.4296, "step": 469360 }, { "epoch": 0.9350894109396914, "grad_norm": 0.23506952822208405, "learning_rate": 0.00033422690620841156, "loss": 2.4068, "step": 469370 }, { "epoch": 0.9351093331633303, "grad_norm": 0.21313375234603882, "learning_rate": 0.00033410684616329614, "loss": 2.4244, "step": 469380 }, { "epoch": 0.9351292553869692, "grad_norm": 0.22058990597724915, "learning_rate": 0.0003339867947702215, "loss": 2.4308, "step": 469390 }, { "epoch": 0.9351491776106082, "grad_norm": 0.20635956525802612, "learning_rate": 0.00033386675202731774, "loss": 2.4286, "step": 469400 }, { "epoch": 0.9351690998342471, "grad_norm": 0.20812533795833588, "learning_rate": 0.00033374671793271474, "loss": 2.4357, "step": 469410 }, { "epoch": 0.935189022057886, "grad_norm": 0.2066250890493393, "learning_rate": 0.00033362669248454416, "loss": 2.4141, "step": 469420 }, { "epoch": 0.9352089442815249, "grad_norm": 0.2135295569896698, "learning_rate": 0.0003335066756809375, "loss": 2.4292, "step": 469430 }, { "epoch": 0.9352288665051638, "grad_norm": 0.2558029890060425, "learning_rate": 0.00033338666752002723, "loss": 2.4036, "step": 469440 }, { "epoch": 0.9352487887288028, "grad_norm": 0.228822723031044, "learning_rate": 0.0003332666679999468, "loss": 2.4419, "step": 469450 }, { "epoch": 0.9352687109524417, "grad_norm": 0.20940835773944855, "learning_rate": 0.0003331466771188294, "loss": 2.4334, "step": 469460 }, { "epoch": 0.9352886331760806, "grad_norm": 0.19746814668178558, "learning_rate": 0.00033302669487481, "loss": 2.4155, "step": 469470 }, { "epoch": 0.9353085553997195, "grad_norm": 0.2073632925748825, "learning_rate": 0.0003329067212660235, "loss": 2.4391, "step": 469480 }, { "epoch": 0.9353284776233584, "grad_norm": 0.21080023050308228, "learning_rate": 0.0003327867562906057, "loss": 2.4105, "step": 469490 }, { "epoch": 0.9353483998469974, "grad_norm": 0.2222958654165268, "learning_rate": 0.0003326667999466935, "loss": 2.4151, "step": 469500 }, { "epoch": 0.9353683220706362, "grad_norm": 0.22332601249217987, "learning_rate": 0.00033254685223242333, "loss": 2.4237, "step": 469510 }, { "epoch": 0.9353882442942751, "grad_norm": 0.21015168726444244, "learning_rate": 0.0003324269131459334, "loss": 2.4187, "step": 469520 }, { "epoch": 0.935408166517914, "grad_norm": 0.232860267162323, "learning_rate": 0.00033230698268536243, "loss": 2.4186, "step": 469530 }, { "epoch": 0.935428088741553, "grad_norm": 0.20949068665504456, "learning_rate": 0.0003321870608488493, "loss": 2.4149, "step": 469540 }, { "epoch": 0.9354480109651919, "grad_norm": 0.24152489006519318, "learning_rate": 0.00033206714763453385, "loss": 2.434, "step": 469550 }, { "epoch": 0.9354679331888308, "grad_norm": 0.2189275622367859, "learning_rate": 0.00033194724304055656, "loss": 2.4414, "step": 469560 }, { "epoch": 0.9354878554124697, "grad_norm": 0.2118118703365326, "learning_rate": 0.0003318273470650588, "loss": 2.4221, "step": 469570 }, { "epoch": 0.9355077776361086, "grad_norm": 0.2426438182592392, "learning_rate": 0.00033170745970618223, "loss": 2.4294, "step": 469580 }, { "epoch": 0.9355276998597476, "grad_norm": 0.19694125652313232, "learning_rate": 0.0003315875809620692, "loss": 2.4258, "step": 469590 }, { "epoch": 0.9355476220833865, "grad_norm": 0.21071875095367432, "learning_rate": 0.00033146771083086325, "loss": 2.4349, "step": 469600 }, { "epoch": 0.9355675443070254, "grad_norm": 0.21798816323280334, "learning_rate": 0.0003313478493107078, "loss": 2.4206, "step": 469610 }, { "epoch": 0.9355874665306643, "grad_norm": 0.7600581645965576, "learning_rate": 0.0003312279963997478, "loss": 2.414, "step": 469620 }, { "epoch": 0.9356073887543032, "grad_norm": 0.23170189559459686, "learning_rate": 0.0003311081520961281, "loss": 2.4237, "step": 469630 }, { "epoch": 0.9356273109779422, "grad_norm": 0.22514525055885315, "learning_rate": 0.00033098831639799477, "loss": 2.451, "step": 469640 }, { "epoch": 0.935647233201581, "grad_norm": 0.21881859004497528, "learning_rate": 0.0003308684893034941, "loss": 2.4075, "step": 469650 }, { "epoch": 0.9356671554252199, "grad_norm": 0.2160756140947342, "learning_rate": 0.0003307486708107734, "loss": 2.4301, "step": 469660 }, { "epoch": 0.9356870776488588, "grad_norm": 0.20914992690086365, "learning_rate": 0.00033062886091798037, "loss": 2.4328, "step": 469670 }, { "epoch": 0.9357069998724977, "grad_norm": 0.21688002347946167, "learning_rate": 0.0003305090596232636, "loss": 2.4203, "step": 469680 }, { "epoch": 0.9357269220961367, "grad_norm": 0.20244471728801727, "learning_rate": 0.0003303892669247721, "loss": 2.4274, "step": 469690 }, { "epoch": 0.9357468443197756, "grad_norm": 0.2286587506532669, "learning_rate": 0.00033026948282065586, "loss": 2.4184, "step": 469700 }, { "epoch": 0.9357667665434145, "grad_norm": 0.21472477912902832, "learning_rate": 0.00033014970730906533, "loss": 2.4248, "step": 469710 }, { "epoch": 0.9357866887670534, "grad_norm": 0.21522141993045807, "learning_rate": 0.0003300299403881517, "loss": 2.4233, "step": 469720 }, { "epoch": 0.9358066109906923, "grad_norm": 0.22758807241916656, "learning_rate": 0.00032991018205606683, "loss": 2.4256, "step": 469730 }, { "epoch": 0.9358265332143313, "grad_norm": 0.20367629826068878, "learning_rate": 0.00032979043231096285, "loss": 2.425, "step": 469740 }, { "epoch": 0.9358464554379702, "grad_norm": 0.22697025537490845, "learning_rate": 0.0003296706911509935, "loss": 2.431, "step": 469750 }, { "epoch": 0.9358663776616091, "grad_norm": 0.2173595279455185, "learning_rate": 0.00032955095857431195, "loss": 2.4216, "step": 469760 }, { "epoch": 0.935886299885248, "grad_norm": 0.1940293163061142, "learning_rate": 0.0003294312345790731, "loss": 2.4295, "step": 469770 }, { "epoch": 0.9359062221088869, "grad_norm": 0.2233877032995224, "learning_rate": 0.00032931151916343196, "loss": 2.4086, "step": 469780 }, { "epoch": 0.9359261443325259, "grad_norm": 0.21296755969524384, "learning_rate": 0.0003291918123255442, "loss": 2.4126, "step": 469790 }, { "epoch": 0.9359460665561647, "grad_norm": 0.22491295635700226, "learning_rate": 0.0003290721140635662, "loss": 2.438, "step": 469800 }, { "epoch": 0.9359659887798036, "grad_norm": 0.2154526561498642, "learning_rate": 0.0003289524243756552, "loss": 2.416, "step": 469810 }, { "epoch": 0.9359859110034425, "grad_norm": 0.2289041429758072, "learning_rate": 0.00032883274325996913, "loss": 2.427, "step": 469820 }, { "epoch": 0.9360058332270814, "grad_norm": 0.22475405037403107, "learning_rate": 0.0003287130707146662, "loss": 2.4357, "step": 469830 }, { "epoch": 0.9360257554507204, "grad_norm": 0.21013224124908447, "learning_rate": 0.00032859340673790574, "loss": 2.423, "step": 469840 }, { "epoch": 0.9360456776743593, "grad_norm": 0.21852537989616394, "learning_rate": 0.0003284737513278471, "loss": 2.4205, "step": 469850 }, { "epoch": 0.9360655998979982, "grad_norm": 0.21847519278526306, "learning_rate": 0.00032835410448265104, "loss": 2.4018, "step": 469860 }, { "epoch": 0.9360855221216371, "grad_norm": 0.20890066027641296, "learning_rate": 0.0003282344662004781, "loss": 2.4132, "step": 469870 }, { "epoch": 0.9361054443452761, "grad_norm": 0.22001640498638153, "learning_rate": 0.0003281148364794908, "loss": 2.4261, "step": 469880 }, { "epoch": 0.936125366568915, "grad_norm": 0.24124464392662048, "learning_rate": 0.00032799521531785093, "loss": 2.4192, "step": 469890 }, { "epoch": 0.9361452887925539, "grad_norm": 0.23420318961143494, "learning_rate": 0.00032787560271372153, "loss": 2.4021, "step": 469900 }, { "epoch": 0.9361652110161928, "grad_norm": 0.21560312807559967, "learning_rate": 0.0003277559986652665, "loss": 2.4164, "step": 469910 }, { "epoch": 0.9361851332398317, "grad_norm": 0.2203575074672699, "learning_rate": 0.00032763640317065026, "loss": 2.4262, "step": 469920 }, { "epoch": 0.9362050554634707, "grad_norm": 0.2190832644701004, "learning_rate": 0.00032751681622803755, "loss": 2.4276, "step": 469930 }, { "epoch": 0.9362249776871095, "grad_norm": 0.20796887576580048, "learning_rate": 0.00032739723783559404, "loss": 2.4114, "step": 469940 }, { "epoch": 0.9362448999107484, "grad_norm": 0.22896833717823029, "learning_rate": 0.0003272776679914864, "loss": 2.4277, "step": 469950 }, { "epoch": 0.9362648221343873, "grad_norm": 0.2169833481311798, "learning_rate": 0.00032715810669388135, "loss": 2.4248, "step": 469960 }, { "epoch": 0.9362847443580262, "grad_norm": 0.20414109528064728, "learning_rate": 0.00032703855394094637, "loss": 2.4398, "step": 469970 }, { "epoch": 0.9363046665816652, "grad_norm": 0.21892571449279785, "learning_rate": 0.00032691900973084987, "loss": 2.4308, "step": 469980 }, { "epoch": 0.9363245888053041, "grad_norm": 0.2175378054380417, "learning_rate": 0.00032679947406176105, "loss": 2.4106, "step": 469990 }, { "epoch": 0.936344511028943, "grad_norm": 0.2976144254207611, "learning_rate": 0.00032667994693184887, "loss": 2.4257, "step": 470000 }, { "epoch": 0.9363644332525819, "grad_norm": 0.22740435600280762, "learning_rate": 0.0003265604283392842, "loss": 2.4234, "step": 470010 }, { "epoch": 0.9363843554762208, "grad_norm": 0.21726450324058533, "learning_rate": 0.00032644091828223763, "loss": 2.4121, "step": 470020 }, { "epoch": 0.9364042776998598, "grad_norm": 0.2080066055059433, "learning_rate": 0.00032632141675888085, "loss": 2.43, "step": 470030 }, { "epoch": 0.9364241999234987, "grad_norm": 0.2011534422636032, "learning_rate": 0.0003262019237673859, "loss": 2.435, "step": 470040 }, { "epoch": 0.9364441221471376, "grad_norm": 0.21424785256385803, "learning_rate": 0.0003260824393059256, "loss": 2.449, "step": 470050 }, { "epoch": 0.9364640443707765, "grad_norm": 0.21271668374538422, "learning_rate": 0.00032596296337267374, "loss": 2.4274, "step": 470060 }, { "epoch": 0.9364839665944154, "grad_norm": 0.20976565778255463, "learning_rate": 0.0003258434959658043, "loss": 2.4109, "step": 470070 }, { "epoch": 0.9365038888180544, "grad_norm": 0.21614010632038116, "learning_rate": 0.000325724037083492, "loss": 2.4158, "step": 470080 }, { "epoch": 0.9365238110416932, "grad_norm": 0.2057904750108719, "learning_rate": 0.00032560458672391257, "loss": 2.4073, "step": 470090 }, { "epoch": 0.9365437332653321, "grad_norm": 0.2136717140674591, "learning_rate": 0.00032548514488524186, "loss": 2.423, "step": 470100 }, { "epoch": 0.936563655488971, "grad_norm": 0.2239486277103424, "learning_rate": 0.00032536571156565653, "loss": 2.4294, "step": 470110 }, { "epoch": 0.9365835777126099, "grad_norm": 0.2053869217634201, "learning_rate": 0.0003252462867633341, "loss": 2.4257, "step": 470120 }, { "epoch": 0.9366034999362489, "grad_norm": 0.23189547657966614, "learning_rate": 0.0003251268704764529, "loss": 2.4213, "step": 470130 }, { "epoch": 0.9366234221598878, "grad_norm": 0.20097769796848297, "learning_rate": 0.0003250074627031916, "loss": 2.4286, "step": 470140 }, { "epoch": 0.9366433443835267, "grad_norm": 0.23478403687477112, "learning_rate": 0.00032488806344172926, "loss": 2.4174, "step": 470150 }, { "epoch": 0.9366632666071656, "grad_norm": 0.2573052942752838, "learning_rate": 0.00032476867269024614, "loss": 2.433, "step": 470160 }, { "epoch": 0.9366831888308046, "grad_norm": 0.2199241816997528, "learning_rate": 0.00032464929044692247, "loss": 2.4263, "step": 470170 }, { "epoch": 0.9367031110544435, "grad_norm": 0.20975038409233093, "learning_rate": 0.00032452991670994027, "loss": 2.4178, "step": 470180 }, { "epoch": 0.9367230332780824, "grad_norm": 0.2346118539571762, "learning_rate": 0.0003244105514774809, "loss": 2.4144, "step": 470190 }, { "epoch": 0.9367429555017213, "grad_norm": 0.20846302807331085, "learning_rate": 0.00032429119474772716, "loss": 2.4242, "step": 470200 }, { "epoch": 0.9367628777253602, "grad_norm": 0.20924443006515503, "learning_rate": 0.00032417184651886233, "loss": 2.4173, "step": 470210 }, { "epoch": 0.9367827999489992, "grad_norm": 0.20891806483268738, "learning_rate": 0.00032405250678907004, "loss": 2.4164, "step": 470220 }, { "epoch": 0.936802722172638, "grad_norm": 0.23382896184921265, "learning_rate": 0.00032393317555653534, "loss": 2.4102, "step": 470230 }, { "epoch": 0.9368226443962769, "grad_norm": 0.21387110650539398, "learning_rate": 0.000323813852819443, "loss": 2.4296, "step": 470240 }, { "epoch": 0.9368425666199158, "grad_norm": 0.21487177908420563, "learning_rate": 0.0003236945385759791, "loss": 2.422, "step": 470250 }, { "epoch": 0.9368624888435547, "grad_norm": 0.22457949817180634, "learning_rate": 0.0003235752328243298, "loss": 2.4208, "step": 470260 }, { "epoch": 0.9368824110671937, "grad_norm": 0.22024713456630707, "learning_rate": 0.0003234559355626825, "loss": 2.4243, "step": 470270 }, { "epoch": 0.9369023332908326, "grad_norm": 0.2200605720281601, "learning_rate": 0.0003233366467892251, "loss": 2.4258, "step": 470280 }, { "epoch": 0.9369222555144715, "grad_norm": 0.20867522060871124, "learning_rate": 0.00032321736650214563, "loss": 2.4214, "step": 470290 }, { "epoch": 0.9369421777381104, "grad_norm": 0.1959902048110962, "learning_rate": 0.00032309809469963335, "loss": 2.4268, "step": 470300 }, { "epoch": 0.9369620999617493, "grad_norm": 0.2109174132347107, "learning_rate": 0.00032297883137987805, "loss": 2.4226, "step": 470310 }, { "epoch": 0.9369820221853883, "grad_norm": 0.26733359694480896, "learning_rate": 0.00032285957654106954, "loss": 2.4166, "step": 470320 }, { "epoch": 0.9370019444090272, "grad_norm": 0.24257037043571472, "learning_rate": 0.0003227403301813996, "loss": 2.4398, "step": 470330 }, { "epoch": 0.9370218666326661, "grad_norm": 0.20900845527648926, "learning_rate": 0.00032262109229905955, "loss": 2.4206, "step": 470340 }, { "epoch": 0.937041788856305, "grad_norm": 0.26294630765914917, "learning_rate": 0.0003225018628922416, "loss": 2.4224, "step": 470350 }, { "epoch": 0.9370617110799438, "grad_norm": 0.22067862749099731, "learning_rate": 0.0003223826419591385, "loss": 2.4321, "step": 470360 }, { "epoch": 0.9370816333035829, "grad_norm": 0.2140183448791504, "learning_rate": 0.0003222634294979441, "loss": 2.4016, "step": 470370 }, { "epoch": 0.9371015555272217, "grad_norm": 0.2090267390012741, "learning_rate": 0.0003221442255068525, "loss": 2.4309, "step": 470380 }, { "epoch": 0.9371214777508606, "grad_norm": 0.20290763676166534, "learning_rate": 0.00032202502998405857, "loss": 2.432, "step": 470390 }, { "epoch": 0.9371413999744995, "grad_norm": 0.20731845498085022, "learning_rate": 0.0003219058429277575, "loss": 2.4254, "step": 470400 }, { "epoch": 0.9371613221981384, "grad_norm": 0.21388676762580872, "learning_rate": 0.000321786664336146, "loss": 2.4294, "step": 470410 }, { "epoch": 0.9371812444217774, "grad_norm": 0.1950177699327469, "learning_rate": 0.00032166749420742003, "loss": 2.4216, "step": 470420 }, { "epoch": 0.9372011666454163, "grad_norm": 0.20321768522262573, "learning_rate": 0.0003215483325397779, "loss": 2.4259, "step": 470430 }, { "epoch": 0.9372210888690552, "grad_norm": 0.21256378293037415, "learning_rate": 0.00032142917933141704, "loss": 2.4294, "step": 470440 }, { "epoch": 0.9372410110926941, "grad_norm": 0.21146701276302338, "learning_rate": 0.0003213100345805364, "loss": 2.4179, "step": 470450 }, { "epoch": 0.9372609333163331, "grad_norm": 0.22186361253261566, "learning_rate": 0.0003211908982853351, "loss": 2.4269, "step": 470460 }, { "epoch": 0.937280855539972, "grad_norm": 0.21795660257339478, "learning_rate": 0.00032107177044401336, "loss": 2.4054, "step": 470470 }, { "epoch": 0.9373007777636109, "grad_norm": 0.21292446553707123, "learning_rate": 0.00032095265105477156, "loss": 2.4289, "step": 470480 }, { "epoch": 0.9373206999872498, "grad_norm": 0.22210103273391724, "learning_rate": 0.0003208335401158109, "loss": 2.4248, "step": 470490 }, { "epoch": 0.9373406222108887, "grad_norm": 0.21448761224746704, "learning_rate": 0.0003207144376253335, "loss": 2.4374, "step": 470500 }, { "epoch": 0.9373605444345277, "grad_norm": 0.20605777204036713, "learning_rate": 0.00032059534358154187, "loss": 2.4221, "step": 470510 }, { "epoch": 0.9373804666581665, "grad_norm": 0.24719248712062836, "learning_rate": 0.0003204762579826386, "loss": 2.4166, "step": 470520 }, { "epoch": 0.9374003888818054, "grad_norm": 0.19757895171642303, "learning_rate": 0.0003203571808268282, "loss": 2.4062, "step": 470530 }, { "epoch": 0.9374203111054443, "grad_norm": 0.21834629774093628, "learning_rate": 0.0003202381121123148, "loss": 2.4133, "step": 470540 }, { "epoch": 0.9374402333290832, "grad_norm": 0.19621556997299194, "learning_rate": 0.0003201190518373036, "loss": 2.4276, "step": 470550 }, { "epoch": 0.9374601555527222, "grad_norm": 0.22922468185424805, "learning_rate": 0.0003200000000000001, "loss": 2.4097, "step": 470560 }, { "epoch": 0.9374800777763611, "grad_norm": 0.21811139583587646, "learning_rate": 0.00031988095659861047, "loss": 2.4278, "step": 470570 }, { "epoch": 0.9375, "grad_norm": 0.21149727702140808, "learning_rate": 0.0003197619216313421, "loss": 2.4185, "step": 470580 }, { "epoch": 0.9375199222236389, "grad_norm": 0.22637112438678741, "learning_rate": 0.0003196428950964025, "loss": 2.4212, "step": 470590 }, { "epoch": 0.9375398444472778, "grad_norm": 0.21305407583713531, "learning_rate": 0.0003195238769919997, "loss": 2.4106, "step": 470600 }, { "epoch": 0.9375597666709168, "grad_norm": 0.2133072167634964, "learning_rate": 0.0003194048673163428, "loss": 2.4235, "step": 470610 }, { "epoch": 0.9375796888945557, "grad_norm": 0.23841777443885803, "learning_rate": 0.00031928586606764077, "loss": 2.4132, "step": 470620 }, { "epoch": 0.9375996111181946, "grad_norm": 0.23058941960334778, "learning_rate": 0.00031916687324410464, "loss": 2.4248, "step": 470630 }, { "epoch": 0.9376195333418335, "grad_norm": 0.21964387595653534, "learning_rate": 0.00031904788884394456, "loss": 2.4156, "step": 470640 }, { "epoch": 0.9376394555654723, "grad_norm": 0.21134300529956818, "learning_rate": 0.00031892891286537206, "loss": 2.4228, "step": 470650 }, { "epoch": 0.9376593777891113, "grad_norm": 0.22529102861881256, "learning_rate": 0.00031880994530659914, "loss": 2.4226, "step": 470660 }, { "epoch": 0.9376793000127502, "grad_norm": 0.21326924860477448, "learning_rate": 0.0003186909861658387, "loss": 2.4253, "step": 470670 }, { "epoch": 0.9376992222363891, "grad_norm": 0.23143593966960907, "learning_rate": 0.00031857203544130374, "loss": 2.4209, "step": 470680 }, { "epoch": 0.937719144460028, "grad_norm": 0.21153508126735687, "learning_rate": 0.00031845309313120865, "loss": 2.402, "step": 470690 }, { "epoch": 0.9377390666836669, "grad_norm": 0.20169255137443542, "learning_rate": 0.0003183341592337674, "loss": 2.4424, "step": 470700 }, { "epoch": 0.9377589889073059, "grad_norm": 0.21474702656269073, "learning_rate": 0.00031821523374719575, "loss": 2.4297, "step": 470710 }, { "epoch": 0.9377789111309448, "grad_norm": 0.22936712205410004, "learning_rate": 0.0003180963166697088, "loss": 2.4066, "step": 470720 }, { "epoch": 0.9377988333545837, "grad_norm": 0.2547454535961151, "learning_rate": 0.0003179774079995239, "loss": 2.4242, "step": 470730 }, { "epoch": 0.9378187555782226, "grad_norm": 0.21627818048000336, "learning_rate": 0.0003178585077348577, "loss": 2.4102, "step": 470740 }, { "epoch": 0.9378386778018616, "grad_norm": 0.2225673794746399, "learning_rate": 0.00031773961587392785, "loss": 2.4339, "step": 470750 }, { "epoch": 0.9378586000255005, "grad_norm": 0.2121545970439911, "learning_rate": 0.0003176207324149527, "loss": 2.4234, "step": 470760 }, { "epoch": 0.9378785222491394, "grad_norm": 0.22240595519542694, "learning_rate": 0.00031750185735615144, "loss": 2.4225, "step": 470770 }, { "epoch": 0.9378984444727783, "grad_norm": 0.23800891637802124, "learning_rate": 0.0003173829906957437, "loss": 2.4099, "step": 470780 }, { "epoch": 0.9379183666964171, "grad_norm": 0.22559750080108643, "learning_rate": 0.0003172641324319494, "loss": 2.4185, "step": 470790 }, { "epoch": 0.9379382889200562, "grad_norm": 0.22178538143634796, "learning_rate": 0.0003171452825629897, "loss": 2.4203, "step": 470800 }, { "epoch": 0.937958211143695, "grad_norm": 0.22294940054416656, "learning_rate": 0.000317026441087086, "loss": 2.4107, "step": 470810 }, { "epoch": 0.9379781333673339, "grad_norm": 0.22264926135540009, "learning_rate": 0.00031690760800246023, "loss": 2.4058, "step": 470820 }, { "epoch": 0.9379980555909728, "grad_norm": 0.20948882400989532, "learning_rate": 0.00031678878330733553, "loss": 2.418, "step": 470830 }, { "epoch": 0.9380179778146117, "grad_norm": 0.22241349518299103, "learning_rate": 0.0003166699669999349, "loss": 2.4251, "step": 470840 }, { "epoch": 0.9380379000382507, "grad_norm": 0.21354790031909943, "learning_rate": 0.00031655115907848263, "loss": 2.4224, "step": 470850 }, { "epoch": 0.9380578222618896, "grad_norm": 0.23619307577610016, "learning_rate": 0.0003164323595412033, "loss": 2.4197, "step": 470860 }, { "epoch": 0.9380777444855285, "grad_norm": 0.21945853531360626, "learning_rate": 0.0003163135683863221, "loss": 2.4231, "step": 470870 }, { "epoch": 0.9380976667091674, "grad_norm": 0.20508277416229248, "learning_rate": 0.00031619478561206504, "loss": 2.4234, "step": 470880 }, { "epoch": 0.9381175889328063, "grad_norm": 0.20158034563064575, "learning_rate": 0.0003160760112166583, "loss": 2.396, "step": 470890 }, { "epoch": 0.9381375111564453, "grad_norm": 0.2094212919473648, "learning_rate": 0.00031595724519832926, "loss": 2.4085, "step": 470900 }, { "epoch": 0.9381574333800842, "grad_norm": 0.23805367946624756, "learning_rate": 0.00031583848755530573, "loss": 2.4086, "step": 470910 }, { "epoch": 0.9381773556037231, "grad_norm": 0.21954278647899628, "learning_rate": 0.0003157197382858159, "loss": 2.433, "step": 470920 }, { "epoch": 0.938197277827362, "grad_norm": 0.21635082364082336, "learning_rate": 0.00031560099738808914, "loss": 2.4286, "step": 470930 }, { "epoch": 0.9382172000510008, "grad_norm": 0.19933795928955078, "learning_rate": 0.0003154822648603548, "loss": 2.4238, "step": 470940 }, { "epoch": 0.9382371222746398, "grad_norm": 0.21087074279785156, "learning_rate": 0.0003153635407008431, "loss": 2.4203, "step": 470950 }, { "epoch": 0.9382570444982787, "grad_norm": 0.22107207775115967, "learning_rate": 0.00031524482490778504, "loss": 2.4408, "step": 470960 }, { "epoch": 0.9382769667219176, "grad_norm": 0.22337552905082703, "learning_rate": 0.000315126117479412, "loss": 2.4231, "step": 470970 }, { "epoch": 0.9382968889455565, "grad_norm": 0.8906289339065552, "learning_rate": 0.00031500741841395644, "loss": 2.4175, "step": 470980 }, { "epoch": 0.9383168111691954, "grad_norm": 0.22624805569648743, "learning_rate": 0.0003148887277096506, "loss": 2.4306, "step": 470990 }, { "epoch": 0.9383367333928344, "grad_norm": 0.20941811800003052, "learning_rate": 0.00031477004536472844, "loss": 2.4348, "step": 471000 }, { "epoch": 0.9383566556164733, "grad_norm": 0.20572668313980103, "learning_rate": 0.0003146513713774233, "loss": 2.4155, "step": 471010 }, { "epoch": 0.9383765778401122, "grad_norm": 0.2084328532218933, "learning_rate": 0.00031453270574597014, "loss": 2.4205, "step": 471020 }, { "epoch": 0.9383965000637511, "grad_norm": 0.3413831293582916, "learning_rate": 0.00031441404846860446, "loss": 2.4182, "step": 471030 }, { "epoch": 0.9384164222873901, "grad_norm": 0.20500017702579498, "learning_rate": 0.0003142953995435618, "loss": 2.4387, "step": 471040 }, { "epoch": 0.938436344511029, "grad_norm": 0.23851390182971954, "learning_rate": 0.0003141767589690787, "loss": 2.418, "step": 471050 }, { "epoch": 0.9384562667346679, "grad_norm": 0.2251257300376892, "learning_rate": 0.0003140581267433922, "loss": 2.4141, "step": 471060 }, { "epoch": 0.9384761889583068, "grad_norm": 0.22004905343055725, "learning_rate": 0.00031393950286474003, "loss": 2.4283, "step": 471070 }, { "epoch": 0.9384961111819456, "grad_norm": 0.1948387175798416, "learning_rate": 0.0003138208873313606, "loss": 2.4108, "step": 471080 }, { "epoch": 0.9385160334055846, "grad_norm": 0.22355741262435913, "learning_rate": 0.000313702280141493, "loss": 2.4239, "step": 471090 }, { "epoch": 0.9385359556292235, "grad_norm": 0.2226892113685608, "learning_rate": 0.00031358368129337657, "loss": 2.4331, "step": 471100 }, { "epoch": 0.9385558778528624, "grad_norm": 0.23187458515167236, "learning_rate": 0.0003134650907852516, "loss": 2.4075, "step": 471110 }, { "epoch": 0.9385758000765013, "grad_norm": 0.2290917932987213, "learning_rate": 0.00031334650861535864, "loss": 2.4196, "step": 471120 }, { "epoch": 0.9385957223001402, "grad_norm": 0.2626497745513916, "learning_rate": 0.0003132279347819398, "loss": 2.4373, "step": 471130 }, { "epoch": 0.9386156445237792, "grad_norm": 0.22452445328235626, "learning_rate": 0.00031310936928323653, "loss": 2.4189, "step": 471140 }, { "epoch": 0.9386355667474181, "grad_norm": 0.24060720205307007, "learning_rate": 0.0003129908121174918, "loss": 2.4339, "step": 471150 }, { "epoch": 0.938655488971057, "grad_norm": 0.22480151057243347, "learning_rate": 0.0003128722632829486, "loss": 2.433, "step": 471160 }, { "epoch": 0.9386754111946959, "grad_norm": 0.2133876532316208, "learning_rate": 0.00031275372277785117, "loss": 2.4141, "step": 471170 }, { "epoch": 0.9386953334183348, "grad_norm": 0.22040751576423645, "learning_rate": 0.0003126351906004441, "loss": 2.4301, "step": 471180 }, { "epoch": 0.9387152556419738, "grad_norm": 0.23252138495445251, "learning_rate": 0.00031251666674897205, "loss": 2.4218, "step": 471190 }, { "epoch": 0.9387351778656127, "grad_norm": 0.2114914357662201, "learning_rate": 0.00031239815122168115, "loss": 2.4229, "step": 471200 }, { "epoch": 0.9387551000892516, "grad_norm": 0.21587154269218445, "learning_rate": 0.0003122796440168176, "loss": 2.4199, "step": 471210 }, { "epoch": 0.9387750223128905, "grad_norm": 0.22786381840705872, "learning_rate": 0.0003121611451326285, "loss": 2.4197, "step": 471220 }, { "epoch": 0.9387949445365293, "grad_norm": 0.23036284744739532, "learning_rate": 0.00031204265456736136, "loss": 2.4156, "step": 471230 }, { "epoch": 0.9388148667601683, "grad_norm": 0.20822347700595856, "learning_rate": 0.00031192417231926453, "loss": 2.4285, "step": 471240 }, { "epoch": 0.9388347889838072, "grad_norm": 0.21126864850521088, "learning_rate": 0.0003118056983865867, "loss": 2.4297, "step": 471250 }, { "epoch": 0.9388547112074461, "grad_norm": 0.21360260248184204, "learning_rate": 0.0003116872327675775, "loss": 2.4087, "step": 471260 }, { "epoch": 0.938874633431085, "grad_norm": 0.2057746797800064, "learning_rate": 0.0003115687754604868, "loss": 2.4102, "step": 471270 }, { "epoch": 0.9388945556547239, "grad_norm": 0.2058030068874359, "learning_rate": 0.0003114503264635653, "loss": 2.4277, "step": 471280 }, { "epoch": 0.9389144778783629, "grad_norm": 0.2164788693189621, "learning_rate": 0.00031133188577506444, "loss": 2.4221, "step": 471290 }, { "epoch": 0.9389344001020018, "grad_norm": 0.20638836920261383, "learning_rate": 0.00031121345339323606, "loss": 2.411, "step": 471300 }, { "epoch": 0.9389543223256407, "grad_norm": 0.2266930490732193, "learning_rate": 0.0003110950293163326, "loss": 2.4252, "step": 471310 }, { "epoch": 0.9389742445492796, "grad_norm": 0.22262148559093475, "learning_rate": 0.0003109766135426071, "loss": 2.4245, "step": 471320 }, { "epoch": 0.9389941667729186, "grad_norm": 0.20848529040813446, "learning_rate": 0.00031085820607031333, "loss": 2.429, "step": 471330 }, { "epoch": 0.9390140889965575, "grad_norm": 0.21069711446762085, "learning_rate": 0.00031073980689770586, "loss": 2.4288, "step": 471340 }, { "epoch": 0.9390340112201964, "grad_norm": 0.22401978075504303, "learning_rate": 0.0003106214160230396, "loss": 2.4122, "step": 471350 }, { "epoch": 0.9390539334438353, "grad_norm": 0.207924485206604, "learning_rate": 0.00031050303344457, "loss": 2.4292, "step": 471360 }, { "epoch": 0.9390738556674741, "grad_norm": 0.2092038094997406, "learning_rate": 0.00031038465916055326, "loss": 2.4233, "step": 471370 }, { "epoch": 0.9390937778911131, "grad_norm": 0.21637143194675446, "learning_rate": 0.00031026629316924616, "loss": 2.4293, "step": 471380 }, { "epoch": 0.939113700114752, "grad_norm": 0.20809483528137207, "learning_rate": 0.0003101479354689063, "loss": 2.4342, "step": 471390 }, { "epoch": 0.9391336223383909, "grad_norm": 0.2163878083229065, "learning_rate": 0.0003100295860577913, "loss": 2.4271, "step": 471400 }, { "epoch": 0.9391535445620298, "grad_norm": 0.2302473485469818, "learning_rate": 0.00030991124493416036, "loss": 2.4396, "step": 471410 }, { "epoch": 0.9391734667856687, "grad_norm": 0.23462165892124176, "learning_rate": 0.00030979291209627213, "loss": 2.4141, "step": 471420 }, { "epoch": 0.9391933890093077, "grad_norm": 0.24476206302642822, "learning_rate": 0.0003096745875423868, "loss": 2.4336, "step": 471430 }, { "epoch": 0.9392133112329466, "grad_norm": 0.2249818593263626, "learning_rate": 0.0003095562712707649, "loss": 2.4178, "step": 471440 }, { "epoch": 0.9392332334565855, "grad_norm": 0.1996001899242401, "learning_rate": 0.0003094379632796671, "loss": 2.4107, "step": 471450 }, { "epoch": 0.9392531556802244, "grad_norm": 0.22733530402183533, "learning_rate": 0.0003093196635673554, "loss": 2.4064, "step": 471460 }, { "epoch": 0.9392730779038633, "grad_norm": 0.22191479802131653, "learning_rate": 0.0003092013721320921, "loss": 2.4333, "step": 471470 }, { "epoch": 0.9392930001275023, "grad_norm": 0.21795713901519775, "learning_rate": 0.00030908308897213986, "loss": 2.4155, "step": 471480 }, { "epoch": 0.9393129223511412, "grad_norm": 0.19705531001091003, "learning_rate": 0.0003089648140857624, "loss": 2.4089, "step": 471490 }, { "epoch": 0.9393328445747801, "grad_norm": 0.2163466364145279, "learning_rate": 0.0003088465474712237, "loss": 2.4115, "step": 471500 }, { "epoch": 0.939352766798419, "grad_norm": 0.2167125940322876, "learning_rate": 0.0003087282891267886, "loss": 2.4292, "step": 471510 }, { "epoch": 0.9393726890220578, "grad_norm": 0.22003990411758423, "learning_rate": 0.00030861003905072204, "loss": 2.4309, "step": 471520 }, { "epoch": 0.9393926112456968, "grad_norm": 0.19594797492027283, "learning_rate": 0.0003084917972412904, "loss": 2.4177, "step": 471530 }, { "epoch": 0.9394125334693357, "grad_norm": 0.23486687242984772, "learning_rate": 0.00030837356369676, "loss": 2.4079, "step": 471540 }, { "epoch": 0.9394324556929746, "grad_norm": 0.22691045701503754, "learning_rate": 0.0003082553384153979, "loss": 2.4177, "step": 471550 }, { "epoch": 0.9394523779166135, "grad_norm": 0.23172537982463837, "learning_rate": 0.000308137121395472, "loss": 2.4148, "step": 471560 }, { "epoch": 0.9394723001402524, "grad_norm": 0.21972157061100006, "learning_rate": 0.0003080189126352506, "loss": 2.4375, "step": 471570 }, { "epoch": 0.9394922223638914, "grad_norm": 0.20079562067985535, "learning_rate": 0.00030790071213300264, "loss": 2.4271, "step": 471580 }, { "epoch": 0.9395121445875303, "grad_norm": 0.218746155500412, "learning_rate": 0.0003077825198869975, "loss": 2.4163, "step": 471590 }, { "epoch": 0.9395320668111692, "grad_norm": 0.21620704233646393, "learning_rate": 0.00030766433589550556, "loss": 2.4191, "step": 471600 }, { "epoch": 0.9395519890348081, "grad_norm": 0.22181686758995056, "learning_rate": 0.0003075461601567977, "loss": 2.4357, "step": 471610 }, { "epoch": 0.939571911258447, "grad_norm": 0.213827446103096, "learning_rate": 0.000307427992669145, "loss": 2.4165, "step": 471620 }, { "epoch": 0.939591833482086, "grad_norm": 0.23370899260044098, "learning_rate": 0.00030730983343081955, "loss": 2.4189, "step": 471630 }, { "epoch": 0.9396117557057249, "grad_norm": 0.2434576451778412, "learning_rate": 0.00030719168244009375, "loss": 2.4185, "step": 471640 }, { "epoch": 0.9396316779293638, "grad_norm": 0.23303215205669403, "learning_rate": 0.0003070735396952411, "loss": 2.4196, "step": 471650 }, { "epoch": 0.9396516001530026, "grad_norm": 0.21897603571414948, "learning_rate": 0.00030695540519453535, "loss": 2.4041, "step": 471660 }, { "epoch": 0.9396715223766416, "grad_norm": 0.21947361528873444, "learning_rate": 0.0003068372789362506, "loss": 2.4167, "step": 471670 }, { "epoch": 0.9396914446002805, "grad_norm": 0.20532876253128052, "learning_rate": 0.00030671916091866213, "loss": 2.3969, "step": 471680 }, { "epoch": 0.9397113668239194, "grad_norm": 0.2121516764163971, "learning_rate": 0.0003066010511400452, "loss": 2.4065, "step": 471690 }, { "epoch": 0.9397312890475583, "grad_norm": 0.20999941229820251, "learning_rate": 0.0003064829495986763, "loss": 2.4257, "step": 471700 }, { "epoch": 0.9397512112711972, "grad_norm": 0.21127374470233917, "learning_rate": 0.00030636485629283183, "loss": 2.4295, "step": 471710 }, { "epoch": 0.9397711334948362, "grad_norm": 0.22476857900619507, "learning_rate": 0.0003062467712207897, "loss": 2.4155, "step": 471720 }, { "epoch": 0.9397910557184751, "grad_norm": 0.19935885071754456, "learning_rate": 0.0003061286943808275, "loss": 2.4143, "step": 471730 }, { "epoch": 0.939810977942114, "grad_norm": 0.20441462099552155, "learning_rate": 0.00030601062577122407, "loss": 2.4185, "step": 471740 }, { "epoch": 0.9398309001657529, "grad_norm": 0.22874271869659424, "learning_rate": 0.0003058925653902585, "loss": 2.4244, "step": 471750 }, { "epoch": 0.9398508223893918, "grad_norm": 0.23444953560829163, "learning_rate": 0.0003057745132362104, "loss": 2.4141, "step": 471760 }, { "epoch": 0.9398707446130308, "grad_norm": 0.21962516009807587, "learning_rate": 0.00030565646930736024, "loss": 2.4236, "step": 471770 }, { "epoch": 0.9398906668366697, "grad_norm": 0.2264406979084015, "learning_rate": 0.0003055384336019893, "loss": 2.4274, "step": 471780 }, { "epoch": 0.9399105890603086, "grad_norm": 0.21476328372955322, "learning_rate": 0.000305420406118379, "loss": 2.4206, "step": 471790 }, { "epoch": 0.9399305112839474, "grad_norm": 0.20479416847229004, "learning_rate": 0.0003053023868548113, "loss": 2.428, "step": 471800 }, { "epoch": 0.9399504335075863, "grad_norm": 0.21415746212005615, "learning_rate": 0.00030518437580956913, "loss": 2.4224, "step": 471810 }, { "epoch": 0.9399703557312253, "grad_norm": 0.20442478358745575, "learning_rate": 0.00030506637298093595, "loss": 2.4077, "step": 471820 }, { "epoch": 0.9399902779548642, "grad_norm": 0.20460373163223267, "learning_rate": 0.0003049483783671956, "loss": 2.4059, "step": 471830 }, { "epoch": 0.9400102001785031, "grad_norm": 0.2177804559469223, "learning_rate": 0.0003048303919666327, "loss": 2.4206, "step": 471840 }, { "epoch": 0.940030122402142, "grad_norm": 0.2174820750951767, "learning_rate": 0.00030471241377753253, "loss": 2.4142, "step": 471850 }, { "epoch": 0.9400500446257809, "grad_norm": 0.22613081336021423, "learning_rate": 0.0003045944437981807, "loss": 2.4149, "step": 471860 }, { "epoch": 0.9400699668494199, "grad_norm": 0.20204217731952667, "learning_rate": 0.0003044764820268637, "loss": 2.4123, "step": 471870 }, { "epoch": 0.9400898890730588, "grad_norm": 0.21046747267246246, "learning_rate": 0.00030435852846186863, "loss": 2.4164, "step": 471880 }, { "epoch": 0.9401098112966977, "grad_norm": 0.2224133163690567, "learning_rate": 0.0003042405831014825, "loss": 2.4262, "step": 471890 }, { "epoch": 0.9401297335203366, "grad_norm": 0.21183796226978302, "learning_rate": 0.0003041226459439941, "loss": 2.4093, "step": 471900 }, { "epoch": 0.9401496557439755, "grad_norm": 0.217070072889328, "learning_rate": 0.00030400471698769163, "loss": 2.4222, "step": 471910 }, { "epoch": 0.9401695779676145, "grad_norm": 0.21845431625843048, "learning_rate": 0.0003038867962308649, "loss": 2.4208, "step": 471920 }, { "epoch": 0.9401895001912534, "grad_norm": 0.20870089530944824, "learning_rate": 0.0003037688836718033, "loss": 2.4172, "step": 471930 }, { "epoch": 0.9402094224148922, "grad_norm": 0.25195199251174927, "learning_rate": 0.00030365097930879804, "loss": 2.4272, "step": 471940 }, { "epoch": 0.9402293446385311, "grad_norm": 0.23571744561195374, "learning_rate": 0.0003035330831401397, "loss": 2.4307, "step": 471950 }, { "epoch": 0.9402492668621701, "grad_norm": 0.21053482592105865, "learning_rate": 0.0003034151951641202, "loss": 2.4225, "step": 471960 }, { "epoch": 0.940269189085809, "grad_norm": 0.2138071060180664, "learning_rate": 0.0003032973153790319, "loss": 2.4026, "step": 471970 }, { "epoch": 0.9402891113094479, "grad_norm": 0.20217899978160858, "learning_rate": 0.00030317944378316787, "loss": 2.4059, "step": 471980 }, { "epoch": 0.9403090335330868, "grad_norm": 0.2066134363412857, "learning_rate": 0.0003030615803748211, "loss": 2.4106, "step": 471990 }, { "epoch": 0.9403289557567257, "grad_norm": 0.23408228158950806, "learning_rate": 0.00030294372515228595, "loss": 2.428, "step": 472000 }, { "epoch": 0.9403488779803647, "grad_norm": 0.22310473024845123, "learning_rate": 0.0003028258781138573, "loss": 2.4212, "step": 472010 }, { "epoch": 0.9403688002040036, "grad_norm": 0.2090727835893631, "learning_rate": 0.00030270803925783006, "loss": 2.4159, "step": 472020 }, { "epoch": 0.9403887224276425, "grad_norm": 0.20474368333816528, "learning_rate": 0.0003025902085825003, "loss": 2.427, "step": 472030 }, { "epoch": 0.9404086446512814, "grad_norm": 0.2134426087141037, "learning_rate": 0.0003024723860861645, "loss": 2.4325, "step": 472040 }, { "epoch": 0.9404285668749203, "grad_norm": 0.21206237375736237, "learning_rate": 0.00030235457176711945, "loss": 2.4127, "step": 472050 }, { "epoch": 0.9404484890985593, "grad_norm": 0.24252386391162872, "learning_rate": 0.0003022367656236633, "loss": 2.4039, "step": 472060 }, { "epoch": 0.9404684113221982, "grad_norm": 0.21167075634002686, "learning_rate": 0.0003021189676540939, "loss": 2.4213, "step": 472070 }, { "epoch": 0.940488333545837, "grad_norm": 0.2215089201927185, "learning_rate": 0.00030200117785671, "loss": 2.4318, "step": 472080 }, { "epoch": 0.9405082557694759, "grad_norm": 0.2222977876663208, "learning_rate": 0.0003018833962298113, "loss": 2.4087, "step": 472090 }, { "epoch": 0.9405281779931148, "grad_norm": 0.23147287964820862, "learning_rate": 0.0003017656227716976, "loss": 2.4119, "step": 472100 }, { "epoch": 0.9405481002167538, "grad_norm": 0.23432013392448425, "learning_rate": 0.0003016478574806696, "loss": 2.4139, "step": 472110 }, { "epoch": 0.9405680224403927, "grad_norm": 0.2480134516954422, "learning_rate": 0.00030153010035502837, "loss": 2.4286, "step": 472120 }, { "epoch": 0.9405879446640316, "grad_norm": 0.222159281373024, "learning_rate": 0.00030141235139307575, "loss": 2.4086, "step": 472130 }, { "epoch": 0.9406078668876705, "grad_norm": 0.20931458473205566, "learning_rate": 0.000301294610593114, "loss": 2.4098, "step": 472140 }, { "epoch": 0.9406277891113094, "grad_norm": 0.20911793410778046, "learning_rate": 0.0003011768779534463, "loss": 2.4257, "step": 472150 }, { "epoch": 0.9406477113349484, "grad_norm": 0.21561557054519653, "learning_rate": 0.00030105915347237587, "loss": 2.4309, "step": 472160 }, { "epoch": 0.9406676335585873, "grad_norm": 0.21260854601860046, "learning_rate": 0.0003009414371482071, "loss": 2.4222, "step": 472170 }, { "epoch": 0.9406875557822262, "grad_norm": 0.22299696505069733, "learning_rate": 0.00030082372897924457, "loss": 2.4256, "step": 472180 }, { "epoch": 0.9407074780058651, "grad_norm": 0.21653693914413452, "learning_rate": 0.0003007060289637935, "loss": 2.4215, "step": 472190 }, { "epoch": 0.940727400229504, "grad_norm": 0.21035897731781006, "learning_rate": 0.00030058833710015987, "loss": 2.409, "step": 472200 }, { "epoch": 0.940747322453143, "grad_norm": 0.22326792776584625, "learning_rate": 0.0003004706533866501, "loss": 2.4038, "step": 472210 }, { "epoch": 0.9407672446767819, "grad_norm": 0.2288367748260498, "learning_rate": 0.0003003529778215715, "loss": 2.4195, "step": 472220 }, { "epoch": 0.9407871669004207, "grad_norm": 0.23412838578224182, "learning_rate": 0.00030023531040323116, "loss": 2.415, "step": 472230 }, { "epoch": 0.9408070891240596, "grad_norm": 0.2020711749792099, "learning_rate": 0.0003001176511299377, "loss": 2.4165, "step": 472240 }, { "epoch": 0.9408270113476986, "grad_norm": 0.20039278268814087, "learning_rate": 0.00030000000000000003, "loss": 2.4308, "step": 472250 }, { "epoch": 0.9408469335713375, "grad_norm": 0.2297537624835968, "learning_rate": 0.00029988235701172727, "loss": 2.4288, "step": 472260 }, { "epoch": 0.9408668557949764, "grad_norm": 0.23086293041706085, "learning_rate": 0.0002997647221634294, "loss": 2.4144, "step": 472270 }, { "epoch": 0.9408867780186153, "grad_norm": 0.24623306095600128, "learning_rate": 0.0002996470954534174, "loss": 2.4045, "step": 472280 }, { "epoch": 0.9409067002422542, "grad_norm": 0.2348618358373642, "learning_rate": 0.0002995294768800019, "loss": 2.4228, "step": 472290 }, { "epoch": 0.9409266224658932, "grad_norm": 0.21445785462856293, "learning_rate": 0.00029941186644149486, "loss": 2.4265, "step": 472300 }, { "epoch": 0.9409465446895321, "grad_norm": 0.227890282869339, "learning_rate": 0.00029929426413620864, "loss": 2.4169, "step": 472310 }, { "epoch": 0.940966466913171, "grad_norm": 0.2115999162197113, "learning_rate": 0.0002991766699624561, "loss": 2.4126, "step": 472320 }, { "epoch": 0.9409863891368099, "grad_norm": 0.2144930213689804, "learning_rate": 0.0002990590839185505, "loss": 2.427, "step": 472330 }, { "epoch": 0.9410063113604488, "grad_norm": 0.2366190105676651, "learning_rate": 0.00029894150600280644, "loss": 2.423, "step": 472340 }, { "epoch": 0.9410262335840878, "grad_norm": 0.2282303422689438, "learning_rate": 0.0002988239362135381, "loss": 2.4267, "step": 472350 }, { "epoch": 0.9410461558077267, "grad_norm": 0.2069673240184784, "learning_rate": 0.000298706374549061, "loss": 2.4148, "step": 472360 }, { "epoch": 0.9410660780313655, "grad_norm": 0.2301611602306366, "learning_rate": 0.00029858882100769057, "loss": 2.4205, "step": 472370 }, { "epoch": 0.9410860002550044, "grad_norm": 0.20317021012306213, "learning_rate": 0.0002984712755877437, "loss": 2.429, "step": 472380 }, { "epoch": 0.9411059224786433, "grad_norm": 0.21834053099155426, "learning_rate": 0.00029835373828753696, "loss": 2.3935, "step": 472390 }, { "epoch": 0.9411258447022823, "grad_norm": 0.2182273119688034, "learning_rate": 0.0002982362091053883, "loss": 2.4247, "step": 472400 }, { "epoch": 0.9411457669259212, "grad_norm": 0.22529439628124237, "learning_rate": 0.0002981186880396154, "loss": 2.407, "step": 472410 }, { "epoch": 0.9411656891495601, "grad_norm": 0.21049083769321442, "learning_rate": 0.0002980011750885374, "loss": 2.4185, "step": 472420 }, { "epoch": 0.941185611373199, "grad_norm": 0.20070405304431915, "learning_rate": 0.00029788367025047305, "loss": 2.3996, "step": 472430 }, { "epoch": 0.9412055335968379, "grad_norm": 0.23313917219638824, "learning_rate": 0.00029776617352374294, "loss": 2.4271, "step": 472440 }, { "epoch": 0.9412254558204769, "grad_norm": 0.23583975434303284, "learning_rate": 0.0002976486849066671, "loss": 2.4327, "step": 472450 }, { "epoch": 0.9412453780441158, "grad_norm": 0.21400776505470276, "learning_rate": 0.00029753120439756666, "loss": 2.4222, "step": 472460 }, { "epoch": 0.9412653002677547, "grad_norm": 0.22061088681221008, "learning_rate": 0.0002974137319947632, "loss": 2.4133, "step": 472470 }, { "epoch": 0.9412852224913936, "grad_norm": 0.21513132750988007, "learning_rate": 0.00029729626769657894, "loss": 2.3972, "step": 472480 }, { "epoch": 0.9413051447150325, "grad_norm": 0.23106344044208527, "learning_rate": 0.00029717881150133677, "loss": 2.42, "step": 472490 }, { "epoch": 0.9413250669386715, "grad_norm": 0.20847107470035553, "learning_rate": 0.00029706136340736, "loss": 2.415, "step": 472500 }, { "epoch": 0.9413449891623104, "grad_norm": 0.22930479049682617, "learning_rate": 0.00029694392341297226, "loss": 2.4335, "step": 472510 }, { "epoch": 0.9413649113859492, "grad_norm": 0.29032471776008606, "learning_rate": 0.00029682649151649865, "loss": 2.425, "step": 472520 }, { "epoch": 0.9413848336095881, "grad_norm": 0.23454146087169647, "learning_rate": 0.0002967090677162636, "loss": 2.414, "step": 472530 }, { "epoch": 0.9414047558332271, "grad_norm": 0.2161671668291092, "learning_rate": 0.0002965916520105931, "loss": 2.4125, "step": 472540 }, { "epoch": 0.941424678056866, "grad_norm": 0.22252440452575684, "learning_rate": 0.0002964742443978137, "loss": 2.4115, "step": 472550 }, { "epoch": 0.9414446002805049, "grad_norm": 0.2008717656135559, "learning_rate": 0.0002963568448762517, "loss": 2.423, "step": 472560 }, { "epoch": 0.9414645225041438, "grad_norm": 0.19931654632091522, "learning_rate": 0.00029623945344423475, "loss": 2.4316, "step": 472570 }, { "epoch": 0.9414844447277827, "grad_norm": 0.2304491400718689, "learning_rate": 0.00029612207010009083, "loss": 2.4194, "step": 472580 }, { "epoch": 0.9415043669514217, "grad_norm": 0.23489496111869812, "learning_rate": 0.00029600469484214844, "loss": 2.4136, "step": 472590 }, { "epoch": 0.9415242891750606, "grad_norm": 0.23026272654533386, "learning_rate": 0.0002958873276687366, "loss": 2.4081, "step": 472600 }, { "epoch": 0.9415442113986995, "grad_norm": 0.22588996589183807, "learning_rate": 0.0002957699685781852, "loss": 2.4005, "step": 472610 }, { "epoch": 0.9415641336223384, "grad_norm": 0.24237488210201263, "learning_rate": 0.00029565261756882435, "loss": 2.4168, "step": 472620 }, { "epoch": 0.9415840558459773, "grad_norm": 0.24594606459140778, "learning_rate": 0.0002955352746389852, "loss": 2.4217, "step": 472630 }, { "epoch": 0.9416039780696163, "grad_norm": 0.2314479947090149, "learning_rate": 0.00029541793978699873, "loss": 2.4149, "step": 472640 }, { "epoch": 0.9416239002932552, "grad_norm": 0.21558800339698792, "learning_rate": 0.00029530061301119727, "loss": 2.4194, "step": 472650 }, { "epoch": 0.941643822516894, "grad_norm": 0.22866939008235931, "learning_rate": 0.0002951832943099131, "loss": 2.4214, "step": 472660 }, { "epoch": 0.9416637447405329, "grad_norm": 0.3547076880931854, "learning_rate": 0.00029506598368147976, "loss": 2.4259, "step": 472670 }, { "epoch": 0.9416836669641718, "grad_norm": 0.22817406058311462, "learning_rate": 0.0002949486811242308, "loss": 2.4259, "step": 472680 }, { "epoch": 0.9417035891878108, "grad_norm": 0.2112693041563034, "learning_rate": 0.00029483138663650044, "loss": 2.4087, "step": 472690 }, { "epoch": 0.9417235114114497, "grad_norm": 0.20386990904808044, "learning_rate": 0.0002947141002166236, "loss": 2.4187, "step": 472700 }, { "epoch": 0.9417434336350886, "grad_norm": 0.23865832388401031, "learning_rate": 0.00029459682186293556, "loss": 2.4137, "step": 472710 }, { "epoch": 0.9417633558587275, "grad_norm": 0.22290480136871338, "learning_rate": 0.0002944795515737726, "loss": 2.4099, "step": 472720 }, { "epoch": 0.9417832780823664, "grad_norm": 0.22195599973201752, "learning_rate": 0.0002943622893474711, "loss": 2.4243, "step": 472730 }, { "epoch": 0.9418032003060054, "grad_norm": 0.2169499546289444, "learning_rate": 0.00029424503518236825, "loss": 2.4086, "step": 472740 }, { "epoch": 0.9418231225296443, "grad_norm": 0.21280841529369354, "learning_rate": 0.00029412778907680193, "loss": 2.4115, "step": 472750 }, { "epoch": 0.9418430447532832, "grad_norm": 0.20835405588150024, "learning_rate": 0.00029401055102911024, "loss": 2.4149, "step": 472760 }, { "epoch": 0.9418629669769221, "grad_norm": 0.22276504337787628, "learning_rate": 0.00029389332103763223, "loss": 2.4209, "step": 472770 }, { "epoch": 0.941882889200561, "grad_norm": 0.22302810847759247, "learning_rate": 0.000293776099100707, "loss": 2.4138, "step": 472780 }, { "epoch": 0.9419028114242, "grad_norm": 0.2102380245923996, "learning_rate": 0.00029365888521667505, "loss": 2.4124, "step": 472790 }, { "epoch": 0.9419227336478389, "grad_norm": 0.2223755270242691, "learning_rate": 0.00029354167938387677, "loss": 2.4156, "step": 472800 }, { "epoch": 0.9419426558714777, "grad_norm": 0.30689537525177, "learning_rate": 0.00029342448160065305, "loss": 2.3983, "step": 472810 }, { "epoch": 0.9419625780951166, "grad_norm": 0.24861320853233337, "learning_rate": 0.0002933072918653459, "loss": 2.422, "step": 472820 }, { "epoch": 0.9419825003187556, "grad_norm": 0.2179565578699112, "learning_rate": 0.0002931901101762975, "loss": 2.4293, "step": 472830 }, { "epoch": 0.9420024225423945, "grad_norm": 0.219508096575737, "learning_rate": 0.0002930729365318505, "loss": 2.4239, "step": 472840 }, { "epoch": 0.9420223447660334, "grad_norm": 0.22199387848377228, "learning_rate": 0.00029295577093034876, "loss": 2.4129, "step": 472850 }, { "epoch": 0.9420422669896723, "grad_norm": 0.23687869310379028, "learning_rate": 0.00029283861337013595, "loss": 2.4334, "step": 472860 }, { "epoch": 0.9420621892133112, "grad_norm": 0.21880586445331573, "learning_rate": 0.00029272146384955676, "loss": 2.415, "step": 472870 }, { "epoch": 0.9420821114369502, "grad_norm": 0.1940450817346573, "learning_rate": 0.00029260432236695634, "loss": 2.4099, "step": 472880 }, { "epoch": 0.9421020336605891, "grad_norm": 0.22590087354183197, "learning_rate": 0.0002924871889206804, "loss": 2.4169, "step": 472890 }, { "epoch": 0.942121955884228, "grad_norm": 0.21200889348983765, "learning_rate": 0.00029237006350907514, "loss": 2.4147, "step": 472900 }, { "epoch": 0.9421418781078669, "grad_norm": 0.21254925429821014, "learning_rate": 0.0002922529461304875, "loss": 2.4123, "step": 472910 }, { "epoch": 0.9421618003315058, "grad_norm": 0.2327783703804016, "learning_rate": 0.0002921358367832645, "loss": 2.4182, "step": 472920 }, { "epoch": 0.9421817225551448, "grad_norm": 0.2204168140888214, "learning_rate": 0.00029201873546575466, "loss": 2.4149, "step": 472930 }, { "epoch": 0.9422016447787837, "grad_norm": 0.20934195816516876, "learning_rate": 0.0002919016421763061, "loss": 2.4191, "step": 472940 }, { "epoch": 0.9422215670024225, "grad_norm": 0.21067506074905396, "learning_rate": 0.000291784556913268, "loss": 2.41, "step": 472950 }, { "epoch": 0.9422414892260614, "grad_norm": 0.22009049355983734, "learning_rate": 0.00029166747967499006, "loss": 2.4266, "step": 472960 }, { "epoch": 0.9422614114497003, "grad_norm": 0.19747458398342133, "learning_rate": 0.00029155041045982277, "loss": 2.428, "step": 472970 }, { "epoch": 0.9422813336733393, "grad_norm": 0.22244220972061157, "learning_rate": 0.00029143334926611653, "loss": 2.4268, "step": 472980 }, { "epoch": 0.9423012558969782, "grad_norm": 0.22100992500782013, "learning_rate": 0.00029131629609222286, "loss": 2.4195, "step": 472990 }, { "epoch": 0.9423211781206171, "grad_norm": 0.2007385641336441, "learning_rate": 0.0002911992509364938, "loss": 2.4306, "step": 473000 }, { "epoch": 0.942341100344256, "grad_norm": 0.21285642683506012, "learning_rate": 0.0002910822137972817, "loss": 2.4236, "step": 473010 }, { "epoch": 0.9423610225678949, "grad_norm": 0.2184431254863739, "learning_rate": 0.00029096518467293956, "loss": 2.4003, "step": 473020 }, { "epoch": 0.9423809447915339, "grad_norm": 0.24007487297058105, "learning_rate": 0.00029084816356182095, "loss": 2.4265, "step": 473030 }, { "epoch": 0.9424008670151728, "grad_norm": 0.22957827150821686, "learning_rate": 0.0002907311504622803, "loss": 2.4197, "step": 473040 }, { "epoch": 0.9424207892388117, "grad_norm": 0.20469430088996887, "learning_rate": 0.00029061414537267204, "loss": 2.411, "step": 473050 }, { "epoch": 0.9424407114624506, "grad_norm": 0.21947501599788666, "learning_rate": 0.0002904971482913519, "loss": 2.4177, "step": 473060 }, { "epoch": 0.9424606336860895, "grad_norm": 0.2285996973514557, "learning_rate": 0.0002903801592166755, "loss": 2.4102, "step": 473070 }, { "epoch": 0.9424805559097285, "grad_norm": 0.24031402170658112, "learning_rate": 0.0002902631781469991, "loss": 2.4106, "step": 473080 }, { "epoch": 0.9425004781333673, "grad_norm": 0.2425404042005539, "learning_rate": 0.00029014620508068, "loss": 2.4125, "step": 473090 }, { "epoch": 0.9425204003570062, "grad_norm": 0.22708499431610107, "learning_rate": 0.00029002924001607556, "loss": 2.4226, "step": 473100 }, { "epoch": 0.9425403225806451, "grad_norm": 0.20868101716041565, "learning_rate": 0.00028991228295154416, "loss": 2.4307, "step": 473110 }, { "epoch": 0.942560244804284, "grad_norm": 0.23283454775810242, "learning_rate": 0.000289795333885444, "loss": 2.4084, "step": 473120 }, { "epoch": 0.942580167027923, "grad_norm": 0.23245441913604736, "learning_rate": 0.00028967839281613484, "loss": 2.415, "step": 473130 }, { "epoch": 0.9426000892515619, "grad_norm": 0.23020631074905396, "learning_rate": 0.000289561459741976, "loss": 2.4363, "step": 473140 }, { "epoch": 0.9426200114752008, "grad_norm": 0.22308778762817383, "learning_rate": 0.00028944453466132815, "loss": 2.4289, "step": 473150 }, { "epoch": 0.9426399336988397, "grad_norm": 0.20054484903812408, "learning_rate": 0.0002893276175725523, "loss": 2.4206, "step": 473160 }, { "epoch": 0.9426598559224787, "grad_norm": 0.21925821900367737, "learning_rate": 0.00028921070847400963, "loss": 2.4331, "step": 473170 }, { "epoch": 0.9426797781461176, "grad_norm": 0.21716435253620148, "learning_rate": 0.0002890938073640623, "loss": 2.4216, "step": 473180 }, { "epoch": 0.9426997003697565, "grad_norm": 0.21893930435180664, "learning_rate": 0.0002889769142410732, "loss": 2.4197, "step": 473190 }, { "epoch": 0.9427196225933954, "grad_norm": 0.22379352152347565, "learning_rate": 0.0002888600291034049, "loss": 2.4094, "step": 473200 }, { "epoch": 0.9427395448170343, "grad_norm": 0.2179010808467865, "learning_rate": 0.0002887431519494217, "loss": 2.4223, "step": 473210 }, { "epoch": 0.9427594670406733, "grad_norm": 0.21188470721244812, "learning_rate": 0.00028862628277748773, "loss": 2.4161, "step": 473220 }, { "epoch": 0.9427793892643122, "grad_norm": 0.22723978757858276, "learning_rate": 0.00028850942158596736, "loss": 2.4197, "step": 473230 }, { "epoch": 0.942799311487951, "grad_norm": 0.21876773238182068, "learning_rate": 0.00028839256837322647, "loss": 2.4238, "step": 473240 }, { "epoch": 0.9428192337115899, "grad_norm": 0.2119964212179184, "learning_rate": 0.00028827572313763094, "loss": 2.4194, "step": 473250 }, { "epoch": 0.9428391559352288, "grad_norm": 0.2264743596315384, "learning_rate": 0.00028815888587754726, "loss": 2.4241, "step": 473260 }, { "epoch": 0.9428590781588678, "grad_norm": 0.2052369862794876, "learning_rate": 0.00028804205659134263, "loss": 2.4169, "step": 473270 }, { "epoch": 0.9428790003825067, "grad_norm": 0.22611171007156372, "learning_rate": 0.00028792523527738424, "loss": 2.4126, "step": 473280 }, { "epoch": 0.9428989226061456, "grad_norm": 0.24538575112819672, "learning_rate": 0.0002878084219340407, "loss": 2.4211, "step": 473290 }, { "epoch": 0.9429188448297845, "grad_norm": 0.22599004209041595, "learning_rate": 0.00028769161655968056, "loss": 2.4226, "step": 473300 }, { "epoch": 0.9429387670534234, "grad_norm": 0.2507641315460205, "learning_rate": 0.00028757481915267323, "loss": 2.4138, "step": 473310 }, { "epoch": 0.9429586892770624, "grad_norm": 0.2390206754207611, "learning_rate": 0.00028745802971138844, "loss": 2.4076, "step": 473320 }, { "epoch": 0.9429786115007013, "grad_norm": 0.22366154193878174, "learning_rate": 0.0002873412482341966, "loss": 2.4129, "step": 473330 }, { "epoch": 0.9429985337243402, "grad_norm": 0.21959836781024933, "learning_rate": 0.00028722447471946853, "loss": 2.4109, "step": 473340 }, { "epoch": 0.9430184559479791, "grad_norm": 0.23537826538085938, "learning_rate": 0.0002871077091655763, "loss": 2.406, "step": 473350 }, { "epoch": 0.943038378171618, "grad_norm": 0.20077168941497803, "learning_rate": 0.0002869909515708915, "loss": 2.4275, "step": 473360 }, { "epoch": 0.943058300395257, "grad_norm": 0.22895145416259766, "learning_rate": 0.00028687420193378665, "loss": 2.4143, "step": 473370 }, { "epoch": 0.9430782226188958, "grad_norm": 0.22815990447998047, "learning_rate": 0.0002867574602526355, "loss": 2.4149, "step": 473380 }, { "epoch": 0.9430981448425347, "grad_norm": 0.20883196592330933, "learning_rate": 0.0002866407265258113, "loss": 2.41, "step": 473390 }, { "epoch": 0.9431180670661736, "grad_norm": 0.25104349851608276, "learning_rate": 0.0002865240007516885, "loss": 2.4072, "step": 473400 }, { "epoch": 0.9431379892898125, "grad_norm": 0.23530155420303345, "learning_rate": 0.0002864072829286417, "loss": 2.4204, "step": 473410 }, { "epoch": 0.9431579115134515, "grad_norm": 0.2202361524105072, "learning_rate": 0.0002862905730550469, "loss": 2.41, "step": 473420 }, { "epoch": 0.9431778337370904, "grad_norm": 0.23787036538124084, "learning_rate": 0.0002861738711292794, "loss": 2.4111, "step": 473430 }, { "epoch": 0.9431977559607293, "grad_norm": 0.21356210112571716, "learning_rate": 0.000286057177149716, "loss": 2.4238, "step": 473440 }, { "epoch": 0.9432176781843682, "grad_norm": 0.2128792107105255, "learning_rate": 0.0002859404911147336, "loss": 2.4187, "step": 473450 }, { "epoch": 0.9432376004080072, "grad_norm": 0.22466543316841125, "learning_rate": 0.00028582381302271024, "loss": 2.4245, "step": 473460 }, { "epoch": 0.9432575226316461, "grad_norm": 0.2299293726682663, "learning_rate": 0.0002857071428720237, "loss": 2.4143, "step": 473470 }, { "epoch": 0.943277444855285, "grad_norm": 0.23599308729171753, "learning_rate": 0.00028559048066105274, "loss": 2.4066, "step": 473480 }, { "epoch": 0.9432973670789239, "grad_norm": 0.22643794119358063, "learning_rate": 0.00028547382638817665, "loss": 2.4026, "step": 473490 }, { "epoch": 0.9433172893025628, "grad_norm": 0.2120962142944336, "learning_rate": 0.0002853571800517754, "loss": 2.4189, "step": 473500 }, { "epoch": 0.9433372115262018, "grad_norm": 0.22370736300945282, "learning_rate": 0.00028524054165022904, "loss": 2.4117, "step": 473510 }, { "epoch": 0.9433571337498406, "grad_norm": 0.23277413845062256, "learning_rate": 0.00028512391118191863, "loss": 2.4303, "step": 473520 }, { "epoch": 0.9433770559734795, "grad_norm": 0.23753035068511963, "learning_rate": 0.0002850072886452257, "loss": 2.4279, "step": 473530 }, { "epoch": 0.9433969781971184, "grad_norm": 0.20634061098098755, "learning_rate": 0.0002848906740385322, "loss": 2.4233, "step": 473540 }, { "epoch": 0.9434169004207573, "grad_norm": 0.24289917945861816, "learning_rate": 0.0002847740673602208, "loss": 2.4054, "step": 473550 }, { "epoch": 0.9434368226443963, "grad_norm": 0.221888929605484, "learning_rate": 0.0002846574686086747, "loss": 2.417, "step": 473560 }, { "epoch": 0.9434567448680352, "grad_norm": 0.2307872176170349, "learning_rate": 0.0002845408777822771, "loss": 2.4137, "step": 473570 }, { "epoch": 0.9434766670916741, "grad_norm": 0.22777198255062103, "learning_rate": 0.0002844242948794127, "loss": 2.4118, "step": 473580 }, { "epoch": 0.943496589315313, "grad_norm": 0.22931945323944092, "learning_rate": 0.0002843077198984663, "loss": 2.4126, "step": 473590 }, { "epoch": 0.9435165115389519, "grad_norm": 0.2222311943769455, "learning_rate": 0.0002841911528378227, "loss": 2.4141, "step": 473600 }, { "epoch": 0.9435364337625909, "grad_norm": 0.2198338806629181, "learning_rate": 0.0002840745936958682, "loss": 2.4116, "step": 473610 }, { "epoch": 0.9435563559862298, "grad_norm": 0.2182711362838745, "learning_rate": 0.00028395804247098914, "loss": 2.4273, "step": 473620 }, { "epoch": 0.9435762782098687, "grad_norm": 0.22038239240646362, "learning_rate": 0.00028384149916157233, "loss": 2.4116, "step": 473630 }, { "epoch": 0.9435962004335076, "grad_norm": 0.23168118298053741, "learning_rate": 0.0002837249637660051, "loss": 2.4263, "step": 473640 }, { "epoch": 0.9436161226571464, "grad_norm": 0.2138950526714325, "learning_rate": 0.0002836084362826761, "loss": 2.4197, "step": 473650 }, { "epoch": 0.9436360448807855, "grad_norm": 0.21808621287345886, "learning_rate": 0.00028349191670997364, "loss": 2.4223, "step": 473660 }, { "epoch": 0.9436559671044243, "grad_norm": 0.21720103919506073, "learning_rate": 0.00028337540504628665, "loss": 2.4115, "step": 473670 }, { "epoch": 0.9436758893280632, "grad_norm": 0.22765031456947327, "learning_rate": 0.0002832589012900053, "loss": 2.4137, "step": 473680 }, { "epoch": 0.9436958115517021, "grad_norm": 0.222342848777771, "learning_rate": 0.0002831424054395193, "loss": 2.4108, "step": 473690 }, { "epoch": 0.943715733775341, "grad_norm": 0.28370991349220276, "learning_rate": 0.0002830259174932197, "loss": 2.4121, "step": 473700 }, { "epoch": 0.94373565599898, "grad_norm": 0.22338128089904785, "learning_rate": 0.0002829094374494978, "loss": 2.4145, "step": 473710 }, { "epoch": 0.9437555782226189, "grad_norm": 0.2435120940208435, "learning_rate": 0.0002827929653067456, "loss": 2.4149, "step": 473720 }, { "epoch": 0.9437755004462578, "grad_norm": 0.2195577323436737, "learning_rate": 0.0002826765010633554, "loss": 2.4064, "step": 473730 }, { "epoch": 0.9437954226698967, "grad_norm": 0.2137717455625534, "learning_rate": 0.0002825600447177199, "loss": 2.4164, "step": 473740 }, { "epoch": 0.9438153448935357, "grad_norm": 0.2259376347064972, "learning_rate": 0.00028244359626823325, "loss": 2.4079, "step": 473750 }, { "epoch": 0.9438352671171746, "grad_norm": 0.20979246497154236, "learning_rate": 0.0002823271557132889, "loss": 2.4042, "step": 473760 }, { "epoch": 0.9438551893408135, "grad_norm": 0.23017607629299164, "learning_rate": 0.0002822107230512818, "loss": 2.4094, "step": 473770 }, { "epoch": 0.9438751115644524, "grad_norm": 0.2251071184873581, "learning_rate": 0.00028209429828060696, "loss": 2.4052, "step": 473780 }, { "epoch": 0.9438950337880913, "grad_norm": 0.19593773782253265, "learning_rate": 0.0002819778813996603, "loss": 2.4239, "step": 473790 }, { "epoch": 0.9439149560117303, "grad_norm": 0.21638493239879608, "learning_rate": 0.00028186147240683756, "loss": 2.413, "step": 473800 }, { "epoch": 0.9439348782353691, "grad_norm": 0.2230331152677536, "learning_rate": 0.0002817450713005361, "loss": 2.4139, "step": 473810 }, { "epoch": 0.943954800459008, "grad_norm": 0.21790890395641327, "learning_rate": 0.0002816286780791528, "loss": 2.4184, "step": 473820 }, { "epoch": 0.9439747226826469, "grad_norm": 0.22372564673423767, "learning_rate": 0.0002815122927410858, "loss": 2.419, "step": 473830 }, { "epoch": 0.9439946449062858, "grad_norm": 0.22976435720920563, "learning_rate": 0.0002813959152847332, "loss": 2.4345, "step": 473840 }, { "epoch": 0.9440145671299248, "grad_norm": 0.21959751844406128, "learning_rate": 0.00028127954570849424, "loss": 2.4205, "step": 473850 }, { "epoch": 0.9440344893535637, "grad_norm": 0.21412959694862366, "learning_rate": 0.00028116318401076823, "loss": 2.4116, "step": 473860 }, { "epoch": 0.9440544115772026, "grad_norm": 0.21432644128799438, "learning_rate": 0.0002810468301899554, "loss": 2.4147, "step": 473870 }, { "epoch": 0.9440743338008415, "grad_norm": 0.23854465782642365, "learning_rate": 0.0002809304842444562, "loss": 2.3963, "step": 473880 }, { "epoch": 0.9440942560244804, "grad_norm": 0.24345390498638153, "learning_rate": 0.0002808141461726716, "loss": 2.4054, "step": 473890 }, { "epoch": 0.9441141782481194, "grad_norm": 0.2160249501466751, "learning_rate": 0.0002806978159730036, "loss": 2.416, "step": 473900 }, { "epoch": 0.9441341004717583, "grad_norm": 0.20730334520339966, "learning_rate": 0.000280581493643854, "loss": 2.4167, "step": 473910 }, { "epoch": 0.9441540226953972, "grad_norm": 0.22590003907680511, "learning_rate": 0.0002804651791836259, "loss": 2.4254, "step": 473920 }, { "epoch": 0.9441739449190361, "grad_norm": 0.208446204662323, "learning_rate": 0.00028034887259072237, "loss": 2.4099, "step": 473930 }, { "epoch": 0.944193867142675, "grad_norm": 0.22087110579013824, "learning_rate": 0.0002802325738635472, "loss": 2.4241, "step": 473940 }, { "epoch": 0.944213789366314, "grad_norm": 0.23538804054260254, "learning_rate": 0.0002801162830005046, "loss": 2.4255, "step": 473950 }, { "epoch": 0.9442337115899528, "grad_norm": 0.22084425389766693, "learning_rate": 0.00028000000000000003, "loss": 2.4119, "step": 473960 }, { "epoch": 0.9442536338135917, "grad_norm": 0.22611486911773682, "learning_rate": 0.00027988372486043847, "loss": 2.4222, "step": 473970 }, { "epoch": 0.9442735560372306, "grad_norm": 0.23311321437358856, "learning_rate": 0.0002797674575802258, "loss": 2.4167, "step": 473980 }, { "epoch": 0.9442934782608695, "grad_norm": 0.23447374999523163, "learning_rate": 0.00027965119815776896, "loss": 2.4187, "step": 473990 }, { "epoch": 0.9443134004845085, "grad_norm": 0.20900997519493103, "learning_rate": 0.0002795349465914747, "loss": 2.4247, "step": 474000 }, { "epoch": 0.9443333227081474, "grad_norm": 0.21887673437595367, "learning_rate": 0.0002794187028797506, "loss": 2.4291, "step": 474010 }, { "epoch": 0.9443532449317863, "grad_norm": 0.2168893814086914, "learning_rate": 0.0002793024670210049, "loss": 2.4099, "step": 474020 }, { "epoch": 0.9443731671554252, "grad_norm": 0.20202067494392395, "learning_rate": 0.0002791862390136461, "loss": 2.4203, "step": 474030 }, { "epoch": 0.9443930893790642, "grad_norm": 0.22871367633342743, "learning_rate": 0.00027907001885608376, "loss": 2.4172, "step": 474040 }, { "epoch": 0.9444130116027031, "grad_norm": 0.22457073628902435, "learning_rate": 0.0002789538065467272, "loss": 2.4098, "step": 474050 }, { "epoch": 0.944432933826342, "grad_norm": 0.22514089941978455, "learning_rate": 0.0002788376020839869, "loss": 2.402, "step": 474060 }, { "epoch": 0.9444528560499809, "grad_norm": 0.2390960156917572, "learning_rate": 0.0002787214054662737, "loss": 2.4178, "step": 474070 }, { "epoch": 0.9444727782736198, "grad_norm": 0.2286260724067688, "learning_rate": 0.000278605216691999, "loss": 2.3991, "step": 474080 }, { "epoch": 0.9444927004972588, "grad_norm": 0.22503317892551422, "learning_rate": 0.00027848903575957443, "loss": 2.4156, "step": 474090 }, { "epoch": 0.9445126227208976, "grad_norm": 0.25512585043907166, "learning_rate": 0.0002783728626674125, "loss": 2.4235, "step": 474100 }, { "epoch": 0.9445325449445365, "grad_norm": 0.23715631663799286, "learning_rate": 0.00027825669741392647, "loss": 2.4184, "step": 474110 }, { "epoch": 0.9445524671681754, "grad_norm": 0.2385823279619217, "learning_rate": 0.0002781405399975294, "loss": 2.4263, "step": 474120 }, { "epoch": 0.9445723893918143, "grad_norm": 1.0221203565597534, "learning_rate": 0.0002780243904166355, "loss": 2.4139, "step": 474130 }, { "epoch": 0.9445923116154533, "grad_norm": 0.226094588637352, "learning_rate": 0.00027790824866965935, "loss": 2.4158, "step": 474140 }, { "epoch": 0.9446122338390922, "grad_norm": 0.21682673692703247, "learning_rate": 0.000277792114755016, "loss": 2.4263, "step": 474150 }, { "epoch": 0.9446321560627311, "grad_norm": 0.2244999259710312, "learning_rate": 0.0002776759886711211, "loss": 2.4222, "step": 474160 }, { "epoch": 0.94465207828637, "grad_norm": 0.2230820208787918, "learning_rate": 0.00027755987041639085, "loss": 2.4123, "step": 474170 }, { "epoch": 0.9446720005100089, "grad_norm": 0.20851287245750427, "learning_rate": 0.0002774437599892421, "loss": 2.4283, "step": 474180 }, { "epoch": 0.9446919227336479, "grad_norm": 0.21488967537879944, "learning_rate": 0.0002773276573880916, "loss": 2.402, "step": 474190 }, { "epoch": 0.9447118449572868, "grad_norm": 0.22763462364673615, "learning_rate": 0.0002772115626113576, "loss": 2.4177, "step": 474200 }, { "epoch": 0.9447317671809257, "grad_norm": 0.21204820275306702, "learning_rate": 0.0002770954756574582, "loss": 2.4142, "step": 474210 }, { "epoch": 0.9447516894045646, "grad_norm": 0.2197425812482834, "learning_rate": 0.0002769793965248124, "loss": 2.4144, "step": 474220 }, { "epoch": 0.9447716116282034, "grad_norm": 0.27111080288887024, "learning_rate": 0.0002768633252118391, "loss": 2.4215, "step": 474230 }, { "epoch": 0.9447915338518424, "grad_norm": 0.22405527532100677, "learning_rate": 0.00027674726171695885, "loss": 2.4243, "step": 474240 }, { "epoch": 0.9448114560754813, "grad_norm": 0.2159460037946701, "learning_rate": 0.00027663120603859135, "loss": 2.4276, "step": 474250 }, { "epoch": 0.9448313782991202, "grad_norm": 0.23135848343372345, "learning_rate": 0.00027651515817515816, "loss": 2.3961, "step": 474260 }, { "epoch": 0.9448513005227591, "grad_norm": 0.21315442025661469, "learning_rate": 0.00027639911812508044, "loss": 2.4215, "step": 474270 }, { "epoch": 0.944871222746398, "grad_norm": 0.2114032655954361, "learning_rate": 0.00027628308588678043, "loss": 2.4185, "step": 474280 }, { "epoch": 0.944891144970037, "grad_norm": 0.1996901035308838, "learning_rate": 0.00027616706145868066, "loss": 2.4213, "step": 474290 }, { "epoch": 0.9449110671936759, "grad_norm": 0.20535427331924438, "learning_rate": 0.00027605104483920416, "loss": 2.413, "step": 474300 }, { "epoch": 0.9449309894173148, "grad_norm": 0.29893580079078674, "learning_rate": 0.0002759350360267743, "loss": 2.4155, "step": 474310 }, { "epoch": 0.9449509116409537, "grad_norm": 0.21497777104377747, "learning_rate": 0.0002758190350198155, "loss": 2.4182, "step": 474320 }, { "epoch": 0.9449708338645927, "grad_norm": 0.22491930425167084, "learning_rate": 0.00027570304181675256, "loss": 2.4153, "step": 474330 }, { "epoch": 0.9449907560882316, "grad_norm": 0.21268731355667114, "learning_rate": 0.00027558705641601036, "loss": 2.4103, "step": 474340 }, { "epoch": 0.9450106783118705, "grad_norm": 0.20465438067913055, "learning_rate": 0.00027547107881601465, "loss": 2.4203, "step": 474350 }, { "epoch": 0.9450306005355094, "grad_norm": 0.21357093751430511, "learning_rate": 0.000275355109015192, "loss": 2.402, "step": 474360 }, { "epoch": 0.9450505227591482, "grad_norm": 0.226933091878891, "learning_rate": 0.00027523914701196904, "loss": 2.4213, "step": 474370 }, { "epoch": 0.9450704449827872, "grad_norm": 0.22814378142356873, "learning_rate": 0.00027512319280477307, "loss": 2.4177, "step": 474380 }, { "epoch": 0.9450903672064261, "grad_norm": 0.22270531952381134, "learning_rate": 0.0002750072463920319, "loss": 2.4055, "step": 474390 }, { "epoch": 0.945110289430065, "grad_norm": 0.23416773974895477, "learning_rate": 0.000274891307772174, "loss": 2.4327, "step": 474400 }, { "epoch": 0.9451302116537039, "grad_norm": 0.22789382934570312, "learning_rate": 0.0002747753769436283, "loss": 2.3932, "step": 474410 }, { "epoch": 0.9451501338773428, "grad_norm": 0.21552981436252594, "learning_rate": 0.00027465945390482417, "loss": 2.4289, "step": 474420 }, { "epoch": 0.9451700561009818, "grad_norm": 0.22647322714328766, "learning_rate": 0.0002745435386541915, "loss": 2.412, "step": 474430 }, { "epoch": 0.9451899783246207, "grad_norm": 0.21524696052074432, "learning_rate": 0.0002744276311901608, "loss": 2.4165, "step": 474440 }, { "epoch": 0.9452099005482596, "grad_norm": 0.2130373865365982, "learning_rate": 0.00027431173151116296, "loss": 2.4165, "step": 474450 }, { "epoch": 0.9452298227718985, "grad_norm": 0.22645124793052673, "learning_rate": 0.0002741958396156301, "loss": 2.4127, "step": 474460 }, { "epoch": 0.9452497449955374, "grad_norm": 0.22973787784576416, "learning_rate": 0.0002740799555019937, "loss": 2.4077, "step": 474470 }, { "epoch": 0.9452696672191764, "grad_norm": 0.21357087790966034, "learning_rate": 0.0002739640791686866, "loss": 2.4138, "step": 474480 }, { "epoch": 0.9452895894428153, "grad_norm": 0.2061772346496582, "learning_rate": 0.0002738482106141418, "loss": 2.4293, "step": 474490 }, { "epoch": 0.9453095116664542, "grad_norm": 0.23910439014434814, "learning_rate": 0.00027373234983679316, "loss": 2.4261, "step": 474500 }, { "epoch": 0.945329433890093, "grad_norm": 0.24852816760540009, "learning_rate": 0.0002736164968350747, "loss": 2.425, "step": 474510 }, { "epoch": 0.9453493561137319, "grad_norm": 0.21302242577075958, "learning_rate": 0.000273500651607421, "loss": 2.4098, "step": 474520 }, { "epoch": 0.9453692783373709, "grad_norm": 0.2086244374513626, "learning_rate": 0.00027338481415226746, "loss": 2.4375, "step": 474530 }, { "epoch": 0.9453892005610098, "grad_norm": 0.20836065709590912, "learning_rate": 0.00027326898446804985, "loss": 2.4242, "step": 474540 }, { "epoch": 0.9454091227846487, "grad_norm": 0.21478918194770813, "learning_rate": 0.00027315316255320423, "loss": 2.4219, "step": 474550 }, { "epoch": 0.9454290450082876, "grad_norm": 0.26138991117477417, "learning_rate": 0.0002730373484061677, "loss": 2.4188, "step": 474560 }, { "epoch": 0.9454489672319265, "grad_norm": 0.2378511130809784, "learning_rate": 0.00027292154202537746, "loss": 2.4291, "step": 474570 }, { "epoch": 0.9454688894555655, "grad_norm": 0.23447957634925842, "learning_rate": 0.00027280574340927125, "loss": 2.3975, "step": 474580 }, { "epoch": 0.9454888116792044, "grad_norm": 0.22717861831188202, "learning_rate": 0.0002726899525562876, "loss": 2.4078, "step": 474590 }, { "epoch": 0.9455087339028433, "grad_norm": 0.2146306335926056, "learning_rate": 0.00027257416946486535, "loss": 2.4101, "step": 474600 }, { "epoch": 0.9455286561264822, "grad_norm": 0.24288113415241241, "learning_rate": 0.00027245839413344376, "loss": 2.415, "step": 474610 }, { "epoch": 0.9455485783501211, "grad_norm": 0.2121889889240265, "learning_rate": 0.0002723426265604629, "loss": 2.4037, "step": 474620 }, { "epoch": 0.9455685005737601, "grad_norm": 0.2442106008529663, "learning_rate": 0.0002722268667443635, "loss": 2.4151, "step": 474630 }, { "epoch": 0.945588422797399, "grad_norm": 0.22516246140003204, "learning_rate": 0.00027211111468358594, "loss": 2.4121, "step": 474640 }, { "epoch": 0.9456083450210379, "grad_norm": 0.2365887612104416, "learning_rate": 0.00027199537037657205, "loss": 2.4228, "step": 474650 }, { "epoch": 0.9456282672446767, "grad_norm": 0.2301388531923294, "learning_rate": 0.00027187963382176416, "loss": 2.4275, "step": 474660 }, { "epoch": 0.9456481894683157, "grad_norm": 0.22610396146774292, "learning_rate": 0.0002717639050176044, "loss": 2.4089, "step": 474670 }, { "epoch": 0.9456681116919546, "grad_norm": 0.21262601017951965, "learning_rate": 0.000271648183962536, "loss": 2.4141, "step": 474680 }, { "epoch": 0.9456880339155935, "grad_norm": 0.20882020890712738, "learning_rate": 0.00027153247065500244, "loss": 2.4209, "step": 474690 }, { "epoch": 0.9457079561392324, "grad_norm": 0.22630257904529572, "learning_rate": 0.00027141676509344803, "loss": 2.4156, "step": 474700 }, { "epoch": 0.9457278783628713, "grad_norm": 0.22090449929237366, "learning_rate": 0.0002713010672763172, "loss": 2.415, "step": 474710 }, { "epoch": 0.9457478005865103, "grad_norm": 0.2181393802165985, "learning_rate": 0.00027118537720205536, "loss": 2.4348, "step": 474720 }, { "epoch": 0.9457677228101492, "grad_norm": 0.2173081338405609, "learning_rate": 0.0002710696948691078, "loss": 2.4108, "step": 474730 }, { "epoch": 0.9457876450337881, "grad_norm": 0.22360505163669586, "learning_rate": 0.0002709540202759211, "loss": 2.4215, "step": 474740 }, { "epoch": 0.945807567257427, "grad_norm": 0.21430818736553192, "learning_rate": 0.0002708383534209418, "loss": 2.4079, "step": 474750 }, { "epoch": 0.9458274894810659, "grad_norm": 0.22519131004810333, "learning_rate": 0.00027072269430261707, "loss": 2.4066, "step": 474760 }, { "epoch": 0.9458474117047049, "grad_norm": 0.23559261858463287, "learning_rate": 0.00027060704291939473, "loss": 2.42, "step": 474770 }, { "epoch": 0.9458673339283438, "grad_norm": 0.24209202826023102, "learning_rate": 0.0002704913992697231, "loss": 2.419, "step": 474780 }, { "epoch": 0.9458872561519827, "grad_norm": 0.20999930799007416, "learning_rate": 0.00027037576335205116, "loss": 2.4045, "step": 474790 }, { "epoch": 0.9459071783756215, "grad_norm": 0.23936139047145844, "learning_rate": 0.0002702601351648277, "loss": 2.4245, "step": 474800 }, { "epoch": 0.9459271005992604, "grad_norm": 0.22874745726585388, "learning_rate": 0.00027014451470650294, "loss": 2.4154, "step": 474810 }, { "epoch": 0.9459470228228994, "grad_norm": 0.32027921080589294, "learning_rate": 0.0002700289019755273, "loss": 2.4134, "step": 474820 }, { "epoch": 0.9459669450465383, "grad_norm": 0.22088396549224854, "learning_rate": 0.0002699132969703513, "loss": 2.4163, "step": 474830 }, { "epoch": 0.9459868672701772, "grad_norm": 0.21225382387638092, "learning_rate": 0.00026979769968942647, "loss": 2.4057, "step": 474840 }, { "epoch": 0.9460067894938161, "grad_norm": 0.2265920490026474, "learning_rate": 0.0002696821101312048, "loss": 2.4129, "step": 474850 }, { "epoch": 0.946026711717455, "grad_norm": 0.21677416563034058, "learning_rate": 0.00026956652829413863, "loss": 2.405, "step": 474860 }, { "epoch": 0.946046633941094, "grad_norm": 0.2353758066892624, "learning_rate": 0.000269450954176681, "loss": 2.4076, "step": 474870 }, { "epoch": 0.9460665561647329, "grad_norm": 0.21192088723182678, "learning_rate": 0.00026933538777728505, "loss": 2.4045, "step": 474880 }, { "epoch": 0.9460864783883718, "grad_norm": 0.21800658106803894, "learning_rate": 0.00026921982909440523, "loss": 2.4073, "step": 474890 }, { "epoch": 0.9461064006120107, "grad_norm": 0.22958917915821075, "learning_rate": 0.0002691042781264956, "loss": 2.4248, "step": 474900 }, { "epoch": 0.9461263228356496, "grad_norm": 0.23119108378887177, "learning_rate": 0.00026898873487201123, "loss": 2.4046, "step": 474910 }, { "epoch": 0.9461462450592886, "grad_norm": 0.22541846334934235, "learning_rate": 0.000268873199329408, "loss": 2.4135, "step": 474920 }, { "epoch": 0.9461661672829275, "grad_norm": 0.21686604619026184, "learning_rate": 0.00026875767149714137, "loss": 2.4129, "step": 474930 }, { "epoch": 0.9461860895065664, "grad_norm": 0.21613629162311554, "learning_rate": 0.0002686421513736683, "loss": 2.4012, "step": 474940 }, { "epoch": 0.9462060117302052, "grad_norm": 0.2181769609451294, "learning_rate": 0.00026852663895744544, "loss": 2.4188, "step": 474950 }, { "epoch": 0.9462259339538442, "grad_norm": 0.21173425018787384, "learning_rate": 0.0002684111342469309, "loss": 2.423, "step": 474960 }, { "epoch": 0.9462458561774831, "grad_norm": 0.22749894857406616, "learning_rate": 0.0002682956372405825, "loss": 2.4101, "step": 474970 }, { "epoch": 0.946265778401122, "grad_norm": 0.22764858603477478, "learning_rate": 0.00026818014793685886, "loss": 2.4233, "step": 474980 }, { "epoch": 0.9462857006247609, "grad_norm": 0.2278425693511963, "learning_rate": 0.000268064666334219, "loss": 2.4118, "step": 474990 }, { "epoch": 0.9463056228483998, "grad_norm": 0.22725774347782135, "learning_rate": 0.0002679491924311228, "loss": 2.4266, "step": 475000 }, { "epoch": 0.9463255450720388, "grad_norm": 0.24022738635540009, "learning_rate": 0.00026783372622603, "loss": 2.4356, "step": 475010 }, { "epoch": 0.9463454672956777, "grad_norm": 0.22908546030521393, "learning_rate": 0.0002677182677174017, "loss": 2.4143, "step": 475020 }, { "epoch": 0.9463653895193166, "grad_norm": 0.2337341159582138, "learning_rate": 0.0002676028169036986, "loss": 2.4197, "step": 475030 }, { "epoch": 0.9463853117429555, "grad_norm": 0.23194311559200287, "learning_rate": 0.00026748737378338294, "loss": 2.4174, "step": 475040 }, { "epoch": 0.9464052339665944, "grad_norm": 0.20176275074481964, "learning_rate": 0.00026737193835491647, "loss": 2.4143, "step": 475050 }, { "epoch": 0.9464251561902334, "grad_norm": 0.21534232795238495, "learning_rate": 0.0002672565106167617, "loss": 2.4124, "step": 475060 }, { "epoch": 0.9464450784138723, "grad_norm": 0.2178005874156952, "learning_rate": 0.00026714109056738257, "loss": 2.3965, "step": 475070 }, { "epoch": 0.9464650006375112, "grad_norm": 0.23757874965667725, "learning_rate": 0.0002670256782052425, "loss": 2.3982, "step": 475080 }, { "epoch": 0.94648492286115, "grad_norm": 0.23643474280834198, "learning_rate": 0.0002669102735288054, "loss": 2.42, "step": 475090 }, { "epoch": 0.9465048450847889, "grad_norm": 0.2103886753320694, "learning_rate": 0.00026679487653653644, "loss": 2.4055, "step": 475100 }, { "epoch": 0.9465247673084279, "grad_norm": 0.23720908164978027, "learning_rate": 0.0002666794872269007, "loss": 2.416, "step": 475110 }, { "epoch": 0.9465446895320668, "grad_norm": 0.21423478424549103, "learning_rate": 0.0002665641055983639, "loss": 2.4058, "step": 475120 }, { "epoch": 0.9465646117557057, "grad_norm": 0.23859497904777527, "learning_rate": 0.0002664487316493924, "loss": 2.4015, "step": 475130 }, { "epoch": 0.9465845339793446, "grad_norm": 0.21667976677417755, "learning_rate": 0.0002663333653784532, "loss": 2.4144, "step": 475140 }, { "epoch": 0.9466044562029835, "grad_norm": 0.22518591582775116, "learning_rate": 0.0002662180067840132, "loss": 2.4139, "step": 475150 }, { "epoch": 0.9466243784266225, "grad_norm": 0.23176264762878418, "learning_rate": 0.0002661026558645405, "loss": 2.4318, "step": 475160 }, { "epoch": 0.9466443006502614, "grad_norm": 0.23907120525836945, "learning_rate": 0.00026598731261850327, "loss": 2.3994, "step": 475170 }, { "epoch": 0.9466642228739003, "grad_norm": 0.22508010268211365, "learning_rate": 0.0002658719770443705, "loss": 2.4198, "step": 475180 }, { "epoch": 0.9466841450975392, "grad_norm": 0.21502161026000977, "learning_rate": 0.0002657566491406116, "loss": 2.4166, "step": 475190 }, { "epoch": 0.9467040673211781, "grad_norm": 0.2444729208946228, "learning_rate": 0.00026564132890569603, "loss": 2.4151, "step": 475200 }, { "epoch": 0.9467239895448171, "grad_norm": 0.22975929081439972, "learning_rate": 0.0002655260163380946, "loss": 2.4114, "step": 475210 }, { "epoch": 0.946743911768456, "grad_norm": 0.2269403487443924, "learning_rate": 0.00026541071143627783, "loss": 2.4133, "step": 475220 }, { "epoch": 0.9467638339920948, "grad_norm": 0.22865155339241028, "learning_rate": 0.0002652954141987172, "loss": 2.4121, "step": 475230 }, { "epoch": 0.9467837562157337, "grad_norm": 0.20443721115589142, "learning_rate": 0.00026518012462388473, "loss": 2.4358, "step": 475240 }, { "epoch": 0.9468036784393727, "grad_norm": 0.19996236264705658, "learning_rate": 0.0002650648427102529, "loss": 2.4044, "step": 475250 }, { "epoch": 0.9468236006630116, "grad_norm": 0.2177584171295166, "learning_rate": 0.00026494956845629414, "loss": 2.4083, "step": 475260 }, { "epoch": 0.9468435228866505, "grad_norm": 0.23126932978630066, "learning_rate": 0.00026483430186048217, "loss": 2.4112, "step": 475270 }, { "epoch": 0.9468634451102894, "grad_norm": 0.21717476844787598, "learning_rate": 0.0002647190429212911, "loss": 2.4112, "step": 475280 }, { "epoch": 0.9468833673339283, "grad_norm": 0.2163633555173874, "learning_rate": 0.00026460379163719506, "loss": 2.4321, "step": 475290 }, { "epoch": 0.9469032895575673, "grad_norm": 0.22262397408485413, "learning_rate": 0.000264488548006669, "loss": 2.4121, "step": 475300 }, { "epoch": 0.9469232117812062, "grad_norm": 0.2089308500289917, "learning_rate": 0.0002643733120281886, "loss": 2.4226, "step": 475310 }, { "epoch": 0.9469431340048451, "grad_norm": 0.19378162920475006, "learning_rate": 0.00026425808370022933, "loss": 2.4094, "step": 475320 }, { "epoch": 0.946963056228484, "grad_norm": 0.2136608213186264, "learning_rate": 0.0002641428630212681, "loss": 2.4092, "step": 475330 }, { "epoch": 0.9469829784521229, "grad_norm": 0.21343521773815155, "learning_rate": 0.0002640276499897818, "loss": 2.4134, "step": 475340 }, { "epoch": 0.9470029006757619, "grad_norm": 0.23721091449260712, "learning_rate": 0.00026391244460424756, "loss": 2.4063, "step": 475350 }, { "epoch": 0.9470228228994008, "grad_norm": 0.22559857368469238, "learning_rate": 0.00026379724686314356, "loss": 2.4063, "step": 475360 }, { "epoch": 0.9470427451230397, "grad_norm": 0.23618744313716888, "learning_rate": 0.00026368205676494827, "loss": 2.4035, "step": 475370 }, { "epoch": 0.9470626673466785, "grad_norm": 0.20169579982757568, "learning_rate": 0.00026356687430814077, "loss": 2.4154, "step": 475380 }, { "epoch": 0.9470825895703174, "grad_norm": 0.23353852331638336, "learning_rate": 0.0002634516994912004, "loss": 2.4125, "step": 475390 }, { "epoch": 0.9471025117939564, "grad_norm": 0.22591076791286469, "learning_rate": 0.0002633365323126071, "loss": 2.4114, "step": 475400 }, { "epoch": 0.9471224340175953, "grad_norm": 0.21505767107009888, "learning_rate": 0.0002632213727708417, "loss": 2.3906, "step": 475410 }, { "epoch": 0.9471423562412342, "grad_norm": 0.2467934489250183, "learning_rate": 0.0002631062208643846, "loss": 2.3949, "step": 475420 }, { "epoch": 0.9471622784648731, "grad_norm": 0.2400972694158554, "learning_rate": 0.00026299107659171763, "loss": 2.4155, "step": 475430 }, { "epoch": 0.947182200688512, "grad_norm": 0.2141699194908142, "learning_rate": 0.00026287593995132297, "loss": 2.4127, "step": 475440 }, { "epoch": 0.947202122912151, "grad_norm": 0.2192869484424591, "learning_rate": 0.0002627608109416828, "loss": 2.4057, "step": 475450 }, { "epoch": 0.9472220451357899, "grad_norm": 0.20940876007080078, "learning_rate": 0.0002626456895612801, "loss": 2.4216, "step": 475460 }, { "epoch": 0.9472419673594288, "grad_norm": 0.23301507532596588, "learning_rate": 0.0002625305758085985, "loss": 2.4131, "step": 475470 }, { "epoch": 0.9472618895830677, "grad_norm": 0.21788208186626434, "learning_rate": 0.00026241546968212213, "loss": 2.4096, "step": 475480 }, { "epoch": 0.9472818118067066, "grad_norm": 0.22757762670516968, "learning_rate": 0.00026230037118033536, "loss": 2.4378, "step": 475490 }, { "epoch": 0.9473017340303456, "grad_norm": 0.20026572048664093, "learning_rate": 0.0002621852803017233, "loss": 2.3988, "step": 475500 }, { "epoch": 0.9473216562539845, "grad_norm": 0.22403928637504578, "learning_rate": 0.00026207019704477144, "loss": 2.4053, "step": 475510 }, { "epoch": 0.9473415784776233, "grad_norm": 0.21182045340538025, "learning_rate": 0.0002619551214079654, "loss": 2.4148, "step": 475520 }, { "epoch": 0.9473615007012622, "grad_norm": 0.24447940289974213, "learning_rate": 0.0002618400533897924, "loss": 2.4248, "step": 475530 }, { "epoch": 0.9473814229249012, "grad_norm": 0.26636403799057007, "learning_rate": 0.0002617249929887389, "loss": 2.4033, "step": 475540 }, { "epoch": 0.9474013451485401, "grad_norm": 0.20368438959121704, "learning_rate": 0.00026160994020329255, "loss": 2.4205, "step": 475550 }, { "epoch": 0.947421267372179, "grad_norm": 0.22744537889957428, "learning_rate": 0.00026149489503194155, "loss": 2.414, "step": 475560 }, { "epoch": 0.9474411895958179, "grad_norm": 0.22180259227752686, "learning_rate": 0.0002613798574731743, "loss": 2.4191, "step": 475570 }, { "epoch": 0.9474611118194568, "grad_norm": 0.201262965798378, "learning_rate": 0.00026126482752547966, "loss": 2.4011, "step": 475580 }, { "epoch": 0.9474810340430958, "grad_norm": 0.2082451581954956, "learning_rate": 0.0002611498051873473, "loss": 2.4255, "step": 475590 }, { "epoch": 0.9475009562667347, "grad_norm": 0.24948790669441223, "learning_rate": 0.0002610347904572674, "loss": 2.4189, "step": 475600 }, { "epoch": 0.9475208784903736, "grad_norm": 0.23296546936035156, "learning_rate": 0.0002609197833337302, "loss": 2.395, "step": 475610 }, { "epoch": 0.9475408007140125, "grad_norm": 0.21866032481193542, "learning_rate": 0.00026080478381522676, "loss": 2.3915, "step": 475620 }, { "epoch": 0.9475607229376514, "grad_norm": 0.22236596047878265, "learning_rate": 0.00026068979190024886, "loss": 2.419, "step": 475630 }, { "epoch": 0.9475806451612904, "grad_norm": 0.21956516802310944, "learning_rate": 0.000260574807587288, "loss": 2.3986, "step": 475640 }, { "epoch": 0.9476005673849293, "grad_norm": 0.22308064997196198, "learning_rate": 0.00026045983087483713, "loss": 2.4368, "step": 475650 }, { "epoch": 0.9476204896085682, "grad_norm": 0.23109187185764313, "learning_rate": 0.00026034486176138927, "loss": 2.4084, "step": 475660 }, { "epoch": 0.947640411832207, "grad_norm": 0.22187195718288422, "learning_rate": 0.00026022990024543755, "loss": 2.4159, "step": 475670 }, { "epoch": 0.9476603340558459, "grad_norm": 0.24708370864391327, "learning_rate": 0.0002601149463254764, "loss": 2.405, "step": 475680 }, { "epoch": 0.9476802562794849, "grad_norm": 0.3501989245414734, "learning_rate": 0.00026000000000000003, "loss": 2.4217, "step": 475690 }, { "epoch": 0.9477001785031238, "grad_norm": 0.21430975198745728, "learning_rate": 0.00025988506126750345, "loss": 2.407, "step": 475700 }, { "epoch": 0.9477201007267627, "grad_norm": 0.2400965690612793, "learning_rate": 0.00025977013012648233, "loss": 2.4206, "step": 475710 }, { "epoch": 0.9477400229504016, "grad_norm": 0.20623093843460083, "learning_rate": 0.0002596552065754327, "loss": 2.4033, "step": 475720 }, { "epoch": 0.9477599451740405, "grad_norm": 0.23372112214565277, "learning_rate": 0.0002595402906128508, "loss": 2.4159, "step": 475730 }, { "epoch": 0.9477798673976795, "grad_norm": 0.22998355329036713, "learning_rate": 0.00025942538223723365, "loss": 2.4162, "step": 475740 }, { "epoch": 0.9477997896213184, "grad_norm": 0.2249590903520584, "learning_rate": 0.00025931048144707904, "loss": 2.4148, "step": 475750 }, { "epoch": 0.9478197118449573, "grad_norm": 0.20023715496063232, "learning_rate": 0.00025919558824088454, "loss": 2.4147, "step": 475760 }, { "epoch": 0.9478396340685962, "grad_norm": 0.21840810775756836, "learning_rate": 0.0002590807026171489, "loss": 2.4003, "step": 475770 }, { "epoch": 0.9478595562922351, "grad_norm": 0.2120252251625061, "learning_rate": 0.0002589658245743709, "loss": 2.4137, "step": 475780 }, { "epoch": 0.9478794785158741, "grad_norm": 0.2240770310163498, "learning_rate": 0.0002588509541110502, "loss": 2.4096, "step": 475790 }, { "epoch": 0.947899400739513, "grad_norm": 0.2173888087272644, "learning_rate": 0.00025873609122568664, "loss": 2.4165, "step": 475800 }, { "epoch": 0.9479193229631518, "grad_norm": 0.2394305318593979, "learning_rate": 0.0002586212359167808, "loss": 2.4354, "step": 475810 }, { "epoch": 0.9479392451867907, "grad_norm": 0.22452470660209656, "learning_rate": 0.00025850638818283357, "loss": 2.4214, "step": 475820 }, { "epoch": 0.9479591674104297, "grad_norm": 0.22208988666534424, "learning_rate": 0.0002583915480223462, "loss": 2.4208, "step": 475830 }, { "epoch": 0.9479790896340686, "grad_norm": 0.21225027740001678, "learning_rate": 0.0002582767154338208, "loss": 2.4138, "step": 475840 }, { "epoch": 0.9479990118577075, "grad_norm": 0.21322162449359894, "learning_rate": 0.00025816189041575965, "loss": 2.4161, "step": 475850 }, { "epoch": 0.9480189340813464, "grad_norm": 0.23158228397369385, "learning_rate": 0.000258047072966666, "loss": 2.3911, "step": 475860 }, { "epoch": 0.9480388563049853, "grad_norm": 0.22363293170928955, "learning_rate": 0.0002579322630850429, "loss": 2.413, "step": 475870 }, { "epoch": 0.9480587785286243, "grad_norm": 0.22722956538200378, "learning_rate": 0.0002578174607693946, "loss": 2.4106, "step": 475880 }, { "epoch": 0.9480787007522632, "grad_norm": 0.2194758653640747, "learning_rate": 0.0002577026660182251, "loss": 2.4016, "step": 475890 }, { "epoch": 0.9480986229759021, "grad_norm": 0.223908469080925, "learning_rate": 0.00025758787883003987, "loss": 2.4264, "step": 475900 }, { "epoch": 0.948118545199541, "grad_norm": 0.21001049876213074, "learning_rate": 0.0002574730992033438, "loss": 2.3877, "step": 475910 }, { "epoch": 0.9481384674231799, "grad_norm": 0.19764716923236847, "learning_rate": 0.0002573583271366429, "loss": 2.401, "step": 475920 }, { "epoch": 0.9481583896468189, "grad_norm": 0.23090361058712006, "learning_rate": 0.0002572435626284435, "loss": 2.4037, "step": 475930 }, { "epoch": 0.9481783118704578, "grad_norm": 0.22081215679645538, "learning_rate": 0.00025712880567725275, "loss": 2.411, "step": 475940 }, { "epoch": 0.9481982340940966, "grad_norm": 0.2491789013147354, "learning_rate": 0.0002570140562815775, "loss": 2.4264, "step": 475950 }, { "epoch": 0.9482181563177355, "grad_norm": 0.21374396979808807, "learning_rate": 0.00025689931443992586, "loss": 2.4093, "step": 475960 }, { "epoch": 0.9482380785413744, "grad_norm": 0.2386835664510727, "learning_rate": 0.00025678458015080644, "loss": 2.3972, "step": 475970 }, { "epoch": 0.9482580007650134, "grad_norm": 0.22039785981178284, "learning_rate": 0.00025666985341272786, "loss": 2.3963, "step": 475980 }, { "epoch": 0.9482779229886523, "grad_norm": 0.22650353610515594, "learning_rate": 0.0002565551342241992, "loss": 2.4155, "step": 475990 }, { "epoch": 0.9482978452122912, "grad_norm": 0.23969459533691406, "learning_rate": 0.0002564404225837307, "loss": 2.4141, "step": 476000 }, { "epoch": 0.9483177674359301, "grad_norm": 0.2015814185142517, "learning_rate": 0.0002563257184898322, "loss": 2.4004, "step": 476010 }, { "epoch": 0.948337689659569, "grad_norm": 0.22263924777507782, "learning_rate": 0.00025621102194101476, "loss": 2.4056, "step": 476020 }, { "epoch": 0.948357611883208, "grad_norm": 0.22441843152046204, "learning_rate": 0.0002560963329357897, "loss": 2.4089, "step": 476030 }, { "epoch": 0.9483775341068469, "grad_norm": 0.22448283433914185, "learning_rate": 0.00025598165147266874, "loss": 2.4109, "step": 476040 }, { "epoch": 0.9483974563304858, "grad_norm": 0.2351921647787094, "learning_rate": 0.0002558669775501641, "loss": 2.4289, "step": 476050 }, { "epoch": 0.9484173785541247, "grad_norm": 0.23817501962184906, "learning_rate": 0.00025575231116678834, "loss": 2.4209, "step": 476060 }, { "epoch": 0.9484373007777636, "grad_norm": 0.23902426660060883, "learning_rate": 0.00025563765232105505, "loss": 2.3985, "step": 476070 }, { "epoch": 0.9484572230014026, "grad_norm": 0.28669625520706177, "learning_rate": 0.00025552300101147797, "loss": 2.4104, "step": 476080 }, { "epoch": 0.9484771452250415, "grad_norm": 0.20392562448978424, "learning_rate": 0.00025540835723657083, "loss": 2.4062, "step": 476090 }, { "epoch": 0.9484970674486803, "grad_norm": 0.2175832837820053, "learning_rate": 0.0002552937209948489, "loss": 2.4204, "step": 476100 }, { "epoch": 0.9485169896723192, "grad_norm": 0.21570314466953278, "learning_rate": 0.00025517909228482715, "loss": 2.404, "step": 476110 }, { "epoch": 0.9485369118959582, "grad_norm": 0.2187589704990387, "learning_rate": 0.00025506447110502093, "loss": 2.4003, "step": 476120 }, { "epoch": 0.9485568341195971, "grad_norm": 0.22025759518146515, "learning_rate": 0.00025494985745394707, "loss": 2.4232, "step": 476130 }, { "epoch": 0.948576756343236, "grad_norm": 0.21874023973941803, "learning_rate": 0.0002548352513301215, "loss": 2.4069, "step": 476140 }, { "epoch": 0.9485966785668749, "grad_norm": 0.23904761672019958, "learning_rate": 0.00025472065273206203, "loss": 2.3994, "step": 476150 }, { "epoch": 0.9486166007905138, "grad_norm": 0.275398313999176, "learning_rate": 0.0002546060616582857, "loss": 2.4163, "step": 476160 }, { "epoch": 0.9486365230141528, "grad_norm": 0.2155599147081375, "learning_rate": 0.0002544914781073109, "loss": 2.3783, "step": 476170 }, { "epoch": 0.9486564452377917, "grad_norm": 0.24601620435714722, "learning_rate": 0.0002543769020776563, "loss": 2.4199, "step": 476180 }, { "epoch": 0.9486763674614306, "grad_norm": 0.24448628723621368, "learning_rate": 0.0002542623335678409, "loss": 2.4099, "step": 476190 }, { "epoch": 0.9486962896850695, "grad_norm": 0.2192257046699524, "learning_rate": 0.00025414777257638435, "loss": 2.4077, "step": 476200 }, { "epoch": 0.9487162119087084, "grad_norm": 0.22613051533699036, "learning_rate": 0.00025403321910180666, "loss": 2.3968, "step": 476210 }, { "epoch": 0.9487361341323474, "grad_norm": 0.22449268400669098, "learning_rate": 0.0002539186731426282, "loss": 2.4057, "step": 476220 }, { "epoch": 0.9487560563559863, "grad_norm": 0.24209202826023102, "learning_rate": 0.0002538041346973703, "loss": 2.4232, "step": 476230 }, { "epoch": 0.9487759785796251, "grad_norm": 0.21842430531978607, "learning_rate": 0.00025368960376455417, "loss": 2.4181, "step": 476240 }, { "epoch": 0.948795900803264, "grad_norm": 0.21298226714134216, "learning_rate": 0.0002535750803427019, "loss": 2.4115, "step": 476250 }, { "epoch": 0.9488158230269029, "grad_norm": 0.22744476795196533, "learning_rate": 0.0002534605644303363, "loss": 2.4141, "step": 476260 }, { "epoch": 0.9488357452505419, "grad_norm": 0.22596032917499542, "learning_rate": 0.0002533460560259797, "loss": 2.4201, "step": 476270 }, { "epoch": 0.9488556674741808, "grad_norm": 0.2173938751220703, "learning_rate": 0.0002532315551281561, "loss": 2.4071, "step": 476280 }, { "epoch": 0.9488755896978197, "grad_norm": 0.2275506854057312, "learning_rate": 0.0002531170617353893, "loss": 2.4, "step": 476290 }, { "epoch": 0.9488955119214586, "grad_norm": 0.2235209047794342, "learning_rate": 0.00025300257584620357, "loss": 2.4039, "step": 476300 }, { "epoch": 0.9489154341450975, "grad_norm": 0.21952350437641144, "learning_rate": 0.0002528880974591239, "loss": 2.4035, "step": 476310 }, { "epoch": 0.9489353563687365, "grad_norm": 0.20642922818660736, "learning_rate": 0.00025277362657267566, "loss": 2.4113, "step": 476320 }, { "epoch": 0.9489552785923754, "grad_norm": 0.25685811042785645, "learning_rate": 0.0002526591631853847, "loss": 2.4062, "step": 476330 }, { "epoch": 0.9489752008160143, "grad_norm": 0.24127264320850372, "learning_rate": 0.0002525447072957776, "loss": 2.3975, "step": 476340 }, { "epoch": 0.9489951230396532, "grad_norm": 0.22995051741600037, "learning_rate": 0.0002524302589023808, "loss": 2.4045, "step": 476350 }, { "epoch": 0.9490150452632921, "grad_norm": 0.22064267098903656, "learning_rate": 0.00025231581800372197, "loss": 2.4068, "step": 476360 }, { "epoch": 0.9490349674869311, "grad_norm": 0.22006520628929138, "learning_rate": 0.0002522013845983284, "loss": 2.4071, "step": 476370 }, { "epoch": 0.94905488971057, "grad_norm": 0.23666083812713623, "learning_rate": 0.000252086958684729, "loss": 2.4154, "step": 476380 }, { "epoch": 0.9490748119342088, "grad_norm": 0.25291767716407776, "learning_rate": 0.0002519725402614523, "loss": 2.4141, "step": 476390 }, { "epoch": 0.9490947341578477, "grad_norm": 0.21891382336616516, "learning_rate": 0.0002518581293270274, "loss": 2.4018, "step": 476400 }, { "epoch": 0.9491146563814866, "grad_norm": 0.2300935983657837, "learning_rate": 0.0002517437258799842, "loss": 2.4077, "step": 476410 }, { "epoch": 0.9491345786051256, "grad_norm": 0.21918827295303345, "learning_rate": 0.0002516293299188528, "loss": 2.417, "step": 476420 }, { "epoch": 0.9491545008287645, "grad_norm": 0.227239191532135, "learning_rate": 0.0002515149414421638, "loss": 2.4139, "step": 476430 }, { "epoch": 0.9491744230524034, "grad_norm": 0.21294665336608887, "learning_rate": 0.0002514005604484486, "loss": 2.4056, "step": 476440 }, { "epoch": 0.9491943452760423, "grad_norm": 0.24240049719810486, "learning_rate": 0.0002512861869362386, "loss": 2.4047, "step": 476450 }, { "epoch": 0.9492142674996813, "grad_norm": 0.27793505787849426, "learning_rate": 0.0002511718209040661, "loss": 2.4229, "step": 476460 }, { "epoch": 0.9492341897233202, "grad_norm": 0.22583280503749847, "learning_rate": 0.0002510574623504636, "loss": 2.4193, "step": 476470 }, { "epoch": 0.9492541119469591, "grad_norm": 0.2250775843858719, "learning_rate": 0.00025094311127396416, "loss": 2.4086, "step": 476480 }, { "epoch": 0.949274034170598, "grad_norm": 0.23116014897823334, "learning_rate": 0.0002508287676731016, "loss": 2.415, "step": 476490 }, { "epoch": 0.9492939563942369, "grad_norm": 0.23716440796852112, "learning_rate": 0.00025071443154640985, "loss": 2.4277, "step": 476500 }, { "epoch": 0.9493138786178759, "grad_norm": 0.23430611193180084, "learning_rate": 0.00025060010289242317, "loss": 2.4206, "step": 476510 }, { "epoch": 0.9493338008415148, "grad_norm": 0.23412460088729858, "learning_rate": 0.0002504857817096771, "loss": 2.4112, "step": 476520 }, { "epoch": 0.9493537230651536, "grad_norm": 0.22609025239944458, "learning_rate": 0.00025037146799670643, "loss": 2.4332, "step": 476530 }, { "epoch": 0.9493736452887925, "grad_norm": 0.26983484625816345, "learning_rate": 0.0002502571617520477, "loss": 2.4191, "step": 476540 }, { "epoch": 0.9493935675124314, "grad_norm": 0.2277810424566269, "learning_rate": 0.00025014286297423706, "loss": 2.4124, "step": 476550 }, { "epoch": 0.9494134897360704, "grad_norm": 0.21130922436714172, "learning_rate": 0.0002500285716618114, "loss": 2.4199, "step": 476560 }, { "epoch": 0.9494334119597093, "grad_norm": 0.25598353147506714, "learning_rate": 0.0002499142878133083, "loss": 2.4126, "step": 476570 }, { "epoch": 0.9494533341833482, "grad_norm": 0.22129449248313904, "learning_rate": 0.0002498000114272654, "loss": 2.4176, "step": 476580 }, { "epoch": 0.9494732564069871, "grad_norm": 0.21547579765319824, "learning_rate": 0.0002496857425022214, "loss": 2.4257, "step": 476590 }, { "epoch": 0.949493178630626, "grad_norm": 0.23171262443065643, "learning_rate": 0.0002495714810367149, "loss": 2.4045, "step": 476600 }, { "epoch": 0.949513100854265, "grad_norm": 0.24533937871456146, "learning_rate": 0.000249457227029285, "loss": 2.4074, "step": 476610 }, { "epoch": 0.9495330230779039, "grad_norm": 0.21923182904720306, "learning_rate": 0.0002493429804784719, "loss": 2.3884, "step": 476620 }, { "epoch": 0.9495529453015428, "grad_norm": 0.23995864391326904, "learning_rate": 0.0002492287413828156, "loss": 2.4185, "step": 476630 }, { "epoch": 0.9495728675251817, "grad_norm": 1.0974655151367188, "learning_rate": 0.00024911450974085693, "loss": 2.4058, "step": 476640 }, { "epoch": 0.9495927897488206, "grad_norm": 0.210622176527977, "learning_rate": 0.0002490002855511371, "loss": 2.4101, "step": 476650 }, { "epoch": 0.9496127119724596, "grad_norm": 0.22869472205638885, "learning_rate": 0.00024888606881219745, "loss": 2.413, "step": 476660 }, { "epoch": 0.9496326341960984, "grad_norm": 0.21649612486362457, "learning_rate": 0.0002487718595225805, "loss": 2.4011, "step": 476670 }, { "epoch": 0.9496525564197373, "grad_norm": 0.2217327505350113, "learning_rate": 0.000248657657680829, "loss": 2.4334, "step": 476680 }, { "epoch": 0.9496724786433762, "grad_norm": 0.2102997750043869, "learning_rate": 0.0002485434632854859, "loss": 2.4085, "step": 476690 }, { "epoch": 0.9496924008670151, "grad_norm": 0.22484806180000305, "learning_rate": 0.0002484292763350946, "loss": 2.4175, "step": 476700 }, { "epoch": 0.9497123230906541, "grad_norm": 0.23720794916152954, "learning_rate": 0.0002483150968281995, "loss": 2.4165, "step": 476710 }, { "epoch": 0.949732245314293, "grad_norm": 0.21475298702716827, "learning_rate": 0.000248200924763345, "loss": 2.404, "step": 476720 }, { "epoch": 0.9497521675379319, "grad_norm": 0.21124036610126495, "learning_rate": 0.00024808676013907593, "loss": 2.401, "step": 476730 }, { "epoch": 0.9497720897615708, "grad_norm": 0.23137301206588745, "learning_rate": 0.00024797260295393776, "loss": 2.3944, "step": 476740 }, { "epoch": 0.9497920119852098, "grad_norm": 0.23294545710086823, "learning_rate": 0.00024785845320647696, "loss": 2.4223, "step": 476750 }, { "epoch": 0.9498119342088487, "grad_norm": 0.33779603242874146, "learning_rate": 0.0002477443108952393, "loss": 2.416, "step": 476760 }, { "epoch": 0.9498318564324876, "grad_norm": 0.22561179101467133, "learning_rate": 0.000247630176018772, "loss": 2.4108, "step": 476770 }, { "epoch": 0.9498517786561265, "grad_norm": 0.20865048468112946, "learning_rate": 0.0002475160485756225, "loss": 2.4102, "step": 476780 }, { "epoch": 0.9498717008797654, "grad_norm": 0.2234179526567459, "learning_rate": 0.0002474019285643385, "loss": 2.4021, "step": 476790 }, { "epoch": 0.9498916231034044, "grad_norm": 0.23078566789627075, "learning_rate": 0.0002472878159834684, "loss": 2.4063, "step": 476800 }, { "epoch": 0.9499115453270432, "grad_norm": 0.22764787077903748, "learning_rate": 0.00024717371083156104, "loss": 2.4116, "step": 476810 }, { "epoch": 0.9499314675506821, "grad_norm": 0.22804509103298187, "learning_rate": 0.0002470596131071656, "loss": 2.4216, "step": 476820 }, { "epoch": 0.949951389774321, "grad_norm": 0.2420961856842041, "learning_rate": 0.00024694552280883175, "loss": 2.4021, "step": 476830 }, { "epoch": 0.9499713119979599, "grad_norm": 0.22645056247711182, "learning_rate": 0.0002468314399351099, "loss": 2.402, "step": 476840 }, { "epoch": 0.9499912342215989, "grad_norm": 0.22031879425048828, "learning_rate": 0.00024671736448455064, "loss": 2.4067, "step": 476850 }, { "epoch": 0.9500111564452378, "grad_norm": 0.2375621199607849, "learning_rate": 0.0002466032964557052, "loss": 2.4107, "step": 476860 }, { "epoch": 0.9500310786688767, "grad_norm": 0.22266896069049835, "learning_rate": 0.0002464892358471249, "loss": 2.4131, "step": 476870 }, { "epoch": 0.9500510008925156, "grad_norm": 0.2405560314655304, "learning_rate": 0.0002463751826573621, "loss": 2.4112, "step": 476880 }, { "epoch": 0.9500709231161545, "grad_norm": 0.2275281548500061, "learning_rate": 0.0002462611368849694, "loss": 2.406, "step": 476890 }, { "epoch": 0.9500908453397935, "grad_norm": 0.21922214329242706, "learning_rate": 0.0002461470985284997, "loss": 2.4151, "step": 476900 }, { "epoch": 0.9501107675634324, "grad_norm": 0.2185075581073761, "learning_rate": 0.0002460330675865066, "loss": 2.415, "step": 476910 }, { "epoch": 0.9501306897870713, "grad_norm": 0.22419267892837524, "learning_rate": 0.00024591904405754406, "loss": 2.4089, "step": 476920 }, { "epoch": 0.9501506120107102, "grad_norm": 0.22371481359004974, "learning_rate": 0.00024580502794016644, "loss": 2.4122, "step": 476930 }, { "epoch": 0.950170534234349, "grad_norm": 0.23281332850456238, "learning_rate": 0.0002456910192329289, "loss": 2.4231, "step": 476940 }, { "epoch": 0.950190456457988, "grad_norm": 0.21912603080272675, "learning_rate": 0.0002455770179343866, "loss": 2.4031, "step": 476950 }, { "epoch": 0.9502103786816269, "grad_norm": 0.22305604815483093, "learning_rate": 0.0002454630240430953, "loss": 2.4077, "step": 476960 }, { "epoch": 0.9502303009052658, "grad_norm": 0.2451898455619812, "learning_rate": 0.0002453490375576115, "loss": 2.4067, "step": 476970 }, { "epoch": 0.9502502231289047, "grad_norm": 0.23219481110572815, "learning_rate": 0.0002452350584764922, "loss": 2.4032, "step": 476980 }, { "epoch": 0.9502701453525436, "grad_norm": 0.23471617698669434, "learning_rate": 0.00024512108679829425, "loss": 2.4073, "step": 476990 }, { "epoch": 0.9502900675761826, "grad_norm": 0.22979126870632172, "learning_rate": 0.0002450071225215755, "loss": 2.3994, "step": 477000 }, { "epoch": 0.9503099897998215, "grad_norm": 0.24171152710914612, "learning_rate": 0.0002448931656448945, "loss": 2.415, "step": 477010 }, { "epoch": 0.9503299120234604, "grad_norm": 0.22117941081523895, "learning_rate": 0.0002447792161668094, "loss": 2.4022, "step": 477020 }, { "epoch": 0.9503498342470993, "grad_norm": 0.44311487674713135, "learning_rate": 0.0002446652740858797, "loss": 2.4024, "step": 477030 }, { "epoch": 0.9503697564707383, "grad_norm": 0.2377415895462036, "learning_rate": 0.0002445513394006649, "loss": 2.4079, "step": 477040 }, { "epoch": 0.9503896786943772, "grad_norm": 0.21226581931114197, "learning_rate": 0.00024443741210972484, "loss": 2.4155, "step": 477050 }, { "epoch": 0.9504096009180161, "grad_norm": 0.22443945705890656, "learning_rate": 0.00024432349221162043, "loss": 2.4051, "step": 477060 }, { "epoch": 0.950429523141655, "grad_norm": 0.24397680163383484, "learning_rate": 0.00024420957970491244, "loss": 2.4062, "step": 477070 }, { "epoch": 0.9504494453652939, "grad_norm": 0.2338455617427826, "learning_rate": 0.0002440956745881624, "loss": 2.4139, "step": 477080 }, { "epoch": 0.9504693675889329, "grad_norm": 0.22011640667915344, "learning_rate": 0.0002439817768599324, "loss": 2.393, "step": 477090 }, { "epoch": 0.9504892898125717, "grad_norm": 0.22221001982688904, "learning_rate": 0.00024386788651878467, "loss": 2.409, "step": 477100 }, { "epoch": 0.9505092120362106, "grad_norm": 0.23385043442249298, "learning_rate": 0.00024375400356328215, "loss": 2.3988, "step": 477110 }, { "epoch": 0.9505291342598495, "grad_norm": 0.22691267728805542, "learning_rate": 0.0002436401279919882, "loss": 2.4114, "step": 477120 }, { "epoch": 0.9505490564834884, "grad_norm": 0.23981218039989471, "learning_rate": 0.00024352625980346643, "loss": 2.4063, "step": 477130 }, { "epoch": 0.9505689787071274, "grad_norm": 0.22980238497257233, "learning_rate": 0.0002434123989962813, "loss": 2.4218, "step": 477140 }, { "epoch": 0.9505889009307663, "grad_norm": 0.21125638484954834, "learning_rate": 0.00024329854556899734, "loss": 2.4123, "step": 477150 }, { "epoch": 0.9506088231544052, "grad_norm": 0.2237209528684616, "learning_rate": 0.00024318469952018008, "loss": 2.4091, "step": 477160 }, { "epoch": 0.9506287453780441, "grad_norm": 0.22608226537704468, "learning_rate": 0.00024307086084839492, "loss": 2.4157, "step": 477170 }, { "epoch": 0.950648667601683, "grad_norm": 0.2330518513917923, "learning_rate": 0.00024295702955220812, "loss": 2.4066, "step": 477180 }, { "epoch": 0.950668589825322, "grad_norm": 0.2467477023601532, "learning_rate": 0.00024284320563018613, "loss": 2.3919, "step": 477190 }, { "epoch": 0.9506885120489609, "grad_norm": 0.23618747293949127, "learning_rate": 0.00024272938908089637, "loss": 2.4151, "step": 477200 }, { "epoch": 0.9507084342725998, "grad_norm": 0.21214216947555542, "learning_rate": 0.00024261557990290573, "loss": 2.4074, "step": 477210 }, { "epoch": 0.9507283564962387, "grad_norm": 0.23364748060703278, "learning_rate": 0.00024250177809478292, "loss": 2.4041, "step": 477220 }, { "epoch": 0.9507482787198775, "grad_norm": 0.21393580734729767, "learning_rate": 0.00024238798365509574, "loss": 2.3939, "step": 477230 }, { "epoch": 0.9507682009435166, "grad_norm": 0.21970058977603912, "learning_rate": 0.00024227419658241357, "loss": 2.4398, "step": 477240 }, { "epoch": 0.9507881231671554, "grad_norm": 0.22547955811023712, "learning_rate": 0.00024216041687530554, "loss": 2.3974, "step": 477250 }, { "epoch": 0.9508080453907943, "grad_norm": 0.2368321269750595, "learning_rate": 0.0002420466445323415, "loss": 2.402, "step": 477260 }, { "epoch": 0.9508279676144332, "grad_norm": 0.22277957201004028, "learning_rate": 0.00024193287955209166, "loss": 2.4166, "step": 477270 }, { "epoch": 0.9508478898380721, "grad_norm": 0.22522182762622833, "learning_rate": 0.00024181912193312717, "loss": 2.4163, "step": 477280 }, { "epoch": 0.9508678120617111, "grad_norm": 0.2258274406194687, "learning_rate": 0.00024170537167401897, "loss": 2.4126, "step": 477290 }, { "epoch": 0.95088773428535, "grad_norm": 0.23234347999095917, "learning_rate": 0.0002415916287733386, "loss": 2.4157, "step": 477300 }, { "epoch": 0.9509076565089889, "grad_norm": 0.2205270379781723, "learning_rate": 0.00024147789322965862, "loss": 2.4102, "step": 477310 }, { "epoch": 0.9509275787326278, "grad_norm": 0.2565160095691681, "learning_rate": 0.00024136416504155124, "loss": 2.4014, "step": 477320 }, { "epoch": 0.9509475009562668, "grad_norm": 0.2209997922182083, "learning_rate": 0.0002412504442075898, "loss": 2.3946, "step": 477330 }, { "epoch": 0.9509674231799057, "grad_norm": 0.21520254015922546, "learning_rate": 0.0002411367307263479, "loss": 2.4062, "step": 477340 }, { "epoch": 0.9509873454035446, "grad_norm": 0.25155243277549744, "learning_rate": 0.00024102302459639912, "loss": 2.4128, "step": 477350 }, { "epoch": 0.9510072676271835, "grad_norm": 0.23776079714298248, "learning_rate": 0.0002409093258163182, "loss": 2.3982, "step": 477360 }, { "epoch": 0.9510271898508224, "grad_norm": 0.21473723649978638, "learning_rate": 0.00024079563438468, "loss": 2.4175, "step": 477370 }, { "epoch": 0.9510471120744614, "grad_norm": 0.22370198369026184, "learning_rate": 0.00024068195030006013, "loss": 2.415, "step": 477380 }, { "epoch": 0.9510670342981002, "grad_norm": 0.2163931429386139, "learning_rate": 0.00024056827356103415, "loss": 2.405, "step": 477390 }, { "epoch": 0.9510869565217391, "grad_norm": 0.21044707298278809, "learning_rate": 0.00024045460416617838, "loss": 2.4121, "step": 477400 }, { "epoch": 0.951106878745378, "grad_norm": 0.23551513254642487, "learning_rate": 0.00024034094211406964, "loss": 2.4087, "step": 477410 }, { "epoch": 0.9511268009690169, "grad_norm": 0.23093682527542114, "learning_rate": 0.00024022728740328515, "loss": 2.4019, "step": 477420 }, { "epoch": 0.9511467231926559, "grad_norm": 0.22625072300434113, "learning_rate": 0.00024011364003240267, "loss": 2.4077, "step": 477430 }, { "epoch": 0.9511666454162948, "grad_norm": 0.25746458768844604, "learning_rate": 0.00024, "loss": 2.412, "step": 477440 }, { "epoch": 0.9511865676399337, "grad_norm": 0.22813411056995392, "learning_rate": 0.00023988636730465606, "loss": 2.4141, "step": 477450 }, { "epoch": 0.9512064898635726, "grad_norm": 0.2499294877052307, "learning_rate": 0.00023977274194495002, "loss": 2.4022, "step": 477460 }, { "epoch": 0.9512264120872115, "grad_norm": 0.22904814779758453, "learning_rate": 0.00023965912391946077, "loss": 2.4019, "step": 477470 }, { "epoch": 0.9512463343108505, "grad_norm": 0.20731204748153687, "learning_rate": 0.000239545513226769, "loss": 2.4149, "step": 477480 }, { "epoch": 0.9512662565344894, "grad_norm": 0.23841701447963715, "learning_rate": 0.0002394319098654547, "loss": 2.4112, "step": 477490 }, { "epoch": 0.9512861787581283, "grad_norm": 0.23050369322299957, "learning_rate": 0.0002393183138340991, "loss": 2.4056, "step": 477500 }, { "epoch": 0.9513061009817672, "grad_norm": 0.2419177144765854, "learning_rate": 0.00023920472513128322, "loss": 2.4182, "step": 477510 }, { "epoch": 0.951326023205406, "grad_norm": 0.2218671441078186, "learning_rate": 0.00023909114375558893, "loss": 2.4113, "step": 477520 }, { "epoch": 0.951345945429045, "grad_norm": 0.22064383327960968, "learning_rate": 0.00023897756970559848, "loss": 2.3999, "step": 477530 }, { "epoch": 0.9513658676526839, "grad_norm": 0.23484787344932556, "learning_rate": 0.00023886400297989474, "loss": 2.4039, "step": 477540 }, { "epoch": 0.9513857898763228, "grad_norm": 0.23765718936920166, "learning_rate": 0.00023875044357706088, "loss": 2.3997, "step": 477550 }, { "epoch": 0.9514057120999617, "grad_norm": 0.2205507606267929, "learning_rate": 0.00023863689149568025, "loss": 2.4065, "step": 477560 }, { "epoch": 0.9514256343236006, "grad_norm": 0.2234058976173401, "learning_rate": 0.0002385233467343373, "loss": 2.4113, "step": 477570 }, { "epoch": 0.9514455565472396, "grad_norm": 0.26471805572509766, "learning_rate": 0.00023840980929161625, "loss": 2.3935, "step": 477580 }, { "epoch": 0.9514654787708785, "grad_norm": 0.23240379989147186, "learning_rate": 0.0002382962791661023, "loss": 2.406, "step": 477590 }, { "epoch": 0.9514854009945174, "grad_norm": 0.2222445011138916, "learning_rate": 0.00023818275635638077, "loss": 2.407, "step": 477600 }, { "epoch": 0.9515053232181563, "grad_norm": 0.23061102628707886, "learning_rate": 0.00023806924086103764, "loss": 2.4356, "step": 477610 }, { "epoch": 0.9515252454417953, "grad_norm": 0.22478075325489044, "learning_rate": 0.00023795573267865945, "loss": 2.4069, "step": 477620 }, { "epoch": 0.9515451676654342, "grad_norm": 0.24939532577991486, "learning_rate": 0.00023784223180783282, "loss": 2.4134, "step": 477630 }, { "epoch": 0.9515650898890731, "grad_norm": 0.2140139490365982, "learning_rate": 0.0002377287382471449, "loss": 2.4079, "step": 477640 }, { "epoch": 0.951585012112712, "grad_norm": 0.2364858090877533, "learning_rate": 0.00023761525199518375, "loss": 2.4104, "step": 477650 }, { "epoch": 0.9516049343363508, "grad_norm": 0.22706669569015503, "learning_rate": 0.00023750177305053734, "loss": 2.4102, "step": 477660 }, { "epoch": 0.9516248565599899, "grad_norm": 0.23068252205848694, "learning_rate": 0.00023738830141179434, "loss": 2.4141, "step": 477670 }, { "epoch": 0.9516447787836287, "grad_norm": 0.23546317219734192, "learning_rate": 0.00023727483707754372, "loss": 2.3997, "step": 477680 }, { "epoch": 0.9516647010072676, "grad_norm": 0.21423223614692688, "learning_rate": 0.00023716138004637523, "loss": 2.4242, "step": 477690 }, { "epoch": 0.9516846232309065, "grad_norm": 0.22260069847106934, "learning_rate": 0.00023704793031687887, "loss": 2.4189, "step": 477700 }, { "epoch": 0.9517045454545454, "grad_norm": 0.21945686638355255, "learning_rate": 0.00023693448788764517, "loss": 2.4067, "step": 477710 }, { "epoch": 0.9517244676781844, "grad_norm": 0.2334023267030716, "learning_rate": 0.00023682105275726474, "loss": 2.4154, "step": 477720 }, { "epoch": 0.9517443899018233, "grad_norm": 0.2167283147573471, "learning_rate": 0.00023670762492432917, "loss": 2.3978, "step": 477730 }, { "epoch": 0.9517643121254622, "grad_norm": 0.23288579285144806, "learning_rate": 0.00023659420438743028, "loss": 2.406, "step": 477740 }, { "epoch": 0.9517842343491011, "grad_norm": 0.24941366910934448, "learning_rate": 0.0002364807911451603, "loss": 2.4044, "step": 477750 }, { "epoch": 0.95180415657274, "grad_norm": 0.2322673499584198, "learning_rate": 0.0002363673851961119, "loss": 2.4219, "step": 477760 }, { "epoch": 0.951824078796379, "grad_norm": 0.22502344846725464, "learning_rate": 0.0002362539865388782, "loss": 2.4034, "step": 477770 }, { "epoch": 0.9518440010200179, "grad_norm": 0.226438507437706, "learning_rate": 0.00023614059517205278, "loss": 2.3987, "step": 477780 }, { "epoch": 0.9518639232436568, "grad_norm": 0.23473374545574188, "learning_rate": 0.00023602721109423008, "loss": 2.4051, "step": 477790 }, { "epoch": 0.9518838454672957, "grad_norm": 0.22463548183441162, "learning_rate": 0.00023591383430400438, "loss": 2.3811, "step": 477800 }, { "epoch": 0.9519037676909345, "grad_norm": 0.28037402033805847, "learning_rate": 0.00023580046479997052, "loss": 2.4157, "step": 477810 }, { "epoch": 0.9519236899145735, "grad_norm": 0.21666568517684937, "learning_rate": 0.00023568710258072413, "loss": 2.4105, "step": 477820 }, { "epoch": 0.9519436121382124, "grad_norm": 0.2905028164386749, "learning_rate": 0.00023557374764486116, "loss": 2.395, "step": 477830 }, { "epoch": 0.9519635343618513, "grad_norm": 0.22938884794712067, "learning_rate": 0.0002354603999909779, "loss": 2.3977, "step": 477840 }, { "epoch": 0.9519834565854902, "grad_norm": 0.21953853964805603, "learning_rate": 0.000235347059617671, "loss": 2.4063, "step": 477850 }, { "epoch": 0.9520033788091291, "grad_norm": 0.24371497333049774, "learning_rate": 0.0002352337265235376, "loss": 2.4161, "step": 477860 }, { "epoch": 0.9520233010327681, "grad_norm": 0.21920688450336456, "learning_rate": 0.0002351204007071759, "loss": 2.3966, "step": 477870 }, { "epoch": 0.952043223256407, "grad_norm": 0.23466691374778748, "learning_rate": 0.00023500708216718326, "loss": 2.4151, "step": 477880 }, { "epoch": 0.9520631454800459, "grad_norm": 0.22863425314426422, "learning_rate": 0.00023489377090215902, "loss": 2.3986, "step": 477890 }, { "epoch": 0.9520830677036848, "grad_norm": 0.2069091498851776, "learning_rate": 0.00023478046691070164, "loss": 2.4012, "step": 477900 }, { "epoch": 0.9521029899273237, "grad_norm": 0.21036116778850555, "learning_rate": 0.00023466717019141116, "loss": 2.4092, "step": 477910 }, { "epoch": 0.9521229121509627, "grad_norm": 0.2769741714000702, "learning_rate": 0.00023455388074288707, "loss": 2.4091, "step": 477920 }, { "epoch": 0.9521428343746016, "grad_norm": 0.23121555149555206, "learning_rate": 0.00023444059856373011, "loss": 2.408, "step": 477930 }, { "epoch": 0.9521627565982405, "grad_norm": 0.23189996182918549, "learning_rate": 0.00023432732365254072, "loss": 2.3992, "step": 477940 }, { "epoch": 0.9521826788218793, "grad_norm": 0.23564499616622925, "learning_rate": 0.00023421405600792045, "loss": 2.4102, "step": 477950 }, { "epoch": 0.9522026010455183, "grad_norm": 0.218624085187912, "learning_rate": 0.00023410079562847087, "loss": 2.4226, "step": 477960 }, { "epoch": 0.9522225232691572, "grad_norm": 0.21394333243370056, "learning_rate": 0.0002339875425127942, "loss": 2.4179, "step": 477970 }, { "epoch": 0.9522424454927961, "grad_norm": 0.22162683308124542, "learning_rate": 0.0002338742966594931, "loss": 2.4055, "step": 477980 }, { "epoch": 0.952262367716435, "grad_norm": 0.23244798183441162, "learning_rate": 0.00023376105806717073, "loss": 2.4117, "step": 477990 }, { "epoch": 0.9522822899400739, "grad_norm": 0.21689298748970032, "learning_rate": 0.00023364782673443064, "loss": 2.4287, "step": 478000 }, { "epoch": 0.9523022121637129, "grad_norm": 0.2265092432498932, "learning_rate": 0.00023353460265987657, "loss": 2.4073, "step": 478010 }, { "epoch": 0.9523221343873518, "grad_norm": 0.21763750910758972, "learning_rate": 0.00023342138584211326, "loss": 2.4192, "step": 478020 }, { "epoch": 0.9523420566109907, "grad_norm": 0.24838890135288239, "learning_rate": 0.0002333081762797451, "loss": 2.3999, "step": 478030 }, { "epoch": 0.9523619788346296, "grad_norm": 0.21084342896938324, "learning_rate": 0.0002331949739713779, "loss": 2.403, "step": 478040 }, { "epoch": 0.9523819010582685, "grad_norm": 0.20916074514389038, "learning_rate": 0.00023308177891561723, "loss": 2.4169, "step": 478050 }, { "epoch": 0.9524018232819075, "grad_norm": 0.21360154449939728, "learning_rate": 0.00023296859111106928, "loss": 2.3952, "step": 478060 }, { "epoch": 0.9524217455055464, "grad_norm": 0.21417869627475739, "learning_rate": 0.00023285541055634052, "loss": 2.4049, "step": 478070 }, { "epoch": 0.9524416677291853, "grad_norm": 0.23797567188739777, "learning_rate": 0.0002327422372500383, "loss": 2.4081, "step": 478080 }, { "epoch": 0.9524615899528242, "grad_norm": 0.21862494945526123, "learning_rate": 0.00023262907119077014, "loss": 2.4015, "step": 478090 }, { "epoch": 0.952481512176463, "grad_norm": 0.24772794544696808, "learning_rate": 0.00023251591237714387, "loss": 2.4131, "step": 478100 }, { "epoch": 0.952501434400102, "grad_norm": 0.23665568232536316, "learning_rate": 0.00023240276080776812, "loss": 2.4067, "step": 478110 }, { "epoch": 0.9525213566237409, "grad_norm": 0.23423761129379272, "learning_rate": 0.0002322896164812518, "loss": 2.4123, "step": 478120 }, { "epoch": 0.9525412788473798, "grad_norm": 0.22556403279304504, "learning_rate": 0.000232176479396204, "loss": 2.4119, "step": 478130 }, { "epoch": 0.9525612010710187, "grad_norm": 0.22790725529193878, "learning_rate": 0.00023206334955123476, "loss": 2.4119, "step": 478140 }, { "epoch": 0.9525811232946576, "grad_norm": 0.21671070158481598, "learning_rate": 0.00023195022694495404, "loss": 2.4196, "step": 478150 }, { "epoch": 0.9526010455182966, "grad_norm": 0.225632905960083, "learning_rate": 0.00023183711157597254, "loss": 2.4088, "step": 478160 }, { "epoch": 0.9526209677419355, "grad_norm": 0.23545128107070923, "learning_rate": 0.00023172400344290156, "loss": 2.4085, "step": 478170 }, { "epoch": 0.9526408899655744, "grad_norm": 0.22314968705177307, "learning_rate": 0.00023161090254435223, "loss": 2.3665, "step": 478180 }, { "epoch": 0.9526608121892133, "grad_norm": 0.22512397170066833, "learning_rate": 0.00023149780887893724, "loss": 2.4125, "step": 478190 }, { "epoch": 0.9526807344128522, "grad_norm": 0.20945791900157928, "learning_rate": 0.00023138472244526854, "loss": 2.4018, "step": 478200 }, { "epoch": 0.9527006566364912, "grad_norm": 0.206638365983963, "learning_rate": 0.00023127164324195903, "loss": 2.4052, "step": 478210 }, { "epoch": 0.9527205788601301, "grad_norm": 0.23159651458263397, "learning_rate": 0.00023115857126762207, "loss": 2.402, "step": 478220 }, { "epoch": 0.952740501083769, "grad_norm": 0.23807652294635773, "learning_rate": 0.00023104550652087164, "loss": 2.3993, "step": 478230 }, { "epoch": 0.9527604233074078, "grad_norm": 0.2236979752779007, "learning_rate": 0.0002309324490003215, "loss": 2.4103, "step": 478240 }, { "epoch": 0.9527803455310468, "grad_norm": 0.23207205533981323, "learning_rate": 0.0002308193987045868, "loss": 2.4064, "step": 478250 }, { "epoch": 0.9528002677546857, "grad_norm": 0.21109050512313843, "learning_rate": 0.00023070635563228237, "loss": 2.3951, "step": 478260 }, { "epoch": 0.9528201899783246, "grad_norm": 0.25950703024864197, "learning_rate": 0.0002305933197820238, "loss": 2.4059, "step": 478270 }, { "epoch": 0.9528401122019635, "grad_norm": 0.2625035345554352, "learning_rate": 0.00023048029115242685, "loss": 2.3922, "step": 478280 }, { "epoch": 0.9528600344256024, "grad_norm": 0.25304022431373596, "learning_rate": 0.0002303672697421082, "loss": 2.41, "step": 478290 }, { "epoch": 0.9528799566492414, "grad_norm": 0.2328627109527588, "learning_rate": 0.00023025425554968492, "loss": 2.4089, "step": 478300 }, { "epoch": 0.9528998788728803, "grad_norm": 0.2232821136713028, "learning_rate": 0.00023014124857377395, "loss": 2.4069, "step": 478310 }, { "epoch": 0.9529198010965192, "grad_norm": 0.24704045057296753, "learning_rate": 0.000230028248812993, "loss": 2.3934, "step": 478320 }, { "epoch": 0.9529397233201581, "grad_norm": 0.22791332006454468, "learning_rate": 0.00022991525626596054, "loss": 2.4104, "step": 478330 }, { "epoch": 0.952959645543797, "grad_norm": 0.24192072451114655, "learning_rate": 0.000229802270931295, "loss": 2.3997, "step": 478340 }, { "epoch": 0.952979567767436, "grad_norm": 0.22505033016204834, "learning_rate": 0.00022968929280761574, "loss": 2.4079, "step": 478350 }, { "epoch": 0.9529994899910749, "grad_norm": 0.22448857128620148, "learning_rate": 0.00022957632189354182, "loss": 2.4111, "step": 478360 }, { "epoch": 0.9530194122147138, "grad_norm": 0.23989826440811157, "learning_rate": 0.0002294633581876935, "loss": 2.4152, "step": 478370 }, { "epoch": 0.9530393344383526, "grad_norm": 0.23158691823482513, "learning_rate": 0.00022935040168869093, "loss": 2.3863, "step": 478380 }, { "epoch": 0.9530592566619915, "grad_norm": 0.2632333040237427, "learning_rate": 0.00022923745239515525, "loss": 2.3939, "step": 478390 }, { "epoch": 0.9530791788856305, "grad_norm": 0.25594761967658997, "learning_rate": 0.00022912451030570759, "loss": 2.4058, "step": 478400 }, { "epoch": 0.9530991011092694, "grad_norm": 0.224770650267601, "learning_rate": 0.00022901157541896965, "loss": 2.3999, "step": 478410 }, { "epoch": 0.9531190233329083, "grad_norm": 0.24329718947410583, "learning_rate": 0.0002288986477335635, "loss": 2.4028, "step": 478420 }, { "epoch": 0.9531389455565472, "grad_norm": 0.23611415922641754, "learning_rate": 0.00022878572724811198, "loss": 2.3951, "step": 478430 }, { "epoch": 0.9531588677801861, "grad_norm": 0.2206447720527649, "learning_rate": 0.00022867281396123773, "loss": 2.4003, "step": 478440 }, { "epoch": 0.9531787900038251, "grad_norm": 0.2273123562335968, "learning_rate": 0.00022855990787156455, "loss": 2.4179, "step": 478450 }, { "epoch": 0.953198712227464, "grad_norm": 0.23682564496994019, "learning_rate": 0.0002284470089777162, "loss": 2.3971, "step": 478460 }, { "epoch": 0.9532186344511029, "grad_norm": 0.21738006174564362, "learning_rate": 0.0002283341172783171, "loss": 2.3994, "step": 478470 }, { "epoch": 0.9532385566747418, "grad_norm": 0.21410122513771057, "learning_rate": 0.00022822123277199192, "loss": 2.3975, "step": 478480 }, { "epoch": 0.9532584788983807, "grad_norm": 0.21867740154266357, "learning_rate": 0.00022810835545736597, "loss": 2.414, "step": 478490 }, { "epoch": 0.9532784011220197, "grad_norm": 0.24046435952186584, "learning_rate": 0.000227995485333065, "loss": 2.4128, "step": 478500 }, { "epoch": 0.9532983233456586, "grad_norm": 0.22883233428001404, "learning_rate": 0.00022788262239771485, "loss": 2.4118, "step": 478510 }, { "epoch": 0.9533182455692975, "grad_norm": 0.2101719230413437, "learning_rate": 0.00022776976664994232, "loss": 2.4076, "step": 478520 }, { "epoch": 0.9533381677929363, "grad_norm": 0.21278659999370575, "learning_rate": 0.00022765691808837409, "loss": 2.3949, "step": 478530 }, { "epoch": 0.9533580900165753, "grad_norm": 0.23314884305000305, "learning_rate": 0.0002275440767116379, "loss": 2.4057, "step": 478540 }, { "epoch": 0.9533780122402142, "grad_norm": 0.2223755419254303, "learning_rate": 0.00022743124251836157, "loss": 2.4069, "step": 478550 }, { "epoch": 0.9533979344638531, "grad_norm": 0.21967381238937378, "learning_rate": 0.00022731841550717303, "loss": 2.4007, "step": 478560 }, { "epoch": 0.953417856687492, "grad_norm": 0.23191507160663605, "learning_rate": 0.00022720559567670118, "loss": 2.4087, "step": 478570 }, { "epoch": 0.9534377789111309, "grad_norm": 0.2403297871351242, "learning_rate": 0.00022709278302557513, "loss": 2.3908, "step": 478580 }, { "epoch": 0.9534577011347699, "grad_norm": 0.22363242506980896, "learning_rate": 0.00022697997755242484, "loss": 2.4095, "step": 478590 }, { "epoch": 0.9534776233584088, "grad_norm": 0.2459482103586197, "learning_rate": 0.00022686717925587984, "loss": 2.4012, "step": 478600 }, { "epoch": 0.9534975455820477, "grad_norm": 0.23594631254673004, "learning_rate": 0.00022675438813457083, "loss": 2.405, "step": 478610 }, { "epoch": 0.9535174678056866, "grad_norm": 0.25796395540237427, "learning_rate": 0.00022664160418712888, "loss": 2.403, "step": 478620 }, { "epoch": 0.9535373900293255, "grad_norm": 0.2498280256986618, "learning_rate": 0.00022652882741218484, "loss": 2.4073, "step": 478630 }, { "epoch": 0.9535573122529645, "grad_norm": 0.24038921296596527, "learning_rate": 0.00022641605780837095, "loss": 2.4179, "step": 478640 }, { "epoch": 0.9535772344766034, "grad_norm": 0.25751832127571106, "learning_rate": 0.00022630329537431915, "loss": 2.3894, "step": 478650 }, { "epoch": 0.9535971567002423, "grad_norm": 0.2288849651813507, "learning_rate": 0.00022619054010866214, "loss": 2.4005, "step": 478660 }, { "epoch": 0.9536170789238811, "grad_norm": 0.22787980735301971, "learning_rate": 0.00022607779201003298, "loss": 2.3846, "step": 478670 }, { "epoch": 0.95363700114752, "grad_norm": 0.22338810563087463, "learning_rate": 0.00022596505107706522, "loss": 2.4129, "step": 478680 }, { "epoch": 0.953656923371159, "grad_norm": 0.2362843006849289, "learning_rate": 0.0002258523173083926, "loss": 2.4102, "step": 478690 }, { "epoch": 0.9536768455947979, "grad_norm": 0.402589350938797, "learning_rate": 0.00022573959070265004, "loss": 2.4002, "step": 478700 }, { "epoch": 0.9536967678184368, "grad_norm": 0.24242213368415833, "learning_rate": 0.0002256268712584717, "loss": 2.4135, "step": 478710 }, { "epoch": 0.9537166900420757, "grad_norm": 0.5741501450538635, "learning_rate": 0.00022551415897449336, "loss": 2.3954, "step": 478720 }, { "epoch": 0.9537366122657146, "grad_norm": 0.21120500564575195, "learning_rate": 0.0002254014538493503, "loss": 2.434, "step": 478730 }, { "epoch": 0.9537565344893536, "grad_norm": 0.24550259113311768, "learning_rate": 0.00022528875588167873, "loss": 2.4186, "step": 478740 }, { "epoch": 0.9537764567129925, "grad_norm": 0.22408592700958252, "learning_rate": 0.00022517606507011512, "loss": 2.4062, "step": 478750 }, { "epoch": 0.9537963789366314, "grad_norm": 0.22951039671897888, "learning_rate": 0.00022506338141329674, "loss": 2.4005, "step": 478760 }, { "epoch": 0.9538163011602703, "grad_norm": 0.2358432114124298, "learning_rate": 0.00022495070490986092, "loss": 2.4167, "step": 478770 }, { "epoch": 0.9538362233839092, "grad_norm": 0.2240968942642212, "learning_rate": 0.00022483803555844518, "loss": 2.4, "step": 478780 }, { "epoch": 0.9538561456075482, "grad_norm": 0.25687888264656067, "learning_rate": 0.00022472537335768795, "loss": 2.3991, "step": 478790 }, { "epoch": 0.9538760678311871, "grad_norm": 0.2295917123556137, "learning_rate": 0.0002246127183062283, "loss": 2.4184, "step": 478800 }, { "epoch": 0.953895990054826, "grad_norm": 0.24367286264896393, "learning_rate": 0.00022450007040270491, "loss": 2.3962, "step": 478810 }, { "epoch": 0.9539159122784648, "grad_norm": 0.23190699517726898, "learning_rate": 0.00022438742964575755, "loss": 2.4084, "step": 478820 }, { "epoch": 0.9539358345021038, "grad_norm": 0.22826965153217316, "learning_rate": 0.0002242747960340259, "loss": 2.4075, "step": 478830 }, { "epoch": 0.9539557567257427, "grad_norm": 0.21636825799942017, "learning_rate": 0.0002241621695661509, "loss": 2.3963, "step": 478840 }, { "epoch": 0.9539756789493816, "grad_norm": 0.23491425812244415, "learning_rate": 0.00022404955024077312, "loss": 2.4008, "step": 478850 }, { "epoch": 0.9539956011730205, "grad_norm": 0.8529599905014038, "learning_rate": 0.00022393693805653392, "loss": 2.4026, "step": 478860 }, { "epoch": 0.9540155233966594, "grad_norm": 0.2388000339269638, "learning_rate": 0.00022382433301207505, "loss": 2.4074, "step": 478870 }, { "epoch": 0.9540354456202984, "grad_norm": 0.242977112531662, "learning_rate": 0.0002237117351060387, "loss": 2.4077, "step": 478880 }, { "epoch": 0.9540553678439373, "grad_norm": 0.24294859170913696, "learning_rate": 0.00022359914433706706, "loss": 2.4155, "step": 478890 }, { "epoch": 0.9540752900675762, "grad_norm": 0.24048860371112823, "learning_rate": 0.00022348656070380368, "loss": 2.3837, "step": 478900 }, { "epoch": 0.9540952122912151, "grad_norm": 0.20281799137592316, "learning_rate": 0.00022337398420489187, "loss": 2.3998, "step": 478910 }, { "epoch": 0.954115134514854, "grad_norm": 0.21091468632221222, "learning_rate": 0.00022326141483897533, "loss": 2.4124, "step": 478920 }, { "epoch": 0.954135056738493, "grad_norm": 0.22261999547481537, "learning_rate": 0.0002231488526046983, "loss": 2.4256, "step": 478930 }, { "epoch": 0.9541549789621319, "grad_norm": 0.21914182603359222, "learning_rate": 0.00022303629750070587, "loss": 2.3949, "step": 478940 }, { "epoch": 0.9541749011857708, "grad_norm": 0.2154710590839386, "learning_rate": 0.00022292374952564287, "loss": 2.4027, "step": 478950 }, { "epoch": 0.9541948234094096, "grad_norm": 0.24442248046398163, "learning_rate": 0.00022281120867815508, "loss": 2.4089, "step": 478960 }, { "epoch": 0.9542147456330485, "grad_norm": 0.24231919646263123, "learning_rate": 0.00022269867495688844, "loss": 2.4174, "step": 478970 }, { "epoch": 0.9542346678566875, "grad_norm": 0.2463129311800003, "learning_rate": 0.00022258614836048962, "loss": 2.3977, "step": 478980 }, { "epoch": 0.9542545900803264, "grad_norm": 0.2269713282585144, "learning_rate": 0.0002224736288876048, "loss": 2.3907, "step": 478990 }, { "epoch": 0.9542745123039653, "grad_norm": 0.24418960511684418, "learning_rate": 0.0002223611165368822, "loss": 2.406, "step": 479000 }, { "epoch": 0.9542944345276042, "grad_norm": 0.22755539417266846, "learning_rate": 0.00022224861130696905, "loss": 2.4105, "step": 479010 }, { "epoch": 0.9543143567512431, "grad_norm": 0.22545531392097473, "learning_rate": 0.0002221361131965136, "loss": 2.3917, "step": 479020 }, { "epoch": 0.9543342789748821, "grad_norm": 0.22321999073028564, "learning_rate": 0.0002220236222041643, "loss": 2.4058, "step": 479030 }, { "epoch": 0.954354201198521, "grad_norm": 0.2255503535270691, "learning_rate": 0.00022191113832857056, "loss": 2.3977, "step": 479040 }, { "epoch": 0.9543741234221599, "grad_norm": 0.22715574502944946, "learning_rate": 0.00022179866156838134, "loss": 2.3985, "step": 479050 }, { "epoch": 0.9543940456457988, "grad_norm": 0.23058409988880157, "learning_rate": 0.0002216861919222468, "loss": 2.4227, "step": 479060 }, { "epoch": 0.9544139678694377, "grad_norm": 0.22689379751682281, "learning_rate": 0.00022157372938881714, "loss": 2.3978, "step": 479070 }, { "epoch": 0.9544338900930767, "grad_norm": 0.22589661180973053, "learning_rate": 0.0002214612739667432, "loss": 2.3974, "step": 479080 }, { "epoch": 0.9544538123167156, "grad_norm": 0.22469644248485565, "learning_rate": 0.00022134882565467607, "loss": 2.4106, "step": 479090 }, { "epoch": 0.9544737345403544, "grad_norm": 0.29462331533432007, "learning_rate": 0.00022123638445126727, "loss": 2.4092, "step": 479100 }, { "epoch": 0.9544936567639933, "grad_norm": 0.24598635733127594, "learning_rate": 0.00022112395035516874, "loss": 2.4063, "step": 479110 }, { "epoch": 0.9545135789876323, "grad_norm": 0.27626827359199524, "learning_rate": 0.00022101152336503316, "loss": 2.4091, "step": 479120 }, { "epoch": 0.9545335012112712, "grad_norm": 0.23384380340576172, "learning_rate": 0.00022089910347951314, "loss": 2.4016, "step": 479130 }, { "epoch": 0.9545534234349101, "grad_norm": 0.23361510038375854, "learning_rate": 0.0002207866906972622, "loss": 2.4122, "step": 479140 }, { "epoch": 0.954573345658549, "grad_norm": 0.25844722986221313, "learning_rate": 0.00022067428501693366, "loss": 2.4026, "step": 479150 }, { "epoch": 0.9545932678821879, "grad_norm": 0.2310975044965744, "learning_rate": 0.00022056188643718213, "loss": 2.3889, "step": 479160 }, { "epoch": 0.9546131901058269, "grad_norm": 0.234217569231987, "learning_rate": 0.0002204494949566618, "loss": 2.4005, "step": 479170 }, { "epoch": 0.9546331123294658, "grad_norm": 0.2215733677148819, "learning_rate": 0.00022033711057402794, "loss": 2.4057, "step": 479180 }, { "epoch": 0.9546530345531047, "grad_norm": 0.2235473245382309, "learning_rate": 0.00022022473328793546, "loss": 2.4045, "step": 479190 }, { "epoch": 0.9546729567767436, "grad_norm": 0.24347510933876038, "learning_rate": 0.00022011236309704075, "loss": 2.407, "step": 479200 }, { "epoch": 0.9546928790003825, "grad_norm": 0.2311682254076004, "learning_rate": 0.00021999999999999998, "loss": 2.4193, "step": 479210 }, { "epoch": 0.9547128012240215, "grad_norm": 0.21784593164920807, "learning_rate": 0.00021988764399546957, "loss": 2.4058, "step": 479220 }, { "epoch": 0.9547327234476604, "grad_norm": 0.26059675216674805, "learning_rate": 0.0002197752950821068, "loss": 2.4029, "step": 479230 }, { "epoch": 0.9547526456712992, "grad_norm": 0.21918699145317078, "learning_rate": 0.000219662953258569, "loss": 2.388, "step": 479240 }, { "epoch": 0.9547725678949381, "grad_norm": 0.2205524444580078, "learning_rate": 0.00021955061852351455, "loss": 2.4013, "step": 479250 }, { "epoch": 0.954792490118577, "grad_norm": 0.23527413606643677, "learning_rate": 0.0002194382908756012, "loss": 2.418, "step": 479260 }, { "epoch": 0.954812412342216, "grad_norm": 0.23256033658981323, "learning_rate": 0.00021932597031348844, "loss": 2.4127, "step": 479270 }, { "epoch": 0.9548323345658549, "grad_norm": 0.21821770071983337, "learning_rate": 0.00021921365683583494, "loss": 2.4045, "step": 479280 }, { "epoch": 0.9548522567894938, "grad_norm": 0.2606986165046692, "learning_rate": 0.0002191013504413004, "loss": 2.397, "step": 479290 }, { "epoch": 0.9548721790131327, "grad_norm": 0.20457354187965393, "learning_rate": 0.00021898905112854506, "loss": 2.4032, "step": 479300 }, { "epoch": 0.9548921012367716, "grad_norm": 0.25381842255592346, "learning_rate": 0.00021887675889622948, "loss": 2.3953, "step": 479310 }, { "epoch": 0.9549120234604106, "grad_norm": 0.22526893019676208, "learning_rate": 0.00021876447374301455, "loss": 2.4093, "step": 479320 }, { "epoch": 0.9549319456840495, "grad_norm": 0.22019271552562714, "learning_rate": 0.00021865219566756134, "loss": 2.3866, "step": 479330 }, { "epoch": 0.9549518679076884, "grad_norm": 0.233178973197937, "learning_rate": 0.000218539924668532, "loss": 2.4177, "step": 479340 }, { "epoch": 0.9549717901313273, "grad_norm": 0.2525941729545593, "learning_rate": 0.00021842766074458832, "loss": 2.4098, "step": 479350 }, { "epoch": 0.9549917123549662, "grad_norm": 0.231408029794693, "learning_rate": 0.0002183154038943931, "loss": 2.4077, "step": 479360 }, { "epoch": 0.9550116345786052, "grad_norm": 0.22662925720214844, "learning_rate": 0.0002182031541166092, "loss": 2.4179, "step": 479370 }, { "epoch": 0.955031556802244, "grad_norm": 0.2142130732536316, "learning_rate": 0.00021809091140990034, "loss": 2.3941, "step": 479380 }, { "epoch": 0.9550514790258829, "grad_norm": 0.2348579615354538, "learning_rate": 0.00021797867577293007, "loss": 2.4039, "step": 479390 }, { "epoch": 0.9550714012495218, "grad_norm": 0.22311006486415863, "learning_rate": 0.00021786644720436278, "loss": 2.4138, "step": 479400 }, { "epoch": 0.9550913234731607, "grad_norm": 0.22431273758411407, "learning_rate": 0.0002177542257028633, "loss": 2.4017, "step": 479410 }, { "epoch": 0.9551112456967997, "grad_norm": 0.2305484116077423, "learning_rate": 0.0002176420112670967, "loss": 2.4073, "step": 479420 }, { "epoch": 0.9551311679204386, "grad_norm": 0.23170414566993713, "learning_rate": 0.00021752980389572852, "loss": 2.4201, "step": 479430 }, { "epoch": 0.9551510901440775, "grad_norm": 0.2230503261089325, "learning_rate": 0.0002174176035874247, "loss": 2.4126, "step": 479440 }, { "epoch": 0.9551710123677164, "grad_norm": 0.22243523597717285, "learning_rate": 0.00021730541034085138, "loss": 2.4126, "step": 479450 }, { "epoch": 0.9551909345913554, "grad_norm": 0.23996300995349884, "learning_rate": 0.0002171932241546759, "loss": 2.3924, "step": 479460 }, { "epoch": 0.9552108568149943, "grad_norm": 0.23893745243549347, "learning_rate": 0.00021708104502756486, "loss": 2.4116, "step": 479470 }, { "epoch": 0.9552307790386332, "grad_norm": 0.2283181995153427, "learning_rate": 0.00021696887295818645, "loss": 2.4052, "step": 479480 }, { "epoch": 0.9552507012622721, "grad_norm": 0.22050784528255463, "learning_rate": 0.0002168567079452084, "loss": 2.3971, "step": 479490 }, { "epoch": 0.955270623485911, "grad_norm": 0.23602810502052307, "learning_rate": 0.00021674454998729908, "loss": 2.4163, "step": 479500 }, { "epoch": 0.95529054570955, "grad_norm": 0.226209357380867, "learning_rate": 0.00021663239908312781, "loss": 2.4062, "step": 479510 }, { "epoch": 0.9553104679331889, "grad_norm": 0.23090724647045135, "learning_rate": 0.00021652025523136365, "loss": 2.4232, "step": 479520 }, { "epoch": 0.9553303901568277, "grad_norm": 0.2302345484495163, "learning_rate": 0.0002164081184306763, "loss": 2.4123, "step": 479530 }, { "epoch": 0.9553503123804666, "grad_norm": 0.21919475495815277, "learning_rate": 0.0002162959886797362, "loss": 2.3949, "step": 479540 }, { "epoch": 0.9553702346041055, "grad_norm": 0.23633645474910736, "learning_rate": 0.00021618386597721352, "loss": 2.415, "step": 479550 }, { "epoch": 0.9553901568277445, "grad_norm": 0.228633850812912, "learning_rate": 0.0002160717503217793, "loss": 2.3947, "step": 479560 }, { "epoch": 0.9554100790513834, "grad_norm": 0.21419286727905273, "learning_rate": 0.00021595964171210524, "loss": 2.3991, "step": 479570 }, { "epoch": 0.9554300012750223, "grad_norm": 0.23654243350028992, "learning_rate": 0.00021584754014686292, "loss": 2.3921, "step": 479580 }, { "epoch": 0.9554499234986612, "grad_norm": 0.2509324252605438, "learning_rate": 0.00021573544562472448, "loss": 2.4016, "step": 479590 }, { "epoch": 0.9554698457223001, "grad_norm": 0.22357626259326935, "learning_rate": 0.00021562335814436295, "loss": 2.4059, "step": 479600 }, { "epoch": 0.9554897679459391, "grad_norm": 0.2248293161392212, "learning_rate": 0.00021551127770445122, "loss": 2.4019, "step": 479610 }, { "epoch": 0.955509690169578, "grad_norm": 0.2551861107349396, "learning_rate": 0.0002153992043036628, "loss": 2.4006, "step": 479620 }, { "epoch": 0.9555296123932169, "grad_norm": 0.22497613728046417, "learning_rate": 0.00021528713794067135, "loss": 2.399, "step": 479630 }, { "epoch": 0.9555495346168558, "grad_norm": 0.33829641342163086, "learning_rate": 0.00021517507861415152, "loss": 2.3955, "step": 479640 }, { "epoch": 0.9555694568404947, "grad_norm": 0.23844237625598907, "learning_rate": 0.00021506302632277795, "loss": 2.4091, "step": 479650 }, { "epoch": 0.9555893790641337, "grad_norm": 0.23403654992580414, "learning_rate": 0.00021495098106522571, "loss": 2.3947, "step": 479660 }, { "epoch": 0.9556093012877726, "grad_norm": 0.2324288785457611, "learning_rate": 0.00021483894284017046, "loss": 2.3797, "step": 479670 }, { "epoch": 0.9556292235114114, "grad_norm": 0.23186880350112915, "learning_rate": 0.0002147269116462882, "loss": 2.3938, "step": 479680 }, { "epoch": 0.9556491457350503, "grad_norm": 0.23571720719337463, "learning_rate": 0.00021461488748225532, "loss": 2.3936, "step": 479690 }, { "epoch": 0.9556690679586892, "grad_norm": 0.21970100700855255, "learning_rate": 0.00021450287034674843, "loss": 2.4083, "step": 479700 }, { "epoch": 0.9556889901823282, "grad_norm": 0.22766803205013275, "learning_rate": 0.000214390860238445, "loss": 2.4152, "step": 479710 }, { "epoch": 0.9557089124059671, "grad_norm": 0.21278709173202515, "learning_rate": 0.0002142788571560228, "loss": 2.3947, "step": 479720 }, { "epoch": 0.955728834629606, "grad_norm": 0.22249282896518707, "learning_rate": 0.00021416686109815953, "loss": 2.4013, "step": 479730 }, { "epoch": 0.9557487568532449, "grad_norm": 0.2446334958076477, "learning_rate": 0.00021405487206353402, "loss": 2.4121, "step": 479740 }, { "epoch": 0.9557686790768839, "grad_norm": 0.23170402646064758, "learning_rate": 0.0002139428900508249, "loss": 2.4177, "step": 479750 }, { "epoch": 0.9557886013005228, "grad_norm": 0.22289130091667175, "learning_rate": 0.00021383091505871145, "loss": 2.3955, "step": 479760 }, { "epoch": 0.9558085235241617, "grad_norm": 0.23862390220165253, "learning_rate": 0.0002137189470858736, "loss": 2.4044, "step": 479770 }, { "epoch": 0.9558284457478006, "grad_norm": 0.21891061961650848, "learning_rate": 0.0002136069861309915, "loss": 2.4061, "step": 479780 }, { "epoch": 0.9558483679714395, "grad_norm": 0.5891247987747192, "learning_rate": 0.00021349503219274536, "loss": 2.4013, "step": 479790 }, { "epoch": 0.9558682901950785, "grad_norm": 0.22718194127082825, "learning_rate": 0.00021338308526981642, "loss": 2.392, "step": 479800 }, { "epoch": 0.9558882124187174, "grad_norm": 0.24155913293361664, "learning_rate": 0.000213271145360886, "loss": 2.4082, "step": 479810 }, { "epoch": 0.9559081346423562, "grad_norm": 0.26061806082725525, "learning_rate": 0.00021315921246463598, "loss": 2.4018, "step": 479820 }, { "epoch": 0.9559280568659951, "grad_norm": 0.21948009729385376, "learning_rate": 0.00021304728657974836, "loss": 2.3941, "step": 479830 }, { "epoch": 0.955947979089634, "grad_norm": 0.22519826889038086, "learning_rate": 0.00021293536770490595, "loss": 2.4187, "step": 479840 }, { "epoch": 0.955967901313273, "grad_norm": 0.2133927196264267, "learning_rate": 0.00021282345583879158, "loss": 2.4086, "step": 479850 }, { "epoch": 0.9559878235369119, "grad_norm": 0.22494959831237793, "learning_rate": 0.00021271155098008876, "loss": 2.4054, "step": 479860 }, { "epoch": 0.9560077457605508, "grad_norm": 0.21199478209018707, "learning_rate": 0.0002125996531274814, "loss": 2.4035, "step": 479870 }, { "epoch": 0.9560276679841897, "grad_norm": 0.23398593068122864, "learning_rate": 0.0002124877622796537, "loss": 2.4036, "step": 479880 }, { "epoch": 0.9560475902078286, "grad_norm": 0.2262723594903946, "learning_rate": 0.00021237587843529027, "loss": 2.41, "step": 479890 }, { "epoch": 0.9560675124314676, "grad_norm": 0.24590550363063812, "learning_rate": 0.00021226400159307635, "loss": 2.3984, "step": 479900 }, { "epoch": 0.9560874346551065, "grad_norm": 0.22885757684707642, "learning_rate": 0.00021215213175169745, "loss": 2.4137, "step": 479910 }, { "epoch": 0.9561073568787454, "grad_norm": 0.2216259390115738, "learning_rate": 0.00021204026890983928, "loss": 2.3874, "step": 479920 }, { "epoch": 0.9561272791023843, "grad_norm": 0.21499738097190857, "learning_rate": 0.00021192841306618828, "loss": 2.4059, "step": 479930 }, { "epoch": 0.9561472013260232, "grad_norm": 0.22398121654987335, "learning_rate": 0.0002118165642194312, "loss": 2.4078, "step": 479940 }, { "epoch": 0.9561671235496622, "grad_norm": 0.23149535059928894, "learning_rate": 0.0002117047223682549, "loss": 2.4128, "step": 479950 }, { "epoch": 0.956187045773301, "grad_norm": 0.21895429491996765, "learning_rate": 0.00021159288751134754, "loss": 2.4131, "step": 479960 }, { "epoch": 0.9562069679969399, "grad_norm": 0.22235742211341858, "learning_rate": 0.0002114810596473964, "loss": 2.395, "step": 479970 }, { "epoch": 0.9562268902205788, "grad_norm": 0.2449875771999359, "learning_rate": 0.0002113692387750903, "loss": 2.4068, "step": 479980 }, { "epoch": 0.9562468124442177, "grad_norm": 0.2648172676563263, "learning_rate": 0.00021125742489311783, "loss": 2.4089, "step": 479990 }, { "epoch": 0.9562667346678567, "grad_norm": 0.2572685778141022, "learning_rate": 0.0002111456180001683, "loss": 2.392, "step": 480000 }, { "epoch": 0.9562866568914956, "grad_norm": 0.23242822289466858, "learning_rate": 0.00021103381809493116, "loss": 2.4058, "step": 480010 }, { "epoch": 0.9563065791151345, "grad_norm": 0.2173372209072113, "learning_rate": 0.00021092202517609637, "loss": 2.3943, "step": 480020 }, { "epoch": 0.9563265013387734, "grad_norm": 0.22102090716362, "learning_rate": 0.00021081023924235476, "loss": 2.4145, "step": 480030 }, { "epoch": 0.9563464235624124, "grad_norm": 0.24370992183685303, "learning_rate": 0.0002106984602923967, "loss": 2.3944, "step": 480040 }, { "epoch": 0.9563663457860513, "grad_norm": 0.2545781433582306, "learning_rate": 0.0002105866883249137, "loss": 2.4051, "step": 480050 }, { "epoch": 0.9563862680096902, "grad_norm": 0.2173018604516983, "learning_rate": 0.0002104749233385972, "loss": 2.3948, "step": 480060 }, { "epoch": 0.9564061902333291, "grad_norm": 0.2184630185365677, "learning_rate": 0.00021036316533213918, "loss": 2.4023, "step": 480070 }, { "epoch": 0.956426112456968, "grad_norm": 0.21204069256782532, "learning_rate": 0.00021025141430423266, "loss": 2.4126, "step": 480080 }, { "epoch": 0.956446034680607, "grad_norm": 0.22015702724456787, "learning_rate": 0.00021013967025356982, "loss": 2.4053, "step": 480090 }, { "epoch": 0.9564659569042459, "grad_norm": 0.24926957488059998, "learning_rate": 0.00021002793317884418, "loss": 2.4035, "step": 480100 }, { "epoch": 0.9564858791278847, "grad_norm": 0.23378917574882507, "learning_rate": 0.00020991620307874958, "loss": 2.3819, "step": 480110 }, { "epoch": 0.9565058013515236, "grad_norm": 0.2347153127193451, "learning_rate": 0.00020980447995198027, "loss": 2.4097, "step": 480120 }, { "epoch": 0.9565257235751625, "grad_norm": 0.23085452616214752, "learning_rate": 0.00020969276379723034, "loss": 2.4, "step": 480130 }, { "epoch": 0.9565456457988015, "grad_norm": 0.2285050004720688, "learning_rate": 0.00020958105461319466, "loss": 2.3982, "step": 480140 }, { "epoch": 0.9565655680224404, "grad_norm": 0.23536929488182068, "learning_rate": 0.0002094693523985689, "loss": 2.4063, "step": 480150 }, { "epoch": 0.9565854902460793, "grad_norm": 0.25540831685066223, "learning_rate": 0.0002093576571520488, "loss": 2.3901, "step": 480160 }, { "epoch": 0.9566054124697182, "grad_norm": 0.23676876723766327, "learning_rate": 0.00020924596887233004, "loss": 2.4111, "step": 480170 }, { "epoch": 0.9566253346933571, "grad_norm": 0.23846134543418884, "learning_rate": 0.0002091342875581095, "loss": 2.4124, "step": 480180 }, { "epoch": 0.9566452569169961, "grad_norm": 0.260921448469162, "learning_rate": 0.00020902261320808414, "loss": 2.401, "step": 480190 }, { "epoch": 0.956665179140635, "grad_norm": 0.22178401052951813, "learning_rate": 0.00020891094582095105, "loss": 2.4044, "step": 480200 }, { "epoch": 0.9566851013642739, "grad_norm": 0.21852006018161774, "learning_rate": 0.00020879928539540814, "loss": 2.3979, "step": 480210 }, { "epoch": 0.9567050235879128, "grad_norm": 0.23190410435199738, "learning_rate": 0.00020868763193015384, "loss": 2.4019, "step": 480220 }, { "epoch": 0.9567249458115517, "grad_norm": 0.22425876557826996, "learning_rate": 0.00020857598542388622, "loss": 2.4002, "step": 480230 }, { "epoch": 0.9567448680351907, "grad_norm": 0.2205306440591812, "learning_rate": 0.0002084643458753046, "loss": 2.3898, "step": 480240 }, { "epoch": 0.9567647902588295, "grad_norm": 0.23474475741386414, "learning_rate": 0.00020835271328310822, "loss": 2.4105, "step": 480250 }, { "epoch": 0.9567847124824684, "grad_norm": 0.25266459584236145, "learning_rate": 0.00020824108764599703, "loss": 2.4105, "step": 480260 }, { "epoch": 0.9568046347061073, "grad_norm": 0.22359292209148407, "learning_rate": 0.00020812946896267093, "loss": 2.3999, "step": 480270 }, { "epoch": 0.9568245569297462, "grad_norm": 0.22807131707668304, "learning_rate": 0.00020801785723183053, "loss": 2.4041, "step": 480280 }, { "epoch": 0.9568444791533852, "grad_norm": 0.2322109490633011, "learning_rate": 0.00020790625245217708, "loss": 2.405, "step": 480290 }, { "epoch": 0.9568644013770241, "grad_norm": 0.23971323668956757, "learning_rate": 0.00020779465462241188, "loss": 2.3975, "step": 480300 }, { "epoch": 0.956884323600663, "grad_norm": 0.23035797476768494, "learning_rate": 0.00020768306374123658, "loss": 2.3926, "step": 480310 }, { "epoch": 0.9569042458243019, "grad_norm": 0.2203315794467926, "learning_rate": 0.00020757147980735358, "loss": 2.4057, "step": 480320 }, { "epoch": 0.9569241680479409, "grad_norm": 0.23042617738246918, "learning_rate": 0.00020745990281946546, "loss": 2.3968, "step": 480330 }, { "epoch": 0.9569440902715798, "grad_norm": 0.2336147278547287, "learning_rate": 0.0002073483327762753, "loss": 2.4043, "step": 480340 }, { "epoch": 0.9569640124952187, "grad_norm": 0.2378932684659958, "learning_rate": 0.0002072367696764863, "loss": 2.3981, "step": 480350 }, { "epoch": 0.9569839347188576, "grad_norm": 0.2366086095571518, "learning_rate": 0.00020712521351880242, "loss": 2.3997, "step": 480360 }, { "epoch": 0.9570038569424965, "grad_norm": 0.23095540702342987, "learning_rate": 0.00020701366430192782, "loss": 2.4045, "step": 480370 }, { "epoch": 0.9570237791661355, "grad_norm": 0.26141828298568726, "learning_rate": 0.00020690212202456725, "loss": 2.4025, "step": 480380 }, { "epoch": 0.9570437013897743, "grad_norm": 0.24234524369239807, "learning_rate": 0.0002067905866854256, "loss": 2.4055, "step": 480390 }, { "epoch": 0.9570636236134132, "grad_norm": 0.23487074673175812, "learning_rate": 0.00020667905828320832, "loss": 2.3909, "step": 480400 }, { "epoch": 0.9570835458370521, "grad_norm": 0.2374110072851181, "learning_rate": 0.00020656753681662154, "loss": 2.402, "step": 480410 }, { "epoch": 0.957103468060691, "grad_norm": 0.23059968650341034, "learning_rate": 0.00020645602228437122, "loss": 2.4142, "step": 480420 }, { "epoch": 0.95712339028433, "grad_norm": 1.1803747415542603, "learning_rate": 0.00020634451468516392, "loss": 2.3933, "step": 480430 }, { "epoch": 0.9571433125079689, "grad_norm": 0.21683581173419952, "learning_rate": 0.0002062330140177069, "loss": 2.3811, "step": 480440 }, { "epoch": 0.9571632347316078, "grad_norm": 0.23275549709796906, "learning_rate": 0.00020612152028070762, "loss": 2.3932, "step": 480450 }, { "epoch": 0.9571831569552467, "grad_norm": 0.23121453821659088, "learning_rate": 0.0002060100334728736, "loss": 2.4039, "step": 480460 }, { "epoch": 0.9572030791788856, "grad_norm": 0.2372218817472458, "learning_rate": 0.00020589855359291344, "loss": 2.4047, "step": 480470 }, { "epoch": 0.9572230014025246, "grad_norm": 0.22780278325080872, "learning_rate": 0.00020578708063953566, "loss": 2.4015, "step": 480480 }, { "epoch": 0.9572429236261635, "grad_norm": 0.2273862659931183, "learning_rate": 0.00020567561461144935, "loss": 2.4026, "step": 480490 }, { "epoch": 0.9572628458498024, "grad_norm": 0.21953095495700836, "learning_rate": 0.00020556415550736397, "loss": 2.3963, "step": 480500 }, { "epoch": 0.9572827680734413, "grad_norm": 0.32753103971481323, "learning_rate": 0.0002054527033259892, "loss": 2.3939, "step": 480510 }, { "epoch": 0.9573026902970801, "grad_norm": 0.22642695903778076, "learning_rate": 0.00020534125806603542, "loss": 2.3963, "step": 480520 }, { "epoch": 0.9573226125207192, "grad_norm": 0.22774119675159454, "learning_rate": 0.00020522981972621347, "loss": 2.4143, "step": 480530 }, { "epoch": 0.957342534744358, "grad_norm": 0.264201283454895, "learning_rate": 0.00020511838830523411, "loss": 2.3983, "step": 480540 }, { "epoch": 0.9573624569679969, "grad_norm": 0.2251235842704773, "learning_rate": 0.00020500696380180904, "loss": 2.3933, "step": 480550 }, { "epoch": 0.9573823791916358, "grad_norm": 0.2200050950050354, "learning_rate": 0.00020489554621465, "loss": 2.3991, "step": 480560 }, { "epoch": 0.9574023014152747, "grad_norm": 0.24512992799282074, "learning_rate": 0.0002047841355424691, "loss": 2.4053, "step": 480570 }, { "epoch": 0.9574222236389137, "grad_norm": 0.2119695246219635, "learning_rate": 0.00020467273178397915, "loss": 2.3988, "step": 480580 }, { "epoch": 0.9574421458625526, "grad_norm": 0.253340482711792, "learning_rate": 0.0002045613349378932, "loss": 2.398, "step": 480590 }, { "epoch": 0.9574620680861915, "grad_norm": 0.24415549635887146, "learning_rate": 0.00020444994500292467, "loss": 2.4061, "step": 480600 }, { "epoch": 0.9574819903098304, "grad_norm": 0.23803749680519104, "learning_rate": 0.00020433856197778756, "loss": 2.3963, "step": 480610 }, { "epoch": 0.9575019125334694, "grad_norm": 0.23163458704948425, "learning_rate": 0.00020422718586119592, "loss": 2.3923, "step": 480620 }, { "epoch": 0.9575218347571083, "grad_norm": 0.2293778508901596, "learning_rate": 0.0002041158166518644, "loss": 2.4008, "step": 480630 }, { "epoch": 0.9575417569807472, "grad_norm": 0.2557825744152069, "learning_rate": 0.00020400445434850823, "loss": 2.3878, "step": 480640 }, { "epoch": 0.9575616792043861, "grad_norm": 0.2363768070936203, "learning_rate": 0.00020389309894984264, "loss": 2.4022, "step": 480650 }, { "epoch": 0.957581601428025, "grad_norm": 0.2560465931892395, "learning_rate": 0.0002037817504545836, "loss": 2.3929, "step": 480660 }, { "epoch": 0.957601523651664, "grad_norm": 0.23233562707901, "learning_rate": 0.0002036704088614474, "loss": 2.4023, "step": 480670 }, { "epoch": 0.9576214458753028, "grad_norm": 0.20591787993907928, "learning_rate": 0.00020355907416915042, "loss": 2.3992, "step": 480680 }, { "epoch": 0.9576413680989417, "grad_norm": 0.21441328525543213, "learning_rate": 0.00020344774637641016, "loss": 2.4077, "step": 480690 }, { "epoch": 0.9576612903225806, "grad_norm": 0.21077027916908264, "learning_rate": 0.0002033364254819434, "loss": 2.4111, "step": 480700 }, { "epoch": 0.9576812125462195, "grad_norm": 0.23123177886009216, "learning_rate": 0.00020322511148446854, "loss": 2.4175, "step": 480710 }, { "epoch": 0.9577011347698585, "grad_norm": 0.23790276050567627, "learning_rate": 0.00020311380438270388, "loss": 2.3846, "step": 480720 }, { "epoch": 0.9577210569934974, "grad_norm": 0.22414949536323547, "learning_rate": 0.00020300250417536758, "loss": 2.4006, "step": 480730 }, { "epoch": 0.9577409792171363, "grad_norm": 0.21180309355258942, "learning_rate": 0.0002028912108611789, "loss": 2.4128, "step": 480740 }, { "epoch": 0.9577609014407752, "grad_norm": 0.22109119594097137, "learning_rate": 0.00020277992443885708, "loss": 2.4022, "step": 480750 }, { "epoch": 0.9577808236644141, "grad_norm": 0.21820248663425446, "learning_rate": 0.00020266864490712223, "loss": 2.4048, "step": 480760 }, { "epoch": 0.9578007458880531, "grad_norm": 0.2237815260887146, "learning_rate": 0.0002025573722646945, "loss": 2.4021, "step": 480770 }, { "epoch": 0.957820668111692, "grad_norm": 0.240736186504364, "learning_rate": 0.0002024461065102945, "loss": 2.4079, "step": 480780 }, { "epoch": 0.9578405903353309, "grad_norm": 0.22369006276130676, "learning_rate": 0.00020233484764264298, "loss": 2.3861, "step": 480790 }, { "epoch": 0.9578605125589698, "grad_norm": 0.2218916267156601, "learning_rate": 0.00020222359566046146, "loss": 2.3908, "step": 480800 }, { "epoch": 0.9578804347826086, "grad_norm": 0.25659018754959106, "learning_rate": 0.0002021123505624718, "loss": 2.3901, "step": 480810 }, { "epoch": 0.9579003570062476, "grad_norm": 0.23972564935684204, "learning_rate": 0.00020200111234739638, "loss": 2.4031, "step": 480820 }, { "epoch": 0.9579202792298865, "grad_norm": 0.21950331330299377, "learning_rate": 0.00020188988101395755, "loss": 2.3914, "step": 480830 }, { "epoch": 0.9579402014535254, "grad_norm": 0.2355286329984665, "learning_rate": 0.00020177865656087813, "loss": 2.3863, "step": 480840 }, { "epoch": 0.9579601236771643, "grad_norm": 0.23196633160114288, "learning_rate": 0.000201667438986882, "loss": 2.3947, "step": 480850 }, { "epoch": 0.9579800459008032, "grad_norm": 0.2733207941055298, "learning_rate": 0.00020155622829069243, "loss": 2.4041, "step": 480860 }, { "epoch": 0.9579999681244422, "grad_norm": 0.23449590802192688, "learning_rate": 0.00020144502447103397, "loss": 2.4063, "step": 480870 }, { "epoch": 0.9580198903480811, "grad_norm": 0.21885335445404053, "learning_rate": 0.00020133382752663076, "loss": 2.3967, "step": 480880 }, { "epoch": 0.95803981257172, "grad_norm": 0.2169729322195053, "learning_rate": 0.00020122263745620827, "loss": 2.3959, "step": 480890 }, { "epoch": 0.9580597347953589, "grad_norm": 0.22148312628269196, "learning_rate": 0.0002011114542584913, "loss": 2.4212, "step": 480900 }, { "epoch": 0.9580796570189979, "grad_norm": 0.23894986510276794, "learning_rate": 0.00020100027793220598, "loss": 2.3907, "step": 480910 }, { "epoch": 0.9580995792426368, "grad_norm": 0.25466737151145935, "learning_rate": 0.00020088910847607843, "loss": 2.3929, "step": 480920 }, { "epoch": 0.9581195014662757, "grad_norm": 0.23265805840492249, "learning_rate": 0.00020077794588883502, "loss": 2.3885, "step": 480930 }, { "epoch": 0.9581394236899146, "grad_norm": 0.2299344390630722, "learning_rate": 0.00020066679016920275, "loss": 2.392, "step": 480940 }, { "epoch": 0.9581593459135535, "grad_norm": 0.23195630311965942, "learning_rate": 0.00020055564131590887, "loss": 2.3981, "step": 480950 }, { "epoch": 0.9581792681371925, "grad_norm": 0.23371778428554535, "learning_rate": 0.00020044449932768126, "loss": 2.3975, "step": 480960 }, { "epoch": 0.9581991903608313, "grad_norm": 0.21874846518039703, "learning_rate": 0.00020033336420324788, "loss": 2.4136, "step": 480970 }, { "epoch": 0.9582191125844702, "grad_norm": 0.24692511558532715, "learning_rate": 0.00020022223594133725, "loss": 2.4097, "step": 480980 }, { "epoch": 0.9582390348081091, "grad_norm": 0.22674015164375305, "learning_rate": 0.0002001111145406782, "loss": 2.3913, "step": 480990 }, { "epoch": 0.958258957031748, "grad_norm": 0.26690924167633057, "learning_rate": 0.00019999999999999996, "loss": 2.3995, "step": 481000 }, { "epoch": 0.958278879255387, "grad_norm": 0.22414833307266235, "learning_rate": 0.00019988889231803243, "loss": 2.4101, "step": 481010 }, { "epoch": 0.9582988014790259, "grad_norm": 0.22528478503227234, "learning_rate": 0.00019977779149350571, "loss": 2.3932, "step": 481020 }, { "epoch": 0.9583187237026648, "grad_norm": 0.2324887365102768, "learning_rate": 0.00019966669752514998, "loss": 2.3871, "step": 481030 }, { "epoch": 0.9583386459263037, "grad_norm": 0.24751847982406616, "learning_rate": 0.00019955561041169623, "loss": 2.3933, "step": 481040 }, { "epoch": 0.9583585681499426, "grad_norm": 0.2317611426115036, "learning_rate": 0.0001994445301518757, "loss": 2.3902, "step": 481050 }, { "epoch": 0.9583784903735816, "grad_norm": 0.22367730736732483, "learning_rate": 0.00019933345674441982, "loss": 2.3957, "step": 481060 }, { "epoch": 0.9583984125972205, "grad_norm": 0.23640041053295135, "learning_rate": 0.000199222390188061, "loss": 2.4141, "step": 481070 }, { "epoch": 0.9584183348208594, "grad_norm": 0.22726550698280334, "learning_rate": 0.00019911133048153152, "loss": 2.4004, "step": 481080 }, { "epoch": 0.9584382570444983, "grad_norm": 0.22415922582149506, "learning_rate": 0.000199000277623564, "loss": 2.3975, "step": 481090 }, { "epoch": 0.9584581792681371, "grad_norm": 0.22668318450450897, "learning_rate": 0.00019888923161289163, "loss": 2.4119, "step": 481100 }, { "epoch": 0.9584781014917761, "grad_norm": 0.22621989250183105, "learning_rate": 0.00019877819244824813, "loss": 2.3971, "step": 481110 }, { "epoch": 0.958498023715415, "grad_norm": 0.2360614389181137, "learning_rate": 0.0001986671601283676, "loss": 2.3971, "step": 481120 }, { "epoch": 0.9585179459390539, "grad_norm": 0.26208847761154175, "learning_rate": 0.00019855613465198418, "loss": 2.4003, "step": 481130 }, { "epoch": 0.9585378681626928, "grad_norm": 0.22166353464126587, "learning_rate": 0.00019844511601783266, "loss": 2.409, "step": 481140 }, { "epoch": 0.9585577903863317, "grad_norm": 0.2331174612045288, "learning_rate": 0.00019833410422464849, "loss": 2.4187, "step": 481150 }, { "epoch": 0.9585777126099707, "grad_norm": 0.24709898233413696, "learning_rate": 0.00019822309927116667, "loss": 2.3929, "step": 481160 }, { "epoch": 0.9585976348336096, "grad_norm": 0.2475731372833252, "learning_rate": 0.00019811210115612356, "loss": 2.4074, "step": 481170 }, { "epoch": 0.9586175570572485, "grad_norm": 0.2207692265510559, "learning_rate": 0.00019800110987825526, "loss": 2.4066, "step": 481180 }, { "epoch": 0.9586374792808874, "grad_norm": 0.22302836179733276, "learning_rate": 0.00019789012543629857, "loss": 2.4011, "step": 481190 }, { "epoch": 0.9586574015045263, "grad_norm": 0.2196461260318756, "learning_rate": 0.00019777914782899032, "loss": 2.3888, "step": 481200 }, { "epoch": 0.9586773237281653, "grad_norm": 0.23182576894760132, "learning_rate": 0.00019766817705506835, "loss": 2.3949, "step": 481210 }, { "epoch": 0.9586972459518042, "grad_norm": 0.24117302894592285, "learning_rate": 0.00019755721311327058, "loss": 2.3863, "step": 481220 }, { "epoch": 0.9587171681754431, "grad_norm": 0.22862371802330017, "learning_rate": 0.0001974462560023349, "loss": 2.3858, "step": 481230 }, { "epoch": 0.958737090399082, "grad_norm": 0.21878337860107422, "learning_rate": 0.00019733530572100012, "loss": 2.3956, "step": 481240 }, { "epoch": 0.958757012622721, "grad_norm": 0.2279611974954605, "learning_rate": 0.00019722436226800544, "loss": 2.3908, "step": 481250 }, { "epoch": 0.9587769348463598, "grad_norm": 0.22721055150032043, "learning_rate": 0.0001971134256420899, "loss": 2.3987, "step": 481260 }, { "epoch": 0.9587968570699987, "grad_norm": 0.24398867785930634, "learning_rate": 0.00019700249584199382, "loss": 2.4086, "step": 481270 }, { "epoch": 0.9588167792936376, "grad_norm": 0.23865702748298645, "learning_rate": 0.00019689157286645686, "loss": 2.4021, "step": 481280 }, { "epoch": 0.9588367015172765, "grad_norm": 0.22484301030635834, "learning_rate": 0.00019678065671422008, "loss": 2.4104, "step": 481290 }, { "epoch": 0.9588566237409155, "grad_norm": 0.227656751871109, "learning_rate": 0.00019666974738402398, "loss": 2.4162, "step": 481300 }, { "epoch": 0.9588765459645544, "grad_norm": 0.23902840912342072, "learning_rate": 0.0001965588448746103, "loss": 2.3973, "step": 481310 }, { "epoch": 0.9588964681881933, "grad_norm": 0.2216510772705078, "learning_rate": 0.00019644794918472063, "loss": 2.3922, "step": 481320 }, { "epoch": 0.9589163904118322, "grad_norm": 0.25726118683815, "learning_rate": 0.00019633706031309717, "loss": 2.4048, "step": 481330 }, { "epoch": 0.9589363126354711, "grad_norm": 0.245117649435997, "learning_rate": 0.00019622617825848244, "loss": 2.3971, "step": 481340 }, { "epoch": 0.9589562348591101, "grad_norm": 0.23397010564804077, "learning_rate": 0.0001961153030196192, "loss": 2.3968, "step": 481350 }, { "epoch": 0.958976157082749, "grad_norm": 0.2389943152666092, "learning_rate": 0.00019600443459525096, "loss": 2.419, "step": 481360 }, { "epoch": 0.9589960793063879, "grad_norm": 0.26008421182632446, "learning_rate": 0.00019589357298412135, "loss": 2.4095, "step": 481370 }, { "epoch": 0.9590160015300268, "grad_norm": 0.24072301387786865, "learning_rate": 0.00019578271818497428, "loss": 2.4034, "step": 481380 }, { "epoch": 0.9590359237536656, "grad_norm": 0.2546021342277527, "learning_rate": 0.0001956718701965543, "loss": 2.3926, "step": 481390 }, { "epoch": 0.9590558459773046, "grad_norm": 0.22411179542541504, "learning_rate": 0.00019556102901760643, "loss": 2.3973, "step": 481400 }, { "epoch": 0.9590757682009435, "grad_norm": 0.21524034440517426, "learning_rate": 0.00019545019464687542, "loss": 2.404, "step": 481410 }, { "epoch": 0.9590956904245824, "grad_norm": 0.221506267786026, "learning_rate": 0.00019533936708310718, "loss": 2.3948, "step": 481420 }, { "epoch": 0.9591156126482213, "grad_norm": 0.2498454749584198, "learning_rate": 0.00019522854632504782, "loss": 2.4069, "step": 481430 }, { "epoch": 0.9591355348718602, "grad_norm": 0.2254389375448227, "learning_rate": 0.0001951177323714435, "loss": 2.4072, "step": 481440 }, { "epoch": 0.9591554570954992, "grad_norm": 0.22748573124408722, "learning_rate": 0.00019500692522104112, "loss": 2.3965, "step": 481450 }, { "epoch": 0.9591753793191381, "grad_norm": 0.23101015388965607, "learning_rate": 0.0001948961248725878, "loss": 2.3916, "step": 481460 }, { "epoch": 0.959195301542777, "grad_norm": 0.23938384652137756, "learning_rate": 0.00019478533132483112, "loss": 2.3953, "step": 481470 }, { "epoch": 0.9592152237664159, "grad_norm": 0.2581217288970947, "learning_rate": 0.0001946745445765188, "loss": 2.389, "step": 481480 }, { "epoch": 0.9592351459900548, "grad_norm": 0.2189728021621704, "learning_rate": 0.00019456376462639936, "loss": 2.4054, "step": 481490 }, { "epoch": 0.9592550682136938, "grad_norm": 0.2288779318332672, "learning_rate": 0.00019445299147322117, "loss": 2.39, "step": 481500 }, { "epoch": 0.9592749904373327, "grad_norm": 0.222983717918396, "learning_rate": 0.00019434222511573363, "loss": 2.3932, "step": 481510 }, { "epoch": 0.9592949126609716, "grad_norm": 0.21582108736038208, "learning_rate": 0.000194231465552686, "loss": 2.4023, "step": 481520 }, { "epoch": 0.9593148348846104, "grad_norm": 0.23999150097370148, "learning_rate": 0.00019412071278282838, "loss": 2.406, "step": 481530 }, { "epoch": 0.9593347571082494, "grad_norm": 0.21507401764392853, "learning_rate": 0.00019400996680491068, "loss": 2.3879, "step": 481540 }, { "epoch": 0.9593546793318883, "grad_norm": 0.2316046804189682, "learning_rate": 0.00019389922761768363, "loss": 2.4048, "step": 481550 }, { "epoch": 0.9593746015555272, "grad_norm": 0.23691816627979279, "learning_rate": 0.00019378849521989804, "loss": 2.4063, "step": 481560 }, { "epoch": 0.9593945237791661, "grad_norm": 0.2303999364376068, "learning_rate": 0.0001936777696103056, "loss": 2.394, "step": 481570 }, { "epoch": 0.959414446002805, "grad_norm": 0.2086106836795807, "learning_rate": 0.00019356705078765792, "loss": 2.3915, "step": 481580 }, { "epoch": 0.959434368226444, "grad_norm": 0.21477007865905762, "learning_rate": 0.00019345633875070712, "loss": 2.4238, "step": 481590 }, { "epoch": 0.9594542904500829, "grad_norm": 0.22805558145046234, "learning_rate": 0.00019334563349820578, "loss": 2.3972, "step": 481600 }, { "epoch": 0.9594742126737218, "grad_norm": 0.22928392887115479, "learning_rate": 0.00019323493502890643, "loss": 2.3953, "step": 481610 }, { "epoch": 0.9594941348973607, "grad_norm": 0.23302997648715973, "learning_rate": 0.00019312424334156275, "loss": 2.3998, "step": 481620 }, { "epoch": 0.9595140571209996, "grad_norm": 0.23561416566371918, "learning_rate": 0.00019301355843492842, "loss": 2.4006, "step": 481630 }, { "epoch": 0.9595339793446386, "grad_norm": 0.25177639722824097, "learning_rate": 0.00019290288030775728, "loss": 2.3943, "step": 481640 }, { "epoch": 0.9595539015682775, "grad_norm": 0.2394835501909256, "learning_rate": 0.00019279220895880366, "loss": 2.3932, "step": 481650 }, { "epoch": 0.9595738237919164, "grad_norm": 0.230674147605896, "learning_rate": 0.00019268154438682283, "loss": 2.4199, "step": 481660 }, { "epoch": 0.9595937460155552, "grad_norm": 0.21637623012065887, "learning_rate": 0.00019257088659056955, "loss": 2.3983, "step": 481670 }, { "epoch": 0.9596136682391941, "grad_norm": 0.2202867716550827, "learning_rate": 0.00019246023556879942, "loss": 2.3927, "step": 481680 }, { "epoch": 0.9596335904628331, "grad_norm": 0.2407175898551941, "learning_rate": 0.00019234959132026863, "loss": 2.4046, "step": 481690 }, { "epoch": 0.959653512686472, "grad_norm": 0.26656976342201233, "learning_rate": 0.00019223895384373323, "loss": 2.4005, "step": 481700 }, { "epoch": 0.9596734349101109, "grad_norm": 0.244442418217659, "learning_rate": 0.00019212832313795003, "loss": 2.4072, "step": 481710 }, { "epoch": 0.9596933571337498, "grad_norm": 0.22917044162750244, "learning_rate": 0.00019201769920167621, "loss": 2.3938, "step": 481720 }, { "epoch": 0.9597132793573887, "grad_norm": 0.21186016499996185, "learning_rate": 0.00019190708203366925, "loss": 2.3996, "step": 481730 }, { "epoch": 0.9597332015810277, "grad_norm": 0.23472647368907928, "learning_rate": 0.000191796471632687, "loss": 2.4078, "step": 481740 }, { "epoch": 0.9597531238046666, "grad_norm": 0.2485065460205078, "learning_rate": 0.00019168586799748756, "loss": 2.4095, "step": 481750 }, { "epoch": 0.9597730460283055, "grad_norm": 0.22270986437797546, "learning_rate": 0.0001915752711268295, "loss": 2.399, "step": 481760 }, { "epoch": 0.9597929682519444, "grad_norm": 0.22668708860874176, "learning_rate": 0.000191464681019472, "loss": 2.3819, "step": 481770 }, { "epoch": 0.9598128904755833, "grad_norm": 0.2345632165670395, "learning_rate": 0.00019135409767417433, "loss": 2.408, "step": 481780 }, { "epoch": 0.9598328126992223, "grad_norm": 0.23691613972187042, "learning_rate": 0.00019124352108969633, "loss": 2.3925, "step": 481790 }, { "epoch": 0.9598527349228612, "grad_norm": 0.2159482091665268, "learning_rate": 0.00019113295126479792, "loss": 2.406, "step": 481800 }, { "epoch": 0.9598726571465, "grad_norm": 0.22120527923107147, "learning_rate": 0.0001910223881982398, "loss": 2.3931, "step": 481810 }, { "epoch": 0.9598925793701389, "grad_norm": 0.22993974387645721, "learning_rate": 0.0001909118318887826, "loss": 2.4032, "step": 481820 }, { "epoch": 0.959912501593778, "grad_norm": 0.2607935070991516, "learning_rate": 0.0001908012823351879, "loss": 2.4036, "step": 481830 }, { "epoch": 0.9599324238174168, "grad_norm": 0.24092791974544525, "learning_rate": 0.0001906907395362174, "loss": 2.3893, "step": 481840 }, { "epoch": 0.9599523460410557, "grad_norm": 0.24351392686367035, "learning_rate": 0.00019058020349063277, "loss": 2.3942, "step": 481850 }, { "epoch": 0.9599722682646946, "grad_norm": 0.2372363656759262, "learning_rate": 0.00019046967419719652, "loss": 2.409, "step": 481860 }, { "epoch": 0.9599921904883335, "grad_norm": 0.22775483131408691, "learning_rate": 0.00019035915165467165, "loss": 2.4028, "step": 481870 }, { "epoch": 0.9600121127119725, "grad_norm": 0.2377655804157257, "learning_rate": 0.00019024863586182095, "loss": 2.4025, "step": 481880 }, { "epoch": 0.9600320349356114, "grad_norm": 0.28145819902420044, "learning_rate": 0.0001901381268174083, "loss": 2.3964, "step": 481890 }, { "epoch": 0.9600519571592503, "grad_norm": 0.24517881870269775, "learning_rate": 0.00019002762452019728, "loss": 2.3957, "step": 481900 }, { "epoch": 0.9600718793828892, "grad_norm": 0.24890366196632385, "learning_rate": 0.0001899171289689523, "loss": 2.397, "step": 481910 }, { "epoch": 0.9600918016065281, "grad_norm": 0.2345954179763794, "learning_rate": 0.00018980664016243832, "loss": 2.4078, "step": 481920 }, { "epoch": 0.9601117238301671, "grad_norm": 0.22845512628555298, "learning_rate": 0.00018969615809942008, "loss": 2.3888, "step": 481930 }, { "epoch": 0.960131646053806, "grad_norm": 0.2278786450624466, "learning_rate": 0.00018958568277866283, "loss": 2.3997, "step": 481940 }, { "epoch": 0.9601515682774449, "grad_norm": 0.22890885174274445, "learning_rate": 0.00018947521419893288, "loss": 2.3975, "step": 481950 }, { "epoch": 0.9601714905010837, "grad_norm": 0.229453444480896, "learning_rate": 0.0001893647523589961, "loss": 2.4006, "step": 481960 }, { "epoch": 0.9601914127247226, "grad_norm": 0.23669444024562836, "learning_rate": 0.00018925429725761878, "loss": 2.4085, "step": 481970 }, { "epoch": 0.9602113349483616, "grad_norm": 0.24472929537296295, "learning_rate": 0.00018914384889356817, "loss": 2.4087, "step": 481980 }, { "epoch": 0.9602312571720005, "grad_norm": 0.22561900317668915, "learning_rate": 0.0001890334072656117, "loss": 2.389, "step": 481990 }, { "epoch": 0.9602511793956394, "grad_norm": 0.22099563479423523, "learning_rate": 0.00018892297237251677, "loss": 2.3954, "step": 482000 }, { "epoch": 0.9602711016192783, "grad_norm": 0.2664729952812195, "learning_rate": 0.00018881254421305129, "loss": 2.4024, "step": 482010 }, { "epoch": 0.9602910238429172, "grad_norm": 0.22551582753658295, "learning_rate": 0.0001887021227859842, "loss": 2.3991, "step": 482020 }, { "epoch": 0.9603109460665562, "grad_norm": 0.23540185391902924, "learning_rate": 0.00018859170809008385, "loss": 2.4003, "step": 482030 }, { "epoch": 0.9603308682901951, "grad_norm": 0.22902482748031616, "learning_rate": 0.00018848130012411968, "loss": 2.3941, "step": 482040 }, { "epoch": 0.960350790513834, "grad_norm": 0.24519741535186768, "learning_rate": 0.0001883708988868611, "loss": 2.3987, "step": 482050 }, { "epoch": 0.9603707127374729, "grad_norm": 0.2568231523036957, "learning_rate": 0.000188260504377078, "loss": 2.3962, "step": 482060 }, { "epoch": 0.9603906349611118, "grad_norm": 0.22396224737167358, "learning_rate": 0.00018815011659354086, "loss": 2.3975, "step": 482070 }, { "epoch": 0.9604105571847508, "grad_norm": 0.24963581562042236, "learning_rate": 0.00018803973553502008, "loss": 2.3921, "step": 482080 }, { "epoch": 0.9604304794083897, "grad_norm": 0.24212640523910522, "learning_rate": 0.00018792936120028703, "loss": 2.3925, "step": 482090 }, { "epoch": 0.9604504016320285, "grad_norm": 0.24497175216674805, "learning_rate": 0.00018781899358811293, "loss": 2.4086, "step": 482100 }, { "epoch": 0.9604703238556674, "grad_norm": 0.2646888792514801, "learning_rate": 0.00018770863269726968, "loss": 2.3991, "step": 482110 }, { "epoch": 0.9604902460793064, "grad_norm": 0.25749191641807556, "learning_rate": 0.00018759827852652934, "loss": 2.3854, "step": 482120 }, { "epoch": 0.9605101683029453, "grad_norm": 0.2313535064458847, "learning_rate": 0.00018748793107466443, "loss": 2.3844, "step": 482130 }, { "epoch": 0.9605300905265842, "grad_norm": 0.22623123228549957, "learning_rate": 0.0001873775903404482, "loss": 2.397, "step": 482140 }, { "epoch": 0.9605500127502231, "grad_norm": 0.23110438883304596, "learning_rate": 0.00018726725632265363, "loss": 2.4167, "step": 482150 }, { "epoch": 0.960569934973862, "grad_norm": 0.2507913112640381, "learning_rate": 0.00018715692902005433, "loss": 2.3992, "step": 482160 }, { "epoch": 0.960589857197501, "grad_norm": 0.21624179184436798, "learning_rate": 0.00018704660843142464, "loss": 2.3794, "step": 482170 }, { "epoch": 0.9606097794211399, "grad_norm": 0.21957406401634216, "learning_rate": 0.00018693629455553886, "loss": 2.4169, "step": 482180 }, { "epoch": 0.9606297016447788, "grad_norm": 0.2520206868648529, "learning_rate": 0.0001868259873911715, "loss": 2.3928, "step": 482190 }, { "epoch": 0.9606496238684177, "grad_norm": 0.24404913187026978, "learning_rate": 0.00018671568693709806, "loss": 2.4058, "step": 482200 }, { "epoch": 0.9606695460920566, "grad_norm": 0.224511057138443, "learning_rate": 0.0001866053931920939, "loss": 2.3986, "step": 482210 }, { "epoch": 0.9606894683156956, "grad_norm": 0.22617313265800476, "learning_rate": 0.00018649510615493514, "loss": 2.4162, "step": 482220 }, { "epoch": 0.9607093905393345, "grad_norm": 0.2359374314546585, "learning_rate": 0.00018638482582439763, "loss": 2.3952, "step": 482230 }, { "epoch": 0.9607293127629734, "grad_norm": 0.238678976893425, "learning_rate": 0.0001862745521992586, "loss": 2.4001, "step": 482240 }, { "epoch": 0.9607492349866122, "grad_norm": 0.23623302578926086, "learning_rate": 0.00018616428527829453, "loss": 2.4121, "step": 482250 }, { "epoch": 0.9607691572102511, "grad_norm": 0.22768552601337433, "learning_rate": 0.00018605402506028312, "loss": 2.4021, "step": 482260 }, { "epoch": 0.9607890794338901, "grad_norm": 0.22062957286834717, "learning_rate": 0.000185943771544002, "loss": 2.3767, "step": 482270 }, { "epoch": 0.960809001657529, "grad_norm": 0.23555582761764526, "learning_rate": 0.00018583352472822945, "loss": 2.385, "step": 482280 }, { "epoch": 0.9608289238811679, "grad_norm": 0.22208793461322784, "learning_rate": 0.0001857232846117438, "loss": 2.3906, "step": 482290 }, { "epoch": 0.9608488461048068, "grad_norm": 0.24418805539608002, "learning_rate": 0.00018561305119332383, "loss": 2.3829, "step": 482300 }, { "epoch": 0.9608687683284457, "grad_norm": 0.6731482148170471, "learning_rate": 0.00018550282447174916, "loss": 2.4011, "step": 482310 }, { "epoch": 0.9608886905520847, "grad_norm": 0.23995405435562134, "learning_rate": 0.0001853926044457992, "loss": 2.404, "step": 482320 }, { "epoch": 0.9609086127757236, "grad_norm": 0.21332372725009918, "learning_rate": 0.00018528239111425382, "loss": 2.39, "step": 482330 }, { "epoch": 0.9609285349993625, "grad_norm": 0.22537441551685333, "learning_rate": 0.00018517218447589357, "loss": 2.3992, "step": 482340 }, { "epoch": 0.9609484572230014, "grad_norm": 0.25356975197792053, "learning_rate": 0.0001850619845294992, "loss": 2.4028, "step": 482350 }, { "epoch": 0.9609683794466403, "grad_norm": 0.23169752955436707, "learning_rate": 0.00018495179127385166, "loss": 2.3935, "step": 482360 }, { "epoch": 0.9609883016702793, "grad_norm": 0.24080923199653625, "learning_rate": 0.00018484160470773236, "loss": 2.3977, "step": 482370 }, { "epoch": 0.9610082238939182, "grad_norm": 0.2232186198234558, "learning_rate": 0.00018473142482992345, "loss": 2.3755, "step": 482380 }, { "epoch": 0.961028146117557, "grad_norm": 0.2365078330039978, "learning_rate": 0.00018462125163920694, "loss": 2.4006, "step": 482390 }, { "epoch": 0.9610480683411959, "grad_norm": 0.2267204374074936, "learning_rate": 0.0001845110851343652, "loss": 2.3906, "step": 482400 }, { "epoch": 0.9610679905648349, "grad_norm": 0.231662780046463, "learning_rate": 0.00018440092531418162, "loss": 2.3994, "step": 482410 }, { "epoch": 0.9610879127884738, "grad_norm": 0.22563467919826508, "learning_rate": 0.00018429077217743916, "loss": 2.4067, "step": 482420 }, { "epoch": 0.9611078350121127, "grad_norm": 0.25030505657196045, "learning_rate": 0.0001841806257229217, "loss": 2.4034, "step": 482430 }, { "epoch": 0.9611277572357516, "grad_norm": 0.21979500353336334, "learning_rate": 0.00018407048594941332, "loss": 2.3953, "step": 482440 }, { "epoch": 0.9611476794593905, "grad_norm": 0.23265299201011658, "learning_rate": 0.00018396035285569835, "loss": 2.4113, "step": 482450 }, { "epoch": 0.9611676016830295, "grad_norm": 0.2396278977394104, "learning_rate": 0.00018385022644056147, "loss": 2.3937, "step": 482460 }, { "epoch": 0.9611875239066684, "grad_norm": 0.23980093002319336, "learning_rate": 0.00018374010670278796, "loss": 2.3855, "step": 482470 }, { "epoch": 0.9612074461303073, "grad_norm": 0.24323667585849762, "learning_rate": 0.0001836299936411634, "loss": 2.4037, "step": 482480 }, { "epoch": 0.9612273683539462, "grad_norm": 0.22134855389595032, "learning_rate": 0.00018351988725447366, "loss": 2.4011, "step": 482490 }, { "epoch": 0.9612472905775851, "grad_norm": 0.2314482480287552, "learning_rate": 0.00018340978754150506, "loss": 2.4028, "step": 482500 }, { "epoch": 0.9612672128012241, "grad_norm": 0.22581632435321808, "learning_rate": 0.00018329969450104413, "loss": 2.3825, "step": 482510 }, { "epoch": 0.961287135024863, "grad_norm": 0.2460959404706955, "learning_rate": 0.0001831896081318778, "loss": 2.4, "step": 482520 }, { "epoch": 0.9613070572485019, "grad_norm": 0.23111560940742493, "learning_rate": 0.0001830795284327935, "loss": 2.3899, "step": 482530 }, { "epoch": 0.9613269794721407, "grad_norm": 0.21837542951107025, "learning_rate": 0.00018296945540257915, "loss": 2.4031, "step": 482540 }, { "epoch": 0.9613469016957796, "grad_norm": 0.22134384512901306, "learning_rate": 0.00018285938904002253, "loss": 2.3895, "step": 482550 }, { "epoch": 0.9613668239194186, "grad_norm": 0.22061705589294434, "learning_rate": 0.00018274932934391242, "loss": 2.4001, "step": 482560 }, { "epoch": 0.9613867461430575, "grad_norm": 0.22998742759227753, "learning_rate": 0.00018263927631303757, "loss": 2.4043, "step": 482570 }, { "epoch": 0.9614066683666964, "grad_norm": 0.2512701153755188, "learning_rate": 0.00018252922994618692, "loss": 2.3981, "step": 482580 }, { "epoch": 0.9614265905903353, "grad_norm": 0.24324212968349457, "learning_rate": 0.00018241919024215058, "loss": 2.4058, "step": 482590 }, { "epoch": 0.9614465128139742, "grad_norm": 0.23371624946594238, "learning_rate": 0.00018230915719971797, "loss": 2.4077, "step": 482600 }, { "epoch": 0.9614664350376132, "grad_norm": 0.24273689091205597, "learning_rate": 0.00018219913081767957, "loss": 2.3869, "step": 482610 }, { "epoch": 0.9614863572612521, "grad_norm": 0.23033766448497772, "learning_rate": 0.00018208911109482596, "loss": 2.392, "step": 482620 }, { "epoch": 0.961506279484891, "grad_norm": 0.2568092346191406, "learning_rate": 0.00018197909802994828, "loss": 2.4143, "step": 482630 }, { "epoch": 0.9615262017085299, "grad_norm": 0.22350826859474182, "learning_rate": 0.00018186909162183818, "loss": 2.3928, "step": 482640 }, { "epoch": 0.9615461239321688, "grad_norm": 0.22692011296749115, "learning_rate": 0.00018175909186928709, "loss": 2.3928, "step": 482650 }, { "epoch": 0.9615660461558078, "grad_norm": 0.24082276225090027, "learning_rate": 0.00018164909877108705, "loss": 2.399, "step": 482660 }, { "epoch": 0.9615859683794467, "grad_norm": 0.22682613134384155, "learning_rate": 0.0001815391123260308, "loss": 2.3817, "step": 482670 }, { "epoch": 0.9616058906030855, "grad_norm": 0.22394412755966187, "learning_rate": 0.00018142913253291115, "loss": 2.3926, "step": 482680 }, { "epoch": 0.9616258128267244, "grad_norm": 0.2476455718278885, "learning_rate": 0.00018131915939052147, "loss": 2.4115, "step": 482690 }, { "epoch": 0.9616457350503633, "grad_norm": 0.2455894947052002, "learning_rate": 0.00018120919289765493, "loss": 2.3813, "step": 482700 }, { "epoch": 0.9616656572740023, "grad_norm": 0.25370660424232483, "learning_rate": 0.00018109923305310583, "loss": 2.3861, "step": 482710 }, { "epoch": 0.9616855794976412, "grad_norm": 0.2539231479167938, "learning_rate": 0.00018098927985566826, "loss": 2.3899, "step": 482720 }, { "epoch": 0.9617055017212801, "grad_norm": 0.23637251555919647, "learning_rate": 0.0001808793333041372, "loss": 2.4071, "step": 482730 }, { "epoch": 0.961725423944919, "grad_norm": 0.23432035744190216, "learning_rate": 0.00018076939339730757, "loss": 2.3837, "step": 482740 }, { "epoch": 0.961745346168558, "grad_norm": 0.22664564847946167, "learning_rate": 0.00018065946013397482, "loss": 2.3959, "step": 482750 }, { "epoch": 0.9617652683921969, "grad_norm": 0.23467448353767395, "learning_rate": 0.00018054953351293458, "loss": 2.3895, "step": 482760 }, { "epoch": 0.9617851906158358, "grad_norm": 0.23417732119560242, "learning_rate": 0.0001804396135329831, "loss": 2.3833, "step": 482770 }, { "epoch": 0.9618051128394747, "grad_norm": 0.25489193201065063, "learning_rate": 0.00018032970019291695, "loss": 2.3997, "step": 482780 }, { "epoch": 0.9618250350631136, "grad_norm": 0.23355847597122192, "learning_rate": 0.00018021979349153262, "loss": 2.3901, "step": 482790 }, { "epoch": 0.9618449572867526, "grad_norm": 0.22951191663742065, "learning_rate": 0.00018010989342762796, "loss": 2.3893, "step": 482800 }, { "epoch": 0.9618648795103915, "grad_norm": 0.2401479035615921, "learning_rate": 0.00018000000000000017, "loss": 2.4045, "step": 482810 }, { "epoch": 0.9618848017340303, "grad_norm": 0.2161339968442917, "learning_rate": 0.0001798901132074471, "loss": 2.4148, "step": 482820 }, { "epoch": 0.9619047239576692, "grad_norm": 0.24868568778038025, "learning_rate": 0.00017978023304876723, "loss": 2.4056, "step": 482830 }, { "epoch": 0.9619246461813081, "grad_norm": 0.2297907918691635, "learning_rate": 0.00017967035952275935, "loss": 2.3969, "step": 482840 }, { "epoch": 0.9619445684049471, "grad_norm": 0.29525327682495117, "learning_rate": 0.0001795604926282224, "loss": 2.3927, "step": 482850 }, { "epoch": 0.961964490628586, "grad_norm": 0.2465655356645584, "learning_rate": 0.00017945063236395598, "loss": 2.394, "step": 482860 }, { "epoch": 0.9619844128522249, "grad_norm": 0.23821622133255005, "learning_rate": 0.00017934077872875954, "loss": 2.3954, "step": 482870 }, { "epoch": 0.9620043350758638, "grad_norm": 0.2367747575044632, "learning_rate": 0.00017923093172143335, "loss": 2.4088, "step": 482880 }, { "epoch": 0.9620242572995027, "grad_norm": 0.21696437895298004, "learning_rate": 0.00017912109134077793, "loss": 2.3837, "step": 482890 }, { "epoch": 0.9620441795231417, "grad_norm": 0.22797340154647827, "learning_rate": 0.000179011257585594, "loss": 2.3894, "step": 482900 }, { "epoch": 0.9620641017467806, "grad_norm": 0.23004819452762604, "learning_rate": 0.000178901430454683, "loss": 2.3892, "step": 482910 }, { "epoch": 0.9620840239704195, "grad_norm": 0.22853820025920868, "learning_rate": 0.0001787916099468463, "loss": 2.4048, "step": 482920 }, { "epoch": 0.9621039461940584, "grad_norm": 0.22636406123638153, "learning_rate": 0.00017868179606088597, "loss": 2.4044, "step": 482930 }, { "epoch": 0.9621238684176973, "grad_norm": 0.2530619204044342, "learning_rate": 0.00017857198879560432, "loss": 2.3834, "step": 482940 }, { "epoch": 0.9621437906413363, "grad_norm": 0.2231733500957489, "learning_rate": 0.00017846218814980409, "loss": 2.3898, "step": 482950 }, { "epoch": 0.9621637128649752, "grad_norm": 0.2437005639076233, "learning_rate": 0.00017835239412228798, "loss": 2.3828, "step": 482960 }, { "epoch": 0.962183635088614, "grad_norm": 0.24295777082443237, "learning_rate": 0.00017824260671185966, "loss": 2.3945, "step": 482970 }, { "epoch": 0.9622035573122529, "grad_norm": 0.238625630736351, "learning_rate": 0.00017813282591732293, "loss": 2.4084, "step": 482980 }, { "epoch": 0.9622234795358918, "grad_norm": 0.23727279901504517, "learning_rate": 0.00017802305173748146, "loss": 2.3925, "step": 482990 }, { "epoch": 0.9622434017595308, "grad_norm": 0.26260390877723694, "learning_rate": 0.00017791328417114015, "loss": 2.3854, "step": 483000 }, { "epoch": 0.9622633239831697, "grad_norm": 0.43039754033088684, "learning_rate": 0.0001778035232171038, "loss": 2.3917, "step": 483010 }, { "epoch": 0.9622832462068086, "grad_norm": 0.21549426019191742, "learning_rate": 0.00017769376887417733, "loss": 2.4035, "step": 483020 }, { "epoch": 0.9623031684304475, "grad_norm": 0.2387695014476776, "learning_rate": 0.00017758402114116656, "loss": 2.4026, "step": 483030 }, { "epoch": 0.9623230906540865, "grad_norm": 0.23042398691177368, "learning_rate": 0.00017747428001687715, "loss": 2.3966, "step": 483040 }, { "epoch": 0.9623430128777254, "grad_norm": 0.2366408109664917, "learning_rate": 0.0001773645455001156, "loss": 2.4013, "step": 483050 }, { "epoch": 0.9623629351013643, "grad_norm": 0.25909218192100525, "learning_rate": 0.00017725481758968842, "loss": 2.3875, "step": 483060 }, { "epoch": 0.9623828573250032, "grad_norm": 0.22935791313648224, "learning_rate": 0.00017714509628440257, "loss": 2.3996, "step": 483070 }, { "epoch": 0.9624027795486421, "grad_norm": 0.25673362612724304, "learning_rate": 0.0001770353815830654, "loss": 2.4018, "step": 483080 }, { "epoch": 0.9624227017722811, "grad_norm": 0.23681576550006866, "learning_rate": 0.00017692567348448463, "loss": 2.3792, "step": 483090 }, { "epoch": 0.96244262399592, "grad_norm": 0.24300602078437805, "learning_rate": 0.00017681597198746825, "loss": 2.3952, "step": 483100 }, { "epoch": 0.9624625462195588, "grad_norm": 0.23135338723659515, "learning_rate": 0.00017670627709082497, "loss": 2.3941, "step": 483110 }, { "epoch": 0.9624824684431977, "grad_norm": 0.24264633655548096, "learning_rate": 0.00017659658879336294, "loss": 2.3947, "step": 483120 }, { "epoch": 0.9625023906668366, "grad_norm": 0.22899441421031952, "learning_rate": 0.00017648690709389192, "loss": 2.3899, "step": 483130 }, { "epoch": 0.9625223128904756, "grad_norm": 0.22316411137580872, "learning_rate": 0.00017637723199122137, "loss": 2.4095, "step": 483140 }, { "epoch": 0.9625422351141145, "grad_norm": 0.4786483943462372, "learning_rate": 0.00017626756348416062, "loss": 2.3976, "step": 483150 }, { "epoch": 0.9625621573377534, "grad_norm": 0.22676198184490204, "learning_rate": 0.00017615790157152046, "loss": 2.3817, "step": 483160 }, { "epoch": 0.9625820795613923, "grad_norm": 0.23407067358493805, "learning_rate": 0.0001760482462521109, "loss": 2.4213, "step": 483170 }, { "epoch": 0.9626020017850312, "grad_norm": 0.23122599720954895, "learning_rate": 0.00017593859752474338, "loss": 2.3968, "step": 483180 }, { "epoch": 0.9626219240086702, "grad_norm": 0.23149417340755463, "learning_rate": 0.00017582895538822908, "loss": 2.3947, "step": 483190 }, { "epoch": 0.9626418462323091, "grad_norm": 0.2282685488462448, "learning_rate": 0.00017571931984137934, "loss": 2.3868, "step": 483200 }, { "epoch": 0.962661768455948, "grad_norm": 0.2027006298303604, "learning_rate": 0.00017560969088300626, "loss": 2.3843, "step": 483210 }, { "epoch": 0.9626816906795869, "grad_norm": 0.2507687211036682, "learning_rate": 0.00017550006851192236, "loss": 2.3857, "step": 483220 }, { "epoch": 0.9627016129032258, "grad_norm": 0.2370961606502533, "learning_rate": 0.00017539045272694031, "loss": 2.3872, "step": 483230 }, { "epoch": 0.9627215351268648, "grad_norm": 0.2533937096595764, "learning_rate": 0.0001752808435268729, "loss": 2.3869, "step": 483240 }, { "epoch": 0.9627414573505036, "grad_norm": 0.2345910370349884, "learning_rate": 0.00017517124091053415, "loss": 2.3861, "step": 483250 }, { "epoch": 0.9627613795741425, "grad_norm": 0.23581743240356445, "learning_rate": 0.00017506164487673725, "loss": 2.3842, "step": 483260 }, { "epoch": 0.9627813017977814, "grad_norm": 0.2808712124824524, "learning_rate": 0.00017495205542429647, "loss": 2.386, "step": 483270 }, { "epoch": 0.9628012240214203, "grad_norm": 0.23000435531139374, "learning_rate": 0.00017484247255202657, "loss": 2.4071, "step": 483280 }, { "epoch": 0.9628211462450593, "grad_norm": 0.23001360893249512, "learning_rate": 0.000174732896258742, "loss": 2.3835, "step": 483290 }, { "epoch": 0.9628410684686982, "grad_norm": 0.2278420329093933, "learning_rate": 0.00017462332654325841, "loss": 2.3973, "step": 483300 }, { "epoch": 0.9628609906923371, "grad_norm": 0.23519659042358398, "learning_rate": 0.00017451376340439095, "loss": 2.3759, "step": 483310 }, { "epoch": 0.962880912915976, "grad_norm": 0.23888841271400452, "learning_rate": 0.0001744042068409557, "loss": 2.3822, "step": 483320 }, { "epoch": 0.962900835139615, "grad_norm": 0.236885204911232, "learning_rate": 0.00017429465685176894, "loss": 2.3886, "step": 483330 }, { "epoch": 0.9629207573632539, "grad_norm": 0.23090921342372894, "learning_rate": 0.00017418511343564736, "loss": 2.3975, "step": 483340 }, { "epoch": 0.9629406795868928, "grad_norm": 0.22720178961753845, "learning_rate": 0.00017407557659140772, "loss": 2.4053, "step": 483350 }, { "epoch": 0.9629606018105317, "grad_norm": 0.23919029533863068, "learning_rate": 0.0001739660463178676, "loss": 2.3886, "step": 483360 }, { "epoch": 0.9629805240341706, "grad_norm": 0.23987741768360138, "learning_rate": 0.00017385652261384465, "loss": 2.3949, "step": 483370 }, { "epoch": 0.9630004462578096, "grad_norm": 0.24703115224838257, "learning_rate": 0.0001737470054781567, "loss": 2.3899, "step": 483380 }, { "epoch": 0.9630203684814485, "grad_norm": 0.25605276226997375, "learning_rate": 0.0001736374949096222, "loss": 2.3879, "step": 483390 }, { "epoch": 0.9630402907050873, "grad_norm": 0.22733476758003235, "learning_rate": 0.00017352799090706016, "loss": 2.3747, "step": 483400 }, { "epoch": 0.9630602129287262, "grad_norm": 0.23573726415634155, "learning_rate": 0.00017341849346928952, "loss": 2.3772, "step": 483410 }, { "epoch": 0.9630801351523651, "grad_norm": 0.22221769392490387, "learning_rate": 0.0001733090025951296, "loss": 2.3959, "step": 483420 }, { "epoch": 0.9631000573760041, "grad_norm": 0.2513312101364136, "learning_rate": 0.00017319951828340054, "loss": 2.3699, "step": 483430 }, { "epoch": 0.963119979599643, "grad_norm": 0.24153362214565277, "learning_rate": 0.0001730900405329221, "loss": 2.4082, "step": 483440 }, { "epoch": 0.9631399018232819, "grad_norm": 0.25524601340293884, "learning_rate": 0.00017298056934251504, "loss": 2.3867, "step": 483450 }, { "epoch": 0.9631598240469208, "grad_norm": 0.23973850905895233, "learning_rate": 0.00017287110471100032, "loss": 2.4003, "step": 483460 }, { "epoch": 0.9631797462705597, "grad_norm": 0.35207438468933105, "learning_rate": 0.00017276164663719906, "loss": 2.3958, "step": 483470 }, { "epoch": 0.9631996684941987, "grad_norm": 0.24736572802066803, "learning_rate": 0.0001726521951199329, "loss": 2.389, "step": 483480 }, { "epoch": 0.9632195907178376, "grad_norm": 0.22690938413143158, "learning_rate": 0.00017254275015802346, "loss": 2.3837, "step": 483490 }, { "epoch": 0.9632395129414765, "grad_norm": 0.24211174249649048, "learning_rate": 0.0001724333117502934, "loss": 2.4172, "step": 483500 }, { "epoch": 0.9632594351651154, "grad_norm": 0.23016752302646637, "learning_rate": 0.00017232387989556531, "loss": 2.3927, "step": 483510 }, { "epoch": 0.9632793573887543, "grad_norm": 0.2761654853820801, "learning_rate": 0.00017221445459266206, "loss": 2.3886, "step": 483520 }, { "epoch": 0.9632992796123933, "grad_norm": 0.2474464476108551, "learning_rate": 0.00017210503584040682, "loss": 2.3933, "step": 483530 }, { "epoch": 0.9633192018360321, "grad_norm": 0.24439406394958496, "learning_rate": 0.00017199562363762368, "loss": 2.394, "step": 483540 }, { "epoch": 0.963339124059671, "grad_norm": 0.23585359752178192, "learning_rate": 0.00017188621798313664, "loss": 2.3858, "step": 483550 }, { "epoch": 0.9633590462833099, "grad_norm": 0.23947475850582123, "learning_rate": 0.0001717768188757698, "loss": 2.4016, "step": 483560 }, { "epoch": 0.9633789685069488, "grad_norm": 0.23311550915241241, "learning_rate": 0.00017166742631434808, "loss": 2.3836, "step": 483570 }, { "epoch": 0.9633988907305878, "grad_norm": 0.2410261034965515, "learning_rate": 0.00017155804029769662, "loss": 2.39, "step": 483580 }, { "epoch": 0.9634188129542267, "grad_norm": 0.21919465065002441, "learning_rate": 0.0001714486608246406, "loss": 2.391, "step": 483590 }, { "epoch": 0.9634387351778656, "grad_norm": 0.23566684126853943, "learning_rate": 0.00017133928789400633, "loss": 2.4051, "step": 483600 }, { "epoch": 0.9634586574015045, "grad_norm": 0.2503674626350403, "learning_rate": 0.00017122992150461958, "loss": 2.407, "step": 483610 }, { "epoch": 0.9634785796251435, "grad_norm": 0.23857088387012482, "learning_rate": 0.00017112056165530687, "loss": 2.4141, "step": 483620 }, { "epoch": 0.9634985018487824, "grad_norm": 0.23008744418621063, "learning_rate": 0.00017101120834489537, "loss": 2.408, "step": 483630 }, { "epoch": 0.9635184240724213, "grad_norm": 0.22703441977500916, "learning_rate": 0.00017090186157221176, "loss": 2.3941, "step": 483640 }, { "epoch": 0.9635383462960602, "grad_norm": 0.23489849269390106, "learning_rate": 0.00017079252133608413, "loss": 2.3981, "step": 483650 }, { "epoch": 0.9635582685196991, "grad_norm": 0.24781355261802673, "learning_rate": 0.00017068318763534008, "loss": 2.3948, "step": 483660 }, { "epoch": 0.9635781907433381, "grad_norm": 0.23995724320411682, "learning_rate": 0.00017057386046880808, "loss": 2.3906, "step": 483670 }, { "epoch": 0.963598112966977, "grad_norm": 0.2400350570678711, "learning_rate": 0.0001704645398353166, "loss": 2.3851, "step": 483680 }, { "epoch": 0.9636180351906158, "grad_norm": 0.2262609750032425, "learning_rate": 0.00017035522573369445, "loss": 2.4032, "step": 483690 }, { "epoch": 0.9636379574142547, "grad_norm": 0.22898045182228088, "learning_rate": 0.00017024591816277134, "loss": 2.3861, "step": 483700 }, { "epoch": 0.9636578796378936, "grad_norm": 0.24023842811584473, "learning_rate": 0.0001701366171213765, "loss": 2.4121, "step": 483710 }, { "epoch": 0.9636778018615326, "grad_norm": 0.25377723574638367, "learning_rate": 0.0001700273226083402, "loss": 2.3931, "step": 483720 }, { "epoch": 0.9636977240851715, "grad_norm": 0.2441047579050064, "learning_rate": 0.00016991803462249267, "loss": 2.4006, "step": 483730 }, { "epoch": 0.9637176463088104, "grad_norm": 0.22312356531620026, "learning_rate": 0.00016980875316266464, "loss": 2.3905, "step": 483740 }, { "epoch": 0.9637375685324493, "grad_norm": 0.24794170260429382, "learning_rate": 0.00016969947822768728, "loss": 2.3865, "step": 483750 }, { "epoch": 0.9637574907560882, "grad_norm": 0.22256869077682495, "learning_rate": 0.00016959020981639194, "loss": 2.3883, "step": 483760 }, { "epoch": 0.9637774129797272, "grad_norm": 0.2405729442834854, "learning_rate": 0.00016948094792761025, "loss": 2.3956, "step": 483770 }, { "epoch": 0.9637973352033661, "grad_norm": 0.24334876239299774, "learning_rate": 0.0001693716925601745, "loss": 2.4035, "step": 483780 }, { "epoch": 0.963817257427005, "grad_norm": 0.23296640813350677, "learning_rate": 0.0001692624437129171, "loss": 2.3883, "step": 483790 }, { "epoch": 0.9638371796506439, "grad_norm": 0.2198728621006012, "learning_rate": 0.00016915320138467083, "loss": 2.3694, "step": 483800 }, { "epoch": 0.9638571018742828, "grad_norm": 0.2203895002603531, "learning_rate": 0.00016904396557426859, "loss": 2.3928, "step": 483810 }, { "epoch": 0.9638770240979218, "grad_norm": 0.22413593530654907, "learning_rate": 0.00016893473628054424, "loss": 2.3868, "step": 483820 }, { "epoch": 0.9638969463215606, "grad_norm": 0.22154195606708527, "learning_rate": 0.00016882551350233134, "loss": 2.3934, "step": 483830 }, { "epoch": 0.9639168685451995, "grad_norm": 0.23007529973983765, "learning_rate": 0.00016871629723846438, "loss": 2.4071, "step": 483840 }, { "epoch": 0.9639367907688384, "grad_norm": 0.24530401825904846, "learning_rate": 0.00016860708748777785, "loss": 2.4025, "step": 483850 }, { "epoch": 0.9639567129924773, "grad_norm": 0.22975221276283264, "learning_rate": 0.0001684978842491063, "loss": 2.3992, "step": 483860 }, { "epoch": 0.9639766352161163, "grad_norm": 0.21729032695293427, "learning_rate": 0.0001683886875212852, "loss": 2.3863, "step": 483870 }, { "epoch": 0.9639965574397552, "grad_norm": 0.25041720271110535, "learning_rate": 0.00016827949730315029, "loss": 2.3823, "step": 483880 }, { "epoch": 0.9640164796633941, "grad_norm": 0.21646307408809662, "learning_rate": 0.0001681703135935373, "loss": 2.3853, "step": 483890 }, { "epoch": 0.964036401887033, "grad_norm": 0.24670718610286713, "learning_rate": 0.00016806113639128228, "loss": 2.4008, "step": 483900 }, { "epoch": 0.964056324110672, "grad_norm": 0.23559752106666565, "learning_rate": 0.00016795196569522242, "loss": 2.3945, "step": 483910 }, { "epoch": 0.9640762463343109, "grad_norm": 0.23854780197143555, "learning_rate": 0.00016784280150419417, "loss": 2.3704, "step": 483920 }, { "epoch": 0.9640961685579498, "grad_norm": 0.22710208594799042, "learning_rate": 0.00016773364381703494, "loss": 2.3986, "step": 483930 }, { "epoch": 0.9641160907815887, "grad_norm": 0.23173168301582336, "learning_rate": 0.00016762449263258274, "loss": 2.383, "step": 483940 }, { "epoch": 0.9641360130052276, "grad_norm": 0.22188225388526917, "learning_rate": 0.00016751534794967515, "loss": 2.3982, "step": 483950 }, { "epoch": 0.9641559352288666, "grad_norm": 0.24307017028331757, "learning_rate": 0.00016740620976715071, "loss": 2.3993, "step": 483960 }, { "epoch": 0.9641758574525054, "grad_norm": 0.23067444562911987, "learning_rate": 0.00016729707808384831, "loss": 2.3896, "step": 483970 }, { "epoch": 0.9641957796761443, "grad_norm": 0.22752657532691956, "learning_rate": 0.00016718795289860666, "loss": 2.3785, "step": 483980 }, { "epoch": 0.9642157018997832, "grad_norm": 0.2368890792131424, "learning_rate": 0.00016707883421026538, "loss": 2.3877, "step": 483990 }, { "epoch": 0.9642356241234221, "grad_norm": 0.24308903515338898, "learning_rate": 0.00016696972201766402, "loss": 2.3985, "step": 484000 }, { "epoch": 0.9642555463470611, "grad_norm": 0.22327972948551178, "learning_rate": 0.00016686061631964288, "loss": 2.3922, "step": 484010 }, { "epoch": 0.9642754685707, "grad_norm": 0.23312976956367493, "learning_rate": 0.000166751517115042, "loss": 2.3748, "step": 484020 }, { "epoch": 0.9642953907943389, "grad_norm": 0.2353822886943817, "learning_rate": 0.0001666424244027025, "loss": 2.3917, "step": 484030 }, { "epoch": 0.9643153130179778, "grad_norm": 0.22055114805698395, "learning_rate": 0.00016653333818146554, "loss": 2.3989, "step": 484040 }, { "epoch": 0.9643352352416167, "grad_norm": 0.2371106892824173, "learning_rate": 0.0001664242584501725, "loss": 2.3979, "step": 484050 }, { "epoch": 0.9643551574652557, "grad_norm": 0.24211901426315308, "learning_rate": 0.00016631518520766498, "loss": 2.3811, "step": 484060 }, { "epoch": 0.9643750796888946, "grad_norm": 0.23840324580669403, "learning_rate": 0.00016620611845278544, "loss": 2.4007, "step": 484070 }, { "epoch": 0.9643950019125335, "grad_norm": 0.23715943098068237, "learning_rate": 0.00016609705818437592, "loss": 2.3993, "step": 484080 }, { "epoch": 0.9644149241361724, "grad_norm": 0.25149276852607727, "learning_rate": 0.00016598800440127982, "loss": 2.3848, "step": 484090 }, { "epoch": 0.9644348463598112, "grad_norm": 0.24798433482646942, "learning_rate": 0.00016587895710234, "loss": 2.3834, "step": 484100 }, { "epoch": 0.9644547685834503, "grad_norm": 0.22725234925746918, "learning_rate": 0.00016576991628640015, "loss": 2.3956, "step": 484110 }, { "epoch": 0.9644746908070891, "grad_norm": 0.23292917013168335, "learning_rate": 0.0001656608819523038, "loss": 2.3921, "step": 484120 }, { "epoch": 0.964494613030728, "grad_norm": 0.22134123742580414, "learning_rate": 0.0001655518540988954, "loss": 2.4142, "step": 484130 }, { "epoch": 0.9645145352543669, "grad_norm": 0.25033724308013916, "learning_rate": 0.0001654428327250197, "loss": 2.3989, "step": 484140 }, { "epoch": 0.9645344574780058, "grad_norm": 0.23837071657180786, "learning_rate": 0.0001653338178295214, "loss": 2.3837, "step": 484150 }, { "epoch": 0.9645543797016448, "grad_norm": 0.2342568188905716, "learning_rate": 0.00016522480941124563, "loss": 2.392, "step": 484160 }, { "epoch": 0.9645743019252837, "grad_norm": 0.29219529032707214, "learning_rate": 0.00016511580746903821, "loss": 2.4159, "step": 484170 }, { "epoch": 0.9645942241489226, "grad_norm": 0.2323838174343109, "learning_rate": 0.00016500681200174472, "loss": 2.3932, "step": 484180 }, { "epoch": 0.9646141463725615, "grad_norm": 0.24893616139888763, "learning_rate": 0.00016489782300821188, "loss": 2.4022, "step": 484190 }, { "epoch": 0.9646340685962004, "grad_norm": 0.2462407648563385, "learning_rate": 0.00016478884048728616, "loss": 2.3925, "step": 484200 }, { "epoch": 0.9646539908198394, "grad_norm": 0.22718343138694763, "learning_rate": 0.00016467986443781423, "loss": 2.4023, "step": 484210 }, { "epoch": 0.9646739130434783, "grad_norm": 0.214363232254982, "learning_rate": 0.00016457089485864373, "loss": 2.3961, "step": 484220 }, { "epoch": 0.9646938352671172, "grad_norm": 0.21681790053844452, "learning_rate": 0.00016446193174862202, "loss": 2.3896, "step": 484230 }, { "epoch": 0.964713757490756, "grad_norm": 0.21344926953315735, "learning_rate": 0.00016435297510659753, "loss": 2.3758, "step": 484240 }, { "epoch": 0.964733679714395, "grad_norm": 0.22119784355163574, "learning_rate": 0.0001642440249314181, "loss": 2.4066, "step": 484250 }, { "epoch": 0.9647536019380339, "grad_norm": 0.2467772215604782, "learning_rate": 0.00016413508122193265, "loss": 2.3864, "step": 484260 }, { "epoch": 0.9647735241616728, "grad_norm": 0.23188970983028412, "learning_rate": 0.0001640261439769901, "loss": 2.4222, "step": 484270 }, { "epoch": 0.9647934463853117, "grad_norm": 0.23861373960971832, "learning_rate": 0.00016391721319543984, "loss": 2.3956, "step": 484280 }, { "epoch": 0.9648133686089506, "grad_norm": 0.22929249703884125, "learning_rate": 0.00016380828887613163, "loss": 2.3849, "step": 484290 }, { "epoch": 0.9648332908325896, "grad_norm": 0.25509101152420044, "learning_rate": 0.00016369937101791532, "loss": 2.3924, "step": 484300 }, { "epoch": 0.9648532130562285, "grad_norm": 0.23537200689315796, "learning_rate": 0.00016359045961964136, "loss": 2.3957, "step": 484310 }, { "epoch": 0.9648731352798674, "grad_norm": 0.23701073229312897, "learning_rate": 0.00016348155468016068, "loss": 2.4061, "step": 484320 }, { "epoch": 0.9648930575035063, "grad_norm": 0.23489314317703247, "learning_rate": 0.00016337265619832398, "loss": 2.387, "step": 484330 }, { "epoch": 0.9649129797271452, "grad_norm": 0.23771071434020996, "learning_rate": 0.00016326376417298284, "loss": 2.3792, "step": 484340 }, { "epoch": 0.9649329019507842, "grad_norm": 0.4636304974555969, "learning_rate": 0.00016315487860298905, "loss": 2.3983, "step": 484350 }, { "epoch": 0.9649528241744231, "grad_norm": 0.2205870896577835, "learning_rate": 0.00016304599948719446, "loss": 2.3925, "step": 484360 }, { "epoch": 0.964972746398062, "grad_norm": 0.22961834073066711, "learning_rate": 0.00016293712682445194, "loss": 2.3857, "step": 484370 }, { "epoch": 0.9649926686217009, "grad_norm": 0.22560492157936096, "learning_rate": 0.00016282826061361379, "loss": 2.3919, "step": 484380 }, { "epoch": 0.9650125908453397, "grad_norm": 0.2177005559206009, "learning_rate": 0.0001627194008535333, "loss": 2.4085, "step": 484390 }, { "epoch": 0.9650325130689787, "grad_norm": 0.23216678202152252, "learning_rate": 0.0001626105475430637, "loss": 2.3852, "step": 484400 }, { "epoch": 0.9650524352926176, "grad_norm": 0.23615354299545288, "learning_rate": 0.0001625017006810592, "loss": 2.3975, "step": 484410 }, { "epoch": 0.9650723575162565, "grad_norm": 0.23081764578819275, "learning_rate": 0.00016239286026637357, "loss": 2.3926, "step": 484420 }, { "epoch": 0.9650922797398954, "grad_norm": 0.2352958768606186, "learning_rate": 0.00016228402629786109, "loss": 2.3937, "step": 484430 }, { "epoch": 0.9651122019635343, "grad_norm": 0.2290230691432953, "learning_rate": 0.00016217519877437714, "loss": 2.3806, "step": 484440 }, { "epoch": 0.9651321241871733, "grad_norm": 0.23650223016738892, "learning_rate": 0.00016206637769477638, "loss": 2.3844, "step": 484450 }, { "epoch": 0.9651520464108122, "grad_norm": 0.21740826964378357, "learning_rate": 0.00016195756305791465, "loss": 2.4009, "step": 484460 }, { "epoch": 0.9651719686344511, "grad_norm": 0.2378147840499878, "learning_rate": 0.00016184875486264727, "loss": 2.3889, "step": 484470 }, { "epoch": 0.96519189085809, "grad_norm": 0.2371291220188141, "learning_rate": 0.00016173995310783074, "loss": 2.3922, "step": 484480 }, { "epoch": 0.9652118130817289, "grad_norm": 0.24169160425662994, "learning_rate": 0.00016163115779232152, "loss": 2.4008, "step": 484490 }, { "epoch": 0.9652317353053679, "grad_norm": 0.22893598675727844, "learning_rate": 0.00016152236891497652, "loss": 2.3921, "step": 484500 }, { "epoch": 0.9652516575290068, "grad_norm": 0.2305298149585724, "learning_rate": 0.00016141358647465265, "loss": 2.3872, "step": 484510 }, { "epoch": 0.9652715797526457, "grad_norm": 0.22830389440059662, "learning_rate": 0.00016130481047020752, "loss": 2.3979, "step": 484520 }, { "epoch": 0.9652915019762845, "grad_norm": 0.228162482380867, "learning_rate": 0.00016119604090049865, "loss": 2.3929, "step": 484530 }, { "epoch": 0.9653114241999236, "grad_norm": 0.23354963958263397, "learning_rate": 0.00016108727776438503, "loss": 2.386, "step": 484540 }, { "epoch": 0.9653313464235624, "grad_norm": 0.2214588075876236, "learning_rate": 0.00016097852106072442, "loss": 2.3887, "step": 484550 }, { "epoch": 0.9653512686472013, "grad_norm": 0.2150762677192688, "learning_rate": 0.00016086977078837617, "loss": 2.3841, "step": 484560 }, { "epoch": 0.9653711908708402, "grad_norm": 0.23661057651042938, "learning_rate": 0.00016076102694619922, "loss": 2.4028, "step": 484570 }, { "epoch": 0.9653911130944791, "grad_norm": 0.24183756113052368, "learning_rate": 0.0001606522895330529, "loss": 2.4028, "step": 484580 }, { "epoch": 0.9654110353181181, "grad_norm": 0.2174670398235321, "learning_rate": 0.00016054355854779745, "loss": 2.3871, "step": 484590 }, { "epoch": 0.965430957541757, "grad_norm": 0.2636847198009491, "learning_rate": 0.00016043483398929292, "loss": 2.3784, "step": 484600 }, { "epoch": 0.9654508797653959, "grad_norm": 0.23637878894805908, "learning_rate": 0.0001603261158564, "loss": 2.3846, "step": 484610 }, { "epoch": 0.9654708019890348, "grad_norm": 0.23184674978256226, "learning_rate": 0.00016021740414797937, "loss": 2.3823, "step": 484620 }, { "epoch": 0.9654907242126737, "grad_norm": 0.2355068475008011, "learning_rate": 0.00016010869886289193, "loss": 2.4012, "step": 484630 }, { "epoch": 0.9655106464363127, "grad_norm": 0.20880179107189178, "learning_rate": 0.00015999999999999993, "loss": 2.3865, "step": 484640 }, { "epoch": 0.9655305686599516, "grad_norm": 0.39696380496025085, "learning_rate": 0.00015989130755816495, "loss": 2.3975, "step": 484650 }, { "epoch": 0.9655504908835905, "grad_norm": 0.21302102506160736, "learning_rate": 0.000159782621536249, "loss": 2.3777, "step": 484660 }, { "epoch": 0.9655704131072294, "grad_norm": 0.22573764622211456, "learning_rate": 0.00015967394193311502, "loss": 2.397, "step": 484670 }, { "epoch": 0.9655903353308682, "grad_norm": 0.2381439507007599, "learning_rate": 0.00015956526874762544, "loss": 2.415, "step": 484680 }, { "epoch": 0.9656102575545072, "grad_norm": 0.32432466745376587, "learning_rate": 0.00015945660197864387, "loss": 2.4005, "step": 484690 }, { "epoch": 0.9656301797781461, "grad_norm": 0.2609347999095917, "learning_rate": 0.00015934794162503386, "loss": 2.4111, "step": 484700 }, { "epoch": 0.965650102001785, "grad_norm": 0.23895299434661865, "learning_rate": 0.000159239287685659, "loss": 2.3985, "step": 484710 }, { "epoch": 0.9656700242254239, "grad_norm": 0.26589420437812805, "learning_rate": 0.00015913064015938395, "loss": 2.382, "step": 484720 }, { "epoch": 0.9656899464490628, "grad_norm": 0.24457396566867828, "learning_rate": 0.00015902199904507276, "loss": 2.3977, "step": 484730 }, { "epoch": 0.9657098686727018, "grad_norm": 0.24138544499874115, "learning_rate": 0.00015891336434159075, "loss": 2.3864, "step": 484740 }, { "epoch": 0.9657297908963407, "grad_norm": 0.2409122735261917, "learning_rate": 0.0001588047360478031, "loss": 2.3868, "step": 484750 }, { "epoch": 0.9657497131199796, "grad_norm": 0.22983650863170624, "learning_rate": 0.00015869611416257533, "loss": 2.3928, "step": 484760 }, { "epoch": 0.9657696353436185, "grad_norm": 0.21268266439437866, "learning_rate": 0.0001585874986847733, "loss": 2.3936, "step": 484770 }, { "epoch": 0.9657895575672574, "grad_norm": 0.22003580629825592, "learning_rate": 0.00015847888961326316, "loss": 2.3933, "step": 484780 }, { "epoch": 0.9658094797908964, "grad_norm": 0.22819359600543976, "learning_rate": 0.0001583702869469119, "loss": 2.3768, "step": 484790 }, { "epoch": 0.9658294020145353, "grad_norm": 0.23183926939964294, "learning_rate": 0.00015826169068458595, "loss": 2.3924, "step": 484800 }, { "epoch": 0.9658493242381742, "grad_norm": 0.25425657629966736, "learning_rate": 0.00015815310082515266, "loss": 2.3932, "step": 484810 }, { "epoch": 0.965869246461813, "grad_norm": 0.24365268647670746, "learning_rate": 0.00015804451736748005, "loss": 2.408, "step": 484820 }, { "epoch": 0.965889168685452, "grad_norm": 0.22289584577083588, "learning_rate": 0.00015793594031043523, "loss": 2.3931, "step": 484830 }, { "epoch": 0.9659090909090909, "grad_norm": 0.23054371774196625, "learning_rate": 0.00015782736965288714, "loss": 2.3925, "step": 484840 }, { "epoch": 0.9659290131327298, "grad_norm": 0.215837299823761, "learning_rate": 0.0001577188053937042, "loss": 2.3992, "step": 484850 }, { "epoch": 0.9659489353563687, "grad_norm": 0.2455836534500122, "learning_rate": 0.00015761024753175535, "loss": 2.3883, "step": 484860 }, { "epoch": 0.9659688575800076, "grad_norm": 0.24748486280441284, "learning_rate": 0.00015750169606590947, "loss": 2.3957, "step": 484870 }, { "epoch": 0.9659887798036466, "grad_norm": 0.24227559566497803, "learning_rate": 0.0001573931509950366, "loss": 2.3927, "step": 484880 }, { "epoch": 0.9660087020272855, "grad_norm": 0.2281857430934906, "learning_rate": 0.00015728461231800651, "loss": 2.4013, "step": 484890 }, { "epoch": 0.9660286242509244, "grad_norm": 0.24396838247776031, "learning_rate": 0.0001571760800336892, "loss": 2.4002, "step": 484900 }, { "epoch": 0.9660485464745633, "grad_norm": 0.24650222063064575, "learning_rate": 0.00015706755414095563, "loss": 2.3839, "step": 484910 }, { "epoch": 0.9660684686982022, "grad_norm": 0.22965215146541595, "learning_rate": 0.00015695903463867645, "loss": 2.3855, "step": 484920 }, { "epoch": 0.9660883909218412, "grad_norm": 0.23416760563850403, "learning_rate": 0.00015685052152572277, "loss": 2.383, "step": 484930 }, { "epoch": 0.9661083131454801, "grad_norm": 0.2478017359972, "learning_rate": 0.0001567420148009666, "loss": 2.391, "step": 484940 }, { "epoch": 0.966128235369119, "grad_norm": 0.25677230954170227, "learning_rate": 0.00015663351446327956, "loss": 2.3862, "step": 484950 }, { "epoch": 0.9661481575927579, "grad_norm": 0.23473531007766724, "learning_rate": 0.00015652502051153406, "loss": 2.4031, "step": 484960 }, { "epoch": 0.9661680798163967, "grad_norm": 0.22152183949947357, "learning_rate": 0.00015641653294460255, "loss": 2.3955, "step": 484970 }, { "epoch": 0.9661880020400357, "grad_norm": 0.21904556453227997, "learning_rate": 0.00015630805176135775, "loss": 2.4149, "step": 484980 }, { "epoch": 0.9662079242636746, "grad_norm": 0.2425973117351532, "learning_rate": 0.00015619957696067345, "loss": 2.381, "step": 484990 }, { "epoch": 0.9662278464873135, "grad_norm": 0.22333645820617676, "learning_rate": 0.0001560911085414225, "loss": 2.398, "step": 485000 }, { "epoch": 0.9662477687109524, "grad_norm": 0.24051469564437866, "learning_rate": 0.00015598264650247938, "loss": 2.3833, "step": 485010 }, { "epoch": 0.9662676909345913, "grad_norm": 0.2479291558265686, "learning_rate": 0.00015587419084271814, "loss": 2.3822, "step": 485020 }, { "epoch": 0.9662876131582303, "grad_norm": 0.24881403148174286, "learning_rate": 0.0001557657415610132, "loss": 2.3817, "step": 485030 }, { "epoch": 0.9663075353818692, "grad_norm": 0.2588239014148712, "learning_rate": 0.00015565729865623946, "loss": 2.3816, "step": 485040 }, { "epoch": 0.9663274576055081, "grad_norm": 0.23462536931037903, "learning_rate": 0.00015554886212727225, "loss": 2.3994, "step": 485050 }, { "epoch": 0.966347379829147, "grad_norm": 0.2416413575410843, "learning_rate": 0.00015544043197298718, "loss": 2.3952, "step": 485060 }, { "epoch": 0.9663673020527859, "grad_norm": 0.24102936685085297, "learning_rate": 0.00015533200819226002, "loss": 2.388, "step": 485070 }, { "epoch": 0.9663872242764249, "grad_norm": 0.2309550940990448, "learning_rate": 0.0001552235907839672, "loss": 2.384, "step": 485080 }, { "epoch": 0.9664071465000638, "grad_norm": 0.2341269999742508, "learning_rate": 0.00015511517974698498, "loss": 2.3949, "step": 485090 }, { "epoch": 0.9664270687237027, "grad_norm": 0.22658655047416687, "learning_rate": 0.00015500677508019023, "loss": 2.3921, "step": 485100 }, { "epoch": 0.9664469909473415, "grad_norm": 0.23409996926784515, "learning_rate": 0.00015489837678246031, "loss": 2.3992, "step": 485110 }, { "epoch": 0.9664669131709805, "grad_norm": 0.22945554554462433, "learning_rate": 0.00015478998485267281, "loss": 2.3923, "step": 485120 }, { "epoch": 0.9664868353946194, "grad_norm": 0.24273911118507385, "learning_rate": 0.00015468159928970525, "loss": 2.3868, "step": 485130 }, { "epoch": 0.9665067576182583, "grad_norm": 0.2308964729309082, "learning_rate": 0.00015457322009243614, "loss": 2.3971, "step": 485140 }, { "epoch": 0.9665266798418972, "grad_norm": 0.24833165109157562, "learning_rate": 0.00015446484725974407, "loss": 2.4078, "step": 485150 }, { "epoch": 0.9665466020655361, "grad_norm": 0.28953054547309875, "learning_rate": 0.00015435648079050758, "loss": 2.3797, "step": 485160 }, { "epoch": 0.9665665242891751, "grad_norm": 0.22959329187870026, "learning_rate": 0.00015424812068360615, "loss": 2.4012, "step": 485170 }, { "epoch": 0.966586446512814, "grad_norm": 0.21587488055229187, "learning_rate": 0.00015413976693791898, "loss": 2.3784, "step": 485180 }, { "epoch": 0.9666063687364529, "grad_norm": 0.24679844081401825, "learning_rate": 0.00015403141955232623, "loss": 2.3874, "step": 485190 }, { "epoch": 0.9666262909600918, "grad_norm": 0.2477363646030426, "learning_rate": 0.00015392307852570776, "loss": 2.3847, "step": 485200 }, { "epoch": 0.9666462131837307, "grad_norm": 0.2505776286125183, "learning_rate": 0.00015381474385694439, "loss": 2.3778, "step": 485210 }, { "epoch": 0.9666661354073697, "grad_norm": 0.217993825674057, "learning_rate": 0.0001537064155449166, "loss": 2.3788, "step": 485220 }, { "epoch": 0.9666860576310086, "grad_norm": 1.456566333770752, "learning_rate": 0.00015359809358850597, "loss": 2.3776, "step": 485230 }, { "epoch": 0.9667059798546475, "grad_norm": 0.24503298103809357, "learning_rate": 0.00015348977798659337, "loss": 2.3887, "step": 485240 }, { "epoch": 0.9667259020782863, "grad_norm": 0.2474430501461029, "learning_rate": 0.00015338146873806124, "loss": 2.3863, "step": 485250 }, { "epoch": 0.9667458243019252, "grad_norm": 0.47577187418937683, "learning_rate": 0.0001532731658417912, "loss": 2.3989, "step": 485260 }, { "epoch": 0.9667657465255642, "grad_norm": 0.2309773713350296, "learning_rate": 0.0001531648692966663, "loss": 2.3902, "step": 485270 }, { "epoch": 0.9667856687492031, "grad_norm": 0.2318412959575653, "learning_rate": 0.00015305657910156877, "loss": 2.3876, "step": 485280 }, { "epoch": 0.966805590972842, "grad_norm": 0.22102472186088562, "learning_rate": 0.00015294829525538178, "loss": 2.4006, "step": 485290 }, { "epoch": 0.9668255131964809, "grad_norm": 0.2299993485212326, "learning_rate": 0.0001528400177569893, "loss": 2.3774, "step": 485300 }, { "epoch": 0.9668454354201198, "grad_norm": 0.22619947791099548, "learning_rate": 0.00015273174660527446, "loss": 2.3804, "step": 485310 }, { "epoch": 0.9668653576437588, "grad_norm": 0.22530606389045715, "learning_rate": 0.00015262348179912165, "loss": 2.3909, "step": 485320 }, { "epoch": 0.9668852798673977, "grad_norm": 0.23086859285831451, "learning_rate": 0.00015251522333741541, "loss": 2.3756, "step": 485330 }, { "epoch": 0.9669052020910366, "grad_norm": 0.22640998661518097, "learning_rate": 0.00015240697121904034, "loss": 2.3854, "step": 485340 }, { "epoch": 0.9669251243146755, "grad_norm": 0.23080122470855713, "learning_rate": 0.00015229872544288159, "loss": 2.3911, "step": 485350 }, { "epoch": 0.9669450465383144, "grad_norm": 0.2257559448480606, "learning_rate": 0.00015219048600782447, "loss": 2.3989, "step": 485360 }, { "epoch": 0.9669649687619534, "grad_norm": 0.23717783391475677, "learning_rate": 0.00015208225291275478, "loss": 2.3905, "step": 485370 }, { "epoch": 0.9669848909855923, "grad_norm": 0.21464182436466217, "learning_rate": 0.00015197402615655853, "loss": 2.4004, "step": 485380 }, { "epoch": 0.9670048132092312, "grad_norm": 0.22726848721504211, "learning_rate": 0.00015186580573812235, "loss": 2.3932, "step": 485390 }, { "epoch": 0.96702473543287, "grad_norm": 0.2402927130460739, "learning_rate": 0.00015175759165633252, "loss": 2.3798, "step": 485400 }, { "epoch": 0.967044657656509, "grad_norm": 0.24131345748901367, "learning_rate": 0.00015164938391007654, "loss": 2.3866, "step": 485410 }, { "epoch": 0.9670645798801479, "grad_norm": 0.23819299042224884, "learning_rate": 0.00015154118249824134, "loss": 2.3843, "step": 485420 }, { "epoch": 0.9670845021037868, "grad_norm": 0.2280503511428833, "learning_rate": 0.0001514329874197149, "loss": 2.3969, "step": 485430 }, { "epoch": 0.9671044243274257, "grad_norm": 0.24130932986736298, "learning_rate": 0.000151324798673385, "loss": 2.3898, "step": 485440 }, { "epoch": 0.9671243465510646, "grad_norm": 0.22676439583301544, "learning_rate": 0.00015121661625814033, "loss": 2.38, "step": 485450 }, { "epoch": 0.9671442687747036, "grad_norm": 0.22875581681728363, "learning_rate": 0.0001511084401728693, "loss": 2.3968, "step": 485460 }, { "epoch": 0.9671641909983425, "grad_norm": 0.22780077159404755, "learning_rate": 0.00015100027041646102, "loss": 2.3949, "step": 485470 }, { "epoch": 0.9671841132219814, "grad_norm": 0.22385506331920624, "learning_rate": 0.00015089210698780486, "loss": 2.3872, "step": 485480 }, { "epoch": 0.9672040354456203, "grad_norm": 0.23803897202014923, "learning_rate": 0.00015078394988579015, "loss": 2.3962, "step": 485490 }, { "epoch": 0.9672239576692592, "grad_norm": 0.23328325152397156, "learning_rate": 0.00015067579910930706, "loss": 2.3931, "step": 485500 }, { "epoch": 0.9672438798928982, "grad_norm": 0.23367030918598175, "learning_rate": 0.00015056765465724586, "loss": 2.3922, "step": 485510 }, { "epoch": 0.9672638021165371, "grad_norm": 0.23518897593021393, "learning_rate": 0.00015045951652849744, "loss": 2.3898, "step": 485520 }, { "epoch": 0.967283724340176, "grad_norm": 0.24122095108032227, "learning_rate": 0.0001503513847219522, "loss": 2.3815, "step": 485530 }, { "epoch": 0.9673036465638148, "grad_norm": 0.2310819774866104, "learning_rate": 0.0001502432592365015, "loss": 2.4042, "step": 485540 }, { "epoch": 0.9673235687874537, "grad_norm": 0.253042608499527, "learning_rate": 0.00015013514007103756, "loss": 2.3879, "step": 485550 }, { "epoch": 0.9673434910110927, "grad_norm": 0.2648928463459015, "learning_rate": 0.00015002702722445149, "loss": 2.392, "step": 485560 }, { "epoch": 0.9673634132347316, "grad_norm": 0.24027200043201447, "learning_rate": 0.00014991892069563618, "loss": 2.4048, "step": 485570 }, { "epoch": 0.9673833354583705, "grad_norm": 0.23494116961956024, "learning_rate": 0.0001498108204834836, "loss": 2.3933, "step": 485580 }, { "epoch": 0.9674032576820094, "grad_norm": 0.22021155059337616, "learning_rate": 0.0001497027265868871, "loss": 2.3922, "step": 485590 }, { "epoch": 0.9674231799056483, "grad_norm": 0.24611663818359375, "learning_rate": 0.00014959463900473958, "loss": 2.3961, "step": 485600 }, { "epoch": 0.9674431021292873, "grad_norm": 0.255538672208786, "learning_rate": 0.00014948655773593477, "loss": 2.3916, "step": 485610 }, { "epoch": 0.9674630243529262, "grad_norm": 0.22920022904872894, "learning_rate": 0.0001493784827793665, "loss": 2.3836, "step": 485620 }, { "epoch": 0.9674829465765651, "grad_norm": 0.2340896874666214, "learning_rate": 0.00014927041413392871, "loss": 2.3809, "step": 485630 }, { "epoch": 0.967502868800204, "grad_norm": 0.22589723765850067, "learning_rate": 0.00014916235179851612, "loss": 2.3795, "step": 485640 }, { "epoch": 0.9675227910238429, "grad_norm": 0.23874567449092865, "learning_rate": 0.00014905429577202333, "loss": 2.3825, "step": 485650 }, { "epoch": 0.9675427132474819, "grad_norm": 0.24017372727394104, "learning_rate": 0.00014894624605334594, "loss": 2.3854, "step": 485660 }, { "epoch": 0.9675626354711208, "grad_norm": 0.24106967449188232, "learning_rate": 0.00014883820264137903, "loss": 2.3801, "step": 485670 }, { "epoch": 0.9675825576947596, "grad_norm": 0.22833910584449768, "learning_rate": 0.00014873016553501839, "loss": 2.3854, "step": 485680 }, { "epoch": 0.9676024799183985, "grad_norm": 0.2264004945755005, "learning_rate": 0.00014862213473316045, "loss": 2.3866, "step": 485690 }, { "epoch": 0.9676224021420375, "grad_norm": 0.2371007800102234, "learning_rate": 0.00014851411023470118, "loss": 2.4032, "step": 485700 }, { "epoch": 0.9676423243656764, "grad_norm": 0.2314937710762024, "learning_rate": 0.00014840609203853772, "loss": 2.3787, "step": 485710 }, { "epoch": 0.9676622465893153, "grad_norm": 0.24559591710567474, "learning_rate": 0.00014829808014356694, "loss": 2.3921, "step": 485720 }, { "epoch": 0.9676821688129542, "grad_norm": 0.2364216446876526, "learning_rate": 0.00014819007454868637, "loss": 2.3846, "step": 485730 }, { "epoch": 0.9677020910365931, "grad_norm": 0.23404735326766968, "learning_rate": 0.00014808207525279337, "loss": 2.3897, "step": 485740 }, { "epoch": 0.9677220132602321, "grad_norm": 0.22658050060272217, "learning_rate": 0.0001479740822547866, "loss": 2.3852, "step": 485750 }, { "epoch": 0.967741935483871, "grad_norm": 0.24738375842571259, "learning_rate": 0.000147866095553564, "loss": 2.3971, "step": 485760 }, { "epoch": 0.9677618577075099, "grad_norm": 0.24344907701015472, "learning_rate": 0.00014775811514802428, "loss": 2.3952, "step": 485770 }, { "epoch": 0.9677817799311488, "grad_norm": 0.24535788595676422, "learning_rate": 0.00014765014103706631, "loss": 2.3878, "step": 485780 }, { "epoch": 0.9678017021547877, "grad_norm": 0.2508731186389923, "learning_rate": 0.00014754217321958984, "loss": 2.3916, "step": 485790 }, { "epoch": 0.9678216243784267, "grad_norm": 0.2386656254529953, "learning_rate": 0.00014743421169449424, "loss": 2.3881, "step": 485800 }, { "epoch": 0.9678415466020656, "grad_norm": 0.2557663023471832, "learning_rate": 0.00014732625646067944, "loss": 2.4016, "step": 485810 }, { "epoch": 0.9678614688257045, "grad_norm": 0.2391655594110489, "learning_rate": 0.0001472183075170459, "loss": 2.376, "step": 485820 }, { "epoch": 0.9678813910493433, "grad_norm": 0.2221204936504364, "learning_rate": 0.00014711036486249408, "loss": 2.3815, "step": 485830 }, { "epoch": 0.9679013132729822, "grad_norm": 0.23346133530139923, "learning_rate": 0.00014700242849592483, "loss": 2.3945, "step": 485840 }, { "epoch": 0.9679212354966212, "grad_norm": 0.2568691074848175, "learning_rate": 0.00014689449841623968, "loss": 2.3909, "step": 485850 }, { "epoch": 0.9679411577202601, "grad_norm": 0.2362416833639145, "learning_rate": 0.00014678657462234, "loss": 2.3931, "step": 485860 }, { "epoch": 0.967961079943899, "grad_norm": 0.21917784214019775, "learning_rate": 0.00014667865711312755, "loss": 2.397, "step": 485870 }, { "epoch": 0.9679810021675379, "grad_norm": 0.2339840829372406, "learning_rate": 0.00014657074588750497, "loss": 2.3907, "step": 485880 }, { "epoch": 0.9680009243911768, "grad_norm": 0.23155617713928223, "learning_rate": 0.00014646284094437423, "loss": 2.3758, "step": 485890 }, { "epoch": 0.9680208466148158, "grad_norm": 0.26579418778419495, "learning_rate": 0.00014635494228263868, "loss": 2.3988, "step": 485900 }, { "epoch": 0.9680407688384547, "grad_norm": 0.25988486409187317, "learning_rate": 0.00014624704990120118, "loss": 2.3855, "step": 485910 }, { "epoch": 0.9680606910620936, "grad_norm": 0.23222076892852783, "learning_rate": 0.0001461391637989653, "loss": 2.3759, "step": 485920 }, { "epoch": 0.9680806132857325, "grad_norm": 0.24772781133651733, "learning_rate": 0.00014603128397483477, "loss": 2.3903, "step": 485930 }, { "epoch": 0.9681005355093714, "grad_norm": 0.21735839545726776, "learning_rate": 0.00014592341042771363, "loss": 2.3963, "step": 485940 }, { "epoch": 0.9681204577330104, "grad_norm": 0.23165275156497955, "learning_rate": 0.00014581554315650668, "loss": 2.3838, "step": 485950 }, { "epoch": 0.9681403799566493, "grad_norm": 0.23405952751636505, "learning_rate": 0.00014570768216011842, "loss": 2.3909, "step": 485960 }, { "epoch": 0.9681603021802881, "grad_norm": 0.23741577565670013, "learning_rate": 0.00014559982743745414, "loss": 2.4065, "step": 485970 }, { "epoch": 0.968180224403927, "grad_norm": 0.23069509863853455, "learning_rate": 0.00014549197898741894, "loss": 2.3872, "step": 485980 }, { "epoch": 0.9682001466275659, "grad_norm": 0.23068541288375854, "learning_rate": 0.0001453841368089186, "loss": 2.3743, "step": 485990 }, { "epoch": 0.9682200688512049, "grad_norm": 0.22659191489219666, "learning_rate": 0.0001452763009008593, "loss": 2.3875, "step": 486000 }, { "epoch": 0.9682399910748438, "grad_norm": 0.23708228766918182, "learning_rate": 0.00014516847126214727, "loss": 2.3949, "step": 486010 }, { "epoch": 0.9682599132984827, "grad_norm": 0.23568499088287354, "learning_rate": 0.00014506064789168916, "loss": 2.3872, "step": 486020 }, { "epoch": 0.9682798355221216, "grad_norm": 0.2416008710861206, "learning_rate": 0.00014495283078839205, "loss": 2.379, "step": 486030 }, { "epoch": 0.9682997577457606, "grad_norm": 0.23180823028087616, "learning_rate": 0.00014484501995116306, "loss": 2.3962, "step": 486040 }, { "epoch": 0.9683196799693995, "grad_norm": 0.24288924038410187, "learning_rate": 0.00014473721537891016, "loss": 2.3833, "step": 486050 }, { "epoch": 0.9683396021930384, "grad_norm": 0.24211934208869934, "learning_rate": 0.00014462941707054112, "loss": 2.4025, "step": 486060 }, { "epoch": 0.9683595244166773, "grad_norm": 0.2426312118768692, "learning_rate": 0.00014452162502496412, "loss": 2.392, "step": 486070 }, { "epoch": 0.9683794466403162, "grad_norm": 0.2278674840927124, "learning_rate": 0.00014441383924108765, "loss": 2.3935, "step": 486080 }, { "epoch": 0.9683993688639552, "grad_norm": 0.2212945967912674, "learning_rate": 0.00014430605971782075, "loss": 2.3992, "step": 486090 }, { "epoch": 0.9684192910875941, "grad_norm": 0.240979865193367, "learning_rate": 0.00014419828645407273, "loss": 2.3841, "step": 486100 }, { "epoch": 0.968439213311233, "grad_norm": 0.23400548100471497, "learning_rate": 0.00014409051944875296, "loss": 2.3861, "step": 486110 }, { "epoch": 0.9684591355348718, "grad_norm": 0.2304328829050064, "learning_rate": 0.00014398275870077115, "loss": 2.3881, "step": 486120 }, { "epoch": 0.9684790577585107, "grad_norm": 0.22571136057376862, "learning_rate": 0.00014387500420903775, "loss": 2.4162, "step": 486130 }, { "epoch": 0.9684989799821497, "grad_norm": 0.2376677691936493, "learning_rate": 0.00014376725597246276, "loss": 2.389, "step": 486140 }, { "epoch": 0.9685189022057886, "grad_norm": 0.22814664244651794, "learning_rate": 0.0001436595139899577, "loss": 2.397, "step": 486150 }, { "epoch": 0.9685388244294275, "grad_norm": 0.2374463826417923, "learning_rate": 0.00014355177826043318, "loss": 2.3928, "step": 486160 }, { "epoch": 0.9685587466530664, "grad_norm": 0.231350377202034, "learning_rate": 0.00014344404878280058, "loss": 2.3842, "step": 486170 }, { "epoch": 0.9685786688767053, "grad_norm": 0.2619422376155853, "learning_rate": 0.00014333632555597187, "loss": 2.3845, "step": 486180 }, { "epoch": 0.9685985911003443, "grad_norm": 0.22228777408599854, "learning_rate": 0.000143228608578859, "loss": 2.3961, "step": 486190 }, { "epoch": 0.9686185133239832, "grad_norm": 0.2290068417787552, "learning_rate": 0.0001431208978503744, "loss": 2.3992, "step": 486200 }, { "epoch": 0.9686384355476221, "grad_norm": 0.23777292668819427, "learning_rate": 0.00014301319336943052, "loss": 2.3737, "step": 486210 }, { "epoch": 0.968658357771261, "grad_norm": 0.23838019371032715, "learning_rate": 0.00014290549513494067, "loss": 2.4064, "step": 486220 }, { "epoch": 0.9686782799948999, "grad_norm": 0.2316126823425293, "learning_rate": 0.00014279780314581793, "loss": 2.377, "step": 486230 }, { "epoch": 0.9686982022185389, "grad_norm": 0.2262478917837143, "learning_rate": 0.00014269011740097604, "loss": 2.381, "step": 486240 }, { "epoch": 0.9687181244421778, "grad_norm": 1.3151205778121948, "learning_rate": 0.000142582437899329, "loss": 2.3832, "step": 486250 }, { "epoch": 0.9687380466658166, "grad_norm": 0.23276454210281372, "learning_rate": 0.00014247476463979104, "loss": 2.3892, "step": 486260 }, { "epoch": 0.9687579688894555, "grad_norm": 0.24412770569324493, "learning_rate": 0.00014236709762127653, "loss": 2.3727, "step": 486270 }, { "epoch": 0.9687778911130944, "grad_norm": 0.2569328546524048, "learning_rate": 0.0001422594368427006, "loss": 2.3901, "step": 486280 }, { "epoch": 0.9687978133367334, "grad_norm": 0.24472233653068542, "learning_rate": 0.0001421517823029783, "loss": 2.39, "step": 486290 }, { "epoch": 0.9688177355603723, "grad_norm": 0.24570955336093903, "learning_rate": 0.00014204413400102523, "loss": 2.385, "step": 486300 }, { "epoch": 0.9688376577840112, "grad_norm": 0.24335931241512299, "learning_rate": 0.0001419364919357573, "loss": 2.3843, "step": 486310 }, { "epoch": 0.9688575800076501, "grad_norm": 0.2449510246515274, "learning_rate": 0.00014182885610609054, "loss": 2.3949, "step": 486320 }, { "epoch": 0.9688775022312891, "grad_norm": 0.2432340830564499, "learning_rate": 0.00014172122651094155, "loss": 2.3857, "step": 486330 }, { "epoch": 0.968897424454928, "grad_norm": 0.24757644534111023, "learning_rate": 0.00014161360314922679, "loss": 2.3911, "step": 486340 }, { "epoch": 0.9689173466785669, "grad_norm": 0.23195308446884155, "learning_rate": 0.0001415059860198633, "loss": 2.3907, "step": 486350 }, { "epoch": 0.9689372689022058, "grad_norm": 0.22798828780651093, "learning_rate": 0.00014139837512176911, "loss": 2.3922, "step": 486360 }, { "epoch": 0.9689571911258447, "grad_norm": 0.23919275403022766, "learning_rate": 0.00014129077045386151, "loss": 2.3846, "step": 486370 }, { "epoch": 0.9689771133494837, "grad_norm": 0.25051137804985046, "learning_rate": 0.00014118317201505847, "loss": 2.3877, "step": 486380 }, { "epoch": 0.9689970355731226, "grad_norm": 0.2398044466972351, "learning_rate": 0.00014107557980427843, "loss": 2.3942, "step": 486390 }, { "epoch": 0.9690169577967614, "grad_norm": 0.23097014427185059, "learning_rate": 0.00014096799382044, "loss": 2.3784, "step": 486400 }, { "epoch": 0.9690368800204003, "grad_norm": 0.22545801103115082, "learning_rate": 0.00014086041406246208, "loss": 2.3811, "step": 486410 }, { "epoch": 0.9690568022440392, "grad_norm": 0.2351747453212738, "learning_rate": 0.00014075284052926417, "loss": 2.3921, "step": 486420 }, { "epoch": 0.9690767244676782, "grad_norm": 0.23131369054317474, "learning_rate": 0.0001406452732197656, "loss": 2.3802, "step": 486430 }, { "epoch": 0.9690966466913171, "grad_norm": 0.26597315073013306, "learning_rate": 0.00014053771213288658, "loss": 2.3883, "step": 486440 }, { "epoch": 0.969116568914956, "grad_norm": 0.2665604054927826, "learning_rate": 0.00014043015726754703, "loss": 2.3703, "step": 486450 }, { "epoch": 0.9691364911385949, "grad_norm": 0.20614440739154816, "learning_rate": 0.00014032260862266766, "loss": 2.383, "step": 486460 }, { "epoch": 0.9691564133622338, "grad_norm": 0.23204904794692993, "learning_rate": 0.00014021506619716907, "loss": 2.3678, "step": 486470 }, { "epoch": 0.9691763355858728, "grad_norm": 0.2460455447435379, "learning_rate": 0.00014010752998997277, "loss": 2.3868, "step": 486480 }, { "epoch": 0.9691962578095117, "grad_norm": 0.26319363713264465, "learning_rate": 0.0001399999999999999, "loss": 2.3868, "step": 486490 }, { "epoch": 0.9692161800331506, "grad_norm": 0.2450878769159317, "learning_rate": 0.00013989247622617261, "loss": 2.3817, "step": 486500 }, { "epoch": 0.9692361022567895, "grad_norm": 0.23624290525913239, "learning_rate": 0.00013978495866741246, "loss": 2.3963, "step": 486510 }, { "epoch": 0.9692560244804284, "grad_norm": 0.22727070748806, "learning_rate": 0.0001396774473226423, "loss": 2.3833, "step": 486520 }, { "epoch": 0.9692759467040674, "grad_norm": 0.22787216305732727, "learning_rate": 0.0001395699421907848, "loss": 2.392, "step": 486530 }, { "epoch": 0.9692958689277063, "grad_norm": 0.2597998380661011, "learning_rate": 0.00013946244327076273, "loss": 2.395, "step": 486540 }, { "epoch": 0.9693157911513451, "grad_norm": 0.2510361671447754, "learning_rate": 0.0001393549505614995, "loss": 2.3868, "step": 486550 }, { "epoch": 0.969335713374984, "grad_norm": 0.2382764369249344, "learning_rate": 0.0001392474640619188, "loss": 2.3747, "step": 486560 }, { "epoch": 0.9693556355986229, "grad_norm": 0.22689290344715118, "learning_rate": 0.0001391399837709446, "loss": 2.3928, "step": 486570 }, { "epoch": 0.9693755578222619, "grad_norm": 0.2440398633480072, "learning_rate": 0.00013903250968750136, "loss": 2.3849, "step": 486580 }, { "epoch": 0.9693954800459008, "grad_norm": 0.24149751663208008, "learning_rate": 0.00013892504181051325, "loss": 2.3828, "step": 486590 }, { "epoch": 0.9694154022695397, "grad_norm": 0.2479632943868637, "learning_rate": 0.00013881758013890532, "loss": 2.3895, "step": 486600 }, { "epoch": 0.9694353244931786, "grad_norm": 0.2408313900232315, "learning_rate": 0.00013871012467160293, "loss": 2.394, "step": 486610 }, { "epoch": 0.9694552467168176, "grad_norm": 0.23578643798828125, "learning_rate": 0.00013860267540753135, "loss": 2.3973, "step": 486620 }, { "epoch": 0.9694751689404565, "grad_norm": 0.270231157541275, "learning_rate": 0.00013849523234561655, "loss": 2.3754, "step": 486630 }, { "epoch": 0.9694950911640954, "grad_norm": 0.36086058616638184, "learning_rate": 0.00013838779548478475, "loss": 2.4173, "step": 486640 }, { "epoch": 0.9695150133877343, "grad_norm": 0.24703288078308105, "learning_rate": 0.00013828036482396188, "loss": 2.3841, "step": 486650 }, { "epoch": 0.9695349356113732, "grad_norm": 0.24633730947971344, "learning_rate": 0.00013817294036207528, "loss": 2.4009, "step": 486660 }, { "epoch": 0.9695548578350122, "grad_norm": 0.23719125986099243, "learning_rate": 0.0001380655220980518, "loss": 2.3864, "step": 486670 }, { "epoch": 0.969574780058651, "grad_norm": 0.21762743592262268, "learning_rate": 0.00013795811003081894, "loss": 2.3926, "step": 486680 }, { "epoch": 0.9695947022822899, "grad_norm": 0.22570709884166718, "learning_rate": 0.00013785070415930402, "loss": 2.39, "step": 486690 }, { "epoch": 0.9696146245059288, "grad_norm": 0.2381884902715683, "learning_rate": 0.0001377433044824352, "loss": 2.3941, "step": 486700 }, { "epoch": 0.9696345467295677, "grad_norm": 0.2327594757080078, "learning_rate": 0.00013763591099914097, "loss": 2.3807, "step": 486710 }, { "epoch": 0.9696544689532067, "grad_norm": 0.22646960616111755, "learning_rate": 0.00013752852370834946, "loss": 2.3947, "step": 486720 }, { "epoch": 0.9696743911768456, "grad_norm": 2.2163915634155273, "learning_rate": 0.0001374211426089902, "loss": 2.3927, "step": 486730 }, { "epoch": 0.9696943134004845, "grad_norm": 0.2372526079416275, "learning_rate": 0.0001373137676999918, "loss": 2.399, "step": 486740 }, { "epoch": 0.9697142356241234, "grad_norm": 0.24094584584236145, "learning_rate": 0.00013720639898028407, "loss": 2.3821, "step": 486750 }, { "epoch": 0.9697341578477623, "grad_norm": 0.2335161566734314, "learning_rate": 0.00013709903644879717, "loss": 2.3841, "step": 486760 }, { "epoch": 0.9697540800714013, "grad_norm": 0.25601157546043396, "learning_rate": 0.0001369916801044606, "loss": 2.399, "step": 486770 }, { "epoch": 0.9697740022950402, "grad_norm": 0.22932657599449158, "learning_rate": 0.0001368843299462055, "loss": 2.3782, "step": 486780 }, { "epoch": 0.9697939245186791, "grad_norm": 0.23293103277683258, "learning_rate": 0.00013677698597296196, "loss": 2.3771, "step": 486790 }, { "epoch": 0.969813846742318, "grad_norm": 0.22307758033275604, "learning_rate": 0.00013666964818366157, "loss": 2.3776, "step": 486800 }, { "epoch": 0.9698337689659569, "grad_norm": 0.23285917937755585, "learning_rate": 0.00013656231657723517, "loss": 2.3959, "step": 486810 }, { "epoch": 0.9698536911895959, "grad_norm": 0.3881131410598755, "learning_rate": 0.00013645499115261516, "loss": 2.3913, "step": 486820 }, { "epoch": 0.9698736134132347, "grad_norm": 0.23144900798797607, "learning_rate": 0.00013634767190873288, "loss": 2.3798, "step": 486830 }, { "epoch": 0.9698935356368736, "grad_norm": 0.22158555686473846, "learning_rate": 0.0001362403588445209, "loss": 2.3741, "step": 486840 }, { "epoch": 0.9699134578605125, "grad_norm": 0.25127193331718445, "learning_rate": 0.0001361330519589119, "loss": 2.3784, "step": 486850 }, { "epoch": 0.9699333800841514, "grad_norm": 0.23876361548900604, "learning_rate": 0.0001360257512508387, "loss": 2.3827, "step": 486860 }, { "epoch": 0.9699533023077904, "grad_norm": 0.23645612597465515, "learning_rate": 0.00013591845671923465, "loss": 2.3869, "step": 486870 }, { "epoch": 0.9699732245314293, "grad_norm": 0.2461419701576233, "learning_rate": 0.00013581116836303297, "loss": 2.3953, "step": 486880 }, { "epoch": 0.9699931467550682, "grad_norm": 0.23986637592315674, "learning_rate": 0.0001357038861811679, "loss": 2.3766, "step": 486890 }, { "epoch": 0.9700130689787071, "grad_norm": 0.24012847244739532, "learning_rate": 0.00013559661017257317, "loss": 2.3829, "step": 486900 }, { "epoch": 0.9700329912023461, "grad_norm": 0.23465313017368317, "learning_rate": 0.00013548934033618366, "loss": 2.3873, "step": 486910 }, { "epoch": 0.970052913425985, "grad_norm": 0.23650798201560974, "learning_rate": 0.0001353820766709337, "loss": 2.3859, "step": 486920 }, { "epoch": 0.9700728356496239, "grad_norm": 0.2296363115310669, "learning_rate": 0.00013527481917575867, "loss": 2.384, "step": 486930 }, { "epoch": 0.9700927578732628, "grad_norm": 0.24495752155780792, "learning_rate": 0.0001351675678495936, "loss": 2.3917, "step": 486940 }, { "epoch": 0.9701126800969017, "grad_norm": 0.2349763959646225, "learning_rate": 0.00013506032269137446, "loss": 2.3839, "step": 486950 }, { "epoch": 0.9701326023205407, "grad_norm": 0.23694536089897156, "learning_rate": 0.00013495308370003724, "loss": 2.3793, "step": 486960 }, { "epoch": 0.9701525245441796, "grad_norm": 0.24191024899482727, "learning_rate": 0.00013484585087451828, "loss": 2.3973, "step": 486970 }, { "epoch": 0.9701724467678184, "grad_norm": 0.23972490429878235, "learning_rate": 0.00013473862421375382, "loss": 2.3895, "step": 486980 }, { "epoch": 0.9701923689914573, "grad_norm": 0.22596481442451477, "learning_rate": 0.00013463140371668093, "loss": 2.409, "step": 486990 }, { "epoch": 0.9702122912150962, "grad_norm": 0.22794310748577118, "learning_rate": 0.0001345241893822371, "loss": 2.3912, "step": 487000 }, { "epoch": 0.9702322134387352, "grad_norm": 0.24343445897102356, "learning_rate": 0.00013441698120935942, "loss": 2.3944, "step": 487010 }, { "epoch": 0.9702521356623741, "grad_norm": 0.25634056329727173, "learning_rate": 0.00013430977919698584, "loss": 2.3744, "step": 487020 }, { "epoch": 0.970272057886013, "grad_norm": 0.2354341596364975, "learning_rate": 0.00013420258334405455, "loss": 2.3948, "step": 487030 }, { "epoch": 0.9702919801096519, "grad_norm": 0.24518470466136932, "learning_rate": 0.00013409539364950395, "loss": 2.3854, "step": 487040 }, { "epoch": 0.9703119023332908, "grad_norm": 0.22203035652637482, "learning_rate": 0.00013398821011227248, "loss": 2.3972, "step": 487050 }, { "epoch": 0.9703318245569298, "grad_norm": 0.22383546829223633, "learning_rate": 0.00013388103273129982, "loss": 2.3888, "step": 487060 }, { "epoch": 0.9703517467805687, "grad_norm": 0.23879458010196686, "learning_rate": 0.00013377386150552483, "loss": 2.3739, "step": 487070 }, { "epoch": 0.9703716690042076, "grad_norm": 0.25329622626304626, "learning_rate": 0.0001336666964338873, "loss": 2.4011, "step": 487080 }, { "epoch": 0.9703915912278465, "grad_norm": 0.23139545321464539, "learning_rate": 0.0001335595375153269, "loss": 2.3828, "step": 487090 }, { "epoch": 0.9704115134514854, "grad_norm": 0.2145688235759735, "learning_rate": 0.0001334523847487843, "loss": 2.3871, "step": 487100 }, { "epoch": 0.9704314356751244, "grad_norm": 0.22995075583457947, "learning_rate": 0.00013334523813319986, "loss": 2.3988, "step": 487110 }, { "epoch": 0.9704513578987632, "grad_norm": 0.252291202545166, "learning_rate": 0.0001332380976675145, "loss": 2.389, "step": 487120 }, { "epoch": 0.9704712801224021, "grad_norm": 0.23614250123500824, "learning_rate": 0.00013313096335066944, "loss": 2.3817, "step": 487130 }, { "epoch": 0.970491202346041, "grad_norm": 0.2639828622341156, "learning_rate": 0.00013302383518160578, "loss": 2.3777, "step": 487140 }, { "epoch": 0.9705111245696799, "grad_norm": 0.2591904103755951, "learning_rate": 0.00013291671315926568, "loss": 2.399, "step": 487150 }, { "epoch": 0.9705310467933189, "grad_norm": 0.23312407732009888, "learning_rate": 0.0001328095972825909, "loss": 2.3801, "step": 487160 }, { "epoch": 0.9705509690169578, "grad_norm": 0.22866596281528473, "learning_rate": 0.00013270248755052426, "loss": 2.3724, "step": 487170 }, { "epoch": 0.9705708912405967, "grad_norm": 0.2352130115032196, "learning_rate": 0.000132595383962008, "loss": 2.3884, "step": 487180 }, { "epoch": 0.9705908134642356, "grad_norm": 0.23048529028892517, "learning_rate": 0.00013248828651598555, "loss": 2.3876, "step": 487190 }, { "epoch": 0.9706107356878746, "grad_norm": 0.23807387053966522, "learning_rate": 0.00013238119521139958, "loss": 2.3894, "step": 487200 }, { "epoch": 0.9706306579115135, "grad_norm": 0.2406109869480133, "learning_rate": 0.00013227411004719425, "loss": 2.3881, "step": 487210 }, { "epoch": 0.9706505801351524, "grad_norm": 0.24126076698303223, "learning_rate": 0.0001321670310223133, "loss": 2.3879, "step": 487220 }, { "epoch": 0.9706705023587913, "grad_norm": 0.22358927130699158, "learning_rate": 0.0001320599581357007, "loss": 2.3834, "step": 487230 }, { "epoch": 0.9706904245824302, "grad_norm": 0.22869646549224854, "learning_rate": 0.0001319528913863013, "loss": 2.4007, "step": 487240 }, { "epoch": 0.9707103468060692, "grad_norm": 0.230141282081604, "learning_rate": 0.00013184583077305946, "loss": 2.3766, "step": 487250 }, { "epoch": 0.970730269029708, "grad_norm": 0.2244325578212738, "learning_rate": 0.0001317387762949207, "loss": 2.3925, "step": 487260 }, { "epoch": 0.9707501912533469, "grad_norm": 0.25293922424316406, "learning_rate": 0.00013163172795083034, "loss": 2.3764, "step": 487270 }, { "epoch": 0.9707701134769858, "grad_norm": 0.21384303271770477, "learning_rate": 0.00013152468573973387, "loss": 2.3749, "step": 487280 }, { "epoch": 0.9707900357006247, "grad_norm": 0.21655228734016418, "learning_rate": 0.00013141764966057769, "loss": 2.3838, "step": 487290 }, { "epoch": 0.9708099579242637, "grad_norm": 0.2241925448179245, "learning_rate": 0.0001313106197123075, "loss": 2.3909, "step": 487300 }, { "epoch": 0.9708298801479026, "grad_norm": 0.22691984474658966, "learning_rate": 0.00013120359589387042, "loss": 2.3968, "step": 487310 }, { "epoch": 0.9708498023715415, "grad_norm": 0.23998567461967468, "learning_rate": 0.00013109657820421327, "loss": 2.3957, "step": 487320 }, { "epoch": 0.9708697245951804, "grad_norm": 0.22408585250377655, "learning_rate": 0.00013098956664228334, "loss": 2.3886, "step": 487330 }, { "epoch": 0.9708896468188193, "grad_norm": 0.23678137362003326, "learning_rate": 0.0001308825612070279, "loss": 2.378, "step": 487340 }, { "epoch": 0.9709095690424583, "grad_norm": 0.221750870347023, "learning_rate": 0.0001307755618973947, "loss": 2.3815, "step": 487350 }, { "epoch": 0.9709294912660972, "grad_norm": 0.2337491363286972, "learning_rate": 0.00013066856871233234, "loss": 2.3795, "step": 487360 }, { "epoch": 0.9709494134897361, "grad_norm": 0.22512932121753693, "learning_rate": 0.0001305615816507888, "loss": 2.3851, "step": 487370 }, { "epoch": 0.970969335713375, "grad_norm": 0.2337074726819992, "learning_rate": 0.0001304546007117131, "loss": 2.385, "step": 487380 }, { "epoch": 0.9709892579370138, "grad_norm": 0.2419455200433731, "learning_rate": 0.00013034762589405415, "loss": 2.3803, "step": 487390 }, { "epoch": 0.9710091801606529, "grad_norm": 0.23385369777679443, "learning_rate": 0.00013024065719676115, "loss": 2.3796, "step": 487400 }, { "epoch": 0.9710291023842917, "grad_norm": 0.2392885833978653, "learning_rate": 0.00013013369461878366, "loss": 2.3748, "step": 487410 }, { "epoch": 0.9710490246079306, "grad_norm": 0.2530181109905243, "learning_rate": 0.00013002673815907208, "loss": 2.3843, "step": 487420 }, { "epoch": 0.9710689468315695, "grad_norm": 0.22422347962856293, "learning_rate": 0.0001299197878165761, "loss": 2.3857, "step": 487430 }, { "epoch": 0.9710888690552084, "grad_norm": 0.2567971348762512, "learning_rate": 0.00012981284359024658, "loss": 2.3904, "step": 487440 }, { "epoch": 0.9711087912788474, "grad_norm": 0.23689645528793335, "learning_rate": 0.00012970590547903393, "loss": 2.3875, "step": 487450 }, { "epoch": 0.9711287135024863, "grad_norm": 0.23558078706264496, "learning_rate": 0.00012959897348188988, "loss": 2.3946, "step": 487460 }, { "epoch": 0.9711486357261252, "grad_norm": 0.22997939586639404, "learning_rate": 0.00012949204759776545, "loss": 2.3943, "step": 487470 }, { "epoch": 0.9711685579497641, "grad_norm": 0.241929829120636, "learning_rate": 0.0001293851278256124, "loss": 2.3963, "step": 487480 }, { "epoch": 0.971188480173403, "grad_norm": 0.22417204082012177, "learning_rate": 0.0001292782141643829, "loss": 2.3811, "step": 487490 }, { "epoch": 0.971208402397042, "grad_norm": 0.24078914523124695, "learning_rate": 0.00012917130661302935, "loss": 2.3812, "step": 487500 }, { "epoch": 0.9712283246206809, "grad_norm": 0.24662213027477264, "learning_rate": 0.0001290644051705041, "loss": 2.374, "step": 487510 }, { "epoch": 0.9712482468443198, "grad_norm": 0.23801448941230774, "learning_rate": 0.00012895750983576005, "loss": 2.3899, "step": 487520 }, { "epoch": 0.9712681690679587, "grad_norm": 0.2508908212184906, "learning_rate": 0.00012885062060775067, "loss": 2.3826, "step": 487530 }, { "epoch": 0.9712880912915977, "grad_norm": 0.22642099857330322, "learning_rate": 0.00012874373748542924, "loss": 2.385, "step": 487540 }, { "epoch": 0.9713080135152365, "grad_norm": 0.31663742661476135, "learning_rate": 0.00012863686046774993, "loss": 2.4009, "step": 487550 }, { "epoch": 0.9713279357388754, "grad_norm": 0.23941746354103088, "learning_rate": 0.00012852998955366647, "loss": 2.3904, "step": 487560 }, { "epoch": 0.9713478579625143, "grad_norm": 0.2376895695924759, "learning_rate": 0.00012842312474213325, "loss": 2.3944, "step": 487570 }, { "epoch": 0.9713677801861532, "grad_norm": 0.22883081436157227, "learning_rate": 0.00012831626603210556, "loss": 2.393, "step": 487580 }, { "epoch": 0.9713877024097922, "grad_norm": 0.2318875789642334, "learning_rate": 0.00012820941342253777, "loss": 2.3946, "step": 487590 }, { "epoch": 0.9714076246334311, "grad_norm": 0.2394792139530182, "learning_rate": 0.00012810256691238563, "loss": 2.3867, "step": 487600 }, { "epoch": 0.97142754685707, "grad_norm": 0.23488755524158478, "learning_rate": 0.00012799572650060444, "loss": 2.3665, "step": 487610 }, { "epoch": 0.9714474690807089, "grad_norm": 0.2257804125547409, "learning_rate": 0.0001278888921861503, "loss": 2.39, "step": 487620 }, { "epoch": 0.9714673913043478, "grad_norm": 0.27196505665779114, "learning_rate": 0.0001277820639679792, "loss": 2.3823, "step": 487630 }, { "epoch": 0.9714873135279868, "grad_norm": 0.25619402527809143, "learning_rate": 0.00012767524184504798, "loss": 2.3955, "step": 487640 }, { "epoch": 0.9715072357516257, "grad_norm": 0.23300182819366455, "learning_rate": 0.000127568425816313, "loss": 2.3684, "step": 487650 }, { "epoch": 0.9715271579752646, "grad_norm": 0.23514120280742645, "learning_rate": 0.0001274616158807318, "loss": 2.3715, "step": 487660 }, { "epoch": 0.9715470801989035, "grad_norm": 0.2365484982728958, "learning_rate": 0.0001273548120372616, "loss": 2.3803, "step": 487670 }, { "epoch": 0.9715670024225423, "grad_norm": 0.23496003448963165, "learning_rate": 0.00012724801428486, "loss": 2.3795, "step": 487680 }, { "epoch": 0.9715869246461813, "grad_norm": 0.22072403132915497, "learning_rate": 0.00012714122262248506, "loss": 2.3894, "step": 487690 }, { "epoch": 0.9716068468698202, "grad_norm": 0.2249165177345276, "learning_rate": 0.00012703443704909523, "loss": 2.3878, "step": 487700 }, { "epoch": 0.9716267690934591, "grad_norm": 0.2306867092847824, "learning_rate": 0.00012692765756364865, "loss": 2.3861, "step": 487710 }, { "epoch": 0.971646691317098, "grad_norm": 0.22599366307258606, "learning_rate": 0.00012682088416510485, "loss": 2.3894, "step": 487720 }, { "epoch": 0.9716666135407369, "grad_norm": 0.2411404550075531, "learning_rate": 0.0001267141168524224, "loss": 2.3804, "step": 487730 }, { "epoch": 0.9716865357643759, "grad_norm": 0.22862738370895386, "learning_rate": 0.00012660735562456104, "loss": 2.3859, "step": 487740 }, { "epoch": 0.9717064579880148, "grad_norm": 0.2563559114933014, "learning_rate": 0.0001265006004804805, "loss": 2.3878, "step": 487750 }, { "epoch": 0.9717263802116537, "grad_norm": 0.240756094455719, "learning_rate": 0.00012639385141914095, "loss": 2.3827, "step": 487760 }, { "epoch": 0.9717463024352926, "grad_norm": 0.24231669306755066, "learning_rate": 0.00012628710843950276, "loss": 2.3799, "step": 487770 }, { "epoch": 0.9717662246589315, "grad_norm": 0.22896328568458557, "learning_rate": 0.0001261803715405263, "loss": 2.3887, "step": 487780 }, { "epoch": 0.9717861468825705, "grad_norm": 0.24679109454154968, "learning_rate": 0.00012607364072117288, "loss": 2.377, "step": 487790 }, { "epoch": 0.9718060691062094, "grad_norm": 0.22721651196479797, "learning_rate": 0.00012596691598040354, "loss": 2.3892, "step": 487800 }, { "epoch": 0.9718259913298483, "grad_norm": 0.2581610381603241, "learning_rate": 0.00012586019731717978, "loss": 2.3781, "step": 487810 }, { "epoch": 0.9718459135534872, "grad_norm": 0.2327818125486374, "learning_rate": 0.00012575348473046376, "loss": 2.3792, "step": 487820 }, { "epoch": 0.9718658357771262, "grad_norm": 0.2856535315513611, "learning_rate": 0.00012564677821921743, "loss": 2.3918, "step": 487830 }, { "epoch": 0.971885758000765, "grad_norm": 0.23272816836833954, "learning_rate": 0.00012554007778240296, "loss": 2.397, "step": 487840 }, { "epoch": 0.9719056802244039, "grad_norm": 0.23342877626419067, "learning_rate": 0.0001254333834189836, "loss": 2.3888, "step": 487850 }, { "epoch": 0.9719256024480428, "grad_norm": 0.24196840822696686, "learning_rate": 0.0001253266951279217, "loss": 2.3971, "step": 487860 }, { "epoch": 0.9719455246716817, "grad_norm": 0.2350180447101593, "learning_rate": 0.0001252200129081813, "loss": 2.3909, "step": 487870 }, { "epoch": 0.9719654468953207, "grad_norm": 0.24043241143226624, "learning_rate": 0.0001251133367587256, "loss": 2.3914, "step": 487880 }, { "epoch": 0.9719853691189596, "grad_norm": 0.21506129205226898, "learning_rate": 0.00012500666667851856, "loss": 2.3762, "step": 487890 }, { "epoch": 0.9720052913425985, "grad_norm": 0.22599364817142487, "learning_rate": 0.00012490000266652434, "loss": 2.3937, "step": 487900 }, { "epoch": 0.9720252135662374, "grad_norm": 0.24803638458251953, "learning_rate": 0.00012479334472170777, "loss": 2.3849, "step": 487910 }, { "epoch": 0.9720451357898763, "grad_norm": 0.2298109233379364, "learning_rate": 0.00012468669284303324, "loss": 2.3918, "step": 487920 }, { "epoch": 0.9720650580135153, "grad_norm": 0.24478337168693542, "learning_rate": 0.00012458004702946602, "loss": 2.3672, "step": 487930 }, { "epoch": 0.9720849802371542, "grad_norm": 0.2652318775653839, "learning_rate": 0.00012447340727997136, "loss": 2.378, "step": 487940 }, { "epoch": 0.9721049024607931, "grad_norm": 0.2179672122001648, "learning_rate": 0.00012436677359351522, "loss": 2.3658, "step": 487950 }, { "epoch": 0.972124824684432, "grad_norm": 0.24383822083473206, "learning_rate": 0.0001242601459690631, "loss": 2.3825, "step": 487960 }, { "epoch": 0.9721447469080708, "grad_norm": 0.23699970543384552, "learning_rate": 0.0001241535244055818, "loss": 2.3782, "step": 487970 }, { "epoch": 0.9721646691317098, "grad_norm": 0.23743082582950592, "learning_rate": 0.0001240469089020375, "loss": 2.3795, "step": 487980 }, { "epoch": 0.9721845913553487, "grad_norm": 0.246563121676445, "learning_rate": 0.00012394029945739726, "loss": 2.3752, "step": 487990 }, { "epoch": 0.9722045135789876, "grad_norm": 0.23219619691371918, "learning_rate": 0.00012383369607062812, "loss": 2.3804, "step": 488000 }, { "epoch": 0.9722244358026265, "grad_norm": 0.2225598692893982, "learning_rate": 0.00012372709874069755, "loss": 2.3972, "step": 488010 }, { "epoch": 0.9722443580262654, "grad_norm": 0.22581791877746582, "learning_rate": 0.00012362050746657328, "loss": 2.3806, "step": 488020 }, { "epoch": 0.9722642802499044, "grad_norm": 0.25902295112609863, "learning_rate": 0.00012351392224722323, "loss": 2.3707, "step": 488030 }, { "epoch": 0.9722842024735433, "grad_norm": 0.23918816447257996, "learning_rate": 0.00012340734308161606, "loss": 2.3914, "step": 488040 }, { "epoch": 0.9723041246971822, "grad_norm": 0.2442593276500702, "learning_rate": 0.00012330076996871987, "loss": 2.3857, "step": 488050 }, { "epoch": 0.9723240469208211, "grad_norm": 0.2283218801021576, "learning_rate": 0.0001231942029075037, "loss": 2.384, "step": 488060 }, { "epoch": 0.97234396914446, "grad_norm": 0.23652300238609314, "learning_rate": 0.0001230876418969371, "loss": 2.3763, "step": 488070 }, { "epoch": 0.972363891368099, "grad_norm": 0.23004885017871857, "learning_rate": 0.0001229810869359893, "loss": 2.4002, "step": 488080 }, { "epoch": 0.9723838135917379, "grad_norm": 0.2530868649482727, "learning_rate": 0.00012287453802363002, "loss": 2.3912, "step": 488090 }, { "epoch": 0.9724037358153768, "grad_norm": 0.2299429178237915, "learning_rate": 0.00012276799515882964, "loss": 2.3791, "step": 488100 }, { "epoch": 0.9724236580390156, "grad_norm": 0.23802608251571655, "learning_rate": 0.0001226614583405581, "loss": 2.3762, "step": 488110 }, { "epoch": 0.9724435802626546, "grad_norm": 0.23404110968112946, "learning_rate": 0.00012255492756778642, "loss": 2.3937, "step": 488120 }, { "epoch": 0.9724635024862935, "grad_norm": 0.24610942602157593, "learning_rate": 0.00012244840283948523, "loss": 2.3756, "step": 488130 }, { "epoch": 0.9724834247099324, "grad_norm": 0.25483760237693787, "learning_rate": 0.000122341884154626, "loss": 2.3708, "step": 488140 }, { "epoch": 0.9725033469335713, "grad_norm": 0.2289208173751831, "learning_rate": 0.00012223537151218023, "loss": 2.3688, "step": 488150 }, { "epoch": 0.9725232691572102, "grad_norm": 0.23695114254951477, "learning_rate": 0.00012212886491111963, "loss": 2.3725, "step": 488160 }, { "epoch": 0.9725431913808492, "grad_norm": 0.23735357820987701, "learning_rate": 0.00012202236435041637, "loss": 2.3947, "step": 488170 }, { "epoch": 0.9725631136044881, "grad_norm": 0.2510116696357727, "learning_rate": 0.00012191586982904301, "loss": 2.4005, "step": 488180 }, { "epoch": 0.972583035828127, "grad_norm": 0.24211879074573517, "learning_rate": 0.00012180938134597219, "loss": 2.3938, "step": 488190 }, { "epoch": 0.9726029580517659, "grad_norm": 0.24303196370601654, "learning_rate": 0.00012170289890017671, "loss": 2.362, "step": 488200 }, { "epoch": 0.9726228802754048, "grad_norm": 0.23638536036014557, "learning_rate": 0.00012159642249062985, "loss": 2.3784, "step": 488210 }, { "epoch": 0.9726428024990438, "grad_norm": 0.22953054308891296, "learning_rate": 0.00012148995211630553, "loss": 2.3693, "step": 488220 }, { "epoch": 0.9726627247226827, "grad_norm": 0.24139836430549622, "learning_rate": 0.00012138348777617747, "loss": 2.3865, "step": 488230 }, { "epoch": 0.9726826469463216, "grad_norm": 0.23193208873271942, "learning_rate": 0.0001212770294692196, "loss": 2.3681, "step": 488240 }, { "epoch": 0.9727025691699605, "grad_norm": 0.235905259847641, "learning_rate": 0.0001211705771944065, "loss": 2.3872, "step": 488250 }, { "epoch": 0.9727224913935993, "grad_norm": 0.24331165850162506, "learning_rate": 0.00012106413095071278, "loss": 2.3785, "step": 488260 }, { "epoch": 0.9727424136172383, "grad_norm": 0.26624518632888794, "learning_rate": 0.00012095769073711371, "loss": 2.3715, "step": 488270 }, { "epoch": 0.9727623358408772, "grad_norm": 0.26144397258758545, "learning_rate": 0.00012085125655258455, "loss": 2.3903, "step": 488280 }, { "epoch": 0.9727822580645161, "grad_norm": 0.22618062794208527, "learning_rate": 0.00012074482839610102, "loss": 2.3665, "step": 488290 }, { "epoch": 0.972802180288155, "grad_norm": 0.23367244005203247, "learning_rate": 0.00012063840626663858, "loss": 2.3927, "step": 488300 }, { "epoch": 0.9728221025117939, "grad_norm": 0.22530844807624817, "learning_rate": 0.00012053199016317384, "loss": 2.3929, "step": 488310 }, { "epoch": 0.9728420247354329, "grad_norm": 0.23722516000270844, "learning_rate": 0.00012042558008468318, "loss": 2.3788, "step": 488320 }, { "epoch": 0.9728619469590718, "grad_norm": 0.22797498106956482, "learning_rate": 0.00012031917603014319, "loss": 2.3785, "step": 488330 }, { "epoch": 0.9728818691827107, "grad_norm": 0.23295949399471283, "learning_rate": 0.00012021277799853114, "loss": 2.3784, "step": 488340 }, { "epoch": 0.9729017914063496, "grad_norm": 0.23999366164207458, "learning_rate": 0.00012010638598882407, "loss": 2.3725, "step": 488350 }, { "epoch": 0.9729217136299885, "grad_norm": 0.5871918797492981, "learning_rate": 0.00011999999999999988, "loss": 2.378, "step": 488360 }, { "epoch": 0.9729416358536275, "grad_norm": 0.23935554921627045, "learning_rate": 0.00011989362003103655, "loss": 2.374, "step": 488370 }, { "epoch": 0.9729615580772664, "grad_norm": 0.26168501377105713, "learning_rate": 0.00011978724608091218, "loss": 2.3737, "step": 488380 }, { "epoch": 0.9729814803009053, "grad_norm": 0.23773857951164246, "learning_rate": 0.00011968087814860539, "loss": 2.385, "step": 488390 }, { "epoch": 0.9730014025245441, "grad_norm": 0.24024651944637299, "learning_rate": 0.00011957451623309457, "loss": 2.3674, "step": 488400 }, { "epoch": 0.9730213247481831, "grad_norm": 0.24118667840957642, "learning_rate": 0.00011946816033335938, "loss": 2.3767, "step": 488410 }, { "epoch": 0.973041246971822, "grad_norm": 0.2262786477804184, "learning_rate": 0.00011936181044837868, "loss": 2.3754, "step": 488420 }, { "epoch": 0.9730611691954609, "grad_norm": 0.22713133692741394, "learning_rate": 0.00011925546657713238, "loss": 2.385, "step": 488430 }, { "epoch": 0.9730810914190998, "grad_norm": 0.2522929608821869, "learning_rate": 0.0001191491287186004, "loss": 2.405, "step": 488440 }, { "epoch": 0.9731010136427387, "grad_norm": 0.4000512659549713, "learning_rate": 0.00011904279687176312, "loss": 2.3907, "step": 488450 }, { "epoch": 0.9731209358663777, "grad_norm": 0.23455490171909332, "learning_rate": 0.00011893647103560046, "loss": 2.3635, "step": 488460 }, { "epoch": 0.9731408580900166, "grad_norm": 0.23922309279441833, "learning_rate": 0.00011883015120909391, "loss": 2.3827, "step": 488470 }, { "epoch": 0.9731607803136555, "grad_norm": 0.2256113737821579, "learning_rate": 0.0001187238373912245, "loss": 2.3735, "step": 488480 }, { "epoch": 0.9731807025372944, "grad_norm": 0.2331286519765854, "learning_rate": 0.00011861752958097328, "loss": 2.3829, "step": 488490 }, { "epoch": 0.9732006247609333, "grad_norm": 0.22438021004199982, "learning_rate": 0.00011851122777732215, "loss": 2.3796, "step": 488500 }, { "epoch": 0.9732205469845723, "grad_norm": 0.23211103677749634, "learning_rate": 0.00011840493197925285, "loss": 2.3731, "step": 488510 }, { "epoch": 0.9732404692082112, "grad_norm": 0.2320125550031662, "learning_rate": 0.00011829864218574792, "loss": 2.3662, "step": 488520 }, { "epoch": 0.9732603914318501, "grad_norm": 0.24099846184253693, "learning_rate": 0.0001181923583957898, "loss": 2.3944, "step": 488530 }, { "epoch": 0.973280313655489, "grad_norm": 0.2417367696762085, "learning_rate": 0.00011808608060836123, "loss": 2.3847, "step": 488540 }, { "epoch": 0.9733002358791278, "grad_norm": 0.2570765018463135, "learning_rate": 0.0001179798088224453, "loss": 2.3743, "step": 488550 }, { "epoch": 0.9733201581027668, "grad_norm": 0.24303066730499268, "learning_rate": 0.00011787354303702525, "loss": 2.3933, "step": 488560 }, { "epoch": 0.9733400803264057, "grad_norm": 0.22520902752876282, "learning_rate": 0.00011776728325108521, "loss": 2.3881, "step": 488570 }, { "epoch": 0.9733600025500446, "grad_norm": 0.22324472665786743, "learning_rate": 0.00011766102946360912, "loss": 2.3998, "step": 488580 }, { "epoch": 0.9733799247736835, "grad_norm": 0.23457010090351105, "learning_rate": 0.00011755478167358069, "loss": 2.383, "step": 488590 }, { "epoch": 0.9733998469973224, "grad_norm": 0.24320268630981445, "learning_rate": 0.00011744853987998516, "loss": 2.3951, "step": 488600 }, { "epoch": 0.9734197692209614, "grad_norm": 0.24297863245010376, "learning_rate": 0.00011734230408180691, "loss": 2.3753, "step": 488610 }, { "epoch": 0.9734396914446003, "grad_norm": 0.23742131888866425, "learning_rate": 0.00011723607427803095, "loss": 2.3617, "step": 488620 }, { "epoch": 0.9734596136682392, "grad_norm": 0.23685383796691895, "learning_rate": 0.00011712985046764325, "loss": 2.4034, "step": 488630 }, { "epoch": 0.9734795358918781, "grad_norm": 0.8419590592384338, "learning_rate": 0.00011702363264962879, "loss": 2.3677, "step": 488640 }, { "epoch": 0.973499458115517, "grad_norm": 0.24198904633522034, "learning_rate": 0.00011691742082297418, "loss": 2.3856, "step": 488650 }, { "epoch": 0.973519380339156, "grad_norm": 0.22918996214866638, "learning_rate": 0.00011681121498666514, "loss": 2.3805, "step": 488660 }, { "epoch": 0.9735393025627949, "grad_norm": 0.22262825071811676, "learning_rate": 0.00011670501513968867, "loss": 2.3652, "step": 488670 }, { "epoch": 0.9735592247864338, "grad_norm": 0.23615361750125885, "learning_rate": 0.00011659882128103139, "loss": 2.3991, "step": 488680 }, { "epoch": 0.9735791470100726, "grad_norm": 0.24375151097774506, "learning_rate": 0.00011649263340968052, "loss": 2.3687, "step": 488690 }, { "epoch": 0.9735990692337116, "grad_norm": 0.23379312455654144, "learning_rate": 0.00011638645152462335, "loss": 2.3908, "step": 488700 }, { "epoch": 0.9736189914573505, "grad_norm": 0.2250717282295227, "learning_rate": 0.00011628027562484755, "loss": 2.3794, "step": 488710 }, { "epoch": 0.9736389136809894, "grad_norm": 0.2270420789718628, "learning_rate": 0.00011617410570934128, "loss": 2.3864, "step": 488720 }, { "epoch": 0.9736588359046283, "grad_norm": 0.24050238728523254, "learning_rate": 0.00011606794177709268, "loss": 2.3872, "step": 488730 }, { "epoch": 0.9736787581282672, "grad_norm": 0.22126249969005585, "learning_rate": 0.00011596178382709033, "loss": 2.381, "step": 488740 }, { "epoch": 0.9736986803519062, "grad_norm": 0.24306544661521912, "learning_rate": 0.00011585563185832282, "loss": 2.3978, "step": 488750 }, { "epoch": 0.9737186025755451, "grad_norm": 0.2363254576921463, "learning_rate": 0.00011574948586977962, "loss": 2.3813, "step": 488760 }, { "epoch": 0.973738524799184, "grad_norm": 0.23364649713039398, "learning_rate": 0.00011564334586044978, "loss": 2.3699, "step": 488770 }, { "epoch": 0.9737584470228229, "grad_norm": 0.2364775687456131, "learning_rate": 0.00011553721182932343, "loss": 2.3806, "step": 488780 }, { "epoch": 0.9737783692464618, "grad_norm": 0.27605608105659485, "learning_rate": 0.00011543108377539024, "loss": 2.3908, "step": 488790 }, { "epoch": 0.9737982914701008, "grad_norm": 0.24834410846233368, "learning_rate": 0.00011532496169764061, "loss": 2.3729, "step": 488800 }, { "epoch": 0.9738182136937397, "grad_norm": 0.23466172814369202, "learning_rate": 0.0001152188455950649, "loss": 2.3756, "step": 488810 }, { "epoch": 0.9738381359173786, "grad_norm": 0.21587543189525604, "learning_rate": 0.0001151127354666539, "loss": 2.3776, "step": 488820 }, { "epoch": 0.9738580581410174, "grad_norm": 0.23214583098888397, "learning_rate": 0.0001150066313113991, "loss": 2.3943, "step": 488830 }, { "epoch": 0.9738779803646563, "grad_norm": 0.2749638557434082, "learning_rate": 0.00011490053312829151, "loss": 2.3887, "step": 488840 }, { "epoch": 0.9738979025882953, "grad_norm": 0.2403741329908371, "learning_rate": 0.00011479444091632285, "loss": 2.3882, "step": 488850 }, { "epoch": 0.9739178248119342, "grad_norm": 0.25446048378944397, "learning_rate": 0.00011468835467448524, "loss": 2.3595, "step": 488860 }, { "epoch": 0.9739377470355731, "grad_norm": 0.24267533421516418, "learning_rate": 0.0001145822744017706, "loss": 2.3885, "step": 488870 }, { "epoch": 0.973957669259212, "grad_norm": 0.25153636932373047, "learning_rate": 0.00011447620009717197, "loss": 2.3816, "step": 488880 }, { "epoch": 0.9739775914828509, "grad_norm": 0.23903153836727142, "learning_rate": 0.0001143701317596817, "loss": 2.3727, "step": 488890 }, { "epoch": 0.9739975137064899, "grad_norm": 0.2340572476387024, "learning_rate": 0.00011426406938829326, "loss": 2.371, "step": 488900 }, { "epoch": 0.9740174359301288, "grad_norm": 0.22860567271709442, "learning_rate": 0.00011415801298199969, "loss": 2.3825, "step": 488910 }, { "epoch": 0.9740373581537677, "grad_norm": 0.2462492734193802, "learning_rate": 0.00011405196253979466, "loss": 2.3749, "step": 488920 }, { "epoch": 0.9740572803774066, "grad_norm": 0.26085159182548523, "learning_rate": 0.00011394591806067233, "loss": 2.371, "step": 488930 }, { "epoch": 0.9740772026010455, "grad_norm": 0.23735125362873077, "learning_rate": 0.00011383987954362685, "loss": 2.3718, "step": 488940 }, { "epoch": 0.9740971248246845, "grad_norm": 0.23886337876319885, "learning_rate": 0.00011373384698765276, "loss": 2.3762, "step": 488950 }, { "epoch": 0.9741170470483234, "grad_norm": 0.23110561072826385, "learning_rate": 0.00011362782039174468, "loss": 2.3666, "step": 488960 }, { "epoch": 0.9741369692719622, "grad_norm": 0.23805516958236694, "learning_rate": 0.00011352179975489763, "loss": 2.3822, "step": 488970 }, { "epoch": 0.9741568914956011, "grad_norm": 0.2314550280570984, "learning_rate": 0.0001134157850761075, "loss": 2.3853, "step": 488980 }, { "epoch": 0.97417681371924, "grad_norm": 0.24164117872714996, "learning_rate": 0.00011330977635436934, "loss": 2.3806, "step": 488990 }, { "epoch": 0.974196735942879, "grad_norm": 0.24098491668701172, "learning_rate": 0.00011320377358867929, "loss": 2.3666, "step": 489000 }, { "epoch": 0.9742166581665179, "grad_norm": 0.24449138343334198, "learning_rate": 0.00011309777677803346, "loss": 2.3942, "step": 489010 }, { "epoch": 0.9742365803901568, "grad_norm": 0.25222355127334595, "learning_rate": 0.00011299178592142844, "loss": 2.412, "step": 489020 }, { "epoch": 0.9742565026137957, "grad_norm": 0.23424740135669708, "learning_rate": 0.00011288580101786105, "loss": 2.3878, "step": 489030 }, { "epoch": 0.9742764248374347, "grad_norm": 0.2564144730567932, "learning_rate": 0.00011277982206632808, "loss": 2.3914, "step": 489040 }, { "epoch": 0.9742963470610736, "grad_norm": 0.23231655359268188, "learning_rate": 0.00011267384906582723, "loss": 2.3738, "step": 489050 }, { "epoch": 0.9743162692847125, "grad_norm": 0.22671735286712646, "learning_rate": 0.00011256788201535573, "loss": 2.392, "step": 489060 }, { "epoch": 0.9743361915083514, "grad_norm": 0.24724939465522766, "learning_rate": 0.00011246192091391172, "loss": 2.3742, "step": 489070 }, { "epoch": 0.9743561137319903, "grad_norm": 0.2440149188041687, "learning_rate": 0.00011235596576049334, "loss": 2.3664, "step": 489080 }, { "epoch": 0.9743760359556293, "grad_norm": 0.24112658202648163, "learning_rate": 0.00011225001655409894, "loss": 2.3915, "step": 489090 }, { "epoch": 0.9743959581792682, "grad_norm": 0.2508159577846527, "learning_rate": 0.00011214407329372711, "loss": 2.3838, "step": 489100 }, { "epoch": 0.974415880402907, "grad_norm": 0.22699332237243652, "learning_rate": 0.00011203813597837731, "loss": 2.3883, "step": 489110 }, { "epoch": 0.9744358026265459, "grad_norm": 0.243299201130867, "learning_rate": 0.00011193220460704856, "loss": 2.3867, "step": 489120 }, { "epoch": 0.9744557248501848, "grad_norm": 0.23992879688739777, "learning_rate": 0.00011182627917874033, "loss": 2.3872, "step": 489130 }, { "epoch": 0.9744756470738238, "grad_norm": 0.2525433599948883, "learning_rate": 0.00011172035969245275, "loss": 2.3809, "step": 489140 }, { "epoch": 0.9744955692974627, "grad_norm": 0.22877688705921173, "learning_rate": 0.00011161444614718574, "loss": 2.379, "step": 489150 }, { "epoch": 0.9745154915211016, "grad_norm": 0.2635750472545624, "learning_rate": 0.00011150853854193987, "loss": 2.3938, "step": 489160 }, { "epoch": 0.9745354137447405, "grad_norm": 0.2333413064479828, "learning_rate": 0.0001114026368757155, "loss": 2.3795, "step": 489170 }, { "epoch": 0.9745553359683794, "grad_norm": 0.24261987209320068, "learning_rate": 0.00011129674114751409, "loss": 2.371, "step": 489180 }, { "epoch": 0.9745752581920184, "grad_norm": 0.23431073129177094, "learning_rate": 0.00011119085135633666, "loss": 2.3645, "step": 489190 }, { "epoch": 0.9745951804156573, "grad_norm": 0.25286123156547546, "learning_rate": 0.0001110849675011849, "loss": 2.3819, "step": 489200 }, { "epoch": 0.9746151026392962, "grad_norm": 0.23393937945365906, "learning_rate": 0.00011097908958106028, "loss": 2.3729, "step": 489210 }, { "epoch": 0.9746350248629351, "grad_norm": 0.2510249614715576, "learning_rate": 0.00011087321759496517, "loss": 2.3794, "step": 489220 }, { "epoch": 0.974654947086574, "grad_norm": 0.2491598278284073, "learning_rate": 0.00011076735154190188, "loss": 2.3687, "step": 489230 }, { "epoch": 0.974674869310213, "grad_norm": 0.23349326848983765, "learning_rate": 0.000110661491420873, "loss": 2.3775, "step": 489240 }, { "epoch": 0.9746947915338519, "grad_norm": 0.24382959306240082, "learning_rate": 0.00011055563723088157, "loss": 2.3732, "step": 489250 }, { "epoch": 0.9747147137574907, "grad_norm": 0.24236422777175903, "learning_rate": 0.00011044978897093084, "loss": 2.3721, "step": 489260 }, { "epoch": 0.9747346359811296, "grad_norm": 0.2571442425251007, "learning_rate": 0.000110343946640024, "loss": 2.3649, "step": 489270 }, { "epoch": 0.9747545582047685, "grad_norm": 0.24138106405735016, "learning_rate": 0.00011023811023716523, "loss": 2.3669, "step": 489280 }, { "epoch": 0.9747744804284075, "grad_norm": 0.23102617263793945, "learning_rate": 0.00011013227976135842, "loss": 2.3701, "step": 489290 }, { "epoch": 0.9747944026520464, "grad_norm": 0.2371377795934677, "learning_rate": 0.00011002645521160792, "loss": 2.389, "step": 489300 }, { "epoch": 0.9748143248756853, "grad_norm": 0.2398725003004074, "learning_rate": 0.00010992063658691832, "loss": 2.3691, "step": 489310 }, { "epoch": 0.9748342470993242, "grad_norm": 0.2367473691701889, "learning_rate": 0.00010981482388629438, "loss": 2.3847, "step": 489320 }, { "epoch": 0.9748541693229632, "grad_norm": 0.24879716336727142, "learning_rate": 0.00010970901710874159, "loss": 2.3798, "step": 489330 }, { "epoch": 0.9748740915466021, "grad_norm": 0.23736536502838135, "learning_rate": 0.00010960321625326497, "loss": 2.3724, "step": 489340 }, { "epoch": 0.974894013770241, "grad_norm": 0.2319232076406479, "learning_rate": 0.00010949742131887063, "loss": 2.3922, "step": 489350 }, { "epoch": 0.9749139359938799, "grad_norm": 0.24459779262542725, "learning_rate": 0.00010939163230456428, "loss": 2.3813, "step": 489360 }, { "epoch": 0.9749338582175188, "grad_norm": 0.24302031099796295, "learning_rate": 0.00010928584920935225, "loss": 2.3772, "step": 489370 }, { "epoch": 0.9749537804411578, "grad_norm": 0.2234238237142563, "learning_rate": 0.00010918007203224135, "loss": 2.3699, "step": 489380 }, { "epoch": 0.9749737026647967, "grad_norm": 0.25316449999809265, "learning_rate": 0.00010907430077223812, "loss": 2.3799, "step": 489390 }, { "epoch": 0.9749936248884356, "grad_norm": 0.23004432022571564, "learning_rate": 0.00010896853542834984, "loss": 2.3589, "step": 489400 }, { "epoch": 0.9750135471120744, "grad_norm": 0.23001323640346527, "learning_rate": 0.00010886277599958394, "loss": 2.3838, "step": 489410 }, { "epoch": 0.9750334693357133, "grad_norm": 0.24887420237064362, "learning_rate": 0.00010875702248494789, "loss": 2.3781, "step": 489420 }, { "epoch": 0.9750533915593523, "grad_norm": 0.2368202656507492, "learning_rate": 0.00010865127488344984, "loss": 2.3916, "step": 489430 }, { "epoch": 0.9750733137829912, "grad_norm": 0.24715624749660492, "learning_rate": 0.00010854553319409788, "loss": 2.3864, "step": 489440 }, { "epoch": 0.9750932360066301, "grad_norm": 0.2348634898662567, "learning_rate": 0.00010843979741590037, "loss": 2.3768, "step": 489450 }, { "epoch": 0.975113158230269, "grad_norm": 0.22722460329532623, "learning_rate": 0.00010833406754786656, "loss": 2.378, "step": 489460 }, { "epoch": 0.9751330804539079, "grad_norm": 0.23148512840270996, "learning_rate": 0.00010822834358900479, "loss": 2.378, "step": 489470 }, { "epoch": 0.9751530026775469, "grad_norm": 0.23230646550655365, "learning_rate": 0.00010812262553832519, "loss": 2.383, "step": 489480 }, { "epoch": 0.9751729249011858, "grad_norm": 0.23920568823814392, "learning_rate": 0.000108016913394837, "loss": 2.3866, "step": 489490 }, { "epoch": 0.9751928471248247, "grad_norm": 0.26900315284729004, "learning_rate": 0.00010791120715754987, "loss": 2.39, "step": 489500 }, { "epoch": 0.9752127693484636, "grad_norm": 0.24270319938659668, "learning_rate": 0.00010780550682547441, "loss": 2.3859, "step": 489510 }, { "epoch": 0.9752326915721025, "grad_norm": 0.24222531914710999, "learning_rate": 0.00010769981239762072, "loss": 2.3749, "step": 489520 }, { "epoch": 0.9752526137957415, "grad_norm": 0.24101406335830688, "learning_rate": 0.0001075941238729996, "loss": 2.3818, "step": 489530 }, { "epoch": 0.9752725360193804, "grad_norm": 0.24411700665950775, "learning_rate": 0.00010748844125062207, "loss": 2.375, "step": 489540 }, { "epoch": 0.9752924582430192, "grad_norm": 0.2526775896549225, "learning_rate": 0.00010738276452949957, "loss": 2.3784, "step": 489550 }, { "epoch": 0.9753123804666581, "grad_norm": 0.23172198235988617, "learning_rate": 0.00010727709370864335, "loss": 2.363, "step": 489560 }, { "epoch": 0.975332302690297, "grad_norm": 0.24072030186653137, "learning_rate": 0.0001071714287870651, "loss": 2.3883, "step": 489570 }, { "epoch": 0.975352224913936, "grad_norm": 0.22404903173446655, "learning_rate": 0.00010706576976377757, "loss": 2.3854, "step": 489580 }, { "epoch": 0.9753721471375749, "grad_norm": 0.24117042124271393, "learning_rate": 0.00010696011663779248, "loss": 2.3801, "step": 489590 }, { "epoch": 0.9753920693612138, "grad_norm": 0.23830659687519073, "learning_rate": 0.00010685446940812282, "loss": 2.3851, "step": 489600 }, { "epoch": 0.9754119915848527, "grad_norm": 0.2725326418876648, "learning_rate": 0.0001067488280737814, "loss": 2.3829, "step": 489610 }, { "epoch": 0.9754319138084917, "grad_norm": 0.23875494301319122, "learning_rate": 0.00010664319263378142, "loss": 2.3878, "step": 489620 }, { "epoch": 0.9754518360321306, "grad_norm": 0.22935007512569427, "learning_rate": 0.00010653756308713635, "loss": 2.3995, "step": 489630 }, { "epoch": 0.9754717582557695, "grad_norm": 0.2305530607700348, "learning_rate": 0.00010643193943286011, "loss": 2.3713, "step": 489640 }, { "epoch": 0.9754916804794084, "grad_norm": 0.2370292693376541, "learning_rate": 0.00010632632166996636, "loss": 2.3806, "step": 489650 }, { "epoch": 0.9755116027030473, "grad_norm": 0.25297197699546814, "learning_rate": 0.00010622070979746967, "loss": 2.3843, "step": 489660 }, { "epoch": 0.9755315249266863, "grad_norm": 0.2434578388929367, "learning_rate": 0.00010611510381438439, "loss": 2.3849, "step": 489670 }, { "epoch": 0.9755514471503252, "grad_norm": 0.24962057173252106, "learning_rate": 0.00010600950371972551, "loss": 2.3853, "step": 489680 }, { "epoch": 0.975571369373964, "grad_norm": 0.24800536036491394, "learning_rate": 0.00010590390951250828, "loss": 2.3673, "step": 489690 }, { "epoch": 0.9755912915976029, "grad_norm": 0.22885218262672424, "learning_rate": 0.00010579832119174814, "loss": 2.3816, "step": 489700 }, { "epoch": 0.9756112138212418, "grad_norm": 0.22712503373622894, "learning_rate": 0.00010569273875646035, "loss": 2.3722, "step": 489710 }, { "epoch": 0.9756311360448808, "grad_norm": 0.22701925039291382, "learning_rate": 0.00010558716220566122, "loss": 2.3754, "step": 489720 }, { "epoch": 0.9756510582685197, "grad_norm": 0.23528017103672028, "learning_rate": 0.00010548159153836667, "loss": 2.3809, "step": 489730 }, { "epoch": 0.9756709804921586, "grad_norm": 0.23935355246067047, "learning_rate": 0.00010537602675359348, "loss": 2.3767, "step": 489740 }, { "epoch": 0.9756909027157975, "grad_norm": 0.22461359202861786, "learning_rate": 0.00010527046785035843, "loss": 2.3735, "step": 489750 }, { "epoch": 0.9757108249394364, "grad_norm": 0.24759601056575775, "learning_rate": 0.00010516491482767832, "loss": 2.3717, "step": 489760 }, { "epoch": 0.9757307471630754, "grad_norm": 0.244569793343544, "learning_rate": 0.00010505936768457036, "loss": 2.3992, "step": 489770 }, { "epoch": 0.9757506693867143, "grad_norm": 0.23081311583518982, "learning_rate": 0.0001049538264200527, "loss": 2.3757, "step": 489780 }, { "epoch": 0.9757705916103532, "grad_norm": 0.2433760017156601, "learning_rate": 0.00010484829103314253, "loss": 2.3808, "step": 489790 }, { "epoch": 0.9757905138339921, "grad_norm": 0.2326512336730957, "learning_rate": 0.00010474276152285867, "loss": 2.3786, "step": 489800 }, { "epoch": 0.975810436057631, "grad_norm": 0.23274023830890656, "learning_rate": 0.00010463723788821899, "loss": 2.388, "step": 489810 }, { "epoch": 0.97583035828127, "grad_norm": 0.25390928983688354, "learning_rate": 0.00010453172012824231, "loss": 2.3642, "step": 489820 }, { "epoch": 0.9758502805049089, "grad_norm": 0.2473851442337036, "learning_rate": 0.00010442620824194759, "loss": 2.3798, "step": 489830 }, { "epoch": 0.9758702027285477, "grad_norm": 0.24653974175453186, "learning_rate": 0.00010432070222835433, "loss": 2.3827, "step": 489840 }, { "epoch": 0.9758901249521866, "grad_norm": 0.24172018468379974, "learning_rate": 0.00010421520208648149, "loss": 2.3769, "step": 489850 }, { "epoch": 0.9759100471758255, "grad_norm": 0.23760151863098145, "learning_rate": 0.00010410970781534945, "loss": 2.384, "step": 489860 }, { "epoch": 0.9759299693994645, "grad_norm": 0.24747662246227264, "learning_rate": 0.00010400421941397764, "loss": 2.3648, "step": 489870 }, { "epoch": 0.9759498916231034, "grad_norm": 0.22499863803386688, "learning_rate": 0.00010389873688138684, "loss": 2.3714, "step": 489880 }, { "epoch": 0.9759698138467423, "grad_norm": 0.25872182846069336, "learning_rate": 0.00010379326021659763, "loss": 2.3645, "step": 489890 }, { "epoch": 0.9759897360703812, "grad_norm": 0.26418229937553406, "learning_rate": 0.00010368778941863055, "loss": 2.3748, "step": 489900 }, { "epoch": 0.9760096582940202, "grad_norm": 0.23072819411754608, "learning_rate": 0.00010358232448650707, "loss": 2.3881, "step": 489910 }, { "epoch": 0.9760295805176591, "grad_norm": 0.23257224261760712, "learning_rate": 0.00010347686541924839, "loss": 2.3814, "step": 489920 }, { "epoch": 0.976049502741298, "grad_norm": 0.24233604967594147, "learning_rate": 0.0001033714122158762, "loss": 2.3686, "step": 489930 }, { "epoch": 0.9760694249649369, "grad_norm": 0.22212013602256775, "learning_rate": 0.0001032659648754124, "loss": 2.3792, "step": 489940 }, { "epoch": 0.9760893471885758, "grad_norm": 0.21651749312877655, "learning_rate": 0.00010316052339687953, "loss": 2.3693, "step": 489950 }, { "epoch": 0.9761092694122148, "grad_norm": 0.2383161336183548, "learning_rate": 0.00010305508777929995, "loss": 2.3838, "step": 489960 }, { "epoch": 0.9761291916358537, "grad_norm": 0.23240534961223602, "learning_rate": 0.00010294965802169598, "loss": 2.3704, "step": 489970 }, { "epoch": 0.9761491138594925, "grad_norm": 0.25031712651252747, "learning_rate": 0.0001028442341230913, "loss": 2.3652, "step": 489980 }, { "epoch": 0.9761690360831314, "grad_norm": 0.2516874074935913, "learning_rate": 0.00010273881608250891, "loss": 2.3873, "step": 489990 }, { "epoch": 0.9761889583067703, "grad_norm": 0.2127109169960022, "learning_rate": 0.00010263340389897247, "loss": 2.3755, "step": 490000 }, { "epoch": 0.9762088805304093, "grad_norm": 0.23342174291610718, "learning_rate": 0.00010252799757150566, "loss": 2.3688, "step": 490010 }, { "epoch": 0.9762288027540482, "grad_norm": 0.22486257553100586, "learning_rate": 0.00010242259709913282, "loss": 2.3698, "step": 490020 }, { "epoch": 0.9762487249776871, "grad_norm": 0.21836963295936584, "learning_rate": 0.00010231720248087829, "loss": 2.3822, "step": 490030 }, { "epoch": 0.976268647201326, "grad_norm": 0.23403289914131165, "learning_rate": 0.0001022118137157666, "loss": 2.3747, "step": 490040 }, { "epoch": 0.9762885694249649, "grad_norm": 0.2368512600660324, "learning_rate": 0.00010210643080282301, "loss": 2.3794, "step": 490050 }, { "epoch": 0.9763084916486039, "grad_norm": 0.2328939288854599, "learning_rate": 0.00010200105374107228, "loss": 2.3798, "step": 490060 }, { "epoch": 0.9763284138722428, "grad_norm": 0.2202802300453186, "learning_rate": 0.00010189568252954029, "loss": 2.3782, "step": 490070 }, { "epoch": 0.9763483360958817, "grad_norm": 0.25575727224349976, "learning_rate": 0.00010179031716725252, "loss": 2.3768, "step": 490080 }, { "epoch": 0.9763682583195206, "grad_norm": 0.23618552088737488, "learning_rate": 0.00010168495765323504, "loss": 2.3735, "step": 490090 }, { "epoch": 0.9763881805431595, "grad_norm": 0.23369644582271576, "learning_rate": 0.00010157960398651423, "loss": 2.3803, "step": 490100 }, { "epoch": 0.9764081027667985, "grad_norm": 0.2453376054763794, "learning_rate": 0.00010147425616611661, "loss": 2.3783, "step": 490110 }, { "epoch": 0.9764280249904373, "grad_norm": 0.2542062997817993, "learning_rate": 0.00010136891419106898, "loss": 2.382, "step": 490120 }, { "epoch": 0.9764479472140762, "grad_norm": 0.24333308637142181, "learning_rate": 0.00010126357806039855, "loss": 2.3847, "step": 490130 }, { "epoch": 0.9764678694377151, "grad_norm": 0.23360510170459747, "learning_rate": 0.00010115824777313253, "loss": 2.3633, "step": 490140 }, { "epoch": 0.976487791661354, "grad_norm": 0.2611250877380371, "learning_rate": 0.00010105292332829885, "loss": 2.3738, "step": 490150 }, { "epoch": 0.976507713884993, "grad_norm": 0.24668586254119873, "learning_rate": 0.00010094760472492493, "loss": 2.3883, "step": 490160 }, { "epoch": 0.9765276361086319, "grad_norm": 0.2366187572479248, "learning_rate": 0.00010084229196203931, "loss": 2.3969, "step": 490170 }, { "epoch": 0.9765475583322708, "grad_norm": 0.2489505410194397, "learning_rate": 0.00010073698503867035, "loss": 2.3813, "step": 490180 }, { "epoch": 0.9765674805559097, "grad_norm": 0.23467570543289185, "learning_rate": 0.00010063168395384658, "loss": 2.3811, "step": 490190 }, { "epoch": 0.9765874027795487, "grad_norm": 0.22591276466846466, "learning_rate": 0.00010052638870659747, "loss": 2.3862, "step": 490200 }, { "epoch": 0.9766073250031876, "grad_norm": 0.23549677431583405, "learning_rate": 0.00010042109929595178, "loss": 2.3735, "step": 490210 }, { "epoch": 0.9766272472268265, "grad_norm": 0.25116807222366333, "learning_rate": 0.0001003158157209394, "loss": 2.3827, "step": 490220 }, { "epoch": 0.9766471694504654, "grad_norm": 0.2474631667137146, "learning_rate": 0.00010021053798058977, "loss": 2.3707, "step": 490230 }, { "epoch": 0.9766670916741043, "grad_norm": 0.23160940408706665, "learning_rate": 0.00010010526607393322, "loss": 2.3842, "step": 490240 }, { "epoch": 0.9766870138977433, "grad_norm": 0.23106403648853302, "learning_rate": 0.00010000000000000009, "loss": 2.3876, "step": 490250 }, { "epoch": 0.9767069361213822, "grad_norm": 0.23665547370910645, "learning_rate": 9.98947397578207e-05, "loss": 2.3748, "step": 490260 }, { "epoch": 0.976726858345021, "grad_norm": 0.2228742241859436, "learning_rate": 9.978948534642629e-05, "loss": 2.3854, "step": 490270 }, { "epoch": 0.9767467805686599, "grad_norm": 0.25919675827026367, "learning_rate": 9.96842367648474e-05, "loss": 2.376, "step": 490280 }, { "epoch": 0.9767667027922988, "grad_norm": 0.24088062345981598, "learning_rate": 9.957899401211634e-05, "loss": 2.3855, "step": 490290 }, { "epoch": 0.9767866250159378, "grad_norm": 0.24239522218704224, "learning_rate": 9.947375708726392e-05, "loss": 2.381, "step": 490300 }, { "epoch": 0.9768065472395767, "grad_norm": 0.25309014320373535, "learning_rate": 9.936852598932267e-05, "loss": 2.3799, "step": 490310 }, { "epoch": 0.9768264694632156, "grad_norm": 0.23940633237361908, "learning_rate": 9.926330071732448e-05, "loss": 2.3722, "step": 490320 }, { "epoch": 0.9768463916868545, "grad_norm": 0.22640059888362885, "learning_rate": 9.91580812703019e-05, "loss": 2.3824, "step": 490330 }, { "epoch": 0.9768663139104934, "grad_norm": 0.2502064108848572, "learning_rate": 9.905286764728772e-05, "loss": 2.3837, "step": 490340 }, { "epoch": 0.9768862361341324, "grad_norm": 0.24160687625408173, "learning_rate": 9.894765984731513e-05, "loss": 2.3763, "step": 490350 }, { "epoch": 0.9769061583577713, "grad_norm": 0.24898838996887207, "learning_rate": 9.884245786941693e-05, "loss": 2.3762, "step": 490360 }, { "epoch": 0.9769260805814102, "grad_norm": 0.2487340122461319, "learning_rate": 9.873726171262698e-05, "loss": 2.3857, "step": 490370 }, { "epoch": 0.9769460028050491, "grad_norm": 0.24697881937026978, "learning_rate": 9.863207137597897e-05, "loss": 2.3701, "step": 490380 }, { "epoch": 0.976965925028688, "grad_norm": 0.24819254875183105, "learning_rate": 9.852688685850719e-05, "loss": 2.3737, "step": 490390 }, { "epoch": 0.976985847252327, "grad_norm": 0.2344323694705963, "learning_rate": 9.8421708159246e-05, "loss": 2.3811, "step": 490400 }, { "epoch": 0.9770057694759658, "grad_norm": 0.2689020335674286, "learning_rate": 9.831653527722973e-05, "loss": 2.3676, "step": 490410 }, { "epoch": 0.9770256916996047, "grad_norm": 0.2264048457145691, "learning_rate": 9.821136821149334e-05, "loss": 2.3872, "step": 490420 }, { "epoch": 0.9770456139232436, "grad_norm": 0.24150033295154572, "learning_rate": 9.810620696107209e-05, "loss": 2.3611, "step": 490430 }, { "epoch": 0.9770655361468825, "grad_norm": 0.2282208651304245, "learning_rate": 9.80010515250016e-05, "loss": 2.373, "step": 490440 }, { "epoch": 0.9770854583705215, "grad_norm": 0.23682639002799988, "learning_rate": 9.789590190231712e-05, "loss": 2.3855, "step": 490450 }, { "epoch": 0.9771053805941604, "grad_norm": 0.22298991680145264, "learning_rate": 9.779075809205473e-05, "loss": 2.3931, "step": 490460 }, { "epoch": 0.9771253028177993, "grad_norm": 0.2335321307182312, "learning_rate": 9.768562009325077e-05, "loss": 2.3804, "step": 490470 }, { "epoch": 0.9771452250414382, "grad_norm": 0.2620251178741455, "learning_rate": 9.758048790494156e-05, "loss": 2.3843, "step": 490480 }, { "epoch": 0.9771651472650772, "grad_norm": 0.22252655029296875, "learning_rate": 9.747536152616409e-05, "loss": 2.372, "step": 490490 }, { "epoch": 0.9771850694887161, "grad_norm": 0.23286594450473785, "learning_rate": 9.737024095595515e-05, "loss": 2.3863, "step": 490500 }, { "epoch": 0.977204991712355, "grad_norm": 0.2802174687385559, "learning_rate": 9.726512619335215e-05, "loss": 2.3774, "step": 490510 }, { "epoch": 0.9772249139359939, "grad_norm": 0.2542615532875061, "learning_rate": 9.716001723739254e-05, "loss": 2.3729, "step": 490520 }, { "epoch": 0.9772448361596328, "grad_norm": 0.23421378433704376, "learning_rate": 9.70549140871142e-05, "loss": 2.3734, "step": 490530 }, { "epoch": 0.9772647583832718, "grad_norm": 0.2532903850078583, "learning_rate": 9.694981674155523e-05, "loss": 2.3863, "step": 490540 }, { "epoch": 0.9772846806069106, "grad_norm": 0.812419593334198, "learning_rate": 9.684472519975396e-05, "loss": 2.3763, "step": 490550 }, { "epoch": 0.9773046028305495, "grad_norm": 0.22853125631809235, "learning_rate": 9.673963946074893e-05, "loss": 2.3672, "step": 490560 }, { "epoch": 0.9773245250541884, "grad_norm": 0.23261556029319763, "learning_rate": 9.663455952357913e-05, "loss": 2.3883, "step": 490570 }, { "epoch": 0.9773444472778273, "grad_norm": 0.2427777647972107, "learning_rate": 9.652948538728334e-05, "loss": 2.3786, "step": 490580 }, { "epoch": 0.9773643695014663, "grad_norm": 0.23607978224754333, "learning_rate": 9.642441705090143e-05, "loss": 2.3861, "step": 490590 }, { "epoch": 0.9773842917251052, "grad_norm": 0.22331084311008453, "learning_rate": 9.631935451347307e-05, "loss": 2.3688, "step": 490600 }, { "epoch": 0.9774042139487441, "grad_norm": 0.24959148466587067, "learning_rate": 9.62142977740379e-05, "loss": 2.3682, "step": 490610 }, { "epoch": 0.977424136172383, "grad_norm": 0.2564157545566559, "learning_rate": 9.610924683163602e-05, "loss": 2.3841, "step": 490620 }, { "epoch": 0.9774440583960219, "grad_norm": 0.2877447009086609, "learning_rate": 9.600420168530844e-05, "loss": 2.3817, "step": 490630 }, { "epoch": 0.9774639806196609, "grad_norm": 0.23532943427562714, "learning_rate": 9.589916233409523e-05, "loss": 2.3586, "step": 490640 }, { "epoch": 0.9774839028432998, "grad_norm": 0.23682807385921478, "learning_rate": 9.579412877703786e-05, "loss": 2.3811, "step": 490650 }, { "epoch": 0.9775038250669387, "grad_norm": 0.24726401269435883, "learning_rate": 9.568910101317752e-05, "loss": 2.3965, "step": 490660 }, { "epoch": 0.9775237472905776, "grad_norm": 0.29360711574554443, "learning_rate": 9.558407904155563e-05, "loss": 2.3827, "step": 490670 }, { "epoch": 0.9775436695142165, "grad_norm": 0.24272559583187103, "learning_rate": 9.547906286121389e-05, "loss": 2.3747, "step": 490680 }, { "epoch": 0.9775635917378555, "grad_norm": 0.26497989892959595, "learning_rate": 9.537405247119457e-05, "loss": 2.3866, "step": 490690 }, { "epoch": 0.9775835139614943, "grad_norm": 0.25502896308898926, "learning_rate": 9.52690478705398e-05, "loss": 2.3849, "step": 490700 }, { "epoch": 0.9776034361851332, "grad_norm": 0.2338200956583023, "learning_rate": 9.516404905829234e-05, "loss": 2.38, "step": 490710 }, { "epoch": 0.9776233584087721, "grad_norm": 0.2223983108997345, "learning_rate": 9.505905603349474e-05, "loss": 2.3718, "step": 490720 }, { "epoch": 0.977643280632411, "grad_norm": 0.25387847423553467, "learning_rate": 9.495406879519042e-05, "loss": 2.3686, "step": 490730 }, { "epoch": 0.97766320285605, "grad_norm": 0.2438964545726776, "learning_rate": 9.484908734242259e-05, "loss": 2.388, "step": 490740 }, { "epoch": 0.9776831250796889, "grad_norm": 0.2627217769622803, "learning_rate": 9.474411167423491e-05, "loss": 2.3777, "step": 490750 }, { "epoch": 0.9777030473033278, "grad_norm": 0.22902260720729828, "learning_rate": 9.463914178967147e-05, "loss": 2.3887, "step": 490760 }, { "epoch": 0.9777229695269667, "grad_norm": 0.22227469086647034, "learning_rate": 9.453417768777595e-05, "loss": 2.3758, "step": 490770 }, { "epoch": 0.9777428917506056, "grad_norm": 0.24604061245918274, "learning_rate": 9.44292193675933e-05, "loss": 2.3856, "step": 490780 }, { "epoch": 0.9777628139742446, "grad_norm": 0.22998450696468353, "learning_rate": 9.432426682816786e-05, "loss": 2.3806, "step": 490790 }, { "epoch": 0.9777827361978835, "grad_norm": 0.2343377023935318, "learning_rate": 9.421932006854461e-05, "loss": 2.374, "step": 490800 }, { "epoch": 0.9778026584215224, "grad_norm": 0.25654157996177673, "learning_rate": 9.411437908776899e-05, "loss": 2.3723, "step": 490810 }, { "epoch": 0.9778225806451613, "grad_norm": 0.25026339292526245, "learning_rate": 9.400944388488641e-05, "loss": 2.3737, "step": 490820 }, { "epoch": 0.9778425028688003, "grad_norm": 0.2531452178955078, "learning_rate": 9.390451445894254e-05, "loss": 2.3834, "step": 490830 }, { "epoch": 0.9778624250924391, "grad_norm": 0.2401338517665863, "learning_rate": 9.379959080898325e-05, "loss": 2.3784, "step": 490840 }, { "epoch": 0.977882347316078, "grad_norm": 0.24058398604393005, "learning_rate": 9.369467293405488e-05, "loss": 2.3729, "step": 490850 }, { "epoch": 0.9779022695397169, "grad_norm": 0.2389722317457199, "learning_rate": 9.358976083320414e-05, "loss": 2.3688, "step": 490860 }, { "epoch": 0.9779221917633558, "grad_norm": 0.248091459274292, "learning_rate": 9.348485450547761e-05, "loss": 2.3687, "step": 490870 }, { "epoch": 0.9779421139869948, "grad_norm": 0.4065764248371124, "learning_rate": 9.337995394992227e-05, "loss": 2.3837, "step": 490880 }, { "epoch": 0.9779620362106337, "grad_norm": 0.24113047122955322, "learning_rate": 9.327505916558576e-05, "loss": 2.3734, "step": 490890 }, { "epoch": 0.9779819584342726, "grad_norm": 0.23702362179756165, "learning_rate": 9.317017015151552e-05, "loss": 2.3749, "step": 490900 }, { "epoch": 0.9780018806579115, "grad_norm": 0.2344173938035965, "learning_rate": 9.306528690675942e-05, "loss": 2.37, "step": 490910 }, { "epoch": 0.9780218028815504, "grad_norm": 0.22257527709007263, "learning_rate": 9.296040943036532e-05, "loss": 2.3634, "step": 490920 }, { "epoch": 0.9780417251051894, "grad_norm": 0.230694979429245, "learning_rate": 9.285553772138178e-05, "loss": 2.3822, "step": 490930 }, { "epoch": 0.9780616473288283, "grad_norm": 0.2710737884044647, "learning_rate": 9.275067177885732e-05, "loss": 2.3798, "step": 490940 }, { "epoch": 0.9780815695524672, "grad_norm": 0.24031421542167664, "learning_rate": 9.264581160184094e-05, "loss": 2.3709, "step": 490950 }, { "epoch": 0.9781014917761061, "grad_norm": 0.25032857060432434, "learning_rate": 9.254095718938182e-05, "loss": 2.3738, "step": 490960 }, { "epoch": 0.978121413999745, "grad_norm": 0.2722664773464203, "learning_rate": 9.243610854052919e-05, "loss": 2.382, "step": 490970 }, { "epoch": 0.978141336223384, "grad_norm": 0.23036734759807587, "learning_rate": 9.23312656543327e-05, "loss": 2.3754, "step": 490980 }, { "epoch": 0.9781612584470228, "grad_norm": 0.2282288372516632, "learning_rate": 9.222642852984242e-05, "loss": 2.3693, "step": 490990 }, { "epoch": 0.9781811806706617, "grad_norm": 0.24431239068508148, "learning_rate": 9.212159716610868e-05, "loss": 2.3778, "step": 491000 }, { "epoch": 0.9782011028943006, "grad_norm": 0.2471073716878891, "learning_rate": 9.201677156218158e-05, "loss": 2.3773, "step": 491010 }, { "epoch": 0.9782210251179395, "grad_norm": 0.24088981747627258, "learning_rate": 9.191195171711208e-05, "loss": 2.3689, "step": 491020 }, { "epoch": 0.9782409473415785, "grad_norm": 0.22985371947288513, "learning_rate": 9.180713762995119e-05, "loss": 2.3737, "step": 491030 }, { "epoch": 0.9782608695652174, "grad_norm": 0.23663869500160217, "learning_rate": 9.170232929974986e-05, "loss": 2.3753, "step": 491040 }, { "epoch": 0.9782807917888563, "grad_norm": 0.23583146929740906, "learning_rate": 9.159752672555999e-05, "loss": 2.3897, "step": 491050 }, { "epoch": 0.9783007140124952, "grad_norm": 0.2256697565317154, "learning_rate": 9.149272990643298e-05, "loss": 2.3782, "step": 491060 }, { "epoch": 0.9783206362361341, "grad_norm": 0.2331233024597168, "learning_rate": 9.138793884142093e-05, "loss": 2.355, "step": 491070 }, { "epoch": 0.9783405584597731, "grad_norm": 0.23968873918056488, "learning_rate": 9.128315352957595e-05, "loss": 2.3809, "step": 491080 }, { "epoch": 0.978360480683412, "grad_norm": 0.25369569659233093, "learning_rate": 9.1178373969951e-05, "loss": 2.3773, "step": 491090 }, { "epoch": 0.9783804029070509, "grad_norm": 0.23896759748458862, "learning_rate": 9.10736001615986e-05, "loss": 2.3774, "step": 491100 }, { "epoch": 0.9784003251306898, "grad_norm": 0.22631719708442688, "learning_rate": 9.096883210357199e-05, "loss": 2.3756, "step": 491110 }, { "epoch": 0.9784202473543288, "grad_norm": 0.22583690285682678, "learning_rate": 9.086406979492412e-05, "loss": 2.3712, "step": 491120 }, { "epoch": 0.9784401695779676, "grad_norm": 0.22418582439422607, "learning_rate": 9.07593132347091e-05, "loss": 2.3764, "step": 491130 }, { "epoch": 0.9784600918016065, "grad_norm": 0.23229378461837769, "learning_rate": 9.065456242198011e-05, "loss": 2.3726, "step": 491140 }, { "epoch": 0.9784800140252454, "grad_norm": 0.24027948081493378, "learning_rate": 9.054981735579193e-05, "loss": 2.3832, "step": 491150 }, { "epoch": 0.9784999362488843, "grad_norm": 0.22788338363170624, "learning_rate": 9.044507803519841e-05, "loss": 2.3819, "step": 491160 }, { "epoch": 0.9785198584725233, "grad_norm": 0.23426084220409393, "learning_rate": 9.034034445925433e-05, "loss": 2.3763, "step": 491170 }, { "epoch": 0.9785397806961622, "grad_norm": 0.23650015890598297, "learning_rate": 9.02356166270144e-05, "loss": 2.3813, "step": 491180 }, { "epoch": 0.9785597029198011, "grad_norm": 0.24536335468292236, "learning_rate": 9.013089453753387e-05, "loss": 2.3703, "step": 491190 }, { "epoch": 0.97857962514344, "grad_norm": 0.2408021092414856, "learning_rate": 9.002617818986836e-05, "loss": 2.376, "step": 491200 }, { "epoch": 0.9785995473670789, "grad_norm": 0.23270873725414276, "learning_rate": 8.992146758307352e-05, "loss": 2.3842, "step": 491210 }, { "epoch": 0.9786194695907179, "grad_norm": 0.25589519739151, "learning_rate": 8.981676271620476e-05, "loss": 2.3817, "step": 491220 }, { "epoch": 0.9786393918143568, "grad_norm": 0.2542879283428192, "learning_rate": 8.971206358831862e-05, "loss": 2.3708, "step": 491230 }, { "epoch": 0.9786593140379957, "grad_norm": 0.24668708443641663, "learning_rate": 8.960737019847143e-05, "loss": 2.3909, "step": 491240 }, { "epoch": 0.9786792362616346, "grad_norm": 0.2461281567811966, "learning_rate": 8.950268254571992e-05, "loss": 2.3814, "step": 491250 }, { "epoch": 0.9786991584852734, "grad_norm": 0.23506373167037964, "learning_rate": 8.939800062912106e-05, "loss": 2.3813, "step": 491260 }, { "epoch": 0.9787190807089124, "grad_norm": 0.24058477580547333, "learning_rate": 8.929332444773208e-05, "loss": 2.3821, "step": 491270 }, { "epoch": 0.9787390029325513, "grad_norm": 0.23115086555480957, "learning_rate": 8.918865400061016e-05, "loss": 2.3887, "step": 491280 }, { "epoch": 0.9787589251561902, "grad_norm": 0.2517114579677582, "learning_rate": 8.908398928681315e-05, "loss": 2.3778, "step": 491290 }, { "epoch": 0.9787788473798291, "grad_norm": 0.263882577419281, "learning_rate": 8.89793303053994e-05, "loss": 2.3845, "step": 491300 }, { "epoch": 0.978798769603468, "grad_norm": 0.2558504641056061, "learning_rate": 8.887467705542651e-05, "loss": 2.3766, "step": 491310 }, { "epoch": 0.978818691827107, "grad_norm": 0.31388020515441895, "learning_rate": 8.877002953595348e-05, "loss": 2.3733, "step": 491320 }, { "epoch": 0.9788386140507459, "grad_norm": 0.2510250210762024, "learning_rate": 8.866538774603883e-05, "loss": 2.3771, "step": 491330 }, { "epoch": 0.9788585362743848, "grad_norm": 0.24485595524311066, "learning_rate": 8.856075168474153e-05, "loss": 2.3683, "step": 491340 }, { "epoch": 0.9788784584980237, "grad_norm": 0.2313879430294037, "learning_rate": 8.845612135112103e-05, "loss": 2.3747, "step": 491350 }, { "epoch": 0.9788983807216626, "grad_norm": 0.24268262088298798, "learning_rate": 8.835149674423671e-05, "loss": 2.38, "step": 491360 }, { "epoch": 0.9789183029453016, "grad_norm": 0.22487057745456696, "learning_rate": 8.824687786314845e-05, "loss": 2.3709, "step": 491370 }, { "epoch": 0.9789382251689405, "grad_norm": 0.23838569223880768, "learning_rate": 8.81422647069161e-05, "loss": 2.374, "step": 491380 }, { "epoch": 0.9789581473925794, "grad_norm": 0.24473409354686737, "learning_rate": 8.80376572746e-05, "loss": 2.3878, "step": 491390 }, { "epoch": 0.9789780696162182, "grad_norm": 0.23816271126270294, "learning_rate": 8.793305556526087e-05, "loss": 2.3742, "step": 491400 }, { "epoch": 0.9789979918398573, "grad_norm": 0.24607674777507782, "learning_rate": 8.782845957795927e-05, "loss": 2.3709, "step": 491410 }, { "epoch": 0.9790179140634961, "grad_norm": 0.22503696382045746, "learning_rate": 8.772386931175657e-05, "loss": 2.3839, "step": 491420 }, { "epoch": 0.979037836287135, "grad_norm": 0.23124735057353973, "learning_rate": 8.761928476571379e-05, "loss": 2.3801, "step": 491430 }, { "epoch": 0.9790577585107739, "grad_norm": 0.2485591471195221, "learning_rate": 8.751470593889277e-05, "loss": 2.3706, "step": 491440 }, { "epoch": 0.9790776807344128, "grad_norm": 0.2393416166305542, "learning_rate": 8.741013283035515e-05, "loss": 2.3808, "step": 491450 }, { "epoch": 0.9790976029580518, "grad_norm": 0.2417825609445572, "learning_rate": 8.730556543916301e-05, "loss": 2.3771, "step": 491460 }, { "epoch": 0.9791175251816907, "grad_norm": 0.2440246194601059, "learning_rate": 8.72010037643789e-05, "loss": 2.3802, "step": 491470 }, { "epoch": 0.9791374474053296, "grad_norm": 0.2503422200679779, "learning_rate": 8.709644780506509e-05, "loss": 2.377, "step": 491480 }, { "epoch": 0.9791573696289685, "grad_norm": 0.23404347896575928, "learning_rate": 8.699189756028481e-05, "loss": 2.378, "step": 491490 }, { "epoch": 0.9791772918526074, "grad_norm": 0.22411879897117615, "learning_rate": 8.688735302910078e-05, "loss": 2.3811, "step": 491500 }, { "epoch": 0.9791972140762464, "grad_norm": 0.21427525579929352, "learning_rate": 8.678281421057687e-05, "loss": 2.3734, "step": 491510 }, { "epoch": 0.9792171362998853, "grad_norm": 0.23243768513202667, "learning_rate": 8.66782811037763e-05, "loss": 2.378, "step": 491520 }, { "epoch": 0.9792370585235242, "grad_norm": 0.297796368598938, "learning_rate": 8.65737537077631e-05, "loss": 2.3685, "step": 491530 }, { "epoch": 0.979256980747163, "grad_norm": 0.22580648958683014, "learning_rate": 8.64692320216014e-05, "loss": 2.3831, "step": 491540 }, { "epoch": 0.9792769029708019, "grad_norm": 0.233085036277771, "learning_rate": 8.636471604435547e-05, "loss": 2.3635, "step": 491550 }, { "epoch": 0.979296825194441, "grad_norm": 0.2584918737411499, "learning_rate": 8.62602057750903e-05, "loss": 2.3696, "step": 491560 }, { "epoch": 0.9793167474180798, "grad_norm": 0.2586972415447235, "learning_rate": 8.615570121287042e-05, "loss": 2.371, "step": 491570 }, { "epoch": 0.9793366696417187, "grad_norm": 0.24126259982585907, "learning_rate": 8.605120235676122e-05, "loss": 2.3834, "step": 491580 }, { "epoch": 0.9793565918653576, "grad_norm": 0.24441923201084137, "learning_rate": 8.59467092058277e-05, "loss": 2.3839, "step": 491590 }, { "epoch": 0.9793765140889965, "grad_norm": 0.2605118155479431, "learning_rate": 8.584222175913614e-05, "loss": 2.3773, "step": 491600 }, { "epoch": 0.9793964363126355, "grad_norm": 0.24348126351833344, "learning_rate": 8.573774001575219e-05, "loss": 2.3702, "step": 491610 }, { "epoch": 0.9794163585362744, "grad_norm": 0.23509815335273743, "learning_rate": 8.563326397474191e-05, "loss": 2.3774, "step": 491620 }, { "epoch": 0.9794362807599133, "grad_norm": 0.22813716530799866, "learning_rate": 8.552879363517185e-05, "loss": 2.3758, "step": 491630 }, { "epoch": 0.9794562029835522, "grad_norm": 0.23288589715957642, "learning_rate": 8.542432899610898e-05, "loss": 2.3692, "step": 491640 }, { "epoch": 0.9794761252071911, "grad_norm": 0.23539578914642334, "learning_rate": 8.531987005661957e-05, "loss": 2.3709, "step": 491650 }, { "epoch": 0.9794960474308301, "grad_norm": 0.2363514006137848, "learning_rate": 8.521541681577149e-05, "loss": 2.3722, "step": 491660 }, { "epoch": 0.979515969654469, "grad_norm": 0.23831723630428314, "learning_rate": 8.511096927263174e-05, "loss": 2.3743, "step": 491670 }, { "epoch": 0.9795358918781079, "grad_norm": 0.23657584190368652, "learning_rate": 8.500652742626836e-05, "loss": 2.3725, "step": 491680 }, { "epoch": 0.9795558141017467, "grad_norm": 0.246324822306633, "learning_rate": 8.490209127574877e-05, "loss": 2.3674, "step": 491690 }, { "epoch": 0.9795757363253857, "grad_norm": 0.22132408618927002, "learning_rate": 8.479766082014196e-05, "loss": 2.3726, "step": 491700 }, { "epoch": 0.9795956585490246, "grad_norm": 0.2753385901451111, "learning_rate": 8.469323605851575e-05, "loss": 2.3569, "step": 491710 }, { "epoch": 0.9796155807726635, "grad_norm": 0.23379570245742798, "learning_rate": 8.458881698993936e-05, "loss": 2.3651, "step": 491720 }, { "epoch": 0.9796355029963024, "grad_norm": 0.23344677686691284, "learning_rate": 8.448440361348131e-05, "loss": 2.374, "step": 491730 }, { "epoch": 0.9796554252199413, "grad_norm": 0.23672917485237122, "learning_rate": 8.437999592821121e-05, "loss": 2.3657, "step": 491740 }, { "epoch": 0.9796753474435803, "grad_norm": 0.2344183772802353, "learning_rate": 8.427559393319828e-05, "loss": 2.3787, "step": 491750 }, { "epoch": 0.9796952696672192, "grad_norm": 0.22266001999378204, "learning_rate": 8.417119762751257e-05, "loss": 2.362, "step": 491760 }, { "epoch": 0.9797151918908581, "grad_norm": 0.2319074422121048, "learning_rate": 8.406680701022352e-05, "loss": 2.3726, "step": 491770 }, { "epoch": 0.979735114114497, "grad_norm": 0.24736344814300537, "learning_rate": 8.396242208040183e-05, "loss": 2.3685, "step": 491780 }, { "epoch": 0.9797550363381359, "grad_norm": 0.24320700764656067, "learning_rate": 8.385804283711784e-05, "loss": 2.3734, "step": 491790 }, { "epoch": 0.9797749585617749, "grad_norm": 0.24747657775878906, "learning_rate": 8.37536692794425e-05, "loss": 2.3699, "step": 491800 }, { "epoch": 0.9797948807854138, "grad_norm": 0.25044193863868713, "learning_rate": 8.364930140644655e-05, "loss": 2.3673, "step": 491810 }, { "epoch": 0.9798148030090527, "grad_norm": 0.2148677259683609, "learning_rate": 8.35449392172014e-05, "loss": 2.3663, "step": 491820 }, { "epoch": 0.9798347252326916, "grad_norm": 0.23810169100761414, "learning_rate": 8.344058271077847e-05, "loss": 2.3763, "step": 491830 }, { "epoch": 0.9798546474563304, "grad_norm": 0.24172429740428925, "learning_rate": 8.333623188624961e-05, "loss": 2.3789, "step": 491840 }, { "epoch": 0.9798745696799694, "grad_norm": 0.2415677160024643, "learning_rate": 8.323188674268689e-05, "loss": 2.3802, "step": 491850 }, { "epoch": 0.9798944919036083, "grad_norm": 0.24260474741458893, "learning_rate": 8.312754727916238e-05, "loss": 2.3687, "step": 491860 }, { "epoch": 0.9799144141272472, "grad_norm": 0.2426830381155014, "learning_rate": 8.302321349474862e-05, "loss": 2.3697, "step": 491870 }, { "epoch": 0.9799343363508861, "grad_norm": 0.250744491815567, "learning_rate": 8.291888538851854e-05, "loss": 2.3726, "step": 491880 }, { "epoch": 0.979954258574525, "grad_norm": 0.2372511327266693, "learning_rate": 8.281456295954515e-05, "loss": 2.3767, "step": 491890 }, { "epoch": 0.979974180798164, "grad_norm": 0.23354534804821014, "learning_rate": 8.27102462069016e-05, "loss": 2.3811, "step": 491900 }, { "epoch": 0.9799941030218029, "grad_norm": 0.2589298486709595, "learning_rate": 8.260593512966153e-05, "loss": 2.3749, "step": 491910 }, { "epoch": 0.9800140252454418, "grad_norm": 0.2538719177246094, "learning_rate": 8.250162972689856e-05, "loss": 2.3746, "step": 491920 }, { "epoch": 0.9800339474690807, "grad_norm": 0.25197699666023254, "learning_rate": 8.2397329997687e-05, "loss": 2.374, "step": 491930 }, { "epoch": 0.9800538696927196, "grad_norm": 0.24121426045894623, "learning_rate": 8.229303594110093e-05, "loss": 2.3724, "step": 491940 }, { "epoch": 0.9800737919163586, "grad_norm": 0.24729222059249878, "learning_rate": 8.218874755621485e-05, "loss": 2.3687, "step": 491950 }, { "epoch": 0.9800937141399975, "grad_norm": 0.23747582733631134, "learning_rate": 8.208446484210374e-05, "loss": 2.3855, "step": 491960 }, { "epoch": 0.9801136363636364, "grad_norm": 0.23916684091091156, "learning_rate": 8.198018779784234e-05, "loss": 2.3678, "step": 491970 }, { "epoch": 0.9801335585872752, "grad_norm": 0.24024806916713715, "learning_rate": 8.187591642250647e-05, "loss": 2.3856, "step": 491980 }, { "epoch": 0.9801534808109142, "grad_norm": 0.24892006814479828, "learning_rate": 8.17716507151709e-05, "loss": 2.3754, "step": 491990 }, { "epoch": 0.9801734030345531, "grad_norm": 0.22983728349208832, "learning_rate": 8.166739067491213e-05, "loss": 2.3872, "step": 492000 }, { "epoch": 0.980193325258192, "grad_norm": 0.23582428693771362, "learning_rate": 8.156313630080603e-05, "loss": 2.3803, "step": 492010 }, { "epoch": 0.9802132474818309, "grad_norm": 0.23377786576747894, "learning_rate": 8.145888759192866e-05, "loss": 2.378, "step": 492020 }, { "epoch": 0.9802331697054698, "grad_norm": 0.22788602113723755, "learning_rate": 8.135464454735675e-05, "loss": 2.3715, "step": 492030 }, { "epoch": 0.9802530919291088, "grad_norm": 0.23776090145111084, "learning_rate": 8.125040716616706e-05, "loss": 2.3745, "step": 492040 }, { "epoch": 0.9802730141527477, "grad_norm": 0.2384653240442276, "learning_rate": 8.114617544743653e-05, "loss": 2.3771, "step": 492050 }, { "epoch": 0.9802929363763866, "grad_norm": 0.24510371685028076, "learning_rate": 8.104194939024278e-05, "loss": 2.3724, "step": 492060 }, { "epoch": 0.9803128586000255, "grad_norm": 0.24297277629375458, "learning_rate": 8.093772899366303e-05, "loss": 2.3854, "step": 492070 }, { "epoch": 0.9803327808236644, "grad_norm": 0.22999699413776398, "learning_rate": 8.083351425677509e-05, "loss": 2.3834, "step": 492080 }, { "epoch": 0.9803527030473034, "grad_norm": 0.24057146906852722, "learning_rate": 8.072930517865706e-05, "loss": 2.3531, "step": 492090 }, { "epoch": 0.9803726252709423, "grad_norm": 0.23314157128334045, "learning_rate": 8.062510175838744e-05, "loss": 2.3533, "step": 492100 }, { "epoch": 0.9803925474945812, "grad_norm": 0.24742577970027924, "learning_rate": 8.052090399504475e-05, "loss": 2.3741, "step": 492110 }, { "epoch": 0.98041246971822, "grad_norm": 0.2624073326587677, "learning_rate": 8.041671188770772e-05, "loss": 2.3642, "step": 492120 }, { "epoch": 0.9804323919418589, "grad_norm": 0.23747283220291138, "learning_rate": 8.031252543545509e-05, "loss": 2.376, "step": 492130 }, { "epoch": 0.9804523141654979, "grad_norm": 0.23330436646938324, "learning_rate": 8.02083446373667e-05, "loss": 2.3584, "step": 492140 }, { "epoch": 0.9804722363891368, "grad_norm": 0.2302769124507904, "learning_rate": 8.010416949252175e-05, "loss": 2.3837, "step": 492150 }, { "epoch": 0.9804921586127757, "grad_norm": 0.25731101632118225, "learning_rate": 8.000000000000007e-05, "loss": 2.3776, "step": 492160 }, { "epoch": 0.9805120808364146, "grad_norm": 0.2336856573820114, "learning_rate": 7.989583615888174e-05, "loss": 2.3756, "step": 492170 }, { "epoch": 0.9805320030600535, "grad_norm": 0.22734495997428894, "learning_rate": 7.979167796824727e-05, "loss": 2.3657, "step": 492180 }, { "epoch": 0.9805519252836925, "grad_norm": 0.2289925515651703, "learning_rate": 7.968752542717673e-05, "loss": 2.3541, "step": 492190 }, { "epoch": 0.9805718475073314, "grad_norm": 0.2229265719652176, "learning_rate": 7.958337853475129e-05, "loss": 2.3771, "step": 492200 }, { "epoch": 0.9805917697309703, "grad_norm": 0.23942182958126068, "learning_rate": 7.947923729005213e-05, "loss": 2.3569, "step": 492210 }, { "epoch": 0.9806116919546092, "grad_norm": 0.2330438792705536, "learning_rate": 7.937510169216022e-05, "loss": 2.359, "step": 492220 }, { "epoch": 0.9806316141782481, "grad_norm": 0.2953547537326813, "learning_rate": 7.927097174015718e-05, "loss": 2.3686, "step": 492230 }, { "epoch": 0.9806515364018871, "grad_norm": 0.28154826164245605, "learning_rate": 7.916684743312486e-05, "loss": 2.3496, "step": 492240 }, { "epoch": 0.980671458625526, "grad_norm": 0.23949766159057617, "learning_rate": 7.906272877014531e-05, "loss": 2.3681, "step": 492250 }, { "epoch": 0.9806913808491649, "grad_norm": 0.23890848457813263, "learning_rate": 7.895861575030106e-05, "loss": 2.3796, "step": 492260 }, { "epoch": 0.9807113030728037, "grad_norm": 0.24609945714473724, "learning_rate": 7.885450837267416e-05, "loss": 2.3736, "step": 492270 }, { "epoch": 0.9807312252964426, "grad_norm": 0.2311260849237442, "learning_rate": 7.875040663634758e-05, "loss": 2.3765, "step": 492280 }, { "epoch": 0.9807511475200816, "grad_norm": 0.23028483986854553, "learning_rate": 7.864631054040427e-05, "loss": 2.3767, "step": 492290 }, { "epoch": 0.9807710697437205, "grad_norm": 0.23031070828437805, "learning_rate": 7.854222008392809e-05, "loss": 2.386, "step": 492300 }, { "epoch": 0.9807909919673594, "grad_norm": 0.24362905323505402, "learning_rate": 7.843813526600197e-05, "loss": 2.3661, "step": 492310 }, { "epoch": 0.9808109141909983, "grad_norm": 0.23903103172779083, "learning_rate": 7.833405608570977e-05, "loss": 2.3729, "step": 492320 }, { "epoch": 0.9808308364146373, "grad_norm": 0.2275395691394806, "learning_rate": 7.822998254213576e-05, "loss": 2.3839, "step": 492330 }, { "epoch": 0.9808507586382762, "grad_norm": 0.22230178117752075, "learning_rate": 7.812591463436403e-05, "loss": 2.3647, "step": 492340 }, { "epoch": 0.9808706808619151, "grad_norm": 0.2295650988817215, "learning_rate": 7.802185236147908e-05, "loss": 2.3755, "step": 492350 }, { "epoch": 0.980890603085554, "grad_norm": 0.24996590614318848, "learning_rate": 7.791779572256585e-05, "loss": 2.385, "step": 492360 }, { "epoch": 0.9809105253091929, "grad_norm": 0.24518568813800812, "learning_rate": 7.781374471670933e-05, "loss": 2.3525, "step": 492370 }, { "epoch": 0.9809304475328319, "grad_norm": 0.23277170956134796, "learning_rate": 7.770969934299466e-05, "loss": 2.3469, "step": 492380 }, { "epoch": 0.9809503697564708, "grad_norm": 0.2254745364189148, "learning_rate": 7.760565960050747e-05, "loss": 2.3686, "step": 492390 }, { "epoch": 0.9809702919801097, "grad_norm": 0.23204916715621948, "learning_rate": 7.75016254883334e-05, "loss": 2.3828, "step": 492400 }, { "epoch": 0.9809902142037485, "grad_norm": 0.2585492432117462, "learning_rate": 7.739759700555871e-05, "loss": 2.3714, "step": 492410 }, { "epoch": 0.9810101364273874, "grad_norm": 0.243551105260849, "learning_rate": 7.729357415126948e-05, "loss": 2.3757, "step": 492420 }, { "epoch": 0.9810300586510264, "grad_norm": 0.23383183777332306, "learning_rate": 7.71895569245522e-05, "loss": 2.3598, "step": 492430 }, { "epoch": 0.9810499808746653, "grad_norm": 0.23237168788909912, "learning_rate": 7.708554532449364e-05, "loss": 2.3709, "step": 492440 }, { "epoch": 0.9810699030983042, "grad_norm": 0.23608584702014923, "learning_rate": 7.698153935018093e-05, "loss": 2.3822, "step": 492450 }, { "epoch": 0.9810898253219431, "grad_norm": 0.30766984820365906, "learning_rate": 7.687753900070105e-05, "loss": 2.3777, "step": 492460 }, { "epoch": 0.981109747545582, "grad_norm": 0.23202864825725555, "learning_rate": 7.677354427514182e-05, "loss": 2.362, "step": 492470 }, { "epoch": 0.981129669769221, "grad_norm": 0.2458362579345703, "learning_rate": 7.666955517259089e-05, "loss": 2.3919, "step": 492480 }, { "epoch": 0.9811495919928599, "grad_norm": 0.24053417146205902, "learning_rate": 7.656557169213585e-05, "loss": 2.3689, "step": 492490 }, { "epoch": 0.9811695142164988, "grad_norm": 0.25876009464263916, "learning_rate": 7.646159383286543e-05, "loss": 2.383, "step": 492500 }, { "epoch": 0.9811894364401377, "grad_norm": 0.2454383671283722, "learning_rate": 7.635762159386816e-05, "loss": 2.3708, "step": 492510 }, { "epoch": 0.9812093586637766, "grad_norm": 0.2412743866443634, "learning_rate": 7.625365497423231e-05, "loss": 2.3672, "step": 492520 }, { "epoch": 0.9812292808874156, "grad_norm": 0.24549151957035065, "learning_rate": 7.614969397304727e-05, "loss": 2.3624, "step": 492530 }, { "epoch": 0.9812492031110545, "grad_norm": 0.23667778074741364, "learning_rate": 7.604573858940201e-05, "loss": 2.373, "step": 492540 }, { "epoch": 0.9812691253346933, "grad_norm": 0.2259090095758438, "learning_rate": 7.59417888223859e-05, "loss": 2.3676, "step": 492550 }, { "epoch": 0.9812890475583322, "grad_norm": 0.23148652911186218, "learning_rate": 7.583784467108901e-05, "loss": 2.3751, "step": 492560 }, { "epoch": 0.9813089697819711, "grad_norm": 0.24264486134052277, "learning_rate": 7.573390613460118e-05, "loss": 2.3961, "step": 492570 }, { "epoch": 0.9813288920056101, "grad_norm": 0.2349986582994461, "learning_rate": 7.562997321201249e-05, "loss": 2.381, "step": 492580 }, { "epoch": 0.981348814229249, "grad_norm": 0.6745191216468811, "learning_rate": 7.552604590241342e-05, "loss": 2.3793, "step": 492590 }, { "epoch": 0.9813687364528879, "grad_norm": 0.2414899468421936, "learning_rate": 7.54221242048947e-05, "loss": 2.383, "step": 492600 }, { "epoch": 0.9813886586765268, "grad_norm": 0.2314750850200653, "learning_rate": 7.531820811854705e-05, "loss": 2.3802, "step": 492610 }, { "epoch": 0.9814085809001658, "grad_norm": 0.24952994287014008, "learning_rate": 7.521429764246212e-05, "loss": 2.3748, "step": 492620 }, { "epoch": 0.9814285031238047, "grad_norm": 0.22807204723358154, "learning_rate": 7.511039277573107e-05, "loss": 2.3802, "step": 492630 }, { "epoch": 0.9814484253474436, "grad_norm": 0.23647871613502502, "learning_rate": 7.500649351744571e-05, "loss": 2.3679, "step": 492640 }, { "epoch": 0.9814683475710825, "grad_norm": 0.26316189765930176, "learning_rate": 7.490259986669768e-05, "loss": 2.3826, "step": 492650 }, { "epoch": 0.9814882697947214, "grad_norm": 0.2729750871658325, "learning_rate": 7.479871182257924e-05, "loss": 2.3687, "step": 492660 }, { "epoch": 0.9815081920183604, "grad_norm": 0.24044671654701233, "learning_rate": 7.469482938418314e-05, "loss": 2.391, "step": 492670 }, { "epoch": 0.9815281142419993, "grad_norm": 0.23513680696487427, "learning_rate": 7.459095255060167e-05, "loss": 2.3733, "step": 492680 }, { "epoch": 0.9815480364656382, "grad_norm": 0.24197952449321747, "learning_rate": 7.448708132092774e-05, "loss": 2.3806, "step": 492690 }, { "epoch": 0.981567958689277, "grad_norm": 0.22974807024002075, "learning_rate": 7.438321569425454e-05, "loss": 2.3635, "step": 492700 }, { "epoch": 0.9815878809129159, "grad_norm": 0.24070844054222107, "learning_rate": 7.427935566967547e-05, "loss": 2.3559, "step": 492710 }, { "epoch": 0.9816078031365549, "grad_norm": 0.23206721246242523, "learning_rate": 7.417550124628436e-05, "loss": 2.3619, "step": 492720 }, { "epoch": 0.9816277253601938, "grad_norm": 0.24891862273216248, "learning_rate": 7.407165242317481e-05, "loss": 2.3708, "step": 492730 }, { "epoch": 0.9816476475838327, "grad_norm": 0.24294999241828918, "learning_rate": 7.396780919944113e-05, "loss": 2.3838, "step": 492740 }, { "epoch": 0.9816675698074716, "grad_norm": 0.242233008146286, "learning_rate": 7.38639715741778e-05, "loss": 2.3699, "step": 492750 }, { "epoch": 0.9816874920311105, "grad_norm": 0.23710393905639648, "learning_rate": 7.37601395464791e-05, "loss": 2.3736, "step": 492760 }, { "epoch": 0.9817074142547495, "grad_norm": 0.24444817006587982, "learning_rate": 7.365631311543996e-05, "loss": 2.3735, "step": 492770 }, { "epoch": 0.9817273364783884, "grad_norm": 0.23164445161819458, "learning_rate": 7.35524922801556e-05, "loss": 2.3664, "step": 492780 }, { "epoch": 0.9817472587020273, "grad_norm": 0.2432379275560379, "learning_rate": 7.344867703972136e-05, "loss": 2.3801, "step": 492790 }, { "epoch": 0.9817671809256662, "grad_norm": 0.22844934463500977, "learning_rate": 7.334486739323265e-05, "loss": 2.3885, "step": 492800 }, { "epoch": 0.9817871031493051, "grad_norm": 0.23335550725460052, "learning_rate": 7.324106333978552e-05, "loss": 2.365, "step": 492810 }, { "epoch": 0.9818070253729441, "grad_norm": 1.257739543914795, "learning_rate": 7.313726487847605e-05, "loss": 2.3766, "step": 492820 }, { "epoch": 0.981826947596583, "grad_norm": 0.2330903261899948, "learning_rate": 7.30334720084005e-05, "loss": 2.3619, "step": 492830 }, { "epoch": 0.9818468698202218, "grad_norm": 0.26238465309143066, "learning_rate": 7.292968472865536e-05, "loss": 2.3609, "step": 492840 }, { "epoch": 0.9818667920438607, "grad_norm": 0.23178179562091827, "learning_rate": 7.282590303833735e-05, "loss": 2.3679, "step": 492850 }, { "epoch": 0.9818867142674996, "grad_norm": 0.23587748408317566, "learning_rate": 7.272212693654367e-05, "loss": 2.3656, "step": 492860 }, { "epoch": 0.9819066364911386, "grad_norm": 0.23560485243797302, "learning_rate": 7.26183564223717e-05, "loss": 2.3683, "step": 492870 }, { "epoch": 0.9819265587147775, "grad_norm": 0.2149152010679245, "learning_rate": 7.251459149491879e-05, "loss": 2.3486, "step": 492880 }, { "epoch": 0.9819464809384164, "grad_norm": 0.22792574763298035, "learning_rate": 7.241083215328281e-05, "loss": 2.3639, "step": 492890 }, { "epoch": 0.9819664031620553, "grad_norm": 0.2320021241903305, "learning_rate": 7.230707839656159e-05, "loss": 2.3789, "step": 492900 }, { "epoch": 0.9819863253856943, "grad_norm": 0.23145155608654022, "learning_rate": 7.220333022385362e-05, "loss": 2.3719, "step": 492910 }, { "epoch": 0.9820062476093332, "grad_norm": 0.23758696019649506, "learning_rate": 7.209958763425739e-05, "loss": 2.3651, "step": 492920 }, { "epoch": 0.9820261698329721, "grad_norm": 0.22727756202220917, "learning_rate": 7.199585062687164e-05, "loss": 2.3753, "step": 492930 }, { "epoch": 0.982046092056611, "grad_norm": 0.25566422939300537, "learning_rate": 7.189211920079531e-05, "loss": 2.3586, "step": 492940 }, { "epoch": 0.9820660142802499, "grad_norm": 0.26260218024253845, "learning_rate": 7.178839335512755e-05, "loss": 2.3879, "step": 492950 }, { "epoch": 0.9820859365038889, "grad_norm": 0.2268095463514328, "learning_rate": 7.168467308896797e-05, "loss": 2.3623, "step": 492960 }, { "epoch": 0.9821058587275278, "grad_norm": 0.22344572842121124, "learning_rate": 7.158095840141643e-05, "loss": 2.3687, "step": 492970 }, { "epoch": 0.9821257809511666, "grad_norm": 0.24959561228752136, "learning_rate": 7.147724929157251e-05, "loss": 2.3615, "step": 492980 }, { "epoch": 0.9821457031748055, "grad_norm": 0.24263924360275269, "learning_rate": 7.137354575853649e-05, "loss": 2.3797, "step": 492990 }, { "epoch": 0.9821656253984444, "grad_norm": 0.2401660829782486, "learning_rate": 7.126984780140888e-05, "loss": 2.3679, "step": 493000 }, { "epoch": 0.9821855476220834, "grad_norm": 0.2300679087638855, "learning_rate": 7.116615541929061e-05, "loss": 2.3816, "step": 493010 }, { "epoch": 0.9822054698457223, "grad_norm": 0.23377671837806702, "learning_rate": 7.10624686112824e-05, "loss": 2.3743, "step": 493020 }, { "epoch": 0.9822253920693612, "grad_norm": 0.24484029412269592, "learning_rate": 7.09587873764852e-05, "loss": 2.3552, "step": 493030 }, { "epoch": 0.9822453142930001, "grad_norm": 0.23336859047412872, "learning_rate": 7.085511171400083e-05, "loss": 2.3618, "step": 493040 }, { "epoch": 0.982265236516639, "grad_norm": 0.23015376925468445, "learning_rate": 7.075144162293068e-05, "loss": 2.3831, "step": 493050 }, { "epoch": 0.982285158740278, "grad_norm": 0.2611418664455414, "learning_rate": 7.06477771023768e-05, "loss": 2.362, "step": 493060 }, { "epoch": 0.9823050809639169, "grad_norm": 0.24145178496837616, "learning_rate": 7.054411815144123e-05, "loss": 2.3709, "step": 493070 }, { "epoch": 0.9823250031875558, "grad_norm": 0.2501586675643921, "learning_rate": 7.044046476922628e-05, "loss": 2.3631, "step": 493080 }, { "epoch": 0.9823449254111947, "grad_norm": 0.23673753440380096, "learning_rate": 7.033681695483463e-05, "loss": 2.3699, "step": 493090 }, { "epoch": 0.9823648476348336, "grad_norm": 0.22829101979732513, "learning_rate": 7.0233174707369e-05, "loss": 2.3644, "step": 493100 }, { "epoch": 0.9823847698584726, "grad_norm": 0.2855754494667053, "learning_rate": 7.012953802593258e-05, "loss": 2.3532, "step": 493110 }, { "epoch": 0.9824046920821115, "grad_norm": 0.23910091817378998, "learning_rate": 7.002590690962896e-05, "loss": 2.3666, "step": 493120 }, { "epoch": 0.9824246143057503, "grad_norm": 0.23595695197582245, "learning_rate": 6.992228135756152e-05, "loss": 2.3658, "step": 493130 }, { "epoch": 0.9824445365293892, "grad_norm": 0.24696289002895355, "learning_rate": 6.981866136883408e-05, "loss": 2.3694, "step": 493140 }, { "epoch": 0.9824644587530281, "grad_norm": 0.2296096682548523, "learning_rate": 6.971504694255049e-05, "loss": 2.3888, "step": 493150 }, { "epoch": 0.9824843809766671, "grad_norm": 0.23728793859481812, "learning_rate": 6.961143807781545e-05, "loss": 2.3928, "step": 493160 }, { "epoch": 0.982504303200306, "grad_norm": 0.2541067898273468, "learning_rate": 6.950783477373324e-05, "loss": 2.3678, "step": 493170 }, { "epoch": 0.9825242254239449, "grad_norm": 0.2400050163269043, "learning_rate": 6.940423702940857e-05, "loss": 2.3728, "step": 493180 }, { "epoch": 0.9825441476475838, "grad_norm": 0.24044516682624817, "learning_rate": 6.930064484394683e-05, "loss": 2.3569, "step": 493190 }, { "epoch": 0.9825640698712228, "grad_norm": 0.2349081188440323, "learning_rate": 6.919705821645272e-05, "loss": 2.3811, "step": 493200 }, { "epoch": 0.9825839920948617, "grad_norm": 0.24195218086242676, "learning_rate": 6.909347714603232e-05, "loss": 2.3566, "step": 493210 }, { "epoch": 0.9826039143185006, "grad_norm": 0.23839643597602844, "learning_rate": 6.898990163179097e-05, "loss": 2.37, "step": 493220 }, { "epoch": 0.9826238365421395, "grad_norm": 0.24583756923675537, "learning_rate": 6.888633167283498e-05, "loss": 2.3651, "step": 493230 }, { "epoch": 0.9826437587657784, "grad_norm": 0.24143554270267487, "learning_rate": 6.878276726827015e-05, "loss": 2.3804, "step": 493240 }, { "epoch": 0.9826636809894174, "grad_norm": 0.2328808754682541, "learning_rate": 6.867920841720343e-05, "loss": 2.3645, "step": 493250 }, { "epoch": 0.9826836032130563, "grad_norm": 0.24150562286376953, "learning_rate": 6.857565511874109e-05, "loss": 2.3635, "step": 493260 }, { "epoch": 0.9827035254366951, "grad_norm": 0.23429125547409058, "learning_rate": 6.847210737199028e-05, "loss": 2.3737, "step": 493270 }, { "epoch": 0.982723447660334, "grad_norm": 0.256496787071228, "learning_rate": 6.836856517605816e-05, "loss": 2.369, "step": 493280 }, { "epoch": 0.9827433698839729, "grad_norm": 0.24749411642551422, "learning_rate": 6.826502853005234e-05, "loss": 2.3893, "step": 493290 }, { "epoch": 0.9827632921076119, "grad_norm": 0.23085187375545502, "learning_rate": 6.816149743307998e-05, "loss": 2.3698, "step": 493300 }, { "epoch": 0.9827832143312508, "grad_norm": 0.23206809163093567, "learning_rate": 6.805797188424934e-05, "loss": 2.3747, "step": 493310 }, { "epoch": 0.9828031365548897, "grad_norm": 0.22985364496707916, "learning_rate": 6.795445188266847e-05, "loss": 2.3614, "step": 493320 }, { "epoch": 0.9828230587785286, "grad_norm": 0.24905836582183838, "learning_rate": 6.785093742744586e-05, "loss": 2.3871, "step": 493330 }, { "epoch": 0.9828429810021675, "grad_norm": 0.22898420691490173, "learning_rate": 6.774742851768978e-05, "loss": 2.3679, "step": 493340 }, { "epoch": 0.9828629032258065, "grad_norm": 0.2438155710697174, "learning_rate": 6.764392515250962e-05, "loss": 2.3864, "step": 493350 }, { "epoch": 0.9828828254494454, "grad_norm": 0.25267690420150757, "learning_rate": 6.754042733101406e-05, "loss": 2.3723, "step": 493360 }, { "epoch": 0.9829027476730843, "grad_norm": 0.22707079350948334, "learning_rate": 6.743693505231252e-05, "loss": 2.3901, "step": 493370 }, { "epoch": 0.9829226698967232, "grad_norm": 0.23122334480285645, "learning_rate": 6.733344831551458e-05, "loss": 2.371, "step": 493380 }, { "epoch": 0.9829425921203621, "grad_norm": 0.23465590178966522, "learning_rate": 6.722996711973006e-05, "loss": 2.3692, "step": 493390 }, { "epoch": 0.9829625143440011, "grad_norm": 0.235973060131073, "learning_rate": 6.712649146406879e-05, "loss": 2.3591, "step": 493400 }, { "epoch": 0.98298243656764, "grad_norm": 0.22221951186656952, "learning_rate": 6.70230213476415e-05, "loss": 2.355, "step": 493410 }, { "epoch": 0.9830023587912788, "grad_norm": 0.243504598736763, "learning_rate": 6.691955676955841e-05, "loss": 2.3803, "step": 493420 }, { "epoch": 0.9830222810149177, "grad_norm": 0.24423521757125854, "learning_rate": 6.681609772893049e-05, "loss": 2.3595, "step": 493430 }, { "epoch": 0.9830422032385566, "grad_norm": 0.2383095920085907, "learning_rate": 6.671264422486845e-05, "loss": 2.3654, "step": 493440 }, { "epoch": 0.9830621254621956, "grad_norm": 0.24033941328525543, "learning_rate": 6.660919625648365e-05, "loss": 2.3717, "step": 493450 }, { "epoch": 0.9830820476858345, "grad_norm": 0.24462996423244476, "learning_rate": 6.65057538228877e-05, "loss": 2.383, "step": 493460 }, { "epoch": 0.9831019699094734, "grad_norm": 0.23244886100292206, "learning_rate": 6.640231692319199e-05, "loss": 2.3764, "step": 493470 }, { "epoch": 0.9831218921331123, "grad_norm": 0.22629138827323914, "learning_rate": 6.629888555650876e-05, "loss": 2.3746, "step": 493480 }, { "epoch": 0.9831418143567513, "grad_norm": 0.24345417320728302, "learning_rate": 6.619545972195007e-05, "loss": 2.3729, "step": 493490 }, { "epoch": 0.9831617365803902, "grad_norm": 0.2505212724208832, "learning_rate": 6.60920394186284e-05, "loss": 2.3719, "step": 493500 }, { "epoch": 0.9831816588040291, "grad_norm": 0.2298991084098816, "learning_rate": 6.598862464565625e-05, "loss": 2.3574, "step": 493510 }, { "epoch": 0.983201581027668, "grad_norm": 0.24436885118484497, "learning_rate": 6.588521540214676e-05, "loss": 2.3618, "step": 493520 }, { "epoch": 0.9832215032513069, "grad_norm": 0.24375496804714203, "learning_rate": 6.578181168721287e-05, "loss": 2.3724, "step": 493530 }, { "epoch": 0.9832414254749459, "grad_norm": 0.23123115301132202, "learning_rate": 6.567841349996817e-05, "loss": 2.365, "step": 493540 }, { "epoch": 0.9832613476985848, "grad_norm": 2.2475619316101074, "learning_rate": 6.557502083952605e-05, "loss": 2.3661, "step": 493550 }, { "epoch": 0.9832812699222236, "grad_norm": 0.250862717628479, "learning_rate": 6.547163370500053e-05, "loss": 2.3658, "step": 493560 }, { "epoch": 0.9833011921458625, "grad_norm": 0.26169195771217346, "learning_rate": 6.536825209550546e-05, "loss": 2.3545, "step": 493570 }, { "epoch": 0.9833211143695014, "grad_norm": 0.24903513491153717, "learning_rate": 6.526487601015529e-05, "loss": 2.3554, "step": 493580 }, { "epoch": 0.9833410365931404, "grad_norm": 0.2322860211133957, "learning_rate": 6.516150544806454e-05, "loss": 2.366, "step": 493590 }, { "epoch": 0.9833609588167793, "grad_norm": 0.22575990855693817, "learning_rate": 6.505814040834812e-05, "loss": 2.3801, "step": 493600 }, { "epoch": 0.9833808810404182, "grad_norm": 0.25125738978385925, "learning_rate": 6.495478089012097e-05, "loss": 2.3529, "step": 493610 }, { "epoch": 0.9834008032640571, "grad_norm": 0.25367918610572815, "learning_rate": 6.485142689249823e-05, "loss": 2.3816, "step": 493620 }, { "epoch": 0.983420725487696, "grad_norm": 0.22133612632751465, "learning_rate": 6.474807841459574e-05, "loss": 2.3503, "step": 493630 }, { "epoch": 0.983440647711335, "grad_norm": 0.23610900342464447, "learning_rate": 6.464473545552885e-05, "loss": 2.3615, "step": 493640 }, { "epoch": 0.9834605699349739, "grad_norm": 0.22164590656757355, "learning_rate": 6.454139801441383e-05, "loss": 2.3673, "step": 493650 }, { "epoch": 0.9834804921586128, "grad_norm": 0.25701719522476196, "learning_rate": 6.443806609036674e-05, "loss": 2.3646, "step": 493660 }, { "epoch": 0.9835004143822517, "grad_norm": 0.22553591430187225, "learning_rate": 6.43347396825038e-05, "loss": 2.3674, "step": 493670 }, { "epoch": 0.9835203366058906, "grad_norm": 0.22255730628967285, "learning_rate": 6.423141878994221e-05, "loss": 2.3771, "step": 493680 }, { "epoch": 0.9835402588295296, "grad_norm": 0.2209957391023636, "learning_rate": 6.412810341179865e-05, "loss": 2.3712, "step": 493690 }, { "epoch": 0.9835601810531684, "grad_norm": 0.2448558509349823, "learning_rate": 6.402479354719004e-05, "loss": 2.3738, "step": 493700 }, { "epoch": 0.9835801032768073, "grad_norm": 0.24427975714206696, "learning_rate": 6.392148919523399e-05, "loss": 2.3604, "step": 493710 }, { "epoch": 0.9836000255004462, "grad_norm": 0.23586587607860565, "learning_rate": 6.381819035504832e-05, "loss": 2.3595, "step": 493720 }, { "epoch": 0.9836199477240851, "grad_norm": 0.213813915848732, "learning_rate": 6.37148970257504e-05, "loss": 2.3907, "step": 493730 }, { "epoch": 0.9836398699477241, "grad_norm": 0.25248515605926514, "learning_rate": 6.36116092064587e-05, "loss": 2.3687, "step": 493740 }, { "epoch": 0.983659792171363, "grad_norm": 0.23388813436031342, "learning_rate": 6.350832689629149e-05, "loss": 2.3825, "step": 493750 }, { "epoch": 0.9836797143950019, "grad_norm": 0.2500448226928711, "learning_rate": 6.340505009436726e-05, "loss": 2.3756, "step": 493760 }, { "epoch": 0.9836996366186408, "grad_norm": 0.23487022519111633, "learning_rate": 6.330177879980492e-05, "loss": 2.3704, "step": 493770 }, { "epoch": 0.9837195588422798, "grad_norm": 0.23740307986736298, "learning_rate": 6.319851301172319e-05, "loss": 2.3829, "step": 493780 }, { "epoch": 0.9837394810659187, "grad_norm": 0.2241716980934143, "learning_rate": 6.309525272924166e-05, "loss": 2.3599, "step": 493790 }, { "epoch": 0.9837594032895576, "grad_norm": 0.24580487608909607, "learning_rate": 6.299199795147992e-05, "loss": 2.3777, "step": 493800 }, { "epoch": 0.9837793255131965, "grad_norm": 0.2508065700531006, "learning_rate": 6.288874867755712e-05, "loss": 2.3806, "step": 493810 }, { "epoch": 0.9837992477368354, "grad_norm": 0.22717159986495972, "learning_rate": 6.278550490659395e-05, "loss": 2.3748, "step": 493820 }, { "epoch": 0.9838191699604744, "grad_norm": 0.23941566050052643, "learning_rate": 6.268226663771026e-05, "loss": 2.3686, "step": 493830 }, { "epoch": 0.9838390921841133, "grad_norm": 0.23640775680541992, "learning_rate": 6.25790338700265e-05, "loss": 2.3801, "step": 493840 }, { "epoch": 0.9838590144077521, "grad_norm": 0.2455252856016159, "learning_rate": 6.247580660266339e-05, "loss": 2.3789, "step": 493850 }, { "epoch": 0.983878936631391, "grad_norm": 0.24684642255306244, "learning_rate": 6.237258483474184e-05, "loss": 2.3748, "step": 493860 }, { "epoch": 0.9838988588550299, "grad_norm": 0.22990091145038605, "learning_rate": 6.226936856538279e-05, "loss": 2.362, "step": 493870 }, { "epoch": 0.9839187810786689, "grad_norm": 0.22897551953792572, "learning_rate": 6.216615779370805e-05, "loss": 2.3639, "step": 493880 }, { "epoch": 0.9839387033023078, "grad_norm": 0.22267352044582367, "learning_rate": 6.206295251883898e-05, "loss": 2.3681, "step": 493890 }, { "epoch": 0.9839586255259467, "grad_norm": 0.23402294516563416, "learning_rate": 6.19597527398974e-05, "loss": 2.3623, "step": 493900 }, { "epoch": 0.9839785477495856, "grad_norm": 0.24235838651657104, "learning_rate": 6.185655845600536e-05, "loss": 2.3648, "step": 493910 }, { "epoch": 0.9839984699732245, "grad_norm": 0.22805951535701752, "learning_rate": 6.175336966628508e-05, "loss": 2.369, "step": 493920 }, { "epoch": 0.9840183921968635, "grad_norm": 0.2377900928258896, "learning_rate": 6.165018636985953e-05, "loss": 2.3607, "step": 493930 }, { "epoch": 0.9840383144205024, "grad_norm": 0.23515404760837555, "learning_rate": 6.154700856585117e-05, "loss": 2.3761, "step": 493940 }, { "epoch": 0.9840582366441413, "grad_norm": 0.2560063898563385, "learning_rate": 6.144383625338312e-05, "loss": 2.3647, "step": 493950 }, { "epoch": 0.9840781588677802, "grad_norm": 0.229947030544281, "learning_rate": 6.134066943157856e-05, "loss": 2.3648, "step": 493960 }, { "epoch": 0.984098081091419, "grad_norm": 0.2384105771780014, "learning_rate": 6.123750809956108e-05, "loss": 2.3588, "step": 493970 }, { "epoch": 0.984118003315058, "grad_norm": 0.2147132158279419, "learning_rate": 6.113435225645403e-05, "loss": 2.3502, "step": 493980 }, { "epoch": 0.984137925538697, "grad_norm": 0.24088384211063385, "learning_rate": 6.10312019013819e-05, "loss": 2.3525, "step": 493990 }, { "epoch": 0.9841578477623358, "grad_norm": 0.2386206090450287, "learning_rate": 6.092805703346849e-05, "loss": 2.3785, "step": 494000 }, { "epoch": 0.9841777699859747, "grad_norm": 0.23813684284687042, "learning_rate": 6.0824917651838285e-05, "loss": 2.3674, "step": 494010 }, { "epoch": 0.9841976922096136, "grad_norm": 0.24245640635490417, "learning_rate": 6.072178375561599e-05, "loss": 2.3758, "step": 494020 }, { "epoch": 0.9842176144332526, "grad_norm": 0.22264422476291656, "learning_rate": 6.061865534392652e-05, "loss": 2.3724, "step": 494030 }, { "epoch": 0.9842375366568915, "grad_norm": 0.212423175573349, "learning_rate": 6.0515532415894805e-05, "loss": 2.3454, "step": 494040 }, { "epoch": 0.9842574588805304, "grad_norm": 0.24150876700878143, "learning_rate": 6.0412414970646424e-05, "loss": 2.3658, "step": 494050 }, { "epoch": 0.9842773811041693, "grad_norm": 0.23169592022895813, "learning_rate": 6.030930300730675e-05, "loss": 2.359, "step": 494060 }, { "epoch": 0.9842973033278082, "grad_norm": 0.24196788668632507, "learning_rate": 6.020619652500181e-05, "loss": 2.3684, "step": 494070 }, { "epoch": 0.9843172255514472, "grad_norm": 0.23246164619922638, "learning_rate": 6.010309552285742e-05, "loss": 2.3576, "step": 494080 }, { "epoch": 0.9843371477750861, "grad_norm": 0.254101425409317, "learning_rate": 6.0000000000000056e-05, "loss": 2.3581, "step": 494090 }, { "epoch": 0.984357069998725, "grad_norm": 0.22818300127983093, "learning_rate": 5.9896909955555964e-05, "loss": 2.3758, "step": 494100 }, { "epoch": 0.9843769922223639, "grad_norm": 0.22181949019432068, "learning_rate": 5.979382538865208e-05, "loss": 2.3653, "step": 494110 }, { "epoch": 0.9843969144460029, "grad_norm": 0.24334119260311127, "learning_rate": 5.969074629841531e-05, "loss": 2.3617, "step": 494120 }, { "epoch": 0.9844168366696417, "grad_norm": 0.22932003438472748, "learning_rate": 5.958767268397303e-05, "loss": 2.3707, "step": 494130 }, { "epoch": 0.9844367588932806, "grad_norm": 0.22965669631958008, "learning_rate": 5.948460454445237e-05, "loss": 2.3806, "step": 494140 }, { "epoch": 0.9844566811169195, "grad_norm": 0.235983207821846, "learning_rate": 5.938154187898137e-05, "loss": 2.3659, "step": 494150 }, { "epoch": 0.9844766033405584, "grad_norm": 0.25113773345947266, "learning_rate": 5.9278484686687396e-05, "loss": 2.3736, "step": 494160 }, { "epoch": 0.9844965255641974, "grad_norm": 0.24779945611953735, "learning_rate": 5.9175432966699136e-05, "loss": 2.3826, "step": 494170 }, { "epoch": 0.9845164477878363, "grad_norm": 0.23601701855659485, "learning_rate": 5.9072386718144635e-05, "loss": 2.3929, "step": 494180 }, { "epoch": 0.9845363700114752, "grad_norm": 0.22988824546337128, "learning_rate": 5.8969345940152356e-05, "loss": 2.3773, "step": 494190 }, { "epoch": 0.9845562922351141, "grad_norm": 0.24601422250270844, "learning_rate": 5.886631063185144e-05, "loss": 2.3675, "step": 494200 }, { "epoch": 0.984576214458753, "grad_norm": 0.2328043431043625, "learning_rate": 5.876328079237081e-05, "loss": 2.3705, "step": 494210 }, { "epoch": 0.984596136682392, "grad_norm": 0.23671776056289673, "learning_rate": 5.866025642083961e-05, "loss": 2.3759, "step": 494220 }, { "epoch": 0.9846160589060309, "grad_norm": 0.23325276374816895, "learning_rate": 5.855723751638764e-05, "loss": 2.3625, "step": 494230 }, { "epoch": 0.9846359811296698, "grad_norm": 0.24300651252269745, "learning_rate": 5.8454224078144494e-05, "loss": 2.3601, "step": 494240 }, { "epoch": 0.9846559033533087, "grad_norm": 0.23720291256904602, "learning_rate": 5.835121610524019e-05, "loss": 2.3659, "step": 494250 }, { "epoch": 0.9846758255769475, "grad_norm": 0.2376936823129654, "learning_rate": 5.824821359680477e-05, "loss": 2.3738, "step": 494260 }, { "epoch": 0.9846957478005866, "grad_norm": 0.2394784837961197, "learning_rate": 5.8145216551968916e-05, "loss": 2.3809, "step": 494270 }, { "epoch": 0.9847156700242254, "grad_norm": 0.25595712661743164, "learning_rate": 5.804222496986311e-05, "loss": 2.3698, "step": 494280 }, { "epoch": 0.9847355922478643, "grad_norm": 0.2485409826040268, "learning_rate": 5.793923884961827e-05, "loss": 2.3702, "step": 494290 }, { "epoch": 0.9847555144715032, "grad_norm": 0.24520932137966156, "learning_rate": 5.7836258190365755e-05, "loss": 2.3663, "step": 494300 }, { "epoch": 0.9847754366951421, "grad_norm": 0.23345080018043518, "learning_rate": 5.773328299123648e-05, "loss": 2.3618, "step": 494310 }, { "epoch": 0.9847953589187811, "grad_norm": 0.2275351583957672, "learning_rate": 5.7630313251362474e-05, "loss": 2.3662, "step": 494320 }, { "epoch": 0.98481528114242, "grad_norm": 0.2520223557949066, "learning_rate": 5.752734896987533e-05, "loss": 2.351, "step": 494330 }, { "epoch": 0.9848352033660589, "grad_norm": 0.2339172661304474, "learning_rate": 5.742439014590728e-05, "loss": 2.3643, "step": 494340 }, { "epoch": 0.9848551255896978, "grad_norm": 0.22823581099510193, "learning_rate": 5.7321436778590586e-05, "loss": 2.3891, "step": 494350 }, { "epoch": 0.9848750478133367, "grad_norm": 0.2839323878288269, "learning_rate": 5.721848886705749e-05, "loss": 2.3671, "step": 494360 }, { "epoch": 0.9848949700369757, "grad_norm": 0.2547854483127594, "learning_rate": 5.711554641044092e-05, "loss": 2.3537, "step": 494370 }, { "epoch": 0.9849148922606146, "grad_norm": 0.2301127016544342, "learning_rate": 5.701260940787378e-05, "loss": 2.3542, "step": 494380 }, { "epoch": 0.9849348144842535, "grad_norm": 0.22539277374744415, "learning_rate": 5.6909677858489216e-05, "loss": 2.3667, "step": 494390 }, { "epoch": 0.9849547367078924, "grad_norm": 0.254623144865036, "learning_rate": 5.680675176142103e-05, "loss": 2.3719, "step": 494400 }, { "epoch": 0.9849746589315314, "grad_norm": 0.23488138616085052, "learning_rate": 5.670383111580235e-05, "loss": 2.3669, "step": 494410 }, { "epoch": 0.9849945811551702, "grad_norm": 0.2226640284061432, "learning_rate": 5.660091592076744e-05, "loss": 2.3744, "step": 494420 }, { "epoch": 0.9850145033788091, "grad_norm": 0.251138299703598, "learning_rate": 5.649800617545031e-05, "loss": 2.3613, "step": 494430 }, { "epoch": 0.985034425602448, "grad_norm": 0.22827871143817902, "learning_rate": 5.639510187898522e-05, "loss": 2.3777, "step": 494440 }, { "epoch": 0.9850543478260869, "grad_norm": 0.23990169167518616, "learning_rate": 5.629220303050686e-05, "loss": 2.3568, "step": 494450 }, { "epoch": 0.9850742700497259, "grad_norm": 0.24617443978786469, "learning_rate": 5.6189309629150146e-05, "loss": 2.3634, "step": 494460 }, { "epoch": 0.9850941922733648, "grad_norm": 0.24909575283527374, "learning_rate": 5.608642167404976e-05, "loss": 2.3685, "step": 494470 }, { "epoch": 0.9851141144970037, "grad_norm": 0.22406497597694397, "learning_rate": 5.598353916434129e-05, "loss": 2.3621, "step": 494480 }, { "epoch": 0.9851340367206426, "grad_norm": 0.24024763703346252, "learning_rate": 5.58806620991601e-05, "loss": 2.364, "step": 494490 }, { "epoch": 0.9851539589442815, "grad_norm": 0.25029852986335754, "learning_rate": 5.577779047764198e-05, "loss": 2.3607, "step": 494500 }, { "epoch": 0.9851738811679205, "grad_norm": 0.22944723069667816, "learning_rate": 5.5674924298922735e-05, "loss": 2.3571, "step": 494510 }, { "epoch": 0.9851938033915594, "grad_norm": 0.24017632007598877, "learning_rate": 5.557206356213862e-05, "loss": 2.3606, "step": 494520 }, { "epoch": 0.9852137256151983, "grad_norm": 0.24725675582885742, "learning_rate": 5.54692082664261e-05, "loss": 2.3688, "step": 494530 }, { "epoch": 0.9852336478388372, "grad_norm": 0.24827079474925995, "learning_rate": 5.536635841092163e-05, "loss": 2.3494, "step": 494540 }, { "epoch": 0.985253570062476, "grad_norm": 0.2438230961561203, "learning_rate": 5.526351399476237e-05, "loss": 2.3569, "step": 494550 }, { "epoch": 0.985273492286115, "grad_norm": 0.2389591485261917, "learning_rate": 5.516067501708522e-05, "loss": 2.3512, "step": 494560 }, { "epoch": 0.9852934145097539, "grad_norm": 0.22988377511501312, "learning_rate": 5.5057841477027304e-05, "loss": 2.3433, "step": 494570 }, { "epoch": 0.9853133367333928, "grad_norm": 0.24500900506973267, "learning_rate": 5.495501337372666e-05, "loss": 2.3694, "step": 494580 }, { "epoch": 0.9853332589570317, "grad_norm": 0.2348233163356781, "learning_rate": 5.485219070632064e-05, "loss": 2.3484, "step": 494590 }, { "epoch": 0.9853531811806706, "grad_norm": 0.23019836843013763, "learning_rate": 5.474937347394748e-05, "loss": 2.36, "step": 494600 }, { "epoch": 0.9853731034043096, "grad_norm": 0.2308075875043869, "learning_rate": 5.464656167574522e-05, "loss": 2.367, "step": 494610 }, { "epoch": 0.9853930256279485, "grad_norm": 0.2289334237575531, "learning_rate": 5.454375531085254e-05, "loss": 2.3639, "step": 494620 }, { "epoch": 0.9854129478515874, "grad_norm": 0.2382705956697464, "learning_rate": 5.44409543784079e-05, "loss": 2.3694, "step": 494630 }, { "epoch": 0.9854328700752263, "grad_norm": 0.2382131963968277, "learning_rate": 5.4338158877550446e-05, "loss": 2.3833, "step": 494640 }, { "epoch": 0.9854527922988652, "grad_norm": 0.2314022183418274, "learning_rate": 5.4235368807419085e-05, "loss": 2.3685, "step": 494650 }, { "epoch": 0.9854727145225042, "grad_norm": 0.23858919739723206, "learning_rate": 5.413258416715339e-05, "loss": 2.3688, "step": 494660 }, { "epoch": 0.9854926367461431, "grad_norm": 0.2601067125797272, "learning_rate": 5.402980495589294e-05, "loss": 2.3607, "step": 494670 }, { "epoch": 0.985512558969782, "grad_norm": 0.2449796050786972, "learning_rate": 5.392703117277753e-05, "loss": 2.3624, "step": 494680 }, { "epoch": 0.9855324811934209, "grad_norm": 0.23905552923679352, "learning_rate": 5.382426281694697e-05, "loss": 2.357, "step": 494690 }, { "epoch": 0.9855524034170599, "grad_norm": 0.23943845927715302, "learning_rate": 5.372149988754193e-05, "loss": 2.3616, "step": 494700 }, { "epoch": 0.9855723256406987, "grad_norm": 0.22552430629730225, "learning_rate": 5.361874238370246e-05, "loss": 2.3642, "step": 494710 }, { "epoch": 0.9855922478643376, "grad_norm": 0.2218589186668396, "learning_rate": 5.351599030456966e-05, "loss": 2.371, "step": 494720 }, { "epoch": 0.9856121700879765, "grad_norm": 0.22217485308647156, "learning_rate": 5.341324364928446e-05, "loss": 2.3684, "step": 494730 }, { "epoch": 0.9856320923116154, "grad_norm": 0.23276697099208832, "learning_rate": 5.331050241698798e-05, "loss": 2.3636, "step": 494740 }, { "epoch": 0.9856520145352544, "grad_norm": 0.2315138578414917, "learning_rate": 5.320776660682158e-05, "loss": 2.3812, "step": 494750 }, { "epoch": 0.9856719367588933, "grad_norm": 0.23540063202381134, "learning_rate": 5.310503621792684e-05, "loss": 2.3567, "step": 494760 }, { "epoch": 0.9856918589825322, "grad_norm": 0.23886077105998993, "learning_rate": 5.3002311249445764e-05, "loss": 2.3739, "step": 494770 }, { "epoch": 0.9857117812061711, "grad_norm": 0.24033910036087036, "learning_rate": 5.289959170052039e-05, "loss": 2.3789, "step": 494780 }, { "epoch": 0.98573170342981, "grad_norm": 0.23698444664478302, "learning_rate": 5.279687757029317e-05, "loss": 2.3541, "step": 494790 }, { "epoch": 0.985751625653449, "grad_norm": 0.22284579277038574, "learning_rate": 5.269416885790634e-05, "loss": 2.366, "step": 494800 }, { "epoch": 0.9857715478770879, "grad_norm": 0.23109520971775055, "learning_rate": 5.259146556250283e-05, "loss": 2.3777, "step": 494810 }, { "epoch": 0.9857914701007268, "grad_norm": 0.24330882728099823, "learning_rate": 5.248876768322597e-05, "loss": 2.371, "step": 494820 }, { "epoch": 0.9858113923243657, "grad_norm": 0.2277572751045227, "learning_rate": 5.238607521921846e-05, "loss": 2.36, "step": 494830 }, { "epoch": 0.9858313145480045, "grad_norm": 0.23763006925582886, "learning_rate": 5.228338816962386e-05, "loss": 2.3792, "step": 494840 }, { "epoch": 0.9858512367716435, "grad_norm": 0.23233316838741302, "learning_rate": 5.2180706533585976e-05, "loss": 2.3511, "step": 494850 }, { "epoch": 0.9858711589952824, "grad_norm": 0.23445667326450348, "learning_rate": 5.207803031024882e-05, "loss": 2.3684, "step": 494860 }, { "epoch": 0.9858910812189213, "grad_norm": 0.23819635808467865, "learning_rate": 5.19753594987562e-05, "loss": 2.3628, "step": 494870 }, { "epoch": 0.9859110034425602, "grad_norm": 0.21911950409412384, "learning_rate": 5.187269409825279e-05, "loss": 2.3772, "step": 494880 }, { "epoch": 0.9859309256661991, "grad_norm": 0.22490496933460236, "learning_rate": 5.177003410788283e-05, "loss": 2.3544, "step": 494890 }, { "epoch": 0.9859508478898381, "grad_norm": 0.25895267724990845, "learning_rate": 5.1667379526791456e-05, "loss": 2.3689, "step": 494900 }, { "epoch": 0.985970770113477, "grad_norm": 0.22880685329437256, "learning_rate": 5.156473035412335e-05, "loss": 2.3645, "step": 494910 }, { "epoch": 0.9859906923371159, "grad_norm": 0.25063636898994446, "learning_rate": 5.1462086589024074e-05, "loss": 2.3604, "step": 494920 }, { "epoch": 0.9860106145607548, "grad_norm": 0.2298067957162857, "learning_rate": 5.135944823063898e-05, "loss": 2.3626, "step": 494930 }, { "epoch": 0.9860305367843937, "grad_norm": 0.28572753071784973, "learning_rate": 5.1256815278113654e-05, "loss": 2.3729, "step": 494940 }, { "epoch": 0.9860504590080327, "grad_norm": 0.26906508207321167, "learning_rate": 5.11541877305941e-05, "loss": 2.3714, "step": 494950 }, { "epoch": 0.9860703812316716, "grad_norm": 0.23841792345046997, "learning_rate": 5.105156558722679e-05, "loss": 2.3646, "step": 494960 }, { "epoch": 0.9860903034553105, "grad_norm": 0.22907663881778717, "learning_rate": 5.09489488471575e-05, "loss": 2.3624, "step": 494970 }, { "epoch": 0.9861102256789493, "grad_norm": 0.23249906301498413, "learning_rate": 5.0846337509533384e-05, "loss": 2.3667, "step": 494980 }, { "epoch": 0.9861301479025883, "grad_norm": 0.22918301820755005, "learning_rate": 5.0743731573500874e-05, "loss": 2.3647, "step": 494990 }, { "epoch": 0.9861500701262272, "grad_norm": 0.22928079962730408, "learning_rate": 5.0641131038207336e-05, "loss": 2.3517, "step": 495000 }, { "epoch": 0.9861699923498661, "grad_norm": 0.22145293653011322, "learning_rate": 5.0538535902799665e-05, "loss": 2.3708, "step": 495010 }, { "epoch": 0.986189914573505, "grad_norm": 0.21858566999435425, "learning_rate": 5.043594616642566e-05, "loss": 2.3746, "step": 495020 }, { "epoch": 0.9862098367971439, "grad_norm": 0.23245279490947723, "learning_rate": 5.033336182823289e-05, "loss": 2.3629, "step": 495030 }, { "epoch": 0.9862297590207829, "grad_norm": 0.21433058381080627, "learning_rate": 5.0230782887369596e-05, "loss": 2.3762, "step": 495040 }, { "epoch": 0.9862496812444218, "grad_norm": 0.23568233847618103, "learning_rate": 5.0128209342983566e-05, "loss": 2.359, "step": 495050 }, { "epoch": 0.9862696034680607, "grad_norm": 0.258055180311203, "learning_rate": 5.002564119422326e-05, "loss": 2.3841, "step": 495060 }, { "epoch": 0.9862895256916996, "grad_norm": 0.22247308492660522, "learning_rate": 4.992307844023758e-05, "loss": 2.3625, "step": 495070 }, { "epoch": 0.9863094479153385, "grad_norm": 0.23542512953281403, "learning_rate": 4.982052108017499e-05, "loss": 2.3577, "step": 495080 }, { "epoch": 0.9863293701389775, "grad_norm": 0.23216529190540314, "learning_rate": 4.9717969113185044e-05, "loss": 2.3595, "step": 495090 }, { "epoch": 0.9863492923626164, "grad_norm": 0.22133749723434448, "learning_rate": 4.9615422538416445e-05, "loss": 2.3635, "step": 495100 }, { "epoch": 0.9863692145862553, "grad_norm": 0.23001696169376373, "learning_rate": 4.951288135501919e-05, "loss": 2.354, "step": 495110 }, { "epoch": 0.9863891368098942, "grad_norm": 0.23419423401355743, "learning_rate": 4.941034556214263e-05, "loss": 2.3711, "step": 495120 }, { "epoch": 0.986409059033533, "grad_norm": 0.2338441014289856, "learning_rate": 4.930781515893701e-05, "loss": 2.3701, "step": 495130 }, { "epoch": 0.986428981257172, "grad_norm": 0.239291712641716, "learning_rate": 4.9205290144552326e-05, "loss": 2.3664, "step": 495140 }, { "epoch": 0.9864489034808109, "grad_norm": 0.24208074808120728, "learning_rate": 4.9102770518139274e-05, "loss": 2.3758, "step": 495150 }, { "epoch": 0.9864688257044498, "grad_norm": 0.2374625951051712, "learning_rate": 4.90002562788483e-05, "loss": 2.3511, "step": 495160 }, { "epoch": 0.9864887479280887, "grad_norm": 0.21312294900417328, "learning_rate": 4.8897747425830086e-05, "loss": 2.3552, "step": 495170 }, { "epoch": 0.9865086701517276, "grad_norm": 0.22601956129074097, "learning_rate": 4.879524395823598e-05, "loss": 2.3619, "step": 495180 }, { "epoch": 0.9865285923753666, "grad_norm": 0.23736342787742615, "learning_rate": 4.8692745875217107e-05, "loss": 2.369, "step": 495190 }, { "epoch": 0.9865485145990055, "grad_norm": 0.7877179980278015, "learning_rate": 4.8590253175925246e-05, "loss": 2.3516, "step": 495200 }, { "epoch": 0.9865684368226444, "grad_norm": 0.25173187255859375, "learning_rate": 4.848776585951176e-05, "loss": 2.365, "step": 495210 }, { "epoch": 0.9865883590462833, "grad_norm": 0.2730611264705658, "learning_rate": 4.838528392512886e-05, "loss": 2.3779, "step": 495220 }, { "epoch": 0.9866082812699222, "grad_norm": 0.23292385041713715, "learning_rate": 4.8282807371928586e-05, "loss": 2.379, "step": 495230 }, { "epoch": 0.9866282034935612, "grad_norm": 0.22297759354114532, "learning_rate": 4.8180336199063593e-05, "loss": 2.3808, "step": 495240 }, { "epoch": 0.9866481257172001, "grad_norm": 0.23144938051700592, "learning_rate": 4.8077870405686566e-05, "loss": 2.3623, "step": 495250 }, { "epoch": 0.986668047940839, "grad_norm": 0.27442291378974915, "learning_rate": 4.7975409990949957e-05, "loss": 2.3858, "step": 495260 }, { "epoch": 0.9866879701644778, "grad_norm": 0.22049681842327118, "learning_rate": 4.787295495400712e-05, "loss": 2.3583, "step": 495270 }, { "epoch": 0.9867078923881168, "grad_norm": 0.2202196717262268, "learning_rate": 4.777050529401139e-05, "loss": 2.3672, "step": 495280 }, { "epoch": 0.9867278146117557, "grad_norm": 0.23226457834243774, "learning_rate": 4.766806101011611e-05, "loss": 2.359, "step": 495290 }, { "epoch": 0.9867477368353946, "grad_norm": 0.22990025579929352, "learning_rate": 4.756562210147508e-05, "loss": 2.3621, "step": 495300 }, { "epoch": 0.9867676590590335, "grad_norm": 0.2219182699918747, "learning_rate": 4.746318856724252e-05, "loss": 2.3797, "step": 495310 }, { "epoch": 0.9867875812826724, "grad_norm": 0.22395989298820496, "learning_rate": 4.736076040657222e-05, "loss": 2.3528, "step": 495320 }, { "epoch": 0.9868075035063114, "grad_norm": 0.2477625012397766, "learning_rate": 4.725833761861886e-05, "loss": 2.3548, "step": 495330 }, { "epoch": 0.9868274257299503, "grad_norm": 0.23388375341892242, "learning_rate": 4.7155920202536896e-05, "loss": 2.3652, "step": 495340 }, { "epoch": 0.9868473479535892, "grad_norm": 0.2371239811182022, "learning_rate": 4.705350815748144e-05, "loss": 2.3747, "step": 495350 }, { "epoch": 0.9868672701772281, "grad_norm": 0.23624174296855927, "learning_rate": 4.6951101482607614e-05, "loss": 2.3671, "step": 495360 }, { "epoch": 0.986887192400867, "grad_norm": 0.2926568388938904, "learning_rate": 4.6848700177070324e-05, "loss": 2.3646, "step": 495370 }, { "epoch": 0.986907114624506, "grad_norm": 0.24050292372703552, "learning_rate": 4.6746304240025354e-05, "loss": 2.3758, "step": 495380 }, { "epoch": 0.9869270368481449, "grad_norm": 0.21804960072040558, "learning_rate": 4.664391367062826e-05, "loss": 2.3689, "step": 495390 }, { "epoch": 0.9869469590717838, "grad_norm": 0.22570550441741943, "learning_rate": 4.6541528468035277e-05, "loss": 2.3401, "step": 495400 }, { "epoch": 0.9869668812954226, "grad_norm": 0.25084713101387024, "learning_rate": 4.643914863140242e-05, "loss": 2.3784, "step": 495410 }, { "epoch": 0.9869868035190615, "grad_norm": 0.241024911403656, "learning_rate": 4.633677415988613e-05, "loss": 2.3655, "step": 495420 }, { "epoch": 0.9870067257427005, "grad_norm": 0.24772867560386658, "learning_rate": 4.623440505264287e-05, "loss": 2.3559, "step": 495430 }, { "epoch": 0.9870266479663394, "grad_norm": 0.23701439797878265, "learning_rate": 4.613204130882998e-05, "loss": 2.3807, "step": 495440 }, { "epoch": 0.9870465701899783, "grad_norm": 0.22461126744747162, "learning_rate": 4.60296829276039e-05, "loss": 2.3718, "step": 495450 }, { "epoch": 0.9870664924136172, "grad_norm": 0.22274355590343475, "learning_rate": 4.592732990812243e-05, "loss": 2.3596, "step": 495460 }, { "epoch": 0.9870864146372561, "grad_norm": 0.21476443111896515, "learning_rate": 4.582498224954268e-05, "loss": 2.3596, "step": 495470 }, { "epoch": 0.9871063368608951, "grad_norm": 0.22837570309638977, "learning_rate": 4.572263995102266e-05, "loss": 2.3749, "step": 495480 }, { "epoch": 0.987126259084534, "grad_norm": 0.22546203434467316, "learning_rate": 4.562030301172038e-05, "loss": 2.3585, "step": 495490 }, { "epoch": 0.9871461813081729, "grad_norm": 0.2447579950094223, "learning_rate": 4.551797143079361e-05, "loss": 2.3719, "step": 495500 }, { "epoch": 0.9871661035318118, "grad_norm": 0.2288377732038498, "learning_rate": 4.541564520740127e-05, "loss": 2.3622, "step": 495510 }, { "epoch": 0.9871860257554507, "grad_norm": 0.23823368549346924, "learning_rate": 4.531332434070135e-05, "loss": 2.3714, "step": 495520 }, { "epoch": 0.9872059479790897, "grad_norm": 0.2210710346698761, "learning_rate": 4.521100882985318e-05, "loss": 2.3717, "step": 495530 }, { "epoch": 0.9872258702027286, "grad_norm": 0.22641591727733612, "learning_rate": 4.510869867401568e-05, "loss": 2.3575, "step": 495540 }, { "epoch": 0.9872457924263675, "grad_norm": 0.23221924901008606, "learning_rate": 4.500639387234817e-05, "loss": 2.364, "step": 495550 }, { "epoch": 0.9872657146500063, "grad_norm": 0.2362261563539505, "learning_rate": 4.490409442401e-05, "loss": 2.364, "step": 495560 }, { "epoch": 0.9872856368736452, "grad_norm": 0.249307781457901, "learning_rate": 4.4801800328161166e-05, "loss": 2.3798, "step": 495570 }, { "epoch": 0.9873055590972842, "grad_norm": 0.22903788089752197, "learning_rate": 4.4699511583961236e-05, "loss": 2.3546, "step": 495580 }, { "epoch": 0.9873254813209231, "grad_norm": 0.22956186532974243, "learning_rate": 4.4597228190570437e-05, "loss": 2.3626, "step": 495590 }, { "epoch": 0.987345403544562, "grad_norm": 0.21804066002368927, "learning_rate": 4.449495014714944e-05, "loss": 2.3623, "step": 495600 }, { "epoch": 0.9873653257682009, "grad_norm": 0.23220999538898468, "learning_rate": 4.4392677452858464e-05, "loss": 2.3717, "step": 495610 }, { "epoch": 0.9873852479918399, "grad_norm": 0.22845283150672913, "learning_rate": 4.4290410106858417e-05, "loss": 2.3578, "step": 495620 }, { "epoch": 0.9874051702154788, "grad_norm": 0.23537352681159973, "learning_rate": 4.41881481083104e-05, "loss": 2.3636, "step": 495630 }, { "epoch": 0.9874250924391177, "grad_norm": 0.22597286105155945, "learning_rate": 4.408589145637576e-05, "loss": 2.3702, "step": 495640 }, { "epoch": 0.9874450146627566, "grad_norm": 0.22682563960552216, "learning_rate": 4.398364015021583e-05, "loss": 2.3618, "step": 495650 }, { "epoch": 0.9874649368863955, "grad_norm": 0.22234785556793213, "learning_rate": 4.38813941889924e-05, "loss": 2.3721, "step": 495660 }, { "epoch": 0.9874848591100345, "grad_norm": 0.2254454642534256, "learning_rate": 4.3779153571867234e-05, "loss": 2.3548, "step": 495670 }, { "epoch": 0.9875047813336734, "grad_norm": 0.2213827669620514, "learning_rate": 4.367691829800258e-05, "loss": 2.3713, "step": 495680 }, { "epoch": 0.9875247035573123, "grad_norm": 0.2198779433965683, "learning_rate": 4.3574688366560645e-05, "loss": 2.3691, "step": 495690 }, { "epoch": 0.9875446257809511, "grad_norm": 0.22565972805023193, "learning_rate": 4.347246377670433e-05, "loss": 2.3613, "step": 495700 }, { "epoch": 0.98756454800459, "grad_norm": 0.2373092919588089, "learning_rate": 4.337024452759586e-05, "loss": 2.3633, "step": 495710 }, { "epoch": 0.987584470228229, "grad_norm": 0.23457883298397064, "learning_rate": 4.326803061839857e-05, "loss": 2.3749, "step": 495720 }, { "epoch": 0.9876043924518679, "grad_norm": 0.23826654255390167, "learning_rate": 4.3165822048275795e-05, "loss": 2.3713, "step": 495730 }, { "epoch": 0.9876243146755068, "grad_norm": 0.23147547245025635, "learning_rate": 4.306361881639087e-05, "loss": 2.3612, "step": 495740 }, { "epoch": 0.9876442368991457, "grad_norm": 0.22651632130146027, "learning_rate": 4.296142092190736e-05, "loss": 2.3479, "step": 495750 }, { "epoch": 0.9876641591227846, "grad_norm": 0.2242211550474167, "learning_rate": 4.285922836398903e-05, "loss": 2.3578, "step": 495760 }, { "epoch": 0.9876840813464236, "grad_norm": 0.23393172025680542, "learning_rate": 4.2757041141800345e-05, "loss": 2.3507, "step": 495770 }, { "epoch": 0.9877040035700625, "grad_norm": 0.23138445615768433, "learning_rate": 4.265485925450552e-05, "loss": 2.362, "step": 495780 }, { "epoch": 0.9877239257937014, "grad_norm": 0.2333829253911972, "learning_rate": 4.2552682701268776e-05, "loss": 2.3465, "step": 495790 }, { "epoch": 0.9877438480173403, "grad_norm": 0.220403790473938, "learning_rate": 4.2450511481255e-05, "loss": 2.356, "step": 495800 }, { "epoch": 0.9877637702409792, "grad_norm": 0.21891482174396515, "learning_rate": 4.2348345593629325e-05, "loss": 2.3761, "step": 495810 }, { "epoch": 0.9877836924646182, "grad_norm": 0.23347222805023193, "learning_rate": 4.224618503755684e-05, "loss": 2.3604, "step": 495820 }, { "epoch": 0.9878036146882571, "grad_norm": 0.22622130811214447, "learning_rate": 4.2144029812202886e-05, "loss": 2.3616, "step": 495830 }, { "epoch": 0.987823536911896, "grad_norm": 0.2298029512166977, "learning_rate": 4.204187991673325e-05, "loss": 2.3657, "step": 495840 }, { "epoch": 0.9878434591355348, "grad_norm": 0.22609898447990417, "learning_rate": 4.193973535031348e-05, "loss": 2.3641, "step": 495850 }, { "epoch": 0.9878633813591737, "grad_norm": 0.22672110795974731, "learning_rate": 4.183759611211002e-05, "loss": 2.373, "step": 495860 }, { "epoch": 0.9878833035828127, "grad_norm": 0.2290237843990326, "learning_rate": 4.173546220128888e-05, "loss": 2.3579, "step": 495870 }, { "epoch": 0.9879032258064516, "grad_norm": 0.225350022315979, "learning_rate": 4.163333361701649e-05, "loss": 2.355, "step": 495880 }, { "epoch": 0.9879231480300905, "grad_norm": 0.21901842951774597, "learning_rate": 4.153121035845975e-05, "loss": 2.346, "step": 495890 }, { "epoch": 0.9879430702537294, "grad_norm": 0.23386162519454956, "learning_rate": 4.142909242478532e-05, "loss": 2.3474, "step": 495900 }, { "epoch": 0.9879629924773684, "grad_norm": 0.25328293442726135, "learning_rate": 4.132697981516076e-05, "loss": 2.3672, "step": 495910 }, { "epoch": 0.9879829147010073, "grad_norm": 0.2326851338148117, "learning_rate": 4.1224872528752954e-05, "loss": 2.3742, "step": 495920 }, { "epoch": 0.9880028369246462, "grad_norm": 0.23455488681793213, "learning_rate": 4.112277056472991e-05, "loss": 2.3777, "step": 495930 }, { "epoch": 0.9880227591482851, "grad_norm": 0.2287091761827469, "learning_rate": 4.1020673922258944e-05, "loss": 2.3546, "step": 495940 }, { "epoch": 0.988042681371924, "grad_norm": 0.2248246818780899, "learning_rate": 4.091858260050873e-05, "loss": 2.3592, "step": 495950 }, { "epoch": 0.988062603595563, "grad_norm": 0.23172752559185028, "learning_rate": 4.081649659864684e-05, "loss": 2.3614, "step": 495960 }, { "epoch": 0.9880825258192019, "grad_norm": 0.257135808467865, "learning_rate": 4.071441591584213e-05, "loss": 2.3452, "step": 495970 }, { "epoch": 0.9881024480428408, "grad_norm": 0.24718256294727325, "learning_rate": 4.061234055126306e-05, "loss": 2.3571, "step": 495980 }, { "epoch": 0.9881223702664796, "grad_norm": 0.23521728813648224, "learning_rate": 4.051027050407852e-05, "loss": 2.3572, "step": 495990 }, { "epoch": 0.9881422924901185, "grad_norm": 0.2321195751428604, "learning_rate": 4.040820577345761e-05, "loss": 2.3626, "step": 496000 }, { "epoch": 0.9881622147137575, "grad_norm": 0.2514367699623108, "learning_rate": 4.030614635856966e-05, "loss": 2.369, "step": 496010 }, { "epoch": 0.9881821369373964, "grad_norm": 0.23869669437408447, "learning_rate": 4.020409225858423e-05, "loss": 2.3811, "step": 496020 }, { "epoch": 0.9882020591610353, "grad_norm": 0.23766183853149414, "learning_rate": 4.010204347267088e-05, "loss": 2.3559, "step": 496030 }, { "epoch": 0.9882219813846742, "grad_norm": 0.22220027446746826, "learning_rate": 4.000000000000004e-05, "loss": 2.3594, "step": 496040 }, { "epoch": 0.9882419036083131, "grad_norm": 0.2291034758090973, "learning_rate": 3.989796183974126e-05, "loss": 2.3672, "step": 496050 }, { "epoch": 0.9882618258319521, "grad_norm": 0.22371865808963776, "learning_rate": 3.979592899106543e-05, "loss": 2.366, "step": 496060 }, { "epoch": 0.988281748055591, "grad_norm": 0.22397750616073608, "learning_rate": 3.9693901453143e-05, "loss": 2.3563, "step": 496070 }, { "epoch": 0.9883016702792299, "grad_norm": 0.39126360416412354, "learning_rate": 3.959187922514462e-05, "loss": 2.3632, "step": 496080 }, { "epoch": 0.9883215925028688, "grad_norm": 0.2366257607936859, "learning_rate": 3.948986230624141e-05, "loss": 2.3765, "step": 496090 }, { "epoch": 0.9883415147265077, "grad_norm": 0.2221282422542572, "learning_rate": 3.938785069560491e-05, "loss": 2.3722, "step": 496100 }, { "epoch": 0.9883614369501467, "grad_norm": 0.23258748650550842, "learning_rate": 3.928584439240623e-05, "loss": 2.3651, "step": 496110 }, { "epoch": 0.9883813591737856, "grad_norm": 0.232407808303833, "learning_rate": 3.918384339581693e-05, "loss": 2.3897, "step": 496120 }, { "epoch": 0.9884012813974244, "grad_norm": 0.22789114713668823, "learning_rate": 3.908184770500945e-05, "loss": 2.353, "step": 496130 }, { "epoch": 0.9884212036210633, "grad_norm": 0.23393888771533966, "learning_rate": 3.897985731915532e-05, "loss": 2.3604, "step": 496140 }, { "epoch": 0.9884411258447022, "grad_norm": 0.21903358399868011, "learning_rate": 3.887787223742745e-05, "loss": 2.3672, "step": 496150 }, { "epoch": 0.9884610480683412, "grad_norm": 0.24582751095294952, "learning_rate": 3.877589245899804e-05, "loss": 2.3741, "step": 496160 }, { "epoch": 0.9884809702919801, "grad_norm": 0.22391964495182037, "learning_rate": 3.867391798303976e-05, "loss": 2.377, "step": 496170 }, { "epoch": 0.988500892515619, "grad_norm": 0.22359608113765717, "learning_rate": 3.857194880872572e-05, "loss": 2.3573, "step": 496180 }, { "epoch": 0.9885208147392579, "grad_norm": 0.22796905040740967, "learning_rate": 3.846998493522924e-05, "loss": 2.3725, "step": 496190 }, { "epoch": 0.9885407369628969, "grad_norm": 0.2158452570438385, "learning_rate": 3.836802636172343e-05, "loss": 2.3621, "step": 496200 }, { "epoch": 0.9885606591865358, "grad_norm": 0.22659114003181458, "learning_rate": 3.826607308738228e-05, "loss": 2.3465, "step": 496210 }, { "epoch": 0.9885805814101747, "grad_norm": 0.24545101821422577, "learning_rate": 3.8164125111379125e-05, "loss": 2.3638, "step": 496220 }, { "epoch": 0.9886005036338136, "grad_norm": 0.23156026005744934, "learning_rate": 3.8062182432888616e-05, "loss": 2.3664, "step": 496230 }, { "epoch": 0.9886204258574525, "grad_norm": 0.22722230851650238, "learning_rate": 3.796024505108453e-05, "loss": 2.3497, "step": 496240 }, { "epoch": 0.9886403480810915, "grad_norm": 0.21891936659812927, "learning_rate": 3.785831296514153e-05, "loss": 2.3674, "step": 496250 }, { "epoch": 0.9886602703047304, "grad_norm": 0.222175732254982, "learning_rate": 3.775638617423449e-05, "loss": 2.3658, "step": 496260 }, { "epoch": 0.9886801925283693, "grad_norm": 0.2258857935667038, "learning_rate": 3.765446467753808e-05, "loss": 2.3563, "step": 496270 }, { "epoch": 0.9887001147520081, "grad_norm": 0.232057586312294, "learning_rate": 3.75525484742274e-05, "loss": 2.3526, "step": 496280 }, { "epoch": 0.988720036975647, "grad_norm": 0.2321612685918808, "learning_rate": 3.745063756347778e-05, "loss": 2.364, "step": 496290 }, { "epoch": 0.988739959199286, "grad_norm": 0.2303396761417389, "learning_rate": 3.7348731944464974e-05, "loss": 2.3567, "step": 496300 }, { "epoch": 0.9887598814229249, "grad_norm": 0.2323225736618042, "learning_rate": 3.724683161636455e-05, "loss": 2.3545, "step": 496310 }, { "epoch": 0.9887798036465638, "grad_norm": 0.21564850211143494, "learning_rate": 3.714493657835249e-05, "loss": 2.355, "step": 496320 }, { "epoch": 0.9887997258702027, "grad_norm": 0.29403263330459595, "learning_rate": 3.704304682960502e-05, "loss": 2.3676, "step": 496330 }, { "epoch": 0.9888196480938416, "grad_norm": 0.22577881813049316, "learning_rate": 3.694116236929878e-05, "loss": 2.3698, "step": 496340 }, { "epoch": 0.9888395703174806, "grad_norm": 0.22377243638038635, "learning_rate": 3.6839283196610006e-05, "loss": 2.3679, "step": 496350 }, { "epoch": 0.9888594925411195, "grad_norm": 0.2351633459329605, "learning_rate": 3.6737409310715784e-05, "loss": 2.3691, "step": 496360 }, { "epoch": 0.9888794147647584, "grad_norm": 0.22451847791671753, "learning_rate": 3.6635540710793005e-05, "loss": 2.3605, "step": 496370 }, { "epoch": 0.9888993369883973, "grad_norm": 0.23408788442611694, "learning_rate": 3.6533677396019205e-05, "loss": 2.3681, "step": 496380 }, { "epoch": 0.9889192592120362, "grad_norm": 0.21799518167972565, "learning_rate": 3.6431819365571496e-05, "loss": 2.3745, "step": 496390 }, { "epoch": 0.9889391814356752, "grad_norm": 0.22974282503128052, "learning_rate": 3.632996661862764e-05, "loss": 2.366, "step": 496400 }, { "epoch": 0.988959103659314, "grad_norm": 0.22957481443881989, "learning_rate": 3.622811915436586e-05, "loss": 2.3596, "step": 496410 }, { "epoch": 0.988979025882953, "grad_norm": 0.22864562273025513, "learning_rate": 3.612627697196391e-05, "loss": 2.3723, "step": 496420 }, { "epoch": 0.9889989481065918, "grad_norm": 0.2470616102218628, "learning_rate": 3.6024440070600016e-05, "loss": 2.3651, "step": 496430 }, { "epoch": 0.9890188703302307, "grad_norm": 0.22687818109989166, "learning_rate": 3.592260844945305e-05, "loss": 2.3596, "step": 496440 }, { "epoch": 0.9890387925538697, "grad_norm": 0.2344551384449005, "learning_rate": 3.58207821077019e-05, "loss": 2.3565, "step": 496450 }, { "epoch": 0.9890587147775086, "grad_norm": 0.22493532299995422, "learning_rate": 3.5718961044524986e-05, "loss": 2.3609, "step": 496460 }, { "epoch": 0.9890786370011475, "grad_norm": 0.23088152706623077, "learning_rate": 3.561714525910209e-05, "loss": 2.3826, "step": 496470 }, { "epoch": 0.9890985592247864, "grad_norm": 0.22916775941848755, "learning_rate": 3.5515334750612085e-05, "loss": 2.3622, "step": 496480 }, { "epoch": 0.9891184814484254, "grad_norm": 0.23878884315490723, "learning_rate": 3.5413529518234735e-05, "loss": 2.3574, "step": 496490 }, { "epoch": 0.9891384036720643, "grad_norm": 0.23801492154598236, "learning_rate": 3.531172956115003e-05, "loss": 2.365, "step": 496500 }, { "epoch": 0.9891583258957032, "grad_norm": 0.22639364004135132, "learning_rate": 3.5209934878537744e-05, "loss": 2.368, "step": 496510 }, { "epoch": 0.9891782481193421, "grad_norm": 0.2449168860912323, "learning_rate": 3.51081454695783e-05, "loss": 2.3692, "step": 496520 }, { "epoch": 0.989198170342981, "grad_norm": 0.25880369544029236, "learning_rate": 3.500636133345192e-05, "loss": 2.3591, "step": 496530 }, { "epoch": 0.98921809256662, "grad_norm": 0.2314663678407669, "learning_rate": 3.49045824693397e-05, "loss": 2.3686, "step": 496540 }, { "epoch": 0.9892380147902589, "grad_norm": 0.23956407606601715, "learning_rate": 3.480280887642206e-05, "loss": 2.3576, "step": 496550 }, { "epoch": 0.9892579370138977, "grad_norm": 0.21956947445869446, "learning_rate": 3.470104055388057e-05, "loss": 2.3576, "step": 496560 }, { "epoch": 0.9892778592375366, "grad_norm": 0.22277584671974182, "learning_rate": 3.459927750089609e-05, "loss": 2.367, "step": 496570 }, { "epoch": 0.9892977814611755, "grad_norm": 0.2225135862827301, "learning_rate": 3.449751971665016e-05, "loss": 2.3667, "step": 496580 }, { "epoch": 0.9893177036848145, "grad_norm": 0.22447721660137177, "learning_rate": 3.439576720032478e-05, "loss": 2.3634, "step": 496590 }, { "epoch": 0.9893376259084534, "grad_norm": 0.2370062917470932, "learning_rate": 3.4294019951101705e-05, "loss": 2.3708, "step": 496600 }, { "epoch": 0.9893575481320923, "grad_norm": 0.24184100329875946, "learning_rate": 3.4192277968163156e-05, "loss": 2.3666, "step": 496610 }, { "epoch": 0.9893774703557312, "grad_norm": 0.21777787804603577, "learning_rate": 3.4090541250691334e-05, "loss": 2.3693, "step": 496620 }, { "epoch": 0.9893973925793701, "grad_norm": 0.22654466331005096, "learning_rate": 3.398880979786889e-05, "loss": 2.3668, "step": 496630 }, { "epoch": 0.9894173148030091, "grad_norm": 0.22698579728603363, "learning_rate": 3.388708360887871e-05, "loss": 2.3533, "step": 496640 }, { "epoch": 0.989437237026648, "grad_norm": 0.22953179478645325, "learning_rate": 3.378536268290389e-05, "loss": 2.3551, "step": 496650 }, { "epoch": 0.9894571592502869, "grad_norm": 0.21769435703754425, "learning_rate": 3.36836470191273e-05, "loss": 2.3672, "step": 496660 }, { "epoch": 0.9894770814739258, "grad_norm": 0.22232267260551453, "learning_rate": 3.358193661673248e-05, "loss": 2.3676, "step": 496670 }, { "epoch": 0.9894970036975647, "grad_norm": 0.21763987839221954, "learning_rate": 3.34802314749032e-05, "loss": 2.353, "step": 496680 }, { "epoch": 0.9895169259212037, "grad_norm": 0.23110082745552063, "learning_rate": 3.3378531592823225e-05, "loss": 2.3653, "step": 496690 }, { "epoch": 0.9895368481448426, "grad_norm": 0.24298615753650665, "learning_rate": 3.327683696967654e-05, "loss": 2.3484, "step": 496700 }, { "epoch": 0.9895567703684814, "grad_norm": 0.23017345368862152, "learning_rate": 3.317514760464757e-05, "loss": 2.3659, "step": 496710 }, { "epoch": 0.9895766925921203, "grad_norm": 0.23624548316001892, "learning_rate": 3.307346349692053e-05, "loss": 2.355, "step": 496720 }, { "epoch": 0.9895966148157592, "grad_norm": 0.2387436479330063, "learning_rate": 3.297178464568029e-05, "loss": 2.361, "step": 496730 }, { "epoch": 0.9896165370393982, "grad_norm": 0.2389344871044159, "learning_rate": 3.287011105011173e-05, "loss": 2.3583, "step": 496740 }, { "epoch": 0.9896364592630371, "grad_norm": 0.22021374106407166, "learning_rate": 3.276844270939971e-05, "loss": 2.3558, "step": 496750 }, { "epoch": 0.989656381486676, "grad_norm": 0.22834603488445282, "learning_rate": 3.2666779622730017e-05, "loss": 2.3702, "step": 496760 }, { "epoch": 0.9896763037103149, "grad_norm": 0.23168911039829254, "learning_rate": 3.2565121789287725e-05, "loss": 2.345, "step": 496770 }, { "epoch": 0.9896962259339539, "grad_norm": 0.2175946682691574, "learning_rate": 3.2463469208258826e-05, "loss": 2.3609, "step": 496780 }, { "epoch": 0.9897161481575928, "grad_norm": 0.231510728597641, "learning_rate": 3.23618218788293e-05, "loss": 2.3787, "step": 496790 }, { "epoch": 0.9897360703812317, "grad_norm": 0.22869844734668732, "learning_rate": 3.226017980018514e-05, "loss": 2.3488, "step": 496800 }, { "epoch": 0.9897559926048706, "grad_norm": 0.23538857698440552, "learning_rate": 3.215854297151277e-05, "loss": 2.3663, "step": 496810 }, { "epoch": 0.9897759148285095, "grad_norm": 0.2536877393722534, "learning_rate": 3.205691139199862e-05, "loss": 2.3528, "step": 496820 }, { "epoch": 0.9897958370521485, "grad_norm": 0.221329465508461, "learning_rate": 3.1955285060829564e-05, "loss": 2.3631, "step": 496830 }, { "epoch": 0.9898157592757874, "grad_norm": 0.2345385104417801, "learning_rate": 3.185366397719292e-05, "loss": 2.3414, "step": 496840 }, { "epoch": 0.9898356814994262, "grad_norm": 0.2803681194782257, "learning_rate": 3.175204814027555e-05, "loss": 2.3482, "step": 496850 }, { "epoch": 0.9898556037230651, "grad_norm": 0.21769745647907257, "learning_rate": 3.1650437549265e-05, "loss": 2.3723, "step": 496860 }, { "epoch": 0.989875525946704, "grad_norm": 0.2277243584394455, "learning_rate": 3.154883220334881e-05, "loss": 2.3555, "step": 496870 }, { "epoch": 0.989895448170343, "grad_norm": 0.22914759814739227, "learning_rate": 3.144723210171474e-05, "loss": 2.3636, "step": 496880 }, { "epoch": 0.9899153703939819, "grad_norm": 0.2169717699289322, "learning_rate": 3.1345637243550997e-05, "loss": 2.3661, "step": 496890 }, { "epoch": 0.9899352926176208, "grad_norm": 0.22738564014434814, "learning_rate": 3.1244047628046e-05, "loss": 2.3831, "step": 496900 }, { "epoch": 0.9899552148412597, "grad_norm": 0.2269999086856842, "learning_rate": 3.1142463254387746e-05, "loss": 2.3537, "step": 496910 }, { "epoch": 0.9899751370648986, "grad_norm": 0.22885237634181976, "learning_rate": 3.104088412176531e-05, "loss": 2.3559, "step": 496920 }, { "epoch": 0.9899950592885376, "grad_norm": 0.22370502352714539, "learning_rate": 3.0939310229367135e-05, "loss": 2.362, "step": 496930 }, { "epoch": 0.9900149815121765, "grad_norm": 0.23779296875, "learning_rate": 3.083774157638297e-05, "loss": 2.3593, "step": 496940 }, { "epoch": 0.9900349037358154, "grad_norm": 0.23337271809577942, "learning_rate": 3.0736178162001474e-05, "loss": 2.3663, "step": 496950 }, { "epoch": 0.9900548259594543, "grad_norm": 0.22141964733600616, "learning_rate": 3.0634619985412617e-05, "loss": 2.364, "step": 496960 }, { "epoch": 0.9900747481830932, "grad_norm": 0.22188445925712585, "learning_rate": 3.053306704580594e-05, "loss": 2.3576, "step": 496970 }, { "epoch": 0.9900946704067322, "grad_norm": 0.22201499342918396, "learning_rate": 3.0431519342371205e-05, "loss": 2.3641, "step": 496980 }, { "epoch": 0.990114592630371, "grad_norm": 0.23249615728855133, "learning_rate": 3.0329976874298837e-05, "loss": 2.3527, "step": 496990 }, { "epoch": 0.9901345148540099, "grad_norm": 0.2182525396347046, "learning_rate": 3.022843964077904e-05, "loss": 2.3518, "step": 497000 }, { "epoch": 0.9901544370776488, "grad_norm": 0.2101294994354248, "learning_rate": 3.0126907641002456e-05, "loss": 2.3589, "step": 497010 }, { "epoch": 0.9901743593012877, "grad_norm": 0.24021777510643005, "learning_rate": 3.0025380874159737e-05, "loss": 2.361, "step": 497020 }, { "epoch": 0.9901942815249267, "grad_norm": 0.2194831818342209, "learning_rate": 2.992385933944153e-05, "loss": 2.3654, "step": 497030 }, { "epoch": 0.9902142037485656, "grad_norm": 0.21886080503463745, "learning_rate": 2.982234303603981e-05, "loss": 2.3599, "step": 497040 }, { "epoch": 0.9902341259722045, "grad_norm": 0.2169434279203415, "learning_rate": 2.9720831963145457e-05, "loss": 2.3506, "step": 497050 }, { "epoch": 0.9902540481958434, "grad_norm": 0.22274263203144073, "learning_rate": 2.9619326119949996e-05, "loss": 2.3638, "step": 497060 }, { "epoch": 0.9902739704194823, "grad_norm": 0.2266196757555008, "learning_rate": 2.9517825505645414e-05, "loss": 2.3716, "step": 497070 }, { "epoch": 0.9902938926431213, "grad_norm": 0.22443550825119019, "learning_rate": 2.9416330119423684e-05, "loss": 2.3759, "step": 497080 }, { "epoch": 0.9903138148667602, "grad_norm": 0.2333250790834427, "learning_rate": 2.9314839960477014e-05, "loss": 2.3659, "step": 497090 }, { "epoch": 0.9903337370903991, "grad_norm": 0.23216775059700012, "learning_rate": 2.9213355027998045e-05, "loss": 2.3578, "step": 497100 }, { "epoch": 0.990353659314038, "grad_norm": 0.2267492115497589, "learning_rate": 2.9111875321178983e-05, "loss": 2.3694, "step": 497110 }, { "epoch": 0.990373581537677, "grad_norm": 0.2316361516714096, "learning_rate": 2.9010400839212915e-05, "loss": 2.3596, "step": 497120 }, { "epoch": 0.9903935037613159, "grad_norm": 0.2294822782278061, "learning_rate": 2.890893158129293e-05, "loss": 2.3636, "step": 497130 }, { "epoch": 0.9904134259849547, "grad_norm": 0.23758544027805328, "learning_rate": 2.880746754661234e-05, "loss": 2.3559, "step": 497140 }, { "epoch": 0.9904333482085936, "grad_norm": 0.222225621342659, "learning_rate": 2.870600873436424e-05, "loss": 2.3669, "step": 497150 }, { "epoch": 0.9904532704322325, "grad_norm": 0.21596741676330566, "learning_rate": 2.8604555143742827e-05, "loss": 2.3536, "step": 497160 }, { "epoch": 0.9904731926558715, "grad_norm": 0.2239551544189453, "learning_rate": 2.8503106773941635e-05, "loss": 2.3661, "step": 497170 }, { "epoch": 0.9904931148795104, "grad_norm": 0.2214469015598297, "learning_rate": 2.840166362415486e-05, "loss": 2.3616, "step": 497180 }, { "epoch": 0.9905130371031493, "grad_norm": 0.21648982167243958, "learning_rate": 2.8300225693576932e-05, "loss": 2.3551, "step": 497190 }, { "epoch": 0.9905329593267882, "grad_norm": 0.22053028643131256, "learning_rate": 2.8198792981402042e-05, "loss": 2.3513, "step": 497200 }, { "epoch": 0.9905528815504271, "grad_norm": 0.2164963036775589, "learning_rate": 2.8097365486825065e-05, "loss": 2.362, "step": 497210 }, { "epoch": 0.9905728037740661, "grad_norm": 0.20990069210529327, "learning_rate": 2.7995943209041086e-05, "loss": 2.3554, "step": 497220 }, { "epoch": 0.990592725997705, "grad_norm": 0.22335892915725708, "learning_rate": 2.7894526147244748e-05, "loss": 2.3512, "step": 497230 }, { "epoch": 0.9906126482213439, "grad_norm": 0.23499734699726105, "learning_rate": 2.779311430063203e-05, "loss": 2.3664, "step": 497240 }, { "epoch": 0.9906325704449828, "grad_norm": 0.21533726155757904, "learning_rate": 2.7691707668398015e-05, "loss": 2.3671, "step": 497250 }, { "epoch": 0.9906524926686217, "grad_norm": 0.22276650369167328, "learning_rate": 2.759030624973846e-05, "loss": 2.3554, "step": 497260 }, { "epoch": 0.9906724148922607, "grad_norm": 0.22295814752578735, "learning_rate": 2.7488910043849792e-05, "loss": 2.3772, "step": 497270 }, { "epoch": 0.9906923371158995, "grad_norm": 0.22461098432540894, "learning_rate": 2.7387519049927534e-05, "loss": 2.3692, "step": 497280 }, { "epoch": 0.9907122593395384, "grad_norm": 0.22930069267749786, "learning_rate": 2.7286133267168333e-05, "loss": 2.3614, "step": 497290 }, { "epoch": 0.9907321815631773, "grad_norm": 0.24123241007328033, "learning_rate": 2.7184752694769054e-05, "loss": 2.3573, "step": 497300 }, { "epoch": 0.9907521037868162, "grad_norm": 0.22183901071548462, "learning_rate": 2.7083377331925896e-05, "loss": 2.3579, "step": 497310 }, { "epoch": 0.9907720260104552, "grad_norm": 0.22987452149391174, "learning_rate": 2.6982007177836166e-05, "loss": 2.3596, "step": 497320 }, { "epoch": 0.9907919482340941, "grad_norm": 0.21547354757785797, "learning_rate": 2.6880642231697172e-05, "loss": 2.3555, "step": 497330 }, { "epoch": 0.990811870457733, "grad_norm": 0.23783138394355774, "learning_rate": 2.6779282492706005e-05, "loss": 2.3632, "step": 497340 }, { "epoch": 0.9908317926813719, "grad_norm": 0.22414767742156982, "learning_rate": 2.6677927960060412e-05, "loss": 2.3523, "step": 497350 }, { "epoch": 0.9908517149050108, "grad_norm": 0.22146861255168915, "learning_rate": 2.657657863295837e-05, "loss": 2.3402, "step": 497360 }, { "epoch": 0.9908716371286498, "grad_norm": 0.22835928201675415, "learning_rate": 2.6475234510597857e-05, "loss": 2.3567, "step": 497370 }, { "epoch": 0.9908915593522887, "grad_norm": 0.22005298733711243, "learning_rate": 2.6373895592176845e-05, "loss": 2.3699, "step": 497380 }, { "epoch": 0.9909114815759276, "grad_norm": 0.2181505560874939, "learning_rate": 2.6272561876893752e-05, "loss": 2.3678, "step": 497390 }, { "epoch": 0.9909314037995665, "grad_norm": 0.22209469974040985, "learning_rate": 2.6171233363947667e-05, "loss": 2.3651, "step": 497400 }, { "epoch": 0.9909513260232055, "grad_norm": 0.22051523625850677, "learning_rate": 2.606991005253678e-05, "loss": 2.367, "step": 497410 }, { "epoch": 0.9909712482468443, "grad_norm": 0.22265294194221497, "learning_rate": 2.5968591941860853e-05, "loss": 2.3575, "step": 497420 }, { "epoch": 0.9909911704704832, "grad_norm": 0.21730439364910126, "learning_rate": 2.5867279031118294e-05, "loss": 2.3428, "step": 497430 }, { "epoch": 0.9910110926941221, "grad_norm": 0.21449598670005798, "learning_rate": 2.5765971319509306e-05, "loss": 2.358, "step": 497440 }, { "epoch": 0.991031014917761, "grad_norm": 0.22268334031105042, "learning_rate": 2.5664668806233414e-05, "loss": 2.3745, "step": 497450 }, { "epoch": 0.9910509371414, "grad_norm": 0.2282249480485916, "learning_rate": 2.556337149049015e-05, "loss": 2.3552, "step": 497460 }, { "epoch": 0.9910708593650389, "grad_norm": 0.24785469472408295, "learning_rate": 2.546207937147993e-05, "loss": 2.3684, "step": 497470 }, { "epoch": 0.9910907815886778, "grad_norm": 0.22135718166828156, "learning_rate": 2.5360792448402726e-05, "loss": 2.374, "step": 497480 }, { "epoch": 0.9911107038123167, "grad_norm": 0.26598024368286133, "learning_rate": 2.525951072045918e-05, "loss": 2.3502, "step": 497490 }, { "epoch": 0.9911306260359556, "grad_norm": 0.248317152261734, "learning_rate": 2.5158234186850148e-05, "loss": 2.3612, "step": 497500 }, { "epoch": 0.9911505482595946, "grad_norm": 0.20863758027553558, "learning_rate": 2.5056962846776276e-05, "loss": 2.3604, "step": 497510 }, { "epoch": 0.9911704704832335, "grad_norm": 0.22700557112693787, "learning_rate": 2.4955696699438647e-05, "loss": 2.3581, "step": 497520 }, { "epoch": 0.9911903927068724, "grad_norm": 0.2623431980609894, "learning_rate": 2.4854435744038785e-05, "loss": 2.3808, "step": 497530 }, { "epoch": 0.9912103149305113, "grad_norm": 0.23778066039085388, "learning_rate": 2.4753179979777774e-05, "loss": 2.3808, "step": 497540 }, { "epoch": 0.9912302371541502, "grad_norm": 0.23322847485542297, "learning_rate": 2.4651929405857808e-05, "loss": 2.3568, "step": 497550 }, { "epoch": 0.9912501593777892, "grad_norm": 0.22157692909240723, "learning_rate": 2.4550684021480642e-05, "loss": 2.3562, "step": 497560 }, { "epoch": 0.991270081601428, "grad_norm": 0.21603135764598846, "learning_rate": 2.4449443825848238e-05, "loss": 2.3618, "step": 497570 }, { "epoch": 0.9912900038250669, "grad_norm": 0.23142601549625397, "learning_rate": 2.4348208818163242e-05, "loss": 2.3556, "step": 497580 }, { "epoch": 0.9913099260487058, "grad_norm": 0.21560776233673096, "learning_rate": 2.4246978997627843e-05, "loss": 2.3518, "step": 497590 }, { "epoch": 0.9913298482723447, "grad_norm": 0.21774064004421234, "learning_rate": 2.4145754363444906e-05, "loss": 2.3546, "step": 497600 }, { "epoch": 0.9913497704959837, "grad_norm": 0.25214049220085144, "learning_rate": 2.4044534914817507e-05, "loss": 2.3522, "step": 497610 }, { "epoch": 0.9913696927196226, "grad_norm": 0.2290845513343811, "learning_rate": 2.394332065094851e-05, "loss": 2.3581, "step": 497620 }, { "epoch": 0.9913896149432615, "grad_norm": 0.23041099309921265, "learning_rate": 2.3842111571041658e-05, "loss": 2.3575, "step": 497630 }, { "epoch": 0.9914095371669004, "grad_norm": 0.24070607125759125, "learning_rate": 2.3740907674299816e-05, "loss": 2.3723, "step": 497640 }, { "epoch": 0.9914294593905393, "grad_norm": 0.22727183997631073, "learning_rate": 2.3639708959927398e-05, "loss": 2.3679, "step": 497650 }, { "epoch": 0.9914493816141783, "grad_norm": 0.21611392498016357, "learning_rate": 2.353851542712837e-05, "loss": 2.3627, "step": 497660 }, { "epoch": 0.9914693038378172, "grad_norm": 0.21638977527618408, "learning_rate": 2.3437327075106263e-05, "loss": 2.3562, "step": 497670 }, { "epoch": 0.9914892260614561, "grad_norm": 0.20882374048233032, "learning_rate": 2.3336143903066155e-05, "loss": 2.3495, "step": 497680 }, { "epoch": 0.991509148285095, "grad_norm": 0.23936094343662262, "learning_rate": 2.3234965910212235e-05, "loss": 2.3688, "step": 497690 }, { "epoch": 0.991529070508734, "grad_norm": 0.22556239366531372, "learning_rate": 2.3133793095749146e-05, "loss": 2.3615, "step": 497700 }, { "epoch": 0.9915489927323728, "grad_norm": 0.21503931283950806, "learning_rate": 2.3032625458882405e-05, "loss": 2.359, "step": 497710 }, { "epoch": 0.9915689149560117, "grad_norm": 0.2228376716375351, "learning_rate": 2.2931462998816655e-05, "loss": 2.3613, "step": 497720 }, { "epoch": 0.9915888371796506, "grad_norm": 0.2701131999492645, "learning_rate": 2.2830305714757417e-05, "loss": 2.3754, "step": 497730 }, { "epoch": 0.9916087594032895, "grad_norm": 0.22115714848041534, "learning_rate": 2.2729153605910213e-05, "loss": 2.3742, "step": 497740 }, { "epoch": 0.9916286816269285, "grad_norm": 0.21756190061569214, "learning_rate": 2.2628006671481016e-05, "loss": 2.3677, "step": 497750 }, { "epoch": 0.9916486038505674, "grad_norm": 0.21457089483737946, "learning_rate": 2.2526864910676016e-05, "loss": 2.3513, "step": 497760 }, { "epoch": 0.9916685260742063, "grad_norm": 0.22511880099773407, "learning_rate": 2.2425728322700957e-05, "loss": 2.3655, "step": 497770 }, { "epoch": 0.9916884482978452, "grad_norm": 0.22252877056598663, "learning_rate": 2.232459690676225e-05, "loss": 2.3514, "step": 497780 }, { "epoch": 0.9917083705214841, "grad_norm": 0.22922103106975555, "learning_rate": 2.222347066206698e-05, "loss": 2.3516, "step": 497790 }, { "epoch": 0.9917282927451231, "grad_norm": 0.22135844826698303, "learning_rate": 2.2122349587821554e-05, "loss": 2.345, "step": 497800 }, { "epoch": 0.991748214968762, "grad_norm": 0.21716448664665222, "learning_rate": 2.2021233683232834e-05, "loss": 2.3647, "step": 497810 }, { "epoch": 0.9917681371924009, "grad_norm": 0.22585421800613403, "learning_rate": 2.192012294750856e-05, "loss": 2.3555, "step": 497820 }, { "epoch": 0.9917880594160398, "grad_norm": 0.2383575737476349, "learning_rate": 2.1819017379855587e-05, "loss": 2.3557, "step": 497830 }, { "epoch": 0.9918079816396786, "grad_norm": 0.22106501460075378, "learning_rate": 2.1717916979481888e-05, "loss": 2.3479, "step": 497840 }, { "epoch": 0.9918279038633177, "grad_norm": 0.21734638512134552, "learning_rate": 2.1616821745595206e-05, "loss": 2.3631, "step": 497850 }, { "epoch": 0.9918478260869565, "grad_norm": 0.22217468917369843, "learning_rate": 2.1515731677403506e-05, "loss": 2.3556, "step": 497860 }, { "epoch": 0.9918677483105954, "grad_norm": 0.21734178066253662, "learning_rate": 2.1414646774114977e-05, "loss": 2.3598, "step": 497870 }, { "epoch": 0.9918876705342343, "grad_norm": 0.22367942333221436, "learning_rate": 2.1313567034938254e-05, "loss": 2.3471, "step": 497880 }, { "epoch": 0.9919075927578732, "grad_norm": 0.23260857164859772, "learning_rate": 2.1212492459081746e-05, "loss": 2.3636, "step": 497890 }, { "epoch": 0.9919275149815122, "grad_norm": 0.23582018911838531, "learning_rate": 2.1111423045754306e-05, "loss": 2.3514, "step": 497900 }, { "epoch": 0.9919474372051511, "grad_norm": 0.21949368715286255, "learning_rate": 2.101035879416502e-05, "loss": 2.3505, "step": 497910 }, { "epoch": 0.99196735942879, "grad_norm": 0.21612581610679626, "learning_rate": 2.0909299703522954e-05, "loss": 2.3548, "step": 497920 }, { "epoch": 0.9919872816524289, "grad_norm": 0.22426903247833252, "learning_rate": 2.0808245773037858e-05, "loss": 2.3583, "step": 497930 }, { "epoch": 0.9920072038760678, "grad_norm": 0.21632322669029236, "learning_rate": 2.070719700191903e-05, "loss": 2.3658, "step": 497940 }, { "epoch": 0.9920271260997068, "grad_norm": 0.2196090817451477, "learning_rate": 2.060615338937666e-05, "loss": 2.352, "step": 497950 }, { "epoch": 0.9920470483233457, "grad_norm": 0.22415883839130402, "learning_rate": 2.0505114934620484e-05, "loss": 2.3553, "step": 497960 }, { "epoch": 0.9920669705469846, "grad_norm": 0.22532658278942108, "learning_rate": 2.0404081636860917e-05, "loss": 2.3477, "step": 497970 }, { "epoch": 0.9920868927706235, "grad_norm": 0.22545978426933289, "learning_rate": 2.0303053495308364e-05, "loss": 2.3582, "step": 497980 }, { "epoch": 0.9921068149942625, "grad_norm": 0.22147533297538757, "learning_rate": 2.0202030509173463e-05, "loss": 2.3494, "step": 497990 }, { "epoch": 0.9921267372179013, "grad_norm": 0.2210381031036377, "learning_rate": 2.010101267766684e-05, "loss": 2.36, "step": 498000 }, { "epoch": 0.9921466594415402, "grad_norm": 0.2258332520723343, "learning_rate": 2.000000000000002e-05, "loss": 2.3579, "step": 498010 }, { "epoch": 0.9921665816651791, "grad_norm": 0.22359400987625122, "learning_rate": 1.9898992475383847e-05, "loss": 2.3711, "step": 498020 }, { "epoch": 0.992186503888818, "grad_norm": 0.22375275194644928, "learning_rate": 1.979799010303007e-05, "loss": 2.3565, "step": 498030 }, { "epoch": 0.992206426112457, "grad_norm": 0.21985533833503723, "learning_rate": 1.9696992882149767e-05, "loss": 2.3371, "step": 498040 }, { "epoch": 0.9922263483360959, "grad_norm": 0.21407771110534668, "learning_rate": 1.959600081195556e-05, "loss": 2.3607, "step": 498050 }, { "epoch": 0.9922462705597348, "grad_norm": 0.22396570444107056, "learning_rate": 1.9495013891658974e-05, "loss": 2.3646, "step": 498060 }, { "epoch": 0.9922661927833737, "grad_norm": 0.22779661417007446, "learning_rate": 1.9394032120472417e-05, "loss": 2.3582, "step": 498070 }, { "epoch": 0.9922861150070126, "grad_norm": 0.2186116874217987, "learning_rate": 1.9293055497608513e-05, "loss": 2.3451, "step": 498080 }, { "epoch": 0.9923060372306516, "grad_norm": 0.21711647510528564, "learning_rate": 1.9192084022279675e-05, "loss": 2.349, "step": 498090 }, { "epoch": 0.9923259594542905, "grad_norm": 0.3914143443107605, "learning_rate": 1.9091117693698758e-05, "loss": 2.3657, "step": 498100 }, { "epoch": 0.9923458816779294, "grad_norm": 0.21763187646865845, "learning_rate": 1.8990156511078826e-05, "loss": 2.3482, "step": 498110 }, { "epoch": 0.9923658039015683, "grad_norm": 0.22298434376716614, "learning_rate": 1.8889200473633406e-05, "loss": 2.3589, "step": 498120 }, { "epoch": 0.9923857261252071, "grad_norm": 0.24476683139801025, "learning_rate": 1.8788249580575567e-05, "loss": 2.3574, "step": 498130 }, { "epoch": 0.9924056483488461, "grad_norm": 0.2312743365764618, "learning_rate": 1.8687303831119053e-05, "loss": 2.3522, "step": 498140 }, { "epoch": 0.992425570572485, "grad_norm": 0.21633034944534302, "learning_rate": 1.8586363224478043e-05, "loss": 2.3482, "step": 498150 }, { "epoch": 0.9924454927961239, "grad_norm": 0.2307272106409073, "learning_rate": 1.848542775986628e-05, "loss": 2.3498, "step": 498160 }, { "epoch": 0.9924654150197628, "grad_norm": 0.21577373147010803, "learning_rate": 1.838449743649817e-05, "loss": 2.3614, "step": 498170 }, { "epoch": 0.9924853372434017, "grad_norm": 0.22027038037776947, "learning_rate": 1.828357225358812e-05, "loss": 2.3525, "step": 498180 }, { "epoch": 0.9925052594670407, "grad_norm": 0.2239212989807129, "learning_rate": 1.818265221035076e-05, "loss": 2.374, "step": 498190 }, { "epoch": 0.9925251816906796, "grad_norm": 0.2265864461660385, "learning_rate": 1.8081737306000713e-05, "loss": 2.3697, "step": 498200 }, { "epoch": 0.9925451039143185, "grad_norm": 0.20780497789382935, "learning_rate": 1.798082753975372e-05, "loss": 2.3617, "step": 498210 }, { "epoch": 0.9925650261379574, "grad_norm": 0.2163214236497879, "learning_rate": 1.7879922910824186e-05, "loss": 2.3597, "step": 498220 }, { "epoch": 0.9925849483615963, "grad_norm": 0.2168918251991272, "learning_rate": 1.7779023418428296e-05, "loss": 2.3599, "step": 498230 }, { "epoch": 0.9926048705852353, "grad_norm": 0.2150905430316925, "learning_rate": 1.767812906178112e-05, "loss": 2.3447, "step": 498240 }, { "epoch": 0.9926247928088742, "grad_norm": 0.21191872656345367, "learning_rate": 1.757723984009907e-05, "loss": 2.3608, "step": 498250 }, { "epoch": 0.9926447150325131, "grad_norm": 0.21685650944709778, "learning_rate": 1.7476355752597872e-05, "loss": 2.3503, "step": 498260 }, { "epoch": 0.992664637256152, "grad_norm": 0.21792548894882202, "learning_rate": 1.737547679849372e-05, "loss": 2.3631, "step": 498270 }, { "epoch": 0.992684559479791, "grad_norm": 0.23803836107254028, "learning_rate": 1.727460297700323e-05, "loss": 2.3506, "step": 498280 }, { "epoch": 0.9927044817034298, "grad_norm": 0.23482289910316467, "learning_rate": 1.7173734287343033e-05, "loss": 2.3688, "step": 498290 }, { "epoch": 0.9927244039270687, "grad_norm": 0.23352587223052979, "learning_rate": 1.7072870728729985e-05, "loss": 2.3655, "step": 498300 }, { "epoch": 0.9927443261507076, "grad_norm": 0.22767499089241028, "learning_rate": 1.6972012300381145e-05, "loss": 2.3532, "step": 498310 }, { "epoch": 0.9927642483743465, "grad_norm": 0.21786484122276306, "learning_rate": 1.6871159001513594e-05, "loss": 2.3444, "step": 498320 }, { "epoch": 0.9927841705979855, "grad_norm": 0.22565773129463196, "learning_rate": 1.677031083134506e-05, "loss": 2.3605, "step": 498330 }, { "epoch": 0.9928040928216244, "grad_norm": 0.22160175442695618, "learning_rate": 1.666946778909284e-05, "loss": 2.3728, "step": 498340 }, { "epoch": 0.9928240150452633, "grad_norm": 0.2255462408065796, "learning_rate": 1.6568629873975116e-05, "loss": 2.3793, "step": 498350 }, { "epoch": 0.9928439372689022, "grad_norm": 0.22141797840595245, "learning_rate": 1.6467797085209844e-05, "loss": 2.3591, "step": 498360 }, { "epoch": 0.9928638594925411, "grad_norm": 0.21547159552574158, "learning_rate": 1.636696942201521e-05, "loss": 2.3471, "step": 498370 }, { "epoch": 0.9928837817161801, "grad_norm": 0.21259434521198273, "learning_rate": 1.6266146883609835e-05, "loss": 2.3487, "step": 498380 }, { "epoch": 0.992903703939819, "grad_norm": 0.21576233208179474, "learning_rate": 1.616532946921212e-05, "loss": 2.3673, "step": 498390 }, { "epoch": 0.9929236261634579, "grad_norm": 0.20890557765960693, "learning_rate": 1.6064517178040916e-05, "loss": 2.3589, "step": 498400 }, { "epoch": 0.9929435483870968, "grad_norm": 0.20460225641727448, "learning_rate": 1.5963710009315514e-05, "loss": 2.3542, "step": 498410 }, { "epoch": 0.9929634706107356, "grad_norm": 0.22083862125873566, "learning_rate": 1.5862907962254757e-05, "loss": 2.3712, "step": 498420 }, { "epoch": 0.9929833928343746, "grad_norm": 0.21688058972358704, "learning_rate": 1.5762111036078385e-05, "loss": 2.3502, "step": 498430 }, { "epoch": 0.9930033150580135, "grad_norm": 0.21389923989772797, "learning_rate": 1.566131923000591e-05, "loss": 2.3681, "step": 498440 }, { "epoch": 0.9930232372816524, "grad_norm": 0.21327191591262817, "learning_rate": 1.5560532543257512e-05, "loss": 2.3622, "step": 498450 }, { "epoch": 0.9930431595052913, "grad_norm": 0.22338160872459412, "learning_rate": 1.54597509750527e-05, "loss": 2.3467, "step": 498460 }, { "epoch": 0.9930630817289302, "grad_norm": 0.2104458063840866, "learning_rate": 1.535897452461188e-05, "loss": 2.3568, "step": 498470 }, { "epoch": 0.9930830039525692, "grad_norm": 0.21889419853687286, "learning_rate": 1.5258203191155672e-05, "loss": 2.3526, "step": 498480 }, { "epoch": 0.9931029261762081, "grad_norm": 0.2096276581287384, "learning_rate": 1.5157436973904481e-05, "loss": 2.3512, "step": 498490 }, { "epoch": 0.993122848399847, "grad_norm": 0.21343261003494263, "learning_rate": 1.505667587207915e-05, "loss": 2.3615, "step": 498500 }, { "epoch": 0.9931427706234859, "grad_norm": 0.2086000144481659, "learning_rate": 1.495591988490097e-05, "loss": 2.3651, "step": 498510 }, { "epoch": 0.9931626928471248, "grad_norm": 0.2215135544538498, "learning_rate": 1.4855169011590785e-05, "loss": 2.3782, "step": 498520 }, { "epoch": 0.9931826150707638, "grad_norm": 0.23106005787849426, "learning_rate": 1.4754423251370109e-05, "loss": 2.3438, "step": 498530 }, { "epoch": 0.9932025372944027, "grad_norm": 0.4606436789035797, "learning_rate": 1.4653682603460672e-05, "loss": 2.3473, "step": 498540 }, { "epoch": 0.9932224595180416, "grad_norm": 0.24081356823444366, "learning_rate": 1.4552947067084211e-05, "loss": 2.3575, "step": 498550 }, { "epoch": 0.9932423817416804, "grad_norm": 0.20981328189373016, "learning_rate": 1.44522166414629e-05, "loss": 2.3533, "step": 498560 }, { "epoch": 0.9932623039653194, "grad_norm": 0.21780388057231903, "learning_rate": 1.43514913258187e-05, "loss": 2.3558, "step": 498570 }, { "epoch": 0.9932822261889583, "grad_norm": 0.22177621722221375, "learning_rate": 1.4250771119374228e-05, "loss": 2.3554, "step": 498580 }, { "epoch": 0.9933021484125972, "grad_norm": 0.21360059082508087, "learning_rate": 1.4150056021351886e-05, "loss": 2.3561, "step": 498590 }, { "epoch": 0.9933220706362361, "grad_norm": 0.24508275091648102, "learning_rate": 1.404934603097452e-05, "loss": 2.3594, "step": 498600 }, { "epoch": 0.993341992859875, "grad_norm": 0.21862447261810303, "learning_rate": 1.3948641147465191e-05, "loss": 2.3626, "step": 498610 }, { "epoch": 0.993361915083514, "grad_norm": 0.5672198534011841, "learning_rate": 1.384794137004719e-05, "loss": 2.3592, "step": 498620 }, { "epoch": 0.9933818373071529, "grad_norm": 0.21314366161823273, "learning_rate": 1.3747246697943583e-05, "loss": 2.3606, "step": 498630 }, { "epoch": 0.9934017595307918, "grad_norm": 0.2253543734550476, "learning_rate": 1.3646557130378101e-05, "loss": 2.3704, "step": 498640 }, { "epoch": 0.9934216817544307, "grad_norm": 0.21987715363502502, "learning_rate": 1.3545872666574699e-05, "loss": 2.374, "step": 498650 }, { "epoch": 0.9934416039780696, "grad_norm": 0.21699711680412292, "learning_rate": 1.344519330575733e-05, "loss": 2.3572, "step": 498660 }, { "epoch": 0.9934615262017086, "grad_norm": 0.21899081766605377, "learning_rate": 1.334451904714995e-05, "loss": 2.3524, "step": 498670 }, { "epoch": 0.9934814484253475, "grad_norm": 0.2194470316171646, "learning_rate": 1.3243849889976956e-05, "loss": 2.3604, "step": 498680 }, { "epoch": 0.9935013706489864, "grad_norm": 0.21507495641708374, "learning_rate": 1.3143185833463188e-05, "loss": 2.3473, "step": 498690 }, { "epoch": 0.9935212928726253, "grad_norm": 0.2155853807926178, "learning_rate": 1.3042526876833271e-05, "loss": 2.3413, "step": 498700 }, { "epoch": 0.9935412150962641, "grad_norm": 0.21806243062019348, "learning_rate": 1.2941873019312045e-05, "loss": 2.3501, "step": 498710 }, { "epoch": 0.9935611373199031, "grad_norm": 0.21466021239757538, "learning_rate": 1.2841224260124795e-05, "loss": 2.3616, "step": 498720 }, { "epoch": 0.993581059543542, "grad_norm": 0.21262513101100922, "learning_rate": 1.2740580598496809e-05, "loss": 2.3509, "step": 498730 }, { "epoch": 0.9936009817671809, "grad_norm": 0.2153940200805664, "learning_rate": 1.2639942033653373e-05, "loss": 2.3605, "step": 498740 }, { "epoch": 0.9936209039908198, "grad_norm": 0.21366550028324127, "learning_rate": 1.2539308564820884e-05, "loss": 2.3421, "step": 498750 }, { "epoch": 0.9936408262144587, "grad_norm": 0.21829424798488617, "learning_rate": 1.2438680191224849e-05, "loss": 2.3527, "step": 498760 }, { "epoch": 0.9936607484380977, "grad_norm": 0.21368686854839325, "learning_rate": 1.2338056912091445e-05, "loss": 2.3495, "step": 498770 }, { "epoch": 0.9936806706617366, "grad_norm": 0.22476992011070251, "learning_rate": 1.2237438726647066e-05, "loss": 2.3638, "step": 498780 }, { "epoch": 0.9937005928853755, "grad_norm": 0.2083667367696762, "learning_rate": 1.2136825634118331e-05, "loss": 2.3614, "step": 498790 }, { "epoch": 0.9937205151090144, "grad_norm": 0.21459181606769562, "learning_rate": 1.2036217633731638e-05, "loss": 2.3535, "step": 498800 }, { "epoch": 0.9937404373326533, "grad_norm": 0.21642203629016876, "learning_rate": 1.1935614724714273e-05, "loss": 2.3615, "step": 498810 }, { "epoch": 0.9937603595562923, "grad_norm": 0.20698390901088715, "learning_rate": 1.1835016906293073e-05, "loss": 2.3485, "step": 498820 }, { "epoch": 0.9937802817799312, "grad_norm": 0.21500951051712036, "learning_rate": 1.1734424177695547e-05, "loss": 2.3368, "step": 498830 }, { "epoch": 0.99380020400357, "grad_norm": 0.21450269222259521, "learning_rate": 1.16338365381492e-05, "loss": 2.3557, "step": 498840 }, { "epoch": 0.9938201262272089, "grad_norm": 0.2107662856578827, "learning_rate": 1.1533253986881543e-05, "loss": 2.3696, "step": 498850 }, { "epoch": 0.9938400484508478, "grad_norm": 0.20538786053657532, "learning_rate": 1.1432676523120522e-05, "loss": 2.3403, "step": 498860 }, { "epoch": 0.9938599706744868, "grad_norm": 0.2154727727174759, "learning_rate": 1.1332104146094536e-05, "loss": 2.3499, "step": 498870 }, { "epoch": 0.9938798928981257, "grad_norm": 0.21566326916217804, "learning_rate": 1.1231536855031533e-05, "loss": 2.3595, "step": 498880 }, { "epoch": 0.9938998151217646, "grad_norm": 0.2240685671567917, "learning_rate": 1.1130974649160131e-05, "loss": 2.3483, "step": 498890 }, { "epoch": 0.9939197373454035, "grad_norm": 0.21161586046218872, "learning_rate": 1.1030417527708946e-05, "loss": 2.3771, "step": 498900 }, { "epoch": 0.9939396595690425, "grad_norm": 0.21638984978199005, "learning_rate": 1.092986548990682e-05, "loss": 2.3783, "step": 498910 }, { "epoch": 0.9939595817926814, "grad_norm": 0.2132142335176468, "learning_rate": 1.0829318534983035e-05, "loss": 2.3622, "step": 498920 }, { "epoch": 0.9939795040163203, "grad_norm": 0.2191867232322693, "learning_rate": 1.072877666216665e-05, "loss": 2.3454, "step": 498930 }, { "epoch": 0.9939994262399592, "grad_norm": 0.2101505547761917, "learning_rate": 1.0628239870687173e-05, "loss": 2.3406, "step": 498940 }, { "epoch": 0.9940193484635981, "grad_norm": 0.2085966318845749, "learning_rate": 1.052770815977433e-05, "loss": 2.3519, "step": 498950 }, { "epoch": 0.9940392706872371, "grad_norm": 0.21591606736183167, "learning_rate": 1.0427181528657847e-05, "loss": 2.3418, "step": 498960 }, { "epoch": 0.994059192910876, "grad_norm": 0.2064594030380249, "learning_rate": 1.0326659976567899e-05, "loss": 2.37, "step": 498970 }, { "epoch": 0.9940791151345149, "grad_norm": 0.21556879580020905, "learning_rate": 1.0226143502734654e-05, "loss": 2.3615, "step": 498980 }, { "epoch": 0.9940990373581537, "grad_norm": 0.2180403172969818, "learning_rate": 1.0125632106388505e-05, "loss": 2.3577, "step": 498990 }, { "epoch": 0.9941189595817926, "grad_norm": 0.2090252786874771, "learning_rate": 1.002512578676007e-05, "loss": 2.3473, "step": 499000 }, { "epoch": 0.9941388818054316, "grad_norm": 0.4040455222129822, "learning_rate": 9.924624543080408e-06, "loss": 2.3692, "step": 499010 }, { "epoch": 0.9941588040290705, "grad_norm": 0.21297481656074524, "learning_rate": 9.824128374580132e-06, "loss": 2.3623, "step": 499020 }, { "epoch": 0.9941787262527094, "grad_norm": 0.21603751182556152, "learning_rate": 9.72363728049075e-06, "loss": 2.3461, "step": 499030 }, { "epoch": 0.9941986484763483, "grad_norm": 0.21984311938285828, "learning_rate": 9.623151260043539e-06, "loss": 2.3448, "step": 499040 }, { "epoch": 0.9942185706999872, "grad_norm": 0.21760603785514832, "learning_rate": 9.522670312470006e-06, "loss": 2.3462, "step": 499050 }, { "epoch": 0.9942384929236262, "grad_norm": 0.20491866767406464, "learning_rate": 9.422194437002097e-06, "loss": 2.3554, "step": 499060 }, { "epoch": 0.9942584151472651, "grad_norm": 0.2188839167356491, "learning_rate": 9.321723632871982e-06, "loss": 2.3513, "step": 499070 }, { "epoch": 0.994278337370904, "grad_norm": 0.21806548535823822, "learning_rate": 9.221257899311385e-06, "loss": 2.3644, "step": 499080 }, { "epoch": 0.9942982595945429, "grad_norm": 3.7807390689849854, "learning_rate": 9.120797235552925e-06, "loss": 2.3482, "step": 499090 }, { "epoch": 0.9943181818181818, "grad_norm": 0.227929025888443, "learning_rate": 9.020341640829432e-06, "loss": 2.3736, "step": 499100 }, { "epoch": 0.9943381040418208, "grad_norm": 0.22413446009159088, "learning_rate": 8.91989111437308e-06, "loss": 2.3498, "step": 499110 }, { "epoch": 0.9943580262654597, "grad_norm": 0.2066362500190735, "learning_rate": 8.819445655417146e-06, "loss": 2.3647, "step": 499120 }, { "epoch": 0.9943779484890986, "grad_norm": 0.20915347337722778, "learning_rate": 8.719005263194912e-06, "loss": 2.3524, "step": 499130 }, { "epoch": 0.9943978707127374, "grad_norm": 0.213627889752388, "learning_rate": 8.618569936939436e-06, "loss": 2.3537, "step": 499140 }, { "epoch": 0.9944177929363763, "grad_norm": 0.21842609345912933, "learning_rate": 8.51813967588444e-06, "loss": 2.3565, "step": 499150 }, { "epoch": 0.9944377151600153, "grad_norm": 0.21478378772735596, "learning_rate": 8.41771447926365e-06, "loss": 2.3505, "step": 499160 }, { "epoch": 0.9944576373836542, "grad_norm": 0.2120763659477234, "learning_rate": 8.317294346310789e-06, "loss": 2.339, "step": 499170 }, { "epoch": 0.9944775596072931, "grad_norm": 0.21941789984703064, "learning_rate": 8.216879276260248e-06, "loss": 2.3492, "step": 499180 }, { "epoch": 0.994497481830932, "grad_norm": 0.21322548389434814, "learning_rate": 8.116469268345971e-06, "loss": 2.3602, "step": 499190 }, { "epoch": 0.994517404054571, "grad_norm": 0.21101446449756622, "learning_rate": 8.016064321802797e-06, "loss": 2.3587, "step": 499200 }, { "epoch": 0.9945373262782099, "grad_norm": 0.20852035284042358, "learning_rate": 7.915664435865332e-06, "loss": 2.3565, "step": 499210 }, { "epoch": 0.9945572485018488, "grad_norm": 0.20719631016254425, "learning_rate": 7.815269609768417e-06, "loss": 2.3652, "step": 499220 }, { "epoch": 0.9945771707254877, "grad_norm": 0.21285727620124817, "learning_rate": 7.714879842747102e-06, "loss": 2.3511, "step": 499230 }, { "epoch": 0.9945970929491266, "grad_norm": 0.20349545776844025, "learning_rate": 7.61449513403667e-06, "loss": 2.355, "step": 499240 }, { "epoch": 0.9946170151727656, "grad_norm": 0.21284377574920654, "learning_rate": 7.514115482872397e-06, "loss": 2.3602, "step": 499250 }, { "epoch": 0.9946369373964045, "grad_norm": 0.21059070527553558, "learning_rate": 7.41374088849045e-06, "loss": 2.3517, "step": 499260 }, { "epoch": 0.9946568596200434, "grad_norm": 0.2134198099374771, "learning_rate": 7.313371350126108e-06, "loss": 2.3448, "step": 499270 }, { "epoch": 0.9946767818436822, "grad_norm": 0.2193852812051773, "learning_rate": 7.213006867015981e-06, "loss": 2.3664, "step": 499280 }, { "epoch": 0.9946967040673211, "grad_norm": 0.2080325484275818, "learning_rate": 7.112647438395792e-06, "loss": 2.3579, "step": 499290 }, { "epoch": 0.9947166262909601, "grad_norm": 0.2220756560564041, "learning_rate": 7.012293063502151e-06, "loss": 2.3592, "step": 499300 }, { "epoch": 0.994736548514599, "grad_norm": 0.2151905745267868, "learning_rate": 6.911943741571669e-06, "loss": 2.368, "step": 499310 }, { "epoch": 0.9947564707382379, "grad_norm": 0.22363457083702087, "learning_rate": 6.811599471841179e-06, "loss": 2.3619, "step": 499320 }, { "epoch": 0.9947763929618768, "grad_norm": 0.21380117535591125, "learning_rate": 6.711260253547735e-06, "loss": 2.3583, "step": 499330 }, { "epoch": 0.9947963151855157, "grad_norm": 0.20036622881889343, "learning_rate": 6.610926085928392e-06, "loss": 2.3609, "step": 499340 }, { "epoch": 0.9948162374091547, "grad_norm": 0.21253672242164612, "learning_rate": 6.51059696822065e-06, "loss": 2.3462, "step": 499350 }, { "epoch": 0.9948361596327936, "grad_norm": 0.20703071355819702, "learning_rate": 6.4102728996620065e-06, "loss": 2.3448, "step": 499360 }, { "epoch": 0.9948560818564325, "grad_norm": 0.2107241004705429, "learning_rate": 6.309953879490404e-06, "loss": 2.3474, "step": 499370 }, { "epoch": 0.9948760040800714, "grad_norm": 0.20949004590511322, "learning_rate": 6.2096399069435646e-06, "loss": 2.351, "step": 499380 }, { "epoch": 0.9948959263037103, "grad_norm": 0.21904465556144714, "learning_rate": 6.1093309812596534e-06, "loss": 2.3652, "step": 499390 }, { "epoch": 0.9949158485273493, "grad_norm": 0.20609652996063232, "learning_rate": 6.009027101677278e-06, "loss": 2.3485, "step": 499400 }, { "epoch": 0.9949357707509882, "grad_norm": 0.2075912207365036, "learning_rate": 5.908728267434604e-06, "loss": 2.3556, "step": 499410 }, { "epoch": 0.994955692974627, "grad_norm": 0.21206702291965485, "learning_rate": 5.808434477770685e-06, "loss": 2.371, "step": 499420 }, { "epoch": 0.9949756151982659, "grad_norm": 0.20942308008670807, "learning_rate": 5.708145731924352e-06, "loss": 2.3546, "step": 499430 }, { "epoch": 0.9949955374219048, "grad_norm": 0.20913052558898926, "learning_rate": 5.607862029134436e-06, "loss": 2.3466, "step": 499440 }, { "epoch": 0.9950154596455438, "grad_norm": 0.21282947063446045, "learning_rate": 5.507583368640656e-06, "loss": 2.3498, "step": 499450 }, { "epoch": 0.9950353818691827, "grad_norm": 0.2070603221654892, "learning_rate": 5.407309749682288e-06, "loss": 2.3701, "step": 499460 }, { "epoch": 0.9950553040928216, "grad_norm": 0.2081710547208786, "learning_rate": 5.3070411714990495e-06, "loss": 2.3713, "step": 499470 }, { "epoch": 0.9950752263164605, "grad_norm": 0.2100197672843933, "learning_rate": 5.206777633330662e-06, "loss": 2.3498, "step": 499480 }, { "epoch": 0.9950951485400995, "grad_norm": 0.21089842915534973, "learning_rate": 5.106519134417509e-06, "loss": 2.3475, "step": 499490 }, { "epoch": 0.9951150707637384, "grad_norm": 0.2069600373506546, "learning_rate": 5.006265673999755e-06, "loss": 2.3575, "step": 499500 }, { "epoch": 0.9951349929873773, "grad_norm": 0.21530795097351074, "learning_rate": 4.906017251317563e-06, "loss": 2.3475, "step": 499510 }, { "epoch": 0.9951549152110162, "grad_norm": 0.21470794081687927, "learning_rate": 4.805773865611984e-06, "loss": 2.3555, "step": 499520 }, { "epoch": 0.9951748374346551, "grad_norm": 0.21427495777606964, "learning_rate": 4.705535516123405e-06, "loss": 2.3625, "step": 499530 }, { "epoch": 0.9951947596582941, "grad_norm": 0.21277491748332977, "learning_rate": 4.605302202093098e-06, "loss": 2.3645, "step": 499540 }, { "epoch": 0.995214681881933, "grad_norm": 0.21674667298793793, "learning_rate": 4.505073922762337e-06, "loss": 2.3643, "step": 499550 }, { "epoch": 0.9952346041055719, "grad_norm": 0.2165653109550476, "learning_rate": 4.404850677372396e-06, "loss": 2.3491, "step": 499560 }, { "epoch": 0.9952545263292107, "grad_norm": 0.2103971391916275, "learning_rate": 4.3046324651649925e-06, "loss": 2.3539, "step": 499570 }, { "epoch": 0.9952744485528496, "grad_norm": 0.20669634640216827, "learning_rate": 4.2044192853818445e-06, "loss": 2.3415, "step": 499580 }, { "epoch": 0.9952943707764886, "grad_norm": 0.21472716331481934, "learning_rate": 4.104211137264891e-06, "loss": 2.3621, "step": 499590 }, { "epoch": 0.9953142930001275, "grad_norm": 0.21703684329986572, "learning_rate": 4.004008020056071e-06, "loss": 2.3646, "step": 499600 }, { "epoch": 0.9953342152237664, "grad_norm": 0.20852917432785034, "learning_rate": 3.903809932998215e-06, "loss": 2.3392, "step": 499610 }, { "epoch": 0.9953541374474053, "grad_norm": 0.20939011871814728, "learning_rate": 3.803616875333704e-06, "loss": 2.3524, "step": 499620 }, { "epoch": 0.9953740596710442, "grad_norm": 0.2096579670906067, "learning_rate": 3.7034288463049236e-06, "loss": 2.3789, "step": 499630 }, { "epoch": 0.9953939818946832, "grad_norm": 0.20857855677604675, "learning_rate": 3.6032458451551454e-06, "loss": 2.3559, "step": 499640 }, { "epoch": 0.9954139041183221, "grad_norm": 0.2125541865825653, "learning_rate": 3.5030678711274187e-06, "loss": 2.3458, "step": 499650 }, { "epoch": 0.995433826341961, "grad_norm": 0.20835569500923157, "learning_rate": 3.4028949234650164e-06, "loss": 2.3609, "step": 499660 }, { "epoch": 0.9954537485655999, "grad_norm": 0.20769841969013214, "learning_rate": 3.302727001411432e-06, "loss": 2.3631, "step": 499670 }, { "epoch": 0.9954736707892388, "grad_norm": 0.202268585562706, "learning_rate": 3.2025641042103817e-06, "loss": 2.3649, "step": 499680 }, { "epoch": 0.9954935930128778, "grad_norm": 0.20682434737682343, "learning_rate": 3.1024062311058033e-06, "loss": 2.346, "step": 499690 }, { "epoch": 0.9955135152365167, "grad_norm": 0.20079518854618073, "learning_rate": 3.0022533813414134e-06, "loss": 2.3379, "step": 499700 }, { "epoch": 0.9955334374601555, "grad_norm": 0.20557382702827454, "learning_rate": 2.9021055541618156e-06, "loss": 2.3577, "step": 499710 }, { "epoch": 0.9955533596837944, "grad_norm": 0.21007031202316284, "learning_rate": 2.801962748811393e-06, "loss": 2.3401, "step": 499720 }, { "epoch": 0.9955732819074333, "grad_norm": 0.2069600522518158, "learning_rate": 2.7018249645347493e-06, "loss": 2.3545, "step": 499730 }, { "epoch": 0.9955932041310723, "grad_norm": 3.7017147541046143, "learning_rate": 2.601692200576711e-06, "loss": 2.3382, "step": 499740 }, { "epoch": 0.9956131263547112, "grad_norm": 0.21109391748905182, "learning_rate": 2.5015644561821038e-06, "loss": 2.3601, "step": 499750 }, { "epoch": 0.9956330485783501, "grad_norm": 0.2027348130941391, "learning_rate": 2.401441730596421e-06, "loss": 2.3459, "step": 499760 }, { "epoch": 0.995652970801989, "grad_norm": 0.20082062482833862, "learning_rate": 2.3013240230647102e-06, "loss": 2.3439, "step": 499770 }, { "epoch": 0.995672893025628, "grad_norm": 0.2025667428970337, "learning_rate": 2.2012113328329085e-06, "loss": 2.3599, "step": 499780 }, { "epoch": 0.9956928152492669, "grad_norm": 0.20509551465511322, "learning_rate": 2.1011036591465082e-06, "loss": 2.3383, "step": 499790 }, { "epoch": 0.9957127374729058, "grad_norm": 0.20798689126968384, "learning_rate": 2.0010010012516676e-06, "loss": 2.3487, "step": 499800 }, { "epoch": 0.9957326596965447, "grad_norm": 0.20162509381771088, "learning_rate": 1.9009033583945456e-06, "loss": 2.3481, "step": 499810 }, { "epoch": 0.9957525819201836, "grad_norm": 0.20489761233329773, "learning_rate": 1.8008107298210784e-06, "loss": 2.3505, "step": 499820 }, { "epoch": 0.9957725041438226, "grad_norm": 0.21294938027858734, "learning_rate": 1.700723114778313e-06, "loss": 2.3544, "step": 499830 }, { "epoch": 0.9957924263674615, "grad_norm": 0.20294281840324402, "learning_rate": 1.60064051251263e-06, "loss": 2.3509, "step": 499840 }, { "epoch": 0.9958123485911003, "grad_norm": 0.20622308552265167, "learning_rate": 1.5005629222708539e-06, "loss": 2.3561, "step": 499850 }, { "epoch": 0.9958322708147392, "grad_norm": 0.1989254355430603, "learning_rate": 1.4004903433004756e-06, "loss": 2.3691, "step": 499860 }, { "epoch": 0.9958521930383781, "grad_norm": 0.203949972987175, "learning_rate": 1.30042277484832e-06, "loss": 2.3592, "step": 499870 }, { "epoch": 0.9958721152620171, "grad_norm": 0.20957021415233612, "learning_rate": 1.2003602161620997e-06, "loss": 2.351, "step": 499880 }, { "epoch": 0.995892037485656, "grad_norm": 0.2050975263118744, "learning_rate": 1.1003026664895277e-06, "loss": 2.372, "step": 499890 }, { "epoch": 0.9959119597092949, "grad_norm": 0.20201388001441956, "learning_rate": 1.000250125078095e-06, "loss": 2.3684, "step": 499900 }, { "epoch": 0.9959318819329338, "grad_norm": 0.20386092364788055, "learning_rate": 9.002025911764023e-07, "loss": 2.367, "step": 499910 }, { "epoch": 0.9959518041565727, "grad_norm": 0.2194950431585312, "learning_rate": 8.001600640319406e-07, "loss": 2.381, "step": 499920 }, { "epoch": 0.9959717263802117, "grad_norm": 0.20578767359256744, "learning_rate": 7.001225428937552e-07, "loss": 2.3623, "step": 499930 }, { "epoch": 0.9959916486038506, "grad_norm": 0.20574450492858887, "learning_rate": 6.00090027010225e-07, "loss": 2.3608, "step": 499940 }, { "epoch": 0.9960115708274895, "grad_norm": 0.2017340213060379, "learning_rate": 5.000625156297289e-07, "loss": 2.3517, "step": 499950 }, { "epoch": 0.9960314930511284, "grad_norm": 0.20380868017673492, "learning_rate": 4.0004000800197835e-07, "loss": 2.3476, "step": 499960 }, { "epoch": 0.9960514152747673, "grad_norm": 0.20280882716178894, "learning_rate": 3.0002250337557434e-07, "loss": 2.3481, "step": 499970 }, { "epoch": 0.9960713374984063, "grad_norm": 0.20181645452976227, "learning_rate": 2.000100010000061e-07, "loss": 2.3558, "step": 499980 }, { "epoch": 0.9960912597220452, "grad_norm": 0.20480583608150482, "learning_rate": 1.0000250012498491e-07, "loss": 2.3536, "step": 499990 }, { "epoch": 0.996111181945684, "grad_norm": 0.20562244951725006, "learning_rate": 0.0, "loss": 2.3593, "step": 500000 } ], "logging_steps": 10, "max_steps": 500000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.038964239826944e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }