{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0162634681845903, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004065867046147591, "grad_norm": 0.22144322097301483, "learning_rate": 0.0, "loss": 1.3598, "step": 1 }, { "epoch": 0.0008131734092295182, "grad_norm": 0.199473574757576, "learning_rate": 4e-05, "loss": 1.405, "step": 2 }, { "epoch": 0.0012197601138442774, "grad_norm": 0.20758001506328583, "learning_rate": 8e-05, "loss": 1.2815, "step": 3 }, { "epoch": 0.0016263468184590363, "grad_norm": 0.21362783014774323, "learning_rate": 0.00012, "loss": 1.245, "step": 4 }, { "epoch": 0.0020329335230737954, "grad_norm": 0.24631692469120026, "learning_rate": 0.00016, "loss": 1.3086, "step": 5 }, { "epoch": 0.002439520227688555, "grad_norm": 0.20009225606918335, "learning_rate": 0.0002, "loss": 1.2443, "step": 6 }, { "epoch": 0.0028461069323033137, "grad_norm": 0.1735246330499649, "learning_rate": 0.00019995929167514756, "loss": 1.1878, "step": 7 }, { "epoch": 0.0032526936369180726, "grad_norm": 0.18904437124729156, "learning_rate": 0.00019991858335029514, "loss": 1.2478, "step": 8 }, { "epoch": 0.003659280341532832, "grad_norm": 0.1645248979330063, "learning_rate": 0.0001998778750254427, "loss": 1.2098, "step": 9 }, { "epoch": 0.004065867046147591, "grad_norm": 0.22034819424152374, "learning_rate": 0.00019983716670059028, "loss": 1.1183, "step": 10 }, { "epoch": 0.00447245375076235, "grad_norm": 0.3233634829521179, "learning_rate": 0.00019979645837573783, "loss": 1.0974, "step": 11 }, { "epoch": 0.00487904045537711, "grad_norm": 0.2592090368270874, "learning_rate": 0.00019975575005088542, "loss": 1.1611, "step": 12 }, { "epoch": 0.005285627159991868, "grad_norm": 0.14754348993301392, "learning_rate": 0.000199715041726033, "loss": 1.1932, "step": 13 }, { "epoch": 0.005692213864606627, "grad_norm": 
0.09341374039649963, "learning_rate": 0.00019967433340118055, "loss": 1.348, "step": 14 }, { "epoch": 0.006098800569221387, "grad_norm": 0.10229193419218063, "learning_rate": 0.00019963362507632813, "loss": 1.0927, "step": 15 }, { "epoch": 0.006505387273836145, "grad_norm": 0.14015386998653412, "learning_rate": 0.00019959291675147569, "loss": 1.2263, "step": 16 }, { "epoch": 0.006911973978450905, "grad_norm": 0.17507047951221466, "learning_rate": 0.00019955220842662327, "loss": 1.1951, "step": 17 }, { "epoch": 0.007318560683065664, "grad_norm": 0.17176274955272675, "learning_rate": 0.00019951150010177082, "loss": 1.1895, "step": 18 }, { "epoch": 0.007725147387680423, "grad_norm": 0.13839803636074066, "learning_rate": 0.00019947079177691838, "loss": 0.9549, "step": 19 }, { "epoch": 0.008131734092295182, "grad_norm": 0.0970696285367012, "learning_rate": 0.00019943008345206596, "loss": 1.0867, "step": 20 }, { "epoch": 0.008538320796909941, "grad_norm": 0.08836886286735535, "learning_rate": 0.0001993893751272135, "loss": 1.155, "step": 21 }, { "epoch": 0.0089449075015247, "grad_norm": 0.11885025352239609, "learning_rate": 0.0001993486668023611, "loss": 1.1231, "step": 22 }, { "epoch": 0.00935149420613946, "grad_norm": 0.15120816230773926, "learning_rate": 0.00019930795847750865, "loss": 1.1078, "step": 23 }, { "epoch": 0.00975808091075422, "grad_norm": 0.16326424479484558, "learning_rate": 0.00019926725015265623, "loss": 1.079, "step": 24 }, { "epoch": 0.010164667615368977, "grad_norm": 0.1179085448384285, "learning_rate": 0.0001992265418278038, "loss": 0.932, "step": 25 }, { "epoch": 0.010571254319983736, "grad_norm": 0.10621985793113708, "learning_rate": 0.00019918583350295136, "loss": 1.1386, "step": 26 }, { "epoch": 0.010977841024598495, "grad_norm": 0.08408638089895248, "learning_rate": 0.00019914512517809894, "loss": 1.0987, "step": 27 }, { "epoch": 0.011384427729213255, "grad_norm": 0.08222135156393051, "learning_rate": 0.0001991044168532465, "loss": 1.0378, 
"step": 28 }, { "epoch": 0.011791014433828014, "grad_norm": 0.08763129264116287, "learning_rate": 0.00019906370852839408, "loss": 0.983, "step": 29 }, { "epoch": 0.012197601138442773, "grad_norm": 0.10638878494501114, "learning_rate": 0.00019902300020354163, "loss": 1.0258, "step": 30 }, { "epoch": 0.012604187843057533, "grad_norm": 0.10155023634433746, "learning_rate": 0.0001989822918786892, "loss": 0.9579, "step": 31 }, { "epoch": 0.01301077454767229, "grad_norm": 0.08844579011201859, "learning_rate": 0.00019894158355383677, "loss": 1.1007, "step": 32 }, { "epoch": 0.01341736125228705, "grad_norm": 0.10394158959388733, "learning_rate": 0.00019890087522898432, "loss": 1.0459, "step": 33 }, { "epoch": 0.01382394795690181, "grad_norm": 0.08938682824373245, "learning_rate": 0.0001988601669041319, "loss": 1.0985, "step": 34 }, { "epoch": 0.014230534661516568, "grad_norm": 0.08639086782932281, "learning_rate": 0.00019881945857927948, "loss": 1.0712, "step": 35 }, { "epoch": 0.014637121366131328, "grad_norm": 0.08568435162305832, "learning_rate": 0.00019877875025442704, "loss": 1.0549, "step": 36 }, { "epoch": 0.015043708070746087, "grad_norm": 0.0859316885471344, "learning_rate": 0.00019873804192957462, "loss": 1.1042, "step": 37 }, { "epoch": 0.015450294775360847, "grad_norm": 0.09534381330013275, "learning_rate": 0.00019869733360472217, "loss": 1.0127, "step": 38 }, { "epoch": 0.015856881479975604, "grad_norm": 0.09103580564260483, "learning_rate": 0.00019865662527986976, "loss": 0.9347, "step": 39 }, { "epoch": 0.016263468184590364, "grad_norm": 0.0928095132112503, "learning_rate": 0.0001986159169550173, "loss": 1.0559, "step": 40 }, { "epoch": 0.016670054889205123, "grad_norm": 0.09370871633291245, "learning_rate": 0.0001985752086301649, "loss": 1.1473, "step": 41 }, { "epoch": 0.017076641593819882, "grad_norm": 0.07691123336553574, "learning_rate": 0.00019853450030531244, "loss": 1.0128, "step": 42 }, { "epoch": 0.01748322829843464, "grad_norm": 
0.09201047569513321, "learning_rate": 0.00019849379198046, "loss": 1.1296, "step": 43 }, { "epoch": 0.0178898150030494, "grad_norm": 0.08490074425935745, "learning_rate": 0.00019845308365560758, "loss": 1.0444, "step": 44 }, { "epoch": 0.01829640170766416, "grad_norm": 0.08623114228248596, "learning_rate": 0.00019841237533075513, "loss": 1.066, "step": 45 }, { "epoch": 0.01870298841227892, "grad_norm": 0.09486474096775055, "learning_rate": 0.00019837166700590271, "loss": 1.0788, "step": 46 }, { "epoch": 0.01910957511689368, "grad_norm": 0.08024484664201736, "learning_rate": 0.0001983309586810503, "loss": 1.0262, "step": 47 }, { "epoch": 0.01951616182150844, "grad_norm": 0.09256327897310257, "learning_rate": 0.00019829025035619785, "loss": 1.107, "step": 48 }, { "epoch": 0.019922748526123194, "grad_norm": 0.09877921640872955, "learning_rate": 0.00019824954203134543, "loss": 1.1731, "step": 49 }, { "epoch": 0.020329335230737954, "grad_norm": 0.08699575811624527, "learning_rate": 0.00019820883370649299, "loss": 1.0809, "step": 50 }, { "epoch": 0.020735921935352713, "grad_norm": 0.089649498462677, "learning_rate": 0.00019816812538164057, "loss": 1.1564, "step": 51 }, { "epoch": 0.021142508639967472, "grad_norm": 0.08757214993238449, "learning_rate": 0.00019812741705678812, "loss": 1.0272, "step": 52 }, { "epoch": 0.02154909534458223, "grad_norm": 0.08320939540863037, "learning_rate": 0.0001980867087319357, "loss": 0.9931, "step": 53 }, { "epoch": 0.02195568204919699, "grad_norm": 0.08898070454597473, "learning_rate": 0.00019804600040708326, "loss": 0.9421, "step": 54 }, { "epoch": 0.02236226875381175, "grad_norm": 0.08072236180305481, "learning_rate": 0.0001980052920822308, "loss": 1.0304, "step": 55 }, { "epoch": 0.02276885545842651, "grad_norm": 0.09354112297296524, "learning_rate": 0.0001979645837573784, "loss": 1.1041, "step": 56 }, { "epoch": 0.02317544216304127, "grad_norm": 0.09214304387569427, "learning_rate": 0.00019792387543252595, "loss": 1.0666, "step": 57 
}, { "epoch": 0.02358202886765603, "grad_norm": 0.08546210825443268, "learning_rate": 0.00019788316710767353, "loss": 1.0795, "step": 58 }, { "epoch": 0.023988615572270788, "grad_norm": 0.09029046446084976, "learning_rate": 0.0001978424587828211, "loss": 1.199, "step": 59 }, { "epoch": 0.024395202276885547, "grad_norm": 0.08200937509536743, "learning_rate": 0.00019780175045796866, "loss": 0.9853, "step": 60 }, { "epoch": 0.024801788981500306, "grad_norm": 0.08928566426038742, "learning_rate": 0.00019776104213311624, "loss": 0.9948, "step": 61 }, { "epoch": 0.025208375686115066, "grad_norm": 0.08067034929990768, "learning_rate": 0.0001977203338082638, "loss": 0.9824, "step": 62 }, { "epoch": 0.02561496239072982, "grad_norm": 0.07509499788284302, "learning_rate": 0.00019767962548341138, "loss": 0.9166, "step": 63 }, { "epoch": 0.02602154909534458, "grad_norm": 0.10127029567956924, "learning_rate": 0.00019763891715855893, "loss": 0.978, "step": 64 }, { "epoch": 0.02642813579995934, "grad_norm": 0.08480218052864075, "learning_rate": 0.0001975982088337065, "loss": 1.0019, "step": 65 }, { "epoch": 0.0268347225045741, "grad_norm": 0.0922696441411972, "learning_rate": 0.00019755750050885407, "loss": 1.0213, "step": 66 }, { "epoch": 0.02724130920918886, "grad_norm": 0.0819278433918953, "learning_rate": 0.00019751679218400162, "loss": 0.9792, "step": 67 }, { "epoch": 0.02764789591380362, "grad_norm": 0.09971120208501816, "learning_rate": 0.0001974760838591492, "loss": 0.9605, "step": 68 }, { "epoch": 0.028054482618418378, "grad_norm": 0.09195531904697418, "learning_rate": 0.00019743537553429676, "loss": 1.1203, "step": 69 }, { "epoch": 0.028461069323033137, "grad_norm": 0.09179981052875519, "learning_rate": 0.00019739466720944434, "loss": 1.0586, "step": 70 }, { "epoch": 0.028867656027647896, "grad_norm": 0.0866156816482544, "learning_rate": 0.00019735395888459192, "loss": 1.0558, "step": 71 }, { "epoch": 0.029274242732262656, "grad_norm": 0.09198956191539764, 
"learning_rate": 0.00019731325055973947, "loss": 1.117, "step": 72 }, { "epoch": 0.029680829436877415, "grad_norm": 0.0912180244922638, "learning_rate": 0.00019727254223488705, "loss": 1.0235, "step": 73 }, { "epoch": 0.030087416141492174, "grad_norm": 0.092186838388443, "learning_rate": 0.0001972318339100346, "loss": 1.0119, "step": 74 }, { "epoch": 0.030494002846106934, "grad_norm": 0.091013602912426, "learning_rate": 0.0001971911255851822, "loss": 1.0523, "step": 75 }, { "epoch": 0.030900589550721693, "grad_norm": 0.0932595282793045, "learning_rate": 0.00019715041726032974, "loss": 1.0471, "step": 76 }, { "epoch": 0.03130717625533645, "grad_norm": 0.089345782995224, "learning_rate": 0.0001971097089354773, "loss": 1.0214, "step": 77 }, { "epoch": 0.03171376295995121, "grad_norm": 0.09476006776094437, "learning_rate": 0.00019706900061062488, "loss": 0.9888, "step": 78 }, { "epoch": 0.03212034966456597, "grad_norm": 0.09379832446575165, "learning_rate": 0.00019702829228577243, "loss": 1.1039, "step": 79 }, { "epoch": 0.03252693636918073, "grad_norm": 0.10659569501876831, "learning_rate": 0.00019698758396092001, "loss": 1.1377, "step": 80 }, { "epoch": 0.03293352307379549, "grad_norm": 0.09652398526668549, "learning_rate": 0.0001969468756360676, "loss": 1.0194, "step": 81 }, { "epoch": 0.033340109778410246, "grad_norm": 0.08641666918992996, "learning_rate": 0.00019690616731121515, "loss": 1.0239, "step": 82 }, { "epoch": 0.03374669648302501, "grad_norm": 0.0956072062253952, "learning_rate": 0.00019686545898636273, "loss": 1.032, "step": 83 }, { "epoch": 0.034153283187639764, "grad_norm": 0.08402691036462784, "learning_rate": 0.00019682475066151029, "loss": 0.9802, "step": 84 }, { "epoch": 0.03455986989225452, "grad_norm": 0.08827648311853409, "learning_rate": 0.00019678404233665787, "loss": 1.1805, "step": 85 }, { "epoch": 0.03496645659686928, "grad_norm": 0.08757660537958145, "learning_rate": 0.00019674333401180542, "loss": 0.952, "step": 86 }, { "epoch": 
0.03537304330148404, "grad_norm": 0.09728538244962692, "learning_rate": 0.000196702625686953, "loss": 1.0875, "step": 87 }, { "epoch": 0.0357796300060988, "grad_norm": 0.08561044931411743, "learning_rate": 0.00019666191736210056, "loss": 0.9818, "step": 88 }, { "epoch": 0.03618621671071356, "grad_norm": 0.08389468491077423, "learning_rate": 0.0001966212090372481, "loss": 0.9962, "step": 89 }, { "epoch": 0.03659280341532832, "grad_norm": 0.08847957849502563, "learning_rate": 0.0001965805007123957, "loss": 1.0138, "step": 90 }, { "epoch": 0.036999390119943076, "grad_norm": 0.08515489101409912, "learning_rate": 0.00019653979238754324, "loss": 1.0119, "step": 91 }, { "epoch": 0.03740597682455784, "grad_norm": 0.09340325742959976, "learning_rate": 0.00019649908406269083, "loss": 1.0635, "step": 92 }, { "epoch": 0.037812563529172595, "grad_norm": 0.09383916854858398, "learning_rate": 0.0001964583757378384, "loss": 1.0999, "step": 93 }, { "epoch": 0.03821915023378736, "grad_norm": 0.09956547617912292, "learning_rate": 0.00019641766741298596, "loss": 1.0186, "step": 94 }, { "epoch": 0.038625736938402114, "grad_norm": 0.09809234738349915, "learning_rate": 0.00019637695908813354, "loss": 1.0641, "step": 95 }, { "epoch": 0.03903232364301688, "grad_norm": 0.08520065993070602, "learning_rate": 0.0001963362507632811, "loss": 0.9255, "step": 96 }, { "epoch": 0.03943891034763163, "grad_norm": 0.09007880836725235, "learning_rate": 0.00019629554243842868, "loss": 1.0963, "step": 97 }, { "epoch": 0.03984549705224639, "grad_norm": 0.08900373429059982, "learning_rate": 0.00019625483411357623, "loss": 0.9908, "step": 98 }, { "epoch": 0.04025208375686115, "grad_norm": 0.09613076597452164, "learning_rate": 0.0001962141257887238, "loss": 0.9729, "step": 99 }, { "epoch": 0.04065867046147591, "grad_norm": 0.09987878054380417, "learning_rate": 0.00019617341746387137, "loss": 1.0554, "step": 100 }, { "epoch": 0.04106525716609067, "grad_norm": 0.10209144651889801, "learning_rate": 
0.00019613270913901892, "loss": 1.1162, "step": 101 }, { "epoch": 0.041471843870705426, "grad_norm": 0.10085388273000717, "learning_rate": 0.0001960920008141665, "loss": 1.1355, "step": 102 }, { "epoch": 0.04187843057532019, "grad_norm": 0.08966121822595596, "learning_rate": 0.00019605129248931406, "loss": 0.9275, "step": 103 }, { "epoch": 0.042285017279934944, "grad_norm": 0.10507562756538391, "learning_rate": 0.00019601058416446166, "loss": 1.081, "step": 104 }, { "epoch": 0.04269160398454971, "grad_norm": 0.09719648957252502, "learning_rate": 0.00019596987583960922, "loss": 1.0884, "step": 105 }, { "epoch": 0.04309819068916446, "grad_norm": 0.09457529336214066, "learning_rate": 0.00019592916751475677, "loss": 1.0413, "step": 106 }, { "epoch": 0.043504777393779226, "grad_norm": 0.11330179125070572, "learning_rate": 0.00019588845918990435, "loss": 1.0937, "step": 107 }, { "epoch": 0.04391136409839398, "grad_norm": 0.09778840839862823, "learning_rate": 0.0001958477508650519, "loss": 1.1316, "step": 108 }, { "epoch": 0.044317950803008745, "grad_norm": 0.09848835319280624, "learning_rate": 0.0001958070425401995, "loss": 1.1244, "step": 109 }, { "epoch": 0.0447245375076235, "grad_norm": 0.0965428277850151, "learning_rate": 0.00019576633421534704, "loss": 0.9952, "step": 110 }, { "epoch": 0.045131124212238256, "grad_norm": 0.0857444629073143, "learning_rate": 0.00019572562589049462, "loss": 0.9822, "step": 111 }, { "epoch": 0.04553771091685302, "grad_norm": 0.10461942851543427, "learning_rate": 0.00019568491756564218, "loss": 1.1463, "step": 112 }, { "epoch": 0.045944297621467775, "grad_norm": 0.08575154095888138, "learning_rate": 0.00019564420924078973, "loss": 0.8976, "step": 113 }, { "epoch": 0.04635088432608254, "grad_norm": 0.0948256254196167, "learning_rate": 0.00019560350091593731, "loss": 1.1205, "step": 114 }, { "epoch": 0.046757471030697294, "grad_norm": 0.09214090555906296, "learning_rate": 0.00019556279259108487, "loss": 1.1416, "step": 115 }, { "epoch": 
0.04716405773531206, "grad_norm": 0.09885852038860321, "learning_rate": 0.00019552208426623248, "loss": 1.079, "step": 116 }, { "epoch": 0.04757064443992681, "grad_norm": 0.09071148931980133, "learning_rate": 0.00019548137594138003, "loss": 1.0128, "step": 117 }, { "epoch": 0.047977231144541575, "grad_norm": 0.09190430492162704, "learning_rate": 0.00019544066761652758, "loss": 0.9631, "step": 118 }, { "epoch": 0.04838381784915633, "grad_norm": 0.08024870604276657, "learning_rate": 0.00019539995929167517, "loss": 0.9086, "step": 119 }, { "epoch": 0.048790404553771094, "grad_norm": 0.09223239868879318, "learning_rate": 0.00019535925096682272, "loss": 1.0255, "step": 120 }, { "epoch": 0.04919699125838585, "grad_norm": 0.09259685128927231, "learning_rate": 0.0001953185426419703, "loss": 1.0221, "step": 121 }, { "epoch": 0.04960357796300061, "grad_norm": 0.08371948450803757, "learning_rate": 0.00019527783431711786, "loss": 0.966, "step": 122 }, { "epoch": 0.05001016466761537, "grad_norm": 0.0957912728190422, "learning_rate": 0.00019523712599226544, "loss": 1.0919, "step": 123 }, { "epoch": 0.05041675137223013, "grad_norm": 0.09397678077220917, "learning_rate": 0.000195196417667413, "loss": 0.9666, "step": 124 }, { "epoch": 0.05082333807684489, "grad_norm": 0.1014254167675972, "learning_rate": 0.00019515570934256054, "loss": 0.9321, "step": 125 }, { "epoch": 0.05122992478145964, "grad_norm": 0.09339801222085953, "learning_rate": 0.00019511500101770813, "loss": 1.0487, "step": 126 }, { "epoch": 0.051636511486074406, "grad_norm": 0.08642175793647766, "learning_rate": 0.0001950742926928557, "loss": 1.0606, "step": 127 }, { "epoch": 0.05204309819068916, "grad_norm": 0.09092641621828079, "learning_rate": 0.0001950335843680033, "loss": 0.904, "step": 128 }, { "epoch": 0.052449684895303925, "grad_norm": 0.09896791726350784, "learning_rate": 0.00019499287604315084, "loss": 1.0325, "step": 129 }, { "epoch": 0.05285627159991868, "grad_norm": 0.08731307834386826, "learning_rate": 
0.0001949521677182984, "loss": 0.9258, "step": 130 }, { "epoch": 0.05326285830453344, "grad_norm": 0.09673330187797546, "learning_rate": 0.00019491145939344598, "loss": 1.1198, "step": 131 }, { "epoch": 0.0536694450091482, "grad_norm": 0.09038975089788437, "learning_rate": 0.00019487075106859353, "loss": 1.0295, "step": 132 }, { "epoch": 0.05407603171376296, "grad_norm": 0.0918399840593338, "learning_rate": 0.0001948300427437411, "loss": 1.0127, "step": 133 }, { "epoch": 0.05448261841837772, "grad_norm": 0.08970967680215836, "learning_rate": 0.00019478933441888867, "loss": 1.0238, "step": 134 }, { "epoch": 0.05488920512299248, "grad_norm": 0.09728217124938965, "learning_rate": 0.00019474862609403625, "loss": 1.069, "step": 135 }, { "epoch": 0.05529579182760724, "grad_norm": 0.10240956395864487, "learning_rate": 0.0001947079177691838, "loss": 1.1467, "step": 136 }, { "epoch": 0.055702378532222, "grad_norm": 0.10397852212190628, "learning_rate": 0.00019466720944433136, "loss": 1.0415, "step": 137 }, { "epoch": 0.056108965236836755, "grad_norm": 0.10451675951480865, "learning_rate": 0.00019462650111947894, "loss": 1.0309, "step": 138 }, { "epoch": 0.05651555194145151, "grad_norm": 0.09685720503330231, "learning_rate": 0.00019458579279462652, "loss": 1.11, "step": 139 }, { "epoch": 0.056922138646066274, "grad_norm": 0.09885822236537933, "learning_rate": 0.00019454508446977407, "loss": 0.993, "step": 140 }, { "epoch": 0.05732872535068103, "grad_norm": 0.10943586379289627, "learning_rate": 0.00019450437614492165, "loss": 0.9749, "step": 141 }, { "epoch": 0.05773531205529579, "grad_norm": 0.10964591801166534, "learning_rate": 0.0001944636678200692, "loss": 1.1108, "step": 142 }, { "epoch": 0.05814189875991055, "grad_norm": 0.10109028965234756, "learning_rate": 0.0001944229594952168, "loss": 1.0897, "step": 143 }, { "epoch": 0.05854848546452531, "grad_norm": 0.11243695765733719, "learning_rate": 0.00019438225117036434, "loss": 1.0338, "step": 144 }, { "epoch": 
0.05895507216914007, "grad_norm": 0.1047658622264862, "learning_rate": 0.00019434154284551192, "loss": 0.9566, "step": 145 }, { "epoch": 0.05936165887375483, "grad_norm": 0.09534204006195068, "learning_rate": 0.00019430083452065948, "loss": 1.0313, "step": 146 }, { "epoch": 0.059768245578369586, "grad_norm": 0.10418044775724411, "learning_rate": 0.00019426012619580706, "loss": 0.9759, "step": 147 }, { "epoch": 0.06017483228298435, "grad_norm": 0.10020595043897629, "learning_rate": 0.00019421941787095461, "loss": 0.9368, "step": 148 }, { "epoch": 0.060581418987599105, "grad_norm": 0.09832129627466202, "learning_rate": 0.00019417870954610217, "loss": 1.0494, "step": 149 }, { "epoch": 0.06098800569221387, "grad_norm": 0.09458506107330322, "learning_rate": 0.00019413800122124978, "loss": 0.9631, "step": 150 }, { "epoch": 0.06139459239682862, "grad_norm": 0.10380101203918457, "learning_rate": 0.00019409729289639733, "loss": 1.1003, "step": 151 }, { "epoch": 0.061801179101443386, "grad_norm": 0.107131227850914, "learning_rate": 0.00019405658457154488, "loss": 1.0819, "step": 152 }, { "epoch": 0.06220776580605814, "grad_norm": 0.10330741852521896, "learning_rate": 0.00019401587624669247, "loss": 1.128, "step": 153 }, { "epoch": 0.0626143525106729, "grad_norm": 0.08829359710216522, "learning_rate": 0.00019397516792184002, "loss": 0.8754, "step": 154 }, { "epoch": 0.06302093921528766, "grad_norm": 0.10422427207231522, "learning_rate": 0.0001939344595969876, "loss": 0.9633, "step": 155 }, { "epoch": 0.06342752591990242, "grad_norm": 0.11499015986919403, "learning_rate": 0.00019389375127213515, "loss": 0.9735, "step": 156 }, { "epoch": 0.06383411262451717, "grad_norm": 0.0938427522778511, "learning_rate": 0.00019385304294728274, "loss": 0.9219, "step": 157 }, { "epoch": 0.06424069932913194, "grad_norm": 0.1080261766910553, "learning_rate": 0.0001938123346224303, "loss": 0.9678, "step": 158 }, { "epoch": 0.0646472860337467, "grad_norm": 0.10001271218061447, "learning_rate": 
0.00019377162629757784, "loss": 1.0854, "step": 159 }, { "epoch": 0.06505387273836145, "grad_norm": 0.10731212794780731, "learning_rate": 0.00019373091797272543, "loss": 1.0108, "step": 160 }, { "epoch": 0.06546045944297621, "grad_norm": 0.10019373893737793, "learning_rate": 0.00019369020964787298, "loss": 1.0315, "step": 161 }, { "epoch": 0.06586704614759098, "grad_norm": 0.0947297066450119, "learning_rate": 0.0001936495013230206, "loss": 1.0634, "step": 162 }, { "epoch": 0.06627363285220574, "grad_norm": 0.12204254418611526, "learning_rate": 0.00019360879299816814, "loss": 1.0635, "step": 163 }, { "epoch": 0.06668021955682049, "grad_norm": 0.10462553054094315, "learning_rate": 0.0001935680846733157, "loss": 1.0248, "step": 164 }, { "epoch": 0.06708680626143525, "grad_norm": 0.09576130658388138, "learning_rate": 0.00019352737634846328, "loss": 0.9671, "step": 165 }, { "epoch": 0.06749339296605002, "grad_norm": 0.10027123987674713, "learning_rate": 0.00019348666802361083, "loss": 0.9317, "step": 166 }, { "epoch": 0.06789997967066477, "grad_norm": 0.10674256086349487, "learning_rate": 0.0001934459596987584, "loss": 1.0058, "step": 167 }, { "epoch": 0.06830656637527953, "grad_norm": 0.12352320551872253, "learning_rate": 0.00019340525137390597, "loss": 1.0926, "step": 168 }, { "epoch": 0.06871315307989428, "grad_norm": 0.09426864236593246, "learning_rate": 0.00019336454304905355, "loss": 1.0876, "step": 169 }, { "epoch": 0.06911973978450904, "grad_norm": 0.09280996024608612, "learning_rate": 0.0001933238347242011, "loss": 0.977, "step": 170 }, { "epoch": 0.06952632648912381, "grad_norm": 0.11547420918941498, "learning_rate": 0.00019328312639934866, "loss": 1.0598, "step": 171 }, { "epoch": 0.06993291319373857, "grad_norm": 0.12538915872573853, "learning_rate": 0.00019324241807449624, "loss": 1.0996, "step": 172 }, { "epoch": 0.07033949989835332, "grad_norm": 0.08110898733139038, "learning_rate": 0.00019320170974964382, "loss": 0.8776, "step": 173 }, { "epoch": 
0.07074608660296808, "grad_norm": 0.10475198924541473, "learning_rate": 0.0001931610014247914, "loss": 1.0876, "step": 174 }, { "epoch": 0.07115267330758285, "grad_norm": 0.1095360517501831, "learning_rate": 0.00019312029309993895, "loss": 1.054, "step": 175 }, { "epoch": 0.0715592600121976, "grad_norm": 0.09516473114490509, "learning_rate": 0.0001930795847750865, "loss": 1.0558, "step": 176 }, { "epoch": 0.07196584671681236, "grad_norm": 0.09316466003656387, "learning_rate": 0.0001930388764502341, "loss": 0.9467, "step": 177 }, { "epoch": 0.07237243342142712, "grad_norm": 0.11777061969041824, "learning_rate": 0.00019299816812538164, "loss": 1.1441, "step": 178 }, { "epoch": 0.07277902012604189, "grad_norm": 0.09438811987638474, "learning_rate": 0.00019295745980052922, "loss": 0.9521, "step": 179 }, { "epoch": 0.07318560683065664, "grad_norm": 0.08892639726400375, "learning_rate": 0.00019291675147567678, "loss": 0.9804, "step": 180 }, { "epoch": 0.0735921935352714, "grad_norm": 0.08963356912136078, "learning_rate": 0.00019287604315082436, "loss": 1.0427, "step": 181 }, { "epoch": 0.07399878023988615, "grad_norm": 0.09870661795139313, "learning_rate": 0.0001928353348259719, "loss": 1.051, "step": 182 }, { "epoch": 0.07440536694450091, "grad_norm": 0.11843609809875488, "learning_rate": 0.00019279462650111947, "loss": 1.0109, "step": 183 }, { "epoch": 0.07481195364911568, "grad_norm": 0.08860404789447784, "learning_rate": 0.00019275391817626705, "loss": 1.0035, "step": 184 }, { "epoch": 0.07521854035373043, "grad_norm": 0.09085170924663544, "learning_rate": 0.00019271320985141463, "loss": 0.9461, "step": 185 }, { "epoch": 0.07562512705834519, "grad_norm": 0.09071815758943558, "learning_rate": 0.0001926725015265622, "loss": 0.9542, "step": 186 }, { "epoch": 0.07603171376295995, "grad_norm": 0.09566846489906311, "learning_rate": 0.00019263179320170976, "loss": 0.9958, "step": 187 }, { "epoch": 0.07643830046757472, "grad_norm": 0.11846338212490082, "learning_rate": 
0.00019259108487685732, "loss": 1.0737, "step": 188 }, { "epoch": 0.07684488717218947, "grad_norm": 0.09295649081468582, "learning_rate": 0.0001925503765520049, "loss": 1.0162, "step": 189 }, { "epoch": 0.07725147387680423, "grad_norm": 0.0917876660823822, "learning_rate": 0.00019250966822715245, "loss": 1.0432, "step": 190 }, { "epoch": 0.07765806058141898, "grad_norm": 0.10864109545946121, "learning_rate": 0.00019246895990230004, "loss": 1.1107, "step": 191 }, { "epoch": 0.07806464728603375, "grad_norm": 0.09689877927303314, "learning_rate": 0.0001924282515774476, "loss": 1.0421, "step": 192 }, { "epoch": 0.07847123399064851, "grad_norm": 0.09406042098999023, "learning_rate": 0.00019238754325259517, "loss": 1.1042, "step": 193 }, { "epoch": 0.07887782069526326, "grad_norm": 0.08346063643693924, "learning_rate": 0.00019234683492774272, "loss": 0.9554, "step": 194 }, { "epoch": 0.07928440739987802, "grad_norm": 0.10317754745483398, "learning_rate": 0.00019230612660289028, "loss": 1.0835, "step": 195 }, { "epoch": 0.07969099410449278, "grad_norm": 0.08712919056415558, "learning_rate": 0.0001922654182780379, "loss": 0.9799, "step": 196 }, { "epoch": 0.08009758080910755, "grad_norm": 0.0860556811094284, "learning_rate": 0.00019222470995318544, "loss": 0.8661, "step": 197 }, { "epoch": 0.0805041675137223, "grad_norm": 0.07940655201673508, "learning_rate": 0.00019218400162833302, "loss": 0.8305, "step": 198 }, { "epoch": 0.08091075421833706, "grad_norm": 0.09200199693441391, "learning_rate": 0.00019214329330348058, "loss": 0.9774, "step": 199 }, { "epoch": 0.08131734092295181, "grad_norm": 0.09980164468288422, "learning_rate": 0.00019210258497862813, "loss": 0.9791, "step": 200 }, { "epoch": 0.08172392762756658, "grad_norm": 0.09660688042640686, "learning_rate": 0.0001920618766537757, "loss": 1.027, "step": 201 }, { "epoch": 0.08213051433218134, "grad_norm": 0.09518909454345703, "learning_rate": 0.00019202116832892327, "loss": 0.9939, "step": 202 }, { "epoch": 
0.0825371010367961, "grad_norm": 0.0886114165186882, "learning_rate": 0.00019198046000407085, "loss": 0.985, "step": 203 }, { "epoch": 0.08294368774141085, "grad_norm": 0.09820783883333206, "learning_rate": 0.0001919397516792184, "loss": 1.0064, "step": 204 }, { "epoch": 0.08335027444602562, "grad_norm": 0.0957496389746666, "learning_rate": 0.00019189904335436598, "loss": 1.1126, "step": 205 }, { "epoch": 0.08375686115064038, "grad_norm": 0.09990067780017853, "learning_rate": 0.00019185833502951354, "loss": 1.1517, "step": 206 }, { "epoch": 0.08416344785525513, "grad_norm": 0.0953991562128067, "learning_rate": 0.0001918176267046611, "loss": 1.087, "step": 207 }, { "epoch": 0.08457003455986989, "grad_norm": 0.10291532427072525, "learning_rate": 0.0001917769183798087, "loss": 1.0366, "step": 208 }, { "epoch": 0.08497662126448464, "grad_norm": 0.09986121207475662, "learning_rate": 0.00019173621005495625, "loss": 0.9581, "step": 209 }, { "epoch": 0.08538320796909941, "grad_norm": 0.09369988739490509, "learning_rate": 0.00019169550173010383, "loss": 1.0048, "step": 210 }, { "epoch": 0.08578979467371417, "grad_norm": 0.0968063622713089, "learning_rate": 0.0001916547934052514, "loss": 1.0005, "step": 211 }, { "epoch": 0.08619638137832893, "grad_norm": 0.11241315305233002, "learning_rate": 0.00019161408508039894, "loss": 1.0316, "step": 212 }, { "epoch": 0.08660296808294368, "grad_norm": 0.09230878949165344, "learning_rate": 0.00019157337675554652, "loss": 0.917, "step": 213 }, { "epoch": 0.08700955478755845, "grad_norm": 0.08461520820856094, "learning_rate": 0.00019153266843069408, "loss": 0.9144, "step": 214 }, { "epoch": 0.08741614149217321, "grad_norm": 0.09011861681938171, "learning_rate": 0.00019149196010584166, "loss": 1.0092, "step": 215 }, { "epoch": 0.08782272819678796, "grad_norm": 0.09200841188430786, "learning_rate": 0.0001914512517809892, "loss": 1.0552, "step": 216 }, { "epoch": 0.08822931490140272, "grad_norm": 0.09052886068820953, "learning_rate": 
0.0001914105434561368, "loss": 0.9067, "step": 217 }, { "epoch": 0.08863590160601749, "grad_norm": 0.08740741014480591, "learning_rate": 0.00019136983513128435, "loss": 0.9182, "step": 218 }, { "epoch": 0.08904248831063225, "grad_norm": 0.08494284749031067, "learning_rate": 0.00019132912680643193, "loss": 0.8321, "step": 219 }, { "epoch": 0.089449075015247, "grad_norm": 0.0890796035528183, "learning_rate": 0.0001912884184815795, "loss": 0.9801, "step": 220 }, { "epoch": 0.08985566171986176, "grad_norm": 0.094822458922863, "learning_rate": 0.00019124771015672706, "loss": 0.9779, "step": 221 }, { "epoch": 0.09026224842447651, "grad_norm": 0.09756983071565628, "learning_rate": 0.00019120700183187465, "loss": 1.0385, "step": 222 }, { "epoch": 0.09066883512909128, "grad_norm": 0.09434107691049576, "learning_rate": 0.0001911662935070222, "loss": 1.063, "step": 223 }, { "epoch": 0.09107542183370604, "grad_norm": 0.0925639271736145, "learning_rate": 0.00019112558518216975, "loss": 0.9061, "step": 224 }, { "epoch": 0.0914820085383208, "grad_norm": 0.10531201958656311, "learning_rate": 0.00019108487685731734, "loss": 1.1593, "step": 225 }, { "epoch": 0.09188859524293555, "grad_norm": 0.08259832113981247, "learning_rate": 0.0001910441685324649, "loss": 0.8463, "step": 226 }, { "epoch": 0.09229518194755032, "grad_norm": 431.5063171386719, "learning_rate": 0.00019100346020761247, "loss": 1.0632, "step": 227 }, { "epoch": 0.09270176865216508, "grad_norm": 0.10764740407466888, "learning_rate": 0.00019096275188276002, "loss": 1.0083, "step": 228 }, { "epoch": 0.09310835535677983, "grad_norm": 0.08872029185295105, "learning_rate": 0.0001909220435579076, "loss": 0.9301, "step": 229 }, { "epoch": 0.09351494206139459, "grad_norm": 0.1006346270442009, "learning_rate": 0.00019088133523305516, "loss": 1.0103, "step": 230 }, { "epoch": 0.09392152876600936, "grad_norm": 0.0970514565706253, "learning_rate": 0.00019084062690820274, "loss": 1.0522, "step": 231 }, { "epoch": 
0.09432811547062411, "grad_norm": 0.09807727485895157, "learning_rate": 0.00019079991858335032, "loss": 1.0498, "step": 232 }, { "epoch": 0.09473470217523887, "grad_norm": 0.09828022867441177, "learning_rate": 0.00019075921025849788, "loss": 0.9871, "step": 233 }, { "epoch": 0.09514128887985362, "grad_norm": 0.10089042782783508, "learning_rate": 0.00019071850193364543, "loss": 0.977, "step": 234 }, { "epoch": 0.0955478755844684, "grad_norm": 0.09905245155096054, "learning_rate": 0.000190677793608793, "loss": 1.0135, "step": 235 }, { "epoch": 0.09595446228908315, "grad_norm": 0.1002473533153534, "learning_rate": 0.00019063708528394057, "loss": 1.0219, "step": 236 }, { "epoch": 0.0963610489936979, "grad_norm": 0.09028339385986328, "learning_rate": 0.00019059637695908815, "loss": 0.909, "step": 237 }, { "epoch": 0.09676763569831266, "grad_norm": 0.0950377881526947, "learning_rate": 0.0001905556686342357, "loss": 0.9749, "step": 238 }, { "epoch": 0.09717422240292742, "grad_norm": 0.09866049885749817, "learning_rate": 0.00019051496030938328, "loss": 1.0927, "step": 239 }, { "epoch": 0.09758080910754219, "grad_norm": 0.09754758328199387, "learning_rate": 0.00019047425198453084, "loss": 1.059, "step": 240 }, { "epoch": 0.09798739581215694, "grad_norm": 0.09261766821146011, "learning_rate": 0.00019043354365967842, "loss": 1.0912, "step": 241 }, { "epoch": 0.0983939825167717, "grad_norm": 0.08637125045061111, "learning_rate": 0.000190392835334826, "loss": 0.8925, "step": 242 }, { "epoch": 0.09880056922138646, "grad_norm": 0.0962812602519989, "learning_rate": 0.00019035212700997355, "loss": 1.0435, "step": 243 }, { "epoch": 0.09920715592600123, "grad_norm": 0.09047430753707886, "learning_rate": 0.00019031141868512113, "loss": 1.0787, "step": 244 }, { "epoch": 0.09961374263061598, "grad_norm": 0.09183438867330551, "learning_rate": 0.0001902707103602687, "loss": 0.9338, "step": 245 }, { "epoch": 0.10002032933523074, "grad_norm": 0.09977632761001587, "learning_rate": 
0.00019023000203541624, "loss": 1.1605, "step": 246 }, { "epoch": 0.10042691603984549, "grad_norm": 0.10386580228805542, "learning_rate": 0.00019018929371056382, "loss": 1.0493, "step": 247 }, { "epoch": 0.10083350274446026, "grad_norm": 0.09106533974409103, "learning_rate": 0.00019014858538571138, "loss": 0.9891, "step": 248 }, { "epoch": 0.10124008944907502, "grad_norm": 0.09407884627580643, "learning_rate": 0.00019010787706085896, "loss": 1.0367, "step": 249 }, { "epoch": 0.10164667615368977, "grad_norm": 0.10133463889360428, "learning_rate": 0.0001900671687360065, "loss": 1.0743, "step": 250 }, { "epoch": 0.10205326285830453, "grad_norm": 0.11877205967903137, "learning_rate": 0.0001900264604111541, "loss": 1.1572, "step": 251 }, { "epoch": 0.10245984956291929, "grad_norm": 0.10216309130191803, "learning_rate": 0.00018998575208630165, "loss": 1.0687, "step": 252 }, { "epoch": 0.10286643626753406, "grad_norm": 0.09023922681808472, "learning_rate": 0.0001899450437614492, "loss": 0.9153, "step": 253 }, { "epoch": 0.10327302297214881, "grad_norm": 0.09972742944955826, "learning_rate": 0.0001899043354365968, "loss": 0.9059, "step": 254 }, { "epoch": 0.10367960967676357, "grad_norm": 0.1175752505660057, "learning_rate": 0.00018986362711174436, "loss": 1.0659, "step": 255 }, { "epoch": 0.10408619638137832, "grad_norm": 0.09030337631702423, "learning_rate": 0.00018982291878689195, "loss": 0.9577, "step": 256 }, { "epoch": 0.1044927830859931, "grad_norm": 0.08850797265768051, "learning_rate": 0.0001897822104620395, "loss": 0.9193, "step": 257 }, { "epoch": 0.10489936979060785, "grad_norm": 1767.7669677734375, "learning_rate": 0.00018974150213718705, "loss": 0.9977, "step": 258 }, { "epoch": 0.1053059564952226, "grad_norm": 0.11435185372829437, "learning_rate": 0.00018970079381233463, "loss": 1.0468, "step": 259 }, { "epoch": 0.10571254319983736, "grad_norm": 0.10342080891132355, "learning_rate": 0.0001896600854874822, "loss": 1.0119, "step": 260 }, { "epoch": 
0.10611912990445213, "grad_norm": 0.11568263173103333, "learning_rate": 0.00018961937716262977, "loss": 1.025, "step": 261 }, { "epoch": 0.10652571660906689, "grad_norm": 0.12752321362495422, "learning_rate": 0.00018957866883777732, "loss": 1.1283, "step": 262 }, { "epoch": 0.10693230331368164, "grad_norm": 0.10688795894384384, "learning_rate": 0.0001895379605129249, "loss": 0.9052, "step": 263 }, { "epoch": 0.1073388900182964, "grad_norm": 0.10426552593708038, "learning_rate": 0.00018949725218807246, "loss": 0.9556, "step": 264 }, { "epoch": 0.10774547672291115, "grad_norm": 0.09953362494707108, "learning_rate": 0.00018945654386322004, "loss": 1.0734, "step": 265 }, { "epoch": 0.10815206342752592, "grad_norm": 0.09143470227718353, "learning_rate": 0.00018941583553836762, "loss": 1.0063, "step": 266 }, { "epoch": 0.10855865013214068, "grad_norm": 0.10831563919782639, "learning_rate": 0.00018937512721351518, "loss": 1.011, "step": 267 }, { "epoch": 0.10896523683675544, "grad_norm": 0.10352573543787003, "learning_rate": 0.00018933441888866276, "loss": 1.0625, "step": 268 }, { "epoch": 0.10937182354137019, "grad_norm": 0.09499429166316986, "learning_rate": 0.0001892937105638103, "loss": 0.8775, "step": 269 }, { "epoch": 0.10977841024598496, "grad_norm": 0.10296636819839478, "learning_rate": 0.00018925300223895787, "loss": 0.985, "step": 270 }, { "epoch": 0.11018499695059972, "grad_norm": 0.10464894771575928, "learning_rate": 0.00018921229391410545, "loss": 1.0051, "step": 271 }, { "epoch": 0.11059158365521447, "grad_norm": 0.09429532289505005, "learning_rate": 0.000189171585589253, "loss": 0.9793, "step": 272 }, { "epoch": 0.11099817035982923, "grad_norm": 0.09751992672681808, "learning_rate": 0.00018913087726440058, "loss": 1.0756, "step": 273 }, { "epoch": 0.111404757064444, "grad_norm": 0.11418993026018143, "learning_rate": 0.00018909016893954814, "loss": 1.0742, "step": 274 }, { "epoch": 0.11181134376905875, "grad_norm": 0.10320629924535751, "learning_rate": 
0.00018904946061469572, "loss": 1.036, "step": 275 }, { "epoch": 0.11221793047367351, "grad_norm": 0.09697311371564865, "learning_rate": 0.00018900875228984327, "loss": 1.0317, "step": 276 }, { "epoch": 0.11262451717828827, "grad_norm": 0.09579788893461227, "learning_rate": 0.00018896804396499085, "loss": 0.9621, "step": 277 }, { "epoch": 0.11303110388290302, "grad_norm": 0.09918879717588425, "learning_rate": 0.00018892733564013843, "loss": 1.0292, "step": 278 }, { "epoch": 0.11343769058751779, "grad_norm": 0.0923212468624115, "learning_rate": 0.000188886627315286, "loss": 1.0611, "step": 279 }, { "epoch": 0.11384427729213255, "grad_norm": 0.09480055421590805, "learning_rate": 0.00018884591899043357, "loss": 0.9809, "step": 280 }, { "epoch": 0.1142508639967473, "grad_norm": 0.09431526064872742, "learning_rate": 0.00018880521066558112, "loss": 1.0326, "step": 281 }, { "epoch": 0.11465745070136206, "grad_norm": 0.09080514311790466, "learning_rate": 0.00018876450234072868, "loss": 0.9115, "step": 282 }, { "epoch": 0.11506403740597683, "grad_norm": 0.10855970531702042, "learning_rate": 0.00018872379401587626, "loss": 1.0422, "step": 283 }, { "epoch": 0.11547062411059159, "grad_norm": 0.0941060334444046, "learning_rate": 0.0001886830856910238, "loss": 1.0352, "step": 284 }, { "epoch": 0.11587721081520634, "grad_norm": 0.08903583139181137, "learning_rate": 0.0001886423773661714, "loss": 0.964, "step": 285 }, { "epoch": 0.1162837975198211, "grad_norm": 0.08521820604801178, "learning_rate": 0.00018860166904131895, "loss": 0.917, "step": 286 }, { "epoch": 0.11669038422443587, "grad_norm": 0.1058691143989563, "learning_rate": 0.00018856096071646653, "loss": 1.0375, "step": 287 }, { "epoch": 0.11709697092905062, "grad_norm": 0.09435714781284332, "learning_rate": 0.0001885202523916141, "loss": 0.9766, "step": 288 }, { "epoch": 0.11750355763366538, "grad_norm": 0.09868729114532471, "learning_rate": 0.00018847954406676166, "loss": 1.1059, "step": 289 }, { "epoch": 
0.11791014433828013, "grad_norm": 0.08855635672807693, "learning_rate": 0.00018843883574190924, "loss": 0.9424, "step": 290 }, { "epoch": 0.11831673104289489, "grad_norm": 0.09142837673425674, "learning_rate": 0.0001883981274170568, "loss": 1.0425, "step": 291 }, { "epoch": 0.11872331774750966, "grad_norm": 0.0971277505159378, "learning_rate": 0.00018835741909220438, "loss": 1.108, "step": 292 }, { "epoch": 0.11912990445212442, "grad_norm": 0.09940122812986374, "learning_rate": 0.00018831671076735193, "loss": 1.0172, "step": 293 }, { "epoch": 0.11953649115673917, "grad_norm": 0.10263317078351974, "learning_rate": 0.0001882760024424995, "loss": 1.0956, "step": 294 }, { "epoch": 0.11994307786135393, "grad_norm": 0.1092846542596817, "learning_rate": 0.00018823529411764707, "loss": 0.9454, "step": 295 }, { "epoch": 0.1203496645659687, "grad_norm": 0.10364726930856705, "learning_rate": 0.00018819458579279462, "loss": 0.8884, "step": 296 }, { "epoch": 0.12075625127058345, "grad_norm": 0.0889100730419159, "learning_rate": 0.0001881538774679422, "loss": 0.9922, "step": 297 }, { "epoch": 0.12116283797519821, "grad_norm": 0.09209653735160828, "learning_rate": 0.00018811316914308976, "loss": 0.977, "step": 298 }, { "epoch": 0.12156942467981297, "grad_norm": 0.11542046815156937, "learning_rate": 0.00018807246081823734, "loss": 1.0694, "step": 299 }, { "epoch": 0.12197601138442773, "grad_norm": 0.10896503180265427, "learning_rate": 0.00018803175249338492, "loss": 1.0508, "step": 300 }, { "epoch": 0.12238259808904249, "grad_norm": 0.09302002936601639, "learning_rate": 0.00018799104416853248, "loss": 1.0512, "step": 301 }, { "epoch": 0.12278918479365725, "grad_norm": 0.09081271290779114, "learning_rate": 0.00018795033584368006, "loss": 0.9688, "step": 302 }, { "epoch": 0.123195771498272, "grad_norm": 0.1059931218624115, "learning_rate": 0.0001879096275188276, "loss": 1.0483, "step": 303 }, { "epoch": 0.12360235820288677, "grad_norm": 0.1018669605255127, "learning_rate": 
0.0001878689191939752, "loss": 1.019, "step": 304 }, { "epoch": 0.12400894490750153, "grad_norm": 0.1040007546544075, "learning_rate": 0.00018782821086912275, "loss": 1.037, "step": 305 }, { "epoch": 0.12441553161211628, "grad_norm": 0.10204601287841797, "learning_rate": 0.0001877875025442703, "loss": 0.9816, "step": 306 }, { "epoch": 0.12482211831673104, "grad_norm": 0.10591764748096466, "learning_rate": 0.00018774679421941788, "loss": 1.0939, "step": 307 }, { "epoch": 0.1252287050213458, "grad_norm": 0.09306305646896362, "learning_rate": 0.00018770608589456544, "loss": 1.0476, "step": 308 }, { "epoch": 0.12563529172596055, "grad_norm": 11.22681713104248, "learning_rate": 0.00018766537756971302, "loss": 1.0573, "step": 309 }, { "epoch": 0.12604187843057532, "grad_norm": 0.09422402083873749, "learning_rate": 0.00018762466924486057, "loss": 0.9993, "step": 310 }, { "epoch": 0.1264484651351901, "grad_norm": 0.0982229933142662, "learning_rate": 0.00018758396092000815, "loss": 0.9159, "step": 311 }, { "epoch": 0.12685505183980483, "grad_norm": 0.12579265236854553, "learning_rate": 0.00018754325259515573, "loss": 1.0935, "step": 312 }, { "epoch": 0.1272616385444196, "grad_norm": 0.10069390386343002, "learning_rate": 0.0001875025442703033, "loss": 1.0127, "step": 313 }, { "epoch": 0.12766822524903434, "grad_norm": 0.10948827862739563, "learning_rate": 0.00018746183594545087, "loss": 1.0576, "step": 314 }, { "epoch": 0.12807481195364911, "grad_norm": 0.09232445061206818, "learning_rate": 0.00018742112762059842, "loss": 0.9856, "step": 315 }, { "epoch": 0.12848139865826388, "grad_norm": 0.08319563418626785, "learning_rate": 0.000187380419295746, "loss": 0.9172, "step": 316 }, { "epoch": 0.12888798536287863, "grad_norm": 0.09697309136390686, "learning_rate": 0.00018733971097089356, "loss": 1.0567, "step": 317 }, { "epoch": 0.1292945720674934, "grad_norm": 0.09254255145788193, "learning_rate": 0.0001872990026460411, "loss": 1.0177, "step": 318 }, { "epoch": 
0.12970115877210814, "grad_norm": 0.09254108369350433, "learning_rate": 0.0001872582943211887, "loss": 1.0079, "step": 319 }, { "epoch": 0.1301077454767229, "grad_norm": 0.09095866233110428, "learning_rate": 0.00018721758599633625, "loss": 1.0633, "step": 320 }, { "epoch": 0.13051433218133768, "grad_norm": 0.09073010087013245, "learning_rate": 0.00018717687767148383, "loss": 0.9059, "step": 321 }, { "epoch": 0.13092091888595242, "grad_norm": 0.09842764586210251, "learning_rate": 0.00018713616934663138, "loss": 1.0766, "step": 322 }, { "epoch": 0.1313275055905672, "grad_norm": 0.09325529634952545, "learning_rate": 0.00018709546102177896, "loss": 1.066, "step": 323 }, { "epoch": 0.13173409229518196, "grad_norm": 0.09692969918251038, "learning_rate": 0.00018705475269692654, "loss": 0.9743, "step": 324 }, { "epoch": 0.1321406789997967, "grad_norm": 0.09432708472013474, "learning_rate": 0.0001870140443720741, "loss": 1.0141, "step": 325 }, { "epoch": 0.13254726570441147, "grad_norm": 0.09226994961500168, "learning_rate": 0.00018697333604722168, "loss": 0.9837, "step": 326 }, { "epoch": 0.1329538524090262, "grad_norm": 0.10843974351882935, "learning_rate": 0.00018693262772236923, "loss": 1.0248, "step": 327 }, { "epoch": 0.13336043911364098, "grad_norm": 0.09324774891138077, "learning_rate": 0.00018689191939751681, "loss": 1.0642, "step": 328 }, { "epoch": 0.13376702581825575, "grad_norm": 0.08934729546308517, "learning_rate": 0.00018685121107266437, "loss": 0.9792, "step": 329 }, { "epoch": 0.1341736125228705, "grad_norm": 0.09125274419784546, "learning_rate": 0.00018681050274781192, "loss": 1.0093, "step": 330 }, { "epoch": 0.13458019922748526, "grad_norm": 0.09645108133554459, "learning_rate": 0.0001867697944229595, "loss": 0.9503, "step": 331 }, { "epoch": 0.13498678593210003, "grad_norm": 0.09900861978530884, "learning_rate": 0.00018672908609810706, "loss": 0.9966, "step": 332 }, { "epoch": 0.13539337263671478, "grad_norm": 0.09018311649560928, "learning_rate": 
0.00018668837777325464, "loss": 0.965, "step": 333 }, { "epoch": 0.13579995934132955, "grad_norm": 0.10296136885881424, "learning_rate": 0.00018664766944840222, "loss": 1.1011, "step": 334 }, { "epoch": 0.1362065460459443, "grad_norm": 0.09104129672050476, "learning_rate": 0.00018660696112354977, "loss": 0.9814, "step": 335 }, { "epoch": 0.13661313275055906, "grad_norm": 0.09881450235843658, "learning_rate": 0.00018656625279869736, "loss": 1.0989, "step": 336 }, { "epoch": 0.13701971945517383, "grad_norm": 0.09691241383552551, "learning_rate": 0.0001865255444738449, "loss": 1.0967, "step": 337 }, { "epoch": 0.13742630615978857, "grad_norm": 0.10152243077754974, "learning_rate": 0.0001864848361489925, "loss": 1.0951, "step": 338 }, { "epoch": 0.13783289286440334, "grad_norm": 0.10802541673183441, "learning_rate": 0.00018644412782414005, "loss": 0.8742, "step": 339 }, { "epoch": 0.13823947956901808, "grad_norm": 0.09942565858364105, "learning_rate": 0.0001864034194992876, "loss": 0.9961, "step": 340 }, { "epoch": 0.13864606627363285, "grad_norm": 0.08618199825286865, "learning_rate": 0.00018636271117443518, "loss": 0.9645, "step": 341 }, { "epoch": 0.13905265297824762, "grad_norm": 0.1056099608540535, "learning_rate": 0.00018632200284958273, "loss": 0.9885, "step": 342 }, { "epoch": 0.13945923968286236, "grad_norm": 0.08862382173538208, "learning_rate": 0.00018628129452473032, "loss": 0.9316, "step": 343 }, { "epoch": 0.13986582638747713, "grad_norm": 0.09923135489225388, "learning_rate": 0.00018624058619987787, "loss": 0.9959, "step": 344 }, { "epoch": 0.1402724130920919, "grad_norm": 0.09120538830757141, "learning_rate": 0.00018619987787502545, "loss": 0.968, "step": 345 }, { "epoch": 0.14067899979670664, "grad_norm": 0.09669141471385956, "learning_rate": 0.00018615916955017303, "loss": 1.085, "step": 346 }, { "epoch": 0.1410855865013214, "grad_norm": 0.08598754554986954, "learning_rate": 0.00018611846122532059, "loss": 0.9504, "step": 347 }, { "epoch": 
0.14149217320593616, "grad_norm": 0.09238371253013611, "learning_rate": 0.00018607775290046817, "loss": 0.9742, "step": 348 }, { "epoch": 0.14189875991055093, "grad_norm": 0.091258205473423, "learning_rate": 0.00018603704457561572, "loss": 0.9341, "step": 349 }, { "epoch": 0.1423053466151657, "grad_norm": 0.10129548609256744, "learning_rate": 0.0001859963362507633, "loss": 1.0814, "step": 350 }, { "epoch": 0.14271193331978044, "grad_norm": 0.09523019194602966, "learning_rate": 0.00018595562792591086, "loss": 0.9848, "step": 351 }, { "epoch": 0.1431185200243952, "grad_norm": 0.09485248476266861, "learning_rate": 0.0001859149196010584, "loss": 0.9828, "step": 352 }, { "epoch": 0.14352510672900995, "grad_norm": 0.09963666647672653, "learning_rate": 0.000185874211276206, "loss": 1.1075, "step": 353 }, { "epoch": 0.14393169343362472, "grad_norm": 0.09067155420780182, "learning_rate": 0.00018583350295135355, "loss": 0.971, "step": 354 }, { "epoch": 0.1443382801382395, "grad_norm": 0.09153544157743454, "learning_rate": 0.00018579279462650113, "loss": 0.9405, "step": 355 }, { "epoch": 0.14474486684285423, "grad_norm": 0.1024472787976265, "learning_rate": 0.00018575208630164868, "loss": 0.9967, "step": 356 }, { "epoch": 0.145151453547469, "grad_norm": 0.09804495424032211, "learning_rate": 0.00018571137797679626, "loss": 0.9578, "step": 357 }, { "epoch": 0.14555804025208377, "grad_norm": 0.099054716527462, "learning_rate": 0.00018567066965194384, "loss": 0.9999, "step": 358 }, { "epoch": 0.1459646269566985, "grad_norm": 0.09781336784362793, "learning_rate": 0.0001856299613270914, "loss": 1.09, "step": 359 }, { "epoch": 0.14637121366131328, "grad_norm": 0.08993211388587952, "learning_rate": 0.00018558925300223898, "loss": 1.0719, "step": 360 }, { "epoch": 0.14677780036592802, "grad_norm": 0.09146003425121307, "learning_rate": 0.00018554854467738653, "loss": 1.0008, "step": 361 }, { "epoch": 0.1471843870705428, "grad_norm": 0.09643495827913284, "learning_rate": 
0.00018550783635253411, "loss": 1.0791, "step": 362 }, { "epoch": 0.14759097377515756, "grad_norm": 0.09078676998615265, "learning_rate": 0.00018546712802768167, "loss": 0.8641, "step": 363 }, { "epoch": 0.1479975604797723, "grad_norm": 0.08719085901975632, "learning_rate": 0.00018542641970282922, "loss": 0.985, "step": 364 }, { "epoch": 0.14840414718438708, "grad_norm": 0.09189736843109131, "learning_rate": 0.0001853857113779768, "loss": 0.9638, "step": 365 }, { "epoch": 0.14881073388900182, "grad_norm": 0.09381456673145294, "learning_rate": 0.00018534500305312436, "loss": 1.0036, "step": 366 }, { "epoch": 0.1492173205936166, "grad_norm": 0.0922684445977211, "learning_rate": 0.00018530429472827194, "loss": 1.0391, "step": 367 }, { "epoch": 0.14962390729823136, "grad_norm": 0.09465248882770538, "learning_rate": 0.0001852635864034195, "loss": 0.8874, "step": 368 }, { "epoch": 0.1500304940028461, "grad_norm": 0.0938408225774765, "learning_rate": 0.00018522287807856707, "loss": 1.0269, "step": 369 }, { "epoch": 0.15043708070746087, "grad_norm": 0.09377933293581009, "learning_rate": 0.00018518216975371466, "loss": 1.0142, "step": 370 }, { "epoch": 0.15084366741207564, "grad_norm": 0.1117277517914772, "learning_rate": 0.0001851414614288622, "loss": 1.0371, "step": 371 }, { "epoch": 0.15125025411669038, "grad_norm": 0.10293183475732803, "learning_rate": 0.0001851007531040098, "loss": 1.0, "step": 372 }, { "epoch": 0.15165684082130515, "grad_norm": 0.09216313809156418, "learning_rate": 0.00018506004477915734, "loss": 0.9703, "step": 373 }, { "epoch": 0.1520634275259199, "grad_norm": 0.09088669717311859, "learning_rate": 0.00018501933645430493, "loss": 0.8766, "step": 374 }, { "epoch": 0.15247001423053466, "grad_norm": 0.09916643798351288, "learning_rate": 0.00018497862812945248, "loss": 1.0958, "step": 375 }, { "epoch": 0.15287660093514943, "grad_norm": 0.08404985070228577, "learning_rate": 0.00018493791980460003, "loss": 0.9602, "step": 376 }, { "epoch": 
0.15328318763976417, "grad_norm": 0.10011377185583115, "learning_rate": 0.00018489721147974762, "loss": 1.0377, "step": 377 }, { "epoch": 0.15368977434437894, "grad_norm": 0.09958089143037796, "learning_rate": 0.00018485650315489517, "loss": 1.0213, "step": 378 }, { "epoch": 0.15409636104899369, "grad_norm": 0.09488838911056519, "learning_rate": 0.00018481579483004275, "loss": 0.941, "step": 379 }, { "epoch": 0.15450294775360846, "grad_norm": 0.09099314361810684, "learning_rate": 0.00018477508650519033, "loss": 0.8913, "step": 380 }, { "epoch": 0.15490953445822322, "grad_norm": 0.0956854447722435, "learning_rate": 0.00018473437818033789, "loss": 1.1478, "step": 381 }, { "epoch": 0.15531612116283797, "grad_norm": 0.11225584149360657, "learning_rate": 0.00018469366985548547, "loss": 1.0795, "step": 382 }, { "epoch": 0.15572270786745274, "grad_norm": 0.11592987924814224, "learning_rate": 0.00018465296153063302, "loss": 1.0863, "step": 383 }, { "epoch": 0.1561292945720675, "grad_norm": 0.09232570976018906, "learning_rate": 0.0001846122532057806, "loss": 0.9551, "step": 384 }, { "epoch": 0.15653588127668225, "grad_norm": 0.08860056847333908, "learning_rate": 0.00018457154488092816, "loss": 1.0206, "step": 385 }, { "epoch": 0.15694246798129702, "grad_norm": 0.10788331180810928, "learning_rate": 0.00018453083655607574, "loss": 0.9378, "step": 386 }, { "epoch": 0.15734905468591176, "grad_norm": 0.10758615285158157, "learning_rate": 0.0001844901282312233, "loss": 1.1149, "step": 387 }, { "epoch": 0.15775564139052653, "grad_norm": 0.10551386326551437, "learning_rate": 0.00018444941990637085, "loss": 1.0729, "step": 388 }, { "epoch": 0.1581622280951413, "grad_norm": 0.08733198046684265, "learning_rate": 0.00018440871158151843, "loss": 1.0058, "step": 389 }, { "epoch": 0.15856881479975604, "grad_norm": 0.1095399409532547, "learning_rate": 0.00018436800325666598, "loss": 1.0566, "step": 390 }, { "epoch": 0.1589754015043708, "grad_norm": 0.12356330454349518, "learning_rate": 
0.00018432729493181356, "loss": 1.0173, "step": 391 }, { "epoch": 0.15938198820898555, "grad_norm": 0.09934639930725098, "learning_rate": 0.00018428658660696114, "loss": 1.1237, "step": 392 }, { "epoch": 0.15978857491360032, "grad_norm": 0.09402013570070267, "learning_rate": 0.0001842458782821087, "loss": 1.0018, "step": 393 }, { "epoch": 0.1601951616182151, "grad_norm": 0.10511749237775803, "learning_rate": 0.00018420516995725628, "loss": 0.9844, "step": 394 }, { "epoch": 0.16060174832282983, "grad_norm": 0.11193688213825226, "learning_rate": 0.00018416446163240383, "loss": 0.9888, "step": 395 }, { "epoch": 0.1610083350274446, "grad_norm": 0.09895443916320801, "learning_rate": 0.00018412375330755141, "loss": 1.1045, "step": 396 }, { "epoch": 0.16141492173205937, "grad_norm": 0.09660319238901138, "learning_rate": 0.00018408304498269897, "loss": 1.0457, "step": 397 }, { "epoch": 0.16182150843667412, "grad_norm": 0.1339186728000641, "learning_rate": 0.00018404233665784655, "loss": 1.1266, "step": 398 }, { "epoch": 0.16222809514128889, "grad_norm": 0.1154564693570137, "learning_rate": 0.0001840016283329941, "loss": 1.0299, "step": 399 }, { "epoch": 0.16263468184590363, "grad_norm": 0.09698904305696487, "learning_rate": 0.00018396092000814166, "loss": 1.1101, "step": 400 }, { "epoch": 0.1630412685505184, "grad_norm": 0.09455164521932602, "learning_rate": 0.00018392021168328924, "loss": 0.9928, "step": 401 }, { "epoch": 0.16344785525513317, "grad_norm": 0.09728690981864929, "learning_rate": 0.0001838795033584368, "loss": 1.0603, "step": 402 }, { "epoch": 0.1638544419597479, "grad_norm": 0.10577269643545151, "learning_rate": 0.0001838387950335844, "loss": 0.9922, "step": 403 }, { "epoch": 0.16426102866436268, "grad_norm": 0.08850935101509094, "learning_rate": 0.00018379808670873196, "loss": 0.9758, "step": 404 }, { "epoch": 0.16466761536897742, "grad_norm": 0.09496256709098816, "learning_rate": 0.0001837573783838795, "loss": 1.0949, "step": 405 }, { "epoch": 
0.1650742020735922, "grad_norm": 0.09768050909042358, "learning_rate": 0.0001837166700590271, "loss": 1.0054, "step": 406 }, { "epoch": 0.16548078877820696, "grad_norm": 0.09913921356201172, "learning_rate": 0.00018367596173417464, "loss": 1.0272, "step": 407 }, { "epoch": 0.1658873754828217, "grad_norm": 0.0901927724480629, "learning_rate": 0.00018363525340932223, "loss": 1.0264, "step": 408 }, { "epoch": 0.16629396218743647, "grad_norm": 0.09796515852212906, "learning_rate": 0.00018359454508446978, "loss": 1.0338, "step": 409 }, { "epoch": 0.16670054889205124, "grad_norm": 0.1018638014793396, "learning_rate": 0.00018355383675961736, "loss": 1.0409, "step": 410 }, { "epoch": 0.16710713559666598, "grad_norm": 0.10666611790657043, "learning_rate": 0.00018351312843476492, "loss": 1.0924, "step": 411 }, { "epoch": 0.16751372230128075, "grad_norm": 0.0986141785979271, "learning_rate": 0.00018347242010991247, "loss": 0.9468, "step": 412 }, { "epoch": 0.1679203090058955, "grad_norm": 0.09429168701171875, "learning_rate": 0.00018343171178506005, "loss": 0.9706, "step": 413 }, { "epoch": 0.16832689571051027, "grad_norm": 0.09704872965812683, "learning_rate": 0.0001833910034602076, "loss": 1.0692, "step": 414 }, { "epoch": 0.16873348241512504, "grad_norm": 0.0980519950389862, "learning_rate": 0.00018335029513535519, "loss": 1.0218, "step": 415 }, { "epoch": 0.16914006911973978, "grad_norm": 0.08980212360620499, "learning_rate": 0.00018330958681050277, "loss": 0.9243, "step": 416 }, { "epoch": 0.16954665582435455, "grad_norm": 0.09630506485700607, "learning_rate": 0.00018326887848565032, "loss": 0.9599, "step": 417 }, { "epoch": 0.1699532425289693, "grad_norm": 0.08608522266149521, "learning_rate": 0.0001832281701607979, "loss": 0.9577, "step": 418 }, { "epoch": 0.17035982923358406, "grad_norm": 0.09151248633861542, "learning_rate": 0.00018318746183594546, "loss": 0.9956, "step": 419 }, { "epoch": 0.17076641593819883, "grad_norm": 0.09689094871282578, "learning_rate": 
0.00018314675351109304, "loss": 1.0999, "step": 420 }, { "epoch": 0.17117300264281357, "grad_norm": 0.09316612035036087, "learning_rate": 0.0001831060451862406, "loss": 0.8572, "step": 421 }, { "epoch": 0.17157958934742834, "grad_norm": 0.11449979990720749, "learning_rate": 0.00018306533686138817, "loss": 1.0328, "step": 422 }, { "epoch": 0.1719861760520431, "grad_norm": 0.10802194476127625, "learning_rate": 0.00018302462853653573, "loss": 0.9785, "step": 423 }, { "epoch": 0.17239276275665785, "grad_norm": 0.09997294098138809, "learning_rate": 0.00018298392021168328, "loss": 0.9778, "step": 424 }, { "epoch": 0.17279934946127262, "grad_norm": 0.10244690626859665, "learning_rate": 0.00018294321188683086, "loss": 1.0874, "step": 425 }, { "epoch": 0.17320593616588736, "grad_norm": 0.10659472644329071, "learning_rate": 0.00018290250356197844, "loss": 1.0196, "step": 426 }, { "epoch": 0.17361252287050213, "grad_norm": 0.09812036156654358, "learning_rate": 0.000182861795237126, "loss": 0.9051, "step": 427 }, { "epoch": 0.1740191095751169, "grad_norm": 0.845235288143158, "learning_rate": 0.00018282108691227358, "loss": 1.0531, "step": 428 }, { "epoch": 0.17442569627973165, "grad_norm": 0.109995998442173, "learning_rate": 0.00018278037858742113, "loss": 1.001, "step": 429 }, { "epoch": 0.17483228298434642, "grad_norm": 0.12578758597373962, "learning_rate": 0.00018273967026256871, "loss": 0.9513, "step": 430 }, { "epoch": 0.17523886968896116, "grad_norm": 0.1585826873779297, "learning_rate": 0.00018269896193771627, "loss": 1.0091, "step": 431 }, { "epoch": 0.17564545639357593, "grad_norm": 0.15150819718837738, "learning_rate": 0.00018265825361286385, "loss": 1.1045, "step": 432 }, { "epoch": 0.1760520430981907, "grad_norm": 0.1110219806432724, "learning_rate": 0.0001826175452880114, "loss": 0.9877, "step": 433 }, { "epoch": 0.17645862980280544, "grad_norm": 0.11296675354242325, "learning_rate": 0.00018257683696315896, "loss": 1.1317, "step": 434 }, { "epoch": 
0.1768652165074202, "grad_norm": 0.11464451253414154, "learning_rate": 0.00018253612863830654, "loss": 0.9485, "step": 435 }, { "epoch": 0.17727180321203498, "grad_norm": 0.08836513012647629, "learning_rate": 0.0001824954203134541, "loss": 0.8667, "step": 436 }, { "epoch": 0.17767838991664972, "grad_norm": 0.10697431862354279, "learning_rate": 0.00018245471198860167, "loss": 1.0692, "step": 437 }, { "epoch": 0.1780849766212645, "grad_norm": 0.10565032064914703, "learning_rate": 0.00018241400366374925, "loss": 1.0723, "step": 438 }, { "epoch": 0.17849156332587923, "grad_norm": 0.11343531310558319, "learning_rate": 0.0001823732953388968, "loss": 1.1038, "step": 439 }, { "epoch": 0.178898150030494, "grad_norm": 0.10002034902572632, "learning_rate": 0.0001823325870140444, "loss": 0.9859, "step": 440 }, { "epoch": 0.17930473673510877, "grad_norm": 0.10602378845214844, "learning_rate": 0.00018229187868919194, "loss": 1.1091, "step": 441 }, { "epoch": 0.1797113234397235, "grad_norm": 0.09775001555681229, "learning_rate": 0.00018225117036433953, "loss": 1.0473, "step": 442 }, { "epoch": 0.18011791014433828, "grad_norm": 0.09872320294380188, "learning_rate": 0.00018221046203948708, "loss": 1.0657, "step": 443 }, { "epoch": 0.18052449684895303, "grad_norm": 0.0893816128373146, "learning_rate": 0.00018216975371463466, "loss": 0.915, "step": 444 }, { "epoch": 0.1809310835535678, "grad_norm": 0.09870447218418121, "learning_rate": 0.00018212904538978221, "loss": 0.8847, "step": 445 }, { "epoch": 0.18133767025818257, "grad_norm": 0.09775330871343613, "learning_rate": 0.00018208833706492977, "loss": 0.841, "step": 446 }, { "epoch": 0.1817442569627973, "grad_norm": 0.10025996714830399, "learning_rate": 0.00018204762874007735, "loss": 0.9965, "step": 447 }, { "epoch": 0.18215084366741208, "grad_norm": 0.09369905292987823, "learning_rate": 0.0001820069204152249, "loss": 0.9998, "step": 448 }, { "epoch": 0.18255743037202685, "grad_norm": 0.09244808554649353, "learning_rate": 
0.0001819662120903725, "loss": 0.9938, "step": 449 }, { "epoch": 0.1829640170766416, "grad_norm": 0.12163155525922775, "learning_rate": 0.00018192550376552007, "loss": 1.1384, "step": 450 }, { "epoch": 0.18337060378125636, "grad_norm": 0.08755457401275635, "learning_rate": 0.00018188479544066762, "loss": 0.9002, "step": 451 }, { "epoch": 0.1837771904858711, "grad_norm": 0.0917607769370079, "learning_rate": 0.0001818440871158152, "loss": 0.9874, "step": 452 }, { "epoch": 0.18418377719048587, "grad_norm": 0.09113719314336777, "learning_rate": 0.00018180337879096276, "loss": 1.0187, "step": 453 }, { "epoch": 0.18459036389510064, "grad_norm": 0.08795943111181259, "learning_rate": 0.00018176267046611034, "loss": 0.902, "step": 454 }, { "epoch": 0.18499695059971538, "grad_norm": 0.1016731783747673, "learning_rate": 0.0001817219621412579, "loss": 0.9933, "step": 455 }, { "epoch": 0.18540353730433015, "grad_norm": 0.09413068741559982, "learning_rate": 0.00018168125381640547, "loss": 0.9448, "step": 456 }, { "epoch": 0.18581012400894492, "grad_norm": 0.10015012323856354, "learning_rate": 0.00018164054549155303, "loss": 1.1458, "step": 457 }, { "epoch": 0.18621671071355966, "grad_norm": 0.09086768329143524, "learning_rate": 0.00018159983716670058, "loss": 1.0543, "step": 458 }, { "epoch": 0.18662329741817443, "grad_norm": 0.10910352319478989, "learning_rate": 0.00018155912884184816, "loss": 1.0078, "step": 459 }, { "epoch": 0.18702988412278918, "grad_norm": 0.09674135595560074, "learning_rate": 0.00018151842051699572, "loss": 0.9758, "step": 460 }, { "epoch": 0.18743647082740394, "grad_norm": 0.09108126163482666, "learning_rate": 0.00018147771219214332, "loss": 1.0038, "step": 461 }, { "epoch": 0.18784305753201871, "grad_norm": 0.09710326045751572, "learning_rate": 0.00018143700386729088, "loss": 0.9693, "step": 462 }, { "epoch": 0.18824964423663346, "grad_norm": 0.10069318860769272, "learning_rate": 0.00018139629554243843, "loss": 1.1005, "step": 463 }, { "epoch": 
0.18865623094124823, "grad_norm": 0.09434141218662262, "learning_rate": 0.000181355587217586, "loss": 1.0359, "step": 464 }, { "epoch": 0.18906281764586297, "grad_norm": 0.09208261221647263, "learning_rate": 0.00018131487889273357, "loss": 1.0374, "step": 465 }, { "epoch": 0.18946940435047774, "grad_norm": 0.09581121802330017, "learning_rate": 0.00018127417056788115, "loss": 1.0267, "step": 466 }, { "epoch": 0.1898759910550925, "grad_norm": 0.09809669107198715, "learning_rate": 0.0001812334622430287, "loss": 1.0652, "step": 467 }, { "epoch": 0.19028257775970725, "grad_norm": 0.08496394008398056, "learning_rate": 0.00018119275391817628, "loss": 0.9468, "step": 468 }, { "epoch": 0.19068916446432202, "grad_norm": 0.09247399121522903, "learning_rate": 0.00018115204559332384, "loss": 1.0247, "step": 469 }, { "epoch": 0.1910957511689368, "grad_norm": 0.10010971128940582, "learning_rate": 0.0001811113372684714, "loss": 0.9674, "step": 470 }, { "epoch": 0.19150233787355153, "grad_norm": 0.09562191367149353, "learning_rate": 0.00018107062894361897, "loss": 0.9819, "step": 471 }, { "epoch": 0.1919089245781663, "grad_norm": 0.09223975241184235, "learning_rate": 0.00018102992061876655, "loss": 1.0051, "step": 472 }, { "epoch": 0.19231551128278104, "grad_norm": 0.09564565122127533, "learning_rate": 0.00018098921229391414, "loss": 0.908, "step": 473 }, { "epoch": 0.1927220979873958, "grad_norm": 0.09371364116668701, "learning_rate": 0.0001809485039690617, "loss": 1.0195, "step": 474 }, { "epoch": 0.19312868469201058, "grad_norm": 0.0895533412694931, "learning_rate": 0.00018090779564420924, "loss": 0.8912, "step": 475 }, { "epoch": 0.19353527139662532, "grad_norm": 0.08874888718128204, "learning_rate": 0.00018086708731935682, "loss": 0.9941, "step": 476 }, { "epoch": 0.1939418581012401, "grad_norm": 8989.1748046875, "learning_rate": 0.00018082637899450438, "loss": 1.0191, "step": 477 }, { "epoch": 0.19434844480585484, "grad_norm": 0.09893982112407684, "learning_rate": 
0.00018078567066965196, "loss": 1.1682, "step": 478 }, { "epoch": 0.1947550315104696, "grad_norm": 0.09100797772407532, "learning_rate": 0.00018074496234479951, "loss": 0.9466, "step": 479 }, { "epoch": 0.19516161821508438, "grad_norm": 0.10540256649255753, "learning_rate": 0.0001807042540199471, "loss": 1.0735, "step": 480 }, { "epoch": 0.19556820491969912, "grad_norm": 0.09110235422849655, "learning_rate": 0.00018066354569509465, "loss": 1.0097, "step": 481 }, { "epoch": 0.1959747916243139, "grad_norm": 0.10651825368404388, "learning_rate": 0.0001806228373702422, "loss": 1.014, "step": 482 }, { "epoch": 0.19638137832892866, "grad_norm": 0.08685674518346786, "learning_rate": 0.00018058212904538978, "loss": 0.9755, "step": 483 }, { "epoch": 0.1967879650335434, "grad_norm": 0.10092045366764069, "learning_rate": 0.00018054142072053737, "loss": 0.9397, "step": 484 }, { "epoch": 0.19719455173815817, "grad_norm": 0.1056622639298439, "learning_rate": 0.00018050071239568495, "loss": 0.9864, "step": 485 }, { "epoch": 0.1976011384427729, "grad_norm": 0.10525202006101608, "learning_rate": 0.0001804600040708325, "loss": 1.1085, "step": 486 }, { "epoch": 0.19800772514738768, "grad_norm": 0.10073073953390121, "learning_rate": 0.00018041929574598006, "loss": 1.1264, "step": 487 }, { "epoch": 0.19841431185200245, "grad_norm": 0.09659091383218765, "learning_rate": 0.00018037858742112764, "loss": 0.9848, "step": 488 }, { "epoch": 0.1988208985566172, "grad_norm": 0.09986629337072372, "learning_rate": 0.0001803378790962752, "loss": 1.0732, "step": 489 }, { "epoch": 0.19922748526123196, "grad_norm": 0.11215290427207947, "learning_rate": 0.00018029717077142277, "loss": 1.1259, "step": 490 }, { "epoch": 0.1996340719658467, "grad_norm": 0.11136343330144882, "learning_rate": 0.00018025646244657033, "loss": 1.0857, "step": 491 }, { "epoch": 0.20004065867046147, "grad_norm": 0.10452030599117279, "learning_rate": 0.0001802157541217179, "loss": 0.9997, "step": 492 }, { "epoch": 
0.20044724537507624, "grad_norm": 0.10394178330898285, "learning_rate": 0.00018017504579686546, "loss": 1.0852, "step": 493 }, { "epoch": 0.20085383207969099, "grad_norm": 0.10206598043441772, "learning_rate": 0.00018013433747201302, "loss": 0.9629, "step": 494 }, { "epoch": 0.20126041878430576, "grad_norm": 0.09365608543157578, "learning_rate": 0.00018009362914716062, "loss": 0.9504, "step": 495 }, { "epoch": 0.20166700548892053, "grad_norm": 0.09425178170204163, "learning_rate": 0.00018005292082230818, "loss": 1.0038, "step": 496 }, { "epoch": 0.20207359219353527, "grad_norm": 0.09562011808156967, "learning_rate": 0.00018001221249745576, "loss": 1.0877, "step": 497 }, { "epoch": 0.20248017889815004, "grad_norm": 0.11452426016330719, "learning_rate": 0.0001799715041726033, "loss": 1.0688, "step": 498 }, { "epoch": 0.20288676560276478, "grad_norm": 0.0930696651339531, "learning_rate": 0.00017993079584775087, "loss": 1.0255, "step": 499 }, { "epoch": 0.20329335230737955, "grad_norm": 0.10522327572107315, "learning_rate": 0.00017989008752289845, "loss": 1.085, "step": 500 }, { "epoch": 0.20369993901199432, "grad_norm": 0.08499190211296082, "learning_rate": 0.000179849379198046, "loss": 0.9235, "step": 501 }, { "epoch": 0.20410652571660906, "grad_norm": 0.09169955551624298, "learning_rate": 0.00017980867087319358, "loss": 0.9836, "step": 502 }, { "epoch": 0.20451311242122383, "grad_norm": 0.10331466048955917, "learning_rate": 0.00017976796254834114, "loss": 1.0255, "step": 503 }, { "epoch": 0.20491969912583857, "grad_norm": 0.0900363028049469, "learning_rate": 0.00017972725422348872, "loss": 0.9691, "step": 504 }, { "epoch": 0.20532628583045334, "grad_norm": 0.10095544904470444, "learning_rate": 0.00017968654589863627, "loss": 1.0289, "step": 505 }, { "epoch": 0.2057328725350681, "grad_norm": 0.0992627814412117, "learning_rate": 0.00017964583757378383, "loss": 0.9785, "step": 506 }, { "epoch": 0.20613945923968285, "grad_norm": 0.0954422652721405, "learning_rate": 
0.00017960512924893144, "loss": 1.0105, "step": 507 }, { "epoch": 0.20654604594429762, "grad_norm": 0.0994410440325737, "learning_rate": 0.000179564420924079, "loss": 1.0894, "step": 508 }, { "epoch": 0.2069526326489124, "grad_norm": 0.08866444230079651, "learning_rate": 0.00017952371259922654, "loss": 0.9725, "step": 509 }, { "epoch": 0.20735921935352714, "grad_norm": 0.09361348301172256, "learning_rate": 0.00017948300427437412, "loss": 1.0441, "step": 510 }, { "epoch": 0.2077658060581419, "grad_norm": 0.08215323090553284, "learning_rate": 0.00017944229594952168, "loss": 0.9214, "step": 511 }, { "epoch": 0.20817239276275665, "grad_norm": 0.09752262383699417, "learning_rate": 0.00017940158762466926, "loss": 0.9456, "step": 512 }, { "epoch": 0.20857897946737142, "grad_norm": 0.10021419823169708, "learning_rate": 0.00017936087929981681, "loss": 1.1158, "step": 513 }, { "epoch": 0.2089855661719862, "grad_norm": 0.09550227969884872, "learning_rate": 0.0001793201709749644, "loss": 0.9789, "step": 514 }, { "epoch": 0.20939215287660093, "grad_norm": 0.09059977531433105, "learning_rate": 0.00017927946265011195, "loss": 0.9649, "step": 515 }, { "epoch": 0.2097987395812157, "grad_norm": 0.09227627515792847, "learning_rate": 0.00017923875432525953, "loss": 0.9779, "step": 516 }, { "epoch": 0.21020532628583044, "grad_norm": 0.09919798374176025, "learning_rate": 0.00017919804600040708, "loss": 1.0155, "step": 517 }, { "epoch": 0.2106119129904452, "grad_norm": 0.09044051915407181, "learning_rate": 0.00017915733767555464, "loss": 0.9428, "step": 518 }, { "epoch": 0.21101849969505998, "grad_norm": 0.09017504006624222, "learning_rate": 0.00017911662935070225, "loss": 0.9244, "step": 519 }, { "epoch": 0.21142508639967472, "grad_norm": 0.09257036447525024, "learning_rate": 0.0001790759210258498, "loss": 1.0168, "step": 520 }, { "epoch": 0.2118316731042895, "grad_norm": 0.0926235020160675, "learning_rate": 0.00017903521270099735, "loss": 0.9363, "step": 521 }, { "epoch": 
0.21223825980890426, "grad_norm": 0.08785069733858109, "learning_rate": 0.00017899450437614494, "loss": 0.9428, "step": 522 }, { "epoch": 0.212644846513519, "grad_norm": 0.09824348986148834, "learning_rate": 0.0001789537960512925, "loss": 1.0378, "step": 523 }, { "epoch": 0.21305143321813377, "grad_norm": 0.0915142148733139, "learning_rate": 0.00017891308772644007, "loss": 0.9603, "step": 524 }, { "epoch": 0.21345801992274852, "grad_norm": 0.09466978907585144, "learning_rate": 0.00017887237940158763, "loss": 1.013, "step": 525 }, { "epoch": 0.21386460662736329, "grad_norm": 0.09305880963802338, "learning_rate": 0.0001788316710767352, "loss": 0.9386, "step": 526 }, { "epoch": 0.21427119333197805, "grad_norm": 0.09210691601037979, "learning_rate": 0.00017879096275188276, "loss": 0.9797, "step": 527 }, { "epoch": 0.2146777800365928, "grad_norm": 0.10415366291999817, "learning_rate": 0.00017875025442703031, "loss": 1.0125, "step": 528 }, { "epoch": 0.21508436674120757, "grad_norm": 0.10259640216827393, "learning_rate": 0.0001787095461021779, "loss": 1.0473, "step": 529 }, { "epoch": 0.2154909534458223, "grad_norm": 0.09523239731788635, "learning_rate": 0.00017866883777732548, "loss": 0.9603, "step": 530 }, { "epoch": 0.21589754015043708, "grad_norm": 0.10005185008049011, "learning_rate": 0.00017862812945247306, "loss": 1.0768, "step": 531 }, { "epoch": 0.21630412685505185, "grad_norm": 0.09643250703811646, "learning_rate": 0.0001785874211276206, "loss": 1.0799, "step": 532 }, { "epoch": 0.2167107135596666, "grad_norm": 0.09473159909248352, "learning_rate": 0.00017854671280276817, "loss": 1.0657, "step": 533 }, { "epoch": 0.21711730026428136, "grad_norm": 0.09550385922193527, "learning_rate": 0.00017850600447791575, "loss": 1.0389, "step": 534 }, { "epoch": 0.21752388696889613, "grad_norm": 0.09414463490247726, "learning_rate": 0.0001784652961530633, "loss": 1.0317, "step": 535 }, { "epoch": 0.21793047367351087, "grad_norm": 0.090250164270401, "learning_rate": 
0.00017842458782821088, "loss": 1.0212, "step": 536 }, { "epoch": 0.21833706037812564, "grad_norm": 0.09635050594806671, "learning_rate": 0.00017838387950335844, "loss": 0.9473, "step": 537 }, { "epoch": 0.21874364708274038, "grad_norm": 0.0985347330570221, "learning_rate": 0.00017834317117850602, "loss": 1.1372, "step": 538 }, { "epoch": 0.21915023378735515, "grad_norm": 0.09789203107357025, "learning_rate": 0.00017830246285365357, "loss": 1.0369, "step": 539 }, { "epoch": 0.21955682049196992, "grad_norm": 0.09777568280696869, "learning_rate": 0.00017826175452880113, "loss": 1.0746, "step": 540 }, { "epoch": 0.21996340719658466, "grad_norm": 0.09013503789901733, "learning_rate": 0.0001782210462039487, "loss": 1.0124, "step": 541 }, { "epoch": 0.22036999390119943, "grad_norm": 0.10604355484247208, "learning_rate": 0.0001781803378790963, "loss": 1.0158, "step": 542 }, { "epoch": 0.22077658060581418, "grad_norm": 0.09194648265838623, "learning_rate": 0.00017813962955424387, "loss": 0.9544, "step": 543 }, { "epoch": 0.22118316731042895, "grad_norm": 0.09223110228776932, "learning_rate": 0.00017809892122939142, "loss": 1.0094, "step": 544 }, { "epoch": 0.22158975401504372, "grad_norm": 0.09049870073795319, "learning_rate": 0.00017805821290453898, "loss": 0.8829, "step": 545 }, { "epoch": 0.22199634071965846, "grad_norm": 0.10157813131809235, "learning_rate": 0.00017801750457968656, "loss": 1.0904, "step": 546 }, { "epoch": 0.22240292742427323, "grad_norm": 0.09934356063604355, "learning_rate": 0.0001779767962548341, "loss": 1.0708, "step": 547 }, { "epoch": 0.222809514128888, "grad_norm": 0.09037156403064728, "learning_rate": 0.0001779360879299817, "loss": 0.916, "step": 548 }, { "epoch": 0.22321610083350274, "grad_norm": 0.09347829967737198, "learning_rate": 0.00017789537960512925, "loss": 1.0328, "step": 549 }, { "epoch": 0.2236226875381175, "grad_norm": 0.087796151638031, "learning_rate": 0.00017785467128027683, "loss": 0.9961, "step": 550 }, { "epoch": 
0.22402927424273225, "grad_norm": 0.09518422931432724, "learning_rate": 0.00017781396295542438, "loss": 0.9855, "step": 551 }, { "epoch": 0.22443586094734702, "grad_norm": 0.09606748074293137, "learning_rate": 0.00017777325463057194, "loss": 0.954, "step": 552 }, { "epoch": 0.2248424476519618, "grad_norm": 0.09338165074586868, "learning_rate": 0.00017773254630571955, "loss": 1.0876, "step": 553 }, { "epoch": 0.22524903435657653, "grad_norm": 0.09242440015077591, "learning_rate": 0.0001776918379808671, "loss": 0.9418, "step": 554 }, { "epoch": 0.2256556210611913, "grad_norm": 0.0990302637219429, "learning_rate": 0.00017765112965601468, "loss": 1.0641, "step": 555 }, { "epoch": 0.22606220776580604, "grad_norm": 0.09444238990545273, "learning_rate": 0.00017761042133116224, "loss": 1.0315, "step": 556 }, { "epoch": 0.22646879447042081, "grad_norm": 0.08771083503961563, "learning_rate": 0.0001775697130063098, "loss": 0.9898, "step": 557 }, { "epoch": 0.22687538117503558, "grad_norm": 0.10041147470474243, "learning_rate": 0.00017752900468145737, "loss": 1.0478, "step": 558 }, { "epoch": 0.22728196787965033, "grad_norm": 0.0933571383357048, "learning_rate": 0.00017748829635660492, "loss": 1.0002, "step": 559 }, { "epoch": 0.2276885545842651, "grad_norm": 0.0912991389632225, "learning_rate": 0.0001774475880317525, "loss": 1.0807, "step": 560 }, { "epoch": 0.22809514128887987, "grad_norm": 0.09350984543561935, "learning_rate": 0.00017740687970690006, "loss": 0.8962, "step": 561 }, { "epoch": 0.2285017279934946, "grad_norm": 0.0978541299700737, "learning_rate": 0.00017736617138204764, "loss": 1.0339, "step": 562 }, { "epoch": 0.22890831469810938, "grad_norm": 0.08964958041906357, "learning_rate": 0.0001773254630571952, "loss": 1.051, "step": 563 }, { "epoch": 0.22931490140272412, "grad_norm": 0.09241898357868195, "learning_rate": 0.00017728475473234275, "loss": 0.903, "step": 564 }, { "epoch": 0.2297214881073389, "grad_norm": 0.09366483986377716, "learning_rate": 
0.00017724404640749036, "loss": 1.0055, "step": 565 }, { "epoch": 0.23012807481195366, "grad_norm": 0.10184673964977264, "learning_rate": 0.0001772033380826379, "loss": 1.004, "step": 566 }, { "epoch": 0.2305346615165684, "grad_norm": 0.09287306666374207, "learning_rate": 0.0001771626297577855, "loss": 0.9667, "step": 567 }, { "epoch": 0.23094124822118317, "grad_norm": 0.08905091136693954, "learning_rate": 0.00017712192143293305, "loss": 0.9295, "step": 568 }, { "epoch": 0.2313478349257979, "grad_norm": 0.0908786877989769, "learning_rate": 0.0001770812131080806, "loss": 0.8957, "step": 569 }, { "epoch": 0.23175442163041268, "grad_norm": 0.10284281522035599, "learning_rate": 0.00017704050478322818, "loss": 1.1311, "step": 570 }, { "epoch": 0.23216100833502745, "grad_norm": 0.09007006883621216, "learning_rate": 0.00017699979645837574, "loss": 0.9919, "step": 571 }, { "epoch": 0.2325675950396422, "grad_norm": 0.09025272727012634, "learning_rate": 0.00017695908813352332, "loss": 0.9057, "step": 572 }, { "epoch": 0.23297418174425696, "grad_norm": 0.0994710698723793, "learning_rate": 0.00017691837980867087, "loss": 1.1472, "step": 573 }, { "epoch": 0.23338076844887173, "grad_norm": 0.09117428958415985, "learning_rate": 0.00017687767148381845, "loss": 0.9665, "step": 574 }, { "epoch": 0.23378735515348648, "grad_norm": 0.0893009826540947, "learning_rate": 0.000176836963158966, "loss": 0.951, "step": 575 }, { "epoch": 0.23419394185810125, "grad_norm": 0.08649599552154541, "learning_rate": 0.0001767962548341136, "loss": 0.925, "step": 576 }, { "epoch": 0.234600528562716, "grad_norm": 0.0928448736667633, "learning_rate": 0.00017675554650926117, "loss": 0.9253, "step": 577 }, { "epoch": 0.23500711526733076, "grad_norm": 0.10335158556699753, "learning_rate": 0.00017671483818440872, "loss": 1.1171, "step": 578 }, { "epoch": 0.23541370197194553, "grad_norm": 0.09889842569828033, "learning_rate": 0.0001766741298595563, "loss": 1.0005, "step": 579 }, { "epoch": 0.23582028867656027, 
"grad_norm": 0.09655506163835526, "learning_rate": 0.00017663342153470386, "loss": 1.0273, "step": 580 }, { "epoch": 0.23622687538117504, "grad_norm": 0.09516560286283493, "learning_rate": 0.0001765927132098514, "loss": 1.024, "step": 581 }, { "epoch": 0.23663346208578978, "grad_norm": 0.10024843364953995, "learning_rate": 0.000176552004884999, "loss": 1.0299, "step": 582 }, { "epoch": 0.23704004879040455, "grad_norm": 0.10152596235275269, "learning_rate": 0.00017651129656014655, "loss": 0.9658, "step": 583 }, { "epoch": 0.23744663549501932, "grad_norm": 0.09654249995946884, "learning_rate": 0.00017647058823529413, "loss": 1.0722, "step": 584 }, { "epoch": 0.23785322219963406, "grad_norm": 0.09112072736024857, "learning_rate": 0.00017642987991044168, "loss": 0.9846, "step": 585 }, { "epoch": 0.23825980890424883, "grad_norm": 0.09640034288167953, "learning_rate": 0.00017638917158558926, "loss": 1.0501, "step": 586 }, { "epoch": 0.2386663956088636, "grad_norm": 0.09564584493637085, "learning_rate": 0.00017634846326073682, "loss": 0.955, "step": 587 }, { "epoch": 0.23907298231347834, "grad_norm": 0.10815359652042389, "learning_rate": 0.0001763077549358844, "loss": 1.203, "step": 588 }, { "epoch": 0.2394795690180931, "grad_norm": 0.09078256040811539, "learning_rate": 0.00017626704661103198, "loss": 0.9881, "step": 589 }, { "epoch": 0.23988615572270786, "grad_norm": 0.09075487405061722, "learning_rate": 0.00017622633828617954, "loss": 0.984, "step": 590 }, { "epoch": 0.24029274242732263, "grad_norm": 0.09048381447792053, "learning_rate": 0.00017618562996132712, "loss": 1.0235, "step": 591 }, { "epoch": 0.2406993291319374, "grad_norm": 0.09820905327796936, "learning_rate": 0.00017614492163647467, "loss": 0.9763, "step": 592 }, { "epoch": 0.24110591583655214, "grad_norm": 0.0961097925901413, "learning_rate": 0.00017610421331162222, "loss": 1.1035, "step": 593 }, { "epoch": 0.2415125025411669, "grad_norm": 0.0877358540892601, "learning_rate": 0.0001760635049867698, "loss": 
0.8962, "step": 594 }, { "epoch": 0.24191908924578168, "grad_norm": 0.09730017930269241, "learning_rate": 0.00017602279666191736, "loss": 1.1232, "step": 595 }, { "epoch": 0.24232567595039642, "grad_norm": 0.09486240148544312, "learning_rate": 0.00017598208833706494, "loss": 1.0566, "step": 596 }, { "epoch": 0.2427322626550112, "grad_norm": 0.09367606788873672, "learning_rate": 0.0001759413800122125, "loss": 0.9934, "step": 597 }, { "epoch": 0.24313884935962593, "grad_norm": 0.09046703577041626, "learning_rate": 0.00017590067168736008, "loss": 0.9137, "step": 598 }, { "epoch": 0.2435454360642407, "grad_norm": 0.09512536972761154, "learning_rate": 0.00017585996336250766, "loss": 0.9733, "step": 599 }, { "epoch": 0.24395202276885547, "grad_norm": 0.08619649708271027, "learning_rate": 0.0001758192550376552, "loss": 0.8777, "step": 600 }, { "epoch": 0.2443586094734702, "grad_norm": 0.09386080503463745, "learning_rate": 0.0001757785467128028, "loss": 1.0171, "step": 601 }, { "epoch": 0.24476519617808498, "grad_norm": 0.098211869597435, "learning_rate": 0.00017573783838795035, "loss": 1.0283, "step": 602 }, { "epoch": 0.24517178288269972, "grad_norm": 0.08785215020179749, "learning_rate": 0.00017569713006309793, "loss": 0.9256, "step": 603 }, { "epoch": 0.2455783695873145, "grad_norm": 0.09419196099042892, "learning_rate": 0.00017565642173824548, "loss": 1.0727, "step": 604 }, { "epoch": 0.24598495629192926, "grad_norm": 0.08359744399785995, "learning_rate": 0.00017561571341339304, "loss": 0.8128, "step": 605 }, { "epoch": 0.246391542996544, "grad_norm": 0.09521903842687607, "learning_rate": 0.00017557500508854062, "loss": 1.0039, "step": 606 }, { "epoch": 0.24679812970115877, "grad_norm": 0.09249220043420792, "learning_rate": 0.00017553429676368817, "loss": 0.9793, "step": 607 }, { "epoch": 0.24720471640577354, "grad_norm": 0.09042320400476456, "learning_rate": 0.00017549358843883575, "loss": 0.9552, "step": 608 }, { "epoch": 0.2476113031103883, "grad_norm": 
0.09921760857105255, "learning_rate": 0.0001754528801139833, "loss": 1.0438, "step": 609 }, { "epoch": 0.24801788981500306, "grad_norm": 0.09381897002458572, "learning_rate": 0.0001754121717891309, "loss": 0.9345, "step": 610 }, { "epoch": 0.2484244765196178, "grad_norm": 0.10531708598136902, "learning_rate": 0.00017537146346427847, "loss": 1.0504, "step": 611 }, { "epoch": 0.24883106322423257, "grad_norm": 0.08689824491739273, "learning_rate": 0.00017533075513942602, "loss": 0.9314, "step": 612 }, { "epoch": 0.24923764992884734, "grad_norm": 0.0852714404463768, "learning_rate": 0.0001752900468145736, "loss": 0.9555, "step": 613 }, { "epoch": 0.24964423663346208, "grad_norm": 0.09126532077789307, "learning_rate": 0.00017524933848972116, "loss": 1.0025, "step": 614 }, { "epoch": 0.2500508233380768, "grad_norm": 0.09271440654993057, "learning_rate": 0.0001752086301648687, "loss": 0.9753, "step": 615 }, { "epoch": 0.2504574100426916, "grad_norm": 0.09532623738050461, "learning_rate": 0.0001751679218400163, "loss": 1.0158, "step": 616 }, { "epoch": 0.25086399674730636, "grad_norm": 0.09074733406305313, "learning_rate": 0.00017512721351516385, "loss": 0.9335, "step": 617 }, { "epoch": 0.2512705834519211, "grad_norm": 0.08851765096187592, "learning_rate": 0.00017508650519031143, "loss": 0.9356, "step": 618 }, { "epoch": 0.2516771701565359, "grad_norm": 0.09362735599279404, "learning_rate": 0.00017504579686545898, "loss": 0.9671, "step": 619 }, { "epoch": 0.25208375686115064, "grad_norm": 0.09651681780815125, "learning_rate": 0.00017500508854060656, "loss": 1.0424, "step": 620 }, { "epoch": 0.2524903435657654, "grad_norm": 0.0909847766160965, "learning_rate": 0.00017496438021575412, "loss": 0.9606, "step": 621 }, { "epoch": 0.2528969302703802, "grad_norm": 0.09061215072870255, "learning_rate": 0.0001749236718909017, "loss": 1.0444, "step": 622 }, { "epoch": 0.2533035169749949, "grad_norm": 0.10378221422433853, "learning_rate": 0.00017488296356604928, "loss": 0.991, 
"step": 623 }, { "epoch": 0.25371010367960967, "grad_norm": 0.09829584509134293, "learning_rate": 0.00017484225524119683, "loss": 0.8911, "step": 624 }, { "epoch": 0.2541166903842244, "grad_norm": 0.0915360227227211, "learning_rate": 0.00017480154691634442, "loss": 0.9616, "step": 625 }, { "epoch": 0.2545232770888392, "grad_norm": 0.09093396365642548, "learning_rate": 0.00017476083859149197, "loss": 0.886, "step": 626 }, { "epoch": 0.25492986379345395, "grad_norm": 0.09621252864599228, "learning_rate": 0.00017472013026663952, "loss": 1.0167, "step": 627 }, { "epoch": 0.2553364504980687, "grad_norm": 0.10381032526493073, "learning_rate": 0.0001746794219417871, "loss": 1.0008, "step": 628 }, { "epoch": 0.2557430372026835, "grad_norm": 0.10300707817077637, "learning_rate": 0.00017463871361693466, "loss": 1.0176, "step": 629 }, { "epoch": 0.25614962390729823, "grad_norm": 0.09527314454317093, "learning_rate": 0.00017459800529208224, "loss": 0.9698, "step": 630 }, { "epoch": 0.25655621061191297, "grad_norm": 0.09126346558332443, "learning_rate": 0.0001745572969672298, "loss": 0.9575, "step": 631 }, { "epoch": 0.25696279731652777, "grad_norm": 0.10338298231363297, "learning_rate": 0.00017451658864237738, "loss": 0.9666, "step": 632 }, { "epoch": 0.2573693840211425, "grad_norm": 0.10729333758354187, "learning_rate": 0.00017447588031752493, "loss": 1.0857, "step": 633 }, { "epoch": 0.25777597072575725, "grad_norm": 0.09935057163238525, "learning_rate": 0.0001744351719926725, "loss": 1.0387, "step": 634 }, { "epoch": 0.25818255743037205, "grad_norm": 0.10013054311275482, "learning_rate": 0.0001743944636678201, "loss": 1.0938, "step": 635 }, { "epoch": 0.2585891441349868, "grad_norm": 0.1010405421257019, "learning_rate": 0.00017435375534296765, "loss": 0.9507, "step": 636 }, { "epoch": 0.25899573083960153, "grad_norm": 0.09375156462192535, "learning_rate": 0.00017431304701811523, "loss": 0.965, "step": 637 }, { "epoch": 0.2594023175442163, "grad_norm": 0.09231811761856079, 
"learning_rate": 0.00017427233869326278, "loss": 1.0549, "step": 638 }, { "epoch": 0.2598089042488311, "grad_norm": 0.09252883493900299, "learning_rate": 0.00017423163036841034, "loss": 0.9562, "step": 639 }, { "epoch": 0.2602154909534458, "grad_norm": 0.08758233487606049, "learning_rate": 0.00017419092204355792, "loss": 0.8267, "step": 640 }, { "epoch": 0.26062207765806056, "grad_norm": 0.11551918089389801, "learning_rate": 0.00017415021371870547, "loss": 1.1932, "step": 641 }, { "epoch": 0.26102866436267536, "grad_norm": 0.09874440729618073, "learning_rate": 0.00017410950539385305, "loss": 0.994, "step": 642 }, { "epoch": 0.2614352510672901, "grad_norm": 0.0956895500421524, "learning_rate": 0.0001740687970690006, "loss": 1.0509, "step": 643 }, { "epoch": 0.26184183777190484, "grad_norm": 0.09503257274627686, "learning_rate": 0.0001740280887441482, "loss": 1.0729, "step": 644 }, { "epoch": 0.26224842447651964, "grad_norm": 0.09010849893093109, "learning_rate": 0.00017398738041929577, "loss": 1.0029, "step": 645 }, { "epoch": 0.2626550111811344, "grad_norm": 0.1030508279800415, "learning_rate": 0.00017394667209444332, "loss": 1.0345, "step": 646 }, { "epoch": 0.2630615978857491, "grad_norm": 0.09376908838748932, "learning_rate": 0.0001739059637695909, "loss": 0.9222, "step": 647 }, { "epoch": 0.2634681845903639, "grad_norm": 0.09775326400995255, "learning_rate": 0.00017386525544473846, "loss": 1.0516, "step": 648 }, { "epoch": 0.26387477129497866, "grad_norm": 0.08575090020895004, "learning_rate": 0.00017382454711988604, "loss": 0.9617, "step": 649 }, { "epoch": 0.2642813579995934, "grad_norm": 0.10131283849477768, "learning_rate": 0.0001737838387950336, "loss": 0.9831, "step": 650 }, { "epoch": 0.2646879447042082, "grad_norm": 0.10559401661157608, "learning_rate": 0.00017374313047018115, "loss": 1.1504, "step": 651 }, { "epoch": 0.26509453140882294, "grad_norm": 0.10393818467855453, "learning_rate": 0.00017370242214532873, "loss": 0.9879, "step": 652 }, { "epoch": 
0.2655011181134377, "grad_norm": 0.0909782201051712, "learning_rate": 0.00017366171382047628, "loss": 0.9822, "step": 653 }, { "epoch": 0.2659077048180524, "grad_norm": 0.07904715836048126, "learning_rate": 0.00017362100549562386, "loss": 0.9603, "step": 654 }, { "epoch": 0.2663142915226672, "grad_norm": 0.1078203096985817, "learning_rate": 0.00017358029717077142, "loss": 0.9873, "step": 655 }, { "epoch": 0.26672087822728197, "grad_norm": 0.09209142625331879, "learning_rate": 0.000173539588845919, "loss": 0.9763, "step": 656 }, { "epoch": 0.2671274649318967, "grad_norm": 0.10040608793497086, "learning_rate": 0.00017349888052106658, "loss": 1.0362, "step": 657 }, { "epoch": 0.2675340516365115, "grad_norm": 0.1019914448261261, "learning_rate": 0.00017345817219621413, "loss": 0.9715, "step": 658 }, { "epoch": 0.26794063834112625, "grad_norm": 0.09650052338838577, "learning_rate": 0.00017341746387136172, "loss": 0.9212, "step": 659 }, { "epoch": 0.268347225045741, "grad_norm": 0.10352949053049088, "learning_rate": 0.00017337675554650927, "loss": 0.9535, "step": 660 }, { "epoch": 0.2687538117503558, "grad_norm": 0.0910978689789772, "learning_rate": 0.00017333604722165685, "loss": 0.92, "step": 661 }, { "epoch": 0.26916039845497053, "grad_norm": 0.0917704775929451, "learning_rate": 0.0001732953388968044, "loss": 0.9926, "step": 662 }, { "epoch": 0.26956698515958527, "grad_norm": 0.09258386492729187, "learning_rate": 0.00017325463057195196, "loss": 1.0495, "step": 663 }, { "epoch": 0.26997357186420007, "grad_norm": 0.08842117339372635, "learning_rate": 0.00017321392224709954, "loss": 0.8509, "step": 664 }, { "epoch": 0.2703801585688148, "grad_norm": 0.10442432016134262, "learning_rate": 0.0001731732139222471, "loss": 1.0109, "step": 665 }, { "epoch": 0.27078674527342955, "grad_norm": 0.0932939201593399, "learning_rate": 0.00017313250559739468, "loss": 0.979, "step": 666 }, { "epoch": 0.2711933319780443, "grad_norm": 0.09750665724277496, "learning_rate": 
0.00017309179727254223, "loss": 1.0937, "step": 667 }, { "epoch": 0.2715999186826591, "grad_norm": 0.0916363000869751, "learning_rate": 0.0001730510889476898, "loss": 0.9497, "step": 668 }, { "epoch": 0.27200650538727383, "grad_norm": 0.08987271040678024, "learning_rate": 0.0001730103806228374, "loss": 0.9528, "step": 669 }, { "epoch": 0.2724130920918886, "grad_norm": 0.10026190429925919, "learning_rate": 0.00017296967229798495, "loss": 0.9323, "step": 670 }, { "epoch": 0.2728196787965034, "grad_norm": 0.09267815947532654, "learning_rate": 0.00017292896397313253, "loss": 0.9469, "step": 671 }, { "epoch": 0.2732262655011181, "grad_norm": 0.0934181734919548, "learning_rate": 0.00017288825564828008, "loss": 0.9217, "step": 672 }, { "epoch": 0.27363285220573286, "grad_norm": 0.09324284642934799, "learning_rate": 0.00017284754732342766, "loss": 0.941, "step": 673 }, { "epoch": 0.27403943891034765, "grad_norm": 0.09280567616224289, "learning_rate": 0.00017280683899857522, "loss": 0.9792, "step": 674 }, { "epoch": 0.2744460256149624, "grad_norm": 0.09610418230295181, "learning_rate": 0.00017276613067372277, "loss": 1.0206, "step": 675 }, { "epoch": 0.27485261231957714, "grad_norm": 0.09881840646266937, "learning_rate": 0.00017272542234887035, "loss": 1.027, "step": 676 }, { "epoch": 0.27525919902419194, "grad_norm": 0.08691172301769257, "learning_rate": 0.0001726847140240179, "loss": 0.9146, "step": 677 }, { "epoch": 0.2756657857288067, "grad_norm": 0.09320187568664551, "learning_rate": 0.0001726440056991655, "loss": 0.9386, "step": 678 }, { "epoch": 0.2760723724334214, "grad_norm": 0.10148660838603973, "learning_rate": 0.00017260329737431304, "loss": 1.0792, "step": 679 }, { "epoch": 0.27647895913803616, "grad_norm": 0.09676487743854523, "learning_rate": 0.00017256258904946062, "loss": 0.9966, "step": 680 }, { "epoch": 0.27688554584265096, "grad_norm": 0.10451057553291321, "learning_rate": 0.0001725218807246082, "loss": 1.0894, "step": 681 }, { "epoch": 
0.2772921325472657, "grad_norm": 0.0993044376373291, "learning_rate": 0.00017248117239975576, "loss": 0.9681, "step": 682 }, { "epoch": 0.27769871925188044, "grad_norm": 0.10459738969802856, "learning_rate": 0.00017244046407490334, "loss": 1.0805, "step": 683 }, { "epoch": 0.27810530595649524, "grad_norm": 0.09404733031988144, "learning_rate": 0.0001723997557500509, "loss": 1.019, "step": 684 }, { "epoch": 0.27851189266111, "grad_norm": 0.09058643132448196, "learning_rate": 0.00017235904742519847, "loss": 1.0262, "step": 685 }, { "epoch": 0.2789184793657247, "grad_norm": 0.08885390311479568, "learning_rate": 0.00017231833910034603, "loss": 0.9735, "step": 686 }, { "epoch": 0.2793250660703395, "grad_norm": 0.09429844468832016, "learning_rate": 0.00017227763077549358, "loss": 0.9265, "step": 687 }, { "epoch": 0.27973165277495426, "grad_norm": 0.09403959661722183, "learning_rate": 0.00017223692245064116, "loss": 1.105, "step": 688 }, { "epoch": 0.280138239479569, "grad_norm": 0.0966695174574852, "learning_rate": 0.00017219621412578872, "loss": 1.1486, "step": 689 }, { "epoch": 0.2805448261841838, "grad_norm": 0.08807272464036942, "learning_rate": 0.0001721555058009363, "loss": 0.7858, "step": 690 }, { "epoch": 0.28095141288879855, "grad_norm": 0.09138672798871994, "learning_rate": 0.00017211479747608388, "loss": 1.0746, "step": 691 }, { "epoch": 0.2813579995934133, "grad_norm": 0.1015787124633789, "learning_rate": 0.00017207408915123143, "loss": 1.0996, "step": 692 }, { "epoch": 0.28176458629802803, "grad_norm": 0.1018923744559288, "learning_rate": 0.00017203338082637901, "loss": 0.9986, "step": 693 }, { "epoch": 0.2821711730026428, "grad_norm": 0.10809756815433502, "learning_rate": 0.00017199267250152657, "loss": 1.0796, "step": 694 }, { "epoch": 0.28257775970725757, "grad_norm": 0.08786237984895706, "learning_rate": 0.00017195196417667415, "loss": 0.9383, "step": 695 }, { "epoch": 0.2829843464118723, "grad_norm": 0.12186282873153687, "learning_rate": 
0.0001719112558518217, "loss": 0.9624, "step": 696 }, { "epoch": 0.2833909331164871, "grad_norm": 0.09969845414161682, "learning_rate": 0.00017187054752696929, "loss": 0.9895, "step": 697 }, { "epoch": 0.28379751982110185, "grad_norm": 0.09089677035808563, "learning_rate": 0.00017182983920211684, "loss": 1.0026, "step": 698 }, { "epoch": 0.2842041065257166, "grad_norm": 0.10096369683742523, "learning_rate": 0.0001717891308772644, "loss": 1.0723, "step": 699 }, { "epoch": 0.2846106932303314, "grad_norm": 0.09782766550779343, "learning_rate": 0.00017174842255241197, "loss": 1.0235, "step": 700 }, { "epoch": 0.28501727993494613, "grad_norm": 0.10204874724149704, "learning_rate": 0.00017170771422755953, "loss": 1.0794, "step": 701 }, { "epoch": 0.2854238666395609, "grad_norm": 0.08970806747674942, "learning_rate": 0.0001716670059027071, "loss": 1.0124, "step": 702 }, { "epoch": 0.2858304533441757, "grad_norm": 0.10221361368894577, "learning_rate": 0.0001716262975778547, "loss": 1.0053, "step": 703 }, { "epoch": 0.2862370400487904, "grad_norm": 0.09354016184806824, "learning_rate": 0.00017158558925300225, "loss": 1.0673, "step": 704 }, { "epoch": 0.28664362675340516, "grad_norm": 0.10699865967035294, "learning_rate": 0.00017154488092814983, "loss": 1.1191, "step": 705 }, { "epoch": 0.2870502134580199, "grad_norm": 0.10488536953926086, "learning_rate": 0.00017150417260329738, "loss": 1.0351, "step": 706 }, { "epoch": 0.2874568001626347, "grad_norm": 0.09427926689386368, "learning_rate": 0.00017146346427844496, "loss": 0.9539, "step": 707 }, { "epoch": 0.28786338686724944, "grad_norm": 0.1017470583319664, "learning_rate": 0.00017142275595359252, "loss": 1.0699, "step": 708 }, { "epoch": 0.2882699735718642, "grad_norm": 0.09857328236103058, "learning_rate": 0.00017138204762874007, "loss": 1.1037, "step": 709 }, { "epoch": 0.288676560276479, "grad_norm": 0.08667341619729996, "learning_rate": 0.00017134133930388765, "loss": 0.9774, "step": 710 }, { "epoch": 
0.2890831469810937, "grad_norm": 0.09951958060264587, "learning_rate": 0.0001713006309790352, "loss": 0.9343, "step": 711 }, { "epoch": 0.28948973368570846, "grad_norm": 0.11067653447389603, "learning_rate": 0.0001712599226541828, "loss": 1.004, "step": 712 }, { "epoch": 0.28989632039032326, "grad_norm": 0.09549311548471451, "learning_rate": 0.00017121921432933034, "loss": 0.9841, "step": 713 }, { "epoch": 0.290302907094938, "grad_norm": 0.08448553830385208, "learning_rate": 0.00017117850600447792, "loss": 0.8772, "step": 714 }, { "epoch": 0.29070949379955274, "grad_norm": 0.10418415814638138, "learning_rate": 0.0001711377976796255, "loss": 1.0146, "step": 715 }, { "epoch": 0.29111608050416754, "grad_norm": 0.0994696393609047, "learning_rate": 0.00017109708935477306, "loss": 1.0321, "step": 716 }, { "epoch": 0.2915226672087823, "grad_norm": 0.10770169645547867, "learning_rate": 0.00017105638102992064, "loss": 0.9757, "step": 717 }, { "epoch": 0.291929253913397, "grad_norm": 0.09281262755393982, "learning_rate": 0.0001710156727050682, "loss": 0.9315, "step": 718 }, { "epoch": 0.29233584061801177, "grad_norm": 0.09385684132575989, "learning_rate": 0.00017097496438021577, "loss": 0.984, "step": 719 }, { "epoch": 0.29274242732262656, "grad_norm": 0.09654410928487778, "learning_rate": 0.00017093425605536333, "loss": 1.0594, "step": 720 }, { "epoch": 0.2931490140272413, "grad_norm": 0.09549154341220856, "learning_rate": 0.00017089354773051088, "loss": 0.8973, "step": 721 }, { "epoch": 0.29355560073185605, "grad_norm": 0.10645559430122375, "learning_rate": 0.00017085283940565846, "loss": 1.0742, "step": 722 }, { "epoch": 0.29396218743647085, "grad_norm": 0.10242202132940292, "learning_rate": 0.00017081213108080602, "loss": 1.0859, "step": 723 }, { "epoch": 0.2943687741410856, "grad_norm": 0.09050785005092621, "learning_rate": 0.0001707714227559536, "loss": 0.9552, "step": 724 }, { "epoch": 0.29477536084570033, "grad_norm": 0.09605402499437332, "learning_rate": 
0.00017073071443110115, "loss": 1.0419, "step": 725 }, { "epoch": 0.2951819475503151, "grad_norm": 0.10179407894611359, "learning_rate": 0.00017069000610624873, "loss": 1.018, "step": 726 }, { "epoch": 0.29558853425492987, "grad_norm": 0.10907282680273056, "learning_rate": 0.00017064929778139631, "loss": 0.9834, "step": 727 }, { "epoch": 0.2959951209595446, "grad_norm": 0.09972859919071198, "learning_rate": 0.00017060858945654387, "loss": 0.9138, "step": 728 }, { "epoch": 0.2964017076641594, "grad_norm": 0.10035282373428345, "learning_rate": 0.00017056788113169145, "loss": 1.0388, "step": 729 }, { "epoch": 0.29680829436877415, "grad_norm": 0.09865434467792511, "learning_rate": 0.000170527172806839, "loss": 1.088, "step": 730 }, { "epoch": 0.2972148810733889, "grad_norm": 0.09653953462839127, "learning_rate": 0.00017048646448198659, "loss": 1.0562, "step": 731 }, { "epoch": 0.29762146777800363, "grad_norm": 0.09930591285228729, "learning_rate": 0.00017044575615713414, "loss": 1.0312, "step": 732 }, { "epoch": 0.29802805448261843, "grad_norm": 0.10804631561040878, "learning_rate": 0.0001704050478322817, "loss": 0.9913, "step": 733 }, { "epoch": 0.2984346411872332, "grad_norm": 0.09772603958845139, "learning_rate": 0.00017036433950742927, "loss": 1.0463, "step": 734 }, { "epoch": 0.2988412278918479, "grad_norm": 0.09799183160066605, "learning_rate": 0.00017032363118257683, "loss": 1.0048, "step": 735 }, { "epoch": 0.2992478145964627, "grad_norm": 0.09203165769577026, "learning_rate": 0.0001702829228577244, "loss": 0.9581, "step": 736 }, { "epoch": 0.29965440130107746, "grad_norm": 0.1009124219417572, "learning_rate": 0.000170242214532872, "loss": 1.0613, "step": 737 }, { "epoch": 0.3000609880056922, "grad_norm": 0.10120426118373871, "learning_rate": 0.00017020150620801954, "loss": 0.9948, "step": 738 }, { "epoch": 0.300467574710307, "grad_norm": 0.09432508796453476, "learning_rate": 0.00017016079788316713, "loss": 1.0334, "step": 739 }, { "epoch": 0.30087416141492174, 
"grad_norm": 0.09501038491725922, "learning_rate": 0.00017012008955831468, "loss": 1.0526, "step": 740 }, { "epoch": 0.3012807481195365, "grad_norm": 0.08357376605272293, "learning_rate": 0.00017007938123346226, "loss": 0.9447, "step": 741 }, { "epoch": 0.3016873348241513, "grad_norm": 0.10098525881767273, "learning_rate": 0.00017003867290860982, "loss": 1.0812, "step": 742 }, { "epoch": 0.302093921528766, "grad_norm": 0.09913761168718338, "learning_rate": 0.0001699979645837574, "loss": 0.9696, "step": 743 }, { "epoch": 0.30250050823338076, "grad_norm": 0.10989236831665039, "learning_rate": 0.00016995725625890495, "loss": 1.1352, "step": 744 }, { "epoch": 0.3029070949379955, "grad_norm": 0.09131161123514175, "learning_rate": 0.0001699165479340525, "loss": 0.9211, "step": 745 }, { "epoch": 0.3033136816426103, "grad_norm": 0.09235724061727524, "learning_rate": 0.00016987583960920009, "loss": 0.9645, "step": 746 }, { "epoch": 0.30372026834722504, "grad_norm": 0.09928274899721146, "learning_rate": 0.00016983513128434764, "loss": 1.0359, "step": 747 }, { "epoch": 0.3041268550518398, "grad_norm": 0.094466932117939, "learning_rate": 0.00016979442295949522, "loss": 0.9406, "step": 748 }, { "epoch": 0.3045334417564546, "grad_norm": 0.0981815829873085, "learning_rate": 0.0001697537146346428, "loss": 1.0819, "step": 749 }, { "epoch": 0.3049400284610693, "grad_norm": 0.10226401686668396, "learning_rate": 0.00016971300630979036, "loss": 1.0499, "step": 750 }, { "epoch": 0.30534661516568407, "grad_norm": 0.09603258967399597, "learning_rate": 0.00016967229798493794, "loss": 1.046, "step": 751 }, { "epoch": 0.30575320187029886, "grad_norm": 0.08673054724931717, "learning_rate": 0.0001696315896600855, "loss": 0.9594, "step": 752 }, { "epoch": 0.3061597885749136, "grad_norm": 0.08766573667526245, "learning_rate": 0.00016959088133523307, "loss": 0.9368, "step": 753 }, { "epoch": 0.30656637527952835, "grad_norm": 0.09147453308105469, "learning_rate": 0.00016955017301038063, "loss": 
1.0122, "step": 754 }, { "epoch": 0.30697296198414314, "grad_norm": 0.09681879729032516, "learning_rate": 0.0001695094646855282, "loss": 1.0743, "step": 755 }, { "epoch": 0.3073795486887579, "grad_norm": 0.09195173531770706, "learning_rate": 0.00016946875636067576, "loss": 0.9974, "step": 756 }, { "epoch": 0.30778613539337263, "grad_norm": 0.08841879665851593, "learning_rate": 0.00016942804803582332, "loss": 0.9233, "step": 757 }, { "epoch": 0.30819272209798737, "grad_norm": 0.09924349188804626, "learning_rate": 0.0001693873397109709, "loss": 1.0277, "step": 758 }, { "epoch": 0.30859930880260217, "grad_norm": 0.09324993193149567, "learning_rate": 0.00016934663138611845, "loss": 0.9836, "step": 759 }, { "epoch": 0.3090058955072169, "grad_norm": 0.0990774929523468, "learning_rate": 0.00016930592306126606, "loss": 1.1064, "step": 760 }, { "epoch": 0.30941248221183165, "grad_norm": 0.09344697743654251, "learning_rate": 0.00016926521473641361, "loss": 0.9465, "step": 761 }, { "epoch": 0.30981906891644645, "grad_norm": 0.10055997967720032, "learning_rate": 0.00016922450641156117, "loss": 1.0706, "step": 762 }, { "epoch": 0.3102256556210612, "grad_norm": 0.09746789187192917, "learning_rate": 0.00016918379808670875, "loss": 1.0355, "step": 763 }, { "epoch": 0.31063224232567593, "grad_norm": 0.08599979430437088, "learning_rate": 0.0001691430897618563, "loss": 0.9445, "step": 764 }, { "epoch": 0.31103882903029073, "grad_norm": 0.09424544125795364, "learning_rate": 0.00016910238143700388, "loss": 1.0393, "step": 765 }, { "epoch": 0.3114454157349055, "grad_norm": 0.09379003942012787, "learning_rate": 0.00016906167311215144, "loss": 0.9328, "step": 766 }, { "epoch": 0.3118520024395202, "grad_norm": 0.09361857920885086, "learning_rate": 0.00016902096478729902, "loss": 1.017, "step": 767 }, { "epoch": 0.312258589144135, "grad_norm": 0.09718876332044601, "learning_rate": 0.00016898025646244657, "loss": 1.057, "step": 768 }, { "epoch": 0.31266517584874975, "grad_norm": 
0.10596045851707458, "learning_rate": 0.00016893954813759413, "loss": 1.1706, "step": 769 }, { "epoch": 0.3130717625533645, "grad_norm": 0.09151418507099152, "learning_rate": 0.0001688988398127417, "loss": 1.0758, "step": 770 }, { "epoch": 0.31347834925797924, "grad_norm": 0.11438169330358505, "learning_rate": 0.00016885813148788926, "loss": 0.9558, "step": 771 }, { "epoch": 0.31388493596259404, "grad_norm": 0.10611554235219955, "learning_rate": 0.00016881742316303687, "loss": 1.1583, "step": 772 }, { "epoch": 0.3142915226672088, "grad_norm": 0.10111712664365768, "learning_rate": 0.00016877671483818443, "loss": 1.0377, "step": 773 }, { "epoch": 0.3146981093718235, "grad_norm": 0.08957924693822861, "learning_rate": 0.00016873600651333198, "loss": 0.9819, "step": 774 }, { "epoch": 0.3151046960764383, "grad_norm": 0.09603768587112427, "learning_rate": 0.00016869529818847956, "loss": 0.9849, "step": 775 }, { "epoch": 0.31551128278105306, "grad_norm": 0.10142724215984344, "learning_rate": 0.00016865458986362712, "loss": 1.0325, "step": 776 }, { "epoch": 0.3159178694856678, "grad_norm": 0.09229385852813721, "learning_rate": 0.0001686138815387747, "loss": 1.0085, "step": 777 }, { "epoch": 0.3163244561902826, "grad_norm": 0.09404195845127106, "learning_rate": 0.00016857317321392225, "loss": 0.9239, "step": 778 }, { "epoch": 0.31673104289489734, "grad_norm": 0.0984378457069397, "learning_rate": 0.00016853246488906983, "loss": 0.9911, "step": 779 }, { "epoch": 0.3171376295995121, "grad_norm": 0.09309301525354385, "learning_rate": 0.00016849175656421739, "loss": 0.9738, "step": 780 }, { "epoch": 0.3175442163041269, "grad_norm": 0.09041745960712433, "learning_rate": 0.00016845104823936494, "loss": 0.9106, "step": 781 }, { "epoch": 0.3179508030087416, "grad_norm": 0.09435202926397324, "learning_rate": 0.00016841033991451252, "loss": 0.9261, "step": 782 }, { "epoch": 0.31835738971335636, "grad_norm": 0.10118155926465988, "learning_rate": 0.0001683696315896601, "loss": 0.9127, 
"step": 783 }, { "epoch": 0.3187639764179711, "grad_norm": 0.09075888991355896, "learning_rate": 0.00016832892326480768, "loss": 0.9201, "step": 784 }, { "epoch": 0.3191705631225859, "grad_norm": 0.11029943823814392, "learning_rate": 0.00016828821493995524, "loss": 1.1284, "step": 785 }, { "epoch": 0.31957714982720065, "grad_norm": 0.08948411792516708, "learning_rate": 0.0001682475066151028, "loss": 1.0084, "step": 786 }, { "epoch": 0.3199837365318154, "grad_norm": 0.09159238636493683, "learning_rate": 0.00016820679829025037, "loss": 0.9912, "step": 787 }, { "epoch": 0.3203903232364302, "grad_norm": 0.10452720522880554, "learning_rate": 0.00016816608996539793, "loss": 1.0907, "step": 788 }, { "epoch": 0.3207969099410449, "grad_norm": 0.10225928574800491, "learning_rate": 0.0001681253816405455, "loss": 1.06, "step": 789 }, { "epoch": 0.32120349664565967, "grad_norm": 0.09055547416210175, "learning_rate": 0.00016808467331569306, "loss": 0.9644, "step": 790 }, { "epoch": 0.32161008335027447, "grad_norm": 0.10098310559988022, "learning_rate": 0.00016804396499084064, "loss": 1.2431, "step": 791 }, { "epoch": 0.3220166700548892, "grad_norm": 0.09253955632448196, "learning_rate": 0.0001680032566659882, "loss": 0.8664, "step": 792 }, { "epoch": 0.32242325675950395, "grad_norm": 0.09952203184366226, "learning_rate": 0.00016796254834113575, "loss": 1.0275, "step": 793 }, { "epoch": 0.32282984346411875, "grad_norm": 1376.9244384765625, "learning_rate": 0.00016792184001628333, "loss": 0.971, "step": 794 }, { "epoch": 0.3232364301687335, "grad_norm": 0.0970926433801651, "learning_rate": 0.00016788113169143091, "loss": 1.0805, "step": 795 }, { "epoch": 0.32364301687334823, "grad_norm": 0.0982482060790062, "learning_rate": 0.00016784042336657847, "loss": 1.0573, "step": 796 }, { "epoch": 0.324049603577963, "grad_norm": 0.0871889740228653, "learning_rate": 0.00016779971504172605, "loss": 0.9305, "step": 797 }, { "epoch": 0.32445619028257777, "grad_norm": 0.10493195801973343, 
"learning_rate": 0.0001677590067168736, "loss": 1.052, "step": 798 }, { "epoch": 0.3248627769871925, "grad_norm": 0.10067540407180786, "learning_rate": 0.00016771829839202118, "loss": 1.0014, "step": 799 }, { "epoch": 0.32526936369180726, "grad_norm": 0.12116575986146927, "learning_rate": 0.00016767759006716874, "loss": 1.0831, "step": 800 }, { "epoch": 0.32567595039642205, "grad_norm": 0.10948335379362106, "learning_rate": 0.00016763688174231632, "loss": 0.9823, "step": 801 }, { "epoch": 0.3260825371010368, "grad_norm": 0.12705056369304657, "learning_rate": 0.00016759617341746387, "loss": 1.0244, "step": 802 }, { "epoch": 0.32648912380565154, "grad_norm": 0.10819011181592941, "learning_rate": 0.00016755546509261143, "loss": 1.057, "step": 803 }, { "epoch": 0.32689571051026634, "grad_norm": 0.0998898446559906, "learning_rate": 0.000167514756767759, "loss": 1.0062, "step": 804 }, { "epoch": 0.3273022972148811, "grad_norm": 0.10293715447187424, "learning_rate": 0.00016747404844290656, "loss": 0.9149, "step": 805 }, { "epoch": 0.3277088839194958, "grad_norm": 0.10077858716249466, "learning_rate": 0.00016743334011805417, "loss": 0.9758, "step": 806 }, { "epoch": 0.3281154706241106, "grad_norm": 0.10782469809055328, "learning_rate": 0.00016739263179320173, "loss": 1.1042, "step": 807 }, { "epoch": 0.32852205732872536, "grad_norm": 0.12658415734767914, "learning_rate": 0.00016735192346834928, "loss": 0.9899, "step": 808 }, { "epoch": 0.3289286440333401, "grad_norm": 0.10533516108989716, "learning_rate": 0.00016731121514349686, "loss": 1.0378, "step": 809 }, { "epoch": 0.32933523073795484, "grad_norm": 0.09154223650693893, "learning_rate": 0.00016727050681864441, "loss": 0.9687, "step": 810 }, { "epoch": 0.32974181744256964, "grad_norm": 0.10092673450708389, "learning_rate": 0.000167229798493792, "loss": 1.1347, "step": 811 }, { "epoch": 0.3301484041471844, "grad_norm": 0.11135310679674149, "learning_rate": 0.00016718909016893955, "loss": 1.1032, "step": 812 }, { "epoch": 
0.3305549908517991, "grad_norm": 0.11181894689798355, "learning_rate": 0.00016714838184408713, "loss": 1.0279, "step": 813 }, { "epoch": 0.3309615775564139, "grad_norm": 0.09581311047077179, "learning_rate": 0.00016710767351923469, "loss": 0.9012, "step": 814 }, { "epoch": 0.33136816426102866, "grad_norm": 0.09267252683639526, "learning_rate": 0.00016706696519438224, "loss": 0.872, "step": 815 }, { "epoch": 0.3317747509656434, "grad_norm": 0.10392332822084427, "learning_rate": 0.00016702625686952982, "loss": 1.017, "step": 816 }, { "epoch": 0.3321813376702582, "grad_norm": 0.10805673897266388, "learning_rate": 0.00016698554854467737, "loss": 1.0013, "step": 817 }, { "epoch": 0.33258792437487295, "grad_norm": 0.09724876284599304, "learning_rate": 0.00016694484021982498, "loss": 0.9708, "step": 818 }, { "epoch": 0.3329945110794877, "grad_norm": 0.09445172548294067, "learning_rate": 0.00016690413189497254, "loss": 1.0422, "step": 819 }, { "epoch": 0.3334010977841025, "grad_norm": 0.09621834754943848, "learning_rate": 0.0001668634235701201, "loss": 1.0557, "step": 820 }, { "epoch": 0.3338076844887172, "grad_norm": 0.09800583869218826, "learning_rate": 0.00016682271524526767, "loss": 0.8907, "step": 821 }, { "epoch": 0.33421427119333197, "grad_norm": 0.0968775674700737, "learning_rate": 0.00016678200692041523, "loss": 0.9531, "step": 822 }, { "epoch": 0.3346208578979467, "grad_norm": 0.10581623017787933, "learning_rate": 0.0001667412985955628, "loss": 0.9462, "step": 823 }, { "epoch": 0.3350274446025615, "grad_norm": 0.10356664657592773, "learning_rate": 0.00016670059027071036, "loss": 1.0315, "step": 824 }, { "epoch": 0.33543403130717625, "grad_norm": 0.10202962160110474, "learning_rate": 0.00016665988194585794, "loss": 1.004, "step": 825 }, { "epoch": 0.335840618011791, "grad_norm": 0.09426257014274597, "learning_rate": 0.0001666191736210055, "loss": 0.958, "step": 826 }, { "epoch": 0.3362472047164058, "grad_norm": 0.09597484767436981, "learning_rate": 
0.00016657846529615305, "loss": 0.9446, "step": 827 }, { "epoch": 0.33665379142102053, "grad_norm": 0.10052574425935745, "learning_rate": 0.00016653775697130063, "loss": 1.0467, "step": 828 }, { "epoch": 0.3370603781256353, "grad_norm": 0.09183933585882187, "learning_rate": 0.0001664970486464482, "loss": 0.9665, "step": 829 }, { "epoch": 0.33746696483025007, "grad_norm": 0.10629544407129288, "learning_rate": 0.0001664563403215958, "loss": 1.1186, "step": 830 }, { "epoch": 0.3378735515348648, "grad_norm": 0.09273683279752731, "learning_rate": 0.00016641563199674335, "loss": 1.0235, "step": 831 }, { "epoch": 0.33828013823947956, "grad_norm": 0.10386747121810913, "learning_rate": 0.0001663749236718909, "loss": 1.1214, "step": 832 }, { "epoch": 0.33868672494409435, "grad_norm": 0.0918426588177681, "learning_rate": 0.00016633421534703848, "loss": 0.9122, "step": 833 }, { "epoch": 0.3390933116487091, "grad_norm": 0.09304569661617279, "learning_rate": 0.00016629350702218604, "loss": 0.9478, "step": 834 }, { "epoch": 0.33949989835332384, "grad_norm": 0.09941792488098145, "learning_rate": 0.00016625279869733362, "loss": 0.9306, "step": 835 }, { "epoch": 0.3399064850579386, "grad_norm": 0.10528752952814102, "learning_rate": 0.00016621209037248117, "loss": 1.0313, "step": 836 }, { "epoch": 0.3403130717625534, "grad_norm": 0.09300185739994049, "learning_rate": 0.00016617138204762875, "loss": 0.9339, "step": 837 }, { "epoch": 0.3407196584671681, "grad_norm": 0.10146823525428772, "learning_rate": 0.0001661306737227763, "loss": 1.0543, "step": 838 }, { "epoch": 0.34112624517178286, "grad_norm": 0.10440155863761902, "learning_rate": 0.00016608996539792386, "loss": 1.1149, "step": 839 }, { "epoch": 0.34153283187639766, "grad_norm": 0.1079946756362915, "learning_rate": 0.00016604925707307144, "loss": 1.1279, "step": 840 }, { "epoch": 0.3419394185810124, "grad_norm": 0.08682949841022491, "learning_rate": 0.00016600854874821902, "loss": 0.878, "step": 841 }, { "epoch": 
0.34234600528562714, "grad_norm": 0.10855992883443832, "learning_rate": 0.0001659678404233666, "loss": 0.9611, "step": 842 }, { "epoch": 0.34275259199024194, "grad_norm": 0.089480921626091, "learning_rate": 0.00016592713209851416, "loss": 0.9065, "step": 843 }, { "epoch": 0.3431591786948567, "grad_norm": 0.0934784933924675, "learning_rate": 0.00016588642377366171, "loss": 1.0118, "step": 844 }, { "epoch": 0.3435657653994714, "grad_norm": 0.10359178483486176, "learning_rate": 0.0001658457154488093, "loss": 1.0117, "step": 845 }, { "epoch": 0.3439723521040862, "grad_norm": 0.09534402191638947, "learning_rate": 0.00016580500712395685, "loss": 0.9719, "step": 846 }, { "epoch": 0.34437893880870096, "grad_norm": 0.1008550152182579, "learning_rate": 0.00016576429879910443, "loss": 1.1044, "step": 847 }, { "epoch": 0.3447855255133157, "grad_norm": 0.09598672389984131, "learning_rate": 0.00016572359047425198, "loss": 0.9764, "step": 848 }, { "epoch": 0.34519211221793045, "grad_norm": 0.09562084078788757, "learning_rate": 0.00016568288214939957, "loss": 0.9964, "step": 849 }, { "epoch": 0.34559869892254524, "grad_norm": 0.0925833135843277, "learning_rate": 0.00016564217382454712, "loss": 0.9717, "step": 850 }, { "epoch": 0.34600528562716, "grad_norm": 0.09141584485769272, "learning_rate": 0.00016560146549969467, "loss": 0.8164, "step": 851 }, { "epoch": 0.34641187233177473, "grad_norm": 0.09384193271398544, "learning_rate": 0.00016556075717484228, "loss": 1.0281, "step": 852 }, { "epoch": 0.3468184590363895, "grad_norm": 0.08962181210517883, "learning_rate": 0.00016552004884998984, "loss": 0.9792, "step": 853 }, { "epoch": 0.34722504574100427, "grad_norm": 0.1070941761136055, "learning_rate": 0.00016547934052513742, "loss": 0.9392, "step": 854 }, { "epoch": 0.347631632445619, "grad_norm": 0.0990995392203331, "learning_rate": 0.00016543863220028497, "loss": 1.015, "step": 855 }, { "epoch": 0.3480382191502338, "grad_norm": 0.10476068407297134, "learning_rate": 
0.00016539792387543253, "loss": 0.974, "step": 856 }, { "epoch": 0.34844480585484855, "grad_norm": 0.09355341643095016, "learning_rate": 0.0001653572155505801, "loss": 0.9558, "step": 857 }, { "epoch": 0.3488513925594633, "grad_norm": 0.10174727439880371, "learning_rate": 0.00016531650722572766, "loss": 0.9864, "step": 858 }, { "epoch": 0.3492579792640781, "grad_norm": 0.09851422905921936, "learning_rate": 0.00016527579890087524, "loss": 0.8641, "step": 859 }, { "epoch": 0.34966456596869283, "grad_norm": 0.10708778351545334, "learning_rate": 0.0001652350905760228, "loss": 0.9883, "step": 860 }, { "epoch": 0.3500711526733076, "grad_norm": 0.0919436663389206, "learning_rate": 0.00016519438225117038, "loss": 1.0178, "step": 861 }, { "epoch": 0.3504777393779223, "grad_norm": 0.09071869403123856, "learning_rate": 0.00016515367392631793, "loss": 1.0118, "step": 862 }, { "epoch": 0.3508843260825371, "grad_norm": 0.10379180312156677, "learning_rate": 0.00016511296560146549, "loss": 1.0068, "step": 863 }, { "epoch": 0.35129091278715185, "grad_norm": 0.10693410784006119, "learning_rate": 0.0001650722572766131, "loss": 1.0659, "step": 864 }, { "epoch": 0.3516974994917666, "grad_norm": 0.08886521309614182, "learning_rate": 0.00016503154895176065, "loss": 0.9019, "step": 865 }, { "epoch": 0.3521040861963814, "grad_norm": 0.110934779047966, "learning_rate": 0.00016499084062690823, "loss": 1.0018, "step": 866 }, { "epoch": 0.35251067290099614, "grad_norm": 0.09179084002971649, "learning_rate": 0.00016495013230205578, "loss": 1.0337, "step": 867 }, { "epoch": 0.3529172596056109, "grad_norm": 0.10181482136249542, "learning_rate": 0.00016490942397720334, "loss": 1.0436, "step": 868 }, { "epoch": 0.3533238463102257, "grad_norm": 0.10369636118412018, "learning_rate": 0.00016486871565235092, "loss": 1.0509, "step": 869 }, { "epoch": 0.3537304330148404, "grad_norm": 0.09437630325555801, "learning_rate": 0.00016482800732749847, "loss": 0.9957, "step": 870 }, { "epoch": 
0.35413701971945516, "grad_norm": 0.0950985848903656, "learning_rate": 0.00016478729900264605, "loss": 1.037, "step": 871 }, { "epoch": 0.35454360642406996, "grad_norm": 0.09876928478479385, "learning_rate": 0.0001647465906777936, "loss": 0.9807, "step": 872 }, { "epoch": 0.3549501931286847, "grad_norm": 0.1047278568148613, "learning_rate": 0.0001647058823529412, "loss": 1.1218, "step": 873 }, { "epoch": 0.35535677983329944, "grad_norm": 0.10372647643089294, "learning_rate": 0.00016466517402808874, "loss": 0.9792, "step": 874 }, { "epoch": 0.3557633665379142, "grad_norm": 0.09626027941703796, "learning_rate": 0.00016462446570323632, "loss": 1.0559, "step": 875 }, { "epoch": 0.356169953242529, "grad_norm": 0.09671976417303085, "learning_rate": 0.0001645837573783839, "loss": 0.9966, "step": 876 }, { "epoch": 0.3565765399471437, "grad_norm": 0.09746148437261581, "learning_rate": 0.00016454304905353146, "loss": 1.0355, "step": 877 }, { "epoch": 0.35698312665175846, "grad_norm": 0.10173183679580688, "learning_rate": 0.00016450234072867904, "loss": 1.0395, "step": 878 }, { "epoch": 0.35738971335637326, "grad_norm": 0.09872651845216751, "learning_rate": 0.0001644616324038266, "loss": 0.9183, "step": 879 }, { "epoch": 0.357796300060988, "grad_norm": 0.08974689990282059, "learning_rate": 0.00016442092407897415, "loss": 0.9767, "step": 880 }, { "epoch": 0.35820288676560275, "grad_norm": 0.10287570208311081, "learning_rate": 0.00016438021575412173, "loss": 0.9762, "step": 881 }, { "epoch": 0.35860947347021754, "grad_norm": 0.09830573201179504, "learning_rate": 0.00016433950742926928, "loss": 0.9674, "step": 882 }, { "epoch": 0.3590160601748323, "grad_norm": 0.0909447893500328, "learning_rate": 0.00016429879910441687, "loss": 0.9603, "step": 883 }, { "epoch": 0.359422646879447, "grad_norm": 0.09882505238056183, "learning_rate": 0.00016425809077956442, "loss": 0.994, "step": 884 }, { "epoch": 0.3598292335840618, "grad_norm": 0.09665820002555847, "learning_rate": 
0.000164217382454712, "loss": 1.0242, "step": 885 }, { "epoch": 0.36023582028867657, "grad_norm": 0.09496365487575531, "learning_rate": 0.00016417667412985955, "loss": 0.9487, "step": 886 }, { "epoch": 0.3606424069932913, "grad_norm": 0.10158530622720718, "learning_rate": 0.00016413596580500714, "loss": 1.0484, "step": 887 }, { "epoch": 0.36104899369790605, "grad_norm": 0.09193973243236542, "learning_rate": 0.00016409525748015472, "loss": 0.9148, "step": 888 }, { "epoch": 0.36145558040252085, "grad_norm": 0.08965738117694855, "learning_rate": 0.00016405454915530227, "loss": 0.8086, "step": 889 }, { "epoch": 0.3618621671071356, "grad_norm": 0.09757012128829956, "learning_rate": 0.00016401384083044983, "loss": 1.0015, "step": 890 }, { "epoch": 0.36226875381175033, "grad_norm": 0.10004543513059616, "learning_rate": 0.0001639731325055974, "loss": 0.9308, "step": 891 }, { "epoch": 0.36267534051636513, "grad_norm": 0.09754510223865509, "learning_rate": 0.00016393242418074496, "loss": 1.1236, "step": 892 }, { "epoch": 0.36308192722097987, "grad_norm": 0.08879724144935608, "learning_rate": 0.00016389171585589254, "loss": 0.9708, "step": 893 }, { "epoch": 0.3634885139255946, "grad_norm": 0.09469077736139297, "learning_rate": 0.0001638510075310401, "loss": 0.9263, "step": 894 }, { "epoch": 0.3638951006302094, "grad_norm": 0.09939096122980118, "learning_rate": 0.00016381029920618768, "loss": 1.0967, "step": 895 }, { "epoch": 0.36430168733482415, "grad_norm": 0.10240636765956879, "learning_rate": 0.00016376959088133523, "loss": 0.992, "step": 896 }, { "epoch": 0.3647082740394389, "grad_norm": 0.09816282987594604, "learning_rate": 0.0001637288825564828, "loss": 1.051, "step": 897 }, { "epoch": 0.3651148607440537, "grad_norm": 0.10622686892747879, "learning_rate": 0.0001636881742316304, "loss": 1.0324, "step": 898 }, { "epoch": 0.36552144744866844, "grad_norm": 0.08964063972234726, "learning_rate": 0.00016364746590677795, "loss": 0.9818, "step": 899 }, { "epoch": 
0.3659280341532832, "grad_norm": 0.09420112520456314, "learning_rate": 0.00016360675758192553, "loss": 1.0712, "step": 900 }, { "epoch": 0.3663346208578979, "grad_norm": 0.09154132008552551, "learning_rate": 0.00016356604925707308, "loss": 0.9368, "step": 901 }, { "epoch": 0.3667412075625127, "grad_norm": 0.09309022128582001, "learning_rate": 0.00016352534093222064, "loss": 0.9532, "step": 902 }, { "epoch": 0.36714779426712746, "grad_norm": 0.09177148342132568, "learning_rate": 0.00016348463260736822, "loss": 0.9278, "step": 903 }, { "epoch": 0.3675543809717422, "grad_norm": 0.09655489027500153, "learning_rate": 0.00016344392428251577, "loss": 0.989, "step": 904 }, { "epoch": 0.367960967676357, "grad_norm": 0.09367511421442032, "learning_rate": 0.00016340321595766335, "loss": 0.9545, "step": 905 }, { "epoch": 0.36836755438097174, "grad_norm": 0.09844557195901871, "learning_rate": 0.0001633625076328109, "loss": 0.9813, "step": 906 }, { "epoch": 0.3687741410855865, "grad_norm": 0.09753169119358063, "learning_rate": 0.0001633217993079585, "loss": 0.9828, "step": 907 }, { "epoch": 0.3691807277902013, "grad_norm": 0.09012485295534134, "learning_rate": 0.00016328109098310604, "loss": 0.9234, "step": 908 }, { "epoch": 0.369587314494816, "grad_norm": 0.09102308750152588, "learning_rate": 0.0001632403826582536, "loss": 0.9435, "step": 909 }, { "epoch": 0.36999390119943076, "grad_norm": 0.10014689713716507, "learning_rate": 0.0001631996743334012, "loss": 0.9706, "step": 910 }, { "epoch": 0.37040048790404556, "grad_norm": 0.09847233444452286, "learning_rate": 0.00016315896600854876, "loss": 1.0053, "step": 911 }, { "epoch": 0.3708070746086603, "grad_norm": 0.08919807523488998, "learning_rate": 0.00016311825768369634, "loss": 0.9453, "step": 912 }, { "epoch": 0.37121366131327505, "grad_norm": 0.09738612174987793, "learning_rate": 0.0001630775493588439, "loss": 0.9532, "step": 913 }, { "epoch": 0.37162024801788984, "grad_norm": 0.09383881837129593, "learning_rate": 
0.00016303684103399145, "loss": 0.9785, "step": 914 }, { "epoch": 0.3720268347225046, "grad_norm": 0.09604702144861221, "learning_rate": 0.00016299613270913903, "loss": 0.9903, "step": 915 }, { "epoch": 0.3724334214271193, "grad_norm": 0.10132728517055511, "learning_rate": 0.00016295542438428658, "loss": 0.9108, "step": 916 }, { "epoch": 0.37284000813173407, "grad_norm": 0.09230207651853561, "learning_rate": 0.00016291471605943417, "loss": 0.9311, "step": 917 }, { "epoch": 0.37324659483634887, "grad_norm": 0.1002974808216095, "learning_rate": 0.00016287400773458172, "loss": 1.036, "step": 918 }, { "epoch": 0.3736531815409636, "grad_norm": 0.10646216571331024, "learning_rate": 0.0001628332994097293, "loss": 1.0322, "step": 919 }, { "epoch": 0.37405976824557835, "grad_norm": 0.10693056881427765, "learning_rate": 0.00016279259108487685, "loss": 0.9632, "step": 920 }, { "epoch": 0.37446635495019315, "grad_norm": 0.09401918947696686, "learning_rate": 0.00016275188276002444, "loss": 0.9099, "step": 921 }, { "epoch": 0.3748729416548079, "grad_norm": 0.09380720555782318, "learning_rate": 0.00016271117443517202, "loss": 0.9477, "step": 922 }, { "epoch": 0.37527952835942263, "grad_norm": 0.10077174752950668, "learning_rate": 0.00016267046611031957, "loss": 1.0681, "step": 923 }, { "epoch": 0.37568611506403743, "grad_norm": 0.09828921407461166, "learning_rate": 0.00016262975778546715, "loss": 1.0714, "step": 924 }, { "epoch": 0.37609270176865217, "grad_norm": 0.09168947488069534, "learning_rate": 0.0001625890494606147, "loss": 0.9704, "step": 925 }, { "epoch": 0.3764992884732669, "grad_norm": 0.11230597645044327, "learning_rate": 0.00016254834113576226, "loss": 1.035, "step": 926 }, { "epoch": 0.3769058751778817, "grad_norm": 0.09307698905467987, "learning_rate": 0.00016250763281090984, "loss": 1.0032, "step": 927 }, { "epoch": 0.37731246188249645, "grad_norm": 0.0920303463935852, "learning_rate": 0.0001624669244860574, "loss": 0.9544, "step": 928 }, { "epoch": 
0.3777190485871112, "grad_norm": 0.09571109712123871, "learning_rate": 0.00016242621616120498, "loss": 1.0121, "step": 929 }, { "epoch": 0.37812563529172594, "grad_norm": 0.10284926742315292, "learning_rate": 0.00016238550783635253, "loss": 1.0774, "step": 930 }, { "epoch": 0.37853222199634073, "grad_norm": 0.10457168519496918, "learning_rate": 0.0001623447995115001, "loss": 1.0838, "step": 931 }, { "epoch": 0.3789388087009555, "grad_norm": 0.09635209292173386, "learning_rate": 0.00016230409118664767, "loss": 1.0938, "step": 932 }, { "epoch": 0.3793453954055702, "grad_norm": 0.09526656568050385, "learning_rate": 0.00016226338286179525, "loss": 0.8854, "step": 933 }, { "epoch": 0.379751982110185, "grad_norm": 0.09416744112968445, "learning_rate": 0.00016222267453694283, "loss": 0.9763, "step": 934 }, { "epoch": 0.38015856881479976, "grad_norm": 0.10477912425994873, "learning_rate": 0.00016218196621209038, "loss": 1.0202, "step": 935 }, { "epoch": 0.3805651555194145, "grad_norm": 0.1058596596121788, "learning_rate": 0.00016214125788723796, "loss": 1.094, "step": 936 }, { "epoch": 0.3809717422240293, "grad_norm": 0.09116796404123306, "learning_rate": 0.00016210054956238552, "loss": 0.8538, "step": 937 }, { "epoch": 0.38137832892864404, "grad_norm": 0.10126717388629913, "learning_rate": 0.00016205984123753307, "loss": 1.0668, "step": 938 }, { "epoch": 0.3817849156332588, "grad_norm": 0.09571392089128494, "learning_rate": 0.00016201913291268065, "loss": 1.0369, "step": 939 }, { "epoch": 0.3821915023378736, "grad_norm": 0.0956893190741539, "learning_rate": 0.0001619784245878282, "loss": 1.0634, "step": 940 }, { "epoch": 0.3825980890424883, "grad_norm": 0.09609857201576233, "learning_rate": 0.0001619377162629758, "loss": 1.0129, "step": 941 }, { "epoch": 0.38300467574710306, "grad_norm": 0.09440251439809799, "learning_rate": 0.00016189700793812334, "loss": 1.0692, "step": 942 }, { "epoch": 0.3834112624517178, "grad_norm": 0.09696158766746521, "learning_rate": 
0.00016185629961327092, "loss": 1.0562, "step": 943 }, { "epoch": 0.3838178491563326, "grad_norm": 0.09598648548126221, "learning_rate": 0.0001618155912884185, "loss": 1.0046, "step": 944 }, { "epoch": 0.38422443586094734, "grad_norm": 0.09640836715698242, "learning_rate": 0.00016177488296356606, "loss": 0.9735, "step": 945 }, { "epoch": 0.3846310225655621, "grad_norm": 0.08648312091827393, "learning_rate": 0.00016173417463871364, "loss": 0.8721, "step": 946 }, { "epoch": 0.3850376092701769, "grad_norm": 0.09831465780735016, "learning_rate": 0.0001616934663138612, "loss": 0.9943, "step": 947 }, { "epoch": 0.3854441959747916, "grad_norm": 0.09906010329723358, "learning_rate": 0.00016165275798900878, "loss": 1.0565, "step": 948 }, { "epoch": 0.38585078267940637, "grad_norm": 0.08963965624570847, "learning_rate": 0.00016161204966415633, "loss": 0.9376, "step": 949 }, { "epoch": 0.38625736938402117, "grad_norm": 0.09176084399223328, "learning_rate": 0.00016157134133930388, "loss": 0.9542, "step": 950 }, { "epoch": 0.3866639560886359, "grad_norm": 0.09641337394714355, "learning_rate": 0.00016153063301445146, "loss": 1.0248, "step": 951 }, { "epoch": 0.38707054279325065, "grad_norm": 0.09608114510774612, "learning_rate": 0.00016148992468959902, "loss": 0.9336, "step": 952 }, { "epoch": 0.38747712949786545, "grad_norm": 0.1011141762137413, "learning_rate": 0.0001614492163647466, "loss": 0.9581, "step": 953 }, { "epoch": 0.3878837162024802, "grad_norm": 0.08915555477142334, "learning_rate": 0.00016140850803989415, "loss": 0.9766, "step": 954 }, { "epoch": 0.38829030290709493, "grad_norm": 0.09591024369001389, "learning_rate": 0.00016136779971504174, "loss": 0.9601, "step": 955 }, { "epoch": 0.3886968896117097, "grad_norm": 0.0986289530992508, "learning_rate": 0.00016132709139018932, "loss": 0.9403, "step": 956 }, { "epoch": 0.38910347631632447, "grad_norm": 0.11183958500623703, "learning_rate": 0.00016128638306533687, "loss": 1.1319, "step": 957 }, { "epoch": 
0.3895100630209392, "grad_norm": 0.09588544070720673, "learning_rate": 0.00016124567474048445, "loss": 0.949, "step": 958 }, { "epoch": 0.38991664972555395, "grad_norm": 0.09099708497524261, "learning_rate": 0.000161204966415632, "loss": 0.8462, "step": 959 }, { "epoch": 0.39032323643016875, "grad_norm": 0.08816317468881607, "learning_rate": 0.0001611642580907796, "loss": 0.9555, "step": 960 }, { "epoch": 0.3907298231347835, "grad_norm": 0.10011658817529678, "learning_rate": 0.00016112354976592714, "loss": 0.984, "step": 961 }, { "epoch": 0.39113640983939824, "grad_norm": 0.10004715621471405, "learning_rate": 0.0001610828414410747, "loss": 1.0356, "step": 962 }, { "epoch": 0.39154299654401303, "grad_norm": 0.09157074242830276, "learning_rate": 0.00016104213311622228, "loss": 1.0009, "step": 963 }, { "epoch": 0.3919495832486278, "grad_norm": 0.09388852119445801, "learning_rate": 0.00016100142479136983, "loss": 0.9339, "step": 964 }, { "epoch": 0.3923561699532425, "grad_norm": 0.08737456053495407, "learning_rate": 0.0001609607164665174, "loss": 0.9075, "step": 965 }, { "epoch": 0.3927627566578573, "grad_norm": 0.10345963388681412, "learning_rate": 0.00016092000814166497, "loss": 0.9599, "step": 966 }, { "epoch": 0.39316934336247206, "grad_norm": 0.09817633777856827, "learning_rate": 0.00016087929981681255, "loss": 0.9688, "step": 967 }, { "epoch": 0.3935759300670868, "grad_norm": 0.09691375494003296, "learning_rate": 0.00016083859149196013, "loss": 0.9936, "step": 968 }, { "epoch": 0.39398251677170154, "grad_norm": 0.09408018738031387, "learning_rate": 0.00016079788316710768, "loss": 1.0414, "step": 969 }, { "epoch": 0.39438910347631634, "grad_norm": 0.0967404916882515, "learning_rate": 0.00016075717484225526, "loss": 1.0127, "step": 970 }, { "epoch": 0.3947956901809311, "grad_norm": 14.380797386169434, "learning_rate": 0.00016071646651740282, "loss": 1.0321, "step": 971 }, { "epoch": 0.3952022768855458, "grad_norm": 0.1003538966178894, "learning_rate": 
0.0001606757581925504, "loss": 1.0326, "step": 972 }, { "epoch": 0.3956088635901606, "grad_norm": 0.11198288947343826, "learning_rate": 0.00016063504986769795, "loss": 1.0124, "step": 973 }, { "epoch": 0.39601545029477536, "grad_norm": 0.11186987906694412, "learning_rate": 0.0001605943415428455, "loss": 1.0373, "step": 974 }, { "epoch": 0.3964220369993901, "grad_norm": 0.10965568572282791, "learning_rate": 0.0001605536332179931, "loss": 1.004, "step": 975 }, { "epoch": 0.3968286237040049, "grad_norm": 0.09624014794826508, "learning_rate": 0.00016051292489314064, "loss": 0.9294, "step": 976 }, { "epoch": 0.39723521040861964, "grad_norm": 0.10577430576086044, "learning_rate": 0.00016047221656828822, "loss": 1.0446, "step": 977 }, { "epoch": 0.3976417971132344, "grad_norm": 0.10079281777143478, "learning_rate": 0.00016043150824343578, "loss": 1.0926, "step": 978 }, { "epoch": 0.3980483838178492, "grad_norm": 0.09973543137311935, "learning_rate": 0.00016039079991858336, "loss": 1.0352, "step": 979 }, { "epoch": 0.3984549705224639, "grad_norm": 0.10137680172920227, "learning_rate": 0.00016035009159373094, "loss": 0.9871, "step": 980 }, { "epoch": 0.39886155722707867, "grad_norm": 0.09879370778799057, "learning_rate": 0.0001603093832688785, "loss": 1.0077, "step": 981 }, { "epoch": 0.3992681439316934, "grad_norm": 0.09389031678438187, "learning_rate": 0.00016026867494402607, "loss": 0.8345, "step": 982 }, { "epoch": 0.3996747306363082, "grad_norm": 0.09968902170658112, "learning_rate": 0.00016022796661917363, "loss": 1.0025, "step": 983 }, { "epoch": 0.40008131734092295, "grad_norm": 0.09817297756671906, "learning_rate": 0.00016018725829432118, "loss": 1.0436, "step": 984 }, { "epoch": 0.4004879040455377, "grad_norm": 0.09468533098697662, "learning_rate": 0.00016014654996946876, "loss": 0.9757, "step": 985 }, { "epoch": 0.4008944907501525, "grad_norm": 0.10573722422122955, "learning_rate": 0.00016010584164461632, "loss": 0.9627, "step": 986 }, { "epoch": 
0.40130107745476723, "grad_norm": 0.09328682720661163, "learning_rate": 0.0001600651333197639, "loss": 0.8463, "step": 987 }, { "epoch": 0.40170766415938197, "grad_norm": 0.10987431555986404, "learning_rate": 0.00016002442499491145, "loss": 1.0123, "step": 988 }, { "epoch": 0.40211425086399677, "grad_norm": 0.09426723420619965, "learning_rate": 0.00015998371667005903, "loss": 0.8706, "step": 989 }, { "epoch": 0.4025208375686115, "grad_norm": 0.10630480945110321, "learning_rate": 0.00015994300834520662, "loss": 0.9596, "step": 990 }, { "epoch": 0.40292742427322625, "grad_norm": 0.11168541014194489, "learning_rate": 0.00015990230002035417, "loss": 1.0848, "step": 991 }, { "epoch": 0.40333401097784105, "grad_norm": 0.09651850908994675, "learning_rate": 0.00015986159169550175, "loss": 0.9965, "step": 992 }, { "epoch": 0.4037405976824558, "grad_norm": 0.10979650169610977, "learning_rate": 0.0001598208833706493, "loss": 1.0658, "step": 993 }, { "epoch": 0.40414718438707053, "grad_norm": 0.10453470051288605, "learning_rate": 0.00015978017504579689, "loss": 0.8697, "step": 994 }, { "epoch": 0.4045537710916853, "grad_norm": 0.09393549710512161, "learning_rate": 0.00015973946672094444, "loss": 0.9818, "step": 995 }, { "epoch": 0.4049603577963001, "grad_norm": 0.10618766397237778, "learning_rate": 0.000159698758396092, "loss": 1.1107, "step": 996 }, { "epoch": 0.4053669445009148, "grad_norm": 0.09401122480630875, "learning_rate": 0.00015965805007123958, "loss": 0.9484, "step": 997 }, { "epoch": 0.40577353120552956, "grad_norm": 0.10404767096042633, "learning_rate": 0.00015961734174638713, "loss": 1.0457, "step": 998 }, { "epoch": 0.40618011791014436, "grad_norm": 0.10144046694040298, "learning_rate": 0.0001595766334215347, "loss": 1.0164, "step": 999 }, { "epoch": 0.4065867046147591, "grad_norm": 69.98524475097656, "learning_rate": 0.00015953592509668227, "loss": 1.0974, "step": 1000 }, { "epoch": 0.40699329131937384, "grad_norm": 0.09672264754772186, "learning_rate": 
0.00015949521677182985, "loss": 0.9351, "step": 1001 }, { "epoch": 0.40739987802398864, "grad_norm": 0.09717651456594467, "learning_rate": 0.00015945450844697743, "loss": 0.992, "step": 1002 }, { "epoch": 0.4078064647286034, "grad_norm": 0.10012587159872055, "learning_rate": 0.00015941380012212498, "loss": 0.9564, "step": 1003 }, { "epoch": 0.4082130514332181, "grad_norm": 0.11782870441675186, "learning_rate": 0.00015937309179727256, "loss": 1.0572, "step": 1004 }, { "epoch": 0.4086196381378329, "grad_norm": 0.12483621388673782, "learning_rate": 0.00015933238347242012, "loss": 0.9195, "step": 1005 }, { "epoch": 0.40902622484244766, "grad_norm": 0.16169683635234833, "learning_rate": 0.0001592916751475677, "loss": 0.9553, "step": 1006 }, { "epoch": 0.4094328115470624, "grad_norm": 0.18174675107002258, "learning_rate": 0.00015925096682271525, "loss": 0.9884, "step": 1007 }, { "epoch": 0.40983939825167714, "grad_norm": 0.15436168015003204, "learning_rate": 0.0001592102584978628, "loss": 0.975, "step": 1008 }, { "epoch": 0.41024598495629194, "grad_norm": 0.37080836296081543, "learning_rate": 0.0001591695501730104, "loss": 0.9542, "step": 1009 }, { "epoch": 0.4106525716609067, "grad_norm": 0.10444851219654083, "learning_rate": 0.00015912884184815794, "loss": 0.8729, "step": 1010 }, { "epoch": 0.4110591583655214, "grad_norm": 0.09934143722057343, "learning_rate": 0.00015908813352330552, "loss": 1.0016, "step": 1011 }, { "epoch": 0.4114657450701362, "grad_norm": 0.10826974362134933, "learning_rate": 0.00015904742519845308, "loss": 1.0141, "step": 1012 }, { "epoch": 0.41187233177475097, "grad_norm": 0.0943305566906929, "learning_rate": 0.00015900671687360066, "loss": 0.9172, "step": 1013 }, { "epoch": 0.4122789184793657, "grad_norm": 0.0978141725063324, "learning_rate": 0.00015896600854874824, "loss": 1.0325, "step": 1014 }, { "epoch": 0.4126855051839805, "grad_norm": 0.10199011117219925, "learning_rate": 0.0001589253002238958, "loss": 1.1241, "step": 1015 }, { "epoch": 
0.41309209188859525, "grad_norm": 0.09425395727157593, "learning_rate": 0.00015888459189904337, "loss": 0.9909, "step": 1016 }, { "epoch": 0.41349867859321, "grad_norm": 0.10020224750041962, "learning_rate": 0.00015884388357419093, "loss": 1.048, "step": 1017 }, { "epoch": 0.4139052652978248, "grad_norm": 0.09428106248378754, "learning_rate": 0.0001588031752493385, "loss": 1.0091, "step": 1018 }, { "epoch": 0.41431185200243953, "grad_norm": 0.10922541469335556, "learning_rate": 0.00015876246692448606, "loss": 1.0321, "step": 1019 }, { "epoch": 0.41471843870705427, "grad_norm": 0.10005990415811539, "learning_rate": 0.00015872175859963362, "loss": 0.9331, "step": 1020 }, { "epoch": 0.415125025411669, "grad_norm": 0.09880723059177399, "learning_rate": 0.0001586810502747812, "loss": 1.0831, "step": 1021 }, { "epoch": 0.4155316121162838, "grad_norm": 0.10210402309894562, "learning_rate": 0.00015864034194992875, "loss": 0.8376, "step": 1022 }, { "epoch": 0.41593819882089855, "grad_norm": 0.10243164747953415, "learning_rate": 0.00015859963362507633, "loss": 0.974, "step": 1023 }, { "epoch": 0.4163447855255133, "grad_norm": 0.0910453349351883, "learning_rate": 0.0001585589253002239, "loss": 0.9429, "step": 1024 }, { "epoch": 0.4167513722301281, "grad_norm": 0.10028322786092758, "learning_rate": 0.00015851821697537147, "loss": 0.9692, "step": 1025 }, { "epoch": 0.41715795893474283, "grad_norm": 0.10679830610752106, "learning_rate": 0.00015847750865051905, "loss": 1.0561, "step": 1026 }, { "epoch": 0.4175645456393576, "grad_norm": 0.10921266674995422, "learning_rate": 0.0001584368003256666, "loss": 1.0046, "step": 1027 }, { "epoch": 0.4179711323439724, "grad_norm": 0.09717408567667007, "learning_rate": 0.00015839609200081419, "loss": 0.9963, "step": 1028 }, { "epoch": 0.4183777190485871, "grad_norm": 0.10907028615474701, "learning_rate": 0.00015835538367596174, "loss": 1.1112, "step": 1029 }, { "epoch": 0.41878430575320186, "grad_norm": 0.0934014692902565, "learning_rate": 
0.00015831467535110932, "loss": 0.9392, "step": 1030 }, { "epoch": 0.41919089245781666, "grad_norm": 0.10372751951217651, "learning_rate": 0.00015827396702625688, "loss": 0.9911, "step": 1031 }, { "epoch": 0.4195974791624314, "grad_norm": 0.0926424190402031, "learning_rate": 0.00015823325870140443, "loss": 0.9568, "step": 1032 }, { "epoch": 0.42000406586704614, "grad_norm": 0.09991902112960815, "learning_rate": 0.000158192550376552, "loss": 1.1551, "step": 1033 }, { "epoch": 0.4204106525716609, "grad_norm": 0.10407492518424988, "learning_rate": 0.00015815184205169956, "loss": 1.0001, "step": 1034 }, { "epoch": 0.4208172392762757, "grad_norm": 0.09984209388494492, "learning_rate": 0.00015811113372684715, "loss": 1.0661, "step": 1035 }, { "epoch": 0.4212238259808904, "grad_norm": 0.08815161138772964, "learning_rate": 0.00015807042540199473, "loss": 0.9132, "step": 1036 }, { "epoch": 0.42163041268550516, "grad_norm": 0.10167308151721954, "learning_rate": 0.00015802971707714228, "loss": 1.0113, "step": 1037 }, { "epoch": 0.42203699939011996, "grad_norm": 0.09093226492404938, "learning_rate": 0.00015798900875228986, "loss": 0.9016, "step": 1038 }, { "epoch": 0.4224435860947347, "grad_norm": 0.09932513535022736, "learning_rate": 0.00015794830042743742, "loss": 1.0756, "step": 1039 }, { "epoch": 0.42285017279934944, "grad_norm": 0.09752842038869858, "learning_rate": 0.000157907592102585, "loss": 1.0552, "step": 1040 }, { "epoch": 0.42325675950396424, "grad_norm": 0.09833484143018723, "learning_rate": 0.00015786688377773255, "loss": 1.0448, "step": 1041 }, { "epoch": 0.423663346208579, "grad_norm": 0.09440255910158157, "learning_rate": 0.00015782617545288013, "loss": 0.966, "step": 1042 }, { "epoch": 0.4240699329131937, "grad_norm": 0.09800337255001068, "learning_rate": 0.0001577854671280277, "loss": 0.9517, "step": 1043 }, { "epoch": 0.4244765196178085, "grad_norm": 0.100920170545578, "learning_rate": 0.00015774475880317524, "loss": 1.0075, "step": 1044 }, { "epoch": 
0.42488310632242327, "grad_norm": 0.10229222476482391, "learning_rate": 0.00015770405047832282, "loss": 1.0644, "step": 1045 }, { "epoch": 0.425289693027038, "grad_norm": 0.09247329086065292, "learning_rate": 0.00015766334215347038, "loss": 0.9628, "step": 1046 }, { "epoch": 0.42569627973165275, "grad_norm": 0.08849867433309555, "learning_rate": 0.00015762263382861796, "loss": 0.9044, "step": 1047 }, { "epoch": 0.42610286643626755, "grad_norm": 0.10035345703363419, "learning_rate": 0.00015758192550376554, "loss": 1.0025, "step": 1048 }, { "epoch": 0.4265094531408823, "grad_norm": 0.10530912131071091, "learning_rate": 0.0001575412171789131, "loss": 1.1156, "step": 1049 }, { "epoch": 0.42691603984549703, "grad_norm": 0.0959988534450531, "learning_rate": 0.00015750050885406067, "loss": 0.9927, "step": 1050 }, { "epoch": 0.42732262655011183, "grad_norm": 0.09642820060253143, "learning_rate": 0.00015745980052920823, "loss": 1.0081, "step": 1051 }, { "epoch": 0.42772921325472657, "grad_norm": 0.09695859253406525, "learning_rate": 0.0001574190922043558, "loss": 1.0104, "step": 1052 }, { "epoch": 0.4281357999593413, "grad_norm": 0.09271597862243652, "learning_rate": 0.00015737838387950336, "loss": 0.9655, "step": 1053 }, { "epoch": 0.4285423866639561, "grad_norm": 0.11482039839029312, "learning_rate": 0.00015733767555465094, "loss": 1.1689, "step": 1054 }, { "epoch": 0.42894897336857085, "grad_norm": 0.12072457373142242, "learning_rate": 0.0001572969672297985, "loss": 1.1573, "step": 1055 }, { "epoch": 0.4293555600731856, "grad_norm": 0.10628031194210052, "learning_rate": 0.00015725625890494605, "loss": 1.0604, "step": 1056 }, { "epoch": 0.4297621467778004, "grad_norm": 0.09997066110372543, "learning_rate": 0.00015721555058009363, "loss": 1.0791, "step": 1057 }, { "epoch": 0.43016873348241513, "grad_norm": 0.09063227474689484, "learning_rate": 0.0001571748422552412, "loss": 0.8821, "step": 1058 }, { "epoch": 0.4305753201870299, "grad_norm": 0.09447956085205078, 
"learning_rate": 0.0001571341339303888, "loss": 0.9104, "step": 1059 }, { "epoch": 0.4309819068916446, "grad_norm": 0.09488890320062637, "learning_rate": 0.00015709342560553635, "loss": 0.9476, "step": 1060 }, { "epoch": 0.4313884935962594, "grad_norm": 0.09842818230390549, "learning_rate": 0.0001570527172806839, "loss": 1.0041, "step": 1061 }, { "epoch": 0.43179508030087416, "grad_norm": 0.10026121884584427, "learning_rate": 0.00015701200895583149, "loss": 0.9704, "step": 1062 }, { "epoch": 0.4322016670054889, "grad_norm": 0.10602670162916183, "learning_rate": 0.00015697130063097904, "loss": 0.9626, "step": 1063 }, { "epoch": 0.4326082537101037, "grad_norm": 0.09817321598529816, "learning_rate": 0.00015693059230612662, "loss": 1.018, "step": 1064 }, { "epoch": 0.43301484041471844, "grad_norm": 0.10956291854381561, "learning_rate": 0.00015688988398127417, "loss": 1.0773, "step": 1065 }, { "epoch": 0.4334214271193332, "grad_norm": 0.10461815446615219, "learning_rate": 0.00015684917565642176, "loss": 1.0276, "step": 1066 }, { "epoch": 0.433828013823948, "grad_norm": 0.1066046878695488, "learning_rate": 0.0001568084673315693, "loss": 1.0104, "step": 1067 }, { "epoch": 0.4342346005285627, "grad_norm": 0.09685570001602173, "learning_rate": 0.00015676775900671686, "loss": 0.9324, "step": 1068 }, { "epoch": 0.43464118723317746, "grad_norm": 0.10849763453006744, "learning_rate": 0.00015672705068186445, "loss": 1.1898, "step": 1069 }, { "epoch": 0.43504777393779226, "grad_norm": 0.09181284159421921, "learning_rate": 0.000156686342357012, "loss": 0.9655, "step": 1070 }, { "epoch": 0.435454360642407, "grad_norm": 0.09956375509500504, "learning_rate": 0.00015664563403215958, "loss": 0.9767, "step": 1071 }, { "epoch": 0.43586094734702174, "grad_norm": 0.09587504714727402, "learning_rate": 0.00015660492570730716, "loss": 1.0046, "step": 1072 }, { "epoch": 0.4362675340516365, "grad_norm": 0.09740083664655685, "learning_rate": 0.00015656421738245472, "loss": 1.0235, "step": 1073 
}, { "epoch": 0.4366741207562513, "grad_norm": 0.1067059263586998, "learning_rate": 0.0001565235090576023, "loss": 1.0495, "step": 1074 }, { "epoch": 0.437080707460866, "grad_norm": 0.0951162800192833, "learning_rate": 0.00015648280073274985, "loss": 1.0601, "step": 1075 }, { "epoch": 0.43748729416548077, "grad_norm": 0.10814306139945984, "learning_rate": 0.00015644209240789743, "loss": 1.0642, "step": 1076 }, { "epoch": 0.43789388087009556, "grad_norm": 0.10104648023843765, "learning_rate": 0.000156401384083045, "loss": 1.0183, "step": 1077 }, { "epoch": 0.4383004675747103, "grad_norm": 0.10644647479057312, "learning_rate": 0.00015636067575819254, "loss": 0.9845, "step": 1078 }, { "epoch": 0.43870705427932505, "grad_norm": 0.10958357155323029, "learning_rate": 0.00015631996743334012, "loss": 1.0803, "step": 1079 }, { "epoch": 0.43911364098393985, "grad_norm": 0.09988164156675339, "learning_rate": 0.00015627925910848768, "loss": 0.9468, "step": 1080 }, { "epoch": 0.4395202276885546, "grad_norm": 0.09617158770561218, "learning_rate": 0.00015623855078363526, "loss": 0.9929, "step": 1081 }, { "epoch": 0.43992681439316933, "grad_norm": 0.09235814958810806, "learning_rate": 0.00015619784245878284, "loss": 0.9681, "step": 1082 }, { "epoch": 0.4403334010977841, "grad_norm": 0.0999334529042244, "learning_rate": 0.0001561571341339304, "loss": 1.0971, "step": 1083 }, { "epoch": 0.44073998780239887, "grad_norm": 0.09117653220891953, "learning_rate": 0.00015611642580907797, "loss": 0.9176, "step": 1084 }, { "epoch": 0.4411465745070136, "grad_norm": 0.11608845740556717, "learning_rate": 0.00015607571748422553, "loss": 1.016, "step": 1085 }, { "epoch": 0.44155316121162835, "grad_norm": null, "learning_rate": 0.0001560350091593731, "loss": 3.9953, "step": 1086 }, { "epoch": 0.44195974791624315, "grad_norm": 0.08910229802131653, "learning_rate": 0.00015599430083452066, "loss": 0.9387, "step": 1087 }, { "epoch": 0.4423663346208579, "grad_norm": null, "learning_rate": 
0.00015595359250966824, "loss": 0.9939, "step": 1088 }, { "epoch": 0.44277292132547263, "grad_norm": 0.13621561229228973, "learning_rate": 0.0001559128841848158, "loss": 0.9376, "step": 1089 }, { "epoch": 0.44317950803008743, "grad_norm": 0.263536661863327, "learning_rate": 0.00015587217585996335, "loss": 1.0808, "step": 1090 }, { "epoch": 0.4435860947347022, "grad_norm": 0.21123525500297546, "learning_rate": 0.00015583146753511093, "loss": 0.9142, "step": 1091 }, { "epoch": 0.4439926814393169, "grad_norm": 0.16994574666023254, "learning_rate": 0.0001557907592102585, "loss": 1.0273, "step": 1092 }, { "epoch": 0.4443992681439317, "grad_norm": 0.1400166153907776, "learning_rate": 0.00015575005088540607, "loss": 0.9135, "step": 1093 }, { "epoch": 0.44480585484854646, "grad_norm": 0.13885940611362457, "learning_rate": 0.00015570934256055365, "loss": 1.158, "step": 1094 }, { "epoch": 0.4452124415531612, "grad_norm": 0.12671105563640594, "learning_rate": 0.0001556686342357012, "loss": 0.9401, "step": 1095 }, { "epoch": 0.445619028257776, "grad_norm": 0.11388255655765533, "learning_rate": 0.00015562792591084879, "loss": 0.9454, "step": 1096 }, { "epoch": 0.44602561496239074, "grad_norm": 0.13421480357646942, "learning_rate": 0.00015558721758599634, "loss": 1.0017, "step": 1097 }, { "epoch": 0.4464322016670055, "grad_norm": 0.11914326995611191, "learning_rate": 0.00015554650926114392, "loss": 1.0312, "step": 1098 }, { "epoch": 0.4468387883716202, "grad_norm": 0.11101624369621277, "learning_rate": 0.00015550580093629147, "loss": 1.0555, "step": 1099 }, { "epoch": 0.447245375076235, "grad_norm": 0.12158175557851791, "learning_rate": 0.00015546509261143906, "loss": 1.008, "step": 1100 }, { "epoch": 0.44765196178084976, "grad_norm": 0.09680108726024628, "learning_rate": 0.0001554243842865866, "loss": 0.8603, "step": 1101 }, { "epoch": 0.4480585484854645, "grad_norm": 0.12374867498874664, "learning_rate": 0.00015538367596173416, "loss": 0.9282, "step": 1102 }, { "epoch": 
0.4484651351900793, "grad_norm": 0.12144714593887329, "learning_rate": 0.00015534296763688175, "loss": 1.1072, "step": 1103 }, { "epoch": 0.44887172189469404, "grad_norm": 0.13777373731136322, "learning_rate": 0.0001553022593120293, "loss": 1.0914, "step": 1104 }, { "epoch": 0.4492783085993088, "grad_norm": 0.14908930659294128, "learning_rate": 0.0001552615509871769, "loss": 1.0349, "step": 1105 }, { "epoch": 0.4496848953039236, "grad_norm": 0.09202148765325546, "learning_rate": 0.00015522084266232446, "loss": 1.0544, "step": 1106 }, { "epoch": 0.4500914820085383, "grad_norm": 0.14155222475528717, "learning_rate": 0.00015518013433747202, "loss": 1.145, "step": 1107 }, { "epoch": 0.45049806871315307, "grad_norm": 0.13090363144874573, "learning_rate": 0.0001551394260126196, "loss": 1.0815, "step": 1108 }, { "epoch": 0.45090465541776786, "grad_norm": 0.09763860702514648, "learning_rate": 0.00015509871768776715, "loss": 0.9798, "step": 1109 }, { "epoch": 0.4513112421223826, "grad_norm": 0.11425314843654633, "learning_rate": 0.00015505800936291473, "loss": 1.0609, "step": 1110 }, { "epoch": 0.45171782882699735, "grad_norm": 0.1132175624370575, "learning_rate": 0.00015501730103806229, "loss": 1.0784, "step": 1111 }, { "epoch": 0.4521244155316121, "grad_norm": 0.09365850687026978, "learning_rate": 0.00015497659271320987, "loss": 0.971, "step": 1112 }, { "epoch": 0.4525310022362269, "grad_norm": 0.10959959030151367, "learning_rate": 0.00015493588438835742, "loss": 1.0991, "step": 1113 }, { "epoch": 0.45293758894084163, "grad_norm": 0.1113215982913971, "learning_rate": 0.00015489517606350498, "loss": 0.9664, "step": 1114 }, { "epoch": 0.45334417564545637, "grad_norm": 0.09337687492370605, "learning_rate": 0.00015485446773865256, "loss": 0.9801, "step": 1115 }, { "epoch": 0.45375076235007117, "grad_norm": 0.09887603670358658, "learning_rate": 0.0001548137594138001, "loss": 0.9329, "step": 1116 }, { "epoch": 0.4541573490546859, "grad_norm": 0.09895873069763184, 
"learning_rate": 0.00015477305108894772, "loss": 0.9742, "step": 1117 }, { "epoch": 0.45456393575930065, "grad_norm": 0.10547256469726562, "learning_rate": 0.00015473234276409527, "loss": 1.0917, "step": 1118 }, { "epoch": 0.45497052246391545, "grad_norm": 0.10243359208106995, "learning_rate": 0.00015469163443924283, "loss": 1.153, "step": 1119 }, { "epoch": 0.4553771091685302, "grad_norm": 0.10679526627063751, "learning_rate": 0.0001546509261143904, "loss": 1.0256, "step": 1120 }, { "epoch": 0.45578369587314493, "grad_norm": 0.10002291947603226, "learning_rate": 0.00015461021778953796, "loss": 1.0984, "step": 1121 }, { "epoch": 0.45619028257775973, "grad_norm": 0.0953390821814537, "learning_rate": 0.00015456950946468554, "loss": 1.015, "step": 1122 }, { "epoch": 0.4565968692823745, "grad_norm": 0.09738897532224655, "learning_rate": 0.0001545288011398331, "loss": 1.0193, "step": 1123 }, { "epoch": 0.4570034559869892, "grad_norm": 0.09633835405111313, "learning_rate": 0.00015448809281498068, "loss": 1.0595, "step": 1124 }, { "epoch": 0.45741004269160396, "grad_norm": 0.09380267560482025, "learning_rate": 0.00015444738449012823, "loss": 1.0411, "step": 1125 }, { "epoch": 0.45781662939621875, "grad_norm": 0.09572221338748932, "learning_rate": 0.0001544066761652758, "loss": 1.0509, "step": 1126 }, { "epoch": 0.4582232161008335, "grad_norm": 0.09846567362546921, "learning_rate": 0.00015436596784042337, "loss": 1.0026, "step": 1127 }, { "epoch": 0.45862980280544824, "grad_norm": 0.10050946474075317, "learning_rate": 0.00015432525951557095, "loss": 0.9278, "step": 1128 }, { "epoch": 0.45903638951006304, "grad_norm": 0.09319213777780533, "learning_rate": 0.00015428455119071853, "loss": 1.0591, "step": 1129 }, { "epoch": 0.4594429762146778, "grad_norm": 0.10778182744979858, "learning_rate": 0.00015424384286586608, "loss": 1.1913, "step": 1130 }, { "epoch": 0.4598495629192925, "grad_norm": 0.09819093346595764, "learning_rate": 0.00015420313454101364, "loss": 1.0254, "step": 
1131 }, { "epoch": 0.4602561496239073, "grad_norm": 0.09300455451011658, "learning_rate": 0.00015416242621616122, "loss": 0.9092, "step": 1132 }, { "epoch": 0.46066273632852206, "grad_norm": 0.09690682590007782, "learning_rate": 0.00015412171789130877, "loss": 1.0309, "step": 1133 }, { "epoch": 0.4610693230331368, "grad_norm": 0.10080096125602722, "learning_rate": 0.00015408100956645636, "loss": 1.059, "step": 1134 }, { "epoch": 0.4614759097377516, "grad_norm": 0.10120131820440292, "learning_rate": 0.0001540403012416039, "loss": 1.0201, "step": 1135 }, { "epoch": 0.46188249644236634, "grad_norm": 0.09029684960842133, "learning_rate": 0.0001539995929167515, "loss": 0.9981, "step": 1136 }, { "epoch": 0.4622890831469811, "grad_norm": 0.10337984561920166, "learning_rate": 0.00015395888459189904, "loss": 1.0746, "step": 1137 }, { "epoch": 0.4626956698515958, "grad_norm": 0.10107820481061935, "learning_rate": 0.0001539181762670466, "loss": 1.0901, "step": 1138 }, { "epoch": 0.4631022565562106, "grad_norm": 0.09064685553312302, "learning_rate": 0.00015387746794219418, "loss": 0.9654, "step": 1139 }, { "epoch": 0.46350884326082537, "grad_norm": 0.08879990130662918, "learning_rate": 0.00015383675961734176, "loss": 0.9099, "step": 1140 }, { "epoch": 0.4639154299654401, "grad_norm": 0.09138944000005722, "learning_rate": 0.00015379605129248934, "loss": 1.029, "step": 1141 }, { "epoch": 0.4643220166700549, "grad_norm": 0.08852239698171616, "learning_rate": 0.0001537553429676369, "loss": 0.8866, "step": 1142 }, { "epoch": 0.46472860337466965, "grad_norm": 0.1031791940331459, "learning_rate": 0.00015371463464278445, "loss": 1.0403, "step": 1143 }, { "epoch": 0.4651351900792844, "grad_norm": 0.10525615513324738, "learning_rate": 0.00015367392631793203, "loss": 1.0979, "step": 1144 }, { "epoch": 0.4655417767838992, "grad_norm": 0.08951327204704285, "learning_rate": 0.00015363321799307959, "loss": 1.1415, "step": 1145 }, { "epoch": 0.46594836348851393, "grad_norm": 
0.08904453366994858, "learning_rate": 0.00015359250966822717, "loss": 0.9916, "step": 1146 }, { "epoch": 0.46635495019312867, "grad_norm": 0.09936080127954483, "learning_rate": 0.00015355180134337472, "loss": 0.8986, "step": 1147 }, { "epoch": 0.46676153689774347, "grad_norm": 0.09393945336341858, "learning_rate": 0.0001535110930185223, "loss": 0.9999, "step": 1148 }, { "epoch": 0.4671681236023582, "grad_norm": 0.09378618746995926, "learning_rate": 0.00015347038469366986, "loss": 1.047, "step": 1149 }, { "epoch": 0.46757471030697295, "grad_norm": 0.08764394372701645, "learning_rate": 0.0001534296763688174, "loss": 1.0553, "step": 1150 }, { "epoch": 0.4679812970115877, "grad_norm": 0.09421446919441223, "learning_rate": 0.00015338896804396502, "loss": 0.9849, "step": 1151 }, { "epoch": 0.4683878837162025, "grad_norm": 0.08507819473743439, "learning_rate": 0.00015334825971911257, "loss": 0.9776, "step": 1152 }, { "epoch": 0.46879447042081723, "grad_norm": 0.08929714560508728, "learning_rate": 0.00015330755139426015, "loss": 0.9386, "step": 1153 }, { "epoch": 0.469201057125432, "grad_norm": 0.08826079219579697, "learning_rate": 0.0001532668430694077, "loss": 0.9566, "step": 1154 }, { "epoch": 0.4696076438300468, "grad_norm": 0.09339980781078339, "learning_rate": 0.00015322613474455526, "loss": 1.0428, "step": 1155 }, { "epoch": 0.4700142305346615, "grad_norm": 0.09100881218910217, "learning_rate": 0.00015318542641970284, "loss": 0.998, "step": 1156 }, { "epoch": 0.47042081723927626, "grad_norm": 0.10815288126468658, "learning_rate": 0.0001531447180948504, "loss": 0.9677, "step": 1157 }, { "epoch": 0.47082740394389105, "grad_norm": 0.10011841356754303, "learning_rate": 0.00015310400976999798, "loss": 1.0712, "step": 1158 }, { "epoch": 0.4712339906485058, "grad_norm": 0.09442432969808578, "learning_rate": 0.00015306330144514553, "loss": 1.0916, "step": 1159 }, { "epoch": 0.47164057735312054, "grad_norm": 0.09668919444084167, "learning_rate": 0.00015302259312029311, 
"loss": 1.0755, "step": 1160 }, { "epoch": 0.47204716405773534, "grad_norm": 0.09985285252332687, "learning_rate": 0.00015298188479544067, "loss": 1.0688, "step": 1161 }, { "epoch": 0.4724537507623501, "grad_norm": 0.10555320233106613, "learning_rate": 0.00015294117647058822, "loss": 1.0152, "step": 1162 }, { "epoch": 0.4728603374669648, "grad_norm": 0.0884140282869339, "learning_rate": 0.00015290046814573583, "loss": 0.9648, "step": 1163 }, { "epoch": 0.47326692417157956, "grad_norm": 0.07746291160583496, "learning_rate": 0.00015285975982088338, "loss": 0.8335, "step": 1164 }, { "epoch": 0.47367351087619436, "grad_norm": 0.09735523909330368, "learning_rate": 0.00015281905149603094, "loss": 1.0043, "step": 1165 }, { "epoch": 0.4740800975808091, "grad_norm": 0.0871511772274971, "learning_rate": 0.00015277834317117852, "loss": 0.9071, "step": 1166 }, { "epoch": 0.47448668428542384, "grad_norm": 0.08971349149942398, "learning_rate": 0.00015273763484632607, "loss": 1.0586, "step": 1167 }, { "epoch": 0.47489327099003864, "grad_norm": 0.0872373878955841, "learning_rate": 0.00015269692652147365, "loss": 1.0302, "step": 1168 }, { "epoch": 0.4752998576946534, "grad_norm": 0.07631363719701767, "learning_rate": 0.0001526562181966212, "loss": 0.8899, "step": 1169 }, { "epoch": 0.4757064443992681, "grad_norm": 0.0988103449344635, "learning_rate": 0.0001526155098717688, "loss": 1.1254, "step": 1170 }, { "epoch": 0.4761130311038829, "grad_norm": 0.097597636282444, "learning_rate": 0.00015257480154691634, "loss": 1.1146, "step": 1171 }, { "epoch": 0.47651961780849766, "grad_norm": 0.09990191459655762, "learning_rate": 0.00015253409322206393, "loss": 1.2176, "step": 1172 }, { "epoch": 0.4769262045131124, "grad_norm": 0.09328643232584, "learning_rate": 0.00015249338489721148, "loss": 1.0341, "step": 1173 }, { "epoch": 0.4773327912177272, "grad_norm": 0.10171747207641602, "learning_rate": 0.00015245267657235906, "loss": 1.0254, "step": 1174 }, { "epoch": 0.47773937792234195, 
"grad_norm": 0.10708395391702652, "learning_rate": 0.00015241196824750664, "loss": 1.0829, "step": 1175 }, { "epoch": 0.4781459646269567, "grad_norm": 0.08677671104669571, "learning_rate": 0.0001523712599226542, "loss": 1.0284, "step": 1176 }, { "epoch": 0.4785525513315715, "grad_norm": 0.09038002789020538, "learning_rate": 0.00015233055159780175, "loss": 0.9153, "step": 1177 }, { "epoch": 0.4789591380361862, "grad_norm": 0.11192218214273453, "learning_rate": 0.00015228984327294933, "loss": 1.0457, "step": 1178 }, { "epoch": 0.47936572474080097, "grad_norm": 0.09288083016872406, "learning_rate": 0.00015224913494809689, "loss": 1.0015, "step": 1179 }, { "epoch": 0.4797723114454157, "grad_norm": 0.09631673991680145, "learning_rate": 0.00015220842662324447, "loss": 1.0815, "step": 1180 }, { "epoch": 0.4801788981500305, "grad_norm": 0.10445179790258408, "learning_rate": 0.00015216771829839202, "loss": 1.0739, "step": 1181 }, { "epoch": 0.48058548485464525, "grad_norm": 0.09268762916326523, "learning_rate": 0.0001521270099735396, "loss": 0.8934, "step": 1182 }, { "epoch": 0.48099207155926, "grad_norm": 0.08889751881361008, "learning_rate": 0.00015208630164868716, "loss": 0.9938, "step": 1183 }, { "epoch": 0.4813986582638748, "grad_norm": 45.80461883544922, "learning_rate": 0.0001520455933238347, "loss": 1.1104, "step": 1184 }, { "epoch": 0.48180524496848953, "grad_norm": 0.10641971975564957, "learning_rate": 0.0001520048849989823, "loss": 0.94, "step": 1185 }, { "epoch": 0.4822118316731043, "grad_norm": 0.1041031926870346, "learning_rate": 0.00015196417667412987, "loss": 1.0479, "step": 1186 }, { "epoch": 0.48261841837771907, "grad_norm": 0.09576927870512009, "learning_rate": 0.00015192346834927745, "loss": 1.0385, "step": 1187 }, { "epoch": 0.4830250050823338, "grad_norm": 26.211715698242188, "learning_rate": 0.000151882760024425, "loss": 0.9019, "step": 1188 }, { "epoch": 0.48343159178694856, "grad_norm": 0.10039546340703964, "learning_rate": 0.00015184205169957256, 
"loss": 0.9887, "step": 1189 }, { "epoch": 0.48383817849156335, "grad_norm": 0.14768731594085693, "learning_rate": 0.00015180134337472014, "loss": 0.9373, "step": 1190 }, { "epoch": 0.4842447651961781, "grad_norm": 0.29760250449180603, "learning_rate": 0.0001517606350498677, "loss": 0.9899, "step": 1191 }, { "epoch": 0.48465135190079284, "grad_norm": 0.29652246832847595, "learning_rate": 0.00015171992672501528, "loss": 0.961, "step": 1192 }, { "epoch": 0.4850579386054076, "grad_norm": 0.7517414689064026, "learning_rate": 0.00015167921840016283, "loss": 1.0964, "step": 1193 }, { "epoch": 0.4854645253100224, "grad_norm": 0.14506421983242035, "learning_rate": 0.0001516385100753104, "loss": 1.1155, "step": 1194 }, { "epoch": 0.4858711120146371, "grad_norm": 0.11916639655828476, "learning_rate": 0.00015159780175045797, "loss": 0.9494, "step": 1195 }, { "epoch": 0.48627769871925186, "grad_norm": 0.10341714322566986, "learning_rate": 0.00015155709342560552, "loss": 0.9381, "step": 1196 }, { "epoch": 0.48668428542386666, "grad_norm": 0.10921141505241394, "learning_rate": 0.00015151638510075313, "loss": 1.0357, "step": 1197 }, { "epoch": 0.4870908721284814, "grad_norm": 0.12874668836593628, "learning_rate": 0.00015147567677590068, "loss": 0.9918, "step": 1198 }, { "epoch": 0.48749745883309614, "grad_norm": 0.10311154276132584, "learning_rate": 0.00015143496845104827, "loss": 1.0575, "step": 1199 }, { "epoch": 0.48790404553771094, "grad_norm": 0.09126869589090347, "learning_rate": 0.00015139426012619582, "loss": 0.9125, "step": 1200 }, { "epoch": 0.4883106322423257, "grad_norm": 0.11038295179605484, "learning_rate": 0.00015135355180134337, "loss": 1.0761, "step": 1201 }, { "epoch": 0.4887172189469404, "grad_norm": 0.10550364851951599, "learning_rate": 0.00015131284347649095, "loss": 1.0513, "step": 1202 }, { "epoch": 0.4891238056515552, "grad_norm": 0.08666063100099564, "learning_rate": 0.0001512721351516385, "loss": 0.8815, "step": 1203 }, { "epoch": 0.48953039235616996, 
"grad_norm": 0.09860862046480179, "learning_rate": 0.0001512314268267861, "loss": 1.0451, "step": 1204 }, { "epoch": 0.4899369790607847, "grad_norm": 0.10188648104667664, "learning_rate": 0.00015119071850193364, "loss": 0.9911, "step": 1205 }, { "epoch": 0.49034356576539945, "grad_norm": 0.09538048505783081, "learning_rate": 0.00015115001017708122, "loss": 0.8973, "step": 1206 }, { "epoch": 0.49075015247001424, "grad_norm": 0.10558182001113892, "learning_rate": 0.00015110930185222878, "loss": 1.0272, "step": 1207 }, { "epoch": 0.491156739174629, "grad_norm": 0.10072223097085953, "learning_rate": 0.00015106859352737633, "loss": 1.0966, "step": 1208 }, { "epoch": 0.49156332587924373, "grad_norm": 0.10667192190885544, "learning_rate": 0.00015102788520252394, "loss": 1.0805, "step": 1209 }, { "epoch": 0.4919699125838585, "grad_norm": 0.10285364836454391, "learning_rate": 0.0001509871768776715, "loss": 1.0553, "step": 1210 }, { "epoch": 0.49237649928847327, "grad_norm": 0.09896936267614365, "learning_rate": 0.00015094646855281908, "loss": 1.032, "step": 1211 }, { "epoch": 0.492783085993088, "grad_norm": 0.08868112415075302, "learning_rate": 0.00015090576022796663, "loss": 1.013, "step": 1212 }, { "epoch": 0.4931896726977028, "grad_norm": 0.10103127360343933, "learning_rate": 0.00015086505190311418, "loss": 1.0589, "step": 1213 }, { "epoch": 0.49359625940231755, "grad_norm": 0.11582531780004501, "learning_rate": 0.00015082434357826177, "loss": 1.0731, "step": 1214 }, { "epoch": 0.4940028461069323, "grad_norm": 0.0953935906291008, "learning_rate": 0.00015078363525340932, "loss": 0.9751, "step": 1215 }, { "epoch": 0.4944094328115471, "grad_norm": 0.10135170817375183, "learning_rate": 0.0001507429269285569, "loss": 1.0676, "step": 1216 }, { "epoch": 0.49481601951616183, "grad_norm": 0.10529596358537674, "learning_rate": 0.00015070221860370446, "loss": 1.0274, "step": 1217 }, { "epoch": 0.4952226062207766, "grad_norm": 0.11172258853912354, "learning_rate": 
0.00015066151027885204, "loss": 1.1241, "step": 1218 }, { "epoch": 0.4956291929253913, "grad_norm": 0.10328125208616257, "learning_rate": 0.0001506208019539996, "loss": 1.1032, "step": 1219 }, { "epoch": 0.4960357796300061, "grad_norm": 0.09035445749759674, "learning_rate": 0.00015058009362914717, "loss": 1.0394, "step": 1220 }, { "epoch": 0.49644236633462085, "grad_norm": 0.0988045334815979, "learning_rate": 0.00015053938530429475, "loss": 1.0092, "step": 1221 }, { "epoch": 0.4968489530392356, "grad_norm": 0.12335261702537537, "learning_rate": 0.0001504986769794423, "loss": 1.0994, "step": 1222 }, { "epoch": 0.4972555397438504, "grad_norm": 0.09677151590585709, "learning_rate": 0.0001504579686545899, "loss": 0.9352, "step": 1223 }, { "epoch": 0.49766212644846514, "grad_norm": 0.0954160988330841, "learning_rate": 0.00015041726032973744, "loss": 1.0526, "step": 1224 }, { "epoch": 0.4980687131530799, "grad_norm": 0.09783489257097244, "learning_rate": 0.000150376552004885, "loss": 0.9689, "step": 1225 }, { "epoch": 0.4984752998576947, "grad_norm": 0.09221793711185455, "learning_rate": 0.00015033584368003258, "loss": 0.9458, "step": 1226 }, { "epoch": 0.4988818865623094, "grad_norm": 0.09968589246273041, "learning_rate": 0.00015029513535518013, "loss": 0.9938, "step": 1227 }, { "epoch": 0.49928847326692416, "grad_norm": 0.10488888621330261, "learning_rate": 0.0001502544270303277, "loss": 0.9525, "step": 1228 }, { "epoch": 0.49969505997153896, "grad_norm": 0.08479832857847214, "learning_rate": 0.00015021371870547527, "loss": 0.976, "step": 1229 }, { "epoch": 0.5001016466761536, "grad_norm": 0.0930403620004654, "learning_rate": 0.00015017301038062285, "loss": 1.0259, "step": 1230 }, { "epoch": 0.5005082333807684, "grad_norm": 0.09309448301792145, "learning_rate": 0.0001501323020557704, "loss": 0.9997, "step": 1231 }, { "epoch": 0.5009148200853832, "grad_norm": 0.09209504723548889, "learning_rate": 0.00015009159373091798, "loss": 0.9365, "step": 1232 }, { "epoch": 
0.5013214067899979, "grad_norm": 0.09045909345149994, "learning_rate": 0.00015005088540606556, "loss": 0.9572, "step": 1233 }, { "epoch": 0.5017279934946127, "grad_norm": 0.0892348513007164, "learning_rate": 0.00015001017708121312, "loss": 0.9593, "step": 1234 }, { "epoch": 0.5021345801992275, "grad_norm": 0.08853106945753098, "learning_rate": 0.0001499694687563607, "loss": 0.9518, "step": 1235 }, { "epoch": 0.5025411669038422, "grad_norm": 0.0941222533583641, "learning_rate": 0.00014992876043150825, "loss": 0.9474, "step": 1236 }, { "epoch": 0.502947753608457, "grad_norm": 0.09374161809682846, "learning_rate": 0.0001498880521066558, "loss": 1.0018, "step": 1237 }, { "epoch": 0.5033543403130718, "grad_norm": 0.08115139603614807, "learning_rate": 0.0001498473437818034, "loss": 0.8797, "step": 1238 }, { "epoch": 0.5037609270176865, "grad_norm": 0.09270316362380981, "learning_rate": 0.00014980663545695094, "loss": 1.0203, "step": 1239 }, { "epoch": 0.5041675137223013, "grad_norm": 0.08950728923082352, "learning_rate": 0.00014976592713209852, "loss": 0.9909, "step": 1240 }, { "epoch": 0.5045741004269161, "grad_norm": 0.09764236211776733, "learning_rate": 0.00014972521880724608, "loss": 0.9851, "step": 1241 }, { "epoch": 0.5049806871315308, "grad_norm": 0.09275151789188385, "learning_rate": 0.00014968451048239366, "loss": 0.9452, "step": 1242 }, { "epoch": 0.5053872738361456, "grad_norm": 0.09436964988708496, "learning_rate": 0.00014964380215754124, "loss": 1.0731, "step": 1243 }, { "epoch": 0.5057938605407604, "grad_norm": 0.09008494764566422, "learning_rate": 0.0001496030938326888, "loss": 1.0238, "step": 1244 }, { "epoch": 0.506200447245375, "grad_norm": 0.08599425107240677, "learning_rate": 0.00014956238550783638, "loss": 0.9148, "step": 1245 }, { "epoch": 0.5066070339499898, "grad_norm": 0.09270120412111282, "learning_rate": 0.00014952167718298393, "loss": 0.9348, "step": 1246 }, { "epoch": 0.5070136206546046, "grad_norm": 0.09423110634088516, "learning_rate": 
0.0001494809688581315, "loss": 1.0746, "step": 1247 }, { "epoch": 0.5074202073592193, "grad_norm": 0.08819740265607834, "learning_rate": 0.00014944026053327907, "loss": 1.0913, "step": 1248 }, { "epoch": 0.5078267940638341, "grad_norm": 0.08502914011478424, "learning_rate": 0.00014939955220842662, "loss": 1.0142, "step": 1249 }, { "epoch": 0.5082333807684488, "grad_norm": 0.09372544288635254, "learning_rate": 0.0001493588438835742, "loss": 0.9297, "step": 1250 }, { "epoch": 0.5086399674730636, "grad_norm": 0.09857220202684402, "learning_rate": 0.00014931813555872175, "loss": 1.0665, "step": 1251 }, { "epoch": 0.5090465541776784, "grad_norm": 0.09227776527404785, "learning_rate": 0.00014927742723386934, "loss": 0.9791, "step": 1252 }, { "epoch": 0.5094531408822931, "grad_norm": 0.09301433712244034, "learning_rate": 0.0001492367189090169, "loss": 0.8855, "step": 1253 }, { "epoch": 0.5098597275869079, "grad_norm": 0.09796632081270218, "learning_rate": 0.00014919601058416447, "loss": 1.0645, "step": 1254 }, { "epoch": 0.5102663142915227, "grad_norm": 0.09791705757379532, "learning_rate": 0.00014915530225931205, "loss": 0.9949, "step": 1255 }, { "epoch": 0.5106729009961374, "grad_norm": 0.09171664714813232, "learning_rate": 0.0001491145939344596, "loss": 0.8958, "step": 1256 }, { "epoch": 0.5110794877007522, "grad_norm": 0.10115580260753632, "learning_rate": 0.0001490738856096072, "loss": 1.0141, "step": 1257 }, { "epoch": 0.511486074405367, "grad_norm": 0.08854761719703674, "learning_rate": 0.00014903317728475474, "loss": 0.9733, "step": 1258 }, { "epoch": 0.5118926611099817, "grad_norm": 0.0944913849234581, "learning_rate": 0.0001489924689599023, "loss": 1.0187, "step": 1259 }, { "epoch": 0.5122992478145965, "grad_norm": 0.08820286393165588, "learning_rate": 0.00014895176063504988, "loss": 0.984, "step": 1260 }, { "epoch": 0.5127058345192113, "grad_norm": 0.0941242128610611, "learning_rate": 0.00014891105231019743, "loss": 1.0333, "step": 1261 }, { "epoch": 
0.5131124212238259, "grad_norm": 0.09355438500642776, "learning_rate": 0.000148870343985345, "loss": 1.1186, "step": 1262 }, { "epoch": 0.5135190079284407, "grad_norm": 0.09487958997488022, "learning_rate": 0.00014882963566049257, "loss": 1.1297, "step": 1263 }, { "epoch": 0.5139255946330555, "grad_norm": 0.08488618582487106, "learning_rate": 0.00014878892733564015, "loss": 0.9725, "step": 1264 }, { "epoch": 0.5143321813376702, "grad_norm": 0.09238637238740921, "learning_rate": 0.0001487482190107877, "loss": 0.9798, "step": 1265 }, { "epoch": 0.514738768042285, "grad_norm": 0.09334023296833038, "learning_rate": 0.00014870751068593528, "loss": 1.0818, "step": 1266 }, { "epoch": 0.5151453547468998, "grad_norm": 0.09130462259054184, "learning_rate": 0.00014866680236108286, "loss": 0.9885, "step": 1267 }, { "epoch": 0.5155519414515145, "grad_norm": 0.08275487273931503, "learning_rate": 0.00014862609403623042, "loss": 0.8525, "step": 1268 }, { "epoch": 0.5159585281561293, "grad_norm": 0.09485149383544922, "learning_rate": 0.000148585385711378, "loss": 1.0424, "step": 1269 }, { "epoch": 0.5163651148607441, "grad_norm": 0.08834747970104218, "learning_rate": 0.00014854467738652555, "loss": 0.9235, "step": 1270 }, { "epoch": 0.5167717015653588, "grad_norm": 0.09200993925333023, "learning_rate": 0.0001485039690616731, "loss": 1.0669, "step": 1271 }, { "epoch": 0.5171782882699736, "grad_norm": 0.08159536123275757, "learning_rate": 0.0001484632607368207, "loss": 0.9067, "step": 1272 }, { "epoch": 0.5175848749745884, "grad_norm": 0.08643992245197296, "learning_rate": 0.00014842255241196824, "loss": 0.9632, "step": 1273 }, { "epoch": 0.5179914616792031, "grad_norm": 0.09672199934720993, "learning_rate": 0.00014838184408711582, "loss": 1.0263, "step": 1274 }, { "epoch": 0.5183980483838179, "grad_norm": 0.09713756293058395, "learning_rate": 0.00014834113576226338, "loss": 0.9166, "step": 1275 }, { "epoch": 0.5188046350884326, "grad_norm": 0.08467654883861542, "learning_rate": 
0.00014830042743741096, "loss": 0.9201, "step": 1276 }, { "epoch": 0.5192112217930474, "grad_norm": 0.08024970442056656, "learning_rate": 0.0001482597191125585, "loss": 0.8556, "step": 1277 }, { "epoch": 0.5196178084976621, "grad_norm": 0.09249437600374222, "learning_rate": 0.0001482190107877061, "loss": 1.0381, "step": 1278 }, { "epoch": 0.5200243952022768, "grad_norm": 0.08076690137386322, "learning_rate": 0.00014817830246285368, "loss": 0.9216, "step": 1279 }, { "epoch": 0.5204309819068916, "grad_norm": 0.09259018301963806, "learning_rate": 0.00014813759413800123, "loss": 1.0547, "step": 1280 }, { "epoch": 0.5208375686115064, "grad_norm": 0.08734786510467529, "learning_rate": 0.0001480968858131488, "loss": 0.8811, "step": 1281 }, { "epoch": 0.5212441553161211, "grad_norm": 0.094956174492836, "learning_rate": 0.00014805617748829637, "loss": 0.9665, "step": 1282 }, { "epoch": 0.5216507420207359, "grad_norm": 0.08848060667514801, "learning_rate": 0.00014801546916344392, "loss": 0.9945, "step": 1283 }, { "epoch": 0.5220573287253507, "grad_norm": 0.0921303778886795, "learning_rate": 0.0001479747608385915, "loss": 1.0927, "step": 1284 }, { "epoch": 0.5224639154299654, "grad_norm": 0.08918172121047974, "learning_rate": 0.00014793405251373905, "loss": 0.9598, "step": 1285 }, { "epoch": 0.5228705021345802, "grad_norm": 0.10177495330572128, "learning_rate": 0.00014789334418888664, "loss": 1.1625, "step": 1286 }, { "epoch": 0.523277088839195, "grad_norm": 0.0942060649394989, "learning_rate": 0.0001478526358640342, "loss": 1.0612, "step": 1287 }, { "epoch": 0.5236836755438097, "grad_norm": 0.09780838340520859, "learning_rate": 0.00014781192753918177, "loss": 1.1024, "step": 1288 }, { "epoch": 0.5240902622484245, "grad_norm": 0.08893782645463943, "learning_rate": 0.00014777121921432935, "loss": 1.043, "step": 1289 }, { "epoch": 0.5244968489530393, "grad_norm": 0.0918479710817337, "learning_rate": 0.0001477305108894769, "loss": 0.9824, "step": 1290 }, { "epoch": 
0.524903435657654, "grad_norm": 0.09912838041782379, "learning_rate": 0.0001476898025646245, "loss": 1.0346, "step": 1291 }, { "epoch": 0.5253100223622688, "grad_norm": 0.10609038919210434, "learning_rate": 0.00014764909423977204, "loss": 1.0133, "step": 1292 }, { "epoch": 0.5257166090668836, "grad_norm": 0.09957921504974365, "learning_rate": 0.00014760838591491962, "loss": 0.9842, "step": 1293 }, { "epoch": 0.5261231957714982, "grad_norm": 0.09777513146400452, "learning_rate": 0.00014756767759006718, "loss": 1.0092, "step": 1294 }, { "epoch": 0.526529782476113, "grad_norm": 0.08816764503717422, "learning_rate": 0.00014752696926521473, "loss": 0.9064, "step": 1295 }, { "epoch": 0.5269363691807278, "grad_norm": 0.09163589775562286, "learning_rate": 0.0001474862609403623, "loss": 0.9682, "step": 1296 }, { "epoch": 0.5273429558853425, "grad_norm": 21.36524772644043, "learning_rate": 0.00014744555261550987, "loss": 1.0146, "step": 1297 }, { "epoch": 0.5277495425899573, "grad_norm": 0.09484653919935226, "learning_rate": 0.00014740484429065745, "loss": 0.979, "step": 1298 }, { "epoch": 0.5281561292945721, "grad_norm": 0.09288137406110764, "learning_rate": 0.000147364135965805, "loss": 0.9692, "step": 1299 }, { "epoch": 0.5285627159991868, "grad_norm": 0.09847582131624222, "learning_rate": 0.00014732342764095258, "loss": 1.0519, "step": 1300 }, { "epoch": 0.5289693027038016, "grad_norm": 0.09856998920440674, "learning_rate": 0.00014728271931610016, "loss": 0.9929, "step": 1301 }, { "epoch": 0.5293758894084164, "grad_norm": 0.0969497561454773, "learning_rate": 0.00014724201099124772, "loss": 0.9412, "step": 1302 }, { "epoch": 0.5297824761130311, "grad_norm": 0.09796781092882156, "learning_rate": 0.0001472013026663953, "loss": 0.9538, "step": 1303 }, { "epoch": 0.5301890628176459, "grad_norm": 0.09267283231019974, "learning_rate": 0.00014716059434154285, "loss": 0.9616, "step": 1304 }, { "epoch": 0.5305956495222606, "grad_norm": 0.10447274148464203, "learning_rate": 
0.00014711988601669043, "loss": 0.9485, "step": 1305 }, { "epoch": 0.5310022362268754, "grad_norm": 0.10163460671901703, "learning_rate": 0.000147079177691838, "loss": 0.9419, "step": 1306 }, { "epoch": 0.5314088229314902, "grad_norm": 0.09405020624399185, "learning_rate": 0.00014703846936698554, "loss": 0.9806, "step": 1307 }, { "epoch": 0.5318154096361049, "grad_norm": 0.09395210444927216, "learning_rate": 0.00014699776104213312, "loss": 1.0278, "step": 1308 }, { "epoch": 0.5322219963407196, "grad_norm": 0.09595540910959244, "learning_rate": 0.00014695705271728068, "loss": 1.0625, "step": 1309 }, { "epoch": 0.5326285830453344, "grad_norm": 0.0832480788230896, "learning_rate": 0.00014691634439242826, "loss": 0.9134, "step": 1310 }, { "epoch": 0.5330351697499491, "grad_norm": 0.10631989687681198, "learning_rate": 0.0001468756360675758, "loss": 0.9753, "step": 1311 }, { "epoch": 0.5334417564545639, "grad_norm": 0.0866394117474556, "learning_rate": 0.0001468349277427234, "loss": 0.9492, "step": 1312 }, { "epoch": 0.5338483431591787, "grad_norm": 0.10123784095048904, "learning_rate": 0.00014679421941787098, "loss": 0.9819, "step": 1313 }, { "epoch": 0.5342549298637934, "grad_norm": 0.08982353657484055, "learning_rate": 0.00014675351109301853, "loss": 0.9026, "step": 1314 }, { "epoch": 0.5346615165684082, "grad_norm": 0.08998806774616241, "learning_rate": 0.0001467128027681661, "loss": 0.9467, "step": 1315 }, { "epoch": 0.535068103273023, "grad_norm": 0.09901012480258942, "learning_rate": 0.00014667209444331366, "loss": 0.9655, "step": 1316 }, { "epoch": 0.5354746899776377, "grad_norm": 0.10991565883159637, "learning_rate": 0.00014663138611846125, "loss": 1.0236, "step": 1317 }, { "epoch": 0.5358812766822525, "grad_norm": 0.10133833438158035, "learning_rate": 0.0001465906777936088, "loss": 1.0453, "step": 1318 }, { "epoch": 0.5362878633868673, "grad_norm": 0.10197743028402328, "learning_rate": 0.00014654996946875635, "loss": 0.9008, "step": 1319 }, { "epoch": 
0.536694450091482, "grad_norm": 0.09654685854911804, "learning_rate": 0.00014650926114390394, "loss": 1.0586, "step": 1320 }, { "epoch": 0.5371010367960968, "grad_norm": 0.10006607323884964, "learning_rate": 0.0001464685528190515, "loss": 0.9627, "step": 1321 }, { "epoch": 0.5375076235007116, "grad_norm": 0.09992939233779907, "learning_rate": 0.00014642784449419907, "loss": 0.9841, "step": 1322 }, { "epoch": 0.5379142102053263, "grad_norm": 0.098929263651371, "learning_rate": 0.00014638713616934662, "loss": 0.9764, "step": 1323 }, { "epoch": 0.5383207969099411, "grad_norm": 0.09640022367238998, "learning_rate": 0.0001463464278444942, "loss": 0.9922, "step": 1324 }, { "epoch": 0.5387273836145559, "grad_norm": 0.09175208956003189, "learning_rate": 0.0001463057195196418, "loss": 1.0659, "step": 1325 }, { "epoch": 0.5391339703191705, "grad_norm": 0.09107311069965363, "learning_rate": 0.00014626501119478934, "loss": 0.9898, "step": 1326 }, { "epoch": 0.5395405570237853, "grad_norm": 0.10652513056993484, "learning_rate": 0.00014622430286993692, "loss": 1.1346, "step": 1327 }, { "epoch": 0.5399471437284001, "grad_norm": 0.09096572548151016, "learning_rate": 0.00014618359454508448, "loss": 0.9296, "step": 1328 }, { "epoch": 0.5403537304330148, "grad_norm": 0.0995742529630661, "learning_rate": 0.00014614288622023206, "loss": 1.0034, "step": 1329 }, { "epoch": 0.5407603171376296, "grad_norm": 0.08811762928962708, "learning_rate": 0.0001461021778953796, "loss": 0.9928, "step": 1330 }, { "epoch": 0.5411669038422443, "grad_norm": 0.09473133832216263, "learning_rate": 0.00014606146957052717, "loss": 1.002, "step": 1331 }, { "epoch": 0.5415734905468591, "grad_norm": 0.08898860216140747, "learning_rate": 0.00014602076124567475, "loss": 0.9358, "step": 1332 }, { "epoch": 0.5419800772514739, "grad_norm": 0.093483105301857, "learning_rate": 0.0001459800529208223, "loss": 0.9319, "step": 1333 }, { "epoch": 0.5423866639560886, "grad_norm": 0.09663254022598267, "learning_rate": 
0.00014593934459596988, "loss": 1.0041, "step": 1334 }, { "epoch": 0.5427932506607034, "grad_norm": 0.08969207853078842, "learning_rate": 0.00014589863627111746, "loss": 0.996, "step": 1335 }, { "epoch": 0.5431998373653182, "grad_norm": 0.08921096473932266, "learning_rate": 0.00014585792794626502, "loss": 0.9263, "step": 1336 }, { "epoch": 0.5436064240699329, "grad_norm": 0.08625603467226028, "learning_rate": 0.0001458172196214126, "loss": 0.9372, "step": 1337 }, { "epoch": 0.5440130107745477, "grad_norm": 0.09406933933496475, "learning_rate": 0.00014577651129656015, "loss": 1.0037, "step": 1338 }, { "epoch": 0.5444195974791625, "grad_norm": 0.08918149769306183, "learning_rate": 0.00014573580297170773, "loss": 0.9418, "step": 1339 }, { "epoch": 0.5448261841837772, "grad_norm": 0.09736087918281555, "learning_rate": 0.0001456950946468553, "loss": 0.965, "step": 1340 }, { "epoch": 0.545232770888392, "grad_norm": 0.09973054379224777, "learning_rate": 0.00014565438632200287, "loss": 0.8948, "step": 1341 }, { "epoch": 0.5456393575930067, "grad_norm": 0.08326181769371033, "learning_rate": 0.00014561367799715042, "loss": 0.9051, "step": 1342 }, { "epoch": 0.5460459442976214, "grad_norm": 0.0919221043586731, "learning_rate": 0.00014557296967229798, "loss": 0.95, "step": 1343 }, { "epoch": 0.5464525310022362, "grad_norm": 0.08741891384124756, "learning_rate": 0.00014553226134744556, "loss": 0.9682, "step": 1344 }, { "epoch": 0.546859117706851, "grad_norm": 0.09859665483236313, "learning_rate": 0.0001454915530225931, "loss": 1.0564, "step": 1345 }, { "epoch": 0.5472657044114657, "grad_norm": 0.09352114796638489, "learning_rate": 0.0001454508446977407, "loss": 0.982, "step": 1346 }, { "epoch": 0.5476722911160805, "grad_norm": 0.09592889994382858, "learning_rate": 0.00014541013637288827, "loss": 0.9874, "step": 1347 }, { "epoch": 0.5480788778206953, "grad_norm": 0.08276782929897308, "learning_rate": 0.00014536942804803583, "loss": 1.0243, "step": 1348 }, { "epoch": 
0.54848546452531, "grad_norm": 0.09625902026891708, "learning_rate": 0.0001453287197231834, "loss": 1.0977, "step": 1349 }, { "epoch": 0.5488920512299248, "grad_norm": 0.08539925515651703, "learning_rate": 0.00014528801139833096, "loss": 0.9816, "step": 1350 }, { "epoch": 0.5492986379345396, "grad_norm": 0.08654636144638062, "learning_rate": 0.00014524730307347855, "loss": 1.02, "step": 1351 }, { "epoch": 0.5497052246391543, "grad_norm": 0.09811274707317352, "learning_rate": 0.0001452065947486261, "loss": 1.1509, "step": 1352 }, { "epoch": 0.5501118113437691, "grad_norm": 0.09280407428741455, "learning_rate": 0.00014516588642377365, "loss": 1.0163, "step": 1353 }, { "epoch": 0.5505183980483839, "grad_norm": 0.08086491376161575, "learning_rate": 0.00014512517809892123, "loss": 0.853, "step": 1354 }, { "epoch": 0.5509249847529986, "grad_norm": 0.0827447846531868, "learning_rate": 0.0001450844697740688, "loss": 0.9749, "step": 1355 }, { "epoch": 0.5513315714576134, "grad_norm": 0.09065467119216919, "learning_rate": 0.00014504376144921637, "loss": 1.0186, "step": 1356 }, { "epoch": 0.551738158162228, "grad_norm": 0.08642933517694473, "learning_rate": 0.00014500305312436392, "loss": 0.9175, "step": 1357 }, { "epoch": 0.5521447448668428, "grad_norm": 0.08930498361587524, "learning_rate": 0.0001449623447995115, "loss": 1.0027, "step": 1358 }, { "epoch": 0.5525513315714576, "grad_norm": 0.09525667130947113, "learning_rate": 0.0001449216364746591, "loss": 1.1328, "step": 1359 }, { "epoch": 0.5529579182760723, "grad_norm": 0.08723597228527069, "learning_rate": 0.00014488092814980664, "loss": 0.9025, "step": 1360 }, { "epoch": 0.5533645049806871, "grad_norm": 0.08364204317331314, "learning_rate": 0.00014484021982495422, "loss": 0.939, "step": 1361 }, { "epoch": 0.5537710916853019, "grad_norm": 0.08982790261507034, "learning_rate": 0.00014479951150010178, "loss": 0.8604, "step": 1362 }, { "epoch": 0.5541776783899166, "grad_norm": 0.08386033028364182, "learning_rate": 
0.00014475880317524936, "loss": 0.957, "step": 1363 }, { "epoch": 0.5545842650945314, "grad_norm": 0.0920158326625824, "learning_rate": 0.0001447180948503969, "loss": 0.9388, "step": 1364 }, { "epoch": 0.5549908517991462, "grad_norm": 0.08764606714248657, "learning_rate": 0.00014467738652554447, "loss": 0.9721, "step": 1365 }, { "epoch": 0.5553974385037609, "grad_norm": 0.09296350926160812, "learning_rate": 0.00014463667820069205, "loss": 1.0195, "step": 1366 }, { "epoch": 0.5558040252083757, "grad_norm": 0.08107852935791016, "learning_rate": 0.0001445959698758396, "loss": 0.9001, "step": 1367 }, { "epoch": 0.5562106119129905, "grad_norm": 0.08827921748161316, "learning_rate": 0.00014455526155098718, "loss": 1.0009, "step": 1368 }, { "epoch": 0.5566171986176052, "grad_norm": 0.08549787849187851, "learning_rate": 0.00014451455322613474, "loss": 0.8675, "step": 1369 }, { "epoch": 0.55702378532222, "grad_norm": 0.10005125403404236, "learning_rate": 0.00014447384490128232, "loss": 1.1293, "step": 1370 }, { "epoch": 0.5574303720268348, "grad_norm": 0.09509359300136566, "learning_rate": 0.0001444331365764299, "loss": 1.0033, "step": 1371 }, { "epoch": 0.5578369587314495, "grad_norm": 0.09246810525655746, "learning_rate": 0.00014439242825157745, "loss": 0.9563, "step": 1372 }, { "epoch": 0.5582435454360642, "grad_norm": 0.09919826686382294, "learning_rate": 0.00014435171992672503, "loss": 1.0925, "step": 1373 }, { "epoch": 0.558650132140679, "grad_norm": 0.09652990102767944, "learning_rate": 0.0001443110116018726, "loss": 1.0716, "step": 1374 }, { "epoch": 0.5590567188452937, "grad_norm": 0.08819134533405304, "learning_rate": 0.00014427030327702017, "loss": 0.9586, "step": 1375 }, { "epoch": 0.5594633055499085, "grad_norm": 0.09266290068626404, "learning_rate": 0.00014422959495216772, "loss": 1.0399, "step": 1376 }, { "epoch": 0.5598698922545233, "grad_norm": 0.08892200142145157, "learning_rate": 0.00014418888662731528, "loss": 0.9844, "step": 1377 }, { "epoch": 
0.560276478959138, "grad_norm": 0.09452232718467712, "learning_rate": 0.00014414817830246286, "loss": 1.0875, "step": 1378 }, { "epoch": 0.5606830656637528, "grad_norm": 0.08958882093429565, "learning_rate": 0.0001441074699776104, "loss": 1.0234, "step": 1379 }, { "epoch": 0.5610896523683676, "grad_norm": 0.09218178689479828, "learning_rate": 0.000144066761652758, "loss": 1.0871, "step": 1380 }, { "epoch": 0.5614962390729823, "grad_norm": 0.08819695562124252, "learning_rate": 0.00014402605332790557, "loss": 0.9046, "step": 1381 }, { "epoch": 0.5619028257775971, "grad_norm": 0.09621118754148483, "learning_rate": 0.00014398534500305313, "loss": 0.9789, "step": 1382 }, { "epoch": 0.5623094124822118, "grad_norm": 0.08230914175510406, "learning_rate": 0.0001439446366782007, "loss": 0.817, "step": 1383 }, { "epoch": 0.5627159991868266, "grad_norm": 0.08805210143327713, "learning_rate": 0.00014390392835334826, "loss": 0.9488, "step": 1384 }, { "epoch": 0.5631225858914414, "grad_norm": 0.09026028960943222, "learning_rate": 0.00014386322002849584, "loss": 0.9837, "step": 1385 }, { "epoch": 0.5635291725960561, "grad_norm": 0.09834691882133484, "learning_rate": 0.0001438225117036434, "loss": 0.999, "step": 1386 }, { "epoch": 0.5639357593006709, "grad_norm": 0.09209754317998886, "learning_rate": 0.00014378180337879098, "loss": 0.9923, "step": 1387 }, { "epoch": 0.5643423460052857, "grad_norm": 0.08959315717220306, "learning_rate": 0.00014374109505393853, "loss": 0.9282, "step": 1388 }, { "epoch": 0.5647489327099003, "grad_norm": 0.08573776483535767, "learning_rate": 0.0001437003867290861, "loss": 0.9504, "step": 1389 }, { "epoch": 0.5651555194145151, "grad_norm": 0.08887659013271332, "learning_rate": 0.00014365967840423367, "loss": 0.9195, "step": 1390 }, { "epoch": 0.5655621061191299, "grad_norm": 0.08740208297967911, "learning_rate": 0.00014361897007938122, "loss": 0.9537, "step": 1391 }, { "epoch": 0.5659686928237446, "grad_norm": 0.08976002782583237, "learning_rate": 
0.0001435782617545288, "loss": 0.9126, "step": 1392 }, { "epoch": 0.5663752795283594, "grad_norm": 0.09727158397436142, "learning_rate": 0.00014353755342967639, "loss": 1.0088, "step": 1393 }, { "epoch": 0.5667818662329742, "grad_norm": 0.09165914356708527, "learning_rate": 0.00014349684510482394, "loss": 1.0443, "step": 1394 }, { "epoch": 0.5671884529375889, "grad_norm": 0.08791441470384598, "learning_rate": 0.00014345613677997152, "loss": 0.9708, "step": 1395 }, { "epoch": 0.5675950396422037, "grad_norm": 0.08658348023891449, "learning_rate": 0.00014341542845511908, "loss": 0.9347, "step": 1396 }, { "epoch": 0.5680016263468185, "grad_norm": 0.08867420256137848, "learning_rate": 0.00014337472013026666, "loss": 1.0331, "step": 1397 }, { "epoch": 0.5684082130514332, "grad_norm": 0.09206686913967133, "learning_rate": 0.0001433340118054142, "loss": 1.0469, "step": 1398 }, { "epoch": 0.568814799756048, "grad_norm": 0.09050408750772476, "learning_rate": 0.0001432933034805618, "loss": 0.9426, "step": 1399 }, { "epoch": 0.5692213864606628, "grad_norm": 0.08967922627925873, "learning_rate": 0.00014325259515570935, "loss": 0.9217, "step": 1400 }, { "epoch": 0.5696279731652775, "grad_norm": 0.08758019655942917, "learning_rate": 0.0001432118868308569, "loss": 0.9559, "step": 1401 }, { "epoch": 0.5700345598698923, "grad_norm": 0.09254743903875351, "learning_rate": 0.00014317117850600448, "loss": 0.9779, "step": 1402 }, { "epoch": 0.5704411465745071, "grad_norm": 0.09395452588796616, "learning_rate": 0.00014313047018115204, "loss": 1.0009, "step": 1403 }, { "epoch": 0.5708477332791217, "grad_norm": 0.09259745478630066, "learning_rate": 0.00014308976185629964, "loss": 1.0154, "step": 1404 }, { "epoch": 0.5712543199837365, "grad_norm": 0.09286468476057053, "learning_rate": 0.0001430490535314472, "loss": 1.0889, "step": 1405 }, { "epoch": 0.5716609066883513, "grad_norm": 0.08744499087333679, "learning_rate": 0.00014300834520659475, "loss": 0.9786, "step": 1406 }, { "epoch": 
0.572067493392966, "grad_norm": 0.09346942603588104, "learning_rate": 0.00014296763688174233, "loss": 0.9789, "step": 1407 }, { "epoch": 0.5724740800975808, "grad_norm": 0.09010860323905945, "learning_rate": 0.0001429269285568899, "loss": 1.018, "step": 1408 }, { "epoch": 0.5728806668021955, "grad_norm": 0.0881861224770546, "learning_rate": 0.00014288622023203747, "loss": 1.0898, "step": 1409 }, { "epoch": 0.5732872535068103, "grad_norm": 0.08293981850147247, "learning_rate": 0.00014284551190718502, "loss": 0.9129, "step": 1410 }, { "epoch": 0.5736938402114251, "grad_norm": 0.09111000597476959, "learning_rate": 0.0001428048035823326, "loss": 0.9556, "step": 1411 }, { "epoch": 0.5741004269160398, "grad_norm": 0.09435521066188812, "learning_rate": 0.00014276409525748016, "loss": 1.1178, "step": 1412 }, { "epoch": 0.5745070136206546, "grad_norm": 0.08865281194448471, "learning_rate": 0.0001427233869326277, "loss": 0.9682, "step": 1413 }, { "epoch": 0.5749136003252694, "grad_norm": 0.08608002215623856, "learning_rate": 0.0001426826786077753, "loss": 0.9144, "step": 1414 }, { "epoch": 0.5753201870298841, "grad_norm": 0.08543986827135086, "learning_rate": 0.00014264197028292285, "loss": 0.9314, "step": 1415 }, { "epoch": 0.5757267737344989, "grad_norm": 0.09068971127271652, "learning_rate": 0.00014260126195807046, "loss": 0.9835, "step": 1416 }, { "epoch": 0.5761333604391137, "grad_norm": 0.08598853647708893, "learning_rate": 0.000142560553633218, "loss": 0.9396, "step": 1417 }, { "epoch": 0.5765399471437284, "grad_norm": 0.08450654149055481, "learning_rate": 0.00014251984530836556, "loss": 0.9472, "step": 1418 }, { "epoch": 0.5769465338483432, "grad_norm": 0.09064414352178574, "learning_rate": 0.00014247913698351314, "loss": 1.0167, "step": 1419 }, { "epoch": 0.577353120552958, "grad_norm": 0.08948381245136261, "learning_rate": 0.0001424384286586607, "loss": 0.9459, "step": 1420 }, { "epoch": 0.5777597072575726, "grad_norm": 0.0811019316315651, "learning_rate": 
0.00014239772033380828, "loss": 0.8846, "step": 1421 }, { "epoch": 0.5781662939621874, "grad_norm": 0.09058842808008194, "learning_rate": 0.00014235701200895583, "loss": 0.9999, "step": 1422 }, { "epoch": 0.5785728806668022, "grad_norm": 0.09327298402786255, "learning_rate": 0.00014231630368410342, "loss": 1.0694, "step": 1423 }, { "epoch": 0.5789794673714169, "grad_norm": 0.08615417778491974, "learning_rate": 0.00014227559535925097, "loss": 0.9884, "step": 1424 }, { "epoch": 0.5793860540760317, "grad_norm": 0.09632913023233414, "learning_rate": 0.00014223488703439852, "loss": 1.0215, "step": 1425 }, { "epoch": 0.5797926407806465, "grad_norm": 0.0939357578754425, "learning_rate": 0.0001421941787095461, "loss": 1.0785, "step": 1426 }, { "epoch": 0.5801992274852612, "grad_norm": 0.08809401839971542, "learning_rate": 0.00014215347038469369, "loss": 0.9597, "step": 1427 }, { "epoch": 0.580605814189876, "grad_norm": 0.08961009234189987, "learning_rate": 0.00014211276205984127, "loss": 0.9988, "step": 1428 }, { "epoch": 0.5810124008944908, "grad_norm": 0.0883122980594635, "learning_rate": 0.00014207205373498882, "loss": 1.0566, "step": 1429 }, { "epoch": 0.5814189875991055, "grad_norm": 0.09150592237710953, "learning_rate": 0.00014203134541013637, "loss": 1.0875, "step": 1430 }, { "epoch": 0.5818255743037203, "grad_norm": 0.097344771027565, "learning_rate": 0.00014199063708528396, "loss": 1.0141, "step": 1431 }, { "epoch": 0.5822321610083351, "grad_norm": 0.09442117810249329, "learning_rate": 0.0001419499287604315, "loss": 0.9769, "step": 1432 }, { "epoch": 0.5826387477129498, "grad_norm": 0.08522289991378784, "learning_rate": 0.0001419092204355791, "loss": 0.9396, "step": 1433 }, { "epoch": 0.5830453344175646, "grad_norm": 0.0909838005900383, "learning_rate": 0.00014186851211072665, "loss": 0.9983, "step": 1434 }, { "epoch": 0.5834519211221793, "grad_norm": 0.09627141058444977, "learning_rate": 0.00014182780378587423, "loss": 1.0929, "step": 1435 }, { "epoch": 
0.583858507826794, "grad_norm": 0.08965554088354111, "learning_rate": 0.00014178709546102178, "loss": 0.9145, "step": 1436 }, { "epoch": 0.5842650945314088, "grad_norm": 0.09004207700490952, "learning_rate": 0.00014174638713616933, "loss": 0.9921, "step": 1437 }, { "epoch": 0.5846716812360235, "grad_norm": 0.09295787662267685, "learning_rate": 0.00014170567881131692, "loss": 0.9756, "step": 1438 }, { "epoch": 0.5850782679406383, "grad_norm": 0.0893683210015297, "learning_rate": 0.0001416649704864645, "loss": 0.8974, "step": 1439 }, { "epoch": 0.5854848546452531, "grad_norm": 0.08255141973495483, "learning_rate": 0.00014162426216161205, "loss": 0.9201, "step": 1440 }, { "epoch": 0.5858914413498678, "grad_norm": 0.0966111272573471, "learning_rate": 0.00014158355383675963, "loss": 1.0611, "step": 1441 }, { "epoch": 0.5862980280544826, "grad_norm": 0.09531056135892868, "learning_rate": 0.0001415428455119072, "loss": 1.0989, "step": 1442 }, { "epoch": 0.5867046147590974, "grad_norm": 0.09289577603340149, "learning_rate": 0.00014150213718705477, "loss": 1.0232, "step": 1443 }, { "epoch": 0.5871112014637121, "grad_norm": 0.10038848221302032, "learning_rate": 0.00014146142886220232, "loss": 1.013, "step": 1444 }, { "epoch": 0.5875177881683269, "grad_norm": 0.09008078277111053, "learning_rate": 0.0001414207205373499, "loss": 0.9039, "step": 1445 }, { "epoch": 0.5879243748729417, "grad_norm": 0.08941890299320221, "learning_rate": 0.00014138001221249746, "loss": 0.8866, "step": 1446 }, { "epoch": 0.5883309615775564, "grad_norm": 0.08407185226678848, "learning_rate": 0.00014133930388764504, "loss": 0.9468, "step": 1447 }, { "epoch": 0.5887375482821712, "grad_norm": 0.096216581761837, "learning_rate": 0.0001412985955627926, "loss": 1.0516, "step": 1448 }, { "epoch": 0.589144134986786, "grad_norm": 0.09403221309185028, "learning_rate": 0.00014125788723794015, "loss": 0.9771, "step": 1449 }, { "epoch": 0.5895507216914007, "grad_norm": 0.08534131199121475, "learning_rate": 
0.00014121717891308775, "loss": 0.9012, "step": 1450 }, { "epoch": 0.5899573083960155, "grad_norm": 0.09011968225240707, "learning_rate": 0.0001411764705882353, "loss": 0.9724, "step": 1451 }, { "epoch": 0.5903638951006303, "grad_norm": 0.08891688287258148, "learning_rate": 0.00014113576226338286, "loss": 0.9225, "step": 1452 }, { "epoch": 0.5907704818052449, "grad_norm": 0.08605680614709854, "learning_rate": 0.00014109505393853044, "loss": 0.9403, "step": 1453 }, { "epoch": 0.5911770685098597, "grad_norm": 0.08760562539100647, "learning_rate": 0.000141054345613678, "loss": 0.9728, "step": 1454 }, { "epoch": 0.5915836552144745, "grad_norm": 0.08932702243328094, "learning_rate": 0.00014101363728882558, "loss": 1.0377, "step": 1455 }, { "epoch": 0.5919902419190892, "grad_norm": 0.09998058527708054, "learning_rate": 0.00014097292896397313, "loss": 1.0434, "step": 1456 }, { "epoch": 0.592396828623704, "grad_norm": 0.09377194941043854, "learning_rate": 0.00014093222063912071, "loss": 0.9308, "step": 1457 }, { "epoch": 0.5928034153283188, "grad_norm": 0.08387821912765503, "learning_rate": 0.00014089151231426827, "loss": 0.8875, "step": 1458 }, { "epoch": 0.5932100020329335, "grad_norm": 0.08756202459335327, "learning_rate": 0.00014085080398941582, "loss": 1.0069, "step": 1459 }, { "epoch": 0.5936165887375483, "grad_norm": 0.08637526631355286, "learning_rate": 0.0001408100956645634, "loss": 0.9067, "step": 1460 }, { "epoch": 0.594023175442163, "grad_norm": 0.08818566054105759, "learning_rate": 0.00014076938733971096, "loss": 0.9375, "step": 1461 }, { "epoch": 0.5944297621467778, "grad_norm": 0.09050768613815308, "learning_rate": 0.00014072867901485857, "loss": 0.9742, "step": 1462 }, { "epoch": 0.5948363488513926, "grad_norm": 0.08764854818582535, "learning_rate": 0.00014068797069000612, "loss": 0.8995, "step": 1463 }, { "epoch": 0.5952429355560073, "grad_norm": 0.0841783955693245, "learning_rate": 0.00014064726236515367, "loss": 0.9179, "step": 1464 }, { "epoch": 
0.5956495222606221, "grad_norm": 0.08915995806455612, "learning_rate": 0.00014060655404030126, "loss": 0.9973, "step": 1465 }, { "epoch": 0.5960561089652369, "grad_norm": 0.08400030434131622, "learning_rate": 0.0001405658457154488, "loss": 0.9374, "step": 1466 }, { "epoch": 0.5964626956698516, "grad_norm": 0.08585075289011002, "learning_rate": 0.0001405251373905964, "loss": 0.9371, "step": 1467 }, { "epoch": 0.5968692823744663, "grad_norm": 5.189364433288574, "learning_rate": 0.00014048442906574395, "loss": 0.9443, "step": 1468 }, { "epoch": 0.5972758690790811, "grad_norm": 0.11196129769086838, "learning_rate": 0.00014044372074089153, "loss": 0.8475, "step": 1469 }, { "epoch": 0.5976824557836958, "grad_norm": 0.13671468198299408, "learning_rate": 0.00014040301241603908, "loss": 0.9082, "step": 1470 }, { "epoch": 0.5980890424883106, "grad_norm": 0.1605953872203827, "learning_rate": 0.00014036230409118663, "loss": 1.0311, "step": 1471 }, { "epoch": 0.5984956291929254, "grad_norm": 0.1232098862528801, "learning_rate": 0.00014032159576633422, "loss": 0.9131, "step": 1472 }, { "epoch": 0.5989022158975401, "grad_norm": 0.10262708365917206, "learning_rate": 0.0001402808874414818, "loss": 0.998, "step": 1473 }, { "epoch": 0.5993088026021549, "grad_norm": 0.10314701497554779, "learning_rate": 0.00014024017911662938, "loss": 0.9527, "step": 1474 }, { "epoch": 0.5997153893067697, "grad_norm": 0.10268500447273254, "learning_rate": 0.00014019947079177693, "loss": 1.0287, "step": 1475 }, { "epoch": 0.6001219760113844, "grad_norm": 0.10218296945095062, "learning_rate": 0.00014015876246692449, "loss": 1.0562, "step": 1476 }, { "epoch": 0.6005285627159992, "grad_norm": 0.10347164422273636, "learning_rate": 0.00014011805414207207, "loss": 1.0227, "step": 1477 }, { "epoch": 0.600935149420614, "grad_norm": 0.09892403334379196, "learning_rate": 0.00014007734581721962, "loss": 0.8526, "step": 1478 }, { "epoch": 0.6013417361252287, "grad_norm": 0.10327230393886566, "learning_rate": 
0.0001400366374923672, "loss": 0.9473, "step": 1479 }, { "epoch": 0.6017483228298435, "grad_norm": 0.10661543160676956, "learning_rate": 0.00013999592916751476, "loss": 1.0807, "step": 1480 }, { "epoch": 0.6021549095344583, "grad_norm": 0.10507283359766006, "learning_rate": 0.00013995522084266234, "loss": 1.0405, "step": 1481 }, { "epoch": 0.602561496239073, "grad_norm": 0.09952735900878906, "learning_rate": 0.0001399145125178099, "loss": 1.0233, "step": 1482 }, { "epoch": 0.6029680829436878, "grad_norm": 0.0861600711941719, "learning_rate": 0.00013987380419295745, "loss": 0.9264, "step": 1483 }, { "epoch": 0.6033746696483026, "grad_norm": 0.09560652077198029, "learning_rate": 0.00013983309586810503, "loss": 0.961, "step": 1484 }, { "epoch": 0.6037812563529172, "grad_norm": 0.09961631894111633, "learning_rate": 0.0001397923875432526, "loss": 1.0687, "step": 1485 }, { "epoch": 0.604187843057532, "grad_norm": 0.10031979531049728, "learning_rate": 0.0001397516792184002, "loss": 1.0088, "step": 1486 }, { "epoch": 0.6045944297621467, "grad_norm": 0.09212915599346161, "learning_rate": 0.00013971097089354774, "loss": 1.0183, "step": 1487 }, { "epoch": 0.6050010164667615, "grad_norm": 0.09258651733398438, "learning_rate": 0.0001396702625686953, "loss": 0.9473, "step": 1488 }, { "epoch": 0.6054076031713763, "grad_norm": 0.09315144270658493, "learning_rate": 0.00013962955424384288, "loss": 1.0049, "step": 1489 }, { "epoch": 0.605814189875991, "grad_norm": 0.08820061385631561, "learning_rate": 0.00013958884591899043, "loss": 0.9485, "step": 1490 }, { "epoch": 0.6062207765806058, "grad_norm": 852.4391479492188, "learning_rate": 0.00013954813759413801, "loss": 0.9986, "step": 1491 }, { "epoch": 0.6066273632852206, "grad_norm": 0.0940237045288086, "learning_rate": 0.00013950742926928557, "loss": 1.0065, "step": 1492 }, { "epoch": 0.6070339499898353, "grad_norm": 0.09184816479682922, "learning_rate": 0.00013946672094443315, "loss": 1.0186, "step": 1493 }, { "epoch": 
0.6074405366944501, "grad_norm": 0.08479593694210052, "learning_rate": 0.0001394260126195807, "loss": 0.9213, "step": 1494 }, { "epoch": 0.6078471233990649, "grad_norm": 0.10088304430246353, "learning_rate": 0.00013938530429472826, "loss": 1.1029, "step": 1495 }, { "epoch": 0.6082537101036796, "grad_norm": 0.08876685053110123, "learning_rate": 0.00013934459596987584, "loss": 0.9532, "step": 1496 }, { "epoch": 0.6086602968082944, "grad_norm": 0.10209202021360397, "learning_rate": 0.00013930388764502342, "loss": 1.0292, "step": 1497 }, { "epoch": 0.6090668835129092, "grad_norm": 0.09144751727581024, "learning_rate": 0.000139263179320171, "loss": 0.9214, "step": 1498 }, { "epoch": 0.6094734702175238, "grad_norm": 0.08805158734321594, "learning_rate": 0.00013922247099531856, "loss": 0.8983, "step": 1499 }, { "epoch": 0.6098800569221386, "grad_norm": 0.0918235033750534, "learning_rate": 0.0001391817626704661, "loss": 0.9198, "step": 1500 }, { "epoch": 0.6102866436267534, "grad_norm": 0.09051943570375443, "learning_rate": 0.0001391410543456137, "loss": 0.9885, "step": 1501 }, { "epoch": 0.6106932303313681, "grad_norm": 0.09222988784313202, "learning_rate": 0.00013910034602076124, "loss": 0.8739, "step": 1502 }, { "epoch": 0.6110998170359829, "grad_norm": 0.1040385290980339, "learning_rate": 0.00013905963769590883, "loss": 1.0559, "step": 1503 }, { "epoch": 0.6115064037405977, "grad_norm": 0.09393730759620667, "learning_rate": 0.00013901892937105638, "loss": 1.0138, "step": 1504 }, { "epoch": 0.6119129904452124, "grad_norm": 0.09828665107488632, "learning_rate": 0.00013897822104620396, "loss": 0.99, "step": 1505 }, { "epoch": 0.6123195771498272, "grad_norm": 0.08924803137779236, "learning_rate": 0.00013893751272135152, "loss": 0.9948, "step": 1506 }, { "epoch": 0.612726163854442, "grad_norm": 0.09292086958885193, "learning_rate": 0.00013889680439649907, "loss": 0.9319, "step": 1507 }, { "epoch": 0.6131327505590567, "grad_norm": 0.09370770305395126, "learning_rate": 
0.00013885609607164668, "loss": 0.9535, "step": 1508 }, { "epoch": 0.6135393372636715, "grad_norm": 0.0799320712685585, "learning_rate": 0.00013881538774679423, "loss": 0.8244, "step": 1509 }, { "epoch": 0.6139459239682863, "grad_norm": 0.0891839936375618, "learning_rate": 0.0001387746794219418, "loss": 1.0157, "step": 1510 }, { "epoch": 0.614352510672901, "grad_norm": 0.09138181805610657, "learning_rate": 0.00013873397109708937, "loss": 0.9066, "step": 1511 }, { "epoch": 0.6147590973775158, "grad_norm": 0.09552167356014252, "learning_rate": 0.00013869326277223692, "loss": 1.0479, "step": 1512 }, { "epoch": 0.6151656840821305, "grad_norm": 0.09162238240242004, "learning_rate": 0.0001386525544473845, "loss": 1.016, "step": 1513 }, { "epoch": 0.6155722707867453, "grad_norm": 0.09654813259840012, "learning_rate": 0.00013861184612253206, "loss": 0.993, "step": 1514 }, { "epoch": 0.61597885749136, "grad_norm": 0.0941232442855835, "learning_rate": 0.00013857113779767964, "loss": 1.0068, "step": 1515 }, { "epoch": 0.6163854441959747, "grad_norm": 0.0947796180844307, "learning_rate": 0.0001385304294728272, "loss": 1.0618, "step": 1516 }, { "epoch": 0.6167920309005895, "grad_norm": 0.08732841163873672, "learning_rate": 0.00013848972114797477, "loss": 0.9348, "step": 1517 }, { "epoch": 0.6171986176052043, "grad_norm": 0.09297166019678116, "learning_rate": 0.00013844901282312233, "loss": 1.0029, "step": 1518 }, { "epoch": 0.617605204309819, "grad_norm": 0.09339512884616852, "learning_rate": 0.00013840830449826988, "loss": 1.0557, "step": 1519 }, { "epoch": 0.6180117910144338, "grad_norm": 0.09277696907520294, "learning_rate": 0.0001383675961734175, "loss": 0.9322, "step": 1520 }, { "epoch": 0.6184183777190486, "grad_norm": 0.08687552809715271, "learning_rate": 0.00013832688784856504, "loss": 0.9162, "step": 1521 }, { "epoch": 0.6188249644236633, "grad_norm": 0.08844698965549469, "learning_rate": 0.00013828617952371262, "loss": 0.9958, "step": 1522 }, { "epoch": 
0.6192315511282781, "grad_norm": 0.09178265184164047, "learning_rate": 0.00013824547119886018, "loss": 0.8926, "step": 1523 }, { "epoch": 0.6196381378328929, "grad_norm": 0.09063131362199783, "learning_rate": 0.00013820476287400773, "loss": 1.0047, "step": 1524 }, { "epoch": 0.6200447245375076, "grad_norm": 0.09506388753652573, "learning_rate": 0.00013816405454915531, "loss": 1.112, "step": 1525 }, { "epoch": 0.6204513112421224, "grad_norm": 0.0870959535241127, "learning_rate": 0.00013812334622430287, "loss": 1.0074, "step": 1526 }, { "epoch": 0.6208578979467372, "grad_norm": 0.08569116145372391, "learning_rate": 0.00013808263789945045, "loss": 0.9702, "step": 1527 }, { "epoch": 0.6212644846513519, "grad_norm": 0.09870801120996475, "learning_rate": 0.000138041929574598, "loss": 1.0475, "step": 1528 }, { "epoch": 0.6216710713559667, "grad_norm": 0.09899303317070007, "learning_rate": 0.00013800122124974558, "loss": 1.0806, "step": 1529 }, { "epoch": 0.6220776580605815, "grad_norm": 0.09373268485069275, "learning_rate": 0.00013796051292489314, "loss": 1.0295, "step": 1530 }, { "epoch": 0.6224842447651961, "grad_norm": 0.09074109047651291, "learning_rate": 0.00013791980460004072, "loss": 0.9462, "step": 1531 }, { "epoch": 0.622890831469811, "grad_norm": 0.09384390711784363, "learning_rate": 0.0001378790962751883, "loss": 1.0606, "step": 1532 }, { "epoch": 0.6232974181744257, "grad_norm": 0.0943252295255661, "learning_rate": 0.00013783838795033585, "loss": 1.1258, "step": 1533 }, { "epoch": 0.6237040048790404, "grad_norm": 0.08777976781129837, "learning_rate": 0.0001377976796254834, "loss": 0.9621, "step": 1534 }, { "epoch": 0.6241105915836552, "grad_norm": 0.09006936848163605, "learning_rate": 0.000137756971300631, "loss": 0.9815, "step": 1535 }, { "epoch": 0.62451717828827, "grad_norm": 0.1147993803024292, "learning_rate": 0.00013771626297577854, "loss": 0.9917, "step": 1536 }, { "epoch": 0.6249237649928847, "grad_norm": 0.09408791363239288, "learning_rate": 
0.00013767555465092613, "loss": 1.024, "step": 1537 }, { "epoch": 0.6253303516974995, "grad_norm": 0.09530872851610184, "learning_rate": 0.00013763484632607368, "loss": 1.0339, "step": 1538 }, { "epoch": 0.6257369384021142, "grad_norm": 0.09337632358074188, "learning_rate": 0.00013759413800122126, "loss": 1.031, "step": 1539 }, { "epoch": 0.626143525106729, "grad_norm": 0.08535618335008621, "learning_rate": 0.00013755342967636881, "loss": 0.9597, "step": 1540 }, { "epoch": 0.6265501118113438, "grad_norm": 0.09226896613836288, "learning_rate": 0.0001375127213515164, "loss": 1.0017, "step": 1541 }, { "epoch": 0.6269566985159585, "grad_norm": 0.08831244707107544, "learning_rate": 0.00013747201302666395, "loss": 0.9695, "step": 1542 }, { "epoch": 0.6273632852205733, "grad_norm": 0.07351087778806686, "learning_rate": 0.00013743130470181153, "loss": 0.8212, "step": 1543 }, { "epoch": 0.6277698719251881, "grad_norm": 0.09002837538719177, "learning_rate": 0.0001373905963769591, "loss": 0.9882, "step": 1544 }, { "epoch": 0.6281764586298028, "grad_norm": 0.09743615984916687, "learning_rate": 0.00013734988805210667, "loss": 1.0246, "step": 1545 }, { "epoch": 0.6285830453344176, "grad_norm": 0.09634383767843246, "learning_rate": 0.00013730917972725422, "loss": 1.0452, "step": 1546 }, { "epoch": 0.6289896320390324, "grad_norm": 0.09213767200708389, "learning_rate": 0.0001372684714024018, "loss": 1.0618, "step": 1547 }, { "epoch": 0.629396218743647, "grad_norm": 0.08717525005340576, "learning_rate": 0.00013722776307754936, "loss": 0.9834, "step": 1548 }, { "epoch": 0.6298028054482618, "grad_norm": 0.08541104942560196, "learning_rate": 0.00013718705475269694, "loss": 0.9332, "step": 1549 }, { "epoch": 0.6302093921528766, "grad_norm": 0.09747796505689621, "learning_rate": 0.0001371463464278445, "loss": 1.0459, "step": 1550 }, { "epoch": 0.6306159788574913, "grad_norm": 0.09076548367738724, "learning_rate": 0.00013710563810299207, "loss": 0.9951, "step": 1551 }, { "epoch": 
0.6310225655621061, "grad_norm": 0.08712035417556763, "learning_rate": 0.00013706492977813963, "loss": 0.9876, "step": 1552 }, { "epoch": 0.6314291522667209, "grad_norm": 0.09062602370977402, "learning_rate": 0.00013702422145328718, "loss": 0.9246, "step": 1553 }, { "epoch": 0.6318357389713356, "grad_norm": 0.0910324677824974, "learning_rate": 0.0001369835131284348, "loss": 0.884, "step": 1554 }, { "epoch": 0.6322423256759504, "grad_norm": 0.09255006164312363, "learning_rate": 0.00013694280480358234, "loss": 1.0172, "step": 1555 }, { "epoch": 0.6326489123805652, "grad_norm": 0.0950237512588501, "learning_rate": 0.00013690209647872992, "loss": 1.0263, "step": 1556 }, { "epoch": 0.6330554990851799, "grad_norm": 0.09103222191333771, "learning_rate": 0.00013686138815387748, "loss": 1.0245, "step": 1557 }, { "epoch": 0.6334620857897947, "grad_norm": 0.09043283015489578, "learning_rate": 0.00013682067982902503, "loss": 1.0197, "step": 1558 }, { "epoch": 0.6338686724944095, "grad_norm": 0.08311565965414047, "learning_rate": 0.0001367799715041726, "loss": 0.9372, "step": 1559 }, { "epoch": 0.6342752591990242, "grad_norm": 0.09156910330057144, "learning_rate": 0.00013673926317932017, "loss": 1.0579, "step": 1560 }, { "epoch": 0.634681845903639, "grad_norm": 0.08262625336647034, "learning_rate": 0.00013669855485446775, "loss": 0.9047, "step": 1561 }, { "epoch": 0.6350884326082538, "grad_norm": 0.09856829047203064, "learning_rate": 0.0001366578465296153, "loss": 1.0933, "step": 1562 }, { "epoch": 0.6354950193128684, "grad_norm": 0.09453229606151581, "learning_rate": 0.00013661713820476288, "loss": 1.0361, "step": 1563 }, { "epoch": 0.6359016060174832, "grad_norm": 0.09291166812181473, "learning_rate": 0.00013657642987991044, "loss": 0.9099, "step": 1564 }, { "epoch": 0.636308192722098, "grad_norm": 0.09416390210390091, "learning_rate": 0.000136535721555058, "loss": 0.9919, "step": 1565 }, { "epoch": 0.6367147794267127, "grad_norm": 0.08964714407920837, "learning_rate": 
0.0001364950132302056, "loss": 1.0352, "step": 1566 }, { "epoch": 0.6371213661313275, "grad_norm": 0.1002277210354805, "learning_rate": 0.00013645430490535315, "loss": 1.0121, "step": 1567 }, { "epoch": 0.6375279528359422, "grad_norm": 0.09013176709413528, "learning_rate": 0.00013641359658050074, "loss": 0.943, "step": 1568 }, { "epoch": 0.637934539540557, "grad_norm": 0.09195754677057266, "learning_rate": 0.0001363728882556483, "loss": 1.0101, "step": 1569 }, { "epoch": 0.6383411262451718, "grad_norm": 0.09277264773845673, "learning_rate": 0.00013633217993079584, "loss": 1.0411, "step": 1570 }, { "epoch": 0.6387477129497865, "grad_norm": 0.09677015990018845, "learning_rate": 0.00013629147160594342, "loss": 1.0047, "step": 1571 }, { "epoch": 0.6391542996544013, "grad_norm": 0.09898823499679565, "learning_rate": 0.00013625076328109098, "loss": 1.0897, "step": 1572 }, { "epoch": 0.6395608863590161, "grad_norm": 0.09134434908628464, "learning_rate": 0.00013621005495623856, "loss": 1.0471, "step": 1573 }, { "epoch": 0.6399674730636308, "grad_norm": 0.09015446901321411, "learning_rate": 0.00013616934663138611, "loss": 0.9521, "step": 1574 }, { "epoch": 0.6403740597682456, "grad_norm": 0.09361066669225693, "learning_rate": 0.0001361286383065337, "loss": 1.0378, "step": 1575 }, { "epoch": 0.6407806464728604, "grad_norm": 0.10741425305604935, "learning_rate": 0.00013608792998168125, "loss": 1.0042, "step": 1576 }, { "epoch": 0.6411872331774751, "grad_norm": 0.09339326620101929, "learning_rate": 0.00013604722165682883, "loss": 0.9641, "step": 1577 }, { "epoch": 0.6415938198820899, "grad_norm": 0.09786434471607208, "learning_rate": 0.0001360065133319764, "loss": 0.9643, "step": 1578 }, { "epoch": 0.6420004065867047, "grad_norm": 0.08545216172933578, "learning_rate": 0.00013596580500712397, "loss": 0.9413, "step": 1579 }, { "epoch": 0.6424069932913193, "grad_norm": 0.09042125940322876, "learning_rate": 0.00013592509668227155, "loss": 0.9105, "step": 1580 }, { "epoch": 
0.6428135799959341, "grad_norm": 0.08778928220272064, "learning_rate": 0.0001358843883574191, "loss": 1.0262, "step": 1581 }, { "epoch": 0.6432201667005489, "grad_norm": 0.08905961364507675, "learning_rate": 0.00013584368003256666, "loss": 1.0317, "step": 1582 }, { "epoch": 0.6436267534051636, "grad_norm": 0.09242242574691772, "learning_rate": 0.00013580297170771424, "loss": 0.9415, "step": 1583 }, { "epoch": 0.6440333401097784, "grad_norm": 0.08425027132034302, "learning_rate": 0.0001357622633828618, "loss": 0.8964, "step": 1584 }, { "epoch": 0.6444399268143932, "grad_norm": 0.0858960896730423, "learning_rate": 0.00013572155505800937, "loss": 0.9441, "step": 1585 }, { "epoch": 0.6448465135190079, "grad_norm": 0.09374553710222244, "learning_rate": 0.00013568084673315693, "loss": 0.9784, "step": 1586 }, { "epoch": 0.6452531002236227, "grad_norm": 0.09684876352548599, "learning_rate": 0.0001356401384083045, "loss": 0.9867, "step": 1587 }, { "epoch": 0.6456596869282375, "grad_norm": 0.0853944793343544, "learning_rate": 0.00013559943008345206, "loss": 0.9136, "step": 1588 }, { "epoch": 0.6460662736328522, "grad_norm": 0.0905388742685318, "learning_rate": 0.00013555872175859964, "loss": 0.9335, "step": 1589 }, { "epoch": 0.646472860337467, "grad_norm": 0.08938907831907272, "learning_rate": 0.00013551801343374722, "loss": 0.9889, "step": 1590 }, { "epoch": 0.6468794470420818, "grad_norm": 0.08857300132513046, "learning_rate": 0.00013547730510889478, "loss": 0.9986, "step": 1591 }, { "epoch": 0.6472860337466965, "grad_norm": 0.09151600301265717, "learning_rate": 0.00013543659678404236, "loss": 1.0025, "step": 1592 }, { "epoch": 0.6476926204513113, "grad_norm": 0.08548744767904282, "learning_rate": 0.0001353958884591899, "loss": 1.012, "step": 1593 }, { "epoch": 0.648099207155926, "grad_norm": 0.08982311189174652, "learning_rate": 0.00013535518013433747, "loss": 0.9827, "step": 1594 }, { "epoch": 0.6485057938605407, "grad_norm": 0.09153248369693756, "learning_rate": 
0.00013531447180948505, "loss": 0.9818, "step": 1595 }, { "epoch": 0.6489123805651555, "grad_norm": 0.1022023931145668, "learning_rate": 0.0001352737634846326, "loss": 1.054, "step": 1596 }, { "epoch": 0.6493189672697702, "grad_norm": 0.09080366045236588, "learning_rate": 0.00013523305515978018, "loss": 0.9667, "step": 1597 }, { "epoch": 0.649725553974385, "grad_norm": 0.0935145765542984, "learning_rate": 0.00013519234683492774, "loss": 0.9668, "step": 1598 }, { "epoch": 0.6501321406789998, "grad_norm": 0.09892317652702332, "learning_rate": 0.00013515163851007532, "loss": 1.012, "step": 1599 }, { "epoch": 0.6505387273836145, "grad_norm": 0.09385450929403305, "learning_rate": 0.0001351109301852229, "loss": 1.1, "step": 1600 }, { "epoch": 0.6509453140882293, "grad_norm": 0.09270552545785904, "learning_rate": 0.00013507022186037045, "loss": 0.9509, "step": 1601 }, { "epoch": 0.6513519007928441, "grad_norm": 0.09725828468799591, "learning_rate": 0.00013502951353551804, "loss": 1.0435, "step": 1602 }, { "epoch": 0.6517584874974588, "grad_norm": 0.096989206969738, "learning_rate": 0.0001349888052106656, "loss": 1.0152, "step": 1603 }, { "epoch": 0.6521650742020736, "grad_norm": 0.09739220887422562, "learning_rate": 0.00013494809688581317, "loss": 0.9834, "step": 1604 }, { "epoch": 0.6525716609066884, "grad_norm": 0.07972859591245651, "learning_rate": 0.00013490738856096072, "loss": 0.8542, "step": 1605 }, { "epoch": 0.6529782476113031, "grad_norm": 0.09360089153051376, "learning_rate": 0.00013486668023610828, "loss": 1.0077, "step": 1606 }, { "epoch": 0.6533848343159179, "grad_norm": 0.08999258279800415, "learning_rate": 0.00013482597191125586, "loss": 0.8802, "step": 1607 }, { "epoch": 0.6537914210205327, "grad_norm": 0.0885370746254921, "learning_rate": 0.00013478526358640341, "loss": 0.9867, "step": 1608 }, { "epoch": 0.6541980077251474, "grad_norm": 0.0924537256360054, "learning_rate": 0.000134744555261551, "loss": 0.9653, "step": 1609 }, { "epoch": 
0.6546045944297622, "grad_norm": 0.08841130137443542, "learning_rate": 0.00013470384693669855, "loss": 0.9005, "step": 1610 }, { "epoch": 0.655011181134377, "grad_norm": 0.0968664139509201, "learning_rate": 0.00013466313861184613, "loss": 1.1191, "step": 1611 }, { "epoch": 0.6554177678389916, "grad_norm": 0.0909125879406929, "learning_rate": 0.0001346224302869937, "loss": 1.0247, "step": 1612 }, { "epoch": 0.6558243545436064, "grad_norm": 0.1032382994890213, "learning_rate": 0.00013458172196214127, "loss": 1.021, "step": 1613 }, { "epoch": 0.6562309412482212, "grad_norm": 0.08680799603462219, "learning_rate": 0.00013454101363728885, "loss": 0.9425, "step": 1614 }, { "epoch": 0.6566375279528359, "grad_norm": 0.08841447532176971, "learning_rate": 0.0001345003053124364, "loss": 0.9105, "step": 1615 }, { "epoch": 0.6570441146574507, "grad_norm": 0.09229273349046707, "learning_rate": 0.00013445959698758398, "loss": 0.9492, "step": 1616 }, { "epoch": 0.6574507013620655, "grad_norm": 0.09328685700893402, "learning_rate": 0.00013441888866273154, "loss": 1.0456, "step": 1617 }, { "epoch": 0.6578572880666802, "grad_norm": 0.08448266983032227, "learning_rate": 0.0001343781803378791, "loss": 0.9209, "step": 1618 }, { "epoch": 0.658263874771295, "grad_norm": 0.09344170242547989, "learning_rate": 0.00013433747201302667, "loss": 1.0107, "step": 1619 }, { "epoch": 0.6586704614759097, "grad_norm": 0.08675231039524078, "learning_rate": 0.00013429676368817423, "loss": 0.989, "step": 1620 }, { "epoch": 0.6590770481805245, "grad_norm": 0.09648977965116501, "learning_rate": 0.0001342560553633218, "loss": 1.079, "step": 1621 }, { "epoch": 0.6594836348851393, "grad_norm": 0.08079522848129272, "learning_rate": 0.00013421534703846936, "loss": 0.862, "step": 1622 }, { "epoch": 0.659890221589754, "grad_norm": 0.1015796810388565, "learning_rate": 0.00013417463871361694, "loss": 1.136, "step": 1623 }, { "epoch": 0.6602968082943688, "grad_norm": 0.08189254999160767, "learning_rate": 
0.00013413393038876452, "loss": 0.9161, "step": 1624 }, { "epoch": 0.6607033949989836, "grad_norm": 0.09128617495298386, "learning_rate": 0.00013409322206391208, "loss": 0.9605, "step": 1625 }, { "epoch": 0.6611099817035982, "grad_norm": 0.09256181865930557, "learning_rate": 0.00013405251373905966, "loss": 0.9844, "step": 1626 }, { "epoch": 0.661516568408213, "grad_norm": 0.092183917760849, "learning_rate": 0.0001340118054142072, "loss": 1.0694, "step": 1627 }, { "epoch": 0.6619231551128278, "grad_norm": 0.10037260502576828, "learning_rate": 0.00013397109708935477, "loss": 1.1395, "step": 1628 }, { "epoch": 0.6623297418174425, "grad_norm": 0.08758927881717682, "learning_rate": 0.00013393038876450235, "loss": 0.9494, "step": 1629 }, { "epoch": 0.6627363285220573, "grad_norm": 0.08407801389694214, "learning_rate": 0.0001338896804396499, "loss": 0.8953, "step": 1630 }, { "epoch": 0.6631429152266721, "grad_norm": 0.10363683849573135, "learning_rate": 0.00013384897211479748, "loss": 1.0613, "step": 1631 }, { "epoch": 0.6635495019312868, "grad_norm": 0.0939316600561142, "learning_rate": 0.00013380826378994504, "loss": 0.9668, "step": 1632 }, { "epoch": 0.6639560886359016, "grad_norm": 0.097317174077034, "learning_rate": 0.00013376755546509262, "loss": 1.1024, "step": 1633 }, { "epoch": 0.6643626753405164, "grad_norm": 0.10394629091024399, "learning_rate": 0.00013372684714024017, "loss": 1.0706, "step": 1634 }, { "epoch": 0.6647692620451311, "grad_norm": 0.09405668824911118, "learning_rate": 0.00013368613881538775, "loss": 1.0496, "step": 1635 }, { "epoch": 0.6651758487497459, "grad_norm": 0.08976142853498459, "learning_rate": 0.00013364543049053533, "loss": 0.9811, "step": 1636 }, { "epoch": 0.6655824354543607, "grad_norm": 0.09220533818006516, "learning_rate": 0.0001336047221656829, "loss": 1.0655, "step": 1637 }, { "epoch": 0.6659890221589754, "grad_norm": 0.09313860535621643, "learning_rate": 0.00013356401384083047, "loss": 0.9664, "step": 1638 }, { "epoch": 
0.6663956088635902, "grad_norm": 0.08653722703456879, "learning_rate": 0.00013352330551597802, "loss": 0.91, "step": 1639 }, { "epoch": 0.666802195568205, "grad_norm": 0.09094205498695374, "learning_rate": 0.00013348259719112558, "loss": 1.0011, "step": 1640 }, { "epoch": 0.6672087822728197, "grad_norm": 0.09969717264175415, "learning_rate": 0.00013344188886627316, "loss": 1.0853, "step": 1641 }, { "epoch": 0.6676153689774345, "grad_norm": 0.08996472507715225, "learning_rate": 0.0001334011805414207, "loss": 0.9693, "step": 1642 }, { "epoch": 0.6680219556820493, "grad_norm": 0.08930208534002304, "learning_rate": 0.0001333604722165683, "loss": 1.0078, "step": 1643 }, { "epoch": 0.6684285423866639, "grad_norm": 0.09799496084451675, "learning_rate": 0.00013331976389171585, "loss": 1.0764, "step": 1644 }, { "epoch": 0.6688351290912787, "grad_norm": 0.1000712588429451, "learning_rate": 0.00013327905556686343, "loss": 1.0769, "step": 1645 }, { "epoch": 0.6692417157958934, "grad_norm": 0.09583432227373123, "learning_rate": 0.000133238347242011, "loss": 1.0311, "step": 1646 }, { "epoch": 0.6696483025005082, "grad_norm": 0.10381270945072174, "learning_rate": 0.00013319763891715857, "loss": 1.0879, "step": 1647 }, { "epoch": 0.670054889205123, "grad_norm": 0.09310910850763321, "learning_rate": 0.00013315693059230615, "loss": 0.9875, "step": 1648 }, { "epoch": 0.6704614759097377, "grad_norm": 0.09691096842288971, "learning_rate": 0.0001331162222674537, "loss": 1.001, "step": 1649 }, { "epoch": 0.6708680626143525, "grad_norm": 0.08782976865768433, "learning_rate": 0.00013307551394260128, "loss": 1.0192, "step": 1650 }, { "epoch": 0.6712746493189673, "grad_norm": 0.07851552218198776, "learning_rate": 0.00013303480561774884, "loss": 0.8345, "step": 1651 }, { "epoch": 0.671681236023582, "grad_norm": 0.09602700173854828, "learning_rate": 0.0001329940972928964, "loss": 0.9761, "step": 1652 }, { "epoch": 0.6720878227281968, "grad_norm": 0.09454475343227386, "learning_rate": 
0.00013295338896804397, "loss": 0.9775, "step": 1653 }, { "epoch": 0.6724944094328116, "grad_norm": 0.09530249238014221, "learning_rate": 0.00013291268064319153, "loss": 0.9155, "step": 1654 }, { "epoch": 0.6729009961374263, "grad_norm": 0.09022442251443863, "learning_rate": 0.0001328719723183391, "loss": 0.9651, "step": 1655 }, { "epoch": 0.6733075828420411, "grad_norm": 0.09096933156251907, "learning_rate": 0.00013283126399348666, "loss": 1.003, "step": 1656 }, { "epoch": 0.6737141695466559, "grad_norm": 0.09274188429117203, "learning_rate": 0.00013279055566863424, "loss": 1.0024, "step": 1657 }, { "epoch": 0.6741207562512705, "grad_norm": 0.09318679571151733, "learning_rate": 0.00013274984734378182, "loss": 0.9613, "step": 1658 }, { "epoch": 0.6745273429558853, "grad_norm": 0.1088038757443428, "learning_rate": 0.00013270913901892938, "loss": 0.9718, "step": 1659 }, { "epoch": 0.6749339296605001, "grad_norm": 0.08833767473697662, "learning_rate": 0.00013266843069407696, "loss": 0.8893, "step": 1660 }, { "epoch": 0.6753405163651148, "grad_norm": 0.09868477284908295, "learning_rate": 0.0001326277223692245, "loss": 1.0233, "step": 1661 }, { "epoch": 0.6757471030697296, "grad_norm": 0.09289266169071198, "learning_rate": 0.0001325870140443721, "loss": 0.8636, "step": 1662 }, { "epoch": 0.6761536897743444, "grad_norm": 0.08200156688690186, "learning_rate": 0.00013254630571951965, "loss": 0.8317, "step": 1663 }, { "epoch": 0.6765602764789591, "grad_norm": 0.09031883627176285, "learning_rate": 0.0001325055973946672, "loss": 0.9759, "step": 1664 }, { "epoch": 0.6769668631835739, "grad_norm": 0.09911596029996872, "learning_rate": 0.00013246488906981478, "loss": 1.1484, "step": 1665 }, { "epoch": 0.6773734498881887, "grad_norm": 0.09470785409212112, "learning_rate": 0.00013242418074496234, "loss": 1.0261, "step": 1666 }, { "epoch": 0.6777800365928034, "grad_norm": 0.09936736524105072, "learning_rate": 0.00013238347242010992, "loss": 0.9697, "step": 1667 }, { "epoch": 
0.6781866232974182, "grad_norm": 0.08819877356290817, "learning_rate": 0.00013234276409525747, "loss": 1.0686, "step": 1668 }, { "epoch": 0.678593210002033, "grad_norm": 0.0861021876335144, "learning_rate": 0.00013230205577040505, "loss": 0.9567, "step": 1669 }, { "epoch": 0.6789997967066477, "grad_norm": 0.092157743871212, "learning_rate": 0.00013226134744555263, "loss": 1.0348, "step": 1670 }, { "epoch": 0.6794063834112625, "grad_norm": 0.08593881130218506, "learning_rate": 0.0001322206391207002, "loss": 0.9318, "step": 1671 }, { "epoch": 0.6798129701158772, "grad_norm": 0.09625545144081116, "learning_rate": 0.00013217993079584777, "loss": 0.9666, "step": 1672 }, { "epoch": 0.680219556820492, "grad_norm": 0.09877568483352661, "learning_rate": 0.00013213922247099532, "loss": 0.8862, "step": 1673 }, { "epoch": 0.6806261435251068, "grad_norm": 0.09340859204530716, "learning_rate": 0.0001320985141461429, "loss": 1.0305, "step": 1674 }, { "epoch": 0.6810327302297214, "grad_norm": 0.08883026987314224, "learning_rate": 0.00013205780582129046, "loss": 0.9499, "step": 1675 }, { "epoch": 0.6814393169343362, "grad_norm": 0.09625538438558578, "learning_rate": 0.000132017097496438, "loss": 1.0381, "step": 1676 }, { "epoch": 0.681845903638951, "grad_norm": 0.0917878970503807, "learning_rate": 0.0001319763891715856, "loss": 0.8924, "step": 1677 }, { "epoch": 0.6822524903435657, "grad_norm": 0.08996240794658661, "learning_rate": 0.00013193568084673315, "loss": 0.936, "step": 1678 }, { "epoch": 0.6826590770481805, "grad_norm": 0.09168268740177155, "learning_rate": 0.00013189497252188073, "loss": 0.9608, "step": 1679 }, { "epoch": 0.6830656637527953, "grad_norm": 0.09493600577116013, "learning_rate": 0.00013185426419702828, "loss": 1.0394, "step": 1680 }, { "epoch": 0.68347225045741, "grad_norm": 0.094533272087574, "learning_rate": 0.00013181355587217586, "loss": 0.9437, "step": 1681 }, { "epoch": 0.6838788371620248, "grad_norm": 0.09590426087379456, "learning_rate": 
0.00013177284754732345, "loss": 1.0504, "step": 1682 }, { "epoch": 0.6842854238666396, "grad_norm": 0.1008445993065834, "learning_rate": 0.000131732139222471, "loss": 0.9966, "step": 1683 }, { "epoch": 0.6846920105712543, "grad_norm": 0.09178382903337479, "learning_rate": 0.00013169143089761858, "loss": 1.0171, "step": 1684 }, { "epoch": 0.6850985972758691, "grad_norm": 0.09064016491174698, "learning_rate": 0.00013165072257276614, "loss": 1.0259, "step": 1685 }, { "epoch": 0.6855051839804839, "grad_norm": 0.09577952325344086, "learning_rate": 0.00013161001424791372, "loss": 0.9391, "step": 1686 }, { "epoch": 0.6859117706850986, "grad_norm": 0.08866085112094879, "learning_rate": 0.00013156930592306127, "loss": 1.0251, "step": 1687 }, { "epoch": 0.6863183573897134, "grad_norm": 0.09070689976215363, "learning_rate": 0.00013152859759820882, "loss": 0.9223, "step": 1688 }, { "epoch": 0.6867249440943282, "grad_norm": 0.08675026893615723, "learning_rate": 0.0001314878892733564, "loss": 0.94, "step": 1689 }, { "epoch": 0.6871315307989428, "grad_norm": 0.08852765709161758, "learning_rate": 0.00013144718094850396, "loss": 0.9587, "step": 1690 }, { "epoch": 0.6875381175035576, "grad_norm": 0.09738162159919739, "learning_rate": 0.00013140647262365154, "loss": 1.0469, "step": 1691 }, { "epoch": 0.6879447042081724, "grad_norm": 0.09765305370092392, "learning_rate": 0.00013136576429879912, "loss": 1.0384, "step": 1692 }, { "epoch": 0.6883512909127871, "grad_norm": 0.09691577404737473, "learning_rate": 0.00013132505597394668, "loss": 1.1035, "step": 1693 }, { "epoch": 0.6887578776174019, "grad_norm": 0.09987527132034302, "learning_rate": 0.00013128434764909426, "loss": 1.0447, "step": 1694 }, { "epoch": 0.6891644643220167, "grad_norm": 0.09481899440288544, "learning_rate": 0.0001312436393242418, "loss": 1.0686, "step": 1695 }, { "epoch": 0.6895710510266314, "grad_norm": 0.08769707381725311, "learning_rate": 0.0001312029309993894, "loss": 0.9485, "step": 1696 }, { "epoch": 
0.6899776377312462, "grad_norm": 0.08787425607442856, "learning_rate": 0.00013116222267453695, "loss": 0.9945, "step": 1697 }, { "epoch": 0.6903842244358609, "grad_norm": 0.09898071736097336, "learning_rate": 0.00013112151434968453, "loss": 1.0373, "step": 1698 }, { "epoch": 0.6907908111404757, "grad_norm": 0.09396618604660034, "learning_rate": 0.00013108080602483208, "loss": 1.0555, "step": 1699 }, { "epoch": 0.6911973978450905, "grad_norm": 0.09377385675907135, "learning_rate": 0.00013104009769997964, "loss": 0.9912, "step": 1700 }, { "epoch": 0.6916039845497052, "grad_norm": 0.09066810458898544, "learning_rate": 0.00013099938937512722, "loss": 1.0106, "step": 1701 }, { "epoch": 0.69201057125432, "grad_norm": 0.10170560330152512, "learning_rate": 0.00013095868105027477, "loss": 1.1167, "step": 1702 }, { "epoch": 0.6924171579589348, "grad_norm": 0.10096985846757889, "learning_rate": 0.00013091797272542235, "loss": 1.1092, "step": 1703 }, { "epoch": 0.6928237446635495, "grad_norm": 0.08942307531833649, "learning_rate": 0.00013087726440056993, "loss": 0.8489, "step": 1704 }, { "epoch": 0.6932303313681643, "grad_norm": 0.0931686982512474, "learning_rate": 0.0001308365560757175, "loss": 1.0615, "step": 1705 }, { "epoch": 0.693636918072779, "grad_norm": 0.08369520306587219, "learning_rate": 0.00013079584775086507, "loss": 0.9376, "step": 1706 }, { "epoch": 0.6940435047773937, "grad_norm": 0.09754310548305511, "learning_rate": 0.00013075513942601262, "loss": 1.076, "step": 1707 }, { "epoch": 0.6944500914820085, "grad_norm": 0.09425446391105652, "learning_rate": 0.0001307144311011602, "loss": 1.0354, "step": 1708 }, { "epoch": 0.6948566781866233, "grad_norm": 0.08762680739164352, "learning_rate": 0.00013067372277630776, "loss": 0.892, "step": 1709 }, { "epoch": 0.695263264891238, "grad_norm": 0.08966252207756042, "learning_rate": 0.00013063301445145534, "loss": 0.9067, "step": 1710 }, { "epoch": 0.6956698515958528, "grad_norm": 0.08628804236650467, "learning_rate": 
0.0001305923061266029, "loss": 0.8314, "step": 1711 }, { "epoch": 0.6960764383004676, "grad_norm": 0.0932592824101448, "learning_rate": 0.00013055159780175045, "loss": 0.9557, "step": 1712 }, { "epoch": 0.6964830250050823, "grad_norm": 0.0861787497997284, "learning_rate": 0.00013051088947689803, "loss": 1.0075, "step": 1713 }, { "epoch": 0.6968896117096971, "grad_norm": 0.08896369487047195, "learning_rate": 0.00013047018115204558, "loss": 0.9439, "step": 1714 }, { "epoch": 0.6972961984143119, "grad_norm": 0.09481415897607803, "learning_rate": 0.00013042947282719316, "loss": 1.0008, "step": 1715 }, { "epoch": 0.6977027851189266, "grad_norm": 0.09036390483379364, "learning_rate": 0.00013038876450234075, "loss": 1.0723, "step": 1716 }, { "epoch": 0.6981093718235414, "grad_norm": 0.09333796054124832, "learning_rate": 0.0001303480561774883, "loss": 0.9998, "step": 1717 }, { "epoch": 0.6985159585281562, "grad_norm": 0.09343329071998596, "learning_rate": 0.00013030734785263588, "loss": 1.069, "step": 1718 }, { "epoch": 0.6989225452327709, "grad_norm": 0.10213945806026459, "learning_rate": 0.00013026663952778343, "loss": 1.1121, "step": 1719 }, { "epoch": 0.6993291319373857, "grad_norm": 0.08944682031869888, "learning_rate": 0.00013022593120293102, "loss": 1.0139, "step": 1720 }, { "epoch": 0.6997357186420005, "grad_norm": 0.09763380140066147, "learning_rate": 0.00013018522287807857, "loss": 1.1057, "step": 1721 }, { "epoch": 0.7001423053466151, "grad_norm": 0.08643307536840439, "learning_rate": 0.00013014451455322615, "loss": 0.825, "step": 1722 }, { "epoch": 0.7005488920512299, "grad_norm": 0.0778571143746376, "learning_rate": 0.0001301038062283737, "loss": 0.8161, "step": 1723 }, { "epoch": 0.7009554787558446, "grad_norm": 0.08897890895605087, "learning_rate": 0.00013006309790352126, "loss": 0.9659, "step": 1724 }, { "epoch": 0.7013620654604594, "grad_norm": 0.08511462807655334, "learning_rate": 0.00013002238957866884, "loss": 0.8577, "step": 1725 }, { "epoch": 
0.7017686521650742, "grad_norm": 0.09079938381910324, "learning_rate": 0.0001299816812538164, "loss": 1.0091, "step": 1726 }, { "epoch": 0.7021752388696889, "grad_norm": 0.08795303851366043, "learning_rate": 0.00012994097292896398, "loss": 0.9966, "step": 1727 }, { "epoch": 0.7025818255743037, "grad_norm": 0.0925462394952774, "learning_rate": 0.00012990026460411156, "loss": 1.0207, "step": 1728 }, { "epoch": 0.7029884122789185, "grad_norm": 0.0894242599606514, "learning_rate": 0.0001298595562792591, "loss": 0.9207, "step": 1729 }, { "epoch": 0.7033949989835332, "grad_norm": 0.09216928482055664, "learning_rate": 0.0001298188479544067, "loss": 0.9725, "step": 1730 }, { "epoch": 0.703801585688148, "grad_norm": 0.09627533704042435, "learning_rate": 0.00012977813962955425, "loss": 0.998, "step": 1731 }, { "epoch": 0.7042081723927628, "grad_norm": 0.0950872004032135, "learning_rate": 0.00012973743130470183, "loss": 1.0275, "step": 1732 }, { "epoch": 0.7046147590973775, "grad_norm": 0.09819149225950241, "learning_rate": 0.00012969672297984938, "loss": 1.0179, "step": 1733 }, { "epoch": 0.7050213458019923, "grad_norm": 0.09157780557870865, "learning_rate": 0.00012965601465499694, "loss": 0.998, "step": 1734 }, { "epoch": 0.7054279325066071, "grad_norm": 0.09206783026456833, "learning_rate": 0.00012961530633014452, "loss": 0.9698, "step": 1735 }, { "epoch": 0.7058345192112218, "grad_norm": 0.08928617089986801, "learning_rate": 0.00012957459800529207, "loss": 0.9288, "step": 1736 }, { "epoch": 0.7062411059158366, "grad_norm": 0.09673994034528732, "learning_rate": 0.00012953388968043965, "loss": 1.0768, "step": 1737 }, { "epoch": 0.7066476926204514, "grad_norm": 0.09382779896259308, "learning_rate": 0.00012949318135558723, "loss": 1.0142, "step": 1738 }, { "epoch": 0.707054279325066, "grad_norm": 0.08966720104217529, "learning_rate": 0.0001294524730307348, "loss": 0.8738, "step": 1739 }, { "epoch": 0.7074608660296808, "grad_norm": 0.09402105212211609, "learning_rate": 
0.00012941176470588237, "loss": 0.9459, "step": 1740 }, { "epoch": 0.7078674527342956, "grad_norm": 0.08750198781490326, "learning_rate": 0.00012937105638102992, "loss": 0.9953, "step": 1741 }, { "epoch": 0.7082740394389103, "grad_norm": 0.09970106184482574, "learning_rate": 0.0001293303480561775, "loss": 1.0423, "step": 1742 }, { "epoch": 0.7086806261435251, "grad_norm": 0.08987673372030258, "learning_rate": 0.00012928963973132506, "loss": 0.9796, "step": 1743 }, { "epoch": 0.7090872128481399, "grad_norm": 0.09364349395036697, "learning_rate": 0.00012924893140647264, "loss": 1.0452, "step": 1744 }, { "epoch": 0.7094937995527546, "grad_norm": 0.09844768047332764, "learning_rate": 0.0001292082230816202, "loss": 1.0507, "step": 1745 }, { "epoch": 0.7099003862573694, "grad_norm": 0.08439893275499344, "learning_rate": 0.00012916751475676775, "loss": 0.9159, "step": 1746 }, { "epoch": 0.7103069729619842, "grad_norm": 0.08530126512050629, "learning_rate": 0.00012912680643191533, "loss": 0.8958, "step": 1747 }, { "epoch": 0.7107135596665989, "grad_norm": 0.09442596137523651, "learning_rate": 0.00012908609810706288, "loss": 1.0103, "step": 1748 }, { "epoch": 0.7111201463712137, "grad_norm": 0.09051500260829926, "learning_rate": 0.00012904538978221046, "loss": 0.9922, "step": 1749 }, { "epoch": 0.7115267330758284, "grad_norm": 0.09218533337116241, "learning_rate": 0.00012900468145735805, "loss": 1.0237, "step": 1750 }, { "epoch": 0.7119333197804432, "grad_norm": 0.09059412032365799, "learning_rate": 0.0001289639731325056, "loss": 0.8807, "step": 1751 }, { "epoch": 0.712339906485058, "grad_norm": 0.09302126616239548, "learning_rate": 0.00012892326480765318, "loss": 0.9996, "step": 1752 }, { "epoch": 0.7127464931896726, "grad_norm": 0.0886523425579071, "learning_rate": 0.00012888255648280073, "loss": 0.9456, "step": 1753 }, { "epoch": 0.7131530798942874, "grad_norm": 0.08531109243631363, "learning_rate": 0.00012884184815794832, "loss": 0.8851, "step": 1754 }, { "epoch": 
0.7135596665989022, "grad_norm": 0.08533506095409393, "learning_rate": 0.00012880113983309587, "loss": 1.004, "step": 1755 }, { "epoch": 0.7139662533035169, "grad_norm": 0.10868436843156815, "learning_rate": 0.00012876043150824345, "loss": 1.0434, "step": 1756 }, { "epoch": 0.7143728400081317, "grad_norm": 0.08798620849847794, "learning_rate": 0.000128719723183391, "loss": 0.944, "step": 1757 }, { "epoch": 0.7147794267127465, "grad_norm": 0.08957348763942719, "learning_rate": 0.00012867901485853856, "loss": 0.9431, "step": 1758 }, { "epoch": 0.7151860134173612, "grad_norm": 0.09171691536903381, "learning_rate": 0.00012863830653368614, "loss": 0.9877, "step": 1759 }, { "epoch": 0.715592600121976, "grad_norm": 0.10308198630809784, "learning_rate": 0.0001285975982088337, "loss": 1.0491, "step": 1760 }, { "epoch": 0.7159991868265908, "grad_norm": 0.09395022690296173, "learning_rate": 0.0001285568898839813, "loss": 0.9605, "step": 1761 }, { "epoch": 0.7164057735312055, "grad_norm": 0.09098276495933533, "learning_rate": 0.00012851618155912886, "loss": 0.9623, "step": 1762 }, { "epoch": 0.7168123602358203, "grad_norm": 0.09622596204280853, "learning_rate": 0.0001284754732342764, "loss": 0.9981, "step": 1763 }, { "epoch": 0.7172189469404351, "grad_norm": 0.09966776520013809, "learning_rate": 0.000128434764909424, "loss": 1.1082, "step": 1764 }, { "epoch": 0.7176255336450498, "grad_norm": 0.08151479065418243, "learning_rate": 0.00012839405658457155, "loss": 0.9498, "step": 1765 }, { "epoch": 0.7180321203496646, "grad_norm": 0.10801077634096146, "learning_rate": 0.00012835334825971913, "loss": 1.0845, "step": 1766 }, { "epoch": 0.7184387070542794, "grad_norm": 0.10468696802854538, "learning_rate": 0.00012831263993486668, "loss": 1.1407, "step": 1767 }, { "epoch": 0.718845293758894, "grad_norm": 0.08649425953626633, "learning_rate": 0.00012827193161001426, "loss": 1.0136, "step": 1768 }, { "epoch": 0.7192518804635089, "grad_norm": 0.0891176387667656, "learning_rate": 
0.00012823122328516182, "loss": 0.9647, "step": 1769 }, { "epoch": 0.7196584671681237, "grad_norm": 0.08572922646999359, "learning_rate": 0.00012819051496030937, "loss": 0.9131, "step": 1770 }, { "epoch": 0.7200650538727383, "grad_norm": 0.09400682896375656, "learning_rate": 0.00012814980663545695, "loss": 1.0212, "step": 1771 }, { "epoch": 0.7204716405773531, "grad_norm": 0.08426962792873383, "learning_rate": 0.0001281090983106045, "loss": 0.9203, "step": 1772 }, { "epoch": 0.7208782272819679, "grad_norm": 0.08990871161222458, "learning_rate": 0.00012806838998575211, "loss": 0.9154, "step": 1773 }, { "epoch": 0.7212848139865826, "grad_norm": 0.09853409230709076, "learning_rate": 0.00012802768166089967, "loss": 1.0219, "step": 1774 }, { "epoch": 0.7216914006911974, "grad_norm": 0.09549330174922943, "learning_rate": 0.00012798697333604722, "loss": 1.0584, "step": 1775 }, { "epoch": 0.7220979873958121, "grad_norm": 0.09176405519247055, "learning_rate": 0.0001279462650111948, "loss": 1.0623, "step": 1776 }, { "epoch": 0.7225045741004269, "grad_norm": 0.0894324779510498, "learning_rate": 0.00012790555668634236, "loss": 0.8873, "step": 1777 }, { "epoch": 0.7229111608050417, "grad_norm": 0.09495782852172852, "learning_rate": 0.00012786484836148994, "loss": 0.9914, "step": 1778 }, { "epoch": 0.7233177475096564, "grad_norm": 0.09165625274181366, "learning_rate": 0.0001278241400366375, "loss": 0.9946, "step": 1779 }, { "epoch": 0.7237243342142712, "grad_norm": 0.08971066772937775, "learning_rate": 0.00012778343171178507, "loss": 0.9684, "step": 1780 }, { "epoch": 0.724130920918886, "grad_norm": 0.09194676578044891, "learning_rate": 0.00012774272338693263, "loss": 0.9845, "step": 1781 }, { "epoch": 0.7245375076235007, "grad_norm": 0.08844684064388275, "learning_rate": 0.00012770201506208018, "loss": 1.0189, "step": 1782 }, { "epoch": 0.7249440943281155, "grad_norm": 0.09508199989795685, "learning_rate": 0.00012766130673722776, "loss": 0.9609, "step": 1783 }, { "epoch": 
0.7253506810327303, "grad_norm": 0.08686284720897675, "learning_rate": 0.00012762059841237534, "loss": 0.9364, "step": 1784 }, { "epoch": 0.725757267737345, "grad_norm": 0.08749787509441376, "learning_rate": 0.00012757989008752293, "loss": 0.9021, "step": 1785 }, { "epoch": 0.7261638544419597, "grad_norm": 0.09259208291769028, "learning_rate": 0.00012753918176267048, "loss": 1.031, "step": 1786 }, { "epoch": 0.7265704411465745, "grad_norm": 0.09524762630462646, "learning_rate": 0.00012749847343781803, "loss": 0.9958, "step": 1787 }, { "epoch": 0.7269770278511892, "grad_norm": 0.08385960757732391, "learning_rate": 0.00012745776511296562, "loss": 0.943, "step": 1788 }, { "epoch": 0.727383614555804, "grad_norm": 0.09703537821769714, "learning_rate": 0.00012741705678811317, "loss": 0.9854, "step": 1789 }, { "epoch": 0.7277902012604188, "grad_norm": 0.08761659264564514, "learning_rate": 0.00012737634846326075, "loss": 0.8797, "step": 1790 }, { "epoch": 0.7281967879650335, "grad_norm": 0.08612256497144699, "learning_rate": 0.0001273356401384083, "loss": 0.894, "step": 1791 }, { "epoch": 0.7286033746696483, "grad_norm": 0.09343304485082626, "learning_rate": 0.00012729493181355589, "loss": 0.969, "step": 1792 }, { "epoch": 0.7290099613742631, "grad_norm": 0.09733837842941284, "learning_rate": 0.00012725422348870344, "loss": 1.0479, "step": 1793 }, { "epoch": 0.7294165480788778, "grad_norm": 0.08351567387580872, "learning_rate": 0.000127213515163851, "loss": 0.9141, "step": 1794 }, { "epoch": 0.7298231347834926, "grad_norm": 0.09528695791959763, "learning_rate": 0.00012717280683899858, "loss": 1.0193, "step": 1795 }, { "epoch": 0.7302297214881074, "grad_norm": 0.0906892865896225, "learning_rate": 0.00012713209851414616, "loss": 0.9095, "step": 1796 }, { "epoch": 0.7306363081927221, "grad_norm": 119.45793151855469, "learning_rate": 0.00012709139018929374, "loss": 1.0114, "step": 1797 }, { "epoch": 0.7310428948973369, "grad_norm": 0.0933651253581047, "learning_rate": 
0.0001270506818644413, "loss": 1.0666, "step": 1798 }, { "epoch": 0.7314494816019517, "grad_norm": 0.10169385373592377, "learning_rate": 0.00012700997353958885, "loss": 0.9892, "step": 1799 }, { "epoch": 0.7318560683065664, "grad_norm": 0.0868530198931694, "learning_rate": 0.00012696926521473643, "loss": 0.9162, "step": 1800 }, { "epoch": 0.7322626550111812, "grad_norm": 0.09074793756008148, "learning_rate": 0.00012692855688988398, "loss": 0.9388, "step": 1801 }, { "epoch": 0.7326692417157958, "grad_norm": 0.10199327766895294, "learning_rate": 0.00012688784856503156, "loss": 0.9585, "step": 1802 }, { "epoch": 0.7330758284204106, "grad_norm": 0.10722784698009491, "learning_rate": 0.00012684714024017912, "loss": 1.0226, "step": 1803 }, { "epoch": 0.7334824151250254, "grad_norm": 0.10113389045000076, "learning_rate": 0.0001268064319153267, "loss": 1.0593, "step": 1804 }, { "epoch": 0.7338890018296401, "grad_norm": 0.1125817522406578, "learning_rate": 0.00012676572359047425, "loss": 0.8962, "step": 1805 }, { "epoch": 0.7342955885342549, "grad_norm": 0.10177897661924362, "learning_rate": 0.0001267250152656218, "loss": 1.0323, "step": 1806 }, { "epoch": 0.7347021752388697, "grad_norm": 0.10272479057312012, "learning_rate": 0.00012668430694076941, "loss": 0.9947, "step": 1807 }, { "epoch": 0.7351087619434844, "grad_norm": 0.11395642906427383, "learning_rate": 0.00012664359861591697, "loss": 1.0144, "step": 1808 }, { "epoch": 0.7355153486480992, "grad_norm": 0.09565427899360657, "learning_rate": 0.00012660289029106452, "loss": 1.0052, "step": 1809 }, { "epoch": 0.735921935352714, "grad_norm": 0.09244798123836517, "learning_rate": 0.0001265621819662121, "loss": 0.8411, "step": 1810 }, { "epoch": 0.7363285220573287, "grad_norm": 0.08985315263271332, "learning_rate": 0.00012652147364135966, "loss": 1.0301, "step": 1811 }, { "epoch": 0.7367351087619435, "grad_norm": 0.09606938809156418, "learning_rate": 0.00012648076531650724, "loss": 1.0053, "step": 1812 }, { "epoch": 
0.7371416954665583, "grad_norm": 0.10566183179616928, "learning_rate": 0.0001264400569916548, "loss": 0.9527, "step": 1813 }, { "epoch": 0.737548282171173, "grad_norm": 0.10999652743339539, "learning_rate": 0.00012639934866680237, "loss": 1.0756, "step": 1814 }, { "epoch": 0.7379548688757878, "grad_norm": 0.09473931044340134, "learning_rate": 0.00012635864034194993, "loss": 0.94, "step": 1815 }, { "epoch": 0.7383614555804026, "grad_norm": 0.09815262258052826, "learning_rate": 0.0001263179320170975, "loss": 1.0436, "step": 1816 }, { "epoch": 0.7387680422850172, "grad_norm": 0.08889912813901901, "learning_rate": 0.00012627722369224506, "loss": 0.9368, "step": 1817 }, { "epoch": 0.739174628989632, "grad_norm": 0.09337257593870163, "learning_rate": 0.00012623651536739262, "loss": 1.0949, "step": 1818 }, { "epoch": 0.7395812156942468, "grad_norm": 0.09112720191478729, "learning_rate": 0.00012619580704254023, "loss": 1.0239, "step": 1819 }, { "epoch": 0.7399878023988615, "grad_norm": 0.0988708958029747, "learning_rate": 0.00012615509871768778, "loss": 1.0648, "step": 1820 }, { "epoch": 0.7403943891034763, "grad_norm": 0.09849932789802551, "learning_rate": 0.00012611439039283533, "loss": 0.9867, "step": 1821 }, { "epoch": 0.7408009758080911, "grad_norm": 0.09254156798124313, "learning_rate": 0.00012607368206798291, "loss": 0.9903, "step": 1822 }, { "epoch": 0.7412075625127058, "grad_norm": 0.0954776182770729, "learning_rate": 0.00012603297374313047, "loss": 1.0081, "step": 1823 }, { "epoch": 0.7416141492173206, "grad_norm": 0.08610807359218597, "learning_rate": 0.00012599226541827805, "loss": 0.9229, "step": 1824 }, { "epoch": 0.7420207359219354, "grad_norm": 0.0977591797709465, "learning_rate": 0.0001259515570934256, "loss": 0.9076, "step": 1825 }, { "epoch": 0.7424273226265501, "grad_norm": 0.0858481377363205, "learning_rate": 0.00012591084876857319, "loss": 0.8604, "step": 1826 }, { "epoch": 0.7428339093311649, "grad_norm": 0.09642601758241653, "learning_rate": 
0.00012587014044372074, "loss": 1.0476, "step": 1827 }, { "epoch": 0.7432404960357797, "grad_norm": 0.08871784061193466, "learning_rate": 0.0001258294321188683, "loss": 0.9597, "step": 1828 }, { "epoch": 0.7436470827403944, "grad_norm": 0.10808097571134567, "learning_rate": 0.00012578872379401587, "loss": 1.1415, "step": 1829 }, { "epoch": 0.7440536694450092, "grad_norm": 0.09339917451143265, "learning_rate": 0.00012574801546916346, "loss": 0.9437, "step": 1830 }, { "epoch": 0.7444602561496239, "grad_norm": 0.08945673704147339, "learning_rate": 0.00012570730714431104, "loss": 0.9714, "step": 1831 }, { "epoch": 0.7448668428542387, "grad_norm": 0.0939527079463005, "learning_rate": 0.0001256665988194586, "loss": 0.9868, "step": 1832 }, { "epoch": 0.7452734295588535, "grad_norm": 0.09327416867017746, "learning_rate": 0.00012562589049460615, "loss": 1.0001, "step": 1833 }, { "epoch": 0.7456800162634681, "grad_norm": 0.10278622061014175, "learning_rate": 0.00012558518216975373, "loss": 1.0724, "step": 1834 }, { "epoch": 0.7460866029680829, "grad_norm": 0.09421471506357193, "learning_rate": 0.00012554447384490128, "loss": 1.0088, "step": 1835 }, { "epoch": 0.7464931896726977, "grad_norm": 0.1009073331952095, "learning_rate": 0.00012550376552004886, "loss": 1.0485, "step": 1836 }, { "epoch": 0.7468997763773124, "grad_norm": 0.09199651330709457, "learning_rate": 0.00012546305719519642, "loss": 0.9765, "step": 1837 }, { "epoch": 0.7473063630819272, "grad_norm": 0.09672168642282486, "learning_rate": 0.000125422348870344, "loss": 1.018, "step": 1838 }, { "epoch": 0.747712949786542, "grad_norm": 0.09036868065595627, "learning_rate": 0.00012538164054549155, "loss": 0.9067, "step": 1839 }, { "epoch": 0.7481195364911567, "grad_norm": 0.09706352651119232, "learning_rate": 0.0001253409322206391, "loss": 1.0439, "step": 1840 }, { "epoch": 0.7485261231957715, "grad_norm": 0.09940480440855026, "learning_rate": 0.00012530022389578669, "loss": 1.0936, "step": 1841 }, { "epoch": 
0.7489327099003863, "grad_norm": 0.09489309787750244, "learning_rate": 0.00012525951557093427, "loss": 1.0606, "step": 1842 }, { "epoch": 0.749339296605001, "grad_norm": 0.07897097617387772, "learning_rate": 0.00012521880724608185, "loss": 0.8109, "step": 1843 }, { "epoch": 0.7497458833096158, "grad_norm": 0.09423919022083282, "learning_rate": 0.0001251780989212294, "loss": 1.0703, "step": 1844 }, { "epoch": 0.7501524700142306, "grad_norm": 0.09601794928312302, "learning_rate": 0.00012513739059637696, "loss": 0.9692, "step": 1845 }, { "epoch": 0.7505590567188453, "grad_norm": 0.09051002562046051, "learning_rate": 0.00012509668227152454, "loss": 0.9727, "step": 1846 }, { "epoch": 0.7509656434234601, "grad_norm": 0.09665656834840775, "learning_rate": 0.0001250559739466721, "loss": 1.0701, "step": 1847 }, { "epoch": 0.7513722301280749, "grad_norm": 0.08956587314605713, "learning_rate": 0.00012501526562181967, "loss": 0.9863, "step": 1848 }, { "epoch": 0.7517788168326895, "grad_norm": 0.09464751929044724, "learning_rate": 0.00012497455729696723, "loss": 1.043, "step": 1849 }, { "epoch": 0.7521854035373043, "grad_norm": 0.09246315807104111, "learning_rate": 0.0001249338489721148, "loss": 1.0306, "step": 1850 }, { "epoch": 0.7525919902419191, "grad_norm": 0.0943431407213211, "learning_rate": 0.00012489314064726236, "loss": 0.9251, "step": 1851 }, { "epoch": 0.7529985769465338, "grad_norm": 0.08852697908878326, "learning_rate": 0.00012485243232240992, "loss": 0.919, "step": 1852 }, { "epoch": 0.7534051636511486, "grad_norm": 0.08856131881475449, "learning_rate": 0.00012481172399755752, "loss": 0.9874, "step": 1853 }, { "epoch": 0.7538117503557634, "grad_norm": 0.08715582638978958, "learning_rate": 0.00012477101567270508, "loss": 0.9569, "step": 1854 }, { "epoch": 0.7542183370603781, "grad_norm": 0.1005750522017479, "learning_rate": 0.00012473030734785266, "loss": 1.118, "step": 1855 }, { "epoch": 0.7546249237649929, "grad_norm": 0.0848010703921318, "learning_rate": 
0.00012468959902300021, "loss": 0.8808, "step": 1856 }, { "epoch": 0.7550315104696076, "grad_norm": 0.10509838908910751, "learning_rate": 0.00012464889069814777, "loss": 1.0019, "step": 1857 }, { "epoch": 0.7554380971742224, "grad_norm": 0.09729699045419693, "learning_rate": 0.00012460818237329535, "loss": 0.9275, "step": 1858 }, { "epoch": 0.7558446838788372, "grad_norm": 0.0901610478758812, "learning_rate": 0.0001245674740484429, "loss": 1.0285, "step": 1859 }, { "epoch": 0.7562512705834519, "grad_norm": 0.08691520988941193, "learning_rate": 0.00012452676572359048, "loss": 0.9524, "step": 1860 }, { "epoch": 0.7566578572880667, "grad_norm": 0.09559500962495804, "learning_rate": 0.00012448605739873804, "loss": 1.0781, "step": 1861 }, { "epoch": 0.7570644439926815, "grad_norm": 0.09581112861633301, "learning_rate": 0.00012444534907388562, "loss": 1.068, "step": 1862 }, { "epoch": 0.7574710306972962, "grad_norm": 0.10235914587974548, "learning_rate": 0.00012440464074903317, "loss": 1.078, "step": 1863 }, { "epoch": 0.757877617401911, "grad_norm": 0.09794023633003235, "learning_rate": 0.00012436393242418073, "loss": 1.0951, "step": 1864 }, { "epoch": 0.7582842041065257, "grad_norm": 0.08910951763391495, "learning_rate": 0.00012432322409932834, "loss": 1.002, "step": 1865 }, { "epoch": 0.7586907908111404, "grad_norm": 0.08909524232149124, "learning_rate": 0.0001242825157744759, "loss": 0.9027, "step": 1866 }, { "epoch": 0.7590973775157552, "grad_norm": 0.09639742970466614, "learning_rate": 0.00012424180744962347, "loss": 1.1356, "step": 1867 }, { "epoch": 0.75950396422037, "grad_norm": 0.08606995642185211, "learning_rate": 0.00012420109912477103, "loss": 0.8974, "step": 1868 }, { "epoch": 0.7599105509249847, "grad_norm": 0.09715355932712555, "learning_rate": 0.00012416039079991858, "loss": 1.078, "step": 1869 }, { "epoch": 0.7603171376295995, "grad_norm": 0.08933407068252563, "learning_rate": 0.00012411968247506616, "loss": 0.9177, "step": 1870 }, { "epoch": 
0.7607237243342143, "grad_norm": 0.0859113335609436, "learning_rate": 0.00012407897415021372, "loss": 0.9703, "step": 1871 }, { "epoch": 0.761130311038829, "grad_norm": 0.09086931496858597, "learning_rate": 0.0001240382658253613, "loss": 1.0298, "step": 1872 }, { "epoch": 0.7615368977434438, "grad_norm": 0.09112663567066193, "learning_rate": 0.00012399755750050885, "loss": 0.9918, "step": 1873 }, { "epoch": 0.7619434844480586, "grad_norm": 0.09044841676950455, "learning_rate": 0.00012395684917565643, "loss": 0.9469, "step": 1874 }, { "epoch": 0.7623500711526733, "grad_norm": 0.08345028758049011, "learning_rate": 0.00012391614085080399, "loss": 0.879, "step": 1875 }, { "epoch": 0.7627566578572881, "grad_norm": 0.10249708592891693, "learning_rate": 0.00012387543252595157, "loss": 1.0247, "step": 1876 }, { "epoch": 0.7631632445619029, "grad_norm": 0.0914909839630127, "learning_rate": 0.00012383472420109915, "loss": 0.9341, "step": 1877 }, { "epoch": 0.7635698312665176, "grad_norm": 0.08616846054792404, "learning_rate": 0.0001237940158762467, "loss": 0.918, "step": 1878 }, { "epoch": 0.7639764179711324, "grad_norm": 0.0853181779384613, "learning_rate": 0.00012375330755139428, "loss": 0.8903, "step": 1879 }, { "epoch": 0.7643830046757472, "grad_norm": 0.0943385511636734, "learning_rate": 0.00012371259922654184, "loss": 1.0437, "step": 1880 }, { "epoch": 0.7647895913803618, "grad_norm": 0.08487629890441895, "learning_rate": 0.0001236718909016894, "loss": 0.9655, "step": 1881 }, { "epoch": 0.7651961780849766, "grad_norm": 0.09635015577077866, "learning_rate": 0.00012363118257683697, "loss": 1.0047, "step": 1882 }, { "epoch": 0.7656027647895913, "grad_norm": 0.09787151217460632, "learning_rate": 0.00012359047425198453, "loss": 1.1058, "step": 1883 }, { "epoch": 0.7660093514942061, "grad_norm": 0.10217342525720596, "learning_rate": 0.0001235497659271321, "loss": 1.1407, "step": 1884 }, { "epoch": 0.7664159381988209, "grad_norm": 0.08770392835140228, "learning_rate": 
0.00012350905760227966, "loss": 0.8851, "step": 1885 }, { "epoch": 0.7668225249034356, "grad_norm": 0.08978156745433807, "learning_rate": 0.00012346834927742724, "loss": 1.0138, "step": 1886 }, { "epoch": 0.7672291116080504, "grad_norm": 0.09110313653945923, "learning_rate": 0.0001234276409525748, "loss": 0.8872, "step": 1887 }, { "epoch": 0.7676356983126652, "grad_norm": 0.0905870720744133, "learning_rate": 0.00012338693262772238, "loss": 0.9819, "step": 1888 }, { "epoch": 0.7680422850172799, "grad_norm": 0.09418340027332306, "learning_rate": 0.00012334622430286996, "loss": 1.0486, "step": 1889 }, { "epoch": 0.7684488717218947, "grad_norm": 0.09140585362911224, "learning_rate": 0.00012330551597801751, "loss": 0.9463, "step": 1890 }, { "epoch": 0.7688554584265095, "grad_norm": 0.08720141649246216, "learning_rate": 0.0001232648076531651, "loss": 0.9833, "step": 1891 }, { "epoch": 0.7692620451311242, "grad_norm": 0.09206419438123703, "learning_rate": 0.00012322409932831265, "loss": 0.9554, "step": 1892 }, { "epoch": 0.769668631835739, "grad_norm": 0.09324870258569717, "learning_rate": 0.0001231833910034602, "loss": 1.0703, "step": 1893 }, { "epoch": 0.7700752185403538, "grad_norm": 0.0868481770157814, "learning_rate": 0.00012314268267860778, "loss": 0.9374, "step": 1894 }, { "epoch": 0.7704818052449685, "grad_norm": 0.0907289981842041, "learning_rate": 0.00012310197435375534, "loss": 1.0148, "step": 1895 }, { "epoch": 0.7708883919495833, "grad_norm": 0.09804967790842056, "learning_rate": 0.00012306126602890292, "loss": 1.0541, "step": 1896 }, { "epoch": 0.771294978654198, "grad_norm": 0.09168083965778351, "learning_rate": 0.00012302055770405047, "loss": 0.9363, "step": 1897 }, { "epoch": 0.7717015653588127, "grad_norm": 0.09078045189380646, "learning_rate": 0.00012297984937919805, "loss": 1.0683, "step": 1898 }, { "epoch": 0.7721081520634275, "grad_norm": 0.08930620551109314, "learning_rate": 0.00012293914105434564, "loss": 0.9659, "step": 1899 }, { "epoch": 
0.7725147387680423, "grad_norm": 0.09990911930799484, "learning_rate": 0.0001228984327294932, "loss": 1.1301, "step": 1900 }, { "epoch": 0.772921325472657, "grad_norm": 0.08707278221845627, "learning_rate": 0.00012285772440464077, "loss": 0.919, "step": 1901 }, { "epoch": 0.7733279121772718, "grad_norm": 0.0829259380698204, "learning_rate": 0.00012281701607978833, "loss": 0.8806, "step": 1902 }, { "epoch": 0.7737344988818866, "grad_norm": 0.09047359228134155, "learning_rate": 0.0001227763077549359, "loss": 0.9478, "step": 1903 }, { "epoch": 0.7741410855865013, "grad_norm": 0.08373644202947617, "learning_rate": 0.00012273559943008346, "loss": 0.8043, "step": 1904 }, { "epoch": 0.7745476722911161, "grad_norm": 0.08893609046936035, "learning_rate": 0.00012269489110523101, "loss": 0.9849, "step": 1905 }, { "epoch": 0.7749542589957309, "grad_norm": 0.09218044579029083, "learning_rate": 0.0001226541827803786, "loss": 0.8856, "step": 1906 }, { "epoch": 0.7753608457003456, "grad_norm": 0.08562997728586197, "learning_rate": 0.00012261347445552615, "loss": 0.8934, "step": 1907 }, { "epoch": 0.7757674324049604, "grad_norm": 0.09083337336778641, "learning_rate": 0.00012257276613067373, "loss": 1.0118, "step": 1908 }, { "epoch": 0.7761740191095751, "grad_norm": 0.08940907567739487, "learning_rate": 0.00012253205780582129, "loss": 0.9138, "step": 1909 }, { "epoch": 0.7765806058141899, "grad_norm": 0.09383655339479446, "learning_rate": 0.00012249134948096887, "loss": 0.9727, "step": 1910 }, { "epoch": 0.7769871925188047, "grad_norm": 0.10210567712783813, "learning_rate": 0.00012245064115611645, "loss": 1.0628, "step": 1911 }, { "epoch": 0.7773937792234193, "grad_norm": 0.09698057174682617, "learning_rate": 0.000122409932831264, "loss": 1.0863, "step": 1912 }, { "epoch": 0.7778003659280341, "grad_norm": 0.0920233353972435, "learning_rate": 0.00012236922450641158, "loss": 0.9377, "step": 1913 }, { "epoch": 0.7782069526326489, "grad_norm": 0.08810736984014511, "learning_rate": 
0.00012232851618155914, "loss": 0.9152, "step": 1914 }, { "epoch": 0.7786135393372636, "grad_norm": 0.0930081456899643, "learning_rate": 0.0001222878078567067, "loss": 0.9493, "step": 1915 }, { "epoch": 0.7790201260418784, "grad_norm": 0.09303618222475052, "learning_rate": 0.00012224709953185427, "loss": 0.9638, "step": 1916 }, { "epoch": 0.7794267127464932, "grad_norm": 0.09462623298168182, "learning_rate": 0.00012220639120700183, "loss": 1.0028, "step": 1917 }, { "epoch": 0.7798332994511079, "grad_norm": 0.08258619159460068, "learning_rate": 0.0001221656828821494, "loss": 0.934, "step": 1918 }, { "epoch": 0.7802398861557227, "grad_norm": 0.0895158126950264, "learning_rate": 0.00012212497455729696, "loss": 1.0514, "step": 1919 }, { "epoch": 0.7806464728603375, "grad_norm": 0.0877576693892479, "learning_rate": 0.00012208426623244454, "loss": 0.906, "step": 1920 }, { "epoch": 0.7810530595649522, "grad_norm": 0.08271359652280807, "learning_rate": 0.0001220435579075921, "loss": 0.8446, "step": 1921 }, { "epoch": 0.781459646269567, "grad_norm": 0.09866933524608612, "learning_rate": 0.00012200284958273969, "loss": 1.0477, "step": 1922 }, { "epoch": 0.7818662329741818, "grad_norm": 0.0881706029176712, "learning_rate": 0.00012196214125788725, "loss": 0.9121, "step": 1923 }, { "epoch": 0.7822728196787965, "grad_norm": 0.08888103812932968, "learning_rate": 0.00012192143293303481, "loss": 0.9179, "step": 1924 }, { "epoch": 0.7826794063834113, "grad_norm": 0.08678455650806427, "learning_rate": 0.00012188072460818238, "loss": 0.8909, "step": 1925 }, { "epoch": 0.7830859930880261, "grad_norm": 0.08965113759040833, "learning_rate": 0.00012184001628332995, "loss": 0.9483, "step": 1926 }, { "epoch": 0.7834925797926408, "grad_norm": 0.09123353660106659, "learning_rate": 0.00012179930795847752, "loss": 0.9552, "step": 1927 }, { "epoch": 0.7838991664972556, "grad_norm": 0.08975458890199661, "learning_rate": 0.00012175859963362508, "loss": 0.9155, "step": 1928 }, { "epoch": 
0.7843057532018703, "grad_norm": 0.09666619449853897, "learning_rate": 0.00012171789130877265, "loss": 0.9306, "step": 1929 }, { "epoch": 0.784712339906485, "grad_norm": 0.10168115049600601, "learning_rate": 0.00012167718298392022, "loss": 1.0875, "step": 1930 }, { "epoch": 0.7851189266110998, "grad_norm": 0.09472563117742538, "learning_rate": 0.00012163647465906777, "loss": 1.0703, "step": 1931 }, { "epoch": 0.7855255133157146, "grad_norm": 0.09953609853982925, "learning_rate": 0.00012159576633421534, "loss": 1.0868, "step": 1932 }, { "epoch": 0.7859321000203293, "grad_norm": 0.0972660630941391, "learning_rate": 0.00012155505800936291, "loss": 0.9368, "step": 1933 }, { "epoch": 0.7863386867249441, "grad_norm": 0.08514858037233353, "learning_rate": 0.0001215143496845105, "loss": 0.9114, "step": 1934 }, { "epoch": 0.7867452734295588, "grad_norm": 0.08503813296556473, "learning_rate": 0.00012147364135965806, "loss": 0.9315, "step": 1935 }, { "epoch": 0.7871518601341736, "grad_norm": 0.09241003543138504, "learning_rate": 0.00012143293303480563, "loss": 0.9656, "step": 1936 }, { "epoch": 0.7875584468387884, "grad_norm": 0.08896864950656891, "learning_rate": 0.00012139222470995319, "loss": 1.0197, "step": 1937 }, { "epoch": 0.7879650335434031, "grad_norm": 0.0944843515753746, "learning_rate": 0.00012135151638510076, "loss": 1.0548, "step": 1938 }, { "epoch": 0.7883716202480179, "grad_norm": 0.09366561472415924, "learning_rate": 0.00012131080806024833, "loss": 1.0353, "step": 1939 }, { "epoch": 0.7887782069526327, "grad_norm": 0.094940185546875, "learning_rate": 0.0001212700997353959, "loss": 0.9467, "step": 1940 }, { "epoch": 0.7891847936572474, "grad_norm": 0.08708861470222473, "learning_rate": 0.00012122939141054346, "loss": 0.9762, "step": 1941 }, { "epoch": 0.7895913803618622, "grad_norm": 0.09528307616710663, "learning_rate": 0.00012118868308569102, "loss": 0.9989, "step": 1942 }, { "epoch": 0.789997967066477, "grad_norm": 0.09655644744634628, "learning_rate": 
0.00012114797476083858, "loss": 1.0961, "step": 1943 }, { "epoch": 0.7904045537710916, "grad_norm": 0.09209448844194412, "learning_rate": 0.00012110726643598615, "loss": 0.9271, "step": 1944 }, { "epoch": 0.7908111404757064, "grad_norm": 0.10271915793418884, "learning_rate": 0.00012106655811113375, "loss": 1.1772, "step": 1945 }, { "epoch": 0.7912177271803212, "grad_norm": 0.08724693953990936, "learning_rate": 0.00012102584978628131, "loss": 1.0258, "step": 1946 }, { "epoch": 0.7916243138849359, "grad_norm": 0.09265289455652237, "learning_rate": 0.00012098514146142887, "loss": 0.9987, "step": 1947 }, { "epoch": 0.7920309005895507, "grad_norm": 0.08839931339025497, "learning_rate": 0.00012094443313657644, "loss": 0.9955, "step": 1948 }, { "epoch": 0.7924374872941655, "grad_norm": 0.0918072909116745, "learning_rate": 0.000120903724811724, "loss": 0.966, "step": 1949 }, { "epoch": 0.7928440739987802, "grad_norm": 0.09354092925786972, "learning_rate": 0.00012086301648687157, "loss": 1.0129, "step": 1950 }, { "epoch": 0.793250660703395, "grad_norm": 0.0950416848063469, "learning_rate": 0.00012082230816201914, "loss": 1.1276, "step": 1951 }, { "epoch": 0.7936572474080098, "grad_norm": 0.08683070540428162, "learning_rate": 0.00012078159983716671, "loss": 0.8137, "step": 1952 }, { "epoch": 0.7940638341126245, "grad_norm": 0.0931641161441803, "learning_rate": 0.00012074089151231427, "loss": 1.0094, "step": 1953 }, { "epoch": 0.7944704208172393, "grad_norm": 0.09602310508489609, "learning_rate": 0.00012070018318746183, "loss": 1.0643, "step": 1954 }, { "epoch": 0.7948770075218541, "grad_norm": 0.08961457759141922, "learning_rate": 0.0001206594748626094, "loss": 0.9338, "step": 1955 }, { "epoch": 0.7952835942264688, "grad_norm": 0.09515352547168732, "learning_rate": 0.00012061876653775696, "loss": 1.0151, "step": 1956 }, { "epoch": 0.7956901809310836, "grad_norm": 0.08361717313528061, "learning_rate": 0.00012057805821290456, "loss": 0.9363, "step": 1957 }, { "epoch": 
0.7960967676356984, "grad_norm": 0.09745500981807709, "learning_rate": 0.00012053734988805213, "loss": 1.0578, "step": 1958 }, { "epoch": 0.796503354340313, "grad_norm": 0.09218847006559372, "learning_rate": 0.00012049664156319968, "loss": 0.9899, "step": 1959 }, { "epoch": 0.7969099410449278, "grad_norm": 0.08713049441576004, "learning_rate": 0.00012045593323834725, "loss": 0.9283, "step": 1960 }, { "epoch": 0.7973165277495425, "grad_norm": 0.09556014090776443, "learning_rate": 0.00012041522491349482, "loss": 1.0288, "step": 1961 }, { "epoch": 0.7977231144541573, "grad_norm": 0.09281028062105179, "learning_rate": 0.00012037451658864238, "loss": 0.9812, "step": 1962 }, { "epoch": 0.7981297011587721, "grad_norm": 0.10029471665620804, "learning_rate": 0.00012033380826378995, "loss": 1.1787, "step": 1963 }, { "epoch": 0.7985362878633868, "grad_norm": 0.08875210583209991, "learning_rate": 0.00012029309993893752, "loss": 0.9432, "step": 1964 }, { "epoch": 0.7989428745680016, "grad_norm": 0.09191716462373734, "learning_rate": 0.00012025239161408509, "loss": 1.0301, "step": 1965 }, { "epoch": 0.7993494612726164, "grad_norm": 0.09936721622943878, "learning_rate": 0.00012021168328923264, "loss": 1.0008, "step": 1966 }, { "epoch": 0.7997560479772311, "grad_norm": 0.09157298505306244, "learning_rate": 0.00012017097496438021, "loss": 1.0065, "step": 1967 }, { "epoch": 0.8001626346818459, "grad_norm": 0.0955449789762497, "learning_rate": 0.0001201302666395278, "loss": 1.1102, "step": 1968 }, { "epoch": 0.8005692213864607, "grad_norm": 0.10182837396860123, "learning_rate": 0.00012008955831467537, "loss": 1.1306, "step": 1969 }, { "epoch": 0.8009758080910754, "grad_norm": 0.09424961358308792, "learning_rate": 0.00012004884998982292, "loss": 0.953, "step": 1970 }, { "epoch": 0.8013823947956902, "grad_norm": 0.09814165532588959, "learning_rate": 0.00012000814166497049, "loss": 1.1101, "step": 1971 }, { "epoch": 0.801788981500305, "grad_norm": 0.09669219702482224, "learning_rate": 
0.00011996743334011806, "loss": 0.9598, "step": 1972 }, { "epoch": 0.8021955682049197, "grad_norm": 0.08699534088373184, "learning_rate": 0.00011992672501526563, "loss": 0.8809, "step": 1973 }, { "epoch": 0.8026021549095345, "grad_norm": 0.08246033638715744, "learning_rate": 0.0001198860166904132, "loss": 0.9572, "step": 1974 }, { "epoch": 0.8030087416141493, "grad_norm": 0.08641736209392548, "learning_rate": 0.00011984530836556076, "loss": 0.9201, "step": 1975 }, { "epoch": 0.8034153283187639, "grad_norm": 0.09478481858968735, "learning_rate": 0.00011980460004070833, "loss": 0.9875, "step": 1976 }, { "epoch": 0.8038219150233787, "grad_norm": 0.09944069385528564, "learning_rate": 0.0001197638917158559, "loss": 1.01, "step": 1977 }, { "epoch": 0.8042285017279935, "grad_norm": 0.09341408312320709, "learning_rate": 0.00011972318339100345, "loss": 0.9655, "step": 1978 }, { "epoch": 0.8046350884326082, "grad_norm": 0.09643781185150146, "learning_rate": 0.00011968247506615102, "loss": 0.9764, "step": 1979 }, { "epoch": 0.805041675137223, "grad_norm": 0.08750199526548386, "learning_rate": 0.00011964176674129861, "loss": 0.9561, "step": 1980 }, { "epoch": 0.8054482618418378, "grad_norm": 0.09079190343618393, "learning_rate": 0.00011960105841644618, "loss": 0.9068, "step": 1981 }, { "epoch": 0.8058548485464525, "grad_norm": 0.08659665286540985, "learning_rate": 0.00011956035009159374, "loss": 0.9414, "step": 1982 }, { "epoch": 0.8062614352510673, "grad_norm": 0.08911033719778061, "learning_rate": 0.0001195196417667413, "loss": 0.9566, "step": 1983 }, { "epoch": 0.8066680219556821, "grad_norm": 0.09575940668582916, "learning_rate": 0.00011947893344188887, "loss": 0.9836, "step": 1984 }, { "epoch": 0.8070746086602968, "grad_norm": 0.09464087337255478, "learning_rate": 0.00011943822511703644, "loss": 1.0854, "step": 1985 }, { "epoch": 0.8074811953649116, "grad_norm": 0.09153340756893158, "learning_rate": 0.000119397516792184, "loss": 1.0362, "step": 1986 }, { "epoch": 
0.8078877820695263, "grad_norm": 0.09390057623386383, "learning_rate": 0.00011935680846733157, "loss": 0.9829, "step": 1987 }, { "epoch": 0.8082943687741411, "grad_norm": 0.09109530597925186, "learning_rate": 0.00011931610014247914, "loss": 0.9472, "step": 1988 }, { "epoch": 0.8087009554787559, "grad_norm": 0.09163598716259003, "learning_rate": 0.0001192753918176267, "loss": 1.0141, "step": 1989 }, { "epoch": 0.8091075421833706, "grad_norm": 0.09769194573163986, "learning_rate": 0.00011923468349277426, "loss": 1.0536, "step": 1990 }, { "epoch": 0.8095141288879854, "grad_norm": 0.09787027537822723, "learning_rate": 0.00011919397516792186, "loss": 1.0384, "step": 1991 }, { "epoch": 0.8099207155926001, "grad_norm": 0.09753425419330597, "learning_rate": 0.00011915326684306943, "loss": 0.9425, "step": 1992 }, { "epoch": 0.8103273022972148, "grad_norm": 0.0975935086607933, "learning_rate": 0.000119112558518217, "loss": 0.9755, "step": 1993 }, { "epoch": 0.8107338890018296, "grad_norm": 0.09270279854536057, "learning_rate": 0.00011907185019336455, "loss": 0.9501, "step": 1994 }, { "epoch": 0.8111404757064444, "grad_norm": 0.09884528815746307, "learning_rate": 0.00011903114186851212, "loss": 0.986, "step": 1995 }, { "epoch": 0.8115470624110591, "grad_norm": 0.09327102452516556, "learning_rate": 0.00011899043354365968, "loss": 0.9258, "step": 1996 }, { "epoch": 0.8119536491156739, "grad_norm": 0.09072525054216385, "learning_rate": 0.00011894972521880725, "loss": 0.8975, "step": 1997 }, { "epoch": 0.8123602358202887, "grad_norm": 0.09030098468065262, "learning_rate": 0.00011890901689395482, "loss": 0.9163, "step": 1998 }, { "epoch": 0.8127668225249034, "grad_norm": 0.09437917172908783, "learning_rate": 0.00011886830856910239, "loss": 1.0101, "step": 1999 }, { "epoch": 0.8131734092295182, "grad_norm": 0.092490553855896, "learning_rate": 0.00011882760024424995, "loss": 0.9865, "step": 2000 }, { "epoch": 0.813579995934133, "grad_norm": 0.08658891916275024, "learning_rate": 
0.00011878689191939751, "loss": 0.878, "step": 2001 }, { "epoch": 0.8139865826387477, "grad_norm": 0.08677167445421219, "learning_rate": 0.00011874618359454508, "loss": 0.9192, "step": 2002 }, { "epoch": 0.8143931693433625, "grad_norm": 0.09392429143190384, "learning_rate": 0.00011870547526969267, "loss": 1.023, "step": 2003 }, { "epoch": 0.8147997560479773, "grad_norm": 0.09641133248806, "learning_rate": 0.00011866476694484024, "loss": 1.0088, "step": 2004 }, { "epoch": 0.815206342752592, "grad_norm": 0.0969158262014389, "learning_rate": 0.0001186240586199878, "loss": 1.0194, "step": 2005 }, { "epoch": 0.8156129294572068, "grad_norm": 0.09078145772218704, "learning_rate": 0.00011858335029513536, "loss": 0.9206, "step": 2006 }, { "epoch": 0.8160195161618216, "grad_norm": 0.0925087034702301, "learning_rate": 0.00011854264197028293, "loss": 1.0193, "step": 2007 }, { "epoch": 0.8164261028664362, "grad_norm": 0.08056949079036713, "learning_rate": 0.0001185019336454305, "loss": 0.8271, "step": 2008 }, { "epoch": 0.816832689571051, "grad_norm": 0.08815829455852509, "learning_rate": 0.00011846122532057806, "loss": 0.9061, "step": 2009 }, { "epoch": 0.8172392762756658, "grad_norm": 0.0914953425526619, "learning_rate": 0.00011842051699572563, "loss": 1.058, "step": 2010 }, { "epoch": 0.8176458629802805, "grad_norm": 0.09589337557554245, "learning_rate": 0.0001183798086708732, "loss": 1.0244, "step": 2011 }, { "epoch": 0.8180524496848953, "grad_norm": 0.09444423019886017, "learning_rate": 0.00011833910034602077, "loss": 1.007, "step": 2012 }, { "epoch": 0.81845903638951, "grad_norm": 0.0973111018538475, "learning_rate": 0.00011829839202116832, "loss": 1.1077, "step": 2013 }, { "epoch": 0.8188656230941248, "grad_norm": 0.09395329654216766, "learning_rate": 0.00011825768369631591, "loss": 0.9592, "step": 2014 }, { "epoch": 0.8192722097987396, "grad_norm": 1295.30712890625, "learning_rate": 0.00011821697537146348, "loss": 0.9503, "step": 2015 }, { "epoch": 0.8196787965033543, 
"grad_norm": 0.09247137606143951, "learning_rate": 0.00011817626704661105, "loss": 0.9733, "step": 2016 }, { "epoch": 0.8200853832079691, "grad_norm": 0.09796010702848434, "learning_rate": 0.0001181355587217586, "loss": 1.0192, "step": 2017 }, { "epoch": 0.8204919699125839, "grad_norm": 0.11081703752279282, "learning_rate": 0.00011809485039690617, "loss": 1.0732, "step": 2018 }, { "epoch": 0.8208985566171986, "grad_norm": 0.1036204919219017, "learning_rate": 0.00011805414207205374, "loss": 1.0547, "step": 2019 }, { "epoch": 0.8213051433218134, "grad_norm": 0.09200289845466614, "learning_rate": 0.0001180134337472013, "loss": 0.8751, "step": 2020 }, { "epoch": 0.8217117300264282, "grad_norm": 0.09226184338331223, "learning_rate": 0.00011797272542234887, "loss": 0.8352, "step": 2021 }, { "epoch": 0.8221183167310429, "grad_norm": 0.09109731018543243, "learning_rate": 0.00011793201709749644, "loss": 0.8986, "step": 2022 }, { "epoch": 0.8225249034356577, "grad_norm": 0.0922974944114685, "learning_rate": 0.00011789130877264401, "loss": 0.9166, "step": 2023 }, { "epoch": 0.8229314901402724, "grad_norm": 0.09215478599071503, "learning_rate": 0.00011785060044779158, "loss": 0.9764, "step": 2024 }, { "epoch": 0.8233380768448871, "grad_norm": 0.09852897375822067, "learning_rate": 0.00011780989212293913, "loss": 1.0164, "step": 2025 }, { "epoch": 0.8237446635495019, "grad_norm": 0.08374243229627609, "learning_rate": 0.00011776918379808673, "loss": 0.947, "step": 2026 }, { "epoch": 0.8241512502541167, "grad_norm": 0.09260760992765427, "learning_rate": 0.0001177284754732343, "loss": 1.0338, "step": 2027 }, { "epoch": 0.8245578369587314, "grad_norm": 0.09498609602451324, "learning_rate": 0.00011768776714838186, "loss": 1.1077, "step": 2028 }, { "epoch": 0.8249644236633462, "grad_norm": 0.0914779007434845, "learning_rate": 0.00011764705882352942, "loss": 0.9499, "step": 2029 }, { "epoch": 0.825371010367961, "grad_norm": 0.09603306651115417, "learning_rate": 0.00011760635049867698, 
"loss": 0.9652, "step": 2030 }, { "epoch": 0.8257775970725757, "grad_norm": 0.09358631074428558, "learning_rate": 0.00011756564217382455, "loss": 0.9593, "step": 2031 }, { "epoch": 0.8261841837771905, "grad_norm": 0.08833447843790054, "learning_rate": 0.00011752493384897212, "loss": 0.9582, "step": 2032 }, { "epoch": 0.8265907704818053, "grad_norm": 0.09237752854824066, "learning_rate": 0.00011748422552411969, "loss": 0.9893, "step": 2033 }, { "epoch": 0.82699735718642, "grad_norm": 0.09216301888227463, "learning_rate": 0.00011744351719926725, "loss": 1.0804, "step": 2034 }, { "epoch": 0.8274039438910348, "grad_norm": 0.09359832108020782, "learning_rate": 0.00011740280887441482, "loss": 1.0516, "step": 2035 }, { "epoch": 0.8278105305956496, "grad_norm": 0.08962893486022949, "learning_rate": 0.00011736210054956237, "loss": 0.9592, "step": 2036 }, { "epoch": 0.8282171173002643, "grad_norm": 0.08905650675296783, "learning_rate": 0.00011732139222470997, "loss": 1.0088, "step": 2037 }, { "epoch": 0.8286237040048791, "grad_norm": 0.09745819866657257, "learning_rate": 0.00011728068389985754, "loss": 1.0656, "step": 2038 }, { "epoch": 0.8290302907094937, "grad_norm": 0.09807167202234268, "learning_rate": 0.0001172399755750051, "loss": 1.0324, "step": 2039 }, { "epoch": 0.8294368774141085, "grad_norm": 0.094350166618824, "learning_rate": 0.00011719926725015267, "loss": 0.9412, "step": 2040 }, { "epoch": 0.8298434641187233, "grad_norm": 0.10582345724105835, "learning_rate": 0.00011715855892530023, "loss": 1.0602, "step": 2041 }, { "epoch": 0.830250050823338, "grad_norm": 0.0957132950425148, "learning_rate": 0.0001171178506004478, "loss": 1.0394, "step": 2042 }, { "epoch": 0.8306566375279528, "grad_norm": 0.09163911640644073, "learning_rate": 0.00011707714227559536, "loss": 0.966, "step": 2043 }, { "epoch": 0.8310632242325676, "grad_norm": 0.08863722532987595, "learning_rate": 0.00011703643395074293, "loss": 1.0096, "step": 2044 }, { "epoch": 0.8314698109371823, "grad_norm": 
0.0891941711306572, "learning_rate": 0.0001169957256258905, "loss": 0.9694, "step": 2045 }, { "epoch": 0.8318763976417971, "grad_norm": 0.097014419734478, "learning_rate": 0.00011695501730103806, "loss": 1.022, "step": 2046 }, { "epoch": 0.8322829843464119, "grad_norm": 0.09055501222610474, "learning_rate": 0.00011691430897618563, "loss": 0.9859, "step": 2047 }, { "epoch": 0.8326895710510266, "grad_norm": 0.09768117219209671, "learning_rate": 0.00011687360065133319, "loss": 1.1293, "step": 2048 }, { "epoch": 0.8330961577556414, "grad_norm": 0.09261562675237656, "learning_rate": 0.00011683289232648078, "loss": 0.9639, "step": 2049 }, { "epoch": 0.8335027444602562, "grad_norm": 0.09100788086652756, "learning_rate": 0.00011679218400162835, "loss": 0.9564, "step": 2050 }, { "epoch": 0.8339093311648709, "grad_norm": 0.08779970556497574, "learning_rate": 0.00011675147567677592, "loss": 0.9273, "step": 2051 }, { "epoch": 0.8343159178694857, "grad_norm": 0.09557755291461945, "learning_rate": 0.00011671076735192348, "loss": 1.0858, "step": 2052 }, { "epoch": 0.8347225045741005, "grad_norm": 0.09423234313726425, "learning_rate": 0.00011667005902707104, "loss": 0.9901, "step": 2053 }, { "epoch": 0.8351290912787152, "grad_norm": 0.08855794370174408, "learning_rate": 0.0001166293507022186, "loss": 0.9226, "step": 2054 }, { "epoch": 0.83553567798333, "grad_norm": 0.09653773158788681, "learning_rate": 0.00011658864237736617, "loss": 1.0381, "step": 2055 }, { "epoch": 0.8359422646879447, "grad_norm": 0.08749355375766754, "learning_rate": 0.00011654793405251374, "loss": 0.964, "step": 2056 }, { "epoch": 0.8363488513925594, "grad_norm": 0.09068714082241058, "learning_rate": 0.00011650722572766131, "loss": 0.9222, "step": 2057 }, { "epoch": 0.8367554380971742, "grad_norm": 0.09309016168117523, "learning_rate": 0.00011646651740280888, "loss": 0.9706, "step": 2058 }, { "epoch": 0.837162024801789, "grad_norm": 0.08905037492513657, "learning_rate": 0.00011642580907795644, "loss": 0.925, 
"step": 2059 }, { "epoch": 0.8375686115064037, "grad_norm": 0.09465768933296204, "learning_rate": 0.00011638510075310403, "loss": 0.9834, "step": 2060 }, { "epoch": 0.8379751982110185, "grad_norm": 0.09916462749242783, "learning_rate": 0.00011634439242825159, "loss": 1.1026, "step": 2061 }, { "epoch": 0.8383817849156333, "grad_norm": 0.10114342719316483, "learning_rate": 0.00011630368410339916, "loss": 1.1066, "step": 2062 }, { "epoch": 0.838788371620248, "grad_norm": 0.09570196270942688, "learning_rate": 0.00011626297577854673, "loss": 0.9225, "step": 2063 }, { "epoch": 0.8391949583248628, "grad_norm": 0.09646128118038177, "learning_rate": 0.0001162222674536943, "loss": 0.8873, "step": 2064 }, { "epoch": 0.8396015450294775, "grad_norm": 0.08988897502422333, "learning_rate": 0.00011618155912884185, "loss": 0.8496, "step": 2065 }, { "epoch": 0.8400081317340923, "grad_norm": 0.09540429711341858, "learning_rate": 0.00011614085080398942, "loss": 0.9864, "step": 2066 }, { "epoch": 0.8404147184387071, "grad_norm": 0.09202653169631958, "learning_rate": 0.00011610014247913699, "loss": 0.9331, "step": 2067 }, { "epoch": 0.8408213051433218, "grad_norm": 0.0908489003777504, "learning_rate": 0.00011605943415428455, "loss": 0.9212, "step": 2068 }, { "epoch": 0.8412278918479366, "grad_norm": 0.08785940706729889, "learning_rate": 0.00011601872582943212, "loss": 0.8732, "step": 2069 }, { "epoch": 0.8416344785525514, "grad_norm": 0.09724607318639755, "learning_rate": 0.00011597801750457969, "loss": 1.0057, "step": 2070 }, { "epoch": 0.842041065257166, "grad_norm": 0.09459156543016434, "learning_rate": 0.00011593730917972726, "loss": 1.0179, "step": 2071 }, { "epoch": 0.8424476519617808, "grad_norm": 0.09080464392900467, "learning_rate": 0.00011589660085487484, "loss": 0.9499, "step": 2072 }, { "epoch": 0.8428542386663956, "grad_norm": 0.0882730633020401, "learning_rate": 0.0001158558925300224, "loss": 0.9109, "step": 2073 }, { "epoch": 0.8432608253710103, "grad_norm": 
0.08633995056152344, "learning_rate": 0.00011581518420516997, "loss": 1.0101, "step": 2074 }, { "epoch": 0.8436674120756251, "grad_norm": 0.08903708308935165, "learning_rate": 0.00011577447588031754, "loss": 0.9705, "step": 2075 }, { "epoch": 0.8440739987802399, "grad_norm": 0.09651392698287964, "learning_rate": 0.0001157337675554651, "loss": 1.1204, "step": 2076 }, { "epoch": 0.8444805854848546, "grad_norm": 0.08830486238002777, "learning_rate": 0.00011569305923061266, "loss": 0.918, "step": 2077 }, { "epoch": 0.8448871721894694, "grad_norm": 0.09067387878894806, "learning_rate": 0.00011565235090576023, "loss": 0.9139, "step": 2078 }, { "epoch": 0.8452937588940842, "grad_norm": 0.08710314333438873, "learning_rate": 0.0001156116425809078, "loss": 0.8869, "step": 2079 }, { "epoch": 0.8457003455986989, "grad_norm": 0.09208957850933075, "learning_rate": 0.00011557093425605536, "loss": 0.9318, "step": 2080 }, { "epoch": 0.8461069323033137, "grad_norm": 0.09385235607624054, "learning_rate": 0.00011553022593120293, "loss": 1.0103, "step": 2081 }, { "epoch": 0.8465135190079285, "grad_norm": 0.09042852371931076, "learning_rate": 0.0001154895176063505, "loss": 0.9433, "step": 2082 }, { "epoch": 0.8469201057125432, "grad_norm": 0.09457480907440186, "learning_rate": 0.00011544880928149808, "loss": 0.9826, "step": 2083 }, { "epoch": 0.847326692417158, "grad_norm": 0.09332386404275894, "learning_rate": 0.00011540810095664565, "loss": 1.0264, "step": 2084 }, { "epoch": 0.8477332791217728, "grad_norm": 0.09104622900485992, "learning_rate": 0.00011536739263179322, "loss": 0.997, "step": 2085 }, { "epoch": 0.8481398658263875, "grad_norm": 0.08547891676425934, "learning_rate": 0.00011532668430694078, "loss": 0.9314, "step": 2086 }, { "epoch": 0.8485464525310022, "grad_norm": 0.08525467664003372, "learning_rate": 0.00011528597598208835, "loss": 0.9128, "step": 2087 }, { "epoch": 0.848953039235617, "grad_norm": 0.08956707268953323, "learning_rate": 0.0001152452676572359, "loss": 
0.9383, "step": 2088 }, { "epoch": 0.8493596259402317, "grad_norm": 0.09025274217128754, "learning_rate": 0.00011520455933238347, "loss": 0.975, "step": 2089 }, { "epoch": 0.8497662126448465, "grad_norm": 0.09149152785539627, "learning_rate": 0.00011516385100753104, "loss": 0.9633, "step": 2090 }, { "epoch": 0.8501727993494613, "grad_norm": 0.0975874587893486, "learning_rate": 0.00011512314268267861, "loss": 1.0283, "step": 2091 }, { "epoch": 0.850579386054076, "grad_norm": 0.09499591588973999, "learning_rate": 0.00011508243435782618, "loss": 1.0041, "step": 2092 }, { "epoch": 0.8509859727586908, "grad_norm": 0.09098786115646362, "learning_rate": 0.00011504172603297374, "loss": 1.0112, "step": 2093 }, { "epoch": 0.8513925594633055, "grad_norm": 0.08904889225959778, "learning_rate": 0.00011500101770812131, "loss": 0.8774, "step": 2094 }, { "epoch": 0.8517991461679203, "grad_norm": 0.08806558698415756, "learning_rate": 0.00011496030938326889, "loss": 0.8791, "step": 2095 }, { "epoch": 0.8522057328725351, "grad_norm": 0.09309332817792892, "learning_rate": 0.00011491960105841646, "loss": 1.0235, "step": 2096 }, { "epoch": 0.8526123195771498, "grad_norm": 0.0923120453953743, "learning_rate": 0.00011487889273356403, "loss": 0.9926, "step": 2097 }, { "epoch": 0.8530189062817646, "grad_norm": 0.09120898693799973, "learning_rate": 0.0001148381844087116, "loss": 1.0387, "step": 2098 }, { "epoch": 0.8534254929863794, "grad_norm": 0.09038707613945007, "learning_rate": 0.00011479747608385916, "loss": 0.9998, "step": 2099 }, { "epoch": 0.8538320796909941, "grad_norm": 0.09675489366054535, "learning_rate": 0.00011475676775900672, "loss": 0.9872, "step": 2100 }, { "epoch": 0.8542386663956089, "grad_norm": 0.09282051771879196, "learning_rate": 0.00011471605943415428, "loss": 0.9191, "step": 2101 }, { "epoch": 0.8546452531002237, "grad_norm": 0.09295305609703064, "learning_rate": 0.00011467535110930185, "loss": 0.9979, "step": 2102 }, { "epoch": 0.8550518398048383, "grad_norm": 
0.09186626225709915, "learning_rate": 0.00011463464278444942, "loss": 0.9737, "step": 2103 }, { "epoch": 0.8554584265094531, "grad_norm": 0.09875518828630447, "learning_rate": 0.00011459393445959699, "loss": 0.9602, "step": 2104 }, { "epoch": 0.8558650132140679, "grad_norm": 0.09007591754198074, "learning_rate": 0.00011455322613474456, "loss": 0.9516, "step": 2105 }, { "epoch": 0.8562715999186826, "grad_norm": 0.08967866748571396, "learning_rate": 0.00011451251780989214, "loss": 0.846, "step": 2106 }, { "epoch": 0.8566781866232974, "grad_norm": 0.08921065926551819, "learning_rate": 0.0001144718094850397, "loss": 0.912, "step": 2107 }, { "epoch": 0.8570847733279122, "grad_norm": 0.09793677181005478, "learning_rate": 0.00011443110116018727, "loss": 1.036, "step": 2108 }, { "epoch": 0.8574913600325269, "grad_norm": 0.08594641089439392, "learning_rate": 0.00011439039283533484, "loss": 0.9059, "step": 2109 }, { "epoch": 0.8578979467371417, "grad_norm": 0.09588687121868134, "learning_rate": 0.00011434968451048241, "loss": 1.0412, "step": 2110 }, { "epoch": 0.8583045334417565, "grad_norm": 0.09782074391841888, "learning_rate": 0.00011430897618562997, "loss": 0.9654, "step": 2111 }, { "epoch": 0.8587111201463712, "grad_norm": 0.0923461988568306, "learning_rate": 0.00011426826786077753, "loss": 0.8857, "step": 2112 }, { "epoch": 0.859117706850986, "grad_norm": 0.09952476620674133, "learning_rate": 0.0001142275595359251, "loss": 1.1011, "step": 2113 }, { "epoch": 0.8595242935556008, "grad_norm": 0.09214503318071365, "learning_rate": 0.00011418685121107266, "loss": 1.0602, "step": 2114 }, { "epoch": 0.8599308802602155, "grad_norm": 0.08914364874362946, "learning_rate": 0.00011414614288622023, "loss": 0.9854, "step": 2115 }, { "epoch": 0.8603374669648303, "grad_norm": 0.07836323231458664, "learning_rate": 0.0001141054345613678, "loss": 0.8843, "step": 2116 }, { "epoch": 0.8607440536694451, "grad_norm": 0.09170486778020859, "learning_rate": 0.00011406472623651537, "loss": 
1.0271, "step": 2117 }, { "epoch": 0.8611506403740598, "grad_norm": 0.1003408208489418, "learning_rate": 0.00011402401791166295, "loss": 1.0277, "step": 2118 }, { "epoch": 0.8615572270786745, "grad_norm": 0.09307452291250229, "learning_rate": 0.00011398330958681052, "loss": 0.9288, "step": 2119 }, { "epoch": 0.8619638137832892, "grad_norm": 0.0927853137254715, "learning_rate": 0.00011394260126195808, "loss": 0.9514, "step": 2120 }, { "epoch": 0.862370400487904, "grad_norm": 0.08969385176897049, "learning_rate": 0.00011390189293710565, "loss": 0.9732, "step": 2121 }, { "epoch": 0.8627769871925188, "grad_norm": 0.09284186363220215, "learning_rate": 0.00011386118461225322, "loss": 0.9383, "step": 2122 }, { "epoch": 0.8631835738971335, "grad_norm": 0.09814801067113876, "learning_rate": 0.00011382047628740077, "loss": 1.1479, "step": 2123 }, { "epoch": 0.8635901606017483, "grad_norm": 0.09331676363945007, "learning_rate": 0.00011377976796254834, "loss": 1.0516, "step": 2124 }, { "epoch": 0.8639967473063631, "grad_norm": 0.1085168719291687, "learning_rate": 0.00011373905963769591, "loss": 1.0429, "step": 2125 }, { "epoch": 0.8644033340109778, "grad_norm": 0.084463931620121, "learning_rate": 0.00011369835131284348, "loss": 0.8698, "step": 2126 }, { "epoch": 0.8648099207155926, "grad_norm": 0.08983217924833298, "learning_rate": 0.00011365764298799104, "loss": 0.9252, "step": 2127 }, { "epoch": 0.8652165074202074, "grad_norm": 0.09315849840641022, "learning_rate": 0.00011361693466313861, "loss": 0.9502, "step": 2128 }, { "epoch": 0.8656230941248221, "grad_norm": 0.08930740505456924, "learning_rate": 0.00011357622633828619, "loss": 0.9571, "step": 2129 }, { "epoch": 0.8660296808294369, "grad_norm": 0.09786850959062576, "learning_rate": 0.00011353551801343376, "loss": 0.9578, "step": 2130 }, { "epoch": 0.8664362675340517, "grad_norm": 0.09109771996736526, "learning_rate": 0.00011349480968858133, "loss": 1.0178, "step": 2131 }, { "epoch": 0.8668428542386664, "grad_norm": 
0.09116113185882568, "learning_rate": 0.0001134541013637289, "loss": 0.9807, "step": 2132 }, { "epoch": 0.8672494409432812, "grad_norm": 0.08628479391336441, "learning_rate": 0.00011341339303887646, "loss": 0.8486, "step": 2133 }, { "epoch": 0.867656027647896, "grad_norm": 0.08679687231779099, "learning_rate": 0.00011337268471402403, "loss": 0.9621, "step": 2134 }, { "epoch": 0.8680626143525106, "grad_norm": 0.08852676302194595, "learning_rate": 0.00011333197638917158, "loss": 1.0283, "step": 2135 }, { "epoch": 0.8684692010571254, "grad_norm": 0.0838993713259697, "learning_rate": 0.00011329126806431915, "loss": 0.9123, "step": 2136 }, { "epoch": 0.8688757877617402, "grad_norm": 0.09657544642686844, "learning_rate": 0.00011325055973946672, "loss": 1.0619, "step": 2137 }, { "epoch": 0.8692823744663549, "grad_norm": 0.0973362997174263, "learning_rate": 0.00011320985141461429, "loss": 1.0232, "step": 2138 }, { "epoch": 0.8696889611709697, "grad_norm": 0.09646733105182648, "learning_rate": 0.00011316914308976185, "loss": 0.972, "step": 2139 }, { "epoch": 0.8700955478755845, "grad_norm": 0.09988803416490555, "learning_rate": 0.00011312843476490942, "loss": 1.0555, "step": 2140 }, { "epoch": 0.8705021345801992, "grad_norm": 0.08326305449008942, "learning_rate": 0.000113087726440057, "loss": 0.8427, "step": 2141 }, { "epoch": 0.870908721284814, "grad_norm": 0.08908620476722717, "learning_rate": 0.00011304701811520457, "loss": 0.9304, "step": 2142 }, { "epoch": 0.8713153079894288, "grad_norm": 0.09493114799261093, "learning_rate": 0.00011300630979035214, "loss": 0.9481, "step": 2143 }, { "epoch": 0.8717218946940435, "grad_norm": 0.09405462443828583, "learning_rate": 0.0001129656014654997, "loss": 0.8995, "step": 2144 }, { "epoch": 0.8721284813986583, "grad_norm": 0.09000107645988464, "learning_rate": 0.00011292489314064727, "loss": 0.9969, "step": 2145 }, { "epoch": 0.872535068103273, "grad_norm": 0.08611016720533371, "learning_rate": 0.00011288418481579484, "loss": 0.9461, 
"step": 2146 }, { "epoch": 0.8729416548078878, "grad_norm": 0.09909865260124207, "learning_rate": 0.0001128434764909424, "loss": 1.0668, "step": 2147 }, { "epoch": 0.8733482415125026, "grad_norm": 0.09296669065952301, "learning_rate": 0.00011280276816608996, "loss": 1.0196, "step": 2148 }, { "epoch": 0.8737548282171173, "grad_norm": 0.09515411406755447, "learning_rate": 0.00011276205984123753, "loss": 0.9542, "step": 2149 }, { "epoch": 0.874161414921732, "grad_norm": 0.1016170084476471, "learning_rate": 0.0001127213515163851, "loss": 1.0545, "step": 2150 }, { "epoch": 0.8745680016263468, "grad_norm": 0.09408387541770935, "learning_rate": 0.00011268064319153267, "loss": 0.9852, "step": 2151 }, { "epoch": 0.8749745883309615, "grad_norm": 0.09204485267400742, "learning_rate": 0.00011263993486668025, "loss": 0.9893, "step": 2152 }, { "epoch": 0.8753811750355763, "grad_norm": 0.10141453891992569, "learning_rate": 0.00011259922654182782, "loss": 0.9496, "step": 2153 }, { "epoch": 0.8757877617401911, "grad_norm": 0.09088826179504395, "learning_rate": 0.00011255851821697538, "loss": 0.9073, "step": 2154 }, { "epoch": 0.8761943484448058, "grad_norm": 0.09122118353843689, "learning_rate": 0.00011251780989212295, "loss": 0.9927, "step": 2155 }, { "epoch": 0.8766009351494206, "grad_norm": 0.08325305581092834, "learning_rate": 0.00011247710156727052, "loss": 0.8863, "step": 2156 }, { "epoch": 0.8770075218540354, "grad_norm": 0.09161413460969925, "learning_rate": 0.00011243639324241809, "loss": 0.9652, "step": 2157 }, { "epoch": 0.8774141085586501, "grad_norm": 0.08764609694480896, "learning_rate": 0.00011239568491756565, "loss": 0.9851, "step": 2158 }, { "epoch": 0.8778206952632649, "grad_norm": 0.09217865765094757, "learning_rate": 0.00011235497659271321, "loss": 0.9546, "step": 2159 }, { "epoch": 0.8782272819678797, "grad_norm": 0.08746439218521118, "learning_rate": 0.00011231426826786078, "loss": 0.9115, "step": 2160 }, { "epoch": 0.8786338686724944, "grad_norm": 
0.09703024476766586, "learning_rate": 0.00011227355994300834, "loss": 1.0464, "step": 2161 }, { "epoch": 0.8790404553771092, "grad_norm": 0.08776511996984482, "learning_rate": 0.00011223285161815591, "loss": 0.9828, "step": 2162 }, { "epoch": 0.879447042081724, "grad_norm": 0.09440065920352936, "learning_rate": 0.00011219214329330348, "loss": 1.0458, "step": 2163 }, { "epoch": 0.8798536287863387, "grad_norm": 0.08808255940675735, "learning_rate": 0.00011215143496845106, "loss": 0.9835, "step": 2164 }, { "epoch": 0.8802602154909535, "grad_norm": 0.09321518242359161, "learning_rate": 0.00011211072664359863, "loss": 0.9592, "step": 2165 }, { "epoch": 0.8806668021955683, "grad_norm": 0.08485117554664612, "learning_rate": 0.0001120700183187462, "loss": 0.8574, "step": 2166 }, { "epoch": 0.8810733889001829, "grad_norm": 0.09101716428995132, "learning_rate": 0.00011202930999389376, "loss": 1.0562, "step": 2167 }, { "epoch": 0.8814799756047977, "grad_norm": 0.0868394672870636, "learning_rate": 0.00011198860166904133, "loss": 0.9244, "step": 2168 }, { "epoch": 0.8818865623094125, "grad_norm": 0.09465855360031128, "learning_rate": 0.0001119478933441889, "loss": 1.0169, "step": 2169 }, { "epoch": 0.8822931490140272, "grad_norm": 0.08937587589025497, "learning_rate": 0.00011190718501933645, "loss": 1.0126, "step": 2170 }, { "epoch": 0.882699735718642, "grad_norm": 0.09273424744606018, "learning_rate": 0.00011186647669448402, "loss": 0.9194, "step": 2171 }, { "epoch": 0.8831063224232567, "grad_norm": 0.09193231910467148, "learning_rate": 0.00011182576836963159, "loss": 1.0218, "step": 2172 }, { "epoch": 0.8835129091278715, "grad_norm": 0.09555093199014664, "learning_rate": 0.00011178506004477915, "loss": 0.9679, "step": 2173 }, { "epoch": 0.8839194958324863, "grad_norm": 0.09123765677213669, "learning_rate": 0.00011174435171992672, "loss": 0.9842, "step": 2174 }, { "epoch": 0.884326082537101, "grad_norm": 0.0927356630563736, "learning_rate": 0.0001117036433950743, "loss": 
0.9448, "step": 2175 }, { "epoch": 0.8847326692417158, "grad_norm": 0.09687252342700958, "learning_rate": 0.00011166293507022187, "loss": 1.0458, "step": 2176 }, { "epoch": 0.8851392559463306, "grad_norm": 0.10106469690799713, "learning_rate": 0.00011162222674536944, "loss": 0.9754, "step": 2177 }, { "epoch": 0.8855458426509453, "grad_norm": 0.09762795269489288, "learning_rate": 0.000111581518420517, "loss": 0.922, "step": 2178 }, { "epoch": 0.8859524293555601, "grad_norm": 0.09456496685743332, "learning_rate": 0.00011154081009566457, "loss": 0.9345, "step": 2179 }, { "epoch": 0.8863590160601749, "grad_norm": 0.09217999875545502, "learning_rate": 0.00011150010177081214, "loss": 1.0163, "step": 2180 }, { "epoch": 0.8867656027647896, "grad_norm": 0.0955888032913208, "learning_rate": 0.00011145939344595971, "loss": 0.9464, "step": 2181 }, { "epoch": 0.8871721894694043, "grad_norm": 0.09351805597543716, "learning_rate": 0.00011141868512110726, "loss": 0.9911, "step": 2182 }, { "epoch": 0.8875787761740191, "grad_norm": 0.08360351622104645, "learning_rate": 0.00011137797679625483, "loss": 0.8656, "step": 2183 }, { "epoch": 0.8879853628786338, "grad_norm": 0.09139275550842285, "learning_rate": 0.0001113372684714024, "loss": 0.9629, "step": 2184 }, { "epoch": 0.8883919495832486, "grad_norm": 0.0988682433962822, "learning_rate": 0.00011129656014654997, "loss": 0.9856, "step": 2185 }, { "epoch": 0.8887985362878634, "grad_norm": 0.098371222615242, "learning_rate": 0.00011125585182169753, "loss": 1.0566, "step": 2186 }, { "epoch": 0.8892051229924781, "grad_norm": 0.09045372158288956, "learning_rate": 0.00011121514349684511, "loss": 0.9015, "step": 2187 }, { "epoch": 0.8896117096970929, "grad_norm": 0.09395705908536911, "learning_rate": 0.00011117443517199268, "loss": 1.0059, "step": 2188 }, { "epoch": 0.8900182964017077, "grad_norm": 0.09204548597335815, "learning_rate": 0.00011113372684714025, "loss": 1.0135, "step": 2189 }, { "epoch": 0.8904248831063224, "grad_norm": 
0.08476635068655014, "learning_rate": 0.00011109301852228782, "loss": 0.9098, "step": 2190 }, { "epoch": 0.8908314698109372, "grad_norm": 0.09019143879413605, "learning_rate": 0.00011105231019743539, "loss": 0.9087, "step": 2191 }, { "epoch": 0.891238056515552, "grad_norm": 0.0935545563697815, "learning_rate": 0.00011101160187258295, "loss": 1.0204, "step": 2192 }, { "epoch": 0.8916446432201667, "grad_norm": 0.09029703587293625, "learning_rate": 0.00011097089354773052, "loss": 0.9913, "step": 2193 }, { "epoch": 0.8920512299247815, "grad_norm": 0.0886225774884224, "learning_rate": 0.00011093018522287807, "loss": 0.9958, "step": 2194 }, { "epoch": 0.8924578166293963, "grad_norm": 0.09101995080709457, "learning_rate": 0.00011088947689802564, "loss": 0.918, "step": 2195 }, { "epoch": 0.892864403334011, "grad_norm": 0.10184985399246216, "learning_rate": 0.00011084876857317321, "loss": 1.1037, "step": 2196 }, { "epoch": 0.8932709900386258, "grad_norm": 0.09409435093402863, "learning_rate": 0.00011080806024832078, "loss": 0.9088, "step": 2197 }, { "epoch": 0.8936775767432404, "grad_norm": 0.09551674872636795, "learning_rate": 0.00011076735192346836, "loss": 1.0379, "step": 2198 }, { "epoch": 0.8940841634478552, "grad_norm": 0.08619996160268784, "learning_rate": 0.00011072664359861593, "loss": 0.9068, "step": 2199 }, { "epoch": 0.89449075015247, "grad_norm": 0.09373293071985245, "learning_rate": 0.0001106859352737635, "loss": 0.9394, "step": 2200 }, { "epoch": 0.8948973368570847, "grad_norm": 0.09360924363136292, "learning_rate": 0.00011064522694891106, "loss": 0.918, "step": 2201 }, { "epoch": 0.8953039235616995, "grad_norm": 0.08794824033975601, "learning_rate": 0.00011060451862405863, "loss": 0.9127, "step": 2202 }, { "epoch": 0.8957105102663143, "grad_norm": 0.09011366963386536, "learning_rate": 0.0001105638102992062, "loss": 0.9744, "step": 2203 }, { "epoch": 0.896117096970929, "grad_norm": 0.09070491790771484, "learning_rate": 0.00011052310197435376, "loss": 0.9182, 
"step": 2204 }, { "epoch": 0.8965236836755438, "grad_norm": 0.09090661257505417, "learning_rate": 0.00011048239364950133, "loss": 0.8662, "step": 2205 }, { "epoch": 0.8969302703801586, "grad_norm": 0.1035584807395935, "learning_rate": 0.00011044168532464889, "loss": 1.0132, "step": 2206 }, { "epoch": 0.8973368570847733, "grad_norm": 0.09471878409385681, "learning_rate": 0.00011040097699979645, "loss": 0.9183, "step": 2207 }, { "epoch": 0.8977434437893881, "grad_norm": 0.08386964350938797, "learning_rate": 0.00011036026867494402, "loss": 0.8727, "step": 2208 }, { "epoch": 0.8981500304940029, "grad_norm": 0.09777465462684631, "learning_rate": 0.00011031956035009159, "loss": 1.1244, "step": 2209 }, { "epoch": 0.8985566171986176, "grad_norm": 0.0950189158320427, "learning_rate": 0.00011027885202523917, "loss": 0.9494, "step": 2210 }, { "epoch": 0.8989632039032324, "grad_norm": 0.10297118872404099, "learning_rate": 0.00011023814370038674, "loss": 1.0345, "step": 2211 }, { "epoch": 0.8993697906078472, "grad_norm": 0.10186666250228882, "learning_rate": 0.0001101974353755343, "loss": 1.0064, "step": 2212 }, { "epoch": 0.8997763773124619, "grad_norm": 0.09332112222909927, "learning_rate": 0.00011015672705068187, "loss": 0.9915, "step": 2213 }, { "epoch": 0.9001829640170766, "grad_norm": 0.09262728691101074, "learning_rate": 0.00011011601872582944, "loss": 0.9909, "step": 2214 }, { "epoch": 0.9005895507216914, "grad_norm": 0.08695352077484131, "learning_rate": 0.00011007531040097701, "loss": 0.9143, "step": 2215 }, { "epoch": 0.9009961374263061, "grad_norm": 0.09473065286874771, "learning_rate": 0.00011003460207612458, "loss": 0.9297, "step": 2216 }, { "epoch": 0.9014027241309209, "grad_norm": 0.09609273076057434, "learning_rate": 0.00010999389375127213, "loss": 0.9357, "step": 2217 }, { "epoch": 0.9018093108355357, "grad_norm": 0.09273882955312729, "learning_rate": 0.0001099531854264197, "loss": 0.9215, "step": 2218 }, { "epoch": 0.9022158975401504, "grad_norm": 
0.09666993468999863, "learning_rate": 0.00010991247710156727, "loss": 1.0015, "step": 2219 }, { "epoch": 0.9026224842447652, "grad_norm": 0.09521298855543137, "learning_rate": 0.00010987176877671483, "loss": 1.0203, "step": 2220 }, { "epoch": 0.90302907094938, "grad_norm": 0.08719142526388168, "learning_rate": 0.00010983106045186243, "loss": 0.8722, "step": 2221 }, { "epoch": 0.9034356576539947, "grad_norm": 0.09398588538169861, "learning_rate": 0.00010979035212700998, "loss": 1.0722, "step": 2222 }, { "epoch": 0.9038422443586095, "grad_norm": 0.09667246043682098, "learning_rate": 0.00010974964380215755, "loss": 1.0235, "step": 2223 }, { "epoch": 0.9042488310632242, "grad_norm": 0.08866921067237854, "learning_rate": 0.00010970893547730512, "loss": 0.9155, "step": 2224 }, { "epoch": 0.904655417767839, "grad_norm": 0.08643452823162079, "learning_rate": 0.00010966822715245268, "loss": 0.9939, "step": 2225 }, { "epoch": 0.9050620044724538, "grad_norm": 0.09741934388875961, "learning_rate": 0.00010962751882760025, "loss": 1.094, "step": 2226 }, { "epoch": 0.9054685911770685, "grad_norm": 0.09106621891260147, "learning_rate": 0.00010958681050274782, "loss": 0.9378, "step": 2227 }, { "epoch": 0.9058751778816833, "grad_norm": 0.09541244804859161, "learning_rate": 0.00010954610217789539, "loss": 1.0023, "step": 2228 }, { "epoch": 0.906281764586298, "grad_norm": 0.09381993860006332, "learning_rate": 0.00010950539385304294, "loss": 1.0045, "step": 2229 }, { "epoch": 0.9066883512909127, "grad_norm": 0.09603835642337799, "learning_rate": 0.00010946468552819051, "loss": 1.0988, "step": 2230 }, { "epoch": 0.9070949379955275, "grad_norm": 0.10151727497577667, "learning_rate": 0.00010942397720333808, "loss": 1.0537, "step": 2231 }, { "epoch": 0.9075015247001423, "grad_norm": 0.09192585945129395, "learning_rate": 0.00010938326887848564, "loss": 0.9195, "step": 2232 }, { "epoch": 0.907908111404757, "grad_norm": 0.09959591180086136, "learning_rate": 0.00010934256055363324, "loss": 
1.0567, "step": 2233 }, { "epoch": 0.9083146981093718, "grad_norm": 0.09753983467817307, "learning_rate": 0.0001093018522287808, "loss": 0.9355, "step": 2234 }, { "epoch": 0.9087212848139866, "grad_norm": 0.10025233775377274, "learning_rate": 0.00010926114390392836, "loss": 0.9571, "step": 2235 }, { "epoch": 0.9091278715186013, "grad_norm": 0.09255032986402512, "learning_rate": 0.00010922043557907593, "loss": 1.0291, "step": 2236 }, { "epoch": 0.9095344582232161, "grad_norm": 0.09453842043876648, "learning_rate": 0.0001091797272542235, "loss": 0.9489, "step": 2237 }, { "epoch": 0.9099410449278309, "grad_norm": 0.09328801184892654, "learning_rate": 0.00010913901892937106, "loss": 1.0596, "step": 2238 }, { "epoch": 0.9103476316324456, "grad_norm": 0.08745749294757843, "learning_rate": 0.00010909831060451863, "loss": 0.846, "step": 2239 }, { "epoch": 0.9107542183370604, "grad_norm": 0.09585551172494888, "learning_rate": 0.0001090576022796662, "loss": 0.8888, "step": 2240 }, { "epoch": 0.9111608050416752, "grad_norm": 0.09437873214483261, "learning_rate": 0.00010901689395481375, "loss": 1.0954, "step": 2241 }, { "epoch": 0.9115673917462899, "grad_norm": 0.09190462529659271, "learning_rate": 0.00010897618562996132, "loss": 0.9484, "step": 2242 }, { "epoch": 0.9119739784509047, "grad_norm": 0.09598547965288162, "learning_rate": 0.00010893547730510889, "loss": 0.9765, "step": 2243 }, { "epoch": 0.9123805651555195, "grad_norm": 0.08472473174333572, "learning_rate": 0.00010889476898025648, "loss": 0.914, "step": 2244 }, { "epoch": 0.9127871518601341, "grad_norm": 0.09113691002130508, "learning_rate": 0.00010885406065540404, "loss": 1.0507, "step": 2245 }, { "epoch": 0.913193738564749, "grad_norm": 0.09340670704841614, "learning_rate": 0.0001088133523305516, "loss": 0.9908, "step": 2246 }, { "epoch": 0.9136003252693637, "grad_norm": 0.09673475474119186, "learning_rate": 0.00010877264400569917, "loss": 0.966, "step": 2247 }, { "epoch": 0.9140069119739784, "grad_norm": 
0.09419335424900055, "learning_rate": 0.00010873193568084674, "loss": 0.9484, "step": 2248 }, { "epoch": 0.9144134986785932, "grad_norm": 0.09127677232027054, "learning_rate": 0.00010869122735599431, "loss": 0.9786, "step": 2249 }, { "epoch": 0.9148200853832079, "grad_norm": 0.09134241938591003, "learning_rate": 0.00010865051903114188, "loss": 0.9651, "step": 2250 }, { "epoch": 0.9152266720878227, "grad_norm": 0.08164233714342117, "learning_rate": 0.00010860981070628944, "loss": 0.8301, "step": 2251 }, { "epoch": 0.9156332587924375, "grad_norm": 0.09648903459310532, "learning_rate": 0.00010856910238143701, "loss": 0.9931, "step": 2252 }, { "epoch": 0.9160398454970522, "grad_norm": 0.09599076956510544, "learning_rate": 0.00010852839405658457, "loss": 1.1588, "step": 2253 }, { "epoch": 0.916446432201667, "grad_norm": 0.09624163806438446, "learning_rate": 0.00010848768573173213, "loss": 1.0291, "step": 2254 }, { "epoch": 0.9168530189062818, "grad_norm": 0.09379248321056366, "learning_rate": 0.0001084469774068797, "loss": 1.0189, "step": 2255 }, { "epoch": 0.9172596056108965, "grad_norm": 0.1004246398806572, "learning_rate": 0.0001084062690820273, "loss": 1.0819, "step": 2256 }, { "epoch": 0.9176661923155113, "grad_norm": 0.0896550863981247, "learning_rate": 0.00010836556075717485, "loss": 0.9514, "step": 2257 }, { "epoch": 0.9180727790201261, "grad_norm": 0.08566062897443771, "learning_rate": 0.00010832485243232242, "loss": 0.9827, "step": 2258 }, { "epoch": 0.9184793657247408, "grad_norm": 0.09392201900482178, "learning_rate": 0.00010828414410746998, "loss": 1.0118, "step": 2259 }, { "epoch": 0.9188859524293556, "grad_norm": 0.09124386310577393, "learning_rate": 0.00010824343578261755, "loss": 0.9892, "step": 2260 }, { "epoch": 0.9192925391339704, "grad_norm": 0.10101054608821869, "learning_rate": 0.00010820272745776512, "loss": 1.1112, "step": 2261 }, { "epoch": 0.919699125838585, "grad_norm": 0.0995619148015976, "learning_rate": 0.00010816201913291269, "loss": 
0.9978, "step": 2262 }, { "epoch": 0.9201057125431998, "grad_norm": 0.10450758039951324, "learning_rate": 0.00010812131080806025, "loss": 1.0496, "step": 2263 }, { "epoch": 0.9205122992478146, "grad_norm": 0.08600231260061264, "learning_rate": 0.00010808060248320781, "loss": 0.9513, "step": 2264 }, { "epoch": 0.9209188859524293, "grad_norm": 0.09189002215862274, "learning_rate": 0.00010803989415835538, "loss": 0.9342, "step": 2265 }, { "epoch": 0.9213254726570441, "grad_norm": 0.0933215469121933, "learning_rate": 0.00010799918583350294, "loss": 0.9806, "step": 2266 }, { "epoch": 0.9217320593616589, "grad_norm": 0.09535648673772812, "learning_rate": 0.00010795847750865054, "loss": 1.045, "step": 2267 }, { "epoch": 0.9221386460662736, "grad_norm": 0.09350398182868958, "learning_rate": 0.0001079177691837981, "loss": 0.948, "step": 2268 }, { "epoch": 0.9225452327708884, "grad_norm": 0.09485659748315811, "learning_rate": 0.00010787706085894566, "loss": 1.0113, "step": 2269 }, { "epoch": 0.9229518194755032, "grad_norm": 0.08902882784605026, "learning_rate": 0.00010783635253409323, "loss": 0.9287, "step": 2270 }, { "epoch": 0.9233584061801179, "grad_norm": 0.09547727555036545, "learning_rate": 0.0001077956442092408, "loss": 0.9704, "step": 2271 }, { "epoch": 0.9237649928847327, "grad_norm": 0.0938442051410675, "learning_rate": 0.00010775493588438836, "loss": 1.0824, "step": 2272 }, { "epoch": 0.9241715795893475, "grad_norm": 0.09499689936637878, "learning_rate": 0.00010771422755953593, "loss": 1.0162, "step": 2273 }, { "epoch": 0.9245781662939622, "grad_norm": 0.08982361853122711, "learning_rate": 0.0001076735192346835, "loss": 1.0051, "step": 2274 }, { "epoch": 0.924984752998577, "grad_norm": 0.08913452923297882, "learning_rate": 0.00010763281090983107, "loss": 0.9585, "step": 2275 }, { "epoch": 0.9253913397031917, "grad_norm": 0.09322965890169144, "learning_rate": 0.00010759210258497862, "loss": 0.9951, "step": 2276 }, { "epoch": 0.9257979264078064, "grad_norm": 
0.08852788060903549, "learning_rate": 0.00010755139426012619, "loss": 0.8826, "step": 2277 }, { "epoch": 0.9262045131124212, "grad_norm": 0.08934798091650009, "learning_rate": 0.00010751068593527376, "loss": 0.9592, "step": 2278 }, { "epoch": 0.9266110998170359, "grad_norm": 0.08754114806652069, "learning_rate": 0.00010746997761042135, "loss": 0.8947, "step": 2279 }, { "epoch": 0.9270176865216507, "grad_norm": 0.08998506516218185, "learning_rate": 0.00010742926928556892, "loss": 0.9905, "step": 2280 }, { "epoch": 0.9274242732262655, "grad_norm": 0.09599866718053818, "learning_rate": 0.00010738856096071647, "loss": 0.9931, "step": 2281 }, { "epoch": 0.9278308599308802, "grad_norm": 0.0930427685379982, "learning_rate": 0.00010734785263586404, "loss": 1.0059, "step": 2282 }, { "epoch": 0.928237446635495, "grad_norm": 0.0885154977440834, "learning_rate": 0.00010730714431101161, "loss": 0.9802, "step": 2283 }, { "epoch": 0.9286440333401098, "grad_norm": 0.0902063325047493, "learning_rate": 0.00010726643598615918, "loss": 0.9687, "step": 2284 }, { "epoch": 0.9290506200447245, "grad_norm": 0.08460281789302826, "learning_rate": 0.00010722572766130674, "loss": 0.8834, "step": 2285 }, { "epoch": 0.9294572067493393, "grad_norm": 0.0936511978507042, "learning_rate": 0.00010718501933645431, "loss": 1.0907, "step": 2286 }, { "epoch": 0.9298637934539541, "grad_norm": 0.09102717787027359, "learning_rate": 0.00010714431101160188, "loss": 0.9573, "step": 2287 }, { "epoch": 0.9302703801585688, "grad_norm": 0.08209431916475296, "learning_rate": 0.00010710360268674943, "loss": 0.79, "step": 2288 }, { "epoch": 0.9306769668631836, "grad_norm": 0.09181005507707596, "learning_rate": 0.000107062894361897, "loss": 1.0394, "step": 2289 }, { "epoch": 0.9310835535677984, "grad_norm": 0.09006737917661667, "learning_rate": 0.0001070221860370446, "loss": 0.976, "step": 2290 }, { "epoch": 0.9314901402724131, "grad_norm": 0.08806903660297394, "learning_rate": 0.00010698147771219216, "loss": 0.9429, 
"step": 2291 }, { "epoch": 0.9318967269770279, "grad_norm": 0.09663230180740356, "learning_rate": 0.00010694076938733973, "loss": 0.9936, "step": 2292 }, { "epoch": 0.9323033136816427, "grad_norm": 0.09236756712198257, "learning_rate": 0.00010690006106248728, "loss": 0.9775, "step": 2293 }, { "epoch": 0.9327099003862573, "grad_norm": 0.0875551626086235, "learning_rate": 0.00010685935273763485, "loss": 0.9222, "step": 2294 }, { "epoch": 0.9331164870908721, "grad_norm": 0.09144583344459534, "learning_rate": 0.00010681864441278242, "loss": 0.9166, "step": 2295 }, { "epoch": 0.9335230737954869, "grad_norm": 0.09605292975902557, "learning_rate": 0.00010677793608792999, "loss": 1.0085, "step": 2296 }, { "epoch": 0.9339296605001016, "grad_norm": 0.09013127535581589, "learning_rate": 0.00010673722776307755, "loss": 0.9473, "step": 2297 }, { "epoch": 0.9343362472047164, "grad_norm": 0.09012243151664734, "learning_rate": 0.00010669651943822512, "loss": 0.953, "step": 2298 }, { "epoch": 0.9347428339093312, "grad_norm": 0.0961398184299469, "learning_rate": 0.00010665581111337269, "loss": 1.0658, "step": 2299 }, { "epoch": 0.9351494206139459, "grad_norm": 0.09278837591409683, "learning_rate": 0.00010661510278852024, "loss": 0.9739, "step": 2300 }, { "epoch": 0.9355560073185607, "grad_norm": 0.08477824926376343, "learning_rate": 0.00010657439446366781, "loss": 0.9376, "step": 2301 }, { "epoch": 0.9359625940231754, "grad_norm": 0.08817529678344727, "learning_rate": 0.0001065336861388154, "loss": 0.9371, "step": 2302 }, { "epoch": 0.9363691807277902, "grad_norm": 0.09441924840211868, "learning_rate": 0.00010649297781396297, "loss": 0.8977, "step": 2303 }, { "epoch": 0.936775767432405, "grad_norm": 0.09430365264415741, "learning_rate": 0.00010645226948911053, "loss": 1.0525, "step": 2304 }, { "epoch": 0.9371823541370197, "grad_norm": 0.09169165045022964, "learning_rate": 0.0001064115611642581, "loss": 0.9261, "step": 2305 }, { "epoch": 0.9375889408416345, "grad_norm": 
0.09943647682666779, "learning_rate": 0.00010637085283940566, "loss": 0.9956, "step": 2306 }, { "epoch": 0.9379955275462493, "grad_norm": 0.0941019132733345, "learning_rate": 0.00010633014451455323, "loss": 1.0029, "step": 2307 }, { "epoch": 0.938402114250864, "grad_norm": 0.08687194436788559, "learning_rate": 0.0001062894361897008, "loss": 0.9077, "step": 2308 }, { "epoch": 0.9388087009554787, "grad_norm": 0.09248825162649155, "learning_rate": 0.00010624872786484837, "loss": 1.0412, "step": 2309 }, { "epoch": 0.9392152876600935, "grad_norm": 0.09985529631376266, "learning_rate": 0.00010620801953999593, "loss": 1.0573, "step": 2310 }, { "epoch": 0.9396218743647082, "grad_norm": 0.09216563403606415, "learning_rate": 0.00010616731121514349, "loss": 0.9448, "step": 2311 }, { "epoch": 0.940028461069323, "grad_norm": 0.092438243329525, "learning_rate": 0.00010612660289029106, "loss": 0.9679, "step": 2312 }, { "epoch": 0.9404350477739378, "grad_norm": 0.0857539102435112, "learning_rate": 0.00010608589456543865, "loss": 0.8766, "step": 2313 }, { "epoch": 0.9408416344785525, "grad_norm": 0.09243746846914291, "learning_rate": 0.00010604518624058622, "loss": 0.9536, "step": 2314 }, { "epoch": 0.9412482211831673, "grad_norm": 0.08617236465215683, "learning_rate": 0.00010600447791573379, "loss": 0.9518, "step": 2315 }, { "epoch": 0.9416548078877821, "grad_norm": 0.08910689502954483, "learning_rate": 0.00010596376959088134, "loss": 0.9602, "step": 2316 }, { "epoch": 0.9420613945923968, "grad_norm": 0.08643607795238495, "learning_rate": 0.00010592306126602891, "loss": 0.8827, "step": 2317 }, { "epoch": 0.9424679812970116, "grad_norm": 0.0912124440073967, "learning_rate": 0.00010588235294117647, "loss": 0.9965, "step": 2318 }, { "epoch": 0.9428745680016264, "grad_norm": 0.09088627249002457, "learning_rate": 0.00010584164461632404, "loss": 0.9025, "step": 2319 }, { "epoch": 0.9432811547062411, "grad_norm": 0.09329286962747574, "learning_rate": 0.00010580093629147161, "loss": 
0.9791, "step": 2320 }, { "epoch": 0.9436877414108559, "grad_norm": 0.10339915007352829, "learning_rate": 0.00010576022796661918, "loss": 1.0807, "step": 2321 }, { "epoch": 0.9440943281154707, "grad_norm": 0.09373354911804199, "learning_rate": 0.00010571951964176675, "loss": 0.9911, "step": 2322 }, { "epoch": 0.9445009148200854, "grad_norm": 0.10617939382791519, "learning_rate": 0.0001056788113169143, "loss": 1.0851, "step": 2323 }, { "epoch": 0.9449075015247002, "grad_norm": 0.09167637676000595, "learning_rate": 0.00010563810299206187, "loss": 0.9047, "step": 2324 }, { "epoch": 0.945314088229315, "grad_norm": 0.08472510427236557, "learning_rate": 0.00010559739466720946, "loss": 0.8727, "step": 2325 }, { "epoch": 0.9457206749339296, "grad_norm": 0.0884479507803917, "learning_rate": 0.00010555668634235703, "loss": 0.9784, "step": 2326 }, { "epoch": 0.9461272616385444, "grad_norm": 0.09533506631851196, "learning_rate": 0.0001055159780175046, "loss": 0.9641, "step": 2327 }, { "epoch": 0.9465338483431591, "grad_norm": 0.09487663954496384, "learning_rate": 0.00010547526969265215, "loss": 0.9594, "step": 2328 }, { "epoch": 0.9469404350477739, "grad_norm": 0.09608594328165054, "learning_rate": 0.00010543456136779972, "loss": 0.9552, "step": 2329 }, { "epoch": 0.9473470217523887, "grad_norm": 0.08777690678834915, "learning_rate": 0.00010539385304294729, "loss": 0.944, "step": 2330 }, { "epoch": 0.9477536084570034, "grad_norm": 0.09336721152067184, "learning_rate": 0.00010535314471809485, "loss": 0.9872, "step": 2331 }, { "epoch": 0.9481601951616182, "grad_norm": 0.0932617112994194, "learning_rate": 0.00010531243639324242, "loss": 1.0259, "step": 2332 }, { "epoch": 0.948566781866233, "grad_norm": 0.09936727583408356, "learning_rate": 0.00010527172806838999, "loss": 1.0559, "step": 2333 }, { "epoch": 0.9489733685708477, "grad_norm": 0.08607706427574158, "learning_rate": 0.00010523101974353756, "loss": 0.8735, "step": 2334 }, { "epoch": 0.9493799552754625, "grad_norm": 
0.10083240270614624, "learning_rate": 0.00010519031141868511, "loss": 1.1199, "step": 2335 }, { "epoch": 0.9497865419800773, "grad_norm": 0.09380745142698288, "learning_rate": 0.0001051496030938327, "loss": 0.9708, "step": 2336 }, { "epoch": 0.950193128684692, "grad_norm": 0.09522271901369095, "learning_rate": 0.00010510889476898027, "loss": 0.9576, "step": 2337 }, { "epoch": 0.9505997153893068, "grad_norm": 0.08754262328147888, "learning_rate": 0.00010506818644412784, "loss": 0.8834, "step": 2338 }, { "epoch": 0.9510063020939216, "grad_norm": 0.09373676776885986, "learning_rate": 0.00010502747811927541, "loss": 1.0229, "step": 2339 }, { "epoch": 0.9514128887985362, "grad_norm": 0.09756851196289062, "learning_rate": 0.00010498676979442296, "loss": 1.0262, "step": 2340 }, { "epoch": 0.951819475503151, "grad_norm": 0.09419600665569305, "learning_rate": 0.00010494606146957053, "loss": 1.0049, "step": 2341 }, { "epoch": 0.9522260622077658, "grad_norm": 0.08849748224020004, "learning_rate": 0.0001049053531447181, "loss": 1.0045, "step": 2342 }, { "epoch": 0.9526326489123805, "grad_norm": 0.09651193022727966, "learning_rate": 0.00010486464481986567, "loss": 1.0209, "step": 2343 }, { "epoch": 0.9530392356169953, "grad_norm": 0.09986065328121185, "learning_rate": 0.00010482393649501323, "loss": 1.0789, "step": 2344 }, { "epoch": 0.9534458223216101, "grad_norm": 0.0957985445857048, "learning_rate": 0.0001047832281701608, "loss": 1.106, "step": 2345 }, { "epoch": 0.9538524090262248, "grad_norm": 0.1007857397198677, "learning_rate": 0.00010474251984530837, "loss": 1.027, "step": 2346 }, { "epoch": 0.9542589957308396, "grad_norm": 0.09330718219280243, "learning_rate": 0.00010470181152045592, "loss": 1.0046, "step": 2347 }, { "epoch": 0.9546655824354544, "grad_norm": 0.09503220021724701, "learning_rate": 0.00010466110319560352, "loss": 1.0119, "step": 2348 }, { "epoch": 0.9550721691400691, "grad_norm": 0.09526234120130539, "learning_rate": 0.00010462039487075109, "loss": 
0.9898, "step": 2349 }, { "epoch": 0.9554787558446839, "grad_norm": 0.0942670926451683, "learning_rate": 0.00010457968654589865, "loss": 1.0538, "step": 2350 }, { "epoch": 0.9558853425492987, "grad_norm": 0.09694371372461319, "learning_rate": 0.00010453897822104621, "loss": 0.9101, "step": 2351 }, { "epoch": 0.9562919292539134, "grad_norm": 0.09850834310054779, "learning_rate": 0.00010449826989619377, "loss": 1.0476, "step": 2352 }, { "epoch": 0.9566985159585282, "grad_norm": 0.09078159183263779, "learning_rate": 0.00010445756157134134, "loss": 0.8798, "step": 2353 }, { "epoch": 0.957105102663143, "grad_norm": 0.09196247905492783, "learning_rate": 0.00010441685324648891, "loss": 0.9571, "step": 2354 }, { "epoch": 0.9575116893677577, "grad_norm": 0.09725657850503922, "learning_rate": 0.00010437614492163648, "loss": 1.0229, "step": 2355 }, { "epoch": 0.9579182760723725, "grad_norm": 0.09602061659097672, "learning_rate": 0.00010433543659678404, "loss": 0.9666, "step": 2356 }, { "epoch": 0.9583248627769871, "grad_norm": 0.09440819919109344, "learning_rate": 0.00010429472827193161, "loss": 1.0165, "step": 2357 }, { "epoch": 0.9587314494816019, "grad_norm": 0.09775765985250473, "learning_rate": 0.00010425401994707917, "loss": 1.0927, "step": 2358 }, { "epoch": 0.9591380361862167, "grad_norm": 0.10038933902978897, "learning_rate": 0.00010421331162222676, "loss": 1.1155, "step": 2359 }, { "epoch": 0.9595446228908314, "grad_norm": 0.09265521913766861, "learning_rate": 0.00010417260329737433, "loss": 0.9965, "step": 2360 }, { "epoch": 0.9599512095954462, "grad_norm": 0.09679180383682251, "learning_rate": 0.0001041318949725219, "loss": 0.9484, "step": 2361 }, { "epoch": 0.960357796300061, "grad_norm": 0.09756863862276077, "learning_rate": 0.00010409118664766946, "loss": 0.9929, "step": 2362 }, { "epoch": 0.9607643830046757, "grad_norm": 0.09271581470966339, "learning_rate": 0.00010405047832281702, "loss": 0.9717, "step": 2363 }, { "epoch": 0.9611709697092905, "grad_norm": 
0.08519497513771057, "learning_rate": 0.00010400976999796459, "loss": 0.9248, "step": 2364 }, { "epoch": 0.9615775564139053, "grad_norm": 0.0930318683385849, "learning_rate": 0.00010396906167311215, "loss": 0.9269, "step": 2365 }, { "epoch": 0.96198414311852, "grad_norm": 0.0876484215259552, "learning_rate": 0.00010392835334825972, "loss": 0.8956, "step": 2366 }, { "epoch": 0.9623907298231348, "grad_norm": 0.10773497074842453, "learning_rate": 0.00010388764502340729, "loss": 1.0162, "step": 2367 }, { "epoch": 0.9627973165277496, "grad_norm": 0.10369701683521271, "learning_rate": 0.00010384693669855486, "loss": 1.0242, "step": 2368 }, { "epoch": 0.9632039032323643, "grad_norm": 0.09781001508235931, "learning_rate": 0.00010380622837370242, "loss": 0.9984, "step": 2369 }, { "epoch": 0.9636104899369791, "grad_norm": 0.09027720987796783, "learning_rate": 0.00010376552004884998, "loss": 0.9459, "step": 2370 }, { "epoch": 0.9640170766415939, "grad_norm": 0.0846111848950386, "learning_rate": 0.00010372481172399757, "loss": 0.8168, "step": 2371 }, { "epoch": 0.9644236633462085, "grad_norm": 0.09253893047571182, "learning_rate": 0.00010368410339914514, "loss": 1.036, "step": 2372 }, { "epoch": 0.9648302500508233, "grad_norm": 0.09075961261987686, "learning_rate": 0.00010364339507429271, "loss": 0.9765, "step": 2373 }, { "epoch": 0.9652368367554381, "grad_norm": 0.09227050840854645, "learning_rate": 0.00010360268674944028, "loss": 0.9577, "step": 2374 }, { "epoch": 0.9656434234600528, "grad_norm": 0.09381213039159775, "learning_rate": 0.00010356197842458783, "loss": 1.041, "step": 2375 }, { "epoch": 0.9660500101646676, "grad_norm": 0.08584290742874146, "learning_rate": 0.0001035212700997354, "loss": 0.7906, "step": 2376 }, { "epoch": 0.9664565968692824, "grad_norm": 0.09522596746683121, "learning_rate": 0.00010348056177488297, "loss": 0.9739, "step": 2377 }, { "epoch": 0.9668631835738971, "grad_norm": 0.09105250984430313, "learning_rate": 0.00010343985345003053, "loss": 
0.943, "step": 2378 }, { "epoch": 0.9672697702785119, "grad_norm": 0.09327445179224014, "learning_rate": 0.0001033991451251781, "loss": 1.0486, "step": 2379 }, { "epoch": 0.9676763569831267, "grad_norm": 0.08443416655063629, "learning_rate": 0.00010335843680032567, "loss": 0.8889, "step": 2380 }, { "epoch": 0.9680829436877414, "grad_norm": 0.09366993606090546, "learning_rate": 0.00010331772847547324, "loss": 0.9585, "step": 2381 }, { "epoch": 0.9684895303923562, "grad_norm": 0.1025518849492073, "learning_rate": 0.00010327702015062082, "loss": 0.9062, "step": 2382 }, { "epoch": 0.9688961170969709, "grad_norm": 0.08948516100645065, "learning_rate": 0.00010323631182576838, "loss": 0.9477, "step": 2383 }, { "epoch": 0.9693027038015857, "grad_norm": 0.09162997454404831, "learning_rate": 0.00010319560350091595, "loss": 0.9069, "step": 2384 }, { "epoch": 0.9697092905062005, "grad_norm": 0.09584391862154007, "learning_rate": 0.00010315489517606352, "loss": 0.9816, "step": 2385 }, { "epoch": 0.9701158772108152, "grad_norm": 0.08747036010026932, "learning_rate": 0.00010311418685121109, "loss": 0.9845, "step": 2386 }, { "epoch": 0.97052246391543, "grad_norm": 0.09000515937805176, "learning_rate": 0.00010307347852635864, "loss": 0.8898, "step": 2387 }, { "epoch": 0.9709290506200448, "grad_norm": 0.0957585796713829, "learning_rate": 0.00010303277020150621, "loss": 1.0053, "step": 2388 }, { "epoch": 0.9713356373246594, "grad_norm": 0.0985213965177536, "learning_rate": 0.00010299206187665378, "loss": 1.0988, "step": 2389 }, { "epoch": 0.9717422240292742, "grad_norm": 0.09285228699445724, "learning_rate": 0.00010295135355180134, "loss": 0.957, "step": 2390 }, { "epoch": 0.972148810733889, "grad_norm": 0.08875738829374313, "learning_rate": 0.00010291064522694891, "loss": 0.9324, "step": 2391 }, { "epoch": 0.9725553974385037, "grad_norm": 0.09840039908885956, "learning_rate": 0.00010286993690209648, "loss": 0.9047, "step": 2392 }, { "epoch": 0.9729619841431185, "grad_norm": 
0.09745080024003983, "learning_rate": 0.00010282922857724405, "loss": 1.0707, "step": 2393 }, { "epoch": 0.9733685708477333, "grad_norm": 0.09076414257287979, "learning_rate": 0.00010278852025239163, "loss": 0.947, "step": 2394 }, { "epoch": 0.973775157552348, "grad_norm": 0.08922093361616135, "learning_rate": 0.0001027478119275392, "loss": 0.8983, "step": 2395 }, { "epoch": 0.9741817442569628, "grad_norm": 0.09455031156539917, "learning_rate": 0.00010270710360268676, "loss": 1.0877, "step": 2396 }, { "epoch": 0.9745883309615776, "grad_norm": 0.09286132454872131, "learning_rate": 0.00010266639527783433, "loss": 0.98, "step": 2397 }, { "epoch": 0.9749949176661923, "grad_norm": 0.10121460258960724, "learning_rate": 0.00010262568695298189, "loss": 1.0906, "step": 2398 }, { "epoch": 0.9754015043708071, "grad_norm": 0.0891910120844841, "learning_rate": 0.00010258497862812945, "loss": 0.8889, "step": 2399 }, { "epoch": 0.9758080910754219, "grad_norm": 0.0938873440027237, "learning_rate": 0.00010254427030327702, "loss": 0.8787, "step": 2400 }, { "epoch": 0.9762146777800366, "grad_norm": 0.09117105603218079, "learning_rate": 0.00010250356197842459, "loss": 0.9053, "step": 2401 }, { "epoch": 0.9766212644846514, "grad_norm": 0.09840644896030426, "learning_rate": 0.00010246285365357216, "loss": 1.0462, "step": 2402 }, { "epoch": 0.9770278511892662, "grad_norm": 0.09379451721906662, "learning_rate": 0.00010242214532871972, "loss": 0.9617, "step": 2403 }, { "epoch": 0.9774344378938808, "grad_norm": 0.09142056852579117, "learning_rate": 0.00010238143700386729, "loss": 1.0022, "step": 2404 }, { "epoch": 0.9778410245984956, "grad_norm": 0.09325367957353592, "learning_rate": 0.00010234072867901487, "loss": 0.9356, "step": 2405 }, { "epoch": 0.9782476113031104, "grad_norm": 0.09714538604021072, "learning_rate": 0.00010230002035416244, "loss": 1.0685, "step": 2406 }, { "epoch": 0.9786541980077251, "grad_norm": 0.09502388536930084, "learning_rate": 0.00010225931202931001, "loss": 
1.0158, "step": 2407 }, { "epoch": 0.9790607847123399, "grad_norm": 0.09626177698373795, "learning_rate": 0.00010221860370445758, "loss": 1.0249, "step": 2408 }, { "epoch": 0.9794673714169546, "grad_norm": 0.09790710359811783, "learning_rate": 0.00010217789537960514, "loss": 0.9974, "step": 2409 }, { "epoch": 0.9798739581215694, "grad_norm": 0.0907469391822815, "learning_rate": 0.0001021371870547527, "loss": 0.994, "step": 2410 }, { "epoch": 0.9802805448261842, "grad_norm": 0.10248905420303345, "learning_rate": 0.00010209647872990026, "loss": 1.0214, "step": 2411 }, { "epoch": 0.9806871315307989, "grad_norm": 0.09504317492246628, "learning_rate": 0.00010205577040504783, "loss": 1.0642, "step": 2412 }, { "epoch": 0.9810937182354137, "grad_norm": 0.09868543595075607, "learning_rate": 0.0001020150620801954, "loss": 1.0595, "step": 2413 }, { "epoch": 0.9815003049400285, "grad_norm": 0.08648547530174255, "learning_rate": 0.00010197435375534297, "loss": 0.9273, "step": 2414 }, { "epoch": 0.9819068916446432, "grad_norm": 0.0870203897356987, "learning_rate": 0.00010193364543049054, "loss": 0.8661, "step": 2415 }, { "epoch": 0.982313478349258, "grad_norm": 0.09689280390739441, "learning_rate": 0.0001018929371056381, "loss": 1.0179, "step": 2416 }, { "epoch": 0.9827200650538728, "grad_norm": 0.09497373551130295, "learning_rate": 0.00010185222878078568, "loss": 0.9292, "step": 2417 }, { "epoch": 0.9831266517584875, "grad_norm": 0.09194166213274002, "learning_rate": 0.00010181152045593325, "loss": 0.969, "step": 2418 }, { "epoch": 0.9835332384631023, "grad_norm": 0.08828569948673248, "learning_rate": 0.00010177081213108082, "loss": 0.8936, "step": 2419 }, { "epoch": 0.983939825167717, "grad_norm": 0.095185786485672, "learning_rate": 0.00010173010380622839, "loss": 0.9859, "step": 2420 }, { "epoch": 0.9843464118723317, "grad_norm": 0.09699594974517822, "learning_rate": 0.00010168939548137595, "loss": 1.0568, "step": 2421 }, { "epoch": 0.9847529985769465, "grad_norm": 
0.09333425760269165, "learning_rate": 0.00010164868715652351, "loss": 0.9503, "step": 2422 }, { "epoch": 0.9851595852815613, "grad_norm": 0.0883539542555809, "learning_rate": 0.00010160797883167108, "loss": 0.9711, "step": 2423 }, { "epoch": 0.985566171986176, "grad_norm": 0.09544458985328674, "learning_rate": 0.00010156727050681864, "loss": 0.8668, "step": 2424 }, { "epoch": 0.9859727586907908, "grad_norm": 0.0979728177189827, "learning_rate": 0.00010152656218196621, "loss": 1.0685, "step": 2425 }, { "epoch": 0.9863793453954056, "grad_norm": 0.08907411992549896, "learning_rate": 0.00010148585385711378, "loss": 0.8947, "step": 2426 }, { "epoch": 0.9867859321000203, "grad_norm": 0.09532100707292557, "learning_rate": 0.00010144514553226135, "loss": 1.0793, "step": 2427 }, { "epoch": 0.9871925188046351, "grad_norm": 0.0916009321808815, "learning_rate": 0.00010140443720740893, "loss": 0.9604, "step": 2428 }, { "epoch": 0.9875991055092499, "grad_norm": 0.0960593968629837, "learning_rate": 0.0001013637288825565, "loss": 1.0012, "step": 2429 }, { "epoch": 0.9880056922138646, "grad_norm": 0.0948946550488472, "learning_rate": 0.00010132302055770406, "loss": 0.9555, "step": 2430 }, { "epoch": 0.9884122789184794, "grad_norm": 0.08670156449079514, "learning_rate": 0.00010128231223285163, "loss": 0.8863, "step": 2431 }, { "epoch": 0.9888188656230942, "grad_norm": 0.0870981365442276, "learning_rate": 0.0001012416039079992, "loss": 0.949, "step": 2432 }, { "epoch": 0.9892254523277089, "grad_norm": 0.09065506607294083, "learning_rate": 0.00010120089558314677, "loss": 1.0791, "step": 2433 }, { "epoch": 0.9896320390323237, "grad_norm": 0.08753534406423569, "learning_rate": 0.00010116018725829432, "loss": 0.8656, "step": 2434 }, { "epoch": 0.9900386257369383, "grad_norm": 0.08939878642559052, "learning_rate": 0.00010111947893344189, "loss": 0.8983, "step": 2435 }, { "epoch": 0.9904452124415531, "grad_norm": 0.09110575914382935, "learning_rate": 0.00010107877060858946, "loss": 0.8971, 
"step": 2436 }, { "epoch": 0.9908517991461679, "grad_norm": 0.08614566922187805, "learning_rate": 0.00010103806228373702, "loss": 0.9746, "step": 2437 }, { "epoch": 0.9912583858507826, "grad_norm": 0.09685923904180527, "learning_rate": 0.00010099735395888459, "loss": 0.9638, "step": 2438 }, { "epoch": 0.9916649725553974, "grad_norm": 0.10014784336090088, "learning_rate": 0.00010095664563403216, "loss": 1.0335, "step": 2439 }, { "epoch": 0.9920715592600122, "grad_norm": 0.09917939454317093, "learning_rate": 0.00010091593730917974, "loss": 1.0288, "step": 2440 }, { "epoch": 0.9924781459646269, "grad_norm": 0.09158805757761002, "learning_rate": 0.00010087522898432731, "loss": 0.9372, "step": 2441 }, { "epoch": 0.9928847326692417, "grad_norm": 0.09151756763458252, "learning_rate": 0.00010083452065947488, "loss": 1.0042, "step": 2442 }, { "epoch": 0.9932913193738565, "grad_norm": 0.09201864898204803, "learning_rate": 0.00010079381233462244, "loss": 0.937, "step": 2443 }, { "epoch": 0.9936979060784712, "grad_norm": 0.10031972825527191, "learning_rate": 0.00010075310400977001, "loss": 0.989, "step": 2444 }, { "epoch": 0.994104492783086, "grad_norm": 0.09593512862920761, "learning_rate": 0.00010071239568491756, "loss": 0.9259, "step": 2445 }, { "epoch": 0.9945110794877008, "grad_norm": 0.10088519006967545, "learning_rate": 0.00010067168736006513, "loss": 1.0888, "step": 2446 }, { "epoch": 0.9949176661923155, "grad_norm": 0.09052947163581848, "learning_rate": 0.0001006309790352127, "loss": 0.9643, "step": 2447 }, { "epoch": 0.9953242528969303, "grad_norm": 0.0943833664059639, "learning_rate": 0.00010059027071036027, "loss": 1.0308, "step": 2448 }, { "epoch": 0.9957308396015451, "grad_norm": 0.0929458737373352, "learning_rate": 0.00010054956238550783, "loss": 0.8993, "step": 2449 }, { "epoch": 0.9961374263061598, "grad_norm": 0.09643827378749847, "learning_rate": 0.0001005088540606554, "loss": 0.9708, "step": 2450 }, { "epoch": 0.9965440130107746, "grad_norm": 
0.08925779908895493, "learning_rate": 0.00010046814573580298, "loss": 0.9209, "step": 2451 }, { "epoch": 0.9969505997153894, "grad_norm": 0.08630047738552094, "learning_rate": 0.00010042743741095055, "loss": 0.9324, "step": 2452 }, { "epoch": 0.997357186420004, "grad_norm": 0.10127938538789749, "learning_rate": 0.00010038672908609812, "loss": 0.9926, "step": 2453 }, { "epoch": 0.9977637731246188, "grad_norm": 0.09573110938072205, "learning_rate": 0.00010034602076124569, "loss": 0.9801, "step": 2454 }, { "epoch": 0.9981703598292336, "grad_norm": 0.0963260605931282, "learning_rate": 0.00010030531243639325, "loss": 0.98, "step": 2455 }, { "epoch": 0.9985769465338483, "grad_norm": 0.08414101600646973, "learning_rate": 0.00010026460411154082, "loss": 0.8676, "step": 2456 }, { "epoch": 0.9989835332384631, "grad_norm": 0.09320447593927383, "learning_rate": 0.00010022389578668838, "loss": 0.998, "step": 2457 }, { "epoch": 0.9993901199430779, "grad_norm": 0.09721797704696655, "learning_rate": 0.00010018318746183594, "loss": 1.0123, "step": 2458 }, { "epoch": 0.9997967066476926, "grad_norm": 0.08773447573184967, "learning_rate": 0.00010014247913698351, "loss": 0.9673, "step": 2459 }, { "epoch": 1.0, "grad_norm": 0.15718789398670197, "learning_rate": 0.00010010177081213108, "loss": 1.1286, "step": 2460 }, { "epoch": 1.0004065867046148, "grad_norm": 0.09029074758291245, "learning_rate": 0.00010006106248727865, "loss": 0.9905, "step": 2461 }, { "epoch": 1.0008131734092296, "grad_norm": 0.09984813630580902, "learning_rate": 0.00010002035416242621, "loss": 0.9981, "step": 2462 }, { "epoch": 1.0012197601138442, "grad_norm": 0.09808840602636337, "learning_rate": 9.997964583757378e-05, "loss": 1.0156, "step": 2463 }, { "epoch": 1.001626346818459, "grad_norm": 0.08917602896690369, "learning_rate": 9.993893751272135e-05, "loss": 0.944, "step": 2464 }, { "epoch": 1.0020329335230738, "grad_norm": 0.0943906158208847, "learning_rate": 9.989822918786892e-05, "loss": 0.9294, "step": 2465 }, 
{ "epoch": 1.0024395202276886, "grad_norm": 0.09091315418481827, "learning_rate": 9.98575208630165e-05, "loss": 0.9707, "step": 2466 }, { "epoch": 1.0028461069323034, "grad_norm": 0.09035106003284454, "learning_rate": 9.981681253816407e-05, "loss": 0.9562, "step": 2467 }, { "epoch": 1.0032526936369182, "grad_norm": 0.09709779173135757, "learning_rate": 9.977610421331163e-05, "loss": 0.9287, "step": 2468 }, { "epoch": 1.0036592803415327, "grad_norm": 0.09063035994768143, "learning_rate": 9.973539588845919e-05, "loss": 0.9138, "step": 2469 }, { "epoch": 1.0040658670461475, "grad_norm": 0.09490003436803818, "learning_rate": 9.969468756360676e-05, "loss": 0.9475, "step": 2470 }, { "epoch": 1.0044724537507623, "grad_norm": 0.10134010761976242, "learning_rate": 9.965397923875432e-05, "loss": 1.0092, "step": 2471 }, { "epoch": 1.0048790404553771, "grad_norm": 0.09728873521089554, "learning_rate": 9.96132709139019e-05, "loss": 0.9498, "step": 2472 }, { "epoch": 1.005285627159992, "grad_norm": 0.09160648286342621, "learning_rate": 9.957256258904947e-05, "loss": 0.8707, "step": 2473 }, { "epoch": 1.0056922138646067, "grad_norm": 0.0939764603972435, "learning_rate": 9.953185426419704e-05, "loss": 0.9619, "step": 2474 }, { "epoch": 1.0060988005692213, "grad_norm": 0.08643637597560883, "learning_rate": 9.94911459393446e-05, "loss": 0.9377, "step": 2475 }, { "epoch": 1.006505387273836, "grad_norm": 0.09141729027032852, "learning_rate": 9.945043761449216e-05, "loss": 0.8859, "step": 2476 }, { "epoch": 1.006911973978451, "grad_norm": 0.09555509686470032, "learning_rate": 9.940972928963974e-05, "loss": 0.933, "step": 2477 }, { "epoch": 1.0073185606830657, "grad_norm": 0.0935022309422493, "learning_rate": 9.936902096478731e-05, "loss": 0.9368, "step": 2478 }, { "epoch": 1.0077251473876805, "grad_norm": 0.09959034621715546, "learning_rate": 9.932831263993488e-05, "loss": 0.974, "step": 2479 }, { "epoch": 1.0081317340922953, "grad_norm": 0.09246455878019333, "learning_rate": 
9.928760431508245e-05, "loss": 0.9248, "step": 2480 }, { "epoch": 1.0085383207969099, "grad_norm": 0.10091500729322433, "learning_rate": 9.924689599023e-05, "loss": 1.122, "step": 2481 }, { "epoch": 1.0089449075015247, "grad_norm": 0.10083048790693283, "learning_rate": 9.920618766537757e-05, "loss": 1.0199, "step": 2482 }, { "epoch": 1.0093514942061395, "grad_norm": 0.09641805291175842, "learning_rate": 9.916547934052515e-05, "loss": 0.9971, "step": 2483 }, { "epoch": 1.0097580809107543, "grad_norm": 0.10362432897090912, "learning_rate": 9.912477101567272e-05, "loss": 0.9596, "step": 2484 }, { "epoch": 1.010164667615369, "grad_norm": 0.09050238877534866, "learning_rate": 9.908406269082028e-05, "loss": 0.9423, "step": 2485 }, { "epoch": 1.0105712543199838, "grad_norm": 0.10209590941667557, "learning_rate": 9.904335436596785e-05, "loss": 0.9366, "step": 2486 }, { "epoch": 1.0109778410245984, "grad_norm": 0.104631707072258, "learning_rate": 9.90026460411154e-05, "loss": 1.0476, "step": 2487 }, { "epoch": 1.0113844277292132, "grad_norm": 0.09572993963956833, "learning_rate": 9.896193771626297e-05, "loss": 1.0523, "step": 2488 }, { "epoch": 1.011791014433828, "grad_norm": 0.10640837252140045, "learning_rate": 9.892122939141055e-05, "loss": 1.1238, "step": 2489 }, { "epoch": 1.0121976011384428, "grad_norm": 0.09798834472894669, "learning_rate": 9.888052106655812e-05, "loss": 0.9597, "step": 2490 }, { "epoch": 1.0126041878430576, "grad_norm": 0.08913593739271164, "learning_rate": 9.883981274170569e-05, "loss": 0.9258, "step": 2491 }, { "epoch": 1.0130107745476722, "grad_norm": 0.09719277173280716, "learning_rate": 9.879910441685324e-05, "loss": 0.9812, "step": 2492 }, { "epoch": 1.013417361252287, "grad_norm": 0.09699688851833344, "learning_rate": 9.875839609200081e-05, "loss": 0.8946, "step": 2493 }, { "epoch": 1.0138239479569018, "grad_norm": 0.09061427414417267, "learning_rate": 9.871768776714838e-05, "loss": 0.9075, "step": 2494 }, { "epoch": 1.0142305346615166, 
"grad_norm": 0.08979996293783188, "learning_rate": 9.867697944229596e-05, "loss": 0.933, "step": 2495 }, { "epoch": 1.0146371213661314, "grad_norm": 0.09325064718723297, "learning_rate": 9.863627111744353e-05, "loss": 0.9604, "step": 2496 }, { "epoch": 1.0150437080707462, "grad_norm": 0.09821408241987228, "learning_rate": 9.85955627925911e-05, "loss": 1.0871, "step": 2497 }, { "epoch": 1.0154502947753608, "grad_norm": 0.09746625274419785, "learning_rate": 9.855485446773865e-05, "loss": 0.9304, "step": 2498 }, { "epoch": 1.0158568814799755, "grad_norm": 0.09508597105741501, "learning_rate": 9.851414614288622e-05, "loss": 0.9469, "step": 2499 }, { "epoch": 1.0162634681845903, "grad_norm": 0.10357919335365295, "learning_rate": 9.84734378180338e-05, "loss": 1.0272, "step": 2500 } ], "logging_steps": 1, "max_steps": 4918, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.991705590388761e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }