{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.999390119943078, "eval_steps": 500, "global_step": 4918, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004065867046147591, "grad_norm": 0.22144322097301483, "learning_rate": 0.0, "loss": 1.3598, "step": 1 }, { "epoch": 0.0008131734092295182, "grad_norm": 0.199473574757576, "learning_rate": 4e-05, "loss": 1.405, "step": 2 }, { "epoch": 0.0012197601138442774, "grad_norm": 0.20758001506328583, "learning_rate": 8e-05, "loss": 1.2815, "step": 3 }, { "epoch": 0.0016263468184590363, "grad_norm": 0.21362783014774323, "learning_rate": 0.00012, "loss": 1.245, "step": 4 }, { "epoch": 0.0020329335230737954, "grad_norm": 0.24631692469120026, "learning_rate": 0.00016, "loss": 1.3086, "step": 5 }, { "epoch": 0.002439520227688555, "grad_norm": 0.20009225606918335, "learning_rate": 0.0002, "loss": 1.2443, "step": 6 }, { "epoch": 0.0028461069323033137, "grad_norm": 0.1735246330499649, "learning_rate": 0.00019995929167514756, "loss": 1.1878, "step": 7 }, { "epoch": 0.0032526936369180726, "grad_norm": 0.18904437124729156, "learning_rate": 0.00019991858335029514, "loss": 1.2478, "step": 8 }, { "epoch": 0.003659280341532832, "grad_norm": 0.1645248979330063, "learning_rate": 0.0001998778750254427, "loss": 1.2098, "step": 9 }, { "epoch": 0.004065867046147591, "grad_norm": 0.22034819424152374, "learning_rate": 0.00019983716670059028, "loss": 1.1183, "step": 10 }, { "epoch": 0.00447245375076235, "grad_norm": 0.3233634829521179, "learning_rate": 0.00019979645837573783, "loss": 1.0974, "step": 11 }, { "epoch": 0.00487904045537711, "grad_norm": 0.2592090368270874, "learning_rate": 0.00019975575005088542, "loss": 1.1611, "step": 12 }, { "epoch": 0.005285627159991868, "grad_norm": 0.14754348993301392, "learning_rate": 0.000199715041726033, "loss": 1.1932, "step": 13 }, { "epoch": 0.005692213864606627, "grad_norm": 
0.09341374039649963, "learning_rate": 0.00019967433340118055, "loss": 1.348, "step": 14 }, { "epoch": 0.006098800569221387, "grad_norm": 0.10229193419218063, "learning_rate": 0.00019963362507632813, "loss": 1.0927, "step": 15 }, { "epoch": 0.006505387273836145, "grad_norm": 0.14015386998653412, "learning_rate": 0.00019959291675147569, "loss": 1.2263, "step": 16 }, { "epoch": 0.006911973978450905, "grad_norm": 0.17507047951221466, "learning_rate": 0.00019955220842662327, "loss": 1.1951, "step": 17 }, { "epoch": 0.007318560683065664, "grad_norm": 0.17176274955272675, "learning_rate": 0.00019951150010177082, "loss": 1.1895, "step": 18 }, { "epoch": 0.007725147387680423, "grad_norm": 0.13839803636074066, "learning_rate": 0.00019947079177691838, "loss": 0.9549, "step": 19 }, { "epoch": 0.008131734092295182, "grad_norm": 0.0970696285367012, "learning_rate": 0.00019943008345206596, "loss": 1.0867, "step": 20 }, { "epoch": 0.008538320796909941, "grad_norm": 0.08836886286735535, "learning_rate": 0.0001993893751272135, "loss": 1.155, "step": 21 }, { "epoch": 0.0089449075015247, "grad_norm": 0.11885025352239609, "learning_rate": 0.0001993486668023611, "loss": 1.1231, "step": 22 }, { "epoch": 0.00935149420613946, "grad_norm": 0.15120816230773926, "learning_rate": 0.00019930795847750865, "loss": 1.1078, "step": 23 }, { "epoch": 0.00975808091075422, "grad_norm": 0.16326424479484558, "learning_rate": 0.00019926725015265623, "loss": 1.079, "step": 24 }, { "epoch": 0.010164667615368977, "grad_norm": 0.1179085448384285, "learning_rate": 0.0001992265418278038, "loss": 0.932, "step": 25 }, { "epoch": 0.010571254319983736, "grad_norm": 0.10621985793113708, "learning_rate": 0.00019918583350295136, "loss": 1.1386, "step": 26 }, { "epoch": 0.010977841024598495, "grad_norm": 0.08408638089895248, "learning_rate": 0.00019914512517809894, "loss": 1.0987, "step": 27 }, { "epoch": 0.011384427729213255, "grad_norm": 0.08222135156393051, "learning_rate": 0.0001991044168532465, "loss": 1.0378, 
"step": 28 }, { "epoch": 0.011791014433828014, "grad_norm": 0.08763129264116287, "learning_rate": 0.00019906370852839408, "loss": 0.983, "step": 29 }, { "epoch": 0.012197601138442773, "grad_norm": 0.10638878494501114, "learning_rate": 0.00019902300020354163, "loss": 1.0258, "step": 30 }, { "epoch": 0.012604187843057533, "grad_norm": 0.10155023634433746, "learning_rate": 0.0001989822918786892, "loss": 0.9579, "step": 31 }, { "epoch": 0.01301077454767229, "grad_norm": 0.08844579011201859, "learning_rate": 0.00019894158355383677, "loss": 1.1007, "step": 32 }, { "epoch": 0.01341736125228705, "grad_norm": 0.10394158959388733, "learning_rate": 0.00019890087522898432, "loss": 1.0459, "step": 33 }, { "epoch": 0.01382394795690181, "grad_norm": 0.08938682824373245, "learning_rate": 0.0001988601669041319, "loss": 1.0985, "step": 34 }, { "epoch": 0.014230534661516568, "grad_norm": 0.08639086782932281, "learning_rate": 0.00019881945857927948, "loss": 1.0712, "step": 35 }, { "epoch": 0.014637121366131328, "grad_norm": 0.08568435162305832, "learning_rate": 0.00019877875025442704, "loss": 1.0549, "step": 36 }, { "epoch": 0.015043708070746087, "grad_norm": 0.0859316885471344, "learning_rate": 0.00019873804192957462, "loss": 1.1042, "step": 37 }, { "epoch": 0.015450294775360847, "grad_norm": 0.09534381330013275, "learning_rate": 0.00019869733360472217, "loss": 1.0127, "step": 38 }, { "epoch": 0.015856881479975604, "grad_norm": 0.09103580564260483, "learning_rate": 0.00019865662527986976, "loss": 0.9347, "step": 39 }, { "epoch": 0.016263468184590364, "grad_norm": 0.0928095132112503, "learning_rate": 0.0001986159169550173, "loss": 1.0559, "step": 40 }, { "epoch": 0.016670054889205123, "grad_norm": 0.09370871633291245, "learning_rate": 0.0001985752086301649, "loss": 1.1473, "step": 41 }, { "epoch": 0.017076641593819882, "grad_norm": 0.07691123336553574, "learning_rate": 0.00019853450030531244, "loss": 1.0128, "step": 42 }, { "epoch": 0.01748322829843464, "grad_norm": 
0.09201047569513321, "learning_rate": 0.00019849379198046, "loss": 1.1296, "step": 43 }, { "epoch": 0.0178898150030494, "grad_norm": 0.08490074425935745, "learning_rate": 0.00019845308365560758, "loss": 1.0444, "step": 44 }, { "epoch": 0.01829640170766416, "grad_norm": 0.08623114228248596, "learning_rate": 0.00019841237533075513, "loss": 1.066, "step": 45 }, { "epoch": 0.01870298841227892, "grad_norm": 0.09486474096775055, "learning_rate": 0.00019837166700590271, "loss": 1.0788, "step": 46 }, { "epoch": 0.01910957511689368, "grad_norm": 0.08024484664201736, "learning_rate": 0.0001983309586810503, "loss": 1.0262, "step": 47 }, { "epoch": 0.01951616182150844, "grad_norm": 0.09256327897310257, "learning_rate": 0.00019829025035619785, "loss": 1.107, "step": 48 }, { "epoch": 0.019922748526123194, "grad_norm": 0.09877921640872955, "learning_rate": 0.00019824954203134543, "loss": 1.1731, "step": 49 }, { "epoch": 0.020329335230737954, "grad_norm": 0.08699575811624527, "learning_rate": 0.00019820883370649299, "loss": 1.0809, "step": 50 }, { "epoch": 0.020735921935352713, "grad_norm": 0.089649498462677, "learning_rate": 0.00019816812538164057, "loss": 1.1564, "step": 51 }, { "epoch": 0.021142508639967472, "grad_norm": 0.08757214993238449, "learning_rate": 0.00019812741705678812, "loss": 1.0272, "step": 52 }, { "epoch": 0.02154909534458223, "grad_norm": 0.08320939540863037, "learning_rate": 0.0001980867087319357, "loss": 0.9931, "step": 53 }, { "epoch": 0.02195568204919699, "grad_norm": 0.08898070454597473, "learning_rate": 0.00019804600040708326, "loss": 0.9421, "step": 54 }, { "epoch": 0.02236226875381175, "grad_norm": 0.08072236180305481, "learning_rate": 0.0001980052920822308, "loss": 1.0304, "step": 55 }, { "epoch": 0.02276885545842651, "grad_norm": 0.09354112297296524, "learning_rate": 0.0001979645837573784, "loss": 1.1041, "step": 56 }, { "epoch": 0.02317544216304127, "grad_norm": 0.09214304387569427, "learning_rate": 0.00019792387543252595, "loss": 1.0666, "step": 57 
}, { "epoch": 0.02358202886765603, "grad_norm": 0.08546210825443268, "learning_rate": 0.00019788316710767353, "loss": 1.0795, "step": 58 }, { "epoch": 0.023988615572270788, "grad_norm": 0.09029046446084976, "learning_rate": 0.0001978424587828211, "loss": 1.199, "step": 59 }, { "epoch": 0.024395202276885547, "grad_norm": 0.08200937509536743, "learning_rate": 0.00019780175045796866, "loss": 0.9853, "step": 60 }, { "epoch": 0.024801788981500306, "grad_norm": 0.08928566426038742, "learning_rate": 0.00019776104213311624, "loss": 0.9948, "step": 61 }, { "epoch": 0.025208375686115066, "grad_norm": 0.08067034929990768, "learning_rate": 0.0001977203338082638, "loss": 0.9824, "step": 62 }, { "epoch": 0.02561496239072982, "grad_norm": 0.07509499788284302, "learning_rate": 0.00019767962548341138, "loss": 0.9166, "step": 63 }, { "epoch": 0.02602154909534458, "grad_norm": 0.10127029567956924, "learning_rate": 0.00019763891715855893, "loss": 0.978, "step": 64 }, { "epoch": 0.02642813579995934, "grad_norm": 0.08480218052864075, "learning_rate": 0.0001975982088337065, "loss": 1.0019, "step": 65 }, { "epoch": 0.0268347225045741, "grad_norm": 0.0922696441411972, "learning_rate": 0.00019755750050885407, "loss": 1.0213, "step": 66 }, { "epoch": 0.02724130920918886, "grad_norm": 0.0819278433918953, "learning_rate": 0.00019751679218400162, "loss": 0.9792, "step": 67 }, { "epoch": 0.02764789591380362, "grad_norm": 0.09971120208501816, "learning_rate": 0.0001974760838591492, "loss": 0.9605, "step": 68 }, { "epoch": 0.028054482618418378, "grad_norm": 0.09195531904697418, "learning_rate": 0.00019743537553429676, "loss": 1.1203, "step": 69 }, { "epoch": 0.028461069323033137, "grad_norm": 0.09179981052875519, "learning_rate": 0.00019739466720944434, "loss": 1.0586, "step": 70 }, { "epoch": 0.028867656027647896, "grad_norm": 0.0866156816482544, "learning_rate": 0.00019735395888459192, "loss": 1.0558, "step": 71 }, { "epoch": 0.029274242732262656, "grad_norm": 0.09198956191539764, 
"learning_rate": 0.00019731325055973947, "loss": 1.117, "step": 72 }, { "epoch": 0.029680829436877415, "grad_norm": 0.0912180244922638, "learning_rate": 0.00019727254223488705, "loss": 1.0235, "step": 73 }, { "epoch": 0.030087416141492174, "grad_norm": 0.092186838388443, "learning_rate": 0.0001972318339100346, "loss": 1.0119, "step": 74 }, { "epoch": 0.030494002846106934, "grad_norm": 0.091013602912426, "learning_rate": 0.0001971911255851822, "loss": 1.0523, "step": 75 }, { "epoch": 0.030900589550721693, "grad_norm": 0.0932595282793045, "learning_rate": 0.00019715041726032974, "loss": 1.0471, "step": 76 }, { "epoch": 0.03130717625533645, "grad_norm": 0.089345782995224, "learning_rate": 0.0001971097089354773, "loss": 1.0214, "step": 77 }, { "epoch": 0.03171376295995121, "grad_norm": 0.09476006776094437, "learning_rate": 0.00019706900061062488, "loss": 0.9888, "step": 78 }, { "epoch": 0.03212034966456597, "grad_norm": 0.09379832446575165, "learning_rate": 0.00019702829228577243, "loss": 1.1039, "step": 79 }, { "epoch": 0.03252693636918073, "grad_norm": 0.10659569501876831, "learning_rate": 0.00019698758396092001, "loss": 1.1377, "step": 80 }, { "epoch": 0.03293352307379549, "grad_norm": 0.09652398526668549, "learning_rate": 0.0001969468756360676, "loss": 1.0194, "step": 81 }, { "epoch": 0.033340109778410246, "grad_norm": 0.08641666918992996, "learning_rate": 0.00019690616731121515, "loss": 1.0239, "step": 82 }, { "epoch": 0.03374669648302501, "grad_norm": 0.0956072062253952, "learning_rate": 0.00019686545898636273, "loss": 1.032, "step": 83 }, { "epoch": 0.034153283187639764, "grad_norm": 0.08402691036462784, "learning_rate": 0.00019682475066151029, "loss": 0.9802, "step": 84 }, { "epoch": 0.03455986989225452, "grad_norm": 0.08827648311853409, "learning_rate": 0.00019678404233665787, "loss": 1.1805, "step": 85 }, { "epoch": 0.03496645659686928, "grad_norm": 0.08757660537958145, "learning_rate": 0.00019674333401180542, "loss": 0.952, "step": 86 }, { "epoch": 
0.03537304330148404, "grad_norm": 0.09728538244962692, "learning_rate": 0.000196702625686953, "loss": 1.0875, "step": 87 }, { "epoch": 0.0357796300060988, "grad_norm": 0.08561044931411743, "learning_rate": 0.00019666191736210056, "loss": 0.9818, "step": 88 }, { "epoch": 0.03618621671071356, "grad_norm": 0.08389468491077423, "learning_rate": 0.0001966212090372481, "loss": 0.9962, "step": 89 }, { "epoch": 0.03659280341532832, "grad_norm": 0.08847957849502563, "learning_rate": 0.0001965805007123957, "loss": 1.0138, "step": 90 }, { "epoch": 0.036999390119943076, "grad_norm": 0.08515489101409912, "learning_rate": 0.00019653979238754324, "loss": 1.0119, "step": 91 }, { "epoch": 0.03740597682455784, "grad_norm": 0.09340325742959976, "learning_rate": 0.00019649908406269083, "loss": 1.0635, "step": 92 }, { "epoch": 0.037812563529172595, "grad_norm": 0.09383916854858398, "learning_rate": 0.0001964583757378384, "loss": 1.0999, "step": 93 }, { "epoch": 0.03821915023378736, "grad_norm": 0.09956547617912292, "learning_rate": 0.00019641766741298596, "loss": 1.0186, "step": 94 }, { "epoch": 0.038625736938402114, "grad_norm": 0.09809234738349915, "learning_rate": 0.00019637695908813354, "loss": 1.0641, "step": 95 }, { "epoch": 0.03903232364301688, "grad_norm": 0.08520065993070602, "learning_rate": 0.0001963362507632811, "loss": 0.9255, "step": 96 }, { "epoch": 0.03943891034763163, "grad_norm": 0.09007880836725235, "learning_rate": 0.00019629554243842868, "loss": 1.0963, "step": 97 }, { "epoch": 0.03984549705224639, "grad_norm": 0.08900373429059982, "learning_rate": 0.00019625483411357623, "loss": 0.9908, "step": 98 }, { "epoch": 0.04025208375686115, "grad_norm": 0.09613076597452164, "learning_rate": 0.0001962141257887238, "loss": 0.9729, "step": 99 }, { "epoch": 0.04065867046147591, "grad_norm": 0.09987878054380417, "learning_rate": 0.00019617341746387137, "loss": 1.0554, "step": 100 }, { "epoch": 0.04106525716609067, "grad_norm": 0.10209144651889801, "learning_rate": 
0.00019613270913901892, "loss": 1.1162, "step": 101 }, { "epoch": 0.041471843870705426, "grad_norm": 0.10085388273000717, "learning_rate": 0.0001960920008141665, "loss": 1.1355, "step": 102 }, { "epoch": 0.04187843057532019, "grad_norm": 0.08966121822595596, "learning_rate": 0.00019605129248931406, "loss": 0.9275, "step": 103 }, { "epoch": 0.042285017279934944, "grad_norm": 0.10507562756538391, "learning_rate": 0.00019601058416446166, "loss": 1.081, "step": 104 }, { "epoch": 0.04269160398454971, "grad_norm": 0.09719648957252502, "learning_rate": 0.00019596987583960922, "loss": 1.0884, "step": 105 }, { "epoch": 0.04309819068916446, "grad_norm": 0.09457529336214066, "learning_rate": 0.00019592916751475677, "loss": 1.0413, "step": 106 }, { "epoch": 0.043504777393779226, "grad_norm": 0.11330179125070572, "learning_rate": 0.00019588845918990435, "loss": 1.0937, "step": 107 }, { "epoch": 0.04391136409839398, "grad_norm": 0.09778840839862823, "learning_rate": 0.0001958477508650519, "loss": 1.1316, "step": 108 }, { "epoch": 0.044317950803008745, "grad_norm": 0.09848835319280624, "learning_rate": 0.0001958070425401995, "loss": 1.1244, "step": 109 }, { "epoch": 0.0447245375076235, "grad_norm": 0.0965428277850151, "learning_rate": 0.00019576633421534704, "loss": 0.9952, "step": 110 }, { "epoch": 0.045131124212238256, "grad_norm": 0.0857444629073143, "learning_rate": 0.00019572562589049462, "loss": 0.9822, "step": 111 }, { "epoch": 0.04553771091685302, "grad_norm": 0.10461942851543427, "learning_rate": 0.00019568491756564218, "loss": 1.1463, "step": 112 }, { "epoch": 0.045944297621467775, "grad_norm": 0.08575154095888138, "learning_rate": 0.00019564420924078973, "loss": 0.8976, "step": 113 }, { "epoch": 0.04635088432608254, "grad_norm": 0.0948256254196167, "learning_rate": 0.00019560350091593731, "loss": 1.1205, "step": 114 }, { "epoch": 0.046757471030697294, "grad_norm": 0.09214090555906296, "learning_rate": 0.00019556279259108487, "loss": 1.1416, "step": 115 }, { "epoch": 
0.04716405773531206, "grad_norm": 0.09885852038860321, "learning_rate": 0.00019552208426623248, "loss": 1.079, "step": 116 }, { "epoch": 0.04757064443992681, "grad_norm": 0.09071148931980133, "learning_rate": 0.00019548137594138003, "loss": 1.0128, "step": 117 }, { "epoch": 0.047977231144541575, "grad_norm": 0.09190430492162704, "learning_rate": 0.00019544066761652758, "loss": 0.9631, "step": 118 }, { "epoch": 0.04838381784915633, "grad_norm": 0.08024870604276657, "learning_rate": 0.00019539995929167517, "loss": 0.9086, "step": 119 }, { "epoch": 0.048790404553771094, "grad_norm": 0.09223239868879318, "learning_rate": 0.00019535925096682272, "loss": 1.0255, "step": 120 }, { "epoch": 0.04919699125838585, "grad_norm": 0.09259685128927231, "learning_rate": 0.0001953185426419703, "loss": 1.0221, "step": 121 }, { "epoch": 0.04960357796300061, "grad_norm": 0.08371948450803757, "learning_rate": 0.00019527783431711786, "loss": 0.966, "step": 122 }, { "epoch": 0.05001016466761537, "grad_norm": 0.0957912728190422, "learning_rate": 0.00019523712599226544, "loss": 1.0919, "step": 123 }, { "epoch": 0.05041675137223013, "grad_norm": 0.09397678077220917, "learning_rate": 0.000195196417667413, "loss": 0.9666, "step": 124 }, { "epoch": 0.05082333807684489, "grad_norm": 0.1014254167675972, "learning_rate": 0.00019515570934256054, "loss": 0.9321, "step": 125 }, { "epoch": 0.05122992478145964, "grad_norm": 0.09339801222085953, "learning_rate": 0.00019511500101770813, "loss": 1.0487, "step": 126 }, { "epoch": 0.051636511486074406, "grad_norm": 0.08642175793647766, "learning_rate": 0.0001950742926928557, "loss": 1.0606, "step": 127 }, { "epoch": 0.05204309819068916, "grad_norm": 0.09092641621828079, "learning_rate": 0.0001950335843680033, "loss": 0.904, "step": 128 }, { "epoch": 0.052449684895303925, "grad_norm": 0.09896791726350784, "learning_rate": 0.00019499287604315084, "loss": 1.0325, "step": 129 }, { "epoch": 0.05285627159991868, "grad_norm": 0.08731307834386826, "learning_rate": 
0.0001949521677182984, "loss": 0.9258, "step": 130 }, { "epoch": 0.05326285830453344, "grad_norm": 0.09673330187797546, "learning_rate": 0.00019491145939344598, "loss": 1.1198, "step": 131 }, { "epoch": 0.0536694450091482, "grad_norm": 0.09038975089788437, "learning_rate": 0.00019487075106859353, "loss": 1.0295, "step": 132 }, { "epoch": 0.05407603171376296, "grad_norm": 0.0918399840593338, "learning_rate": 0.0001948300427437411, "loss": 1.0127, "step": 133 }, { "epoch": 0.05448261841837772, "grad_norm": 0.08970967680215836, "learning_rate": 0.00019478933441888867, "loss": 1.0238, "step": 134 }, { "epoch": 0.05488920512299248, "grad_norm": 0.09728217124938965, "learning_rate": 0.00019474862609403625, "loss": 1.069, "step": 135 }, { "epoch": 0.05529579182760724, "grad_norm": 0.10240956395864487, "learning_rate": 0.0001947079177691838, "loss": 1.1467, "step": 136 }, { "epoch": 0.055702378532222, "grad_norm": 0.10397852212190628, "learning_rate": 0.00019466720944433136, "loss": 1.0415, "step": 137 }, { "epoch": 0.056108965236836755, "grad_norm": 0.10451675951480865, "learning_rate": 0.00019462650111947894, "loss": 1.0309, "step": 138 }, { "epoch": 0.05651555194145151, "grad_norm": 0.09685720503330231, "learning_rate": 0.00019458579279462652, "loss": 1.11, "step": 139 }, { "epoch": 0.056922138646066274, "grad_norm": 0.09885822236537933, "learning_rate": 0.00019454508446977407, "loss": 0.993, "step": 140 }, { "epoch": 0.05732872535068103, "grad_norm": 0.10943586379289627, "learning_rate": 0.00019450437614492165, "loss": 0.9749, "step": 141 }, { "epoch": 0.05773531205529579, "grad_norm": 0.10964591801166534, "learning_rate": 0.0001944636678200692, "loss": 1.1108, "step": 142 }, { "epoch": 0.05814189875991055, "grad_norm": 0.10109028965234756, "learning_rate": 0.0001944229594952168, "loss": 1.0897, "step": 143 }, { "epoch": 0.05854848546452531, "grad_norm": 0.11243695765733719, "learning_rate": 0.00019438225117036434, "loss": 1.0338, "step": 144 }, { "epoch": 
0.05895507216914007, "grad_norm": 0.1047658622264862, "learning_rate": 0.00019434154284551192, "loss": 0.9566, "step": 145 }, { "epoch": 0.05936165887375483, "grad_norm": 0.09534204006195068, "learning_rate": 0.00019430083452065948, "loss": 1.0313, "step": 146 }, { "epoch": 0.059768245578369586, "grad_norm": 0.10418044775724411, "learning_rate": 0.00019426012619580706, "loss": 0.9759, "step": 147 }, { "epoch": 0.06017483228298435, "grad_norm": 0.10020595043897629, "learning_rate": 0.00019421941787095461, "loss": 0.9368, "step": 148 }, { "epoch": 0.060581418987599105, "grad_norm": 0.09832129627466202, "learning_rate": 0.00019417870954610217, "loss": 1.0494, "step": 149 }, { "epoch": 0.06098800569221387, "grad_norm": 0.09458506107330322, "learning_rate": 0.00019413800122124978, "loss": 0.9631, "step": 150 }, { "epoch": 0.06139459239682862, "grad_norm": 0.10380101203918457, "learning_rate": 0.00019409729289639733, "loss": 1.1003, "step": 151 }, { "epoch": 0.061801179101443386, "grad_norm": 0.107131227850914, "learning_rate": 0.00019405658457154488, "loss": 1.0819, "step": 152 }, { "epoch": 0.06220776580605814, "grad_norm": 0.10330741852521896, "learning_rate": 0.00019401587624669247, "loss": 1.128, "step": 153 }, { "epoch": 0.0626143525106729, "grad_norm": 0.08829359710216522, "learning_rate": 0.00019397516792184002, "loss": 0.8754, "step": 154 }, { "epoch": 0.06302093921528766, "grad_norm": 0.10422427207231522, "learning_rate": 0.0001939344595969876, "loss": 0.9633, "step": 155 }, { "epoch": 0.06342752591990242, "grad_norm": 0.11499015986919403, "learning_rate": 0.00019389375127213515, "loss": 0.9735, "step": 156 }, { "epoch": 0.06383411262451717, "grad_norm": 0.0938427522778511, "learning_rate": 0.00019385304294728274, "loss": 0.9219, "step": 157 }, { "epoch": 0.06424069932913194, "grad_norm": 0.1080261766910553, "learning_rate": 0.0001938123346224303, "loss": 0.9678, "step": 158 }, { "epoch": 0.0646472860337467, "grad_norm": 0.10001271218061447, "learning_rate": 
0.00019377162629757784, "loss": 1.0854, "step": 159 }, { "epoch": 0.06505387273836145, "grad_norm": 0.10731212794780731, "learning_rate": 0.00019373091797272543, "loss": 1.0108, "step": 160 }, { "epoch": 0.06546045944297621, "grad_norm": 0.10019373893737793, "learning_rate": 0.00019369020964787298, "loss": 1.0315, "step": 161 }, { "epoch": 0.06586704614759098, "grad_norm": 0.0947297066450119, "learning_rate": 0.0001936495013230206, "loss": 1.0634, "step": 162 }, { "epoch": 0.06627363285220574, "grad_norm": 0.12204254418611526, "learning_rate": 0.00019360879299816814, "loss": 1.0635, "step": 163 }, { "epoch": 0.06668021955682049, "grad_norm": 0.10462553054094315, "learning_rate": 0.0001935680846733157, "loss": 1.0248, "step": 164 }, { "epoch": 0.06708680626143525, "grad_norm": 0.09576130658388138, "learning_rate": 0.00019352737634846328, "loss": 0.9671, "step": 165 }, { "epoch": 0.06749339296605002, "grad_norm": 0.10027123987674713, "learning_rate": 0.00019348666802361083, "loss": 0.9317, "step": 166 }, { "epoch": 0.06789997967066477, "grad_norm": 0.10674256086349487, "learning_rate": 0.0001934459596987584, "loss": 1.0058, "step": 167 }, { "epoch": 0.06830656637527953, "grad_norm": 0.12352320551872253, "learning_rate": 0.00019340525137390597, "loss": 1.0926, "step": 168 }, { "epoch": 0.06871315307989428, "grad_norm": 0.09426864236593246, "learning_rate": 0.00019336454304905355, "loss": 1.0876, "step": 169 }, { "epoch": 0.06911973978450904, "grad_norm": 0.09280996024608612, "learning_rate": 0.0001933238347242011, "loss": 0.977, "step": 170 }, { "epoch": 0.06952632648912381, "grad_norm": 0.11547420918941498, "learning_rate": 0.00019328312639934866, "loss": 1.0598, "step": 171 }, { "epoch": 0.06993291319373857, "grad_norm": 0.12538915872573853, "learning_rate": 0.00019324241807449624, "loss": 1.0996, "step": 172 }, { "epoch": 0.07033949989835332, "grad_norm": 0.08110898733139038, "learning_rate": 0.00019320170974964382, "loss": 0.8776, "step": 173 }, { "epoch": 
0.07074608660296808, "grad_norm": 0.10475198924541473, "learning_rate": 0.0001931610014247914, "loss": 1.0876, "step": 174 }, { "epoch": 0.07115267330758285, "grad_norm": 0.1095360517501831, "learning_rate": 0.00019312029309993895, "loss": 1.054, "step": 175 }, { "epoch": 0.0715592600121976, "grad_norm": 0.09516473114490509, "learning_rate": 0.0001930795847750865, "loss": 1.0558, "step": 176 }, { "epoch": 0.07196584671681236, "grad_norm": 0.09316466003656387, "learning_rate": 0.0001930388764502341, "loss": 0.9467, "step": 177 }, { "epoch": 0.07237243342142712, "grad_norm": 0.11777061969041824, "learning_rate": 0.00019299816812538164, "loss": 1.1441, "step": 178 }, { "epoch": 0.07277902012604189, "grad_norm": 0.09438811987638474, "learning_rate": 0.00019295745980052922, "loss": 0.9521, "step": 179 }, { "epoch": 0.07318560683065664, "grad_norm": 0.08892639726400375, "learning_rate": 0.00019291675147567678, "loss": 0.9804, "step": 180 }, { "epoch": 0.0735921935352714, "grad_norm": 0.08963356912136078, "learning_rate": 0.00019287604315082436, "loss": 1.0427, "step": 181 }, { "epoch": 0.07399878023988615, "grad_norm": 0.09870661795139313, "learning_rate": 0.0001928353348259719, "loss": 1.051, "step": 182 }, { "epoch": 0.07440536694450091, "grad_norm": 0.11843609809875488, "learning_rate": 0.00019279462650111947, "loss": 1.0109, "step": 183 }, { "epoch": 0.07481195364911568, "grad_norm": 0.08860404789447784, "learning_rate": 0.00019275391817626705, "loss": 1.0035, "step": 184 }, { "epoch": 0.07521854035373043, "grad_norm": 0.09085170924663544, "learning_rate": 0.00019271320985141463, "loss": 0.9461, "step": 185 }, { "epoch": 0.07562512705834519, "grad_norm": 0.09071815758943558, "learning_rate": 0.0001926725015265622, "loss": 0.9542, "step": 186 }, { "epoch": 0.07603171376295995, "grad_norm": 0.09566846489906311, "learning_rate": 0.00019263179320170976, "loss": 0.9958, "step": 187 }, { "epoch": 0.07643830046757472, "grad_norm": 0.11846338212490082, "learning_rate": 
0.00019259108487685732, "loss": 1.0737, "step": 188 }, { "epoch": 0.07684488717218947, "grad_norm": 0.09295649081468582, "learning_rate": 0.0001925503765520049, "loss": 1.0162, "step": 189 }, { "epoch": 0.07725147387680423, "grad_norm": 0.0917876660823822, "learning_rate": 0.00019250966822715245, "loss": 1.0432, "step": 190 }, { "epoch": 0.07765806058141898, "grad_norm": 0.10864109545946121, "learning_rate": 0.00019246895990230004, "loss": 1.1107, "step": 191 }, { "epoch": 0.07806464728603375, "grad_norm": 0.09689877927303314, "learning_rate": 0.0001924282515774476, "loss": 1.0421, "step": 192 }, { "epoch": 0.07847123399064851, "grad_norm": 0.09406042098999023, "learning_rate": 0.00019238754325259517, "loss": 1.1042, "step": 193 }, { "epoch": 0.07887782069526326, "grad_norm": 0.08346063643693924, "learning_rate": 0.00019234683492774272, "loss": 0.9554, "step": 194 }, { "epoch": 0.07928440739987802, "grad_norm": 0.10317754745483398, "learning_rate": 0.00019230612660289028, "loss": 1.0835, "step": 195 }, { "epoch": 0.07969099410449278, "grad_norm": 0.08712919056415558, "learning_rate": 0.0001922654182780379, "loss": 0.9799, "step": 196 }, { "epoch": 0.08009758080910755, "grad_norm": 0.0860556811094284, "learning_rate": 0.00019222470995318544, "loss": 0.8661, "step": 197 }, { "epoch": 0.0805041675137223, "grad_norm": 0.07940655201673508, "learning_rate": 0.00019218400162833302, "loss": 0.8305, "step": 198 }, { "epoch": 0.08091075421833706, "grad_norm": 0.09200199693441391, "learning_rate": 0.00019214329330348058, "loss": 0.9774, "step": 199 }, { "epoch": 0.08131734092295181, "grad_norm": 0.09980164468288422, "learning_rate": 0.00019210258497862813, "loss": 0.9791, "step": 200 }, { "epoch": 0.08172392762756658, "grad_norm": 0.09660688042640686, "learning_rate": 0.0001920618766537757, "loss": 1.027, "step": 201 }, { "epoch": 0.08213051433218134, "grad_norm": 0.09518909454345703, "learning_rate": 0.00019202116832892327, "loss": 0.9939, "step": 202 }, { "epoch": 
0.0825371010367961, "grad_norm": 0.0886114165186882, "learning_rate": 0.00019198046000407085, "loss": 0.985, "step": 203 }, { "epoch": 0.08294368774141085, "grad_norm": 0.09820783883333206, "learning_rate": 0.0001919397516792184, "loss": 1.0064, "step": 204 }, { "epoch": 0.08335027444602562, "grad_norm": 0.0957496389746666, "learning_rate": 0.00019189904335436598, "loss": 1.1126, "step": 205 }, { "epoch": 0.08375686115064038, "grad_norm": 0.09990067780017853, "learning_rate": 0.00019185833502951354, "loss": 1.1517, "step": 206 }, { "epoch": 0.08416344785525513, "grad_norm": 0.0953991562128067, "learning_rate": 0.0001918176267046611, "loss": 1.087, "step": 207 }, { "epoch": 0.08457003455986989, "grad_norm": 0.10291532427072525, "learning_rate": 0.0001917769183798087, "loss": 1.0366, "step": 208 }, { "epoch": 0.08497662126448464, "grad_norm": 0.09986121207475662, "learning_rate": 0.00019173621005495625, "loss": 0.9581, "step": 209 }, { "epoch": 0.08538320796909941, "grad_norm": 0.09369988739490509, "learning_rate": 0.00019169550173010383, "loss": 1.0048, "step": 210 }, { "epoch": 0.08578979467371417, "grad_norm": 0.0968063622713089, "learning_rate": 0.0001916547934052514, "loss": 1.0005, "step": 211 }, { "epoch": 0.08619638137832893, "grad_norm": 0.11241315305233002, "learning_rate": 0.00019161408508039894, "loss": 1.0316, "step": 212 }, { "epoch": 0.08660296808294368, "grad_norm": 0.09230878949165344, "learning_rate": 0.00019157337675554652, "loss": 0.917, "step": 213 }, { "epoch": 0.08700955478755845, "grad_norm": 0.08461520820856094, "learning_rate": 0.00019153266843069408, "loss": 0.9144, "step": 214 }, { "epoch": 0.08741614149217321, "grad_norm": 0.09011861681938171, "learning_rate": 0.00019149196010584166, "loss": 1.0092, "step": 215 }, { "epoch": 0.08782272819678796, "grad_norm": 0.09200841188430786, "learning_rate": 0.0001914512517809892, "loss": 1.0552, "step": 216 }, { "epoch": 0.08822931490140272, "grad_norm": 0.09052886068820953, "learning_rate": 
0.0001914105434561368, "loss": 0.9067, "step": 217 }, { "epoch": 0.08863590160601749, "grad_norm": 0.08740741014480591, "learning_rate": 0.00019136983513128435, "loss": 0.9182, "step": 218 }, { "epoch": 0.08904248831063225, "grad_norm": 0.08494284749031067, "learning_rate": 0.00019132912680643193, "loss": 0.8321, "step": 219 }, { "epoch": 0.089449075015247, "grad_norm": 0.0890796035528183, "learning_rate": 0.0001912884184815795, "loss": 0.9801, "step": 220 }, { "epoch": 0.08985566171986176, "grad_norm": 0.094822458922863, "learning_rate": 0.00019124771015672706, "loss": 0.9779, "step": 221 }, { "epoch": 0.09026224842447651, "grad_norm": 0.09756983071565628, "learning_rate": 0.00019120700183187465, "loss": 1.0385, "step": 222 }, { "epoch": 0.09066883512909128, "grad_norm": 0.09434107691049576, "learning_rate": 0.0001911662935070222, "loss": 1.063, "step": 223 }, { "epoch": 0.09107542183370604, "grad_norm": 0.0925639271736145, "learning_rate": 0.00019112558518216975, "loss": 0.9061, "step": 224 }, { "epoch": 0.0914820085383208, "grad_norm": 0.10531201958656311, "learning_rate": 0.00019108487685731734, "loss": 1.1593, "step": 225 }, { "epoch": 0.09188859524293555, "grad_norm": 0.08259832113981247, "learning_rate": 0.0001910441685324649, "loss": 0.8463, "step": 226 }, { "epoch": 0.09229518194755032, "grad_norm": 431.5063171386719, "learning_rate": 0.00019100346020761247, "loss": 1.0632, "step": 227 }, { "epoch": 0.09270176865216508, "grad_norm": 0.10764740407466888, "learning_rate": 0.00019096275188276002, "loss": 1.0083, "step": 228 }, { "epoch": 0.09310835535677983, "grad_norm": 0.08872029185295105, "learning_rate": 0.0001909220435579076, "loss": 0.9301, "step": 229 }, { "epoch": 0.09351494206139459, "grad_norm": 0.1006346270442009, "learning_rate": 0.00019088133523305516, "loss": 1.0103, "step": 230 }, { "epoch": 0.09392152876600936, "grad_norm": 0.0970514565706253, "learning_rate": 0.00019084062690820274, "loss": 1.0522, "step": 231 }, { "epoch": 
0.09432811547062411, "grad_norm": 0.09807727485895157, "learning_rate": 0.00019079991858335032, "loss": 1.0498, "step": 232 }, { "epoch": 0.09473470217523887, "grad_norm": 0.09828022867441177, "learning_rate": 0.00019075921025849788, "loss": 0.9871, "step": 233 }, { "epoch": 0.09514128887985362, "grad_norm": 0.10089042782783508, "learning_rate": 0.00019071850193364543, "loss": 0.977, "step": 234 }, { "epoch": 0.0955478755844684, "grad_norm": 0.09905245155096054, "learning_rate": 0.000190677793608793, "loss": 1.0135, "step": 235 }, { "epoch": 0.09595446228908315, "grad_norm": 0.1002473533153534, "learning_rate": 0.00019063708528394057, "loss": 1.0219, "step": 236 }, { "epoch": 0.0963610489936979, "grad_norm": 0.09028339385986328, "learning_rate": 0.00019059637695908815, "loss": 0.909, "step": 237 }, { "epoch": 0.09676763569831266, "grad_norm": 0.0950377881526947, "learning_rate": 0.0001905556686342357, "loss": 0.9749, "step": 238 }, { "epoch": 0.09717422240292742, "grad_norm": 0.09866049885749817, "learning_rate": 0.00019051496030938328, "loss": 1.0927, "step": 239 }, { "epoch": 0.09758080910754219, "grad_norm": 0.09754758328199387, "learning_rate": 0.00019047425198453084, "loss": 1.059, "step": 240 }, { "epoch": 0.09798739581215694, "grad_norm": 0.09261766821146011, "learning_rate": 0.00019043354365967842, "loss": 1.0912, "step": 241 }, { "epoch": 0.0983939825167717, "grad_norm": 0.08637125045061111, "learning_rate": 0.000190392835334826, "loss": 0.8925, "step": 242 }, { "epoch": 0.09880056922138646, "grad_norm": 0.0962812602519989, "learning_rate": 0.00019035212700997355, "loss": 1.0435, "step": 243 }, { "epoch": 0.09920715592600123, "grad_norm": 0.09047430753707886, "learning_rate": 0.00019031141868512113, "loss": 1.0787, "step": 244 }, { "epoch": 0.09961374263061598, "grad_norm": 0.09183438867330551, "learning_rate": 0.0001902707103602687, "loss": 0.9338, "step": 245 }, { "epoch": 0.10002032933523074, "grad_norm": 0.09977632761001587, "learning_rate": 
0.00019023000203541624, "loss": 1.1605, "step": 246 }, { "epoch": 0.10042691603984549, "grad_norm": 0.10386580228805542, "learning_rate": 0.00019018929371056382, "loss": 1.0493, "step": 247 }, { "epoch": 0.10083350274446026, "grad_norm": 0.09106533974409103, "learning_rate": 0.00019014858538571138, "loss": 0.9891, "step": 248 }, { "epoch": 0.10124008944907502, "grad_norm": 0.09407884627580643, "learning_rate": 0.00019010787706085896, "loss": 1.0367, "step": 249 }, { "epoch": 0.10164667615368977, "grad_norm": 0.10133463889360428, "learning_rate": 0.0001900671687360065, "loss": 1.0743, "step": 250 }, { "epoch": 0.10205326285830453, "grad_norm": 0.11877205967903137, "learning_rate": 0.0001900264604111541, "loss": 1.1572, "step": 251 }, { "epoch": 0.10245984956291929, "grad_norm": 0.10216309130191803, "learning_rate": 0.00018998575208630165, "loss": 1.0687, "step": 252 }, { "epoch": 0.10286643626753406, "grad_norm": 0.09023922681808472, "learning_rate": 0.0001899450437614492, "loss": 0.9153, "step": 253 }, { "epoch": 0.10327302297214881, "grad_norm": 0.09972742944955826, "learning_rate": 0.0001899043354365968, "loss": 0.9059, "step": 254 }, { "epoch": 0.10367960967676357, "grad_norm": 0.1175752505660057, "learning_rate": 0.00018986362711174436, "loss": 1.0659, "step": 255 }, { "epoch": 0.10408619638137832, "grad_norm": 0.09030337631702423, "learning_rate": 0.00018982291878689195, "loss": 0.9577, "step": 256 }, { "epoch": 0.1044927830859931, "grad_norm": 0.08850797265768051, "learning_rate": 0.0001897822104620395, "loss": 0.9193, "step": 257 }, { "epoch": 0.10489936979060785, "grad_norm": 1767.7669677734375, "learning_rate": 0.00018974150213718705, "loss": 0.9977, "step": 258 }, { "epoch": 0.1053059564952226, "grad_norm": 0.11435185372829437, "learning_rate": 0.00018970079381233463, "loss": 1.0468, "step": 259 }, { "epoch": 0.10571254319983736, "grad_norm": 0.10342080891132355, "learning_rate": 0.0001896600854874822, "loss": 1.0119, "step": 260 }, { "epoch": 
0.10611912990445213, "grad_norm": 0.11568263173103333, "learning_rate": 0.00018961937716262977, "loss": 1.025, "step": 261 }, { "epoch": 0.10652571660906689, "grad_norm": 0.12752321362495422, "learning_rate": 0.00018957866883777732, "loss": 1.1283, "step": 262 }, { "epoch": 0.10693230331368164, "grad_norm": 0.10688795894384384, "learning_rate": 0.0001895379605129249, "loss": 0.9052, "step": 263 }, { "epoch": 0.1073388900182964, "grad_norm": 0.10426552593708038, "learning_rate": 0.00018949725218807246, "loss": 0.9556, "step": 264 }, { "epoch": 0.10774547672291115, "grad_norm": 0.09953362494707108, "learning_rate": 0.00018945654386322004, "loss": 1.0734, "step": 265 }, { "epoch": 0.10815206342752592, "grad_norm": 0.09143470227718353, "learning_rate": 0.00018941583553836762, "loss": 1.0063, "step": 266 }, { "epoch": 0.10855865013214068, "grad_norm": 0.10831563919782639, "learning_rate": 0.00018937512721351518, "loss": 1.011, "step": 267 }, { "epoch": 0.10896523683675544, "grad_norm": 0.10352573543787003, "learning_rate": 0.00018933441888866276, "loss": 1.0625, "step": 268 }, { "epoch": 0.10937182354137019, "grad_norm": 0.09499429166316986, "learning_rate": 0.0001892937105638103, "loss": 0.8775, "step": 269 }, { "epoch": 0.10977841024598496, "grad_norm": 0.10296636819839478, "learning_rate": 0.00018925300223895787, "loss": 0.985, "step": 270 }, { "epoch": 0.11018499695059972, "grad_norm": 0.10464894771575928, "learning_rate": 0.00018921229391410545, "loss": 1.0051, "step": 271 }, { "epoch": 0.11059158365521447, "grad_norm": 0.09429532289505005, "learning_rate": 0.000189171585589253, "loss": 0.9793, "step": 272 }, { "epoch": 0.11099817035982923, "grad_norm": 0.09751992672681808, "learning_rate": 0.00018913087726440058, "loss": 1.0756, "step": 273 }, { "epoch": 0.111404757064444, "grad_norm": 0.11418993026018143, "learning_rate": 0.00018909016893954814, "loss": 1.0742, "step": 274 }, { "epoch": 0.11181134376905875, "grad_norm": 0.10320629924535751, "learning_rate": 
0.00018904946061469572, "loss": 1.036, "step": 275 }, { "epoch": 0.11221793047367351, "grad_norm": 0.09697311371564865, "learning_rate": 0.00018900875228984327, "loss": 1.0317, "step": 276 }, { "epoch": 0.11262451717828827, "grad_norm": 0.09579788893461227, "learning_rate": 0.00018896804396499085, "loss": 0.9621, "step": 277 }, { "epoch": 0.11303110388290302, "grad_norm": 0.09918879717588425, "learning_rate": 0.00018892733564013843, "loss": 1.0292, "step": 278 }, { "epoch": 0.11343769058751779, "grad_norm": 0.0923212468624115, "learning_rate": 0.000188886627315286, "loss": 1.0611, "step": 279 }, { "epoch": 0.11384427729213255, "grad_norm": 0.09480055421590805, "learning_rate": 0.00018884591899043357, "loss": 0.9809, "step": 280 }, { "epoch": 0.1142508639967473, "grad_norm": 0.09431526064872742, "learning_rate": 0.00018880521066558112, "loss": 1.0326, "step": 281 }, { "epoch": 0.11465745070136206, "grad_norm": 0.09080514311790466, "learning_rate": 0.00018876450234072868, "loss": 0.9115, "step": 282 }, { "epoch": 0.11506403740597683, "grad_norm": 0.10855970531702042, "learning_rate": 0.00018872379401587626, "loss": 1.0422, "step": 283 }, { "epoch": 0.11547062411059159, "grad_norm": 0.0941060334444046, "learning_rate": 0.0001886830856910238, "loss": 1.0352, "step": 284 }, { "epoch": 0.11587721081520634, "grad_norm": 0.08903583139181137, "learning_rate": 0.0001886423773661714, "loss": 0.964, "step": 285 }, { "epoch": 0.1162837975198211, "grad_norm": 0.08521820604801178, "learning_rate": 0.00018860166904131895, "loss": 0.917, "step": 286 }, { "epoch": 0.11669038422443587, "grad_norm": 0.1058691143989563, "learning_rate": 0.00018856096071646653, "loss": 1.0375, "step": 287 }, { "epoch": 0.11709697092905062, "grad_norm": 0.09435714781284332, "learning_rate": 0.0001885202523916141, "loss": 0.9766, "step": 288 }, { "epoch": 0.11750355763366538, "grad_norm": 0.09868729114532471, "learning_rate": 0.00018847954406676166, "loss": 1.1059, "step": 289 }, { "epoch": 
0.11791014433828013, "grad_norm": 0.08855635672807693, "learning_rate": 0.00018843883574190924, "loss": 0.9424, "step": 290 }, { "epoch": 0.11831673104289489, "grad_norm": 0.09142837673425674, "learning_rate": 0.0001883981274170568, "loss": 1.0425, "step": 291 }, { "epoch": 0.11872331774750966, "grad_norm": 0.0971277505159378, "learning_rate": 0.00018835741909220438, "loss": 1.108, "step": 292 }, { "epoch": 0.11912990445212442, "grad_norm": 0.09940122812986374, "learning_rate": 0.00018831671076735193, "loss": 1.0172, "step": 293 }, { "epoch": 0.11953649115673917, "grad_norm": 0.10263317078351974, "learning_rate": 0.0001882760024424995, "loss": 1.0956, "step": 294 }, { "epoch": 0.11994307786135393, "grad_norm": 0.1092846542596817, "learning_rate": 0.00018823529411764707, "loss": 0.9454, "step": 295 }, { "epoch": 0.1203496645659687, "grad_norm": 0.10364726930856705, "learning_rate": 0.00018819458579279462, "loss": 0.8884, "step": 296 }, { "epoch": 0.12075625127058345, "grad_norm": 0.0889100730419159, "learning_rate": 0.0001881538774679422, "loss": 0.9922, "step": 297 }, { "epoch": 0.12116283797519821, "grad_norm": 0.09209653735160828, "learning_rate": 0.00018811316914308976, "loss": 0.977, "step": 298 }, { "epoch": 0.12156942467981297, "grad_norm": 0.11542046815156937, "learning_rate": 0.00018807246081823734, "loss": 1.0694, "step": 299 }, { "epoch": 0.12197601138442773, "grad_norm": 0.10896503180265427, "learning_rate": 0.00018803175249338492, "loss": 1.0508, "step": 300 }, { "epoch": 0.12238259808904249, "grad_norm": 0.09302002936601639, "learning_rate": 0.00018799104416853248, "loss": 1.0512, "step": 301 }, { "epoch": 0.12278918479365725, "grad_norm": 0.09081271290779114, "learning_rate": 0.00018795033584368006, "loss": 0.9688, "step": 302 }, { "epoch": 0.123195771498272, "grad_norm": 0.1059931218624115, "learning_rate": 0.0001879096275188276, "loss": 1.0483, "step": 303 }, { "epoch": 0.12360235820288677, "grad_norm": 0.1018669605255127, "learning_rate": 
0.0001878689191939752, "loss": 1.019, "step": 304 }, { "epoch": 0.12400894490750153, "grad_norm": 0.1040007546544075, "learning_rate": 0.00018782821086912275, "loss": 1.037, "step": 305 }, { "epoch": 0.12441553161211628, "grad_norm": 0.10204601287841797, "learning_rate": 0.0001877875025442703, "loss": 0.9816, "step": 306 }, { "epoch": 0.12482211831673104, "grad_norm": 0.10591764748096466, "learning_rate": 0.00018774679421941788, "loss": 1.0939, "step": 307 }, { "epoch": 0.1252287050213458, "grad_norm": 0.09306305646896362, "learning_rate": 0.00018770608589456544, "loss": 1.0476, "step": 308 }, { "epoch": 0.12563529172596055, "grad_norm": 11.22681713104248, "learning_rate": 0.00018766537756971302, "loss": 1.0573, "step": 309 }, { "epoch": 0.12604187843057532, "grad_norm": 0.09422402083873749, "learning_rate": 0.00018762466924486057, "loss": 0.9993, "step": 310 }, { "epoch": 0.1264484651351901, "grad_norm": 0.0982229933142662, "learning_rate": 0.00018758396092000815, "loss": 0.9159, "step": 311 }, { "epoch": 0.12685505183980483, "grad_norm": 0.12579265236854553, "learning_rate": 0.00018754325259515573, "loss": 1.0935, "step": 312 }, { "epoch": 0.1272616385444196, "grad_norm": 0.10069390386343002, "learning_rate": 0.0001875025442703033, "loss": 1.0127, "step": 313 }, { "epoch": 0.12766822524903434, "grad_norm": 0.10948827862739563, "learning_rate": 0.00018746183594545087, "loss": 1.0576, "step": 314 }, { "epoch": 0.12807481195364911, "grad_norm": 0.09232445061206818, "learning_rate": 0.00018742112762059842, "loss": 0.9856, "step": 315 }, { "epoch": 0.12848139865826388, "grad_norm": 0.08319563418626785, "learning_rate": 0.000187380419295746, "loss": 0.9172, "step": 316 }, { "epoch": 0.12888798536287863, "grad_norm": 0.09697309136390686, "learning_rate": 0.00018733971097089356, "loss": 1.0567, "step": 317 }, { "epoch": 0.1292945720674934, "grad_norm": 0.09254255145788193, "learning_rate": 0.0001872990026460411, "loss": 1.0177, "step": 318 }, { "epoch": 
0.12970115877210814, "grad_norm": 0.09254108369350433, "learning_rate": 0.0001872582943211887, "loss": 1.0079, "step": 319 }, { "epoch": 0.1301077454767229, "grad_norm": 0.09095866233110428, "learning_rate": 0.00018721758599633625, "loss": 1.0633, "step": 320 }, { "epoch": 0.13051433218133768, "grad_norm": 0.09073010087013245, "learning_rate": 0.00018717687767148383, "loss": 0.9059, "step": 321 }, { "epoch": 0.13092091888595242, "grad_norm": 0.09842764586210251, "learning_rate": 0.00018713616934663138, "loss": 1.0766, "step": 322 }, { "epoch": 0.1313275055905672, "grad_norm": 0.09325529634952545, "learning_rate": 0.00018709546102177896, "loss": 1.066, "step": 323 }, { "epoch": 0.13173409229518196, "grad_norm": 0.09692969918251038, "learning_rate": 0.00018705475269692654, "loss": 0.9743, "step": 324 }, { "epoch": 0.1321406789997967, "grad_norm": 0.09432708472013474, "learning_rate": 0.0001870140443720741, "loss": 1.0141, "step": 325 }, { "epoch": 0.13254726570441147, "grad_norm": 0.09226994961500168, "learning_rate": 0.00018697333604722168, "loss": 0.9837, "step": 326 }, { "epoch": 0.1329538524090262, "grad_norm": 0.10843974351882935, "learning_rate": 0.00018693262772236923, "loss": 1.0248, "step": 327 }, { "epoch": 0.13336043911364098, "grad_norm": 0.09324774891138077, "learning_rate": 0.00018689191939751681, "loss": 1.0642, "step": 328 }, { "epoch": 0.13376702581825575, "grad_norm": 0.08934729546308517, "learning_rate": 0.00018685121107266437, "loss": 0.9792, "step": 329 }, { "epoch": 0.1341736125228705, "grad_norm": 0.09125274419784546, "learning_rate": 0.00018681050274781192, "loss": 1.0093, "step": 330 }, { "epoch": 0.13458019922748526, "grad_norm": 0.09645108133554459, "learning_rate": 0.0001867697944229595, "loss": 0.9503, "step": 331 }, { "epoch": 0.13498678593210003, "grad_norm": 0.09900861978530884, "learning_rate": 0.00018672908609810706, "loss": 0.9966, "step": 332 }, { "epoch": 0.13539337263671478, "grad_norm": 0.09018311649560928, "learning_rate": 
0.00018668837777325464, "loss": 0.965, "step": 333 }, { "epoch": 0.13579995934132955, "grad_norm": 0.10296136885881424, "learning_rate": 0.00018664766944840222, "loss": 1.1011, "step": 334 }, { "epoch": 0.1362065460459443, "grad_norm": 0.09104129672050476, "learning_rate": 0.00018660696112354977, "loss": 0.9814, "step": 335 }, { "epoch": 0.13661313275055906, "grad_norm": 0.09881450235843658, "learning_rate": 0.00018656625279869736, "loss": 1.0989, "step": 336 }, { "epoch": 0.13701971945517383, "grad_norm": 0.09691241383552551, "learning_rate": 0.0001865255444738449, "loss": 1.0967, "step": 337 }, { "epoch": 0.13742630615978857, "grad_norm": 0.10152243077754974, "learning_rate": 0.0001864848361489925, "loss": 1.0951, "step": 338 }, { "epoch": 0.13783289286440334, "grad_norm": 0.10802541673183441, "learning_rate": 0.00018644412782414005, "loss": 0.8742, "step": 339 }, { "epoch": 0.13823947956901808, "grad_norm": 0.09942565858364105, "learning_rate": 0.0001864034194992876, "loss": 0.9961, "step": 340 }, { "epoch": 0.13864606627363285, "grad_norm": 0.08618199825286865, "learning_rate": 0.00018636271117443518, "loss": 0.9645, "step": 341 }, { "epoch": 0.13905265297824762, "grad_norm": 0.1056099608540535, "learning_rate": 0.00018632200284958273, "loss": 0.9885, "step": 342 }, { "epoch": 0.13945923968286236, "grad_norm": 0.08862382173538208, "learning_rate": 0.00018628129452473032, "loss": 0.9316, "step": 343 }, { "epoch": 0.13986582638747713, "grad_norm": 0.09923135489225388, "learning_rate": 0.00018624058619987787, "loss": 0.9959, "step": 344 }, { "epoch": 0.1402724130920919, "grad_norm": 0.09120538830757141, "learning_rate": 0.00018619987787502545, "loss": 0.968, "step": 345 }, { "epoch": 0.14067899979670664, "grad_norm": 0.09669141471385956, "learning_rate": 0.00018615916955017303, "loss": 1.085, "step": 346 }, { "epoch": 0.1410855865013214, "grad_norm": 0.08598754554986954, "learning_rate": 0.00018611846122532059, "loss": 0.9504, "step": 347 }, { "epoch": 
0.14149217320593616, "grad_norm": 0.09238371253013611, "learning_rate": 0.00018607775290046817, "loss": 0.9742, "step": 348 }, { "epoch": 0.14189875991055093, "grad_norm": 0.091258205473423, "learning_rate": 0.00018603704457561572, "loss": 0.9341, "step": 349 }, { "epoch": 0.1423053466151657, "grad_norm": 0.10129548609256744, "learning_rate": 0.0001859963362507633, "loss": 1.0814, "step": 350 }, { "epoch": 0.14271193331978044, "grad_norm": 0.09523019194602966, "learning_rate": 0.00018595562792591086, "loss": 0.9848, "step": 351 }, { "epoch": 0.1431185200243952, "grad_norm": 0.09485248476266861, "learning_rate": 0.0001859149196010584, "loss": 0.9828, "step": 352 }, { "epoch": 0.14352510672900995, "grad_norm": 0.09963666647672653, "learning_rate": 0.000185874211276206, "loss": 1.1075, "step": 353 }, { "epoch": 0.14393169343362472, "grad_norm": 0.09067155420780182, "learning_rate": 0.00018583350295135355, "loss": 0.971, "step": 354 }, { "epoch": 0.1443382801382395, "grad_norm": 0.09153544157743454, "learning_rate": 0.00018579279462650113, "loss": 0.9405, "step": 355 }, { "epoch": 0.14474486684285423, "grad_norm": 0.1024472787976265, "learning_rate": 0.00018575208630164868, "loss": 0.9967, "step": 356 }, { "epoch": 0.145151453547469, "grad_norm": 0.09804495424032211, "learning_rate": 0.00018571137797679626, "loss": 0.9578, "step": 357 }, { "epoch": 0.14555804025208377, "grad_norm": 0.099054716527462, "learning_rate": 0.00018567066965194384, "loss": 0.9999, "step": 358 }, { "epoch": 0.1459646269566985, "grad_norm": 0.09781336784362793, "learning_rate": 0.0001856299613270914, "loss": 1.09, "step": 359 }, { "epoch": 0.14637121366131328, "grad_norm": 0.08993211388587952, "learning_rate": 0.00018558925300223898, "loss": 1.0719, "step": 360 }, { "epoch": 0.14677780036592802, "grad_norm": 0.09146003425121307, "learning_rate": 0.00018554854467738653, "loss": 1.0008, "step": 361 }, { "epoch": 0.1471843870705428, "grad_norm": 0.09643495827913284, "learning_rate": 
0.00018550783635253411, "loss": 1.0791, "step": 362 }, { "epoch": 0.14759097377515756, "grad_norm": 0.09078676998615265, "learning_rate": 0.00018546712802768167, "loss": 0.8641, "step": 363 }, { "epoch": 0.1479975604797723, "grad_norm": 0.08719085901975632, "learning_rate": 0.00018542641970282922, "loss": 0.985, "step": 364 }, { "epoch": 0.14840414718438708, "grad_norm": 0.09189736843109131, "learning_rate": 0.0001853857113779768, "loss": 0.9638, "step": 365 }, { "epoch": 0.14881073388900182, "grad_norm": 0.09381456673145294, "learning_rate": 0.00018534500305312436, "loss": 1.0036, "step": 366 }, { "epoch": 0.1492173205936166, "grad_norm": 0.0922684445977211, "learning_rate": 0.00018530429472827194, "loss": 1.0391, "step": 367 }, { "epoch": 0.14962390729823136, "grad_norm": 0.09465248882770538, "learning_rate": 0.0001852635864034195, "loss": 0.8874, "step": 368 }, { "epoch": 0.1500304940028461, "grad_norm": 0.0938408225774765, "learning_rate": 0.00018522287807856707, "loss": 1.0269, "step": 369 }, { "epoch": 0.15043708070746087, "grad_norm": 0.09377933293581009, "learning_rate": 0.00018518216975371466, "loss": 1.0142, "step": 370 }, { "epoch": 0.15084366741207564, "grad_norm": 0.1117277517914772, "learning_rate": 0.0001851414614288622, "loss": 1.0371, "step": 371 }, { "epoch": 0.15125025411669038, "grad_norm": 0.10293183475732803, "learning_rate": 0.0001851007531040098, "loss": 1.0, "step": 372 }, { "epoch": 0.15165684082130515, "grad_norm": 0.09216313809156418, "learning_rate": 0.00018506004477915734, "loss": 0.9703, "step": 373 }, { "epoch": 0.1520634275259199, "grad_norm": 0.09088669717311859, "learning_rate": 0.00018501933645430493, "loss": 0.8766, "step": 374 }, { "epoch": 0.15247001423053466, "grad_norm": 0.09916643798351288, "learning_rate": 0.00018497862812945248, "loss": 1.0958, "step": 375 }, { "epoch": 0.15287660093514943, "grad_norm": 0.08404985070228577, "learning_rate": 0.00018493791980460003, "loss": 0.9602, "step": 376 }, { "epoch": 
0.15328318763976417, "grad_norm": 0.10011377185583115, "learning_rate": 0.00018489721147974762, "loss": 1.0377, "step": 377 }, { "epoch": 0.15368977434437894, "grad_norm": 0.09958089143037796, "learning_rate": 0.00018485650315489517, "loss": 1.0213, "step": 378 }, { "epoch": 0.15409636104899369, "grad_norm": 0.09488838911056519, "learning_rate": 0.00018481579483004275, "loss": 0.941, "step": 379 }, { "epoch": 0.15450294775360846, "grad_norm": 0.09099314361810684, "learning_rate": 0.00018477508650519033, "loss": 0.8913, "step": 380 }, { "epoch": 0.15490953445822322, "grad_norm": 0.0956854447722435, "learning_rate": 0.00018473437818033789, "loss": 1.1478, "step": 381 }, { "epoch": 0.15531612116283797, "grad_norm": 0.11225584149360657, "learning_rate": 0.00018469366985548547, "loss": 1.0795, "step": 382 }, { "epoch": 0.15572270786745274, "grad_norm": 0.11592987924814224, "learning_rate": 0.00018465296153063302, "loss": 1.0863, "step": 383 }, { "epoch": 0.1561292945720675, "grad_norm": 0.09232570976018906, "learning_rate": 0.0001846122532057806, "loss": 0.9551, "step": 384 }, { "epoch": 0.15653588127668225, "grad_norm": 0.08860056847333908, "learning_rate": 0.00018457154488092816, "loss": 1.0206, "step": 385 }, { "epoch": 0.15694246798129702, "grad_norm": 0.10788331180810928, "learning_rate": 0.00018453083655607574, "loss": 0.9378, "step": 386 }, { "epoch": 0.15734905468591176, "grad_norm": 0.10758615285158157, "learning_rate": 0.0001844901282312233, "loss": 1.1149, "step": 387 }, { "epoch": 0.15775564139052653, "grad_norm": 0.10551386326551437, "learning_rate": 0.00018444941990637085, "loss": 1.0729, "step": 388 }, { "epoch": 0.1581622280951413, "grad_norm": 0.08733198046684265, "learning_rate": 0.00018440871158151843, "loss": 1.0058, "step": 389 }, { "epoch": 0.15856881479975604, "grad_norm": 0.1095399409532547, "learning_rate": 0.00018436800325666598, "loss": 1.0566, "step": 390 }, { "epoch": 0.1589754015043708, "grad_norm": 0.12356330454349518, "learning_rate": 
0.00018432729493181356, "loss": 1.0173, "step": 391 }, { "epoch": 0.15938198820898555, "grad_norm": 0.09934639930725098, "learning_rate": 0.00018428658660696114, "loss": 1.1237, "step": 392 }, { "epoch": 0.15978857491360032, "grad_norm": 0.09402013570070267, "learning_rate": 0.0001842458782821087, "loss": 1.0018, "step": 393 }, { "epoch": 0.1601951616182151, "grad_norm": 0.10511749237775803, "learning_rate": 0.00018420516995725628, "loss": 0.9844, "step": 394 }, { "epoch": 0.16060174832282983, "grad_norm": 0.11193688213825226, "learning_rate": 0.00018416446163240383, "loss": 0.9888, "step": 395 }, { "epoch": 0.1610083350274446, "grad_norm": 0.09895443916320801, "learning_rate": 0.00018412375330755141, "loss": 1.1045, "step": 396 }, { "epoch": 0.16141492173205937, "grad_norm": 0.09660319238901138, "learning_rate": 0.00018408304498269897, "loss": 1.0457, "step": 397 }, { "epoch": 0.16182150843667412, "grad_norm": 0.1339186728000641, "learning_rate": 0.00018404233665784655, "loss": 1.1266, "step": 398 }, { "epoch": 0.16222809514128889, "grad_norm": 0.1154564693570137, "learning_rate": 0.0001840016283329941, "loss": 1.0299, "step": 399 }, { "epoch": 0.16263468184590363, "grad_norm": 0.09698904305696487, "learning_rate": 0.00018396092000814166, "loss": 1.1101, "step": 400 }, { "epoch": 0.1630412685505184, "grad_norm": 0.09455164521932602, "learning_rate": 0.00018392021168328924, "loss": 0.9928, "step": 401 }, { "epoch": 0.16344785525513317, "grad_norm": 0.09728690981864929, "learning_rate": 0.0001838795033584368, "loss": 1.0603, "step": 402 }, { "epoch": 0.1638544419597479, "grad_norm": 0.10577269643545151, "learning_rate": 0.0001838387950335844, "loss": 0.9922, "step": 403 }, { "epoch": 0.16426102866436268, "grad_norm": 0.08850935101509094, "learning_rate": 0.00018379808670873196, "loss": 0.9758, "step": 404 }, { "epoch": 0.16466761536897742, "grad_norm": 0.09496256709098816, "learning_rate": 0.0001837573783838795, "loss": 1.0949, "step": 405 }, { "epoch": 
0.1650742020735922, "grad_norm": 0.09768050909042358, "learning_rate": 0.0001837166700590271, "loss": 1.0054, "step": 406 }, { "epoch": 0.16548078877820696, "grad_norm": 0.09913921356201172, "learning_rate": 0.00018367596173417464, "loss": 1.0272, "step": 407 }, { "epoch": 0.1658873754828217, "grad_norm": 0.0901927724480629, "learning_rate": 0.00018363525340932223, "loss": 1.0264, "step": 408 }, { "epoch": 0.16629396218743647, "grad_norm": 0.09796515852212906, "learning_rate": 0.00018359454508446978, "loss": 1.0338, "step": 409 }, { "epoch": 0.16670054889205124, "grad_norm": 0.1018638014793396, "learning_rate": 0.00018355383675961736, "loss": 1.0409, "step": 410 }, { "epoch": 0.16710713559666598, "grad_norm": 0.10666611790657043, "learning_rate": 0.00018351312843476492, "loss": 1.0924, "step": 411 }, { "epoch": 0.16751372230128075, "grad_norm": 0.0986141785979271, "learning_rate": 0.00018347242010991247, "loss": 0.9468, "step": 412 }, { "epoch": 0.1679203090058955, "grad_norm": 0.09429168701171875, "learning_rate": 0.00018343171178506005, "loss": 0.9706, "step": 413 }, { "epoch": 0.16832689571051027, "grad_norm": 0.09704872965812683, "learning_rate": 0.0001833910034602076, "loss": 1.0692, "step": 414 }, { "epoch": 0.16873348241512504, "grad_norm": 0.0980519950389862, "learning_rate": 0.00018335029513535519, "loss": 1.0218, "step": 415 }, { "epoch": 0.16914006911973978, "grad_norm": 0.08980212360620499, "learning_rate": 0.00018330958681050277, "loss": 0.9243, "step": 416 }, { "epoch": 0.16954665582435455, "grad_norm": 0.09630506485700607, "learning_rate": 0.00018326887848565032, "loss": 0.9599, "step": 417 }, { "epoch": 0.1699532425289693, "grad_norm": 0.08608522266149521, "learning_rate": 0.0001832281701607979, "loss": 0.9577, "step": 418 }, { "epoch": 0.17035982923358406, "grad_norm": 0.09151248633861542, "learning_rate": 0.00018318746183594546, "loss": 0.9956, "step": 419 }, { "epoch": 0.17076641593819883, "grad_norm": 0.09689094871282578, "learning_rate": 
0.00018314675351109304, "loss": 1.0999, "step": 420 }, { "epoch": 0.17117300264281357, "grad_norm": 0.09316612035036087, "learning_rate": 0.0001831060451862406, "loss": 0.8572, "step": 421 }, { "epoch": 0.17157958934742834, "grad_norm": 0.11449979990720749, "learning_rate": 0.00018306533686138817, "loss": 1.0328, "step": 422 }, { "epoch": 0.1719861760520431, "grad_norm": 0.10802194476127625, "learning_rate": 0.00018302462853653573, "loss": 0.9785, "step": 423 }, { "epoch": 0.17239276275665785, "grad_norm": 0.09997294098138809, "learning_rate": 0.00018298392021168328, "loss": 0.9778, "step": 424 }, { "epoch": 0.17279934946127262, "grad_norm": 0.10244690626859665, "learning_rate": 0.00018294321188683086, "loss": 1.0874, "step": 425 }, { "epoch": 0.17320593616588736, "grad_norm": 0.10659472644329071, "learning_rate": 0.00018290250356197844, "loss": 1.0196, "step": 426 }, { "epoch": 0.17361252287050213, "grad_norm": 0.09812036156654358, "learning_rate": 0.000182861795237126, "loss": 0.9051, "step": 427 }, { "epoch": 0.1740191095751169, "grad_norm": 0.845235288143158, "learning_rate": 0.00018282108691227358, "loss": 1.0531, "step": 428 }, { "epoch": 0.17442569627973165, "grad_norm": 0.109995998442173, "learning_rate": 0.00018278037858742113, "loss": 1.001, "step": 429 }, { "epoch": 0.17483228298434642, "grad_norm": 0.12578758597373962, "learning_rate": 0.00018273967026256871, "loss": 0.9513, "step": 430 }, { "epoch": 0.17523886968896116, "grad_norm": 0.1585826873779297, "learning_rate": 0.00018269896193771627, "loss": 1.0091, "step": 431 }, { "epoch": 0.17564545639357593, "grad_norm": 0.15150819718837738, "learning_rate": 0.00018265825361286385, "loss": 1.1045, "step": 432 }, { "epoch": 0.1760520430981907, "grad_norm": 0.1110219806432724, "learning_rate": 0.0001826175452880114, "loss": 0.9877, "step": 433 }, { "epoch": 0.17645862980280544, "grad_norm": 0.11296675354242325, "learning_rate": 0.00018257683696315896, "loss": 1.1317, "step": 434 }, { "epoch": 
0.1768652165074202, "grad_norm": 0.11464451253414154, "learning_rate": 0.00018253612863830654, "loss": 0.9485, "step": 435 }, { "epoch": 0.17727180321203498, "grad_norm": 0.08836513012647629, "learning_rate": 0.0001824954203134541, "loss": 0.8667, "step": 436 }, { "epoch": 0.17767838991664972, "grad_norm": 0.10697431862354279, "learning_rate": 0.00018245471198860167, "loss": 1.0692, "step": 437 }, { "epoch": 0.1780849766212645, "grad_norm": 0.10565032064914703, "learning_rate": 0.00018241400366374925, "loss": 1.0723, "step": 438 }, { "epoch": 0.17849156332587923, "grad_norm": 0.11343531310558319, "learning_rate": 0.0001823732953388968, "loss": 1.1038, "step": 439 }, { "epoch": 0.178898150030494, "grad_norm": 0.10002034902572632, "learning_rate": 0.0001823325870140444, "loss": 0.9859, "step": 440 }, { "epoch": 0.17930473673510877, "grad_norm": 0.10602378845214844, "learning_rate": 0.00018229187868919194, "loss": 1.1091, "step": 441 }, { "epoch": 0.1797113234397235, "grad_norm": 0.09775001555681229, "learning_rate": 0.00018225117036433953, "loss": 1.0473, "step": 442 }, { "epoch": 0.18011791014433828, "grad_norm": 0.09872320294380188, "learning_rate": 0.00018221046203948708, "loss": 1.0657, "step": 443 }, { "epoch": 0.18052449684895303, "grad_norm": 0.0893816128373146, "learning_rate": 0.00018216975371463466, "loss": 0.915, "step": 444 }, { "epoch": 0.1809310835535678, "grad_norm": 0.09870447218418121, "learning_rate": 0.00018212904538978221, "loss": 0.8847, "step": 445 }, { "epoch": 0.18133767025818257, "grad_norm": 0.09775330871343613, "learning_rate": 0.00018208833706492977, "loss": 0.841, "step": 446 }, { "epoch": 0.1817442569627973, "grad_norm": 0.10025996714830399, "learning_rate": 0.00018204762874007735, "loss": 0.9965, "step": 447 }, { "epoch": 0.18215084366741208, "grad_norm": 0.09369905292987823, "learning_rate": 0.0001820069204152249, "loss": 0.9998, "step": 448 }, { "epoch": 0.18255743037202685, "grad_norm": 0.09244808554649353, "learning_rate": 
0.0001819662120903725, "loss": 0.9938, "step": 449 }, { "epoch": 0.1829640170766416, "grad_norm": 0.12163155525922775, "learning_rate": 0.00018192550376552007, "loss": 1.1384, "step": 450 }, { "epoch": 0.18337060378125636, "grad_norm": 0.08755457401275635, "learning_rate": 0.00018188479544066762, "loss": 0.9002, "step": 451 }, { "epoch": 0.1837771904858711, "grad_norm": 0.0917607769370079, "learning_rate": 0.0001818440871158152, "loss": 0.9874, "step": 452 }, { "epoch": 0.18418377719048587, "grad_norm": 0.09113719314336777, "learning_rate": 0.00018180337879096276, "loss": 1.0187, "step": 453 }, { "epoch": 0.18459036389510064, "grad_norm": 0.08795943111181259, "learning_rate": 0.00018176267046611034, "loss": 0.902, "step": 454 }, { "epoch": 0.18499695059971538, "grad_norm": 0.1016731783747673, "learning_rate": 0.0001817219621412579, "loss": 0.9933, "step": 455 }, { "epoch": 0.18540353730433015, "grad_norm": 0.09413068741559982, "learning_rate": 0.00018168125381640547, "loss": 0.9448, "step": 456 }, { "epoch": 0.18581012400894492, "grad_norm": 0.10015012323856354, "learning_rate": 0.00018164054549155303, "loss": 1.1458, "step": 457 }, { "epoch": 0.18621671071355966, "grad_norm": 0.09086768329143524, "learning_rate": 0.00018159983716670058, "loss": 1.0543, "step": 458 }, { "epoch": 0.18662329741817443, "grad_norm": 0.10910352319478989, "learning_rate": 0.00018155912884184816, "loss": 1.0078, "step": 459 }, { "epoch": 0.18702988412278918, "grad_norm": 0.09674135595560074, "learning_rate": 0.00018151842051699572, "loss": 0.9758, "step": 460 }, { "epoch": 0.18743647082740394, "grad_norm": 0.09108126163482666, "learning_rate": 0.00018147771219214332, "loss": 1.0038, "step": 461 }, { "epoch": 0.18784305753201871, "grad_norm": 0.09710326045751572, "learning_rate": 0.00018143700386729088, "loss": 0.9693, "step": 462 }, { "epoch": 0.18824964423663346, "grad_norm": 0.10069318860769272, "learning_rate": 0.00018139629554243843, "loss": 1.1005, "step": 463 }, { "epoch": 
0.18865623094124823, "grad_norm": 0.09434141218662262, "learning_rate": 0.000181355587217586, "loss": 1.0359, "step": 464 }, { "epoch": 0.18906281764586297, "grad_norm": 0.09208261221647263, "learning_rate": 0.00018131487889273357, "loss": 1.0374, "step": 465 }, { "epoch": 0.18946940435047774, "grad_norm": 0.09581121802330017, "learning_rate": 0.00018127417056788115, "loss": 1.0267, "step": 466 }, { "epoch": 0.1898759910550925, "grad_norm": 0.09809669107198715, "learning_rate": 0.0001812334622430287, "loss": 1.0652, "step": 467 }, { "epoch": 0.19028257775970725, "grad_norm": 0.08496394008398056, "learning_rate": 0.00018119275391817628, "loss": 0.9468, "step": 468 }, { "epoch": 0.19068916446432202, "grad_norm": 0.09247399121522903, "learning_rate": 0.00018115204559332384, "loss": 1.0247, "step": 469 }, { "epoch": 0.1910957511689368, "grad_norm": 0.10010971128940582, "learning_rate": 0.0001811113372684714, "loss": 0.9674, "step": 470 }, { "epoch": 0.19150233787355153, "grad_norm": 0.09562191367149353, "learning_rate": 0.00018107062894361897, "loss": 0.9819, "step": 471 }, { "epoch": 0.1919089245781663, "grad_norm": 0.09223975241184235, "learning_rate": 0.00018102992061876655, "loss": 1.0051, "step": 472 }, { "epoch": 0.19231551128278104, "grad_norm": 0.09564565122127533, "learning_rate": 0.00018098921229391414, "loss": 0.908, "step": 473 }, { "epoch": 0.1927220979873958, "grad_norm": 0.09371364116668701, "learning_rate": 0.0001809485039690617, "loss": 1.0195, "step": 474 }, { "epoch": 0.19312868469201058, "grad_norm": 0.0895533412694931, "learning_rate": 0.00018090779564420924, "loss": 0.8912, "step": 475 }, { "epoch": 0.19353527139662532, "grad_norm": 0.08874888718128204, "learning_rate": 0.00018086708731935682, "loss": 0.9941, "step": 476 }, { "epoch": 0.1939418581012401, "grad_norm": 8989.1748046875, "learning_rate": 0.00018082637899450438, "loss": 1.0191, "step": 477 }, { "epoch": 0.19434844480585484, "grad_norm": 0.09893982112407684, "learning_rate": 
0.00018078567066965196, "loss": 1.1682, "step": 478 }, { "epoch": 0.1947550315104696, "grad_norm": 0.09100797772407532, "learning_rate": 0.00018074496234479951, "loss": 0.9466, "step": 479 }, { "epoch": 0.19516161821508438, "grad_norm": 0.10540256649255753, "learning_rate": 0.0001807042540199471, "loss": 1.0735, "step": 480 }, { "epoch": 0.19556820491969912, "grad_norm": 0.09110235422849655, "learning_rate": 0.00018066354569509465, "loss": 1.0097, "step": 481 }, { "epoch": 0.1959747916243139, "grad_norm": 0.10651825368404388, "learning_rate": 0.0001806228373702422, "loss": 1.014, "step": 482 }, { "epoch": 0.19638137832892866, "grad_norm": 0.08685674518346786, "learning_rate": 0.00018058212904538978, "loss": 0.9755, "step": 483 }, { "epoch": 0.1967879650335434, "grad_norm": 0.10092045366764069, "learning_rate": 0.00018054142072053737, "loss": 0.9397, "step": 484 }, { "epoch": 0.19719455173815817, "grad_norm": 0.1056622639298439, "learning_rate": 0.00018050071239568495, "loss": 0.9864, "step": 485 }, { "epoch": 0.1976011384427729, "grad_norm": 0.10525202006101608, "learning_rate": 0.0001804600040708325, "loss": 1.1085, "step": 486 }, { "epoch": 0.19800772514738768, "grad_norm": 0.10073073953390121, "learning_rate": 0.00018041929574598006, "loss": 1.1264, "step": 487 }, { "epoch": 0.19841431185200245, "grad_norm": 0.09659091383218765, "learning_rate": 0.00018037858742112764, "loss": 0.9848, "step": 488 }, { "epoch": 0.1988208985566172, "grad_norm": 0.09986629337072372, "learning_rate": 0.0001803378790962752, "loss": 1.0732, "step": 489 }, { "epoch": 0.19922748526123196, "grad_norm": 0.11215290427207947, "learning_rate": 0.00018029717077142277, "loss": 1.1259, "step": 490 }, { "epoch": 0.1996340719658467, "grad_norm": 0.11136343330144882, "learning_rate": 0.00018025646244657033, "loss": 1.0857, "step": 491 }, { "epoch": 0.20004065867046147, "grad_norm": 0.10452030599117279, "learning_rate": 0.0001802157541217179, "loss": 0.9997, "step": 492 }, { "epoch": 
0.20044724537507624, "grad_norm": 0.10394178330898285, "learning_rate": 0.00018017504579686546, "loss": 1.0852, "step": 493 }, { "epoch": 0.20085383207969099, "grad_norm": 0.10206598043441772, "learning_rate": 0.00018013433747201302, "loss": 0.9629, "step": 494 }, { "epoch": 0.20126041878430576, "grad_norm": 0.09365608543157578, "learning_rate": 0.00018009362914716062, "loss": 0.9504, "step": 495 }, { "epoch": 0.20166700548892053, "grad_norm": 0.09425178170204163, "learning_rate": 0.00018005292082230818, "loss": 1.0038, "step": 496 }, { "epoch": 0.20207359219353527, "grad_norm": 0.09562011808156967, "learning_rate": 0.00018001221249745576, "loss": 1.0877, "step": 497 }, { "epoch": 0.20248017889815004, "grad_norm": 0.11452426016330719, "learning_rate": 0.0001799715041726033, "loss": 1.0688, "step": 498 }, { "epoch": 0.20288676560276478, "grad_norm": 0.0930696651339531, "learning_rate": 0.00017993079584775087, "loss": 1.0255, "step": 499 }, { "epoch": 0.20329335230737955, "grad_norm": 0.10522327572107315, "learning_rate": 0.00017989008752289845, "loss": 1.085, "step": 500 }, { "epoch": 0.20369993901199432, "grad_norm": 0.08499190211296082, "learning_rate": 0.000179849379198046, "loss": 0.9235, "step": 501 }, { "epoch": 0.20410652571660906, "grad_norm": 0.09169955551624298, "learning_rate": 0.00017980867087319358, "loss": 0.9836, "step": 502 }, { "epoch": 0.20451311242122383, "grad_norm": 0.10331466048955917, "learning_rate": 0.00017976796254834114, "loss": 1.0255, "step": 503 }, { "epoch": 0.20491969912583857, "grad_norm": 0.0900363028049469, "learning_rate": 0.00017972725422348872, "loss": 0.9691, "step": 504 }, { "epoch": 0.20532628583045334, "grad_norm": 0.10095544904470444, "learning_rate": 0.00017968654589863627, "loss": 1.0289, "step": 505 }, { "epoch": 0.2057328725350681, "grad_norm": 0.0992627814412117, "learning_rate": 0.00017964583757378383, "loss": 0.9785, "step": 506 }, { "epoch": 0.20613945923968285, "grad_norm": 0.0954422652721405, "learning_rate": 
0.00017960512924893144, "loss": 1.0105, "step": 507 }, { "epoch": 0.20654604594429762, "grad_norm": 0.0994410440325737, "learning_rate": 0.000179564420924079, "loss": 1.0894, "step": 508 }, { "epoch": 0.2069526326489124, "grad_norm": 0.08866444230079651, "learning_rate": 0.00017952371259922654, "loss": 0.9725, "step": 509 }, { "epoch": 0.20735921935352714, "grad_norm": 0.09361348301172256, "learning_rate": 0.00017948300427437412, "loss": 1.0441, "step": 510 }, { "epoch": 0.2077658060581419, "grad_norm": 0.08215323090553284, "learning_rate": 0.00017944229594952168, "loss": 0.9214, "step": 511 }, { "epoch": 0.20817239276275665, "grad_norm": 0.09752262383699417, "learning_rate": 0.00017940158762466926, "loss": 0.9456, "step": 512 }, { "epoch": 0.20857897946737142, "grad_norm": 0.10021419823169708, "learning_rate": 0.00017936087929981681, "loss": 1.1158, "step": 513 }, { "epoch": 0.2089855661719862, "grad_norm": 0.09550227969884872, "learning_rate": 0.0001793201709749644, "loss": 0.9789, "step": 514 }, { "epoch": 0.20939215287660093, "grad_norm": 0.09059977531433105, "learning_rate": 0.00017927946265011195, "loss": 0.9649, "step": 515 }, { "epoch": 0.2097987395812157, "grad_norm": 0.09227627515792847, "learning_rate": 0.00017923875432525953, "loss": 0.9779, "step": 516 }, { "epoch": 0.21020532628583044, "grad_norm": 0.09919798374176025, "learning_rate": 0.00017919804600040708, "loss": 1.0155, "step": 517 }, { "epoch": 0.2106119129904452, "grad_norm": 0.09044051915407181, "learning_rate": 0.00017915733767555464, "loss": 0.9428, "step": 518 }, { "epoch": 0.21101849969505998, "grad_norm": 0.09017504006624222, "learning_rate": 0.00017911662935070225, "loss": 0.9244, "step": 519 }, { "epoch": 0.21142508639967472, "grad_norm": 0.09257036447525024, "learning_rate": 0.0001790759210258498, "loss": 1.0168, "step": 520 }, { "epoch": 0.2118316731042895, "grad_norm": 0.0926235020160675, "learning_rate": 0.00017903521270099735, "loss": 0.9363, "step": 521 }, { "epoch": 
0.21223825980890426, "grad_norm": 0.08785069733858109, "learning_rate": 0.00017899450437614494, "loss": 0.9428, "step": 522 }, { "epoch": 0.212644846513519, "grad_norm": 0.09824348986148834, "learning_rate": 0.0001789537960512925, "loss": 1.0378, "step": 523 }, { "epoch": 0.21305143321813377, "grad_norm": 0.0915142148733139, "learning_rate": 0.00017891308772644007, "loss": 0.9603, "step": 524 }, { "epoch": 0.21345801992274852, "grad_norm": 0.09466978907585144, "learning_rate": 0.00017887237940158763, "loss": 1.013, "step": 525 }, { "epoch": 0.21386460662736329, "grad_norm": 0.09305880963802338, "learning_rate": 0.0001788316710767352, "loss": 0.9386, "step": 526 }, { "epoch": 0.21427119333197805, "grad_norm": 0.09210691601037979, "learning_rate": 0.00017879096275188276, "loss": 0.9797, "step": 527 }, { "epoch": 0.2146777800365928, "grad_norm": 0.10415366291999817, "learning_rate": 0.00017875025442703031, "loss": 1.0125, "step": 528 }, { "epoch": 0.21508436674120757, "grad_norm": 0.10259640216827393, "learning_rate": 0.0001787095461021779, "loss": 1.0473, "step": 529 }, { "epoch": 0.2154909534458223, "grad_norm": 0.09523239731788635, "learning_rate": 0.00017866883777732548, "loss": 0.9603, "step": 530 }, { "epoch": 0.21589754015043708, "grad_norm": 0.10005185008049011, "learning_rate": 0.00017862812945247306, "loss": 1.0768, "step": 531 }, { "epoch": 0.21630412685505185, "grad_norm": 0.09643250703811646, "learning_rate": 0.0001785874211276206, "loss": 1.0799, "step": 532 }, { "epoch": 0.2167107135596666, "grad_norm": 0.09473159909248352, "learning_rate": 0.00017854671280276817, "loss": 1.0657, "step": 533 }, { "epoch": 0.21711730026428136, "grad_norm": 0.09550385922193527, "learning_rate": 0.00017850600447791575, "loss": 1.0389, "step": 534 }, { "epoch": 0.21752388696889613, "grad_norm": 0.09414463490247726, "learning_rate": 0.0001784652961530633, "loss": 1.0317, "step": 535 }, { "epoch": 0.21793047367351087, "grad_norm": 0.090250164270401, "learning_rate": 
0.00017842458782821088, "loss": 1.0212, "step": 536 }, { "epoch": 0.21833706037812564, "grad_norm": 0.09635050594806671, "learning_rate": 0.00017838387950335844, "loss": 0.9473, "step": 537 }, { "epoch": 0.21874364708274038, "grad_norm": 0.0985347330570221, "learning_rate": 0.00017834317117850602, "loss": 1.1372, "step": 538 }, { "epoch": 0.21915023378735515, "grad_norm": 0.09789203107357025, "learning_rate": 0.00017830246285365357, "loss": 1.0369, "step": 539 }, { "epoch": 0.21955682049196992, "grad_norm": 0.09777568280696869, "learning_rate": 0.00017826175452880113, "loss": 1.0746, "step": 540 }, { "epoch": 0.21996340719658466, "grad_norm": 0.09013503789901733, "learning_rate": 0.0001782210462039487, "loss": 1.0124, "step": 541 }, { "epoch": 0.22036999390119943, "grad_norm": 0.10604355484247208, "learning_rate": 0.0001781803378790963, "loss": 1.0158, "step": 542 }, { "epoch": 0.22077658060581418, "grad_norm": 0.09194648265838623, "learning_rate": 0.00017813962955424387, "loss": 0.9544, "step": 543 }, { "epoch": 0.22118316731042895, "grad_norm": 0.09223110228776932, "learning_rate": 0.00017809892122939142, "loss": 1.0094, "step": 544 }, { "epoch": 0.22158975401504372, "grad_norm": 0.09049870073795319, "learning_rate": 0.00017805821290453898, "loss": 0.8829, "step": 545 }, { "epoch": 0.22199634071965846, "grad_norm": 0.10157813131809235, "learning_rate": 0.00017801750457968656, "loss": 1.0904, "step": 546 }, { "epoch": 0.22240292742427323, "grad_norm": 0.09934356063604355, "learning_rate": 0.0001779767962548341, "loss": 1.0708, "step": 547 }, { "epoch": 0.222809514128888, "grad_norm": 0.09037156403064728, "learning_rate": 0.0001779360879299817, "loss": 0.916, "step": 548 }, { "epoch": 0.22321610083350274, "grad_norm": 0.09347829967737198, "learning_rate": 0.00017789537960512925, "loss": 1.0328, "step": 549 }, { "epoch": 0.2236226875381175, "grad_norm": 0.087796151638031, "learning_rate": 0.00017785467128027683, "loss": 0.9961, "step": 550 }, { "epoch": 
0.22402927424273225, "grad_norm": 0.09518422931432724, "learning_rate": 0.00017781396295542438, "loss": 0.9855, "step": 551 }, { "epoch": 0.22443586094734702, "grad_norm": 0.09606748074293137, "learning_rate": 0.00017777325463057194, "loss": 0.954, "step": 552 }, { "epoch": 0.2248424476519618, "grad_norm": 0.09338165074586868, "learning_rate": 0.00017773254630571955, "loss": 1.0876, "step": 553 }, { "epoch": 0.22524903435657653, "grad_norm": 0.09242440015077591, "learning_rate": 0.0001776918379808671, "loss": 0.9418, "step": 554 }, { "epoch": 0.2256556210611913, "grad_norm": 0.0990302637219429, "learning_rate": 0.00017765112965601468, "loss": 1.0641, "step": 555 }, { "epoch": 0.22606220776580604, "grad_norm": 0.09444238990545273, "learning_rate": 0.00017761042133116224, "loss": 1.0315, "step": 556 }, { "epoch": 0.22646879447042081, "grad_norm": 0.08771083503961563, "learning_rate": 0.0001775697130063098, "loss": 0.9898, "step": 557 }, { "epoch": 0.22687538117503558, "grad_norm": 0.10041147470474243, "learning_rate": 0.00017752900468145737, "loss": 1.0478, "step": 558 }, { "epoch": 0.22728196787965033, "grad_norm": 0.0933571383357048, "learning_rate": 0.00017748829635660492, "loss": 1.0002, "step": 559 }, { "epoch": 0.2276885545842651, "grad_norm": 0.0912991389632225, "learning_rate": 0.0001774475880317525, "loss": 1.0807, "step": 560 }, { "epoch": 0.22809514128887987, "grad_norm": 0.09350984543561935, "learning_rate": 0.00017740687970690006, "loss": 0.8962, "step": 561 }, { "epoch": 0.2285017279934946, "grad_norm": 0.0978541299700737, "learning_rate": 0.00017736617138204764, "loss": 1.0339, "step": 562 }, { "epoch": 0.22890831469810938, "grad_norm": 0.08964958041906357, "learning_rate": 0.0001773254630571952, "loss": 1.051, "step": 563 }, { "epoch": 0.22931490140272412, "grad_norm": 0.09241898357868195, "learning_rate": 0.00017728475473234275, "loss": 0.903, "step": 564 }, { "epoch": 0.2297214881073389, "grad_norm": 0.09366483986377716, "learning_rate": 
0.00017724404640749036, "loss": 1.0055, "step": 565 }, { "epoch": 0.23012807481195366, "grad_norm": 0.10184673964977264, "learning_rate": 0.0001772033380826379, "loss": 1.004, "step": 566 }, { "epoch": 0.2305346615165684, "grad_norm": 0.09287306666374207, "learning_rate": 0.0001771626297577855, "loss": 0.9667, "step": 567 }, { "epoch": 0.23094124822118317, "grad_norm": 0.08905091136693954, "learning_rate": 0.00017712192143293305, "loss": 0.9295, "step": 568 }, { "epoch": 0.2313478349257979, "grad_norm": 0.0908786877989769, "learning_rate": 0.0001770812131080806, "loss": 0.8957, "step": 569 }, { "epoch": 0.23175442163041268, "grad_norm": 0.10284281522035599, "learning_rate": 0.00017704050478322818, "loss": 1.1311, "step": 570 }, { "epoch": 0.23216100833502745, "grad_norm": 0.09007006883621216, "learning_rate": 0.00017699979645837574, "loss": 0.9919, "step": 571 }, { "epoch": 0.2325675950396422, "grad_norm": 0.09025272727012634, "learning_rate": 0.00017695908813352332, "loss": 0.9057, "step": 572 }, { "epoch": 0.23297418174425696, "grad_norm": 0.0994710698723793, "learning_rate": 0.00017691837980867087, "loss": 1.1472, "step": 573 }, { "epoch": 0.23338076844887173, "grad_norm": 0.09117428958415985, "learning_rate": 0.00017687767148381845, "loss": 0.9665, "step": 574 }, { "epoch": 0.23378735515348648, "grad_norm": 0.0893009826540947, "learning_rate": 0.000176836963158966, "loss": 0.951, "step": 575 }, { "epoch": 0.23419394185810125, "grad_norm": 0.08649599552154541, "learning_rate": 0.0001767962548341136, "loss": 0.925, "step": 576 }, { "epoch": 0.234600528562716, "grad_norm": 0.0928448736667633, "learning_rate": 0.00017675554650926117, "loss": 0.9253, "step": 577 }, { "epoch": 0.23500711526733076, "grad_norm": 0.10335158556699753, "learning_rate": 0.00017671483818440872, "loss": 1.1171, "step": 578 }, { "epoch": 0.23541370197194553, "grad_norm": 0.09889842569828033, "learning_rate": 0.0001766741298595563, "loss": 1.0005, "step": 579 }, { "epoch": 0.23582028867656027, 
"grad_norm": 0.09655506163835526, "learning_rate": 0.00017663342153470386, "loss": 1.0273, "step": 580 }, { "epoch": 0.23622687538117504, "grad_norm": 0.09516560286283493, "learning_rate": 0.0001765927132098514, "loss": 1.024, "step": 581 }, { "epoch": 0.23663346208578978, "grad_norm": 0.10024843364953995, "learning_rate": 0.000176552004884999, "loss": 1.0299, "step": 582 }, { "epoch": 0.23704004879040455, "grad_norm": 0.10152596235275269, "learning_rate": 0.00017651129656014655, "loss": 0.9658, "step": 583 }, { "epoch": 0.23744663549501932, "grad_norm": 0.09654249995946884, "learning_rate": 0.00017647058823529413, "loss": 1.0722, "step": 584 }, { "epoch": 0.23785322219963406, "grad_norm": 0.09112072736024857, "learning_rate": 0.00017642987991044168, "loss": 0.9846, "step": 585 }, { "epoch": 0.23825980890424883, "grad_norm": 0.09640034288167953, "learning_rate": 0.00017638917158558926, "loss": 1.0501, "step": 586 }, { "epoch": 0.2386663956088636, "grad_norm": 0.09564584493637085, "learning_rate": 0.00017634846326073682, "loss": 0.955, "step": 587 }, { "epoch": 0.23907298231347834, "grad_norm": 0.10815359652042389, "learning_rate": 0.0001763077549358844, "loss": 1.203, "step": 588 }, { "epoch": 0.2394795690180931, "grad_norm": 0.09078256040811539, "learning_rate": 0.00017626704661103198, "loss": 0.9881, "step": 589 }, { "epoch": 0.23988615572270786, "grad_norm": 0.09075487405061722, "learning_rate": 0.00017622633828617954, "loss": 0.984, "step": 590 }, { "epoch": 0.24029274242732263, "grad_norm": 0.09048381447792053, "learning_rate": 0.00017618562996132712, "loss": 1.0235, "step": 591 }, { "epoch": 0.2406993291319374, "grad_norm": 0.09820905327796936, "learning_rate": 0.00017614492163647467, "loss": 0.9763, "step": 592 }, { "epoch": 0.24110591583655214, "grad_norm": 0.0961097925901413, "learning_rate": 0.00017610421331162222, "loss": 1.1035, "step": 593 }, { "epoch": 0.2415125025411669, "grad_norm": 0.0877358540892601, "learning_rate": 0.0001760635049867698, "loss": 
0.8962, "step": 594 }, { "epoch": 0.24191908924578168, "grad_norm": 0.09730017930269241, "learning_rate": 0.00017602279666191736, "loss": 1.1232, "step": 595 }, { "epoch": 0.24232567595039642, "grad_norm": 0.09486240148544312, "learning_rate": 0.00017598208833706494, "loss": 1.0566, "step": 596 }, { "epoch": 0.2427322626550112, "grad_norm": 0.09367606788873672, "learning_rate": 0.0001759413800122125, "loss": 0.9934, "step": 597 }, { "epoch": 0.24313884935962593, "grad_norm": 0.09046703577041626, "learning_rate": 0.00017590067168736008, "loss": 0.9137, "step": 598 }, { "epoch": 0.2435454360642407, "grad_norm": 0.09512536972761154, "learning_rate": 0.00017585996336250766, "loss": 0.9733, "step": 599 }, { "epoch": 0.24395202276885547, "grad_norm": 0.08619649708271027, "learning_rate": 0.0001758192550376552, "loss": 0.8777, "step": 600 }, { "epoch": 0.2443586094734702, "grad_norm": 0.09386080503463745, "learning_rate": 0.0001757785467128028, "loss": 1.0171, "step": 601 }, { "epoch": 0.24476519617808498, "grad_norm": 0.098211869597435, "learning_rate": 0.00017573783838795035, "loss": 1.0283, "step": 602 }, { "epoch": 0.24517178288269972, "grad_norm": 0.08785215020179749, "learning_rate": 0.00017569713006309793, "loss": 0.9256, "step": 603 }, { "epoch": 0.2455783695873145, "grad_norm": 0.09419196099042892, "learning_rate": 0.00017565642173824548, "loss": 1.0727, "step": 604 }, { "epoch": 0.24598495629192926, "grad_norm": 0.08359744399785995, "learning_rate": 0.00017561571341339304, "loss": 0.8128, "step": 605 }, { "epoch": 0.246391542996544, "grad_norm": 0.09521903842687607, "learning_rate": 0.00017557500508854062, "loss": 1.0039, "step": 606 }, { "epoch": 0.24679812970115877, "grad_norm": 0.09249220043420792, "learning_rate": 0.00017553429676368817, "loss": 0.9793, "step": 607 }, { "epoch": 0.24720471640577354, "grad_norm": 0.09042320400476456, "learning_rate": 0.00017549358843883575, "loss": 0.9552, "step": 608 }, { "epoch": 0.2476113031103883, "grad_norm": 
0.09921760857105255, "learning_rate": 0.0001754528801139833, "loss": 1.0438, "step": 609 }, { "epoch": 0.24801788981500306, "grad_norm": 0.09381897002458572, "learning_rate": 0.0001754121717891309, "loss": 0.9345, "step": 610 }, { "epoch": 0.2484244765196178, "grad_norm": 0.10531708598136902, "learning_rate": 0.00017537146346427847, "loss": 1.0504, "step": 611 }, { "epoch": 0.24883106322423257, "grad_norm": 0.08689824491739273, "learning_rate": 0.00017533075513942602, "loss": 0.9314, "step": 612 }, { "epoch": 0.24923764992884734, "grad_norm": 0.0852714404463768, "learning_rate": 0.0001752900468145736, "loss": 0.9555, "step": 613 }, { "epoch": 0.24964423663346208, "grad_norm": 0.09126532077789307, "learning_rate": 0.00017524933848972116, "loss": 1.0025, "step": 614 }, { "epoch": 0.2500508233380768, "grad_norm": 0.09271440654993057, "learning_rate": 0.0001752086301648687, "loss": 0.9753, "step": 615 }, { "epoch": 0.2504574100426916, "grad_norm": 0.09532623738050461, "learning_rate": 0.0001751679218400163, "loss": 1.0158, "step": 616 }, { "epoch": 0.25086399674730636, "grad_norm": 0.09074733406305313, "learning_rate": 0.00017512721351516385, "loss": 0.9335, "step": 617 }, { "epoch": 0.2512705834519211, "grad_norm": 0.08851765096187592, "learning_rate": 0.00017508650519031143, "loss": 0.9356, "step": 618 }, { "epoch": 0.2516771701565359, "grad_norm": 0.09362735599279404, "learning_rate": 0.00017504579686545898, "loss": 0.9671, "step": 619 }, { "epoch": 0.25208375686115064, "grad_norm": 0.09651681780815125, "learning_rate": 0.00017500508854060656, "loss": 1.0424, "step": 620 }, { "epoch": 0.2524903435657654, "grad_norm": 0.0909847766160965, "learning_rate": 0.00017496438021575412, "loss": 0.9606, "step": 621 }, { "epoch": 0.2528969302703802, "grad_norm": 0.09061215072870255, "learning_rate": 0.0001749236718909017, "loss": 1.0444, "step": 622 }, { "epoch": 0.2533035169749949, "grad_norm": 0.10378221422433853, "learning_rate": 0.00017488296356604928, "loss": 0.991, 
"step": 623 }, { "epoch": 0.25371010367960967, "grad_norm": 0.09829584509134293, "learning_rate": 0.00017484225524119683, "loss": 0.8911, "step": 624 }, { "epoch": 0.2541166903842244, "grad_norm": 0.0915360227227211, "learning_rate": 0.00017480154691634442, "loss": 0.9616, "step": 625 }, { "epoch": 0.2545232770888392, "grad_norm": 0.09093396365642548, "learning_rate": 0.00017476083859149197, "loss": 0.886, "step": 626 }, { "epoch": 0.25492986379345395, "grad_norm": 0.09621252864599228, "learning_rate": 0.00017472013026663952, "loss": 1.0167, "step": 627 }, { "epoch": 0.2553364504980687, "grad_norm": 0.10381032526493073, "learning_rate": 0.0001746794219417871, "loss": 1.0008, "step": 628 }, { "epoch": 0.2557430372026835, "grad_norm": 0.10300707817077637, "learning_rate": 0.00017463871361693466, "loss": 1.0176, "step": 629 }, { "epoch": 0.25614962390729823, "grad_norm": 0.09527314454317093, "learning_rate": 0.00017459800529208224, "loss": 0.9698, "step": 630 }, { "epoch": 0.25655621061191297, "grad_norm": 0.09126346558332443, "learning_rate": 0.0001745572969672298, "loss": 0.9575, "step": 631 }, { "epoch": 0.25696279731652777, "grad_norm": 0.10338298231363297, "learning_rate": 0.00017451658864237738, "loss": 0.9666, "step": 632 }, { "epoch": 0.2573693840211425, "grad_norm": 0.10729333758354187, "learning_rate": 0.00017447588031752493, "loss": 1.0857, "step": 633 }, { "epoch": 0.25777597072575725, "grad_norm": 0.09935057163238525, "learning_rate": 0.0001744351719926725, "loss": 1.0387, "step": 634 }, { "epoch": 0.25818255743037205, "grad_norm": 0.10013054311275482, "learning_rate": 0.0001743944636678201, "loss": 1.0938, "step": 635 }, { "epoch": 0.2585891441349868, "grad_norm": 0.1010405421257019, "learning_rate": 0.00017435375534296765, "loss": 0.9507, "step": 636 }, { "epoch": 0.25899573083960153, "grad_norm": 0.09375156462192535, "learning_rate": 0.00017431304701811523, "loss": 0.965, "step": 637 }, { "epoch": 0.2594023175442163, "grad_norm": 0.09231811761856079, 
"learning_rate": 0.00017427233869326278, "loss": 1.0549, "step": 638 }, { "epoch": 0.2598089042488311, "grad_norm": 0.09252883493900299, "learning_rate": 0.00017423163036841034, "loss": 0.9562, "step": 639 }, { "epoch": 0.2602154909534458, "grad_norm": 0.08758233487606049, "learning_rate": 0.00017419092204355792, "loss": 0.8267, "step": 640 }, { "epoch": 0.26062207765806056, "grad_norm": 0.11551918089389801, "learning_rate": 0.00017415021371870547, "loss": 1.1932, "step": 641 }, { "epoch": 0.26102866436267536, "grad_norm": 0.09874440729618073, "learning_rate": 0.00017410950539385305, "loss": 0.994, "step": 642 }, { "epoch": 0.2614352510672901, "grad_norm": 0.0956895500421524, "learning_rate": 0.0001740687970690006, "loss": 1.0509, "step": 643 }, { "epoch": 0.26184183777190484, "grad_norm": 0.09503257274627686, "learning_rate": 0.0001740280887441482, "loss": 1.0729, "step": 644 }, { "epoch": 0.26224842447651964, "grad_norm": 0.09010849893093109, "learning_rate": 0.00017398738041929577, "loss": 1.0029, "step": 645 }, { "epoch": 0.2626550111811344, "grad_norm": 0.1030508279800415, "learning_rate": 0.00017394667209444332, "loss": 1.0345, "step": 646 }, { "epoch": 0.2630615978857491, "grad_norm": 0.09376908838748932, "learning_rate": 0.0001739059637695909, "loss": 0.9222, "step": 647 }, { "epoch": 0.2634681845903639, "grad_norm": 0.09775326400995255, "learning_rate": 0.00017386525544473846, "loss": 1.0516, "step": 648 }, { "epoch": 0.26387477129497866, "grad_norm": 0.08575090020895004, "learning_rate": 0.00017382454711988604, "loss": 0.9617, "step": 649 }, { "epoch": 0.2642813579995934, "grad_norm": 0.10131283849477768, "learning_rate": 0.0001737838387950336, "loss": 0.9831, "step": 650 }, { "epoch": 0.2646879447042082, "grad_norm": 0.10559401661157608, "learning_rate": 0.00017374313047018115, "loss": 1.1504, "step": 651 }, { "epoch": 0.26509453140882294, "grad_norm": 0.10393818467855453, "learning_rate": 0.00017370242214532873, "loss": 0.9879, "step": 652 }, { "epoch": 
0.2655011181134377, "grad_norm": 0.0909782201051712, "learning_rate": 0.00017366171382047628, "loss": 0.9822, "step": 653 }, { "epoch": 0.2659077048180524, "grad_norm": 0.07904715836048126, "learning_rate": 0.00017362100549562386, "loss": 0.9603, "step": 654 }, { "epoch": 0.2663142915226672, "grad_norm": 0.1078203096985817, "learning_rate": 0.00017358029717077142, "loss": 0.9873, "step": 655 }, { "epoch": 0.26672087822728197, "grad_norm": 0.09209142625331879, "learning_rate": 0.000173539588845919, "loss": 0.9763, "step": 656 }, { "epoch": 0.2671274649318967, "grad_norm": 0.10040608793497086, "learning_rate": 0.00017349888052106658, "loss": 1.0362, "step": 657 }, { "epoch": 0.2675340516365115, "grad_norm": 0.1019914448261261, "learning_rate": 0.00017345817219621413, "loss": 0.9715, "step": 658 }, { "epoch": 0.26794063834112625, "grad_norm": 0.09650052338838577, "learning_rate": 0.00017341746387136172, "loss": 0.9212, "step": 659 }, { "epoch": 0.268347225045741, "grad_norm": 0.10352949053049088, "learning_rate": 0.00017337675554650927, "loss": 0.9535, "step": 660 }, { "epoch": 0.2687538117503558, "grad_norm": 0.0910978689789772, "learning_rate": 0.00017333604722165685, "loss": 0.92, "step": 661 }, { "epoch": 0.26916039845497053, "grad_norm": 0.0917704775929451, "learning_rate": 0.0001732953388968044, "loss": 0.9926, "step": 662 }, { "epoch": 0.26956698515958527, "grad_norm": 0.09258386492729187, "learning_rate": 0.00017325463057195196, "loss": 1.0495, "step": 663 }, { "epoch": 0.26997357186420007, "grad_norm": 0.08842117339372635, "learning_rate": 0.00017321392224709954, "loss": 0.8509, "step": 664 }, { "epoch": 0.2703801585688148, "grad_norm": 0.10442432016134262, "learning_rate": 0.0001731732139222471, "loss": 1.0109, "step": 665 }, { "epoch": 0.27078674527342955, "grad_norm": 0.0932939201593399, "learning_rate": 0.00017313250559739468, "loss": 0.979, "step": 666 }, { "epoch": 0.2711933319780443, "grad_norm": 0.09750665724277496, "learning_rate": 
0.00017309179727254223, "loss": 1.0937, "step": 667 }, { "epoch": 0.2715999186826591, "grad_norm": 0.0916363000869751, "learning_rate": 0.0001730510889476898, "loss": 0.9497, "step": 668 }, { "epoch": 0.27200650538727383, "grad_norm": 0.08987271040678024, "learning_rate": 0.0001730103806228374, "loss": 0.9528, "step": 669 }, { "epoch": 0.2724130920918886, "grad_norm": 0.10026190429925919, "learning_rate": 0.00017296967229798495, "loss": 0.9323, "step": 670 }, { "epoch": 0.2728196787965034, "grad_norm": 0.09267815947532654, "learning_rate": 0.00017292896397313253, "loss": 0.9469, "step": 671 }, { "epoch": 0.2732262655011181, "grad_norm": 0.0934181734919548, "learning_rate": 0.00017288825564828008, "loss": 0.9217, "step": 672 }, { "epoch": 0.27363285220573286, "grad_norm": 0.09324284642934799, "learning_rate": 0.00017284754732342766, "loss": 0.941, "step": 673 }, { "epoch": 0.27403943891034765, "grad_norm": 0.09280567616224289, "learning_rate": 0.00017280683899857522, "loss": 0.9792, "step": 674 }, { "epoch": 0.2744460256149624, "grad_norm": 0.09610418230295181, "learning_rate": 0.00017276613067372277, "loss": 1.0206, "step": 675 }, { "epoch": 0.27485261231957714, "grad_norm": 0.09881840646266937, "learning_rate": 0.00017272542234887035, "loss": 1.027, "step": 676 }, { "epoch": 0.27525919902419194, "grad_norm": 0.08691172301769257, "learning_rate": 0.0001726847140240179, "loss": 0.9146, "step": 677 }, { "epoch": 0.2756657857288067, "grad_norm": 0.09320187568664551, "learning_rate": 0.0001726440056991655, "loss": 0.9386, "step": 678 }, { "epoch": 0.2760723724334214, "grad_norm": 0.10148660838603973, "learning_rate": 0.00017260329737431304, "loss": 1.0792, "step": 679 }, { "epoch": 0.27647895913803616, "grad_norm": 0.09676487743854523, "learning_rate": 0.00017256258904946062, "loss": 0.9966, "step": 680 }, { "epoch": 0.27688554584265096, "grad_norm": 0.10451057553291321, "learning_rate": 0.0001725218807246082, "loss": 1.0894, "step": 681 }, { "epoch": 
0.2772921325472657, "grad_norm": 0.0993044376373291, "learning_rate": 0.00017248117239975576, "loss": 0.9681, "step": 682 }, { "epoch": 0.27769871925188044, "grad_norm": 0.10459738969802856, "learning_rate": 0.00017244046407490334, "loss": 1.0805, "step": 683 }, { "epoch": 0.27810530595649524, "grad_norm": 0.09404733031988144, "learning_rate": 0.0001723997557500509, "loss": 1.019, "step": 684 }, { "epoch": 0.27851189266111, "grad_norm": 0.09058643132448196, "learning_rate": 0.00017235904742519847, "loss": 1.0262, "step": 685 }, { "epoch": 0.2789184793657247, "grad_norm": 0.08885390311479568, "learning_rate": 0.00017231833910034603, "loss": 0.9735, "step": 686 }, { "epoch": 0.2793250660703395, "grad_norm": 0.09429844468832016, "learning_rate": 0.00017227763077549358, "loss": 0.9265, "step": 687 }, { "epoch": 0.27973165277495426, "grad_norm": 0.09403959661722183, "learning_rate": 0.00017223692245064116, "loss": 1.105, "step": 688 }, { "epoch": 0.280138239479569, "grad_norm": 0.0966695174574852, "learning_rate": 0.00017219621412578872, "loss": 1.1486, "step": 689 }, { "epoch": 0.2805448261841838, "grad_norm": 0.08807272464036942, "learning_rate": 0.0001721555058009363, "loss": 0.7858, "step": 690 }, { "epoch": 0.28095141288879855, "grad_norm": 0.09138672798871994, "learning_rate": 0.00017211479747608388, "loss": 1.0746, "step": 691 }, { "epoch": 0.2813579995934133, "grad_norm": 0.1015787124633789, "learning_rate": 0.00017207408915123143, "loss": 1.0996, "step": 692 }, { "epoch": 0.28176458629802803, "grad_norm": 0.1018923744559288, "learning_rate": 0.00017203338082637901, "loss": 0.9986, "step": 693 }, { "epoch": 0.2821711730026428, "grad_norm": 0.10809756815433502, "learning_rate": 0.00017199267250152657, "loss": 1.0796, "step": 694 }, { "epoch": 0.28257775970725757, "grad_norm": 0.08786237984895706, "learning_rate": 0.00017195196417667415, "loss": 0.9383, "step": 695 }, { "epoch": 0.2829843464118723, "grad_norm": 0.12186282873153687, "learning_rate": 
0.0001719112558518217, "loss": 0.9624, "step": 696 }, { "epoch": 0.2833909331164871, "grad_norm": 0.09969845414161682, "learning_rate": 0.00017187054752696929, "loss": 0.9895, "step": 697 }, { "epoch": 0.28379751982110185, "grad_norm": 0.09089677035808563, "learning_rate": 0.00017182983920211684, "loss": 1.0026, "step": 698 }, { "epoch": 0.2842041065257166, "grad_norm": 0.10096369683742523, "learning_rate": 0.0001717891308772644, "loss": 1.0723, "step": 699 }, { "epoch": 0.2846106932303314, "grad_norm": 0.09782766550779343, "learning_rate": 0.00017174842255241197, "loss": 1.0235, "step": 700 }, { "epoch": 0.28501727993494613, "grad_norm": 0.10204874724149704, "learning_rate": 0.00017170771422755953, "loss": 1.0794, "step": 701 }, { "epoch": 0.2854238666395609, "grad_norm": 0.08970806747674942, "learning_rate": 0.0001716670059027071, "loss": 1.0124, "step": 702 }, { "epoch": 0.2858304533441757, "grad_norm": 0.10221361368894577, "learning_rate": 0.0001716262975778547, "loss": 1.0053, "step": 703 }, { "epoch": 0.2862370400487904, "grad_norm": 0.09354016184806824, "learning_rate": 0.00017158558925300225, "loss": 1.0673, "step": 704 }, { "epoch": 0.28664362675340516, "grad_norm": 0.10699865967035294, "learning_rate": 0.00017154488092814983, "loss": 1.1191, "step": 705 }, { "epoch": 0.2870502134580199, "grad_norm": 0.10488536953926086, "learning_rate": 0.00017150417260329738, "loss": 1.0351, "step": 706 }, { "epoch": 0.2874568001626347, "grad_norm": 0.09427926689386368, "learning_rate": 0.00017146346427844496, "loss": 0.9539, "step": 707 }, { "epoch": 0.28786338686724944, "grad_norm": 0.1017470583319664, "learning_rate": 0.00017142275595359252, "loss": 1.0699, "step": 708 }, { "epoch": 0.2882699735718642, "grad_norm": 0.09857328236103058, "learning_rate": 0.00017138204762874007, "loss": 1.1037, "step": 709 }, { "epoch": 0.288676560276479, "grad_norm": 0.08667341619729996, "learning_rate": 0.00017134133930388765, "loss": 0.9774, "step": 710 }, { "epoch": 
0.2890831469810937, "grad_norm": 0.09951958060264587, "learning_rate": 0.0001713006309790352, "loss": 0.9343, "step": 711 }, { "epoch": 0.28948973368570846, "grad_norm": 0.11067653447389603, "learning_rate": 0.0001712599226541828, "loss": 1.004, "step": 712 }, { "epoch": 0.28989632039032326, "grad_norm": 0.09549311548471451, "learning_rate": 0.00017121921432933034, "loss": 0.9841, "step": 713 }, { "epoch": 0.290302907094938, "grad_norm": 0.08448553830385208, "learning_rate": 0.00017117850600447792, "loss": 0.8772, "step": 714 }, { "epoch": 0.29070949379955274, "grad_norm": 0.10418415814638138, "learning_rate": 0.0001711377976796255, "loss": 1.0146, "step": 715 }, { "epoch": 0.29111608050416754, "grad_norm": 0.0994696393609047, "learning_rate": 0.00017109708935477306, "loss": 1.0321, "step": 716 }, { "epoch": 0.2915226672087823, "grad_norm": 0.10770169645547867, "learning_rate": 0.00017105638102992064, "loss": 0.9757, "step": 717 }, { "epoch": 0.291929253913397, "grad_norm": 0.09281262755393982, "learning_rate": 0.0001710156727050682, "loss": 0.9315, "step": 718 }, { "epoch": 0.29233584061801177, "grad_norm": 0.09385684132575989, "learning_rate": 0.00017097496438021577, "loss": 0.984, "step": 719 }, { "epoch": 0.29274242732262656, "grad_norm": 0.09654410928487778, "learning_rate": 0.00017093425605536333, "loss": 1.0594, "step": 720 }, { "epoch": 0.2931490140272413, "grad_norm": 0.09549154341220856, "learning_rate": 0.00017089354773051088, "loss": 0.8973, "step": 721 }, { "epoch": 0.29355560073185605, "grad_norm": 0.10645559430122375, "learning_rate": 0.00017085283940565846, "loss": 1.0742, "step": 722 }, { "epoch": 0.29396218743647085, "grad_norm": 0.10242202132940292, "learning_rate": 0.00017081213108080602, "loss": 1.0859, "step": 723 }, { "epoch": 0.2943687741410856, "grad_norm": 0.09050785005092621, "learning_rate": 0.0001707714227559536, "loss": 0.9552, "step": 724 }, { "epoch": 0.29477536084570033, "grad_norm": 0.09605402499437332, "learning_rate": 
0.00017073071443110115, "loss": 1.0419, "step": 725 }, { "epoch": 0.2951819475503151, "grad_norm": 0.10179407894611359, "learning_rate": 0.00017069000610624873, "loss": 1.018, "step": 726 }, { "epoch": 0.29558853425492987, "grad_norm": 0.10907282680273056, "learning_rate": 0.00017064929778139631, "loss": 0.9834, "step": 727 }, { "epoch": 0.2959951209595446, "grad_norm": 0.09972859919071198, "learning_rate": 0.00017060858945654387, "loss": 0.9138, "step": 728 }, { "epoch": 0.2964017076641594, "grad_norm": 0.10035282373428345, "learning_rate": 0.00017056788113169145, "loss": 1.0388, "step": 729 }, { "epoch": 0.29680829436877415, "grad_norm": 0.09865434467792511, "learning_rate": 0.000170527172806839, "loss": 1.088, "step": 730 }, { "epoch": 0.2972148810733889, "grad_norm": 0.09653953462839127, "learning_rate": 0.00017048646448198659, "loss": 1.0562, "step": 731 }, { "epoch": 0.29762146777800363, "grad_norm": 0.09930591285228729, "learning_rate": 0.00017044575615713414, "loss": 1.0312, "step": 732 }, { "epoch": 0.29802805448261843, "grad_norm": 0.10804631561040878, "learning_rate": 0.0001704050478322817, "loss": 0.9913, "step": 733 }, { "epoch": 0.2984346411872332, "grad_norm": 0.09772603958845139, "learning_rate": 0.00017036433950742927, "loss": 1.0463, "step": 734 }, { "epoch": 0.2988412278918479, "grad_norm": 0.09799183160066605, "learning_rate": 0.00017032363118257683, "loss": 1.0048, "step": 735 }, { "epoch": 0.2992478145964627, "grad_norm": 0.09203165769577026, "learning_rate": 0.0001702829228577244, "loss": 0.9581, "step": 736 }, { "epoch": 0.29965440130107746, "grad_norm": 0.1009124219417572, "learning_rate": 0.000170242214532872, "loss": 1.0613, "step": 737 }, { "epoch": 0.3000609880056922, "grad_norm": 0.10120426118373871, "learning_rate": 0.00017020150620801954, "loss": 0.9948, "step": 738 }, { "epoch": 0.300467574710307, "grad_norm": 0.09432508796453476, "learning_rate": 0.00017016079788316713, "loss": 1.0334, "step": 739 }, { "epoch": 0.30087416141492174, 
"grad_norm": 0.09501038491725922, "learning_rate": 0.00017012008955831468, "loss": 1.0526, "step": 740 }, { "epoch": 0.3012807481195365, "grad_norm": 0.08357376605272293, "learning_rate": 0.00017007938123346226, "loss": 0.9447, "step": 741 }, { "epoch": 0.3016873348241513, "grad_norm": 0.10098525881767273, "learning_rate": 0.00017003867290860982, "loss": 1.0812, "step": 742 }, { "epoch": 0.302093921528766, "grad_norm": 0.09913761168718338, "learning_rate": 0.0001699979645837574, "loss": 0.9696, "step": 743 }, { "epoch": 0.30250050823338076, "grad_norm": 0.10989236831665039, "learning_rate": 0.00016995725625890495, "loss": 1.1352, "step": 744 }, { "epoch": 0.3029070949379955, "grad_norm": 0.09131161123514175, "learning_rate": 0.0001699165479340525, "loss": 0.9211, "step": 745 }, { "epoch": 0.3033136816426103, "grad_norm": 0.09235724061727524, "learning_rate": 0.00016987583960920009, "loss": 0.9645, "step": 746 }, { "epoch": 0.30372026834722504, "grad_norm": 0.09928274899721146, "learning_rate": 0.00016983513128434764, "loss": 1.0359, "step": 747 }, { "epoch": 0.3041268550518398, "grad_norm": 0.094466932117939, "learning_rate": 0.00016979442295949522, "loss": 0.9406, "step": 748 }, { "epoch": 0.3045334417564546, "grad_norm": 0.0981815829873085, "learning_rate": 0.0001697537146346428, "loss": 1.0819, "step": 749 }, { "epoch": 0.3049400284610693, "grad_norm": 0.10226401686668396, "learning_rate": 0.00016971300630979036, "loss": 1.0499, "step": 750 }, { "epoch": 0.30534661516568407, "grad_norm": 0.09603258967399597, "learning_rate": 0.00016967229798493794, "loss": 1.046, "step": 751 }, { "epoch": 0.30575320187029886, "grad_norm": 0.08673054724931717, "learning_rate": 0.0001696315896600855, "loss": 0.9594, "step": 752 }, { "epoch": 0.3061597885749136, "grad_norm": 0.08766573667526245, "learning_rate": 0.00016959088133523307, "loss": 0.9368, "step": 753 }, { "epoch": 0.30656637527952835, "grad_norm": 0.09147453308105469, "learning_rate": 0.00016955017301038063, "loss": 
1.0122, "step": 754 }, { "epoch": 0.30697296198414314, "grad_norm": 0.09681879729032516, "learning_rate": 0.0001695094646855282, "loss": 1.0743, "step": 755 }, { "epoch": 0.3073795486887579, "grad_norm": 0.09195173531770706, "learning_rate": 0.00016946875636067576, "loss": 0.9974, "step": 756 }, { "epoch": 0.30778613539337263, "grad_norm": 0.08841879665851593, "learning_rate": 0.00016942804803582332, "loss": 0.9233, "step": 757 }, { "epoch": 0.30819272209798737, "grad_norm": 0.09924349188804626, "learning_rate": 0.0001693873397109709, "loss": 1.0277, "step": 758 }, { "epoch": 0.30859930880260217, "grad_norm": 0.09324993193149567, "learning_rate": 0.00016934663138611845, "loss": 0.9836, "step": 759 }, { "epoch": 0.3090058955072169, "grad_norm": 0.0990774929523468, "learning_rate": 0.00016930592306126606, "loss": 1.1064, "step": 760 }, { "epoch": 0.30941248221183165, "grad_norm": 0.09344697743654251, "learning_rate": 0.00016926521473641361, "loss": 0.9465, "step": 761 }, { "epoch": 0.30981906891644645, "grad_norm": 0.10055997967720032, "learning_rate": 0.00016922450641156117, "loss": 1.0706, "step": 762 }, { "epoch": 0.3102256556210612, "grad_norm": 0.09746789187192917, "learning_rate": 0.00016918379808670875, "loss": 1.0355, "step": 763 }, { "epoch": 0.31063224232567593, "grad_norm": 0.08599979430437088, "learning_rate": 0.0001691430897618563, "loss": 0.9445, "step": 764 }, { "epoch": 0.31103882903029073, "grad_norm": 0.09424544125795364, "learning_rate": 0.00016910238143700388, "loss": 1.0393, "step": 765 }, { "epoch": 0.3114454157349055, "grad_norm": 0.09379003942012787, "learning_rate": 0.00016906167311215144, "loss": 0.9328, "step": 766 }, { "epoch": 0.3118520024395202, "grad_norm": 0.09361857920885086, "learning_rate": 0.00016902096478729902, "loss": 1.017, "step": 767 }, { "epoch": 0.312258589144135, "grad_norm": 0.09718876332044601, "learning_rate": 0.00016898025646244657, "loss": 1.057, "step": 768 }, { "epoch": 0.31266517584874975, "grad_norm": 
0.10596045851707458, "learning_rate": 0.00016893954813759413, "loss": 1.1706, "step": 769 }, { "epoch": 0.3130717625533645, "grad_norm": 0.09151418507099152, "learning_rate": 0.0001688988398127417, "loss": 1.0758, "step": 770 }, { "epoch": 0.31347834925797924, "grad_norm": 0.11438169330358505, "learning_rate": 0.00016885813148788926, "loss": 0.9558, "step": 771 }, { "epoch": 0.31388493596259404, "grad_norm": 0.10611554235219955, "learning_rate": 0.00016881742316303687, "loss": 1.1583, "step": 772 }, { "epoch": 0.3142915226672088, "grad_norm": 0.10111712664365768, "learning_rate": 0.00016877671483818443, "loss": 1.0377, "step": 773 }, { "epoch": 0.3146981093718235, "grad_norm": 0.08957924693822861, "learning_rate": 0.00016873600651333198, "loss": 0.9819, "step": 774 }, { "epoch": 0.3151046960764383, "grad_norm": 0.09603768587112427, "learning_rate": 0.00016869529818847956, "loss": 0.9849, "step": 775 }, { "epoch": 0.31551128278105306, "grad_norm": 0.10142724215984344, "learning_rate": 0.00016865458986362712, "loss": 1.0325, "step": 776 }, { "epoch": 0.3159178694856678, "grad_norm": 0.09229385852813721, "learning_rate": 0.0001686138815387747, "loss": 1.0085, "step": 777 }, { "epoch": 0.3163244561902826, "grad_norm": 0.09404195845127106, "learning_rate": 0.00016857317321392225, "loss": 0.9239, "step": 778 }, { "epoch": 0.31673104289489734, "grad_norm": 0.0984378457069397, "learning_rate": 0.00016853246488906983, "loss": 0.9911, "step": 779 }, { "epoch": 0.3171376295995121, "grad_norm": 0.09309301525354385, "learning_rate": 0.00016849175656421739, "loss": 0.9738, "step": 780 }, { "epoch": 0.3175442163041269, "grad_norm": 0.09041745960712433, "learning_rate": 0.00016845104823936494, "loss": 0.9106, "step": 781 }, { "epoch": 0.3179508030087416, "grad_norm": 0.09435202926397324, "learning_rate": 0.00016841033991451252, "loss": 0.9261, "step": 782 }, { "epoch": 0.31835738971335636, "grad_norm": 0.10118155926465988, "learning_rate": 0.0001683696315896601, "loss": 0.9127, 
"step": 783 }, { "epoch": 0.3187639764179711, "grad_norm": 0.09075888991355896, "learning_rate": 0.00016832892326480768, "loss": 0.9201, "step": 784 }, { "epoch": 0.3191705631225859, "grad_norm": 0.11029943823814392, "learning_rate": 0.00016828821493995524, "loss": 1.1284, "step": 785 }, { "epoch": 0.31957714982720065, "grad_norm": 0.08948411792516708, "learning_rate": 0.0001682475066151028, "loss": 1.0084, "step": 786 }, { "epoch": 0.3199837365318154, "grad_norm": 0.09159238636493683, "learning_rate": 0.00016820679829025037, "loss": 0.9912, "step": 787 }, { "epoch": 0.3203903232364302, "grad_norm": 0.10452720522880554, "learning_rate": 0.00016816608996539793, "loss": 1.0907, "step": 788 }, { "epoch": 0.3207969099410449, "grad_norm": 0.10225928574800491, "learning_rate": 0.0001681253816405455, "loss": 1.06, "step": 789 }, { "epoch": 0.32120349664565967, "grad_norm": 0.09055547416210175, "learning_rate": 0.00016808467331569306, "loss": 0.9644, "step": 790 }, { "epoch": 0.32161008335027447, "grad_norm": 0.10098310559988022, "learning_rate": 0.00016804396499084064, "loss": 1.2431, "step": 791 }, { "epoch": 0.3220166700548892, "grad_norm": 0.09253955632448196, "learning_rate": 0.0001680032566659882, "loss": 0.8664, "step": 792 }, { "epoch": 0.32242325675950395, "grad_norm": 0.09952203184366226, "learning_rate": 0.00016796254834113575, "loss": 1.0275, "step": 793 }, { "epoch": 0.32282984346411875, "grad_norm": 1376.9244384765625, "learning_rate": 0.00016792184001628333, "loss": 0.971, "step": 794 }, { "epoch": 0.3232364301687335, "grad_norm": 0.0970926433801651, "learning_rate": 0.00016788113169143091, "loss": 1.0805, "step": 795 }, { "epoch": 0.32364301687334823, "grad_norm": 0.0982482060790062, "learning_rate": 0.00016784042336657847, "loss": 1.0573, "step": 796 }, { "epoch": 0.324049603577963, "grad_norm": 0.0871889740228653, "learning_rate": 0.00016779971504172605, "loss": 0.9305, "step": 797 }, { "epoch": 0.32445619028257777, "grad_norm": 0.10493195801973343, 
"learning_rate": 0.0001677590067168736, "loss": 1.052, "step": 798 }, { "epoch": 0.3248627769871925, "grad_norm": 0.10067540407180786, "learning_rate": 0.00016771829839202118, "loss": 1.0014, "step": 799 }, { "epoch": 0.32526936369180726, "grad_norm": 0.12116575986146927, "learning_rate": 0.00016767759006716874, "loss": 1.0831, "step": 800 }, { "epoch": 0.32567595039642205, "grad_norm": 0.10948335379362106, "learning_rate": 0.00016763688174231632, "loss": 0.9823, "step": 801 }, { "epoch": 0.3260825371010368, "grad_norm": 0.12705056369304657, "learning_rate": 0.00016759617341746387, "loss": 1.0244, "step": 802 }, { "epoch": 0.32648912380565154, "grad_norm": 0.10819011181592941, "learning_rate": 0.00016755546509261143, "loss": 1.057, "step": 803 }, { "epoch": 0.32689571051026634, "grad_norm": 0.0998898446559906, "learning_rate": 0.000167514756767759, "loss": 1.0062, "step": 804 }, { "epoch": 0.3273022972148811, "grad_norm": 0.10293715447187424, "learning_rate": 0.00016747404844290656, "loss": 0.9149, "step": 805 }, { "epoch": 0.3277088839194958, "grad_norm": 0.10077858716249466, "learning_rate": 0.00016743334011805417, "loss": 0.9758, "step": 806 }, { "epoch": 0.3281154706241106, "grad_norm": 0.10782469809055328, "learning_rate": 0.00016739263179320173, "loss": 1.1042, "step": 807 }, { "epoch": 0.32852205732872536, "grad_norm": 0.12658415734767914, "learning_rate": 0.00016735192346834928, "loss": 0.9899, "step": 808 }, { "epoch": 0.3289286440333401, "grad_norm": 0.10533516108989716, "learning_rate": 0.00016731121514349686, "loss": 1.0378, "step": 809 }, { "epoch": 0.32933523073795484, "grad_norm": 0.09154223650693893, "learning_rate": 0.00016727050681864441, "loss": 0.9687, "step": 810 }, { "epoch": 0.32974181744256964, "grad_norm": 0.10092673450708389, "learning_rate": 0.000167229798493792, "loss": 1.1347, "step": 811 }, { "epoch": 0.3301484041471844, "grad_norm": 0.11135310679674149, "learning_rate": 0.00016718909016893955, "loss": 1.1032, "step": 812 }, { "epoch": 
0.3305549908517991, "grad_norm": 0.11181894689798355, "learning_rate": 0.00016714838184408713, "loss": 1.0279, "step": 813 }, { "epoch": 0.3309615775564139, "grad_norm": 0.09581311047077179, "learning_rate": 0.00016710767351923469, "loss": 0.9012, "step": 814 }, { "epoch": 0.33136816426102866, "grad_norm": 0.09267252683639526, "learning_rate": 0.00016706696519438224, "loss": 0.872, "step": 815 }, { "epoch": 0.3317747509656434, "grad_norm": 0.10392332822084427, "learning_rate": 0.00016702625686952982, "loss": 1.017, "step": 816 }, { "epoch": 0.3321813376702582, "grad_norm": 0.10805673897266388, "learning_rate": 0.00016698554854467737, "loss": 1.0013, "step": 817 }, { "epoch": 0.33258792437487295, "grad_norm": 0.09724876284599304, "learning_rate": 0.00016694484021982498, "loss": 0.9708, "step": 818 }, { "epoch": 0.3329945110794877, "grad_norm": 0.09445172548294067, "learning_rate": 0.00016690413189497254, "loss": 1.0422, "step": 819 }, { "epoch": 0.3334010977841025, "grad_norm": 0.09621834754943848, "learning_rate": 0.0001668634235701201, "loss": 1.0557, "step": 820 }, { "epoch": 0.3338076844887172, "grad_norm": 0.09800583869218826, "learning_rate": 0.00016682271524526767, "loss": 0.8907, "step": 821 }, { "epoch": 0.33421427119333197, "grad_norm": 0.0968775674700737, "learning_rate": 0.00016678200692041523, "loss": 0.9531, "step": 822 }, { "epoch": 0.3346208578979467, "grad_norm": 0.10581623017787933, "learning_rate": 0.0001667412985955628, "loss": 0.9462, "step": 823 }, { "epoch": 0.3350274446025615, "grad_norm": 0.10356664657592773, "learning_rate": 0.00016670059027071036, "loss": 1.0315, "step": 824 }, { "epoch": 0.33543403130717625, "grad_norm": 0.10202962160110474, "learning_rate": 0.00016665988194585794, "loss": 1.004, "step": 825 }, { "epoch": 0.335840618011791, "grad_norm": 0.09426257014274597, "learning_rate": 0.0001666191736210055, "loss": 0.958, "step": 826 }, { "epoch": 0.3362472047164058, "grad_norm": 0.09597484767436981, "learning_rate": 
0.00016657846529615305, "loss": 0.9446, "step": 827 }, { "epoch": 0.33665379142102053, "grad_norm": 0.10052574425935745, "learning_rate": 0.00016653775697130063, "loss": 1.0467, "step": 828 }, { "epoch": 0.3370603781256353, "grad_norm": 0.09183933585882187, "learning_rate": 0.0001664970486464482, "loss": 0.9665, "step": 829 }, { "epoch": 0.33746696483025007, "grad_norm": 0.10629544407129288, "learning_rate": 0.0001664563403215958, "loss": 1.1186, "step": 830 }, { "epoch": 0.3378735515348648, "grad_norm": 0.09273683279752731, "learning_rate": 0.00016641563199674335, "loss": 1.0235, "step": 831 }, { "epoch": 0.33828013823947956, "grad_norm": 0.10386747121810913, "learning_rate": 0.0001663749236718909, "loss": 1.1214, "step": 832 }, { "epoch": 0.33868672494409435, "grad_norm": 0.0918426588177681, "learning_rate": 0.00016633421534703848, "loss": 0.9122, "step": 833 }, { "epoch": 0.3390933116487091, "grad_norm": 0.09304569661617279, "learning_rate": 0.00016629350702218604, "loss": 0.9478, "step": 834 }, { "epoch": 0.33949989835332384, "grad_norm": 0.09941792488098145, "learning_rate": 0.00016625279869733362, "loss": 0.9306, "step": 835 }, { "epoch": 0.3399064850579386, "grad_norm": 0.10528752952814102, "learning_rate": 0.00016621209037248117, "loss": 1.0313, "step": 836 }, { "epoch": 0.3403130717625534, "grad_norm": 0.09300185739994049, "learning_rate": 0.00016617138204762875, "loss": 0.9339, "step": 837 }, { "epoch": 0.3407196584671681, "grad_norm": 0.10146823525428772, "learning_rate": 0.0001661306737227763, "loss": 1.0543, "step": 838 }, { "epoch": 0.34112624517178286, "grad_norm": 0.10440155863761902, "learning_rate": 0.00016608996539792386, "loss": 1.1149, "step": 839 }, { "epoch": 0.34153283187639766, "grad_norm": 0.1079946756362915, "learning_rate": 0.00016604925707307144, "loss": 1.1279, "step": 840 }, { "epoch": 0.3419394185810124, "grad_norm": 0.08682949841022491, "learning_rate": 0.00016600854874821902, "loss": 0.878, "step": 841 }, { "epoch": 
0.34234600528562714, "grad_norm": 0.10855992883443832, "learning_rate": 0.0001659678404233666, "loss": 0.9611, "step": 842 }, { "epoch": 0.34275259199024194, "grad_norm": 0.089480921626091, "learning_rate": 0.00016592713209851416, "loss": 0.9065, "step": 843 }, { "epoch": 0.3431591786948567, "grad_norm": 0.0934784933924675, "learning_rate": 0.00016588642377366171, "loss": 1.0118, "step": 844 }, { "epoch": 0.3435657653994714, "grad_norm": 0.10359178483486176, "learning_rate": 0.0001658457154488093, "loss": 1.0117, "step": 845 }, { "epoch": 0.3439723521040862, "grad_norm": 0.09534402191638947, "learning_rate": 0.00016580500712395685, "loss": 0.9719, "step": 846 }, { "epoch": 0.34437893880870096, "grad_norm": 0.1008550152182579, "learning_rate": 0.00016576429879910443, "loss": 1.1044, "step": 847 }, { "epoch": 0.3447855255133157, "grad_norm": 0.09598672389984131, "learning_rate": 0.00016572359047425198, "loss": 0.9764, "step": 848 }, { "epoch": 0.34519211221793045, "grad_norm": 0.09562084078788757, "learning_rate": 0.00016568288214939957, "loss": 0.9964, "step": 849 }, { "epoch": 0.34559869892254524, "grad_norm": 0.0925833135843277, "learning_rate": 0.00016564217382454712, "loss": 0.9717, "step": 850 }, { "epoch": 0.34600528562716, "grad_norm": 0.09141584485769272, "learning_rate": 0.00016560146549969467, "loss": 0.8164, "step": 851 }, { "epoch": 0.34641187233177473, "grad_norm": 0.09384193271398544, "learning_rate": 0.00016556075717484228, "loss": 1.0281, "step": 852 }, { "epoch": 0.3468184590363895, "grad_norm": 0.08962181210517883, "learning_rate": 0.00016552004884998984, "loss": 0.9792, "step": 853 }, { "epoch": 0.34722504574100427, "grad_norm": 0.1070941761136055, "learning_rate": 0.00016547934052513742, "loss": 0.9392, "step": 854 }, { "epoch": 0.347631632445619, "grad_norm": 0.0990995392203331, "learning_rate": 0.00016543863220028497, "loss": 1.015, "step": 855 }, { "epoch": 0.3480382191502338, "grad_norm": 0.10476068407297134, "learning_rate": 
0.00016539792387543253, "loss": 0.974, "step": 856 }, { "epoch": 0.34844480585484855, "grad_norm": 0.09355341643095016, "learning_rate": 0.0001653572155505801, "loss": 0.9558, "step": 857 }, { "epoch": 0.3488513925594633, "grad_norm": 0.10174727439880371, "learning_rate": 0.00016531650722572766, "loss": 0.9864, "step": 858 }, { "epoch": 0.3492579792640781, "grad_norm": 0.09851422905921936, "learning_rate": 0.00016527579890087524, "loss": 0.8641, "step": 859 }, { "epoch": 0.34966456596869283, "grad_norm": 0.10708778351545334, "learning_rate": 0.0001652350905760228, "loss": 0.9883, "step": 860 }, { "epoch": 0.3500711526733076, "grad_norm": 0.0919436663389206, "learning_rate": 0.00016519438225117038, "loss": 1.0178, "step": 861 }, { "epoch": 0.3504777393779223, "grad_norm": 0.09071869403123856, "learning_rate": 0.00016515367392631793, "loss": 1.0118, "step": 862 }, { "epoch": 0.3508843260825371, "grad_norm": 0.10379180312156677, "learning_rate": 0.00016511296560146549, "loss": 1.0068, "step": 863 }, { "epoch": 0.35129091278715185, "grad_norm": 0.10693410784006119, "learning_rate": 0.0001650722572766131, "loss": 1.0659, "step": 864 }, { "epoch": 0.3516974994917666, "grad_norm": 0.08886521309614182, "learning_rate": 0.00016503154895176065, "loss": 0.9019, "step": 865 }, { "epoch": 0.3521040861963814, "grad_norm": 0.110934779047966, "learning_rate": 0.00016499084062690823, "loss": 1.0018, "step": 866 }, { "epoch": 0.35251067290099614, "grad_norm": 0.09179084002971649, "learning_rate": 0.00016495013230205578, "loss": 1.0337, "step": 867 }, { "epoch": 0.3529172596056109, "grad_norm": 0.10181482136249542, "learning_rate": 0.00016490942397720334, "loss": 1.0436, "step": 868 }, { "epoch": 0.3533238463102257, "grad_norm": 0.10369636118412018, "learning_rate": 0.00016486871565235092, "loss": 1.0509, "step": 869 }, { "epoch": 0.3537304330148404, "grad_norm": 0.09437630325555801, "learning_rate": 0.00016482800732749847, "loss": 0.9957, "step": 870 }, { "epoch": 
0.35413701971945516, "grad_norm": 0.0950985848903656, "learning_rate": 0.00016478729900264605, "loss": 1.037, "step": 871 }, { "epoch": 0.35454360642406996, "grad_norm": 0.09876928478479385, "learning_rate": 0.0001647465906777936, "loss": 0.9807, "step": 872 }, { "epoch": 0.3549501931286847, "grad_norm": 0.1047278568148613, "learning_rate": 0.0001647058823529412, "loss": 1.1218, "step": 873 }, { "epoch": 0.35535677983329944, "grad_norm": 0.10372647643089294, "learning_rate": 0.00016466517402808874, "loss": 0.9792, "step": 874 }, { "epoch": 0.3557633665379142, "grad_norm": 0.09626027941703796, "learning_rate": 0.00016462446570323632, "loss": 1.0559, "step": 875 }, { "epoch": 0.356169953242529, "grad_norm": 0.09671976417303085, "learning_rate": 0.0001645837573783839, "loss": 0.9966, "step": 876 }, { "epoch": 0.3565765399471437, "grad_norm": 0.09746148437261581, "learning_rate": 0.00016454304905353146, "loss": 1.0355, "step": 877 }, { "epoch": 0.35698312665175846, "grad_norm": 0.10173183679580688, "learning_rate": 0.00016450234072867904, "loss": 1.0395, "step": 878 }, { "epoch": 0.35738971335637326, "grad_norm": 0.09872651845216751, "learning_rate": 0.0001644616324038266, "loss": 0.9183, "step": 879 }, { "epoch": 0.357796300060988, "grad_norm": 0.08974689990282059, "learning_rate": 0.00016442092407897415, "loss": 0.9767, "step": 880 }, { "epoch": 0.35820288676560275, "grad_norm": 0.10287570208311081, "learning_rate": 0.00016438021575412173, "loss": 0.9762, "step": 881 }, { "epoch": 0.35860947347021754, "grad_norm": 0.09830573201179504, "learning_rate": 0.00016433950742926928, "loss": 0.9674, "step": 882 }, { "epoch": 0.3590160601748323, "grad_norm": 0.0909447893500328, "learning_rate": 0.00016429879910441687, "loss": 0.9603, "step": 883 }, { "epoch": 0.359422646879447, "grad_norm": 0.09882505238056183, "learning_rate": 0.00016425809077956442, "loss": 0.994, "step": 884 }, { "epoch": 0.3598292335840618, "grad_norm": 0.09665820002555847, "learning_rate": 
0.000164217382454712, "loss": 1.0242, "step": 885 }, { "epoch": 0.36023582028867657, "grad_norm": 0.09496365487575531, "learning_rate": 0.00016417667412985955, "loss": 0.9487, "step": 886 }, { "epoch": 0.3606424069932913, "grad_norm": 0.10158530622720718, "learning_rate": 0.00016413596580500714, "loss": 1.0484, "step": 887 }, { "epoch": 0.36104899369790605, "grad_norm": 0.09193973243236542, "learning_rate": 0.00016409525748015472, "loss": 0.9148, "step": 888 }, { "epoch": 0.36145558040252085, "grad_norm": 0.08965738117694855, "learning_rate": 0.00016405454915530227, "loss": 0.8086, "step": 889 }, { "epoch": 0.3618621671071356, "grad_norm": 0.09757012128829956, "learning_rate": 0.00016401384083044983, "loss": 1.0015, "step": 890 }, { "epoch": 0.36226875381175033, "grad_norm": 0.10004543513059616, "learning_rate": 0.0001639731325055974, "loss": 0.9308, "step": 891 }, { "epoch": 0.36267534051636513, "grad_norm": 0.09754510223865509, "learning_rate": 0.00016393242418074496, "loss": 1.1236, "step": 892 }, { "epoch": 0.36308192722097987, "grad_norm": 0.08879724144935608, "learning_rate": 0.00016389171585589254, "loss": 0.9708, "step": 893 }, { "epoch": 0.3634885139255946, "grad_norm": 0.09469077736139297, "learning_rate": 0.0001638510075310401, "loss": 0.9263, "step": 894 }, { "epoch": 0.3638951006302094, "grad_norm": 0.09939096122980118, "learning_rate": 0.00016381029920618768, "loss": 1.0967, "step": 895 }, { "epoch": 0.36430168733482415, "grad_norm": 0.10240636765956879, "learning_rate": 0.00016376959088133523, "loss": 0.992, "step": 896 }, { "epoch": 0.3647082740394389, "grad_norm": 0.09816282987594604, "learning_rate": 0.0001637288825564828, "loss": 1.051, "step": 897 }, { "epoch": 0.3651148607440537, "grad_norm": 0.10622686892747879, "learning_rate": 0.0001636881742316304, "loss": 1.0324, "step": 898 }, { "epoch": 0.36552144744866844, "grad_norm": 0.08964063972234726, "learning_rate": 0.00016364746590677795, "loss": 0.9818, "step": 899 }, { "epoch": 
0.3659280341532832, "grad_norm": 0.09420112520456314, "learning_rate": 0.00016360675758192553, "loss": 1.0712, "step": 900 }, { "epoch": 0.3663346208578979, "grad_norm": 0.09154132008552551, "learning_rate": 0.00016356604925707308, "loss": 0.9368, "step": 901 }, { "epoch": 0.3667412075625127, "grad_norm": 0.09309022128582001, "learning_rate": 0.00016352534093222064, "loss": 0.9532, "step": 902 }, { "epoch": 0.36714779426712746, "grad_norm": 0.09177148342132568, "learning_rate": 0.00016348463260736822, "loss": 0.9278, "step": 903 }, { "epoch": 0.3675543809717422, "grad_norm": 0.09655489027500153, "learning_rate": 0.00016344392428251577, "loss": 0.989, "step": 904 }, { "epoch": 0.367960967676357, "grad_norm": 0.09367511421442032, "learning_rate": 0.00016340321595766335, "loss": 0.9545, "step": 905 }, { "epoch": 0.36836755438097174, "grad_norm": 0.09844557195901871, "learning_rate": 0.0001633625076328109, "loss": 0.9813, "step": 906 }, { "epoch": 0.3687741410855865, "grad_norm": 0.09753169119358063, "learning_rate": 0.0001633217993079585, "loss": 0.9828, "step": 907 }, { "epoch": 0.3691807277902013, "grad_norm": 0.09012485295534134, "learning_rate": 0.00016328109098310604, "loss": 0.9234, "step": 908 }, { "epoch": 0.369587314494816, "grad_norm": 0.09102308750152588, "learning_rate": 0.0001632403826582536, "loss": 0.9435, "step": 909 }, { "epoch": 0.36999390119943076, "grad_norm": 0.10014689713716507, "learning_rate": 0.0001631996743334012, "loss": 0.9706, "step": 910 }, { "epoch": 0.37040048790404556, "grad_norm": 0.09847233444452286, "learning_rate": 0.00016315896600854876, "loss": 1.0053, "step": 911 }, { "epoch": 0.3708070746086603, "grad_norm": 0.08919807523488998, "learning_rate": 0.00016311825768369634, "loss": 0.9453, "step": 912 }, { "epoch": 0.37121366131327505, "grad_norm": 0.09738612174987793, "learning_rate": 0.0001630775493588439, "loss": 0.9532, "step": 913 }, { "epoch": 0.37162024801788984, "grad_norm": 0.09383881837129593, "learning_rate": 
0.00016303684103399145, "loss": 0.9785, "step": 914 }, { "epoch": 0.3720268347225046, "grad_norm": 0.09604702144861221, "learning_rate": 0.00016299613270913903, "loss": 0.9903, "step": 915 }, { "epoch": 0.3724334214271193, "grad_norm": 0.10132728517055511, "learning_rate": 0.00016295542438428658, "loss": 0.9108, "step": 916 }, { "epoch": 0.37284000813173407, "grad_norm": 0.09230207651853561, "learning_rate": 0.00016291471605943417, "loss": 0.9311, "step": 917 }, { "epoch": 0.37324659483634887, "grad_norm": 0.1002974808216095, "learning_rate": 0.00016287400773458172, "loss": 1.036, "step": 918 }, { "epoch": 0.3736531815409636, "grad_norm": 0.10646216571331024, "learning_rate": 0.0001628332994097293, "loss": 1.0322, "step": 919 }, { "epoch": 0.37405976824557835, "grad_norm": 0.10693056881427765, "learning_rate": 0.00016279259108487685, "loss": 0.9632, "step": 920 }, { "epoch": 0.37446635495019315, "grad_norm": 0.09401918947696686, "learning_rate": 0.00016275188276002444, "loss": 0.9099, "step": 921 }, { "epoch": 0.3748729416548079, "grad_norm": 0.09380720555782318, "learning_rate": 0.00016271117443517202, "loss": 0.9477, "step": 922 }, { "epoch": 0.37527952835942263, "grad_norm": 0.10077174752950668, "learning_rate": 0.00016267046611031957, "loss": 1.0681, "step": 923 }, { "epoch": 0.37568611506403743, "grad_norm": 0.09828921407461166, "learning_rate": 0.00016262975778546715, "loss": 1.0714, "step": 924 }, { "epoch": 0.37609270176865217, "grad_norm": 0.09168947488069534, "learning_rate": 0.0001625890494606147, "loss": 0.9704, "step": 925 }, { "epoch": 0.3764992884732669, "grad_norm": 0.11230597645044327, "learning_rate": 0.00016254834113576226, "loss": 1.035, "step": 926 }, { "epoch": 0.3769058751778817, "grad_norm": 0.09307698905467987, "learning_rate": 0.00016250763281090984, "loss": 1.0032, "step": 927 }, { "epoch": 0.37731246188249645, "grad_norm": 0.0920303463935852, "learning_rate": 0.0001624669244860574, "loss": 0.9544, "step": 928 }, { "epoch": 
0.3777190485871112, "grad_norm": 0.09571109712123871, "learning_rate": 0.00016242621616120498, "loss": 1.0121, "step": 929 }, { "epoch": 0.37812563529172594, "grad_norm": 0.10284926742315292, "learning_rate": 0.00016238550783635253, "loss": 1.0774, "step": 930 }, { "epoch": 0.37853222199634073, "grad_norm": 0.10457168519496918, "learning_rate": 0.0001623447995115001, "loss": 1.0838, "step": 931 }, { "epoch": 0.3789388087009555, "grad_norm": 0.09635209292173386, "learning_rate": 0.00016230409118664767, "loss": 1.0938, "step": 932 }, { "epoch": 0.3793453954055702, "grad_norm": 0.09526656568050385, "learning_rate": 0.00016226338286179525, "loss": 0.8854, "step": 933 }, { "epoch": 0.379751982110185, "grad_norm": 0.09416744112968445, "learning_rate": 0.00016222267453694283, "loss": 0.9763, "step": 934 }, { "epoch": 0.38015856881479976, "grad_norm": 0.10477912425994873, "learning_rate": 0.00016218196621209038, "loss": 1.0202, "step": 935 }, { "epoch": 0.3805651555194145, "grad_norm": 0.1058596596121788, "learning_rate": 0.00016214125788723796, "loss": 1.094, "step": 936 }, { "epoch": 0.3809717422240293, "grad_norm": 0.09116796404123306, "learning_rate": 0.00016210054956238552, "loss": 0.8538, "step": 937 }, { "epoch": 0.38137832892864404, "grad_norm": 0.10126717388629913, "learning_rate": 0.00016205984123753307, "loss": 1.0668, "step": 938 }, { "epoch": 0.3817849156332588, "grad_norm": 0.09571392089128494, "learning_rate": 0.00016201913291268065, "loss": 1.0369, "step": 939 }, { "epoch": 0.3821915023378736, "grad_norm": 0.0956893190741539, "learning_rate": 0.0001619784245878282, "loss": 1.0634, "step": 940 }, { "epoch": 0.3825980890424883, "grad_norm": 0.09609857201576233, "learning_rate": 0.0001619377162629758, "loss": 1.0129, "step": 941 }, { "epoch": 0.38300467574710306, "grad_norm": 0.09440251439809799, "learning_rate": 0.00016189700793812334, "loss": 1.0692, "step": 942 }, { "epoch": 0.3834112624517178, "grad_norm": 0.09696158766746521, "learning_rate": 
0.00016185629961327092, "loss": 1.0562, "step": 943 }, { "epoch": 0.3838178491563326, "grad_norm": 0.09598648548126221, "learning_rate": 0.0001618155912884185, "loss": 1.0046, "step": 944 }, { "epoch": 0.38422443586094734, "grad_norm": 0.09640836715698242, "learning_rate": 0.00016177488296356606, "loss": 0.9735, "step": 945 }, { "epoch": 0.3846310225655621, "grad_norm": 0.08648312091827393, "learning_rate": 0.00016173417463871364, "loss": 0.8721, "step": 946 }, { "epoch": 0.3850376092701769, "grad_norm": 0.09831465780735016, "learning_rate": 0.0001616934663138612, "loss": 0.9943, "step": 947 }, { "epoch": 0.3854441959747916, "grad_norm": 0.09906010329723358, "learning_rate": 0.00016165275798900878, "loss": 1.0565, "step": 948 }, { "epoch": 0.38585078267940637, "grad_norm": 0.08963965624570847, "learning_rate": 0.00016161204966415633, "loss": 0.9376, "step": 949 }, { "epoch": 0.38625736938402117, "grad_norm": 0.09176084399223328, "learning_rate": 0.00016157134133930388, "loss": 0.9542, "step": 950 }, { "epoch": 0.3866639560886359, "grad_norm": 0.09641337394714355, "learning_rate": 0.00016153063301445146, "loss": 1.0248, "step": 951 }, { "epoch": 0.38707054279325065, "grad_norm": 0.09608114510774612, "learning_rate": 0.00016148992468959902, "loss": 0.9336, "step": 952 }, { "epoch": 0.38747712949786545, "grad_norm": 0.1011141762137413, "learning_rate": 0.0001614492163647466, "loss": 0.9581, "step": 953 }, { "epoch": 0.3878837162024802, "grad_norm": 0.08915555477142334, "learning_rate": 0.00016140850803989415, "loss": 0.9766, "step": 954 }, { "epoch": 0.38829030290709493, "grad_norm": 0.09591024369001389, "learning_rate": 0.00016136779971504174, "loss": 0.9601, "step": 955 }, { "epoch": 0.3886968896117097, "grad_norm": 0.0986289530992508, "learning_rate": 0.00016132709139018932, "loss": 0.9403, "step": 956 }, { "epoch": 0.38910347631632447, "grad_norm": 0.11183958500623703, "learning_rate": 0.00016128638306533687, "loss": 1.1319, "step": 957 }, { "epoch": 
0.3895100630209392, "grad_norm": 0.09588544070720673, "learning_rate": 0.00016124567474048445, "loss": 0.949, "step": 958 }, { "epoch": 0.38991664972555395, "grad_norm": 0.09099708497524261, "learning_rate": 0.000161204966415632, "loss": 0.8462, "step": 959 }, { "epoch": 0.39032323643016875, "grad_norm": 0.08816317468881607, "learning_rate": 0.0001611642580907796, "loss": 0.9555, "step": 960 }, { "epoch": 0.3907298231347835, "grad_norm": 0.10011658817529678, "learning_rate": 0.00016112354976592714, "loss": 0.984, "step": 961 }, { "epoch": 0.39113640983939824, "grad_norm": 0.10004715621471405, "learning_rate": 0.0001610828414410747, "loss": 1.0356, "step": 962 }, { "epoch": 0.39154299654401303, "grad_norm": 0.09157074242830276, "learning_rate": 0.00016104213311622228, "loss": 1.0009, "step": 963 }, { "epoch": 0.3919495832486278, "grad_norm": 0.09388852119445801, "learning_rate": 0.00016100142479136983, "loss": 0.9339, "step": 964 }, { "epoch": 0.3923561699532425, "grad_norm": 0.08737456053495407, "learning_rate": 0.0001609607164665174, "loss": 0.9075, "step": 965 }, { "epoch": 0.3927627566578573, "grad_norm": 0.10345963388681412, "learning_rate": 0.00016092000814166497, "loss": 0.9599, "step": 966 }, { "epoch": 0.39316934336247206, "grad_norm": 0.09817633777856827, "learning_rate": 0.00016087929981681255, "loss": 0.9688, "step": 967 }, { "epoch": 0.3935759300670868, "grad_norm": 0.09691375494003296, "learning_rate": 0.00016083859149196013, "loss": 0.9936, "step": 968 }, { "epoch": 0.39398251677170154, "grad_norm": 0.09408018738031387, "learning_rate": 0.00016079788316710768, "loss": 1.0414, "step": 969 }, { "epoch": 0.39438910347631634, "grad_norm": 0.0967404916882515, "learning_rate": 0.00016075717484225526, "loss": 1.0127, "step": 970 }, { "epoch": 0.3947956901809311, "grad_norm": 14.380797386169434, "learning_rate": 0.00016071646651740282, "loss": 1.0321, "step": 971 }, { "epoch": 0.3952022768855458, "grad_norm": 0.1003538966178894, "learning_rate": 
0.0001606757581925504, "loss": 1.0326, "step": 972 }, { "epoch": 0.3956088635901606, "grad_norm": 0.11198288947343826, "learning_rate": 0.00016063504986769795, "loss": 1.0124, "step": 973 }, { "epoch": 0.39601545029477536, "grad_norm": 0.11186987906694412, "learning_rate": 0.0001605943415428455, "loss": 1.0373, "step": 974 }, { "epoch": 0.3964220369993901, "grad_norm": 0.10965568572282791, "learning_rate": 0.0001605536332179931, "loss": 1.004, "step": 975 }, { "epoch": 0.3968286237040049, "grad_norm": 0.09624014794826508, "learning_rate": 0.00016051292489314064, "loss": 0.9294, "step": 976 }, { "epoch": 0.39723521040861964, "grad_norm": 0.10577430576086044, "learning_rate": 0.00016047221656828822, "loss": 1.0446, "step": 977 }, { "epoch": 0.3976417971132344, "grad_norm": 0.10079281777143478, "learning_rate": 0.00016043150824343578, "loss": 1.0926, "step": 978 }, { "epoch": 0.3980483838178492, "grad_norm": 0.09973543137311935, "learning_rate": 0.00016039079991858336, "loss": 1.0352, "step": 979 }, { "epoch": 0.3984549705224639, "grad_norm": 0.10137680172920227, "learning_rate": 0.00016035009159373094, "loss": 0.9871, "step": 980 }, { "epoch": 0.39886155722707867, "grad_norm": 0.09879370778799057, "learning_rate": 0.0001603093832688785, "loss": 1.0077, "step": 981 }, { "epoch": 0.3992681439316934, "grad_norm": 0.09389031678438187, "learning_rate": 0.00016026867494402607, "loss": 0.8345, "step": 982 }, { "epoch": 0.3996747306363082, "grad_norm": 0.09968902170658112, "learning_rate": 0.00016022796661917363, "loss": 1.0025, "step": 983 }, { "epoch": 0.40008131734092295, "grad_norm": 0.09817297756671906, "learning_rate": 0.00016018725829432118, "loss": 1.0436, "step": 984 }, { "epoch": 0.4004879040455377, "grad_norm": 0.09468533098697662, "learning_rate": 0.00016014654996946876, "loss": 0.9757, "step": 985 }, { "epoch": 0.4008944907501525, "grad_norm": 0.10573722422122955, "learning_rate": 0.00016010584164461632, "loss": 0.9627, "step": 986 }, { "epoch": 
0.40130107745476723, "grad_norm": 0.09328682720661163, "learning_rate": 0.0001600651333197639, "loss": 0.8463, "step": 987 }, { "epoch": 0.40170766415938197, "grad_norm": 0.10987431555986404, "learning_rate": 0.00016002442499491145, "loss": 1.0123, "step": 988 }, { "epoch": 0.40211425086399677, "grad_norm": 0.09426723420619965, "learning_rate": 0.00015998371667005903, "loss": 0.8706, "step": 989 }, { "epoch": 0.4025208375686115, "grad_norm": 0.10630480945110321, "learning_rate": 0.00015994300834520662, "loss": 0.9596, "step": 990 }, { "epoch": 0.40292742427322625, "grad_norm": 0.11168541014194489, "learning_rate": 0.00015990230002035417, "loss": 1.0848, "step": 991 }, { "epoch": 0.40333401097784105, "grad_norm": 0.09651850908994675, "learning_rate": 0.00015986159169550175, "loss": 0.9965, "step": 992 }, { "epoch": 0.4037405976824558, "grad_norm": 0.10979650169610977, "learning_rate": 0.0001598208833706493, "loss": 1.0658, "step": 993 }, { "epoch": 0.40414718438707053, "grad_norm": 0.10453470051288605, "learning_rate": 0.00015978017504579689, "loss": 0.8697, "step": 994 }, { "epoch": 0.4045537710916853, "grad_norm": 0.09393549710512161, "learning_rate": 0.00015973946672094444, "loss": 0.9818, "step": 995 }, { "epoch": 0.4049603577963001, "grad_norm": 0.10618766397237778, "learning_rate": 0.000159698758396092, "loss": 1.1107, "step": 996 }, { "epoch": 0.4053669445009148, "grad_norm": 0.09401122480630875, "learning_rate": 0.00015965805007123958, "loss": 0.9484, "step": 997 }, { "epoch": 0.40577353120552956, "grad_norm": 0.10404767096042633, "learning_rate": 0.00015961734174638713, "loss": 1.0457, "step": 998 }, { "epoch": 0.40618011791014436, "grad_norm": 0.10144046694040298, "learning_rate": 0.0001595766334215347, "loss": 1.0164, "step": 999 }, { "epoch": 0.4065867046147591, "grad_norm": 69.98524475097656, "learning_rate": 0.00015953592509668227, "loss": 1.0974, "step": 1000 }, { "epoch": 0.40699329131937384, "grad_norm": 0.09672264754772186, "learning_rate": 
0.00015949521677182985, "loss": 0.9351, "step": 1001 }, { "epoch": 0.40739987802398864, "grad_norm": 0.09717651456594467, "learning_rate": 0.00015945450844697743, "loss": 0.992, "step": 1002 }, { "epoch": 0.4078064647286034, "grad_norm": 0.10012587159872055, "learning_rate": 0.00015941380012212498, "loss": 0.9564, "step": 1003 }, { "epoch": 0.4082130514332181, "grad_norm": 0.11782870441675186, "learning_rate": 0.00015937309179727256, "loss": 1.0572, "step": 1004 }, { "epoch": 0.4086196381378329, "grad_norm": 0.12483621388673782, "learning_rate": 0.00015933238347242012, "loss": 0.9195, "step": 1005 }, { "epoch": 0.40902622484244766, "grad_norm": 0.16169683635234833, "learning_rate": 0.0001592916751475677, "loss": 0.9553, "step": 1006 }, { "epoch": 0.4094328115470624, "grad_norm": 0.18174675107002258, "learning_rate": 0.00015925096682271525, "loss": 0.9884, "step": 1007 }, { "epoch": 0.40983939825167714, "grad_norm": 0.15436168015003204, "learning_rate": 0.0001592102584978628, "loss": 0.975, "step": 1008 }, { "epoch": 0.41024598495629194, "grad_norm": 0.37080836296081543, "learning_rate": 0.0001591695501730104, "loss": 0.9542, "step": 1009 }, { "epoch": 0.4106525716609067, "grad_norm": 0.10444851219654083, "learning_rate": 0.00015912884184815794, "loss": 0.8729, "step": 1010 }, { "epoch": 0.4110591583655214, "grad_norm": 0.09934143722057343, "learning_rate": 0.00015908813352330552, "loss": 1.0016, "step": 1011 }, { "epoch": 0.4114657450701362, "grad_norm": 0.10826974362134933, "learning_rate": 0.00015904742519845308, "loss": 1.0141, "step": 1012 }, { "epoch": 0.41187233177475097, "grad_norm": 0.0943305566906929, "learning_rate": 0.00015900671687360066, "loss": 0.9172, "step": 1013 }, { "epoch": 0.4122789184793657, "grad_norm": 0.0978141725063324, "learning_rate": 0.00015896600854874824, "loss": 1.0325, "step": 1014 }, { "epoch": 0.4126855051839805, "grad_norm": 0.10199011117219925, "learning_rate": 0.0001589253002238958, "loss": 1.1241, "step": 1015 }, { "epoch": 
0.41309209188859525, "grad_norm": 0.09425395727157593, "learning_rate": 0.00015888459189904337, "loss": 0.9909, "step": 1016 }, { "epoch": 0.41349867859321, "grad_norm": 0.10020224750041962, "learning_rate": 0.00015884388357419093, "loss": 1.048, "step": 1017 }, { "epoch": 0.4139052652978248, "grad_norm": 0.09428106248378754, "learning_rate": 0.0001588031752493385, "loss": 1.0091, "step": 1018 }, { "epoch": 0.41431185200243953, "grad_norm": 0.10922541469335556, "learning_rate": 0.00015876246692448606, "loss": 1.0321, "step": 1019 }, { "epoch": 0.41471843870705427, "grad_norm": 0.10005990415811539, "learning_rate": 0.00015872175859963362, "loss": 0.9331, "step": 1020 }, { "epoch": 0.415125025411669, "grad_norm": 0.09880723059177399, "learning_rate": 0.0001586810502747812, "loss": 1.0831, "step": 1021 }, { "epoch": 0.4155316121162838, "grad_norm": 0.10210402309894562, "learning_rate": 0.00015864034194992875, "loss": 0.8376, "step": 1022 }, { "epoch": 0.41593819882089855, "grad_norm": 0.10243164747953415, "learning_rate": 0.00015859963362507633, "loss": 0.974, "step": 1023 }, { "epoch": 0.4163447855255133, "grad_norm": 0.0910453349351883, "learning_rate": 0.0001585589253002239, "loss": 0.9429, "step": 1024 }, { "epoch": 0.4167513722301281, "grad_norm": 0.10028322786092758, "learning_rate": 0.00015851821697537147, "loss": 0.9692, "step": 1025 }, { "epoch": 0.41715795893474283, "grad_norm": 0.10679830610752106, "learning_rate": 0.00015847750865051905, "loss": 1.0561, "step": 1026 }, { "epoch": 0.4175645456393576, "grad_norm": 0.10921266674995422, "learning_rate": 0.0001584368003256666, "loss": 1.0046, "step": 1027 }, { "epoch": 0.4179711323439724, "grad_norm": 0.09717408567667007, "learning_rate": 0.00015839609200081419, "loss": 0.9963, "step": 1028 }, { "epoch": 0.4183777190485871, "grad_norm": 0.10907028615474701, "learning_rate": 0.00015835538367596174, "loss": 1.1112, "step": 1029 }, { "epoch": 0.41878430575320186, "grad_norm": 0.0934014692902565, "learning_rate": 
0.00015831467535110932, "loss": 0.9392, "step": 1030 }, { "epoch": 0.41919089245781666, "grad_norm": 0.10372751951217651, "learning_rate": 0.00015827396702625688, "loss": 0.9911, "step": 1031 }, { "epoch": 0.4195974791624314, "grad_norm": 0.0926424190402031, "learning_rate": 0.00015823325870140443, "loss": 0.9568, "step": 1032 }, { "epoch": 0.42000406586704614, "grad_norm": 0.09991902112960815, "learning_rate": 0.000158192550376552, "loss": 1.1551, "step": 1033 }, { "epoch": 0.4204106525716609, "grad_norm": 0.10407492518424988, "learning_rate": 0.00015815184205169956, "loss": 1.0001, "step": 1034 }, { "epoch": 0.4208172392762757, "grad_norm": 0.09984209388494492, "learning_rate": 0.00015811113372684715, "loss": 1.0661, "step": 1035 }, { "epoch": 0.4212238259808904, "grad_norm": 0.08815161138772964, "learning_rate": 0.00015807042540199473, "loss": 0.9132, "step": 1036 }, { "epoch": 0.42163041268550516, "grad_norm": 0.10167308151721954, "learning_rate": 0.00015802971707714228, "loss": 1.0113, "step": 1037 }, { "epoch": 0.42203699939011996, "grad_norm": 0.09093226492404938, "learning_rate": 0.00015798900875228986, "loss": 0.9016, "step": 1038 }, { "epoch": 0.4224435860947347, "grad_norm": 0.09932513535022736, "learning_rate": 0.00015794830042743742, "loss": 1.0756, "step": 1039 }, { "epoch": 0.42285017279934944, "grad_norm": 0.09752842038869858, "learning_rate": 0.000157907592102585, "loss": 1.0552, "step": 1040 }, { "epoch": 0.42325675950396424, "grad_norm": 0.09833484143018723, "learning_rate": 0.00015786688377773255, "loss": 1.0448, "step": 1041 }, { "epoch": 0.423663346208579, "grad_norm": 0.09440255910158157, "learning_rate": 0.00015782617545288013, "loss": 0.966, "step": 1042 }, { "epoch": 0.4240699329131937, "grad_norm": 0.09800337255001068, "learning_rate": 0.0001577854671280277, "loss": 0.9517, "step": 1043 }, { "epoch": 0.4244765196178085, "grad_norm": 0.100920170545578, "learning_rate": 0.00015774475880317524, "loss": 1.0075, "step": 1044 }, { "epoch": 
0.42488310632242327, "grad_norm": 0.10229222476482391, "learning_rate": 0.00015770405047832282, "loss": 1.0644, "step": 1045 }, { "epoch": 0.425289693027038, "grad_norm": 0.09247329086065292, "learning_rate": 0.00015766334215347038, "loss": 0.9628, "step": 1046 }, { "epoch": 0.42569627973165275, "grad_norm": 0.08849867433309555, "learning_rate": 0.00015762263382861796, "loss": 0.9044, "step": 1047 }, { "epoch": 0.42610286643626755, "grad_norm": 0.10035345703363419, "learning_rate": 0.00015758192550376554, "loss": 1.0025, "step": 1048 }, { "epoch": 0.4265094531408823, "grad_norm": 0.10530912131071091, "learning_rate": 0.0001575412171789131, "loss": 1.1156, "step": 1049 }, { "epoch": 0.42691603984549703, "grad_norm": 0.0959988534450531, "learning_rate": 0.00015750050885406067, "loss": 0.9927, "step": 1050 }, { "epoch": 0.42732262655011183, "grad_norm": 0.09642820060253143, "learning_rate": 0.00015745980052920823, "loss": 1.0081, "step": 1051 }, { "epoch": 0.42772921325472657, "grad_norm": 0.09695859253406525, "learning_rate": 0.0001574190922043558, "loss": 1.0104, "step": 1052 }, { "epoch": 0.4281357999593413, "grad_norm": 0.09271597862243652, "learning_rate": 0.00015737838387950336, "loss": 0.9655, "step": 1053 }, { "epoch": 0.4285423866639561, "grad_norm": 0.11482039839029312, "learning_rate": 0.00015733767555465094, "loss": 1.1689, "step": 1054 }, { "epoch": 0.42894897336857085, "grad_norm": 0.12072457373142242, "learning_rate": 0.0001572969672297985, "loss": 1.1573, "step": 1055 }, { "epoch": 0.4293555600731856, "grad_norm": 0.10628031194210052, "learning_rate": 0.00015725625890494605, "loss": 1.0604, "step": 1056 }, { "epoch": 0.4297621467778004, "grad_norm": 0.09997066110372543, "learning_rate": 0.00015721555058009363, "loss": 1.0791, "step": 1057 }, { "epoch": 0.43016873348241513, "grad_norm": 0.09063227474689484, "learning_rate": 0.0001571748422552412, "loss": 0.8821, "step": 1058 }, { "epoch": 0.4305753201870299, "grad_norm": 0.09447956085205078, 
"learning_rate": 0.0001571341339303888, "loss": 0.9104, "step": 1059 }, { "epoch": 0.4309819068916446, "grad_norm": 0.09488890320062637, "learning_rate": 0.00015709342560553635, "loss": 0.9476, "step": 1060 }, { "epoch": 0.4313884935962594, "grad_norm": 0.09842818230390549, "learning_rate": 0.0001570527172806839, "loss": 1.0041, "step": 1061 }, { "epoch": 0.43179508030087416, "grad_norm": 0.10026121884584427, "learning_rate": 0.00015701200895583149, "loss": 0.9704, "step": 1062 }, { "epoch": 0.4322016670054889, "grad_norm": 0.10602670162916183, "learning_rate": 0.00015697130063097904, "loss": 0.9626, "step": 1063 }, { "epoch": 0.4326082537101037, "grad_norm": 0.09817321598529816, "learning_rate": 0.00015693059230612662, "loss": 1.018, "step": 1064 }, { "epoch": 0.43301484041471844, "grad_norm": 0.10956291854381561, "learning_rate": 0.00015688988398127417, "loss": 1.0773, "step": 1065 }, { "epoch": 0.4334214271193332, "grad_norm": 0.10461815446615219, "learning_rate": 0.00015684917565642176, "loss": 1.0276, "step": 1066 }, { "epoch": 0.433828013823948, "grad_norm": 0.1066046878695488, "learning_rate": 0.0001568084673315693, "loss": 1.0104, "step": 1067 }, { "epoch": 0.4342346005285627, "grad_norm": 0.09685570001602173, "learning_rate": 0.00015676775900671686, "loss": 0.9324, "step": 1068 }, { "epoch": 0.43464118723317746, "grad_norm": 0.10849763453006744, "learning_rate": 0.00015672705068186445, "loss": 1.1898, "step": 1069 }, { "epoch": 0.43504777393779226, "grad_norm": 0.09181284159421921, "learning_rate": 0.000156686342357012, "loss": 0.9655, "step": 1070 }, { "epoch": 0.435454360642407, "grad_norm": 0.09956375509500504, "learning_rate": 0.00015664563403215958, "loss": 0.9767, "step": 1071 }, { "epoch": 0.43586094734702174, "grad_norm": 0.09587504714727402, "learning_rate": 0.00015660492570730716, "loss": 1.0046, "step": 1072 }, { "epoch": 0.4362675340516365, "grad_norm": 0.09740083664655685, "learning_rate": 0.00015656421738245472, "loss": 1.0235, "step": 1073 
}, { "epoch": 0.4366741207562513, "grad_norm": 0.1067059263586998, "learning_rate": 0.0001565235090576023, "loss": 1.0495, "step": 1074 }, { "epoch": 0.437080707460866, "grad_norm": 0.0951162800192833, "learning_rate": 0.00015648280073274985, "loss": 1.0601, "step": 1075 }, { "epoch": 0.43748729416548077, "grad_norm": 0.10814306139945984, "learning_rate": 0.00015644209240789743, "loss": 1.0642, "step": 1076 }, { "epoch": 0.43789388087009556, "grad_norm": 0.10104648023843765, "learning_rate": 0.000156401384083045, "loss": 1.0183, "step": 1077 }, { "epoch": 0.4383004675747103, "grad_norm": 0.10644647479057312, "learning_rate": 0.00015636067575819254, "loss": 0.9845, "step": 1078 }, { "epoch": 0.43870705427932505, "grad_norm": 0.10958357155323029, "learning_rate": 0.00015631996743334012, "loss": 1.0803, "step": 1079 }, { "epoch": 0.43911364098393985, "grad_norm": 0.09988164156675339, "learning_rate": 0.00015627925910848768, "loss": 0.9468, "step": 1080 }, { "epoch": 0.4395202276885546, "grad_norm": 0.09617158770561218, "learning_rate": 0.00015623855078363526, "loss": 0.9929, "step": 1081 }, { "epoch": 0.43992681439316933, "grad_norm": 0.09235814958810806, "learning_rate": 0.00015619784245878284, "loss": 0.9681, "step": 1082 }, { "epoch": 0.4403334010977841, "grad_norm": 0.0999334529042244, "learning_rate": 0.0001561571341339304, "loss": 1.0971, "step": 1083 }, { "epoch": 0.44073998780239887, "grad_norm": 0.09117653220891953, "learning_rate": 0.00015611642580907797, "loss": 0.9176, "step": 1084 }, { "epoch": 0.4411465745070136, "grad_norm": 0.11608845740556717, "learning_rate": 0.00015607571748422553, "loss": 1.016, "step": 1085 }, { "epoch": 0.44155316121162835, "grad_norm": null, "learning_rate": 0.0001560350091593731, "loss": 3.9953, "step": 1086 }, { "epoch": 0.44195974791624315, "grad_norm": 0.08910229802131653, "learning_rate": 0.00015599430083452066, "loss": 0.9387, "step": 1087 }, { "epoch": 0.4423663346208579, "grad_norm": null, "learning_rate": 
0.00015595359250966824, "loss": 0.9939, "step": 1088 }, { "epoch": 0.44277292132547263, "grad_norm": 0.13621561229228973, "learning_rate": 0.0001559128841848158, "loss": 0.9376, "step": 1089 }, { "epoch": 0.44317950803008743, "grad_norm": 0.263536661863327, "learning_rate": 0.00015587217585996335, "loss": 1.0808, "step": 1090 }, { "epoch": 0.4435860947347022, "grad_norm": 0.21123525500297546, "learning_rate": 0.00015583146753511093, "loss": 0.9142, "step": 1091 }, { "epoch": 0.4439926814393169, "grad_norm": 0.16994574666023254, "learning_rate": 0.0001557907592102585, "loss": 1.0273, "step": 1092 }, { "epoch": 0.4443992681439317, "grad_norm": 0.1400166153907776, "learning_rate": 0.00015575005088540607, "loss": 0.9135, "step": 1093 }, { "epoch": 0.44480585484854646, "grad_norm": 0.13885940611362457, "learning_rate": 0.00015570934256055365, "loss": 1.158, "step": 1094 }, { "epoch": 0.4452124415531612, "grad_norm": 0.12671105563640594, "learning_rate": 0.0001556686342357012, "loss": 0.9401, "step": 1095 }, { "epoch": 0.445619028257776, "grad_norm": 0.11388255655765533, "learning_rate": 0.00015562792591084879, "loss": 0.9454, "step": 1096 }, { "epoch": 0.44602561496239074, "grad_norm": 0.13421480357646942, "learning_rate": 0.00015558721758599634, "loss": 1.0017, "step": 1097 }, { "epoch": 0.4464322016670055, "grad_norm": 0.11914326995611191, "learning_rate": 0.00015554650926114392, "loss": 1.0312, "step": 1098 }, { "epoch": 0.4468387883716202, "grad_norm": 0.11101624369621277, "learning_rate": 0.00015550580093629147, "loss": 1.0555, "step": 1099 }, { "epoch": 0.447245375076235, "grad_norm": 0.12158175557851791, "learning_rate": 0.00015546509261143906, "loss": 1.008, "step": 1100 }, { "epoch": 0.44765196178084976, "grad_norm": 0.09680108726024628, "learning_rate": 0.0001554243842865866, "loss": 0.8603, "step": 1101 }, { "epoch": 0.4480585484854645, "grad_norm": 0.12374867498874664, "learning_rate": 0.00015538367596173416, "loss": 0.9282, "step": 1102 }, { "epoch": 
0.4484651351900793, "grad_norm": 0.12144714593887329, "learning_rate": 0.00015534296763688175, "loss": 1.1072, "step": 1103 }, { "epoch": 0.44887172189469404, "grad_norm": 0.13777373731136322, "learning_rate": 0.0001553022593120293, "loss": 1.0914, "step": 1104 }, { "epoch": 0.4492783085993088, "grad_norm": 0.14908930659294128, "learning_rate": 0.0001552615509871769, "loss": 1.0349, "step": 1105 }, { "epoch": 0.4496848953039236, "grad_norm": 0.09202148765325546, "learning_rate": 0.00015522084266232446, "loss": 1.0544, "step": 1106 }, { "epoch": 0.4500914820085383, "grad_norm": 0.14155222475528717, "learning_rate": 0.00015518013433747202, "loss": 1.145, "step": 1107 }, { "epoch": 0.45049806871315307, "grad_norm": 0.13090363144874573, "learning_rate": 0.0001551394260126196, "loss": 1.0815, "step": 1108 }, { "epoch": 0.45090465541776786, "grad_norm": 0.09763860702514648, "learning_rate": 0.00015509871768776715, "loss": 0.9798, "step": 1109 }, { "epoch": 0.4513112421223826, "grad_norm": 0.11425314843654633, "learning_rate": 0.00015505800936291473, "loss": 1.0609, "step": 1110 }, { "epoch": 0.45171782882699735, "grad_norm": 0.1132175624370575, "learning_rate": 0.00015501730103806229, "loss": 1.0784, "step": 1111 }, { "epoch": 0.4521244155316121, "grad_norm": 0.09365850687026978, "learning_rate": 0.00015497659271320987, "loss": 0.971, "step": 1112 }, { "epoch": 0.4525310022362269, "grad_norm": 0.10959959030151367, "learning_rate": 0.00015493588438835742, "loss": 1.0991, "step": 1113 }, { "epoch": 0.45293758894084163, "grad_norm": 0.1113215982913971, "learning_rate": 0.00015489517606350498, "loss": 0.9664, "step": 1114 }, { "epoch": 0.45334417564545637, "grad_norm": 0.09337687492370605, "learning_rate": 0.00015485446773865256, "loss": 0.9801, "step": 1115 }, { "epoch": 0.45375076235007117, "grad_norm": 0.09887603670358658, "learning_rate": 0.0001548137594138001, "loss": 0.9329, "step": 1116 }, { "epoch": 0.4541573490546859, "grad_norm": 0.09895873069763184, 
"learning_rate": 0.00015477305108894772, "loss": 0.9742, "step": 1117 }, { "epoch": 0.45456393575930065, "grad_norm": 0.10547256469726562, "learning_rate": 0.00015473234276409527, "loss": 1.0917, "step": 1118 }, { "epoch": 0.45497052246391545, "grad_norm": 0.10243359208106995, "learning_rate": 0.00015469163443924283, "loss": 1.153, "step": 1119 }, { "epoch": 0.4553771091685302, "grad_norm": 0.10679526627063751, "learning_rate": 0.0001546509261143904, "loss": 1.0256, "step": 1120 }, { "epoch": 0.45578369587314493, "grad_norm": 0.10002291947603226, "learning_rate": 0.00015461021778953796, "loss": 1.0984, "step": 1121 }, { "epoch": 0.45619028257775973, "grad_norm": 0.0953390821814537, "learning_rate": 0.00015456950946468554, "loss": 1.015, "step": 1122 }, { "epoch": 0.4565968692823745, "grad_norm": 0.09738897532224655, "learning_rate": 0.0001545288011398331, "loss": 1.0193, "step": 1123 }, { "epoch": 0.4570034559869892, "grad_norm": 0.09633835405111313, "learning_rate": 0.00015448809281498068, "loss": 1.0595, "step": 1124 }, { "epoch": 0.45741004269160396, "grad_norm": 0.09380267560482025, "learning_rate": 0.00015444738449012823, "loss": 1.0411, "step": 1125 }, { "epoch": 0.45781662939621875, "grad_norm": 0.09572221338748932, "learning_rate": 0.0001544066761652758, "loss": 1.0509, "step": 1126 }, { "epoch": 0.4582232161008335, "grad_norm": 0.09846567362546921, "learning_rate": 0.00015436596784042337, "loss": 1.0026, "step": 1127 }, { "epoch": 0.45862980280544824, "grad_norm": 0.10050946474075317, "learning_rate": 0.00015432525951557095, "loss": 0.9278, "step": 1128 }, { "epoch": 0.45903638951006304, "grad_norm": 0.09319213777780533, "learning_rate": 0.00015428455119071853, "loss": 1.0591, "step": 1129 }, { "epoch": 0.4594429762146778, "grad_norm": 0.10778182744979858, "learning_rate": 0.00015424384286586608, "loss": 1.1913, "step": 1130 }, { "epoch": 0.4598495629192925, "grad_norm": 0.09819093346595764, "learning_rate": 0.00015420313454101364, "loss": 1.0254, "step": 
1131 }, { "epoch": 0.4602561496239073, "grad_norm": 0.09300455451011658, "learning_rate": 0.00015416242621616122, "loss": 0.9092, "step": 1132 }, { "epoch": 0.46066273632852206, "grad_norm": 0.09690682590007782, "learning_rate": 0.00015412171789130877, "loss": 1.0309, "step": 1133 }, { "epoch": 0.4610693230331368, "grad_norm": 0.10080096125602722, "learning_rate": 0.00015408100956645636, "loss": 1.059, "step": 1134 }, { "epoch": 0.4614759097377516, "grad_norm": 0.10120131820440292, "learning_rate": 0.0001540403012416039, "loss": 1.0201, "step": 1135 }, { "epoch": 0.46188249644236634, "grad_norm": 0.09029684960842133, "learning_rate": 0.0001539995929167515, "loss": 0.9981, "step": 1136 }, { "epoch": 0.4622890831469811, "grad_norm": 0.10337984561920166, "learning_rate": 0.00015395888459189904, "loss": 1.0746, "step": 1137 }, { "epoch": 0.4626956698515958, "grad_norm": 0.10107820481061935, "learning_rate": 0.0001539181762670466, "loss": 1.0901, "step": 1138 }, { "epoch": 0.4631022565562106, "grad_norm": 0.09064685553312302, "learning_rate": 0.00015387746794219418, "loss": 0.9654, "step": 1139 }, { "epoch": 0.46350884326082537, "grad_norm": 0.08879990130662918, "learning_rate": 0.00015383675961734176, "loss": 0.9099, "step": 1140 }, { "epoch": 0.4639154299654401, "grad_norm": 0.09138944000005722, "learning_rate": 0.00015379605129248934, "loss": 1.029, "step": 1141 }, { "epoch": 0.4643220166700549, "grad_norm": 0.08852239698171616, "learning_rate": 0.0001537553429676369, "loss": 0.8866, "step": 1142 }, { "epoch": 0.46472860337466965, "grad_norm": 0.1031791940331459, "learning_rate": 0.00015371463464278445, "loss": 1.0403, "step": 1143 }, { "epoch": 0.4651351900792844, "grad_norm": 0.10525615513324738, "learning_rate": 0.00015367392631793203, "loss": 1.0979, "step": 1144 }, { "epoch": 0.4655417767838992, "grad_norm": 0.08951327204704285, "learning_rate": 0.00015363321799307959, "loss": 1.1415, "step": 1145 }, { "epoch": 0.46594836348851393, "grad_norm": 
0.08904453366994858, "learning_rate": 0.00015359250966822717, "loss": 0.9916, "step": 1146 }, { "epoch": 0.46635495019312867, "grad_norm": 0.09936080127954483, "learning_rate": 0.00015355180134337472, "loss": 0.8986, "step": 1147 }, { "epoch": 0.46676153689774347, "grad_norm": 0.09393945336341858, "learning_rate": 0.0001535110930185223, "loss": 0.9999, "step": 1148 }, { "epoch": 0.4671681236023582, "grad_norm": 0.09378618746995926, "learning_rate": 0.00015347038469366986, "loss": 1.047, "step": 1149 }, { "epoch": 0.46757471030697295, "grad_norm": 0.08764394372701645, "learning_rate": 0.0001534296763688174, "loss": 1.0553, "step": 1150 }, { "epoch": 0.4679812970115877, "grad_norm": 0.09421446919441223, "learning_rate": 0.00015338896804396502, "loss": 0.9849, "step": 1151 }, { "epoch": 0.4683878837162025, "grad_norm": 0.08507819473743439, "learning_rate": 0.00015334825971911257, "loss": 0.9776, "step": 1152 }, { "epoch": 0.46879447042081723, "grad_norm": 0.08929714560508728, "learning_rate": 0.00015330755139426015, "loss": 0.9386, "step": 1153 }, { "epoch": 0.469201057125432, "grad_norm": 0.08826079219579697, "learning_rate": 0.0001532668430694077, "loss": 0.9566, "step": 1154 }, { "epoch": 0.4696076438300468, "grad_norm": 0.09339980781078339, "learning_rate": 0.00015322613474455526, "loss": 1.0428, "step": 1155 }, { "epoch": 0.4700142305346615, "grad_norm": 0.09100881218910217, "learning_rate": 0.00015318542641970284, "loss": 0.998, "step": 1156 }, { "epoch": 0.47042081723927626, "grad_norm": 0.10815288126468658, "learning_rate": 0.0001531447180948504, "loss": 0.9677, "step": 1157 }, { "epoch": 0.47082740394389105, "grad_norm": 0.10011841356754303, "learning_rate": 0.00015310400976999798, "loss": 1.0712, "step": 1158 }, { "epoch": 0.4712339906485058, "grad_norm": 0.09442432969808578, "learning_rate": 0.00015306330144514553, "loss": 1.0916, "step": 1159 }, { "epoch": 0.47164057735312054, "grad_norm": 0.09668919444084167, "learning_rate": 0.00015302259312029311, 
"loss": 1.0755, "step": 1160 }, { "epoch": 0.47204716405773534, "grad_norm": 0.09985285252332687, "learning_rate": 0.00015298188479544067, "loss": 1.0688, "step": 1161 }, { "epoch": 0.4724537507623501, "grad_norm": 0.10555320233106613, "learning_rate": 0.00015294117647058822, "loss": 1.0152, "step": 1162 }, { "epoch": 0.4728603374669648, "grad_norm": 0.0884140282869339, "learning_rate": 0.00015290046814573583, "loss": 0.9648, "step": 1163 }, { "epoch": 0.47326692417157956, "grad_norm": 0.07746291160583496, "learning_rate": 0.00015285975982088338, "loss": 0.8335, "step": 1164 }, { "epoch": 0.47367351087619436, "grad_norm": 0.09735523909330368, "learning_rate": 0.00015281905149603094, "loss": 1.0043, "step": 1165 }, { "epoch": 0.4740800975808091, "grad_norm": 0.0871511772274971, "learning_rate": 0.00015277834317117852, "loss": 0.9071, "step": 1166 }, { "epoch": 0.47448668428542384, "grad_norm": 0.08971349149942398, "learning_rate": 0.00015273763484632607, "loss": 1.0586, "step": 1167 }, { "epoch": 0.47489327099003864, "grad_norm": 0.0872373878955841, "learning_rate": 0.00015269692652147365, "loss": 1.0302, "step": 1168 }, { "epoch": 0.4752998576946534, "grad_norm": 0.07631363719701767, "learning_rate": 0.0001526562181966212, "loss": 0.8899, "step": 1169 }, { "epoch": 0.4757064443992681, "grad_norm": 0.0988103449344635, "learning_rate": 0.0001526155098717688, "loss": 1.1254, "step": 1170 }, { "epoch": 0.4761130311038829, "grad_norm": 0.097597636282444, "learning_rate": 0.00015257480154691634, "loss": 1.1146, "step": 1171 }, { "epoch": 0.47651961780849766, "grad_norm": 0.09990191459655762, "learning_rate": 0.00015253409322206393, "loss": 1.2176, "step": 1172 }, { "epoch": 0.4769262045131124, "grad_norm": 0.09328643232584, "learning_rate": 0.00015249338489721148, "loss": 1.0341, "step": 1173 }, { "epoch": 0.4773327912177272, "grad_norm": 0.10171747207641602, "learning_rate": 0.00015245267657235906, "loss": 1.0254, "step": 1174 }, { "epoch": 0.47773937792234195, 
"grad_norm": 0.10708395391702652, "learning_rate": 0.00015241196824750664, "loss": 1.0829, "step": 1175 }, { "epoch": 0.4781459646269567, "grad_norm": 0.08677671104669571, "learning_rate": 0.0001523712599226542, "loss": 1.0284, "step": 1176 }, { "epoch": 0.4785525513315715, "grad_norm": 0.09038002789020538, "learning_rate": 0.00015233055159780175, "loss": 0.9153, "step": 1177 }, { "epoch": 0.4789591380361862, "grad_norm": 0.11192218214273453, "learning_rate": 0.00015228984327294933, "loss": 1.0457, "step": 1178 }, { "epoch": 0.47936572474080097, "grad_norm": 0.09288083016872406, "learning_rate": 0.00015224913494809689, "loss": 1.0015, "step": 1179 }, { "epoch": 0.4797723114454157, "grad_norm": 0.09631673991680145, "learning_rate": 0.00015220842662324447, "loss": 1.0815, "step": 1180 }, { "epoch": 0.4801788981500305, "grad_norm": 0.10445179790258408, "learning_rate": 0.00015216771829839202, "loss": 1.0739, "step": 1181 }, { "epoch": 0.48058548485464525, "grad_norm": 0.09268762916326523, "learning_rate": 0.0001521270099735396, "loss": 0.8934, "step": 1182 }, { "epoch": 0.48099207155926, "grad_norm": 0.08889751881361008, "learning_rate": 0.00015208630164868716, "loss": 0.9938, "step": 1183 }, { "epoch": 0.4813986582638748, "grad_norm": 45.80461883544922, "learning_rate": 0.0001520455933238347, "loss": 1.1104, "step": 1184 }, { "epoch": 0.48180524496848953, "grad_norm": 0.10641971975564957, "learning_rate": 0.0001520048849989823, "loss": 0.94, "step": 1185 }, { "epoch": 0.4822118316731043, "grad_norm": 0.1041031926870346, "learning_rate": 0.00015196417667412987, "loss": 1.0479, "step": 1186 }, { "epoch": 0.48261841837771907, "grad_norm": 0.09576927870512009, "learning_rate": 0.00015192346834927745, "loss": 1.0385, "step": 1187 }, { "epoch": 0.4830250050823338, "grad_norm": 26.211715698242188, "learning_rate": 0.000151882760024425, "loss": 0.9019, "step": 1188 }, { "epoch": 0.48343159178694856, "grad_norm": 0.10039546340703964, "learning_rate": 0.00015184205169957256, 
"loss": 0.9887, "step": 1189 }, { "epoch": 0.48383817849156335, "grad_norm": 0.14768731594085693, "learning_rate": 0.00015180134337472014, "loss": 0.9373, "step": 1190 }, { "epoch": 0.4842447651961781, "grad_norm": 0.29760250449180603, "learning_rate": 0.0001517606350498677, "loss": 0.9899, "step": 1191 }, { "epoch": 0.48465135190079284, "grad_norm": 0.29652246832847595, "learning_rate": 0.00015171992672501528, "loss": 0.961, "step": 1192 }, { "epoch": 0.4850579386054076, "grad_norm": 0.7517414689064026, "learning_rate": 0.00015167921840016283, "loss": 1.0964, "step": 1193 }, { "epoch": 0.4854645253100224, "grad_norm": 0.14506421983242035, "learning_rate": 0.0001516385100753104, "loss": 1.1155, "step": 1194 }, { "epoch": 0.4858711120146371, "grad_norm": 0.11916639655828476, "learning_rate": 0.00015159780175045797, "loss": 0.9494, "step": 1195 }, { "epoch": 0.48627769871925186, "grad_norm": 0.10341714322566986, "learning_rate": 0.00015155709342560552, "loss": 0.9381, "step": 1196 }, { "epoch": 0.48668428542386666, "grad_norm": 0.10921141505241394, "learning_rate": 0.00015151638510075313, "loss": 1.0357, "step": 1197 }, { "epoch": 0.4870908721284814, "grad_norm": 0.12874668836593628, "learning_rate": 0.00015147567677590068, "loss": 0.9918, "step": 1198 }, { "epoch": 0.48749745883309614, "grad_norm": 0.10311154276132584, "learning_rate": 0.00015143496845104827, "loss": 1.0575, "step": 1199 }, { "epoch": 0.48790404553771094, "grad_norm": 0.09126869589090347, "learning_rate": 0.00015139426012619582, "loss": 0.9125, "step": 1200 }, { "epoch": 0.4883106322423257, "grad_norm": 0.11038295179605484, "learning_rate": 0.00015135355180134337, "loss": 1.0761, "step": 1201 }, { "epoch": 0.4887172189469404, "grad_norm": 0.10550364851951599, "learning_rate": 0.00015131284347649095, "loss": 1.0513, "step": 1202 }, { "epoch": 0.4891238056515552, "grad_norm": 0.08666063100099564, "learning_rate": 0.0001512721351516385, "loss": 0.8815, "step": 1203 }, { "epoch": 0.48953039235616996, 
"grad_norm": 0.09860862046480179, "learning_rate": 0.0001512314268267861, "loss": 1.0451, "step": 1204 }, { "epoch": 0.4899369790607847, "grad_norm": 0.10188648104667664, "learning_rate": 0.00015119071850193364, "loss": 0.9911, "step": 1205 }, { "epoch": 0.49034356576539945, "grad_norm": 0.09538048505783081, "learning_rate": 0.00015115001017708122, "loss": 0.8973, "step": 1206 }, { "epoch": 0.49075015247001424, "grad_norm": 0.10558182001113892, "learning_rate": 0.00015110930185222878, "loss": 1.0272, "step": 1207 }, { "epoch": 0.491156739174629, "grad_norm": 0.10072223097085953, "learning_rate": 0.00015106859352737633, "loss": 1.0966, "step": 1208 }, { "epoch": 0.49156332587924373, "grad_norm": 0.10667192190885544, "learning_rate": 0.00015102788520252394, "loss": 1.0805, "step": 1209 }, { "epoch": 0.4919699125838585, "grad_norm": 0.10285364836454391, "learning_rate": 0.0001509871768776715, "loss": 1.0553, "step": 1210 }, { "epoch": 0.49237649928847327, "grad_norm": 0.09896936267614365, "learning_rate": 0.00015094646855281908, "loss": 1.032, "step": 1211 }, { "epoch": 0.492783085993088, "grad_norm": 0.08868112415075302, "learning_rate": 0.00015090576022796663, "loss": 1.013, "step": 1212 }, { "epoch": 0.4931896726977028, "grad_norm": 0.10103127360343933, "learning_rate": 0.00015086505190311418, "loss": 1.0589, "step": 1213 }, { "epoch": 0.49359625940231755, "grad_norm": 0.11582531780004501, "learning_rate": 0.00015082434357826177, "loss": 1.0731, "step": 1214 }, { "epoch": 0.4940028461069323, "grad_norm": 0.0953935906291008, "learning_rate": 0.00015078363525340932, "loss": 0.9751, "step": 1215 }, { "epoch": 0.4944094328115471, "grad_norm": 0.10135170817375183, "learning_rate": 0.0001507429269285569, "loss": 1.0676, "step": 1216 }, { "epoch": 0.49481601951616183, "grad_norm": 0.10529596358537674, "learning_rate": 0.00015070221860370446, "loss": 1.0274, "step": 1217 }, { "epoch": 0.4952226062207766, "grad_norm": 0.11172258853912354, "learning_rate": 
0.00015066151027885204, "loss": 1.1241, "step": 1218 }, { "epoch": 0.4956291929253913, "grad_norm": 0.10328125208616257, "learning_rate": 0.0001506208019539996, "loss": 1.1032, "step": 1219 }, { "epoch": 0.4960357796300061, "grad_norm": 0.09035445749759674, "learning_rate": 0.00015058009362914717, "loss": 1.0394, "step": 1220 }, { "epoch": 0.49644236633462085, "grad_norm": 0.0988045334815979, "learning_rate": 0.00015053938530429475, "loss": 1.0092, "step": 1221 }, { "epoch": 0.4968489530392356, "grad_norm": 0.12335261702537537, "learning_rate": 0.0001504986769794423, "loss": 1.0994, "step": 1222 }, { "epoch": 0.4972555397438504, "grad_norm": 0.09677151590585709, "learning_rate": 0.0001504579686545899, "loss": 0.9352, "step": 1223 }, { "epoch": 0.49766212644846514, "grad_norm": 0.0954160988330841, "learning_rate": 0.00015041726032973744, "loss": 1.0526, "step": 1224 }, { "epoch": 0.4980687131530799, "grad_norm": 0.09783489257097244, "learning_rate": 0.000150376552004885, "loss": 0.9689, "step": 1225 }, { "epoch": 0.4984752998576947, "grad_norm": 0.09221793711185455, "learning_rate": 0.00015033584368003258, "loss": 0.9458, "step": 1226 }, { "epoch": 0.4988818865623094, "grad_norm": 0.09968589246273041, "learning_rate": 0.00015029513535518013, "loss": 0.9938, "step": 1227 }, { "epoch": 0.49928847326692416, "grad_norm": 0.10488888621330261, "learning_rate": 0.0001502544270303277, "loss": 0.9525, "step": 1228 }, { "epoch": 0.49969505997153896, "grad_norm": 0.08479832857847214, "learning_rate": 0.00015021371870547527, "loss": 0.976, "step": 1229 }, { "epoch": 0.5001016466761536, "grad_norm": 0.0930403620004654, "learning_rate": 0.00015017301038062285, "loss": 1.0259, "step": 1230 }, { "epoch": 0.5005082333807684, "grad_norm": 0.09309448301792145, "learning_rate": 0.0001501323020557704, "loss": 0.9997, "step": 1231 }, { "epoch": 0.5009148200853832, "grad_norm": 0.09209504723548889, "learning_rate": 0.00015009159373091798, "loss": 0.9365, "step": 1232 }, { "epoch": 
0.5013214067899979, "grad_norm": 0.09045909345149994, "learning_rate": 0.00015005088540606556, "loss": 0.9572, "step": 1233 }, { "epoch": 0.5017279934946127, "grad_norm": 0.0892348513007164, "learning_rate": 0.00015001017708121312, "loss": 0.9593, "step": 1234 }, { "epoch": 0.5021345801992275, "grad_norm": 0.08853106945753098, "learning_rate": 0.0001499694687563607, "loss": 0.9518, "step": 1235 }, { "epoch": 0.5025411669038422, "grad_norm": 0.0941222533583641, "learning_rate": 0.00014992876043150825, "loss": 0.9474, "step": 1236 }, { "epoch": 0.502947753608457, "grad_norm": 0.09374161809682846, "learning_rate": 0.0001498880521066558, "loss": 1.0018, "step": 1237 }, { "epoch": 0.5033543403130718, "grad_norm": 0.08115139603614807, "learning_rate": 0.0001498473437818034, "loss": 0.8797, "step": 1238 }, { "epoch": 0.5037609270176865, "grad_norm": 0.09270316362380981, "learning_rate": 0.00014980663545695094, "loss": 1.0203, "step": 1239 }, { "epoch": 0.5041675137223013, "grad_norm": 0.08950728923082352, "learning_rate": 0.00014976592713209852, "loss": 0.9909, "step": 1240 }, { "epoch": 0.5045741004269161, "grad_norm": 0.09764236211776733, "learning_rate": 0.00014972521880724608, "loss": 0.9851, "step": 1241 }, { "epoch": 0.5049806871315308, "grad_norm": 0.09275151789188385, "learning_rate": 0.00014968451048239366, "loss": 0.9452, "step": 1242 }, { "epoch": 0.5053872738361456, "grad_norm": 0.09436964988708496, "learning_rate": 0.00014964380215754124, "loss": 1.0731, "step": 1243 }, { "epoch": 0.5057938605407604, "grad_norm": 0.09008494764566422, "learning_rate": 0.0001496030938326888, "loss": 1.0238, "step": 1244 }, { "epoch": 0.506200447245375, "grad_norm": 0.08599425107240677, "learning_rate": 0.00014956238550783638, "loss": 0.9148, "step": 1245 }, { "epoch": 0.5066070339499898, "grad_norm": 0.09270120412111282, "learning_rate": 0.00014952167718298393, "loss": 0.9348, "step": 1246 }, { "epoch": 0.5070136206546046, "grad_norm": 0.09423110634088516, "learning_rate": 
0.0001494809688581315, "loss": 1.0746, "step": 1247 }, { "epoch": 0.5074202073592193, "grad_norm": 0.08819740265607834, "learning_rate": 0.00014944026053327907, "loss": 1.0913, "step": 1248 }, { "epoch": 0.5078267940638341, "grad_norm": 0.08502914011478424, "learning_rate": 0.00014939955220842662, "loss": 1.0142, "step": 1249 }, { "epoch": 0.5082333807684488, "grad_norm": 0.09372544288635254, "learning_rate": 0.0001493588438835742, "loss": 0.9297, "step": 1250 }, { "epoch": 0.5086399674730636, "grad_norm": 0.09857220202684402, "learning_rate": 0.00014931813555872175, "loss": 1.0665, "step": 1251 }, { "epoch": 0.5090465541776784, "grad_norm": 0.09227776527404785, "learning_rate": 0.00014927742723386934, "loss": 0.9791, "step": 1252 }, { "epoch": 0.5094531408822931, "grad_norm": 0.09301433712244034, "learning_rate": 0.0001492367189090169, "loss": 0.8855, "step": 1253 }, { "epoch": 0.5098597275869079, "grad_norm": 0.09796632081270218, "learning_rate": 0.00014919601058416447, "loss": 1.0645, "step": 1254 }, { "epoch": 0.5102663142915227, "grad_norm": 0.09791705757379532, "learning_rate": 0.00014915530225931205, "loss": 0.9949, "step": 1255 }, { "epoch": 0.5106729009961374, "grad_norm": 0.09171664714813232, "learning_rate": 0.0001491145939344596, "loss": 0.8958, "step": 1256 }, { "epoch": 0.5110794877007522, "grad_norm": 0.10115580260753632, "learning_rate": 0.0001490738856096072, "loss": 1.0141, "step": 1257 }, { "epoch": 0.511486074405367, "grad_norm": 0.08854761719703674, "learning_rate": 0.00014903317728475474, "loss": 0.9733, "step": 1258 }, { "epoch": 0.5118926611099817, "grad_norm": 0.0944913849234581, "learning_rate": 0.0001489924689599023, "loss": 1.0187, "step": 1259 }, { "epoch": 0.5122992478145965, "grad_norm": 0.08820286393165588, "learning_rate": 0.00014895176063504988, "loss": 0.984, "step": 1260 }, { "epoch": 0.5127058345192113, "grad_norm": 0.0941242128610611, "learning_rate": 0.00014891105231019743, "loss": 1.0333, "step": 1261 }, { "epoch": 
0.5131124212238259, "grad_norm": 0.09355438500642776, "learning_rate": 0.000148870343985345, "loss": 1.1186, "step": 1262 }, { "epoch": 0.5135190079284407, "grad_norm": 0.09487958997488022, "learning_rate": 0.00014882963566049257, "loss": 1.1297, "step": 1263 }, { "epoch": 0.5139255946330555, "grad_norm": 0.08488618582487106, "learning_rate": 0.00014878892733564015, "loss": 0.9725, "step": 1264 }, { "epoch": 0.5143321813376702, "grad_norm": 0.09238637238740921, "learning_rate": 0.0001487482190107877, "loss": 0.9798, "step": 1265 }, { "epoch": 0.514738768042285, "grad_norm": 0.09334023296833038, "learning_rate": 0.00014870751068593528, "loss": 1.0818, "step": 1266 }, { "epoch": 0.5151453547468998, "grad_norm": 0.09130462259054184, "learning_rate": 0.00014866680236108286, "loss": 0.9885, "step": 1267 }, { "epoch": 0.5155519414515145, "grad_norm": 0.08275487273931503, "learning_rate": 0.00014862609403623042, "loss": 0.8525, "step": 1268 }, { "epoch": 0.5159585281561293, "grad_norm": 0.09485149383544922, "learning_rate": 0.000148585385711378, "loss": 1.0424, "step": 1269 }, { "epoch": 0.5163651148607441, "grad_norm": 0.08834747970104218, "learning_rate": 0.00014854467738652555, "loss": 0.9235, "step": 1270 }, { "epoch": 0.5167717015653588, "grad_norm": 0.09200993925333023, "learning_rate": 0.0001485039690616731, "loss": 1.0669, "step": 1271 }, { "epoch": 0.5171782882699736, "grad_norm": 0.08159536123275757, "learning_rate": 0.0001484632607368207, "loss": 0.9067, "step": 1272 }, { "epoch": 0.5175848749745884, "grad_norm": 0.08643992245197296, "learning_rate": 0.00014842255241196824, "loss": 0.9632, "step": 1273 }, { "epoch": 0.5179914616792031, "grad_norm": 0.09672199934720993, "learning_rate": 0.00014838184408711582, "loss": 1.0263, "step": 1274 }, { "epoch": 0.5183980483838179, "grad_norm": 0.09713756293058395, "learning_rate": 0.00014834113576226338, "loss": 0.9166, "step": 1275 }, { "epoch": 0.5188046350884326, "grad_norm": 0.08467654883861542, "learning_rate": 
0.00014830042743741096, "loss": 0.9201, "step": 1276 }, { "epoch": 0.5192112217930474, "grad_norm": 0.08024970442056656, "learning_rate": 0.0001482597191125585, "loss": 0.8556, "step": 1277 }, { "epoch": 0.5196178084976621, "grad_norm": 0.09249437600374222, "learning_rate": 0.0001482190107877061, "loss": 1.0381, "step": 1278 }, { "epoch": 0.5200243952022768, "grad_norm": 0.08076690137386322, "learning_rate": 0.00014817830246285368, "loss": 0.9216, "step": 1279 }, { "epoch": 0.5204309819068916, "grad_norm": 0.09259018301963806, "learning_rate": 0.00014813759413800123, "loss": 1.0547, "step": 1280 }, { "epoch": 0.5208375686115064, "grad_norm": 0.08734786510467529, "learning_rate": 0.0001480968858131488, "loss": 0.8811, "step": 1281 }, { "epoch": 0.5212441553161211, "grad_norm": 0.094956174492836, "learning_rate": 0.00014805617748829637, "loss": 0.9665, "step": 1282 }, { "epoch": 0.5216507420207359, "grad_norm": 0.08848060667514801, "learning_rate": 0.00014801546916344392, "loss": 0.9945, "step": 1283 }, { "epoch": 0.5220573287253507, "grad_norm": 0.0921303778886795, "learning_rate": 0.0001479747608385915, "loss": 1.0927, "step": 1284 }, { "epoch": 0.5224639154299654, "grad_norm": 0.08918172121047974, "learning_rate": 0.00014793405251373905, "loss": 0.9598, "step": 1285 }, { "epoch": 0.5228705021345802, "grad_norm": 0.10177495330572128, "learning_rate": 0.00014789334418888664, "loss": 1.1625, "step": 1286 }, { "epoch": 0.523277088839195, "grad_norm": 0.0942060649394989, "learning_rate": 0.0001478526358640342, "loss": 1.0612, "step": 1287 }, { "epoch": 0.5236836755438097, "grad_norm": 0.09780838340520859, "learning_rate": 0.00014781192753918177, "loss": 1.1024, "step": 1288 }, { "epoch": 0.5240902622484245, "grad_norm": 0.08893782645463943, "learning_rate": 0.00014777121921432935, "loss": 1.043, "step": 1289 }, { "epoch": 0.5244968489530393, "grad_norm": 0.0918479710817337, "learning_rate": 0.0001477305108894769, "loss": 0.9824, "step": 1290 }, { "epoch": 
0.524903435657654, "grad_norm": 0.09912838041782379, "learning_rate": 0.0001476898025646245, "loss": 1.0346, "step": 1291 }, { "epoch": 0.5253100223622688, "grad_norm": 0.10609038919210434, "learning_rate": 0.00014764909423977204, "loss": 1.0133, "step": 1292 }, { "epoch": 0.5257166090668836, "grad_norm": 0.09957921504974365, "learning_rate": 0.00014760838591491962, "loss": 0.9842, "step": 1293 }, { "epoch": 0.5261231957714982, "grad_norm": 0.09777513146400452, "learning_rate": 0.00014756767759006718, "loss": 1.0092, "step": 1294 }, { "epoch": 0.526529782476113, "grad_norm": 0.08816764503717422, "learning_rate": 0.00014752696926521473, "loss": 0.9064, "step": 1295 }, { "epoch": 0.5269363691807278, "grad_norm": 0.09163589775562286, "learning_rate": 0.0001474862609403623, "loss": 0.9682, "step": 1296 }, { "epoch": 0.5273429558853425, "grad_norm": 21.36524772644043, "learning_rate": 0.00014744555261550987, "loss": 1.0146, "step": 1297 }, { "epoch": 0.5277495425899573, "grad_norm": 0.09484653919935226, "learning_rate": 0.00014740484429065745, "loss": 0.979, "step": 1298 }, { "epoch": 0.5281561292945721, "grad_norm": 0.09288137406110764, "learning_rate": 0.000147364135965805, "loss": 0.9692, "step": 1299 }, { "epoch": 0.5285627159991868, "grad_norm": 0.09847582131624222, "learning_rate": 0.00014732342764095258, "loss": 1.0519, "step": 1300 }, { "epoch": 0.5289693027038016, "grad_norm": 0.09856998920440674, "learning_rate": 0.00014728271931610016, "loss": 0.9929, "step": 1301 }, { "epoch": 0.5293758894084164, "grad_norm": 0.0969497561454773, "learning_rate": 0.00014724201099124772, "loss": 0.9412, "step": 1302 }, { "epoch": 0.5297824761130311, "grad_norm": 0.09796781092882156, "learning_rate": 0.0001472013026663953, "loss": 0.9538, "step": 1303 }, { "epoch": 0.5301890628176459, "grad_norm": 0.09267283231019974, "learning_rate": 0.00014716059434154285, "loss": 0.9616, "step": 1304 }, { "epoch": 0.5305956495222606, "grad_norm": 0.10447274148464203, "learning_rate": 
0.00014711988601669043, "loss": 0.9485, "step": 1305 }, { "epoch": 0.5310022362268754, "grad_norm": 0.10163460671901703, "learning_rate": 0.000147079177691838, "loss": 0.9419, "step": 1306 }, { "epoch": 0.5314088229314902, "grad_norm": 0.09405020624399185, "learning_rate": 0.00014703846936698554, "loss": 0.9806, "step": 1307 }, { "epoch": 0.5318154096361049, "grad_norm": 0.09395210444927216, "learning_rate": 0.00014699776104213312, "loss": 1.0278, "step": 1308 }, { "epoch": 0.5322219963407196, "grad_norm": 0.09595540910959244, "learning_rate": 0.00014695705271728068, "loss": 1.0625, "step": 1309 }, { "epoch": 0.5326285830453344, "grad_norm": 0.0832480788230896, "learning_rate": 0.00014691634439242826, "loss": 0.9134, "step": 1310 }, { "epoch": 0.5330351697499491, "grad_norm": 0.10631989687681198, "learning_rate": 0.0001468756360675758, "loss": 0.9753, "step": 1311 }, { "epoch": 0.5334417564545639, "grad_norm": 0.0866394117474556, "learning_rate": 0.0001468349277427234, "loss": 0.9492, "step": 1312 }, { "epoch": 0.5338483431591787, "grad_norm": 0.10123784095048904, "learning_rate": 0.00014679421941787098, "loss": 0.9819, "step": 1313 }, { "epoch": 0.5342549298637934, "grad_norm": 0.08982353657484055, "learning_rate": 0.00014675351109301853, "loss": 0.9026, "step": 1314 }, { "epoch": 0.5346615165684082, "grad_norm": 0.08998806774616241, "learning_rate": 0.0001467128027681661, "loss": 0.9467, "step": 1315 }, { "epoch": 0.535068103273023, "grad_norm": 0.09901012480258942, "learning_rate": 0.00014667209444331366, "loss": 0.9655, "step": 1316 }, { "epoch": 0.5354746899776377, "grad_norm": 0.10991565883159637, "learning_rate": 0.00014663138611846125, "loss": 1.0236, "step": 1317 }, { "epoch": 0.5358812766822525, "grad_norm": 0.10133833438158035, "learning_rate": 0.0001465906777936088, "loss": 1.0453, "step": 1318 }, { "epoch": 0.5362878633868673, "grad_norm": 0.10197743028402328, "learning_rate": 0.00014654996946875635, "loss": 0.9008, "step": 1319 }, { "epoch": 
0.536694450091482, "grad_norm": 0.09654685854911804, "learning_rate": 0.00014650926114390394, "loss": 1.0586, "step": 1320 }, { "epoch": 0.5371010367960968, "grad_norm": 0.10006607323884964, "learning_rate": 0.0001464685528190515, "loss": 0.9627, "step": 1321 }, { "epoch": 0.5375076235007116, "grad_norm": 0.09992939233779907, "learning_rate": 0.00014642784449419907, "loss": 0.9841, "step": 1322 }, { "epoch": 0.5379142102053263, "grad_norm": 0.098929263651371, "learning_rate": 0.00014638713616934662, "loss": 0.9764, "step": 1323 }, { "epoch": 0.5383207969099411, "grad_norm": 0.09640022367238998, "learning_rate": 0.0001463464278444942, "loss": 0.9922, "step": 1324 }, { "epoch": 0.5387273836145559, "grad_norm": 0.09175208956003189, "learning_rate": 0.0001463057195196418, "loss": 1.0659, "step": 1325 }, { "epoch": 0.5391339703191705, "grad_norm": 0.09107311069965363, "learning_rate": 0.00014626501119478934, "loss": 0.9898, "step": 1326 }, { "epoch": 0.5395405570237853, "grad_norm": 0.10652513056993484, "learning_rate": 0.00014622430286993692, "loss": 1.1346, "step": 1327 }, { "epoch": 0.5399471437284001, "grad_norm": 0.09096572548151016, "learning_rate": 0.00014618359454508448, "loss": 0.9296, "step": 1328 }, { "epoch": 0.5403537304330148, "grad_norm": 0.0995742529630661, "learning_rate": 0.00014614288622023206, "loss": 1.0034, "step": 1329 }, { "epoch": 0.5407603171376296, "grad_norm": 0.08811762928962708, "learning_rate": 0.0001461021778953796, "loss": 0.9928, "step": 1330 }, { "epoch": 0.5411669038422443, "grad_norm": 0.09473133832216263, "learning_rate": 0.00014606146957052717, "loss": 1.002, "step": 1331 }, { "epoch": 0.5415734905468591, "grad_norm": 0.08898860216140747, "learning_rate": 0.00014602076124567475, "loss": 0.9358, "step": 1332 }, { "epoch": 0.5419800772514739, "grad_norm": 0.093483105301857, "learning_rate": 0.0001459800529208223, "loss": 0.9319, "step": 1333 }, { "epoch": 0.5423866639560886, "grad_norm": 0.09663254022598267, "learning_rate": 
0.00014593934459596988, "loss": 1.0041, "step": 1334 }, { "epoch": 0.5427932506607034, "grad_norm": 0.08969207853078842, "learning_rate": 0.00014589863627111746, "loss": 0.996, "step": 1335 }, { "epoch": 0.5431998373653182, "grad_norm": 0.08921096473932266, "learning_rate": 0.00014585792794626502, "loss": 0.9263, "step": 1336 }, { "epoch": 0.5436064240699329, "grad_norm": 0.08625603467226028, "learning_rate": 0.0001458172196214126, "loss": 0.9372, "step": 1337 }, { "epoch": 0.5440130107745477, "grad_norm": 0.09406933933496475, "learning_rate": 0.00014577651129656015, "loss": 1.0037, "step": 1338 }, { "epoch": 0.5444195974791625, "grad_norm": 0.08918149769306183, "learning_rate": 0.00014573580297170773, "loss": 0.9418, "step": 1339 }, { "epoch": 0.5448261841837772, "grad_norm": 0.09736087918281555, "learning_rate": 0.0001456950946468553, "loss": 0.965, "step": 1340 }, { "epoch": 0.545232770888392, "grad_norm": 0.09973054379224777, "learning_rate": 0.00014565438632200287, "loss": 0.8948, "step": 1341 }, { "epoch": 0.5456393575930067, "grad_norm": 0.08326181769371033, "learning_rate": 0.00014561367799715042, "loss": 0.9051, "step": 1342 }, { "epoch": 0.5460459442976214, "grad_norm": 0.0919221043586731, "learning_rate": 0.00014557296967229798, "loss": 0.95, "step": 1343 }, { "epoch": 0.5464525310022362, "grad_norm": 0.08741891384124756, "learning_rate": 0.00014553226134744556, "loss": 0.9682, "step": 1344 }, { "epoch": 0.546859117706851, "grad_norm": 0.09859665483236313, "learning_rate": 0.0001454915530225931, "loss": 1.0564, "step": 1345 }, { "epoch": 0.5472657044114657, "grad_norm": 0.09352114796638489, "learning_rate": 0.0001454508446977407, "loss": 0.982, "step": 1346 }, { "epoch": 0.5476722911160805, "grad_norm": 0.09592889994382858, "learning_rate": 0.00014541013637288827, "loss": 0.9874, "step": 1347 }, { "epoch": 0.5480788778206953, "grad_norm": 0.08276782929897308, "learning_rate": 0.00014536942804803583, "loss": 1.0243, "step": 1348 }, { "epoch": 
0.54848546452531, "grad_norm": 0.09625902026891708, "learning_rate": 0.0001453287197231834, "loss": 1.0977, "step": 1349 }, { "epoch": 0.5488920512299248, "grad_norm": 0.08539925515651703, "learning_rate": 0.00014528801139833096, "loss": 0.9816, "step": 1350 }, { "epoch": 0.5492986379345396, "grad_norm": 0.08654636144638062, "learning_rate": 0.00014524730307347855, "loss": 1.02, "step": 1351 }, { "epoch": 0.5497052246391543, "grad_norm": 0.09811274707317352, "learning_rate": 0.0001452065947486261, "loss": 1.1509, "step": 1352 }, { "epoch": 0.5501118113437691, "grad_norm": 0.09280407428741455, "learning_rate": 0.00014516588642377365, "loss": 1.0163, "step": 1353 }, { "epoch": 0.5505183980483839, "grad_norm": 0.08086491376161575, "learning_rate": 0.00014512517809892123, "loss": 0.853, "step": 1354 }, { "epoch": 0.5509249847529986, "grad_norm": 0.0827447846531868, "learning_rate": 0.0001450844697740688, "loss": 0.9749, "step": 1355 }, { "epoch": 0.5513315714576134, "grad_norm": 0.09065467119216919, "learning_rate": 0.00014504376144921637, "loss": 1.0186, "step": 1356 }, { "epoch": 0.551738158162228, "grad_norm": 0.08642933517694473, "learning_rate": 0.00014500305312436392, "loss": 0.9175, "step": 1357 }, { "epoch": 0.5521447448668428, "grad_norm": 0.08930498361587524, "learning_rate": 0.0001449623447995115, "loss": 1.0027, "step": 1358 }, { "epoch": 0.5525513315714576, "grad_norm": 0.09525667130947113, "learning_rate": 0.0001449216364746591, "loss": 1.1328, "step": 1359 }, { "epoch": 0.5529579182760723, "grad_norm": 0.08723597228527069, "learning_rate": 0.00014488092814980664, "loss": 0.9025, "step": 1360 }, { "epoch": 0.5533645049806871, "grad_norm": 0.08364204317331314, "learning_rate": 0.00014484021982495422, "loss": 0.939, "step": 1361 }, { "epoch": 0.5537710916853019, "grad_norm": 0.08982790261507034, "learning_rate": 0.00014479951150010178, "loss": 0.8604, "step": 1362 }, { "epoch": 0.5541776783899166, "grad_norm": 0.08386033028364182, "learning_rate": 
0.00014475880317524936, "loss": 0.957, "step": 1363 }, { "epoch": 0.5545842650945314, "grad_norm": 0.0920158326625824, "learning_rate": 0.0001447180948503969, "loss": 0.9388, "step": 1364 }, { "epoch": 0.5549908517991462, "grad_norm": 0.08764606714248657, "learning_rate": 0.00014467738652554447, "loss": 0.9721, "step": 1365 }, { "epoch": 0.5553974385037609, "grad_norm": 0.09296350926160812, "learning_rate": 0.00014463667820069205, "loss": 1.0195, "step": 1366 }, { "epoch": 0.5558040252083757, "grad_norm": 0.08107852935791016, "learning_rate": 0.0001445959698758396, "loss": 0.9001, "step": 1367 }, { "epoch": 0.5562106119129905, "grad_norm": 0.08827921748161316, "learning_rate": 0.00014455526155098718, "loss": 1.0009, "step": 1368 }, { "epoch": 0.5566171986176052, "grad_norm": 0.08549787849187851, "learning_rate": 0.00014451455322613474, "loss": 0.8675, "step": 1369 }, { "epoch": 0.55702378532222, "grad_norm": 0.10005125403404236, "learning_rate": 0.00014447384490128232, "loss": 1.1293, "step": 1370 }, { "epoch": 0.5574303720268348, "grad_norm": 0.09509359300136566, "learning_rate": 0.0001444331365764299, "loss": 1.0033, "step": 1371 }, { "epoch": 0.5578369587314495, "grad_norm": 0.09246810525655746, "learning_rate": 0.00014439242825157745, "loss": 0.9563, "step": 1372 }, { "epoch": 0.5582435454360642, "grad_norm": 0.09919826686382294, "learning_rate": 0.00014435171992672503, "loss": 1.0925, "step": 1373 }, { "epoch": 0.558650132140679, "grad_norm": 0.09652990102767944, "learning_rate": 0.0001443110116018726, "loss": 1.0716, "step": 1374 }, { "epoch": 0.5590567188452937, "grad_norm": 0.08819134533405304, "learning_rate": 0.00014427030327702017, "loss": 0.9586, "step": 1375 }, { "epoch": 0.5594633055499085, "grad_norm": 0.09266290068626404, "learning_rate": 0.00014422959495216772, "loss": 1.0399, "step": 1376 }, { "epoch": 0.5598698922545233, "grad_norm": 0.08892200142145157, "learning_rate": 0.00014418888662731528, "loss": 0.9844, "step": 1377 }, { "epoch": 
0.560276478959138, "grad_norm": 0.09452232718467712, "learning_rate": 0.00014414817830246286, "loss": 1.0875, "step": 1378 }, { "epoch": 0.5606830656637528, "grad_norm": 0.08958882093429565, "learning_rate": 0.0001441074699776104, "loss": 1.0234, "step": 1379 }, { "epoch": 0.5610896523683676, "grad_norm": 0.09218178689479828, "learning_rate": 0.000144066761652758, "loss": 1.0871, "step": 1380 }, { "epoch": 0.5614962390729823, "grad_norm": 0.08819695562124252, "learning_rate": 0.00014402605332790557, "loss": 0.9046, "step": 1381 }, { "epoch": 0.5619028257775971, "grad_norm": 0.09621118754148483, "learning_rate": 0.00014398534500305313, "loss": 0.9789, "step": 1382 }, { "epoch": 0.5623094124822118, "grad_norm": 0.08230914175510406, "learning_rate": 0.0001439446366782007, "loss": 0.817, "step": 1383 }, { "epoch": 0.5627159991868266, "grad_norm": 0.08805210143327713, "learning_rate": 0.00014390392835334826, "loss": 0.9488, "step": 1384 }, { "epoch": 0.5631225858914414, "grad_norm": 0.09026028960943222, "learning_rate": 0.00014386322002849584, "loss": 0.9837, "step": 1385 }, { "epoch": 0.5635291725960561, "grad_norm": 0.09834691882133484, "learning_rate": 0.0001438225117036434, "loss": 0.999, "step": 1386 }, { "epoch": 0.5639357593006709, "grad_norm": 0.09209754317998886, "learning_rate": 0.00014378180337879098, "loss": 0.9923, "step": 1387 }, { "epoch": 0.5643423460052857, "grad_norm": 0.08959315717220306, "learning_rate": 0.00014374109505393853, "loss": 0.9282, "step": 1388 }, { "epoch": 0.5647489327099003, "grad_norm": 0.08573776483535767, "learning_rate": 0.0001437003867290861, "loss": 0.9504, "step": 1389 }, { "epoch": 0.5651555194145151, "grad_norm": 0.08887659013271332, "learning_rate": 0.00014365967840423367, "loss": 0.9195, "step": 1390 }, { "epoch": 0.5655621061191299, "grad_norm": 0.08740208297967911, "learning_rate": 0.00014361897007938122, "loss": 0.9537, "step": 1391 }, { "epoch": 0.5659686928237446, "grad_norm": 0.08976002782583237, "learning_rate": 
0.0001435782617545288, "loss": 0.9126, "step": 1392 }, { "epoch": 0.5663752795283594, "grad_norm": 0.09727158397436142, "learning_rate": 0.00014353755342967639, "loss": 1.0088, "step": 1393 }, { "epoch": 0.5667818662329742, "grad_norm": 0.09165914356708527, "learning_rate": 0.00014349684510482394, "loss": 1.0443, "step": 1394 }, { "epoch": 0.5671884529375889, "grad_norm": 0.08791441470384598, "learning_rate": 0.00014345613677997152, "loss": 0.9708, "step": 1395 }, { "epoch": 0.5675950396422037, "grad_norm": 0.08658348023891449, "learning_rate": 0.00014341542845511908, "loss": 0.9347, "step": 1396 }, { "epoch": 0.5680016263468185, "grad_norm": 0.08867420256137848, "learning_rate": 0.00014337472013026666, "loss": 1.0331, "step": 1397 }, { "epoch": 0.5684082130514332, "grad_norm": 0.09206686913967133, "learning_rate": 0.0001433340118054142, "loss": 1.0469, "step": 1398 }, { "epoch": 0.568814799756048, "grad_norm": 0.09050408750772476, "learning_rate": 0.0001432933034805618, "loss": 0.9426, "step": 1399 }, { "epoch": 0.5692213864606628, "grad_norm": 0.08967922627925873, "learning_rate": 0.00014325259515570935, "loss": 0.9217, "step": 1400 }, { "epoch": 0.5696279731652775, "grad_norm": 0.08758019655942917, "learning_rate": 0.0001432118868308569, "loss": 0.9559, "step": 1401 }, { "epoch": 0.5700345598698923, "grad_norm": 0.09254743903875351, "learning_rate": 0.00014317117850600448, "loss": 0.9779, "step": 1402 }, { "epoch": 0.5704411465745071, "grad_norm": 0.09395452588796616, "learning_rate": 0.00014313047018115204, "loss": 1.0009, "step": 1403 }, { "epoch": 0.5708477332791217, "grad_norm": 0.09259745478630066, "learning_rate": 0.00014308976185629964, "loss": 1.0154, "step": 1404 }, { "epoch": 0.5712543199837365, "grad_norm": 0.09286468476057053, "learning_rate": 0.0001430490535314472, "loss": 1.0889, "step": 1405 }, { "epoch": 0.5716609066883513, "grad_norm": 0.08744499087333679, "learning_rate": 0.00014300834520659475, "loss": 0.9786, "step": 1406 }, { "epoch": 
0.572067493392966, "grad_norm": 0.09346942603588104, "learning_rate": 0.00014296763688174233, "loss": 0.9789, "step": 1407 }, { "epoch": 0.5724740800975808, "grad_norm": 0.09010860323905945, "learning_rate": 0.0001429269285568899, "loss": 1.018, "step": 1408 }, { "epoch": 0.5728806668021955, "grad_norm": 0.0881861224770546, "learning_rate": 0.00014288622023203747, "loss": 1.0898, "step": 1409 }, { "epoch": 0.5732872535068103, "grad_norm": 0.08293981850147247, "learning_rate": 0.00014284551190718502, "loss": 0.9129, "step": 1410 }, { "epoch": 0.5736938402114251, "grad_norm": 0.09111000597476959, "learning_rate": 0.0001428048035823326, "loss": 0.9556, "step": 1411 }, { "epoch": 0.5741004269160398, "grad_norm": 0.09435521066188812, "learning_rate": 0.00014276409525748016, "loss": 1.1178, "step": 1412 }, { "epoch": 0.5745070136206546, "grad_norm": 0.08865281194448471, "learning_rate": 0.0001427233869326277, "loss": 0.9682, "step": 1413 }, { "epoch": 0.5749136003252694, "grad_norm": 0.08608002215623856, "learning_rate": 0.0001426826786077753, "loss": 0.9144, "step": 1414 }, { "epoch": 0.5753201870298841, "grad_norm": 0.08543986827135086, "learning_rate": 0.00014264197028292285, "loss": 0.9314, "step": 1415 }, { "epoch": 0.5757267737344989, "grad_norm": 0.09068971127271652, "learning_rate": 0.00014260126195807046, "loss": 0.9835, "step": 1416 }, { "epoch": 0.5761333604391137, "grad_norm": 0.08598853647708893, "learning_rate": 0.000142560553633218, "loss": 0.9396, "step": 1417 }, { "epoch": 0.5765399471437284, "grad_norm": 0.08450654149055481, "learning_rate": 0.00014251984530836556, "loss": 0.9472, "step": 1418 }, { "epoch": 0.5769465338483432, "grad_norm": 0.09064414352178574, "learning_rate": 0.00014247913698351314, "loss": 1.0167, "step": 1419 }, { "epoch": 0.577353120552958, "grad_norm": 0.08948381245136261, "learning_rate": 0.0001424384286586607, "loss": 0.9459, "step": 1420 }, { "epoch": 0.5777597072575726, "grad_norm": 0.0811019316315651, "learning_rate": 
0.00014239772033380828, "loss": 0.8846, "step": 1421 }, { "epoch": 0.5781662939621874, "grad_norm": 0.09058842808008194, "learning_rate": 0.00014235701200895583, "loss": 0.9999, "step": 1422 }, { "epoch": 0.5785728806668022, "grad_norm": 0.09327298402786255, "learning_rate": 0.00014231630368410342, "loss": 1.0694, "step": 1423 }, { "epoch": 0.5789794673714169, "grad_norm": 0.08615417778491974, "learning_rate": 0.00014227559535925097, "loss": 0.9884, "step": 1424 }, { "epoch": 0.5793860540760317, "grad_norm": 0.09632913023233414, "learning_rate": 0.00014223488703439852, "loss": 1.0215, "step": 1425 }, { "epoch": 0.5797926407806465, "grad_norm": 0.0939357578754425, "learning_rate": 0.0001421941787095461, "loss": 1.0785, "step": 1426 }, { "epoch": 0.5801992274852612, "grad_norm": 0.08809401839971542, "learning_rate": 0.00014215347038469369, "loss": 0.9597, "step": 1427 }, { "epoch": 0.580605814189876, "grad_norm": 0.08961009234189987, "learning_rate": 0.00014211276205984127, "loss": 0.9988, "step": 1428 }, { "epoch": 0.5810124008944908, "grad_norm": 0.0883122980594635, "learning_rate": 0.00014207205373498882, "loss": 1.0566, "step": 1429 }, { "epoch": 0.5814189875991055, "grad_norm": 0.09150592237710953, "learning_rate": 0.00014203134541013637, "loss": 1.0875, "step": 1430 }, { "epoch": 0.5818255743037203, "grad_norm": 0.097344771027565, "learning_rate": 0.00014199063708528396, "loss": 1.0141, "step": 1431 }, { "epoch": 0.5822321610083351, "grad_norm": 0.09442117810249329, "learning_rate": 0.0001419499287604315, "loss": 0.9769, "step": 1432 }, { "epoch": 0.5826387477129498, "grad_norm": 0.08522289991378784, "learning_rate": 0.0001419092204355791, "loss": 0.9396, "step": 1433 }, { "epoch": 0.5830453344175646, "grad_norm": 0.0909838005900383, "learning_rate": 0.00014186851211072665, "loss": 0.9983, "step": 1434 }, { "epoch": 0.5834519211221793, "grad_norm": 0.09627141058444977, "learning_rate": 0.00014182780378587423, "loss": 1.0929, "step": 1435 }, { "epoch": 
0.583858507826794, "grad_norm": 0.08965554088354111, "learning_rate": 0.00014178709546102178, "loss": 0.9145, "step": 1436 }, { "epoch": 0.5842650945314088, "grad_norm": 0.09004207700490952, "learning_rate": 0.00014174638713616933, "loss": 0.9921, "step": 1437 }, { "epoch": 0.5846716812360235, "grad_norm": 0.09295787662267685, "learning_rate": 0.00014170567881131692, "loss": 0.9756, "step": 1438 }, { "epoch": 0.5850782679406383, "grad_norm": 0.0893683210015297, "learning_rate": 0.0001416649704864645, "loss": 0.8974, "step": 1439 }, { "epoch": 0.5854848546452531, "grad_norm": 0.08255141973495483, "learning_rate": 0.00014162426216161205, "loss": 0.9201, "step": 1440 }, { "epoch": 0.5858914413498678, "grad_norm": 0.0966111272573471, "learning_rate": 0.00014158355383675963, "loss": 1.0611, "step": 1441 }, { "epoch": 0.5862980280544826, "grad_norm": 0.09531056135892868, "learning_rate": 0.0001415428455119072, "loss": 1.0989, "step": 1442 }, { "epoch": 0.5867046147590974, "grad_norm": 0.09289577603340149, "learning_rate": 0.00014150213718705477, "loss": 1.0232, "step": 1443 }, { "epoch": 0.5871112014637121, "grad_norm": 0.10038848221302032, "learning_rate": 0.00014146142886220232, "loss": 1.013, "step": 1444 }, { "epoch": 0.5875177881683269, "grad_norm": 0.09008078277111053, "learning_rate": 0.0001414207205373499, "loss": 0.9039, "step": 1445 }, { "epoch": 0.5879243748729417, "grad_norm": 0.08941890299320221, "learning_rate": 0.00014138001221249746, "loss": 0.8866, "step": 1446 }, { "epoch": 0.5883309615775564, "grad_norm": 0.08407185226678848, "learning_rate": 0.00014133930388764504, "loss": 0.9468, "step": 1447 }, { "epoch": 0.5887375482821712, "grad_norm": 0.096216581761837, "learning_rate": 0.0001412985955627926, "loss": 1.0516, "step": 1448 }, { "epoch": 0.589144134986786, "grad_norm": 0.09403221309185028, "learning_rate": 0.00014125788723794015, "loss": 0.9771, "step": 1449 }, { "epoch": 0.5895507216914007, "grad_norm": 0.08534131199121475, "learning_rate": 
0.00014121717891308775, "loss": 0.9012, "step": 1450 }, { "epoch": 0.5899573083960155, "grad_norm": 0.09011968225240707, "learning_rate": 0.0001411764705882353, "loss": 0.9724, "step": 1451 }, { "epoch": 0.5903638951006303, "grad_norm": 0.08891688287258148, "learning_rate": 0.00014113576226338286, "loss": 0.9225, "step": 1452 }, { "epoch": 0.5907704818052449, "grad_norm": 0.08605680614709854, "learning_rate": 0.00014109505393853044, "loss": 0.9403, "step": 1453 }, { "epoch": 0.5911770685098597, "grad_norm": 0.08760562539100647, "learning_rate": 0.000141054345613678, "loss": 0.9728, "step": 1454 }, { "epoch": 0.5915836552144745, "grad_norm": 0.08932702243328094, "learning_rate": 0.00014101363728882558, "loss": 1.0377, "step": 1455 }, { "epoch": 0.5919902419190892, "grad_norm": 0.09998058527708054, "learning_rate": 0.00014097292896397313, "loss": 1.0434, "step": 1456 }, { "epoch": 0.592396828623704, "grad_norm": 0.09377194941043854, "learning_rate": 0.00014093222063912071, "loss": 0.9308, "step": 1457 }, { "epoch": 0.5928034153283188, "grad_norm": 0.08387821912765503, "learning_rate": 0.00014089151231426827, "loss": 0.8875, "step": 1458 }, { "epoch": 0.5932100020329335, "grad_norm": 0.08756202459335327, "learning_rate": 0.00014085080398941582, "loss": 1.0069, "step": 1459 }, { "epoch": 0.5936165887375483, "grad_norm": 0.08637526631355286, "learning_rate": 0.0001408100956645634, "loss": 0.9067, "step": 1460 }, { "epoch": 0.594023175442163, "grad_norm": 0.08818566054105759, "learning_rate": 0.00014076938733971096, "loss": 0.9375, "step": 1461 }, { "epoch": 0.5944297621467778, "grad_norm": 0.09050768613815308, "learning_rate": 0.00014072867901485857, "loss": 0.9742, "step": 1462 }, { "epoch": 0.5948363488513926, "grad_norm": 0.08764854818582535, "learning_rate": 0.00014068797069000612, "loss": 0.8995, "step": 1463 }, { "epoch": 0.5952429355560073, "grad_norm": 0.0841783955693245, "learning_rate": 0.00014064726236515367, "loss": 0.9179, "step": 1464 }, { "epoch": 
0.5956495222606221, "grad_norm": 0.08915995806455612, "learning_rate": 0.00014060655404030126, "loss": 0.9973, "step": 1465 }, { "epoch": 0.5960561089652369, "grad_norm": 0.08400030434131622, "learning_rate": 0.0001405658457154488, "loss": 0.9374, "step": 1466 }, { "epoch": 0.5964626956698516, "grad_norm": 0.08585075289011002, "learning_rate": 0.0001405251373905964, "loss": 0.9371, "step": 1467 }, { "epoch": 0.5968692823744663, "grad_norm": 5.189364433288574, "learning_rate": 0.00014048442906574395, "loss": 0.9443, "step": 1468 }, { "epoch": 0.5972758690790811, "grad_norm": 0.11196129769086838, "learning_rate": 0.00014044372074089153, "loss": 0.8475, "step": 1469 }, { "epoch": 0.5976824557836958, "grad_norm": 0.13671468198299408, "learning_rate": 0.00014040301241603908, "loss": 0.9082, "step": 1470 }, { "epoch": 0.5980890424883106, "grad_norm": 0.1605953872203827, "learning_rate": 0.00014036230409118663, "loss": 1.0311, "step": 1471 }, { "epoch": 0.5984956291929254, "grad_norm": 0.1232098862528801, "learning_rate": 0.00014032159576633422, "loss": 0.9131, "step": 1472 }, { "epoch": 0.5989022158975401, "grad_norm": 0.10262708365917206, "learning_rate": 0.0001402808874414818, "loss": 0.998, "step": 1473 }, { "epoch": 0.5993088026021549, "grad_norm": 0.10314701497554779, "learning_rate": 0.00014024017911662938, "loss": 0.9527, "step": 1474 }, { "epoch": 0.5997153893067697, "grad_norm": 0.10268500447273254, "learning_rate": 0.00014019947079177693, "loss": 1.0287, "step": 1475 }, { "epoch": 0.6001219760113844, "grad_norm": 0.10218296945095062, "learning_rate": 0.00014015876246692449, "loss": 1.0562, "step": 1476 }, { "epoch": 0.6005285627159992, "grad_norm": 0.10347164422273636, "learning_rate": 0.00014011805414207207, "loss": 1.0227, "step": 1477 }, { "epoch": 0.600935149420614, "grad_norm": 0.09892403334379196, "learning_rate": 0.00014007734581721962, "loss": 0.8526, "step": 1478 }, { "epoch": 0.6013417361252287, "grad_norm": 0.10327230393886566, "learning_rate": 
0.0001400366374923672, "loss": 0.9473, "step": 1479 }, { "epoch": 0.6017483228298435, "grad_norm": 0.10661543160676956, "learning_rate": 0.00013999592916751476, "loss": 1.0807, "step": 1480 }, { "epoch": 0.6021549095344583, "grad_norm": 0.10507283359766006, "learning_rate": 0.00013995522084266234, "loss": 1.0405, "step": 1481 }, { "epoch": 0.602561496239073, "grad_norm": 0.09952735900878906, "learning_rate": 0.0001399145125178099, "loss": 1.0233, "step": 1482 }, { "epoch": 0.6029680829436878, "grad_norm": 0.0861600711941719, "learning_rate": 0.00013987380419295745, "loss": 0.9264, "step": 1483 }, { "epoch": 0.6033746696483026, "grad_norm": 0.09560652077198029, "learning_rate": 0.00013983309586810503, "loss": 0.961, "step": 1484 }, { "epoch": 0.6037812563529172, "grad_norm": 0.09961631894111633, "learning_rate": 0.0001397923875432526, "loss": 1.0687, "step": 1485 }, { "epoch": 0.604187843057532, "grad_norm": 0.10031979531049728, "learning_rate": 0.0001397516792184002, "loss": 1.0088, "step": 1486 }, { "epoch": 0.6045944297621467, "grad_norm": 0.09212915599346161, "learning_rate": 0.00013971097089354774, "loss": 1.0183, "step": 1487 }, { "epoch": 0.6050010164667615, "grad_norm": 0.09258651733398438, "learning_rate": 0.0001396702625686953, "loss": 0.9473, "step": 1488 }, { "epoch": 0.6054076031713763, "grad_norm": 0.09315144270658493, "learning_rate": 0.00013962955424384288, "loss": 1.0049, "step": 1489 }, { "epoch": 0.605814189875991, "grad_norm": 0.08820061385631561, "learning_rate": 0.00013958884591899043, "loss": 0.9485, "step": 1490 }, { "epoch": 0.6062207765806058, "grad_norm": 852.4391479492188, "learning_rate": 0.00013954813759413801, "loss": 0.9986, "step": 1491 }, { "epoch": 0.6066273632852206, "grad_norm": 0.0940237045288086, "learning_rate": 0.00013950742926928557, "loss": 1.0065, "step": 1492 }, { "epoch": 0.6070339499898353, "grad_norm": 0.09184816479682922, "learning_rate": 0.00013946672094443315, "loss": 1.0186, "step": 1493 }, { "epoch": 
0.6074405366944501, "grad_norm": 0.08479593694210052, "learning_rate": 0.0001394260126195807, "loss": 0.9213, "step": 1494 }, { "epoch": 0.6078471233990649, "grad_norm": 0.10088304430246353, "learning_rate": 0.00013938530429472826, "loss": 1.1029, "step": 1495 }, { "epoch": 0.6082537101036796, "grad_norm": 0.08876685053110123, "learning_rate": 0.00013934459596987584, "loss": 0.9532, "step": 1496 }, { "epoch": 0.6086602968082944, "grad_norm": 0.10209202021360397, "learning_rate": 0.00013930388764502342, "loss": 1.0292, "step": 1497 }, { "epoch": 0.6090668835129092, "grad_norm": 0.09144751727581024, "learning_rate": 0.000139263179320171, "loss": 0.9214, "step": 1498 }, { "epoch": 0.6094734702175238, "grad_norm": 0.08805158734321594, "learning_rate": 0.00013922247099531856, "loss": 0.8983, "step": 1499 }, { "epoch": 0.6098800569221386, "grad_norm": 0.0918235033750534, "learning_rate": 0.0001391817626704661, "loss": 0.9198, "step": 1500 }, { "epoch": 0.6102866436267534, "grad_norm": 0.09051943570375443, "learning_rate": 0.0001391410543456137, "loss": 0.9885, "step": 1501 }, { "epoch": 0.6106932303313681, "grad_norm": 0.09222988784313202, "learning_rate": 0.00013910034602076124, "loss": 0.8739, "step": 1502 }, { "epoch": 0.6110998170359829, "grad_norm": 0.1040385290980339, "learning_rate": 0.00013905963769590883, "loss": 1.0559, "step": 1503 }, { "epoch": 0.6115064037405977, "grad_norm": 0.09393730759620667, "learning_rate": 0.00013901892937105638, "loss": 1.0138, "step": 1504 }, { "epoch": 0.6119129904452124, "grad_norm": 0.09828665107488632, "learning_rate": 0.00013897822104620396, "loss": 0.99, "step": 1505 }, { "epoch": 0.6123195771498272, "grad_norm": 0.08924803137779236, "learning_rate": 0.00013893751272135152, "loss": 0.9948, "step": 1506 }, { "epoch": 0.612726163854442, "grad_norm": 0.09292086958885193, "learning_rate": 0.00013889680439649907, "loss": 0.9319, "step": 1507 }, { "epoch": 0.6131327505590567, "grad_norm": 0.09370770305395126, "learning_rate": 
0.00013885609607164668, "loss": 0.9535, "step": 1508 }, { "epoch": 0.6135393372636715, "grad_norm": 0.0799320712685585, "learning_rate": 0.00013881538774679423, "loss": 0.8244, "step": 1509 }, { "epoch": 0.6139459239682863, "grad_norm": 0.0891839936375618, "learning_rate": 0.0001387746794219418, "loss": 1.0157, "step": 1510 }, { "epoch": 0.614352510672901, "grad_norm": 0.09138181805610657, "learning_rate": 0.00013873397109708937, "loss": 0.9066, "step": 1511 }, { "epoch": 0.6147590973775158, "grad_norm": 0.09552167356014252, "learning_rate": 0.00013869326277223692, "loss": 1.0479, "step": 1512 }, { "epoch": 0.6151656840821305, "grad_norm": 0.09162238240242004, "learning_rate": 0.0001386525544473845, "loss": 1.016, "step": 1513 }, { "epoch": 0.6155722707867453, "grad_norm": 0.09654813259840012, "learning_rate": 0.00013861184612253206, "loss": 0.993, "step": 1514 }, { "epoch": 0.61597885749136, "grad_norm": 0.0941232442855835, "learning_rate": 0.00013857113779767964, "loss": 1.0068, "step": 1515 }, { "epoch": 0.6163854441959747, "grad_norm": 0.0947796180844307, "learning_rate": 0.0001385304294728272, "loss": 1.0618, "step": 1516 }, { "epoch": 0.6167920309005895, "grad_norm": 0.08732841163873672, "learning_rate": 0.00013848972114797477, "loss": 0.9348, "step": 1517 }, { "epoch": 0.6171986176052043, "grad_norm": 0.09297166019678116, "learning_rate": 0.00013844901282312233, "loss": 1.0029, "step": 1518 }, { "epoch": 0.617605204309819, "grad_norm": 0.09339512884616852, "learning_rate": 0.00013840830449826988, "loss": 1.0557, "step": 1519 }, { "epoch": 0.6180117910144338, "grad_norm": 0.09277696907520294, "learning_rate": 0.0001383675961734175, "loss": 0.9322, "step": 1520 }, { "epoch": 0.6184183777190486, "grad_norm": 0.08687552809715271, "learning_rate": 0.00013832688784856504, "loss": 0.9162, "step": 1521 }, { "epoch": 0.6188249644236633, "grad_norm": 0.08844698965549469, "learning_rate": 0.00013828617952371262, "loss": 0.9958, "step": 1522 }, { "epoch": 
0.6192315511282781, "grad_norm": 0.09178265184164047, "learning_rate": 0.00013824547119886018, "loss": 0.8926, "step": 1523 }, { "epoch": 0.6196381378328929, "grad_norm": 0.09063131362199783, "learning_rate": 0.00013820476287400773, "loss": 1.0047, "step": 1524 }, { "epoch": 0.6200447245375076, "grad_norm": 0.09506388753652573, "learning_rate": 0.00013816405454915531, "loss": 1.112, "step": 1525 }, { "epoch": 0.6204513112421224, "grad_norm": 0.0870959535241127, "learning_rate": 0.00013812334622430287, "loss": 1.0074, "step": 1526 }, { "epoch": 0.6208578979467372, "grad_norm": 0.08569116145372391, "learning_rate": 0.00013808263789945045, "loss": 0.9702, "step": 1527 }, { "epoch": 0.6212644846513519, "grad_norm": 0.09870801120996475, "learning_rate": 0.000138041929574598, "loss": 1.0475, "step": 1528 }, { "epoch": 0.6216710713559667, "grad_norm": 0.09899303317070007, "learning_rate": 0.00013800122124974558, "loss": 1.0806, "step": 1529 }, { "epoch": 0.6220776580605815, "grad_norm": 0.09373268485069275, "learning_rate": 0.00013796051292489314, "loss": 1.0295, "step": 1530 }, { "epoch": 0.6224842447651961, "grad_norm": 0.09074109047651291, "learning_rate": 0.00013791980460004072, "loss": 0.9462, "step": 1531 }, { "epoch": 0.622890831469811, "grad_norm": 0.09384390711784363, "learning_rate": 0.0001378790962751883, "loss": 1.0606, "step": 1532 }, { "epoch": 0.6232974181744257, "grad_norm": 0.0943252295255661, "learning_rate": 0.00013783838795033585, "loss": 1.1258, "step": 1533 }, { "epoch": 0.6237040048790404, "grad_norm": 0.08777976781129837, "learning_rate": 0.0001377976796254834, "loss": 0.9621, "step": 1534 }, { "epoch": 0.6241105915836552, "grad_norm": 0.09006936848163605, "learning_rate": 0.000137756971300631, "loss": 0.9815, "step": 1535 }, { "epoch": 0.62451717828827, "grad_norm": 0.1147993803024292, "learning_rate": 0.00013771626297577854, "loss": 0.9917, "step": 1536 }, { "epoch": 0.6249237649928847, "grad_norm": 0.09408791363239288, "learning_rate": 
0.00013767555465092613, "loss": 1.024, "step": 1537 }, { "epoch": 0.6253303516974995, "grad_norm": 0.09530872851610184, "learning_rate": 0.00013763484632607368, "loss": 1.0339, "step": 1538 }, { "epoch": 0.6257369384021142, "grad_norm": 0.09337632358074188, "learning_rate": 0.00013759413800122126, "loss": 1.031, "step": 1539 }, { "epoch": 0.626143525106729, "grad_norm": 0.08535618335008621, "learning_rate": 0.00013755342967636881, "loss": 0.9597, "step": 1540 }, { "epoch": 0.6265501118113438, "grad_norm": 0.09226896613836288, "learning_rate": 0.0001375127213515164, "loss": 1.0017, "step": 1541 }, { "epoch": 0.6269566985159585, "grad_norm": 0.08831244707107544, "learning_rate": 0.00013747201302666395, "loss": 0.9695, "step": 1542 }, { "epoch": 0.6273632852205733, "grad_norm": 0.07351087778806686, "learning_rate": 0.00013743130470181153, "loss": 0.8212, "step": 1543 }, { "epoch": 0.6277698719251881, "grad_norm": 0.09002837538719177, "learning_rate": 0.0001373905963769591, "loss": 0.9882, "step": 1544 }, { "epoch": 0.6281764586298028, "grad_norm": 0.09743615984916687, "learning_rate": 0.00013734988805210667, "loss": 1.0246, "step": 1545 }, { "epoch": 0.6285830453344176, "grad_norm": 0.09634383767843246, "learning_rate": 0.00013730917972725422, "loss": 1.0452, "step": 1546 }, { "epoch": 0.6289896320390324, "grad_norm": 0.09213767200708389, "learning_rate": 0.0001372684714024018, "loss": 1.0618, "step": 1547 }, { "epoch": 0.629396218743647, "grad_norm": 0.08717525005340576, "learning_rate": 0.00013722776307754936, "loss": 0.9834, "step": 1548 }, { "epoch": 0.6298028054482618, "grad_norm": 0.08541104942560196, "learning_rate": 0.00013718705475269694, "loss": 0.9332, "step": 1549 }, { "epoch": 0.6302093921528766, "grad_norm": 0.09747796505689621, "learning_rate": 0.0001371463464278445, "loss": 1.0459, "step": 1550 }, { "epoch": 0.6306159788574913, "grad_norm": 0.09076548367738724, "learning_rate": 0.00013710563810299207, "loss": 0.9951, "step": 1551 }, { "epoch": 
0.6310225655621061, "grad_norm": 0.08712035417556763, "learning_rate": 0.00013706492977813963, "loss": 0.9876, "step": 1552 }, { "epoch": 0.6314291522667209, "grad_norm": 0.09062602370977402, "learning_rate": 0.00013702422145328718, "loss": 0.9246, "step": 1553 }, { "epoch": 0.6318357389713356, "grad_norm": 0.0910324677824974, "learning_rate": 0.0001369835131284348, "loss": 0.884, "step": 1554 }, { "epoch": 0.6322423256759504, "grad_norm": 0.09255006164312363, "learning_rate": 0.00013694280480358234, "loss": 1.0172, "step": 1555 }, { "epoch": 0.6326489123805652, "grad_norm": 0.0950237512588501, "learning_rate": 0.00013690209647872992, "loss": 1.0263, "step": 1556 }, { "epoch": 0.6330554990851799, "grad_norm": 0.09103222191333771, "learning_rate": 0.00013686138815387748, "loss": 1.0245, "step": 1557 }, { "epoch": 0.6334620857897947, "grad_norm": 0.09043283015489578, "learning_rate": 0.00013682067982902503, "loss": 1.0197, "step": 1558 }, { "epoch": 0.6338686724944095, "grad_norm": 0.08311565965414047, "learning_rate": 0.0001367799715041726, "loss": 0.9372, "step": 1559 }, { "epoch": 0.6342752591990242, "grad_norm": 0.09156910330057144, "learning_rate": 0.00013673926317932017, "loss": 1.0579, "step": 1560 }, { "epoch": 0.634681845903639, "grad_norm": 0.08262625336647034, "learning_rate": 0.00013669855485446775, "loss": 0.9047, "step": 1561 }, { "epoch": 0.6350884326082538, "grad_norm": 0.09856829047203064, "learning_rate": 0.0001366578465296153, "loss": 1.0933, "step": 1562 }, { "epoch": 0.6354950193128684, "grad_norm": 0.09453229606151581, "learning_rate": 0.00013661713820476288, "loss": 1.0361, "step": 1563 }, { "epoch": 0.6359016060174832, "grad_norm": 0.09291166812181473, "learning_rate": 0.00013657642987991044, "loss": 0.9099, "step": 1564 }, { "epoch": 0.636308192722098, "grad_norm": 0.09416390210390091, "learning_rate": 0.000136535721555058, "loss": 0.9919, "step": 1565 }, { "epoch": 0.6367147794267127, "grad_norm": 0.08964714407920837, "learning_rate": 
0.0001364950132302056, "loss": 1.0352, "step": 1566 }, { "epoch": 0.6371213661313275, "grad_norm": 0.1002277210354805, "learning_rate": 0.00013645430490535315, "loss": 1.0121, "step": 1567 }, { "epoch": 0.6375279528359422, "grad_norm": 0.09013176709413528, "learning_rate": 0.00013641359658050074, "loss": 0.943, "step": 1568 }, { "epoch": 0.637934539540557, "grad_norm": 0.09195754677057266, "learning_rate": 0.0001363728882556483, "loss": 1.0101, "step": 1569 }, { "epoch": 0.6383411262451718, "grad_norm": 0.09277264773845673, "learning_rate": 0.00013633217993079584, "loss": 1.0411, "step": 1570 }, { "epoch": 0.6387477129497865, "grad_norm": 0.09677015990018845, "learning_rate": 0.00013629147160594342, "loss": 1.0047, "step": 1571 }, { "epoch": 0.6391542996544013, "grad_norm": 0.09898823499679565, "learning_rate": 0.00013625076328109098, "loss": 1.0897, "step": 1572 }, { "epoch": 0.6395608863590161, "grad_norm": 0.09134434908628464, "learning_rate": 0.00013621005495623856, "loss": 1.0471, "step": 1573 }, { "epoch": 0.6399674730636308, "grad_norm": 0.09015446901321411, "learning_rate": 0.00013616934663138611, "loss": 0.9521, "step": 1574 }, { "epoch": 0.6403740597682456, "grad_norm": 0.09361066669225693, "learning_rate": 0.0001361286383065337, "loss": 1.0378, "step": 1575 }, { "epoch": 0.6407806464728604, "grad_norm": 0.10741425305604935, "learning_rate": 0.00013608792998168125, "loss": 1.0042, "step": 1576 }, { "epoch": 0.6411872331774751, "grad_norm": 0.09339326620101929, "learning_rate": 0.00013604722165682883, "loss": 0.9641, "step": 1577 }, { "epoch": 0.6415938198820899, "grad_norm": 0.09786434471607208, "learning_rate": 0.0001360065133319764, "loss": 0.9643, "step": 1578 }, { "epoch": 0.6420004065867047, "grad_norm": 0.08545216172933578, "learning_rate": 0.00013596580500712397, "loss": 0.9413, "step": 1579 }, { "epoch": 0.6424069932913193, "grad_norm": 0.09042125940322876, "learning_rate": 0.00013592509668227155, "loss": 0.9105, "step": 1580 }, { "epoch": 
0.6428135799959341, "grad_norm": 0.08778928220272064, "learning_rate": 0.0001358843883574191, "loss": 1.0262, "step": 1581 }, { "epoch": 0.6432201667005489, "grad_norm": 0.08905961364507675, "learning_rate": 0.00013584368003256666, "loss": 1.0317, "step": 1582 }, { "epoch": 0.6436267534051636, "grad_norm": 0.09242242574691772, "learning_rate": 0.00013580297170771424, "loss": 0.9415, "step": 1583 }, { "epoch": 0.6440333401097784, "grad_norm": 0.08425027132034302, "learning_rate": 0.0001357622633828618, "loss": 0.8964, "step": 1584 }, { "epoch": 0.6444399268143932, "grad_norm": 0.0858960896730423, "learning_rate": 0.00013572155505800937, "loss": 0.9441, "step": 1585 }, { "epoch": 0.6448465135190079, "grad_norm": 0.09374553710222244, "learning_rate": 0.00013568084673315693, "loss": 0.9784, "step": 1586 }, { "epoch": 0.6452531002236227, "grad_norm": 0.09684876352548599, "learning_rate": 0.0001356401384083045, "loss": 0.9867, "step": 1587 }, { "epoch": 0.6456596869282375, "grad_norm": 0.0853944793343544, "learning_rate": 0.00013559943008345206, "loss": 0.9136, "step": 1588 }, { "epoch": 0.6460662736328522, "grad_norm": 0.0905388742685318, "learning_rate": 0.00013555872175859964, "loss": 0.9335, "step": 1589 }, { "epoch": 0.646472860337467, "grad_norm": 0.08938907831907272, "learning_rate": 0.00013551801343374722, "loss": 0.9889, "step": 1590 }, { "epoch": 0.6468794470420818, "grad_norm": 0.08857300132513046, "learning_rate": 0.00013547730510889478, "loss": 0.9986, "step": 1591 }, { "epoch": 0.6472860337466965, "grad_norm": 0.09151600301265717, "learning_rate": 0.00013543659678404236, "loss": 1.0025, "step": 1592 }, { "epoch": 0.6476926204513113, "grad_norm": 0.08548744767904282, "learning_rate": 0.0001353958884591899, "loss": 1.012, "step": 1593 }, { "epoch": 0.648099207155926, "grad_norm": 0.08982311189174652, "learning_rate": 0.00013535518013433747, "loss": 0.9827, "step": 1594 }, { "epoch": 0.6485057938605407, "grad_norm": 0.09153248369693756, "learning_rate": 
0.00013531447180948505, "loss": 0.9818, "step": 1595 }, { "epoch": 0.6489123805651555, "grad_norm": 0.1022023931145668, "learning_rate": 0.0001352737634846326, "loss": 1.054, "step": 1596 }, { "epoch": 0.6493189672697702, "grad_norm": 0.09080366045236588, "learning_rate": 0.00013523305515978018, "loss": 0.9667, "step": 1597 }, { "epoch": 0.649725553974385, "grad_norm": 0.0935145765542984, "learning_rate": 0.00013519234683492774, "loss": 0.9668, "step": 1598 }, { "epoch": 0.6501321406789998, "grad_norm": 0.09892317652702332, "learning_rate": 0.00013515163851007532, "loss": 1.012, "step": 1599 }, { "epoch": 0.6505387273836145, "grad_norm": 0.09385450929403305, "learning_rate": 0.0001351109301852229, "loss": 1.1, "step": 1600 }, { "epoch": 0.6509453140882293, "grad_norm": 0.09270552545785904, "learning_rate": 0.00013507022186037045, "loss": 0.9509, "step": 1601 }, { "epoch": 0.6513519007928441, "grad_norm": 0.09725828468799591, "learning_rate": 0.00013502951353551804, "loss": 1.0435, "step": 1602 }, { "epoch": 0.6517584874974588, "grad_norm": 0.096989206969738, "learning_rate": 0.0001349888052106656, "loss": 1.0152, "step": 1603 }, { "epoch": 0.6521650742020736, "grad_norm": 0.09739220887422562, "learning_rate": 0.00013494809688581317, "loss": 0.9834, "step": 1604 }, { "epoch": 0.6525716609066884, "grad_norm": 0.07972859591245651, "learning_rate": 0.00013490738856096072, "loss": 0.8542, "step": 1605 }, { "epoch": 0.6529782476113031, "grad_norm": 0.09360089153051376, "learning_rate": 0.00013486668023610828, "loss": 1.0077, "step": 1606 }, { "epoch": 0.6533848343159179, "grad_norm": 0.08999258279800415, "learning_rate": 0.00013482597191125586, "loss": 0.8802, "step": 1607 }, { "epoch": 0.6537914210205327, "grad_norm": 0.0885370746254921, "learning_rate": 0.00013478526358640341, "loss": 0.9867, "step": 1608 }, { "epoch": 0.6541980077251474, "grad_norm": 0.0924537256360054, "learning_rate": 0.000134744555261551, "loss": 0.9653, "step": 1609 }, { "epoch": 
0.6546045944297622, "grad_norm": 0.08841130137443542, "learning_rate": 0.00013470384693669855, "loss": 0.9005, "step": 1610 }, { "epoch": 0.655011181134377, "grad_norm": 0.0968664139509201, "learning_rate": 0.00013466313861184613, "loss": 1.1191, "step": 1611 }, { "epoch": 0.6554177678389916, "grad_norm": 0.0909125879406929, "learning_rate": 0.0001346224302869937, "loss": 1.0247, "step": 1612 }, { "epoch": 0.6558243545436064, "grad_norm": 0.1032382994890213, "learning_rate": 0.00013458172196214127, "loss": 1.021, "step": 1613 }, { "epoch": 0.6562309412482212, "grad_norm": 0.08680799603462219, "learning_rate": 0.00013454101363728885, "loss": 0.9425, "step": 1614 }, { "epoch": 0.6566375279528359, "grad_norm": 0.08841447532176971, "learning_rate": 0.0001345003053124364, "loss": 0.9105, "step": 1615 }, { "epoch": 0.6570441146574507, "grad_norm": 0.09229273349046707, "learning_rate": 0.00013445959698758398, "loss": 0.9492, "step": 1616 }, { "epoch": 0.6574507013620655, "grad_norm": 0.09328685700893402, "learning_rate": 0.00013441888866273154, "loss": 1.0456, "step": 1617 }, { "epoch": 0.6578572880666802, "grad_norm": 0.08448266983032227, "learning_rate": 0.0001343781803378791, "loss": 0.9209, "step": 1618 }, { "epoch": 0.658263874771295, "grad_norm": 0.09344170242547989, "learning_rate": 0.00013433747201302667, "loss": 1.0107, "step": 1619 }, { "epoch": 0.6586704614759097, "grad_norm": 0.08675231039524078, "learning_rate": 0.00013429676368817423, "loss": 0.989, "step": 1620 }, { "epoch": 0.6590770481805245, "grad_norm": 0.09648977965116501, "learning_rate": 0.0001342560553633218, "loss": 1.079, "step": 1621 }, { "epoch": 0.6594836348851393, "grad_norm": 0.08079522848129272, "learning_rate": 0.00013421534703846936, "loss": 0.862, "step": 1622 }, { "epoch": 0.659890221589754, "grad_norm": 0.1015796810388565, "learning_rate": 0.00013417463871361694, "loss": 1.136, "step": 1623 }, { "epoch": 0.6602968082943688, "grad_norm": 0.08189254999160767, "learning_rate": 
0.00013413393038876452, "loss": 0.9161, "step": 1624 }, { "epoch": 0.6607033949989836, "grad_norm": 0.09128617495298386, "learning_rate": 0.00013409322206391208, "loss": 0.9605, "step": 1625 }, { "epoch": 0.6611099817035982, "grad_norm": 0.09256181865930557, "learning_rate": 0.00013405251373905966, "loss": 0.9844, "step": 1626 }, { "epoch": 0.661516568408213, "grad_norm": 0.092183917760849, "learning_rate": 0.0001340118054142072, "loss": 1.0694, "step": 1627 }, { "epoch": 0.6619231551128278, "grad_norm": 0.10037260502576828, "learning_rate": 0.00013397109708935477, "loss": 1.1395, "step": 1628 }, { "epoch": 0.6623297418174425, "grad_norm": 0.08758927881717682, "learning_rate": 0.00013393038876450235, "loss": 0.9494, "step": 1629 }, { "epoch": 0.6627363285220573, "grad_norm": 0.08407801389694214, "learning_rate": 0.0001338896804396499, "loss": 0.8953, "step": 1630 }, { "epoch": 0.6631429152266721, "grad_norm": 0.10363683849573135, "learning_rate": 0.00013384897211479748, "loss": 1.0613, "step": 1631 }, { "epoch": 0.6635495019312868, "grad_norm": 0.0939316600561142, "learning_rate": 0.00013380826378994504, "loss": 0.9668, "step": 1632 }, { "epoch": 0.6639560886359016, "grad_norm": 0.097317174077034, "learning_rate": 0.00013376755546509262, "loss": 1.1024, "step": 1633 }, { "epoch": 0.6643626753405164, "grad_norm": 0.10394629091024399, "learning_rate": 0.00013372684714024017, "loss": 1.0706, "step": 1634 }, { "epoch": 0.6647692620451311, "grad_norm": 0.09405668824911118, "learning_rate": 0.00013368613881538775, "loss": 1.0496, "step": 1635 }, { "epoch": 0.6651758487497459, "grad_norm": 0.08976142853498459, "learning_rate": 0.00013364543049053533, "loss": 0.9811, "step": 1636 }, { "epoch": 0.6655824354543607, "grad_norm": 0.09220533818006516, "learning_rate": 0.0001336047221656829, "loss": 1.0655, "step": 1637 }, { "epoch": 0.6659890221589754, "grad_norm": 0.09313860535621643, "learning_rate": 0.00013356401384083047, "loss": 0.9664, "step": 1638 }, { "epoch": 
0.6663956088635902, "grad_norm": 0.08653722703456879, "learning_rate": 0.00013352330551597802, "loss": 0.91, "step": 1639 }, { "epoch": 0.666802195568205, "grad_norm": 0.09094205498695374, "learning_rate": 0.00013348259719112558, "loss": 1.0011, "step": 1640 }, { "epoch": 0.6672087822728197, "grad_norm": 0.09969717264175415, "learning_rate": 0.00013344188886627316, "loss": 1.0853, "step": 1641 }, { "epoch": 0.6676153689774345, "grad_norm": 0.08996472507715225, "learning_rate": 0.0001334011805414207, "loss": 0.9693, "step": 1642 }, { "epoch": 0.6680219556820493, "grad_norm": 0.08930208534002304, "learning_rate": 0.0001333604722165683, "loss": 1.0078, "step": 1643 }, { "epoch": 0.6684285423866639, "grad_norm": 0.09799496084451675, "learning_rate": 0.00013331976389171585, "loss": 1.0764, "step": 1644 }, { "epoch": 0.6688351290912787, "grad_norm": 0.1000712588429451, "learning_rate": 0.00013327905556686343, "loss": 1.0769, "step": 1645 }, { "epoch": 0.6692417157958934, "grad_norm": 0.09583432227373123, "learning_rate": 0.000133238347242011, "loss": 1.0311, "step": 1646 }, { "epoch": 0.6696483025005082, "grad_norm": 0.10381270945072174, "learning_rate": 0.00013319763891715857, "loss": 1.0879, "step": 1647 }, { "epoch": 0.670054889205123, "grad_norm": 0.09310910850763321, "learning_rate": 0.00013315693059230615, "loss": 0.9875, "step": 1648 }, { "epoch": 0.6704614759097377, "grad_norm": 0.09691096842288971, "learning_rate": 0.0001331162222674537, "loss": 1.001, "step": 1649 }, { "epoch": 0.6708680626143525, "grad_norm": 0.08782976865768433, "learning_rate": 0.00013307551394260128, "loss": 1.0192, "step": 1650 }, { "epoch": 0.6712746493189673, "grad_norm": 0.07851552218198776, "learning_rate": 0.00013303480561774884, "loss": 0.8345, "step": 1651 }, { "epoch": 0.671681236023582, "grad_norm": 0.09602700173854828, "learning_rate": 0.0001329940972928964, "loss": 0.9761, "step": 1652 }, { "epoch": 0.6720878227281968, "grad_norm": 0.09454475343227386, "learning_rate": 
0.00013295338896804397, "loss": 0.9775, "step": 1653 }, { "epoch": 0.6724944094328116, "grad_norm": 0.09530249238014221, "learning_rate": 0.00013291268064319153, "loss": 0.9155, "step": 1654 }, { "epoch": 0.6729009961374263, "grad_norm": 0.09022442251443863, "learning_rate": 0.0001328719723183391, "loss": 0.9651, "step": 1655 }, { "epoch": 0.6733075828420411, "grad_norm": 0.09096933156251907, "learning_rate": 0.00013283126399348666, "loss": 1.003, "step": 1656 }, { "epoch": 0.6737141695466559, "grad_norm": 0.09274188429117203, "learning_rate": 0.00013279055566863424, "loss": 1.0024, "step": 1657 }, { "epoch": 0.6741207562512705, "grad_norm": 0.09318679571151733, "learning_rate": 0.00013274984734378182, "loss": 0.9613, "step": 1658 }, { "epoch": 0.6745273429558853, "grad_norm": 0.1088038757443428, "learning_rate": 0.00013270913901892938, "loss": 0.9718, "step": 1659 }, { "epoch": 0.6749339296605001, "grad_norm": 0.08833767473697662, "learning_rate": 0.00013266843069407696, "loss": 0.8893, "step": 1660 }, { "epoch": 0.6753405163651148, "grad_norm": 0.09868477284908295, "learning_rate": 0.0001326277223692245, "loss": 1.0233, "step": 1661 }, { "epoch": 0.6757471030697296, "grad_norm": 0.09289266169071198, "learning_rate": 0.0001325870140443721, "loss": 0.8636, "step": 1662 }, { "epoch": 0.6761536897743444, "grad_norm": 0.08200156688690186, "learning_rate": 0.00013254630571951965, "loss": 0.8317, "step": 1663 }, { "epoch": 0.6765602764789591, "grad_norm": 0.09031883627176285, "learning_rate": 0.0001325055973946672, "loss": 0.9759, "step": 1664 }, { "epoch": 0.6769668631835739, "grad_norm": 0.09911596029996872, "learning_rate": 0.00013246488906981478, "loss": 1.1484, "step": 1665 }, { "epoch": 0.6773734498881887, "grad_norm": 0.09470785409212112, "learning_rate": 0.00013242418074496234, "loss": 1.0261, "step": 1666 }, { "epoch": 0.6777800365928034, "grad_norm": 0.09936736524105072, "learning_rate": 0.00013238347242010992, "loss": 0.9697, "step": 1667 }, { "epoch": 
0.6781866232974182, "grad_norm": 0.08819877356290817, "learning_rate": 0.00013234276409525747, "loss": 1.0686, "step": 1668 }, { "epoch": 0.678593210002033, "grad_norm": 0.0861021876335144, "learning_rate": 0.00013230205577040505, "loss": 0.9567, "step": 1669 }, { "epoch": 0.6789997967066477, "grad_norm": 0.092157743871212, "learning_rate": 0.00013226134744555263, "loss": 1.0348, "step": 1670 }, { "epoch": 0.6794063834112625, "grad_norm": 0.08593881130218506, "learning_rate": 0.0001322206391207002, "loss": 0.9318, "step": 1671 }, { "epoch": 0.6798129701158772, "grad_norm": 0.09625545144081116, "learning_rate": 0.00013217993079584777, "loss": 0.9666, "step": 1672 }, { "epoch": 0.680219556820492, "grad_norm": 0.09877568483352661, "learning_rate": 0.00013213922247099532, "loss": 0.8862, "step": 1673 }, { "epoch": 0.6806261435251068, "grad_norm": 0.09340859204530716, "learning_rate": 0.0001320985141461429, "loss": 1.0305, "step": 1674 }, { "epoch": 0.6810327302297214, "grad_norm": 0.08883026987314224, "learning_rate": 0.00013205780582129046, "loss": 0.9499, "step": 1675 }, { "epoch": 0.6814393169343362, "grad_norm": 0.09625538438558578, "learning_rate": 0.000132017097496438, "loss": 1.0381, "step": 1676 }, { "epoch": 0.681845903638951, "grad_norm": 0.0917878970503807, "learning_rate": 0.0001319763891715856, "loss": 0.8924, "step": 1677 }, { "epoch": 0.6822524903435657, "grad_norm": 0.08996240794658661, "learning_rate": 0.00013193568084673315, "loss": 0.936, "step": 1678 }, { "epoch": 0.6826590770481805, "grad_norm": 0.09168268740177155, "learning_rate": 0.00013189497252188073, "loss": 0.9608, "step": 1679 }, { "epoch": 0.6830656637527953, "grad_norm": 0.09493600577116013, "learning_rate": 0.00013185426419702828, "loss": 1.0394, "step": 1680 }, { "epoch": 0.68347225045741, "grad_norm": 0.094533272087574, "learning_rate": 0.00013181355587217586, "loss": 0.9437, "step": 1681 }, { "epoch": 0.6838788371620248, "grad_norm": 0.09590426087379456, "learning_rate": 
0.00013177284754732345, "loss": 1.0504, "step": 1682 }, { "epoch": 0.6842854238666396, "grad_norm": 0.1008445993065834, "learning_rate": 0.000131732139222471, "loss": 0.9966, "step": 1683 }, { "epoch": 0.6846920105712543, "grad_norm": 0.09178382903337479, "learning_rate": 0.00013169143089761858, "loss": 1.0171, "step": 1684 }, { "epoch": 0.6850985972758691, "grad_norm": 0.09064016491174698, "learning_rate": 0.00013165072257276614, "loss": 1.0259, "step": 1685 }, { "epoch": 0.6855051839804839, "grad_norm": 0.09577952325344086, "learning_rate": 0.00013161001424791372, "loss": 0.9391, "step": 1686 }, { "epoch": 0.6859117706850986, "grad_norm": 0.08866085112094879, "learning_rate": 0.00013156930592306127, "loss": 1.0251, "step": 1687 }, { "epoch": 0.6863183573897134, "grad_norm": 0.09070689976215363, "learning_rate": 0.00013152859759820882, "loss": 0.9223, "step": 1688 }, { "epoch": 0.6867249440943282, "grad_norm": 0.08675026893615723, "learning_rate": 0.0001314878892733564, "loss": 0.94, "step": 1689 }, { "epoch": 0.6871315307989428, "grad_norm": 0.08852765709161758, "learning_rate": 0.00013144718094850396, "loss": 0.9587, "step": 1690 }, { "epoch": 0.6875381175035576, "grad_norm": 0.09738162159919739, "learning_rate": 0.00013140647262365154, "loss": 1.0469, "step": 1691 }, { "epoch": 0.6879447042081724, "grad_norm": 0.09765305370092392, "learning_rate": 0.00013136576429879912, "loss": 1.0384, "step": 1692 }, { "epoch": 0.6883512909127871, "grad_norm": 0.09691577404737473, "learning_rate": 0.00013132505597394668, "loss": 1.1035, "step": 1693 }, { "epoch": 0.6887578776174019, "grad_norm": 0.09987527132034302, "learning_rate": 0.00013128434764909426, "loss": 1.0447, "step": 1694 }, { "epoch": 0.6891644643220167, "grad_norm": 0.09481899440288544, "learning_rate": 0.0001312436393242418, "loss": 1.0686, "step": 1695 }, { "epoch": 0.6895710510266314, "grad_norm": 0.08769707381725311, "learning_rate": 0.0001312029309993894, "loss": 0.9485, "step": 1696 }, { "epoch": 
0.6899776377312462, "grad_norm": 0.08787425607442856, "learning_rate": 0.00013116222267453695, "loss": 0.9945, "step": 1697 }, { "epoch": 0.6903842244358609, "grad_norm": 0.09898071736097336, "learning_rate": 0.00013112151434968453, "loss": 1.0373, "step": 1698 }, { "epoch": 0.6907908111404757, "grad_norm": 0.09396618604660034, "learning_rate": 0.00013108080602483208, "loss": 1.0555, "step": 1699 }, { "epoch": 0.6911973978450905, "grad_norm": 0.09377385675907135, "learning_rate": 0.00013104009769997964, "loss": 0.9912, "step": 1700 }, { "epoch": 0.6916039845497052, "grad_norm": 0.09066810458898544, "learning_rate": 0.00013099938937512722, "loss": 1.0106, "step": 1701 }, { "epoch": 0.69201057125432, "grad_norm": 0.10170560330152512, "learning_rate": 0.00013095868105027477, "loss": 1.1167, "step": 1702 }, { "epoch": 0.6924171579589348, "grad_norm": 0.10096985846757889, "learning_rate": 0.00013091797272542235, "loss": 1.1092, "step": 1703 }, { "epoch": 0.6928237446635495, "grad_norm": 0.08942307531833649, "learning_rate": 0.00013087726440056993, "loss": 0.8489, "step": 1704 }, { "epoch": 0.6932303313681643, "grad_norm": 0.0931686982512474, "learning_rate": 0.0001308365560757175, "loss": 1.0615, "step": 1705 }, { "epoch": 0.693636918072779, "grad_norm": 0.08369520306587219, "learning_rate": 0.00013079584775086507, "loss": 0.9376, "step": 1706 }, { "epoch": 0.6940435047773937, "grad_norm": 0.09754310548305511, "learning_rate": 0.00013075513942601262, "loss": 1.076, "step": 1707 }, { "epoch": 0.6944500914820085, "grad_norm": 0.09425446391105652, "learning_rate": 0.0001307144311011602, "loss": 1.0354, "step": 1708 }, { "epoch": 0.6948566781866233, "grad_norm": 0.08762680739164352, "learning_rate": 0.00013067372277630776, "loss": 0.892, "step": 1709 }, { "epoch": 0.695263264891238, "grad_norm": 0.08966252207756042, "learning_rate": 0.00013063301445145534, "loss": 0.9067, "step": 1710 }, { "epoch": 0.6956698515958528, "grad_norm": 0.08628804236650467, "learning_rate": 
0.0001305923061266029, "loss": 0.8314, "step": 1711 }, { "epoch": 0.6960764383004676, "grad_norm": 0.0932592824101448, "learning_rate": 0.00013055159780175045, "loss": 0.9557, "step": 1712 }, { "epoch": 0.6964830250050823, "grad_norm": 0.0861787497997284, "learning_rate": 0.00013051088947689803, "loss": 1.0075, "step": 1713 }, { "epoch": 0.6968896117096971, "grad_norm": 0.08896369487047195, "learning_rate": 0.00013047018115204558, "loss": 0.9439, "step": 1714 }, { "epoch": 0.6972961984143119, "grad_norm": 0.09481415897607803, "learning_rate": 0.00013042947282719316, "loss": 1.0008, "step": 1715 }, { "epoch": 0.6977027851189266, "grad_norm": 0.09036390483379364, "learning_rate": 0.00013038876450234075, "loss": 1.0723, "step": 1716 }, { "epoch": 0.6981093718235414, "grad_norm": 0.09333796054124832, "learning_rate": 0.0001303480561774883, "loss": 0.9998, "step": 1717 }, { "epoch": 0.6985159585281562, "grad_norm": 0.09343329071998596, "learning_rate": 0.00013030734785263588, "loss": 1.069, "step": 1718 }, { "epoch": 0.6989225452327709, "grad_norm": 0.10213945806026459, "learning_rate": 0.00013026663952778343, "loss": 1.1121, "step": 1719 }, { "epoch": 0.6993291319373857, "grad_norm": 0.08944682031869888, "learning_rate": 0.00013022593120293102, "loss": 1.0139, "step": 1720 }, { "epoch": 0.6997357186420005, "grad_norm": 0.09763380140066147, "learning_rate": 0.00013018522287807857, "loss": 1.1057, "step": 1721 }, { "epoch": 0.7001423053466151, "grad_norm": 0.08643307536840439, "learning_rate": 0.00013014451455322615, "loss": 0.825, "step": 1722 }, { "epoch": 0.7005488920512299, "grad_norm": 0.0778571143746376, "learning_rate": 0.0001301038062283737, "loss": 0.8161, "step": 1723 }, { "epoch": 0.7009554787558446, "grad_norm": 0.08897890895605087, "learning_rate": 0.00013006309790352126, "loss": 0.9659, "step": 1724 }, { "epoch": 0.7013620654604594, "grad_norm": 0.08511462807655334, "learning_rate": 0.00013002238957866884, "loss": 0.8577, "step": 1725 }, { "epoch": 
0.7017686521650742, "grad_norm": 0.09079938381910324, "learning_rate": 0.0001299816812538164, "loss": 1.0091, "step": 1726 }, { "epoch": 0.7021752388696889, "grad_norm": 0.08795303851366043, "learning_rate": 0.00012994097292896398, "loss": 0.9966, "step": 1727 }, { "epoch": 0.7025818255743037, "grad_norm": 0.0925462394952774, "learning_rate": 0.00012990026460411156, "loss": 1.0207, "step": 1728 }, { "epoch": 0.7029884122789185, "grad_norm": 0.0894242599606514, "learning_rate": 0.0001298595562792591, "loss": 0.9207, "step": 1729 }, { "epoch": 0.7033949989835332, "grad_norm": 0.09216928482055664, "learning_rate": 0.0001298188479544067, "loss": 0.9725, "step": 1730 }, { "epoch": 0.703801585688148, "grad_norm": 0.09627533704042435, "learning_rate": 0.00012977813962955425, "loss": 0.998, "step": 1731 }, { "epoch": 0.7042081723927628, "grad_norm": 0.0950872004032135, "learning_rate": 0.00012973743130470183, "loss": 1.0275, "step": 1732 }, { "epoch": 0.7046147590973775, "grad_norm": 0.09819149225950241, "learning_rate": 0.00012969672297984938, "loss": 1.0179, "step": 1733 }, { "epoch": 0.7050213458019923, "grad_norm": 0.09157780557870865, "learning_rate": 0.00012965601465499694, "loss": 0.998, "step": 1734 }, { "epoch": 0.7054279325066071, "grad_norm": 0.09206783026456833, "learning_rate": 0.00012961530633014452, "loss": 0.9698, "step": 1735 }, { "epoch": 0.7058345192112218, "grad_norm": 0.08928617089986801, "learning_rate": 0.00012957459800529207, "loss": 0.9288, "step": 1736 }, { "epoch": 0.7062411059158366, "grad_norm": 0.09673994034528732, "learning_rate": 0.00012953388968043965, "loss": 1.0768, "step": 1737 }, { "epoch": 0.7066476926204514, "grad_norm": 0.09382779896259308, "learning_rate": 0.00012949318135558723, "loss": 1.0142, "step": 1738 }, { "epoch": 0.707054279325066, "grad_norm": 0.08966720104217529, "learning_rate": 0.0001294524730307348, "loss": 0.8738, "step": 1739 }, { "epoch": 0.7074608660296808, "grad_norm": 0.09402105212211609, "learning_rate": 
0.00012941176470588237, "loss": 0.9459, "step": 1740 }, { "epoch": 0.7078674527342956, "grad_norm": 0.08750198781490326, "learning_rate": 0.00012937105638102992, "loss": 0.9953, "step": 1741 }, { "epoch": 0.7082740394389103, "grad_norm": 0.09970106184482574, "learning_rate": 0.0001293303480561775, "loss": 1.0423, "step": 1742 }, { "epoch": 0.7086806261435251, "grad_norm": 0.08987673372030258, "learning_rate": 0.00012928963973132506, "loss": 0.9796, "step": 1743 }, { "epoch": 0.7090872128481399, "grad_norm": 0.09364349395036697, "learning_rate": 0.00012924893140647264, "loss": 1.0452, "step": 1744 }, { "epoch": 0.7094937995527546, "grad_norm": 0.09844768047332764, "learning_rate": 0.0001292082230816202, "loss": 1.0507, "step": 1745 }, { "epoch": 0.7099003862573694, "grad_norm": 0.08439893275499344, "learning_rate": 0.00012916751475676775, "loss": 0.9159, "step": 1746 }, { "epoch": 0.7103069729619842, "grad_norm": 0.08530126512050629, "learning_rate": 0.00012912680643191533, "loss": 0.8958, "step": 1747 }, { "epoch": 0.7107135596665989, "grad_norm": 0.09442596137523651, "learning_rate": 0.00012908609810706288, "loss": 1.0103, "step": 1748 }, { "epoch": 0.7111201463712137, "grad_norm": 0.09051500260829926, "learning_rate": 0.00012904538978221046, "loss": 0.9922, "step": 1749 }, { "epoch": 0.7115267330758284, "grad_norm": 0.09218533337116241, "learning_rate": 0.00012900468145735805, "loss": 1.0237, "step": 1750 }, { "epoch": 0.7119333197804432, "grad_norm": 0.09059412032365799, "learning_rate": 0.0001289639731325056, "loss": 0.8807, "step": 1751 }, { "epoch": 0.712339906485058, "grad_norm": 0.09302126616239548, "learning_rate": 0.00012892326480765318, "loss": 0.9996, "step": 1752 }, { "epoch": 0.7127464931896726, "grad_norm": 0.0886523425579071, "learning_rate": 0.00012888255648280073, "loss": 0.9456, "step": 1753 }, { "epoch": 0.7131530798942874, "grad_norm": 0.08531109243631363, "learning_rate": 0.00012884184815794832, "loss": 0.8851, "step": 1754 }, { "epoch": 
0.7135596665989022, "grad_norm": 0.08533506095409393, "learning_rate": 0.00012880113983309587, "loss": 1.004, "step": 1755 }, { "epoch": 0.7139662533035169, "grad_norm": 0.10868436843156815, "learning_rate": 0.00012876043150824345, "loss": 1.0434, "step": 1756 }, { "epoch": 0.7143728400081317, "grad_norm": 0.08798620849847794, "learning_rate": 0.000128719723183391, "loss": 0.944, "step": 1757 }, { "epoch": 0.7147794267127465, "grad_norm": 0.08957348763942719, "learning_rate": 0.00012867901485853856, "loss": 0.9431, "step": 1758 }, { "epoch": 0.7151860134173612, "grad_norm": 0.09171691536903381, "learning_rate": 0.00012863830653368614, "loss": 0.9877, "step": 1759 }, { "epoch": 0.715592600121976, "grad_norm": 0.10308198630809784, "learning_rate": 0.0001285975982088337, "loss": 1.0491, "step": 1760 }, { "epoch": 0.7159991868265908, "grad_norm": 0.09395022690296173, "learning_rate": 0.0001285568898839813, "loss": 0.9605, "step": 1761 }, { "epoch": 0.7164057735312055, "grad_norm": 0.09098276495933533, "learning_rate": 0.00012851618155912886, "loss": 0.9623, "step": 1762 }, { "epoch": 0.7168123602358203, "grad_norm": 0.09622596204280853, "learning_rate": 0.0001284754732342764, "loss": 0.9981, "step": 1763 }, { "epoch": 0.7172189469404351, "grad_norm": 0.09966776520013809, "learning_rate": 0.000128434764909424, "loss": 1.1082, "step": 1764 }, { "epoch": 0.7176255336450498, "grad_norm": 0.08151479065418243, "learning_rate": 0.00012839405658457155, "loss": 0.9498, "step": 1765 }, { "epoch": 0.7180321203496646, "grad_norm": 0.10801077634096146, "learning_rate": 0.00012835334825971913, "loss": 1.0845, "step": 1766 }, { "epoch": 0.7184387070542794, "grad_norm": 0.10468696802854538, "learning_rate": 0.00012831263993486668, "loss": 1.1407, "step": 1767 }, { "epoch": 0.718845293758894, "grad_norm": 0.08649425953626633, "learning_rate": 0.00012827193161001426, "loss": 1.0136, "step": 1768 }, { "epoch": 0.7192518804635089, "grad_norm": 0.0891176387667656, "learning_rate": 
0.00012823122328516182, "loss": 0.9647, "step": 1769 }, { "epoch": 0.7196584671681237, "grad_norm": 0.08572922646999359, "learning_rate": 0.00012819051496030937, "loss": 0.9131, "step": 1770 }, { "epoch": 0.7200650538727383, "grad_norm": 0.09400682896375656, "learning_rate": 0.00012814980663545695, "loss": 1.0212, "step": 1771 }, { "epoch": 0.7204716405773531, "grad_norm": 0.08426962792873383, "learning_rate": 0.0001281090983106045, "loss": 0.9203, "step": 1772 }, { "epoch": 0.7208782272819679, "grad_norm": 0.08990871161222458, "learning_rate": 0.00012806838998575211, "loss": 0.9154, "step": 1773 }, { "epoch": 0.7212848139865826, "grad_norm": 0.09853409230709076, "learning_rate": 0.00012802768166089967, "loss": 1.0219, "step": 1774 }, { "epoch": 0.7216914006911974, "grad_norm": 0.09549330174922943, "learning_rate": 0.00012798697333604722, "loss": 1.0584, "step": 1775 }, { "epoch": 0.7220979873958121, "grad_norm": 0.09176405519247055, "learning_rate": 0.0001279462650111948, "loss": 1.0623, "step": 1776 }, { "epoch": 0.7225045741004269, "grad_norm": 0.0894324779510498, "learning_rate": 0.00012790555668634236, "loss": 0.8873, "step": 1777 }, { "epoch": 0.7229111608050417, "grad_norm": 0.09495782852172852, "learning_rate": 0.00012786484836148994, "loss": 0.9914, "step": 1778 }, { "epoch": 0.7233177475096564, "grad_norm": 0.09165625274181366, "learning_rate": 0.0001278241400366375, "loss": 0.9946, "step": 1779 }, { "epoch": 0.7237243342142712, "grad_norm": 0.08971066772937775, "learning_rate": 0.00012778343171178507, "loss": 0.9684, "step": 1780 }, { "epoch": 0.724130920918886, "grad_norm": 0.09194676578044891, "learning_rate": 0.00012774272338693263, "loss": 0.9845, "step": 1781 }, { "epoch": 0.7245375076235007, "grad_norm": 0.08844684064388275, "learning_rate": 0.00012770201506208018, "loss": 1.0189, "step": 1782 }, { "epoch": 0.7249440943281155, "grad_norm": 0.09508199989795685, "learning_rate": 0.00012766130673722776, "loss": 0.9609, "step": 1783 }, { "epoch": 
0.7253506810327303, "grad_norm": 0.08686284720897675, "learning_rate": 0.00012762059841237534, "loss": 0.9364, "step": 1784 }, { "epoch": 0.725757267737345, "grad_norm": 0.08749787509441376, "learning_rate": 0.00012757989008752293, "loss": 0.9021, "step": 1785 }, { "epoch": 0.7261638544419597, "grad_norm": 0.09259208291769028, "learning_rate": 0.00012753918176267048, "loss": 1.031, "step": 1786 }, { "epoch": 0.7265704411465745, "grad_norm": 0.09524762630462646, "learning_rate": 0.00012749847343781803, "loss": 0.9958, "step": 1787 }, { "epoch": 0.7269770278511892, "grad_norm": 0.08385960757732391, "learning_rate": 0.00012745776511296562, "loss": 0.943, "step": 1788 }, { "epoch": 0.727383614555804, "grad_norm": 0.09703537821769714, "learning_rate": 0.00012741705678811317, "loss": 0.9854, "step": 1789 }, { "epoch": 0.7277902012604188, "grad_norm": 0.08761659264564514, "learning_rate": 0.00012737634846326075, "loss": 0.8797, "step": 1790 }, { "epoch": 0.7281967879650335, "grad_norm": 0.08612256497144699, "learning_rate": 0.0001273356401384083, "loss": 0.894, "step": 1791 }, { "epoch": 0.7286033746696483, "grad_norm": 0.09343304485082626, "learning_rate": 0.00012729493181355589, "loss": 0.969, "step": 1792 }, { "epoch": 0.7290099613742631, "grad_norm": 0.09733837842941284, "learning_rate": 0.00012725422348870344, "loss": 1.0479, "step": 1793 }, { "epoch": 0.7294165480788778, "grad_norm": 0.08351567387580872, "learning_rate": 0.000127213515163851, "loss": 0.9141, "step": 1794 }, { "epoch": 0.7298231347834926, "grad_norm": 0.09528695791959763, "learning_rate": 0.00012717280683899858, "loss": 1.0193, "step": 1795 }, { "epoch": 0.7302297214881074, "grad_norm": 0.0906892865896225, "learning_rate": 0.00012713209851414616, "loss": 0.9095, "step": 1796 }, { "epoch": 0.7306363081927221, "grad_norm": 119.45793151855469, "learning_rate": 0.00012709139018929374, "loss": 1.0114, "step": 1797 }, { "epoch": 0.7310428948973369, "grad_norm": 0.0933651253581047, "learning_rate": 
0.0001270506818644413, "loss": 1.0666, "step": 1798 }, { "epoch": 0.7314494816019517, "grad_norm": 0.10169385373592377, "learning_rate": 0.00012700997353958885, "loss": 0.9892, "step": 1799 }, { "epoch": 0.7318560683065664, "grad_norm": 0.0868530198931694, "learning_rate": 0.00012696926521473643, "loss": 0.9162, "step": 1800 }, { "epoch": 0.7322626550111812, "grad_norm": 0.09074793756008148, "learning_rate": 0.00012692855688988398, "loss": 0.9388, "step": 1801 }, { "epoch": 0.7326692417157958, "grad_norm": 0.10199327766895294, "learning_rate": 0.00012688784856503156, "loss": 0.9585, "step": 1802 }, { "epoch": 0.7330758284204106, "grad_norm": 0.10722784698009491, "learning_rate": 0.00012684714024017912, "loss": 1.0226, "step": 1803 }, { "epoch": 0.7334824151250254, "grad_norm": 0.10113389045000076, "learning_rate": 0.0001268064319153267, "loss": 1.0593, "step": 1804 }, { "epoch": 0.7338890018296401, "grad_norm": 0.1125817522406578, "learning_rate": 0.00012676572359047425, "loss": 0.8962, "step": 1805 }, { "epoch": 0.7342955885342549, "grad_norm": 0.10177897661924362, "learning_rate": 0.0001267250152656218, "loss": 1.0323, "step": 1806 }, { "epoch": 0.7347021752388697, "grad_norm": 0.10272479057312012, "learning_rate": 0.00012668430694076941, "loss": 0.9947, "step": 1807 }, { "epoch": 0.7351087619434844, "grad_norm": 0.11395642906427383, "learning_rate": 0.00012664359861591697, "loss": 1.0144, "step": 1808 }, { "epoch": 0.7355153486480992, "grad_norm": 0.09565427899360657, "learning_rate": 0.00012660289029106452, "loss": 1.0052, "step": 1809 }, { "epoch": 0.735921935352714, "grad_norm": 0.09244798123836517, "learning_rate": 0.0001265621819662121, "loss": 0.8411, "step": 1810 }, { "epoch": 0.7363285220573287, "grad_norm": 0.08985315263271332, "learning_rate": 0.00012652147364135966, "loss": 1.0301, "step": 1811 }, { "epoch": 0.7367351087619435, "grad_norm": 0.09606938809156418, "learning_rate": 0.00012648076531650724, "loss": 1.0053, "step": 1812 }, { "epoch": 
0.7371416954665583, "grad_norm": 0.10566183179616928, "learning_rate": 0.0001264400569916548, "loss": 0.9527, "step": 1813 }, { "epoch": 0.737548282171173, "grad_norm": 0.10999652743339539, "learning_rate": 0.00012639934866680237, "loss": 1.0756, "step": 1814 }, { "epoch": 0.7379548688757878, "grad_norm": 0.09473931044340134, "learning_rate": 0.00012635864034194993, "loss": 0.94, "step": 1815 }, { "epoch": 0.7383614555804026, "grad_norm": 0.09815262258052826, "learning_rate": 0.0001263179320170975, "loss": 1.0436, "step": 1816 }, { "epoch": 0.7387680422850172, "grad_norm": 0.08889912813901901, "learning_rate": 0.00012627722369224506, "loss": 0.9368, "step": 1817 }, { "epoch": 0.739174628989632, "grad_norm": 0.09337257593870163, "learning_rate": 0.00012623651536739262, "loss": 1.0949, "step": 1818 }, { "epoch": 0.7395812156942468, "grad_norm": 0.09112720191478729, "learning_rate": 0.00012619580704254023, "loss": 1.0239, "step": 1819 }, { "epoch": 0.7399878023988615, "grad_norm": 0.0988708958029747, "learning_rate": 0.00012615509871768778, "loss": 1.0648, "step": 1820 }, { "epoch": 0.7403943891034763, "grad_norm": 0.09849932789802551, "learning_rate": 0.00012611439039283533, "loss": 0.9867, "step": 1821 }, { "epoch": 0.7408009758080911, "grad_norm": 0.09254156798124313, "learning_rate": 0.00012607368206798291, "loss": 0.9903, "step": 1822 }, { "epoch": 0.7412075625127058, "grad_norm": 0.0954776182770729, "learning_rate": 0.00012603297374313047, "loss": 1.0081, "step": 1823 }, { "epoch": 0.7416141492173206, "grad_norm": 0.08610807359218597, "learning_rate": 0.00012599226541827805, "loss": 0.9229, "step": 1824 }, { "epoch": 0.7420207359219354, "grad_norm": 0.0977591797709465, "learning_rate": 0.0001259515570934256, "loss": 0.9076, "step": 1825 }, { "epoch": 0.7424273226265501, "grad_norm": 0.0858481377363205, "learning_rate": 0.00012591084876857319, "loss": 0.8604, "step": 1826 }, { "epoch": 0.7428339093311649, "grad_norm": 0.09642601758241653, "learning_rate": 
0.00012587014044372074, "loss": 1.0476, "step": 1827 }, { "epoch": 0.7432404960357797, "grad_norm": 0.08871784061193466, "learning_rate": 0.0001258294321188683, "loss": 0.9597, "step": 1828 }, { "epoch": 0.7436470827403944, "grad_norm": 0.10808097571134567, "learning_rate": 0.00012578872379401587, "loss": 1.1415, "step": 1829 }, { "epoch": 0.7440536694450092, "grad_norm": 0.09339917451143265, "learning_rate": 0.00012574801546916346, "loss": 0.9437, "step": 1830 }, { "epoch": 0.7444602561496239, "grad_norm": 0.08945673704147339, "learning_rate": 0.00012570730714431104, "loss": 0.9714, "step": 1831 }, { "epoch": 0.7448668428542387, "grad_norm": 0.0939527079463005, "learning_rate": 0.0001256665988194586, "loss": 0.9868, "step": 1832 }, { "epoch": 0.7452734295588535, "grad_norm": 0.09327416867017746, "learning_rate": 0.00012562589049460615, "loss": 1.0001, "step": 1833 }, { "epoch": 0.7456800162634681, "grad_norm": 0.10278622061014175, "learning_rate": 0.00012558518216975373, "loss": 1.0724, "step": 1834 }, { "epoch": 0.7460866029680829, "grad_norm": 0.09421471506357193, "learning_rate": 0.00012554447384490128, "loss": 1.0088, "step": 1835 }, { "epoch": 0.7464931896726977, "grad_norm": 0.1009073331952095, "learning_rate": 0.00012550376552004886, "loss": 1.0485, "step": 1836 }, { "epoch": 0.7468997763773124, "grad_norm": 0.09199651330709457, "learning_rate": 0.00012546305719519642, "loss": 0.9765, "step": 1837 }, { "epoch": 0.7473063630819272, "grad_norm": 0.09672168642282486, "learning_rate": 0.000125422348870344, "loss": 1.018, "step": 1838 }, { "epoch": 0.747712949786542, "grad_norm": 0.09036868065595627, "learning_rate": 0.00012538164054549155, "loss": 0.9067, "step": 1839 }, { "epoch": 0.7481195364911567, "grad_norm": 0.09706352651119232, "learning_rate": 0.0001253409322206391, "loss": 1.0439, "step": 1840 }, { "epoch": 0.7485261231957715, "grad_norm": 0.09940480440855026, "learning_rate": 0.00012530022389578669, "loss": 1.0936, "step": 1841 }, { "epoch": 
0.7489327099003863, "grad_norm": 0.09489309787750244, "learning_rate": 0.00012525951557093427, "loss": 1.0606, "step": 1842 }, { "epoch": 0.749339296605001, "grad_norm": 0.07897097617387772, "learning_rate": 0.00012521880724608185, "loss": 0.8109, "step": 1843 }, { "epoch": 0.7497458833096158, "grad_norm": 0.09423919022083282, "learning_rate": 0.0001251780989212294, "loss": 1.0703, "step": 1844 }, { "epoch": 0.7501524700142306, "grad_norm": 0.09601794928312302, "learning_rate": 0.00012513739059637696, "loss": 0.9692, "step": 1845 }, { "epoch": 0.7505590567188453, "grad_norm": 0.09051002562046051, "learning_rate": 0.00012509668227152454, "loss": 0.9727, "step": 1846 }, { "epoch": 0.7509656434234601, "grad_norm": 0.09665656834840775, "learning_rate": 0.0001250559739466721, "loss": 1.0701, "step": 1847 }, { "epoch": 0.7513722301280749, "grad_norm": 0.08956587314605713, "learning_rate": 0.00012501526562181967, "loss": 0.9863, "step": 1848 }, { "epoch": 0.7517788168326895, "grad_norm": 0.09464751929044724, "learning_rate": 0.00012497455729696723, "loss": 1.043, "step": 1849 }, { "epoch": 0.7521854035373043, "grad_norm": 0.09246315807104111, "learning_rate": 0.0001249338489721148, "loss": 1.0306, "step": 1850 }, { "epoch": 0.7525919902419191, "grad_norm": 0.0943431407213211, "learning_rate": 0.00012489314064726236, "loss": 0.9251, "step": 1851 }, { "epoch": 0.7529985769465338, "grad_norm": 0.08852697908878326, "learning_rate": 0.00012485243232240992, "loss": 0.919, "step": 1852 }, { "epoch": 0.7534051636511486, "grad_norm": 0.08856131881475449, "learning_rate": 0.00012481172399755752, "loss": 0.9874, "step": 1853 }, { "epoch": 0.7538117503557634, "grad_norm": 0.08715582638978958, "learning_rate": 0.00012477101567270508, "loss": 0.9569, "step": 1854 }, { "epoch": 0.7542183370603781, "grad_norm": 0.1005750522017479, "learning_rate": 0.00012473030734785266, "loss": 1.118, "step": 1855 }, { "epoch": 0.7546249237649929, "grad_norm": 0.0848010703921318, "learning_rate": 
0.00012468959902300021, "loss": 0.8808, "step": 1856 }, { "epoch": 0.7550315104696076, "grad_norm": 0.10509838908910751, "learning_rate": 0.00012464889069814777, "loss": 1.0019, "step": 1857 }, { "epoch": 0.7554380971742224, "grad_norm": 0.09729699045419693, "learning_rate": 0.00012460818237329535, "loss": 0.9275, "step": 1858 }, { "epoch": 0.7558446838788372, "grad_norm": 0.0901610478758812, "learning_rate": 0.0001245674740484429, "loss": 1.0285, "step": 1859 }, { "epoch": 0.7562512705834519, "grad_norm": 0.08691520988941193, "learning_rate": 0.00012452676572359048, "loss": 0.9524, "step": 1860 }, { "epoch": 0.7566578572880667, "grad_norm": 0.09559500962495804, "learning_rate": 0.00012448605739873804, "loss": 1.0781, "step": 1861 }, { "epoch": 0.7570644439926815, "grad_norm": 0.09581112861633301, "learning_rate": 0.00012444534907388562, "loss": 1.068, "step": 1862 }, { "epoch": 0.7574710306972962, "grad_norm": 0.10235914587974548, "learning_rate": 0.00012440464074903317, "loss": 1.078, "step": 1863 }, { "epoch": 0.757877617401911, "grad_norm": 0.09794023633003235, "learning_rate": 0.00012436393242418073, "loss": 1.0951, "step": 1864 }, { "epoch": 0.7582842041065257, "grad_norm": 0.08910951763391495, "learning_rate": 0.00012432322409932834, "loss": 1.002, "step": 1865 }, { "epoch": 0.7586907908111404, "grad_norm": 0.08909524232149124, "learning_rate": 0.0001242825157744759, "loss": 0.9027, "step": 1866 }, { "epoch": 0.7590973775157552, "grad_norm": 0.09639742970466614, "learning_rate": 0.00012424180744962347, "loss": 1.1356, "step": 1867 }, { "epoch": 0.75950396422037, "grad_norm": 0.08606995642185211, "learning_rate": 0.00012420109912477103, "loss": 0.8974, "step": 1868 }, { "epoch": 0.7599105509249847, "grad_norm": 0.09715355932712555, "learning_rate": 0.00012416039079991858, "loss": 1.078, "step": 1869 }, { "epoch": 0.7603171376295995, "grad_norm": 0.08933407068252563, "learning_rate": 0.00012411968247506616, "loss": 0.9177, "step": 1870 }, { "epoch": 
0.7607237243342143, "grad_norm": 0.0859113335609436, "learning_rate": 0.00012407897415021372, "loss": 0.9703, "step": 1871 }, { "epoch": 0.761130311038829, "grad_norm": 0.09086931496858597, "learning_rate": 0.0001240382658253613, "loss": 1.0298, "step": 1872 }, { "epoch": 0.7615368977434438, "grad_norm": 0.09112663567066193, "learning_rate": 0.00012399755750050885, "loss": 0.9918, "step": 1873 }, { "epoch": 0.7619434844480586, "grad_norm": 0.09044841676950455, "learning_rate": 0.00012395684917565643, "loss": 0.9469, "step": 1874 }, { "epoch": 0.7623500711526733, "grad_norm": 0.08345028758049011, "learning_rate": 0.00012391614085080399, "loss": 0.879, "step": 1875 }, { "epoch": 0.7627566578572881, "grad_norm": 0.10249708592891693, "learning_rate": 0.00012387543252595157, "loss": 1.0247, "step": 1876 }, { "epoch": 0.7631632445619029, "grad_norm": 0.0914909839630127, "learning_rate": 0.00012383472420109915, "loss": 0.9341, "step": 1877 }, { "epoch": 0.7635698312665176, "grad_norm": 0.08616846054792404, "learning_rate": 0.0001237940158762467, "loss": 0.918, "step": 1878 }, { "epoch": 0.7639764179711324, "grad_norm": 0.0853181779384613, "learning_rate": 0.00012375330755139428, "loss": 0.8903, "step": 1879 }, { "epoch": 0.7643830046757472, "grad_norm": 0.0943385511636734, "learning_rate": 0.00012371259922654184, "loss": 1.0437, "step": 1880 }, { "epoch": 0.7647895913803618, "grad_norm": 0.08487629890441895, "learning_rate": 0.0001236718909016894, "loss": 0.9655, "step": 1881 }, { "epoch": 0.7651961780849766, "grad_norm": 0.09635015577077866, "learning_rate": 0.00012363118257683697, "loss": 1.0047, "step": 1882 }, { "epoch": 0.7656027647895913, "grad_norm": 0.09787151217460632, "learning_rate": 0.00012359047425198453, "loss": 1.1058, "step": 1883 }, { "epoch": 0.7660093514942061, "grad_norm": 0.10217342525720596, "learning_rate": 0.0001235497659271321, "loss": 1.1407, "step": 1884 }, { "epoch": 0.7664159381988209, "grad_norm": 0.08770392835140228, "learning_rate": 
0.00012350905760227966, "loss": 0.8851, "step": 1885 }, { "epoch": 0.7668225249034356, "grad_norm": 0.08978156745433807, "learning_rate": 0.00012346834927742724, "loss": 1.0138, "step": 1886 }, { "epoch": 0.7672291116080504, "grad_norm": 0.09110313653945923, "learning_rate": 0.0001234276409525748, "loss": 0.8872, "step": 1887 }, { "epoch": 0.7676356983126652, "grad_norm": 0.0905870720744133, "learning_rate": 0.00012338693262772238, "loss": 0.9819, "step": 1888 }, { "epoch": 0.7680422850172799, "grad_norm": 0.09418340027332306, "learning_rate": 0.00012334622430286996, "loss": 1.0486, "step": 1889 }, { "epoch": 0.7684488717218947, "grad_norm": 0.09140585362911224, "learning_rate": 0.00012330551597801751, "loss": 0.9463, "step": 1890 }, { "epoch": 0.7688554584265095, "grad_norm": 0.08720141649246216, "learning_rate": 0.0001232648076531651, "loss": 0.9833, "step": 1891 }, { "epoch": 0.7692620451311242, "grad_norm": 0.09206419438123703, "learning_rate": 0.00012322409932831265, "loss": 0.9554, "step": 1892 }, { "epoch": 0.769668631835739, "grad_norm": 0.09324870258569717, "learning_rate": 0.0001231833910034602, "loss": 1.0703, "step": 1893 }, { "epoch": 0.7700752185403538, "grad_norm": 0.0868481770157814, "learning_rate": 0.00012314268267860778, "loss": 0.9374, "step": 1894 }, { "epoch": 0.7704818052449685, "grad_norm": 0.0907289981842041, "learning_rate": 0.00012310197435375534, "loss": 1.0148, "step": 1895 }, { "epoch": 0.7708883919495833, "grad_norm": 0.09804967790842056, "learning_rate": 0.00012306126602890292, "loss": 1.0541, "step": 1896 }, { "epoch": 0.771294978654198, "grad_norm": 0.09168083965778351, "learning_rate": 0.00012302055770405047, "loss": 0.9363, "step": 1897 }, { "epoch": 0.7717015653588127, "grad_norm": 0.09078045189380646, "learning_rate": 0.00012297984937919805, "loss": 1.0683, "step": 1898 }, { "epoch": 0.7721081520634275, "grad_norm": 0.08930620551109314, "learning_rate": 0.00012293914105434564, "loss": 0.9659, "step": 1899 }, { "epoch": 
0.7725147387680423, "grad_norm": 0.09990911930799484, "learning_rate": 0.0001228984327294932, "loss": 1.1301, "step": 1900 }, { "epoch": 0.772921325472657, "grad_norm": 0.08707278221845627, "learning_rate": 0.00012285772440464077, "loss": 0.919, "step": 1901 }, { "epoch": 0.7733279121772718, "grad_norm": 0.0829259380698204, "learning_rate": 0.00012281701607978833, "loss": 0.8806, "step": 1902 }, { "epoch": 0.7737344988818866, "grad_norm": 0.09047359228134155, "learning_rate": 0.0001227763077549359, "loss": 0.9478, "step": 1903 }, { "epoch": 0.7741410855865013, "grad_norm": 0.08373644202947617, "learning_rate": 0.00012273559943008346, "loss": 0.8043, "step": 1904 }, { "epoch": 0.7745476722911161, "grad_norm": 0.08893609046936035, "learning_rate": 0.00012269489110523101, "loss": 0.9849, "step": 1905 }, { "epoch": 0.7749542589957309, "grad_norm": 0.09218044579029083, "learning_rate": 0.0001226541827803786, "loss": 0.8856, "step": 1906 }, { "epoch": 0.7753608457003456, "grad_norm": 0.08562997728586197, "learning_rate": 0.00012261347445552615, "loss": 0.8934, "step": 1907 }, { "epoch": 0.7757674324049604, "grad_norm": 0.09083337336778641, "learning_rate": 0.00012257276613067373, "loss": 1.0118, "step": 1908 }, { "epoch": 0.7761740191095751, "grad_norm": 0.08940907567739487, "learning_rate": 0.00012253205780582129, "loss": 0.9138, "step": 1909 }, { "epoch": 0.7765806058141899, "grad_norm": 0.09383655339479446, "learning_rate": 0.00012249134948096887, "loss": 0.9727, "step": 1910 }, { "epoch": 0.7769871925188047, "grad_norm": 0.10210567712783813, "learning_rate": 0.00012245064115611645, "loss": 1.0628, "step": 1911 }, { "epoch": 0.7773937792234193, "grad_norm": 0.09698057174682617, "learning_rate": 0.000122409932831264, "loss": 1.0863, "step": 1912 }, { "epoch": 0.7778003659280341, "grad_norm": 0.0920233353972435, "learning_rate": 0.00012236922450641158, "loss": 0.9377, "step": 1913 }, { "epoch": 0.7782069526326489, "grad_norm": 0.08810736984014511, "learning_rate": 
0.00012232851618155914, "loss": 0.9152, "step": 1914 }, { "epoch": 0.7786135393372636, "grad_norm": 0.0930081456899643, "learning_rate": 0.0001222878078567067, "loss": 0.9493, "step": 1915 }, { "epoch": 0.7790201260418784, "grad_norm": 0.09303618222475052, "learning_rate": 0.00012224709953185427, "loss": 0.9638, "step": 1916 }, { "epoch": 0.7794267127464932, "grad_norm": 0.09462623298168182, "learning_rate": 0.00012220639120700183, "loss": 1.0028, "step": 1917 }, { "epoch": 0.7798332994511079, "grad_norm": 0.08258619159460068, "learning_rate": 0.0001221656828821494, "loss": 0.934, "step": 1918 }, { "epoch": 0.7802398861557227, "grad_norm": 0.0895158126950264, "learning_rate": 0.00012212497455729696, "loss": 1.0514, "step": 1919 }, { "epoch": 0.7806464728603375, "grad_norm": 0.0877576693892479, "learning_rate": 0.00012208426623244454, "loss": 0.906, "step": 1920 }, { "epoch": 0.7810530595649522, "grad_norm": 0.08271359652280807, "learning_rate": 0.0001220435579075921, "loss": 0.8446, "step": 1921 }, { "epoch": 0.781459646269567, "grad_norm": 0.09866933524608612, "learning_rate": 0.00012200284958273969, "loss": 1.0477, "step": 1922 }, { "epoch": 0.7818662329741818, "grad_norm": 0.0881706029176712, "learning_rate": 0.00012196214125788725, "loss": 0.9121, "step": 1923 }, { "epoch": 0.7822728196787965, "grad_norm": 0.08888103812932968, "learning_rate": 0.00012192143293303481, "loss": 0.9179, "step": 1924 }, { "epoch": 0.7826794063834113, "grad_norm": 0.08678455650806427, "learning_rate": 0.00012188072460818238, "loss": 0.8909, "step": 1925 }, { "epoch": 0.7830859930880261, "grad_norm": 0.08965113759040833, "learning_rate": 0.00012184001628332995, "loss": 0.9483, "step": 1926 }, { "epoch": 0.7834925797926408, "grad_norm": 0.09123353660106659, "learning_rate": 0.00012179930795847752, "loss": 0.9552, "step": 1927 }, { "epoch": 0.7838991664972556, "grad_norm": 0.08975458890199661, "learning_rate": 0.00012175859963362508, "loss": 0.9155, "step": 1928 }, { "epoch": 
0.7843057532018703, "grad_norm": 0.09666619449853897, "learning_rate": 0.00012171789130877265, "loss": 0.9306, "step": 1929 }, { "epoch": 0.784712339906485, "grad_norm": 0.10168115049600601, "learning_rate": 0.00012167718298392022, "loss": 1.0875, "step": 1930 }, { "epoch": 0.7851189266110998, "grad_norm": 0.09472563117742538, "learning_rate": 0.00012163647465906777, "loss": 1.0703, "step": 1931 }, { "epoch": 0.7855255133157146, "grad_norm": 0.09953609853982925, "learning_rate": 0.00012159576633421534, "loss": 1.0868, "step": 1932 }, { "epoch": 0.7859321000203293, "grad_norm": 0.0972660630941391, "learning_rate": 0.00012155505800936291, "loss": 0.9368, "step": 1933 }, { "epoch": 0.7863386867249441, "grad_norm": 0.08514858037233353, "learning_rate": 0.0001215143496845105, "loss": 0.9114, "step": 1934 }, { "epoch": 0.7867452734295588, "grad_norm": 0.08503813296556473, "learning_rate": 0.00012147364135965806, "loss": 0.9315, "step": 1935 }, { "epoch": 0.7871518601341736, "grad_norm": 0.09241003543138504, "learning_rate": 0.00012143293303480563, "loss": 0.9656, "step": 1936 }, { "epoch": 0.7875584468387884, "grad_norm": 0.08896864950656891, "learning_rate": 0.00012139222470995319, "loss": 1.0197, "step": 1937 }, { "epoch": 0.7879650335434031, "grad_norm": 0.0944843515753746, "learning_rate": 0.00012135151638510076, "loss": 1.0548, "step": 1938 }, { "epoch": 0.7883716202480179, "grad_norm": 0.09366561472415924, "learning_rate": 0.00012131080806024833, "loss": 1.0353, "step": 1939 }, { "epoch": 0.7887782069526327, "grad_norm": 0.094940185546875, "learning_rate": 0.0001212700997353959, "loss": 0.9467, "step": 1940 }, { "epoch": 0.7891847936572474, "grad_norm": 0.08708861470222473, "learning_rate": 0.00012122939141054346, "loss": 0.9762, "step": 1941 }, { "epoch": 0.7895913803618622, "grad_norm": 0.09528307616710663, "learning_rate": 0.00012118868308569102, "loss": 0.9989, "step": 1942 }, { "epoch": 0.789997967066477, "grad_norm": 0.09655644744634628, "learning_rate": 
0.00012114797476083858, "loss": 1.0961, "step": 1943 }, { "epoch": 0.7904045537710916, "grad_norm": 0.09209448844194412, "learning_rate": 0.00012110726643598615, "loss": 0.9271, "step": 1944 }, { "epoch": 0.7908111404757064, "grad_norm": 0.10271915793418884, "learning_rate": 0.00012106655811113375, "loss": 1.1772, "step": 1945 }, { "epoch": 0.7912177271803212, "grad_norm": 0.08724693953990936, "learning_rate": 0.00012102584978628131, "loss": 1.0258, "step": 1946 }, { "epoch": 0.7916243138849359, "grad_norm": 0.09265289455652237, "learning_rate": 0.00012098514146142887, "loss": 0.9987, "step": 1947 }, { "epoch": 0.7920309005895507, "grad_norm": 0.08839931339025497, "learning_rate": 0.00012094443313657644, "loss": 0.9955, "step": 1948 }, { "epoch": 0.7924374872941655, "grad_norm": 0.0918072909116745, "learning_rate": 0.000120903724811724, "loss": 0.966, "step": 1949 }, { "epoch": 0.7928440739987802, "grad_norm": 0.09354092925786972, "learning_rate": 0.00012086301648687157, "loss": 1.0129, "step": 1950 }, { "epoch": 0.793250660703395, "grad_norm": 0.0950416848063469, "learning_rate": 0.00012082230816201914, "loss": 1.1276, "step": 1951 }, { "epoch": 0.7936572474080098, "grad_norm": 0.08683070540428162, "learning_rate": 0.00012078159983716671, "loss": 0.8137, "step": 1952 }, { "epoch": 0.7940638341126245, "grad_norm": 0.0931641161441803, "learning_rate": 0.00012074089151231427, "loss": 1.0094, "step": 1953 }, { "epoch": 0.7944704208172393, "grad_norm": 0.09602310508489609, "learning_rate": 0.00012070018318746183, "loss": 1.0643, "step": 1954 }, { "epoch": 0.7948770075218541, "grad_norm": 0.08961457759141922, "learning_rate": 0.0001206594748626094, "loss": 0.9338, "step": 1955 }, { "epoch": 0.7952835942264688, "grad_norm": 0.09515352547168732, "learning_rate": 0.00012061876653775696, "loss": 1.0151, "step": 1956 }, { "epoch": 0.7956901809310836, "grad_norm": 0.08361717313528061, "learning_rate": 0.00012057805821290456, "loss": 0.9363, "step": 1957 }, { "epoch": 
0.7960967676356984, "grad_norm": 0.09745500981807709, "learning_rate": 0.00012053734988805213, "loss": 1.0578, "step": 1958 }, { "epoch": 0.796503354340313, "grad_norm": 0.09218847006559372, "learning_rate": 0.00012049664156319968, "loss": 0.9899, "step": 1959 }, { "epoch": 0.7969099410449278, "grad_norm": 0.08713049441576004, "learning_rate": 0.00012045593323834725, "loss": 0.9283, "step": 1960 }, { "epoch": 0.7973165277495425, "grad_norm": 0.09556014090776443, "learning_rate": 0.00012041522491349482, "loss": 1.0288, "step": 1961 }, { "epoch": 0.7977231144541573, "grad_norm": 0.09281028062105179, "learning_rate": 0.00012037451658864238, "loss": 0.9812, "step": 1962 }, { "epoch": 0.7981297011587721, "grad_norm": 0.10029471665620804, "learning_rate": 0.00012033380826378995, "loss": 1.1787, "step": 1963 }, { "epoch": 0.7985362878633868, "grad_norm": 0.08875210583209991, "learning_rate": 0.00012029309993893752, "loss": 0.9432, "step": 1964 }, { "epoch": 0.7989428745680016, "grad_norm": 0.09191716462373734, "learning_rate": 0.00012025239161408509, "loss": 1.0301, "step": 1965 }, { "epoch": 0.7993494612726164, "grad_norm": 0.09936721622943878, "learning_rate": 0.00012021168328923264, "loss": 1.0008, "step": 1966 }, { "epoch": 0.7997560479772311, "grad_norm": 0.09157298505306244, "learning_rate": 0.00012017097496438021, "loss": 1.0065, "step": 1967 }, { "epoch": 0.8001626346818459, "grad_norm": 0.0955449789762497, "learning_rate": 0.0001201302666395278, "loss": 1.1102, "step": 1968 }, { "epoch": 0.8005692213864607, "grad_norm": 0.10182837396860123, "learning_rate": 0.00012008955831467537, "loss": 1.1306, "step": 1969 }, { "epoch": 0.8009758080910754, "grad_norm": 0.09424961358308792, "learning_rate": 0.00012004884998982292, "loss": 0.953, "step": 1970 }, { "epoch": 0.8013823947956902, "grad_norm": 0.09814165532588959, "learning_rate": 0.00012000814166497049, "loss": 1.1101, "step": 1971 }, { "epoch": 0.801788981500305, "grad_norm": 0.09669219702482224, "learning_rate": 
0.00011996743334011806, "loss": 0.9598, "step": 1972 }, { "epoch": 0.8021955682049197, "grad_norm": 0.08699534088373184, "learning_rate": 0.00011992672501526563, "loss": 0.8809, "step": 1973 }, { "epoch": 0.8026021549095345, "grad_norm": 0.08246033638715744, "learning_rate": 0.0001198860166904132, "loss": 0.9572, "step": 1974 }, { "epoch": 0.8030087416141493, "grad_norm": 0.08641736209392548, "learning_rate": 0.00011984530836556076, "loss": 0.9201, "step": 1975 }, { "epoch": 0.8034153283187639, "grad_norm": 0.09478481858968735, "learning_rate": 0.00011980460004070833, "loss": 0.9875, "step": 1976 }, { "epoch": 0.8038219150233787, "grad_norm": 0.09944069385528564, "learning_rate": 0.0001197638917158559, "loss": 1.01, "step": 1977 }, { "epoch": 0.8042285017279935, "grad_norm": 0.09341408312320709, "learning_rate": 0.00011972318339100345, "loss": 0.9655, "step": 1978 }, { "epoch": 0.8046350884326082, "grad_norm": 0.09643781185150146, "learning_rate": 0.00011968247506615102, "loss": 0.9764, "step": 1979 }, { "epoch": 0.805041675137223, "grad_norm": 0.08750199526548386, "learning_rate": 0.00011964176674129861, "loss": 0.9561, "step": 1980 }, { "epoch": 0.8054482618418378, "grad_norm": 0.09079190343618393, "learning_rate": 0.00011960105841644618, "loss": 0.9068, "step": 1981 }, { "epoch": 0.8058548485464525, "grad_norm": 0.08659665286540985, "learning_rate": 0.00011956035009159374, "loss": 0.9414, "step": 1982 }, { "epoch": 0.8062614352510673, "grad_norm": 0.08911033719778061, "learning_rate": 0.0001195196417667413, "loss": 0.9566, "step": 1983 }, { "epoch": 0.8066680219556821, "grad_norm": 0.09575940668582916, "learning_rate": 0.00011947893344188887, "loss": 0.9836, "step": 1984 }, { "epoch": 0.8070746086602968, "grad_norm": 0.09464087337255478, "learning_rate": 0.00011943822511703644, "loss": 1.0854, "step": 1985 }, { "epoch": 0.8074811953649116, "grad_norm": 0.09153340756893158, "learning_rate": 0.000119397516792184, "loss": 1.0362, "step": 1986 }, { "epoch": 
0.8078877820695263, "grad_norm": 0.09390057623386383, "learning_rate": 0.00011935680846733157, "loss": 0.9829, "step": 1987 }, { "epoch": 0.8082943687741411, "grad_norm": 0.09109530597925186, "learning_rate": 0.00011931610014247914, "loss": 0.9472, "step": 1988 }, { "epoch": 0.8087009554787559, "grad_norm": 0.09163598716259003, "learning_rate": 0.0001192753918176267, "loss": 1.0141, "step": 1989 }, { "epoch": 0.8091075421833706, "grad_norm": 0.09769194573163986, "learning_rate": 0.00011923468349277426, "loss": 1.0536, "step": 1990 }, { "epoch": 0.8095141288879854, "grad_norm": 0.09787027537822723, "learning_rate": 0.00011919397516792186, "loss": 1.0384, "step": 1991 }, { "epoch": 0.8099207155926001, "grad_norm": 0.09753425419330597, "learning_rate": 0.00011915326684306943, "loss": 0.9425, "step": 1992 }, { "epoch": 0.8103273022972148, "grad_norm": 0.0975935086607933, "learning_rate": 0.000119112558518217, "loss": 0.9755, "step": 1993 }, { "epoch": 0.8107338890018296, "grad_norm": 0.09270279854536057, "learning_rate": 0.00011907185019336455, "loss": 0.9501, "step": 1994 }, { "epoch": 0.8111404757064444, "grad_norm": 0.09884528815746307, "learning_rate": 0.00011903114186851212, "loss": 0.986, "step": 1995 }, { "epoch": 0.8115470624110591, "grad_norm": 0.09327102452516556, "learning_rate": 0.00011899043354365968, "loss": 0.9258, "step": 1996 }, { "epoch": 0.8119536491156739, "grad_norm": 0.09072525054216385, "learning_rate": 0.00011894972521880725, "loss": 0.8975, "step": 1997 }, { "epoch": 0.8123602358202887, "grad_norm": 0.09030098468065262, "learning_rate": 0.00011890901689395482, "loss": 0.9163, "step": 1998 }, { "epoch": 0.8127668225249034, "grad_norm": 0.09437917172908783, "learning_rate": 0.00011886830856910239, "loss": 1.0101, "step": 1999 }, { "epoch": 0.8131734092295182, "grad_norm": 0.092490553855896, "learning_rate": 0.00011882760024424995, "loss": 0.9865, "step": 2000 }, { "epoch": 0.813579995934133, "grad_norm": 0.08658891916275024, "learning_rate": 
0.00011878689191939751, "loss": 0.878, "step": 2001 }, { "epoch": 0.8139865826387477, "grad_norm": 0.08677167445421219, "learning_rate": 0.00011874618359454508, "loss": 0.9192, "step": 2002 }, { "epoch": 0.8143931693433625, "grad_norm": 0.09392429143190384, "learning_rate": 0.00011870547526969267, "loss": 1.023, "step": 2003 }, { "epoch": 0.8147997560479773, "grad_norm": 0.09641133248806, "learning_rate": 0.00011866476694484024, "loss": 1.0088, "step": 2004 }, { "epoch": 0.815206342752592, "grad_norm": 0.0969158262014389, "learning_rate": 0.0001186240586199878, "loss": 1.0194, "step": 2005 }, { "epoch": 0.8156129294572068, "grad_norm": 0.09078145772218704, "learning_rate": 0.00011858335029513536, "loss": 0.9206, "step": 2006 }, { "epoch": 0.8160195161618216, "grad_norm": 0.0925087034702301, "learning_rate": 0.00011854264197028293, "loss": 1.0193, "step": 2007 }, { "epoch": 0.8164261028664362, "grad_norm": 0.08056949079036713, "learning_rate": 0.0001185019336454305, "loss": 0.8271, "step": 2008 }, { "epoch": 0.816832689571051, "grad_norm": 0.08815829455852509, "learning_rate": 0.00011846122532057806, "loss": 0.9061, "step": 2009 }, { "epoch": 0.8172392762756658, "grad_norm": 0.0914953425526619, "learning_rate": 0.00011842051699572563, "loss": 1.058, "step": 2010 }, { "epoch": 0.8176458629802805, "grad_norm": 0.09589337557554245, "learning_rate": 0.0001183798086708732, "loss": 1.0244, "step": 2011 }, { "epoch": 0.8180524496848953, "grad_norm": 0.09444423019886017, "learning_rate": 0.00011833910034602077, "loss": 1.007, "step": 2012 }, { "epoch": 0.81845903638951, "grad_norm": 0.0973111018538475, "learning_rate": 0.00011829839202116832, "loss": 1.1077, "step": 2013 }, { "epoch": 0.8188656230941248, "grad_norm": 0.09395329654216766, "learning_rate": 0.00011825768369631591, "loss": 0.9592, "step": 2014 }, { "epoch": 0.8192722097987396, "grad_norm": 1295.30712890625, "learning_rate": 0.00011821697537146348, "loss": 0.9503, "step": 2015 }, { "epoch": 0.8196787965033543, 
"grad_norm": 0.09247137606143951, "learning_rate": 0.00011817626704661105, "loss": 0.9733, "step": 2016 }, { "epoch": 0.8200853832079691, "grad_norm": 0.09796010702848434, "learning_rate": 0.0001181355587217586, "loss": 1.0192, "step": 2017 }, { "epoch": 0.8204919699125839, "grad_norm": 0.11081703752279282, "learning_rate": 0.00011809485039690617, "loss": 1.0732, "step": 2018 }, { "epoch": 0.8208985566171986, "grad_norm": 0.1036204919219017, "learning_rate": 0.00011805414207205374, "loss": 1.0547, "step": 2019 }, { "epoch": 0.8213051433218134, "grad_norm": 0.09200289845466614, "learning_rate": 0.0001180134337472013, "loss": 0.8751, "step": 2020 }, { "epoch": 0.8217117300264282, "grad_norm": 0.09226184338331223, "learning_rate": 0.00011797272542234887, "loss": 0.8352, "step": 2021 }, { "epoch": 0.8221183167310429, "grad_norm": 0.09109731018543243, "learning_rate": 0.00011793201709749644, "loss": 0.8986, "step": 2022 }, { "epoch": 0.8225249034356577, "grad_norm": 0.0922974944114685, "learning_rate": 0.00011789130877264401, "loss": 0.9166, "step": 2023 }, { "epoch": 0.8229314901402724, "grad_norm": 0.09215478599071503, "learning_rate": 0.00011785060044779158, "loss": 0.9764, "step": 2024 }, { "epoch": 0.8233380768448871, "grad_norm": 0.09852897375822067, "learning_rate": 0.00011780989212293913, "loss": 1.0164, "step": 2025 }, { "epoch": 0.8237446635495019, "grad_norm": 0.08374243229627609, "learning_rate": 0.00011776918379808673, "loss": 0.947, "step": 2026 }, { "epoch": 0.8241512502541167, "grad_norm": 0.09260760992765427, "learning_rate": 0.0001177284754732343, "loss": 1.0338, "step": 2027 }, { "epoch": 0.8245578369587314, "grad_norm": 0.09498609602451324, "learning_rate": 0.00011768776714838186, "loss": 1.1077, "step": 2028 }, { "epoch": 0.8249644236633462, "grad_norm": 0.0914779007434845, "learning_rate": 0.00011764705882352942, "loss": 0.9499, "step": 2029 }, { "epoch": 0.825371010367961, "grad_norm": 0.09603306651115417, "learning_rate": 0.00011760635049867698, 
"loss": 0.9652, "step": 2030 }, { "epoch": 0.8257775970725757, "grad_norm": 0.09358631074428558, "learning_rate": 0.00011756564217382455, "loss": 0.9593, "step": 2031 }, { "epoch": 0.8261841837771905, "grad_norm": 0.08833447843790054, "learning_rate": 0.00011752493384897212, "loss": 0.9582, "step": 2032 }, { "epoch": 0.8265907704818053, "grad_norm": 0.09237752854824066, "learning_rate": 0.00011748422552411969, "loss": 0.9893, "step": 2033 }, { "epoch": 0.82699735718642, "grad_norm": 0.09216301888227463, "learning_rate": 0.00011744351719926725, "loss": 1.0804, "step": 2034 }, { "epoch": 0.8274039438910348, "grad_norm": 0.09359832108020782, "learning_rate": 0.00011740280887441482, "loss": 1.0516, "step": 2035 }, { "epoch": 0.8278105305956496, "grad_norm": 0.08962893486022949, "learning_rate": 0.00011736210054956237, "loss": 0.9592, "step": 2036 }, { "epoch": 0.8282171173002643, "grad_norm": 0.08905650675296783, "learning_rate": 0.00011732139222470997, "loss": 1.0088, "step": 2037 }, { "epoch": 0.8286237040048791, "grad_norm": 0.09745819866657257, "learning_rate": 0.00011728068389985754, "loss": 1.0656, "step": 2038 }, { "epoch": 0.8290302907094937, "grad_norm": 0.09807167202234268, "learning_rate": 0.0001172399755750051, "loss": 1.0324, "step": 2039 }, { "epoch": 0.8294368774141085, "grad_norm": 0.094350166618824, "learning_rate": 0.00011719926725015267, "loss": 0.9412, "step": 2040 }, { "epoch": 0.8298434641187233, "grad_norm": 0.10582345724105835, "learning_rate": 0.00011715855892530023, "loss": 1.0602, "step": 2041 }, { "epoch": 0.830250050823338, "grad_norm": 0.0957132950425148, "learning_rate": 0.0001171178506004478, "loss": 1.0394, "step": 2042 }, { "epoch": 0.8306566375279528, "grad_norm": 0.09163911640644073, "learning_rate": 0.00011707714227559536, "loss": 0.966, "step": 2043 }, { "epoch": 0.8310632242325676, "grad_norm": 0.08863722532987595, "learning_rate": 0.00011703643395074293, "loss": 1.0096, "step": 2044 }, { "epoch": 0.8314698109371823, "grad_norm": 
0.0891941711306572, "learning_rate": 0.0001169957256258905, "loss": 0.9694, "step": 2045 }, { "epoch": 0.8318763976417971, "grad_norm": 0.097014419734478, "learning_rate": 0.00011695501730103806, "loss": 1.022, "step": 2046 }, { "epoch": 0.8322829843464119, "grad_norm": 0.09055501222610474, "learning_rate": 0.00011691430897618563, "loss": 0.9859, "step": 2047 }, { "epoch": 0.8326895710510266, "grad_norm": 0.09768117219209671, "learning_rate": 0.00011687360065133319, "loss": 1.1293, "step": 2048 }, { "epoch": 0.8330961577556414, "grad_norm": 0.09261562675237656, "learning_rate": 0.00011683289232648078, "loss": 0.9639, "step": 2049 }, { "epoch": 0.8335027444602562, "grad_norm": 0.09100788086652756, "learning_rate": 0.00011679218400162835, "loss": 0.9564, "step": 2050 }, { "epoch": 0.8339093311648709, "grad_norm": 0.08779970556497574, "learning_rate": 0.00011675147567677592, "loss": 0.9273, "step": 2051 }, { "epoch": 0.8343159178694857, "grad_norm": 0.09557755291461945, "learning_rate": 0.00011671076735192348, "loss": 1.0858, "step": 2052 }, { "epoch": 0.8347225045741005, "grad_norm": 0.09423234313726425, "learning_rate": 0.00011667005902707104, "loss": 0.9901, "step": 2053 }, { "epoch": 0.8351290912787152, "grad_norm": 0.08855794370174408, "learning_rate": 0.0001166293507022186, "loss": 0.9226, "step": 2054 }, { "epoch": 0.83553567798333, "grad_norm": 0.09653773158788681, "learning_rate": 0.00011658864237736617, "loss": 1.0381, "step": 2055 }, { "epoch": 0.8359422646879447, "grad_norm": 0.08749355375766754, "learning_rate": 0.00011654793405251374, "loss": 0.964, "step": 2056 }, { "epoch": 0.8363488513925594, "grad_norm": 0.09068714082241058, "learning_rate": 0.00011650722572766131, "loss": 0.9222, "step": 2057 }, { "epoch": 0.8367554380971742, "grad_norm": 0.09309016168117523, "learning_rate": 0.00011646651740280888, "loss": 0.9706, "step": 2058 }, { "epoch": 0.837162024801789, "grad_norm": 0.08905037492513657, "learning_rate": 0.00011642580907795644, "loss": 0.925, 
"step": 2059 }, { "epoch": 0.8375686115064037, "grad_norm": 0.09465768933296204, "learning_rate": 0.00011638510075310403, "loss": 0.9834, "step": 2060 }, { "epoch": 0.8379751982110185, "grad_norm": 0.09916462749242783, "learning_rate": 0.00011634439242825159, "loss": 1.1026, "step": 2061 }, { "epoch": 0.8383817849156333, "grad_norm": 0.10114342719316483, "learning_rate": 0.00011630368410339916, "loss": 1.1066, "step": 2062 }, { "epoch": 0.838788371620248, "grad_norm": 0.09570196270942688, "learning_rate": 0.00011626297577854673, "loss": 0.9225, "step": 2063 }, { "epoch": 0.8391949583248628, "grad_norm": 0.09646128118038177, "learning_rate": 0.0001162222674536943, "loss": 0.8873, "step": 2064 }, { "epoch": 0.8396015450294775, "grad_norm": 0.08988897502422333, "learning_rate": 0.00011618155912884185, "loss": 0.8496, "step": 2065 }, { "epoch": 0.8400081317340923, "grad_norm": 0.09540429711341858, "learning_rate": 0.00011614085080398942, "loss": 0.9864, "step": 2066 }, { "epoch": 0.8404147184387071, "grad_norm": 0.09202653169631958, "learning_rate": 0.00011610014247913699, "loss": 0.9331, "step": 2067 }, { "epoch": 0.8408213051433218, "grad_norm": 0.0908489003777504, "learning_rate": 0.00011605943415428455, "loss": 0.9212, "step": 2068 }, { "epoch": 0.8412278918479366, "grad_norm": 0.08785940706729889, "learning_rate": 0.00011601872582943212, "loss": 0.8732, "step": 2069 }, { "epoch": 0.8416344785525514, "grad_norm": 0.09724607318639755, "learning_rate": 0.00011597801750457969, "loss": 1.0057, "step": 2070 }, { "epoch": 0.842041065257166, "grad_norm": 0.09459156543016434, "learning_rate": 0.00011593730917972726, "loss": 1.0179, "step": 2071 }, { "epoch": 0.8424476519617808, "grad_norm": 0.09080464392900467, "learning_rate": 0.00011589660085487484, "loss": 0.9499, "step": 2072 }, { "epoch": 0.8428542386663956, "grad_norm": 0.0882730633020401, "learning_rate": 0.0001158558925300224, "loss": 0.9109, "step": 2073 }, { "epoch": 0.8432608253710103, "grad_norm": 
0.08633995056152344, "learning_rate": 0.00011581518420516997, "loss": 1.0101, "step": 2074 }, { "epoch": 0.8436674120756251, "grad_norm": 0.08903708308935165, "learning_rate": 0.00011577447588031754, "loss": 0.9705, "step": 2075 }, { "epoch": 0.8440739987802399, "grad_norm": 0.09651392698287964, "learning_rate": 0.0001157337675554651, "loss": 1.1204, "step": 2076 }, { "epoch": 0.8444805854848546, "grad_norm": 0.08830486238002777, "learning_rate": 0.00011569305923061266, "loss": 0.918, "step": 2077 }, { "epoch": 0.8448871721894694, "grad_norm": 0.09067387878894806, "learning_rate": 0.00011565235090576023, "loss": 0.9139, "step": 2078 }, { "epoch": 0.8452937588940842, "grad_norm": 0.08710314333438873, "learning_rate": 0.0001156116425809078, "loss": 0.8869, "step": 2079 }, { "epoch": 0.8457003455986989, "grad_norm": 0.09208957850933075, "learning_rate": 0.00011557093425605536, "loss": 0.9318, "step": 2080 }, { "epoch": 0.8461069323033137, "grad_norm": 0.09385235607624054, "learning_rate": 0.00011553022593120293, "loss": 1.0103, "step": 2081 }, { "epoch": 0.8465135190079285, "grad_norm": 0.09042852371931076, "learning_rate": 0.0001154895176063505, "loss": 0.9433, "step": 2082 }, { "epoch": 0.8469201057125432, "grad_norm": 0.09457480907440186, "learning_rate": 0.00011544880928149808, "loss": 0.9826, "step": 2083 }, { "epoch": 0.847326692417158, "grad_norm": 0.09332386404275894, "learning_rate": 0.00011540810095664565, "loss": 1.0264, "step": 2084 }, { "epoch": 0.8477332791217728, "grad_norm": 0.09104622900485992, "learning_rate": 0.00011536739263179322, "loss": 0.997, "step": 2085 }, { "epoch": 0.8481398658263875, "grad_norm": 0.08547891676425934, "learning_rate": 0.00011532668430694078, "loss": 0.9314, "step": 2086 }, { "epoch": 0.8485464525310022, "grad_norm": 0.08525467664003372, "learning_rate": 0.00011528597598208835, "loss": 0.9128, "step": 2087 }, { "epoch": 0.848953039235617, "grad_norm": 0.08956707268953323, "learning_rate": 0.0001152452676572359, "loss": 
0.9383, "step": 2088 }, { "epoch": 0.8493596259402317, "grad_norm": 0.09025274217128754, "learning_rate": 0.00011520455933238347, "loss": 0.975, "step": 2089 }, { "epoch": 0.8497662126448465, "grad_norm": 0.09149152785539627, "learning_rate": 0.00011516385100753104, "loss": 0.9633, "step": 2090 }, { "epoch": 0.8501727993494613, "grad_norm": 0.0975874587893486, "learning_rate": 0.00011512314268267861, "loss": 1.0283, "step": 2091 }, { "epoch": 0.850579386054076, "grad_norm": 0.09499591588973999, "learning_rate": 0.00011508243435782618, "loss": 1.0041, "step": 2092 }, { "epoch": 0.8509859727586908, "grad_norm": 0.09098786115646362, "learning_rate": 0.00011504172603297374, "loss": 1.0112, "step": 2093 }, { "epoch": 0.8513925594633055, "grad_norm": 0.08904889225959778, "learning_rate": 0.00011500101770812131, "loss": 0.8774, "step": 2094 }, { "epoch": 0.8517991461679203, "grad_norm": 0.08806558698415756, "learning_rate": 0.00011496030938326889, "loss": 0.8791, "step": 2095 }, { "epoch": 0.8522057328725351, "grad_norm": 0.09309332817792892, "learning_rate": 0.00011491960105841646, "loss": 1.0235, "step": 2096 }, { "epoch": 0.8526123195771498, "grad_norm": 0.0923120453953743, "learning_rate": 0.00011487889273356403, "loss": 0.9926, "step": 2097 }, { "epoch": 0.8530189062817646, "grad_norm": 0.09120898693799973, "learning_rate": 0.0001148381844087116, "loss": 1.0387, "step": 2098 }, { "epoch": 0.8534254929863794, "grad_norm": 0.09038707613945007, "learning_rate": 0.00011479747608385916, "loss": 0.9998, "step": 2099 }, { "epoch": 0.8538320796909941, "grad_norm": 0.09675489366054535, "learning_rate": 0.00011475676775900672, "loss": 0.9872, "step": 2100 }, { "epoch": 0.8542386663956089, "grad_norm": 0.09282051771879196, "learning_rate": 0.00011471605943415428, "loss": 0.9191, "step": 2101 }, { "epoch": 0.8546452531002237, "grad_norm": 0.09295305609703064, "learning_rate": 0.00011467535110930185, "loss": 0.9979, "step": 2102 }, { "epoch": 0.8550518398048383, "grad_norm": 
0.09186626225709915, "learning_rate": 0.00011463464278444942, "loss": 0.9737, "step": 2103 }, { "epoch": 0.8554584265094531, "grad_norm": 0.09875518828630447, "learning_rate": 0.00011459393445959699, "loss": 0.9602, "step": 2104 }, { "epoch": 0.8558650132140679, "grad_norm": 0.09007591754198074, "learning_rate": 0.00011455322613474456, "loss": 0.9516, "step": 2105 }, { "epoch": 0.8562715999186826, "grad_norm": 0.08967866748571396, "learning_rate": 0.00011451251780989214, "loss": 0.846, "step": 2106 }, { "epoch": 0.8566781866232974, "grad_norm": 0.08921065926551819, "learning_rate": 0.0001144718094850397, "loss": 0.912, "step": 2107 }, { "epoch": 0.8570847733279122, "grad_norm": 0.09793677181005478, "learning_rate": 0.00011443110116018727, "loss": 1.036, "step": 2108 }, { "epoch": 0.8574913600325269, "grad_norm": 0.08594641089439392, "learning_rate": 0.00011439039283533484, "loss": 0.9059, "step": 2109 }, { "epoch": 0.8578979467371417, "grad_norm": 0.09588687121868134, "learning_rate": 0.00011434968451048241, "loss": 1.0412, "step": 2110 }, { "epoch": 0.8583045334417565, "grad_norm": 0.09782074391841888, "learning_rate": 0.00011430897618562997, "loss": 0.9654, "step": 2111 }, { "epoch": 0.8587111201463712, "grad_norm": 0.0923461988568306, "learning_rate": 0.00011426826786077753, "loss": 0.8857, "step": 2112 }, { "epoch": 0.859117706850986, "grad_norm": 0.09952476620674133, "learning_rate": 0.0001142275595359251, "loss": 1.1011, "step": 2113 }, { "epoch": 0.8595242935556008, "grad_norm": 0.09214503318071365, "learning_rate": 0.00011418685121107266, "loss": 1.0602, "step": 2114 }, { "epoch": 0.8599308802602155, "grad_norm": 0.08914364874362946, "learning_rate": 0.00011414614288622023, "loss": 0.9854, "step": 2115 }, { "epoch": 0.8603374669648303, "grad_norm": 0.07836323231458664, "learning_rate": 0.0001141054345613678, "loss": 0.8843, "step": 2116 }, { "epoch": 0.8607440536694451, "grad_norm": 0.09170486778020859, "learning_rate": 0.00011406472623651537, "loss": 
1.0271, "step": 2117 }, { "epoch": 0.8611506403740598, "grad_norm": 0.1003408208489418, "learning_rate": 0.00011402401791166295, "loss": 1.0277, "step": 2118 }, { "epoch": 0.8615572270786745, "grad_norm": 0.09307452291250229, "learning_rate": 0.00011398330958681052, "loss": 0.9288, "step": 2119 }, { "epoch": 0.8619638137832892, "grad_norm": 0.0927853137254715, "learning_rate": 0.00011394260126195808, "loss": 0.9514, "step": 2120 }, { "epoch": 0.862370400487904, "grad_norm": 0.08969385176897049, "learning_rate": 0.00011390189293710565, "loss": 0.9732, "step": 2121 }, { "epoch": 0.8627769871925188, "grad_norm": 0.09284186363220215, "learning_rate": 0.00011386118461225322, "loss": 0.9383, "step": 2122 }, { "epoch": 0.8631835738971335, "grad_norm": 0.09814801067113876, "learning_rate": 0.00011382047628740077, "loss": 1.1479, "step": 2123 }, { "epoch": 0.8635901606017483, "grad_norm": 0.09331676363945007, "learning_rate": 0.00011377976796254834, "loss": 1.0516, "step": 2124 }, { "epoch": 0.8639967473063631, "grad_norm": 0.1085168719291687, "learning_rate": 0.00011373905963769591, "loss": 1.0429, "step": 2125 }, { "epoch": 0.8644033340109778, "grad_norm": 0.084463931620121, "learning_rate": 0.00011369835131284348, "loss": 0.8698, "step": 2126 }, { "epoch": 0.8648099207155926, "grad_norm": 0.08983217924833298, "learning_rate": 0.00011365764298799104, "loss": 0.9252, "step": 2127 }, { "epoch": 0.8652165074202074, "grad_norm": 0.09315849840641022, "learning_rate": 0.00011361693466313861, "loss": 0.9502, "step": 2128 }, { "epoch": 0.8656230941248221, "grad_norm": 0.08930740505456924, "learning_rate": 0.00011357622633828619, "loss": 0.9571, "step": 2129 }, { "epoch": 0.8660296808294369, "grad_norm": 0.09786850959062576, "learning_rate": 0.00011353551801343376, "loss": 0.9578, "step": 2130 }, { "epoch": 0.8664362675340517, "grad_norm": 0.09109771996736526, "learning_rate": 0.00011349480968858133, "loss": 1.0178, "step": 2131 }, { "epoch": 0.8668428542386664, "grad_norm": 
0.09116113185882568, "learning_rate": 0.0001134541013637289, "loss": 0.9807, "step": 2132 }, { "epoch": 0.8672494409432812, "grad_norm": 0.08628479391336441, "learning_rate": 0.00011341339303887646, "loss": 0.8486, "step": 2133 }, { "epoch": 0.867656027647896, "grad_norm": 0.08679687231779099, "learning_rate": 0.00011337268471402403, "loss": 0.9621, "step": 2134 }, { "epoch": 0.8680626143525106, "grad_norm": 0.08852676302194595, "learning_rate": 0.00011333197638917158, "loss": 1.0283, "step": 2135 }, { "epoch": 0.8684692010571254, "grad_norm": 0.0838993713259697, "learning_rate": 0.00011329126806431915, "loss": 0.9123, "step": 2136 }, { "epoch": 0.8688757877617402, "grad_norm": 0.09657544642686844, "learning_rate": 0.00011325055973946672, "loss": 1.0619, "step": 2137 }, { "epoch": 0.8692823744663549, "grad_norm": 0.0973362997174263, "learning_rate": 0.00011320985141461429, "loss": 1.0232, "step": 2138 }, { "epoch": 0.8696889611709697, "grad_norm": 0.09646733105182648, "learning_rate": 0.00011316914308976185, "loss": 0.972, "step": 2139 }, { "epoch": 0.8700955478755845, "grad_norm": 0.09988803416490555, "learning_rate": 0.00011312843476490942, "loss": 1.0555, "step": 2140 }, { "epoch": 0.8705021345801992, "grad_norm": 0.08326305449008942, "learning_rate": 0.000113087726440057, "loss": 0.8427, "step": 2141 }, { "epoch": 0.870908721284814, "grad_norm": 0.08908620476722717, "learning_rate": 0.00011304701811520457, "loss": 0.9304, "step": 2142 }, { "epoch": 0.8713153079894288, "grad_norm": 0.09493114799261093, "learning_rate": 0.00011300630979035214, "loss": 0.9481, "step": 2143 }, { "epoch": 0.8717218946940435, "grad_norm": 0.09405462443828583, "learning_rate": 0.0001129656014654997, "loss": 0.8995, "step": 2144 }, { "epoch": 0.8721284813986583, "grad_norm": 0.09000107645988464, "learning_rate": 0.00011292489314064727, "loss": 0.9969, "step": 2145 }, { "epoch": 0.872535068103273, "grad_norm": 0.08611016720533371, "learning_rate": 0.00011288418481579484, "loss": 0.9461, 
"step": 2146 }, { "epoch": 0.8729416548078878, "grad_norm": 0.09909865260124207, "learning_rate": 0.0001128434764909424, "loss": 1.0668, "step": 2147 }, { "epoch": 0.8733482415125026, "grad_norm": 0.09296669065952301, "learning_rate": 0.00011280276816608996, "loss": 1.0196, "step": 2148 }, { "epoch": 0.8737548282171173, "grad_norm": 0.09515411406755447, "learning_rate": 0.00011276205984123753, "loss": 0.9542, "step": 2149 }, { "epoch": 0.874161414921732, "grad_norm": 0.1016170084476471, "learning_rate": 0.0001127213515163851, "loss": 1.0545, "step": 2150 }, { "epoch": 0.8745680016263468, "grad_norm": 0.09408387541770935, "learning_rate": 0.00011268064319153267, "loss": 0.9852, "step": 2151 }, { "epoch": 0.8749745883309615, "grad_norm": 0.09204485267400742, "learning_rate": 0.00011263993486668025, "loss": 0.9893, "step": 2152 }, { "epoch": 0.8753811750355763, "grad_norm": 0.10141453891992569, "learning_rate": 0.00011259922654182782, "loss": 0.9496, "step": 2153 }, { "epoch": 0.8757877617401911, "grad_norm": 0.09088826179504395, "learning_rate": 0.00011255851821697538, "loss": 0.9073, "step": 2154 }, { "epoch": 0.8761943484448058, "grad_norm": 0.09122118353843689, "learning_rate": 0.00011251780989212295, "loss": 0.9927, "step": 2155 }, { "epoch": 0.8766009351494206, "grad_norm": 0.08325305581092834, "learning_rate": 0.00011247710156727052, "loss": 0.8863, "step": 2156 }, { "epoch": 0.8770075218540354, "grad_norm": 0.09161413460969925, "learning_rate": 0.00011243639324241809, "loss": 0.9652, "step": 2157 }, { "epoch": 0.8774141085586501, "grad_norm": 0.08764609694480896, "learning_rate": 0.00011239568491756565, "loss": 0.9851, "step": 2158 }, { "epoch": 0.8778206952632649, "grad_norm": 0.09217865765094757, "learning_rate": 0.00011235497659271321, "loss": 0.9546, "step": 2159 }, { "epoch": 0.8782272819678797, "grad_norm": 0.08746439218521118, "learning_rate": 0.00011231426826786078, "loss": 0.9115, "step": 2160 }, { "epoch": 0.8786338686724944, "grad_norm": 
0.09703024476766586, "learning_rate": 0.00011227355994300834, "loss": 1.0464, "step": 2161 }, { "epoch": 0.8790404553771092, "grad_norm": 0.08776511996984482, "learning_rate": 0.00011223285161815591, "loss": 0.9828, "step": 2162 }, { "epoch": 0.879447042081724, "grad_norm": 0.09440065920352936, "learning_rate": 0.00011219214329330348, "loss": 1.0458, "step": 2163 }, { "epoch": 0.8798536287863387, "grad_norm": 0.08808255940675735, "learning_rate": 0.00011215143496845106, "loss": 0.9835, "step": 2164 }, { "epoch": 0.8802602154909535, "grad_norm": 0.09321518242359161, "learning_rate": 0.00011211072664359863, "loss": 0.9592, "step": 2165 }, { "epoch": 0.8806668021955683, "grad_norm": 0.08485117554664612, "learning_rate": 0.0001120700183187462, "loss": 0.8574, "step": 2166 }, { "epoch": 0.8810733889001829, "grad_norm": 0.09101716428995132, "learning_rate": 0.00011202930999389376, "loss": 1.0562, "step": 2167 }, { "epoch": 0.8814799756047977, "grad_norm": 0.0868394672870636, "learning_rate": 0.00011198860166904133, "loss": 0.9244, "step": 2168 }, { "epoch": 0.8818865623094125, "grad_norm": 0.09465855360031128, "learning_rate": 0.0001119478933441889, "loss": 1.0169, "step": 2169 }, { "epoch": 0.8822931490140272, "grad_norm": 0.08937587589025497, "learning_rate": 0.00011190718501933645, "loss": 1.0126, "step": 2170 }, { "epoch": 0.882699735718642, "grad_norm": 0.09273424744606018, "learning_rate": 0.00011186647669448402, "loss": 0.9194, "step": 2171 }, { "epoch": 0.8831063224232567, "grad_norm": 0.09193231910467148, "learning_rate": 0.00011182576836963159, "loss": 1.0218, "step": 2172 }, { "epoch": 0.8835129091278715, "grad_norm": 0.09555093199014664, "learning_rate": 0.00011178506004477915, "loss": 0.9679, "step": 2173 }, { "epoch": 0.8839194958324863, "grad_norm": 0.09123765677213669, "learning_rate": 0.00011174435171992672, "loss": 0.9842, "step": 2174 }, { "epoch": 0.884326082537101, "grad_norm": 0.0927356630563736, "learning_rate": 0.0001117036433950743, "loss": 
0.9448, "step": 2175 }, { "epoch": 0.8847326692417158, "grad_norm": 0.09687252342700958, "learning_rate": 0.00011166293507022187, "loss": 1.0458, "step": 2176 }, { "epoch": 0.8851392559463306, "grad_norm": 0.10106469690799713, "learning_rate": 0.00011162222674536944, "loss": 0.9754, "step": 2177 }, { "epoch": 0.8855458426509453, "grad_norm": 0.09762795269489288, "learning_rate": 0.000111581518420517, "loss": 0.922, "step": 2178 }, { "epoch": 0.8859524293555601, "grad_norm": 0.09456496685743332, "learning_rate": 0.00011154081009566457, "loss": 0.9345, "step": 2179 }, { "epoch": 0.8863590160601749, "grad_norm": 0.09217999875545502, "learning_rate": 0.00011150010177081214, "loss": 1.0163, "step": 2180 }, { "epoch": 0.8867656027647896, "grad_norm": 0.0955888032913208, "learning_rate": 0.00011145939344595971, "loss": 0.9464, "step": 2181 }, { "epoch": 0.8871721894694043, "grad_norm": 0.09351805597543716, "learning_rate": 0.00011141868512110726, "loss": 0.9911, "step": 2182 }, { "epoch": 0.8875787761740191, "grad_norm": 0.08360351622104645, "learning_rate": 0.00011137797679625483, "loss": 0.8656, "step": 2183 }, { "epoch": 0.8879853628786338, "grad_norm": 0.09139275550842285, "learning_rate": 0.0001113372684714024, "loss": 0.9629, "step": 2184 }, { "epoch": 0.8883919495832486, "grad_norm": 0.0988682433962822, "learning_rate": 0.00011129656014654997, "loss": 0.9856, "step": 2185 }, { "epoch": 0.8887985362878634, "grad_norm": 0.098371222615242, "learning_rate": 0.00011125585182169753, "loss": 1.0566, "step": 2186 }, { "epoch": 0.8892051229924781, "grad_norm": 0.09045372158288956, "learning_rate": 0.00011121514349684511, "loss": 0.9015, "step": 2187 }, { "epoch": 0.8896117096970929, "grad_norm": 0.09395705908536911, "learning_rate": 0.00011117443517199268, "loss": 1.0059, "step": 2188 }, { "epoch": 0.8900182964017077, "grad_norm": 0.09204548597335815, "learning_rate": 0.00011113372684714025, "loss": 1.0135, "step": 2189 }, { "epoch": 0.8904248831063224, "grad_norm": 
0.08476635068655014, "learning_rate": 0.00011109301852228782, "loss": 0.9098, "step": 2190 }, { "epoch": 0.8908314698109372, "grad_norm": 0.09019143879413605, "learning_rate": 0.00011105231019743539, "loss": 0.9087, "step": 2191 }, { "epoch": 0.891238056515552, "grad_norm": 0.0935545563697815, "learning_rate": 0.00011101160187258295, "loss": 1.0204, "step": 2192 }, { "epoch": 0.8916446432201667, "grad_norm": 0.09029703587293625, "learning_rate": 0.00011097089354773052, "loss": 0.9913, "step": 2193 }, { "epoch": 0.8920512299247815, "grad_norm": 0.0886225774884224, "learning_rate": 0.00011093018522287807, "loss": 0.9958, "step": 2194 }, { "epoch": 0.8924578166293963, "grad_norm": 0.09101995080709457, "learning_rate": 0.00011088947689802564, "loss": 0.918, "step": 2195 }, { "epoch": 0.892864403334011, "grad_norm": 0.10184985399246216, "learning_rate": 0.00011084876857317321, "loss": 1.1037, "step": 2196 }, { "epoch": 0.8932709900386258, "grad_norm": 0.09409435093402863, "learning_rate": 0.00011080806024832078, "loss": 0.9088, "step": 2197 }, { "epoch": 0.8936775767432404, "grad_norm": 0.09551674872636795, "learning_rate": 0.00011076735192346836, "loss": 1.0379, "step": 2198 }, { "epoch": 0.8940841634478552, "grad_norm": 0.08619996160268784, "learning_rate": 0.00011072664359861593, "loss": 0.9068, "step": 2199 }, { "epoch": 0.89449075015247, "grad_norm": 0.09373293071985245, "learning_rate": 0.0001106859352737635, "loss": 0.9394, "step": 2200 }, { "epoch": 0.8948973368570847, "grad_norm": 0.09360924363136292, "learning_rate": 0.00011064522694891106, "loss": 0.918, "step": 2201 }, { "epoch": 0.8953039235616995, "grad_norm": 0.08794824033975601, "learning_rate": 0.00011060451862405863, "loss": 0.9127, "step": 2202 }, { "epoch": 0.8957105102663143, "grad_norm": 0.09011366963386536, "learning_rate": 0.0001105638102992062, "loss": 0.9744, "step": 2203 }, { "epoch": 0.896117096970929, "grad_norm": 0.09070491790771484, "learning_rate": 0.00011052310197435376, "loss": 0.9182, 
"step": 2204 }, { "epoch": 0.8965236836755438, "grad_norm": 0.09090661257505417, "learning_rate": 0.00011048239364950133, "loss": 0.8662, "step": 2205 }, { "epoch": 0.8969302703801586, "grad_norm": 0.1035584807395935, "learning_rate": 0.00011044168532464889, "loss": 1.0132, "step": 2206 }, { "epoch": 0.8973368570847733, "grad_norm": 0.09471878409385681, "learning_rate": 0.00011040097699979645, "loss": 0.9183, "step": 2207 }, { "epoch": 0.8977434437893881, "grad_norm": 0.08386964350938797, "learning_rate": 0.00011036026867494402, "loss": 0.8727, "step": 2208 }, { "epoch": 0.8981500304940029, "grad_norm": 0.09777465462684631, "learning_rate": 0.00011031956035009159, "loss": 1.1244, "step": 2209 }, { "epoch": 0.8985566171986176, "grad_norm": 0.0950189158320427, "learning_rate": 0.00011027885202523917, "loss": 0.9494, "step": 2210 }, { "epoch": 0.8989632039032324, "grad_norm": 0.10297118872404099, "learning_rate": 0.00011023814370038674, "loss": 1.0345, "step": 2211 }, { "epoch": 0.8993697906078472, "grad_norm": 0.10186666250228882, "learning_rate": 0.0001101974353755343, "loss": 1.0064, "step": 2212 }, { "epoch": 0.8997763773124619, "grad_norm": 0.09332112222909927, "learning_rate": 0.00011015672705068187, "loss": 0.9915, "step": 2213 }, { "epoch": 0.9001829640170766, "grad_norm": 0.09262728691101074, "learning_rate": 0.00011011601872582944, "loss": 0.9909, "step": 2214 }, { "epoch": 0.9005895507216914, "grad_norm": 0.08695352077484131, "learning_rate": 0.00011007531040097701, "loss": 0.9143, "step": 2215 }, { "epoch": 0.9009961374263061, "grad_norm": 0.09473065286874771, "learning_rate": 0.00011003460207612458, "loss": 0.9297, "step": 2216 }, { "epoch": 0.9014027241309209, "grad_norm": 0.09609273076057434, "learning_rate": 0.00010999389375127213, "loss": 0.9357, "step": 2217 }, { "epoch": 0.9018093108355357, "grad_norm": 0.09273882955312729, "learning_rate": 0.0001099531854264197, "loss": 0.9215, "step": 2218 }, { "epoch": 0.9022158975401504, "grad_norm": 
0.09666993468999863, "learning_rate": 0.00010991247710156727, "loss": 1.0015, "step": 2219 }, { "epoch": 0.9026224842447652, "grad_norm": 0.09521298855543137, "learning_rate": 0.00010987176877671483, "loss": 1.0203, "step": 2220 }, { "epoch": 0.90302907094938, "grad_norm": 0.08719142526388168, "learning_rate": 0.00010983106045186243, "loss": 0.8722, "step": 2221 }, { "epoch": 0.9034356576539947, "grad_norm": 0.09398588538169861, "learning_rate": 0.00010979035212700998, "loss": 1.0722, "step": 2222 }, { "epoch": 0.9038422443586095, "grad_norm": 0.09667246043682098, "learning_rate": 0.00010974964380215755, "loss": 1.0235, "step": 2223 }, { "epoch": 0.9042488310632242, "grad_norm": 0.08866921067237854, "learning_rate": 0.00010970893547730512, "loss": 0.9155, "step": 2224 }, { "epoch": 0.904655417767839, "grad_norm": 0.08643452823162079, "learning_rate": 0.00010966822715245268, "loss": 0.9939, "step": 2225 }, { "epoch": 0.9050620044724538, "grad_norm": 0.09741934388875961, "learning_rate": 0.00010962751882760025, "loss": 1.094, "step": 2226 }, { "epoch": 0.9054685911770685, "grad_norm": 0.09106621891260147, "learning_rate": 0.00010958681050274782, "loss": 0.9378, "step": 2227 }, { "epoch": 0.9058751778816833, "grad_norm": 0.09541244804859161, "learning_rate": 0.00010954610217789539, "loss": 1.0023, "step": 2228 }, { "epoch": 0.906281764586298, "grad_norm": 0.09381993860006332, "learning_rate": 0.00010950539385304294, "loss": 1.0045, "step": 2229 }, { "epoch": 0.9066883512909127, "grad_norm": 0.09603835642337799, "learning_rate": 0.00010946468552819051, "loss": 1.0988, "step": 2230 }, { "epoch": 0.9070949379955275, "grad_norm": 0.10151727497577667, "learning_rate": 0.00010942397720333808, "loss": 1.0537, "step": 2231 }, { "epoch": 0.9075015247001423, "grad_norm": 0.09192585945129395, "learning_rate": 0.00010938326887848564, "loss": 0.9195, "step": 2232 }, { "epoch": 0.907908111404757, "grad_norm": 0.09959591180086136, "learning_rate": 0.00010934256055363324, "loss": 
1.0567, "step": 2233 }, { "epoch": 0.9083146981093718, "grad_norm": 0.09753983467817307, "learning_rate": 0.0001093018522287808, "loss": 0.9355, "step": 2234 }, { "epoch": 0.9087212848139866, "grad_norm": 0.10025233775377274, "learning_rate": 0.00010926114390392836, "loss": 0.9571, "step": 2235 }, { "epoch": 0.9091278715186013, "grad_norm": 0.09255032986402512, "learning_rate": 0.00010922043557907593, "loss": 1.0291, "step": 2236 }, { "epoch": 0.9095344582232161, "grad_norm": 0.09453842043876648, "learning_rate": 0.0001091797272542235, "loss": 0.9489, "step": 2237 }, { "epoch": 0.9099410449278309, "grad_norm": 0.09328801184892654, "learning_rate": 0.00010913901892937106, "loss": 1.0596, "step": 2238 }, { "epoch": 0.9103476316324456, "grad_norm": 0.08745749294757843, "learning_rate": 0.00010909831060451863, "loss": 0.846, "step": 2239 }, { "epoch": 0.9107542183370604, "grad_norm": 0.09585551172494888, "learning_rate": 0.0001090576022796662, "loss": 0.8888, "step": 2240 }, { "epoch": 0.9111608050416752, "grad_norm": 0.09437873214483261, "learning_rate": 0.00010901689395481375, "loss": 1.0954, "step": 2241 }, { "epoch": 0.9115673917462899, "grad_norm": 0.09190462529659271, "learning_rate": 0.00010897618562996132, "loss": 0.9484, "step": 2242 }, { "epoch": 0.9119739784509047, "grad_norm": 0.09598547965288162, "learning_rate": 0.00010893547730510889, "loss": 0.9765, "step": 2243 }, { "epoch": 0.9123805651555195, "grad_norm": 0.08472473174333572, "learning_rate": 0.00010889476898025648, "loss": 0.914, "step": 2244 }, { "epoch": 0.9127871518601341, "grad_norm": 0.09113691002130508, "learning_rate": 0.00010885406065540404, "loss": 1.0507, "step": 2245 }, { "epoch": 0.913193738564749, "grad_norm": 0.09340670704841614, "learning_rate": 0.0001088133523305516, "loss": 0.9908, "step": 2246 }, { "epoch": 0.9136003252693637, "grad_norm": 0.09673475474119186, "learning_rate": 0.00010877264400569917, "loss": 0.966, "step": 2247 }, { "epoch": 0.9140069119739784, "grad_norm": 
0.09419335424900055, "learning_rate": 0.00010873193568084674, "loss": 0.9484, "step": 2248 }, { "epoch": 0.9144134986785932, "grad_norm": 0.09127677232027054, "learning_rate": 0.00010869122735599431, "loss": 0.9786, "step": 2249 }, { "epoch": 0.9148200853832079, "grad_norm": 0.09134241938591003, "learning_rate": 0.00010865051903114188, "loss": 0.9651, "step": 2250 }, { "epoch": 0.9152266720878227, "grad_norm": 0.08164233714342117, "learning_rate": 0.00010860981070628944, "loss": 0.8301, "step": 2251 }, { "epoch": 0.9156332587924375, "grad_norm": 0.09648903459310532, "learning_rate": 0.00010856910238143701, "loss": 0.9931, "step": 2252 }, { "epoch": 0.9160398454970522, "grad_norm": 0.09599076956510544, "learning_rate": 0.00010852839405658457, "loss": 1.1588, "step": 2253 }, { "epoch": 0.916446432201667, "grad_norm": 0.09624163806438446, "learning_rate": 0.00010848768573173213, "loss": 1.0291, "step": 2254 }, { "epoch": 0.9168530189062818, "grad_norm": 0.09379248321056366, "learning_rate": 0.0001084469774068797, "loss": 1.0189, "step": 2255 }, { "epoch": 0.9172596056108965, "grad_norm": 0.1004246398806572, "learning_rate": 0.0001084062690820273, "loss": 1.0819, "step": 2256 }, { "epoch": 0.9176661923155113, "grad_norm": 0.0896550863981247, "learning_rate": 0.00010836556075717485, "loss": 0.9514, "step": 2257 }, { "epoch": 0.9180727790201261, "grad_norm": 0.08566062897443771, "learning_rate": 0.00010832485243232242, "loss": 0.9827, "step": 2258 }, { "epoch": 0.9184793657247408, "grad_norm": 0.09392201900482178, "learning_rate": 0.00010828414410746998, "loss": 1.0118, "step": 2259 }, { "epoch": 0.9188859524293556, "grad_norm": 0.09124386310577393, "learning_rate": 0.00010824343578261755, "loss": 0.9892, "step": 2260 }, { "epoch": 0.9192925391339704, "grad_norm": 0.10101054608821869, "learning_rate": 0.00010820272745776512, "loss": 1.1112, "step": 2261 }, { "epoch": 0.919699125838585, "grad_norm": 0.0995619148015976, "learning_rate": 0.00010816201913291269, "loss": 
0.9978, "step": 2262 }, { "epoch": 0.9201057125431998, "grad_norm": 0.10450758039951324, "learning_rate": 0.00010812131080806025, "loss": 1.0496, "step": 2263 }, { "epoch": 0.9205122992478146, "grad_norm": 0.08600231260061264, "learning_rate": 0.00010808060248320781, "loss": 0.9513, "step": 2264 }, { "epoch": 0.9209188859524293, "grad_norm": 0.09189002215862274, "learning_rate": 0.00010803989415835538, "loss": 0.9342, "step": 2265 }, { "epoch": 0.9213254726570441, "grad_norm": 0.0933215469121933, "learning_rate": 0.00010799918583350294, "loss": 0.9806, "step": 2266 }, { "epoch": 0.9217320593616589, "grad_norm": 0.09535648673772812, "learning_rate": 0.00010795847750865054, "loss": 1.045, "step": 2267 }, { "epoch": 0.9221386460662736, "grad_norm": 0.09350398182868958, "learning_rate": 0.0001079177691837981, "loss": 0.948, "step": 2268 }, { "epoch": 0.9225452327708884, "grad_norm": 0.09485659748315811, "learning_rate": 0.00010787706085894566, "loss": 1.0113, "step": 2269 }, { "epoch": 0.9229518194755032, "grad_norm": 0.08902882784605026, "learning_rate": 0.00010783635253409323, "loss": 0.9287, "step": 2270 }, { "epoch": 0.9233584061801179, "grad_norm": 0.09547727555036545, "learning_rate": 0.0001077956442092408, "loss": 0.9704, "step": 2271 }, { "epoch": 0.9237649928847327, "grad_norm": 0.0938442051410675, "learning_rate": 0.00010775493588438836, "loss": 1.0824, "step": 2272 }, { "epoch": 0.9241715795893475, "grad_norm": 0.09499689936637878, "learning_rate": 0.00010771422755953593, "loss": 1.0162, "step": 2273 }, { "epoch": 0.9245781662939622, "grad_norm": 0.08982361853122711, "learning_rate": 0.0001076735192346835, "loss": 1.0051, "step": 2274 }, { "epoch": 0.924984752998577, "grad_norm": 0.08913452923297882, "learning_rate": 0.00010763281090983107, "loss": 0.9585, "step": 2275 }, { "epoch": 0.9253913397031917, "grad_norm": 0.09322965890169144, "learning_rate": 0.00010759210258497862, "loss": 0.9951, "step": 2276 }, { "epoch": 0.9257979264078064, "grad_norm": 
0.08852788060903549, "learning_rate": 0.00010755139426012619, "loss": 0.8826, "step": 2277 }, { "epoch": 0.9262045131124212, "grad_norm": 0.08934798091650009, "learning_rate": 0.00010751068593527376, "loss": 0.9592, "step": 2278 }, { "epoch": 0.9266110998170359, "grad_norm": 0.08754114806652069, "learning_rate": 0.00010746997761042135, "loss": 0.8947, "step": 2279 }, { "epoch": 0.9270176865216507, "grad_norm": 0.08998506516218185, "learning_rate": 0.00010742926928556892, "loss": 0.9905, "step": 2280 }, { "epoch": 0.9274242732262655, "grad_norm": 0.09599866718053818, "learning_rate": 0.00010738856096071647, "loss": 0.9931, "step": 2281 }, { "epoch": 0.9278308599308802, "grad_norm": 0.0930427685379982, "learning_rate": 0.00010734785263586404, "loss": 1.0059, "step": 2282 }, { "epoch": 0.928237446635495, "grad_norm": 0.0885154977440834, "learning_rate": 0.00010730714431101161, "loss": 0.9802, "step": 2283 }, { "epoch": 0.9286440333401098, "grad_norm": 0.0902063325047493, "learning_rate": 0.00010726643598615918, "loss": 0.9687, "step": 2284 }, { "epoch": 0.9290506200447245, "grad_norm": 0.08460281789302826, "learning_rate": 0.00010722572766130674, "loss": 0.8834, "step": 2285 }, { "epoch": 0.9294572067493393, "grad_norm": 0.0936511978507042, "learning_rate": 0.00010718501933645431, "loss": 1.0907, "step": 2286 }, { "epoch": 0.9298637934539541, "grad_norm": 0.09102717787027359, "learning_rate": 0.00010714431101160188, "loss": 0.9573, "step": 2287 }, { "epoch": 0.9302703801585688, "grad_norm": 0.08209431916475296, "learning_rate": 0.00010710360268674943, "loss": 0.79, "step": 2288 }, { "epoch": 0.9306769668631836, "grad_norm": 0.09181005507707596, "learning_rate": 0.000107062894361897, "loss": 1.0394, "step": 2289 }, { "epoch": 0.9310835535677984, "grad_norm": 0.09006737917661667, "learning_rate": 0.0001070221860370446, "loss": 0.976, "step": 2290 }, { "epoch": 0.9314901402724131, "grad_norm": 0.08806903660297394, "learning_rate": 0.00010698147771219216, "loss": 0.9429, 
"step": 2291 }, { "epoch": 0.9318967269770279, "grad_norm": 0.09663230180740356, "learning_rate": 0.00010694076938733973, "loss": 0.9936, "step": 2292 }, { "epoch": 0.9323033136816427, "grad_norm": 0.09236756712198257, "learning_rate": 0.00010690006106248728, "loss": 0.9775, "step": 2293 }, { "epoch": 0.9327099003862573, "grad_norm": 0.0875551626086235, "learning_rate": 0.00010685935273763485, "loss": 0.9222, "step": 2294 }, { "epoch": 0.9331164870908721, "grad_norm": 0.09144583344459534, "learning_rate": 0.00010681864441278242, "loss": 0.9166, "step": 2295 }, { "epoch": 0.9335230737954869, "grad_norm": 0.09605292975902557, "learning_rate": 0.00010677793608792999, "loss": 1.0085, "step": 2296 }, { "epoch": 0.9339296605001016, "grad_norm": 0.09013127535581589, "learning_rate": 0.00010673722776307755, "loss": 0.9473, "step": 2297 }, { "epoch": 0.9343362472047164, "grad_norm": 0.09012243151664734, "learning_rate": 0.00010669651943822512, "loss": 0.953, "step": 2298 }, { "epoch": 0.9347428339093312, "grad_norm": 0.0961398184299469, "learning_rate": 0.00010665581111337269, "loss": 1.0658, "step": 2299 }, { "epoch": 0.9351494206139459, "grad_norm": 0.09278837591409683, "learning_rate": 0.00010661510278852024, "loss": 0.9739, "step": 2300 }, { "epoch": 0.9355560073185607, "grad_norm": 0.08477824926376343, "learning_rate": 0.00010657439446366781, "loss": 0.9376, "step": 2301 }, { "epoch": 0.9359625940231754, "grad_norm": 0.08817529678344727, "learning_rate": 0.0001065336861388154, "loss": 0.9371, "step": 2302 }, { "epoch": 0.9363691807277902, "grad_norm": 0.09441924840211868, "learning_rate": 0.00010649297781396297, "loss": 0.8977, "step": 2303 }, { "epoch": 0.936775767432405, "grad_norm": 0.09430365264415741, "learning_rate": 0.00010645226948911053, "loss": 1.0525, "step": 2304 }, { "epoch": 0.9371823541370197, "grad_norm": 0.09169165045022964, "learning_rate": 0.0001064115611642581, "loss": 0.9261, "step": 2305 }, { "epoch": 0.9375889408416345, "grad_norm": 
0.09943647682666779, "learning_rate": 0.00010637085283940566, "loss": 0.9956, "step": 2306 }, { "epoch": 0.9379955275462493, "grad_norm": 0.0941019132733345, "learning_rate": 0.00010633014451455323, "loss": 1.0029, "step": 2307 }, { "epoch": 0.938402114250864, "grad_norm": 0.08687194436788559, "learning_rate": 0.0001062894361897008, "loss": 0.9077, "step": 2308 }, { "epoch": 0.9388087009554787, "grad_norm": 0.09248825162649155, "learning_rate": 0.00010624872786484837, "loss": 1.0412, "step": 2309 }, { "epoch": 0.9392152876600935, "grad_norm": 0.09985529631376266, "learning_rate": 0.00010620801953999593, "loss": 1.0573, "step": 2310 }, { "epoch": 0.9396218743647082, "grad_norm": 0.09216563403606415, "learning_rate": 0.00010616731121514349, "loss": 0.9448, "step": 2311 }, { "epoch": 0.940028461069323, "grad_norm": 0.092438243329525, "learning_rate": 0.00010612660289029106, "loss": 0.9679, "step": 2312 }, { "epoch": 0.9404350477739378, "grad_norm": 0.0857539102435112, "learning_rate": 0.00010608589456543865, "loss": 0.8766, "step": 2313 }, { "epoch": 0.9408416344785525, "grad_norm": 0.09243746846914291, "learning_rate": 0.00010604518624058622, "loss": 0.9536, "step": 2314 }, { "epoch": 0.9412482211831673, "grad_norm": 0.08617236465215683, "learning_rate": 0.00010600447791573379, "loss": 0.9518, "step": 2315 }, { "epoch": 0.9416548078877821, "grad_norm": 0.08910689502954483, "learning_rate": 0.00010596376959088134, "loss": 0.9602, "step": 2316 }, { "epoch": 0.9420613945923968, "grad_norm": 0.08643607795238495, "learning_rate": 0.00010592306126602891, "loss": 0.8827, "step": 2317 }, { "epoch": 0.9424679812970116, "grad_norm": 0.0912124440073967, "learning_rate": 0.00010588235294117647, "loss": 0.9965, "step": 2318 }, { "epoch": 0.9428745680016264, "grad_norm": 0.09088627249002457, "learning_rate": 0.00010584164461632404, "loss": 0.9025, "step": 2319 }, { "epoch": 0.9432811547062411, "grad_norm": 0.09329286962747574, "learning_rate": 0.00010580093629147161, "loss": 
0.9791, "step": 2320 }, { "epoch": 0.9436877414108559, "grad_norm": 0.10339915007352829, "learning_rate": 0.00010576022796661918, "loss": 1.0807, "step": 2321 }, { "epoch": 0.9440943281154707, "grad_norm": 0.09373354911804199, "learning_rate": 0.00010571951964176675, "loss": 0.9911, "step": 2322 }, { "epoch": 0.9445009148200854, "grad_norm": 0.10617939382791519, "learning_rate": 0.0001056788113169143, "loss": 1.0851, "step": 2323 }, { "epoch": 0.9449075015247002, "grad_norm": 0.09167637676000595, "learning_rate": 0.00010563810299206187, "loss": 0.9047, "step": 2324 }, { "epoch": 0.945314088229315, "grad_norm": 0.08472510427236557, "learning_rate": 0.00010559739466720946, "loss": 0.8727, "step": 2325 }, { "epoch": 0.9457206749339296, "grad_norm": 0.0884479507803917, "learning_rate": 0.00010555668634235703, "loss": 0.9784, "step": 2326 }, { "epoch": 0.9461272616385444, "grad_norm": 0.09533506631851196, "learning_rate": 0.0001055159780175046, "loss": 0.9641, "step": 2327 }, { "epoch": 0.9465338483431591, "grad_norm": 0.09487663954496384, "learning_rate": 0.00010547526969265215, "loss": 0.9594, "step": 2328 }, { "epoch": 0.9469404350477739, "grad_norm": 0.09608594328165054, "learning_rate": 0.00010543456136779972, "loss": 0.9552, "step": 2329 }, { "epoch": 0.9473470217523887, "grad_norm": 0.08777690678834915, "learning_rate": 0.00010539385304294729, "loss": 0.944, "step": 2330 }, { "epoch": 0.9477536084570034, "grad_norm": 0.09336721152067184, "learning_rate": 0.00010535314471809485, "loss": 0.9872, "step": 2331 }, { "epoch": 0.9481601951616182, "grad_norm": 0.0932617112994194, "learning_rate": 0.00010531243639324242, "loss": 1.0259, "step": 2332 }, { "epoch": 0.948566781866233, "grad_norm": 0.09936727583408356, "learning_rate": 0.00010527172806838999, "loss": 1.0559, "step": 2333 }, { "epoch": 0.9489733685708477, "grad_norm": 0.08607706427574158, "learning_rate": 0.00010523101974353756, "loss": 0.8735, "step": 2334 }, { "epoch": 0.9493799552754625, "grad_norm": 
0.10083240270614624, "learning_rate": 0.00010519031141868511, "loss": 1.1199, "step": 2335 }, { "epoch": 0.9497865419800773, "grad_norm": 0.09380745142698288, "learning_rate": 0.0001051496030938327, "loss": 0.9708, "step": 2336 }, { "epoch": 0.950193128684692, "grad_norm": 0.09522271901369095, "learning_rate": 0.00010510889476898027, "loss": 0.9576, "step": 2337 }, { "epoch": 0.9505997153893068, "grad_norm": 0.08754262328147888, "learning_rate": 0.00010506818644412784, "loss": 0.8834, "step": 2338 }, { "epoch": 0.9510063020939216, "grad_norm": 0.09373676776885986, "learning_rate": 0.00010502747811927541, "loss": 1.0229, "step": 2339 }, { "epoch": 0.9514128887985362, "grad_norm": 0.09756851196289062, "learning_rate": 0.00010498676979442296, "loss": 1.0262, "step": 2340 }, { "epoch": 0.951819475503151, "grad_norm": 0.09419600665569305, "learning_rate": 0.00010494606146957053, "loss": 1.0049, "step": 2341 }, { "epoch": 0.9522260622077658, "grad_norm": 0.08849748224020004, "learning_rate": 0.0001049053531447181, "loss": 1.0045, "step": 2342 }, { "epoch": 0.9526326489123805, "grad_norm": 0.09651193022727966, "learning_rate": 0.00010486464481986567, "loss": 1.0209, "step": 2343 }, { "epoch": 0.9530392356169953, "grad_norm": 0.09986065328121185, "learning_rate": 0.00010482393649501323, "loss": 1.0789, "step": 2344 }, { "epoch": 0.9534458223216101, "grad_norm": 0.0957985445857048, "learning_rate": 0.0001047832281701608, "loss": 1.106, "step": 2345 }, { "epoch": 0.9538524090262248, "grad_norm": 0.1007857397198677, "learning_rate": 0.00010474251984530837, "loss": 1.027, "step": 2346 }, { "epoch": 0.9542589957308396, "grad_norm": 0.09330718219280243, "learning_rate": 0.00010470181152045592, "loss": 1.0046, "step": 2347 }, { "epoch": 0.9546655824354544, "grad_norm": 0.09503220021724701, "learning_rate": 0.00010466110319560352, "loss": 1.0119, "step": 2348 }, { "epoch": 0.9550721691400691, "grad_norm": 0.09526234120130539, "learning_rate": 0.00010462039487075109, "loss": 
0.9898, "step": 2349 }, { "epoch": 0.9554787558446839, "grad_norm": 0.0942670926451683, "learning_rate": 0.00010457968654589865, "loss": 1.0538, "step": 2350 }, { "epoch": 0.9558853425492987, "grad_norm": 0.09694371372461319, "learning_rate": 0.00010453897822104621, "loss": 0.9101, "step": 2351 }, { "epoch": 0.9562919292539134, "grad_norm": 0.09850834310054779, "learning_rate": 0.00010449826989619377, "loss": 1.0476, "step": 2352 }, { "epoch": 0.9566985159585282, "grad_norm": 0.09078159183263779, "learning_rate": 0.00010445756157134134, "loss": 0.8798, "step": 2353 }, { "epoch": 0.957105102663143, "grad_norm": 0.09196247905492783, "learning_rate": 0.00010441685324648891, "loss": 0.9571, "step": 2354 }, { "epoch": 0.9575116893677577, "grad_norm": 0.09725657850503922, "learning_rate": 0.00010437614492163648, "loss": 1.0229, "step": 2355 }, { "epoch": 0.9579182760723725, "grad_norm": 0.09602061659097672, "learning_rate": 0.00010433543659678404, "loss": 0.9666, "step": 2356 }, { "epoch": 0.9583248627769871, "grad_norm": 0.09440819919109344, "learning_rate": 0.00010429472827193161, "loss": 1.0165, "step": 2357 }, { "epoch": 0.9587314494816019, "grad_norm": 0.09775765985250473, "learning_rate": 0.00010425401994707917, "loss": 1.0927, "step": 2358 }, { "epoch": 0.9591380361862167, "grad_norm": 0.10038933902978897, "learning_rate": 0.00010421331162222676, "loss": 1.1155, "step": 2359 }, { "epoch": 0.9595446228908314, "grad_norm": 0.09265521913766861, "learning_rate": 0.00010417260329737433, "loss": 0.9965, "step": 2360 }, { "epoch": 0.9599512095954462, "grad_norm": 0.09679180383682251, "learning_rate": 0.0001041318949725219, "loss": 0.9484, "step": 2361 }, { "epoch": 0.960357796300061, "grad_norm": 0.09756863862276077, "learning_rate": 0.00010409118664766946, "loss": 0.9929, "step": 2362 }, { "epoch": 0.9607643830046757, "grad_norm": 0.09271581470966339, "learning_rate": 0.00010405047832281702, "loss": 0.9717, "step": 2363 }, { "epoch": 0.9611709697092905, "grad_norm": 
0.08519497513771057, "learning_rate": 0.00010400976999796459, "loss": 0.9248, "step": 2364 }, { "epoch": 0.9615775564139053, "grad_norm": 0.0930318683385849, "learning_rate": 0.00010396906167311215, "loss": 0.9269, "step": 2365 }, { "epoch": 0.96198414311852, "grad_norm": 0.0876484215259552, "learning_rate": 0.00010392835334825972, "loss": 0.8956, "step": 2366 }, { "epoch": 0.9623907298231348, "grad_norm": 0.10773497074842453, "learning_rate": 0.00010388764502340729, "loss": 1.0162, "step": 2367 }, { "epoch": 0.9627973165277496, "grad_norm": 0.10369701683521271, "learning_rate": 0.00010384693669855486, "loss": 1.0242, "step": 2368 }, { "epoch": 0.9632039032323643, "grad_norm": 0.09781001508235931, "learning_rate": 0.00010380622837370242, "loss": 0.9984, "step": 2369 }, { "epoch": 0.9636104899369791, "grad_norm": 0.09027720987796783, "learning_rate": 0.00010376552004884998, "loss": 0.9459, "step": 2370 }, { "epoch": 0.9640170766415939, "grad_norm": 0.0846111848950386, "learning_rate": 0.00010372481172399757, "loss": 0.8168, "step": 2371 }, { "epoch": 0.9644236633462085, "grad_norm": 0.09253893047571182, "learning_rate": 0.00010368410339914514, "loss": 1.036, "step": 2372 }, { "epoch": 0.9648302500508233, "grad_norm": 0.09075961261987686, "learning_rate": 0.00010364339507429271, "loss": 0.9765, "step": 2373 }, { "epoch": 0.9652368367554381, "grad_norm": 0.09227050840854645, "learning_rate": 0.00010360268674944028, "loss": 0.9577, "step": 2374 }, { "epoch": 0.9656434234600528, "grad_norm": 0.09381213039159775, "learning_rate": 0.00010356197842458783, "loss": 1.041, "step": 2375 }, { "epoch": 0.9660500101646676, "grad_norm": 0.08584290742874146, "learning_rate": 0.0001035212700997354, "loss": 0.7906, "step": 2376 }, { "epoch": 0.9664565968692824, "grad_norm": 0.09522596746683121, "learning_rate": 0.00010348056177488297, "loss": 0.9739, "step": 2377 }, { "epoch": 0.9668631835738971, "grad_norm": 0.09105250984430313, "learning_rate": 0.00010343985345003053, "loss": 
0.943, "step": 2378 }, { "epoch": 0.9672697702785119, "grad_norm": 0.09327445179224014, "learning_rate": 0.0001033991451251781, "loss": 1.0486, "step": 2379 }, { "epoch": 0.9676763569831267, "grad_norm": 0.08443416655063629, "learning_rate": 0.00010335843680032567, "loss": 0.8889, "step": 2380 }, { "epoch": 0.9680829436877414, "grad_norm": 0.09366993606090546, "learning_rate": 0.00010331772847547324, "loss": 0.9585, "step": 2381 }, { "epoch": 0.9684895303923562, "grad_norm": 0.1025518849492073, "learning_rate": 0.00010327702015062082, "loss": 0.9062, "step": 2382 }, { "epoch": 0.9688961170969709, "grad_norm": 0.08948516100645065, "learning_rate": 0.00010323631182576838, "loss": 0.9477, "step": 2383 }, { "epoch": 0.9693027038015857, "grad_norm": 0.09162997454404831, "learning_rate": 0.00010319560350091595, "loss": 0.9069, "step": 2384 }, { "epoch": 0.9697092905062005, "grad_norm": 0.09584391862154007, "learning_rate": 0.00010315489517606352, "loss": 0.9816, "step": 2385 }, { "epoch": 0.9701158772108152, "grad_norm": 0.08747036010026932, "learning_rate": 0.00010311418685121109, "loss": 0.9845, "step": 2386 }, { "epoch": 0.97052246391543, "grad_norm": 0.09000515937805176, "learning_rate": 0.00010307347852635864, "loss": 0.8898, "step": 2387 }, { "epoch": 0.9709290506200448, "grad_norm": 0.0957585796713829, "learning_rate": 0.00010303277020150621, "loss": 1.0053, "step": 2388 }, { "epoch": 0.9713356373246594, "grad_norm": 0.0985213965177536, "learning_rate": 0.00010299206187665378, "loss": 1.0988, "step": 2389 }, { "epoch": 0.9717422240292742, "grad_norm": 0.09285228699445724, "learning_rate": 0.00010295135355180134, "loss": 0.957, "step": 2390 }, { "epoch": 0.972148810733889, "grad_norm": 0.08875738829374313, "learning_rate": 0.00010291064522694891, "loss": 0.9324, "step": 2391 }, { "epoch": 0.9725553974385037, "grad_norm": 0.09840039908885956, "learning_rate": 0.00010286993690209648, "loss": 0.9047, "step": 2392 }, { "epoch": 0.9729619841431185, "grad_norm": 
0.09745080024003983, "learning_rate": 0.00010282922857724405, "loss": 1.0707, "step": 2393 }, { "epoch": 0.9733685708477333, "grad_norm": 0.09076414257287979, "learning_rate": 0.00010278852025239163, "loss": 0.947, "step": 2394 }, { "epoch": 0.973775157552348, "grad_norm": 0.08922093361616135, "learning_rate": 0.0001027478119275392, "loss": 0.8983, "step": 2395 }, { "epoch": 0.9741817442569628, "grad_norm": 0.09455031156539917, "learning_rate": 0.00010270710360268676, "loss": 1.0877, "step": 2396 }, { "epoch": 0.9745883309615776, "grad_norm": 0.09286132454872131, "learning_rate": 0.00010266639527783433, "loss": 0.98, "step": 2397 }, { "epoch": 0.9749949176661923, "grad_norm": 0.10121460258960724, "learning_rate": 0.00010262568695298189, "loss": 1.0906, "step": 2398 }, { "epoch": 0.9754015043708071, "grad_norm": 0.0891910120844841, "learning_rate": 0.00010258497862812945, "loss": 0.8889, "step": 2399 }, { "epoch": 0.9758080910754219, "grad_norm": 0.0938873440027237, "learning_rate": 0.00010254427030327702, "loss": 0.8787, "step": 2400 }, { "epoch": 0.9762146777800366, "grad_norm": 0.09117105603218079, "learning_rate": 0.00010250356197842459, "loss": 0.9053, "step": 2401 }, { "epoch": 0.9766212644846514, "grad_norm": 0.09840644896030426, "learning_rate": 0.00010246285365357216, "loss": 1.0462, "step": 2402 }, { "epoch": 0.9770278511892662, "grad_norm": 0.09379451721906662, "learning_rate": 0.00010242214532871972, "loss": 0.9617, "step": 2403 }, { "epoch": 0.9774344378938808, "grad_norm": 0.09142056852579117, "learning_rate": 0.00010238143700386729, "loss": 1.0022, "step": 2404 }, { "epoch": 0.9778410245984956, "grad_norm": 0.09325367957353592, "learning_rate": 0.00010234072867901487, "loss": 0.9356, "step": 2405 }, { "epoch": 0.9782476113031104, "grad_norm": 0.09714538604021072, "learning_rate": 0.00010230002035416244, "loss": 1.0685, "step": 2406 }, { "epoch": 0.9786541980077251, "grad_norm": 0.09502388536930084, "learning_rate": 0.00010225931202931001, "loss": 
1.0158, "step": 2407 }, { "epoch": 0.9790607847123399, "grad_norm": 0.09626177698373795, "learning_rate": 0.00010221860370445758, "loss": 1.0249, "step": 2408 }, { "epoch": 0.9794673714169546, "grad_norm": 0.09790710359811783, "learning_rate": 0.00010217789537960514, "loss": 0.9974, "step": 2409 }, { "epoch": 0.9798739581215694, "grad_norm": 0.0907469391822815, "learning_rate": 0.0001021371870547527, "loss": 0.994, "step": 2410 }, { "epoch": 0.9802805448261842, "grad_norm": 0.10248905420303345, "learning_rate": 0.00010209647872990026, "loss": 1.0214, "step": 2411 }, { "epoch": 0.9806871315307989, "grad_norm": 0.09504317492246628, "learning_rate": 0.00010205577040504783, "loss": 1.0642, "step": 2412 }, { "epoch": 0.9810937182354137, "grad_norm": 0.09868543595075607, "learning_rate": 0.0001020150620801954, "loss": 1.0595, "step": 2413 }, { "epoch": 0.9815003049400285, "grad_norm": 0.08648547530174255, "learning_rate": 0.00010197435375534297, "loss": 0.9273, "step": 2414 }, { "epoch": 0.9819068916446432, "grad_norm": 0.0870203897356987, "learning_rate": 0.00010193364543049054, "loss": 0.8661, "step": 2415 }, { "epoch": 0.982313478349258, "grad_norm": 0.09689280390739441, "learning_rate": 0.0001018929371056381, "loss": 1.0179, "step": 2416 }, { "epoch": 0.9827200650538728, "grad_norm": 0.09497373551130295, "learning_rate": 0.00010185222878078568, "loss": 0.9292, "step": 2417 }, { "epoch": 0.9831266517584875, "grad_norm": 0.09194166213274002, "learning_rate": 0.00010181152045593325, "loss": 0.969, "step": 2418 }, { "epoch": 0.9835332384631023, "grad_norm": 0.08828569948673248, "learning_rate": 0.00010177081213108082, "loss": 0.8936, "step": 2419 }, { "epoch": 0.983939825167717, "grad_norm": 0.095185786485672, "learning_rate": 0.00010173010380622839, "loss": 0.9859, "step": 2420 }, { "epoch": 0.9843464118723317, "grad_norm": 0.09699594974517822, "learning_rate": 0.00010168939548137595, "loss": 1.0568, "step": 2421 }, { "epoch": 0.9847529985769465, "grad_norm": 
0.09333425760269165, "learning_rate": 0.00010164868715652351, "loss": 0.9503, "step": 2422 }, { "epoch": 0.9851595852815613, "grad_norm": 0.0883539542555809, "learning_rate": 0.00010160797883167108, "loss": 0.9711, "step": 2423 }, { "epoch": 0.985566171986176, "grad_norm": 0.09544458985328674, "learning_rate": 0.00010156727050681864, "loss": 0.8668, "step": 2424 }, { "epoch": 0.9859727586907908, "grad_norm": 0.0979728177189827, "learning_rate": 0.00010152656218196621, "loss": 1.0685, "step": 2425 }, { "epoch": 0.9863793453954056, "grad_norm": 0.08907411992549896, "learning_rate": 0.00010148585385711378, "loss": 0.8947, "step": 2426 }, { "epoch": 0.9867859321000203, "grad_norm": 0.09532100707292557, "learning_rate": 0.00010144514553226135, "loss": 1.0793, "step": 2427 }, { "epoch": 0.9871925188046351, "grad_norm": 0.0916009321808815, "learning_rate": 0.00010140443720740893, "loss": 0.9604, "step": 2428 }, { "epoch": 0.9875991055092499, "grad_norm": 0.0960593968629837, "learning_rate": 0.0001013637288825565, "loss": 1.0012, "step": 2429 }, { "epoch": 0.9880056922138646, "grad_norm": 0.0948946550488472, "learning_rate": 0.00010132302055770406, "loss": 0.9555, "step": 2430 }, { "epoch": 0.9884122789184794, "grad_norm": 0.08670156449079514, "learning_rate": 0.00010128231223285163, "loss": 0.8863, "step": 2431 }, { "epoch": 0.9888188656230942, "grad_norm": 0.0870981365442276, "learning_rate": 0.0001012416039079992, "loss": 0.949, "step": 2432 }, { "epoch": 0.9892254523277089, "grad_norm": 0.09065506607294083, "learning_rate": 0.00010120089558314677, "loss": 1.0791, "step": 2433 }, { "epoch": 0.9896320390323237, "grad_norm": 0.08753534406423569, "learning_rate": 0.00010116018725829432, "loss": 0.8656, "step": 2434 }, { "epoch": 0.9900386257369383, "grad_norm": 0.08939878642559052, "learning_rate": 0.00010111947893344189, "loss": 0.8983, "step": 2435 }, { "epoch": 0.9904452124415531, "grad_norm": 0.09110575914382935, "learning_rate": 0.00010107877060858946, "loss": 0.8971, 
"step": 2436 }, { "epoch": 0.9908517991461679, "grad_norm": 0.08614566922187805, "learning_rate": 0.00010103806228373702, "loss": 0.9746, "step": 2437 }, { "epoch": 0.9912583858507826, "grad_norm": 0.09685923904180527, "learning_rate": 0.00010099735395888459, "loss": 0.9638, "step": 2438 }, { "epoch": 0.9916649725553974, "grad_norm": 0.10014784336090088, "learning_rate": 0.00010095664563403216, "loss": 1.0335, "step": 2439 }, { "epoch": 0.9920715592600122, "grad_norm": 0.09917939454317093, "learning_rate": 0.00010091593730917974, "loss": 1.0288, "step": 2440 }, { "epoch": 0.9924781459646269, "grad_norm": 0.09158805757761002, "learning_rate": 0.00010087522898432731, "loss": 0.9372, "step": 2441 }, { "epoch": 0.9928847326692417, "grad_norm": 0.09151756763458252, "learning_rate": 0.00010083452065947488, "loss": 1.0042, "step": 2442 }, { "epoch": 0.9932913193738565, "grad_norm": 0.09201864898204803, "learning_rate": 0.00010079381233462244, "loss": 0.937, "step": 2443 }, { "epoch": 0.9936979060784712, "grad_norm": 0.10031972825527191, "learning_rate": 0.00010075310400977001, "loss": 0.989, "step": 2444 }, { "epoch": 0.994104492783086, "grad_norm": 0.09593512862920761, "learning_rate": 0.00010071239568491756, "loss": 0.9259, "step": 2445 }, { "epoch": 0.9945110794877008, "grad_norm": 0.10088519006967545, "learning_rate": 0.00010067168736006513, "loss": 1.0888, "step": 2446 }, { "epoch": 0.9949176661923155, "grad_norm": 0.09052947163581848, "learning_rate": 0.0001006309790352127, "loss": 0.9643, "step": 2447 }, { "epoch": 0.9953242528969303, "grad_norm": 0.0943833664059639, "learning_rate": 0.00010059027071036027, "loss": 1.0308, "step": 2448 }, { "epoch": 0.9957308396015451, "grad_norm": 0.0929458737373352, "learning_rate": 0.00010054956238550783, "loss": 0.8993, "step": 2449 }, { "epoch": 0.9961374263061598, "grad_norm": 0.09643827378749847, "learning_rate": 0.0001005088540606554, "loss": 0.9708, "step": 2450 }, { "epoch": 0.9965440130107746, "grad_norm": 
0.08925779908895493, "learning_rate": 0.00010046814573580298, "loss": 0.9209, "step": 2451 }, { "epoch": 0.9969505997153894, "grad_norm": 0.08630047738552094, "learning_rate": 0.00010042743741095055, "loss": 0.9324, "step": 2452 }, { "epoch": 0.997357186420004, "grad_norm": 0.10127938538789749, "learning_rate": 0.00010038672908609812, "loss": 0.9926, "step": 2453 }, { "epoch": 0.9977637731246188, "grad_norm": 0.09573110938072205, "learning_rate": 0.00010034602076124569, "loss": 0.9801, "step": 2454 }, { "epoch": 0.9981703598292336, "grad_norm": 0.0963260605931282, "learning_rate": 0.00010030531243639325, "loss": 0.98, "step": 2455 }, { "epoch": 0.9985769465338483, "grad_norm": 0.08414101600646973, "learning_rate": 0.00010026460411154082, "loss": 0.8676, "step": 2456 }, { "epoch": 0.9989835332384631, "grad_norm": 0.09320447593927383, "learning_rate": 0.00010022389578668838, "loss": 0.998, "step": 2457 }, { "epoch": 0.9993901199430779, "grad_norm": 0.09721797704696655, "learning_rate": 0.00010018318746183594, "loss": 1.0123, "step": 2458 }, { "epoch": 0.9997967066476926, "grad_norm": 0.08773447573184967, "learning_rate": 0.00010014247913698351, "loss": 0.9673, "step": 2459 }, { "epoch": 1.0, "grad_norm": 0.15718789398670197, "learning_rate": 0.00010010177081213108, "loss": 1.1286, "step": 2460 }, { "epoch": 1.0004065867046148, "grad_norm": 0.09029074758291245, "learning_rate": 0.00010006106248727865, "loss": 0.9905, "step": 2461 }, { "epoch": 1.0008131734092296, "grad_norm": 0.09984813630580902, "learning_rate": 0.00010002035416242621, "loss": 0.9981, "step": 2462 }, { "epoch": 1.0012197601138442, "grad_norm": 0.09808840602636337, "learning_rate": 9.997964583757378e-05, "loss": 1.0156, "step": 2463 }, { "epoch": 1.001626346818459, "grad_norm": 0.08917602896690369, "learning_rate": 9.993893751272135e-05, "loss": 0.944, "step": 2464 }, { "epoch": 1.0020329335230738, "grad_norm": 0.0943906158208847, "learning_rate": 9.989822918786892e-05, "loss": 0.9294, "step": 2465 }, 
{ "epoch": 1.0024395202276886, "grad_norm": 0.09091315418481827, "learning_rate": 9.98575208630165e-05, "loss": 0.9707, "step": 2466 }, { "epoch": 1.0028461069323034, "grad_norm": 0.09035106003284454, "learning_rate": 9.981681253816407e-05, "loss": 0.9562, "step": 2467 }, { "epoch": 1.0032526936369182, "grad_norm": 0.09709779173135757, "learning_rate": 9.977610421331163e-05, "loss": 0.9287, "step": 2468 }, { "epoch": 1.0036592803415327, "grad_norm": 0.09063035994768143, "learning_rate": 9.973539588845919e-05, "loss": 0.9138, "step": 2469 }, { "epoch": 1.0040658670461475, "grad_norm": 0.09490003436803818, "learning_rate": 9.969468756360676e-05, "loss": 0.9475, "step": 2470 }, { "epoch": 1.0044724537507623, "grad_norm": 0.10134010761976242, "learning_rate": 9.965397923875432e-05, "loss": 1.0092, "step": 2471 }, { "epoch": 1.0048790404553771, "grad_norm": 0.09728873521089554, "learning_rate": 9.96132709139019e-05, "loss": 0.9498, "step": 2472 }, { "epoch": 1.005285627159992, "grad_norm": 0.09160648286342621, "learning_rate": 9.957256258904947e-05, "loss": 0.8707, "step": 2473 }, { "epoch": 1.0056922138646067, "grad_norm": 0.0939764603972435, "learning_rate": 9.953185426419704e-05, "loss": 0.9619, "step": 2474 }, { "epoch": 1.0060988005692213, "grad_norm": 0.08643637597560883, "learning_rate": 9.94911459393446e-05, "loss": 0.9377, "step": 2475 }, { "epoch": 1.006505387273836, "grad_norm": 0.09141729027032852, "learning_rate": 9.945043761449216e-05, "loss": 0.8859, "step": 2476 }, { "epoch": 1.006911973978451, "grad_norm": 0.09555509686470032, "learning_rate": 9.940972928963974e-05, "loss": 0.933, "step": 2477 }, { "epoch": 1.0073185606830657, "grad_norm": 0.0935022309422493, "learning_rate": 9.936902096478731e-05, "loss": 0.9368, "step": 2478 }, { "epoch": 1.0077251473876805, "grad_norm": 0.09959034621715546, "learning_rate": 9.932831263993488e-05, "loss": 0.974, "step": 2479 }, { "epoch": 1.0081317340922953, "grad_norm": 0.09246455878019333, "learning_rate": 
9.928760431508245e-05, "loss": 0.9248, "step": 2480 }, { "epoch": 1.0085383207969099, "grad_norm": 0.10091500729322433, "learning_rate": 9.924689599023e-05, "loss": 1.122, "step": 2481 }, { "epoch": 1.0089449075015247, "grad_norm": 0.10083048790693283, "learning_rate": 9.920618766537757e-05, "loss": 1.0199, "step": 2482 }, { "epoch": 1.0093514942061395, "grad_norm": 0.09641805291175842, "learning_rate": 9.916547934052515e-05, "loss": 0.9971, "step": 2483 }, { "epoch": 1.0097580809107543, "grad_norm": 0.10362432897090912, "learning_rate": 9.912477101567272e-05, "loss": 0.9596, "step": 2484 }, { "epoch": 1.010164667615369, "grad_norm": 0.09050238877534866, "learning_rate": 9.908406269082028e-05, "loss": 0.9423, "step": 2485 }, { "epoch": 1.0105712543199838, "grad_norm": 0.10209590941667557, "learning_rate": 9.904335436596785e-05, "loss": 0.9366, "step": 2486 }, { "epoch": 1.0109778410245984, "grad_norm": 0.104631707072258, "learning_rate": 9.90026460411154e-05, "loss": 1.0476, "step": 2487 }, { "epoch": 1.0113844277292132, "grad_norm": 0.09572993963956833, "learning_rate": 9.896193771626297e-05, "loss": 1.0523, "step": 2488 }, { "epoch": 1.011791014433828, "grad_norm": 0.10640837252140045, "learning_rate": 9.892122939141055e-05, "loss": 1.1238, "step": 2489 }, { "epoch": 1.0121976011384428, "grad_norm": 0.09798834472894669, "learning_rate": 9.888052106655812e-05, "loss": 0.9597, "step": 2490 }, { "epoch": 1.0126041878430576, "grad_norm": 0.08913593739271164, "learning_rate": 9.883981274170569e-05, "loss": 0.9258, "step": 2491 }, { "epoch": 1.0130107745476722, "grad_norm": 0.09719277173280716, "learning_rate": 9.879910441685324e-05, "loss": 0.9812, "step": 2492 }, { "epoch": 1.013417361252287, "grad_norm": 0.09699688851833344, "learning_rate": 9.875839609200081e-05, "loss": 0.8946, "step": 2493 }, { "epoch": 1.0138239479569018, "grad_norm": 0.09061427414417267, "learning_rate": 9.871768776714838e-05, "loss": 0.9075, "step": 2494 }, { "epoch": 1.0142305346615166, 
"grad_norm": 0.08979996293783188, "learning_rate": 9.867697944229596e-05, "loss": 0.933, "step": 2495 }, { "epoch": 1.0146371213661314, "grad_norm": 0.09325064718723297, "learning_rate": 9.863627111744353e-05, "loss": 0.9604, "step": 2496 }, { "epoch": 1.0150437080707462, "grad_norm": 0.09821408241987228, "learning_rate": 9.85955627925911e-05, "loss": 1.0871, "step": 2497 }, { "epoch": 1.0154502947753608, "grad_norm": 0.09746625274419785, "learning_rate": 9.855485446773865e-05, "loss": 0.9304, "step": 2498 }, { "epoch": 1.0158568814799755, "grad_norm": 0.09508597105741501, "learning_rate": 9.851414614288622e-05, "loss": 0.9469, "step": 2499 }, { "epoch": 1.0162634681845903, "grad_norm": 0.10357919335365295, "learning_rate": 9.84734378180338e-05, "loss": 1.0272, "step": 2500 }, { "epoch": 1.0166700548892051, "grad_norm": 0.09326835721731186, "learning_rate": 9.843272949318137e-05, "loss": 0.8754, "step": 2501 }, { "epoch": 1.01707664159382, "grad_norm": 0.0892389789223671, "learning_rate": 9.839202116832893e-05, "loss": 0.831, "step": 2502 }, { "epoch": 1.0174832282984347, "grad_norm": 0.09790865331888199, "learning_rate": 9.83513128434765e-05, "loss": 1.0179, "step": 2503 }, { "epoch": 1.0178898150030493, "grad_norm": 0.09933339804410934, "learning_rate": 9.831060451862405e-05, "loss": 1.012, "step": 2504 }, { "epoch": 1.0182964017076641, "grad_norm": 0.09628647565841675, "learning_rate": 9.826989619377162e-05, "loss": 0.9739, "step": 2505 }, { "epoch": 1.018702988412279, "grad_norm": 0.09639148414134979, "learning_rate": 9.82291878689192e-05, "loss": 0.9626, "step": 2506 }, { "epoch": 1.0191095751168937, "grad_norm": 0.10145976394414902, "learning_rate": 9.818847954406677e-05, "loss": 0.9734, "step": 2507 }, { "epoch": 1.0195161618215085, "grad_norm": 0.09076192229986191, "learning_rate": 9.814777121921434e-05, "loss": 0.898, "step": 2508 }, { "epoch": 1.0199227485261233, "grad_norm": 0.09159097820520401, "learning_rate": 9.81070628943619e-05, "loss": 0.9218, 
"step": 2509 }, { "epoch": 1.0203293352307379, "grad_norm": 0.08706653863191605, "learning_rate": 9.806635456950946e-05, "loss": 0.8249, "step": 2510 }, { "epoch": 1.0207359219353527, "grad_norm": 0.10595209151506424, "learning_rate": 9.802564624465703e-05, "loss": 1.0499, "step": 2511 }, { "epoch": 1.0211425086399675, "grad_norm": 0.08821277320384979, "learning_rate": 9.798493791980461e-05, "loss": 0.8806, "step": 2512 }, { "epoch": 1.0215490953445823, "grad_norm": 0.09965387731790543, "learning_rate": 9.794422959495218e-05, "loss": 1.0049, "step": 2513 }, { "epoch": 1.021955682049197, "grad_norm": 0.09820786118507385, "learning_rate": 9.790352127009974e-05, "loss": 0.9716, "step": 2514 }, { "epoch": 1.0223622687538116, "grad_norm": 0.10157819837331772, "learning_rate": 9.786281294524731e-05, "loss": 0.9659, "step": 2515 }, { "epoch": 1.0227688554584264, "grad_norm": 0.09603773802518845, "learning_rate": 9.782210462039487e-05, "loss": 0.9056, "step": 2516 }, { "epoch": 1.0231754421630412, "grad_norm": 0.10223423689603806, "learning_rate": 9.778139629554243e-05, "loss": 1.0357, "step": 2517 }, { "epoch": 1.023582028867656, "grad_norm": 0.10251198709011078, "learning_rate": 9.774068797069002e-05, "loss": 1.0054, "step": 2518 }, { "epoch": 1.0239886155722708, "grad_norm": 0.11098898202180862, "learning_rate": 9.769997964583758e-05, "loss": 1.0292, "step": 2519 }, { "epoch": 1.0243952022768856, "grad_norm": 0.1087106242775917, "learning_rate": 9.765927132098515e-05, "loss": 1.0838, "step": 2520 }, { "epoch": 1.0248017889815002, "grad_norm": 0.09911047667264938, "learning_rate": 9.761856299613272e-05, "loss": 0.9538, "step": 2521 }, { "epoch": 1.025208375686115, "grad_norm": 0.10460842400789261, "learning_rate": 9.757785467128027e-05, "loss": 1.008, "step": 2522 }, { "epoch": 1.0256149623907298, "grad_norm": 0.09570446610450745, "learning_rate": 9.753714634642785e-05, "loss": 0.8979, "step": 2523 }, { "epoch": 1.0260215490953446, "grad_norm": 0.10213327407836914, 
"learning_rate": 9.749643802157542e-05, "loss": 0.9863, "step": 2524 }, { "epoch": 1.0264281357999594, "grad_norm": 0.11625881493091583, "learning_rate": 9.745572969672299e-05, "loss": 1.102, "step": 2525 }, { "epoch": 1.0268347225045742, "grad_norm": 0.09997177869081497, "learning_rate": 9.741502137187056e-05, "loss": 1.0134, "step": 2526 }, { "epoch": 1.0272413092091888, "grad_norm": 0.09349930286407471, "learning_rate": 9.737431304701812e-05, "loss": 0.9323, "step": 2527 }, { "epoch": 1.0276478959138036, "grad_norm": 0.09024021774530411, "learning_rate": 9.733360472216568e-05, "loss": 0.9381, "step": 2528 }, { "epoch": 1.0280544826184184, "grad_norm": 0.09808880090713501, "learning_rate": 9.729289639731326e-05, "loss": 0.902, "step": 2529 }, { "epoch": 1.0284610693230332, "grad_norm": 0.09804200381040573, "learning_rate": 9.725218807246083e-05, "loss": 0.9712, "step": 2530 }, { "epoch": 1.028867656027648, "grad_norm": 0.09585238248109818, "learning_rate": 9.72114797476084e-05, "loss": 0.944, "step": 2531 }, { "epoch": 1.0292742427322628, "grad_norm": 0.10107024013996124, "learning_rate": 9.717077142275596e-05, "loss": 0.9959, "step": 2532 }, { "epoch": 1.0296808294368773, "grad_norm": 0.09128806740045547, "learning_rate": 9.713006309790353e-05, "loss": 0.8755, "step": 2533 }, { "epoch": 1.0300874161414921, "grad_norm": 0.10488265007734299, "learning_rate": 9.708935477305108e-05, "loss": 1.0307, "step": 2534 }, { "epoch": 1.030494002846107, "grad_norm": 0.09195258468389511, "learning_rate": 9.704864644819867e-05, "loss": 0.8642, "step": 2535 }, { "epoch": 1.0309005895507217, "grad_norm": 0.09590499103069305, "learning_rate": 9.700793812334623e-05, "loss": 0.8461, "step": 2536 }, { "epoch": 1.0313071762553365, "grad_norm": 0.09776647388935089, "learning_rate": 9.69672297984938e-05, "loss": 0.9141, "step": 2537 }, { "epoch": 1.031713762959951, "grad_norm": 0.09859136492013931, "learning_rate": 9.692652147364137e-05, "loss": 1.0216, "step": 2538 }, { "epoch": 
1.032120349664566, "grad_norm": 0.10951580852270126, "learning_rate": 9.688581314878892e-05, "loss": 0.979, "step": 2539 }, { "epoch": 1.0325269363691807, "grad_norm": 0.09757594019174576, "learning_rate": 9.684510482393649e-05, "loss": 0.8619, "step": 2540 }, { "epoch": 1.0329335230737955, "grad_norm": 0.10344915837049484, "learning_rate": 9.680439649908407e-05, "loss": 0.9842, "step": 2541 }, { "epoch": 1.0333401097784103, "grad_norm": 0.09753288328647614, "learning_rate": 9.676368817423164e-05, "loss": 0.8892, "step": 2542 }, { "epoch": 1.033746696483025, "grad_norm": 0.10194489359855652, "learning_rate": 9.67229798493792e-05, "loss": 0.953, "step": 2543 }, { "epoch": 1.0341532831876397, "grad_norm": 0.0974886566400528, "learning_rate": 9.668227152452677e-05, "loss": 0.9984, "step": 2544 }, { "epoch": 1.0345598698922545, "grad_norm": 0.09721877425909042, "learning_rate": 9.664156319967433e-05, "loss": 0.8556, "step": 2545 }, { "epoch": 1.0349664565968693, "grad_norm": 0.1020737811923027, "learning_rate": 9.660085487482191e-05, "loss": 1.0099, "step": 2546 }, { "epoch": 1.035373043301484, "grad_norm": 0.10895517468452454, "learning_rate": 9.656014654996948e-05, "loss": 0.9703, "step": 2547 }, { "epoch": 1.0357796300060989, "grad_norm": 0.10454720258712769, "learning_rate": 9.651943822511704e-05, "loss": 1.0158, "step": 2548 }, { "epoch": 1.0361862167107136, "grad_norm": 0.09759974479675293, "learning_rate": 9.647872990026461e-05, "loss": 0.9259, "step": 2549 }, { "epoch": 1.0365928034153282, "grad_norm": 0.09353537857532501, "learning_rate": 9.643802157541218e-05, "loss": 0.8987, "step": 2550 }, { "epoch": 1.036999390119943, "grad_norm": 0.10114728659391403, "learning_rate": 9.639731325055973e-05, "loss": 0.9499, "step": 2551 }, { "epoch": 1.0374059768245578, "grad_norm": 0.09962712973356247, "learning_rate": 9.635660492570731e-05, "loss": 1.0086, "step": 2552 }, { "epoch": 1.0378125635291726, "grad_norm": 0.09930434823036194, "learning_rate": 
9.631589660085488e-05, "loss": 0.891, "step": 2553 }, { "epoch": 1.0382191502337874, "grad_norm": 0.10339832305908203, "learning_rate": 9.627518827600245e-05, "loss": 0.9528, "step": 2554 }, { "epoch": 1.0386257369384022, "grad_norm": 0.09386780112981796, "learning_rate": 9.623447995115002e-05, "loss": 0.879, "step": 2555 }, { "epoch": 1.0390323236430168, "grad_norm": 0.09669435769319534, "learning_rate": 9.619377162629759e-05, "loss": 0.9047, "step": 2556 }, { "epoch": 1.0394389103476316, "grad_norm": 0.08469796180725098, "learning_rate": 9.615306330144514e-05, "loss": 0.782, "step": 2557 }, { "epoch": 1.0398454970522464, "grad_norm": 0.09662485867738724, "learning_rate": 9.611235497659272e-05, "loss": 0.9099, "step": 2558 }, { "epoch": 1.0402520837568612, "grad_norm": 0.09601373970508575, "learning_rate": 9.607164665174029e-05, "loss": 0.9126, "step": 2559 }, { "epoch": 1.040658670461476, "grad_norm": 0.10070160031318665, "learning_rate": 9.603093832688786e-05, "loss": 0.9936, "step": 2560 }, { "epoch": 1.0410652571660908, "grad_norm": 0.09629065543413162, "learning_rate": 9.599023000203542e-05, "loss": 0.8927, "step": 2561 }, { "epoch": 1.0414718438707053, "grad_norm": 0.08817669004201889, "learning_rate": 9.594952167718299e-05, "loss": 0.854, "step": 2562 }, { "epoch": 1.0418784305753201, "grad_norm": 0.0914379209280014, "learning_rate": 9.590881335233055e-05, "loss": 0.9465, "step": 2563 }, { "epoch": 1.042285017279935, "grad_norm": 0.10295330733060837, "learning_rate": 9.586810502747813e-05, "loss": 0.9533, "step": 2564 }, { "epoch": 1.0426916039845497, "grad_norm": 0.1070484146475792, "learning_rate": 9.58273967026257e-05, "loss": 1.0482, "step": 2565 }, { "epoch": 1.0430981906891645, "grad_norm": 0.10043883323669434, "learning_rate": 9.578668837777326e-05, "loss": 1.06, "step": 2566 }, { "epoch": 1.0435047773937791, "grad_norm": 0.09721029549837112, "learning_rate": 9.574598005292083e-05, "loss": 0.9336, "step": 2567 }, { "epoch": 1.043911364098394, 
"grad_norm": 0.08390473574399948, "learning_rate": 9.57052717280684e-05, "loss": 0.7674, "step": 2568 }, { "epoch": 1.0443179508030087, "grad_norm": 0.09861475974321365, "learning_rate": 9.566456340321596e-05, "loss": 0.9026, "step": 2569 }, { "epoch": 1.0447245375076235, "grad_norm": 0.10255376249551773, "learning_rate": 9.562385507836353e-05, "loss": 0.9269, "step": 2570 }, { "epoch": 1.0451311242122383, "grad_norm": 0.09502318501472473, "learning_rate": 9.55831467535111e-05, "loss": 0.9383, "step": 2571 }, { "epoch": 1.045537710916853, "grad_norm": 0.09613403677940369, "learning_rate": 9.554243842865867e-05, "loss": 0.9479, "step": 2572 }, { "epoch": 1.0459442976214677, "grad_norm": 0.09740449488162994, "learning_rate": 9.550173010380624e-05, "loss": 0.9378, "step": 2573 }, { "epoch": 1.0463508843260825, "grad_norm": 0.09630079567432404, "learning_rate": 9.54610217789538e-05, "loss": 0.9408, "step": 2574 }, { "epoch": 1.0467574710306973, "grad_norm": 0.10916483402252197, "learning_rate": 9.542031345410137e-05, "loss": 1.0303, "step": 2575 }, { "epoch": 1.047164057735312, "grad_norm": 0.10121887922286987, "learning_rate": 9.537960512924894e-05, "loss": 0.9663, "step": 2576 }, { "epoch": 1.0475706444399269, "grad_norm": 0.10116361826658249, "learning_rate": 9.53388968043965e-05, "loss": 1.0089, "step": 2577 }, { "epoch": 1.0479772311445417, "grad_norm": 0.09431501477956772, "learning_rate": 9.529818847954407e-05, "loss": 0.9011, "step": 2578 }, { "epoch": 1.0483838178491562, "grad_norm": 0.09918123483657837, "learning_rate": 9.525748015469164e-05, "loss": 1.0262, "step": 2579 }, { "epoch": 1.048790404553771, "grad_norm": 0.0959305465221405, "learning_rate": 9.521677182983921e-05, "loss": 0.9491, "step": 2580 }, { "epoch": 1.0491969912583858, "grad_norm": 0.0992065966129303, "learning_rate": 9.517606350498678e-05, "loss": 1.0223, "step": 2581 }, { "epoch": 1.0496035779630006, "grad_norm": 0.10246460884809494, "learning_rate": 9.513535518013434e-05, "loss": 0.892, 
"step": 2582 }, { "epoch": 1.0500101646676154, "grad_norm": 0.10209383815526962, "learning_rate": 9.509464685528191e-05, "loss": 1.0251, "step": 2583 }, { "epoch": 1.0504167513722302, "grad_norm": 173.35934448242188, "learning_rate": 9.505393853042948e-05, "loss": 0.9476, "step": 2584 }, { "epoch": 1.0508233380768448, "grad_norm": 0.10001904517412186, "learning_rate": 9.501323020557705e-05, "loss": 0.9104, "step": 2585 }, { "epoch": 1.0512299247814596, "grad_norm": 0.09232128411531448, "learning_rate": 9.49725218807246e-05, "loss": 0.8637, "step": 2586 }, { "epoch": 1.0516365114860744, "grad_norm": 0.09748049825429916, "learning_rate": 9.493181355587218e-05, "loss": 0.9357, "step": 2587 }, { "epoch": 1.0520430981906892, "grad_norm": 0.11075956374406815, "learning_rate": 9.489110523101975e-05, "loss": 1.0479, "step": 2588 }, { "epoch": 1.052449684895304, "grad_norm": 0.10015081614255905, "learning_rate": 9.485039690616732e-05, "loss": 0.8373, "step": 2589 }, { "epoch": 1.0528562715999188, "grad_norm": 0.10808718949556351, "learning_rate": 9.480968858131488e-05, "loss": 0.9984, "step": 2590 }, { "epoch": 1.0532628583045334, "grad_norm": 0.09263164550065994, "learning_rate": 9.476898025646245e-05, "loss": 0.8195, "step": 2591 }, { "epoch": 1.0536694450091482, "grad_norm": 0.10392975807189941, "learning_rate": 9.472827193161002e-05, "loss": 0.9241, "step": 2592 }, { "epoch": 1.054076031713763, "grad_norm": 0.10209937393665314, "learning_rate": 9.468756360675759e-05, "loss": 0.9368, "step": 2593 }, { "epoch": 1.0544826184183778, "grad_norm": 0.0948430597782135, "learning_rate": 9.464685528190516e-05, "loss": 0.9274, "step": 2594 }, { "epoch": 1.0548892051229926, "grad_norm": 0.09769920259714127, "learning_rate": 9.460614695705272e-05, "loss": 0.9379, "step": 2595 }, { "epoch": 1.0552957918276071, "grad_norm": 0.09840547293424606, "learning_rate": 9.456543863220029e-05, "loss": 0.9622, "step": 2596 }, { "epoch": 1.055702378532222, "grad_norm": 0.10511568933725357, 
"learning_rate": 9.452473030734786e-05, "loss": 0.9294, "step": 2597 }, { "epoch": 1.0561089652368367, "grad_norm": 0.10543198883533478, "learning_rate": 9.448402198249543e-05, "loss": 0.879, "step": 2598 }, { "epoch": 1.0565155519414515, "grad_norm": 0.11027191579341888, "learning_rate": 9.4443313657643e-05, "loss": 1.1137, "step": 2599 }, { "epoch": 1.0569221386460663, "grad_norm": 0.10025797039270401, "learning_rate": 9.440260533279056e-05, "loss": 0.9876, "step": 2600 }, { "epoch": 1.0573287253506811, "grad_norm": 0.09540455043315887, "learning_rate": 9.436189700793813e-05, "loss": 0.9154, "step": 2601 }, { "epoch": 1.0577353120552957, "grad_norm": 0.1021379828453064, "learning_rate": 9.43211886830857e-05, "loss": 0.9915, "step": 2602 }, { "epoch": 1.0581418987599105, "grad_norm": 0.09948939830064774, "learning_rate": 9.428048035823326e-05, "loss": 0.9677, "step": 2603 }, { "epoch": 1.0585484854645253, "grad_norm": 0.10177826136350632, "learning_rate": 9.423977203338083e-05, "loss": 0.9533, "step": 2604 }, { "epoch": 1.05895507216914, "grad_norm": 0.10010895878076553, "learning_rate": 9.41990637085284e-05, "loss": 1.0366, "step": 2605 }, { "epoch": 1.0593616588737549, "grad_norm": 0.0993037298321724, "learning_rate": 9.415835538367597e-05, "loss": 1.0429, "step": 2606 }, { "epoch": 1.0597682455783697, "grad_norm": 0.09915399551391602, "learning_rate": 9.411764705882353e-05, "loss": 0.9091, "step": 2607 }, { "epoch": 1.0601748322829843, "grad_norm": 0.09989267587661743, "learning_rate": 9.40769387339711e-05, "loss": 1.0243, "step": 2608 }, { "epoch": 1.060581418987599, "grad_norm": 0.10886070877313614, "learning_rate": 9.403623040911867e-05, "loss": 1.0211, "step": 2609 }, { "epoch": 1.0609880056922139, "grad_norm": 0.09823065251111984, "learning_rate": 9.399552208426624e-05, "loss": 0.9097, "step": 2610 }, { "epoch": 1.0613945923968287, "grad_norm": 0.09129935503005981, "learning_rate": 9.39548137594138e-05, "loss": 0.9066, "step": 2611 }, { "epoch": 
1.0618011791014434, "grad_norm": 0.09999019652605057, "learning_rate": 9.391410543456137e-05, "loss": 0.9658, "step": 2612 }, { "epoch": 1.0622077658060582, "grad_norm": 0.1015915721654892, "learning_rate": 9.387339710970894e-05, "loss": 1.0863, "step": 2613 }, { "epoch": 1.0626143525106728, "grad_norm": 0.11688552051782608, "learning_rate": 9.383268878485651e-05, "loss": 1.0107, "step": 2614 }, { "epoch": 1.0630209392152876, "grad_norm": 0.09864789247512817, "learning_rate": 9.379198046000408e-05, "loss": 0.9256, "step": 2615 }, { "epoch": 1.0634275259199024, "grad_norm": 0.11037877202033997, "learning_rate": 9.375127213515164e-05, "loss": 1.089, "step": 2616 }, { "epoch": 1.0638341126245172, "grad_norm": 0.10612582415342331, "learning_rate": 9.371056381029921e-05, "loss": 1.0316, "step": 2617 }, { "epoch": 1.064240699329132, "grad_norm": 0.09921829402446747, "learning_rate": 9.366985548544678e-05, "loss": 0.9821, "step": 2618 }, { "epoch": 1.0646472860337468, "grad_norm": 0.0911387950181961, "learning_rate": 9.362914716059435e-05, "loss": 0.864, "step": 2619 }, { "epoch": 1.0650538727383614, "grad_norm": 0.10604958236217499, "learning_rate": 9.358843883574191e-05, "loss": 0.9687, "step": 2620 }, { "epoch": 1.0654604594429762, "grad_norm": 0.10203681141138077, "learning_rate": 9.354773051088948e-05, "loss": 1.0158, "step": 2621 }, { "epoch": 1.065867046147591, "grad_norm": 0.09713797271251678, "learning_rate": 9.350702218603705e-05, "loss": 0.9671, "step": 2622 }, { "epoch": 1.0662736328522058, "grad_norm": 0.10216394811868668, "learning_rate": 9.346631386118462e-05, "loss": 0.9156, "step": 2623 }, { "epoch": 1.0666802195568206, "grad_norm": 0.09904835373163223, "learning_rate": 9.342560553633218e-05, "loss": 0.9856, "step": 2624 }, { "epoch": 1.0670868062614352, "grad_norm": 0.09881392866373062, "learning_rate": 9.338489721147975e-05, "loss": 0.964, "step": 2625 }, { "epoch": 1.06749339296605, "grad_norm": 0.10800333321094513, "learning_rate": 
9.334418888662732e-05, "loss": 1.0773, "step": 2626 }, { "epoch": 1.0678999796706647, "grad_norm": 0.09454644471406937, "learning_rate": 9.330348056177489e-05, "loss": 0.8724, "step": 2627 }, { "epoch": 1.0683065663752795, "grad_norm": 0.10942061245441437, "learning_rate": 9.326277223692246e-05, "loss": 1.0294, "step": 2628 }, { "epoch": 1.0687131530798943, "grad_norm": 0.10133802890777588, "learning_rate": 9.322206391207002e-05, "loss": 0.9862, "step": 2629 }, { "epoch": 1.0691197397845091, "grad_norm": 0.08950886130332947, "learning_rate": 9.318135558721759e-05, "loss": 0.8339, "step": 2630 }, { "epoch": 1.0695263264891237, "grad_norm": 0.09953132271766663, "learning_rate": 9.314064726236516e-05, "loss": 0.94, "step": 2631 }, { "epoch": 1.0699329131937385, "grad_norm": 0.09424632787704468, "learning_rate": 9.309993893751273e-05, "loss": 0.8335, "step": 2632 }, { "epoch": 1.0703394998983533, "grad_norm": 0.09430480003356934, "learning_rate": 9.305923061266029e-05, "loss": 0.9151, "step": 2633 }, { "epoch": 1.070746086602968, "grad_norm": 0.10957103222608566, "learning_rate": 9.301852228780786e-05, "loss": 1.0281, "step": 2634 }, { "epoch": 1.071152673307583, "grad_norm": 0.09792932122945786, "learning_rate": 9.297781396295543e-05, "loss": 0.8805, "step": 2635 }, { "epoch": 1.0715592600121977, "grad_norm": 0.09845682233572006, "learning_rate": 9.2937105638103e-05, "loss": 0.8915, "step": 2636 }, { "epoch": 1.0719658467168123, "grad_norm": 0.10949815809726715, "learning_rate": 9.289639731325056e-05, "loss": 0.9888, "step": 2637 }, { "epoch": 1.072372433421427, "grad_norm": 0.09920839220285416, "learning_rate": 9.285568898839813e-05, "loss": 0.9253, "step": 2638 }, { "epoch": 1.0727790201260419, "grad_norm": 0.10604346543550491, "learning_rate": 9.28149806635457e-05, "loss": 1.0617, "step": 2639 }, { "epoch": 1.0731856068306567, "grad_norm": 0.09291350096464157, "learning_rate": 9.277427233869327e-05, "loss": 0.8628, "step": 2640 }, { "epoch": 1.0735921935352715, 
"grad_norm": 0.0925416573882103, "learning_rate": 9.273356401384083e-05, "loss": 0.8899, "step": 2641 }, { "epoch": 1.073998780239886, "grad_norm": 0.0955965593457222, "learning_rate": 9.26928556889884e-05, "loss": 0.9303, "step": 2642 }, { "epoch": 1.0744053669445008, "grad_norm": 0.08776776492595673, "learning_rate": 9.265214736413597e-05, "loss": 0.8305, "step": 2643 }, { "epoch": 1.0748119536491156, "grad_norm": 0.09283957630395889, "learning_rate": 9.261143903928354e-05, "loss": 0.8323, "step": 2644 }, { "epoch": 1.0752185403537304, "grad_norm": 0.1023586168885231, "learning_rate": 9.25707307144311e-05, "loss": 0.9612, "step": 2645 }, { "epoch": 1.0756251270583452, "grad_norm": 0.09603264182806015, "learning_rate": 9.253002238957867e-05, "loss": 0.8514, "step": 2646 }, { "epoch": 1.07603171376296, "grad_norm": 0.09656079113483429, "learning_rate": 9.248931406472624e-05, "loss": 0.9003, "step": 2647 }, { "epoch": 1.0764383004675748, "grad_norm": 0.10394012928009033, "learning_rate": 9.244860573987381e-05, "loss": 1.0031, "step": 2648 }, { "epoch": 1.0768448871721894, "grad_norm": 0.1057206466794014, "learning_rate": 9.240789741502138e-05, "loss": 0.9915, "step": 2649 }, { "epoch": 1.0772514738768042, "grad_norm": 0.09920359402894974, "learning_rate": 9.236718909016894e-05, "loss": 0.9242, "step": 2650 }, { "epoch": 1.077658060581419, "grad_norm": 0.09820383042097092, "learning_rate": 9.232648076531651e-05, "loss": 0.889, "step": 2651 }, { "epoch": 1.0780646472860338, "grad_norm": 0.09957915544509888, "learning_rate": 9.228577244046408e-05, "loss": 0.9418, "step": 2652 }, { "epoch": 1.0784712339906486, "grad_norm": 0.09969063103199005, "learning_rate": 9.224506411561165e-05, "loss": 0.905, "step": 2653 }, { "epoch": 1.0788778206952632, "grad_norm": 0.10219317674636841, "learning_rate": 9.220435579075921e-05, "loss": 1.0422, "step": 2654 }, { "epoch": 1.079284407399878, "grad_norm": 0.10804678499698639, "learning_rate": 9.216364746590678e-05, "loss": 0.9122, 
"step": 2655 }, { "epoch": 1.0796909941044928, "grad_norm": 0.10206186026334763, "learning_rate": 9.212293914105435e-05, "loss": 0.9674, "step": 2656 }, { "epoch": 1.0800975808091076, "grad_norm": 0.1036703959107399, "learning_rate": 9.208223081620192e-05, "loss": 0.9717, "step": 2657 }, { "epoch": 1.0805041675137224, "grad_norm": 0.09528395533561707, "learning_rate": 9.204152249134948e-05, "loss": 0.8668, "step": 2658 }, { "epoch": 1.0809107542183372, "grad_norm": 0.08862180262804031, "learning_rate": 9.200081416649705e-05, "loss": 0.8139, "step": 2659 }, { "epoch": 1.0813173409229517, "grad_norm": 0.11283780634403229, "learning_rate": 9.196010584164462e-05, "loss": 0.99, "step": 2660 }, { "epoch": 1.0817239276275665, "grad_norm": 0.10028998553752899, "learning_rate": 9.19193975167922e-05, "loss": 0.8809, "step": 2661 }, { "epoch": 1.0821305143321813, "grad_norm": 0.11139478534460068, "learning_rate": 9.187868919193975e-05, "loss": 0.9946, "step": 2662 }, { "epoch": 1.0825371010367961, "grad_norm": 0.1029946580529213, "learning_rate": 9.183798086708732e-05, "loss": 0.9297, "step": 2663 }, { "epoch": 1.082943687741411, "grad_norm": 0.10619094967842102, "learning_rate": 9.179727254223489e-05, "loss": 1.0645, "step": 2664 }, { "epoch": 1.0833502744460257, "grad_norm": 0.1395910680294037, "learning_rate": 9.175656421738246e-05, "loss": 1.0362, "step": 2665 }, { "epoch": 1.0837568611506403, "grad_norm": 0.09811388701200485, "learning_rate": 9.171585589253003e-05, "loss": 0.9939, "step": 2666 }, { "epoch": 1.084163447855255, "grad_norm": 0.1118270680308342, "learning_rate": 9.167514756767759e-05, "loss": 1.031, "step": 2667 }, { "epoch": 1.0845700345598699, "grad_norm": 0.11443159729242325, "learning_rate": 9.163443924282516e-05, "loss": 1.0236, "step": 2668 }, { "epoch": 1.0849766212644847, "grad_norm": 0.10500071197748184, "learning_rate": 9.159373091797273e-05, "loss": 0.9794, "step": 2669 }, { "epoch": 1.0853832079690995, "grad_norm": 0.10952949523925781, 
"learning_rate": 9.15530225931203e-05, "loss": 1.0597, "step": 2670 }, { "epoch": 1.085789794673714, "grad_norm": 0.10230562835931778, "learning_rate": 9.151231426826786e-05, "loss": 1.0483, "step": 2671 }, { "epoch": 1.0861963813783289, "grad_norm": 0.1016286313533783, "learning_rate": 9.147160594341543e-05, "loss": 0.9346, "step": 2672 }, { "epoch": 1.0866029680829437, "grad_norm": 0.08879520744085312, "learning_rate": 9.1430897618563e-05, "loss": 0.8451, "step": 2673 }, { "epoch": 1.0870095547875585, "grad_norm": 0.11024922877550125, "learning_rate": 9.139018929371057e-05, "loss": 1.0428, "step": 2674 }, { "epoch": 1.0874161414921732, "grad_norm": 0.08919038623571396, "learning_rate": 9.134948096885813e-05, "loss": 0.8909, "step": 2675 }, { "epoch": 1.087822728196788, "grad_norm": 0.09503115713596344, "learning_rate": 9.13087726440057e-05, "loss": 0.9496, "step": 2676 }, { "epoch": 1.0882293149014026, "grad_norm": 0.09637421369552612, "learning_rate": 9.126806431915327e-05, "loss": 0.8955, "step": 2677 }, { "epoch": 1.0886359016060174, "grad_norm": 0.10244832187891006, "learning_rate": 9.122735599430084e-05, "loss": 1.061, "step": 2678 }, { "epoch": 1.0890424883106322, "grad_norm": 0.09347443282604218, "learning_rate": 9.11866476694484e-05, "loss": 0.9011, "step": 2679 }, { "epoch": 1.089449075015247, "grad_norm": 0.09415366500616074, "learning_rate": 9.114593934459597e-05, "loss": 0.8837, "step": 2680 }, { "epoch": 1.0898556617198618, "grad_norm": 0.1009860560297966, "learning_rate": 9.110523101974354e-05, "loss": 1.0073, "step": 2681 }, { "epoch": 1.0902622484244766, "grad_norm": 0.10200529545545578, "learning_rate": 9.106452269489111e-05, "loss": 0.9762, "step": 2682 }, { "epoch": 1.0906688351290912, "grad_norm": 0.08658542484045029, "learning_rate": 9.102381437003867e-05, "loss": 0.7831, "step": 2683 }, { "epoch": 1.091075421833706, "grad_norm": 0.11266512423753738, "learning_rate": 9.098310604518626e-05, "loss": 0.9758, "step": 2684 }, { "epoch": 
1.0914820085383208, "grad_norm": 0.09850563108921051, "learning_rate": 9.094239772033381e-05, "loss": 0.9202, "step": 2685 }, { "epoch": 1.0918885952429356, "grad_norm": 0.10467302799224854, "learning_rate": 9.090168939548138e-05, "loss": 0.983, "step": 2686 }, { "epoch": 1.0922951819475504, "grad_norm": 0.10072293132543564, "learning_rate": 9.086098107062895e-05, "loss": 0.9643, "step": 2687 }, { "epoch": 1.0927017686521652, "grad_norm": 0.10193373262882233, "learning_rate": 9.082027274577651e-05, "loss": 0.9569, "step": 2688 }, { "epoch": 1.0931083553567797, "grad_norm": 0.09216511994600296, "learning_rate": 9.077956442092408e-05, "loss": 0.8781, "step": 2689 }, { "epoch": 1.0935149420613945, "grad_norm": 0.09577429294586182, "learning_rate": 9.073885609607166e-05, "loss": 0.9484, "step": 2690 }, { "epoch": 1.0939215287660093, "grad_norm": 0.08991552889347076, "learning_rate": 9.069814777121922e-05, "loss": 0.843, "step": 2691 }, { "epoch": 1.0943281154706241, "grad_norm": 0.09744630008935928, "learning_rate": 9.065743944636678e-05, "loss": 0.9266, "step": 2692 }, { "epoch": 1.094734702175239, "grad_norm": 0.11267295479774475, "learning_rate": 9.061673112151435e-05, "loss": 1.1497, "step": 2693 }, { "epoch": 1.0951412888798537, "grad_norm": 0.10447680950164795, "learning_rate": 9.057602279666192e-05, "loss": 0.9988, "step": 2694 }, { "epoch": 1.0955478755844683, "grad_norm": 0.09360986948013306, "learning_rate": 9.053531447180949e-05, "loss": 0.8463, "step": 2695 }, { "epoch": 1.095954462289083, "grad_norm": 0.10234752297401428, "learning_rate": 9.049460614695707e-05, "loss": 0.9858, "step": 2696 }, { "epoch": 1.096361048993698, "grad_norm": 0.09333793818950653, "learning_rate": 9.045389782210462e-05, "loss": 0.8818, "step": 2697 }, { "epoch": 1.0967676356983127, "grad_norm": 0.10592950135469437, "learning_rate": 9.041318949725219e-05, "loss": 1.1268, "step": 2698 }, { "epoch": 1.0971742224029275, "grad_norm": 0.10253705084323883, "learning_rate": 
9.037248117239976e-05, "loss": 1.0303, "step": 2699 }, { "epoch": 1.097580809107542, "grad_norm": 0.09744442999362946, "learning_rate": 9.033177284754732e-05, "loss": 0.9919, "step": 2700 }, { "epoch": 1.0979873958121569, "grad_norm": 0.0978417843580246, "learning_rate": 9.029106452269489e-05, "loss": 0.9778, "step": 2701 }, { "epoch": 1.0983939825167717, "grad_norm": 0.10374154895544052, "learning_rate": 9.025035619784247e-05, "loss": 0.9441, "step": 2702 }, { "epoch": 1.0988005692213865, "grad_norm": 0.10075423121452332, "learning_rate": 9.020964787299003e-05, "loss": 0.9613, "step": 2703 }, { "epoch": 1.0992071559260013, "grad_norm": 0.10084596276283264, "learning_rate": 9.01689395481376e-05, "loss": 1.0501, "step": 2704 }, { "epoch": 1.099613742630616, "grad_norm": 0.09317726641893387, "learning_rate": 9.012823122328516e-05, "loss": 0.9205, "step": 2705 }, { "epoch": 1.1000203293352306, "grad_norm": 0.10026173293590546, "learning_rate": 9.008752289843273e-05, "loss": 0.9694, "step": 2706 }, { "epoch": 1.1004269160398454, "grad_norm": 0.10271118581295013, "learning_rate": 9.004681457358031e-05, "loss": 0.966, "step": 2707 }, { "epoch": 1.1008335027444602, "grad_norm": 0.1029544472694397, "learning_rate": 9.000610624872788e-05, "loss": 1.0658, "step": 2708 }, { "epoch": 1.101240089449075, "grad_norm": 0.08744987100362778, "learning_rate": 8.996539792387543e-05, "loss": 0.7922, "step": 2709 }, { "epoch": 1.1016466761536898, "grad_norm": 0.10885384678840637, "learning_rate": 8.9924689599023e-05, "loss": 0.966, "step": 2710 }, { "epoch": 1.1020532628583046, "grad_norm": 0.09841740131378174, "learning_rate": 8.988398127417057e-05, "loss": 0.93, "step": 2711 }, { "epoch": 1.1024598495629192, "grad_norm": 0.09065406024456024, "learning_rate": 8.984327294931814e-05, "loss": 0.8524, "step": 2712 }, { "epoch": 1.102866436267534, "grad_norm": 0.1025364026427269, "learning_rate": 8.980256462446572e-05, "loss": 0.9865, "step": 2713 }, { "epoch": 1.1032730229721488, 
"grad_norm": 0.10353400558233261, "learning_rate": 8.976185629961327e-05, "loss": 0.9485, "step": 2714 }, { "epoch": 1.1036796096767636, "grad_norm": 0.10245194286108017, "learning_rate": 8.972114797476084e-05, "loss": 0.95, "step": 2715 }, { "epoch": 1.1040861963813784, "grad_norm": 0.10163327306509018, "learning_rate": 8.968043964990841e-05, "loss": 0.9418, "step": 2716 }, { "epoch": 1.1044927830859932, "grad_norm": 0.0943874716758728, "learning_rate": 8.963973132505597e-05, "loss": 0.9057, "step": 2717 }, { "epoch": 1.1048993697906078, "grad_norm": 0.10380052775144577, "learning_rate": 8.959902300020354e-05, "loss": 0.908, "step": 2718 }, { "epoch": 1.1053059564952226, "grad_norm": 0.1012316420674324, "learning_rate": 8.955831467535112e-05, "loss": 1.0247, "step": 2719 }, { "epoch": 1.1057125431998374, "grad_norm": 0.10259490460157394, "learning_rate": 8.951760635049868e-05, "loss": 1.02, "step": 2720 }, { "epoch": 1.1061191299044522, "grad_norm": 0.09200392663478851, "learning_rate": 8.947689802564625e-05, "loss": 0.8184, "step": 2721 }, { "epoch": 1.106525716609067, "grad_norm": 0.10655350238084793, "learning_rate": 8.943618970079381e-05, "loss": 1.0162, "step": 2722 }, { "epoch": 1.1069323033136818, "grad_norm": 0.09815651178359985, "learning_rate": 8.939548137594138e-05, "loss": 0.8743, "step": 2723 }, { "epoch": 1.1073388900182963, "grad_norm": 0.09680456668138504, "learning_rate": 8.935477305108895e-05, "loss": 0.8738, "step": 2724 }, { "epoch": 1.1077454767229111, "grad_norm": 0.09177197515964508, "learning_rate": 8.931406472623653e-05, "loss": 0.8669, "step": 2725 }, { "epoch": 1.108152063427526, "grad_norm": 0.10532869398593903, "learning_rate": 8.927335640138408e-05, "loss": 0.9685, "step": 2726 }, { "epoch": 1.1085586501321407, "grad_norm": 0.09062668681144714, "learning_rate": 8.923264807653165e-05, "loss": 0.815, "step": 2727 }, { "epoch": 1.1089652368367555, "grad_norm": 0.10259625315666199, "learning_rate": 8.919193975167922e-05, "loss": 1.0085, 
"step": 2728 }, { "epoch": 1.10937182354137, "grad_norm": 0.10090707242488861, "learning_rate": 8.915123142682679e-05, "loss": 0.9229, "step": 2729 }, { "epoch": 1.109778410245985, "grad_norm": 0.10648062825202942, "learning_rate": 8.911052310197435e-05, "loss": 0.9649, "step": 2730 }, { "epoch": 1.1101849969505997, "grad_norm": 0.10565739125013351, "learning_rate": 8.906981477712193e-05, "loss": 0.9967, "step": 2731 }, { "epoch": 1.1105915836552145, "grad_norm": 0.10286445170640945, "learning_rate": 8.902910645226949e-05, "loss": 0.9484, "step": 2732 }, { "epoch": 1.1109981703598293, "grad_norm": 0.09817038476467133, "learning_rate": 8.898839812741706e-05, "loss": 0.9006, "step": 2733 }, { "epoch": 1.111404757064444, "grad_norm": 0.10235543549060822, "learning_rate": 8.894768980256462e-05, "loss": 0.9197, "step": 2734 }, { "epoch": 1.1118113437690587, "grad_norm": 0.10497331619262695, "learning_rate": 8.890698147771219e-05, "loss": 0.9529, "step": 2735 }, { "epoch": 1.1122179304736735, "grad_norm": 0.0995490625500679, "learning_rate": 8.886627315285977e-05, "loss": 0.9327, "step": 2736 }, { "epoch": 1.1126245171782883, "grad_norm": 0.10197664797306061, "learning_rate": 8.882556482800734e-05, "loss": 0.967, "step": 2737 }, { "epoch": 1.113031103882903, "grad_norm": 0.09399368613958359, "learning_rate": 8.87848565031549e-05, "loss": 0.9018, "step": 2738 }, { "epoch": 1.1134376905875178, "grad_norm": 0.09783720225095749, "learning_rate": 8.874414817830246e-05, "loss": 0.9248, "step": 2739 }, { "epoch": 1.1138442772921326, "grad_norm": 0.10185014456510544, "learning_rate": 8.870343985345003e-05, "loss": 1.0671, "step": 2740 }, { "epoch": 1.1142508639967472, "grad_norm": 0.09915787726640701, "learning_rate": 8.86627315285976e-05, "loss": 0.9916, "step": 2741 }, { "epoch": 1.114657450701362, "grad_norm": 0.08849018812179565, "learning_rate": 8.862202320374518e-05, "loss": 0.8339, "step": 2742 }, { "epoch": 1.1150640374059768, "grad_norm": 0.09823833405971527, 
"learning_rate": 8.858131487889275e-05, "loss": 0.8801, "step": 2743 }, { "epoch": 1.1154706241105916, "grad_norm": 0.10026133805513382, "learning_rate": 8.85406065540403e-05, "loss": 0.9545, "step": 2744 }, { "epoch": 1.1158772108152064, "grad_norm": 0.10472730547189713, "learning_rate": 8.849989822918787e-05, "loss": 0.9644, "step": 2745 }, { "epoch": 1.116283797519821, "grad_norm": 1352.027587890625, "learning_rate": 8.845918990433544e-05, "loss": 0.9703, "step": 2746 }, { "epoch": 1.1166903842244358, "grad_norm": 0.10632047057151794, "learning_rate": 8.8418481579483e-05, "loss": 0.9849, "step": 2747 }, { "epoch": 1.1170969709290506, "grad_norm": 0.09786203503608704, "learning_rate": 8.837777325463058e-05, "loss": 0.9203, "step": 2748 }, { "epoch": 1.1175035576336654, "grad_norm": 0.09631546586751938, "learning_rate": 8.833706492977815e-05, "loss": 0.8382, "step": 2749 }, { "epoch": 1.1179101443382802, "grad_norm": 0.1131991297006607, "learning_rate": 8.82963566049257e-05, "loss": 1.1161, "step": 2750 }, { "epoch": 1.118316731042895, "grad_norm": 0.11070824414491653, "learning_rate": 8.825564828007327e-05, "loss": 1.0256, "step": 2751 }, { "epoch": 1.1187233177475098, "grad_norm": 0.09996247291564941, "learning_rate": 8.821493995522084e-05, "loss": 0.8926, "step": 2752 }, { "epoch": 1.1191299044521243, "grad_norm": 0.1090439185500145, "learning_rate": 8.817423163036841e-05, "loss": 0.9081, "step": 2753 }, { "epoch": 1.1195364911567391, "grad_norm": 0.10079578310251236, "learning_rate": 8.813352330551599e-05, "loss": 0.9501, "step": 2754 }, { "epoch": 1.119943077861354, "grad_norm": 0.10204090178012848, "learning_rate": 8.809281498066356e-05, "loss": 0.8608, "step": 2755 }, { "epoch": 1.1203496645659687, "grad_norm": 0.09947852045297623, "learning_rate": 8.805210665581111e-05, "loss": 0.969, "step": 2756 }, { "epoch": 1.1207562512705835, "grad_norm": 0.10679657757282257, "learning_rate": 8.801139833095868e-05, "loss": 0.9747, "step": 2757 }, { "epoch": 
1.1211628379751981, "grad_norm": 0.10643206536769867, "learning_rate": 8.797069000610625e-05, "loss": 0.9658, "step": 2758 }, { "epoch": 1.121569424679813, "grad_norm": 0.09698309749364853, "learning_rate": 8.792998168125383e-05, "loss": 0.8848, "step": 2759 }, { "epoch": 1.1219760113844277, "grad_norm": 0.10456421971321106, "learning_rate": 8.78892733564014e-05, "loss": 1.0093, "step": 2760 }, { "epoch": 1.1223825980890425, "grad_norm": 0.10722696781158447, "learning_rate": 8.784856503154896e-05, "loss": 0.9656, "step": 2761 }, { "epoch": 1.1227891847936573, "grad_norm": 0.09573463350534439, "learning_rate": 8.780785670669652e-05, "loss": 0.8725, "step": 2762 }, { "epoch": 1.123195771498272, "grad_norm": 0.10508301854133606, "learning_rate": 8.776714838184409e-05, "loss": 1.0974, "step": 2763 }, { "epoch": 1.1236023582028867, "grad_norm": 0.10459071397781372, "learning_rate": 8.772644005699165e-05, "loss": 0.991, "step": 2764 }, { "epoch": 1.1240089449075015, "grad_norm": 0.10841382294893265, "learning_rate": 8.768573173213923e-05, "loss": 1.0578, "step": 2765 }, { "epoch": 1.1244155316121163, "grad_norm": 0.09497111290693283, "learning_rate": 8.76450234072868e-05, "loss": 0.8538, "step": 2766 }, { "epoch": 1.124822118316731, "grad_norm": 0.10247037559747696, "learning_rate": 8.760431508243436e-05, "loss": 0.9779, "step": 2767 }, { "epoch": 1.1252287050213459, "grad_norm": 0.09652630984783173, "learning_rate": 8.756360675758192e-05, "loss": 0.8701, "step": 2768 }, { "epoch": 1.1256352917259607, "grad_norm": 0.09543488174676895, "learning_rate": 8.752289843272949e-05, "loss": 0.8204, "step": 2769 }, { "epoch": 1.1260418784305752, "grad_norm": 0.10089685767889023, "learning_rate": 8.748219010787706e-05, "loss": 0.9666, "step": 2770 }, { "epoch": 1.12644846513519, "grad_norm": 0.09978599101305008, "learning_rate": 8.744148178302464e-05, "loss": 0.8704, "step": 2771 }, { "epoch": 1.1268550518398048, "grad_norm": 0.0978054329752922, "learning_rate": 
8.740077345817221e-05, "loss": 0.9111, "step": 2772 }, { "epoch": 1.1272616385444196, "grad_norm": 0.11330624669790268, "learning_rate": 8.736006513331976e-05, "loss": 1.0663, "step": 2773 }, { "epoch": 1.1276682252490344, "grad_norm": 0.10306650400161743, "learning_rate": 8.731935680846733e-05, "loss": 0.8753, "step": 2774 }, { "epoch": 1.128074811953649, "grad_norm": 0.10659723728895187, "learning_rate": 8.72786484836149e-05, "loss": 1.0044, "step": 2775 }, { "epoch": 1.1284813986582638, "grad_norm": 0.09779758006334305, "learning_rate": 8.723794015876246e-05, "loss": 0.904, "step": 2776 }, { "epoch": 1.1288879853628786, "grad_norm": 0.11017712950706482, "learning_rate": 8.719723183391005e-05, "loss": 0.977, "step": 2777 }, { "epoch": 1.1292945720674934, "grad_norm": 0.11215135455131531, "learning_rate": 8.715652350905761e-05, "loss": 1.0298, "step": 2778 }, { "epoch": 1.1297011587721082, "grad_norm": 0.09850891679525375, "learning_rate": 8.711581518420517e-05, "loss": 0.9411, "step": 2779 }, { "epoch": 1.130107745476723, "grad_norm": 0.11157305538654327, "learning_rate": 8.707510685935274e-05, "loss": 1.0069, "step": 2780 }, { "epoch": 1.1305143321813378, "grad_norm": 0.09551572054624557, "learning_rate": 8.70343985345003e-05, "loss": 0.8586, "step": 2781 }, { "epoch": 1.1309209188859524, "grad_norm": 0.09917795658111572, "learning_rate": 8.699369020964788e-05, "loss": 0.928, "step": 2782 }, { "epoch": 1.1313275055905672, "grad_norm": 0.10252156853675842, "learning_rate": 8.695298188479545e-05, "loss": 0.9748, "step": 2783 }, { "epoch": 1.131734092295182, "grad_norm": 0.09795645624399185, "learning_rate": 8.691227355994302e-05, "loss": 0.9089, "step": 2784 }, { "epoch": 1.1321406789997968, "grad_norm": 0.1064736470580101, "learning_rate": 8.687156523509057e-05, "loss": 1.0198, "step": 2785 }, { "epoch": 1.1325472657044116, "grad_norm": 0.10220332443714142, "learning_rate": 8.683085691023814e-05, "loss": 1.0092, "step": 2786 }, { "epoch": 1.1329538524090261, 
"grad_norm": 0.10353989899158478, "learning_rate": 8.679014858538571e-05, "loss": 0.993, "step": 2787 }, { "epoch": 1.133360439113641, "grad_norm": 0.10385473817586899, "learning_rate": 8.674944026053329e-05, "loss": 0.9788, "step": 2788 }, { "epoch": 1.1337670258182557, "grad_norm": 0.10164317488670349, "learning_rate": 8.670873193568086e-05, "loss": 0.9838, "step": 2789 }, { "epoch": 1.1341736125228705, "grad_norm": 0.10048189759254456, "learning_rate": 8.666802361082843e-05, "loss": 0.9583, "step": 2790 }, { "epoch": 1.1345801992274853, "grad_norm": 0.1055910512804985, "learning_rate": 8.662731528597598e-05, "loss": 1.01, "step": 2791 }, { "epoch": 1.1349867859321001, "grad_norm": 0.10301291197538376, "learning_rate": 8.658660696112355e-05, "loss": 0.9385, "step": 2792 }, { "epoch": 1.1353933726367147, "grad_norm": 0.10312401503324509, "learning_rate": 8.654589863627111e-05, "loss": 0.9724, "step": 2793 }, { "epoch": 1.1357999593413295, "grad_norm": 0.09779727458953857, "learning_rate": 8.65051903114187e-05, "loss": 0.9319, "step": 2794 }, { "epoch": 1.1362065460459443, "grad_norm": 0.1034865453839302, "learning_rate": 8.646448198656626e-05, "loss": 1.008, "step": 2795 }, { "epoch": 1.136613132750559, "grad_norm": 0.10120035707950592, "learning_rate": 8.642377366171383e-05, "loss": 0.9098, "step": 2796 }, { "epoch": 1.1370197194551739, "grad_norm": 0.09492117911577225, "learning_rate": 8.638306533686139e-05, "loss": 0.9175, "step": 2797 }, { "epoch": 1.1374263061597887, "grad_norm": 0.10626331716775894, "learning_rate": 8.634235701200895e-05, "loss": 1.0008, "step": 2798 }, { "epoch": 1.1378328928644033, "grad_norm": 0.1041049063205719, "learning_rate": 8.630164868715652e-05, "loss": 1.0499, "step": 2799 }, { "epoch": 1.138239479569018, "grad_norm": 0.1089131087064743, "learning_rate": 8.62609403623041e-05, "loss": 1.0344, "step": 2800 }, { "epoch": 1.1386460662736329, "grad_norm": 0.10952405631542206, "learning_rate": 8.622023203745167e-05, "loss": 1.0276, 
"step": 2801 }, { "epoch": 1.1390526529782476, "grad_norm": 0.10866481065750122, "learning_rate": 8.617952371259924e-05, "loss": 1.1198, "step": 2802 }, { "epoch": 1.1394592396828624, "grad_norm": 0.09285107254981995, "learning_rate": 8.613881538774679e-05, "loss": 0.8278, "step": 2803 }, { "epoch": 1.139865826387477, "grad_norm": 0.10245712101459503, "learning_rate": 8.609810706289436e-05, "loss": 0.8754, "step": 2804 }, { "epoch": 1.1402724130920918, "grad_norm": 0.10147379338741302, "learning_rate": 8.605739873804194e-05, "loss": 0.9467, "step": 2805 }, { "epoch": 1.1406789997967066, "grad_norm": 0.10768549889326096, "learning_rate": 8.601669041318951e-05, "loss": 0.9884, "step": 2806 }, { "epoch": 1.1410855865013214, "grad_norm": 0.10503536462783813, "learning_rate": 8.597598208833708e-05, "loss": 1.07, "step": 2807 }, { "epoch": 1.1414921732059362, "grad_norm": 0.09846587479114532, "learning_rate": 8.593527376348464e-05, "loss": 0.8614, "step": 2808 }, { "epoch": 1.141898759910551, "grad_norm": 0.10765058547258377, "learning_rate": 8.58945654386322e-05, "loss": 0.8949, "step": 2809 }, { "epoch": 1.1423053466151658, "grad_norm": 0.08815496414899826, "learning_rate": 8.585385711377976e-05, "loss": 0.8567, "step": 2810 }, { "epoch": 1.1427119333197804, "grad_norm": 0.08793221414089203, "learning_rate": 8.581314878892735e-05, "loss": 0.7961, "step": 2811 }, { "epoch": 1.1431185200243952, "grad_norm": 0.10945441573858261, "learning_rate": 8.577244046407491e-05, "loss": 0.9724, "step": 2812 }, { "epoch": 1.14352510672901, "grad_norm": 0.09829845279455185, "learning_rate": 8.573173213922248e-05, "loss": 0.9575, "step": 2813 }, { "epoch": 1.1439316934336248, "grad_norm": 0.10379641503095627, "learning_rate": 8.569102381437004e-05, "loss": 0.9815, "step": 2814 }, { "epoch": 1.1443382801382396, "grad_norm": 0.10781152546405792, "learning_rate": 8.56503154895176e-05, "loss": 1.0267, "step": 2815 }, { "epoch": 1.1447448668428541, "grad_norm": 0.09144961833953857, 
"learning_rate": 8.560960716466517e-05, "loss": 0.8425, "step": 2816 }, { "epoch": 1.145151453547469, "grad_norm": 0.1059332862496376, "learning_rate": 8.556889883981275e-05, "loss": 0.964, "step": 2817 }, { "epoch": 1.1455580402520837, "grad_norm": 0.09091661870479584, "learning_rate": 8.552819051496032e-05, "loss": 0.89, "step": 2818 }, { "epoch": 1.1459646269566985, "grad_norm": 0.09638272970914841, "learning_rate": 8.548748219010789e-05, "loss": 0.8643, "step": 2819 }, { "epoch": 1.1463712136613133, "grad_norm": 0.09995229542255402, "learning_rate": 8.544677386525544e-05, "loss": 0.9955, "step": 2820 }, { "epoch": 1.146777800365928, "grad_norm": 0.10457552224397659, "learning_rate": 8.540606554040301e-05, "loss": 1.0031, "step": 2821 }, { "epoch": 1.1471843870705427, "grad_norm": 0.10142842680215836, "learning_rate": 8.536535721555058e-05, "loss": 0.9013, "step": 2822 }, { "epoch": 1.1475909737751575, "grad_norm": 0.09488385915756226, "learning_rate": 8.532464889069816e-05, "loss": 0.9549, "step": 2823 }, { "epoch": 1.1479975604797723, "grad_norm": 0.10237988084554672, "learning_rate": 8.528394056584572e-05, "loss": 0.8964, "step": 2824 }, { "epoch": 1.148404147184387, "grad_norm": 0.09889756143093109, "learning_rate": 8.524323224099329e-05, "loss": 0.8978, "step": 2825 }, { "epoch": 1.148810733889002, "grad_norm": 0.10641611367464066, "learning_rate": 8.520252391614085e-05, "loss": 0.9597, "step": 2826 }, { "epoch": 1.1492173205936167, "grad_norm": 0.09953330457210541, "learning_rate": 8.516181559128841e-05, "loss": 0.9997, "step": 2827 }, { "epoch": 1.1496239072982313, "grad_norm": 0.10381393134593964, "learning_rate": 8.5121107266436e-05, "loss": 0.9151, "step": 2828 }, { "epoch": 1.150030494002846, "grad_norm": 0.09743472933769226, "learning_rate": 8.508039894158356e-05, "loss": 0.8761, "step": 2829 }, { "epoch": 1.1504370807074609, "grad_norm": 0.10740388184785843, "learning_rate": 8.503969061673113e-05, "loss": 1.0341, "step": 2830 }, { "epoch": 
1.1508436674120757, "grad_norm": 0.10258743166923523, "learning_rate": 8.49989822918787e-05, "loss": 0.9992, "step": 2831 }, { "epoch": 1.1512502541166905, "grad_norm": 0.09499403089284897, "learning_rate": 8.495827396702625e-05, "loss": 0.9685, "step": 2832 }, { "epoch": 1.151656840821305, "grad_norm": 0.09847860038280487, "learning_rate": 8.491756564217382e-05, "loss": 0.8483, "step": 2833 }, { "epoch": 1.1520634275259198, "grad_norm": 0.09773585200309753, "learning_rate": 8.48768573173214e-05, "loss": 0.9926, "step": 2834 }, { "epoch": 1.1524700142305346, "grad_norm": 0.10191180557012558, "learning_rate": 8.483614899246897e-05, "loss": 0.9724, "step": 2835 }, { "epoch": 1.1528766009351494, "grad_norm": 0.0922137051820755, "learning_rate": 8.479544066761654e-05, "loss": 0.9134, "step": 2836 }, { "epoch": 1.1532831876397642, "grad_norm": 0.10144314914941788, "learning_rate": 8.47547323427641e-05, "loss": 0.9085, "step": 2837 }, { "epoch": 1.153689774344379, "grad_norm": 0.1033085286617279, "learning_rate": 8.471402401791166e-05, "loss": 1.0122, "step": 2838 }, { "epoch": 1.1540963610489936, "grad_norm": 0.1011093407869339, "learning_rate": 8.467331569305923e-05, "loss": 0.9573, "step": 2839 }, { "epoch": 1.1545029477536084, "grad_norm": 0.09297510981559753, "learning_rate": 8.463260736820681e-05, "loss": 0.9212, "step": 2840 }, { "epoch": 1.1549095344582232, "grad_norm": 0.09835392981767654, "learning_rate": 8.459189904335437e-05, "loss": 0.9009, "step": 2841 }, { "epoch": 1.155316121162838, "grad_norm": 0.09254229813814163, "learning_rate": 8.455119071850194e-05, "loss": 0.8968, "step": 2842 }, { "epoch": 1.1557227078674528, "grad_norm": 0.09188991039991379, "learning_rate": 8.451048239364951e-05, "loss": 0.8474, "step": 2843 }, { "epoch": 1.1561292945720676, "grad_norm": 0.0945422425866127, "learning_rate": 8.446977406879706e-05, "loss": 0.9277, "step": 2844 }, { "epoch": 1.1565358812766822, "grad_norm": 0.09804350137710571, "learning_rate": 
8.442906574394463e-05, "loss": 0.9676, "step": 2845 }, { "epoch": 1.156942467981297, "grad_norm": 0.107129767537117, "learning_rate": 8.438835741909221e-05, "loss": 1.0516, "step": 2846 }, { "epoch": 1.1573490546859118, "grad_norm": 0.08845387399196625, "learning_rate": 8.434764909423978e-05, "loss": 0.8411, "step": 2847 }, { "epoch": 1.1577556413905266, "grad_norm": 0.09986454993486404, "learning_rate": 8.430694076938735e-05, "loss": 0.9952, "step": 2848 }, { "epoch": 1.1581622280951414, "grad_norm": 0.10270238667726517, "learning_rate": 8.426623244453492e-05, "loss": 0.9298, "step": 2849 }, { "epoch": 1.158568814799756, "grad_norm": 0.10141734033823013, "learning_rate": 8.422552411968247e-05, "loss": 0.9347, "step": 2850 }, { "epoch": 1.1589754015043707, "grad_norm": 0.1073596179485321, "learning_rate": 8.418481579483005e-05, "loss": 1.0582, "step": 2851 }, { "epoch": 1.1593819882089855, "grad_norm": 0.1060674786567688, "learning_rate": 8.414410746997762e-05, "loss": 1.1045, "step": 2852 }, { "epoch": 1.1597885749136003, "grad_norm": 0.09997183829545975, "learning_rate": 8.410339914512519e-05, "loss": 1.0096, "step": 2853 }, { "epoch": 1.1601951616182151, "grad_norm": 0.10038676112890244, "learning_rate": 8.406269082027275e-05, "loss": 0.9495, "step": 2854 }, { "epoch": 1.16060174832283, "grad_norm": 0.10116416215896606, "learning_rate": 8.402198249542032e-05, "loss": 0.941, "step": 2855 }, { "epoch": 1.1610083350274447, "grad_norm": 0.10599818825721741, "learning_rate": 8.398127417056788e-05, "loss": 0.977, "step": 2856 }, { "epoch": 1.1614149217320593, "grad_norm": 0.10183148086071014, "learning_rate": 8.394056584571546e-05, "loss": 0.8848, "step": 2857 }, { "epoch": 1.161821508436674, "grad_norm": 0.10016648471355438, "learning_rate": 8.389985752086302e-05, "loss": 0.9616, "step": 2858 }, { "epoch": 1.1622280951412889, "grad_norm": 0.0992264375090599, "learning_rate": 8.385914919601059e-05, "loss": 0.9242, "step": 2859 }, { "epoch": 1.1626346818459037, 
"grad_norm": 0.09841668605804443, "learning_rate": 8.381844087115816e-05, "loss": 0.8993, "step": 2860 }, { "epoch": 1.1630412685505185, "grad_norm": 0.10682433098554611, "learning_rate": 8.377773254630571e-05, "loss": 1.0783, "step": 2861 }, { "epoch": 1.163447855255133, "grad_norm": 0.10249704122543335, "learning_rate": 8.373702422145328e-05, "loss": 0.9872, "step": 2862 }, { "epoch": 1.1638544419597479, "grad_norm": 0.10155528038740158, "learning_rate": 8.369631589660086e-05, "loss": 0.9609, "step": 2863 }, { "epoch": 1.1642610286643627, "grad_norm": 0.10584763437509537, "learning_rate": 8.365560757174843e-05, "loss": 0.9927, "step": 2864 }, { "epoch": 1.1646676153689774, "grad_norm": 0.0969410091638565, "learning_rate": 8.3614899246896e-05, "loss": 0.9124, "step": 2865 }, { "epoch": 1.1650742020735922, "grad_norm": 0.10030529648065567, "learning_rate": 8.357419092204357e-05, "loss": 0.9888, "step": 2866 }, { "epoch": 1.165480788778207, "grad_norm": 0.11542686820030212, "learning_rate": 8.353348259719112e-05, "loss": 1.0132, "step": 2867 }, { "epoch": 1.1658873754828216, "grad_norm": 0.10081325471401215, "learning_rate": 8.349277427233869e-05, "loss": 0.8826, "step": 2868 }, { "epoch": 1.1662939621874364, "grad_norm": 0.10607606172561646, "learning_rate": 8.345206594748627e-05, "loss": 0.9492, "step": 2869 }, { "epoch": 1.1667005488920512, "grad_norm": 0.10742900520563126, "learning_rate": 8.341135762263384e-05, "loss": 1.0318, "step": 2870 }, { "epoch": 1.167107135596666, "grad_norm": 0.09361705929040909, "learning_rate": 8.33706492977814e-05, "loss": 0.9225, "step": 2871 }, { "epoch": 1.1675137223012808, "grad_norm": 0.09765168279409409, "learning_rate": 8.332994097292897e-05, "loss": 0.9638, "step": 2872 }, { "epoch": 1.1679203090058956, "grad_norm": 0.09763183444738388, "learning_rate": 8.328923264807653e-05, "loss": 0.8912, "step": 2873 }, { "epoch": 1.1683268957105102, "grad_norm": 0.10219339281320572, "learning_rate": 8.32485243232241e-05, "loss": 0.9928, 
"step": 2874 }, { "epoch": 1.168733482415125, "grad_norm": 0.10122732818126678, "learning_rate": 8.320781599837167e-05, "loss": 0.9395, "step": 2875 }, { "epoch": 1.1691400691197398, "grad_norm": 0.10562714189291, "learning_rate": 8.316710767351924e-05, "loss": 1.0062, "step": 2876 }, { "epoch": 1.1695466558243546, "grad_norm": 0.1061634048819542, "learning_rate": 8.312639934866681e-05, "loss": 0.9781, "step": 2877 }, { "epoch": 1.1699532425289694, "grad_norm": 0.09807330369949341, "learning_rate": 8.308569102381438e-05, "loss": 0.9942, "step": 2878 }, { "epoch": 1.170359829233584, "grad_norm": 0.09426051378250122, "learning_rate": 8.304498269896193e-05, "loss": 0.8829, "step": 2879 }, { "epoch": 1.1707664159381987, "grad_norm": 0.10720623284578323, "learning_rate": 8.300427437410951e-05, "loss": 1.013, "step": 2880 }, { "epoch": 1.1711730026428135, "grad_norm": 0.1007690355181694, "learning_rate": 8.296356604925708e-05, "loss": 0.9141, "step": 2881 }, { "epoch": 1.1715795893474283, "grad_norm": 0.09463895857334137, "learning_rate": 8.292285772440465e-05, "loss": 0.8538, "step": 2882 }, { "epoch": 1.1719861760520431, "grad_norm": 0.10601162165403366, "learning_rate": 8.288214939955222e-05, "loss": 0.9914, "step": 2883 }, { "epoch": 1.172392762756658, "grad_norm": 0.10166117548942566, "learning_rate": 8.284144107469978e-05, "loss": 1.0497, "step": 2884 }, { "epoch": 1.1727993494612727, "grad_norm": 0.09678583592176437, "learning_rate": 8.280073274984734e-05, "loss": 0.877, "step": 2885 }, { "epoch": 1.1732059361658873, "grad_norm": 0.10086601227521896, "learning_rate": 8.276002442499492e-05, "loss": 0.9405, "step": 2886 }, { "epoch": 1.173612522870502, "grad_norm": 0.09687767922878265, "learning_rate": 8.271931610014249e-05, "loss": 0.9439, "step": 2887 }, { "epoch": 1.174019109575117, "grad_norm": 0.10354665666818619, "learning_rate": 8.267860777529005e-05, "loss": 0.9474, "step": 2888 }, { "epoch": 1.1744256962797317, "grad_norm": 0.10761476308107376, 
"learning_rate": 8.263789945043762e-05, "loss": 0.9987, "step": 2889 }, { "epoch": 1.1748322829843465, "grad_norm": 0.10557498037815094, "learning_rate": 8.259719112558519e-05, "loss": 0.9969, "step": 2890 }, { "epoch": 1.175238869688961, "grad_norm": 0.09625912457704544, "learning_rate": 8.255648280073274e-05, "loss": 0.8682, "step": 2891 }, { "epoch": 1.1756454563935759, "grad_norm": 0.10188374668359756, "learning_rate": 8.251577447588032e-05, "loss": 0.9408, "step": 2892 }, { "epoch": 1.1760520430981907, "grad_norm": 0.10539949685335159, "learning_rate": 8.247506615102789e-05, "loss": 0.9603, "step": 2893 }, { "epoch": 1.1764586298028055, "grad_norm": 0.10070807486772537, "learning_rate": 8.243435782617546e-05, "loss": 0.9721, "step": 2894 }, { "epoch": 1.1768652165074203, "grad_norm": 0.10509887337684631, "learning_rate": 8.239364950132303e-05, "loss": 1.0064, "step": 2895 }, { "epoch": 1.177271803212035, "grad_norm": 0.10489141196012497, "learning_rate": 8.23529411764706e-05, "loss": 0.9902, "step": 2896 }, { "epoch": 1.1776783899166496, "grad_norm": 0.1037009060382843, "learning_rate": 8.231223285161816e-05, "loss": 0.9923, "step": 2897 }, { "epoch": 1.1780849766212644, "grad_norm": 0.10252012312412262, "learning_rate": 8.227152452676573e-05, "loss": 0.9177, "step": 2898 }, { "epoch": 1.1784915633258792, "grad_norm": 0.10643766820430756, "learning_rate": 8.22308162019133e-05, "loss": 0.9861, "step": 2899 }, { "epoch": 1.178898150030494, "grad_norm": 0.10174702107906342, "learning_rate": 8.219010787706087e-05, "loss": 1.0039, "step": 2900 }, { "epoch": 1.1793047367351088, "grad_norm": 0.10257185995578766, "learning_rate": 8.214939955220843e-05, "loss": 1.0292, "step": 2901 }, { "epoch": 1.1797113234397236, "grad_norm": 0.09647761285305023, "learning_rate": 8.2108691227356e-05, "loss": 0.9235, "step": 2902 }, { "epoch": 1.1801179101443382, "grad_norm": 0.09710411727428436, "learning_rate": 8.206798290250357e-05, "loss": 0.8994, "step": 2903 }, { "epoch": 
1.180524496848953, "grad_norm": 0.10740290582180023, "learning_rate": 8.202727457765114e-05, "loss": 0.9419, "step": 2904 }, { "epoch": 1.1809310835535678, "grad_norm": 0.10176997631788254, "learning_rate": 8.19865662527987e-05, "loss": 0.9735, "step": 2905 }, { "epoch": 1.1813376702581826, "grad_norm": 0.11002610623836517, "learning_rate": 8.194585792794627e-05, "loss": 1.0246, "step": 2906 }, { "epoch": 1.1817442569627974, "grad_norm": 0.09396279603242874, "learning_rate": 8.190514960309384e-05, "loss": 0.9448, "step": 2907 }, { "epoch": 1.182150843667412, "grad_norm": 0.09984367340803146, "learning_rate": 8.18644412782414e-05, "loss": 0.9305, "step": 2908 }, { "epoch": 1.1825574303720268, "grad_norm": 0.10197685658931732, "learning_rate": 8.182373295338897e-05, "loss": 0.9936, "step": 2909 }, { "epoch": 1.1829640170766416, "grad_norm": 0.10787008702754974, "learning_rate": 8.178302462853654e-05, "loss": 0.9962, "step": 2910 }, { "epoch": 1.1833706037812564, "grad_norm": 0.09014932066202164, "learning_rate": 8.174231630368411e-05, "loss": 0.8241, "step": 2911 }, { "epoch": 1.1837771904858712, "grad_norm": 0.10313025861978531, "learning_rate": 8.170160797883168e-05, "loss": 0.9016, "step": 2912 }, { "epoch": 1.184183777190486, "grad_norm": 0.1036885604262352, "learning_rate": 8.166089965397924e-05, "loss": 0.9661, "step": 2913 }, { "epoch": 1.1845903638951008, "grad_norm": 0.09668964147567749, "learning_rate": 8.16201913291268e-05, "loss": 0.8826, "step": 2914 }, { "epoch": 1.1849969505997153, "grad_norm": 0.09810838848352432, "learning_rate": 8.157948300427438e-05, "loss": 0.8429, "step": 2915 }, { "epoch": 1.1854035373043301, "grad_norm": 0.10050015151500702, "learning_rate": 8.153877467942195e-05, "loss": 0.8683, "step": 2916 }, { "epoch": 1.185810124008945, "grad_norm": 0.10292979329824448, "learning_rate": 8.149806635456951e-05, "loss": 0.9993, "step": 2917 }, { "epoch": 1.1862167107135597, "grad_norm": 0.11106216162443161, "learning_rate": 
8.145735802971708e-05, "loss": 1.0484, "step": 2918 }, { "epoch": 1.1866232974181745, "grad_norm": 0.10027094185352325, "learning_rate": 8.141664970486465e-05, "loss": 0.9296, "step": 2919 }, { "epoch": 1.187029884122789, "grad_norm": 0.1021319329738617, "learning_rate": 8.137594138001222e-05, "loss": 0.9502, "step": 2920 }, { "epoch": 1.187436470827404, "grad_norm": 0.09963817149400711, "learning_rate": 8.133523305515979e-05, "loss": 0.9158, "step": 2921 }, { "epoch": 1.1878430575320187, "grad_norm": 0.10387451201677322, "learning_rate": 8.129452473030735e-05, "loss": 0.9993, "step": 2922 }, { "epoch": 1.1882496442366335, "grad_norm": 0.09406285732984543, "learning_rate": 8.125381640545492e-05, "loss": 0.7994, "step": 2923 }, { "epoch": 1.1886562309412483, "grad_norm": 0.10746529698371887, "learning_rate": 8.121310808060249e-05, "loss": 1.0508, "step": 2924 }, { "epoch": 1.1890628176458629, "grad_norm": 0.09646695107221603, "learning_rate": 8.117239975575006e-05, "loss": 0.8983, "step": 2925 }, { "epoch": 1.1894694043504777, "grad_norm": 0.10675112158060074, "learning_rate": 8.113169143089762e-05, "loss": 1.0671, "step": 2926 }, { "epoch": 1.1898759910550925, "grad_norm": 0.10293237864971161, "learning_rate": 8.109098310604519e-05, "loss": 1.0689, "step": 2927 }, { "epoch": 1.1902825777597072, "grad_norm": 0.10142801702022552, "learning_rate": 8.105027478119276e-05, "loss": 1.0164, "step": 2928 }, { "epoch": 1.190689164464322, "grad_norm": 0.10416755080223083, "learning_rate": 8.100956645634033e-05, "loss": 1.0012, "step": 2929 }, { "epoch": 1.1910957511689368, "grad_norm": 0.102670818567276, "learning_rate": 8.09688581314879e-05, "loss": 0.9931, "step": 2930 }, { "epoch": 1.1915023378735516, "grad_norm": 0.09856782853603363, "learning_rate": 8.092814980663546e-05, "loss": 0.9588, "step": 2931 }, { "epoch": 1.1919089245781662, "grad_norm": 0.09374082833528519, "learning_rate": 8.088744148178303e-05, "loss": 0.8243, "step": 2932 }, { "epoch": 1.192315511282781, 
"grad_norm": 0.10136809945106506, "learning_rate": 8.08467331569306e-05, "loss": 0.907, "step": 2933 }, { "epoch": 1.1927220979873958, "grad_norm": 0.09178245067596436, "learning_rate": 8.080602483207816e-05, "loss": 0.8386, "step": 2934 }, { "epoch": 1.1931286846920106, "grad_norm": 0.10176187753677368, "learning_rate": 8.076531650722573e-05, "loss": 0.991, "step": 2935 }, { "epoch": 1.1935352713966254, "grad_norm": 0.11457332223653793, "learning_rate": 8.07246081823733e-05, "loss": 1.0754, "step": 2936 }, { "epoch": 1.19394185810124, "grad_norm": 0.10390684008598328, "learning_rate": 8.068389985752087e-05, "loss": 0.9519, "step": 2937 }, { "epoch": 1.1943484448058548, "grad_norm": 0.09363167732954025, "learning_rate": 8.064319153266844e-05, "loss": 0.8842, "step": 2938 }, { "epoch": 1.1947550315104696, "grad_norm": 0.09722575545310974, "learning_rate": 8.0602483207816e-05, "loss": 0.9828, "step": 2939 }, { "epoch": 1.1951616182150844, "grad_norm": 1.2541481256484985, "learning_rate": 8.056177488296357e-05, "loss": 1.0317, "step": 2940 }, { "epoch": 1.1955682049196992, "grad_norm": 0.10507947951555252, "learning_rate": 8.052106655811114e-05, "loss": 0.9299, "step": 2941 }, { "epoch": 1.195974791624314, "grad_norm": 0.10633766651153564, "learning_rate": 8.04803582332587e-05, "loss": 0.9335, "step": 2942 }, { "epoch": 1.1963813783289288, "grad_norm": 0.11865809559822083, "learning_rate": 8.043964990840627e-05, "loss": 0.986, "step": 2943 }, { "epoch": 1.1967879650335433, "grad_norm": 0.11408359557390213, "learning_rate": 8.039894158355384e-05, "loss": 0.9278, "step": 2944 }, { "epoch": 1.1971945517381581, "grad_norm": 0.117740198969841, "learning_rate": 8.035823325870141e-05, "loss": 0.9724, "step": 2945 }, { "epoch": 1.197601138442773, "grad_norm": 0.12005554139614105, "learning_rate": 8.031752493384898e-05, "loss": 1.054, "step": 2946 }, { "epoch": 1.1980077251473877, "grad_norm": 0.10749775171279907, "learning_rate": 8.027681660899654e-05, "loss": 0.9919, "step": 
2947 }, { "epoch": 1.1984143118520025, "grad_norm": 0.110999695956707, "learning_rate": 8.023610828414411e-05, "loss": 0.9795, "step": 2948 }, { "epoch": 1.198820898556617, "grad_norm": 0.09761643409729004, "learning_rate": 8.019539995929168e-05, "loss": 0.9312, "step": 2949 }, { "epoch": 1.199227485261232, "grad_norm": 0.10558291524648666, "learning_rate": 8.015469163443925e-05, "loss": 0.9006, "step": 2950 }, { "epoch": 1.1996340719658467, "grad_norm": 0.10757201164960861, "learning_rate": 8.011398330958681e-05, "loss": 0.9704, "step": 2951 }, { "epoch": 1.2000406586704615, "grad_norm": 0.11688996106386185, "learning_rate": 8.007327498473438e-05, "loss": 1.0249, "step": 2952 }, { "epoch": 1.2004472453750763, "grad_norm": 0.10010217875242233, "learning_rate": 8.003256665988195e-05, "loss": 0.9378, "step": 2953 }, { "epoch": 1.2008538320796909, "grad_norm": 0.10797873884439468, "learning_rate": 7.999185833502952e-05, "loss": 1.0828, "step": 2954 }, { "epoch": 1.2012604187843057, "grad_norm": 0.09149176627397537, "learning_rate": 7.995115001017708e-05, "loss": 0.871, "step": 2955 }, { "epoch": 1.2016670054889205, "grad_norm": 0.10462988913059235, "learning_rate": 7.991044168532465e-05, "loss": 0.905, "step": 2956 }, { "epoch": 1.2020735921935353, "grad_norm": 0.10012760758399963, "learning_rate": 7.986973336047222e-05, "loss": 0.9237, "step": 2957 }, { "epoch": 1.20248017889815, "grad_norm": 0.0970139279961586, "learning_rate": 7.982902503561979e-05, "loss": 0.8619, "step": 2958 }, { "epoch": 1.2028867656027649, "grad_norm": 0.1061381995677948, "learning_rate": 7.978831671076736e-05, "loss": 0.9103, "step": 2959 }, { "epoch": 1.2032933523073797, "grad_norm": 0.09973873943090439, "learning_rate": 7.974760838591492e-05, "loss": 0.8852, "step": 2960 }, { "epoch": 1.2036999390119942, "grad_norm": 0.11318770796060562, "learning_rate": 7.970690006106249e-05, "loss": 1.1028, "step": 2961 }, { "epoch": 1.204106525716609, "grad_norm": 0.09135531634092331, "learning_rate": 
7.966619173621006e-05, "loss": 0.8432, "step": 2962 }, { "epoch": 1.2045131124212238, "grad_norm": 0.1008799597620964, "learning_rate": 7.962548341135763e-05, "loss": 0.9173, "step": 2963 }, { "epoch": 1.2049196991258386, "grad_norm": 0.09507846087217331, "learning_rate": 7.95847750865052e-05, "loss": 0.9164, "step": 2964 }, { "epoch": 1.2053262858304534, "grad_norm": 0.10645583271980286, "learning_rate": 7.954406676165276e-05, "loss": 0.968, "step": 2965 }, { "epoch": 1.205732872535068, "grad_norm": 0.09509435296058655, "learning_rate": 7.950335843680033e-05, "loss": 0.8426, "step": 2966 }, { "epoch": 1.2061394592396828, "grad_norm": 0.09644295275211334, "learning_rate": 7.94626501119479e-05, "loss": 0.9195, "step": 2967 }, { "epoch": 1.2065460459442976, "grad_norm": 0.1063341349363327, "learning_rate": 7.942194178709546e-05, "loss": 0.8805, "step": 2968 }, { "epoch": 1.2069526326489124, "grad_norm": 0.1006791740655899, "learning_rate": 7.938123346224303e-05, "loss": 0.96, "step": 2969 }, { "epoch": 1.2073592193535272, "grad_norm": 0.11306698620319366, "learning_rate": 7.93405251373906e-05, "loss": 1.0238, "step": 2970 }, { "epoch": 1.207765806058142, "grad_norm": 0.10371936857700348, "learning_rate": 7.929981681253817e-05, "loss": 0.9954, "step": 2971 }, { "epoch": 1.2081723927627566, "grad_norm": 0.11341479420661926, "learning_rate": 7.925910848768573e-05, "loss": 1.122, "step": 2972 }, { "epoch": 1.2085789794673714, "grad_norm": 0.09975296258926392, "learning_rate": 7.92184001628333e-05, "loss": 0.8842, "step": 2973 }, { "epoch": 1.2089855661719862, "grad_norm": 0.10060261934995651, "learning_rate": 7.917769183798087e-05, "loss": 0.8922, "step": 2974 }, { "epoch": 1.209392152876601, "grad_norm": 0.10362927615642548, "learning_rate": 7.913698351312844e-05, "loss": 0.9477, "step": 2975 }, { "epoch": 1.2097987395812158, "grad_norm": 0.10995787382125854, "learning_rate": 7.9096275188276e-05, "loss": 0.9736, "step": 2976 }, { "epoch": 1.2102053262858306, 
"grad_norm": 0.10389982908964157, "learning_rate": 7.905556686342357e-05, "loss": 1.0125, "step": 2977 }, { "epoch": 1.2106119129904451, "grad_norm": 0.10749273002147675, "learning_rate": 7.901485853857114e-05, "loss": 0.973, "step": 2978 }, { "epoch": 1.21101849969506, "grad_norm": 0.10813795030117035, "learning_rate": 7.897415021371871e-05, "loss": 1.0413, "step": 2979 }, { "epoch": 1.2114250863996747, "grad_norm": 0.10621776431798935, "learning_rate": 7.893344188886628e-05, "loss": 0.9746, "step": 2980 }, { "epoch": 1.2118316731042895, "grad_norm": 0.09385337680578232, "learning_rate": 7.889273356401384e-05, "loss": 0.9024, "step": 2981 }, { "epoch": 1.2122382598089043, "grad_norm": 0.10254476219415665, "learning_rate": 7.885202523916141e-05, "loss": 0.956, "step": 2982 }, { "epoch": 1.212644846513519, "grad_norm": 0.11078932881355286, "learning_rate": 7.881131691430898e-05, "loss": 1.0671, "step": 2983 }, { "epoch": 1.2130514332181337, "grad_norm": 0.10841862857341766, "learning_rate": 7.877060858945655e-05, "loss": 0.8946, "step": 2984 }, { "epoch": 1.2134580199227485, "grad_norm": 0.09590809792280197, "learning_rate": 7.872990026460411e-05, "loss": 0.8795, "step": 2985 }, { "epoch": 1.2138646066273633, "grad_norm": 0.10805724561214447, "learning_rate": 7.868919193975168e-05, "loss": 1.096, "step": 2986 }, { "epoch": 1.214271193331978, "grad_norm": 0.10256502032279968, "learning_rate": 7.864848361489925e-05, "loss": 0.9098, "step": 2987 }, { "epoch": 1.2146777800365929, "grad_norm": 0.10332726687192917, "learning_rate": 7.860777529004682e-05, "loss": 0.9683, "step": 2988 }, { "epoch": 1.2150843667412077, "grad_norm": 0.10192207992076874, "learning_rate": 7.85670669651944e-05, "loss": 0.8888, "step": 2989 }, { "epoch": 1.2154909534458223, "grad_norm": 0.10659588873386383, "learning_rate": 7.852635864034195e-05, "loss": 1.0104, "step": 2990 }, { "epoch": 1.215897540150437, "grad_norm": 0.11742359399795532, "learning_rate": 7.848565031548952e-05, "loss": 0.9874, 
"step": 2991 }, { "epoch": 1.2163041268550518, "grad_norm": 0.10197114944458008, "learning_rate": 7.844494199063709e-05, "loss": 0.9632, "step": 2992 }, { "epoch": 1.2167107135596666, "grad_norm": 0.10864005237817764, "learning_rate": 7.840423366578466e-05, "loss": 1.0323, "step": 2993 }, { "epoch": 1.2171173002642814, "grad_norm": 0.09765638411045074, "learning_rate": 7.836352534093222e-05, "loss": 0.8922, "step": 2994 }, { "epoch": 1.217523886968896, "grad_norm": 0.09913370013237, "learning_rate": 7.832281701607979e-05, "loss": 0.842, "step": 2995 }, { "epoch": 1.2179304736735108, "grad_norm": 0.10157128423452377, "learning_rate": 7.828210869122736e-05, "loss": 0.9314, "step": 2996 }, { "epoch": 1.2183370603781256, "grad_norm": 0.11263057589530945, "learning_rate": 7.824140036637493e-05, "loss": 0.9824, "step": 2997 }, { "epoch": 1.2187436470827404, "grad_norm": 0.1071547195315361, "learning_rate": 7.82006920415225e-05, "loss": 1.0332, "step": 2998 }, { "epoch": 1.2191502337873552, "grad_norm": 0.097862608730793, "learning_rate": 7.815998371667006e-05, "loss": 0.891, "step": 2999 }, { "epoch": 1.21955682049197, "grad_norm": 0.100653737783432, "learning_rate": 7.811927539181763e-05, "loss": 0.8556, "step": 3000 }, { "epoch": 1.2199634071965846, "grad_norm": 0.09933151304721832, "learning_rate": 7.80785670669652e-05, "loss": 0.9655, "step": 3001 }, { "epoch": 1.2203699939011994, "grad_norm": 0.09901740401983261, "learning_rate": 7.803785874211276e-05, "loss": 0.8788, "step": 3002 }, { "epoch": 1.2207765806058142, "grad_norm": 0.10724866390228271, "learning_rate": 7.799715041726033e-05, "loss": 1.0607, "step": 3003 }, { "epoch": 1.221183167310429, "grad_norm": 0.10218902677297592, "learning_rate": 7.79564420924079e-05, "loss": 0.8872, "step": 3004 }, { "epoch": 1.2215897540150438, "grad_norm": 0.10645647346973419, "learning_rate": 7.791573376755547e-05, "loss": 0.9713, "step": 3005 }, { "epoch": 1.2219963407196586, "grad_norm": 0.09906148910522461, "learning_rate": 
7.787502544270303e-05, "loss": 0.893, "step": 3006 }, { "epoch": 1.2224029274242731, "grad_norm": 0.10134434700012207, "learning_rate": 7.78343171178506e-05, "loss": 0.9174, "step": 3007 }, { "epoch": 1.222809514128888, "grad_norm": 0.09524626284837723, "learning_rate": 7.779360879299817e-05, "loss": 0.884, "step": 3008 }, { "epoch": 1.2232161008335027, "grad_norm": 0.1112762987613678, "learning_rate": 7.775290046814574e-05, "loss": 0.9625, "step": 3009 }, { "epoch": 1.2236226875381175, "grad_norm": 0.10021709650754929, "learning_rate": 7.77121921432933e-05, "loss": 0.9371, "step": 3010 }, { "epoch": 1.2240292742427323, "grad_norm": 0.09872548282146454, "learning_rate": 7.767148381844087e-05, "loss": 0.9013, "step": 3011 }, { "epoch": 1.224435860947347, "grad_norm": 0.10400618612766266, "learning_rate": 7.763077549358845e-05, "loss": 0.8636, "step": 3012 }, { "epoch": 1.2248424476519617, "grad_norm": 0.09894006699323654, "learning_rate": 7.759006716873601e-05, "loss": 0.8982, "step": 3013 }, { "epoch": 1.2252490343565765, "grad_norm": 0.10343599319458008, "learning_rate": 7.754935884388358e-05, "loss": 0.9316, "step": 3014 }, { "epoch": 1.2256556210611913, "grad_norm": 0.10449540615081787, "learning_rate": 7.750865051903114e-05, "loss": 0.9958, "step": 3015 }, { "epoch": 1.226062207765806, "grad_norm": 0.11554834991693497, "learning_rate": 7.746794219417871e-05, "loss": 1.0841, "step": 3016 }, { "epoch": 1.226468794470421, "grad_norm": 0.0996081531047821, "learning_rate": 7.742723386932628e-05, "loss": 0.8628, "step": 3017 }, { "epoch": 1.2268753811750357, "grad_norm": 0.10145995020866394, "learning_rate": 7.738652554447386e-05, "loss": 0.9285, "step": 3018 }, { "epoch": 1.2272819678796503, "grad_norm": 0.10826444625854492, "learning_rate": 7.734581721962141e-05, "loss": 0.9654, "step": 3019 }, { "epoch": 1.227688554584265, "grad_norm": 0.09943236410617828, "learning_rate": 7.730510889476898e-05, "loss": 0.9524, "step": 3020 }, { "epoch": 1.2280951412888799, 
"grad_norm": 0.10398366302251816, "learning_rate": 7.726440056991655e-05, "loss": 0.9339, "step": 3021 }, { "epoch": 1.2285017279934947, "grad_norm": 0.10858220607042313, "learning_rate": 7.722369224506412e-05, "loss": 1.0769, "step": 3022 }, { "epoch": 1.2289083146981095, "grad_norm": 0.10792049020528793, "learning_rate": 7.718298392021168e-05, "loss": 1.0769, "step": 3023 }, { "epoch": 1.229314901402724, "grad_norm": 0.10450518876314163, "learning_rate": 7.714227559535927e-05, "loss": 0.9088, "step": 3024 }, { "epoch": 1.2297214881073388, "grad_norm": 0.10447126626968384, "learning_rate": 7.710156727050682e-05, "loss": 0.9865, "step": 3025 }, { "epoch": 1.2301280748119536, "grad_norm": 0.1073504388332367, "learning_rate": 7.706085894565439e-05, "loss": 0.9695, "step": 3026 }, { "epoch": 1.2305346615165684, "grad_norm": 0.09741394966840744, "learning_rate": 7.702015062080195e-05, "loss": 0.9064, "step": 3027 }, { "epoch": 1.2309412482211832, "grad_norm": 0.09577346593141556, "learning_rate": 7.697944229594952e-05, "loss": 0.9028, "step": 3028 }, { "epoch": 1.2313478349257978, "grad_norm": 7.139596939086914, "learning_rate": 7.693873397109709e-05, "loss": 0.9565, "step": 3029 }, { "epoch": 1.2317544216304126, "grad_norm": 0.1124730035662651, "learning_rate": 7.689802564624467e-05, "loss": 1.0396, "step": 3030 }, { "epoch": 1.2321610083350274, "grad_norm": 0.10936611145734787, "learning_rate": 7.685731732139223e-05, "loss": 0.9758, "step": 3031 }, { "epoch": 1.2325675950396422, "grad_norm": 0.09854471683502197, "learning_rate": 7.681660899653979e-05, "loss": 0.9238, "step": 3032 }, { "epoch": 1.232974181744257, "grad_norm": 0.10498196631669998, "learning_rate": 7.677590067168736e-05, "loss": 0.9476, "step": 3033 }, { "epoch": 1.2333807684488718, "grad_norm": 0.10828989744186401, "learning_rate": 7.673519234683493e-05, "loss": 1.1091, "step": 3034 }, { "epoch": 1.2337873551534866, "grad_norm": 0.10605454444885254, "learning_rate": 7.669448402198251e-05, "loss": 
0.9726, "step": 3035 }, { "epoch": 1.2341939418581012, "grad_norm": 0.10483945906162262, "learning_rate": 7.665377569713008e-05, "loss": 0.9175, "step": 3036 }, { "epoch": 1.234600528562716, "grad_norm": 0.1095857173204422, "learning_rate": 7.661306737227763e-05, "loss": 0.9373, "step": 3037 }, { "epoch": 1.2350071152673308, "grad_norm": 0.1086532399058342, "learning_rate": 7.65723590474252e-05, "loss": 1.0688, "step": 3038 }, { "epoch": 1.2354137019719456, "grad_norm": 0.1058100163936615, "learning_rate": 7.653165072257277e-05, "loss": 0.9784, "step": 3039 }, { "epoch": 1.2358202886765604, "grad_norm": 0.10250196605920792, "learning_rate": 7.649094239772033e-05, "loss": 0.9483, "step": 3040 }, { "epoch": 1.236226875381175, "grad_norm": 0.10203064978122711, "learning_rate": 7.645023407286792e-05, "loss": 0.9149, "step": 3041 }, { "epoch": 1.2366334620857897, "grad_norm": 0.10342703759670258, "learning_rate": 7.640952574801547e-05, "loss": 1.0001, "step": 3042 }, { "epoch": 1.2370400487904045, "grad_norm": 0.10385413467884064, "learning_rate": 7.636881742316304e-05, "loss": 0.9545, "step": 3043 }, { "epoch": 1.2374466354950193, "grad_norm": 0.11165875196456909, "learning_rate": 7.63281090983106e-05, "loss": 1.0679, "step": 3044 }, { "epoch": 1.2378532221996341, "grad_norm": 0.09251503646373749, "learning_rate": 7.628740077345817e-05, "loss": 0.8941, "step": 3045 }, { "epoch": 1.238259808904249, "grad_norm": 0.1017691120505333, "learning_rate": 7.624669244860574e-05, "loss": 0.9473, "step": 3046 }, { "epoch": 1.2386663956088637, "grad_norm": 0.11081571877002716, "learning_rate": 7.620598412375332e-05, "loss": 0.9368, "step": 3047 }, { "epoch": 1.2390729823134783, "grad_norm": 0.09933064877986908, "learning_rate": 7.616527579890087e-05, "loss": 0.96, "step": 3048 }, { "epoch": 1.239479569018093, "grad_norm": 0.10422008484601974, "learning_rate": 7.612456747404844e-05, "loss": 0.9896, "step": 3049 }, { "epoch": 1.2398861557227079, "grad_norm": 0.10104691237211227, 
"learning_rate": 7.608385914919601e-05, "loss": 0.9302, "step": 3050 }, { "epoch": 1.2402927424273227, "grad_norm": 0.10157372057437897, "learning_rate": 7.604315082434358e-05, "loss": 0.9781, "step": 3051 }, { "epoch": 1.2406993291319375, "grad_norm": 0.1113799512386322, "learning_rate": 7.600244249949115e-05, "loss": 0.9526, "step": 3052 }, { "epoch": 1.241105915836552, "grad_norm": 0.09875572472810745, "learning_rate": 7.596173417463873e-05, "loss": 0.9739, "step": 3053 }, { "epoch": 1.2415125025411669, "grad_norm": 0.09874456375837326, "learning_rate": 7.592102584978628e-05, "loss": 0.8575, "step": 3054 }, { "epoch": 1.2419190892457816, "grad_norm": 0.10408841073513031, "learning_rate": 7.588031752493385e-05, "loss": 0.9692, "step": 3055 }, { "epoch": 1.2423256759503964, "grad_norm": 0.10415156930685043, "learning_rate": 7.583960920008142e-05, "loss": 0.9416, "step": 3056 }, { "epoch": 1.2427322626550112, "grad_norm": 0.08860078454017639, "learning_rate": 7.579890087522898e-05, "loss": 0.8252, "step": 3057 }, { "epoch": 1.2431388493596258, "grad_norm": 0.10806316137313843, "learning_rate": 7.575819255037656e-05, "loss": 1.0561, "step": 3058 }, { "epoch": 1.2435454360642406, "grad_norm": 0.10151507705450058, "learning_rate": 7.571748422552413e-05, "loss": 1.0323, "step": 3059 }, { "epoch": 1.2439520227688554, "grad_norm": 0.10553670674562454, "learning_rate": 7.567677590067169e-05, "loss": 0.9316, "step": 3060 }, { "epoch": 1.2443586094734702, "grad_norm": 0.10227076709270477, "learning_rate": 7.563606757581925e-05, "loss": 0.9502, "step": 3061 }, { "epoch": 1.244765196178085, "grad_norm": 0.09627656638622284, "learning_rate": 7.559535925096682e-05, "loss": 0.8739, "step": 3062 }, { "epoch": 1.2451717828826998, "grad_norm": 0.10383637249469757, "learning_rate": 7.555465092611439e-05, "loss": 0.9143, "step": 3063 }, { "epoch": 1.2455783695873146, "grad_norm": 0.09451835602521896, "learning_rate": 7.551394260126197e-05, "loss": 0.876, "step": 3064 }, { "epoch": 
1.2459849562919292, "grad_norm": 0.09634227305650711, "learning_rate": 7.547323427640954e-05, "loss": 0.9188, "step": 3065 }, { "epoch": 1.246391542996544, "grad_norm": 0.10271312296390533, "learning_rate": 7.543252595155709e-05, "loss": 0.8986, "step": 3066 }, { "epoch": 1.2467981297011588, "grad_norm": 0.10047610104084015, "learning_rate": 7.539181762670466e-05, "loss": 0.9865, "step": 3067 }, { "epoch": 1.2472047164057736, "grad_norm": 0.09453471750020981, "learning_rate": 7.535110930185223e-05, "loss": 0.8758, "step": 3068 }, { "epoch": 1.2476113031103884, "grad_norm": 0.10748513042926788, "learning_rate": 7.53104009769998e-05, "loss": 0.9987, "step": 3069 }, { "epoch": 1.248017889815003, "grad_norm": 0.11174870282411575, "learning_rate": 7.526969265214738e-05, "loss": 0.9974, "step": 3070 }, { "epoch": 1.2484244765196177, "grad_norm": 0.10034792870283127, "learning_rate": 7.522898432729494e-05, "loss": 0.8953, "step": 3071 }, { "epoch": 1.2488310632242325, "grad_norm": 0.10158214718103409, "learning_rate": 7.51882760024425e-05, "loss": 0.9504, "step": 3072 }, { "epoch": 1.2492376499288473, "grad_norm": 0.10856463760137558, "learning_rate": 7.514756767759007e-05, "loss": 0.9936, "step": 3073 }, { "epoch": 1.2496442366334621, "grad_norm": 0.09521564841270447, "learning_rate": 7.510685935273763e-05, "loss": 0.8602, "step": 3074 }, { "epoch": 1.2500508233380767, "grad_norm": 0.1103881299495697, "learning_rate": 7.50661510278852e-05, "loss": 0.949, "step": 3075 }, { "epoch": 1.2504574100426917, "grad_norm": 0.10218459367752075, "learning_rate": 7.502544270303278e-05, "loss": 0.9122, "step": 3076 }, { "epoch": 1.2508639967473063, "grad_norm": 0.1006489172577858, "learning_rate": 7.498473437818035e-05, "loss": 0.9942, "step": 3077 }, { "epoch": 1.251270583451921, "grad_norm": 0.09525283426046371, "learning_rate": 7.49440260533279e-05, "loss": 0.8852, "step": 3078 }, { "epoch": 1.251677170156536, "grad_norm": 0.10537436604499817, "learning_rate": 
7.490331772847547e-05, "loss": 1.0135, "step": 3079 }, { "epoch": 1.2520837568611507, "grad_norm": 0.10263707488775253, "learning_rate": 7.486260940362304e-05, "loss": 1.0011, "step": 3080 }, { "epoch": 1.2524903435657655, "grad_norm": 0.10623662173748016, "learning_rate": 7.482190107877062e-05, "loss": 1.0535, "step": 3081 }, { "epoch": 1.25289693027038, "grad_norm": 0.09408336877822876, "learning_rate": 7.478119275391819e-05, "loss": 0.8362, "step": 3082 }, { "epoch": 1.2533035169749949, "grad_norm": 0.0979636088013649, "learning_rate": 7.474048442906576e-05, "loss": 0.9462, "step": 3083 }, { "epoch": 1.2537101036796097, "grad_norm": 0.10067994147539139, "learning_rate": 7.469977610421331e-05, "loss": 0.9655, "step": 3084 }, { "epoch": 1.2541166903842245, "grad_norm": 0.10560835152864456, "learning_rate": 7.465906777936088e-05, "loss": 1.0768, "step": 3085 }, { "epoch": 1.2545232770888393, "grad_norm": 0.09928199648857117, "learning_rate": 7.461835945450845e-05, "loss": 0.93, "step": 3086 }, { "epoch": 1.2549298637934538, "grad_norm": 0.10560108721256256, "learning_rate": 7.457765112965603e-05, "loss": 0.9143, "step": 3087 }, { "epoch": 1.2553364504980686, "grad_norm": 0.10322803258895874, "learning_rate": 7.45369428048036e-05, "loss": 0.9063, "step": 3088 }, { "epoch": 1.2557430372026834, "grad_norm": 0.10367201268672943, "learning_rate": 7.449623447995115e-05, "loss": 0.9393, "step": 3089 }, { "epoch": 1.2561496239072982, "grad_norm": 0.0985729992389679, "learning_rate": 7.445552615509872e-05, "loss": 0.9015, "step": 3090 }, { "epoch": 1.256556210611913, "grad_norm": 0.09679027646780014, "learning_rate": 7.441481783024628e-05, "loss": 0.9666, "step": 3091 }, { "epoch": 1.2569627973165278, "grad_norm": 0.10759008675813675, "learning_rate": 7.437410950539385e-05, "loss": 1.0382, "step": 3092 }, { "epoch": 1.2573693840211426, "grad_norm": 0.10421041399240494, "learning_rate": 7.433340118054143e-05, "loss": 0.9283, "step": 3093 }, { "epoch": 1.2577759707257572, 
"grad_norm": 0.10084979981184006, "learning_rate": 7.4292692855689e-05, "loss": 0.9368, "step": 3094 }, { "epoch": 1.258182557430372, "grad_norm": 0.09285192936658859, "learning_rate": 7.425198453083655e-05, "loss": 0.8245, "step": 3095 }, { "epoch": 1.2585891441349868, "grad_norm": 0.10228876024484634, "learning_rate": 7.421127620598412e-05, "loss": 0.9542, "step": 3096 }, { "epoch": 1.2589957308396016, "grad_norm": 0.11151353269815445, "learning_rate": 7.417056788113169e-05, "loss": 1.0976, "step": 3097 }, { "epoch": 1.2594023175442164, "grad_norm": 0.10276535898447037, "learning_rate": 7.412985955627926e-05, "loss": 0.9056, "step": 3098 }, { "epoch": 1.259808904248831, "grad_norm": 0.09734170138835907, "learning_rate": 7.408915123142684e-05, "loss": 0.9009, "step": 3099 }, { "epoch": 1.2602154909534458, "grad_norm": 0.09822948276996613, "learning_rate": 7.40484429065744e-05, "loss": 1.0048, "step": 3100 }, { "epoch": 1.2606220776580606, "grad_norm": 0.10522522032260895, "learning_rate": 7.400773458172196e-05, "loss": 1.043, "step": 3101 }, { "epoch": 1.2610286643626754, "grad_norm": 0.10531225055456161, "learning_rate": 7.396702625686953e-05, "loss": 1.0161, "step": 3102 }, { "epoch": 1.2614352510672902, "grad_norm": 0.08887706696987152, "learning_rate": 7.39263179320171e-05, "loss": 0.7876, "step": 3103 }, { "epoch": 1.2618418377719047, "grad_norm": 0.10563154518604279, "learning_rate": 7.388560960716468e-05, "loss": 0.952, "step": 3104 }, { "epoch": 1.2622484244765197, "grad_norm": 0.09857525676488876, "learning_rate": 7.384490128231224e-05, "loss": 1.0119, "step": 3105 }, { "epoch": 1.2626550111811343, "grad_norm": 0.10056712478399277, "learning_rate": 7.380419295745981e-05, "loss": 0.9256, "step": 3106 }, { "epoch": 1.2630615978857491, "grad_norm": 0.11001981049776077, "learning_rate": 7.376348463260737e-05, "loss": 1.0066, "step": 3107 }, { "epoch": 1.263468184590364, "grad_norm": 0.10128811001777649, "learning_rate": 7.372277630775493e-05, "loss": 1.0057, 
"step": 3108 }, { "epoch": 1.2638747712949787, "grad_norm": 0.08895913511514664, "learning_rate": 7.36820679829025e-05, "loss": 0.8264, "step": 3109 }, { "epoch": 1.2642813579995935, "grad_norm": 0.10075806081295013, "learning_rate": 7.364135965805008e-05, "loss": 0.9723, "step": 3110 }, { "epoch": 1.264687944704208, "grad_norm": 0.1001645028591156, "learning_rate": 7.360065133319765e-05, "loss": 0.9413, "step": 3111 }, { "epoch": 1.2650945314088229, "grad_norm": 0.10511557012796402, "learning_rate": 7.355994300834522e-05, "loss": 0.9366, "step": 3112 }, { "epoch": 1.2655011181134377, "grad_norm": 0.09894498437643051, "learning_rate": 7.351923468349277e-05, "loss": 0.8659, "step": 3113 }, { "epoch": 1.2659077048180525, "grad_norm": 0.10577372461557388, "learning_rate": 7.347852635864034e-05, "loss": 0.9615, "step": 3114 }, { "epoch": 1.2663142915226673, "grad_norm": 0.10764329135417938, "learning_rate": 7.34378180337879e-05, "loss": 1.031, "step": 3115 }, { "epoch": 1.2667208782272819, "grad_norm": 0.09749144315719604, "learning_rate": 7.339710970893549e-05, "loss": 0.895, "step": 3116 }, { "epoch": 1.2671274649318967, "grad_norm": 0.09897952526807785, "learning_rate": 7.335640138408306e-05, "loss": 0.882, "step": 3117 }, { "epoch": 1.2675340516365114, "grad_norm": 0.11318530142307281, "learning_rate": 7.331569305923062e-05, "loss": 1.0453, "step": 3118 }, { "epoch": 1.2679406383411262, "grad_norm": 0.09759360551834106, "learning_rate": 7.327498473437818e-05, "loss": 0.8508, "step": 3119 }, { "epoch": 1.268347225045741, "grad_norm": 0.10094036906957626, "learning_rate": 7.323427640952574e-05, "loss": 1.0193, "step": 3120 }, { "epoch": 1.2687538117503558, "grad_norm": 0.11087031662464142, "learning_rate": 7.319356808467331e-05, "loss": 1.0469, "step": 3121 }, { "epoch": 1.2691603984549706, "grad_norm": 0.10733988881111145, "learning_rate": 7.31528597598209e-05, "loss": 0.9555, "step": 3122 }, { "epoch": 1.2695669851595852, "grad_norm": 0.10932072252035141, 
"learning_rate": 7.311215143496846e-05, "loss": 0.9682, "step": 3123 }, { "epoch": 1.2699735718642, "grad_norm": 0.09542959183454514, "learning_rate": 7.307144311011603e-05, "loss": 0.9259, "step": 3124 }, { "epoch": 1.2703801585688148, "grad_norm": 0.10774809122085571, "learning_rate": 7.303073478526358e-05, "loss": 0.9209, "step": 3125 }, { "epoch": 1.2707867452734296, "grad_norm": 0.09668859839439392, "learning_rate": 7.299002646041115e-05, "loss": 0.9183, "step": 3126 }, { "epoch": 1.2711933319780444, "grad_norm": 0.10144450515508652, "learning_rate": 7.294931813555873e-05, "loss": 0.8618, "step": 3127 }, { "epoch": 1.271599918682659, "grad_norm": 0.09714296460151672, "learning_rate": 7.29086098107063e-05, "loss": 0.9043, "step": 3128 }, { "epoch": 1.2720065053872738, "grad_norm": 0.10313305258750916, "learning_rate": 7.286790148585387e-05, "loss": 0.9729, "step": 3129 }, { "epoch": 1.2724130920918886, "grad_norm": 0.09867827594280243, "learning_rate": 7.282719316100143e-05, "loss": 0.8913, "step": 3130 }, { "epoch": 1.2728196787965034, "grad_norm": 0.09492600709199905, "learning_rate": 7.278648483614899e-05, "loss": 0.8386, "step": 3131 }, { "epoch": 1.2732262655011182, "grad_norm": 0.10441063344478607, "learning_rate": 7.274577651129656e-05, "loss": 0.946, "step": 3132 }, { "epoch": 1.2736328522057327, "grad_norm": 0.10944189876317978, "learning_rate": 7.270506818644414e-05, "loss": 0.9703, "step": 3133 }, { "epoch": 1.2740394389103478, "grad_norm": 0.09535184502601624, "learning_rate": 7.26643598615917e-05, "loss": 0.8566, "step": 3134 }, { "epoch": 1.2744460256149623, "grad_norm": 0.10626508295536041, "learning_rate": 7.262365153673927e-05, "loss": 0.9717, "step": 3135 }, { "epoch": 1.2748526123195771, "grad_norm": 0.10682171583175659, "learning_rate": 7.258294321188683e-05, "loss": 0.9931, "step": 3136 }, { "epoch": 1.275259199024192, "grad_norm": 0.10641193389892578, "learning_rate": 7.25422348870344e-05, "loss": 0.9644, "step": 3137 }, { "epoch": 
1.2756657857288067, "grad_norm": 0.10564321279525757, "learning_rate": 7.250152656218196e-05, "loss": 0.9756, "step": 3138 }, { "epoch": 1.2760723724334215, "grad_norm": 0.10609064251184464, "learning_rate": 7.246081823732954e-05, "loss": 0.9403, "step": 3139 }, { "epoch": 1.276478959138036, "grad_norm": 0.10627634078264236, "learning_rate": 7.242010991247711e-05, "loss": 1.0142, "step": 3140 }, { "epoch": 1.276885545842651, "grad_norm": 0.09416361898183823, "learning_rate": 7.237940158762468e-05, "loss": 0.8662, "step": 3141 }, { "epoch": 1.2772921325472657, "grad_norm": 0.0893898606300354, "learning_rate": 7.233869326277223e-05, "loss": 0.8637, "step": 3142 }, { "epoch": 1.2776987192518805, "grad_norm": 0.09307090193033218, "learning_rate": 7.22979849379198e-05, "loss": 0.8181, "step": 3143 }, { "epoch": 1.2781053059564953, "grad_norm": 0.10833927989006042, "learning_rate": 7.225727661306737e-05, "loss": 1.018, "step": 3144 }, { "epoch": 1.2785118926611099, "grad_norm": 0.10201290994882584, "learning_rate": 7.221656828821495e-05, "loss": 0.975, "step": 3145 }, { "epoch": 1.2789184793657247, "grad_norm": 0.1058439314365387, "learning_rate": 7.217585996336252e-05, "loss": 0.9321, "step": 3146 }, { "epoch": 1.2793250660703395, "grad_norm": 0.10530184209346771, "learning_rate": 7.213515163851008e-05, "loss": 1.0229, "step": 3147 }, { "epoch": 1.2797316527749543, "grad_norm": 0.10850049555301666, "learning_rate": 7.209444331365764e-05, "loss": 0.9531, "step": 3148 }, { "epoch": 1.280138239479569, "grad_norm": 0.09624800086021423, "learning_rate": 7.20537349888052e-05, "loss": 0.8507, "step": 3149 }, { "epoch": 1.2805448261841839, "grad_norm": 0.1027805283665657, "learning_rate": 7.201302666395279e-05, "loss": 0.8931, "step": 3150 }, { "epoch": 1.2809514128887987, "grad_norm": 0.09510186314582825, "learning_rate": 7.197231833910035e-05, "loss": 0.8505, "step": 3151 }, { "epoch": 1.2813579995934132, "grad_norm": 0.09477930516004562, "learning_rate": 
7.193161001424792e-05, "loss": 0.8898, "step": 3152 }, { "epoch": 1.281764586298028, "grad_norm": 0.10591868311166763, "learning_rate": 7.189090168939549e-05, "loss": 1.0329, "step": 3153 }, { "epoch": 1.2821711730026428, "grad_norm": 0.10289102792739868, "learning_rate": 7.185019336454304e-05, "loss": 0.9094, "step": 3154 }, { "epoch": 1.2825777597072576, "grad_norm": 0.09908158332109451, "learning_rate": 7.180948503969061e-05, "loss": 0.9129, "step": 3155 }, { "epoch": 1.2829843464118724, "grad_norm": 0.1069243773818016, "learning_rate": 7.176877671483819e-05, "loss": 1.0386, "step": 3156 }, { "epoch": 1.283390933116487, "grad_norm": 0.09986919909715652, "learning_rate": 7.172806838998576e-05, "loss": 0.9309, "step": 3157 }, { "epoch": 1.2837975198211018, "grad_norm": 0.11003892868757248, "learning_rate": 7.168736006513333e-05, "loss": 1.0736, "step": 3158 }, { "epoch": 1.2842041065257166, "grad_norm": 0.09842909872531891, "learning_rate": 7.16466517402809e-05, "loss": 0.8691, "step": 3159 }, { "epoch": 1.2846106932303314, "grad_norm": 0.10040794312953949, "learning_rate": 7.160594341542845e-05, "loss": 1.0088, "step": 3160 }, { "epoch": 1.2850172799349462, "grad_norm": 0.10622259974479675, "learning_rate": 7.156523509057602e-05, "loss": 0.9163, "step": 3161 }, { "epoch": 1.2854238666395608, "grad_norm": 0.10022327303886414, "learning_rate": 7.15245267657236e-05, "loss": 0.8016, "step": 3162 }, { "epoch": 1.2858304533441758, "grad_norm": 0.09203210473060608, "learning_rate": 7.148381844087117e-05, "loss": 0.872, "step": 3163 }, { "epoch": 1.2862370400487904, "grad_norm": 0.10571742057800293, "learning_rate": 7.144311011601873e-05, "loss": 0.9389, "step": 3164 }, { "epoch": 1.2866436267534052, "grad_norm": 0.09963870793581009, "learning_rate": 7.14024017911663e-05, "loss": 0.8696, "step": 3165 }, { "epoch": 1.28705021345802, "grad_norm": 0.11249116063117981, "learning_rate": 7.136169346631386e-05, "loss": 1.0006, "step": 3166 }, { "epoch": 1.2874568001626348, 
"grad_norm": 0.10685020685195923, "learning_rate": 7.132098514146142e-05, "loss": 0.9818, "step": 3167 }, { "epoch": 1.2878633868672495, "grad_norm": 0.10361088067293167, "learning_rate": 7.1280276816609e-05, "loss": 0.9227, "step": 3168 }, { "epoch": 1.2882699735718641, "grad_norm": 0.103251151740551, "learning_rate": 7.123956849175657e-05, "loss": 0.9239, "step": 3169 }, { "epoch": 1.288676560276479, "grad_norm": 0.10526666045188904, "learning_rate": 7.119886016690414e-05, "loss": 0.9133, "step": 3170 }, { "epoch": 1.2890831469810937, "grad_norm": 0.1041581928730011, "learning_rate": 7.115815184205171e-05, "loss": 1.0211, "step": 3171 }, { "epoch": 1.2894897336857085, "grad_norm": 0.1083633154630661, "learning_rate": 7.111744351719926e-05, "loss": 0.8242, "step": 3172 }, { "epoch": 1.2898963203903233, "grad_norm": 0.10779697448015213, "learning_rate": 7.107673519234684e-05, "loss": 0.995, "step": 3173 }, { "epoch": 1.290302907094938, "grad_norm": 0.11221274733543396, "learning_rate": 7.103602686749441e-05, "loss": 1.0821, "step": 3174 }, { "epoch": 1.2907094937995527, "grad_norm": 0.0996582880616188, "learning_rate": 7.099531854264198e-05, "loss": 0.9197, "step": 3175 }, { "epoch": 1.2911160805041675, "grad_norm": 0.10174833238124847, "learning_rate": 7.095461021778955e-05, "loss": 0.911, "step": 3176 }, { "epoch": 1.2915226672087823, "grad_norm": 0.09850712865591049, "learning_rate": 7.091390189293711e-05, "loss": 0.9195, "step": 3177 }, { "epoch": 1.291929253913397, "grad_norm": 0.1044590100646019, "learning_rate": 7.087319356808467e-05, "loss": 0.9239, "step": 3178 }, { "epoch": 1.2923358406180117, "grad_norm": 0.10324393957853317, "learning_rate": 7.083248524323225e-05, "loss": 0.9766, "step": 3179 }, { "epoch": 1.2927424273226267, "grad_norm": 0.10219740122556686, "learning_rate": 7.079177691837982e-05, "loss": 0.8427, "step": 3180 }, { "epoch": 1.2931490140272413, "grad_norm": 0.09563204646110535, "learning_rate": 7.075106859352738e-05, "loss": 0.8523, 
"step": 3181 }, { "epoch": 1.293555600731856, "grad_norm": 0.10874439030885696, "learning_rate": 7.071036026867495e-05, "loss": 1.0068, "step": 3182 }, { "epoch": 1.2939621874364708, "grad_norm": 0.10866596549749374, "learning_rate": 7.066965194382252e-05, "loss": 0.97, "step": 3183 }, { "epoch": 1.2943687741410856, "grad_norm": 0.1108192652463913, "learning_rate": 7.062894361897007e-05, "loss": 1.0094, "step": 3184 }, { "epoch": 1.2947753608457004, "grad_norm": 0.10688415169715881, "learning_rate": 7.058823529411765e-05, "loss": 0.9784, "step": 3185 }, { "epoch": 1.295181947550315, "grad_norm": 0.11468330025672913, "learning_rate": 7.054752696926522e-05, "loss": 1.0524, "step": 3186 }, { "epoch": 1.2955885342549298, "grad_norm": 0.09452933818101883, "learning_rate": 7.050681864441279e-05, "loss": 0.84, "step": 3187 }, { "epoch": 1.2959951209595446, "grad_norm": 0.10682807862758636, "learning_rate": 7.046611031956036e-05, "loss": 0.9884, "step": 3188 }, { "epoch": 1.2964017076641594, "grad_norm": 0.09095878899097443, "learning_rate": 7.042540199470791e-05, "loss": 0.8412, "step": 3189 }, { "epoch": 1.2968082943687742, "grad_norm": 0.09463642537593842, "learning_rate": 7.038469366985548e-05, "loss": 0.8523, "step": 3190 }, { "epoch": 1.2972148810733888, "grad_norm": 0.10467414557933807, "learning_rate": 7.034398534500306e-05, "loss": 0.946, "step": 3191 }, { "epoch": 1.2976214677780036, "grad_norm": 0.09827042371034622, "learning_rate": 7.030327702015063e-05, "loss": 0.8918, "step": 3192 }, { "epoch": 1.2980280544826184, "grad_norm": 0.10762523114681244, "learning_rate": 7.02625686952982e-05, "loss": 0.8899, "step": 3193 }, { "epoch": 1.2984346411872332, "grad_norm": 0.10172978043556213, "learning_rate": 7.022186037044576e-05, "loss": 1.0034, "step": 3194 }, { "epoch": 1.298841227891848, "grad_norm": 0.10815691202878952, "learning_rate": 7.018115204559332e-05, "loss": 1.067, "step": 3195 }, { "epoch": 1.2992478145964628, "grad_norm": 0.10719442367553711, 
"learning_rate": 7.01404437207409e-05, "loss": 1.1058, "step": 3196 }, { "epoch": 1.2996544013010776, "grad_norm": 0.1034155860543251, "learning_rate": 7.009973539588847e-05, "loss": 1.0444, "step": 3197 }, { "epoch": 1.3000609880056921, "grad_norm": 0.10380623489618301, "learning_rate": 7.005902707103603e-05, "loss": 0.9943, "step": 3198 }, { "epoch": 1.300467574710307, "grad_norm": 0.1016862615942955, "learning_rate": 7.00183187461836e-05, "loss": 0.8967, "step": 3199 }, { "epoch": 1.3008741614149217, "grad_norm": 0.0974292978644371, "learning_rate": 6.997761042133117e-05, "loss": 0.9727, "step": 3200 }, { "epoch": 1.3012807481195365, "grad_norm": 0.10616238415241241, "learning_rate": 6.993690209647872e-05, "loss": 0.942, "step": 3201 }, { "epoch": 1.3016873348241513, "grad_norm": 0.10380051285028458, "learning_rate": 6.98961937716263e-05, "loss": 0.919, "step": 3202 }, { "epoch": 1.302093921528766, "grad_norm": 0.10846268385648727, "learning_rate": 6.985548544677387e-05, "loss": 1.0066, "step": 3203 }, { "epoch": 1.3025005082333807, "grad_norm": 0.09922856092453003, "learning_rate": 6.981477712192144e-05, "loss": 0.9161, "step": 3204 }, { "epoch": 1.3029070949379955, "grad_norm": 0.09791149944067001, "learning_rate": 6.977406879706901e-05, "loss": 0.8916, "step": 3205 }, { "epoch": 1.3033136816426103, "grad_norm": 0.10441888123750687, "learning_rate": 6.973336047221657e-05, "loss": 0.8901, "step": 3206 }, { "epoch": 1.303720268347225, "grad_norm": 0.10184387862682343, "learning_rate": 6.969265214736413e-05, "loss": 0.9033, "step": 3207 }, { "epoch": 1.3041268550518397, "grad_norm": 0.10396290570497513, "learning_rate": 6.965194382251171e-05, "loss": 0.9896, "step": 3208 }, { "epoch": 1.3045334417564547, "grad_norm": 0.10278751701116562, "learning_rate": 6.961123549765928e-05, "loss": 0.8561, "step": 3209 }, { "epoch": 1.3049400284610693, "grad_norm": 0.1081685721874237, "learning_rate": 6.957052717280685e-05, "loss": 1.0042, "step": 3210 }, { "epoch": 
1.305346615165684, "grad_norm": 0.10850156843662262, "learning_rate": 6.952981884795441e-05, "loss": 0.9461, "step": 3211 }, { "epoch": 1.3057532018702989, "grad_norm": 0.10625606030225754, "learning_rate": 6.948911052310198e-05, "loss": 0.9943, "step": 3212 }, { "epoch": 1.3061597885749137, "grad_norm": 0.1126924604177475, "learning_rate": 6.944840219824953e-05, "loss": 1.1293, "step": 3213 }, { "epoch": 1.3065663752795285, "grad_norm": 0.09927353262901306, "learning_rate": 6.940769387339712e-05, "loss": 0.8866, "step": 3214 }, { "epoch": 1.306972961984143, "grad_norm": 0.11918359994888306, "learning_rate": 6.936698554854468e-05, "loss": 1.0915, "step": 3215 }, { "epoch": 1.3073795486887578, "grad_norm": 0.11078538745641708, "learning_rate": 6.932627722369225e-05, "loss": 1.0182, "step": 3216 }, { "epoch": 1.3077861353933726, "grad_norm": 0.10765037685632706, "learning_rate": 6.928556889883982e-05, "loss": 0.9387, "step": 3217 }, { "epoch": 1.3081927220979874, "grad_norm": 0.09321583807468414, "learning_rate": 6.924486057398739e-05, "loss": 0.8916, "step": 3218 }, { "epoch": 1.3085993088026022, "grad_norm": 0.09504148364067078, "learning_rate": 6.920415224913494e-05, "loss": 0.8669, "step": 3219 }, { "epoch": 1.3090058955072168, "grad_norm": 0.09915818274021149, "learning_rate": 6.916344392428252e-05, "loss": 0.9645, "step": 3220 }, { "epoch": 1.3094124822118316, "grad_norm": 0.10582833737134933, "learning_rate": 6.912273559943009e-05, "loss": 0.9428, "step": 3221 }, { "epoch": 1.3098190689164464, "grad_norm": 0.09936497360467911, "learning_rate": 6.908202727457766e-05, "loss": 0.9818, "step": 3222 }, { "epoch": 1.3102256556210612, "grad_norm": 0.0910920724272728, "learning_rate": 6.904131894972522e-05, "loss": 0.8306, "step": 3223 }, { "epoch": 1.310632242325676, "grad_norm": 0.10046056658029556, "learning_rate": 6.900061062487279e-05, "loss": 0.9159, "step": 3224 }, { "epoch": 1.3110388290302908, "grad_norm": 0.09471949934959412, "learning_rate": 
6.895990230002036e-05, "loss": 0.8514, "step": 3225 }, { "epoch": 1.3114454157349056, "grad_norm": 0.09952743351459503, "learning_rate": 6.891919397516793e-05, "loss": 0.8931, "step": 3226 }, { "epoch": 1.3118520024395202, "grad_norm": 0.11039602011442184, "learning_rate": 6.88784856503155e-05, "loss": 1.0626, "step": 3227 }, { "epoch": 1.312258589144135, "grad_norm": 0.09720449149608612, "learning_rate": 6.883777732546306e-05, "loss": 0.8322, "step": 3228 }, { "epoch": 1.3126651758487498, "grad_norm": 0.10088366270065308, "learning_rate": 6.879706900061063e-05, "loss": 0.9092, "step": 3229 }, { "epoch": 1.3130717625533646, "grad_norm": 0.09886251389980316, "learning_rate": 6.87563606757582e-05, "loss": 0.8671, "step": 3230 }, { "epoch": 1.3134783492579793, "grad_norm": 0.09615595638751984, "learning_rate": 6.871565235090577e-05, "loss": 0.8902, "step": 3231 }, { "epoch": 1.313884935962594, "grad_norm": 0.09880536794662476, "learning_rate": 6.867494402605333e-05, "loss": 0.986, "step": 3232 }, { "epoch": 1.3142915226672087, "grad_norm": 0.08802687376737595, "learning_rate": 6.86342357012009e-05, "loss": 0.8119, "step": 3233 }, { "epoch": 1.3146981093718235, "grad_norm": 0.10455913096666336, "learning_rate": 6.859352737634847e-05, "loss": 1.0014, "step": 3234 }, { "epoch": 1.3151046960764383, "grad_norm": 0.10536748915910721, "learning_rate": 6.855281905149604e-05, "loss": 0.9696, "step": 3235 }, { "epoch": 1.3155112827810531, "grad_norm": 0.10749450325965881, "learning_rate": 6.851211072664359e-05, "loss": 1.0153, "step": 3236 }, { "epoch": 1.3159178694856677, "grad_norm": 0.10122597217559814, "learning_rate": 6.847140240179117e-05, "loss": 0.9241, "step": 3237 }, { "epoch": 1.3163244561902827, "grad_norm": 0.09782005101442337, "learning_rate": 6.843069407693874e-05, "loss": 0.8996, "step": 3238 }, { "epoch": 1.3167310428948973, "grad_norm": 0.09286556392908096, "learning_rate": 6.83899857520863e-05, "loss": 0.8474, "step": 3239 }, { "epoch": 1.317137629599512, 
"grad_norm": 0.10210733860731125, "learning_rate": 6.834927742723387e-05, "loss": 0.9524, "step": 3240 }, { "epoch": 1.3175442163041269, "grad_norm": 0.09640049189329147, "learning_rate": 6.830856910238144e-05, "loss": 0.915, "step": 3241 }, { "epoch": 1.3179508030087417, "grad_norm": 0.11026381701231003, "learning_rate": 6.8267860777529e-05, "loss": 1.0658, "step": 3242 }, { "epoch": 1.3183573897133565, "grad_norm": 0.11060287058353424, "learning_rate": 6.822715245267658e-05, "loss": 0.9972, "step": 3243 }, { "epoch": 1.318763976417971, "grad_norm": 0.09819656610488892, "learning_rate": 6.818644412782414e-05, "loss": 0.8394, "step": 3244 }, { "epoch": 1.3191705631225858, "grad_norm": 0.09501704573631287, "learning_rate": 6.814573580297171e-05, "loss": 0.8501, "step": 3245 }, { "epoch": 1.3195771498272006, "grad_norm": 0.10703945904970169, "learning_rate": 6.810502747811928e-05, "loss": 1.0179, "step": 3246 }, { "epoch": 1.3199837365318154, "grad_norm": 0.09531719982624054, "learning_rate": 6.806431915326685e-05, "loss": 0.8521, "step": 3247 }, { "epoch": 1.3203903232364302, "grad_norm": 0.10462060570716858, "learning_rate": 6.802361082841442e-05, "loss": 0.9666, "step": 3248 }, { "epoch": 1.3207969099410448, "grad_norm": 0.11056574434041977, "learning_rate": 6.798290250356198e-05, "loss": 1.0324, "step": 3249 }, { "epoch": 1.3212034966456596, "grad_norm": 0.10290572047233582, "learning_rate": 6.794219417870955e-05, "loss": 0.9468, "step": 3250 }, { "epoch": 1.3216100833502744, "grad_norm": 0.11361809819936752, "learning_rate": 6.790148585385712e-05, "loss": 1.1379, "step": 3251 }, { "epoch": 1.3220166700548892, "grad_norm": 0.10172957926988602, "learning_rate": 6.786077752900469e-05, "loss": 0.9012, "step": 3252 }, { "epoch": 1.322423256759504, "grad_norm": 0.10785951465368271, "learning_rate": 6.782006920415225e-05, "loss": 1.0223, "step": 3253 }, { "epoch": 1.3228298434641188, "grad_norm": 0.10551370680332184, "learning_rate": 6.777936087929982e-05, "loss": 
0.9104, "step": 3254 }, { "epoch": 1.3232364301687336, "grad_norm": 0.10205043107271194, "learning_rate": 6.773865255444739e-05, "loss": 0.9634, "step": 3255 }, { "epoch": 1.3236430168733482, "grad_norm": 0.11159402132034302, "learning_rate": 6.769794422959496e-05, "loss": 0.9731, "step": 3256 }, { "epoch": 1.324049603577963, "grad_norm": 0.10272105038166046, "learning_rate": 6.765723590474252e-05, "loss": 0.8697, "step": 3257 }, { "epoch": 1.3244561902825778, "grad_norm": 0.09885246306657791, "learning_rate": 6.761652757989009e-05, "loss": 0.8714, "step": 3258 }, { "epoch": 1.3248627769871926, "grad_norm": 0.10272397845983505, "learning_rate": 6.757581925503766e-05, "loss": 0.9957, "step": 3259 }, { "epoch": 1.3252693636918074, "grad_norm": 0.10085637867450714, "learning_rate": 6.753511093018523e-05, "loss": 0.8967, "step": 3260 }, { "epoch": 1.325675950396422, "grad_norm": 0.09773235023021698, "learning_rate": 6.74944026053328e-05, "loss": 0.9523, "step": 3261 }, { "epoch": 1.3260825371010367, "grad_norm": 0.09937093406915665, "learning_rate": 6.745369428048036e-05, "loss": 0.9269, "step": 3262 }, { "epoch": 1.3264891238056515, "grad_norm": 0.1010124534368515, "learning_rate": 6.741298595562793e-05, "loss": 0.917, "step": 3263 }, { "epoch": 1.3268957105102663, "grad_norm": 0.09788401424884796, "learning_rate": 6.73722776307755e-05, "loss": 0.9131, "step": 3264 }, { "epoch": 1.3273022972148811, "grad_norm": 0.1079770028591156, "learning_rate": 6.733156930592307e-05, "loss": 0.9804, "step": 3265 }, { "epoch": 1.3277088839194957, "grad_norm": 0.10159478336572647, "learning_rate": 6.729086098107063e-05, "loss": 0.9987, "step": 3266 }, { "epoch": 1.3281154706241107, "grad_norm": 0.10081673413515091, "learning_rate": 6.72501526562182e-05, "loss": 0.9141, "step": 3267 }, { "epoch": 1.3285220573287253, "grad_norm": 0.10763905942440033, "learning_rate": 6.720944433136577e-05, "loss": 0.9234, "step": 3268 }, { "epoch": 1.32892864403334, "grad_norm": 0.10242763161659241, 
"learning_rate": 6.716873600651334e-05, "loss": 0.8658, "step": 3269 }, { "epoch": 1.329335230737955, "grad_norm": 0.09579820930957794, "learning_rate": 6.71280276816609e-05, "loss": 0.8644, "step": 3270 }, { "epoch": 1.3297418174425697, "grad_norm": 0.10901151597499847, "learning_rate": 6.708731935680847e-05, "loss": 0.9082, "step": 3271 }, { "epoch": 1.3301484041471845, "grad_norm": 0.10376943647861481, "learning_rate": 6.704661103195604e-05, "loss": 1.0499, "step": 3272 }, { "epoch": 1.330554990851799, "grad_norm": 0.09166496247053146, "learning_rate": 6.70059027071036e-05, "loss": 0.8626, "step": 3273 }, { "epoch": 1.3309615775564139, "grad_norm": 0.10346846282482147, "learning_rate": 6.696519438225117e-05, "loss": 1.0357, "step": 3274 }, { "epoch": 1.3313681642610287, "grad_norm": 0.09558013081550598, "learning_rate": 6.692448605739874e-05, "loss": 0.8998, "step": 3275 }, { "epoch": 1.3317747509656435, "grad_norm": 0.10983523726463318, "learning_rate": 6.688377773254631e-05, "loss": 1.0083, "step": 3276 }, { "epoch": 1.3321813376702583, "grad_norm": 0.1021072268486023, "learning_rate": 6.684306940769388e-05, "loss": 0.9392, "step": 3277 }, { "epoch": 1.3325879243748728, "grad_norm": 0.09880812466144562, "learning_rate": 6.680236108284144e-05, "loss": 0.9355, "step": 3278 }, { "epoch": 1.3329945110794876, "grad_norm": 0.1031312495470047, "learning_rate": 6.676165275798901e-05, "loss": 0.9932, "step": 3279 }, { "epoch": 1.3334010977841024, "grad_norm": 0.09794270247220993, "learning_rate": 6.672094443313658e-05, "loss": 0.8731, "step": 3280 }, { "epoch": 1.3338076844887172, "grad_norm": 0.11040783673524857, "learning_rate": 6.668023610828415e-05, "loss": 0.9403, "step": 3281 }, { "epoch": 1.334214271193332, "grad_norm": 0.11623408645391464, "learning_rate": 6.663952778343171e-05, "loss": 1.0711, "step": 3282 }, { "epoch": 1.3346208578979466, "grad_norm": 0.11237382143735886, "learning_rate": 6.659881945857928e-05, "loss": 0.9668, "step": 3283 }, { "epoch": 
1.3350274446025616, "grad_norm": 0.10134642571210861, "learning_rate": 6.655811113372685e-05, "loss": 0.9352, "step": 3284 }, { "epoch": 1.3354340313071762, "grad_norm": 0.09573974460363388, "learning_rate": 6.651740280887442e-05, "loss": 0.91, "step": 3285 }, { "epoch": 1.335840618011791, "grad_norm": 0.10902750492095947, "learning_rate": 6.647669448402199e-05, "loss": 1.0084, "step": 3286 }, { "epoch": 1.3362472047164058, "grad_norm": 0.10625172406435013, "learning_rate": 6.643598615916955e-05, "loss": 0.994, "step": 3287 }, { "epoch": 1.3366537914210206, "grad_norm": 0.11479779332876205, "learning_rate": 6.639527783431712e-05, "loss": 1.0517, "step": 3288 }, { "epoch": 1.3370603781256354, "grad_norm": 0.10585431009531021, "learning_rate": 6.635456950946469e-05, "loss": 1.0049, "step": 3289 }, { "epoch": 1.33746696483025, "grad_norm": 0.10156022757291794, "learning_rate": 6.631386118461226e-05, "loss": 0.8845, "step": 3290 }, { "epoch": 1.3378735515348648, "grad_norm": 0.1020706295967102, "learning_rate": 6.627315285975982e-05, "loss": 0.9834, "step": 3291 }, { "epoch": 1.3382801382394796, "grad_norm": 0.09861770272254944, "learning_rate": 6.623244453490739e-05, "loss": 0.9128, "step": 3292 }, { "epoch": 1.3386867249440944, "grad_norm": 0.0913916602730751, "learning_rate": 6.619173621005496e-05, "loss": 0.8242, "step": 3293 }, { "epoch": 1.3390933116487092, "grad_norm": 0.10706761479377747, "learning_rate": 6.615102788520253e-05, "loss": 1.0844, "step": 3294 }, { "epoch": 1.3394998983533237, "grad_norm": 0.09800904244184494, "learning_rate": 6.61103195603501e-05, "loss": 0.881, "step": 3295 }, { "epoch": 1.3399064850579385, "grad_norm": 0.09987018257379532, "learning_rate": 6.606961123549766e-05, "loss": 0.9368, "step": 3296 }, { "epoch": 1.3403130717625533, "grad_norm": 0.10405978560447693, "learning_rate": 6.602890291064523e-05, "loss": 0.9642, "step": 3297 }, { "epoch": 1.3407196584671681, "grad_norm": 0.10836942493915558, "learning_rate": 
6.59881945857928e-05, "loss": 0.9626, "step": 3298 }, { "epoch": 1.341126245171783, "grad_norm": 0.11826450377702713, "learning_rate": 6.594748626094036e-05, "loss": 0.9083, "step": 3299 }, { "epoch": 1.3415328318763977, "grad_norm": 0.09229649603366852, "learning_rate": 6.590677793608793e-05, "loss": 0.886, "step": 3300 }, { "epoch": 1.3419394185810125, "grad_norm": 0.10104688256978989, "learning_rate": 6.58660696112355e-05, "loss": 0.8792, "step": 3301 }, { "epoch": 1.342346005285627, "grad_norm": 0.10879054665565491, "learning_rate": 6.582536128638307e-05, "loss": 1.0057, "step": 3302 }, { "epoch": 1.3427525919902419, "grad_norm": 0.10086936503648758, "learning_rate": 6.578465296153064e-05, "loss": 0.9508, "step": 3303 }, { "epoch": 1.3431591786948567, "grad_norm": 0.10607967525720596, "learning_rate": 6.57439446366782e-05, "loss": 0.9406, "step": 3304 }, { "epoch": 1.3435657653994715, "grad_norm": 0.0975409746170044, "learning_rate": 6.570323631182577e-05, "loss": 0.8687, "step": 3305 }, { "epoch": 1.3439723521040863, "grad_norm": 0.09562422335147858, "learning_rate": 6.566252798697334e-05, "loss": 0.7939, "step": 3306 }, { "epoch": 1.3443789388087009, "grad_norm": 0.102279432117939, "learning_rate": 6.56218196621209e-05, "loss": 0.8287, "step": 3307 }, { "epoch": 1.3447855255133156, "grad_norm": 0.11360716074705124, "learning_rate": 6.558111133726847e-05, "loss": 0.923, "step": 3308 }, { "epoch": 1.3451921122179304, "grad_norm": 0.09787465631961823, "learning_rate": 6.554040301241604e-05, "loss": 0.8715, "step": 3309 }, { "epoch": 1.3455986989225452, "grad_norm": 0.1050533726811409, "learning_rate": 6.549969468756361e-05, "loss": 0.9855, "step": 3310 }, { "epoch": 1.34600528562716, "grad_norm": 0.09932462126016617, "learning_rate": 6.545898636271118e-05, "loss": 0.8941, "step": 3311 }, { "epoch": 1.3464118723317746, "grad_norm": 0.10330644994974136, "learning_rate": 6.541827803785874e-05, "loss": 1.0426, "step": 3312 }, { "epoch": 1.3468184590363896, 
"grad_norm": 0.10034661740064621, "learning_rate": 6.537756971300631e-05, "loss": 0.869, "step": 3313 }, { "epoch": 1.3472250457410042, "grad_norm": 0.10637210309505463, "learning_rate": 6.533686138815388e-05, "loss": 0.9451, "step": 3314 }, { "epoch": 1.347631632445619, "grad_norm": 0.1081581711769104, "learning_rate": 6.529615306330145e-05, "loss": 1.0438, "step": 3315 }, { "epoch": 1.3480382191502338, "grad_norm": 0.10719630867242813, "learning_rate": 6.525544473844901e-05, "loss": 0.903, "step": 3316 }, { "epoch": 1.3484448058548486, "grad_norm": 0.09714970737695694, "learning_rate": 6.521473641359658e-05, "loss": 0.8939, "step": 3317 }, { "epoch": 1.3488513925594634, "grad_norm": 0.10341964662075043, "learning_rate": 6.517402808874415e-05, "loss": 0.9428, "step": 3318 }, { "epoch": 1.349257979264078, "grad_norm": 0.10897208005189896, "learning_rate": 6.513331976389172e-05, "loss": 0.9621, "step": 3319 }, { "epoch": 1.3496645659686928, "grad_norm": 0.09563440829515457, "learning_rate": 6.509261143903929e-05, "loss": 0.9126, "step": 3320 }, { "epoch": 1.3500711526733076, "grad_norm": 0.10758570581674576, "learning_rate": 6.505190311418685e-05, "loss": 1.039, "step": 3321 }, { "epoch": 1.3504777393779224, "grad_norm": 0.10049328953027725, "learning_rate": 6.501119478933442e-05, "loss": 0.8708, "step": 3322 }, { "epoch": 1.3508843260825372, "grad_norm": 0.10646460205316544, "learning_rate": 6.497048646448199e-05, "loss": 1.01, "step": 3323 }, { "epoch": 1.3512909127871517, "grad_norm": 0.10543674230575562, "learning_rate": 6.492977813962956e-05, "loss": 0.9799, "step": 3324 }, { "epoch": 1.3516974994917665, "grad_norm": 0.10643398016691208, "learning_rate": 6.488906981477712e-05, "loss": 0.9281, "step": 3325 }, { "epoch": 1.3521040861963813, "grad_norm": 0.10901035368442535, "learning_rate": 6.484836148992469e-05, "loss": 1.0218, "step": 3326 }, { "epoch": 1.3525106729009961, "grad_norm": 0.12187345325946808, "learning_rate": 6.480765316507226e-05, "loss": 1.0313, 
"step": 3327 }, { "epoch": 1.352917259605611, "grad_norm": 0.10931206494569778, "learning_rate": 6.476694484021983e-05, "loss": 1.0675, "step": 3328 }, { "epoch": 1.3533238463102257, "grad_norm": 0.10836771130561829, "learning_rate": 6.47262365153674e-05, "loss": 1.0236, "step": 3329 }, { "epoch": 1.3537304330148405, "grad_norm": 0.10628294199705124, "learning_rate": 6.468552819051496e-05, "loss": 0.961, "step": 3330 }, { "epoch": 1.354137019719455, "grad_norm": 0.10702615231275558, "learning_rate": 6.464481986566253e-05, "loss": 0.9972, "step": 3331 }, { "epoch": 1.35454360642407, "grad_norm": 0.10359417647123337, "learning_rate": 6.46041115408101e-05, "loss": 0.9665, "step": 3332 }, { "epoch": 1.3549501931286847, "grad_norm": 0.10689602792263031, "learning_rate": 6.456340321595766e-05, "loss": 0.9761, "step": 3333 }, { "epoch": 1.3553567798332995, "grad_norm": 0.10960511118173599, "learning_rate": 6.452269489110523e-05, "loss": 0.9182, "step": 3334 }, { "epoch": 1.3557633665379143, "grad_norm": 0.1095903292298317, "learning_rate": 6.44819865662528e-05, "loss": 1.0502, "step": 3335 }, { "epoch": 1.3561699532425289, "grad_norm": 0.11125817894935608, "learning_rate": 6.444127824140037e-05, "loss": 0.9591, "step": 3336 }, { "epoch": 1.3565765399471437, "grad_norm": 0.09888199716806412, "learning_rate": 6.440056991654793e-05, "loss": 0.9291, "step": 3337 }, { "epoch": 1.3569831266517585, "grad_norm": 0.10406041145324707, "learning_rate": 6.43598615916955e-05, "loss": 0.8849, "step": 3338 }, { "epoch": 1.3573897133563733, "grad_norm": 0.10289091616868973, "learning_rate": 6.431915326684307e-05, "loss": 0.9114, "step": 3339 }, { "epoch": 1.357796300060988, "grad_norm": 0.10479141771793365, "learning_rate": 6.427844494199065e-05, "loss": 0.9903, "step": 3340 }, { "epoch": 1.3582028867656026, "grad_norm": 0.1062149703502655, "learning_rate": 6.42377366171382e-05, "loss": 0.9887, "step": 3341 }, { "epoch": 1.3586094734702177, "grad_norm": 0.10779149830341339, 
"learning_rate": 6.419702829228577e-05, "loss": 0.9764, "step": 3342 }, { "epoch": 1.3590160601748322, "grad_norm": 0.10290367156267166, "learning_rate": 6.415631996743334e-05, "loss": 0.9468, "step": 3343 }, { "epoch": 1.359422646879447, "grad_norm": 0.10836590826511383, "learning_rate": 6.411561164258091e-05, "loss": 0.9258, "step": 3344 }, { "epoch": 1.3598292335840618, "grad_norm": 0.10067134350538254, "learning_rate": 6.407490331772848e-05, "loss": 0.9739, "step": 3345 }, { "epoch": 1.3602358202886766, "grad_norm": 0.1004357561469078, "learning_rate": 6.403419499287606e-05, "loss": 0.8851, "step": 3346 }, { "epoch": 1.3606424069932914, "grad_norm": 0.10546833276748657, "learning_rate": 6.399348666802361e-05, "loss": 0.9733, "step": 3347 }, { "epoch": 1.361048993697906, "grad_norm": 0.10163208842277527, "learning_rate": 6.395277834317118e-05, "loss": 0.9647, "step": 3348 }, { "epoch": 1.3614555804025208, "grad_norm": 0.09670364856719971, "learning_rate": 6.391207001831875e-05, "loss": 0.812, "step": 3349 }, { "epoch": 1.3618621671071356, "grad_norm": 0.10565786063671112, "learning_rate": 6.387136169346631e-05, "loss": 0.9251, "step": 3350 }, { "epoch": 1.3622687538117504, "grad_norm": 0.10906309634447098, "learning_rate": 6.383065336861388e-05, "loss": 0.9706, "step": 3351 }, { "epoch": 1.3626753405163652, "grad_norm": 0.10507390648126602, "learning_rate": 6.378994504376146e-05, "loss": 0.9618, "step": 3352 }, { "epoch": 1.3630819272209798, "grad_norm": 0.1075945496559143, "learning_rate": 6.374923671890902e-05, "loss": 1.0055, "step": 3353 }, { "epoch": 1.3634885139255946, "grad_norm": 0.10433916747570038, "learning_rate": 6.370852839405658e-05, "loss": 1.0028, "step": 3354 }, { "epoch": 1.3638951006302094, "grad_norm": 0.09030991047620773, "learning_rate": 6.366782006920415e-05, "loss": 0.8476, "step": 3355 }, { "epoch": 1.3643016873348242, "grad_norm": 0.09600205719470978, "learning_rate": 6.362711174435172e-05, "loss": 0.8717, "step": 3356 }, { "epoch": 
1.364708274039439, "grad_norm": 0.10889092087745667, "learning_rate": 6.358640341949929e-05, "loss": 0.9637, "step": 3357 }, { "epoch": 1.3651148607440537, "grad_norm": 0.09938116371631622, "learning_rate": 6.354569509464687e-05, "loss": 0.896, "step": 3358 }, { "epoch": 1.3655214474486685, "grad_norm": 0.10416344553232193, "learning_rate": 6.350498676979442e-05, "loss": 0.9297, "step": 3359 }, { "epoch": 1.3659280341532831, "grad_norm": 0.11025689542293549, "learning_rate": 6.346427844494199e-05, "loss": 1.0943, "step": 3360 }, { "epoch": 1.366334620857898, "grad_norm": 0.10861194133758545, "learning_rate": 6.342357012008956e-05, "loss": 1.0826, "step": 3361 }, { "epoch": 1.3667412075625127, "grad_norm": 0.09897050261497498, "learning_rate": 6.338286179523713e-05, "loss": 0.8662, "step": 3362 }, { "epoch": 1.3671477942671275, "grad_norm": 0.1111622229218483, "learning_rate": 6.334215347038471e-05, "loss": 1.059, "step": 3363 }, { "epoch": 1.3675543809717423, "grad_norm": 0.10954444110393524, "learning_rate": 6.330144514553226e-05, "loss": 1.0297, "step": 3364 }, { "epoch": 1.3679609676763569, "grad_norm": 0.09817023575305939, "learning_rate": 6.326073682067983e-05, "loss": 1.0391, "step": 3365 }, { "epoch": 1.3683675543809717, "grad_norm": 0.10183369368314743, "learning_rate": 6.32200284958274e-05, "loss": 0.8658, "step": 3366 }, { "epoch": 1.3687741410855865, "grad_norm": 0.1102672666311264, "learning_rate": 6.317932017097496e-05, "loss": 1.0753, "step": 3367 }, { "epoch": 1.3691807277902013, "grad_norm": 0.10442694276571274, "learning_rate": 6.313861184612253e-05, "loss": 0.979, "step": 3368 }, { "epoch": 1.369587314494816, "grad_norm": 0.10101038962602615, "learning_rate": 6.309790352127011e-05, "loss": 0.9198, "step": 3369 }, { "epoch": 1.3699939011994307, "grad_norm": 0.10193105787038803, "learning_rate": 6.305719519641767e-05, "loss": 0.934, "step": 3370 }, { "epoch": 1.3704004879040457, "grad_norm": 0.10245472937822342, "learning_rate": 
6.301648687156523e-05, "loss": 0.9839, "step": 3371 }, { "epoch": 1.3708070746086602, "grad_norm": 58.737693786621094, "learning_rate": 6.29757785467128e-05, "loss": 0.9931, "step": 3372 }, { "epoch": 1.371213661313275, "grad_norm": 0.11053670197725296, "learning_rate": 6.293507022186037e-05, "loss": 0.9689, "step": 3373 }, { "epoch": 1.3716202480178898, "grad_norm": 0.10878605395555496, "learning_rate": 6.289436189700794e-05, "loss": 0.988, "step": 3374 }, { "epoch": 1.3720268347225046, "grad_norm": 0.11353089660406113, "learning_rate": 6.285365357215552e-05, "loss": 1.0371, "step": 3375 }, { "epoch": 1.3724334214271194, "grad_norm": 0.10434375703334808, "learning_rate": 6.281294524730307e-05, "loss": 0.9719, "step": 3376 }, { "epoch": 1.372840008131734, "grad_norm": 0.11517924815416336, "learning_rate": 6.277223692245064e-05, "loss": 0.97, "step": 3377 }, { "epoch": 1.3732465948363488, "grad_norm": 0.09761136770248413, "learning_rate": 6.273152859759821e-05, "loss": 0.9268, "step": 3378 }, { "epoch": 1.3736531815409636, "grad_norm": 0.11084869503974915, "learning_rate": 6.269082027274578e-05, "loss": 1.0954, "step": 3379 }, { "epoch": 1.3740597682455784, "grad_norm": 0.11147692054510117, "learning_rate": 6.265011194789334e-05, "loss": 0.9841, "step": 3380 }, { "epoch": 1.3744663549501932, "grad_norm": 0.09878647327423096, "learning_rate": 6.260940362304092e-05, "loss": 0.9229, "step": 3381 }, { "epoch": 1.3748729416548078, "grad_norm": 0.11191148310899734, "learning_rate": 6.256869529818848e-05, "loss": 1.0527, "step": 3382 }, { "epoch": 1.3752795283594226, "grad_norm": 0.09895115345716476, "learning_rate": 6.252798697333605e-05, "loss": 0.8546, "step": 3383 }, { "epoch": 1.3756861150640374, "grad_norm": 0.10903530567884445, "learning_rate": 6.248727864848361e-05, "loss": 0.9679, "step": 3384 }, { "epoch": 1.3760927017686522, "grad_norm": 0.1128024086356163, "learning_rate": 6.244657032363118e-05, "loss": 0.9939, "step": 3385 }, { "epoch": 1.376499288473267, 
"grad_norm": 0.10549446940422058, "learning_rate": 6.240586199877876e-05, "loss": 0.9451, "step": 3386 }, { "epoch": 1.3769058751778818, "grad_norm": 0.10267242044210434, "learning_rate": 6.236515367392633e-05, "loss": 0.9846, "step": 3387 }, { "epoch": 1.3773124618824966, "grad_norm": 0.09926862269639969, "learning_rate": 6.232444534907388e-05, "loss": 0.8743, "step": 3388 }, { "epoch": 1.3777190485871111, "grad_norm": 0.09842429310083389, "learning_rate": 6.228373702422145e-05, "loss": 0.8384, "step": 3389 }, { "epoch": 1.378125635291726, "grad_norm": 0.10774646699428558, "learning_rate": 6.224302869936902e-05, "loss": 1.0182, "step": 3390 }, { "epoch": 1.3785322219963407, "grad_norm": 0.10634428262710571, "learning_rate": 6.220232037451659e-05, "loss": 0.9824, "step": 3391 }, { "epoch": 1.3789388087009555, "grad_norm": 0.1014680489897728, "learning_rate": 6.216161204966417e-05, "loss": 0.8391, "step": 3392 }, { "epoch": 1.3793453954055703, "grad_norm": 0.1142488569021225, "learning_rate": 6.212090372481174e-05, "loss": 0.9874, "step": 3393 }, { "epoch": 1.379751982110185, "grad_norm": 0.11224708706140518, "learning_rate": 6.208019539995929e-05, "loss": 1.1148, "step": 3394 }, { "epoch": 1.3801585688147997, "grad_norm": 0.0988827645778656, "learning_rate": 6.203948707510686e-05, "loss": 0.9135, "step": 3395 }, { "epoch": 1.3805651555194145, "grad_norm": 0.10036725550889969, "learning_rate": 6.199877875025443e-05, "loss": 0.9165, "step": 3396 }, { "epoch": 1.3809717422240293, "grad_norm": 0.09397398680448532, "learning_rate": 6.195807042540199e-05, "loss": 0.8081, "step": 3397 }, { "epoch": 1.381378328928644, "grad_norm": 0.10657388716936111, "learning_rate": 6.191736210054957e-05, "loss": 0.9866, "step": 3398 }, { "epoch": 1.3817849156332587, "grad_norm": 0.11075644195079803, "learning_rate": 6.187665377569714e-05, "loss": 0.9838, "step": 3399 }, { "epoch": 1.3821915023378737, "grad_norm": 0.10680566728115082, "learning_rate": 6.18359454508447e-05, "loss": 
0.9901, "step": 3400 }, { "epoch": 1.3825980890424883, "grad_norm": 0.11256230622529984, "learning_rate": 6.179523712599226e-05, "loss": 0.954, "step": 3401 }, { "epoch": 1.383004675747103, "grad_norm": 0.10656154155731201, "learning_rate": 6.175452880113983e-05, "loss": 0.9413, "step": 3402 }, { "epoch": 1.3834112624517179, "grad_norm": 0.10036874562501907, "learning_rate": 6.17138204762874e-05, "loss": 0.8834, "step": 3403 }, { "epoch": 1.3838178491563327, "grad_norm": 0.1091538667678833, "learning_rate": 6.167311215143498e-05, "loss": 0.9906, "step": 3404 }, { "epoch": 1.3842244358609475, "grad_norm": 0.09833438694477081, "learning_rate": 6.163240382658255e-05, "loss": 0.9316, "step": 3405 }, { "epoch": 1.384631022565562, "grad_norm": 0.11654241383075714, "learning_rate": 6.15916955017301e-05, "loss": 1.0712, "step": 3406 }, { "epoch": 1.3850376092701768, "grad_norm": 0.10447299480438232, "learning_rate": 6.155098717687767e-05, "loss": 0.9511, "step": 3407 }, { "epoch": 1.3854441959747916, "grad_norm": 0.1039959117770195, "learning_rate": 6.151027885202524e-05, "loss": 0.953, "step": 3408 }, { "epoch": 1.3858507826794064, "grad_norm": 0.10340915620326996, "learning_rate": 6.146957052717282e-05, "loss": 0.9893, "step": 3409 }, { "epoch": 1.3862573693840212, "grad_norm": 0.10685911029577255, "learning_rate": 6.142886220232039e-05, "loss": 0.8551, "step": 3410 }, { "epoch": 1.3866639560886358, "grad_norm": 0.10175175964832306, "learning_rate": 6.138815387746795e-05, "loss": 0.9222, "step": 3411 }, { "epoch": 1.3870705427932506, "grad_norm": 0.10555768758058548, "learning_rate": 6.134744555261551e-05, "loss": 0.9464, "step": 3412 }, { "epoch": 1.3874771294978654, "grad_norm": 0.10031068325042725, "learning_rate": 6.130673722776308e-05, "loss": 0.9038, "step": 3413 }, { "epoch": 1.3878837162024802, "grad_norm": 0.10450392216444016, "learning_rate": 6.126602890291064e-05, "loss": 0.9718, "step": 3414 }, { "epoch": 1.388290302907095, "grad_norm": 0.1052430272102356, 
"learning_rate": 6.122532057805822e-05, "loss": 0.9401, "step": 3415 }, { "epoch": 1.3886968896117096, "grad_norm": 0.11526583880186081, "learning_rate": 6.118461225320579e-05, "loss": 1.0543, "step": 3416 }, { "epoch": 1.3891034763163246, "grad_norm": 0.10712984949350357, "learning_rate": 6.114390392835335e-05, "loss": 1.0006, "step": 3417 }, { "epoch": 1.3895100630209392, "grad_norm": 0.10705496370792389, "learning_rate": 6.110319560350091e-05, "loss": 1.0115, "step": 3418 }, { "epoch": 1.389916649725554, "grad_norm": 0.10251638293266296, "learning_rate": 6.106248727864848e-05, "loss": 0.9905, "step": 3419 }, { "epoch": 1.3903232364301688, "grad_norm": 0.10566147416830063, "learning_rate": 6.102177895379605e-05, "loss": 0.9758, "step": 3420 }, { "epoch": 1.3907298231347835, "grad_norm": 0.09883508831262589, "learning_rate": 6.098107062894362e-05, "loss": 0.8685, "step": 3421 }, { "epoch": 1.3911364098393983, "grad_norm": 0.10582920163869858, "learning_rate": 6.094036230409119e-05, "loss": 0.9492, "step": 3422 }, { "epoch": 1.391542996544013, "grad_norm": 0.0955805778503418, "learning_rate": 6.089965397923876e-05, "loss": 0.8709, "step": 3423 }, { "epoch": 1.3919495832486277, "grad_norm": 0.10453370958566666, "learning_rate": 6.0858945654386326e-05, "loss": 0.9211, "step": 3424 }, { "epoch": 1.3923561699532425, "grad_norm": 0.09814976155757904, "learning_rate": 6.081823732953389e-05, "loss": 0.872, "step": 3425 }, { "epoch": 1.3927627566578573, "grad_norm": 0.11669237911701202, "learning_rate": 6.0777529004681454e-05, "loss": 1.0086, "step": 3426 }, { "epoch": 1.3931693433624721, "grad_norm": 0.10733123868703842, "learning_rate": 6.073682067982903e-05, "loss": 0.9742, "step": 3427 }, { "epoch": 1.3935759300670867, "grad_norm": 0.10307322442531586, "learning_rate": 6.0696112354976596e-05, "loss": 0.9764, "step": 3428 }, { "epoch": 1.3939825167717015, "grad_norm": 0.09604302793741226, "learning_rate": 6.0655404030124164e-05, "loss": 0.8573, "step": 3429 }, { 
"epoch": 1.3943891034763163, "grad_norm": 0.10685808956623077, "learning_rate": 6.061469570527173e-05, "loss": 0.9497, "step": 3430 }, { "epoch": 1.394795690180931, "grad_norm": 0.10362914949655533, "learning_rate": 6.057398738041929e-05, "loss": 0.947, "step": 3431 }, { "epoch": 1.3952022768855459, "grad_norm": 0.10438563674688339, "learning_rate": 6.0533279055566874e-05, "loss": 0.9665, "step": 3432 }, { "epoch": 1.3956088635901607, "grad_norm": 0.11194411665201187, "learning_rate": 6.0492570730714435e-05, "loss": 0.9862, "step": 3433 }, { "epoch": 1.3960154502947755, "grad_norm": 0.11035612970590591, "learning_rate": 6.0451862405862e-05, "loss": 0.9924, "step": 3434 }, { "epoch": 1.39642203699939, "grad_norm": 0.10321981459856033, "learning_rate": 6.041115408100957e-05, "loss": 0.997, "step": 3435 }, { "epoch": 1.3968286237040048, "grad_norm": 0.10283782333135605, "learning_rate": 6.037044575615714e-05, "loss": 1.001, "step": 3436 }, { "epoch": 1.3972352104086196, "grad_norm": 0.10645655542612076, "learning_rate": 6.03297374313047e-05, "loss": 0.9283, "step": 3437 }, { "epoch": 1.3976417971132344, "grad_norm": 0.11539904773235321, "learning_rate": 6.028902910645228e-05, "loss": 1.0165, "step": 3438 }, { "epoch": 1.3980483838178492, "grad_norm": 0.1119072213768959, "learning_rate": 6.024832078159984e-05, "loss": 1.0619, "step": 3439 }, { "epoch": 1.3984549705224638, "grad_norm": 0.10923818498849869, "learning_rate": 6.020761245674741e-05, "loss": 1.0457, "step": 3440 }, { "epoch": 1.3988615572270786, "grad_norm": 0.09986556321382523, "learning_rate": 6.0166904131894976e-05, "loss": 0.9656, "step": 3441 }, { "epoch": 1.3992681439316934, "grad_norm": 0.10424059629440308, "learning_rate": 6.012619580704254e-05, "loss": 0.9985, "step": 3442 }, { "epoch": 1.3996747306363082, "grad_norm": 0.09739932417869568, "learning_rate": 6.0085487482190104e-05, "loss": 0.8726, "step": 3443 }, { "epoch": 1.400081317340923, "grad_norm": 0.10547023266553879, "learning_rate": 
6.0044779157337685e-05, "loss": 1.0137, "step": 3444 }, { "epoch": 1.4004879040455376, "grad_norm": 0.09859997779130936, "learning_rate": 6.0004070832485246e-05, "loss": 0.9356, "step": 3445 }, { "epoch": 1.4008944907501526, "grad_norm": 0.10799646377563477, "learning_rate": 5.9963362507632814e-05, "loss": 0.9075, "step": 3446 }, { "epoch": 1.4013010774547672, "grad_norm": 0.10137838125228882, "learning_rate": 5.992265418278038e-05, "loss": 0.9959, "step": 3447 }, { "epoch": 1.401707664159382, "grad_norm": 0.10512836277484894, "learning_rate": 5.988194585792795e-05, "loss": 0.9675, "step": 3448 }, { "epoch": 1.4021142508639968, "grad_norm": 0.1019698828458786, "learning_rate": 5.984123753307551e-05, "loss": 0.8817, "step": 3449 }, { "epoch": 1.4025208375686116, "grad_norm": 0.10622533410787582, "learning_rate": 5.980052920822309e-05, "loss": 0.9868, "step": 3450 }, { "epoch": 1.4029274242732264, "grad_norm": 0.1112954169511795, "learning_rate": 5.975982088337065e-05, "loss": 1.0354, "step": 3451 }, { "epoch": 1.403334010977841, "grad_norm": 0.09042443335056305, "learning_rate": 5.971911255851822e-05, "loss": 0.819, "step": 3452 }, { "epoch": 1.4037405976824557, "grad_norm": 0.10594133287668228, "learning_rate": 5.967840423366579e-05, "loss": 0.9371, "step": 3453 }, { "epoch": 1.4041471843870705, "grad_norm": 0.10706698149442673, "learning_rate": 5.963769590881335e-05, "loss": 1.0281, "step": 3454 }, { "epoch": 1.4045537710916853, "grad_norm": 0.09997668862342834, "learning_rate": 5.959698758396093e-05, "loss": 0.9686, "step": 3455 }, { "epoch": 1.4049603577963001, "grad_norm": 0.11369183659553528, "learning_rate": 5.95562792591085e-05, "loss": 1.0679, "step": 3456 }, { "epoch": 1.4053669445009147, "grad_norm": 0.0964435562491417, "learning_rate": 5.951557093425606e-05, "loss": 0.8741, "step": 3457 }, { "epoch": 1.4057735312055295, "grad_norm": 0.10728949308395386, "learning_rate": 5.9474862609403625e-05, "loss": 0.9704, "step": 3458 }, { "epoch": 
1.4061801179101443, "grad_norm": 0.0988539531826973, "learning_rate": 5.943415428455119e-05, "loss": 0.8462, "step": 3459 }, { "epoch": 1.406586704614759, "grad_norm": 0.09907898306846619, "learning_rate": 5.9393445959698754e-05, "loss": 0.9082, "step": 3460 }, { "epoch": 1.406993291319374, "grad_norm": 0.10469436645507812, "learning_rate": 5.9352737634846335e-05, "loss": 0.9228, "step": 3461 }, { "epoch": 1.4073998780239887, "grad_norm": 0.09769507497549057, "learning_rate": 5.93120293099939e-05, "loss": 0.8791, "step": 3462 }, { "epoch": 1.4078064647286035, "grad_norm": 0.10890262573957443, "learning_rate": 5.9271320985141464e-05, "loss": 0.984, "step": 3463 }, { "epoch": 1.408213051433218, "grad_norm": 0.10154248028993607, "learning_rate": 5.923061266028903e-05, "loss": 0.9842, "step": 3464 }, { "epoch": 1.4086196381378329, "grad_norm": 0.10687004029750824, "learning_rate": 5.91899043354366e-05, "loss": 0.9201, "step": 3465 }, { "epoch": 1.4090262248424477, "grad_norm": 0.10408183187246323, "learning_rate": 5.914919601058416e-05, "loss": 0.866, "step": 3466 }, { "epoch": 1.4094328115470625, "grad_norm": 0.11365069448947906, "learning_rate": 5.910848768573174e-05, "loss": 1.0448, "step": 3467 }, { "epoch": 1.4098393982516773, "grad_norm": 0.10718761384487152, "learning_rate": 5.90677793608793e-05, "loss": 0.9858, "step": 3468 }, { "epoch": 1.4102459849562918, "grad_norm": 0.11558745801448822, "learning_rate": 5.902707103602687e-05, "loss": 0.9746, "step": 3469 }, { "epoch": 1.4106525716609066, "grad_norm": 0.10393566638231277, "learning_rate": 5.898636271117444e-05, "loss": 0.9475, "step": 3470 }, { "epoch": 1.4110591583655214, "grad_norm": 0.10006933659315109, "learning_rate": 5.8945654386322005e-05, "loss": 0.9519, "step": 3471 }, { "epoch": 1.4114657450701362, "grad_norm": 0.10952023416757584, "learning_rate": 5.8904946061469565e-05, "loss": 0.9115, "step": 3472 }, { "epoch": 1.411872331774751, "grad_norm": 0.10266458243131638, "learning_rate": 
5.886423773661715e-05, "loss": 0.8776, "step": 3473 }, { "epoch": 1.4122789184793656, "grad_norm": 0.10357289016246796, "learning_rate": 5.882352941176471e-05, "loss": 0.8574, "step": 3474 }, { "epoch": 1.4126855051839806, "grad_norm": 0.10914462804794312, "learning_rate": 5.8782821086912275e-05, "loss": 0.9747, "step": 3475 }, { "epoch": 1.4130920918885952, "grad_norm": 0.10908231884241104, "learning_rate": 5.874211276205984e-05, "loss": 1.0257, "step": 3476 }, { "epoch": 1.41349867859321, "grad_norm": 0.1127469390630722, "learning_rate": 5.870140443720741e-05, "loss": 1.0647, "step": 3477 }, { "epoch": 1.4139052652978248, "grad_norm": 0.10754305124282837, "learning_rate": 5.8660696112354985e-05, "loss": 1.0463, "step": 3478 }, { "epoch": 1.4143118520024396, "grad_norm": 0.10822881013154984, "learning_rate": 5.861998778750255e-05, "loss": 0.9339, "step": 3479 }, { "epoch": 1.4147184387070544, "grad_norm": 0.10191161185503006, "learning_rate": 5.857927946265011e-05, "loss": 0.9637, "step": 3480 }, { "epoch": 1.415125025411669, "grad_norm": 0.10310634225606918, "learning_rate": 5.853857113779768e-05, "loss": 0.9967, "step": 3481 }, { "epoch": 1.4155316121162838, "grad_norm": 0.10512620210647583, "learning_rate": 5.849786281294525e-05, "loss": 1.0131, "step": 3482 }, { "epoch": 1.4159381988208986, "grad_norm": 0.10264958441257477, "learning_rate": 5.8457154488092816e-05, "loss": 0.9531, "step": 3483 }, { "epoch": 1.4163447855255134, "grad_norm": 0.1196940541267395, "learning_rate": 5.841644616324039e-05, "loss": 1.1297, "step": 3484 }, { "epoch": 1.4167513722301281, "grad_norm": 0.10680915415287018, "learning_rate": 5.837573783838796e-05, "loss": 0.8811, "step": 3485 }, { "epoch": 1.4171579589347427, "grad_norm": 0.10389327257871628, "learning_rate": 5.833502951353552e-05, "loss": 0.9699, "step": 3486 }, { "epoch": 1.4175645456393575, "grad_norm": 0.10058487951755524, "learning_rate": 5.829432118868309e-05, "loss": 0.8641, "step": 3487 }, { "epoch": 
1.4179711323439723, "grad_norm": 0.10796665400266647, "learning_rate": 5.8253612863830654e-05, "loss": 0.9222, "step": 3488 }, { "epoch": 1.4183777190485871, "grad_norm": 0.10643795132637024, "learning_rate": 5.821290453897822e-05, "loss": 0.9759, "step": 3489 }, { "epoch": 1.418784305753202, "grad_norm": 0.1110243946313858, "learning_rate": 5.8172196214125796e-05, "loss": 1.0283, "step": 3490 }, { "epoch": 1.4191908924578167, "grad_norm": 0.10213904827833176, "learning_rate": 5.8131487889273364e-05, "loss": 0.9287, "step": 3491 }, { "epoch": 1.4195974791624315, "grad_norm": 0.11450214684009552, "learning_rate": 5.8090779564420925e-05, "loss": 1.0305, "step": 3492 }, { "epoch": 1.420004065867046, "grad_norm": 0.0998944565653801, "learning_rate": 5.805007123956849e-05, "loss": 0.9308, "step": 3493 }, { "epoch": 1.4204106525716609, "grad_norm": 0.10851487517356873, "learning_rate": 5.800936291471606e-05, "loss": 0.9519, "step": 3494 }, { "epoch": 1.4208172392762757, "grad_norm": 0.10219226777553558, "learning_rate": 5.796865458986363e-05, "loss": 0.9442, "step": 3495 }, { "epoch": 1.4212238259808905, "grad_norm": 0.10230858623981476, "learning_rate": 5.79279462650112e-05, "loss": 0.9174, "step": 3496 }, { "epoch": 1.4216304126855053, "grad_norm": 0.10335145145654678, "learning_rate": 5.788723794015877e-05, "loss": 0.9702, "step": 3497 }, { "epoch": 1.4220369993901198, "grad_norm": 0.10137276351451874, "learning_rate": 5.784652961530633e-05, "loss": 0.9462, "step": 3498 }, { "epoch": 1.4224435860947346, "grad_norm": 0.10088439285755157, "learning_rate": 5.78058212904539e-05, "loss": 0.9083, "step": 3499 }, { "epoch": 1.4228501727993494, "grad_norm": 51.91322708129883, "learning_rate": 5.7765112965601466e-05, "loss": 1.0057, "step": 3500 }, { "epoch": 1.4232567595039642, "grad_norm": 0.11275726556777954, "learning_rate": 5.772440464074904e-05, "loss": 1.0476, "step": 3501 }, { "epoch": 1.423663346208579, "grad_norm": 0.10602027922868729, "learning_rate": 
5.768369631589661e-05, "loss": 0.9777, "step": 3502 }, { "epoch": 1.4240699329131936, "grad_norm": 0.10429500043392181, "learning_rate": 5.7642987991044176e-05, "loss": 0.9427, "step": 3503 }, { "epoch": 1.4244765196178086, "grad_norm": 0.10678494721651077, "learning_rate": 5.7602279666191737e-05, "loss": 0.961, "step": 3504 }, { "epoch": 1.4248831063224232, "grad_norm": 0.10437629371881485, "learning_rate": 5.7561571341339304e-05, "loss": 0.943, "step": 3505 }, { "epoch": 1.425289693027038, "grad_norm": 0.11122875660657883, "learning_rate": 5.752086301648687e-05, "loss": 0.9774, "step": 3506 }, { "epoch": 1.4256962797316528, "grad_norm": 0.10330124944448471, "learning_rate": 5.7480154691634446e-05, "loss": 0.9258, "step": 3507 }, { "epoch": 1.4261028664362676, "grad_norm": 0.11003297567367554, "learning_rate": 5.7439446366782014e-05, "loss": 0.9034, "step": 3508 }, { "epoch": 1.4265094531408824, "grad_norm": 0.1117345541715622, "learning_rate": 5.739873804192958e-05, "loss": 0.9539, "step": 3509 }, { "epoch": 1.426916039845497, "grad_norm": 0.10259495675563812, "learning_rate": 5.735802971707714e-05, "loss": 0.9002, "step": 3510 }, { "epoch": 1.4273226265501118, "grad_norm": 0.11275705695152283, "learning_rate": 5.731732139222471e-05, "loss": 1.1176, "step": 3511 }, { "epoch": 1.4277292132547266, "grad_norm": 0.10503105074167252, "learning_rate": 5.727661306737228e-05, "loss": 0.9467, "step": 3512 }, { "epoch": 1.4281357999593414, "grad_norm": 0.1054445430636406, "learning_rate": 5.723590474251985e-05, "loss": 1.0557, "step": 3513 }, { "epoch": 1.4285423866639562, "grad_norm": 0.1129273921251297, "learning_rate": 5.719519641766742e-05, "loss": 1.0294, "step": 3514 }, { "epoch": 1.4289489733685707, "grad_norm": 0.10943814367055893, "learning_rate": 5.715448809281499e-05, "loss": 0.9208, "step": 3515 }, { "epoch": 1.4293555600731855, "grad_norm": 0.1063983365893364, "learning_rate": 5.711377976796255e-05, "loss": 0.938, "step": 3516 }, { "epoch": 1.4297621467778003, 
"grad_norm": 0.11121434718370438, "learning_rate": 5.7073071443110116e-05, "loss": 0.9533, "step": 3517 }, { "epoch": 1.4301687334824151, "grad_norm": 0.1092074066400528, "learning_rate": 5.7032363118257683e-05, "loss": 1.0131, "step": 3518 }, { "epoch": 1.43057532018703, "grad_norm": 0.1058395728468895, "learning_rate": 5.699165479340526e-05, "loss": 0.9296, "step": 3519 }, { "epoch": 1.4309819068916445, "grad_norm": 0.10550788789987564, "learning_rate": 5.6950946468552825e-05, "loss": 1.0363, "step": 3520 }, { "epoch": 1.4313884935962595, "grad_norm": 0.09717314690351486, "learning_rate": 5.6910238143700386e-05, "loss": 0.896, "step": 3521 }, { "epoch": 1.431795080300874, "grad_norm": 0.10000022500753403, "learning_rate": 5.6869529818847954e-05, "loss": 0.9379, "step": 3522 }, { "epoch": 1.432201667005489, "grad_norm": 0.11158038675785065, "learning_rate": 5.682882149399552e-05, "loss": 1.0327, "step": 3523 }, { "epoch": 1.4326082537101037, "grad_norm": 0.11050955951213837, "learning_rate": 5.6788113169143096e-05, "loss": 0.9745, "step": 3524 }, { "epoch": 1.4330148404147185, "grad_norm": 0.10940226912498474, "learning_rate": 5.6747404844290664e-05, "loss": 1.0722, "step": 3525 }, { "epoch": 1.4334214271193333, "grad_norm": 0.09851626306772232, "learning_rate": 5.670669651943823e-05, "loss": 0.8825, "step": 3526 }, { "epoch": 1.4338280138239479, "grad_norm": 0.10658843070268631, "learning_rate": 5.666598819458579e-05, "loss": 0.9659, "step": 3527 }, { "epoch": 1.4342346005285627, "grad_norm": 0.10467347502708435, "learning_rate": 5.662527986973336e-05, "loss": 1.0181, "step": 3528 }, { "epoch": 1.4346411872331775, "grad_norm": 0.09940101206302643, "learning_rate": 5.658457154488093e-05, "loss": 0.9297, "step": 3529 }, { "epoch": 1.4350477739377923, "grad_norm": 0.10660876333713531, "learning_rate": 5.65438632200285e-05, "loss": 0.9767, "step": 3530 }, { "epoch": 1.435454360642407, "grad_norm": 0.10834870487451553, "learning_rate": 5.650315489517607e-05, "loss": 
0.9989, "step": 3531 }, { "epoch": 1.4358609473470216, "grad_norm": 0.11498194932937622, "learning_rate": 5.646244657032364e-05, "loss": 1.062, "step": 3532 }, { "epoch": 1.4362675340516364, "grad_norm": 0.10452236235141754, "learning_rate": 5.64217382454712e-05, "loss": 0.9455, "step": 3533 }, { "epoch": 1.4366741207562512, "grad_norm": 0.10281018912792206, "learning_rate": 5.6381029920618766e-05, "loss": 1.0519, "step": 3534 }, { "epoch": 1.437080707460866, "grad_norm": 0.1045062243938446, "learning_rate": 5.634032159576633e-05, "loss": 0.9285, "step": 3535 }, { "epoch": 1.4374872941654808, "grad_norm": 0.09464049339294434, "learning_rate": 5.629961327091391e-05, "loss": 0.8502, "step": 3536 }, { "epoch": 1.4378938808700956, "grad_norm": 0.11245562881231308, "learning_rate": 5.6258904946061475e-05, "loss": 1.0127, "step": 3537 }, { "epoch": 1.4383004675747104, "grad_norm": 0.10821034759283066, "learning_rate": 5.621819662120904e-05, "loss": 0.9578, "step": 3538 }, { "epoch": 1.438707054279325, "grad_norm": 0.09722383320331573, "learning_rate": 5.6177488296356604e-05, "loss": 0.9295, "step": 3539 }, { "epoch": 1.4391136409839398, "grad_norm": 0.09911882877349854, "learning_rate": 5.613677997150417e-05, "loss": 0.8467, "step": 3540 }, { "epoch": 1.4395202276885546, "grad_norm": 0.0992516502737999, "learning_rate": 5.609607164665174e-05, "loss": 0.8095, "step": 3541 }, { "epoch": 1.4399268143931694, "grad_norm": 0.10205196589231491, "learning_rate": 5.6055363321799313e-05, "loss": 0.8391, "step": 3542 }, { "epoch": 1.4403334010977842, "grad_norm": 0.10054369270801544, "learning_rate": 5.601465499694688e-05, "loss": 0.8823, "step": 3543 }, { "epoch": 1.4407399878023988, "grad_norm": 0.10473167896270752, "learning_rate": 5.597394667209445e-05, "loss": 1.0052, "step": 3544 }, { "epoch": 1.4411465745070136, "grad_norm": 0.1048002541065216, "learning_rate": 5.593323834724201e-05, "loss": 0.9106, "step": 3545 }, { "epoch": 1.4415531612116284, "grad_norm": 
0.10600331425666809, "learning_rate": 5.589253002238958e-05, "loss": 1.0016, "step": 3546 }, { "epoch": 1.4419597479162432, "grad_norm": 0.10743583738803864, "learning_rate": 5.585182169753715e-05, "loss": 1.0009, "step": 3547 }, { "epoch": 1.442366334620858, "grad_norm": 0.10742181539535522, "learning_rate": 5.581111337268472e-05, "loss": 0.861, "step": 3548 }, { "epoch": 1.4427729213254725, "grad_norm": 0.10677099227905273, "learning_rate": 5.577040504783229e-05, "loss": 1.0, "step": 3549 }, { "epoch": 1.4431795080300875, "grad_norm": 0.10537279397249222, "learning_rate": 5.5729696722979854e-05, "loss": 0.9087, "step": 3550 }, { "epoch": 1.4435860947347021, "grad_norm": 0.11347132921218872, "learning_rate": 5.5688988398127415e-05, "loss": 1.0444, "step": 3551 }, { "epoch": 1.443992681439317, "grad_norm": 0.10851515829563141, "learning_rate": 5.564828007327498e-05, "loss": 0.9637, "step": 3552 }, { "epoch": 1.4443992681439317, "grad_norm": 0.11019739508628845, "learning_rate": 5.560757174842256e-05, "loss": 1.0358, "step": 3553 }, { "epoch": 1.4448058548485465, "grad_norm": 0.10217945277690887, "learning_rate": 5.5566863423570125e-05, "loss": 0.8874, "step": 3554 }, { "epoch": 1.4452124415531613, "grad_norm": 0.099341981112957, "learning_rate": 5.552615509871769e-05, "loss": 0.8806, "step": 3555 }, { "epoch": 1.4456190282577759, "grad_norm": 0.11381018906831741, "learning_rate": 5.548544677386526e-05, "loss": 1.0111, "step": 3556 }, { "epoch": 1.4460256149623907, "grad_norm": 0.10798200964927673, "learning_rate": 5.544473844901282e-05, "loss": 1.0352, "step": 3557 }, { "epoch": 1.4464322016670055, "grad_norm": 0.10731607675552368, "learning_rate": 5.540403012416039e-05, "loss": 0.8602, "step": 3558 }, { "epoch": 1.4468387883716203, "grad_norm": 0.10364881157875061, "learning_rate": 5.536332179930796e-05, "loss": 0.9294, "step": 3559 }, { "epoch": 1.447245375076235, "grad_norm": 0.10474424064159393, "learning_rate": 5.532261347445553e-05, "loss": 0.922, "step": 
3560 }, { "epoch": 1.4476519617808496, "grad_norm": 0.0995747447013855, "learning_rate": 5.52819051496031e-05, "loss": 0.9506, "step": 3561 }, { "epoch": 1.4480585484854644, "grad_norm": 0.11012274771928787, "learning_rate": 5.5241196824750666e-05, "loss": 1.0621, "step": 3562 }, { "epoch": 1.4484651351900792, "grad_norm": 0.10382426530122757, "learning_rate": 5.520048849989823e-05, "loss": 0.8777, "step": 3563 }, { "epoch": 1.448871721894694, "grad_norm": 0.11155311018228531, "learning_rate": 5.5159780175045795e-05, "loss": 0.9709, "step": 3564 }, { "epoch": 1.4492783085993088, "grad_norm": 0.11111366748809814, "learning_rate": 5.511907185019337e-05, "loss": 0.9665, "step": 3565 }, { "epoch": 1.4496848953039236, "grad_norm": 0.09758581221103668, "learning_rate": 5.507836352534094e-05, "loss": 0.8338, "step": 3566 }, { "epoch": 1.4500914820085384, "grad_norm": 0.09697642177343369, "learning_rate": 5.5037655200488504e-05, "loss": 0.8714, "step": 3567 }, { "epoch": 1.450498068713153, "grad_norm": 0.11022147536277771, "learning_rate": 5.4996946875636065e-05, "loss": 1.0589, "step": 3568 }, { "epoch": 1.4509046554177678, "grad_norm": 0.10376861691474915, "learning_rate": 5.495623855078363e-05, "loss": 0.9349, "step": 3569 }, { "epoch": 1.4513112421223826, "grad_norm": 0.11290717124938965, "learning_rate": 5.4915530225931214e-05, "loss": 0.9621, "step": 3570 }, { "epoch": 1.4517178288269974, "grad_norm": 0.11141788959503174, "learning_rate": 5.4874821901078775e-05, "loss": 0.9851, "step": 3571 }, { "epoch": 1.4521244155316122, "grad_norm": 0.097492516040802, "learning_rate": 5.483411357622634e-05, "loss": 0.8546, "step": 3572 }, { "epoch": 1.4525310022362268, "grad_norm": 0.1105538010597229, "learning_rate": 5.479340525137391e-05, "loss": 0.9972, "step": 3573 }, { "epoch": 1.4529375889408416, "grad_norm": 0.11950520426034927, "learning_rate": 5.475269692652147e-05, "loss": 1.0069, "step": 3574 }, { "epoch": 1.4533441756454564, "grad_norm": 0.11691899597644806, 
"learning_rate": 5.471198860166904e-05, "loss": 1.1141, "step": 3575 }, { "epoch": 1.4537507623500712, "grad_norm": 0.10801886022090912, "learning_rate": 5.467128027681662e-05, "loss": 0.9687, "step": 3576 }, { "epoch": 1.454157349054686, "grad_norm": 0.10855500400066376, "learning_rate": 5.463057195196418e-05, "loss": 0.9704, "step": 3577 }, { "epoch": 1.4545639357593005, "grad_norm": 0.10491110384464264, "learning_rate": 5.458986362711175e-05, "loss": 0.8492, "step": 3578 }, { "epoch": 1.4549705224639156, "grad_norm": 0.10783471167087555, "learning_rate": 5.4549155302259316e-05, "loss": 0.955, "step": 3579 }, { "epoch": 1.4553771091685301, "grad_norm": 0.10471615940332413, "learning_rate": 5.450844697740688e-05, "loss": 0.9144, "step": 3580 }, { "epoch": 1.455783695873145, "grad_norm": 0.10220612585544586, "learning_rate": 5.4467738652554444e-05, "loss": 0.9811, "step": 3581 }, { "epoch": 1.4561902825777597, "grad_norm": 0.10163000226020813, "learning_rate": 5.442703032770202e-05, "loss": 0.9597, "step": 3582 }, { "epoch": 1.4565968692823745, "grad_norm": 0.11535109579563141, "learning_rate": 5.4386322002849586e-05, "loss": 1.0492, "step": 3583 }, { "epoch": 1.4570034559869893, "grad_norm": 0.11015263944864273, "learning_rate": 5.4345613677997154e-05, "loss": 1.0299, "step": 3584 }, { "epoch": 1.457410042691604, "grad_norm": 0.10480646044015884, "learning_rate": 5.430490535314472e-05, "loss": 1.0163, "step": 3585 }, { "epoch": 1.4578166293962187, "grad_norm": 0.09951364248991013, "learning_rate": 5.426419702829228e-05, "loss": 0.8642, "step": 3586 }, { "epoch": 1.4582232161008335, "grad_norm": 0.10405910015106201, "learning_rate": 5.422348870343985e-05, "loss": 0.9905, "step": 3587 }, { "epoch": 1.4586298028054483, "grad_norm": 0.10927630215883255, "learning_rate": 5.4182780378587425e-05, "loss": 0.9738, "step": 3588 }, { "epoch": 1.459036389510063, "grad_norm": 0.10846424102783203, "learning_rate": 5.414207205373499e-05, "loss": 0.9662, "step": 3589 }, { 
"epoch": 1.4594429762146777, "grad_norm": 0.1038946732878685, "learning_rate": 5.410136372888256e-05, "loss": 0.9247, "step": 3590 }, { "epoch": 1.4598495629192925, "grad_norm": 0.10503431409597397, "learning_rate": 5.406065540403013e-05, "loss": 0.9135, "step": 3591 }, { "epoch": 1.4602561496239073, "grad_norm": 0.11139222979545593, "learning_rate": 5.401994707917769e-05, "loss": 1.0107, "step": 3592 }, { "epoch": 1.460662736328522, "grad_norm": 0.1007532998919487, "learning_rate": 5.397923875432527e-05, "loss": 0.8869, "step": 3593 }, { "epoch": 1.4610693230331369, "grad_norm": 0.10141917318105698, "learning_rate": 5.393853042947283e-05, "loss": 0.9378, "step": 3594 }, { "epoch": 1.4614759097377517, "grad_norm": 0.10145898908376694, "learning_rate": 5.38978221046204e-05, "loss": 0.9691, "step": 3595 }, { "epoch": 1.4618824964423665, "grad_norm": 0.11464249342679977, "learning_rate": 5.3857113779767966e-05, "loss": 1.0785, "step": 3596 }, { "epoch": 1.462289083146981, "grad_norm": 0.11114252358675003, "learning_rate": 5.381640545491553e-05, "loss": 1.0755, "step": 3597 }, { "epoch": 1.4626956698515958, "grad_norm": 0.10882499814033508, "learning_rate": 5.3775697130063094e-05, "loss": 0.9431, "step": 3598 }, { "epoch": 1.4631022565562106, "grad_norm": 0.09124311059713364, "learning_rate": 5.3734988805210675e-05, "loss": 0.8333, "step": 3599 }, { "epoch": 1.4635088432608254, "grad_norm": 0.1018030196428299, "learning_rate": 5.3694280480358236e-05, "loss": 0.965, "step": 3600 }, { "epoch": 1.4639154299654402, "grad_norm": 0.10035334527492523, "learning_rate": 5.3653572155505804e-05, "loss": 0.7887, "step": 3601 }, { "epoch": 1.4643220166700548, "grad_norm": 0.1025049090385437, "learning_rate": 5.361286383065337e-05, "loss": 0.981, "step": 3602 }, { "epoch": 1.4647286033746696, "grad_norm": 0.09964943677186966, "learning_rate": 5.357215550580094e-05, "loss": 0.795, "step": 3603 }, { "epoch": 1.4651351900792844, "grad_norm": 0.10191620141267776, "learning_rate": 
5.35314471809485e-05, "loss": 0.9007, "step": 3604 }, { "epoch": 1.4655417767838992, "grad_norm": 0.10153241455554962, "learning_rate": 5.349073885609608e-05, "loss": 0.9305, "step": 3605 }, { "epoch": 1.465948363488514, "grad_norm": 0.09318286180496216, "learning_rate": 5.345003053124364e-05, "loss": 0.8347, "step": 3606 }, { "epoch": 1.4663549501931286, "grad_norm": 0.10995296388864517, "learning_rate": 5.340932220639121e-05, "loss": 0.9451, "step": 3607 }, { "epoch": 1.4667615368977436, "grad_norm": 0.10251913964748383, "learning_rate": 5.336861388153878e-05, "loss": 0.9004, "step": 3608 }, { "epoch": 1.4671681236023582, "grad_norm": 0.10498196631669998, "learning_rate": 5.3327905556686345e-05, "loss": 0.9679, "step": 3609 }, { "epoch": 1.467574710306973, "grad_norm": 0.09520924836397171, "learning_rate": 5.3287197231833906e-05, "loss": 0.8822, "step": 3610 }, { "epoch": 1.4679812970115877, "grad_norm": 0.10204574465751648, "learning_rate": 5.324648890698149e-05, "loss": 0.9217, "step": 3611 }, { "epoch": 1.4683878837162025, "grad_norm": 0.11443029344081879, "learning_rate": 5.320578058212905e-05, "loss": 1.0609, "step": 3612 }, { "epoch": 1.4687944704208173, "grad_norm": 0.10830609500408173, "learning_rate": 5.3165072257276615e-05, "loss": 1.0313, "step": 3613 }, { "epoch": 1.469201057125432, "grad_norm": 0.10037508606910706, "learning_rate": 5.312436393242418e-05, "loss": 0.9576, "step": 3614 }, { "epoch": 1.4696076438300467, "grad_norm": 0.09800178557634354, "learning_rate": 5.3083655607571744e-05, "loss": 0.8664, "step": 3615 }, { "epoch": 1.4700142305346615, "grad_norm": 0.10950475931167603, "learning_rate": 5.3042947282719325e-05, "loss": 0.9401, "step": 3616 }, { "epoch": 1.4704208172392763, "grad_norm": 0.10776437073945999, "learning_rate": 5.300223895786689e-05, "loss": 0.8952, "step": 3617 }, { "epoch": 1.470827403943891, "grad_norm": 0.10562727600336075, "learning_rate": 5.2961530633014454e-05, "loss": 0.9192, "step": 3618 }, { "epoch": 
1.4712339906485057, "grad_norm": 0.10100565105676651, "learning_rate": 5.292082230816202e-05, "loss": 0.9039, "step": 3619 }, { "epoch": 1.4716405773531205, "grad_norm": 0.10758239030838013, "learning_rate": 5.288011398330959e-05, "loss": 0.8568, "step": 3620 }, { "epoch": 1.4720471640577353, "grad_norm": 0.10886979103088379, "learning_rate": 5.283940565845715e-05, "loss": 0.9095, "step": 3621 }, { "epoch": 1.47245375076235, "grad_norm": 0.10650348663330078, "learning_rate": 5.279869733360473e-05, "loss": 0.9933, "step": 3622 }, { "epoch": 1.4728603374669649, "grad_norm": 0.1114429384469986, "learning_rate": 5.27579890087523e-05, "loss": 1.014, "step": 3623 }, { "epoch": 1.4732669241715795, "grad_norm": 0.10653100162744522, "learning_rate": 5.271728068389986e-05, "loss": 0.9635, "step": 3624 }, { "epoch": 1.4736735108761945, "grad_norm": 0.09530437737703323, "learning_rate": 5.267657235904743e-05, "loss": 0.8877, "step": 3625 }, { "epoch": 1.474080097580809, "grad_norm": 0.10414159297943115, "learning_rate": 5.2635864034194995e-05, "loss": 0.9762, "step": 3626 }, { "epoch": 1.4744866842854238, "grad_norm": 0.1114436537027359, "learning_rate": 5.2595155709342556e-05, "loss": 1.0022, "step": 3627 }, { "epoch": 1.4748932709900386, "grad_norm": 0.1078386902809143, "learning_rate": 5.255444738449014e-05, "loss": 0.9903, "step": 3628 }, { "epoch": 1.4752998576946534, "grad_norm": 0.11050703376531601, "learning_rate": 5.2513739059637704e-05, "loss": 1.0857, "step": 3629 }, { "epoch": 1.4757064443992682, "grad_norm": 0.10695330053567886, "learning_rate": 5.2473030734785265e-05, "loss": 0.9716, "step": 3630 }, { "epoch": 1.4761130311038828, "grad_norm": 0.0953662171959877, "learning_rate": 5.243232240993283e-05, "loss": 0.908, "step": 3631 }, { "epoch": 1.4765196178084976, "grad_norm": 0.10216762125492096, "learning_rate": 5.23916140850804e-05, "loss": 0.8609, "step": 3632 }, { "epoch": 1.4769262045131124, "grad_norm": 0.10386509448289871, "learning_rate": 
5.235090576022796e-05, "loss": 0.929, "step": 3633 }, { "epoch": 1.4773327912177272, "grad_norm": 0.11064155399799347, "learning_rate": 5.231019743537554e-05, "loss": 0.96, "step": 3634 }, { "epoch": 1.477739377922342, "grad_norm": 0.09775776416063309, "learning_rate": 5.2269489110523103e-05, "loss": 0.9248, "step": 3635 }, { "epoch": 1.4781459646269566, "grad_norm": 0.10205373913049698, "learning_rate": 5.222878078567067e-05, "loss": 0.9947, "step": 3636 }, { "epoch": 1.4785525513315716, "grad_norm": 0.10354917496442795, "learning_rate": 5.218807246081824e-05, "loss": 0.93, "step": 3637 }, { "epoch": 1.4789591380361862, "grad_norm": 0.11006239056587219, "learning_rate": 5.2147364135965806e-05, "loss": 0.9533, "step": 3638 }, { "epoch": 1.479365724740801, "grad_norm": 0.10362465679645538, "learning_rate": 5.210665581111338e-05, "loss": 0.8871, "step": 3639 }, { "epoch": 1.4797723114454158, "grad_norm": 0.103641577064991, "learning_rate": 5.206594748626095e-05, "loss": 0.9655, "step": 3640 }, { "epoch": 1.4801788981500306, "grad_norm": 0.10225971043109894, "learning_rate": 5.202523916140851e-05, "loss": 0.9341, "step": 3641 }, { "epoch": 1.4805854848546454, "grad_norm": 0.10774664580821991, "learning_rate": 5.198453083655608e-05, "loss": 0.8913, "step": 3642 }, { "epoch": 1.48099207155926, "grad_norm": 0.10669755935668945, "learning_rate": 5.1943822511703644e-05, "loss": 0.9488, "step": 3643 }, { "epoch": 1.4813986582638747, "grad_norm": 0.10005049407482147, "learning_rate": 5.190311418685121e-05, "loss": 0.8892, "step": 3644 }, { "epoch": 1.4818052449684895, "grad_norm": 0.09732303023338318, "learning_rate": 5.1862405861998787e-05, "loss": 0.8575, "step": 3645 }, { "epoch": 1.4822118316731043, "grad_norm": 0.10400817543268204, "learning_rate": 5.1821697537146354e-05, "loss": 0.9641, "step": 3646 }, { "epoch": 1.4826184183777191, "grad_norm": 0.09669985622167587, "learning_rate": 5.1780989212293915e-05, "loss": 0.8836, "step": 3647 }, { "epoch": 1.4830250050823337, 
"grad_norm": 0.0959673672914505, "learning_rate": 5.174028088744148e-05, "loss": 0.8678, "step": 3648 }, { "epoch": 1.4834315917869485, "grad_norm": 0.10482336580753326, "learning_rate": 5.169957256258905e-05, "loss": 0.94, "step": 3649 }, { "epoch": 1.4838381784915633, "grad_norm": 0.10262557864189148, "learning_rate": 5.165886423773662e-05, "loss": 0.9732, "step": 3650 }, { "epoch": 1.484244765196178, "grad_norm": 0.09837047010660172, "learning_rate": 5.161815591288419e-05, "loss": 0.9231, "step": 3651 }, { "epoch": 1.484651351900793, "grad_norm": 0.09526870399713516, "learning_rate": 5.157744758803176e-05, "loss": 0.8501, "step": 3652 }, { "epoch": 1.4850579386054075, "grad_norm": 0.10179829597473145, "learning_rate": 5.153673926317932e-05, "loss": 0.9574, "step": 3653 }, { "epoch": 1.4854645253100225, "grad_norm": 0.09894520044326782, "learning_rate": 5.149603093832689e-05, "loss": 0.9625, "step": 3654 }, { "epoch": 1.485871112014637, "grad_norm": 0.1013031080365181, "learning_rate": 5.1455322613474456e-05, "loss": 0.8199, "step": 3655 }, { "epoch": 1.4862776987192519, "grad_norm": 0.11384668201208115, "learning_rate": 5.1414614288622024e-05, "loss": 1.0381, "step": 3656 }, { "epoch": 1.4866842854238667, "grad_norm": 0.10072863847017288, "learning_rate": 5.13739059637696e-05, "loss": 0.9349, "step": 3657 }, { "epoch": 1.4870908721284815, "grad_norm": 0.10364335775375366, "learning_rate": 5.1333197638917166e-05, "loss": 0.8993, "step": 3658 }, { "epoch": 1.4874974588330963, "grad_norm": 0.09980635344982147, "learning_rate": 5.129248931406473e-05, "loss": 0.8615, "step": 3659 }, { "epoch": 1.4879040455377108, "grad_norm": 0.1187555268406868, "learning_rate": 5.1251780989212294e-05, "loss": 1.0562, "step": 3660 }, { "epoch": 1.4883106322423256, "grad_norm": 0.10120145976543427, "learning_rate": 5.121107266435986e-05, "loss": 0.9573, "step": 3661 }, { "epoch": 1.4887172189469404, "grad_norm": 0.10651153326034546, "learning_rate": 5.1170364339507436e-05, "loss": 
1.0343, "step": 3662 }, { "epoch": 1.4891238056515552, "grad_norm": 0.10437972843647003, "learning_rate": 5.1129656014655004e-05, "loss": 0.9506, "step": 3663 }, { "epoch": 1.48953039235617, "grad_norm": 0.12389584630727768, "learning_rate": 5.108894768980257e-05, "loss": 1.0083, "step": 3664 }, { "epoch": 1.4899369790607846, "grad_norm": 0.12323293089866638, "learning_rate": 5.104823936495013e-05, "loss": 1.0145, "step": 3665 }, { "epoch": 1.4903435657653994, "grad_norm": 0.10193384438753128, "learning_rate": 5.10075310400977e-05, "loss": 0.9636, "step": 3666 }, { "epoch": 1.4907501524700142, "grad_norm": 0.11072493344545364, "learning_rate": 5.096682271524527e-05, "loss": 0.9733, "step": 3667 }, { "epoch": 1.491156739174629, "grad_norm": 0.11082090437412262, "learning_rate": 5.092611439039284e-05, "loss": 0.9273, "step": 3668 }, { "epoch": 1.4915633258792438, "grad_norm": 0.11326603591442108, "learning_rate": 5.088540606554041e-05, "loss": 1.0009, "step": 3669 }, { "epoch": 1.4919699125838586, "grad_norm": 0.10686499625444412, "learning_rate": 5.084469774068798e-05, "loss": 0.9802, "step": 3670 }, { "epoch": 1.4923764992884734, "grad_norm": 0.10559657961130142, "learning_rate": 5.080398941583554e-05, "loss": 0.933, "step": 3671 }, { "epoch": 1.492783085993088, "grad_norm": 0.11181288212537766, "learning_rate": 5.0763281090983106e-05, "loss": 1.0625, "step": 3672 }, { "epoch": 1.4931896726977028, "grad_norm": 0.11131290346384048, "learning_rate": 5.0722572766130673e-05, "loss": 1.0343, "step": 3673 }, { "epoch": 1.4935962594023175, "grad_norm": 0.1017846018075943, "learning_rate": 5.068186444127825e-05, "loss": 0.9214, "step": 3674 }, { "epoch": 1.4940028461069323, "grad_norm": 0.10858796536922455, "learning_rate": 5.0641156116425816e-05, "loss": 0.976, "step": 3675 }, { "epoch": 1.4944094328115471, "grad_norm": 0.10481224209070206, "learning_rate": 5.060044779157338e-05, "loss": 1.0067, "step": 3676 }, { "epoch": 1.4948160195161617, "grad_norm": 
0.10739448666572571, "learning_rate": 5.0559739466720944e-05, "loss": 0.9278, "step": 3677 }, { "epoch": 1.4952226062207765, "grad_norm": 0.10282362252473831, "learning_rate": 5.051903114186851e-05, "loss": 0.8945, "step": 3678 }, { "epoch": 1.4956291929253913, "grad_norm": 0.10915033519268036, "learning_rate": 5.047832281701608e-05, "loss": 1.0137, "step": 3679 }, { "epoch": 1.4960357796300061, "grad_norm": 0.10232996195554733, "learning_rate": 5.0437614492163654e-05, "loss": 0.9135, "step": 3680 }, { "epoch": 1.496442366334621, "grad_norm": 0.10414308309555054, "learning_rate": 5.039690616731122e-05, "loss": 0.9884, "step": 3681 }, { "epoch": 1.4968489530392355, "grad_norm": 0.09622911363840103, "learning_rate": 5.035619784245878e-05, "loss": 0.9511, "step": 3682 }, { "epoch": 1.4972555397438505, "grad_norm": 0.10247783362865448, "learning_rate": 5.031548951760635e-05, "loss": 0.89, "step": 3683 }, { "epoch": 1.497662126448465, "grad_norm": 0.09925010800361633, "learning_rate": 5.027478119275392e-05, "loss": 0.9652, "step": 3684 }, { "epoch": 1.4980687131530799, "grad_norm": 0.10200038552284241, "learning_rate": 5.023407286790149e-05, "loss": 0.9097, "step": 3685 }, { "epoch": 1.4984752998576947, "grad_norm": 0.11495770514011383, "learning_rate": 5.019336454304906e-05, "loss": 1.0503, "step": 3686 }, { "epoch": 1.4988818865623095, "grad_norm": 0.10580781102180481, "learning_rate": 5.015265621819663e-05, "loss": 0.8902, "step": 3687 }, { "epoch": 1.4992884732669243, "grad_norm": 0.10454212874174118, "learning_rate": 5.011194789334419e-05, "loss": 0.8997, "step": 3688 }, { "epoch": 1.4996950599715388, "grad_norm": 0.09890579432249069, "learning_rate": 5.0071239568491756e-05, "loss": 0.9537, "step": 3689 }, { "epoch": 1.5001016466761536, "grad_norm": 0.10192213952541351, "learning_rate": 5.003053124363932e-05, "loss": 0.912, "step": 3690 }, { "epoch": 1.5005082333807684, "grad_norm": 0.10794500261545181, "learning_rate": 4.998982291878689e-05, "loss": 0.9215, 
"step": 3691 }, { "epoch": 1.5009148200853832, "grad_norm": 0.10989861935377121, "learning_rate": 4.994911459393446e-05, "loss": 0.9642, "step": 3692 }, { "epoch": 1.501321406789998, "grad_norm": 0.11163085699081421, "learning_rate": 4.990840626908203e-05, "loss": 0.9854, "step": 3693 }, { "epoch": 1.5017279934946126, "grad_norm": 0.0962003841996193, "learning_rate": 4.9867697944229594e-05, "loss": 0.843, "step": 3694 }, { "epoch": 1.5021345801992276, "grad_norm": 0.10547157377004623, "learning_rate": 4.982698961937716e-05, "loss": 0.8982, "step": 3695 }, { "epoch": 1.5025411669038422, "grad_norm": 0.11247014254331589, "learning_rate": 4.9786281294524736e-05, "loss": 1.0197, "step": 3696 }, { "epoch": 1.502947753608457, "grad_norm": 0.11089324206113815, "learning_rate": 4.97455729696723e-05, "loss": 1.0034, "step": 3697 }, { "epoch": 1.5033543403130718, "grad_norm": 0.11227573454380035, "learning_rate": 4.970486464481987e-05, "loss": 1.0355, "step": 3698 }, { "epoch": 1.5037609270176864, "grad_norm": 0.09788957238197327, "learning_rate": 4.966415631996744e-05, "loss": 0.8086, "step": 3699 }, { "epoch": 1.5041675137223014, "grad_norm": 0.106124147772789, "learning_rate": 4.9623447995115e-05, "loss": 0.9087, "step": 3700 }, { "epoch": 1.504574100426916, "grad_norm": 0.10806267708539963, "learning_rate": 4.9582739670262574e-05, "loss": 0.8783, "step": 3701 }, { "epoch": 1.5049806871315308, "grad_norm": 0.10819346457719803, "learning_rate": 4.954203134541014e-05, "loss": 0.9358, "step": 3702 }, { "epoch": 1.5053872738361456, "grad_norm": 0.10454476624727249, "learning_rate": 4.95013230205577e-05, "loss": 0.9136, "step": 3703 }, { "epoch": 1.5057938605407604, "grad_norm": 0.09978950023651123, "learning_rate": 4.946061469570528e-05, "loss": 0.9259, "step": 3704 }, { "epoch": 1.5062004472453752, "grad_norm": 0.10548686236143112, "learning_rate": 4.9419906370852845e-05, "loss": 1.0147, "step": 3705 }, { "epoch": 1.5066070339499897, "grad_norm": 0.09171932935714722, 
"learning_rate": 4.9379198046000405e-05, "loss": 0.8624, "step": 3706 }, { "epoch": 1.5070136206546048, "grad_norm": 0.10809264332056046, "learning_rate": 4.933848972114798e-05, "loss": 0.9103, "step": 3707 }, { "epoch": 1.5074202073592193, "grad_norm": 0.09878364950418472, "learning_rate": 4.929778139629555e-05, "loss": 0.8816, "step": 3708 }, { "epoch": 1.5078267940638341, "grad_norm": 0.10659473389387131, "learning_rate": 4.925707307144311e-05, "loss": 0.9861, "step": 3709 }, { "epoch": 1.508233380768449, "grad_norm": 0.11606935411691666, "learning_rate": 4.921636474659068e-05, "loss": 1.0614, "step": 3710 }, { "epoch": 1.5086399674730635, "grad_norm": 0.09988582134246826, "learning_rate": 4.917565642173825e-05, "loss": 0.856, "step": 3711 }, { "epoch": 1.5090465541776785, "grad_norm": 0.10419981181621552, "learning_rate": 4.913494809688581e-05, "loss": 0.9892, "step": 3712 }, { "epoch": 1.509453140882293, "grad_norm": 0.10400033742189407, "learning_rate": 4.9094239772033386e-05, "loss": 0.9907, "step": 3713 }, { "epoch": 1.509859727586908, "grad_norm": 0.10473748296499252, "learning_rate": 4.905353144718095e-05, "loss": 0.9011, "step": 3714 }, { "epoch": 1.5102663142915227, "grad_norm": 0.10208045691251755, "learning_rate": 4.9012823122328514e-05, "loss": 0.8276, "step": 3715 }, { "epoch": 1.5106729009961373, "grad_norm": 0.10542485117912292, "learning_rate": 4.897211479747609e-05, "loss": 1.0149, "step": 3716 }, { "epoch": 1.5110794877007523, "grad_norm": 0.10585687309503555, "learning_rate": 4.8931406472623656e-05, "loss": 0.9461, "step": 3717 }, { "epoch": 1.5114860744053669, "grad_norm": 0.10241574048995972, "learning_rate": 4.889069814777122e-05, "loss": 0.868, "step": 3718 }, { "epoch": 1.5118926611099817, "grad_norm": 0.10915213078260422, "learning_rate": 4.884998982291879e-05, "loss": 1.0097, "step": 3719 }, { "epoch": 1.5122992478145965, "grad_norm": 0.09911471605300903, "learning_rate": 4.880928149806636e-05, "loss": 0.8691, "step": 3720 }, { "epoch": 
1.5127058345192113, "grad_norm": 0.10516642034053802, "learning_rate": 4.876857317321393e-05, "loss": 0.9422, "step": 3721 }, { "epoch": 1.513112421223826, "grad_norm": 0.10460437089204788, "learning_rate": 4.8727864848361494e-05, "loss": 0.9295, "step": 3722 }, { "epoch": 1.5135190079284406, "grad_norm": 0.10897176712751389, "learning_rate": 4.868715652350906e-05, "loss": 0.9497, "step": 3723 }, { "epoch": 1.5139255946330556, "grad_norm": 0.11495667695999146, "learning_rate": 4.864644819865663e-05, "loss": 0.9758, "step": 3724 }, { "epoch": 1.5143321813376702, "grad_norm": 0.1012863963842392, "learning_rate": 4.86057398738042e-05, "loss": 0.852, "step": 3725 }, { "epoch": 1.514738768042285, "grad_norm": 0.09959638863801956, "learning_rate": 4.8565031548951765e-05, "loss": 0.8484, "step": 3726 }, { "epoch": 1.5151453547468998, "grad_norm": 0.10632819682359695, "learning_rate": 4.852432322409933e-05, "loss": 0.9661, "step": 3727 }, { "epoch": 1.5155519414515144, "grad_norm": 0.11513801664113998, "learning_rate": 4.84836148992469e-05, "loss": 1.0592, "step": 3728 }, { "epoch": 1.5159585281561294, "grad_norm": 0.10134799033403397, "learning_rate": 4.844290657439446e-05, "loss": 0.9568, "step": 3729 }, { "epoch": 1.516365114860744, "grad_norm": 0.11659684777259827, "learning_rate": 4.8402198249542035e-05, "loss": 1.0208, "step": 3730 }, { "epoch": 1.5167717015653588, "grad_norm": 0.1074221208691597, "learning_rate": 4.83614899246896e-05, "loss": 0.8979, "step": 3731 }, { "epoch": 1.5171782882699736, "grad_norm": 0.10155625641345978, "learning_rate": 4.8320781599837164e-05, "loss": 0.8938, "step": 3732 }, { "epoch": 1.5175848749745884, "grad_norm": 0.11608471721410751, "learning_rate": 4.828007327498474e-05, "loss": 1.1216, "step": 3733 }, { "epoch": 1.5179914616792032, "grad_norm": 0.09947482496500015, "learning_rate": 4.8239364950132306e-05, "loss": 0.8629, "step": 3734 }, { "epoch": 1.5183980483838178, "grad_norm": 0.10868632793426514, "learning_rate": 
4.819865662527987e-05, "loss": 0.9978, "step": 3735 }, { "epoch": 1.5188046350884326, "grad_norm": 7.418512344360352, "learning_rate": 4.815794830042744e-05, "loss": 0.953, "step": 3736 }, { "epoch": 1.5192112217930474, "grad_norm": 0.1094009056687355, "learning_rate": 4.811723997557501e-05, "loss": 0.9911, "step": 3737 }, { "epoch": 1.5196178084976621, "grad_norm": 0.10767845809459686, "learning_rate": 4.807653165072257e-05, "loss": 0.987, "step": 3738 }, { "epoch": 1.520024395202277, "grad_norm": 0.10403701663017273, "learning_rate": 4.8035823325870144e-05, "loss": 0.9136, "step": 3739 }, { "epoch": 1.5204309819068915, "grad_norm": 0.10876458138227463, "learning_rate": 4.799511500101771e-05, "loss": 0.9448, "step": 3740 }, { "epoch": 1.5208375686115065, "grad_norm": 0.09987885504961014, "learning_rate": 4.795440667616527e-05, "loss": 0.9005, "step": 3741 }, { "epoch": 1.5212441553161211, "grad_norm": 0.10714446008205414, "learning_rate": 4.791369835131285e-05, "loss": 0.9782, "step": 3742 }, { "epoch": 1.521650742020736, "grad_norm": 0.10182036459445953, "learning_rate": 4.7872990026460415e-05, "loss": 0.8578, "step": 3743 }, { "epoch": 1.5220573287253507, "grad_norm": 0.1074320524930954, "learning_rate": 4.783228170160798e-05, "loss": 0.9118, "step": 3744 }, { "epoch": 1.5224639154299653, "grad_norm": 0.11088522523641586, "learning_rate": 4.779157337675555e-05, "loss": 1.0281, "step": 3745 }, { "epoch": 1.5228705021345803, "grad_norm": 0.10585159808397293, "learning_rate": 4.775086505190312e-05, "loss": 0.9213, "step": 3746 }, { "epoch": 1.5232770888391949, "grad_norm": 0.10831379890441895, "learning_rate": 4.7710156727050685e-05, "loss": 0.9331, "step": 3747 }, { "epoch": 1.5236836755438097, "grad_norm": 0.09734898805618286, "learning_rate": 4.766944840219825e-05, "loss": 0.8925, "step": 3748 }, { "epoch": 1.5240902622484245, "grad_norm": 0.10137004405260086, "learning_rate": 4.762874007734582e-05, "loss": 0.9447, "step": 3749 }, { "epoch": 1.5244968489530393, 
"grad_norm": 0.1120719313621521, "learning_rate": 4.758803175249339e-05, "loss": 1.1214, "step": 3750 }, { "epoch": 1.524903435657654, "grad_norm": 0.11059883236885071, "learning_rate": 4.7547323427640956e-05, "loss": 0.9733, "step": 3751 }, { "epoch": 1.5253100223622686, "grad_norm": 0.10553129762411118, "learning_rate": 4.750661510278852e-05, "loss": 0.8984, "step": 3752 }, { "epoch": 1.5257166090668837, "grad_norm": 0.09985724091529846, "learning_rate": 4.746590677793609e-05, "loss": 0.8949, "step": 3753 }, { "epoch": 1.5261231957714982, "grad_norm": 0.09900239109992981, "learning_rate": 4.742519845308366e-05, "loss": 0.8907, "step": 3754 }, { "epoch": 1.526529782476113, "grad_norm": 0.10111631453037262, "learning_rate": 4.7384490128231226e-05, "loss": 0.9111, "step": 3755 }, { "epoch": 1.5269363691807278, "grad_norm": 0.10198728740215302, "learning_rate": 4.7343781803378794e-05, "loss": 0.9014, "step": 3756 }, { "epoch": 1.5273429558853424, "grad_norm": 0.10502500087022781, "learning_rate": 4.730307347852636e-05, "loss": 0.9398, "step": 3757 }, { "epoch": 1.5277495425899574, "grad_norm": 0.10820775479078293, "learning_rate": 4.726236515367393e-05, "loss": 0.9785, "step": 3758 }, { "epoch": 1.528156129294572, "grad_norm": 0.09791271388530731, "learning_rate": 4.72216568288215e-05, "loss": 0.8864, "step": 3759 }, { "epoch": 1.5285627159991868, "grad_norm": 0.10859858244657516, "learning_rate": 4.7180948503969064e-05, "loss": 0.9726, "step": 3760 }, { "epoch": 1.5289693027038016, "grad_norm": 0.11191640049219131, "learning_rate": 4.714024017911663e-05, "loss": 1.018, "step": 3761 }, { "epoch": 1.5293758894084164, "grad_norm": 0.10120069235563278, "learning_rate": 4.70995318542642e-05, "loss": 0.8942, "step": 3762 }, { "epoch": 1.5297824761130312, "grad_norm": 0.09827437251806259, "learning_rate": 4.705882352941177e-05, "loss": 0.8239, "step": 3763 }, { "epoch": 1.5301890628176458, "grad_norm": 0.11061054468154907, "learning_rate": 4.7018115204559335e-05, "loss": 
1.0035, "step": 3764 }, { "epoch": 1.5305956495222606, "grad_norm": 0.11817970126867294, "learning_rate": 4.69774068797069e-05, "loss": 1.0296, "step": 3765 }, { "epoch": 1.5310022362268754, "grad_norm": 0.10169284790754318, "learning_rate": 4.693669855485447e-05, "loss": 0.9097, "step": 3766 }, { "epoch": 1.5314088229314902, "grad_norm": 0.11417925357818604, "learning_rate": 4.689599023000204e-05, "loss": 0.9863, "step": 3767 }, { "epoch": 1.531815409636105, "grad_norm": 0.11385629326105118, "learning_rate": 4.6855281905149606e-05, "loss": 1.0187, "step": 3768 }, { "epoch": 1.5322219963407195, "grad_norm": 0.10632526874542236, "learning_rate": 4.681457358029717e-05, "loss": 1.0233, "step": 3769 }, { "epoch": 1.5326285830453346, "grad_norm": 0.1070982813835144, "learning_rate": 4.677386525544474e-05, "loss": 0.9944, "step": 3770 }, { "epoch": 1.5330351697499491, "grad_norm": 0.10576360672712326, "learning_rate": 4.673315693059231e-05, "loss": 0.8884, "step": 3771 }, { "epoch": 1.533441756454564, "grad_norm": 0.11129205673933029, "learning_rate": 4.6692448605739876e-05, "loss": 0.9304, "step": 3772 }, { "epoch": 1.5338483431591787, "grad_norm": 0.10366874188184738, "learning_rate": 4.6651740280887444e-05, "loss": 0.8997, "step": 3773 }, { "epoch": 1.5342549298637933, "grad_norm": 0.10038387775421143, "learning_rate": 4.661103195603501e-05, "loss": 0.9118, "step": 3774 }, { "epoch": 1.5346615165684083, "grad_norm": 0.1056869626045227, "learning_rate": 4.657032363118258e-05, "loss": 0.9021, "step": 3775 }, { "epoch": 1.535068103273023, "grad_norm": 0.10934474319219589, "learning_rate": 4.6529615306330147e-05, "loss": 0.9975, "step": 3776 }, { "epoch": 1.5354746899776377, "grad_norm": 0.1077047809958458, "learning_rate": 4.6488906981477714e-05, "loss": 1.0119, "step": 3777 }, { "epoch": 1.5358812766822525, "grad_norm": 0.10552367568016052, "learning_rate": 4.644819865662528e-05, "loss": 0.8493, "step": 3778 }, { "epoch": 1.5362878633868673, "grad_norm": 
0.09804526716470718, "learning_rate": 4.640749033177285e-05, "loss": 0.8906, "step": 3779 }, { "epoch": 1.536694450091482, "grad_norm": 0.10530523955821991, "learning_rate": 4.636678200692042e-05, "loss": 0.8358, "step": 3780 }, { "epoch": 1.5371010367960967, "grad_norm": 0.10684414952993393, "learning_rate": 4.6326073682067985e-05, "loss": 0.9865, "step": 3781 }, { "epoch": 1.5375076235007117, "grad_norm": 0.1129271611571312, "learning_rate": 4.628536535721555e-05, "loss": 1.0377, "step": 3782 }, { "epoch": 1.5379142102053263, "grad_norm": 0.10309012234210968, "learning_rate": 4.624465703236312e-05, "loss": 0.9689, "step": 3783 }, { "epoch": 1.538320796909941, "grad_norm": 0.10697636753320694, "learning_rate": 4.620394870751069e-05, "loss": 0.9462, "step": 3784 }, { "epoch": 1.5387273836145559, "grad_norm": 0.11021671444177628, "learning_rate": 4.6163240382658255e-05, "loss": 0.9503, "step": 3785 }, { "epoch": 1.5391339703191704, "grad_norm": 0.10349755734205246, "learning_rate": 4.612253205780582e-05, "loss": 0.9063, "step": 3786 }, { "epoch": 1.5395405570237854, "grad_norm": 0.10515953600406647, "learning_rate": 4.608182373295339e-05, "loss": 0.8909, "step": 3787 }, { "epoch": 1.5399471437284, "grad_norm": 0.10489808022975922, "learning_rate": 4.604111540810096e-05, "loss": 0.9039, "step": 3788 }, { "epoch": 1.5403537304330148, "grad_norm": 0.10147853195667267, "learning_rate": 4.6000407083248526e-05, "loss": 0.8947, "step": 3789 }, { "epoch": 1.5407603171376296, "grad_norm": 0.09851264208555222, "learning_rate": 4.59596987583961e-05, "loss": 0.8955, "step": 3790 }, { "epoch": 1.5411669038422442, "grad_norm": 0.10256364941596985, "learning_rate": 4.591899043354366e-05, "loss": 0.9723, "step": 3791 }, { "epoch": 1.5415734905468592, "grad_norm": 0.09893185645341873, "learning_rate": 4.587828210869123e-05, "loss": 0.8901, "step": 3792 }, { "epoch": 1.5419800772514738, "grad_norm": 0.1070183515548706, "learning_rate": 4.5837573783838796e-05, "loss": 0.9648, "step": 
3793 }, { "epoch": 1.5423866639560886, "grad_norm": 0.10921451449394226, "learning_rate": 4.5796865458986364e-05, "loss": 0.926, "step": 3794 }, { "epoch": 1.5427932506607034, "grad_norm": 0.10185564309358597, "learning_rate": 4.575615713413393e-05, "loss": 0.9776, "step": 3795 }, { "epoch": 1.5431998373653182, "grad_norm": 0.11901550740003586, "learning_rate": 4.57154488092815e-05, "loss": 0.9979, "step": 3796 }, { "epoch": 1.543606424069933, "grad_norm": 0.10940925031900406, "learning_rate": 4.567474048442907e-05, "loss": 0.9565, "step": 3797 }, { "epoch": 1.5440130107745476, "grad_norm": 0.10260502994060516, "learning_rate": 4.5634032159576635e-05, "loss": 0.8529, "step": 3798 }, { "epoch": 1.5444195974791626, "grad_norm": 0.10641606152057648, "learning_rate": 4.55933238347242e-05, "loss": 0.9345, "step": 3799 }, { "epoch": 1.5448261841837772, "grad_norm": 0.09509759396314621, "learning_rate": 4.555261550987177e-05, "loss": 0.8873, "step": 3800 }, { "epoch": 1.545232770888392, "grad_norm": 0.10529722273349762, "learning_rate": 4.551190718501934e-05, "loss": 0.9447, "step": 3801 }, { "epoch": 1.5456393575930067, "grad_norm": 0.113713838160038, "learning_rate": 4.5471198860166905e-05, "loss": 0.9965, "step": 3802 }, { "epoch": 1.5460459442976213, "grad_norm": 0.11015161871910095, "learning_rate": 4.543049053531447e-05, "loss": 1.0089, "step": 3803 }, { "epoch": 1.5464525310022363, "grad_norm": 0.09919530153274536, "learning_rate": 4.538978221046204e-05, "loss": 0.885, "step": 3804 }, { "epoch": 1.546859117706851, "grad_norm": 0.10103622823953629, "learning_rate": 4.534907388560961e-05, "loss": 0.8922, "step": 3805 }, { "epoch": 1.5472657044114657, "grad_norm": 0.10861583799123764, "learning_rate": 4.5308365560757176e-05, "loss": 0.8814, "step": 3806 }, { "epoch": 1.5476722911160805, "grad_norm": 0.10311048477888107, "learning_rate": 4.526765723590474e-05, "loss": 0.8925, "step": 3807 }, { "epoch": 1.5480788778206953, "grad_norm": 0.10552438348531723, 
"learning_rate": 4.522694891105231e-05, "loss": 0.9121, "step": 3808 }, { "epoch": 1.54848546452531, "grad_norm": 0.1031796857714653, "learning_rate": 4.518624058619988e-05, "loss": 0.8877, "step": 3809 }, { "epoch": 1.5488920512299247, "grad_norm": 0.11424022167921066, "learning_rate": 4.5145532261347446e-05, "loss": 1.0865, "step": 3810 }, { "epoch": 1.5492986379345397, "grad_norm": 0.10076258331537247, "learning_rate": 4.5104823936495014e-05, "loss": 0.8801, "step": 3811 }, { "epoch": 1.5497052246391543, "grad_norm": 0.10160887986421585, "learning_rate": 4.506411561164258e-05, "loss": 0.9645, "step": 3812 }, { "epoch": 1.550111811343769, "grad_norm": 0.10750345140695572, "learning_rate": 4.5023407286790156e-05, "loss": 0.9595, "step": 3813 }, { "epoch": 1.5505183980483839, "grad_norm": 0.09937632828950882, "learning_rate": 4.498269896193772e-05, "loss": 0.9412, "step": 3814 }, { "epoch": 1.5509249847529984, "grad_norm": 0.1044396162033081, "learning_rate": 4.4941990637085284e-05, "loss": 0.9602, "step": 3815 }, { "epoch": 1.5513315714576135, "grad_norm": 0.09803607314825058, "learning_rate": 4.490128231223286e-05, "loss": 0.8488, "step": 3816 }, { "epoch": 1.551738158162228, "grad_norm": 0.09826266020536423, "learning_rate": 4.486057398738042e-05, "loss": 0.9429, "step": 3817 }, { "epoch": 1.5521447448668428, "grad_norm": 0.10339567065238953, "learning_rate": 4.481986566252799e-05, "loss": 0.9106, "step": 3818 }, { "epoch": 1.5525513315714576, "grad_norm": 0.10419493913650513, "learning_rate": 4.477915733767556e-05, "loss": 0.935, "step": 3819 }, { "epoch": 1.5529579182760722, "grad_norm": 0.10235986858606339, "learning_rate": 4.473844901282312e-05, "loss": 0.9237, "step": 3820 }, { "epoch": 1.5533645049806872, "grad_norm": 0.10505925863981247, "learning_rate": 4.469774068797069e-05, "loss": 0.9719, "step": 3821 }, { "epoch": 1.5537710916853018, "grad_norm": 0.1147008091211319, "learning_rate": 4.4657032363118265e-05, "loss": 0.9764, "step": 3822 }, { "epoch": 
1.5541776783899166, "grad_norm": 0.10121449083089828, "learning_rate": 4.4616324038265825e-05, "loss": 0.8847, "step": 3823 }, { "epoch": 1.5545842650945314, "grad_norm": 0.10162410885095596, "learning_rate": 4.457561571341339e-05, "loss": 0.9766, "step": 3824 }, { "epoch": 1.5549908517991462, "grad_norm": 0.10154290497303009, "learning_rate": 4.453490738856097e-05, "loss": 0.9663, "step": 3825 }, { "epoch": 1.555397438503761, "grad_norm": 0.10372976958751678, "learning_rate": 4.449419906370853e-05, "loss": 0.9369, "step": 3826 }, { "epoch": 1.5558040252083756, "grad_norm": 0.10194465517997742, "learning_rate": 4.4453490738856096e-05, "loss": 0.9677, "step": 3827 }, { "epoch": 1.5562106119129906, "grad_norm": 0.11616487801074982, "learning_rate": 4.441278241400367e-05, "loss": 0.9678, "step": 3828 }, { "epoch": 1.5566171986176052, "grad_norm": 0.0968397706747055, "learning_rate": 4.437207408915123e-05, "loss": 0.8619, "step": 3829 }, { "epoch": 1.55702378532222, "grad_norm": 0.10480852425098419, "learning_rate": 4.43313657642988e-05, "loss": 0.9306, "step": 3830 }, { "epoch": 1.5574303720268348, "grad_norm": 0.099884994328022, "learning_rate": 4.429065743944637e-05, "loss": 0.8609, "step": 3831 }, { "epoch": 1.5578369587314493, "grad_norm": 0.10909198969602585, "learning_rate": 4.4249949114593934e-05, "loss": 0.9682, "step": 3832 }, { "epoch": 1.5582435454360644, "grad_norm": 0.10736821591854095, "learning_rate": 4.42092407897415e-05, "loss": 0.9483, "step": 3833 }, { "epoch": 1.558650132140679, "grad_norm": 0.10474716871976852, "learning_rate": 4.4168532464889076e-05, "loss": 0.9911, "step": 3834 }, { "epoch": 1.5590567188452937, "grad_norm": 0.09613660722970963, "learning_rate": 4.412782414003664e-05, "loss": 0.8299, "step": 3835 }, { "epoch": 1.5594633055499085, "grad_norm": 0.11056198924779892, "learning_rate": 4.4087115815184205e-05, "loss": 0.9418, "step": 3836 }, { "epoch": 1.5598698922545233, "grad_norm": 0.11200756579637527, "learning_rate": 
4.404640749033178e-05, "loss": 1.0011, "step": 3837 }, { "epoch": 1.5602764789591381, "grad_norm": 0.10487156361341476, "learning_rate": 4.400569916547934e-05, "loss": 0.9965, "step": 3838 }, { "epoch": 1.5606830656637527, "grad_norm": 0.12069255858659744, "learning_rate": 4.3964990840626914e-05, "loss": 1.0753, "step": 3839 }, { "epoch": 1.5610896523683677, "grad_norm": 0.09854745864868164, "learning_rate": 4.392428251577448e-05, "loss": 0.8222, "step": 3840 }, { "epoch": 1.5614962390729823, "grad_norm": 0.10288074612617493, "learning_rate": 4.388357419092204e-05, "loss": 0.9101, "step": 3841 }, { "epoch": 1.561902825777597, "grad_norm": 0.11022932827472687, "learning_rate": 4.384286586606962e-05, "loss": 0.9908, "step": 3842 }, { "epoch": 1.562309412482212, "grad_norm": 0.10680433362722397, "learning_rate": 4.380215754121718e-05, "loss": 1.0098, "step": 3843 }, { "epoch": 1.5627159991868265, "grad_norm": 0.10699717700481415, "learning_rate": 4.3761449216364746e-05, "loss": 1.0427, "step": 3844 }, { "epoch": 1.5631225858914415, "grad_norm": 0.11625601351261139, "learning_rate": 4.372074089151232e-05, "loss": 1.0313, "step": 3845 }, { "epoch": 1.563529172596056, "grad_norm": 0.1125851646065712, "learning_rate": 4.368003256665988e-05, "loss": 0.9602, "step": 3846 }, { "epoch": 1.5639357593006709, "grad_norm": 0.09537433832883835, "learning_rate": 4.363932424180745e-05, "loss": 0.8338, "step": 3847 }, { "epoch": 1.5643423460052857, "grad_norm": 0.10118943452835083, "learning_rate": 4.359861591695502e-05, "loss": 0.8858, "step": 3848 }, { "epoch": 1.5647489327099002, "grad_norm": 0.12094996869564056, "learning_rate": 4.3557907592102584e-05, "loss": 1.107, "step": 3849 }, { "epoch": 1.5651555194145153, "grad_norm": 0.11586831510066986, "learning_rate": 4.351719926725015e-05, "loss": 0.9132, "step": 3850 }, { "epoch": 1.5655621061191298, "grad_norm": 0.10483038425445557, "learning_rate": 4.3476490942397726e-05, "loss": 0.8894, "step": 3851 }, { "epoch": 
1.5659686928237446, "grad_norm": 0.10702569782733917, "learning_rate": 4.343578261754529e-05, "loss": 0.9354, "step": 3852 }, { "epoch": 1.5663752795283594, "grad_norm": 0.09847092628479004, "learning_rate": 4.3395074292692854e-05, "loss": 0.8192, "step": 3853 }, { "epoch": 1.5667818662329742, "grad_norm": 0.11142577975988388, "learning_rate": 4.335436596784043e-05, "loss": 1.0106, "step": 3854 }, { "epoch": 1.567188452937589, "grad_norm": 0.11825080960988998, "learning_rate": 4.331365764298799e-05, "loss": 1.0841, "step": 3855 }, { "epoch": 1.5675950396422036, "grad_norm": 0.10718253254890442, "learning_rate": 4.327294931813556e-05, "loss": 0.9786, "step": 3856 }, { "epoch": 1.5680016263468186, "grad_norm": 0.10958700627088547, "learning_rate": 4.323224099328313e-05, "loss": 1.0322, "step": 3857 }, { "epoch": 1.5684082130514332, "grad_norm": 0.09295843541622162, "learning_rate": 4.319153266843069e-05, "loss": 0.8317, "step": 3858 }, { "epoch": 1.568814799756048, "grad_norm": 0.11297357827425003, "learning_rate": 4.315082434357826e-05, "loss": 1.0802, "step": 3859 }, { "epoch": 1.5692213864606628, "grad_norm": 0.11033076047897339, "learning_rate": 4.3110116018725835e-05, "loss": 0.9561, "step": 3860 }, { "epoch": 1.5696279731652774, "grad_norm": 0.11169704794883728, "learning_rate": 4.3069407693873396e-05, "loss": 1.0622, "step": 3861 }, { "epoch": 1.5700345598698924, "grad_norm": 0.10404906421899796, "learning_rate": 4.302869936902097e-05, "loss": 0.9427, "step": 3862 }, { "epoch": 1.570441146574507, "grad_norm": 0.10455071181058884, "learning_rate": 4.298799104416854e-05, "loss": 0.9227, "step": 3863 }, { "epoch": 1.5708477332791217, "grad_norm": 0.10467982292175293, "learning_rate": 4.29472827193161e-05, "loss": 0.9466, "step": 3864 }, { "epoch": 1.5712543199837365, "grad_norm": 0.11249608546495438, "learning_rate": 4.290657439446367e-05, "loss": 1.0259, "step": 3865 }, { "epoch": 1.5716609066883513, "grad_norm": 0.10060025006532669, "learning_rate": 
4.286586606961124e-05, "loss": 0.866, "step": 3866 }, { "epoch": 1.5720674933929661, "grad_norm": 0.10907735675573349, "learning_rate": 4.28251577447588e-05, "loss": 0.9269, "step": 3867 }, { "epoch": 1.5724740800975807, "grad_norm": 0.10577044636011124, "learning_rate": 4.2784449419906376e-05, "loss": 0.9555, "step": 3868 }, { "epoch": 1.5728806668021955, "grad_norm": 0.09949744492769241, "learning_rate": 4.274374109505394e-05, "loss": 0.9662, "step": 3869 }, { "epoch": 1.5732872535068103, "grad_norm": 0.11094196140766144, "learning_rate": 4.2703032770201504e-05, "loss": 0.9078, "step": 3870 }, { "epoch": 1.573693840211425, "grad_norm": 0.1124429702758789, "learning_rate": 4.266232444534908e-05, "loss": 1.0132, "step": 3871 }, { "epoch": 1.57410042691604, "grad_norm": 0.10895200073719025, "learning_rate": 4.2621616120496646e-05, "loss": 0.9692, "step": 3872 }, { "epoch": 1.5745070136206545, "grad_norm": 0.10914913564920425, "learning_rate": 4.258090779564421e-05, "loss": 0.944, "step": 3873 }, { "epoch": 1.5749136003252695, "grad_norm": 0.10090178996324539, "learning_rate": 4.254019947079178e-05, "loss": 0.9228, "step": 3874 }, { "epoch": 1.575320187029884, "grad_norm": 0.10193730890750885, "learning_rate": 4.249949114593935e-05, "loss": 0.9152, "step": 3875 }, { "epoch": 1.5757267737344989, "grad_norm": 0.10624190419912338, "learning_rate": 4.245878282108691e-05, "loss": 0.9702, "step": 3876 }, { "epoch": 1.5761333604391137, "grad_norm": 0.11110203713178635, "learning_rate": 4.2418074496234484e-05, "loss": 0.9431, "step": 3877 }, { "epoch": 1.5765399471437282, "grad_norm": 0.10701338946819305, "learning_rate": 4.237736617138205e-05, "loss": 0.9673, "step": 3878 }, { "epoch": 1.5769465338483433, "grad_norm": 0.10539649426937103, "learning_rate": 4.233665784652961e-05, "loss": 0.9492, "step": 3879 }, { "epoch": 1.5773531205529578, "grad_norm": 0.10319969803094864, "learning_rate": 4.229594952167719e-05, "loss": 0.8976, "step": 3880 }, { "epoch": 1.5777597072575726, 
"grad_norm": 0.11089177429676056, "learning_rate": 4.2255241196824755e-05, "loss": 0.8858, "step": 3881 }, { "epoch": 1.5781662939621874, "grad_norm": 0.10143184661865234, "learning_rate": 4.2214532871972316e-05, "loss": 0.8669, "step": 3882 }, { "epoch": 1.5785728806668022, "grad_norm": 0.10318692028522491, "learning_rate": 4.217382454711989e-05, "loss": 0.8788, "step": 3883 }, { "epoch": 1.578979467371417, "grad_norm": 0.11176548898220062, "learning_rate": 4.213311622226746e-05, "loss": 0.9668, "step": 3884 }, { "epoch": 1.5793860540760316, "grad_norm": 0.10598577558994293, "learning_rate": 4.2092407897415026e-05, "loss": 0.9206, "step": 3885 }, { "epoch": 1.5797926407806466, "grad_norm": 0.10618636012077332, "learning_rate": 4.205169957256259e-05, "loss": 0.9956, "step": 3886 }, { "epoch": 1.5801992274852612, "grad_norm": 0.11371050029993057, "learning_rate": 4.201099124771016e-05, "loss": 1.0301, "step": 3887 }, { "epoch": 1.580605814189876, "grad_norm": 0.10739018023014069, "learning_rate": 4.197028292285773e-05, "loss": 0.9769, "step": 3888 }, { "epoch": 1.5810124008944908, "grad_norm": 0.10079289227724075, "learning_rate": 4.1929574598005296e-05, "loss": 0.9888, "step": 3889 }, { "epoch": 1.5814189875991054, "grad_norm": 0.10348872095346451, "learning_rate": 4.188886627315286e-05, "loss": 0.936, "step": 3890 }, { "epoch": 1.5818255743037204, "grad_norm": 0.10830751806497574, "learning_rate": 4.184815794830043e-05, "loss": 1.0448, "step": 3891 }, { "epoch": 1.582232161008335, "grad_norm": 0.10880016535520554, "learning_rate": 4.1807449623448e-05, "loss": 1.0058, "step": 3892 }, { "epoch": 1.5826387477129498, "grad_norm": 0.10958044230937958, "learning_rate": 4.176674129859556e-05, "loss": 0.9277, "step": 3893 }, { "epoch": 1.5830453344175646, "grad_norm": 0.09938797354698181, "learning_rate": 4.1726032973743134e-05, "loss": 0.8826, "step": 3894 }, { "epoch": 1.5834519211221791, "grad_norm": 0.0986451730132103, "learning_rate": 4.16853246488907e-05, "loss": 
0.835, "step": 3895 }, { "epoch": 1.5838585078267942, "grad_norm": 0.10798802226781845, "learning_rate": 4.164461632403826e-05, "loss": 0.9923, "step": 3896 }, { "epoch": 1.5842650945314087, "grad_norm": 0.10113084316253662, "learning_rate": 4.160390799918584e-05, "loss": 0.9178, "step": 3897 }, { "epoch": 1.5846716812360235, "grad_norm": 0.09963071346282959, "learning_rate": 4.1563199674333405e-05, "loss": 0.8942, "step": 3898 }, { "epoch": 1.5850782679406383, "grad_norm": 0.0994904637336731, "learning_rate": 4.1522491349480966e-05, "loss": 0.8754, "step": 3899 }, { "epoch": 1.5854848546452531, "grad_norm": 0.09343326836824417, "learning_rate": 4.148178302462854e-05, "loss": 0.8143, "step": 3900 }, { "epoch": 1.585891441349868, "grad_norm": 0.11337709426879883, "learning_rate": 4.144107469977611e-05, "loss": 1.0421, "step": 3901 }, { "epoch": 1.5862980280544825, "grad_norm": 0.10667706280946732, "learning_rate": 4.140036637492367e-05, "loss": 0.9213, "step": 3902 }, { "epoch": 1.5867046147590975, "grad_norm": 0.09903930872678757, "learning_rate": 4.135965805007124e-05, "loss": 0.8786, "step": 3903 }, { "epoch": 1.587111201463712, "grad_norm": 0.10696469992399216, "learning_rate": 4.131894972521881e-05, "loss": 0.9659, "step": 3904 }, { "epoch": 1.587517788168327, "grad_norm": 0.10829825699329376, "learning_rate": 4.127824140036637e-05, "loss": 0.9465, "step": 3905 }, { "epoch": 1.5879243748729417, "grad_norm": 0.09446293860673904, "learning_rate": 4.1237533075513946e-05, "loss": 0.8023, "step": 3906 }, { "epoch": 1.5883309615775563, "grad_norm": 0.11186923086643219, "learning_rate": 4.1196824750661513e-05, "loss": 1.0979, "step": 3907 }, { "epoch": 1.5887375482821713, "grad_norm": 0.1068292185664177, "learning_rate": 4.115611642580908e-05, "loss": 0.9801, "step": 3908 }, { "epoch": 1.5891441349867859, "grad_norm": 0.11344651877880096, "learning_rate": 4.111540810095665e-05, "loss": 1.0279, "step": 3909 }, { "epoch": 1.5895507216914007, "grad_norm": 
0.11451148241758347, "learning_rate": 4.1074699776104216e-05, "loss": 1.022, "step": 3910 }, { "epoch": 1.5899573083960155, "grad_norm": 0.10979126393795013, "learning_rate": 4.1033991451251784e-05, "loss": 1.003, "step": 3911 }, { "epoch": 1.5903638951006303, "grad_norm": 0.10487376898527145, "learning_rate": 4.099328312639935e-05, "loss": 0.9479, "step": 3912 }, { "epoch": 1.590770481805245, "grad_norm": 0.10622530430555344, "learning_rate": 4.095257480154692e-05, "loss": 0.9565, "step": 3913 }, { "epoch": 1.5911770685098596, "grad_norm": 0.11741635948419571, "learning_rate": 4.091186647669449e-05, "loss": 1.0789, "step": 3914 }, { "epoch": 1.5915836552144746, "grad_norm": 0.11563640832901001, "learning_rate": 4.0871158151842055e-05, "loss": 0.9594, "step": 3915 }, { "epoch": 1.5919902419190892, "grad_norm": 0.09948024898767471, "learning_rate": 4.083044982698962e-05, "loss": 0.9188, "step": 3916 }, { "epoch": 1.592396828623704, "grad_norm": 0.10058055073022842, "learning_rate": 4.078974150213719e-05, "loss": 0.88, "step": 3917 }, { "epoch": 1.5928034153283188, "grad_norm": 0.11370790004730225, "learning_rate": 4.074903317728476e-05, "loss": 1.0117, "step": 3918 }, { "epoch": 1.5932100020329334, "grad_norm": 0.1042017936706543, "learning_rate": 4.0708324852432325e-05, "loss": 0.9712, "step": 3919 }, { "epoch": 1.5936165887375484, "grad_norm": 0.10406166315078735, "learning_rate": 4.066761652757989e-05, "loss": 0.9896, "step": 3920 }, { "epoch": 1.594023175442163, "grad_norm": 0.10339275002479553, "learning_rate": 4.062690820272746e-05, "loss": 0.9653, "step": 3921 }, { "epoch": 1.5944297621467778, "grad_norm": 0.1000717282295227, "learning_rate": 4.058619987787503e-05, "loss": 0.8748, "step": 3922 }, { "epoch": 1.5948363488513926, "grad_norm": 0.1118224561214447, "learning_rate": 4.0545491553022596e-05, "loss": 0.9712, "step": 3923 }, { "epoch": 1.5952429355560072, "grad_norm": 0.11152620613574982, "learning_rate": 4.050478322817016e-05, "loss": 0.9215, "step": 
3924 }, { "epoch": 1.5956495222606222, "grad_norm": 0.11174870282411575, "learning_rate": 4.046407490331773e-05, "loss": 1.021, "step": 3925 }, { "epoch": 1.5960561089652368, "grad_norm": 0.11456409096717834, "learning_rate": 4.04233665784653e-05, "loss": 0.9776, "step": 3926 }, { "epoch": 1.5964626956698516, "grad_norm": 0.10741414874792099, "learning_rate": 4.0382658253612866e-05, "loss": 0.9605, "step": 3927 }, { "epoch": 1.5968692823744663, "grad_norm": 0.09653212875127792, "learning_rate": 4.0341949928760434e-05, "loss": 0.9439, "step": 3928 }, { "epoch": 1.5972758690790811, "grad_norm": 0.10057616978883743, "learning_rate": 4.0301241603908e-05, "loss": 0.9304, "step": 3929 }, { "epoch": 1.597682455783696, "grad_norm": 0.10348949581384659, "learning_rate": 4.026053327905557e-05, "loss": 0.9121, "step": 3930 }, { "epoch": 1.5980890424883105, "grad_norm": 0.11406022310256958, "learning_rate": 4.021982495420314e-05, "loss": 1.0656, "step": 3931 }, { "epoch": 1.5984956291929255, "grad_norm": 0.10392772406339645, "learning_rate": 4.0179116629350704e-05, "loss": 0.8859, "step": 3932 }, { "epoch": 1.5989022158975401, "grad_norm": 0.10656527429819107, "learning_rate": 4.013840830449827e-05, "loss": 0.9224, "step": 3933 }, { "epoch": 1.599308802602155, "grad_norm": 0.11588657647371292, "learning_rate": 4.009769997964584e-05, "loss": 1.0287, "step": 3934 }, { "epoch": 1.5997153893067697, "grad_norm": 0.11138034611940384, "learning_rate": 4.005699165479341e-05, "loss": 1.1117, "step": 3935 }, { "epoch": 1.6001219760113843, "grad_norm": 0.10900641232728958, "learning_rate": 4.0016283329940975e-05, "loss": 0.9072, "step": 3936 }, { "epoch": 1.6005285627159993, "grad_norm": 0.10668104141950607, "learning_rate": 3.997557500508854e-05, "loss": 0.8867, "step": 3937 }, { "epoch": 1.6009351494206139, "grad_norm": 0.09906437993049622, "learning_rate": 3.993486668023611e-05, "loss": 0.863, "step": 3938 }, { "epoch": 1.6013417361252287, "grad_norm": 0.10709775984287262, 
"learning_rate": 3.989415835538368e-05, "loss": 0.9174, "step": 3939 }, { "epoch": 1.6017483228298435, "grad_norm": 0.10237723588943481, "learning_rate": 3.9853450030531245e-05, "loss": 0.9447, "step": 3940 }, { "epoch": 1.6021549095344583, "grad_norm": 0.09341628849506378, "learning_rate": 3.981274170567881e-05, "loss": 0.8767, "step": 3941 }, { "epoch": 1.602561496239073, "grad_norm": 0.09939193725585938, "learning_rate": 3.977203338082638e-05, "loss": 0.8718, "step": 3942 }, { "epoch": 1.6029680829436876, "grad_norm": 0.11073851585388184, "learning_rate": 3.973132505597395e-05, "loss": 0.9874, "step": 3943 }, { "epoch": 1.6033746696483027, "grad_norm": 0.10323592275381088, "learning_rate": 3.9690616731121516e-05, "loss": 0.8661, "step": 3944 }, { "epoch": 1.6037812563529172, "grad_norm": 0.1047651469707489, "learning_rate": 3.9649908406269084e-05, "loss": 0.8734, "step": 3945 }, { "epoch": 1.604187843057532, "grad_norm": 0.10787742584943771, "learning_rate": 3.960920008141665e-05, "loss": 0.9911, "step": 3946 }, { "epoch": 1.6045944297621468, "grad_norm": 0.10103908181190491, "learning_rate": 3.956849175656422e-05, "loss": 0.9259, "step": 3947 }, { "epoch": 1.6050010164667614, "grad_norm": 0.10705123096704483, "learning_rate": 3.9527783431711786e-05, "loss": 0.9721, "step": 3948 }, { "epoch": 1.6054076031713764, "grad_norm": 0.11182446777820587, "learning_rate": 3.9487075106859354e-05, "loss": 1.049, "step": 3949 }, { "epoch": 1.605814189875991, "grad_norm": 0.1003599539399147, "learning_rate": 3.944636678200692e-05, "loss": 0.9564, "step": 3950 }, { "epoch": 1.6062207765806058, "grad_norm": 0.10500559955835342, "learning_rate": 3.940565845715449e-05, "loss": 0.8966, "step": 3951 }, { "epoch": 1.6066273632852206, "grad_norm": 0.09841228276491165, "learning_rate": 3.936495013230206e-05, "loss": 0.847, "step": 3952 }, { "epoch": 1.6070339499898352, "grad_norm": 0.11207973212003708, "learning_rate": 3.9324241807449625e-05, "loss": 1.0527, "step": 3953 }, { "epoch": 
1.6074405366944502, "grad_norm": 0.10874827206134796, "learning_rate": 3.92835334825972e-05, "loss": 0.9597, "step": 3954 }, { "epoch": 1.6078471233990648, "grad_norm": 0.1068238690495491, "learning_rate": 3.924282515774476e-05, "loss": 0.9619, "step": 3955 }, { "epoch": 1.6082537101036796, "grad_norm": 0.10551256686449051, "learning_rate": 3.920211683289233e-05, "loss": 0.9647, "step": 3956 }, { "epoch": 1.6086602968082944, "grad_norm": 0.1033085510134697, "learning_rate": 3.9161408508039895e-05, "loss": 0.9121, "step": 3957 }, { "epoch": 1.6090668835129092, "grad_norm": 0.11028590798377991, "learning_rate": 3.912070018318746e-05, "loss": 1.0249, "step": 3958 }, { "epoch": 1.609473470217524, "grad_norm": 0.10885387659072876, "learning_rate": 3.907999185833503e-05, "loss": 0.9739, "step": 3959 }, { "epoch": 1.6098800569221385, "grad_norm": 0.09786680340766907, "learning_rate": 3.90392835334826e-05, "loss": 0.8931, "step": 3960 }, { "epoch": 1.6102866436267536, "grad_norm": 0.09145115315914154, "learning_rate": 3.8998575208630166e-05, "loss": 0.8121, "step": 3961 }, { "epoch": 1.6106932303313681, "grad_norm": 0.09834929555654526, "learning_rate": 3.895786688377773e-05, "loss": 0.8897, "step": 3962 }, { "epoch": 1.611099817035983, "grad_norm": 0.10126276314258575, "learning_rate": 3.89171585589253e-05, "loss": 0.8728, "step": 3963 }, { "epoch": 1.6115064037405977, "grad_norm": 0.10853146016597748, "learning_rate": 3.887645023407287e-05, "loss": 0.9516, "step": 3964 }, { "epoch": 1.6119129904452123, "grad_norm": 0.10366170108318329, "learning_rate": 3.8835741909220436e-05, "loss": 0.8539, "step": 3965 }, { "epoch": 1.6123195771498273, "grad_norm": 0.1102977991104126, "learning_rate": 3.8795033584368004e-05, "loss": 1.0544, "step": 3966 }, { "epoch": 1.612726163854442, "grad_norm": 0.09886328876018524, "learning_rate": 3.875432525951557e-05, "loss": 0.8698, "step": 3967 }, { "epoch": 1.6131327505590567, "grad_norm": 0.11173603683710098, "learning_rate": 
3.871361693466314e-05, "loss": 1.0237, "step": 3968 }, { "epoch": 1.6135393372636715, "grad_norm": 0.10802386701107025, "learning_rate": 3.867290860981071e-05, "loss": 1.0073, "step": 3969 }, { "epoch": 1.6139459239682863, "grad_norm": 0.09934094548225403, "learning_rate": 3.8632200284958274e-05, "loss": 0.9459, "step": 3970 }, { "epoch": 1.614352510672901, "grad_norm": 0.09900476038455963, "learning_rate": 3.859149196010584e-05, "loss": 0.9196, "step": 3971 }, { "epoch": 1.6147590973775157, "grad_norm": 0.09684500098228455, "learning_rate": 3.855078363525341e-05, "loss": 0.8908, "step": 3972 }, { "epoch": 1.6151656840821305, "grad_norm": 0.10286570340394974, "learning_rate": 3.851007531040098e-05, "loss": 0.8566, "step": 3973 }, { "epoch": 1.6155722707867453, "grad_norm": 0.09902996569871902, "learning_rate": 3.8469366985548545e-05, "loss": 0.8783, "step": 3974 }, { "epoch": 1.61597885749136, "grad_norm": 0.11378595232963562, "learning_rate": 3.842865866069611e-05, "loss": 0.9855, "step": 3975 }, { "epoch": 1.6163854441959749, "grad_norm": 0.10718195885419846, "learning_rate": 3.838795033584368e-05, "loss": 0.9116, "step": 3976 }, { "epoch": 1.6167920309005894, "grad_norm": 0.11180385947227478, "learning_rate": 3.8347242010991255e-05, "loss": 0.9737, "step": 3977 }, { "epoch": 1.6171986176052044, "grad_norm": 0.12050411105155945, "learning_rate": 3.8306533686138816e-05, "loss": 1.1243, "step": 3978 }, { "epoch": 1.617605204309819, "grad_norm": 0.14076527953147888, "learning_rate": 3.826582536128638e-05, "loss": 0.9308, "step": 3979 }, { "epoch": 1.6180117910144338, "grad_norm": 0.10641691833734512, "learning_rate": 3.822511703643396e-05, "loss": 0.9298, "step": 3980 }, { "epoch": 1.6184183777190486, "grad_norm": 0.11289351433515549, "learning_rate": 3.818440871158152e-05, "loss": 0.9239, "step": 3981 }, { "epoch": 1.6188249644236632, "grad_norm": 0.11315469443798065, "learning_rate": 3.8143700386729086e-05, "loss": 1.1374, "step": 3982 }, { "epoch": 
1.6192315511282782, "grad_norm": 0.11176195740699768, "learning_rate": 3.810299206187666e-05, "loss": 0.9552, "step": 3983 }, { "epoch": 1.6196381378328928, "grad_norm": 0.09827902913093567, "learning_rate": 3.806228373702422e-05, "loss": 0.8914, "step": 3984 }, { "epoch": 1.6200447245375076, "grad_norm": 0.10546936839818954, "learning_rate": 3.802157541217179e-05, "loss": 0.9572, "step": 3985 }, { "epoch": 1.6204513112421224, "grad_norm": 0.10368131846189499, "learning_rate": 3.798086708731936e-05, "loss": 0.884, "step": 3986 }, { "epoch": 1.6208578979467372, "grad_norm": 0.1099054366350174, "learning_rate": 3.7940158762466924e-05, "loss": 0.9648, "step": 3987 }, { "epoch": 1.621264484651352, "grad_norm": 0.10556711256504059, "learning_rate": 3.789945043761449e-05, "loss": 1.0355, "step": 3988 }, { "epoch": 1.6216710713559666, "grad_norm": 0.10475050657987595, "learning_rate": 3.7858742112762066e-05, "loss": 0.9746, "step": 3989 }, { "epoch": 1.6220776580605816, "grad_norm": 0.10798493772745132, "learning_rate": 3.781803378790963e-05, "loss": 1.0124, "step": 3990 }, { "epoch": 1.6224842447651961, "grad_norm": 0.10683320462703705, "learning_rate": 3.7777325463057195e-05, "loss": 0.9305, "step": 3991 }, { "epoch": 1.622890831469811, "grad_norm": 0.10677673667669296, "learning_rate": 3.773661713820477e-05, "loss": 0.9599, "step": 3992 }, { "epoch": 1.6232974181744257, "grad_norm": 0.11520379036664963, "learning_rate": 3.769590881335233e-05, "loss": 1.0277, "step": 3993 }, { "epoch": 1.6237040048790403, "grad_norm": 0.10414712876081467, "learning_rate": 3.76552004884999e-05, "loss": 1.0, "step": 3994 }, { "epoch": 1.6241105915836553, "grad_norm": 0.10957575589418411, "learning_rate": 3.761449216364747e-05, "loss": 0.9932, "step": 3995 }, { "epoch": 1.62451717828827, "grad_norm": 0.104839026927948, "learning_rate": 3.757378383879503e-05, "loss": 0.9374, "step": 3996 }, { "epoch": 1.6249237649928847, "grad_norm": 0.11117900162935257, "learning_rate": 
3.75330755139426e-05, "loss": 1.0527, "step": 3997 }, { "epoch": 1.6253303516974995, "grad_norm": 0.10853046923875809, "learning_rate": 3.7492367189090175e-05, "loss": 1.0176, "step": 3998 }, { "epoch": 1.625736938402114, "grad_norm": 0.1107804998755455, "learning_rate": 3.7451658864237736e-05, "loss": 0.9765, "step": 3999 }, { "epoch": 1.626143525106729, "grad_norm": 0.10636276006698608, "learning_rate": 3.741095053938531e-05, "loss": 0.9435, "step": 4000 }, { "epoch": 1.6265501118113437, "grad_norm": 0.11162041872739792, "learning_rate": 3.737024221453288e-05, "loss": 0.9547, "step": 4001 }, { "epoch": 1.6269566985159585, "grad_norm": 0.11098874360322952, "learning_rate": 3.732953388968044e-05, "loss": 1.0169, "step": 4002 }, { "epoch": 1.6273632852205733, "grad_norm": 0.10745534300804138, "learning_rate": 3.728882556482801e-05, "loss": 1.0881, "step": 4003 }, { "epoch": 1.627769871925188, "grad_norm": 0.10475347936153412, "learning_rate": 3.7248117239975574e-05, "loss": 0.9398, "step": 4004 }, { "epoch": 1.6281764586298029, "grad_norm": 0.11412277817726135, "learning_rate": 3.720740891512314e-05, "loss": 1.0969, "step": 4005 }, { "epoch": 1.6285830453344174, "grad_norm": 0.1082950234413147, "learning_rate": 3.7166700590270716e-05, "loss": 0.8739, "step": 4006 }, { "epoch": 1.6289896320390325, "grad_norm": 0.10246314853429794, "learning_rate": 3.712599226541828e-05, "loss": 0.9542, "step": 4007 }, { "epoch": 1.629396218743647, "grad_norm": 0.10546763986349106, "learning_rate": 3.7085283940565845e-05, "loss": 0.9319, "step": 4008 }, { "epoch": 1.6298028054482618, "grad_norm": 0.10902021825313568, "learning_rate": 3.704457561571342e-05, "loss": 0.9704, "step": 4009 }, { "epoch": 1.6302093921528766, "grad_norm": 0.10512792319059372, "learning_rate": 3.700386729086098e-05, "loss": 0.8641, "step": 4010 }, { "epoch": 1.6306159788574912, "grad_norm": 0.11129160970449448, "learning_rate": 3.696315896600855e-05, "loss": 1.0655, "step": 4011 }, { "epoch": 
1.6310225655621062, "grad_norm": 0.11214631050825119, "learning_rate": 3.692245064115612e-05, "loss": 1.0528, "step": 4012 }, { "epoch": 1.6314291522667208, "grad_norm": 0.11774066835641861, "learning_rate": 3.688174231630368e-05, "loss": 1.0389, "step": 4013 }, { "epoch": 1.6318357389713356, "grad_norm": 0.10817237198352814, "learning_rate": 3.684103399145125e-05, "loss": 0.9741, "step": 4014 }, { "epoch": 1.6322423256759504, "grad_norm": 0.10697966814041138, "learning_rate": 3.6800325666598825e-05, "loss": 0.9523, "step": 4015 }, { "epoch": 1.6326489123805652, "grad_norm": 0.09764157235622406, "learning_rate": 3.6759617341746386e-05, "loss": 0.8462, "step": 4016 }, { "epoch": 1.63305549908518, "grad_norm": 0.10472942143678665, "learning_rate": 3.671890901689395e-05, "loss": 0.9925, "step": 4017 }, { "epoch": 1.6334620857897946, "grad_norm": 0.1018701046705246, "learning_rate": 3.667820069204153e-05, "loss": 0.8639, "step": 4018 }, { "epoch": 1.6338686724944096, "grad_norm": 0.09498531371355057, "learning_rate": 3.663749236718909e-05, "loss": 0.8634, "step": 4019 }, { "epoch": 1.6342752591990242, "grad_norm": 0.10013435781002045, "learning_rate": 3.6596784042336656e-05, "loss": 0.8305, "step": 4020 }, { "epoch": 1.634681845903639, "grad_norm": 0.10749876499176025, "learning_rate": 3.655607571748423e-05, "loss": 0.9569, "step": 4021 }, { "epoch": 1.6350884326082538, "grad_norm": 0.10520561784505844, "learning_rate": 3.651536739263179e-05, "loss": 0.9167, "step": 4022 }, { "epoch": 1.6354950193128683, "grad_norm": 0.11006909608840942, "learning_rate": 3.6474659067779366e-05, "loss": 0.9469, "step": 4023 }, { "epoch": 1.6359016060174834, "grad_norm": 0.10426217317581177, "learning_rate": 3.6433950742926933e-05, "loss": 0.8905, "step": 4024 }, { "epoch": 1.636308192722098, "grad_norm": 0.10819458216428757, "learning_rate": 3.6393242418074494e-05, "loss": 1.0306, "step": 4025 }, { "epoch": 1.6367147794267127, "grad_norm": 0.10247842967510223, "learning_rate": 
3.635253409322207e-05, "loss": 0.8619, "step": 4026 }, { "epoch": 1.6371213661313275, "grad_norm": 0.10943040996789932, "learning_rate": 3.6311825768369636e-05, "loss": 0.9491, "step": 4027 }, { "epoch": 1.637527952835942, "grad_norm": 0.10056941211223602, "learning_rate": 3.62711174435172e-05, "loss": 0.9287, "step": 4028 }, { "epoch": 1.6379345395405571, "grad_norm": 0.11309908330440521, "learning_rate": 3.623040911866477e-05, "loss": 1.0414, "step": 4029 }, { "epoch": 1.6383411262451717, "grad_norm": 0.10608269274234772, "learning_rate": 3.618970079381234e-05, "loss": 1.0614, "step": 4030 }, { "epoch": 1.6387477129497865, "grad_norm": 0.10660211741924286, "learning_rate": 3.61489924689599e-05, "loss": 0.9974, "step": 4031 }, { "epoch": 1.6391542996544013, "grad_norm": 0.10648634284734726, "learning_rate": 3.6108284144107475e-05, "loss": 0.9815, "step": 4032 }, { "epoch": 1.639560886359016, "grad_norm": 0.10975086688995361, "learning_rate": 3.606757581925504e-05, "loss": 0.9608, "step": 4033 }, { "epoch": 1.6399674730636309, "grad_norm": 0.09726303815841675, "learning_rate": 3.60268674944026e-05, "loss": 0.8703, "step": 4034 }, { "epoch": 1.6403740597682455, "grad_norm": null, "learning_rate": 3.598615916955018e-05, "loss": 1.128, "step": 4035 }, { "epoch": 1.6407806464728605, "grad_norm": 0.10834196954965591, "learning_rate": 3.5945450844697745e-05, "loss": 0.9676, "step": 4036 }, { "epoch": 1.641187233177475, "grad_norm": 0.10544741898775101, "learning_rate": 3.5904742519845306e-05, "loss": 0.8797, "step": 4037 }, { "epoch": 1.6415938198820899, "grad_norm": 0.11281091719865799, "learning_rate": 3.586403419499288e-05, "loss": 0.8458, "step": 4038 }, { "epoch": 1.6420004065867047, "grad_norm": 0.11323501914739609, "learning_rate": 3.582332587014045e-05, "loss": 1.054, "step": 4039 }, { "epoch": 1.6424069932913192, "grad_norm": 0.1242019459605217, "learning_rate": 3.578261754528801e-05, "loss": 0.978, "step": 4040 }, { "epoch": 1.6428135799959342, "grad_norm": 
0.11558779329061508, "learning_rate": 3.574190922043558e-05, "loss": 0.9664, "step": 4041 }, { "epoch": 1.6432201667005488, "grad_norm": 0.11010782420635223, "learning_rate": 3.570120089558315e-05, "loss": 0.9728, "step": 4042 }, { "epoch": 1.6436267534051636, "grad_norm": 0.11480515450239182, "learning_rate": 3.566049257073071e-05, "loss": 0.9653, "step": 4043 }, { "epoch": 1.6440333401097784, "grad_norm": 0.10841669887304306, "learning_rate": 3.5619784245878286e-05, "loss": 0.9257, "step": 4044 }, { "epoch": 1.6444399268143932, "grad_norm": 0.11190009117126465, "learning_rate": 3.5579075921025854e-05, "loss": 0.979, "step": 4045 }, { "epoch": 1.644846513519008, "grad_norm": 0.12733308970928192, "learning_rate": 3.553836759617342e-05, "loss": 1.1891, "step": 4046 }, { "epoch": 1.6452531002236226, "grad_norm": 0.11118471622467041, "learning_rate": 3.549765927132099e-05, "loss": 0.9259, "step": 4047 }, { "epoch": 1.6456596869282376, "grad_norm": 0.10010193288326263, "learning_rate": 3.545695094646856e-05, "loss": 0.8568, "step": 4048 }, { "epoch": 1.6460662736328522, "grad_norm": 0.10972374677658081, "learning_rate": 3.5416242621616124e-05, "loss": 1.0115, "step": 4049 }, { "epoch": 1.646472860337467, "grad_norm": 0.10799884051084518, "learning_rate": 3.537553429676369e-05, "loss": 0.962, "step": 4050 }, { "epoch": 1.6468794470420818, "grad_norm": 0.10064574331045151, "learning_rate": 3.533482597191126e-05, "loss": 0.8995, "step": 4051 }, { "epoch": 1.6472860337466964, "grad_norm": 0.1071900725364685, "learning_rate": 3.529411764705883e-05, "loss": 0.9491, "step": 4052 }, { "epoch": 1.6476926204513114, "grad_norm": 0.09920529276132584, "learning_rate": 3.5253409322206395e-05, "loss": 0.8961, "step": 4053 }, { "epoch": 1.648099207155926, "grad_norm": 0.12487678974866867, "learning_rate": 3.5212700997353956e-05, "loss": 1.0935, "step": 4054 }, { "epoch": 1.6485057938605407, "grad_norm": 0.10418154299259186, "learning_rate": 3.517199267250153e-05, "loss": 0.9397, 
"step": 4055 }, { "epoch": 1.6489123805651555, "grad_norm": 0.1157752275466919, "learning_rate": 3.51312843476491e-05, "loss": 0.98, "step": 4056 }, { "epoch": 1.6493189672697701, "grad_norm": 0.11210603266954422, "learning_rate": 3.509057602279666e-05, "loss": 0.9784, "step": 4057 }, { "epoch": 1.6497255539743851, "grad_norm": 0.10065794736146927, "learning_rate": 3.504986769794423e-05, "loss": 0.8451, "step": 4058 }, { "epoch": 1.6501321406789997, "grad_norm": 0.11964450031518936, "learning_rate": 3.50091593730918e-05, "loss": 0.9799, "step": 4059 }, { "epoch": 1.6505387273836145, "grad_norm": 0.10936059057712555, "learning_rate": 3.496845104823936e-05, "loss": 0.927, "step": 4060 }, { "epoch": 1.6509453140882293, "grad_norm": 0.1044425368309021, "learning_rate": 3.4927742723386936e-05, "loss": 0.9029, "step": 4061 }, { "epoch": 1.651351900792844, "grad_norm": 0.1131415069103241, "learning_rate": 3.4887034398534504e-05, "loss": 0.9837, "step": 4062 }, { "epoch": 1.651758487497459, "grad_norm": 0.10537821054458618, "learning_rate": 3.4846326073682064e-05, "loss": 0.9502, "step": 4063 }, { "epoch": 1.6521650742020735, "grad_norm": 0.10901257395744324, "learning_rate": 3.480561774882964e-05, "loss": 0.9424, "step": 4064 }, { "epoch": 1.6525716609066885, "grad_norm": 0.1178942546248436, "learning_rate": 3.4764909423977206e-05, "loss": 1.0408, "step": 4065 }, { "epoch": 1.652978247611303, "grad_norm": 0.10703755915164948, "learning_rate": 3.472420109912477e-05, "loss": 0.8829, "step": 4066 }, { "epoch": 1.6533848343159179, "grad_norm": 0.11088605970144272, "learning_rate": 3.468349277427234e-05, "loss": 0.9954, "step": 4067 }, { "epoch": 1.6537914210205327, "grad_norm": 0.09962453693151474, "learning_rate": 3.464278444941991e-05, "loss": 0.8575, "step": 4068 }, { "epoch": 1.6541980077251472, "grad_norm": 0.111634261906147, "learning_rate": 3.460207612456747e-05, "loss": 1.0138, "step": 4069 }, { "epoch": 1.6546045944297623, "grad_norm": 0.11267593502998352, 
"learning_rate": 3.4561367799715045e-05, "loss": 1.0642, "step": 4070 }, { "epoch": 1.6550111811343768, "grad_norm": 0.09662552177906036, "learning_rate": 3.452065947486261e-05, "loss": 0.8354, "step": 4071 }, { "epoch": 1.6554177678389916, "grad_norm": 0.10733507573604584, "learning_rate": 3.447995115001018e-05, "loss": 0.9678, "step": 4072 }, { "epoch": 1.6558243545436064, "grad_norm": 0.11181973665952682, "learning_rate": 3.443924282515775e-05, "loss": 0.9635, "step": 4073 }, { "epoch": 1.6562309412482212, "grad_norm": 0.10606959462165833, "learning_rate": 3.4398534500305315e-05, "loss": 0.9822, "step": 4074 }, { "epoch": 1.656637527952836, "grad_norm": 0.10522132366895676, "learning_rate": 3.435782617545288e-05, "loss": 0.9106, "step": 4075 }, { "epoch": 1.6570441146574506, "grad_norm": 0.10762479156255722, "learning_rate": 3.431711785060045e-05, "loss": 0.8781, "step": 4076 }, { "epoch": 1.6574507013620656, "grad_norm": 0.10766679793596268, "learning_rate": 3.427640952574802e-05, "loss": 0.902, "step": 4077 }, { "epoch": 1.6578572880666802, "grad_norm": 0.10841795802116394, "learning_rate": 3.4235701200895586e-05, "loss": 0.9509, "step": 4078 }, { "epoch": 1.658263874771295, "grad_norm": 0.10554873943328857, "learning_rate": 3.419499287604315e-05, "loss": 0.9276, "step": 4079 }, { "epoch": 1.6586704614759098, "grad_norm": 0.10341285914182663, "learning_rate": 3.415428455119072e-05, "loss": 0.8962, "step": 4080 }, { "epoch": 1.6590770481805244, "grad_norm": 0.11342310160398483, "learning_rate": 3.411357622633829e-05, "loss": 0.9239, "step": 4081 }, { "epoch": 1.6594836348851394, "grad_norm": 0.11349890381097794, "learning_rate": 3.4072867901485856e-05, "loss": 1.0019, "step": 4082 }, { "epoch": 1.659890221589754, "grad_norm": 0.11031408607959747, "learning_rate": 3.4032159576633424e-05, "loss": 0.9638, "step": 4083 }, { "epoch": 1.6602968082943688, "grad_norm": 0.10704563558101654, "learning_rate": 3.399145125178099e-05, "loss": 1.0363, "step": 4084 }, { 
"epoch": 1.6607033949989836, "grad_norm": 0.10755988955497742, "learning_rate": 3.395074292692856e-05, "loss": 0.9333, "step": 4085 }, { "epoch": 1.6611099817035981, "grad_norm": 0.11083410680294037, "learning_rate": 3.391003460207613e-05, "loss": 0.9933, "step": 4086 }, { "epoch": 1.6615165684082132, "grad_norm": 0.11164813488721848, "learning_rate": 3.3869326277223694e-05, "loss": 0.9725, "step": 4087 }, { "epoch": 1.6619231551128277, "grad_norm": 0.10156459361314774, "learning_rate": 3.382861795237126e-05, "loss": 0.9173, "step": 4088 }, { "epoch": 1.6623297418174425, "grad_norm": 0.11313030123710632, "learning_rate": 3.378790962751883e-05, "loss": 1.0201, "step": 4089 }, { "epoch": 1.6627363285220573, "grad_norm": 0.1112866923213005, "learning_rate": 3.37472013026664e-05, "loss": 0.9779, "step": 4090 }, { "epoch": 1.6631429152266721, "grad_norm": 0.11510057747364044, "learning_rate": 3.3706492977813965e-05, "loss": 1.0491, "step": 4091 }, { "epoch": 1.663549501931287, "grad_norm": 0.10526982694864273, "learning_rate": 3.366578465296153e-05, "loss": 0.9397, "step": 4092 }, { "epoch": 1.6639560886359015, "grad_norm": 0.11020489037036896, "learning_rate": 3.36250763281091e-05, "loss": 1.0285, "step": 4093 }, { "epoch": 1.6643626753405165, "grad_norm": 0.12323461472988129, "learning_rate": 3.358436800325667e-05, "loss": 0.9692, "step": 4094 }, { "epoch": 1.664769262045131, "grad_norm": 0.1031549721956253, "learning_rate": 3.3543659678404235e-05, "loss": 0.9761, "step": 4095 }, { "epoch": 1.665175848749746, "grad_norm": 0.11584251374006271, "learning_rate": 3.35029513535518e-05, "loss": 0.9312, "step": 4096 }, { "epoch": 1.6655824354543607, "grad_norm": 0.12074989080429077, "learning_rate": 3.346224302869937e-05, "loss": 1.1116, "step": 4097 }, { "epoch": 1.6659890221589753, "grad_norm": 0.10211546719074249, "learning_rate": 3.342153470384694e-05, "loss": 0.9243, "step": 4098 }, { "epoch": 1.6663956088635903, "grad_norm": 0.10890009999275208, "learning_rate": 
3.3380826378994506e-05, "loss": 0.9122, "step": 4099 }, { "epoch": 1.6668021955682049, "grad_norm": 0.10587424039840698, "learning_rate": 3.3340118054142074e-05, "loss": 0.9334, "step": 4100 }, { "epoch": 1.6672087822728197, "grad_norm": 0.11641372740268707, "learning_rate": 3.329940972928964e-05, "loss": 1.0075, "step": 4101 }, { "epoch": 1.6676153689774345, "grad_norm": 0.1025613471865654, "learning_rate": 3.325870140443721e-05, "loss": 0.8918, "step": 4102 }, { "epoch": 1.6680219556820493, "grad_norm": 0.10911375284194946, "learning_rate": 3.3217993079584777e-05, "loss": 0.9776, "step": 4103 }, { "epoch": 1.668428542386664, "grad_norm": 0.10893717408180237, "learning_rate": 3.3177284754732344e-05, "loss": 0.9123, "step": 4104 }, { "epoch": 1.6688351290912786, "grad_norm": 0.09526187181472778, "learning_rate": 3.313657642987991e-05, "loss": 0.8188, "step": 4105 }, { "epoch": 1.6692417157958934, "grad_norm": 0.12063385546207428, "learning_rate": 3.309586810502748e-05, "loss": 1.0367, "step": 4106 }, { "epoch": 1.6696483025005082, "grad_norm": 0.09977064281702042, "learning_rate": 3.305515978017505e-05, "loss": 0.8625, "step": 4107 }, { "epoch": 1.670054889205123, "grad_norm": 0.1041639968752861, "learning_rate": 3.3014451455322615e-05, "loss": 0.8441, "step": 4108 }, { "epoch": 1.6704614759097378, "grad_norm": 0.11623057723045349, "learning_rate": 3.297374313047018e-05, "loss": 0.9207, "step": 4109 }, { "epoch": 1.6708680626143524, "grad_norm": 0.1048574224114418, "learning_rate": 3.293303480561775e-05, "loss": 0.8714, "step": 4110 }, { "epoch": 1.6712746493189674, "grad_norm": 0.10759609192609787, "learning_rate": 3.289232648076532e-05, "loss": 0.9579, "step": 4111 }, { "epoch": 1.671681236023582, "grad_norm": 0.10754968971014023, "learning_rate": 3.2851618155912885e-05, "loss": 0.9247, "step": 4112 }, { "epoch": 1.6720878227281968, "grad_norm": 0.11272035539150238, "learning_rate": 3.281090983106045e-05, "loss": 0.9927, "step": 4113 }, { "epoch": 
1.6724944094328116, "grad_norm": 0.11457539349794388, "learning_rate": 3.277020150620802e-05, "loss": 1.005, "step": 4114 }, { "epoch": 1.6729009961374262, "grad_norm": 0.10070742666721344, "learning_rate": 3.272949318135559e-05, "loss": 0.8809, "step": 4115 }, { "epoch": 1.6733075828420412, "grad_norm": 0.11025416105985641, "learning_rate": 3.2688784856503156e-05, "loss": 0.9467, "step": 4116 }, { "epoch": 1.6737141695466558, "grad_norm": 0.09717261791229248, "learning_rate": 3.2648076531650723e-05, "loss": 0.846, "step": 4117 }, { "epoch": 1.6741207562512705, "grad_norm": 0.10521706938743591, "learning_rate": 3.260736820679829e-05, "loss": 0.9839, "step": 4118 }, { "epoch": 1.6745273429558853, "grad_norm": 0.10194703936576843, "learning_rate": 3.256665988194586e-05, "loss": 0.8897, "step": 4119 }, { "epoch": 1.6749339296605001, "grad_norm": 0.1110045537352562, "learning_rate": 3.2525951557093426e-05, "loss": 0.9729, "step": 4120 }, { "epoch": 1.675340516365115, "grad_norm": 0.11252006888389587, "learning_rate": 3.2485243232240994e-05, "loss": 0.9685, "step": 4121 }, { "epoch": 1.6757471030697295, "grad_norm": 0.11200141161680222, "learning_rate": 3.244453490738856e-05, "loss": 0.9663, "step": 4122 }, { "epoch": 1.6761536897743445, "grad_norm": 0.116104856133461, "learning_rate": 3.240382658253613e-05, "loss": 1.1104, "step": 4123 }, { "epoch": 1.676560276478959, "grad_norm": 0.11117435991764069, "learning_rate": 3.23631182576837e-05, "loss": 0.9643, "step": 4124 }, { "epoch": 1.676966863183574, "grad_norm": 0.10212714970111847, "learning_rate": 3.2322409932831265e-05, "loss": 0.911, "step": 4125 }, { "epoch": 1.6773734498881887, "grad_norm": 0.11014258116483688, "learning_rate": 3.228170160797883e-05, "loss": 0.9405, "step": 4126 }, { "epoch": 1.6777800365928033, "grad_norm": 0.10939601808786392, "learning_rate": 3.22409932831264e-05, "loss": 0.9238, "step": 4127 }, { "epoch": 1.6781866232974183, "grad_norm": 0.10850725322961807, "learning_rate": 
3.220028495827397e-05, "loss": 0.9634, "step": 4128 }, { "epoch": 1.6785932100020329, "grad_norm": 0.10427679866552353, "learning_rate": 3.2159576633421535e-05, "loss": 0.8854, "step": 4129 }, { "epoch": 1.6789997967066477, "grad_norm": 0.11108868569135666, "learning_rate": 3.21188683085691e-05, "loss": 1.0035, "step": 4130 }, { "epoch": 1.6794063834112625, "grad_norm": 0.09677024930715561, "learning_rate": 3.207815998371667e-05, "loss": 0.8773, "step": 4131 }, { "epoch": 1.679812970115877, "grad_norm": 0.11575711518526077, "learning_rate": 3.203745165886424e-05, "loss": 0.961, "step": 4132 }, { "epoch": 1.680219556820492, "grad_norm": 0.10279621928930283, "learning_rate": 3.1996743334011806e-05, "loss": 0.8362, "step": 4133 }, { "epoch": 1.6806261435251066, "grad_norm": 0.10634070634841919, "learning_rate": 3.195603500915937e-05, "loss": 0.9402, "step": 4134 }, { "epoch": 1.6810327302297214, "grad_norm": 0.09615077078342438, "learning_rate": 3.191532668430694e-05, "loss": 0.9251, "step": 4135 }, { "epoch": 1.6814393169343362, "grad_norm": 0.11199648678302765, "learning_rate": 3.187461835945451e-05, "loss": 1.0387, "step": 4136 }, { "epoch": 1.681845903638951, "grad_norm": 0.10691442340612411, "learning_rate": 3.1833910034602076e-05, "loss": 0.9449, "step": 4137 }, { "epoch": 1.6822524903435658, "grad_norm": 0.10192760825157166, "learning_rate": 3.1793201709749644e-05, "loss": 0.9167, "step": 4138 }, { "epoch": 1.6826590770481804, "grad_norm": 0.09949979931116104, "learning_rate": 3.175249338489721e-05, "loss": 0.897, "step": 4139 }, { "epoch": 1.6830656637527954, "grad_norm": 0.10565482079982758, "learning_rate": 3.171178506004478e-05, "loss": 0.8484, "step": 4140 }, { "epoch": 1.68347225045741, "grad_norm": 0.11706092953681946, "learning_rate": 3.1671076735192353e-05, "loss": 1.0888, "step": 4141 }, { "epoch": 1.6838788371620248, "grad_norm": 0.10487108677625656, "learning_rate": 3.1630368410339914e-05, "loss": 0.9405, "step": 4142 }, { "epoch": 
1.6842854238666396, "grad_norm": 0.10859554260969162, "learning_rate": 3.158966008548748e-05, "loss": 1.017, "step": 4143 }, { "epoch": 1.6846920105712542, "grad_norm": 0.10497396439313889, "learning_rate": 3.1548951760635056e-05, "loss": 0.8899, "step": 4144 }, { "epoch": 1.6850985972758692, "grad_norm": 0.10445064306259155, "learning_rate": 3.150824343578262e-05, "loss": 0.9581, "step": 4145 }, { "epoch": 1.6855051839804838, "grad_norm": 0.11567474156618118, "learning_rate": 3.1467535110930185e-05, "loss": 1.0488, "step": 4146 }, { "epoch": 1.6859117706850986, "grad_norm": 0.10222821682691574, "learning_rate": 3.142682678607776e-05, "loss": 0.8523, "step": 4147 }, { "epoch": 1.6863183573897134, "grad_norm": 0.10138101130723953, "learning_rate": 3.138611846122532e-05, "loss": 0.8837, "step": 4148 }, { "epoch": 1.6867249440943282, "grad_norm": 0.10402019321918488, "learning_rate": 3.134541013637289e-05, "loss": 0.8731, "step": 4149 }, { "epoch": 1.687131530798943, "grad_norm": 0.10087074339389801, "learning_rate": 3.130470181152046e-05, "loss": 0.8866, "step": 4150 }, { "epoch": 1.6875381175035575, "grad_norm": 0.10861088335514069, "learning_rate": 3.126399348666802e-05, "loss": 0.9968, "step": 4151 }, { "epoch": 1.6879447042081726, "grad_norm": 0.10692057758569717, "learning_rate": 3.122328516181559e-05, "loss": 0.9646, "step": 4152 }, { "epoch": 1.6883512909127871, "grad_norm": 0.10919506847858429, "learning_rate": 3.1182576836963165e-05, "loss": 0.9265, "step": 4153 }, { "epoch": 1.688757877617402, "grad_norm": 0.11321806907653809, "learning_rate": 3.1141868512110726e-05, "loss": 1.0094, "step": 4154 }, { "epoch": 1.6891644643220167, "grad_norm": 0.09933071583509445, "learning_rate": 3.1101160187258294e-05, "loss": 0.8347, "step": 4155 }, { "epoch": 1.6895710510266313, "grad_norm": 0.10513114184141159, "learning_rate": 3.106045186240587e-05, "loss": 0.9218, "step": 4156 }, { "epoch": 1.6899776377312463, "grad_norm": 0.10074515640735626, "learning_rate": 
3.101974353755343e-05, "loss": 0.8773, "step": 4157 }, { "epoch": 1.690384224435861, "grad_norm": 0.11252142488956451, "learning_rate": 3.0979035212700996e-05, "loss": 1.0388, "step": 4158 }, { "epoch": 1.6907908111404757, "grad_norm": 0.11315988004207611, "learning_rate": 3.093832688784857e-05, "loss": 0.9604, "step": 4159 }, { "epoch": 1.6911973978450905, "grad_norm": 0.09731707721948624, "learning_rate": 3.089761856299613e-05, "loss": 0.8388, "step": 4160 }, { "epoch": 1.691603984549705, "grad_norm": 0.117193803191185, "learning_rate": 3.08569102381437e-05, "loss": 1.0092, "step": 4161 }, { "epoch": 1.69201057125432, "grad_norm": 0.10533732920885086, "learning_rate": 3.0816201913291274e-05, "loss": 0.9124, "step": 4162 }, { "epoch": 1.6924171579589347, "grad_norm": 0.11219623684883118, "learning_rate": 3.0775493588438835e-05, "loss": 0.9709, "step": 4163 }, { "epoch": 1.6928237446635495, "grad_norm": 0.10370016098022461, "learning_rate": 3.073478526358641e-05, "loss": 1.0248, "step": 4164 }, { "epoch": 1.6932303313681643, "grad_norm": 0.10810839384794235, "learning_rate": 3.069407693873398e-05, "loss": 0.9126, "step": 4165 }, { "epoch": 1.693636918072779, "grad_norm": 0.09728986769914627, "learning_rate": 3.065336861388154e-05, "loss": 0.7728, "step": 4166 }, { "epoch": 1.6940435047773938, "grad_norm": 0.11202438920736313, "learning_rate": 3.061266028902911e-05, "loss": 0.9593, "step": 4167 }, { "epoch": 1.6944500914820084, "grad_norm": 0.9556000232696533, "learning_rate": 3.057195196417667e-05, "loss": 0.9398, "step": 4168 }, { "epoch": 1.6948566781866234, "grad_norm": 0.11005814373493195, "learning_rate": 3.053124363932424e-05, "loss": 0.9694, "step": 4169 }, { "epoch": 1.695263264891238, "grad_norm": 0.11754006147384644, "learning_rate": 3.049053531447181e-05, "loss": 1.0125, "step": 4170 }, { "epoch": 1.6956698515958528, "grad_norm": 0.1051679253578186, "learning_rate": 3.044982698961938e-05, "loss": 0.9359, "step": 4171 }, { "epoch": 1.6960764383004676, 
"grad_norm": 0.11605421453714371, "learning_rate": 3.0409118664766943e-05, "loss": 1.0338, "step": 4172 }, { "epoch": 1.6964830250050822, "grad_norm": 0.10814854502677917, "learning_rate": 3.0368410339914514e-05, "loss": 0.9421, "step": 4173 }, { "epoch": 1.6968896117096972, "grad_norm": 0.10694431513547897, "learning_rate": 3.0327702015062082e-05, "loss": 0.8746, "step": 4174 }, { "epoch": 1.6972961984143118, "grad_norm": 0.10969371348619461, "learning_rate": 3.0286993690209646e-05, "loss": 0.9407, "step": 4175 }, { "epoch": 1.6977027851189266, "grad_norm": 0.11277522146701813, "learning_rate": 3.0246285365357217e-05, "loss": 0.9694, "step": 4176 }, { "epoch": 1.6981093718235414, "grad_norm": 0.10470854490995407, "learning_rate": 3.0205577040504785e-05, "loss": 0.9102, "step": 4177 }, { "epoch": 1.6985159585281562, "grad_norm": 0.11162138730287552, "learning_rate": 3.016486871565235e-05, "loss": 0.9992, "step": 4178 }, { "epoch": 1.698922545232771, "grad_norm": 0.10514409840106964, "learning_rate": 3.012416039079992e-05, "loss": 0.9071, "step": 4179 }, { "epoch": 1.6993291319373856, "grad_norm": 0.10722570866346359, "learning_rate": 3.0083452065947488e-05, "loss": 0.9165, "step": 4180 }, { "epoch": 1.6997357186420006, "grad_norm": 0.11843981593847275, "learning_rate": 3.0042743741095052e-05, "loss": 1.0993, "step": 4181 }, { "epoch": 1.7001423053466151, "grad_norm": 0.09846517443656921, "learning_rate": 3.0002035416242623e-05, "loss": 0.7917, "step": 4182 }, { "epoch": 1.70054889205123, "grad_norm": 0.09692200273275375, "learning_rate": 2.996132709139019e-05, "loss": 0.8828, "step": 4183 }, { "epoch": 1.7009554787558447, "grad_norm": 0.11660193651914597, "learning_rate": 2.9920618766537755e-05, "loss": 1.0471, "step": 4184 }, { "epoch": 1.7013620654604593, "grad_norm": 0.10081440955400467, "learning_rate": 2.9879910441685326e-05, "loss": 0.8514, "step": 4185 }, { "epoch": 1.7017686521650743, "grad_norm": 0.10978380590677261, "learning_rate": 
2.9839202116832894e-05, "loss": 0.9892, "step": 4186 }, { "epoch": 1.702175238869689, "grad_norm": 0.1116599589586258, "learning_rate": 2.9798493791980465e-05, "loss": 0.9694, "step": 4187 }, { "epoch": 1.7025818255743037, "grad_norm": 0.11739277094602585, "learning_rate": 2.975778546712803e-05, "loss": 1.0799, "step": 4188 }, { "epoch": 1.7029884122789185, "grad_norm": 0.1063208132982254, "learning_rate": 2.9717077142275596e-05, "loss": 0.8933, "step": 4189 }, { "epoch": 1.703394998983533, "grad_norm": 0.12064635753631592, "learning_rate": 2.9676368817423168e-05, "loss": 0.9448, "step": 4190 }, { "epoch": 1.703801585688148, "grad_norm": 0.11632698774337769, "learning_rate": 2.9635660492570732e-05, "loss": 1.0139, "step": 4191 }, { "epoch": 1.7042081723927627, "grad_norm": 0.10843649506568909, "learning_rate": 2.95949521677183e-05, "loss": 0.909, "step": 4192 }, { "epoch": 1.7046147590973775, "grad_norm": 0.11414767056703568, "learning_rate": 2.955424384286587e-05, "loss": 1.0077, "step": 4193 }, { "epoch": 1.7050213458019923, "grad_norm": 0.11367535591125488, "learning_rate": 2.9513535518013435e-05, "loss": 1.01, "step": 4194 }, { "epoch": 1.705427932506607, "grad_norm": 0.11367020756006241, "learning_rate": 2.9472827193161002e-05, "loss": 1.024, "step": 4195 }, { "epoch": 1.7058345192112219, "grad_norm": 0.1111876368522644, "learning_rate": 2.9432118868308573e-05, "loss": 0.9485, "step": 4196 }, { "epoch": 1.7062411059158364, "grad_norm": 0.11318478733301163, "learning_rate": 2.9391410543456138e-05, "loss": 0.9367, "step": 4197 }, { "epoch": 1.7066476926204515, "grad_norm": 0.10784564912319183, "learning_rate": 2.9350702218603705e-05, "loss": 0.8722, "step": 4198 }, { "epoch": 1.707054279325066, "grad_norm": 0.10362992435693741, "learning_rate": 2.9309993893751276e-05, "loss": 0.9683, "step": 4199 }, { "epoch": 1.7074608660296808, "grad_norm": 0.10294868052005768, "learning_rate": 2.926928556889884e-05, "loss": 0.8937, "step": 4200 }, { "epoch": 
1.7078674527342956, "grad_norm": 0.09753353893756866, "learning_rate": 2.9228577244046408e-05, "loss": 0.8074, "step": 4201 }, { "epoch": 1.7082740394389102, "grad_norm": 0.1013207882642746, "learning_rate": 2.918786891919398e-05, "loss": 0.8929, "step": 4202 }, { "epoch": 1.7086806261435252, "grad_norm": 0.10623973608016968, "learning_rate": 2.9147160594341543e-05, "loss": 0.9603, "step": 4203 }, { "epoch": 1.7090872128481398, "grad_norm": 0.10540005564689636, "learning_rate": 2.910645226948911e-05, "loss": 0.9633, "step": 4204 }, { "epoch": 1.7094937995527546, "grad_norm": 0.10837602615356445, "learning_rate": 2.9065743944636682e-05, "loss": 0.9487, "step": 4205 }, { "epoch": 1.7099003862573694, "grad_norm": 0.11080582439899445, "learning_rate": 2.9025035619784246e-05, "loss": 0.9073, "step": 4206 }, { "epoch": 1.7103069729619842, "grad_norm": 0.09433023631572723, "learning_rate": 2.8984327294931814e-05, "loss": 0.8649, "step": 4207 }, { "epoch": 1.710713559666599, "grad_norm": 0.1045960932970047, "learning_rate": 2.8943618970079385e-05, "loss": 0.9516, "step": 4208 }, { "epoch": 1.7111201463712136, "grad_norm": 0.10738299041986465, "learning_rate": 2.890291064522695e-05, "loss": 0.971, "step": 4209 }, { "epoch": 1.7115267330758284, "grad_norm": 0.11573982238769531, "learning_rate": 2.886220232037452e-05, "loss": 1.0267, "step": 4210 }, { "epoch": 1.7119333197804432, "grad_norm": 0.0996336117386818, "learning_rate": 2.8821493995522088e-05, "loss": 0.8579, "step": 4211 }, { "epoch": 1.712339906485058, "grad_norm": 0.09528303146362305, "learning_rate": 2.8780785670669652e-05, "loss": 0.8302, "step": 4212 }, { "epoch": 1.7127464931896728, "grad_norm": 0.10783466696739197, "learning_rate": 2.8740077345817223e-05, "loss": 0.9427, "step": 4213 }, { "epoch": 1.7131530798942873, "grad_norm": 0.11174463480710983, "learning_rate": 2.869936902096479e-05, "loss": 1.046, "step": 4214 }, { "epoch": 1.7135596665989024, "grad_norm": 0.10815679281949997, "learning_rate": 
2.8658660696112355e-05, "loss": 0.9673, "step": 4215 }, { "epoch": 1.713966253303517, "grad_norm": 0.11043757200241089, "learning_rate": 2.8617952371259926e-05, "loss": 0.9417, "step": 4216 }, { "epoch": 1.7143728400081317, "grad_norm": 0.10383492708206177, "learning_rate": 2.8577244046407494e-05, "loss": 0.9286, "step": 4217 }, { "epoch": 1.7147794267127465, "grad_norm": 0.11312952637672424, "learning_rate": 2.8536535721555058e-05, "loss": 0.9456, "step": 4218 }, { "epoch": 1.715186013417361, "grad_norm": 0.10684169828891754, "learning_rate": 2.849582739670263e-05, "loss": 0.9141, "step": 4219 }, { "epoch": 1.7155926001219761, "grad_norm": 0.11260170489549637, "learning_rate": 2.8455119071850193e-05, "loss": 1.0298, "step": 4220 }, { "epoch": 1.7159991868265907, "grad_norm": 0.10850688070058823, "learning_rate": 2.841441074699776e-05, "loss": 0.9917, "step": 4221 }, { "epoch": 1.7164057735312055, "grad_norm": 0.11166807264089584, "learning_rate": 2.8373702422145332e-05, "loss": 0.9929, "step": 4222 }, { "epoch": 1.7168123602358203, "grad_norm": 0.1088751032948494, "learning_rate": 2.8332994097292896e-05, "loss": 0.9841, "step": 4223 }, { "epoch": 1.717218946940435, "grad_norm": 0.10074079036712646, "learning_rate": 2.8292285772440464e-05, "loss": 0.925, "step": 4224 }, { "epoch": 1.7176255336450499, "grad_norm": 0.11211121827363968, "learning_rate": 2.8251577447588035e-05, "loss": 0.9529, "step": 4225 }, { "epoch": 1.7180321203496645, "grad_norm": 0.12092123180627823, "learning_rate": 2.82108691227356e-05, "loss": 1.078, "step": 4226 }, { "epoch": 1.7184387070542795, "grad_norm": 0.11624202877283096, "learning_rate": 2.8170160797883167e-05, "loss": 1.0283, "step": 4227 }, { "epoch": 1.718845293758894, "grad_norm": 0.09251300990581512, "learning_rate": 2.8129452473030738e-05, "loss": 0.8062, "step": 4228 }, { "epoch": 1.7192518804635089, "grad_norm": 0.10410971194505692, "learning_rate": 2.8088744148178302e-05, "loss": 0.9062, "step": 4229 }, { "epoch": 
1.7196584671681237, "grad_norm": 0.10773943364620209, "learning_rate": 2.804803582332587e-05, "loss": 1.0352, "step": 4230 }, { "epoch": 1.7200650538727382, "grad_norm": 0.11410236358642578, "learning_rate": 2.800732749847344e-05, "loss": 1.1001, "step": 4231 }, { "epoch": 1.7204716405773532, "grad_norm": 0.10976003855466843, "learning_rate": 2.7966619173621005e-05, "loss": 0.9473, "step": 4232 }, { "epoch": 1.7208782272819678, "grad_norm": 0.10007011145353317, "learning_rate": 2.7925910848768576e-05, "loss": 0.8742, "step": 4233 }, { "epoch": 1.7212848139865826, "grad_norm": 0.10780924558639526, "learning_rate": 2.7885202523916143e-05, "loss": 1.0167, "step": 4234 }, { "epoch": 1.7216914006911974, "grad_norm": 0.10659775882959366, "learning_rate": 2.7844494199063708e-05, "loss": 0.8895, "step": 4235 }, { "epoch": 1.722097987395812, "grad_norm": 0.11290939152240753, "learning_rate": 2.780378587421128e-05, "loss": 0.9768, "step": 4236 }, { "epoch": 1.722504574100427, "grad_norm": 0.10749030113220215, "learning_rate": 2.7763077549358846e-05, "loss": 0.9507, "step": 4237 }, { "epoch": 1.7229111608050416, "grad_norm": 0.11117120832204819, "learning_rate": 2.772236922450641e-05, "loss": 1.0331, "step": 4238 }, { "epoch": 1.7233177475096564, "grad_norm": 0.10173874348402023, "learning_rate": 2.768166089965398e-05, "loss": 0.9539, "step": 4239 }, { "epoch": 1.7237243342142712, "grad_norm": 0.10627079010009766, "learning_rate": 2.764095257480155e-05, "loss": 0.9851, "step": 4240 }, { "epoch": 1.724130920918886, "grad_norm": 0.11093501001596451, "learning_rate": 2.7600244249949113e-05, "loss": 0.9549, "step": 4241 }, { "epoch": 1.7245375076235008, "grad_norm": 0.10598506033420563, "learning_rate": 2.7559535925096684e-05, "loss": 0.8464, "step": 4242 }, { "epoch": 1.7249440943281154, "grad_norm": 0.10644206404685974, "learning_rate": 2.7518827600244252e-05, "loss": 0.9621, "step": 4243 }, { "epoch": 1.7253506810327304, "grad_norm": 0.10193706303834915, "learning_rate": 
2.7478119275391816e-05, "loss": 0.9247, "step": 4244 }, { "epoch": 1.725757267737345, "grad_norm": 0.11588188260793686, "learning_rate": 2.7437410950539387e-05, "loss": 0.9922, "step": 4245 }, { "epoch": 1.7261638544419597, "grad_norm": 0.102876678109169, "learning_rate": 2.7396702625686955e-05, "loss": 0.932, "step": 4246 }, { "epoch": 1.7265704411465745, "grad_norm": 0.11138436198234558, "learning_rate": 2.735599430083452e-05, "loss": 1.0089, "step": 4247 }, { "epoch": 1.7269770278511891, "grad_norm": 0.10576023161411285, "learning_rate": 2.731528597598209e-05, "loss": 0.9773, "step": 4248 }, { "epoch": 1.7273836145558041, "grad_norm": 0.112242192029953, "learning_rate": 2.7274577651129658e-05, "loss": 0.954, "step": 4249 }, { "epoch": 1.7277902012604187, "grad_norm": 0.1062515377998352, "learning_rate": 2.7233869326277222e-05, "loss": 0.9338, "step": 4250 }, { "epoch": 1.7281967879650335, "grad_norm": 0.10844583809375763, "learning_rate": 2.7193161001424793e-05, "loss": 1.0064, "step": 4251 }, { "epoch": 1.7286033746696483, "grad_norm": 0.11245914548635483, "learning_rate": 2.715245267657236e-05, "loss": 1.0079, "step": 4252 }, { "epoch": 1.729009961374263, "grad_norm": 0.10556118190288544, "learning_rate": 2.7111744351719925e-05, "loss": 0.8712, "step": 4253 }, { "epoch": 1.729416548078878, "grad_norm": 0.0991094782948494, "learning_rate": 2.7071036026867496e-05, "loss": 0.8997, "step": 4254 }, { "epoch": 1.7298231347834925, "grad_norm": 0.10705914348363876, "learning_rate": 2.7030327702015064e-05, "loss": 1.0391, "step": 4255 }, { "epoch": 1.7302297214881075, "grad_norm": 0.09982667118310928, "learning_rate": 2.6989619377162635e-05, "loss": 0.8559, "step": 4256 }, { "epoch": 1.730636308192722, "grad_norm": 0.11347133666276932, "learning_rate": 2.69489110523102e-05, "loss": 1.0799, "step": 4257 }, { "epoch": 1.7310428948973369, "grad_norm": 0.10920675843954086, "learning_rate": 2.6908202727457767e-05, "loss": 1.0552, "step": 4258 }, { "epoch": 
1.7314494816019517, "grad_norm": 0.1066790297627449, "learning_rate": 2.6867494402605338e-05, "loss": 0.955, "step": 4259 }, { "epoch": 1.7318560683065662, "grad_norm": 0.10464853793382645, "learning_rate": 2.6826786077752902e-05, "loss": 0.9153, "step": 4260 }, { "epoch": 1.7322626550111813, "grad_norm": 0.10659107565879822, "learning_rate": 2.678607775290047e-05, "loss": 0.9485, "step": 4261 }, { "epoch": 1.7326692417157958, "grad_norm": 0.11267217993736267, "learning_rate": 2.674536942804804e-05, "loss": 1.0526, "step": 4262 }, { "epoch": 1.7330758284204106, "grad_norm": 0.11317162215709686, "learning_rate": 2.6704661103195605e-05, "loss": 0.9879, "step": 4263 }, { "epoch": 1.7334824151250254, "grad_norm": 0.10813785344362259, "learning_rate": 2.6663952778343172e-05, "loss": 0.9666, "step": 4264 }, { "epoch": 1.73388900182964, "grad_norm": 0.1164456233382225, "learning_rate": 2.6623244453490743e-05, "loss": 1.0068, "step": 4265 }, { "epoch": 1.734295588534255, "grad_norm": 0.10661870241165161, "learning_rate": 2.6582536128638308e-05, "loss": 0.91, "step": 4266 }, { "epoch": 1.7347021752388696, "grad_norm": 0.10373251140117645, "learning_rate": 2.6541827803785872e-05, "loss": 0.9769, "step": 4267 }, { "epoch": 1.7351087619434844, "grad_norm": 0.10973814874887466, "learning_rate": 2.6501119478933446e-05, "loss": 0.986, "step": 4268 }, { "epoch": 1.7355153486480992, "grad_norm": 0.11517725884914398, "learning_rate": 2.646041115408101e-05, "loss": 1.0583, "step": 4269 }, { "epoch": 1.735921935352714, "grad_norm": 0.11015557497739792, "learning_rate": 2.6419702829228575e-05, "loss": 0.8886, "step": 4270 }, { "epoch": 1.7363285220573288, "grad_norm": 0.09546652436256409, "learning_rate": 2.637899450437615e-05, "loss": 0.8361, "step": 4271 }, { "epoch": 1.7367351087619434, "grad_norm": 0.1123289093375206, "learning_rate": 2.6338286179523714e-05, "loss": 0.9679, "step": 4272 }, { "epoch": 1.7371416954665584, "grad_norm": 0.1125122606754303, "learning_rate": 
2.6297577854671278e-05, "loss": 1.012, "step": 4273 }, { "epoch": 1.737548282171173, "grad_norm": 0.09992215782403946, "learning_rate": 2.6256869529818852e-05, "loss": 0.8977, "step": 4274 }, { "epoch": 1.7379548688757878, "grad_norm": 0.10778994858264923, "learning_rate": 2.6216161204966416e-05, "loss": 0.8809, "step": 4275 }, { "epoch": 1.7383614555804026, "grad_norm": 0.10639583319425583, "learning_rate": 2.617545288011398e-05, "loss": 0.8556, "step": 4276 }, { "epoch": 1.7387680422850171, "grad_norm": 0.11742980778217316, "learning_rate": 2.6134744555261552e-05, "loss": 1.0188, "step": 4277 }, { "epoch": 1.7391746289896322, "grad_norm": 0.10542727261781693, "learning_rate": 2.609403623040912e-05, "loss": 0.9337, "step": 4278 }, { "epoch": 1.7395812156942467, "grad_norm": 0.11276593804359436, "learning_rate": 2.605332790555669e-05, "loss": 0.919, "step": 4279 }, { "epoch": 1.7399878023988615, "grad_norm": 0.11198127269744873, "learning_rate": 2.6012619580704255e-05, "loss": 0.985, "step": 4280 }, { "epoch": 1.7403943891034763, "grad_norm": 0.10808583348989487, "learning_rate": 2.5971911255851822e-05, "loss": 0.9746, "step": 4281 }, { "epoch": 1.7408009758080911, "grad_norm": 0.10986792296171188, "learning_rate": 2.5931202930999393e-05, "loss": 0.9551, "step": 4282 }, { "epoch": 1.741207562512706, "grad_norm": 0.10446632653474808, "learning_rate": 2.5890494606146958e-05, "loss": 0.9267, "step": 4283 }, { "epoch": 1.7416141492173205, "grad_norm": 0.10390999913215637, "learning_rate": 2.5849786281294525e-05, "loss": 0.9594, "step": 4284 }, { "epoch": 1.7420207359219355, "grad_norm": 0.10901391506195068, "learning_rate": 2.5809077956442096e-05, "loss": 0.9243, "step": 4285 }, { "epoch": 1.74242732262655, "grad_norm": 0.1013227254152298, "learning_rate": 2.576836963158966e-05, "loss": 0.8896, "step": 4286 }, { "epoch": 1.7428339093311649, "grad_norm": 0.1072046160697937, "learning_rate": 2.5727661306737228e-05, "loss": 0.9676, "step": 4287 }, { "epoch": 
1.7432404960357797, "grad_norm": 0.11231736093759537, "learning_rate": 2.56869529818848e-05, "loss": 1.0102, "step": 4288 }, { "epoch": 1.7436470827403943, "grad_norm": 0.10860041528940201, "learning_rate": 2.5646244657032363e-05, "loss": 0.9542, "step": 4289 }, { "epoch": 1.7440536694450093, "grad_norm": 0.10603651404380798, "learning_rate": 2.560553633217993e-05, "loss": 0.9641, "step": 4290 }, { "epoch": 1.7444602561496239, "grad_norm": 0.10350662469863892, "learning_rate": 2.5564828007327502e-05, "loss": 0.8459, "step": 4291 }, { "epoch": 1.7448668428542387, "grad_norm": 114.04313659667969, "learning_rate": 2.5524119682475066e-05, "loss": 0.9247, "step": 4292 }, { "epoch": 1.7452734295588535, "grad_norm": 0.10801190882921219, "learning_rate": 2.5483411357622634e-05, "loss": 0.8786, "step": 4293 }, { "epoch": 1.745680016263468, "grad_norm": 0.10285909473896027, "learning_rate": 2.5442703032770205e-05, "loss": 0.8542, "step": 4294 }, { "epoch": 1.746086602968083, "grad_norm": 0.10545431822538376, "learning_rate": 2.540199470791777e-05, "loss": 0.9207, "step": 4295 }, { "epoch": 1.7464931896726976, "grad_norm": 0.09978693723678589, "learning_rate": 2.5361286383065337e-05, "loss": 0.9143, "step": 4296 }, { "epoch": 1.7468997763773124, "grad_norm": 0.11697062104940414, "learning_rate": 2.5320578058212908e-05, "loss": 1.0319, "step": 4297 }, { "epoch": 1.7473063630819272, "grad_norm": 0.11616303026676178, "learning_rate": 2.5279869733360472e-05, "loss": 0.9238, "step": 4298 }, { "epoch": 1.747712949786542, "grad_norm": 0.10795867443084717, "learning_rate": 2.523916140850804e-05, "loss": 0.9084, "step": 4299 }, { "epoch": 1.7481195364911568, "grad_norm": 0.11287292838096619, "learning_rate": 2.519845308365561e-05, "loss": 1.1124, "step": 4300 }, { "epoch": 1.7485261231957714, "grad_norm": 0.09773558378219604, "learning_rate": 2.5157744758803175e-05, "loss": 0.9261, "step": 4301 }, { "epoch": 1.7489327099003864, "grad_norm": 0.10864662379026413, "learning_rate": 
2.5117036433950746e-05, "loss": 0.9403, "step": 4302 }, { "epoch": 1.749339296605001, "grad_norm": 0.10947711020708084, "learning_rate": 2.5076328109098314e-05, "loss": 1.0085, "step": 4303 }, { "epoch": 1.7497458833096158, "grad_norm": 0.10114283859729767, "learning_rate": 2.5035619784245878e-05, "loss": 0.8175, "step": 4304 }, { "epoch": 1.7501524700142306, "grad_norm": 0.12078159302473068, "learning_rate": 2.4994911459393445e-05, "loss": 1.0181, "step": 4305 }, { "epoch": 1.7505590567188452, "grad_norm": 0.11305717378854752, "learning_rate": 2.4954203134541016e-05, "loss": 1.0076, "step": 4306 }, { "epoch": 1.7509656434234602, "grad_norm": 0.10791384428739548, "learning_rate": 2.491349480968858e-05, "loss": 0.942, "step": 4307 }, { "epoch": 1.7513722301280747, "grad_norm": 0.10973495990037918, "learning_rate": 2.487278648483615e-05, "loss": 0.9548, "step": 4308 }, { "epoch": 1.7517788168326895, "grad_norm": 0.11003004014492035, "learning_rate": 2.483207815998372e-05, "loss": 0.9808, "step": 4309 }, { "epoch": 1.7521854035373043, "grad_norm": 0.10978732258081436, "learning_rate": 2.4791369835131287e-05, "loss": 0.9631, "step": 4310 }, { "epoch": 1.7525919902419191, "grad_norm": 0.1190049797296524, "learning_rate": 2.475066151027885e-05, "loss": 1.0111, "step": 4311 }, { "epoch": 1.752998576946534, "grad_norm": 0.11224553734064102, "learning_rate": 2.4709953185426422e-05, "loss": 1.0278, "step": 4312 }, { "epoch": 1.7534051636511485, "grad_norm": 0.10069207847118378, "learning_rate": 2.466924486057399e-05, "loss": 0.9471, "step": 4313 }, { "epoch": 1.7538117503557635, "grad_norm": 0.10624197125434875, "learning_rate": 2.4628536535721554e-05, "loss": 0.9147, "step": 4314 }, { "epoch": 1.754218337060378, "grad_norm": 0.11235160380601883, "learning_rate": 2.4587828210869125e-05, "loss": 0.8664, "step": 4315 }, { "epoch": 1.754624923764993, "grad_norm": 0.10248378664255142, "learning_rate": 2.4547119886016693e-05, "loss": 0.9251, "step": 4316 }, { "epoch": 
1.7550315104696077, "grad_norm": 0.10120463371276855, "learning_rate": 2.4506411561164257e-05, "loss": 0.9171, "step": 4317 }, { "epoch": 1.7554380971742223, "grad_norm": 0.1045912504196167, "learning_rate": 2.4465703236311828e-05, "loss": 0.8547, "step": 4318 }, { "epoch": 1.7558446838788373, "grad_norm": 0.10243544727563858, "learning_rate": 2.4424994911459396e-05, "loss": 0.8316, "step": 4319 }, { "epoch": 1.7562512705834519, "grad_norm": 0.1228775754570961, "learning_rate": 2.4384286586606963e-05, "loss": 1.0419, "step": 4320 }, { "epoch": 1.7566578572880667, "grad_norm": 0.10931418836116791, "learning_rate": 2.434357826175453e-05, "loss": 0.9295, "step": 4321 }, { "epoch": 1.7570644439926815, "grad_norm": 0.10567274689674377, "learning_rate": 2.43028699369021e-05, "loss": 0.8475, "step": 4322 }, { "epoch": 1.757471030697296, "grad_norm": 0.10765715688467026, "learning_rate": 2.4262161612049666e-05, "loss": 0.9455, "step": 4323 }, { "epoch": 1.757877617401911, "grad_norm": 0.1101839691400528, "learning_rate": 2.422145328719723e-05, "loss": 1.0432, "step": 4324 }, { "epoch": 1.7582842041065256, "grad_norm": 0.11012637615203857, "learning_rate": 2.41807449623448e-05, "loss": 1.0319, "step": 4325 }, { "epoch": 1.7586907908111404, "grad_norm": 0.10784610360860825, "learning_rate": 2.414003663749237e-05, "loss": 0.9805, "step": 4326 }, { "epoch": 1.7590973775157552, "grad_norm": 0.11275933682918549, "learning_rate": 2.4099328312639933e-05, "loss": 1.0016, "step": 4327 }, { "epoch": 1.75950396422037, "grad_norm": 0.12135179340839386, "learning_rate": 2.4058619987787504e-05, "loss": 1.0446, "step": 4328 }, { "epoch": 1.7599105509249848, "grad_norm": 0.11638684570789337, "learning_rate": 2.4017911662935072e-05, "loss": 0.9194, "step": 4329 }, { "epoch": 1.7603171376295994, "grad_norm": 0.10643386095762253, "learning_rate": 2.3977203338082636e-05, "loss": 0.8486, "step": 4330 }, { "epoch": 1.7607237243342144, "grad_norm": 0.11009577661752701, "learning_rate": 
2.3936495013230207e-05, "loss": 0.9088, "step": 4331 }, { "epoch": 1.761130311038829, "grad_norm": 0.10450370609760284, "learning_rate": 2.3895786688377775e-05, "loss": 0.9038, "step": 4332 }, { "epoch": 1.7615368977434438, "grad_norm": 0.10662252455949783, "learning_rate": 2.3855078363525343e-05, "loss": 0.9645, "step": 4333 }, { "epoch": 1.7619434844480586, "grad_norm": 0.11794573813676834, "learning_rate": 2.381437003867291e-05, "loss": 1.0474, "step": 4334 }, { "epoch": 1.7623500711526732, "grad_norm": 0.09998206794261932, "learning_rate": 2.3773661713820478e-05, "loss": 0.8884, "step": 4335 }, { "epoch": 1.7627566578572882, "grad_norm": 0.10653196275234222, "learning_rate": 2.3732953388968046e-05, "loss": 0.9707, "step": 4336 }, { "epoch": 1.7631632445619028, "grad_norm": 0.10006160289049149, "learning_rate": 2.3692245064115613e-05, "loss": 0.8823, "step": 4337 }, { "epoch": 1.7635698312665176, "grad_norm": 0.10839603841304779, "learning_rate": 2.365153673926318e-05, "loss": 0.8994, "step": 4338 }, { "epoch": 1.7639764179711324, "grad_norm": 0.11682058125734329, "learning_rate": 2.361082841441075e-05, "loss": 0.997, "step": 4339 }, { "epoch": 1.7643830046757472, "grad_norm": 0.1066887304186821, "learning_rate": 2.3570120089558316e-05, "loss": 0.9488, "step": 4340 }, { "epoch": 1.764789591380362, "grad_norm": 0.10981225222349167, "learning_rate": 2.3529411764705884e-05, "loss": 0.9737, "step": 4341 }, { "epoch": 1.7651961780849765, "grad_norm": 0.10463444888591766, "learning_rate": 2.348870343985345e-05, "loss": 0.9606, "step": 4342 }, { "epoch": 1.7656027647895913, "grad_norm": 0.10684582591056824, "learning_rate": 2.344799511500102e-05, "loss": 0.9596, "step": 4343 }, { "epoch": 1.7660093514942061, "grad_norm": 0.10298759490251541, "learning_rate": 2.3407286790148587e-05, "loss": 0.9313, "step": 4344 }, { "epoch": 1.766415938198821, "grad_norm": 0.10359034687280655, "learning_rate": 2.3366578465296154e-05, "loss": 0.9196, "step": 4345 }, { "epoch": 
1.7668225249034357, "grad_norm": 0.09916497021913528, "learning_rate": 2.3325870140443722e-05, "loss": 0.8912, "step": 4346 }, { "epoch": 1.7672291116080503, "grad_norm": 0.11100557446479797, "learning_rate": 2.328516181559129e-05, "loss": 0.9454, "step": 4347 }, { "epoch": 1.7676356983126653, "grad_norm": 0.10253948718309402, "learning_rate": 2.3244453490738857e-05, "loss": 0.9319, "step": 4348 }, { "epoch": 1.76804228501728, "grad_norm": 0.10206755995750427, "learning_rate": 2.3203745165886425e-05, "loss": 0.934, "step": 4349 }, { "epoch": 1.7684488717218947, "grad_norm": 0.11519122123718262, "learning_rate": 2.3163036841033992e-05, "loss": 1.0003, "step": 4350 }, { "epoch": 1.7688554584265095, "grad_norm": 0.10560178756713867, "learning_rate": 2.312232851618156e-05, "loss": 0.8264, "step": 4351 }, { "epoch": 1.769262045131124, "grad_norm": 0.10667610168457031, "learning_rate": 2.3081620191329128e-05, "loss": 0.9664, "step": 4352 }, { "epoch": 1.769668631835739, "grad_norm": 0.10857968032360077, "learning_rate": 2.3040911866476695e-05, "loss": 0.9806, "step": 4353 }, { "epoch": 1.7700752185403537, "grad_norm": 0.10619470477104187, "learning_rate": 2.3000203541624263e-05, "loss": 0.9465, "step": 4354 }, { "epoch": 1.7704818052449685, "grad_norm": 0.10277079790830612, "learning_rate": 2.295949521677183e-05, "loss": 0.9143, "step": 4355 }, { "epoch": 1.7708883919495833, "grad_norm": 0.10691865533590317, "learning_rate": 2.2918786891919398e-05, "loss": 1.0253, "step": 4356 }, { "epoch": 1.771294978654198, "grad_norm": 0.11174901574850082, "learning_rate": 2.2878078567066966e-05, "loss": 0.9733, "step": 4357 }, { "epoch": 1.7717015653588128, "grad_norm": 0.10610245913267136, "learning_rate": 2.2837370242214533e-05, "loss": 0.8913, "step": 4358 }, { "epoch": 1.7721081520634274, "grad_norm": 0.10555868595838547, "learning_rate": 2.27966619173621e-05, "loss": 0.9519, "step": 4359 }, { "epoch": 1.7725147387680424, "grad_norm": 0.10912936925888062, "learning_rate": 
2.275595359250967e-05, "loss": 0.9556, "step": 4360 }, { "epoch": 1.772921325472657, "grad_norm": 0.10051053762435913, "learning_rate": 2.2715245267657236e-05, "loss": 0.9013, "step": 4361 }, { "epoch": 1.7733279121772718, "grad_norm": 0.11846382170915604, "learning_rate": 2.2674536942804804e-05, "loss": 1.0445, "step": 4362 }, { "epoch": 1.7737344988818866, "grad_norm": 0.11906945705413818, "learning_rate": 2.263382861795237e-05, "loss": 1.034, "step": 4363 }, { "epoch": 1.7741410855865012, "grad_norm": 0.10891875624656677, "learning_rate": 2.259312029309994e-05, "loss": 0.9367, "step": 4364 }, { "epoch": 1.7745476722911162, "grad_norm": 0.10430287569761276, "learning_rate": 2.2552411968247507e-05, "loss": 0.9557, "step": 4365 }, { "epoch": 1.7749542589957308, "grad_norm": 0.0998322144150734, "learning_rate": 2.2511703643395078e-05, "loss": 0.9317, "step": 4366 }, { "epoch": 1.7753608457003456, "grad_norm": 0.11559992283582687, "learning_rate": 2.2470995318542642e-05, "loss": 1.0788, "step": 4367 }, { "epoch": 1.7757674324049604, "grad_norm": 0.09992285072803497, "learning_rate": 2.243028699369021e-05, "loss": 0.8919, "step": 4368 }, { "epoch": 1.776174019109575, "grad_norm": 0.09705512970685959, "learning_rate": 2.238957866883778e-05, "loss": 0.8681, "step": 4369 }, { "epoch": 1.77658060581419, "grad_norm": 0.10248728096485138, "learning_rate": 2.2348870343985345e-05, "loss": 0.8919, "step": 4370 }, { "epoch": 1.7769871925188045, "grad_norm": 0.10350560396909714, "learning_rate": 2.2308162019132913e-05, "loss": 0.9809, "step": 4371 }, { "epoch": 1.7773937792234193, "grad_norm": 0.1068427711725235, "learning_rate": 2.2267453694280484e-05, "loss": 0.9996, "step": 4372 }, { "epoch": 1.7778003659280341, "grad_norm": 0.10956123471260071, "learning_rate": 2.2226745369428048e-05, "loss": 1.0518, "step": 4373 }, { "epoch": 1.778206952632649, "grad_norm": 0.108606718480587, "learning_rate": 2.2186037044575616e-05, "loss": 0.9675, "step": 4374 }, { "epoch": 
1.7786135393372637, "grad_norm": 0.09994582086801529, "learning_rate": 2.2145328719723187e-05, "loss": 0.8852, "step": 4375 }, { "epoch": 1.7790201260418783, "grad_norm": 0.10806925594806671, "learning_rate": 2.210462039487075e-05, "loss": 0.8325, "step": 4376 }, { "epoch": 1.7794267127464933, "grad_norm": 0.10712796449661255, "learning_rate": 2.206391207001832e-05, "loss": 0.8711, "step": 4377 }, { "epoch": 1.779833299451108, "grad_norm": 0.0954805314540863, "learning_rate": 2.202320374516589e-05, "loss": 0.8266, "step": 4378 }, { "epoch": 1.7802398861557227, "grad_norm": 0.10875259339809418, "learning_rate": 2.1982495420313457e-05, "loss": 0.8732, "step": 4379 }, { "epoch": 1.7806464728603375, "grad_norm": 0.10435421019792557, "learning_rate": 2.194178709546102e-05, "loss": 0.9109, "step": 4380 }, { "epoch": 1.781053059564952, "grad_norm": 0.11331689357757568, "learning_rate": 2.190107877060859e-05, "loss": 1.0809, "step": 4381 }, { "epoch": 1.781459646269567, "grad_norm": 0.10554377734661102, "learning_rate": 2.186037044575616e-05, "loss": 0.9462, "step": 4382 }, { "epoch": 1.7818662329741817, "grad_norm": 0.10803189128637314, "learning_rate": 2.1819662120903724e-05, "loss": 0.8824, "step": 4383 }, { "epoch": 1.7822728196787965, "grad_norm": 0.11196300387382507, "learning_rate": 2.1778953796051292e-05, "loss": 1.0187, "step": 4384 }, { "epoch": 1.7826794063834113, "grad_norm": 0.11103217303752899, "learning_rate": 2.1738245471198863e-05, "loss": 0.8834, "step": 4385 }, { "epoch": 1.783085993088026, "grad_norm": 0.11082977801561356, "learning_rate": 2.1697537146346427e-05, "loss": 1.0117, "step": 4386 }, { "epoch": 1.7834925797926409, "grad_norm": 0.11907029151916504, "learning_rate": 2.1656828821493995e-05, "loss": 0.9475, "step": 4387 }, { "epoch": 1.7838991664972554, "grad_norm": 0.11966660618782043, "learning_rate": 2.1616120496641566e-05, "loss": 0.9932, "step": 4388 }, { "epoch": 1.7843057532018705, "grad_norm": 0.11029841005802155, "learning_rate": 
2.157541217178913e-05, "loss": 0.9592, "step": 4389 }, { "epoch": 1.784712339906485, "grad_norm": 0.11848790943622589, "learning_rate": 2.1534703846936698e-05, "loss": 1.0834, "step": 4390 }, { "epoch": 1.7851189266110998, "grad_norm": 0.10386286675930023, "learning_rate": 2.149399552208427e-05, "loss": 0.9237, "step": 4391 }, { "epoch": 1.7855255133157146, "grad_norm": 0.10579628497362137, "learning_rate": 2.1453287197231836e-05, "loss": 0.953, "step": 4392 }, { "epoch": 1.7859321000203292, "grad_norm": 0.1021723523736, "learning_rate": 2.14125788723794e-05, "loss": 0.9341, "step": 4393 }, { "epoch": 1.7863386867249442, "grad_norm": 0.10522866249084473, "learning_rate": 2.137187054752697e-05, "loss": 0.9835, "step": 4394 }, { "epoch": 1.7867452734295588, "grad_norm": 0.10765165835618973, "learning_rate": 2.133116222267454e-05, "loss": 0.9663, "step": 4395 }, { "epoch": 1.7871518601341736, "grad_norm": 0.11220693588256836, "learning_rate": 2.1290453897822104e-05, "loss": 0.9155, "step": 4396 }, { "epoch": 1.7875584468387884, "grad_norm": 0.10197433829307556, "learning_rate": 2.1249745572969675e-05, "loss": 1.0136, "step": 4397 }, { "epoch": 1.787965033543403, "grad_norm": 0.10165443271398544, "learning_rate": 2.1209037248117242e-05, "loss": 0.8976, "step": 4398 }, { "epoch": 1.788371620248018, "grad_norm": 0.09981327503919601, "learning_rate": 2.1168328923264806e-05, "loss": 0.9235, "step": 4399 }, { "epoch": 1.7887782069526326, "grad_norm": 0.11018567532300949, "learning_rate": 2.1127620598412377e-05, "loss": 1.0019, "step": 4400 }, { "epoch": 1.7891847936572474, "grad_norm": 0.1057438850402832, "learning_rate": 2.1086912273559945e-05, "loss": 0.9121, "step": 4401 }, { "epoch": 1.7895913803618622, "grad_norm": 0.10720144212245941, "learning_rate": 2.1046203948707513e-05, "loss": 0.972, "step": 4402 }, { "epoch": 1.789997967066477, "grad_norm": 0.10562118142843246, "learning_rate": 2.100549562385508e-05, "loss": 0.959, "step": 4403 }, { "epoch": 1.7904045537710918, 
"grad_norm": 0.09891306608915329, "learning_rate": 2.0964787299002648e-05, "loss": 0.902, "step": 4404 }, { "epoch": 1.7908111404757063, "grad_norm": 0.10641779005527496, "learning_rate": 2.0924078974150216e-05, "loss": 0.8979, "step": 4405 }, { "epoch": 1.7912177271803214, "grad_norm": 0.11061422526836395, "learning_rate": 2.088337064929778e-05, "loss": 1.0014, "step": 4406 }, { "epoch": 1.791624313884936, "grad_norm": 0.11537324637174606, "learning_rate": 2.084266232444535e-05, "loss": 1.0432, "step": 4407 }, { "epoch": 1.7920309005895507, "grad_norm": 0.10998007655143738, "learning_rate": 2.080195399959292e-05, "loss": 0.9634, "step": 4408 }, { "epoch": 1.7924374872941655, "grad_norm": 0.10309130698442459, "learning_rate": 2.0761245674740483e-05, "loss": 0.8738, "step": 4409 }, { "epoch": 1.79284407399878, "grad_norm": 0.11149647831916809, "learning_rate": 2.0720537349888054e-05, "loss": 1.0297, "step": 4410 }, { "epoch": 1.7932506607033951, "grad_norm": 0.09828011691570282, "learning_rate": 2.067982902503562e-05, "loss": 0.8313, "step": 4411 }, { "epoch": 1.7936572474080097, "grad_norm": 0.10721366852521896, "learning_rate": 2.0639120700183186e-05, "loss": 0.9765, "step": 4412 }, { "epoch": 1.7940638341126245, "grad_norm": 0.10838527232408524, "learning_rate": 2.0598412375330757e-05, "loss": 0.9149, "step": 4413 }, { "epoch": 1.7944704208172393, "grad_norm": 0.10356704890727997, "learning_rate": 2.0557704050478324e-05, "loss": 0.8851, "step": 4414 }, { "epoch": 1.794877007521854, "grad_norm": 0.11622883379459381, "learning_rate": 2.0516995725625892e-05, "loss": 1.0725, "step": 4415 }, { "epoch": 1.7952835942264689, "grad_norm": 0.10502596199512482, "learning_rate": 2.047628740077346e-05, "loss": 0.9569, "step": 4416 }, { "epoch": 1.7956901809310835, "grad_norm": 0.11332449316978455, "learning_rate": 2.0435579075921027e-05, "loss": 1.0243, "step": 4417 }, { "epoch": 1.7960967676356985, "grad_norm": 0.10270780324935913, "learning_rate": 2.0394870751068595e-05, 
"loss": 0.892, "step": 4418 }, { "epoch": 1.796503354340313, "grad_norm": 0.11144386976957321, "learning_rate": 2.0354162426216163e-05, "loss": 1.0023, "step": 4419 }, { "epoch": 1.7969099410449278, "grad_norm": 0.10902906954288483, "learning_rate": 2.031345410136373e-05, "loss": 0.9472, "step": 4420 }, { "epoch": 1.7973165277495426, "grad_norm": 0.10864941775798798, "learning_rate": 2.0272745776511298e-05, "loss": 0.9976, "step": 4421 }, { "epoch": 1.7977231144541572, "grad_norm": 0.10872267186641693, "learning_rate": 2.0232037451658865e-05, "loss": 0.9251, "step": 4422 }, { "epoch": 1.7981297011587722, "grad_norm": 0.10153832286596298, "learning_rate": 2.0191329126806433e-05, "loss": 0.9093, "step": 4423 }, { "epoch": 1.7985362878633868, "grad_norm": 0.10530535131692886, "learning_rate": 2.0150620801954e-05, "loss": 0.9548, "step": 4424 }, { "epoch": 1.7989428745680016, "grad_norm": 0.10547920316457748, "learning_rate": 2.010991247710157e-05, "loss": 0.9341, "step": 4425 }, { "epoch": 1.7993494612726164, "grad_norm": 0.10784605890512466, "learning_rate": 2.0069204152249136e-05, "loss": 0.9403, "step": 4426 }, { "epoch": 1.799756047977231, "grad_norm": 0.10222501307725906, "learning_rate": 2.0028495827396704e-05, "loss": 0.9046, "step": 4427 }, { "epoch": 1.800162634681846, "grad_norm": 0.11365248262882233, "learning_rate": 1.998778750254427e-05, "loss": 0.9915, "step": 4428 }, { "epoch": 1.8005692213864606, "grad_norm": 0.10497547686100006, "learning_rate": 1.994707917769184e-05, "loss": 0.8882, "step": 4429 }, { "epoch": 1.8009758080910754, "grad_norm": 0.10671515762805939, "learning_rate": 1.9906370852839407e-05, "loss": 0.9459, "step": 4430 }, { "epoch": 1.8013823947956902, "grad_norm": 0.11096837371587753, "learning_rate": 1.9865662527986974e-05, "loss": 0.9122, "step": 4431 }, { "epoch": 1.801788981500305, "grad_norm": 0.10286138951778412, "learning_rate": 1.9824954203134542e-05, "loss": 0.8998, "step": 4432 }, { "epoch": 1.8021955682049198, "grad_norm": 
0.09729085862636566, "learning_rate": 1.978424587828211e-05, "loss": 0.8153, "step": 4433 }, { "epoch": 1.8026021549095343, "grad_norm": 0.10512160509824753, "learning_rate": 1.9743537553429677e-05, "loss": 0.9588, "step": 4434 }, { "epoch": 1.8030087416141494, "grad_norm": 0.11304861307144165, "learning_rate": 1.9702829228577245e-05, "loss": 0.9857, "step": 4435 }, { "epoch": 1.803415328318764, "grad_norm": 0.11357636004686356, "learning_rate": 1.9662120903724812e-05, "loss": 1.0866, "step": 4436 }, { "epoch": 1.8038219150233787, "grad_norm": 0.10194739699363708, "learning_rate": 1.962141257887238e-05, "loss": 0.87, "step": 4437 }, { "epoch": 1.8042285017279935, "grad_norm": 0.10487458109855652, "learning_rate": 1.9580704254019948e-05, "loss": 0.9706, "step": 4438 }, { "epoch": 1.8046350884326081, "grad_norm": 0.10020367056131363, "learning_rate": 1.9539995929167515e-05, "loss": 0.9203, "step": 4439 }, { "epoch": 1.8050416751372231, "grad_norm": 0.10388026386499405, "learning_rate": 1.9499287604315083e-05, "loss": 0.8797, "step": 4440 }, { "epoch": 1.8054482618418377, "grad_norm": 0.11745689809322357, "learning_rate": 1.945857927946265e-05, "loss": 0.9467, "step": 4441 }, { "epoch": 1.8058548485464525, "grad_norm": 0.1165374368429184, "learning_rate": 1.9417870954610218e-05, "loss": 0.9579, "step": 4442 }, { "epoch": 1.8062614352510673, "grad_norm": 0.10849489271640778, "learning_rate": 1.9377162629757786e-05, "loss": 0.9887, "step": 4443 }, { "epoch": 1.806668021955682, "grad_norm": 0.10759218782186508, "learning_rate": 1.9336454304905353e-05, "loss": 0.8644, "step": 4444 }, { "epoch": 1.807074608660297, "grad_norm": 0.10003377497196198, "learning_rate": 1.929574598005292e-05, "loss": 0.8778, "step": 4445 }, { "epoch": 1.8074811953649115, "grad_norm": 0.11562558263540268, "learning_rate": 1.925503765520049e-05, "loss": 1.0265, "step": 4446 }, { "epoch": 1.8078877820695263, "grad_norm": 0.11590316146612167, "learning_rate": 1.9214329330348056e-05, "loss": 1.0377, 
"step": 4447 }, { "epoch": 1.808294368774141, "grad_norm": 0.10737669467926025, "learning_rate": 1.9173621005495627e-05, "loss": 0.9406, "step": 4448 }, { "epoch": 1.8087009554787559, "grad_norm": 0.10374566167593002, "learning_rate": 1.913291268064319e-05, "loss": 0.9487, "step": 4449 }, { "epoch": 1.8091075421833707, "grad_norm": 0.10888830572366714, "learning_rate": 1.909220435579076e-05, "loss": 0.9484, "step": 4450 }, { "epoch": 1.8095141288879852, "grad_norm": 0.09871388971805573, "learning_rate": 1.905149603093833e-05, "loss": 0.8424, "step": 4451 }, { "epoch": 1.8099207155926003, "grad_norm": 0.10062243789434433, "learning_rate": 1.9010787706085894e-05, "loss": 0.8976, "step": 4452 }, { "epoch": 1.8103273022972148, "grad_norm": 0.11193433403968811, "learning_rate": 1.8970079381233462e-05, "loss": 1.0214, "step": 4453 }, { "epoch": 1.8107338890018296, "grad_norm": 0.1083875447511673, "learning_rate": 1.8929371056381033e-05, "loss": 0.9821, "step": 4454 }, { "epoch": 1.8111404757064444, "grad_norm": 0.11033230274915695, "learning_rate": 1.8888662731528597e-05, "loss": 0.9665, "step": 4455 }, { "epoch": 1.811547062411059, "grad_norm": 0.10582058876752853, "learning_rate": 1.8847954406676165e-05, "loss": 0.8739, "step": 4456 }, { "epoch": 1.811953649115674, "grad_norm": 0.10182987153530121, "learning_rate": 1.8807246081823736e-05, "loss": 0.9246, "step": 4457 }, { "epoch": 1.8123602358202886, "grad_norm": 0.09945760667324066, "learning_rate": 1.87665377569713e-05, "loss": 0.9657, "step": 4458 }, { "epoch": 1.8127668225249034, "grad_norm": 0.11163794994354248, "learning_rate": 1.8725829432118868e-05, "loss": 1.0577, "step": 4459 }, { "epoch": 1.8131734092295182, "grad_norm": 0.11032053083181381, "learning_rate": 1.868512110726644e-05, "loss": 1.023, "step": 4460 }, { "epoch": 1.813579995934133, "grad_norm": 0.1140703558921814, "learning_rate": 1.8644412782414007e-05, "loss": 1.0206, "step": 4461 }, { "epoch": 1.8139865826387478, "grad_norm": 0.10543038696050644, 
"learning_rate": 1.860370445756157e-05, "loss": 0.891, "step": 4462 }, { "epoch": 1.8143931693433624, "grad_norm": 0.11133403331041336, "learning_rate": 1.856299613270914e-05, "loss": 0.9955, "step": 4463 }, { "epoch": 1.8147997560479774, "grad_norm": 0.10132287442684174, "learning_rate": 1.852228780785671e-05, "loss": 0.9612, "step": 4464 }, { "epoch": 1.815206342752592, "grad_norm": 0.10729202628135681, "learning_rate": 1.8481579483004274e-05, "loss": 0.9838, "step": 4465 }, { "epoch": 1.8156129294572068, "grad_norm": 0.10240829735994339, "learning_rate": 1.844087115815184e-05, "loss": 0.903, "step": 4466 }, { "epoch": 1.8160195161618216, "grad_norm": 0.09923898428678513, "learning_rate": 1.8400162833299412e-05, "loss": 0.9138, "step": 4467 }, { "epoch": 1.8164261028664361, "grad_norm": 0.11736813932657242, "learning_rate": 1.8359454508446977e-05, "loss": 1.0701, "step": 4468 }, { "epoch": 1.8168326895710512, "grad_norm": 0.10433974862098694, "learning_rate": 1.8318746183594544e-05, "loss": 0.8582, "step": 4469 }, { "epoch": 1.8172392762756657, "grad_norm": 0.10506631433963776, "learning_rate": 1.8278037858742115e-05, "loss": 1.002, "step": 4470 }, { "epoch": 1.8176458629802805, "grad_norm": 0.11175213009119034, "learning_rate": 1.8237329533889683e-05, "loss": 1.0239, "step": 4471 }, { "epoch": 1.8180524496848953, "grad_norm": 0.10588959604501724, "learning_rate": 1.8196621209037247e-05, "loss": 0.9909, "step": 4472 }, { "epoch": 1.81845903638951, "grad_norm": 0.11378835886716843, "learning_rate": 1.8155912884184818e-05, "loss": 0.9572, "step": 4473 }, { "epoch": 1.818865623094125, "grad_norm": 0.09671740978956223, "learning_rate": 1.8115204559332386e-05, "loss": 0.9486, "step": 4474 }, { "epoch": 1.8192722097987395, "grad_norm": 0.10830000042915344, "learning_rate": 1.807449623447995e-05, "loss": 0.9055, "step": 4475 }, { "epoch": 1.8196787965033543, "grad_norm": 0.1054878979921341, "learning_rate": 1.803378790962752e-05, "loss": 0.9878, "step": 4476 }, { 
"epoch": 1.820085383207969, "grad_norm": 0.11090472340583801, "learning_rate": 1.799307958477509e-05, "loss": 0.8731, "step": 4477 }, { "epoch": 1.8204919699125839, "grad_norm": 0.10842659324407578, "learning_rate": 1.7952371259922653e-05, "loss": 0.9303, "step": 4478 }, { "epoch": 1.8208985566171987, "grad_norm": 0.10983911901712418, "learning_rate": 1.7911662935070224e-05, "loss": 0.9713, "step": 4479 }, { "epoch": 1.8213051433218133, "grad_norm": 0.10775753855705261, "learning_rate": 1.787095461021779e-05, "loss": 1.008, "step": 4480 }, { "epoch": 1.8217117300264283, "grad_norm": 0.11450286209583282, "learning_rate": 1.7830246285365356e-05, "loss": 0.9387, "step": 4481 }, { "epoch": 1.8221183167310429, "grad_norm": 0.11208988726139069, "learning_rate": 1.7789537960512927e-05, "loss": 0.8588, "step": 4482 }, { "epoch": 1.8225249034356577, "grad_norm": 0.10878584533929825, "learning_rate": 1.7748829635660495e-05, "loss": 0.9913, "step": 4483 }, { "epoch": 1.8229314901402724, "grad_norm": 0.0990230068564415, "learning_rate": 1.7708121310808062e-05, "loss": 0.8097, "step": 4484 }, { "epoch": 1.823338076844887, "grad_norm": 0.10294988006353378, "learning_rate": 1.766741298595563e-05, "loss": 0.9606, "step": 4485 }, { "epoch": 1.823744663549502, "grad_norm": 0.1195545643568039, "learning_rate": 1.7626704661103197e-05, "loss": 1.1278, "step": 4486 }, { "epoch": 1.8241512502541166, "grad_norm": 0.10764475166797638, "learning_rate": 1.7585996336250765e-05, "loss": 0.86, "step": 4487 }, { "epoch": 1.8245578369587314, "grad_norm": 0.10787981748580933, "learning_rate": 1.754528801139833e-05, "loss": 0.9484, "step": 4488 }, { "epoch": 1.8249644236633462, "grad_norm": 0.11306875199079514, "learning_rate": 1.75045796865459e-05, "loss": 0.9843, "step": 4489 }, { "epoch": 1.825371010367961, "grad_norm": 0.10255679488182068, "learning_rate": 1.7463871361693468e-05, "loss": 0.9468, "step": 4490 }, { "epoch": 1.8257775970725758, "grad_norm": 0.11567334085702896, "learning_rate": 
1.7423163036841032e-05, "loss": 1.1066, "step": 4491 }, { "epoch": 1.8261841837771904, "grad_norm": 0.0961284264922142, "learning_rate": 1.7382454711988603e-05, "loss": 0.8327, "step": 4492 }, { "epoch": 1.8265907704818054, "grad_norm": 0.11782846599817276, "learning_rate": 1.734174638713617e-05, "loss": 1.0241, "step": 4493 }, { "epoch": 1.82699735718642, "grad_norm": 0.10835573822259903, "learning_rate": 1.7301038062283735e-05, "loss": 0.989, "step": 4494 }, { "epoch": 1.8274039438910348, "grad_norm": 0.1051306203007698, "learning_rate": 1.7260329737431306e-05, "loss": 0.8624, "step": 4495 }, { "epoch": 1.8278105305956496, "grad_norm": 0.11646796017885208, "learning_rate": 1.7219621412578874e-05, "loss": 0.9987, "step": 4496 }, { "epoch": 1.8282171173002641, "grad_norm": 0.11364038288593292, "learning_rate": 1.717891308772644e-05, "loss": 1.034, "step": 4497 }, { "epoch": 1.8286237040048792, "grad_norm": 0.12037666887044907, "learning_rate": 1.713820476287401e-05, "loss": 1.004, "step": 4498 }, { "epoch": 1.8290302907094937, "grad_norm": 0.11295337229967117, "learning_rate": 1.7097496438021577e-05, "loss": 0.977, "step": 4499 }, { "epoch": 1.8294368774141085, "grad_norm": 0.12203505635261536, "learning_rate": 1.7056788113169144e-05, "loss": 1.122, "step": 4500 }, { "epoch": 1.8298434641187233, "grad_norm": 0.10481808334589005, "learning_rate": 1.7016079788316712e-05, "loss": 0.9512, "step": 4501 }, { "epoch": 1.830250050823338, "grad_norm": 0.11461866647005081, "learning_rate": 1.697537146346428e-05, "loss": 1.0002, "step": 4502 }, { "epoch": 1.830656637527953, "grad_norm": 0.10656670480966568, "learning_rate": 1.6934663138611847e-05, "loss": 0.9555, "step": 4503 }, { "epoch": 1.8310632242325675, "grad_norm": 0.10964150726795197, "learning_rate": 1.6893954813759415e-05, "loss": 0.9794, "step": 4504 }, { "epoch": 1.8314698109371823, "grad_norm": 0.12029408663511276, "learning_rate": 1.6853246488906982e-05, "loss": 1.0442, "step": 4505 }, { "epoch": 
1.831876397641797, "grad_norm": 0.1036851778626442, "learning_rate": 1.681253816405455e-05, "loss": 0.8313, "step": 4506 }, { "epoch": 1.832282984346412, "grad_norm": 0.11831417679786682, "learning_rate": 1.6771829839202118e-05, "loss": 1.0545, "step": 4507 }, { "epoch": 1.8326895710510267, "grad_norm": 0.10131677240133286, "learning_rate": 1.6731121514349685e-05, "loss": 0.857, "step": 4508 }, { "epoch": 1.8330961577556413, "grad_norm": 0.10643882304430008, "learning_rate": 1.6690413189497253e-05, "loss": 0.9356, "step": 4509 }, { "epoch": 1.8335027444602563, "grad_norm": 0.10721322149038315, "learning_rate": 1.664970486464482e-05, "loss": 0.9221, "step": 4510 }, { "epoch": 1.8339093311648709, "grad_norm": 0.10363822430372238, "learning_rate": 1.6608996539792388e-05, "loss": 0.863, "step": 4511 }, { "epoch": 1.8343159178694857, "grad_norm": 0.10171811282634735, "learning_rate": 1.6568288214939956e-05, "loss": 0.86, "step": 4512 }, { "epoch": 1.8347225045741005, "grad_norm": 0.11216262727975845, "learning_rate": 1.6527579890087524e-05, "loss": 1.027, "step": 4513 }, { "epoch": 1.835129091278715, "grad_norm": 0.11542665213346481, "learning_rate": 1.648687156523509e-05, "loss": 0.9954, "step": 4514 }, { "epoch": 1.83553567798333, "grad_norm": 0.10395301133394241, "learning_rate": 1.644616324038266e-05, "loss": 0.8623, "step": 4515 }, { "epoch": 1.8359422646879446, "grad_norm": 0.0989793911576271, "learning_rate": 1.6405454915530226e-05, "loss": 0.8632, "step": 4516 }, { "epoch": 1.8363488513925594, "grad_norm": 0.1062735840678215, "learning_rate": 1.6364746590677794e-05, "loss": 1.0057, "step": 4517 }, { "epoch": 1.8367554380971742, "grad_norm": 0.11202707141637802, "learning_rate": 1.6324038265825362e-05, "loss": 0.9849, "step": 4518 }, { "epoch": 1.837162024801789, "grad_norm": 0.11205235123634338, "learning_rate": 1.628332994097293e-05, "loss": 0.9858, "step": 4519 }, { "epoch": 1.8375686115064038, "grad_norm": 0.10820986330509186, "learning_rate": 
1.6242621616120497e-05, "loss": 0.9558, "step": 4520 }, { "epoch": 1.8379751982110184, "grad_norm": 0.10929597169160843, "learning_rate": 1.6201913291268065e-05, "loss": 0.9891, "step": 4521 }, { "epoch": 1.8383817849156334, "grad_norm": 0.10770545899868011, "learning_rate": 1.6161204966415632e-05, "loss": 0.9391, "step": 4522 }, { "epoch": 1.838788371620248, "grad_norm": 0.10973810404539108, "learning_rate": 1.61204966415632e-05, "loss": 0.9154, "step": 4523 }, { "epoch": 1.8391949583248628, "grad_norm": 0.09949386119842529, "learning_rate": 1.6079788316710768e-05, "loss": 0.885, "step": 4524 }, { "epoch": 1.8396015450294776, "grad_norm": 0.11300302296876907, "learning_rate": 1.6039079991858335e-05, "loss": 0.9821, "step": 4525 }, { "epoch": 1.8400081317340922, "grad_norm": 0.10687512159347534, "learning_rate": 1.5998371667005903e-05, "loss": 1.0058, "step": 4526 }, { "epoch": 1.8404147184387072, "grad_norm": 0.10568513721227646, "learning_rate": 1.595766334215347e-05, "loss": 0.9018, "step": 4527 }, { "epoch": 1.8408213051433218, "grad_norm": 0.10843189060688019, "learning_rate": 1.5916955017301038e-05, "loss": 0.9419, "step": 4528 }, { "epoch": 1.8412278918479366, "grad_norm": 0.10421909391880035, "learning_rate": 1.5876246692448606e-05, "loss": 0.853, "step": 4529 }, { "epoch": 1.8416344785525514, "grad_norm": 0.10745032131671906, "learning_rate": 1.5835538367596177e-05, "loss": 0.88, "step": 4530 }, { "epoch": 1.842041065257166, "grad_norm": 0.10188285261392593, "learning_rate": 1.579483004274374e-05, "loss": 0.9143, "step": 4531 }, { "epoch": 1.842447651961781, "grad_norm": 0.10237755626440048, "learning_rate": 1.575412171789131e-05, "loss": 0.8407, "step": 4532 }, { "epoch": 1.8428542386663955, "grad_norm": 0.11180367320775986, "learning_rate": 1.571341339303888e-05, "loss": 0.981, "step": 4533 }, { "epoch": 1.8432608253710103, "grad_norm": 0.11422519385814667, "learning_rate": 1.5672705068186444e-05, "loss": 1.0089, "step": 4534 }, { "epoch": 
1.8436674120756251, "grad_norm": 0.10691160708665848, "learning_rate": 1.563199674333401e-05, "loss": 0.9363, "step": 4535 }, { "epoch": 1.84407399878024, "grad_norm": 0.1037185937166214, "learning_rate": 1.5591288418481583e-05, "loss": 0.8672, "step": 4536 }, { "epoch": 1.8444805854848547, "grad_norm": 0.11030171811580658, "learning_rate": 1.5550580093629147e-05, "loss": 0.975, "step": 4537 }, { "epoch": 1.8448871721894693, "grad_norm": 0.10458967834711075, "learning_rate": 1.5509871768776714e-05, "loss": 0.9169, "step": 4538 }, { "epoch": 1.8452937588940843, "grad_norm": 0.10651843994855881, "learning_rate": 1.5469163443924285e-05, "loss": 0.9499, "step": 4539 }, { "epoch": 1.8457003455986989, "grad_norm": 0.10716937482357025, "learning_rate": 1.542845511907185e-05, "loss": 1.0137, "step": 4540 }, { "epoch": 1.8461069323033137, "grad_norm": 0.09491477161645889, "learning_rate": 1.5387746794219417e-05, "loss": 0.8159, "step": 4541 }, { "epoch": 1.8465135190079285, "grad_norm": 0.11105383187532425, "learning_rate": 1.534703846936699e-05, "loss": 0.9554, "step": 4542 }, { "epoch": 1.846920105712543, "grad_norm": 0.1065712422132492, "learning_rate": 1.5306330144514556e-05, "loss": 0.896, "step": 4543 }, { "epoch": 1.847326692417158, "grad_norm": 0.11683394014835358, "learning_rate": 1.526562181966212e-05, "loss": 1.0732, "step": 4544 }, { "epoch": 1.8477332791217727, "grad_norm": 0.10906050354242325, "learning_rate": 1.522491349480969e-05, "loss": 1.0323, "step": 4545 }, { "epoch": 1.8481398658263875, "grad_norm": 0.11309327185153961, "learning_rate": 1.5184205169957257e-05, "loss": 0.9693, "step": 4546 }, { "epoch": 1.8485464525310022, "grad_norm": 0.11471321433782578, "learning_rate": 1.5143496845104823e-05, "loss": 0.9795, "step": 4547 }, { "epoch": 1.848953039235617, "grad_norm": 0.10957857966423035, "learning_rate": 1.5102788520252392e-05, "loss": 0.9773, "step": 4548 }, { "epoch": 1.8493596259402318, "grad_norm": 0.10189523547887802, "learning_rate": 
1.506208019539996e-05, "loss": 0.887, "step": 4549 }, { "epoch": 1.8497662126448464, "grad_norm": 0.10361644625663757, "learning_rate": 1.5021371870547526e-05, "loss": 0.8694, "step": 4550 }, { "epoch": 1.8501727993494614, "grad_norm": 0.10304764658212662, "learning_rate": 1.4980663545695095e-05, "loss": 0.9005, "step": 4551 }, { "epoch": 1.850579386054076, "grad_norm": 0.1083732470870018, "learning_rate": 1.4939955220842663e-05, "loss": 0.9132, "step": 4552 }, { "epoch": 1.8509859727586908, "grad_norm": 0.109224334359169, "learning_rate": 1.4899246895990232e-05, "loss": 1.03, "step": 4553 }, { "epoch": 1.8513925594633056, "grad_norm": 0.09817371517419815, "learning_rate": 1.4858538571137798e-05, "loss": 0.8626, "step": 4554 }, { "epoch": 1.8517991461679202, "grad_norm": 0.10599929839372635, "learning_rate": 1.4817830246285366e-05, "loss": 1.0029, "step": 4555 }, { "epoch": 1.8522057328725352, "grad_norm": 0.10864468663930893, "learning_rate": 1.4777121921432935e-05, "loss": 0.9572, "step": 4556 }, { "epoch": 1.8526123195771498, "grad_norm": 0.11035089194774628, "learning_rate": 1.4736413596580501e-05, "loss": 0.9821, "step": 4557 }, { "epoch": 1.8530189062817646, "grad_norm": 0.10766597837209702, "learning_rate": 1.4695705271728069e-05, "loss": 0.9368, "step": 4558 }, { "epoch": 1.8534254929863794, "grad_norm": 0.1012941375374794, "learning_rate": 1.4654996946875638e-05, "loss": 0.9643, "step": 4559 }, { "epoch": 1.853832079690994, "grad_norm": 0.10959739238023758, "learning_rate": 1.4614288622023204e-05, "loss": 0.9543, "step": 4560 }, { "epoch": 1.854238666395609, "grad_norm": 0.10180888324975967, "learning_rate": 1.4573580297170772e-05, "loss": 0.9113, "step": 4561 }, { "epoch": 1.8546452531002235, "grad_norm": 0.1098785549402237, "learning_rate": 1.4532871972318341e-05, "loss": 0.9224, "step": 4562 }, { "epoch": 1.8550518398048383, "grad_norm": 0.10429586470127106, "learning_rate": 1.4492163647465907e-05, "loss": 0.9505, "step": 4563 }, { "epoch": 
1.8554584265094531, "grad_norm": 0.1072763204574585, "learning_rate": 1.4451455322613475e-05, "loss": 0.9893, "step": 4564 }, { "epoch": 1.855865013214068, "grad_norm": 0.11279455572366714, "learning_rate": 1.4410746997761044e-05, "loss": 0.9223, "step": 4565 }, { "epoch": 1.8562715999186827, "grad_norm": 0.11097732186317444, "learning_rate": 1.4370038672908612e-05, "loss": 0.9855, "step": 4566 }, { "epoch": 1.8566781866232973, "grad_norm": 0.09808061271905899, "learning_rate": 1.4329330348056177e-05, "loss": 0.9168, "step": 4567 }, { "epoch": 1.8570847733279123, "grad_norm": 0.10069447755813599, "learning_rate": 1.4288622023203747e-05, "loss": 0.8709, "step": 4568 }, { "epoch": 1.857491360032527, "grad_norm": 0.10356453061103821, "learning_rate": 1.4247913698351314e-05, "loss": 0.876, "step": 4569 }, { "epoch": 1.8578979467371417, "grad_norm": 0.10439286381006241, "learning_rate": 1.420720537349888e-05, "loss": 1.0009, "step": 4570 }, { "epoch": 1.8583045334417565, "grad_norm": 0.1026233658194542, "learning_rate": 1.4166497048646448e-05, "loss": 0.8783, "step": 4571 }, { "epoch": 1.858711120146371, "grad_norm": 0.0925007164478302, "learning_rate": 1.4125788723794017e-05, "loss": 0.818, "step": 4572 }, { "epoch": 1.859117706850986, "grad_norm": 0.09667190164327621, "learning_rate": 1.4085080398941583e-05, "loss": 0.857, "step": 4573 }, { "epoch": 1.8595242935556007, "grad_norm": 0.10364139080047607, "learning_rate": 1.4044372074089151e-05, "loss": 0.8918, "step": 4574 }, { "epoch": 1.8599308802602155, "grad_norm": 0.10816872864961624, "learning_rate": 1.400366374923672e-05, "loss": 0.9472, "step": 4575 }, { "epoch": 1.8603374669648303, "grad_norm": 0.10876300930976868, "learning_rate": 1.3962955424384288e-05, "loss": 0.946, "step": 4576 }, { "epoch": 1.860744053669445, "grad_norm": 0.1095566526055336, "learning_rate": 1.3922247099531854e-05, "loss": 0.9764, "step": 4577 }, { "epoch": 1.8611506403740599, "grad_norm": 0.1096457913517952, "learning_rate": 
1.3881538774679423e-05, "loss": 0.9573, "step": 4578 }, { "epoch": 1.8615572270786744, "grad_norm": 0.11302363127470016, "learning_rate": 1.384083044982699e-05, "loss": 1.0051, "step": 4579 }, { "epoch": 1.8619638137832892, "grad_norm": 0.098774753510952, "learning_rate": 1.3800122124974557e-05, "loss": 0.8779, "step": 4580 }, { "epoch": 1.862370400487904, "grad_norm": 0.10221240669488907, "learning_rate": 1.3759413800122126e-05, "loss": 0.9076, "step": 4581 }, { "epoch": 1.8627769871925188, "grad_norm": 0.11016833037137985, "learning_rate": 1.3718705475269694e-05, "loss": 0.9252, "step": 4582 }, { "epoch": 1.8631835738971336, "grad_norm": 0.1114361584186554, "learning_rate": 1.367799715041726e-05, "loss": 0.9954, "step": 4583 }, { "epoch": 1.8635901606017482, "grad_norm": 0.10552554577589035, "learning_rate": 1.3637288825564829e-05, "loss": 0.8927, "step": 4584 }, { "epoch": 1.8639967473063632, "grad_norm": 0.10773453116416931, "learning_rate": 1.3596580500712397e-05, "loss": 0.9652, "step": 4585 }, { "epoch": 1.8644033340109778, "grad_norm": 0.10485775023698807, "learning_rate": 1.3555872175859963e-05, "loss": 0.9559, "step": 4586 }, { "epoch": 1.8648099207155926, "grad_norm": 0.10935719311237335, "learning_rate": 1.3515163851007532e-05, "loss": 0.9871, "step": 4587 }, { "epoch": 1.8652165074202074, "grad_norm": 0.10393113642930984, "learning_rate": 1.34744555261551e-05, "loss": 0.861, "step": 4588 }, { "epoch": 1.865623094124822, "grad_norm": 0.11607681959867477, "learning_rate": 1.3433747201302669e-05, "loss": 0.978, "step": 4589 }, { "epoch": 1.866029680829437, "grad_norm": 0.1011018306016922, "learning_rate": 1.3393038876450235e-05, "loss": 0.9045, "step": 4590 }, { "epoch": 1.8664362675340516, "grad_norm": 0.11329011619091034, "learning_rate": 1.3352330551597802e-05, "loss": 1.0339, "step": 4591 }, { "epoch": 1.8668428542386664, "grad_norm": 0.10683320462703705, "learning_rate": 1.3311622226745372e-05, "loss": 0.9406, "step": 4592 }, { "epoch": 
1.8672494409432812, "grad_norm": 0.1118168905377388, "learning_rate": 1.3270913901892936e-05, "loss": 0.9632, "step": 4593 }, { "epoch": 1.867656027647896, "grad_norm": 0.10472162812948227, "learning_rate": 1.3230205577040505e-05, "loss": 0.9109, "step": 4594 }, { "epoch": 1.8680626143525108, "grad_norm": 0.11069684475660324, "learning_rate": 1.3189497252188075e-05, "loss": 1.0543, "step": 4595 }, { "epoch": 1.8684692010571253, "grad_norm": 0.105756476521492, "learning_rate": 1.3148788927335639e-05, "loss": 0.9705, "step": 4596 }, { "epoch": 1.8688757877617403, "grad_norm": 0.1085345521569252, "learning_rate": 1.3108080602483208e-05, "loss": 0.9485, "step": 4597 }, { "epoch": 1.869282374466355, "grad_norm": 0.10738877952098846, "learning_rate": 1.3067372277630776e-05, "loss": 1.0104, "step": 4598 }, { "epoch": 1.8696889611709697, "grad_norm": 0.11370383948087692, "learning_rate": 1.3026663952778345e-05, "loss": 0.9926, "step": 4599 }, { "epoch": 1.8700955478755845, "grad_norm": 0.11408769339323044, "learning_rate": 1.2985955627925911e-05, "loss": 0.9852, "step": 4600 }, { "epoch": 1.870502134580199, "grad_norm": 0.1028301939368248, "learning_rate": 1.2945247303073479e-05, "loss": 0.9189, "step": 4601 }, { "epoch": 1.8709087212848141, "grad_norm": 0.10195198655128479, "learning_rate": 1.2904538978221048e-05, "loss": 0.907, "step": 4602 }, { "epoch": 1.8713153079894287, "grad_norm": 0.10509088635444641, "learning_rate": 1.2863830653368614e-05, "loss": 0.9704, "step": 4603 }, { "epoch": 1.8717218946940435, "grad_norm": 0.10894305258989334, "learning_rate": 1.2823122328516182e-05, "loss": 0.8871, "step": 4604 }, { "epoch": 1.8721284813986583, "grad_norm": 0.11078134924173355, "learning_rate": 1.2782414003663751e-05, "loss": 0.9472, "step": 4605 }, { "epoch": 1.8725350681032729, "grad_norm": 0.0979735478758812, "learning_rate": 1.2741705678811317e-05, "loss": 0.8495, "step": 4606 }, { "epoch": 1.8729416548078879, "grad_norm": 0.09745296090841293, "learning_rate": 
1.2700997353958885e-05, "loss": 0.8289, "step": 4607 }, { "epoch": 1.8733482415125025, "grad_norm": 0.10826481133699417, "learning_rate": 1.2660289029106454e-05, "loss": 0.9967, "step": 4608 }, { "epoch": 1.8737548282171173, "grad_norm": 0.1063094213604927, "learning_rate": 1.261958070425402e-05, "loss": 0.9209, "step": 4609 }, { "epoch": 1.874161414921732, "grad_norm": 0.11431606858968735, "learning_rate": 1.2578872379401587e-05, "loss": 1.0041, "step": 4610 }, { "epoch": 1.8745680016263468, "grad_norm": 0.10370271652936935, "learning_rate": 1.2538164054549157e-05, "loss": 0.9162, "step": 4611 }, { "epoch": 1.8749745883309616, "grad_norm": 0.10634543746709824, "learning_rate": 1.2497455729696723e-05, "loss": 0.9517, "step": 4612 }, { "epoch": 1.8753811750355762, "grad_norm": 0.11096760630607605, "learning_rate": 1.245674740484429e-05, "loss": 0.9201, "step": 4613 }, { "epoch": 1.8757877617401912, "grad_norm": 0.10658212751150131, "learning_rate": 1.241603907999186e-05, "loss": 0.8743, "step": 4614 }, { "epoch": 1.8761943484448058, "grad_norm": 0.11457500606775284, "learning_rate": 1.2375330755139426e-05, "loss": 1.0447, "step": 4615 }, { "epoch": 1.8766009351494206, "grad_norm": 0.10246486961841583, "learning_rate": 1.2334622430286995e-05, "loss": 0.8899, "step": 4616 }, { "epoch": 1.8770075218540354, "grad_norm": 0.1061936691403389, "learning_rate": 1.2293914105434563e-05, "loss": 0.8886, "step": 4617 }, { "epoch": 1.87741410855865, "grad_norm": 0.11726386845111847, "learning_rate": 1.2253205780582129e-05, "loss": 1.0317, "step": 4618 }, { "epoch": 1.877820695263265, "grad_norm": 0.11178486049175262, "learning_rate": 1.2212497455729698e-05, "loss": 1.0469, "step": 4619 }, { "epoch": 1.8782272819678796, "grad_norm": 0.10353215783834457, "learning_rate": 1.2171789130877265e-05, "loss": 0.9078, "step": 4620 }, { "epoch": 1.8786338686724944, "grad_norm": 0.10023000091314316, "learning_rate": 1.2131080806024833e-05, "loss": 0.8645, "step": 4621 }, { "epoch": 
1.8790404553771092, "grad_norm": 0.11288487911224365, "learning_rate": 1.20903724811724e-05, "loss": 1.0863, "step": 4622 }, { "epoch": 1.879447042081724, "grad_norm": 0.10953675955533981, "learning_rate": 1.2049664156319967e-05, "loss": 0.9686, "step": 4623 }, { "epoch": 1.8798536287863388, "grad_norm": 0.10264912992715836, "learning_rate": 1.2008955831467536e-05, "loss": 0.9058, "step": 4624 }, { "epoch": 1.8802602154909533, "grad_norm": 0.10558958351612091, "learning_rate": 1.1968247506615104e-05, "loss": 0.8903, "step": 4625 }, { "epoch": 1.8806668021955684, "grad_norm": 0.11385960131883621, "learning_rate": 1.1927539181762671e-05, "loss": 0.9852, "step": 4626 }, { "epoch": 1.881073388900183, "grad_norm": 0.127496600151062, "learning_rate": 1.1886830856910239e-05, "loss": 1.0174, "step": 4627 }, { "epoch": 1.8814799756047977, "grad_norm": 0.1023222878575325, "learning_rate": 1.1846122532057807e-05, "loss": 0.8181, "step": 4628 }, { "epoch": 1.8818865623094125, "grad_norm": 0.10723838210105896, "learning_rate": 1.1805414207205374e-05, "loss": 0.8972, "step": 4629 }, { "epoch": 1.8822931490140271, "grad_norm": 0.10070095211267471, "learning_rate": 1.1764705882352942e-05, "loss": 0.8543, "step": 4630 }, { "epoch": 1.8826997357186421, "grad_norm": 0.10262621194124222, "learning_rate": 1.172399755750051e-05, "loss": 0.8568, "step": 4631 }, { "epoch": 1.8831063224232567, "grad_norm": 0.1155037060379982, "learning_rate": 1.1683289232648077e-05, "loss": 0.9725, "step": 4632 }, { "epoch": 1.8835129091278715, "grad_norm": 0.1091943308711052, "learning_rate": 1.1642580907795645e-05, "loss": 0.9869, "step": 4633 }, { "epoch": 1.8839194958324863, "grad_norm": 0.10687655955553055, "learning_rate": 1.1601872582943212e-05, "loss": 0.9417, "step": 4634 }, { "epoch": 1.8843260825371009, "grad_norm": 0.11003026366233826, "learning_rate": 1.156116425809078e-05, "loss": 0.9662, "step": 4635 }, { "epoch": 1.884732669241716, "grad_norm": 0.10457431524991989, "learning_rate": 
1.1520455933238348e-05, "loss": 0.8877, "step": 4636 }, { "epoch": 1.8851392559463305, "grad_norm": 0.09627239406108856, "learning_rate": 1.1479747608385915e-05, "loss": 0.8781, "step": 4637 }, { "epoch": 1.8855458426509453, "grad_norm": 0.10630346089601517, "learning_rate": 1.1439039283533483e-05, "loss": 0.9602, "step": 4638 }, { "epoch": 1.88595242935556, "grad_norm": 0.10510063916444778, "learning_rate": 1.139833095868105e-05, "loss": 0.9213, "step": 4639 }, { "epoch": 1.8863590160601749, "grad_norm": 0.11134610325098038, "learning_rate": 1.1357622633828618e-05, "loss": 0.9601, "step": 4640 }, { "epoch": 1.8867656027647897, "grad_norm": 0.11184284090995789, "learning_rate": 1.1316914308976186e-05, "loss": 0.9936, "step": 4641 }, { "epoch": 1.8871721894694042, "grad_norm": 0.10282327234745026, "learning_rate": 1.1276205984123753e-05, "loss": 0.8698, "step": 4642 }, { "epoch": 1.8875787761740193, "grad_norm": 0.1153402030467987, "learning_rate": 1.1235497659271321e-05, "loss": 0.9564, "step": 4643 }, { "epoch": 1.8879853628786338, "grad_norm": 0.10442263633012772, "learning_rate": 1.119478933441889e-05, "loss": 0.9136, "step": 4644 }, { "epoch": 1.8883919495832486, "grad_norm": 0.09091400355100632, "learning_rate": 1.1154081009566456e-05, "loss": 0.7862, "step": 4645 }, { "epoch": 1.8887985362878634, "grad_norm": 0.11083805561065674, "learning_rate": 1.1113372684714024e-05, "loss": 0.9317, "step": 4646 }, { "epoch": 1.889205122992478, "grad_norm": 0.10703961551189423, "learning_rate": 1.1072664359861593e-05, "loss": 0.9247, "step": 4647 }, { "epoch": 1.889611709697093, "grad_norm": 0.10570546984672546, "learning_rate": 1.103195603500916e-05, "loss": 0.97, "step": 4648 }, { "epoch": 1.8900182964017076, "grad_norm": 0.10433092713356018, "learning_rate": 1.0991247710156729e-05, "loss": 0.8298, "step": 4649 }, { "epoch": 1.8904248831063224, "grad_norm": 0.10301043838262558, "learning_rate": 1.0950539385304295e-05, "loss": 0.8934, "step": 4650 }, { "epoch": 
1.8908314698109372, "grad_norm": 0.1076212227344513, "learning_rate": 1.0909831060451862e-05, "loss": 1.0001, "step": 4651 }, { "epoch": 1.891238056515552, "grad_norm": 0.11841297894716263, "learning_rate": 1.0869122735599431e-05, "loss": 0.9722, "step": 4652 }, { "epoch": 1.8916446432201668, "grad_norm": 0.10684975981712341, "learning_rate": 1.0828414410746997e-05, "loss": 0.9199, "step": 4653 }, { "epoch": 1.8920512299247814, "grad_norm": 0.11354047805070877, "learning_rate": 1.0787706085894565e-05, "loss": 0.9114, "step": 4654 }, { "epoch": 1.8924578166293964, "grad_norm": 0.11264258623123169, "learning_rate": 1.0746997761042134e-05, "loss": 1.0013, "step": 4655 }, { "epoch": 1.892864403334011, "grad_norm": 0.11007174849510193, "learning_rate": 1.07062894361897e-05, "loss": 0.8797, "step": 4656 }, { "epoch": 1.8932709900386258, "grad_norm": 0.10631585121154785, "learning_rate": 1.066558111133727e-05, "loss": 0.8326, "step": 4657 }, { "epoch": 1.8936775767432406, "grad_norm": 0.10875297337770462, "learning_rate": 1.0624872786484837e-05, "loss": 1.0055, "step": 4658 }, { "epoch": 1.8940841634478551, "grad_norm": 0.0996069461107254, "learning_rate": 1.0584164461632403e-05, "loss": 0.8888, "step": 4659 }, { "epoch": 1.8944907501524701, "grad_norm": 0.1021905168890953, "learning_rate": 1.0543456136779973e-05, "loss": 0.961, "step": 4660 }, { "epoch": 1.8948973368570847, "grad_norm": 0.1107843890786171, "learning_rate": 1.050274781192754e-05, "loss": 0.9494, "step": 4661 }, { "epoch": 1.8953039235616995, "grad_norm": 0.10896456986665726, "learning_rate": 1.0462039487075108e-05, "loss": 0.9543, "step": 4662 }, { "epoch": 1.8957105102663143, "grad_norm": 0.09973134100437164, "learning_rate": 1.0421331162222675e-05, "loss": 0.865, "step": 4663 }, { "epoch": 1.896117096970929, "grad_norm": 0.11522912234067917, "learning_rate": 1.0380622837370241e-05, "loss": 0.9884, "step": 4664 }, { "epoch": 1.896523683675544, "grad_norm": 0.11264660954475403, "learning_rate": 
1.033991451251781e-05, "loss": 0.9839, "step": 4665 }, { "epoch": 1.8969302703801585, "grad_norm": 0.10416486859321594, "learning_rate": 1.0299206187665378e-05, "loss": 0.9232, "step": 4666 }, { "epoch": 1.8973368570847733, "grad_norm": 0.10714686661958694, "learning_rate": 1.0258497862812946e-05, "loss": 0.9947, "step": 4667 }, { "epoch": 1.897743443789388, "grad_norm": 0.10765139013528824, "learning_rate": 1.0217789537960514e-05, "loss": 0.9115, "step": 4668 }, { "epoch": 1.8981500304940029, "grad_norm": 0.1037706732749939, "learning_rate": 1.0177081213108081e-05, "loss": 0.9301, "step": 4669 }, { "epoch": 1.8985566171986177, "grad_norm": 0.10889194905757904, "learning_rate": 1.0136372888255649e-05, "loss": 0.9222, "step": 4670 }, { "epoch": 1.8989632039032323, "grad_norm": 0.11055561900138855, "learning_rate": 1.0095664563403217e-05, "loss": 1.0298, "step": 4671 }, { "epoch": 1.8993697906078473, "grad_norm": 0.10710859298706055, "learning_rate": 1.0054956238550784e-05, "loss": 0.9167, "step": 4672 }, { "epoch": 1.8997763773124619, "grad_norm": 0.10510309040546417, "learning_rate": 1.0014247913698352e-05, "loss": 0.941, "step": 4673 }, { "epoch": 1.9001829640170766, "grad_norm": 0.11385677009820938, "learning_rate": 9.97353958884592e-06, "loss": 0.9312, "step": 4674 }, { "epoch": 1.9005895507216914, "grad_norm": 0.11595090478658676, "learning_rate": 9.932831263993487e-06, "loss": 1.0468, "step": 4675 }, { "epoch": 1.900996137426306, "grad_norm": 0.10831689089536667, "learning_rate": 9.892122939141055e-06, "loss": 0.9394, "step": 4676 }, { "epoch": 1.901402724130921, "grad_norm": 0.10828686505556107, "learning_rate": 9.851414614288622e-06, "loss": 0.8823, "step": 4677 }, { "epoch": 1.9018093108355356, "grad_norm": 0.11457982659339905, "learning_rate": 9.81070628943619e-06, "loss": 0.9871, "step": 4678 }, { "epoch": 1.9022158975401504, "grad_norm": 0.10354585945606232, "learning_rate": 9.769997964583758e-06, "loss": 0.9164, "step": 4679 }, { "epoch": 
1.9026224842447652, "grad_norm": 0.1089097335934639, "learning_rate": 9.729289639731325e-06, "loss": 0.8705, "step": 4680 }, { "epoch": 1.90302907094938, "grad_norm": 0.09594661742448807, "learning_rate": 9.688581314878893e-06, "loss": 0.8553, "step": 4681 }, { "epoch": 1.9034356576539948, "grad_norm": 0.0939720869064331, "learning_rate": 9.64787299002646e-06, "loss": 0.9019, "step": 4682 }, { "epoch": 1.9038422443586094, "grad_norm": 0.10637430101633072, "learning_rate": 9.607164665174028e-06, "loss": 0.9498, "step": 4683 }, { "epoch": 1.9042488310632242, "grad_norm": 0.11103527247905731, "learning_rate": 9.566456340321596e-06, "loss": 0.9628, "step": 4684 }, { "epoch": 1.904655417767839, "grad_norm": 0.1133730486035347, "learning_rate": 9.525748015469165e-06, "loss": 0.9158, "step": 4685 }, { "epoch": 1.9050620044724538, "grad_norm": 0.11607538908720016, "learning_rate": 9.485039690616731e-06, "loss": 0.9259, "step": 4686 }, { "epoch": 1.9054685911770686, "grad_norm": 0.10650195926427841, "learning_rate": 9.444331365764299e-06, "loss": 0.8266, "step": 4687 }, { "epoch": 1.9058751778816831, "grad_norm": 0.11444000154733658, "learning_rate": 9.403623040911868e-06, "loss": 0.9656, "step": 4688 }, { "epoch": 1.9062817645862982, "grad_norm": 0.11130890995264053, "learning_rate": 9.362914716059434e-06, "loss": 0.9636, "step": 4689 }, { "epoch": 1.9066883512909127, "grad_norm": 0.114923857152462, "learning_rate": 9.322206391207003e-06, "loss": 0.9988, "step": 4690 }, { "epoch": 1.9070949379955275, "grad_norm": 0.11439431458711624, "learning_rate": 9.28149806635457e-06, "loss": 1.0728, "step": 4691 }, { "epoch": 1.9075015247001423, "grad_norm": 0.10544802248477936, "learning_rate": 9.240789741502137e-06, "loss": 0.9356, "step": 4692 }, { "epoch": 1.907908111404757, "grad_norm": 0.1091354712843895, "learning_rate": 9.200081416649706e-06, "loss": 1.0261, "step": 4693 }, { "epoch": 1.908314698109372, "grad_norm": 0.11757560819387436, "learning_rate": 9.159373091797272e-06, 
"loss": 1.0694, "step": 4694 }, { "epoch": 1.9087212848139865, "grad_norm": 0.1126813217997551, "learning_rate": 9.118664766944841e-06, "loss": 0.995, "step": 4695 }, { "epoch": 1.9091278715186013, "grad_norm": 0.10527123510837555, "learning_rate": 9.077956442092409e-06, "loss": 0.8656, "step": 4696 }, { "epoch": 1.909534458223216, "grad_norm": 0.11032869666814804, "learning_rate": 9.037248117239975e-06, "loss": 1.0283, "step": 4697 }, { "epoch": 1.909941044927831, "grad_norm": 0.11626307666301727, "learning_rate": 8.996539792387544e-06, "loss": 1.0522, "step": 4698 }, { "epoch": 1.9103476316324457, "grad_norm": 0.11547650396823883, "learning_rate": 8.955831467535112e-06, "loss": 1.0599, "step": 4699 }, { "epoch": 1.9107542183370603, "grad_norm": 0.1148945763707161, "learning_rate": 8.915123142682678e-06, "loss": 1.0609, "step": 4700 }, { "epoch": 1.9111608050416753, "grad_norm": 0.11297351121902466, "learning_rate": 8.874414817830247e-06, "loss": 0.9328, "step": 4701 }, { "epoch": 1.9115673917462899, "grad_norm": 0.10873715579509735, "learning_rate": 8.833706492977815e-06, "loss": 1.0021, "step": 4702 }, { "epoch": 1.9119739784509047, "grad_norm": 0.10393388569355011, "learning_rate": 8.792998168125383e-06, "loss": 0.9525, "step": 4703 }, { "epoch": 1.9123805651555195, "grad_norm": 0.10606920719146729, "learning_rate": 8.75228984327295e-06, "loss": 0.929, "step": 4704 }, { "epoch": 1.912787151860134, "grad_norm": 0.11683373153209686, "learning_rate": 8.711581518420516e-06, "loss": 1.0064, "step": 4705 }, { "epoch": 1.913193738564749, "grad_norm": 0.09577450901269913, "learning_rate": 8.670873193568085e-06, "loss": 0.849, "step": 4706 }, { "epoch": 1.9136003252693636, "grad_norm": 0.10738305747509003, "learning_rate": 8.630164868715653e-06, "loss": 0.8143, "step": 4707 }, { "epoch": 1.9140069119739784, "grad_norm": 0.10720358788967133, "learning_rate": 8.58945654386322e-06, "loss": 0.9651, "step": 4708 }, { "epoch": 1.9144134986785932, "grad_norm": 
0.10535360872745514, "learning_rate": 8.548748219010788e-06, "loss": 0.9265, "step": 4709 }, { "epoch": 1.9148200853832078, "grad_norm": 0.0978621169924736, "learning_rate": 8.508039894158356e-06, "loss": 0.8808, "step": 4710 }, { "epoch": 1.9152266720878228, "grad_norm": 0.09929387271404266, "learning_rate": 8.467331569305924e-06, "loss": 0.9438, "step": 4711 }, { "epoch": 1.9156332587924374, "grad_norm": 0.10723693668842316, "learning_rate": 8.426623244453491e-06, "loss": 0.9293, "step": 4712 }, { "epoch": 1.9160398454970522, "grad_norm": 0.11256638914346695, "learning_rate": 8.385914919601059e-06, "loss": 0.9946, "step": 4713 }, { "epoch": 1.916446432201667, "grad_norm": 0.10032477974891663, "learning_rate": 8.345206594748627e-06, "loss": 0.9008, "step": 4714 }, { "epoch": 1.9168530189062818, "grad_norm": 0.10227346420288086, "learning_rate": 8.304498269896194e-06, "loss": 0.9495, "step": 4715 }, { "epoch": 1.9172596056108966, "grad_norm": 0.11047019064426422, "learning_rate": 8.263789945043762e-06, "loss": 1.0152, "step": 4716 }, { "epoch": 1.9176661923155112, "grad_norm": 0.10809038579463959, "learning_rate": 8.22308162019133e-06, "loss": 0.9082, "step": 4717 }, { "epoch": 1.9180727790201262, "grad_norm": 0.10758131742477417, "learning_rate": 8.182373295338897e-06, "loss": 0.9599, "step": 4718 }, { "epoch": 1.9184793657247408, "grad_norm": 0.11720570921897888, "learning_rate": 8.141664970486465e-06, "loss": 1.0244, "step": 4719 }, { "epoch": 1.9188859524293556, "grad_norm": 0.10745330154895782, "learning_rate": 8.100956645634032e-06, "loss": 1.0118, "step": 4720 }, { "epoch": 1.9192925391339704, "grad_norm": 0.10367954522371292, "learning_rate": 8.0602483207816e-06, "loss": 0.9511, "step": 4721 }, { "epoch": 1.919699125838585, "grad_norm": 0.10603120177984238, "learning_rate": 8.019539995929168e-06, "loss": 0.8881, "step": 4722 }, { "epoch": 1.9201057125432, "grad_norm": 0.11715482175350189, "learning_rate": 7.978831671076735e-06, "loss": 0.9882, "step": 4723 
}, { "epoch": 1.9205122992478145, "grad_norm": 0.10853135585784912, "learning_rate": 7.938123346224303e-06, "loss": 0.9684, "step": 4724 }, { "epoch": 1.9209188859524293, "grad_norm": 0.10487642139196396, "learning_rate": 7.89741502137187e-06, "loss": 0.9444, "step": 4725 }, { "epoch": 1.9213254726570441, "grad_norm": 0.10407551378011703, "learning_rate": 7.85670669651944e-06, "loss": 0.9244, "step": 4726 }, { "epoch": 1.921732059361659, "grad_norm": 0.11093860864639282, "learning_rate": 7.815998371667006e-06, "loss": 0.8877, "step": 4727 }, { "epoch": 1.9221386460662737, "grad_norm": 0.10927627235651016, "learning_rate": 7.775290046814573e-06, "loss": 0.9305, "step": 4728 }, { "epoch": 1.9225452327708883, "grad_norm": 0.1054539605975151, "learning_rate": 7.734581721962143e-06, "loss": 0.9661, "step": 4729 }, { "epoch": 1.9229518194755033, "grad_norm": 0.10301047563552856, "learning_rate": 7.693873397109709e-06, "loss": 0.9134, "step": 4730 }, { "epoch": 1.9233584061801179, "grad_norm": 0.1252659112215042, "learning_rate": 7.653165072257278e-06, "loss": 0.9546, "step": 4731 }, { "epoch": 1.9237649928847327, "grad_norm": 0.09721864759922028, "learning_rate": 7.612456747404845e-06, "loss": 0.9061, "step": 4732 }, { "epoch": 1.9241715795893475, "grad_norm": 0.0942339301109314, "learning_rate": 7.5717484225524116e-06, "loss": 0.8571, "step": 4733 }, { "epoch": 1.924578166293962, "grad_norm": 0.11213699728250504, "learning_rate": 7.53104009769998e-06, "loss": 1.0756, "step": 4734 }, { "epoch": 1.924984752998577, "grad_norm": 0.11195732653141022, "learning_rate": 7.490331772847548e-06, "loss": 1.0448, "step": 4735 }, { "epoch": 1.9253913397031917, "grad_norm": 0.11141734570264816, "learning_rate": 7.449623447995116e-06, "loss": 0.966, "step": 4736 }, { "epoch": 1.9257979264078064, "grad_norm": 0.11802522838115692, "learning_rate": 7.408915123142683e-06, "loss": 1.0228, "step": 4737 }, { "epoch": 1.9262045131124212, "grad_norm": 0.11186794191598892, "learning_rate": 
7.3682067982902506e-06, "loss": 0.9879, "step": 4738 }, { "epoch": 1.9266110998170358, "grad_norm": 0.10062884539365768, "learning_rate": 7.327498473437819e-06, "loss": 0.9387, "step": 4739 }, { "epoch": 1.9270176865216508, "grad_norm": 0.09903592616319656, "learning_rate": 7.286790148585386e-06, "loss": 0.8054, "step": 4740 }, { "epoch": 1.9274242732262654, "grad_norm": 0.11208473145961761, "learning_rate": 7.2460818237329535e-06, "loss": 0.9749, "step": 4741 }, { "epoch": 1.9278308599308802, "grad_norm": 0.10989855974912643, "learning_rate": 7.205373498880522e-06, "loss": 1.0421, "step": 4742 }, { "epoch": 1.928237446635495, "grad_norm": 0.10330630093812943, "learning_rate": 7.164665174028089e-06, "loss": 0.9525, "step": 4743 }, { "epoch": 1.9286440333401098, "grad_norm": 0.10825500637292862, "learning_rate": 7.123956849175657e-06, "loss": 1.0131, "step": 4744 }, { "epoch": 1.9290506200447246, "grad_norm": 0.11086854338645935, "learning_rate": 7.083248524323224e-06, "loss": 0.9638, "step": 4745 }, { "epoch": 1.9294572067493392, "grad_norm": 0.09380614757537842, "learning_rate": 7.042540199470792e-06, "loss": 0.7884, "step": 4746 }, { "epoch": 1.9298637934539542, "grad_norm": 0.11035917699337006, "learning_rate": 7.00183187461836e-06, "loss": 0.9481, "step": 4747 }, { "epoch": 1.9302703801585688, "grad_norm": 0.10530402511358261, "learning_rate": 6.961123549765927e-06, "loss": 0.9452, "step": 4748 }, { "epoch": 1.9306769668631836, "grad_norm": 0.10548150539398193, "learning_rate": 6.920415224913495e-06, "loss": 0.9303, "step": 4749 }, { "epoch": 1.9310835535677984, "grad_norm": 0.11273462325334549, "learning_rate": 6.879706900061063e-06, "loss": 0.9262, "step": 4750 }, { "epoch": 1.931490140272413, "grad_norm": 0.10211534053087234, "learning_rate": 6.83899857520863e-06, "loss": 0.8864, "step": 4751 }, { "epoch": 1.931896726977028, "grad_norm": 0.108225978910923, "learning_rate": 6.798290250356198e-06, "loss": 0.9758, "step": 4752 }, { "epoch": 1.9323033136816425, 
"grad_norm": null, "learning_rate": 6.757581925503766e-06, "loss": 0.9683, "step": 4753 }, { "epoch": 1.9327099003862573, "grad_norm": 0.10094906389713287, "learning_rate": 6.716873600651334e-06, "loss": 0.8494, "step": 4754 }, { "epoch": 1.9331164870908721, "grad_norm": 0.10754916816949844, "learning_rate": 6.676165275798901e-06, "loss": 1.0235, "step": 4755 }, { "epoch": 1.933523073795487, "grad_norm": 0.10774201899766922, "learning_rate": 6.635456950946468e-06, "loss": 0.9243, "step": 4756 }, { "epoch": 1.9339296605001017, "grad_norm": 0.10798453539609909, "learning_rate": 6.594748626094037e-06, "loss": 0.9023, "step": 4757 }, { "epoch": 1.9343362472047163, "grad_norm": 0.10341257601976395, "learning_rate": 6.554040301241604e-06, "loss": 0.9192, "step": 4758 }, { "epoch": 1.9347428339093313, "grad_norm": 0.10943766683340073, "learning_rate": 6.513331976389173e-06, "loss": 0.9635, "step": 4759 }, { "epoch": 1.935149420613946, "grad_norm": 0.10420777648687363, "learning_rate": 6.472623651536739e-06, "loss": 0.9566, "step": 4760 }, { "epoch": 1.9355560073185607, "grad_norm": 0.1093553900718689, "learning_rate": 6.431915326684307e-06, "loss": 0.9353, "step": 4761 }, { "epoch": 1.9359625940231755, "grad_norm": 0.10667470842599869, "learning_rate": 6.3912070018318755e-06, "loss": 0.9312, "step": 4762 }, { "epoch": 1.93636918072779, "grad_norm": 0.10576347261667252, "learning_rate": 6.350498676979442e-06, "loss": 0.9651, "step": 4763 }, { "epoch": 1.936775767432405, "grad_norm": 0.106935515999794, "learning_rate": 6.30979035212701e-06, "loss": 0.9552, "step": 4764 }, { "epoch": 1.9371823541370197, "grad_norm": 0.11244690418243408, "learning_rate": 6.269082027274578e-06, "loss": 0.9965, "step": 4765 }, { "epoch": 1.9375889408416345, "grad_norm": 0.10496662557125092, "learning_rate": 6.228373702422145e-06, "loss": 0.9158, "step": 4766 }, { "epoch": 1.9379955275462493, "grad_norm": 0.11604844778776169, "learning_rate": 6.187665377569713e-06, "loss": 1.0076, "step": 4767 }, 
{ "epoch": 1.9384021142508638, "grad_norm": 0.10365528613328934, "learning_rate": 6.146957052717281e-06, "loss": 0.9314, "step": 4768 }, { "epoch": 1.9388087009554789, "grad_norm": 0.1046256497502327, "learning_rate": 6.106248727864849e-06, "loss": 0.8937, "step": 4769 }, { "epoch": 1.9392152876600934, "grad_norm": 0.10845932364463806, "learning_rate": 6.0655404030124166e-06, "loss": 0.9361, "step": 4770 }, { "epoch": 1.9396218743647082, "grad_norm": 0.1074332445859909, "learning_rate": 6.024832078159983e-06, "loss": 0.8536, "step": 4771 }, { "epoch": 1.940028461069323, "grad_norm": 0.10240095853805542, "learning_rate": 5.984123753307552e-06, "loss": 0.965, "step": 4772 }, { "epoch": 1.9404350477739378, "grad_norm": 0.11463471502065659, "learning_rate": 5.9434154284551195e-06, "loss": 1.0433, "step": 4773 }, { "epoch": 1.9408416344785526, "grad_norm": 0.1043282225728035, "learning_rate": 5.902707103602687e-06, "loss": 0.9409, "step": 4774 }, { "epoch": 1.9412482211831672, "grad_norm": 0.10512422770261765, "learning_rate": 5.861998778750255e-06, "loss": 0.9548, "step": 4775 }, { "epoch": 1.9416548078877822, "grad_norm": 0.11762488633394241, "learning_rate": 5.821290453897822e-06, "loss": 1.02, "step": 4776 }, { "epoch": 1.9420613945923968, "grad_norm": 0.10974336415529251, "learning_rate": 5.78058212904539e-06, "loss": 0.9563, "step": 4777 }, { "epoch": 1.9424679812970116, "grad_norm": 0.1021999642252922, "learning_rate": 5.739873804192958e-06, "loss": 0.8839, "step": 4778 }, { "epoch": 1.9428745680016264, "grad_norm": 0.09945038706064224, "learning_rate": 5.699165479340525e-06, "loss": 0.9092, "step": 4779 }, { "epoch": 1.943281154706241, "grad_norm": 0.10554639250040054, "learning_rate": 5.658457154488093e-06, "loss": 0.9512, "step": 4780 }, { "epoch": 1.943687741410856, "grad_norm": 0.10311925411224365, "learning_rate": 5.6177488296356605e-06, "loss": 0.895, "step": 4781 }, { "epoch": 1.9440943281154706, "grad_norm": 0.1108926311135292, "learning_rate": 
5.577040504783228e-06, "loss": 1.0093, "step": 4782 }, { "epoch": 1.9445009148200854, "grad_norm": 0.12254206836223602, "learning_rate": 5.536332179930797e-06, "loss": 0.9449, "step": 4783 }, { "epoch": 1.9449075015247002, "grad_norm": 0.11230570077896118, "learning_rate": 5.495623855078364e-06, "loss": 0.9471, "step": 4784 }, { "epoch": 1.945314088229315, "grad_norm": 0.11160276085138321, "learning_rate": 5.454915530225931e-06, "loss": 0.9878, "step": 4785 }, { "epoch": 1.9457206749339298, "grad_norm": 0.11728110909461975, "learning_rate": 5.414207205373499e-06, "loss": 1.0934, "step": 4786 }, { "epoch": 1.9461272616385443, "grad_norm": 0.10678225010633469, "learning_rate": 5.373498880521067e-06, "loss": 0.9944, "step": 4787 }, { "epoch": 1.9465338483431591, "grad_norm": 0.11184896528720856, "learning_rate": 5.332790555668635e-06, "loss": 0.8622, "step": 4788 }, { "epoch": 1.946940435047774, "grad_norm": 0.1041015014052391, "learning_rate": 5.292082230816202e-06, "loss": 0.8879, "step": 4789 }, { "epoch": 1.9473470217523887, "grad_norm": 0.11516954004764557, "learning_rate": 5.25137390596377e-06, "loss": 1.0263, "step": 4790 }, { "epoch": 1.9477536084570035, "grad_norm": 0.1053466945886612, "learning_rate": 5.210665581111338e-06, "loss": 0.957, "step": 4791 }, { "epoch": 1.948160195161618, "grad_norm": 0.10122139006853104, "learning_rate": 5.169957256258905e-06, "loss": 0.9314, "step": 4792 }, { "epoch": 1.948566781866233, "grad_norm": 0.11456303298473358, "learning_rate": 5.129248931406473e-06, "loss": 0.9948, "step": 4793 }, { "epoch": 1.9489733685708477, "grad_norm": 0.11062067002058029, "learning_rate": 5.088540606554041e-06, "loss": 0.9113, "step": 4794 }, { "epoch": 1.9493799552754625, "grad_norm": 0.10972128063440323, "learning_rate": 5.047832281701608e-06, "loss": 1.0531, "step": 4795 }, { "epoch": 1.9497865419800773, "grad_norm": 0.10982213169336319, "learning_rate": 5.007123956849176e-06, "loss": 0.8759, "step": 4796 }, { "epoch": 1.9501931286846919, 
"grad_norm": 0.10222429037094116, "learning_rate": 4.9664156319967435e-06, "loss": 0.8341, "step": 4797 }, { "epoch": 1.9505997153893069, "grad_norm": 0.1121751070022583, "learning_rate": 4.925707307144311e-06, "loss": 1.0322, "step": 4798 }, { "epoch": 1.9510063020939215, "grad_norm": 0.09666823595762253, "learning_rate": 4.884998982291879e-06, "loss": 0.8805, "step": 4799 }, { "epoch": 1.9514128887985362, "grad_norm": 0.09243001788854599, "learning_rate": 4.8442906574394464e-06, "loss": 0.746, "step": 4800 }, { "epoch": 1.951819475503151, "grad_norm": 0.10632304102182388, "learning_rate": 4.803582332587014e-06, "loss": 0.9029, "step": 4801 }, { "epoch": 1.9522260622077658, "grad_norm": 0.11486592143774033, "learning_rate": 4.7628740077345826e-06, "loss": 1.0341, "step": 4802 }, { "epoch": 1.9526326489123806, "grad_norm": 0.10841212421655655, "learning_rate": 4.722165682882149e-06, "loss": 0.9374, "step": 4803 }, { "epoch": 1.9530392356169952, "grad_norm": 0.11145360767841339, "learning_rate": 4.681457358029717e-06, "loss": 0.9147, "step": 4804 }, { "epoch": 1.9534458223216102, "grad_norm": 0.11122753471136093, "learning_rate": 4.640749033177285e-06, "loss": 0.9332, "step": 4805 }, { "epoch": 1.9538524090262248, "grad_norm": 0.10207870602607727, "learning_rate": 4.600040708324853e-06, "loss": 0.937, "step": 4806 }, { "epoch": 1.9542589957308396, "grad_norm": 0.11454325169324875, "learning_rate": 4.559332383472421e-06, "loss": 1.0435, "step": 4807 }, { "epoch": 1.9546655824354544, "grad_norm": 0.10648126155138016, "learning_rate": 4.5186240586199875e-06, "loss": 0.927, "step": 4808 }, { "epoch": 1.955072169140069, "grad_norm": 0.10996894538402557, "learning_rate": 4.477915733767556e-06, "loss": 0.9693, "step": 4809 }, { "epoch": 1.955478755844684, "grad_norm": 0.10057996213436127, "learning_rate": 4.437207408915124e-06, "loss": 0.9325, "step": 4810 }, { "epoch": 1.9558853425492986, "grad_norm": 0.10628996044397354, "learning_rate": 4.396499084062691e-06, "loss": 
0.891, "step": 4811 }, { "epoch": 1.9562919292539134, "grad_norm": 0.10557537525892258, "learning_rate": 4.355790759210258e-06, "loss": 0.8736, "step": 4812 }, { "epoch": 1.9566985159585282, "grad_norm": 0.10447331517934799, "learning_rate": 4.3150824343578265e-06, "loss": 0.9717, "step": 4813 }, { "epoch": 1.957105102663143, "grad_norm": 0.10446681082248688, "learning_rate": 4.274374109505394e-06, "loss": 1.0103, "step": 4814 }, { "epoch": 1.9575116893677578, "grad_norm": 0.10121920704841614, "learning_rate": 4.233665784652962e-06, "loss": 0.875, "step": 4815 }, { "epoch": 1.9579182760723723, "grad_norm": 0.10913816094398499, "learning_rate": 4.1929574598005294e-06, "loss": 1.0491, "step": 4816 }, { "epoch": 1.9583248627769871, "grad_norm": 0.11767001450061798, "learning_rate": 4.152249134948097e-06, "loss": 1.025, "step": 4817 }, { "epoch": 1.958731449481602, "grad_norm": 0.10180991888046265, "learning_rate": 4.111540810095665e-06, "loss": 0.892, "step": 4818 }, { "epoch": 1.9591380361862167, "grad_norm": 0.11216012388467789, "learning_rate": 4.070832485243232e-06, "loss": 0.9754, "step": 4819 }, { "epoch": 1.9595446228908315, "grad_norm": 0.1098812147974968, "learning_rate": 4.0301241603908e-06, "loss": 0.9805, "step": 4820 }, { "epoch": 1.959951209595446, "grad_norm": 0.10524158924818039, "learning_rate": 3.989415835538368e-06, "loss": 0.9045, "step": 4821 }, { "epoch": 1.9603577963000611, "grad_norm": 0.09650178253650665, "learning_rate": 3.948707510685935e-06, "loss": 0.7913, "step": 4822 }, { "epoch": 1.9607643830046757, "grad_norm": 0.11418919265270233, "learning_rate": 3.907999185833503e-06, "loss": 0.9991, "step": 4823 }, { "epoch": 1.9611709697092905, "grad_norm": 0.11137097328901291, "learning_rate": 3.867290860981071e-06, "loss": 0.978, "step": 4824 }, { "epoch": 1.9615775564139053, "grad_norm": 0.1029028594493866, "learning_rate": 3.826582536128639e-06, "loss": 0.8791, "step": 4825 }, { "epoch": 1.9619841431185199, "grad_norm": 0.10152295976877213, 
"learning_rate": 3.7858742112762058e-06, "loss": 0.8855, "step": 4826 }, { "epoch": 1.962390729823135, "grad_norm": 0.11157593131065369, "learning_rate": 3.745165886423774e-06, "loss": 1.0097, "step": 4827 }, { "epoch": 1.9627973165277495, "grad_norm": 0.10975543409585953, "learning_rate": 3.7044575615713415e-06, "loss": 1.0269, "step": 4828 }, { "epoch": 1.9632039032323643, "grad_norm": 0.10318556427955627, "learning_rate": 3.6637492367189095e-06, "loss": 0.9094, "step": 4829 }, { "epoch": 1.963610489936979, "grad_norm": 0.09540821611881256, "learning_rate": 3.6230409118664767e-06, "loss": 0.7923, "step": 4830 }, { "epoch": 1.9640170766415939, "grad_norm": 0.11185004562139511, "learning_rate": 3.5823325870140444e-06, "loss": 0.9945, "step": 4831 }, { "epoch": 1.9644236633462087, "grad_norm": 0.1030164510011673, "learning_rate": 3.541624262161612e-06, "loss": 0.8952, "step": 4832 }, { "epoch": 1.9648302500508232, "grad_norm": 0.10606315732002258, "learning_rate": 3.50091593730918e-06, "loss": 0.8872, "step": 4833 }, { "epoch": 1.9652368367554383, "grad_norm": 0.10676340013742447, "learning_rate": 3.4602076124567477e-06, "loss": 0.9616, "step": 4834 }, { "epoch": 1.9656434234600528, "grad_norm": 0.11374758929014206, "learning_rate": 3.419499287604315e-06, "loss": 1.0619, "step": 4835 }, { "epoch": 1.9660500101646676, "grad_norm": 0.10142536461353302, "learning_rate": 3.378790962751883e-06, "loss": 0.8787, "step": 4836 }, { "epoch": 1.9664565968692824, "grad_norm": 0.1088085025548935, "learning_rate": 3.3380826378994506e-06, "loss": 1.0706, "step": 4837 }, { "epoch": 1.966863183573897, "grad_norm": 0.11617989093065262, "learning_rate": 3.2973743130470187e-06, "loss": 1.0758, "step": 4838 }, { "epoch": 1.967269770278512, "grad_norm": 0.10999471694231033, "learning_rate": 3.2566659881945863e-06, "loss": 0.8955, "step": 4839 }, { "epoch": 1.9676763569831266, "grad_norm": 0.10413683950901031, "learning_rate": 3.2159576633421535e-06, "loss": 0.8774, "step": 4840 }, { 
"epoch": 1.9680829436877414, "grad_norm": 0.10912149399518967, "learning_rate": 3.175249338489721e-06, "loss": 0.9151, "step": 4841 }, { "epoch": 1.9684895303923562, "grad_norm": 0.10065335780382156, "learning_rate": 3.134541013637289e-06, "loss": 0.8947, "step": 4842 }, { "epoch": 1.9688961170969708, "grad_norm": 0.10842598974704742, "learning_rate": 3.0938326887848564e-06, "loss": 0.9149, "step": 4843 }, { "epoch": 1.9693027038015858, "grad_norm": 0.09546621143817902, "learning_rate": 3.0531243639324245e-06, "loss": 0.8106, "step": 4844 }, { "epoch": 1.9697092905062004, "grad_norm": 0.10605739057064056, "learning_rate": 3.0124160390799917e-06, "loss": 0.8663, "step": 4845 }, { "epoch": 1.9701158772108152, "grad_norm": 0.11531540751457214, "learning_rate": 2.9717077142275597e-06, "loss": 0.9487, "step": 4846 }, { "epoch": 1.97052246391543, "grad_norm": 0.112498939037323, "learning_rate": 2.9309993893751274e-06, "loss": 0.9846, "step": 4847 }, { "epoch": 1.9709290506200448, "grad_norm": 0.10680878907442093, "learning_rate": 2.890291064522695e-06, "loss": 0.9092, "step": 4848 }, { "epoch": 1.9713356373246596, "grad_norm": 0.11008645594120026, "learning_rate": 2.8495827396702626e-06, "loss": 0.918, "step": 4849 }, { "epoch": 1.9717422240292741, "grad_norm": 0.1180918887257576, "learning_rate": 2.8088744148178303e-06, "loss": 1.1026, "step": 4850 }, { "epoch": 1.9721488107338891, "grad_norm": 0.10788023471832275, "learning_rate": 2.7681660899653983e-06, "loss": 0.9422, "step": 4851 }, { "epoch": 1.9725553974385037, "grad_norm": 0.11532583087682724, "learning_rate": 2.7274577651129655e-06, "loss": 0.9619, "step": 4852 }, { "epoch": 1.9729619841431185, "grad_norm": 0.1164373904466629, "learning_rate": 2.6867494402605336e-06, "loss": 1.0735, "step": 4853 }, { "epoch": 1.9733685708477333, "grad_norm": 0.10352805256843567, "learning_rate": 2.646041115408101e-06, "loss": 0.9302, "step": 4854 }, { "epoch": 1.973775157552348, "grad_norm": 0.09697481989860535, "learning_rate": 
2.605332790555669e-06, "loss": 0.8169, "step": 4855 }, { "epoch": 1.974181744256963, "grad_norm": 0.10641641169786453, "learning_rate": 2.5646244657032365e-06, "loss": 0.9379, "step": 4856 }, { "epoch": 1.9745883309615775, "grad_norm": 0.12247955799102783, "learning_rate": 2.523916140850804e-06, "loss": 1.1005, "step": 4857 }, { "epoch": 1.9749949176661923, "grad_norm": 0.11470235139131546, "learning_rate": 2.4832078159983718e-06, "loss": 1.0682, "step": 4858 }, { "epoch": 1.975401504370807, "grad_norm": 0.10415980964899063, "learning_rate": 2.4424994911459394e-06, "loss": 0.9184, "step": 4859 }, { "epoch": 1.9758080910754219, "grad_norm": 0.10580716282129288, "learning_rate": 2.401791166293507e-06, "loss": 0.9137, "step": 4860 }, { "epoch": 1.9762146777800367, "grad_norm": 0.10806702822446823, "learning_rate": 2.3610828414410747e-06, "loss": 1.0023, "step": 4861 }, { "epoch": 1.9766212644846513, "grad_norm": 0.10730385035276413, "learning_rate": 2.3203745165886423e-06, "loss": 0.9394, "step": 4862 }, { "epoch": 1.9770278511892663, "grad_norm": 0.11646751314401627, "learning_rate": 2.2796661917362104e-06, "loss": 1.0452, "step": 4863 }, { "epoch": 1.9774344378938808, "grad_norm": 0.11328614503145218, "learning_rate": 2.238957866883778e-06, "loss": 1.0363, "step": 4864 }, { "epoch": 1.9778410245984956, "grad_norm": 0.10477136820554733, "learning_rate": 2.1982495420313456e-06, "loss": 0.8967, "step": 4865 }, { "epoch": 1.9782476113031104, "grad_norm": 0.1011333018541336, "learning_rate": 2.1575412171789133e-06, "loss": 0.9051, "step": 4866 }, { "epoch": 1.978654198007725, "grad_norm": 0.10585794597864151, "learning_rate": 2.116832892326481e-06, "loss": 0.9641, "step": 4867 }, { "epoch": 1.97906078471234, "grad_norm": 0.10518283396959305, "learning_rate": 2.0761245674740485e-06, "loss": 0.9738, "step": 4868 }, { "epoch": 1.9794673714169546, "grad_norm": 0.10781599581241608, "learning_rate": 2.035416242621616e-06, "loss": 0.9535, "step": 4869 }, { "epoch": 
1.9798739581215694, "grad_norm": 0.10149887949228287, "learning_rate": 1.994707917769184e-06, "loss": 0.7832, "step": 4870 }, { "epoch": 1.9802805448261842, "grad_norm": 0.10625772923231125, "learning_rate": 1.9539995929167514e-06, "loss": 0.8969, "step": 4871 }, { "epoch": 1.9806871315307988, "grad_norm": 0.100648894906044, "learning_rate": 1.9132912680643195e-06, "loss": 0.8592, "step": 4872 }, { "epoch": 1.9810937182354138, "grad_norm": 0.10639602690935135, "learning_rate": 1.872582943211887e-06, "loss": 0.9377, "step": 4873 }, { "epoch": 1.9815003049400284, "grad_norm": 0.10608502477407455, "learning_rate": 1.8318746183594548e-06, "loss": 0.8221, "step": 4874 }, { "epoch": 1.9819068916446432, "grad_norm": 0.1076526865363121, "learning_rate": 1.7911662935070222e-06, "loss": 1.0001, "step": 4875 }, { "epoch": 1.982313478349258, "grad_norm": 0.10484609007835388, "learning_rate": 1.75045796865459e-06, "loss": 0.9281, "step": 4876 }, { "epoch": 1.9827200650538728, "grad_norm": 0.11033840477466583, "learning_rate": 1.7097496438021575e-06, "loss": 1.012, "step": 4877 }, { "epoch": 1.9831266517584876, "grad_norm": 0.10178755968809128, "learning_rate": 1.6690413189497253e-06, "loss": 0.8751, "step": 4878 }, { "epoch": 1.9835332384631021, "grad_norm": 0.09968069940805435, "learning_rate": 1.6283329940972931e-06, "loss": 0.8481, "step": 4879 }, { "epoch": 1.9839398251677172, "grad_norm": 0.11199220269918442, "learning_rate": 1.5876246692448606e-06, "loss": 1.0553, "step": 4880 }, { "epoch": 1.9843464118723317, "grad_norm": 0.10771384090185165, "learning_rate": 1.5469163443924282e-06, "loss": 0.9871, "step": 4881 }, { "epoch": 1.9847529985769465, "grad_norm": 0.1033516600728035, "learning_rate": 1.5062080195399958e-06, "loss": 0.8731, "step": 4882 }, { "epoch": 1.9851595852815613, "grad_norm": 0.10771310329437256, "learning_rate": 1.4654996946875637e-06, "loss": 1.0152, "step": 4883 }, { "epoch": 1.985566171986176, "grad_norm": 0.10385514050722122, "learning_rate": 
1.4247913698351313e-06, "loss": 0.8569, "step": 4884 }, { "epoch": 1.985972758690791, "grad_norm": 0.10435989499092102, "learning_rate": 1.3840830449826992e-06, "loss": 0.8999, "step": 4885 }, { "epoch": 1.9863793453954055, "grad_norm": 0.10604739189147949, "learning_rate": 1.3433747201302668e-06, "loss": 0.8837, "step": 4886 }, { "epoch": 1.9867859321000203, "grad_norm": 0.11071362346410751, "learning_rate": 1.3026663952778344e-06, "loss": 0.9995, "step": 4887 }, { "epoch": 1.987192518804635, "grad_norm": 0.11492349952459335, "learning_rate": 1.261958070425402e-06, "loss": 1.0693, "step": 4888 }, { "epoch": 1.98759910550925, "grad_norm": 0.11402280628681183, "learning_rate": 1.2212497455729697e-06, "loss": 1.0973, "step": 4889 }, { "epoch": 1.9880056922138647, "grad_norm": 0.10784902423620224, "learning_rate": 1.1805414207205373e-06, "loss": 0.9591, "step": 4890 }, { "epoch": 1.9884122789184793, "grad_norm": 0.10509707778692245, "learning_rate": 1.1398330958681052e-06, "loss": 0.9233, "step": 4891 }, { "epoch": 1.9888188656230943, "grad_norm": 0.10772809386253357, "learning_rate": 1.0991247710156728e-06, "loss": 0.9239, "step": 4892 }, { "epoch": 1.9892254523277089, "grad_norm": 0.10139593482017517, "learning_rate": 1.0584164461632405e-06, "loss": 0.8991, "step": 4893 }, { "epoch": 1.9896320390323237, "grad_norm": 0.11088011413812637, "learning_rate": 1.017708121310808e-06, "loss": 0.9746, "step": 4894 }, { "epoch": 1.9900386257369385, "grad_norm": 0.1069415956735611, "learning_rate": 9.769997964583757e-07, "loss": 0.9667, "step": 4895 }, { "epoch": 1.990445212441553, "grad_norm": 0.11252355575561523, "learning_rate": 9.362914716059435e-07, "loss": 0.9521, "step": 4896 }, { "epoch": 1.990851799146168, "grad_norm": 0.11555030941963196, "learning_rate": 8.955831467535111e-07, "loss": 0.9464, "step": 4897 }, { "epoch": 1.9912583858507826, "grad_norm": 0.10089296847581863, "learning_rate": 8.548748219010787e-07, "loss": 0.9118, "step": 4898 }, { "epoch": 
1.9916649725553974, "grad_norm": 0.10483364015817642, "learning_rate": 8.141664970486466e-07, "loss": 0.9561, "step": 4899 }, { "epoch": 1.9920715592600122, "grad_norm": 0.10259924083948135, "learning_rate": 7.734581721962141e-07, "loss": 0.937, "step": 4900 }, { "epoch": 1.9924781459646268, "grad_norm": 0.10515905171632767, "learning_rate": 7.327498473437818e-07, "loss": 0.9686, "step": 4901 }, { "epoch": 1.9928847326692418, "grad_norm": 0.1109880730509758, "learning_rate": 6.920415224913496e-07, "loss": 0.9375, "step": 4902 }, { "epoch": 1.9932913193738564, "grad_norm": 0.10059867799282074, "learning_rate": 6.513331976389172e-07, "loss": 0.9148, "step": 4903 }, { "epoch": 1.9936979060784712, "grad_norm": 0.1153227686882019, "learning_rate": 6.106248727864849e-07, "loss": 1.065, "step": 4904 }, { "epoch": 1.994104492783086, "grad_norm": 0.10817611962556839, "learning_rate": 5.699165479340526e-07, "loss": 0.9162, "step": 4905 }, { "epoch": 1.9945110794877008, "grad_norm": 0.09951157122850418, "learning_rate": 5.292082230816202e-07, "loss": 0.885, "step": 4906 }, { "epoch": 1.9949176661923156, "grad_norm": 0.1026596650481224, "learning_rate": 4.884998982291879e-07, "loss": 0.9054, "step": 4907 }, { "epoch": 1.9953242528969302, "grad_norm": 0.10928881913423538, "learning_rate": 4.4779157337675555e-07, "loss": 0.9206, "step": 4908 }, { "epoch": 1.9957308396015452, "grad_norm": 0.1039741113781929, "learning_rate": 4.070832485243233e-07, "loss": 0.9762, "step": 4909 }, { "epoch": 1.9961374263061598, "grad_norm": 0.10720765590667725, "learning_rate": 3.663749236718909e-07, "loss": 0.9376, "step": 4910 }, { "epoch": 1.9965440130107746, "grad_norm": 0.11087562888860703, "learning_rate": 3.256665988194586e-07, "loss": 1.0135, "step": 4911 }, { "epoch": 1.9969505997153894, "grad_norm": 0.11333035677671432, "learning_rate": 2.849582739670263e-07, "loss": 0.9378, "step": 4912 }, { "epoch": 1.997357186420004, "grad_norm": 0.10567180067300797, "learning_rate": 
2.4424994911459393e-07, "loss": 0.8727, "step": 4913 }, { "epoch": 1.997763773124619, "grad_norm": 0.09908761829137802, "learning_rate": 2.0354162426216164e-07, "loss": 0.8175, "step": 4914 }, { "epoch": 1.9981703598292335, "grad_norm": 0.1148877665400505, "learning_rate": 1.628332994097293e-07, "loss": 0.9689, "step": 4915 }, { "epoch": 1.9985769465338483, "grad_norm": 0.1073300689458847, "learning_rate": 1.2212497455729696e-07, "loss": 0.9064, "step": 4916 }, { "epoch": 1.9989835332384631, "grad_norm": 0.10753702372312546, "learning_rate": 8.141664970486465e-08, "loss": 0.9366, "step": 4917 }, { "epoch": 1.999390119943078, "grad_norm": 0.10542717576026917, "learning_rate": 4.0708324852432326e-08, "loss": 0.8963, "step": 4918 } ], "logging_steps": 1, "max_steps": 4918, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.57185946392996e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }