{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5450294775360844, "eval_steps": 500, "global_step": 7600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00020329335230737954, "grad_norm": 0.36621615290641785, "learning_rate": 0.0, "loss": 1.5428, "step": 1 }, { "epoch": 0.0004065867046147591, "grad_norm": 0.38205745816230774, "learning_rate": 4e-05, "loss": 1.3117, "step": 2 }, { "epoch": 0.0006098800569221387, "grad_norm": 0.38206785917282104, "learning_rate": 8e-05, "loss": 1.5818, "step": 3 }, { "epoch": 0.0008131734092295182, "grad_norm": 0.2856779396533966, "learning_rate": 0.00012, "loss": 1.4046, "step": 4 }, { "epoch": 0.0010164667615368977, "grad_norm": 0.28240150213241577, "learning_rate": 0.00016, "loss": 1.1899, "step": 5 }, { "epoch": 0.0012197601138442774, "grad_norm": 0.27537214756011963, "learning_rate": 0.0002, "loss": 1.3741, "step": 6 }, { "epoch": 0.0014230534661516568, "grad_norm": 0.1832859218120575, "learning_rate": 0.00019997966032746873, "loss": 1.2432, "step": 7 }, { "epoch": 0.0016263468184590363, "grad_norm": 0.1194186806678772, "learning_rate": 0.00019995932065493746, "loss": 1.1152, "step": 8 }, { "epoch": 0.001829640170766416, "grad_norm": 0.287241131067276, "learning_rate": 0.0001999389809824062, "loss": 1.2556, "step": 9 }, { "epoch": 0.0020329335230737954, "grad_norm": 0.2129606008529663, "learning_rate": 0.00019991864130987493, "loss": 1.2698, "step": 10 }, { "epoch": 0.002236226875381175, "grad_norm": 0.16557097434997559, "learning_rate": 0.00019989830163734363, "loss": 1.2364, "step": 11 }, { "epoch": 0.002439520227688555, "grad_norm": 0.15639722347259521, "learning_rate": 0.00019987796196481236, "loss": 1.2499, "step": 12 }, { "epoch": 0.002642813579995934, "grad_norm": 0.11634412407875061, "learning_rate": 0.0001998576222922811, "loss": 1.148, "step": 13 }, { "epoch": 0.0028461069323033137, "grad_norm": 0.13249927759170532, "learning_rate": 0.00019983728261974983, "loss": 1.267, "step": 14 }, { "epoch": 0.0030494002846106934, "grad_norm": 0.12251798063516617, "learning_rate": 0.00019981694294721856, "loss": 1.1868, "step": 15 }, { "epoch": 0.0032526936369180726, "grad_norm": 0.1279357224702835, "learning_rate": 0.00019979660327468728, "loss": 1.3611, "step": 16 }, { "epoch": 0.0034559869892254523, "grad_norm": 0.10690166801214218, "learning_rate": 0.00019977626360215603, "loss": 1.2934, "step": 17 }, { "epoch": 0.003659280341532832, "grad_norm": 0.09634354710578918, "learning_rate": 0.00019975592392962476, "loss": 1.1628, "step": 18 }, { "epoch": 0.0038625736938402116, "grad_norm": 0.08993979543447495, "learning_rate": 0.00019973558425709345, "loss": 1.1328, "step": 19 }, { "epoch": 0.004065867046147591, "grad_norm": 0.09834206104278564, "learning_rate": 0.00019971524458456218, "loss": 1.1424, "step": 20 }, { "epoch": 0.0042691603984549705, "grad_norm": 0.10754235088825226, "learning_rate": 0.00019969490491203093, "loss": 1.0843, "step": 21 }, { "epoch": 0.00447245375076235, "grad_norm": 0.09953349828720093, "learning_rate": 0.00019967456523949966, "loss": 1.1475, "step": 22 }, { "epoch": 0.00467574710306973, "grad_norm": 0.09910175204277039, "learning_rate": 0.00019965422556696838, "loss": 1.1532, "step": 23 }, { "epoch": 0.00487904045537711, "grad_norm": 0.10800202935934067, "learning_rate": 0.0001996338858944371, "loss": 1.2336, "step": 24 }, { "epoch": 0.005082333807684488, "grad_norm": 0.10042817890644073, "learning_rate": 0.00019961354622190586, "loss": 1.1235, "step": 25 }, { "epoch": 0.005285627159991868, "grad_norm": 0.10839787125587463, "learning_rate": 0.00019959320654937458, "loss": 1.3561, "step": 26 }, { "epoch": 0.005488920512299248, "grad_norm": 0.10559111088514328, "learning_rate": 0.00019957286687684328, "loss": 1.3255, "step": 27 }, { "epoch": 0.005692213864606627, "grad_norm": 0.12504474818706512, "learning_rate": 0.000199552527204312, "loss": 1.4834, "step": 28 }, { "epoch": 0.005895507216914007, "grad_norm": 0.09099473804235458, "learning_rate": 0.00019953218753178075, "loss": 1.1404, "step": 29 }, { "epoch": 0.006098800569221387, "grad_norm": 0.09648846834897995, "learning_rate": 0.00019951184785924948, "loss": 1.1302, "step": 30 }, { "epoch": 0.006302093921528766, "grad_norm": 0.0992799773812294, "learning_rate": 0.0001994915081867182, "loss": 1.3508, "step": 31 }, { "epoch": 0.006505387273836145, "grad_norm": 0.10332880914211273, "learning_rate": 0.00019947116851418693, "loss": 1.1819, "step": 32 }, { "epoch": 0.006708680626143525, "grad_norm": 0.1172771006822586, "learning_rate": 0.00019945082884165568, "loss": 1.2512, "step": 33 }, { "epoch": 0.006911973978450905, "grad_norm": 0.10649558156728745, "learning_rate": 0.0001994304891691244, "loss": 1.2394, "step": 34 }, { "epoch": 0.007115267330758284, "grad_norm": 0.10849574208259583, "learning_rate": 0.0001994101494965931, "loss": 1.1847, "step": 35 }, { "epoch": 0.007318560683065664, "grad_norm": 0.11581245064735413, "learning_rate": 0.00019938980982406182, "loss": 1.2788, "step": 36 }, { "epoch": 0.007521854035373044, "grad_norm": 0.09857199341058731, "learning_rate": 0.00019936947015153058, "loss": 1.1072, "step": 37 }, { "epoch": 0.007725147387680423, "grad_norm": 0.08895204216241837, "learning_rate": 0.0001993491304789993, "loss": 0.8741, "step": 38 }, { "epoch": 0.007928440739987802, "grad_norm": 0.09715849161148071, "learning_rate": 0.00019932879080646803, "loss": 1.2999, "step": 39 }, { "epoch": 0.008131734092295182, "grad_norm": 0.09145913273096085, "learning_rate": 0.00019930845113393675, "loss": 0.9852, "step": 40 }, { "epoch": 0.008335027444602561, "grad_norm": 0.09298407286405563, "learning_rate": 0.0001992881114614055, "loss": 1.1033, "step": 41 }, { "epoch": 0.008538320796909941, "grad_norm": 0.09957871586084366, "learning_rate": 0.00019926777178887423, "loss": 1.3203, "step": 42 }, { "epoch": 0.00874161414921732, "grad_norm": 0.1212410032749176, "learning_rate": 0.00019924743211634292, "loss": 1.166, "step": 43 }, { "epoch": 0.0089449075015247, "grad_norm": 0.10740375518798828, "learning_rate": 0.00019922709244381165, "loss": 1.1905, "step": 44 }, { "epoch": 0.00914820085383208, "grad_norm": 0.10571859031915665, "learning_rate": 0.0001992067527712804, "loss": 1.07, "step": 45 }, { "epoch": 0.00935149420613946, "grad_norm": 0.11196234077215195, "learning_rate": 0.00019918641309874912, "loss": 1.2516, "step": 46 }, { "epoch": 0.00955478755844684, "grad_norm": 0.1025981530547142, "learning_rate": 0.00019916607342621785, "loss": 1.1118, "step": 47 }, { "epoch": 0.00975808091075422, "grad_norm": 0.10584773868322372, "learning_rate": 0.00019914573375368657, "loss": 1.1551, "step": 48 }, { "epoch": 0.009961374263061597, "grad_norm": 0.10157100111246109, "learning_rate": 0.0001991253940811553, "loss": 0.9638, "step": 49 }, { "epoch": 0.010164667615368977, "grad_norm": 0.10194176435470581, "learning_rate": 0.00019910505440862405, "loss": 1.0024, "step": 50 }, { "epoch": 0.010367960967676356, "grad_norm": 0.10047532618045807, "learning_rate": 0.00019908471473609275, "loss": 1.1767, "step": 51 }, { "epoch": 0.010571254319983736, "grad_norm": 0.10448278486728668, "learning_rate": 0.00019906437506356147, "loss": 1.2346, "step": 52 }, { "epoch": 0.010774547672291116, "grad_norm": 0.09438527375459671, "learning_rate": 0.0001990440353910302, "loss": 1.0517, "step": 53 }, { "epoch": 0.010977841024598495, "grad_norm": 0.12310227751731873, "learning_rate": 0.00019902369571849895, "loss": 1.2809, "step": 54 }, { "epoch": 0.011181134376905875, "grad_norm": 0.10811592638492584, "learning_rate": 0.00019900335604596767, "loss": 1.1171, "step": 55 }, { "epoch": 0.011384427729213255, "grad_norm": 0.12063754349946976, "learning_rate": 0.0001989830163734364, "loss": 1.0719, "step": 56 }, { "epoch": 0.011587721081520634, "grad_norm": 0.10147465020418167, "learning_rate": 0.00019896267670090512, "loss": 1.0643, "step": 57 }, { "epoch": 0.011791014433828014, "grad_norm": 0.10184100270271301, "learning_rate": 0.00019894233702837387, "loss": 1.0148, "step": 58 }, { "epoch": 0.011994307786135394, "grad_norm": 0.1090080663561821, "learning_rate": 0.00019892199735584257, "loss": 1.0762, "step": 59 }, { "epoch": 0.012197601138442773, "grad_norm": 0.0960102528333664, "learning_rate": 0.0001989016576833113, "loss": 1.09, "step": 60 }, { "epoch": 0.012400894490750153, "grad_norm": 0.1056618019938469, "learning_rate": 0.00019888131801078002, "loss": 1.0092, "step": 61 }, { "epoch": 0.012604187843057533, "grad_norm": 0.10064820945262909, "learning_rate": 0.00019886097833824877, "loss": 1.0135, "step": 62 }, { "epoch": 0.01280748119536491, "grad_norm": 0.11083406209945679, "learning_rate": 0.0001988406386657175, "loss": 1.2344, "step": 63 }, { "epoch": 0.01301077454767229, "grad_norm": 0.10507647693157196, "learning_rate": 0.00019882029899318622, "loss": 1.1035, "step": 64 }, { "epoch": 0.01321406789997967, "grad_norm": 0.10840694606304169, "learning_rate": 0.00019879995932065494, "loss": 1.1458, "step": 65 }, { "epoch": 0.01341736125228705, "grad_norm": 0.1149667352437973, "learning_rate": 0.0001987796196481237, "loss": 1.0722, "step": 66 }, { "epoch": 0.01362065460459443, "grad_norm": 0.1219320297241211, "learning_rate": 0.00019875927997559242, "loss": 1.1392, "step": 67 }, { "epoch": 0.01382394795690181, "grad_norm": 0.1296710968017578, "learning_rate": 0.00019873894030306112, "loss": 1.1831, "step": 68 }, { "epoch": 0.014027241309209189, "grad_norm": 0.11214271187782288, "learning_rate": 0.00019871860063052984, "loss": 1.0385, "step": 69 }, { "epoch": 0.014230534661516568, "grad_norm": 0.12374462932348251, "learning_rate": 0.0001986982609579986, "loss": 1.2633, "step": 70 }, { "epoch": 0.014433828013823948, "grad_norm": 0.09856373071670532, "learning_rate": 0.00019867792128546732, "loss": 1.0495, "step": 71 }, { "epoch": 0.014637121366131328, "grad_norm": 0.1356707215309143, "learning_rate": 0.00019865758161293604, "loss": 1.2245, "step": 72 }, { "epoch": 0.014840414718438708, "grad_norm": 0.10265105217695236, "learning_rate": 0.00019863724194040477, "loss": 1.0076, "step": 73 }, { "epoch": 0.015043708070746087, "grad_norm": 0.1307467520236969, "learning_rate": 0.00019861690226787352, "loss": 1.3838, "step": 74 }, { "epoch": 0.015247001423053467, "grad_norm": 0.12013835459947586, "learning_rate": 0.00019859656259534224, "loss": 1.1509, "step": 75 }, { "epoch": 0.015450294775360847, "grad_norm": 0.10897748917341232, "learning_rate": 0.00019857622292281094, "loss": 1.025, "step": 76 }, { "epoch": 0.015653588127668226, "grad_norm": 0.10924937576055527, "learning_rate": 0.00019855588325027966, "loss": 1.06, "step": 77 }, { "epoch": 0.015856881479975604, "grad_norm": 0.10474475473165512, "learning_rate": 0.00019853554357774841, "loss": 0.9267, "step": 78 }, { "epoch": 0.016060174832282986, "grad_norm": 0.12250765413045883, "learning_rate": 0.00019851520390521714, "loss": 1.1231, "step": 79 }, { "epoch": 0.016263468184590364, "grad_norm": 0.12869718670845032, "learning_rate": 0.00019849486423268586, "loss": 1.1219, "step": 80 }, { "epoch": 0.016466761536897745, "grad_norm": 0.11656077951192856, "learning_rate": 0.0001984745245601546, "loss": 1.1494, "step": 81 }, { "epoch": 0.016670054889205123, "grad_norm": 0.12449704855680466, "learning_rate": 0.00019845418488762334, "loss": 1.3177, "step": 82 }, { "epoch": 0.016873348241512504, "grad_norm": 0.10715439915657043, "learning_rate": 0.00019843384521509206, "loss": 1.0944, "step": 83 }, { "epoch": 0.017076641593819882, "grad_norm": 0.11231628060340881, "learning_rate": 0.00019841350554256076, "loss": 1.0681, "step": 84 }, { "epoch": 0.01727993494612726, "grad_norm": 0.1253119856119156, "learning_rate": 0.00019839316587002949, "loss": 1.104, "step": 85 }, { "epoch": 0.01748322829843464, "grad_norm": 0.12725764513015747, "learning_rate": 0.00019837282619749824, "loss": 1.307, "step": 86 }, { "epoch": 0.01768652165074202, "grad_norm": 0.11470405012369156, "learning_rate": 0.00019835248652496696, "loss": 1.0729, "step": 87 }, { "epoch": 0.0178898150030494, "grad_norm": 0.12006914615631104, "learning_rate": 0.0001983321468524357, "loss": 1.1608, "step": 88 }, { "epoch": 0.01809310835535678, "grad_norm": 0.11256147921085358, "learning_rate": 0.0001983118071799044, "loss": 1.0816, "step": 89 }, { "epoch": 0.01829640170766416, "grad_norm": 0.12627887725830078, "learning_rate": 0.00019829146750737314, "loss": 1.2015, "step": 90 }, { "epoch": 0.018499695059971538, "grad_norm": 0.12313251942396164, "learning_rate": 0.0001982711278348419, "loss": 1.0837, "step": 91 }, { "epoch": 0.01870298841227892, "grad_norm": 0.1349901705980301, "learning_rate": 0.00019825078816231058, "loss": 1.2277, "step": 92 }, { "epoch": 0.018906281764586298, "grad_norm": 0.11006023734807968, "learning_rate": 0.0001982304484897793, "loss": 1.0768, "step": 93 }, { "epoch": 0.01910957511689368, "grad_norm": 0.1100686565041542, "learning_rate": 0.00019821010881724803, "loss": 1.1119, "step": 94 }, { "epoch": 0.019312868469201057, "grad_norm": 0.1252383440732956, "learning_rate": 0.00019818976914471678, "loss": 1.1862, "step": 95 }, { "epoch": 0.01951616182150844, "grad_norm": 0.1430596113204956, "learning_rate": 0.0001981694294721855, "loss": 1.1765, "step": 96 }, { "epoch": 0.019719455173815816, "grad_norm": 0.130848690867424, "learning_rate": 0.00019814908979965423, "loss": 1.3255, "step": 97 }, { "epoch": 0.019922748526123194, "grad_norm": 0.13019633293151855, "learning_rate": 0.00019812875012712296, "loss": 1.1853, "step": 98 }, { "epoch": 0.020126041878430576, "grad_norm": 0.11539386957883835, "learning_rate": 0.0001981084104545917, "loss": 1.1897, "step": 99 }, { "epoch": 0.020329335230737954, "grad_norm": 0.11749454587697983, "learning_rate": 0.0001980880707820604, "loss": 1.1189, "step": 100 }, { "epoch": 0.020532628583045335, "grad_norm": 0.12270624190568924, "learning_rate": 0.00019806773110952913, "loss": 1.1821, "step": 101 }, { "epoch": 0.020735921935352713, "grad_norm": 0.1244652196764946, "learning_rate": 0.00019804739143699786, "loss": 1.2847, "step": 102 }, { "epoch": 0.020939215287660094, "grad_norm": 0.1163022369146347, "learning_rate": 0.0001980270517644666, "loss": 0.9405, "step": 103 }, { "epoch": 0.021142508639967472, "grad_norm": 0.13008280098438263, "learning_rate": 0.00019800671209193533, "loss": 1.272, "step": 104 }, { "epoch": 0.021345801992274854, "grad_norm": 0.11667025834321976, "learning_rate": 0.00019798637241940406, "loss": 1.0145, "step": 105 }, { "epoch": 0.02154909534458223, "grad_norm": 0.10680039972066879, "learning_rate": 0.00019796603274687278, "loss": 1.0929, "step": 106 }, { "epoch": 0.021752388696889613, "grad_norm": 0.12161742150783539, "learning_rate": 0.00019794569307434153, "loss": 1.0314, "step": 107 }, { "epoch": 0.02195568204919699, "grad_norm": 0.10798537731170654, "learning_rate": 0.00019792535340181023, "loss": 0.9918, "step": 108 }, { "epoch": 0.022158975401504372, "grad_norm": 0.10750683397054672, "learning_rate": 0.00019790501372927895, "loss": 1.0899, "step": 109 }, { "epoch": 0.02236226875381175, "grad_norm": 0.10349331051111221, "learning_rate": 0.00019788467405674768, "loss": 1.1011, "step": 110 }, { "epoch": 0.022565562106119128, "grad_norm": 0.12401413917541504, "learning_rate": 0.00019786433438421643, "loss": 1.1993, "step": 111 }, { "epoch": 0.02276885545842651, "grad_norm": 0.12651224434375763, "learning_rate": 0.00019784399471168515, "loss": 1.1643, "step": 112 }, { "epoch": 0.022972148810733888, "grad_norm": 0.12746116518974304, "learning_rate": 0.00019782365503915388, "loss": 1.2093, "step": 113 }, { "epoch": 0.02317544216304127, "grad_norm": 0.12016987800598145, "learning_rate": 0.0001978033153666226, "loss": 1.0743, "step": 114 }, { "epoch": 0.023378735515348647, "grad_norm": 0.11947723478078842, "learning_rate": 0.00019778297569409136, "loss": 1.0899, "step": 115 }, { "epoch": 0.02358202886765603, "grad_norm": 0.13821756839752197, "learning_rate": 0.00019776263602156005, "loss": 1.2247, "step": 116 }, { "epoch": 0.023785322219963406, "grad_norm": 0.1487802267074585, "learning_rate": 0.00019774229634902878, "loss": 1.5157, "step": 117 }, { "epoch": 0.023988615572270788, "grad_norm": 0.11825321614742279, "learning_rate": 0.0001977219566764975, "loss": 1.0966, "step": 118 }, { "epoch": 0.024191908924578166, "grad_norm": 0.11644168198108673, "learning_rate": 0.00019770161700396625, "loss": 1.057, "step": 119 }, { "epoch": 0.024395202276885547, "grad_norm": 0.12633183598518372, "learning_rate": 0.00019768127733143498, "loss": 1.039, "step": 120 }, { "epoch": 0.024598495629192925, "grad_norm": 0.11956316977739334, "learning_rate": 0.0001976609376589037, "loss": 1.0974, "step": 121 }, { "epoch": 0.024801788981500306, "grad_norm": 0.11445662379264832, "learning_rate": 0.00019764059798637243, "loss": 1.0289, "step": 122 }, { "epoch": 0.025005082333807684, "grad_norm": 0.11500035226345062, "learning_rate": 0.00019762025831384118, "loss": 1.0781, "step": 123 }, { "epoch": 0.025208375686115066, "grad_norm": 0.11663355678319931, "learning_rate": 0.0001975999186413099, "loss": 1.0258, "step": 124 }, { "epoch": 0.025411669038422444, "grad_norm": 0.13630478084087372, "learning_rate": 0.0001975795789687786, "loss": 1.1725, "step": 125 }, { "epoch": 0.02561496239072982, "grad_norm": 0.09818248450756073, "learning_rate": 0.00019755923929624732, "loss": 0.8459, "step": 126 }, { "epoch": 0.025818255743037203, "grad_norm": 0.12124455720186234, "learning_rate": 0.00019753889962371608, "loss": 1.0427, "step": 127 }, { "epoch": 0.02602154909534458, "grad_norm": 0.12221626192331314, "learning_rate": 0.0001975185599511848, "loss": 1.0463, "step": 128 }, { "epoch": 0.026224842447651962, "grad_norm": 0.12191324681043625, "learning_rate": 0.00019749822027865352, "loss": 1.0816, "step": 129 }, { "epoch": 0.02642813579995934, "grad_norm": 0.12219464778900146, "learning_rate": 0.00019747788060612225, "loss": 1.0725, "step": 130 }, { "epoch": 0.02663142915226672, "grad_norm": 0.11998015642166138, "learning_rate": 0.00019745754093359097, "loss": 1.1146, "step": 131 }, { "epoch": 0.0268347225045741, "grad_norm": 0.1426505446434021, "learning_rate": 0.00019743720126105973, "loss": 1.0739, "step": 132 }, { "epoch": 0.02703801585688148, "grad_norm": 0.11330442875623703, "learning_rate": 0.00019741686158852842, "loss": 1.0534, "step": 133 }, { "epoch": 0.02724130920918886, "grad_norm": 0.11867399513721466, "learning_rate": 0.00019739652191599715, "loss": 1.0519, "step": 134 }, { "epoch": 0.02744460256149624, "grad_norm": 0.11425293982028961, "learning_rate": 0.00019737618224346587, "loss": 1.1641, "step": 135 }, { "epoch": 0.02764789591380362, "grad_norm": 0.12021425366401672, "learning_rate": 0.00019735584257093462, "loss": 0.8639, "step": 136 }, { "epoch": 0.027851189266111, "grad_norm": 0.13119390606880188, "learning_rate": 0.00019733550289840335, "loss": 1.1486, "step": 137 }, { "epoch": 0.028054482618418378, "grad_norm": 0.12350285053253174, "learning_rate": 0.00019731516322587207, "loss": 1.2504, "step": 138 }, { "epoch": 0.028257775970725756, "grad_norm": 0.11073335260152817, "learning_rate": 0.0001972948235533408, "loss": 1.0745, "step": 139 }, { "epoch": 0.028461069323033137, "grad_norm": 0.1184212937951088, "learning_rate": 0.00019727448388080955, "loss": 1.2013, "step": 140 }, { "epoch": 0.028664362675340515, "grad_norm": 0.11723863333463669, "learning_rate": 0.00019725414420827825, "loss": 1.0349, "step": 141 }, { "epoch": 0.028867656027647896, "grad_norm": 0.12323645502328873, "learning_rate": 0.00019723380453574697, "loss": 1.2585, "step": 142 }, { "epoch": 0.029070949379955274, "grad_norm": 0.12688735127449036, "learning_rate": 0.0001972134648632157, "loss": 1.2059, "step": 143 }, { "epoch": 0.029274242732262656, "grad_norm": 0.11595512181520462, "learning_rate": 0.00019719312519068445, "loss": 1.1894, "step": 144 }, { "epoch": 0.029477536084570034, "grad_norm": 0.11859143525362015, "learning_rate": 0.00019717278551815317, "loss": 1.0111, "step": 145 }, { "epoch": 0.029680829436877415, "grad_norm": 0.13400156795978546, "learning_rate": 0.0001971524458456219, "loss": 1.1861, "step": 146 }, { "epoch": 0.029884122789184793, "grad_norm": 0.12621331214904785, "learning_rate": 0.00019713210617309062, "loss": 1.1099, "step": 147 }, { "epoch": 0.030087416141492174, "grad_norm": 0.11988019198179245, "learning_rate": 0.00019711176650055937, "loss": 1.0571, "step": 148 }, { "epoch": 0.030290709493799552, "grad_norm": 0.11711090058088303, "learning_rate": 0.00019709142682802807, "loss": 1.0591, "step": 149 }, { "epoch": 0.030494002846106934, "grad_norm": 0.12458360195159912, "learning_rate": 0.0001970710871554968, "loss": 1.1863, "step": 150 }, { "epoch": 0.03069729619841431, "grad_norm": 0.11751321703195572, "learning_rate": 0.00019705074748296552, "loss": 1.0735, "step": 151 }, { "epoch": 0.030900589550721693, "grad_norm": 0.13113288581371307, "learning_rate": 0.00019703040781043427, "loss": 1.2103, "step": 152 }, { "epoch": 0.03110388290302907, "grad_norm": 0.13107489049434662, "learning_rate": 0.000197010068137903, "loss": 1.17, "step": 153 }, { "epoch": 0.03130717625533645, "grad_norm": 0.12383049726486206, "learning_rate": 0.00019698972846537172, "loss": 1.0346, "step": 154 }, { "epoch": 0.03151046960764383, "grad_norm": 0.11656415462493896, "learning_rate": 0.00019696938879284044, "loss": 1.1545, "step": 155 }, { "epoch": 0.03171376295995121, "grad_norm": 0.12201374024152756, "learning_rate": 0.0001969490491203092, "loss": 0.9697, "step": 156 }, { "epoch": 0.031917056312258586, "grad_norm": 0.12756960093975067, "learning_rate": 0.0001969287094477779, "loss": 1.1668, "step": 157 }, { "epoch": 0.03212034966456597, "grad_norm": 0.13866621255874634, "learning_rate": 0.00019690836977524662, "loss": 1.2137, "step": 158 }, { "epoch": 0.03232364301687335, "grad_norm": 0.1329393833875656, "learning_rate": 0.00019688803010271534, "loss": 1.1864, "step": 159 }, { "epoch": 0.03252693636918073, "grad_norm": 0.14017806947231293, "learning_rate": 0.0001968676904301841, "loss": 1.2564, "step": 160 }, { "epoch": 0.032730229721488105, "grad_norm": 0.13004827499389648, "learning_rate": 0.00019684735075765282, "loss": 1.1301, "step": 161 }, { "epoch": 0.03293352307379549, "grad_norm": 0.11768215149641037, "learning_rate": 0.00019682701108512154, "loss": 1.0638, "step": 162 }, { "epoch": 0.03313681642610287, "grad_norm": 0.12334595620632172, "learning_rate": 0.00019680667141259026, "loss": 1.277, "step": 163 }, { "epoch": 0.033340109778410246, "grad_norm": 0.12338969856500626, "learning_rate": 0.00019678633174005902, "loss": 0.9211, "step": 164 }, { "epoch": 0.033543403130717624, "grad_norm": 0.10570957511663437, "learning_rate": 0.00019676599206752771, "loss": 1.0919, "step": 165 }, { "epoch": 0.03374669648302501, "grad_norm": 0.1223050057888031, "learning_rate": 0.00019674565239499644, "loss": 1.124, "step": 166 }, { "epoch": 0.033949989835332386, "grad_norm": 0.12787429988384247, "learning_rate": 0.00019672531272246516, "loss": 1.1717, "step": 167 }, { "epoch": 0.034153283187639764, "grad_norm": 0.10991297662258148, "learning_rate": 0.00019670497304993391, "loss": 0.9493, "step": 168 }, { "epoch": 0.03435657653994714, "grad_norm": 0.11774353682994843, "learning_rate": 0.00019668463337740264, "loss": 1.1527, "step": 169 }, { "epoch": 0.03455986989225452, "grad_norm": 0.12775689363479614, "learning_rate": 0.00019666429370487136, "loss": 1.389, "step": 170 }, { "epoch": 0.034763163244561905, "grad_norm": 0.12656515836715698, "learning_rate": 0.0001966439540323401, "loss": 1.182, "step": 171 }, { "epoch": 0.03496645659686928, "grad_norm": 0.11234056204557419, "learning_rate": 0.0001966236143598088, "loss": 0.8927, "step": 172 }, { "epoch": 0.03516974994917666, "grad_norm": 0.12165993452072144, "learning_rate": 0.00019660327468727754, "loss": 1.199, "step": 173 }, { "epoch": 0.03537304330148404, "grad_norm": 0.13241636753082275, "learning_rate": 0.00019658293501474626, "loss": 1.1245, "step": 174 }, { "epoch": 0.035576336653791424, "grad_norm": 0.12375210225582123, "learning_rate": 0.00019656259534221499, "loss": 1.0997, "step": 175 }, { "epoch": 0.0357796300060988, "grad_norm": 0.10730253159999847, "learning_rate": 0.0001965422556696837, "loss": 1.0075, "step": 176 }, { "epoch": 0.03598292335840618, "grad_norm": 0.11313315480947495, "learning_rate": 0.00019652191599715246, "loss": 1.003, "step": 177 }, { "epoch": 0.03618621671071356, "grad_norm": 0.11057975888252258, "learning_rate": 0.00019650157632462119, "loss": 1.1361, "step": 178 }, { "epoch": 0.03638951006302094, "grad_norm": 0.13612353801727295, "learning_rate": 0.0001964812366520899, "loss": 1.091, "step": 179 }, { "epoch": 0.03659280341532832, "grad_norm": 0.10917545855045319, "learning_rate": 0.00019646089697955864, "loss": 1.0915, "step": 180 }, { "epoch": 0.0367960967676357, "grad_norm": 0.11874423176050186, "learning_rate": 0.00019644055730702736, "loss": 1.1054, "step": 181 }, { "epoch": 0.036999390119943076, "grad_norm": 0.11719070374965668, "learning_rate": 0.00019642021763449608, "loss": 1.0794, "step": 182 }, { "epoch": 0.037202683472250454, "grad_norm": 0.11768540740013123, "learning_rate": 0.0001963998779619648, "loss": 1.1453, "step": 183 }, { "epoch": 0.03740597682455784, "grad_norm": 0.12951141595840454, "learning_rate": 0.00019637953828943353, "loss": 1.1443, "step": 184 }, { "epoch": 0.03760927017686522, "grad_norm": 0.12669187784194946, "learning_rate": 0.00019635919861690228, "loss": 1.121, "step": 185 }, { "epoch": 0.037812563529172595, "grad_norm": 0.13488180935382843, "learning_rate": 0.000196338858944371, "loss": 1.2476, "step": 186 }, { "epoch": 0.03801585688147997, "grad_norm": 0.1352519690990448, "learning_rate": 0.00019631851927183973, "loss": 1.2076, "step": 187 }, { "epoch": 0.03821915023378736, "grad_norm": 0.11772511899471283, "learning_rate": 0.00019629817959930846, "loss": 1.0063, "step": 188 }, { "epoch": 0.038422443586094736, "grad_norm": 0.12861546874046326, "learning_rate": 0.0001962778399267772, "loss": 1.1495, "step": 189 }, { "epoch": 0.038625736938402114, "grad_norm": 0.1372981071472168, "learning_rate": 0.0001962575002542459, "loss": 1.1458, "step": 190 }, { "epoch": 0.03882903029070949, "grad_norm": 0.11377538740634918, "learning_rate": 0.00019623716058171463, "loss": 0.9751, "step": 191 }, { "epoch": 0.03903232364301688, "grad_norm": 0.11588437110185623, "learning_rate": 0.00019621682090918336, "loss": 1.0187, "step": 192 }, { "epoch": 0.039235616995324255, "grad_norm": 0.12083633244037628, "learning_rate": 0.0001961964812366521, "loss": 1.0916, "step": 193 }, { "epoch": 0.03943891034763163, "grad_norm": 0.12773993611335754, "learning_rate": 0.00019617614156412083, "loss": 1.2734, "step": 194 }, { "epoch": 0.03964220369993901, "grad_norm": 0.11607804894447327, "learning_rate": 0.00019615580189158956, "loss": 1.0007, "step": 195 }, { "epoch": 0.03984549705224639, "grad_norm": 0.12700581550598145, "learning_rate": 0.00019613546221905828, "loss": 1.1465, "step": 196 }, { "epoch": 0.04004879040455377, "grad_norm": 0.12830078601837158, "learning_rate": 0.00019611512254652703, "loss": 1.056, "step": 197 }, { "epoch": 0.04025208375686115, "grad_norm": 0.12503017485141754, "learning_rate": 0.00019609478287399573, "loss": 1.0332, "step": 198 }, { "epoch": 0.04045537710916853, "grad_norm": 0.13521379232406616, "learning_rate": 0.00019607444320146445, "loss": 1.209, "step": 199 }, { "epoch": 0.04065867046147591, "grad_norm": 0.12014853954315186, "learning_rate": 0.00019605410352893318, "loss": 1.0632, "step": 200 }, { "epoch": 0.04086196381378329, "grad_norm": 0.14510953426361084, "learning_rate": 0.00019603376385640193, "loss": 1.2434, "step": 201 }, { "epoch": 0.04106525716609067, "grad_norm": 0.12610237300395966, "learning_rate": 0.00019601342418387065, "loss": 1.1765, "step": 202 }, { "epoch": 0.04126855051839805, "grad_norm": 0.12680204212665558, "learning_rate": 0.00019599308451133938, "loss": 1.1754, "step": 203 }, { "epoch": 0.041471843870705426, "grad_norm": 0.1301220804452896, "learning_rate": 0.0001959727448388081, "loss": 1.2629, "step": 204 }, { "epoch": 0.04167513722301281, "grad_norm": 0.11219633370637894, "learning_rate": 0.00019595240516627685, "loss": 1.0403, "step": 205 }, { "epoch": 0.04187843057532019, "grad_norm": 0.11882266402244568, "learning_rate": 0.00019593206549374555, "loss": 0.9514, "step": 206 }, { "epoch": 0.042081723927627566, "grad_norm": 0.11426525563001633, "learning_rate": 0.00019591172582121428, "loss": 1.163, "step": 207 }, { "epoch": 0.042285017279934944, "grad_norm": 0.12243502587080002, "learning_rate": 0.000195891386148683, "loss": 1.166, "step": 208 }, { "epoch": 0.04248831063224232, "grad_norm": 0.12537699937820435, "learning_rate": 0.00019587104647615175, "loss": 1.1335, "step": 209 }, { "epoch": 0.04269160398454971, "grad_norm": 0.12613898515701294, "learning_rate": 0.00019585070680362048, "loss": 1.2059, "step": 210 }, { "epoch": 0.042894897336857085, "grad_norm": 0.11983931809663773, "learning_rate": 0.0001958303671310892, "loss": 1.0849, "step": 211 }, { "epoch": 0.04309819068916446, "grad_norm": 0.12225540727376938, "learning_rate": 0.00019581002745855793, "loss": 1.1581, "step": 212 }, { "epoch": 0.04330148404147184, "grad_norm": 0.12691259384155273, "learning_rate": 0.00019578968778602668, "loss": 1.2828, "step": 213 }, { "epoch": 0.043504777393779226, "grad_norm": 0.11088678985834122, "learning_rate": 0.00019576934811349538, "loss": 1.0987, "step": 214 }, { "epoch": 0.043708070746086604, "grad_norm": 0.14477139711380005, "learning_rate": 0.0001957490084409641, "loss": 1.3049, "step": 215 }, { "epoch": 0.04391136409839398, "grad_norm": 0.12409314513206482, "learning_rate": 0.00019572866876843282, "loss": 1.1617, "step": 216 }, { "epoch": 0.04411465745070136, "grad_norm": 0.12076637893915176, "learning_rate": 0.00019570832909590155, "loss": 1.1163, "step": 217 }, { "epoch": 0.044317950803008745, "grad_norm": 0.1195930689573288, "learning_rate": 0.0001956879894233703, "loss": 1.3286, "step": 218 }, { "epoch": 0.04452124415531612, "grad_norm": 0.11751043051481247, "learning_rate": 0.00019566764975083902, "loss": 1.0493, "step": 219 }, { "epoch": 0.0447245375076235, "grad_norm": 0.12073373049497604, "learning_rate": 0.00019564731007830775, "loss": 1.088, "step": 220 }, { "epoch": 0.04492783085993088, "grad_norm": 0.11219310760498047, "learning_rate": 0.00019562697040577647, "loss": 1.0455, "step": 221 }, { "epoch": 0.045131124212238256, "grad_norm": 0.11247701942920685, "learning_rate": 0.0001956066307332452, "loss": 1.0703, "step": 222 }, { "epoch": 0.04533441756454564, "grad_norm": 0.11884698271751404, "learning_rate": 0.00019558629106071392, "loss": 1.2793, "step": 223 }, { "epoch": 0.04553771091685302, "grad_norm": 0.12219983339309692, "learning_rate": 0.00019556595138818265, "loss": 1.1819, "step": 224 }, { "epoch": 0.0457410042691604, "grad_norm": 0.1017618402838707, "learning_rate": 0.00019554561171565137, "loss": 0.9348, "step": 225 }, { "epoch": 0.045944297621467775, "grad_norm": 0.11600673943758011, "learning_rate": 0.00019552527204312012, "loss": 1.0054, "step": 226 }, { "epoch": 0.04614759097377516, "grad_norm": 0.13273292779922485, "learning_rate": 0.00019550493237058885, "loss": 1.2805, "step": 227 }, { "epoch": 0.04635088432608254, "grad_norm": 0.11936032027006149, "learning_rate": 0.00019548459269805757, "loss": 1.1255, "step": 228 }, { "epoch": 0.046554177678389916, "grad_norm": 0.12319690734148026, "learning_rate": 0.0001954642530255263, "loss": 1.3222, "step": 229 }, { "epoch": 0.046757471030697294, "grad_norm": 0.11699585616588593, "learning_rate": 0.00019544391335299502, "loss": 1.1299, "step": 230 }, { "epoch": 0.04696076438300468, "grad_norm": 0.11112070828676224, "learning_rate": 0.00019542357368046375, "loss": 1.1836, "step": 231 }, { "epoch": 0.04716405773531206, "grad_norm": 0.13511928915977478, "learning_rate": 0.00019540323400793247, "loss": 1.1319, "step": 232 }, { "epoch": 0.047367351087619435, "grad_norm": 0.12584780156612396, "learning_rate": 0.0001953828943354012, "loss": 1.0862, "step": 233 }, { "epoch": 0.04757064443992681, "grad_norm": 0.12041206657886505, "learning_rate": 0.00019536255466286995, "loss": 1.0866, "step": 234 }, { "epoch": 0.0477739377922342, "grad_norm": 0.1117459088563919, "learning_rate": 0.00019534221499033867, "loss": 1.0334, "step": 235 }, { "epoch": 0.047977231144541575, "grad_norm": 0.11388564109802246, "learning_rate": 0.0001953218753178074, "loss": 1.0335, "step": 236 }, { "epoch": 0.04818052449684895, "grad_norm": 0.11740144342184067, "learning_rate": 0.00019530153564527612, "loss": 1.0301, "step": 237 }, { "epoch": 0.04838381784915633, "grad_norm": 0.1059211865067482, "learning_rate": 0.00019528119597274484, "loss": 0.933, "step": 238 }, { "epoch": 0.04858711120146371, "grad_norm": 0.12493643909692764, "learning_rate": 0.00019526085630021357, "loss": 1.129, "step": 239 }, { "epoch": 0.048790404553771094, "grad_norm": 0.11791351437568665, "learning_rate": 0.0001952405166276823, "loss": 1.0817, "step": 240 }, { "epoch": 0.04899369790607847, "grad_norm": 0.12103426456451416, "learning_rate": 0.00019522017695515102, "loss": 1.0588, "step": 241 }, { "epoch": 0.04919699125838585, "grad_norm": 0.12383697926998138, "learning_rate": 0.00019519983728261977, "loss": 1.1269, "step": 242 }, { "epoch": 0.04940028461069323, "grad_norm": 0.10310048609972, "learning_rate": 0.0001951794976100885, "loss": 0.9393, "step": 243 }, { "epoch": 0.04960357796300061, "grad_norm": 0.11793255805969238, "learning_rate": 0.00019515915793755722, "loss": 1.1511, "step": 244 }, { "epoch": 0.04980687131530799, "grad_norm": 0.12708383798599243, "learning_rate": 0.00019513881826502594, "loss": 1.1525, "step": 245 }, { "epoch": 0.05001016466761537, "grad_norm": 0.13343508541584015, "learning_rate": 0.0001951184785924947, "loss": 1.2057, "step": 246 }, { "epoch": 0.050213458019922746, "grad_norm": 0.12891672551631927, "learning_rate": 0.0001950981389199634, "loss": 1.1788, "step": 247 }, { "epoch": 0.05041675137223013, "grad_norm": 0.11919089406728745, "learning_rate": 0.00019507779924743212, "loss": 0.8968, "step": 248 }, { "epoch": 0.05062004472453751, "grad_norm": 0.11912382394075394, "learning_rate": 0.00019505745957490084, "loss": 1.0667, "step": 249 }, { "epoch": 0.05082333807684489, "grad_norm": 0.12510718405246735, "learning_rate": 0.0001950371199023696, "loss": 0.9301, "step": 250 }, { "epoch": 0.051026631429152265, "grad_norm": 0.13244077563285828, "learning_rate": 0.00019501678022983832, "loss": 1.1302, "step": 251 }, { "epoch": 0.05122992478145964, "grad_norm": 0.11624693870544434, "learning_rate": 0.00019499644055730704, "loss": 1.1317, "step": 252 }, { "epoch": 0.05143321813376703, "grad_norm": 0.11178990453481674, "learning_rate": 0.00019497610088477576, "loss": 1.0696, "step": 253 }, { "epoch": 0.051636511486074406, "grad_norm": 0.12613075971603394, "learning_rate": 0.00019495576121224452, "loss": 1.2195, "step": 254 }, { "epoch": 0.051839804838381784, "grad_norm": 0.13160696625709534, "learning_rate": 0.00019493542153971321, "loss": 1.0333, "step": 255 }, { "epoch": 0.05204309819068916, "grad_norm": 0.11390336602926254, "learning_rate": 0.00019491508186718194, "loss": 0.9062, "step": 256 }, { "epoch": 0.05224639154299655, "grad_norm": 0.12177371233701706, "learning_rate": 0.00019489474219465066, "loss": 1.1687, "step": 257 }, { "epoch": 0.052449684895303925, "grad_norm": 0.12821920216083527, "learning_rate": 0.0001948744025221194, "loss": 1.0509, "step": 258 }, { "epoch": 0.0526529782476113, "grad_norm": 0.11554522067308426, "learning_rate": 0.00019485406284958814, "loss": 0.9503, "step": 259 }, { "epoch": 0.05285627159991868, "grad_norm": 0.1144140213727951, "learning_rate": 0.00019483372317705686, "loss": 1.0419, "step": 260 }, { "epoch": 0.053059564952226065, "grad_norm": 0.12091881781816483, "learning_rate": 0.0001948133835045256, "loss": 1.1362, "step": 261 }, { "epoch": 0.05326285830453344, "grad_norm": 0.1322740912437439, "learning_rate": 0.0001947930438319943, "loss": 1.2769, "step": 262 }, { "epoch": 0.05346615165684082, "grad_norm": 0.12368176877498627, "learning_rate": 0.00019477270415946304, "loss": 1.1871, "step": 263 }, { "epoch": 0.0536694450091482, "grad_norm": 0.11115586012601852, "learning_rate": 0.00019475236448693176, "loss": 1.0479, "step": 264 }, { "epoch": 0.05387273836145558, "grad_norm": 0.1282634437084198, "learning_rate": 0.00019473202481440049, "loss": 1.038, "step": 265 }, { "epoch": 0.05407603171376296, "grad_norm": 0.11252263188362122, "learning_rate": 0.0001947116851418692, "loss": 1.1254, "step": 266 }, { "epoch": 0.05427932506607034, "grad_norm": 0.10750589519739151, "learning_rate": 0.00019469134546933796, "loss": 0.8887, "step": 267 }, { "epoch": 0.05448261841837772, "grad_norm": 0.1257811188697815, "learning_rate": 0.00019467100579680669, "loss": 1.3177, "step": 268 }, { "epoch": 0.054685911770685096, "grad_norm": 0.13415637612342834, "learning_rate": 0.0001946506661242754, "loss": 1.2538, "step": 269 }, { "epoch": 0.05488920512299248, "grad_norm": 0.11637566983699799, "learning_rate": 0.00019463032645174413, "loss": 1.0536, "step": 270 }, { "epoch": 0.05509249847529986, "grad_norm": 0.12544845044612885, "learning_rate": 0.00019460998677921286, "loss": 1.2673, "step": 271 }, { "epoch": 0.05529579182760724, "grad_norm": 0.13013462722301483, "learning_rate": 0.00019458964710668158, "loss": 1.1977, "step": 272 }, { "epoch": 0.055499085179914615, "grad_norm": 0.13211217522621155, "learning_rate": 0.0001945693074341503, "loss": 1.1317, "step": 273 }, { "epoch": 0.055702378532222, "grad_norm": 0.13389961421489716, "learning_rate": 0.00019454896776161903, "loss": 1.1075, "step": 274 }, { "epoch": 0.05590567188452938, "grad_norm": 0.12008912861347198, "learning_rate": 0.00019452862808908778, "loss": 0.9692, "step": 275 }, { "epoch": 0.056108965236836755, "grad_norm": 0.1291409730911255, "learning_rate": 0.0001945082884165565, "loss": 1.2363, "step": 276 }, { "epoch": 0.05631225858914413, "grad_norm": 0.12915107607841492, "learning_rate": 0.00019448794874402523, "loss": 1.3091, "step": 277 }, { "epoch": 0.05651555194145151, "grad_norm": 0.11216573417186737, "learning_rate": 0.00019446760907149396, "loss": 1.0992, "step": 278 }, { "epoch": 0.056718845293758896, "grad_norm": 0.10683475434780121, "learning_rate": 0.00019444726939896268, "loss": 0.9928, "step": 279 }, { "epoch": 0.056922138646066274, "grad_norm": 0.12699760496616364, "learning_rate": 0.0001944269297264314, "loss": 1.1559, "step": 280 }, { "epoch": 0.05712543199837365, "grad_norm": 0.1270214468240738, "learning_rate": 0.00019440659005390013, "loss": 0.9364, "step": 281 }, { "epoch": 0.05732872535068103, "grad_norm": 0.13140781223773956, "learning_rate": 0.00019438625038136886, "loss": 1.1587, "step": 282 }, { "epoch": 0.057532018702988415, "grad_norm": 0.127557173371315, "learning_rate": 0.0001943659107088376, "loss": 1.0203, "step": 283 }, { "epoch": 0.05773531205529579, "grad_norm": 0.13735321164131165, "learning_rate": 0.00019434557103630633, "loss": 1.3412, "step": 284 }, { "epoch": 0.05793860540760317, "grad_norm": 0.11763381958007812, "learning_rate": 0.00019432523136377506, "loss": 1.1305, "step": 285 }, { "epoch": 0.05814189875991055, "grad_norm": 0.1292058527469635, "learning_rate": 0.00019430489169124378, "loss": 1.227, "step": 286 }, { "epoch": 0.05834519211221793, "grad_norm": 0.1357347071170807, "learning_rate": 0.0001942845520187125, "loss": 1.0898, "step": 287 }, { "epoch": 0.05854848546452531, "grad_norm": 0.13546323776245117, "learning_rate": 0.00019426421234618123, "loss": 1.1308, "step": 288 }, { "epoch": 0.05875177881683269, "grad_norm": 0.12612831592559814, "learning_rate": 0.00019424387267364995, "loss": 1.1161, "step": 289 }, { "epoch": 0.05895507216914007, "grad_norm": 0.12061580270528793, "learning_rate": 0.00019422353300111868, "loss": 0.9395, "step": 290 }, { "epoch": 0.059158365521447445, "grad_norm": 0.12118272483348846, "learning_rate": 0.00019420319332858743, "loss": 1.0597, "step": 291 }, { "epoch": 0.05936165887375483, "grad_norm": 0.11357955634593964, "learning_rate": 0.00019418285365605615, "loss": 1.1651, "step": 292 }, { "epoch": 0.05956495222606221, "grad_norm": 0.11546896398067474, "learning_rate": 0.00019416251398352488, "loss": 0.9093, "step": 293 }, { "epoch": 0.059768245578369586, "grad_norm": 0.12699609994888306, "learning_rate": 0.0001941421743109936, "loss": 1.1878, "step": 294 }, { "epoch": 0.059971538930676964, "grad_norm": 0.11789494752883911, "learning_rate": 0.00019412183463846233, "loss": 1.0162, "step": 295 }, { "epoch": 0.06017483228298435, "grad_norm": 0.11362869292497635, "learning_rate": 0.00019410149496593105, "loss": 1.0056, "step": 296 }, { "epoch": 0.06037812563529173, "grad_norm": 0.125663161277771, "learning_rate": 0.00019408115529339978, "loss": 1.083, "step": 297 }, { "epoch": 0.060581418987599105, "grad_norm": 0.11303743720054626, "learning_rate": 0.0001940608156208685, "loss": 1.1573, "step": 298 }, { "epoch": 0.06078471233990648, "grad_norm": 0.11955615878105164, "learning_rate": 0.00019404047594833723, "loss": 0.9637, "step": 299 }, { "epoch": 0.06098800569221387, "grad_norm": 0.11959411948919296, "learning_rate": 0.00019402013627580598, "loss": 1.1023, "step": 300 }, { "epoch": 0.061191299044521245, "grad_norm": 0.1248716339468956, "learning_rate": 0.0001939997966032747, "loss": 1.2881, "step": 301 }, { "epoch": 0.06139459239682862, "grad_norm": 0.1136515811085701, "learning_rate": 0.00019397945693074343, "loss": 1.0921, "step": 302 }, { "epoch": 0.061597885749136, "grad_norm": 0.11583786457777023, "learning_rate": 0.00019395911725821215, "loss": 1.0758, "step": 303 }, { "epoch": 0.061801179101443386, "grad_norm": 0.12685681879520416, "learning_rate": 0.00019393877758568087, "loss": 1.2444, "step": 304 }, { "epoch": 0.062004472453750764, "grad_norm": 0.15549907088279724, "learning_rate": 0.0001939184379131496, "loss": 1.4011, "step": 305 }, { "epoch": 0.06220776580605814, "grad_norm": 0.11548073589801788, "learning_rate": 0.00019389809824061832, "loss": 1.0536, "step": 306 }, { "epoch": 0.06241105915836552, "grad_norm": 0.11526035517454147, "learning_rate": 0.00019387775856808705, "loss": 0.9051, "step": 307 }, { "epoch": 0.0626143525106729, "grad_norm": 0.10682015866041183, "learning_rate": 0.0001938574188955558, "loss": 0.9744, "step": 308 }, { "epoch": 0.06281764586298028, "grad_norm": 0.11594579368829727, "learning_rate": 0.00019383707922302452, "loss": 1.0771, "step": 309 }, { "epoch": 0.06302093921528766, "grad_norm": 0.11397954076528549, "learning_rate": 0.00019381673955049325, "loss": 0.9991, "step": 310 }, { "epoch": 0.06322423256759505, "grad_norm": 0.12746506929397583, "learning_rate": 0.00019379639987796197, "loss": 1.1141, "step": 311 }, { "epoch": 0.06342752591990242, "grad_norm": 0.11370940506458282, "learning_rate": 0.0001937760602054307, "loss": 0.9784, "step": 312 }, { "epoch": 0.0636308192722098, "grad_norm": 0.11094705015420914, "learning_rate": 0.00019375572053289942, "loss": 0.9092, "step": 313 }, { "epoch": 0.06383411262451717, "grad_norm": 0.12067949026823044, "learning_rate": 0.00019373538086036815, "loss": 1.0812, "step": 314 }, { "epoch": 0.06403740597682456, "grad_norm": 0.11797504872083664, "learning_rate": 0.00019371504118783687, "loss": 1.0659, "step": 315 }, { "epoch": 0.06424069932913194, "grad_norm": 0.10436304658651352, "learning_rate": 0.00019369470151530562, "loss": 1.0226, "step": 316 }, { "epoch": 0.06444399268143931, "grad_norm": 0.1373065710067749, "learning_rate": 0.00019367436184277435, "loss": 1.1967, "step": 317 }, { "epoch": 0.0646472860337467, "grad_norm": 0.12204968929290771, "learning_rate": 0.00019365402217024307, "loss": 1.138, "step": 318 }, { "epoch": 0.06485057938605407, "grad_norm": 0.11520784348249435, "learning_rate": 0.0001936336824977118, "loss": 1.0148, "step": 319 }, { "epoch": 0.06505387273836145, "grad_norm": 0.12380523979663849, "learning_rate": 0.00019361334282518052, "loss": 1.1577, "step": 320 }, { "epoch": 0.06525716609066884, "grad_norm": 0.12227565050125122, "learning_rate": 0.00019359300315264924, "loss": 1.0843, "step": 321 }, { "epoch": 0.06546045944297621, "grad_norm": 0.12836994230747223, "learning_rate": 0.00019357266348011797, "loss": 1.137, "step": 322 }, { "epoch": 0.0656637527952836, "grad_norm": 0.1091795489192009, "learning_rate": 0.0001935523238075867, "loss": 1.1794, "step": 323 }, { "epoch": 0.06586704614759098, "grad_norm": 0.11629168689250946, "learning_rate": 0.00019353198413505545, "loss": 1.1052, "step": 324 }, { "epoch": 0.06607033949989835, "grad_norm": 0.12525077164173126, "learning_rate": 0.00019351164446252417, "loss": 1.0891, "step": 325 }, { "epoch": 0.06627363285220574, "grad_norm": 0.12222876399755478, "learning_rate": 0.0001934913047899929, "loss": 1.2059, "step": 326 }, { "epoch": 0.0664769262045131, "grad_norm": 0.12146129459142685, "learning_rate": 0.00019347096511746162, "loss": 1.1067, "step": 327 }, { "epoch": 0.06668021955682049, "grad_norm": 0.11807144433259964, "learning_rate": 0.00019345062544493034, "loss": 1.0953, "step": 328 }, { "epoch": 0.06688351290912788, "grad_norm": 0.11966339498758316, "learning_rate": 0.00019343028577239907, "loss": 1.071, "step": 329 }, { "epoch": 0.06708680626143525, "grad_norm": 0.1203102245926857, "learning_rate": 0.0001934099460998678, "loss": 1.0197, "step": 330 }, { "epoch": 0.06729009961374263, "grad_norm": 0.1138140857219696, "learning_rate": 0.00019338960642733652, "loss": 0.9191, "step": 331 }, { "epoch": 0.06749339296605002, "grad_norm": 0.12846186757087708, "learning_rate": 0.00019336926675480527, "loss": 1.0951, "step": 332 }, { "epoch": 0.06769668631835739, "grad_norm": 0.12961986660957336, "learning_rate": 0.000193348927082274, "loss": 1.1309, "step": 333 }, { "epoch": 0.06789997967066477, "grad_norm": 0.12339945137500763, "learning_rate": 0.00019332858740974272, "loss": 1.0363, "step": 334 }, { "epoch": 0.06810327302297214, "grad_norm": 0.13385798037052155, "learning_rate": 0.00019330824773721144, "loss": 1.114, "step": 335 }, { "epoch": 0.06830656637527953, "grad_norm": 0.13270089030265808, "learning_rate": 0.00019328790806468017, "loss": 1.2388, "step": 336 }, { "epoch": 0.06850985972758691, "grad_norm": 0.11112480610609055, "learning_rate": 0.0001932675683921489, "loss": 1.1178, "step": 337 }, { "epoch": 0.06871315307989428, "grad_norm": 0.12246957421302795, "learning_rate": 0.00019324722871961761, "loss": 1.233, "step": 338 }, { "epoch": 0.06891644643220167, "grad_norm": 0.12208685278892517, "learning_rate": 0.00019322688904708634, "loss": 1.1145, "step": 339 }, { "epoch": 0.06911973978450904, "grad_norm": 0.11839979141950607, "learning_rate": 0.00019320654937455506, "loss": 0.986, "step": 340 }, { "epoch": 0.06932303313681643, "grad_norm": 0.13268662989139557, "learning_rate": 0.00019318620970202382, "loss": 1.0527, "step": 341 }, { "epoch": 0.06952632648912381, "grad_norm": 0.11831391602754593, "learning_rate": 0.00019316587002949254, "loss": 1.2318, "step": 342 }, { "epoch": 0.06972961984143118, "grad_norm": 0.11892188340425491, "learning_rate": 0.00019314553035696126, "loss": 1.2, "step": 343 }, { "epoch": 0.06993291319373857, "grad_norm": 0.13015909492969513, "learning_rate": 0.00019312519068443, "loss": 1.1622, "step": 344 }, { "epoch": 0.07013620654604595, "grad_norm": 0.10422676056623459, "learning_rate": 0.0001931048510118987, "loss": 0.9258, "step": 345 }, { "epoch": 0.07033949989835332, "grad_norm": 0.10162926465272903, "learning_rate": 0.00019308451133936744, "loss": 0.9709, "step": 346 }, { "epoch": 0.0705427932506607, "grad_norm": 0.12753081321716309, "learning_rate": 0.00019306417166683616, "loss": 1.1193, "step": 347 }, { "epoch": 0.07074608660296808, "grad_norm": 0.12309850752353668, "learning_rate": 0.0001930438319943049, "loss": 1.2146, "step": 348 }, { "epoch": 0.07094937995527546, "grad_norm": 0.13199441134929657, "learning_rate": 0.00019302349232177364, "loss": 1.1723, "step": 349 }, { "epoch": 0.07115267330758285, "grad_norm": 0.12041430175304413, "learning_rate": 0.00019300315264924236, "loss": 1.1115, "step": 350 }, { "epoch": 0.07135596665989022, "grad_norm": 0.11456899344921112, "learning_rate": 0.0001929828129767111, "loss": 1.0829, "step": 351 }, { "epoch": 0.0715592600121976, "grad_norm": 0.12147854268550873, "learning_rate": 0.0001929624733041798, "loss": 1.1963, "step": 352 }, { "epoch": 0.07176255336450497, "grad_norm": 0.13312789797782898, "learning_rate": 0.00019294213363164854, "loss": 1.1182, "step": 353 }, { "epoch": 0.07196584671681236, "grad_norm": 0.1078067272901535, "learning_rate": 0.00019292179395911726, "loss": 0.9415, "step": 354 }, { "epoch": 0.07216914006911974, "grad_norm": 0.1231444925069809, "learning_rate": 0.00019290145428658598, "loss": 1.2428, "step": 355 }, { "epoch": 0.07237243342142712, "grad_norm": 0.13848941028118134, "learning_rate": 0.0001928811146140547, "loss": 1.1953, "step": 356 }, { "epoch": 0.0725757267737345, "grad_norm": 0.11954299360513687, "learning_rate": 0.00019286077494152346, "loss": 0.9834, "step": 357 }, { "epoch": 0.07277902012604189, "grad_norm": 0.11029402166604996, "learning_rate": 0.00019284043526899219, "loss": 1.0635, "step": 358 }, { "epoch": 0.07298231347834926, "grad_norm": 0.11941875517368317, "learning_rate": 0.0001928200955964609, "loss": 1.1698, "step": 359 }, { "epoch": 0.07318560683065664, "grad_norm": 0.11633221805095673, "learning_rate": 0.00019279975592392963, "loss": 0.9411, "step": 360 }, { "epoch": 0.07338890018296401, "grad_norm": 0.11820893734693527, "learning_rate": 0.00019277941625139836, "loss": 1.0487, "step": 361 }, { "epoch": 0.0735921935352714, "grad_norm": 0.14069049060344696, "learning_rate": 0.00019275907657886708, "loss": 1.2295, "step": 362 }, { "epoch": 0.07379548688757878, "grad_norm": 0.12828344106674194, "learning_rate": 0.0001927387369063358, "loss": 1.1904, "step": 363 }, { "epoch": 0.07399878023988615, "grad_norm": 0.12259247899055481, "learning_rate": 0.00019271839723380453, "loss": 1.0655, "step": 364 }, { "epoch": 0.07420207359219354, "grad_norm": 0.12864744663238525, "learning_rate": 0.00019269805756127328, "loss": 1.207, "step": 365 }, { "epoch": 0.07440536694450091, "grad_norm": 0.1141364574432373, "learning_rate": 0.000192677717888742, "loss": 0.9853, "step": 366 }, { "epoch": 0.0746086602968083, "grad_norm": 0.10614699870347977, "learning_rate": 0.00019265737821621073, "loss": 1.0003, "step": 367 }, { "epoch": 0.07481195364911568, "grad_norm": 0.1159566193819046, "learning_rate": 0.00019263703854367946, "loss": 1.1753, "step": 368 }, { "epoch": 0.07501524700142305, "grad_norm": 0.11285501718521118, "learning_rate": 0.00019261669887114818, "loss": 1.0592, "step": 369 }, { "epoch": 0.07521854035373043, "grad_norm": 0.11360286176204681, "learning_rate": 0.0001925963591986169, "loss": 0.9719, "step": 370 }, { "epoch": 0.07542183370603782, "grad_norm": 0.1143144741654396, "learning_rate": 0.00019257601952608563, "loss": 1.0623, "step": 371 }, { "epoch": 0.07562512705834519, "grad_norm": 0.11664289981126785, "learning_rate": 0.00019255567985355436, "loss": 0.9849, "step": 372 }, { "epoch": 0.07582842041065257, "grad_norm": 0.11677186191082001, "learning_rate": 0.0001925353401810231, "loss": 0.9926, "step": 373 }, { "epoch": 0.07603171376295995, "grad_norm": 0.12509550154209137, "learning_rate": 0.00019251500050849183, "loss": 1.1648, "step": 374 }, { "epoch": 0.07623500711526733, "grad_norm": 0.13659395277500153, "learning_rate": 0.00019249466083596056, "loss": 1.2005, "step": 375 }, { "epoch": 0.07643830046757472, "grad_norm": 0.11500003188848495, "learning_rate": 0.00019247432116342928, "loss": 1.1287, "step": 376 }, { "epoch": 0.07664159381988209, "grad_norm": 0.11376544088125229, "learning_rate": 0.000192453981490898, "loss": 1.0013, "step": 377 }, { "epoch": 0.07684488717218947, "grad_norm": 0.13335828483104706, "learning_rate": 0.00019243364181836673, "loss": 1.1969, "step": 378 }, { "epoch": 0.07704818052449684, "grad_norm": 0.1245710700750351, "learning_rate": 0.00019241330214583545, "loss": 1.2461, "step": 379 }, { "epoch": 0.07725147387680423, "grad_norm": 0.12159935384988785, "learning_rate": 0.00019239296247330418, "loss": 1.0066, "step": 380 }, { "epoch": 0.07745476722911161, "grad_norm": 0.1263132244348526, "learning_rate": 0.0001923726228007729, "loss": 1.1993, "step": 381 }, { "epoch": 0.07765806058141898, "grad_norm": 0.11738517135381699, "learning_rate": 0.00019235228312824165, "loss": 1.19, "step": 382 }, { "epoch": 0.07786135393372637, "grad_norm": 0.13438478112220764, "learning_rate": 0.00019233194345571038, "loss": 1.1794, "step": 383 }, { "epoch": 0.07806464728603375, "grad_norm": 0.1180570125579834, "learning_rate": 0.0001923116037831791, "loss": 1.0685, "step": 384 }, { "epoch": 0.07826794063834112, "grad_norm": 0.13014809787273407, "learning_rate": 0.00019229126411064783, "loss": 1.1838, "step": 385 }, { "epoch": 0.07847123399064851, "grad_norm": 0.12478948384523392, "learning_rate": 0.00019227092443811655, "loss": 1.1978, "step": 386 }, { "epoch": 0.07867452734295588, "grad_norm": 0.10319990664720535, "learning_rate": 0.00019225058476558528, "loss": 1.1273, "step": 387 }, { "epoch": 0.07887782069526326, "grad_norm": 0.11172400414943695, "learning_rate": 0.000192230245093054, "loss": 0.9054, "step": 388 }, { "epoch": 0.07908111404757065, "grad_norm": 0.12951341271400452, "learning_rate": 0.00019220990542052273, "loss": 1.1554, "step": 389 }, { "epoch": 0.07928440739987802, "grad_norm": 0.13350042700767517, "learning_rate": 0.00019218956574799148, "loss": 1.1787, "step": 390 }, { "epoch": 0.0794877007521854, "grad_norm": 0.11068174242973328, "learning_rate": 0.0001921692260754602, "loss": 1.1072, "step": 391 }, { "epoch": 0.07969099410449278, "grad_norm": 0.09952767938375473, "learning_rate": 0.00019214888640292893, "loss": 1.0071, "step": 392 }, { "epoch": 0.07989428745680016, "grad_norm": 0.10815319418907166, "learning_rate": 0.00019212854673039765, "loss": 0.8681, "step": 393 }, { "epoch": 0.08009758080910755, "grad_norm": 0.1121988445520401, "learning_rate": 0.00019210820705786637, "loss": 0.987, "step": 394 }, { "epoch": 0.08030087416141492, "grad_norm": 0.10137449204921722, "learning_rate": 0.0001920878673853351, "loss": 0.8968, "step": 395 }, { "epoch": 0.0805041675137223, "grad_norm": 0.09827956557273865, "learning_rate": 0.00019206752771280382, "loss": 0.8864, "step": 396 }, { "epoch": 0.08070746086602969, "grad_norm": 0.11967012286186218, "learning_rate": 0.00019204718804027255, "loss": 1.085, "step": 397 }, { "epoch": 0.08091075421833706, "grad_norm": 0.11249358206987381, "learning_rate": 0.0001920268483677413, "loss": 1.0201, "step": 398 }, { "epoch": 0.08111404757064444, "grad_norm": 0.12788376212120056, "learning_rate": 0.00019200650869521002, "loss": 1.0529, "step": 399 }, { "epoch": 0.08131734092295181, "grad_norm": 0.11879412829875946, "learning_rate": 0.00019198616902267875, "loss": 1.0418, "step": 400 }, { "epoch": 0.0815206342752592, "grad_norm": 0.11404243856668472, "learning_rate": 0.00019196582935014747, "loss": 0.9311, "step": 401 }, { "epoch": 0.08172392762756658, "grad_norm": 0.13113105297088623, "learning_rate": 0.0001919454896776162, "loss": 1.2886, "step": 402 }, { "epoch": 0.08192722097987395, "grad_norm": 0.12636548280715942, "learning_rate": 0.00019192515000508492, "loss": 1.0967, "step": 403 }, { "epoch": 0.08213051433218134, "grad_norm": 0.1245994120836258, "learning_rate": 0.00019190481033255365, "loss": 1.0426, "step": 404 }, { "epoch": 0.08233380768448871, "grad_norm": 0.12495577335357666, "learning_rate": 0.00019188447066002237, "loss": 1.1212, "step": 405 }, { "epoch": 0.0825371010367961, "grad_norm": 0.112003855407238, "learning_rate": 0.00019186413098749112, "loss": 0.9948, "step": 406 }, { "epoch": 0.08274039438910348, "grad_norm": 0.11918698996305466, "learning_rate": 0.00019184379131495985, "loss": 1.0927, "step": 407 }, { "epoch": 0.08294368774141085, "grad_norm": 0.11620672792196274, "learning_rate": 0.00019182345164242857, "loss": 1.0805, "step": 408 }, { "epoch": 0.08314698109371824, "grad_norm": 0.12570421397686005, "learning_rate": 0.0001918031119698973, "loss": 1.1484, "step": 409 }, { "epoch": 0.08335027444602562, "grad_norm": 0.12078004330396652, "learning_rate": 0.00019178277229736602, "loss": 1.248, "step": 410 }, { "epoch": 0.08355356779833299, "grad_norm": 0.1178092435002327, "learning_rate": 0.00019176243262483474, "loss": 1.1365, "step": 411 }, { "epoch": 0.08375686115064038, "grad_norm": 0.13181130588054657, "learning_rate": 0.00019174209295230347, "loss": 1.335, "step": 412 }, { "epoch": 0.08396015450294775, "grad_norm": 0.1192195788025856, "learning_rate": 0.0001917217532797722, "loss": 1.119, "step": 413 }, { "epoch": 0.08416344785525513, "grad_norm": 0.12525242567062378, "learning_rate": 0.00019170141360724095, "loss": 1.2269, "step": 414 }, { "epoch": 0.08436674120756252, "grad_norm": 0.12473724037408829, "learning_rate": 0.00019168107393470967, "loss": 1.2479, "step": 415 }, { "epoch": 0.08457003455986989, "grad_norm": 0.1118764728307724, "learning_rate": 0.0001916607342621784, "loss": 1.0089, "step": 416 }, { "epoch": 0.08477332791217727, "grad_norm": 0.11220741271972656, "learning_rate": 0.00019164039458964712, "loss": 0.9793, "step": 417 }, { "epoch": 0.08497662126448464, "grad_norm": 0.1261814385652542, "learning_rate": 0.00019162005491711584, "loss": 1.092, "step": 418 }, { "epoch": 0.08517991461679203, "grad_norm": 0.12782973051071167, "learning_rate": 0.00019159971524458457, "loss": 1.08, "step": 419 }, { "epoch": 0.08538320796909941, "grad_norm": 0.12007841467857361, "learning_rate": 0.0001915793755720533, "loss": 1.0856, "step": 420 }, { "epoch": 0.08558650132140679, "grad_norm": 0.1249847337603569, "learning_rate": 0.00019155903589952202, "loss": 1.1314, "step": 421 }, { "epoch": 0.08578979467371417, "grad_norm": 0.10619431734085083, "learning_rate": 0.00019153869622699074, "loss": 1.0298, "step": 422 }, { "epoch": 0.08599308802602156, "grad_norm": 0.12282367795705795, "learning_rate": 0.0001915183565544595, "loss": 1.1277, "step": 423 }, { "epoch": 0.08619638137832893, "grad_norm": 0.12001215666532516, "learning_rate": 0.00019149801688192822, "loss": 1.0792, "step": 424 }, { "epoch": 0.08639967473063631, "grad_norm": 0.10283269733190536, "learning_rate": 0.00019147767720939694, "loss": 0.9422, "step": 425 }, { "epoch": 0.08660296808294368, "grad_norm": 0.11698923259973526, "learning_rate": 0.00019145733753686567, "loss": 1.0371, "step": 426 }, { "epoch": 0.08680626143525107, "grad_norm": 0.11874233931303024, "learning_rate": 0.0001914369978643344, "loss": 1.042, "step": 427 }, { "epoch": 0.08700955478755845, "grad_norm": 0.10154362767934799, "learning_rate": 0.00019141665819180311, "loss": 0.9436, "step": 428 }, { "epoch": 0.08721284813986582, "grad_norm": 0.10885417461395264, "learning_rate": 0.00019139631851927184, "loss": 1.0823, "step": 429 }, { "epoch": 0.08741614149217321, "grad_norm": 0.11313669383525848, "learning_rate": 0.00019137597884674056, "loss": 1.0905, "step": 430 }, { "epoch": 0.08761943484448058, "grad_norm": 0.12074249237775803, "learning_rate": 0.00019135563917420932, "loss": 1.1466, "step": 431 }, { "epoch": 0.08782272819678796, "grad_norm": 0.12890012562274933, "learning_rate": 0.00019133529950167804, "loss": 1.1222, "step": 432 }, { "epoch": 0.08802602154909535, "grad_norm": 0.12527287006378174, "learning_rate": 0.00019131495982914676, "loss": 1.0391, "step": 433 }, { "epoch": 0.08822931490140272, "grad_norm": 0.11698780208826065, "learning_rate": 0.0001912946201566155, "loss": 0.9235, "step": 434 }, { "epoch": 0.0884326082537101, "grad_norm": 0.11191095411777496, "learning_rate": 0.0001912742804840842, "loss": 0.9763, "step": 435 }, { "epoch": 0.08863590160601749, "grad_norm": 0.1118699237704277, "learning_rate": 0.00019125394081155294, "loss": 0.9919, "step": 436 }, { "epoch": 0.08883919495832486, "grad_norm": 0.10507287830114365, "learning_rate": 0.00019123360113902166, "loss": 0.8505, "step": 437 }, { "epoch": 0.08904248831063225, "grad_norm": 0.1091250404715538, "learning_rate": 0.00019121326146649039, "loss": 0.9453, "step": 438 }, { "epoch": 0.08924578166293962, "grad_norm": 0.10213371366262436, "learning_rate": 0.00019119292179395914, "loss": 0.9082, "step": 439 }, { "epoch": 0.089449075015247, "grad_norm": 0.1446637064218521, "learning_rate": 0.00019117258212142786, "loss": 1.2307, "step": 440 }, { "epoch": 0.08965236836755439, "grad_norm": 0.13018859922885895, "learning_rate": 0.0001911522424488966, "loss": 1.1052, "step": 441 }, { "epoch": 0.08985566171986176, "grad_norm": 0.1239272952079773, "learning_rate": 0.0001911319027763653, "loss": 1.0036, "step": 442 }, { "epoch": 0.09005895507216914, "grad_norm": 0.1135847195982933, "learning_rate": 0.00019111156310383404, "loss": 1.0524, "step": 443 }, { "epoch": 0.09026224842447651, "grad_norm": 0.1171732023358345, "learning_rate": 0.00019109122343130276, "loss": 1.1769, "step": 444 }, { "epoch": 0.0904655417767839, "grad_norm": 0.12947380542755127, "learning_rate": 0.00019107088375877148, "loss": 1.2071, "step": 445 }, { "epoch": 0.09066883512909128, "grad_norm": 0.1240135133266449, "learning_rate": 0.0001910505440862402, "loss": 1.0782, "step": 446 }, { "epoch": 0.09087212848139865, "grad_norm": 0.1232561394572258, "learning_rate": 0.00019103020441370896, "loss": 1.0068, "step": 447 }, { "epoch": 0.09107542183370604, "grad_norm": 0.11200708150863647, "learning_rate": 0.00019100986474117769, "loss": 0.9491, "step": 448 }, { "epoch": 0.09127871518601342, "grad_norm": 0.1400870531797409, "learning_rate": 0.0001909895250686464, "loss": 1.3197, "step": 449 }, { "epoch": 0.0914820085383208, "grad_norm": 0.12712709605693817, "learning_rate": 0.00019096918539611513, "loss": 1.1853, "step": 450 }, { "epoch": 0.09168530189062818, "grad_norm": 0.11399099975824356, "learning_rate": 0.00019094884572358386, "loss": 0.8887, "step": 451 }, { "epoch": 0.09188859524293555, "grad_norm": 0.10861057788133621, "learning_rate": 0.00019092850605105258, "loss": 0.9224, "step": 452 }, { "epoch": 0.09209188859524294, "grad_norm": 0.12274569272994995, "learning_rate": 0.0001909081663785213, "loss": 1.1491, "step": 453 }, { "epoch": 0.09229518194755032, "grad_norm": 0.11641780287027359, "learning_rate": 0.00019088782670599003, "loss": 1.1646, "step": 454 }, { "epoch": 0.09249847529985769, "grad_norm": 0.1300159990787506, "learning_rate": 0.00019086748703345878, "loss": 1.1239, "step": 455 }, { "epoch": 0.09270176865216508, "grad_norm": 0.12116070836782455, "learning_rate": 0.0001908471473609275, "loss": 1.0475, "step": 456 }, { "epoch": 0.09290506200447246, "grad_norm": 0.11318276822566986, "learning_rate": 0.00019082680768839623, "loss": 1.0162, "step": 457 }, { "epoch": 0.09310835535677983, "grad_norm": 0.10791938006877899, "learning_rate": 0.00019080646801586496, "loss": 0.9874, "step": 458 }, { "epoch": 0.09331164870908722, "grad_norm": 0.10658224672079086, "learning_rate": 0.00019078612834333368, "loss": 0.9483, "step": 459 }, { "epoch": 0.09351494206139459, "grad_norm": 0.12912395596504211, "learning_rate": 0.0001907657886708024, "loss": 1.2248, "step": 460 }, { "epoch": 0.09371823541370197, "grad_norm": 0.1268775314092636, "learning_rate": 0.00019074544899827113, "loss": 1.079, "step": 461 }, { "epoch": 0.09392152876600936, "grad_norm": 0.11810900270938873, "learning_rate": 0.00019072510932573985, "loss": 1.1856, "step": 462 }, { "epoch": 0.09412482211831673, "grad_norm": 0.13081328570842743, "learning_rate": 0.00019070476965320858, "loss": 1.125, "step": 463 }, { "epoch": 0.09432811547062411, "grad_norm": 0.11875245720148087, "learning_rate": 0.00019068442998067733, "loss": 1.1341, "step": 464 }, { "epoch": 0.09453140882293148, "grad_norm": 0.10965297371149063, "learning_rate": 0.00019066409030814606, "loss": 0.9892, "step": 465 }, { "epoch": 0.09473470217523887, "grad_norm": 0.1167355626821518, "learning_rate": 0.00019064375063561478, "loss": 1.1234, "step": 466 }, { "epoch": 0.09493799552754625, "grad_norm": 0.1092626228928566, "learning_rate": 0.0001906234109630835, "loss": 0.9734, "step": 467 }, { "epoch": 0.09514128887985362, "grad_norm": 0.12768998742103577, "learning_rate": 0.00019060307129055223, "loss": 1.1349, "step": 468 }, { "epoch": 0.09534458223216101, "grad_norm": 0.13227547705173492, "learning_rate": 0.00019058273161802095, "loss": 1.2362, "step": 469 }, { "epoch": 0.0955478755844684, "grad_norm": 0.11458224058151245, "learning_rate": 0.00019056239194548968, "loss": 0.9707, "step": 470 }, { "epoch": 0.09575116893677577, "grad_norm": 0.11045580357313156, "learning_rate": 0.0001905420522729584, "loss": 0.973, "step": 471 }, { "epoch": 0.09595446228908315, "grad_norm": 0.1274811327457428, "learning_rate": 0.00019052171260042715, "loss": 1.2641, "step": 472 }, { "epoch": 0.09615775564139052, "grad_norm": 0.11694994568824768, "learning_rate": 0.00019050137292789588, "loss": 0.9362, "step": 473 }, { "epoch": 0.0963610489936979, "grad_norm": 0.11511142551898956, "learning_rate": 0.0001904810332553646, "loss": 1.0127, "step": 474 }, { "epoch": 0.09656434234600529, "grad_norm": 0.1253817081451416, "learning_rate": 0.00019046069358283333, "loss": 1.0489, "step": 475 }, { "epoch": 0.09676763569831266, "grad_norm": 0.11795701086521149, "learning_rate": 0.00019044035391030205, "loss": 1.0528, "step": 476 }, { "epoch": 0.09697092905062005, "grad_norm": 0.12703485786914825, "learning_rate": 0.00019042001423777078, "loss": 1.2692, "step": 477 }, { "epoch": 0.09717422240292742, "grad_norm": 0.12391920387744904, "learning_rate": 0.0001903996745652395, "loss": 1.0765, "step": 478 }, { "epoch": 0.0973775157552348, "grad_norm": 0.12939028441905975, "learning_rate": 0.00019037933489270822, "loss": 1.2686, "step": 479 }, { "epoch": 0.09758080910754219, "grad_norm": 0.11955651640892029, "learning_rate": 0.00019035899522017698, "loss": 1.0179, "step": 480 }, { "epoch": 0.09778410245984956, "grad_norm": 0.11481709033250809, "learning_rate": 0.0001903386555476457, "loss": 1.1008, "step": 481 }, { "epoch": 0.09798739581215694, "grad_norm": 0.12216270714998245, "learning_rate": 0.00019031831587511443, "loss": 1.2387, "step": 482 }, { "epoch": 0.09819068916446433, "grad_norm": 0.10991356521844864, "learning_rate": 0.00019029797620258315, "loss": 0.9913, "step": 483 }, { "epoch": 0.0983939825167717, "grad_norm": 0.11534951627254486, "learning_rate": 0.00019027763653005187, "loss": 0.9248, "step": 484 }, { "epoch": 0.09859727586907908, "grad_norm": 0.11887869983911514, "learning_rate": 0.0001902572968575206, "loss": 1.065, "step": 485 }, { "epoch": 0.09880056922138646, "grad_norm": 0.12391136586666107, "learning_rate": 0.00019023695718498932, "loss": 1.1692, "step": 486 }, { "epoch": 0.09900386257369384, "grad_norm": 0.10672067850828171, "learning_rate": 0.00019021661751245805, "loss": 1.154, "step": 487 }, { "epoch": 0.09920715592600123, "grad_norm": 0.14061135053634644, "learning_rate": 0.0001901962778399268, "loss": 1.168, "step": 488 }, { "epoch": 0.0994104492783086, "grad_norm": 0.11371248215436935, "learning_rate": 0.00019017593816739552, "loss": 0.9905, "step": 489 }, { "epoch": 0.09961374263061598, "grad_norm": 0.11754601448774338, "learning_rate": 0.00019015559849486425, "loss": 1.01, "step": 490 }, { "epoch": 0.09981703598292335, "grad_norm": 0.12492667138576508, "learning_rate": 0.00019013525882233297, "loss": 1.1024, "step": 491 }, { "epoch": 0.10002032933523074, "grad_norm": 0.12676015496253967, "learning_rate": 0.0001901149191498017, "loss": 1.3976, "step": 492 }, { "epoch": 0.10022362268753812, "grad_norm": 0.13545620441436768, "learning_rate": 0.00019009457947727042, "loss": 1.1542, "step": 493 }, { "epoch": 0.10042691603984549, "grad_norm": 0.12883707880973816, "learning_rate": 0.00019007423980473915, "loss": 1.1068, "step": 494 }, { "epoch": 0.10063020939215288, "grad_norm": 0.11707032471895218, "learning_rate": 0.00019005390013220787, "loss": 0.9906, "step": 495 }, { "epoch": 0.10083350274446026, "grad_norm": 0.13158461451530457, "learning_rate": 0.00019003356045967662, "loss": 1.1453, "step": 496 }, { "epoch": 0.10103679609676763, "grad_norm": 0.1244715005159378, "learning_rate": 0.00019001322078714535, "loss": 1.1244, "step": 497 }, { "epoch": 0.10124008944907502, "grad_norm": 0.12620943784713745, "learning_rate": 0.00018999288111461407, "loss": 1.1018, "step": 498 }, { "epoch": 0.10144338280138239, "grad_norm": 0.1192685067653656, "learning_rate": 0.0001899725414420828, "loss": 1.1069, "step": 499 }, { "epoch": 0.10164667615368977, "grad_norm": 0.12764599919319153, "learning_rate": 0.00018995220176955152, "loss": 1.2122, "step": 500 }, { "epoch": 0.10184996950599716, "grad_norm": 0.12098994851112366, "learning_rate": 0.00018993186209702024, "loss": 1.1113, "step": 501 }, { "epoch": 0.10205326285830453, "grad_norm": 0.14677678048610687, "learning_rate": 0.00018991152242448897, "loss": 1.399, "step": 502 }, { "epoch": 0.10225655621061192, "grad_norm": 0.1371246576309204, "learning_rate": 0.0001898911827519577, "loss": 1.2582, "step": 503 }, { "epoch": 0.10245984956291929, "grad_norm": 0.11643920093774796, "learning_rate": 0.00018987084307942642, "loss": 1.0707, "step": 504 }, { "epoch": 0.10266314291522667, "grad_norm": 0.1150643602013588, "learning_rate": 0.00018985050340689517, "loss": 1.0886, "step": 505 }, { "epoch": 0.10286643626753406, "grad_norm": 0.10518593341112137, "learning_rate": 0.0001898301637343639, "loss": 0.8955, "step": 506 }, { "epoch": 0.10306972961984143, "grad_norm": 0.11445560306310654, "learning_rate": 0.00018980982406183262, "loss": 0.93, "step": 507 }, { "epoch": 0.10327302297214881, "grad_norm": 0.11920091509819031, "learning_rate": 0.00018978948438930134, "loss": 1.0148, "step": 508 }, { "epoch": 0.1034763163244562, "grad_norm": 0.12822504341602325, "learning_rate": 0.00018976914471677007, "loss": 1.2004, "step": 509 }, { "epoch": 0.10367960967676357, "grad_norm": 0.12469658255577087, "learning_rate": 0.0001897488050442388, "loss": 1.0902, "step": 510 }, { "epoch": 0.10388290302907095, "grad_norm": 0.12136801332235336, "learning_rate": 0.00018972846537170752, "loss": 1.059, "step": 511 }, { "epoch": 0.10408619638137832, "grad_norm": 0.10618099570274353, "learning_rate": 0.00018970812569917624, "loss": 0.9972, "step": 512 }, { "epoch": 0.10428948973368571, "grad_norm": 0.12111090868711472, "learning_rate": 0.000189687786026645, "loss": 1.0789, "step": 513 }, { "epoch": 0.1044927830859931, "grad_norm": 0.1108577698469162, "learning_rate": 0.00018966744635411372, "loss": 0.9024, "step": 514 }, { "epoch": 0.10469607643830046, "grad_norm": 0.1184157282114029, "learning_rate": 0.00018964710668158244, "loss": 0.9548, "step": 515 }, { "epoch": 0.10489936979060785, "grad_norm": 0.1288694143295288, "learning_rate": 0.00018962676700905117, "loss": 1.2128, "step": 516 }, { "epoch": 0.10510266314291522, "grad_norm": 0.12015259265899658, "learning_rate": 0.0001896064273365199, "loss": 1.1964, "step": 517 }, { "epoch": 0.1053059564952226, "grad_norm": 0.13204379379749298, "learning_rate": 0.00018958608766398861, "loss": 1.0473, "step": 518 }, { "epoch": 0.10550924984752999, "grad_norm": 0.11321057379245758, "learning_rate": 0.00018956574799145734, "loss": 1.0961, "step": 519 }, { "epoch": 0.10571254319983736, "grad_norm": 0.13245680928230286, "learning_rate": 0.00018954540831892606, "loss": 1.0835, "step": 520 }, { "epoch": 0.10591583655214475, "grad_norm": 0.12220027297735214, "learning_rate": 0.00018952506864639481, "loss": 1.1246, "step": 521 }, { "epoch": 0.10611912990445213, "grad_norm": 0.11933163553476334, "learning_rate": 0.00018950472897386354, "loss": 1.0739, "step": 522 }, { "epoch": 0.1063224232567595, "grad_norm": 0.14022572338581085, "learning_rate": 0.00018948438930133226, "loss": 1.1557, "step": 523 }, { "epoch": 0.10652571660906689, "grad_norm": 0.13287031650543213, "learning_rate": 0.000189464049628801, "loss": 1.2597, "step": 524 }, { "epoch": 0.10672900996137426, "grad_norm": 0.11653829365968704, "learning_rate": 0.0001894437099562697, "loss": 0.9564, "step": 525 }, { "epoch": 0.10693230331368164, "grad_norm": 0.11488767713308334, "learning_rate": 0.00018942337028373844, "loss": 0.9883, "step": 526 }, { "epoch": 0.10713559666598903, "grad_norm": 0.11149357259273529, "learning_rate": 0.00018940303061120716, "loss": 1.0004, "step": 527 }, { "epoch": 0.1073388900182964, "grad_norm": 0.11848779767751694, "learning_rate": 0.00018938269093867589, "loss": 1.0462, "step": 528 }, { "epoch": 0.10754218337060378, "grad_norm": 0.11932095140218735, "learning_rate": 0.00018936235126614464, "loss": 1.1001, "step": 529 }, { "epoch": 0.10774547672291115, "grad_norm": 0.11937075853347778, "learning_rate": 0.00018934201159361336, "loss": 1.2032, "step": 530 }, { "epoch": 0.10794877007521854, "grad_norm": 0.10601939260959625, "learning_rate": 0.00018932167192108209, "loss": 0.9689, "step": 531 }, { "epoch": 0.10815206342752592, "grad_norm": 0.11901092529296875, "learning_rate": 0.0001893013322485508, "loss": 1.1913, "step": 532 }, { "epoch": 0.1083553567798333, "grad_norm": 0.1308038979768753, "learning_rate": 0.00018928099257601954, "loss": 1.2393, "step": 533 }, { "epoch": 0.10855865013214068, "grad_norm": 0.1222740039229393, "learning_rate": 0.00018926065290348826, "loss": 0.9574, "step": 534 }, { "epoch": 0.10876194348444806, "grad_norm": 0.12856149673461914, "learning_rate": 0.00018924031323095698, "loss": 1.1296, "step": 535 }, { "epoch": 0.10896523683675544, "grad_norm": 0.12045751512050629, "learning_rate": 0.0001892199735584257, "loss": 1.153, "step": 536 }, { "epoch": 0.10916853018906282, "grad_norm": 0.11606315523386002, "learning_rate": 0.00018919963388589446, "loss": 0.9028, "step": 537 }, { "epoch": 0.10937182354137019, "grad_norm": 0.10877380520105362, "learning_rate": 0.00018917929421336318, "loss": 0.9954, "step": 538 }, { "epoch": 0.10957511689367758, "grad_norm": 0.10476227104663849, "learning_rate": 0.0001891589545408319, "loss": 0.9486, "step": 539 }, { "epoch": 0.10977841024598496, "grad_norm": 0.12538990378379822, "learning_rate": 0.00018913861486830063, "loss": 1.1749, "step": 540 }, { "epoch": 0.10998170359829233, "grad_norm": 0.13290320336818695, "learning_rate": 0.00018911827519576936, "loss": 1.0933, "step": 541 }, { "epoch": 0.11018499695059972, "grad_norm": 0.11773636192083359, "learning_rate": 0.00018909793552323808, "loss": 1.0693, "step": 542 }, { "epoch": 0.11038829030290709, "grad_norm": 0.11466556787490845, "learning_rate": 0.0001890775958507068, "loss": 1.0589, "step": 543 }, { "epoch": 0.11059158365521447, "grad_norm": 0.1275825798511505, "learning_rate": 0.00018905725617817553, "loss": 1.0582, "step": 544 }, { "epoch": 0.11079487700752186, "grad_norm": 0.1283504068851471, "learning_rate": 0.00018903691650564428, "loss": 1.1702, "step": 545 }, { "epoch": 0.11099817035982923, "grad_norm": 0.13250254094600677, "learning_rate": 0.000189016576833113, "loss": 1.1467, "step": 546 }, { "epoch": 0.11120146371213661, "grad_norm": 0.15396709740161896, "learning_rate": 0.00018899623716058173, "loss": 1.1299, "step": 547 }, { "epoch": 0.111404757064444, "grad_norm": 0.13014012575149536, "learning_rate": 0.00018897589748805046, "loss": 1.1633, "step": 548 }, { "epoch": 0.11160805041675137, "grad_norm": 0.11697974056005478, "learning_rate": 0.00018895555781551918, "loss": 1.1052, "step": 549 }, { "epoch": 0.11181134376905875, "grad_norm": 0.13976189494132996, "learning_rate": 0.0001889352181429879, "loss": 1.1199, "step": 550 }, { "epoch": 0.11201463712136613, "grad_norm": 0.13051995635032654, "learning_rate": 0.00018891487847045663, "loss": 1.2532, "step": 551 }, { "epoch": 0.11221793047367351, "grad_norm": 0.11212155967950821, "learning_rate": 0.00018889453879792535, "loss": 0.9873, "step": 552 }, { "epoch": 0.1124212238259809, "grad_norm": 0.1334063857793808, "learning_rate": 0.00018887419912539408, "loss": 1.1102, "step": 553 }, { "epoch": 0.11262451717828827, "grad_norm": 0.1290140599012375, "learning_rate": 0.00018885385945286283, "loss": 0.9598, "step": 554 }, { "epoch": 0.11282781053059565, "grad_norm": 0.12794511020183563, "learning_rate": 0.00018883351978033155, "loss": 1.2875, "step": 555 }, { "epoch": 0.11303110388290302, "grad_norm": 0.11270211637020111, "learning_rate": 0.00018881318010780028, "loss": 0.9414, "step": 556 }, { "epoch": 0.11323439723521041, "grad_norm": 0.12074756622314453, "learning_rate": 0.000188792840435269, "loss": 1.0734, "step": 557 }, { "epoch": 0.11343769058751779, "grad_norm": 0.11245666444301605, "learning_rate": 0.00018877250076273773, "loss": 1.2024, "step": 558 }, { "epoch": 0.11364098393982516, "grad_norm": 0.10953640192747116, "learning_rate": 0.00018875216109020645, "loss": 0.9572, "step": 559 }, { "epoch": 0.11384427729213255, "grad_norm": 0.11975332349538803, "learning_rate": 0.00018873182141767518, "loss": 1.1559, "step": 560 }, { "epoch": 0.11404757064443993, "grad_norm": 0.10940812528133392, "learning_rate": 0.0001887114817451439, "loss": 0.926, "step": 561 }, { "epoch": 0.1142508639967473, "grad_norm": 0.139595165848732, "learning_rate": 0.00018869114207261265, "loss": 1.3275, "step": 562 }, { "epoch": 0.11445415734905469, "grad_norm": 0.10891355574131012, "learning_rate": 0.00018867080240008138, "loss": 0.9736, "step": 563 }, { "epoch": 0.11465745070136206, "grad_norm": 0.1192033439874649, "learning_rate": 0.0001886504627275501, "loss": 0.9881, "step": 564 }, { "epoch": 0.11486074405366944, "grad_norm": 0.12635888159275055, "learning_rate": 0.00018863012305501883, "loss": 1.1483, "step": 565 }, { "epoch": 0.11506403740597683, "grad_norm": 0.13440972566604614, "learning_rate": 0.00018860978338248755, "loss": 1.0852, "step": 566 }, { "epoch": 0.1152673307582842, "grad_norm": 0.12328968942165375, "learning_rate": 0.00018858944370995628, "loss": 1.1048, "step": 567 }, { "epoch": 0.11547062411059159, "grad_norm": 0.12037025392055511, "learning_rate": 0.000188569104037425, "loss": 1.1171, "step": 568 }, { "epoch": 0.11567391746289896, "grad_norm": 0.11991129070520401, "learning_rate": 0.00018854876436489372, "loss": 1.0827, "step": 569 }, { "epoch": 0.11587721081520634, "grad_norm": 0.11372412741184235, "learning_rate": 0.00018852842469236248, "loss": 0.9993, "step": 570 }, { "epoch": 0.11608050416751373, "grad_norm": 0.10992924124002457, "learning_rate": 0.0001885080850198312, "loss": 0.9812, "step": 571 }, { "epoch": 0.1162837975198211, "grad_norm": 0.11675936728715897, "learning_rate": 0.00018848774534729992, "loss": 0.9844, "step": 572 }, { "epoch": 0.11648709087212848, "grad_norm": 0.10757414996623993, "learning_rate": 0.00018846740567476865, "loss": 1.0607, "step": 573 }, { "epoch": 0.11669038422443587, "grad_norm": 0.11255379766225815, "learning_rate": 0.00018844706600223737, "loss": 1.1638, "step": 574 }, { "epoch": 0.11689367757674324, "grad_norm": 0.10737176239490509, "learning_rate": 0.0001884267263297061, "loss": 1.0055, "step": 575 }, { "epoch": 0.11709697092905062, "grad_norm": 0.1193508729338646, "learning_rate": 0.00018840638665717482, "loss": 1.0924, "step": 576 }, { "epoch": 0.117300264281358, "grad_norm": 0.12564769387245178, "learning_rate": 0.00018838604698464355, "loss": 1.3088, "step": 577 }, { "epoch": 0.11750355763366538, "grad_norm": 0.12675485014915466, "learning_rate": 0.0001883657073121123, "loss": 1.0682, "step": 578 }, { "epoch": 0.11770685098597276, "grad_norm": 0.12016987055540085, "learning_rate": 0.00018834536763958102, "loss": 0.9511, "step": 579 }, { "epoch": 0.11791014433828013, "grad_norm": 0.11664092540740967, "learning_rate": 0.00018832502796704975, "loss": 1.0758, "step": 580 }, { "epoch": 0.11811343769058752, "grad_norm": 0.11402445286512375, "learning_rate": 0.00018830468829451847, "loss": 1.0959, "step": 581 }, { "epoch": 0.11831673104289489, "grad_norm": 0.12505365908145905, "learning_rate": 0.0001882843486219872, "loss": 1.1621, "step": 582 }, { "epoch": 0.11852002439520228, "grad_norm": 0.13434186577796936, "learning_rate": 0.00018826400894945592, "loss": 1.273, "step": 583 }, { "epoch": 0.11872331774750966, "grad_norm": 0.1284523904323578, "learning_rate": 0.00018824366927692465, "loss": 1.1161, "step": 584 }, { "epoch": 0.11892661109981703, "grad_norm": 0.1141962930560112, "learning_rate": 0.00018822332960439337, "loss": 1.012, "step": 585 }, { "epoch": 0.11912990445212442, "grad_norm": 0.1280459314584732, "learning_rate": 0.00018820298993186212, "loss": 1.1797, "step": 586 }, { "epoch": 0.1193331978044318, "grad_norm": 0.12705819308757782, "learning_rate": 0.00018818265025933085, "loss": 1.2323, "step": 587 }, { "epoch": 0.11953649115673917, "grad_norm": 0.1341540366411209, "learning_rate": 0.00018816231058679957, "loss": 1.1219, "step": 588 }, { "epoch": 0.11973978450904656, "grad_norm": 0.1307908147573471, "learning_rate": 0.0001881419709142683, "loss": 0.992, "step": 589 }, { "epoch": 0.11994307786135393, "grad_norm": 0.127479687333107, "learning_rate": 0.00018812163124173702, "loss": 1.0326, "step": 590 }, { "epoch": 0.12014637121366131, "grad_norm": 0.09779065102338791, "learning_rate": 0.00018810129156920574, "loss": 0.7614, "step": 591 }, { "epoch": 0.1203496645659687, "grad_norm": 0.14188863337039948, "learning_rate": 0.00018808095189667447, "loss": 1.23, "step": 592 }, { "epoch": 0.12055295791827607, "grad_norm": 0.12969130277633667, "learning_rate": 0.0001880606122241432, "loss": 1.1229, "step": 593 }, { "epoch": 0.12075625127058345, "grad_norm": 0.13516603410243988, "learning_rate": 0.00018804027255161192, "loss": 1.0147, "step": 594 }, { "epoch": 0.12095954462289084, "grad_norm": 0.13307668268680573, "learning_rate": 0.00018801993287908067, "loss": 1.1908, "step": 595 }, { "epoch": 0.12116283797519821, "grad_norm": 0.11288546770811081, "learning_rate": 0.0001879995932065494, "loss": 0.9319, "step": 596 }, { "epoch": 0.1213661313275056, "grad_norm": 0.12034857273101807, "learning_rate": 0.00018797925353401812, "loss": 1.0976, "step": 597 }, { "epoch": 0.12156942467981297, "grad_norm": 0.136747807264328, "learning_rate": 0.00018795891386148684, "loss": 1.2298, "step": 598 }, { "epoch": 0.12177271803212035, "grad_norm": 0.11699377000331879, "learning_rate": 0.00018793857418895557, "loss": 1.0377, "step": 599 }, { "epoch": 0.12197601138442773, "grad_norm": 0.15257331728935242, "learning_rate": 0.0001879182345164243, "loss": 1.2306, "step": 600 }, { "epoch": 0.1221793047367351, "grad_norm": 0.1361241340637207, "learning_rate": 0.00018789789484389302, "loss": 1.1563, "step": 601 }, { "epoch": 0.12238259808904249, "grad_norm": 0.11735684424638748, "learning_rate": 0.00018787755517136174, "loss": 1.104, "step": 602 }, { "epoch": 0.12258589144134986, "grad_norm": 0.11648523807525635, "learning_rate": 0.0001878572154988305, "loss": 1.0008, "step": 603 }, { "epoch": 0.12278918479365725, "grad_norm": 0.12473436444997787, "learning_rate": 0.00018783687582629922, "loss": 1.0741, "step": 604 }, { "epoch": 0.12299247814596463, "grad_norm": 0.11664781719446182, "learning_rate": 0.00018781653615376794, "loss": 1.1155, "step": 605 }, { "epoch": 0.123195771498272, "grad_norm": 0.12415888160467148, "learning_rate": 0.00018779619648123666, "loss": 1.158, "step": 606 }, { "epoch": 0.12339906485057939, "grad_norm": 0.1223251074552536, "learning_rate": 0.0001877758568087054, "loss": 1.1045, "step": 607 }, { "epoch": 0.12360235820288677, "grad_norm": 0.12289747595787048, "learning_rate": 0.00018775551713617411, "loss": 1.0768, "step": 608 }, { "epoch": 0.12380565155519414, "grad_norm": 0.1316901594400406, "learning_rate": 0.00018773517746364284, "loss": 1.2156, "step": 609 }, { "epoch": 0.12400894490750153, "grad_norm": 0.12060056626796722, "learning_rate": 0.00018771483779111156, "loss": 1.0221, "step": 610 }, { "epoch": 0.1242122382598089, "grad_norm": 0.1384373903274536, "learning_rate": 0.00018769449811858031, "loss": 1.1059, "step": 611 }, { "epoch": 0.12441553161211628, "grad_norm": 0.12399812787771225, "learning_rate": 0.00018767415844604904, "loss": 1.0193, "step": 612 }, { "epoch": 0.12461882496442367, "grad_norm": 0.13406959176063538, "learning_rate": 0.00018765381877351776, "loss": 1.1572, "step": 613 }, { "epoch": 0.12482211831673104, "grad_norm": 0.12881499528884888, "learning_rate": 0.0001876334791009865, "loss": 1.1914, "step": 614 }, { "epoch": 0.1250254116690384, "grad_norm": 0.11472728103399277, "learning_rate": 0.0001876131394284552, "loss": 1.0822, "step": 615 }, { "epoch": 0.1252287050213458, "grad_norm": 0.1251503825187683, "learning_rate": 0.00018759279975592394, "loss": 1.1783, "step": 616 }, { "epoch": 0.12543199837365318, "grad_norm": 0.1414482593536377, "learning_rate": 0.00018757246008339266, "loss": 1.1925, "step": 617 }, { "epoch": 0.12563529172596055, "grad_norm": 0.122686967253685, "learning_rate": 0.00018755212041086139, "loss": 1.091, "step": 618 }, { "epoch": 0.12583858507826795, "grad_norm": 0.12301596254110336, "learning_rate": 0.00018753178073833014, "loss": 1.108, "step": 619 }, { "epoch": 0.12604187843057532, "grad_norm": 0.1191742941737175, "learning_rate": 0.00018751144106579886, "loss": 1.0413, "step": 620 }, { "epoch": 0.1262451717828827, "grad_norm": 0.0971694141626358, "learning_rate": 0.00018749110139326759, "loss": 0.8473, "step": 621 }, { "epoch": 0.1264484651351901, "grad_norm": 0.12381591647863388, "learning_rate": 0.0001874707617207363, "loss": 1.1503, "step": 622 }, { "epoch": 0.12665175848749746, "grad_norm": 0.13411198556423187, "learning_rate": 0.00018745042204820504, "loss": 1.164, "step": 623 }, { "epoch": 0.12685505183980483, "grad_norm": 0.12838509678840637, "learning_rate": 0.00018743008237567376, "loss": 1.1768, "step": 624 }, { "epoch": 0.1270583451921122, "grad_norm": 0.11623813211917877, "learning_rate": 0.00018740974270314248, "loss": 1.1611, "step": 625 }, { "epoch": 0.1272616385444196, "grad_norm": 0.11001920700073242, "learning_rate": 0.0001873894030306112, "loss": 1.0182, "step": 626 }, { "epoch": 0.12746493189672697, "grad_norm": 0.11987441778182983, "learning_rate": 0.00018736906335807996, "loss": 1.0509, "step": 627 }, { "epoch": 0.12766822524903434, "grad_norm": 0.13036808371543884, "learning_rate": 0.00018734872368554868, "loss": 1.2035, "step": 628 }, { "epoch": 0.12787151860134174, "grad_norm": 0.12546774744987488, "learning_rate": 0.0001873283840130174, "loss": 1.1434, "step": 629 }, { "epoch": 0.12807481195364911, "grad_norm": 0.1025729849934578, "learning_rate": 0.00018730804434048613, "loss": 0.9868, "step": 630 }, { "epoch": 0.12827810530595649, "grad_norm": 0.1013616994023323, "learning_rate": 0.00018728770466795483, "loss": 0.9281, "step": 631 }, { "epoch": 0.12848139865826388, "grad_norm": 0.11066362261772156, "learning_rate": 0.00018726736499542358, "loss": 1.0345, "step": 632 }, { "epoch": 0.12868469201057126, "grad_norm": 0.1280633807182312, "learning_rate": 0.0001872470253228923, "loss": 1.2335, "step": 633 }, { "epoch": 0.12888798536287863, "grad_norm": 0.11954978853464127, "learning_rate": 0.00018722668565036103, "loss": 1.0298, "step": 634 }, { "epoch": 0.12909127871518603, "grad_norm": 0.11124943196773529, "learning_rate": 0.00018720634597782976, "loss": 1.0896, "step": 635 }, { "epoch": 0.1292945720674934, "grad_norm": 0.12496782839298248, "learning_rate": 0.0001871860063052985, "loss": 1.0897, "step": 636 }, { "epoch": 0.12949786541980077, "grad_norm": 0.1257556527853012, "learning_rate": 0.00018716566663276723, "loss": 1.0148, "step": 637 }, { "epoch": 0.12970115877210814, "grad_norm": 0.11928705126047134, "learning_rate": 0.00018714532696023596, "loss": 1.1415, "step": 638 }, { "epoch": 0.12990445212441554, "grad_norm": 0.1109057068824768, "learning_rate": 0.00018712498728770468, "loss": 1.063, "step": 639 }, { "epoch": 0.1301077454767229, "grad_norm": 0.13905195891857147, "learning_rate": 0.0001871046476151734, "loss": 1.2346, "step": 640 }, { "epoch": 0.13031103882903028, "grad_norm": 0.12306763231754303, "learning_rate": 0.00018708430794264213, "loss": 1.0504, "step": 641 }, { "epoch": 0.13051433218133768, "grad_norm": 0.1077868863940239, "learning_rate": 0.00018706396827011085, "loss": 0.9143, "step": 642 }, { "epoch": 0.13071762553364505, "grad_norm": 0.1328214555978775, "learning_rate": 0.00018704362859757958, "loss": 1.1223, "step": 643 }, { "epoch": 0.13092091888595242, "grad_norm": 0.12459075450897217, "learning_rate": 0.00018702328892504833, "loss": 1.1896, "step": 644 }, { "epoch": 0.13112421223825982, "grad_norm": 0.11860411614179611, "learning_rate": 0.00018700294925251705, "loss": 1.0472, "step": 645 }, { "epoch": 0.1313275055905672, "grad_norm": 0.11825944483280182, "learning_rate": 0.00018698260957998578, "loss": 1.2391, "step": 646 }, { "epoch": 0.13153079894287456, "grad_norm": 0.12103937566280365, "learning_rate": 0.0001869622699074545, "loss": 1.0087, "step": 647 }, { "epoch": 0.13173409229518196, "grad_norm": 0.12289803475141525, "learning_rate": 0.00018694193023492323, "loss": 1.0867, "step": 648 }, { "epoch": 0.13193738564748933, "grad_norm": 0.12652850151062012, "learning_rate": 0.00018692159056239195, "loss": 1.2047, "step": 649 }, { "epoch": 0.1321406789997967, "grad_norm": 0.12258271127939224, "learning_rate": 0.00018690125088986068, "loss": 0.9806, "step": 650 }, { "epoch": 0.1323439723521041, "grad_norm": 0.1285620778799057, "learning_rate": 0.0001868809112173294, "loss": 0.9993, "step": 651 }, { "epoch": 0.13254726570441147, "grad_norm": 0.11906328797340393, "learning_rate": 0.00018686057154479815, "loss": 1.1029, "step": 652 }, { "epoch": 0.13275055905671884, "grad_norm": 0.13393160700798035, "learning_rate": 0.00018684023187226688, "loss": 1.1263, "step": 653 }, { "epoch": 0.1329538524090262, "grad_norm": 0.13850244879722595, "learning_rate": 0.0001868198921997356, "loss": 1.0878, "step": 654 }, { "epoch": 0.1331571457613336, "grad_norm": 0.13923142850399017, "learning_rate": 0.00018679955252720433, "loss": 1.1637, "step": 655 }, { "epoch": 0.13336043911364098, "grad_norm": 0.11642129719257355, "learning_rate": 0.00018677921285467305, "loss": 1.1134, "step": 656 }, { "epoch": 0.13356373246594835, "grad_norm": 0.12743037939071655, "learning_rate": 0.00018675887318214178, "loss": 1.0345, "step": 657 }, { "epoch": 0.13376702581825575, "grad_norm": 0.11360882222652435, "learning_rate": 0.0001867385335096105, "loss": 1.072, "step": 658 }, { "epoch": 0.13397031917056312, "grad_norm": 0.1262228637933731, "learning_rate": 0.00018671819383707922, "loss": 1.1546, "step": 659 }, { "epoch": 0.1341736125228705, "grad_norm": 0.1144820973277092, "learning_rate": 0.00018669785416454798, "loss": 1.0152, "step": 660 }, { "epoch": 0.1343769058751779, "grad_norm": 0.12834620475769043, "learning_rate": 0.0001866775144920167, "loss": 1.0456, "step": 661 }, { "epoch": 0.13458019922748526, "grad_norm": 0.11835994571447372, "learning_rate": 0.00018665717481948542, "loss": 0.991, "step": 662 }, { "epoch": 0.13478349257979264, "grad_norm": 0.11445319652557373, "learning_rate": 0.00018663683514695415, "loss": 1.0116, "step": 663 }, { "epoch": 0.13498678593210003, "grad_norm": 0.13939061760902405, "learning_rate": 0.00018661649547442287, "loss": 1.153, "step": 664 }, { "epoch": 0.1351900792844074, "grad_norm": 0.1149614006280899, "learning_rate": 0.0001865961558018916, "loss": 0.9255, "step": 665 }, { "epoch": 0.13539337263671478, "grad_norm": 0.13376334309577942, "learning_rate": 0.00018657581612936032, "loss": 1.1502, "step": 666 }, { "epoch": 0.13559666598902215, "grad_norm": 0.13265709578990936, "learning_rate": 0.00018655547645682905, "loss": 1.1292, "step": 667 }, { "epoch": 0.13579995934132955, "grad_norm": 0.11729206144809723, "learning_rate": 0.0001865351367842978, "loss": 1.2166, "step": 668 }, { "epoch": 0.13600325269363692, "grad_norm": 0.11903608590364456, "learning_rate": 0.00018651479711176652, "loss": 1.1808, "step": 669 }, { "epoch": 0.1362065460459443, "grad_norm": 0.11009612679481506, "learning_rate": 0.00018649445743923525, "loss": 0.9364, "step": 670 }, { "epoch": 0.1364098393982517, "grad_norm": 0.13966090977191925, "learning_rate": 0.00018647411776670397, "loss": 1.2463, "step": 671 }, { "epoch": 0.13661313275055906, "grad_norm": 0.12319371849298477, "learning_rate": 0.00018645377809417267, "loss": 1.1192, "step": 672 }, { "epoch": 0.13681642610286643, "grad_norm": 0.13469716906547546, "learning_rate": 0.00018643343842164142, "loss": 1.2376, "step": 673 }, { "epoch": 0.13701971945517383, "grad_norm": 0.124245285987854, "learning_rate": 0.00018641309874911015, "loss": 1.1145, "step": 674 }, { "epoch": 0.1372230128074812, "grad_norm": 0.1325312852859497, "learning_rate": 0.00018639275907657887, "loss": 1.1453, "step": 675 }, { "epoch": 0.13742630615978857, "grad_norm": 0.13344690203666687, "learning_rate": 0.0001863724194040476, "loss": 1.2191, "step": 676 }, { "epoch": 0.13762959951209597, "grad_norm": 0.1301363855600357, "learning_rate": 0.00018635207973151635, "loss": 0.9982, "step": 677 }, { "epoch": 0.13783289286440334, "grad_norm": 0.10880762338638306, "learning_rate": 0.00018633174005898507, "loss": 0.8772, "step": 678 }, { "epoch": 0.1380361862167107, "grad_norm": 0.13281653821468353, "learning_rate": 0.0001863114003864538, "loss": 1.0529, "step": 679 }, { "epoch": 0.13823947956901808, "grad_norm": 0.13998745381832123, "learning_rate": 0.0001862910607139225, "loss": 1.0996, "step": 680 }, { "epoch": 0.13844277292132548, "grad_norm": 0.1195378452539444, "learning_rate": 0.00018627072104139124, "loss": 0.9776, "step": 681 }, { "epoch": 0.13864606627363285, "grad_norm": 0.10932020843029022, "learning_rate": 0.00018625038136885997, "loss": 1.1026, "step": 682 }, { "epoch": 0.13884935962594022, "grad_norm": 0.1420464664697647, "learning_rate": 0.0001862300416963287, "loss": 1.1428, "step": 683 }, { "epoch": 0.13905265297824762, "grad_norm": 0.11747555434703827, "learning_rate": 0.00018620970202379742, "loss": 0.9985, "step": 684 }, { "epoch": 0.139255946330555, "grad_norm": 0.11964225023984909, "learning_rate": 0.00018618936235126617, "loss": 1.0268, "step": 685 }, { "epoch": 0.13945923968286236, "grad_norm": 0.11939354985952377, "learning_rate": 0.0001861690226787349, "loss": 0.993, "step": 686 }, { "epoch": 0.13966253303516976, "grad_norm": 0.14188724756240845, "learning_rate": 0.00018614868300620362, "loss": 1.0672, "step": 687 }, { "epoch": 0.13986582638747713, "grad_norm": 0.12218412756919861, "learning_rate": 0.00018612834333367231, "loss": 1.0664, "step": 688 }, { "epoch": 0.1400691197397845, "grad_norm": 0.12363380193710327, "learning_rate": 0.00018610800366114107, "loss": 1.1202, "step": 689 }, { "epoch": 0.1402724130920919, "grad_norm": 0.12523901462554932, "learning_rate": 0.0001860876639886098, "loss": 0.9601, "step": 690 }, { "epoch": 0.14047570644439927, "grad_norm": 0.1359613537788391, "learning_rate": 0.00018606732431607852, "loss": 1.2674, "step": 691 }, { "epoch": 0.14067899979670664, "grad_norm": 0.12229263782501221, "learning_rate": 0.00018604698464354724, "loss": 1.0817, "step": 692 }, { "epoch": 0.14088229314901402, "grad_norm": 0.12188601493835449, "learning_rate": 0.000186026644971016, "loss": 1.1176, "step": 693 }, { "epoch": 0.1410855865013214, "grad_norm": 0.10588016360998154, "learning_rate": 0.00018600630529848472, "loss": 0.9546, "step": 694 }, { "epoch": 0.14128887985362878, "grad_norm": 0.11985071748495102, "learning_rate": 0.00018598596562595344, "loss": 1.0765, "step": 695 }, { "epoch": 0.14149217320593616, "grad_norm": 0.13118812441825867, "learning_rate": 0.00018596562595342216, "loss": 1.0117, "step": 696 }, { "epoch": 0.14169546655824355, "grad_norm": 0.11992435902357101, "learning_rate": 0.0001859452862808909, "loss": 0.9618, "step": 697 }, { "epoch": 0.14189875991055093, "grad_norm": 0.11617527902126312, "learning_rate": 0.00018592494660835961, "loss": 1.0459, "step": 698 }, { "epoch": 0.1421020532628583, "grad_norm": 0.12465415149927139, "learning_rate": 0.00018590460693582834, "loss": 1.0635, "step": 699 }, { "epoch": 0.1423053466151657, "grad_norm": 0.12672793865203857, "learning_rate": 0.00018588426726329706, "loss": 1.26, "step": 700 }, { "epoch": 0.14250863996747307, "grad_norm": 0.12191738188266754, "learning_rate": 0.00018586392759076581, "loss": 0.9333, "step": 701 }, { "epoch": 0.14271193331978044, "grad_norm": 0.13285742700099945, "learning_rate": 0.00018584358791823454, "loss": 1.2199, "step": 702 }, { "epoch": 0.14291522667208784, "grad_norm": 0.11525557935237885, "learning_rate": 0.00018582324824570326, "loss": 1.1212, "step": 703 }, { "epoch": 0.1431185200243952, "grad_norm": 0.12379605323076248, "learning_rate": 0.000185802908573172, "loss": 0.9767, "step": 704 }, { "epoch": 0.14332181337670258, "grad_norm": 0.13637319207191467, "learning_rate": 0.0001857825689006407, "loss": 1.1399, "step": 705 }, { "epoch": 0.14352510672900995, "grad_norm": 0.12638236582279205, "learning_rate": 0.00018576222922810944, "loss": 1.2348, "step": 706 }, { "epoch": 0.14372840008131735, "grad_norm": 0.11840532720088959, "learning_rate": 0.00018574188955557816, "loss": 1.0475, "step": 707 }, { "epoch": 0.14393169343362472, "grad_norm": 0.11270745098590851, "learning_rate": 0.00018572154988304689, "loss": 1.0503, "step": 708 }, { "epoch": 0.1441349867859321, "grad_norm": 0.12445101141929626, "learning_rate": 0.00018570121021051564, "loss": 1.0658, "step": 709 }, { "epoch": 0.1443382801382395, "grad_norm": 0.11352977156639099, "learning_rate": 0.00018568087053798436, "loss": 0.9473, "step": 710 }, { "epoch": 0.14454157349054686, "grad_norm": 0.11230108141899109, "learning_rate": 0.00018566053086545309, "loss": 1.0519, "step": 711 }, { "epoch": 0.14474486684285423, "grad_norm": 0.14274398982524872, "learning_rate": 0.0001856401911929218, "loss": 1.1135, "step": 712 }, { "epoch": 0.14494816019516163, "grad_norm": 0.11553295701742172, "learning_rate": 0.0001856198515203905, "loss": 1.044, "step": 713 }, { "epoch": 0.145151453547469, "grad_norm": 0.11737996339797974, "learning_rate": 0.00018559951184785926, "loss": 1.0154, "step": 714 }, { "epoch": 0.14535474689977637, "grad_norm": 0.1481630802154541, "learning_rate": 0.00018557917217532798, "loss": 1.1544, "step": 715 }, { "epoch": 0.14555804025208377, "grad_norm": 0.12081188708543777, "learning_rate": 0.0001855588325027967, "loss": 1.0034, "step": 716 }, { "epoch": 0.14576133360439114, "grad_norm": 0.13458681106567383, "learning_rate": 0.00018553849283026543, "loss": 1.1627, "step": 717 }, { "epoch": 0.1459646269566985, "grad_norm": 0.13506878912448883, "learning_rate": 0.00018551815315773418, "loss": 1.1927, "step": 718 }, { "epoch": 0.14616792030900588, "grad_norm": 0.10834948718547821, "learning_rate": 0.0001854978134852029, "loss": 1.0943, "step": 719 }, { "epoch": 0.14637121366131328, "grad_norm": 0.13779957592487335, "learning_rate": 0.00018547747381267163, "loss": 1.2356, "step": 720 }, { "epoch": 0.14657450701362065, "grad_norm": 0.12655863165855408, "learning_rate": 0.00018545713414014033, "loss": 1.1085, "step": 721 }, { "epoch": 0.14677780036592802, "grad_norm": 0.1144525483250618, "learning_rate": 0.00018543679446760908, "loss": 1.0517, "step": 722 }, { "epoch": 0.14698109371823542, "grad_norm": 0.12001293152570724, "learning_rate": 0.0001854164547950778, "loss": 1.1439, "step": 723 }, { "epoch": 0.1471843870705428, "grad_norm": 0.12786982953548431, "learning_rate": 0.00018539611512254653, "loss": 1.1846, "step": 724 }, { "epoch": 0.14738768042285016, "grad_norm": 0.1154879704117775, "learning_rate": 0.00018537577545001526, "loss": 0.941, "step": 725 }, { "epoch": 0.14759097377515756, "grad_norm": 0.10635704547166824, "learning_rate": 0.000185355435777484, "loss": 0.915, "step": 726 }, { "epoch": 0.14779426712746493, "grad_norm": 0.11456220597028732, "learning_rate": 0.00018533509610495273, "loss": 1.0387, "step": 727 }, { "epoch": 0.1479975604797723, "grad_norm": 0.11217451840639114, "learning_rate": 0.00018531475643242146, "loss": 1.0938, "step": 728 }, { "epoch": 0.1482008538320797, "grad_norm": 0.1105191633105278, "learning_rate": 0.00018529441675989015, "loss": 1.0398, "step": 729 }, { "epoch": 0.14840414718438708, "grad_norm": 0.11848670989274979, "learning_rate": 0.0001852740770873589, "loss": 1.04, "step": 730 }, { "epoch": 0.14860744053669445, "grad_norm": 0.11965551227331161, "learning_rate": 0.00018525373741482763, "loss": 0.966, "step": 731 }, { "epoch": 0.14881073388900182, "grad_norm": 0.12252170592546463, "learning_rate": 0.00018523339774229635, "loss": 1.1997, "step": 732 }, { "epoch": 0.14901402724130922, "grad_norm": 0.11600001901388168, "learning_rate": 0.00018521305806976508, "loss": 1.2425, "step": 733 }, { "epoch": 0.1492173205936166, "grad_norm": 0.11161402612924576, "learning_rate": 0.00018519271839723383, "loss": 0.9978, "step": 734 }, { "epoch": 0.14942061394592396, "grad_norm": 0.12365563958883286, "learning_rate": 0.00018517237872470255, "loss": 0.9652, "step": 735 }, { "epoch": 0.14962390729823136, "grad_norm": 0.11252112686634064, "learning_rate": 0.00018515203905217128, "loss": 0.948, "step": 736 }, { "epoch": 0.14982720065053873, "grad_norm": 0.12211350351572037, "learning_rate": 0.00018513169937963998, "loss": 1.0636, "step": 737 }, { "epoch": 0.1500304940028461, "grad_norm": 0.13200169801712036, "learning_rate": 0.00018511135970710873, "loss": 1.158, "step": 738 }, { "epoch": 0.1502337873551535, "grad_norm": 0.11223406344652176, "learning_rate": 0.00018509102003457745, "loss": 1.1194, "step": 739 }, { "epoch": 0.15043708070746087, "grad_norm": 0.11996794492006302, "learning_rate": 0.00018507068036204618, "loss": 1.0485, "step": 740 }, { "epoch": 0.15064037405976824, "grad_norm": 0.13017338514328003, "learning_rate": 0.0001850503406895149, "loss": 1.1304, "step": 741 }, { "epoch": 0.15084366741207564, "grad_norm": 0.1273190826177597, "learning_rate": 0.00018503000101698365, "loss": 1.0937, "step": 742 }, { "epoch": 0.151046960764383, "grad_norm": 0.1322571486234665, "learning_rate": 0.00018500966134445238, "loss": 1.1364, "step": 743 }, { "epoch": 0.15125025411669038, "grad_norm": 0.12314455956220627, "learning_rate": 0.0001849893216719211, "loss": 1.0005, "step": 744 }, { "epoch": 0.15145354746899775, "grad_norm": 0.1126449927687645, "learning_rate": 0.0001849689819993898, "loss": 1.0231, "step": 745 }, { "epoch": 0.15165684082130515, "grad_norm": 0.12586358189582825, "learning_rate": 0.00018494864232685855, "loss": 1.0816, "step": 746 }, { "epoch": 0.15186013417361252, "grad_norm": 0.09933953732252121, "learning_rate": 0.00018492830265432727, "loss": 0.8666, "step": 747 }, { "epoch": 0.1520634275259199, "grad_norm": 0.12422667443752289, "learning_rate": 0.000184907962981796, "loss": 1.0502, "step": 748 }, { "epoch": 0.1522667208782273, "grad_norm": 0.12274408340454102, "learning_rate": 0.00018488762330926472, "loss": 1.1445, "step": 749 }, { "epoch": 0.15247001423053466, "grad_norm": 0.1317015141248703, "learning_rate": 0.00018486728363673348, "loss": 1.2226, "step": 750 }, { "epoch": 0.15267330758284203, "grad_norm": 0.1201949417591095, "learning_rate": 0.0001848469439642022, "loss": 0.9285, "step": 751 }, { "epoch": 0.15287660093514943, "grad_norm": 0.11115135997533798, "learning_rate": 0.00018482660429167092, "loss": 1.1262, "step": 752 }, { "epoch": 0.1530798942874568, "grad_norm": 0.11809299886226654, "learning_rate": 0.00018480626461913965, "loss": 1.0792, "step": 753 }, { "epoch": 0.15328318763976417, "grad_norm": 0.14711928367614746, "learning_rate": 0.00018478592494660835, "loss": 1.1647, "step": 754 }, { "epoch": 0.15348648099207157, "grad_norm": 0.12082501500844955, "learning_rate": 0.0001847655852740771, "loss": 1.1866, "step": 755 }, { "epoch": 0.15368977434437894, "grad_norm": 0.1093011349439621, "learning_rate": 0.00018474524560154582, "loss": 0.9978, "step": 756 }, { "epoch": 0.15389306769668631, "grad_norm": 0.11525548994541168, "learning_rate": 0.00018472490592901455, "loss": 0.9134, "step": 757 }, { "epoch": 0.15409636104899369, "grad_norm": 0.12464176118373871, "learning_rate": 0.00018470456625648327, "loss": 1.0974, "step": 758 }, { "epoch": 0.15429965440130108, "grad_norm": 0.11930055171251297, "learning_rate": 0.00018468422658395202, "loss": 0.8953, "step": 759 }, { "epoch": 0.15450294775360846, "grad_norm": 0.12347722053527832, "learning_rate": 0.00018466388691142075, "loss": 1.0212, "step": 760 }, { "epoch": 0.15470624110591583, "grad_norm": 0.1258956342935562, "learning_rate": 0.00018464354723888947, "loss": 1.2616, "step": 761 }, { "epoch": 0.15490953445822322, "grad_norm": 0.12692275643348694, "learning_rate": 0.00018462320756635817, "loss": 1.1994, "step": 762 }, { "epoch": 0.1551128278105306, "grad_norm": 0.13774073123931885, "learning_rate": 0.00018460286789382692, "loss": 1.2109, "step": 763 }, { "epoch": 0.15531612116283797, "grad_norm": 0.12587130069732666, "learning_rate": 0.00018458252822129564, "loss": 1.1059, "step": 764 }, { "epoch": 0.15551941451514537, "grad_norm": 0.13462059199810028, "learning_rate": 0.00018456218854876437, "loss": 1.0648, "step": 765 }, { "epoch": 0.15572270786745274, "grad_norm": 0.1329740285873413, "learning_rate": 0.0001845418488762331, "loss": 1.264, "step": 766 }, { "epoch": 0.1559260012197601, "grad_norm": 0.12275559455156326, "learning_rate": 0.00018452150920370185, "loss": 0.9893, "step": 767 }, { "epoch": 0.1561292945720675, "grad_norm": 0.12821702659130096, "learning_rate": 0.00018450116953117057, "loss": 1.0681, "step": 768 }, { "epoch": 0.15633258792437488, "grad_norm": 0.11758620291948318, "learning_rate": 0.0001844808298586393, "loss": 1.0476, "step": 769 }, { "epoch": 0.15653588127668225, "grad_norm": 0.11491292715072632, "learning_rate": 0.000184460490186108, "loss": 1.1428, "step": 770 }, { "epoch": 0.15673917462898962, "grad_norm": 0.12064868956804276, "learning_rate": 0.00018444015051357674, "loss": 0.9565, "step": 771 }, { "epoch": 0.15694246798129702, "grad_norm": 0.12319160997867584, "learning_rate": 0.00018441981084104547, "loss": 1.0593, "step": 772 }, { "epoch": 0.1571457613336044, "grad_norm": 0.13514620065689087, "learning_rate": 0.0001843994711685142, "loss": 1.1908, "step": 773 }, { "epoch": 0.15734905468591176, "grad_norm": 0.1343378722667694, "learning_rate": 0.00018437913149598292, "loss": 1.2193, "step": 774 }, { "epoch": 0.15755234803821916, "grad_norm": 0.13351817429065704, "learning_rate": 0.00018435879182345167, "loss": 1.1141, "step": 775 }, { "epoch": 0.15775564139052653, "grad_norm": 0.11843458563089371, "learning_rate": 0.0001843384521509204, "loss": 1.1933, "step": 776 }, { "epoch": 0.1579589347428339, "grad_norm": 0.12293927371501923, "learning_rate": 0.00018431811247838912, "loss": 1.0682, "step": 777 }, { "epoch": 0.1581622280951413, "grad_norm": 0.11566301435232162, "learning_rate": 0.00018429777280585781, "loss": 1.1093, "step": 778 }, { "epoch": 0.15836552144744867, "grad_norm": 0.11641670763492584, "learning_rate": 0.00018427743313332657, "loss": 1.2028, "step": 779 }, { "epoch": 0.15856881479975604, "grad_norm": 0.14020314812660217, "learning_rate": 0.0001842570934607953, "loss": 1.0472, "step": 780 }, { "epoch": 0.15877210815206344, "grad_norm": 0.11766766011714935, "learning_rate": 0.00018423675378826401, "loss": 0.9908, "step": 781 }, { "epoch": 0.1589754015043708, "grad_norm": 0.14530715346336365, "learning_rate": 0.00018421641411573274, "loss": 1.2046, "step": 782 }, { "epoch": 0.15917869485667818, "grad_norm": 0.12271513789892197, "learning_rate": 0.0001841960744432015, "loss": 1.1401, "step": 783 }, { "epoch": 0.15938198820898555, "grad_norm": 0.12754741311073303, "learning_rate": 0.00018417573477067022, "loss": 1.2811, "step": 784 }, { "epoch": 0.15958528156129295, "grad_norm": 0.10751698166131973, "learning_rate": 0.00018415539509813894, "loss": 0.9566, "step": 785 }, { "epoch": 0.15978857491360032, "grad_norm": 0.12434156984090805, "learning_rate": 0.00018413505542560764, "loss": 1.2307, "step": 786 }, { "epoch": 0.1599918682659077, "grad_norm": 0.1130242571234703, "learning_rate": 0.0001841147157530764, "loss": 1.0406, "step": 787 }, { "epoch": 0.1601951616182151, "grad_norm": 0.12631991505622864, "learning_rate": 0.0001840943760805451, "loss": 1.0835, "step": 788 }, { "epoch": 0.16039845497052246, "grad_norm": 0.11642556637525558, "learning_rate": 0.00018407403640801384, "loss": 0.9743, "step": 789 }, { "epoch": 0.16060174832282983, "grad_norm": 0.1119033470749855, "learning_rate": 0.00018405369673548256, "loss": 1.1377, "step": 790 }, { "epoch": 0.16080504167513723, "grad_norm": 0.14675219357013702, "learning_rate": 0.00018403335706295131, "loss": 1.2846, "step": 791 }, { "epoch": 0.1610083350274446, "grad_norm": 0.1238279864192009, "learning_rate": 0.00018401301739042004, "loss": 1.1033, "step": 792 }, { "epoch": 0.16121162837975198, "grad_norm": 0.12538330256938934, "learning_rate": 0.00018399267771788876, "loss": 1.2344, "step": 793 }, { "epoch": 0.16141492173205937, "grad_norm": 0.11384537816047668, "learning_rate": 0.00018397233804535746, "loss": 1.0143, "step": 794 }, { "epoch": 0.16161821508436675, "grad_norm": 0.1444682627916336, "learning_rate": 0.00018395199837282618, "loss": 1.2364, "step": 795 }, { "epoch": 0.16182150843667412, "grad_norm": 0.12999016046524048, "learning_rate": 0.00018393165870029494, "loss": 1.1853, "step": 796 }, { "epoch": 0.1620248017889815, "grad_norm": 0.12258971482515335, "learning_rate": 0.00018391131902776366, "loss": 1.2673, "step": 797 }, { "epoch": 0.16222809514128889, "grad_norm": 0.13033455610275269, "learning_rate": 0.00018389097935523238, "loss": 0.8922, "step": 798 }, { "epoch": 0.16243138849359626, "grad_norm": 0.14746494591236115, "learning_rate": 0.0001838706396827011, "loss": 1.2164, "step": 799 }, { "epoch": 0.16263468184590363, "grad_norm": 0.12869805097579956, "learning_rate": 0.00018385030001016986, "loss": 1.1788, "step": 800 }, { "epoch": 0.16283797519821103, "grad_norm": 0.11467185616493225, "learning_rate": 0.00018382996033763859, "loss": 0.9527, "step": 801 }, { "epoch": 0.1630412685505184, "grad_norm": 0.129184752702713, "learning_rate": 0.00018380962066510728, "loss": 1.1758, "step": 802 }, { "epoch": 0.16324456190282577, "grad_norm": 0.11696959286928177, "learning_rate": 0.000183789280992576, "loss": 1.03, "step": 803 }, { "epoch": 0.16344785525513317, "grad_norm": 0.13689257204532623, "learning_rate": 0.00018376894132004476, "loss": 1.2516, "step": 804 }, { "epoch": 0.16365114860744054, "grad_norm": 0.11370982229709625, "learning_rate": 0.00018374860164751348, "loss": 1.0484, "step": 805 }, { "epoch": 0.1638544419597479, "grad_norm": 0.13201859593391418, "learning_rate": 0.0001837282619749822, "loss": 1.0903, "step": 806 }, { "epoch": 0.1640577353120553, "grad_norm": 0.10468725860118866, "learning_rate": 0.00018370792230245093, "loss": 0.9548, "step": 807 }, { "epoch": 0.16426102866436268, "grad_norm": 0.14737223088741302, "learning_rate": 0.00018368758262991968, "loss": 1.1607, "step": 808 }, { "epoch": 0.16446432201667005, "grad_norm": 0.11500222235918045, "learning_rate": 0.0001836672429573884, "loss": 1.1032, "step": 809 }, { "epoch": 0.16466761536897742, "grad_norm": 0.12849587202072144, "learning_rate": 0.00018364690328485713, "loss": 1.255, "step": 810 }, { "epoch": 0.16487090872128482, "grad_norm": 0.10878688842058182, "learning_rate": 0.00018362656361232583, "loss": 1.1075, "step": 811 }, { "epoch": 0.1650742020735922, "grad_norm": 0.10878860950469971, "learning_rate": 0.00018360622393979458, "loss": 1.0629, "step": 812 }, { "epoch": 0.16527749542589956, "grad_norm": 0.1280430108308792, "learning_rate": 0.0001835858842672633, "loss": 1.1377, "step": 813 }, { "epoch": 0.16548078877820696, "grad_norm": 0.11831233650445938, "learning_rate": 0.00018356554459473203, "loss": 1.0786, "step": 814 }, { "epoch": 0.16568408213051433, "grad_norm": 0.11453156918287277, "learning_rate": 0.00018354520492220075, "loss": 1.0477, "step": 815 }, { "epoch": 0.1658873754828217, "grad_norm": 0.13597573339939117, "learning_rate": 0.0001835248652496695, "loss": 1.1807, "step": 816 }, { "epoch": 0.1660906688351291, "grad_norm": 0.12008185684680939, "learning_rate": 0.00018350452557713823, "loss": 1.0676, "step": 817 }, { "epoch": 0.16629396218743647, "grad_norm": 0.1363888829946518, "learning_rate": 0.00018348418590460696, "loss": 1.1582, "step": 818 }, { "epoch": 0.16649725553974384, "grad_norm": 0.11310733109712601, "learning_rate": 0.00018346384623207565, "loss": 1.0931, "step": 819 }, { "epoch": 0.16670054889205124, "grad_norm": 0.13503344357013702, "learning_rate": 0.0001834435065595444, "loss": 1.1465, "step": 820 }, { "epoch": 0.1669038422443586, "grad_norm": 0.12744784355163574, "learning_rate": 0.00018342316688701313, "loss": 1.1662, "step": 821 }, { "epoch": 0.16710713559666598, "grad_norm": 0.13695518672466278, "learning_rate": 0.00018340282721448185, "loss": 1.1846, "step": 822 }, { "epoch": 0.16731042894897336, "grad_norm": 0.12580302357673645, "learning_rate": 0.00018338248754195058, "loss": 0.93, "step": 823 }, { "epoch": 0.16751372230128075, "grad_norm": 0.12266777455806732, "learning_rate": 0.00018336214786941933, "loss": 1.1033, "step": 824 }, { "epoch": 0.16771701565358813, "grad_norm": 0.1129806861281395, "learning_rate": 0.00018334180819688805, "loss": 1.0517, "step": 825 }, { "epoch": 0.1679203090058955, "grad_norm": 0.12590476870536804, "learning_rate": 0.00018332146852435678, "loss": 1.0374, "step": 826 }, { "epoch": 0.1681236023582029, "grad_norm": 0.12631377577781677, "learning_rate": 0.00018330112885182548, "loss": 1.1898, "step": 827 }, { "epoch": 0.16832689571051027, "grad_norm": 0.13719779253005981, "learning_rate": 0.00018328078917929423, "loss": 1.1108, "step": 828 }, { "epoch": 0.16853018906281764, "grad_norm": 0.12414206564426422, "learning_rate": 0.00018326044950676295, "loss": 1.1654, "step": 829 }, { "epoch": 0.16873348241512504, "grad_norm": 0.12075278162956238, "learning_rate": 0.00018324010983423168, "loss": 1.0255, "step": 830 }, { "epoch": 0.1689367757674324, "grad_norm": 0.11906860023736954, "learning_rate": 0.0001832197701617004, "loss": 1.0433, "step": 831 }, { "epoch": 0.16914006911973978, "grad_norm": 0.11960665136575699, "learning_rate": 0.00018319943048916915, "loss": 0.9501, "step": 832 }, { "epoch": 0.16934336247204718, "grad_norm": 0.1228812113404274, "learning_rate": 0.00018317909081663788, "loss": 1.002, "step": 833 }, { "epoch": 0.16954665582435455, "grad_norm": 0.12420972436666489, "learning_rate": 0.0001831587511441066, "loss": 1.062, "step": 834 }, { "epoch": 0.16974994917666192, "grad_norm": 0.11490360647439957, "learning_rate": 0.0001831384114715753, "loss": 0.9708, "step": 835 }, { "epoch": 0.1699532425289693, "grad_norm": 0.11945214867591858, "learning_rate": 0.00018311807179904402, "loss": 1.1042, "step": 836 }, { "epoch": 0.1701565358812767, "grad_norm": 0.1234474778175354, "learning_rate": 0.00018309773212651277, "loss": 1.0258, "step": 837 }, { "epoch": 0.17035982923358406, "grad_norm": 0.12447863817214966, "learning_rate": 0.0001830773924539815, "loss": 1.1132, "step": 838 }, { "epoch": 0.17056312258589143, "grad_norm": 0.1321963667869568, "learning_rate": 0.00018305705278145022, "loss": 1.1835, "step": 839 }, { "epoch": 0.17076641593819883, "grad_norm": 0.12708254158496857, "learning_rate": 0.00018303671310891895, "loss": 1.1787, "step": 840 }, { "epoch": 0.1709697092905062, "grad_norm": 0.11481820046901703, "learning_rate": 0.0001830163734363877, "loss": 0.8837, "step": 841 }, { "epoch": 0.17117300264281357, "grad_norm": 0.11851567029953003, "learning_rate": 0.00018299603376385642, "loss": 0.9516, "step": 842 }, { "epoch": 0.17137629599512097, "grad_norm": 0.13182471692562103, "learning_rate": 0.00018297569409132512, "loss": 1.1809, "step": 843 }, { "epoch": 0.17157958934742834, "grad_norm": 0.12840509414672852, "learning_rate": 0.00018295535441879385, "loss": 1.0557, "step": 844 }, { "epoch": 0.1717828826997357, "grad_norm": 0.11280561983585358, "learning_rate": 0.0001829350147462626, "loss": 1.0737, "step": 845 }, { "epoch": 0.1719861760520431, "grad_norm": 0.13144554197788239, "learning_rate": 0.00018291467507373132, "loss": 1.0275, "step": 846 }, { "epoch": 0.17218946940435048, "grad_norm": 0.1224883422255516, "learning_rate": 0.00018289433540120005, "loss": 1.1558, "step": 847 }, { "epoch": 0.17239276275665785, "grad_norm": 0.1263243854045868, "learning_rate": 0.00018287399572866877, "loss": 0.9381, "step": 848 }, { "epoch": 0.17259605610896522, "grad_norm": 0.13391436636447906, "learning_rate": 0.00018285365605613752, "loss": 1.2548, "step": 849 }, { "epoch": 0.17279934946127262, "grad_norm": 0.12166419625282288, "learning_rate": 0.00018283331638360625, "loss": 1.0981, "step": 850 }, { "epoch": 0.17300264281358, "grad_norm": 0.13190463185310364, "learning_rate": 0.00018281297671107494, "loss": 1.1847, "step": 851 }, { "epoch": 0.17320593616588736, "grad_norm": 0.11678186804056168, "learning_rate": 0.00018279263703854367, "loss": 1.0303, "step": 852 }, { "epoch": 0.17340922951819476, "grad_norm": 0.11716858297586441, "learning_rate": 0.00018277229736601242, "loss": 0.9274, "step": 853 }, { "epoch": 0.17361252287050213, "grad_norm": 0.1340217888355255, "learning_rate": 0.00018275195769348114, "loss": 1.0179, "step": 854 }, { "epoch": 0.1738158162228095, "grad_norm": 0.12650153040885925, "learning_rate": 0.00018273161802094987, "loss": 1.0234, "step": 855 }, { "epoch": 0.1740191095751169, "grad_norm": 0.1294967234134674, "learning_rate": 0.0001827112783484186, "loss": 1.2539, "step": 856 }, { "epoch": 0.17422240292742427, "grad_norm": 0.13714881241321564, "learning_rate": 0.00018269093867588734, "loss": 1.0106, "step": 857 }, { "epoch": 0.17442569627973165, "grad_norm": 0.12365014851093292, "learning_rate": 0.00018267059900335607, "loss": 1.1184, "step": 858 }, { "epoch": 0.17462898963203904, "grad_norm": 0.11030489951372147, "learning_rate": 0.00018265025933082477, "loss": 0.9478, "step": 859 }, { "epoch": 0.17483228298434642, "grad_norm": 0.1181483343243599, "learning_rate": 0.0001826299196582935, "loss": 1.0861, "step": 860 }, { "epoch": 0.1750355763366538, "grad_norm": 0.12873612344264984, "learning_rate": 0.00018260957998576224, "loss": 0.9811, "step": 861 }, { "epoch": 0.17523886968896116, "grad_norm": 0.11688394844532013, "learning_rate": 0.00018258924031323097, "loss": 1.1643, "step": 862 }, { "epoch": 0.17544216304126856, "grad_norm": 0.12729796767234802, "learning_rate": 0.0001825689006406997, "loss": 1.0692, "step": 863 }, { "epoch": 0.17564545639357593, "grad_norm": 0.12474660575389862, "learning_rate": 0.00018254856096816842, "loss": 1.2838, "step": 864 }, { "epoch": 0.1758487497458833, "grad_norm": 0.12324024736881256, "learning_rate": 0.00018252822129563717, "loss": 1.0029, "step": 865 }, { "epoch": 0.1760520430981907, "grad_norm": 0.13511407375335693, "learning_rate": 0.0001825078816231059, "loss": 1.1398, "step": 866 }, { "epoch": 0.17625533645049807, "grad_norm": 0.13292032480239868, "learning_rate": 0.0001824875419505746, "loss": 1.3107, "step": 867 }, { "epoch": 0.17645862980280544, "grad_norm": 0.12073294073343277, "learning_rate": 0.00018246720227804331, "loss": 1.1293, "step": 868 }, { "epoch": 0.17666192315511284, "grad_norm": 0.11789250373840332, "learning_rate": 0.00018244686260551207, "loss": 1.0462, "step": 869 }, { "epoch": 0.1768652165074202, "grad_norm": 0.1194562166929245, "learning_rate": 0.0001824265229329808, "loss": 1.0017, "step": 870 }, { "epoch": 0.17706850985972758, "grad_norm": 0.10480080544948578, "learning_rate": 0.00018240618326044951, "loss": 0.8659, "step": 871 }, { "epoch": 0.17727180321203498, "grad_norm": 0.1207701787352562, "learning_rate": 0.00018238584358791824, "loss": 0.9937, "step": 872 }, { "epoch": 0.17747509656434235, "grad_norm": 0.1190091222524643, "learning_rate": 0.000182365503915387, "loss": 1.0437, "step": 873 }, { "epoch": 0.17767838991664972, "grad_norm": 0.1277458369731903, "learning_rate": 0.00018234516424285572, "loss": 1.2392, "step": 874 }, { "epoch": 0.1778816832689571, "grad_norm": 0.12237963080406189, "learning_rate": 0.00018232482457032444, "loss": 1.1032, "step": 875 }, { "epoch": 0.1780849766212645, "grad_norm": 0.1319531798362732, "learning_rate": 0.00018230448489779314, "loss": 1.2012, "step": 876 }, { "epoch": 0.17828826997357186, "grad_norm": 0.11914216727018356, "learning_rate": 0.0001822841452252619, "loss": 1.0272, "step": 877 }, { "epoch": 0.17849156332587923, "grad_norm": 0.14588242769241333, "learning_rate": 0.0001822638055527306, "loss": 1.357, "step": 878 }, { "epoch": 0.17869485667818663, "grad_norm": 0.11982700973749161, "learning_rate": 0.00018224346588019934, "loss": 1.049, "step": 879 }, { "epoch": 0.178898150030494, "grad_norm": 0.12529560923576355, "learning_rate": 0.00018222312620766806, "loss": 1.0713, "step": 880 }, { "epoch": 0.17910144338280137, "grad_norm": 0.1316487044095993, "learning_rate": 0.00018220278653513679, "loss": 1.1749, "step": 881 }, { "epoch": 0.17930473673510877, "grad_norm": 0.12096232175827026, "learning_rate": 0.00018218244686260554, "loss": 1.2104, "step": 882 }, { "epoch": 0.17950803008741614, "grad_norm": 0.1313014030456543, "learning_rate": 0.00018216210719007426, "loss": 1.0554, "step": 883 }, { "epoch": 0.1797113234397235, "grad_norm": 0.1309378743171692, "learning_rate": 0.00018214176751754296, "loss": 1.2152, "step": 884 }, { "epoch": 0.1799146167920309, "grad_norm": 0.1286410242319107, "learning_rate": 0.00018212142784501168, "loss": 1.0922, "step": 885 }, { "epoch": 0.18011791014433828, "grad_norm": 0.12893226742744446, "learning_rate": 0.00018210108817248044, "loss": 1.1969, "step": 886 }, { "epoch": 0.18032120349664565, "grad_norm": 0.11664584279060364, "learning_rate": 0.00018208074849994916, "loss": 1.0085, "step": 887 }, { "epoch": 0.18052449684895303, "grad_norm": 0.10973158478736877, "learning_rate": 0.00018206040882741788, "loss": 0.9548, "step": 888 }, { "epoch": 0.18072779020126042, "grad_norm": 0.11281079053878784, "learning_rate": 0.0001820400691548866, "loss": 0.8521, "step": 889 }, { "epoch": 0.1809310835535678, "grad_norm": 0.12198197096586227, "learning_rate": 0.00018201972948235536, "loss": 1.0537, "step": 890 }, { "epoch": 0.18113437690587517, "grad_norm": 0.09405733644962311, "learning_rate": 0.00018199938980982409, "loss": 0.7193, "step": 891 }, { "epoch": 0.18133767025818257, "grad_norm": 0.13503974676132202, "learning_rate": 0.00018197905013729278, "loss": 1.1564, "step": 892 }, { "epoch": 0.18154096361048994, "grad_norm": 0.1322106271982193, "learning_rate": 0.0001819587104647615, "loss": 1.0733, "step": 893 }, { "epoch": 0.1817442569627973, "grad_norm": 0.12791374325752258, "learning_rate": 0.00018193837079223026, "loss": 1.0701, "step": 894 }, { "epoch": 0.1819475503151047, "grad_norm": 0.12342046946287155, "learning_rate": 0.00018191803111969898, "loss": 1.1255, "step": 895 }, { "epoch": 0.18215084366741208, "grad_norm": 0.12089495360851288, "learning_rate": 0.0001818976914471677, "loss": 1.0177, "step": 896 }, { "epoch": 0.18235413701971945, "grad_norm": 0.12383720278739929, "learning_rate": 0.00018187735177463643, "loss": 1.0188, "step": 897 }, { "epoch": 0.18255743037202685, "grad_norm": 0.12089379876852036, "learning_rate": 0.00018185701210210518, "loss": 1.1106, "step": 898 }, { "epoch": 0.18276072372433422, "grad_norm": 0.12939763069152832, "learning_rate": 0.0001818366724295739, "loss": 1.1939, "step": 899 }, { "epoch": 0.1829640170766416, "grad_norm": 0.14534543454647064, "learning_rate": 0.0001818163327570426, "loss": 1.252, "step": 900 }, { "epoch": 0.18316731042894896, "grad_norm": 0.13002236187458038, "learning_rate": 0.00018179599308451133, "loss": 0.9607, "step": 901 }, { "epoch": 0.18337060378125636, "grad_norm": 0.11892438679933548, "learning_rate": 0.00018177565341198008, "loss": 0.9641, "step": 902 }, { "epoch": 0.18357389713356373, "grad_norm": 0.11869879812002182, "learning_rate": 0.0001817553137394488, "loss": 0.886, "step": 903 }, { "epoch": 0.1837771904858711, "grad_norm": 0.11826761066913605, "learning_rate": 0.00018173497406691753, "loss": 1.2055, "step": 904 }, { "epoch": 0.1839804838381785, "grad_norm": 0.1275918185710907, "learning_rate": 0.00018171463439438625, "loss": 1.0468, "step": 905 }, { "epoch": 0.18418377719048587, "grad_norm": 0.12289033085107803, "learning_rate": 0.000181694294721855, "loss": 1.1464, "step": 906 }, { "epoch": 0.18438707054279324, "grad_norm": 0.11647521704435349, "learning_rate": 0.00018167395504932373, "loss": 0.8912, "step": 907 }, { "epoch": 0.18459036389510064, "grad_norm": 0.12756259739398956, "learning_rate": 0.00018165361537679243, "loss": 1.0672, "step": 908 }, { "epoch": 0.184793657247408, "grad_norm": 0.12525498867034912, "learning_rate": 0.00018163327570426115, "loss": 1.1493, "step": 909 }, { "epoch": 0.18499695059971538, "grad_norm": 0.11629681289196014, "learning_rate": 0.0001816129360317299, "loss": 1.0021, "step": 910 }, { "epoch": 0.18520024395202278, "grad_norm": 0.1350405514240265, "learning_rate": 0.00018159259635919863, "loss": 1.1597, "step": 911 }, { "epoch": 0.18540353730433015, "grad_norm": 0.10785862803459167, "learning_rate": 0.00018157225668666735, "loss": 0.9035, "step": 912 }, { "epoch": 0.18560683065663752, "grad_norm": 0.13618353009223938, "learning_rate": 0.00018155191701413608, "loss": 1.4084, "step": 913 }, { "epoch": 0.18581012400894492, "grad_norm": 0.12942783534526825, "learning_rate": 0.00018153157734160483, "loss": 1.0529, "step": 914 }, { "epoch": 0.1860134173612523, "grad_norm": 0.12829767167568207, "learning_rate": 0.00018151123766907355, "loss": 1.1734, "step": 915 }, { "epoch": 0.18621671071355966, "grad_norm": 0.11795412003993988, "learning_rate": 0.00018149089799654225, "loss": 1.0891, "step": 916 }, { "epoch": 0.18642000406586703, "grad_norm": 0.13184364140033722, "learning_rate": 0.00018147055832401098, "loss": 1.0332, "step": 917 }, { "epoch": 0.18662329741817443, "grad_norm": 0.13445381820201874, "learning_rate": 0.00018145021865147973, "loss": 1.1417, "step": 918 }, { "epoch": 0.1868265907704818, "grad_norm": 0.1418420672416687, "learning_rate": 0.00018142987897894845, "loss": 1.1279, "step": 919 }, { "epoch": 0.18702988412278918, "grad_norm": 0.11725430935621262, "learning_rate": 0.00018140953930641718, "loss": 0.9878, "step": 920 }, { "epoch": 0.18723317747509657, "grad_norm": 0.13889212906360626, "learning_rate": 0.0001813891996338859, "loss": 1.1153, "step": 921 }, { "epoch": 0.18743647082740394, "grad_norm": 0.12875622510910034, "learning_rate": 0.00018136885996135462, "loss": 1.0345, "step": 922 }, { "epoch": 0.18763976417971132, "grad_norm": 0.12533831596374512, "learning_rate": 0.00018134852028882338, "loss": 1.0741, "step": 923 }, { "epoch": 0.18784305753201871, "grad_norm": 0.12448123842477798, "learning_rate": 0.00018132818061629207, "loss": 1.0161, "step": 924 }, { "epoch": 0.18804635088432609, "grad_norm": 0.13820883631706238, "learning_rate": 0.0001813078409437608, "loss": 1.2834, "step": 925 }, { "epoch": 0.18824964423663346, "grad_norm": 0.1304212063550949, "learning_rate": 0.00018128750127122952, "loss": 1.0927, "step": 926 }, { "epoch": 0.18845293758894086, "grad_norm": 0.12558777630329132, "learning_rate": 0.00018126716159869827, "loss": 1.1516, "step": 927 }, { "epoch": 0.18865623094124823, "grad_norm": 0.13149550557136536, "learning_rate": 0.000181246821926167, "loss": 1.0791, "step": 928 }, { "epoch": 0.1888595242935556, "grad_norm": 0.12774059176445007, "learning_rate": 0.00018122648225363572, "loss": 1.108, "step": 929 }, { "epoch": 0.18906281764586297, "grad_norm": 0.12127216160297394, "learning_rate": 0.00018120614258110445, "loss": 1.1254, "step": 930 }, { "epoch": 0.18926611099817037, "grad_norm": 0.1251489520072937, "learning_rate": 0.0001811858029085732, "loss": 1.1306, "step": 931 }, { "epoch": 0.18946940435047774, "grad_norm": 0.12320549786090851, "learning_rate": 0.00018116546323604192, "loss": 1.082, "step": 932 }, { "epoch": 0.1896726977027851, "grad_norm": 0.12626154720783234, "learning_rate": 0.00018114512356351062, "loss": 1.176, "step": 933 }, { "epoch": 0.1898759910550925, "grad_norm": 0.12401305884122849, "learning_rate": 0.00018112478389097935, "loss": 1.107, "step": 934 }, { "epoch": 0.19007928440739988, "grad_norm": 0.13284708559513092, "learning_rate": 0.0001811044442184481, "loss": 1.1977, "step": 935 }, { "epoch": 0.19028257775970725, "grad_norm": 0.11293178796768188, "learning_rate": 0.00018108410454591682, "loss": 0.8484, "step": 936 }, { "epoch": 0.19048587111201465, "grad_norm": 0.12113649398088455, "learning_rate": 0.00018106376487338555, "loss": 1.0833, "step": 937 }, { "epoch": 0.19068916446432202, "grad_norm": 0.12353657186031342, "learning_rate": 0.00018104342520085427, "loss": 1.1154, "step": 938 }, { "epoch": 0.1908924578166294, "grad_norm": 0.13213786482810974, "learning_rate": 0.00018102308552832302, "loss": 1.0866, "step": 939 }, { "epoch": 0.1910957511689368, "grad_norm": 0.12303278595209122, "learning_rate": 0.00018100274585579175, "loss": 0.9889, "step": 940 }, { "epoch": 0.19129904452124416, "grad_norm": 0.12523289024829865, "learning_rate": 0.00018098240618326044, "loss": 0.9564, "step": 941 }, { "epoch": 0.19150233787355153, "grad_norm": 0.12457413971424103, "learning_rate": 0.00018096206651072917, "loss": 1.168, "step": 942 }, { "epoch": 0.1917056312258589, "grad_norm": 0.13440296053886414, "learning_rate": 0.00018094172683819792, "loss": 1.1655, "step": 943 }, { "epoch": 0.1919089245781663, "grad_norm": 0.11574854701757431, "learning_rate": 0.00018092138716566664, "loss": 0.9982, "step": 944 }, { "epoch": 0.19211221793047367, "grad_norm": 0.1216878592967987, "learning_rate": 0.00018090104749313537, "loss": 0.97, "step": 945 }, { "epoch": 0.19231551128278104, "grad_norm": 0.11920405179262161, "learning_rate": 0.0001808807078206041, "loss": 0.9783, "step": 946 }, { "epoch": 0.19251880463508844, "grad_norm": 0.12107307463884354, "learning_rate": 0.00018086036814807284, "loss": 1.0843, "step": 947 }, { "epoch": 0.1927220979873958, "grad_norm": 0.12287328392267227, "learning_rate": 0.00018084002847554157, "loss": 1.1068, "step": 948 }, { "epoch": 0.19292539133970318, "grad_norm": 0.12466049194335938, "learning_rate": 0.00018081968880301027, "loss": 0.9383, "step": 949 }, { "epoch": 0.19312868469201058, "grad_norm": 0.11762560158967972, "learning_rate": 0.000180799349130479, "loss": 0.9855, "step": 950 }, { "epoch": 0.19333197804431795, "grad_norm": 0.12275755405426025, "learning_rate": 0.00018077900945794774, "loss": 1.0528, "step": 951 }, { "epoch": 0.19353527139662532, "grad_norm": 0.12033812701702118, "learning_rate": 0.00018075866978541647, "loss": 1.0828, "step": 952 }, { "epoch": 0.19373856474893272, "grad_norm": 0.13380326330661774, "learning_rate": 0.0001807383301128852, "loss": 1.0634, "step": 953 }, { "epoch": 0.1939418581012401, "grad_norm": 0.13521994650363922, "learning_rate": 0.00018071799044035392, "loss": 1.1512, "step": 954 }, { "epoch": 0.19414515145354747, "grad_norm": 0.1331789344549179, "learning_rate": 0.00018069765076782267, "loss": 1.2343, "step": 955 }, { "epoch": 0.19434844480585484, "grad_norm": 0.12130323797464371, "learning_rate": 0.0001806773110952914, "loss": 1.291, "step": 956 }, { "epoch": 0.19455173815816224, "grad_norm": 0.10274801403284073, "learning_rate": 0.0001806569714227601, "loss": 0.8534, "step": 957 }, { "epoch": 0.1947550315104696, "grad_norm": 0.1255219727754593, "learning_rate": 0.00018063663175022881, "loss": 1.1804, "step": 958 }, { "epoch": 0.19495832486277698, "grad_norm": 0.13403509557247162, "learning_rate": 0.00018061629207769757, "loss": 1.1882, "step": 959 }, { "epoch": 0.19516161821508438, "grad_norm": 0.1277134269475937, "learning_rate": 0.0001805959524051663, "loss": 1.1059, "step": 960 }, { "epoch": 0.19536491156739175, "grad_norm": 0.1148851290345192, "learning_rate": 0.00018057561273263501, "loss": 1.125, "step": 961 }, { "epoch": 0.19556820491969912, "grad_norm": 0.10984671115875244, "learning_rate": 0.00018055527306010374, "loss": 1.0396, "step": 962 }, { "epoch": 0.19577149827200652, "grad_norm": 0.13988138735294342, "learning_rate": 0.00018053493338757246, "loss": 1.1672, "step": 963 }, { "epoch": 0.1959747916243139, "grad_norm": 0.12106659263372421, "learning_rate": 0.00018051459371504121, "loss": 1.0142, "step": 964 }, { "epoch": 0.19617808497662126, "grad_norm": 0.10751524567604065, "learning_rate": 0.0001804942540425099, "loss": 1.0027, "step": 965 }, { "epoch": 0.19638137832892866, "grad_norm": 0.12096796184778214, "learning_rate": 0.00018047391436997864, "loss": 1.0965, "step": 966 }, { "epoch": 0.19658467168123603, "grad_norm": 0.12069959938526154, "learning_rate": 0.00018045357469744736, "loss": 0.9869, "step": 967 }, { "epoch": 0.1967879650335434, "grad_norm": 0.13281071186065674, "learning_rate": 0.0001804332350249161, "loss": 1.0361, "step": 968 }, { "epoch": 0.19699125838585077, "grad_norm": 0.12690961360931396, "learning_rate": 0.00018041289535238484, "loss": 1.005, "step": 969 }, { "epoch": 0.19719455173815817, "grad_norm": 0.1329599916934967, "learning_rate": 0.00018039255567985356, "loss": 1.1184, "step": 970 }, { "epoch": 0.19739784509046554, "grad_norm": 0.12807321548461914, "learning_rate": 0.00018037221600732229, "loss": 1.1918, "step": 971 }, { "epoch": 0.1976011384427729, "grad_norm": 0.12155921012163162, "learning_rate": 0.00018035187633479104, "loss": 1.1934, "step": 972 }, { "epoch": 0.1978044317950803, "grad_norm": 0.11720109730958939, "learning_rate": 0.00018033153666225973, "loss": 1.2674, "step": 973 }, { "epoch": 0.19800772514738768, "grad_norm": 0.12774553894996643, "learning_rate": 0.00018031119698972846, "loss": 1.1394, "step": 974 }, { "epoch": 0.19821101849969505, "grad_norm": 0.11617007106542587, "learning_rate": 0.00018029085731719718, "loss": 1.0772, "step": 975 }, { "epoch": 0.19841431185200245, "grad_norm": 0.1182067021727562, "learning_rate": 0.00018027051764466594, "loss": 1.0433, "step": 976 }, { "epoch": 0.19861760520430982, "grad_norm": 0.128327414393425, "learning_rate": 0.00018025017797213466, "loss": 1.0616, "step": 977 }, { "epoch": 0.1988208985566172, "grad_norm": 0.12075836956501007, "learning_rate": 0.00018022983829960338, "loss": 1.2187, "step": 978 }, { "epoch": 0.1990241919089246, "grad_norm": 0.132186159491539, "learning_rate": 0.0001802094986270721, "loss": 1.0614, "step": 979 }, { "epoch": 0.19922748526123196, "grad_norm": 0.135267972946167, "learning_rate": 0.00018018915895454086, "loss": 1.3447, "step": 980 }, { "epoch": 0.19943077861353933, "grad_norm": 0.13122640550136566, "learning_rate": 0.00018016881928200956, "loss": 1.3336, "step": 981 }, { "epoch": 0.1996340719658467, "grad_norm": 0.11631322652101517, "learning_rate": 0.00018014847960947828, "loss": 1.024, "step": 982 }, { "epoch": 0.1998373653181541, "grad_norm": 0.12409427016973495, "learning_rate": 0.000180128139936947, "loss": 0.9806, "step": 983 }, { "epoch": 0.20004065867046147, "grad_norm": 0.1337365210056305, "learning_rate": 0.00018010780026441576, "loss": 1.1875, "step": 984 }, { "epoch": 0.20024395202276885, "grad_norm": 0.12941214442253113, "learning_rate": 0.00018008746059188448, "loss": 1.1962, "step": 985 }, { "epoch": 0.20044724537507624, "grad_norm": 0.12374356389045715, "learning_rate": 0.0001800671209193532, "loss": 1.1213, "step": 986 }, { "epoch": 0.20065053872738361, "grad_norm": 0.13427360355854034, "learning_rate": 0.00018004678124682193, "loss": 1.209, "step": 987 }, { "epoch": 0.20085383207969099, "grad_norm": 0.11423162370920181, "learning_rate": 0.00018002644157429068, "loss": 0.8628, "step": 988 }, { "epoch": 0.20105712543199838, "grad_norm": 0.12818945944309235, "learning_rate": 0.0001800061019017594, "loss": 1.1162, "step": 989 }, { "epoch": 0.20126041878430576, "grad_norm": 0.11825679987668991, "learning_rate": 0.0001799857622292281, "loss": 0.9222, "step": 990 }, { "epoch": 0.20146371213661313, "grad_norm": 0.11358822882175446, "learning_rate": 0.00017996542255669683, "loss": 0.992, "step": 991 }, { "epoch": 0.20166700548892053, "grad_norm": 0.12839291989803314, "learning_rate": 0.00017994508288416558, "loss": 1.1777, "step": 992 }, { "epoch": 0.2018702988412279, "grad_norm": 0.12416979670524597, "learning_rate": 0.0001799247432116343, "loss": 1.2411, "step": 993 }, { "epoch": 0.20207359219353527, "grad_norm": 0.12002628296613693, "learning_rate": 0.00017990440353910303, "loss": 1.0961, "step": 994 }, { "epoch": 0.20227688554584264, "grad_norm": 0.1268136203289032, "learning_rate": 0.00017988406386657175, "loss": 1.1405, "step": 995 }, { "epoch": 0.20248017889815004, "grad_norm": 0.12864577770233154, "learning_rate": 0.0001798637241940405, "loss": 1.136, "step": 996 }, { "epoch": 0.2026834722504574, "grad_norm": 0.11293767392635345, "learning_rate": 0.00017984338452150923, "loss": 1.0633, "step": 997 }, { "epoch": 0.20288676560276478, "grad_norm": 0.11901193857192993, "learning_rate": 0.00017982304484897793, "loss": 1.1404, "step": 998 }, { "epoch": 0.20309005895507218, "grad_norm": 0.14368772506713867, "learning_rate": 0.00017980270517644665, "loss": 1.2092, "step": 999 }, { "epoch": 0.20329335230737955, "grad_norm": 0.1403762251138687, "learning_rate": 0.0001797823655039154, "loss": 1.1212, "step": 1000 }, { "epoch": 0.20349664565968692, "grad_norm": 0.10853412747383118, "learning_rate": 0.00017976202583138413, "loss": 0.9489, "step": 1001 }, { "epoch": 0.20369993901199432, "grad_norm": 0.11670242995023727, "learning_rate": 0.00017974168615885285, "loss": 1.0397, "step": 1002 }, { "epoch": 0.2039032323643017, "grad_norm": 0.12957903742790222, "learning_rate": 0.00017972134648632158, "loss": 0.9898, "step": 1003 }, { "epoch": 0.20410652571660906, "grad_norm": 0.1174166351556778, "learning_rate": 0.0001797010068137903, "loss": 1.1126, "step": 1004 }, { "epoch": 0.20430981906891646, "grad_norm": 0.12919628620147705, "learning_rate": 0.00017968066714125905, "loss": 1.0282, "step": 1005 }, { "epoch": 0.20451311242122383, "grad_norm": 0.12586313486099243, "learning_rate": 0.00017966032746872775, "loss": 1.1674, "step": 1006 }, { "epoch": 0.2047164057735312, "grad_norm": 0.12239197641611099, "learning_rate": 0.00017963998779619647, "loss": 1.0543, "step": 1007 }, { "epoch": 0.20491969912583857, "grad_norm": 0.11404930055141449, "learning_rate": 0.0001796196481236652, "loss": 1.0193, "step": 1008 }, { "epoch": 0.20512299247814597, "grad_norm": 0.14286890625953674, "learning_rate": 0.00017959930845113395, "loss": 0.994, "step": 1009 }, { "epoch": 0.20532628583045334, "grad_norm": 0.12723585963249207, "learning_rate": 0.00017957896877860268, "loss": 1.1854, "step": 1010 }, { "epoch": 0.2055295791827607, "grad_norm": 0.13282720744609833, "learning_rate": 0.0001795586291060714, "loss": 1.0967, "step": 1011 }, { "epoch": 0.2057328725350681, "grad_norm": 0.11795739084482193, "learning_rate": 0.00017953828943354012, "loss": 1.0024, "step": 1012 }, { "epoch": 0.20593616588737548, "grad_norm": 0.123084157705307, "learning_rate": 0.00017951794976100888, "loss": 1.0015, "step": 1013 }, { "epoch": 0.20613945923968285, "grad_norm": 0.13757507503032684, "learning_rate": 0.00017949761008847757, "loss": 1.1926, "step": 1014 }, { "epoch": 0.20634275259199025, "grad_norm": 0.13981647789478302, "learning_rate": 0.0001794772704159463, "loss": 1.1296, "step": 1015 }, { "epoch": 0.20654604594429762, "grad_norm": 0.12356757372617722, "learning_rate": 0.00017945693074341502, "loss": 1.1867, "step": 1016 }, { "epoch": 0.206749339296605, "grad_norm": 0.11218491941690445, "learning_rate": 0.00017943659107088377, "loss": 0.9909, "step": 1017 }, { "epoch": 0.2069526326489124, "grad_norm": 0.11628386378288269, "learning_rate": 0.0001794162513983525, "loss": 1.1048, "step": 1018 }, { "epoch": 0.20715592600121976, "grad_norm": 0.1266728788614273, "learning_rate": 0.00017939591172582122, "loss": 1.123, "step": 1019 }, { "epoch": 0.20735921935352714, "grad_norm": 0.1243995800614357, "learning_rate": 0.00017937557205328995, "loss": 1.1161, "step": 1020 }, { "epoch": 0.2075625127058345, "grad_norm": 0.10625866800546646, "learning_rate": 0.0001793552323807587, "loss": 0.9773, "step": 1021 }, { "epoch": 0.2077658060581419, "grad_norm": 0.11653080582618713, "learning_rate": 0.0001793348927082274, "loss": 1.0115, "step": 1022 }, { "epoch": 0.20796909941044928, "grad_norm": 0.12603938579559326, "learning_rate": 0.00017931455303569612, "loss": 0.9879, "step": 1023 }, { "epoch": 0.20817239276275665, "grad_norm": 0.11850478500127792, "learning_rate": 0.00017929421336316485, "loss": 1.0412, "step": 1024 }, { "epoch": 0.20837568611506405, "grad_norm": 0.13597136735916138, "learning_rate": 0.0001792738736906336, "loss": 1.1984, "step": 1025 }, { "epoch": 0.20857897946737142, "grad_norm": 0.12899504601955414, "learning_rate": 0.00017925353401810232, "loss": 1.2007, "step": 1026 }, { "epoch": 0.2087822728196788, "grad_norm": 0.12255753576755524, "learning_rate": 0.00017923319434557105, "loss": 1.0632, "step": 1027 }, { "epoch": 0.2089855661719862, "grad_norm": 0.11182371526956558, "learning_rate": 0.00017921285467303977, "loss": 1.0428, "step": 1028 }, { "epoch": 0.20918885952429356, "grad_norm": 0.10728685557842255, "learning_rate": 0.00017919251500050852, "loss": 0.9964, "step": 1029 }, { "epoch": 0.20939215287660093, "grad_norm": 0.1301811784505844, "learning_rate": 0.00017917217532797722, "loss": 1.0812, "step": 1030 }, { "epoch": 0.20959544622890833, "grad_norm": 0.12470284849405289, "learning_rate": 0.00017915183565544594, "loss": 0.9967, "step": 1031 }, { "epoch": 0.2097987395812157, "grad_norm": 0.12017293274402618, "learning_rate": 0.00017913149598291467, "loss": 1.0986, "step": 1032 }, { "epoch": 0.21000203293352307, "grad_norm": 0.14881430566310883, "learning_rate": 0.00017911115631038342, "loss": 1.2407, "step": 1033 }, { "epoch": 0.21020532628583044, "grad_norm": 0.11730567365884781, "learning_rate": 0.00017909081663785214, "loss": 0.9414, "step": 1034 }, { "epoch": 0.21040861963813784, "grad_norm": 0.12763184309005737, "learning_rate": 0.00017907047696532087, "loss": 1.0773, "step": 1035 }, { "epoch": 0.2106119129904452, "grad_norm": 0.11463324725627899, "learning_rate": 0.0001790501372927896, "loss": 0.965, "step": 1036 }, { "epoch": 0.21081520634275258, "grad_norm": 0.13079042732715607, "learning_rate": 0.00017902979762025834, "loss": 1.0491, "step": 1037 }, { "epoch": 0.21101849969505998, "grad_norm": 0.13902175426483154, "learning_rate": 0.00017900945794772704, "loss": 0.9453, "step": 1038 }, { "epoch": 0.21122179304736735, "grad_norm": 0.12852630019187927, "learning_rate": 0.00017898911827519577, "loss": 1.221, "step": 1039 }, { "epoch": 0.21142508639967472, "grad_norm": 0.10965081304311752, "learning_rate": 0.0001789687786026645, "loss": 0.9923, "step": 1040 }, { "epoch": 0.21162837975198212, "grad_norm": 0.1155104711651802, "learning_rate": 0.00017894843893013324, "loss": 0.8918, "step": 1041 }, { "epoch": 0.2118316731042895, "grad_norm": 0.13126857578754425, "learning_rate": 0.00017892809925760197, "loss": 1.1116, "step": 1042 }, { "epoch": 0.21203496645659686, "grad_norm": 0.11619725823402405, "learning_rate": 0.0001789077595850707, "loss": 0.9448, "step": 1043 }, { "epoch": 0.21223825980890426, "grad_norm": 0.12041871249675751, "learning_rate": 0.00017888741991253942, "loss": 1.0778, "step": 1044 }, { "epoch": 0.21244155316121163, "grad_norm": 0.1230979636311531, "learning_rate": 0.00017886708024000814, "loss": 1.0807, "step": 1045 }, { "epoch": 0.212644846513519, "grad_norm": 0.1263006180524826, "learning_rate": 0.0001788467405674769, "loss": 1.1668, "step": 1046 }, { "epoch": 0.21284813986582637, "grad_norm": 0.11430171877145767, "learning_rate": 0.0001788264008949456, "loss": 1.1096, "step": 1047 }, { "epoch": 0.21305143321813377, "grad_norm": 0.1243266835808754, "learning_rate": 0.0001788060612224143, "loss": 0.9583, "step": 1048 }, { "epoch": 0.21325472657044114, "grad_norm": 0.12808263301849365, "learning_rate": 0.00017878572154988304, "loss": 1.0896, "step": 1049 }, { "epoch": 0.21345801992274852, "grad_norm": 0.13576021790504456, "learning_rate": 0.0001787653818773518, "loss": 1.0807, "step": 1050 }, { "epoch": 0.21366131327505591, "grad_norm": 0.10852668434381485, "learning_rate": 0.00017874504220482051, "loss": 0.9132, "step": 1051 }, { "epoch": 0.21386460662736329, "grad_norm": 0.13336928188800812, "learning_rate": 0.00017872470253228924, "loss": 1.1131, "step": 1052 }, { "epoch": 0.21406789997967066, "grad_norm": 0.12640543282032013, "learning_rate": 0.00017870436285975796, "loss": 1.0568, "step": 1053 }, { "epoch": 0.21427119333197805, "grad_norm": 0.12157181650400162, "learning_rate": 0.00017868402318722671, "loss": 1.033, "step": 1054 }, { "epoch": 0.21447448668428543, "grad_norm": 0.12272074073553085, "learning_rate": 0.0001786636835146954, "loss": 1.0301, "step": 1055 }, { "epoch": 0.2146777800365928, "grad_norm": 0.1594497114419937, "learning_rate": 0.00017864334384216414, "loss": 1.1618, "step": 1056 }, { "epoch": 0.2148810733889002, "grad_norm": 0.14059504866600037, "learning_rate": 0.00017862300416963286, "loss": 1.1223, "step": 1057 }, { "epoch": 0.21508436674120757, "grad_norm": 0.12746313214302063, "learning_rate": 0.0001786026644971016, "loss": 1.1294, "step": 1058 }, { "epoch": 0.21528766009351494, "grad_norm": 0.13382786512374878, "learning_rate": 0.00017858232482457034, "loss": 1.0969, "step": 1059 }, { "epoch": 0.2154909534458223, "grad_norm": 0.1192721351981163, "learning_rate": 0.00017856198515203906, "loss": 0.9751, "step": 1060 }, { "epoch": 0.2156942467981297, "grad_norm": 0.1318022906780243, "learning_rate": 0.00017854164547950779, "loss": 1.267, "step": 1061 }, { "epoch": 0.21589754015043708, "grad_norm": 0.12069433927536011, "learning_rate": 0.00017852130580697654, "loss": 1.0525, "step": 1062 }, { "epoch": 0.21610083350274445, "grad_norm": 0.12405405938625336, "learning_rate": 0.00017850096613444523, "loss": 1.1731, "step": 1063 }, { "epoch": 0.21630412685505185, "grad_norm": 0.11893291026353836, "learning_rate": 0.00017848062646191396, "loss": 1.1609, "step": 1064 }, { "epoch": 0.21650742020735922, "grad_norm": 0.11019967496395111, "learning_rate": 0.00017846028678938268, "loss": 0.9735, "step": 1065 }, { "epoch": 0.2167107135596666, "grad_norm": 0.11663123220205307, "learning_rate": 0.00017843994711685144, "loss": 1.2882, "step": 1066 }, { "epoch": 0.216914006911974, "grad_norm": 0.12803837656974792, "learning_rate": 0.00017841960744432016, "loss": 1.0365, "step": 1067 }, { "epoch": 0.21711730026428136, "grad_norm": 0.13295085728168488, "learning_rate": 0.00017839926777178888, "loss": 1.1925, "step": 1068 }, { "epoch": 0.21732059361658873, "grad_norm": 0.12314966320991516, "learning_rate": 0.0001783789280992576, "loss": 1.1064, "step": 1069 }, { "epoch": 0.21752388696889613, "grad_norm": 0.12015377730131149, "learning_rate": 0.00017835858842672636, "loss": 1.1056, "step": 1070 }, { "epoch": 0.2177271803212035, "grad_norm": 0.11665552854537964, "learning_rate": 0.00017833824875419506, "loss": 1.0592, "step": 1071 }, { "epoch": 0.21793047367351087, "grad_norm": 0.11458134651184082, "learning_rate": 0.00017831790908166378, "loss": 1.1203, "step": 1072 }, { "epoch": 0.21813376702581824, "grad_norm": 0.10290549695491791, "learning_rate": 0.0001782975694091325, "loss": 0.9427, "step": 1073 }, { "epoch": 0.21833706037812564, "grad_norm": 0.12680476903915405, "learning_rate": 0.00017827722973660126, "loss": 1.1047, "step": 1074 }, { "epoch": 0.218540353730433, "grad_norm": 0.1253194808959961, "learning_rate": 0.00017825689006406998, "loss": 1.1482, "step": 1075 }, { "epoch": 0.21874364708274038, "grad_norm": 0.1381319910287857, "learning_rate": 0.0001782365503915387, "loss": 1.3134, "step": 1076 }, { "epoch": 0.21894694043504778, "grad_norm": 0.12798373401165009, "learning_rate": 0.00017821621071900743, "loss": 1.1119, "step": 1077 }, { "epoch": 0.21915023378735515, "grad_norm": 0.1302616447210312, "learning_rate": 0.00017819587104647618, "loss": 1.1038, "step": 1078 }, { "epoch": 0.21935352713966252, "grad_norm": 0.1357065588235855, "learning_rate": 0.00017817553137394488, "loss": 1.1385, "step": 1079 }, { "epoch": 0.21955682049196992, "grad_norm": 0.1307210475206375, "learning_rate": 0.0001781551917014136, "loss": 1.1692, "step": 1080 }, { "epoch": 0.2197601138442773, "grad_norm": 0.12304160743951797, "learning_rate": 0.00017813485202888233, "loss": 1.1044, "step": 1081 }, { "epoch": 0.21996340719658466, "grad_norm": 0.12165479362010956, "learning_rate": 0.00017811451235635108, "loss": 1.0762, "step": 1082 }, { "epoch": 0.22016670054889206, "grad_norm": 0.12440644204616547, "learning_rate": 0.0001780941726838198, "loss": 1.0296, "step": 1083 }, { "epoch": 0.22036999390119943, "grad_norm": 0.14743392169475555, "learning_rate": 0.00017807383301128853, "loss": 1.1824, "step": 1084 }, { "epoch": 0.2205732872535068, "grad_norm": 0.13372984528541565, "learning_rate": 0.00017805349333875725, "loss": 1.0795, "step": 1085 }, { "epoch": 0.22077658060581418, "grad_norm": 0.11515718698501587, "learning_rate": 0.00017803315366622598, "loss": 0.9869, "step": 1086 }, { "epoch": 0.22097987395812158, "grad_norm": 0.1197754368185997, "learning_rate": 0.0001780128139936947, "loss": 1.1101, "step": 1087 }, { "epoch": 0.22118316731042895, "grad_norm": 0.121689073741436, "learning_rate": 0.00017799247432116343, "loss": 1.0624, "step": 1088 }, { "epoch": 0.22138646066273632, "grad_norm": 0.12425584346055984, "learning_rate": 0.00017797213464863215, "loss": 0.9962, "step": 1089 }, { "epoch": 0.22158975401504372, "grad_norm": 0.11786684393882751, "learning_rate": 0.00017795179497610088, "loss": 0.8943, "step": 1090 }, { "epoch": 0.2217930473673511, "grad_norm": 0.13555578887462616, "learning_rate": 0.00017793145530356963, "loss": 1.2069, "step": 1091 }, { "epoch": 0.22199634071965846, "grad_norm": 0.12431347370147705, "learning_rate": 0.00017791111563103835, "loss": 1.1376, "step": 1092 }, { "epoch": 0.22219963407196586, "grad_norm": 0.12472493946552277, "learning_rate": 0.00017789077595850708, "loss": 1.1486, "step": 1093 }, { "epoch": 0.22240292742427323, "grad_norm": 0.12927775084972382, "learning_rate": 0.0001778704362859758, "loss": 1.1581, "step": 1094 }, { "epoch": 0.2226062207765806, "grad_norm": 0.12910224497318268, "learning_rate": 0.00017785009661344453, "loss": 0.988, "step": 1095 }, { "epoch": 0.222809514128888, "grad_norm": 0.11531752347946167, "learning_rate": 0.00017782975694091325, "loss": 0.9782, "step": 1096 }, { "epoch": 0.22301280748119537, "grad_norm": 0.1250569224357605, "learning_rate": 0.00017780941726838197, "loss": 1.0796, "step": 1097 }, { "epoch": 0.22321610083350274, "grad_norm": 0.1234661191701889, "learning_rate": 0.0001777890775958507, "loss": 1.1571, "step": 1098 }, { "epoch": 0.2234193941858101, "grad_norm": 0.11324235796928406, "learning_rate": 0.00017776873792331945, "loss": 1.1156, "step": 1099 }, { "epoch": 0.2236226875381175, "grad_norm": 0.12516295909881592, "learning_rate": 0.00017774839825078818, "loss": 1.0161, "step": 1100 }, { "epoch": 0.22382598089042488, "grad_norm": 0.13084611296653748, "learning_rate": 0.0001777280585782569, "loss": 1.1408, "step": 1101 }, { "epoch": 0.22402927424273225, "grad_norm": 0.1189943253993988, "learning_rate": 0.00017770771890572562, "loss": 0.9823, "step": 1102 }, { "epoch": 0.22423256759503965, "grad_norm": 0.11955268681049347, "learning_rate": 0.00017768737923319438, "loss": 0.8977, "step": 1103 }, { "epoch": 0.22443586094734702, "grad_norm": 0.12528367340564728, "learning_rate": 0.00017766703956066307, "loss": 1.1579, "step": 1104 }, { "epoch": 0.2246391542996544, "grad_norm": 0.12829215824604034, "learning_rate": 0.0001776466998881318, "loss": 1.1892, "step": 1105 }, { "epoch": 0.2248424476519618, "grad_norm": 0.12263132631778717, "learning_rate": 0.00017762636021560052, "loss": 1.1609, "step": 1106 }, { "epoch": 0.22504574100426916, "grad_norm": 0.12810589373111725, "learning_rate": 0.00017760602054306927, "loss": 1.0712, "step": 1107 }, { "epoch": 0.22524903435657653, "grad_norm": 0.1171211376786232, "learning_rate": 0.000177585680870538, "loss": 0.9694, "step": 1108 }, { "epoch": 0.22545232770888393, "grad_norm": 0.12270856648683548, "learning_rate": 0.00017756534119800672, "loss": 1.0896, "step": 1109 }, { "epoch": 0.2256556210611913, "grad_norm": 0.13578352332115173, "learning_rate": 0.00017754500152547545, "loss": 1.2125, "step": 1110 }, { "epoch": 0.22585891441349867, "grad_norm": 0.1315973401069641, "learning_rate": 0.0001775246618529442, "loss": 1.2441, "step": 1111 }, { "epoch": 0.22606220776580604, "grad_norm": 0.1222010925412178, "learning_rate": 0.0001775043221804129, "loss": 0.9894, "step": 1112 }, { "epoch": 0.22626550111811344, "grad_norm": 0.12425290793180466, "learning_rate": 0.00017748398250788162, "loss": 1.273, "step": 1113 }, { "epoch": 0.22646879447042081, "grad_norm": 0.10960794985294342, "learning_rate": 0.00017746364283535034, "loss": 0.8637, "step": 1114 }, { "epoch": 0.22667208782272819, "grad_norm": 0.13080738484859467, "learning_rate": 0.0001774433031628191, "loss": 1.1506, "step": 1115 }, { "epoch": 0.22687538117503558, "grad_norm": 0.11546586453914642, "learning_rate": 0.00017742296349028782, "loss": 1.1043, "step": 1116 }, { "epoch": 0.22707867452734296, "grad_norm": 0.12280496209859848, "learning_rate": 0.00017740262381775655, "loss": 1.1203, "step": 1117 }, { "epoch": 0.22728196787965033, "grad_norm": 0.11661294102668762, "learning_rate": 0.00017738228414522527, "loss": 1.0486, "step": 1118 }, { "epoch": 0.22748526123195772, "grad_norm": 0.12169715762138367, "learning_rate": 0.00017736194447269402, "loss": 1.2318, "step": 1119 }, { "epoch": 0.2276885545842651, "grad_norm": 0.12962935864925385, "learning_rate": 0.00017734160480016272, "loss": 1.0912, "step": 1120 }, { "epoch": 0.22789184793657247, "grad_norm": 0.14488789439201355, "learning_rate": 0.00017732126512763144, "loss": 0.9774, "step": 1121 }, { "epoch": 0.22809514128887987, "grad_norm": 0.11455550044775009, "learning_rate": 0.00017730092545510017, "loss": 0.9454, "step": 1122 }, { "epoch": 0.22829843464118724, "grad_norm": 0.11764731258153915, "learning_rate": 0.00017728058578256892, "loss": 1.0895, "step": 1123 }, { "epoch": 0.2285017279934946, "grad_norm": 0.12537989020347595, "learning_rate": 0.00017726024611003764, "loss": 1.141, "step": 1124 }, { "epoch": 0.22870502134580198, "grad_norm": 0.11639077961444855, "learning_rate": 0.00017723990643750637, "loss": 1.1259, "step": 1125 }, { "epoch": 0.22890831469810938, "grad_norm": 0.12202929705381393, "learning_rate": 0.0001772195667649751, "loss": 1.141, "step": 1126 }, { "epoch": 0.22911160805041675, "grad_norm": 0.11307729780673981, "learning_rate": 0.00017719922709244382, "loss": 0.9076, "step": 1127 }, { "epoch": 0.22931490140272412, "grad_norm": 0.11854063719511032, "learning_rate": 0.00017717888741991254, "loss": 1.0369, "step": 1128 }, { "epoch": 0.22951819475503152, "grad_norm": 0.11729457229375839, "learning_rate": 0.00017715854774738127, "loss": 1.0503, "step": 1129 }, { "epoch": 0.2297214881073389, "grad_norm": 0.13550931215286255, "learning_rate": 0.00017713820807485, "loss": 1.1211, "step": 1130 }, { "epoch": 0.22992478145964626, "grad_norm": 0.1215146854519844, "learning_rate": 0.00017711786840231871, "loss": 1.0483, "step": 1131 }, { "epoch": 0.23012807481195366, "grad_norm": 0.12911346554756165, "learning_rate": 0.00017709752872978747, "loss": 1.1133, "step": 1132 }, { "epoch": 0.23033136816426103, "grad_norm": 0.1176094263792038, "learning_rate": 0.0001770771890572562, "loss": 0.9717, "step": 1133 }, { "epoch": 0.2305346615165684, "grad_norm": 0.1320810616016388, "learning_rate": 0.00017705684938472492, "loss": 1.0978, "step": 1134 }, { "epoch": 0.2307379548688758, "grad_norm": 0.119644396007061, "learning_rate": 0.00017703650971219364, "loss": 1.03, "step": 1135 }, { "epoch": 0.23094124822118317, "grad_norm": 0.11813725531101227, "learning_rate": 0.00017701617003966236, "loss": 0.9804, "step": 1136 }, { "epoch": 0.23114454157349054, "grad_norm": 0.12088938802480698, "learning_rate": 0.0001769958303671311, "loss": 1.052, "step": 1137 }, { "epoch": 0.2313478349257979, "grad_norm": 0.11971927434206009, "learning_rate": 0.0001769754906945998, "loss": 0.8725, "step": 1138 }, { "epoch": 0.2315511282781053, "grad_norm": 0.1300465613603592, "learning_rate": 0.00017695515102206854, "loss": 1.1638, "step": 1139 }, { "epoch": 0.23175442163041268, "grad_norm": 0.14033594727516174, "learning_rate": 0.0001769348113495373, "loss": 1.2757, "step": 1140 }, { "epoch": 0.23195771498272005, "grad_norm": 0.13062700629234314, "learning_rate": 0.000176914471677006, "loss": 1.1734, "step": 1141 }, { "epoch": 0.23216100833502745, "grad_norm": 0.11161787062883377, "learning_rate": 0.00017689413200447474, "loss": 0.9733, "step": 1142 }, { "epoch": 0.23236430168733482, "grad_norm": 0.11497635394334793, "learning_rate": 0.00017687379233194346, "loss": 0.9751, "step": 1143 }, { "epoch": 0.2325675950396422, "grad_norm": 0.12658412754535675, "learning_rate": 0.0001768534526594122, "loss": 0.9667, "step": 1144 }, { "epoch": 0.2327708883919496, "grad_norm": 0.125930517911911, "learning_rate": 0.0001768331129868809, "loss": 1.2521, "step": 1145 }, { "epoch": 0.23297418174425696, "grad_norm": 0.1267358511686325, "learning_rate": 0.00017681277331434964, "loss": 1.2034, "step": 1146 }, { "epoch": 0.23317747509656434, "grad_norm": 0.11235269904136658, "learning_rate": 0.00017679243364181836, "loss": 1.0049, "step": 1147 }, { "epoch": 0.23338076844887173, "grad_norm": 0.13258063793182373, "learning_rate": 0.0001767720939692871, "loss": 1.0893, "step": 1148 }, { "epoch": 0.2335840618011791, "grad_norm": 0.108503058552742, "learning_rate": 0.00017675175429675584, "loss": 0.9366, "step": 1149 }, { "epoch": 0.23378735515348648, "grad_norm": 0.12689101696014404, "learning_rate": 0.00017673141462422456, "loss": 1.1286, "step": 1150 }, { "epoch": 0.23399064850579385, "grad_norm": 0.12492146342992783, "learning_rate": 0.00017671107495169329, "loss": 0.9183, "step": 1151 }, { "epoch": 0.23419394185810125, "grad_norm": 0.10324962437152863, "learning_rate": 0.000176690735279162, "loss": 1.0456, "step": 1152 }, { "epoch": 0.23439723521040862, "grad_norm": 0.11633274704217911, "learning_rate": 0.00017667039560663073, "loss": 0.917, "step": 1153 }, { "epoch": 0.234600528562716, "grad_norm": 0.11803746968507767, "learning_rate": 0.00017665005593409946, "loss": 1.0733, "step": 1154 }, { "epoch": 0.2348038219150234, "grad_norm": 0.128416046500206, "learning_rate": 0.00017662971626156818, "loss": 1.1525, "step": 1155 }, { "epoch": 0.23500711526733076, "grad_norm": 0.13254918158054352, "learning_rate": 0.00017660937658903693, "loss": 1.2412, "step": 1156 }, { "epoch": 0.23521040861963813, "grad_norm": 0.13515497744083405, "learning_rate": 0.00017658903691650566, "loss": 1.0627, "step": 1157 }, { "epoch": 0.23541370197194553, "grad_norm": 0.12952685356140137, "learning_rate": 0.00017656869724397438, "loss": 1.0841, "step": 1158 }, { "epoch": 0.2356169953242529, "grad_norm": 0.14173516631126404, "learning_rate": 0.0001765483575714431, "loss": 1.1436, "step": 1159 }, { "epoch": 0.23582028867656027, "grad_norm": 0.11358428746461868, "learning_rate": 0.00017652801789891183, "loss": 1.0707, "step": 1160 }, { "epoch": 0.23602358202886767, "grad_norm": 0.11959460377693176, "learning_rate": 0.00017650767822638056, "loss": 1.0038, "step": 1161 }, { "epoch": 0.23622687538117504, "grad_norm": 0.13181112706661224, "learning_rate": 0.00017648733855384928, "loss": 1.2132, "step": 1162 }, { "epoch": 0.2364301687334824, "grad_norm": 0.12374672293663025, "learning_rate": 0.000176466998881318, "loss": 1.0432, "step": 1163 }, { "epoch": 0.23663346208578978, "grad_norm": 0.1308983564376831, "learning_rate": 0.00017644665920878676, "loss": 1.1725, "step": 1164 }, { "epoch": 0.23683675543809718, "grad_norm": 0.11602329462766647, "learning_rate": 0.00017642631953625548, "loss": 1.0136, "step": 1165 }, { "epoch": 0.23704004879040455, "grad_norm": 0.1398748755455017, "learning_rate": 0.0001764059798637242, "loss": 1.0655, "step": 1166 }, { "epoch": 0.23724334214271192, "grad_norm": 0.1302013248205185, "learning_rate": 0.00017638564019119293, "loss": 1.1052, "step": 1167 }, { "epoch": 0.23744663549501932, "grad_norm": 0.11932185292243958, "learning_rate": 0.00017636530051866166, "loss": 1.1945, "step": 1168 }, { "epoch": 0.2376499288473267, "grad_norm": 0.11323782801628113, "learning_rate": 0.00017634496084613038, "loss": 1.0412, "step": 1169 }, { "epoch": 0.23785322219963406, "grad_norm": 0.1345479041337967, "learning_rate": 0.0001763246211735991, "loss": 1.082, "step": 1170 }, { "epoch": 0.23805651555194146, "grad_norm": 0.12548640370368958, "learning_rate": 0.00017630428150106783, "loss": 1.1213, "step": 1171 }, { "epoch": 0.23825980890424883, "grad_norm": 0.12849657237529755, "learning_rate": 0.00017628394182853655, "loss": 1.13, "step": 1172 }, { "epoch": 0.2384631022565562, "grad_norm": 0.11670655757188797, "learning_rate": 0.0001762636021560053, "loss": 0.8984, "step": 1173 }, { "epoch": 0.2386663956088636, "grad_norm": 0.11539500951766968, "learning_rate": 0.00017624326248347403, "loss": 1.1453, "step": 1174 }, { "epoch": 0.23886968896117097, "grad_norm": 0.13686025142669678, "learning_rate": 0.00017622292281094275, "loss": 1.2811, "step": 1175 }, { "epoch": 0.23907298231347834, "grad_norm": 0.13845805823802948, "learning_rate": 0.00017620258313841148, "loss": 1.2939, "step": 1176 }, { "epoch": 0.23927627566578574, "grad_norm": 0.12209935486316681, "learning_rate": 0.0001761822434658802, "loss": 0.8311, "step": 1177 }, { "epoch": 0.2394795690180931, "grad_norm": 0.11880161613225937, "learning_rate": 0.00017616190379334893, "loss": 1.2844, "step": 1178 }, { "epoch": 0.23968286237040048, "grad_norm": 0.1326730102300644, "learning_rate": 0.00017614156412081765, "loss": 1.1336, "step": 1179 }, { "epoch": 0.23988615572270786, "grad_norm": 0.11547461152076721, "learning_rate": 0.00017612122444828638, "loss": 0.9757, "step": 1180 }, { "epoch": 0.24008944907501525, "grad_norm": 0.1296636462211609, "learning_rate": 0.00017610088477575513, "loss": 1.084, "step": 1181 }, { "epoch": 0.24029274242732263, "grad_norm": 0.12076129764318466, "learning_rate": 0.00017608054510322385, "loss": 1.1151, "step": 1182 }, { "epoch": 0.24049603577963, "grad_norm": 0.12159736454486847, "learning_rate": 0.00017606020543069258, "loss": 1.0461, "step": 1183 }, { "epoch": 0.2406993291319374, "grad_norm": 0.13127025961875916, "learning_rate": 0.0001760398657581613, "loss": 1.0541, "step": 1184 }, { "epoch": 0.24090262248424477, "grad_norm": 0.14702552556991577, "learning_rate": 0.00017601952608563003, "loss": 1.2109, "step": 1185 }, { "epoch": 0.24110591583655214, "grad_norm": 0.11683522909879684, "learning_rate": 0.00017599918641309875, "loss": 1.1588, "step": 1186 }, { "epoch": 0.24130920918885954, "grad_norm": 0.1130138412117958, "learning_rate": 0.00017597884674056747, "loss": 1.0268, "step": 1187 }, { "epoch": 0.2415125025411669, "grad_norm": 0.10920488089323044, "learning_rate": 0.0001759585070680362, "loss": 0.9038, "step": 1188 }, { "epoch": 0.24171579589347428, "grad_norm": 0.12897898256778717, "learning_rate": 0.00017593816739550495, "loss": 1.3032, "step": 1189 }, { "epoch": 0.24191908924578168, "grad_norm": 0.1289346069097519, "learning_rate": 0.00017591782772297367, "loss": 1.1169, "step": 1190 }, { "epoch": 0.24212238259808905, "grad_norm": 0.12478041648864746, "learning_rate": 0.0001758974880504424, "loss": 1.1723, "step": 1191 }, { "epoch": 0.24232567595039642, "grad_norm": 0.13389204442501068, "learning_rate": 0.00017587714837791112, "loss": 1.1079, "step": 1192 }, { "epoch": 0.2425289693027038, "grad_norm": 0.12659893929958344, "learning_rate": 0.00017585680870537985, "loss": 1.078, "step": 1193 }, { "epoch": 0.2427322626550112, "grad_norm": 0.13224546611309052, "learning_rate": 0.00017583646903284857, "loss": 1.0784, "step": 1194 }, { "epoch": 0.24293555600731856, "grad_norm": 0.13924521207809448, "learning_rate": 0.0001758161293603173, "loss": 1.1491, "step": 1195 }, { "epoch": 0.24313884935962593, "grad_norm": 0.10379677265882492, "learning_rate": 0.00017579578968778602, "loss": 0.8733, "step": 1196 }, { "epoch": 0.24334214271193333, "grad_norm": 0.17008356750011444, "learning_rate": 0.00017577545001525477, "loss": 1.1597, "step": 1197 }, { "epoch": 0.2435454360642407, "grad_norm": 0.11082588881254196, "learning_rate": 0.0001757551103427235, "loss": 0.966, "step": 1198 }, { "epoch": 0.24374872941654807, "grad_norm": 0.12224634736776352, "learning_rate": 0.00017573477067019222, "loss": 0.9494, "step": 1199 }, { "epoch": 0.24395202276885547, "grad_norm": 0.12597164511680603, "learning_rate": 0.00017571443099766095, "loss": 0.9376, "step": 1200 }, { "epoch": 0.24415531612116284, "grad_norm": 0.12282256036996841, "learning_rate": 0.00017569409132512967, "loss": 1.1645, "step": 1201 }, { "epoch": 0.2443586094734702, "grad_norm": 0.10933969169855118, "learning_rate": 0.0001756737516525984, "loss": 1.0338, "step": 1202 }, { "epoch": 0.2445619028257776, "grad_norm": 0.12132111936807632, "learning_rate": 0.00017565341198006712, "loss": 1.1541, "step": 1203 }, { "epoch": 0.24476519617808498, "grad_norm": 0.12675434350967407, "learning_rate": 0.00017563307230753584, "loss": 1.0506, "step": 1204 }, { "epoch": 0.24496848953039235, "grad_norm": 0.14764836430549622, "learning_rate": 0.0001756127326350046, "loss": 1.0209, "step": 1205 }, { "epoch": 0.24517178288269972, "grad_norm": 0.11838477104902267, "learning_rate": 0.00017559239296247332, "loss": 0.9789, "step": 1206 }, { "epoch": 0.24537507623500712, "grad_norm": 0.11526069790124893, "learning_rate": 0.00017557205328994204, "loss": 1.0871, "step": 1207 }, { "epoch": 0.2455783695873145, "grad_norm": 0.12997418642044067, "learning_rate": 0.00017555171361741077, "loss": 1.215, "step": 1208 }, { "epoch": 0.24578166293962186, "grad_norm": 0.1175120398402214, "learning_rate": 0.0001755313739448795, "loss": 0.8355, "step": 1209 }, { "epoch": 0.24598495629192926, "grad_norm": 0.11287759989500046, "learning_rate": 0.00017551103427234822, "loss": 0.9071, "step": 1210 }, { "epoch": 0.24618824964423663, "grad_norm": 0.11898453533649445, "learning_rate": 0.00017549069459981694, "loss": 1.0175, "step": 1211 }, { "epoch": 0.246391542996544, "grad_norm": 0.13262607157230377, "learning_rate": 0.00017547035492728567, "loss": 1.1498, "step": 1212 }, { "epoch": 0.2465948363488514, "grad_norm": 0.12178485840559006, "learning_rate": 0.0001754500152547544, "loss": 1.0405, "step": 1213 }, { "epoch": 0.24679812970115877, "grad_norm": 0.13001886010169983, "learning_rate": 0.00017542967558222314, "loss": 1.0465, "step": 1214 }, { "epoch": 0.24700142305346615, "grad_norm": 0.12525972723960876, "learning_rate": 0.00017540933590969187, "loss": 1.1144, "step": 1215 }, { "epoch": 0.24720471640577354, "grad_norm": 0.11287079751491547, "learning_rate": 0.0001753889962371606, "loss": 0.9362, "step": 1216 }, { "epoch": 0.24740800975808092, "grad_norm": 0.13626334071159363, "learning_rate": 0.00017536865656462932, "loss": 1.2352, "step": 1217 }, { "epoch": 0.2476113031103883, "grad_norm": 0.12724994122982025, "learning_rate": 0.00017534831689209804, "loss": 1.0396, "step": 1218 }, { "epoch": 0.24781459646269566, "grad_norm": 0.11603401601314545, "learning_rate": 0.00017532797721956677, "loss": 0.9778, "step": 1219 }, { "epoch": 0.24801788981500306, "grad_norm": 0.12654529511928558, "learning_rate": 0.0001753076375470355, "loss": 1.0312, "step": 1220 }, { "epoch": 0.24822118316731043, "grad_norm": 0.13385628163814545, "learning_rate": 0.00017528729787450421, "loss": 1.1297, "step": 1221 }, { "epoch": 0.2484244765196178, "grad_norm": 0.12190620601177216, "learning_rate": 0.00017526695820197297, "loss": 1.1187, "step": 1222 }, { "epoch": 0.2486277698719252, "grad_norm": 0.11775553971529007, "learning_rate": 0.0001752466185294417, "loss": 1.0193, "step": 1223 }, { "epoch": 0.24883106322423257, "grad_norm": 0.10721298307180405, "learning_rate": 0.00017522627885691041, "loss": 0.9781, "step": 1224 }, { "epoch": 0.24903435657653994, "grad_norm": 0.11292947083711624, "learning_rate": 0.00017520593918437914, "loss": 1.122, "step": 1225 }, { "epoch": 0.24923764992884734, "grad_norm": 0.11116209626197815, "learning_rate": 0.00017518559951184786, "loss": 0.9238, "step": 1226 }, { "epoch": 0.2494409432811547, "grad_norm": 0.12392593175172806, "learning_rate": 0.0001751652598393166, "loss": 1.1305, "step": 1227 }, { "epoch": 0.24964423663346208, "grad_norm": 0.124233178794384, "learning_rate": 0.0001751449201667853, "loss": 1.0218, "step": 1228 }, { "epoch": 0.24984752998576948, "grad_norm": 0.1181500032544136, "learning_rate": 0.00017512458049425404, "loss": 0.9349, "step": 1229 }, { "epoch": 0.2500508233380768, "grad_norm": 0.13005246222019196, "learning_rate": 0.0001751042408217228, "loss": 1.1636, "step": 1230 }, { "epoch": 0.2502541166903842, "grad_norm": 0.12866559624671936, "learning_rate": 0.0001750839011491915, "loss": 1.1384, "step": 1231 }, { "epoch": 0.2504574100426916, "grad_norm": 0.11397498100996017, "learning_rate": 0.00017506356147666024, "loss": 1.0519, "step": 1232 }, { "epoch": 0.25066070339499896, "grad_norm": 0.11991407722234726, "learning_rate": 0.00017504322180412896, "loss": 1.025, "step": 1233 }, { "epoch": 0.25086399674730636, "grad_norm": 0.11384415626525879, "learning_rate": 0.0001750228821315977, "loss": 0.9845, "step": 1234 }, { "epoch": 0.25106729009961376, "grad_norm": 0.12114489823579788, "learning_rate": 0.0001750025424590664, "loss": 0.9427, "step": 1235 }, { "epoch": 0.2512705834519211, "grad_norm": 0.12967409193515778, "learning_rate": 0.00017498220278653514, "loss": 1.0714, "step": 1236 }, { "epoch": 0.2514738768042285, "grad_norm": 0.13375937938690186, "learning_rate": 0.00017496186311400386, "loss": 1.0678, "step": 1237 }, { "epoch": 0.2516771701565359, "grad_norm": 0.12456507235765457, "learning_rate": 0.0001749415234414726, "loss": 1.0189, "step": 1238 }, { "epoch": 0.25188046350884324, "grad_norm": 0.1372321993112564, "learning_rate": 0.00017492118376894134, "loss": 1.2524, "step": 1239 }, { "epoch": 0.25208375686115064, "grad_norm": 0.11218629777431488, "learning_rate": 0.00017490084409641006, "loss": 1.0237, "step": 1240 }, { "epoch": 0.25228705021345804, "grad_norm": 0.12430521845817566, "learning_rate": 0.00017488050442387878, "loss": 0.9717, "step": 1241 }, { "epoch": 0.2524903435657654, "grad_norm": 0.12222771346569061, "learning_rate": 0.0001748601647513475, "loss": 1.087, "step": 1242 }, { "epoch": 0.2526936369180728, "grad_norm": 0.12341856956481934, "learning_rate": 0.00017483982507881623, "loss": 1.0789, "step": 1243 }, { "epoch": 0.2528969302703802, "grad_norm": 0.13263435661792755, "learning_rate": 0.00017481948540628496, "loss": 1.1757, "step": 1244 }, { "epoch": 0.2531002236226875, "grad_norm": 0.12904416024684906, "learning_rate": 0.00017479914573375368, "loss": 1.0812, "step": 1245 }, { "epoch": 0.2533035169749949, "grad_norm": 0.12575136125087738, "learning_rate": 0.00017477880606122243, "loss": 1.0435, "step": 1246 }, { "epoch": 0.2535068103273023, "grad_norm": 0.11990928649902344, "learning_rate": 0.00017475846638869116, "loss": 0.9689, "step": 1247 }, { "epoch": 0.25371010367960967, "grad_norm": 0.12164648622274399, "learning_rate": 0.00017473812671615988, "loss": 0.9309, "step": 1248 }, { "epoch": 0.25391339703191707, "grad_norm": 0.12410687655210495, "learning_rate": 0.0001747177870436286, "loss": 1.0254, "step": 1249 }, { "epoch": 0.2541166903842244, "grad_norm": 0.13339757919311523, "learning_rate": 0.00017469744737109733, "loss": 1.0455, "step": 1250 }, { "epoch": 0.2543199837365318, "grad_norm": 0.14127810299396515, "learning_rate": 0.00017467710769856606, "loss": 1.1633, "step": 1251 }, { "epoch": 0.2545232770888392, "grad_norm": 0.10454534739255905, "learning_rate": 0.00017465676802603478, "loss": 0.7574, "step": 1252 }, { "epoch": 0.25472657044114655, "grad_norm": 0.14605766534805298, "learning_rate": 0.0001746364283535035, "loss": 1.1348, "step": 1253 }, { "epoch": 0.25492986379345395, "grad_norm": 0.11716707050800323, "learning_rate": 0.00017461608868097223, "loss": 1.0645, "step": 1254 }, { "epoch": 0.25513315714576135, "grad_norm": 0.13623961806297302, "learning_rate": 0.00017459574900844098, "loss": 1.1175, "step": 1255 }, { "epoch": 0.2553364504980687, "grad_norm": 0.11011240631341934, "learning_rate": 0.0001745754093359097, "loss": 1.0454, "step": 1256 }, { "epoch": 0.2555397438503761, "grad_norm": 0.13665513694286346, "learning_rate": 0.00017455506966337843, "loss": 1.1112, "step": 1257 }, { "epoch": 0.2557430372026835, "grad_norm": 0.11241257190704346, "learning_rate": 0.00017453472999084715, "loss": 1.0808, "step": 1258 }, { "epoch": 0.25594633055499083, "grad_norm": 0.1247948557138443, "learning_rate": 0.00017451439031831588, "loss": 0.9803, "step": 1259 }, { "epoch": 0.25614962390729823, "grad_norm": 0.14268344640731812, "learning_rate": 0.0001744940506457846, "loss": 1.1095, "step": 1260 }, { "epoch": 0.25635291725960563, "grad_norm": 0.11472602188587189, "learning_rate": 0.00017447371097325333, "loss": 0.9911, "step": 1261 }, { "epoch": 0.25655621061191297, "grad_norm": 0.14191444218158722, "learning_rate": 0.00017445337130072205, "loss": 1.0679, "step": 1262 }, { "epoch": 0.25675950396422037, "grad_norm": 0.12657268345355988, "learning_rate": 0.0001744330316281908, "loss": 0.9211, "step": 1263 }, { "epoch": 0.25696279731652777, "grad_norm": 0.1397320032119751, "learning_rate": 0.00017441269195565953, "loss": 1.1604, "step": 1264 }, { "epoch": 0.2571660906688351, "grad_norm": 0.12176384776830673, "learning_rate": 0.00017439235228312825, "loss": 1.0919, "step": 1265 }, { "epoch": 0.2573693840211425, "grad_norm": 0.13282664120197296, "learning_rate": 0.00017437201261059698, "loss": 1.259, "step": 1266 }, { "epoch": 0.2575726773734499, "grad_norm": 0.14279745519161224, "learning_rate": 0.0001743516729380657, "loss": 1.3582, "step": 1267 }, { "epoch": 0.25777597072575725, "grad_norm": 0.11482515186071396, "learning_rate": 0.00017433133326553443, "loss": 0.9338, "step": 1268 }, { "epoch": 0.25797926407806465, "grad_norm": 0.12177598476409912, "learning_rate": 0.00017431099359300315, "loss": 1.0996, "step": 1269 }, { "epoch": 0.25818255743037205, "grad_norm": 0.12271133065223694, "learning_rate": 0.00017429065392047188, "loss": 1.2357, "step": 1270 }, { "epoch": 0.2583858507826794, "grad_norm": 0.11448093503713608, "learning_rate": 0.00017427031424794063, "loss": 1.0057, "step": 1271 }, { "epoch": 0.2585891441349868, "grad_norm": 0.11486377567052841, "learning_rate": 0.00017424997457540935, "loss": 1.0429, "step": 1272 }, { "epoch": 0.2587924374872942, "grad_norm": 0.12816710770130157, "learning_rate": 0.00017422963490287808, "loss": 1.0849, "step": 1273 }, { "epoch": 0.25899573083960153, "grad_norm": 0.13030269742012024, "learning_rate": 0.0001742092952303468, "loss": 0.9698, "step": 1274 }, { "epoch": 0.25919902419190893, "grad_norm": 0.12305210530757904, "learning_rate": 0.00017418895555781553, "loss": 1.0847, "step": 1275 }, { "epoch": 0.2594023175442163, "grad_norm": 0.11980848014354706, "learning_rate": 0.00017416861588528425, "loss": 1.1857, "step": 1276 }, { "epoch": 0.2596056108965237, "grad_norm": 0.1268121749162674, "learning_rate": 0.00017414827621275297, "loss": 1.057, "step": 1277 }, { "epoch": 0.2598089042488311, "grad_norm": 0.119362972676754, "learning_rate": 0.0001741279365402217, "loss": 0.9978, "step": 1278 }, { "epoch": 0.2600121976011384, "grad_norm": 0.11040918529033661, "learning_rate": 0.00017410759686769045, "loss": 0.8646, "step": 1279 }, { "epoch": 0.2602154909534458, "grad_norm": 0.1263931393623352, "learning_rate": 0.00017408725719515917, "loss": 0.9089, "step": 1280 }, { "epoch": 0.2604187843057532, "grad_norm": 0.1311492770910263, "learning_rate": 0.0001740669175226279, "loss": 1.2096, "step": 1281 }, { "epoch": 0.26062207765806056, "grad_norm": 0.15105241537094116, "learning_rate": 0.00017404657785009662, "loss": 1.3313, "step": 1282 }, { "epoch": 0.26082537101036796, "grad_norm": 0.14878205955028534, "learning_rate": 0.00017402623817756535, "loss": 1.1671, "step": 1283 }, { "epoch": 0.26102866436267536, "grad_norm": 0.12554128468036652, "learning_rate": 0.00017400589850503407, "loss": 0.9834, "step": 1284 }, { "epoch": 0.2612319577149827, "grad_norm": 0.12347958981990814, "learning_rate": 0.0001739855588325028, "loss": 1.1064, "step": 1285 }, { "epoch": 0.2614352510672901, "grad_norm": 0.13344120979309082, "learning_rate": 0.00017396521915997152, "loss": 1.1599, "step": 1286 }, { "epoch": 0.2616385444195975, "grad_norm": 0.11492400616407394, "learning_rate": 0.00017394487948744027, "loss": 1.0664, "step": 1287 }, { "epoch": 0.26184183777190484, "grad_norm": 0.1381841003894806, "learning_rate": 0.000173924539814909, "loss": 1.2612, "step": 1288 }, { "epoch": 0.26204513112421224, "grad_norm": 0.1126202642917633, "learning_rate": 0.00017390420014237772, "loss": 1.0606, "step": 1289 }, { "epoch": 0.26224842447651964, "grad_norm": 0.12391757220029831, "learning_rate": 0.00017388386046984645, "loss": 1.1284, "step": 1290 }, { "epoch": 0.262451717828827, "grad_norm": 0.14284935593605042, "learning_rate": 0.00017386352079731517, "loss": 1.2237, "step": 1291 }, { "epoch": 0.2626550111811344, "grad_norm": 0.11940843611955643, "learning_rate": 0.0001738431811247839, "loss": 1.0164, "step": 1292 }, { "epoch": 0.2628583045334418, "grad_norm": 0.11453817039728165, "learning_rate": 0.00017382284145225262, "loss": 0.919, "step": 1293 }, { "epoch": 0.2630615978857491, "grad_norm": 0.11902697384357452, "learning_rate": 0.00017380250177972134, "loss": 1.0669, "step": 1294 }, { "epoch": 0.2632648912380565, "grad_norm": 0.12861910462379456, "learning_rate": 0.00017378216210719007, "loss": 1.051, "step": 1295 }, { "epoch": 0.2634681845903639, "grad_norm": 0.13415683805942535, "learning_rate": 0.00017376182243465882, "loss": 1.2085, "step": 1296 }, { "epoch": 0.26367147794267126, "grad_norm": 0.11324958503246307, "learning_rate": 0.00017374148276212754, "loss": 1.0347, "step": 1297 }, { "epoch": 0.26387477129497866, "grad_norm": 0.11437279731035233, "learning_rate": 0.00017372114308959627, "loss": 1.0386, "step": 1298 }, { "epoch": 0.26407806464728606, "grad_norm": 0.1309337615966797, "learning_rate": 0.000173700803417065, "loss": 1.0251, "step": 1299 }, { "epoch": 0.2642813579995934, "grad_norm": 0.12801750004291534, "learning_rate": 0.00017368046374453372, "loss": 1.0661, "step": 1300 }, { "epoch": 0.2644846513519008, "grad_norm": 0.12607401609420776, "learning_rate": 0.00017366012407200244, "loss": 1.1156, "step": 1301 }, { "epoch": 0.2646879447042082, "grad_norm": 0.1417655348777771, "learning_rate": 0.00017363978439947117, "loss": 1.3765, "step": 1302 }, { "epoch": 0.26489123805651554, "grad_norm": 0.12621742486953735, "learning_rate": 0.0001736194447269399, "loss": 0.9138, "step": 1303 }, { "epoch": 0.26509453140882294, "grad_norm": 0.12521621584892273, "learning_rate": 0.00017359910505440864, "loss": 1.1882, "step": 1304 }, { "epoch": 0.2652978247611303, "grad_norm": 0.11669400334358215, "learning_rate": 0.00017357876538187737, "loss": 1.0113, "step": 1305 }, { "epoch": 0.2655011181134377, "grad_norm": 0.12276088446378708, "learning_rate": 0.0001735584257093461, "loss": 1.0964, "step": 1306 }, { "epoch": 0.2657044114657451, "grad_norm": 0.11636564135551453, "learning_rate": 0.00017353808603681482, "loss": 1.0141, "step": 1307 }, { "epoch": 0.2659077048180524, "grad_norm": 0.10083210468292236, "learning_rate": 0.00017351774636428354, "loss": 1.0403, "step": 1308 }, { "epoch": 0.2661109981703598, "grad_norm": 0.12461689859628677, "learning_rate": 0.00017349740669175227, "loss": 1.0841, "step": 1309 }, { "epoch": 0.2663142915226672, "grad_norm": 0.12346909195184708, "learning_rate": 0.000173477067019221, "loss": 1.0268, "step": 1310 }, { "epoch": 0.26651758487497457, "grad_norm": 0.11846248060464859, "learning_rate": 0.00017345672734668971, "loss": 1.028, "step": 1311 }, { "epoch": 0.26672087822728197, "grad_norm": 0.1329965591430664, "learning_rate": 0.00017343638767415847, "loss": 1.0805, "step": 1312 }, { "epoch": 0.26692417157958936, "grad_norm": 0.12369682639837265, "learning_rate": 0.0001734160480016272, "loss": 1.0047, "step": 1313 }, { "epoch": 0.2671274649318967, "grad_norm": 0.12594352662563324, "learning_rate": 0.00017339570832909591, "loss": 1.209, "step": 1314 }, { "epoch": 0.2673307582842041, "grad_norm": 0.1423029899597168, "learning_rate": 0.00017337536865656464, "loss": 1.0829, "step": 1315 }, { "epoch": 0.2675340516365115, "grad_norm": 0.11651685833930969, "learning_rate": 0.00017335502898403336, "loss": 1.0249, "step": 1316 }, { "epoch": 0.26773734498881885, "grad_norm": 0.10999172925949097, "learning_rate": 0.0001733346893115021, "loss": 0.8872, "step": 1317 }, { "epoch": 0.26794063834112625, "grad_norm": 0.125168576836586, "learning_rate": 0.0001733143496389708, "loss": 1.0853, "step": 1318 }, { "epoch": 0.26814393169343365, "grad_norm": 0.1307574361562729, "learning_rate": 0.00017329400996643954, "loss": 0.9643, "step": 1319 }, { "epoch": 0.268347225045741, "grad_norm": 0.136819988489151, "learning_rate": 0.0001732736702939083, "loss": 1.0952, "step": 1320 }, { "epoch": 0.2685505183980484, "grad_norm": 0.12915043532848358, "learning_rate": 0.000173253330621377, "loss": 0.9278, "step": 1321 }, { "epoch": 0.2687538117503558, "grad_norm": 0.12452216446399689, "learning_rate": 0.00017323299094884574, "loss": 1.0679, "step": 1322 }, { "epoch": 0.26895710510266313, "grad_norm": 0.1167951300740242, "learning_rate": 0.00017321265127631446, "loss": 1.009, "step": 1323 }, { "epoch": 0.26916039845497053, "grad_norm": 0.12355060130357742, "learning_rate": 0.00017319231160378319, "loss": 1.1398, "step": 1324 }, { "epoch": 0.2693636918072779, "grad_norm": 0.14160853624343872, "learning_rate": 0.0001731719719312519, "loss": 1.1444, "step": 1325 }, { "epoch": 0.26956698515958527, "grad_norm": 0.12388666719198227, "learning_rate": 0.00017315163225872064, "loss": 1.1242, "step": 1326 }, { "epoch": 0.26977027851189267, "grad_norm": 0.11084824055433273, "learning_rate": 0.00017313129258618936, "loss": 0.9006, "step": 1327 }, { "epoch": 0.26997357186420007, "grad_norm": 0.11720530688762665, "learning_rate": 0.0001731109529136581, "loss": 0.9474, "step": 1328 }, { "epoch": 0.2701768652165074, "grad_norm": 0.13025008141994476, "learning_rate": 0.00017309061324112684, "loss": 1.0815, "step": 1329 }, { "epoch": 0.2703801585688148, "grad_norm": 0.14168627560138702, "learning_rate": 0.00017307027356859556, "loss": 1.0938, "step": 1330 }, { "epoch": 0.27058345192112215, "grad_norm": 0.14329680800437927, "learning_rate": 0.00017304993389606428, "loss": 1.2552, "step": 1331 }, { "epoch": 0.27078674527342955, "grad_norm": 0.12423396855592728, "learning_rate": 0.000173029594223533, "loss": 0.8778, "step": 1332 }, { "epoch": 0.27099003862573695, "grad_norm": 0.13177728652954102, "learning_rate": 0.00017300925455100173, "loss": 1.2632, "step": 1333 }, { "epoch": 0.2711933319780443, "grad_norm": 0.12286023795604706, "learning_rate": 0.00017298891487847046, "loss": 1.078, "step": 1334 }, { "epoch": 0.2713966253303517, "grad_norm": 0.10991277545690536, "learning_rate": 0.00017296857520593918, "loss": 1.0038, "step": 1335 }, { "epoch": 0.2715999186826591, "grad_norm": 0.1368594616651535, "learning_rate": 0.0001729482355334079, "loss": 1.0416, "step": 1336 }, { "epoch": 0.27180321203496643, "grad_norm": 0.11537830531597137, "learning_rate": 0.00017292789586087666, "loss": 1.0933, "step": 1337 }, { "epoch": 0.27200650538727383, "grad_norm": 0.11709605902433395, "learning_rate": 0.00017290755618834538, "loss": 0.958, "step": 1338 }, { "epoch": 0.27220979873958123, "grad_norm": 0.1164301261305809, "learning_rate": 0.0001728872165158141, "loss": 0.8833, "step": 1339 }, { "epoch": 0.2724130920918886, "grad_norm": 0.13498760759830475, "learning_rate": 0.00017286687684328283, "loss": 1.1173, "step": 1340 }, { "epoch": 0.272616385444196, "grad_norm": 0.11391112208366394, "learning_rate": 0.00017284653717075156, "loss": 0.9329, "step": 1341 }, { "epoch": 0.2728196787965034, "grad_norm": 0.12780262529850006, "learning_rate": 0.00017282619749822028, "loss": 1.1273, "step": 1342 }, { "epoch": 0.2730229721488107, "grad_norm": 0.11829452961683273, "learning_rate": 0.000172805857825689, "loss": 0.8299, "step": 1343 }, { "epoch": 0.2732262655011181, "grad_norm": 0.12499269843101501, "learning_rate": 0.00017278551815315773, "loss": 1.1501, "step": 1344 }, { "epoch": 0.2734295588534255, "grad_norm": 0.13114666938781738, "learning_rate": 0.00017276517848062648, "loss": 1.0625, "step": 1345 }, { "epoch": 0.27363285220573286, "grad_norm": 0.1208108589053154, "learning_rate": 0.0001727448388080952, "loss": 0.965, "step": 1346 }, { "epoch": 0.27383614555804026, "grad_norm": 0.12325561046600342, "learning_rate": 0.00017272449913556393, "loss": 1.0976, "step": 1347 }, { "epoch": 0.27403943891034765, "grad_norm": 0.12004940211772919, "learning_rate": 0.00017270415946303265, "loss": 0.9958, "step": 1348 }, { "epoch": 0.274242732262655, "grad_norm": 0.1253954917192459, "learning_rate": 0.00017268381979050138, "loss": 1.1158, "step": 1349 }, { "epoch": 0.2744460256149624, "grad_norm": 0.12844887375831604, "learning_rate": 0.0001726634801179701, "loss": 1.0849, "step": 1350 }, { "epoch": 0.2746493189672698, "grad_norm": 0.1340886950492859, "learning_rate": 0.00017264314044543883, "loss": 1.2566, "step": 1351 }, { "epoch": 0.27485261231957714, "grad_norm": 0.12355068325996399, "learning_rate": 0.00017262280077290755, "loss": 0.9769, "step": 1352 }, { "epoch": 0.27505590567188454, "grad_norm": 0.10396768152713776, "learning_rate": 0.0001726024611003763, "loss": 0.9058, "step": 1353 }, { "epoch": 0.27525919902419194, "grad_norm": 0.1249571368098259, "learning_rate": 0.00017258212142784503, "loss": 1.0982, "step": 1354 }, { "epoch": 0.2754624923764993, "grad_norm": 0.13168682157993317, "learning_rate": 0.00017256178175531375, "loss": 1.077, "step": 1355 }, { "epoch": 0.2756657857288067, "grad_norm": 0.11570144444704056, "learning_rate": 0.00017254144208278248, "loss": 0.9515, "step": 1356 }, { "epoch": 0.275869079081114, "grad_norm": 0.13097792863845825, "learning_rate": 0.0001725211024102512, "loss": 1.1836, "step": 1357 }, { "epoch": 0.2760723724334214, "grad_norm": 0.13371975719928741, "learning_rate": 0.00017250076273771993, "loss": 1.1521, "step": 1358 }, { "epoch": 0.2762756657857288, "grad_norm": 0.11649662256240845, "learning_rate": 0.00017248042306518865, "loss": 0.9173, "step": 1359 }, { "epoch": 0.27647895913803616, "grad_norm": 0.1347874402999878, "learning_rate": 0.00017246008339265738, "loss": 1.2533, "step": 1360 }, { "epoch": 0.27668225249034356, "grad_norm": 0.13108506798744202, "learning_rate": 0.00017243974372012613, "loss": 1.28, "step": 1361 }, { "epoch": 0.27688554584265096, "grad_norm": 0.12440016865730286, "learning_rate": 0.00017241940404759485, "loss": 1.0599, "step": 1362 }, { "epoch": 0.2770888391949583, "grad_norm": 0.14487305283546448, "learning_rate": 0.00017239906437506358, "loss": 0.9908, "step": 1363 }, { "epoch": 0.2772921325472657, "grad_norm": 0.1289856880903244, "learning_rate": 0.0001723787247025323, "loss": 1.0855, "step": 1364 }, { "epoch": 0.2774954258995731, "grad_norm": 0.12901484966278076, "learning_rate": 0.00017235838503000102, "loss": 1.1945, "step": 1365 }, { "epoch": 0.27769871925188044, "grad_norm": 0.12738290429115295, "learning_rate": 0.00017233804535746975, "loss": 1.1233, "step": 1366 }, { "epoch": 0.27790201260418784, "grad_norm": 0.13745670020580292, "learning_rate": 0.00017231770568493847, "loss": 1.174, "step": 1367 }, { "epoch": 0.27810530595649524, "grad_norm": 0.1181466281414032, "learning_rate": 0.0001722973660124072, "loss": 1.0206, "step": 1368 }, { "epoch": 0.2783085993088026, "grad_norm": 0.11488956212997437, "learning_rate": 0.00017227702633987595, "loss": 1.0443, "step": 1369 }, { "epoch": 0.27851189266111, "grad_norm": 0.1327381134033203, "learning_rate": 0.00017225668666734467, "loss": 1.1791, "step": 1370 }, { "epoch": 0.2787151860134174, "grad_norm": 0.13029593229293823, "learning_rate": 0.0001722363469948134, "loss": 1.1022, "step": 1371 }, { "epoch": 0.2789184793657247, "grad_norm": 0.10697850584983826, "learning_rate": 0.00017221600732228212, "loss": 0.9923, "step": 1372 }, { "epoch": 0.2791217727180321, "grad_norm": 0.11224257200956345, "learning_rate": 0.00017219566764975085, "loss": 0.9701, "step": 1373 }, { "epoch": 0.2793250660703395, "grad_norm": 0.11932025849819183, "learning_rate": 0.00017217532797721957, "loss": 1.0136, "step": 1374 }, { "epoch": 0.27952835942264687, "grad_norm": 0.11104830354452133, "learning_rate": 0.0001721549883046883, "loss": 1.1131, "step": 1375 }, { "epoch": 0.27973165277495426, "grad_norm": 0.136908620595932, "learning_rate": 0.00017213464863215702, "loss": 1.2888, "step": 1376 }, { "epoch": 0.27993494612726166, "grad_norm": 0.13100826740264893, "learning_rate": 0.00017211430895962575, "loss": 1.2221, "step": 1377 }, { "epoch": 0.280138239479569, "grad_norm": 0.1406666785478592, "learning_rate": 0.0001720939692870945, "loss": 1.2672, "step": 1378 }, { "epoch": 0.2803415328318764, "grad_norm": 0.10946685820817947, "learning_rate": 0.00017207362961456322, "loss": 0.8652, "step": 1379 }, { "epoch": 0.2805448261841838, "grad_norm": 0.11411663144826889, "learning_rate": 0.00017205328994203195, "loss": 0.8172, "step": 1380 }, { "epoch": 0.28074811953649115, "grad_norm": 0.132404625415802, "learning_rate": 0.00017203295026950067, "loss": 1.12, "step": 1381 }, { "epoch": 0.28095141288879855, "grad_norm": 0.12594282627105713, "learning_rate": 0.0001720126105969694, "loss": 1.2019, "step": 1382 }, { "epoch": 0.2811547062411059, "grad_norm": 0.14421536028385162, "learning_rate": 0.00017199227092443812, "loss": 1.301, "step": 1383 }, { "epoch": 0.2813579995934133, "grad_norm": 0.118538998067379, "learning_rate": 0.00017197193125190684, "loss": 1.0952, "step": 1384 }, { "epoch": 0.2815612929457207, "grad_norm": 0.1211504191160202, "learning_rate": 0.00017195159157937557, "loss": 1.0272, "step": 1385 }, { "epoch": 0.28176458629802803, "grad_norm": 0.13460633158683777, "learning_rate": 0.00017193125190684432, "loss": 1.1372, "step": 1386 }, { "epoch": 0.28196787965033543, "grad_norm": 0.11669941991567612, "learning_rate": 0.00017191091223431304, "loss": 1.0313, "step": 1387 }, { "epoch": 0.2821711730026428, "grad_norm": 0.1414983719587326, "learning_rate": 0.00017189057256178177, "loss": 1.3215, "step": 1388 }, { "epoch": 0.28237446635495017, "grad_norm": 0.11535824090242386, "learning_rate": 0.0001718702328892505, "loss": 1.0569, "step": 1389 }, { "epoch": 0.28257775970725757, "grad_norm": 0.11279894411563873, "learning_rate": 0.00017184989321671922, "loss": 0.9706, "step": 1390 }, { "epoch": 0.28278105305956497, "grad_norm": 0.12699778378009796, "learning_rate": 0.00017182955354418794, "loss": 1.0541, "step": 1391 }, { "epoch": 0.2829843464118723, "grad_norm": 0.13677164912223816, "learning_rate": 0.00017180921387165667, "loss": 1.0118, "step": 1392 }, { "epoch": 0.2831876397641797, "grad_norm": 0.1261303573846817, "learning_rate": 0.0001717888741991254, "loss": 1.0019, "step": 1393 }, { "epoch": 0.2833909331164871, "grad_norm": 0.15269511938095093, "learning_rate": 0.00017176853452659414, "loss": 1.1414, "step": 1394 }, { "epoch": 0.28359422646879445, "grad_norm": 0.11726024746894836, "learning_rate": 0.00017174819485406287, "loss": 1.071, "step": 1395 }, { "epoch": 0.28379751982110185, "grad_norm": 0.10793468356132507, "learning_rate": 0.0001717278551815316, "loss": 1.0911, "step": 1396 }, { "epoch": 0.28400081317340925, "grad_norm": 0.13417348265647888, "learning_rate": 0.00017170751550900032, "loss": 1.182, "step": 1397 }, { "epoch": 0.2842041065257166, "grad_norm": 0.1220618337392807, "learning_rate": 0.00017168717583646904, "loss": 1.1165, "step": 1398 }, { "epoch": 0.284407399878024, "grad_norm": 0.1326867640018463, "learning_rate": 0.00017166683616393776, "loss": 1.0426, "step": 1399 }, { "epoch": 0.2846106932303314, "grad_norm": 0.12562425434589386, "learning_rate": 0.0001716464964914065, "loss": 1.1779, "step": 1400 }, { "epoch": 0.28481398658263873, "grad_norm": 0.13102425634860992, "learning_rate": 0.00017162615681887521, "loss": 1.1402, "step": 1401 }, { "epoch": 0.28501727993494613, "grad_norm": 0.12704792618751526, "learning_rate": 0.00017160581714634397, "loss": 1.18, "step": 1402 }, { "epoch": 0.28522057328725353, "grad_norm": 0.12526075541973114, "learning_rate": 0.0001715854774738127, "loss": 1.109, "step": 1403 }, { "epoch": 0.2854238666395609, "grad_norm": 0.12174190580844879, "learning_rate": 0.00017156513780128141, "loss": 1.0839, "step": 1404 }, { "epoch": 0.2856271599918683, "grad_norm": 0.13030166923999786, "learning_rate": 0.00017154479812875014, "loss": 1.0982, "step": 1405 }, { "epoch": 0.2858304533441757, "grad_norm": 0.12179411202669144, "learning_rate": 0.00017152445845621886, "loss": 1.0617, "step": 1406 }, { "epoch": 0.286033746696483, "grad_norm": 0.12964552640914917, "learning_rate": 0.0001715041187836876, "loss": 1.117, "step": 1407 }, { "epoch": 0.2862370400487904, "grad_norm": 0.12146733701229095, "learning_rate": 0.0001714837791111563, "loss": 1.1715, "step": 1408 }, { "epoch": 0.28644033340109776, "grad_norm": 0.12994210422039032, "learning_rate": 0.00017146343943862504, "loss": 1.1975, "step": 1409 }, { "epoch": 0.28664362675340516, "grad_norm": 0.12996168434619904, "learning_rate": 0.0001714430997660938, "loss": 1.1968, "step": 1410 }, { "epoch": 0.28684692010571256, "grad_norm": 0.13590598106384277, "learning_rate": 0.0001714227600935625, "loss": 1.1045, "step": 1411 }, { "epoch": 0.2870502134580199, "grad_norm": 0.12337225675582886, "learning_rate": 0.00017140242042103124, "loss": 1.1262, "step": 1412 }, { "epoch": 0.2872535068103273, "grad_norm": 0.11442485451698303, "learning_rate": 0.00017138208074849996, "loss": 1.0697, "step": 1413 }, { "epoch": 0.2874568001626347, "grad_norm": 0.1333555281162262, "learning_rate": 0.00017136174107596869, "loss": 0.9691, "step": 1414 }, { "epoch": 0.28766009351494204, "grad_norm": 0.13435356318950653, "learning_rate": 0.0001713414014034374, "loss": 1.071, "step": 1415 }, { "epoch": 0.28786338686724944, "grad_norm": 0.11869612336158752, "learning_rate": 0.00017132106173090613, "loss": 1.2081, "step": 1416 }, { "epoch": 0.28806668021955684, "grad_norm": 0.13402745127677917, "learning_rate": 0.00017130072205837486, "loss": 1.1887, "step": 1417 }, { "epoch": 0.2882699735718642, "grad_norm": 0.1282026469707489, "learning_rate": 0.00017128038238584358, "loss": 1.1802, "step": 1418 }, { "epoch": 0.2884732669241716, "grad_norm": 0.12006261944770813, "learning_rate": 0.00017126004271331234, "loss": 1.0366, "step": 1419 }, { "epoch": 0.288676560276479, "grad_norm": 0.10971211642026901, "learning_rate": 0.00017123970304078106, "loss": 1.0502, "step": 1420 }, { "epoch": 0.2888798536287863, "grad_norm": 0.12401802092790604, "learning_rate": 0.00017121936336824978, "loss": 0.9525, "step": 1421 }, { "epoch": 0.2890831469810937, "grad_norm": 0.12699580192565918, "learning_rate": 0.0001711990236957185, "loss": 1.0588, "step": 1422 }, { "epoch": 0.2892864403334011, "grad_norm": 0.10931636393070221, "learning_rate": 0.00017117868402318723, "loss": 0.9512, "step": 1423 }, { "epoch": 0.28948973368570846, "grad_norm": 0.13325555622577667, "learning_rate": 0.00017115834435065596, "loss": 1.2333, "step": 1424 }, { "epoch": 0.28969302703801586, "grad_norm": 0.1266210675239563, "learning_rate": 0.00017113800467812468, "loss": 1.018, "step": 1425 }, { "epoch": 0.28989632039032326, "grad_norm": 0.12187005579471588, "learning_rate": 0.0001711176650055934, "loss": 1.0865, "step": 1426 }, { "epoch": 0.2900996137426306, "grad_norm": 0.10819690674543381, "learning_rate": 0.00017109732533306216, "loss": 0.8355, "step": 1427 }, { "epoch": 0.290302907094938, "grad_norm": 0.11890331655740738, "learning_rate": 0.00017107698566053088, "loss": 1.0521, "step": 1428 }, { "epoch": 0.2905062004472454, "grad_norm": 0.12693597376346588, "learning_rate": 0.0001710566459879996, "loss": 1.1304, "step": 1429 }, { "epoch": 0.29070949379955274, "grad_norm": 0.12627696990966797, "learning_rate": 0.00017103630631546833, "loss": 1.0533, "step": 1430 }, { "epoch": 0.29091278715186014, "grad_norm": 0.13593046367168427, "learning_rate": 0.00017101596664293706, "loss": 1.2738, "step": 1431 }, { "epoch": 0.29111608050416754, "grad_norm": 0.10364729166030884, "learning_rate": 0.00017099562697040578, "loss": 0.9938, "step": 1432 }, { "epoch": 0.2913193738564749, "grad_norm": 0.11455982178449631, "learning_rate": 0.0001709752872978745, "loss": 0.9798, "step": 1433 }, { "epoch": 0.2915226672087823, "grad_norm": 0.12030831724405289, "learning_rate": 0.00017095494762534323, "loss": 1.1068, "step": 1434 }, { "epoch": 0.2917259605610896, "grad_norm": 0.12434829771518707, "learning_rate": 0.00017093460795281198, "loss": 0.9511, "step": 1435 }, { "epoch": 0.291929253913397, "grad_norm": 0.13269619643688202, "learning_rate": 0.0001709142682802807, "loss": 1.0469, "step": 1436 }, { "epoch": 0.2921325472657044, "grad_norm": 0.12037021666765213, "learning_rate": 0.00017089392860774943, "loss": 1.1413, "step": 1437 }, { "epoch": 0.29233584061801177, "grad_norm": 0.1290545016527176, "learning_rate": 0.00017087358893521815, "loss": 0.9745, "step": 1438 }, { "epoch": 0.29253913397031917, "grad_norm": 0.13319085538387299, "learning_rate": 0.00017085324926268688, "loss": 1.3102, "step": 1439 }, { "epoch": 0.29274242732262656, "grad_norm": 0.11888034641742706, "learning_rate": 0.0001708329095901556, "loss": 0.982, "step": 1440 }, { "epoch": 0.2929457206749339, "grad_norm": 0.10824552178382874, "learning_rate": 0.00017081256991762433, "loss": 0.922, "step": 1441 }, { "epoch": 0.2931490140272413, "grad_norm": 0.11319594085216522, "learning_rate": 0.00017079223024509305, "loss": 0.997, "step": 1442 }, { "epoch": 0.2933523073795487, "grad_norm": 0.12176964432001114, "learning_rate": 0.0001707718905725618, "loss": 0.9845, "step": 1443 }, { "epoch": 0.29355560073185605, "grad_norm": 0.13725343346595764, "learning_rate": 0.00017075155090003053, "loss": 1.3378, "step": 1444 }, { "epoch": 0.29375889408416345, "grad_norm": 0.1362079679965973, "learning_rate": 0.00017073121122749925, "loss": 1.2632, "step": 1445 }, { "epoch": 0.29396218743647085, "grad_norm": 0.12925031781196594, "learning_rate": 0.00017071087155496798, "loss": 1.0776, "step": 1446 }, { "epoch": 0.2941654807887782, "grad_norm": 0.11627811938524246, "learning_rate": 0.0001706905318824367, "loss": 0.9881, "step": 1447 }, { "epoch": 0.2943687741410856, "grad_norm": 0.13387028872966766, "learning_rate": 0.00017067019220990543, "loss": 1.0899, "step": 1448 }, { "epoch": 0.294572067493393, "grad_norm": 0.12257883697748184, "learning_rate": 0.00017064985253737415, "loss": 1.1031, "step": 1449 }, { "epoch": 0.29477536084570033, "grad_norm": 0.14938175678253174, "learning_rate": 0.00017062951286484287, "loss": 1.1557, "step": 1450 }, { "epoch": 0.29497865419800773, "grad_norm": 0.12559346854686737, "learning_rate": 0.00017060917319231163, "loss": 1.1062, "step": 1451 }, { "epoch": 0.2951819475503151, "grad_norm": 0.12475700676441193, "learning_rate": 0.00017058883351978035, "loss": 1.0834, "step": 1452 }, { "epoch": 0.29538524090262247, "grad_norm": 0.1364937126636505, "learning_rate": 0.00017056849384724908, "loss": 1.0028, "step": 1453 }, { "epoch": 0.29558853425492987, "grad_norm": 0.12429028004407883, "learning_rate": 0.0001705481541747178, "loss": 1.1087, "step": 1454 }, { "epoch": 0.29579182760723727, "grad_norm": 0.1251228153705597, "learning_rate": 0.00017052781450218652, "loss": 0.9675, "step": 1455 }, { "epoch": 0.2959951209595446, "grad_norm": 0.12485919892787933, "learning_rate": 0.00017050747482965525, "loss": 1.0045, "step": 1456 }, { "epoch": 0.296198414311852, "grad_norm": 0.12948845326900482, "learning_rate": 0.00017048713515712397, "loss": 1.1154, "step": 1457 }, { "epoch": 0.2964017076641594, "grad_norm": 0.1288408488035202, "learning_rate": 0.0001704667954845927, "loss": 1.1203, "step": 1458 }, { "epoch": 0.29660500101646675, "grad_norm": 0.13588744401931763, "learning_rate": 0.00017044645581206142, "loss": 1.1436, "step": 1459 }, { "epoch": 0.29680829436877415, "grad_norm": 0.1264243721961975, "learning_rate": 0.00017042611613953017, "loss": 1.1903, "step": 1460 }, { "epoch": 0.2970115877210815, "grad_norm": 0.12819139659404755, "learning_rate": 0.0001704057764669989, "loss": 1.1337, "step": 1461 }, { "epoch": 0.2972148810733889, "grad_norm": 0.1189684271812439, "learning_rate": 0.00017038543679446762, "loss": 1.1437, "step": 1462 }, { "epoch": 0.2974181744256963, "grad_norm": 0.1304028183221817, "learning_rate": 0.00017036509712193635, "loss": 1.1653, "step": 1463 }, { "epoch": 0.29762146777800363, "grad_norm": 0.12161426246166229, "learning_rate": 0.00017034475744940507, "loss": 1.0482, "step": 1464 }, { "epoch": 0.29782476113031103, "grad_norm": 0.1224290132522583, "learning_rate": 0.0001703244177768738, "loss": 1.0592, "step": 1465 }, { "epoch": 0.29802805448261843, "grad_norm": 0.1365649402141571, "learning_rate": 0.00017030407810434252, "loss": 1.0758, "step": 1466 }, { "epoch": 0.2982313478349258, "grad_norm": 0.12406224012374878, "learning_rate": 0.00017028373843181124, "loss": 1.0901, "step": 1467 }, { "epoch": 0.2984346411872332, "grad_norm": 0.13438360393047333, "learning_rate": 0.00017026339875928, "loss": 1.1552, "step": 1468 }, { "epoch": 0.2986379345395406, "grad_norm": 0.14297276735305786, "learning_rate": 0.00017024305908674872, "loss": 1.1271, "step": 1469 }, { "epoch": 0.2988412278918479, "grad_norm": 0.11946640908718109, "learning_rate": 0.00017022271941421745, "loss": 1.0295, "step": 1470 }, { "epoch": 0.2990445212441553, "grad_norm": 0.12182927876710892, "learning_rate": 0.00017020237974168617, "loss": 1.0381, "step": 1471 }, { "epoch": 0.2992478145964627, "grad_norm": 0.1238449215888977, "learning_rate": 0.0001701820400691549, "loss": 1.0399, "step": 1472 }, { "epoch": 0.29945110794877006, "grad_norm": 0.12575775384902954, "learning_rate": 0.00017016170039662362, "loss": 1.1552, "step": 1473 }, { "epoch": 0.29965440130107746, "grad_norm": 0.14087268710136414, "learning_rate": 0.00017014136072409234, "loss": 1.1225, "step": 1474 }, { "epoch": 0.29985769465338485, "grad_norm": 0.13070684671401978, "learning_rate": 0.00017012102105156107, "loss": 1.1097, "step": 1475 }, { "epoch": 0.3000609880056922, "grad_norm": 0.12527720630168915, "learning_rate": 0.00017010068137902982, "loss": 1.0171, "step": 1476 }, { "epoch": 0.3002642813579996, "grad_norm": 0.12080081552267075, "learning_rate": 0.00017008034170649854, "loss": 1.0934, "step": 1477 }, { "epoch": 0.300467574710307, "grad_norm": 0.13225379586219788, "learning_rate": 0.00017006000203396727, "loss": 1.1286, "step": 1478 }, { "epoch": 0.30067086806261434, "grad_norm": 0.14612498879432678, "learning_rate": 0.000170039662361436, "loss": 1.4, "step": 1479 }, { "epoch": 0.30087416141492174, "grad_norm": 0.12612837553024292, "learning_rate": 0.00017001932268890472, "loss": 0.9265, "step": 1480 }, { "epoch": 0.30107745476722914, "grad_norm": 0.11075981706380844, "learning_rate": 0.00016999898301637344, "loss": 0.992, "step": 1481 }, { "epoch": 0.3012807481195365, "grad_norm": 0.11420360207557678, "learning_rate": 0.00016997864334384217, "loss": 1.033, "step": 1482 }, { "epoch": 0.3014840414718439, "grad_norm": 0.1344219148159027, "learning_rate": 0.0001699583036713109, "loss": 1.0934, "step": 1483 }, { "epoch": 0.3016873348241513, "grad_norm": 0.13956451416015625, "learning_rate": 0.00016993796399877964, "loss": 1.2297, "step": 1484 }, { "epoch": 0.3018906281764586, "grad_norm": 0.1293005496263504, "learning_rate": 0.00016991762432624837, "loss": 1.0928, "step": 1485 }, { "epoch": 0.302093921528766, "grad_norm": 0.11039478331804276, "learning_rate": 0.0001698972846537171, "loss": 0.9906, "step": 1486 }, { "epoch": 0.30229721488107336, "grad_norm": 0.13603124022483826, "learning_rate": 0.00016987694498118582, "loss": 1.3131, "step": 1487 }, { "epoch": 0.30250050823338076, "grad_norm": 0.14525099098682404, "learning_rate": 0.00016985660530865454, "loss": 1.1205, "step": 1488 }, { "epoch": 0.30270380158568816, "grad_norm": 0.14237269759178162, "learning_rate": 0.00016983626563612326, "loss": 1.1144, "step": 1489 }, { "epoch": 0.3029070949379955, "grad_norm": 0.10434848070144653, "learning_rate": 0.000169815925963592, "loss": 0.9038, "step": 1490 }, { "epoch": 0.3031103882903029, "grad_norm": 0.11946713179349899, "learning_rate": 0.0001697955862910607, "loss": 1.0611, "step": 1491 }, { "epoch": 0.3033136816426103, "grad_norm": 0.12547194957733154, "learning_rate": 0.00016977524661852946, "loss": 0.999, "step": 1492 }, { "epoch": 0.30351697499491764, "grad_norm": 0.13156647980213165, "learning_rate": 0.0001697549069459982, "loss": 1.1174, "step": 1493 }, { "epoch": 0.30372026834722504, "grad_norm": 0.13008251786231995, "learning_rate": 0.00016973456727346691, "loss": 1.1239, "step": 1494 }, { "epoch": 0.30392356169953244, "grad_norm": 0.1194852888584137, "learning_rate": 0.00016971422760093564, "loss": 0.9974, "step": 1495 }, { "epoch": 0.3041268550518398, "grad_norm": 0.12988907098770142, "learning_rate": 0.00016969388792840436, "loss": 1.0105, "step": 1496 }, { "epoch": 0.3043301484041472, "grad_norm": 0.13736090064048767, "learning_rate": 0.0001696735482558731, "loss": 1.167, "step": 1497 }, { "epoch": 0.3045334417564546, "grad_norm": 0.12946954369544983, "learning_rate": 0.0001696532085833418, "loss": 1.1628, "step": 1498 }, { "epoch": 0.3047367351087619, "grad_norm": 0.12599951028823853, "learning_rate": 0.00016963286891081054, "loss": 1.2581, "step": 1499 }, { "epoch": 0.3049400284610693, "grad_norm": 0.12264920026063919, "learning_rate": 0.00016961252923827926, "loss": 0.966, "step": 1500 }, { "epoch": 0.3051433218133767, "grad_norm": 0.12567077577114105, "learning_rate": 0.000169592189565748, "loss": 1.0898, "step": 1501 }, { "epoch": 0.30534661516568407, "grad_norm": 0.12665922939777374, "learning_rate": 0.00016957184989321674, "loss": 1.1621, "step": 1502 }, { "epoch": 0.30554990851799146, "grad_norm": 0.10949800908565521, "learning_rate": 0.00016955151022068546, "loss": 0.9312, "step": 1503 }, { "epoch": 0.30575320187029886, "grad_norm": 0.13273455202579498, "learning_rate": 0.00016953117054815419, "loss": 1.1535, "step": 1504 }, { "epoch": 0.3059564952226062, "grad_norm": 0.13857555389404297, "learning_rate": 0.0001695108308756229, "loss": 1.1564, "step": 1505 }, { "epoch": 0.3061597885749136, "grad_norm": 0.10915102064609528, "learning_rate": 0.00016949049120309163, "loss": 0.8977, "step": 1506 }, { "epoch": 0.306363081927221, "grad_norm": 0.11122920364141464, "learning_rate": 0.00016947015153056036, "loss": 0.9389, "step": 1507 }, { "epoch": 0.30656637527952835, "grad_norm": 0.13575953245162964, "learning_rate": 0.00016944981185802908, "loss": 1.2634, "step": 1508 }, { "epoch": 0.30676966863183575, "grad_norm": 0.12309823930263519, "learning_rate": 0.00016942947218549783, "loss": 1.0562, "step": 1509 }, { "epoch": 0.30697296198414314, "grad_norm": 0.13939395546913147, "learning_rate": 0.00016940913251296656, "loss": 1.2801, "step": 1510 }, { "epoch": 0.3071762553364505, "grad_norm": 0.11922150105237961, "learning_rate": 0.00016938879284043528, "loss": 1.0694, "step": 1511 }, { "epoch": 0.3073795486887579, "grad_norm": 0.12427409738302231, "learning_rate": 0.000169368453167904, "loss": 1.0676, "step": 1512 }, { "epoch": 0.30758284204106523, "grad_norm": 0.11560991406440735, "learning_rate": 0.00016934811349537273, "loss": 0.9416, "step": 1513 }, { "epoch": 0.30778613539337263, "grad_norm": 0.12494566291570663, "learning_rate": 0.00016932777382284146, "loss": 1.0484, "step": 1514 }, { "epoch": 0.30798942874568, "grad_norm": 0.14169259369373322, "learning_rate": 0.00016930743415031018, "loss": 1.2045, "step": 1515 }, { "epoch": 0.30819272209798737, "grad_norm": 0.13265348970890045, "learning_rate": 0.0001692870944777789, "loss": 0.9964, "step": 1516 }, { "epoch": 0.30839601545029477, "grad_norm": 0.1246609166264534, "learning_rate": 0.00016926675480524766, "loss": 1.0218, "step": 1517 }, { "epoch": 0.30859930880260217, "grad_norm": 0.1305045336484909, "learning_rate": 0.00016924641513271638, "loss": 1.0899, "step": 1518 }, { "epoch": 0.3088026021549095, "grad_norm": 0.1269298940896988, "learning_rate": 0.0001692260754601851, "loss": 1.1613, "step": 1519 }, { "epoch": 0.3090058955072169, "grad_norm": 0.13356846570968628, "learning_rate": 0.00016920573578765383, "loss": 1.2171, "step": 1520 }, { "epoch": 0.3092091888595243, "grad_norm": 0.12417469173669815, "learning_rate": 0.00016918539611512256, "loss": 1.0345, "step": 1521 }, { "epoch": 0.30941248221183165, "grad_norm": 0.12965606153011322, "learning_rate": 0.00016916505644259128, "loss": 1.003, "step": 1522 }, { "epoch": 0.30961577556413905, "grad_norm": 0.13075895607471466, "learning_rate": 0.00016914471677006, "loss": 1.1521, "step": 1523 }, { "epoch": 0.30981906891644645, "grad_norm": 0.1491623818874359, "learning_rate": 0.00016912437709752873, "loss": 1.1669, "step": 1524 }, { "epoch": 0.3100223622687538, "grad_norm": 0.13368669152259827, "learning_rate": 0.00016910403742499748, "loss": 1.1996, "step": 1525 }, { "epoch": 0.3102256556210612, "grad_norm": 0.12484747171401978, "learning_rate": 0.0001690836977524662, "loss": 0.9979, "step": 1526 }, { "epoch": 0.3104289489733686, "grad_norm": 0.11716404557228088, "learning_rate": 0.00016906335807993493, "loss": 1.0686, "step": 1527 }, { "epoch": 0.31063224232567593, "grad_norm": 0.1104549840092659, "learning_rate": 0.00016904301840740365, "loss": 0.9746, "step": 1528 }, { "epoch": 0.31083553567798333, "grad_norm": 0.1288052350282669, "learning_rate": 0.00016902267873487238, "loss": 1.0593, "step": 1529 }, { "epoch": 0.31103882903029073, "grad_norm": 0.13284744322299957, "learning_rate": 0.0001690023390623411, "loss": 1.1837, "step": 1530 }, { "epoch": 0.3112421223825981, "grad_norm": 0.10993791371583939, "learning_rate": 0.00016898199938980983, "loss": 0.9076, "step": 1531 }, { "epoch": 0.3114454157349055, "grad_norm": 0.1289556920528412, "learning_rate": 0.00016896165971727855, "loss": 1.124, "step": 1532 }, { "epoch": 0.31164870908721287, "grad_norm": 0.12656551599502563, "learning_rate": 0.0001689413200447473, "loss": 1.0672, "step": 1533 }, { "epoch": 0.3118520024395202, "grad_norm": 0.12359779328107834, "learning_rate": 0.00016892098037221603, "loss": 1.1162, "step": 1534 }, { "epoch": 0.3120552957918276, "grad_norm": 0.13356052339076996, "learning_rate": 0.00016890064069968475, "loss": 1.1927, "step": 1535 }, { "epoch": 0.312258589144135, "grad_norm": 0.12397721409797668, "learning_rate": 0.00016888030102715348, "loss": 1.0744, "step": 1536 }, { "epoch": 0.31246188249644236, "grad_norm": 0.14322160184383392, "learning_rate": 0.0001688599613546222, "loss": 1.076, "step": 1537 }, { "epoch": 0.31266517584874975, "grad_norm": 0.1378001719713211, "learning_rate": 0.00016883962168209093, "loss": 1.4044, "step": 1538 }, { "epoch": 0.3128684692010571, "grad_norm": 0.12438174337148666, "learning_rate": 0.00016881928200955965, "loss": 1.1154, "step": 1539 }, { "epoch": 0.3130717625533645, "grad_norm": 0.12698177993297577, "learning_rate": 0.00016879894233702837, "loss": 1.1988, "step": 1540 }, { "epoch": 0.3132750559056719, "grad_norm": 0.12074883282184601, "learning_rate": 0.00016877860266449713, "loss": 1.0175, "step": 1541 }, { "epoch": 0.31347834925797924, "grad_norm": 0.13820214569568634, "learning_rate": 0.00016875826299196585, "loss": 1.0498, "step": 1542 }, { "epoch": 0.31368164261028664, "grad_norm": 0.14697261154651642, "learning_rate": 0.00016873792331943458, "loss": 1.2677, "step": 1543 }, { "epoch": 0.31388493596259404, "grad_norm": 0.13973405957221985, "learning_rate": 0.0001687175836469033, "loss": 1.2233, "step": 1544 }, { "epoch": 0.3140882293149014, "grad_norm": 0.1303880661725998, "learning_rate": 0.000168697243974372, "loss": 1.1023, "step": 1545 }, { "epoch": 0.3142915226672088, "grad_norm": 0.13434049487113953, "learning_rate": 0.00016867690430184075, "loss": 1.1206, "step": 1546 }, { "epoch": 0.3144948160195162, "grad_norm": 0.11447029560804367, "learning_rate": 0.00016865656462930947, "loss": 0.978, "step": 1547 }, { "epoch": 0.3146981093718235, "grad_norm": 0.12716947495937347, "learning_rate": 0.0001686362249567782, "loss": 1.1544, "step": 1548 }, { "epoch": 0.3149014027241309, "grad_norm": 0.12545545399188995, "learning_rate": 0.00016861588528424692, "loss": 0.9976, "step": 1549 }, { "epoch": 0.3151046960764383, "grad_norm": 0.13446862995624542, "learning_rate": 0.00016859554561171567, "loss": 1.1167, "step": 1550 }, { "epoch": 0.31530798942874566, "grad_norm": 0.12542487680912018, "learning_rate": 0.0001685752059391844, "loss": 1.1148, "step": 1551 }, { "epoch": 0.31551128278105306, "grad_norm": 0.12793605029582977, "learning_rate": 0.00016855486626665312, "loss": 1.1139, "step": 1552 }, { "epoch": 0.31571457613336046, "grad_norm": 0.13481125235557556, "learning_rate": 0.00016853452659412182, "loss": 1.144, "step": 1553 }, { "epoch": 0.3159178694856678, "grad_norm": 0.11555742472410202, "learning_rate": 0.00016851418692159057, "loss": 1.0276, "step": 1554 }, { "epoch": 0.3161211628379752, "grad_norm": 0.11695119738578796, "learning_rate": 0.0001684938472490593, "loss": 0.9493, "step": 1555 }, { "epoch": 0.3163244561902826, "grad_norm": 0.13503003120422363, "learning_rate": 0.00016847350757652802, "loss": 1.0556, "step": 1556 }, { "epoch": 0.31652774954258994, "grad_norm": 0.1347092092037201, "learning_rate": 0.00016845316790399674, "loss": 1.0362, "step": 1557 }, { "epoch": 0.31673104289489734, "grad_norm": 0.12576071918010712, "learning_rate": 0.0001684328282314655, "loss": 1.085, "step": 1558 }, { "epoch": 0.31693433624720474, "grad_norm": 0.1280100792646408, "learning_rate": 0.00016841248855893422, "loss": 1.2004, "step": 1559 }, { "epoch": 0.3171376295995121, "grad_norm": 0.11573471873998642, "learning_rate": 0.00016839214888640295, "loss": 0.9058, "step": 1560 }, { "epoch": 0.3173409229518195, "grad_norm": 0.12192318588495255, "learning_rate": 0.00016837180921387167, "loss": 0.9789, "step": 1561 }, { "epoch": 0.3175442163041269, "grad_norm": 0.1251290738582611, "learning_rate": 0.0001683514695413404, "loss": 0.9818, "step": 1562 }, { "epoch": 0.3177475096564342, "grad_norm": 0.12726342678070068, "learning_rate": 0.00016833112986880912, "loss": 0.9911, "step": 1563 }, { "epoch": 0.3179508030087416, "grad_norm": 0.12146829068660736, "learning_rate": 0.00016831079019627784, "loss": 1.0005, "step": 1564 }, { "epoch": 0.318154096361049, "grad_norm": 0.12948118150234222, "learning_rate": 0.00016829045052374657, "loss": 0.9286, "step": 1565 }, { "epoch": 0.31835738971335636, "grad_norm": 0.1411774903535843, "learning_rate": 0.00016827011085121532, "loss": 1.031, "step": 1566 }, { "epoch": 0.31856068306566376, "grad_norm": 0.12407765537500381, "learning_rate": 0.00016824977117868404, "loss": 1.0538, "step": 1567 }, { "epoch": 0.3187639764179711, "grad_norm": 0.1235983669757843, "learning_rate": 0.00016822943150615277, "loss": 0.9356, "step": 1568 }, { "epoch": 0.3189672697702785, "grad_norm": 0.13756640255451202, "learning_rate": 0.0001682090918336215, "loss": 1.3008, "step": 1569 }, { "epoch": 0.3191705631225859, "grad_norm": 0.14735132455825806, "learning_rate": 0.00016818875216109022, "loss": 1.1271, "step": 1570 }, { "epoch": 0.31937385647489325, "grad_norm": 0.14694719016551971, "learning_rate": 0.00016816841248855894, "loss": 1.1222, "step": 1571 }, { "epoch": 0.31957714982720065, "grad_norm": 0.10828382521867752, "learning_rate": 0.00016814807281602767, "loss": 1.0565, "step": 1572 }, { "epoch": 0.31978044317950804, "grad_norm": 0.1332756131887436, "learning_rate": 0.0001681277331434964, "loss": 1.0085, "step": 1573 }, { "epoch": 0.3199837365318154, "grad_norm": 0.12354031950235367, "learning_rate": 0.00016810739347096514, "loss": 1.1101, "step": 1574 }, { "epoch": 0.3201870298841228, "grad_norm": 0.1273805797100067, "learning_rate": 0.00016808705379843387, "loss": 1.1402, "step": 1575 }, { "epoch": 0.3203903232364302, "grad_norm": 0.1219901368021965, "learning_rate": 0.0001680667141259026, "loss": 1.1955, "step": 1576 }, { "epoch": 0.32059361658873753, "grad_norm": 0.13021346926689148, "learning_rate": 0.00016804637445337132, "loss": 1.2073, "step": 1577 }, { "epoch": 0.3207969099410449, "grad_norm": 0.11928975582122803, "learning_rate": 0.00016802603478084004, "loss": 1.0758, "step": 1578 }, { "epoch": 0.3210002032933523, "grad_norm": 0.10524530708789825, "learning_rate": 0.00016800569510830876, "loss": 0.9655, "step": 1579 }, { "epoch": 0.32120349664565967, "grad_norm": 0.13994352519512177, "learning_rate": 0.0001679853554357775, "loss": 1.1405, "step": 1580 }, { "epoch": 0.32140678999796707, "grad_norm": 0.13520392775535583, "learning_rate": 0.0001679650157632462, "loss": 1.3525, "step": 1581 }, { "epoch": 0.32161008335027447, "grad_norm": 0.13306692242622375, "learning_rate": 0.00016794467609071496, "loss": 1.3286, "step": 1582 }, { "epoch": 0.3218133767025818, "grad_norm": 0.1361495852470398, "learning_rate": 0.0001679243364181837, "loss": 1.0468, "step": 1583 }, { "epoch": 0.3220166700548892, "grad_norm": 0.1192341074347496, "learning_rate": 0.0001679039967456524, "loss": 0.7855, "step": 1584 }, { "epoch": 0.3222199634071966, "grad_norm": 0.12359831482172012, "learning_rate": 0.00016788365707312114, "loss": 1.1627, "step": 1585 }, { "epoch": 0.32242325675950395, "grad_norm": 0.1272861659526825, "learning_rate": 0.00016786331740058984, "loss": 1.0442, "step": 1586 }, { "epoch": 0.32262655011181135, "grad_norm": 0.1261843740940094, "learning_rate": 0.0001678429777280586, "loss": 1.0033, "step": 1587 }, { "epoch": 0.32282984346411875, "grad_norm": 0.11822490394115448, "learning_rate": 0.0001678226380555273, "loss": 1.0815, "step": 1588 }, { "epoch": 0.3230331368164261, "grad_norm": 0.13497643172740936, "learning_rate": 0.00016780229838299604, "loss": 1.0485, "step": 1589 }, { "epoch": 0.3232364301687335, "grad_norm": 0.12484399974346161, "learning_rate": 0.00016778195871046476, "loss": 1.244, "step": 1590 }, { "epoch": 0.3234397235210409, "grad_norm": 0.12844592332839966, "learning_rate": 0.0001677616190379335, "loss": 1.2803, "step": 1591 }, { "epoch": 0.32364301687334823, "grad_norm": 0.12499992549419403, "learning_rate": 0.00016774127936540224, "loss": 1.0049, "step": 1592 }, { "epoch": 0.32384631022565563, "grad_norm": 0.12357242405414581, "learning_rate": 0.00016772093969287096, "loss": 1.0463, "step": 1593 }, { "epoch": 0.324049603577963, "grad_norm": 0.11749047785997391, "learning_rate": 0.00016770060002033966, "loss": 0.9639, "step": 1594 }, { "epoch": 0.3242528969302704, "grad_norm": 0.1409110128879547, "learning_rate": 0.0001676802603478084, "loss": 1.0898, "step": 1595 }, { "epoch": 0.32445619028257777, "grad_norm": 0.1287623941898346, "learning_rate": 0.00016765992067527713, "loss": 1.1735, "step": 1596 }, { "epoch": 0.3246594836348851, "grad_norm": 0.1255931705236435, "learning_rate": 0.00016763958100274586, "loss": 1.0209, "step": 1597 }, { "epoch": 0.3248627769871925, "grad_norm": 0.1277484893798828, "learning_rate": 0.00016761924133021458, "loss": 1.1324, "step": 1598 }, { "epoch": 0.3250660703394999, "grad_norm": 0.14885109663009644, "learning_rate": 0.00016759890165768333, "loss": 1.1919, "step": 1599 }, { "epoch": 0.32526936369180726, "grad_norm": 0.12765826284885406, "learning_rate": 0.00016757856198515206, "loss": 1.1301, "step": 1600 }, { "epoch": 0.32547265704411465, "grad_norm": 0.12677320837974548, "learning_rate": 0.00016755822231262078, "loss": 1.1406, "step": 1601 }, { "epoch": 0.32567595039642205, "grad_norm": 0.12238804996013641, "learning_rate": 0.00016753788264008948, "loss": 0.9797, "step": 1602 }, { "epoch": 0.3258792437487294, "grad_norm": 0.13958637416362762, "learning_rate": 0.00016751754296755823, "loss": 1.1974, "step": 1603 }, { "epoch": 0.3260825371010368, "grad_norm": 0.12978553771972656, "learning_rate": 0.00016749720329502696, "loss": 0.9885, "step": 1604 }, { "epoch": 0.3262858304533442, "grad_norm": 0.12407691776752472, "learning_rate": 0.00016747686362249568, "loss": 1.1167, "step": 1605 }, { "epoch": 0.32648912380565154, "grad_norm": 0.13904057443141937, "learning_rate": 0.0001674565239499644, "loss": 1.1454, "step": 1606 }, { "epoch": 0.32669241715795894, "grad_norm": 0.1415109634399414, "learning_rate": 0.00016743618427743316, "loss": 1.2619, "step": 1607 }, { "epoch": 0.32689571051026634, "grad_norm": 0.11249466240406036, "learning_rate": 0.00016741584460490188, "loss": 0.9385, "step": 1608 }, { "epoch": 0.3270990038625737, "grad_norm": 0.11592496186494827, "learning_rate": 0.0001673955049323706, "loss": 0.9985, "step": 1609 }, { "epoch": 0.3273022972148811, "grad_norm": 0.11594976484775543, "learning_rate": 0.0001673751652598393, "loss": 0.9626, "step": 1610 }, { "epoch": 0.3275055905671885, "grad_norm": 0.12570694088935852, "learning_rate": 0.00016735482558730806, "loss": 0.9815, "step": 1611 }, { "epoch": 0.3277088839194958, "grad_norm": 0.12933030724525452, "learning_rate": 0.00016733448591477678, "loss": 1.0988, "step": 1612 }, { "epoch": 0.3279121772718032, "grad_norm": 0.14309881627559662, "learning_rate": 0.0001673141462422455, "loss": 1.1589, "step": 1613 }, { "epoch": 0.3281154706241106, "grad_norm": 0.14047057926654816, "learning_rate": 0.00016729380656971423, "loss": 1.2038, "step": 1614 }, { "epoch": 0.32831876397641796, "grad_norm": 0.1269095540046692, "learning_rate": 0.00016727346689718298, "loss": 0.9248, "step": 1615 }, { "epoch": 0.32852205732872536, "grad_norm": 0.14122694730758667, "learning_rate": 0.0001672531272246517, "loss": 1.1879, "step": 1616 }, { "epoch": 0.32872535068103276, "grad_norm": 0.133163183927536, "learning_rate": 0.00016723278755212043, "loss": 1.0718, "step": 1617 }, { "epoch": 0.3289286440333401, "grad_norm": 0.13817080855369568, "learning_rate": 0.00016721244787958915, "loss": 1.1519, "step": 1618 }, { "epoch": 0.3291319373856475, "grad_norm": 0.12117751687765121, "learning_rate": 0.00016719210820705788, "loss": 1.0104, "step": 1619 }, { "epoch": 0.32933523073795484, "grad_norm": 0.1269875317811966, "learning_rate": 0.0001671717685345266, "loss": 1.0644, "step": 1620 }, { "epoch": 0.32953852409026224, "grad_norm": 0.13901706039905548, "learning_rate": 0.00016715142886199533, "loss": 1.3002, "step": 1621 }, { "epoch": 0.32974181744256964, "grad_norm": 0.1284133940935135, "learning_rate": 0.00016713108918946405, "loss": 1.1447, "step": 1622 }, { "epoch": 0.329945110794877, "grad_norm": 0.13423141837120056, "learning_rate": 0.0001671107495169328, "loss": 1.2566, "step": 1623 }, { "epoch": 0.3301484041471844, "grad_norm": 0.12908455729484558, "learning_rate": 0.00016709040984440153, "loss": 1.1041, "step": 1624 }, { "epoch": 0.3303516974994918, "grad_norm": 0.1317860186100006, "learning_rate": 0.00016707007017187025, "loss": 1.1273, "step": 1625 }, { "epoch": 0.3305549908517991, "grad_norm": 0.1394864320755005, "learning_rate": 0.00016704973049933898, "loss": 1.0699, "step": 1626 }, { "epoch": 0.3307582842041065, "grad_norm": 0.1309152990579605, "learning_rate": 0.00016702939082680767, "loss": 0.9554, "step": 1627 }, { "epoch": 0.3309615775564139, "grad_norm": 0.11993929743766785, "learning_rate": 0.00016700905115427643, "loss": 0.9747, "step": 1628 }, { "epoch": 0.33116487090872126, "grad_norm": 0.11589863151311874, "learning_rate": 0.00016698871148174515, "loss": 0.9217, "step": 1629 }, { "epoch": 0.33136816426102866, "grad_norm": 0.12004578858613968, "learning_rate": 0.00016696837180921387, "loss": 0.9453, "step": 1630 }, { "epoch": 0.33157145761333606, "grad_norm": 0.1407518982887268, "learning_rate": 0.0001669480321366826, "loss": 1.1528, "step": 1631 }, { "epoch": 0.3317747509656434, "grad_norm": 0.1286914050579071, "learning_rate": 0.00016692769246415135, "loss": 1.038, "step": 1632 }, { "epoch": 0.3319780443179508, "grad_norm": 0.13304589688777924, "learning_rate": 0.00016690735279162007, "loss": 1.1159, "step": 1633 }, { "epoch": 0.3321813376702582, "grad_norm": 0.13245166838169098, "learning_rate": 0.0001668870131190888, "loss": 1.0368, "step": 1634 }, { "epoch": 0.33238463102256555, "grad_norm": 0.12715977430343628, "learning_rate": 0.0001668666734465575, "loss": 1.024, "step": 1635 }, { "epoch": 0.33258792437487295, "grad_norm": 0.13726472854614258, "learning_rate": 0.00016684633377402625, "loss": 1.0574, "step": 1636 }, { "epoch": 0.33279121772718034, "grad_norm": 0.10961025953292847, "learning_rate": 0.00016682599410149497, "loss": 0.9979, "step": 1637 }, { "epoch": 0.3329945110794877, "grad_norm": 0.13879232108592987, "learning_rate": 0.0001668056544289637, "loss": 1.2669, "step": 1638 }, { "epoch": 0.3331978044317951, "grad_norm": 0.12887312471866608, "learning_rate": 0.00016678531475643242, "loss": 1.1217, "step": 1639 }, { "epoch": 0.3334010977841025, "grad_norm": 0.1309410184621811, "learning_rate": 0.00016676497508390117, "loss": 1.1331, "step": 1640 }, { "epoch": 0.33360439113640983, "grad_norm": 0.12577351927757263, "learning_rate": 0.0001667446354113699, "loss": 0.9315, "step": 1641 }, { "epoch": 0.3338076844887172, "grad_norm": 0.1263495236635208, "learning_rate": 0.00016672429573883862, "loss": 0.9782, "step": 1642 }, { "epoch": 0.3340109778410246, "grad_norm": 0.12090608477592468, "learning_rate": 0.00016670395606630732, "loss": 0.9817, "step": 1643 }, { "epoch": 0.33421427119333197, "grad_norm": 0.1330811232328415, "learning_rate": 0.00016668361639377607, "loss": 1.0682, "step": 1644 }, { "epoch": 0.33441756454563937, "grad_norm": 0.13265149295330048, "learning_rate": 0.0001666632767212448, "loss": 0.8999, "step": 1645 }, { "epoch": 0.3346208578979467, "grad_norm": 0.12737800180912018, "learning_rate": 0.00016664293704871352, "loss": 1.1103, "step": 1646 }, { "epoch": 0.3348241512502541, "grad_norm": 0.13904881477355957, "learning_rate": 0.00016662259737618224, "loss": 1.1776, "step": 1647 }, { "epoch": 0.3350274446025615, "grad_norm": 0.13159041106700897, "learning_rate": 0.000166602257703651, "loss": 1.0343, "step": 1648 }, { "epoch": 0.33523073795486885, "grad_norm": 0.12564794719219208, "learning_rate": 0.00016658191803111972, "loss": 0.9786, "step": 1649 }, { "epoch": 0.33543403130717625, "grad_norm": 0.1561056673526764, "learning_rate": 0.00016656157835858844, "loss": 1.1937, "step": 1650 }, { "epoch": 0.33563732465948365, "grad_norm": 0.13286349177360535, "learning_rate": 0.00016654123868605714, "loss": 1.0153, "step": 1651 }, { "epoch": 0.335840618011791, "grad_norm": 0.12319796532392502, "learning_rate": 0.0001665208990135259, "loss": 1.0423, "step": 1652 }, { "epoch": 0.3360439113640984, "grad_norm": 0.13758210837841034, "learning_rate": 0.00016650055934099462, "loss": 1.0618, "step": 1653 }, { "epoch": 0.3362472047164058, "grad_norm": 0.11521997302770615, "learning_rate": 0.00016648021966846334, "loss": 0.9787, "step": 1654 }, { "epoch": 0.33645049806871313, "grad_norm": 0.1308450698852539, "learning_rate": 0.00016645987999593207, "loss": 1.1284, "step": 1655 }, { "epoch": 0.33665379142102053, "grad_norm": 0.13632404804229736, "learning_rate": 0.00016643954032340082, "loss": 1.1278, "step": 1656 }, { "epoch": 0.33685708477332793, "grad_norm": 0.12073387950658798, "learning_rate": 0.00016641920065086954, "loss": 1.0399, "step": 1657 }, { "epoch": 0.3370603781256353, "grad_norm": 0.12028390169143677, "learning_rate": 0.00016639886097833827, "loss": 1.0384, "step": 1658 }, { "epoch": 0.3372636714779427, "grad_norm": 0.12499553710222244, "learning_rate": 0.00016637852130580696, "loss": 1.0769, "step": 1659 }, { "epoch": 0.33746696483025007, "grad_norm": 0.16057424247264862, "learning_rate": 0.00016635818163327572, "loss": 1.3772, "step": 1660 }, { "epoch": 0.3376702581825574, "grad_norm": 0.12566526234149933, "learning_rate": 0.00016633784196074444, "loss": 1.1722, "step": 1661 }, { "epoch": 0.3378735515348648, "grad_norm": 0.11908633261919022, "learning_rate": 0.00016631750228821317, "loss": 1.0332, "step": 1662 }, { "epoch": 0.3380768448871722, "grad_norm": 0.14457720518112183, "learning_rate": 0.0001662971626156819, "loss": 1.187, "step": 1663 }, { "epoch": 0.33828013823947956, "grad_norm": 0.12620577216148376, "learning_rate": 0.00016627682294315064, "loss": 1.2064, "step": 1664 }, { "epoch": 0.33848343159178695, "grad_norm": 0.1155720204114914, "learning_rate": 0.00016625648327061937, "loss": 0.936, "step": 1665 }, { "epoch": 0.33868672494409435, "grad_norm": 0.12141234427690506, "learning_rate": 0.0001662361435980881, "loss": 1.0185, "step": 1666 }, { "epoch": 0.3388900182964017, "grad_norm": 0.11690623313188553, "learning_rate": 0.0001662158039255568, "loss": 0.9533, "step": 1667 }, { "epoch": 0.3390933116487091, "grad_norm": 0.127701997756958, "learning_rate": 0.0001661954642530255, "loss": 1.0834, "step": 1668 }, { "epoch": 0.3392966050010165, "grad_norm": 0.12167434394359589, "learning_rate": 0.00016617512458049426, "loss": 0.9166, "step": 1669 }, { "epoch": 0.33949989835332384, "grad_norm": 0.1415378600358963, "learning_rate": 0.000166154784907963, "loss": 1.0921, "step": 1670 }, { "epoch": 0.33970319170563124, "grad_norm": 0.13397271931171417, "learning_rate": 0.0001661344452354317, "loss": 1.1214, "step": 1671 }, { "epoch": 0.3399064850579386, "grad_norm": 0.1336379051208496, "learning_rate": 0.00016611410556290044, "loss": 1.1034, "step": 1672 }, { "epoch": 0.340109778410246, "grad_norm": 0.1404540240764618, "learning_rate": 0.0001660937658903692, "loss": 1.1435, "step": 1673 }, { "epoch": 0.3403130717625534, "grad_norm": 0.10813318192958832, "learning_rate": 0.0001660734262178379, "loss": 0.8935, "step": 1674 }, { "epoch": 0.3405163651148607, "grad_norm": 0.1491374522447586, "learning_rate": 0.00016605308654530664, "loss": 1.1378, "step": 1675 }, { "epoch": 0.3407196584671681, "grad_norm": 0.12213015556335449, "learning_rate": 0.00016603274687277534, "loss": 1.1236, "step": 1676 }, { "epoch": 0.3409229518194755, "grad_norm": 0.12762251496315002, "learning_rate": 0.0001660124072002441, "loss": 1.1892, "step": 1677 }, { "epoch": 0.34112624517178286, "grad_norm": 0.14703615009784698, "learning_rate": 0.0001659920675277128, "loss": 1.1918, "step": 1678 }, { "epoch": 0.34132953852409026, "grad_norm": 0.13121256232261658, "learning_rate": 0.00016597172785518154, "loss": 1.084, "step": 1679 }, { "epoch": 0.34153283187639766, "grad_norm": 0.15220001339912415, "learning_rate": 0.00016595138818265026, "loss": 1.3663, "step": 1680 }, { "epoch": 0.341736125228705, "grad_norm": 0.1325935572385788, "learning_rate": 0.000165931048510119, "loss": 1.0294, "step": 1681 }, { "epoch": 0.3419394185810124, "grad_norm": 0.10651461035013199, "learning_rate": 0.00016591070883758774, "loss": 0.8741, "step": 1682 }, { "epoch": 0.3421427119333198, "grad_norm": 0.1287640780210495, "learning_rate": 0.00016589036916505646, "loss": 1.087, "step": 1683 }, { "epoch": 0.34234600528562714, "grad_norm": 0.1286855936050415, "learning_rate": 0.00016587002949252516, "loss": 0.9785, "step": 1684 }, { "epoch": 0.34254929863793454, "grad_norm": 0.12485534697771072, "learning_rate": 0.0001658496898199939, "loss": 0.9792, "step": 1685 }, { "epoch": 0.34275259199024194, "grad_norm": 0.11311212927103043, "learning_rate": 0.00016582935014746263, "loss": 0.9772, "step": 1686 }, { "epoch": 0.3429558853425493, "grad_norm": 0.13208623230457306, "learning_rate": 0.00016580901047493136, "loss": 1.1733, "step": 1687 }, { "epoch": 0.3431591786948567, "grad_norm": 0.11595738679170609, "learning_rate": 0.00016578867080240008, "loss": 1.0305, "step": 1688 }, { "epoch": 0.3433624720471641, "grad_norm": 0.14235566556453705, "learning_rate": 0.00016576833112986883, "loss": 1.1916, "step": 1689 }, { "epoch": 0.3435657653994714, "grad_norm": 0.12602582573890686, "learning_rate": 0.00016574799145733756, "loss": 1.0003, "step": 1690 }, { "epoch": 0.3437690587517788, "grad_norm": 0.1448718011379242, "learning_rate": 0.00016572765178480628, "loss": 1.1201, "step": 1691 }, { "epoch": 0.3439723521040862, "grad_norm": 0.12688006460666656, "learning_rate": 0.00016570731211227498, "loss": 0.9846, "step": 1692 }, { "epoch": 0.34417564545639356, "grad_norm": 0.12715177237987518, "learning_rate": 0.00016568697243974373, "loss": 1.1093, "step": 1693 }, { "epoch": 0.34437893880870096, "grad_norm": 0.14105954766273499, "learning_rate": 0.00016566663276721246, "loss": 1.2714, "step": 1694 }, { "epoch": 0.34458223216100836, "grad_norm": 0.12558870017528534, "learning_rate": 0.00016564629309468118, "loss": 0.9724, "step": 1695 }, { "epoch": 0.3447855255133157, "grad_norm": 0.11886492371559143, "learning_rate": 0.0001656259534221499, "loss": 1.1076, "step": 1696 }, { "epoch": 0.3449888188656231, "grad_norm": 0.13078825175762177, "learning_rate": 0.00016560561374961866, "loss": 1.1457, "step": 1697 }, { "epoch": 0.34519211221793045, "grad_norm": 0.12331999093294144, "learning_rate": 0.00016558527407708738, "loss": 1.0065, "step": 1698 }, { "epoch": 0.34539540557023785, "grad_norm": 0.12109193205833435, "learning_rate": 0.0001655649344045561, "loss": 1.1099, "step": 1699 }, { "epoch": 0.34559869892254524, "grad_norm": 0.1176178902387619, "learning_rate": 0.0001655445947320248, "loss": 0.9692, "step": 1700 }, { "epoch": 0.3458019922748526, "grad_norm": 0.1067582294344902, "learning_rate": 0.00016552425505949355, "loss": 0.8452, "step": 1701 }, { "epoch": 0.34600528562716, "grad_norm": 0.11509659141302109, "learning_rate": 0.00016550391538696228, "loss": 0.9062, "step": 1702 }, { "epoch": 0.3462085789794674, "grad_norm": 0.12043119221925735, "learning_rate": 0.000165483575714431, "loss": 1.112, "step": 1703 }, { "epoch": 0.34641187233177473, "grad_norm": 0.12769265472888947, "learning_rate": 0.00016546323604189973, "loss": 1.1163, "step": 1704 }, { "epoch": 0.3466151656840821, "grad_norm": 0.13460403680801392, "learning_rate": 0.00016544289636936848, "loss": 1.2376, "step": 1705 }, { "epoch": 0.3468184590363895, "grad_norm": 0.11211954802274704, "learning_rate": 0.0001654225566968372, "loss": 0.8811, "step": 1706 }, { "epoch": 0.34702175238869687, "grad_norm": 0.1208495944738388, "learning_rate": 0.00016540221702430593, "loss": 0.9858, "step": 1707 }, { "epoch": 0.34722504574100427, "grad_norm": 0.13525189459323883, "learning_rate": 0.00016538187735177463, "loss": 1.0372, "step": 1708 }, { "epoch": 0.34742833909331167, "grad_norm": 0.11987826973199844, "learning_rate": 0.00016536153767924335, "loss": 1.0966, "step": 1709 }, { "epoch": 0.347631632445619, "grad_norm": 0.12538312375545502, "learning_rate": 0.0001653411980067121, "loss": 1.0718, "step": 1710 }, { "epoch": 0.3478349257979264, "grad_norm": 0.12674830853939056, "learning_rate": 0.00016532085833418083, "loss": 1.09, "step": 1711 }, { "epoch": 0.3480382191502338, "grad_norm": 0.11861549317836761, "learning_rate": 0.00016530051866164955, "loss": 0.9924, "step": 1712 }, { "epoch": 0.34824151250254115, "grad_norm": 0.12545670568943024, "learning_rate": 0.00016528017898911828, "loss": 1.1363, "step": 1713 }, { "epoch": 0.34844480585484855, "grad_norm": 0.12180805951356888, "learning_rate": 0.00016525983931658703, "loss": 0.9195, "step": 1714 }, { "epoch": 0.34864809920715595, "grad_norm": 0.14458616077899933, "learning_rate": 0.00016523949964405575, "loss": 1.0096, "step": 1715 }, { "epoch": 0.3488513925594633, "grad_norm": 0.13006000220775604, "learning_rate": 0.00016521915997152445, "loss": 1.1037, "step": 1716 }, { "epoch": 0.3490546859117707, "grad_norm": 0.11734442412853241, "learning_rate": 0.00016519882029899317, "loss": 0.9942, "step": 1717 }, { "epoch": 0.3492579792640781, "grad_norm": 0.10168986022472382, "learning_rate": 0.00016517848062646193, "loss": 0.8829, "step": 1718 }, { "epoch": 0.34946127261638543, "grad_norm": 0.13804613053798676, "learning_rate": 0.00016515814095393065, "loss": 1.021, "step": 1719 }, { "epoch": 0.34966456596869283, "grad_norm": 0.13653217256069183, "learning_rate": 0.00016513780128139937, "loss": 1.0905, "step": 1720 }, { "epoch": 0.34986785932100023, "grad_norm": 0.12326166778802872, "learning_rate": 0.0001651174616088681, "loss": 1.0843, "step": 1721 }, { "epoch": 0.3500711526733076, "grad_norm": 0.1265186071395874, "learning_rate": 0.00016509712193633685, "loss": 1.1012, "step": 1722 }, { "epoch": 0.35027444602561497, "grad_norm": 0.12159296125173569, "learning_rate": 0.00016507678226380557, "loss": 1.0491, "step": 1723 }, { "epoch": 0.3504777393779223, "grad_norm": 0.12199139595031738, "learning_rate": 0.00016505644259127427, "loss": 1.1179, "step": 1724 }, { "epoch": 0.3506810327302297, "grad_norm": 0.13243111968040466, "learning_rate": 0.000165036102918743, "loss": 1.0576, "step": 1725 }, { "epoch": 0.3508843260825371, "grad_norm": 0.1342582106590271, "learning_rate": 0.00016501576324621175, "loss": 1.1035, "step": 1726 }, { "epoch": 0.35108761943484446, "grad_norm": 0.15361081063747406, "learning_rate": 0.00016499542357368047, "loss": 1.1772, "step": 1727 }, { "epoch": 0.35129091278715185, "grad_norm": 0.1446637064218521, "learning_rate": 0.0001649750839011492, "loss": 1.0995, "step": 1728 }, { "epoch": 0.35149420613945925, "grad_norm": 0.12943841516971588, "learning_rate": 0.00016495474422861792, "loss": 1.0938, "step": 1729 }, { "epoch": 0.3516974994917666, "grad_norm": 0.11111871153116226, "learning_rate": 0.00016493440455608667, "loss": 0.8568, "step": 1730 }, { "epoch": 0.351900792844074, "grad_norm": 0.12905767560005188, "learning_rate": 0.0001649140648835554, "loss": 1.0485, "step": 1731 }, { "epoch": 0.3521040861963814, "grad_norm": 0.14140938222408295, "learning_rate": 0.00016489372521102412, "loss": 1.0996, "step": 1732 }, { "epoch": 0.35230737954868874, "grad_norm": 0.120769202709198, "learning_rate": 0.00016487338553849282, "loss": 1.0729, "step": 1733 }, { "epoch": 0.35251067290099614, "grad_norm": 0.1240081861615181, "learning_rate": 0.00016485304586596157, "loss": 1.1505, "step": 1734 }, { "epoch": 0.35271396625330353, "grad_norm": 0.128762885928154, "learning_rate": 0.0001648327061934303, "loss": 1.09, "step": 1735 }, { "epoch": 0.3529172596056109, "grad_norm": 0.13550743460655212, "learning_rate": 0.00016481236652089902, "loss": 1.1706, "step": 1736 }, { "epoch": 0.3531205529579183, "grad_norm": 0.13279037177562714, "learning_rate": 0.00016479202684836774, "loss": 1.2231, "step": 1737 }, { "epoch": 0.3533238463102257, "grad_norm": 0.11756809800863266, "learning_rate": 0.0001647716871758365, "loss": 1.0519, "step": 1738 }, { "epoch": 0.353527139662533, "grad_norm": 0.11612554639577866, "learning_rate": 0.00016475134750330522, "loss": 0.9618, "step": 1739 }, { "epoch": 0.3537304330148404, "grad_norm": 0.12984800338745117, "learning_rate": 0.00016473100783077394, "loss": 1.1865, "step": 1740 }, { "epoch": 0.3539337263671478, "grad_norm": 0.12334571778774261, "learning_rate": 0.00016471066815824264, "loss": 1.1693, "step": 1741 }, { "epoch": 0.35413701971945516, "grad_norm": 0.13324569165706635, "learning_rate": 0.0001646903284857114, "loss": 1.0449, "step": 1742 }, { "epoch": 0.35434031307176256, "grad_norm": 0.12119297683238983, "learning_rate": 0.00016466998881318012, "loss": 0.9664, "step": 1743 }, { "epoch": 0.35454360642406996, "grad_norm": 0.12139979749917984, "learning_rate": 0.00016464964914064884, "loss": 1.1399, "step": 1744 }, { "epoch": 0.3547468997763773, "grad_norm": 0.13679492473602295, "learning_rate": 0.00016462930946811757, "loss": 1.2815, "step": 1745 }, { "epoch": 0.3549501931286847, "grad_norm": 0.14377973973751068, "learning_rate": 0.00016460896979558632, "loss": 1.1322, "step": 1746 }, { "epoch": 0.3551534864809921, "grad_norm": 0.130259171128273, "learning_rate": 0.00016458863012305504, "loss": 1.0832, "step": 1747 }, { "epoch": 0.35535677983329944, "grad_norm": 0.14110639691352844, "learning_rate": 0.00016456829045052377, "loss": 1.0277, "step": 1748 }, { "epoch": 0.35556007318560684, "grad_norm": 0.1513645052909851, "learning_rate": 0.00016454795077799246, "loss": 1.1862, "step": 1749 }, { "epoch": 0.3557633665379142, "grad_norm": 0.1150139644742012, "learning_rate": 0.0001645276111054612, "loss": 1.1027, "step": 1750 }, { "epoch": 0.3559666598902216, "grad_norm": 0.11318166553974152, "learning_rate": 0.00016450727143292994, "loss": 1.0458, "step": 1751 }, { "epoch": 0.356169953242529, "grad_norm": 0.12470010668039322, "learning_rate": 0.00016448693176039867, "loss": 1.0969, "step": 1752 }, { "epoch": 0.3563732465948363, "grad_norm": 0.12987849116325378, "learning_rate": 0.0001644665920878674, "loss": 1.2132, "step": 1753 }, { "epoch": 0.3565765399471437, "grad_norm": 0.1229574978351593, "learning_rate": 0.00016444625241533611, "loss": 1.01, "step": 1754 }, { "epoch": 0.3567798332994511, "grad_norm": 0.12658950686454773, "learning_rate": 0.00016442591274280487, "loss": 1.1362, "step": 1755 }, { "epoch": 0.35698312665175846, "grad_norm": 0.13322791457176208, "learning_rate": 0.0001644055730702736, "loss": 1.1059, "step": 1756 }, { "epoch": 0.35718642000406586, "grad_norm": 0.12812237441539764, "learning_rate": 0.0001643852333977423, "loss": 0.9566, "step": 1757 }, { "epoch": 0.35738971335637326, "grad_norm": 0.12379775196313858, "learning_rate": 0.000164364893725211, "loss": 1.0166, "step": 1758 }, { "epoch": 0.3575930067086806, "grad_norm": 0.12064617872238159, "learning_rate": 0.00016434455405267976, "loss": 1.001, "step": 1759 }, { "epoch": 0.357796300060988, "grad_norm": 0.12102466076612473, "learning_rate": 0.0001643242143801485, "loss": 1.1048, "step": 1760 }, { "epoch": 0.3579995934132954, "grad_norm": 0.1230425089597702, "learning_rate": 0.0001643038747076172, "loss": 0.962, "step": 1761 }, { "epoch": 0.35820288676560275, "grad_norm": 0.12632609903812408, "learning_rate": 0.00016428353503508594, "loss": 1.1316, "step": 1762 }, { "epoch": 0.35840618011791014, "grad_norm": 0.1422523409128189, "learning_rate": 0.0001642631953625547, "loss": 1.2044, "step": 1763 }, { "epoch": 0.35860947347021754, "grad_norm": 0.1147986575961113, "learning_rate": 0.0001642428556900234, "loss": 0.9114, "step": 1764 }, { "epoch": 0.3588127668225249, "grad_norm": 0.1134926900267601, "learning_rate": 0.0001642225160174921, "loss": 1.066, "step": 1765 }, { "epoch": 0.3590160601748323, "grad_norm": 0.10886301100254059, "learning_rate": 0.00016420217634496083, "loss": 0.9999, "step": 1766 }, { "epoch": 0.3592193535271397, "grad_norm": 0.12393435835838318, "learning_rate": 0.00016418183667242959, "loss": 1.0631, "step": 1767 }, { "epoch": 0.359422646879447, "grad_norm": 0.1252308487892151, "learning_rate": 0.0001641614969998983, "loss": 1.088, "step": 1768 }, { "epoch": 0.3596259402317544, "grad_norm": 0.13078045845031738, "learning_rate": 0.00016414115732736704, "loss": 1.0634, "step": 1769 }, { "epoch": 0.3598292335840618, "grad_norm": 0.12720254063606262, "learning_rate": 0.00016412081765483576, "loss": 1.1295, "step": 1770 }, { "epoch": 0.36003252693636917, "grad_norm": 0.12251488864421844, "learning_rate": 0.0001641004779823045, "loss": 0.9843, "step": 1771 }, { "epoch": 0.36023582028867657, "grad_norm": 0.12935830652713776, "learning_rate": 0.00016408013830977324, "loss": 1.0496, "step": 1772 }, { "epoch": 0.36043911364098397, "grad_norm": 0.12900424003601074, "learning_rate": 0.00016405979863724193, "loss": 0.9895, "step": 1773 }, { "epoch": 0.3606424069932913, "grad_norm": 0.14351366460323334, "learning_rate": 0.00016403945896471066, "loss": 1.2852, "step": 1774 }, { "epoch": 0.3608457003455987, "grad_norm": 0.12761393189430237, "learning_rate": 0.0001640191192921794, "loss": 1.086, "step": 1775 }, { "epoch": 0.36104899369790605, "grad_norm": 0.1086045354604721, "learning_rate": 0.00016399877961964813, "loss": 0.8948, "step": 1776 }, { "epoch": 0.36125228705021345, "grad_norm": 0.11502155661582947, "learning_rate": 0.00016397843994711686, "loss": 0.8168, "step": 1777 }, { "epoch": 0.36145558040252085, "grad_norm": 0.12591351568698883, "learning_rate": 0.00016395810027458558, "loss": 0.9193, "step": 1778 }, { "epoch": 0.3616588737548282, "grad_norm": 0.1310427188873291, "learning_rate": 0.00016393776060205433, "loss": 0.8922, "step": 1779 }, { "epoch": 0.3618621671071356, "grad_norm": 0.12844674289226532, "learning_rate": 0.00016391742092952306, "loss": 1.213, "step": 1780 }, { "epoch": 0.362065460459443, "grad_norm": 0.12577317655086517, "learning_rate": 0.00016389708125699176, "loss": 0.8967, "step": 1781 }, { "epoch": 0.36226875381175033, "grad_norm": 0.12694710493087769, "learning_rate": 0.00016387674158446048, "loss": 1.0951, "step": 1782 }, { "epoch": 0.36247204716405773, "grad_norm": 0.14658670127391815, "learning_rate": 0.00016385640191192923, "loss": 1.2866, "step": 1783 }, { "epoch": 0.36267534051636513, "grad_norm": 0.11917047947645187, "learning_rate": 0.00016383606223939796, "loss": 1.1441, "step": 1784 }, { "epoch": 0.3628786338686725, "grad_norm": 0.11192582547664642, "learning_rate": 0.00016381572256686668, "loss": 1.0195, "step": 1785 }, { "epoch": 0.36308192722097987, "grad_norm": 0.12563778460025787, "learning_rate": 0.0001637953828943354, "loss": 1.0646, "step": 1786 }, { "epoch": 0.36328522057328727, "grad_norm": 0.11741344630718231, "learning_rate": 0.00016377504322180416, "loss": 1.0123, "step": 1787 }, { "epoch": 0.3634885139255946, "grad_norm": 0.12519432604312897, "learning_rate": 0.00016375470354927288, "loss": 0.9654, "step": 1788 }, { "epoch": 0.363691807277902, "grad_norm": 0.13249295949935913, "learning_rate": 0.0001637343638767416, "loss": 1.1636, "step": 1789 }, { "epoch": 0.3638951006302094, "grad_norm": 0.14184780418872833, "learning_rate": 0.0001637140242042103, "loss": 1.2066, "step": 1790 }, { "epoch": 0.36409839398251675, "grad_norm": 0.13372722268104553, "learning_rate": 0.00016369368453167903, "loss": 1.0466, "step": 1791 }, { "epoch": 0.36430168733482415, "grad_norm": 0.14696893095970154, "learning_rate": 0.00016367334485914778, "loss": 1.0983, "step": 1792 }, { "epoch": 0.36450498068713155, "grad_norm": 0.1386573314666748, "learning_rate": 0.0001636530051866165, "loss": 1.1763, "step": 1793 }, { "epoch": 0.3647082740394389, "grad_norm": 0.1271977722644806, "learning_rate": 0.00016363266551408523, "loss": 1.0824, "step": 1794 }, { "epoch": 0.3649115673917463, "grad_norm": 0.13254235684871674, "learning_rate": 0.00016361232584155395, "loss": 0.9628, "step": 1795 }, { "epoch": 0.3651148607440537, "grad_norm": 0.1489454209804535, "learning_rate": 0.0001635919861690227, "loss": 1.256, "step": 1796 }, { "epoch": 0.36531815409636104, "grad_norm": 0.11988960951566696, "learning_rate": 0.00016357164649649143, "loss": 1.0667, "step": 1797 }, { "epoch": 0.36552144744866844, "grad_norm": 0.11505492776632309, "learning_rate": 0.00016355130682396013, "loss": 1.037, "step": 1798 }, { "epoch": 0.36572474080097583, "grad_norm": 0.1133279800415039, "learning_rate": 0.00016353096715142885, "loss": 1.0006, "step": 1799 }, { "epoch": 0.3659280341532832, "grad_norm": 0.14962686598300934, "learning_rate": 0.0001635106274788976, "loss": 1.3546, "step": 1800 }, { "epoch": 0.3661313275055906, "grad_norm": 0.13253025710582733, "learning_rate": 0.00016349028780636633, "loss": 1.1064, "step": 1801 }, { "epoch": 0.3663346208578979, "grad_norm": 0.11647074669599533, "learning_rate": 0.00016346994813383505, "loss": 0.9201, "step": 1802 }, { "epoch": 0.3665379142102053, "grad_norm": 0.12080147862434387, "learning_rate": 0.00016344960846130378, "loss": 1.0346, "step": 1803 }, { "epoch": 0.3667412075625127, "grad_norm": 0.12051571905612946, "learning_rate": 0.00016342926878877253, "loss": 1.0125, "step": 1804 }, { "epoch": 0.36694450091482006, "grad_norm": 0.11931899935007095, "learning_rate": 0.00016340892911624125, "loss": 1.0123, "step": 1805 }, { "epoch": 0.36714779426712746, "grad_norm": 0.12983456254005432, "learning_rate": 0.00016338858944370995, "loss": 0.9814, "step": 1806 }, { "epoch": 0.36735108761943486, "grad_norm": 0.14519883692264557, "learning_rate": 0.00016336824977117867, "loss": 1.1524, "step": 1807 }, { "epoch": 0.3675543809717422, "grad_norm": 0.11531595140695572, "learning_rate": 0.00016334791009864742, "loss": 0.9979, "step": 1808 }, { "epoch": 0.3677576743240496, "grad_norm": 0.13013306260108948, "learning_rate": 0.00016332757042611615, "loss": 0.9104, "step": 1809 }, { "epoch": 0.367960967676357, "grad_norm": 0.12455404549837112, "learning_rate": 0.00016330723075358487, "loss": 1.1263, "step": 1810 }, { "epoch": 0.36816426102866434, "grad_norm": 0.1386694461107254, "learning_rate": 0.0001632868910810536, "loss": 1.1143, "step": 1811 }, { "epoch": 0.36836755438097174, "grad_norm": 0.12970969080924988, "learning_rate": 0.00016326655140852235, "loss": 0.9942, "step": 1812 }, { "epoch": 0.36857084773327914, "grad_norm": 0.13323652744293213, "learning_rate": 0.00016324621173599107, "loss": 1.1296, "step": 1813 }, { "epoch": 0.3687741410855865, "grad_norm": 0.12079238891601562, "learning_rate": 0.00016322587206345977, "loss": 0.9846, "step": 1814 }, { "epoch": 0.3689774344378939, "grad_norm": 0.11877255141735077, "learning_rate": 0.0001632055323909285, "loss": 1.0053, "step": 1815 }, { "epoch": 0.3691807277902013, "grad_norm": 0.12340681999921799, "learning_rate": 0.00016318519271839725, "loss": 0.9803, "step": 1816 }, { "epoch": 0.3693840211425086, "grad_norm": 0.11937633156776428, "learning_rate": 0.00016316485304586597, "loss": 1.0217, "step": 1817 }, { "epoch": 0.369587314494816, "grad_norm": 0.12306183576583862, "learning_rate": 0.0001631445133733347, "loss": 1.005, "step": 1818 }, { "epoch": 0.3697906078471234, "grad_norm": 0.1307355761528015, "learning_rate": 0.00016312417370080342, "loss": 1.0241, "step": 1819 }, { "epoch": 0.36999390119943076, "grad_norm": 0.12387688457965851, "learning_rate": 0.00016310383402827217, "loss": 1.0489, "step": 1820 }, { "epoch": 0.37019719455173816, "grad_norm": 0.13056257367134094, "learning_rate": 0.0001630834943557409, "loss": 1.0639, "step": 1821 }, { "epoch": 0.37040048790404556, "grad_norm": 0.1329268217086792, "learning_rate": 0.0001630631546832096, "loss": 1.0949, "step": 1822 }, { "epoch": 0.3706037812563529, "grad_norm": 0.1100173369050026, "learning_rate": 0.00016304281501067832, "loss": 0.9644, "step": 1823 }, { "epoch": 0.3708070746086603, "grad_norm": 0.13045302033424377, "learning_rate": 0.00016302247533814707, "loss": 1.0773, "step": 1824 }, { "epoch": 0.3710103679609677, "grad_norm": 0.12959614396095276, "learning_rate": 0.0001630021356656158, "loss": 1.0633, "step": 1825 }, { "epoch": 0.37121366131327505, "grad_norm": 0.1272924393415451, "learning_rate": 0.00016298179599308452, "loss": 0.9854, "step": 1826 }, { "epoch": 0.37141695466558244, "grad_norm": 0.13959196209907532, "learning_rate": 0.00016296145632055324, "loss": 1.1186, "step": 1827 }, { "epoch": 0.37162024801788984, "grad_norm": 0.120680071413517, "learning_rate": 0.000162941116648022, "loss": 0.9871, "step": 1828 }, { "epoch": 0.3718235413701972, "grad_norm": 0.11955247074365616, "learning_rate": 0.00016292077697549072, "loss": 1.0019, "step": 1829 }, { "epoch": 0.3720268347225046, "grad_norm": 0.13293783366680145, "learning_rate": 0.00016290043730295942, "loss": 1.1245, "step": 1830 }, { "epoch": 0.37223012807481193, "grad_norm": 0.13701294362545013, "learning_rate": 0.00016288009763042814, "loss": 0.9353, "step": 1831 }, { "epoch": 0.3724334214271193, "grad_norm": 0.12601931393146515, "learning_rate": 0.00016285975795789687, "loss": 1.0166, "step": 1832 }, { "epoch": 0.3726367147794267, "grad_norm": 0.12148377299308777, "learning_rate": 0.00016283941828536562, "loss": 1.0335, "step": 1833 }, { "epoch": 0.37284000813173407, "grad_norm": 0.1322852075099945, "learning_rate": 0.00016281907861283434, "loss": 0.9585, "step": 1834 }, { "epoch": 0.37304330148404147, "grad_norm": 0.13737133145332336, "learning_rate": 0.00016279873894030307, "loss": 1.0907, "step": 1835 }, { "epoch": 0.37324659483634887, "grad_norm": 0.12207762897014618, "learning_rate": 0.0001627783992677718, "loss": 1.1334, "step": 1836 }, { "epoch": 0.3734498881886562, "grad_norm": 0.13265001773834229, "learning_rate": 0.00016275805959524054, "loss": 1.1315, "step": 1837 }, { "epoch": 0.3736531815409636, "grad_norm": 0.1349770426750183, "learning_rate": 0.00016273771992270924, "loss": 1.0977, "step": 1838 }, { "epoch": 0.373856474893271, "grad_norm": 0.1335778385400772, "learning_rate": 0.00016271738025017796, "loss": 1.038, "step": 1839 }, { "epoch": 0.37405976824557835, "grad_norm": 0.13259084522724152, "learning_rate": 0.0001626970405776467, "loss": 1.0422, "step": 1840 }, { "epoch": 0.37426306159788575, "grad_norm": 0.13083282113075256, "learning_rate": 0.00016267670090511544, "loss": 1.0672, "step": 1841 }, { "epoch": 0.37446635495019315, "grad_norm": 0.12019068002700806, "learning_rate": 0.00016265636123258416, "loss": 0.8895, "step": 1842 }, { "epoch": 0.3746696483025005, "grad_norm": 0.12882567942142487, "learning_rate": 0.0001626360215600529, "loss": 1.071, "step": 1843 }, { "epoch": 0.3748729416548079, "grad_norm": 0.12891016900539398, "learning_rate": 0.00016261568188752161, "loss": 0.9553, "step": 1844 }, { "epoch": 0.3750762350071153, "grad_norm": 0.12769286334514618, "learning_rate": 0.00016259534221499037, "loss": 1.0616, "step": 1845 }, { "epoch": 0.37527952835942263, "grad_norm": 0.14067451655864716, "learning_rate": 0.00016257500254245906, "loss": 1.2434, "step": 1846 }, { "epoch": 0.37548282171173003, "grad_norm": 0.14272430539131165, "learning_rate": 0.0001625546628699278, "loss": 1.1185, "step": 1847 }, { "epoch": 0.37568611506403743, "grad_norm": 0.1343206763267517, "learning_rate": 0.0001625343231973965, "loss": 1.1766, "step": 1848 }, { "epoch": 0.3758894084163448, "grad_norm": 0.11800689250230789, "learning_rate": 0.00016251398352486526, "loss": 1.0509, "step": 1849 }, { "epoch": 0.37609270176865217, "grad_norm": 0.126071959733963, "learning_rate": 0.000162493643852334, "loss": 1.0304, "step": 1850 }, { "epoch": 0.37629599512095957, "grad_norm": 0.1479204297065735, "learning_rate": 0.0001624733041798027, "loss": 1.2349, "step": 1851 }, { "epoch": 0.3764992884732669, "grad_norm": 0.1391003429889679, "learning_rate": 0.00016245296450727144, "loss": 0.9928, "step": 1852 }, { "epoch": 0.3767025818255743, "grad_norm": 0.14163215458393097, "learning_rate": 0.0001624326248347402, "loss": 1.2806, "step": 1853 }, { "epoch": 0.3769058751778817, "grad_norm": 0.113652303814888, "learning_rate": 0.0001624122851622089, "loss": 0.9222, "step": 1854 }, { "epoch": 0.37710916853018905, "grad_norm": 0.13163653016090393, "learning_rate": 0.0001623919454896776, "loss": 0.9817, "step": 1855 }, { "epoch": 0.37731246188249645, "grad_norm": 0.12150076776742935, "learning_rate": 0.00016237160581714633, "loss": 1.0522, "step": 1856 }, { "epoch": 0.3775157552348038, "grad_norm": 0.12493383884429932, "learning_rate": 0.00016235126614461509, "loss": 1.1715, "step": 1857 }, { "epoch": 0.3777190485871112, "grad_norm": 0.12059423327445984, "learning_rate": 0.0001623309264720838, "loss": 1.0005, "step": 1858 }, { "epoch": 0.3779223419394186, "grad_norm": 0.13585112988948822, "learning_rate": 0.00016231058679955253, "loss": 1.0747, "step": 1859 }, { "epoch": 0.37812563529172594, "grad_norm": 0.13678506016731262, "learning_rate": 0.00016229024712702126, "loss": 1.2324, "step": 1860 }, { "epoch": 0.37832892864403334, "grad_norm": 0.13325399160385132, "learning_rate": 0.00016226990745449, "loss": 1.0719, "step": 1861 }, { "epoch": 0.37853222199634073, "grad_norm": 0.13250133395195007, "learning_rate": 0.00016224956778195874, "loss": 1.2482, "step": 1862 }, { "epoch": 0.3787355153486481, "grad_norm": 0.13788394629955292, "learning_rate": 0.00016222922810942743, "loss": 1.209, "step": 1863 }, { "epoch": 0.3789388087009555, "grad_norm": 0.13350476324558258, "learning_rate": 0.00016220888843689616, "loss": 1.133, "step": 1864 }, { "epoch": 0.3791421020532629, "grad_norm": 0.13107924163341522, "learning_rate": 0.0001621885487643649, "loss": 1.0644, "step": 1865 }, { "epoch": 0.3793453954055702, "grad_norm": 0.12230812013149261, "learning_rate": 0.00016216820909183363, "loss": 0.8282, "step": 1866 }, { "epoch": 0.3795486887578776, "grad_norm": 0.11637227237224579, "learning_rate": 0.00016214786941930236, "loss": 0.9427, "step": 1867 }, { "epoch": 0.379751982110185, "grad_norm": 0.12177541106939316, "learning_rate": 0.00016212752974677108, "loss": 1.1639, "step": 1868 }, { "epoch": 0.37995527546249236, "grad_norm": 0.149050772190094, "learning_rate": 0.00016210719007423983, "loss": 1.225, "step": 1869 }, { "epoch": 0.38015856881479976, "grad_norm": 0.12929648160934448, "learning_rate": 0.00016208685040170856, "loss": 0.9552, "step": 1870 }, { "epoch": 0.38036186216710716, "grad_norm": 0.13179321587085724, "learning_rate": 0.00016206651072917726, "loss": 1.1057, "step": 1871 }, { "epoch": 0.3805651555194145, "grad_norm": 0.15877602994441986, "learning_rate": 0.00016204617105664598, "loss": 1.2518, "step": 1872 }, { "epoch": 0.3807684488717219, "grad_norm": 0.12015218287706375, "learning_rate": 0.00016202583138411473, "loss": 0.9414, "step": 1873 }, { "epoch": 0.3809717422240293, "grad_norm": 0.11854024976491928, "learning_rate": 0.00016200549171158346, "loss": 0.8964, "step": 1874 }, { "epoch": 0.38117503557633664, "grad_norm": 0.1399824023246765, "learning_rate": 0.00016198515203905218, "loss": 1.1772, "step": 1875 }, { "epoch": 0.38137832892864404, "grad_norm": 0.13480430841445923, "learning_rate": 0.0001619648123665209, "loss": 1.1182, "step": 1876 }, { "epoch": 0.38158162228095144, "grad_norm": 0.13443569839000702, "learning_rate": 0.00016194447269398963, "loss": 1.1376, "step": 1877 }, { "epoch": 0.3817849156332588, "grad_norm": 0.12492494285106659, "learning_rate": 0.00016192413302145838, "loss": 1.0976, "step": 1878 }, { "epoch": 0.3819882089855662, "grad_norm": 0.1359935700893402, "learning_rate": 0.00016190379334892708, "loss": 1.2954, "step": 1879 }, { "epoch": 0.3821915023378736, "grad_norm": 0.11808416247367859, "learning_rate": 0.0001618834536763958, "loss": 0.9972, "step": 1880 }, { "epoch": 0.3823947956901809, "grad_norm": 0.10978496074676514, "learning_rate": 0.00016186311400386453, "loss": 1.0534, "step": 1881 }, { "epoch": 0.3825980890424883, "grad_norm": 0.13503976166248322, "learning_rate": 0.00016184277433133328, "loss": 1.1353, "step": 1882 }, { "epoch": 0.38280138239479566, "grad_norm": 0.1396964192390442, "learning_rate": 0.000161822434658802, "loss": 1.1936, "step": 1883 }, { "epoch": 0.38300467574710306, "grad_norm": 0.1180117055773735, "learning_rate": 0.00016180209498627073, "loss": 1.1091, "step": 1884 }, { "epoch": 0.38320796909941046, "grad_norm": 0.13506156206130981, "learning_rate": 0.00016178175531373945, "loss": 1.12, "step": 1885 }, { "epoch": 0.3834112624517178, "grad_norm": 0.12191524356603622, "learning_rate": 0.0001617614156412082, "loss": 1.1521, "step": 1886 }, { "epoch": 0.3836145558040252, "grad_norm": 0.13350510597229004, "learning_rate": 0.0001617410759686769, "loss": 1.1081, "step": 1887 }, { "epoch": 0.3838178491563326, "grad_norm": 0.1178809106349945, "learning_rate": 0.00016172073629614563, "loss": 1.0511, "step": 1888 }, { "epoch": 0.38402114250863995, "grad_norm": 0.13383956253528595, "learning_rate": 0.00016170039662361435, "loss": 1.0467, "step": 1889 }, { "epoch": 0.38422443586094734, "grad_norm": 0.12543490529060364, "learning_rate": 0.0001616800569510831, "loss": 1.038, "step": 1890 }, { "epoch": 0.38442772921325474, "grad_norm": 0.12253366410732269, "learning_rate": 0.00016165971727855183, "loss": 1.0494, "step": 1891 }, { "epoch": 0.3846310225655621, "grad_norm": 0.10339358448982239, "learning_rate": 0.00016163937760602055, "loss": 0.8586, "step": 1892 }, { "epoch": 0.3848343159178695, "grad_norm": 0.14473773539066315, "learning_rate": 0.00016161903793348927, "loss": 1.0651, "step": 1893 }, { "epoch": 0.3850376092701769, "grad_norm": 0.12131127715110779, "learning_rate": 0.00016159869826095803, "loss": 1.0674, "step": 1894 }, { "epoch": 0.3852409026224842, "grad_norm": 0.1297827512025833, "learning_rate": 0.00016157835858842672, "loss": 1.1171, "step": 1895 }, { "epoch": 0.3854441959747916, "grad_norm": 0.14175108075141907, "learning_rate": 0.00016155801891589545, "loss": 1.151, "step": 1896 }, { "epoch": 0.385647489327099, "grad_norm": 0.12038639187812805, "learning_rate": 0.00016153767924336417, "loss": 1.0481, "step": 1897 }, { "epoch": 0.38585078267940637, "grad_norm": 0.11626328527927399, "learning_rate": 0.00016151733957083292, "loss": 0.9606, "step": 1898 }, { "epoch": 0.38605407603171377, "grad_norm": 0.127833291888237, "learning_rate": 0.00016149699989830165, "loss": 1.0139, "step": 1899 }, { "epoch": 0.38625736938402117, "grad_norm": 0.13045917451381683, "learning_rate": 0.00016147666022577037, "loss": 1.035, "step": 1900 }, { "epoch": 0.3864606627363285, "grad_norm": 0.1294708251953125, "learning_rate": 0.0001614563205532391, "loss": 1.1797, "step": 1901 }, { "epoch": 0.3866639560886359, "grad_norm": 0.12301066517829895, "learning_rate": 0.00016143598088070785, "loss": 1.0155, "step": 1902 }, { "epoch": 0.3868672494409433, "grad_norm": 0.12555493414402008, "learning_rate": 0.00016141564120817655, "loss": 1.0664, "step": 1903 }, { "epoch": 0.38707054279325065, "grad_norm": 0.1144699901342392, "learning_rate": 0.00016139530153564527, "loss": 0.9436, "step": 1904 }, { "epoch": 0.38727383614555805, "grad_norm": 0.12643945217132568, "learning_rate": 0.000161374961863114, "loss": 1.0495, "step": 1905 }, { "epoch": 0.38747712949786545, "grad_norm": 0.13180217146873474, "learning_rate": 0.00016135462219058275, "loss": 1.0142, "step": 1906 }, { "epoch": 0.3876804228501728, "grad_norm": 0.12776418030261993, "learning_rate": 0.00016133428251805147, "loss": 1.0508, "step": 1907 }, { "epoch": 0.3878837162024802, "grad_norm": 0.12063184380531311, "learning_rate": 0.0001613139428455202, "loss": 1.0565, "step": 1908 }, { "epoch": 0.38808700955478753, "grad_norm": 0.12819765508174896, "learning_rate": 0.00016129360317298892, "loss": 1.0611, "step": 1909 }, { "epoch": 0.38829030290709493, "grad_norm": 0.12989814579486847, "learning_rate": 0.00016127326350045767, "loss": 0.9904, "step": 1910 }, { "epoch": 0.38849359625940233, "grad_norm": 0.11423414200544357, "learning_rate": 0.0001612529238279264, "loss": 0.9869, "step": 1911 }, { "epoch": 0.3886968896117097, "grad_norm": 0.14000189304351807, "learning_rate": 0.0001612325841553951, "loss": 1.0534, "step": 1912 }, { "epoch": 0.38890018296401707, "grad_norm": 0.13626928627490997, "learning_rate": 0.00016121224448286382, "loss": 1.2318, "step": 1913 }, { "epoch": 0.38910347631632447, "grad_norm": 0.15019413828849792, "learning_rate": 0.00016119190481033257, "loss": 1.2027, "step": 1914 }, { "epoch": 0.3893067696686318, "grad_norm": 0.1226695328950882, "learning_rate": 0.0001611715651378013, "loss": 0.9538, "step": 1915 }, { "epoch": 0.3895100630209392, "grad_norm": 0.12817354500293732, "learning_rate": 0.00016115122546527002, "loss": 1.0928, "step": 1916 }, { "epoch": 0.3897133563732466, "grad_norm": 0.12198452651500702, "learning_rate": 0.00016113088579273874, "loss": 0.9358, "step": 1917 }, { "epoch": 0.38991664972555395, "grad_norm": 0.11782688647508621, "learning_rate": 0.00016111054612020747, "loss": 0.8752, "step": 1918 }, { "epoch": 0.39011994307786135, "grad_norm": 0.10750327259302139, "learning_rate": 0.00016109020644767622, "loss": 0.897, "step": 1919 }, { "epoch": 0.39032323643016875, "grad_norm": 0.12854933738708496, "learning_rate": 0.00016106986677514492, "loss": 1.1868, "step": 1920 }, { "epoch": 0.3905265297824761, "grad_norm": 0.12874183058738708, "learning_rate": 0.00016104952710261364, "loss": 1.1017, "step": 1921 }, { "epoch": 0.3907298231347835, "grad_norm": 0.11504833400249481, "learning_rate": 0.00016102918743008237, "loss": 1.0192, "step": 1922 }, { "epoch": 0.3909331164870909, "grad_norm": 0.13493189215660095, "learning_rate": 0.00016100884775755112, "loss": 1.1141, "step": 1923 }, { "epoch": 0.39113640983939824, "grad_norm": 0.1267276406288147, "learning_rate": 0.00016098850808501984, "loss": 1.1147, "step": 1924 }, { "epoch": 0.39133970319170563, "grad_norm": 0.1132739931344986, "learning_rate": 0.00016096816841248857, "loss": 0.9775, "step": 1925 }, { "epoch": 0.39154299654401303, "grad_norm": 0.1274060159921646, "learning_rate": 0.0001609478287399573, "loss": 1.1937, "step": 1926 }, { "epoch": 0.3917462898963204, "grad_norm": 0.11706088483333588, "learning_rate": 0.00016092748906742604, "loss": 0.9774, "step": 1927 }, { "epoch": 0.3919495832486278, "grad_norm": 0.12287326157093048, "learning_rate": 0.00016090714939489474, "loss": 1.0348, "step": 1928 }, { "epoch": 0.3921528766009352, "grad_norm": 0.12456396222114563, "learning_rate": 0.00016088680972236346, "loss": 0.9097, "step": 1929 }, { "epoch": 0.3923561699532425, "grad_norm": 0.11585330218076706, "learning_rate": 0.0001608664700498322, "loss": 1.0469, "step": 1930 }, { "epoch": 0.3925594633055499, "grad_norm": 0.1536455601453781, "learning_rate": 0.00016084613037730094, "loss": 1.1005, "step": 1931 }, { "epoch": 0.3927627566578573, "grad_norm": 0.12221349030733109, "learning_rate": 0.00016082579070476966, "loss": 0.9721, "step": 1932 }, { "epoch": 0.39296605001016466, "grad_norm": 0.12621140480041504, "learning_rate": 0.0001608054510322384, "loss": 1.0791, "step": 1933 }, { "epoch": 0.39316934336247206, "grad_norm": 0.13487426936626434, "learning_rate": 0.0001607851113597071, "loss": 0.9798, "step": 1934 }, { "epoch": 0.3933726367147794, "grad_norm": 0.13655559718608856, "learning_rate": 0.00016076477168717586, "loss": 1.1909, "step": 1935 }, { "epoch": 0.3935759300670868, "grad_norm": 0.12217934429645538, "learning_rate": 0.00016074443201464456, "loss": 0.9678, "step": 1936 }, { "epoch": 0.3937792234193942, "grad_norm": 0.13106785714626312, "learning_rate": 0.0001607240923421133, "loss": 1.1034, "step": 1937 }, { "epoch": 0.39398251677170154, "grad_norm": 0.13911622762680054, "learning_rate": 0.000160703752669582, "loss": 1.136, "step": 1938 }, { "epoch": 0.39418581012400894, "grad_norm": 0.12952151894569397, "learning_rate": 0.00016068341299705076, "loss": 1.0292, "step": 1939 }, { "epoch": 0.39438910347631634, "grad_norm": 0.12866811454296112, "learning_rate": 0.0001606630733245195, "loss": 1.1424, "step": 1940 }, { "epoch": 0.3945923968286237, "grad_norm": 0.12720058858394623, "learning_rate": 0.0001606427336519882, "loss": 1.1688, "step": 1941 }, { "epoch": 0.3947956901809311, "grad_norm": 0.13742884993553162, "learning_rate": 0.00016062239397945694, "loss": 1.0547, "step": 1942 }, { "epoch": 0.3949989835332385, "grad_norm": 0.12812934815883636, "learning_rate": 0.0001606020543069257, "loss": 1.0, "step": 1943 }, { "epoch": 0.3952022768855458, "grad_norm": 0.1265181452035904, "learning_rate": 0.00016058171463439439, "loss": 1.2084, "step": 1944 }, { "epoch": 0.3954055702378532, "grad_norm": 0.11929038912057877, "learning_rate": 0.0001605613749618631, "loss": 1.0126, "step": 1945 }, { "epoch": 0.3956088635901606, "grad_norm": 0.128428652882576, "learning_rate": 0.00016054103528933183, "loss": 1.1729, "step": 1946 }, { "epoch": 0.39581215694246796, "grad_norm": 0.14802579581737518, "learning_rate": 0.00016052069561680059, "loss": 1.1445, "step": 1947 }, { "epoch": 0.39601545029477536, "grad_norm": 0.1259651482105255, "learning_rate": 0.0001605003559442693, "loss": 1.0906, "step": 1948 }, { "epoch": 0.39621874364708276, "grad_norm": 0.12911193072795868, "learning_rate": 0.00016048001627173803, "loss": 1.0483, "step": 1949 }, { "epoch": 0.3964220369993901, "grad_norm": 0.1306496411561966, "learning_rate": 0.00016045967659920676, "loss": 1.105, "step": 1950 }, { "epoch": 0.3966253303516975, "grad_norm": 0.12834158539772034, "learning_rate": 0.0001604393369266755, "loss": 1.0814, "step": 1951 }, { "epoch": 0.3968286237040049, "grad_norm": 0.11149043589830399, "learning_rate": 0.0001604189972541442, "loss": 0.9313, "step": 1952 }, { "epoch": 0.39703191705631224, "grad_norm": 0.13247650861740112, "learning_rate": 0.00016039865758161293, "loss": 1.0523, "step": 1953 }, { "epoch": 0.39723521040861964, "grad_norm": 0.13839392364025116, "learning_rate": 0.00016037831790908166, "loss": 1.1873, "step": 1954 }, { "epoch": 0.39743850376092704, "grad_norm": 0.13359107077121735, "learning_rate": 0.0001603579782365504, "loss": 1.1791, "step": 1955 }, { "epoch": 0.3976417971132344, "grad_norm": 0.13618066906929016, "learning_rate": 0.00016033763856401913, "loss": 1.1673, "step": 1956 }, { "epoch": 0.3978450904655418, "grad_norm": 0.13163338601589203, "learning_rate": 0.00016031729889148786, "loss": 1.1715, "step": 1957 }, { "epoch": 0.3980483838178492, "grad_norm": 0.12153584510087967, "learning_rate": 0.00016029695921895658, "loss": 1.0582, "step": 1958 }, { "epoch": 0.3982516771701565, "grad_norm": 0.13559706509113312, "learning_rate": 0.0001602766195464253, "loss": 1.1622, "step": 1959 }, { "epoch": 0.3984549705224639, "grad_norm": 0.12290627509355545, "learning_rate": 0.00016025627987389403, "loss": 0.9632, "step": 1960 }, { "epoch": 0.39865826387477127, "grad_norm": 0.1298772245645523, "learning_rate": 0.00016023594020136276, "loss": 1.1786, "step": 1961 }, { "epoch": 0.39886155722707867, "grad_norm": 0.13533517718315125, "learning_rate": 0.00016021560052883148, "loss": 0.9701, "step": 1962 }, { "epoch": 0.39906485057938607, "grad_norm": 0.12389865517616272, "learning_rate": 0.0001601952608563002, "loss": 0.9966, "step": 1963 }, { "epoch": 0.3992681439316934, "grad_norm": 0.11722499132156372, "learning_rate": 0.00016017492118376896, "loss": 0.7836, "step": 1964 }, { "epoch": 0.3994714372840008, "grad_norm": 0.142978236079216, "learning_rate": 0.00016015458151123768, "loss": 1.1237, "step": 1965 }, { "epoch": 0.3996747306363082, "grad_norm": 0.12385619431734085, "learning_rate": 0.0001601342418387064, "loss": 1.0396, "step": 1966 }, { "epoch": 0.39987802398861555, "grad_norm": 0.12524884939193726, "learning_rate": 0.00016011390216617513, "loss": 1.1242, "step": 1967 }, { "epoch": 0.40008131734092295, "grad_norm": 0.13346579670906067, "learning_rate": 0.00016009356249364388, "loss": 1.1125, "step": 1968 }, { "epoch": 0.40028461069323035, "grad_norm": 0.12904873490333557, "learning_rate": 0.00016007322282111258, "loss": 1.0506, "step": 1969 }, { "epoch": 0.4004879040455377, "grad_norm": 0.12711603939533234, "learning_rate": 0.0001600528831485813, "loss": 1.0445, "step": 1970 }, { "epoch": 0.4006911973978451, "grad_norm": 0.13451595604419708, "learning_rate": 0.00016003254347605003, "loss": 1.0552, "step": 1971 }, { "epoch": 0.4008944907501525, "grad_norm": 0.146467387676239, "learning_rate": 0.00016001220380351878, "loss": 1.0093, "step": 1972 }, { "epoch": 0.40109778410245983, "grad_norm": 0.12398801743984222, "learning_rate": 0.0001599918641309875, "loss": 0.9897, "step": 1973 }, { "epoch": 0.40130107745476723, "grad_norm": 0.10917028784751892, "learning_rate": 0.00015997152445845623, "loss": 0.8371, "step": 1974 }, { "epoch": 0.40150437080707463, "grad_norm": 0.14363138377666473, "learning_rate": 0.00015995118478592495, "loss": 1.0559, "step": 1975 }, { "epoch": 0.40170766415938197, "grad_norm": 0.12954387068748474, "learning_rate": 0.0001599308451133937, "loss": 1.1048, "step": 1976 }, { "epoch": 0.40191095751168937, "grad_norm": 0.12329546362161636, "learning_rate": 0.0001599105054408624, "loss": 1.0156, "step": 1977 }, { "epoch": 0.40211425086399677, "grad_norm": 0.11060404032468796, "learning_rate": 0.00015989016576833113, "loss": 0.8623, "step": 1978 }, { "epoch": 0.4023175442163041, "grad_norm": 0.1489768773317337, "learning_rate": 0.00015986982609579985, "loss": 1.0878, "step": 1979 }, { "epoch": 0.4025208375686115, "grad_norm": 0.12081994861364365, "learning_rate": 0.0001598494864232686, "loss": 0.9822, "step": 1980 }, { "epoch": 0.4027241309209189, "grad_norm": 0.14777058362960815, "learning_rate": 0.00015982914675073733, "loss": 1.2367, "step": 1981 }, { "epoch": 0.40292742427322625, "grad_norm": 0.12689609825611115, "learning_rate": 0.00015980880707820605, "loss": 1.1029, "step": 1982 }, { "epoch": 0.40313071762553365, "grad_norm": 0.12985149025917053, "learning_rate": 0.00015978846740567477, "loss": 1.1604, "step": 1983 }, { "epoch": 0.40333401097784105, "grad_norm": 0.11619044095277786, "learning_rate": 0.00015976812773314353, "loss": 0.9736, "step": 1984 }, { "epoch": 0.4035373043301484, "grad_norm": 0.13137032091617584, "learning_rate": 0.00015974778806061222, "loss": 1.0877, "step": 1985 }, { "epoch": 0.4037405976824558, "grad_norm": 0.13488256931304932, "learning_rate": 0.00015972744838808095, "loss": 1.2048, "step": 1986 }, { "epoch": 0.40394389103476314, "grad_norm": 0.2605299949645996, "learning_rate": 0.00015970710871554967, "loss": 0.9133, "step": 1987 }, { "epoch": 0.40414718438707053, "grad_norm": 0.13198648393154144, "learning_rate": 0.00015968676904301842, "loss": 0.9512, "step": 1988 }, { "epoch": 0.40435047773937793, "grad_norm": 0.1144537478685379, "learning_rate": 0.00015966642937048715, "loss": 1.0429, "step": 1989 }, { "epoch": 0.4045537710916853, "grad_norm": 0.1217435896396637, "learning_rate": 0.00015964608969795587, "loss": 1.0744, "step": 1990 }, { "epoch": 0.4047570644439927, "grad_norm": 0.12606003880500793, "learning_rate": 0.0001596257500254246, "loss": 1.0945, "step": 1991 }, { "epoch": 0.4049603577963001, "grad_norm": 0.13098153471946716, "learning_rate": 0.00015960541035289335, "loss": 1.2815, "step": 1992 }, { "epoch": 0.4051636511486074, "grad_norm": 0.11636700481176376, "learning_rate": 0.00015958507068036205, "loss": 0.9849, "step": 1993 }, { "epoch": 0.4053669445009148, "grad_norm": 0.12562847137451172, "learning_rate": 0.00015956473100783077, "loss": 1.0519, "step": 1994 }, { "epoch": 0.4055702378532222, "grad_norm": 0.13366295397281647, "learning_rate": 0.0001595443913352995, "loss": 1.2831, "step": 1995 }, { "epoch": 0.40577353120552956, "grad_norm": 0.12537652254104614, "learning_rate": 0.00015952405166276825, "loss": 0.9735, "step": 1996 }, { "epoch": 0.40597682455783696, "grad_norm": 0.12336364388465881, "learning_rate": 0.00015950371199023697, "loss": 1.0355, "step": 1997 }, { "epoch": 0.40618011791014436, "grad_norm": 0.13342751562595367, "learning_rate": 0.0001594833723177057, "loss": 1.1506, "step": 1998 }, { "epoch": 0.4063834112624517, "grad_norm": 0.13104167580604553, "learning_rate": 0.00015946303264517442, "loss": 1.2302, "step": 1999 }, { "epoch": 0.4065867046147591, "grad_norm": 0.1287468671798706, "learning_rate": 0.00015944269297264314, "loss": 1.1503, "step": 2000 }, { "epoch": 0.4067899979670665, "grad_norm": 0.1316487044095993, "learning_rate": 0.00015942235330011187, "loss": 1.0732, "step": 2001 }, { "epoch": 0.40699329131937384, "grad_norm": 0.12111165374517441, "learning_rate": 0.0001594020136275806, "loss": 0.9384, "step": 2002 }, { "epoch": 0.40719658467168124, "grad_norm": 0.11894603073596954, "learning_rate": 0.00015938167395504932, "loss": 1.0157, "step": 2003 }, { "epoch": 0.40739987802398864, "grad_norm": 0.13228829205036163, "learning_rate": 0.00015936133428251804, "loss": 1.11, "step": 2004 }, { "epoch": 0.407603171376296, "grad_norm": 0.12275683134794235, "learning_rate": 0.0001593409946099868, "loss": 1.0183, "step": 2005 }, { "epoch": 0.4078064647286034, "grad_norm": 0.1273687779903412, "learning_rate": 0.00015932065493745552, "loss": 1.024, "step": 2006 }, { "epoch": 0.4080097580809108, "grad_norm": 0.13763071596622467, "learning_rate": 0.00015930031526492424, "loss": 1.1465, "step": 2007 }, { "epoch": 0.4082130514332181, "grad_norm": 0.13428914546966553, "learning_rate": 0.00015927997559239297, "loss": 1.1119, "step": 2008 }, { "epoch": 0.4084163447855255, "grad_norm": 0.12992502748966217, "learning_rate": 0.0001592596359198617, "loss": 1.0341, "step": 2009 }, { "epoch": 0.4086196381378329, "grad_norm": 0.11928235739469528, "learning_rate": 0.00015923929624733042, "loss": 0.9454, "step": 2010 }, { "epoch": 0.40882293149014026, "grad_norm": 0.13093991577625275, "learning_rate": 0.00015921895657479914, "loss": 0.9528, "step": 2011 }, { "epoch": 0.40902622484244766, "grad_norm": 0.1252833604812622, "learning_rate": 0.00015919861690226787, "loss": 1.0849, "step": 2012 }, { "epoch": 0.409229518194755, "grad_norm": 0.13304093480110168, "learning_rate": 0.00015917827722973662, "loss": 1.1173, "step": 2013 }, { "epoch": 0.4094328115470624, "grad_norm": 0.11735294759273529, "learning_rate": 0.00015915793755720534, "loss": 0.9961, "step": 2014 }, { "epoch": 0.4096361048993698, "grad_norm": 0.133205845952034, "learning_rate": 0.00015913759788467407, "loss": 1.0818, "step": 2015 }, { "epoch": 0.40983939825167714, "grad_norm": 0.12949281930923462, "learning_rate": 0.0001591172582121428, "loss": 1.0151, "step": 2016 }, { "epoch": 0.41004269160398454, "grad_norm": 0.12819421291351318, "learning_rate": 0.00015909691853961151, "loss": 1.0449, "step": 2017 }, { "epoch": 0.41024598495629194, "grad_norm": 0.12694479525089264, "learning_rate": 0.00015907657886708024, "loss": 0.9756, "step": 2018 }, { "epoch": 0.4104492783085993, "grad_norm": 0.11785703897476196, "learning_rate": 0.00015905623919454896, "loss": 0.8295, "step": 2019 }, { "epoch": 0.4106525716609067, "grad_norm": 0.12432985007762909, "learning_rate": 0.0001590358995220177, "loss": 1.0375, "step": 2020 }, { "epoch": 0.4108558650132141, "grad_norm": 0.12006914615631104, "learning_rate": 0.00015901555984948644, "loss": 0.9818, "step": 2021 }, { "epoch": 0.4110591583655214, "grad_norm": 0.13180270791053772, "learning_rate": 0.00015899522017695516, "loss": 1.165, "step": 2022 }, { "epoch": 0.4112624517178288, "grad_norm": 0.13662603497505188, "learning_rate": 0.0001589748805044239, "loss": 1.0978, "step": 2023 }, { "epoch": 0.4114657450701362, "grad_norm": 0.137676402926445, "learning_rate": 0.0001589545408318926, "loss": 1.0695, "step": 2024 }, { "epoch": 0.41166903842244357, "grad_norm": 0.13281960785388947, "learning_rate": 0.00015893420115936136, "loss": 0.9717, "step": 2025 }, { "epoch": 0.41187233177475097, "grad_norm": 0.1159568652510643, "learning_rate": 0.00015891386148683006, "loss": 0.9863, "step": 2026 }, { "epoch": 0.41207562512705836, "grad_norm": 0.12235623598098755, "learning_rate": 0.00015889352181429879, "loss": 1.167, "step": 2027 }, { "epoch": 0.4122789184793657, "grad_norm": 0.11709940433502197, "learning_rate": 0.0001588731821417675, "loss": 1.0618, "step": 2028 }, { "epoch": 0.4124822118316731, "grad_norm": 0.13078409433364868, "learning_rate": 0.00015885284246923626, "loss": 1.1512, "step": 2029 }, { "epoch": 0.4126855051839805, "grad_norm": 0.12789343297481537, "learning_rate": 0.000158832502796705, "loss": 1.2586, "step": 2030 }, { "epoch": 0.41288879853628785, "grad_norm": 0.12031058967113495, "learning_rate": 0.0001588121631241737, "loss": 0.9543, "step": 2031 }, { "epoch": 0.41309209188859525, "grad_norm": 0.1303958296775818, "learning_rate": 0.00015879182345164244, "loss": 1.1823, "step": 2032 }, { "epoch": 0.41329538524090265, "grad_norm": 0.12915648519992828, "learning_rate": 0.0001587714837791112, "loss": 1.1199, "step": 2033 }, { "epoch": 0.41349867859321, "grad_norm": 0.13749873638153076, "learning_rate": 0.00015875114410657988, "loss": 1.1359, "step": 2034 }, { "epoch": 0.4137019719455174, "grad_norm": 0.120378777384758, "learning_rate": 0.0001587308044340486, "loss": 1.0311, "step": 2035 }, { "epoch": 0.4139052652978248, "grad_norm": 0.13083983957767487, "learning_rate": 0.00015871046476151733, "loss": 1.1491, "step": 2036 }, { "epoch": 0.41410855865013213, "grad_norm": 0.1264946162700653, "learning_rate": 0.00015869012508898609, "loss": 1.0575, "step": 2037 }, { "epoch": 0.41431185200243953, "grad_norm": 0.13813161849975586, "learning_rate": 0.0001586697854164548, "loss": 1.1596, "step": 2038 }, { "epoch": 0.4145151453547469, "grad_norm": 0.1298746019601822, "learning_rate": 0.00015864944574392353, "loss": 1.0107, "step": 2039 }, { "epoch": 0.41471843870705427, "grad_norm": 0.13159529864788055, "learning_rate": 0.00015862910607139226, "loss": 0.9777, "step": 2040 }, { "epoch": 0.41492173205936167, "grad_norm": 0.13329805433750153, "learning_rate": 0.00015860876639886098, "loss": 1.1749, "step": 2041 }, { "epoch": 0.415125025411669, "grad_norm": 0.12069873511791229, "learning_rate": 0.0001585884267263297, "loss": 1.1532, "step": 2042 }, { "epoch": 0.4153283187639764, "grad_norm": 0.11439201235771179, "learning_rate": 0.00015856808705379843, "loss": 0.8484, "step": 2043 }, { "epoch": 0.4155316121162838, "grad_norm": 0.14021088182926178, "learning_rate": 0.00015854774738126716, "loss": 0.9592, "step": 2044 }, { "epoch": 0.41573490546859115, "grad_norm": 0.12401128560304642, "learning_rate": 0.00015852740770873588, "loss": 0.9658, "step": 2045 }, { "epoch": 0.41593819882089855, "grad_norm": 0.1366535723209381, "learning_rate": 0.00015850706803620463, "loss": 1.1259, "step": 2046 }, { "epoch": 0.41614149217320595, "grad_norm": 0.11328650265932083, "learning_rate": 0.00015848672836367336, "loss": 0.9499, "step": 2047 }, { "epoch": 0.4163447855255133, "grad_norm": 0.13193942606449127, "learning_rate": 0.00015846638869114208, "loss": 1.0833, "step": 2048 }, { "epoch": 0.4165480788778207, "grad_norm": 0.1413910835981369, "learning_rate": 0.0001584460490186108, "loss": 1.0693, "step": 2049 }, { "epoch": 0.4167513722301281, "grad_norm": 0.11957409977912903, "learning_rate": 0.00015842570934607953, "loss": 1.0102, "step": 2050 }, { "epoch": 0.41695466558243544, "grad_norm": 0.12360769510269165, "learning_rate": 0.00015840536967354825, "loss": 1.1312, "step": 2051 }, { "epoch": 0.41715795893474283, "grad_norm": 0.12471318989992142, "learning_rate": 0.00015838503000101698, "loss": 1.1255, "step": 2052 }, { "epoch": 0.41736125228705023, "grad_norm": 0.129171222448349, "learning_rate": 0.0001583646903284857, "loss": 1.103, "step": 2053 }, { "epoch": 0.4175645456393576, "grad_norm": 0.14544697105884552, "learning_rate": 0.00015834435065595446, "loss": 1.0373, "step": 2054 }, { "epoch": 0.417767838991665, "grad_norm": 0.12571415305137634, "learning_rate": 0.00015832401098342318, "loss": 1.0675, "step": 2055 }, { "epoch": 0.4179711323439724, "grad_norm": 0.12805119156837463, "learning_rate": 0.0001583036713108919, "loss": 1.0713, "step": 2056 }, { "epoch": 0.4181744256962797, "grad_norm": 0.1386822909116745, "learning_rate": 0.00015828333163836063, "loss": 1.1084, "step": 2057 }, { "epoch": 0.4183777190485871, "grad_norm": 0.14066076278686523, "learning_rate": 0.00015826299196582935, "loss": 1.2577, "step": 2058 }, { "epoch": 0.4185810124008945, "grad_norm": 0.1231965720653534, "learning_rate": 0.00015824265229329808, "loss": 0.9684, "step": 2059 }, { "epoch": 0.41878430575320186, "grad_norm": 0.11889393627643585, "learning_rate": 0.0001582223126207668, "loss": 1.0393, "step": 2060 }, { "epoch": 0.41898759910550926, "grad_norm": 0.13274893164634705, "learning_rate": 0.00015820197294823553, "loss": 1.1742, "step": 2061 }, { "epoch": 0.41919089245781666, "grad_norm": 0.14034253358840942, "learning_rate": 0.00015818163327570428, "loss": 0.9673, "step": 2062 }, { "epoch": 0.419394185810124, "grad_norm": 0.14197202026844025, "learning_rate": 0.000158161293603173, "loss": 1.0931, "step": 2063 }, { "epoch": 0.4195974791624314, "grad_norm": 0.12458556890487671, "learning_rate": 0.00015814095393064173, "loss": 0.9713, "step": 2064 }, { "epoch": 0.41980077251473874, "grad_norm": 0.1311383694410324, "learning_rate": 0.00015812061425811045, "loss": 1.2641, "step": 2065 }, { "epoch": 0.42000406586704614, "grad_norm": 0.13218726217746735, "learning_rate": 0.00015810027458557918, "loss": 1.2132, "step": 2066 }, { "epoch": 0.42020735921935354, "grad_norm": 0.14619286358356476, "learning_rate": 0.0001580799349130479, "loss": 1.1031, "step": 2067 }, { "epoch": 0.4204106525716609, "grad_norm": 0.13700971007347107, "learning_rate": 0.00015805959524051662, "loss": 1.0364, "step": 2068 }, { "epoch": 0.4206139459239683, "grad_norm": 0.12797488272190094, "learning_rate": 0.00015803925556798535, "loss": 1.1025, "step": 2069 }, { "epoch": 0.4208172392762757, "grad_norm": 0.1373629868030548, "learning_rate": 0.0001580189158954541, "loss": 1.1924, "step": 2070 }, { "epoch": 0.421020532628583, "grad_norm": 0.11743365973234177, "learning_rate": 0.00015799857622292283, "loss": 0.9271, "step": 2071 }, { "epoch": 0.4212238259808904, "grad_norm": 0.12701068818569183, "learning_rate": 0.00015797823655039155, "loss": 1.0388, "step": 2072 }, { "epoch": 0.4214271193331978, "grad_norm": 0.14013634622097015, "learning_rate": 0.00015795789687786027, "loss": 1.1594, "step": 2073 }, { "epoch": 0.42163041268550516, "grad_norm": 0.12524859607219696, "learning_rate": 0.000157937557205329, "loss": 1.0191, "step": 2074 }, { "epoch": 0.42183370603781256, "grad_norm": 0.11082011461257935, "learning_rate": 0.00015791721753279772, "loss": 0.9211, "step": 2075 }, { "epoch": 0.42203699939011996, "grad_norm": 0.12579189240932465, "learning_rate": 0.00015789687786026645, "loss": 1.0269, "step": 2076 }, { "epoch": 0.4222402927424273, "grad_norm": 0.13665060698986053, "learning_rate": 0.00015787653818773517, "loss": 1.0662, "step": 2077 }, { "epoch": 0.4224435860947347, "grad_norm": 0.12775637209415436, "learning_rate": 0.00015785619851520392, "loss": 1.2206, "step": 2078 }, { "epoch": 0.4226468794470421, "grad_norm": 0.13883183896541595, "learning_rate": 0.00015783585884267265, "loss": 1.1484, "step": 2079 }, { "epoch": 0.42285017279934944, "grad_norm": 0.12899038195610046, "learning_rate": 0.00015781551917014137, "loss": 1.128, "step": 2080 }, { "epoch": 0.42305346615165684, "grad_norm": 0.12957079708576202, "learning_rate": 0.0001577951794976101, "loss": 1.1362, "step": 2081 }, { "epoch": 0.42325675950396424, "grad_norm": 0.12619063258171082, "learning_rate": 0.00015777483982507882, "loss": 1.1022, "step": 2082 }, { "epoch": 0.4234600528562716, "grad_norm": 0.12516093254089355, "learning_rate": 0.00015775450015254755, "loss": 1.1149, "step": 2083 }, { "epoch": 0.423663346208579, "grad_norm": 0.12650391459465027, "learning_rate": 0.00015773416048001627, "loss": 0.9535, "step": 2084 }, { "epoch": 0.4238666395608864, "grad_norm": 0.14746572077274323, "learning_rate": 0.000157713820807485, "loss": 1.222, "step": 2085 }, { "epoch": 0.4240699329131937, "grad_norm": 0.10933158546686172, "learning_rate": 0.00015769348113495372, "loss": 0.8832, "step": 2086 }, { "epoch": 0.4242732262655011, "grad_norm": 0.13668397068977356, "learning_rate": 0.00015767314146242247, "loss": 1.1808, "step": 2087 }, { "epoch": 0.4244765196178085, "grad_norm": 0.13031315803527832, "learning_rate": 0.0001576528017898912, "loss": 0.9752, "step": 2088 }, { "epoch": 0.42467981297011587, "grad_norm": 0.14403130114078522, "learning_rate": 0.00015763246211735992, "loss": 1.1372, "step": 2089 }, { "epoch": 0.42488310632242327, "grad_norm": 0.13902884721755981, "learning_rate": 0.00015761212244482864, "loss": 1.1492, "step": 2090 }, { "epoch": 0.42508639967473066, "grad_norm": 0.11546601355075836, "learning_rate": 0.00015759178277229737, "loss": 1.0091, "step": 2091 }, { "epoch": 0.425289693027038, "grad_norm": 0.12731419503688812, "learning_rate": 0.0001575714430997661, "loss": 1.0627, "step": 2092 }, { "epoch": 0.4254929863793454, "grad_norm": 0.11023043841123581, "learning_rate": 0.00015755110342723482, "loss": 0.8111, "step": 2093 }, { "epoch": 0.42569627973165275, "grad_norm": 0.14930586516857147, "learning_rate": 0.00015753076375470354, "loss": 1.206, "step": 2094 }, { "epoch": 0.42589957308396015, "grad_norm": 0.1300898790359497, "learning_rate": 0.0001575104240821723, "loss": 1.0915, "step": 2095 }, { "epoch": 0.42610286643626755, "grad_norm": 0.13914939761161804, "learning_rate": 0.00015749008440964102, "loss": 1.0512, "step": 2096 }, { "epoch": 0.4263061597885749, "grad_norm": 0.15199615061283112, "learning_rate": 0.00015746974473710974, "loss": 1.2611, "step": 2097 }, { "epoch": 0.4265094531408823, "grad_norm": 0.13422483205795288, "learning_rate": 0.00015744940506457847, "loss": 1.1213, "step": 2098 }, { "epoch": 0.4267127464931897, "grad_norm": 0.1285259872674942, "learning_rate": 0.0001574290653920472, "loss": 1.1144, "step": 2099 }, { "epoch": 0.42691603984549703, "grad_norm": 0.11812227219343185, "learning_rate": 0.00015740872571951592, "loss": 1.0033, "step": 2100 }, { "epoch": 0.42711933319780443, "grad_norm": 0.13837237656116486, "learning_rate": 0.00015738838604698464, "loss": 1.111, "step": 2101 }, { "epoch": 0.42732262655011183, "grad_norm": 0.1241428554058075, "learning_rate": 0.00015736804637445336, "loss": 1.0571, "step": 2102 }, { "epoch": 0.42752591990241917, "grad_norm": 0.1367318034172058, "learning_rate": 0.00015734770670192212, "loss": 1.1312, "step": 2103 }, { "epoch": 0.42772921325472657, "grad_norm": 0.12532354891300201, "learning_rate": 0.00015732736702939084, "loss": 1.0407, "step": 2104 }, { "epoch": 0.42793250660703397, "grad_norm": 0.12487448751926422, "learning_rate": 0.00015730702735685957, "loss": 1.072, "step": 2105 }, { "epoch": 0.4281357999593413, "grad_norm": 0.1195039302110672, "learning_rate": 0.0001572866876843283, "loss": 0.9995, "step": 2106 }, { "epoch": 0.4283390933116487, "grad_norm": 0.16366197168827057, "learning_rate": 0.00015726634801179701, "loss": 1.4101, "step": 2107 }, { "epoch": 0.4285423866639561, "grad_norm": 0.1324339210987091, "learning_rate": 0.00015724600833926574, "loss": 1.1455, "step": 2108 }, { "epoch": 0.42874568001626345, "grad_norm": 0.1581498682498932, "learning_rate": 0.00015722566866673446, "loss": 1.3092, "step": 2109 }, { "epoch": 0.42894897336857085, "grad_norm": 0.13514567911624908, "learning_rate": 0.0001572053289942032, "loss": 1.1998, "step": 2110 }, { "epoch": 0.42915226672087825, "grad_norm": 0.1304248571395874, "learning_rate": 0.00015718498932167194, "loss": 1.1488, "step": 2111 }, { "epoch": 0.4293555600731856, "grad_norm": 0.1261410117149353, "learning_rate": 0.00015716464964914066, "loss": 1.128, "step": 2112 }, { "epoch": 0.429558853425493, "grad_norm": 0.12761110067367554, "learning_rate": 0.0001571443099766094, "loss": 1.1816, "step": 2113 }, { "epoch": 0.4297621467778004, "grad_norm": 0.1307440996170044, "learning_rate": 0.0001571239703040781, "loss": 1.1362, "step": 2114 }, { "epoch": 0.42996544013010773, "grad_norm": 0.12130671739578247, "learning_rate": 0.00015710363063154684, "loss": 0.903, "step": 2115 }, { "epoch": 0.43016873348241513, "grad_norm": 0.12277641892433167, "learning_rate": 0.00015708329095901556, "loss": 0.9964, "step": 2116 }, { "epoch": 0.43037202683472253, "grad_norm": 0.13223405182361603, "learning_rate": 0.00015706295128648429, "loss": 0.9409, "step": 2117 }, { "epoch": 0.4305753201870299, "grad_norm": 0.12146423757076263, "learning_rate": 0.000157042611613953, "loss": 1.0116, "step": 2118 }, { "epoch": 0.4307786135393373, "grad_norm": 0.1319243311882019, "learning_rate": 0.00015702227194142176, "loss": 1.1354, "step": 2119 }, { "epoch": 0.4309819068916446, "grad_norm": 0.10945885628461838, "learning_rate": 0.0001570019322688905, "loss": 0.9314, "step": 2120 }, { "epoch": 0.431185200243952, "grad_norm": 0.12822148203849792, "learning_rate": 0.0001569815925963592, "loss": 0.9862, "step": 2121 }, { "epoch": 0.4313884935962594, "grad_norm": 0.13050609827041626, "learning_rate": 0.00015696125292382794, "loss": 1.1618, "step": 2122 }, { "epoch": 0.43159178694856676, "grad_norm": 0.1293252408504486, "learning_rate": 0.00015694091325129666, "loss": 1.0692, "step": 2123 }, { "epoch": 0.43179508030087416, "grad_norm": 0.15059755742549896, "learning_rate": 0.00015692057357876538, "loss": 0.9984, "step": 2124 }, { "epoch": 0.43199837365318156, "grad_norm": 0.13384853303432465, "learning_rate": 0.0001569002339062341, "loss": 1.1272, "step": 2125 }, { "epoch": 0.4322016670054889, "grad_norm": 0.1330154538154602, "learning_rate": 0.00015687989423370283, "loss": 0.958, "step": 2126 }, { "epoch": 0.4324049603577963, "grad_norm": 0.13418689370155334, "learning_rate": 0.00015685955456117156, "loss": 1.0939, "step": 2127 }, { "epoch": 0.4326082537101037, "grad_norm": 0.1272914856672287, "learning_rate": 0.0001568392148886403, "loss": 1.0839, "step": 2128 }, { "epoch": 0.43281154706241104, "grad_norm": 0.13827690482139587, "learning_rate": 0.00015681887521610903, "loss": 1.2304, "step": 2129 }, { "epoch": 0.43301484041471844, "grad_norm": 0.13577015697956085, "learning_rate": 0.00015679853554357776, "loss": 1.0749, "step": 2130 }, { "epoch": 0.43321813376702584, "grad_norm": 0.1302756816148758, "learning_rate": 0.00015677819587104648, "loss": 1.1381, "step": 2131 }, { "epoch": 0.4334214271193332, "grad_norm": 0.13045424222946167, "learning_rate": 0.0001567578561985152, "loss": 1.0538, "step": 2132 }, { "epoch": 0.4336247204716406, "grad_norm": 0.12351250648498535, "learning_rate": 0.00015673751652598393, "loss": 1.0061, "step": 2133 }, { "epoch": 0.433828013823948, "grad_norm": 0.1451653242111206, "learning_rate": 0.00015671717685345266, "loss": 1.1764, "step": 2134 }, { "epoch": 0.4340313071762553, "grad_norm": 0.12439122051000595, "learning_rate": 0.00015669683718092138, "loss": 0.9121, "step": 2135 }, { "epoch": 0.4342346005285627, "grad_norm": 0.1257990002632141, "learning_rate": 0.00015667649750839013, "loss": 1.0822, "step": 2136 }, { "epoch": 0.4344378938808701, "grad_norm": 0.13994207978248596, "learning_rate": 0.00015665615783585886, "loss": 1.2759, "step": 2137 }, { "epoch": 0.43464118723317746, "grad_norm": 0.14246414601802826, "learning_rate": 0.00015663581816332758, "loss": 1.2934, "step": 2138 }, { "epoch": 0.43484448058548486, "grad_norm": 0.1359516978263855, "learning_rate": 0.0001566154784907963, "loss": 1.1651, "step": 2139 }, { "epoch": 0.43504777393779226, "grad_norm": 0.12099796533584595, "learning_rate": 0.00015659513881826503, "loss": 0.9028, "step": 2140 }, { "epoch": 0.4352510672900996, "grad_norm": 0.12675108015537262, "learning_rate": 0.00015657479914573375, "loss": 1.026, "step": 2141 }, { "epoch": 0.435454360642407, "grad_norm": 0.13229331374168396, "learning_rate": 0.00015655445947320248, "loss": 1.0562, "step": 2142 }, { "epoch": 0.4356576539947144, "grad_norm": 0.12163117527961731, "learning_rate": 0.0001565341198006712, "loss": 1.1147, "step": 2143 }, { "epoch": 0.43586094734702174, "grad_norm": 0.13495270907878876, "learning_rate": 0.00015651378012813995, "loss": 1.0544, "step": 2144 }, { "epoch": 0.43606424069932914, "grad_norm": 0.11769222468137741, "learning_rate": 0.00015649344045560868, "loss": 1.0549, "step": 2145 }, { "epoch": 0.4362675340516365, "grad_norm": 0.14370734989643097, "learning_rate": 0.0001564731007830774, "loss": 1.1513, "step": 2146 }, { "epoch": 0.4364708274039439, "grad_norm": 0.1351398378610611, "learning_rate": 0.00015645276111054613, "loss": 1.0814, "step": 2147 }, { "epoch": 0.4366741207562513, "grad_norm": 0.13881100714206696, "learning_rate": 0.00015643242143801485, "loss": 1.1576, "step": 2148 }, { "epoch": 0.4368774141085586, "grad_norm": 0.11548882722854614, "learning_rate": 0.00015641208176548358, "loss": 1.0153, "step": 2149 }, { "epoch": 0.437080707460866, "grad_norm": 0.1512657254934311, "learning_rate": 0.0001563917420929523, "loss": 1.3081, "step": 2150 }, { "epoch": 0.4372840008131734, "grad_norm": 0.14576168358325958, "learning_rate": 0.00015637140242042103, "loss": 1.244, "step": 2151 }, { "epoch": 0.43748729416548077, "grad_norm": 0.14067471027374268, "learning_rate": 0.00015635106274788978, "loss": 1.023, "step": 2152 }, { "epoch": 0.43769058751778817, "grad_norm": 0.12753300368785858, "learning_rate": 0.0001563307230753585, "loss": 1.0292, "step": 2153 }, { "epoch": 0.43789388087009556, "grad_norm": 0.13246901333332062, "learning_rate": 0.00015631038340282723, "loss": 1.1607, "step": 2154 }, { "epoch": 0.4380971742224029, "grad_norm": 0.13709375262260437, "learning_rate": 0.00015629004373029595, "loss": 0.9848, "step": 2155 }, { "epoch": 0.4383004675747103, "grad_norm": 0.14666500687599182, "learning_rate": 0.00015626970405776468, "loss": 1.1349, "step": 2156 }, { "epoch": 0.4385037609270177, "grad_norm": 0.13992977142333984, "learning_rate": 0.0001562493643852334, "loss": 1.2692, "step": 2157 }, { "epoch": 0.43870705427932505, "grad_norm": 0.1444278359413147, "learning_rate": 0.00015622902471270212, "loss": 1.0354, "step": 2158 }, { "epoch": 0.43891034763163245, "grad_norm": 0.12587270140647888, "learning_rate": 0.00015620868504017085, "loss": 0.9783, "step": 2159 }, { "epoch": 0.43911364098393985, "grad_norm": 0.1311299055814743, "learning_rate": 0.0001561883453676396, "loss": 1.0541, "step": 2160 }, { "epoch": 0.4393169343362472, "grad_norm": 0.1246386170387268, "learning_rate": 0.00015616800569510832, "loss": 1.0384, "step": 2161 }, { "epoch": 0.4395202276885546, "grad_norm": 0.1286439597606659, "learning_rate": 0.00015614766602257705, "loss": 1.0909, "step": 2162 }, { "epoch": 0.439723521040862, "grad_norm": 0.12758703529834747, "learning_rate": 0.00015612732635004577, "loss": 1.0835, "step": 2163 }, { "epoch": 0.43992681439316933, "grad_norm": 0.1129162386059761, "learning_rate": 0.0001561069866775145, "loss": 0.9967, "step": 2164 }, { "epoch": 0.44013010774547673, "grad_norm": 0.12939536571502686, "learning_rate": 0.00015608664700498322, "loss": 1.133, "step": 2165 }, { "epoch": 0.4403334010977841, "grad_norm": 0.1393767148256302, "learning_rate": 0.00015606630733245195, "loss": 1.2379, "step": 2166 }, { "epoch": 0.44053669445009147, "grad_norm": 0.11744683235883713, "learning_rate": 0.00015604596765992067, "loss": 0.9686, "step": 2167 }, { "epoch": 0.44073998780239887, "grad_norm": 0.12241906672716141, "learning_rate": 0.0001560256279873894, "loss": 1.0188, "step": 2168 }, { "epoch": 0.44094328115470627, "grad_norm": 0.1316422075033188, "learning_rate": 0.00015600528831485815, "loss": 1.0292, "step": 2169 }, { "epoch": 0.4411465745070136, "grad_norm": 0.13489259779453278, "learning_rate": 0.00015598494864232687, "loss": 1.1332, "step": 2170 }, { "epoch": 0.441349867859321, "grad_norm": 0.13207697868347168, "learning_rate": 0.0001559646089697956, "loss": 1.0482, "step": 2171 }, { "epoch": 0.44155316121162835, "grad_norm": 0.13211561739444733, "learning_rate": 0.00015594426929726432, "loss": 1.064, "step": 2172 }, { "epoch": 0.44175645456393575, "grad_norm": 0.14821046590805054, "learning_rate": 0.00015592392962473305, "loss": 1.1058, "step": 2173 }, { "epoch": 0.44195974791624315, "grad_norm": 0.11314582824707031, "learning_rate": 0.00015590358995220177, "loss": 0.9351, "step": 2174 }, { "epoch": 0.4421630412685505, "grad_norm": 0.12001941353082657, "learning_rate": 0.0001558832502796705, "loss": 0.9875, "step": 2175 }, { "epoch": 0.4423663346208579, "grad_norm": 0.1433805674314499, "learning_rate": 0.00015586291060713922, "loss": 1.1552, "step": 2176 }, { "epoch": 0.4425696279731653, "grad_norm": 0.11265136301517487, "learning_rate": 0.00015584257093460797, "loss": 0.9431, "step": 2177 }, { "epoch": 0.44277292132547263, "grad_norm": 0.12973473966121674, "learning_rate": 0.0001558222312620767, "loss": 1.0591, "step": 2178 }, { "epoch": 0.44297621467778003, "grad_norm": 0.13749995827674866, "learning_rate": 0.00015580189158954542, "loss": 1.161, "step": 2179 }, { "epoch": 0.44317950803008743, "grad_norm": 0.12814348936080933, "learning_rate": 0.00015578155191701414, "loss": 1.1314, "step": 2180 }, { "epoch": 0.4433828013823948, "grad_norm": 0.1292744129896164, "learning_rate": 0.00015576121224448287, "loss": 0.9885, "step": 2181 }, { "epoch": 0.4435860947347022, "grad_norm": 0.11668647080659866, "learning_rate": 0.0001557408725719516, "loss": 0.9546, "step": 2182 }, { "epoch": 0.4437893880870096, "grad_norm": 0.13656672835350037, "learning_rate": 0.00015572053289942032, "loss": 1.1105, "step": 2183 }, { "epoch": 0.4439926814393169, "grad_norm": 0.13544489443302155, "learning_rate": 0.00015570019322688904, "loss": 1.0632, "step": 2184 }, { "epoch": 0.4441959747916243, "grad_norm": 0.14713092148303986, "learning_rate": 0.0001556798535543578, "loss": 1.0508, "step": 2185 }, { "epoch": 0.4443992681439317, "grad_norm": 0.1292864978313446, "learning_rate": 0.00015565951388182652, "loss": 0.8771, "step": 2186 }, { "epoch": 0.44460256149623906, "grad_norm": 0.12545311450958252, "learning_rate": 0.00015563917420929524, "loss": 1.1117, "step": 2187 }, { "epoch": 0.44480585484854646, "grad_norm": 0.1412542760372162, "learning_rate": 0.00015561883453676397, "loss": 1.3364, "step": 2188 }, { "epoch": 0.44500914820085385, "grad_norm": 0.11404701322317123, "learning_rate": 0.0001555984948642327, "loss": 0.9512, "step": 2189 }, { "epoch": 0.4452124415531612, "grad_norm": 0.11949559301137924, "learning_rate": 0.00015557815519170142, "loss": 1.0406, "step": 2190 }, { "epoch": 0.4454157349054686, "grad_norm": 0.10985735058784485, "learning_rate": 0.00015555781551917014, "loss": 0.9292, "step": 2191 }, { "epoch": 0.445619028257776, "grad_norm": 0.14047123491764069, "learning_rate": 0.00015553747584663886, "loss": 1.0895, "step": 2192 }, { "epoch": 0.44582232161008334, "grad_norm": 0.1520707756280899, "learning_rate": 0.00015551713617410762, "loss": 1.118, "step": 2193 }, { "epoch": 0.44602561496239074, "grad_norm": 0.13202513754367828, "learning_rate": 0.00015549679650157634, "loss": 1.0031, "step": 2194 }, { "epoch": 0.44622890831469814, "grad_norm": 0.13803257048130035, "learning_rate": 0.00015547645682904507, "loss": 1.106, "step": 2195 }, { "epoch": 0.4464322016670055, "grad_norm": 0.13089017570018768, "learning_rate": 0.0001554561171565138, "loss": 1.0706, "step": 2196 }, { "epoch": 0.4466354950193129, "grad_norm": 0.12646476924419403, "learning_rate": 0.00015543577748398251, "loss": 1.0548, "step": 2197 }, { "epoch": 0.4468387883716202, "grad_norm": 0.14208228886127472, "learning_rate": 0.00015541543781145124, "loss": 1.1952, "step": 2198 }, { "epoch": 0.4470420817239276, "grad_norm": 0.1471976786851883, "learning_rate": 0.00015539509813891996, "loss": 1.1987, "step": 2199 }, { "epoch": 0.447245375076235, "grad_norm": 0.11970525234937668, "learning_rate": 0.0001553747584663887, "loss": 0.9523, "step": 2200 }, { "epoch": 0.44744866842854236, "grad_norm": 0.1178225502371788, "learning_rate": 0.00015535441879385744, "loss": 0.9105, "step": 2201 }, { "epoch": 0.44765196178084976, "grad_norm": 0.11376915872097015, "learning_rate": 0.00015533407912132616, "loss": 0.9041, "step": 2202 }, { "epoch": 0.44785525513315716, "grad_norm": 0.12055668234825134, "learning_rate": 0.0001553137394487949, "loss": 0.9057, "step": 2203 }, { "epoch": 0.4480585484854645, "grad_norm": 0.1293669193983078, "learning_rate": 0.0001552933997762636, "loss": 1.047, "step": 2204 }, { "epoch": 0.4482618418377719, "grad_norm": 0.14985014498233795, "learning_rate": 0.00015527306010373234, "loss": 1.2782, "step": 2205 }, { "epoch": 0.4484651351900793, "grad_norm": 0.12716402113437653, "learning_rate": 0.00015525272043120106, "loss": 1.0629, "step": 2206 }, { "epoch": 0.44866842854238664, "grad_norm": 0.13868549466133118, "learning_rate": 0.00015523238075866979, "loss": 1.248, "step": 2207 }, { "epoch": 0.44887172189469404, "grad_norm": 0.12867020070552826, "learning_rate": 0.0001552120410861385, "loss": 1.0579, "step": 2208 }, { "epoch": 0.44907501524700144, "grad_norm": 0.14104703068733215, "learning_rate": 0.00015519170141360723, "loss": 1.2559, "step": 2209 }, { "epoch": 0.4492783085993088, "grad_norm": 0.13124023377895355, "learning_rate": 0.00015517136174107599, "loss": 0.9115, "step": 2210 }, { "epoch": 0.4494816019516162, "grad_norm": 0.1268378347158432, "learning_rate": 0.0001551510220685447, "loss": 1.0753, "step": 2211 }, { "epoch": 0.4496848953039236, "grad_norm": 0.12500889599323273, "learning_rate": 0.00015513068239601344, "loss": 1.1563, "step": 2212 }, { "epoch": 0.4498881886562309, "grad_norm": 0.15399597585201263, "learning_rate": 0.00015511034272348216, "loss": 1.3699, "step": 2213 }, { "epoch": 0.4500914820085383, "grad_norm": 0.13191649317741394, "learning_rate": 0.00015509000305095088, "loss": 1.0768, "step": 2214 }, { "epoch": 0.4502947753608457, "grad_norm": 0.13257142901420593, "learning_rate": 0.0001550696633784196, "loss": 1.2508, "step": 2215 }, { "epoch": 0.45049806871315307, "grad_norm": 0.1298341304063797, "learning_rate": 0.00015504932370588833, "loss": 1.0288, "step": 2216 }, { "epoch": 0.45070136206546046, "grad_norm": 0.12427882850170135, "learning_rate": 0.00015502898403335706, "loss": 1.0185, "step": 2217 }, { "epoch": 0.45090465541776786, "grad_norm": 0.13115955889225006, "learning_rate": 0.0001550086443608258, "loss": 1.0496, "step": 2218 }, { "epoch": 0.4511079487700752, "grad_norm": 0.12085919827222824, "learning_rate": 0.00015498830468829453, "loss": 1.0642, "step": 2219 }, { "epoch": 0.4513112421223826, "grad_norm": 0.14256790280342102, "learning_rate": 0.00015496796501576326, "loss": 1.199, "step": 2220 }, { "epoch": 0.45151453547469, "grad_norm": 0.14703063666820526, "learning_rate": 0.00015494762534323198, "loss": 1.232, "step": 2221 }, { "epoch": 0.45171782882699735, "grad_norm": 0.13513730466365814, "learning_rate": 0.0001549272856707007, "loss": 1.0598, "step": 2222 }, { "epoch": 0.45192112217930475, "grad_norm": 0.12173596769571304, "learning_rate": 0.00015490694599816943, "loss": 0.9541, "step": 2223 }, { "epoch": 0.4521244155316121, "grad_norm": 0.13201670348644257, "learning_rate": 0.00015488660632563816, "loss": 1.0898, "step": 2224 }, { "epoch": 0.4523277088839195, "grad_norm": 0.1306207776069641, "learning_rate": 0.00015486626665310688, "loss": 1.1851, "step": 2225 }, { "epoch": 0.4525310022362269, "grad_norm": 0.14152634143829346, "learning_rate": 0.00015484592698057563, "loss": 1.1348, "step": 2226 }, { "epoch": 0.45273429558853423, "grad_norm": 0.12412508577108383, "learning_rate": 0.00015482558730804436, "loss": 0.943, "step": 2227 }, { "epoch": 0.45293758894084163, "grad_norm": 0.1367032378911972, "learning_rate": 0.00015480524763551308, "loss": 1.1014, "step": 2228 }, { "epoch": 0.453140882293149, "grad_norm": 0.13346408307552338, "learning_rate": 0.0001547849079629818, "loss": 1.0675, "step": 2229 }, { "epoch": 0.45334417564545637, "grad_norm": 0.1253054141998291, "learning_rate": 0.00015476456829045053, "loss": 1.0117, "step": 2230 }, { "epoch": 0.45354746899776377, "grad_norm": 0.12263582646846771, "learning_rate": 0.00015474422861791925, "loss": 1.0491, "step": 2231 }, { "epoch": 0.45375076235007117, "grad_norm": 0.11762181669473648, "learning_rate": 0.00015472388894538798, "loss": 0.9007, "step": 2232 }, { "epoch": 0.4539540557023785, "grad_norm": 0.1264612227678299, "learning_rate": 0.0001547035492728567, "loss": 1.0378, "step": 2233 }, { "epoch": 0.4541573490546859, "grad_norm": 0.13340885937213898, "learning_rate": 0.00015468320960032545, "loss": 1.0248, "step": 2234 }, { "epoch": 0.4543606424069933, "grad_norm": 0.13584084808826447, "learning_rate": 0.00015466286992779418, "loss": 1.1549, "step": 2235 }, { "epoch": 0.45456393575930065, "grad_norm": 0.13265646994113922, "learning_rate": 0.0001546425302552629, "loss": 1.1589, "step": 2236 }, { "epoch": 0.45476722911160805, "grad_norm": 0.13352257013320923, "learning_rate": 0.00015462219058273163, "loss": 1.3275, "step": 2237 }, { "epoch": 0.45497052246391545, "grad_norm": 0.13025180995464325, "learning_rate": 0.00015460185091020035, "loss": 1.1053, "step": 2238 }, { "epoch": 0.4551738158162228, "grad_norm": 0.13518528640270233, "learning_rate": 0.00015458151123766908, "loss": 1.043, "step": 2239 }, { "epoch": 0.4553771091685302, "grad_norm": 0.13065437972545624, "learning_rate": 0.0001545611715651378, "loss": 1.1238, "step": 2240 }, { "epoch": 0.4555804025208376, "grad_norm": 0.12943416833877563, "learning_rate": 0.00015454083189260653, "loss": 1.2424, "step": 2241 }, { "epoch": 0.45578369587314493, "grad_norm": 0.1298946738243103, "learning_rate": 0.00015452049222007528, "loss": 1.0909, "step": 2242 }, { "epoch": 0.45598698922545233, "grad_norm": 0.1344904750585556, "learning_rate": 0.000154500152547544, "loss": 1.2078, "step": 2243 }, { "epoch": 0.45619028257775973, "grad_norm": 0.11595308780670166, "learning_rate": 0.00015447981287501273, "loss": 0.9557, "step": 2244 }, { "epoch": 0.4563935759300671, "grad_norm": 0.11699800193309784, "learning_rate": 0.00015445947320248145, "loss": 0.969, "step": 2245 }, { "epoch": 0.4565968692823745, "grad_norm": 0.13354718685150146, "learning_rate": 0.00015443913352995018, "loss": 1.2055, "step": 2246 }, { "epoch": 0.45680016263468187, "grad_norm": 0.12961523234844208, "learning_rate": 0.0001544187938574189, "loss": 1.1531, "step": 2247 }, { "epoch": 0.4570034559869892, "grad_norm": 0.12993821501731873, "learning_rate": 0.00015439845418488762, "loss": 1.0975, "step": 2248 }, { "epoch": 0.4572067493392966, "grad_norm": 0.12171147763729095, "learning_rate": 0.00015437811451235635, "loss": 1.1262, "step": 2249 }, { "epoch": 0.45741004269160396, "grad_norm": 0.1307455450296402, "learning_rate": 0.00015435777483982507, "loss": 1.0757, "step": 2250 }, { "epoch": 0.45761333604391136, "grad_norm": 0.12793178856372833, "learning_rate": 0.00015433743516729382, "loss": 1.1616, "step": 2251 }, { "epoch": 0.45781662939621875, "grad_norm": 0.13869251310825348, "learning_rate": 0.00015431709549476255, "loss": 1.0674, "step": 2252 }, { "epoch": 0.4580199227485261, "grad_norm": 0.13465169072151184, "learning_rate": 0.00015429675582223127, "loss": 1.0685, "step": 2253 }, { "epoch": 0.4582232161008335, "grad_norm": 0.12114840000867844, "learning_rate": 0.0001542764161497, "loss": 1.0562, "step": 2254 }, { "epoch": 0.4584265094531409, "grad_norm": 0.11819116771221161, "learning_rate": 0.00015425607647716872, "loss": 0.9489, "step": 2255 }, { "epoch": 0.45862980280544824, "grad_norm": 0.1262710839509964, "learning_rate": 0.00015423573680463745, "loss": 1.014, "step": 2256 }, { "epoch": 0.45883309615775564, "grad_norm": 0.1145327240228653, "learning_rate": 0.00015421539713210617, "loss": 1.0435, "step": 2257 }, { "epoch": 0.45903638951006304, "grad_norm": 0.13413353264331818, "learning_rate": 0.0001541950574595749, "loss": 1.2098, "step": 2258 }, { "epoch": 0.4592396828623704, "grad_norm": 0.14301779866218567, "learning_rate": 0.00015417471778704365, "loss": 1.2555, "step": 2259 }, { "epoch": 0.4594429762146778, "grad_norm": 0.12918636202812195, "learning_rate": 0.00015415437811451237, "loss": 1.2728, "step": 2260 }, { "epoch": 0.4596462695669852, "grad_norm": 0.13419827818870544, "learning_rate": 0.0001541340384419811, "loss": 1.1594, "step": 2261 }, { "epoch": 0.4598495629192925, "grad_norm": 0.132028728723526, "learning_rate": 0.00015411369876944982, "loss": 1.0049, "step": 2262 }, { "epoch": 0.4600528562715999, "grad_norm": 0.12330999970436096, "learning_rate": 0.00015409335909691855, "loss": 1.0211, "step": 2263 }, { "epoch": 0.4602561496239073, "grad_norm": 0.12041660398244858, "learning_rate": 0.00015407301942438727, "loss": 0.9111, "step": 2264 }, { "epoch": 0.46045944297621466, "grad_norm": 0.13959679007530212, "learning_rate": 0.000154052679751856, "loss": 1.2186, "step": 2265 }, { "epoch": 0.46066273632852206, "grad_norm": 0.12078391015529633, "learning_rate": 0.00015403234007932472, "loss": 0.9896, "step": 2266 }, { "epoch": 0.46086602968082946, "grad_norm": 0.13155217468738556, "learning_rate": 0.00015401200040679347, "loss": 1.1405, "step": 2267 }, { "epoch": 0.4610693230331368, "grad_norm": 0.13416320085525513, "learning_rate": 0.0001539916607342622, "loss": 1.1094, "step": 2268 }, { "epoch": 0.4612726163854442, "grad_norm": 0.13319726288318634, "learning_rate": 0.00015397132106173092, "loss": 1.0477, "step": 2269 }, { "epoch": 0.4614759097377516, "grad_norm": 0.1303132325410843, "learning_rate": 0.00015395098138919964, "loss": 1.1049, "step": 2270 }, { "epoch": 0.46167920309005894, "grad_norm": 0.1119418814778328, "learning_rate": 0.00015393064171666837, "loss": 0.8764, "step": 2271 }, { "epoch": 0.46188249644236634, "grad_norm": 0.13639549911022186, "learning_rate": 0.0001539103020441371, "loss": 1.2873, "step": 2272 }, { "epoch": 0.46208578979467374, "grad_norm": 0.1421010047197342, "learning_rate": 0.00015388996237160582, "loss": 1.2249, "step": 2273 }, { "epoch": 0.4622890831469811, "grad_norm": 0.12574367225170135, "learning_rate": 0.00015386962269907454, "loss": 1.063, "step": 2274 }, { "epoch": 0.4624923764992885, "grad_norm": 0.1510375589132309, "learning_rate": 0.0001538492830265433, "loss": 1.2542, "step": 2275 }, { "epoch": 0.4626956698515958, "grad_norm": 0.13016802072525024, "learning_rate": 0.00015382894335401202, "loss": 1.0646, "step": 2276 }, { "epoch": 0.4628989632039032, "grad_norm": 0.11884848028421402, "learning_rate": 0.00015380860368148074, "loss": 1.0169, "step": 2277 }, { "epoch": 0.4631022565562106, "grad_norm": 0.12734943628311157, "learning_rate": 0.00015378826400894947, "loss": 1.0399, "step": 2278 }, { "epoch": 0.46330554990851797, "grad_norm": 0.11856262385845184, "learning_rate": 0.0001537679243364182, "loss": 0.9773, "step": 2279 }, { "epoch": 0.46350884326082537, "grad_norm": 0.12701541185379028, "learning_rate": 0.00015374758466388692, "loss": 0.9503, "step": 2280 }, { "epoch": 0.46371213661313276, "grad_norm": 0.12200977653265, "learning_rate": 0.00015372724499135564, "loss": 0.9685, "step": 2281 }, { "epoch": 0.4639154299654401, "grad_norm": 0.1607646942138672, "learning_rate": 0.00015370690531882436, "loss": 1.2651, "step": 2282 }, { "epoch": 0.4641187233177475, "grad_norm": 0.1287887990474701, "learning_rate": 0.00015368656564629312, "loss": 1.0041, "step": 2283 }, { "epoch": 0.4643220166700549, "grad_norm": 0.11581754684448242, "learning_rate": 0.00015366622597376184, "loss": 0.8802, "step": 2284 }, { "epoch": 0.46452531002236225, "grad_norm": 0.12691698968410492, "learning_rate": 0.00015364588630123056, "loss": 0.9865, "step": 2285 }, { "epoch": 0.46472860337466965, "grad_norm": 0.15262743830680847, "learning_rate": 0.0001536255466286993, "loss": 1.2384, "step": 2286 }, { "epoch": 0.46493189672697705, "grad_norm": 0.12902504205703735, "learning_rate": 0.00015360520695616801, "loss": 1.0832, "step": 2287 }, { "epoch": 0.4651351900792844, "grad_norm": 0.13777056336402893, "learning_rate": 0.00015358486728363674, "loss": 1.2671, "step": 2288 }, { "epoch": 0.4653384834315918, "grad_norm": 0.12391048669815063, "learning_rate": 0.00015356452761110546, "loss": 1.1296, "step": 2289 }, { "epoch": 0.4655417767838992, "grad_norm": 0.13558468222618103, "learning_rate": 0.0001535441879385742, "loss": 1.3, "step": 2290 }, { "epoch": 0.46574507013620653, "grad_norm": 0.13611246645450592, "learning_rate": 0.0001535238482660429, "loss": 1.2885, "step": 2291 }, { "epoch": 0.46594836348851393, "grad_norm": 0.11027907580137253, "learning_rate": 0.00015350350859351166, "loss": 0.8807, "step": 2292 }, { "epoch": 0.4661516568408213, "grad_norm": 0.1303076446056366, "learning_rate": 0.0001534831689209804, "loss": 0.9479, "step": 2293 }, { "epoch": 0.46635495019312867, "grad_norm": 0.12296570837497711, "learning_rate": 0.0001534628292484491, "loss": 0.9483, "step": 2294 }, { "epoch": 0.46655824354543607, "grad_norm": 0.13646475970745087, "learning_rate": 0.00015344248957591784, "loss": 1.0576, "step": 2295 }, { "epoch": 0.46676153689774347, "grad_norm": 0.12281665205955505, "learning_rate": 0.00015342214990338656, "loss": 1.0582, "step": 2296 }, { "epoch": 0.4669648302500508, "grad_norm": 0.12840229272842407, "learning_rate": 0.00015340181023085529, "loss": 1.0531, "step": 2297 }, { "epoch": 0.4671681236023582, "grad_norm": 0.13027642667293549, "learning_rate": 0.000153381470558324, "loss": 1.1658, "step": 2298 }, { "epoch": 0.4673714169546656, "grad_norm": 0.13270190358161926, "learning_rate": 0.00015336113088579273, "loss": 1.2343, "step": 2299 }, { "epoch": 0.46757471030697295, "grad_norm": 0.12298402190208435, "learning_rate": 0.00015334079121326149, "loss": 1.0148, "step": 2300 }, { "epoch": 0.46777800365928035, "grad_norm": 0.11776307225227356, "learning_rate": 0.0001533204515407302, "loss": 1.0423, "step": 2301 }, { "epoch": 0.4679812970115877, "grad_norm": 0.1274150162935257, "learning_rate": 0.00015330011186819893, "loss": 1.0485, "step": 2302 }, { "epoch": 0.4681845903638951, "grad_norm": 0.12356690317392349, "learning_rate": 0.00015327977219566766, "loss": 1.0259, "step": 2303 }, { "epoch": 0.4683878837162025, "grad_norm": 0.11949564516544342, "learning_rate": 0.00015325943252313638, "loss": 1.0544, "step": 2304 }, { "epoch": 0.46859117706850983, "grad_norm": 0.11649688333272934, "learning_rate": 0.0001532390928506051, "loss": 1.0703, "step": 2305 }, { "epoch": 0.46879447042081723, "grad_norm": 0.12657220661640167, "learning_rate": 0.00015321875317807383, "loss": 0.9195, "step": 2306 }, { "epoch": 0.46899776377312463, "grad_norm": 0.11678668856620789, "learning_rate": 0.00015319841350554256, "loss": 1.0412, "step": 2307 }, { "epoch": 0.469201057125432, "grad_norm": 0.1137353926897049, "learning_rate": 0.0001531780738330113, "loss": 0.9843, "step": 2308 }, { "epoch": 0.4694043504777394, "grad_norm": 0.11690492928028107, "learning_rate": 0.00015315773416048003, "loss": 1.0313, "step": 2309 }, { "epoch": 0.4696076438300468, "grad_norm": 0.14086581766605377, "learning_rate": 0.00015313739448794876, "loss": 1.2184, "step": 2310 }, { "epoch": 0.4698109371823541, "grad_norm": 0.13605134189128876, "learning_rate": 0.00015311705481541748, "loss": 1.0273, "step": 2311 }, { "epoch": 0.4700142305346615, "grad_norm": 0.12567712366580963, "learning_rate": 0.0001530967151428862, "loss": 1.0822, "step": 2312 }, { "epoch": 0.4702175238869689, "grad_norm": 0.12103762477636337, "learning_rate": 0.00015307637547035493, "loss": 0.9511, "step": 2313 }, { "epoch": 0.47042081723927626, "grad_norm": 0.13223135471343994, "learning_rate": 0.00015305603579782366, "loss": 1.116, "step": 2314 }, { "epoch": 0.47062411059158366, "grad_norm": 0.12696783244609833, "learning_rate": 0.00015303569612529238, "loss": 1.046, "step": 2315 }, { "epoch": 0.47082740394389105, "grad_norm": 0.13583315908908844, "learning_rate": 0.00015301535645276113, "loss": 1.2365, "step": 2316 }, { "epoch": 0.4710306972961984, "grad_norm": 0.1245473176240921, "learning_rate": 0.00015299501678022986, "loss": 1.1478, "step": 2317 }, { "epoch": 0.4712339906485058, "grad_norm": 0.1365327388048172, "learning_rate": 0.00015297467710769858, "loss": 1.1697, "step": 2318 }, { "epoch": 0.4714372840008132, "grad_norm": 0.13741904497146606, "learning_rate": 0.0001529543374351673, "loss": 1.1585, "step": 2319 }, { "epoch": 0.47164057735312054, "grad_norm": 0.13385626673698425, "learning_rate": 0.00015293399776263603, "loss": 1.1195, "step": 2320 }, { "epoch": 0.47184387070542794, "grad_norm": 0.12970289587974548, "learning_rate": 0.00015291365809010475, "loss": 1.1, "step": 2321 }, { "epoch": 0.47204716405773534, "grad_norm": 0.13030849397182465, "learning_rate": 0.00015289331841757348, "loss": 1.1754, "step": 2322 }, { "epoch": 0.4722504574100427, "grad_norm": 0.1363505721092224, "learning_rate": 0.0001528729787450422, "loss": 1.2234, "step": 2323 }, { "epoch": 0.4724537507623501, "grad_norm": 0.1340765804052353, "learning_rate": 0.00015285263907251095, "loss": 0.9473, "step": 2324 }, { "epoch": 0.4726570441146575, "grad_norm": 0.12515921890735626, "learning_rate": 0.00015283229939997968, "loss": 1.0829, "step": 2325 }, { "epoch": 0.4728603374669648, "grad_norm": 0.1202256977558136, "learning_rate": 0.0001528119597274484, "loss": 0.9616, "step": 2326 }, { "epoch": 0.4730636308192722, "grad_norm": 0.10012631863355637, "learning_rate": 0.00015279162005491713, "loss": 0.7739, "step": 2327 }, { "epoch": 0.47326692417157956, "grad_norm": 0.12161195278167725, "learning_rate": 0.00015277128038238585, "loss": 1.0245, "step": 2328 }, { "epoch": 0.47347021752388696, "grad_norm": 0.12597283720970154, "learning_rate": 0.00015275094070985458, "loss": 1.1309, "step": 2329 }, { "epoch": 0.47367351087619436, "grad_norm": 0.12898840010166168, "learning_rate": 0.0001527306010373233, "loss": 1.0073, "step": 2330 }, { "epoch": 0.4738768042285017, "grad_norm": 0.11734145879745483, "learning_rate": 0.00015271026136479203, "loss": 0.8884, "step": 2331 }, { "epoch": 0.4740800975808091, "grad_norm": 0.11760027706623077, "learning_rate": 0.00015268992169226075, "loss": 1.0386, "step": 2332 }, { "epoch": 0.4742833909331165, "grad_norm": 0.13076893985271454, "learning_rate": 0.0001526695820197295, "loss": 1.1217, "step": 2333 }, { "epoch": 0.47448668428542384, "grad_norm": 0.12086467444896698, "learning_rate": 0.00015264924234719823, "loss": 1.1314, "step": 2334 }, { "epoch": 0.47468997763773124, "grad_norm": 0.1257351189851761, "learning_rate": 0.00015262890267466695, "loss": 1.0988, "step": 2335 }, { "epoch": 0.47489327099003864, "grad_norm": 0.13056614995002747, "learning_rate": 0.00015260856300213567, "loss": 1.0929, "step": 2336 }, { "epoch": 0.475096564342346, "grad_norm": 0.1115044355392456, "learning_rate": 0.0001525882233296044, "loss": 0.9365, "step": 2337 }, { "epoch": 0.4752998576946534, "grad_norm": 0.11613184213638306, "learning_rate": 0.00015256788365707312, "loss": 0.9492, "step": 2338 }, { "epoch": 0.4755031510469608, "grad_norm": 0.13431620597839355, "learning_rate": 0.00015254754398454185, "loss": 1.0483, "step": 2339 }, { "epoch": 0.4757064443992681, "grad_norm": 0.13704031705856323, "learning_rate": 0.00015252720431201057, "loss": 1.324, "step": 2340 }, { "epoch": 0.4759097377515755, "grad_norm": 0.14616814255714417, "learning_rate": 0.00015250686463947932, "loss": 1.2488, "step": 2341 }, { "epoch": 0.4761130311038829, "grad_norm": 0.14007219672203064, "learning_rate": 0.00015248652496694805, "loss": 1.1427, "step": 2342 }, { "epoch": 0.47631632445619027, "grad_norm": 0.14786280691623688, "learning_rate": 0.00015246618529441677, "loss": 1.3224, "step": 2343 }, { "epoch": 0.47651961780849766, "grad_norm": 0.13280178606510162, "learning_rate": 0.0001524458456218855, "loss": 1.2878, "step": 2344 }, { "epoch": 0.47672291116080506, "grad_norm": 0.13446266949176788, "learning_rate": 0.00015242550594935422, "loss": 0.9997, "step": 2345 }, { "epoch": 0.4769262045131124, "grad_norm": 0.1296195685863495, "learning_rate": 0.00015240516627682295, "loss": 1.196, "step": 2346 }, { "epoch": 0.4771294978654198, "grad_norm": 0.13888056576251984, "learning_rate": 0.00015238482660429167, "loss": 1.1782, "step": 2347 }, { "epoch": 0.4773327912177272, "grad_norm": 0.14144721627235413, "learning_rate": 0.0001523644869317604, "loss": 1.0023, "step": 2348 }, { "epoch": 0.47753608457003455, "grad_norm": 0.1382543295621872, "learning_rate": 0.00015234414725922915, "loss": 1.128, "step": 2349 }, { "epoch": 0.47773937792234195, "grad_norm": 0.14320622384548187, "learning_rate": 0.00015232380758669787, "loss": 1.1825, "step": 2350 }, { "epoch": 0.47794267127464934, "grad_norm": 0.13087749481201172, "learning_rate": 0.0001523034679141666, "loss": 1.1676, "step": 2351 }, { "epoch": 0.4781459646269567, "grad_norm": 0.12107618153095245, "learning_rate": 0.00015228312824163532, "loss": 1.0275, "step": 2352 }, { "epoch": 0.4783492579792641, "grad_norm": 0.12728255987167358, "learning_rate": 0.00015226278856910404, "loss": 0.9566, "step": 2353 }, { "epoch": 0.4785525513315715, "grad_norm": 0.13032306730747223, "learning_rate": 0.00015224244889657277, "loss": 0.9879, "step": 2354 }, { "epoch": 0.47875584468387883, "grad_norm": 0.13414493203163147, "learning_rate": 0.0001522221092240415, "loss": 1.1314, "step": 2355 }, { "epoch": 0.4789591380361862, "grad_norm": 0.13473325967788696, "learning_rate": 0.00015220176955151022, "loss": 1.0828, "step": 2356 }, { "epoch": 0.47916243138849357, "grad_norm": 0.13013584911823273, "learning_rate": 0.00015218142987897897, "loss": 0.9822, "step": 2357 }, { "epoch": 0.47936572474080097, "grad_norm": 0.13635900616645813, "learning_rate": 0.0001521610902064477, "loss": 1.1549, "step": 2358 }, { "epoch": 0.47956901809310837, "grad_norm": 0.14560578763484955, "learning_rate": 0.00015214075053391642, "loss": 1.1529, "step": 2359 }, { "epoch": 0.4797723114454157, "grad_norm": 0.13965454697608948, "learning_rate": 0.00015212041086138514, "loss": 1.1511, "step": 2360 }, { "epoch": 0.4799756047977231, "grad_norm": 0.14002491533756256, "learning_rate": 0.00015210007118885387, "loss": 1.1332, "step": 2361 }, { "epoch": 0.4801788981500305, "grad_norm": 0.14013326168060303, "learning_rate": 0.0001520797315163226, "loss": 1.1585, "step": 2362 }, { "epoch": 0.48038219150233785, "grad_norm": 0.114499032497406, "learning_rate": 0.00015205939184379132, "loss": 0.8636, "step": 2363 }, { "epoch": 0.48058548485464525, "grad_norm": 0.14330022037029266, "learning_rate": 0.00015203905217126004, "loss": 1.0499, "step": 2364 }, { "epoch": 0.48078877820695265, "grad_norm": 0.13167035579681396, "learning_rate": 0.0001520187124987288, "loss": 1.047, "step": 2365 }, { "epoch": 0.48099207155926, "grad_norm": 0.12093020975589752, "learning_rate": 0.00015199837282619752, "loss": 1.0635, "step": 2366 }, { "epoch": 0.4811953649115674, "grad_norm": 0.13088001310825348, "learning_rate": 0.00015197803315366624, "loss": 1.1499, "step": 2367 }, { "epoch": 0.4813986582638748, "grad_norm": 0.13969479501247406, "learning_rate": 0.00015195769348113497, "loss": 1.2346, "step": 2368 }, { "epoch": 0.48160195161618213, "grad_norm": 0.129147008061409, "learning_rate": 0.0001519373538086037, "loss": 0.9817, "step": 2369 }, { "epoch": 0.48180524496848953, "grad_norm": 0.13874943554401398, "learning_rate": 0.00015191701413607242, "loss": 1.0194, "step": 2370 }, { "epoch": 0.48200853832079693, "grad_norm": 0.13884292542934418, "learning_rate": 0.00015189667446354114, "loss": 1.1071, "step": 2371 }, { "epoch": 0.4822118316731043, "grad_norm": 0.13045528531074524, "learning_rate": 0.00015187633479100986, "loss": 1.1242, "step": 2372 }, { "epoch": 0.4824151250254117, "grad_norm": 0.15773905813694, "learning_rate": 0.0001518559951184786, "loss": 1.2639, "step": 2373 }, { "epoch": 0.48261841837771907, "grad_norm": 0.11095882952213287, "learning_rate": 0.00015183565544594734, "loss": 1.0023, "step": 2374 }, { "epoch": 0.4828217117300264, "grad_norm": 0.1181846410036087, "learning_rate": 0.00015181531577341606, "loss": 0.9518, "step": 2375 }, { "epoch": 0.4830250050823338, "grad_norm": 0.11797620356082916, "learning_rate": 0.0001517949761008848, "loss": 0.9792, "step": 2376 }, { "epoch": 0.4832282984346412, "grad_norm": 0.11560335010290146, "learning_rate": 0.0001517746364283535, "loss": 0.9539, "step": 2377 }, { "epoch": 0.48343159178694856, "grad_norm": 0.1399577260017395, "learning_rate": 0.00015175429675582224, "loss": 1.1419, "step": 2378 }, { "epoch": 0.48363488513925595, "grad_norm": 0.12643292546272278, "learning_rate": 0.00015173395708329096, "loss": 1.07, "step": 2379 }, { "epoch": 0.48383817849156335, "grad_norm": 0.11252279579639435, "learning_rate": 0.0001517136174107597, "loss": 0.9261, "step": 2380 }, { "epoch": 0.4840414718438707, "grad_norm": 0.12694686651229858, "learning_rate": 0.0001516932777382284, "loss": 1.0992, "step": 2381 }, { "epoch": 0.4842447651961781, "grad_norm": 0.11446068435907364, "learning_rate": 0.00015167293806569716, "loss": 0.9807, "step": 2382 }, { "epoch": 0.48444805854848544, "grad_norm": 0.12001042813062668, "learning_rate": 0.0001516525983931659, "loss": 0.9139, "step": 2383 }, { "epoch": 0.48465135190079284, "grad_norm": 0.12721174955368042, "learning_rate": 0.0001516322587206346, "loss": 1.0866, "step": 2384 }, { "epoch": 0.48485464525310024, "grad_norm": 0.12574180960655212, "learning_rate": 0.00015161191904810334, "loss": 1.1573, "step": 2385 }, { "epoch": 0.4850579386054076, "grad_norm": 0.12667550146579742, "learning_rate": 0.00015159157937557206, "loss": 1.0796, "step": 2386 }, { "epoch": 0.485261231957715, "grad_norm": 0.13312119245529175, "learning_rate": 0.00015157123970304079, "loss": 1.261, "step": 2387 }, { "epoch": 0.4854645253100224, "grad_norm": 0.13041463494300842, "learning_rate": 0.0001515509000305095, "loss": 1.0956, "step": 2388 }, { "epoch": 0.4856678186623297, "grad_norm": 0.12114804238080978, "learning_rate": 0.00015153056035797823, "loss": 0.887, "step": 2389 }, { "epoch": 0.4858711120146371, "grad_norm": 0.144356831908226, "learning_rate": 0.00015151022068544699, "loss": 1.1402, "step": 2390 }, { "epoch": 0.4860744053669445, "grad_norm": 0.12829992175102234, "learning_rate": 0.0001514898810129157, "loss": 0.9359, "step": 2391 }, { "epoch": 0.48627769871925186, "grad_norm": 0.12318047136068344, "learning_rate": 0.00015146954134038443, "loss": 1.0306, "step": 2392 }, { "epoch": 0.48648099207155926, "grad_norm": 0.12492537498474121, "learning_rate": 0.00015144920166785316, "loss": 1.0563, "step": 2393 }, { "epoch": 0.48668428542386666, "grad_norm": 0.130072683095932, "learning_rate": 0.00015142886199532188, "loss": 1.1365, "step": 2394 }, { "epoch": 0.486887578776174, "grad_norm": 0.11817184090614319, "learning_rate": 0.0001514085223227906, "loss": 1.0596, "step": 2395 }, { "epoch": 0.4870908721284814, "grad_norm": 0.1323062777519226, "learning_rate": 0.00015138818265025933, "loss": 1.0337, "step": 2396 }, { "epoch": 0.4872941654807888, "grad_norm": 0.13455109298229218, "learning_rate": 0.00015136784297772806, "loss": 1.1477, "step": 2397 }, { "epoch": 0.48749745883309614, "grad_norm": 0.11852074414491653, "learning_rate": 0.0001513475033051968, "loss": 1.1026, "step": 2398 }, { "epoch": 0.48770075218540354, "grad_norm": 0.11237514764070511, "learning_rate": 0.00015132716363266553, "loss": 0.9384, "step": 2399 }, { "epoch": 0.48790404553771094, "grad_norm": 0.10929456353187561, "learning_rate": 0.00015130682396013426, "loss": 0.9844, "step": 2400 }, { "epoch": 0.4881073388900183, "grad_norm": 0.13844764232635498, "learning_rate": 0.00015128648428760298, "loss": 1.1771, "step": 2401 }, { "epoch": 0.4883106322423257, "grad_norm": 0.13155733048915863, "learning_rate": 0.0001512661446150717, "loss": 1.085, "step": 2402 }, { "epoch": 0.4885139255946331, "grad_norm": 0.13567966222763062, "learning_rate": 0.00015124580494254043, "loss": 1.089, "step": 2403 }, { "epoch": 0.4887172189469404, "grad_norm": 0.1349712312221527, "learning_rate": 0.00015122546527000916, "loss": 1.1356, "step": 2404 }, { "epoch": 0.4889205122992478, "grad_norm": 0.11694735288619995, "learning_rate": 0.00015120512559747788, "loss": 0.8803, "step": 2405 }, { "epoch": 0.4891238056515552, "grad_norm": 0.11674166470766068, "learning_rate": 0.00015118478592494663, "loss": 0.9611, "step": 2406 }, { "epoch": 0.48932709900386256, "grad_norm": 0.1268279105424881, "learning_rate": 0.00015116444625241536, "loss": 1.0746, "step": 2407 }, { "epoch": 0.48953039235616996, "grad_norm": 0.1330219954252243, "learning_rate": 0.00015114410657988408, "loss": 1.1476, "step": 2408 }, { "epoch": 0.4897336857084773, "grad_norm": 0.13246414065361023, "learning_rate": 0.0001511237669073528, "loss": 1.0918, "step": 2409 }, { "epoch": 0.4899369790607847, "grad_norm": 0.12214238941669464, "learning_rate": 0.00015110342723482153, "loss": 1.0092, "step": 2410 }, { "epoch": 0.4901402724130921, "grad_norm": 0.1193271204829216, "learning_rate": 0.00015108308756229025, "loss": 0.8615, "step": 2411 }, { "epoch": 0.49034356576539945, "grad_norm": 0.12478460371494293, "learning_rate": 0.00015106274788975898, "loss": 1.0334, "step": 2412 }, { "epoch": 0.49054685911770685, "grad_norm": 0.14054545760154724, "learning_rate": 0.0001510424082172277, "loss": 1.1424, "step": 2413 }, { "epoch": 0.49075015247001424, "grad_norm": 0.15053215622901917, "learning_rate": 0.00015102206854469643, "loss": 1.0335, "step": 2414 }, { "epoch": 0.4909534458223216, "grad_norm": 0.13923850655555725, "learning_rate": 0.00015100172887216518, "loss": 1.1668, "step": 2415 }, { "epoch": 0.491156739174629, "grad_norm": 0.13445380330085754, "learning_rate": 0.0001509813891996339, "loss": 1.1683, "step": 2416 }, { "epoch": 0.4913600325269364, "grad_norm": 0.14007751643657684, "learning_rate": 0.00015096104952710263, "loss": 1.1405, "step": 2417 }, { "epoch": 0.49156332587924373, "grad_norm": 0.1334713101387024, "learning_rate": 0.00015094070985457135, "loss": 1.1468, "step": 2418 }, { "epoch": 0.4917666192315511, "grad_norm": 0.12781627476215363, "learning_rate": 0.00015092037018204008, "loss": 1.1051, "step": 2419 }, { "epoch": 0.4919699125838585, "grad_norm": 0.1371796876192093, "learning_rate": 0.0001509000305095088, "loss": 1.1274, "step": 2420 }, { "epoch": 0.49217320593616587, "grad_norm": 0.15052980184555054, "learning_rate": 0.00015087969083697753, "loss": 1.1741, "step": 2421 }, { "epoch": 0.49237649928847327, "grad_norm": 0.12332694232463837, "learning_rate": 0.00015085935116444625, "loss": 1.0382, "step": 2422 }, { "epoch": 0.49257979264078067, "grad_norm": 0.1108141764998436, "learning_rate": 0.000150839011491915, "loss": 1.0011, "step": 2423 }, { "epoch": 0.492783085993088, "grad_norm": 0.13298697769641876, "learning_rate": 0.00015081867181938373, "loss": 1.1736, "step": 2424 }, { "epoch": 0.4929863793453954, "grad_norm": 0.1383012980222702, "learning_rate": 0.00015079833214685245, "loss": 1.2248, "step": 2425 }, { "epoch": 0.4931896726977028, "grad_norm": 0.13049232959747314, "learning_rate": 0.00015077799247432117, "loss": 1.0214, "step": 2426 }, { "epoch": 0.49339296605001015, "grad_norm": 0.14081017673015594, "learning_rate": 0.0001507576528017899, "loss": 1.1836, "step": 2427 }, { "epoch": 0.49359625940231755, "grad_norm": 0.14135879278182983, "learning_rate": 0.00015073731312925862, "loss": 1.0908, "step": 2428 }, { "epoch": 0.49379955275462495, "grad_norm": 0.12276162207126617, "learning_rate": 0.00015071697345672735, "loss": 1.0615, "step": 2429 }, { "epoch": 0.4940028461069323, "grad_norm": 0.13314439356327057, "learning_rate": 0.00015069663378419607, "loss": 1.0126, "step": 2430 }, { "epoch": 0.4942061394592397, "grad_norm": 0.13110828399658203, "learning_rate": 0.00015067629411166482, "loss": 1.0914, "step": 2431 }, { "epoch": 0.4944094328115471, "grad_norm": 0.14637964963912964, "learning_rate": 0.00015065595443913355, "loss": 1.1895, "step": 2432 }, { "epoch": 0.49461272616385443, "grad_norm": 0.13631272315979004, "learning_rate": 0.00015063561476660227, "loss": 1.0877, "step": 2433 }, { "epoch": 0.49481601951616183, "grad_norm": 0.12627999484539032, "learning_rate": 0.000150615275094071, "loss": 1.0902, "step": 2434 }, { "epoch": 0.4950193128684692, "grad_norm": 0.1452523022890091, "learning_rate": 0.00015059493542153972, "loss": 1.2531, "step": 2435 }, { "epoch": 0.4952226062207766, "grad_norm": 0.12937428057193756, "learning_rate": 0.00015057459574900845, "loss": 1.149, "step": 2436 }, { "epoch": 0.49542589957308397, "grad_norm": 0.1313169300556183, "learning_rate": 0.00015055425607647717, "loss": 1.1124, "step": 2437 }, { "epoch": 0.4956291929253913, "grad_norm": 0.13300736248493195, "learning_rate": 0.0001505339164039459, "loss": 1.2259, "step": 2438 }, { "epoch": 0.4958324862776987, "grad_norm": 0.12567725777626038, "learning_rate": 0.00015051357673141465, "loss": 1.1431, "step": 2439 }, { "epoch": 0.4960357796300061, "grad_norm": 0.12322575598955154, "learning_rate": 0.00015049323705888337, "loss": 1.0824, "step": 2440 }, { "epoch": 0.49623907298231346, "grad_norm": 0.11976869404315948, "learning_rate": 0.0001504728973863521, "loss": 1.057, "step": 2441 }, { "epoch": 0.49644236633462085, "grad_norm": 0.13577309250831604, "learning_rate": 0.00015045255771382082, "loss": 1.1027, "step": 2442 }, { "epoch": 0.49664565968692825, "grad_norm": 0.13949300348758698, "learning_rate": 0.00015043221804128954, "loss": 1.1102, "step": 2443 }, { "epoch": 0.4968489530392356, "grad_norm": 0.1493709832429886, "learning_rate": 0.00015041187836875827, "loss": 1.2468, "step": 2444 }, { "epoch": 0.497052246391543, "grad_norm": 0.13680393993854523, "learning_rate": 0.000150391538696227, "loss": 1.0607, "step": 2445 }, { "epoch": 0.4972555397438504, "grad_norm": 0.12200003862380981, "learning_rate": 0.00015037119902369572, "loss": 0.931, "step": 2446 }, { "epoch": 0.49745883309615774, "grad_norm": 0.12486010044813156, "learning_rate": 0.00015035085935116447, "loss": 1.0529, "step": 2447 }, { "epoch": 0.49766212644846514, "grad_norm": 0.14312241971492767, "learning_rate": 0.0001503305196786332, "loss": 1.1921, "step": 2448 }, { "epoch": 0.49786541980077254, "grad_norm": 0.1357506513595581, "learning_rate": 0.00015031018000610192, "loss": 1.0561, "step": 2449 }, { "epoch": 0.4980687131530799, "grad_norm": 0.14294788241386414, "learning_rate": 0.00015028984033357064, "loss": 0.9931, "step": 2450 }, { "epoch": 0.4982720065053873, "grad_norm": 0.1087241843342781, "learning_rate": 0.00015026950066103937, "loss": 1.0038, "step": 2451 }, { "epoch": 0.4984752998576947, "grad_norm": 0.11965546011924744, "learning_rate": 0.0001502491609885081, "loss": 1.0055, "step": 2452 }, { "epoch": 0.498678593210002, "grad_norm": 0.13440768420696259, "learning_rate": 0.00015022882131597682, "loss": 0.9883, "step": 2453 }, { "epoch": 0.4988818865623094, "grad_norm": 0.12496986985206604, "learning_rate": 0.00015020848164344554, "loss": 1.1175, "step": 2454 }, { "epoch": 0.4990851799146168, "grad_norm": 0.1410161703824997, "learning_rate": 0.00015018814197091427, "loss": 1.0976, "step": 2455 }, { "epoch": 0.49928847326692416, "grad_norm": 0.12262056767940521, "learning_rate": 0.00015016780229838302, "loss": 0.9429, "step": 2456 }, { "epoch": 0.49949176661923156, "grad_norm": 0.12424588203430176, "learning_rate": 0.00015014746262585174, "loss": 1.01, "step": 2457 }, { "epoch": 0.49969505997153896, "grad_norm": 0.12014136463403702, "learning_rate": 0.00015012712295332047, "loss": 1.0769, "step": 2458 }, { "epoch": 0.4998983533238463, "grad_norm": 0.12330099940299988, "learning_rate": 0.00015010678328078916, "loss": 0.9982, "step": 2459 }, { "epoch": 0.5001016466761536, "grad_norm": 0.12830835580825806, "learning_rate": 0.00015008644360825791, "loss": 1.1893, "step": 2460 }, { "epoch": 0.500304940028461, "grad_norm": 0.12516823410987854, "learning_rate": 0.00015006610393572664, "loss": 0.9871, "step": 2461 }, { "epoch": 0.5005082333807684, "grad_norm": 0.13631972670555115, "learning_rate": 0.00015004576426319536, "loss": 1.1454, "step": 2462 }, { "epoch": 0.5007115267330758, "grad_norm": 0.1340373456478119, "learning_rate": 0.0001500254245906641, "loss": 1.0478, "step": 2463 }, { "epoch": 0.5009148200853832, "grad_norm": 0.13719302415847778, "learning_rate": 0.00015000508491813284, "loss": 0.9423, "step": 2464 }, { "epoch": 0.5011181134376906, "grad_norm": 0.12467597424983978, "learning_rate": 0.00014998474524560156, "loss": 1.0493, "step": 2465 }, { "epoch": 0.5013214067899979, "grad_norm": 0.11926814168691635, "learning_rate": 0.0001499644055730703, "loss": 0.9844, "step": 2466 }, { "epoch": 0.5015247001423053, "grad_norm": 0.12327981740236282, "learning_rate": 0.00014994406590053899, "loss": 1.0336, "step": 2467 }, { "epoch": 0.5017279934946127, "grad_norm": 0.126510351896286, "learning_rate": 0.00014992372622800774, "loss": 1.0061, "step": 2468 }, { "epoch": 0.5019312868469201, "grad_norm": 0.12146785855293274, "learning_rate": 0.00014990338655547646, "loss": 1.011, "step": 2469 }, { "epoch": 0.5021345801992275, "grad_norm": 0.12402217090129852, "learning_rate": 0.00014988304688294519, "loss": 1.0177, "step": 2470 }, { "epoch": 0.5023378735515349, "grad_norm": 0.13122454285621643, "learning_rate": 0.0001498627072104139, "loss": 0.9743, "step": 2471 }, { "epoch": 0.5025411669038422, "grad_norm": 0.13217094540596008, "learning_rate": 0.00014984236753788266, "loss": 1.0227, "step": 2472 }, { "epoch": 0.5027444602561496, "grad_norm": 0.14987598359584808, "learning_rate": 0.0001498220278653514, "loss": 1.1522, "step": 2473 }, { "epoch": 0.502947753608457, "grad_norm": 0.13689711689949036, "learning_rate": 0.0001498016881928201, "loss": 1.0014, "step": 2474 }, { "epoch": 0.5031510469607644, "grad_norm": 0.11815892159938812, "learning_rate": 0.00014978134852028884, "loss": 0.8749, "step": 2475 }, { "epoch": 0.5033543403130718, "grad_norm": 0.11772647500038147, "learning_rate": 0.00014976100884775756, "loss": 0.9894, "step": 2476 }, { "epoch": 0.5035576336653791, "grad_norm": 0.13443076610565186, "learning_rate": 0.00014974066917522628, "loss": 1.1191, "step": 2477 }, { "epoch": 0.5037609270176865, "grad_norm": 0.13787920773029327, "learning_rate": 0.000149720329502695, "loss": 1.0513, "step": 2478 }, { "epoch": 0.5039642203699939, "grad_norm": 0.13152827322483063, "learning_rate": 0.00014969998983016373, "loss": 1.1012, "step": 2479 }, { "epoch": 0.5041675137223013, "grad_norm": 0.12392322719097137, "learning_rate": 0.00014967965015763249, "loss": 1.0068, "step": 2480 }, { "epoch": 0.5043708070746087, "grad_norm": 0.13253094255924225, "learning_rate": 0.0001496593104851012, "loss": 1.0683, "step": 2481 }, { "epoch": 0.5045741004269161, "grad_norm": 0.12664328515529633, "learning_rate": 0.00014963897081256993, "loss": 1.0333, "step": 2482 }, { "epoch": 0.5047773937792234, "grad_norm": 0.13020643591880798, "learning_rate": 0.00014961863114003866, "loss": 1.0567, "step": 2483 }, { "epoch": 0.5049806871315308, "grad_norm": 0.1261332482099533, "learning_rate": 0.00014959829146750738, "loss": 0.9574, "step": 2484 }, { "epoch": 0.5051839804838382, "grad_norm": 0.13825035095214844, "learning_rate": 0.0001495779517949761, "loss": 1.2168, "step": 2485 }, { "epoch": 0.5053872738361456, "grad_norm": 0.1333974152803421, "learning_rate": 0.00014955761212244483, "loss": 1.0775, "step": 2486 }, { "epoch": 0.505590567188453, "grad_norm": 0.12436322122812271, "learning_rate": 0.00014953727244991356, "loss": 1.0043, "step": 2487 }, { "epoch": 0.5057938605407604, "grad_norm": 0.13626371324062347, "learning_rate": 0.0001495169327773823, "loss": 1.1736, "step": 2488 }, { "epoch": 0.5059971538930677, "grad_norm": 0.13061967492103577, "learning_rate": 0.00014949659310485103, "loss": 0.9288, "step": 2489 }, { "epoch": 0.506200447245375, "grad_norm": 0.12033544480800629, "learning_rate": 0.00014947625343231976, "loss": 1.0222, "step": 2490 }, { "epoch": 0.5064037405976825, "grad_norm": 0.14046040177345276, "learning_rate": 0.00014945591375978848, "loss": 1.1382, "step": 2491 }, { "epoch": 0.5066070339499898, "grad_norm": 0.1174360066652298, "learning_rate": 0.0001494355740872572, "loss": 0.8663, "step": 2492 }, { "epoch": 0.5068103273022972, "grad_norm": 0.13645724952220917, "learning_rate": 0.00014941523441472593, "loss": 1.1139, "step": 2493 }, { "epoch": 0.5070136206546046, "grad_norm": 0.1309158205986023, "learning_rate": 0.00014939489474219465, "loss": 1.1784, "step": 2494 }, { "epoch": 0.5072169140069119, "grad_norm": 0.12230408936738968, "learning_rate": 0.00014937455506966338, "loss": 1.1231, "step": 2495 }, { "epoch": 0.5074202073592193, "grad_norm": 0.1440531313419342, "learning_rate": 0.0001493542153971321, "loss": 1.2055, "step": 2496 }, { "epoch": 0.5076235007115267, "grad_norm": 0.13199447095394135, "learning_rate": 0.00014933387572460086, "loss": 1.1044, "step": 2497 }, { "epoch": 0.5078267940638341, "grad_norm": 0.12693634629249573, "learning_rate": 0.00014931353605206958, "loss": 1.0495, "step": 2498 }, { "epoch": 0.5080300874161415, "grad_norm": 0.1196681559085846, "learning_rate": 0.0001492931963795383, "loss": 0.9505, "step": 2499 }, { "epoch": 0.5082333807684488, "grad_norm": 0.1331620216369629, "learning_rate": 0.000149272856707007, "loss": 1.0419, "step": 2500 }, { "epoch": 0.5084366741207562, "grad_norm": 0.12307044863700867, "learning_rate": 0.00014925251703447575, "loss": 1.0216, "step": 2501 }, { "epoch": 0.5086399674730636, "grad_norm": 0.14936399459838867, "learning_rate": 0.00014923217736194448, "loss": 1.2894, "step": 2502 }, { "epoch": 0.508843260825371, "grad_norm": 0.1165819764137268, "learning_rate": 0.0001492118376894132, "loss": 1.0173, "step": 2503 }, { "epoch": 0.5090465541776784, "grad_norm": 0.13525764644145966, "learning_rate": 0.00014919149801688193, "loss": 1.0883, "step": 2504 }, { "epoch": 0.5092498475299858, "grad_norm": 0.13654504716396332, "learning_rate": 0.00014917115834435068, "loss": 0.9356, "step": 2505 }, { "epoch": 0.5094531408822931, "grad_norm": 0.12151267379522324, "learning_rate": 0.0001491508186718194, "loss": 0.9508, "step": 2506 }, { "epoch": 0.5096564342346005, "grad_norm": 0.13334833085536957, "learning_rate": 0.00014913047899928813, "loss": 1.2175, "step": 2507 }, { "epoch": 0.5098597275869079, "grad_norm": 0.13975641131401062, "learning_rate": 0.00014911013932675682, "loss": 1.0501, "step": 2508 }, { "epoch": 0.5100630209392153, "grad_norm": 0.13203707337379456, "learning_rate": 0.00014908979965422558, "loss": 0.9835, "step": 2509 }, { "epoch": 0.5102663142915227, "grad_norm": 0.154182568192482, "learning_rate": 0.0001490694599816943, "loss": 1.1632, "step": 2510 }, { "epoch": 0.5104696076438301, "grad_norm": 0.13297821581363678, "learning_rate": 0.00014904912030916302, "loss": 0.9965, "step": 2511 }, { "epoch": 0.5106729009961374, "grad_norm": 0.123105987906456, "learning_rate": 0.00014902878063663175, "loss": 0.9264, "step": 2512 }, { "epoch": 0.5108761943484448, "grad_norm": 0.1457197219133377, "learning_rate": 0.0001490084409641005, "loss": 1.1452, "step": 2513 }, { "epoch": 0.5110794877007522, "grad_norm": 0.12882955372333527, "learning_rate": 0.00014898810129156923, "loss": 1.0295, "step": 2514 }, { "epoch": 0.5112827810530596, "grad_norm": 0.1381346881389618, "learning_rate": 0.00014896776161903795, "loss": 1.1416, "step": 2515 }, { "epoch": 0.511486074405367, "grad_norm": 0.12074743956327438, "learning_rate": 0.00014894742194650665, "loss": 0.9549, "step": 2516 }, { "epoch": 0.5116893677576744, "grad_norm": 0.12559756636619568, "learning_rate": 0.0001489270822739754, "loss": 1.014, "step": 2517 }, { "epoch": 0.5118926611099817, "grad_norm": 0.13586939871311188, "learning_rate": 0.00014890674260144412, "loss": 1.1621, "step": 2518 }, { "epoch": 0.5120959544622891, "grad_norm": 0.1177433580160141, "learning_rate": 0.00014888640292891285, "loss": 0.9666, "step": 2519 }, { "epoch": 0.5122992478145965, "grad_norm": 0.12881316244602203, "learning_rate": 0.00014886606325638157, "loss": 1.1345, "step": 2520 }, { "epoch": 0.5125025411669039, "grad_norm": 0.1258634775876999, "learning_rate": 0.00014884572358385032, "loss": 1.05, "step": 2521 }, { "epoch": 0.5127058345192113, "grad_norm": 0.12486784160137177, "learning_rate": 0.00014882538391131905, "loss": 1.144, "step": 2522 }, { "epoch": 0.5129091278715187, "grad_norm": 0.13641564548015594, "learning_rate": 0.00014880504423878777, "loss": 1.2183, "step": 2523 }, { "epoch": 0.5131124212238259, "grad_norm": 0.13277971744537354, "learning_rate": 0.00014878470456625647, "loss": 1.1815, "step": 2524 }, { "epoch": 0.5133157145761333, "grad_norm": 0.14261163771152496, "learning_rate": 0.00014876436489372522, "loss": 1.217, "step": 2525 }, { "epoch": 0.5135190079284407, "grad_norm": 0.13848505914211273, "learning_rate": 0.00014874402522119395, "loss": 1.2031, "step": 2526 }, { "epoch": 0.5137223012807481, "grad_norm": 0.10906849801540375, "learning_rate": 0.00014872368554866267, "loss": 0.9407, "step": 2527 }, { "epoch": 0.5139255946330555, "grad_norm": 0.13533109426498413, "learning_rate": 0.0001487033458761314, "loss": 1.1576, "step": 2528 }, { "epoch": 0.5141288879853628, "grad_norm": 0.13062264025211334, "learning_rate": 0.00014868300620360015, "loss": 1.1019, "step": 2529 }, { "epoch": 0.5143321813376702, "grad_norm": 0.1373278796672821, "learning_rate": 0.00014866266653106887, "loss": 0.9672, "step": 2530 }, { "epoch": 0.5145354746899776, "grad_norm": 0.15875272452831268, "learning_rate": 0.0001486423268585376, "loss": 1.3291, "step": 2531 }, { "epoch": 0.514738768042285, "grad_norm": 0.1146063432097435, "learning_rate": 0.0001486219871860063, "loss": 1.0362, "step": 2532 }, { "epoch": 0.5149420613945924, "grad_norm": 0.13759560883045197, "learning_rate": 0.00014860164751347504, "loss": 1.0423, "step": 2533 }, { "epoch": 0.5151453547468998, "grad_norm": 0.1348053216934204, "learning_rate": 0.00014858130784094377, "loss": 1.0733, "step": 2534 }, { "epoch": 0.5153486480992071, "grad_norm": 0.12033452838659286, "learning_rate": 0.0001485609681684125, "loss": 0.9471, "step": 2535 }, { "epoch": 0.5155519414515145, "grad_norm": 0.12116893380880356, "learning_rate": 0.00014854062849588122, "loss": 0.8554, "step": 2536 }, { "epoch": 0.5157552348038219, "grad_norm": 0.13480456173419952, "learning_rate": 0.00014852028882334994, "loss": 1.0257, "step": 2537 }, { "epoch": 0.5159585281561293, "grad_norm": 0.1279120147228241, "learning_rate": 0.0001484999491508187, "loss": 1.1841, "step": 2538 }, { "epoch": 0.5161618215084367, "grad_norm": 0.12960465252399445, "learning_rate": 0.00014847960947828742, "loss": 1.022, "step": 2539 }, { "epoch": 0.5163651148607441, "grad_norm": 0.12386467307806015, "learning_rate": 0.00014845926980575614, "loss": 0.9364, "step": 2540 }, { "epoch": 0.5165684082130514, "grad_norm": 0.1340230405330658, "learning_rate": 0.00014843893013322484, "loss": 1.1693, "step": 2541 }, { "epoch": 0.5167717015653588, "grad_norm": 0.13475503027439117, "learning_rate": 0.0001484185904606936, "loss": 1.1208, "step": 2542 }, { "epoch": 0.5169749949176662, "grad_norm": 0.13605645298957825, "learning_rate": 0.00014839825078816232, "loss": 1.0327, "step": 2543 }, { "epoch": 0.5171782882699736, "grad_norm": 0.11159854382276535, "learning_rate": 0.00014837791111563104, "loss": 0.9095, "step": 2544 }, { "epoch": 0.517381581622281, "grad_norm": 0.12562917172908783, "learning_rate": 0.00014835757144309976, "loss": 1.0037, "step": 2545 }, { "epoch": 0.5175848749745884, "grad_norm": 0.12805363535881042, "learning_rate": 0.00014833723177056852, "loss": 1.053, "step": 2546 }, { "epoch": 0.5177881683268957, "grad_norm": 0.1303015947341919, "learning_rate": 0.00014831689209803724, "loss": 1.0277, "step": 2547 }, { "epoch": 0.5179914616792031, "grad_norm": 0.13903219997882843, "learning_rate": 0.00014829655242550597, "loss": 1.1639, "step": 2548 }, { "epoch": 0.5181947550315105, "grad_norm": 0.13119028508663177, "learning_rate": 0.00014827621275297466, "loss": 0.9134, "step": 2549 }, { "epoch": 0.5183980483838179, "grad_norm": 0.12713825702667236, "learning_rate": 0.00014825587308044341, "loss": 1.0313, "step": 2550 }, { "epoch": 0.5186013417361253, "grad_norm": 0.13641834259033203, "learning_rate": 0.00014823553340791214, "loss": 1.0787, "step": 2551 }, { "epoch": 0.5188046350884326, "grad_norm": 0.1124555915594101, "learning_rate": 0.00014821519373538086, "loss": 0.9135, "step": 2552 }, { "epoch": 0.51900792844074, "grad_norm": 0.10946158319711685, "learning_rate": 0.0001481948540628496, "loss": 0.8105, "step": 2553 }, { "epoch": 0.5192112217930474, "grad_norm": 0.12753844261169434, "learning_rate": 0.00014817451439031834, "loss": 1.0308, "step": 2554 }, { "epoch": 0.5194145151453547, "grad_norm": 0.14424805343151093, "learning_rate": 0.00014815417471778706, "loss": 1.0104, "step": 2555 }, { "epoch": 0.5196178084976621, "grad_norm": 0.13107620179653168, "learning_rate": 0.0001481338350452558, "loss": 1.1754, "step": 2556 }, { "epoch": 0.5198211018499695, "grad_norm": 0.11977977305650711, "learning_rate": 0.00014811349537272449, "loss": 1.0019, "step": 2557 }, { "epoch": 0.5200243952022768, "grad_norm": 0.11917620897293091, "learning_rate": 0.00014809315570019324, "loss": 0.9636, "step": 2558 }, { "epoch": 0.5202276885545842, "grad_norm": 0.12576279044151306, "learning_rate": 0.00014807281602766196, "loss": 1.1341, "step": 2559 }, { "epoch": 0.5204309819068916, "grad_norm": 0.1402411311864853, "learning_rate": 0.00014805247635513069, "loss": 1.1186, "step": 2560 }, { "epoch": 0.520634275259199, "grad_norm": 0.15055212378501892, "learning_rate": 0.0001480321366825994, "loss": 1.1878, "step": 2561 }, { "epoch": 0.5208375686115064, "grad_norm": 0.11402598023414612, "learning_rate": 0.00014801179701006816, "loss": 0.7465, "step": 2562 }, { "epoch": 0.5210408619638138, "grad_norm": 0.12650637328624725, "learning_rate": 0.0001479914573375369, "loss": 1.0605, "step": 2563 }, { "epoch": 0.5212441553161211, "grad_norm": 0.13538390398025513, "learning_rate": 0.0001479711176650056, "loss": 0.9924, "step": 2564 }, { "epoch": 0.5214474486684285, "grad_norm": 0.12981672585010529, "learning_rate": 0.0001479507779924743, "loss": 1.0908, "step": 2565 }, { "epoch": 0.5216507420207359, "grad_norm": 0.13389542698860168, "learning_rate": 0.00014793043831994306, "loss": 1.0369, "step": 2566 }, { "epoch": 0.5218540353730433, "grad_norm": 0.1256348341703415, "learning_rate": 0.00014791009864741178, "loss": 1.1209, "step": 2567 }, { "epoch": 0.5220573287253507, "grad_norm": 0.13984240591526031, "learning_rate": 0.0001478897589748805, "loss": 1.2199, "step": 2568 }, { "epoch": 0.5222606220776581, "grad_norm": 0.12872397899627686, "learning_rate": 0.00014786941930234923, "loss": 1.0793, "step": 2569 }, { "epoch": 0.5224639154299654, "grad_norm": 0.12694962322711945, "learning_rate": 0.00014784907962981798, "loss": 0.9623, "step": 2570 }, { "epoch": 0.5226672087822728, "grad_norm": 0.13034392893314362, "learning_rate": 0.0001478287399572867, "loss": 1.2404, "step": 2571 }, { "epoch": 0.5228705021345802, "grad_norm": 0.1416521519422531, "learning_rate": 0.00014780840028475543, "loss": 1.2426, "step": 2572 }, { "epoch": 0.5230737954868876, "grad_norm": 0.12421387434005737, "learning_rate": 0.00014778806061222413, "loss": 1.0685, "step": 2573 }, { "epoch": 0.523277088839195, "grad_norm": 0.1387767344713211, "learning_rate": 0.00014776772093969288, "loss": 1.202, "step": 2574 }, { "epoch": 0.5234803821915024, "grad_norm": 0.13308827579021454, "learning_rate": 0.0001477473812671616, "loss": 1.2395, "step": 2575 }, { "epoch": 0.5236836755438097, "grad_norm": 0.15293751657009125, "learning_rate": 0.00014772704159463033, "loss": 1.1062, "step": 2576 }, { "epoch": 0.5238869688961171, "grad_norm": 0.1332782655954361, "learning_rate": 0.00014770670192209906, "loss": 1.1205, "step": 2577 }, { "epoch": 0.5240902622484245, "grad_norm": 0.11857607960700989, "learning_rate": 0.0001476863622495678, "loss": 1.1111, "step": 2578 }, { "epoch": 0.5242935556007319, "grad_norm": 0.13509269058704376, "learning_rate": 0.00014766602257703653, "loss": 1.0806, "step": 2579 }, { "epoch": 0.5244968489530393, "grad_norm": 0.12904144823551178, "learning_rate": 0.00014764568290450526, "loss": 1.021, "step": 2580 }, { "epoch": 0.5247001423053466, "grad_norm": 0.1381101906299591, "learning_rate": 0.00014762534323197395, "loss": 1.2025, "step": 2581 }, { "epoch": 0.524903435657654, "grad_norm": 0.13160142302513123, "learning_rate": 0.00014760500355944268, "loss": 1.0126, "step": 2582 }, { "epoch": 0.5251067290099614, "grad_norm": 0.14287696778774261, "learning_rate": 0.00014758466388691143, "loss": 1.1466, "step": 2583 }, { "epoch": 0.5253100223622688, "grad_norm": 0.13337363302707672, "learning_rate": 0.00014756432421438015, "loss": 1.0036, "step": 2584 }, { "epoch": 0.5255133157145762, "grad_norm": 0.14575807750225067, "learning_rate": 0.00014754398454184888, "loss": 1.1033, "step": 2585 }, { "epoch": 0.5257166090668836, "grad_norm": 0.12519006431102753, "learning_rate": 0.0001475236448693176, "loss": 1.004, "step": 2586 }, { "epoch": 0.5259199024191908, "grad_norm": 0.12951436638832092, "learning_rate": 0.00014750330519678635, "loss": 0.9679, "step": 2587 }, { "epoch": 0.5261231957714982, "grad_norm": 0.1465519517660141, "learning_rate": 0.00014748296552425508, "loss": 1.19, "step": 2588 }, { "epoch": 0.5263264891238056, "grad_norm": 0.12192967534065247, "learning_rate": 0.00014746262585172378, "loss": 1.0086, "step": 2589 }, { "epoch": 0.526529782476113, "grad_norm": 0.13444490730762482, "learning_rate": 0.0001474422861791925, "loss": 0.9185, "step": 2590 }, { "epoch": 0.5267330758284204, "grad_norm": 0.13128428161144257, "learning_rate": 0.00014742194650666125, "loss": 0.8979, "step": 2591 }, { "epoch": 0.5269363691807278, "grad_norm": 0.14445891976356506, "learning_rate": 0.00014740160683412998, "loss": 1.1736, "step": 2592 }, { "epoch": 0.5271396625330351, "grad_norm": 0.13069060444831848, "learning_rate": 0.0001473812671615987, "loss": 1.0791, "step": 2593 }, { "epoch": 0.5273429558853425, "grad_norm": 0.11903716623783112, "learning_rate": 0.00014736092748906743, "loss": 1.0983, "step": 2594 }, { "epoch": 0.5275462492376499, "grad_norm": 0.14502301812171936, "learning_rate": 0.00014734058781653618, "loss": 1.1266, "step": 2595 }, { "epoch": 0.5277495425899573, "grad_norm": 0.12276476621627808, "learning_rate": 0.0001473202481440049, "loss": 0.9658, "step": 2596 }, { "epoch": 0.5279528359422647, "grad_norm": 0.1322438269853592, "learning_rate": 0.00014729990847147363, "loss": 1.068, "step": 2597 }, { "epoch": 0.5281561292945721, "grad_norm": 0.12933704257011414, "learning_rate": 0.00014727956879894232, "loss": 0.9955, "step": 2598 }, { "epoch": 0.5283594226468794, "grad_norm": 0.13503174483776093, "learning_rate": 0.00014725922912641108, "loss": 1.117, "step": 2599 }, { "epoch": 0.5285627159991868, "grad_norm": 0.13893373310565948, "learning_rate": 0.0001472388894538798, "loss": 1.1355, "step": 2600 }, { "epoch": 0.5287660093514942, "grad_norm": 0.13064657151699066, "learning_rate": 0.00014721854978134852, "loss": 0.9111, "step": 2601 }, { "epoch": 0.5289693027038016, "grad_norm": 0.13640174269676208, "learning_rate": 0.00014719821010881725, "loss": 1.1903, "step": 2602 }, { "epoch": 0.529172596056109, "grad_norm": 0.13113752007484436, "learning_rate": 0.000147177870436286, "loss": 0.926, "step": 2603 }, { "epoch": 0.5293758894084164, "grad_norm": 0.15011656284332275, "learning_rate": 0.00014715753076375472, "loss": 1.0861, "step": 2604 }, { "epoch": 0.5295791827607237, "grad_norm": 0.1330660730600357, "learning_rate": 0.00014713719109122345, "loss": 1.1027, "step": 2605 }, { "epoch": 0.5297824761130311, "grad_norm": 0.1252673864364624, "learning_rate": 0.00014711685141869215, "loss": 0.9304, "step": 2606 }, { "epoch": 0.5299857694653385, "grad_norm": 0.12724831700325012, "learning_rate": 0.0001470965117461609, "loss": 1.0286, "step": 2607 }, { "epoch": 0.5301890628176459, "grad_norm": 0.12352915853261948, "learning_rate": 0.00014707617207362962, "loss": 1.0159, "step": 2608 }, { "epoch": 0.5303923561699533, "grad_norm": 0.1302500218153, "learning_rate": 0.00014705583240109835, "loss": 1.0642, "step": 2609 }, { "epoch": 0.5305956495222606, "grad_norm": 0.12427016347646713, "learning_rate": 0.00014703549272856707, "loss": 0.9496, "step": 2610 }, { "epoch": 0.530798942874568, "grad_norm": 0.13810168206691742, "learning_rate": 0.00014701515305603582, "loss": 1.0421, "step": 2611 }, { "epoch": 0.5310022362268754, "grad_norm": 0.1359987109899521, "learning_rate": 0.00014699481338350455, "loss": 0.9605, "step": 2612 }, { "epoch": 0.5312055295791828, "grad_norm": 0.1282379925251007, "learning_rate": 0.00014697447371097327, "loss": 1.0654, "step": 2613 }, { "epoch": 0.5314088229314902, "grad_norm": 0.1283995509147644, "learning_rate": 0.00014695413403844197, "loss": 1.0312, "step": 2614 }, { "epoch": 0.5316121162837976, "grad_norm": 0.12052475661039352, "learning_rate": 0.00014693379436591072, "loss": 1.1057, "step": 2615 }, { "epoch": 0.5318154096361049, "grad_norm": 0.13645312190055847, "learning_rate": 0.00014691345469337945, "loss": 1.0701, "step": 2616 }, { "epoch": 0.5320187029884123, "grad_norm": 0.13875778019428253, "learning_rate": 0.00014689311502084817, "loss": 1.2868, "step": 2617 }, { "epoch": 0.5322219963407196, "grad_norm": 0.12762780487537384, "learning_rate": 0.0001468727753483169, "loss": 0.9696, "step": 2618 }, { "epoch": 0.532425289693027, "grad_norm": 0.14250846207141876, "learning_rate": 0.00014685243567578565, "loss": 1.1762, "step": 2619 }, { "epoch": 0.5326285830453344, "grad_norm": 0.10621387511491776, "learning_rate": 0.00014683209600325437, "loss": 0.8195, "step": 2620 }, { "epoch": 0.5328318763976418, "grad_norm": 0.14604990184307098, "learning_rate": 0.0001468117563307231, "loss": 1.0794, "step": 2621 }, { "epoch": 0.5330351697499491, "grad_norm": 0.13326723873615265, "learning_rate": 0.0001467914166581918, "loss": 1.0018, "step": 2622 }, { "epoch": 0.5332384631022565, "grad_norm": 0.12089519202709198, "learning_rate": 0.00014677107698566052, "loss": 1.0615, "step": 2623 }, { "epoch": 0.5334417564545639, "grad_norm": 0.1269814819097519, "learning_rate": 0.00014675073731312927, "loss": 0.9591, "step": 2624 }, { "epoch": 0.5336450498068713, "grad_norm": 0.13674674928188324, "learning_rate": 0.000146730397640598, "loss": 1.0315, "step": 2625 }, { "epoch": 0.5338483431591787, "grad_norm": 0.1372392177581787, "learning_rate": 0.00014671005796806672, "loss": 1.0483, "step": 2626 }, { "epoch": 0.5340516365114861, "grad_norm": 0.12088494002819061, "learning_rate": 0.00014668971829553544, "loss": 0.9172, "step": 2627 }, { "epoch": 0.5342549298637934, "grad_norm": 0.1240740641951561, "learning_rate": 0.0001466693786230042, "loss": 1.0149, "step": 2628 }, { "epoch": 0.5344582232161008, "grad_norm": 0.13450276851654053, "learning_rate": 0.00014664903895047292, "loss": 1.0719, "step": 2629 }, { "epoch": 0.5346615165684082, "grad_norm": 0.12809321284294128, "learning_rate": 0.00014662869927794162, "loss": 0.9452, "step": 2630 }, { "epoch": 0.5348648099207156, "grad_norm": 0.1411091536283493, "learning_rate": 0.00014660835960541034, "loss": 1.2103, "step": 2631 }, { "epoch": 0.535068103273023, "grad_norm": 0.12086781114339828, "learning_rate": 0.0001465880199328791, "loss": 0.8857, "step": 2632 }, { "epoch": 0.5352713966253303, "grad_norm": 0.13093651831150055, "learning_rate": 0.00014656768026034782, "loss": 1.1842, "step": 2633 }, { "epoch": 0.5354746899776377, "grad_norm": 0.11652904748916626, "learning_rate": 0.00014654734058781654, "loss": 1.0096, "step": 2634 }, { "epoch": 0.5356779833299451, "grad_norm": 0.13243702054023743, "learning_rate": 0.00014652700091528526, "loss": 1.1378, "step": 2635 }, { "epoch": 0.5358812766822525, "grad_norm": 0.14085280895233154, "learning_rate": 0.00014650666124275402, "loss": 1.0759, "step": 2636 }, { "epoch": 0.5360845700345599, "grad_norm": 0.126717671751976, "learning_rate": 0.00014648632157022274, "loss": 1.0012, "step": 2637 }, { "epoch": 0.5362878633868673, "grad_norm": 0.12660568952560425, "learning_rate": 0.00014646598189769144, "loss": 0.9195, "step": 2638 }, { "epoch": 0.5364911567391746, "grad_norm": 0.12521329522132874, "learning_rate": 0.00014644564222516016, "loss": 1.1181, "step": 2639 }, { "epoch": 0.536694450091482, "grad_norm": 0.1392340511083603, "learning_rate": 0.00014642530255262891, "loss": 1.1255, "step": 2640 }, { "epoch": 0.5368977434437894, "grad_norm": 0.1406872570514679, "learning_rate": 0.00014640496288009764, "loss": 1.0753, "step": 2641 }, { "epoch": 0.5371010367960968, "grad_norm": 0.12615209817886353, "learning_rate": 0.00014638462320756636, "loss": 0.9859, "step": 2642 }, { "epoch": 0.5373043301484042, "grad_norm": 0.12144862115383148, "learning_rate": 0.0001463642835350351, "loss": 0.9186, "step": 2643 }, { "epoch": 0.5375076235007116, "grad_norm": 0.12902086973190308, "learning_rate": 0.00014634394386250384, "loss": 1.1739, "step": 2644 }, { "epoch": 0.5377109168530189, "grad_norm": 0.12960048019886017, "learning_rate": 0.00014632360418997256, "loss": 1.0439, "step": 2645 }, { "epoch": 0.5379142102053263, "grad_norm": 0.12488772720098495, "learning_rate": 0.00014630326451744126, "loss": 1.0363, "step": 2646 }, { "epoch": 0.5381175035576337, "grad_norm": 0.14255747199058533, "learning_rate": 0.00014628292484490999, "loss": 1.1972, "step": 2647 }, { "epoch": 0.5383207969099411, "grad_norm": 0.11950040608644485, "learning_rate": 0.00014626258517237874, "loss": 0.931, "step": 2648 }, { "epoch": 0.5385240902622485, "grad_norm": 0.1382722705602646, "learning_rate": 0.00014624224549984746, "loss": 1.1622, "step": 2649 }, { "epoch": 0.5387273836145559, "grad_norm": 0.13348785042762756, "learning_rate": 0.00014622190582731619, "loss": 1.1186, "step": 2650 }, { "epoch": 0.5389306769668631, "grad_norm": 0.1255137175321579, "learning_rate": 0.0001462015661547849, "loss": 1.1545, "step": 2651 }, { "epoch": 0.5391339703191705, "grad_norm": 0.12063666433095932, "learning_rate": 0.00014618122648225366, "loss": 0.9628, "step": 2652 }, { "epoch": 0.5393372636714779, "grad_norm": 0.1361551582813263, "learning_rate": 0.00014616088680972239, "loss": 1.1738, "step": 2653 }, { "epoch": 0.5395405570237853, "grad_norm": 0.14640627801418304, "learning_rate": 0.0001461405471371911, "loss": 1.2436, "step": 2654 }, { "epoch": 0.5397438503760927, "grad_norm": 0.13391757011413574, "learning_rate": 0.0001461202074646598, "loss": 1.0063, "step": 2655 }, { "epoch": 0.5399471437284001, "grad_norm": 0.13022476434707642, "learning_rate": 0.00014609986779212856, "loss": 0.9757, "step": 2656 }, { "epoch": 0.5401504370807074, "grad_norm": 0.12605974078178406, "learning_rate": 0.00014607952811959728, "loss": 1.0168, "step": 2657 }, { "epoch": 0.5403537304330148, "grad_norm": 0.12972256541252136, "learning_rate": 0.000146059188447066, "loss": 1.1375, "step": 2658 }, { "epoch": 0.5405570237853222, "grad_norm": 0.12093812972307205, "learning_rate": 0.00014603884877453473, "loss": 1.0279, "step": 2659 }, { "epoch": 0.5407603171376296, "grad_norm": 0.13197238743305206, "learning_rate": 0.00014601850910200348, "loss": 1.1084, "step": 2660 }, { "epoch": 0.540963610489937, "grad_norm": 0.14289307594299316, "learning_rate": 0.0001459981694294722, "loss": 0.9985, "step": 2661 }, { "epoch": 0.5411669038422443, "grad_norm": 0.12929311394691467, "learning_rate": 0.00014597782975694093, "loss": 1.1129, "step": 2662 }, { "epoch": 0.5413701971945517, "grad_norm": 0.12893937528133392, "learning_rate": 0.00014595749008440963, "loss": 0.9588, "step": 2663 }, { "epoch": 0.5415734905468591, "grad_norm": 0.1215519979596138, "learning_rate": 0.00014593715041187836, "loss": 1.0356, "step": 2664 }, { "epoch": 0.5417767838991665, "grad_norm": 0.12775017321109772, "learning_rate": 0.0001459168107393471, "loss": 0.9293, "step": 2665 }, { "epoch": 0.5419800772514739, "grad_norm": 0.13559330999851227, "learning_rate": 0.00014589647106681583, "loss": 1.0579, "step": 2666 }, { "epoch": 0.5421833706037813, "grad_norm": 0.13883750140666962, "learning_rate": 0.00014587613139428456, "loss": 1.1288, "step": 2667 }, { "epoch": 0.5423866639560886, "grad_norm": 0.12956243753433228, "learning_rate": 0.00014585579172175328, "loss": 1.0156, "step": 2668 }, { "epoch": 0.542589957308396, "grad_norm": 0.12133780866861343, "learning_rate": 0.00014583545204922203, "loss": 1.0124, "step": 2669 }, { "epoch": 0.5427932506607034, "grad_norm": 0.13446684181690216, "learning_rate": 0.00014581511237669076, "loss": 1.1288, "step": 2670 }, { "epoch": 0.5429965440130108, "grad_norm": 0.1329856663942337, "learning_rate": 0.00014579477270415945, "loss": 1.009, "step": 2671 }, { "epoch": 0.5431998373653182, "grad_norm": 0.1257416158914566, "learning_rate": 0.00014577443303162818, "loss": 0.9678, "step": 2672 }, { "epoch": 0.5434031307176256, "grad_norm": 0.118684783577919, "learning_rate": 0.00014575409335909693, "loss": 0.9732, "step": 2673 }, { "epoch": 0.5436064240699329, "grad_norm": 0.12146252393722534, "learning_rate": 0.00014573375368656565, "loss": 1.0225, "step": 2674 }, { "epoch": 0.5438097174222403, "grad_norm": 0.13205134868621826, "learning_rate": 0.00014571341401403438, "loss": 1.1626, "step": 2675 }, { "epoch": 0.5440130107745477, "grad_norm": 0.1180446445941925, "learning_rate": 0.0001456930743415031, "loss": 0.9787, "step": 2676 }, { "epoch": 0.5442163041268551, "grad_norm": 0.12436480075120926, "learning_rate": 0.00014567273466897185, "loss": 0.9211, "step": 2677 }, { "epoch": 0.5444195974791625, "grad_norm": 0.13441622257232666, "learning_rate": 0.00014565239499644058, "loss": 1.082, "step": 2678 }, { "epoch": 0.5446228908314699, "grad_norm": 0.13546496629714966, "learning_rate": 0.00014563205532390928, "loss": 0.9564, "step": 2679 }, { "epoch": 0.5448261841837772, "grad_norm": 0.13210104405879974, "learning_rate": 0.000145611715651378, "loss": 1.1043, "step": 2680 }, { "epoch": 0.5450294775360846, "grad_norm": 0.12021714448928833, "learning_rate": 0.00014559137597884675, "loss": 1.0093, "step": 2681 }, { "epoch": 0.545232770888392, "grad_norm": 0.14060239493846893, "learning_rate": 0.00014557103630631548, "loss": 0.8708, "step": 2682 }, { "epoch": 0.5454360642406993, "grad_norm": 0.10503846406936646, "learning_rate": 0.0001455506966337842, "loss": 0.8049, "step": 2683 }, { "epoch": 0.5456393575930067, "grad_norm": 0.1391855627298355, "learning_rate": 0.00014553035696125293, "loss": 1.1862, "step": 2684 }, { "epoch": 0.545842650945314, "grad_norm": 0.13078033924102783, "learning_rate": 0.00014551001728872168, "loss": 1.0238, "step": 2685 }, { "epoch": 0.5460459442976214, "grad_norm": 0.12442688643932343, "learning_rate": 0.0001454896776161904, "loss": 0.9966, "step": 2686 }, { "epoch": 0.5462492376499288, "grad_norm": 0.11848010122776031, "learning_rate": 0.0001454693379436591, "loss": 0.9754, "step": 2687 }, { "epoch": 0.5464525310022362, "grad_norm": 0.13601583242416382, "learning_rate": 0.00014544899827112782, "loss": 1.0994, "step": 2688 }, { "epoch": 0.5466558243545436, "grad_norm": 0.13946221768856049, "learning_rate": 0.00014542865859859658, "loss": 1.1127, "step": 2689 }, { "epoch": 0.546859117706851, "grad_norm": 0.1452294886112213, "learning_rate": 0.0001454083189260653, "loss": 1.146, "step": 2690 }, { "epoch": 0.5470624110591583, "grad_norm": 0.14542357623577118, "learning_rate": 0.00014538797925353402, "loss": 1.1916, "step": 2691 }, { "epoch": 0.5472657044114657, "grad_norm": 0.11541703343391418, "learning_rate": 0.00014536763958100275, "loss": 0.9437, "step": 2692 }, { "epoch": 0.5474689977637731, "grad_norm": 0.12495800107717514, "learning_rate": 0.0001453472999084715, "loss": 1.0014, "step": 2693 }, { "epoch": 0.5476722911160805, "grad_norm": 0.13895189762115479, "learning_rate": 0.00014532696023594022, "loss": 1.1139, "step": 2694 }, { "epoch": 0.5478755844683879, "grad_norm": 0.12779201567173004, "learning_rate": 0.00014530662056340892, "loss": 1.0702, "step": 2695 }, { "epoch": 0.5480788778206953, "grad_norm": 0.14240634441375732, "learning_rate": 0.00014528628089087765, "loss": 1.1233, "step": 2696 }, { "epoch": 0.5482821711730026, "grad_norm": 0.12415528297424316, "learning_rate": 0.0001452659412183464, "loss": 1.0954, "step": 2697 }, { "epoch": 0.54848546452531, "grad_norm": 0.13816578686237335, "learning_rate": 0.00014524560154581512, "loss": 1.2772, "step": 2698 }, { "epoch": 0.5486887578776174, "grad_norm": 0.12729184329509735, "learning_rate": 0.00014522526187328385, "loss": 1.0519, "step": 2699 }, { "epoch": 0.5488920512299248, "grad_norm": 0.12732116878032684, "learning_rate": 0.00014520492220075257, "loss": 1.0562, "step": 2700 }, { "epoch": 0.5490953445822322, "grad_norm": 0.12312710285186768, "learning_rate": 0.00014518458252822132, "loss": 1.0082, "step": 2701 }, { "epoch": 0.5492986379345396, "grad_norm": 0.1302732676267624, "learning_rate": 0.00014516424285569005, "loss": 1.1847, "step": 2702 }, { "epoch": 0.5495019312868469, "grad_norm": 0.13683298230171204, "learning_rate": 0.00014514390318315874, "loss": 1.1784, "step": 2703 }, { "epoch": 0.5497052246391543, "grad_norm": 0.1429167538881302, "learning_rate": 0.00014512356351062747, "loss": 1.2792, "step": 2704 }, { "epoch": 0.5499085179914617, "grad_norm": 0.136098712682724, "learning_rate": 0.0001451032238380962, "loss": 1.2149, "step": 2705 }, { "epoch": 0.5501118113437691, "grad_norm": 0.1201593205332756, "learning_rate": 0.00014508288416556495, "loss": 0.9673, "step": 2706 }, { "epoch": 0.5503151046960765, "grad_norm": 0.10153687000274658, "learning_rate": 0.00014506254449303367, "loss": 0.8142, "step": 2707 }, { "epoch": 0.5505183980483839, "grad_norm": 0.11609897762537003, "learning_rate": 0.0001450422048205024, "loss": 1.0168, "step": 2708 }, { "epoch": 0.5507216914006912, "grad_norm": 0.12856177985668182, "learning_rate": 0.00014502186514797112, "loss": 1.1785, "step": 2709 }, { "epoch": 0.5509249847529986, "grad_norm": 0.11091580241918564, "learning_rate": 0.00014500152547543987, "loss": 0.9282, "step": 2710 }, { "epoch": 0.551128278105306, "grad_norm": 0.13458651304244995, "learning_rate": 0.0001449811858029086, "loss": 1.1647, "step": 2711 }, { "epoch": 0.5513315714576134, "grad_norm": 0.12265376001596451, "learning_rate": 0.0001449608461303773, "loss": 1.0149, "step": 2712 }, { "epoch": 0.5515348648099208, "grad_norm": 0.12033109366893768, "learning_rate": 0.00014494050645784602, "loss": 0.9313, "step": 2713 }, { "epoch": 0.551738158162228, "grad_norm": 0.13308046758174896, "learning_rate": 0.00014492016678531477, "loss": 1.044, "step": 2714 }, { "epoch": 0.5519414515145354, "grad_norm": 0.12852205336093903, "learning_rate": 0.0001448998271127835, "loss": 1.0578, "step": 2715 }, { "epoch": 0.5521447448668428, "grad_norm": 0.13972130417823792, "learning_rate": 0.00014487948744025222, "loss": 1.0903, "step": 2716 }, { "epoch": 0.5523480382191502, "grad_norm": 0.14152394235134125, "learning_rate": 0.00014485914776772094, "loss": 1.2561, "step": 2717 }, { "epoch": 0.5525513315714576, "grad_norm": 0.1381314992904663, "learning_rate": 0.0001448388080951897, "loss": 1.1794, "step": 2718 }, { "epoch": 0.552754624923765, "grad_norm": 0.11829142272472382, "learning_rate": 0.00014481846842265842, "loss": 0.9385, "step": 2719 }, { "epoch": 0.5529579182760723, "grad_norm": 0.13279980421066284, "learning_rate": 0.00014479812875012711, "loss": 1.0034, "step": 2720 }, { "epoch": 0.5531612116283797, "grad_norm": 0.1229550689458847, "learning_rate": 0.00014477778907759584, "loss": 0.9929, "step": 2721 }, { "epoch": 0.5533645049806871, "grad_norm": 0.12663327157497406, "learning_rate": 0.0001447574494050646, "loss": 1.0095, "step": 2722 }, { "epoch": 0.5535677983329945, "grad_norm": 0.14191538095474243, "learning_rate": 0.00014473710973253332, "loss": 1.0165, "step": 2723 }, { "epoch": 0.5537710916853019, "grad_norm": 0.12460799515247345, "learning_rate": 0.00014471677006000204, "loss": 0.8327, "step": 2724 }, { "epoch": 0.5539743850376093, "grad_norm": 0.11980767548084259, "learning_rate": 0.00014469643038747076, "loss": 0.9807, "step": 2725 }, { "epoch": 0.5541776783899166, "grad_norm": 0.12429416179656982, "learning_rate": 0.00014467609071493952, "loss": 1.0604, "step": 2726 }, { "epoch": 0.554380971742224, "grad_norm": 0.14179259538650513, "learning_rate": 0.00014465575104240824, "loss": 1.1217, "step": 2727 }, { "epoch": 0.5545842650945314, "grad_norm": 0.12223639339208603, "learning_rate": 0.00014463541136987694, "loss": 0.9083, "step": 2728 }, { "epoch": 0.5547875584468388, "grad_norm": 0.13745662569999695, "learning_rate": 0.00014461507169734566, "loss": 1.0864, "step": 2729 }, { "epoch": 0.5549908517991462, "grad_norm": 0.12111254036426544, "learning_rate": 0.00014459473202481441, "loss": 0.995, "step": 2730 }, { "epoch": 0.5551941451514536, "grad_norm": 0.14073847234249115, "learning_rate": 0.00014457439235228314, "loss": 1.0003, "step": 2731 }, { "epoch": 0.5553974385037609, "grad_norm": 0.13188788294792175, "learning_rate": 0.00014455405267975186, "loss": 1.1628, "step": 2732 }, { "epoch": 0.5556007318560683, "grad_norm": 0.10727431625127792, "learning_rate": 0.0001445337130072206, "loss": 0.9385, "step": 2733 }, { "epoch": 0.5558040252083757, "grad_norm": 0.12671469151973724, "learning_rate": 0.00014451337333468934, "loss": 0.984, "step": 2734 }, { "epoch": 0.5560073185606831, "grad_norm": 0.12647178769111633, "learning_rate": 0.00014449303366215806, "loss": 1.0865, "step": 2735 }, { "epoch": 0.5562106119129905, "grad_norm": 0.1198342889547348, "learning_rate": 0.00014447269398962676, "loss": 1.0589, "step": 2736 }, { "epoch": 0.5564139052652978, "grad_norm": 0.13245652616024017, "learning_rate": 0.00014445235431709548, "loss": 0.9953, "step": 2737 }, { "epoch": 0.5566171986176052, "grad_norm": 0.11206847429275513, "learning_rate": 0.00014443201464456424, "loss": 0.8762, "step": 2738 }, { "epoch": 0.5568204919699126, "grad_norm": 0.16584132611751556, "learning_rate": 0.00014441167497203296, "loss": 1.2808, "step": 2739 }, { "epoch": 0.55702378532222, "grad_norm": 0.1278923898935318, "learning_rate": 0.00014439133529950169, "loss": 1.1515, "step": 2740 }, { "epoch": 0.5572270786745274, "grad_norm": 0.1336185187101364, "learning_rate": 0.0001443709956269704, "loss": 1.0372, "step": 2741 }, { "epoch": 0.5574303720268348, "grad_norm": 0.13731592893600464, "learning_rate": 0.00014435065595443916, "loss": 1.0837, "step": 2742 }, { "epoch": 0.557633665379142, "grad_norm": 0.13053496181964874, "learning_rate": 0.00014433031628190789, "loss": 0.9402, "step": 2743 }, { "epoch": 0.5578369587314495, "grad_norm": 0.14074589312076569, "learning_rate": 0.00014430997660937658, "loss": 1.1168, "step": 2744 }, { "epoch": 0.5580402520837568, "grad_norm": 0.1500421017408371, "learning_rate": 0.0001442896369368453, "loss": 1.0726, "step": 2745 }, { "epoch": 0.5582435454360642, "grad_norm": 0.14489975571632385, "learning_rate": 0.00014426929726431403, "loss": 1.263, "step": 2746 }, { "epoch": 0.5584468387883716, "grad_norm": 0.14446121454238892, "learning_rate": 0.00014424895759178278, "loss": 1.2142, "step": 2747 }, { "epoch": 0.558650132140679, "grad_norm": 0.13410677015781403, "learning_rate": 0.0001442286179192515, "loss": 1.0715, "step": 2748 }, { "epoch": 0.5588534254929863, "grad_norm": 0.1425483077764511, "learning_rate": 0.00014420827824672023, "loss": 0.9858, "step": 2749 }, { "epoch": 0.5590567188452937, "grad_norm": 0.13073715567588806, "learning_rate": 0.00014418793857418896, "loss": 1.0657, "step": 2750 }, { "epoch": 0.5592600121976011, "grad_norm": 0.1257767379283905, "learning_rate": 0.0001441675989016577, "loss": 1.0749, "step": 2751 }, { "epoch": 0.5594633055499085, "grad_norm": 0.1408379077911377, "learning_rate": 0.0001441472592291264, "loss": 1.1656, "step": 2752 }, { "epoch": 0.5596665989022159, "grad_norm": 0.13191954791545868, "learning_rate": 0.00014412691955659513, "loss": 1.0291, "step": 2753 }, { "epoch": 0.5598698922545233, "grad_norm": 0.12902916967868805, "learning_rate": 0.00014410657988406385, "loss": 1.075, "step": 2754 }, { "epoch": 0.5600731856068306, "grad_norm": 0.13078373670578003, "learning_rate": 0.0001440862402115326, "loss": 1.1693, "step": 2755 }, { "epoch": 0.560276478959138, "grad_norm": 0.1379525512456894, "learning_rate": 0.00014406590053900133, "loss": 1.1614, "step": 2756 }, { "epoch": 0.5604797723114454, "grad_norm": 0.12570443749427795, "learning_rate": 0.00014404556086647006, "loss": 1.0245, "step": 2757 }, { "epoch": 0.5606830656637528, "grad_norm": 0.13668902218341827, "learning_rate": 0.00014402522119393878, "loss": 1.1636, "step": 2758 }, { "epoch": 0.5608863590160602, "grad_norm": 0.13914015889167786, "learning_rate": 0.00014400488152140753, "loss": 1.0136, "step": 2759 }, { "epoch": 0.5610896523683676, "grad_norm": 0.13811741769313812, "learning_rate": 0.00014398454184887623, "loss": 1.2955, "step": 2760 }, { "epoch": 0.5612929457206749, "grad_norm": 0.14095258712768555, "learning_rate": 0.00014396420217634495, "loss": 1.1373, "step": 2761 }, { "epoch": 0.5614962390729823, "grad_norm": 0.11365115642547607, "learning_rate": 0.00014394386250381368, "loss": 0.827, "step": 2762 }, { "epoch": 0.5616995324252897, "grad_norm": 0.1321718692779541, "learning_rate": 0.00014392352283128243, "loss": 1.0739, "step": 2763 }, { "epoch": 0.5619028257775971, "grad_norm": 0.13008981943130493, "learning_rate": 0.00014390318315875115, "loss": 1.0253, "step": 2764 }, { "epoch": 0.5621061191299045, "grad_norm": 0.11360891908407211, "learning_rate": 0.00014388284348621988, "loss": 0.8921, "step": 2765 }, { "epoch": 0.5623094124822118, "grad_norm": 0.1246936097741127, "learning_rate": 0.0001438625038136886, "loss": 0.8497, "step": 2766 }, { "epoch": 0.5625127058345192, "grad_norm": 0.14330574870109558, "learning_rate": 0.00014384216414115735, "loss": 1.1843, "step": 2767 }, { "epoch": 0.5627159991868266, "grad_norm": 0.1149834543466568, "learning_rate": 0.00014382182446862608, "loss": 0.8757, "step": 2768 }, { "epoch": 0.562919292539134, "grad_norm": 0.13841336965560913, "learning_rate": 0.00014380148479609478, "loss": 1.0849, "step": 2769 }, { "epoch": 0.5631225858914414, "grad_norm": 0.12189842760562897, "learning_rate": 0.0001437811451235635, "loss": 1.0182, "step": 2770 }, { "epoch": 0.5633258792437488, "grad_norm": 0.14273017644882202, "learning_rate": 0.00014376080545103225, "loss": 1.1301, "step": 2771 }, { "epoch": 0.5635291725960561, "grad_norm": 0.13799621164798737, "learning_rate": 0.00014374046577850098, "loss": 1.0078, "step": 2772 }, { "epoch": 0.5637324659483635, "grad_norm": 0.1299772560596466, "learning_rate": 0.0001437201261059697, "loss": 0.9765, "step": 2773 }, { "epoch": 0.5639357593006709, "grad_norm": 0.13939563930034637, "learning_rate": 0.00014369978643343843, "loss": 1.1519, "step": 2774 }, { "epoch": 0.5641390526529783, "grad_norm": 0.14570674300193787, "learning_rate": 0.00014367944676090718, "loss": 1.0858, "step": 2775 }, { "epoch": 0.5643423460052857, "grad_norm": 0.12805186212062836, "learning_rate": 0.0001436591070883759, "loss": 0.916, "step": 2776 }, { "epoch": 0.564545639357593, "grad_norm": 0.12251273542642593, "learning_rate": 0.0001436387674158446, "loss": 1.0465, "step": 2777 }, { "epoch": 0.5647489327099003, "grad_norm": 0.1256076544523239, "learning_rate": 0.00014361842774331332, "loss": 0.9972, "step": 2778 }, { "epoch": 0.5649522260622077, "grad_norm": 0.12593501806259155, "learning_rate": 0.00014359808807078207, "loss": 0.961, "step": 2779 }, { "epoch": 0.5651555194145151, "grad_norm": 0.1273297369480133, "learning_rate": 0.0001435777483982508, "loss": 0.9951, "step": 2780 }, { "epoch": 0.5653588127668225, "grad_norm": 0.1263994574546814, "learning_rate": 0.00014355740872571952, "loss": 1.0616, "step": 2781 }, { "epoch": 0.5655621061191299, "grad_norm": 0.11736489087343216, "learning_rate": 0.00014353706905318825, "loss": 0.9839, "step": 2782 }, { "epoch": 0.5657653994714373, "grad_norm": 0.12970155477523804, "learning_rate": 0.000143516729380657, "loss": 0.9299, "step": 2783 }, { "epoch": 0.5659686928237446, "grad_norm": 0.13361741602420807, "learning_rate": 0.00014349638970812572, "loss": 1.0209, "step": 2784 }, { "epoch": 0.566171986176052, "grad_norm": 0.13938020169734955, "learning_rate": 0.00014347605003559442, "loss": 1.0303, "step": 2785 }, { "epoch": 0.5663752795283594, "grad_norm": 0.13315965235233307, "learning_rate": 0.00014345571036306315, "loss": 1.1152, "step": 2786 }, { "epoch": 0.5665785728806668, "grad_norm": 0.14047378301620483, "learning_rate": 0.00014343537069053187, "loss": 1.2173, "step": 2787 }, { "epoch": 0.5667818662329742, "grad_norm": 0.1367003172636032, "learning_rate": 0.00014341503101800062, "loss": 1.0284, "step": 2788 }, { "epoch": 0.5669851595852815, "grad_norm": 0.1463545858860016, "learning_rate": 0.00014339469134546935, "loss": 1.0506, "step": 2789 }, { "epoch": 0.5671884529375889, "grad_norm": 0.12741826474666595, "learning_rate": 0.00014337435167293807, "loss": 1.0226, "step": 2790 }, { "epoch": 0.5673917462898963, "grad_norm": 0.1232975423336029, "learning_rate": 0.0001433540120004068, "loss": 0.9259, "step": 2791 }, { "epoch": 0.5675950396422037, "grad_norm": 0.13350965082645416, "learning_rate": 0.00014333367232787555, "loss": 1.0739, "step": 2792 }, { "epoch": 0.5677983329945111, "grad_norm": 0.1262935996055603, "learning_rate": 0.00014331333265534424, "loss": 1.1412, "step": 2793 }, { "epoch": 0.5680016263468185, "grad_norm": 0.1304781287908554, "learning_rate": 0.00014329299298281297, "loss": 1.0605, "step": 2794 }, { "epoch": 0.5682049196991258, "grad_norm": 0.13018850982189178, "learning_rate": 0.0001432726533102817, "loss": 1.048, "step": 2795 }, { "epoch": 0.5684082130514332, "grad_norm": 0.13948385417461395, "learning_rate": 0.00014325231363775044, "loss": 1.2018, "step": 2796 }, { "epoch": 0.5686115064037406, "grad_norm": 0.1164885088801384, "learning_rate": 0.00014323197396521917, "loss": 0.9532, "step": 2797 }, { "epoch": 0.568814799756048, "grad_norm": 0.1407950073480606, "learning_rate": 0.0001432116342926879, "loss": 1.0816, "step": 2798 }, { "epoch": 0.5690180931083554, "grad_norm": 0.12568843364715576, "learning_rate": 0.00014319129462015662, "loss": 0.9222, "step": 2799 }, { "epoch": 0.5692213864606628, "grad_norm": 0.14112015068531036, "learning_rate": 0.00014317095494762537, "loss": 1.057, "step": 2800 }, { "epoch": 0.5694246798129701, "grad_norm": 0.1322345733642578, "learning_rate": 0.00014315061527509407, "loss": 1.0804, "step": 2801 }, { "epoch": 0.5696279731652775, "grad_norm": 0.13166458904743195, "learning_rate": 0.0001431302756025628, "loss": 0.9637, "step": 2802 }, { "epoch": 0.5698312665175849, "grad_norm": 0.13725675642490387, "learning_rate": 0.00014310993593003152, "loss": 0.9894, "step": 2803 }, { "epoch": 0.5700345598698923, "grad_norm": 0.1358625739812851, "learning_rate": 0.00014308959625750027, "loss": 1.1097, "step": 2804 }, { "epoch": 0.5702378532221997, "grad_norm": 0.14208373427391052, "learning_rate": 0.000143069256584969, "loss": 1.1789, "step": 2805 }, { "epoch": 0.5704411465745071, "grad_norm": 0.12727318704128265, "learning_rate": 0.00014304891691243772, "loss": 0.9598, "step": 2806 }, { "epoch": 0.5706444399268144, "grad_norm": 0.12927868962287903, "learning_rate": 0.00014302857723990644, "loss": 1.0194, "step": 2807 }, { "epoch": 0.5708477332791217, "grad_norm": 0.14685644209384918, "learning_rate": 0.0001430082375673752, "loss": 1.1379, "step": 2808 }, { "epoch": 0.5710510266314291, "grad_norm": 0.14648008346557617, "learning_rate": 0.0001429878978948439, "loss": 1.2296, "step": 2809 }, { "epoch": 0.5712543199837365, "grad_norm": 0.12980784475803375, "learning_rate": 0.00014296755822231261, "loss": 1.0982, "step": 2810 }, { "epoch": 0.571457613336044, "grad_norm": 0.11192413419485092, "learning_rate": 0.00014294721854978134, "loss": 0.9545, "step": 2811 }, { "epoch": 0.5716609066883513, "grad_norm": 0.15568038821220398, "learning_rate": 0.0001429268788772501, "loss": 1.1671, "step": 2812 }, { "epoch": 0.5718642000406586, "grad_norm": 0.14970743656158447, "learning_rate": 0.00014290653920471881, "loss": 1.0711, "step": 2813 }, { "epoch": 0.572067493392966, "grad_norm": 0.13441245257854462, "learning_rate": 0.00014288619953218754, "loss": 1.0231, "step": 2814 }, { "epoch": 0.5722707867452734, "grad_norm": 0.12407507002353668, "learning_rate": 0.00014286585985965626, "loss": 1.0276, "step": 2815 }, { "epoch": 0.5724740800975808, "grad_norm": 0.13431482017040253, "learning_rate": 0.00014284552018712502, "loss": 1.1361, "step": 2816 }, { "epoch": 0.5726773734498882, "grad_norm": 0.132259339094162, "learning_rate": 0.0001428251805145937, "loss": 1.2343, "step": 2817 }, { "epoch": 0.5728806668021955, "grad_norm": 0.1342546045780182, "learning_rate": 0.00014280484084206244, "loss": 1.0906, "step": 2818 }, { "epoch": 0.5730839601545029, "grad_norm": 0.12521067261695862, "learning_rate": 0.00014278450116953116, "loss": 1.0881, "step": 2819 }, { "epoch": 0.5732872535068103, "grad_norm": 0.12174705415964127, "learning_rate": 0.0001427641614969999, "loss": 0.8563, "step": 2820 }, { "epoch": 0.5734905468591177, "grad_norm": 0.14310669898986816, "learning_rate": 0.00014274382182446864, "loss": 1.2119, "step": 2821 }, { "epoch": 0.5736938402114251, "grad_norm": 0.11739708483219147, "learning_rate": 0.00014272348215193736, "loss": 0.8849, "step": 2822 }, { "epoch": 0.5738971335637325, "grad_norm": 0.14041262865066528, "learning_rate": 0.0001427031424794061, "loss": 1.3593, "step": 2823 }, { "epoch": 0.5741004269160398, "grad_norm": 0.13473278284072876, "learning_rate": 0.00014268280280687484, "loss": 1.0379, "step": 2824 }, { "epoch": 0.5743037202683472, "grad_norm": 0.12364168465137482, "learning_rate": 0.00014266246313434354, "loss": 1.0167, "step": 2825 }, { "epoch": 0.5745070136206546, "grad_norm": 0.1333821415901184, "learning_rate": 0.00014264212346181226, "loss": 1.0472, "step": 2826 }, { "epoch": 0.574710306972962, "grad_norm": 0.11603229492902756, "learning_rate": 0.00014262178378928098, "loss": 0.8045, "step": 2827 }, { "epoch": 0.5749136003252694, "grad_norm": 0.13383187353610992, "learning_rate": 0.0001426014441167497, "loss": 1.1617, "step": 2828 }, { "epoch": 0.5751168936775768, "grad_norm": 0.1249544620513916, "learning_rate": 0.00014258110444421846, "loss": 1.0211, "step": 2829 }, { "epoch": 0.5753201870298841, "grad_norm": 0.12109317630529404, "learning_rate": 0.00014256076477168719, "loss": 0.9672, "step": 2830 }, { "epoch": 0.5755234803821915, "grad_norm": 0.1185065507888794, "learning_rate": 0.0001425404250991559, "loss": 0.857, "step": 2831 }, { "epoch": 0.5757267737344989, "grad_norm": 0.162327840924263, "learning_rate": 0.00014252008542662463, "loss": 1.2834, "step": 2832 }, { "epoch": 0.5759300670868063, "grad_norm": 0.12928487360477448, "learning_rate": 0.00014249974575409339, "loss": 1.1067, "step": 2833 }, { "epoch": 0.5761333604391137, "grad_norm": 0.12098827958106995, "learning_rate": 0.00014247940608156208, "loss": 0.8984, "step": 2834 }, { "epoch": 0.5763366537914211, "grad_norm": 0.12587502598762512, "learning_rate": 0.0001424590664090308, "loss": 1.0488, "step": 2835 }, { "epoch": 0.5765399471437284, "grad_norm": 0.12398620694875717, "learning_rate": 0.00014243872673649953, "loss": 0.9838, "step": 2836 }, { "epoch": 0.5767432404960358, "grad_norm": 0.12822575867176056, "learning_rate": 0.00014241838706396828, "loss": 1.0103, "step": 2837 }, { "epoch": 0.5769465338483432, "grad_norm": 0.13499167561531067, "learning_rate": 0.000142398047391437, "loss": 1.1553, "step": 2838 }, { "epoch": 0.5771498272006506, "grad_norm": 0.12537875771522522, "learning_rate": 0.00014237770771890573, "loss": 0.9383, "step": 2839 }, { "epoch": 0.577353120552958, "grad_norm": 0.13840174674987793, "learning_rate": 0.00014235736804637446, "loss": 1.0803, "step": 2840 }, { "epoch": 0.5775564139052652, "grad_norm": 0.11736918240785599, "learning_rate": 0.0001423370283738432, "loss": 0.908, "step": 2841 }, { "epoch": 0.5777597072575726, "grad_norm": 0.12442715466022491, "learning_rate": 0.0001423166887013119, "loss": 0.9844, "step": 2842 }, { "epoch": 0.57796300060988, "grad_norm": 0.13206282258033752, "learning_rate": 0.00014229634902878063, "loss": 1.063, "step": 2843 }, { "epoch": 0.5781662939621874, "grad_norm": 0.1393408477306366, "learning_rate": 0.00014227600935624935, "loss": 1.0633, "step": 2844 }, { "epoch": 0.5783695873144948, "grad_norm": 0.140583336353302, "learning_rate": 0.0001422556696837181, "loss": 1.1749, "step": 2845 }, { "epoch": 0.5785728806668022, "grad_norm": 0.1310548186302185, "learning_rate": 0.00014223533001118683, "loss": 1.1205, "step": 2846 }, { "epoch": 0.5787761740191095, "grad_norm": 0.1283491551876068, "learning_rate": 0.00014221499033865556, "loss": 1.0956, "step": 2847 }, { "epoch": 0.5789794673714169, "grad_norm": 0.12449255585670471, "learning_rate": 0.00014219465066612428, "loss": 1.0153, "step": 2848 }, { "epoch": 0.5791827607237243, "grad_norm": 0.13952034711837769, "learning_rate": 0.00014217431099359303, "loss": 1.0639, "step": 2849 }, { "epoch": 0.5793860540760317, "grad_norm": 0.1438504010438919, "learning_rate": 0.00014215397132106173, "loss": 1.1237, "step": 2850 }, { "epoch": 0.5795893474283391, "grad_norm": 0.13687646389007568, "learning_rate": 0.00014213363164853045, "loss": 1.0719, "step": 2851 }, { "epoch": 0.5797926407806465, "grad_norm": 0.14046727120876312, "learning_rate": 0.00014211329197599918, "loss": 1.2391, "step": 2852 }, { "epoch": 0.5799959341329538, "grad_norm": 0.1313040554523468, "learning_rate": 0.00014209295230346793, "loss": 1.0689, "step": 2853 }, { "epoch": 0.5801992274852612, "grad_norm": 0.1264270395040512, "learning_rate": 0.00014207261263093665, "loss": 0.9856, "step": 2854 }, { "epoch": 0.5804025208375686, "grad_norm": 0.12176066637039185, "learning_rate": 0.00014205227295840538, "loss": 1.0131, "step": 2855 }, { "epoch": 0.580605814189876, "grad_norm": 0.13929857313632965, "learning_rate": 0.0001420319332858741, "loss": 1.134, "step": 2856 }, { "epoch": 0.5808091075421834, "grad_norm": 0.12523682415485382, "learning_rate": 0.00014201159361334285, "loss": 1.0868, "step": 2857 }, { "epoch": 0.5810124008944908, "grad_norm": 0.13270434737205505, "learning_rate": 0.00014199125394081155, "loss": 1.183, "step": 2858 }, { "epoch": 0.5812156942467981, "grad_norm": 0.1330588310956955, "learning_rate": 0.00014197091426828028, "loss": 1.2487, "step": 2859 }, { "epoch": 0.5814189875991055, "grad_norm": 0.130279541015625, "learning_rate": 0.000141950574595749, "loss": 1.0885, "step": 2860 }, { "epoch": 0.5816222809514129, "grad_norm": 0.1529773771762848, "learning_rate": 0.00014193023492321775, "loss": 1.098, "step": 2861 }, { "epoch": 0.5818255743037203, "grad_norm": 0.14715005457401276, "learning_rate": 0.00014190989525068648, "loss": 1.086, "step": 2862 }, { "epoch": 0.5820288676560277, "grad_norm": 0.12468834221363068, "learning_rate": 0.0001418895555781552, "loss": 1.0349, "step": 2863 }, { "epoch": 0.5822321610083351, "grad_norm": 0.1332579404115677, "learning_rate": 0.00014186921590562393, "loss": 1.0514, "step": 2864 }, { "epoch": 0.5824354543606424, "grad_norm": 0.13424143195152283, "learning_rate": 0.00014184887623309268, "loss": 1.0859, "step": 2865 }, { "epoch": 0.5826387477129498, "grad_norm": 0.11994919180870056, "learning_rate": 0.00014182853656056137, "loss": 0.9416, "step": 2866 }, { "epoch": 0.5828420410652572, "grad_norm": 0.13324035704135895, "learning_rate": 0.0001418081968880301, "loss": 1.112, "step": 2867 }, { "epoch": 0.5830453344175646, "grad_norm": 0.14520680904388428, "learning_rate": 0.00014178785721549882, "loss": 1.0231, "step": 2868 }, { "epoch": 0.583248627769872, "grad_norm": 0.14066869020462036, "learning_rate": 0.00014176751754296755, "loss": 1.281, "step": 2869 }, { "epoch": 0.5834519211221793, "grad_norm": 0.1384185403585434, "learning_rate": 0.0001417471778704363, "loss": 1.0393, "step": 2870 }, { "epoch": 0.5836552144744866, "grad_norm": 0.1287851184606552, "learning_rate": 0.00014172683819790502, "loss": 1.0967, "step": 2871 }, { "epoch": 0.583858507826794, "grad_norm": 0.11896179616451263, "learning_rate": 0.00014170649852537375, "loss": 0.844, "step": 2872 }, { "epoch": 0.5840618011791014, "grad_norm": 0.1319238543510437, "learning_rate": 0.00014168615885284247, "loss": 1.0279, "step": 2873 }, { "epoch": 0.5842650945314088, "grad_norm": 0.1428615152835846, "learning_rate": 0.0001416658191803112, "loss": 1.0823, "step": 2874 }, { "epoch": 0.5844683878837162, "grad_norm": 0.11939448863267899, "learning_rate": 0.00014164547950777992, "loss": 1.0417, "step": 2875 }, { "epoch": 0.5846716812360235, "grad_norm": 0.13555167615413666, "learning_rate": 0.00014162513983524865, "loss": 1.0476, "step": 2876 }, { "epoch": 0.5848749745883309, "grad_norm": 0.12872137129306793, "learning_rate": 0.00014160480016271737, "loss": 0.9306, "step": 2877 }, { "epoch": 0.5850782679406383, "grad_norm": 0.12111514061689377, "learning_rate": 0.00014158446049018612, "loss": 0.9841, "step": 2878 }, { "epoch": 0.5852815612929457, "grad_norm": 0.12589818239212036, "learning_rate": 0.00014156412081765485, "loss": 1.0237, "step": 2879 }, { "epoch": 0.5854848546452531, "grad_norm": 0.12264888733625412, "learning_rate": 0.00014154378114512357, "loss": 0.9417, "step": 2880 }, { "epoch": 0.5856881479975605, "grad_norm": 0.14193598926067352, "learning_rate": 0.0001415234414725923, "loss": 1.2845, "step": 2881 }, { "epoch": 0.5858914413498678, "grad_norm": 0.14116251468658447, "learning_rate": 0.00014150310180006102, "loss": 1.001, "step": 2882 }, { "epoch": 0.5860947347021752, "grad_norm": 0.14120200276374817, "learning_rate": 0.00014148276212752974, "loss": 1.2458, "step": 2883 }, { "epoch": 0.5862980280544826, "grad_norm": 0.13560935854911804, "learning_rate": 0.00014146242245499847, "loss": 1.1217, "step": 2884 }, { "epoch": 0.58650132140679, "grad_norm": 0.14672443270683289, "learning_rate": 0.0001414420827824672, "loss": 1.188, "step": 2885 }, { "epoch": 0.5867046147590974, "grad_norm": 0.12481992691755295, "learning_rate": 0.00014142174310993594, "loss": 1.0063, "step": 2886 }, { "epoch": 0.5869079081114048, "grad_norm": 0.13482870161533356, "learning_rate": 0.00014140140343740467, "loss": 1.0498, "step": 2887 }, { "epoch": 0.5871112014637121, "grad_norm": 0.16956381499767303, "learning_rate": 0.0001413810637648734, "loss": 1.1202, "step": 2888 }, { "epoch": 0.5873144948160195, "grad_norm": 0.1285228431224823, "learning_rate": 0.00014136072409234212, "loss": 0.9175, "step": 2889 }, { "epoch": 0.5875177881683269, "grad_norm": 0.12045499682426453, "learning_rate": 0.00014134038441981087, "loss": 1.0062, "step": 2890 }, { "epoch": 0.5877210815206343, "grad_norm": 0.11959182471036911, "learning_rate": 0.00014132004474727957, "loss": 0.9041, "step": 2891 }, { "epoch": 0.5879243748729417, "grad_norm": 0.12867799401283264, "learning_rate": 0.0001412997050747483, "loss": 0.9985, "step": 2892 }, { "epoch": 0.5881276682252491, "grad_norm": 0.12349910289049149, "learning_rate": 0.00014127936540221702, "loss": 0.9357, "step": 2893 }, { "epoch": 0.5883309615775564, "grad_norm": 0.12842735648155212, "learning_rate": 0.00014125902572968577, "loss": 1.0862, "step": 2894 }, { "epoch": 0.5885342549298638, "grad_norm": 0.1375754177570343, "learning_rate": 0.0001412386860571545, "loss": 1.1911, "step": 2895 }, { "epoch": 0.5887375482821712, "grad_norm": 0.13770340383052826, "learning_rate": 0.00014121834638462322, "loss": 1.0785, "step": 2896 }, { "epoch": 0.5889408416344786, "grad_norm": 0.13841983675956726, "learning_rate": 0.00014119800671209194, "loss": 1.044, "step": 2897 }, { "epoch": 0.589144134986786, "grad_norm": 0.12044288218021393, "learning_rate": 0.0001411776670395607, "loss": 1.0482, "step": 2898 }, { "epoch": 0.5893474283390933, "grad_norm": 0.11521141231060028, "learning_rate": 0.0001411573273670294, "loss": 0.8782, "step": 2899 }, { "epoch": 0.5895507216914007, "grad_norm": 0.13133427500724792, "learning_rate": 0.00014113698769449811, "loss": 1.0602, "step": 2900 }, { "epoch": 0.5897540150437081, "grad_norm": 0.12748554348945618, "learning_rate": 0.00014111664802196684, "loss": 1.0442, "step": 2901 }, { "epoch": 0.5899573083960155, "grad_norm": 0.13325203955173492, "learning_rate": 0.0001410963083494356, "loss": 1.0469, "step": 2902 }, { "epoch": 0.5901606017483229, "grad_norm": 0.12706689536571503, "learning_rate": 0.00014107596867690431, "loss": 1.0692, "step": 2903 }, { "epoch": 0.5903638951006303, "grad_norm": 0.12228814512491226, "learning_rate": 0.00014105562900437304, "loss": 0.8914, "step": 2904 }, { "epoch": 0.5905671884529375, "grad_norm": 0.1334328055381775, "learning_rate": 0.00014103528933184176, "loss": 1.028, "step": 2905 }, { "epoch": 0.5907704818052449, "grad_norm": 0.11916909366846085, "learning_rate": 0.00014101494965931052, "loss": 0.9827, "step": 2906 }, { "epoch": 0.5909737751575523, "grad_norm": 0.12943509221076965, "learning_rate": 0.0001409946099867792, "loss": 1.1302, "step": 2907 }, { "epoch": 0.5911770685098597, "grad_norm": 0.1251513957977295, "learning_rate": 0.00014097427031424794, "loss": 0.9508, "step": 2908 }, { "epoch": 0.5913803618621671, "grad_norm": 0.13130627572536469, "learning_rate": 0.00014095393064171666, "loss": 1.0461, "step": 2909 }, { "epoch": 0.5915836552144745, "grad_norm": 0.13331666588783264, "learning_rate": 0.0001409335909691854, "loss": 1.1964, "step": 2910 }, { "epoch": 0.5917869485667818, "grad_norm": 0.12930695712566376, "learning_rate": 0.00014091325129665414, "loss": 1.0012, "step": 2911 }, { "epoch": 0.5919902419190892, "grad_norm": 0.1442381590604782, "learning_rate": 0.00014089291162412286, "loss": 1.2208, "step": 2912 }, { "epoch": 0.5921935352713966, "grad_norm": 0.12667718529701233, "learning_rate": 0.00014087257195159159, "loss": 0.9417, "step": 2913 }, { "epoch": 0.592396828623704, "grad_norm": 0.12730923295021057, "learning_rate": 0.0001408522322790603, "loss": 1.0481, "step": 2914 }, { "epoch": 0.5926001219760114, "grad_norm": 0.11554036289453506, "learning_rate": 0.00014083189260652904, "loss": 0.8343, "step": 2915 }, { "epoch": 0.5928034153283188, "grad_norm": 0.13052915036678314, "learning_rate": 0.00014081155293399776, "loss": 1.0713, "step": 2916 }, { "epoch": 0.5930067086806261, "grad_norm": 0.12292870879173279, "learning_rate": 0.00014079121326146648, "loss": 1.0347, "step": 2917 }, { "epoch": 0.5932100020329335, "grad_norm": 0.13543544709682465, "learning_rate": 0.0001407708735889352, "loss": 1.1291, "step": 2918 }, { "epoch": 0.5934132953852409, "grad_norm": 0.13335563242435455, "learning_rate": 0.00014075053391640396, "loss": 1.0373, "step": 2919 }, { "epoch": 0.5936165887375483, "grad_norm": 0.12110266089439392, "learning_rate": 0.00014073019424387268, "loss": 0.9317, "step": 2920 }, { "epoch": 0.5938198820898557, "grad_norm": 0.11466968059539795, "learning_rate": 0.0001407098545713414, "loss": 0.922, "step": 2921 }, { "epoch": 0.594023175442163, "grad_norm": 0.13369932770729065, "learning_rate": 0.00014068951489881013, "loss": 1.0871, "step": 2922 }, { "epoch": 0.5942264687944704, "grad_norm": 0.12968046963214874, "learning_rate": 0.00014066917522627886, "loss": 0.971, "step": 2923 }, { "epoch": 0.5944297621467778, "grad_norm": 0.12824739515781403, "learning_rate": 0.00014064883555374758, "loss": 1.1196, "step": 2924 }, { "epoch": 0.5946330554990852, "grad_norm": 0.12155873328447342, "learning_rate": 0.0001406284958812163, "loss": 0.886, "step": 2925 }, { "epoch": 0.5948363488513926, "grad_norm": 0.12435124814510345, "learning_rate": 0.00014060815620868503, "loss": 1.0395, "step": 2926 }, { "epoch": 0.5950396422037, "grad_norm": 0.1359453648328781, "learning_rate": 0.00014058781653615378, "loss": 1.1477, "step": 2927 }, { "epoch": 0.5952429355560073, "grad_norm": 0.10797560214996338, "learning_rate": 0.0001405674768636225, "loss": 0.8632, "step": 2928 }, { "epoch": 0.5954462289083147, "grad_norm": 0.12806884944438934, "learning_rate": 0.00014054713719109123, "loss": 0.9678, "step": 2929 }, { "epoch": 0.5956495222606221, "grad_norm": 0.13405455648899078, "learning_rate": 0.00014052679751855996, "loss": 1.1721, "step": 2930 }, { "epoch": 0.5958528156129295, "grad_norm": 0.11106649786233902, "learning_rate": 0.00014050645784602868, "loss": 0.8613, "step": 2931 }, { "epoch": 0.5960561089652369, "grad_norm": 0.1545085906982422, "learning_rate": 0.0001404861181734974, "loss": 1.2441, "step": 2932 }, { "epoch": 0.5962594023175443, "grad_norm": 0.1290442794561386, "learning_rate": 0.00014046577850096613, "loss": 1.0544, "step": 2933 }, { "epoch": 0.5964626956698516, "grad_norm": 0.11824672669172287, "learning_rate": 0.00014044543882843485, "loss": 0.953, "step": 2934 }, { "epoch": 0.596665989022159, "grad_norm": 0.13066919147968292, "learning_rate": 0.0001404250991559036, "loss": 1.1097, "step": 2935 }, { "epoch": 0.5968692823744663, "grad_norm": 0.13196654617786407, "learning_rate": 0.00014040475948337233, "loss": 0.9026, "step": 2936 }, { "epoch": 0.5970725757267737, "grad_norm": 0.1255139410495758, "learning_rate": 0.00014038441981084105, "loss": 0.9072, "step": 2937 }, { "epoch": 0.5972758690790811, "grad_norm": 0.12293802946805954, "learning_rate": 0.00014036408013830978, "loss": 0.9046, "step": 2938 }, { "epoch": 0.5974791624313885, "grad_norm": 0.15161147713661194, "learning_rate": 0.0001403437404657785, "loss": 1.1338, "step": 2939 }, { "epoch": 0.5976824557836958, "grad_norm": 0.1161181703209877, "learning_rate": 0.00014032340079324723, "loss": 0.8716, "step": 2940 }, { "epoch": 0.5978857491360032, "grad_norm": 0.14142772555351257, "learning_rate": 0.00014030306112071595, "loss": 1.0611, "step": 2941 }, { "epoch": 0.5980890424883106, "grad_norm": 0.13781876862049103, "learning_rate": 0.00014028272144818468, "loss": 1.149, "step": 2942 }, { "epoch": 0.598292335840618, "grad_norm": 0.12525886297225952, "learning_rate": 0.00014026238177565343, "loss": 0.9765, "step": 2943 }, { "epoch": 0.5984956291929254, "grad_norm": 0.11980410665273666, "learning_rate": 0.00014024204210312215, "loss": 0.979, "step": 2944 }, { "epoch": 0.5986989225452328, "grad_norm": 0.14083100855350494, "learning_rate": 0.00014022170243059088, "loss": 1.1407, "step": 2945 }, { "epoch": 0.5989022158975401, "grad_norm": 0.12020063400268555, "learning_rate": 0.0001402013627580596, "loss": 0.9946, "step": 2946 }, { "epoch": 0.5991055092498475, "grad_norm": 0.13902409374713898, "learning_rate": 0.00014018102308552835, "loss": 1.0152, "step": 2947 }, { "epoch": 0.5993088026021549, "grad_norm": 0.12778332829475403, "learning_rate": 0.00014016068341299705, "loss": 1.0196, "step": 2948 }, { "epoch": 0.5995120959544623, "grad_norm": 0.12210957705974579, "learning_rate": 0.00014014034374046578, "loss": 1.136, "step": 2949 }, { "epoch": 0.5997153893067697, "grad_norm": 0.1324332356452942, "learning_rate": 0.0001401200040679345, "loss": 1.055, "step": 2950 }, { "epoch": 0.599918682659077, "grad_norm": 0.14248095452785492, "learning_rate": 0.00014009966439540325, "loss": 1.2809, "step": 2951 }, { "epoch": 0.6001219760113844, "grad_norm": 0.12518227100372314, "learning_rate": 0.00014007932472287198, "loss": 1.0029, "step": 2952 }, { "epoch": 0.6003252693636918, "grad_norm": 0.14796386659145355, "learning_rate": 0.0001400589850503407, "loss": 1.2305, "step": 2953 }, { "epoch": 0.6005285627159992, "grad_norm": 0.1290920078754425, "learning_rate": 0.00014003864537780942, "loss": 0.9874, "step": 2954 }, { "epoch": 0.6007318560683066, "grad_norm": 0.12988100945949554, "learning_rate": 0.00014001830570527815, "loss": 0.9438, "step": 2955 }, { "epoch": 0.600935149420614, "grad_norm": 0.12497319281101227, "learning_rate": 0.00013999796603274687, "loss": 0.8797, "step": 2956 }, { "epoch": 0.6011384427729213, "grad_norm": 0.1346983015537262, "learning_rate": 0.0001399776263602156, "loss": 1.0419, "step": 2957 }, { "epoch": 0.6013417361252287, "grad_norm": 0.11253220587968826, "learning_rate": 0.00013995728668768432, "loss": 0.9803, "step": 2958 }, { "epoch": 0.6015450294775361, "grad_norm": 0.1398647278547287, "learning_rate": 0.00013993694701515305, "loss": 1.1748, "step": 2959 }, { "epoch": 0.6017483228298435, "grad_norm": 0.14113448560237885, "learning_rate": 0.0001399166073426218, "loss": 1.1243, "step": 2960 }, { "epoch": 0.6019516161821509, "grad_norm": 0.1343860626220703, "learning_rate": 0.00013989626767009052, "loss": 1.1181, "step": 2961 }, { "epoch": 0.6021549095344583, "grad_norm": 0.13300351798534393, "learning_rate": 0.00013987592799755925, "loss": 1.108, "step": 2962 }, { "epoch": 0.6023582028867656, "grad_norm": 0.1379079967737198, "learning_rate": 0.00013985558832502797, "loss": 1.1145, "step": 2963 }, { "epoch": 0.602561496239073, "grad_norm": 0.13258612155914307, "learning_rate": 0.0001398352486524967, "loss": 1.0888, "step": 2964 }, { "epoch": 0.6027647895913804, "grad_norm": 0.1152709499001503, "learning_rate": 0.00013981490897996542, "loss": 0.8821, "step": 2965 }, { "epoch": 0.6029680829436878, "grad_norm": 0.14803390204906464, "learning_rate": 0.00013979456930743415, "loss": 1.1625, "step": 2966 }, { "epoch": 0.6031713762959952, "grad_norm": 0.12902309000492096, "learning_rate": 0.00013977422963490287, "loss": 1.0304, "step": 2967 }, { "epoch": 0.6033746696483026, "grad_norm": 0.1235414445400238, "learning_rate": 0.00013975388996237162, "loss": 1.0143, "step": 2968 }, { "epoch": 0.6035779630006098, "grad_norm": 0.1427546590566635, "learning_rate": 0.00013973355028984035, "loss": 1.2047, "step": 2969 }, { "epoch": 0.6037812563529172, "grad_norm": 0.1456848382949829, "learning_rate": 0.00013971321061730907, "loss": 1.0673, "step": 2970 }, { "epoch": 0.6039845497052246, "grad_norm": 0.13823378086090088, "learning_rate": 0.0001396928709447778, "loss": 0.9845, "step": 2971 }, { "epoch": 0.604187843057532, "grad_norm": 0.15567836165428162, "learning_rate": 0.00013967253127224652, "loss": 1.1844, "step": 2972 }, { "epoch": 0.6043911364098394, "grad_norm": 0.12227654457092285, "learning_rate": 0.00013965219159971524, "loss": 1.0443, "step": 2973 }, { "epoch": 0.6045944297621467, "grad_norm": 0.14952129125595093, "learning_rate": 0.00013963185192718397, "loss": 1.1285, "step": 2974 }, { "epoch": 0.6047977231144541, "grad_norm": 0.1279451698064804, "learning_rate": 0.0001396115122546527, "loss": 0.9304, "step": 2975 }, { "epoch": 0.6050010164667615, "grad_norm": 0.13317649066448212, "learning_rate": 0.00013959117258212144, "loss": 1.0864, "step": 2976 }, { "epoch": 0.6052043098190689, "grad_norm": 0.13362491130828857, "learning_rate": 0.00013957083290959017, "loss": 1.0547, "step": 2977 }, { "epoch": 0.6054076031713763, "grad_norm": 0.13469024002552032, "learning_rate": 0.0001395504932370589, "loss": 1.0941, "step": 2978 }, { "epoch": 0.6056108965236837, "grad_norm": 0.1265508234500885, "learning_rate": 0.00013953015356452762, "loss": 0.9379, "step": 2979 }, { "epoch": 0.605814189875991, "grad_norm": 0.1344381868839264, "learning_rate": 0.00013950981389199634, "loss": 1.1047, "step": 2980 }, { "epoch": 0.6060174832282984, "grad_norm": 0.13309423625469208, "learning_rate": 0.00013948947421946507, "loss": 1.0916, "step": 2981 }, { "epoch": 0.6062207765806058, "grad_norm": 0.1394202560186386, "learning_rate": 0.0001394691345469338, "loss": 1.0521, "step": 2982 }, { "epoch": 0.6064240699329132, "grad_norm": 0.12950794398784637, "learning_rate": 0.00013944879487440252, "loss": 1.0414, "step": 2983 }, { "epoch": 0.6066273632852206, "grad_norm": 0.15191194415092468, "learning_rate": 0.00013942845520187127, "loss": 1.1255, "step": 2984 }, { "epoch": 0.606830656637528, "grad_norm": 0.142736554145813, "learning_rate": 0.00013940811552934, "loss": 1.1004, "step": 2985 }, { "epoch": 0.6070339499898353, "grad_norm": 0.13812166452407837, "learning_rate": 0.00013938777585680872, "loss": 1.0688, "step": 2986 }, { "epoch": 0.6072372433421427, "grad_norm": 0.1332339346408844, "learning_rate": 0.00013936743618427744, "loss": 1.0214, "step": 2987 }, { "epoch": 0.6074405366944501, "grad_norm": 0.11382775753736496, "learning_rate": 0.00013934709651174616, "loss": 0.9555, "step": 2988 }, { "epoch": 0.6076438300467575, "grad_norm": 0.1476142406463623, "learning_rate": 0.0001393267568392149, "loss": 1.1355, "step": 2989 }, { "epoch": 0.6078471233990649, "grad_norm": 0.15201976895332336, "learning_rate": 0.00013930641716668361, "loss": 1.2253, "step": 2990 }, { "epoch": 0.6080504167513723, "grad_norm": 0.13920465111732483, "learning_rate": 0.00013928607749415234, "loss": 1.1671, "step": 2991 }, { "epoch": 0.6082537101036796, "grad_norm": 0.11285021156072617, "learning_rate": 0.0001392657378216211, "loss": 0.8908, "step": 2992 }, { "epoch": 0.608457003455987, "grad_norm": 0.14005322754383087, "learning_rate": 0.00013924539814908981, "loss": 1.0991, "step": 2993 }, { "epoch": 0.6086602968082944, "grad_norm": 0.14553718268871307, "learning_rate": 0.00013922505847655854, "loss": 1.1135, "step": 2994 }, { "epoch": 0.6088635901606018, "grad_norm": 0.1322544664144516, "learning_rate": 0.00013920471880402726, "loss": 1.0209, "step": 2995 }, { "epoch": 0.6090668835129092, "grad_norm": 0.12357106804847717, "learning_rate": 0.000139184379131496, "loss": 0.9475, "step": 2996 }, { "epoch": 0.6092701768652166, "grad_norm": 0.1173151507973671, "learning_rate": 0.0001391640394589647, "loss": 0.9806, "step": 2997 }, { "epoch": 0.6094734702175238, "grad_norm": 0.12091773003339767, "learning_rate": 0.00013914369978643344, "loss": 0.9308, "step": 2998 }, { "epoch": 0.6096767635698312, "grad_norm": 0.12371361255645752, "learning_rate": 0.00013912336011390216, "loss": 1.001, "step": 2999 }, { "epoch": 0.6098800569221386, "grad_norm": 0.11926256865262985, "learning_rate": 0.00013910302044137089, "loss": 0.9655, "step": 3000 }, { "epoch": 0.610083350274446, "grad_norm": 0.12953068315982819, "learning_rate": 0.00013908268076883964, "loss": 1.0736, "step": 3001 }, { "epoch": 0.6102866436267534, "grad_norm": 0.12367159873247147, "learning_rate": 0.00013906234109630836, "loss": 1.0428, "step": 3002 }, { "epoch": 0.6104899369790607, "grad_norm": 0.12180911749601364, "learning_rate": 0.00013904200142377709, "loss": 0.9661, "step": 3003 }, { "epoch": 0.6106932303313681, "grad_norm": 0.13220947980880737, "learning_rate": 0.0001390216617512458, "loss": 0.8966, "step": 3004 }, { "epoch": 0.6108965236836755, "grad_norm": 0.15283820033073425, "learning_rate": 0.00013900132207871453, "loss": 1.2692, "step": 3005 }, { "epoch": 0.6110998170359829, "grad_norm": 0.1325535923242569, "learning_rate": 0.00013898098240618326, "loss": 0.9849, "step": 3006 }, { "epoch": 0.6113031103882903, "grad_norm": 0.14417356252670288, "learning_rate": 0.00013896064273365198, "loss": 1.2099, "step": 3007 }, { "epoch": 0.6115064037405977, "grad_norm": 0.1250670701265335, "learning_rate": 0.0001389403030611207, "loss": 0.9963, "step": 3008 }, { "epoch": 0.611709697092905, "grad_norm": 0.1299847513437271, "learning_rate": 0.00013891996338858946, "loss": 1.0131, "step": 3009 }, { "epoch": 0.6119129904452124, "grad_norm": 0.13631494343280792, "learning_rate": 0.00013889962371605818, "loss": 1.106, "step": 3010 }, { "epoch": 0.6121162837975198, "grad_norm": 0.12008505314588547, "learning_rate": 0.0001388792840435269, "loss": 0.9617, "step": 3011 }, { "epoch": 0.6123195771498272, "grad_norm": 0.13346195220947266, "learning_rate": 0.00013885894437099563, "loss": 1.1796, "step": 3012 }, { "epoch": 0.6125228705021346, "grad_norm": 0.12372852861881256, "learning_rate": 0.00013883860469846436, "loss": 0.8494, "step": 3013 }, { "epoch": 0.612726163854442, "grad_norm": 0.14432121813297272, "learning_rate": 0.00013881826502593308, "loss": 1.1595, "step": 3014 }, { "epoch": 0.6129294572067493, "grad_norm": 0.13419228792190552, "learning_rate": 0.0001387979253534018, "loss": 1.0886, "step": 3015 }, { "epoch": 0.6131327505590567, "grad_norm": 0.1301155984401703, "learning_rate": 0.00013877758568087053, "loss": 0.9456, "step": 3016 }, { "epoch": 0.6133360439113641, "grad_norm": 0.11167372018098831, "learning_rate": 0.00013875724600833928, "loss": 0.9077, "step": 3017 }, { "epoch": 0.6135393372636715, "grad_norm": 0.11222781985998154, "learning_rate": 0.000138736906335808, "loss": 0.8517, "step": 3018 }, { "epoch": 0.6137426306159789, "grad_norm": 0.1456783413887024, "learning_rate": 0.00013871656666327673, "loss": 1.1327, "step": 3019 }, { "epoch": 0.6139459239682863, "grad_norm": 0.12238568812608719, "learning_rate": 0.00013869622699074546, "loss": 1.0502, "step": 3020 }, { "epoch": 0.6141492173205936, "grad_norm": 0.1362997442483902, "learning_rate": 0.00013867588731821418, "loss": 0.9795, "step": 3021 }, { "epoch": 0.614352510672901, "grad_norm": 0.12421485036611557, "learning_rate": 0.0001386555476456829, "loss": 0.9604, "step": 3022 }, { "epoch": 0.6145558040252084, "grad_norm": 0.11413677036762238, "learning_rate": 0.00013863520797315163, "loss": 1.0068, "step": 3023 }, { "epoch": 0.6147590973775158, "grad_norm": 0.1455029845237732, "learning_rate": 0.00013861486830062035, "loss": 1.2662, "step": 3024 }, { "epoch": 0.6149623907298232, "grad_norm": 0.12818849086761475, "learning_rate": 0.0001385945286280891, "loss": 0.9195, "step": 3025 }, { "epoch": 0.6151656840821305, "grad_norm": 0.1426313817501068, "learning_rate": 0.00013857418895555783, "loss": 1.2388, "step": 3026 }, { "epoch": 0.6153689774344379, "grad_norm": 0.14491280913352966, "learning_rate": 0.00013855384928302655, "loss": 1.1537, "step": 3027 }, { "epoch": 0.6155722707867453, "grad_norm": 0.11689125746488571, "learning_rate": 0.00013853350961049528, "loss": 0.9963, "step": 3028 }, { "epoch": 0.6157755641390527, "grad_norm": 0.1245650127530098, "learning_rate": 0.000138513169937964, "loss": 0.9193, "step": 3029 }, { "epoch": 0.61597885749136, "grad_norm": 0.14654415845870972, "learning_rate": 0.00013849283026543273, "loss": 1.271, "step": 3030 }, { "epoch": 0.6161821508436675, "grad_norm": 0.13708455860614777, "learning_rate": 0.00013847249059290145, "loss": 1.1169, "step": 3031 }, { "epoch": 0.6163854441959747, "grad_norm": 0.13598188757896423, "learning_rate": 0.00013845215092037018, "loss": 1.1565, "step": 3032 }, { "epoch": 0.6165887375482821, "grad_norm": 0.13055184483528137, "learning_rate": 0.00013843181124783893, "loss": 0.9636, "step": 3033 }, { "epoch": 0.6167920309005895, "grad_norm": 0.12075616419315338, "learning_rate": 0.00013841147157530765, "loss": 1.0404, "step": 3034 }, { "epoch": 0.6169953242528969, "grad_norm": 0.12068097293376923, "learning_rate": 0.00013839113190277638, "loss": 0.9913, "step": 3035 }, { "epoch": 0.6171986176052043, "grad_norm": 0.13300339877605438, "learning_rate": 0.0001383707922302451, "loss": 1.1589, "step": 3036 }, { "epoch": 0.6174019109575117, "grad_norm": 0.13343989849090576, "learning_rate": 0.00013835045255771383, "loss": 1.1602, "step": 3037 }, { "epoch": 0.617605204309819, "grad_norm": 0.13028277456760406, "learning_rate": 0.00013833011288518255, "loss": 1.0985, "step": 3038 }, { "epoch": 0.6178084976621264, "grad_norm": 0.1125851422548294, "learning_rate": 0.00013830977321265128, "loss": 1.0002, "step": 3039 }, { "epoch": 0.6180117910144338, "grad_norm": 0.12342289090156555, "learning_rate": 0.00013828943354012, "loss": 1.0013, "step": 3040 }, { "epoch": 0.6182150843667412, "grad_norm": 0.12776073813438416, "learning_rate": 0.00013826909386758872, "loss": 0.9747, "step": 3041 }, { "epoch": 0.6184183777190486, "grad_norm": 0.12842942774295807, "learning_rate": 0.00013824875419505748, "loss": 0.9877, "step": 3042 }, { "epoch": 0.618621671071356, "grad_norm": 0.13102072477340698, "learning_rate": 0.0001382284145225262, "loss": 0.9636, "step": 3043 }, { "epoch": 0.6188249644236633, "grad_norm": 0.12905801832675934, "learning_rate": 0.00013820807484999492, "loss": 1.1595, "step": 3044 }, { "epoch": 0.6190282577759707, "grad_norm": 0.1274825781583786, "learning_rate": 0.00013818773517746365, "loss": 0.9346, "step": 3045 }, { "epoch": 0.6192315511282781, "grad_norm": 0.1235279068350792, "learning_rate": 0.00013816739550493237, "loss": 0.9774, "step": 3046 }, { "epoch": 0.6194348444805855, "grad_norm": 0.13355652987957, "learning_rate": 0.0001381470558324011, "loss": 1.1034, "step": 3047 }, { "epoch": 0.6196381378328929, "grad_norm": 0.12585759162902832, "learning_rate": 0.00013812671615986982, "loss": 1.0536, "step": 3048 }, { "epoch": 0.6198414311852003, "grad_norm": 0.12993231415748596, "learning_rate": 0.00013810637648733855, "loss": 1.2062, "step": 3049 }, { "epoch": 0.6200447245375076, "grad_norm": 0.1431044191122055, "learning_rate": 0.0001380860368148073, "loss": 1.1605, "step": 3050 }, { "epoch": 0.620248017889815, "grad_norm": 0.134634330868721, "learning_rate": 0.00013806569714227602, "loss": 1.101, "step": 3051 }, { "epoch": 0.6204513112421224, "grad_norm": 0.126140758395195, "learning_rate": 0.00013804535746974475, "loss": 1.0472, "step": 3052 }, { "epoch": 0.6206546045944298, "grad_norm": 0.1231079027056694, "learning_rate": 0.00013802501779721347, "loss": 0.9879, "step": 3053 }, { "epoch": 0.6208578979467372, "grad_norm": 0.12733492255210876, "learning_rate": 0.0001380046781246822, "loss": 1.0918, "step": 3054 }, { "epoch": 0.6210611912990445, "grad_norm": 0.14148791134357452, "learning_rate": 0.00013798433845215092, "loss": 1.145, "step": 3055 }, { "epoch": 0.6212644846513519, "grad_norm": 0.13087992370128632, "learning_rate": 0.00013796399877961965, "loss": 1.1101, "step": 3056 }, { "epoch": 0.6214677780036593, "grad_norm": 0.14443303644657135, "learning_rate": 0.00013794365910708837, "loss": 1.0992, "step": 3057 }, { "epoch": 0.6216710713559667, "grad_norm": 0.13422155380249023, "learning_rate": 0.00013792331943455712, "loss": 1.2044, "step": 3058 }, { "epoch": 0.6218743647082741, "grad_norm": 0.13146667182445526, "learning_rate": 0.00013790297976202585, "loss": 1.0769, "step": 3059 }, { "epoch": 0.6220776580605815, "grad_norm": 0.12982682883739471, "learning_rate": 0.00013788264008949457, "loss": 1.1232, "step": 3060 }, { "epoch": 0.6222809514128887, "grad_norm": 0.13256913423538208, "learning_rate": 0.0001378623004169633, "loss": 0.9969, "step": 3061 }, { "epoch": 0.6224842447651961, "grad_norm": 0.11935515701770782, "learning_rate": 0.00013784196074443202, "loss": 1.0282, "step": 3062 }, { "epoch": 0.6226875381175035, "grad_norm": 0.14199033379554749, "learning_rate": 0.00013782162107190074, "loss": 1.1328, "step": 3063 }, { "epoch": 0.622890831469811, "grad_norm": 0.12896639108657837, "learning_rate": 0.00013780128139936947, "loss": 1.1422, "step": 3064 }, { "epoch": 0.6230941248221183, "grad_norm": 0.12972599267959595, "learning_rate": 0.0001377809417268382, "loss": 1.0686, "step": 3065 }, { "epoch": 0.6232974181744257, "grad_norm": 0.14466549456119537, "learning_rate": 0.00013776060205430694, "loss": 1.3486, "step": 3066 }, { "epoch": 0.623500711526733, "grad_norm": 0.129892960190773, "learning_rate": 0.00013774026238177567, "loss": 0.9945, "step": 3067 }, { "epoch": 0.6237040048790404, "grad_norm": 0.1326766312122345, "learning_rate": 0.0001377199227092444, "loss": 1.0583, "step": 3068 }, { "epoch": 0.6239072982313478, "grad_norm": 0.14068090915679932, "learning_rate": 0.00013769958303671312, "loss": 1.1597, "step": 3069 }, { "epoch": 0.6241105915836552, "grad_norm": 0.12544094026088715, "learning_rate": 0.00013767924336418184, "loss": 0.9624, "step": 3070 }, { "epoch": 0.6243138849359626, "grad_norm": 0.13259856402873993, "learning_rate": 0.00013765890369165057, "loss": 1.0218, "step": 3071 }, { "epoch": 0.62451717828827, "grad_norm": 0.13529850542545319, "learning_rate": 0.0001376385640191193, "loss": 1.1063, "step": 3072 }, { "epoch": 0.6247204716405773, "grad_norm": 0.1389310508966446, "learning_rate": 0.00013761822434658802, "loss": 1.067, "step": 3073 }, { "epoch": 0.6249237649928847, "grad_norm": 0.1326620876789093, "learning_rate": 0.00013759788467405677, "loss": 1.1228, "step": 3074 }, { "epoch": 0.6251270583451921, "grad_norm": 0.1371268332004547, "learning_rate": 0.0001375775450015255, "loss": 1.056, "step": 3075 }, { "epoch": 0.6253303516974995, "grad_norm": 0.15050175786018372, "learning_rate": 0.00013755720532899422, "loss": 1.1679, "step": 3076 }, { "epoch": 0.6255336450498069, "grad_norm": 0.14462800323963165, "learning_rate": 0.00013753686565646294, "loss": 1.1155, "step": 3077 }, { "epoch": 0.6257369384021142, "grad_norm": 0.12994062900543213, "learning_rate": 0.00013751652598393166, "loss": 1.0954, "step": 3078 }, { "epoch": 0.6259402317544216, "grad_norm": 0.12979595363140106, "learning_rate": 0.0001374961863114004, "loss": 1.0912, "step": 3079 }, { "epoch": 0.626143525106729, "grad_norm": 0.12296707928180695, "learning_rate": 0.0001374758466388691, "loss": 0.9646, "step": 3080 }, { "epoch": 0.6263468184590364, "grad_norm": 0.14658544957637787, "learning_rate": 0.00013745550696633784, "loss": 1.1149, "step": 3081 }, { "epoch": 0.6265501118113438, "grad_norm": 0.12885436415672302, "learning_rate": 0.00013743516729380656, "loss": 1.0285, "step": 3082 }, { "epoch": 0.6267534051636512, "grad_norm": 0.13237449526786804, "learning_rate": 0.00013741482762127531, "loss": 1.1388, "step": 3083 }, { "epoch": 0.6269566985159585, "grad_norm": 0.11667048186063766, "learning_rate": 0.00013739448794874404, "loss": 0.9625, "step": 3084 }, { "epoch": 0.6271599918682659, "grad_norm": 0.09962797164916992, "learning_rate": 0.00013737414827621276, "loss": 0.8283, "step": 3085 }, { "epoch": 0.6273632852205733, "grad_norm": 0.11563806235790253, "learning_rate": 0.0001373538086036815, "loss": 0.9379, "step": 3086 }, { "epoch": 0.6275665785728807, "grad_norm": 0.14020705223083496, "learning_rate": 0.0001373334689311502, "loss": 1.0649, "step": 3087 }, { "epoch": 0.6277698719251881, "grad_norm": 0.1255711168050766, "learning_rate": 0.00013731312925861894, "loss": 1.0555, "step": 3088 }, { "epoch": 0.6279731652774955, "grad_norm": 0.1265256702899933, "learning_rate": 0.00013729278958608766, "loss": 0.9377, "step": 3089 }, { "epoch": 0.6281764586298028, "grad_norm": 0.13861151039600372, "learning_rate": 0.00013727244991355639, "loss": 1.2492, "step": 3090 }, { "epoch": 0.6283797519821102, "grad_norm": 0.1353643387556076, "learning_rate": 0.00013725211024102514, "loss": 1.0395, "step": 3091 }, { "epoch": 0.6285830453344176, "grad_norm": 0.14273463189601898, "learning_rate": 0.00013723177056849386, "loss": 1.2017, "step": 3092 }, { "epoch": 0.628786338686725, "grad_norm": 0.12992137670516968, "learning_rate": 0.00013721143089596259, "loss": 1.1135, "step": 3093 }, { "epoch": 0.6289896320390324, "grad_norm": 0.13525742292404175, "learning_rate": 0.0001371910912234313, "loss": 1.1695, "step": 3094 }, { "epoch": 0.6291929253913398, "grad_norm": 0.12449081242084503, "learning_rate": 0.00013717075155090003, "loss": 1.0187, "step": 3095 }, { "epoch": 0.629396218743647, "grad_norm": 0.12699362635612488, "learning_rate": 0.00013715041187836876, "loss": 1.0876, "step": 3096 }, { "epoch": 0.6295995120959544, "grad_norm": 0.12526580691337585, "learning_rate": 0.00013713007220583748, "loss": 1.0352, "step": 3097 }, { "epoch": 0.6298028054482618, "grad_norm": 0.1089174896478653, "learning_rate": 0.0001371097325333062, "loss": 0.9695, "step": 3098 }, { "epoch": 0.6300060988005692, "grad_norm": 0.1343061774969101, "learning_rate": 0.00013708939286077496, "loss": 1.0601, "step": 3099 }, { "epoch": 0.6302093921528766, "grad_norm": 0.14272217452526093, "learning_rate": 0.00013706905318824368, "loss": 1.1642, "step": 3100 }, { "epoch": 0.630412685505184, "grad_norm": 0.14062613248825073, "learning_rate": 0.0001370487135157124, "loss": 1.1965, "step": 3101 }, { "epoch": 0.6306159788574913, "grad_norm": 0.12888343632221222, "learning_rate": 0.00013702837384318113, "loss": 0.9552, "step": 3102 }, { "epoch": 0.6308192722097987, "grad_norm": 0.1350019872188568, "learning_rate": 0.00013700803417064986, "loss": 1.1513, "step": 3103 }, { "epoch": 0.6310225655621061, "grad_norm": 0.12076770514249802, "learning_rate": 0.00013698769449811858, "loss": 0.9807, "step": 3104 }, { "epoch": 0.6312258589144135, "grad_norm": 0.12005645036697388, "learning_rate": 0.0001369673548255873, "loss": 0.9309, "step": 3105 }, { "epoch": 0.6314291522667209, "grad_norm": 0.13432009518146515, "learning_rate": 0.00013694701515305603, "loss": 1.0728, "step": 3106 }, { "epoch": 0.6316324456190282, "grad_norm": 0.14083653688430786, "learning_rate": 0.00013692667548052478, "loss": 0.9339, "step": 3107 }, { "epoch": 0.6318357389713356, "grad_norm": 0.12383510172367096, "learning_rate": 0.0001369063358079935, "loss": 0.9525, "step": 3108 }, { "epoch": 0.632039032323643, "grad_norm": 0.12858064472675323, "learning_rate": 0.00013688599613546223, "loss": 1.1277, "step": 3109 }, { "epoch": 0.6322423256759504, "grad_norm": 0.1366434544324875, "learning_rate": 0.00013686565646293096, "loss": 1.0272, "step": 3110 }, { "epoch": 0.6324456190282578, "grad_norm": 0.12631452083587646, "learning_rate": 0.00013684531679039968, "loss": 1.0147, "step": 3111 }, { "epoch": 0.6326489123805652, "grad_norm": 0.1388847827911377, "learning_rate": 0.0001368249771178684, "loss": 1.1982, "step": 3112 }, { "epoch": 0.6328522057328725, "grad_norm": 0.1357526183128357, "learning_rate": 0.00013680463744533713, "loss": 1.1804, "step": 3113 }, { "epoch": 0.6330554990851799, "grad_norm": 0.1273118555545807, "learning_rate": 0.00013678429777280585, "loss": 1.0268, "step": 3114 }, { "epoch": 0.6332587924374873, "grad_norm": 0.13186684250831604, "learning_rate": 0.0001367639581002746, "loss": 1.135, "step": 3115 }, { "epoch": 0.6334620857897947, "grad_norm": 0.1217605397105217, "learning_rate": 0.00013674361842774333, "loss": 1.0364, "step": 3116 }, { "epoch": 0.6336653791421021, "grad_norm": 0.11785151809453964, "learning_rate": 0.00013672327875521205, "loss": 0.9248, "step": 3117 }, { "epoch": 0.6338686724944095, "grad_norm": 0.12986084818840027, "learning_rate": 0.00013670293908268078, "loss": 1.0988, "step": 3118 }, { "epoch": 0.6340719658467168, "grad_norm": 0.14195957779884338, "learning_rate": 0.0001366825994101495, "loss": 1.2182, "step": 3119 }, { "epoch": 0.6342752591990242, "grad_norm": 0.12939682602882385, "learning_rate": 0.00013666225973761823, "loss": 1.0573, "step": 3120 }, { "epoch": 0.6344785525513316, "grad_norm": 0.12343540787696838, "learning_rate": 0.00013664192006508695, "loss": 1.0057, "step": 3121 }, { "epoch": 0.634681845903639, "grad_norm": 0.12308801710605621, "learning_rate": 0.00013662158039255568, "loss": 0.9345, "step": 3122 }, { "epoch": 0.6348851392559464, "grad_norm": 0.13453471660614014, "learning_rate": 0.0001366012407200244, "loss": 1.1593, "step": 3123 }, { "epoch": 0.6350884326082538, "grad_norm": 0.14599518477916718, "learning_rate": 0.00013658090104749315, "loss": 1.19, "step": 3124 }, { "epoch": 0.635291725960561, "grad_norm": 0.13644537329673767, "learning_rate": 0.00013656056137496188, "loss": 1.1541, "step": 3125 }, { "epoch": 0.6354950193128684, "grad_norm": 0.1313880980014801, "learning_rate": 0.0001365402217024306, "loss": 1.0607, "step": 3126 }, { "epoch": 0.6356983126651758, "grad_norm": 0.12381511926651001, "learning_rate": 0.00013651988202989933, "loss": 1.0003, "step": 3127 }, { "epoch": 0.6359016060174832, "grad_norm": 0.1361168473958969, "learning_rate": 0.00013649954235736805, "loss": 0.9445, "step": 3128 }, { "epoch": 0.6361048993697906, "grad_norm": 0.13545829057693481, "learning_rate": 0.00013647920268483677, "loss": 1.046, "step": 3129 }, { "epoch": 0.636308192722098, "grad_norm": 0.1335272639989853, "learning_rate": 0.0001364588630123055, "loss": 1.0611, "step": 3130 }, { "epoch": 0.6365114860744053, "grad_norm": 0.13092759251594543, "learning_rate": 0.00013643852333977422, "loss": 0.991, "step": 3131 }, { "epoch": 0.6367147794267127, "grad_norm": 0.1328737437725067, "learning_rate": 0.00013641818366724298, "loss": 1.2151, "step": 3132 }, { "epoch": 0.6369180727790201, "grad_norm": 0.13247033953666687, "learning_rate": 0.0001363978439947117, "loss": 1.0918, "step": 3133 }, { "epoch": 0.6371213661313275, "grad_norm": 0.1463424563407898, "learning_rate": 0.00013637750432218042, "loss": 1.0753, "step": 3134 }, { "epoch": 0.6373246594836349, "grad_norm": 0.13314956426620483, "learning_rate": 0.00013635716464964915, "loss": 1.1224, "step": 3135 }, { "epoch": 0.6375279528359422, "grad_norm": 0.12841732800006866, "learning_rate": 0.00013633682497711787, "loss": 0.9049, "step": 3136 }, { "epoch": 0.6377312461882496, "grad_norm": 0.1303834468126297, "learning_rate": 0.0001363164853045866, "loss": 1.1208, "step": 3137 }, { "epoch": 0.637934539540557, "grad_norm": 0.1288985162973404, "learning_rate": 0.00013629614563205532, "loss": 1.0416, "step": 3138 }, { "epoch": 0.6381378328928644, "grad_norm": 0.13632969558238983, "learning_rate": 0.00013627580595952405, "loss": 1.1293, "step": 3139 }, { "epoch": 0.6383411262451718, "grad_norm": 0.12471256405115128, "learning_rate": 0.0001362554662869928, "loss": 1.1095, "step": 3140 }, { "epoch": 0.6385444195974792, "grad_norm": 0.13156485557556152, "learning_rate": 0.00013623512661446152, "loss": 1.0952, "step": 3141 }, { "epoch": 0.6387477129497865, "grad_norm": 0.13472090661525726, "learning_rate": 0.00013621478694193025, "loss": 1.0665, "step": 3142 }, { "epoch": 0.6389510063020939, "grad_norm": 0.1464674472808838, "learning_rate": 0.00013619444726939897, "loss": 1.3084, "step": 3143 }, { "epoch": 0.6391542996544013, "grad_norm": 0.13103194534778595, "learning_rate": 0.0001361741075968677, "loss": 1.0512, "step": 3144 }, { "epoch": 0.6393575930067087, "grad_norm": 0.13378995656967163, "learning_rate": 0.00013615376792433642, "loss": 1.0625, "step": 3145 }, { "epoch": 0.6395608863590161, "grad_norm": 0.13924111425876617, "learning_rate": 0.00013613342825180514, "loss": 1.1813, "step": 3146 }, { "epoch": 0.6397641797113235, "grad_norm": 0.13989883661270142, "learning_rate": 0.00013611308857927387, "loss": 1.0973, "step": 3147 }, { "epoch": 0.6399674730636308, "grad_norm": 0.12374843657016754, "learning_rate": 0.00013609274890674262, "loss": 0.9683, "step": 3148 }, { "epoch": 0.6401707664159382, "grad_norm": 0.14824433624744415, "learning_rate": 0.00013607240923421135, "loss": 1.1632, "step": 3149 }, { "epoch": 0.6403740597682456, "grad_norm": 0.13298064470291138, "learning_rate": 0.00013605206956168007, "loss": 1.0621, "step": 3150 }, { "epoch": 0.640577353120553, "grad_norm": 0.13271810114383698, "learning_rate": 0.0001360317298891488, "loss": 1.1239, "step": 3151 }, { "epoch": 0.6407806464728604, "grad_norm": 0.12920920550823212, "learning_rate": 0.00013601139021661752, "loss": 1.0332, "step": 3152 }, { "epoch": 0.6409839398251678, "grad_norm": 0.12078989297151566, "learning_rate": 0.00013599105054408624, "loss": 0.9747, "step": 3153 }, { "epoch": 0.6411872331774751, "grad_norm": 0.1309296190738678, "learning_rate": 0.00013597071087155497, "loss": 1.0871, "step": 3154 }, { "epoch": 0.6413905265297825, "grad_norm": 0.13290594518184662, "learning_rate": 0.0001359503711990237, "loss": 0.9874, "step": 3155 }, { "epoch": 0.6415938198820899, "grad_norm": 0.12248789519071579, "learning_rate": 0.00013593003152649244, "loss": 1.0674, "step": 3156 }, { "epoch": 0.6417971132343973, "grad_norm": 0.13262233138084412, "learning_rate": 0.00013590969185396117, "loss": 1.0918, "step": 3157 }, { "epoch": 0.6420004065867047, "grad_norm": 0.11638560891151428, "learning_rate": 0.0001358893521814299, "loss": 0.94, "step": 3158 }, { "epoch": 0.6422036999390119, "grad_norm": 0.13623739778995514, "learning_rate": 0.00013586901250889862, "loss": 0.9925, "step": 3159 }, { "epoch": 0.6424069932913193, "grad_norm": 0.13399013876914978, "learning_rate": 0.00013584867283636734, "loss": 0.9553, "step": 3160 }, { "epoch": 0.6426102866436267, "grad_norm": 0.12274351716041565, "learning_rate": 0.00013582833316383607, "loss": 1.0406, "step": 3161 }, { "epoch": 0.6428135799959341, "grad_norm": 0.13038837909698486, "learning_rate": 0.0001358079934913048, "loss": 1.1596, "step": 3162 }, { "epoch": 0.6430168733482415, "grad_norm": 0.13271398842334747, "learning_rate": 0.00013578765381877351, "loss": 1.0792, "step": 3163 }, { "epoch": 0.6432201667005489, "grad_norm": 0.1319563090801239, "learning_rate": 0.00013576731414624224, "loss": 1.1321, "step": 3164 }, { "epoch": 0.6434234600528562, "grad_norm": 0.13448521494865417, "learning_rate": 0.000135746974473711, "loss": 1.0278, "step": 3165 }, { "epoch": 0.6436267534051636, "grad_norm": 0.1246679350733757, "learning_rate": 0.00013572663480117972, "loss": 0.9818, "step": 3166 }, { "epoch": 0.643830046757471, "grad_norm": 0.11772032827138901, "learning_rate": 0.00013570629512864844, "loss": 0.9398, "step": 3167 }, { "epoch": 0.6440333401097784, "grad_norm": 0.12182223796844482, "learning_rate": 0.00013568595545611716, "loss": 0.9794, "step": 3168 }, { "epoch": 0.6442366334620858, "grad_norm": 0.11464784294366837, "learning_rate": 0.0001356656157835859, "loss": 0.9508, "step": 3169 }, { "epoch": 0.6444399268143932, "grad_norm": 0.12462913244962692, "learning_rate": 0.0001356452761110546, "loss": 1.0784, "step": 3170 }, { "epoch": 0.6446432201667005, "grad_norm": 0.14886057376861572, "learning_rate": 0.00013562493643852334, "loss": 1.1937, "step": 3171 }, { "epoch": 0.6448465135190079, "grad_norm": 0.12092513591051102, "learning_rate": 0.00013560459676599206, "loss": 0.9333, "step": 3172 }, { "epoch": 0.6450498068713153, "grad_norm": 0.13768193125724792, "learning_rate": 0.00013558425709346081, "loss": 1.0512, "step": 3173 }, { "epoch": 0.6452531002236227, "grad_norm": 0.13496732711791992, "learning_rate": 0.00013556391742092954, "loss": 1.0672, "step": 3174 }, { "epoch": 0.6454563935759301, "grad_norm": 0.1316104531288147, "learning_rate": 0.00013554357774839826, "loss": 1.0944, "step": 3175 }, { "epoch": 0.6456596869282375, "grad_norm": 0.12093289196491241, "learning_rate": 0.000135523238075867, "loss": 0.8777, "step": 3176 }, { "epoch": 0.6458629802805448, "grad_norm": 0.12371384352445602, "learning_rate": 0.0001355028984033357, "loss": 0.8997, "step": 3177 }, { "epoch": 0.6460662736328522, "grad_norm": 0.13598783314228058, "learning_rate": 0.00013548255873080444, "loss": 1.1167, "step": 3178 }, { "epoch": 0.6462695669851596, "grad_norm": 0.1385606974363327, "learning_rate": 0.00013546221905827316, "loss": 1.032, "step": 3179 }, { "epoch": 0.646472860337467, "grad_norm": 0.12756818532943726, "learning_rate": 0.00013544187938574188, "loss": 1.0912, "step": 3180 }, { "epoch": 0.6466761536897744, "grad_norm": 0.12240833789110184, "learning_rate": 0.00013542153971321064, "loss": 1.0065, "step": 3181 }, { "epoch": 0.6468794470420818, "grad_norm": 0.14103402197360992, "learning_rate": 0.00013540120004067936, "loss": 1.1357, "step": 3182 }, { "epoch": 0.6470827403943891, "grad_norm": 0.12278808653354645, "learning_rate": 0.00013538086036814809, "loss": 1.0462, "step": 3183 }, { "epoch": 0.6472860337466965, "grad_norm": 0.13968375325202942, "learning_rate": 0.0001353605206956168, "loss": 1.1164, "step": 3184 }, { "epoch": 0.6474893270990039, "grad_norm": 0.12311102449893951, "learning_rate": 0.00013534018102308553, "loss": 1.0793, "step": 3185 }, { "epoch": 0.6476926204513113, "grad_norm": 0.1307074874639511, "learning_rate": 0.00013531984135055426, "loss": 1.0874, "step": 3186 }, { "epoch": 0.6478959138036187, "grad_norm": 0.1303715705871582, "learning_rate": 0.00013529950167802298, "loss": 1.1111, "step": 3187 }, { "epoch": 0.648099207155926, "grad_norm": 0.13313518464565277, "learning_rate": 0.0001352791620054917, "loss": 0.9861, "step": 3188 }, { "epoch": 0.6483025005082333, "grad_norm": 0.13007265329360962, "learning_rate": 0.00013525882233296046, "loss": 0.9644, "step": 3189 }, { "epoch": 0.6485057938605407, "grad_norm": 0.14151926338672638, "learning_rate": 0.00013523848266042918, "loss": 1.1387, "step": 3190 }, { "epoch": 0.6487090872128481, "grad_norm": 0.13587616384029388, "learning_rate": 0.0001352181429878979, "loss": 1.0802, "step": 3191 }, { "epoch": 0.6489123805651555, "grad_norm": 0.14267796277999878, "learning_rate": 0.00013519780331536663, "loss": 1.1885, "step": 3192 }, { "epoch": 0.6491156739174629, "grad_norm": 0.11519461125135422, "learning_rate": 0.00013517746364283536, "loss": 0.9333, "step": 3193 }, { "epoch": 0.6493189672697702, "grad_norm": 0.14246360957622528, "learning_rate": 0.00013515712397030408, "loss": 1.1757, "step": 3194 }, { "epoch": 0.6495222606220776, "grad_norm": 0.14482155442237854, "learning_rate": 0.0001351367842977728, "loss": 1.097, "step": 3195 }, { "epoch": 0.649725553974385, "grad_norm": 0.1291578710079193, "learning_rate": 0.00013511644462524153, "loss": 0.9938, "step": 3196 }, { "epoch": 0.6499288473266924, "grad_norm": 0.13155002892017365, "learning_rate": 0.00013509610495271028, "loss": 1.0634, "step": 3197 }, { "epoch": 0.6501321406789998, "grad_norm": 0.1477162092924118, "learning_rate": 0.000135075765280179, "loss": 1.0995, "step": 3198 }, { "epoch": 0.6503354340313072, "grad_norm": 0.12841352820396423, "learning_rate": 0.00013505542560764773, "loss": 1.1185, "step": 3199 }, { "epoch": 0.6505387273836145, "grad_norm": 0.13000524044036865, "learning_rate": 0.00013503508593511646, "loss": 1.2428, "step": 3200 }, { "epoch": 0.6507420207359219, "grad_norm": 0.1218332052230835, "learning_rate": 0.00013501474626258518, "loss": 1.026, "step": 3201 }, { "epoch": 0.6509453140882293, "grad_norm": 0.12599121034145355, "learning_rate": 0.0001349944065900539, "loss": 1.0013, "step": 3202 }, { "epoch": 0.6511486074405367, "grad_norm": 0.15027253329753876, "learning_rate": 0.00013497406691752263, "loss": 1.2587, "step": 3203 }, { "epoch": 0.6513519007928441, "grad_norm": 0.12841476500034332, "learning_rate": 0.00013495372724499135, "loss": 0.9977, "step": 3204 }, { "epoch": 0.6515551941451515, "grad_norm": 0.13236485421657562, "learning_rate": 0.00013493338757246008, "loss": 1.0202, "step": 3205 }, { "epoch": 0.6517584874974588, "grad_norm": 0.1356945037841797, "learning_rate": 0.00013491304789992883, "loss": 1.1599, "step": 3206 }, { "epoch": 0.6519617808497662, "grad_norm": 0.13879364728927612, "learning_rate": 0.00013489270822739755, "loss": 1.0116, "step": 3207 }, { "epoch": 0.6521650742020736, "grad_norm": 0.15575814247131348, "learning_rate": 0.00013487236855486628, "loss": 1.0991, "step": 3208 }, { "epoch": 0.652368367554381, "grad_norm": 0.11463279277086258, "learning_rate": 0.000134852028882335, "loss": 0.8655, "step": 3209 }, { "epoch": 0.6525716609066884, "grad_norm": 0.1258864849805832, "learning_rate": 0.00013483168920980373, "loss": 0.9616, "step": 3210 }, { "epoch": 0.6527749542589957, "grad_norm": 0.13992567360401154, "learning_rate": 0.00013481134953727245, "loss": 1.078, "step": 3211 }, { "epoch": 0.6529782476113031, "grad_norm": 0.14613211154937744, "learning_rate": 0.00013479100986474118, "loss": 1.0828, "step": 3212 }, { "epoch": 0.6531815409636105, "grad_norm": 0.11749006807804108, "learning_rate": 0.0001347706701922099, "loss": 0.8586, "step": 3213 }, { "epoch": 0.6533848343159179, "grad_norm": 0.13639944791793823, "learning_rate": 0.00013475033051967865, "loss": 1.0437, "step": 3214 }, { "epoch": 0.6535881276682253, "grad_norm": 0.12744362652301788, "learning_rate": 0.00013472999084714738, "loss": 1.1059, "step": 3215 }, { "epoch": 0.6537914210205327, "grad_norm": 0.12434601038694382, "learning_rate": 0.0001347096511746161, "loss": 1.0041, "step": 3216 }, { "epoch": 0.65399471437284, "grad_norm": 0.12143322080373764, "learning_rate": 0.00013468931150208483, "loss": 0.9148, "step": 3217 }, { "epoch": 0.6541980077251474, "grad_norm": 0.142898291349411, "learning_rate": 0.00013466897182955355, "loss": 1.1715, "step": 3218 }, { "epoch": 0.6544013010774548, "grad_norm": 0.12720847129821777, "learning_rate": 0.00013464863215702227, "loss": 1.013, "step": 3219 }, { "epoch": 0.6546045944297622, "grad_norm": 0.1172272264957428, "learning_rate": 0.000134628292484491, "loss": 0.9205, "step": 3220 }, { "epoch": 0.6548078877820696, "grad_norm": 0.15361227095127106, "learning_rate": 0.00013460795281195972, "loss": 1.2636, "step": 3221 }, { "epoch": 0.655011181134377, "grad_norm": 0.1317681223154068, "learning_rate": 0.00013458761313942847, "loss": 1.1478, "step": 3222 }, { "epoch": 0.6552144744866842, "grad_norm": 0.1296282410621643, "learning_rate": 0.0001345672734668972, "loss": 1.0402, "step": 3223 }, { "epoch": 0.6554177678389916, "grad_norm": 0.1406709998846054, "learning_rate": 0.00013454693379436592, "loss": 1.1656, "step": 3224 }, { "epoch": 0.655621061191299, "grad_norm": 0.13919825851917267, "learning_rate": 0.00013452659412183465, "loss": 1.0382, "step": 3225 }, { "epoch": 0.6558243545436064, "grad_norm": 0.14981389045715332, "learning_rate": 0.00013450625444930337, "loss": 1.1494, "step": 3226 }, { "epoch": 0.6560276478959138, "grad_norm": 0.13149550557136536, "learning_rate": 0.0001344859147767721, "loss": 1.2005, "step": 3227 }, { "epoch": 0.6562309412482212, "grad_norm": 0.11929726600646973, "learning_rate": 0.00013446557510424082, "loss": 0.8001, "step": 3228 }, { "epoch": 0.6564342346005285, "grad_norm": 0.1304064244031906, "learning_rate": 0.00013444523543170955, "loss": 0.9621, "step": 3229 }, { "epoch": 0.6566375279528359, "grad_norm": 0.1286899745464325, "learning_rate": 0.0001344248957591783, "loss": 0.9954, "step": 3230 }, { "epoch": 0.6568408213051433, "grad_norm": 0.13308082520961761, "learning_rate": 0.00013440455608664702, "loss": 0.9092, "step": 3231 }, { "epoch": 0.6570441146574507, "grad_norm": 0.14997734129428864, "learning_rate": 0.00013438421641411575, "loss": 1.1089, "step": 3232 }, { "epoch": 0.6572474080097581, "grad_norm": 0.14065352082252502, "learning_rate": 0.00013436387674158447, "loss": 1.0664, "step": 3233 }, { "epoch": 0.6574507013620655, "grad_norm": 0.12980201840400696, "learning_rate": 0.0001343435370690532, "loss": 1.1631, "step": 3234 }, { "epoch": 0.6576539947143728, "grad_norm": 0.11543235182762146, "learning_rate": 0.00013432319739652192, "loss": 0.9523, "step": 3235 }, { "epoch": 0.6578572880666802, "grad_norm": 0.14717644453048706, "learning_rate": 0.00013430285772399064, "loss": 1.0466, "step": 3236 }, { "epoch": 0.6580605814189876, "grad_norm": 0.12715165317058563, "learning_rate": 0.00013428251805145937, "loss": 0.988, "step": 3237 }, { "epoch": 0.658263874771295, "grad_norm": 0.14531929790973663, "learning_rate": 0.00013426217837892812, "loss": 1.1871, "step": 3238 }, { "epoch": 0.6584671681236024, "grad_norm": 0.139459490776062, "learning_rate": 0.00013424183870639684, "loss": 1.1572, "step": 3239 }, { "epoch": 0.6586704614759097, "grad_norm": 0.11804230511188507, "learning_rate": 0.00013422149903386557, "loss": 0.9844, "step": 3240 }, { "epoch": 0.6588737548282171, "grad_norm": 0.14333584904670715, "learning_rate": 0.0001342011593613343, "loss": 1.1719, "step": 3241 }, { "epoch": 0.6590770481805245, "grad_norm": 0.14224494993686676, "learning_rate": 0.00013418081968880302, "loss": 1.1448, "step": 3242 }, { "epoch": 0.6592803415328319, "grad_norm": 0.11388222128152847, "learning_rate": 0.00013416048001627174, "loss": 0.9178, "step": 3243 }, { "epoch": 0.6594836348851393, "grad_norm": 0.12758168578147888, "learning_rate": 0.00013414014034374047, "loss": 0.9446, "step": 3244 }, { "epoch": 0.6596869282374467, "grad_norm": 0.159623384475708, "learning_rate": 0.0001341198006712092, "loss": 1.3143, "step": 3245 }, { "epoch": 0.659890221589754, "grad_norm": 0.13925635814666748, "learning_rate": 0.00013409946099867792, "loss": 1.131, "step": 3246 }, { "epoch": 0.6600935149420614, "grad_norm": 0.12121693789958954, "learning_rate": 0.00013407912132614667, "loss": 1.0076, "step": 3247 }, { "epoch": 0.6602968082943688, "grad_norm": 0.12954868376255035, "learning_rate": 0.0001340587816536154, "loss": 0.9662, "step": 3248 }, { "epoch": 0.6605001016466762, "grad_norm": 0.13503266870975494, "learning_rate": 0.00013403844198108412, "loss": 1.0233, "step": 3249 }, { "epoch": 0.6607033949989836, "grad_norm": 0.13549566268920898, "learning_rate": 0.00013401810230855284, "loss": 1.0545, "step": 3250 }, { "epoch": 0.660906688351291, "grad_norm": 0.13881300389766693, "learning_rate": 0.00013399776263602157, "loss": 1.0844, "step": 3251 }, { "epoch": 0.6611099817035982, "grad_norm": 0.13221535086631775, "learning_rate": 0.0001339774229634903, "loss": 1.0159, "step": 3252 }, { "epoch": 0.6613132750559056, "grad_norm": 0.1378117799758911, "learning_rate": 0.00013395708329095901, "loss": 1.2071, "step": 3253 }, { "epoch": 0.661516568408213, "grad_norm": 0.1307571530342102, "learning_rate": 0.00013393674361842774, "loss": 1.0874, "step": 3254 }, { "epoch": 0.6617198617605204, "grad_norm": 0.1532752364873886, "learning_rate": 0.0001339164039458965, "loss": 1.263, "step": 3255 }, { "epoch": 0.6619231551128278, "grad_norm": 0.14829877018928528, "learning_rate": 0.00013389606427336521, "loss": 1.1919, "step": 3256 }, { "epoch": 0.6621264484651352, "grad_norm": 0.12832298874855042, "learning_rate": 0.00013387572460083394, "loss": 1.0, "step": 3257 }, { "epoch": 0.6623297418174425, "grad_norm": 0.12127513438463211, "learning_rate": 0.00013385538492830266, "loss": 1.03, "step": 3258 }, { "epoch": 0.6625330351697499, "grad_norm": 0.1351458579301834, "learning_rate": 0.0001338350452557714, "loss": 1.0167, "step": 3259 }, { "epoch": 0.6627363285220573, "grad_norm": 0.11357429623603821, "learning_rate": 0.0001338147055832401, "loss": 0.9135, "step": 3260 }, { "epoch": 0.6629396218743647, "grad_norm": 0.14391832053661346, "learning_rate": 0.00013379436591070884, "loss": 1.1626, "step": 3261 }, { "epoch": 0.6631429152266721, "grad_norm": 0.1359371840953827, "learning_rate": 0.00013377402623817756, "loss": 1.1155, "step": 3262 }, { "epoch": 0.6633462085789794, "grad_norm": 0.14570018649101257, "learning_rate": 0.0001337536865656463, "loss": 1.0961, "step": 3263 }, { "epoch": 0.6635495019312868, "grad_norm": 0.12299071252346039, "learning_rate": 0.00013373334689311504, "loss": 0.9879, "step": 3264 }, { "epoch": 0.6637527952835942, "grad_norm": 0.1427142471075058, "learning_rate": 0.00013371300722058376, "loss": 1.2364, "step": 3265 }, { "epoch": 0.6639560886359016, "grad_norm": 0.1400018036365509, "learning_rate": 0.0001336926675480525, "loss": 1.1366, "step": 3266 }, { "epoch": 0.664159381988209, "grad_norm": 0.14757339656352997, "learning_rate": 0.0001336723278755212, "loss": 1.2415, "step": 3267 }, { "epoch": 0.6643626753405164, "grad_norm": 0.1404561698436737, "learning_rate": 0.00013365198820298994, "loss": 1.0536, "step": 3268 }, { "epoch": 0.6645659686928237, "grad_norm": 0.13608767092227936, "learning_rate": 0.00013363164853045866, "loss": 1.0977, "step": 3269 }, { "epoch": 0.6647692620451311, "grad_norm": 0.14513832330703735, "learning_rate": 0.00013361130885792738, "loss": 1.1665, "step": 3270 }, { "epoch": 0.6649725553974385, "grad_norm": 0.12752074003219604, "learning_rate": 0.00013359096918539614, "loss": 1.0611, "step": 3271 }, { "epoch": 0.6651758487497459, "grad_norm": 0.1297471970319748, "learning_rate": 0.00013357062951286486, "loss": 1.0336, "step": 3272 }, { "epoch": 0.6653791421020533, "grad_norm": 0.13528691232204437, "learning_rate": 0.00013355028984033359, "loss": 1.1205, "step": 3273 }, { "epoch": 0.6655824354543607, "grad_norm": 0.13278824090957642, "learning_rate": 0.0001335299501678023, "loss": 1.1608, "step": 3274 }, { "epoch": 0.665785728806668, "grad_norm": 0.1527799665927887, "learning_rate": 0.00013350961049527103, "loss": 1.1947, "step": 3275 }, { "epoch": 0.6659890221589754, "grad_norm": 0.11764834076166153, "learning_rate": 0.00013348927082273976, "loss": 0.8985, "step": 3276 }, { "epoch": 0.6661923155112828, "grad_norm": 0.12094051390886307, "learning_rate": 0.00013346893115020848, "loss": 0.8954, "step": 3277 }, { "epoch": 0.6663956088635902, "grad_norm": 0.1273156702518463, "learning_rate": 0.0001334485914776772, "loss": 1.0629, "step": 3278 }, { "epoch": 0.6665989022158976, "grad_norm": 0.12444844841957092, "learning_rate": 0.00013342825180514596, "loss": 1.0771, "step": 3279 }, { "epoch": 0.666802195568205, "grad_norm": 0.13100309669971466, "learning_rate": 0.00013340791213261468, "loss": 1.0665, "step": 3280 }, { "epoch": 0.6670054889205123, "grad_norm": 0.14003531634807587, "learning_rate": 0.0001333875724600834, "loss": 1.1218, "step": 3281 }, { "epoch": 0.6672087822728197, "grad_norm": 0.13837094604969025, "learning_rate": 0.00013336723278755213, "loss": 1.1864, "step": 3282 }, { "epoch": 0.667412075625127, "grad_norm": 0.1185075119137764, "learning_rate": 0.00013334689311502086, "loss": 0.9776, "step": 3283 }, { "epoch": 0.6676153689774345, "grad_norm": 0.1384880095720291, "learning_rate": 0.00013332655344248958, "loss": 1.117, "step": 3284 }, { "epoch": 0.6678186623297419, "grad_norm": 0.1331661343574524, "learning_rate": 0.0001333062137699583, "loss": 1.1523, "step": 3285 }, { "epoch": 0.6680219556820493, "grad_norm": 0.12203952670097351, "learning_rate": 0.00013328587409742703, "loss": 1.0236, "step": 3286 }, { "epoch": 0.6682252490343565, "grad_norm": 0.1446705311536789, "learning_rate": 0.00013326553442489575, "loss": 1.2011, "step": 3287 }, { "epoch": 0.6684285423866639, "grad_norm": 0.15075799822807312, "learning_rate": 0.0001332451947523645, "loss": 1.1135, "step": 3288 }, { "epoch": 0.6686318357389713, "grad_norm": 0.13888481259346008, "learning_rate": 0.00013322485507983323, "loss": 1.1357, "step": 3289 }, { "epoch": 0.6688351290912787, "grad_norm": 0.13847656548023224, "learning_rate": 0.00013320451540730196, "loss": 1.1679, "step": 3290 }, { "epoch": 0.6690384224435861, "grad_norm": 0.14227357506752014, "learning_rate": 0.00013318417573477068, "loss": 1.184, "step": 3291 }, { "epoch": 0.6692417157958934, "grad_norm": 0.13490445911884308, "learning_rate": 0.0001331638360622394, "loss": 1.026, "step": 3292 }, { "epoch": 0.6694450091482008, "grad_norm": 0.14282800257205963, "learning_rate": 0.00013314349638970813, "loss": 1.1685, "step": 3293 }, { "epoch": 0.6696483025005082, "grad_norm": 0.1398768126964569, "learning_rate": 0.00013312315671717685, "loss": 1.1653, "step": 3294 }, { "epoch": 0.6698515958528156, "grad_norm": 0.1359616070985794, "learning_rate": 0.00013310281704464558, "loss": 0.9866, "step": 3295 }, { "epoch": 0.670054889205123, "grad_norm": 0.14484332501888275, "learning_rate": 0.00013308247737211433, "loss": 1.119, "step": 3296 }, { "epoch": 0.6702581825574304, "grad_norm": 0.12202159315347672, "learning_rate": 0.00013306213769958305, "loss": 1.0366, "step": 3297 }, { "epoch": 0.6704614759097377, "grad_norm": 0.1427534818649292, "learning_rate": 0.00013304179802705178, "loss": 1.1238, "step": 3298 }, { "epoch": 0.6706647692620451, "grad_norm": 0.12576861679553986, "learning_rate": 0.0001330214583545205, "loss": 1.1628, "step": 3299 }, { "epoch": 0.6708680626143525, "grad_norm": 0.11372304707765579, "learning_rate": 0.00013300111868198923, "loss": 1.0396, "step": 3300 }, { "epoch": 0.6710713559666599, "grad_norm": 0.12820537388324738, "learning_rate": 0.00013298077900945795, "loss": 0.9094, "step": 3301 }, { "epoch": 0.6712746493189673, "grad_norm": 0.1097426563501358, "learning_rate": 0.00013296043933692668, "loss": 0.887, "step": 3302 }, { "epoch": 0.6714779426712747, "grad_norm": 0.13616250455379486, "learning_rate": 0.0001329400996643954, "loss": 1.0729, "step": 3303 }, { "epoch": 0.671681236023582, "grad_norm": 0.14476965367794037, "learning_rate": 0.00013291975999186415, "loss": 1.0224, "step": 3304 }, { "epoch": 0.6718845293758894, "grad_norm": 0.13365976512432098, "learning_rate": 0.00013289942031933288, "loss": 1.1, "step": 3305 }, { "epoch": 0.6720878227281968, "grad_norm": 0.12170373648405075, "learning_rate": 0.0001328790806468016, "loss": 0.9986, "step": 3306 }, { "epoch": 0.6722911160805042, "grad_norm": 0.1351754069328308, "learning_rate": 0.00013285874097427033, "loss": 1.0089, "step": 3307 }, { "epoch": 0.6724944094328116, "grad_norm": 0.13269051909446716, "learning_rate": 0.00013283840130173905, "loss": 0.9508, "step": 3308 }, { "epoch": 0.672697702785119, "grad_norm": 0.12628872692584991, "learning_rate": 0.00013281806162920777, "loss": 0.96, "step": 3309 }, { "epoch": 0.6729009961374263, "grad_norm": 0.13434316217899323, "learning_rate": 0.0001327977219566765, "loss": 1.0935, "step": 3310 }, { "epoch": 0.6731042894897337, "grad_norm": 0.137080579996109, "learning_rate": 0.00013277738228414522, "loss": 1.151, "step": 3311 }, { "epoch": 0.6733075828420411, "grad_norm": 0.1294548362493515, "learning_rate": 0.00013275704261161397, "loss": 1.0094, "step": 3312 }, { "epoch": 0.6735108761943485, "grad_norm": 0.14055456221103668, "learning_rate": 0.0001327367029390827, "loss": 1.056, "step": 3313 }, { "epoch": 0.6737141695466559, "grad_norm": 0.12785248458385468, "learning_rate": 0.00013271636326655142, "loss": 1.0964, "step": 3314 }, { "epoch": 0.6739174628989631, "grad_norm": 0.14090466499328613, "learning_rate": 0.00013269602359402015, "loss": 1.1419, "step": 3315 }, { "epoch": 0.6741207562512705, "grad_norm": 0.12105811387300491, "learning_rate": 0.00013267568392148887, "loss": 0.929, "step": 3316 }, { "epoch": 0.674324049603578, "grad_norm": 0.1410580724477768, "learning_rate": 0.0001326553442489576, "loss": 1.0849, "step": 3317 }, { "epoch": 0.6745273429558853, "grad_norm": 0.13689137995243073, "learning_rate": 0.00013263500457642632, "loss": 1.0011, "step": 3318 }, { "epoch": 0.6747306363081927, "grad_norm": 0.12887214124202728, "learning_rate": 0.00013261466490389505, "loss": 1.0231, "step": 3319 }, { "epoch": 0.6749339296605001, "grad_norm": 0.12463674694299698, "learning_rate": 0.0001325943252313638, "loss": 0.864, "step": 3320 }, { "epoch": 0.6751372230128074, "grad_norm": 0.13897714018821716, "learning_rate": 0.00013257398555883252, "loss": 1.1127, "step": 3321 }, { "epoch": 0.6753405163651148, "grad_norm": 0.1311863511800766, "learning_rate": 0.00013255364588630125, "loss": 1.0822, "step": 3322 }, { "epoch": 0.6755438097174222, "grad_norm": 0.1215839833021164, "learning_rate": 0.00013253330621376997, "loss": 0.9599, "step": 3323 }, { "epoch": 0.6757471030697296, "grad_norm": 0.12233379483222961, "learning_rate": 0.0001325129665412387, "loss": 0.8926, "step": 3324 }, { "epoch": 0.675950396422037, "grad_norm": 0.1159176304936409, "learning_rate": 0.00013249262686870742, "loss": 0.79, "step": 3325 }, { "epoch": 0.6761536897743444, "grad_norm": 0.1344752311706543, "learning_rate": 0.00013247228719617614, "loss": 1.0011, "step": 3326 }, { "epoch": 0.6763569831266517, "grad_norm": 0.14110898971557617, "learning_rate": 0.00013245194752364487, "loss": 1.1465, "step": 3327 }, { "epoch": 0.6765602764789591, "grad_norm": 0.12130746990442276, "learning_rate": 0.0001324316078511136, "loss": 0.9631, "step": 3328 }, { "epoch": 0.6767635698312665, "grad_norm": 0.12850743532180786, "learning_rate": 0.00013241126817858234, "loss": 1.0909, "step": 3329 }, { "epoch": 0.6769668631835739, "grad_norm": 0.14836134016513824, "learning_rate": 0.00013239092850605107, "loss": 1.3898, "step": 3330 }, { "epoch": 0.6771701565358813, "grad_norm": 0.1397714763879776, "learning_rate": 0.0001323705888335198, "loss": 1.1767, "step": 3331 }, { "epoch": 0.6773734498881887, "grad_norm": 0.13022536039352417, "learning_rate": 0.0001323502491609885, "loss": 1.0389, "step": 3332 }, { "epoch": 0.677576743240496, "grad_norm": 0.12649066746234894, "learning_rate": 0.00013232990948845724, "loss": 0.8931, "step": 3333 }, { "epoch": 0.6777800365928034, "grad_norm": 0.1422676295042038, "learning_rate": 0.00013230956981592597, "loss": 1.1758, "step": 3334 }, { "epoch": 0.6779833299451108, "grad_norm": 0.12162751704454422, "learning_rate": 0.0001322892301433947, "loss": 1.0629, "step": 3335 }, { "epoch": 0.6781866232974182, "grad_norm": 0.14175549149513245, "learning_rate": 0.00013226889047086342, "loss": 1.2327, "step": 3336 }, { "epoch": 0.6783899166497256, "grad_norm": 0.13854654133319855, "learning_rate": 0.00013224855079833217, "loss": 1.1758, "step": 3337 }, { "epoch": 0.678593210002033, "grad_norm": 0.11496133357286453, "learning_rate": 0.0001322282111258009, "loss": 0.8925, "step": 3338 }, { "epoch": 0.6787965033543403, "grad_norm": 0.1376158595085144, "learning_rate": 0.00013220787145326962, "loss": 1.2326, "step": 3339 }, { "epoch": 0.6789997967066477, "grad_norm": 0.12731988728046417, "learning_rate": 0.00013218753178073834, "loss": 1.0195, "step": 3340 }, { "epoch": 0.6792030900589551, "grad_norm": 0.1400342583656311, "learning_rate": 0.00013216719210820707, "loss": 1.1155, "step": 3341 }, { "epoch": 0.6794063834112625, "grad_norm": 0.11408770084381104, "learning_rate": 0.0001321468524356758, "loss": 0.8986, "step": 3342 }, { "epoch": 0.6796096767635699, "grad_norm": 0.13925215601921082, "learning_rate": 0.00013212651276314451, "loss": 1.0844, "step": 3343 }, { "epoch": 0.6798129701158772, "grad_norm": 0.13174065947532654, "learning_rate": 0.00013210617309061324, "loss": 0.9927, "step": 3344 }, { "epoch": 0.6800162634681846, "grad_norm": 0.12421359866857529, "learning_rate": 0.000132085833418082, "loss": 0.8822, "step": 3345 }, { "epoch": 0.680219556820492, "grad_norm": 0.14170731604099274, "learning_rate": 0.00013206549374555071, "loss": 1.0215, "step": 3346 }, { "epoch": 0.6804228501727994, "grad_norm": 0.13698481023311615, "learning_rate": 0.00013204515407301944, "loss": 1.1608, "step": 3347 }, { "epoch": 0.6806261435251068, "grad_norm": 0.12675851583480835, "learning_rate": 0.00013202481440048816, "loss": 1.0425, "step": 3348 }, { "epoch": 0.6808294368774142, "grad_norm": 0.13038714230060577, "learning_rate": 0.0001320044747279569, "loss": 1.0598, "step": 3349 }, { "epoch": 0.6810327302297214, "grad_norm": 0.1283421814441681, "learning_rate": 0.0001319841350554256, "loss": 0.9638, "step": 3350 }, { "epoch": 0.6812360235820288, "grad_norm": 0.1362680047750473, "learning_rate": 0.00013196379538289434, "loss": 1.1957, "step": 3351 }, { "epoch": 0.6814393169343362, "grad_norm": 0.12494239211082458, "learning_rate": 0.00013194345571036306, "loss": 1.0474, "step": 3352 }, { "epoch": 0.6816426102866436, "grad_norm": 0.11277607828378677, "learning_rate": 0.0001319231160378318, "loss": 0.9065, "step": 3353 }, { "epoch": 0.681845903638951, "grad_norm": 0.13010768592357635, "learning_rate": 0.00013190277636530054, "loss": 1.0045, "step": 3354 }, { "epoch": 0.6820491969912584, "grad_norm": 0.13375157117843628, "learning_rate": 0.00013188243669276926, "loss": 0.9506, "step": 3355 }, { "epoch": 0.6822524903435657, "grad_norm": 0.13150712847709656, "learning_rate": 0.00013186209702023799, "loss": 1.0485, "step": 3356 }, { "epoch": 0.6824557836958731, "grad_norm": 0.13057585060596466, "learning_rate": 0.0001318417573477067, "loss": 1.0687, "step": 3357 }, { "epoch": 0.6826590770481805, "grad_norm": 0.13433004915714264, "learning_rate": 0.00013182141767517544, "loss": 0.985, "step": 3358 }, { "epoch": 0.6828623704004879, "grad_norm": 0.1338491439819336, "learning_rate": 0.00013180107800264416, "loss": 1.1384, "step": 3359 }, { "epoch": 0.6830656637527953, "grad_norm": 0.13416750729084015, "learning_rate": 0.00013178073833011288, "loss": 1.0868, "step": 3360 }, { "epoch": 0.6832689571051027, "grad_norm": 0.13917329907417297, "learning_rate": 0.00013176039865758164, "loss": 1.1072, "step": 3361 }, { "epoch": 0.68347225045741, "grad_norm": 0.1197846531867981, "learning_rate": 0.00013174005898505036, "loss": 0.9208, "step": 3362 }, { "epoch": 0.6836755438097174, "grad_norm": 0.1425098180770874, "learning_rate": 0.00013171971931251908, "loss": 1.2693, "step": 3363 }, { "epoch": 0.6838788371620248, "grad_norm": 0.13614432513713837, "learning_rate": 0.0001316993796399878, "loss": 0.9667, "step": 3364 }, { "epoch": 0.6840821305143322, "grad_norm": 0.1563062071800232, "learning_rate": 0.00013167903996745653, "loss": 1.1234, "step": 3365 }, { "epoch": 0.6842854238666396, "grad_norm": 0.1402071714401245, "learning_rate": 0.00013165870029492526, "loss": 1.0228, "step": 3366 }, { "epoch": 0.684488717218947, "grad_norm": 0.14747624099254608, "learning_rate": 0.00013163836062239398, "loss": 1.2746, "step": 3367 }, { "epoch": 0.6846920105712543, "grad_norm": 0.11560353636741638, "learning_rate": 0.0001316180209498627, "loss": 0.9313, "step": 3368 }, { "epoch": 0.6848953039235617, "grad_norm": 0.12440039217472076, "learning_rate": 0.00013159768127733143, "loss": 0.986, "step": 3369 }, { "epoch": 0.6850985972758691, "grad_norm": 0.13954605162143707, "learning_rate": 0.00013157734160480018, "loss": 1.2206, "step": 3370 }, { "epoch": 0.6853018906281765, "grad_norm": 0.139942929148674, "learning_rate": 0.0001315570019322689, "loss": 0.9075, "step": 3371 }, { "epoch": 0.6855051839804839, "grad_norm": 0.13854482769966125, "learning_rate": 0.00013153666225973763, "loss": 1.1007, "step": 3372 }, { "epoch": 0.6857084773327912, "grad_norm": 0.12603192031383514, "learning_rate": 0.00013151632258720633, "loss": 1.1533, "step": 3373 }, { "epoch": 0.6859117706850986, "grad_norm": 0.12680287659168243, "learning_rate": 0.00013149598291467508, "loss": 1.0463, "step": 3374 }, { "epoch": 0.686115064037406, "grad_norm": 0.12043260782957077, "learning_rate": 0.0001314756432421438, "loss": 0.8653, "step": 3375 }, { "epoch": 0.6863183573897134, "grad_norm": 0.15314915776252747, "learning_rate": 0.00013145530356961253, "loss": 1.1384, "step": 3376 }, { "epoch": 0.6865216507420208, "grad_norm": 0.12305079400539398, "learning_rate": 0.00013143496389708125, "loss": 0.9134, "step": 3377 }, { "epoch": 0.6867249440943282, "grad_norm": 0.12972278892993927, "learning_rate": 0.00013141462422455, "loss": 1.1011, "step": 3378 }, { "epoch": 0.6869282374466354, "grad_norm": 0.12650032341480255, "learning_rate": 0.00013139428455201873, "loss": 1.0518, "step": 3379 }, { "epoch": 0.6871315307989428, "grad_norm": 0.13137362897396088, "learning_rate": 0.00013137394487948745, "loss": 1.0093, "step": 3380 }, { "epoch": 0.6873348241512502, "grad_norm": 0.1400621086359024, "learning_rate": 0.00013135360520695615, "loss": 1.2115, "step": 3381 }, { "epoch": 0.6875381175035576, "grad_norm": 0.1252133697271347, "learning_rate": 0.0001313332655344249, "loss": 1.0329, "step": 3382 }, { "epoch": 0.687741410855865, "grad_norm": 0.13961845636367798, "learning_rate": 0.00013131292586189363, "loss": 1.0093, "step": 3383 }, { "epoch": 0.6879447042081724, "grad_norm": 0.1432250738143921, "learning_rate": 0.00013129258618936235, "loss": 1.2186, "step": 3384 }, { "epoch": 0.6881479975604797, "grad_norm": 0.1433638036251068, "learning_rate": 0.00013127224651683108, "loss": 1.2933, "step": 3385 }, { "epoch": 0.6883512909127871, "grad_norm": 0.13323669135570526, "learning_rate": 0.00013125190684429983, "loss": 1.0775, "step": 3386 }, { "epoch": 0.6885545842650945, "grad_norm": 0.15013840794563293, "learning_rate": 0.00013123156717176855, "loss": 1.0446, "step": 3387 }, { "epoch": 0.6887578776174019, "grad_norm": 0.13675931096076965, "learning_rate": 0.00013121122749923728, "loss": 1.1743, "step": 3388 }, { "epoch": 0.6889611709697093, "grad_norm": 0.13321883976459503, "learning_rate": 0.00013119088782670597, "loss": 1.2053, "step": 3389 }, { "epoch": 0.6891644643220167, "grad_norm": 0.14458970725536346, "learning_rate": 0.00013117054815417473, "loss": 1.0808, "step": 3390 }, { "epoch": 0.689367757674324, "grad_norm": 0.12558375298976898, "learning_rate": 0.00013115020848164345, "loss": 0.9879, "step": 3391 }, { "epoch": 0.6895710510266314, "grad_norm": 0.13324345648288727, "learning_rate": 0.00013112986880911218, "loss": 1.0561, "step": 3392 }, { "epoch": 0.6897743443789388, "grad_norm": 0.1250324845314026, "learning_rate": 0.0001311095291365809, "loss": 1.0982, "step": 3393 }, { "epoch": 0.6899776377312462, "grad_norm": 0.13437926769256592, "learning_rate": 0.00013108918946404965, "loss": 1.0323, "step": 3394 }, { "epoch": 0.6901809310835536, "grad_norm": 0.1360880434513092, "learning_rate": 0.00013106884979151838, "loss": 1.079, "step": 3395 }, { "epoch": 0.6903842244358609, "grad_norm": 0.14753840863704681, "learning_rate": 0.0001310485101189871, "loss": 1.1558, "step": 3396 }, { "epoch": 0.6905875177881683, "grad_norm": 0.1305796205997467, "learning_rate": 0.00013102817044645582, "loss": 1.093, "step": 3397 }, { "epoch": 0.6907908111404757, "grad_norm": 0.14020781219005585, "learning_rate": 0.00013100783077392455, "loss": 1.1657, "step": 3398 }, { "epoch": 0.6909941044927831, "grad_norm": 0.1320771425962448, "learning_rate": 0.00013098749110139327, "loss": 0.9718, "step": 3399 }, { "epoch": 0.6911973978450905, "grad_norm": 0.13931889832019806, "learning_rate": 0.000130967151428862, "loss": 1.1547, "step": 3400 }, { "epoch": 0.6914006911973979, "grad_norm": 0.13345004618167877, "learning_rate": 0.00013094681175633072, "loss": 1.1103, "step": 3401 }, { "epoch": 0.6916039845497052, "grad_norm": 0.1303638517856598, "learning_rate": 0.00013092647208379947, "loss": 1.06, "step": 3402 }, { "epoch": 0.6918072779020126, "grad_norm": 0.12979425489902496, "learning_rate": 0.0001309061324112682, "loss": 1.025, "step": 3403 }, { "epoch": 0.69201057125432, "grad_norm": 0.1420203149318695, "learning_rate": 0.00013088579273873692, "loss": 1.3724, "step": 3404 }, { "epoch": 0.6922138646066274, "grad_norm": 0.13811589777469635, "learning_rate": 0.00013086545306620565, "loss": 1.1415, "step": 3405 }, { "epoch": 0.6924171579589348, "grad_norm": 0.1472085863351822, "learning_rate": 0.00013084511339367437, "loss": 1.2374, "step": 3406 }, { "epoch": 0.6926204513112422, "grad_norm": 0.12186230719089508, "learning_rate": 0.0001308247737211431, "loss": 0.8606, "step": 3407 }, { "epoch": 0.6928237446635495, "grad_norm": 0.14273689687252045, "learning_rate": 0.00013080443404861182, "loss": 0.9656, "step": 3408 }, { "epoch": 0.6930270380158569, "grad_norm": 0.1363956779241562, "learning_rate": 0.00013078409437608055, "loss": 1.1867, "step": 3409 }, { "epoch": 0.6932303313681643, "grad_norm": 0.1353340446949005, "learning_rate": 0.00013076375470354927, "loss": 1.0755, "step": 3410 }, { "epoch": 0.6934336247204717, "grad_norm": 0.12223875522613525, "learning_rate": 0.00013074341503101802, "loss": 0.9282, "step": 3411 }, { "epoch": 0.693636918072779, "grad_norm": 0.13030283153057098, "learning_rate": 0.00013072307535848675, "loss": 1.0846, "step": 3412 }, { "epoch": 0.6938402114250865, "grad_norm": 0.14898596704006195, "learning_rate": 0.00013070273568595547, "loss": 1.1225, "step": 3413 }, { "epoch": 0.6940435047773937, "grad_norm": 0.13688309490680695, "learning_rate": 0.00013068239601342417, "loss": 1.1666, "step": 3414 }, { "epoch": 0.6942467981297011, "grad_norm": 0.1352292150259018, "learning_rate": 0.00013066205634089292, "loss": 1.1107, "step": 3415 }, { "epoch": 0.6944500914820085, "grad_norm": 0.1321742832660675, "learning_rate": 0.00013064171666836164, "loss": 1.1059, "step": 3416 }, { "epoch": 0.6946533848343159, "grad_norm": 0.11616258323192596, "learning_rate": 0.00013062137699583037, "loss": 0.9273, "step": 3417 }, { "epoch": 0.6948566781866233, "grad_norm": 0.13355232775211334, "learning_rate": 0.0001306010373232991, "loss": 0.9926, "step": 3418 }, { "epoch": 0.6950599715389307, "grad_norm": 0.12835095822811127, "learning_rate": 0.00013058069765076784, "loss": 0.9222, "step": 3419 }, { "epoch": 0.695263264891238, "grad_norm": 0.13715249300003052, "learning_rate": 0.00013056035797823657, "loss": 1.0299, "step": 3420 }, { "epoch": 0.6954665582435454, "grad_norm": 0.12749621272087097, "learning_rate": 0.0001305400183057053, "loss": 0.8392, "step": 3421 }, { "epoch": 0.6956698515958528, "grad_norm": 0.12953422963619232, "learning_rate": 0.000130519678633174, "loss": 0.9364, "step": 3422 }, { "epoch": 0.6958731449481602, "grad_norm": 0.1335253268480301, "learning_rate": 0.00013049933896064274, "loss": 1.1024, "step": 3423 }, { "epoch": 0.6960764383004676, "grad_norm": 0.1350051760673523, "learning_rate": 0.00013047899928811147, "loss": 0.9491, "step": 3424 }, { "epoch": 0.6962797316527749, "grad_norm": 0.12581254541873932, "learning_rate": 0.0001304586596155802, "loss": 1.1341, "step": 3425 }, { "epoch": 0.6964830250050823, "grad_norm": 0.12518788874149323, "learning_rate": 0.00013043831994304892, "loss": 1.0329, "step": 3426 }, { "epoch": 0.6966863183573897, "grad_norm": 0.12527361512184143, "learning_rate": 0.00013041798027051767, "loss": 0.9014, "step": 3427 }, { "epoch": 0.6968896117096971, "grad_norm": 0.13964787125587463, "learning_rate": 0.0001303976405979864, "loss": 1.1231, "step": 3428 }, { "epoch": 0.6970929050620045, "grad_norm": 0.1401492953300476, "learning_rate": 0.00013037730092545512, "loss": 1.0639, "step": 3429 }, { "epoch": 0.6972961984143119, "grad_norm": 0.1398945450782776, "learning_rate": 0.0001303569612529238, "loss": 1.0882, "step": 3430 }, { "epoch": 0.6974994917666192, "grad_norm": 0.15390872955322266, "learning_rate": 0.00013033662158039256, "loss": 1.3927, "step": 3431 }, { "epoch": 0.6977027851189266, "grad_norm": 0.11634422838687897, "learning_rate": 0.0001303162819078613, "loss": 0.9213, "step": 3432 }, { "epoch": 0.697906078471234, "grad_norm": 0.14000141620635986, "learning_rate": 0.00013029594223533001, "loss": 1.1035, "step": 3433 }, { "epoch": 0.6981093718235414, "grad_norm": 0.13036206364631653, "learning_rate": 0.00013027560256279874, "loss": 1.0366, "step": 3434 }, { "epoch": 0.6983126651758488, "grad_norm": 0.1375044733285904, "learning_rate": 0.0001302552628902675, "loss": 1.1924, "step": 3435 }, { "epoch": 0.6985159585281562, "grad_norm": 0.13283680379390717, "learning_rate": 0.00013023492321773621, "loss": 1.1097, "step": 3436 }, { "epoch": 0.6987192518804635, "grad_norm": 0.14721041917800903, "learning_rate": 0.00013021458354520494, "loss": 1.1784, "step": 3437 }, { "epoch": 0.6989225452327709, "grad_norm": 0.1452692449092865, "learning_rate": 0.00013019424387267364, "loss": 1.1948, "step": 3438 }, { "epoch": 0.6991258385850783, "grad_norm": 0.12445453554391861, "learning_rate": 0.0001301739042001424, "loss": 1.0154, "step": 3439 }, { "epoch": 0.6993291319373857, "grad_norm": 0.13780944049358368, "learning_rate": 0.0001301535645276111, "loss": 1.1673, "step": 3440 }, { "epoch": 0.6995324252896931, "grad_norm": 0.14468298852443695, "learning_rate": 0.00013013322485507984, "loss": 1.2753, "step": 3441 }, { "epoch": 0.6997357186420005, "grad_norm": 0.13938096165657043, "learning_rate": 0.00013011288518254856, "loss": 1.1033, "step": 3442 }, { "epoch": 0.6999390119943077, "grad_norm": 0.12781304121017456, "learning_rate": 0.0001300925455100173, "loss": 0.8622, "step": 3443 }, { "epoch": 0.7001423053466151, "grad_norm": 0.12039446085691452, "learning_rate": 0.00013007220583748604, "loss": 0.923, "step": 3444 }, { "epoch": 0.7003455986989225, "grad_norm": 0.11263223737478256, "learning_rate": 0.00013005186616495476, "loss": 0.889, "step": 3445 }, { "epoch": 0.7005488920512299, "grad_norm": 0.10796971619129181, "learning_rate": 0.00013003152649242346, "loss": 0.852, "step": 3446 }, { "epoch": 0.7007521854035373, "grad_norm": 0.12779220938682556, "learning_rate": 0.0001300111868198922, "loss": 0.9844, "step": 3447 }, { "epoch": 0.7009554787558446, "grad_norm": 0.12436182051897049, "learning_rate": 0.00012999084714736093, "loss": 1.0729, "step": 3448 }, { "epoch": 0.701158772108152, "grad_norm": 0.12066857516765594, "learning_rate": 0.00012997050747482966, "loss": 0.9179, "step": 3449 }, { "epoch": 0.7013620654604594, "grad_norm": 0.12307177484035492, "learning_rate": 0.00012995016780229838, "loss": 0.9204, "step": 3450 }, { "epoch": 0.7015653588127668, "grad_norm": 0.1301327794790268, "learning_rate": 0.0001299298281297671, "loss": 1.1486, "step": 3451 }, { "epoch": 0.7017686521650742, "grad_norm": 0.13147859275341034, "learning_rate": 0.00012990948845723586, "loss": 1.0035, "step": 3452 }, { "epoch": 0.7019719455173816, "grad_norm": 0.13557538390159607, "learning_rate": 0.00012988914878470458, "loss": 1.1641, "step": 3453 }, { "epoch": 0.7021752388696889, "grad_norm": 0.11187610030174255, "learning_rate": 0.0001298688091121733, "loss": 0.9894, "step": 3454 }, { "epoch": 0.7023785322219963, "grad_norm": 0.12350699305534363, "learning_rate": 0.000129848469439642, "loss": 1.1538, "step": 3455 }, { "epoch": 0.7025818255743037, "grad_norm": 0.1363372653722763, "learning_rate": 0.00012982812976711076, "loss": 1.0104, "step": 3456 }, { "epoch": 0.7027851189266111, "grad_norm": 0.12748870253562927, "learning_rate": 0.00012980779009457948, "loss": 0.9978, "step": 3457 }, { "epoch": 0.7029884122789185, "grad_norm": 0.12273624539375305, "learning_rate": 0.0001297874504220482, "loss": 0.967, "step": 3458 }, { "epoch": 0.7031917056312259, "grad_norm": 0.13453403115272522, "learning_rate": 0.00012976711074951693, "loss": 1.1081, "step": 3459 }, { "epoch": 0.7033949989835332, "grad_norm": 0.13335007429122925, "learning_rate": 0.00012974677107698568, "loss": 0.9803, "step": 3460 }, { "epoch": 0.7035982923358406, "grad_norm": 0.13500504195690155, "learning_rate": 0.0001297264314044544, "loss": 0.967, "step": 3461 }, { "epoch": 0.703801585688148, "grad_norm": 0.145028218626976, "learning_rate": 0.00012970609173192313, "loss": 1.1643, "step": 3462 }, { "epoch": 0.7040048790404554, "grad_norm": 0.14210622012615204, "learning_rate": 0.00012968575205939183, "loss": 1.1898, "step": 3463 }, { "epoch": 0.7042081723927628, "grad_norm": 0.1239437535405159, "learning_rate": 0.00012966541238686058, "loss": 1.0432, "step": 3464 }, { "epoch": 0.7044114657450702, "grad_norm": 0.14510378241539001, "learning_rate": 0.0001296450727143293, "loss": 1.025, "step": 3465 }, { "epoch": 0.7046147590973775, "grad_norm": 0.13489870727062225, "learning_rate": 0.00012962473304179803, "loss": 1.1407, "step": 3466 }, { "epoch": 0.7048180524496849, "grad_norm": 0.12685105204582214, "learning_rate": 0.00012960439336926675, "loss": 1.0256, "step": 3467 }, { "epoch": 0.7050213458019923, "grad_norm": 0.14244306087493896, "learning_rate": 0.0001295840536967355, "loss": 1.1148, "step": 3468 }, { "epoch": 0.7052246391542997, "grad_norm": 0.13121604919433594, "learning_rate": 0.00012956371402420423, "loss": 0.9663, "step": 3469 }, { "epoch": 0.7054279325066071, "grad_norm": 0.14584699273109436, "learning_rate": 0.00012954337435167295, "loss": 1.1258, "step": 3470 }, { "epoch": 0.7056312258589145, "grad_norm": 0.130800262093544, "learning_rate": 0.00012952303467914165, "loss": 0.9679, "step": 3471 }, { "epoch": 0.7058345192112218, "grad_norm": 0.13025017082691193, "learning_rate": 0.0001295026950066104, "loss": 1.0312, "step": 3472 }, { "epoch": 0.7060378125635292, "grad_norm": 0.13392165303230286, "learning_rate": 0.00012948235533407913, "loss": 1.1851, "step": 3473 }, { "epoch": 0.7062411059158366, "grad_norm": 0.1399383693933487, "learning_rate": 0.00012946201566154785, "loss": 1.1286, "step": 3474 }, { "epoch": 0.706444399268144, "grad_norm": 0.12997640669345856, "learning_rate": 0.00012944167598901658, "loss": 1.0042, "step": 3475 }, { "epoch": 0.7066476926204514, "grad_norm": 0.1388452649116516, "learning_rate": 0.00012942133631648533, "loss": 1.1952, "step": 3476 }, { "epoch": 0.7068509859727586, "grad_norm": 0.13053801655769348, "learning_rate": 0.00012940099664395405, "loss": 0.9092, "step": 3477 }, { "epoch": 0.707054279325066, "grad_norm": 0.1334877759218216, "learning_rate": 0.00012938065697142278, "loss": 0.9635, "step": 3478 }, { "epoch": 0.7072575726773734, "grad_norm": 0.13902603089809418, "learning_rate": 0.00012936031729889147, "loss": 1.0507, "step": 3479 }, { "epoch": 0.7074608660296808, "grad_norm": 0.13863757252693176, "learning_rate": 0.00012933997762636023, "loss": 0.9798, "step": 3480 }, { "epoch": 0.7076641593819882, "grad_norm": 0.12283840775489807, "learning_rate": 0.00012931963795382895, "loss": 0.8997, "step": 3481 }, { "epoch": 0.7078674527342956, "grad_norm": 0.13135948777198792, "learning_rate": 0.00012929929828129768, "loss": 1.2408, "step": 3482 }, { "epoch": 0.7080707460866029, "grad_norm": 0.15379171073436737, "learning_rate": 0.0001292789586087664, "loss": 1.195, "step": 3483 }, { "epoch": 0.7082740394389103, "grad_norm": 0.13256476819515228, "learning_rate": 0.00012925861893623515, "loss": 1.0582, "step": 3484 }, { "epoch": 0.7084773327912177, "grad_norm": 0.12100596725940704, "learning_rate": 0.00012923827926370388, "loss": 0.8826, "step": 3485 }, { "epoch": 0.7086806261435251, "grad_norm": 0.13334119319915771, "learning_rate": 0.0001292179395911726, "loss": 1.2063, "step": 3486 }, { "epoch": 0.7088839194958325, "grad_norm": 0.14578770101070404, "learning_rate": 0.0001291975999186413, "loss": 1.2067, "step": 3487 }, { "epoch": 0.7090872128481399, "grad_norm": 0.13182413578033447, "learning_rate": 0.00012917726024611005, "loss": 1.0382, "step": 3488 }, { "epoch": 0.7092905062004472, "grad_norm": 0.13377144932746887, "learning_rate": 0.00012915692057357877, "loss": 1.0798, "step": 3489 }, { "epoch": 0.7094937995527546, "grad_norm": 0.15311647951602936, "learning_rate": 0.0001291365809010475, "loss": 1.1925, "step": 3490 }, { "epoch": 0.709697092905062, "grad_norm": 0.13385489583015442, "learning_rate": 0.00012911624122851622, "loss": 1.0437, "step": 3491 }, { "epoch": 0.7099003862573694, "grad_norm": 0.12438102066516876, "learning_rate": 0.00012909590155598495, "loss": 0.9325, "step": 3492 }, { "epoch": 0.7101036796096768, "grad_norm": 0.11423248052597046, "learning_rate": 0.0001290755618834537, "loss": 0.955, "step": 3493 }, { "epoch": 0.7103069729619842, "grad_norm": 0.1257968544960022, "learning_rate": 0.00012905522221092242, "loss": 0.9734, "step": 3494 }, { "epoch": 0.7105102663142915, "grad_norm": 0.12875302135944366, "learning_rate": 0.00012903488253839112, "loss": 0.9762, "step": 3495 }, { "epoch": 0.7107135596665989, "grad_norm": 0.15575377643108368, "learning_rate": 0.00012901454286585984, "loss": 1.199, "step": 3496 }, { "epoch": 0.7109168530189063, "grad_norm": 0.12835876643657684, "learning_rate": 0.0001289942031933286, "loss": 1.0026, "step": 3497 }, { "epoch": 0.7111201463712137, "grad_norm": 0.13237829506397247, "learning_rate": 0.00012897386352079732, "loss": 1.1309, "step": 3498 }, { "epoch": 0.7113234397235211, "grad_norm": 0.13372915983200073, "learning_rate": 0.00012895352384826605, "loss": 1.0218, "step": 3499 }, { "epoch": 0.7115267330758284, "grad_norm": 0.1394553929567337, "learning_rate": 0.00012893318417573477, "loss": 1.1678, "step": 3500 }, { "epoch": 0.7117300264281358, "grad_norm": 0.12859494984149933, "learning_rate": 0.00012891284450320352, "loss": 0.9039, "step": 3501 }, { "epoch": 0.7119333197804432, "grad_norm": 0.13142433762550354, "learning_rate": 0.00012889250483067225, "loss": 0.9692, "step": 3502 }, { "epoch": 0.7121366131327506, "grad_norm": 0.15247346460819244, "learning_rate": 0.00012887216515814094, "loss": 1.1068, "step": 3503 }, { "epoch": 0.712339906485058, "grad_norm": 0.1271810233592987, "learning_rate": 0.00012885182548560967, "loss": 1.05, "step": 3504 }, { "epoch": 0.7125431998373654, "grad_norm": 0.12222661077976227, "learning_rate": 0.00012883148581307842, "loss": 0.9563, "step": 3505 }, { "epoch": 0.7127464931896726, "grad_norm": 0.14147427678108215, "learning_rate": 0.00012881114614054714, "loss": 1.0697, "step": 3506 }, { "epoch": 0.71294978654198, "grad_norm": 0.12644895911216736, "learning_rate": 0.00012879080646801587, "loss": 0.9251, "step": 3507 }, { "epoch": 0.7131530798942874, "grad_norm": 0.126128152012825, "learning_rate": 0.0001287704667954846, "loss": 0.9617, "step": 3508 }, { "epoch": 0.7133563732465948, "grad_norm": 0.12538930773735046, "learning_rate": 0.00012875012712295334, "loss": 1.084, "step": 3509 }, { "epoch": 0.7135596665989022, "grad_norm": 0.1261541098356247, "learning_rate": 0.00012872978745042207, "loss": 1.0671, "step": 3510 }, { "epoch": 0.7137629599512096, "grad_norm": 0.15382623672485352, "learning_rate": 0.00012870944777789077, "loss": 1.1679, "step": 3511 }, { "epoch": 0.7139662533035169, "grad_norm": 0.14954978227615356, "learning_rate": 0.0001286891081053595, "loss": 1.0822, "step": 3512 }, { "epoch": 0.7141695466558243, "grad_norm": 0.12342054396867752, "learning_rate": 0.00012866876843282824, "loss": 1.0252, "step": 3513 }, { "epoch": 0.7143728400081317, "grad_norm": 0.13561514019966125, "learning_rate": 0.00012864842876029697, "loss": 1.0025, "step": 3514 }, { "epoch": 0.7145761333604391, "grad_norm": 0.13942426443099976, "learning_rate": 0.0001286280890877657, "loss": 1.0978, "step": 3515 }, { "epoch": 0.7147794267127465, "grad_norm": 0.13418523967266083, "learning_rate": 0.00012860774941523442, "loss": 0.9401, "step": 3516 }, { "epoch": 0.7149827200650539, "grad_norm": 0.13293065130710602, "learning_rate": 0.00012858740974270317, "loss": 0.9563, "step": 3517 }, { "epoch": 0.7151860134173612, "grad_norm": 0.13507983088493347, "learning_rate": 0.0001285670700701719, "loss": 1.1482, "step": 3518 }, { "epoch": 0.7153893067696686, "grad_norm": 0.13518783450126648, "learning_rate": 0.00012854673039764062, "loss": 1.0677, "step": 3519 }, { "epoch": 0.715592600121976, "grad_norm": 0.14185848832130432, "learning_rate": 0.0001285263907251093, "loss": 1.1828, "step": 3520 }, { "epoch": 0.7157958934742834, "grad_norm": 0.14050935208797455, "learning_rate": 0.00012850605105257806, "loss": 1.137, "step": 3521 }, { "epoch": 0.7159991868265908, "grad_norm": 0.12821073830127716, "learning_rate": 0.0001284857113800468, "loss": 0.9265, "step": 3522 }, { "epoch": 0.7162024801788982, "grad_norm": 0.14910835027694702, "learning_rate": 0.0001284653717075155, "loss": 1.1298, "step": 3523 }, { "epoch": 0.7164057735312055, "grad_norm": 0.12309451401233673, "learning_rate": 0.00012844503203498424, "loss": 0.9552, "step": 3524 }, { "epoch": 0.7166090668835129, "grad_norm": 0.13226357102394104, "learning_rate": 0.000128424692362453, "loss": 1.0618, "step": 3525 }, { "epoch": 0.7168123602358203, "grad_norm": 0.14502473175525665, "learning_rate": 0.00012840435268992171, "loss": 1.0766, "step": 3526 }, { "epoch": 0.7170156535881277, "grad_norm": 0.15114335715770721, "learning_rate": 0.00012838401301739044, "loss": 1.3117, "step": 3527 }, { "epoch": 0.7172189469404351, "grad_norm": 0.14016559720039368, "learning_rate": 0.00012836367334485914, "loss": 1.0747, "step": 3528 }, { "epoch": 0.7174222402927424, "grad_norm": 0.10465826839208603, "learning_rate": 0.0001283433336723279, "loss": 0.9244, "step": 3529 }, { "epoch": 0.7176255336450498, "grad_norm": 0.14475956559181213, "learning_rate": 0.0001283229939997966, "loss": 1.1458, "step": 3530 }, { "epoch": 0.7178288269973572, "grad_norm": 0.15967129170894623, "learning_rate": 0.00012830265432726534, "loss": 1.218, "step": 3531 }, { "epoch": 0.7180321203496646, "grad_norm": 0.16239500045776367, "learning_rate": 0.00012828231465473406, "loss": 1.1088, "step": 3532 }, { "epoch": 0.718235413701972, "grad_norm": 0.13778537511825562, "learning_rate": 0.00012826197498220279, "loss": 1.2284, "step": 3533 }, { "epoch": 0.7184387070542794, "grad_norm": 0.15743795037269592, "learning_rate": 0.00012824163530967154, "loss": 1.2057, "step": 3534 }, { "epoch": 0.7186420004065867, "grad_norm": 0.13260531425476074, "learning_rate": 0.00012822129563714026, "loss": 1.1239, "step": 3535 }, { "epoch": 0.718845293758894, "grad_norm": 0.12460935115814209, "learning_rate": 0.00012820095596460896, "loss": 1.0504, "step": 3536 }, { "epoch": 0.7190485871112015, "grad_norm": 0.13355574011802673, "learning_rate": 0.00012818061629207768, "loss": 1.0719, "step": 3537 }, { "epoch": 0.7192518804635089, "grad_norm": 0.1280195564031601, "learning_rate": 0.00012816027661954643, "loss": 1.012, "step": 3538 }, { "epoch": 0.7194551738158163, "grad_norm": 0.13986103236675262, "learning_rate": 0.00012813993694701516, "loss": 1.0658, "step": 3539 }, { "epoch": 0.7196584671681237, "grad_norm": 0.1170068234205246, "learning_rate": 0.00012811959727448388, "loss": 0.9056, "step": 3540 }, { "epoch": 0.7198617605204309, "grad_norm": 0.13569694757461548, "learning_rate": 0.0001280992576019526, "loss": 1.116, "step": 3541 }, { "epoch": 0.7200650538727383, "grad_norm": 0.1323375403881073, "learning_rate": 0.00012807891792942136, "loss": 1.0629, "step": 3542 }, { "epoch": 0.7202683472250457, "grad_norm": 0.12855368852615356, "learning_rate": 0.00012805857825689008, "loss": 0.89, "step": 3543 }, { "epoch": 0.7204716405773531, "grad_norm": 0.12019526958465576, "learning_rate": 0.00012803823858435878, "loss": 1.063, "step": 3544 }, { "epoch": 0.7206749339296605, "grad_norm": 0.13612791895866394, "learning_rate": 0.0001280178989118275, "loss": 0.9844, "step": 3545 }, { "epoch": 0.7208782272819679, "grad_norm": 0.1345546394586563, "learning_rate": 0.00012799755923929626, "loss": 0.9661, "step": 3546 }, { "epoch": 0.7210815206342752, "grad_norm": 0.12953819334506989, "learning_rate": 0.00012797721956676498, "loss": 0.968, "step": 3547 }, { "epoch": 0.7212848139865826, "grad_norm": 0.15265563130378723, "learning_rate": 0.0001279568798942337, "loss": 1.2455, "step": 3548 }, { "epoch": 0.72148810733889, "grad_norm": 0.16196173429489136, "learning_rate": 0.00012793654022170243, "loss": 1.1605, "step": 3549 }, { "epoch": 0.7216914006911974, "grad_norm": 0.13228391110897064, "learning_rate": 0.00012791620054917118, "loss": 1.1216, "step": 3550 }, { "epoch": 0.7218946940435048, "grad_norm": 0.1355789303779602, "learning_rate": 0.0001278958608766399, "loss": 1.1065, "step": 3551 }, { "epoch": 0.7220979873958121, "grad_norm": 0.13458067178726196, "learning_rate": 0.0001278755212041086, "loss": 1.173, "step": 3552 }, { "epoch": 0.7223012807481195, "grad_norm": 0.12841463088989258, "learning_rate": 0.00012785518153157733, "loss": 0.9425, "step": 3553 }, { "epoch": 0.7225045741004269, "grad_norm": 0.1273353099822998, "learning_rate": 0.00012783484185904608, "loss": 0.9527, "step": 3554 }, { "epoch": 0.7227078674527343, "grad_norm": 0.13753145933151245, "learning_rate": 0.0001278145021865148, "loss": 0.9917, "step": 3555 }, { "epoch": 0.7229111608050417, "grad_norm": 0.15175598859786987, "learning_rate": 0.00012779416251398353, "loss": 1.1534, "step": 3556 }, { "epoch": 0.7231144541573491, "grad_norm": 0.12491641193628311, "learning_rate": 0.00012777382284145225, "loss": 0.9861, "step": 3557 }, { "epoch": 0.7233177475096564, "grad_norm": 0.135353222489357, "learning_rate": 0.000127753483168921, "loss": 1.1638, "step": 3558 }, { "epoch": 0.7235210408619638, "grad_norm": 0.14735917747020721, "learning_rate": 0.00012773314349638973, "loss": 1.1341, "step": 3559 }, { "epoch": 0.7237243342142712, "grad_norm": 0.11300304532051086, "learning_rate": 0.00012771280382385843, "loss": 0.9734, "step": 3560 }, { "epoch": 0.7239276275665786, "grad_norm": 0.12589031457901, "learning_rate": 0.00012769246415132715, "loss": 0.9325, "step": 3561 }, { "epoch": 0.724130920918886, "grad_norm": 0.14478862285614014, "learning_rate": 0.0001276721244787959, "loss": 1.2025, "step": 3562 }, { "epoch": 0.7243342142711934, "grad_norm": 0.12382597476243973, "learning_rate": 0.00012765178480626463, "loss": 1.0629, "step": 3563 }, { "epoch": 0.7245375076235007, "grad_norm": 0.13786040246486664, "learning_rate": 0.00012763144513373335, "loss": 1.142, "step": 3564 }, { "epoch": 0.7247408009758081, "grad_norm": 0.13986682891845703, "learning_rate": 0.00012761110546120208, "loss": 1.1434, "step": 3565 }, { "epoch": 0.7249440943281155, "grad_norm": 0.13523870706558228, "learning_rate": 0.00012759076578867083, "loss": 0.889, "step": 3566 }, { "epoch": 0.7251473876804229, "grad_norm": 0.12312185764312744, "learning_rate": 0.00012757042611613955, "loss": 1.0227, "step": 3567 }, { "epoch": 0.7253506810327303, "grad_norm": 0.1324312686920166, "learning_rate": 0.00012755008644360825, "loss": 0.9739, "step": 3568 }, { "epoch": 0.7255539743850377, "grad_norm": 0.13704247772693634, "learning_rate": 0.00012752974677107697, "loss": 1.0356, "step": 3569 }, { "epoch": 0.725757267737345, "grad_norm": 0.12928558886051178, "learning_rate": 0.00012750940709854573, "loss": 0.9112, "step": 3570 }, { "epoch": 0.7259605610896523, "grad_norm": 0.12993620336055756, "learning_rate": 0.00012748906742601445, "loss": 1.0499, "step": 3571 }, { "epoch": 0.7261638544419597, "grad_norm": 0.13459739089012146, "learning_rate": 0.00012746872775348317, "loss": 1.1546, "step": 3572 }, { "epoch": 0.7263671477942671, "grad_norm": 0.14553983509540558, "learning_rate": 0.0001274483880809519, "loss": 1.1466, "step": 3573 }, { "epoch": 0.7265704411465745, "grad_norm": 0.1270923614501953, "learning_rate": 0.00012742804840842065, "loss": 0.9953, "step": 3574 }, { "epoch": 0.7267737344988819, "grad_norm": 0.11883358657360077, "learning_rate": 0.00012740770873588938, "loss": 0.9983, "step": 3575 }, { "epoch": 0.7269770278511892, "grad_norm": 0.13899964094161987, "learning_rate": 0.0001273873690633581, "loss": 1.0463, "step": 3576 }, { "epoch": 0.7271803212034966, "grad_norm": 0.12887227535247803, "learning_rate": 0.0001273670293908268, "loss": 0.9673, "step": 3577 }, { "epoch": 0.727383614555804, "grad_norm": 0.15297862887382507, "learning_rate": 0.00012734668971829552, "loss": 1.1517, "step": 3578 }, { "epoch": 0.7275869079081114, "grad_norm": 0.12056870758533478, "learning_rate": 0.00012732635004576427, "loss": 0.9494, "step": 3579 }, { "epoch": 0.7277902012604188, "grad_norm": 0.13357582688331604, "learning_rate": 0.000127306010373233, "loss": 0.9368, "step": 3580 }, { "epoch": 0.7279934946127261, "grad_norm": 0.1344243586063385, "learning_rate": 0.00012728567070070172, "loss": 0.9977, "step": 3581 }, { "epoch": 0.7281967879650335, "grad_norm": 0.12713217735290527, "learning_rate": 0.00012726533102817045, "loss": 0.9283, "step": 3582 }, { "epoch": 0.7284000813173409, "grad_norm": 0.1435747891664505, "learning_rate": 0.0001272449913556392, "loss": 1.0784, "step": 3583 }, { "epoch": 0.7286033746696483, "grad_norm": 0.1342409998178482, "learning_rate": 0.00012722465168310792, "loss": 0.99, "step": 3584 }, { "epoch": 0.7288066680219557, "grad_norm": 0.13497351109981537, "learning_rate": 0.00012720431201057662, "loss": 1.0655, "step": 3585 }, { "epoch": 0.7290099613742631, "grad_norm": 0.13522464036941528, "learning_rate": 0.00012718397233804534, "loss": 1.1785, "step": 3586 }, { "epoch": 0.7292132547265704, "grad_norm": 0.12016705423593521, "learning_rate": 0.0001271636326655141, "loss": 0.93, "step": 3587 }, { "epoch": 0.7294165480788778, "grad_norm": 0.1265437752008438, "learning_rate": 0.00012714329299298282, "loss": 1.0289, "step": 3588 }, { "epoch": 0.7296198414311852, "grad_norm": 0.12697303295135498, "learning_rate": 0.00012712295332045154, "loss": 1.0539, "step": 3589 }, { "epoch": 0.7298231347834926, "grad_norm": 0.14529366791248322, "learning_rate": 0.00012710261364792027, "loss": 1.134, "step": 3590 }, { "epoch": 0.7300264281358, "grad_norm": 0.1143953874707222, "learning_rate": 0.00012708227397538902, "loss": 0.7966, "step": 3591 }, { "epoch": 0.7302297214881074, "grad_norm": 0.14083142578601837, "learning_rate": 0.00012706193430285775, "loss": 1.1776, "step": 3592 }, { "epoch": 0.7304330148404147, "grad_norm": 0.12843115627765656, "learning_rate": 0.00012704159463032644, "loss": 1.075, "step": 3593 }, { "epoch": 0.7306363081927221, "grad_norm": 0.12742142379283905, "learning_rate": 0.00012702125495779517, "loss": 1.1087, "step": 3594 }, { "epoch": 0.7308396015450295, "grad_norm": 0.13479192554950714, "learning_rate": 0.00012700091528526392, "loss": 1.0954, "step": 3595 }, { "epoch": 0.7310428948973369, "grad_norm": 0.13784924149513245, "learning_rate": 0.00012698057561273264, "loss": 1.1773, "step": 3596 }, { "epoch": 0.7312461882496443, "grad_norm": 0.13474421203136444, "learning_rate": 0.00012696023594020137, "loss": 1.0023, "step": 3597 }, { "epoch": 0.7314494816019517, "grad_norm": 0.14545200765132904, "learning_rate": 0.0001269398962676701, "loss": 1.1166, "step": 3598 }, { "epoch": 0.731652774954259, "grad_norm": 0.11699052155017853, "learning_rate": 0.00012691955659513884, "loss": 0.982, "step": 3599 }, { "epoch": 0.7318560683065664, "grad_norm": 0.13108402490615845, "learning_rate": 0.00012689921692260757, "loss": 0.9805, "step": 3600 }, { "epoch": 0.7320593616588738, "grad_norm": 0.12493366003036499, "learning_rate": 0.00012687887725007627, "loss": 1.03, "step": 3601 }, { "epoch": 0.7322626550111812, "grad_norm": 0.12673288583755493, "learning_rate": 0.000126858537577545, "loss": 0.99, "step": 3602 }, { "epoch": 0.7324659483634886, "grad_norm": 0.14298030734062195, "learning_rate": 0.00012683819790501374, "loss": 0.9816, "step": 3603 }, { "epoch": 0.7326692417157958, "grad_norm": 0.1383986473083496, "learning_rate": 0.00012681785823248247, "loss": 1.0635, "step": 3604 }, { "epoch": 0.7328725350681032, "grad_norm": 0.13229741156101227, "learning_rate": 0.0001267975185599512, "loss": 1.0752, "step": 3605 }, { "epoch": 0.7330758284204106, "grad_norm": 0.12566420435905457, "learning_rate": 0.00012677717888741991, "loss": 1.0984, "step": 3606 }, { "epoch": 0.733279121772718, "grad_norm": 0.14375749230384827, "learning_rate": 0.00012675683921488867, "loss": 1.0459, "step": 3607 }, { "epoch": 0.7334824151250254, "grad_norm": 0.1541428565979004, "learning_rate": 0.0001267364995423574, "loss": 1.2405, "step": 3608 }, { "epoch": 0.7336857084773328, "grad_norm": 0.1277463287115097, "learning_rate": 0.0001267161598698261, "loss": 0.9784, "step": 3609 }, { "epoch": 0.7338890018296401, "grad_norm": 0.11879061907529831, "learning_rate": 0.0001266958201972948, "loss": 0.9301, "step": 3610 }, { "epoch": 0.7340922951819475, "grad_norm": 0.1343902200460434, "learning_rate": 0.00012667548052476356, "loss": 1.2015, "step": 3611 }, { "epoch": 0.7342955885342549, "grad_norm": 0.12574651837348938, "learning_rate": 0.0001266551408522323, "loss": 1.0215, "step": 3612 }, { "epoch": 0.7344988818865623, "grad_norm": 0.12160508334636688, "learning_rate": 0.000126634801179701, "loss": 1.0208, "step": 3613 }, { "epoch": 0.7347021752388697, "grad_norm": 0.13637933135032654, "learning_rate": 0.00012661446150716974, "loss": 1.1179, "step": 3614 }, { "epoch": 0.7349054685911771, "grad_norm": 0.14247237145900726, "learning_rate": 0.0001265941218346385, "loss": 1.1746, "step": 3615 }, { "epoch": 0.7351087619434844, "grad_norm": 0.14084017276763916, "learning_rate": 0.00012657378216210721, "loss": 0.9984, "step": 3616 }, { "epoch": 0.7353120552957918, "grad_norm": 0.1475144922733307, "learning_rate": 0.0001265534424895759, "loss": 1.0763, "step": 3617 }, { "epoch": 0.7355153486480992, "grad_norm": 0.12147875130176544, "learning_rate": 0.00012653310281704464, "loss": 1.0814, "step": 3618 }, { "epoch": 0.7357186420004066, "grad_norm": 0.12373865395784378, "learning_rate": 0.00012651276314451336, "loss": 0.9432, "step": 3619 }, { "epoch": 0.735921935352714, "grad_norm": 0.11395063996315002, "learning_rate": 0.0001264924234719821, "loss": 0.8629, "step": 3620 }, { "epoch": 0.7361252287050214, "grad_norm": 0.12742386758327484, "learning_rate": 0.00012647208379945084, "loss": 1.1148, "step": 3621 }, { "epoch": 0.7363285220573287, "grad_norm": 0.13474571704864502, "learning_rate": 0.00012645174412691956, "loss": 1.0862, "step": 3622 }, { "epoch": 0.7365318154096361, "grad_norm": 0.14104367792606354, "learning_rate": 0.00012643140445438828, "loss": 1.1224, "step": 3623 }, { "epoch": 0.7367351087619435, "grad_norm": 0.1266336888074875, "learning_rate": 0.00012641106478185704, "loss": 1.0388, "step": 3624 }, { "epoch": 0.7369384021142509, "grad_norm": 0.1418471783399582, "learning_rate": 0.00012639072510932573, "loss": 1.0211, "step": 3625 }, { "epoch": 0.7371416954665583, "grad_norm": 0.14462773501873016, "learning_rate": 0.00012637038543679446, "loss": 1.0212, "step": 3626 }, { "epoch": 0.7373449888188657, "grad_norm": 0.14509986340999603, "learning_rate": 0.00012635004576426318, "loss": 1.1922, "step": 3627 }, { "epoch": 0.737548282171173, "grad_norm": 0.13801227509975433, "learning_rate": 0.00012632970609173193, "loss": 1.1082, "step": 3628 }, { "epoch": 0.7377515755234804, "grad_norm": 0.13551753759384155, "learning_rate": 0.00012630936641920066, "loss": 1.0822, "step": 3629 }, { "epoch": 0.7379548688757878, "grad_norm": 0.12872062623500824, "learning_rate": 0.00012628902674666938, "loss": 0.9366, "step": 3630 }, { "epoch": 0.7381581622280952, "grad_norm": 0.13623321056365967, "learning_rate": 0.0001262686870741381, "loss": 1.1129, "step": 3631 }, { "epoch": 0.7383614555804026, "grad_norm": 0.14300891757011414, "learning_rate": 0.00012624834740160686, "loss": 1.1259, "step": 3632 }, { "epoch": 0.7385647489327098, "grad_norm": 0.13372913002967834, "learning_rate": 0.00012622800772907558, "loss": 1.0011, "step": 3633 }, { "epoch": 0.7387680422850172, "grad_norm": 0.11722072213888168, "learning_rate": 0.00012620766805654428, "loss": 1.0085, "step": 3634 }, { "epoch": 0.7389713356373246, "grad_norm": 0.13151319324970245, "learning_rate": 0.000126187328384013, "loss": 1.1878, "step": 3635 }, { "epoch": 0.739174628989632, "grad_norm": 0.13933278620243073, "learning_rate": 0.00012616698871148176, "loss": 1.1763, "step": 3636 }, { "epoch": 0.7393779223419394, "grad_norm": 0.13536275923252106, "learning_rate": 0.00012614664903895048, "loss": 1.2087, "step": 3637 }, { "epoch": 0.7395812156942468, "grad_norm": 0.13037016987800598, "learning_rate": 0.0001261263093664192, "loss": 1.0033, "step": 3638 }, { "epoch": 0.7397845090465541, "grad_norm": 0.1575489491224289, "learning_rate": 0.00012610596969388793, "loss": 1.1683, "step": 3639 }, { "epoch": 0.7399878023988615, "grad_norm": 0.12989576160907745, "learning_rate": 0.00012608563002135668, "loss": 1.127, "step": 3640 }, { "epoch": 0.7401910957511689, "grad_norm": 0.14293938875198364, "learning_rate": 0.0001260652903488254, "loss": 1.1641, "step": 3641 }, { "epoch": 0.7403943891034763, "grad_norm": 0.12480568885803223, "learning_rate": 0.0001260449506762941, "loss": 0.9612, "step": 3642 }, { "epoch": 0.7405976824557837, "grad_norm": 0.1387239396572113, "learning_rate": 0.00012602461100376283, "loss": 1.1289, "step": 3643 }, { "epoch": 0.7408009758080911, "grad_norm": 0.12974587082862854, "learning_rate": 0.00012600427133123158, "loss": 1.0032, "step": 3644 }, { "epoch": 0.7410042691603984, "grad_norm": 0.14558175206184387, "learning_rate": 0.0001259839316587003, "loss": 1.0957, "step": 3645 }, { "epoch": 0.7412075625127058, "grad_norm": 0.1346643716096878, "learning_rate": 0.00012596359198616903, "loss": 1.0644, "step": 3646 }, { "epoch": 0.7414108558650132, "grad_norm": 0.1275978684425354, "learning_rate": 0.00012594325231363775, "loss": 1.0638, "step": 3647 }, { "epoch": 0.7416141492173206, "grad_norm": 0.11669638752937317, "learning_rate": 0.0001259229126411065, "loss": 0.9147, "step": 3648 }, { "epoch": 0.741817442569628, "grad_norm": 0.12056609243154526, "learning_rate": 0.00012590257296857523, "loss": 0.8193, "step": 3649 }, { "epoch": 0.7420207359219354, "grad_norm": 0.1389569789171219, "learning_rate": 0.00012588223329604393, "loss": 1.1273, "step": 3650 }, { "epoch": 0.7422240292742427, "grad_norm": 0.1116948276758194, "learning_rate": 0.00012586189362351265, "loss": 0.8484, "step": 3651 }, { "epoch": 0.7424273226265501, "grad_norm": 0.13268932700157166, "learning_rate": 0.0001258415539509814, "loss": 1.0092, "step": 3652 }, { "epoch": 0.7426306159788575, "grad_norm": 0.13985766470432281, "learning_rate": 0.00012582121427845013, "loss": 1.0907, "step": 3653 }, { "epoch": 0.7428339093311649, "grad_norm": 0.13794921338558197, "learning_rate": 0.00012580087460591885, "loss": 1.1482, "step": 3654 }, { "epoch": 0.7430372026834723, "grad_norm": 0.13911883533000946, "learning_rate": 0.00012578053493338758, "loss": 1.1641, "step": 3655 }, { "epoch": 0.7432404960357797, "grad_norm": 0.11809851974248886, "learning_rate": 0.00012576019526085633, "loss": 0.9128, "step": 3656 }, { "epoch": 0.743443789388087, "grad_norm": 0.17306208610534668, "learning_rate": 0.00012573985558832505, "loss": 1.1575, "step": 3657 }, { "epoch": 0.7436470827403944, "grad_norm": 0.14551490545272827, "learning_rate": 0.00012571951591579375, "loss": 1.2794, "step": 3658 }, { "epoch": 0.7438503760927018, "grad_norm": 0.14065933227539062, "learning_rate": 0.00012569917624326247, "loss": 1.0652, "step": 3659 }, { "epoch": 0.7440536694450092, "grad_norm": 0.13010179996490479, "learning_rate": 0.0001256788365707312, "loss": 0.9541, "step": 3660 }, { "epoch": 0.7442569627973166, "grad_norm": 0.1264103651046753, "learning_rate": 0.00012565849689819995, "loss": 0.9767, "step": 3661 }, { "epoch": 0.7444602561496239, "grad_norm": 0.12325896322727203, "learning_rate": 0.00012563815722566867, "loss": 1.1016, "step": 3662 }, { "epoch": 0.7446635495019313, "grad_norm": 0.12374068796634674, "learning_rate": 0.0001256178175531374, "loss": 0.9282, "step": 3663 }, { "epoch": 0.7448668428542387, "grad_norm": 0.14597944915294647, "learning_rate": 0.00012559747788060612, "loss": 1.1961, "step": 3664 }, { "epoch": 0.745070136206546, "grad_norm": 0.128509983420372, "learning_rate": 0.00012557713820807487, "loss": 0.9838, "step": 3665 }, { "epoch": 0.7452734295588535, "grad_norm": 0.1421680897474289, "learning_rate": 0.00012555679853554357, "loss": 1.1563, "step": 3666 }, { "epoch": 0.7454767229111608, "grad_norm": 0.1468690037727356, "learning_rate": 0.0001255364588630123, "loss": 1.1203, "step": 3667 }, { "epoch": 0.7456800162634681, "grad_norm": 0.1539076417684555, "learning_rate": 0.00012551611919048102, "loss": 1.1775, "step": 3668 }, { "epoch": 0.7458833096157755, "grad_norm": 0.15628856420516968, "learning_rate": 0.00012549577951794977, "loss": 1.1265, "step": 3669 }, { "epoch": 0.7460866029680829, "grad_norm": 0.1251571774482727, "learning_rate": 0.0001254754398454185, "loss": 1.0525, "step": 3670 }, { "epoch": 0.7462898963203903, "grad_norm": 0.13868333399295807, "learning_rate": 0.00012545510017288722, "loss": 1.0449, "step": 3671 }, { "epoch": 0.7464931896726977, "grad_norm": 0.15435542166233063, "learning_rate": 0.00012543476050035595, "loss": 1.2156, "step": 3672 }, { "epoch": 0.7466964830250051, "grad_norm": 0.13579222559928894, "learning_rate": 0.0001254144208278247, "loss": 0.9683, "step": 3673 }, { "epoch": 0.7468997763773124, "grad_norm": 0.14346475899219513, "learning_rate": 0.0001253940811552934, "loss": 1.1188, "step": 3674 }, { "epoch": 0.7471030697296198, "grad_norm": 0.13663546741008759, "learning_rate": 0.00012537374148276212, "loss": 0.9814, "step": 3675 }, { "epoch": 0.7473063630819272, "grad_norm": 0.14386685192584991, "learning_rate": 0.00012535340181023084, "loss": 1.2206, "step": 3676 }, { "epoch": 0.7475096564342346, "grad_norm": 0.1263144165277481, "learning_rate": 0.0001253330621376996, "loss": 0.9859, "step": 3677 }, { "epoch": 0.747712949786542, "grad_norm": 0.12757907807826996, "learning_rate": 0.00012531272246516832, "loss": 0.9453, "step": 3678 }, { "epoch": 0.7479162431388494, "grad_norm": 0.13055284321308136, "learning_rate": 0.00012529238279263704, "loss": 0.9623, "step": 3679 }, { "epoch": 0.7481195364911567, "grad_norm": 0.15445955097675323, "learning_rate": 0.00012527204312010577, "loss": 1.3087, "step": 3680 }, { "epoch": 0.7483228298434641, "grad_norm": 0.1479884386062622, "learning_rate": 0.00012525170344757452, "loss": 1.2992, "step": 3681 }, { "epoch": 0.7485261231957715, "grad_norm": 0.14582955837249756, "learning_rate": 0.00012523136377504322, "loss": 1.0265, "step": 3682 }, { "epoch": 0.7487294165480789, "grad_norm": 0.1448071002960205, "learning_rate": 0.00012521102410251194, "loss": 1.0637, "step": 3683 }, { "epoch": 0.7489327099003863, "grad_norm": 0.13632971048355103, "learning_rate": 0.00012519068442998067, "loss": 1.204, "step": 3684 }, { "epoch": 0.7491360032526936, "grad_norm": 0.11724304407835007, "learning_rate": 0.00012517034475744942, "loss": 0.8373, "step": 3685 }, { "epoch": 0.749339296605001, "grad_norm": 0.12346580624580383, "learning_rate": 0.00012515000508491814, "loss": 0.9118, "step": 3686 }, { "epoch": 0.7495425899573084, "grad_norm": 0.12630046904087067, "learning_rate": 0.00012512966541238687, "loss": 1.0533, "step": 3687 }, { "epoch": 0.7497458833096158, "grad_norm": 0.14778174459934235, "learning_rate": 0.0001251093257398556, "loss": 1.2604, "step": 3688 }, { "epoch": 0.7499491766619232, "grad_norm": 0.13751018047332764, "learning_rate": 0.00012508898606732434, "loss": 1.0252, "step": 3689 }, { "epoch": 0.7501524700142306, "grad_norm": 0.14556734263896942, "learning_rate": 0.00012506864639479307, "loss": 1.0671, "step": 3690 }, { "epoch": 0.7503557633665379, "grad_norm": 0.12826183438301086, "learning_rate": 0.00012504830672226177, "loss": 1.0401, "step": 3691 }, { "epoch": 0.7505590567188453, "grad_norm": 0.12293746322393417, "learning_rate": 0.0001250279670497305, "loss": 1.0505, "step": 3692 }, { "epoch": 0.7507623500711527, "grad_norm": 0.14679206907749176, "learning_rate": 0.00012500762737719924, "loss": 1.1754, "step": 3693 }, { "epoch": 0.7509656434234601, "grad_norm": 0.13845571875572205, "learning_rate": 0.00012498728770466797, "loss": 1.1254, "step": 3694 }, { "epoch": 0.7511689367757675, "grad_norm": 0.126956969499588, "learning_rate": 0.0001249669480321367, "loss": 0.9822, "step": 3695 }, { "epoch": 0.7513722301280749, "grad_norm": 0.13764221966266632, "learning_rate": 0.00012494660835960541, "loss": 1.1326, "step": 3696 }, { "epoch": 0.7515755234803821, "grad_norm": 0.13586993515491486, "learning_rate": 0.00012492626868707417, "loss": 1.0949, "step": 3697 }, { "epoch": 0.7517788168326895, "grad_norm": 0.1523975282907486, "learning_rate": 0.0001249059290145429, "loss": 1.1545, "step": 3698 }, { "epoch": 0.7519821101849969, "grad_norm": 0.13115784525871277, "learning_rate": 0.0001248855893420116, "loss": 1.0304, "step": 3699 }, { "epoch": 0.7521854035373043, "grad_norm": 0.13575038313865662, "learning_rate": 0.0001248652496694803, "loss": 1.1775, "step": 3700 }, { "epoch": 0.7523886968896117, "grad_norm": 0.14293211698532104, "learning_rate": 0.00012484490999694904, "loss": 0.987, "step": 3701 }, { "epoch": 0.7525919902419191, "grad_norm": 0.13629594445228577, "learning_rate": 0.0001248245703244178, "loss": 0.9952, "step": 3702 }, { "epoch": 0.7527952835942264, "grad_norm": 0.1200501024723053, "learning_rate": 0.0001248042306518865, "loss": 0.9087, "step": 3703 }, { "epoch": 0.7529985769465338, "grad_norm": 0.12878622114658356, "learning_rate": 0.00012478389097935524, "loss": 1.0632, "step": 3704 }, { "epoch": 0.7532018702988412, "grad_norm": 0.13195644319057465, "learning_rate": 0.00012476355130682396, "loss": 1.1865, "step": 3705 }, { "epoch": 0.7534051636511486, "grad_norm": 0.13144764304161072, "learning_rate": 0.0001247432116342927, "loss": 0.9257, "step": 3706 }, { "epoch": 0.753608457003456, "grad_norm": 0.12141410261392593, "learning_rate": 0.0001247228719617614, "loss": 0.9196, "step": 3707 }, { "epoch": 0.7538117503557634, "grad_norm": 0.13238899409770966, "learning_rate": 0.00012470253228923014, "loss": 1.1404, "step": 3708 }, { "epoch": 0.7540150437080707, "grad_norm": 0.1436709612607956, "learning_rate": 0.00012468219261669886, "loss": 1.1169, "step": 3709 }, { "epoch": 0.7542183370603781, "grad_norm": 0.14147412776947021, "learning_rate": 0.0001246618529441676, "loss": 1.2736, "step": 3710 }, { "epoch": 0.7544216304126855, "grad_norm": 0.13145607709884644, "learning_rate": 0.00012464151327163634, "loss": 1.0299, "step": 3711 }, { "epoch": 0.7546249237649929, "grad_norm": 0.11213693022727966, "learning_rate": 0.00012462117359910506, "loss": 0.8738, "step": 3712 }, { "epoch": 0.7548282171173003, "grad_norm": 0.14354929327964783, "learning_rate": 0.00012460083392657378, "loss": 1.0871, "step": 3713 }, { "epoch": 0.7550315104696076, "grad_norm": 0.15012142062187195, "learning_rate": 0.00012458049425404254, "loss": 1.0701, "step": 3714 }, { "epoch": 0.755234803821915, "grad_norm": 0.15194512903690338, "learning_rate": 0.00012456015458151123, "loss": 1.0018, "step": 3715 }, { "epoch": 0.7554380971742224, "grad_norm": 0.13199283182621002, "learning_rate": 0.00012453981490897996, "loss": 0.9806, "step": 3716 }, { "epoch": 0.7556413905265298, "grad_norm": 0.1443110853433609, "learning_rate": 0.00012451947523644868, "loss": 1.1558, "step": 3717 }, { "epoch": 0.7558446838788372, "grad_norm": 0.1215786412358284, "learning_rate": 0.00012449913556391743, "loss": 1.0642, "step": 3718 }, { "epoch": 0.7560479772311446, "grad_norm": 0.12462542206048965, "learning_rate": 0.00012447879589138616, "loss": 0.9799, "step": 3719 }, { "epoch": 0.7562512705834519, "grad_norm": 0.1319034993648529, "learning_rate": 0.00012445845621885488, "loss": 1.0646, "step": 3720 }, { "epoch": 0.7564545639357593, "grad_norm": 0.14364975690841675, "learning_rate": 0.0001244381165463236, "loss": 1.0853, "step": 3721 }, { "epoch": 0.7566578572880667, "grad_norm": 0.13716979324817657, "learning_rate": 0.00012441777687379236, "loss": 1.2145, "step": 3722 }, { "epoch": 0.7568611506403741, "grad_norm": 0.1348930448293686, "learning_rate": 0.00012439743720126106, "loss": 1.1096, "step": 3723 }, { "epoch": 0.7570644439926815, "grad_norm": 0.15214388072490692, "learning_rate": 0.00012437709752872978, "loss": 1.1986, "step": 3724 }, { "epoch": 0.7572677373449889, "grad_norm": 0.14679096639156342, "learning_rate": 0.0001243567578561985, "loss": 1.2809, "step": 3725 }, { "epoch": 0.7574710306972962, "grad_norm": 0.1401345431804657, "learning_rate": 0.00012433641818366726, "loss": 1.013, "step": 3726 }, { "epoch": 0.7576743240496036, "grad_norm": 0.14611610770225525, "learning_rate": 0.00012431607851113598, "loss": 1.2145, "step": 3727 }, { "epoch": 0.757877617401911, "grad_norm": 0.13954514265060425, "learning_rate": 0.0001242957388386047, "loss": 1.1382, "step": 3728 }, { "epoch": 0.7580809107542184, "grad_norm": 0.1354246586561203, "learning_rate": 0.00012427539916607343, "loss": 1.0877, "step": 3729 }, { "epoch": 0.7582842041065257, "grad_norm": 0.12606988847255707, "learning_rate": 0.00012425505949354218, "loss": 1.0592, "step": 3730 }, { "epoch": 0.7584874974588331, "grad_norm": 0.13141503930091858, "learning_rate": 0.00012423471982101088, "loss": 0.9414, "step": 3731 }, { "epoch": 0.7586907908111404, "grad_norm": 0.1343068927526474, "learning_rate": 0.0001242143801484796, "loss": 0.9862, "step": 3732 }, { "epoch": 0.7588940841634478, "grad_norm": 0.13875959813594818, "learning_rate": 0.00012419404047594833, "loss": 1.3412, "step": 3733 }, { "epoch": 0.7590973775157552, "grad_norm": 0.14184454083442688, "learning_rate": 0.00012417370080341708, "loss": 1.0852, "step": 3734 }, { "epoch": 0.7593006708680626, "grad_norm": 0.13765336573123932, "learning_rate": 0.0001241533611308858, "loss": 1.1018, "step": 3735 }, { "epoch": 0.75950396422037, "grad_norm": 0.11607436835765839, "learning_rate": 0.00012413302145835453, "loss": 0.8447, "step": 3736 }, { "epoch": 0.7597072575726773, "grad_norm": 0.13869017362594604, "learning_rate": 0.00012411268178582325, "loss": 1.125, "step": 3737 }, { "epoch": 0.7599105509249847, "grad_norm": 0.14673906564712524, "learning_rate": 0.000124092342113292, "loss": 1.1876, "step": 3738 }, { "epoch": 0.7601138442772921, "grad_norm": 0.1397872269153595, "learning_rate": 0.0001240720024407607, "loss": 1.0398, "step": 3739 }, { "epoch": 0.7603171376295995, "grad_norm": 0.1220252513885498, "learning_rate": 0.00012405166276822943, "loss": 0.9359, "step": 3740 }, { "epoch": 0.7605204309819069, "grad_norm": 0.1372562199831009, "learning_rate": 0.00012403132309569815, "loss": 1.0704, "step": 3741 }, { "epoch": 0.7607237243342143, "grad_norm": 0.12001727521419525, "learning_rate": 0.00012401098342316688, "loss": 1.0201, "step": 3742 }, { "epoch": 0.7609270176865216, "grad_norm": 0.12373898923397064, "learning_rate": 0.00012399064375063563, "loss": 0.9927, "step": 3743 }, { "epoch": 0.761130311038829, "grad_norm": 0.1459614783525467, "learning_rate": 0.00012397030407810435, "loss": 1.23, "step": 3744 }, { "epoch": 0.7613336043911364, "grad_norm": 0.14972059428691864, "learning_rate": 0.00012394996440557308, "loss": 1.2213, "step": 3745 }, { "epoch": 0.7615368977434438, "grad_norm": 0.125379741191864, "learning_rate": 0.0001239296247330418, "loss": 0.9302, "step": 3746 }, { "epoch": 0.7617401910957512, "grad_norm": 0.13220852613449097, "learning_rate": 0.00012390928506051055, "loss": 0.9221, "step": 3747 }, { "epoch": 0.7619434844480586, "grad_norm": 0.1334318369626999, "learning_rate": 0.00012388894538797925, "loss": 1.1046, "step": 3748 }, { "epoch": 0.7621467778003659, "grad_norm": 0.11617275327444077, "learning_rate": 0.00012386860571544797, "loss": 0.8787, "step": 3749 }, { "epoch": 0.7623500711526733, "grad_norm": 0.12812359631061554, "learning_rate": 0.0001238482660429167, "loss": 1.0109, "step": 3750 }, { "epoch": 0.7625533645049807, "grad_norm": 0.1491006761789322, "learning_rate": 0.00012382792637038545, "loss": 1.1522, "step": 3751 }, { "epoch": 0.7627566578572881, "grad_norm": 0.1386028379201889, "learning_rate": 0.00012380758669785417, "loss": 1.0567, "step": 3752 }, { "epoch": 0.7629599512095955, "grad_norm": 0.12961892783641815, "learning_rate": 0.0001237872470253229, "loss": 0.9436, "step": 3753 }, { "epoch": 0.7631632445619029, "grad_norm": 0.13355448842048645, "learning_rate": 0.00012376690735279162, "loss": 1.063, "step": 3754 }, { "epoch": 0.7633665379142102, "grad_norm": 0.1302691102027893, "learning_rate": 0.00012374656768026037, "loss": 1.019, "step": 3755 }, { "epoch": 0.7635698312665176, "grad_norm": 0.12183891981840134, "learning_rate": 0.00012372622800772907, "loss": 0.964, "step": 3756 }, { "epoch": 0.763773124618825, "grad_norm": 0.12347770482301712, "learning_rate": 0.0001237058883351978, "loss": 0.9031, "step": 3757 }, { "epoch": 0.7639764179711324, "grad_norm": 0.12646906077861786, "learning_rate": 0.00012368554866266652, "loss": 1.009, "step": 3758 }, { "epoch": 0.7641797113234398, "grad_norm": 0.15650388598442078, "learning_rate": 0.00012366520899013527, "loss": 1.2043, "step": 3759 }, { "epoch": 0.7643830046757472, "grad_norm": 0.13092441856861115, "learning_rate": 0.000123644869317604, "loss": 1.038, "step": 3760 }, { "epoch": 0.7645862980280544, "grad_norm": 0.11747883260250092, "learning_rate": 0.00012362452964507272, "loss": 1.0147, "step": 3761 }, { "epoch": 0.7647895913803618, "grad_norm": 0.13621081411838531, "learning_rate": 0.00012360418997254145, "loss": 1.0798, "step": 3762 }, { "epoch": 0.7649928847326692, "grad_norm": 0.1359243243932724, "learning_rate": 0.0001235838503000102, "loss": 1.0795, "step": 3763 }, { "epoch": 0.7651961780849766, "grad_norm": 0.14412851631641388, "learning_rate": 0.0001235635106274789, "loss": 1.0907, "step": 3764 }, { "epoch": 0.765399471437284, "grad_norm": 0.14425703883171082, "learning_rate": 0.00012354317095494762, "loss": 1.1903, "step": 3765 }, { "epoch": 0.7656027647895913, "grad_norm": 0.15288187563419342, "learning_rate": 0.00012352283128241634, "loss": 1.1775, "step": 3766 }, { "epoch": 0.7658060581418987, "grad_norm": 0.149856299161911, "learning_rate": 0.0001235024916098851, "loss": 1.172, "step": 3767 }, { "epoch": 0.7660093514942061, "grad_norm": 0.1374143660068512, "learning_rate": 0.00012348215193735382, "loss": 1.2566, "step": 3768 }, { "epoch": 0.7662126448465135, "grad_norm": 0.13301679491996765, "learning_rate": 0.00012346181226482254, "loss": 1.0046, "step": 3769 }, { "epoch": 0.7664159381988209, "grad_norm": 0.1274683177471161, "learning_rate": 0.00012344147259229127, "loss": 0.8951, "step": 3770 }, { "epoch": 0.7666192315511283, "grad_norm": 0.12883058190345764, "learning_rate": 0.00012342113291976002, "loss": 0.955, "step": 3771 }, { "epoch": 0.7668225249034356, "grad_norm": 0.13394391536712646, "learning_rate": 0.00012340079324722872, "loss": 1.1987, "step": 3772 }, { "epoch": 0.767025818255743, "grad_norm": 0.1280052363872528, "learning_rate": 0.00012338045357469744, "loss": 0.9552, "step": 3773 }, { "epoch": 0.7672291116080504, "grad_norm": 0.13542263209819794, "learning_rate": 0.00012336011390216617, "loss": 0.9411, "step": 3774 }, { "epoch": 0.7674324049603578, "grad_norm": 0.13187946379184723, "learning_rate": 0.00012333977422963492, "loss": 1.0447, "step": 3775 }, { "epoch": 0.7676356983126652, "grad_norm": 0.13274554908275604, "learning_rate": 0.00012331943455710364, "loss": 1.0556, "step": 3776 }, { "epoch": 0.7678389916649726, "grad_norm": 0.1356000155210495, "learning_rate": 0.00012329909488457237, "loss": 1.0791, "step": 3777 }, { "epoch": 0.7680422850172799, "grad_norm": 0.1446497142314911, "learning_rate": 0.0001232787552120411, "loss": 1.1708, "step": 3778 }, { "epoch": 0.7682455783695873, "grad_norm": 0.14726495742797852, "learning_rate": 0.00012325841553950984, "loss": 1.0312, "step": 3779 }, { "epoch": 0.7684488717218947, "grad_norm": 0.1248805895447731, "learning_rate": 0.00012323807586697854, "loss": 1.001, "step": 3780 }, { "epoch": 0.7686521650742021, "grad_norm": 0.13720335066318512, "learning_rate": 0.00012321773619444726, "loss": 1.1051, "step": 3781 }, { "epoch": 0.7688554584265095, "grad_norm": 0.12258980423212051, "learning_rate": 0.000123197396521916, "loss": 1.014, "step": 3782 }, { "epoch": 0.7690587517788169, "grad_norm": 0.14602990448474884, "learning_rate": 0.00012317705684938471, "loss": 1.1083, "step": 3783 }, { "epoch": 0.7692620451311242, "grad_norm": 0.12902162969112396, "learning_rate": 0.00012315671717685347, "loss": 0.9431, "step": 3784 }, { "epoch": 0.7694653384834316, "grad_norm": 0.1396799236536026, "learning_rate": 0.0001231363775043222, "loss": 1.1026, "step": 3785 }, { "epoch": 0.769668631835739, "grad_norm": 0.13856211304664612, "learning_rate": 0.00012311603783179091, "loss": 1.2084, "step": 3786 }, { "epoch": 0.7698719251880464, "grad_norm": 0.12457921355962753, "learning_rate": 0.00012309569815925964, "loss": 1.0894, "step": 3787 }, { "epoch": 0.7700752185403538, "grad_norm": 0.12745535373687744, "learning_rate": 0.00012307535848672836, "loss": 0.9223, "step": 3788 }, { "epoch": 0.770278511892661, "grad_norm": 0.12804381549358368, "learning_rate": 0.0001230550188141971, "loss": 1.139, "step": 3789 }, { "epoch": 0.7704818052449685, "grad_norm": 0.12827259302139282, "learning_rate": 0.0001230346791416658, "loss": 1.0397, "step": 3790 }, { "epoch": 0.7706850985972759, "grad_norm": 0.14576175808906555, "learning_rate": 0.00012301433946913454, "loss": 1.1592, "step": 3791 }, { "epoch": 0.7708883919495833, "grad_norm": 0.13071264326572418, "learning_rate": 0.0001229939997966033, "loss": 1.1051, "step": 3792 }, { "epoch": 0.7710916853018907, "grad_norm": 0.1294952780008316, "learning_rate": 0.000122973660124072, "loss": 0.8815, "step": 3793 }, { "epoch": 0.771294978654198, "grad_norm": 0.13996455073356628, "learning_rate": 0.00012295332045154074, "loss": 1.1277, "step": 3794 }, { "epoch": 0.7714982720065053, "grad_norm": 0.14250068366527557, "learning_rate": 0.00012293298077900946, "loss": 1.2124, "step": 3795 }, { "epoch": 0.7717015653588127, "grad_norm": 0.12336855381727219, "learning_rate": 0.00012291264110647819, "loss": 1.0764, "step": 3796 }, { "epoch": 0.7719048587111201, "grad_norm": 0.14124532043933868, "learning_rate": 0.0001228923014339469, "loss": 1.156, "step": 3797 }, { "epoch": 0.7721081520634275, "grad_norm": 0.12716175615787506, "learning_rate": 0.00012287196176141563, "loss": 0.9228, "step": 3798 }, { "epoch": 0.7723114454157349, "grad_norm": 0.1456788033246994, "learning_rate": 0.00012285162208888436, "loss": 1.1462, "step": 3799 }, { "epoch": 0.7725147387680423, "grad_norm": 0.14236094057559967, "learning_rate": 0.0001228312824163531, "loss": 1.2849, "step": 3800 }, { "epoch": 0.7727180321203496, "grad_norm": 0.12564775347709656, "learning_rate": 0.00012281094274382184, "loss": 1.1172, "step": 3801 }, { "epoch": 0.772921325472657, "grad_norm": 0.11949034035205841, "learning_rate": 0.00012279060307129056, "loss": 0.8353, "step": 3802 }, { "epoch": 0.7731246188249644, "grad_norm": 0.13106048107147217, "learning_rate": 0.00012277026339875928, "loss": 1.0445, "step": 3803 }, { "epoch": 0.7733279121772718, "grad_norm": 0.11542949080467224, "learning_rate": 0.000122749923726228, "loss": 0.8345, "step": 3804 }, { "epoch": 0.7735312055295792, "grad_norm": 0.1267216056585312, "learning_rate": 0.00012272958405369673, "loss": 0.9304, "step": 3805 }, { "epoch": 0.7737344988818866, "grad_norm": 0.14101152122020721, "learning_rate": 0.00012270924438116546, "loss": 1.1243, "step": 3806 }, { "epoch": 0.7739377922341939, "grad_norm": 0.12236955761909485, "learning_rate": 0.00012268890470863418, "loss": 0.855, "step": 3807 }, { "epoch": 0.7741410855865013, "grad_norm": 0.11732099950313568, "learning_rate": 0.00012266856503610293, "loss": 0.8707, "step": 3808 }, { "epoch": 0.7743443789388087, "grad_norm": 0.12826688587665558, "learning_rate": 0.00012264822536357166, "loss": 1.0896, "step": 3809 }, { "epoch": 0.7745476722911161, "grad_norm": 0.13263994455337524, "learning_rate": 0.00012262788569104038, "loss": 1.0278, "step": 3810 }, { "epoch": 0.7747509656434235, "grad_norm": 0.12591175734996796, "learning_rate": 0.0001226075460185091, "loss": 0.8749, "step": 3811 }, { "epoch": 0.7749542589957309, "grad_norm": 0.13671188056468964, "learning_rate": 0.00012258720634597786, "loss": 1.0305, "step": 3812 }, { "epoch": 0.7751575523480382, "grad_norm": 0.13743267953395844, "learning_rate": 0.00012256686667344656, "loss": 1.0444, "step": 3813 }, { "epoch": 0.7753608457003456, "grad_norm": 0.12400873005390167, "learning_rate": 0.00012254652700091528, "loss": 0.8696, "step": 3814 }, { "epoch": 0.775564139052653, "grad_norm": 0.13085900247097015, "learning_rate": 0.000122526187328384, "loss": 1.0268, "step": 3815 }, { "epoch": 0.7757674324049604, "grad_norm": 0.13684894144535065, "learning_rate": 0.00012250584765585276, "loss": 1.1531, "step": 3816 }, { "epoch": 0.7759707257572678, "grad_norm": 0.12287990748882294, "learning_rate": 0.00012248550798332148, "loss": 0.9642, "step": 3817 }, { "epoch": 0.7761740191095751, "grad_norm": 0.1277002990245819, "learning_rate": 0.0001224651683107902, "loss": 0.9986, "step": 3818 }, { "epoch": 0.7763773124618825, "grad_norm": 0.13747401535511017, "learning_rate": 0.00012244482863825893, "loss": 1.084, "step": 3819 }, { "epoch": 0.7765806058141899, "grad_norm": 0.14822441339492798, "learning_rate": 0.00012242448896572768, "loss": 0.9895, "step": 3820 }, { "epoch": 0.7767838991664973, "grad_norm": 0.13352279365062714, "learning_rate": 0.00012240414929319638, "loss": 1.0556, "step": 3821 }, { "epoch": 0.7769871925188047, "grad_norm": 0.14219939708709717, "learning_rate": 0.0001223838096206651, "loss": 1.2207, "step": 3822 }, { "epoch": 0.7771904858711121, "grad_norm": 0.14143721759319305, "learning_rate": 0.00012236346994813383, "loss": 1.1582, "step": 3823 }, { "epoch": 0.7773937792234193, "grad_norm": 0.14316944777965546, "learning_rate": 0.00012234313027560255, "loss": 1.1662, "step": 3824 }, { "epoch": 0.7775970725757267, "grad_norm": 0.1315951645374298, "learning_rate": 0.0001223227906030713, "loss": 0.9502, "step": 3825 }, { "epoch": 0.7778003659280341, "grad_norm": 0.14173437654972076, "learning_rate": 0.00012230245093054003, "loss": 1.0675, "step": 3826 }, { "epoch": 0.7780036592803415, "grad_norm": 0.11854992806911469, "learning_rate": 0.00012228211125800875, "loss": 1.0158, "step": 3827 }, { "epoch": 0.7782069526326489, "grad_norm": 0.1441982090473175, "learning_rate": 0.00012226177158547748, "loss": 0.9432, "step": 3828 }, { "epoch": 0.7784102459849563, "grad_norm": 0.13182631134986877, "learning_rate": 0.0001222414319129462, "loss": 0.9372, "step": 3829 }, { "epoch": 0.7786135393372636, "grad_norm": 0.12839631736278534, "learning_rate": 0.00012222109224041493, "loss": 1.0888, "step": 3830 }, { "epoch": 0.778816832689571, "grad_norm": 0.14066271483898163, "learning_rate": 0.00012220075256788365, "loss": 1.0749, "step": 3831 }, { "epoch": 0.7790201260418784, "grad_norm": 0.13321231305599213, "learning_rate": 0.00012218041289535237, "loss": 0.9994, "step": 3832 }, { "epoch": 0.7792234193941858, "grad_norm": 0.12681256234645844, "learning_rate": 0.00012216007322282113, "loss": 0.9195, "step": 3833 }, { "epoch": 0.7794267127464932, "grad_norm": 0.14184892177581787, "learning_rate": 0.00012213973355028985, "loss": 1.2513, "step": 3834 }, { "epoch": 0.7796300060988006, "grad_norm": 0.10922446101903915, "learning_rate": 0.00012211939387775858, "loss": 0.8768, "step": 3835 }, { "epoch": 0.7798332994511079, "grad_norm": 0.135145902633667, "learning_rate": 0.0001220990542052273, "loss": 1.1626, "step": 3836 }, { "epoch": 0.7800365928034153, "grad_norm": 0.1317375898361206, "learning_rate": 0.00012207871453269602, "loss": 1.1827, "step": 3837 }, { "epoch": 0.7802398861557227, "grad_norm": 0.1291121393442154, "learning_rate": 0.00012205837486016476, "loss": 1.0689, "step": 3838 }, { "epoch": 0.7804431795080301, "grad_norm": 0.12500061094760895, "learning_rate": 0.00012203803518763349, "loss": 0.9881, "step": 3839 }, { "epoch": 0.7806464728603375, "grad_norm": 0.1250467747449875, "learning_rate": 0.0001220176955151022, "loss": 0.953, "step": 3840 }, { "epoch": 0.7808497662126448, "grad_norm": 0.1281813234090805, "learning_rate": 0.00012199735584257095, "loss": 1.0164, "step": 3841 }, { "epoch": 0.7810530595649522, "grad_norm": 0.11305128782987595, "learning_rate": 0.00012197701617003967, "loss": 0.8044, "step": 3842 }, { "epoch": 0.7812563529172596, "grad_norm": 0.14512300491333008, "learning_rate": 0.0001219566764975084, "loss": 1.121, "step": 3843 }, { "epoch": 0.781459646269567, "grad_norm": 0.1404501348733902, "learning_rate": 0.00012193633682497711, "loss": 1.1358, "step": 3844 }, { "epoch": 0.7816629396218744, "grad_norm": 0.13621416687965393, "learning_rate": 0.00012191599715244586, "loss": 1.0535, "step": 3845 }, { "epoch": 0.7818662329741818, "grad_norm": 0.11842742562294006, "learning_rate": 0.00012189565747991459, "loss": 0.9138, "step": 3846 }, { "epoch": 0.7820695263264891, "grad_norm": 0.14051960408687592, "learning_rate": 0.00012187531780738331, "loss": 1.0012, "step": 3847 }, { "epoch": 0.7822728196787965, "grad_norm": 0.12643176317214966, "learning_rate": 0.00012185497813485202, "loss": 0.9594, "step": 3848 }, { "epoch": 0.7824761130311039, "grad_norm": 0.13830742239952087, "learning_rate": 0.00012183463846232077, "loss": 1.0394, "step": 3849 }, { "epoch": 0.7826794063834113, "grad_norm": 0.12013565003871918, "learning_rate": 0.0001218142987897895, "loss": 0.8788, "step": 3850 }, { "epoch": 0.7828826997357187, "grad_norm": 0.14404936134815216, "learning_rate": 0.00012179395911725822, "loss": 1.0755, "step": 3851 }, { "epoch": 0.7830859930880261, "grad_norm": 0.1242976263165474, "learning_rate": 0.00012177361944472693, "loss": 0.9733, "step": 3852 }, { "epoch": 0.7832892864403334, "grad_norm": 0.13720235228538513, "learning_rate": 0.00012175327977219568, "loss": 1.1624, "step": 3853 }, { "epoch": 0.7834925797926408, "grad_norm": 0.11972963809967041, "learning_rate": 0.00012173294009966441, "loss": 0.8956, "step": 3854 }, { "epoch": 0.7836958731449482, "grad_norm": 0.1539568156003952, "learning_rate": 0.00012171260042713313, "loss": 1.1909, "step": 3855 }, { "epoch": 0.7838991664972556, "grad_norm": 0.11814553290605545, "learning_rate": 0.00012169226075460186, "loss": 0.8093, "step": 3856 }, { "epoch": 0.784102459849563, "grad_norm": 0.1346539407968521, "learning_rate": 0.0001216719210820706, "loss": 1.0815, "step": 3857 }, { "epoch": 0.7843057532018703, "grad_norm": 0.12297804653644562, "learning_rate": 0.00012165158140953932, "loss": 0.9216, "step": 3858 }, { "epoch": 0.7845090465541776, "grad_norm": 0.13503801822662354, "learning_rate": 0.00012163124173700804, "loss": 1.2251, "step": 3859 }, { "epoch": 0.784712339906485, "grad_norm": 0.1486554741859436, "learning_rate": 0.00012161090206447677, "loss": 1.0849, "step": 3860 }, { "epoch": 0.7849156332587924, "grad_norm": 0.13975729048252106, "learning_rate": 0.0001215905623919455, "loss": 1.0914, "step": 3861 }, { "epoch": 0.7851189266110998, "grad_norm": 0.14087165892124176, "learning_rate": 0.00012157022271941423, "loss": 1.2099, "step": 3862 }, { "epoch": 0.7853222199634072, "grad_norm": 0.14210177958011627, "learning_rate": 0.00012154988304688296, "loss": 1.1608, "step": 3863 }, { "epoch": 0.7855255133157146, "grad_norm": 0.14731276035308838, "learning_rate": 0.00012152954337435168, "loss": 1.1699, "step": 3864 }, { "epoch": 0.7857288066680219, "grad_norm": 0.12955504655838013, "learning_rate": 0.00012150920370182039, "loss": 1.0048, "step": 3865 }, { "epoch": 0.7859321000203293, "grad_norm": 0.13563545048236847, "learning_rate": 0.00012148886402928914, "loss": 1.0157, "step": 3866 }, { "epoch": 0.7861353933726367, "grad_norm": 0.12085787951946259, "learning_rate": 0.00012146852435675787, "loss": 0.9674, "step": 3867 }, { "epoch": 0.7863386867249441, "grad_norm": 0.12295902520418167, "learning_rate": 0.00012144818468422659, "loss": 0.9919, "step": 3868 }, { "epoch": 0.7865419800772515, "grad_norm": 0.12418414652347565, "learning_rate": 0.0001214278450116953, "loss": 1.0134, "step": 3869 }, { "epoch": 0.7867452734295588, "grad_norm": 0.13931810855865479, "learning_rate": 0.00012140750533916405, "loss": 0.9817, "step": 3870 }, { "epoch": 0.7869485667818662, "grad_norm": 0.14642778038978577, "learning_rate": 0.00012138716566663278, "loss": 1.0308, "step": 3871 }, { "epoch": 0.7871518601341736, "grad_norm": 0.1386035829782486, "learning_rate": 0.0001213668259941015, "loss": 1.0426, "step": 3872 }, { "epoch": 0.787355153486481, "grad_norm": 0.1395215541124344, "learning_rate": 0.00012134648632157021, "loss": 1.1342, "step": 3873 }, { "epoch": 0.7875584468387884, "grad_norm": 0.11670932918787003, "learning_rate": 0.00012132614664903896, "loss": 1.0713, "step": 3874 }, { "epoch": 0.7877617401910958, "grad_norm": 0.13401034474372864, "learning_rate": 0.00012130580697650769, "loss": 1.1358, "step": 3875 }, { "epoch": 0.7879650335434031, "grad_norm": 0.14385886490345, "learning_rate": 0.00012128546730397641, "loss": 1.1176, "step": 3876 }, { "epoch": 0.7881683268957105, "grad_norm": 0.12759087979793549, "learning_rate": 0.00012126512763144512, "loss": 1.0423, "step": 3877 }, { "epoch": 0.7883716202480179, "grad_norm": 0.155388742685318, "learning_rate": 0.00012124478795891388, "loss": 1.2041, "step": 3878 }, { "epoch": 0.7885749136003253, "grad_norm": 0.13593384623527527, "learning_rate": 0.0001212244482863826, "loss": 1.1223, "step": 3879 }, { "epoch": 0.7887782069526327, "grad_norm": 0.12947037816047668, "learning_rate": 0.00012120410861385133, "loss": 0.8951, "step": 3880 }, { "epoch": 0.7889815003049401, "grad_norm": 0.13151758909225464, "learning_rate": 0.00012118376894132004, "loss": 1.0879, "step": 3881 }, { "epoch": 0.7891847936572474, "grad_norm": 0.12490543723106384, "learning_rate": 0.00012116342926878879, "loss": 1.0054, "step": 3882 }, { "epoch": 0.7893880870095548, "grad_norm": 0.13789287209510803, "learning_rate": 0.00012114308959625751, "loss": 1.1069, "step": 3883 }, { "epoch": 0.7895913803618622, "grad_norm": 0.13065920770168304, "learning_rate": 0.00012112274992372624, "loss": 1.034, "step": 3884 }, { "epoch": 0.7897946737141696, "grad_norm": 0.1365562081336975, "learning_rate": 0.00012110241025119495, "loss": 1.1702, "step": 3885 }, { "epoch": 0.789997967066477, "grad_norm": 0.14647583663463593, "learning_rate": 0.0001210820705786637, "loss": 1.1919, "step": 3886 }, { "epoch": 0.7902012604187844, "grad_norm": 0.1417173445224762, "learning_rate": 0.00012106173090613242, "loss": 0.9846, "step": 3887 }, { "epoch": 0.7904045537710916, "grad_norm": 0.12423622608184814, "learning_rate": 0.00012104139123360115, "loss": 1.0007, "step": 3888 }, { "epoch": 0.790607847123399, "grad_norm": 0.1554161161184311, "learning_rate": 0.00012102105156106986, "loss": 1.3904, "step": 3889 }, { "epoch": 0.7908111404757064, "grad_norm": 0.14858123660087585, "learning_rate": 0.00012100071188853861, "loss": 1.1642, "step": 3890 }, { "epoch": 0.7910144338280138, "grad_norm": 0.1301809698343277, "learning_rate": 0.00012098037221600733, "loss": 1.0904, "step": 3891 }, { "epoch": 0.7912177271803212, "grad_norm": 0.13561727106571198, "learning_rate": 0.00012096003254347606, "loss": 1.1204, "step": 3892 }, { "epoch": 0.7914210205326286, "grad_norm": 0.13255447149276733, "learning_rate": 0.00012093969287094477, "loss": 1.0225, "step": 3893 }, { "epoch": 0.7916243138849359, "grad_norm": 0.14348706603050232, "learning_rate": 0.00012091935319841352, "loss": 1.1129, "step": 3894 }, { "epoch": 0.7918276072372433, "grad_norm": 0.1501035988330841, "learning_rate": 0.00012089901352588225, "loss": 1.1329, "step": 3895 }, { "epoch": 0.7920309005895507, "grad_norm": 0.11853793263435364, "learning_rate": 0.00012087867385335097, "loss": 1.0178, "step": 3896 }, { "epoch": 0.7922341939418581, "grad_norm": 0.12795880436897278, "learning_rate": 0.00012085833418081968, "loss": 0.9846, "step": 3897 }, { "epoch": 0.7924374872941655, "grad_norm": 0.13203004002571106, "learning_rate": 0.00012083799450828843, "loss": 1.0961, "step": 3898 }, { "epoch": 0.7926407806464728, "grad_norm": 0.1426658183336258, "learning_rate": 0.00012081765483575716, "loss": 1.101, "step": 3899 }, { "epoch": 0.7928440739987802, "grad_norm": 0.1364053338766098, "learning_rate": 0.00012079731516322588, "loss": 1.0818, "step": 3900 }, { "epoch": 0.7930473673510876, "grad_norm": 0.1505763679742813, "learning_rate": 0.00012077697549069459, "loss": 1.4502, "step": 3901 }, { "epoch": 0.793250660703395, "grad_norm": 0.133381187915802, "learning_rate": 0.00012075663581816334, "loss": 1.0036, "step": 3902 }, { "epoch": 0.7934539540557024, "grad_norm": 0.12915650010108948, "learning_rate": 0.00012073629614563207, "loss": 0.8927, "step": 3903 }, { "epoch": 0.7936572474080098, "grad_norm": 0.12571905553340912, "learning_rate": 0.0001207159564731008, "loss": 0.8493, "step": 3904 }, { "epoch": 0.7938605407603171, "grad_norm": 0.13056504726409912, "learning_rate": 0.0001206956168005695, "loss": 1.0281, "step": 3905 }, { "epoch": 0.7940638341126245, "grad_norm": 0.13789808750152588, "learning_rate": 0.00012067527712803826, "loss": 1.1499, "step": 3906 }, { "epoch": 0.7942671274649319, "grad_norm": 0.136823832988739, "learning_rate": 0.00012065493745550698, "loss": 1.122, "step": 3907 }, { "epoch": 0.7944704208172393, "grad_norm": 0.1468329280614853, "learning_rate": 0.0001206345977829757, "loss": 1.1591, "step": 3908 }, { "epoch": 0.7946737141695467, "grad_norm": 0.12451114505529404, "learning_rate": 0.00012061425811044442, "loss": 0.9909, "step": 3909 }, { "epoch": 0.7948770075218541, "grad_norm": 0.13311980664730072, "learning_rate": 0.00012059391843791314, "loss": 1.0117, "step": 3910 }, { "epoch": 0.7950803008741614, "grad_norm": 0.13651201128959656, "learning_rate": 0.00012057357876538189, "loss": 1.0057, "step": 3911 }, { "epoch": 0.7952835942264688, "grad_norm": 0.14184725284576416, "learning_rate": 0.00012055323909285062, "loss": 1.1766, "step": 3912 }, { "epoch": 0.7954868875787762, "grad_norm": 0.12303798645734787, "learning_rate": 0.00012053289942031933, "loss": 0.9439, "step": 3913 }, { "epoch": 0.7956901809310836, "grad_norm": 0.12423896044492722, "learning_rate": 0.00012051255974778805, "loss": 1.0675, "step": 3914 }, { "epoch": 0.795893474283391, "grad_norm": 0.16398456692695618, "learning_rate": 0.0001204922200752568, "loss": 1.2481, "step": 3915 }, { "epoch": 0.7960967676356984, "grad_norm": 0.13194435834884644, "learning_rate": 0.00012047188040272553, "loss": 1.0274, "step": 3916 }, { "epoch": 0.7963000609880057, "grad_norm": 0.13115055859088898, "learning_rate": 0.00012045154073019425, "loss": 0.9719, "step": 3917 }, { "epoch": 0.796503354340313, "grad_norm": 0.1376492977142334, "learning_rate": 0.00012043120105766296, "loss": 1.1586, "step": 3918 }, { "epoch": 0.7967066476926205, "grad_norm": 0.13110294938087463, "learning_rate": 0.00012041086138513171, "loss": 1.0523, "step": 3919 }, { "epoch": 0.7969099410449278, "grad_norm": 0.13019250333309174, "learning_rate": 0.00012039052171260044, "loss": 0.9233, "step": 3920 }, { "epoch": 0.7971132343972352, "grad_norm": 0.14026646316051483, "learning_rate": 0.00012037018204006916, "loss": 1.1442, "step": 3921 }, { "epoch": 0.7973165277495425, "grad_norm": 0.13189871609210968, "learning_rate": 0.00012034984236753787, "loss": 1.0618, "step": 3922 }, { "epoch": 0.7975198211018499, "grad_norm": 0.1411222368478775, "learning_rate": 0.00012032950269500663, "loss": 1.161, "step": 3923 }, { "epoch": 0.7977231144541573, "grad_norm": 0.13838204741477966, "learning_rate": 0.00012030916302247535, "loss": 0.9388, "step": 3924 }, { "epoch": 0.7979264078064647, "grad_norm": 0.13982777297496796, "learning_rate": 0.00012028882334994408, "loss": 1.2115, "step": 3925 }, { "epoch": 0.7981297011587721, "grad_norm": 0.16525013744831085, "learning_rate": 0.00012026848367741279, "loss": 1.3161, "step": 3926 }, { "epoch": 0.7983329945110795, "grad_norm": 0.12277159839868546, "learning_rate": 0.00012024814400488154, "loss": 0.9172, "step": 3927 }, { "epoch": 0.7985362878633868, "grad_norm": 0.13560567796230316, "learning_rate": 0.00012022780433235026, "loss": 1.1087, "step": 3928 }, { "epoch": 0.7987395812156942, "grad_norm": 0.13598614931106567, "learning_rate": 0.00012020746465981899, "loss": 1.0942, "step": 3929 }, { "epoch": 0.7989428745680016, "grad_norm": 0.12996844947338104, "learning_rate": 0.0001201871249872877, "loss": 1.1133, "step": 3930 }, { "epoch": 0.799146167920309, "grad_norm": 0.14834141731262207, "learning_rate": 0.00012016678531475645, "loss": 1.0568, "step": 3931 }, { "epoch": 0.7993494612726164, "grad_norm": 0.14238953590393066, "learning_rate": 0.00012014644564222517, "loss": 1.0885, "step": 3932 }, { "epoch": 0.7995527546249238, "grad_norm": 0.14690124988555908, "learning_rate": 0.0001201261059696939, "loss": 1.1593, "step": 3933 }, { "epoch": 0.7997560479772311, "grad_norm": 0.12939292192459106, "learning_rate": 0.00012010576629716261, "loss": 1.0157, "step": 3934 }, { "epoch": 0.7999593413295385, "grad_norm": 0.14343731105327606, "learning_rate": 0.00012008542662463136, "loss": 1.1929, "step": 3935 }, { "epoch": 0.8001626346818459, "grad_norm": 0.14443133771419525, "learning_rate": 0.00012006508695210008, "loss": 1.1939, "step": 3936 }, { "epoch": 0.8003659280341533, "grad_norm": 0.1503942608833313, "learning_rate": 0.00012004474727956881, "loss": 1.2368, "step": 3937 }, { "epoch": 0.8005692213864607, "grad_norm": 0.14112812280654907, "learning_rate": 0.00012002440760703752, "loss": 1.2037, "step": 3938 }, { "epoch": 0.8007725147387681, "grad_norm": 0.1394345462322235, "learning_rate": 0.00012000406793450627, "loss": 1.0076, "step": 3939 }, { "epoch": 0.8009758080910754, "grad_norm": 0.12623324990272522, "learning_rate": 0.000119983728261975, "loss": 1.0405, "step": 3940 }, { "epoch": 0.8011791014433828, "grad_norm": 0.14469188451766968, "learning_rate": 0.00011996338858944372, "loss": 1.2748, "step": 3941 }, { "epoch": 0.8013823947956902, "grad_norm": 0.14259637892246246, "learning_rate": 0.00011994304891691243, "loss": 1.1081, "step": 3942 }, { "epoch": 0.8015856881479976, "grad_norm": 0.13012677431106567, "learning_rate": 0.00011992270924438118, "loss": 0.9882, "step": 3943 }, { "epoch": 0.801788981500305, "grad_norm": 0.1390579491853714, "learning_rate": 0.00011990236957184991, "loss": 1.0806, "step": 3944 }, { "epoch": 0.8019922748526124, "grad_norm": 0.11860685795545578, "learning_rate": 0.00011988202989931863, "loss": 0.9036, "step": 3945 }, { "epoch": 0.8021955682049197, "grad_norm": 0.12979279458522797, "learning_rate": 0.00011986169022678734, "loss": 0.9918, "step": 3946 }, { "epoch": 0.8023988615572271, "grad_norm": 0.1302417367696762, "learning_rate": 0.0001198413505542561, "loss": 1.0498, "step": 3947 }, { "epoch": 0.8026021549095345, "grad_norm": 0.12292234599590302, "learning_rate": 0.00011982101088172482, "loss": 1.0181, "step": 3948 }, { "epoch": 0.8028054482618419, "grad_norm": 0.12002125382423401, "learning_rate": 0.00011980067120919354, "loss": 0.9503, "step": 3949 }, { "epoch": 0.8030087416141493, "grad_norm": 0.1403089463710785, "learning_rate": 0.00011978033153666225, "loss": 1.033, "step": 3950 }, { "epoch": 0.8032120349664565, "grad_norm": 0.13033491373062134, "learning_rate": 0.00011975999186413098, "loss": 0.9713, "step": 3951 }, { "epoch": 0.8034153283187639, "grad_norm": 0.14898493885993958, "learning_rate": 0.00011973965219159973, "loss": 1.1566, "step": 3952 }, { "epoch": 0.8036186216710713, "grad_norm": 0.1320907324552536, "learning_rate": 0.00011971931251906845, "loss": 0.9786, "step": 3953 }, { "epoch": 0.8038219150233787, "grad_norm": 0.14695419371128082, "learning_rate": 0.00011969897284653717, "loss": 1.1951, "step": 3954 }, { "epoch": 0.8040252083756861, "grad_norm": 0.13139761984348297, "learning_rate": 0.00011967863317400589, "loss": 1.0117, "step": 3955 }, { "epoch": 0.8042285017279935, "grad_norm": 0.13509678840637207, "learning_rate": 0.00011965829350147464, "loss": 1.0577, "step": 3956 }, { "epoch": 0.8044317950803008, "grad_norm": 0.12979759275913239, "learning_rate": 0.00011963795382894337, "loss": 1.0552, "step": 3957 }, { "epoch": 0.8046350884326082, "grad_norm": 0.12381764501333237, "learning_rate": 0.00011961761415641208, "loss": 1.045, "step": 3958 }, { "epoch": 0.8048383817849156, "grad_norm": 0.1273113489151001, "learning_rate": 0.0001195972744838808, "loss": 1.0352, "step": 3959 }, { "epoch": 0.805041675137223, "grad_norm": 0.13506780564785004, "learning_rate": 0.00011957693481134955, "loss": 1.0183, "step": 3960 }, { "epoch": 0.8052449684895304, "grad_norm": 0.13643690943717957, "learning_rate": 0.00011955659513881828, "loss": 0.9271, "step": 3961 }, { "epoch": 0.8054482618418378, "grad_norm": 0.13394352793693542, "learning_rate": 0.00011953625546628699, "loss": 1.01, "step": 3962 }, { "epoch": 0.8056515551941451, "grad_norm": 0.12335560470819473, "learning_rate": 0.00011951591579375571, "loss": 1.0166, "step": 3963 }, { "epoch": 0.8058548485464525, "grad_norm": 0.13454771041870117, "learning_rate": 0.00011949557612122446, "loss": 0.9962, "step": 3964 }, { "epoch": 0.8060581418987599, "grad_norm": 0.13157734274864197, "learning_rate": 0.00011947523644869319, "loss": 1.0938, "step": 3965 }, { "epoch": 0.8062614352510673, "grad_norm": 0.1234944686293602, "learning_rate": 0.0001194548967761619, "loss": 0.962, "step": 3966 }, { "epoch": 0.8064647286033747, "grad_norm": 0.1396668702363968, "learning_rate": 0.00011943455710363062, "loss": 1.1383, "step": 3967 }, { "epoch": 0.8066680219556821, "grad_norm": 0.14107537269592285, "learning_rate": 0.00011941421743109938, "loss": 0.9585, "step": 3968 }, { "epoch": 0.8068713153079894, "grad_norm": 0.14701543748378754, "learning_rate": 0.0001193938777585681, "loss": 1.1863, "step": 3969 }, { "epoch": 0.8070746086602968, "grad_norm": 0.13169316947460175, "learning_rate": 0.00011937353808603681, "loss": 1.1441, "step": 3970 }, { "epoch": 0.8072779020126042, "grad_norm": 0.1322471648454666, "learning_rate": 0.00011935319841350554, "loss": 1.0807, "step": 3971 }, { "epoch": 0.8074811953649116, "grad_norm": 0.1394157111644745, "learning_rate": 0.00011933285874097429, "loss": 1.1423, "step": 3972 }, { "epoch": 0.807684488717219, "grad_norm": 0.13044828176498413, "learning_rate": 0.00011931251906844301, "loss": 1.0136, "step": 3973 }, { "epoch": 0.8078877820695263, "grad_norm": 0.14347662031650543, "learning_rate": 0.00011929217939591174, "loss": 1.0989, "step": 3974 }, { "epoch": 0.8080910754218337, "grad_norm": 0.13873308897018433, "learning_rate": 0.00011927183972338045, "loss": 1.0194, "step": 3975 }, { "epoch": 0.8082943687741411, "grad_norm": 0.1266692876815796, "learning_rate": 0.0001192515000508492, "loss": 1.0114, "step": 3976 }, { "epoch": 0.8084976621264485, "grad_norm": 0.13810694217681885, "learning_rate": 0.00011923116037831792, "loss": 1.0433, "step": 3977 }, { "epoch": 0.8087009554787559, "grad_norm": 0.14580698311328888, "learning_rate": 0.00011921082070578665, "loss": 1.1527, "step": 3978 }, { "epoch": 0.8089042488310633, "grad_norm": 0.14972800016403198, "learning_rate": 0.00011919048103325536, "loss": 1.2265, "step": 3979 }, { "epoch": 0.8091075421833706, "grad_norm": 0.14041751623153687, "learning_rate": 0.00011917014136072411, "loss": 1.0299, "step": 3980 }, { "epoch": 0.809310835535678, "grad_norm": 0.14178511500358582, "learning_rate": 0.00011914980168819283, "loss": 1.1151, "step": 3981 }, { "epoch": 0.8095141288879854, "grad_norm": 0.14620938897132874, "learning_rate": 0.00011912946201566156, "loss": 1.1133, "step": 3982 }, { "epoch": 0.8097174222402928, "grad_norm": 0.14342685043811798, "learning_rate": 0.00011910912234313027, "loss": 0.9168, "step": 3983 }, { "epoch": 0.8099207155926001, "grad_norm": 0.1431896686553955, "learning_rate": 0.00011908878267059902, "loss": 1.1063, "step": 3984 }, { "epoch": 0.8101240089449075, "grad_norm": 0.13744968175888062, "learning_rate": 0.00011906844299806775, "loss": 1.0406, "step": 3985 }, { "epoch": 0.8103273022972148, "grad_norm": 0.1373889297246933, "learning_rate": 0.00011904810332553647, "loss": 1.0499, "step": 3986 }, { "epoch": 0.8105305956495222, "grad_norm": 0.13776156306266785, "learning_rate": 0.00011902776365300518, "loss": 1.0377, "step": 3987 }, { "epoch": 0.8107338890018296, "grad_norm": 0.13153620064258575, "learning_rate": 0.00011900742398047393, "loss": 0.995, "step": 3988 }, { "epoch": 0.810937182354137, "grad_norm": 0.153978168964386, "learning_rate": 0.00011898708430794266, "loss": 1.1654, "step": 3989 }, { "epoch": 0.8111404757064444, "grad_norm": 0.14656215906143188, "learning_rate": 0.00011896674463541138, "loss": 0.9426, "step": 3990 }, { "epoch": 0.8113437690587518, "grad_norm": 0.13378344476222992, "learning_rate": 0.00011894640496288009, "loss": 0.9992, "step": 3991 }, { "epoch": 0.8115470624110591, "grad_norm": 0.14194630086421967, "learning_rate": 0.00011892606529034882, "loss": 0.9897, "step": 3992 }, { "epoch": 0.8117503557633665, "grad_norm": 0.12744341790676117, "learning_rate": 0.00011890572561781757, "loss": 1.0271, "step": 3993 }, { "epoch": 0.8119536491156739, "grad_norm": 0.12593428790569305, "learning_rate": 0.0001188853859452863, "loss": 0.8918, "step": 3994 }, { "epoch": 0.8121569424679813, "grad_norm": 0.1353382021188736, "learning_rate": 0.000118865046272755, "loss": 0.9951, "step": 3995 }, { "epoch": 0.8123602358202887, "grad_norm": 0.13705074787139893, "learning_rate": 0.00011884470660022373, "loss": 0.9689, "step": 3996 }, { "epoch": 0.8125635291725961, "grad_norm": 0.1512332260608673, "learning_rate": 0.00011882436692769248, "loss": 1.1077, "step": 3997 }, { "epoch": 0.8127668225249034, "grad_norm": 0.1288524866104126, "learning_rate": 0.0001188040272551612, "loss": 1.0678, "step": 3998 }, { "epoch": 0.8129701158772108, "grad_norm": 0.13065043091773987, "learning_rate": 0.00011878368758262992, "loss": 1.0389, "step": 3999 }, { "epoch": 0.8131734092295182, "grad_norm": 0.14276419579982758, "learning_rate": 0.00011876334791009864, "loss": 1.0811, "step": 4000 }, { "epoch": 0.8133767025818256, "grad_norm": 0.12206871807575226, "learning_rate": 0.00011874300823756739, "loss": 0.8628, "step": 4001 }, { "epoch": 0.813579995934133, "grad_norm": 0.13889804482460022, "learning_rate": 0.00011872266856503612, "loss": 1.026, "step": 4002 }, { "epoch": 0.8137832892864403, "grad_norm": 0.13615025579929352, "learning_rate": 0.00011870232889250483, "loss": 1.0019, "step": 4003 }, { "epoch": 0.8139865826387477, "grad_norm": 0.12381166964769363, "learning_rate": 0.00011868198921997355, "loss": 0.9913, "step": 4004 }, { "epoch": 0.8141898759910551, "grad_norm": 0.13503628969192505, "learning_rate": 0.0001186616495474423, "loss": 1.0766, "step": 4005 }, { "epoch": 0.8143931693433625, "grad_norm": 0.143154576420784, "learning_rate": 0.00011864130987491103, "loss": 1.1219, "step": 4006 }, { "epoch": 0.8145964626956699, "grad_norm": 0.15656810998916626, "learning_rate": 0.00011862097020237974, "loss": 1.193, "step": 4007 }, { "epoch": 0.8147997560479773, "grad_norm": 0.14031293988227844, "learning_rate": 0.00011860063052984846, "loss": 0.9923, "step": 4008 }, { "epoch": 0.8150030494002846, "grad_norm": 0.13734276592731476, "learning_rate": 0.00011858029085731721, "loss": 1.0802, "step": 4009 }, { "epoch": 0.815206342752592, "grad_norm": 0.1422613561153412, "learning_rate": 0.00011855995118478594, "loss": 1.1129, "step": 4010 }, { "epoch": 0.8154096361048994, "grad_norm": 0.12899209558963776, "learning_rate": 0.00011853961151225465, "loss": 0.8745, "step": 4011 }, { "epoch": 0.8156129294572068, "grad_norm": 0.1424800455570221, "learning_rate": 0.00011851927183972337, "loss": 1.1192, "step": 4012 }, { "epoch": 0.8158162228095142, "grad_norm": 0.12653465569019318, "learning_rate": 0.00011849893216719213, "loss": 1.0847, "step": 4013 }, { "epoch": 0.8160195161618216, "grad_norm": 0.13867273926734924, "learning_rate": 0.00011847859249466085, "loss": 1.104, "step": 4014 }, { "epoch": 0.8162228095141288, "grad_norm": 0.12459316849708557, "learning_rate": 0.00011845825282212956, "loss": 0.9407, "step": 4015 }, { "epoch": 0.8164261028664362, "grad_norm": 0.11136915534734726, "learning_rate": 0.00011843791314959829, "loss": 0.8423, "step": 4016 }, { "epoch": 0.8166293962187436, "grad_norm": 0.13761533796787262, "learning_rate": 0.00011841757347706704, "loss": 1.0824, "step": 4017 }, { "epoch": 0.816832689571051, "grad_norm": 0.12137118726968765, "learning_rate": 0.00011839723380453576, "loss": 0.8622, "step": 4018 }, { "epoch": 0.8170359829233584, "grad_norm": 0.13748018443584442, "learning_rate": 0.00011837689413200447, "loss": 1.0754, "step": 4019 }, { "epoch": 0.8172392762756658, "grad_norm": 0.13673032820224762, "learning_rate": 0.0001183565544594732, "loss": 1.1766, "step": 4020 }, { "epoch": 0.8174425696279731, "grad_norm": 0.13649223744869232, "learning_rate": 0.00011833621478694195, "loss": 1.0473, "step": 4021 }, { "epoch": 0.8176458629802805, "grad_norm": 0.1405959576368332, "learning_rate": 0.00011831587511441067, "loss": 1.1525, "step": 4022 }, { "epoch": 0.8178491563325879, "grad_norm": 0.13079403340816498, "learning_rate": 0.00011829553544187938, "loss": 1.0007, "step": 4023 }, { "epoch": 0.8180524496848953, "grad_norm": 0.1415160447359085, "learning_rate": 0.00011827519576934811, "loss": 1.1784, "step": 4024 }, { "epoch": 0.8182557430372027, "grad_norm": 0.15281791985034943, "learning_rate": 0.00011825485609681686, "loss": 1.2122, "step": 4025 }, { "epoch": 0.81845903638951, "grad_norm": 0.138424813747406, "learning_rate": 0.00011823451642428558, "loss": 1.171, "step": 4026 }, { "epoch": 0.8186623297418174, "grad_norm": 0.13693661987781525, "learning_rate": 0.0001182141767517543, "loss": 1.1098, "step": 4027 }, { "epoch": 0.8188656230941248, "grad_norm": 0.13167519867420197, "learning_rate": 0.00011819383707922302, "loss": 0.9446, "step": 4028 }, { "epoch": 0.8190689164464322, "grad_norm": 0.13005776703357697, "learning_rate": 0.00011817349740669177, "loss": 0.9349, "step": 4029 }, { "epoch": 0.8192722097987396, "grad_norm": 0.1340433955192566, "learning_rate": 0.0001181531577341605, "loss": 1.1293, "step": 4030 }, { "epoch": 0.819475503151047, "grad_norm": 0.13463421165943146, "learning_rate": 0.0001181328180616292, "loss": 0.995, "step": 4031 }, { "epoch": 0.8196787965033543, "grad_norm": 0.13709862530231476, "learning_rate": 0.00011811247838909793, "loss": 1.0882, "step": 4032 }, { "epoch": 0.8198820898556617, "grad_norm": 0.1316375732421875, "learning_rate": 0.00011809213871656666, "loss": 1.1652, "step": 4033 }, { "epoch": 0.8200853832079691, "grad_norm": 0.12954191863536835, "learning_rate": 0.00011807179904403541, "loss": 1.0258, "step": 4034 }, { "epoch": 0.8202886765602765, "grad_norm": 0.15265285968780518, "learning_rate": 0.00011805145937150413, "loss": 1.162, "step": 4035 }, { "epoch": 0.8204919699125839, "grad_norm": 0.14315763115882874, "learning_rate": 0.00011803111969897284, "loss": 1.1478, "step": 4036 }, { "epoch": 0.8206952632648913, "grad_norm": 0.1538948267698288, "learning_rate": 0.00011801078002644157, "loss": 1.2476, "step": 4037 }, { "epoch": 0.8208985566171986, "grad_norm": 0.13834591209888458, "learning_rate": 0.00011799044035391032, "loss": 1.0098, "step": 4038 }, { "epoch": 0.821101849969506, "grad_norm": 0.12419674545526505, "learning_rate": 0.00011797010068137904, "loss": 0.9885, "step": 4039 }, { "epoch": 0.8213051433218134, "grad_norm": 0.12338082492351532, "learning_rate": 0.00011794976100884775, "loss": 0.8725, "step": 4040 }, { "epoch": 0.8215084366741208, "grad_norm": 0.12143974751234055, "learning_rate": 0.00011792942133631648, "loss": 0.8032, "step": 4041 }, { "epoch": 0.8217117300264282, "grad_norm": 0.11844722181558609, "learning_rate": 0.00011790908166378523, "loss": 0.9635, "step": 4042 }, { "epoch": 0.8219150233787356, "grad_norm": 0.12964794039726257, "learning_rate": 0.00011788874199125395, "loss": 0.9951, "step": 4043 }, { "epoch": 0.8221183167310429, "grad_norm": 0.11839304864406586, "learning_rate": 0.00011786840231872267, "loss": 0.9279, "step": 4044 }, { "epoch": 0.8223216100833503, "grad_norm": 0.1318419724702835, "learning_rate": 0.00011784806264619139, "loss": 0.9903, "step": 4045 }, { "epoch": 0.8225249034356577, "grad_norm": 0.12436816096305847, "learning_rate": 0.00011782772297366014, "loss": 0.9732, "step": 4046 }, { "epoch": 0.822728196787965, "grad_norm": 0.14113591611385345, "learning_rate": 0.00011780738330112887, "loss": 1.1141, "step": 4047 }, { "epoch": 0.8229314901402724, "grad_norm": 0.12076539546251297, "learning_rate": 0.00011778704362859758, "loss": 0.9827, "step": 4048 }, { "epoch": 0.8231347834925798, "grad_norm": 0.13398289680480957, "learning_rate": 0.0001177667039560663, "loss": 1.0446, "step": 4049 }, { "epoch": 0.8233380768448871, "grad_norm": 0.14761167764663696, "learning_rate": 0.00011774636428353505, "loss": 1.1347, "step": 4050 }, { "epoch": 0.8235413701971945, "grad_norm": 0.13124649226665497, "learning_rate": 0.00011772602461100378, "loss": 1.1484, "step": 4051 }, { "epoch": 0.8237446635495019, "grad_norm": 0.1205100268125534, "learning_rate": 0.00011770568493847249, "loss": 0.8889, "step": 4052 }, { "epoch": 0.8239479569018093, "grad_norm": 0.14440268278121948, "learning_rate": 0.00011768534526594121, "loss": 1.0888, "step": 4053 }, { "epoch": 0.8241512502541167, "grad_norm": 0.12991003692150116, "learning_rate": 0.00011766500559340996, "loss": 1.1111, "step": 4054 }, { "epoch": 0.824354543606424, "grad_norm": 0.13993045687675476, "learning_rate": 0.00011764466592087869, "loss": 1.1363, "step": 4055 }, { "epoch": 0.8245578369587314, "grad_norm": 0.14084355533123016, "learning_rate": 0.0001176243262483474, "loss": 1.2527, "step": 4056 }, { "epoch": 0.8247611303110388, "grad_norm": 0.13060720264911652, "learning_rate": 0.00011760398657581612, "loss": 0.9782, "step": 4057 }, { "epoch": 0.8249644236633462, "grad_norm": 0.13368849456310272, "learning_rate": 0.00011758364690328488, "loss": 1.0588, "step": 4058 }, { "epoch": 0.8251677170156536, "grad_norm": 0.1443461924791336, "learning_rate": 0.0001175633072307536, "loss": 1.0314, "step": 4059 }, { "epoch": 0.825371010367961, "grad_norm": 0.1611374020576477, "learning_rate": 0.00011754296755822231, "loss": 1.0414, "step": 4060 }, { "epoch": 0.8255743037202683, "grad_norm": 0.13659845292568207, "learning_rate": 0.00011752262788569104, "loss": 0.9511, "step": 4061 }, { "epoch": 0.8257775970725757, "grad_norm": 0.13321594893932343, "learning_rate": 0.00011750228821315979, "loss": 1.1095, "step": 4062 }, { "epoch": 0.8259808904248831, "grad_norm": 0.13120754063129425, "learning_rate": 0.00011748194854062851, "loss": 1.011, "step": 4063 }, { "epoch": 0.8261841837771905, "grad_norm": 0.12870921194553375, "learning_rate": 0.00011746160886809722, "loss": 1.0436, "step": 4064 }, { "epoch": 0.8263874771294979, "grad_norm": 0.14104719460010529, "learning_rate": 0.00011744126919556595, "loss": 1.1661, "step": 4065 }, { "epoch": 0.8265907704818053, "grad_norm": 0.12897245585918427, "learning_rate": 0.0001174209295230347, "loss": 0.9585, "step": 4066 }, { "epoch": 0.8267940638341126, "grad_norm": 0.13888487219810486, "learning_rate": 0.00011740058985050342, "loss": 1.1015, "step": 4067 }, { "epoch": 0.82699735718642, "grad_norm": 0.14110806584358215, "learning_rate": 0.00011738025017797213, "loss": 1.2207, "step": 4068 }, { "epoch": 0.8272006505387274, "grad_norm": 0.14423434436321259, "learning_rate": 0.00011735991050544086, "loss": 1.1922, "step": 4069 }, { "epoch": 0.8274039438910348, "grad_norm": 0.12947557866573334, "learning_rate": 0.00011733957083290961, "loss": 1.0836, "step": 4070 }, { "epoch": 0.8276072372433422, "grad_norm": 0.12978830933570862, "learning_rate": 0.00011731923116037833, "loss": 1.0118, "step": 4071 }, { "epoch": 0.8278105305956496, "grad_norm": 0.1388140469789505, "learning_rate": 0.00011729889148784705, "loss": 1.0495, "step": 4072 }, { "epoch": 0.8280138239479569, "grad_norm": 0.13801120221614838, "learning_rate": 0.00011727855181531577, "loss": 1.2402, "step": 4073 }, { "epoch": 0.8282171173002643, "grad_norm": 0.12440764904022217, "learning_rate": 0.0001172582121427845, "loss": 0.9277, "step": 4074 }, { "epoch": 0.8284204106525717, "grad_norm": 0.14315354824066162, "learning_rate": 0.00011723787247025325, "loss": 1.0844, "step": 4075 }, { "epoch": 0.8286237040048791, "grad_norm": 0.15074683725833893, "learning_rate": 0.00011721753279772196, "loss": 1.2032, "step": 4076 }, { "epoch": 0.8288269973571865, "grad_norm": 0.14017608761787415, "learning_rate": 0.00011719719312519068, "loss": 1.1352, "step": 4077 }, { "epoch": 0.8290302907094937, "grad_norm": 0.1432233303785324, "learning_rate": 0.0001171768534526594, "loss": 1.0653, "step": 4078 }, { "epoch": 0.8292335840618011, "grad_norm": 0.14064320921897888, "learning_rate": 0.00011715651378012816, "loss": 1.0208, "step": 4079 }, { "epoch": 0.8294368774141085, "grad_norm": 0.13602322340011597, "learning_rate": 0.00011713617410759687, "loss": 0.9782, "step": 4080 }, { "epoch": 0.8296401707664159, "grad_norm": 0.14761172235012054, "learning_rate": 0.00011711583443506559, "loss": 1.1826, "step": 4081 }, { "epoch": 0.8298434641187233, "grad_norm": 0.14076586067676544, "learning_rate": 0.00011709549476253432, "loss": 1.0883, "step": 4082 }, { "epoch": 0.8300467574710307, "grad_norm": 0.1385519951581955, "learning_rate": 0.00011707515509000307, "loss": 1.0841, "step": 4083 }, { "epoch": 0.830250050823338, "grad_norm": 0.1392289251089096, "learning_rate": 0.00011705481541747178, "loss": 1.1386, "step": 4084 }, { "epoch": 0.8304533441756454, "grad_norm": 0.14094628393650055, "learning_rate": 0.0001170344757449405, "loss": 0.983, "step": 4085 }, { "epoch": 0.8306566375279528, "grad_norm": 0.13715529441833496, "learning_rate": 0.00011701413607240923, "loss": 1.089, "step": 4086 }, { "epoch": 0.8308599308802602, "grad_norm": 0.13627447187900543, "learning_rate": 0.00011699379639987798, "loss": 1.2017, "step": 4087 }, { "epoch": 0.8310632242325676, "grad_norm": 0.12237659841775894, "learning_rate": 0.00011697345672734669, "loss": 0.9606, "step": 4088 }, { "epoch": 0.831266517584875, "grad_norm": 0.12152927368879318, "learning_rate": 0.00011695311705481542, "loss": 1.0262, "step": 4089 }, { "epoch": 0.8314698109371823, "grad_norm": 0.13828657567501068, "learning_rate": 0.00011693277738228414, "loss": 1.0506, "step": 4090 }, { "epoch": 0.8316731042894897, "grad_norm": 0.13322405517101288, "learning_rate": 0.00011691243770975289, "loss": 0.9554, "step": 4091 }, { "epoch": 0.8318763976417971, "grad_norm": 0.1526733636856079, "learning_rate": 0.00011689209803722162, "loss": 1.2395, "step": 4092 }, { "epoch": 0.8320796909941045, "grad_norm": 0.12804892659187317, "learning_rate": 0.00011687175836469033, "loss": 1.0469, "step": 4093 }, { "epoch": 0.8322829843464119, "grad_norm": 0.13679049909114838, "learning_rate": 0.00011685141869215905, "loss": 1.069, "step": 4094 }, { "epoch": 0.8324862776987193, "grad_norm": 0.15435020625591278, "learning_rate": 0.0001168310790196278, "loss": 1.2415, "step": 4095 }, { "epoch": 0.8326895710510266, "grad_norm": 0.13897407054901123, "learning_rate": 0.00011681073934709653, "loss": 1.1871, "step": 4096 }, { "epoch": 0.832892864403334, "grad_norm": 0.12856152653694153, "learning_rate": 0.00011679039967456524, "loss": 1.0499, "step": 4097 }, { "epoch": 0.8330961577556414, "grad_norm": 0.12705758213996887, "learning_rate": 0.00011677006000203396, "loss": 1.0097, "step": 4098 }, { "epoch": 0.8332994511079488, "grad_norm": 0.1295822411775589, "learning_rate": 0.00011674972032950271, "loss": 0.8506, "step": 4099 }, { "epoch": 0.8335027444602562, "grad_norm": 0.13831810653209686, "learning_rate": 0.00011672938065697144, "loss": 1.1784, "step": 4100 }, { "epoch": 0.8337060378125636, "grad_norm": 0.13451896607875824, "learning_rate": 0.00011670904098444015, "loss": 1.1079, "step": 4101 }, { "epoch": 0.8339093311648709, "grad_norm": 0.12430407106876373, "learning_rate": 0.00011668870131190887, "loss": 0.8755, "step": 4102 }, { "epoch": 0.8341126245171783, "grad_norm": 0.13857564330101013, "learning_rate": 0.00011666836163937763, "loss": 1.0993, "step": 4103 }, { "epoch": 0.8343159178694857, "grad_norm": 0.14329898357391357, "learning_rate": 0.00011664802196684635, "loss": 1.2359, "step": 4104 }, { "epoch": 0.8345192112217931, "grad_norm": 0.14642906188964844, "learning_rate": 0.00011662768229431506, "loss": 1.1454, "step": 4105 }, { "epoch": 0.8347225045741005, "grad_norm": 0.1252523809671402, "learning_rate": 0.00011660734262178379, "loss": 1.0017, "step": 4106 }, { "epoch": 0.8349257979264078, "grad_norm": 0.12852495908737183, "learning_rate": 0.00011658700294925254, "loss": 1.0273, "step": 4107 }, { "epoch": 0.8351290912787152, "grad_norm": 0.12575671076774597, "learning_rate": 0.00011656666327672126, "loss": 0.9608, "step": 4108 }, { "epoch": 0.8353323846310226, "grad_norm": 0.15008383989334106, "learning_rate": 0.00011654632360418997, "loss": 1.1065, "step": 4109 }, { "epoch": 0.83553567798333, "grad_norm": 0.14088520407676697, "learning_rate": 0.0001165259839316587, "loss": 1.1224, "step": 4110 }, { "epoch": 0.8357389713356373, "grad_norm": 0.12474369257688522, "learning_rate": 0.00011650564425912745, "loss": 0.9834, "step": 4111 }, { "epoch": 0.8359422646879447, "grad_norm": 0.1329812854528427, "learning_rate": 0.00011648530458659617, "loss": 1.0876, "step": 4112 }, { "epoch": 0.836145558040252, "grad_norm": 0.12517108023166656, "learning_rate": 0.00011646496491406488, "loss": 0.9772, "step": 4113 }, { "epoch": 0.8363488513925594, "grad_norm": 0.13506385684013367, "learning_rate": 0.00011644462524153361, "loss": 1.0004, "step": 4114 }, { "epoch": 0.8365521447448668, "grad_norm": 0.13172465562820435, "learning_rate": 0.00011642428556900233, "loss": 1.0687, "step": 4115 }, { "epoch": 0.8367554380971742, "grad_norm": 0.13177163898944855, "learning_rate": 0.00011640394589647108, "loss": 1.0225, "step": 4116 }, { "epoch": 0.8369587314494816, "grad_norm": 0.1380792260169983, "learning_rate": 0.0001163836062239398, "loss": 1.0243, "step": 4117 }, { "epoch": 0.837162024801789, "grad_norm": 0.13012027740478516, "learning_rate": 0.00011636326655140852, "loss": 0.9618, "step": 4118 }, { "epoch": 0.8373653181540963, "grad_norm": 0.13312657177448273, "learning_rate": 0.00011634292687887724, "loss": 0.9299, "step": 4119 }, { "epoch": 0.8375686115064037, "grad_norm": 0.1449914276599884, "learning_rate": 0.000116322587206346, "loss": 1.1712, "step": 4120 }, { "epoch": 0.8377719048587111, "grad_norm": 0.14391463994979858, "learning_rate": 0.0001163022475338147, "loss": 1.2165, "step": 4121 }, { "epoch": 0.8379751982110185, "grad_norm": 0.14427267014980316, "learning_rate": 0.00011628190786128343, "loss": 1.1486, "step": 4122 }, { "epoch": 0.8381784915633259, "grad_norm": 0.15920564532279968, "learning_rate": 0.00011626156818875216, "loss": 1.3503, "step": 4123 }, { "epoch": 0.8383817849156333, "grad_norm": 0.14215265214443207, "learning_rate": 0.00011624122851622091, "loss": 1.0607, "step": 4124 }, { "epoch": 0.8385850782679406, "grad_norm": 0.12936022877693176, "learning_rate": 0.00011622088884368962, "loss": 0.9739, "step": 4125 }, { "epoch": 0.838788371620248, "grad_norm": 0.13270482420921326, "learning_rate": 0.00011620054917115834, "loss": 1.006, "step": 4126 }, { "epoch": 0.8389916649725554, "grad_norm": 0.13230706751346588, "learning_rate": 0.00011618020949862707, "loss": 1.0016, "step": 4127 }, { "epoch": 0.8391949583248628, "grad_norm": 0.1272687166929245, "learning_rate": 0.00011615986982609582, "loss": 0.8975, "step": 4128 }, { "epoch": 0.8393982516771702, "grad_norm": 0.13361401855945587, "learning_rate": 0.00011613953015356453, "loss": 0.9294, "step": 4129 }, { "epoch": 0.8396015450294775, "grad_norm": 0.12465297430753708, "learning_rate": 0.00011611919048103325, "loss": 0.8893, "step": 4130 }, { "epoch": 0.8398048383817849, "grad_norm": 0.1458294540643692, "learning_rate": 0.00011609885080850198, "loss": 1.1928, "step": 4131 }, { "epoch": 0.8400081317340923, "grad_norm": 0.1325213760137558, "learning_rate": 0.00011607851113597073, "loss": 0.9254, "step": 4132 }, { "epoch": 0.8402114250863997, "grad_norm": 0.1297135353088379, "learning_rate": 0.00011605817146343944, "loss": 0.8737, "step": 4133 }, { "epoch": 0.8404147184387071, "grad_norm": 0.1350976824760437, "learning_rate": 0.00011603783179090817, "loss": 1.1101, "step": 4134 }, { "epoch": 0.8406180117910145, "grad_norm": 0.13058003783226013, "learning_rate": 0.00011601749211837689, "loss": 1.0, "step": 4135 }, { "epoch": 0.8408213051433218, "grad_norm": 0.13314960896968842, "learning_rate": 0.00011599715244584564, "loss": 0.9872, "step": 4136 }, { "epoch": 0.8410245984956292, "grad_norm": 0.12905332446098328, "learning_rate": 0.00011597681277331435, "loss": 1.0097, "step": 4137 }, { "epoch": 0.8412278918479366, "grad_norm": 0.12162060290575027, "learning_rate": 0.00011595647310078308, "loss": 0.8788, "step": 4138 }, { "epoch": 0.841431185200244, "grad_norm": 0.12525275349617004, "learning_rate": 0.0001159361334282518, "loss": 0.8742, "step": 4139 }, { "epoch": 0.8416344785525514, "grad_norm": 0.13911886513233185, "learning_rate": 0.00011591579375572055, "loss": 1.3075, "step": 4140 }, { "epoch": 0.8418377719048588, "grad_norm": 0.13579173386096954, "learning_rate": 0.00011589545408318926, "loss": 1.0646, "step": 4141 }, { "epoch": 0.842041065257166, "grad_norm": 0.14429797232151031, "learning_rate": 0.00011587511441065799, "loss": 1.1266, "step": 4142 }, { "epoch": 0.8422443586094734, "grad_norm": 0.11546068638563156, "learning_rate": 0.00011585477473812671, "loss": 0.8928, "step": 4143 }, { "epoch": 0.8424476519617808, "grad_norm": 0.14215877652168274, "learning_rate": 0.00011583443506559546, "loss": 1.1816, "step": 4144 }, { "epoch": 0.8426509453140882, "grad_norm": 0.12982290983200073, "learning_rate": 0.00011581409539306417, "loss": 0.9241, "step": 4145 }, { "epoch": 0.8428542386663956, "grad_norm": 0.13759194314479828, "learning_rate": 0.0001157937557205329, "loss": 1.0254, "step": 4146 }, { "epoch": 0.843057532018703, "grad_norm": 0.13671040534973145, "learning_rate": 0.00011577341604800162, "loss": 1.1823, "step": 4147 }, { "epoch": 0.8432608253710103, "grad_norm": 0.11964955925941467, "learning_rate": 0.00011575307637547038, "loss": 0.99, "step": 4148 }, { "epoch": 0.8434641187233177, "grad_norm": 0.1388668566942215, "learning_rate": 0.0001157327367029391, "loss": 1.0667, "step": 4149 }, { "epoch": 0.8436674120756251, "grad_norm": 0.12363268435001373, "learning_rate": 0.00011571239703040781, "loss": 1.0156, "step": 4150 }, { "epoch": 0.8438707054279325, "grad_norm": 0.14275164902210236, "learning_rate": 0.00011569205735787654, "loss": 1.2653, "step": 4151 }, { "epoch": 0.8440739987802399, "grad_norm": 0.13494303822517395, "learning_rate": 0.00011567171768534529, "loss": 1.148, "step": 4152 }, { "epoch": 0.8442772921325473, "grad_norm": 0.12230674922466278, "learning_rate": 0.00011565137801281401, "loss": 0.9444, "step": 4153 }, { "epoch": 0.8444805854848546, "grad_norm": 0.12223172187805176, "learning_rate": 0.00011563103834028272, "loss": 1.0189, "step": 4154 }, { "epoch": 0.844683878837162, "grad_norm": 0.12231465429067612, "learning_rate": 0.00011561069866775145, "loss": 0.9234, "step": 4155 }, { "epoch": 0.8448871721894694, "grad_norm": 0.13856825232505798, "learning_rate": 0.00011559035899522017, "loss": 1.0455, "step": 4156 }, { "epoch": 0.8450904655417768, "grad_norm": 0.12258224934339523, "learning_rate": 0.00011557001932268892, "loss": 0.8723, "step": 4157 }, { "epoch": 0.8452937588940842, "grad_norm": 0.13906900584697723, "learning_rate": 0.00011554967965015763, "loss": 1.0314, "step": 4158 }, { "epoch": 0.8454970522463915, "grad_norm": 0.13625988364219666, "learning_rate": 0.00011552933997762636, "loss": 1.0259, "step": 4159 }, { "epoch": 0.8457003455986989, "grad_norm": 0.13316601514816284, "learning_rate": 0.00011550900030509508, "loss": 0.9816, "step": 4160 }, { "epoch": 0.8459036389510063, "grad_norm": 0.1430322825908661, "learning_rate": 0.00011548866063256383, "loss": 1.0748, "step": 4161 }, { "epoch": 0.8461069323033137, "grad_norm": 0.14025886356830597, "learning_rate": 0.00011546832096003254, "loss": 1.0884, "step": 4162 }, { "epoch": 0.8463102256556211, "grad_norm": 0.12264370173215866, "learning_rate": 0.00011544798128750127, "loss": 0.844, "step": 4163 }, { "epoch": 0.8465135190079285, "grad_norm": 0.14598575234413147, "learning_rate": 0.00011542764161497, "loss": 1.1999, "step": 4164 }, { "epoch": 0.8467168123602358, "grad_norm": 0.139155313372612, "learning_rate": 0.00011540730194243875, "loss": 1.0555, "step": 4165 }, { "epoch": 0.8469201057125432, "grad_norm": 0.13013023138046265, "learning_rate": 0.00011538696226990746, "loss": 1.0511, "step": 4166 }, { "epoch": 0.8471233990648506, "grad_norm": 0.13424082100391388, "learning_rate": 0.00011536662259737618, "loss": 1.0551, "step": 4167 }, { "epoch": 0.847326692417158, "grad_norm": 0.14205624163150787, "learning_rate": 0.0001153462829248449, "loss": 1.1582, "step": 4168 }, { "epoch": 0.8475299857694654, "grad_norm": 0.13570645451545715, "learning_rate": 0.00011532594325231366, "loss": 1.0911, "step": 4169 }, { "epoch": 0.8477332791217728, "grad_norm": 0.1343654841184616, "learning_rate": 0.00011530560357978237, "loss": 1.0519, "step": 4170 }, { "epoch": 0.84793657247408, "grad_norm": 0.12332738190889359, "learning_rate": 0.00011528526390725109, "loss": 0.9967, "step": 4171 }, { "epoch": 0.8481398658263875, "grad_norm": 0.13061444461345673, "learning_rate": 0.00011526492423471982, "loss": 0.998, "step": 4172 }, { "epoch": 0.8483431591786949, "grad_norm": 0.11777007579803467, "learning_rate": 0.00011524458456218857, "loss": 0.8942, "step": 4173 }, { "epoch": 0.8485464525310022, "grad_norm": 0.13091976940631866, "learning_rate": 0.00011522424488965728, "loss": 1.0612, "step": 4174 }, { "epoch": 0.8487497458833096, "grad_norm": 0.13466595113277435, "learning_rate": 0.000115203905217126, "loss": 1.0211, "step": 4175 }, { "epoch": 0.848953039235617, "grad_norm": 0.12775756418704987, "learning_rate": 0.00011518356554459473, "loss": 0.9925, "step": 4176 }, { "epoch": 0.8491563325879243, "grad_norm": 0.144356831908226, "learning_rate": 0.00011516322587206348, "loss": 1.1657, "step": 4177 }, { "epoch": 0.8493596259402317, "grad_norm": 0.1248125433921814, "learning_rate": 0.00011514288619953219, "loss": 0.9492, "step": 4178 }, { "epoch": 0.8495629192925391, "grad_norm": 0.13141238689422607, "learning_rate": 0.00011512254652700091, "loss": 0.9265, "step": 4179 }, { "epoch": 0.8497662126448465, "grad_norm": 0.13851980865001678, "learning_rate": 0.00011510220685446964, "loss": 1.1475, "step": 4180 }, { "epoch": 0.8499695059971539, "grad_norm": 0.13344109058380127, "learning_rate": 0.00011508186718193839, "loss": 1.0393, "step": 4181 }, { "epoch": 0.8501727993494613, "grad_norm": 0.15251140296459198, "learning_rate": 0.0001150615275094071, "loss": 1.1769, "step": 4182 }, { "epoch": 0.8503760927017686, "grad_norm": 0.1376708298921585, "learning_rate": 0.00011504118783687583, "loss": 1.1571, "step": 4183 }, { "epoch": 0.850579386054076, "grad_norm": 0.1312796026468277, "learning_rate": 0.00011502084816434455, "loss": 1.0176, "step": 4184 }, { "epoch": 0.8507826794063834, "grad_norm": 0.13133344054222107, "learning_rate": 0.0001150005084918133, "loss": 1.1126, "step": 4185 }, { "epoch": 0.8509859727586908, "grad_norm": 0.1404520869255066, "learning_rate": 0.00011498016881928201, "loss": 1.0467, "step": 4186 }, { "epoch": 0.8511892661109982, "grad_norm": 0.13041868805885315, "learning_rate": 0.00011495982914675074, "loss": 0.9282, "step": 4187 }, { "epoch": 0.8513925594633055, "grad_norm": 0.1341453641653061, "learning_rate": 0.00011493948947421946, "loss": 0.9568, "step": 4188 }, { "epoch": 0.8515958528156129, "grad_norm": 0.13047213852405548, "learning_rate": 0.00011491914980168821, "loss": 0.9724, "step": 4189 }, { "epoch": 0.8517991461679203, "grad_norm": 0.12841585278511047, "learning_rate": 0.00011489881012915692, "loss": 0.9021, "step": 4190 }, { "epoch": 0.8520024395202277, "grad_norm": 0.1475822478532791, "learning_rate": 0.00011487847045662565, "loss": 1.1875, "step": 4191 }, { "epoch": 0.8522057328725351, "grad_norm": 0.12445596605539322, "learning_rate": 0.00011485813078409437, "loss": 1.0242, "step": 4192 }, { "epoch": 0.8524090262248425, "grad_norm": 0.12460153549909592, "learning_rate": 0.00011483779111156313, "loss": 1.048, "step": 4193 }, { "epoch": 0.8526123195771498, "grad_norm": 0.1400919258594513, "learning_rate": 0.00011481745143903184, "loss": 1.0866, "step": 4194 }, { "epoch": 0.8528156129294572, "grad_norm": 0.13624945282936096, "learning_rate": 0.00011479711176650056, "loss": 1.0692, "step": 4195 }, { "epoch": 0.8530189062817646, "grad_norm": 0.13561497628688812, "learning_rate": 0.00011477677209396928, "loss": 1.1576, "step": 4196 }, { "epoch": 0.853222199634072, "grad_norm": 0.13115760684013367, "learning_rate": 0.00011475643242143801, "loss": 1.0431, "step": 4197 }, { "epoch": 0.8534254929863794, "grad_norm": 0.14327464997768402, "learning_rate": 0.00011473609274890675, "loss": 1.1245, "step": 4198 }, { "epoch": 0.8536287863386868, "grad_norm": 0.14745375514030457, "learning_rate": 0.00011471575307637547, "loss": 1.1246, "step": 4199 }, { "epoch": 0.8538320796909941, "grad_norm": 0.13001082837581635, "learning_rate": 0.0001146954134038442, "loss": 0.979, "step": 4200 }, { "epoch": 0.8540353730433015, "grad_norm": 0.14172659814357758, "learning_rate": 0.00011467507373131292, "loss": 1.0395, "step": 4201 }, { "epoch": 0.8542386663956089, "grad_norm": 0.12964913249015808, "learning_rate": 0.00011465473405878166, "loss": 0.9479, "step": 4202 }, { "epoch": 0.8544419597479163, "grad_norm": 0.13716383278369904, "learning_rate": 0.00011463439438625038, "loss": 1.1041, "step": 4203 }, { "epoch": 0.8546452531002237, "grad_norm": 0.1309032440185547, "learning_rate": 0.00011461405471371911, "loss": 1.0235, "step": 4204 }, { "epoch": 0.854848546452531, "grad_norm": 0.13954511284828186, "learning_rate": 0.00011459371504118783, "loss": 1.077, "step": 4205 }, { "epoch": 0.8550518398048383, "grad_norm": 0.12911508977413177, "learning_rate": 0.00011457337536865657, "loss": 1.0156, "step": 4206 }, { "epoch": 0.8552551331571457, "grad_norm": 0.13105571269989014, "learning_rate": 0.0001145530356961253, "loss": 0.9734, "step": 4207 }, { "epoch": 0.8554584265094531, "grad_norm": 0.1499045491218567, "learning_rate": 0.00011453269602359402, "loss": 1.0985, "step": 4208 }, { "epoch": 0.8556617198617605, "grad_norm": 0.1255357265472412, "learning_rate": 0.00011451235635106274, "loss": 1.0147, "step": 4209 }, { "epoch": 0.8558650132140679, "grad_norm": 0.1335058957338333, "learning_rate": 0.0001144920166785315, "loss": 1.0306, "step": 4210 }, { "epoch": 0.8560683065663752, "grad_norm": 0.12359452992677689, "learning_rate": 0.0001144716770060002, "loss": 0.8835, "step": 4211 }, { "epoch": 0.8562715999186826, "grad_norm": 0.14083559811115265, "learning_rate": 0.00011445133733346893, "loss": 0.9393, "step": 4212 }, { "epoch": 0.85647489327099, "grad_norm": 0.13426551222801208, "learning_rate": 0.00011443099766093765, "loss": 0.8895, "step": 4213 }, { "epoch": 0.8566781866232974, "grad_norm": 0.13291719555854797, "learning_rate": 0.0001144106579884064, "loss": 1.0563, "step": 4214 }, { "epoch": 0.8568814799756048, "grad_norm": 0.14882031083106995, "learning_rate": 0.00011439031831587512, "loss": 1.1625, "step": 4215 }, { "epoch": 0.8570847733279122, "grad_norm": 0.13581587374210358, "learning_rate": 0.00011436997864334384, "loss": 1.065, "step": 4216 }, { "epoch": 0.8572880666802195, "grad_norm": 0.1269901543855667, "learning_rate": 0.00011434963897081257, "loss": 1.045, "step": 4217 }, { "epoch": 0.8574913600325269, "grad_norm": 0.13057155907154083, "learning_rate": 0.00011432929929828132, "loss": 0.8866, "step": 4218 }, { "epoch": 0.8576946533848343, "grad_norm": 0.1503707617521286, "learning_rate": 0.00011430895962575003, "loss": 1.1888, "step": 4219 }, { "epoch": 0.8578979467371417, "grad_norm": 0.1276797503232956, "learning_rate": 0.00011428861995321875, "loss": 1.0497, "step": 4220 }, { "epoch": 0.8581012400894491, "grad_norm": 0.15582577884197235, "learning_rate": 0.00011426828028068748, "loss": 1.2019, "step": 4221 }, { "epoch": 0.8583045334417565, "grad_norm": 0.1253650039434433, "learning_rate": 0.00011424794060815623, "loss": 0.9089, "step": 4222 }, { "epoch": 0.8585078267940638, "grad_norm": 0.13212646543979645, "learning_rate": 0.00011422760093562494, "loss": 0.8912, "step": 4223 }, { "epoch": 0.8587111201463712, "grad_norm": 0.13876405358314514, "learning_rate": 0.00011420726126309366, "loss": 1.0144, "step": 4224 }, { "epoch": 0.8589144134986786, "grad_norm": 0.15017178654670715, "learning_rate": 0.00011418692159056239, "loss": 1.2437, "step": 4225 }, { "epoch": 0.859117706850986, "grad_norm": 0.1450318694114685, "learning_rate": 0.00011416658191803114, "loss": 1.1114, "step": 4226 }, { "epoch": 0.8593210002032934, "grad_norm": 0.14307589828968048, "learning_rate": 0.00011414624224549985, "loss": 1.1785, "step": 4227 }, { "epoch": 0.8595242935556008, "grad_norm": 0.13084810972213745, "learning_rate": 0.00011412590257296858, "loss": 1.0871, "step": 4228 }, { "epoch": 0.8597275869079081, "grad_norm": 0.12914970517158508, "learning_rate": 0.0001141055629004373, "loss": 1.0432, "step": 4229 }, { "epoch": 0.8599308802602155, "grad_norm": 0.14787475764751434, "learning_rate": 0.00011408522322790605, "loss": 1.0795, "step": 4230 }, { "epoch": 0.8601341736125229, "grad_norm": 0.1235564798116684, "learning_rate": 0.00011406488355537476, "loss": 0.946, "step": 4231 }, { "epoch": 0.8603374669648303, "grad_norm": 0.11737848818302155, "learning_rate": 0.00011404454388284349, "loss": 0.9549, "step": 4232 }, { "epoch": 0.8605407603171377, "grad_norm": 0.12434041500091553, "learning_rate": 0.00011402420421031221, "loss": 1.0672, "step": 4233 }, { "epoch": 0.8607440536694451, "grad_norm": 0.14527469873428345, "learning_rate": 0.00011400386453778096, "loss": 1.1508, "step": 4234 }, { "epoch": 0.8609473470217524, "grad_norm": 0.14363646507263184, "learning_rate": 0.00011398352486524967, "loss": 1.1063, "step": 4235 }, { "epoch": 0.8611506403740598, "grad_norm": 0.14891605079174042, "learning_rate": 0.0001139631851927184, "loss": 1.1027, "step": 4236 }, { "epoch": 0.8613539337263671, "grad_norm": 0.13269458711147308, "learning_rate": 0.00011394284552018712, "loss": 0.9506, "step": 4237 }, { "epoch": 0.8615572270786745, "grad_norm": 0.13667765259742737, "learning_rate": 0.00011392250584765585, "loss": 1.0362, "step": 4238 }, { "epoch": 0.861760520430982, "grad_norm": 0.1343078464269638, "learning_rate": 0.00011390216617512459, "loss": 1.0644, "step": 4239 }, { "epoch": 0.8619638137832892, "grad_norm": 0.12829913198947906, "learning_rate": 0.00011388182650259331, "loss": 0.9775, "step": 4240 }, { "epoch": 0.8621671071355966, "grad_norm": 0.14578650891780853, "learning_rate": 0.00011386148683006203, "loss": 1.1502, "step": 4241 }, { "epoch": 0.862370400487904, "grad_norm": 0.12001452594995499, "learning_rate": 0.00011384114715753076, "loss": 0.9496, "step": 4242 }, { "epoch": 0.8625736938402114, "grad_norm": 0.14932505786418915, "learning_rate": 0.0001138208074849995, "loss": 1.0731, "step": 4243 }, { "epoch": 0.8627769871925188, "grad_norm": 0.12832188606262207, "learning_rate": 0.00011380046781246822, "loss": 0.9428, "step": 4244 }, { "epoch": 0.8629802805448262, "grad_norm": 0.15174297988414764, "learning_rate": 0.00011378012813993695, "loss": 1.259, "step": 4245 }, { "epoch": 0.8631835738971335, "grad_norm": 0.1397685408592224, "learning_rate": 0.00011375978846740567, "loss": 1.204, "step": 4246 }, { "epoch": 0.8633868672494409, "grad_norm": 0.1386864334344864, "learning_rate": 0.00011373944879487441, "loss": 1.2689, "step": 4247 }, { "epoch": 0.8635901606017483, "grad_norm": 0.13151347637176514, "learning_rate": 0.00011371910912234313, "loss": 0.9938, "step": 4248 }, { "epoch": 0.8637934539540557, "grad_norm": 0.15212032198905945, "learning_rate": 0.00011369876944981186, "loss": 1.0839, "step": 4249 }, { "epoch": 0.8639967473063631, "grad_norm": 0.1537848860025406, "learning_rate": 0.00011367842977728058, "loss": 1.1586, "step": 4250 }, { "epoch": 0.8642000406586705, "grad_norm": 0.12098225951194763, "learning_rate": 0.00011365809010474932, "loss": 0.9585, "step": 4251 }, { "epoch": 0.8644033340109778, "grad_norm": 0.1198868602514267, "learning_rate": 0.00011363775043221804, "loss": 0.9055, "step": 4252 }, { "epoch": 0.8646066273632852, "grad_norm": 0.13080951571464539, "learning_rate": 0.00011361741075968677, "loss": 1.0171, "step": 4253 }, { "epoch": 0.8648099207155926, "grad_norm": 0.13523563742637634, "learning_rate": 0.0001135970710871555, "loss": 0.9592, "step": 4254 }, { "epoch": 0.8650132140679, "grad_norm": 0.12738974392414093, "learning_rate": 0.00011357673141462423, "loss": 0.9856, "step": 4255 }, { "epoch": 0.8652165074202074, "grad_norm": 0.13558736443519592, "learning_rate": 0.00011355639174209296, "loss": 1.0518, "step": 4256 }, { "epoch": 0.8654198007725148, "grad_norm": 0.13021017611026764, "learning_rate": 0.00011353605206956168, "loss": 0.9969, "step": 4257 }, { "epoch": 0.8656230941248221, "grad_norm": 0.13365107774734497, "learning_rate": 0.0001135157123970304, "loss": 1.0569, "step": 4258 }, { "epoch": 0.8658263874771295, "grad_norm": 0.14550118148326874, "learning_rate": 0.00011349537272449914, "loss": 1.0975, "step": 4259 }, { "epoch": 0.8660296808294369, "grad_norm": 0.12725263833999634, "learning_rate": 0.00011347503305196787, "loss": 0.9558, "step": 4260 }, { "epoch": 0.8662329741817443, "grad_norm": 0.13404077291488647, "learning_rate": 0.00011345469337943659, "loss": 1.1282, "step": 4261 }, { "epoch": 0.8664362675340517, "grad_norm": 0.12755300104618073, "learning_rate": 0.00011343435370690532, "loss": 1.0461, "step": 4262 }, { "epoch": 0.866639560886359, "grad_norm": 0.13699626922607422, "learning_rate": 0.00011341401403437405, "loss": 1.0606, "step": 4263 }, { "epoch": 0.8668428542386664, "grad_norm": 0.13077600300312042, "learning_rate": 0.00011339367436184278, "loss": 1.0505, "step": 4264 }, { "epoch": 0.8670461475909738, "grad_norm": 0.13652461767196655, "learning_rate": 0.0001133733346893115, "loss": 0.9115, "step": 4265 }, { "epoch": 0.8672494409432812, "grad_norm": 0.1255892813205719, "learning_rate": 0.00011335299501678023, "loss": 0.9152, "step": 4266 }, { "epoch": 0.8674527342955886, "grad_norm": 0.13048523664474487, "learning_rate": 0.00011333265534424898, "loss": 1.0926, "step": 4267 }, { "epoch": 0.867656027647896, "grad_norm": 0.13757598400115967, "learning_rate": 0.00011331231567171769, "loss": 0.9713, "step": 4268 }, { "epoch": 0.8678593210002032, "grad_norm": 0.12873396277427673, "learning_rate": 0.00011329197599918641, "loss": 1.1876, "step": 4269 }, { "epoch": 0.8680626143525106, "grad_norm": 0.12738154828548431, "learning_rate": 0.00011327163632665514, "loss": 1.0222, "step": 4270 }, { "epoch": 0.868265907704818, "grad_norm": 0.11265822499990463, "learning_rate": 0.00011325129665412389, "loss": 0.8971, "step": 4271 }, { "epoch": 0.8684692010571254, "grad_norm": 0.1351097822189331, "learning_rate": 0.0001132309569815926, "loss": 1.0784, "step": 4272 }, { "epoch": 0.8686724944094328, "grad_norm": 0.1455054134130478, "learning_rate": 0.00011321061730906133, "loss": 1.1519, "step": 4273 }, { "epoch": 0.8688757877617402, "grad_norm": 0.13560084998607635, "learning_rate": 0.00011319027763653005, "loss": 1.1245, "step": 4274 }, { "epoch": 0.8690790811140475, "grad_norm": 0.13816951215267181, "learning_rate": 0.0001131699379639988, "loss": 1.1679, "step": 4275 }, { "epoch": 0.8692823744663549, "grad_norm": 0.13416263461112976, "learning_rate": 0.00011314959829146751, "loss": 1.0392, "step": 4276 }, { "epoch": 0.8694856678186623, "grad_norm": 0.1267019808292389, "learning_rate": 0.00011312925861893624, "loss": 0.9492, "step": 4277 }, { "epoch": 0.8696889611709697, "grad_norm": 0.14063285291194916, "learning_rate": 0.00011310891894640496, "loss": 1.1432, "step": 4278 }, { "epoch": 0.8698922545232771, "grad_norm": 0.149309441447258, "learning_rate": 0.00011308857927387371, "loss": 1.2309, "step": 4279 }, { "epoch": 0.8700955478755845, "grad_norm": 0.1392187476158142, "learning_rate": 0.00011306823960134242, "loss": 1.0393, "step": 4280 }, { "epoch": 0.8702988412278918, "grad_norm": 0.12659290432929993, "learning_rate": 0.00011304789992881115, "loss": 0.8555, "step": 4281 }, { "epoch": 0.8705021345801992, "grad_norm": 0.11759068816900253, "learning_rate": 0.00011302756025627987, "loss": 0.938, "step": 4282 }, { "epoch": 0.8707054279325066, "grad_norm": 0.13261142373085022, "learning_rate": 0.0001130072205837486, "loss": 1.0559, "step": 4283 }, { "epoch": 0.870908721284814, "grad_norm": 0.12003304809331894, "learning_rate": 0.00011298688091121734, "loss": 0.9442, "step": 4284 }, { "epoch": 0.8711120146371214, "grad_norm": 0.13861103355884552, "learning_rate": 0.00011296654123868606, "loss": 1.0972, "step": 4285 }, { "epoch": 0.8713153079894288, "grad_norm": 0.12716351449489594, "learning_rate": 0.00011294620156615478, "loss": 0.9511, "step": 4286 }, { "epoch": 0.8715186013417361, "grad_norm": 0.1347339004278183, "learning_rate": 0.00011292586189362351, "loss": 0.9783, "step": 4287 }, { "epoch": 0.8717218946940435, "grad_norm": 0.14212962985038757, "learning_rate": 0.00011290552222109225, "loss": 0.9568, "step": 4288 }, { "epoch": 0.8719251880463509, "grad_norm": 0.11800102889537811, "learning_rate": 0.00011288518254856097, "loss": 0.9524, "step": 4289 }, { "epoch": 0.8721284813986583, "grad_norm": 0.1591940075159073, "learning_rate": 0.0001128648428760297, "loss": 1.2547, "step": 4290 }, { "epoch": 0.8723317747509657, "grad_norm": 0.12767143547534943, "learning_rate": 0.00011284450320349842, "loss": 1.0574, "step": 4291 }, { "epoch": 0.872535068103273, "grad_norm": 0.1308542639017105, "learning_rate": 0.00011282416353096716, "loss": 0.9813, "step": 4292 }, { "epoch": 0.8727383614555804, "grad_norm": 0.15340617299079895, "learning_rate": 0.00011280382385843588, "loss": 1.2684, "step": 4293 }, { "epoch": 0.8729416548078878, "grad_norm": 0.14063572883605957, "learning_rate": 0.00011278348418590461, "loss": 1.0218, "step": 4294 }, { "epoch": 0.8731449481601952, "grad_norm": 0.12035755813121796, "learning_rate": 0.00011276314451337333, "loss": 1.0237, "step": 4295 }, { "epoch": 0.8733482415125026, "grad_norm": 0.1521058976650238, "learning_rate": 0.00011274280484084207, "loss": 1.1983, "step": 4296 }, { "epoch": 0.87355153486481, "grad_norm": 0.1308029145002365, "learning_rate": 0.0001127224651683108, "loss": 0.9343, "step": 4297 }, { "epoch": 0.8737548282171173, "grad_norm": 0.13655021786689758, "learning_rate": 0.00011270212549577952, "loss": 1.1253, "step": 4298 }, { "epoch": 0.8739581215694247, "grad_norm": 0.13754834234714508, "learning_rate": 0.00011268178582324824, "loss": 1.1018, "step": 4299 }, { "epoch": 0.874161414921732, "grad_norm": 0.14539092779159546, "learning_rate": 0.00011266144615071698, "loss": 1.1647, "step": 4300 }, { "epoch": 0.8743647082740394, "grad_norm": 0.1390954852104187, "learning_rate": 0.0001126411064781857, "loss": 1.0717, "step": 4301 }, { "epoch": 0.8745680016263468, "grad_norm": 0.13942857086658478, "learning_rate": 0.00011262076680565443, "loss": 1.0457, "step": 4302 }, { "epoch": 0.8747712949786542, "grad_norm": 0.13453049957752228, "learning_rate": 0.00011260042713312315, "loss": 1.1522, "step": 4303 }, { "epoch": 0.8749745883309615, "grad_norm": 0.13158947229385376, "learning_rate": 0.00011258008746059189, "loss": 0.969, "step": 4304 }, { "epoch": 0.8751778816832689, "grad_norm": 0.1394949108362198, "learning_rate": 0.00011255974778806062, "loss": 0.9924, "step": 4305 }, { "epoch": 0.8753811750355763, "grad_norm": 0.14436380565166473, "learning_rate": 0.00011253940811552934, "loss": 1.0416, "step": 4306 }, { "epoch": 0.8755844683878837, "grad_norm": 0.12444054335355759, "learning_rate": 0.00011251906844299807, "loss": 0.9734, "step": 4307 }, { "epoch": 0.8757877617401911, "grad_norm": 0.1411658078432083, "learning_rate": 0.0001124987287704668, "loss": 0.9828, "step": 4308 }, { "epoch": 0.8759910550924985, "grad_norm": 0.13278289139270782, "learning_rate": 0.00011247838909793553, "loss": 1.0184, "step": 4309 }, { "epoch": 0.8761943484448058, "grad_norm": 0.13630905747413635, "learning_rate": 0.00011245804942540425, "loss": 1.1146, "step": 4310 }, { "epoch": 0.8763976417971132, "grad_norm": 0.12063156068325043, "learning_rate": 0.00011243770975287298, "loss": 0.9559, "step": 4311 }, { "epoch": 0.8766009351494206, "grad_norm": 0.12756480276584625, "learning_rate": 0.00011241737008034172, "loss": 0.935, "step": 4312 }, { "epoch": 0.876804228501728, "grad_norm": 0.1388019174337387, "learning_rate": 0.00011239703040781044, "loss": 1.102, "step": 4313 }, { "epoch": 0.8770075218540354, "grad_norm": 0.13372951745986938, "learning_rate": 0.00011237669073527916, "loss": 0.9758, "step": 4314 }, { "epoch": 0.8772108152063427, "grad_norm": 0.12079128623008728, "learning_rate": 0.00011235635106274789, "loss": 0.9984, "step": 4315 }, { "epoch": 0.8774141085586501, "grad_norm": 0.1439303755760193, "learning_rate": 0.00011233601139021663, "loss": 1.131, "step": 4316 }, { "epoch": 0.8776174019109575, "grad_norm": 0.131261944770813, "learning_rate": 0.00011231567171768535, "loss": 0.9801, "step": 4317 }, { "epoch": 0.8778206952632649, "grad_norm": 0.1571865677833557, "learning_rate": 0.00011229533204515408, "loss": 1.0769, "step": 4318 }, { "epoch": 0.8780239886155723, "grad_norm": 0.1357412189245224, "learning_rate": 0.0001122749923726228, "loss": 0.992, "step": 4319 }, { "epoch": 0.8782272819678797, "grad_norm": 0.12698335945606232, "learning_rate": 0.00011225465270009154, "loss": 0.9678, "step": 4320 }, { "epoch": 0.878430575320187, "grad_norm": 0.15510526299476624, "learning_rate": 0.00011223431302756026, "loss": 1.2756, "step": 4321 }, { "epoch": 0.8786338686724944, "grad_norm": 0.13490548729896545, "learning_rate": 0.00011221397335502899, "loss": 0.9746, "step": 4322 }, { "epoch": 0.8788371620248018, "grad_norm": 0.1362731158733368, "learning_rate": 0.00011219363368249771, "loss": 1.1591, "step": 4323 }, { "epoch": 0.8790404553771092, "grad_norm": 0.12086111307144165, "learning_rate": 0.00011217329400996644, "loss": 0.9592, "step": 4324 }, { "epoch": 0.8792437487294166, "grad_norm": 0.13338525593280792, "learning_rate": 0.00011215295433743517, "loss": 1.0375, "step": 4325 }, { "epoch": 0.879447042081724, "grad_norm": 0.13681508600711823, "learning_rate": 0.0001121326146649039, "loss": 1.2068, "step": 4326 }, { "epoch": 0.8796503354340313, "grad_norm": 0.12971334159374237, "learning_rate": 0.00011211227499237262, "loss": 0.9959, "step": 4327 }, { "epoch": 0.8798536287863387, "grad_norm": 0.13908180594444275, "learning_rate": 0.00011209193531984135, "loss": 1.119, "step": 4328 }, { "epoch": 0.8800569221386461, "grad_norm": 0.13482098281383514, "learning_rate": 0.00011207159564731009, "loss": 1.0079, "step": 4329 }, { "epoch": 0.8802602154909535, "grad_norm": 0.14087046682834625, "learning_rate": 0.00011205125597477881, "loss": 1.0478, "step": 4330 }, { "epoch": 0.8804635088432609, "grad_norm": 0.12133046984672546, "learning_rate": 0.00011203091630224753, "loss": 0.8026, "step": 4331 }, { "epoch": 0.8806668021955683, "grad_norm": 0.12162627279758453, "learning_rate": 0.00011201057662971626, "loss": 1.0147, "step": 4332 }, { "epoch": 0.8808700955478755, "grad_norm": 0.1315440535545349, "learning_rate": 0.000111990236957185, "loss": 1.1736, "step": 4333 }, { "epoch": 0.8810733889001829, "grad_norm": 0.1336052566766739, "learning_rate": 0.00011196989728465372, "loss": 1.0814, "step": 4334 }, { "epoch": 0.8812766822524903, "grad_norm": 0.12887480854988098, "learning_rate": 0.00011194955761212245, "loss": 1.0392, "step": 4335 }, { "epoch": 0.8814799756047977, "grad_norm": 0.12557265162467957, "learning_rate": 0.00011192921793959117, "loss": 0.9376, "step": 4336 }, { "epoch": 0.8816832689571051, "grad_norm": 0.13946324586868286, "learning_rate": 0.00011190887826705991, "loss": 1.0847, "step": 4337 }, { "epoch": 0.8818865623094125, "grad_norm": 0.14429444074630737, "learning_rate": 0.00011188853859452863, "loss": 1.0925, "step": 4338 }, { "epoch": 0.8820898556617198, "grad_norm": 0.13866104185581207, "learning_rate": 0.00011186819892199736, "loss": 1.1063, "step": 4339 }, { "epoch": 0.8822931490140272, "grad_norm": 0.1266574114561081, "learning_rate": 0.00011184785924946608, "loss": 1.0786, "step": 4340 }, { "epoch": 0.8824964423663346, "grad_norm": 0.14879325032234192, "learning_rate": 0.00011182751957693482, "loss": 1.1059, "step": 4341 }, { "epoch": 0.882699735718642, "grad_norm": 0.11987625062465668, "learning_rate": 0.00011180717990440354, "loss": 0.8947, "step": 4342 }, { "epoch": 0.8829030290709494, "grad_norm": 0.13331225514411926, "learning_rate": 0.00011178684023187227, "loss": 1.1468, "step": 4343 }, { "epoch": 0.8831063224232567, "grad_norm": 0.13890080153942108, "learning_rate": 0.00011176650055934099, "loss": 1.0535, "step": 4344 }, { "epoch": 0.8833096157755641, "grad_norm": 0.14050957560539246, "learning_rate": 0.00011174616088680973, "loss": 1.1156, "step": 4345 }, { "epoch": 0.8835129091278715, "grad_norm": 0.14118660986423492, "learning_rate": 0.00011172582121427846, "loss": 0.9597, "step": 4346 }, { "epoch": 0.8837162024801789, "grad_norm": 0.13197362422943115, "learning_rate": 0.00011170548154174718, "loss": 0.9801, "step": 4347 }, { "epoch": 0.8839194958324863, "grad_norm": 0.1429329663515091, "learning_rate": 0.0001116851418692159, "loss": 1.1501, "step": 4348 }, { "epoch": 0.8841227891847937, "grad_norm": 0.14236941933631897, "learning_rate": 0.00011166480219668464, "loss": 1.0295, "step": 4349 }, { "epoch": 0.884326082537101, "grad_norm": 0.13247445225715637, "learning_rate": 0.00011164446252415337, "loss": 0.9995, "step": 4350 }, { "epoch": 0.8845293758894084, "grad_norm": 0.1475542187690735, "learning_rate": 0.00011162412285162209, "loss": 1.1062, "step": 4351 }, { "epoch": 0.8847326692417158, "grad_norm": 0.14314448833465576, "learning_rate": 0.00011160378317909082, "loss": 1.1257, "step": 4352 }, { "epoch": 0.8849359625940232, "grad_norm": 0.1297428011894226, "learning_rate": 0.00011158344350655955, "loss": 0.8739, "step": 4353 }, { "epoch": 0.8851392559463306, "grad_norm": 0.15738995373249054, "learning_rate": 0.00011156310383402828, "loss": 1.2495, "step": 4354 }, { "epoch": 0.885342549298638, "grad_norm": 0.13949069380760193, "learning_rate": 0.000111542764161497, "loss": 1.0207, "step": 4355 }, { "epoch": 0.8855458426509453, "grad_norm": 0.1462063193321228, "learning_rate": 0.00011152242448896573, "loss": 0.9444, "step": 4356 }, { "epoch": 0.8857491360032527, "grad_norm": 0.13881848752498627, "learning_rate": 0.00011150208481643447, "loss": 1.0071, "step": 4357 }, { "epoch": 0.8859524293555601, "grad_norm": 0.13828495144844055, "learning_rate": 0.00011148174514390319, "loss": 1.0035, "step": 4358 }, { "epoch": 0.8861557227078675, "grad_norm": 0.12428104132413864, "learning_rate": 0.00011146140547137191, "loss": 0.94, "step": 4359 }, { "epoch": 0.8863590160601749, "grad_norm": 0.14945100247859955, "learning_rate": 0.00011144106579884064, "loss": 1.262, "step": 4360 }, { "epoch": 0.8865623094124823, "grad_norm": 0.13491201400756836, "learning_rate": 0.00011142072612630938, "loss": 0.9794, "step": 4361 }, { "epoch": 0.8867656027647896, "grad_norm": 0.1441691815853119, "learning_rate": 0.0001114003864537781, "loss": 1.0569, "step": 4362 }, { "epoch": 0.886968896117097, "grad_norm": 0.14696361124515533, "learning_rate": 0.00011138004678124683, "loss": 1.263, "step": 4363 }, { "epoch": 0.8871721894694043, "grad_norm": 0.131379634141922, "learning_rate": 0.00011135970710871555, "loss": 0.8765, "step": 4364 }, { "epoch": 0.8873754828217117, "grad_norm": 0.13199898600578308, "learning_rate": 0.00011133936743618427, "loss": 0.9504, "step": 4365 }, { "epoch": 0.8875787761740191, "grad_norm": 0.12538810074329376, "learning_rate": 0.00011131902776365301, "loss": 0.9167, "step": 4366 }, { "epoch": 0.8877820695263264, "grad_norm": 0.14858978986740112, "learning_rate": 0.00011129868809112174, "loss": 1.1652, "step": 4367 }, { "epoch": 0.8879853628786338, "grad_norm": 0.12117012590169907, "learning_rate": 0.00011127834841859046, "loss": 0.9091, "step": 4368 }, { "epoch": 0.8881886562309412, "grad_norm": 0.13053376972675323, "learning_rate": 0.00011125800874605919, "loss": 0.9876, "step": 4369 }, { "epoch": 0.8883919495832486, "grad_norm": 0.15164178609848022, "learning_rate": 0.00011123766907352792, "loss": 1.1477, "step": 4370 }, { "epoch": 0.888595242935556, "grad_norm": 0.13139276206493378, "learning_rate": 0.00011121732940099665, "loss": 1.0332, "step": 4371 }, { "epoch": 0.8887985362878634, "grad_norm": 0.14275844395160675, "learning_rate": 0.00011119698972846537, "loss": 1.2397, "step": 4372 }, { "epoch": 0.8890018296401707, "grad_norm": 0.14269821345806122, "learning_rate": 0.0001111766500559341, "loss": 1.0065, "step": 4373 }, { "epoch": 0.8892051229924781, "grad_norm": 0.12749828398227692, "learning_rate": 0.00011115631038340284, "loss": 0.9364, "step": 4374 }, { "epoch": 0.8894084163447855, "grad_norm": 0.13233932852745056, "learning_rate": 0.00011113597071087156, "loss": 1.035, "step": 4375 }, { "epoch": 0.8896117096970929, "grad_norm": 0.14462941884994507, "learning_rate": 0.00011111563103834028, "loss": 1.1277, "step": 4376 }, { "epoch": 0.8898150030494003, "grad_norm": 0.14381466805934906, "learning_rate": 0.00011109529136580901, "loss": 1.166, "step": 4377 }, { "epoch": 0.8900182964017077, "grad_norm": 0.1264910101890564, "learning_rate": 0.00011107495169327775, "loss": 1.0343, "step": 4378 }, { "epoch": 0.890221589754015, "grad_norm": 0.12185248732566833, "learning_rate": 0.00011105461202074647, "loss": 1.0195, "step": 4379 }, { "epoch": 0.8904248831063224, "grad_norm": 0.13510321080684662, "learning_rate": 0.0001110342723482152, "loss": 0.9245, "step": 4380 }, { "epoch": 0.8906281764586298, "grad_norm": 0.13467377424240112, "learning_rate": 0.00011101393267568392, "loss": 0.9795, "step": 4381 }, { "epoch": 0.8908314698109372, "grad_norm": 0.1266263723373413, "learning_rate": 0.00011099359300315266, "loss": 0.9728, "step": 4382 }, { "epoch": 0.8910347631632446, "grad_norm": 0.12397301942110062, "learning_rate": 0.00011097325333062138, "loss": 0.9325, "step": 4383 }, { "epoch": 0.891238056515552, "grad_norm": 0.14966972172260284, "learning_rate": 0.00011095291365809011, "loss": 1.2973, "step": 4384 }, { "epoch": 0.8914413498678593, "grad_norm": 0.13662739098072052, "learning_rate": 0.00011093257398555883, "loss": 1.0907, "step": 4385 }, { "epoch": 0.8916446432201667, "grad_norm": 0.1289726197719574, "learning_rate": 0.00011091223431302757, "loss": 1.047, "step": 4386 }, { "epoch": 0.8918479365724741, "grad_norm": 0.13556358218193054, "learning_rate": 0.0001108918946404963, "loss": 0.9938, "step": 4387 }, { "epoch": 0.8920512299247815, "grad_norm": 0.13389402627944946, "learning_rate": 0.00011087155496796502, "loss": 1.1329, "step": 4388 }, { "epoch": 0.8922545232770889, "grad_norm": 0.13192865252494812, "learning_rate": 0.00011085121529543374, "loss": 0.945, "step": 4389 }, { "epoch": 0.8924578166293963, "grad_norm": 0.14545689523220062, "learning_rate": 0.00011083087562290248, "loss": 1.0199, "step": 4390 }, { "epoch": 0.8926611099817036, "grad_norm": 0.1357770413160324, "learning_rate": 0.0001108105359503712, "loss": 1.1277, "step": 4391 }, { "epoch": 0.892864403334011, "grad_norm": 0.1452401578426361, "learning_rate": 0.00011079019627783993, "loss": 1.2486, "step": 4392 }, { "epoch": 0.8930676966863184, "grad_norm": 0.12674301862716675, "learning_rate": 0.00011076985660530865, "loss": 0.9128, "step": 4393 }, { "epoch": 0.8932709900386258, "grad_norm": 0.14735066890716553, "learning_rate": 0.00011074951693277739, "loss": 1.0515, "step": 4394 }, { "epoch": 0.8934742833909332, "grad_norm": 0.14510585367679596, "learning_rate": 0.00011072917726024612, "loss": 1.106, "step": 4395 }, { "epoch": 0.8936775767432404, "grad_norm": 0.14333130419254303, "learning_rate": 0.00011070883758771484, "loss": 1.1362, "step": 4396 }, { "epoch": 0.8938808700955478, "grad_norm": 0.1307590752840042, "learning_rate": 0.00011068849791518357, "loss": 0.9749, "step": 4397 }, { "epoch": 0.8940841634478552, "grad_norm": 0.12639310956001282, "learning_rate": 0.0001106681582426523, "loss": 0.9695, "step": 4398 }, { "epoch": 0.8942874568001626, "grad_norm": 0.13830193877220154, "learning_rate": 0.00011064781857012103, "loss": 0.9268, "step": 4399 }, { "epoch": 0.89449075015247, "grad_norm": 0.1438985913991928, "learning_rate": 0.00011062747889758975, "loss": 1.0869, "step": 4400 }, { "epoch": 0.8946940435047774, "grad_norm": 0.1423654854297638, "learning_rate": 0.00011060713922505848, "loss": 1.1083, "step": 4401 }, { "epoch": 0.8948973368570847, "grad_norm": 0.1318962126970291, "learning_rate": 0.00011058679955252722, "loss": 0.8641, "step": 4402 }, { "epoch": 0.8951006302093921, "grad_norm": 0.13388904929161072, "learning_rate": 0.00011056645987999594, "loss": 0.9573, "step": 4403 }, { "epoch": 0.8953039235616995, "grad_norm": 0.13502460718154907, "learning_rate": 0.00011054612020746466, "loss": 1.0003, "step": 4404 }, { "epoch": 0.8955072169140069, "grad_norm": 0.13359855115413666, "learning_rate": 0.00011052578053493339, "loss": 1.0714, "step": 4405 }, { "epoch": 0.8957105102663143, "grad_norm": 0.12817350029945374, "learning_rate": 0.00011050544086240211, "loss": 1.0177, "step": 4406 }, { "epoch": 0.8959138036186217, "grad_norm": 0.13135068118572235, "learning_rate": 0.00011048510118987085, "loss": 0.9071, "step": 4407 }, { "epoch": 0.896117096970929, "grad_norm": 0.13310706615447998, "learning_rate": 0.00011046476151733958, "loss": 1.0575, "step": 4408 }, { "epoch": 0.8963203903232364, "grad_norm": 0.12109819054603577, "learning_rate": 0.0001104444218448083, "loss": 0.902, "step": 4409 }, { "epoch": 0.8965236836755438, "grad_norm": 0.1310720294713974, "learning_rate": 0.00011042408217227702, "loss": 0.9465, "step": 4410 }, { "epoch": 0.8967269770278512, "grad_norm": 0.1330663412809372, "learning_rate": 0.00011040374249974576, "loss": 1.0053, "step": 4411 }, { "epoch": 0.8969302703801586, "grad_norm": 0.14403831958770752, "learning_rate": 0.00011038340282721449, "loss": 1.1808, "step": 4412 }, { "epoch": 0.897133563732466, "grad_norm": 0.1323632299900055, "learning_rate": 0.00011036306315468321, "loss": 0.9314, "step": 4413 }, { "epoch": 0.8973368570847733, "grad_norm": 0.12776096165180206, "learning_rate": 0.00011034272348215194, "loss": 1.0343, "step": 4414 }, { "epoch": 0.8975401504370807, "grad_norm": 0.12130887806415558, "learning_rate": 0.00011032238380962067, "loss": 1.0066, "step": 4415 }, { "epoch": 0.8977434437893881, "grad_norm": 0.11282986402511597, "learning_rate": 0.0001103020441370894, "loss": 0.8551, "step": 4416 }, { "epoch": 0.8979467371416955, "grad_norm": 0.14610666036605835, "learning_rate": 0.00011028170446455812, "loss": 1.2288, "step": 4417 }, { "epoch": 0.8981500304940029, "grad_norm": 0.14186285436153412, "learning_rate": 0.00011026136479202685, "loss": 1.183, "step": 4418 }, { "epoch": 0.8983533238463103, "grad_norm": 0.1389775425195694, "learning_rate": 0.00011024102511949559, "loss": 1.1684, "step": 4419 }, { "epoch": 0.8985566171986176, "grad_norm": 0.12318051606416702, "learning_rate": 0.00011022068544696431, "loss": 0.8705, "step": 4420 }, { "epoch": 0.898759910550925, "grad_norm": 0.12933410704135895, "learning_rate": 0.00011020034577443303, "loss": 1.0982, "step": 4421 }, { "epoch": 0.8989632039032324, "grad_norm": 0.14935623109340668, "learning_rate": 0.00011018000610190176, "loss": 1.1293, "step": 4422 }, { "epoch": 0.8991664972555398, "grad_norm": 0.13630087673664093, "learning_rate": 0.0001101596664293705, "loss": 1.0667, "step": 4423 }, { "epoch": 0.8993697906078472, "grad_norm": 0.14735549688339233, "learning_rate": 0.00011013932675683922, "loss": 1.0931, "step": 4424 }, { "epoch": 0.8995730839601545, "grad_norm": 0.13349930942058563, "learning_rate": 0.00011011898708430795, "loss": 1.0843, "step": 4425 }, { "epoch": 0.8997763773124619, "grad_norm": 0.13748763501644135, "learning_rate": 0.00011009864741177667, "loss": 1.0533, "step": 4426 }, { "epoch": 0.8999796706647692, "grad_norm": 0.1320018768310547, "learning_rate": 0.00011007830773924541, "loss": 1.0631, "step": 4427 }, { "epoch": 0.9001829640170766, "grad_norm": 0.1377144604921341, "learning_rate": 0.00011005796806671413, "loss": 1.0616, "step": 4428 }, { "epoch": 0.900386257369384, "grad_norm": 0.13794207572937012, "learning_rate": 0.00011003762839418286, "loss": 1.043, "step": 4429 }, { "epoch": 0.9005895507216914, "grad_norm": 0.12091651558876038, "learning_rate": 0.00011001728872165158, "loss": 0.9347, "step": 4430 }, { "epoch": 0.9007928440739987, "grad_norm": 0.13244852423667908, "learning_rate": 0.00010999694904912032, "loss": 0.9094, "step": 4431 }, { "epoch": 0.9009961374263061, "grad_norm": 0.1419922262430191, "learning_rate": 0.00010997660937658904, "loss": 1.0749, "step": 4432 }, { "epoch": 0.9011994307786135, "grad_norm": 0.138065367937088, "learning_rate": 0.00010995626970405777, "loss": 1.0559, "step": 4433 }, { "epoch": 0.9014027241309209, "grad_norm": 0.13192395865917206, "learning_rate": 0.00010993593003152649, "loss": 0.9556, "step": 4434 }, { "epoch": 0.9016060174832283, "grad_norm": 0.13181698322296143, "learning_rate": 0.00010991559035899523, "loss": 1.0103, "step": 4435 }, { "epoch": 0.9018093108355357, "grad_norm": 0.1360086053609848, "learning_rate": 0.00010989525068646396, "loss": 0.9737, "step": 4436 }, { "epoch": 0.902012604187843, "grad_norm": 0.14762909710407257, "learning_rate": 0.00010987491101393268, "loss": 1.0592, "step": 4437 }, { "epoch": 0.9022158975401504, "grad_norm": 0.13677798211574554, "learning_rate": 0.0001098545713414014, "loss": 1.0893, "step": 4438 }, { "epoch": 0.9024191908924578, "grad_norm": 0.13737376034259796, "learning_rate": 0.00010983423166887014, "loss": 1.1157, "step": 4439 }, { "epoch": 0.9026224842447652, "grad_norm": 0.13454869389533997, "learning_rate": 0.00010981389199633887, "loss": 1.0837, "step": 4440 }, { "epoch": 0.9028257775970726, "grad_norm": 0.1382821798324585, "learning_rate": 0.00010979355232380759, "loss": 0.9586, "step": 4441 }, { "epoch": 0.90302907094938, "grad_norm": 0.12248346954584122, "learning_rate": 0.00010977321265127632, "loss": 0.9251, "step": 4442 }, { "epoch": 0.9032323643016873, "grad_norm": 0.13722175359725952, "learning_rate": 0.00010975287297874505, "loss": 1.192, "step": 4443 }, { "epoch": 0.9034356576539947, "grad_norm": 0.14339371025562286, "learning_rate": 0.00010973253330621378, "loss": 1.1055, "step": 4444 }, { "epoch": 0.9036389510063021, "grad_norm": 0.1536564826965332, "learning_rate": 0.0001097121936336825, "loss": 1.1969, "step": 4445 }, { "epoch": 0.9038422443586095, "grad_norm": 0.12401420623064041, "learning_rate": 0.00010969185396115123, "loss": 1.0346, "step": 4446 }, { "epoch": 0.9040455377109169, "grad_norm": 0.12466490268707275, "learning_rate": 0.00010967151428861995, "loss": 0.903, "step": 4447 }, { "epoch": 0.9042488310632242, "grad_norm": 0.1398215889930725, "learning_rate": 0.00010965117461608869, "loss": 1.0548, "step": 4448 }, { "epoch": 0.9044521244155316, "grad_norm": 0.1224413737654686, "learning_rate": 0.00010963083494355741, "loss": 0.9738, "step": 4449 }, { "epoch": 0.904655417767839, "grad_norm": 0.13140305876731873, "learning_rate": 0.00010961049527102614, "loss": 1.1668, "step": 4450 }, { "epoch": 0.9048587111201464, "grad_norm": 0.13816101849079132, "learning_rate": 0.00010959015559849486, "loss": 1.0785, "step": 4451 }, { "epoch": 0.9050620044724538, "grad_norm": 0.19513925909996033, "learning_rate": 0.0001095698159259636, "loss": 1.2728, "step": 4452 }, { "epoch": 0.9052652978247612, "grad_norm": 0.1294509470462799, "learning_rate": 0.00010954947625343233, "loss": 0.9757, "step": 4453 }, { "epoch": 0.9054685911770685, "grad_norm": 0.13822956383228302, "learning_rate": 0.00010952913658090105, "loss": 1.064, "step": 4454 }, { "epoch": 0.9056718845293759, "grad_norm": 0.13722215592861176, "learning_rate": 0.00010950879690836977, "loss": 1.0321, "step": 4455 }, { "epoch": 0.9058751778816833, "grad_norm": 0.1313597559928894, "learning_rate": 0.00010948845723583851, "loss": 1.117, "step": 4456 }, { "epoch": 0.9060784712339907, "grad_norm": 0.13262207806110382, "learning_rate": 0.00010946811756330724, "loss": 1.0395, "step": 4457 }, { "epoch": 0.906281764586298, "grad_norm": 0.15121689438819885, "learning_rate": 0.00010944777789077596, "loss": 1.1235, "step": 4458 }, { "epoch": 0.9064850579386055, "grad_norm": 0.14262863993644714, "learning_rate": 0.00010942743821824469, "loss": 1.2066, "step": 4459 }, { "epoch": 0.9066883512909127, "grad_norm": 0.13933706283569336, "learning_rate": 0.00010940709854571342, "loss": 1.15, "step": 4460 }, { "epoch": 0.9068916446432201, "grad_norm": 0.14884263277053833, "learning_rate": 0.00010938675887318215, "loss": 1.1161, "step": 4461 }, { "epoch": 0.9070949379955275, "grad_norm": 0.1426582783460617, "learning_rate": 0.00010936641920065087, "loss": 1.1462, "step": 4462 }, { "epoch": 0.9072982313478349, "grad_norm": 0.14341098070144653, "learning_rate": 0.0001093460795281196, "loss": 1.0028, "step": 4463 }, { "epoch": 0.9075015247001423, "grad_norm": 0.13192780315876007, "learning_rate": 0.00010932573985558834, "loss": 0.9766, "step": 4464 }, { "epoch": 0.9077048180524497, "grad_norm": 0.13691288232803345, "learning_rate": 0.00010930540018305706, "loss": 1.0718, "step": 4465 }, { "epoch": 0.907908111404757, "grad_norm": 0.1597934365272522, "learning_rate": 0.00010928506051052578, "loss": 1.2067, "step": 4466 }, { "epoch": 0.9081114047570644, "grad_norm": 0.128030464053154, "learning_rate": 0.00010926472083799451, "loss": 0.9896, "step": 4467 }, { "epoch": 0.9083146981093718, "grad_norm": 0.13701699674129486, "learning_rate": 0.00010924438116546325, "loss": 1.0203, "step": 4468 }, { "epoch": 0.9085179914616792, "grad_norm": 0.13079933822155, "learning_rate": 0.00010922404149293197, "loss": 1.0287, "step": 4469 }, { "epoch": 0.9087212848139866, "grad_norm": 0.15257249772548676, "learning_rate": 0.0001092037018204007, "loss": 1.0392, "step": 4470 }, { "epoch": 0.908924578166294, "grad_norm": 0.134558767080307, "learning_rate": 0.00010918336214786942, "loss": 1.1618, "step": 4471 }, { "epoch": 0.9091278715186013, "grad_norm": 0.13755445182323456, "learning_rate": 0.00010916302247533816, "loss": 1.0579, "step": 4472 }, { "epoch": 0.9093311648709087, "grad_norm": 0.14956828951835632, "learning_rate": 0.00010914268280280688, "loss": 1.0457, "step": 4473 }, { "epoch": 0.9095344582232161, "grad_norm": 0.138174906373024, "learning_rate": 0.0001091223431302756, "loss": 0.9902, "step": 4474 }, { "epoch": 0.9097377515755235, "grad_norm": 0.14548815786838531, "learning_rate": 0.00010910200345774433, "loss": 1.1674, "step": 4475 }, { "epoch": 0.9099410449278309, "grad_norm": 0.13372185826301575, "learning_rate": 0.00010908166378521307, "loss": 1.1141, "step": 4476 }, { "epoch": 0.9101443382801382, "grad_norm": 0.1349831521511078, "learning_rate": 0.0001090613241126818, "loss": 1.0383, "step": 4477 }, { "epoch": 0.9103476316324456, "grad_norm": 0.12056616693735123, "learning_rate": 0.00010904098444015052, "loss": 0.7852, "step": 4478 }, { "epoch": 0.910550924984753, "grad_norm": 0.14333753287792206, "learning_rate": 0.00010902064476761924, "loss": 1.0264, "step": 4479 }, { "epoch": 0.9107542183370604, "grad_norm": 0.1312333047389984, "learning_rate": 0.00010900030509508798, "loss": 0.8746, "step": 4480 }, { "epoch": 0.9109575116893678, "grad_norm": 0.14129756391048431, "learning_rate": 0.0001089799654225567, "loss": 1.1576, "step": 4481 }, { "epoch": 0.9111608050416752, "grad_norm": 0.135942742228508, "learning_rate": 0.00010895962575002543, "loss": 1.1842, "step": 4482 }, { "epoch": 0.9113640983939825, "grad_norm": 0.1423972100019455, "learning_rate": 0.00010893928607749415, "loss": 1.051, "step": 4483 }, { "epoch": 0.9115673917462899, "grad_norm": 0.13322605192661285, "learning_rate": 0.00010891894640496289, "loss": 0.9931, "step": 4484 }, { "epoch": 0.9117706850985973, "grad_norm": 0.14480316638946533, "learning_rate": 0.00010889860673243162, "loss": 1.0453, "step": 4485 }, { "epoch": 0.9119739784509047, "grad_norm": 0.1365094780921936, "learning_rate": 0.00010887826705990034, "loss": 1.0413, "step": 4486 }, { "epoch": 0.9121772718032121, "grad_norm": 0.128956139087677, "learning_rate": 0.00010885792738736907, "loss": 1.0774, "step": 4487 }, { "epoch": 0.9123805651555195, "grad_norm": 0.11314928531646729, "learning_rate": 0.00010883758771483779, "loss": 0.8729, "step": 4488 }, { "epoch": 0.9125838585078268, "grad_norm": 0.13904598355293274, "learning_rate": 0.00010881724804230653, "loss": 1.07, "step": 4489 }, { "epoch": 0.9127871518601341, "grad_norm": 0.1325247436761856, "learning_rate": 0.00010879690836977525, "loss": 1.1768, "step": 4490 }, { "epoch": 0.9129904452124415, "grad_norm": 0.13978269696235657, "learning_rate": 0.00010877656869724398, "loss": 1.0877, "step": 4491 }, { "epoch": 0.913193738564749, "grad_norm": 0.13564588129520416, "learning_rate": 0.0001087562290247127, "loss": 1.0527, "step": 4492 }, { "epoch": 0.9133970319170563, "grad_norm": 0.14008729159832, "learning_rate": 0.00010873588935218144, "loss": 1.0295, "step": 4493 }, { "epoch": 0.9136003252693637, "grad_norm": 0.14307157695293427, "learning_rate": 0.00010871554967965016, "loss": 1.0371, "step": 4494 }, { "epoch": 0.913803618621671, "grad_norm": 0.13670316338539124, "learning_rate": 0.00010869521000711889, "loss": 1.0557, "step": 4495 }, { "epoch": 0.9140069119739784, "grad_norm": 0.138756662607193, "learning_rate": 0.00010867487033458761, "loss": 0.9865, "step": 4496 }, { "epoch": 0.9142102053262858, "grad_norm": 0.132290780544281, "learning_rate": 0.00010865453066205635, "loss": 1.041, "step": 4497 }, { "epoch": 0.9144134986785932, "grad_norm": 0.13535267114639282, "learning_rate": 0.00010863419098952508, "loss": 1.0592, "step": 4498 }, { "epoch": 0.9146167920309006, "grad_norm": 0.12333885580301285, "learning_rate": 0.0001086138513169938, "loss": 1.0101, "step": 4499 }, { "epoch": 0.9148200853832079, "grad_norm": 0.14777310192584991, "learning_rate": 0.00010859351164446252, "loss": 1.0622, "step": 4500 }, { "epoch": 0.9150233787355153, "grad_norm": 0.11419006437063217, "learning_rate": 0.00010857317197193126, "loss": 0.8548, "step": 4501 }, { "epoch": 0.9152266720878227, "grad_norm": 0.12761832773685455, "learning_rate": 0.00010855283229939999, "loss": 0.9198, "step": 4502 }, { "epoch": 0.9154299654401301, "grad_norm": 0.1387338936328888, "learning_rate": 0.00010853249262686871, "loss": 1.0402, "step": 4503 }, { "epoch": 0.9156332587924375, "grad_norm": 0.13915283977985382, "learning_rate": 0.00010851215295433744, "loss": 1.1062, "step": 4504 }, { "epoch": 0.9158365521447449, "grad_norm": 0.13649246096611023, "learning_rate": 0.00010849181328180617, "loss": 1.1802, "step": 4505 }, { "epoch": 0.9160398454970522, "grad_norm": 0.15227414667606354, "learning_rate": 0.0001084714736092749, "loss": 1.3089, "step": 4506 }, { "epoch": 0.9162431388493596, "grad_norm": 0.1522645801305771, "learning_rate": 0.00010845113393674362, "loss": 1.1328, "step": 4507 }, { "epoch": 0.916446432201667, "grad_norm": 0.13502533733844757, "learning_rate": 0.00010843079426421235, "loss": 1.0838, "step": 4508 }, { "epoch": 0.9166497255539744, "grad_norm": 0.1440073549747467, "learning_rate": 0.00010841045459168108, "loss": 1.1632, "step": 4509 }, { "epoch": 0.9168530189062818, "grad_norm": 0.1380605548620224, "learning_rate": 0.00010839011491914981, "loss": 1.0128, "step": 4510 }, { "epoch": 0.9170563122585892, "grad_norm": 0.14944829046726227, "learning_rate": 0.00010836977524661853, "loss": 1.2041, "step": 4511 }, { "epoch": 0.9172596056108965, "grad_norm": 0.13469955325126648, "learning_rate": 0.00010834943557408726, "loss": 1.1208, "step": 4512 }, { "epoch": 0.9174628989632039, "grad_norm": 0.1321646124124527, "learning_rate": 0.000108329095901556, "loss": 0.9967, "step": 4513 }, { "epoch": 0.9176661923155113, "grad_norm": 0.1304931789636612, "learning_rate": 0.00010830875622902472, "loss": 1.0428, "step": 4514 }, { "epoch": 0.9178694856678187, "grad_norm": 0.12599384784698486, "learning_rate": 0.00010828841655649345, "loss": 1.0719, "step": 4515 }, { "epoch": 0.9180727790201261, "grad_norm": 0.12788186967372894, "learning_rate": 0.00010826807688396217, "loss": 1.0551, "step": 4516 }, { "epoch": 0.9182760723724335, "grad_norm": 0.16241435706615448, "learning_rate": 0.00010824773721143091, "loss": 1.3277, "step": 4517 }, { "epoch": 0.9184793657247408, "grad_norm": 0.12297213822603226, "learning_rate": 0.00010822739753889963, "loss": 0.9117, "step": 4518 }, { "epoch": 0.9186826590770482, "grad_norm": 0.13010992109775543, "learning_rate": 0.00010820705786636836, "loss": 0.9313, "step": 4519 }, { "epoch": 0.9188859524293556, "grad_norm": 0.13779647648334503, "learning_rate": 0.00010818671819383708, "loss": 1.1851, "step": 4520 }, { "epoch": 0.919089245781663, "grad_norm": 0.15298517048358917, "learning_rate": 0.00010816637852130582, "loss": 1.2739, "step": 4521 }, { "epoch": 0.9192925391339704, "grad_norm": 0.1386537402868271, "learning_rate": 0.00010814603884877454, "loss": 1.1061, "step": 4522 }, { "epoch": 0.9194958324862778, "grad_norm": 0.14241141080856323, "learning_rate": 0.00010812569917624327, "loss": 1.1449, "step": 4523 }, { "epoch": 0.919699125838585, "grad_norm": 0.14428827166557312, "learning_rate": 0.00010810535950371199, "loss": 0.9884, "step": 4524 }, { "epoch": 0.9199024191908924, "grad_norm": 0.15264667570590973, "learning_rate": 0.00010808501983118073, "loss": 1.2018, "step": 4525 }, { "epoch": 0.9201057125431998, "grad_norm": 0.14881928265094757, "learning_rate": 0.00010806468015864945, "loss": 1.0599, "step": 4526 }, { "epoch": 0.9203090058955072, "grad_norm": 0.12393801659345627, "learning_rate": 0.00010804434048611818, "loss": 1.0684, "step": 4527 }, { "epoch": 0.9205122992478146, "grad_norm": 0.1288781762123108, "learning_rate": 0.0001080240008135869, "loss": 0.9667, "step": 4528 }, { "epoch": 0.9207155926001219, "grad_norm": 0.12993919849395752, "learning_rate": 0.00010800366114105563, "loss": 0.985, "step": 4529 }, { "epoch": 0.9209188859524293, "grad_norm": 0.14005163311958313, "learning_rate": 0.00010798332146852437, "loss": 1.0121, "step": 4530 }, { "epoch": 0.9211221793047367, "grad_norm": 0.1298326551914215, "learning_rate": 0.00010796298179599309, "loss": 1.0357, "step": 4531 }, { "epoch": 0.9213254726570441, "grad_norm": 0.1444677710533142, "learning_rate": 0.00010794264212346182, "loss": 1.0837, "step": 4532 }, { "epoch": 0.9215287660093515, "grad_norm": 0.1372900754213333, "learning_rate": 0.00010792230245093054, "loss": 1.1198, "step": 4533 }, { "epoch": 0.9217320593616589, "grad_norm": 0.13712218403816223, "learning_rate": 0.00010790196277839928, "loss": 1.1203, "step": 4534 }, { "epoch": 0.9219353527139662, "grad_norm": 0.13176938891410828, "learning_rate": 0.000107881623105868, "loss": 0.9814, "step": 4535 }, { "epoch": 0.9221386460662736, "grad_norm": 0.14285510778427124, "learning_rate": 0.00010786128343333673, "loss": 1.0725, "step": 4536 }, { "epoch": 0.922341939418581, "grad_norm": 0.14509692788124084, "learning_rate": 0.00010784094376080545, "loss": 1.2281, "step": 4537 }, { "epoch": 0.9225452327708884, "grad_norm": 0.12854382395744324, "learning_rate": 0.00010782060408827419, "loss": 0.95, "step": 4538 }, { "epoch": 0.9227485261231958, "grad_norm": 0.13784833252429962, "learning_rate": 0.00010780026441574291, "loss": 1.0091, "step": 4539 }, { "epoch": 0.9229518194755032, "grad_norm": 0.12507863342761993, "learning_rate": 0.00010777992474321164, "loss": 0.9746, "step": 4540 }, { "epoch": 0.9231551128278105, "grad_norm": 0.14005503058433533, "learning_rate": 0.00010775958507068036, "loss": 0.9599, "step": 4541 }, { "epoch": 0.9233584061801179, "grad_norm": 0.15629933774471283, "learning_rate": 0.0001077392453981491, "loss": 1.1292, "step": 4542 }, { "epoch": 0.9235616995324253, "grad_norm": 0.12826746702194214, "learning_rate": 0.00010771890572561782, "loss": 1.0791, "step": 4543 }, { "epoch": 0.9237649928847327, "grad_norm": 0.1537964642047882, "learning_rate": 0.00010769856605308655, "loss": 1.2648, "step": 4544 }, { "epoch": 0.9239682862370401, "grad_norm": 0.13459934294223785, "learning_rate": 0.00010767822638055527, "loss": 1.1297, "step": 4545 }, { "epoch": 0.9241715795893475, "grad_norm": 0.1457410752773285, "learning_rate": 0.00010765788670802401, "loss": 1.0294, "step": 4546 }, { "epoch": 0.9243748729416548, "grad_norm": 0.12394455820322037, "learning_rate": 0.00010763754703549274, "loss": 0.9905, "step": 4547 }, { "epoch": 0.9245781662939622, "grad_norm": 0.14204509556293488, "learning_rate": 0.00010761720736296146, "loss": 1.1691, "step": 4548 }, { "epoch": 0.9247814596462696, "grad_norm": 0.1345042586326599, "learning_rate": 0.00010759686769043019, "loss": 1.0468, "step": 4549 }, { "epoch": 0.924984752998577, "grad_norm": 0.13902144134044647, "learning_rate": 0.00010757652801789892, "loss": 1.0194, "step": 4550 }, { "epoch": 0.9251880463508844, "grad_norm": 0.1317700892686844, "learning_rate": 0.00010755618834536765, "loss": 0.923, "step": 4551 }, { "epoch": 0.9253913397031917, "grad_norm": 0.15080450475215912, "learning_rate": 0.00010753584867283637, "loss": 1.2244, "step": 4552 }, { "epoch": 0.925594633055499, "grad_norm": 0.14415398240089417, "learning_rate": 0.0001075155090003051, "loss": 1.0809, "step": 4553 }, { "epoch": 0.9257979264078064, "grad_norm": 0.12147921323776245, "learning_rate": 0.00010749516932777383, "loss": 0.8445, "step": 4554 }, { "epoch": 0.9260012197601138, "grad_norm": 0.1352618932723999, "learning_rate": 0.00010747482965524256, "loss": 1.0397, "step": 4555 }, { "epoch": 0.9262045131124212, "grad_norm": 0.1354973316192627, "learning_rate": 0.00010745448998271128, "loss": 1.0103, "step": 4556 }, { "epoch": 0.9264078064647286, "grad_norm": 0.13657426834106445, "learning_rate": 0.00010743415031018001, "loss": 0.9979, "step": 4557 }, { "epoch": 0.9266110998170359, "grad_norm": 0.13294103741645813, "learning_rate": 0.00010741381063764875, "loss": 0.9317, "step": 4558 }, { "epoch": 0.9268143931693433, "grad_norm": 0.14303997159004211, "learning_rate": 0.00010739347096511747, "loss": 1.1488, "step": 4559 }, { "epoch": 0.9270176865216507, "grad_norm": 0.12142444401979446, "learning_rate": 0.0001073731312925862, "loss": 0.9833, "step": 4560 }, { "epoch": 0.9272209798739581, "grad_norm": 0.1350148767232895, "learning_rate": 0.00010735279162005492, "loss": 0.9737, "step": 4561 }, { "epoch": 0.9274242732262655, "grad_norm": 0.15613560378551483, "learning_rate": 0.00010733245194752366, "loss": 1.1749, "step": 4562 }, { "epoch": 0.9276275665785729, "grad_norm": 0.13186268508434296, "learning_rate": 0.00010731211227499238, "loss": 1.0103, "step": 4563 }, { "epoch": 0.9278308599308802, "grad_norm": 0.14699916541576385, "learning_rate": 0.0001072917726024611, "loss": 1.1501, "step": 4564 }, { "epoch": 0.9280341532831876, "grad_norm": 0.13133716583251953, "learning_rate": 0.00010727143292992983, "loss": 1.026, "step": 4565 }, { "epoch": 0.928237446635495, "grad_norm": 0.1365920603275299, "learning_rate": 0.00010725109325739857, "loss": 1.0866, "step": 4566 }, { "epoch": 0.9284407399878024, "grad_norm": 0.12985709309577942, "learning_rate": 0.0001072307535848673, "loss": 1.1098, "step": 4567 }, { "epoch": 0.9286440333401098, "grad_norm": 0.14012043178081512, "learning_rate": 0.00010721041391233602, "loss": 0.9724, "step": 4568 }, { "epoch": 0.9288473266924172, "grad_norm": 0.12195601314306259, "learning_rate": 0.00010719007423980474, "loss": 0.9906, "step": 4569 }, { "epoch": 0.9290506200447245, "grad_norm": 0.12102338671684265, "learning_rate": 0.00010716973456727347, "loss": 0.8993, "step": 4570 }, { "epoch": 0.9292539133970319, "grad_norm": 0.16343897581100464, "learning_rate": 0.0001071493948947422, "loss": 1.3669, "step": 4571 }, { "epoch": 0.9294572067493393, "grad_norm": 0.12324689328670502, "learning_rate": 0.00010712905522221093, "loss": 1.018, "step": 4572 }, { "epoch": 0.9296605001016467, "grad_norm": 0.14391222596168518, "learning_rate": 0.00010710871554967965, "loss": 1.0502, "step": 4573 }, { "epoch": 0.9298637934539541, "grad_norm": 0.13690593838691711, "learning_rate": 0.00010708837587714838, "loss": 1.0015, "step": 4574 }, { "epoch": 0.9300670868062615, "grad_norm": 0.11955592036247253, "learning_rate": 0.00010706803620461712, "loss": 0.8207, "step": 4575 }, { "epoch": 0.9302703801585688, "grad_norm": 0.12728698551654816, "learning_rate": 0.00010704769653208584, "loss": 0.8813, "step": 4576 }, { "epoch": 0.9304736735108762, "grad_norm": 0.14534975588321686, "learning_rate": 0.00010702735685955457, "loss": 1.0923, "step": 4577 }, { "epoch": 0.9306769668631836, "grad_norm": 0.12908664345741272, "learning_rate": 0.00010700701718702329, "loss": 1.1333, "step": 4578 }, { "epoch": 0.930880260215491, "grad_norm": 0.14262458682060242, "learning_rate": 0.00010698667751449203, "loss": 1.0474, "step": 4579 }, { "epoch": 0.9310835535677984, "grad_norm": 0.13423089683055878, "learning_rate": 0.00010696633784196075, "loss": 1.0581, "step": 4580 }, { "epoch": 0.9312868469201057, "grad_norm": 0.1267002373933792, "learning_rate": 0.00010694599816942948, "loss": 0.958, "step": 4581 }, { "epoch": 0.9314901402724131, "grad_norm": 0.13516265153884888, "learning_rate": 0.0001069256584968982, "loss": 1.0636, "step": 4582 }, { "epoch": 0.9316934336247205, "grad_norm": 0.14232146739959717, "learning_rate": 0.00010690531882436694, "loss": 1.1371, "step": 4583 }, { "epoch": 0.9318967269770279, "grad_norm": 0.13286015391349792, "learning_rate": 0.00010688497915183566, "loss": 0.9935, "step": 4584 }, { "epoch": 0.9321000203293353, "grad_norm": 0.1338234841823578, "learning_rate": 0.00010686463947930439, "loss": 1.0371, "step": 4585 }, { "epoch": 0.9323033136816427, "grad_norm": 0.13574783504009247, "learning_rate": 0.00010684429980677311, "loss": 1.0583, "step": 4586 }, { "epoch": 0.9325066070339499, "grad_norm": 0.1322636902332306, "learning_rate": 0.00010682396013424185, "loss": 0.9872, "step": 4587 }, { "epoch": 0.9327099003862573, "grad_norm": 0.13177639245986938, "learning_rate": 0.00010680362046171057, "loss": 0.9844, "step": 4588 }, { "epoch": 0.9329131937385647, "grad_norm": 0.13709305226802826, "learning_rate": 0.0001067832807891793, "loss": 1.0352, "step": 4589 }, { "epoch": 0.9331164870908721, "grad_norm": 0.13158872723579407, "learning_rate": 0.00010676294111664802, "loss": 0.9291, "step": 4590 }, { "epoch": 0.9333197804431795, "grad_norm": 0.1440209448337555, "learning_rate": 0.00010674260144411676, "loss": 1.1299, "step": 4591 }, { "epoch": 0.9335230737954869, "grad_norm": 0.14185591042041779, "learning_rate": 0.00010672226177158549, "loss": 1.0265, "step": 4592 }, { "epoch": 0.9337263671477942, "grad_norm": 0.13720087707042694, "learning_rate": 0.00010670192209905421, "loss": 1.065, "step": 4593 }, { "epoch": 0.9339296605001016, "grad_norm": 0.1312158852815628, "learning_rate": 0.00010668158242652294, "loss": 0.97, "step": 4594 }, { "epoch": 0.934132953852409, "grad_norm": 0.13442127406597137, "learning_rate": 0.00010666124275399167, "loss": 1.0517, "step": 4595 }, { "epoch": 0.9343362472047164, "grad_norm": 0.1302952766418457, "learning_rate": 0.0001066409030814604, "loss": 1.0052, "step": 4596 }, { "epoch": 0.9345395405570238, "grad_norm": 0.14878568053245544, "learning_rate": 0.00010662056340892912, "loss": 1.2183, "step": 4597 }, { "epoch": 0.9347428339093312, "grad_norm": 0.13958996534347534, "learning_rate": 0.00010660022373639785, "loss": 1.0758, "step": 4598 }, { "epoch": 0.9349461272616385, "grad_norm": 0.14994315803050995, "learning_rate": 0.00010657988406386658, "loss": 1.1696, "step": 4599 }, { "epoch": 0.9351494206139459, "grad_norm": 0.13476385176181793, "learning_rate": 0.00010655954439133531, "loss": 0.9507, "step": 4600 }, { "epoch": 0.9353527139662533, "grad_norm": 0.13115908205509186, "learning_rate": 0.00010653920471880403, "loss": 1.1128, "step": 4601 }, { "epoch": 0.9355560073185607, "grad_norm": 0.12260119616985321, "learning_rate": 0.00010651886504627276, "loss": 0.8933, "step": 4602 }, { "epoch": 0.9357593006708681, "grad_norm": 0.12978796660900116, "learning_rate": 0.0001064985253737415, "loss": 1.0094, "step": 4603 }, { "epoch": 0.9359625940231754, "grad_norm": 0.13168974220752716, "learning_rate": 0.00010647818570121022, "loss": 1.0007, "step": 4604 }, { "epoch": 0.9361658873754828, "grad_norm": 0.13790659606456757, "learning_rate": 0.00010645784602867894, "loss": 0.9609, "step": 4605 }, { "epoch": 0.9363691807277902, "grad_norm": 0.13622581958770752, "learning_rate": 0.00010643750635614767, "loss": 0.9742, "step": 4606 }, { "epoch": 0.9365724740800976, "grad_norm": 0.13826538622379303, "learning_rate": 0.00010641716668361641, "loss": 1.1061, "step": 4607 }, { "epoch": 0.936775767432405, "grad_norm": 0.13676097989082336, "learning_rate": 0.00010639682701108513, "loss": 1.1522, "step": 4608 }, { "epoch": 0.9369790607847124, "grad_norm": 0.13370144367218018, "learning_rate": 0.00010637648733855386, "loss": 0.9657, "step": 4609 }, { "epoch": 0.9371823541370197, "grad_norm": 0.12708503007888794, "learning_rate": 0.00010635614766602258, "loss": 1.0229, "step": 4610 }, { "epoch": 0.9373856474893271, "grad_norm": 0.14301814138889313, "learning_rate": 0.00010633580799349132, "loss": 0.9909, "step": 4611 }, { "epoch": 0.9375889408416345, "grad_norm": 0.14644454419612885, "learning_rate": 0.00010631546832096004, "loss": 1.1386, "step": 4612 }, { "epoch": 0.9377922341939419, "grad_norm": 0.13054661452770233, "learning_rate": 0.00010629512864842877, "loss": 0.9233, "step": 4613 }, { "epoch": 0.9379955275462493, "grad_norm": 0.13898830115795135, "learning_rate": 0.00010627478897589749, "loss": 1.2265, "step": 4614 }, { "epoch": 0.9381988208985567, "grad_norm": 0.13503706455230713, "learning_rate": 0.00010625444930336622, "loss": 1.0694, "step": 4615 }, { "epoch": 0.938402114250864, "grad_norm": 0.12382601946592331, "learning_rate": 0.00010623410963083495, "loss": 0.8756, "step": 4616 }, { "epoch": 0.9386054076031713, "grad_norm": 0.12934790551662445, "learning_rate": 0.00010621376995830368, "loss": 1.0435, "step": 4617 }, { "epoch": 0.9388087009554787, "grad_norm": 0.14618442952632904, "learning_rate": 0.0001061934302857724, "loss": 1.209, "step": 4618 }, { "epoch": 0.9390119943077861, "grad_norm": 0.1417202651500702, "learning_rate": 0.00010617309061324113, "loss": 1.1805, "step": 4619 }, { "epoch": 0.9392152876600935, "grad_norm": 0.14158600568771362, "learning_rate": 0.00010615275094070987, "loss": 1.0833, "step": 4620 }, { "epoch": 0.9394185810124009, "grad_norm": 0.13389776647090912, "learning_rate": 0.00010613241126817859, "loss": 1.1245, "step": 4621 }, { "epoch": 0.9396218743647082, "grad_norm": 0.1260322481393814, "learning_rate": 0.00010611207159564731, "loss": 0.8908, "step": 4622 }, { "epoch": 0.9398251677170156, "grad_norm": 0.1375802904367447, "learning_rate": 0.00010609173192311604, "loss": 1.0062, "step": 4623 }, { "epoch": 0.940028461069323, "grad_norm": 0.13388384878635406, "learning_rate": 0.00010607139225058478, "loss": 1.082, "step": 4624 }, { "epoch": 0.9402317544216304, "grad_norm": 0.13197797536849976, "learning_rate": 0.0001060510525780535, "loss": 0.9478, "step": 4625 }, { "epoch": 0.9404350477739378, "grad_norm": 0.1293218582868576, "learning_rate": 0.00010603071290552223, "loss": 0.9416, "step": 4626 }, { "epoch": 0.9406383411262452, "grad_norm": 0.1269448846578598, "learning_rate": 0.00010601037323299095, "loss": 0.9556, "step": 4627 }, { "epoch": 0.9408416344785525, "grad_norm": 0.15124647319316864, "learning_rate": 0.00010599003356045969, "loss": 1.1097, "step": 4628 }, { "epoch": 0.9410449278308599, "grad_norm": 0.12264547497034073, "learning_rate": 0.00010596969388792841, "loss": 1.0336, "step": 4629 }, { "epoch": 0.9412482211831673, "grad_norm": 0.13190335035324097, "learning_rate": 0.00010594935421539714, "loss": 1.0159, "step": 4630 }, { "epoch": 0.9414515145354747, "grad_norm": 0.13107061386108398, "learning_rate": 0.00010592901454286586, "loss": 1.0756, "step": 4631 }, { "epoch": 0.9416548078877821, "grad_norm": 0.13843277096748352, "learning_rate": 0.0001059086748703346, "loss": 0.9752, "step": 4632 }, { "epoch": 0.9418581012400894, "grad_norm": 0.12323298305273056, "learning_rate": 0.00010588833519780332, "loss": 0.8268, "step": 4633 }, { "epoch": 0.9420613945923968, "grad_norm": 0.136516734957695, "learning_rate": 0.00010586799552527205, "loss": 1.0694, "step": 4634 }, { "epoch": 0.9422646879447042, "grad_norm": 0.13739456236362457, "learning_rate": 0.00010584765585274077, "loss": 1.0251, "step": 4635 }, { "epoch": 0.9424679812970116, "grad_norm": 0.13358846306800842, "learning_rate": 0.00010582731618020951, "loss": 1.1104, "step": 4636 }, { "epoch": 0.942671274649319, "grad_norm": 0.13964349031448364, "learning_rate": 0.00010580697650767824, "loss": 0.987, "step": 4637 }, { "epoch": 0.9428745680016264, "grad_norm": 0.1372976303100586, "learning_rate": 0.00010578663683514696, "loss": 0.9547, "step": 4638 }, { "epoch": 0.9430778613539337, "grad_norm": 0.14359022676944733, "learning_rate": 0.00010576629716261568, "loss": 1.1619, "step": 4639 }, { "epoch": 0.9432811547062411, "grad_norm": 0.12636056542396545, "learning_rate": 0.00010574595749008442, "loss": 0.9485, "step": 4640 }, { "epoch": 0.9434844480585485, "grad_norm": 0.15746049582958221, "learning_rate": 0.00010572561781755315, "loss": 1.2212, "step": 4641 }, { "epoch": 0.9436877414108559, "grad_norm": 0.13888253271579742, "learning_rate": 0.00010570527814502187, "loss": 1.1069, "step": 4642 }, { "epoch": 0.9438910347631633, "grad_norm": 0.12905919551849365, "learning_rate": 0.0001056849384724906, "loss": 1.0055, "step": 4643 }, { "epoch": 0.9440943281154707, "grad_norm": 0.14576807618141174, "learning_rate": 0.00010566459879995933, "loss": 1.1185, "step": 4644 }, { "epoch": 0.944297621467778, "grad_norm": 0.15471163392066956, "learning_rate": 0.00010564425912742806, "loss": 1.2705, "step": 4645 }, { "epoch": 0.9445009148200854, "grad_norm": 0.1389993131160736, "learning_rate": 0.00010562391945489678, "loss": 1.0862, "step": 4646 }, { "epoch": 0.9447042081723928, "grad_norm": 0.1482502818107605, "learning_rate": 0.00010560357978236551, "loss": 1.1031, "step": 4647 }, { "epoch": 0.9449075015247002, "grad_norm": 0.12542898952960968, "learning_rate": 0.00010558324010983425, "loss": 0.8559, "step": 4648 }, { "epoch": 0.9451107948770076, "grad_norm": 0.12403812259435654, "learning_rate": 0.00010556290043730297, "loss": 0.9117, "step": 4649 }, { "epoch": 0.945314088229315, "grad_norm": 0.11655326187610626, "learning_rate": 0.0001055425607647717, "loss": 0.9568, "step": 4650 }, { "epoch": 0.9455173815816222, "grad_norm": 0.13780179619789124, "learning_rate": 0.00010552222109224042, "loss": 1.0537, "step": 4651 }, { "epoch": 0.9457206749339296, "grad_norm": 0.13035158812999725, "learning_rate": 0.00010550188141970916, "loss": 1.0461, "step": 4652 }, { "epoch": 0.945923968286237, "grad_norm": 0.13049277663230896, "learning_rate": 0.00010548154174717788, "loss": 0.8681, "step": 4653 }, { "epoch": 0.9461272616385444, "grad_norm": 0.149881973862648, "learning_rate": 0.0001054612020746466, "loss": 1.2271, "step": 4654 }, { "epoch": 0.9463305549908518, "grad_norm": 0.11799302697181702, "learning_rate": 0.00010544086240211533, "loss": 0.8407, "step": 4655 }, { "epoch": 0.9465338483431591, "grad_norm": 0.16021724045276642, "learning_rate": 0.00010542052272958404, "loss": 1.2769, "step": 4656 }, { "epoch": 0.9467371416954665, "grad_norm": 0.14058107137680054, "learning_rate": 0.00010540018305705279, "loss": 1.0519, "step": 4657 }, { "epoch": 0.9469404350477739, "grad_norm": 0.14473353326320648, "learning_rate": 0.00010537984338452152, "loss": 1.0002, "step": 4658 }, { "epoch": 0.9471437284000813, "grad_norm": 0.12458368390798569, "learning_rate": 0.00010535950371199024, "loss": 0.9717, "step": 4659 }, { "epoch": 0.9473470217523887, "grad_norm": 0.13984310626983643, "learning_rate": 0.00010533916403945897, "loss": 1.0642, "step": 4660 }, { "epoch": 0.9475503151046961, "grad_norm": 0.13739560544490814, "learning_rate": 0.0001053188243669277, "loss": 1.0281, "step": 4661 }, { "epoch": 0.9477536084570034, "grad_norm": 0.1382581740617752, "learning_rate": 0.00010529848469439643, "loss": 1.0816, "step": 4662 }, { "epoch": 0.9479569018093108, "grad_norm": 0.14696218073368073, "learning_rate": 0.00010527814502186515, "loss": 1.2176, "step": 4663 }, { "epoch": 0.9481601951616182, "grad_norm": 0.12849698960781097, "learning_rate": 0.00010525780534933388, "loss": 1.0097, "step": 4664 }, { "epoch": 0.9483634885139256, "grad_norm": 0.14687961339950562, "learning_rate": 0.00010523746567680262, "loss": 1.1417, "step": 4665 }, { "epoch": 0.948566781866233, "grad_norm": 0.14504985511302948, "learning_rate": 0.00010521712600427134, "loss": 1.1261, "step": 4666 }, { "epoch": 0.9487700752185404, "grad_norm": 0.12274103611707687, "learning_rate": 0.00010519678633174006, "loss": 0.9646, "step": 4667 }, { "epoch": 0.9489733685708477, "grad_norm": 0.11958125233650208, "learning_rate": 0.00010517644665920879, "loss": 0.9111, "step": 4668 }, { "epoch": 0.9491766619231551, "grad_norm": 0.14991825819015503, "learning_rate": 0.00010515610698667753, "loss": 1.1415, "step": 4669 }, { "epoch": 0.9493799552754625, "grad_norm": 0.14164093136787415, "learning_rate": 0.00010513576731414625, "loss": 1.2477, "step": 4670 }, { "epoch": 0.9495832486277699, "grad_norm": 0.13947711884975433, "learning_rate": 0.00010511542764161498, "loss": 1.0371, "step": 4671 }, { "epoch": 0.9497865419800773, "grad_norm": 0.12946373224258423, "learning_rate": 0.0001050950879690837, "loss": 1.0401, "step": 4672 }, { "epoch": 0.9499898353323847, "grad_norm": 0.1340869963169098, "learning_rate": 0.00010507474829655244, "loss": 1.0419, "step": 4673 }, { "epoch": 0.950193128684692, "grad_norm": 0.1419667899608612, "learning_rate": 0.00010505440862402116, "loss": 1.0027, "step": 4674 }, { "epoch": 0.9503964220369994, "grad_norm": 0.13332538306713104, "learning_rate": 0.00010503406895148989, "loss": 0.8884, "step": 4675 }, { "epoch": 0.9505997153893068, "grad_norm": 0.13271865248680115, "learning_rate": 0.00010501372927895861, "loss": 1.005, "step": 4676 }, { "epoch": 0.9508030087416142, "grad_norm": 0.15168990194797516, "learning_rate": 0.00010499338960642735, "loss": 1.168, "step": 4677 }, { "epoch": 0.9510063020939216, "grad_norm": 0.12381100654602051, "learning_rate": 0.00010497304993389607, "loss": 1.0467, "step": 4678 }, { "epoch": 0.951209595446229, "grad_norm": 0.14847104251384735, "learning_rate": 0.0001049527102613648, "loss": 1.1576, "step": 4679 }, { "epoch": 0.9514128887985362, "grad_norm": 0.1450118124485016, "learning_rate": 0.00010493237058883352, "loss": 1.0424, "step": 4680 }, { "epoch": 0.9516161821508436, "grad_norm": 0.13652002811431885, "learning_rate": 0.00010491203091630226, "loss": 1.0449, "step": 4681 }, { "epoch": 0.951819475503151, "grad_norm": 0.14836739003658295, "learning_rate": 0.00010489169124377099, "loss": 1.1076, "step": 4682 }, { "epoch": 0.9520227688554584, "grad_norm": 0.12465627491474152, "learning_rate": 0.00010487135157123971, "loss": 1.0594, "step": 4683 }, { "epoch": 0.9522260622077658, "grad_norm": 0.14319440722465515, "learning_rate": 0.00010485101189870843, "loss": 1.0954, "step": 4684 }, { "epoch": 0.9524293555600731, "grad_norm": 0.1305132359266281, "learning_rate": 0.00010483067222617717, "loss": 1.126, "step": 4685 }, { "epoch": 0.9526326489123805, "grad_norm": 0.14411622285842896, "learning_rate": 0.0001048103325536459, "loss": 1.069, "step": 4686 }, { "epoch": 0.9528359422646879, "grad_norm": 0.1547628790140152, "learning_rate": 0.00010478999288111462, "loss": 1.185, "step": 4687 }, { "epoch": 0.9530392356169953, "grad_norm": 0.1339641660451889, "learning_rate": 0.00010476965320858335, "loss": 1.1261, "step": 4688 }, { "epoch": 0.9532425289693027, "grad_norm": 0.15180015563964844, "learning_rate": 0.00010474931353605208, "loss": 1.2143, "step": 4689 }, { "epoch": 0.9534458223216101, "grad_norm": 0.13662739098072052, "learning_rate": 0.00010472897386352081, "loss": 1.1649, "step": 4690 }, { "epoch": 0.9536491156739174, "grad_norm": 0.14575301110744476, "learning_rate": 0.00010470863419098953, "loss": 1.0256, "step": 4691 }, { "epoch": 0.9538524090262248, "grad_norm": 0.13986723124980927, "learning_rate": 0.00010468829451845826, "loss": 1.1673, "step": 4692 }, { "epoch": 0.9540557023785322, "grad_norm": 0.14431442320346832, "learning_rate": 0.000104667954845927, "loss": 1.1316, "step": 4693 }, { "epoch": 0.9542589957308396, "grad_norm": 0.13795843720436096, "learning_rate": 0.00010464761517339572, "loss": 1.0331, "step": 4694 }, { "epoch": 0.954462289083147, "grad_norm": 0.1303069293498993, "learning_rate": 0.00010462727550086444, "loss": 0.9968, "step": 4695 }, { "epoch": 0.9546655824354544, "grad_norm": 0.15158216655254364, "learning_rate": 0.00010460693582833317, "loss": 1.1864, "step": 4696 }, { "epoch": 0.9548688757877617, "grad_norm": 0.1332157850265503, "learning_rate": 0.00010458659615580188, "loss": 1.0648, "step": 4697 }, { "epoch": 0.9550721691400691, "grad_norm": 0.14668650925159454, "learning_rate": 0.00010456625648327063, "loss": 1.0517, "step": 4698 }, { "epoch": 0.9552754624923765, "grad_norm": 0.15106475353240967, "learning_rate": 0.00010454591681073936, "loss": 1.2643, "step": 4699 }, { "epoch": 0.9554787558446839, "grad_norm": 0.1326945424079895, "learning_rate": 0.00010452557713820808, "loss": 1.0154, "step": 4700 }, { "epoch": 0.9556820491969913, "grad_norm": 0.14156071841716766, "learning_rate": 0.00010450523746567679, "loss": 0.988, "step": 4701 }, { "epoch": 0.9558853425492987, "grad_norm": 0.13619528710842133, "learning_rate": 0.00010448489779314554, "loss": 0.9761, "step": 4702 }, { "epoch": 0.956088635901606, "grad_norm": 0.1572863608598709, "learning_rate": 0.00010446455812061427, "loss": 1.2895, "step": 4703 }, { "epoch": 0.9562919292539134, "grad_norm": 0.13423630595207214, "learning_rate": 0.00010444421844808299, "loss": 0.9817, "step": 4704 }, { "epoch": 0.9564952226062208, "grad_norm": 0.13150696456432343, "learning_rate": 0.0001044238787755517, "loss": 0.9353, "step": 4705 }, { "epoch": 0.9566985159585282, "grad_norm": 0.13118426501750946, "learning_rate": 0.00010440353910302045, "loss": 0.956, "step": 4706 }, { "epoch": 0.9569018093108356, "grad_norm": 0.1445060670375824, "learning_rate": 0.00010438319943048918, "loss": 1.1343, "step": 4707 }, { "epoch": 0.957105102663143, "grad_norm": 0.12421584874391556, "learning_rate": 0.0001043628597579579, "loss": 0.9257, "step": 4708 }, { "epoch": 0.9573083960154503, "grad_norm": 0.1518603265285492, "learning_rate": 0.00010434252008542661, "loss": 1.0934, "step": 4709 }, { "epoch": 0.9575116893677577, "grad_norm": 0.13642071187496185, "learning_rate": 0.00010432218041289537, "loss": 1.1008, "step": 4710 }, { "epoch": 0.9577149827200651, "grad_norm": 0.13501964509487152, "learning_rate": 0.00010430184074036409, "loss": 1.0246, "step": 4711 }, { "epoch": 0.9579182760723725, "grad_norm": 0.14059419929981232, "learning_rate": 0.00010428150106783281, "loss": 1.0548, "step": 4712 }, { "epoch": 0.9581215694246799, "grad_norm": 0.13295401632785797, "learning_rate": 0.00010426116139530153, "loss": 1.0868, "step": 4713 }, { "epoch": 0.9583248627769871, "grad_norm": 0.1419042944908142, "learning_rate": 0.00010424082172277028, "loss": 1.0969, "step": 4714 }, { "epoch": 0.9585281561292945, "grad_norm": 0.13607093691825867, "learning_rate": 0.000104220482050239, "loss": 1.1988, "step": 4715 }, { "epoch": 0.9587314494816019, "grad_norm": 0.15295760333538055, "learning_rate": 0.00010420014237770773, "loss": 1.1384, "step": 4716 }, { "epoch": 0.9589347428339093, "grad_norm": 0.13830776512622833, "learning_rate": 0.00010417980270517644, "loss": 1.1107, "step": 4717 }, { "epoch": 0.9591380361862167, "grad_norm": 0.15392519533634186, "learning_rate": 0.00010415946303264519, "loss": 1.3216, "step": 4718 }, { "epoch": 0.9593413295385241, "grad_norm": 0.1344476342201233, "learning_rate": 0.00010413912336011391, "loss": 1.1368, "step": 4719 }, { "epoch": 0.9595446228908314, "grad_norm": 0.142112597823143, "learning_rate": 0.00010411878368758264, "loss": 0.997, "step": 4720 }, { "epoch": 0.9597479162431388, "grad_norm": 0.12999044358730316, "learning_rate": 0.00010409844401505136, "loss": 0.978, "step": 4721 }, { "epoch": 0.9599512095954462, "grad_norm": 0.13146638870239258, "learning_rate": 0.0001040781043425201, "loss": 1.0563, "step": 4722 }, { "epoch": 0.9601545029477536, "grad_norm": 0.15361693501472473, "learning_rate": 0.00010405776466998882, "loss": 1.1783, "step": 4723 }, { "epoch": 0.960357796300061, "grad_norm": 0.13315477967262268, "learning_rate": 0.00010403742499745755, "loss": 0.97, "step": 4724 }, { "epoch": 0.9605610896523684, "grad_norm": 0.13661111891269684, "learning_rate": 0.00010401708532492627, "loss": 1.06, "step": 4725 }, { "epoch": 0.9607643830046757, "grad_norm": 0.13284547626972198, "learning_rate": 0.00010399674565239501, "loss": 1.0313, "step": 4726 }, { "epoch": 0.9609676763569831, "grad_norm": 0.13400302827358246, "learning_rate": 0.00010397640597986374, "loss": 1.0858, "step": 4727 }, { "epoch": 0.9611709697092905, "grad_norm": 0.12329299002885818, "learning_rate": 0.00010395606630733246, "loss": 0.8973, "step": 4728 }, { "epoch": 0.9613742630615979, "grad_norm": 0.14118091762065887, "learning_rate": 0.00010393572663480118, "loss": 1.0407, "step": 4729 }, { "epoch": 0.9615775564139053, "grad_norm": 0.13104970753192902, "learning_rate": 0.00010391538696226992, "loss": 0.9438, "step": 4730 }, { "epoch": 0.9617808497662127, "grad_norm": 0.12976235151290894, "learning_rate": 0.00010389504728973865, "loss": 0.989, "step": 4731 }, { "epoch": 0.96198414311852, "grad_norm": 0.12546932697296143, "learning_rate": 0.00010387470761720737, "loss": 0.9445, "step": 4732 }, { "epoch": 0.9621874364708274, "grad_norm": 0.14191336929798126, "learning_rate": 0.0001038543679446761, "loss": 1.132, "step": 4733 }, { "epoch": 0.9623907298231348, "grad_norm": 0.14218741655349731, "learning_rate": 0.00010383402827214483, "loss": 1.046, "step": 4734 }, { "epoch": 0.9625940231754422, "grad_norm": 0.15692010521888733, "learning_rate": 0.00010381368859961356, "loss": 1.1352, "step": 4735 }, { "epoch": 0.9627973165277496, "grad_norm": 0.1295771300792694, "learning_rate": 0.00010379334892708228, "loss": 1.0675, "step": 4736 }, { "epoch": 0.9630006098800569, "grad_norm": 0.15568415820598602, "learning_rate": 0.00010377300925455101, "loss": 1.132, "step": 4737 }, { "epoch": 0.9632039032323643, "grad_norm": 0.12996648252010345, "learning_rate": 0.00010375266958201972, "loss": 1.0338, "step": 4738 }, { "epoch": 0.9634071965846717, "grad_norm": 0.14026613533496857, "learning_rate": 0.00010373232990948847, "loss": 1.1937, "step": 4739 }, { "epoch": 0.9636104899369791, "grad_norm": 0.13028547167778015, "learning_rate": 0.0001037119902369572, "loss": 0.8526, "step": 4740 }, { "epoch": 0.9638137832892865, "grad_norm": 0.12742145359516144, "learning_rate": 0.00010369165056442592, "loss": 0.85, "step": 4741 }, { "epoch": 0.9640170766415939, "grad_norm": 0.11644089221954346, "learning_rate": 0.00010367131089189463, "loss": 0.8878, "step": 4742 }, { "epoch": 0.9642203699939011, "grad_norm": 0.1318705528974533, "learning_rate": 0.00010365097121936338, "loss": 1.0346, "step": 4743 }, { "epoch": 0.9644236633462085, "grad_norm": 0.14477600157260895, "learning_rate": 0.0001036306315468321, "loss": 1.1933, "step": 4744 }, { "epoch": 0.964626956698516, "grad_norm": 0.13914555311203003, "learning_rate": 0.00010361029187430083, "loss": 1.1292, "step": 4745 }, { "epoch": 0.9648302500508233, "grad_norm": 0.1304524540901184, "learning_rate": 0.00010358995220176954, "loss": 0.9745, "step": 4746 }, { "epoch": 0.9650335434031307, "grad_norm": 0.1401352435350418, "learning_rate": 0.00010356961252923829, "loss": 1.072, "step": 4747 }, { "epoch": 0.9652368367554381, "grad_norm": 0.1341739445924759, "learning_rate": 0.00010354927285670702, "loss": 0.9751, "step": 4748 }, { "epoch": 0.9654401301077454, "grad_norm": 0.13538521528244019, "learning_rate": 0.00010352893318417574, "loss": 1.0814, "step": 4749 }, { "epoch": 0.9656434234600528, "grad_norm": 0.14047326147556305, "learning_rate": 0.00010350859351164445, "loss": 1.1741, "step": 4750 }, { "epoch": 0.9658467168123602, "grad_norm": 0.13722112774848938, "learning_rate": 0.0001034882538391132, "loss": 0.928, "step": 4751 }, { "epoch": 0.9660500101646676, "grad_norm": 0.12714186310768127, "learning_rate": 0.00010346791416658193, "loss": 0.7725, "step": 4752 }, { "epoch": 0.966253303516975, "grad_norm": 0.12423626333475113, "learning_rate": 0.00010344757449405065, "loss": 0.8907, "step": 4753 }, { "epoch": 0.9664565968692824, "grad_norm": 0.1605733186006546, "learning_rate": 0.00010342723482151936, "loss": 1.2174, "step": 4754 }, { "epoch": 0.9666598902215897, "grad_norm": 0.14010462164878845, "learning_rate": 0.00010340689514898812, "loss": 1.0363, "step": 4755 }, { "epoch": 0.9668631835738971, "grad_norm": 0.13562703132629395, "learning_rate": 0.00010338655547645684, "loss": 0.9911, "step": 4756 }, { "epoch": 0.9670664769262045, "grad_norm": 0.1469573825597763, "learning_rate": 0.00010336621580392556, "loss": 1.0719, "step": 4757 }, { "epoch": 0.9672697702785119, "grad_norm": 0.1374790370464325, "learning_rate": 0.00010334587613139428, "loss": 1.1695, "step": 4758 }, { "epoch": 0.9674730636308193, "grad_norm": 0.12282276153564453, "learning_rate": 0.00010332553645886303, "loss": 0.9288, "step": 4759 }, { "epoch": 0.9676763569831267, "grad_norm": 0.12597915530204773, "learning_rate": 0.00010330519678633175, "loss": 0.9853, "step": 4760 }, { "epoch": 0.967879650335434, "grad_norm": 0.13501757383346558, "learning_rate": 0.00010328485711380048, "loss": 1.0153, "step": 4761 }, { "epoch": 0.9680829436877414, "grad_norm": 0.1333313286304474, "learning_rate": 0.00010326451744126919, "loss": 1.0339, "step": 4762 }, { "epoch": 0.9682862370400488, "grad_norm": 0.13838358223438263, "learning_rate": 0.00010324417776873794, "loss": 0.9584, "step": 4763 }, { "epoch": 0.9684895303923562, "grad_norm": 0.14973820745944977, "learning_rate": 0.00010322383809620666, "loss": 0.9757, "step": 4764 }, { "epoch": 0.9686928237446636, "grad_norm": 0.12162914872169495, "learning_rate": 0.00010320349842367539, "loss": 0.9844, "step": 4765 }, { "epoch": 0.9688961170969709, "grad_norm": 0.1409245729446411, "learning_rate": 0.0001031831587511441, "loss": 1.0499, "step": 4766 }, { "epoch": 0.9690994104492783, "grad_norm": 0.1342407464981079, "learning_rate": 0.00010316281907861285, "loss": 1.0489, "step": 4767 }, { "epoch": 0.9693027038015857, "grad_norm": 0.12758475542068481, "learning_rate": 0.00010314247940608157, "loss": 0.8951, "step": 4768 }, { "epoch": 0.9695059971538931, "grad_norm": 0.13202863931655884, "learning_rate": 0.0001031221397335503, "loss": 1.0063, "step": 4769 }, { "epoch": 0.9697092905062005, "grad_norm": 0.14444518089294434, "learning_rate": 0.00010310180006101901, "loss": 1.1248, "step": 4770 }, { "epoch": 0.9699125838585079, "grad_norm": 0.12190812826156616, "learning_rate": 0.00010308146038848776, "loss": 0.9866, "step": 4771 }, { "epoch": 0.9701158772108152, "grad_norm": 0.1404780000448227, "learning_rate": 0.00010306112071595649, "loss": 1.1731, "step": 4772 }, { "epoch": 0.9703191705631226, "grad_norm": 0.13559852540493011, "learning_rate": 0.00010304078104342521, "loss": 0.8862, "step": 4773 }, { "epoch": 0.97052246391543, "grad_norm": 0.12813040614128113, "learning_rate": 0.00010302044137089392, "loss": 1.0137, "step": 4774 }, { "epoch": 0.9707257572677374, "grad_norm": 0.14801639318466187, "learning_rate": 0.00010300010169836267, "loss": 1.1613, "step": 4775 }, { "epoch": 0.9709290506200448, "grad_norm": 0.13504531979560852, "learning_rate": 0.0001029797620258314, "loss": 1.0006, "step": 4776 }, { "epoch": 0.9711323439723522, "grad_norm": 0.14465901255607605, "learning_rate": 0.00010295942235330012, "loss": 1.2234, "step": 4777 }, { "epoch": 0.9713356373246594, "grad_norm": 0.1441653072834015, "learning_rate": 0.00010293908268076885, "loss": 1.1343, "step": 4778 }, { "epoch": 0.9715389306769668, "grad_norm": 0.14294147491455078, "learning_rate": 0.00010291874300823756, "loss": 1.0931, "step": 4779 }, { "epoch": 0.9717422240292742, "grad_norm": 0.13316182792186737, "learning_rate": 0.00010289840333570631, "loss": 0.9781, "step": 4780 }, { "epoch": 0.9719455173815816, "grad_norm": 0.12570516765117645, "learning_rate": 0.00010287806366317503, "loss": 0.9451, "step": 4781 }, { "epoch": 0.972148810733889, "grad_norm": 0.14120420813560486, "learning_rate": 0.00010285772399064376, "loss": 1.0823, "step": 4782 }, { "epoch": 0.9723521040861964, "grad_norm": 0.12957200407981873, "learning_rate": 0.00010283738431811247, "loss": 0.9029, "step": 4783 }, { "epoch": 0.9725553974385037, "grad_norm": 0.1534145623445511, "learning_rate": 0.00010281704464558122, "loss": 1.0809, "step": 4784 }, { "epoch": 0.9727586907908111, "grad_norm": 0.1441192328929901, "learning_rate": 0.00010279670497304994, "loss": 1.0464, "step": 4785 }, { "epoch": 0.9729619841431185, "grad_norm": 0.14043961465358734, "learning_rate": 0.00010277636530051867, "loss": 1.2524, "step": 4786 }, { "epoch": 0.9731652774954259, "grad_norm": 0.1316906362771988, "learning_rate": 0.00010275602562798738, "loss": 0.9884, "step": 4787 }, { "epoch": 0.9733685708477333, "grad_norm": 0.14289647340774536, "learning_rate": 0.00010273568595545613, "loss": 1.0516, "step": 4788 }, { "epoch": 0.9735718642000406, "grad_norm": 0.14448580145835876, "learning_rate": 0.00010271534628292486, "loss": 1.0681, "step": 4789 }, { "epoch": 0.973775157552348, "grad_norm": 0.1232059895992279, "learning_rate": 0.00010269500661039358, "loss": 0.8642, "step": 4790 }, { "epoch": 0.9739784509046554, "grad_norm": 0.14698442816734314, "learning_rate": 0.00010267466693786229, "loss": 1.3623, "step": 4791 }, { "epoch": 0.9741817442569628, "grad_norm": 0.13110321760177612, "learning_rate": 0.00010265432726533104, "loss": 0.9757, "step": 4792 }, { "epoch": 0.9743850376092702, "grad_norm": 0.14491407573223114, "learning_rate": 0.00010263398759279977, "loss": 1.0556, "step": 4793 }, { "epoch": 0.9745883309615776, "grad_norm": 0.13034255802631378, "learning_rate": 0.00010261364792026849, "loss": 1.0508, "step": 4794 }, { "epoch": 0.9747916243138849, "grad_norm": 0.14367951452732086, "learning_rate": 0.0001025933082477372, "loss": 1.0724, "step": 4795 }, { "epoch": 0.9749949176661923, "grad_norm": 0.14647988975048065, "learning_rate": 0.00010257296857520595, "loss": 1.2706, "step": 4796 }, { "epoch": 0.9751982110184997, "grad_norm": 0.1294867992401123, "learning_rate": 0.00010255262890267468, "loss": 0.8556, "step": 4797 }, { "epoch": 0.9754015043708071, "grad_norm": 0.1383471041917801, "learning_rate": 0.0001025322892301434, "loss": 1.0519, "step": 4798 }, { "epoch": 0.9756047977231145, "grad_norm": 0.12977120280265808, "learning_rate": 0.00010251194955761211, "loss": 0.8823, "step": 4799 }, { "epoch": 0.9758080910754219, "grad_norm": 0.13635462522506714, "learning_rate": 0.00010249160988508087, "loss": 1.0164, "step": 4800 }, { "epoch": 0.9760113844277292, "grad_norm": 0.14203500747680664, "learning_rate": 0.00010247127021254959, "loss": 1.0384, "step": 4801 }, { "epoch": 0.9762146777800366, "grad_norm": 0.12203938513994217, "learning_rate": 0.00010245093054001831, "loss": 0.9222, "step": 4802 }, { "epoch": 0.976417971132344, "grad_norm": 0.16445662081241608, "learning_rate": 0.00010243059086748703, "loss": 1.2655, "step": 4803 }, { "epoch": 0.9766212644846514, "grad_norm": 0.13465256989002228, "learning_rate": 0.00010241025119495578, "loss": 1.0028, "step": 4804 }, { "epoch": 0.9768245578369588, "grad_norm": 0.13303688168525696, "learning_rate": 0.0001023899115224245, "loss": 0.9123, "step": 4805 }, { "epoch": 0.9770278511892662, "grad_norm": 0.15964846312999725, "learning_rate": 0.00010236957184989323, "loss": 1.203, "step": 4806 }, { "epoch": 0.9772311445415734, "grad_norm": 0.13582561910152435, "learning_rate": 0.00010234923217736194, "loss": 1.0838, "step": 4807 }, { "epoch": 0.9774344378938808, "grad_norm": 0.12856504321098328, "learning_rate": 0.00010232889250483069, "loss": 1.075, "step": 4808 }, { "epoch": 0.9776377312461882, "grad_norm": 0.15734675526618958, "learning_rate": 0.00010230855283229941, "loss": 1.0715, "step": 4809 }, { "epoch": 0.9778410245984956, "grad_norm": 0.12550866603851318, "learning_rate": 0.00010228821315976814, "loss": 0.9564, "step": 4810 }, { "epoch": 0.978044317950803, "grad_norm": 0.14596353471279144, "learning_rate": 0.00010226787348723685, "loss": 1.1543, "step": 4811 }, { "epoch": 0.9782476113031104, "grad_norm": 0.13755320012569427, "learning_rate": 0.0001022475338147056, "loss": 1.1465, "step": 4812 }, { "epoch": 0.9784509046554177, "grad_norm": 0.15162555873394012, "learning_rate": 0.00010222719414217432, "loss": 1.1004, "step": 4813 }, { "epoch": 0.9786541980077251, "grad_norm": 0.1351086050271988, "learning_rate": 0.00010220685446964305, "loss": 1.0812, "step": 4814 }, { "epoch": 0.9788574913600325, "grad_norm": 0.14810827374458313, "learning_rate": 0.00010218651479711176, "loss": 1.1604, "step": 4815 }, { "epoch": 0.9790607847123399, "grad_norm": 0.13627979159355164, "learning_rate": 0.00010216617512458051, "loss": 1.0392, "step": 4816 }, { "epoch": 0.9792640780646473, "grad_norm": 0.14900024235248566, "learning_rate": 0.00010214583545204924, "loss": 1.0946, "step": 4817 }, { "epoch": 0.9794673714169546, "grad_norm": 0.14076335728168488, "learning_rate": 0.00010212549577951796, "loss": 1.0511, "step": 4818 }, { "epoch": 0.979670664769262, "grad_norm": 0.12886211276054382, "learning_rate": 0.00010210515610698667, "loss": 1.0244, "step": 4819 }, { "epoch": 0.9798739581215694, "grad_norm": 0.1440308839082718, "learning_rate": 0.0001020848164344554, "loss": 1.1237, "step": 4820 }, { "epoch": 0.9800772514738768, "grad_norm": 0.1480611264705658, "learning_rate": 0.00010206447676192415, "loss": 1.1623, "step": 4821 }, { "epoch": 0.9802805448261842, "grad_norm": 0.1361786127090454, "learning_rate": 0.00010204413708939287, "loss": 1.0342, "step": 4822 }, { "epoch": 0.9804838381784916, "grad_norm": 0.13941383361816406, "learning_rate": 0.00010202379741686158, "loss": 1.2155, "step": 4823 }, { "epoch": 0.9806871315307989, "grad_norm": 0.13382181525230408, "learning_rate": 0.0001020034577443303, "loss": 1.0683, "step": 4824 }, { "epoch": 0.9808904248831063, "grad_norm": 0.14079181849956512, "learning_rate": 0.00010198311807179906, "loss": 1.1914, "step": 4825 }, { "epoch": 0.9810937182354137, "grad_norm": 0.1520659178495407, "learning_rate": 0.00010196277839926778, "loss": 1.0614, "step": 4826 }, { "epoch": 0.9812970115877211, "grad_norm": 0.12844201922416687, "learning_rate": 0.0001019424387267365, "loss": 0.9881, "step": 4827 }, { "epoch": 0.9815003049400285, "grad_norm": 0.12539590895175934, "learning_rate": 0.00010192209905420522, "loss": 1.0109, "step": 4828 }, { "epoch": 0.9817035982923359, "grad_norm": 0.11923157423734665, "learning_rate": 0.00010190175938167397, "loss": 0.8702, "step": 4829 }, { "epoch": 0.9819068916446432, "grad_norm": 0.1370554268360138, "learning_rate": 0.0001018814197091427, "loss": 0.9956, "step": 4830 }, { "epoch": 0.9821101849969506, "grad_norm": 0.14605580270290375, "learning_rate": 0.0001018610800366114, "loss": 1.1405, "step": 4831 }, { "epoch": 0.982313478349258, "grad_norm": 0.13594669103622437, "learning_rate": 0.00010184074036408013, "loss": 1.04, "step": 4832 }, { "epoch": 0.9825167717015654, "grad_norm": 0.13905544579029083, "learning_rate": 0.00010182040069154888, "loss": 1.0421, "step": 4833 }, { "epoch": 0.9827200650538728, "grad_norm": 0.13536664843559265, "learning_rate": 0.0001018000610190176, "loss": 0.952, "step": 4834 }, { "epoch": 0.9829233584061802, "grad_norm": 0.12906044721603394, "learning_rate": 0.00010177972134648633, "loss": 1.0468, "step": 4835 }, { "epoch": 0.9831266517584875, "grad_norm": 0.14076603949069977, "learning_rate": 0.00010175938167395504, "loss": 1.03, "step": 4836 }, { "epoch": 0.9833299451107949, "grad_norm": 0.12814532220363617, "learning_rate": 0.00010173904200142379, "loss": 0.9141, "step": 4837 }, { "epoch": 0.9835332384631023, "grad_norm": 0.13187144696712494, "learning_rate": 0.00010171870232889252, "loss": 0.9983, "step": 4838 }, { "epoch": 0.9837365318154097, "grad_norm": 0.12513421475887299, "learning_rate": 0.00010169836265636124, "loss": 0.9726, "step": 4839 }, { "epoch": 0.983939825167717, "grad_norm": 0.1411403864622116, "learning_rate": 0.00010167802298382995, "loss": 1.1544, "step": 4840 }, { "epoch": 0.9841431185200243, "grad_norm": 0.13814745843410492, "learning_rate": 0.0001016576833112987, "loss": 1.0902, "step": 4841 }, { "epoch": 0.9843464118723317, "grad_norm": 0.15102095901966095, "learning_rate": 0.00010163734363876743, "loss": 1.2006, "step": 4842 }, { "epoch": 0.9845497052246391, "grad_norm": 0.12632615864276886, "learning_rate": 0.00010161700396623615, "loss": 0.9465, "step": 4843 }, { "epoch": 0.9847529985769465, "grad_norm": 0.14967390894889832, "learning_rate": 0.00010159666429370486, "loss": 1.1275, "step": 4844 }, { "epoch": 0.9849562919292539, "grad_norm": 0.12356138229370117, "learning_rate": 0.00010157632462117362, "loss": 1.0253, "step": 4845 }, { "epoch": 0.9851595852815613, "grad_norm": 0.12783940136432648, "learning_rate": 0.00010155598494864234, "loss": 1.0647, "step": 4846 }, { "epoch": 0.9853628786338686, "grad_norm": 0.12966394424438477, "learning_rate": 0.00010153564527611106, "loss": 0.961, "step": 4847 }, { "epoch": 0.985566171986176, "grad_norm": 0.13176754117012024, "learning_rate": 0.00010151530560357977, "loss": 0.9036, "step": 4848 }, { "epoch": 0.9857694653384834, "grad_norm": 0.1355212777853012, "learning_rate": 0.00010149496593104853, "loss": 1.1102, "step": 4849 }, { "epoch": 0.9859727586907908, "grad_norm": 0.14852295815944672, "learning_rate": 0.00010147462625851725, "loss": 1.1925, "step": 4850 }, { "epoch": 0.9861760520430982, "grad_norm": 0.1206142008304596, "learning_rate": 0.00010145428658598598, "loss": 0.8656, "step": 4851 }, { "epoch": 0.9863793453954056, "grad_norm": 0.1338338702917099, "learning_rate": 0.00010143394691345469, "loss": 1.0639, "step": 4852 }, { "epoch": 0.9865826387477129, "grad_norm": 0.1332140564918518, "learning_rate": 0.00010141360724092344, "loss": 1.1448, "step": 4853 }, { "epoch": 0.9867859321000203, "grad_norm": 0.14103251695632935, "learning_rate": 0.00010139326756839216, "loss": 1.174, "step": 4854 }, { "epoch": 0.9869892254523277, "grad_norm": 0.13589176535606384, "learning_rate": 0.00010137292789586089, "loss": 0.9713, "step": 4855 }, { "epoch": 0.9871925188046351, "grad_norm": 0.13444828987121582, "learning_rate": 0.0001013525882233296, "loss": 1.0798, "step": 4856 }, { "epoch": 0.9873958121569425, "grad_norm": 0.15302203595638275, "learning_rate": 0.00010133224855079835, "loss": 1.1335, "step": 4857 }, { "epoch": 0.9875991055092499, "grad_norm": 0.1333010047674179, "learning_rate": 0.00010131190887826707, "loss": 1.0179, "step": 4858 }, { "epoch": 0.9878023988615572, "grad_norm": 0.1340804398059845, "learning_rate": 0.0001012915692057358, "loss": 0.9451, "step": 4859 }, { "epoch": 0.9880056922138646, "grad_norm": 0.14129623770713806, "learning_rate": 0.00010127122953320451, "loss": 1.1247, "step": 4860 }, { "epoch": 0.988208985566172, "grad_norm": 0.1130819022655487, "learning_rate": 0.00010125088986067323, "loss": 0.9078, "step": 4861 }, { "epoch": 0.9884122789184794, "grad_norm": 0.13647128641605377, "learning_rate": 0.00010123055018814199, "loss": 1.0182, "step": 4862 }, { "epoch": 0.9886155722707868, "grad_norm": 0.1271669566631317, "learning_rate": 0.00010121021051561071, "loss": 1.067, "step": 4863 }, { "epoch": 0.9888188656230942, "grad_norm": 0.12524248659610748, "learning_rate": 0.00010118987084307942, "loss": 0.9665, "step": 4864 }, { "epoch": 0.9890221589754015, "grad_norm": 0.13268783688545227, "learning_rate": 0.00010116953117054814, "loss": 1.1719, "step": 4865 }, { "epoch": 0.9892254523277089, "grad_norm": 0.13032928109169006, "learning_rate": 0.0001011491914980169, "loss": 1.1512, "step": 4866 }, { "epoch": 0.9894287456800163, "grad_norm": 0.1325322538614273, "learning_rate": 0.00010112885182548562, "loss": 0.954, "step": 4867 }, { "epoch": 0.9896320390323237, "grad_norm": 0.13537730276584625, "learning_rate": 0.00010110851215295433, "loss": 0.9071, "step": 4868 }, { "epoch": 0.9898353323846311, "grad_norm": 0.13004449009895325, "learning_rate": 0.00010108817248042306, "loss": 1.0135, "step": 4869 }, { "epoch": 0.9900386257369383, "grad_norm": 0.1332450807094574, "learning_rate": 0.00010106783280789181, "loss": 0.9139, "step": 4870 }, { "epoch": 0.9902419190892457, "grad_norm": 0.1302601844072342, "learning_rate": 0.00010104749313536053, "loss": 1.0315, "step": 4871 }, { "epoch": 0.9904452124415531, "grad_norm": 0.13632024824619293, "learning_rate": 0.00010102715346282924, "loss": 0.8711, "step": 4872 }, { "epoch": 0.9906485057938605, "grad_norm": 0.1373220980167389, "learning_rate": 0.00010100681379029797, "loss": 1.1581, "step": 4873 }, { "epoch": 0.9908517991461679, "grad_norm": 0.11876034736633301, "learning_rate": 0.00010098647411776672, "loss": 0.9445, "step": 4874 }, { "epoch": 0.9910550924984753, "grad_norm": 0.1467241793870926, "learning_rate": 0.00010096613444523544, "loss": 1.1618, "step": 4875 }, { "epoch": 0.9912583858507826, "grad_norm": 0.13242608308792114, "learning_rate": 0.00010094579477270415, "loss": 0.9251, "step": 4876 }, { "epoch": 0.99146167920309, "grad_norm": 0.1570916622877121, "learning_rate": 0.00010092545510017288, "loss": 1.1391, "step": 4877 }, { "epoch": 0.9916649725553974, "grad_norm": 0.1410514861345291, "learning_rate": 0.00010090511542764163, "loss": 1.067, "step": 4878 }, { "epoch": 0.9918682659077048, "grad_norm": 0.13417792320251465, "learning_rate": 0.00010088477575511036, "loss": 1.0545, "step": 4879 }, { "epoch": 0.9920715592600122, "grad_norm": 0.14980773627758026, "learning_rate": 0.00010086443608257907, "loss": 1.1653, "step": 4880 }, { "epoch": 0.9922748526123196, "grad_norm": 0.1144283264875412, "learning_rate": 0.00010084409641004779, "loss": 0.9679, "step": 4881 }, { "epoch": 0.9924781459646269, "grad_norm": 0.13899140059947968, "learning_rate": 0.00010082375673751654, "loss": 1.0324, "step": 4882 }, { "epoch": 0.9926814393169343, "grad_norm": 0.13620680570602417, "learning_rate": 0.00010080341706498527, "loss": 1.0691, "step": 4883 }, { "epoch": 0.9928847326692417, "grad_norm": 0.13614429533481598, "learning_rate": 0.00010078307739245398, "loss": 1.0911, "step": 4884 }, { "epoch": 0.9930880260215491, "grad_norm": 0.15049585700035095, "learning_rate": 0.0001007627377199227, "loss": 1.2096, "step": 4885 }, { "epoch": 0.9932913193738565, "grad_norm": 0.11920803040266037, "learning_rate": 0.00010074239804739145, "loss": 0.8579, "step": 4886 }, { "epoch": 0.9934946127261639, "grad_norm": 0.148103266954422, "learning_rate": 0.00010072205837486018, "loss": 0.9659, "step": 4887 }, { "epoch": 0.9936979060784712, "grad_norm": 0.14606502652168274, "learning_rate": 0.00010070171870232889, "loss": 1.1369, "step": 4888 }, { "epoch": 0.9939011994307786, "grad_norm": 0.15074527263641357, "learning_rate": 0.00010068137902979761, "loss": 1.1467, "step": 4889 }, { "epoch": 0.994104492783086, "grad_norm": 0.1285044550895691, "learning_rate": 0.00010066103935726636, "loss": 0.8631, "step": 4890 }, { "epoch": 0.9943077861353934, "grad_norm": 0.14599795639514923, "learning_rate": 0.00010064069968473509, "loss": 1.136, "step": 4891 }, { "epoch": 0.9945110794877008, "grad_norm": 0.15505965054035187, "learning_rate": 0.0001006203600122038, "loss": 1.2104, "step": 4892 }, { "epoch": 0.9947143728400081, "grad_norm": 0.13279956579208374, "learning_rate": 0.00010060002033967252, "loss": 1.0342, "step": 4893 }, { "epoch": 0.9949176661923155, "grad_norm": 0.1346520632505417, "learning_rate": 0.00010057968066714128, "loss": 1.0414, "step": 4894 }, { "epoch": 0.9951209595446229, "grad_norm": 0.1434224247932434, "learning_rate": 0.00010055934099461, "loss": 1.1259, "step": 4895 }, { "epoch": 0.9953242528969303, "grad_norm": 0.1336824744939804, "learning_rate": 0.00010053900132207873, "loss": 1.1013, "step": 4896 }, { "epoch": 0.9955275462492377, "grad_norm": 0.13659413158893585, "learning_rate": 0.00010051866164954744, "loss": 0.9543, "step": 4897 }, { "epoch": 0.9957308396015451, "grad_norm": 0.13040059804916382, "learning_rate": 0.00010049832197701619, "loss": 0.9669, "step": 4898 }, { "epoch": 0.9959341329538524, "grad_norm": 0.1415984034538269, "learning_rate": 0.00010047798230448491, "loss": 1.0195, "step": 4899 }, { "epoch": 0.9961374263061598, "grad_norm": 0.13746584951877594, "learning_rate": 0.00010045764263195364, "loss": 1.0605, "step": 4900 }, { "epoch": 0.9963407196584672, "grad_norm": 0.1430116593837738, "learning_rate": 0.00010043730295942235, "loss": 1.107, "step": 4901 }, { "epoch": 0.9965440130107746, "grad_norm": 0.11865589022636414, "learning_rate": 0.00010041696328689107, "loss": 0.8887, "step": 4902 }, { "epoch": 0.996747306363082, "grad_norm": 0.11495467275381088, "learning_rate": 0.00010039662361435982, "loss": 0.8365, "step": 4903 }, { "epoch": 0.9969505997153894, "grad_norm": 0.1354401409626007, "learning_rate": 0.00010037628394182855, "loss": 1.1705, "step": 4904 }, { "epoch": 0.9971538930676966, "grad_norm": 0.13998205959796906, "learning_rate": 0.00010035594426929726, "loss": 1.0365, "step": 4905 }, { "epoch": 0.997357186420004, "grad_norm": 0.15044035017490387, "learning_rate": 0.00010033560459676598, "loss": 1.1061, "step": 4906 }, { "epoch": 0.9975604797723114, "grad_norm": 0.1416459083557129, "learning_rate": 0.00010031526492423473, "loss": 1.1155, "step": 4907 }, { "epoch": 0.9977637731246188, "grad_norm": 0.13485343754291534, "learning_rate": 0.00010029492525170346, "loss": 0.9937, "step": 4908 }, { "epoch": 0.9979670664769262, "grad_norm": 0.14948885142803192, "learning_rate": 0.00010027458557917217, "loss": 1.1689, "step": 4909 }, { "epoch": 0.9981703598292336, "grad_norm": 0.1309768706560135, "learning_rate": 0.0001002542459066409, "loss": 0.9428, "step": 4910 }, { "epoch": 0.9983736531815409, "grad_norm": 0.11928943544626236, "learning_rate": 0.00010023390623410965, "loss": 0.8238, "step": 4911 }, { "epoch": 0.9985769465338483, "grad_norm": 0.1389857530593872, "learning_rate": 0.00010021356656157837, "loss": 1.0459, "step": 4912 }, { "epoch": 0.9987802398861557, "grad_norm": 0.14047744870185852, "learning_rate": 0.00010019322688904708, "loss": 0.9594, "step": 4913 }, { "epoch": 0.9989835332384631, "grad_norm": 0.1307019144296646, "learning_rate": 0.0001001728872165158, "loss": 1.1549, "step": 4914 }, { "epoch": 0.9991868265907705, "grad_norm": 0.13652239739894867, "learning_rate": 0.00010015254754398456, "loss": 1.142, "step": 4915 }, { "epoch": 0.9993901199430779, "grad_norm": 0.1404002457857132, "learning_rate": 0.00010013220787145328, "loss": 1.0275, "step": 4916 }, { "epoch": 0.9995934132953852, "grad_norm": 0.14137892425060272, "learning_rate": 0.00010011186819892199, "loss": 1.1169, "step": 4917 }, { "epoch": 0.9997967066476926, "grad_norm": 0.12362517416477203, "learning_rate": 0.00010009152852639072, "loss": 0.9733, "step": 4918 }, { "epoch": 1.0, "grad_norm": 0.16257604956626892, "learning_rate": 0.00010007118885385947, "loss": 1.214, "step": 4919 }, { "epoch": 1.0002032933523073, "grad_norm": 0.13455824553966522, "learning_rate": 0.0001000508491813282, "loss": 1.1717, "step": 4920 }, { "epoch": 1.0004065867046148, "grad_norm": 0.1244397908449173, "learning_rate": 0.0001000305095087969, "loss": 0.9873, "step": 4921 }, { "epoch": 1.000609880056922, "grad_norm": 0.13148358464241028, "learning_rate": 0.00010001016983626563, "loss": 1.0512, "step": 4922 }, { "epoch": 1.0008131734092296, "grad_norm": 0.14207464456558228, "learning_rate": 9.998983016373437e-05, "loss": 1.1071, "step": 4923 }, { "epoch": 1.0010164667615369, "grad_norm": 0.1350506693124771, "learning_rate": 9.99694904912031e-05, "loss": 1.1134, "step": 4924 }, { "epoch": 1.0012197601138442, "grad_norm": 0.14575833082199097, "learning_rate": 9.994915081867182e-05, "loss": 1.0793, "step": 4925 }, { "epoch": 1.0014230534661517, "grad_norm": 0.13254649937152863, "learning_rate": 9.992881114614055e-05, "loss": 0.9843, "step": 4926 }, { "epoch": 1.001626346818459, "grad_norm": 0.13385853171348572, "learning_rate": 9.990847147360928e-05, "loss": 1.0446, "step": 4927 }, { "epoch": 1.0018296401707665, "grad_norm": 0.13908478617668152, "learning_rate": 9.988813180107802e-05, "loss": 0.9968, "step": 4928 }, { "epoch": 1.0020329335230738, "grad_norm": 0.13923251628875732, "learning_rate": 9.986779212854673e-05, "loss": 1.0023, "step": 4929 }, { "epoch": 1.0022362268753813, "grad_norm": 0.1373911201953888, "learning_rate": 9.984745245601547e-05, "loss": 1.1753, "step": 4930 }, { "epoch": 1.0024395202276886, "grad_norm": 0.13491371273994446, "learning_rate": 9.982711278348419e-05, "loss": 0.893, "step": 4931 }, { "epoch": 1.0026428135799959, "grad_norm": 0.12279137223958969, "learning_rate": 9.980677311095293e-05, "loss": 0.8334, "step": 4932 }, { "epoch": 1.0028461069323034, "grad_norm": 0.1489049643278122, "learning_rate": 9.978643343842164e-05, "loss": 1.2196, "step": 4933 }, { "epoch": 1.0030494002846106, "grad_norm": 0.15800416469573975, "learning_rate": 9.976609376589038e-05, "loss": 1.1065, "step": 4934 }, { "epoch": 1.0032526936369182, "grad_norm": 0.12695717811584473, "learning_rate": 9.97457540933591e-05, "loss": 0.8969, "step": 4935 }, { "epoch": 1.0034559869892254, "grad_norm": 0.12970462441444397, "learning_rate": 9.972541442082784e-05, "loss": 0.9748, "step": 4936 }, { "epoch": 1.0036592803415327, "grad_norm": 0.13583384454250336, "learning_rate": 9.970507474829655e-05, "loss": 0.9943, "step": 4937 }, { "epoch": 1.0038625736938402, "grad_norm": 0.13171210885047913, "learning_rate": 9.968473507576529e-05, "loss": 1.0066, "step": 4938 }, { "epoch": 1.0040658670461475, "grad_norm": 0.140077605843544, "learning_rate": 9.966439540323401e-05, "loss": 1.0276, "step": 4939 }, { "epoch": 1.004269160398455, "grad_norm": 0.13248348236083984, "learning_rate": 9.964405573070275e-05, "loss": 0.9836, "step": 4940 }, { "epoch": 1.0044724537507623, "grad_norm": 0.1502828449010849, "learning_rate": 9.962371605817146e-05, "loss": 1.175, "step": 4941 }, { "epoch": 1.0046757471030698, "grad_norm": 0.14695493876934052, "learning_rate": 9.96033763856402e-05, "loss": 0.963, "step": 4942 }, { "epoch": 1.0048790404553771, "grad_norm": 0.14214938879013062, "learning_rate": 9.958303671310892e-05, "loss": 1.0651, "step": 4943 }, { "epoch": 1.0050823338076844, "grad_norm": 0.14761728048324585, "learning_rate": 9.956269704057765e-05, "loss": 0.9907, "step": 4944 }, { "epoch": 1.005285627159992, "grad_norm": 0.13151785731315613, "learning_rate": 9.954235736804637e-05, "loss": 0.8793, "step": 4945 }, { "epoch": 1.0054889205122992, "grad_norm": 0.1452670693397522, "learning_rate": 9.95220176955151e-05, "loss": 1.0906, "step": 4946 }, { "epoch": 1.0056922138646067, "grad_norm": 0.13930079340934753, "learning_rate": 9.950167802298384e-05, "loss": 0.9598, "step": 4947 }, { "epoch": 1.005895507216914, "grad_norm": 0.12317246198654175, "learning_rate": 9.948133835045256e-05, "loss": 0.9429, "step": 4948 }, { "epoch": 1.0060988005692213, "grad_norm": 0.13415516912937164, "learning_rate": 9.946099867792128e-05, "loss": 1.0848, "step": 4949 }, { "epoch": 1.0063020939215288, "grad_norm": 0.13976556062698364, "learning_rate": 9.944065900539001e-05, "loss": 0.934, "step": 4950 }, { "epoch": 1.006505387273836, "grad_norm": 0.13384398818016052, "learning_rate": 9.942031933285875e-05, "loss": 0.955, "step": 4951 }, { "epoch": 1.0067086806261436, "grad_norm": 0.14308519661426544, "learning_rate": 9.939997966032747e-05, "loss": 0.9543, "step": 4952 }, { "epoch": 1.006911973978451, "grad_norm": 0.14340607821941376, "learning_rate": 9.937963998779621e-05, "loss": 1.047, "step": 4953 }, { "epoch": 1.0071152673307582, "grad_norm": 0.14457905292510986, "learning_rate": 9.935930031526492e-05, "loss": 0.9937, "step": 4954 }, { "epoch": 1.0073185606830657, "grad_norm": 0.13555844128131866, "learning_rate": 9.933896064273366e-05, "loss": 1.0211, "step": 4955 }, { "epoch": 1.007521854035373, "grad_norm": 0.1536429524421692, "learning_rate": 9.931862097020238e-05, "loss": 1.188, "step": 4956 }, { "epoch": 1.0077251473876805, "grad_norm": 0.13193362951278687, "learning_rate": 9.929828129767112e-05, "loss": 0.9143, "step": 4957 }, { "epoch": 1.0079284407399878, "grad_norm": 0.14066417515277863, "learning_rate": 9.927794162513983e-05, "loss": 1.0662, "step": 4958 }, { "epoch": 1.0081317340922953, "grad_norm": 0.13579119741916656, "learning_rate": 9.925760195260857e-05, "loss": 0.8999, "step": 4959 }, { "epoch": 1.0083350274446026, "grad_norm": 0.14911122620105743, "learning_rate": 9.92372622800773e-05, "loss": 1.3171, "step": 4960 }, { "epoch": 1.0085383207969099, "grad_norm": 0.1447262316942215, "learning_rate": 9.921692260754603e-05, "loss": 1.0899, "step": 4961 }, { "epoch": 1.0087416141492174, "grad_norm": 0.1513487845659256, "learning_rate": 9.919658293501474e-05, "loss": 1.0844, "step": 4962 }, { "epoch": 1.0089449075015247, "grad_norm": 0.1470583975315094, "learning_rate": 9.917624326248348e-05, "loss": 1.1176, "step": 4963 }, { "epoch": 1.0091482008538322, "grad_norm": 0.13596630096435547, "learning_rate": 9.91559035899522e-05, "loss": 1.0829, "step": 4964 }, { "epoch": 1.0093514942061395, "grad_norm": 0.1411203145980835, "learning_rate": 9.913556391742094e-05, "loss": 1.0523, "step": 4965 }, { "epoch": 1.0095547875584467, "grad_norm": 0.14842981100082397, "learning_rate": 9.911522424488965e-05, "loss": 1.0513, "step": 4966 }, { "epoch": 1.0097580809107543, "grad_norm": 0.1505335569381714, "learning_rate": 9.909488457235839e-05, "loss": 0.9964, "step": 4967 }, { "epoch": 1.0099613742630615, "grad_norm": 0.12677620351314545, "learning_rate": 9.907454489982712e-05, "loss": 0.9546, "step": 4968 }, { "epoch": 1.010164667615369, "grad_norm": 0.13651777803897858, "learning_rate": 9.905420522729585e-05, "loss": 1.0823, "step": 4969 }, { "epoch": 1.0103679609676763, "grad_norm": 0.1392572969198227, "learning_rate": 9.903386555476457e-05, "loss": 0.9032, "step": 4970 }, { "epoch": 1.0105712543199838, "grad_norm": 0.16775289177894592, "learning_rate": 9.90135258822333e-05, "loss": 1.1434, "step": 4971 }, { "epoch": 1.0107745476722911, "grad_norm": 0.1534387320280075, "learning_rate": 9.899318620970203e-05, "loss": 1.166, "step": 4972 }, { "epoch": 1.0109778410245984, "grad_norm": 0.14180676639080048, "learning_rate": 9.897284653717077e-05, "loss": 1.0688, "step": 4973 }, { "epoch": 1.011181134376906, "grad_norm": 0.13633224368095398, "learning_rate": 9.895250686463948e-05, "loss": 1.0413, "step": 4974 }, { "epoch": 1.0113844277292132, "grad_norm": 0.15582099556922913, "learning_rate": 9.893216719210822e-05, "loss": 1.256, "step": 4975 }, { "epoch": 1.0115877210815207, "grad_norm": 0.16052106022834778, "learning_rate": 9.891182751957694e-05, "loss": 1.3048, "step": 4976 }, { "epoch": 1.011791014433828, "grad_norm": 0.15733475983142853, "learning_rate": 9.889148784704568e-05, "loss": 1.1024, "step": 4977 }, { "epoch": 1.0119943077861353, "grad_norm": 0.1398230642080307, "learning_rate": 9.887114817451439e-05, "loss": 1.0691, "step": 4978 }, { "epoch": 1.0121976011384428, "grad_norm": 0.15575705468654633, "learning_rate": 9.885080850198313e-05, "loss": 1.0019, "step": 4979 }, { "epoch": 1.01240089449075, "grad_norm": 0.13900624215602875, "learning_rate": 9.883046882945185e-05, "loss": 1.0318, "step": 4980 }, { "epoch": 1.0126041878430576, "grad_norm": 0.1266520619392395, "learning_rate": 9.881012915692059e-05, "loss": 0.9455, "step": 4981 }, { "epoch": 1.012807481195365, "grad_norm": 0.14327497780323029, "learning_rate": 9.87897894843893e-05, "loss": 1.1133, "step": 4982 }, { "epoch": 1.0130107745476722, "grad_norm": 0.14177127182483673, "learning_rate": 9.876944981185804e-05, "loss": 0.9969, "step": 4983 }, { "epoch": 1.0132140678999797, "grad_norm": 0.14066456258296967, "learning_rate": 9.874911013932676e-05, "loss": 0.9261, "step": 4984 }, { "epoch": 1.013417361252287, "grad_norm": 0.14441144466400146, "learning_rate": 9.872877046679549e-05, "loss": 1.0065, "step": 4985 }, { "epoch": 1.0136206546045945, "grad_norm": 0.12858086824417114, "learning_rate": 9.870843079426421e-05, "loss": 0.9306, "step": 4986 }, { "epoch": 1.0138239479569018, "grad_norm": 0.1305333971977234, "learning_rate": 9.868809112173294e-05, "loss": 1.0058, "step": 4987 }, { "epoch": 1.0140272413092093, "grad_norm": 0.1652311384677887, "learning_rate": 9.866775144920167e-05, "loss": 1.1992, "step": 4988 }, { "epoch": 1.0142305346615166, "grad_norm": 0.1123913899064064, "learning_rate": 9.86474117766704e-05, "loss": 0.8779, "step": 4989 }, { "epoch": 1.0144338280138239, "grad_norm": 0.15201310813426971, "learning_rate": 9.862707210413912e-05, "loss": 1.1553, "step": 4990 }, { "epoch": 1.0146371213661314, "grad_norm": 0.13241463899612427, "learning_rate": 9.860673243160785e-05, "loss": 0.9276, "step": 4991 }, { "epoch": 1.0148404147184387, "grad_norm": 0.15238632261753082, "learning_rate": 9.858639275907659e-05, "loss": 1.1528, "step": 4992 }, { "epoch": 1.0150437080707462, "grad_norm": 0.13771474361419678, "learning_rate": 9.856605308654531e-05, "loss": 1.1871, "step": 4993 }, { "epoch": 1.0152470014230535, "grad_norm": 0.135041743516922, "learning_rate": 9.854571341401403e-05, "loss": 0.9718, "step": 4994 }, { "epoch": 1.0154502947753608, "grad_norm": 0.14199897646903992, "learning_rate": 9.852537374148276e-05, "loss": 1.0454, "step": 4995 }, { "epoch": 1.0156535881276683, "grad_norm": 0.14556720852851868, "learning_rate": 9.85050340689515e-05, "loss": 1.102, "step": 4996 }, { "epoch": 1.0158568814799755, "grad_norm": 0.1287354975938797, "learning_rate": 9.848469439642022e-05, "loss": 0.929, "step": 4997 }, { "epoch": 1.016060174832283, "grad_norm": 0.15297791361808777, "learning_rate": 9.846435472388895e-05, "loss": 1.0234, "step": 4998 }, { "epoch": 1.0162634681845903, "grad_norm": 0.1549387276172638, "learning_rate": 9.844401505135767e-05, "loss": 1.1666, "step": 4999 }, { "epoch": 1.0164667615368979, "grad_norm": 0.15455321967601776, "learning_rate": 9.842367537882641e-05, "loss": 1.0845, "step": 5000 }, { "epoch": 1.0166700548892051, "grad_norm": 0.1259438842535019, "learning_rate": 9.840333570629513e-05, "loss": 0.8045, "step": 5001 }, { "epoch": 1.0168733482415124, "grad_norm": 0.12807638943195343, "learning_rate": 9.838299603376386e-05, "loss": 0.8213, "step": 5002 }, { "epoch": 1.01707664159382, "grad_norm": 0.14414066076278687, "learning_rate": 9.836265636123258e-05, "loss": 0.9676, "step": 5003 }, { "epoch": 1.0172799349461272, "grad_norm": 0.1384848803281784, "learning_rate": 9.834231668870132e-05, "loss": 1.0864, "step": 5004 }, { "epoch": 1.0174832282984347, "grad_norm": 0.1476108878850937, "learning_rate": 9.832197701617004e-05, "loss": 1.0906, "step": 5005 }, { "epoch": 1.017686521650742, "grad_norm": 0.14003942906856537, "learning_rate": 9.830163734363877e-05, "loss": 1.0768, "step": 5006 }, { "epoch": 1.0178898150030493, "grad_norm": 0.15422053635120392, "learning_rate": 9.828129767110749e-05, "loss": 1.0807, "step": 5007 }, { "epoch": 1.0180931083553568, "grad_norm": 0.15271607041358948, "learning_rate": 9.826095799857623e-05, "loss": 1.0285, "step": 5008 }, { "epoch": 1.0182964017076641, "grad_norm": 0.14479339122772217, "learning_rate": 9.824061832604496e-05, "loss": 1.0763, "step": 5009 }, { "epoch": 1.0184996950599716, "grad_norm": 0.16509747505187988, "learning_rate": 9.822027865351368e-05, "loss": 1.2167, "step": 5010 }, { "epoch": 1.018702988412279, "grad_norm": 0.12720637023448944, "learning_rate": 9.81999389809824e-05, "loss": 0.8985, "step": 5011 }, { "epoch": 1.0189062817645862, "grad_norm": 0.15708747506141663, "learning_rate": 9.817959930845114e-05, "loss": 1.0261, "step": 5012 }, { "epoch": 1.0191095751168937, "grad_norm": 0.1485368311405182, "learning_rate": 9.815925963591987e-05, "loss": 1.0739, "step": 5013 }, { "epoch": 1.019312868469201, "grad_norm": 0.1357945054769516, "learning_rate": 9.81389199633886e-05, "loss": 0.9953, "step": 5014 }, { "epoch": 1.0195161618215085, "grad_norm": 0.13788209855556488, "learning_rate": 9.811858029085732e-05, "loss": 0.9359, "step": 5015 }, { "epoch": 1.0197194551738158, "grad_norm": 0.13577666878700256, "learning_rate": 9.809824061832605e-05, "loss": 0.8714, "step": 5016 }, { "epoch": 1.0199227485261233, "grad_norm": 0.13565793633460999, "learning_rate": 9.807790094579478e-05, "loss": 1.0903, "step": 5017 }, { "epoch": 1.0201260418784306, "grad_norm": 0.13475680351257324, "learning_rate": 9.805756127326352e-05, "loss": 0.936, "step": 5018 }, { "epoch": 1.0203293352307379, "grad_norm": 0.12296731770038605, "learning_rate": 9.803722160073223e-05, "loss": 0.8435, "step": 5019 }, { "epoch": 1.0205326285830454, "grad_norm": 0.1502811461687088, "learning_rate": 9.801688192820097e-05, "loss": 1.22, "step": 5020 }, { "epoch": 1.0207359219353527, "grad_norm": 0.15247604250907898, "learning_rate": 9.799654225566969e-05, "loss": 1.0246, "step": 5021 }, { "epoch": 1.0209392152876602, "grad_norm": 0.1271592229604721, "learning_rate": 9.797620258313843e-05, "loss": 0.9396, "step": 5022 }, { "epoch": 1.0211425086399675, "grad_norm": 0.12880264222621918, "learning_rate": 9.795586291060714e-05, "loss": 0.9512, "step": 5023 }, { "epoch": 1.0213458019922748, "grad_norm": 0.14689882099628448, "learning_rate": 9.793552323807588e-05, "loss": 1.0981, "step": 5024 }, { "epoch": 1.0215490953445823, "grad_norm": 0.15388964116573334, "learning_rate": 9.79151835655446e-05, "loss": 1.0772, "step": 5025 }, { "epoch": 1.0217523886968896, "grad_norm": 0.13570699095726013, "learning_rate": 9.789484389301334e-05, "loss": 0.9322, "step": 5026 }, { "epoch": 1.021955682049197, "grad_norm": 0.1563068926334381, "learning_rate": 9.787450422048205e-05, "loss": 1.1555, "step": 5027 }, { "epoch": 1.0221589754015044, "grad_norm": 0.14098972082138062, "learning_rate": 9.785416454795077e-05, "loss": 0.9733, "step": 5028 }, { "epoch": 1.0223622687538116, "grad_norm": 0.15485285222530365, "learning_rate": 9.783382487541951e-05, "loss": 1.0956, "step": 5029 }, { "epoch": 1.0225655621061192, "grad_norm": 0.1376192718744278, "learning_rate": 9.781348520288824e-05, "loss": 0.9571, "step": 5030 }, { "epoch": 1.0227688554584264, "grad_norm": 0.1520523577928543, "learning_rate": 9.779314553035696e-05, "loss": 0.9815, "step": 5031 }, { "epoch": 1.022972148810734, "grad_norm": 0.14404673874378204, "learning_rate": 9.777280585782569e-05, "loss": 1.025, "step": 5032 }, { "epoch": 1.0231754421630412, "grad_norm": 0.15610331296920776, "learning_rate": 9.775246618529442e-05, "loss": 1.2032, "step": 5033 }, { "epoch": 1.0233787355153487, "grad_norm": 0.14844031631946564, "learning_rate": 9.773212651276315e-05, "loss": 1.133, "step": 5034 }, { "epoch": 1.023582028867656, "grad_norm": 0.15106390416622162, "learning_rate": 9.771178684023187e-05, "loss": 1.0273, "step": 5035 }, { "epoch": 1.0237853222199633, "grad_norm": 0.15353932976722717, "learning_rate": 9.76914471677006e-05, "loss": 1.0613, "step": 5036 }, { "epoch": 1.0239886155722708, "grad_norm": 0.14371348917484283, "learning_rate": 9.767110749516934e-05, "loss": 1.1526, "step": 5037 }, { "epoch": 1.0241919089245781, "grad_norm": 0.1600302904844284, "learning_rate": 9.765076782263806e-05, "loss": 1.2017, "step": 5038 }, { "epoch": 1.0243952022768856, "grad_norm": 0.1495140641927719, "learning_rate": 9.763042815010678e-05, "loss": 1.1276, "step": 5039 }, { "epoch": 1.024598495629193, "grad_norm": 0.13793641328811646, "learning_rate": 9.761008847757551e-05, "loss": 1.0703, "step": 5040 }, { "epoch": 1.0248017889815002, "grad_norm": 0.14516127109527588, "learning_rate": 9.758974880504425e-05, "loss": 0.9643, "step": 5041 }, { "epoch": 1.0250050823338077, "grad_norm": 0.14541226625442505, "learning_rate": 9.756940913251297e-05, "loss": 1.0796, "step": 5042 }, { "epoch": 1.025208375686115, "grad_norm": 0.1590830236673355, "learning_rate": 9.75490694599817e-05, "loss": 1.0886, "step": 5043 }, { "epoch": 1.0254116690384225, "grad_norm": 0.14074791967868805, "learning_rate": 9.752872978745042e-05, "loss": 1.0064, "step": 5044 }, { "epoch": 1.0256149623907298, "grad_norm": 0.13759545981884003, "learning_rate": 9.750839011491916e-05, "loss": 0.9204, "step": 5045 }, { "epoch": 1.0258182557430373, "grad_norm": 0.1406768560409546, "learning_rate": 9.748805044238788e-05, "loss": 1.0328, "step": 5046 }, { "epoch": 1.0260215490953446, "grad_norm": 0.14663250744342804, "learning_rate": 9.746771076985661e-05, "loss": 1.0868, "step": 5047 }, { "epoch": 1.0262248424476519, "grad_norm": 0.15370012819766998, "learning_rate": 9.744737109732533e-05, "loss": 1.1454, "step": 5048 }, { "epoch": 1.0264281357999594, "grad_norm": 0.16050153970718384, "learning_rate": 9.742703142479407e-05, "loss": 1.2114, "step": 5049 }, { "epoch": 1.0266314291522667, "grad_norm": 0.15520580112934113, "learning_rate": 9.74066917522628e-05, "loss": 1.1716, "step": 5050 }, { "epoch": 1.0268347225045742, "grad_norm": 0.14273221790790558, "learning_rate": 9.738635207973152e-05, "loss": 1.0047, "step": 5051 }, { "epoch": 1.0270380158568815, "grad_norm": 0.14701679348945618, "learning_rate": 9.736601240720024e-05, "loss": 1.1173, "step": 5052 }, { "epoch": 1.0272413092091888, "grad_norm": 0.1339159458875656, "learning_rate": 9.734567273466898e-05, "loss": 0.8771, "step": 5053 }, { "epoch": 1.0274446025614963, "grad_norm": 0.13483907282352448, "learning_rate": 9.73253330621377e-05, "loss": 0.9459, "step": 5054 }, { "epoch": 1.0276478959138036, "grad_norm": 0.13941912353038788, "learning_rate": 9.730499338960643e-05, "loss": 1.0684, "step": 5055 }, { "epoch": 1.027851189266111, "grad_norm": 0.15202683210372925, "learning_rate": 9.728465371707515e-05, "loss": 1.0361, "step": 5056 }, { "epoch": 1.0280544826184184, "grad_norm": 0.13405011594295502, "learning_rate": 9.726431404454389e-05, "loss": 0.9139, "step": 5057 }, { "epoch": 1.0282577759707257, "grad_norm": 0.15052016079425812, "learning_rate": 9.724397437201262e-05, "loss": 1.1181, "step": 5058 }, { "epoch": 1.0284610693230332, "grad_norm": 0.13862478733062744, "learning_rate": 9.722363469948134e-05, "loss": 0.9646, "step": 5059 }, { "epoch": 1.0286643626753404, "grad_norm": 0.13493207097053528, "learning_rate": 9.720329502695007e-05, "loss": 0.9966, "step": 5060 }, { "epoch": 1.028867656027648, "grad_norm": 0.13806003332138062, "learning_rate": 9.71829553544188e-05, "loss": 1.03, "step": 5061 }, { "epoch": 1.0290709493799552, "grad_norm": 0.13732276856899261, "learning_rate": 9.716261568188753e-05, "loss": 1.0235, "step": 5062 }, { "epoch": 1.0292742427322628, "grad_norm": 0.1543864756822586, "learning_rate": 9.714227600935625e-05, "loss": 1.124, "step": 5063 }, { "epoch": 1.02947753608457, "grad_norm": 0.12406022101640701, "learning_rate": 9.712193633682498e-05, "loss": 0.936, "step": 5064 }, { "epoch": 1.0296808294368773, "grad_norm": 0.1271096020936966, "learning_rate": 9.710159666429371e-05, "loss": 0.9351, "step": 5065 }, { "epoch": 1.0298841227891848, "grad_norm": 0.15488265454769135, "learning_rate": 9.708125699176244e-05, "loss": 1.1175, "step": 5066 }, { "epoch": 1.0300874161414921, "grad_norm": 0.15858517587184906, "learning_rate": 9.706091731923116e-05, "loss": 1.107, "step": 5067 }, { "epoch": 1.0302907094937996, "grad_norm": 0.12946265935897827, "learning_rate": 9.704057764669989e-05, "loss": 0.8796, "step": 5068 }, { "epoch": 1.030494002846107, "grad_norm": 0.1463090032339096, "learning_rate": 9.702023797416861e-05, "loss": 0.9929, "step": 5069 }, { "epoch": 1.0306972961984142, "grad_norm": 0.1322004348039627, "learning_rate": 9.699989830163735e-05, "loss": 0.9455, "step": 5070 }, { "epoch": 1.0309005895507217, "grad_norm": 0.1394505351781845, "learning_rate": 9.697955862910608e-05, "loss": 0.8651, "step": 5071 }, { "epoch": 1.031103882903029, "grad_norm": 0.14322896301746368, "learning_rate": 9.69592189565748e-05, "loss": 0.9789, "step": 5072 }, { "epoch": 1.0313071762553365, "grad_norm": 0.1413803994655609, "learning_rate": 9.693887928404352e-05, "loss": 0.9716, "step": 5073 }, { "epoch": 1.0315104696076438, "grad_norm": 0.14607295393943787, "learning_rate": 9.691853961151226e-05, "loss": 1.2096, "step": 5074 }, { "epoch": 1.031713762959951, "grad_norm": 0.13793663680553436, "learning_rate": 9.689819993898099e-05, "loss": 0.9774, "step": 5075 }, { "epoch": 1.0319170563122586, "grad_norm": 0.15151038765907288, "learning_rate": 9.687786026644971e-05, "loss": 1.1573, "step": 5076 }, { "epoch": 1.032120349664566, "grad_norm": 0.15216906368732452, "learning_rate": 9.685752059391844e-05, "loss": 0.9275, "step": 5077 }, { "epoch": 1.0323236430168734, "grad_norm": 0.13646896183490753, "learning_rate": 9.683718092138717e-05, "loss": 0.9482, "step": 5078 }, { "epoch": 1.0325269363691807, "grad_norm": 0.13924843072891235, "learning_rate": 9.68168412488559e-05, "loss": 0.9035, "step": 5079 }, { "epoch": 1.0327302297214882, "grad_norm": 0.15003299713134766, "learning_rate": 9.679650157632462e-05, "loss": 1.0521, "step": 5080 }, { "epoch": 1.0329335230737955, "grad_norm": 0.14902108907699585, "learning_rate": 9.677616190379335e-05, "loss": 1.0682, "step": 5081 }, { "epoch": 1.0331368164261028, "grad_norm": 0.14243461191654205, "learning_rate": 9.675582223126208e-05, "loss": 0.8912, "step": 5082 }, { "epoch": 1.0333401097784103, "grad_norm": 0.13720953464508057, "learning_rate": 9.673548255873081e-05, "loss": 0.9996, "step": 5083 }, { "epoch": 1.0335434031307176, "grad_norm": 0.1456315517425537, "learning_rate": 9.671514288619953e-05, "loss": 1.1464, "step": 5084 }, { "epoch": 1.033746696483025, "grad_norm": 0.14110687375068665, "learning_rate": 9.669480321366826e-05, "loss": 0.8695, "step": 5085 }, { "epoch": 1.0339499898353324, "grad_norm": 0.1465069204568863, "learning_rate": 9.6674463541137e-05, "loss": 1.1388, "step": 5086 }, { "epoch": 1.0341532831876397, "grad_norm": 0.15050126612186432, "learning_rate": 9.665412386860572e-05, "loss": 0.9978, "step": 5087 }, { "epoch": 1.0343565765399472, "grad_norm": 0.14004911482334137, "learning_rate": 9.663378419607445e-05, "loss": 0.89, "step": 5088 }, { "epoch": 1.0345598698922545, "grad_norm": 0.1353030502796173, "learning_rate": 9.661344452354317e-05, "loss": 0.9443, "step": 5089 }, { "epoch": 1.034763163244562, "grad_norm": 0.14427968859672546, "learning_rate": 9.659310485101191e-05, "loss": 1.0751, "step": 5090 }, { "epoch": 1.0349664565968693, "grad_norm": 0.1521504521369934, "learning_rate": 9.657276517848063e-05, "loss": 1.0727, "step": 5091 }, { "epoch": 1.0351697499491768, "grad_norm": 0.14062680304050446, "learning_rate": 9.655242550594936e-05, "loss": 0.9034, "step": 5092 }, { "epoch": 1.035373043301484, "grad_norm": 0.17620261013507843, "learning_rate": 9.653208583341808e-05, "loss": 1.2104, "step": 5093 }, { "epoch": 1.0355763366537913, "grad_norm": 0.1438150703907013, "learning_rate": 9.651174616088682e-05, "loss": 1.0179, "step": 5094 }, { "epoch": 1.0357796300060989, "grad_norm": 0.16635897755622864, "learning_rate": 9.649140648835554e-05, "loss": 1.1789, "step": 5095 }, { "epoch": 1.0359829233584061, "grad_norm": 0.14373865723609924, "learning_rate": 9.647106681582427e-05, "loss": 0.9776, "step": 5096 }, { "epoch": 1.0361862167107136, "grad_norm": 0.1490122526884079, "learning_rate": 9.645072714329299e-05, "loss": 1.0112, "step": 5097 }, { "epoch": 1.036389510063021, "grad_norm": 0.14242301881313324, "learning_rate": 9.643038747076173e-05, "loss": 1.0629, "step": 5098 }, { "epoch": 1.0365928034153282, "grad_norm": 0.13605085015296936, "learning_rate": 9.641004779823045e-05, "loss": 0.8603, "step": 5099 }, { "epoch": 1.0367960967676357, "grad_norm": 0.15348966419696808, "learning_rate": 9.638970812569918e-05, "loss": 1.1368, "step": 5100 }, { "epoch": 1.036999390119943, "grad_norm": 0.14863932132720947, "learning_rate": 9.63693684531679e-05, "loss": 0.8904, "step": 5101 }, { "epoch": 1.0372026834722505, "grad_norm": 0.15452273190021515, "learning_rate": 9.634902878063664e-05, "loss": 1.1195, "step": 5102 }, { "epoch": 1.0374059768245578, "grad_norm": 0.14655210077762604, "learning_rate": 9.632868910810537e-05, "loss": 1.0639, "step": 5103 }, { "epoch": 1.037609270176865, "grad_norm": 0.13452014327049255, "learning_rate": 9.630834943557409e-05, "loss": 0.9851, "step": 5104 }, { "epoch": 1.0378125635291726, "grad_norm": 0.15053342282772064, "learning_rate": 9.628800976304282e-05, "loss": 0.906, "step": 5105 }, { "epoch": 1.03801585688148, "grad_norm": 0.16290277242660522, "learning_rate": 9.626767009051155e-05, "loss": 1.1462, "step": 5106 }, { "epoch": 1.0382191502337874, "grad_norm": 0.1363389641046524, "learning_rate": 9.624733041798028e-05, "loss": 0.9143, "step": 5107 }, { "epoch": 1.0384224435860947, "grad_norm": 0.13875210285186768, "learning_rate": 9.6226990745449e-05, "loss": 0.823, "step": 5108 }, { "epoch": 1.0386257369384022, "grad_norm": 0.13508771359920502, "learning_rate": 9.620665107291773e-05, "loss": 1.061, "step": 5109 }, { "epoch": 1.0388290302907095, "grad_norm": 0.1293465942144394, "learning_rate": 9.618631140038645e-05, "loss": 0.8473, "step": 5110 }, { "epoch": 1.0390323236430168, "grad_norm": 0.1552773416042328, "learning_rate": 9.616597172785519e-05, "loss": 1.1096, "step": 5111 }, { "epoch": 1.0392356169953243, "grad_norm": 0.13282112777233124, "learning_rate": 9.614563205532391e-05, "loss": 0.8199, "step": 5112 }, { "epoch": 1.0394389103476316, "grad_norm": 0.12775538861751556, "learning_rate": 9.612529238279264e-05, "loss": 0.8483, "step": 5113 }, { "epoch": 1.039642203699939, "grad_norm": 0.14461298286914825, "learning_rate": 9.610495271026136e-05, "loss": 0.9767, "step": 5114 }, { "epoch": 1.0398454970522464, "grad_norm": 0.1476954072713852, "learning_rate": 9.60846130377301e-05, "loss": 0.9892, "step": 5115 }, { "epoch": 1.0400487904045537, "grad_norm": 0.13602770864963531, "learning_rate": 9.606427336519883e-05, "loss": 1.0392, "step": 5116 }, { "epoch": 1.0402520837568612, "grad_norm": 0.14463678002357483, "learning_rate": 9.604393369266755e-05, "loss": 0.8989, "step": 5117 }, { "epoch": 1.0404553771091685, "grad_norm": 0.1426476687192917, "learning_rate": 9.602359402013627e-05, "loss": 1.0073, "step": 5118 }, { "epoch": 1.040658670461476, "grad_norm": 0.14196395874023438, "learning_rate": 9.600325434760501e-05, "loss": 1.1132, "step": 5119 }, { "epoch": 1.0408619638137833, "grad_norm": 0.14742383360862732, "learning_rate": 9.598291467507374e-05, "loss": 0.9791, "step": 5120 }, { "epoch": 1.0410652571660908, "grad_norm": 0.13551802933216095, "learning_rate": 9.596257500254246e-05, "loss": 0.927, "step": 5121 }, { "epoch": 1.041268550518398, "grad_norm": 0.12378886342048645, "learning_rate": 9.594223533001119e-05, "loss": 0.9554, "step": 5122 }, { "epoch": 1.0414718438707053, "grad_norm": 0.13679155707359314, "learning_rate": 9.592189565747992e-05, "loss": 0.8767, "step": 5123 }, { "epoch": 1.0416751372230129, "grad_norm": 0.12900646030902863, "learning_rate": 9.590155598494865e-05, "loss": 1.0194, "step": 5124 }, { "epoch": 1.0418784305753201, "grad_norm": 0.14963632822036743, "learning_rate": 9.588121631241737e-05, "loss": 1.0235, "step": 5125 }, { "epoch": 1.0420817239276277, "grad_norm": 0.16666048765182495, "learning_rate": 9.58608766398861e-05, "loss": 1.0171, "step": 5126 }, { "epoch": 1.042285017279935, "grad_norm": 0.13959690928459167, "learning_rate": 9.584053696735483e-05, "loss": 1.0413, "step": 5127 }, { "epoch": 1.0424883106322422, "grad_norm": 0.1701730191707611, "learning_rate": 9.582019729482356e-05, "loss": 1.169, "step": 5128 }, { "epoch": 1.0426916039845497, "grad_norm": 0.147377148270607, "learning_rate": 9.579985762229228e-05, "loss": 1.0956, "step": 5129 }, { "epoch": 1.042894897336857, "grad_norm": 0.1471777856349945, "learning_rate": 9.577951794976101e-05, "loss": 1.1304, "step": 5130 }, { "epoch": 1.0430981906891645, "grad_norm": 0.14388343691825867, "learning_rate": 9.575917827722975e-05, "loss": 1.1626, "step": 5131 }, { "epoch": 1.0433014840414718, "grad_norm": 0.1318674087524414, "learning_rate": 9.573883860469847e-05, "loss": 0.958, "step": 5132 }, { "epoch": 1.0435047773937791, "grad_norm": 0.15218976140022278, "learning_rate": 9.57184989321672e-05, "loss": 1.0546, "step": 5133 }, { "epoch": 1.0437080707460866, "grad_norm": 0.13113273680210114, "learning_rate": 9.569815925963592e-05, "loss": 0.8427, "step": 5134 }, { "epoch": 1.043911364098394, "grad_norm": 0.12252294272184372, "learning_rate": 9.567781958710466e-05, "loss": 0.8234, "step": 5135 }, { "epoch": 1.0441146574507014, "grad_norm": 0.15691354870796204, "learning_rate": 9.565747991457338e-05, "loss": 1.0107, "step": 5136 }, { "epoch": 1.0443179508030087, "grad_norm": 0.13608700037002563, "learning_rate": 9.56371402420421e-05, "loss": 0.9373, "step": 5137 }, { "epoch": 1.0445212441553162, "grad_norm": 0.14586831629276276, "learning_rate": 9.561680056951083e-05, "loss": 0.9283, "step": 5138 }, { "epoch": 1.0447245375076235, "grad_norm": 0.15548555552959442, "learning_rate": 9.559646089697957e-05, "loss": 1.0762, "step": 5139 }, { "epoch": 1.0449278308599308, "grad_norm": 0.14028286933898926, "learning_rate": 9.55761212244483e-05, "loss": 1.0798, "step": 5140 }, { "epoch": 1.0451311242122383, "grad_norm": 0.13946202397346497, "learning_rate": 9.555578155191702e-05, "loss": 0.9123, "step": 5141 }, { "epoch": 1.0453344175645456, "grad_norm": 0.15305642783641815, "learning_rate": 9.553544187938574e-05, "loss": 1.1277, "step": 5142 }, { "epoch": 1.045537710916853, "grad_norm": 0.1336325705051422, "learning_rate": 9.551510220685448e-05, "loss": 0.943, "step": 5143 }, { "epoch": 1.0457410042691604, "grad_norm": 0.1418871283531189, "learning_rate": 9.54947625343232e-05, "loss": 0.9236, "step": 5144 }, { "epoch": 1.0459442976214677, "grad_norm": 0.1378573775291443, "learning_rate": 9.547442286179193e-05, "loss": 1.0725, "step": 5145 }, { "epoch": 1.0461475909737752, "grad_norm": 0.1409774124622345, "learning_rate": 9.545408318926065e-05, "loss": 1.0191, "step": 5146 }, { "epoch": 1.0463508843260825, "grad_norm": 0.14639027416706085, "learning_rate": 9.543374351672939e-05, "loss": 0.996, "step": 5147 }, { "epoch": 1.04655417767839, "grad_norm": 0.15926292538642883, "learning_rate": 9.541340384419812e-05, "loss": 1.1456, "step": 5148 }, { "epoch": 1.0467574710306973, "grad_norm": 0.15091705322265625, "learning_rate": 9.539306417166684e-05, "loss": 1.074, "step": 5149 }, { "epoch": 1.0469607643830048, "grad_norm": 0.1436360776424408, "learning_rate": 9.537272449913557e-05, "loss": 1.0315, "step": 5150 }, { "epoch": 1.047164057735312, "grad_norm": 0.15203581750392914, "learning_rate": 9.535238482660429e-05, "loss": 1.0482, "step": 5151 }, { "epoch": 1.0473673510876194, "grad_norm": 0.14809484779834747, "learning_rate": 9.533204515407303e-05, "loss": 1.1127, "step": 5152 }, { "epoch": 1.0475706444399269, "grad_norm": 0.142609104514122, "learning_rate": 9.531170548154175e-05, "loss": 1.0597, "step": 5153 }, { "epoch": 1.0477739377922342, "grad_norm": 0.14975500106811523, "learning_rate": 9.529136580901048e-05, "loss": 0.9545, "step": 5154 }, { "epoch": 1.0479772311445417, "grad_norm": 0.1335391253232956, "learning_rate": 9.52710261364792e-05, "loss": 0.9786, "step": 5155 }, { "epoch": 1.048180524496849, "grad_norm": 0.1525142341852188, "learning_rate": 9.525068646394794e-05, "loss": 1.1635, "step": 5156 }, { "epoch": 1.0483838178491562, "grad_norm": 0.14316526055335999, "learning_rate": 9.523034679141666e-05, "loss": 1.0514, "step": 5157 }, { "epoch": 1.0485871112014638, "grad_norm": 0.13840989768505096, "learning_rate": 9.521000711888539e-05, "loss": 0.9964, "step": 5158 }, { "epoch": 1.048790404553771, "grad_norm": 0.14186960458755493, "learning_rate": 9.518966744635411e-05, "loss": 1.0424, "step": 5159 }, { "epoch": 1.0489936979060785, "grad_norm": 0.14743170142173767, "learning_rate": 9.516932777382285e-05, "loss": 1.0868, "step": 5160 }, { "epoch": 1.0491969912583858, "grad_norm": 0.13749894499778748, "learning_rate": 9.514898810129157e-05, "loss": 1.1072, "step": 5161 }, { "epoch": 1.0494002846106931, "grad_norm": 0.15378478169441223, "learning_rate": 9.51286484287603e-05, "loss": 0.9729, "step": 5162 }, { "epoch": 1.0496035779630006, "grad_norm": 0.14535516500473022, "learning_rate": 9.510830875622902e-05, "loss": 0.9586, "step": 5163 }, { "epoch": 1.049806871315308, "grad_norm": 0.15049873292446136, "learning_rate": 9.508796908369776e-05, "loss": 1.076, "step": 5164 }, { "epoch": 1.0500101646676154, "grad_norm": 0.14930661022663116, "learning_rate": 9.506762941116649e-05, "loss": 1.1252, "step": 5165 }, { "epoch": 1.0502134580199227, "grad_norm": 0.13975845277309418, "learning_rate": 9.504728973863521e-05, "loss": 0.9545, "step": 5166 }, { "epoch": 1.0504167513722302, "grad_norm": 0.15114235877990723, "learning_rate": 9.502695006610394e-05, "loss": 1.0946, "step": 5167 }, { "epoch": 1.0506200447245375, "grad_norm": 0.1519806683063507, "learning_rate": 9.500661039357267e-05, "loss": 0.9512, "step": 5168 }, { "epoch": 1.0508233380768448, "grad_norm": 0.14767295122146606, "learning_rate": 9.49862707210414e-05, "loss": 0.9872, "step": 5169 }, { "epoch": 1.0510266314291523, "grad_norm": 0.1331123262643814, "learning_rate": 9.496593104851012e-05, "loss": 0.8648, "step": 5170 }, { "epoch": 1.0512299247814596, "grad_norm": 0.15055252611637115, "learning_rate": 9.494559137597885e-05, "loss": 0.9938, "step": 5171 }, { "epoch": 1.0514332181337671, "grad_norm": 0.14633582532405853, "learning_rate": 9.492525170344758e-05, "loss": 0.978, "step": 5172 }, { "epoch": 1.0516365114860744, "grad_norm": 0.13616123795509338, "learning_rate": 9.490491203091631e-05, "loss": 1.0177, "step": 5173 }, { "epoch": 1.0518398048383817, "grad_norm": 0.14793802797794342, "learning_rate": 9.488457235838503e-05, "loss": 1.1359, "step": 5174 }, { "epoch": 1.0520430981906892, "grad_norm": 0.14621852338314056, "learning_rate": 9.486423268585376e-05, "loss": 1.1179, "step": 5175 }, { "epoch": 1.0522463915429965, "grad_norm": 0.14446653425693512, "learning_rate": 9.48438930133225e-05, "loss": 0.9409, "step": 5176 }, { "epoch": 1.052449684895304, "grad_norm": 0.1389390528202057, "learning_rate": 9.482355334079122e-05, "loss": 0.8515, "step": 5177 }, { "epoch": 1.0526529782476113, "grad_norm": 0.144555002450943, "learning_rate": 9.480321366825994e-05, "loss": 1.0017, "step": 5178 }, { "epoch": 1.0528562715999188, "grad_norm": 0.1591474562883377, "learning_rate": 9.478287399572867e-05, "loss": 1.1194, "step": 5179 }, { "epoch": 1.053059564952226, "grad_norm": 0.13776561617851257, "learning_rate": 9.476253432319741e-05, "loss": 0.9357, "step": 5180 }, { "epoch": 1.0532628583045334, "grad_norm": 0.12213550508022308, "learning_rate": 9.474219465066613e-05, "loss": 0.8272, "step": 5181 }, { "epoch": 1.0534661516568409, "grad_norm": 0.15859781205654144, "learning_rate": 9.472185497813486e-05, "loss": 0.9405, "step": 5182 }, { "epoch": 1.0536694450091482, "grad_norm": 0.1493707299232483, "learning_rate": 9.470151530560358e-05, "loss": 1.037, "step": 5183 }, { "epoch": 1.0538727383614557, "grad_norm": 0.1524028778076172, "learning_rate": 9.468117563307232e-05, "loss": 1.1292, "step": 5184 }, { "epoch": 1.054076031713763, "grad_norm": 0.144907146692276, "learning_rate": 9.466083596054104e-05, "loss": 0.8658, "step": 5185 }, { "epoch": 1.0542793250660702, "grad_norm": 0.13830581307411194, "learning_rate": 9.464049628800977e-05, "loss": 0.9321, "step": 5186 }, { "epoch": 1.0544826184183778, "grad_norm": 0.13525360822677612, "learning_rate": 9.462015661547849e-05, "loss": 1.0633, "step": 5187 }, { "epoch": 1.054685911770685, "grad_norm": 0.14305701851844788, "learning_rate": 9.459981694294723e-05, "loss": 0.9783, "step": 5188 }, { "epoch": 1.0548892051229926, "grad_norm": 0.15196113288402557, "learning_rate": 9.457947727041595e-05, "loss": 1.0146, "step": 5189 }, { "epoch": 1.0550924984752998, "grad_norm": 0.14528249204158783, "learning_rate": 9.455913759788468e-05, "loss": 1.0114, "step": 5190 }, { "epoch": 1.0552957918276071, "grad_norm": 0.14285583794116974, "learning_rate": 9.45387979253534e-05, "loss": 1.0498, "step": 5191 }, { "epoch": 1.0554990851799146, "grad_norm": 0.15215423703193665, "learning_rate": 9.451845825282214e-05, "loss": 0.8786, "step": 5192 }, { "epoch": 1.055702378532222, "grad_norm": 0.15505358576774597, "learning_rate": 9.449811858029087e-05, "loss": 1.1052, "step": 5193 }, { "epoch": 1.0559056718845294, "grad_norm": 0.1445719599723816, "learning_rate": 9.447777890775959e-05, "loss": 0.9482, "step": 5194 }, { "epoch": 1.0561089652368367, "grad_norm": 0.1489405632019043, "learning_rate": 9.445743923522831e-05, "loss": 0.9503, "step": 5195 }, { "epoch": 1.0563122585891442, "grad_norm": 0.1727711409330368, "learning_rate": 9.443709956269704e-05, "loss": 1.2595, "step": 5196 }, { "epoch": 1.0565155519414515, "grad_norm": 0.1496013104915619, "learning_rate": 9.441675989016578e-05, "loss": 1.1557, "step": 5197 }, { "epoch": 1.0567188452937588, "grad_norm": 0.14185824990272522, "learning_rate": 9.43964202176345e-05, "loss": 1.04, "step": 5198 }, { "epoch": 1.0569221386460663, "grad_norm": 0.14532503485679626, "learning_rate": 9.437608054510323e-05, "loss": 1.0624, "step": 5199 }, { "epoch": 1.0571254319983736, "grad_norm": 0.13846822082996368, "learning_rate": 9.435574087257195e-05, "loss": 1.0004, "step": 5200 }, { "epoch": 1.0573287253506811, "grad_norm": 0.14008352160453796, "learning_rate": 9.433540120004069e-05, "loss": 0.9743, "step": 5201 }, { "epoch": 1.0575320187029884, "grad_norm": 0.14041657745838165, "learning_rate": 9.431506152750941e-05, "loss": 1.0694, "step": 5202 }, { "epoch": 1.0577353120552957, "grad_norm": 0.15184582769870758, "learning_rate": 9.429472185497814e-05, "loss": 1.0546, "step": 5203 }, { "epoch": 1.0579386054076032, "grad_norm": 0.14408542215824127, "learning_rate": 9.427438218244686e-05, "loss": 1.0136, "step": 5204 }, { "epoch": 1.0581418987599105, "grad_norm": 0.1553533673286438, "learning_rate": 9.42540425099156e-05, "loss": 1.0661, "step": 5205 }, { "epoch": 1.058345192112218, "grad_norm": 0.14355801045894623, "learning_rate": 9.423370283738432e-05, "loss": 1.0145, "step": 5206 }, { "epoch": 1.0585484854645253, "grad_norm": 0.15593981742858887, "learning_rate": 9.421336316485305e-05, "loss": 1.0197, "step": 5207 }, { "epoch": 1.0587517788168328, "grad_norm": 0.15501219034194946, "learning_rate": 9.419302349232177e-05, "loss": 1.1659, "step": 5208 }, { "epoch": 1.05895507216914, "grad_norm": 0.14046739041805267, "learning_rate": 9.417268381979051e-05, "loss": 1.0586, "step": 5209 }, { "epoch": 1.0591583655214474, "grad_norm": 0.13722114264965057, "learning_rate": 9.415234414725924e-05, "loss": 1.0901, "step": 5210 }, { "epoch": 1.0593616588737549, "grad_norm": 0.15568463504314423, "learning_rate": 9.413200447472796e-05, "loss": 1.1566, "step": 5211 }, { "epoch": 1.0595649522260622, "grad_norm": 0.1628820151090622, "learning_rate": 9.411166480219668e-05, "loss": 1.1257, "step": 5212 }, { "epoch": 1.0597682455783697, "grad_norm": 0.12070141732692719, "learning_rate": 9.409132512966542e-05, "loss": 0.8634, "step": 5213 }, { "epoch": 1.059971538930677, "grad_norm": 0.147191122174263, "learning_rate": 9.407098545713415e-05, "loss": 1.1329, "step": 5214 }, { "epoch": 1.0601748322829843, "grad_norm": 0.14349913597106934, "learning_rate": 9.405064578460287e-05, "loss": 1.061, "step": 5215 }, { "epoch": 1.0603781256352918, "grad_norm": 0.14183680713176727, "learning_rate": 9.40303061120716e-05, "loss": 0.9728, "step": 5216 }, { "epoch": 1.060581418987599, "grad_norm": 0.1697556972503662, "learning_rate": 9.400996643954033e-05, "loss": 1.2494, "step": 5217 }, { "epoch": 1.0607847123399066, "grad_norm": 0.1233520433306694, "learning_rate": 9.398962676700906e-05, "loss": 0.9116, "step": 5218 }, { "epoch": 1.0609880056922139, "grad_norm": 0.1457953006029129, "learning_rate": 9.396928709447778e-05, "loss": 1.0453, "step": 5219 }, { "epoch": 1.0611912990445211, "grad_norm": 0.13704726099967957, "learning_rate": 9.394894742194651e-05, "loss": 0.9965, "step": 5220 }, { "epoch": 1.0613945923968287, "grad_norm": 0.13508938252925873, "learning_rate": 9.392860774941525e-05, "loss": 0.9579, "step": 5221 }, { "epoch": 1.061597885749136, "grad_norm": 0.1393241584300995, "learning_rate": 9.390826807688397e-05, "loss": 0.9958, "step": 5222 }, { "epoch": 1.0618011791014434, "grad_norm": 0.15738067030906677, "learning_rate": 9.38879284043527e-05, "loss": 1.092, "step": 5223 }, { "epoch": 1.0620044724537507, "grad_norm": 0.1496880203485489, "learning_rate": 9.386758873182142e-05, "loss": 1.1966, "step": 5224 }, { "epoch": 1.0622077658060582, "grad_norm": 0.1586069017648697, "learning_rate": 9.384724905929016e-05, "loss": 1.1199, "step": 5225 }, { "epoch": 1.0624110591583655, "grad_norm": 0.14158375561237335, "learning_rate": 9.382690938675888e-05, "loss": 1.0343, "step": 5226 }, { "epoch": 1.0626143525106728, "grad_norm": 0.16692568361759186, "learning_rate": 9.38065697142276e-05, "loss": 1.1547, "step": 5227 }, { "epoch": 1.0628176458629803, "grad_norm": 0.14403125643730164, "learning_rate": 9.378623004169633e-05, "loss": 1.0403, "step": 5228 }, { "epoch": 1.0630209392152876, "grad_norm": 0.13261236250400543, "learning_rate": 9.376589036916507e-05, "loss": 0.9382, "step": 5229 }, { "epoch": 1.0632242325675951, "grad_norm": 0.15169022977352142, "learning_rate": 9.374555069663379e-05, "loss": 1.1496, "step": 5230 }, { "epoch": 1.0634275259199024, "grad_norm": 0.1648220419883728, "learning_rate": 9.372521102410252e-05, "loss": 1.2013, "step": 5231 }, { "epoch": 1.0636308192722097, "grad_norm": 0.1417074352502823, "learning_rate": 9.370487135157124e-05, "loss": 1.007, "step": 5232 }, { "epoch": 1.0638341126245172, "grad_norm": 0.15137724578380585, "learning_rate": 9.368453167903998e-05, "loss": 1.2016, "step": 5233 }, { "epoch": 1.0640374059768245, "grad_norm": 0.15120957791805267, "learning_rate": 9.36641920065087e-05, "loss": 1.1459, "step": 5234 }, { "epoch": 1.064240699329132, "grad_norm": 0.14129264652729034, "learning_rate": 9.364385233397742e-05, "loss": 0.9686, "step": 5235 }, { "epoch": 1.0644439926814393, "grad_norm": 0.13735541701316833, "learning_rate": 9.362351266144615e-05, "loss": 0.9117, "step": 5236 }, { "epoch": 1.0646472860337468, "grad_norm": 0.14280696213245392, "learning_rate": 9.360317298891488e-05, "loss": 0.9481, "step": 5237 }, { "epoch": 1.064850579386054, "grad_norm": 0.15263578295707703, "learning_rate": 9.358283331638362e-05, "loss": 1.0191, "step": 5238 }, { "epoch": 1.0650538727383614, "grad_norm": 0.14364738762378693, "learning_rate": 9.356249364385234e-05, "loss": 1.0692, "step": 5239 }, { "epoch": 1.065257166090669, "grad_norm": 0.15514370799064636, "learning_rate": 9.354215397132106e-05, "loss": 1.0808, "step": 5240 }, { "epoch": 1.0654604594429762, "grad_norm": 0.14693541824817657, "learning_rate": 9.352181429878979e-05, "loss": 1.0964, "step": 5241 }, { "epoch": 1.0656637527952837, "grad_norm": 0.15230287611484528, "learning_rate": 9.350147462625853e-05, "loss": 1.142, "step": 5242 }, { "epoch": 1.065867046147591, "grad_norm": 0.13272824883460999, "learning_rate": 9.348113495372725e-05, "loss": 0.954, "step": 5243 }, { "epoch": 1.0660703394998983, "grad_norm": 0.14376932382583618, "learning_rate": 9.346079528119598e-05, "loss": 0.9046, "step": 5244 }, { "epoch": 1.0662736328522058, "grad_norm": 0.15434645116329193, "learning_rate": 9.34404556086647e-05, "loss": 1.0499, "step": 5245 }, { "epoch": 1.066476926204513, "grad_norm": 0.14078587293624878, "learning_rate": 9.342011593613344e-05, "loss": 1.0289, "step": 5246 }, { "epoch": 1.0666802195568206, "grad_norm": 0.15372897684574127, "learning_rate": 9.339977626360216e-05, "loss": 1.0794, "step": 5247 }, { "epoch": 1.0668835129091279, "grad_norm": 0.1350528746843338, "learning_rate": 9.337943659107089e-05, "loss": 1.0136, "step": 5248 }, { "epoch": 1.0670868062614352, "grad_norm": 0.15009120106697083, "learning_rate": 9.335909691853961e-05, "loss": 1.0563, "step": 5249 }, { "epoch": 1.0672900996137427, "grad_norm": 0.15966548025608063, "learning_rate": 9.333875724600835e-05, "loss": 1.2194, "step": 5250 }, { "epoch": 1.06749339296605, "grad_norm": 0.14889566600322723, "learning_rate": 9.331841757347707e-05, "loss": 1.0898, "step": 5251 }, { "epoch": 1.0676966863183575, "grad_norm": 0.1444637030363083, "learning_rate": 9.32980779009458e-05, "loss": 0.9107, "step": 5252 }, { "epoch": 1.0678999796706647, "grad_norm": 0.13386781513690948, "learning_rate": 9.327773822841452e-05, "loss": 0.9542, "step": 5253 }, { "epoch": 1.068103273022972, "grad_norm": 0.16856461763381958, "learning_rate": 9.325739855588326e-05, "loss": 1.2317, "step": 5254 }, { "epoch": 1.0683065663752795, "grad_norm": 0.14999926090240479, "learning_rate": 9.323705888335199e-05, "loss": 0.9972, "step": 5255 }, { "epoch": 1.0685098597275868, "grad_norm": 0.16005565226078033, "learning_rate": 9.321671921082071e-05, "loss": 1.1752, "step": 5256 }, { "epoch": 1.0687131530798943, "grad_norm": 0.13948242366313934, "learning_rate": 9.319637953828943e-05, "loss": 0.9566, "step": 5257 }, { "epoch": 1.0689164464322016, "grad_norm": 0.14965321123600006, "learning_rate": 9.317603986575817e-05, "loss": 0.8965, "step": 5258 }, { "epoch": 1.0691197397845091, "grad_norm": 0.1279067099094391, "learning_rate": 9.31557001932269e-05, "loss": 0.894, "step": 5259 }, { "epoch": 1.0693230331368164, "grad_norm": 0.13281695544719696, "learning_rate": 9.313536052069562e-05, "loss": 1.0136, "step": 5260 }, { "epoch": 1.0695263264891237, "grad_norm": 0.16254045069217682, "learning_rate": 9.311502084816435e-05, "loss": 0.9889, "step": 5261 }, { "epoch": 1.0697296198414312, "grad_norm": 0.13825705647468567, "learning_rate": 9.309468117563308e-05, "loss": 0.7885, "step": 5262 }, { "epoch": 1.0699329131937385, "grad_norm": 0.13333582878112793, "learning_rate": 9.307434150310181e-05, "loss": 0.9819, "step": 5263 }, { "epoch": 1.070136206546046, "grad_norm": 0.13237889111042023, "learning_rate": 9.305400183057053e-05, "loss": 0.9114, "step": 5264 }, { "epoch": 1.0703394998983533, "grad_norm": 0.13572004437446594, "learning_rate": 9.303366215803926e-05, "loss": 1.0542, "step": 5265 }, { "epoch": 1.0705427932506608, "grad_norm": 0.1455228328704834, "learning_rate": 9.3013322485508e-05, "loss": 1.103, "step": 5266 }, { "epoch": 1.070746086602968, "grad_norm": 0.15622329711914062, "learning_rate": 9.299298281297672e-05, "loss": 1.115, "step": 5267 }, { "epoch": 1.0709493799552754, "grad_norm": 0.13965292274951935, "learning_rate": 9.297264314044544e-05, "loss": 0.9293, "step": 5268 }, { "epoch": 1.071152673307583, "grad_norm": 0.1426744908094406, "learning_rate": 9.295230346791417e-05, "loss": 0.9612, "step": 5269 }, { "epoch": 1.0713559666598902, "grad_norm": 0.1491684466600418, "learning_rate": 9.293196379538291e-05, "loss": 0.9873, "step": 5270 }, { "epoch": 1.0715592600121977, "grad_norm": 0.13515818119049072, "learning_rate": 9.291162412285163e-05, "loss": 0.9301, "step": 5271 }, { "epoch": 1.071762553364505, "grad_norm": 0.15374673902988434, "learning_rate": 9.289128445032036e-05, "loss": 1.093, "step": 5272 }, { "epoch": 1.0719658467168123, "grad_norm": 0.15958675742149353, "learning_rate": 9.287094477778908e-05, "loss": 1.017, "step": 5273 }, { "epoch": 1.0721691400691198, "grad_norm": 0.145900696516037, "learning_rate": 9.285060510525782e-05, "loss": 1.0264, "step": 5274 }, { "epoch": 1.072372433421427, "grad_norm": 0.14528687298297882, "learning_rate": 9.283026543272654e-05, "loss": 0.9493, "step": 5275 }, { "epoch": 1.0725757267737346, "grad_norm": 0.154060959815979, "learning_rate": 9.280992576019525e-05, "loss": 1.0978, "step": 5276 }, { "epoch": 1.0727790201260419, "grad_norm": 0.15888723731040955, "learning_rate": 9.278958608766399e-05, "loss": 1.1868, "step": 5277 }, { "epoch": 1.0729823134783492, "grad_norm": 0.15369616448879242, "learning_rate": 9.276924641513272e-05, "loss": 0.9594, "step": 5278 }, { "epoch": 1.0731856068306567, "grad_norm": 0.12550809979438782, "learning_rate": 9.274890674260145e-05, "loss": 0.903, "step": 5279 }, { "epoch": 1.073388900182964, "grad_norm": 0.14872346818447113, "learning_rate": 9.272856707007017e-05, "loss": 1.0651, "step": 5280 }, { "epoch": 1.0735921935352715, "grad_norm": 0.12688830494880676, "learning_rate": 9.27082273975389e-05, "loss": 0.8515, "step": 5281 }, { "epoch": 1.0737954868875788, "grad_norm": 0.14313896000385284, "learning_rate": 9.268788772500763e-05, "loss": 0.9985, "step": 5282 }, { "epoch": 1.073998780239886, "grad_norm": 0.1391676962375641, "learning_rate": 9.266754805247637e-05, "loss": 1.0091, "step": 5283 }, { "epoch": 1.0742020735921936, "grad_norm": 0.12431914359331131, "learning_rate": 9.264720837994508e-05, "loss": 0.8205, "step": 5284 }, { "epoch": 1.0744053669445008, "grad_norm": 0.13841819763183594, "learning_rate": 9.262686870741381e-05, "loss": 0.9676, "step": 5285 }, { "epoch": 1.0746086602968083, "grad_norm": 0.15717893838882446, "learning_rate": 9.260652903488254e-05, "loss": 0.9393, "step": 5286 }, { "epoch": 1.0748119536491156, "grad_norm": 0.1341715157032013, "learning_rate": 9.258618936235128e-05, "loss": 0.851, "step": 5287 }, { "epoch": 1.0750152470014231, "grad_norm": 0.165984645485878, "learning_rate": 9.256584968981999e-05, "loss": 1.1344, "step": 5288 }, { "epoch": 1.0752185403537304, "grad_norm": 0.13740302622318268, "learning_rate": 9.254551001728873e-05, "loss": 0.9482, "step": 5289 }, { "epoch": 1.0754218337060377, "grad_norm": 0.14151054620742798, "learning_rate": 9.252517034475745e-05, "loss": 0.926, "step": 5290 }, { "epoch": 1.0756251270583452, "grad_norm": 0.13971510529518127, "learning_rate": 9.250483067222619e-05, "loss": 0.8981, "step": 5291 }, { "epoch": 1.0758284204106525, "grad_norm": 0.13549084961414337, "learning_rate": 9.24844909996949e-05, "loss": 0.9937, "step": 5292 }, { "epoch": 1.07603171376296, "grad_norm": 0.1490660309791565, "learning_rate": 9.246415132716364e-05, "loss": 0.9437, "step": 5293 }, { "epoch": 1.0762350071152673, "grad_norm": 0.14780007302761078, "learning_rate": 9.244381165463236e-05, "loss": 0.9588, "step": 5294 }, { "epoch": 1.0764383004675748, "grad_norm": 0.15218015015125275, "learning_rate": 9.24234719821011e-05, "loss": 1.1914, "step": 5295 }, { "epoch": 1.0766415938198821, "grad_norm": 0.1452578455209732, "learning_rate": 9.240313230956982e-05, "loss": 1.0413, "step": 5296 }, { "epoch": 1.0768448871721894, "grad_norm": 0.15724782645702362, "learning_rate": 9.238279263703855e-05, "loss": 1.1071, "step": 5297 }, { "epoch": 1.077048180524497, "grad_norm": 0.13541807234287262, "learning_rate": 9.236245296450727e-05, "loss": 0.9328, "step": 5298 }, { "epoch": 1.0772514738768042, "grad_norm": 0.15523411333560944, "learning_rate": 9.234211329197601e-05, "loss": 1.0679, "step": 5299 }, { "epoch": 1.0774547672291117, "grad_norm": 0.15143822133541107, "learning_rate": 9.232177361944474e-05, "loss": 1.083, "step": 5300 }, { "epoch": 1.077658060581419, "grad_norm": 0.13371649384498596, "learning_rate": 9.230143394691346e-05, "loss": 0.8173, "step": 5301 }, { "epoch": 1.0778613539337263, "grad_norm": 0.15870672464370728, "learning_rate": 9.228109427438218e-05, "loss": 1.1819, "step": 5302 }, { "epoch": 1.0780646472860338, "grad_norm": 0.13374915719032288, "learning_rate": 9.226075460185092e-05, "loss": 0.8671, "step": 5303 }, { "epoch": 1.078267940638341, "grad_norm": 0.14287759363651276, "learning_rate": 9.224041492931965e-05, "loss": 0.9432, "step": 5304 }, { "epoch": 1.0784712339906486, "grad_norm": 0.14194057881832123, "learning_rate": 9.222007525678837e-05, "loss": 0.9783, "step": 5305 }, { "epoch": 1.0786745273429559, "grad_norm": 0.16892357170581818, "learning_rate": 9.21997355842571e-05, "loss": 1.1271, "step": 5306 }, { "epoch": 1.0788778206952632, "grad_norm": 0.13903899490833282, "learning_rate": 9.217939591172583e-05, "loss": 1.118, "step": 5307 }, { "epoch": 1.0790811140475707, "grad_norm": 0.1528656780719757, "learning_rate": 9.215905623919456e-05, "loss": 0.9426, "step": 5308 }, { "epoch": 1.079284407399878, "grad_norm": 0.14197883009910583, "learning_rate": 9.213871656666328e-05, "loss": 1.0157, "step": 5309 }, { "epoch": 1.0794877007521855, "grad_norm": 0.16497009992599487, "learning_rate": 9.211837689413201e-05, "loss": 1.1818, "step": 5310 }, { "epoch": 1.0796909941044928, "grad_norm": 0.1422882378101349, "learning_rate": 9.209803722160075e-05, "loss": 0.913, "step": 5311 }, { "epoch": 1.0798942874568, "grad_norm": 0.14429578185081482, "learning_rate": 9.207769754906947e-05, "loss": 1.0821, "step": 5312 }, { "epoch": 1.0800975808091076, "grad_norm": 0.1493476927280426, "learning_rate": 9.20573578765382e-05, "loss": 0.9935, "step": 5313 }, { "epoch": 1.0803008741614148, "grad_norm": 0.1300468146800995, "learning_rate": 9.203701820400692e-05, "loss": 0.8461, "step": 5314 }, { "epoch": 1.0805041675137224, "grad_norm": 0.15119443833827972, "learning_rate": 9.201667853147566e-05, "loss": 1.023, "step": 5315 }, { "epoch": 1.0807074608660296, "grad_norm": 0.11995972692966461, "learning_rate": 9.199633885894438e-05, "loss": 0.8724, "step": 5316 }, { "epoch": 1.0809107542183372, "grad_norm": 0.12740197777748108, "learning_rate": 9.197599918641309e-05, "loss": 0.8857, "step": 5317 }, { "epoch": 1.0811140475706444, "grad_norm": 0.1593119502067566, "learning_rate": 9.195565951388183e-05, "loss": 1.1509, "step": 5318 }, { "epoch": 1.0813173409229517, "grad_norm": 0.16390585899353027, "learning_rate": 9.193531984135055e-05, "loss": 0.9571, "step": 5319 }, { "epoch": 1.0815206342752592, "grad_norm": 0.1462492048740387, "learning_rate": 9.191498016881929e-05, "loss": 0.9622, "step": 5320 }, { "epoch": 1.0817239276275665, "grad_norm": 0.14783547818660736, "learning_rate": 9.1894640496288e-05, "loss": 0.9187, "step": 5321 }, { "epoch": 1.081927220979874, "grad_norm": 0.1550622433423996, "learning_rate": 9.187430082375674e-05, "loss": 1.0517, "step": 5322 }, { "epoch": 1.0821305143321813, "grad_norm": 0.16103710234165192, "learning_rate": 9.185396115122547e-05, "loss": 1.0936, "step": 5323 }, { "epoch": 1.0823338076844886, "grad_norm": 0.14218467473983765, "learning_rate": 9.18336214786942e-05, "loss": 1.0221, "step": 5324 }, { "epoch": 1.0825371010367961, "grad_norm": 0.13486315310001373, "learning_rate": 9.181328180616292e-05, "loss": 0.9754, "step": 5325 }, { "epoch": 1.0827403943891034, "grad_norm": 0.1479676365852356, "learning_rate": 9.179294213363165e-05, "loss": 1.077, "step": 5326 }, { "epoch": 1.082943687741411, "grad_norm": 0.16341105103492737, "learning_rate": 9.177260246110038e-05, "loss": 1.2277, "step": 5327 }, { "epoch": 1.0831469810937182, "grad_norm": 0.16840991377830505, "learning_rate": 9.175226278856912e-05, "loss": 1.2679, "step": 5328 }, { "epoch": 1.0833502744460257, "grad_norm": 0.1398116499185562, "learning_rate": 9.173192311603783e-05, "loss": 0.9881, "step": 5329 }, { "epoch": 1.083553567798333, "grad_norm": 0.13883648812770844, "learning_rate": 9.171158344350656e-05, "loss": 1.001, "step": 5330 }, { "epoch": 1.0837568611506403, "grad_norm": 0.1527661830186844, "learning_rate": 9.169124377097529e-05, "loss": 1.1497, "step": 5331 }, { "epoch": 1.0839601545029478, "grad_norm": 0.17001107335090637, "learning_rate": 9.167090409844403e-05, "loss": 1.1845, "step": 5332 }, { "epoch": 1.084163447855255, "grad_norm": 0.150950089097023, "learning_rate": 9.165056442591274e-05, "loss": 1.0365, "step": 5333 }, { "epoch": 1.0843667412075626, "grad_norm": 0.14451636373996735, "learning_rate": 9.163022475338148e-05, "loss": 0.9777, "step": 5334 }, { "epoch": 1.0845700345598699, "grad_norm": 0.16429436206817627, "learning_rate": 9.16098850808502e-05, "loss": 1.2175, "step": 5335 }, { "epoch": 1.0847733279121772, "grad_norm": 0.1558394581079483, "learning_rate": 9.158954540831894e-05, "loss": 1.1054, "step": 5336 }, { "epoch": 1.0849766212644847, "grad_norm": 0.13814805448055267, "learning_rate": 9.156920573578765e-05, "loss": 0.9972, "step": 5337 }, { "epoch": 1.085179914616792, "grad_norm": 0.15244194865226746, "learning_rate": 9.154886606325639e-05, "loss": 1.0955, "step": 5338 }, { "epoch": 1.0853832079690995, "grad_norm": 0.1596089005470276, "learning_rate": 9.152852639072511e-05, "loss": 1.1731, "step": 5339 }, { "epoch": 1.0855865013214068, "grad_norm": 0.15481899678707123, "learning_rate": 9.150818671819385e-05, "loss": 1.0905, "step": 5340 }, { "epoch": 1.085789794673714, "grad_norm": 0.14864175021648407, "learning_rate": 9.148784704566256e-05, "loss": 1.1572, "step": 5341 }, { "epoch": 1.0859930880260216, "grad_norm": 0.16066181659698486, "learning_rate": 9.14675073731313e-05, "loss": 1.1283, "step": 5342 }, { "epoch": 1.0861963813783289, "grad_norm": 0.13866567611694336, "learning_rate": 9.144716770060002e-05, "loss": 0.8998, "step": 5343 }, { "epoch": 1.0863996747306364, "grad_norm": 0.1368878036737442, "learning_rate": 9.142682802806876e-05, "loss": 0.9914, "step": 5344 }, { "epoch": 1.0866029680829437, "grad_norm": 0.12759725749492645, "learning_rate": 9.140648835553747e-05, "loss": 0.8174, "step": 5345 }, { "epoch": 1.0868062614352512, "grad_norm": 0.16169612109661102, "learning_rate": 9.138614868300621e-05, "loss": 1.13, "step": 5346 }, { "epoch": 1.0870095547875585, "grad_norm": 0.15470115840435028, "learning_rate": 9.136580901047493e-05, "loss": 1.1087, "step": 5347 }, { "epoch": 1.0872128481398657, "grad_norm": 0.1303420513868332, "learning_rate": 9.134546933794367e-05, "loss": 0.9249, "step": 5348 }, { "epoch": 1.0874161414921732, "grad_norm": 0.14182552695274353, "learning_rate": 9.132512966541238e-05, "loss": 0.9983, "step": 5349 }, { "epoch": 1.0876194348444805, "grad_norm": 0.14217984676361084, "learning_rate": 9.130478999288112e-05, "loss": 1.0707, "step": 5350 }, { "epoch": 1.087822728196788, "grad_norm": 0.14239375293254852, "learning_rate": 9.128445032034985e-05, "loss": 0.9731, "step": 5351 }, { "epoch": 1.0880260215490953, "grad_norm": 0.13025087118148804, "learning_rate": 9.126411064781858e-05, "loss": 0.8348, "step": 5352 }, { "epoch": 1.0882293149014026, "grad_norm": 0.15558715164661407, "learning_rate": 9.12437709752873e-05, "loss": 1.1197, "step": 5353 }, { "epoch": 1.0884326082537101, "grad_norm": 0.15348006784915924, "learning_rate": 9.122343130275603e-05, "loss": 1.1771, "step": 5354 }, { "epoch": 1.0886359016060174, "grad_norm": 0.15580473840236664, "learning_rate": 9.120309163022476e-05, "loss": 1.0984, "step": 5355 }, { "epoch": 1.088839194958325, "grad_norm": 0.1323358714580536, "learning_rate": 9.11827519576935e-05, "loss": 0.971, "step": 5356 }, { "epoch": 1.0890424883106322, "grad_norm": 0.14014890789985657, "learning_rate": 9.116241228516222e-05, "loss": 0.9644, "step": 5357 }, { "epoch": 1.0892457816629397, "grad_norm": 0.1312636435031891, "learning_rate": 9.114207261263094e-05, "loss": 0.8542, "step": 5358 }, { "epoch": 1.089449075015247, "grad_norm": 0.14939019083976746, "learning_rate": 9.112173294009967e-05, "loss": 1.053, "step": 5359 }, { "epoch": 1.0896523683675543, "grad_norm": 0.15127089619636536, "learning_rate": 9.110139326756839e-05, "loss": 1.0542, "step": 5360 }, { "epoch": 1.0898556617198618, "grad_norm": 0.14450344443321228, "learning_rate": 9.108105359503713e-05, "loss": 1.1007, "step": 5361 }, { "epoch": 1.090058955072169, "grad_norm": 0.1629301756620407, "learning_rate": 9.106071392250584e-05, "loss": 1.1548, "step": 5362 }, { "epoch": 1.0902622484244766, "grad_norm": 0.13532504439353943, "learning_rate": 9.104037424997458e-05, "loss": 0.9654, "step": 5363 }, { "epoch": 1.090465541776784, "grad_norm": 0.12162788212299347, "learning_rate": 9.10200345774433e-05, "loss": 0.8145, "step": 5364 }, { "epoch": 1.0906688351290912, "grad_norm": 0.1330665647983551, "learning_rate": 9.099969490491204e-05, "loss": 0.8723, "step": 5365 }, { "epoch": 1.0908721284813987, "grad_norm": 0.15818393230438232, "learning_rate": 9.097935523238075e-05, "loss": 0.9827, "step": 5366 }, { "epoch": 1.091075421833706, "grad_norm": 0.1608572006225586, "learning_rate": 9.095901555984949e-05, "loss": 1.0993, "step": 5367 }, { "epoch": 1.0912787151860135, "grad_norm": 0.14446404576301575, "learning_rate": 9.093867588731822e-05, "loss": 0.9614, "step": 5368 }, { "epoch": 1.0914820085383208, "grad_norm": 0.14879867434501648, "learning_rate": 9.091833621478695e-05, "loss": 1.0191, "step": 5369 }, { "epoch": 1.091685301890628, "grad_norm": 0.1518966406583786, "learning_rate": 9.089799654225566e-05, "loss": 1.0615, "step": 5370 }, { "epoch": 1.0918885952429356, "grad_norm": 0.15035811066627502, "learning_rate": 9.08776568697244e-05, "loss": 1.0437, "step": 5371 }, { "epoch": 1.0920918885952429, "grad_norm": 0.13971300423145294, "learning_rate": 9.085731719719313e-05, "loss": 0.9, "step": 5372 }, { "epoch": 1.0922951819475504, "grad_norm": 0.15553440153598785, "learning_rate": 9.083697752466187e-05, "loss": 1.1681, "step": 5373 }, { "epoch": 1.0924984752998577, "grad_norm": 0.1469622105360031, "learning_rate": 9.081663785213058e-05, "loss": 1.0313, "step": 5374 }, { "epoch": 1.0927017686521652, "grad_norm": 0.14632147550582886, "learning_rate": 9.079629817959931e-05, "loss": 0.9966, "step": 5375 }, { "epoch": 1.0929050620044725, "grad_norm": 0.14677052199840546, "learning_rate": 9.077595850706804e-05, "loss": 1.0508, "step": 5376 }, { "epoch": 1.0931083553567797, "grad_norm": 0.1287970244884491, "learning_rate": 9.075561883453678e-05, "loss": 0.8408, "step": 5377 }, { "epoch": 1.0933116487090873, "grad_norm": 0.14569233357906342, "learning_rate": 9.073527916200549e-05, "loss": 1.0676, "step": 5378 }, { "epoch": 1.0935149420613945, "grad_norm": 0.13696177303791046, "learning_rate": 9.071493948947423e-05, "loss": 0.9689, "step": 5379 }, { "epoch": 1.093718235413702, "grad_norm": 0.13838046789169312, "learning_rate": 9.069459981694295e-05, "loss": 0.9577, "step": 5380 }, { "epoch": 1.0939215287660093, "grad_norm": 0.13019011914730072, "learning_rate": 9.067426014441169e-05, "loss": 0.8568, "step": 5381 }, { "epoch": 1.0941248221183166, "grad_norm": 0.13824008405208588, "learning_rate": 9.06539204718804e-05, "loss": 0.8465, "step": 5382 }, { "epoch": 1.0943281154706241, "grad_norm": 0.15692386031150818, "learning_rate": 9.063358079934914e-05, "loss": 1.1576, "step": 5383 }, { "epoch": 1.0945314088229314, "grad_norm": 0.166419118642807, "learning_rate": 9.061324112681786e-05, "loss": 1.2284, "step": 5384 }, { "epoch": 1.094734702175239, "grad_norm": 0.1578996330499649, "learning_rate": 9.05929014542866e-05, "loss": 1.2505, "step": 5385 }, { "epoch": 1.0949379955275462, "grad_norm": 0.15102256834506989, "learning_rate": 9.057256178175531e-05, "loss": 1.2031, "step": 5386 }, { "epoch": 1.0951412888798537, "grad_norm": 0.15047991275787354, "learning_rate": 9.055222210922405e-05, "loss": 0.944, "step": 5387 }, { "epoch": 1.095344582232161, "grad_norm": 0.138466015458107, "learning_rate": 9.053188243669277e-05, "loss": 0.9546, "step": 5388 }, { "epoch": 1.0955478755844683, "grad_norm": 0.13472160696983337, "learning_rate": 9.051154276416151e-05, "loss": 0.8465, "step": 5389 }, { "epoch": 1.0957511689367758, "grad_norm": 0.14435645937919617, "learning_rate": 9.049120309163022e-05, "loss": 0.9928, "step": 5390 }, { "epoch": 1.095954462289083, "grad_norm": 0.16124622523784637, "learning_rate": 9.047086341909896e-05, "loss": 1.1246, "step": 5391 }, { "epoch": 1.0961577556413906, "grad_norm": 0.14154884219169617, "learning_rate": 9.045052374656768e-05, "loss": 1.0244, "step": 5392 }, { "epoch": 1.096361048993698, "grad_norm": 0.14055821299552917, "learning_rate": 9.043018407403642e-05, "loss": 0.8506, "step": 5393 }, { "epoch": 1.0965643423460052, "grad_norm": 0.16625897586345673, "learning_rate": 9.040984440150513e-05, "loss": 1.2688, "step": 5394 }, { "epoch": 1.0967676356983127, "grad_norm": 0.1494191437959671, "learning_rate": 9.038950472897387e-05, "loss": 1.1474, "step": 5395 }, { "epoch": 1.09697092905062, "grad_norm": 0.1614067256450653, "learning_rate": 9.03691650564426e-05, "loss": 1.2221, "step": 5396 }, { "epoch": 1.0971742224029275, "grad_norm": 0.1351841688156128, "learning_rate": 9.034882538391133e-05, "loss": 1.0106, "step": 5397 }, { "epoch": 1.0973775157552348, "grad_norm": 0.15088312327861786, "learning_rate": 9.032848571138004e-05, "loss": 1.0176, "step": 5398 }, { "epoch": 1.097580809107542, "grad_norm": 0.14838740229606628, "learning_rate": 9.030814603884878e-05, "loss": 1.1022, "step": 5399 }, { "epoch": 1.0977841024598496, "grad_norm": 0.13708491623401642, "learning_rate": 9.028780636631751e-05, "loss": 0.8909, "step": 5400 }, { "epoch": 1.0979873958121569, "grad_norm": 0.15131209790706635, "learning_rate": 9.026746669378623e-05, "loss": 1.218, "step": 5401 }, { "epoch": 1.0981906891644644, "grad_norm": 0.1643504798412323, "learning_rate": 9.024712702125496e-05, "loss": 1.1279, "step": 5402 }, { "epoch": 1.0983939825167717, "grad_norm": 0.1353635936975479, "learning_rate": 9.022678734872368e-05, "loss": 0.9174, "step": 5403 }, { "epoch": 1.0985972758690792, "grad_norm": 0.1374010145664215, "learning_rate": 9.020644767619242e-05, "loss": 1.0884, "step": 5404 }, { "epoch": 1.0988005692213865, "grad_norm": 0.15371763706207275, "learning_rate": 9.018610800366114e-05, "loss": 0.9519, "step": 5405 }, { "epoch": 1.0990038625736938, "grad_norm": 0.15383990108966827, "learning_rate": 9.016576833112987e-05, "loss": 1.2321, "step": 5406 }, { "epoch": 1.0992071559260013, "grad_norm": 0.1341893970966339, "learning_rate": 9.014542865859859e-05, "loss": 1.0372, "step": 5407 }, { "epoch": 1.0994104492783086, "grad_norm": 0.13906438648700714, "learning_rate": 9.012508898606733e-05, "loss": 0.8928, "step": 5408 }, { "epoch": 1.099613742630616, "grad_norm": 0.14861425757408142, "learning_rate": 9.010474931353605e-05, "loss": 1.0851, "step": 5409 }, { "epoch": 1.0998170359829234, "grad_norm": 0.1484142243862152, "learning_rate": 9.008440964100478e-05, "loss": 1.1029, "step": 5410 }, { "epoch": 1.1000203293352306, "grad_norm": 0.14029765129089355, "learning_rate": 9.00640699684735e-05, "loss": 0.9775, "step": 5411 }, { "epoch": 1.1002236226875381, "grad_norm": 0.1527182012796402, "learning_rate": 9.004373029594224e-05, "loss": 1.0849, "step": 5412 }, { "epoch": 1.1004269160398454, "grad_norm": 0.15019920468330383, "learning_rate": 9.002339062341097e-05, "loss": 0.978, "step": 5413 }, { "epoch": 1.100630209392153, "grad_norm": 0.15240661799907684, "learning_rate": 9.00030509508797e-05, "loss": 1.1096, "step": 5414 }, { "epoch": 1.1008335027444602, "grad_norm": 0.14962786436080933, "learning_rate": 8.998271127834841e-05, "loss": 1.1918, "step": 5415 }, { "epoch": 1.1010367960967677, "grad_norm": 0.13056641817092896, "learning_rate": 8.996237160581715e-05, "loss": 0.8757, "step": 5416 }, { "epoch": 1.101240089449075, "grad_norm": 0.1387203186750412, "learning_rate": 8.994203193328588e-05, "loss": 0.8188, "step": 5417 }, { "epoch": 1.1014433828013823, "grad_norm": 0.15652364492416382, "learning_rate": 8.992169226075462e-05, "loss": 0.9948, "step": 5418 }, { "epoch": 1.1016466761536898, "grad_norm": 0.16937415301799774, "learning_rate": 8.990135258822333e-05, "loss": 1.077, "step": 5419 }, { "epoch": 1.1018499695059971, "grad_norm": 0.1426754593849182, "learning_rate": 8.988101291569206e-05, "loss": 0.9465, "step": 5420 }, { "epoch": 1.1020532628583046, "grad_norm": 0.15397274494171143, "learning_rate": 8.986067324316079e-05, "loss": 1.0617, "step": 5421 }, { "epoch": 1.102256556210612, "grad_norm": 0.13782437145709991, "learning_rate": 8.984033357062953e-05, "loss": 0.9, "step": 5422 }, { "epoch": 1.1024598495629192, "grad_norm": 0.13675148785114288, "learning_rate": 8.981999389809824e-05, "loss": 0.9424, "step": 5423 }, { "epoch": 1.1026631429152267, "grad_norm": 0.15254589915275574, "learning_rate": 8.979965422556698e-05, "loss": 1.0416, "step": 5424 }, { "epoch": 1.102866436267534, "grad_norm": 0.15221473574638367, "learning_rate": 8.97793145530357e-05, "loss": 1.0762, "step": 5425 }, { "epoch": 1.1030697296198415, "grad_norm": 0.16279594600200653, "learning_rate": 8.975897488050444e-05, "loss": 1.116, "step": 5426 }, { "epoch": 1.1032730229721488, "grad_norm": 0.14563637971878052, "learning_rate": 8.973863520797315e-05, "loss": 0.9308, "step": 5427 }, { "epoch": 1.103476316324456, "grad_norm": 0.1332903355360031, "learning_rate": 8.971829553544189e-05, "loss": 0.901, "step": 5428 }, { "epoch": 1.1036796096767636, "grad_norm": 0.16691714525222778, "learning_rate": 8.969795586291061e-05, "loss": 1.1796, "step": 5429 }, { "epoch": 1.1038829030290709, "grad_norm": 0.15106704831123352, "learning_rate": 8.967761619037935e-05, "loss": 1.034, "step": 5430 }, { "epoch": 1.1040861963813784, "grad_norm": 0.14782746136188507, "learning_rate": 8.965727651784806e-05, "loss": 0.9958, "step": 5431 }, { "epoch": 1.1042894897336857, "grad_norm": 0.13478310406208038, "learning_rate": 8.96369368453168e-05, "loss": 0.9949, "step": 5432 }, { "epoch": 1.1044927830859932, "grad_norm": 0.1417469084262848, "learning_rate": 8.961659717278552e-05, "loss": 0.945, "step": 5433 }, { "epoch": 1.1046960764383005, "grad_norm": 0.15904608368873596, "learning_rate": 8.959625750025426e-05, "loss": 0.9804, "step": 5434 }, { "epoch": 1.1048993697906078, "grad_norm": 0.14991609752178192, "learning_rate": 8.957591782772297e-05, "loss": 0.9633, "step": 5435 }, { "epoch": 1.1051026631429153, "grad_norm": 0.14758650958538055, "learning_rate": 8.955557815519171e-05, "loss": 1.2216, "step": 5436 }, { "epoch": 1.1053059564952226, "grad_norm": 0.15049061179161072, "learning_rate": 8.953523848266043e-05, "loss": 0.975, "step": 5437 }, { "epoch": 1.10550924984753, "grad_norm": 0.15753579139709473, "learning_rate": 8.951489881012917e-05, "loss": 1.1518, "step": 5438 }, { "epoch": 1.1057125431998374, "grad_norm": 0.14249707758426666, "learning_rate": 8.949455913759788e-05, "loss": 1.0243, "step": 5439 }, { "epoch": 1.1059158365521446, "grad_norm": 0.14323362708091736, "learning_rate": 8.947421946506662e-05, "loss": 0.9114, "step": 5440 }, { "epoch": 1.1061191299044522, "grad_norm": 0.12730684876441956, "learning_rate": 8.945387979253535e-05, "loss": 0.8369, "step": 5441 }, { "epoch": 1.1063224232567594, "grad_norm": 0.16354194283485413, "learning_rate": 8.943354012000407e-05, "loss": 1.0706, "step": 5442 }, { "epoch": 1.106525716609067, "grad_norm": 0.1542551964521408, "learning_rate": 8.94132004474728e-05, "loss": 1.1125, "step": 5443 }, { "epoch": 1.1067290099613742, "grad_norm": 0.14237554371356964, "learning_rate": 8.939286077494152e-05, "loss": 0.8999, "step": 5444 }, { "epoch": 1.1069323033136818, "grad_norm": 0.15537720918655396, "learning_rate": 8.937252110241026e-05, "loss": 0.9615, "step": 5445 }, { "epoch": 1.107135596665989, "grad_norm": 0.15942874550819397, "learning_rate": 8.935218142987898e-05, "loss": 1.0923, "step": 5446 }, { "epoch": 1.1073388900182963, "grad_norm": 0.12632033228874207, "learning_rate": 8.93318417573477e-05, "loss": 0.8124, "step": 5447 }, { "epoch": 1.1075421833706038, "grad_norm": 0.1381000280380249, "learning_rate": 8.931150208481643e-05, "loss": 0.8588, "step": 5448 }, { "epoch": 1.1077454767229111, "grad_norm": 0.13437995314598083, "learning_rate": 8.929116241228517e-05, "loss": 0.9958, "step": 5449 }, { "epoch": 1.1079487700752186, "grad_norm": 0.14716283977031708, "learning_rate": 8.927082273975389e-05, "loss": 0.9405, "step": 5450 }, { "epoch": 1.108152063427526, "grad_norm": 0.1730317771434784, "learning_rate": 8.925048306722262e-05, "loss": 1.1552, "step": 5451 }, { "epoch": 1.1083553567798332, "grad_norm": 0.1391172856092453, "learning_rate": 8.923014339469134e-05, "loss": 0.8306, "step": 5452 }, { "epoch": 1.1085586501321407, "grad_norm": 0.12834659218788147, "learning_rate": 8.920980372216008e-05, "loss": 0.918, "step": 5453 }, { "epoch": 1.108761943484448, "grad_norm": 0.1469629555940628, "learning_rate": 8.91894640496288e-05, "loss": 1.066, "step": 5454 }, { "epoch": 1.1089652368367555, "grad_norm": 0.15368422865867615, "learning_rate": 8.916912437709753e-05, "loss": 1.097, "step": 5455 }, { "epoch": 1.1091685301890628, "grad_norm": 0.1652909219264984, "learning_rate": 8.914878470456625e-05, "loss": 0.9886, "step": 5456 }, { "epoch": 1.10937182354137, "grad_norm": 0.14498552680015564, "learning_rate": 8.912844503203499e-05, "loss": 0.9856, "step": 5457 }, { "epoch": 1.1095751168936776, "grad_norm": 0.15853099524974823, "learning_rate": 8.910810535950372e-05, "loss": 1.1654, "step": 5458 }, { "epoch": 1.109778410245985, "grad_norm": 0.14158296585083008, "learning_rate": 8.908776568697244e-05, "loss": 0.9035, "step": 5459 }, { "epoch": 1.1099817035982924, "grad_norm": 0.18183912336826324, "learning_rate": 8.906742601444116e-05, "loss": 1.3072, "step": 5460 }, { "epoch": 1.1101849969505997, "grad_norm": 0.13865096867084503, "learning_rate": 8.90470863419099e-05, "loss": 0.8537, "step": 5461 }, { "epoch": 1.110388290302907, "grad_norm": 0.15828418731689453, "learning_rate": 8.902674666937863e-05, "loss": 1.074, "step": 5462 }, { "epoch": 1.1105915836552145, "grad_norm": 0.1492200642824173, "learning_rate": 8.900640699684735e-05, "loss": 0.9791, "step": 5463 }, { "epoch": 1.1107948770075218, "grad_norm": 0.14875096082687378, "learning_rate": 8.898606732431608e-05, "loss": 0.9389, "step": 5464 }, { "epoch": 1.1109981703598293, "grad_norm": 0.1451048105955124, "learning_rate": 8.896572765178481e-05, "loss": 1.0095, "step": 5465 }, { "epoch": 1.1112014637121366, "grad_norm": 0.14733406901359558, "learning_rate": 8.894538797925354e-05, "loss": 0.9441, "step": 5466 }, { "epoch": 1.111404757064444, "grad_norm": 0.15560275316238403, "learning_rate": 8.892504830672226e-05, "loss": 1.0225, "step": 5467 }, { "epoch": 1.1116080504167514, "grad_norm": 0.15289406478405, "learning_rate": 8.890470863419099e-05, "loss": 1.044, "step": 5468 }, { "epoch": 1.1118113437690587, "grad_norm": 0.13992512226104736, "learning_rate": 8.888436896165973e-05, "loss": 0.9947, "step": 5469 }, { "epoch": 1.1120146371213662, "grad_norm": 0.150540292263031, "learning_rate": 8.886402928912845e-05, "loss": 1.0501, "step": 5470 }, { "epoch": 1.1122179304736735, "grad_norm": 0.1405702531337738, "learning_rate": 8.884368961659719e-05, "loss": 0.9497, "step": 5471 }, { "epoch": 1.112421223825981, "grad_norm": 0.15190722048282623, "learning_rate": 8.88233499440659e-05, "loss": 1.016, "step": 5472 }, { "epoch": 1.1126245171782883, "grad_norm": 0.14616787433624268, "learning_rate": 8.880301027153464e-05, "loss": 1.0629, "step": 5473 }, { "epoch": 1.1128278105305958, "grad_norm": 0.13169008493423462, "learning_rate": 8.878267059900336e-05, "loss": 0.9228, "step": 5474 }, { "epoch": 1.113031103882903, "grad_norm": 0.14765764772891998, "learning_rate": 8.87623309264721e-05, "loss": 1.0189, "step": 5475 }, { "epoch": 1.1132343972352103, "grad_norm": 0.1653669774532318, "learning_rate": 8.874199125394081e-05, "loss": 1.292, "step": 5476 }, { "epoch": 1.1134376905875178, "grad_norm": 0.12520195543766022, "learning_rate": 8.872165158140955e-05, "loss": 0.7488, "step": 5477 }, { "epoch": 1.1136409839398251, "grad_norm": 0.15892694890499115, "learning_rate": 8.870131190887827e-05, "loss": 1.1323, "step": 5478 }, { "epoch": 1.1138442772921326, "grad_norm": 0.15398187935352325, "learning_rate": 8.868097223634701e-05, "loss": 1.1707, "step": 5479 }, { "epoch": 1.11404757064444, "grad_norm": 0.15214598178863525, "learning_rate": 8.866063256381572e-05, "loss": 1.0544, "step": 5480 }, { "epoch": 1.1142508639967472, "grad_norm": 0.14790403842926025, "learning_rate": 8.864029289128446e-05, "loss": 1.073, "step": 5481 }, { "epoch": 1.1144541573490547, "grad_norm": 0.1339927762746811, "learning_rate": 8.861995321875318e-05, "loss": 0.8919, "step": 5482 }, { "epoch": 1.114657450701362, "grad_norm": 0.12794196605682373, "learning_rate": 8.859961354622191e-05, "loss": 0.8973, "step": 5483 }, { "epoch": 1.1148607440536695, "grad_norm": 0.1576857566833496, "learning_rate": 8.857927387369063e-05, "loss": 1.0195, "step": 5484 }, { "epoch": 1.1150640374059768, "grad_norm": 0.13559751212596893, "learning_rate": 8.855893420115936e-05, "loss": 0.8857, "step": 5485 }, { "epoch": 1.115267330758284, "grad_norm": 0.15552525222301483, "learning_rate": 8.85385945286281e-05, "loss": 1.1108, "step": 5486 }, { "epoch": 1.1154706241105916, "grad_norm": 0.14692267775535583, "learning_rate": 8.851825485609682e-05, "loss": 0.952, "step": 5487 }, { "epoch": 1.115673917462899, "grad_norm": 0.15186382830142975, "learning_rate": 8.849791518356554e-05, "loss": 1.0116, "step": 5488 }, { "epoch": 1.1158772108152064, "grad_norm": 0.1544073224067688, "learning_rate": 8.847757551103427e-05, "loss": 1.0548, "step": 5489 }, { "epoch": 1.1160805041675137, "grad_norm": 0.149419903755188, "learning_rate": 8.8457235838503e-05, "loss": 1.0116, "step": 5490 }, { "epoch": 1.116283797519821, "grad_norm": 0.14665424823760986, "learning_rate": 8.843689616597173e-05, "loss": 1.0848, "step": 5491 }, { "epoch": 1.1164870908721285, "grad_norm": 0.15898637473583221, "learning_rate": 8.841655649344046e-05, "loss": 1.0112, "step": 5492 }, { "epoch": 1.1166903842244358, "grad_norm": 0.1535964161157608, "learning_rate": 8.839621682090918e-05, "loss": 1.1055, "step": 5493 }, { "epoch": 1.1168936775767433, "grad_norm": 0.1359899640083313, "learning_rate": 8.837587714837792e-05, "loss": 0.9534, "step": 5494 }, { "epoch": 1.1170969709290506, "grad_norm": 0.1600065380334854, "learning_rate": 8.835553747584664e-05, "loss": 1.0518, "step": 5495 }, { "epoch": 1.117300264281358, "grad_norm": 0.15466952323913574, "learning_rate": 8.833519780331537e-05, "loss": 0.9669, "step": 5496 }, { "epoch": 1.1175035576336654, "grad_norm": 0.13147573173046112, "learning_rate": 8.831485813078409e-05, "loss": 0.8511, "step": 5497 }, { "epoch": 1.1177068509859727, "grad_norm": 0.15012076497077942, "learning_rate": 8.829451845825283e-05, "loss": 1.0451, "step": 5498 }, { "epoch": 1.1179101443382802, "grad_norm": 0.16345028579235077, "learning_rate": 8.827417878572155e-05, "loss": 1.3569, "step": 5499 }, { "epoch": 1.1181134376905875, "grad_norm": 0.1560164839029312, "learning_rate": 8.825383911319028e-05, "loss": 1.0785, "step": 5500 }, { "epoch": 1.118316731042895, "grad_norm": 0.16095221042633057, "learning_rate": 8.8233499440659e-05, "loss": 1.1265, "step": 5501 }, { "epoch": 1.1185200243952023, "grad_norm": 0.1449424773454666, "learning_rate": 8.821315976812774e-05, "loss": 1.0265, "step": 5502 }, { "epoch": 1.1187233177475098, "grad_norm": 0.13522765040397644, "learning_rate": 8.819282009559647e-05, "loss": 0.8884, "step": 5503 }, { "epoch": 1.118926611099817, "grad_norm": 0.15144997835159302, "learning_rate": 8.817248042306519e-05, "loss": 1.0492, "step": 5504 }, { "epoch": 1.1191299044521243, "grad_norm": 0.14827972650527954, "learning_rate": 8.815214075053391e-05, "loss": 0.8902, "step": 5505 }, { "epoch": 1.1193331978044319, "grad_norm": 0.15328602492809296, "learning_rate": 8.813180107800265e-05, "loss": 1.035, "step": 5506 }, { "epoch": 1.1195364911567391, "grad_norm": 0.1393328458070755, "learning_rate": 8.811146140547138e-05, "loss": 1.0063, "step": 5507 }, { "epoch": 1.1197397845090467, "grad_norm": 0.15321269631385803, "learning_rate": 8.80911217329401e-05, "loss": 0.8726, "step": 5508 }, { "epoch": 1.119943077861354, "grad_norm": 0.14183679223060608, "learning_rate": 8.807078206040883e-05, "loss": 0.9512, "step": 5509 }, { "epoch": 1.1201463712136612, "grad_norm": 0.14985686540603638, "learning_rate": 8.805044238787756e-05, "loss": 1.065, "step": 5510 }, { "epoch": 1.1203496645659687, "grad_norm": 0.13866209983825684, "learning_rate": 8.803010271534629e-05, "loss": 1.0288, "step": 5511 }, { "epoch": 1.120552957918276, "grad_norm": 0.14831571280956268, "learning_rate": 8.800976304281501e-05, "loss": 0.9417, "step": 5512 }, { "epoch": 1.1207562512705835, "grad_norm": 0.1543184220790863, "learning_rate": 8.798942337028374e-05, "loss": 1.1379, "step": 5513 }, { "epoch": 1.1209595446228908, "grad_norm": 0.14912952482700348, "learning_rate": 8.796908369775248e-05, "loss": 1.0517, "step": 5514 }, { "epoch": 1.1211628379751981, "grad_norm": 0.15913262963294983, "learning_rate": 8.79487440252212e-05, "loss": 1.0256, "step": 5515 }, { "epoch": 1.1213661313275056, "grad_norm": 0.14145322144031525, "learning_rate": 8.792840435268992e-05, "loss": 1.0517, "step": 5516 }, { "epoch": 1.121569424679813, "grad_norm": 0.1385679990053177, "learning_rate": 8.790806468015865e-05, "loss": 0.8375, "step": 5517 }, { "epoch": 1.1217727180321204, "grad_norm": 0.15951962769031525, "learning_rate": 8.788772500762739e-05, "loss": 1.0235, "step": 5518 }, { "epoch": 1.1219760113844277, "grad_norm": 0.14883315563201904, "learning_rate": 8.786738533509611e-05, "loss": 1.1446, "step": 5519 }, { "epoch": 1.122179304736735, "grad_norm": 0.15519806742668152, "learning_rate": 8.784704566256484e-05, "loss": 1.0978, "step": 5520 }, { "epoch": 1.1223825980890425, "grad_norm": 0.1433810442686081, "learning_rate": 8.782670599003356e-05, "loss": 0.9519, "step": 5521 }, { "epoch": 1.1225858914413498, "grad_norm": 0.13667570054531097, "learning_rate": 8.78063663175023e-05, "loss": 0.95, "step": 5522 }, { "epoch": 1.1227891847936573, "grad_norm": 0.1338377296924591, "learning_rate": 8.778602664497102e-05, "loss": 0.9165, "step": 5523 }, { "epoch": 1.1229924781459646, "grad_norm": 0.1531645655632019, "learning_rate": 8.776568697243975e-05, "loss": 1.2018, "step": 5524 }, { "epoch": 1.123195771498272, "grad_norm": 0.15291288495063782, "learning_rate": 8.774534729990847e-05, "loss": 1.1504, "step": 5525 }, { "epoch": 1.1233990648505794, "grad_norm": 0.1413464993238449, "learning_rate": 8.77250076273772e-05, "loss": 0.9624, "step": 5526 }, { "epoch": 1.1236023582028867, "grad_norm": 0.15262098610401154, "learning_rate": 8.770466795484593e-05, "loss": 1.1616, "step": 5527 }, { "epoch": 1.1238056515551942, "grad_norm": 0.1626216322183609, "learning_rate": 8.768432828231466e-05, "loss": 1.2003, "step": 5528 }, { "epoch": 1.1240089449075015, "grad_norm": 0.16189110279083252, "learning_rate": 8.766398860978338e-05, "loss": 1.0696, "step": 5529 }, { "epoch": 1.124212238259809, "grad_norm": 0.14506696164608002, "learning_rate": 8.764364893725211e-05, "loss": 1.0076, "step": 5530 }, { "epoch": 1.1244155316121163, "grad_norm": 0.135748028755188, "learning_rate": 8.762330926472085e-05, "loss": 0.8196, "step": 5531 }, { "epoch": 1.1246188249644238, "grad_norm": 0.15155237913131714, "learning_rate": 8.760296959218957e-05, "loss": 1.0832, "step": 5532 }, { "epoch": 1.124822118316731, "grad_norm": 0.14161360263824463, "learning_rate": 8.75826299196583e-05, "loss": 1.0264, "step": 5533 }, { "epoch": 1.1250254116690384, "grad_norm": 0.12882399559020996, "learning_rate": 8.756229024712702e-05, "loss": 0.8623, "step": 5534 }, { "epoch": 1.1252287050213459, "grad_norm": 0.14270846545696259, "learning_rate": 8.754195057459576e-05, "loss": 1.0163, "step": 5535 }, { "epoch": 1.1254319983736532, "grad_norm": 0.1428549438714981, "learning_rate": 8.752161090206448e-05, "loss": 0.9291, "step": 5536 }, { "epoch": 1.1256352917259607, "grad_norm": 0.13165485858917236, "learning_rate": 8.75012712295332e-05, "loss": 0.8326, "step": 5537 }, { "epoch": 1.125838585078268, "grad_norm": 0.14879177510738373, "learning_rate": 8.748093155700193e-05, "loss": 1.1023, "step": 5538 }, { "epoch": 1.1260418784305752, "grad_norm": 0.150389164686203, "learning_rate": 8.746059188447067e-05, "loss": 0.982, "step": 5539 }, { "epoch": 1.1262451717828827, "grad_norm": 0.16434215009212494, "learning_rate": 8.744025221193939e-05, "loss": 1.0485, "step": 5540 }, { "epoch": 1.12644846513519, "grad_norm": 0.13194431364536285, "learning_rate": 8.741991253940812e-05, "loss": 0.8257, "step": 5541 }, { "epoch": 1.1266517584874975, "grad_norm": 0.1411994993686676, "learning_rate": 8.739957286687684e-05, "loss": 0.9818, "step": 5542 }, { "epoch": 1.1268550518398048, "grad_norm": 0.13878868520259857, "learning_rate": 8.737923319434558e-05, "loss": 0.9704, "step": 5543 }, { "epoch": 1.1270583451921121, "grad_norm": 0.16277877986431122, "learning_rate": 8.73588935218143e-05, "loss": 1.0612, "step": 5544 }, { "epoch": 1.1272616385444196, "grad_norm": 0.16174069046974182, "learning_rate": 8.733855384928303e-05, "loss": 1.2214, "step": 5545 }, { "epoch": 1.127464931896727, "grad_norm": 0.1509084403514862, "learning_rate": 8.731821417675175e-05, "loss": 0.9823, "step": 5546 }, { "epoch": 1.1276682252490344, "grad_norm": 0.1313520222902298, "learning_rate": 8.729787450422049e-05, "loss": 0.8928, "step": 5547 }, { "epoch": 1.1278715186013417, "grad_norm": 0.14811810851097107, "learning_rate": 8.727753483168922e-05, "loss": 1.0147, "step": 5548 }, { "epoch": 1.128074811953649, "grad_norm": 0.16996385157108307, "learning_rate": 8.725719515915794e-05, "loss": 1.1564, "step": 5549 }, { "epoch": 1.1282781053059565, "grad_norm": 0.145395427942276, "learning_rate": 8.723685548662666e-05, "loss": 0.9943, "step": 5550 }, { "epoch": 1.1284813986582638, "grad_norm": 0.13872261345386505, "learning_rate": 8.72165158140954e-05, "loss": 0.9406, "step": 5551 }, { "epoch": 1.1286846920105713, "grad_norm": 0.15163040161132812, "learning_rate": 8.719617614156413e-05, "loss": 0.9178, "step": 5552 }, { "epoch": 1.1288879853628786, "grad_norm": 0.1619151532649994, "learning_rate": 8.717583646903285e-05, "loss": 1.184, "step": 5553 }, { "epoch": 1.129091278715186, "grad_norm": 0.1568858027458191, "learning_rate": 8.715549679650158e-05, "loss": 1.0981, "step": 5554 }, { "epoch": 1.1292945720674934, "grad_norm": 0.158450186252594, "learning_rate": 8.713515712397031e-05, "loss": 1.1141, "step": 5555 }, { "epoch": 1.1294978654198007, "grad_norm": 0.1398298293352127, "learning_rate": 8.711481745143904e-05, "loss": 0.9325, "step": 5556 }, { "epoch": 1.1297011587721082, "grad_norm": 0.14823031425476074, "learning_rate": 8.709447777890776e-05, "loss": 1.088, "step": 5557 }, { "epoch": 1.1299044521244155, "grad_norm": 0.1620652973651886, "learning_rate": 8.707413810637649e-05, "loss": 1.1094, "step": 5558 }, { "epoch": 1.130107745476723, "grad_norm": 0.15144942700862885, "learning_rate": 8.705379843384522e-05, "loss": 1.05, "step": 5559 }, { "epoch": 1.1303110388290303, "grad_norm": 0.14913499355316162, "learning_rate": 8.703345876131395e-05, "loss": 0.8884, "step": 5560 }, { "epoch": 1.1305143321813378, "grad_norm": 0.13716764748096466, "learning_rate": 8.701311908878267e-05, "loss": 0.96, "step": 5561 }, { "epoch": 1.130717625533645, "grad_norm": 0.14230920374393463, "learning_rate": 8.69927794162514e-05, "loss": 0.9695, "step": 5562 }, { "epoch": 1.1309209188859524, "grad_norm": 0.14691171050071716, "learning_rate": 8.697243974372014e-05, "loss": 1.0266, "step": 5563 }, { "epoch": 1.1311242122382599, "grad_norm": 0.15973179042339325, "learning_rate": 8.695210007118886e-05, "loss": 1.1036, "step": 5564 }, { "epoch": 1.1313275055905672, "grad_norm": 0.14725075662136078, "learning_rate": 8.693176039865759e-05, "loss": 1.0159, "step": 5565 }, { "epoch": 1.1315307989428747, "grad_norm": 0.15641410648822784, "learning_rate": 8.691142072612631e-05, "loss": 1.11, "step": 5566 }, { "epoch": 1.131734092295182, "grad_norm": 0.12851740419864655, "learning_rate": 8.689108105359503e-05, "loss": 0.8651, "step": 5567 }, { "epoch": 1.1319373856474892, "grad_norm": 0.15197080373764038, "learning_rate": 8.687074138106377e-05, "loss": 0.9718, "step": 5568 }, { "epoch": 1.1321406789997968, "grad_norm": 0.1637222021818161, "learning_rate": 8.68504017085325e-05, "loss": 1.2166, "step": 5569 }, { "epoch": 1.132343972352104, "grad_norm": 0.1471412032842636, "learning_rate": 8.683006203600122e-05, "loss": 1.0772, "step": 5570 }, { "epoch": 1.1325472657044116, "grad_norm": 0.1441943347454071, "learning_rate": 8.680972236346995e-05, "loss": 1.0778, "step": 5571 }, { "epoch": 1.1327505590567188, "grad_norm": 0.13848277926445007, "learning_rate": 8.678938269093868e-05, "loss": 0.9954, "step": 5572 }, { "epoch": 1.1329538524090261, "grad_norm": 0.15798182785511017, "learning_rate": 8.676904301840741e-05, "loss": 1.1491, "step": 5573 }, { "epoch": 1.1331571457613336, "grad_norm": 0.1413598209619522, "learning_rate": 8.674870334587613e-05, "loss": 1.0427, "step": 5574 }, { "epoch": 1.133360439113641, "grad_norm": 0.1488219052553177, "learning_rate": 8.672836367334486e-05, "loss": 1.0375, "step": 5575 }, { "epoch": 1.1335637324659484, "grad_norm": 0.16564448177814484, "learning_rate": 8.67080240008136e-05, "loss": 1.2393, "step": 5576 }, { "epoch": 1.1337670258182557, "grad_norm": 0.13589560985565186, "learning_rate": 8.668768432828232e-05, "loss": 0.8928, "step": 5577 }, { "epoch": 1.133970319170563, "grad_norm": 0.140016570687294, "learning_rate": 8.666734465575104e-05, "loss": 0.9078, "step": 5578 }, { "epoch": 1.1341736125228705, "grad_norm": 0.1660967916250229, "learning_rate": 8.664700498321977e-05, "loss": 1.1746, "step": 5579 }, { "epoch": 1.1343769058751778, "grad_norm": 0.14648735523223877, "learning_rate": 8.66266653106885e-05, "loss": 1.0231, "step": 5580 }, { "epoch": 1.1345801992274853, "grad_norm": 0.15857502818107605, "learning_rate": 8.660632563815723e-05, "loss": 1.1395, "step": 5581 }, { "epoch": 1.1347834925797926, "grad_norm": 0.15594516694545746, "learning_rate": 8.658598596562596e-05, "loss": 1.0801, "step": 5582 }, { "epoch": 1.1349867859321001, "grad_norm": 0.1439754217863083, "learning_rate": 8.656564629309468e-05, "loss": 0.9435, "step": 5583 }, { "epoch": 1.1351900792844074, "grad_norm": 0.1530708372592926, "learning_rate": 8.654530662056342e-05, "loss": 1.0227, "step": 5584 }, { "epoch": 1.1353933726367147, "grad_norm": 0.15302684903144836, "learning_rate": 8.652496694803214e-05, "loss": 1.0522, "step": 5585 }, { "epoch": 1.1355966659890222, "grad_norm": 0.15681840479373932, "learning_rate": 8.650462727550087e-05, "loss": 1.117, "step": 5586 }, { "epoch": 1.1357999593413295, "grad_norm": 0.14197994768619537, "learning_rate": 8.648428760296959e-05, "loss": 0.9114, "step": 5587 }, { "epoch": 1.136003252693637, "grad_norm": 0.15581487119197845, "learning_rate": 8.646394793043833e-05, "loss": 1.0896, "step": 5588 }, { "epoch": 1.1362065460459443, "grad_norm": 0.15578950941562653, "learning_rate": 8.644360825790705e-05, "loss": 1.0771, "step": 5589 }, { "epoch": 1.1364098393982518, "grad_norm": 0.15608197450637817, "learning_rate": 8.642326858537578e-05, "loss": 1.0018, "step": 5590 }, { "epoch": 1.136613132750559, "grad_norm": 0.14285367727279663, "learning_rate": 8.64029289128445e-05, "loss": 0.9629, "step": 5591 }, { "epoch": 1.1368164261028664, "grad_norm": 0.14336615800857544, "learning_rate": 8.638258924031324e-05, "loss": 1.0679, "step": 5592 }, { "epoch": 1.1370197194551739, "grad_norm": 0.13040810823440552, "learning_rate": 8.636224956778197e-05, "loss": 0.898, "step": 5593 }, { "epoch": 1.1372230128074812, "grad_norm": 0.14586962759494781, "learning_rate": 8.634190989525069e-05, "loss": 1.0594, "step": 5594 }, { "epoch": 1.1374263061597887, "grad_norm": 0.15765921771526337, "learning_rate": 8.632157022271941e-05, "loss": 1.0844, "step": 5595 }, { "epoch": 1.137629599512096, "grad_norm": 0.14407019317150116, "learning_rate": 8.630123055018815e-05, "loss": 1.0996, "step": 5596 }, { "epoch": 1.1378328928644033, "grad_norm": 0.16062206029891968, "learning_rate": 8.628089087765688e-05, "loss": 1.1719, "step": 5597 }, { "epoch": 1.1380361862167108, "grad_norm": 0.17504040896892548, "learning_rate": 8.62605512051256e-05, "loss": 1.1719, "step": 5598 }, { "epoch": 1.138239479569018, "grad_norm": 0.15141957998275757, "learning_rate": 8.624021153259433e-05, "loss": 1.064, "step": 5599 }, { "epoch": 1.1384427729213256, "grad_norm": 0.15264716744422913, "learning_rate": 8.621987186006306e-05, "loss": 1.0841, "step": 5600 }, { "epoch": 1.1386460662736329, "grad_norm": 0.16055592894554138, "learning_rate": 8.619953218753179e-05, "loss": 1.1176, "step": 5601 }, { "epoch": 1.1388493596259401, "grad_norm": 0.15006980299949646, "learning_rate": 8.617919251500051e-05, "loss": 1.161, "step": 5602 }, { "epoch": 1.1390526529782476, "grad_norm": 0.17045535147190094, "learning_rate": 8.615885284246924e-05, "loss": 1.2455, "step": 5603 }, { "epoch": 1.139255946330555, "grad_norm": 0.12832573056221008, "learning_rate": 8.613851316993797e-05, "loss": 0.8092, "step": 5604 }, { "epoch": 1.1394592396828624, "grad_norm": 0.13745355606079102, "learning_rate": 8.61181734974067e-05, "loss": 0.9675, "step": 5605 }, { "epoch": 1.1396625330351697, "grad_norm": 0.14315581321716309, "learning_rate": 8.609783382487542e-05, "loss": 0.9764, "step": 5606 }, { "epoch": 1.139865826387477, "grad_norm": 0.1444985270500183, "learning_rate": 8.607749415234415e-05, "loss": 0.8869, "step": 5607 }, { "epoch": 1.1400691197397845, "grad_norm": 0.1422191560268402, "learning_rate": 8.605715447981287e-05, "loss": 0.963, "step": 5608 }, { "epoch": 1.1402724130920918, "grad_norm": 0.1547440141439438, "learning_rate": 8.603681480728161e-05, "loss": 1.0648, "step": 5609 }, { "epoch": 1.1404757064443993, "grad_norm": 0.14438459277153015, "learning_rate": 8.601647513475034e-05, "loss": 0.9473, "step": 5610 }, { "epoch": 1.1406789997967066, "grad_norm": 0.15834081172943115, "learning_rate": 8.599613546221906e-05, "loss": 1.1726, "step": 5611 }, { "epoch": 1.140882293149014, "grad_norm": 0.15683820843696594, "learning_rate": 8.597579578968778e-05, "loss": 1.1011, "step": 5612 }, { "epoch": 1.1410855865013214, "grad_norm": 0.15861444175243378, "learning_rate": 8.595545611715652e-05, "loss": 1.1875, "step": 5613 }, { "epoch": 1.1412888798536287, "grad_norm": 0.14514437317848206, "learning_rate": 8.593511644462525e-05, "loss": 0.9358, "step": 5614 }, { "epoch": 1.1414921732059362, "grad_norm": 0.14084678888320923, "learning_rate": 8.591477677209397e-05, "loss": 0.9222, "step": 5615 }, { "epoch": 1.1416954665582435, "grad_norm": 0.15795862674713135, "learning_rate": 8.58944370995627e-05, "loss": 0.8645, "step": 5616 }, { "epoch": 1.141898759910551, "grad_norm": 0.16617043316364288, "learning_rate": 8.587409742703143e-05, "loss": 1.0558, "step": 5617 }, { "epoch": 1.1421020532628583, "grad_norm": 0.1414622664451599, "learning_rate": 8.585375775450016e-05, "loss": 0.9047, "step": 5618 }, { "epoch": 1.1423053466151658, "grad_norm": 0.12412548065185547, "learning_rate": 8.583341808196888e-05, "loss": 0.9193, "step": 5619 }, { "epoch": 1.142508639967473, "grad_norm": 0.14441967010498047, "learning_rate": 8.581307840943761e-05, "loss": 1.0402, "step": 5620 }, { "epoch": 1.1427119333197804, "grad_norm": 0.11869372427463531, "learning_rate": 8.579273873690634e-05, "loss": 0.6884, "step": 5621 }, { "epoch": 1.142915226672088, "grad_norm": 0.14870472252368927, "learning_rate": 8.577239906437507e-05, "loss": 1.0158, "step": 5622 }, { "epoch": 1.1431185200243952, "grad_norm": 0.1534339189529419, "learning_rate": 8.57520593918438e-05, "loss": 1.0615, "step": 5623 }, { "epoch": 1.1433218133767027, "grad_norm": 0.15304234623908997, "learning_rate": 8.573171971931252e-05, "loss": 1.0745, "step": 5624 }, { "epoch": 1.14352510672901, "grad_norm": 0.13723568618297577, "learning_rate": 8.571138004678126e-05, "loss": 0.9799, "step": 5625 }, { "epoch": 1.1437284000813173, "grad_norm": 0.1478075534105301, "learning_rate": 8.569104037424998e-05, "loss": 0.9989, "step": 5626 }, { "epoch": 1.1439316934336248, "grad_norm": 0.1592124104499817, "learning_rate": 8.56707007017187e-05, "loss": 1.1198, "step": 5627 }, { "epoch": 1.144134986785932, "grad_norm": 0.14707691967487335, "learning_rate": 8.565036102918743e-05, "loss": 1.0284, "step": 5628 }, { "epoch": 1.1443382801382396, "grad_norm": 0.16647355258464813, "learning_rate": 8.563002135665617e-05, "loss": 1.1762, "step": 5629 }, { "epoch": 1.1445415734905469, "grad_norm": 0.13492952287197113, "learning_rate": 8.560968168412489e-05, "loss": 0.8634, "step": 5630 }, { "epoch": 1.1447448668428541, "grad_norm": 0.13840839266777039, "learning_rate": 8.558934201159362e-05, "loss": 0.9339, "step": 5631 }, { "epoch": 1.1449481601951617, "grad_norm": 0.1501016616821289, "learning_rate": 8.556900233906234e-05, "loss": 0.9829, "step": 5632 }, { "epoch": 1.145151453547469, "grad_norm": 0.16703301668167114, "learning_rate": 8.554866266653108e-05, "loss": 1.0938, "step": 5633 }, { "epoch": 1.1453547468997765, "grad_norm": 0.14363522827625275, "learning_rate": 8.55283229939998e-05, "loss": 1.0352, "step": 5634 }, { "epoch": 1.1455580402520837, "grad_norm": 0.13370376825332642, "learning_rate": 8.550798332146853e-05, "loss": 0.8742, "step": 5635 }, { "epoch": 1.145761333604391, "grad_norm": 0.1378611922264099, "learning_rate": 8.548764364893725e-05, "loss": 0.9481, "step": 5636 }, { "epoch": 1.1459646269566985, "grad_norm": 0.14585551619529724, "learning_rate": 8.546730397640599e-05, "loss": 0.9109, "step": 5637 }, { "epoch": 1.1461679203090058, "grad_norm": 0.1484040468931198, "learning_rate": 8.544696430387471e-05, "loss": 1.1809, "step": 5638 }, { "epoch": 1.1463712136613133, "grad_norm": 0.14454567432403564, "learning_rate": 8.542662463134344e-05, "loss": 0.9458, "step": 5639 }, { "epoch": 1.1465745070136206, "grad_norm": 0.16539864242076874, "learning_rate": 8.540628495881216e-05, "loss": 1.1604, "step": 5640 }, { "epoch": 1.146777800365928, "grad_norm": 0.14397001266479492, "learning_rate": 8.53859452862809e-05, "loss": 1.0053, "step": 5641 }, { "epoch": 1.1469810937182354, "grad_norm": 0.1477411836385727, "learning_rate": 8.536560561374963e-05, "loss": 0.8505, "step": 5642 }, { "epoch": 1.1471843870705427, "grad_norm": 0.14993907511234283, "learning_rate": 8.534526594121835e-05, "loss": 1.0681, "step": 5643 }, { "epoch": 1.1473876804228502, "grad_norm": 0.1390916258096695, "learning_rate": 8.532492626868708e-05, "loss": 1.0582, "step": 5644 }, { "epoch": 1.1475909737751575, "grad_norm": 0.14765296876430511, "learning_rate": 8.530458659615581e-05, "loss": 0.9839, "step": 5645 }, { "epoch": 1.147794267127465, "grad_norm": 0.14012466371059418, "learning_rate": 8.528424692362454e-05, "loss": 0.8695, "step": 5646 }, { "epoch": 1.1479975604797723, "grad_norm": 0.16302555799484253, "learning_rate": 8.526390725109326e-05, "loss": 1.0865, "step": 5647 }, { "epoch": 1.1482008538320798, "grad_norm": 0.14376793801784515, "learning_rate": 8.524356757856199e-05, "loss": 0.9831, "step": 5648 }, { "epoch": 1.148404147184387, "grad_norm": 0.13375428318977356, "learning_rate": 8.522322790603071e-05, "loss": 0.9391, "step": 5649 }, { "epoch": 1.1486074405366944, "grad_norm": 0.16969521343708038, "learning_rate": 8.520288823349945e-05, "loss": 1.0134, "step": 5650 }, { "epoch": 1.148810733889002, "grad_norm": 0.1528272181749344, "learning_rate": 8.518254856096817e-05, "loss": 1.0294, "step": 5651 }, { "epoch": 1.1490140272413092, "grad_norm": 0.1412121057510376, "learning_rate": 8.51622088884369e-05, "loss": 1.0031, "step": 5652 }, { "epoch": 1.1492173205936167, "grad_norm": 0.15225495398044586, "learning_rate": 8.514186921590562e-05, "loss": 1.1583, "step": 5653 }, { "epoch": 1.149420613945924, "grad_norm": 0.15183210372924805, "learning_rate": 8.512152954337436e-05, "loss": 1.0097, "step": 5654 }, { "epoch": 1.1496239072982313, "grad_norm": 0.15771012008190155, "learning_rate": 8.510118987084308e-05, "loss": 0.9539, "step": 5655 }, { "epoch": 1.1498272006505388, "grad_norm": 0.13775765895843506, "learning_rate": 8.508085019831181e-05, "loss": 0.8771, "step": 5656 }, { "epoch": 1.150030494002846, "grad_norm": 0.15493398904800415, "learning_rate": 8.506051052578053e-05, "loss": 1.0152, "step": 5657 }, { "epoch": 1.1502337873551536, "grad_norm": 0.15068112313747406, "learning_rate": 8.504017085324927e-05, "loss": 1.0763, "step": 5658 }, { "epoch": 1.1504370807074609, "grad_norm": 0.17684713006019592, "learning_rate": 8.5019831180718e-05, "loss": 1.1555, "step": 5659 }, { "epoch": 1.1506403740597682, "grad_norm": 0.15432146191596985, "learning_rate": 8.499949150818672e-05, "loss": 1.0223, "step": 5660 }, { "epoch": 1.1508436674120757, "grad_norm": 0.1458262950181961, "learning_rate": 8.497915183565545e-05, "loss": 1.1111, "step": 5661 }, { "epoch": 1.151046960764383, "grad_norm": 0.14266814291477203, "learning_rate": 8.495881216312418e-05, "loss": 1.058, "step": 5662 }, { "epoch": 1.1512502541166905, "grad_norm": 0.14124521613121033, "learning_rate": 8.493847249059291e-05, "loss": 1.0221, "step": 5663 }, { "epoch": 1.1514535474689978, "grad_norm": 0.13366392254829407, "learning_rate": 8.491813281806163e-05, "loss": 0.8048, "step": 5664 }, { "epoch": 1.151656840821305, "grad_norm": 0.14957614243030548, "learning_rate": 8.489779314553036e-05, "loss": 1.0168, "step": 5665 }, { "epoch": 1.1518601341736125, "grad_norm": 0.15111826360225677, "learning_rate": 8.48774534729991e-05, "loss": 1.0378, "step": 5666 }, { "epoch": 1.1520634275259198, "grad_norm": 0.14935816824436188, "learning_rate": 8.485711380046782e-05, "loss": 1.0891, "step": 5667 }, { "epoch": 1.1522667208782273, "grad_norm": 0.14815278351306915, "learning_rate": 8.483677412793654e-05, "loss": 0.9746, "step": 5668 }, { "epoch": 1.1524700142305346, "grad_norm": 0.14705118536949158, "learning_rate": 8.481643445540527e-05, "loss": 1.1137, "step": 5669 }, { "epoch": 1.152673307582842, "grad_norm": 0.1314670443534851, "learning_rate": 8.4796094782874e-05, "loss": 0.951, "step": 5670 }, { "epoch": 1.1528766009351494, "grad_norm": 0.14406251907348633, "learning_rate": 8.477575511034273e-05, "loss": 1.0297, "step": 5671 }, { "epoch": 1.1530798942874567, "grad_norm": 0.14774449169635773, "learning_rate": 8.475541543781146e-05, "loss": 1.0561, "step": 5672 }, { "epoch": 1.1532831876397642, "grad_norm": 0.1500570923089981, "learning_rate": 8.473507576528018e-05, "loss": 0.8946, "step": 5673 }, { "epoch": 1.1534864809920715, "grad_norm": 0.1514650285243988, "learning_rate": 8.471473609274892e-05, "loss": 1.1588, "step": 5674 }, { "epoch": 1.153689774344379, "grad_norm": 0.14556598663330078, "learning_rate": 8.469439642021764e-05, "loss": 1.0184, "step": 5675 }, { "epoch": 1.1538930676966863, "grad_norm": 0.14652854204177856, "learning_rate": 8.467405674768637e-05, "loss": 0.9221, "step": 5676 }, { "epoch": 1.1540963610489936, "grad_norm": 0.15506261587142944, "learning_rate": 8.465371707515509e-05, "loss": 1.14, "step": 5677 }, { "epoch": 1.1542996544013011, "grad_norm": 0.14113929867744446, "learning_rate": 8.463337740262383e-05, "loss": 0.9412, "step": 5678 }, { "epoch": 1.1545029477536084, "grad_norm": 0.14070037007331848, "learning_rate": 8.461303773009255e-05, "loss": 1.0308, "step": 5679 }, { "epoch": 1.154706241105916, "grad_norm": 0.1333095133304596, "learning_rate": 8.459269805756128e-05, "loss": 0.9542, "step": 5680 }, { "epoch": 1.1549095344582232, "grad_norm": 0.15399445593357086, "learning_rate": 8.457235838503e-05, "loss": 0.9656, "step": 5681 }, { "epoch": 1.1551128278105307, "grad_norm": 0.14313603937625885, "learning_rate": 8.455201871249874e-05, "loss": 1.0213, "step": 5682 }, { "epoch": 1.155316121162838, "grad_norm": 0.1357993334531784, "learning_rate": 8.453167903996746e-05, "loss": 0.8993, "step": 5683 }, { "epoch": 1.1555194145151453, "grad_norm": 0.14041830599308014, "learning_rate": 8.451133936743619e-05, "loss": 0.9714, "step": 5684 }, { "epoch": 1.1557227078674528, "grad_norm": 0.13149768114089966, "learning_rate": 8.449099969490491e-05, "loss": 0.8474, "step": 5685 }, { "epoch": 1.15592600121976, "grad_norm": 0.15044553577899933, "learning_rate": 8.447066002237365e-05, "loss": 1.0609, "step": 5686 }, { "epoch": 1.1561292945720676, "grad_norm": 0.1325349062681198, "learning_rate": 8.445032034984238e-05, "loss": 0.9539, "step": 5687 }, { "epoch": 1.1563325879243749, "grad_norm": 0.1530589908361435, "learning_rate": 8.44299806773111e-05, "loss": 1.1166, "step": 5688 }, { "epoch": 1.1565358812766822, "grad_norm": 0.14407488703727722, "learning_rate": 8.440964100477983e-05, "loss": 0.9873, "step": 5689 }, { "epoch": 1.1567391746289897, "grad_norm": 0.15614476799964905, "learning_rate": 8.438930133224856e-05, "loss": 1.1084, "step": 5690 }, { "epoch": 1.156942467981297, "grad_norm": 0.15673379600048065, "learning_rate": 8.436896165971729e-05, "loss": 1.1468, "step": 5691 }, { "epoch": 1.1571457613336045, "grad_norm": 0.16642117500305176, "learning_rate": 8.4348621987186e-05, "loss": 1.1427, "step": 5692 }, { "epoch": 1.1573490546859118, "grad_norm": 0.11349500715732574, "learning_rate": 8.432828231465474e-05, "loss": 0.7629, "step": 5693 }, { "epoch": 1.157552348038219, "grad_norm": 0.15668603777885437, "learning_rate": 8.430794264212346e-05, "loss": 1.1667, "step": 5694 }, { "epoch": 1.1577556413905266, "grad_norm": 0.13904212415218353, "learning_rate": 8.42876029695922e-05, "loss": 0.9752, "step": 5695 }, { "epoch": 1.1579589347428338, "grad_norm": 0.15101003646850586, "learning_rate": 8.426726329706091e-05, "loss": 0.9314, "step": 5696 }, { "epoch": 1.1581622280951414, "grad_norm": 0.1544020175933838, "learning_rate": 8.424692362452965e-05, "loss": 1.0759, "step": 5697 }, { "epoch": 1.1583655214474486, "grad_norm": 0.15711095929145813, "learning_rate": 8.422658395199837e-05, "loss": 1.0702, "step": 5698 }, { "epoch": 1.158568814799756, "grad_norm": 0.1422794610261917, "learning_rate": 8.420624427946711e-05, "loss": 0.9433, "step": 5699 }, { "epoch": 1.1587721081520634, "grad_norm": 0.15745525062084198, "learning_rate": 8.418590460693583e-05, "loss": 1.0318, "step": 5700 }, { "epoch": 1.1589754015043707, "grad_norm": 0.1667427271604538, "learning_rate": 8.416556493440456e-05, "loss": 1.2325, "step": 5701 }, { "epoch": 1.1591786948566782, "grad_norm": 0.16718730330467224, "learning_rate": 8.414522526187328e-05, "loss": 1.259, "step": 5702 }, { "epoch": 1.1593819882089855, "grad_norm": 0.15154454112052917, "learning_rate": 8.412488558934202e-05, "loss": 1.1235, "step": 5703 }, { "epoch": 1.159585281561293, "grad_norm": 0.13686643540859222, "learning_rate": 8.410454591681075e-05, "loss": 0.9764, "step": 5704 }, { "epoch": 1.1597885749136003, "grad_norm": 0.1651173233985901, "learning_rate": 8.408420624427947e-05, "loss": 1.2071, "step": 5705 }, { "epoch": 1.1599918682659076, "grad_norm": 0.15027135610580444, "learning_rate": 8.40638665717482e-05, "loss": 0.999, "step": 5706 }, { "epoch": 1.1601951616182151, "grad_norm": 0.153062641620636, "learning_rate": 8.404352689921693e-05, "loss": 1.0434, "step": 5707 }, { "epoch": 1.1603984549705224, "grad_norm": 0.1480168104171753, "learning_rate": 8.402318722668566e-05, "loss": 1.042, "step": 5708 }, { "epoch": 1.16060174832283, "grad_norm": 0.14577078819274902, "learning_rate": 8.400284755415438e-05, "loss": 0.9825, "step": 5709 }, { "epoch": 1.1608050416751372, "grad_norm": 0.1533634513616562, "learning_rate": 8.39825078816231e-05, "loss": 1.0282, "step": 5710 }, { "epoch": 1.1610083350274447, "grad_norm": 0.16153539717197418, "learning_rate": 8.396216820909184e-05, "loss": 1.0728, "step": 5711 }, { "epoch": 1.161211628379752, "grad_norm": 0.17193733155727386, "learning_rate": 8.394182853656057e-05, "loss": 1.0292, "step": 5712 }, { "epoch": 1.1614149217320593, "grad_norm": 0.142516627907753, "learning_rate": 8.39214888640293e-05, "loss": 0.892, "step": 5713 }, { "epoch": 1.1616182150843668, "grad_norm": 0.1397552192211151, "learning_rate": 8.390114919149802e-05, "loss": 0.9634, "step": 5714 }, { "epoch": 1.161821508436674, "grad_norm": 0.15920226275920868, "learning_rate": 8.388080951896676e-05, "loss": 1.124, "step": 5715 }, { "epoch": 1.1620248017889816, "grad_norm": 0.15637387335300446, "learning_rate": 8.386046984643548e-05, "loss": 1.0682, "step": 5716 }, { "epoch": 1.1622280951412889, "grad_norm": 0.136688694357872, "learning_rate": 8.38401301739042e-05, "loss": 0.902, "step": 5717 }, { "epoch": 1.1624313884935962, "grad_norm": 0.15799644589424133, "learning_rate": 8.381979050137293e-05, "loss": 1.1659, "step": 5718 }, { "epoch": 1.1626346818459037, "grad_norm": 0.12915347516536713, "learning_rate": 8.379945082884167e-05, "loss": 0.7916, "step": 5719 }, { "epoch": 1.162837975198211, "grad_norm": 0.1522664725780487, "learning_rate": 8.377911115631039e-05, "loss": 1.1851, "step": 5720 }, { "epoch": 1.1630412685505185, "grad_norm": 0.16952194273471832, "learning_rate": 8.375877148377912e-05, "loss": 1.1392, "step": 5721 }, { "epoch": 1.1632445619028258, "grad_norm": 0.14386901259422302, "learning_rate": 8.373843181124784e-05, "loss": 1.0165, "step": 5722 }, { "epoch": 1.163447855255133, "grad_norm": 0.15671277046203613, "learning_rate": 8.371809213871658e-05, "loss": 1.1116, "step": 5723 }, { "epoch": 1.1636511486074406, "grad_norm": 0.1588602215051651, "learning_rate": 8.36977524661853e-05, "loss": 1.1571, "step": 5724 }, { "epoch": 1.1638544419597479, "grad_norm": 0.14656004309654236, "learning_rate": 8.367741279365403e-05, "loss": 0.9233, "step": 5725 }, { "epoch": 1.1640577353120554, "grad_norm": 0.1547871232032776, "learning_rate": 8.365707312112275e-05, "loss": 1.0689, "step": 5726 }, { "epoch": 1.1642610286643627, "grad_norm": 0.15510883927345276, "learning_rate": 8.363673344859149e-05, "loss": 1.0671, "step": 5727 }, { "epoch": 1.16446432201667, "grad_norm": 0.17547018826007843, "learning_rate": 8.361639377606021e-05, "loss": 1.2643, "step": 5728 }, { "epoch": 1.1646676153689774, "grad_norm": 0.12577450275421143, "learning_rate": 8.359605410352894e-05, "loss": 0.7717, "step": 5729 }, { "epoch": 1.1648709087212847, "grad_norm": 0.15457884967327118, "learning_rate": 8.357571443099766e-05, "loss": 1.0971, "step": 5730 }, { "epoch": 1.1650742020735922, "grad_norm": 0.14000065624713898, "learning_rate": 8.35553747584664e-05, "loss": 1.0287, "step": 5731 }, { "epoch": 1.1652774954258995, "grad_norm": 0.14675964415073395, "learning_rate": 8.353503508593513e-05, "loss": 1.1408, "step": 5732 }, { "epoch": 1.165480788778207, "grad_norm": 0.15442916750907898, "learning_rate": 8.351469541340384e-05, "loss": 1.0237, "step": 5733 }, { "epoch": 1.1656840821305143, "grad_norm": 0.13658323884010315, "learning_rate": 8.349435574087257e-05, "loss": 0.8282, "step": 5734 }, { "epoch": 1.1658873754828216, "grad_norm": 0.15540479123592377, "learning_rate": 8.34740160683413e-05, "loss": 1.0655, "step": 5735 }, { "epoch": 1.1660906688351291, "grad_norm": 0.16539782285690308, "learning_rate": 8.345367639581004e-05, "loss": 1.0761, "step": 5736 }, { "epoch": 1.1662939621874364, "grad_norm": 0.1480647623538971, "learning_rate": 8.343333672327875e-05, "loss": 0.9653, "step": 5737 }, { "epoch": 1.166497255539744, "grad_norm": 0.15932734310626984, "learning_rate": 8.341299705074749e-05, "loss": 1.0883, "step": 5738 }, { "epoch": 1.1667005488920512, "grad_norm": 0.1572417914867401, "learning_rate": 8.339265737821621e-05, "loss": 1.1278, "step": 5739 }, { "epoch": 1.1669038422443587, "grad_norm": 0.14322857558727264, "learning_rate": 8.337231770568495e-05, "loss": 1.0414, "step": 5740 }, { "epoch": 1.167107135596666, "grad_norm": 0.13125763833522797, "learning_rate": 8.335197803315366e-05, "loss": 0.9526, "step": 5741 }, { "epoch": 1.1673104289489733, "grad_norm": 0.15495732426643372, "learning_rate": 8.33316383606224e-05, "loss": 1.1722, "step": 5742 }, { "epoch": 1.1675137223012808, "grad_norm": 0.13582062721252441, "learning_rate": 8.331129868809112e-05, "loss": 0.9187, "step": 5743 }, { "epoch": 1.167717015653588, "grad_norm": 0.1391330063343048, "learning_rate": 8.329095901555986e-05, "loss": 0.9192, "step": 5744 }, { "epoch": 1.1679203090058956, "grad_norm": 0.13935451209545135, "learning_rate": 8.327061934302857e-05, "loss": 0.9838, "step": 5745 }, { "epoch": 1.168123602358203, "grad_norm": 0.14375537633895874, "learning_rate": 8.325027967049731e-05, "loss": 1.0435, "step": 5746 }, { "epoch": 1.1683268957105102, "grad_norm": 0.15256935358047485, "learning_rate": 8.322993999796603e-05, "loss": 1.095, "step": 5747 }, { "epoch": 1.1685301890628177, "grad_norm": 0.15851211547851562, "learning_rate": 8.320960032543477e-05, "loss": 1.0523, "step": 5748 }, { "epoch": 1.168733482415125, "grad_norm": 0.14433102309703827, "learning_rate": 8.318926065290348e-05, "loss": 0.9839, "step": 5749 }, { "epoch": 1.1689367757674325, "grad_norm": 0.1549297720193863, "learning_rate": 8.316892098037222e-05, "loss": 1.0124, "step": 5750 }, { "epoch": 1.1691400691197398, "grad_norm": 0.15880125761032104, "learning_rate": 8.314858130784094e-05, "loss": 1.1537, "step": 5751 }, { "epoch": 1.169343362472047, "grad_norm": 0.15532851219177246, "learning_rate": 8.312824163530968e-05, "loss": 0.9927, "step": 5752 }, { "epoch": 1.1695466558243546, "grad_norm": 0.16933661699295044, "learning_rate": 8.31079019627784e-05, "loss": 1.1238, "step": 5753 }, { "epoch": 1.1697499491766619, "grad_norm": 0.1438983678817749, "learning_rate": 8.308756229024713e-05, "loss": 1.1148, "step": 5754 }, { "epoch": 1.1699532425289694, "grad_norm": 0.14921659231185913, "learning_rate": 8.306722261771586e-05, "loss": 0.9964, "step": 5755 }, { "epoch": 1.1701565358812767, "grad_norm": 0.14099189639091492, "learning_rate": 8.30468829451846e-05, "loss": 0.9867, "step": 5756 }, { "epoch": 1.170359829233584, "grad_norm": 0.14442093670368195, "learning_rate": 8.302654327265332e-05, "loss": 0.9026, "step": 5757 }, { "epoch": 1.1705631225858915, "grad_norm": 0.1563616544008255, "learning_rate": 8.300620360012204e-05, "loss": 1.0916, "step": 5758 }, { "epoch": 1.1707664159381987, "grad_norm": 0.16063082218170166, "learning_rate": 8.298586392759077e-05, "loss": 1.0803, "step": 5759 }, { "epoch": 1.1709697092905063, "grad_norm": 0.13853336870670319, "learning_rate": 8.29655242550595e-05, "loss": 0.8939, "step": 5760 }, { "epoch": 1.1711730026428135, "grad_norm": 0.14909055829048157, "learning_rate": 8.294518458252823e-05, "loss": 1.0755, "step": 5761 }, { "epoch": 1.171376295995121, "grad_norm": 0.1274290829896927, "learning_rate": 8.292484490999695e-05, "loss": 0.8844, "step": 5762 }, { "epoch": 1.1715795893474283, "grad_norm": 0.144126296043396, "learning_rate": 8.290450523746568e-05, "loss": 0.9573, "step": 5763 }, { "epoch": 1.1717828826997356, "grad_norm": 0.16232167184352875, "learning_rate": 8.288416556493442e-05, "loss": 1.1019, "step": 5764 }, { "epoch": 1.1719861760520431, "grad_norm": 0.14691004157066345, "learning_rate": 8.286382589240314e-05, "loss": 1.0463, "step": 5765 }, { "epoch": 1.1721894694043504, "grad_norm": 0.148517444729805, "learning_rate": 8.284348621987187e-05, "loss": 1.115, "step": 5766 }, { "epoch": 1.172392762756658, "grad_norm": 0.1558620035648346, "learning_rate": 8.282314654734059e-05, "loss": 1.1398, "step": 5767 }, { "epoch": 1.1725960561089652, "grad_norm": 0.1335366815328598, "learning_rate": 8.280280687480933e-05, "loss": 0.8479, "step": 5768 }, { "epoch": 1.1727993494612727, "grad_norm": 0.16315288841724396, "learning_rate": 8.278246720227805e-05, "loss": 1.0577, "step": 5769 }, { "epoch": 1.17300264281358, "grad_norm": 0.15037906169891357, "learning_rate": 8.276212752974678e-05, "loss": 1.0101, "step": 5770 }, { "epoch": 1.1732059361658873, "grad_norm": 0.14638791978359222, "learning_rate": 8.27417878572155e-05, "loss": 1.003, "step": 5771 }, { "epoch": 1.1734092295181948, "grad_norm": 0.1514301598072052, "learning_rate": 8.272144818468424e-05, "loss": 0.9576, "step": 5772 }, { "epoch": 1.173612522870502, "grad_norm": 0.13897733390331268, "learning_rate": 8.270110851215296e-05, "loss": 1.0559, "step": 5773 }, { "epoch": 1.1738158162228096, "grad_norm": 0.15974655747413635, "learning_rate": 8.268076883962168e-05, "loss": 1.0193, "step": 5774 }, { "epoch": 1.174019109575117, "grad_norm": 0.15432977676391602, "learning_rate": 8.266042916709041e-05, "loss": 1.0128, "step": 5775 }, { "epoch": 1.1742224029274242, "grad_norm": 0.15510208904743195, "learning_rate": 8.264008949455914e-05, "loss": 1.105, "step": 5776 }, { "epoch": 1.1744256962797317, "grad_norm": 0.14643555879592896, "learning_rate": 8.261974982202788e-05, "loss": 1.0362, "step": 5777 }, { "epoch": 1.174628989632039, "grad_norm": 0.1518491804599762, "learning_rate": 8.259941014949659e-05, "loss": 0.9863, "step": 5778 }, { "epoch": 1.1748322829843465, "grad_norm": 0.16607214510440826, "learning_rate": 8.257907047696532e-05, "loss": 1.1544, "step": 5779 }, { "epoch": 1.1750355763366538, "grad_norm": 0.14509828388690948, "learning_rate": 8.255873080443405e-05, "loss": 0.9395, "step": 5780 }, { "epoch": 1.175238869688961, "grad_norm": 0.1378895491361618, "learning_rate": 8.253839113190279e-05, "loss": 0.9349, "step": 5781 }, { "epoch": 1.1754421630412686, "grad_norm": 0.14478927850723267, "learning_rate": 8.25180514593715e-05, "loss": 1.0171, "step": 5782 }, { "epoch": 1.1756454563935759, "grad_norm": 0.1440618932247162, "learning_rate": 8.249771178684024e-05, "loss": 1.0032, "step": 5783 }, { "epoch": 1.1758487497458834, "grad_norm": 0.15416118502616882, "learning_rate": 8.247737211430896e-05, "loss": 1.0134, "step": 5784 }, { "epoch": 1.1760520430981907, "grad_norm": 0.1631413996219635, "learning_rate": 8.24570324417777e-05, "loss": 1.0369, "step": 5785 }, { "epoch": 1.176255336450498, "grad_norm": 0.1420244723558426, "learning_rate": 8.243669276924641e-05, "loss": 1.0298, "step": 5786 }, { "epoch": 1.1764586298028055, "grad_norm": 0.14719687402248383, "learning_rate": 8.241635309671515e-05, "loss": 1.0469, "step": 5787 }, { "epoch": 1.1766619231551128, "grad_norm": 0.1576554775238037, "learning_rate": 8.239601342418387e-05, "loss": 1.0751, "step": 5788 }, { "epoch": 1.1768652165074203, "grad_norm": 0.14992351830005646, "learning_rate": 8.237567375165261e-05, "loss": 1.0858, "step": 5789 }, { "epoch": 1.1770685098597276, "grad_norm": 0.1365818977355957, "learning_rate": 8.235533407912132e-05, "loss": 0.9918, "step": 5790 }, { "epoch": 1.177271803212035, "grad_norm": 0.1521523892879486, "learning_rate": 8.233499440659006e-05, "loss": 1.1572, "step": 5791 }, { "epoch": 1.1774750965643423, "grad_norm": 0.15335099399089813, "learning_rate": 8.231465473405878e-05, "loss": 1.1044, "step": 5792 }, { "epoch": 1.1776783899166496, "grad_norm": 0.15510833263397217, "learning_rate": 8.229431506152752e-05, "loss": 1.014, "step": 5793 }, { "epoch": 1.1778816832689571, "grad_norm": 0.15188954770565033, "learning_rate": 8.227397538899623e-05, "loss": 1.0646, "step": 5794 }, { "epoch": 1.1780849766212644, "grad_norm": 0.14498646557331085, "learning_rate": 8.225363571646497e-05, "loss": 0.8946, "step": 5795 }, { "epoch": 1.178288269973572, "grad_norm": 0.16170433163642883, "learning_rate": 8.22332960439337e-05, "loss": 1.1044, "step": 5796 }, { "epoch": 1.1784915633258792, "grad_norm": 0.14234782755374908, "learning_rate": 8.221295637140243e-05, "loss": 1.0112, "step": 5797 }, { "epoch": 1.1786948566781867, "grad_norm": 0.15361133217811584, "learning_rate": 8.219261669887114e-05, "loss": 1.163, "step": 5798 }, { "epoch": 1.178898150030494, "grad_norm": 0.1423853635787964, "learning_rate": 8.217227702633988e-05, "loss": 1.0082, "step": 5799 }, { "epoch": 1.1791014433828013, "grad_norm": 0.14732852578163147, "learning_rate": 8.21519373538086e-05, "loss": 1.0438, "step": 5800 }, { "epoch": 1.1793047367351088, "grad_norm": 0.15599720180034637, "learning_rate": 8.213159768127734e-05, "loss": 1.1768, "step": 5801 }, { "epoch": 1.1795080300874161, "grad_norm": 0.14181433618068695, "learning_rate": 8.211125800874606e-05, "loss": 1.0371, "step": 5802 }, { "epoch": 1.1797113234397236, "grad_norm": 0.14503152668476105, "learning_rate": 8.209091833621479e-05, "loss": 0.9382, "step": 5803 }, { "epoch": 1.179914616792031, "grad_norm": 0.1486697643995285, "learning_rate": 8.207057866368352e-05, "loss": 1.0448, "step": 5804 }, { "epoch": 1.1801179101443382, "grad_norm": 0.13623777031898499, "learning_rate": 8.205023899115226e-05, "loss": 0.8852, "step": 5805 }, { "epoch": 1.1803212034966457, "grad_norm": 0.16604407131671906, "learning_rate": 8.202989931862097e-05, "loss": 1.1318, "step": 5806 }, { "epoch": 1.180524496848953, "grad_norm": 0.14802932739257812, "learning_rate": 8.20095596460897e-05, "loss": 0.9212, "step": 5807 }, { "epoch": 1.1807277902012605, "grad_norm": 0.1431393027305603, "learning_rate": 8.198921997355843e-05, "loss": 0.994, "step": 5808 }, { "epoch": 1.1809310835535678, "grad_norm": 0.164002925157547, "learning_rate": 8.196888030102717e-05, "loss": 1.1154, "step": 5809 }, { "epoch": 1.181134376905875, "grad_norm": 0.15111534297466278, "learning_rate": 8.194854062849588e-05, "loss": 1.0558, "step": 5810 }, { "epoch": 1.1813376702581826, "grad_norm": 0.1695423573255539, "learning_rate": 8.192820095596462e-05, "loss": 1.1482, "step": 5811 }, { "epoch": 1.1815409636104899, "grad_norm": 0.13451817631721497, "learning_rate": 8.190786128343334e-05, "loss": 0.976, "step": 5812 }, { "epoch": 1.1817442569627974, "grad_norm": 0.14814557135105133, "learning_rate": 8.188752161090208e-05, "loss": 1.0552, "step": 5813 }, { "epoch": 1.1819475503151047, "grad_norm": 0.1545884907245636, "learning_rate": 8.18671819383708e-05, "loss": 1.0192, "step": 5814 }, { "epoch": 1.182150843667412, "grad_norm": 0.14275769889354706, "learning_rate": 8.184684226583951e-05, "loss": 0.9805, "step": 5815 }, { "epoch": 1.1823541370197195, "grad_norm": 0.1415022313594818, "learning_rate": 8.182650259330825e-05, "loss": 0.9754, "step": 5816 }, { "epoch": 1.1825574303720268, "grad_norm": 0.1614035665988922, "learning_rate": 8.180616292077698e-05, "loss": 1.1702, "step": 5817 }, { "epoch": 1.1827607237243343, "grad_norm": 0.17317655682563782, "learning_rate": 8.178582324824571e-05, "loss": 1.2551, "step": 5818 }, { "epoch": 1.1829640170766416, "grad_norm": 0.1425011307001114, "learning_rate": 8.176548357571443e-05, "loss": 0.9335, "step": 5819 }, { "epoch": 1.1831673104289488, "grad_norm": 0.14078962802886963, "learning_rate": 8.174514390318316e-05, "loss": 0.9349, "step": 5820 }, { "epoch": 1.1833706037812564, "grad_norm": 0.1340751200914383, "learning_rate": 8.172480423065189e-05, "loss": 0.8411, "step": 5821 }, { "epoch": 1.1835738971335636, "grad_norm": 0.151747927069664, "learning_rate": 8.170446455812063e-05, "loss": 0.9972, "step": 5822 }, { "epoch": 1.1837771904858712, "grad_norm": 0.1423603892326355, "learning_rate": 8.168412488558934e-05, "loss": 0.9361, "step": 5823 }, { "epoch": 1.1839804838381784, "grad_norm": 0.14710086584091187, "learning_rate": 8.166378521305807e-05, "loss": 0.9928, "step": 5824 }, { "epoch": 1.184183777190486, "grad_norm": 0.15306688845157623, "learning_rate": 8.16434455405268e-05, "loss": 1.0885, "step": 5825 }, { "epoch": 1.1843870705427932, "grad_norm": 0.13832566142082214, "learning_rate": 8.162310586799554e-05, "loss": 0.9198, "step": 5826 }, { "epoch": 1.1845903638951008, "grad_norm": 0.15482375025749207, "learning_rate": 8.160276619546425e-05, "loss": 0.9793, "step": 5827 }, { "epoch": 1.184793657247408, "grad_norm": 0.15048684179782867, "learning_rate": 8.158242652293299e-05, "loss": 0.9304, "step": 5828 }, { "epoch": 1.1849969505997153, "grad_norm": 0.13934911787509918, "learning_rate": 8.156208685040171e-05, "loss": 0.8829, "step": 5829 }, { "epoch": 1.1852002439520228, "grad_norm": 0.132803812623024, "learning_rate": 8.154174717787045e-05, "loss": 0.9299, "step": 5830 }, { "epoch": 1.1854035373043301, "grad_norm": 0.15293893218040466, "learning_rate": 8.152140750533916e-05, "loss": 0.9339, "step": 5831 }, { "epoch": 1.1856068306566376, "grad_norm": 0.1575455665588379, "learning_rate": 8.15010678328079e-05, "loss": 1.1541, "step": 5832 }, { "epoch": 1.185810124008945, "grad_norm": 0.15015073120594025, "learning_rate": 8.148072816027662e-05, "loss": 0.9863, "step": 5833 }, { "epoch": 1.1860134173612522, "grad_norm": 0.1547766476869583, "learning_rate": 8.146038848774536e-05, "loss": 1.0668, "step": 5834 }, { "epoch": 1.1862167107135597, "grad_norm": 0.1677473932504654, "learning_rate": 8.144004881521407e-05, "loss": 1.1909, "step": 5835 }, { "epoch": 1.186420004065867, "grad_norm": 0.15054230391979218, "learning_rate": 8.141970914268281e-05, "loss": 0.9628, "step": 5836 }, { "epoch": 1.1866232974181745, "grad_norm": 0.14739026129245758, "learning_rate": 8.139936947015153e-05, "loss": 1.0336, "step": 5837 }, { "epoch": 1.1868265907704818, "grad_norm": 0.14510677754878998, "learning_rate": 8.137902979762027e-05, "loss": 1.0433, "step": 5838 }, { "epoch": 1.187029884122789, "grad_norm": 0.15220728516578674, "learning_rate": 8.135869012508898e-05, "loss": 0.98, "step": 5839 }, { "epoch": 1.1872331774750966, "grad_norm": 0.1537727415561676, "learning_rate": 8.133835045255772e-05, "loss": 1.0428, "step": 5840 }, { "epoch": 1.187436470827404, "grad_norm": 0.14347486197948456, "learning_rate": 8.131801078002644e-05, "loss": 0.9355, "step": 5841 }, { "epoch": 1.1876397641797114, "grad_norm": 0.15678545832633972, "learning_rate": 8.129767110749518e-05, "loss": 1.1894, "step": 5842 }, { "epoch": 1.1878430575320187, "grad_norm": 0.14617744088172913, "learning_rate": 8.12773314349639e-05, "loss": 0.9334, "step": 5843 }, { "epoch": 1.188046350884326, "grad_norm": 0.13615253567695618, "learning_rate": 8.125699176243263e-05, "loss": 0.8091, "step": 5844 }, { "epoch": 1.1882496442366335, "grad_norm": 0.14368019998073578, "learning_rate": 8.123665208990136e-05, "loss": 0.8983, "step": 5845 }, { "epoch": 1.1884529375889408, "grad_norm": 0.15359970927238464, "learning_rate": 8.12163124173701e-05, "loss": 1.0781, "step": 5846 }, { "epoch": 1.1886562309412483, "grad_norm": 0.15584875643253326, "learning_rate": 8.11959727448388e-05, "loss": 1.1816, "step": 5847 }, { "epoch": 1.1888595242935556, "grad_norm": 0.14251692593097687, "learning_rate": 8.117563307230754e-05, "loss": 0.9314, "step": 5848 }, { "epoch": 1.1890628176458629, "grad_norm": 0.14834555983543396, "learning_rate": 8.115529339977627e-05, "loss": 1.004, "step": 5849 }, { "epoch": 1.1892661109981704, "grad_norm": 0.1568392813205719, "learning_rate": 8.1134953727245e-05, "loss": 1.1264, "step": 5850 }, { "epoch": 1.1894694043504777, "grad_norm": 0.16257062554359436, "learning_rate": 8.111461405471372e-05, "loss": 1.1795, "step": 5851 }, { "epoch": 1.1896726977027852, "grad_norm": 0.15817001461982727, "learning_rate": 8.109427438218245e-05, "loss": 1.1681, "step": 5852 }, { "epoch": 1.1898759910550925, "grad_norm": 0.14932581782341003, "learning_rate": 8.107393470965118e-05, "loss": 1.1543, "step": 5853 }, { "epoch": 1.1900792844074, "grad_norm": 0.15669305622577667, "learning_rate": 8.105359503711992e-05, "loss": 1.1768, "step": 5854 }, { "epoch": 1.1902825777597072, "grad_norm": 0.14542458951473236, "learning_rate": 8.103325536458863e-05, "loss": 0.9986, "step": 5855 }, { "epoch": 1.1904858711120148, "grad_norm": 0.17203249037265778, "learning_rate": 8.101291569205737e-05, "loss": 1.1656, "step": 5856 }, { "epoch": 1.190689164464322, "grad_norm": 0.14532865583896637, "learning_rate": 8.099257601952609e-05, "loss": 1.0212, "step": 5857 }, { "epoch": 1.1908924578166293, "grad_norm": 0.1520928293466568, "learning_rate": 8.097223634699481e-05, "loss": 1.1934, "step": 5858 }, { "epoch": 1.1910957511689368, "grad_norm": 0.14724013209342957, "learning_rate": 8.095189667446354e-05, "loss": 0.9037, "step": 5859 }, { "epoch": 1.1912990445212441, "grad_norm": 0.13861322402954102, "learning_rate": 8.093155700193226e-05, "loss": 0.9378, "step": 5860 }, { "epoch": 1.1915023378735516, "grad_norm": 0.15509451925754547, "learning_rate": 8.0911217329401e-05, "loss": 1.1247, "step": 5861 }, { "epoch": 1.191705631225859, "grad_norm": 0.1269523799419403, "learning_rate": 8.089087765686973e-05, "loss": 0.829, "step": 5862 }, { "epoch": 1.1919089245781662, "grad_norm": 0.15172962844371796, "learning_rate": 8.087053798433845e-05, "loss": 0.9655, "step": 5863 }, { "epoch": 1.1921122179304737, "grad_norm": 0.14990444481372833, "learning_rate": 8.085019831180717e-05, "loss": 1.0249, "step": 5864 }, { "epoch": 1.192315511282781, "grad_norm": 0.14400093257427216, "learning_rate": 8.082985863927591e-05, "loss": 0.9218, "step": 5865 }, { "epoch": 1.1925188046350885, "grad_norm": 0.12698093056678772, "learning_rate": 8.080951896674464e-05, "loss": 0.8124, "step": 5866 }, { "epoch": 1.1927220979873958, "grad_norm": 0.13535602390766144, "learning_rate": 8.078917929421336e-05, "loss": 0.9892, "step": 5867 }, { "epoch": 1.192925391339703, "grad_norm": 0.1540592759847641, "learning_rate": 8.076883962168209e-05, "loss": 1.0578, "step": 5868 }, { "epoch": 1.1931286846920106, "grad_norm": 0.1440640687942505, "learning_rate": 8.074849994915082e-05, "loss": 1.0616, "step": 5869 }, { "epoch": 1.193331978044318, "grad_norm": 0.16932588815689087, "learning_rate": 8.072816027661955e-05, "loss": 1.1621, "step": 5870 }, { "epoch": 1.1935352713966254, "grad_norm": 0.1639503538608551, "learning_rate": 8.070782060408827e-05, "loss": 1.1424, "step": 5871 }, { "epoch": 1.1937385647489327, "grad_norm": 0.15571148693561554, "learning_rate": 8.0687480931557e-05, "loss": 1.1034, "step": 5872 }, { "epoch": 1.19394185810124, "grad_norm": 0.1500549167394638, "learning_rate": 8.066714125902574e-05, "loss": 0.921, "step": 5873 }, { "epoch": 1.1941451514535475, "grad_norm": 0.13628308475017548, "learning_rate": 8.064680158649446e-05, "loss": 0.8746, "step": 5874 }, { "epoch": 1.1943484448058548, "grad_norm": 0.14379121363162994, "learning_rate": 8.06264619139632e-05, "loss": 1.0226, "step": 5875 }, { "epoch": 1.1945517381581623, "grad_norm": 0.1455121785402298, "learning_rate": 8.060612224143191e-05, "loss": 1.0618, "step": 5876 }, { "epoch": 1.1947550315104696, "grad_norm": 0.14893971383571625, "learning_rate": 8.058578256890065e-05, "loss": 1.0479, "step": 5877 }, { "epoch": 1.1949583248627769, "grad_norm": 0.17038215696811676, "learning_rate": 8.056544289636937e-05, "loss": 1.1333, "step": 5878 }, { "epoch": 1.1951616182150844, "grad_norm": 0.16060739755630493, "learning_rate": 8.054510322383811e-05, "loss": 1.0709, "step": 5879 }, { "epoch": 1.1953649115673917, "grad_norm": 0.14657901227474213, "learning_rate": 8.052476355130682e-05, "loss": 0.9916, "step": 5880 }, { "epoch": 1.1955682049196992, "grad_norm": 0.1535673588514328, "learning_rate": 8.050442387877556e-05, "loss": 1.0064, "step": 5881 }, { "epoch": 1.1957714982720065, "grad_norm": 0.14547181129455566, "learning_rate": 8.048408420624428e-05, "loss": 1.0492, "step": 5882 }, { "epoch": 1.195974791624314, "grad_norm": 0.14238569140434265, "learning_rate": 8.046374453371302e-05, "loss": 0.9422, "step": 5883 }, { "epoch": 1.1961780849766213, "grad_norm": 0.1523994505405426, "learning_rate": 8.044340486118173e-05, "loss": 1.0175, "step": 5884 }, { "epoch": 1.1963813783289288, "grad_norm": 0.15338194370269775, "learning_rate": 8.042306518865047e-05, "loss": 1.0815, "step": 5885 }, { "epoch": 1.196584671681236, "grad_norm": 0.15962374210357666, "learning_rate": 8.04027255161192e-05, "loss": 1.0765, "step": 5886 }, { "epoch": 1.1967879650335433, "grad_norm": 0.13959115743637085, "learning_rate": 8.038238584358793e-05, "loss": 0.9235, "step": 5887 }, { "epoch": 1.1969912583858509, "grad_norm": 0.15868176519870758, "learning_rate": 8.036204617105664e-05, "loss": 1.0158, "step": 5888 }, { "epoch": 1.1971945517381581, "grad_norm": 0.1576426476240158, "learning_rate": 8.034170649852538e-05, "loss": 1.0851, "step": 5889 }, { "epoch": 1.1973978450904657, "grad_norm": 0.1631205826997757, "learning_rate": 8.03213668259941e-05, "loss": 1.1783, "step": 5890 }, { "epoch": 1.197601138442773, "grad_norm": 0.16545897722244263, "learning_rate": 8.030102715346284e-05, "loss": 1.1077, "step": 5891 }, { "epoch": 1.1978044317950802, "grad_norm": 0.1491439938545227, "learning_rate": 8.028068748093155e-05, "loss": 1.0368, "step": 5892 }, { "epoch": 1.1980077251473877, "grad_norm": 0.15724484622478485, "learning_rate": 8.026034780840029e-05, "loss": 1.0844, "step": 5893 }, { "epoch": 1.198211018499695, "grad_norm": 0.15338502824306488, "learning_rate": 8.024000813586902e-05, "loss": 1.0589, "step": 5894 }, { "epoch": 1.1984143118520025, "grad_norm": 0.1540132313966751, "learning_rate": 8.021966846333776e-05, "loss": 1.0517, "step": 5895 }, { "epoch": 1.1986176052043098, "grad_norm": 0.13505342602729797, "learning_rate": 8.019932879080647e-05, "loss": 0.9513, "step": 5896 }, { "epoch": 1.198820898556617, "grad_norm": 0.1482662409543991, "learning_rate": 8.01789891182752e-05, "loss": 1.0653, "step": 5897 }, { "epoch": 1.1990241919089246, "grad_norm": 0.15476658940315247, "learning_rate": 8.015864944574393e-05, "loss": 1.0626, "step": 5898 }, { "epoch": 1.199227485261232, "grad_norm": 0.14526322484016418, "learning_rate": 8.013830977321265e-05, "loss": 0.9032, "step": 5899 }, { "epoch": 1.1994307786135394, "grad_norm": 0.15483404695987701, "learning_rate": 8.011797010068138e-05, "loss": 1.091, "step": 5900 }, { "epoch": 1.1996340719658467, "grad_norm": 0.15213781595230103, "learning_rate": 8.00976304281501e-05, "loss": 0.9738, "step": 5901 }, { "epoch": 1.199837365318154, "grad_norm": 0.15703366696834564, "learning_rate": 8.007729075561884e-05, "loss": 1.1734, "step": 5902 }, { "epoch": 1.2000406586704615, "grad_norm": 0.14850756525993347, "learning_rate": 8.005695108308756e-05, "loss": 1.0282, "step": 5903 }, { "epoch": 1.2002439520227688, "grad_norm": 0.134627103805542, "learning_rate": 8.003661141055629e-05, "loss": 0.8677, "step": 5904 }, { "epoch": 1.2004472453750763, "grad_norm": 0.16509543359279633, "learning_rate": 8.001627173802501e-05, "loss": 1.1819, "step": 5905 }, { "epoch": 1.2006505387273836, "grad_norm": 0.15316465497016907, "learning_rate": 7.999593206549375e-05, "loss": 1.2094, "step": 5906 }, { "epoch": 1.2008538320796909, "grad_norm": 0.1590406894683838, "learning_rate": 7.997559239296248e-05, "loss": 1.0965, "step": 5907 }, { "epoch": 1.2010571254319984, "grad_norm": 0.13455016911029816, "learning_rate": 7.99552527204312e-05, "loss": 0.9488, "step": 5908 }, { "epoch": 1.2012604187843057, "grad_norm": 0.13477842509746552, "learning_rate": 7.993491304789992e-05, "loss": 0.9218, "step": 5909 }, { "epoch": 1.2014637121366132, "grad_norm": 0.14440806210041046, "learning_rate": 7.991457337536866e-05, "loss": 0.9193, "step": 5910 }, { "epoch": 1.2016670054889205, "grad_norm": 0.15229295194149017, "learning_rate": 7.989423370283739e-05, "loss": 1.0363, "step": 5911 }, { "epoch": 1.201870298841228, "grad_norm": 0.15386514365673065, "learning_rate": 7.987389403030611e-05, "loss": 1.0401, "step": 5912 }, { "epoch": 1.2020735921935353, "grad_norm": 0.14064916968345642, "learning_rate": 7.985355435777484e-05, "loss": 0.944, "step": 5913 }, { "epoch": 1.2022768855458426, "grad_norm": 0.14266358315944672, "learning_rate": 7.983321468524357e-05, "loss": 0.9544, "step": 5914 }, { "epoch": 1.20248017889815, "grad_norm": 0.13964015245437622, "learning_rate": 7.98128750127123e-05, "loss": 0.9036, "step": 5915 }, { "epoch": 1.2026834722504574, "grad_norm": 0.15103323757648468, "learning_rate": 7.979253534018102e-05, "loss": 1.051, "step": 5916 }, { "epoch": 1.2028867656027649, "grad_norm": 0.14747385680675507, "learning_rate": 7.977219566764975e-05, "loss": 0.8971, "step": 5917 }, { "epoch": 1.2030900589550722, "grad_norm": 0.14294147491455078, "learning_rate": 7.975185599511849e-05, "loss": 0.9279, "step": 5918 }, { "epoch": 1.2032933523073797, "grad_norm": 0.13962894678115845, "learning_rate": 7.973151632258721e-05, "loss": 0.9662, "step": 5919 }, { "epoch": 1.203496645659687, "grad_norm": 0.16897960007190704, "learning_rate": 7.971117665005593e-05, "loss": 1.2553, "step": 5920 }, { "epoch": 1.2036999390119942, "grad_norm": 0.1562163382768631, "learning_rate": 7.969083697752466e-05, "loss": 1.1168, "step": 5921 }, { "epoch": 1.2039032323643017, "grad_norm": 0.13830183446407318, "learning_rate": 7.96704973049934e-05, "loss": 0.8403, "step": 5922 }, { "epoch": 1.204106525716609, "grad_norm": 0.13156530261039734, "learning_rate": 7.965015763246212e-05, "loss": 0.952, "step": 5923 }, { "epoch": 1.2043098190689165, "grad_norm": 0.1500665247440338, "learning_rate": 7.962981795993085e-05, "loss": 1.0058, "step": 5924 }, { "epoch": 1.2045131124212238, "grad_norm": 0.14364342391490936, "learning_rate": 7.960947828739957e-05, "loss": 0.9573, "step": 5925 }, { "epoch": 1.2047164057735311, "grad_norm": 0.1456497609615326, "learning_rate": 7.958913861486831e-05, "loss": 0.9619, "step": 5926 }, { "epoch": 1.2049196991258386, "grad_norm": 0.13306300342082977, "learning_rate": 7.956879894233703e-05, "loss": 1.0108, "step": 5927 }, { "epoch": 1.205122992478146, "grad_norm": 0.16582590341567993, "learning_rate": 7.954845926980576e-05, "loss": 1.1483, "step": 5928 }, { "epoch": 1.2053262858304534, "grad_norm": 0.14681321382522583, "learning_rate": 7.952811959727448e-05, "loss": 0.9306, "step": 5929 }, { "epoch": 1.2055295791827607, "grad_norm": 0.1417584866285324, "learning_rate": 7.950777992474322e-05, "loss": 0.872, "step": 5930 }, { "epoch": 1.205732872535068, "grad_norm": 0.14118684828281403, "learning_rate": 7.948744025221194e-05, "loss": 0.9267, "step": 5931 }, { "epoch": 1.2059361658873755, "grad_norm": 0.13047218322753906, "learning_rate": 7.946710057968068e-05, "loss": 0.8934, "step": 5932 }, { "epoch": 1.2061394592396828, "grad_norm": 0.15511366724967957, "learning_rate": 7.944676090714939e-05, "loss": 1.1041, "step": 5933 }, { "epoch": 1.2063427525919903, "grad_norm": 0.14702750742435455, "learning_rate": 7.942642123461813e-05, "loss": 0.9171, "step": 5934 }, { "epoch": 1.2065460459442976, "grad_norm": 0.16108551621437073, "learning_rate": 7.940608156208686e-05, "loss": 0.9684, "step": 5935 }, { "epoch": 1.2067493392966049, "grad_norm": 0.15319964289665222, "learning_rate": 7.93857418895556e-05, "loss": 1.0696, "step": 5936 }, { "epoch": 1.2069526326489124, "grad_norm": 0.15028350055217743, "learning_rate": 7.93654022170243e-05, "loss": 0.9735, "step": 5937 }, { "epoch": 1.2071559260012197, "grad_norm": 0.15127158164978027, "learning_rate": 7.934506254449304e-05, "loss": 1.0975, "step": 5938 }, { "epoch": 1.2073592193535272, "grad_norm": 0.16552859544754028, "learning_rate": 7.932472287196177e-05, "loss": 1.0935, "step": 5939 }, { "epoch": 1.2075625127058345, "grad_norm": 0.14383484423160553, "learning_rate": 7.930438319943049e-05, "loss": 1.0105, "step": 5940 }, { "epoch": 1.207765806058142, "grad_norm": 0.15808679163455963, "learning_rate": 7.928404352689922e-05, "loss": 1.124, "step": 5941 }, { "epoch": 1.2079690994104493, "grad_norm": 0.1643362194299698, "learning_rate": 7.926370385436794e-05, "loss": 1.2667, "step": 5942 }, { "epoch": 1.2081723927627566, "grad_norm": 0.1681181788444519, "learning_rate": 7.924336418183668e-05, "loss": 1.132, "step": 5943 }, { "epoch": 1.208375686115064, "grad_norm": 0.17151467502117157, "learning_rate": 7.92230245093054e-05, "loss": 1.0677, "step": 5944 }, { "epoch": 1.2085789794673714, "grad_norm": 0.13617511093616486, "learning_rate": 7.920268483677413e-05, "loss": 0.8566, "step": 5945 }, { "epoch": 1.2087822728196789, "grad_norm": 0.1504543572664261, "learning_rate": 7.918234516424285e-05, "loss": 0.9746, "step": 5946 }, { "epoch": 1.2089855661719862, "grad_norm": 0.1469312161207199, "learning_rate": 7.916200549171159e-05, "loss": 0.9503, "step": 5947 }, { "epoch": 1.2091888595242937, "grad_norm": 0.1474432349205017, "learning_rate": 7.914166581918031e-05, "loss": 0.9862, "step": 5948 }, { "epoch": 1.209392152876601, "grad_norm": 0.1602153331041336, "learning_rate": 7.912132614664904e-05, "loss": 1.0523, "step": 5949 }, { "epoch": 1.2095954462289082, "grad_norm": 0.13873906433582306, "learning_rate": 7.910098647411776e-05, "loss": 0.8833, "step": 5950 }, { "epoch": 1.2097987395812158, "grad_norm": 0.18601791560649872, "learning_rate": 7.90806468015865e-05, "loss": 1.258, "step": 5951 }, { "epoch": 1.210002032933523, "grad_norm": 0.15273120999336243, "learning_rate": 7.906030712905523e-05, "loss": 1.0081, "step": 5952 }, { "epoch": 1.2102053262858306, "grad_norm": 0.16047626733779907, "learning_rate": 7.903996745652395e-05, "loss": 1.1502, "step": 5953 }, { "epoch": 1.2104086196381378, "grad_norm": 0.1480061113834381, "learning_rate": 7.901962778399267e-05, "loss": 1.0017, "step": 5954 }, { "epoch": 1.2106119129904451, "grad_norm": 0.16537447273731232, "learning_rate": 7.899928811146141e-05, "loss": 1.0887, "step": 5955 }, { "epoch": 1.2108152063427526, "grad_norm": 0.16818997263908386, "learning_rate": 7.897894843893014e-05, "loss": 1.0813, "step": 5956 }, { "epoch": 1.21101849969506, "grad_norm": 0.14905446767807007, "learning_rate": 7.895860876639886e-05, "loss": 1.1452, "step": 5957 }, { "epoch": 1.2112217930473674, "grad_norm": 0.16087594628334045, "learning_rate": 7.893826909386759e-05, "loss": 1.0707, "step": 5958 }, { "epoch": 1.2114250863996747, "grad_norm": 0.145945206284523, "learning_rate": 7.891792942133632e-05, "loss": 1.0248, "step": 5959 }, { "epoch": 1.211628379751982, "grad_norm": 0.151754230260849, "learning_rate": 7.889758974880505e-05, "loss": 1.0147, "step": 5960 }, { "epoch": 1.2118316731042895, "grad_norm": 0.139401376247406, "learning_rate": 7.887725007627377e-05, "loss": 0.9278, "step": 5961 }, { "epoch": 1.2120349664565968, "grad_norm": 0.1569097489118576, "learning_rate": 7.88569104037425e-05, "loss": 1.0747, "step": 5962 }, { "epoch": 1.2122382598089043, "grad_norm": 0.14463907480239868, "learning_rate": 7.883657073121124e-05, "loss": 0.9767, "step": 5963 }, { "epoch": 1.2124415531612116, "grad_norm": 0.1596505045890808, "learning_rate": 7.881623105867996e-05, "loss": 1.1514, "step": 5964 }, { "epoch": 1.212644846513519, "grad_norm": 0.16433337330818176, "learning_rate": 7.879589138614868e-05, "loss": 1.1582, "step": 5965 }, { "epoch": 1.2128481398658264, "grad_norm": 0.14704091846942902, "learning_rate": 7.877555171361741e-05, "loss": 0.9008, "step": 5966 }, { "epoch": 1.2130514332181337, "grad_norm": 0.16440390050411224, "learning_rate": 7.875521204108615e-05, "loss": 1.0331, "step": 5967 }, { "epoch": 1.2132547265704412, "grad_norm": 0.14984196424484253, "learning_rate": 7.873487236855487e-05, "loss": 1.0201, "step": 5968 }, { "epoch": 1.2134580199227485, "grad_norm": 0.13804790377616882, "learning_rate": 7.87145326960236e-05, "loss": 0.8756, "step": 5969 }, { "epoch": 1.213661313275056, "grad_norm": 0.15829437971115112, "learning_rate": 7.869419302349232e-05, "loss": 1.2565, "step": 5970 }, { "epoch": 1.2138646066273633, "grad_norm": 0.16005225479602814, "learning_rate": 7.867385335096106e-05, "loss": 1.0816, "step": 5971 }, { "epoch": 1.2140678999796706, "grad_norm": 0.14023050665855408, "learning_rate": 7.865351367842978e-05, "loss": 1.0056, "step": 5972 }, { "epoch": 1.214271193331978, "grad_norm": 0.1558290719985962, "learning_rate": 7.863317400589851e-05, "loss": 0.9293, "step": 5973 }, { "epoch": 1.2144744866842854, "grad_norm": 0.16956967115402222, "learning_rate": 7.861283433336723e-05, "loss": 1.0609, "step": 5974 }, { "epoch": 1.2146777800365929, "grad_norm": 0.13711321353912354, "learning_rate": 7.859249466083597e-05, "loss": 1.0161, "step": 5975 }, { "epoch": 1.2148810733889002, "grad_norm": 0.15096637606620789, "learning_rate": 7.85721549883047e-05, "loss": 0.9326, "step": 5976 }, { "epoch": 1.2150843667412077, "grad_norm": 0.15365126729011536, "learning_rate": 7.855181531577342e-05, "loss": 0.9785, "step": 5977 }, { "epoch": 1.215287660093515, "grad_norm": 0.1664636880159378, "learning_rate": 7.853147564324214e-05, "loss": 1.1646, "step": 5978 }, { "epoch": 1.2154909534458223, "grad_norm": 0.14389696717262268, "learning_rate": 7.851113597071088e-05, "loss": 1.0031, "step": 5979 }, { "epoch": 1.2156942467981298, "grad_norm": 0.1665438562631607, "learning_rate": 7.84907962981796e-05, "loss": 1.1069, "step": 5980 }, { "epoch": 1.215897540150437, "grad_norm": 0.15469186007976532, "learning_rate": 7.847045662564833e-05, "loss": 1.0227, "step": 5981 }, { "epoch": 1.2161008335027446, "grad_norm": 0.16661310195922852, "learning_rate": 7.845011695311705e-05, "loss": 1.1753, "step": 5982 }, { "epoch": 1.2163041268550518, "grad_norm": 0.13519755005836487, "learning_rate": 7.842977728058578e-05, "loss": 0.9268, "step": 5983 }, { "epoch": 1.2165074202073591, "grad_norm": 0.15839457511901855, "learning_rate": 7.840943760805452e-05, "loss": 1.1744, "step": 5984 }, { "epoch": 1.2167107135596666, "grad_norm": 0.15252776443958282, "learning_rate": 7.838909793552324e-05, "loss": 1.0487, "step": 5985 }, { "epoch": 1.216914006911974, "grad_norm": 0.15469424426555634, "learning_rate": 7.836875826299197e-05, "loss": 1.0426, "step": 5986 }, { "epoch": 1.2171173002642814, "grad_norm": 0.13677896559238434, "learning_rate": 7.834841859046069e-05, "loss": 0.8818, "step": 5987 }, { "epoch": 1.2173205936165887, "grad_norm": 0.13671749830245972, "learning_rate": 7.832807891792943e-05, "loss": 0.867, "step": 5988 }, { "epoch": 1.217523886968896, "grad_norm": 0.14619436860084534, "learning_rate": 7.830773924539815e-05, "loss": 0.9299, "step": 5989 }, { "epoch": 1.2177271803212035, "grad_norm": 0.15766948461532593, "learning_rate": 7.828739957286688e-05, "loss": 0.9758, "step": 5990 }, { "epoch": 1.2179304736735108, "grad_norm": 0.14414988458156586, "learning_rate": 7.82670599003356e-05, "loss": 1.0246, "step": 5991 }, { "epoch": 1.2181337670258183, "grad_norm": 0.1571853905916214, "learning_rate": 7.824672022780434e-05, "loss": 1.0623, "step": 5992 }, { "epoch": 1.2183370603781256, "grad_norm": 0.158509761095047, "learning_rate": 7.822638055527306e-05, "loss": 1.0383, "step": 5993 }, { "epoch": 1.218540353730433, "grad_norm": 0.16249847412109375, "learning_rate": 7.820604088274179e-05, "loss": 1.1868, "step": 5994 }, { "epoch": 1.2187436470827404, "grad_norm": 0.15063871443271637, "learning_rate": 7.818570121021051e-05, "loss": 1.0497, "step": 5995 }, { "epoch": 1.2189469404350477, "grad_norm": 0.15637479722499847, "learning_rate": 7.816536153767925e-05, "loss": 1.11, "step": 5996 }, { "epoch": 1.2191502337873552, "grad_norm": 0.12896780669689178, "learning_rate": 7.814502186514798e-05, "loss": 0.8355, "step": 5997 }, { "epoch": 1.2193535271396625, "grad_norm": 0.1477995067834854, "learning_rate": 7.81246821926167e-05, "loss": 1.0115, "step": 5998 }, { "epoch": 1.21955682049197, "grad_norm": 0.1346842646598816, "learning_rate": 7.810434252008542e-05, "loss": 0.8145, "step": 5999 }, { "epoch": 1.2197601138442773, "grad_norm": 0.16976101696491241, "learning_rate": 7.808400284755416e-05, "loss": 1.2802, "step": 6000 }, { "epoch": 1.2199634071965846, "grad_norm": 0.1319088190793991, "learning_rate": 7.806366317502289e-05, "loss": 0.8274, "step": 6001 }, { "epoch": 1.220166700548892, "grad_norm": 0.13133682310581207, "learning_rate": 7.804332350249161e-05, "loss": 0.8474, "step": 6002 }, { "epoch": 1.2203699939011994, "grad_norm": 0.1556311994791031, "learning_rate": 7.802298382996034e-05, "loss": 1.0508, "step": 6003 }, { "epoch": 1.2205732872535069, "grad_norm": 0.15803690254688263, "learning_rate": 7.800264415742907e-05, "loss": 1.1573, "step": 6004 }, { "epoch": 1.2207765806058142, "grad_norm": 0.1543922871351242, "learning_rate": 7.79823044848978e-05, "loss": 1.1319, "step": 6005 }, { "epoch": 1.2209798739581217, "grad_norm": 0.14866527915000916, "learning_rate": 7.796196481236652e-05, "loss": 0.949, "step": 6006 }, { "epoch": 1.221183167310429, "grad_norm": 0.14351116120815277, "learning_rate": 7.794162513983525e-05, "loss": 0.9472, "step": 6007 }, { "epoch": 1.2213864606627363, "grad_norm": 0.14699599146842957, "learning_rate": 7.792128546730399e-05, "loss": 0.9011, "step": 6008 }, { "epoch": 1.2215897540150438, "grad_norm": 0.17406338453292847, "learning_rate": 7.790094579477271e-05, "loss": 1.2005, "step": 6009 }, { "epoch": 1.221793047367351, "grad_norm": 0.15509524941444397, "learning_rate": 7.788060612224143e-05, "loss": 0.9375, "step": 6010 }, { "epoch": 1.2219963407196586, "grad_norm": 0.13967086374759674, "learning_rate": 7.786026644971016e-05, "loss": 0.9858, "step": 6011 }, { "epoch": 1.2221996340719659, "grad_norm": 0.1699836403131485, "learning_rate": 7.78399267771789e-05, "loss": 1.0934, "step": 6012 }, { "epoch": 1.2224029274242731, "grad_norm": 0.14084777235984802, "learning_rate": 7.781958710464762e-05, "loss": 0.8928, "step": 6013 }, { "epoch": 1.2226062207765807, "grad_norm": 0.1351868063211441, "learning_rate": 7.779924743211635e-05, "loss": 0.9211, "step": 6014 }, { "epoch": 1.222809514128888, "grad_norm": 0.1394532471895218, "learning_rate": 7.777890775958507e-05, "loss": 0.9834, "step": 6015 }, { "epoch": 1.2230128074811955, "grad_norm": 0.1532890498638153, "learning_rate": 7.775856808705381e-05, "loss": 1.07, "step": 6016 }, { "epoch": 1.2232161008335027, "grad_norm": 0.15940631926059723, "learning_rate": 7.773822841452253e-05, "loss": 0.9896, "step": 6017 }, { "epoch": 1.22341939418581, "grad_norm": 0.141846165060997, "learning_rate": 7.771788874199126e-05, "loss": 0.8771, "step": 6018 }, { "epoch": 1.2236226875381175, "grad_norm": 0.16429439187049866, "learning_rate": 7.769754906945998e-05, "loss": 1.1372, "step": 6019 }, { "epoch": 1.2238259808904248, "grad_norm": 0.1476045548915863, "learning_rate": 7.767720939692872e-05, "loss": 0.9792, "step": 6020 }, { "epoch": 1.2240292742427323, "grad_norm": 0.13393555581569672, "learning_rate": 7.765686972439744e-05, "loss": 0.9564, "step": 6021 }, { "epoch": 1.2242325675950396, "grad_norm": 0.15074948966503143, "learning_rate": 7.763653005186617e-05, "loss": 0.9082, "step": 6022 }, { "epoch": 1.224435860947347, "grad_norm": 0.14458337426185608, "learning_rate": 7.761619037933489e-05, "loss": 0.9419, "step": 6023 }, { "epoch": 1.2246391542996544, "grad_norm": 0.12875951826572418, "learning_rate": 7.759585070680362e-05, "loss": 0.8965, "step": 6024 }, { "epoch": 1.2248424476519617, "grad_norm": 0.16608628630638123, "learning_rate": 7.757551103427236e-05, "loss": 1.0796, "step": 6025 }, { "epoch": 1.2250457410042692, "grad_norm": 0.16221550107002258, "learning_rate": 7.755517136174108e-05, "loss": 1.0232, "step": 6026 }, { "epoch": 1.2252490343565765, "grad_norm": 0.1537492722272873, "learning_rate": 7.75348316892098e-05, "loss": 0.9851, "step": 6027 }, { "epoch": 1.225452327708884, "grad_norm": 0.15429674088954926, "learning_rate": 7.751449201667853e-05, "loss": 1.035, "step": 6028 }, { "epoch": 1.2256556210611913, "grad_norm": 0.15351472795009613, "learning_rate": 7.749415234414727e-05, "loss": 1.0984, "step": 6029 }, { "epoch": 1.2258589144134986, "grad_norm": 0.16499385237693787, "learning_rate": 7.747381267161599e-05, "loss": 1.1478, "step": 6030 }, { "epoch": 1.226062207765806, "grad_norm": 0.16109612584114075, "learning_rate": 7.745347299908472e-05, "loss": 1.1764, "step": 6031 }, { "epoch": 1.2262655011181134, "grad_norm": 0.14844362437725067, "learning_rate": 7.743313332655344e-05, "loss": 0.9723, "step": 6032 }, { "epoch": 1.226468794470421, "grad_norm": 0.142217755317688, "learning_rate": 7.741279365402218e-05, "loss": 0.8857, "step": 6033 }, { "epoch": 1.2266720878227282, "grad_norm": 0.14022211730480194, "learning_rate": 7.73924539814909e-05, "loss": 0.9567, "step": 6034 }, { "epoch": 1.2268753811750357, "grad_norm": 0.15379805862903595, "learning_rate": 7.737211430895963e-05, "loss": 1.0376, "step": 6035 }, { "epoch": 1.227078674527343, "grad_norm": 0.1771107167005539, "learning_rate": 7.735177463642835e-05, "loss": 1.1723, "step": 6036 }, { "epoch": 1.2272819678796503, "grad_norm": 0.14327488839626312, "learning_rate": 7.733143496389709e-05, "loss": 0.9283, "step": 6037 }, { "epoch": 1.2274852612319578, "grad_norm": 0.14578698575496674, "learning_rate": 7.731109529136581e-05, "loss": 0.9931, "step": 6038 }, { "epoch": 1.227688554584265, "grad_norm": 0.15075939893722534, "learning_rate": 7.729075561883454e-05, "loss": 1.0464, "step": 6039 }, { "epoch": 1.2278918479365726, "grad_norm": 0.16097469627857208, "learning_rate": 7.727041594630326e-05, "loss": 1.0914, "step": 6040 }, { "epoch": 1.2280951412888799, "grad_norm": 0.15317900478839874, "learning_rate": 7.7250076273772e-05, "loss": 0.9143, "step": 6041 }, { "epoch": 1.2282984346411872, "grad_norm": 0.17345957458019257, "learning_rate": 7.722973660124073e-05, "loss": 1.2072, "step": 6042 }, { "epoch": 1.2285017279934947, "grad_norm": 0.15269114077091217, "learning_rate": 7.720939692870945e-05, "loss": 1.1181, "step": 6043 }, { "epoch": 1.228705021345802, "grad_norm": 0.15105299651622772, "learning_rate": 7.718905725617817e-05, "loss": 1.0661, "step": 6044 }, { "epoch": 1.2289083146981095, "grad_norm": 0.17497089505195618, "learning_rate": 7.716871758364691e-05, "loss": 1.2526, "step": 6045 }, { "epoch": 1.2291116080504167, "grad_norm": 0.14528773725032806, "learning_rate": 7.714837791111564e-05, "loss": 0.9004, "step": 6046 }, { "epoch": 1.229314901402724, "grad_norm": 0.1562442183494568, "learning_rate": 7.712803823858436e-05, "loss": 1.0503, "step": 6047 }, { "epoch": 1.2295181947550315, "grad_norm": 0.15427474677562714, "learning_rate": 7.710769856605309e-05, "loss": 1.0865, "step": 6048 }, { "epoch": 1.2297214881073388, "grad_norm": 0.15478043258190155, "learning_rate": 7.708735889352182e-05, "loss": 1.0287, "step": 6049 }, { "epoch": 1.2299247814596463, "grad_norm": 0.16193340718746185, "learning_rate": 7.706701922099055e-05, "loss": 1.0488, "step": 6050 }, { "epoch": 1.2301280748119536, "grad_norm": 0.14678221940994263, "learning_rate": 7.704667954845927e-05, "loss": 1.0299, "step": 6051 }, { "epoch": 1.230331368164261, "grad_norm": 0.14202548563480377, "learning_rate": 7.7026339875928e-05, "loss": 0.8996, "step": 6052 }, { "epoch": 1.2305346615165684, "grad_norm": 0.15006931126117706, "learning_rate": 7.700600020339674e-05, "loss": 1.0583, "step": 6053 }, { "epoch": 1.2307379548688757, "grad_norm": 0.15628725290298462, "learning_rate": 7.698566053086546e-05, "loss": 1.0701, "step": 6054 }, { "epoch": 1.2309412482211832, "grad_norm": 0.13345082104206085, "learning_rate": 7.696532085833418e-05, "loss": 0.8766, "step": 6055 }, { "epoch": 1.2311445415734905, "grad_norm": 0.1641533076763153, "learning_rate": 7.694498118580291e-05, "loss": 1.0282, "step": 6056 }, { "epoch": 1.2313478349257978, "grad_norm": 0.16435573995113373, "learning_rate": 7.692464151327165e-05, "loss": 1.0209, "step": 6057 }, { "epoch": 1.2315511282781053, "grad_norm": 0.16523127257823944, "learning_rate": 7.690430184074037e-05, "loss": 1.1916, "step": 6058 }, { "epoch": 1.2317544216304126, "grad_norm": 0.15313848853111267, "learning_rate": 7.68839621682091e-05, "loss": 1.0295, "step": 6059 }, { "epoch": 1.23195771498272, "grad_norm": 0.15203092992305756, "learning_rate": 7.686362249567782e-05, "loss": 0.921, "step": 6060 }, { "epoch": 1.2321610083350274, "grad_norm": 0.1742718368768692, "learning_rate": 7.684328282314656e-05, "loss": 1.178, "step": 6061 }, { "epoch": 1.232364301687335, "grad_norm": 0.1357606202363968, "learning_rate": 7.682294315061528e-05, "loss": 0.8214, "step": 6062 }, { "epoch": 1.2325675950396422, "grad_norm": 0.1643107533454895, "learning_rate": 7.680260347808401e-05, "loss": 1.2033, "step": 6063 }, { "epoch": 1.2327708883919497, "grad_norm": 0.14524690806865692, "learning_rate": 7.678226380555273e-05, "loss": 0.9755, "step": 6064 }, { "epoch": 1.232974181744257, "grad_norm": 0.1577269285917282, "learning_rate": 7.676192413302146e-05, "loss": 1.0717, "step": 6065 }, { "epoch": 1.2331774750965643, "grad_norm": 0.1647750288248062, "learning_rate": 7.67415844604902e-05, "loss": 1.1883, "step": 6066 }, { "epoch": 1.2333807684488718, "grad_norm": 0.15586699545383453, "learning_rate": 7.672124478795892e-05, "loss": 1.1943, "step": 6067 }, { "epoch": 1.233584061801179, "grad_norm": 0.15125897526741028, "learning_rate": 7.670090511542764e-05, "loss": 0.9812, "step": 6068 }, { "epoch": 1.2337873551534866, "grad_norm": 0.15304671227931976, "learning_rate": 7.668056544289637e-05, "loss": 1.087, "step": 6069 }, { "epoch": 1.2339906485057939, "grad_norm": 0.16718313097953796, "learning_rate": 7.66602257703651e-05, "loss": 1.1206, "step": 6070 }, { "epoch": 1.2341939418581012, "grad_norm": 0.13378532230854034, "learning_rate": 7.663988609783383e-05, "loss": 0.8837, "step": 6071 }, { "epoch": 1.2343972352104087, "grad_norm": 0.15577539801597595, "learning_rate": 7.661954642530255e-05, "loss": 0.9845, "step": 6072 }, { "epoch": 1.234600528562716, "grad_norm": 0.15913674235343933, "learning_rate": 7.659920675277128e-05, "loss": 1.02, "step": 6073 }, { "epoch": 1.2348038219150235, "grad_norm": 0.159415602684021, "learning_rate": 7.657886708024002e-05, "loss": 1.1367, "step": 6074 }, { "epoch": 1.2350071152673308, "grad_norm": 0.16024009883403778, "learning_rate": 7.655852740770874e-05, "loss": 1.1639, "step": 6075 }, { "epoch": 1.235210408619638, "grad_norm": 0.14657573401927948, "learning_rate": 7.653818773517747e-05, "loss": 0.9429, "step": 6076 }, { "epoch": 1.2354137019719456, "grad_norm": 0.1571348011493683, "learning_rate": 7.651784806264619e-05, "loss": 1.1514, "step": 6077 }, { "epoch": 1.2356169953242528, "grad_norm": 0.14193733036518097, "learning_rate": 7.649750839011493e-05, "loss": 0.9404, "step": 6078 }, { "epoch": 1.2358202886765604, "grad_norm": 0.1605028212070465, "learning_rate": 7.647716871758365e-05, "loss": 1.0891, "step": 6079 }, { "epoch": 1.2360235820288676, "grad_norm": 0.14684657752513885, "learning_rate": 7.645682904505238e-05, "loss": 1.0208, "step": 6080 }, { "epoch": 1.236226875381175, "grad_norm": 0.14104479551315308, "learning_rate": 7.64364893725211e-05, "loss": 0.9516, "step": 6081 }, { "epoch": 1.2364301687334824, "grad_norm": 0.15257036685943604, "learning_rate": 7.641614969998984e-05, "loss": 1.1801, "step": 6082 }, { "epoch": 1.2366334620857897, "grad_norm": 0.14425641298294067, "learning_rate": 7.639581002745856e-05, "loss": 0.9674, "step": 6083 }, { "epoch": 1.2368367554380972, "grad_norm": 0.13775646686553955, "learning_rate": 7.637547035492729e-05, "loss": 0.9788, "step": 6084 }, { "epoch": 1.2370400487904045, "grad_norm": 0.17104454338550568, "learning_rate": 7.635513068239601e-05, "loss": 1.0818, "step": 6085 }, { "epoch": 1.2372433421427118, "grad_norm": 0.15287339687347412, "learning_rate": 7.633479100986475e-05, "loss": 1.0966, "step": 6086 }, { "epoch": 1.2374466354950193, "grad_norm": 0.17102481424808502, "learning_rate": 7.631445133733348e-05, "loss": 1.2057, "step": 6087 }, { "epoch": 1.2376499288473266, "grad_norm": 0.14067615568637848, "learning_rate": 7.62941116648022e-05, "loss": 0.8911, "step": 6088 }, { "epoch": 1.2378532221996341, "grad_norm": 0.13696187734603882, "learning_rate": 7.627377199227092e-05, "loss": 1.0254, "step": 6089 }, { "epoch": 1.2380565155519414, "grad_norm": 0.1611851453781128, "learning_rate": 7.625343231973966e-05, "loss": 1.1413, "step": 6090 }, { "epoch": 1.238259808904249, "grad_norm": 0.1464037150144577, "learning_rate": 7.623309264720839e-05, "loss": 0.9052, "step": 6091 }, { "epoch": 1.2384631022565562, "grad_norm": 0.16069039702415466, "learning_rate": 7.621275297467711e-05, "loss": 1.0247, "step": 6092 }, { "epoch": 1.2386663956088637, "grad_norm": 0.1654343605041504, "learning_rate": 7.619241330214584e-05, "loss": 0.9899, "step": 6093 }, { "epoch": 1.238869688961171, "grad_norm": 0.1397971659898758, "learning_rate": 7.617207362961457e-05, "loss": 1.0895, "step": 6094 }, { "epoch": 1.2390729823134783, "grad_norm": 0.14901459217071533, "learning_rate": 7.61517339570833e-05, "loss": 0.9465, "step": 6095 }, { "epoch": 1.2392762756657858, "grad_norm": 0.15366655588150024, "learning_rate": 7.613139428455202e-05, "loss": 1.1269, "step": 6096 }, { "epoch": 1.239479569018093, "grad_norm": 0.15471157431602478, "learning_rate": 7.611105461202075e-05, "loss": 0.9961, "step": 6097 }, { "epoch": 1.2396828623704006, "grad_norm": 0.1551191508769989, "learning_rate": 7.609071493948948e-05, "loss": 1.0627, "step": 6098 }, { "epoch": 1.2398861557227079, "grad_norm": 0.15324456989765167, "learning_rate": 7.607037526695821e-05, "loss": 0.9308, "step": 6099 }, { "epoch": 1.2400894490750152, "grad_norm": 0.14543670415878296, "learning_rate": 7.605003559442693e-05, "loss": 1.0393, "step": 6100 }, { "epoch": 1.2402927424273227, "grad_norm": 0.16721047461032867, "learning_rate": 7.602969592189566e-05, "loss": 1.0621, "step": 6101 }, { "epoch": 1.24049603577963, "grad_norm": 0.17093773186206818, "learning_rate": 7.60093562493644e-05, "loss": 1.0743, "step": 6102 }, { "epoch": 1.2406993291319375, "grad_norm": 0.17696061730384827, "learning_rate": 7.598901657683312e-05, "loss": 0.9628, "step": 6103 }, { "epoch": 1.2409026224842448, "grad_norm": 0.14208592474460602, "learning_rate": 7.596867690430185e-05, "loss": 0.9654, "step": 6104 }, { "epoch": 1.241105915836552, "grad_norm": 0.15835708379745483, "learning_rate": 7.594833723177057e-05, "loss": 1.1394, "step": 6105 }, { "epoch": 1.2413092091888596, "grad_norm": 0.1430591642856598, "learning_rate": 7.59279975592393e-05, "loss": 0.9112, "step": 6106 }, { "epoch": 1.2415125025411669, "grad_norm": 0.1534785032272339, "learning_rate": 7.590765788670803e-05, "loss": 0.9188, "step": 6107 }, { "epoch": 1.2417157958934744, "grad_norm": 0.15656499564647675, "learning_rate": 7.588731821417676e-05, "loss": 0.979, "step": 6108 }, { "epoch": 1.2419190892457816, "grad_norm": 0.1518164575099945, "learning_rate": 7.586697854164548e-05, "loss": 1.1044, "step": 6109 }, { "epoch": 1.242122382598089, "grad_norm": 0.14292199909687042, "learning_rate": 7.58466388691142e-05, "loss": 0.8677, "step": 6110 }, { "epoch": 1.2423256759503964, "grad_norm": 0.16828525066375732, "learning_rate": 7.582629919658294e-05, "loss": 1.1678, "step": 6111 }, { "epoch": 1.2425289693027037, "grad_norm": 0.1284974068403244, "learning_rate": 7.580595952405167e-05, "loss": 0.8286, "step": 6112 }, { "epoch": 1.2427322626550112, "grad_norm": 0.14029505848884583, "learning_rate": 7.578561985152039e-05, "loss": 0.9423, "step": 6113 }, { "epoch": 1.2429355560073185, "grad_norm": 0.1682073324918747, "learning_rate": 7.576528017898912e-05, "loss": 1.1954, "step": 6114 }, { "epoch": 1.2431388493596258, "grad_norm": 0.1521628051996231, "learning_rate": 7.574494050645786e-05, "loss": 1.0827, "step": 6115 }, { "epoch": 1.2433421427119333, "grad_norm": 0.15839159488677979, "learning_rate": 7.572460083392658e-05, "loss": 1.2435, "step": 6116 }, { "epoch": 1.2435454360642406, "grad_norm": 0.14326351881027222, "learning_rate": 7.57042611613953e-05, "loss": 0.9765, "step": 6117 }, { "epoch": 1.2437487294165481, "grad_norm": 0.15199799835681915, "learning_rate": 7.568392148886403e-05, "loss": 0.9618, "step": 6118 }, { "epoch": 1.2439520227688554, "grad_norm": 0.14783890545368195, "learning_rate": 7.566358181633277e-05, "loss": 1.0208, "step": 6119 }, { "epoch": 1.244155316121163, "grad_norm": 0.15681447088718414, "learning_rate": 7.564324214380149e-05, "loss": 1.0856, "step": 6120 }, { "epoch": 1.2443586094734702, "grad_norm": 0.1497943103313446, "learning_rate": 7.562290247127022e-05, "loss": 0.9668, "step": 6121 }, { "epoch": 1.2445619028257777, "grad_norm": 0.14835524559020996, "learning_rate": 7.560256279873894e-05, "loss": 0.9293, "step": 6122 }, { "epoch": 1.244765196178085, "grad_norm": 0.13807836174964905, "learning_rate": 7.558222312620768e-05, "loss": 0.95, "step": 6123 }, { "epoch": 1.2449684895303923, "grad_norm": 0.14951051771640778, "learning_rate": 7.55618834536764e-05, "loss": 0.9511, "step": 6124 }, { "epoch": 1.2451717828826998, "grad_norm": 0.16398605704307556, "learning_rate": 7.554154378114513e-05, "loss": 1.008, "step": 6125 }, { "epoch": 1.245375076235007, "grad_norm": 0.12974978983402252, "learning_rate": 7.552120410861385e-05, "loss": 0.8797, "step": 6126 }, { "epoch": 1.2455783695873146, "grad_norm": 0.15278978645801544, "learning_rate": 7.550086443608259e-05, "loss": 1.0084, "step": 6127 }, { "epoch": 1.245781662939622, "grad_norm": 0.14859604835510254, "learning_rate": 7.548052476355131e-05, "loss": 0.9819, "step": 6128 }, { "epoch": 1.2459849562919292, "grad_norm": 0.13911408185958862, "learning_rate": 7.546018509102004e-05, "loss": 0.9986, "step": 6129 }, { "epoch": 1.2461882496442367, "grad_norm": 0.1627921313047409, "learning_rate": 7.543984541848876e-05, "loss": 1.0073, "step": 6130 }, { "epoch": 1.246391542996544, "grad_norm": 0.14569565653800964, "learning_rate": 7.54195057459575e-05, "loss": 0.9272, "step": 6131 }, { "epoch": 1.2465948363488515, "grad_norm": 0.15063758194446564, "learning_rate": 7.539916607342623e-05, "loss": 1.0539, "step": 6132 }, { "epoch": 1.2467981297011588, "grad_norm": 0.15409833192825317, "learning_rate": 7.537882640089495e-05, "loss": 1.0715, "step": 6133 }, { "epoch": 1.247001423053466, "grad_norm": 0.15179461240768433, "learning_rate": 7.535848672836367e-05, "loss": 0.9781, "step": 6134 }, { "epoch": 1.2472047164057736, "grad_norm": 0.14283648133277893, "learning_rate": 7.533814705583241e-05, "loss": 0.9186, "step": 6135 }, { "epoch": 1.2474080097580809, "grad_norm": 0.16481667757034302, "learning_rate": 7.531780738330114e-05, "loss": 1.1317, "step": 6136 }, { "epoch": 1.2476113031103884, "grad_norm": 0.15260998904705048, "learning_rate": 7.529746771076986e-05, "loss": 1.029, "step": 6137 }, { "epoch": 1.2478145964626957, "grad_norm": 0.1735289990901947, "learning_rate": 7.527712803823859e-05, "loss": 1.1954, "step": 6138 }, { "epoch": 1.248017889815003, "grad_norm": 0.15304701030254364, "learning_rate": 7.525678836570732e-05, "loss": 0.9608, "step": 6139 }, { "epoch": 1.2482211831673105, "grad_norm": 0.14913487434387207, "learning_rate": 7.523644869317605e-05, "loss": 0.9169, "step": 6140 }, { "epoch": 1.2484244765196177, "grad_norm": 0.15657873451709747, "learning_rate": 7.521610902064477e-05, "loss": 1.0074, "step": 6141 }, { "epoch": 1.2486277698719253, "grad_norm": 0.1431896835565567, "learning_rate": 7.51957693481135e-05, "loss": 0.9779, "step": 6142 }, { "epoch": 1.2488310632242325, "grad_norm": 0.1579836755990982, "learning_rate": 7.517542967558223e-05, "loss": 1.0718, "step": 6143 }, { "epoch": 1.2490343565765398, "grad_norm": 0.1540554016828537, "learning_rate": 7.515509000305096e-05, "loss": 1.003, "step": 6144 }, { "epoch": 1.2492376499288473, "grad_norm": 0.1616765558719635, "learning_rate": 7.513475033051968e-05, "loss": 1.1288, "step": 6145 }, { "epoch": 1.2494409432811546, "grad_norm": 0.12108495086431503, "learning_rate": 7.511441065798841e-05, "loss": 0.8396, "step": 6146 }, { "epoch": 1.2496442366334621, "grad_norm": 0.1616901010274887, "learning_rate": 7.509407098545713e-05, "loss": 1.0475, "step": 6147 }, { "epoch": 1.2498475299857694, "grad_norm": 0.15256421267986298, "learning_rate": 7.507373131292587e-05, "loss": 0.988, "step": 6148 }, { "epoch": 1.2500508233380767, "grad_norm": 0.161456897854805, "learning_rate": 7.505339164039458e-05, "loss": 1.0345, "step": 6149 }, { "epoch": 1.2502541166903842, "grad_norm": 0.15116725862026215, "learning_rate": 7.503305196786332e-05, "loss": 0.9795, "step": 6150 }, { "epoch": 1.2504574100426917, "grad_norm": 0.15002092719078064, "learning_rate": 7.501271229533204e-05, "loss": 0.9661, "step": 6151 }, { "epoch": 1.250660703394999, "grad_norm": 0.14170213043689728, "learning_rate": 7.499237262280078e-05, "loss": 1.055, "step": 6152 }, { "epoch": 1.2508639967473063, "grad_norm": 0.15835924446582794, "learning_rate": 7.497203295026949e-05, "loss": 1.0649, "step": 6153 }, { "epoch": 1.2510672900996138, "grad_norm": 0.14524368941783905, "learning_rate": 7.495169327773823e-05, "loss": 0.9768, "step": 6154 }, { "epoch": 1.251270583451921, "grad_norm": 0.13836443424224854, "learning_rate": 7.493135360520696e-05, "loss": 0.9321, "step": 6155 }, { "epoch": 1.2514738768042286, "grad_norm": 0.14955168962478638, "learning_rate": 7.49110139326757e-05, "loss": 1.1414, "step": 6156 }, { "epoch": 1.251677170156536, "grad_norm": 0.14756670594215393, "learning_rate": 7.489067426014442e-05, "loss": 1.0315, "step": 6157 }, { "epoch": 1.2518804635088432, "grad_norm": 0.1563825011253357, "learning_rate": 7.487033458761314e-05, "loss": 1.1096, "step": 6158 }, { "epoch": 1.2520837568611507, "grad_norm": 0.1456524133682251, "learning_rate": 7.484999491508187e-05, "loss": 1.0566, "step": 6159 }, { "epoch": 1.252287050213458, "grad_norm": 0.1605212390422821, "learning_rate": 7.48296552425506e-05, "loss": 1.1749, "step": 6160 }, { "epoch": 1.2524903435657655, "grad_norm": 0.15637800097465515, "learning_rate": 7.480931557001933e-05, "loss": 1.0765, "step": 6161 }, { "epoch": 1.2526936369180728, "grad_norm": 0.1442786306142807, "learning_rate": 7.478897589748805e-05, "loss": 0.9542, "step": 6162 }, { "epoch": 1.25289693027038, "grad_norm": 0.12865842878818512, "learning_rate": 7.476863622495678e-05, "loss": 0.8635, "step": 6163 }, { "epoch": 1.2531002236226876, "grad_norm": 0.14644062519073486, "learning_rate": 7.474829655242552e-05, "loss": 1.0271, "step": 6164 }, { "epoch": 1.2533035169749949, "grad_norm": 0.14869025349617004, "learning_rate": 7.472795687989424e-05, "loss": 1.0252, "step": 6165 }, { "epoch": 1.2535068103273024, "grad_norm": 0.1454823762178421, "learning_rate": 7.470761720736297e-05, "loss": 1.0052, "step": 6166 }, { "epoch": 1.2537101036796097, "grad_norm": 0.15685810148715973, "learning_rate": 7.468727753483169e-05, "loss": 1.0575, "step": 6167 }, { "epoch": 1.253913397031917, "grad_norm": 0.16611304879188538, "learning_rate": 7.466693786230043e-05, "loss": 1.242, "step": 6168 }, { "epoch": 1.2541166903842245, "grad_norm": 0.15010212361812592, "learning_rate": 7.464659818976915e-05, "loss": 1.0678, "step": 6169 }, { "epoch": 1.2543199837365318, "grad_norm": 0.14836570620536804, "learning_rate": 7.462625851723788e-05, "loss": 1.0183, "step": 6170 }, { "epoch": 1.2545232770888393, "grad_norm": 0.14831973612308502, "learning_rate": 7.46059188447066e-05, "loss": 0.9672, "step": 6171 }, { "epoch": 1.2547265704411465, "grad_norm": 0.15523511171340942, "learning_rate": 7.458557917217534e-05, "loss": 1.0253, "step": 6172 }, { "epoch": 1.2549298637934538, "grad_norm": 0.1539629101753235, "learning_rate": 7.456523949964406e-05, "loss": 0.9151, "step": 6173 }, { "epoch": 1.2551331571457613, "grad_norm": 0.17283432185649872, "learning_rate": 7.454489982711279e-05, "loss": 1.0992, "step": 6174 }, { "epoch": 1.2553364504980686, "grad_norm": 0.13191667199134827, "learning_rate": 7.452456015458151e-05, "loss": 0.8772, "step": 6175 }, { "epoch": 1.2555397438503761, "grad_norm": 0.16160672903060913, "learning_rate": 7.450422048205025e-05, "loss": 1.0877, "step": 6176 }, { "epoch": 1.2557430372026834, "grad_norm": 0.13833656907081604, "learning_rate": 7.448388080951897e-05, "loss": 0.9369, "step": 6177 }, { "epoch": 1.2559463305549907, "grad_norm": 0.1532142162322998, "learning_rate": 7.44635411369877e-05, "loss": 0.977, "step": 6178 }, { "epoch": 1.2561496239072982, "grad_norm": 0.14689487218856812, "learning_rate": 7.444320146445642e-05, "loss": 0.9764, "step": 6179 }, { "epoch": 1.2563529172596057, "grad_norm": 0.1418536752462387, "learning_rate": 7.442286179192516e-05, "loss": 1.0234, "step": 6180 }, { "epoch": 1.256556210611913, "grad_norm": 0.14861689507961273, "learning_rate": 7.440252211939389e-05, "loss": 1.07, "step": 6181 }, { "epoch": 1.2567595039642203, "grad_norm": 0.1540762186050415, "learning_rate": 7.438218244686261e-05, "loss": 1.0956, "step": 6182 }, { "epoch": 1.2569627973165278, "grad_norm": 0.16416653990745544, "learning_rate": 7.436184277433134e-05, "loss": 1.1411, "step": 6183 }, { "epoch": 1.2571660906688351, "grad_norm": 0.14933699369430542, "learning_rate": 7.434150310180007e-05, "loss": 1.008, "step": 6184 }, { "epoch": 1.2573693840211426, "grad_norm": 0.14696922898292542, "learning_rate": 7.43211634292688e-05, "loss": 0.9848, "step": 6185 }, { "epoch": 1.25757267737345, "grad_norm": 0.142256960272789, "learning_rate": 7.430082375673752e-05, "loss": 0.9146, "step": 6186 }, { "epoch": 1.2577759707257572, "grad_norm": 0.1598552167415619, "learning_rate": 7.428048408420625e-05, "loss": 1.1008, "step": 6187 }, { "epoch": 1.2579792640780647, "grad_norm": 0.1336602419614792, "learning_rate": 7.426014441167497e-05, "loss": 0.8115, "step": 6188 }, { "epoch": 1.258182557430372, "grad_norm": 0.1413356214761734, "learning_rate": 7.423980473914371e-05, "loss": 0.9462, "step": 6189 }, { "epoch": 1.2583858507826795, "grad_norm": 0.15689724683761597, "learning_rate": 7.421946506661242e-05, "loss": 1.0426, "step": 6190 }, { "epoch": 1.2585891441349868, "grad_norm": 0.14870372414588928, "learning_rate": 7.419912539408116e-05, "loss": 1.0125, "step": 6191 }, { "epoch": 1.258792437487294, "grad_norm": 0.1621605008840561, "learning_rate": 7.417878572154988e-05, "loss": 1.098, "step": 6192 }, { "epoch": 1.2589957308396016, "grad_norm": 0.1651010513305664, "learning_rate": 7.415844604901862e-05, "loss": 1.2495, "step": 6193 }, { "epoch": 1.2591990241919089, "grad_norm": 0.16045209765434265, "learning_rate": 7.413810637648733e-05, "loss": 1.0258, "step": 6194 }, { "epoch": 1.2594023175442164, "grad_norm": 0.1479748636484146, "learning_rate": 7.411776670395607e-05, "loss": 0.9255, "step": 6195 }, { "epoch": 1.2596056108965237, "grad_norm": 0.14603246748447418, "learning_rate": 7.40974270314248e-05, "loss": 0.9924, "step": 6196 }, { "epoch": 1.259808904248831, "grad_norm": 0.14301127195358276, "learning_rate": 7.407708735889353e-05, "loss": 0.9358, "step": 6197 }, { "epoch": 1.2600121976011385, "grad_norm": 0.14495980739593506, "learning_rate": 7.405674768636224e-05, "loss": 1.071, "step": 6198 }, { "epoch": 1.2602154909534458, "grad_norm": 0.14776700735092163, "learning_rate": 7.403640801383098e-05, "loss": 1.1025, "step": 6199 }, { "epoch": 1.2604187843057533, "grad_norm": 0.14632262289524078, "learning_rate": 7.40160683412997e-05, "loss": 1.027, "step": 6200 }, { "epoch": 1.2606220776580606, "grad_norm": 0.16701005399227142, "learning_rate": 7.399572866876844e-05, "loss": 1.2313, "step": 6201 }, { "epoch": 1.2608253710103678, "grad_norm": 0.15396632254123688, "learning_rate": 7.397538899623715e-05, "loss": 1.1548, "step": 6202 }, { "epoch": 1.2610286643626754, "grad_norm": 0.15287038683891296, "learning_rate": 7.395504932370589e-05, "loss": 1.0077, "step": 6203 }, { "epoch": 1.2612319577149826, "grad_norm": 0.12379388511180878, "learning_rate": 7.393470965117462e-05, "loss": 0.7992, "step": 6204 }, { "epoch": 1.2614352510672902, "grad_norm": 0.1505446881055832, "learning_rate": 7.391436997864335e-05, "loss": 0.9166, "step": 6205 }, { "epoch": 1.2616385444195974, "grad_norm": 0.14362283051013947, "learning_rate": 7.389403030611207e-05, "loss": 0.9532, "step": 6206 }, { "epoch": 1.2618418377719047, "grad_norm": 0.14815200865268707, "learning_rate": 7.38736906335808e-05, "loss": 1.0684, "step": 6207 }, { "epoch": 1.2620451311242122, "grad_norm": 0.15640223026275635, "learning_rate": 7.385335096104953e-05, "loss": 1.2277, "step": 6208 }, { "epoch": 1.2622484244765197, "grad_norm": 0.1339944303035736, "learning_rate": 7.383301128851827e-05, "loss": 0.9633, "step": 6209 }, { "epoch": 1.262451717828827, "grad_norm": 0.15692713856697083, "learning_rate": 7.381267161598698e-05, "loss": 0.9958, "step": 6210 }, { "epoch": 1.2626550111811343, "grad_norm": 0.14405904710292816, "learning_rate": 7.379233194345571e-05, "loss": 1.0001, "step": 6211 }, { "epoch": 1.2628583045334418, "grad_norm": 0.1682267040014267, "learning_rate": 7.377199227092444e-05, "loss": 1.1671, "step": 6212 }, { "epoch": 1.2630615978857491, "grad_norm": 0.1584296077489853, "learning_rate": 7.375165259839318e-05, "loss": 0.9898, "step": 6213 }, { "epoch": 1.2632648912380566, "grad_norm": 0.14520923793315887, "learning_rate": 7.373131292586189e-05, "loss": 1.0861, "step": 6214 }, { "epoch": 1.263468184590364, "grad_norm": 0.1591317057609558, "learning_rate": 7.371097325333063e-05, "loss": 1.0768, "step": 6215 }, { "epoch": 1.2636714779426712, "grad_norm": 0.12823057174682617, "learning_rate": 7.369063358079935e-05, "loss": 0.8455, "step": 6216 }, { "epoch": 1.2638747712949787, "grad_norm": 0.15325266122817993, "learning_rate": 7.367029390826809e-05, "loss": 0.938, "step": 6217 }, { "epoch": 1.264078064647286, "grad_norm": 0.1660403162240982, "learning_rate": 7.364995423573681e-05, "loss": 1.1529, "step": 6218 }, { "epoch": 1.2642813579995935, "grad_norm": 0.13842836022377014, "learning_rate": 7.362961456320554e-05, "loss": 0.9513, "step": 6219 }, { "epoch": 1.2644846513519008, "grad_norm": 0.13178154826164246, "learning_rate": 7.360927489067426e-05, "loss": 0.8728, "step": 6220 }, { "epoch": 1.264687944704208, "grad_norm": 0.1772044599056244, "learning_rate": 7.3588935218143e-05, "loss": 1.1937, "step": 6221 }, { "epoch": 1.2648912380565156, "grad_norm": 0.1507595181465149, "learning_rate": 7.356859554561172e-05, "loss": 0.9797, "step": 6222 }, { "epoch": 1.2650945314088229, "grad_norm": 0.16947486996650696, "learning_rate": 7.354825587308045e-05, "loss": 1.0339, "step": 6223 }, { "epoch": 1.2652978247611304, "grad_norm": 0.15474039316177368, "learning_rate": 7.352791620054917e-05, "loss": 0.9519, "step": 6224 }, { "epoch": 1.2655011181134377, "grad_norm": 0.14558108150959015, "learning_rate": 7.350757652801791e-05, "loss": 0.9138, "step": 6225 }, { "epoch": 1.265704411465745, "grad_norm": 0.14644969999790192, "learning_rate": 7.348723685548664e-05, "loss": 1.0068, "step": 6226 }, { "epoch": 1.2659077048180525, "grad_norm": 0.17006921768188477, "learning_rate": 7.346689718295536e-05, "loss": 1.0676, "step": 6227 }, { "epoch": 1.2661109981703598, "grad_norm": 0.16292281448841095, "learning_rate": 7.344655751042409e-05, "loss": 1.0636, "step": 6228 }, { "epoch": 1.2663142915226673, "grad_norm": 0.15299753844738007, "learning_rate": 7.342621783789282e-05, "loss": 1.1362, "step": 6229 }, { "epoch": 1.2665175848749746, "grad_norm": 0.14997923374176025, "learning_rate": 7.340587816536155e-05, "loss": 0.9706, "step": 6230 }, { "epoch": 1.2667208782272819, "grad_norm": 0.14713294804096222, "learning_rate": 7.338553849283026e-05, "loss": 0.9487, "step": 6231 }, { "epoch": 1.2669241715795894, "grad_norm": 0.1373048573732376, "learning_rate": 7.3365198820299e-05, "loss": 0.8719, "step": 6232 }, { "epoch": 1.2671274649318967, "grad_norm": 0.14754988253116608, "learning_rate": 7.334485914776772e-05, "loss": 1.0212, "step": 6233 }, { "epoch": 1.2673307582842042, "grad_norm": 0.15924742817878723, "learning_rate": 7.332451947523646e-05, "loss": 1.0609, "step": 6234 }, { "epoch": 1.2675340516365114, "grad_norm": 0.1677163541316986, "learning_rate": 7.330417980270517e-05, "loss": 1.1928, "step": 6235 }, { "epoch": 1.2677373449888187, "grad_norm": 0.1397400200366974, "learning_rate": 7.328384013017391e-05, "loss": 0.8587, "step": 6236 }, { "epoch": 1.2679406383411262, "grad_norm": 0.14857067167758942, "learning_rate": 7.326350045764263e-05, "loss": 0.9723, "step": 6237 }, { "epoch": 1.2681439316934338, "grad_norm": 0.15431994199752808, "learning_rate": 7.324316078511137e-05, "loss": 1.2133, "step": 6238 }, { "epoch": 1.268347225045741, "grad_norm": 0.14655791223049164, "learning_rate": 7.322282111258008e-05, "loss": 0.9676, "step": 6239 }, { "epoch": 1.2685505183980483, "grad_norm": 0.16156570613384247, "learning_rate": 7.320248144004882e-05, "loss": 1.1429, "step": 6240 }, { "epoch": 1.2687538117503558, "grad_norm": 0.1661580204963684, "learning_rate": 7.318214176751754e-05, "loss": 1.1261, "step": 6241 }, { "epoch": 1.2689571051026631, "grad_norm": 0.15335386991500854, "learning_rate": 7.316180209498628e-05, "loss": 0.9301, "step": 6242 }, { "epoch": 1.2691603984549706, "grad_norm": 0.17967920005321503, "learning_rate": 7.314146242245499e-05, "loss": 1.1584, "step": 6243 }, { "epoch": 1.269363691807278, "grad_norm": 0.15790143609046936, "learning_rate": 7.312112274992373e-05, "loss": 1.0314, "step": 6244 }, { "epoch": 1.2695669851595852, "grad_norm": 0.17167986929416656, "learning_rate": 7.310078307739246e-05, "loss": 1.0303, "step": 6245 }, { "epoch": 1.2697702785118927, "grad_norm": 0.13613730669021606, "learning_rate": 7.308044340486119e-05, "loss": 0.9468, "step": 6246 }, { "epoch": 1.2699735718642, "grad_norm": 0.15060873329639435, "learning_rate": 7.30601037323299e-05, "loss": 1.0411, "step": 6247 }, { "epoch": 1.2701768652165075, "grad_norm": 0.14653441309928894, "learning_rate": 7.303976405979864e-05, "loss": 0.9397, "step": 6248 }, { "epoch": 1.2703801585688148, "grad_norm": 0.17043618857860565, "learning_rate": 7.301942438726737e-05, "loss": 1.04, "step": 6249 }, { "epoch": 1.270583451921122, "grad_norm": 0.1663060486316681, "learning_rate": 7.29990847147361e-05, "loss": 1.1406, "step": 6250 }, { "epoch": 1.2707867452734296, "grad_norm": 0.1324852705001831, "learning_rate": 7.297874504220482e-05, "loss": 0.8765, "step": 6251 }, { "epoch": 1.270990038625737, "grad_norm": 0.14403606951236725, "learning_rate": 7.295840536967355e-05, "loss": 0.8504, "step": 6252 }, { "epoch": 1.2711933319780444, "grad_norm": 0.1579236090183258, "learning_rate": 7.293806569714228e-05, "loss": 0.9858, "step": 6253 }, { "epoch": 1.2713966253303517, "grad_norm": 0.14210145175457, "learning_rate": 7.291772602461102e-05, "loss": 0.9863, "step": 6254 }, { "epoch": 1.271599918682659, "grad_norm": 0.1411530077457428, "learning_rate": 7.289738635207973e-05, "loss": 0.9379, "step": 6255 }, { "epoch": 1.2718032120349665, "grad_norm": 0.13940487802028656, "learning_rate": 7.287704667954846e-05, "loss": 0.9373, "step": 6256 }, { "epoch": 1.2720065053872738, "grad_norm": 0.16028715670108795, "learning_rate": 7.285670700701719e-05, "loss": 1.1488, "step": 6257 }, { "epoch": 1.2722097987395813, "grad_norm": 0.15219241380691528, "learning_rate": 7.283636733448593e-05, "loss": 0.9849, "step": 6258 }, { "epoch": 1.2724130920918886, "grad_norm": 0.14046461880207062, "learning_rate": 7.281602766195464e-05, "loss": 0.9298, "step": 6259 }, { "epoch": 1.2726163854441959, "grad_norm": 0.14675304293632507, "learning_rate": 7.279568798942338e-05, "loss": 1.0084, "step": 6260 }, { "epoch": 1.2728196787965034, "grad_norm": 0.13606025278568268, "learning_rate": 7.27753483168921e-05, "loss": 0.7989, "step": 6261 }, { "epoch": 1.2730229721488107, "grad_norm": 0.156595841050148, "learning_rate": 7.275500864436084e-05, "loss": 0.9788, "step": 6262 }, { "epoch": 1.2732262655011182, "grad_norm": 0.15707367658615112, "learning_rate": 7.273466897182955e-05, "loss": 1.047, "step": 6263 }, { "epoch": 1.2734295588534255, "grad_norm": 0.15577323734760284, "learning_rate": 7.271432929929829e-05, "loss": 1.0125, "step": 6264 }, { "epoch": 1.2736328522057327, "grad_norm": 0.16746199131011963, "learning_rate": 7.269398962676701e-05, "loss": 1.0778, "step": 6265 }, { "epoch": 1.2738361455580403, "grad_norm": 0.1447450965642929, "learning_rate": 7.267364995423575e-05, "loss": 0.9396, "step": 6266 }, { "epoch": 1.2740394389103478, "grad_norm": 0.13501743972301483, "learning_rate": 7.265331028170446e-05, "loss": 0.8996, "step": 6267 }, { "epoch": 1.274242732262655, "grad_norm": 0.15393932163715363, "learning_rate": 7.26329706091732e-05, "loss": 1.0001, "step": 6268 }, { "epoch": 1.2744460256149623, "grad_norm": 0.1617959439754486, "learning_rate": 7.261263093664192e-05, "loss": 1.0903, "step": 6269 }, { "epoch": 1.2746493189672699, "grad_norm": 0.15818221867084503, "learning_rate": 7.259229126411066e-05, "loss": 1.1434, "step": 6270 }, { "epoch": 1.2748526123195771, "grad_norm": 0.15312804281711578, "learning_rate": 7.257195159157937e-05, "loss": 0.9829, "step": 6271 }, { "epoch": 1.2750559056718846, "grad_norm": 0.1665237545967102, "learning_rate": 7.25516119190481e-05, "loss": 1.1018, "step": 6272 }, { "epoch": 1.275259199024192, "grad_norm": 0.14515459537506104, "learning_rate": 7.253127224651683e-05, "loss": 0.9767, "step": 6273 }, { "epoch": 1.2754624923764992, "grad_norm": 0.14783386886119843, "learning_rate": 7.251093257398556e-05, "loss": 0.9554, "step": 6274 }, { "epoch": 1.2756657857288067, "grad_norm": 0.15919606387615204, "learning_rate": 7.24905929014543e-05, "loss": 1.1356, "step": 6275 }, { "epoch": 1.275869079081114, "grad_norm": 0.1607399433851242, "learning_rate": 7.247025322892301e-05, "loss": 1.0811, "step": 6276 }, { "epoch": 1.2760723724334215, "grad_norm": 0.14537842571735382, "learning_rate": 7.244991355639175e-05, "loss": 0.9406, "step": 6277 }, { "epoch": 1.2762756657857288, "grad_norm": 0.16683556139469147, "learning_rate": 7.242957388386047e-05, "loss": 1.1229, "step": 6278 }, { "epoch": 1.276478959138036, "grad_norm": 0.15231481194496155, "learning_rate": 7.240923421132921e-05, "loss": 1.0703, "step": 6279 }, { "epoch": 1.2766822524903436, "grad_norm": 0.1400855928659439, "learning_rate": 7.238889453879792e-05, "loss": 0.9923, "step": 6280 }, { "epoch": 1.276885545842651, "grad_norm": 0.14501504600048065, "learning_rate": 7.236855486626666e-05, "loss": 0.8628, "step": 6281 }, { "epoch": 1.2770888391949584, "grad_norm": 0.14576661586761475, "learning_rate": 7.234821519373538e-05, "loss": 1.0154, "step": 6282 }, { "epoch": 1.2772921325472657, "grad_norm": 0.1263686865568161, "learning_rate": 7.232787552120412e-05, "loss": 0.8553, "step": 6283 }, { "epoch": 1.277495425899573, "grad_norm": 0.13566042482852936, "learning_rate": 7.230753584867283e-05, "loss": 0.8437, "step": 6284 }, { "epoch": 1.2776987192518805, "grad_norm": 0.14391617476940155, "learning_rate": 7.228719617614157e-05, "loss": 0.9109, "step": 6285 }, { "epoch": 1.2779020126041878, "grad_norm": 0.1513233482837677, "learning_rate": 7.22668565036103e-05, "loss": 1.0576, "step": 6286 }, { "epoch": 1.2781053059564953, "grad_norm": 0.16668738424777985, "learning_rate": 7.224651683107903e-05, "loss": 1.1339, "step": 6287 }, { "epoch": 1.2783085993088026, "grad_norm": 0.1570490002632141, "learning_rate": 7.222617715854774e-05, "loss": 1.0722, "step": 6288 }, { "epoch": 1.2785118926611099, "grad_norm": 0.14549487829208374, "learning_rate": 7.220583748601648e-05, "loss": 1.0256, "step": 6289 }, { "epoch": 1.2787151860134174, "grad_norm": 0.15154211223125458, "learning_rate": 7.21854978134852e-05, "loss": 0.9687, "step": 6290 }, { "epoch": 1.2789184793657247, "grad_norm": 0.14945222437381744, "learning_rate": 7.216515814095394e-05, "loss": 1.0218, "step": 6291 }, { "epoch": 1.2791217727180322, "grad_norm": 0.14871571958065033, "learning_rate": 7.214481846842265e-05, "loss": 1.057, "step": 6292 }, { "epoch": 1.2793250660703395, "grad_norm": 0.1632535755634308, "learning_rate": 7.212447879589139e-05, "loss": 1.1433, "step": 6293 }, { "epoch": 1.2795283594226468, "grad_norm": 0.1654619425535202, "learning_rate": 7.210413912336012e-05, "loss": 1.0672, "step": 6294 }, { "epoch": 1.2797316527749543, "grad_norm": 0.14849208295345306, "learning_rate": 7.208379945082885e-05, "loss": 0.9965, "step": 6295 }, { "epoch": 1.2799349461272618, "grad_norm": 0.141541987657547, "learning_rate": 7.206345977829757e-05, "loss": 0.9438, "step": 6296 }, { "epoch": 1.280138239479569, "grad_norm": 0.13782508671283722, "learning_rate": 7.20431201057663e-05, "loss": 0.8736, "step": 6297 }, { "epoch": 1.2803415328318763, "grad_norm": 0.14343823492527008, "learning_rate": 7.202278043323503e-05, "loss": 0.9404, "step": 6298 }, { "epoch": 1.2805448261841839, "grad_norm": 0.16715972125530243, "learning_rate": 7.200244076070377e-05, "loss": 0.998, "step": 6299 }, { "epoch": 1.2807481195364911, "grad_norm": 0.14622175693511963, "learning_rate": 7.198210108817248e-05, "loss": 0.9514, "step": 6300 }, { "epoch": 1.2809514128887987, "grad_norm": 0.13806544244289398, "learning_rate": 7.196176141564121e-05, "loss": 0.8665, "step": 6301 }, { "epoch": 1.281154706241106, "grad_norm": 0.1409773975610733, "learning_rate": 7.194142174310994e-05, "loss": 0.8885, "step": 6302 }, { "epoch": 1.2813579995934132, "grad_norm": 0.1428183764219284, "learning_rate": 7.192108207057868e-05, "loss": 1.0111, "step": 6303 }, { "epoch": 1.2815612929457207, "grad_norm": 0.1585017442703247, "learning_rate": 7.190074239804739e-05, "loss": 1.1361, "step": 6304 }, { "epoch": 1.281764586298028, "grad_norm": 0.15969137847423553, "learning_rate": 7.188040272551613e-05, "loss": 1.104, "step": 6305 }, { "epoch": 1.2819678796503355, "grad_norm": 0.13520383834838867, "learning_rate": 7.186006305298485e-05, "loss": 0.8027, "step": 6306 }, { "epoch": 1.2821711730026428, "grad_norm": 0.17720559239387512, "learning_rate": 7.183972338045359e-05, "loss": 1.1919, "step": 6307 }, { "epoch": 1.2823744663549501, "grad_norm": 0.15073487162590027, "learning_rate": 7.18193837079223e-05, "loss": 0.9968, "step": 6308 }, { "epoch": 1.2825777597072576, "grad_norm": 0.15003164112567902, "learning_rate": 7.179904403539104e-05, "loss": 0.9668, "step": 6309 }, { "epoch": 1.282781053059565, "grad_norm": 0.17337696254253387, "learning_rate": 7.177870436285976e-05, "loss": 1.2393, "step": 6310 }, { "epoch": 1.2829843464118724, "grad_norm": 0.14511317014694214, "learning_rate": 7.17583646903285e-05, "loss": 1.0078, "step": 6311 }, { "epoch": 1.2831876397641797, "grad_norm": 0.15251484513282776, "learning_rate": 7.173802501779721e-05, "loss": 1.0664, "step": 6312 }, { "epoch": 1.283390933116487, "grad_norm": 0.14497815072536469, "learning_rate": 7.171768534526594e-05, "loss": 0.9364, "step": 6313 }, { "epoch": 1.2835942264687945, "grad_norm": 0.15960972011089325, "learning_rate": 7.169734567273467e-05, "loss": 1.1536, "step": 6314 }, { "epoch": 1.2837975198211018, "grad_norm": 0.1687343269586563, "learning_rate": 7.16770060002034e-05, "loss": 1.168, "step": 6315 }, { "epoch": 1.2840008131734093, "grad_norm": 0.14428341388702393, "learning_rate": 7.165666632767212e-05, "loss": 0.8795, "step": 6316 }, { "epoch": 1.2842041065257166, "grad_norm": 0.15656180679798126, "learning_rate": 7.163632665514085e-05, "loss": 0.9864, "step": 6317 }, { "epoch": 1.2844073998780239, "grad_norm": 0.14219792187213898, "learning_rate": 7.161598698260958e-05, "loss": 1.0347, "step": 6318 }, { "epoch": 1.2846106932303314, "grad_norm": 0.1517137736082077, "learning_rate": 7.159564731007831e-05, "loss": 1.1361, "step": 6319 }, { "epoch": 1.2848139865826387, "grad_norm": 0.16079369187355042, "learning_rate": 7.157530763754703e-05, "loss": 1.0762, "step": 6320 }, { "epoch": 1.2850172799349462, "grad_norm": 0.1493234634399414, "learning_rate": 7.155496796501576e-05, "loss": 0.8898, "step": 6321 }, { "epoch": 1.2852205732872535, "grad_norm": 0.16232939064502716, "learning_rate": 7.15346282924845e-05, "loss": 0.8951, "step": 6322 }, { "epoch": 1.2854238666395608, "grad_norm": 0.1398724615573883, "learning_rate": 7.151428861995322e-05, "loss": 0.8304, "step": 6323 }, { "epoch": 1.2856271599918683, "grad_norm": 0.13646571338176727, "learning_rate": 7.149394894742195e-05, "loss": 0.8362, "step": 6324 }, { "epoch": 1.2858304533441758, "grad_norm": 0.14222213625907898, "learning_rate": 7.147360927489067e-05, "loss": 1.0308, "step": 6325 }, { "epoch": 1.286033746696483, "grad_norm": 0.1417984813451767, "learning_rate": 7.145326960235941e-05, "loss": 0.9775, "step": 6326 }, { "epoch": 1.2862370400487904, "grad_norm": 0.1756143420934677, "learning_rate": 7.143292992982813e-05, "loss": 1.041, "step": 6327 }, { "epoch": 1.2864403334010976, "grad_norm": 0.15248575806617737, "learning_rate": 7.141259025729686e-05, "loss": 0.9245, "step": 6328 }, { "epoch": 1.2866436267534052, "grad_norm": 0.14683492481708527, "learning_rate": 7.139225058476558e-05, "loss": 0.957, "step": 6329 }, { "epoch": 1.2868469201057127, "grad_norm": 0.16053462028503418, "learning_rate": 7.137191091223432e-05, "loss": 1.1298, "step": 6330 }, { "epoch": 1.28705021345802, "grad_norm": 0.16956062614917755, "learning_rate": 7.135157123970304e-05, "loss": 1.0127, "step": 6331 }, { "epoch": 1.2872535068103272, "grad_norm": 0.14983762800693512, "learning_rate": 7.133123156717177e-05, "loss": 0.9301, "step": 6332 }, { "epoch": 1.2874568001626348, "grad_norm": 0.15941564738750458, "learning_rate": 7.131089189464049e-05, "loss": 1.1836, "step": 6333 }, { "epoch": 1.287660093514942, "grad_norm": 0.15132249891757965, "learning_rate": 7.129055222210923e-05, "loss": 1.0463, "step": 6334 }, { "epoch": 1.2878633868672495, "grad_norm": 0.15944363176822662, "learning_rate": 7.127021254957795e-05, "loss": 0.9344, "step": 6335 }, { "epoch": 1.2880666802195568, "grad_norm": 0.15733520686626434, "learning_rate": 7.124987287704669e-05, "loss": 1.0877, "step": 6336 }, { "epoch": 1.2882699735718641, "grad_norm": 0.1465538740158081, "learning_rate": 7.12295332045154e-05, "loss": 0.8985, "step": 6337 }, { "epoch": 1.2884732669241716, "grad_norm": 0.16172672808170319, "learning_rate": 7.120919353198414e-05, "loss": 0.9402, "step": 6338 }, { "epoch": 1.288676560276479, "grad_norm": 0.14572155475616455, "learning_rate": 7.118885385945287e-05, "loss": 0.9993, "step": 6339 }, { "epoch": 1.2888798536287864, "grad_norm": 0.15885762870311737, "learning_rate": 7.11685141869216e-05, "loss": 1.1093, "step": 6340 }, { "epoch": 1.2890831469810937, "grad_norm": 0.15180082619190216, "learning_rate": 7.114817451439032e-05, "loss": 1.0909, "step": 6341 }, { "epoch": 1.289286440333401, "grad_norm": 0.13964922726154327, "learning_rate": 7.112783484185905e-05, "loss": 0.8004, "step": 6342 }, { "epoch": 1.2894897336857085, "grad_norm": 0.16804420948028564, "learning_rate": 7.110749516932778e-05, "loss": 0.9914, "step": 6343 }, { "epoch": 1.2896930270380158, "grad_norm": 0.16537639498710632, "learning_rate": 7.108715549679652e-05, "loss": 1.1608, "step": 6344 }, { "epoch": 1.2898963203903233, "grad_norm": 0.14818671345710754, "learning_rate": 7.106681582426523e-05, "loss": 0.9843, "step": 6345 }, { "epoch": 1.2900996137426306, "grad_norm": 0.17077666521072388, "learning_rate": 7.104647615173396e-05, "loss": 1.2554, "step": 6346 }, { "epoch": 1.290302907094938, "grad_norm": 0.1623799055814743, "learning_rate": 7.102613647920269e-05, "loss": 1.0618, "step": 6347 }, { "epoch": 1.2905062004472454, "grad_norm": 0.1475660651922226, "learning_rate": 7.100579680667143e-05, "loss": 1.0486, "step": 6348 }, { "epoch": 1.2907094937995527, "grad_norm": 0.14870400726795197, "learning_rate": 7.098545713414014e-05, "loss": 0.9235, "step": 6349 }, { "epoch": 1.2909127871518602, "grad_norm": 0.1594904661178589, "learning_rate": 7.096511746160888e-05, "loss": 1.097, "step": 6350 }, { "epoch": 1.2911160805041675, "grad_norm": 0.13609673082828522, "learning_rate": 7.09447777890776e-05, "loss": 0.8692, "step": 6351 }, { "epoch": 1.2913193738564748, "grad_norm": 0.16340984404087067, "learning_rate": 7.092443811654634e-05, "loss": 1.1089, "step": 6352 }, { "epoch": 1.2915226672087823, "grad_norm": 0.13281512260437012, "learning_rate": 7.090409844401505e-05, "loss": 0.8938, "step": 6353 }, { "epoch": 1.2917259605610896, "grad_norm": 0.16493913531303406, "learning_rate": 7.088375877148377e-05, "loss": 1.0575, "step": 6354 }, { "epoch": 1.291929253913397, "grad_norm": 0.14465326070785522, "learning_rate": 7.086341909895251e-05, "loss": 0.938, "step": 6355 }, { "epoch": 1.2921325472657044, "grad_norm": 0.16361325979232788, "learning_rate": 7.084307942642124e-05, "loss": 1.0142, "step": 6356 }, { "epoch": 1.2923358406180117, "grad_norm": 0.14788828790187836, "learning_rate": 7.082273975388996e-05, "loss": 1.0733, "step": 6357 }, { "epoch": 1.2925391339703192, "grad_norm": 0.15104612708091736, "learning_rate": 7.080240008135869e-05, "loss": 0.8354, "step": 6358 }, { "epoch": 1.2927424273226267, "grad_norm": 0.1521586775779724, "learning_rate": 7.078206040882742e-05, "loss": 0.967, "step": 6359 }, { "epoch": 1.292945720674934, "grad_norm": 0.1353432983160019, "learning_rate": 7.076172073629615e-05, "loss": 0.8673, "step": 6360 }, { "epoch": 1.2931490140272413, "grad_norm": 0.14922721683979034, "learning_rate": 7.074138106376487e-05, "loss": 0.9632, "step": 6361 }, { "epoch": 1.2933523073795488, "grad_norm": 0.16160540282726288, "learning_rate": 7.07210413912336e-05, "loss": 1.0371, "step": 6362 }, { "epoch": 1.293555600731856, "grad_norm": 0.1590534895658493, "learning_rate": 7.070070171870233e-05, "loss": 1.1161, "step": 6363 }, { "epoch": 1.2937588940841636, "grad_norm": 0.15857507288455963, "learning_rate": 7.068036204617106e-05, "loss": 1.0829, "step": 6364 }, { "epoch": 1.2939621874364708, "grad_norm": 0.15210936963558197, "learning_rate": 7.066002237363978e-05, "loss": 0.9997, "step": 6365 }, { "epoch": 1.2941654807887781, "grad_norm": 0.17027819156646729, "learning_rate": 7.063968270110851e-05, "loss": 1.1079, "step": 6366 }, { "epoch": 1.2943687741410856, "grad_norm": 0.15695655345916748, "learning_rate": 7.061934302857725e-05, "loss": 1.0675, "step": 6367 }, { "epoch": 1.294572067493393, "grad_norm": 0.15736332535743713, "learning_rate": 7.059900335604597e-05, "loss": 1.0686, "step": 6368 }, { "epoch": 1.2947753608457004, "grad_norm": 0.15472574532032013, "learning_rate": 7.05786636835147e-05, "loss": 1.0212, "step": 6369 }, { "epoch": 1.2949786541980077, "grad_norm": 0.17373047769069672, "learning_rate": 7.055832401098342e-05, "loss": 1.133, "step": 6370 }, { "epoch": 1.295181947550315, "grad_norm": 0.17195232212543488, "learning_rate": 7.053798433845216e-05, "loss": 1.1432, "step": 6371 }, { "epoch": 1.2953852409026225, "grad_norm": 0.14202889800071716, "learning_rate": 7.051764466592088e-05, "loss": 0.934, "step": 6372 }, { "epoch": 1.2955885342549298, "grad_norm": 0.1472562849521637, "learning_rate": 7.04973049933896e-05, "loss": 0.8657, "step": 6373 }, { "epoch": 1.2957918276072373, "grad_norm": 0.1462807059288025, "learning_rate": 7.047696532085833e-05, "loss": 1.0438, "step": 6374 }, { "epoch": 1.2959951209595446, "grad_norm": 0.16334788501262665, "learning_rate": 7.045662564832707e-05, "loss": 1.084, "step": 6375 }, { "epoch": 1.296198414311852, "grad_norm": 0.14090010523796082, "learning_rate": 7.043628597579579e-05, "loss": 0.9945, "step": 6376 }, { "epoch": 1.2964017076641594, "grad_norm": 0.13263094425201416, "learning_rate": 7.041594630326452e-05, "loss": 0.8053, "step": 6377 }, { "epoch": 1.2966050010164667, "grad_norm": 0.12848879396915436, "learning_rate": 7.039560663073324e-05, "loss": 0.8986, "step": 6378 }, { "epoch": 1.2968082943687742, "grad_norm": 0.16026438772678375, "learning_rate": 7.037526695820198e-05, "loss": 0.9383, "step": 6379 }, { "epoch": 1.2970115877210815, "grad_norm": 0.14478953182697296, "learning_rate": 7.03549272856707e-05, "loss": 0.9431, "step": 6380 }, { "epoch": 1.2972148810733888, "grad_norm": 0.16707251965999603, "learning_rate": 7.033458761313943e-05, "loss": 1.0886, "step": 6381 }, { "epoch": 1.2974181744256963, "grad_norm": 0.12309828400611877, "learning_rate": 7.031424794060815e-05, "loss": 0.8204, "step": 6382 }, { "epoch": 1.2976214677780036, "grad_norm": 0.17878857254981995, "learning_rate": 7.029390826807689e-05, "loss": 1.1639, "step": 6383 }, { "epoch": 1.297824761130311, "grad_norm": 0.15972448885440826, "learning_rate": 7.027356859554562e-05, "loss": 0.9845, "step": 6384 }, { "epoch": 1.2980280544826184, "grad_norm": 0.15551124513149261, "learning_rate": 7.025322892301434e-05, "loss": 0.9243, "step": 6385 }, { "epoch": 1.2982313478349257, "grad_norm": 0.1571023315191269, "learning_rate": 7.023288925048306e-05, "loss": 1.0748, "step": 6386 }, { "epoch": 1.2984346411872332, "grad_norm": 0.14938150346279144, "learning_rate": 7.02125495779518e-05, "loss": 1.0755, "step": 6387 }, { "epoch": 1.2986379345395407, "grad_norm": 0.14772380888462067, "learning_rate": 7.019220990542053e-05, "loss": 1.0526, "step": 6388 }, { "epoch": 1.298841227891848, "grad_norm": 0.16789360344409943, "learning_rate": 7.017187023288925e-05, "loss": 1.2418, "step": 6389 }, { "epoch": 1.2990445212441553, "grad_norm": 0.1547492891550064, "learning_rate": 7.015153056035798e-05, "loss": 1.2118, "step": 6390 }, { "epoch": 1.2992478145964628, "grad_norm": 0.16406840085983276, "learning_rate": 7.013119088782671e-05, "loss": 1.1584, "step": 6391 }, { "epoch": 1.29945110794877, "grad_norm": 0.15339142084121704, "learning_rate": 7.011085121529544e-05, "loss": 1.0863, "step": 6392 }, { "epoch": 1.2996544013010776, "grad_norm": 0.151136577129364, "learning_rate": 7.009051154276418e-05, "loss": 1.152, "step": 6393 }, { "epoch": 1.2998576946533849, "grad_norm": 0.16153016686439514, "learning_rate": 7.007017187023289e-05, "loss": 1.0774, "step": 6394 }, { "epoch": 1.3000609880056921, "grad_norm": 0.15656860172748566, "learning_rate": 7.004983219770163e-05, "loss": 1.0618, "step": 6395 }, { "epoch": 1.3002642813579997, "grad_norm": 0.1635272204875946, "learning_rate": 7.002949252517035e-05, "loss": 1.0452, "step": 6396 }, { "epoch": 1.300467574710307, "grad_norm": 0.14776968955993652, "learning_rate": 7.000915285263907e-05, "loss": 0.8822, "step": 6397 }, { "epoch": 1.3006708680626144, "grad_norm": 0.14170758426189423, "learning_rate": 6.99888131801078e-05, "loss": 1.0016, "step": 6398 }, { "epoch": 1.3008741614149217, "grad_norm": 0.14958837628364563, "learning_rate": 6.996847350757652e-05, "loss": 1.1093, "step": 6399 }, { "epoch": 1.301077454767229, "grad_norm": 0.1530427783727646, "learning_rate": 6.994813383504526e-05, "loss": 0.9991, "step": 6400 }, { "epoch": 1.3012807481195365, "grad_norm": 0.1651686280965805, "learning_rate": 6.992779416251399e-05, "loss": 1.0096, "step": 6401 }, { "epoch": 1.3014840414718438, "grad_norm": 0.14131589233875275, "learning_rate": 6.990745448998271e-05, "loss": 0.9807, "step": 6402 }, { "epoch": 1.3016873348241513, "grad_norm": 0.16334564983844757, "learning_rate": 6.988711481745143e-05, "loss": 0.9987, "step": 6403 }, { "epoch": 1.3018906281764586, "grad_norm": 0.16743913292884827, "learning_rate": 6.986677514492017e-05, "loss": 1.1994, "step": 6404 }, { "epoch": 1.302093921528766, "grad_norm": 0.15577523410320282, "learning_rate": 6.98464354723889e-05, "loss": 0.9693, "step": 6405 }, { "epoch": 1.3022972148810734, "grad_norm": 0.16516022384166718, "learning_rate": 6.982609579985762e-05, "loss": 1.102, "step": 6406 }, { "epoch": 1.3025005082333807, "grad_norm": 0.13962024450302124, "learning_rate": 6.980575612732635e-05, "loss": 0.885, "step": 6407 }, { "epoch": 1.3027038015856882, "grad_norm": 0.1331530511379242, "learning_rate": 6.978541645479508e-05, "loss": 0.9392, "step": 6408 }, { "epoch": 1.3029070949379955, "grad_norm": 0.15082453191280365, "learning_rate": 6.976507678226381e-05, "loss": 0.9806, "step": 6409 }, { "epoch": 1.3031103882903028, "grad_norm": 0.15130026638507843, "learning_rate": 6.974473710973253e-05, "loss": 0.9098, "step": 6410 }, { "epoch": 1.3033136816426103, "grad_norm": 0.1474311500787735, "learning_rate": 6.972439743720126e-05, "loss": 0.9963, "step": 6411 }, { "epoch": 1.3035169749949176, "grad_norm": 0.143365278840065, "learning_rate": 6.970405776467e-05, "loss": 0.9287, "step": 6412 }, { "epoch": 1.303720268347225, "grad_norm": 0.1591702550649643, "learning_rate": 6.968371809213872e-05, "loss": 1.0042, "step": 6413 }, { "epoch": 1.3039235616995324, "grad_norm": 0.14767907559871674, "learning_rate": 6.966337841960744e-05, "loss": 1.0521, "step": 6414 }, { "epoch": 1.3041268550518397, "grad_norm": 0.165874645113945, "learning_rate": 6.964303874707617e-05, "loss": 1.0874, "step": 6415 }, { "epoch": 1.3043301484041472, "grad_norm": 0.137615367770195, "learning_rate": 6.962269907454491e-05, "loss": 0.8331, "step": 6416 }, { "epoch": 1.3045334417564547, "grad_norm": 0.15427331626415253, "learning_rate": 6.960235940201363e-05, "loss": 1.0074, "step": 6417 }, { "epoch": 1.304736735108762, "grad_norm": 0.1635177880525589, "learning_rate": 6.958201972948236e-05, "loss": 1.1218, "step": 6418 }, { "epoch": 1.3049400284610693, "grad_norm": 0.15335600078105927, "learning_rate": 6.956168005695108e-05, "loss": 1.0503, "step": 6419 }, { "epoch": 1.3051433218133768, "grad_norm": 0.1703837513923645, "learning_rate": 6.954134038441982e-05, "loss": 1.1265, "step": 6420 }, { "epoch": 1.305346615165684, "grad_norm": 0.1549668163061142, "learning_rate": 6.952100071188854e-05, "loss": 0.9123, "step": 6421 }, { "epoch": 1.3055499085179916, "grad_norm": 0.17017358541488647, "learning_rate": 6.950066103935727e-05, "loss": 1.1507, "step": 6422 }, { "epoch": 1.3057532018702989, "grad_norm": 0.1454373002052307, "learning_rate": 6.948032136682599e-05, "loss": 0.9948, "step": 6423 }, { "epoch": 1.3059564952226062, "grad_norm": 0.16227105259895325, "learning_rate": 6.945998169429473e-05, "loss": 1.183, "step": 6424 }, { "epoch": 1.3061597885749137, "grad_norm": 0.1732262670993805, "learning_rate": 6.943964202176345e-05, "loss": 1.2444, "step": 6425 }, { "epoch": 1.306363081927221, "grad_norm": 0.1399673968553543, "learning_rate": 6.941930234923218e-05, "loss": 0.9059, "step": 6426 }, { "epoch": 1.3065663752795285, "grad_norm": 0.15425364673137665, "learning_rate": 6.93989626767009e-05, "loss": 1.0113, "step": 6427 }, { "epoch": 1.3067696686318357, "grad_norm": 0.16954706609249115, "learning_rate": 6.937862300416964e-05, "loss": 1.0581, "step": 6428 }, { "epoch": 1.306972961984143, "grad_norm": 0.17263741791248322, "learning_rate": 6.935828333163837e-05, "loss": 1.2699, "step": 6429 }, { "epoch": 1.3071762553364505, "grad_norm": 0.1669914275407791, "learning_rate": 6.933794365910709e-05, "loss": 1.0944, "step": 6430 }, { "epoch": 1.3073795486887578, "grad_norm": 0.1626417487859726, "learning_rate": 6.931760398657581e-05, "loss": 1.0985, "step": 6431 }, { "epoch": 1.3075828420410653, "grad_norm": 0.15837189555168152, "learning_rate": 6.929726431404455e-05, "loss": 0.9834, "step": 6432 }, { "epoch": 1.3077861353933726, "grad_norm": 0.14864079654216766, "learning_rate": 6.927692464151328e-05, "loss": 1.0147, "step": 6433 }, { "epoch": 1.30798942874568, "grad_norm": 0.13315309584140778, "learning_rate": 6.9256584968982e-05, "loss": 0.9718, "step": 6434 }, { "epoch": 1.3081927220979874, "grad_norm": 0.14859481155872345, "learning_rate": 6.923624529645073e-05, "loss": 0.9433, "step": 6435 }, { "epoch": 1.3083960154502947, "grad_norm": 0.1559152603149414, "learning_rate": 6.921590562391946e-05, "loss": 1.0366, "step": 6436 }, { "epoch": 1.3085993088026022, "grad_norm": 0.1377761960029602, "learning_rate": 6.919556595138819e-05, "loss": 0.8622, "step": 6437 }, { "epoch": 1.3088026021549095, "grad_norm": 0.15666405856609344, "learning_rate": 6.917522627885691e-05, "loss": 1.0464, "step": 6438 }, { "epoch": 1.3090058955072168, "grad_norm": 0.1416643261909485, "learning_rate": 6.915488660632564e-05, "loss": 1.0275, "step": 6439 }, { "epoch": 1.3092091888595243, "grad_norm": 0.15593840181827545, "learning_rate": 6.913454693379436e-05, "loss": 0.9657, "step": 6440 }, { "epoch": 1.3094124822118316, "grad_norm": 0.1602342128753662, "learning_rate": 6.91142072612631e-05, "loss": 1.0536, "step": 6441 }, { "epoch": 1.309615775564139, "grad_norm": 0.14962245523929596, "learning_rate": 6.909386758873182e-05, "loss": 1.0307, "step": 6442 }, { "epoch": 1.3098190689164464, "grad_norm": 0.1534949541091919, "learning_rate": 6.907352791620055e-05, "loss": 1.0847, "step": 6443 }, { "epoch": 1.3100223622687537, "grad_norm": 0.1267688274383545, "learning_rate": 6.905318824366927e-05, "loss": 0.8466, "step": 6444 }, { "epoch": 1.3102256556210612, "grad_norm": 0.14033158123493195, "learning_rate": 6.903284857113801e-05, "loss": 0.9265, "step": 6445 }, { "epoch": 1.3104289489733687, "grad_norm": 0.15192466974258423, "learning_rate": 6.901250889860674e-05, "loss": 0.931, "step": 6446 }, { "epoch": 1.310632242325676, "grad_norm": 0.15281356871128082, "learning_rate": 6.899216922607546e-05, "loss": 1.0464, "step": 6447 }, { "epoch": 1.3108355356779833, "grad_norm": 0.15561819076538086, "learning_rate": 6.897182955354418e-05, "loss": 1.0109, "step": 6448 }, { "epoch": 1.3110388290302908, "grad_norm": 0.13406097888946533, "learning_rate": 6.895148988101292e-05, "loss": 0.828, "step": 6449 }, { "epoch": 1.311242122382598, "grad_norm": 0.13384568691253662, "learning_rate": 6.893115020848165e-05, "loss": 0.9232, "step": 6450 }, { "epoch": 1.3114454157349056, "grad_norm": 0.16319598257541656, "learning_rate": 6.891081053595037e-05, "loss": 1.0027, "step": 6451 }, { "epoch": 1.3116487090872129, "grad_norm": 0.15781641006469727, "learning_rate": 6.88904708634191e-05, "loss": 1.1818, "step": 6452 }, { "epoch": 1.3118520024395202, "grad_norm": 0.16732369363307953, "learning_rate": 6.887013119088783e-05, "loss": 1.0994, "step": 6453 }, { "epoch": 1.3120552957918277, "grad_norm": 0.14292466640472412, "learning_rate": 6.884979151835656e-05, "loss": 0.8309, "step": 6454 }, { "epoch": 1.312258589144135, "grad_norm": 0.15096823871135712, "learning_rate": 6.882945184582528e-05, "loss": 0.9477, "step": 6455 }, { "epoch": 1.3124618824964425, "grad_norm": 0.14904417097568512, "learning_rate": 6.880911217329401e-05, "loss": 1.0092, "step": 6456 }, { "epoch": 1.3126651758487498, "grad_norm": 0.1489609032869339, "learning_rate": 6.878877250076275e-05, "loss": 0.9442, "step": 6457 }, { "epoch": 1.312868469201057, "grad_norm": 0.14513355493545532, "learning_rate": 6.876843282823147e-05, "loss": 0.8775, "step": 6458 }, { "epoch": 1.3130717625533646, "grad_norm": 0.15297158062458038, "learning_rate": 6.87480931557002e-05, "loss": 0.9901, "step": 6459 }, { "epoch": 1.3132750559056718, "grad_norm": 0.15674805641174316, "learning_rate": 6.872775348316892e-05, "loss": 1.0478, "step": 6460 }, { "epoch": 1.3134783492579793, "grad_norm": 0.132054403424263, "learning_rate": 6.870741381063766e-05, "loss": 0.8865, "step": 6461 }, { "epoch": 1.3136816426102866, "grad_norm": 0.1640552580356598, "learning_rate": 6.868707413810638e-05, "loss": 1.1175, "step": 6462 }, { "epoch": 1.313884935962594, "grad_norm": 0.13734549283981323, "learning_rate": 6.86667344655751e-05, "loss": 0.9978, "step": 6463 }, { "epoch": 1.3140882293149014, "grad_norm": 0.12565961480140686, "learning_rate": 6.864639479304383e-05, "loss": 0.8271, "step": 6464 }, { "epoch": 1.3142915226672087, "grad_norm": 0.14138418436050415, "learning_rate": 6.862605512051257e-05, "loss": 0.9234, "step": 6465 }, { "epoch": 1.3144948160195162, "grad_norm": 0.1520802527666092, "learning_rate": 6.860571544798129e-05, "loss": 1.0683, "step": 6466 }, { "epoch": 1.3146981093718235, "grad_norm": 0.1719176024198532, "learning_rate": 6.858537577545002e-05, "loss": 1.0938, "step": 6467 }, { "epoch": 1.3149014027241308, "grad_norm": 0.15719756484031677, "learning_rate": 6.856503610291874e-05, "loss": 1.1215, "step": 6468 }, { "epoch": 1.3151046960764383, "grad_norm": 0.15599600970745087, "learning_rate": 6.854469643038748e-05, "loss": 0.9656, "step": 6469 }, { "epoch": 1.3153079894287456, "grad_norm": 0.15448269248008728, "learning_rate": 6.85243567578562e-05, "loss": 1.0572, "step": 6470 }, { "epoch": 1.3155112827810531, "grad_norm": 0.1604229360818863, "learning_rate": 6.850401708532493e-05, "loss": 1.1076, "step": 6471 }, { "epoch": 1.3157145761333604, "grad_norm": 0.13794146478176117, "learning_rate": 6.848367741279365e-05, "loss": 0.8873, "step": 6472 }, { "epoch": 1.3159178694856677, "grad_norm": 0.16124792397022247, "learning_rate": 6.846333774026239e-05, "loss": 1.1082, "step": 6473 }, { "epoch": 1.3161211628379752, "grad_norm": 0.14321772754192352, "learning_rate": 6.844299806773112e-05, "loss": 0.9342, "step": 6474 }, { "epoch": 1.3163244561902827, "grad_norm": 0.14759552478790283, "learning_rate": 6.842265839519984e-05, "loss": 0.9947, "step": 6475 }, { "epoch": 1.31652774954259, "grad_norm": 0.1335640698671341, "learning_rate": 6.840231872266856e-05, "loss": 0.9202, "step": 6476 }, { "epoch": 1.3167310428948973, "grad_norm": 0.14602088928222656, "learning_rate": 6.83819790501373e-05, "loss": 0.8976, "step": 6477 }, { "epoch": 1.3169343362472048, "grad_norm": 0.15512679517269135, "learning_rate": 6.836163937760603e-05, "loss": 1.0567, "step": 6478 }, { "epoch": 1.317137629599512, "grad_norm": 0.14310981333255768, "learning_rate": 6.834129970507475e-05, "loss": 0.9974, "step": 6479 }, { "epoch": 1.3173409229518196, "grad_norm": 0.1316903829574585, "learning_rate": 6.832096003254348e-05, "loss": 0.9084, "step": 6480 }, { "epoch": 1.3175442163041269, "grad_norm": 0.15756045281887054, "learning_rate": 6.83006203600122e-05, "loss": 1.0742, "step": 6481 }, { "epoch": 1.3177475096564342, "grad_norm": 0.1692349910736084, "learning_rate": 6.828028068748094e-05, "loss": 1.2026, "step": 6482 }, { "epoch": 1.3179508030087417, "grad_norm": 0.1590135246515274, "learning_rate": 6.825994101494966e-05, "loss": 1.0709, "step": 6483 }, { "epoch": 1.318154096361049, "grad_norm": 0.15329840779304504, "learning_rate": 6.823960134241839e-05, "loss": 0.9894, "step": 6484 }, { "epoch": 1.3183573897133565, "grad_norm": 0.17357125878334045, "learning_rate": 6.821926166988711e-05, "loss": 1.1774, "step": 6485 }, { "epoch": 1.3185606830656638, "grad_norm": 0.15878167748451233, "learning_rate": 6.819892199735585e-05, "loss": 0.9951, "step": 6486 }, { "epoch": 1.318763976417971, "grad_norm": 0.14225813746452332, "learning_rate": 6.817858232482457e-05, "loss": 0.8056, "step": 6487 }, { "epoch": 1.3189672697702786, "grad_norm": 0.15075825154781342, "learning_rate": 6.81582426522933e-05, "loss": 1.0133, "step": 6488 }, { "epoch": 1.3191705631225858, "grad_norm": 0.1345609575510025, "learning_rate": 6.813790297976202e-05, "loss": 0.818, "step": 6489 }, { "epoch": 1.3193738564748934, "grad_norm": 0.147307887673378, "learning_rate": 6.811756330723076e-05, "loss": 1.0098, "step": 6490 }, { "epoch": 1.3195771498272006, "grad_norm": 0.17987053096294403, "learning_rate": 6.809722363469949e-05, "loss": 1.1921, "step": 6491 }, { "epoch": 1.319780443179508, "grad_norm": 0.13262401521205902, "learning_rate": 6.807688396216821e-05, "loss": 0.8263, "step": 6492 }, { "epoch": 1.3199837365318154, "grad_norm": 0.15621908009052277, "learning_rate": 6.805654428963693e-05, "loss": 1.0055, "step": 6493 }, { "epoch": 1.3201870298841227, "grad_norm": 0.1636243760585785, "learning_rate": 6.803620461710567e-05, "loss": 1.1082, "step": 6494 }, { "epoch": 1.3203903232364302, "grad_norm": 0.14250467717647552, "learning_rate": 6.80158649445744e-05, "loss": 0.9626, "step": 6495 }, { "epoch": 1.3205936165887375, "grad_norm": 0.15674500167369843, "learning_rate": 6.799552527204312e-05, "loss": 1.0997, "step": 6496 }, { "epoch": 1.3207969099410448, "grad_norm": 0.1689048558473587, "learning_rate": 6.797518559951185e-05, "loss": 1.1284, "step": 6497 }, { "epoch": 1.3210002032933523, "grad_norm": 0.15494292974472046, "learning_rate": 6.795484592698058e-05, "loss": 1.0416, "step": 6498 }, { "epoch": 1.3212034966456596, "grad_norm": 0.1549694538116455, "learning_rate": 6.793450625444931e-05, "loss": 0.9951, "step": 6499 }, { "epoch": 1.3214067899979671, "grad_norm": 0.16461819410324097, "learning_rate": 6.791416658191803e-05, "loss": 1.1806, "step": 6500 }, { "epoch": 1.3216100833502744, "grad_norm": 0.171092689037323, "learning_rate": 6.789382690938676e-05, "loss": 1.2561, "step": 6501 }, { "epoch": 1.3218133767025817, "grad_norm": 0.1471683531999588, "learning_rate": 6.78734872368555e-05, "loss": 1.0088, "step": 6502 }, { "epoch": 1.3220166700548892, "grad_norm": 0.14350157976150513, "learning_rate": 6.785314756432422e-05, "loss": 0.9284, "step": 6503 }, { "epoch": 1.3222199634071967, "grad_norm": 0.1558438092470169, "learning_rate": 6.783280789179294e-05, "loss": 1.0972, "step": 6504 }, { "epoch": 1.322423256759504, "grad_norm": 0.15591087937355042, "learning_rate": 6.781246821926167e-05, "loss": 1.1083, "step": 6505 }, { "epoch": 1.3226265501118113, "grad_norm": 0.14936377108097076, "learning_rate": 6.779212854673041e-05, "loss": 0.8758, "step": 6506 }, { "epoch": 1.3228298434641188, "grad_norm": 0.1665930598974228, "learning_rate": 6.777178887419913e-05, "loss": 1.0902, "step": 6507 }, { "epoch": 1.323033136816426, "grad_norm": 0.1540941298007965, "learning_rate": 6.775144920166786e-05, "loss": 1.1394, "step": 6508 }, { "epoch": 1.3232364301687336, "grad_norm": 0.1532563716173172, "learning_rate": 6.773110952913658e-05, "loss": 0.9367, "step": 6509 }, { "epoch": 1.323439723521041, "grad_norm": 0.1737111210823059, "learning_rate": 6.771076985660532e-05, "loss": 1.0788, "step": 6510 }, { "epoch": 1.3236430168733482, "grad_norm": 0.15493230521678925, "learning_rate": 6.769043018407404e-05, "loss": 1.0181, "step": 6511 }, { "epoch": 1.3238463102256557, "grad_norm": 0.14142099022865295, "learning_rate": 6.767009051154277e-05, "loss": 0.8269, "step": 6512 }, { "epoch": 1.324049603577963, "grad_norm": 0.15842103958129883, "learning_rate": 6.764975083901149e-05, "loss": 1.054, "step": 6513 }, { "epoch": 1.3242528969302705, "grad_norm": 0.15117564797401428, "learning_rate": 6.762941116648023e-05, "loss": 1.0109, "step": 6514 }, { "epoch": 1.3244561902825778, "grad_norm": 0.13780778646469116, "learning_rate": 6.760907149394895e-05, "loss": 0.8528, "step": 6515 }, { "epoch": 1.324659483634885, "grad_norm": 0.14925047755241394, "learning_rate": 6.758873182141768e-05, "loss": 1.0393, "step": 6516 }, { "epoch": 1.3248627769871926, "grad_norm": 0.1592148393392563, "learning_rate": 6.75683921488864e-05, "loss": 1.105, "step": 6517 }, { "epoch": 1.3250660703394999, "grad_norm": 0.14861708879470825, "learning_rate": 6.754805247635514e-05, "loss": 0.9957, "step": 6518 }, { "epoch": 1.3252693636918074, "grad_norm": 0.15767233073711395, "learning_rate": 6.752771280382387e-05, "loss": 0.9189, "step": 6519 }, { "epoch": 1.3254726570441147, "grad_norm": 0.1405247300863266, "learning_rate": 6.750737313129259e-05, "loss": 0.9857, "step": 6520 }, { "epoch": 1.325675950396422, "grad_norm": 0.1638457179069519, "learning_rate": 6.748703345876131e-05, "loss": 1.0637, "step": 6521 }, { "epoch": 1.3258792437487295, "grad_norm": 0.13822193443775177, "learning_rate": 6.746669378623004e-05, "loss": 0.9613, "step": 6522 }, { "epoch": 1.3260825371010367, "grad_norm": 0.16013990342617035, "learning_rate": 6.744635411369878e-05, "loss": 1.0393, "step": 6523 }, { "epoch": 1.3262858304533442, "grad_norm": 0.1428498774766922, "learning_rate": 6.74260144411675e-05, "loss": 0.98, "step": 6524 }, { "epoch": 1.3264891238056515, "grad_norm": 0.15162871778011322, "learning_rate": 6.740567476863623e-05, "loss": 0.9981, "step": 6525 }, { "epoch": 1.3266924171579588, "grad_norm": 0.14506307244300842, "learning_rate": 6.738533509610495e-05, "loss": 0.9618, "step": 6526 }, { "epoch": 1.3268957105102663, "grad_norm": 0.15427903831005096, "learning_rate": 6.736499542357369e-05, "loss": 0.9897, "step": 6527 }, { "epoch": 1.3270990038625736, "grad_norm": 0.1582299917936325, "learning_rate": 6.734465575104241e-05, "loss": 1.045, "step": 6528 }, { "epoch": 1.3273022972148811, "grad_norm": 0.15421707928180695, "learning_rate": 6.732431607851114e-05, "loss": 1.0482, "step": 6529 }, { "epoch": 1.3275055905671884, "grad_norm": 0.14663353562355042, "learning_rate": 6.730397640597986e-05, "loss": 0.9977, "step": 6530 }, { "epoch": 1.3277088839194957, "grad_norm": 0.16096587479114532, "learning_rate": 6.72836367334486e-05, "loss": 1.1346, "step": 6531 }, { "epoch": 1.3279121772718032, "grad_norm": 0.1554766148328781, "learning_rate": 6.726329706091732e-05, "loss": 0.9453, "step": 6532 }, { "epoch": 1.3281154706241107, "grad_norm": 0.1384490728378296, "learning_rate": 6.724295738838605e-05, "loss": 1.0076, "step": 6533 }, { "epoch": 1.328318763976418, "grad_norm": 0.16109387576580048, "learning_rate": 6.722261771585477e-05, "loss": 0.9302, "step": 6534 }, { "epoch": 1.3285220573287253, "grad_norm": 0.1677289754152298, "learning_rate": 6.720227804332351e-05, "loss": 1.0598, "step": 6535 }, { "epoch": 1.3287253506810328, "grad_norm": 0.16336236894130707, "learning_rate": 6.718193837079224e-05, "loss": 1.0249, "step": 6536 }, { "epoch": 1.32892864403334, "grad_norm": 0.14014337956905365, "learning_rate": 6.716159869826096e-05, "loss": 0.8409, "step": 6537 }, { "epoch": 1.3291319373856476, "grad_norm": 0.15761636197566986, "learning_rate": 6.714125902572968e-05, "loss": 1.0118, "step": 6538 }, { "epoch": 1.329335230737955, "grad_norm": 0.12533292174339294, "learning_rate": 6.712091935319842e-05, "loss": 0.8514, "step": 6539 }, { "epoch": 1.3295385240902622, "grad_norm": 0.1495424211025238, "learning_rate": 6.710057968066715e-05, "loss": 0.9738, "step": 6540 }, { "epoch": 1.3297418174425697, "grad_norm": 0.15687566995620728, "learning_rate": 6.708024000813587e-05, "loss": 0.9854, "step": 6541 }, { "epoch": 1.329945110794877, "grad_norm": 0.1550702452659607, "learning_rate": 6.70599003356046e-05, "loss": 1.1639, "step": 6542 }, { "epoch": 1.3301484041471845, "grad_norm": 0.1597202569246292, "learning_rate": 6.703956066307333e-05, "loss": 1.0971, "step": 6543 }, { "epoch": 1.3303516974994918, "grad_norm": 0.15224111080169678, "learning_rate": 6.701922099054206e-05, "loss": 1.0292, "step": 6544 }, { "epoch": 1.330554990851799, "grad_norm": 0.12689407169818878, "learning_rate": 6.699888131801078e-05, "loss": 0.8408, "step": 6545 }, { "epoch": 1.3307582842041066, "grad_norm": 0.14697900414466858, "learning_rate": 6.697854164547951e-05, "loss": 1.0648, "step": 6546 }, { "epoch": 1.3309615775564139, "grad_norm": 0.16403964161872864, "learning_rate": 6.695820197294825e-05, "loss": 1.1699, "step": 6547 }, { "epoch": 1.3311648709087214, "grad_norm": 0.14086556434631348, "learning_rate": 6.693786230041697e-05, "loss": 0.9051, "step": 6548 }, { "epoch": 1.3313681642610287, "grad_norm": 0.1466771811246872, "learning_rate": 6.69175226278857e-05, "loss": 1.022, "step": 6549 }, { "epoch": 1.331571457613336, "grad_norm": 0.16425777971744537, "learning_rate": 6.689718295535442e-05, "loss": 1.0438, "step": 6550 }, { "epoch": 1.3317747509656435, "grad_norm": 0.16799181699752808, "learning_rate": 6.687684328282316e-05, "loss": 1.1315, "step": 6551 }, { "epoch": 1.3319780443179507, "grad_norm": 0.14717894792556763, "learning_rate": 6.685650361029188e-05, "loss": 0.9153, "step": 6552 }, { "epoch": 1.3321813376702583, "grad_norm": 0.15937882661819458, "learning_rate": 6.68361639377606e-05, "loss": 1.1034, "step": 6553 }, { "epoch": 1.3323846310225655, "grad_norm": 0.14805994927883148, "learning_rate": 6.681582426522933e-05, "loss": 0.8675, "step": 6554 }, { "epoch": 1.3325879243748728, "grad_norm": 0.15249277651309967, "learning_rate": 6.679548459269807e-05, "loss": 1.135, "step": 6555 }, { "epoch": 1.3327912177271803, "grad_norm": 0.1499241143465042, "learning_rate": 6.677514492016679e-05, "loss": 0.9384, "step": 6556 }, { "epoch": 1.3329945110794876, "grad_norm": 0.16107714176177979, "learning_rate": 6.675480524763552e-05, "loss": 1.1863, "step": 6557 }, { "epoch": 1.3331978044317951, "grad_norm": 0.15378479659557343, "learning_rate": 6.673446557510424e-05, "loss": 0.9822, "step": 6558 }, { "epoch": 1.3334010977841024, "grad_norm": 0.13990168273448944, "learning_rate": 6.671412590257298e-05, "loss": 0.8935, "step": 6559 }, { "epoch": 1.3336043911364097, "grad_norm": 0.15115247666835785, "learning_rate": 6.66937862300417e-05, "loss": 0.9831, "step": 6560 }, { "epoch": 1.3338076844887172, "grad_norm": 0.17793822288513184, "learning_rate": 6.667344655751043e-05, "loss": 1.052, "step": 6561 }, { "epoch": 1.3340109778410247, "grad_norm": 0.16342271864414215, "learning_rate": 6.665310688497915e-05, "loss": 1.1521, "step": 6562 }, { "epoch": 1.334214271193332, "grad_norm": 0.171544149518013, "learning_rate": 6.663276721244788e-05, "loss": 1.1458, "step": 6563 }, { "epoch": 1.3344175645456393, "grad_norm": 0.17183944582939148, "learning_rate": 6.661242753991662e-05, "loss": 1.1076, "step": 6564 }, { "epoch": 1.3346208578979466, "grad_norm": 0.1565820276737213, "learning_rate": 6.659208786738534e-05, "loss": 0.9565, "step": 6565 }, { "epoch": 1.334824151250254, "grad_norm": 0.1608911156654358, "learning_rate": 6.657174819485406e-05, "loss": 1.0161, "step": 6566 }, { "epoch": 1.3350274446025616, "grad_norm": 0.15111416578292847, "learning_rate": 6.655140852232279e-05, "loss": 0.9956, "step": 6567 }, { "epoch": 1.335230737954869, "grad_norm": 0.15531237423419952, "learning_rate": 6.653106884979153e-05, "loss": 0.9907, "step": 6568 }, { "epoch": 1.3354340313071762, "grad_norm": 0.13924407958984375, "learning_rate": 6.651072917726025e-05, "loss": 0.9618, "step": 6569 }, { "epoch": 1.3356373246594837, "grad_norm": 0.16933512687683105, "learning_rate": 6.649038950472898e-05, "loss": 1.1476, "step": 6570 }, { "epoch": 1.335840618011791, "grad_norm": 0.1602686047554016, "learning_rate": 6.64700498321977e-05, "loss": 1.0173, "step": 6571 }, { "epoch": 1.3360439113640985, "grad_norm": 0.16032569110393524, "learning_rate": 6.644971015966644e-05, "loss": 1.0694, "step": 6572 }, { "epoch": 1.3362472047164058, "grad_norm": 0.15900370478630066, "learning_rate": 6.642937048713516e-05, "loss": 1.0537, "step": 6573 }, { "epoch": 1.336450498068713, "grad_norm": 0.16162076592445374, "learning_rate": 6.640903081460389e-05, "loss": 1.078, "step": 6574 }, { "epoch": 1.3366537914210206, "grad_norm": 0.18270519375801086, "learning_rate": 6.638869114207261e-05, "loss": 1.1913, "step": 6575 }, { "epoch": 1.3368570847733279, "grad_norm": 0.16038493812084198, "learning_rate": 6.636835146954135e-05, "loss": 1.0904, "step": 6576 }, { "epoch": 1.3370603781256354, "grad_norm": 0.1554563045501709, "learning_rate": 6.634801179701007e-05, "loss": 1.0685, "step": 6577 }, { "epoch": 1.3372636714779427, "grad_norm": 0.1518605649471283, "learning_rate": 6.63276721244788e-05, "loss": 0.9499, "step": 6578 }, { "epoch": 1.33746696483025, "grad_norm": 0.14802587032318115, "learning_rate": 6.630733245194752e-05, "loss": 0.9432, "step": 6579 }, { "epoch": 1.3376702581825575, "grad_norm": 0.14748792350292206, "learning_rate": 6.628699277941626e-05, "loss": 0.9841, "step": 6580 }, { "epoch": 1.3378735515348648, "grad_norm": 0.15375643968582153, "learning_rate": 6.626665310688499e-05, "loss": 1.1379, "step": 6581 }, { "epoch": 1.3380768448871723, "grad_norm": 0.13825255632400513, "learning_rate": 6.624631343435371e-05, "loss": 0.935, "step": 6582 }, { "epoch": 1.3382801382394796, "grad_norm": 0.16053850948810577, "learning_rate": 6.622597376182243e-05, "loss": 1.0313, "step": 6583 }, { "epoch": 1.3384834315917868, "grad_norm": 0.1293373554944992, "learning_rate": 6.620563408929117e-05, "loss": 0.8012, "step": 6584 }, { "epoch": 1.3386867249440944, "grad_norm": 0.14056281745433807, "learning_rate": 6.61852944167599e-05, "loss": 0.9578, "step": 6585 }, { "epoch": 1.3388900182964016, "grad_norm": 0.15911336243152618, "learning_rate": 6.616495474422862e-05, "loss": 1.1905, "step": 6586 }, { "epoch": 1.3390933116487092, "grad_norm": 0.16372515261173248, "learning_rate": 6.614461507169735e-05, "loss": 1.1501, "step": 6587 }, { "epoch": 1.3392966050010164, "grad_norm": 0.14411291480064392, "learning_rate": 6.612427539916608e-05, "loss": 0.9337, "step": 6588 }, { "epoch": 1.3394998983533237, "grad_norm": 0.15178608894348145, "learning_rate": 6.610393572663481e-05, "loss": 0.9428, "step": 6589 }, { "epoch": 1.3397031917056312, "grad_norm": 0.14842566847801208, "learning_rate": 6.608359605410353e-05, "loss": 0.9875, "step": 6590 }, { "epoch": 1.3399064850579385, "grad_norm": 0.14400069415569305, "learning_rate": 6.606325638157226e-05, "loss": 1.0277, "step": 6591 }, { "epoch": 1.340109778410246, "grad_norm": 0.14039835333824158, "learning_rate": 6.6042916709041e-05, "loss": 0.9858, "step": 6592 }, { "epoch": 1.3403130717625533, "grad_norm": 0.16195842623710632, "learning_rate": 6.602257703650972e-05, "loss": 1.1181, "step": 6593 }, { "epoch": 1.3405163651148606, "grad_norm": 0.15187060832977295, "learning_rate": 6.600223736397844e-05, "loss": 1.0089, "step": 6594 }, { "epoch": 1.3407196584671681, "grad_norm": 0.1642741560935974, "learning_rate": 6.598189769144717e-05, "loss": 1.0599, "step": 6595 }, { "epoch": 1.3409229518194756, "grad_norm": 0.16725251078605652, "learning_rate": 6.59615580189159e-05, "loss": 1.069, "step": 6596 }, { "epoch": 1.341126245171783, "grad_norm": 0.1582929641008377, "learning_rate": 6.594121834638463e-05, "loss": 0.8783, "step": 6597 }, { "epoch": 1.3413295385240902, "grad_norm": 0.149756520986557, "learning_rate": 6.592087867385336e-05, "loss": 0.9534, "step": 6598 }, { "epoch": 1.3415328318763977, "grad_norm": 0.13332435488700867, "learning_rate": 6.590053900132208e-05, "loss": 0.9478, "step": 6599 }, { "epoch": 1.341736125228705, "grad_norm": 0.1550353765487671, "learning_rate": 6.588019932879082e-05, "loss": 0.9224, "step": 6600 }, { "epoch": 1.3419394185810125, "grad_norm": 0.15263979136943817, "learning_rate": 6.585985965625954e-05, "loss": 0.9526, "step": 6601 }, { "epoch": 1.3421427119333198, "grad_norm": 0.1639474779367447, "learning_rate": 6.583951998372827e-05, "loss": 1.0998, "step": 6602 }, { "epoch": 1.342346005285627, "grad_norm": 0.15959811210632324, "learning_rate": 6.581918031119699e-05, "loss": 1.061, "step": 6603 }, { "epoch": 1.3425492986379346, "grad_norm": 0.1565035730600357, "learning_rate": 6.579884063866572e-05, "loss": 1.0391, "step": 6604 }, { "epoch": 1.3427525919902419, "grad_norm": 0.14973071217536926, "learning_rate": 6.577850096613445e-05, "loss": 1.0028, "step": 6605 }, { "epoch": 1.3429558853425494, "grad_norm": 0.14768008887767792, "learning_rate": 6.575816129360316e-05, "loss": 0.9624, "step": 6606 }, { "epoch": 1.3431591786948567, "grad_norm": 0.1557006537914276, "learning_rate": 6.57378216210719e-05, "loss": 1.0577, "step": 6607 }, { "epoch": 1.343362472047164, "grad_norm": 0.13694258034229279, "learning_rate": 6.571748194854063e-05, "loss": 0.9131, "step": 6608 }, { "epoch": 1.3435657653994715, "grad_norm": 0.16022591292858124, "learning_rate": 6.569714227600937e-05, "loss": 0.95, "step": 6609 }, { "epoch": 1.3437690587517788, "grad_norm": 0.15533234179019928, "learning_rate": 6.567680260347808e-05, "loss": 1.0009, "step": 6610 }, { "epoch": 1.3439723521040863, "grad_norm": 0.13041260838508606, "learning_rate": 6.565646293094681e-05, "loss": 0.748, "step": 6611 }, { "epoch": 1.3441756454563936, "grad_norm": 0.14454182982444763, "learning_rate": 6.563612325841554e-05, "loss": 0.8943, "step": 6612 }, { "epoch": 1.3443789388087009, "grad_norm": 0.14608418941497803, "learning_rate": 6.561578358588428e-05, "loss": 0.8877, "step": 6613 }, { "epoch": 1.3445822321610084, "grad_norm": 0.17098468542099, "learning_rate": 6.559544391335299e-05, "loss": 0.8842, "step": 6614 }, { "epoch": 1.3447855255133156, "grad_norm": 0.17818143963813782, "learning_rate": 6.557510424082173e-05, "loss": 1.073, "step": 6615 }, { "epoch": 1.3449888188656232, "grad_norm": 0.14222939312458038, "learning_rate": 6.555476456829045e-05, "loss": 0.9136, "step": 6616 }, { "epoch": 1.3451921122179304, "grad_norm": 0.1513351947069168, "learning_rate": 6.553442489575919e-05, "loss": 0.9578, "step": 6617 }, { "epoch": 1.3453954055702377, "grad_norm": 0.17002101242542267, "learning_rate": 6.551408522322791e-05, "loss": 1.1007, "step": 6618 }, { "epoch": 1.3455986989225452, "grad_norm": 0.15087753534317017, "learning_rate": 6.549374555069664e-05, "loss": 1.0228, "step": 6619 }, { "epoch": 1.3458019922748525, "grad_norm": 0.14262332022190094, "learning_rate": 6.547340587816536e-05, "loss": 0.8861, "step": 6620 }, { "epoch": 1.34600528562716, "grad_norm": 0.14896303415298462, "learning_rate": 6.54530662056341e-05, "loss": 1.0326, "step": 6621 }, { "epoch": 1.3462085789794673, "grad_norm": 0.1533699333667755, "learning_rate": 6.543272653310282e-05, "loss": 1.1764, "step": 6622 }, { "epoch": 1.3464118723317746, "grad_norm": 0.15011627972126007, "learning_rate": 6.541238686057155e-05, "loss": 1.0611, "step": 6623 }, { "epoch": 1.3466151656840821, "grad_norm": 0.1621440351009369, "learning_rate": 6.539204718804027e-05, "loss": 0.9875, "step": 6624 }, { "epoch": 1.3468184590363896, "grad_norm": 0.1371341049671173, "learning_rate": 6.537170751550901e-05, "loss": 0.9026, "step": 6625 }, { "epoch": 1.347021752388697, "grad_norm": 0.1659182608127594, "learning_rate": 6.535136784297774e-05, "loss": 1.1134, "step": 6626 }, { "epoch": 1.3472250457410042, "grad_norm": 0.14671935141086578, "learning_rate": 6.533102817044646e-05, "loss": 0.9337, "step": 6627 }, { "epoch": 1.3474283390933117, "grad_norm": 0.16731108725070953, "learning_rate": 6.531068849791518e-05, "loss": 1.297, "step": 6628 }, { "epoch": 1.347631632445619, "grad_norm": 0.1497422158718109, "learning_rate": 6.529034882538392e-05, "loss": 0.9333, "step": 6629 }, { "epoch": 1.3478349257979265, "grad_norm": 0.15278853476047516, "learning_rate": 6.527000915285265e-05, "loss": 0.8934, "step": 6630 }, { "epoch": 1.3480382191502338, "grad_norm": 0.14825202524662018, "learning_rate": 6.524966948032137e-05, "loss": 1.027, "step": 6631 }, { "epoch": 1.348241512502541, "grad_norm": 0.1355981081724167, "learning_rate": 6.52293298077901e-05, "loss": 0.8277, "step": 6632 }, { "epoch": 1.3484448058548486, "grad_norm": 0.15200775861740112, "learning_rate": 6.520899013525883e-05, "loss": 1.0864, "step": 6633 }, { "epoch": 1.348648099207156, "grad_norm": 0.15310457348823547, "learning_rate": 6.518865046272756e-05, "loss": 0.9925, "step": 6634 }, { "epoch": 1.3488513925594634, "grad_norm": 0.15348269045352936, "learning_rate": 6.516831079019628e-05, "loss": 1.0431, "step": 6635 }, { "epoch": 1.3490546859117707, "grad_norm": 0.17338477075099945, "learning_rate": 6.514797111766501e-05, "loss": 1.017, "step": 6636 }, { "epoch": 1.349257979264078, "grad_norm": 0.15221281349658966, "learning_rate": 6.512763144513374e-05, "loss": 1.0562, "step": 6637 }, { "epoch": 1.3494612726163855, "grad_norm": 0.13660123944282532, "learning_rate": 6.510729177260247e-05, "loss": 0.8976, "step": 6638 }, { "epoch": 1.3496645659686928, "grad_norm": 0.15344764292240143, "learning_rate": 6.50869521000712e-05, "loss": 1.0725, "step": 6639 }, { "epoch": 1.3498678593210003, "grad_norm": 0.13767392933368683, "learning_rate": 6.506661242753992e-05, "loss": 0.9795, "step": 6640 }, { "epoch": 1.3500711526733076, "grad_norm": 0.19037991762161255, "learning_rate": 6.504627275500866e-05, "loss": 1.3235, "step": 6641 }, { "epoch": 1.3502744460256149, "grad_norm": 0.153898686170578, "learning_rate": 6.502593308247738e-05, "loss": 0.9877, "step": 6642 }, { "epoch": 1.3504777393779224, "grad_norm": 0.13859529793262482, "learning_rate": 6.50055934099461e-05, "loss": 0.8863, "step": 6643 }, { "epoch": 1.3506810327302297, "grad_norm": 0.1579197347164154, "learning_rate": 6.498525373741483e-05, "loss": 1.0816, "step": 6644 }, { "epoch": 1.3508843260825372, "grad_norm": 0.1565553843975067, "learning_rate": 6.496491406488355e-05, "loss": 1.07, "step": 6645 }, { "epoch": 1.3510876194348445, "grad_norm": 0.14760999381542206, "learning_rate": 6.494457439235229e-05, "loss": 0.9851, "step": 6646 }, { "epoch": 1.3512909127871517, "grad_norm": 0.1595640629529953, "learning_rate": 6.4924234719821e-05, "loss": 1.109, "step": 6647 }, { "epoch": 1.3514942061394593, "grad_norm": 0.169783353805542, "learning_rate": 6.490389504728974e-05, "loss": 0.9721, "step": 6648 }, { "epoch": 1.3516974994917665, "grad_norm": 0.1549369841814041, "learning_rate": 6.488355537475847e-05, "loss": 1.0158, "step": 6649 }, { "epoch": 1.351900792844074, "grad_norm": 0.1654866486787796, "learning_rate": 6.48632157022272e-05, "loss": 1.2272, "step": 6650 }, { "epoch": 1.3521040861963813, "grad_norm": 0.16200962662696838, "learning_rate": 6.484287602969591e-05, "loss": 0.9346, "step": 6651 }, { "epoch": 1.3523073795486886, "grad_norm": 0.18851538002490997, "learning_rate": 6.482253635716465e-05, "loss": 1.1206, "step": 6652 }, { "epoch": 1.3525106729009961, "grad_norm": 0.16858980059623718, "learning_rate": 6.480219668463338e-05, "loss": 1.087, "step": 6653 }, { "epoch": 1.3527139662533036, "grad_norm": 0.17450284957885742, "learning_rate": 6.478185701210211e-05, "loss": 1.0501, "step": 6654 }, { "epoch": 1.352917259605611, "grad_norm": 0.1536058485507965, "learning_rate": 6.476151733957083e-05, "loss": 1.1982, "step": 6655 }, { "epoch": 1.3531205529579182, "grad_norm": 0.16767753660678864, "learning_rate": 6.474117766703956e-05, "loss": 1.1293, "step": 6656 }, { "epoch": 1.3533238463102257, "grad_norm": 0.1527155041694641, "learning_rate": 6.472083799450829e-05, "loss": 1.0746, "step": 6657 }, { "epoch": 1.353527139662533, "grad_norm": 0.13894875347614288, "learning_rate": 6.470049832197703e-05, "loss": 0.9043, "step": 6658 }, { "epoch": 1.3537304330148405, "grad_norm": 0.16773444414138794, "learning_rate": 6.468015864944574e-05, "loss": 1.1785, "step": 6659 }, { "epoch": 1.3539337263671478, "grad_norm": 0.1570262908935547, "learning_rate": 6.465981897691448e-05, "loss": 1.1152, "step": 6660 }, { "epoch": 1.354137019719455, "grad_norm": 0.16130763292312622, "learning_rate": 6.46394793043832e-05, "loss": 1.0144, "step": 6661 }, { "epoch": 1.3543403130717626, "grad_norm": 0.15057474374771118, "learning_rate": 6.461913963185194e-05, "loss": 1.0204, "step": 6662 }, { "epoch": 1.35454360642407, "grad_norm": 0.15316952764987946, "learning_rate": 6.459879995932065e-05, "loss": 1.0506, "step": 6663 }, { "epoch": 1.3547468997763774, "grad_norm": 0.1641097515821457, "learning_rate": 6.457846028678939e-05, "loss": 1.0777, "step": 6664 }, { "epoch": 1.3549501931286847, "grad_norm": 0.15480592846870422, "learning_rate": 6.455812061425811e-05, "loss": 1.0339, "step": 6665 }, { "epoch": 1.355153486480992, "grad_norm": 0.14954860508441925, "learning_rate": 6.453778094172685e-05, "loss": 0.9126, "step": 6666 }, { "epoch": 1.3553567798332995, "grad_norm": 0.17514324188232422, "learning_rate": 6.451744126919556e-05, "loss": 1.0698, "step": 6667 }, { "epoch": 1.3555600731856068, "grad_norm": 0.16481555998325348, "learning_rate": 6.44971015966643e-05, "loss": 1.1386, "step": 6668 }, { "epoch": 1.3557633665379143, "grad_norm": 0.15936695039272308, "learning_rate": 6.447676192413302e-05, "loss": 1.1154, "step": 6669 }, { "epoch": 1.3559666598902216, "grad_norm": 0.14329056441783905, "learning_rate": 6.445642225160176e-05, "loss": 0.9836, "step": 6670 }, { "epoch": 1.3561699532425289, "grad_norm": 0.1695948988199234, "learning_rate": 6.443608257907047e-05, "loss": 1.0999, "step": 6671 }, { "epoch": 1.3563732465948364, "grad_norm": 0.15652230381965637, "learning_rate": 6.441574290653921e-05, "loss": 0.992, "step": 6672 }, { "epoch": 1.3565765399471437, "grad_norm": 0.14709965884685516, "learning_rate": 6.439540323400793e-05, "loss": 1.0105, "step": 6673 }, { "epoch": 1.3567798332994512, "grad_norm": 0.14747577905654907, "learning_rate": 6.437506356147667e-05, "loss": 0.9413, "step": 6674 }, { "epoch": 1.3569831266517585, "grad_norm": 0.16515463590621948, "learning_rate": 6.435472388894538e-05, "loss": 0.9619, "step": 6675 }, { "epoch": 1.3571864200040658, "grad_norm": 0.1489870250225067, "learning_rate": 6.433438421641412e-05, "loss": 0.893, "step": 6676 }, { "epoch": 1.3573897133563733, "grad_norm": 0.1626134067773819, "learning_rate": 6.431404454388285e-05, "loss": 1.051, "step": 6677 }, { "epoch": 1.3575930067086805, "grad_norm": 0.16492238640785217, "learning_rate": 6.429370487135158e-05, "loss": 1.1629, "step": 6678 }, { "epoch": 1.357796300060988, "grad_norm": 0.1518096923828125, "learning_rate": 6.427336519882031e-05, "loss": 0.958, "step": 6679 }, { "epoch": 1.3579995934132953, "grad_norm": 0.14562292397022247, "learning_rate": 6.425302552628903e-05, "loss": 0.944, "step": 6680 }, { "epoch": 1.3582028867656026, "grad_norm": 0.17256395518779755, "learning_rate": 6.423268585375776e-05, "loss": 1.1921, "step": 6681 }, { "epoch": 1.3584061801179101, "grad_norm": 0.15304385125637054, "learning_rate": 6.42123461812265e-05, "loss": 1.0311, "step": 6682 }, { "epoch": 1.3586094734702177, "grad_norm": 0.159259632229805, "learning_rate": 6.419200650869522e-05, "loss": 1.0585, "step": 6683 }, { "epoch": 1.358812766822525, "grad_norm": 0.15427778661251068, "learning_rate": 6.417166683616394e-05, "loss": 1.0914, "step": 6684 }, { "epoch": 1.3590160601748322, "grad_norm": 0.14755503833293915, "learning_rate": 6.415132716363267e-05, "loss": 0.9637, "step": 6685 }, { "epoch": 1.3592193535271397, "grad_norm": 0.16953536868095398, "learning_rate": 6.413098749110139e-05, "loss": 1.0948, "step": 6686 }, { "epoch": 1.359422646879447, "grad_norm": 0.15146459639072418, "learning_rate": 6.411064781857013e-05, "loss": 0.8806, "step": 6687 }, { "epoch": 1.3596259402317545, "grad_norm": 0.14081160724163055, "learning_rate": 6.409030814603884e-05, "loss": 0.9432, "step": 6688 }, { "epoch": 1.3598292335840618, "grad_norm": 0.1574667990207672, "learning_rate": 6.406996847350758e-05, "loss": 1.1607, "step": 6689 }, { "epoch": 1.3600325269363691, "grad_norm": 0.14919863641262054, "learning_rate": 6.40496288009763e-05, "loss": 0.9258, "step": 6690 }, { "epoch": 1.3602358202886766, "grad_norm": 0.15582725405693054, "learning_rate": 6.402928912844504e-05, "loss": 0.9811, "step": 6691 }, { "epoch": 1.360439113640984, "grad_norm": 0.16567249596118927, "learning_rate": 6.400894945591375e-05, "loss": 1.1126, "step": 6692 }, { "epoch": 1.3606424069932914, "grad_norm": 0.1419053077697754, "learning_rate": 6.398860978338249e-05, "loss": 0.9852, "step": 6693 }, { "epoch": 1.3608457003455987, "grad_norm": 0.1396085023880005, "learning_rate": 6.396827011085122e-05, "loss": 0.9273, "step": 6694 }, { "epoch": 1.361048993697906, "grad_norm": 0.1635066717863083, "learning_rate": 6.394793043831995e-05, "loss": 1.1869, "step": 6695 }, { "epoch": 1.3612522870502135, "grad_norm": 0.14211490750312805, "learning_rate": 6.392759076578866e-05, "loss": 0.8805, "step": 6696 }, { "epoch": 1.3614555804025208, "grad_norm": 0.13765402138233185, "learning_rate": 6.39072510932574e-05, "loss": 0.8661, "step": 6697 }, { "epoch": 1.3616588737548283, "grad_norm": 0.15493972599506378, "learning_rate": 6.388691142072613e-05, "loss": 1.0499, "step": 6698 }, { "epoch": 1.3618621671071356, "grad_norm": 0.14159010350704193, "learning_rate": 6.386657174819486e-05, "loss": 0.9232, "step": 6699 }, { "epoch": 1.3620654604594429, "grad_norm": 0.14542338252067566, "learning_rate": 6.384623207566358e-05, "loss": 0.9365, "step": 6700 }, { "epoch": 1.3622687538117504, "grad_norm": 0.16629654169082642, "learning_rate": 6.382589240313231e-05, "loss": 1.1661, "step": 6701 }, { "epoch": 1.3624720471640577, "grad_norm": 0.15916363894939423, "learning_rate": 6.380555273060104e-05, "loss": 0.9942, "step": 6702 }, { "epoch": 1.3626753405163652, "grad_norm": 0.1526755392551422, "learning_rate": 6.378521305806978e-05, "loss": 1.049, "step": 6703 }, { "epoch": 1.3628786338686725, "grad_norm": 0.15990319848060608, "learning_rate": 6.376487338553849e-05, "loss": 1.0295, "step": 6704 }, { "epoch": 1.3630819272209798, "grad_norm": 0.1506752073764801, "learning_rate": 6.374453371300723e-05, "loss": 1.1331, "step": 6705 }, { "epoch": 1.3632852205732873, "grad_norm": 0.15335889160633087, "learning_rate": 6.372419404047595e-05, "loss": 1.1036, "step": 6706 }, { "epoch": 1.3634885139255946, "grad_norm": 0.15802709758281708, "learning_rate": 6.370385436794469e-05, "loss": 1.029, "step": 6707 }, { "epoch": 1.363691807277902, "grad_norm": 0.13966608047485352, "learning_rate": 6.36835146954134e-05, "loss": 0.9259, "step": 6708 }, { "epoch": 1.3638951006302094, "grad_norm": 0.12807948887348175, "learning_rate": 6.366317502288214e-05, "loss": 0.8966, "step": 6709 }, { "epoch": 1.3640983939825166, "grad_norm": 0.14886990189552307, "learning_rate": 6.364283535035086e-05, "loss": 0.9927, "step": 6710 }, { "epoch": 1.3643016873348242, "grad_norm": 0.13951446115970612, "learning_rate": 6.36224956778196e-05, "loss": 0.8846, "step": 6711 }, { "epoch": 1.3645049806871317, "grad_norm": 0.1629723608493805, "learning_rate": 6.360215600528831e-05, "loss": 1.147, "step": 6712 }, { "epoch": 1.364708274039439, "grad_norm": 0.1576232761144638, "learning_rate": 6.358181633275705e-05, "loss": 0.9147, "step": 6713 }, { "epoch": 1.3649115673917462, "grad_norm": 0.1496376097202301, "learning_rate": 6.356147666022577e-05, "loss": 0.9403, "step": 6714 }, { "epoch": 1.3651148607440537, "grad_norm": 0.144853413105011, "learning_rate": 6.354113698769451e-05, "loss": 0.9713, "step": 6715 }, { "epoch": 1.365318154096361, "grad_norm": 0.15076938271522522, "learning_rate": 6.352079731516322e-05, "loss": 0.9689, "step": 6716 }, { "epoch": 1.3655214474486685, "grad_norm": 0.15283893048763275, "learning_rate": 6.350045764263196e-05, "loss": 1.0111, "step": 6717 }, { "epoch": 1.3657247408009758, "grad_norm": 0.16923551261425018, "learning_rate": 6.348011797010068e-05, "loss": 1.1435, "step": 6718 }, { "epoch": 1.3659280341532831, "grad_norm": 0.16161029040813446, "learning_rate": 6.345977829756942e-05, "loss": 1.2014, "step": 6719 }, { "epoch": 1.3661313275055906, "grad_norm": 0.15895690023899078, "learning_rate": 6.343943862503813e-05, "loss": 1.1507, "step": 6720 }, { "epoch": 1.366334620857898, "grad_norm": 0.17195092141628265, "learning_rate": 6.341909895250687e-05, "loss": 1.1747, "step": 6721 }, { "epoch": 1.3665379142102054, "grad_norm": 0.14363127946853638, "learning_rate": 6.33987592799756e-05, "loss": 0.9227, "step": 6722 }, { "epoch": 1.3667412075625127, "grad_norm": 0.14739079773426056, "learning_rate": 6.337841960744433e-05, "loss": 0.9343, "step": 6723 }, { "epoch": 1.36694450091482, "grad_norm": 0.16694903373718262, "learning_rate": 6.335807993491304e-05, "loss": 1.1243, "step": 6724 }, { "epoch": 1.3671477942671275, "grad_norm": 0.15602125227451324, "learning_rate": 6.333774026238178e-05, "loss": 1.1501, "step": 6725 }, { "epoch": 1.3673510876194348, "grad_norm": 0.14906419813632965, "learning_rate": 6.33174005898505e-05, "loss": 1.0106, "step": 6726 }, { "epoch": 1.3675543809717423, "grad_norm": 0.17862831056118011, "learning_rate": 6.329706091731924e-05, "loss": 1.2313, "step": 6727 }, { "epoch": 1.3677576743240496, "grad_norm": 0.1424816995859146, "learning_rate": 6.327672124478796e-05, "loss": 1.1375, "step": 6728 }, { "epoch": 1.3679609676763569, "grad_norm": 0.15217390656471252, "learning_rate": 6.325638157225668e-05, "loss": 1.1057, "step": 6729 }, { "epoch": 1.3681642610286644, "grad_norm": 0.1580178588628769, "learning_rate": 6.323604189972542e-05, "loss": 0.9414, "step": 6730 }, { "epoch": 1.3683675543809717, "grad_norm": 0.14510442316532135, "learning_rate": 6.321570222719414e-05, "loss": 0.9276, "step": 6731 }, { "epoch": 1.3685708477332792, "grad_norm": 0.15985293686389923, "learning_rate": 6.319536255466287e-05, "loss": 1.2001, "step": 6732 }, { "epoch": 1.3687741410855865, "grad_norm": 0.16616447269916534, "learning_rate": 6.317502288213159e-05, "loss": 1.0798, "step": 6733 }, { "epoch": 1.3689774344378938, "grad_norm": 0.15168656408786774, "learning_rate": 6.315468320960033e-05, "loss": 1.0198, "step": 6734 }, { "epoch": 1.3691807277902013, "grad_norm": 0.15670320391654968, "learning_rate": 6.313434353706905e-05, "loss": 1.0823, "step": 6735 }, { "epoch": 1.3693840211425086, "grad_norm": 0.15181729197502136, "learning_rate": 6.311400386453779e-05, "loss": 1.0202, "step": 6736 }, { "epoch": 1.369587314494816, "grad_norm": 0.15091702342033386, "learning_rate": 6.30936641920065e-05, "loss": 0.9388, "step": 6737 }, { "epoch": 1.3697906078471234, "grad_norm": 0.1362704038619995, "learning_rate": 6.307332451947524e-05, "loss": 0.8708, "step": 6738 }, { "epoch": 1.3699939011994307, "grad_norm": 0.1704317331314087, "learning_rate": 6.305298484694397e-05, "loss": 1.1833, "step": 6739 }, { "epoch": 1.3701971945517382, "grad_norm": 0.14978265762329102, "learning_rate": 6.30326451744127e-05, "loss": 1.0367, "step": 6740 }, { "epoch": 1.3704004879040457, "grad_norm": 0.1564272940158844, "learning_rate": 6.301230550188141e-05, "loss": 1.0774, "step": 6741 }, { "epoch": 1.370603781256353, "grad_norm": 0.157321035861969, "learning_rate": 6.299196582935015e-05, "loss": 1.0637, "step": 6742 }, { "epoch": 1.3708070746086602, "grad_norm": 0.14078298211097717, "learning_rate": 6.297162615681888e-05, "loss": 1.0666, "step": 6743 }, { "epoch": 1.3710103679609678, "grad_norm": 0.17371569573879242, "learning_rate": 6.295128648428761e-05, "loss": 1.0625, "step": 6744 }, { "epoch": 1.371213661313275, "grad_norm": 0.15704642236232758, "learning_rate": 6.293094681175633e-05, "loss": 1.0261, "step": 6745 }, { "epoch": 1.3714169546655826, "grad_norm": 0.15526318550109863, "learning_rate": 6.291060713922506e-05, "loss": 1.0443, "step": 6746 }, { "epoch": 1.3716202480178898, "grad_norm": 0.16687656939029694, "learning_rate": 6.289026746669379e-05, "loss": 1.0815, "step": 6747 }, { "epoch": 1.3718235413701971, "grad_norm": 0.1629595309495926, "learning_rate": 6.286992779416253e-05, "loss": 1.0846, "step": 6748 }, { "epoch": 1.3720268347225046, "grad_norm": 0.1666935831308365, "learning_rate": 6.284958812163124e-05, "loss": 1.1281, "step": 6749 }, { "epoch": 1.372230128074812, "grad_norm": 0.1540839672088623, "learning_rate": 6.282924844909997e-05, "loss": 1.0241, "step": 6750 }, { "epoch": 1.3724334214271194, "grad_norm": 0.16391707956790924, "learning_rate": 6.28089087765687e-05, "loss": 1.0616, "step": 6751 }, { "epoch": 1.3726367147794267, "grad_norm": 0.18039560317993164, "learning_rate": 6.278856910403744e-05, "loss": 1.1968, "step": 6752 }, { "epoch": 1.372840008131734, "grad_norm": 0.15921954810619354, "learning_rate": 6.276822943150615e-05, "loss": 0.9092, "step": 6753 }, { "epoch": 1.3730433014840415, "grad_norm": 0.15045933425426483, "learning_rate": 6.274788975897489e-05, "loss": 1.0155, "step": 6754 }, { "epoch": 1.3732465948363488, "grad_norm": 0.14296181499958038, "learning_rate": 6.272755008644361e-05, "loss": 0.9719, "step": 6755 }, { "epoch": 1.3734498881886563, "grad_norm": 0.1567305028438568, "learning_rate": 6.270721041391235e-05, "loss": 1.1043, "step": 6756 }, { "epoch": 1.3736531815409636, "grad_norm": 0.17999158799648285, "learning_rate": 6.268687074138106e-05, "loss": 1.262, "step": 6757 }, { "epoch": 1.373856474893271, "grad_norm": 0.1612483412027359, "learning_rate": 6.26665310688498e-05, "loss": 1.0309, "step": 6758 }, { "epoch": 1.3740597682455784, "grad_norm": 0.1687174290418625, "learning_rate": 6.264619139631852e-05, "loss": 1.0872, "step": 6759 }, { "epoch": 1.3742630615978857, "grad_norm": 0.14612844586372375, "learning_rate": 6.262585172378726e-05, "loss": 0.9855, "step": 6760 }, { "epoch": 1.3744663549501932, "grad_norm": 0.15046149492263794, "learning_rate": 6.260551205125597e-05, "loss": 0.997, "step": 6761 }, { "epoch": 1.3746696483025005, "grad_norm": 0.16940894722938538, "learning_rate": 6.258517237872471e-05, "loss": 1.1318, "step": 6762 }, { "epoch": 1.3748729416548078, "grad_norm": 0.1634390503168106, "learning_rate": 6.256483270619343e-05, "loss": 1.1433, "step": 6763 }, { "epoch": 1.3750762350071153, "grad_norm": 0.14166627824306488, "learning_rate": 6.254449303366217e-05, "loss": 0.8849, "step": 6764 }, { "epoch": 1.3752795283594226, "grad_norm": 0.1518392264842987, "learning_rate": 6.252415336113088e-05, "loss": 0.9538, "step": 6765 }, { "epoch": 1.37548282171173, "grad_norm": 0.1509694904088974, "learning_rate": 6.250381368859962e-05, "loss": 1.137, "step": 6766 }, { "epoch": 1.3756861150640374, "grad_norm": 0.16514116525650024, "learning_rate": 6.248347401606835e-05, "loss": 0.894, "step": 6767 }, { "epoch": 1.3758894084163447, "grad_norm": 0.15788832306861877, "learning_rate": 6.246313434353708e-05, "loss": 0.9714, "step": 6768 }, { "epoch": 1.3760927017686522, "grad_norm": 0.1754407286643982, "learning_rate": 6.24427946710058e-05, "loss": 1.1633, "step": 6769 }, { "epoch": 1.3762959951209597, "grad_norm": 0.15434888005256653, "learning_rate": 6.242245499847452e-05, "loss": 1.0316, "step": 6770 }, { "epoch": 1.376499288473267, "grad_norm": 0.15279196202754974, "learning_rate": 6.240211532594326e-05, "loss": 0.9891, "step": 6771 }, { "epoch": 1.3767025818255743, "grad_norm": 0.15830421447753906, "learning_rate": 6.238177565341198e-05, "loss": 1.1078, "step": 6772 }, { "epoch": 1.3769058751778818, "grad_norm": 0.1486409604549408, "learning_rate": 6.23614359808807e-05, "loss": 1.0091, "step": 6773 }, { "epoch": 1.377109168530189, "grad_norm": 0.13369891047477722, "learning_rate": 6.234109630834943e-05, "loss": 0.9391, "step": 6774 }, { "epoch": 1.3773124618824966, "grad_norm": 0.15606249868869781, "learning_rate": 6.232075663581817e-05, "loss": 0.9228, "step": 6775 }, { "epoch": 1.3775157552348039, "grad_norm": 0.1541958600282669, "learning_rate": 6.230041696328689e-05, "loss": 0.9437, "step": 6776 }, { "epoch": 1.3777190485871111, "grad_norm": 0.14433227479457855, "learning_rate": 6.228007729075562e-05, "loss": 0.8677, "step": 6777 }, { "epoch": 1.3779223419394186, "grad_norm": 0.17673447728157043, "learning_rate": 6.225973761822434e-05, "loss": 1.1797, "step": 6778 }, { "epoch": 1.378125635291726, "grad_norm": 0.14402912557125092, "learning_rate": 6.223939794569308e-05, "loss": 1.0249, "step": 6779 }, { "epoch": 1.3783289286440334, "grad_norm": 0.17239032685756683, "learning_rate": 6.22190582731618e-05, "loss": 1.1306, "step": 6780 }, { "epoch": 1.3785322219963407, "grad_norm": 0.147936150431633, "learning_rate": 6.219871860063053e-05, "loss": 0.9927, "step": 6781 }, { "epoch": 1.378735515348648, "grad_norm": 0.16032187640666962, "learning_rate": 6.217837892809925e-05, "loss": 0.9473, "step": 6782 }, { "epoch": 1.3789388087009555, "grad_norm": 0.14192989468574524, "learning_rate": 6.215803925556799e-05, "loss": 0.8604, "step": 6783 }, { "epoch": 1.3791421020532628, "grad_norm": 0.16771048307418823, "learning_rate": 6.213769958303672e-05, "loss": 1.118, "step": 6784 }, { "epoch": 1.3793453954055703, "grad_norm": 0.14662449061870575, "learning_rate": 6.211735991050544e-05, "loss": 1.0081, "step": 6785 }, { "epoch": 1.3795486887578776, "grad_norm": 0.1614457666873932, "learning_rate": 6.209702023797416e-05, "loss": 1.2436, "step": 6786 }, { "epoch": 1.379751982110185, "grad_norm": 0.17378991842269897, "learning_rate": 6.20766805654429e-05, "loss": 1.1431, "step": 6787 }, { "epoch": 1.3799552754624924, "grad_norm": 0.1449557989835739, "learning_rate": 6.205634089291163e-05, "loss": 1.0458, "step": 6788 }, { "epoch": 1.3801585688147997, "grad_norm": 0.1508321315050125, "learning_rate": 6.203600122038035e-05, "loss": 0.9055, "step": 6789 }, { "epoch": 1.3803618621671072, "grad_norm": 0.14709822833538055, "learning_rate": 6.201566154784908e-05, "loss": 0.9653, "step": 6790 }, { "epoch": 1.3805651555194145, "grad_norm": 0.15812620520591736, "learning_rate": 6.199532187531781e-05, "loss": 1.0011, "step": 6791 }, { "epoch": 1.3807684488717218, "grad_norm": 0.135645791888237, "learning_rate": 6.197498220278654e-05, "loss": 0.8306, "step": 6792 }, { "epoch": 1.3809717422240293, "grad_norm": 0.15195050835609436, "learning_rate": 6.195464253025528e-05, "loss": 0.9038, "step": 6793 }, { "epoch": 1.3811750355763366, "grad_norm": 0.16135457158088684, "learning_rate": 6.193430285772399e-05, "loss": 1.1356, "step": 6794 }, { "epoch": 1.381378328928644, "grad_norm": 0.16234029829502106, "learning_rate": 6.191396318519272e-05, "loss": 0.9694, "step": 6795 }, { "epoch": 1.3815816222809514, "grad_norm": 0.16726230084896088, "learning_rate": 6.189362351266145e-05, "loss": 1.0675, "step": 6796 }, { "epoch": 1.3817849156332587, "grad_norm": 0.15473772585391998, "learning_rate": 6.187328384013019e-05, "loss": 1.0468, "step": 6797 }, { "epoch": 1.3819882089855662, "grad_norm": 0.1563412994146347, "learning_rate": 6.18529441675989e-05, "loss": 1.0272, "step": 6798 }, { "epoch": 1.3821915023378737, "grad_norm": 0.1610526293516159, "learning_rate": 6.183260449506764e-05, "loss": 1.1039, "step": 6799 }, { "epoch": 1.382394795690181, "grad_norm": 0.1570015698671341, "learning_rate": 6.181226482253636e-05, "loss": 1.0261, "step": 6800 }, { "epoch": 1.3825980890424883, "grad_norm": 0.16958126425743103, "learning_rate": 6.17919251500051e-05, "loss": 1.015, "step": 6801 }, { "epoch": 1.3828013823947956, "grad_norm": 0.15728716552257538, "learning_rate": 6.177158547747381e-05, "loss": 0.9877, "step": 6802 }, { "epoch": 1.383004675747103, "grad_norm": 0.15608420968055725, "learning_rate": 6.175124580494255e-05, "loss": 1.0438, "step": 6803 }, { "epoch": 1.3832079690994106, "grad_norm": 0.1442839503288269, "learning_rate": 6.173090613241127e-05, "loss": 0.9444, "step": 6804 }, { "epoch": 1.3834112624517179, "grad_norm": 0.15385085344314575, "learning_rate": 6.171056645988001e-05, "loss": 0.9501, "step": 6805 }, { "epoch": 1.3836145558040251, "grad_norm": 0.16265326738357544, "learning_rate": 6.169022678734872e-05, "loss": 1.0807, "step": 6806 }, { "epoch": 1.3838178491563327, "grad_norm": 0.1667504757642746, "learning_rate": 6.166988711481746e-05, "loss": 1.0506, "step": 6807 }, { "epoch": 1.38402114250864, "grad_norm": 0.132582888007164, "learning_rate": 6.164954744228618e-05, "loss": 0.9013, "step": 6808 }, { "epoch": 1.3842244358609475, "grad_norm": 0.1588151454925537, "learning_rate": 6.162920776975492e-05, "loss": 1.1206, "step": 6809 }, { "epoch": 1.3844277292132547, "grad_norm": 0.16293421387672424, "learning_rate": 6.160886809722363e-05, "loss": 1.1359, "step": 6810 }, { "epoch": 1.384631022565562, "grad_norm": 0.18560263514518738, "learning_rate": 6.158852842469236e-05, "loss": 1.1687, "step": 6811 }, { "epoch": 1.3848343159178695, "grad_norm": 0.14495523273944855, "learning_rate": 6.15681887521611e-05, "loss": 0.8765, "step": 6812 }, { "epoch": 1.3850376092701768, "grad_norm": 0.16819177567958832, "learning_rate": 6.154784907962982e-05, "loss": 1.1581, "step": 6813 }, { "epoch": 1.3852409026224843, "grad_norm": 0.15601789951324463, "learning_rate": 6.152750940709854e-05, "loss": 1.0977, "step": 6814 }, { "epoch": 1.3854441959747916, "grad_norm": 0.16021013259887695, "learning_rate": 6.150716973456727e-05, "loss": 0.9583, "step": 6815 }, { "epoch": 1.385647489327099, "grad_norm": 0.1587619185447693, "learning_rate": 6.1486830062036e-05, "loss": 1.0329, "step": 6816 }, { "epoch": 1.3858507826794064, "grad_norm": 0.1507323831319809, "learning_rate": 6.146649038950473e-05, "loss": 1.0919, "step": 6817 }, { "epoch": 1.3860540760317137, "grad_norm": 0.1662292331457138, "learning_rate": 6.144615071697346e-05, "loss": 1.0218, "step": 6818 }, { "epoch": 1.3862573693840212, "grad_norm": 0.13790364563465118, "learning_rate": 6.142581104444218e-05, "loss": 0.8317, "step": 6819 }, { "epoch": 1.3864606627363285, "grad_norm": 0.14748938381671906, "learning_rate": 6.140547137191092e-05, "loss": 0.9958, "step": 6820 }, { "epoch": 1.3866639560886358, "grad_norm": 0.14733365178108215, "learning_rate": 6.138513169937964e-05, "loss": 0.9832, "step": 6821 }, { "epoch": 1.3868672494409433, "grad_norm": 0.1622340977191925, "learning_rate": 6.136479202684837e-05, "loss": 1.1285, "step": 6822 }, { "epoch": 1.3870705427932506, "grad_norm": 0.15207748115062714, "learning_rate": 6.134445235431709e-05, "loss": 0.9082, "step": 6823 }, { "epoch": 1.387273836145558, "grad_norm": 0.14232246577739716, "learning_rate": 6.132411268178583e-05, "loss": 0.873, "step": 6824 }, { "epoch": 1.3874771294978654, "grad_norm": 0.1612071394920349, "learning_rate": 6.130377300925455e-05, "loss": 1.061, "step": 6825 }, { "epoch": 1.3876804228501727, "grad_norm": 0.16399390995502472, "learning_rate": 6.128343333672328e-05, "loss": 1.1603, "step": 6826 }, { "epoch": 1.3878837162024802, "grad_norm": 0.14788468182086945, "learning_rate": 6.1263093664192e-05, "loss": 0.9301, "step": 6827 }, { "epoch": 1.3880870095547875, "grad_norm": 0.17127063870429993, "learning_rate": 6.124275399166074e-05, "loss": 1.0749, "step": 6828 }, { "epoch": 1.388290302907095, "grad_norm": 0.15077459812164307, "learning_rate": 6.122241431912946e-05, "loss": 0.939, "step": 6829 }, { "epoch": 1.3884935962594023, "grad_norm": 0.16353972256183624, "learning_rate": 6.120207464659819e-05, "loss": 0.9913, "step": 6830 }, { "epoch": 1.3886968896117096, "grad_norm": 0.1791125386953354, "learning_rate": 6.118173497406691e-05, "loss": 1.2833, "step": 6831 }, { "epoch": 1.388900182964017, "grad_norm": 0.14946220815181732, "learning_rate": 6.116139530153565e-05, "loss": 1.0874, "step": 6832 }, { "epoch": 1.3891034763163246, "grad_norm": 0.1695471853017807, "learning_rate": 6.114105562900438e-05, "loss": 1.0632, "step": 6833 }, { "epoch": 1.3893067696686319, "grad_norm": 0.16645880043506622, "learning_rate": 6.11207159564731e-05, "loss": 1.1748, "step": 6834 }, { "epoch": 1.3895100630209392, "grad_norm": 0.1492735743522644, "learning_rate": 6.110037628394183e-05, "loss": 1.0108, "step": 6835 }, { "epoch": 1.3897133563732467, "grad_norm": 0.1486160010099411, "learning_rate": 6.108003661141056e-05, "loss": 1.0036, "step": 6836 }, { "epoch": 1.389916649725554, "grad_norm": 0.158762589097023, "learning_rate": 6.105969693887929e-05, "loss": 1.1261, "step": 6837 }, { "epoch": 1.3901199430778615, "grad_norm": 0.16165146231651306, "learning_rate": 6.103935726634801e-05, "loss": 1.0346, "step": 6838 }, { "epoch": 1.3903232364301688, "grad_norm": 0.147782564163208, "learning_rate": 6.101901759381674e-05, "loss": 1.0539, "step": 6839 }, { "epoch": 1.390526529782476, "grad_norm": 0.1514325588941574, "learning_rate": 6.0998677921285475e-05, "loss": 1.0151, "step": 6840 }, { "epoch": 1.3907298231347835, "grad_norm": 0.144688218832016, "learning_rate": 6.09783382487542e-05, "loss": 0.8587, "step": 6841 }, { "epoch": 1.3909331164870908, "grad_norm": 0.14820197224617004, "learning_rate": 6.095799857622293e-05, "loss": 0.9625, "step": 6842 }, { "epoch": 1.3911364098393983, "grad_norm": 0.16844014823436737, "learning_rate": 6.0937658903691655e-05, "loss": 1.0624, "step": 6843 }, { "epoch": 1.3913397031917056, "grad_norm": 0.1512283980846405, "learning_rate": 6.0917319231160386e-05, "loss": 1.0486, "step": 6844 }, { "epoch": 1.391542996544013, "grad_norm": 0.13112445175647736, "learning_rate": 6.089697955862911e-05, "loss": 0.8274, "step": 6845 }, { "epoch": 1.3917462898963204, "grad_norm": 0.16186438500881195, "learning_rate": 6.087663988609784e-05, "loss": 1.0592, "step": 6846 }, { "epoch": 1.3919495832486277, "grad_norm": 0.15068422257900238, "learning_rate": 6.0856300213566566e-05, "loss": 0.9117, "step": 6847 }, { "epoch": 1.3921528766009352, "grad_norm": 0.1424170434474945, "learning_rate": 6.08359605410353e-05, "loss": 0.887, "step": 6848 }, { "epoch": 1.3923561699532425, "grad_norm": 0.1514316201210022, "learning_rate": 6.081562086850402e-05, "loss": 0.9935, "step": 6849 }, { "epoch": 1.3925594633055498, "grad_norm": 0.16643765568733215, "learning_rate": 6.079528119597275e-05, "loss": 1.0625, "step": 6850 }, { "epoch": 1.3927627566578573, "grad_norm": 0.16870176792144775, "learning_rate": 6.077494152344148e-05, "loss": 1.1119, "step": 6851 }, { "epoch": 1.3929660500101646, "grad_norm": 0.16826272010803223, "learning_rate": 6.0754601850910195e-05, "loss": 0.9706, "step": 6852 }, { "epoch": 1.3931693433624721, "grad_norm": 0.15877872705459595, "learning_rate": 6.073426217837893e-05, "loss": 1.1201, "step": 6853 }, { "epoch": 1.3933726367147794, "grad_norm": 0.15222519636154175, "learning_rate": 6.071392250584765e-05, "loss": 1.0917, "step": 6854 }, { "epoch": 1.3935759300670867, "grad_norm": 0.15984229743480682, "learning_rate": 6.069358283331639e-05, "loss": 1.0069, "step": 6855 }, { "epoch": 1.3937792234193942, "grad_norm": 0.12499643117189407, "learning_rate": 6.067324316078511e-05, "loss": 0.8104, "step": 6856 }, { "epoch": 1.3939825167717015, "grad_norm": 0.1672174036502838, "learning_rate": 6.0652903488253845e-05, "loss": 1.0665, "step": 6857 }, { "epoch": 1.394185810124009, "grad_norm": 0.15274770557880402, "learning_rate": 6.063256381572256e-05, "loss": 1.0114, "step": 6858 }, { "epoch": 1.3943891034763163, "grad_norm": 0.1640719175338745, "learning_rate": 6.06122241431913e-05, "loss": 1.0346, "step": 6859 }, { "epoch": 1.3945923968286236, "grad_norm": 0.15316323935985565, "learning_rate": 6.059188447066002e-05, "loss": 1.0075, "step": 6860 }, { "epoch": 1.394795690180931, "grad_norm": 0.1497650146484375, "learning_rate": 6.0571544798128756e-05, "loss": 1.032, "step": 6861 }, { "epoch": 1.3949989835332386, "grad_norm": 0.15311315655708313, "learning_rate": 6.0551205125597474e-05, "loss": 0.9394, "step": 6862 }, { "epoch": 1.3952022768855459, "grad_norm": 0.16212143003940582, "learning_rate": 6.053086545306621e-05, "loss": 1.1373, "step": 6863 }, { "epoch": 1.3954055702378532, "grad_norm": 0.1610141098499298, "learning_rate": 6.051052578053493e-05, "loss": 1.068, "step": 6864 }, { "epoch": 1.3956088635901607, "grad_norm": 0.16300232708454132, "learning_rate": 6.049018610800367e-05, "loss": 1.027, "step": 6865 }, { "epoch": 1.395812156942468, "grad_norm": 0.16508182883262634, "learning_rate": 6.0469846435472385e-05, "loss": 1.0181, "step": 6866 }, { "epoch": 1.3960154502947755, "grad_norm": 0.16862276196479797, "learning_rate": 6.044950676294112e-05, "loss": 1.1144, "step": 6867 }, { "epoch": 1.3962187436470828, "grad_norm": 0.16415852308273315, "learning_rate": 6.042916709040984e-05, "loss": 1.1611, "step": 6868 }, { "epoch": 1.39642203699939, "grad_norm": 0.14976157248020172, "learning_rate": 6.040882741787858e-05, "loss": 0.9655, "step": 6869 }, { "epoch": 1.3966253303516976, "grad_norm": 0.15121661126613617, "learning_rate": 6.0388487745347297e-05, "loss": 1.0556, "step": 6870 }, { "epoch": 1.3968286237040048, "grad_norm": 0.14877311885356903, "learning_rate": 6.0368148072816035e-05, "loss": 1.1012, "step": 6871 }, { "epoch": 1.3970319170563124, "grad_norm": 0.17387987673282623, "learning_rate": 6.034780840028475e-05, "loss": 1.0203, "step": 6872 }, { "epoch": 1.3972352104086196, "grad_norm": 0.15571002662181854, "learning_rate": 6.032746872775349e-05, "loss": 0.9764, "step": 6873 }, { "epoch": 1.397438503760927, "grad_norm": 0.16663400828838348, "learning_rate": 6.030712905522221e-05, "loss": 1.0956, "step": 6874 }, { "epoch": 1.3976417971132344, "grad_norm": 0.17007339000701904, "learning_rate": 6.0286789382690946e-05, "loss": 1.0845, "step": 6875 }, { "epoch": 1.3978450904655417, "grad_norm": 0.1560770869255066, "learning_rate": 6.0266449710159664e-05, "loss": 1.0833, "step": 6876 }, { "epoch": 1.3980483838178492, "grad_norm": 0.16780199110507965, "learning_rate": 6.02461100376284e-05, "loss": 1.1923, "step": 6877 }, { "epoch": 1.3982516771701565, "grad_norm": 0.15513817965984344, "learning_rate": 6.0225770365097126e-05, "loss": 1.0929, "step": 6878 }, { "epoch": 1.3984549705224638, "grad_norm": 0.17190799117088318, "learning_rate": 6.020543069256586e-05, "loss": 1.1773, "step": 6879 }, { "epoch": 1.3986582638747713, "grad_norm": 0.14657704532146454, "learning_rate": 6.018509102003458e-05, "loss": 1.0549, "step": 6880 }, { "epoch": 1.3988615572270786, "grad_norm": 0.15406420826911926, "learning_rate": 6.016475134750331e-05, "loss": 1.0151, "step": 6881 }, { "epoch": 1.3990648505793861, "grad_norm": 0.16085892915725708, "learning_rate": 6.014441167497204e-05, "loss": 1.191, "step": 6882 }, { "epoch": 1.3992681439316934, "grad_norm": 0.1476157307624817, "learning_rate": 6.012407200244077e-05, "loss": 0.9507, "step": 6883 }, { "epoch": 1.3994714372840007, "grad_norm": 0.14898930490016937, "learning_rate": 6.010373232990949e-05, "loss": 0.8874, "step": 6884 }, { "epoch": 1.3996747306363082, "grad_norm": 0.13529084622859955, "learning_rate": 6.0083392657378224e-05, "loss": 0.9814, "step": 6885 }, { "epoch": 1.3998780239886155, "grad_norm": 0.16349592804908752, "learning_rate": 6.006305298484695e-05, "loss": 1.21, "step": 6886 }, { "epoch": 1.400081317340923, "grad_norm": 0.14734548330307007, "learning_rate": 6.004271331231568e-05, "loss": 0.9757, "step": 6887 }, { "epoch": 1.4002846106932303, "grad_norm": 0.1508965939283371, "learning_rate": 6.0022373639784405e-05, "loss": 0.9822, "step": 6888 }, { "epoch": 1.4004879040455376, "grad_norm": 0.144801065325737, "learning_rate": 6.0002033967253136e-05, "loss": 1.0354, "step": 6889 }, { "epoch": 1.400691197397845, "grad_norm": 0.1705603152513504, "learning_rate": 5.998169429472186e-05, "loss": 1.0624, "step": 6890 }, { "epoch": 1.4008944907501526, "grad_norm": 0.14195939898490906, "learning_rate": 5.996135462219059e-05, "loss": 0.8946, "step": 6891 }, { "epoch": 1.4010977841024599, "grad_norm": 0.14895643293857574, "learning_rate": 5.9941014949659316e-05, "loss": 1.0709, "step": 6892 }, { "epoch": 1.4013010774547672, "grad_norm": 0.15194135904312134, "learning_rate": 5.992067527712805e-05, "loss": 1.0716, "step": 6893 }, { "epoch": 1.4015043708070747, "grad_norm": 0.16107560694217682, "learning_rate": 5.990033560459677e-05, "loss": 1.0804, "step": 6894 }, { "epoch": 1.401707664159382, "grad_norm": 0.15506263077259064, "learning_rate": 5.987999593206549e-05, "loss": 1.0002, "step": 6895 }, { "epoch": 1.4019109575116895, "grad_norm": 0.14550381898880005, "learning_rate": 5.985965625953423e-05, "loss": 0.9334, "step": 6896 }, { "epoch": 1.4021142508639968, "grad_norm": 0.15243171155452728, "learning_rate": 5.9839316587002945e-05, "loss": 0.9551, "step": 6897 }, { "epoch": 1.402317544216304, "grad_norm": 0.1538599729537964, "learning_rate": 5.981897691447168e-05, "loss": 1.0155, "step": 6898 }, { "epoch": 1.4025208375686116, "grad_norm": 0.1565885841846466, "learning_rate": 5.97986372419404e-05, "loss": 1.0895, "step": 6899 }, { "epoch": 1.4027241309209189, "grad_norm": 0.16014187037944794, "learning_rate": 5.977829756940914e-05, "loss": 1.0441, "step": 6900 }, { "epoch": 1.4029274242732264, "grad_norm": 0.16669133305549622, "learning_rate": 5.9757957896877856e-05, "loss": 1.1586, "step": 6901 }, { "epoch": 1.4031307176255337, "grad_norm": 0.1303495317697525, "learning_rate": 5.9737618224346595e-05, "loss": 0.8572, "step": 6902 }, { "epoch": 1.403334010977841, "grad_norm": 0.14560671150684357, "learning_rate": 5.971727855181531e-05, "loss": 0.9101, "step": 6903 }, { "epoch": 1.4035373043301484, "grad_norm": 0.15325911343097687, "learning_rate": 5.969693887928405e-05, "loss": 1.002, "step": 6904 }, { "epoch": 1.4037405976824557, "grad_norm": 0.16374750435352325, "learning_rate": 5.967659920675277e-05, "loss": 1.0014, "step": 6905 }, { "epoch": 1.4039438910347632, "grad_norm": 0.16725143790245056, "learning_rate": 5.9656259534221506e-05, "loss": 1.1363, "step": 6906 }, { "epoch": 1.4041471843870705, "grad_norm": 0.15079721808433533, "learning_rate": 5.9635919861690224e-05, "loss": 1.085, "step": 6907 }, { "epoch": 1.4043504777393778, "grad_norm": 0.161165252327919, "learning_rate": 5.961558018915896e-05, "loss": 1.0947, "step": 6908 }, { "epoch": 1.4045537710916853, "grad_norm": 0.13901959359645844, "learning_rate": 5.959524051662768e-05, "loss": 0.9887, "step": 6909 }, { "epoch": 1.4047570644439926, "grad_norm": 0.17727665603160858, "learning_rate": 5.957490084409642e-05, "loss": 1.2254, "step": 6910 }, { "epoch": 1.4049603577963001, "grad_norm": 0.16449913382530212, "learning_rate": 5.9554561171565135e-05, "loss": 1.08, "step": 6911 }, { "epoch": 1.4051636511486074, "grad_norm": 0.1533275991678238, "learning_rate": 5.953422149903387e-05, "loss": 1.0263, "step": 6912 }, { "epoch": 1.4053669445009147, "grad_norm": 0.13713502883911133, "learning_rate": 5.951388182650259e-05, "loss": 0.8477, "step": 6913 }, { "epoch": 1.4055702378532222, "grad_norm": 0.15603326261043549, "learning_rate": 5.949354215397133e-05, "loss": 1.0248, "step": 6914 }, { "epoch": 1.4057735312055295, "grad_norm": 0.1701658070087433, "learning_rate": 5.9473202481440046e-05, "loss": 1.0635, "step": 6915 }, { "epoch": 1.405976824557837, "grad_norm": 0.15504860877990723, "learning_rate": 5.9452862808908784e-05, "loss": 0.9031, "step": 6916 }, { "epoch": 1.4061801179101443, "grad_norm": 0.15079347789287567, "learning_rate": 5.94325231363775e-05, "loss": 0.9189, "step": 6917 }, { "epoch": 1.4063834112624516, "grad_norm": 0.15121297538280487, "learning_rate": 5.941218346384624e-05, "loss": 0.9538, "step": 6918 }, { "epoch": 1.406586704614759, "grad_norm": 0.14441393315792084, "learning_rate": 5.939184379131496e-05, "loss": 0.9817, "step": 6919 }, { "epoch": 1.4067899979670666, "grad_norm": 0.15504005551338196, "learning_rate": 5.9371504118783696e-05, "loss": 0.917, "step": 6920 }, { "epoch": 1.406993291319374, "grad_norm": 0.1546851098537445, "learning_rate": 5.9351164446252413e-05, "loss": 1.056, "step": 6921 }, { "epoch": 1.4071965846716812, "grad_norm": 0.14573045074939728, "learning_rate": 5.933082477372115e-05, "loss": 0.8798, "step": 6922 }, { "epoch": 1.4073998780239887, "grad_norm": 0.14356198906898499, "learning_rate": 5.931048510118987e-05, "loss": 0.9946, "step": 6923 }, { "epoch": 1.407603171376296, "grad_norm": 0.15910224616527557, "learning_rate": 5.929014542865861e-05, "loss": 1.0241, "step": 6924 }, { "epoch": 1.4078064647286035, "grad_norm": 0.15715257823467255, "learning_rate": 5.9269805756127325e-05, "loss": 1.094, "step": 6925 }, { "epoch": 1.4080097580809108, "grad_norm": 0.15548628568649292, "learning_rate": 5.924946608359606e-05, "loss": 1.0977, "step": 6926 }, { "epoch": 1.408213051433218, "grad_norm": 0.15223243832588196, "learning_rate": 5.922912641106478e-05, "loss": 1.0118, "step": 6927 }, { "epoch": 1.4084163447855256, "grad_norm": 0.15407241880893707, "learning_rate": 5.920878673853352e-05, "loss": 0.998, "step": 6928 }, { "epoch": 1.4086196381378329, "grad_norm": 0.1569782793521881, "learning_rate": 5.9188447066002236e-05, "loss": 0.9734, "step": 6929 }, { "epoch": 1.4088229314901404, "grad_norm": 0.1538730412721634, "learning_rate": 5.9168107393470974e-05, "loss": 0.9054, "step": 6930 }, { "epoch": 1.4090262248424477, "grad_norm": 0.15854963660240173, "learning_rate": 5.914776772093969e-05, "loss": 0.9483, "step": 6931 }, { "epoch": 1.409229518194755, "grad_norm": 0.1683579981327057, "learning_rate": 5.912742804840843e-05, "loss": 1.0284, "step": 6932 }, { "epoch": 1.4094328115470625, "grad_norm": 0.16705630719661713, "learning_rate": 5.910708837587715e-05, "loss": 1.2061, "step": 6933 }, { "epoch": 1.4096361048993697, "grad_norm": 0.16468504071235657, "learning_rate": 5.9086748703345886e-05, "loss": 1.1124, "step": 6934 }, { "epoch": 1.4098393982516773, "grad_norm": 0.1535244882106781, "learning_rate": 5.90664090308146e-05, "loss": 1.0106, "step": 6935 }, { "epoch": 1.4100426916039845, "grad_norm": 0.16361945867538452, "learning_rate": 5.904606935828333e-05, "loss": 1.0341, "step": 6936 }, { "epoch": 1.4102459849562918, "grad_norm": 0.17501762509346008, "learning_rate": 5.9025729685752066e-05, "loss": 1.0759, "step": 6937 }, { "epoch": 1.4104492783085993, "grad_norm": 0.1443677693605423, "learning_rate": 5.9005390013220784e-05, "loss": 0.9883, "step": 6938 }, { "epoch": 1.4106525716609066, "grad_norm": 0.15969394147396088, "learning_rate": 5.898505034068952e-05, "loss": 1.0426, "step": 6939 }, { "epoch": 1.4108558650132141, "grad_norm": 0.14605602622032166, "learning_rate": 5.896471066815824e-05, "loss": 1.074, "step": 6940 }, { "epoch": 1.4110591583655214, "grad_norm": 0.15779732167720795, "learning_rate": 5.894437099562698e-05, "loss": 0.9753, "step": 6941 }, { "epoch": 1.4112624517178287, "grad_norm": 0.1594490110874176, "learning_rate": 5.8924031323095695e-05, "loss": 0.94, "step": 6942 }, { "epoch": 1.4114657450701362, "grad_norm": 0.15903176367282867, "learning_rate": 5.890369165056443e-05, "loss": 1.0057, "step": 6943 }, { "epoch": 1.4116690384224435, "grad_norm": 0.1548534482717514, "learning_rate": 5.888335197803315e-05, "loss": 0.9543, "step": 6944 }, { "epoch": 1.411872331774751, "grad_norm": 0.15342067182064056, "learning_rate": 5.886301230550189e-05, "loss": 0.9265, "step": 6945 }, { "epoch": 1.4120756251270583, "grad_norm": 0.1446494311094284, "learning_rate": 5.8842672632970606e-05, "loss": 0.777, "step": 6946 }, { "epoch": 1.4122789184793656, "grad_norm": 0.16386401653289795, "learning_rate": 5.8822332960439344e-05, "loss": 1.0756, "step": 6947 }, { "epoch": 1.412482211831673, "grad_norm": 0.16524311900138855, "learning_rate": 5.880199328790806e-05, "loss": 1.0776, "step": 6948 }, { "epoch": 1.4126855051839806, "grad_norm": 0.16265372931957245, "learning_rate": 5.87816536153768e-05, "loss": 1.0153, "step": 6949 }, { "epoch": 1.412888798536288, "grad_norm": 0.1490945667028427, "learning_rate": 5.876131394284552e-05, "loss": 1.0136, "step": 6950 }, { "epoch": 1.4130920918885952, "grad_norm": 0.17091575264930725, "learning_rate": 5.8740974270314256e-05, "loss": 1.1794, "step": 6951 }, { "epoch": 1.4132953852409027, "grad_norm": 0.17527081072330475, "learning_rate": 5.8720634597782973e-05, "loss": 1.1898, "step": 6952 }, { "epoch": 1.41349867859321, "grad_norm": 0.1660010814666748, "learning_rate": 5.870029492525171e-05, "loss": 1.1198, "step": 6953 }, { "epoch": 1.4137019719455175, "grad_norm": 0.17469993233680725, "learning_rate": 5.867995525272043e-05, "loss": 1.2353, "step": 6954 }, { "epoch": 1.4139052652978248, "grad_norm": 0.15284724533557892, "learning_rate": 5.865961558018917e-05, "loss": 1.0579, "step": 6955 }, { "epoch": 1.414108558650132, "grad_norm": 0.15508383512496948, "learning_rate": 5.8639275907657885e-05, "loss": 1.0113, "step": 6956 }, { "epoch": 1.4143118520024396, "grad_norm": 0.16307425498962402, "learning_rate": 5.861893623512662e-05, "loss": 0.9934, "step": 6957 }, { "epoch": 1.4145151453547469, "grad_norm": 0.1423729807138443, "learning_rate": 5.859859656259534e-05, "loss": 0.9477, "step": 6958 }, { "epoch": 1.4147184387070544, "grad_norm": 0.17191347479820251, "learning_rate": 5.857825689006408e-05, "loss": 1.1453, "step": 6959 }, { "epoch": 1.4149217320593617, "grad_norm": 0.14529524743556976, "learning_rate": 5.8557917217532796e-05, "loss": 1.06, "step": 6960 }, { "epoch": 1.415125025411669, "grad_norm": 0.16226617991924286, "learning_rate": 5.8537577545001534e-05, "loss": 1.085, "step": 6961 }, { "epoch": 1.4153283187639765, "grad_norm": 0.15359675884246826, "learning_rate": 5.851723787247025e-05, "loss": 1.0975, "step": 6962 }, { "epoch": 1.4155316121162838, "grad_norm": 0.1587379425764084, "learning_rate": 5.849689819993899e-05, "loss": 1.073, "step": 6963 }, { "epoch": 1.4157349054685913, "grad_norm": 0.14959818124771118, "learning_rate": 5.847655852740771e-05, "loss": 1.0038, "step": 6964 }, { "epoch": 1.4159381988208986, "grad_norm": 0.16169403493404388, "learning_rate": 5.8456218854876446e-05, "loss": 1.038, "step": 6965 }, { "epoch": 1.4161414921732058, "grad_norm": 0.16977471113204956, "learning_rate": 5.843587918234516e-05, "loss": 1.0902, "step": 6966 }, { "epoch": 1.4163447855255134, "grad_norm": 0.18036596477031708, "learning_rate": 5.84155395098139e-05, "loss": 1.3127, "step": 6967 }, { "epoch": 1.4165480788778206, "grad_norm": 0.15857172012329102, "learning_rate": 5.839519983728262e-05, "loss": 0.9297, "step": 6968 }, { "epoch": 1.4167513722301281, "grad_norm": 0.16160105168819427, "learning_rate": 5.837486016475136e-05, "loss": 0.9612, "step": 6969 }, { "epoch": 1.4169546655824354, "grad_norm": 0.15947425365447998, "learning_rate": 5.8354520492220075e-05, "loss": 1.0329, "step": 6970 }, { "epoch": 1.4171579589347427, "grad_norm": 0.15058821439743042, "learning_rate": 5.833418081968881e-05, "loss": 1.0494, "step": 6971 }, { "epoch": 1.4173612522870502, "grad_norm": 0.14137932658195496, "learning_rate": 5.831384114715753e-05, "loss": 0.8643, "step": 6972 }, { "epoch": 1.4175645456393575, "grad_norm": 0.15573449432849884, "learning_rate": 5.829350147462627e-05, "loss": 0.9866, "step": 6973 }, { "epoch": 1.417767838991665, "grad_norm": 0.17434577643871307, "learning_rate": 5.8273161802094986e-05, "loss": 1.0533, "step": 6974 }, { "epoch": 1.4179711323439723, "grad_norm": 0.14981883764266968, "learning_rate": 5.8252822129563724e-05, "loss": 0.9438, "step": 6975 }, { "epoch": 1.4181744256962796, "grad_norm": 0.17315039038658142, "learning_rate": 5.823248245703244e-05, "loss": 1.1614, "step": 6976 }, { "epoch": 1.4183777190485871, "grad_norm": 0.1493794322013855, "learning_rate": 5.8212142784501166e-05, "loss": 0.9655, "step": 6977 }, { "epoch": 1.4185810124008946, "grad_norm": 0.16317151486873627, "learning_rate": 5.81918031119699e-05, "loss": 1.0783, "step": 6978 }, { "epoch": 1.418784305753202, "grad_norm": 0.17804372310638428, "learning_rate": 5.817146343943862e-05, "loss": 1.1467, "step": 6979 }, { "epoch": 1.4189875991055092, "grad_norm": 0.14259546995162964, "learning_rate": 5.815112376690735e-05, "loss": 0.9369, "step": 6980 }, { "epoch": 1.4191908924578167, "grad_norm": 0.16736005246639252, "learning_rate": 5.813078409437608e-05, "loss": 1.0807, "step": 6981 }, { "epoch": 1.419394185810124, "grad_norm": 0.17183224856853485, "learning_rate": 5.811044442184481e-05, "loss": 1.1465, "step": 6982 }, { "epoch": 1.4195974791624315, "grad_norm": 0.16544954478740692, "learning_rate": 5.809010474931353e-05, "loss": 1.0671, "step": 6983 }, { "epoch": 1.4198007725147388, "grad_norm": 0.15119296312332153, "learning_rate": 5.8069765076782265e-05, "loss": 1.0459, "step": 6984 }, { "epoch": 1.420004065867046, "grad_norm": 0.14908432960510254, "learning_rate": 5.804942540425099e-05, "loss": 0.9562, "step": 6985 }, { "epoch": 1.4202073592193536, "grad_norm": 0.15488240122795105, "learning_rate": 5.802908573171972e-05, "loss": 1.0218, "step": 6986 }, { "epoch": 1.4204106525716609, "grad_norm": 0.16753670573234558, "learning_rate": 5.8008746059188445e-05, "loss": 1.0103, "step": 6987 }, { "epoch": 1.4206139459239684, "grad_norm": 0.15095356106758118, "learning_rate": 5.7988406386657176e-05, "loss": 0.9328, "step": 6988 }, { "epoch": 1.4208172392762757, "grad_norm": 0.15303654968738556, "learning_rate": 5.79680667141259e-05, "loss": 1.1052, "step": 6989 }, { "epoch": 1.421020532628583, "grad_norm": 0.15543197095394135, "learning_rate": 5.794772704159463e-05, "loss": 0.9239, "step": 6990 }, { "epoch": 1.4212238259808905, "grad_norm": 0.15274298191070557, "learning_rate": 5.7927387369063356e-05, "loss": 1.0457, "step": 6991 }, { "epoch": 1.4214271193331978, "grad_norm": 0.15703609585762024, "learning_rate": 5.790704769653209e-05, "loss": 0.9673, "step": 6992 }, { "epoch": 1.4216304126855053, "grad_norm": 0.1640772670507431, "learning_rate": 5.788670802400081e-05, "loss": 1.124, "step": 6993 }, { "epoch": 1.4218337060378126, "grad_norm": 0.16401225328445435, "learning_rate": 5.786636835146955e-05, "loss": 1.1243, "step": 6994 }, { "epoch": 1.4220369993901198, "grad_norm": 0.13957446813583374, "learning_rate": 5.784602867893827e-05, "loss": 0.9091, "step": 6995 }, { "epoch": 1.4222402927424274, "grad_norm": 0.14374911785125732, "learning_rate": 5.7825689006407006e-05, "loss": 0.9343, "step": 6996 }, { "epoch": 1.4224435860947346, "grad_norm": 0.1541910171508789, "learning_rate": 5.780534933387572e-05, "loss": 1.0193, "step": 6997 }, { "epoch": 1.4226468794470422, "grad_norm": 0.17299696803092957, "learning_rate": 5.778500966134446e-05, "loss": 1.1652, "step": 6998 }, { "epoch": 1.4228501727993494, "grad_norm": 0.15225568413734436, "learning_rate": 5.776466998881318e-05, "loss": 1.0108, "step": 6999 }, { "epoch": 1.4230534661516567, "grad_norm": 0.17570187151432037, "learning_rate": 5.774433031628192e-05, "loss": 1.2147, "step": 7000 }, { "epoch": 1.4232567595039642, "grad_norm": 0.16006618738174438, "learning_rate": 5.7723990643750635e-05, "loss": 1.044, "step": 7001 }, { "epoch": 1.4234600528562715, "grad_norm": 0.15728577971458435, "learning_rate": 5.770365097121937e-05, "loss": 1.1534, "step": 7002 }, { "epoch": 1.423663346208579, "grad_norm": 0.15255065262317657, "learning_rate": 5.768331129868809e-05, "loss": 0.9349, "step": 7003 }, { "epoch": 1.4238666395608863, "grad_norm": 0.14307983219623566, "learning_rate": 5.766297162615683e-05, "loss": 0.8236, "step": 7004 }, { "epoch": 1.4240699329131936, "grad_norm": 0.16135892271995544, "learning_rate": 5.7642631953625546e-05, "loss": 1.2023, "step": 7005 }, { "epoch": 1.4242732262655011, "grad_norm": 0.15913139283657074, "learning_rate": 5.7622292281094284e-05, "loss": 1.0975, "step": 7006 }, { "epoch": 1.4244765196178086, "grad_norm": 0.14146681129932404, "learning_rate": 5.7601952608563e-05, "loss": 0.9771, "step": 7007 }, { "epoch": 1.424679812970116, "grad_norm": 0.15485510230064392, "learning_rate": 5.758161293603174e-05, "loss": 1.0722, "step": 7008 }, { "epoch": 1.4248831063224232, "grad_norm": 0.1600322425365448, "learning_rate": 5.756127326350046e-05, "loss": 0.9445, "step": 7009 }, { "epoch": 1.4250863996747307, "grad_norm": 0.15899471938610077, "learning_rate": 5.7540933590969195e-05, "loss": 0.9792, "step": 7010 }, { "epoch": 1.425289693027038, "grad_norm": 0.16935110092163086, "learning_rate": 5.752059391843791e-05, "loss": 1.1119, "step": 7011 }, { "epoch": 1.4254929863793455, "grad_norm": 0.1555667668581009, "learning_rate": 5.750025424590665e-05, "loss": 1.01, "step": 7012 }, { "epoch": 1.4256962797316528, "grad_norm": 0.15003176033496857, "learning_rate": 5.747991457337537e-05, "loss": 0.9769, "step": 7013 }, { "epoch": 1.42589957308396, "grad_norm": 0.15961889922618866, "learning_rate": 5.745957490084411e-05, "loss": 0.9535, "step": 7014 }, { "epoch": 1.4261028664362676, "grad_norm": 0.1631239503622055, "learning_rate": 5.7439235228312825e-05, "loss": 0.9852, "step": 7015 }, { "epoch": 1.426306159788575, "grad_norm": 0.16456563770771027, "learning_rate": 5.741889555578156e-05, "loss": 1.0641, "step": 7016 }, { "epoch": 1.4265094531408824, "grad_norm": 0.15899543464183807, "learning_rate": 5.739855588325028e-05, "loss": 0.9772, "step": 7017 }, { "epoch": 1.4267127464931897, "grad_norm": 0.1519063413143158, "learning_rate": 5.7378216210719005e-05, "loss": 0.981, "step": 7018 }, { "epoch": 1.426916039845497, "grad_norm": 0.14312393963336945, "learning_rate": 5.7357876538187736e-05, "loss": 0.9407, "step": 7019 }, { "epoch": 1.4271193331978045, "grad_norm": 0.16814564168453217, "learning_rate": 5.733753686565646e-05, "loss": 1.1335, "step": 7020 }, { "epoch": 1.4273226265501118, "grad_norm": 0.17560997605323792, "learning_rate": 5.731719719312519e-05, "loss": 1.2545, "step": 7021 }, { "epoch": 1.4275259199024193, "grad_norm": 0.16009865701198578, "learning_rate": 5.7296857520593916e-05, "loss": 1.1073, "step": 7022 }, { "epoch": 1.4277292132547266, "grad_norm": 0.14616774022579193, "learning_rate": 5.727651784806265e-05, "loss": 0.9322, "step": 7023 }, { "epoch": 1.4279325066070339, "grad_norm": 0.15338778495788574, "learning_rate": 5.725617817553137e-05, "loss": 1.0607, "step": 7024 }, { "epoch": 1.4281357999593414, "grad_norm": 0.1624404937028885, "learning_rate": 5.72358385030001e-05, "loss": 1.2047, "step": 7025 }, { "epoch": 1.4283390933116487, "grad_norm": 0.1730973869562149, "learning_rate": 5.721549883046883e-05, "loss": 1.0891, "step": 7026 }, { "epoch": 1.4285423866639562, "grad_norm": 0.16130545735359192, "learning_rate": 5.719515915793756e-05, "loss": 1.1156, "step": 7027 }, { "epoch": 1.4287456800162635, "grad_norm": 0.1761913150548935, "learning_rate": 5.717481948540628e-05, "loss": 1.0594, "step": 7028 }, { "epoch": 1.4289489733685707, "grad_norm": 0.15182644128799438, "learning_rate": 5.7154479812875014e-05, "loss": 0.9237, "step": 7029 }, { "epoch": 1.4291522667208783, "grad_norm": 0.1545775681734085, "learning_rate": 5.713414014034374e-05, "loss": 1.0466, "step": 7030 }, { "epoch": 1.4293555600731855, "grad_norm": 0.15703696012496948, "learning_rate": 5.711380046781247e-05, "loss": 0.9723, "step": 7031 }, { "epoch": 1.429558853425493, "grad_norm": 0.15546222031116486, "learning_rate": 5.7093460795281195e-05, "loss": 0.9767, "step": 7032 }, { "epoch": 1.4297621467778003, "grad_norm": 0.16714654862880707, "learning_rate": 5.7073121122749926e-05, "loss": 1.0734, "step": 7033 }, { "epoch": 1.4299654401301076, "grad_norm": 0.15429727733135223, "learning_rate": 5.705278145021865e-05, "loss": 0.9735, "step": 7034 }, { "epoch": 1.4301687334824151, "grad_norm": 0.1618785411119461, "learning_rate": 5.703244177768738e-05, "loss": 1.1881, "step": 7035 }, { "epoch": 1.4303720268347226, "grad_norm": 0.16058021783828735, "learning_rate": 5.7012102105156106e-05, "loss": 1.0477, "step": 7036 }, { "epoch": 1.43057532018703, "grad_norm": 0.15133486688137054, "learning_rate": 5.699176243262484e-05, "loss": 0.9531, "step": 7037 }, { "epoch": 1.4307786135393372, "grad_norm": 0.15109995007514954, "learning_rate": 5.697142276009356e-05, "loss": 1.1346, "step": 7038 }, { "epoch": 1.4309819068916445, "grad_norm": 0.16786424815654755, "learning_rate": 5.695108308756229e-05, "loss": 1.0919, "step": 7039 }, { "epoch": 1.431185200243952, "grad_norm": 0.1384085714817047, "learning_rate": 5.693074341503102e-05, "loss": 0.8499, "step": 7040 }, { "epoch": 1.4313884935962595, "grad_norm": 0.14978425204753876, "learning_rate": 5.691040374249975e-05, "loss": 1.0893, "step": 7041 }, { "epoch": 1.4315917869485668, "grad_norm": 0.1528119444847107, "learning_rate": 5.689006406996847e-05, "loss": 1.0491, "step": 7042 }, { "epoch": 1.431795080300874, "grad_norm": 0.1478254795074463, "learning_rate": 5.6869724397437204e-05, "loss": 0.976, "step": 7043 }, { "epoch": 1.4319983736531816, "grad_norm": 0.16806158423423767, "learning_rate": 5.684938472490593e-05, "loss": 1.1223, "step": 7044 }, { "epoch": 1.432201667005489, "grad_norm": 0.16098909080028534, "learning_rate": 5.682904505237466e-05, "loss": 1.096, "step": 7045 }, { "epoch": 1.4324049603577964, "grad_norm": 0.15493112802505493, "learning_rate": 5.6808705379843384e-05, "loss": 0.9423, "step": 7046 }, { "epoch": 1.4326082537101037, "grad_norm": 0.16899438202381134, "learning_rate": 5.6788365707312116e-05, "loss": 1.1543, "step": 7047 }, { "epoch": 1.432811547062411, "grad_norm": 0.16474638879299164, "learning_rate": 5.676802603478084e-05, "loss": 1.1847, "step": 7048 }, { "epoch": 1.4330148404147185, "grad_norm": 0.15597054362297058, "learning_rate": 5.674768636224957e-05, "loss": 1.1128, "step": 7049 }, { "epoch": 1.4332181337670258, "grad_norm": 0.15467366576194763, "learning_rate": 5.6727346689718296e-05, "loss": 1.0166, "step": 7050 }, { "epoch": 1.4334214271193333, "grad_norm": 0.13610722124576569, "learning_rate": 5.670700701718703e-05, "loss": 0.8721, "step": 7051 }, { "epoch": 1.4336247204716406, "grad_norm": 0.17068330943584442, "learning_rate": 5.668666734465575e-05, "loss": 1.1137, "step": 7052 }, { "epoch": 1.4338280138239479, "grad_norm": 0.15879547595977783, "learning_rate": 5.666632767212449e-05, "loss": 0.9649, "step": 7053 }, { "epoch": 1.4340313071762554, "grad_norm": 0.14434680342674255, "learning_rate": 5.664598799959321e-05, "loss": 1.0354, "step": 7054 }, { "epoch": 1.4342346005285627, "grad_norm": 0.15887115895748138, "learning_rate": 5.6625648327061945e-05, "loss": 1.152, "step": 7055 }, { "epoch": 1.4344378938808702, "grad_norm": 0.15456242859363556, "learning_rate": 5.660530865453066e-05, "loss": 0.9352, "step": 7056 }, { "epoch": 1.4346411872331775, "grad_norm": 0.1485803872346878, "learning_rate": 5.65849689819994e-05, "loss": 1.0388, "step": 7057 }, { "epoch": 1.4348444805854847, "grad_norm": 0.14560282230377197, "learning_rate": 5.656462930946812e-05, "loss": 0.8804, "step": 7058 }, { "epoch": 1.4350477739377923, "grad_norm": 0.16780616343021393, "learning_rate": 5.654428963693686e-05, "loss": 1.2334, "step": 7059 }, { "epoch": 1.4352510672900995, "grad_norm": 0.16232897341251373, "learning_rate": 5.6523949964405574e-05, "loss": 1.0873, "step": 7060 }, { "epoch": 1.435454360642407, "grad_norm": 0.16009031236171722, "learning_rate": 5.65036102918743e-05, "loss": 1.043, "step": 7061 }, { "epoch": 1.4356576539947143, "grad_norm": 0.17730426788330078, "learning_rate": 5.648327061934303e-05, "loss": 1.0882, "step": 7062 }, { "epoch": 1.4358609473470216, "grad_norm": 0.16423147916793823, "learning_rate": 5.6462930946811755e-05, "loss": 1.1773, "step": 7063 }, { "epoch": 1.4360642406993291, "grad_norm": 0.14594976603984833, "learning_rate": 5.6442591274280486e-05, "loss": 0.8925, "step": 7064 }, { "epoch": 1.4362675340516364, "grad_norm": 0.1569969207048416, "learning_rate": 5.642225160174921e-05, "loss": 1.122, "step": 7065 }, { "epoch": 1.436470827403944, "grad_norm": 0.1664332002401352, "learning_rate": 5.640191192921794e-05, "loss": 1.2314, "step": 7066 }, { "epoch": 1.4366741207562512, "grad_norm": 0.1459050178527832, "learning_rate": 5.6381572256686666e-05, "loss": 1.0424, "step": 7067 }, { "epoch": 1.4368774141085585, "grad_norm": 0.14823487401008606, "learning_rate": 5.63612325841554e-05, "loss": 1.0088, "step": 7068 }, { "epoch": 1.437080707460866, "grad_norm": 0.15456153452396393, "learning_rate": 5.634089291162412e-05, "loss": 0.9902, "step": 7069 }, { "epoch": 1.4372840008131735, "grad_norm": 0.15063978731632233, "learning_rate": 5.632055323909285e-05, "loss": 0.9833, "step": 7070 }, { "epoch": 1.4374872941654808, "grad_norm": 0.1326543688774109, "learning_rate": 5.630021356656158e-05, "loss": 0.8563, "step": 7071 }, { "epoch": 1.437690587517788, "grad_norm": 0.1607581377029419, "learning_rate": 5.627987389403031e-05, "loss": 1.1066, "step": 7072 }, { "epoch": 1.4378938808700956, "grad_norm": 0.17142783105373383, "learning_rate": 5.625953422149903e-05, "loss": 1.0698, "step": 7073 }, { "epoch": 1.438097174222403, "grad_norm": 0.1687779724597931, "learning_rate": 5.6239194548967764e-05, "loss": 1.1368, "step": 7074 }, { "epoch": 1.4383004675747104, "grad_norm": 0.15616458654403687, "learning_rate": 5.621885487643649e-05, "loss": 0.9204, "step": 7075 }, { "epoch": 1.4385037609270177, "grad_norm": 0.14602220058441162, "learning_rate": 5.619851520390522e-05, "loss": 0.9276, "step": 7076 }, { "epoch": 1.438707054279325, "grad_norm": 0.14642690122127533, "learning_rate": 5.6178175531373944e-05, "loss": 1.0513, "step": 7077 }, { "epoch": 1.4389103476316325, "grad_norm": 0.14626696705818176, "learning_rate": 5.6157835858842676e-05, "loss": 0.9175, "step": 7078 }, { "epoch": 1.4391136409839398, "grad_norm": 0.14647045731544495, "learning_rate": 5.61374961863114e-05, "loss": 0.8942, "step": 7079 }, { "epoch": 1.4393169343362473, "grad_norm": 0.14955741167068481, "learning_rate": 5.611715651378013e-05, "loss": 0.8223, "step": 7080 }, { "epoch": 1.4395202276885546, "grad_norm": 0.1502169966697693, "learning_rate": 5.6096816841248856e-05, "loss": 0.8907, "step": 7081 }, { "epoch": 1.4397235210408619, "grad_norm": 0.15933147072792053, "learning_rate": 5.607647716871759e-05, "loss": 0.9065, "step": 7082 }, { "epoch": 1.4399268143931694, "grad_norm": 0.14342406392097473, "learning_rate": 5.605613749618631e-05, "loss": 0.8965, "step": 7083 }, { "epoch": 1.4401301077454767, "grad_norm": 0.14823979139328003, "learning_rate": 5.603579782365504e-05, "loss": 0.9602, "step": 7084 }, { "epoch": 1.4403334010977842, "grad_norm": 0.14995329082012177, "learning_rate": 5.601545815112377e-05, "loss": 0.9335, "step": 7085 }, { "epoch": 1.4405366944500915, "grad_norm": 0.15548937022686005, "learning_rate": 5.59951184785925e-05, "loss": 1.0812, "step": 7086 }, { "epoch": 1.4407399878023988, "grad_norm": 0.16731050610542297, "learning_rate": 5.597477880606122e-05, "loss": 1.098, "step": 7087 }, { "epoch": 1.4409432811547063, "grad_norm": 0.16047613322734833, "learning_rate": 5.5954439133529954e-05, "loss": 0.9614, "step": 7088 }, { "epoch": 1.4411465745070136, "grad_norm": 0.15521658957004547, "learning_rate": 5.593409946099868e-05, "loss": 0.9903, "step": 7089 }, { "epoch": 1.441349867859321, "grad_norm": 0.15141330659389496, "learning_rate": 5.591375978846741e-05, "loss": 1.0571, "step": 7090 }, { "epoch": 1.4415531612116284, "grad_norm": 0.16350041329860687, "learning_rate": 5.5893420115936134e-05, "loss": 1.0909, "step": 7091 }, { "epoch": 1.4417564545639356, "grad_norm": 0.15235590934753418, "learning_rate": 5.5873080443404866e-05, "loss": 1.0361, "step": 7092 }, { "epoch": 1.4419597479162432, "grad_norm": 0.16429497301578522, "learning_rate": 5.585274077087359e-05, "loss": 1.1117, "step": 7093 }, { "epoch": 1.4421630412685504, "grad_norm": 0.14635255932807922, "learning_rate": 5.583240109834232e-05, "loss": 0.8734, "step": 7094 }, { "epoch": 1.442366334620858, "grad_norm": 0.16316108405590057, "learning_rate": 5.5812061425811046e-05, "loss": 0.9637, "step": 7095 }, { "epoch": 1.4425696279731652, "grad_norm": 0.16636328399181366, "learning_rate": 5.579172175327978e-05, "loss": 1.2268, "step": 7096 }, { "epoch": 1.4427729213254725, "grad_norm": 0.1398182213306427, "learning_rate": 5.57713820807485e-05, "loss": 0.9425, "step": 7097 }, { "epoch": 1.44297621467778, "grad_norm": 0.17444168031215668, "learning_rate": 5.575104240821723e-05, "loss": 1.0043, "step": 7098 }, { "epoch": 1.4431795080300875, "grad_norm": 0.14904524385929108, "learning_rate": 5.573070273568596e-05, "loss": 0.9592, "step": 7099 }, { "epoch": 1.4433828013823948, "grad_norm": 0.17846673727035522, "learning_rate": 5.571036306315469e-05, "loss": 1.1698, "step": 7100 }, { "epoch": 1.4435860947347021, "grad_norm": 0.16818009316921234, "learning_rate": 5.569002339062341e-05, "loss": 1.0848, "step": 7101 }, { "epoch": 1.4437893880870096, "grad_norm": 0.1551516056060791, "learning_rate": 5.566968371809214e-05, "loss": 1.0045, "step": 7102 }, { "epoch": 1.443992681439317, "grad_norm": 0.17592954635620117, "learning_rate": 5.564934404556087e-05, "loss": 1.0643, "step": 7103 }, { "epoch": 1.4441959747916244, "grad_norm": 0.15838932991027832, "learning_rate": 5.562900437302959e-05, "loss": 1.1457, "step": 7104 }, { "epoch": 1.4443992681439317, "grad_norm": 0.17779286205768585, "learning_rate": 5.5608664700498324e-05, "loss": 1.0554, "step": 7105 }, { "epoch": 1.444602561496239, "grad_norm": 0.14753217995166779, "learning_rate": 5.558832502796705e-05, "loss": 0.9007, "step": 7106 }, { "epoch": 1.4448058548485465, "grad_norm": 0.15274490416049957, "learning_rate": 5.556798535543578e-05, "loss": 0.9968, "step": 7107 }, { "epoch": 1.4450091482008538, "grad_norm": 0.1444222331047058, "learning_rate": 5.5547645682904504e-05, "loss": 0.899, "step": 7108 }, { "epoch": 1.4452124415531613, "grad_norm": 0.16003580391407013, "learning_rate": 5.5527306010373236e-05, "loss": 1.0092, "step": 7109 }, { "epoch": 1.4454157349054686, "grad_norm": 0.16005735099315643, "learning_rate": 5.550696633784196e-05, "loss": 1.1286, "step": 7110 }, { "epoch": 1.4456190282577759, "grad_norm": 0.16826081275939941, "learning_rate": 5.548662666531069e-05, "loss": 1.034, "step": 7111 }, { "epoch": 1.4458223216100834, "grad_norm": 0.1600271761417389, "learning_rate": 5.5466286992779416e-05, "loss": 1.1682, "step": 7112 }, { "epoch": 1.4460256149623907, "grad_norm": 0.1603696346282959, "learning_rate": 5.544594732024815e-05, "loss": 1.04, "step": 7113 }, { "epoch": 1.4462289083146982, "grad_norm": 0.149916410446167, "learning_rate": 5.542560764771687e-05, "loss": 0.8601, "step": 7114 }, { "epoch": 1.4464322016670055, "grad_norm": 0.14548246562480927, "learning_rate": 5.54052679751856e-05, "loss": 0.9762, "step": 7115 }, { "epoch": 1.4466354950193128, "grad_norm": 0.16768839955329895, "learning_rate": 5.538492830265433e-05, "loss": 1.0964, "step": 7116 }, { "epoch": 1.4468387883716203, "grad_norm": 0.1450057178735733, "learning_rate": 5.536458863012306e-05, "loss": 0.8969, "step": 7117 }, { "epoch": 1.4470420817239276, "grad_norm": 0.15958139300346375, "learning_rate": 5.534424895759178e-05, "loss": 1.1208, "step": 7118 }, { "epoch": 1.447245375076235, "grad_norm": 0.13964813947677612, "learning_rate": 5.5323909285060514e-05, "loss": 0.8724, "step": 7119 }, { "epoch": 1.4474486684285424, "grad_norm": 0.14848533272743225, "learning_rate": 5.530356961252924e-05, "loss": 0.9282, "step": 7120 }, { "epoch": 1.4476519617808496, "grad_norm": 0.14798136055469513, "learning_rate": 5.528322993999797e-05, "loss": 1.087, "step": 7121 }, { "epoch": 1.4478552551331572, "grad_norm": 0.1584753543138504, "learning_rate": 5.5262890267466694e-05, "loss": 1.1091, "step": 7122 }, { "epoch": 1.4480585484854644, "grad_norm": 0.1623314768075943, "learning_rate": 5.5242550594935425e-05, "loss": 1.1573, "step": 7123 }, { "epoch": 1.448261841837772, "grad_norm": 0.15471680462360382, "learning_rate": 5.522221092240415e-05, "loss": 0.944, "step": 7124 }, { "epoch": 1.4484651351900792, "grad_norm": 0.1478460431098938, "learning_rate": 5.520187124987288e-05, "loss": 0.9436, "step": 7125 }, { "epoch": 1.4486684285423865, "grad_norm": 0.16853754222393036, "learning_rate": 5.5181531577341606e-05, "loss": 1.0439, "step": 7126 }, { "epoch": 1.448871721894694, "grad_norm": 0.16511282324790955, "learning_rate": 5.516119190481034e-05, "loss": 1.0378, "step": 7127 }, { "epoch": 1.4490750152470016, "grad_norm": 0.16189835965633392, "learning_rate": 5.514085223227906e-05, "loss": 1.0768, "step": 7128 }, { "epoch": 1.4492783085993088, "grad_norm": 0.16869410872459412, "learning_rate": 5.512051255974779e-05, "loss": 1.011, "step": 7129 }, { "epoch": 1.4494816019516161, "grad_norm": 0.15597373247146606, "learning_rate": 5.510017288721652e-05, "loss": 0.9521, "step": 7130 }, { "epoch": 1.4496848953039236, "grad_norm": 0.14260385930538177, "learning_rate": 5.507983321468525e-05, "loss": 0.8399, "step": 7131 }, { "epoch": 1.449888188656231, "grad_norm": 0.14267964661121368, "learning_rate": 5.505949354215397e-05, "loss": 0.925, "step": 7132 }, { "epoch": 1.4500914820085384, "grad_norm": 0.1477990448474884, "learning_rate": 5.5039153869622704e-05, "loss": 0.9636, "step": 7133 }, { "epoch": 1.4502947753608457, "grad_norm": 0.16236381232738495, "learning_rate": 5.501881419709143e-05, "loss": 1.1057, "step": 7134 }, { "epoch": 1.450498068713153, "grad_norm": 0.1601257473230362, "learning_rate": 5.499847452456016e-05, "loss": 1.1647, "step": 7135 }, { "epoch": 1.4507013620654605, "grad_norm": 0.15447348356246948, "learning_rate": 5.4978134852028884e-05, "loss": 1.0227, "step": 7136 }, { "epoch": 1.4509046554177678, "grad_norm": 0.15148869156837463, "learning_rate": 5.4957795179497615e-05, "loss": 0.9926, "step": 7137 }, { "epoch": 1.4511079487700753, "grad_norm": 0.1739932894706726, "learning_rate": 5.493745550696634e-05, "loss": 1.1065, "step": 7138 }, { "epoch": 1.4513112421223826, "grad_norm": 0.14690914750099182, "learning_rate": 5.491711583443507e-05, "loss": 0.9786, "step": 7139 }, { "epoch": 1.45151453547469, "grad_norm": 0.15842650830745697, "learning_rate": 5.4896776161903796e-05, "loss": 1.06, "step": 7140 }, { "epoch": 1.4517178288269974, "grad_norm": 0.1690451055765152, "learning_rate": 5.487643648937253e-05, "loss": 1.0769, "step": 7141 }, { "epoch": 1.4519211221793047, "grad_norm": 0.14513088762760162, "learning_rate": 5.485609681684125e-05, "loss": 0.9437, "step": 7142 }, { "epoch": 1.4521244155316122, "grad_norm": 0.15545345842838287, "learning_rate": 5.4835757144309976e-05, "loss": 0.8904, "step": 7143 }, { "epoch": 1.4523277088839195, "grad_norm": 0.1784391701221466, "learning_rate": 5.481541747177871e-05, "loss": 1.2087, "step": 7144 }, { "epoch": 1.4525310022362268, "grad_norm": 0.14300981163978577, "learning_rate": 5.479507779924743e-05, "loss": 0.9506, "step": 7145 }, { "epoch": 1.4527342955885343, "grad_norm": 0.18284142017364502, "learning_rate": 5.477473812671616e-05, "loss": 1.178, "step": 7146 }, { "epoch": 1.4529375889408416, "grad_norm": 0.16167539358139038, "learning_rate": 5.475439845418489e-05, "loss": 0.9989, "step": 7147 }, { "epoch": 1.453140882293149, "grad_norm": 0.17664743959903717, "learning_rate": 5.473405878165362e-05, "loss": 1.0911, "step": 7148 }, { "epoch": 1.4533441756454564, "grad_norm": 0.17195507884025574, "learning_rate": 5.471371910912234e-05, "loss": 1.2898, "step": 7149 }, { "epoch": 1.4535474689977637, "grad_norm": 0.15981166064739227, "learning_rate": 5.4693379436591074e-05, "loss": 1.0563, "step": 7150 }, { "epoch": 1.4537507623500712, "grad_norm": 0.16478121280670166, "learning_rate": 5.46730397640598e-05, "loss": 1.0171, "step": 7151 }, { "epoch": 1.4539540557023785, "grad_norm": 0.1612006425857544, "learning_rate": 5.465270009152853e-05, "loss": 0.9867, "step": 7152 }, { "epoch": 1.454157349054686, "grad_norm": 0.15977860987186432, "learning_rate": 5.4632360418997254e-05, "loss": 1.0997, "step": 7153 }, { "epoch": 1.4543606424069933, "grad_norm": 0.16326823830604553, "learning_rate": 5.4612020746465985e-05, "loss": 0.881, "step": 7154 }, { "epoch": 1.4545639357593005, "grad_norm": 0.13994139432907104, "learning_rate": 5.459168107393471e-05, "loss": 0.9196, "step": 7155 }, { "epoch": 1.454767229111608, "grad_norm": 0.1567743569612503, "learning_rate": 5.457134140140344e-05, "loss": 1.0773, "step": 7156 }, { "epoch": 1.4549705224639156, "grad_norm": 0.15339693427085876, "learning_rate": 5.4551001728872166e-05, "loss": 0.9771, "step": 7157 }, { "epoch": 1.4551738158162228, "grad_norm": 0.1575535386800766, "learning_rate": 5.45306620563409e-05, "loss": 1.0254, "step": 7158 }, { "epoch": 1.4553771091685301, "grad_norm": 0.14848507940769196, "learning_rate": 5.451032238380962e-05, "loss": 0.9372, "step": 7159 }, { "epoch": 1.4555804025208376, "grad_norm": 0.14759337902069092, "learning_rate": 5.448998271127835e-05, "loss": 1.083, "step": 7160 }, { "epoch": 1.455783695873145, "grad_norm": 0.15212364494800568, "learning_rate": 5.446964303874708e-05, "loss": 1.0207, "step": 7161 }, { "epoch": 1.4559869892254524, "grad_norm": 0.16381527483463287, "learning_rate": 5.444930336621581e-05, "loss": 1.056, "step": 7162 }, { "epoch": 1.4561902825777597, "grad_norm": 0.14709602296352386, "learning_rate": 5.442896369368453e-05, "loss": 1.0069, "step": 7163 }, { "epoch": 1.456393575930067, "grad_norm": 0.1730957329273224, "learning_rate": 5.4408624021153264e-05, "loss": 1.2098, "step": 7164 }, { "epoch": 1.4565968692823745, "grad_norm": 0.16779474914073944, "learning_rate": 5.438828434862199e-05, "loss": 1.025, "step": 7165 }, { "epoch": 1.4568001626346818, "grad_norm": 0.15644671022891998, "learning_rate": 5.436794467609072e-05, "loss": 1.1397, "step": 7166 }, { "epoch": 1.4570034559869893, "grad_norm": 0.16888047754764557, "learning_rate": 5.4347605003559444e-05, "loss": 1.0564, "step": 7167 }, { "epoch": 1.4572067493392966, "grad_norm": 0.14582239091396332, "learning_rate": 5.4327265331028175e-05, "loss": 1.0946, "step": 7168 }, { "epoch": 1.457410042691604, "grad_norm": 0.17389684915542603, "learning_rate": 5.43069256584969e-05, "loss": 1.1177, "step": 7169 }, { "epoch": 1.4576133360439114, "grad_norm": 0.15936750173568726, "learning_rate": 5.428658598596563e-05, "loss": 0.8842, "step": 7170 }, { "epoch": 1.4578166293962187, "grad_norm": 0.14537377655506134, "learning_rate": 5.4266246313434355e-05, "loss": 0.9604, "step": 7171 }, { "epoch": 1.4580199227485262, "grad_norm": 0.1616072803735733, "learning_rate": 5.424590664090309e-05, "loss": 1.1369, "step": 7172 }, { "epoch": 1.4582232161008335, "grad_norm": 0.1477457731962204, "learning_rate": 5.422556696837181e-05, "loss": 0.9956, "step": 7173 }, { "epoch": 1.4584265094531408, "grad_norm": 0.1592814177274704, "learning_rate": 5.420522729584054e-05, "loss": 1.0028, "step": 7174 }, { "epoch": 1.4586298028054483, "grad_norm": 0.1648184210062027, "learning_rate": 5.418488762330927e-05, "loss": 1.0978, "step": 7175 }, { "epoch": 1.4588330961577556, "grad_norm": 0.15443973243236542, "learning_rate": 5.4164547950778e-05, "loss": 1.0634, "step": 7176 }, { "epoch": 1.459036389510063, "grad_norm": 0.15998506546020508, "learning_rate": 5.414420827824672e-05, "loss": 1.0185, "step": 7177 }, { "epoch": 1.4592396828623704, "grad_norm": 0.14759159088134766, "learning_rate": 5.4123868605715454e-05, "loss": 0.9069, "step": 7178 }, { "epoch": 1.4594429762146777, "grad_norm": 0.15868420898914337, "learning_rate": 5.410352893318418e-05, "loss": 1.0571, "step": 7179 }, { "epoch": 1.4596462695669852, "grad_norm": 0.15032470226287842, "learning_rate": 5.408318926065291e-05, "loss": 0.9006, "step": 7180 }, { "epoch": 1.4598495629192925, "grad_norm": 0.16723573207855225, "learning_rate": 5.4062849588121634e-05, "loss": 1.0758, "step": 7181 }, { "epoch": 1.4600528562716, "grad_norm": 0.17118899524211884, "learning_rate": 5.4042509915590365e-05, "loss": 1.0273, "step": 7182 }, { "epoch": 1.4602561496239073, "grad_norm": 0.17768427729606628, "learning_rate": 5.402217024305909e-05, "loss": 1.1246, "step": 7183 }, { "epoch": 1.4604594429762146, "grad_norm": 0.14075639843940735, "learning_rate": 5.4001830570527814e-05, "loss": 0.874, "step": 7184 }, { "epoch": 1.460662736328522, "grad_norm": 0.15958434343338013, "learning_rate": 5.3981490897996545e-05, "loss": 1.0438, "step": 7185 }, { "epoch": 1.4608660296808296, "grad_norm": 0.14858020842075348, "learning_rate": 5.396115122546527e-05, "loss": 1.0044, "step": 7186 }, { "epoch": 1.4610693230331369, "grad_norm": 0.15487819910049438, "learning_rate": 5.3940811552934e-05, "loss": 1.0189, "step": 7187 }, { "epoch": 1.4612726163854441, "grad_norm": 0.14576295018196106, "learning_rate": 5.3920471880402726e-05, "loss": 0.998, "step": 7188 }, { "epoch": 1.4614759097377517, "grad_norm": 0.1598920375108719, "learning_rate": 5.390013220787146e-05, "loss": 1.0848, "step": 7189 }, { "epoch": 1.461679203090059, "grad_norm": 0.18071123957633972, "learning_rate": 5.387979253534018e-05, "loss": 1.274, "step": 7190 }, { "epoch": 1.4618824964423665, "grad_norm": 0.15929163992404938, "learning_rate": 5.385945286280891e-05, "loss": 1.0846, "step": 7191 }, { "epoch": 1.4620857897946737, "grad_norm": 0.17697520554065704, "learning_rate": 5.383911319027764e-05, "loss": 1.2681, "step": 7192 }, { "epoch": 1.462289083146981, "grad_norm": 0.1605558842420578, "learning_rate": 5.381877351774637e-05, "loss": 1.0593, "step": 7193 }, { "epoch": 1.4624923764992885, "grad_norm": 0.16298873722553253, "learning_rate": 5.379843384521509e-05, "loss": 1.0556, "step": 7194 }, { "epoch": 1.4626956698515958, "grad_norm": 0.1575620025396347, "learning_rate": 5.3778094172683824e-05, "loss": 0.9694, "step": 7195 }, { "epoch": 1.4628989632039033, "grad_norm": 0.14187492430210114, "learning_rate": 5.375775450015255e-05, "loss": 0.8606, "step": 7196 }, { "epoch": 1.4631022565562106, "grad_norm": 0.13117793202400208, "learning_rate": 5.373741482762128e-05, "loss": 0.9196, "step": 7197 }, { "epoch": 1.463305549908518, "grad_norm": 0.153340682387352, "learning_rate": 5.3717075155090004e-05, "loss": 0.9895, "step": 7198 }, { "epoch": 1.4635088432608254, "grad_norm": 0.15271683037281036, "learning_rate": 5.3696735482558735e-05, "loss": 1.0878, "step": 7199 }, { "epoch": 1.4637121366131327, "grad_norm": 0.1467808485031128, "learning_rate": 5.367639581002746e-05, "loss": 0.8338, "step": 7200 }, { "epoch": 1.4639154299654402, "grad_norm": 0.15968874096870422, "learning_rate": 5.365605613749619e-05, "loss": 0.853, "step": 7201 }, { "epoch": 1.4641187233177475, "grad_norm": 0.16309960186481476, "learning_rate": 5.3635716464964915e-05, "loss": 1.1147, "step": 7202 }, { "epoch": 1.4643220166700548, "grad_norm": 0.14612539112567902, "learning_rate": 5.361537679243365e-05, "loss": 0.9917, "step": 7203 }, { "epoch": 1.4645253100223623, "grad_norm": 0.15759453177452087, "learning_rate": 5.359503711990237e-05, "loss": 0.8709, "step": 7204 }, { "epoch": 1.4647286033746696, "grad_norm": 0.14185784757137299, "learning_rate": 5.35746974473711e-05, "loss": 0.828, "step": 7205 }, { "epoch": 1.464931896726977, "grad_norm": 0.15282003581523895, "learning_rate": 5.355435777483983e-05, "loss": 1.0519, "step": 7206 }, { "epoch": 1.4651351900792844, "grad_norm": 0.1477069854736328, "learning_rate": 5.353401810230856e-05, "loss": 0.8896, "step": 7207 }, { "epoch": 1.4653384834315917, "grad_norm": 0.14850087463855743, "learning_rate": 5.351367842977728e-05, "loss": 0.9474, "step": 7208 }, { "epoch": 1.4655417767838992, "grad_norm": 0.16145406663417816, "learning_rate": 5.3493338757246014e-05, "loss": 1.0498, "step": 7209 }, { "epoch": 1.4657450701362065, "grad_norm": 0.14072728157043457, "learning_rate": 5.347299908471474e-05, "loss": 0.9445, "step": 7210 }, { "epoch": 1.465948363488514, "grad_norm": 0.13681919872760773, "learning_rate": 5.345265941218347e-05, "loss": 0.8509, "step": 7211 }, { "epoch": 1.4661516568408213, "grad_norm": 0.16994976997375488, "learning_rate": 5.3432319739652194e-05, "loss": 1.093, "step": 7212 }, { "epoch": 1.4663549501931286, "grad_norm": 0.15965984761714935, "learning_rate": 5.3411980067120925e-05, "loss": 0.929, "step": 7213 }, { "epoch": 1.466558243545436, "grad_norm": 0.15343396365642548, "learning_rate": 5.339164039458965e-05, "loss": 0.9733, "step": 7214 }, { "epoch": 1.4667615368977436, "grad_norm": 0.1533103734254837, "learning_rate": 5.337130072205838e-05, "loss": 0.9694, "step": 7215 }, { "epoch": 1.4669648302500509, "grad_norm": 0.14872144162654877, "learning_rate": 5.3350961049527105e-05, "loss": 1.0066, "step": 7216 }, { "epoch": 1.4671681236023582, "grad_norm": 0.1545114368200302, "learning_rate": 5.3330621376995837e-05, "loss": 1.0491, "step": 7217 }, { "epoch": 1.4673714169546657, "grad_norm": 0.1492878943681717, "learning_rate": 5.331028170446456e-05, "loss": 0.9975, "step": 7218 }, { "epoch": 1.467574710306973, "grad_norm": 0.14209310710430145, "learning_rate": 5.328994203193329e-05, "loss": 0.9145, "step": 7219 }, { "epoch": 1.4677780036592805, "grad_norm": 0.14603732526302338, "learning_rate": 5.326960235940202e-05, "loss": 0.9558, "step": 7220 }, { "epoch": 1.4679812970115877, "grad_norm": 0.161206915974617, "learning_rate": 5.324926268687075e-05, "loss": 1.0212, "step": 7221 }, { "epoch": 1.468184590363895, "grad_norm": 0.18879006803035736, "learning_rate": 5.322892301433947e-05, "loss": 1.1686, "step": 7222 }, { "epoch": 1.4683878837162025, "grad_norm": 0.16243679821491241, "learning_rate": 5.3208583341808204e-05, "loss": 1.1121, "step": 7223 }, { "epoch": 1.4685911770685098, "grad_norm": 0.15002648532390594, "learning_rate": 5.318824366927693e-05, "loss": 0.9707, "step": 7224 }, { "epoch": 1.4687944704208173, "grad_norm": 0.167652428150177, "learning_rate": 5.316790399674566e-05, "loss": 1.2484, "step": 7225 }, { "epoch": 1.4689977637731246, "grad_norm": 0.15510743856430054, "learning_rate": 5.3147564324214384e-05, "loss": 1.0713, "step": 7226 }, { "epoch": 1.469201057125432, "grad_norm": 0.14573627710342407, "learning_rate": 5.312722465168311e-05, "loss": 0.9877, "step": 7227 }, { "epoch": 1.4694043504777394, "grad_norm": 0.15572385489940643, "learning_rate": 5.310688497915184e-05, "loss": 1.0008, "step": 7228 }, { "epoch": 1.4696076438300467, "grad_norm": 0.1423737108707428, "learning_rate": 5.3086545306620564e-05, "loss": 0.8672, "step": 7229 }, { "epoch": 1.4698109371823542, "grad_norm": 0.16984347999095917, "learning_rate": 5.3066205634089295e-05, "loss": 1.0749, "step": 7230 }, { "epoch": 1.4700142305346615, "grad_norm": 0.15581457316875458, "learning_rate": 5.304586596155802e-05, "loss": 0.9514, "step": 7231 }, { "epoch": 1.4702175238869688, "grad_norm": 0.16326789557933807, "learning_rate": 5.302552628902675e-05, "loss": 0.9513, "step": 7232 }, { "epoch": 1.4704208172392763, "grad_norm": 0.1650605946779251, "learning_rate": 5.3005186616495475e-05, "loss": 0.964, "step": 7233 }, { "epoch": 1.4706241105915836, "grad_norm": 0.17731869220733643, "learning_rate": 5.2984846943964207e-05, "loss": 1.0705, "step": 7234 }, { "epoch": 1.470827403943891, "grad_norm": 0.14582465589046478, "learning_rate": 5.296450727143293e-05, "loss": 0.9405, "step": 7235 }, { "epoch": 1.4710306972961984, "grad_norm": 0.158736914396286, "learning_rate": 5.294416759890166e-05, "loss": 1.0272, "step": 7236 }, { "epoch": 1.4712339906485057, "grad_norm": 0.13593992590904236, "learning_rate": 5.292382792637039e-05, "loss": 0.9333, "step": 7237 }, { "epoch": 1.4714372840008132, "grad_norm": 0.17388807237148285, "learning_rate": 5.290348825383912e-05, "loss": 1.0486, "step": 7238 }, { "epoch": 1.4716405773531205, "grad_norm": 0.14610832929611206, "learning_rate": 5.288314858130784e-05, "loss": 0.7977, "step": 7239 }, { "epoch": 1.471843870705428, "grad_norm": 0.16025376319885254, "learning_rate": 5.2862808908776574e-05, "loss": 0.9542, "step": 7240 }, { "epoch": 1.4720471640577353, "grad_norm": 0.15763407945632935, "learning_rate": 5.28424692362453e-05, "loss": 1.0019, "step": 7241 }, { "epoch": 1.4722504574100426, "grad_norm": 0.1741354763507843, "learning_rate": 5.282212956371403e-05, "loss": 1.1706, "step": 7242 }, { "epoch": 1.47245375076235, "grad_norm": 0.14979106187820435, "learning_rate": 5.2801789891182754e-05, "loss": 0.9875, "step": 7243 }, { "epoch": 1.4726570441146576, "grad_norm": 0.15838104486465454, "learning_rate": 5.2781450218651485e-05, "loss": 0.9881, "step": 7244 }, { "epoch": 1.4728603374669649, "grad_norm": 0.17783360183238983, "learning_rate": 5.276111054612021e-05, "loss": 1.1805, "step": 7245 }, { "epoch": 1.4730636308192722, "grad_norm": 0.15877732634544373, "learning_rate": 5.274077087358894e-05, "loss": 1.0377, "step": 7246 }, { "epoch": 1.4732669241715795, "grad_norm": 0.159766286611557, "learning_rate": 5.2720431201057665e-05, "loss": 1.0324, "step": 7247 }, { "epoch": 1.473470217523887, "grad_norm": 0.1405743509531021, "learning_rate": 5.2700091528526396e-05, "loss": 0.8722, "step": 7248 }, { "epoch": 1.4736735108761945, "grad_norm": 0.1469542533159256, "learning_rate": 5.267975185599512e-05, "loss": 1.0329, "step": 7249 }, { "epoch": 1.4738768042285018, "grad_norm": 0.14367324113845825, "learning_rate": 5.265941218346385e-05, "loss": 0.9612, "step": 7250 }, { "epoch": 1.474080097580809, "grad_norm": 0.17195919156074524, "learning_rate": 5.263907251093258e-05, "loss": 1.1384, "step": 7251 }, { "epoch": 1.4742833909331166, "grad_norm": 0.1590740978717804, "learning_rate": 5.261873283840131e-05, "loss": 1.0834, "step": 7252 }, { "epoch": 1.4744866842854238, "grad_norm": 0.17466309666633606, "learning_rate": 5.259839316587003e-05, "loss": 1.0824, "step": 7253 }, { "epoch": 1.4746899776377314, "grad_norm": 0.15977461636066437, "learning_rate": 5.2578053493338764e-05, "loss": 1.0415, "step": 7254 }, { "epoch": 1.4748932709900386, "grad_norm": 0.15965703129768372, "learning_rate": 5.255771382080749e-05, "loss": 1.0812, "step": 7255 }, { "epoch": 1.475096564342346, "grad_norm": 0.17108100652694702, "learning_rate": 5.253737414827622e-05, "loss": 1.2236, "step": 7256 }, { "epoch": 1.4752998576946534, "grad_norm": 0.1626596599817276, "learning_rate": 5.2517034475744944e-05, "loss": 1.1297, "step": 7257 }, { "epoch": 1.4755031510469607, "grad_norm": 0.16700440645217896, "learning_rate": 5.2496694803213675e-05, "loss": 1.1175, "step": 7258 }, { "epoch": 1.4757064443992682, "grad_norm": 0.15646840631961823, "learning_rate": 5.24763551306824e-05, "loss": 0.9831, "step": 7259 }, { "epoch": 1.4759097377515755, "grad_norm": 0.14843416213989258, "learning_rate": 5.245601545815113e-05, "loss": 0.9445, "step": 7260 }, { "epoch": 1.4761130311038828, "grad_norm": 0.13760924339294434, "learning_rate": 5.2435675785619855e-05, "loss": 0.9984, "step": 7261 }, { "epoch": 1.4763163244561903, "grad_norm": 0.16428524255752563, "learning_rate": 5.2415336113088586e-05, "loss": 1.0017, "step": 7262 }, { "epoch": 1.4765196178084976, "grad_norm": 0.14717566967010498, "learning_rate": 5.239499644055731e-05, "loss": 0.8459, "step": 7263 }, { "epoch": 1.4767229111608051, "grad_norm": 0.16615287959575653, "learning_rate": 5.237465676802604e-05, "loss": 1.0156, "step": 7264 }, { "epoch": 1.4769262045131124, "grad_norm": 0.14632081985473633, "learning_rate": 5.2354317095494767e-05, "loss": 0.9946, "step": 7265 }, { "epoch": 1.4771294978654197, "grad_norm": 0.1523721069097519, "learning_rate": 5.23339774229635e-05, "loss": 0.9279, "step": 7266 }, { "epoch": 1.4773327912177272, "grad_norm": 0.16455121338367462, "learning_rate": 5.231363775043222e-05, "loss": 1.1135, "step": 7267 }, { "epoch": 1.4775360845700345, "grad_norm": 0.1441405862569809, "learning_rate": 5.229329807790094e-05, "loss": 0.902, "step": 7268 }, { "epoch": 1.477739377922342, "grad_norm": 0.1584034264087677, "learning_rate": 5.227295840536968e-05, "loss": 1.0865, "step": 7269 }, { "epoch": 1.4779426712746493, "grad_norm": 0.14937792718410492, "learning_rate": 5.2252618732838396e-05, "loss": 1.0402, "step": 7270 }, { "epoch": 1.4781459646269566, "grad_norm": 0.15211902558803558, "learning_rate": 5.2232279060307134e-05, "loss": 1.1008, "step": 7271 }, { "epoch": 1.478349257979264, "grad_norm": 0.1474466472864151, "learning_rate": 5.221193938777585e-05, "loss": 0.9956, "step": 7272 }, { "epoch": 1.4785525513315716, "grad_norm": 0.1490325927734375, "learning_rate": 5.219159971524459e-05, "loss": 0.988, "step": 7273 }, { "epoch": 1.4787558446838789, "grad_norm": 0.18194523453712463, "learning_rate": 5.217126004271331e-05, "loss": 1.0924, "step": 7274 }, { "epoch": 1.4789591380361862, "grad_norm": 0.16092152893543243, "learning_rate": 5.2150920370182045e-05, "loss": 0.9684, "step": 7275 }, { "epoch": 1.4791624313884935, "grad_norm": 0.13924293220043182, "learning_rate": 5.213058069765076e-05, "loss": 0.8937, "step": 7276 }, { "epoch": 1.479365724740801, "grad_norm": 0.16983318328857422, "learning_rate": 5.21102410251195e-05, "loss": 1.0294, "step": 7277 }, { "epoch": 1.4795690180931085, "grad_norm": 0.1642337441444397, "learning_rate": 5.208990135258822e-05, "loss": 1.0405, "step": 7278 }, { "epoch": 1.4797723114454158, "grad_norm": 0.14544382691383362, "learning_rate": 5.2069561680056956e-05, "loss": 1.0347, "step": 7279 }, { "epoch": 1.479975604797723, "grad_norm": 0.16924284398555756, "learning_rate": 5.204922200752568e-05, "loss": 1.1748, "step": 7280 }, { "epoch": 1.4801788981500306, "grad_norm": 0.13653366267681122, "learning_rate": 5.202888233499441e-05, "loss": 0.8597, "step": 7281 }, { "epoch": 1.4803821915023379, "grad_norm": 0.16047342121601105, "learning_rate": 5.2008542662463137e-05, "loss": 0.9599, "step": 7282 }, { "epoch": 1.4805854848546454, "grad_norm": 0.16013282537460327, "learning_rate": 5.198820298993187e-05, "loss": 0.9445, "step": 7283 }, { "epoch": 1.4807887782069526, "grad_norm": 0.15885138511657715, "learning_rate": 5.196786331740059e-05, "loss": 0.9222, "step": 7284 }, { "epoch": 1.48099207155926, "grad_norm": 0.15349248051643372, "learning_rate": 5.1947523644869324e-05, "loss": 1.096, "step": 7285 }, { "epoch": 1.4811953649115674, "grad_norm": 0.165516197681427, "learning_rate": 5.192718397233805e-05, "loss": 1.0377, "step": 7286 }, { "epoch": 1.4813986582638747, "grad_norm": 0.13928279280662537, "learning_rate": 5.190684429980678e-05, "loss": 0.8888, "step": 7287 }, { "epoch": 1.4816019516161822, "grad_norm": 0.1546129435300827, "learning_rate": 5.1886504627275504e-05, "loss": 0.9901, "step": 7288 }, { "epoch": 1.4818052449684895, "grad_norm": 0.14182148873806, "learning_rate": 5.1866164954744235e-05, "loss": 0.873, "step": 7289 }, { "epoch": 1.4820085383207968, "grad_norm": 0.14797116816043854, "learning_rate": 5.184582528221296e-05, "loss": 0.9726, "step": 7290 }, { "epoch": 1.4822118316731043, "grad_norm": 0.16081807017326355, "learning_rate": 5.182548560968169e-05, "loss": 1.1054, "step": 7291 }, { "epoch": 1.4824151250254116, "grad_norm": 0.14762349426746368, "learning_rate": 5.1805145937150415e-05, "loss": 1.0579, "step": 7292 }, { "epoch": 1.4826184183777191, "grad_norm": 0.14200522005558014, "learning_rate": 5.1784806264619146e-05, "loss": 0.8272, "step": 7293 }, { "epoch": 1.4828217117300264, "grad_norm": 0.1498144418001175, "learning_rate": 5.176446659208787e-05, "loss": 1.0083, "step": 7294 }, { "epoch": 1.4830250050823337, "grad_norm": 0.13944904506206512, "learning_rate": 5.17441269195566e-05, "loss": 0.8614, "step": 7295 }, { "epoch": 1.4832282984346412, "grad_norm": 0.16318222880363464, "learning_rate": 5.1723787247025326e-05, "loss": 1.0711, "step": 7296 }, { "epoch": 1.4834315917869485, "grad_norm": 0.15789909660816193, "learning_rate": 5.170344757449406e-05, "loss": 0.9389, "step": 7297 }, { "epoch": 1.483634885139256, "grad_norm": 0.15837034583091736, "learning_rate": 5.168310790196278e-05, "loss": 1.0717, "step": 7298 }, { "epoch": 1.4838381784915633, "grad_norm": 0.1504717320203781, "learning_rate": 5.1662768229431513e-05, "loss": 1.033, "step": 7299 }, { "epoch": 1.4840414718438706, "grad_norm": 0.13542811572551727, "learning_rate": 5.164242855690024e-05, "loss": 0.9785, "step": 7300 }, { "epoch": 1.484244765196178, "grad_norm": 0.17283979058265686, "learning_rate": 5.162208888436897e-05, "loss": 1.0065, "step": 7301 }, { "epoch": 1.4844480585484854, "grad_norm": 0.14844317734241486, "learning_rate": 5.1601749211837694e-05, "loss": 0.9527, "step": 7302 }, { "epoch": 1.484651351900793, "grad_norm": 0.13952547311782837, "learning_rate": 5.1581409539306425e-05, "loss": 0.879, "step": 7303 }, { "epoch": 1.4848546452531002, "grad_norm": 0.1655363291501999, "learning_rate": 5.156106986677515e-05, "loss": 1.0691, "step": 7304 }, { "epoch": 1.4850579386054075, "grad_norm": 0.14126764237880707, "learning_rate": 5.154073019424388e-05, "loss": 0.9801, "step": 7305 }, { "epoch": 1.485261231957715, "grad_norm": 0.14697974920272827, "learning_rate": 5.1520390521712605e-05, "loss": 1.0006, "step": 7306 }, { "epoch": 1.4854645253100225, "grad_norm": 0.14936833083629608, "learning_rate": 5.1500050849181336e-05, "loss": 1.0641, "step": 7307 }, { "epoch": 1.4856678186623298, "grad_norm": 0.15351562201976776, "learning_rate": 5.147971117665006e-05, "loss": 0.8251, "step": 7308 }, { "epoch": 1.485871112014637, "grad_norm": 0.14514027535915375, "learning_rate": 5.145937150411878e-05, "loss": 0.9151, "step": 7309 }, { "epoch": 1.4860744053669446, "grad_norm": 0.16834424436092377, "learning_rate": 5.1439031831587516e-05, "loss": 1.1145, "step": 7310 }, { "epoch": 1.4862776987192519, "grad_norm": 0.16079841554164886, "learning_rate": 5.1418692159056234e-05, "loss": 1.1091, "step": 7311 }, { "epoch": 1.4864809920715594, "grad_norm": 0.16498316824436188, "learning_rate": 5.139835248652497e-05, "loss": 1.1002, "step": 7312 }, { "epoch": 1.4866842854238667, "grad_norm": 0.13437385857105255, "learning_rate": 5.137801281399369e-05, "loss": 0.9243, "step": 7313 }, { "epoch": 1.486887578776174, "grad_norm": 0.14202702045440674, "learning_rate": 5.135767314146243e-05, "loss": 0.9927, "step": 7314 }, { "epoch": 1.4870908721284815, "grad_norm": 0.1714879423379898, "learning_rate": 5.1337333468931145e-05, "loss": 0.922, "step": 7315 }, { "epoch": 1.4872941654807887, "grad_norm": 0.14939868450164795, "learning_rate": 5.1316993796399883e-05, "loss": 0.9081, "step": 7316 }, { "epoch": 1.4874974588330963, "grad_norm": 0.15223632752895355, "learning_rate": 5.12966541238686e-05, "loss": 0.9435, "step": 7317 }, { "epoch": 1.4877007521854035, "grad_norm": 0.1733781099319458, "learning_rate": 5.127631445133734e-05, "loss": 1.1178, "step": 7318 }, { "epoch": 1.4879040455377108, "grad_norm": 0.18424783647060394, "learning_rate": 5.125597477880606e-05, "loss": 1.1653, "step": 7319 }, { "epoch": 1.4881073388900183, "grad_norm": 0.16070927679538727, "learning_rate": 5.1235635106274795e-05, "loss": 1.1551, "step": 7320 }, { "epoch": 1.4883106322423256, "grad_norm": 0.1407182812690735, "learning_rate": 5.121529543374351e-05, "loss": 0.9149, "step": 7321 }, { "epoch": 1.4885139255946331, "grad_norm": 0.1472930908203125, "learning_rate": 5.119495576121225e-05, "loss": 1.0594, "step": 7322 }, { "epoch": 1.4887172189469404, "grad_norm": 0.17095035314559937, "learning_rate": 5.117461608868097e-05, "loss": 1.1867, "step": 7323 }, { "epoch": 1.4889205122992477, "grad_norm": 0.15441139042377472, "learning_rate": 5.1154276416149706e-05, "loss": 1.0417, "step": 7324 }, { "epoch": 1.4891238056515552, "grad_norm": 0.16108761727809906, "learning_rate": 5.1133936743618424e-05, "loss": 0.994, "step": 7325 }, { "epoch": 1.4893270990038625, "grad_norm": 0.1708153337240219, "learning_rate": 5.111359707108716e-05, "loss": 1.1945, "step": 7326 }, { "epoch": 1.48953039235617, "grad_norm": 0.15345723927021027, "learning_rate": 5.109325739855588e-05, "loss": 0.99, "step": 7327 }, { "epoch": 1.4897336857084773, "grad_norm": 0.19498048722743988, "learning_rate": 5.107291772602462e-05, "loss": 1.1735, "step": 7328 }, { "epoch": 1.4899369790607846, "grad_norm": 0.1828971654176712, "learning_rate": 5.1052578053493335e-05, "loss": 0.9769, "step": 7329 }, { "epoch": 1.490140272413092, "grad_norm": 0.15867459774017334, "learning_rate": 5.103223838096207e-05, "loss": 1.0853, "step": 7330 }, { "epoch": 1.4903435657653994, "grad_norm": 0.14858923852443695, "learning_rate": 5.101189870843079e-05, "loss": 0.9781, "step": 7331 }, { "epoch": 1.490546859117707, "grad_norm": 0.1680002361536026, "learning_rate": 5.099155903589953e-05, "loss": 1.1203, "step": 7332 }, { "epoch": 1.4907501524700142, "grad_norm": 0.1647680550813675, "learning_rate": 5.097121936336825e-05, "loss": 0.9581, "step": 7333 }, { "epoch": 1.4909534458223215, "grad_norm": 0.1701551079750061, "learning_rate": 5.0950879690836985e-05, "loss": 1.0304, "step": 7334 }, { "epoch": 1.491156739174629, "grad_norm": 0.14726072549819946, "learning_rate": 5.09305400183057e-05, "loss": 0.9659, "step": 7335 }, { "epoch": 1.4913600325269365, "grad_norm": 0.15449485182762146, "learning_rate": 5.091020034577444e-05, "loss": 1.0142, "step": 7336 }, { "epoch": 1.4915633258792438, "grad_norm": 0.17248129844665527, "learning_rate": 5.0889860673243165e-05, "loss": 1.1532, "step": 7337 }, { "epoch": 1.491766619231551, "grad_norm": 0.1599951684474945, "learning_rate": 5.0869521000711896e-05, "loss": 1.0177, "step": 7338 }, { "epoch": 1.4919699125838586, "grad_norm": 0.17572374641895294, "learning_rate": 5.084918132818062e-05, "loss": 1.1042, "step": 7339 }, { "epoch": 1.4921732059361659, "grad_norm": 0.16550900042057037, "learning_rate": 5.082884165564935e-05, "loss": 0.9186, "step": 7340 }, { "epoch": 1.4923764992884734, "grad_norm": 0.15446113049983978, "learning_rate": 5.0808501983118076e-05, "loss": 1.0586, "step": 7341 }, { "epoch": 1.4925797926407807, "grad_norm": 0.17314577102661133, "learning_rate": 5.078816231058681e-05, "loss": 1.0935, "step": 7342 }, { "epoch": 1.492783085993088, "grad_norm": 0.16413532197475433, "learning_rate": 5.076782263805553e-05, "loss": 1.1736, "step": 7343 }, { "epoch": 1.4929863793453955, "grad_norm": 0.1834377646446228, "learning_rate": 5.074748296552426e-05, "loss": 1.1242, "step": 7344 }, { "epoch": 1.4931896726977028, "grad_norm": 0.16163167357444763, "learning_rate": 5.072714329299299e-05, "loss": 1.1114, "step": 7345 }, { "epoch": 1.4933929660500103, "grad_norm": 0.14689160883426666, "learning_rate": 5.070680362046172e-05, "loss": 0.9022, "step": 7346 }, { "epoch": 1.4935962594023175, "grad_norm": 0.158969908952713, "learning_rate": 5.0686463947930443e-05, "loss": 1.069, "step": 7347 }, { "epoch": 1.4937995527546248, "grad_norm": 0.1602858155965805, "learning_rate": 5.0666124275399175e-05, "loss": 1.0554, "step": 7348 }, { "epoch": 1.4940028461069323, "grad_norm": 0.16024748980998993, "learning_rate": 5.06457846028679e-05, "loss": 1.0337, "step": 7349 }, { "epoch": 1.4942061394592396, "grad_norm": 0.14937053620815277, "learning_rate": 5.062544493033662e-05, "loss": 1.019, "step": 7350 }, { "epoch": 1.4944094328115471, "grad_norm": 0.16690638661384583, "learning_rate": 5.0605105257805355e-05, "loss": 1.1478, "step": 7351 }, { "epoch": 1.4946127261638544, "grad_norm": 0.162098228931427, "learning_rate": 5.058476558527407e-05, "loss": 1.026, "step": 7352 }, { "epoch": 1.4948160195161617, "grad_norm": 0.16602452099323273, "learning_rate": 5.056442591274281e-05, "loss": 0.9843, "step": 7353 }, { "epoch": 1.4950193128684692, "grad_norm": 0.15590983629226685, "learning_rate": 5.054408624021153e-05, "loss": 0.9234, "step": 7354 }, { "epoch": 1.4952226062207765, "grad_norm": 0.15621311962604523, "learning_rate": 5.0523746567680266e-05, "loss": 0.9961, "step": 7355 }, { "epoch": 1.495425899573084, "grad_norm": 0.14333868026733398, "learning_rate": 5.0503406895148984e-05, "loss": 0.9153, "step": 7356 }, { "epoch": 1.4956291929253913, "grad_norm": 0.19151797890663147, "learning_rate": 5.048306722261772e-05, "loss": 1.319, "step": 7357 }, { "epoch": 1.4958324862776986, "grad_norm": 0.15350469946861267, "learning_rate": 5.046272755008644e-05, "loss": 1.0148, "step": 7358 }, { "epoch": 1.4960357796300061, "grad_norm": 0.1540667861700058, "learning_rate": 5.044238787755518e-05, "loss": 0.9678, "step": 7359 }, { "epoch": 1.4962390729823134, "grad_norm": 0.1420007050037384, "learning_rate": 5.0422048205023895e-05, "loss": 1.0249, "step": 7360 }, { "epoch": 1.496442366334621, "grad_norm": 0.1667315810918808, "learning_rate": 5.040170853249263e-05, "loss": 1.1174, "step": 7361 }, { "epoch": 1.4966456596869282, "grad_norm": 0.13870370388031006, "learning_rate": 5.038136885996135e-05, "loss": 1.0738, "step": 7362 }, { "epoch": 1.4968489530392355, "grad_norm": 0.14800742268562317, "learning_rate": 5.036102918743009e-05, "loss": 0.9589, "step": 7363 }, { "epoch": 1.497052246391543, "grad_norm": 0.15649986267089844, "learning_rate": 5.034068951489881e-05, "loss": 0.9814, "step": 7364 }, { "epoch": 1.4972555397438505, "grad_norm": 0.15127186477184296, "learning_rate": 5.0320349842367545e-05, "loss": 0.9122, "step": 7365 }, { "epoch": 1.4974588330961578, "grad_norm": 0.14589205384254456, "learning_rate": 5.030001016983626e-05, "loss": 0.9516, "step": 7366 }, { "epoch": 1.497662126448465, "grad_norm": 0.14768987894058228, "learning_rate": 5.0279670497305e-05, "loss": 1.1394, "step": 7367 }, { "epoch": 1.4978654198007726, "grad_norm": 0.1471613049507141, "learning_rate": 5.025933082477372e-05, "loss": 0.8922, "step": 7368 }, { "epoch": 1.4980687131530799, "grad_norm": 0.15747645497322083, "learning_rate": 5.0238991152242456e-05, "loss": 1.0475, "step": 7369 }, { "epoch": 1.4982720065053874, "grad_norm": 0.16879907250404358, "learning_rate": 5.0218651479711174e-05, "loss": 1.2512, "step": 7370 }, { "epoch": 1.4984752998576947, "grad_norm": 0.1588255614042282, "learning_rate": 5.019831180717991e-05, "loss": 1.0016, "step": 7371 }, { "epoch": 1.498678593210002, "grad_norm": 0.1563660055398941, "learning_rate": 5.017797213464863e-05, "loss": 0.9547, "step": 7372 }, { "epoch": 1.4988818865623095, "grad_norm": 0.15730784833431244, "learning_rate": 5.015763246211737e-05, "loss": 0.9483, "step": 7373 }, { "epoch": 1.4990851799146168, "grad_norm": 0.14192761480808258, "learning_rate": 5.0137292789586085e-05, "loss": 0.9, "step": 7374 }, { "epoch": 1.4992884732669243, "grad_norm": 0.1614556461572647, "learning_rate": 5.011695311705482e-05, "loss": 1.0514, "step": 7375 }, { "epoch": 1.4994917666192316, "grad_norm": 0.133713498711586, "learning_rate": 5.009661344452354e-05, "loss": 0.9055, "step": 7376 }, { "epoch": 1.4996950599715388, "grad_norm": 0.16982313990592957, "learning_rate": 5.007627377199228e-05, "loss": 1.1643, "step": 7377 }, { "epoch": 1.4998983533238464, "grad_norm": 0.14745911955833435, "learning_rate": 5.0055934099460997e-05, "loss": 1.0721, "step": 7378 }, { "epoch": 1.5001016466761536, "grad_norm": 0.1416764110326767, "learning_rate": 5.0035594426929735e-05, "loss": 0.8848, "step": 7379 }, { "epoch": 1.5003049400284612, "grad_norm": 0.15031202137470245, "learning_rate": 5.001525475439845e-05, "loss": 0.9917, "step": 7380 }, { "epoch": 1.5005082333807684, "grad_norm": 0.1697656214237213, "learning_rate": 4.9994915081867184e-05, "loss": 1.0059, "step": 7381 }, { "epoch": 1.5007115267330757, "grad_norm": 0.16710081696510315, "learning_rate": 4.997457540933591e-05, "loss": 1.0506, "step": 7382 }, { "epoch": 1.5009148200853832, "grad_norm": 0.1541258841753006, "learning_rate": 4.995423573680464e-05, "loss": 1.0352, "step": 7383 }, { "epoch": 1.5011181134376907, "grad_norm": 0.1533016413450241, "learning_rate": 4.9933896064273364e-05, "loss": 1.0231, "step": 7384 }, { "epoch": 1.501321406789998, "grad_norm": 0.1787251979112625, "learning_rate": 4.9913556391742095e-05, "loss": 1.122, "step": 7385 }, { "epoch": 1.5015247001423053, "grad_norm": 0.15674886107444763, "learning_rate": 4.989321671921082e-05, "loss": 0.9164, "step": 7386 }, { "epoch": 1.5017279934946126, "grad_norm": 0.1377156525850296, "learning_rate": 4.987287704667955e-05, "loss": 0.8968, "step": 7387 }, { "epoch": 1.5019312868469201, "grad_norm": 0.1509624719619751, "learning_rate": 4.9852537374148275e-05, "loss": 0.9561, "step": 7388 }, { "epoch": 1.5021345801992276, "grad_norm": 0.1546659767627716, "learning_rate": 4.9832197701617006e-05, "loss": 0.9526, "step": 7389 }, { "epoch": 1.502337873551535, "grad_norm": 0.1705237329006195, "learning_rate": 4.981185802908573e-05, "loss": 1.1172, "step": 7390 }, { "epoch": 1.5025411669038422, "grad_norm": 0.16375771164894104, "learning_rate": 4.979151835655446e-05, "loss": 1.0827, "step": 7391 }, { "epoch": 1.5027444602561495, "grad_norm": 0.16147857904434204, "learning_rate": 4.9771178684023186e-05, "loss": 1.007, "step": 7392 }, { "epoch": 1.502947753608457, "grad_norm": 0.16066159307956696, "learning_rate": 4.975083901149192e-05, "loss": 1.1456, "step": 7393 }, { "epoch": 1.5031510469607645, "grad_norm": 0.15176479518413544, "learning_rate": 4.973049933896064e-05, "loss": 1.16, "step": 7394 }, { "epoch": 1.5033543403130718, "grad_norm": 0.19379957020282745, "learning_rate": 4.9710159666429373e-05, "loss": 1.0532, "step": 7395 }, { "epoch": 1.503557633665379, "grad_norm": 0.14722734689712524, "learning_rate": 4.9689819993898105e-05, "loss": 0.8578, "step": 7396 }, { "epoch": 1.5037609270176864, "grad_norm": 0.14202560484409332, "learning_rate": 4.966948032136683e-05, "loss": 0.8695, "step": 7397 }, { "epoch": 1.5039642203699939, "grad_norm": 0.14851757884025574, "learning_rate": 4.964914064883556e-05, "loss": 0.9655, "step": 7398 }, { "epoch": 1.5041675137223014, "grad_norm": 0.16248172521591187, "learning_rate": 4.9628800976304285e-05, "loss": 0.9906, "step": 7399 }, { "epoch": 1.5043708070746087, "grad_norm": 0.1500251442193985, "learning_rate": 4.9608461303773016e-05, "loss": 0.9284, "step": 7400 }, { "epoch": 1.504574100426916, "grad_norm": 0.17004640400409698, "learning_rate": 4.958812163124174e-05, "loss": 0.9729, "step": 7401 }, { "epoch": 1.5047773937792233, "grad_norm": 0.15960298478603363, "learning_rate": 4.956778195871047e-05, "loss": 1.0183, "step": 7402 }, { "epoch": 1.5049806871315308, "grad_norm": 0.15985259413719177, "learning_rate": 4.9547442286179196e-05, "loss": 0.9871, "step": 7403 }, { "epoch": 1.5051839804838383, "grad_norm": 0.12836353480815887, "learning_rate": 4.952710261364793e-05, "loss": 0.8302, "step": 7404 }, { "epoch": 1.5053872738361456, "grad_norm": 0.18904882669448853, "learning_rate": 4.950676294111665e-05, "loss": 1.2109, "step": 7405 }, { "epoch": 1.5055905671884529, "grad_norm": 0.16572581231594086, "learning_rate": 4.948642326858538e-05, "loss": 1.2165, "step": 7406 }, { "epoch": 1.5057938605407604, "grad_norm": 0.13275861740112305, "learning_rate": 4.946608359605411e-05, "loss": 0.8393, "step": 7407 }, { "epoch": 1.5059971538930677, "grad_norm": 0.15469948947429657, "learning_rate": 4.944574392352284e-05, "loss": 1.1301, "step": 7408 }, { "epoch": 1.5062004472453752, "grad_norm": 0.15814034640789032, "learning_rate": 4.942540425099156e-05, "loss": 1.0332, "step": 7409 }, { "epoch": 1.5064037405976825, "grad_norm": 0.14642979204654694, "learning_rate": 4.9405064578460295e-05, "loss": 0.9571, "step": 7410 }, { "epoch": 1.5066070339499897, "grad_norm": 0.13333429396152496, "learning_rate": 4.938472490592902e-05, "loss": 0.8927, "step": 7411 }, { "epoch": 1.5068103273022972, "grad_norm": 0.15922658145427704, "learning_rate": 4.9364385233397743e-05, "loss": 1.0756, "step": 7412 }, { "epoch": 1.5070136206546048, "grad_norm": 0.15627671778202057, "learning_rate": 4.934404556086647e-05, "loss": 0.8736, "step": 7413 }, { "epoch": 1.507216914006912, "grad_norm": 0.1477116048336029, "learning_rate": 4.93237058883352e-05, "loss": 0.9684, "step": 7414 }, { "epoch": 1.5074202073592193, "grad_norm": 0.15267114341259003, "learning_rate": 4.9303366215803924e-05, "loss": 0.9281, "step": 7415 }, { "epoch": 1.5076235007115266, "grad_norm": 0.14677409827709198, "learning_rate": 4.9283026543272655e-05, "loss": 1.0185, "step": 7416 }, { "epoch": 1.5078267940638341, "grad_norm": 0.1623264104127884, "learning_rate": 4.926268687074138e-05, "loss": 1.1089, "step": 7417 }, { "epoch": 1.5080300874161416, "grad_norm": 0.16942016780376434, "learning_rate": 4.924234719821011e-05, "loss": 1.2174, "step": 7418 }, { "epoch": 1.508233380768449, "grad_norm": 0.1675270050764084, "learning_rate": 4.9222007525678835e-05, "loss": 1.0502, "step": 7419 }, { "epoch": 1.5084366741207562, "grad_norm": 0.13707569241523743, "learning_rate": 4.9201667853147566e-05, "loss": 0.8527, "step": 7420 }, { "epoch": 1.5086399674730635, "grad_norm": 0.16163641214370728, "learning_rate": 4.918132818061629e-05, "loss": 1.0057, "step": 7421 }, { "epoch": 1.508843260825371, "grad_norm": 0.15098832547664642, "learning_rate": 4.916098850808502e-05, "loss": 0.9811, "step": 7422 }, { "epoch": 1.5090465541776785, "grad_norm": 0.16301870346069336, "learning_rate": 4.9140648835553746e-05, "loss": 1.1367, "step": 7423 }, { "epoch": 1.5092498475299858, "grad_norm": 0.16614048182964325, "learning_rate": 4.912030916302248e-05, "loss": 1.2153, "step": 7424 }, { "epoch": 1.509453140882293, "grad_norm": 0.14212031662464142, "learning_rate": 4.90999694904912e-05, "loss": 0.9264, "step": 7425 }, { "epoch": 1.5096564342346004, "grad_norm": 0.1504111886024475, "learning_rate": 4.907962981795993e-05, "loss": 0.928, "step": 7426 }, { "epoch": 1.509859727586908, "grad_norm": 0.1598103642463684, "learning_rate": 4.905929014542866e-05, "loss": 1.0198, "step": 7427 }, { "epoch": 1.5100630209392154, "grad_norm": 0.1484471559524536, "learning_rate": 4.903895047289739e-05, "loss": 0.9336, "step": 7428 }, { "epoch": 1.5102663142915227, "grad_norm": 0.14721804857254028, "learning_rate": 4.9018610800366114e-05, "loss": 0.8471, "step": 7429 }, { "epoch": 1.51046960764383, "grad_norm": 0.15284955501556396, "learning_rate": 4.8998271127834845e-05, "loss": 1.0969, "step": 7430 }, { "epoch": 1.5106729009961373, "grad_norm": 0.1515507698059082, "learning_rate": 4.897793145530357e-05, "loss": 1.0801, "step": 7431 }, { "epoch": 1.5108761943484448, "grad_norm": 0.15165692567825317, "learning_rate": 4.89575917827723e-05, "loss": 1.0073, "step": 7432 }, { "epoch": 1.5110794877007523, "grad_norm": 0.16193105280399323, "learning_rate": 4.8937252110241025e-05, "loss": 1.0088, "step": 7433 }, { "epoch": 1.5112827810530596, "grad_norm": 0.16101758182048798, "learning_rate": 4.8916912437709756e-05, "loss": 0.8903, "step": 7434 }, { "epoch": 1.5114860744053669, "grad_norm": 0.15236888825893402, "learning_rate": 4.889657276517848e-05, "loss": 0.9741, "step": 7435 }, { "epoch": 1.5116893677576744, "grad_norm": 0.15327027440071106, "learning_rate": 4.887623309264721e-05, "loss": 1.0023, "step": 7436 }, { "epoch": 1.5118926611099817, "grad_norm": 0.1660035252571106, "learning_rate": 4.8855893420115936e-05, "loss": 1.1632, "step": 7437 }, { "epoch": 1.5120959544622892, "grad_norm": 0.14389246702194214, "learning_rate": 4.883555374758467e-05, "loss": 0.8665, "step": 7438 }, { "epoch": 1.5122992478145965, "grad_norm": 0.15834391117095947, "learning_rate": 4.881521407505339e-05, "loss": 1.0066, "step": 7439 }, { "epoch": 1.5125025411669037, "grad_norm": 0.1547255963087082, "learning_rate": 4.879487440252212e-05, "loss": 0.9848, "step": 7440 }, { "epoch": 1.5127058345192113, "grad_norm": 0.1583252251148224, "learning_rate": 4.877453472999085e-05, "loss": 1.0239, "step": 7441 }, { "epoch": 1.5129091278715188, "grad_norm": 0.17128023505210876, "learning_rate": 4.875419505745958e-05, "loss": 1.0968, "step": 7442 }, { "epoch": 1.513112421223826, "grad_norm": 0.15349778532981873, "learning_rate": 4.8733855384928303e-05, "loss": 0.9062, "step": 7443 }, { "epoch": 1.5133157145761333, "grad_norm": 0.1570233255624771, "learning_rate": 4.8713515712397035e-05, "loss": 0.9631, "step": 7444 }, { "epoch": 1.5135190079284406, "grad_norm": 0.1659766435623169, "learning_rate": 4.869317603986576e-05, "loss": 1.077, "step": 7445 }, { "epoch": 1.5137223012807481, "grad_norm": 0.16281379759311676, "learning_rate": 4.867283636733449e-05, "loss": 1.0399, "step": 7446 }, { "epoch": 1.5139255946330556, "grad_norm": 0.16851572692394257, "learning_rate": 4.8652496694803215e-05, "loss": 1.0689, "step": 7447 }, { "epoch": 1.514128887985363, "grad_norm": 0.14893779158592224, "learning_rate": 4.8632157022271946e-05, "loss": 0.8254, "step": 7448 }, { "epoch": 1.5143321813376702, "grad_norm": 0.15788701176643372, "learning_rate": 4.861181734974067e-05, "loss": 0.9946, "step": 7449 }, { "epoch": 1.5145354746899775, "grad_norm": 0.14474782347679138, "learning_rate": 4.85914776772094e-05, "loss": 0.8394, "step": 7450 }, { "epoch": 1.514738768042285, "grad_norm": 0.15891660749912262, "learning_rate": 4.8571138004678126e-05, "loss": 0.9978, "step": 7451 }, { "epoch": 1.5149420613945925, "grad_norm": 0.15987549722194672, "learning_rate": 4.855079833214686e-05, "loss": 0.9478, "step": 7452 }, { "epoch": 1.5151453547468998, "grad_norm": 0.14981964230537415, "learning_rate": 4.853045865961558e-05, "loss": 1.0904, "step": 7453 }, { "epoch": 1.515348648099207, "grad_norm": 0.17206816375255585, "learning_rate": 4.8510118987084306e-05, "loss": 1.238, "step": 7454 }, { "epoch": 1.5155519414515144, "grad_norm": 0.17081889510154724, "learning_rate": 4.848977931455304e-05, "loss": 1.0376, "step": 7455 }, { "epoch": 1.515755234803822, "grad_norm": 0.15734606981277466, "learning_rate": 4.846943964202176e-05, "loss": 0.9347, "step": 7456 }, { "epoch": 1.5159585281561294, "grad_norm": 0.14848117530345917, "learning_rate": 4.844909996949049e-05, "loss": 1.103, "step": 7457 }, { "epoch": 1.5161618215084367, "grad_norm": 0.18724314868450165, "learning_rate": 4.842876029695922e-05, "loss": 1.1861, "step": 7458 }, { "epoch": 1.516365114860744, "grad_norm": 0.1609761267900467, "learning_rate": 4.840842062442795e-05, "loss": 1.0222, "step": 7459 }, { "epoch": 1.5165684082130513, "grad_norm": 0.14589814841747284, "learning_rate": 4.8388080951896673e-05, "loss": 0.909, "step": 7460 }, { "epoch": 1.5167717015653588, "grad_norm": 0.1623678058385849, "learning_rate": 4.8367741279365405e-05, "loss": 1.032, "step": 7461 }, { "epoch": 1.5169749949176663, "grad_norm": 0.15050582587718964, "learning_rate": 4.834740160683413e-05, "loss": 0.9827, "step": 7462 }, { "epoch": 1.5171782882699736, "grad_norm": 0.15027949213981628, "learning_rate": 4.832706193430286e-05, "loss": 0.9398, "step": 7463 }, { "epoch": 1.5173815816222809, "grad_norm": 0.18657010793685913, "learning_rate": 4.8306722261771585e-05, "loss": 1.3522, "step": 7464 }, { "epoch": 1.5175848749745884, "grad_norm": 0.16288447380065918, "learning_rate": 4.8286382589240316e-05, "loss": 1.0864, "step": 7465 }, { "epoch": 1.5177881683268957, "grad_norm": 0.14163288474082947, "learning_rate": 4.826604291670904e-05, "loss": 0.8257, "step": 7466 }, { "epoch": 1.5179914616792032, "grad_norm": 0.1605214625597, "learning_rate": 4.824570324417777e-05, "loss": 1.016, "step": 7467 }, { "epoch": 1.5181947550315105, "grad_norm": 0.15949556231498718, "learning_rate": 4.8225363571646496e-05, "loss": 1.0179, "step": 7468 }, { "epoch": 1.5183980483838178, "grad_norm": 0.1667870730161667, "learning_rate": 4.820502389911523e-05, "loss": 1.127, "step": 7469 }, { "epoch": 1.5186013417361253, "grad_norm": 0.1582157164812088, "learning_rate": 4.818468422658395e-05, "loss": 0.9873, "step": 7470 }, { "epoch": 1.5188046350884326, "grad_norm": 0.1699240505695343, "learning_rate": 4.816434455405268e-05, "loss": 1.0634, "step": 7471 }, { "epoch": 1.51900792844074, "grad_norm": 0.1658029556274414, "learning_rate": 4.814400488152141e-05, "loss": 1.1744, "step": 7472 }, { "epoch": 1.5192112217930474, "grad_norm": 0.16187456250190735, "learning_rate": 4.812366520899014e-05, "loss": 0.9219, "step": 7473 }, { "epoch": 1.5194145151453546, "grad_norm": 0.15792818367481232, "learning_rate": 4.810332553645886e-05, "loss": 1.0043, "step": 7474 }, { "epoch": 1.5196178084976621, "grad_norm": 0.16355274617671967, "learning_rate": 4.8082985863927595e-05, "loss": 1.1079, "step": 7475 }, { "epoch": 1.5198211018499697, "grad_norm": 0.15543818473815918, "learning_rate": 4.806264619139632e-05, "loss": 1.0714, "step": 7476 }, { "epoch": 1.520024395202277, "grad_norm": 0.14773859083652496, "learning_rate": 4.804230651886505e-05, "loss": 0.8847, "step": 7477 }, { "epoch": 1.5202276885545842, "grad_norm": 0.16230449080467224, "learning_rate": 4.8021966846333775e-05, "loss": 0.9657, "step": 7478 }, { "epoch": 1.5204309819068915, "grad_norm": 0.15943102538585663, "learning_rate": 4.8001627173802506e-05, "loss": 1.0591, "step": 7479 }, { "epoch": 1.520634275259199, "grad_norm": 0.12978902459144592, "learning_rate": 4.798128750127123e-05, "loss": 0.8861, "step": 7480 }, { "epoch": 1.5208375686115065, "grad_norm": 0.1726493388414383, "learning_rate": 4.796094782873996e-05, "loss": 1.0854, "step": 7481 }, { "epoch": 1.5210408619638138, "grad_norm": 0.15407046675682068, "learning_rate": 4.7940608156208686e-05, "loss": 1.0619, "step": 7482 }, { "epoch": 1.5212441553161211, "grad_norm": 0.1641497015953064, "learning_rate": 4.792026848367742e-05, "loss": 1.03, "step": 7483 }, { "epoch": 1.5214474486684284, "grad_norm": 0.15649664402008057, "learning_rate": 4.789992881114614e-05, "loss": 0.91, "step": 7484 }, { "epoch": 1.521650742020736, "grad_norm": 0.14974331855773926, "learning_rate": 4.787958913861487e-05, "loss": 0.9223, "step": 7485 }, { "epoch": 1.5218540353730434, "grad_norm": 0.16601666808128357, "learning_rate": 4.78592494660836e-05, "loss": 0.9536, "step": 7486 }, { "epoch": 1.5220573287253507, "grad_norm": 0.15077362954616547, "learning_rate": 4.783890979355233e-05, "loss": 1.0178, "step": 7487 }, { "epoch": 1.522260622077658, "grad_norm": 0.16418759524822235, "learning_rate": 4.781857012102105e-05, "loss": 1.0563, "step": 7488 }, { "epoch": 1.5224639154299653, "grad_norm": 0.1693541556596756, "learning_rate": 4.7798230448489784e-05, "loss": 1.1455, "step": 7489 }, { "epoch": 1.5226672087822728, "grad_norm": 0.1629193276166916, "learning_rate": 4.777789077595851e-05, "loss": 0.9781, "step": 7490 }, { "epoch": 1.5228705021345803, "grad_norm": 0.15155495703220367, "learning_rate": 4.775755110342724e-05, "loss": 0.9964, "step": 7491 }, { "epoch": 1.5230737954868876, "grad_norm": 0.167188361287117, "learning_rate": 4.7737211430895965e-05, "loss": 1.0437, "step": 7492 }, { "epoch": 1.5232770888391949, "grad_norm": 0.15585185587406158, "learning_rate": 4.7716871758364696e-05, "loss": 0.9516, "step": 7493 }, { "epoch": 1.5234803821915024, "grad_norm": 0.1460978090763092, "learning_rate": 4.769653208583342e-05, "loss": 0.9499, "step": 7494 }, { "epoch": 1.5236836755438097, "grad_norm": 0.1444913148880005, "learning_rate": 4.7676192413302145e-05, "loss": 0.9712, "step": 7495 }, { "epoch": 1.5238869688961172, "grad_norm": 0.15407614409923553, "learning_rate": 4.7655852740770876e-05, "loss": 1.0388, "step": 7496 }, { "epoch": 1.5240902622484245, "grad_norm": 0.14768806099891663, "learning_rate": 4.76355130682396e-05, "loss": 0.9901, "step": 7497 }, { "epoch": 1.5242935556007318, "grad_norm": 0.17865929007530212, "learning_rate": 4.761517339570833e-05, "loss": 1.1832, "step": 7498 }, { "epoch": 1.5244968489530393, "grad_norm": 0.16334161162376404, "learning_rate": 4.7594833723177056e-05, "loss": 1.2179, "step": 7499 }, { "epoch": 1.5247001423053466, "grad_norm": 0.14917631447315216, "learning_rate": 4.757449405064579e-05, "loss": 0.9683, "step": 7500 }, { "epoch": 1.524903435657654, "grad_norm": 0.16842851042747498, "learning_rate": 4.755415437811451e-05, "loss": 1.1311, "step": 7501 }, { "epoch": 1.5251067290099614, "grad_norm": 0.1710801124572754, "learning_rate": 4.753381470558324e-05, "loss": 1.051, "step": 7502 }, { "epoch": 1.5253100223622686, "grad_norm": 0.14032766222953796, "learning_rate": 4.751347503305197e-05, "loss": 0.886, "step": 7503 }, { "epoch": 1.5255133157145762, "grad_norm": 0.16021350026130676, "learning_rate": 4.74931353605207e-05, "loss": 1.1057, "step": 7504 }, { "epoch": 1.5257166090668837, "grad_norm": 0.13777956366539001, "learning_rate": 4.747279568798942e-05, "loss": 0.8405, "step": 7505 }, { "epoch": 1.525919902419191, "grad_norm": 0.15827859938144684, "learning_rate": 4.7452456015458155e-05, "loss": 0.9807, "step": 7506 }, { "epoch": 1.5261231957714982, "grad_norm": 0.14875048398971558, "learning_rate": 4.743211634292688e-05, "loss": 0.9142, "step": 7507 }, { "epoch": 1.5263264891238055, "grad_norm": 0.1471138298511505, "learning_rate": 4.741177667039561e-05, "loss": 0.9305, "step": 7508 }, { "epoch": 1.526529782476113, "grad_norm": 0.15858818590641022, "learning_rate": 4.7391436997864335e-05, "loss": 1.0326, "step": 7509 }, { "epoch": 1.5267330758284205, "grad_norm": 0.14757820963859558, "learning_rate": 4.7371097325333066e-05, "loss": 0.9423, "step": 7510 }, { "epoch": 1.5269363691807278, "grad_norm": 0.14671318233013153, "learning_rate": 4.735075765280179e-05, "loss": 0.9953, "step": 7511 }, { "epoch": 1.5271396625330351, "grad_norm": 0.16315753757953644, "learning_rate": 4.733041798027052e-05, "loss": 1.0674, "step": 7512 }, { "epoch": 1.5273429558853424, "grad_norm": 0.15158745646476746, "learning_rate": 4.7310078307739246e-05, "loss": 0.9416, "step": 7513 }, { "epoch": 1.52754624923765, "grad_norm": 0.1569458246231079, "learning_rate": 4.728973863520798e-05, "loss": 1.1336, "step": 7514 }, { "epoch": 1.5277495425899574, "grad_norm": 0.15651223063468933, "learning_rate": 4.72693989626767e-05, "loss": 0.9724, "step": 7515 }, { "epoch": 1.5279528359422647, "grad_norm": 0.1413620412349701, "learning_rate": 4.724905929014543e-05, "loss": 0.9164, "step": 7516 }, { "epoch": 1.528156129294572, "grad_norm": 0.14873231947422028, "learning_rate": 4.722871961761416e-05, "loss": 0.9904, "step": 7517 }, { "epoch": 1.5283594226468793, "grad_norm": 0.16632919013500214, "learning_rate": 4.720837994508289e-05, "loss": 1.0332, "step": 7518 }, { "epoch": 1.5285627159991868, "grad_norm": 0.1570500135421753, "learning_rate": 4.718804027255161e-05, "loss": 1.0491, "step": 7519 }, { "epoch": 1.5287660093514943, "grad_norm": 0.17738598585128784, "learning_rate": 4.7167700600020344e-05, "loss": 1.049, "step": 7520 }, { "epoch": 1.5289693027038016, "grad_norm": 0.1635134369134903, "learning_rate": 4.714736092748907e-05, "loss": 1.1412, "step": 7521 }, { "epoch": 1.529172596056109, "grad_norm": 0.16301599144935608, "learning_rate": 4.71270212549578e-05, "loss": 1.0967, "step": 7522 }, { "epoch": 1.5293758894084164, "grad_norm": 0.14440634846687317, "learning_rate": 4.7106681582426525e-05, "loss": 0.8357, "step": 7523 }, { "epoch": 1.5295791827607237, "grad_norm": 0.144694983959198, "learning_rate": 4.7086341909895256e-05, "loss": 0.8722, "step": 7524 }, { "epoch": 1.5297824761130312, "grad_norm": 0.14646680653095245, "learning_rate": 4.706600223736398e-05, "loss": 0.8988, "step": 7525 }, { "epoch": 1.5299857694653385, "grad_norm": 0.1644057333469391, "learning_rate": 4.704566256483271e-05, "loss": 1.1197, "step": 7526 }, { "epoch": 1.5301890628176458, "grad_norm": 0.1693752557039261, "learning_rate": 4.7025322892301436e-05, "loss": 1.0486, "step": 7527 }, { "epoch": 1.5303923561699533, "grad_norm": 0.1716986894607544, "learning_rate": 4.700498321977017e-05, "loss": 1.1087, "step": 7528 }, { "epoch": 1.5305956495222606, "grad_norm": 0.1740422248840332, "learning_rate": 4.698464354723889e-05, "loss": 1.0909, "step": 7529 }, { "epoch": 1.530798942874568, "grad_norm": 0.15906310081481934, "learning_rate": 4.696430387470762e-05, "loss": 1.0841, "step": 7530 }, { "epoch": 1.5310022362268754, "grad_norm": 0.14159859716892242, "learning_rate": 4.694396420217635e-05, "loss": 0.8766, "step": 7531 }, { "epoch": 1.5312055295791827, "grad_norm": 0.17096573114395142, "learning_rate": 4.692362452964508e-05, "loss": 1.1308, "step": 7532 }, { "epoch": 1.5314088229314902, "grad_norm": 0.16331720352172852, "learning_rate": 4.69032848571138e-05, "loss": 0.9884, "step": 7533 }, { "epoch": 1.5316121162837977, "grad_norm": 0.15612895786762238, "learning_rate": 4.6882945184582534e-05, "loss": 0.9737, "step": 7534 }, { "epoch": 1.531815409636105, "grad_norm": 0.1716272234916687, "learning_rate": 4.686260551205126e-05, "loss": 1.2049, "step": 7535 }, { "epoch": 1.5320187029884123, "grad_norm": 0.15378396213054657, "learning_rate": 4.684226583951999e-05, "loss": 1.0315, "step": 7536 }, { "epoch": 1.5322219963407195, "grad_norm": 0.16745533049106598, "learning_rate": 4.682192616698871e-05, "loss": 1.1749, "step": 7537 }, { "epoch": 1.532425289693027, "grad_norm": 0.16122505068778992, "learning_rate": 4.680158649445744e-05, "loss": 1.1481, "step": 7538 }, { "epoch": 1.5326285830453346, "grad_norm": 0.15753133594989777, "learning_rate": 4.678124682192617e-05, "loss": 1.0085, "step": 7539 }, { "epoch": 1.5328318763976418, "grad_norm": 0.15394344925880432, "learning_rate": 4.6760907149394895e-05, "loss": 0.9611, "step": 7540 }, { "epoch": 1.5330351697499491, "grad_norm": 0.1620665341615677, "learning_rate": 4.6740567476863626e-05, "loss": 0.9435, "step": 7541 }, { "epoch": 1.5332384631022564, "grad_norm": 0.15785206854343414, "learning_rate": 4.672022780433235e-05, "loss": 1.0311, "step": 7542 }, { "epoch": 1.533441756454564, "grad_norm": 0.15812784433364868, "learning_rate": 4.669988813180108e-05, "loss": 0.9304, "step": 7543 }, { "epoch": 1.5336450498068714, "grad_norm": 0.14829683303833008, "learning_rate": 4.6679548459269806e-05, "loss": 0.9735, "step": 7544 }, { "epoch": 1.5338483431591787, "grad_norm": 0.15306831896305084, "learning_rate": 4.665920878673854e-05, "loss": 0.9459, "step": 7545 }, { "epoch": 1.534051636511486, "grad_norm": 0.1524849534034729, "learning_rate": 4.663886911420726e-05, "loss": 0.9989, "step": 7546 }, { "epoch": 1.5342549298637933, "grad_norm": 0.1524866819381714, "learning_rate": 4.661852944167599e-05, "loss": 0.9516, "step": 7547 }, { "epoch": 1.5344582232161008, "grad_norm": 0.1561049073934555, "learning_rate": 4.659818976914472e-05, "loss": 0.9629, "step": 7548 }, { "epoch": 1.5346615165684083, "grad_norm": 0.15052708983421326, "learning_rate": 4.657785009661345e-05, "loss": 0.9709, "step": 7549 }, { "epoch": 1.5348648099207156, "grad_norm": 0.16317294538021088, "learning_rate": 4.655751042408217e-05, "loss": 1.0431, "step": 7550 }, { "epoch": 1.535068103273023, "grad_norm": 0.1577170193195343, "learning_rate": 4.6537170751550904e-05, "loss": 1.0794, "step": 7551 }, { "epoch": 1.5352713966253302, "grad_norm": 0.16741138696670532, "learning_rate": 4.651683107901963e-05, "loss": 1.2215, "step": 7552 }, { "epoch": 1.5354746899776377, "grad_norm": 0.1500609666109085, "learning_rate": 4.649649140648836e-05, "loss": 0.9439, "step": 7553 }, { "epoch": 1.5356779833299452, "grad_norm": 0.15758995711803436, "learning_rate": 4.6476151733957085e-05, "loss": 0.8848, "step": 7554 }, { "epoch": 1.5358812766822525, "grad_norm": 0.14967188239097595, "learning_rate": 4.6455812061425816e-05, "loss": 0.9408, "step": 7555 }, { "epoch": 1.5360845700345598, "grad_norm": 0.13587024807929993, "learning_rate": 4.643547238889454e-05, "loss": 0.9078, "step": 7556 }, { "epoch": 1.5362878633868673, "grad_norm": 0.1551710069179535, "learning_rate": 4.641513271636327e-05, "loss": 1.0213, "step": 7557 }, { "epoch": 1.5364911567391746, "grad_norm": 0.15696901082992554, "learning_rate": 4.6394793043831996e-05, "loss": 0.9854, "step": 7558 }, { "epoch": 1.536694450091482, "grad_norm": 0.14111942052841187, "learning_rate": 4.637445337130073e-05, "loss": 0.8104, "step": 7559 }, { "epoch": 1.5368977434437894, "grad_norm": 0.16344057023525238, "learning_rate": 4.635411369876945e-05, "loss": 1.0198, "step": 7560 }, { "epoch": 1.5371010367960967, "grad_norm": 0.15371447801589966, "learning_rate": 4.633377402623818e-05, "loss": 1.1088, "step": 7561 }, { "epoch": 1.5373043301484042, "grad_norm": 0.16405069828033447, "learning_rate": 4.631343435370691e-05, "loss": 1.1632, "step": 7562 }, { "epoch": 1.5375076235007117, "grad_norm": 0.17231358587741852, "learning_rate": 4.629309468117564e-05, "loss": 1.0729, "step": 7563 }, { "epoch": 1.537710916853019, "grad_norm": 0.16849292814731598, "learning_rate": 4.627275500864436e-05, "loss": 1.1287, "step": 7564 }, { "epoch": 1.5379142102053263, "grad_norm": 0.14124159514904022, "learning_rate": 4.6252415336113094e-05, "loss": 0.9726, "step": 7565 }, { "epoch": 1.5381175035576335, "grad_norm": 0.16582997143268585, "learning_rate": 4.623207566358182e-05, "loss": 1.048, "step": 7566 }, { "epoch": 1.538320796909941, "grad_norm": 0.15703178942203522, "learning_rate": 4.621173599105055e-05, "loss": 0.9917, "step": 7567 }, { "epoch": 1.5385240902622486, "grad_norm": 0.1521129459142685, "learning_rate": 4.6191396318519274e-05, "loss": 0.9314, "step": 7568 }, { "epoch": 1.5387273836145559, "grad_norm": 0.18239177763462067, "learning_rate": 4.6171056645988006e-05, "loss": 1.1309, "step": 7569 }, { "epoch": 1.5389306769668631, "grad_norm": 0.15608282387256622, "learning_rate": 4.615071697345673e-05, "loss": 0.9907, "step": 7570 }, { "epoch": 1.5391339703191704, "grad_norm": 0.14907321333885193, "learning_rate": 4.613037730092546e-05, "loss": 0.948, "step": 7571 }, { "epoch": 1.539337263671478, "grad_norm": 0.15870921313762665, "learning_rate": 4.6110037628394186e-05, "loss": 1.0293, "step": 7572 }, { "epoch": 1.5395405570237854, "grad_norm": 0.1471608281135559, "learning_rate": 4.608969795586292e-05, "loss": 0.9045, "step": 7573 }, { "epoch": 1.5397438503760927, "grad_norm": 0.1473323255777359, "learning_rate": 4.606935828333164e-05, "loss": 0.9773, "step": 7574 }, { "epoch": 1.5399471437284, "grad_norm": 0.15672756731510162, "learning_rate": 4.604901861080037e-05, "loss": 0.9564, "step": 7575 }, { "epoch": 1.5401504370807073, "grad_norm": 0.13355454802513123, "learning_rate": 4.60286789382691e-05, "loss": 0.9043, "step": 7576 }, { "epoch": 1.5403537304330148, "grad_norm": 0.16888266801834106, "learning_rate": 4.600833926573783e-05, "loss": 1.049, "step": 7577 }, { "epoch": 1.5405570237853223, "grad_norm": 0.14586526155471802, "learning_rate": 4.5987999593206546e-05, "loss": 0.991, "step": 7578 }, { "epoch": 1.5407603171376296, "grad_norm": 0.15697935223579407, "learning_rate": 4.596765992067528e-05, "loss": 0.9276, "step": 7579 }, { "epoch": 1.540963610489937, "grad_norm": 0.1606079787015915, "learning_rate": 4.5947320248144e-05, "loss": 1.1167, "step": 7580 }, { "epoch": 1.5411669038422442, "grad_norm": 0.14496320486068726, "learning_rate": 4.592698057561273e-05, "loss": 0.978, "step": 7581 }, { "epoch": 1.5413701971945517, "grad_norm": 0.1540028601884842, "learning_rate": 4.590664090308146e-05, "loss": 0.9584, "step": 7582 }, { "epoch": 1.5415734905468592, "grad_norm": 0.14273619651794434, "learning_rate": 4.588630123055019e-05, "loss": 0.9559, "step": 7583 }, { "epoch": 1.5417767838991665, "grad_norm": 0.15364350378513336, "learning_rate": 4.586596155801891e-05, "loss": 1.067, "step": 7584 }, { "epoch": 1.5419800772514738, "grad_norm": 0.15916843712329865, "learning_rate": 4.5845621885487644e-05, "loss": 0.9734, "step": 7585 }, { "epoch": 1.5421833706037813, "grad_norm": 0.16618654131889343, "learning_rate": 4.582528221295637e-05, "loss": 1.0467, "step": 7586 }, { "epoch": 1.5423866639560886, "grad_norm": 0.1487346738576889, "learning_rate": 4.58049425404251e-05, "loss": 0.9555, "step": 7587 }, { "epoch": 1.542589957308396, "grad_norm": 0.1543288677930832, "learning_rate": 4.5784602867893825e-05, "loss": 1.0203, "step": 7588 }, { "epoch": 1.5427932506607034, "grad_norm": 0.15385927259922028, "learning_rate": 4.5764263195362556e-05, "loss": 1.0728, "step": 7589 }, { "epoch": 1.5429965440130107, "grad_norm": 0.18972186744213104, "learning_rate": 4.574392352283128e-05, "loss": 1.1087, "step": 7590 }, { "epoch": 1.5431998373653182, "grad_norm": 0.17217358946800232, "learning_rate": 4.572358385030001e-05, "loss": 1.0384, "step": 7591 }, { "epoch": 1.5434031307176257, "grad_norm": 0.15717031061649323, "learning_rate": 4.5703244177768736e-05, "loss": 0.8593, "step": 7592 }, { "epoch": 1.543606424069933, "grad_norm": 0.17360135912895203, "learning_rate": 4.568290450523747e-05, "loss": 1.1914, "step": 7593 }, { "epoch": 1.5438097174222403, "grad_norm": 0.15492455661296844, "learning_rate": 4.566256483270619e-05, "loss": 0.9039, "step": 7594 }, { "epoch": 1.5440130107745476, "grad_norm": 0.15058903396129608, "learning_rate": 4.564222516017492e-05, "loss": 0.9224, "step": 7595 }, { "epoch": 1.544216304126855, "grad_norm": 0.16502228379249573, "learning_rate": 4.562188548764365e-05, "loss": 0.9956, "step": 7596 }, { "epoch": 1.5444195974791626, "grad_norm": 0.15759393572807312, "learning_rate": 4.560154581511238e-05, "loss": 1.0067, "step": 7597 }, { "epoch": 1.5446228908314699, "grad_norm": 0.1422048658132553, "learning_rate": 4.558120614258111e-05, "loss": 0.9564, "step": 7598 }, { "epoch": 1.5448261841837772, "grad_norm": 0.13447371125221252, "learning_rate": 4.5560866470049834e-05, "loss": 0.9567, "step": 7599 }, { "epoch": 1.5450294775360844, "grad_norm": 0.1465720683336258, "learning_rate": 4.5540526797518566e-05, "loss": 0.945, "step": 7600 } ], "logging_steps": 1, "max_steps": 9838, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.261864221527556e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }