{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0164667615368979, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00020329335230737954, "grad_norm": 0.36621615290641785, "learning_rate": 0.0, "loss": 1.5428, "step": 1 }, { "epoch": 0.0004065867046147591, "grad_norm": 0.38205745816230774, "learning_rate": 4e-05, "loss": 1.3117, "step": 2 }, { "epoch": 0.0006098800569221387, "grad_norm": 0.38206785917282104, "learning_rate": 8e-05, "loss": 1.5818, "step": 3 }, { "epoch": 0.0008131734092295182, "grad_norm": 0.2856779396533966, "learning_rate": 0.00012, "loss": 1.4046, "step": 4 }, { "epoch": 0.0010164667615368977, "grad_norm": 0.28240150213241577, "learning_rate": 0.00016, "loss": 1.1899, "step": 5 }, { "epoch": 0.0012197601138442774, "grad_norm": 0.27537214756011963, "learning_rate": 0.0002, "loss": 1.3741, "step": 6 }, { "epoch": 0.0014230534661516568, "grad_norm": 0.1832859218120575, "learning_rate": 0.00019997966032746873, "loss": 1.2432, "step": 7 }, { "epoch": 0.0016263468184590363, "grad_norm": 0.1194186806678772, "learning_rate": 0.00019995932065493746, "loss": 1.1152, "step": 8 }, { "epoch": 0.001829640170766416, "grad_norm": 0.287241131067276, "learning_rate": 0.0001999389809824062, "loss": 1.2556, "step": 9 }, { "epoch": 0.0020329335230737954, "grad_norm": 0.2129606008529663, "learning_rate": 0.00019991864130987493, "loss": 1.2698, "step": 10 }, { "epoch": 0.002236226875381175, "grad_norm": 0.16557097434997559, "learning_rate": 0.00019989830163734363, "loss": 1.2364, "step": 11 }, { "epoch": 0.002439520227688555, "grad_norm": 0.15639722347259521, "learning_rate": 0.00019987796196481236, "loss": 1.2499, "step": 12 }, { "epoch": 0.002642813579995934, "grad_norm": 0.11634412407875061, "learning_rate": 0.0001998576222922811, "loss": 1.148, "step": 13 }, { "epoch": 0.0028461069323033137, "grad_norm": 0.13249927759170532, "learning_rate": 0.00019983728261974983, "loss": 1.267, "step": 14 }, { "epoch": 0.0030494002846106934, "grad_norm": 0.12251798063516617, "learning_rate": 0.00019981694294721856, "loss": 1.1868, "step": 15 }, { "epoch": 0.0032526936369180726, "grad_norm": 0.1279357224702835, "learning_rate": 0.00019979660327468728, "loss": 1.3611, "step": 16 }, { "epoch": 0.0034559869892254523, "grad_norm": 0.10690166801214218, "learning_rate": 0.00019977626360215603, "loss": 1.2934, "step": 17 }, { "epoch": 0.003659280341532832, "grad_norm": 0.09634354710578918, "learning_rate": 0.00019975592392962476, "loss": 1.1628, "step": 18 }, { "epoch": 0.0038625736938402116, "grad_norm": 0.08993979543447495, "learning_rate": 0.00019973558425709345, "loss": 1.1328, "step": 19 }, { "epoch": 0.004065867046147591, "grad_norm": 0.09834206104278564, "learning_rate": 0.00019971524458456218, "loss": 1.1424, "step": 20 }, { "epoch": 0.0042691603984549705, "grad_norm": 0.10754235088825226, "learning_rate": 0.00019969490491203093, "loss": 1.0843, "step": 21 }, { "epoch": 0.00447245375076235, "grad_norm": 0.09953349828720093, "learning_rate": 0.00019967456523949966, "loss": 1.1475, "step": 22 }, { "epoch": 0.00467574710306973, "grad_norm": 0.09910175204277039, "learning_rate": 0.00019965422556696838, "loss": 1.1532, "step": 23 }, { "epoch": 0.00487904045537711, "grad_norm": 0.10800202935934067, "learning_rate": 0.0001996338858944371, "loss": 1.2336, "step": 24 }, { "epoch": 0.005082333807684488, "grad_norm": 0.10042817890644073, "learning_rate": 0.00019961354622190586, "loss": 1.1235, "step": 25 }, { "epoch": 0.005285627159991868, "grad_norm": 0.10839787125587463, "learning_rate": 0.00019959320654937458, "loss": 1.3561, "step": 26 }, { "epoch": 0.005488920512299248, "grad_norm": 0.10559111088514328, "learning_rate": 0.00019957286687684328, "loss": 1.3255, "step": 27 }, { "epoch": 0.005692213864606627, "grad_norm": 0.12504474818706512, "learning_rate": 0.000199552527204312, "loss": 1.4834, "step": 28 }, { "epoch": 0.005895507216914007, "grad_norm": 0.09099473804235458, "learning_rate": 0.00019953218753178075, "loss": 1.1404, "step": 29 }, { "epoch": 0.006098800569221387, "grad_norm": 0.09648846834897995, "learning_rate": 0.00019951184785924948, "loss": 1.1302, "step": 30 }, { "epoch": 0.006302093921528766, "grad_norm": 0.0992799773812294, "learning_rate": 0.0001994915081867182, "loss": 1.3508, "step": 31 }, { "epoch": 0.006505387273836145, "grad_norm": 0.10332880914211273, "learning_rate": 0.00019947116851418693, "loss": 1.1819, "step": 32 }, { "epoch": 0.006708680626143525, "grad_norm": 0.1172771006822586, "learning_rate": 0.00019945082884165568, "loss": 1.2512, "step": 33 }, { "epoch": 0.006911973978450905, "grad_norm": 0.10649558156728745, "learning_rate": 0.0001994304891691244, "loss": 1.2394, "step": 34 }, { "epoch": 0.007115267330758284, "grad_norm": 0.10849574208259583, "learning_rate": 0.0001994101494965931, "loss": 1.1847, "step": 35 }, { "epoch": 0.007318560683065664, "grad_norm": 0.11581245064735413, "learning_rate": 0.00019938980982406182, "loss": 1.2788, "step": 36 }, { "epoch": 0.007521854035373044, "grad_norm": 0.09857199341058731, "learning_rate": 0.00019936947015153058, "loss": 1.1072, "step": 37 }, { "epoch": 0.007725147387680423, "grad_norm": 0.08895204216241837, "learning_rate": 0.0001993491304789993, "loss": 0.8741, "step": 38 }, { "epoch": 0.007928440739987802, "grad_norm": 0.09715849161148071, "learning_rate": 0.00019932879080646803, "loss": 1.2999, "step": 39 }, { "epoch": 0.008131734092295182, "grad_norm": 0.09145913273096085, "learning_rate": 0.00019930845113393675, "loss": 0.9852, "step": 40 }, { "epoch": 0.008335027444602561, "grad_norm": 0.09298407286405563, "learning_rate": 0.0001992881114614055, "loss": 1.1033, "step": 41 }, { "epoch": 0.008538320796909941, "grad_norm": 0.09957871586084366, "learning_rate": 0.00019926777178887423, "loss": 1.3203, "step": 42 }, { "epoch": 0.00874161414921732, "grad_norm": 0.1212410032749176, "learning_rate": 0.00019924743211634292, "loss": 1.166, "step": 43 }, { "epoch": 0.0089449075015247, "grad_norm": 0.10740375518798828, "learning_rate": 0.00019922709244381165, "loss": 1.1905, "step": 44 }, { "epoch": 0.00914820085383208, "grad_norm": 0.10571859031915665, "learning_rate": 0.0001992067527712804, "loss": 1.07, "step": 45 }, { "epoch": 0.00935149420613946, "grad_norm": 0.11196234077215195, "learning_rate": 0.00019918641309874912, "loss": 1.2516, "step": 46 }, { "epoch": 0.00955478755844684, "grad_norm": 0.1025981530547142, "learning_rate": 0.00019916607342621785, "loss": 1.1118, "step": 47 }, { "epoch": 0.00975808091075422, "grad_norm": 0.10584773868322372, "learning_rate": 0.00019914573375368657, "loss": 1.1551, "step": 48 }, { "epoch": 0.009961374263061597, "grad_norm": 0.10157100111246109, "learning_rate": 0.0001991253940811553, "loss": 0.9638, "step": 49 }, { "epoch": 0.010164667615368977, "grad_norm": 0.10194176435470581, "learning_rate": 0.00019910505440862405, "loss": 1.0024, "step": 50 }, { "epoch": 0.010367960967676356, "grad_norm": 0.10047532618045807, "learning_rate": 0.00019908471473609275, "loss": 1.1767, "step": 51 }, { "epoch": 0.010571254319983736, "grad_norm": 0.10448278486728668, "learning_rate": 0.00019906437506356147, "loss": 1.2346, "step": 52 }, { "epoch": 0.010774547672291116, "grad_norm": 0.09438527375459671, "learning_rate": 0.0001990440353910302, "loss": 1.0517, "step": 53 }, { "epoch": 0.010977841024598495, "grad_norm": 0.12310227751731873, "learning_rate": 0.00019902369571849895, "loss": 1.2809, "step": 54 }, { "epoch": 0.011181134376905875, "grad_norm": 0.10811592638492584, "learning_rate": 0.00019900335604596767, "loss": 1.1171, "step": 55 }, { "epoch": 0.011384427729213255, "grad_norm": 0.12063754349946976, "learning_rate": 0.0001989830163734364, "loss": 1.0719, "step": 56 }, { "epoch": 0.011587721081520634, "grad_norm": 0.10147465020418167, "learning_rate": 0.00019896267670090512, "loss": 1.0643, "step": 57 }, { "epoch": 0.011791014433828014, "grad_norm": 0.10184100270271301, "learning_rate": 0.00019894233702837387, "loss": 1.0148, "step": 58 }, { "epoch": 0.011994307786135394, "grad_norm": 0.1090080663561821, "learning_rate": 0.00019892199735584257, "loss": 1.0762, "step": 59 }, { "epoch": 0.012197601138442773, "grad_norm": 0.0960102528333664, "learning_rate": 0.0001989016576833113, "loss": 1.09, "step": 60 }, { "epoch": 0.012400894490750153, "grad_norm": 0.1056618019938469, "learning_rate": 0.00019888131801078002, "loss": 1.0092, "step": 61 }, { "epoch": 0.012604187843057533, "grad_norm": 0.10064820945262909, "learning_rate": 0.00019886097833824877, "loss": 1.0135, "step": 62 }, { "epoch": 0.01280748119536491, "grad_norm": 0.11083406209945679, "learning_rate": 0.0001988406386657175, "loss": 1.2344, "step": 63 }, { "epoch": 0.01301077454767229, "grad_norm": 0.10507647693157196, "learning_rate": 0.00019882029899318622, "loss": 1.1035, "step": 64 }, { "epoch": 0.01321406789997967, "grad_norm": 0.10840694606304169, "learning_rate": 0.00019879995932065494, "loss": 1.1458, "step": 65 }, { "epoch": 0.01341736125228705, "grad_norm": 0.1149667352437973, "learning_rate": 0.0001987796196481237, "loss": 1.0722, "step": 66 }, { "epoch": 0.01362065460459443, "grad_norm": 0.1219320297241211, "learning_rate": 0.00019875927997559242, "loss": 1.1392, "step": 67 }, { "epoch": 0.01382394795690181, "grad_norm": 0.1296710968017578, "learning_rate": 0.00019873894030306112, "loss": 1.1831, "step": 68 }, { "epoch": 0.014027241309209189, "grad_norm": 0.11214271187782288, "learning_rate": 0.00019871860063052984, "loss": 1.0385, "step": 69 }, { "epoch": 0.014230534661516568, "grad_norm": 0.12374462932348251, "learning_rate": 0.0001986982609579986, "loss": 1.2633, "step": 70 }, { "epoch": 0.014433828013823948, "grad_norm": 0.09856373071670532, "learning_rate": 0.00019867792128546732, "loss": 1.0495, "step": 71 }, { "epoch": 0.014637121366131328, "grad_norm": 0.1356707215309143, "learning_rate": 0.00019865758161293604, "loss": 1.2245, "step": 72 }, { "epoch": 0.014840414718438708, "grad_norm": 0.10265105217695236, "learning_rate": 0.00019863724194040477, "loss": 1.0076, "step": 73 }, { "epoch": 0.015043708070746087, "grad_norm": 0.1307467520236969, "learning_rate": 0.00019861690226787352, "loss": 1.3838, "step": 74 }, { "epoch": 0.015247001423053467, "grad_norm": 0.12013835459947586, "learning_rate": 0.00019859656259534224, "loss": 1.1509, "step": 75 }, { "epoch": 0.015450294775360847, "grad_norm": 0.10897748917341232, "learning_rate": 0.00019857622292281094, "loss": 1.025, "step": 76 }, { "epoch": 0.015653588127668226, "grad_norm": 0.10924937576055527, "learning_rate": 0.00019855588325027966, "loss": 1.06, "step": 77 }, { "epoch": 0.015856881479975604, "grad_norm": 0.10474475473165512, "learning_rate": 0.00019853554357774841, "loss": 0.9267, "step": 78 }, { "epoch": 0.016060174832282986, "grad_norm": 0.12250765413045883, "learning_rate": 0.00019851520390521714, "loss": 1.1231, "step": 79 }, { "epoch": 0.016263468184590364, "grad_norm": 0.12869718670845032, "learning_rate": 0.00019849486423268586, "loss": 1.1219, "step": 80 }, { "epoch": 0.016466761536897745, "grad_norm": 0.11656077951192856, "learning_rate": 0.0001984745245601546, "loss": 1.1494, "step": 81 }, { "epoch": 0.016670054889205123, "grad_norm": 0.12449704855680466, "learning_rate": 0.00019845418488762334, "loss": 1.3177, "step": 82 }, { "epoch": 0.016873348241512504, "grad_norm": 0.10715439915657043, "learning_rate": 0.00019843384521509206, "loss": 1.0944, "step": 83 }, { "epoch": 0.017076641593819882, "grad_norm": 0.11231628060340881, "learning_rate": 0.00019841350554256076, "loss": 1.0681, "step": 84 }, { "epoch": 0.01727993494612726, "grad_norm": 0.1253119856119156, "learning_rate": 0.00019839316587002949, "loss": 1.104, "step": 85 }, { "epoch": 0.01748322829843464, "grad_norm": 0.12725764513015747, "learning_rate": 0.00019837282619749824, "loss": 1.307, "step": 86 }, { "epoch": 0.01768652165074202, "grad_norm": 0.11470405012369156, "learning_rate": 0.00019835248652496696, "loss": 1.0729, "step": 87 }, { "epoch": 0.0178898150030494, "grad_norm": 0.12006914615631104, "learning_rate": 0.0001983321468524357, "loss": 1.1608, "step": 88 }, { "epoch": 0.01809310835535678, "grad_norm": 0.11256147921085358, "learning_rate": 0.0001983118071799044, "loss": 1.0816, "step": 89 }, { "epoch": 0.01829640170766416, "grad_norm": 0.12627887725830078, "learning_rate": 0.00019829146750737314, "loss": 1.2015, "step": 90 }, { "epoch": 0.018499695059971538, "grad_norm": 0.12313251942396164, "learning_rate": 0.0001982711278348419, "loss": 1.0837, "step": 91 }, { "epoch": 0.01870298841227892, "grad_norm": 0.1349901705980301, "learning_rate": 0.00019825078816231058, "loss": 1.2277, "step": 92 }, { "epoch": 0.018906281764586298, "grad_norm": 0.11006023734807968, "learning_rate": 0.0001982304484897793, "loss": 1.0768, "step": 93 }, { "epoch": 0.01910957511689368, "grad_norm": 0.1100686565041542, "learning_rate": 0.00019821010881724803, "loss": 1.1119, "step": 94 }, { "epoch": 0.019312868469201057, "grad_norm": 0.1252383440732956, "learning_rate": 0.00019818976914471678, "loss": 1.1862, "step": 95 }, { "epoch": 0.01951616182150844, "grad_norm": 0.1430596113204956, "learning_rate": 0.0001981694294721855, "loss": 1.1765, "step": 96 }, { "epoch": 0.019719455173815816, "grad_norm": 0.130848690867424, "learning_rate": 0.00019814908979965423, "loss": 1.3255, "step": 97 }, { "epoch": 0.019922748526123194, "grad_norm": 0.13019633293151855, "learning_rate": 0.00019812875012712296, "loss": 1.1853, "step": 98 }, { "epoch": 0.020126041878430576, "grad_norm": 0.11539386957883835, "learning_rate": 0.0001981084104545917, "loss": 1.1897, "step": 99 }, { "epoch": 0.020329335230737954, "grad_norm": 0.11749454587697983, "learning_rate": 0.0001980880707820604, "loss": 1.1189, "step": 100 }, { "epoch": 0.020532628583045335, "grad_norm": 0.12270624190568924, "learning_rate": 0.00019806773110952913, "loss": 1.1821, "step": 101 }, { "epoch": 0.020735921935352713, "grad_norm": 0.1244652196764946, "learning_rate": 0.00019804739143699786, "loss": 1.2847, "step": 102 }, { "epoch": 0.020939215287660094, "grad_norm": 0.1163022369146347, "learning_rate": 0.0001980270517644666, "loss": 0.9405, "step": 103 }, { "epoch": 0.021142508639967472, "grad_norm": 0.13008280098438263, "learning_rate": 0.00019800671209193533, "loss": 1.272, "step": 104 }, { "epoch": 0.021345801992274854, "grad_norm": 0.11667025834321976, "learning_rate": 0.00019798637241940406, "loss": 1.0145, "step": 105 }, { "epoch": 0.02154909534458223, "grad_norm": 0.10680039972066879, "learning_rate": 0.00019796603274687278, "loss": 1.0929, "step": 106 }, { "epoch": 0.021752388696889613, "grad_norm": 0.12161742150783539, "learning_rate": 0.00019794569307434153, "loss": 1.0314, "step": 107 }, { "epoch": 0.02195568204919699, "grad_norm": 0.10798537731170654, "learning_rate": 0.00019792535340181023, "loss": 0.9918, "step": 108 }, { "epoch": 0.022158975401504372, "grad_norm": 0.10750683397054672, "learning_rate": 0.00019790501372927895, "loss": 1.0899, "step": 109 }, { "epoch": 0.02236226875381175, "grad_norm": 0.10349331051111221, "learning_rate": 0.00019788467405674768, "loss": 1.1011, "step": 110 }, { "epoch": 0.022565562106119128, "grad_norm": 0.12401413917541504, "learning_rate": 0.00019786433438421643, "loss": 1.1993, "step": 111 }, { "epoch": 0.02276885545842651, "grad_norm": 0.12651224434375763, "learning_rate": 0.00019784399471168515, "loss": 1.1643, "step": 112 }, { "epoch": 0.022972148810733888, "grad_norm": 0.12746116518974304, "learning_rate": 0.00019782365503915388, "loss": 1.2093, "step": 113 }, { "epoch": 0.02317544216304127, "grad_norm": 0.12016987800598145, "learning_rate": 0.0001978033153666226, "loss": 1.0743, "step": 114 }, { "epoch": 0.023378735515348647, "grad_norm": 0.11947723478078842, "learning_rate": 0.00019778297569409136, "loss": 1.0899, "step": 115 }, { "epoch": 0.02358202886765603, "grad_norm": 0.13821756839752197, "learning_rate": 0.00019776263602156005, "loss": 1.2247, "step": 116 }, { "epoch": 0.023785322219963406, "grad_norm": 0.1487802267074585, "learning_rate": 0.00019774229634902878, "loss": 1.5157, "step": 117 }, { "epoch": 0.023988615572270788, "grad_norm": 0.11825321614742279, "learning_rate": 0.0001977219566764975, "loss": 1.0966, "step": 118 }, { "epoch": 0.024191908924578166, "grad_norm": 0.11644168198108673, "learning_rate": 0.00019770161700396625, "loss": 1.057, "step": 119 }, { "epoch": 0.024395202276885547, "grad_norm": 0.12633183598518372, "learning_rate": 0.00019768127733143498, "loss": 1.039, "step": 120 }, { "epoch": 0.024598495629192925, "grad_norm": 0.11956316977739334, "learning_rate": 0.0001976609376589037, "loss": 1.0974, "step": 121 }, { "epoch": 0.024801788981500306, "grad_norm": 0.11445662379264832, "learning_rate": 0.00019764059798637243, "loss": 1.0289, "step": 122 }, { "epoch": 0.025005082333807684, "grad_norm": 0.11500035226345062, "learning_rate": 0.00019762025831384118, "loss": 1.0781, "step": 123 }, { "epoch": 0.025208375686115066, "grad_norm": 0.11663355678319931, "learning_rate": 0.0001975999186413099, "loss": 1.0258, "step": 124 }, { "epoch": 0.025411669038422444, "grad_norm": 0.13630478084087372, "learning_rate": 0.0001975795789687786, "loss": 1.1725, "step": 125 }, { "epoch": 0.02561496239072982, "grad_norm": 0.09818248450756073, "learning_rate": 0.00019755923929624732, "loss": 0.8459, "step": 126 }, { "epoch": 0.025818255743037203, "grad_norm": 0.12124455720186234, "learning_rate": 0.00019753889962371608, "loss": 1.0427, "step": 127 }, { "epoch": 0.02602154909534458, "grad_norm": 0.12221626192331314, "learning_rate": 0.0001975185599511848, "loss": 1.0463, "step": 128 }, { "epoch": 0.026224842447651962, "grad_norm": 0.12191324681043625, "learning_rate": 0.00019749822027865352, "loss": 1.0816, "step": 129 }, { "epoch": 0.02642813579995934, "grad_norm": 0.12219464778900146, "learning_rate": 0.00019747788060612225, "loss": 1.0725, "step": 130 }, { "epoch": 0.02663142915226672, "grad_norm": 0.11998015642166138, "learning_rate": 0.00019745754093359097, "loss": 1.1146, "step": 131 }, { "epoch": 0.0268347225045741, "grad_norm": 0.1426505446434021, "learning_rate": 0.00019743720126105973, "loss": 1.0739, "step": 132 }, { "epoch": 0.02703801585688148, "grad_norm": 0.11330442875623703, "learning_rate": 0.00019741686158852842, "loss": 1.0534, "step": 133 }, { "epoch": 0.02724130920918886, "grad_norm": 0.11867399513721466, "learning_rate": 0.00019739652191599715, "loss": 1.0519, "step": 134 }, { "epoch": 0.02744460256149624, "grad_norm": 0.11425293982028961, "learning_rate": 0.00019737618224346587, "loss": 1.1641, "step": 135 }, { "epoch": 0.02764789591380362, "grad_norm": 0.12021425366401672, "learning_rate": 0.00019735584257093462, "loss": 0.8639, "step": 136 }, { "epoch": 0.027851189266111, "grad_norm": 0.13119390606880188, "learning_rate": 0.00019733550289840335, "loss": 1.1486, "step": 137 }, { "epoch": 0.028054482618418378, "grad_norm": 0.12350285053253174, "learning_rate": 0.00019731516322587207, "loss": 1.2504, "step": 138 }, { "epoch": 0.028257775970725756, "grad_norm": 0.11073335260152817, "learning_rate": 0.0001972948235533408, "loss": 1.0745, "step": 139 }, { "epoch": 0.028461069323033137, "grad_norm": 0.1184212937951088, "learning_rate": 0.00019727448388080955, "loss": 1.2013, "step": 140 }, { "epoch": 0.028664362675340515, "grad_norm": 0.11723863333463669, "learning_rate": 0.00019725414420827825, "loss": 1.0349, "step": 141 }, { "epoch": 0.028867656027647896, "grad_norm": 0.12323645502328873, "learning_rate": 0.00019723380453574697, "loss": 1.2585, "step": 142 }, { "epoch": 0.029070949379955274, "grad_norm": 0.12688735127449036, "learning_rate": 0.0001972134648632157, "loss": 1.2059, "step": 143 }, { "epoch": 0.029274242732262656, "grad_norm": 0.11595512181520462, "learning_rate": 0.00019719312519068445, "loss": 1.1894, "step": 144 }, { "epoch": 0.029477536084570034, "grad_norm": 0.11859143525362015, "learning_rate": 0.00019717278551815317, "loss": 1.0111, "step": 145 }, { "epoch": 0.029680829436877415, "grad_norm": 0.13400156795978546, "learning_rate": 0.0001971524458456219, "loss": 1.1861, "step": 146 }, { "epoch": 0.029884122789184793, "grad_norm": 0.12621331214904785, "learning_rate": 0.00019713210617309062, "loss": 1.1099, "step": 147 }, { "epoch": 0.030087416141492174, "grad_norm": 0.11988019198179245, "learning_rate": 0.00019711176650055937, "loss": 1.0571, "step": 148 }, { "epoch": 0.030290709493799552, "grad_norm": 0.11711090058088303, "learning_rate": 0.00019709142682802807, "loss": 1.0591, "step": 149 }, { "epoch": 0.030494002846106934, "grad_norm": 0.12458360195159912, "learning_rate": 0.0001970710871554968, "loss": 1.1863, "step": 150 }, { "epoch": 0.03069729619841431, "grad_norm": 0.11751321703195572, "learning_rate": 0.00019705074748296552, "loss": 1.0735, "step": 151 }, { "epoch": 0.030900589550721693, "grad_norm": 0.13113288581371307, "learning_rate": 0.00019703040781043427, "loss": 1.2103, "step": 152 }, { "epoch": 0.03110388290302907, "grad_norm": 0.13107489049434662, "learning_rate": 0.000197010068137903, "loss": 1.17, "step": 153 }, { "epoch": 0.03130717625533645, "grad_norm": 0.12383049726486206, "learning_rate": 0.00019698972846537172, "loss": 1.0346, "step": 154 }, { "epoch": 0.03151046960764383, "grad_norm": 0.11656415462493896, "learning_rate": 0.00019696938879284044, "loss": 1.1545, "step": 155 }, { "epoch": 0.03171376295995121, "grad_norm": 0.12201374024152756, "learning_rate": 0.0001969490491203092, "loss": 0.9697, "step": 156 }, { "epoch": 0.031917056312258586, "grad_norm": 0.12756960093975067, "learning_rate": 0.0001969287094477779, "loss": 1.1668, "step": 157 }, { "epoch": 0.03212034966456597, "grad_norm": 0.13866621255874634, "learning_rate": 0.00019690836977524662, "loss": 1.2137, "step": 158 }, { "epoch": 0.03232364301687335, "grad_norm": 0.1329393833875656, "learning_rate": 0.00019688803010271534, "loss": 1.1864, "step": 159 }, { "epoch": 0.03252693636918073, "grad_norm": 0.14017806947231293, "learning_rate": 0.0001968676904301841, "loss": 1.2564, "step": 160 }, { "epoch": 0.032730229721488105, "grad_norm": 0.13004827499389648, "learning_rate": 0.00019684735075765282, "loss": 1.1301, "step": 161 }, { "epoch": 0.03293352307379549, "grad_norm": 0.11768215149641037, "learning_rate": 0.00019682701108512154, "loss": 1.0638, "step": 162 }, { "epoch": 0.03313681642610287, "grad_norm": 0.12334595620632172, "learning_rate": 0.00019680667141259026, "loss": 1.277, "step": 163 }, { "epoch": 0.033340109778410246, "grad_norm": 0.12338969856500626, "learning_rate": 0.00019678633174005902, "loss": 0.9211, "step": 164 }, { "epoch": 0.033543403130717624, "grad_norm": 0.10570957511663437, "learning_rate": 0.00019676599206752771, "loss": 1.0919, "step": 165 }, { "epoch": 0.03374669648302501, "grad_norm": 0.1223050057888031, "learning_rate": 0.00019674565239499644, "loss": 1.124, "step": 166 }, { "epoch": 0.033949989835332386, "grad_norm": 0.12787429988384247, "learning_rate": 0.00019672531272246516, "loss": 1.1717, "step": 167 }, { "epoch": 0.034153283187639764, "grad_norm": 0.10991297662258148, "learning_rate": 0.00019670497304993391, "loss": 0.9493, "step": 168 }, { "epoch": 0.03435657653994714, "grad_norm": 0.11774353682994843, "learning_rate": 0.00019668463337740264, "loss": 1.1527, "step": 169 }, { "epoch": 0.03455986989225452, "grad_norm": 0.12775689363479614, "learning_rate": 0.00019666429370487136, "loss": 1.389, "step": 170 }, { "epoch": 0.034763163244561905, "grad_norm": 0.12656515836715698, "learning_rate": 0.0001966439540323401, "loss": 1.182, "step": 171 }, { "epoch": 0.03496645659686928, "grad_norm": 0.11234056204557419, "learning_rate": 0.0001966236143598088, "loss": 0.8927, "step": 172 }, { "epoch": 0.03516974994917666, "grad_norm": 0.12165993452072144, "learning_rate": 0.00019660327468727754, "loss": 1.199, "step": 173 }, { "epoch": 0.03537304330148404, "grad_norm": 0.13241636753082275, "learning_rate": 0.00019658293501474626, "loss": 1.1245, "step": 174 }, { "epoch": 0.035576336653791424, "grad_norm": 0.12375210225582123, "learning_rate": 0.00019656259534221499, "loss": 1.0997, "step": 175 }, { "epoch": 0.0357796300060988, "grad_norm": 0.10730253159999847, "learning_rate": 0.0001965422556696837, "loss": 1.0075, "step": 176 }, { "epoch": 0.03598292335840618, "grad_norm": 0.11313315480947495, "learning_rate": 0.00019652191599715246, "loss": 1.003, "step": 177 }, { "epoch": 0.03618621671071356, "grad_norm": 0.11057975888252258, "learning_rate": 0.00019650157632462119, "loss": 1.1361, "step": 178 }, { "epoch": 0.03638951006302094, "grad_norm": 0.13612353801727295, "learning_rate": 0.0001964812366520899, "loss": 1.091, "step": 179 }, { "epoch": 0.03659280341532832, "grad_norm": 0.10917545855045319, "learning_rate": 0.00019646089697955864, "loss": 1.0915, "step": 180 }, { "epoch": 0.0367960967676357, "grad_norm": 0.11874423176050186, "learning_rate": 0.00019644055730702736, "loss": 1.1054, "step": 181 }, { "epoch": 0.036999390119943076, "grad_norm": 0.11719070374965668, "learning_rate": 0.00019642021763449608, "loss": 1.0794, "step": 182 }, { "epoch": 0.037202683472250454, "grad_norm": 0.11768540740013123, "learning_rate": 0.0001963998779619648, "loss": 1.1453, "step": 183 }, { "epoch": 0.03740597682455784, "grad_norm": 0.12951141595840454, "learning_rate": 0.00019637953828943353, "loss": 1.1443, "step": 184 }, { "epoch": 0.03760927017686522, "grad_norm": 0.12669187784194946, "learning_rate": 0.00019635919861690228, "loss": 1.121, "step": 185 }, { "epoch": 0.037812563529172595, "grad_norm": 0.13488180935382843, "learning_rate": 0.000196338858944371, "loss": 1.2476, "step": 186 }, { "epoch": 0.03801585688147997, "grad_norm": 0.1352519690990448, "learning_rate": 0.00019631851927183973, "loss": 1.2076, "step": 187 }, { "epoch": 0.03821915023378736, "grad_norm": 0.11772511899471283, "learning_rate": 0.00019629817959930846, "loss": 1.0063, "step": 188 }, { "epoch": 0.038422443586094736, "grad_norm": 0.12861546874046326, "learning_rate": 0.0001962778399267772, "loss": 1.1495, "step": 189 }, { "epoch": 0.038625736938402114, "grad_norm": 0.1372981071472168, "learning_rate": 0.0001962575002542459, "loss": 1.1458, "step": 190 }, { "epoch": 0.03882903029070949, "grad_norm": 0.11377538740634918, "learning_rate": 0.00019623716058171463, "loss": 0.9751, "step": 191 }, { "epoch": 0.03903232364301688, "grad_norm": 0.11588437110185623, "learning_rate": 0.00019621682090918336, "loss": 1.0187, "step": 192 }, { "epoch": 0.039235616995324255, "grad_norm": 0.12083633244037628, "learning_rate": 0.0001961964812366521, "loss": 1.0916, "step": 193 }, { "epoch": 0.03943891034763163, "grad_norm": 0.12773993611335754, "learning_rate": 0.00019617614156412083, "loss": 1.2734, "step": 194 }, { "epoch": 0.03964220369993901, "grad_norm": 0.11607804894447327, "learning_rate": 0.00019615580189158956, "loss": 1.0007, "step": 195 }, { "epoch": 0.03984549705224639, "grad_norm": 0.12700581550598145, "learning_rate": 0.00019613546221905828, "loss": 1.1465, "step": 196 }, { "epoch": 0.04004879040455377, "grad_norm": 0.12830078601837158, "learning_rate": 0.00019611512254652703, "loss": 1.056, "step": 197 }, { "epoch": 0.04025208375686115, "grad_norm": 0.12503017485141754, "learning_rate": 0.00019609478287399573, "loss": 1.0332, "step": 198 }, { "epoch": 0.04045537710916853, "grad_norm": 0.13521379232406616, "learning_rate": 0.00019607444320146445, "loss": 1.209, "step": 199 }, { "epoch": 0.04065867046147591, "grad_norm": 0.12014853954315186, "learning_rate": 0.00019605410352893318, "loss": 1.0632, "step": 200 }, { "epoch": 0.04086196381378329, "grad_norm": 0.14510953426361084, "learning_rate": 0.00019603376385640193, "loss": 1.2434, "step": 201 }, { "epoch": 0.04106525716609067, "grad_norm": 0.12610237300395966, "learning_rate": 0.00019601342418387065, "loss": 1.1765, "step": 202 }, { "epoch": 0.04126855051839805, "grad_norm": 0.12680204212665558, "learning_rate": 0.00019599308451133938, "loss": 1.1754, "step": 203 }, { "epoch": 0.041471843870705426, "grad_norm": 0.1301220804452896, "learning_rate": 0.0001959727448388081, "loss": 1.2629, "step": 204 }, { "epoch": 0.04167513722301281, "grad_norm": 0.11219633370637894, "learning_rate": 0.00019595240516627685, "loss": 1.0403, "step": 205 }, { "epoch": 0.04187843057532019, "grad_norm": 0.11882266402244568, "learning_rate": 0.00019593206549374555, "loss": 0.9514, "step": 206 }, { "epoch": 0.042081723927627566, "grad_norm": 0.11426525563001633, "learning_rate": 0.00019591172582121428, "loss": 1.163, "step": 207 }, { "epoch": 0.042285017279934944, "grad_norm": 0.12243502587080002, "learning_rate": 0.000195891386148683, "loss": 1.166, "step": 208 }, { "epoch": 0.04248831063224232, "grad_norm": 0.12537699937820435, "learning_rate": 0.00019587104647615175, "loss": 1.1335, "step": 209 }, { "epoch": 0.04269160398454971, "grad_norm": 0.12613898515701294, "learning_rate": 0.00019585070680362048, "loss": 1.2059, "step": 210 }, { "epoch": 0.042894897336857085, "grad_norm": 0.11983931809663773, "learning_rate": 0.0001958303671310892, "loss": 1.0849, "step": 211 }, { "epoch": 0.04309819068916446, "grad_norm": 0.12225540727376938, "learning_rate": 0.00019581002745855793, "loss": 1.1581, "step": 212 }, { "epoch": 0.04330148404147184, "grad_norm": 0.12691259384155273, "learning_rate": 0.00019578968778602668, "loss": 1.2828, "step": 213 }, { "epoch": 0.043504777393779226, "grad_norm": 0.11088678985834122, "learning_rate": 0.00019576934811349538, "loss": 1.0987, "step": 214 }, { "epoch": 0.043708070746086604, "grad_norm": 0.14477139711380005, "learning_rate": 0.0001957490084409641, "loss": 1.3049, "step": 215 }, { "epoch": 0.04391136409839398, "grad_norm": 0.12409314513206482, "learning_rate": 0.00019572866876843282, "loss": 1.1617, "step": 216 }, { "epoch": 0.04411465745070136, "grad_norm": 0.12076637893915176, "learning_rate": 0.00019570832909590155, "loss": 1.1163, "step": 217 }, { "epoch": 0.044317950803008745, "grad_norm": 0.1195930689573288, "learning_rate": 0.0001956879894233703, "loss": 1.3286, "step": 218 }, { "epoch": 0.04452124415531612, "grad_norm": 0.11751043051481247, "learning_rate": 0.00019566764975083902, "loss": 1.0493, "step": 219 }, { "epoch": 0.0447245375076235, "grad_norm": 0.12073373049497604, "learning_rate": 0.00019564731007830775, "loss": 1.088, "step": 220 }, { "epoch": 0.04492783085993088, "grad_norm": 0.11219310760498047, "learning_rate": 0.00019562697040577647, "loss": 1.0455, "step": 221 }, { "epoch": 0.045131124212238256, "grad_norm": 0.11247701942920685, "learning_rate": 0.0001956066307332452, "loss": 1.0703, "step": 222 }, { "epoch": 0.04533441756454564, "grad_norm": 0.11884698271751404, "learning_rate": 0.00019558629106071392, "loss": 1.2793, "step": 223 }, { "epoch": 0.04553771091685302, "grad_norm": 0.12219983339309692, "learning_rate": 0.00019556595138818265, "loss": 1.1819, "step": 224 }, { "epoch": 0.0457410042691604, "grad_norm": 0.1017618402838707, "learning_rate": 0.00019554561171565137, "loss": 0.9348, "step": 225 }, { "epoch": 0.045944297621467775, "grad_norm": 0.11600673943758011, "learning_rate": 0.00019552527204312012, "loss": 1.0054, "step": 226 }, { "epoch": 0.04614759097377516, "grad_norm": 0.13273292779922485, "learning_rate": 0.00019550493237058885, "loss": 1.2805, "step": 227 }, { "epoch": 0.04635088432608254, "grad_norm": 0.11936032027006149, "learning_rate": 0.00019548459269805757, "loss": 1.1255, "step": 228 }, { "epoch": 0.046554177678389916, "grad_norm": 0.12319690734148026, "learning_rate": 0.0001954642530255263, "loss": 1.3222, "step": 229 }, { "epoch": 0.046757471030697294, "grad_norm": 0.11699585616588593, "learning_rate": 0.00019544391335299502, "loss": 1.1299, "step": 230 }, { "epoch": 0.04696076438300468, "grad_norm": 0.11112070828676224, "learning_rate": 0.00019542357368046375, "loss": 1.1836, "step": 231 }, { "epoch": 0.04716405773531206, "grad_norm": 0.13511928915977478, "learning_rate": 0.00019540323400793247, "loss": 1.1319, "step": 232 }, { "epoch": 0.047367351087619435, "grad_norm": 0.12584780156612396, "learning_rate": 0.0001953828943354012, "loss": 1.0862, "step": 233 }, { "epoch": 0.04757064443992681, "grad_norm": 0.12041206657886505, "learning_rate": 0.00019536255466286995, "loss": 1.0866, "step": 234 }, { "epoch": 0.0477739377922342, "grad_norm": 0.1117459088563919, "learning_rate": 0.00019534221499033867, "loss": 1.0334, "step": 235 }, { "epoch": 0.047977231144541575, "grad_norm": 0.11388564109802246, "learning_rate": 0.0001953218753178074, "loss": 1.0335, "step": 236 }, { "epoch": 0.04818052449684895, "grad_norm": 0.11740144342184067, "learning_rate": 0.00019530153564527612, "loss": 1.0301, "step": 237 }, { "epoch": 0.04838381784915633, "grad_norm": 0.1059211865067482, "learning_rate": 0.00019528119597274484, "loss": 0.933, "step": 238 }, { "epoch": 0.04858711120146371, "grad_norm": 0.12493643909692764, "learning_rate": 0.00019526085630021357, "loss": 1.129, "step": 239 }, { "epoch": 0.048790404553771094, "grad_norm": 0.11791351437568665, "learning_rate": 0.0001952405166276823, "loss": 1.0817, "step": 240 }, { "epoch": 0.04899369790607847, "grad_norm": 0.12103426456451416, "learning_rate": 0.00019522017695515102, "loss": 1.0588, "step": 241 }, { "epoch": 0.04919699125838585, "grad_norm": 0.12383697926998138, "learning_rate": 0.00019519983728261977, "loss": 1.1269, "step": 242 }, { "epoch": 0.04940028461069323, "grad_norm": 0.10310048609972, "learning_rate": 0.0001951794976100885, "loss": 0.9393, "step": 243 }, { "epoch": 0.04960357796300061, "grad_norm": 0.11793255805969238, "learning_rate": 0.00019515915793755722, "loss": 1.1511, "step": 244 }, { "epoch": 0.04980687131530799, "grad_norm": 0.12708383798599243, "learning_rate": 0.00019513881826502594, "loss": 1.1525, "step": 245 }, { "epoch": 0.05001016466761537, "grad_norm": 0.13343508541584015, "learning_rate": 0.0001951184785924947, "loss": 1.2057, "step": 246 }, { "epoch": 0.050213458019922746, "grad_norm": 0.12891672551631927, "learning_rate": 0.0001950981389199634, "loss": 1.1788, "step": 247 }, { "epoch": 0.05041675137223013, "grad_norm": 0.11919089406728745, "learning_rate": 0.00019507779924743212, "loss": 0.8968, "step": 248 }, { "epoch": 0.05062004472453751, "grad_norm": 0.11912382394075394, "learning_rate": 0.00019505745957490084, "loss": 1.0667, "step": 249 }, { "epoch": 0.05082333807684489, "grad_norm": 0.12510718405246735, "learning_rate": 0.0001950371199023696, "loss": 0.9301, "step": 250 }, { "epoch": 0.051026631429152265, "grad_norm": 0.13244077563285828, "learning_rate": 0.00019501678022983832, "loss": 1.1302, "step": 251 }, { "epoch": 0.05122992478145964, "grad_norm": 0.11624693870544434, "learning_rate": 0.00019499644055730704, "loss": 1.1317, "step": 252 }, { "epoch": 0.05143321813376703, "grad_norm": 0.11178990453481674, "learning_rate": 0.00019497610088477576, "loss": 1.0696, "step": 253 }, { "epoch": 0.051636511486074406, "grad_norm": 0.12613075971603394, "learning_rate": 0.00019495576121224452, "loss": 1.2195, "step": 254 }, { "epoch": 0.051839804838381784, "grad_norm": 0.13160696625709534, "learning_rate": 0.00019493542153971321, "loss": 1.0333, "step": 255 }, { "epoch": 0.05204309819068916, "grad_norm": 0.11390336602926254, "learning_rate": 0.00019491508186718194, "loss": 0.9062, "step": 256 }, { "epoch": 0.05224639154299655, "grad_norm": 0.12177371233701706, "learning_rate": 0.00019489474219465066, "loss": 1.1687, "step": 257 }, { "epoch": 0.052449684895303925, "grad_norm": 0.12821920216083527, "learning_rate": 0.0001948744025221194, "loss": 1.0509, "step": 258 }, { "epoch": 0.0526529782476113, "grad_norm": 0.11554522067308426, "learning_rate": 0.00019485406284958814, "loss": 0.9503, "step": 259 }, { "epoch": 0.05285627159991868, "grad_norm": 0.1144140213727951, "learning_rate": 0.00019483372317705686, "loss": 1.0419, "step": 260 }, { "epoch": 0.053059564952226065, "grad_norm": 0.12091881781816483, "learning_rate": 0.0001948133835045256, "loss": 1.1362, "step": 261 }, { "epoch": 0.05326285830453344, "grad_norm": 0.1322740912437439, "learning_rate": 0.0001947930438319943, "loss": 1.2769, "step": 262 }, { "epoch": 0.05346615165684082, "grad_norm": 0.12368176877498627, "learning_rate": 0.00019477270415946304, "loss": 1.1871, "step": 263 }, { "epoch": 0.0536694450091482, "grad_norm": 0.11115586012601852, "learning_rate": 0.00019475236448693176, "loss": 1.0479, "step": 264 }, { "epoch": 0.05387273836145558, "grad_norm": 0.1282634437084198, "learning_rate": 0.00019473202481440049, "loss": 1.038, "step": 265 }, { "epoch": 0.05407603171376296, "grad_norm": 0.11252263188362122, "learning_rate": 0.0001947116851418692, "loss": 1.1254, "step": 266 }, { "epoch": 0.05427932506607034, "grad_norm": 0.10750589519739151, "learning_rate": 0.00019469134546933796, "loss": 0.8887, "step": 267 }, { "epoch": 0.05448261841837772, "grad_norm": 0.1257811188697815, "learning_rate": 0.00019467100579680669, "loss": 1.3177, "step": 268 }, { "epoch": 0.054685911770685096, "grad_norm": 0.13415637612342834, "learning_rate": 0.0001946506661242754, "loss": 1.2538, "step": 269 }, { "epoch": 0.05488920512299248, "grad_norm": 0.11637566983699799, "learning_rate": 0.00019463032645174413, "loss": 1.0536, "step": 270 }, { "epoch": 0.05509249847529986, "grad_norm": 0.12544845044612885, "learning_rate": 0.00019460998677921286, "loss": 1.2673, "step": 271 }, { "epoch": 0.05529579182760724, "grad_norm": 0.13013462722301483, "learning_rate": 0.00019458964710668158, "loss": 1.1977, "step": 272 }, { "epoch": 0.055499085179914615, "grad_norm": 0.13211217522621155, "learning_rate": 0.0001945693074341503, "loss": 1.1317, "step": 273 }, { "epoch": 0.055702378532222, "grad_norm": 0.13389961421489716, "learning_rate": 0.00019454896776161903, "loss": 1.1075, "step": 274 }, { "epoch": 0.05590567188452938, "grad_norm": 0.12008912861347198, "learning_rate": 0.00019452862808908778, "loss": 0.9692, "step": 275 }, { "epoch": 0.056108965236836755, "grad_norm": 0.1291409730911255, "learning_rate": 0.0001945082884165565, "loss": 1.2363, "step": 276 }, { "epoch": 0.05631225858914413, "grad_norm": 0.12915107607841492, "learning_rate": 0.00019448794874402523, "loss": 1.3091, "step": 277 }, { "epoch": 0.05651555194145151, "grad_norm": 0.11216573417186737, "learning_rate": 0.00019446760907149396, "loss": 1.0992, "step": 278 }, { "epoch": 0.056718845293758896, "grad_norm": 0.10683475434780121, "learning_rate": 0.00019444726939896268, "loss": 0.9928, "step": 279 }, { "epoch": 0.056922138646066274, "grad_norm": 0.12699760496616364, "learning_rate": 0.0001944269297264314, "loss": 1.1559, "step": 280 }, { "epoch": 0.05712543199837365, "grad_norm": 0.1270214468240738, "learning_rate": 0.00019440659005390013, "loss": 0.9364, "step": 281 }, { "epoch": 0.05732872535068103, "grad_norm": 0.13140781223773956, "learning_rate": 0.00019438625038136886, "loss": 1.1587, "step": 282 }, { "epoch": 0.057532018702988415, "grad_norm": 0.127557173371315, "learning_rate": 0.0001943659107088376, "loss": 1.0203, "step": 283 }, { "epoch": 0.05773531205529579, "grad_norm": 0.13735321164131165, "learning_rate": 0.00019434557103630633, "loss": 1.3412, "step": 284 }, { "epoch": 0.05793860540760317, "grad_norm": 0.11763381958007812, "learning_rate": 0.00019432523136377506, "loss": 1.1305, "step": 285 }, { "epoch": 0.05814189875991055, "grad_norm": 0.1292058527469635, "learning_rate": 0.00019430489169124378, "loss": 1.227, "step": 286 }, { "epoch": 0.05834519211221793, "grad_norm": 0.1357347071170807, "learning_rate": 0.0001942845520187125, "loss": 1.0898, "step": 287 }, { "epoch": 0.05854848546452531, "grad_norm": 0.13546323776245117, "learning_rate": 0.00019426421234618123, "loss": 1.1308, "step": 288 }, { "epoch": 0.05875177881683269, "grad_norm": 0.12612831592559814, "learning_rate": 0.00019424387267364995, "loss": 1.1161, "step": 289 }, { "epoch": 0.05895507216914007, "grad_norm": 0.12061580270528793, "learning_rate": 0.00019422353300111868, "loss": 0.9395, "step": 290 }, { "epoch": 0.059158365521447445, "grad_norm": 0.12118272483348846, "learning_rate": 0.00019420319332858743, "loss": 1.0597, "step": 291 }, { "epoch": 0.05936165887375483, "grad_norm": 0.11357955634593964, "learning_rate": 0.00019418285365605615, "loss": 1.1651, "step": 292 }, { "epoch": 0.05956495222606221, "grad_norm": 0.11546896398067474, "learning_rate": 0.00019416251398352488, "loss": 0.9093, "step": 293 }, { "epoch": 0.059768245578369586, "grad_norm": 0.12699609994888306, "learning_rate": 0.0001941421743109936, "loss": 1.1878, "step": 294 }, { "epoch": 0.059971538930676964, "grad_norm": 0.11789494752883911, "learning_rate": 0.00019412183463846233, "loss": 1.0162, "step": 295 }, { "epoch": 0.06017483228298435, "grad_norm": 0.11362869292497635, "learning_rate": 0.00019410149496593105, "loss": 1.0056, "step": 296 }, { "epoch": 0.06037812563529173, "grad_norm": 0.125663161277771, "learning_rate": 0.00019408115529339978, "loss": 1.083, "step": 297 }, { "epoch": 0.060581418987599105, "grad_norm": 0.11303743720054626, "learning_rate": 0.0001940608156208685, "loss": 1.1573, "step": 298 }, { "epoch": 0.06078471233990648, "grad_norm": 0.11955615878105164, "learning_rate": 0.00019404047594833723, "loss": 0.9637, "step": 299 }, { "epoch": 0.06098800569221387, "grad_norm": 0.11959411948919296, "learning_rate": 0.00019402013627580598, "loss": 1.1023, "step": 300 }, { "epoch": 0.061191299044521245, "grad_norm": 0.1248716339468956, "learning_rate": 0.0001939997966032747, "loss": 1.2881, "step": 301 }, { "epoch": 0.06139459239682862, "grad_norm": 0.1136515811085701, "learning_rate": 0.00019397945693074343, "loss": 1.0921, "step": 302 }, { "epoch": 0.061597885749136, "grad_norm": 0.11583786457777023, "learning_rate": 0.00019395911725821215, "loss": 1.0758, "step": 303 }, { "epoch": 0.061801179101443386, "grad_norm": 0.12685681879520416, "learning_rate": 0.00019393877758568087, "loss": 1.2444, "step": 304 }, { "epoch": 0.062004472453750764, "grad_norm": 0.15549907088279724, "learning_rate": 0.0001939184379131496, "loss": 1.4011, "step": 305 }, { "epoch": 0.06220776580605814, "grad_norm": 0.11548073589801788, "learning_rate": 0.00019389809824061832, "loss": 1.0536, "step": 306 }, { "epoch": 0.06241105915836552, "grad_norm": 0.11526035517454147, "learning_rate": 0.00019387775856808705, "loss": 0.9051, "step": 307 }, { "epoch": 0.0626143525106729, "grad_norm": 0.10682015866041183, "learning_rate": 0.0001938574188955558, "loss": 0.9744, "step": 308 }, { "epoch": 0.06281764586298028, "grad_norm": 0.11594579368829727, "learning_rate": 0.00019383707922302452, "loss": 1.0771, "step": 309 }, { "epoch": 0.06302093921528766, "grad_norm": 0.11397954076528549, "learning_rate": 0.00019381673955049325, "loss": 0.9991, "step": 310 }, { "epoch": 0.06322423256759505, "grad_norm": 0.12746506929397583, "learning_rate": 0.00019379639987796197, "loss": 1.1141, "step": 311 }, { "epoch": 0.06342752591990242, "grad_norm": 0.11370940506458282, "learning_rate": 0.0001937760602054307, "loss": 0.9784, "step": 312 }, { "epoch": 0.0636308192722098, "grad_norm": 0.11094705015420914, "learning_rate": 0.00019375572053289942, "loss": 0.9092, "step": 313 }, { "epoch": 0.06383411262451717, "grad_norm": 0.12067949026823044, "learning_rate": 0.00019373538086036815, "loss": 1.0812, "step": 314 }, { "epoch": 0.06403740597682456, "grad_norm": 0.11797504872083664, "learning_rate": 0.00019371504118783687, "loss": 1.0659, "step": 315 }, { "epoch": 0.06424069932913194, "grad_norm": 0.10436304658651352, "learning_rate": 0.00019369470151530562, "loss": 1.0226, "step": 316 }, { "epoch": 0.06444399268143931, "grad_norm": 0.1373065710067749, "learning_rate": 0.00019367436184277435, "loss": 1.1967, "step": 317 }, { "epoch": 0.0646472860337467, "grad_norm": 0.12204968929290771, "learning_rate": 0.00019365402217024307, "loss": 1.138, "step": 318 }, { "epoch": 0.06485057938605407, "grad_norm": 0.11520784348249435, "learning_rate": 0.0001936336824977118, "loss": 1.0148, "step": 319 }, { "epoch": 0.06505387273836145, "grad_norm": 0.12380523979663849, "learning_rate": 0.00019361334282518052, "loss": 1.1577, "step": 320 }, { "epoch": 0.06525716609066884, "grad_norm": 0.12227565050125122, "learning_rate": 0.00019359300315264924, "loss": 1.0843, "step": 321 }, { "epoch": 0.06546045944297621, "grad_norm": 0.12836994230747223, "learning_rate": 0.00019357266348011797, "loss": 1.137, "step": 322 }, { "epoch": 0.0656637527952836, "grad_norm": 0.1091795489192009, "learning_rate": 0.0001935523238075867, "loss": 1.1794, "step": 323 }, { "epoch": 0.06586704614759098, "grad_norm": 0.11629168689250946, "learning_rate": 0.00019353198413505545, "loss": 1.1052, "step": 324 }, { "epoch": 0.06607033949989835, "grad_norm": 0.12525077164173126, "learning_rate": 0.00019351164446252417, "loss": 1.0891, "step": 325 }, { "epoch": 0.06627363285220574, "grad_norm": 0.12222876399755478, "learning_rate": 0.0001934913047899929, "loss": 1.2059, "step": 326 }, { "epoch": 0.0664769262045131, "grad_norm": 0.12146129459142685, "learning_rate": 0.00019347096511746162, "loss": 1.1067, "step": 327 }, { "epoch": 0.06668021955682049, "grad_norm": 0.11807144433259964, "learning_rate": 0.00019345062544493034, "loss": 1.0953, "step": 328 }, { "epoch": 0.06688351290912788, "grad_norm": 0.11966339498758316, "learning_rate": 0.00019343028577239907, "loss": 1.071, "step": 329 }, { "epoch": 0.06708680626143525, "grad_norm": 0.1203102245926857, "learning_rate": 0.0001934099460998678, "loss": 1.0197, "step": 330 }, { "epoch": 0.06729009961374263, "grad_norm": 0.1138140857219696, "learning_rate": 0.00019338960642733652, "loss": 0.9191, "step": 331 }, { "epoch": 0.06749339296605002, "grad_norm": 0.12846186757087708, "learning_rate": 0.00019336926675480527, "loss": 1.0951, "step": 332 }, { "epoch": 0.06769668631835739, "grad_norm": 0.12961986660957336, "learning_rate": 0.000193348927082274, "loss": 1.1309, "step": 333 }, { "epoch": 0.06789997967066477, "grad_norm": 0.12339945137500763, "learning_rate": 0.00019332858740974272, "loss": 1.0363, "step": 334 }, { "epoch": 0.06810327302297214, "grad_norm": 0.13385798037052155, "learning_rate": 0.00019330824773721144, "loss": 1.114, "step": 335 }, { "epoch": 0.06830656637527953, "grad_norm": 0.13270089030265808, "learning_rate": 0.00019328790806468017, "loss": 1.2388, "step": 336 }, { "epoch": 0.06850985972758691, "grad_norm": 0.11112480610609055, "learning_rate": 0.0001932675683921489, "loss": 1.1178, "step": 337 }, { "epoch": 0.06871315307989428, "grad_norm": 0.12246957421302795, "learning_rate": 0.00019324722871961761, "loss": 1.233, "step": 338 }, { "epoch": 0.06891644643220167, "grad_norm": 0.12208685278892517, "learning_rate": 0.00019322688904708634, "loss": 1.1145, "step": 339 }, { "epoch": 0.06911973978450904, "grad_norm": 0.11839979141950607, "learning_rate": 0.00019320654937455506, "loss": 0.986, "step": 340 }, { "epoch": 0.06932303313681643, "grad_norm": 0.13268662989139557, "learning_rate": 0.00019318620970202382, "loss": 1.0527, "step": 341 }, { "epoch": 0.06952632648912381, "grad_norm": 0.11831391602754593, "learning_rate": 0.00019316587002949254, "loss": 1.2318, "step": 342 }, { "epoch": 0.06972961984143118, "grad_norm": 0.11892188340425491, "learning_rate": 0.00019314553035696126, "loss": 1.2, "step": 343 }, { "epoch": 0.06993291319373857, "grad_norm": 0.13015909492969513, "learning_rate": 0.00019312519068443, "loss": 1.1622, "step": 344 }, { "epoch": 0.07013620654604595, "grad_norm": 0.10422676056623459, "learning_rate": 0.0001931048510118987, "loss": 0.9258, "step": 345 }, { "epoch": 0.07033949989835332, "grad_norm": 0.10162926465272903, "learning_rate": 0.00019308451133936744, "loss": 0.9709, "step": 346 }, { "epoch": 0.0705427932506607, "grad_norm": 0.12753081321716309, "learning_rate": 0.00019306417166683616, "loss": 1.1193, "step": 347 }, { "epoch": 0.07074608660296808, "grad_norm": 0.12309850752353668, "learning_rate": 0.0001930438319943049, "loss": 1.2146, "step": 348 }, { "epoch": 0.07094937995527546, "grad_norm": 0.13199441134929657, "learning_rate": 0.00019302349232177364, "loss": 1.1723, "step": 349 }, { "epoch": 0.07115267330758285, "grad_norm": 0.12041430175304413, "learning_rate": 0.00019300315264924236, "loss": 1.1115, "step": 350 }, { "epoch": 0.07135596665989022, "grad_norm": 0.11456899344921112, "learning_rate": 0.0001929828129767111, "loss": 1.0829, "step": 351 }, { "epoch": 0.0715592600121976, "grad_norm": 0.12147854268550873, "learning_rate": 0.0001929624733041798, "loss": 1.1963, "step": 352 }, { "epoch": 0.07176255336450497, "grad_norm": 0.13312789797782898, "learning_rate": 0.00019294213363164854, "loss": 1.1182, "step": 353 }, { "epoch": 0.07196584671681236, "grad_norm": 0.1078067272901535, "learning_rate": 0.00019292179395911726, "loss": 0.9415, "step": 354 }, { "epoch": 0.07216914006911974, "grad_norm": 0.1231444925069809, "learning_rate": 0.00019290145428658598, "loss": 1.2428, "step": 355 }, { "epoch": 0.07237243342142712, "grad_norm": 0.13848941028118134, "learning_rate": 0.0001928811146140547, "loss": 1.1953, "step": 356 }, { "epoch": 0.0725757267737345, "grad_norm": 0.11954299360513687, "learning_rate": 0.00019286077494152346, "loss": 0.9834, "step": 357 }, { "epoch": 0.07277902012604189, "grad_norm": 0.11029402166604996, "learning_rate": 0.00019284043526899219, "loss": 1.0635, "step": 358 }, { "epoch": 0.07298231347834926, "grad_norm": 0.11941875517368317, "learning_rate": 0.0001928200955964609, "loss": 1.1698, "step": 359 }, { "epoch": 0.07318560683065664, "grad_norm": 0.11633221805095673, "learning_rate": 0.00019279975592392963, "loss": 0.9411, "step": 360 }, { "epoch": 0.07338890018296401, "grad_norm": 0.11820893734693527, "learning_rate": 0.00019277941625139836, "loss": 1.0487, "step": 361 }, { "epoch": 0.0735921935352714, "grad_norm": 0.14069049060344696, "learning_rate": 0.00019275907657886708, "loss": 1.2295, "step": 362 }, { "epoch": 0.07379548688757878, "grad_norm": 0.12828344106674194, "learning_rate": 0.0001927387369063358, "loss": 1.1904, "step": 363 }, { "epoch": 0.07399878023988615, "grad_norm": 0.12259247899055481, "learning_rate": 0.00019271839723380453, "loss": 1.0655, "step": 364 }, { "epoch": 0.07420207359219354, "grad_norm": 0.12864744663238525, "learning_rate": 0.00019269805756127328, "loss": 1.207, "step": 365 }, { "epoch": 0.07440536694450091, "grad_norm": 0.1141364574432373, "learning_rate": 0.000192677717888742, "loss": 0.9853, "step": 366 }, { "epoch": 0.0746086602968083, "grad_norm": 0.10614699870347977, "learning_rate": 0.00019265737821621073, "loss": 1.0003, "step": 367 }, { "epoch": 0.07481195364911568, "grad_norm": 0.1159566193819046, "learning_rate": 0.00019263703854367946, "loss": 1.1753, "step": 368 }, { "epoch": 0.07501524700142305, "grad_norm": 0.11285501718521118, "learning_rate": 0.00019261669887114818, "loss": 1.0592, "step": 369 }, { "epoch": 0.07521854035373043, "grad_norm": 0.11360286176204681, "learning_rate": 0.0001925963591986169, "loss": 0.9719, "step": 370 }, { "epoch": 0.07542183370603782, "grad_norm": 0.1143144741654396, "learning_rate": 0.00019257601952608563, "loss": 1.0623, "step": 371 }, { "epoch": 0.07562512705834519, "grad_norm": 0.11664289981126785, "learning_rate": 0.00019255567985355436, "loss": 0.9849, "step": 372 }, { "epoch": 0.07582842041065257, "grad_norm": 0.11677186191082001, "learning_rate": 0.0001925353401810231, "loss": 0.9926, "step": 373 }, { "epoch": 0.07603171376295995, "grad_norm": 0.12509550154209137, "learning_rate": 0.00019251500050849183, "loss": 1.1648, "step": 374 }, { "epoch": 0.07623500711526733, "grad_norm": 0.13659395277500153, "learning_rate": 0.00019249466083596056, "loss": 1.2005, "step": 375 }, { "epoch": 0.07643830046757472, "grad_norm": 0.11500003188848495, "learning_rate": 0.00019247432116342928, "loss": 1.1287, "step": 376 }, { "epoch": 0.07664159381988209, "grad_norm": 0.11376544088125229, "learning_rate": 0.000192453981490898, "loss": 1.0013, "step": 377 }, { "epoch": 0.07684488717218947, "grad_norm": 0.13335828483104706, "learning_rate": 0.00019243364181836673, "loss": 1.1969, "step": 378 }, { "epoch": 0.07704818052449684, "grad_norm": 0.1245710700750351, "learning_rate": 0.00019241330214583545, "loss": 1.2461, "step": 379 }, { "epoch": 0.07725147387680423, "grad_norm": 0.12159935384988785, "learning_rate": 0.00019239296247330418, "loss": 1.0066, "step": 380 }, { "epoch": 0.07745476722911161, "grad_norm": 0.1263132244348526, "learning_rate": 0.0001923726228007729, "loss": 1.1993, "step": 381 }, { "epoch": 0.07765806058141898, "grad_norm": 0.11738517135381699, "learning_rate": 0.00019235228312824165, "loss": 1.19, "step": 382 }, { "epoch": 0.07786135393372637, "grad_norm": 0.13438478112220764, "learning_rate": 0.00019233194345571038, "loss": 1.1794, "step": 383 }, { "epoch": 0.07806464728603375, "grad_norm": 0.1180570125579834, "learning_rate": 0.0001923116037831791, "loss": 1.0685, "step": 384 }, { "epoch": 0.07826794063834112, "grad_norm": 0.13014809787273407, "learning_rate": 0.00019229126411064783, "loss": 1.1838, "step": 385 }, { "epoch": 0.07847123399064851, "grad_norm": 0.12478948384523392, "learning_rate": 0.00019227092443811655, "loss": 1.1978, "step": 386 }, { "epoch": 0.07867452734295588, "grad_norm": 0.10319990664720535, "learning_rate": 0.00019225058476558528, "loss": 1.1273, "step": 387 }, { "epoch": 0.07887782069526326, "grad_norm": 0.11172400414943695, "learning_rate": 0.000192230245093054, "loss": 0.9054, "step": 388 }, { "epoch": 0.07908111404757065, "grad_norm": 0.12951341271400452, "learning_rate": 0.00019220990542052273, "loss": 1.1554, "step": 389 }, { "epoch": 0.07928440739987802, "grad_norm": 0.13350042700767517, "learning_rate": 0.00019218956574799148, "loss": 1.1787, "step": 390 }, { "epoch": 0.0794877007521854, "grad_norm": 0.11068174242973328, "learning_rate": 0.0001921692260754602, "loss": 1.1072, "step": 391 }, { "epoch": 0.07969099410449278, "grad_norm": 0.09952767938375473, "learning_rate": 0.00019214888640292893, "loss": 1.0071, "step": 392 }, { "epoch": 0.07989428745680016, "grad_norm": 0.10815319418907166, "learning_rate": 0.00019212854673039765, "loss": 0.8681, "step": 393 }, { "epoch": 0.08009758080910755, "grad_norm": 0.1121988445520401, "learning_rate": 0.00019210820705786637, "loss": 0.987, "step": 394 }, { "epoch": 0.08030087416141492, "grad_norm": 0.10137449204921722, "learning_rate": 0.0001920878673853351, "loss": 0.8968, "step": 395 }, { "epoch": 0.0805041675137223, "grad_norm": 0.09827956557273865, "learning_rate": 0.00019206752771280382, "loss": 0.8864, "step": 396 }, { "epoch": 0.08070746086602969, "grad_norm": 0.11967012286186218, "learning_rate": 0.00019204718804027255, "loss": 1.085, "step": 397 }, { "epoch": 0.08091075421833706, "grad_norm": 0.11249358206987381, "learning_rate": 0.0001920268483677413, "loss": 1.0201, "step": 398 }, { "epoch": 0.08111404757064444, "grad_norm": 0.12788376212120056, "learning_rate": 0.00019200650869521002, "loss": 1.0529, "step": 399 }, { "epoch": 0.08131734092295181, "grad_norm": 0.11879412829875946, "learning_rate": 0.00019198616902267875, "loss": 1.0418, "step": 400 }, { "epoch": 0.0815206342752592, "grad_norm": 0.11404243856668472, "learning_rate": 0.00019196582935014747, "loss": 0.9311, "step": 401 }, { "epoch": 0.08172392762756658, "grad_norm": 0.13113105297088623, "learning_rate": 0.0001919454896776162, "loss": 1.2886, "step": 402 }, { "epoch": 0.08192722097987395, "grad_norm": 0.12636548280715942, "learning_rate": 0.00019192515000508492, "loss": 1.0967, "step": 403 }, { "epoch": 0.08213051433218134, "grad_norm": 0.1245994120836258, "learning_rate": 0.00019190481033255365, "loss": 1.0426, "step": 404 }, { "epoch": 0.08233380768448871, "grad_norm": 0.12495577335357666, "learning_rate": 0.00019188447066002237, "loss": 1.1212, "step": 405 }, { "epoch": 0.0825371010367961, "grad_norm": 0.112003855407238, "learning_rate": 0.00019186413098749112, "loss": 0.9948, "step": 406 }, { "epoch": 0.08274039438910348, "grad_norm": 0.11918698996305466, "learning_rate": 0.00019184379131495985, "loss": 1.0927, "step": 407 }, { "epoch": 0.08294368774141085, "grad_norm": 0.11620672792196274, "learning_rate": 0.00019182345164242857, "loss": 1.0805, "step": 408 }, { "epoch": 0.08314698109371824, "grad_norm": 0.12570421397686005, "learning_rate": 0.0001918031119698973, "loss": 1.1484, "step": 409 }, { "epoch": 0.08335027444602562, "grad_norm": 0.12078004330396652, "learning_rate": 0.00019178277229736602, "loss": 1.248, "step": 410 }, { "epoch": 0.08355356779833299, "grad_norm": 0.1178092435002327, "learning_rate": 0.00019176243262483474, "loss": 1.1365, "step": 411 }, { "epoch": 0.08375686115064038, "grad_norm": 0.13181130588054657, "learning_rate": 0.00019174209295230347, "loss": 1.335, "step": 412 }, { "epoch": 0.08396015450294775, "grad_norm": 0.1192195788025856, "learning_rate": 0.0001917217532797722, "loss": 1.119, "step": 413 }, { "epoch": 0.08416344785525513, "grad_norm": 0.12525242567062378, "learning_rate": 0.00019170141360724095, "loss": 1.2269, "step": 414 }, { "epoch": 0.08436674120756252, "grad_norm": 0.12473724037408829, "learning_rate": 0.00019168107393470967, "loss": 1.2479, "step": 415 }, { "epoch": 0.08457003455986989, "grad_norm": 0.1118764728307724, "learning_rate": 0.0001916607342621784, "loss": 1.0089, "step": 416 }, { "epoch": 0.08477332791217727, "grad_norm": 0.11220741271972656, "learning_rate": 0.00019164039458964712, "loss": 0.9793, "step": 417 }, { "epoch": 0.08497662126448464, "grad_norm": 0.1261814385652542, "learning_rate": 0.00019162005491711584, "loss": 1.092, "step": 418 }, { "epoch": 0.08517991461679203, "grad_norm": 0.12782973051071167, "learning_rate": 0.00019159971524458457, "loss": 1.08, "step": 419 }, { "epoch": 0.08538320796909941, "grad_norm": 0.12007841467857361, "learning_rate": 0.0001915793755720533, "loss": 1.0856, "step": 420 }, { "epoch": 0.08558650132140679, "grad_norm": 0.1249847337603569, "learning_rate": 0.00019155903589952202, "loss": 1.1314, "step": 421 }, { "epoch": 0.08578979467371417, "grad_norm": 0.10619431734085083, "learning_rate": 0.00019153869622699074, "loss": 1.0298, "step": 422 }, { "epoch": 0.08599308802602156, "grad_norm": 0.12282367795705795, "learning_rate": 0.0001915183565544595, "loss": 1.1277, "step": 423 }, { "epoch": 0.08619638137832893, "grad_norm": 0.12001215666532516, "learning_rate": 0.00019149801688192822, "loss": 1.0792, "step": 424 }, { "epoch": 0.08639967473063631, "grad_norm": 0.10283269733190536, "learning_rate": 0.00019147767720939694, "loss": 0.9422, "step": 425 }, { "epoch": 0.08660296808294368, "grad_norm": 0.11698923259973526, "learning_rate": 0.00019145733753686567, "loss": 1.0371, "step": 426 }, { "epoch": 0.08680626143525107, "grad_norm": 0.11874233931303024, "learning_rate": 0.0001914369978643344, "loss": 1.042, "step": 427 }, { "epoch": 0.08700955478755845, "grad_norm": 0.10154362767934799, "learning_rate": 0.00019141665819180311, "loss": 0.9436, "step": 428 }, { "epoch": 0.08721284813986582, "grad_norm": 0.10885417461395264, "learning_rate": 0.00019139631851927184, "loss": 1.0823, "step": 429 }, { "epoch": 0.08741614149217321, "grad_norm": 0.11313669383525848, "learning_rate": 0.00019137597884674056, "loss": 1.0905, "step": 430 }, { "epoch": 0.08761943484448058, "grad_norm": 0.12074249237775803, "learning_rate": 0.00019135563917420932, "loss": 1.1466, "step": 431 }, { "epoch": 0.08782272819678796, "grad_norm": 0.12890012562274933, "learning_rate": 0.00019133529950167804, "loss": 1.1222, "step": 432 }, { "epoch": 0.08802602154909535, "grad_norm": 0.12527287006378174, "learning_rate": 0.00019131495982914676, "loss": 1.0391, "step": 433 }, { "epoch": 0.08822931490140272, "grad_norm": 0.11698780208826065, "learning_rate": 0.0001912946201566155, "loss": 0.9235, "step": 434 }, { "epoch": 0.0884326082537101, "grad_norm": 0.11191095411777496, "learning_rate": 0.0001912742804840842, "loss": 0.9763, "step": 435 }, { "epoch": 0.08863590160601749, "grad_norm": 0.1118699237704277, "learning_rate": 0.00019125394081155294, "loss": 0.9919, "step": 436 }, { "epoch": 0.08883919495832486, "grad_norm": 0.10507287830114365, "learning_rate": 0.00019123360113902166, "loss": 0.8505, "step": 437 }, { "epoch": 0.08904248831063225, "grad_norm": 0.1091250404715538, "learning_rate": 0.00019121326146649039, "loss": 0.9453, "step": 438 }, { "epoch": 0.08924578166293962, "grad_norm": 0.10213371366262436, "learning_rate": 0.00019119292179395914, "loss": 0.9082, "step": 439 }, { "epoch": 0.089449075015247, "grad_norm": 0.1446637064218521, "learning_rate": 0.00019117258212142786, "loss": 1.2307, "step": 440 }, { "epoch": 0.08965236836755439, "grad_norm": 0.13018859922885895, "learning_rate": 0.0001911522424488966, "loss": 1.1052, "step": 441 }, { "epoch": 0.08985566171986176, "grad_norm": 0.1239272952079773, "learning_rate": 0.0001911319027763653, "loss": 1.0036, "step": 442 }, { "epoch": 0.09005895507216914, "grad_norm": 0.1135847195982933, "learning_rate": 0.00019111156310383404, "loss": 1.0524, "step": 443 }, { "epoch": 0.09026224842447651, "grad_norm": 0.1171732023358345, "learning_rate": 0.00019109122343130276, "loss": 1.1769, "step": 444 }, { "epoch": 0.0904655417767839, "grad_norm": 0.12947380542755127, "learning_rate": 0.00019107088375877148, "loss": 1.2071, "step": 445 }, { "epoch": 0.09066883512909128, "grad_norm": 0.1240135133266449, "learning_rate": 0.0001910505440862402, "loss": 1.0782, "step": 446 }, { "epoch": 0.09087212848139865, "grad_norm": 0.1232561394572258, "learning_rate": 0.00019103020441370896, "loss": 1.0068, "step": 447 }, { "epoch": 0.09107542183370604, "grad_norm": 0.11200708150863647, "learning_rate": 0.00019100986474117769, "loss": 0.9491, "step": 448 }, { "epoch": 0.09127871518601342, "grad_norm": 0.1400870531797409, "learning_rate": 0.0001909895250686464, "loss": 1.3197, "step": 449 }, { "epoch": 0.0914820085383208, "grad_norm": 0.12712709605693817, "learning_rate": 0.00019096918539611513, "loss": 1.1853, "step": 450 }, { "epoch": 0.09168530189062818, "grad_norm": 0.11399099975824356, "learning_rate": 0.00019094884572358386, "loss": 0.8887, "step": 451 }, { "epoch": 0.09188859524293555, "grad_norm": 0.10861057788133621, "learning_rate": 0.00019092850605105258, "loss": 0.9224, "step": 452 }, { "epoch": 0.09209188859524294, "grad_norm": 0.12274569272994995, "learning_rate": 0.0001909081663785213, "loss": 1.1491, "step": 453 }, { "epoch": 0.09229518194755032, "grad_norm": 0.11641780287027359, "learning_rate": 0.00019088782670599003, "loss": 1.1646, "step": 454 }, { "epoch": 0.09249847529985769, "grad_norm": 0.1300159990787506, "learning_rate": 0.00019086748703345878, "loss": 1.1239, "step": 455 }, { "epoch": 0.09270176865216508, "grad_norm": 0.12116070836782455, "learning_rate": 0.0001908471473609275, "loss": 1.0475, "step": 456 }, { "epoch": 0.09290506200447246, "grad_norm": 0.11318276822566986, "learning_rate": 0.00019082680768839623, "loss": 1.0162, "step": 457 }, { "epoch": 0.09310835535677983, "grad_norm": 0.10791938006877899, "learning_rate": 0.00019080646801586496, "loss": 0.9874, "step": 458 }, { "epoch": 0.09331164870908722, "grad_norm": 0.10658224672079086, "learning_rate": 0.00019078612834333368, "loss": 0.9483, "step": 459 }, { "epoch": 0.09351494206139459, "grad_norm": 0.12912395596504211, "learning_rate": 0.0001907657886708024, "loss": 1.2248, "step": 460 }, { "epoch": 0.09371823541370197, "grad_norm": 0.1268775314092636, "learning_rate": 0.00019074544899827113, "loss": 1.079, "step": 461 }, { "epoch": 0.09392152876600936, "grad_norm": 0.11810900270938873, "learning_rate": 0.00019072510932573985, "loss": 1.1856, "step": 462 }, { "epoch": 0.09412482211831673, "grad_norm": 0.13081328570842743, "learning_rate": 0.00019070476965320858, "loss": 1.125, "step": 463 }, { "epoch": 0.09432811547062411, "grad_norm": 0.11875245720148087, "learning_rate": 0.00019068442998067733, "loss": 1.1341, "step": 464 }, { "epoch": 0.09453140882293148, "grad_norm": 0.10965297371149063, "learning_rate": 0.00019066409030814606, "loss": 0.9892, "step": 465 }, { "epoch": 0.09473470217523887, "grad_norm": 0.1167355626821518, "learning_rate": 0.00019064375063561478, "loss": 1.1234, "step": 466 }, { "epoch": 0.09493799552754625, "grad_norm": 0.1092626228928566, "learning_rate": 0.0001906234109630835, "loss": 0.9734, "step": 467 }, { "epoch": 0.09514128887985362, "grad_norm": 0.12768998742103577, "learning_rate": 0.00019060307129055223, "loss": 1.1349, "step": 468 }, { "epoch": 0.09534458223216101, "grad_norm": 0.13227547705173492, "learning_rate": 0.00019058273161802095, "loss": 1.2362, "step": 469 }, { "epoch": 0.0955478755844684, "grad_norm": 0.11458224058151245, "learning_rate": 0.00019056239194548968, "loss": 0.9707, "step": 470 }, { "epoch": 0.09575116893677577, "grad_norm": 0.11045580357313156, "learning_rate": 0.0001905420522729584, "loss": 0.973, "step": 471 }, { "epoch": 0.09595446228908315, "grad_norm": 0.1274811327457428, "learning_rate": 0.00019052171260042715, "loss": 1.2641, "step": 472 }, { "epoch": 0.09615775564139052, "grad_norm": 0.11694994568824768, "learning_rate": 0.00019050137292789588, "loss": 0.9362, "step": 473 }, { "epoch": 0.0963610489936979, "grad_norm": 0.11511142551898956, "learning_rate": 0.0001904810332553646, "loss": 1.0127, "step": 474 }, { "epoch": 0.09656434234600529, "grad_norm": 0.1253817081451416, "learning_rate": 0.00019046069358283333, "loss": 1.0489, "step": 475 }, { "epoch": 0.09676763569831266, "grad_norm": 0.11795701086521149, "learning_rate": 0.00019044035391030205, "loss": 1.0528, "step": 476 }, { "epoch": 0.09697092905062005, "grad_norm": 0.12703485786914825, "learning_rate": 0.00019042001423777078, "loss": 1.2692, "step": 477 }, { "epoch": 0.09717422240292742, "grad_norm": 0.12391920387744904, "learning_rate": 0.0001903996745652395, "loss": 1.0765, "step": 478 }, { "epoch": 0.0973775157552348, "grad_norm": 0.12939028441905975, "learning_rate": 0.00019037933489270822, "loss": 1.2686, "step": 479 }, { "epoch": 0.09758080910754219, "grad_norm": 0.11955651640892029, "learning_rate": 0.00019035899522017698, "loss": 1.0179, "step": 480 }, { "epoch": 0.09778410245984956, "grad_norm": 0.11481709033250809, "learning_rate": 0.0001903386555476457, "loss": 1.1008, "step": 481 }, { "epoch": 0.09798739581215694, "grad_norm": 0.12216270714998245, "learning_rate": 0.00019031831587511443, "loss": 1.2387, "step": 482 }, { "epoch": 0.09819068916446433, "grad_norm": 0.10991356521844864, "learning_rate": 0.00019029797620258315, "loss": 0.9913, "step": 483 }, { "epoch": 0.0983939825167717, "grad_norm": 0.11534951627254486, "learning_rate": 0.00019027763653005187, "loss": 0.9248, "step": 484 }, { "epoch": 0.09859727586907908, "grad_norm": 0.11887869983911514, "learning_rate": 0.0001902572968575206, "loss": 1.065, "step": 485 }, { "epoch": 0.09880056922138646, "grad_norm": 0.12391136586666107, "learning_rate": 0.00019023695718498932, "loss": 1.1692, "step": 486 }, { "epoch": 0.09900386257369384, "grad_norm": 0.10672067850828171, "learning_rate": 0.00019021661751245805, "loss": 1.154, "step": 487 }, { "epoch": 0.09920715592600123, "grad_norm": 0.14061135053634644, "learning_rate": 0.0001901962778399268, "loss": 1.168, "step": 488 }, { "epoch": 0.0994104492783086, "grad_norm": 0.11371248215436935, "learning_rate": 0.00019017593816739552, "loss": 0.9905, "step": 489 }, { "epoch": 0.09961374263061598, "grad_norm": 0.11754601448774338, "learning_rate": 0.00019015559849486425, "loss": 1.01, "step": 490 }, { "epoch": 0.09981703598292335, "grad_norm": 0.12492667138576508, "learning_rate": 0.00019013525882233297, "loss": 1.1024, "step": 491 }, { "epoch": 0.10002032933523074, "grad_norm": 0.12676015496253967, "learning_rate": 0.0001901149191498017, "loss": 1.3976, "step": 492 }, { "epoch": 0.10022362268753812, "grad_norm": 0.13545620441436768, "learning_rate": 0.00019009457947727042, "loss": 1.1542, "step": 493 }, { "epoch": 0.10042691603984549, "grad_norm": 0.12883707880973816, "learning_rate": 0.00019007423980473915, "loss": 1.1068, "step": 494 }, { "epoch": 0.10063020939215288, "grad_norm": 0.11707032471895218, "learning_rate": 0.00019005390013220787, "loss": 0.9906, "step": 495 }, { "epoch": 0.10083350274446026, "grad_norm": 0.13158461451530457, "learning_rate": 0.00019003356045967662, "loss": 1.1453, "step": 496 }, { "epoch": 0.10103679609676763, "grad_norm": 0.1244715005159378, "learning_rate": 0.00019001322078714535, "loss": 1.1244, "step": 497 }, { "epoch": 0.10124008944907502, "grad_norm": 0.12620943784713745, "learning_rate": 0.00018999288111461407, "loss": 1.1018, "step": 498 }, { "epoch": 0.10144338280138239, "grad_norm": 0.1192685067653656, "learning_rate": 0.0001899725414420828, "loss": 1.1069, "step": 499 }, { "epoch": 0.10164667615368977, "grad_norm": 0.12764599919319153, "learning_rate": 0.00018995220176955152, "loss": 1.2122, "step": 500 }, { "epoch": 0.10184996950599716, "grad_norm": 0.12098994851112366, "learning_rate": 0.00018993186209702024, "loss": 1.1113, "step": 501 }, { "epoch": 0.10205326285830453, "grad_norm": 0.14677678048610687, "learning_rate": 0.00018991152242448897, "loss": 1.399, "step": 502 }, { "epoch": 0.10225655621061192, "grad_norm": 0.1371246576309204, "learning_rate": 0.0001898911827519577, "loss": 1.2582, "step": 503 }, { "epoch": 0.10245984956291929, "grad_norm": 0.11643920093774796, "learning_rate": 0.00018987084307942642, "loss": 1.0707, "step": 504 }, { "epoch": 0.10266314291522667, "grad_norm": 0.1150643602013588, "learning_rate": 0.00018985050340689517, "loss": 1.0886, "step": 505 }, { "epoch": 0.10286643626753406, "grad_norm": 0.10518593341112137, "learning_rate": 0.0001898301637343639, "loss": 0.8955, "step": 506 }, { "epoch": 0.10306972961984143, "grad_norm": 0.11445560306310654, "learning_rate": 0.00018980982406183262, "loss": 0.93, "step": 507 }, { "epoch": 0.10327302297214881, "grad_norm": 0.11920091509819031, "learning_rate": 0.00018978948438930134, "loss": 1.0148, "step": 508 }, { "epoch": 0.1034763163244562, "grad_norm": 0.12822504341602325, "learning_rate": 0.00018976914471677007, "loss": 1.2004, "step": 509 }, { "epoch": 0.10367960967676357, "grad_norm": 0.12469658255577087, "learning_rate": 0.0001897488050442388, "loss": 1.0902, "step": 510 }, { "epoch": 0.10388290302907095, "grad_norm": 0.12136801332235336, "learning_rate": 0.00018972846537170752, "loss": 1.059, "step": 511 }, { "epoch": 0.10408619638137832, "grad_norm": 0.10618099570274353, "learning_rate": 0.00018970812569917624, "loss": 0.9972, "step": 512 }, { "epoch": 0.10428948973368571, "grad_norm": 0.12111090868711472, "learning_rate": 0.000189687786026645, "loss": 1.0789, "step": 513 }, { "epoch": 0.1044927830859931, "grad_norm": 0.1108577698469162, "learning_rate": 0.00018966744635411372, "loss": 0.9024, "step": 514 }, { "epoch": 0.10469607643830046, "grad_norm": 0.1184157282114029, "learning_rate": 0.00018964710668158244, "loss": 0.9548, "step": 515 }, { "epoch": 0.10489936979060785, "grad_norm": 0.1288694143295288, "learning_rate": 0.00018962676700905117, "loss": 1.2128, "step": 516 }, { "epoch": 0.10510266314291522, "grad_norm": 0.12015259265899658, "learning_rate": 0.0001896064273365199, "loss": 1.1964, "step": 517 }, { "epoch": 0.1053059564952226, "grad_norm": 0.13204379379749298, "learning_rate": 0.00018958608766398861, "loss": 1.0473, "step": 518 }, { "epoch": 0.10550924984752999, "grad_norm": 0.11321057379245758, "learning_rate": 0.00018956574799145734, "loss": 1.0961, "step": 519 }, { "epoch": 0.10571254319983736, "grad_norm": 0.13245680928230286, "learning_rate": 0.00018954540831892606, "loss": 1.0835, "step": 520 }, { "epoch": 0.10591583655214475, "grad_norm": 0.12220027297735214, "learning_rate": 0.00018952506864639481, "loss": 1.1246, "step": 521 }, { "epoch": 0.10611912990445213, "grad_norm": 0.11933163553476334, "learning_rate": 0.00018950472897386354, "loss": 1.0739, "step": 522 }, { "epoch": 0.1063224232567595, "grad_norm": 0.14022572338581085, "learning_rate": 0.00018948438930133226, "loss": 1.1557, "step": 523 }, { "epoch": 0.10652571660906689, "grad_norm": 0.13287031650543213, "learning_rate": 0.000189464049628801, "loss": 1.2597, "step": 524 }, { "epoch": 0.10672900996137426, "grad_norm": 0.11653829365968704, "learning_rate": 0.0001894437099562697, "loss": 0.9564, "step": 525 }, { "epoch": 0.10693230331368164, "grad_norm": 0.11488767713308334, "learning_rate": 0.00018942337028373844, "loss": 0.9883, "step": 526 }, { "epoch": 0.10713559666598903, "grad_norm": 0.11149357259273529, "learning_rate": 0.00018940303061120716, "loss": 1.0004, "step": 527 }, { "epoch": 0.1073388900182964, "grad_norm": 0.11848779767751694, "learning_rate": 0.00018938269093867589, "loss": 1.0462, "step": 528 }, { "epoch": 0.10754218337060378, "grad_norm": 0.11932095140218735, "learning_rate": 0.00018936235126614464, "loss": 1.1001, "step": 529 }, { "epoch": 0.10774547672291115, "grad_norm": 0.11937075853347778, "learning_rate": 0.00018934201159361336, "loss": 1.2032, "step": 530 }, { "epoch": 0.10794877007521854, "grad_norm": 0.10601939260959625, "learning_rate": 0.00018932167192108209, "loss": 0.9689, "step": 531 }, { "epoch": 0.10815206342752592, "grad_norm": 0.11901092529296875, "learning_rate": 0.0001893013322485508, "loss": 1.1913, "step": 532 }, { "epoch": 0.1083553567798333, "grad_norm": 0.1308038979768753, "learning_rate": 0.00018928099257601954, "loss": 1.2393, "step": 533 }, { "epoch": 0.10855865013214068, "grad_norm": 0.1222740039229393, "learning_rate": 0.00018926065290348826, "loss": 0.9574, "step": 534 }, { "epoch": 0.10876194348444806, "grad_norm": 0.12856149673461914, "learning_rate": 0.00018924031323095698, "loss": 1.1296, "step": 535 }, { "epoch": 0.10896523683675544, "grad_norm": 0.12045751512050629, "learning_rate": 0.0001892199735584257, "loss": 1.153, "step": 536 }, { "epoch": 0.10916853018906282, "grad_norm": 0.11606315523386002, "learning_rate": 0.00018919963388589446, "loss": 0.9028, "step": 537 }, { "epoch": 0.10937182354137019, "grad_norm": 0.10877380520105362, "learning_rate": 0.00018917929421336318, "loss": 0.9954, "step": 538 }, { "epoch": 0.10957511689367758, "grad_norm": 0.10476227104663849, "learning_rate": 0.0001891589545408319, "loss": 0.9486, "step": 539 }, { "epoch": 0.10977841024598496, "grad_norm": 0.12538990378379822, "learning_rate": 0.00018913861486830063, "loss": 1.1749, "step": 540 }, { "epoch": 0.10998170359829233, "grad_norm": 0.13290320336818695, "learning_rate": 0.00018911827519576936, "loss": 1.0933, "step": 541 }, { "epoch": 0.11018499695059972, "grad_norm": 0.11773636192083359, "learning_rate": 0.00018909793552323808, "loss": 1.0693, "step": 542 }, { "epoch": 0.11038829030290709, "grad_norm": 0.11466556787490845, "learning_rate": 0.0001890775958507068, "loss": 1.0589, "step": 543 }, { "epoch": 0.11059158365521447, "grad_norm": 0.1275825798511505, "learning_rate": 0.00018905725617817553, "loss": 1.0582, "step": 544 }, { "epoch": 0.11079487700752186, "grad_norm": 0.1283504068851471, "learning_rate": 0.00018903691650564428, "loss": 1.1702, "step": 545 }, { "epoch": 0.11099817035982923, "grad_norm": 0.13250254094600677, "learning_rate": 0.000189016576833113, "loss": 1.1467, "step": 546 }, { "epoch": 0.11120146371213661, "grad_norm": 0.15396709740161896, "learning_rate": 0.00018899623716058173, "loss": 1.1299, "step": 547 }, { "epoch": 0.111404757064444, "grad_norm": 0.13014012575149536, "learning_rate": 0.00018897589748805046, "loss": 1.1633, "step": 548 }, { "epoch": 0.11160805041675137, "grad_norm": 0.11697974056005478, "learning_rate": 0.00018895555781551918, "loss": 1.1052, "step": 549 }, { "epoch": 0.11181134376905875, "grad_norm": 0.13976189494132996, "learning_rate": 0.0001889352181429879, "loss": 1.1199, "step": 550 }, { "epoch": 0.11201463712136613, "grad_norm": 0.13051995635032654, "learning_rate": 0.00018891487847045663, "loss": 1.2532, "step": 551 }, { "epoch": 0.11221793047367351, "grad_norm": 0.11212155967950821, "learning_rate": 0.00018889453879792535, "loss": 0.9873, "step": 552 }, { "epoch": 0.1124212238259809, "grad_norm": 0.1334063857793808, "learning_rate": 0.00018887419912539408, "loss": 1.1102, "step": 553 }, { "epoch": 0.11262451717828827, "grad_norm": 0.1290140599012375, "learning_rate": 0.00018885385945286283, "loss": 0.9598, "step": 554 }, { "epoch": 0.11282781053059565, "grad_norm": 0.12794511020183563, "learning_rate": 0.00018883351978033155, "loss": 1.2875, "step": 555 }, { "epoch": 0.11303110388290302, "grad_norm": 0.11270211637020111, "learning_rate": 0.00018881318010780028, "loss": 0.9414, "step": 556 }, { "epoch": 0.11323439723521041, "grad_norm": 0.12074756622314453, "learning_rate": 0.000188792840435269, "loss": 1.0734, "step": 557 }, { "epoch": 0.11343769058751779, "grad_norm": 0.11245666444301605, "learning_rate": 0.00018877250076273773, "loss": 1.2024, "step": 558 }, { "epoch": 0.11364098393982516, "grad_norm": 0.10953640192747116, "learning_rate": 0.00018875216109020645, "loss": 0.9572, "step": 559 }, { "epoch": 0.11384427729213255, "grad_norm": 0.11975332349538803, "learning_rate": 0.00018873182141767518, "loss": 1.1559, "step": 560 }, { "epoch": 0.11404757064443993, "grad_norm": 0.10940812528133392, "learning_rate": 0.0001887114817451439, "loss": 0.926, "step": 561 }, { "epoch": 0.1142508639967473, "grad_norm": 0.139595165848732, "learning_rate": 0.00018869114207261265, "loss": 1.3275, "step": 562 }, { "epoch": 0.11445415734905469, "grad_norm": 0.10891355574131012, "learning_rate": 0.00018867080240008138, "loss": 0.9736, "step": 563 }, { "epoch": 0.11465745070136206, "grad_norm": 0.1192033439874649, "learning_rate": 0.0001886504627275501, "loss": 0.9881, "step": 564 }, { "epoch": 0.11486074405366944, "grad_norm": 0.12635888159275055, "learning_rate": 0.00018863012305501883, "loss": 1.1483, "step": 565 }, { "epoch": 0.11506403740597683, "grad_norm": 0.13440972566604614, "learning_rate": 0.00018860978338248755, "loss": 1.0852, "step": 566 }, { "epoch": 0.1152673307582842, "grad_norm": 0.12328968942165375, "learning_rate": 0.00018858944370995628, "loss": 1.1048, "step": 567 }, { "epoch": 0.11547062411059159, "grad_norm": 0.12037025392055511, "learning_rate": 0.000188569104037425, "loss": 1.1171, "step": 568 }, { "epoch": 0.11567391746289896, "grad_norm": 0.11991129070520401, "learning_rate": 0.00018854876436489372, "loss": 1.0827, "step": 569 }, { "epoch": 0.11587721081520634, "grad_norm": 0.11372412741184235, "learning_rate": 0.00018852842469236248, "loss": 0.9993, "step": 570 }, { "epoch": 0.11608050416751373, "grad_norm": 0.10992924124002457, "learning_rate": 0.0001885080850198312, "loss": 0.9812, "step": 571 }, { "epoch": 0.1162837975198211, "grad_norm": 0.11675936728715897, "learning_rate": 0.00018848774534729992, "loss": 0.9844, "step": 572 }, { "epoch": 0.11648709087212848, "grad_norm": 0.10757414996623993, "learning_rate": 0.00018846740567476865, "loss": 1.0607, "step": 573 }, { "epoch": 0.11669038422443587, "grad_norm": 0.11255379766225815, "learning_rate": 0.00018844706600223737, "loss": 1.1638, "step": 574 }, { "epoch": 0.11689367757674324, "grad_norm": 0.10737176239490509, "learning_rate": 0.0001884267263297061, "loss": 1.0055, "step": 575 }, { "epoch": 0.11709697092905062, "grad_norm": 0.1193508729338646, "learning_rate": 0.00018840638665717482, "loss": 1.0924, "step": 576 }, { "epoch": 0.117300264281358, "grad_norm": 0.12564769387245178, "learning_rate": 0.00018838604698464355, "loss": 1.3088, "step": 577 }, { "epoch": 0.11750355763366538, "grad_norm": 0.12675485014915466, "learning_rate": 0.0001883657073121123, "loss": 1.0682, "step": 578 }, { "epoch": 0.11770685098597276, "grad_norm": 0.12016987055540085, "learning_rate": 0.00018834536763958102, "loss": 0.9511, "step": 579 }, { "epoch": 0.11791014433828013, "grad_norm": 0.11664092540740967, "learning_rate": 0.00018832502796704975, "loss": 1.0758, "step": 580 }, { "epoch": 0.11811343769058752, "grad_norm": 0.11402445286512375, "learning_rate": 0.00018830468829451847, "loss": 1.0959, "step": 581 }, { "epoch": 0.11831673104289489, "grad_norm": 0.12505365908145905, "learning_rate": 0.0001882843486219872, "loss": 1.1621, "step": 582 }, { "epoch": 0.11852002439520228, "grad_norm": 0.13434186577796936, "learning_rate": 0.00018826400894945592, "loss": 1.273, "step": 583 }, { "epoch": 0.11872331774750966, "grad_norm": 0.1284523904323578, "learning_rate": 0.00018824366927692465, "loss": 1.1161, "step": 584 }, { "epoch": 0.11892661109981703, "grad_norm": 0.1141962930560112, "learning_rate": 0.00018822332960439337, "loss": 1.012, "step": 585 }, { "epoch": 0.11912990445212442, "grad_norm": 0.1280459314584732, "learning_rate": 0.00018820298993186212, "loss": 1.1797, "step": 586 }, { "epoch": 0.1193331978044318, "grad_norm": 0.12705819308757782, "learning_rate": 0.00018818265025933085, "loss": 1.2323, "step": 587 }, { "epoch": 0.11953649115673917, "grad_norm": 0.1341540366411209, "learning_rate": 0.00018816231058679957, "loss": 1.1219, "step": 588 }, { "epoch": 0.11973978450904656, "grad_norm": 0.1307908147573471, "learning_rate": 0.0001881419709142683, "loss": 0.992, "step": 589 }, { "epoch": 0.11994307786135393, "grad_norm": 0.127479687333107, "learning_rate": 0.00018812163124173702, "loss": 1.0326, "step": 590 }, { "epoch": 0.12014637121366131, "grad_norm": 0.09779065102338791, "learning_rate": 0.00018810129156920574, "loss": 0.7614, "step": 591 }, { "epoch": 0.1203496645659687, "grad_norm": 0.14188863337039948, "learning_rate": 0.00018808095189667447, "loss": 1.23, "step": 592 }, { "epoch": 0.12055295791827607, "grad_norm": 0.12969130277633667, "learning_rate": 0.0001880606122241432, "loss": 1.1229, "step": 593 }, { "epoch": 0.12075625127058345, "grad_norm": 0.13516603410243988, "learning_rate": 0.00018804027255161192, "loss": 1.0147, "step": 594 }, { "epoch": 0.12095954462289084, "grad_norm": 0.13307668268680573, "learning_rate": 0.00018801993287908067, "loss": 1.1908, "step": 595 }, { "epoch": 0.12116283797519821, "grad_norm": 0.11288546770811081, "learning_rate": 0.0001879995932065494, "loss": 0.9319, "step": 596 }, { "epoch": 0.1213661313275056, "grad_norm": 0.12034857273101807, "learning_rate": 0.00018797925353401812, "loss": 1.0976, "step": 597 }, { "epoch": 0.12156942467981297, "grad_norm": 0.136747807264328, "learning_rate": 0.00018795891386148684, "loss": 1.2298, "step": 598 }, { "epoch": 0.12177271803212035, "grad_norm": 0.11699377000331879, "learning_rate": 0.00018793857418895557, "loss": 1.0377, "step": 599 }, { "epoch": 0.12197601138442773, "grad_norm": 0.15257331728935242, "learning_rate": 0.0001879182345164243, "loss": 1.2306, "step": 600 }, { "epoch": 0.1221793047367351, "grad_norm": 0.1361241340637207, "learning_rate": 0.00018789789484389302, "loss": 1.1563, "step": 601 }, { "epoch": 0.12238259808904249, "grad_norm": 0.11735684424638748, "learning_rate": 0.00018787755517136174, "loss": 1.104, "step": 602 }, { "epoch": 0.12258589144134986, "grad_norm": 0.11648523807525635, "learning_rate": 0.0001878572154988305, "loss": 1.0008, "step": 603 }, { "epoch": 0.12278918479365725, "grad_norm": 0.12473436444997787, "learning_rate": 0.00018783687582629922, "loss": 1.0741, "step": 604 }, { "epoch": 0.12299247814596463, "grad_norm": 0.11664781719446182, "learning_rate": 0.00018781653615376794, "loss": 1.1155, "step": 605 }, { "epoch": 0.123195771498272, "grad_norm": 0.12415888160467148, "learning_rate": 0.00018779619648123666, "loss": 1.158, "step": 606 }, { "epoch": 0.12339906485057939, "grad_norm": 0.1223251074552536, "learning_rate": 0.0001877758568087054, "loss": 1.1045, "step": 607 }, { "epoch": 0.12360235820288677, "grad_norm": 0.12289747595787048, "learning_rate": 0.00018775551713617411, "loss": 1.0768, "step": 608 }, { "epoch": 0.12380565155519414, "grad_norm": 0.1316901594400406, "learning_rate": 0.00018773517746364284, "loss": 1.2156, "step": 609 }, { "epoch": 0.12400894490750153, "grad_norm": 0.12060056626796722, "learning_rate": 0.00018771483779111156, "loss": 1.0221, "step": 610 }, { "epoch": 0.1242122382598089, "grad_norm": 0.1384373903274536, "learning_rate": 0.00018769449811858031, "loss": 1.1059, "step": 611 }, { "epoch": 0.12441553161211628, "grad_norm": 0.12399812787771225, "learning_rate": 0.00018767415844604904, "loss": 1.0193, "step": 612 }, { "epoch": 0.12461882496442367, "grad_norm": 0.13406959176063538, "learning_rate": 0.00018765381877351776, "loss": 1.1572, "step": 613 }, { "epoch": 0.12482211831673104, "grad_norm": 0.12881499528884888, "learning_rate": 0.0001876334791009865, "loss": 1.1914, "step": 614 }, { "epoch": 0.1250254116690384, "grad_norm": 0.11472728103399277, "learning_rate": 0.0001876131394284552, "loss": 1.0822, "step": 615 }, { "epoch": 0.1252287050213458, "grad_norm": 0.1251503825187683, "learning_rate": 0.00018759279975592394, "loss": 1.1783, "step": 616 }, { "epoch": 0.12543199837365318, "grad_norm": 0.1414482593536377, "learning_rate": 0.00018757246008339266, "loss": 1.1925, "step": 617 }, { "epoch": 0.12563529172596055, "grad_norm": 0.122686967253685, "learning_rate": 0.00018755212041086139, "loss": 1.091, "step": 618 }, { "epoch": 0.12583858507826795, "grad_norm": 0.12301596254110336, "learning_rate": 0.00018753178073833014, "loss": 1.108, "step": 619 }, { "epoch": 0.12604187843057532, "grad_norm": 0.1191742941737175, "learning_rate": 0.00018751144106579886, "loss": 1.0413, "step": 620 }, { "epoch": 0.1262451717828827, "grad_norm": 0.0971694141626358, "learning_rate": 0.00018749110139326759, "loss": 0.8473, "step": 621 }, { "epoch": 0.1264484651351901, "grad_norm": 0.12381591647863388, "learning_rate": 0.0001874707617207363, "loss": 1.1503, "step": 622 }, { "epoch": 0.12665175848749746, "grad_norm": 0.13411198556423187, "learning_rate": 0.00018745042204820504, "loss": 1.164, "step": 623 }, { "epoch": 0.12685505183980483, "grad_norm": 0.12838509678840637, "learning_rate": 0.00018743008237567376, "loss": 1.1768, "step": 624 }, { "epoch": 0.1270583451921122, "grad_norm": 0.11623813211917877, "learning_rate": 0.00018740974270314248, "loss": 1.1611, "step": 625 }, { "epoch": 0.1272616385444196, "grad_norm": 0.11001920700073242, "learning_rate": 0.0001873894030306112, "loss": 1.0182, "step": 626 }, { "epoch": 0.12746493189672697, "grad_norm": 0.11987441778182983, "learning_rate": 0.00018736906335807996, "loss": 1.0509, "step": 627 }, { "epoch": 0.12766822524903434, "grad_norm": 0.13036808371543884, "learning_rate": 0.00018734872368554868, "loss": 1.2035, "step": 628 }, { "epoch": 0.12787151860134174, "grad_norm": 0.12546774744987488, "learning_rate": 0.0001873283840130174, "loss": 1.1434, "step": 629 }, { "epoch": 0.12807481195364911, "grad_norm": 0.1025729849934578, "learning_rate": 0.00018730804434048613, "loss": 0.9868, "step": 630 }, { "epoch": 0.12827810530595649, "grad_norm": 0.1013616994023323, "learning_rate": 0.00018728770466795483, "loss": 0.9281, "step": 631 }, { "epoch": 0.12848139865826388, "grad_norm": 0.11066362261772156, "learning_rate": 0.00018726736499542358, "loss": 1.0345, "step": 632 }, { "epoch": 0.12868469201057126, "grad_norm": 0.1280633807182312, "learning_rate": 0.0001872470253228923, "loss": 1.2335, "step": 633 }, { "epoch": 0.12888798536287863, "grad_norm": 0.11954978853464127, "learning_rate": 0.00018722668565036103, "loss": 1.0298, "step": 634 }, { "epoch": 0.12909127871518603, "grad_norm": 0.11124943196773529, "learning_rate": 0.00018720634597782976, "loss": 1.0896, "step": 635 }, { "epoch": 0.1292945720674934, "grad_norm": 0.12496782839298248, "learning_rate": 0.0001871860063052985, "loss": 1.0897, "step": 636 }, { "epoch": 0.12949786541980077, "grad_norm": 0.1257556527853012, "learning_rate": 0.00018716566663276723, "loss": 1.0148, "step": 637 }, { "epoch": 0.12970115877210814, "grad_norm": 0.11928705126047134, "learning_rate": 0.00018714532696023596, "loss": 1.1415, "step": 638 }, { "epoch": 0.12990445212441554, "grad_norm": 0.1109057068824768, "learning_rate": 0.00018712498728770468, "loss": 1.063, "step": 639 }, { "epoch": 0.1301077454767229, "grad_norm": 0.13905195891857147, "learning_rate": 0.0001871046476151734, "loss": 1.2346, "step": 640 }, { "epoch": 0.13031103882903028, "grad_norm": 0.12306763231754303, "learning_rate": 0.00018708430794264213, "loss": 1.0504, "step": 641 }, { "epoch": 0.13051433218133768, "grad_norm": 0.1077868863940239, "learning_rate": 0.00018706396827011085, "loss": 0.9143, "step": 642 }, { "epoch": 0.13071762553364505, "grad_norm": 0.1328214555978775, "learning_rate": 0.00018704362859757958, "loss": 1.1223, "step": 643 }, { "epoch": 0.13092091888595242, "grad_norm": 0.12459075450897217, "learning_rate": 0.00018702328892504833, "loss": 1.1896, "step": 644 }, { "epoch": 0.13112421223825982, "grad_norm": 0.11860411614179611, "learning_rate": 0.00018700294925251705, "loss": 1.0472, "step": 645 }, { "epoch": 0.1313275055905672, "grad_norm": 0.11825944483280182, "learning_rate": 0.00018698260957998578, "loss": 1.2391, "step": 646 }, { "epoch": 0.13153079894287456, "grad_norm": 0.12103937566280365, "learning_rate": 0.0001869622699074545, "loss": 1.0087, "step": 647 }, { "epoch": 0.13173409229518196, "grad_norm": 0.12289803475141525, "learning_rate": 0.00018694193023492323, "loss": 1.0867, "step": 648 }, { "epoch": 0.13193738564748933, "grad_norm": 0.12652850151062012, "learning_rate": 0.00018692159056239195, "loss": 1.2047, "step": 649 }, { "epoch": 0.1321406789997967, "grad_norm": 0.12258271127939224, "learning_rate": 0.00018690125088986068, "loss": 0.9806, "step": 650 }, { "epoch": 0.1323439723521041, "grad_norm": 0.1285620778799057, "learning_rate": 0.0001868809112173294, "loss": 0.9993, "step": 651 }, { "epoch": 0.13254726570441147, "grad_norm": 0.11906328797340393, "learning_rate": 0.00018686057154479815, "loss": 1.1029, "step": 652 }, { "epoch": 0.13275055905671884, "grad_norm": 0.13393160700798035, "learning_rate": 0.00018684023187226688, "loss": 1.1263, "step": 653 }, { "epoch": 0.1329538524090262, "grad_norm": 0.13850244879722595, "learning_rate": 0.0001868198921997356, "loss": 1.0878, "step": 654 }, { "epoch": 0.1331571457613336, "grad_norm": 0.13923142850399017, "learning_rate": 0.00018679955252720433, "loss": 1.1637, "step": 655 }, { "epoch": 0.13336043911364098, "grad_norm": 0.11642129719257355, "learning_rate": 0.00018677921285467305, "loss": 1.1134, "step": 656 }, { "epoch": 0.13356373246594835, "grad_norm": 0.12743037939071655, "learning_rate": 0.00018675887318214178, "loss": 1.0345, "step": 657 }, { "epoch": 0.13376702581825575, "grad_norm": 0.11360882222652435, "learning_rate": 0.0001867385335096105, "loss": 1.072, "step": 658 }, { "epoch": 0.13397031917056312, "grad_norm": 0.1262228637933731, "learning_rate": 0.00018671819383707922, "loss": 1.1546, "step": 659 }, { "epoch": 0.1341736125228705, "grad_norm": 0.1144820973277092, "learning_rate": 0.00018669785416454798, "loss": 1.0152, "step": 660 }, { "epoch": 0.1343769058751779, "grad_norm": 0.12834620475769043, "learning_rate": 0.0001866775144920167, "loss": 1.0456, "step": 661 }, { "epoch": 0.13458019922748526, "grad_norm": 0.11835994571447372, "learning_rate": 0.00018665717481948542, "loss": 0.991, "step": 662 }, { "epoch": 0.13478349257979264, "grad_norm": 0.11445319652557373, "learning_rate": 0.00018663683514695415, "loss": 1.0116, "step": 663 }, { "epoch": 0.13498678593210003, "grad_norm": 0.13939061760902405, "learning_rate": 0.00018661649547442287, "loss": 1.153, "step": 664 }, { "epoch": 0.1351900792844074, "grad_norm": 0.1149614006280899, "learning_rate": 0.0001865961558018916, "loss": 0.9255, "step": 665 }, { "epoch": 0.13539337263671478, "grad_norm": 0.13376334309577942, "learning_rate": 0.00018657581612936032, "loss": 1.1502, "step": 666 }, { "epoch": 0.13559666598902215, "grad_norm": 0.13265709578990936, "learning_rate": 0.00018655547645682905, "loss": 1.1292, "step": 667 }, { "epoch": 0.13579995934132955, "grad_norm": 0.11729206144809723, "learning_rate": 0.0001865351367842978, "loss": 1.2166, "step": 668 }, { "epoch": 0.13600325269363692, "grad_norm": 0.11903608590364456, "learning_rate": 0.00018651479711176652, "loss": 1.1808, "step": 669 }, { "epoch": 0.1362065460459443, "grad_norm": 0.11009612679481506, "learning_rate": 0.00018649445743923525, "loss": 0.9364, "step": 670 }, { "epoch": 0.1364098393982517, "grad_norm": 0.13966090977191925, "learning_rate": 0.00018647411776670397, "loss": 1.2463, "step": 671 }, { "epoch": 0.13661313275055906, "grad_norm": 0.12319371849298477, "learning_rate": 0.00018645377809417267, "loss": 1.1192, "step": 672 }, { "epoch": 0.13681642610286643, "grad_norm": 0.13469716906547546, "learning_rate": 0.00018643343842164142, "loss": 1.2376, "step": 673 }, { "epoch": 0.13701971945517383, "grad_norm": 0.124245285987854, "learning_rate": 0.00018641309874911015, "loss": 1.1145, "step": 674 }, { "epoch": 0.1372230128074812, "grad_norm": 0.1325312852859497, "learning_rate": 0.00018639275907657887, "loss": 1.1453, "step": 675 }, { "epoch": 0.13742630615978857, "grad_norm": 0.13344690203666687, "learning_rate": 0.0001863724194040476, "loss": 1.2191, "step": 676 }, { "epoch": 0.13762959951209597, "grad_norm": 0.1301363855600357, "learning_rate": 0.00018635207973151635, "loss": 0.9982, "step": 677 }, { "epoch": 0.13783289286440334, "grad_norm": 0.10880762338638306, "learning_rate": 0.00018633174005898507, "loss": 0.8772, "step": 678 }, { "epoch": 0.1380361862167107, "grad_norm": 0.13281653821468353, "learning_rate": 0.0001863114003864538, "loss": 1.0529, "step": 679 }, { "epoch": 0.13823947956901808, "grad_norm": 0.13998745381832123, "learning_rate": 0.0001862910607139225, "loss": 1.0996, "step": 680 }, { "epoch": 0.13844277292132548, "grad_norm": 0.1195378452539444, "learning_rate": 0.00018627072104139124, "loss": 0.9776, "step": 681 }, { "epoch": 0.13864606627363285, "grad_norm": 0.10932020843029022, "learning_rate": 0.00018625038136885997, "loss": 1.1026, "step": 682 }, { "epoch": 0.13884935962594022, "grad_norm": 0.1420464664697647, "learning_rate": 0.0001862300416963287, "loss": 1.1428, "step": 683 }, { "epoch": 0.13905265297824762, "grad_norm": 0.11747555434703827, "learning_rate": 0.00018620970202379742, "loss": 0.9985, "step": 684 }, { "epoch": 0.139255946330555, "grad_norm": 0.11964225023984909, "learning_rate": 0.00018618936235126617, "loss": 1.0268, "step": 685 }, { "epoch": 0.13945923968286236, "grad_norm": 0.11939354985952377, "learning_rate": 0.0001861690226787349, "loss": 0.993, "step": 686 }, { "epoch": 0.13966253303516976, "grad_norm": 0.14188724756240845, "learning_rate": 0.00018614868300620362, "loss": 1.0672, "step": 687 }, { "epoch": 0.13986582638747713, "grad_norm": 0.12218412756919861, "learning_rate": 0.00018612834333367231, "loss": 1.0664, "step": 688 }, { "epoch": 0.1400691197397845, "grad_norm": 0.12363380193710327, "learning_rate": 0.00018610800366114107, "loss": 1.1202, "step": 689 }, { "epoch": 0.1402724130920919, "grad_norm": 0.12523901462554932, "learning_rate": 0.0001860876639886098, "loss": 0.9601, "step": 690 }, { "epoch": 0.14047570644439927, "grad_norm": 0.1359613537788391, "learning_rate": 0.00018606732431607852, "loss": 1.2674, "step": 691 }, { "epoch": 0.14067899979670664, "grad_norm": 0.12229263782501221, "learning_rate": 0.00018604698464354724, "loss": 1.0817, "step": 692 }, { "epoch": 0.14088229314901402, "grad_norm": 0.12188601493835449, "learning_rate": 0.000186026644971016, "loss": 1.1176, "step": 693 }, { "epoch": 0.1410855865013214, "grad_norm": 0.10588016360998154, "learning_rate": 0.00018600630529848472, "loss": 0.9546, "step": 694 }, { "epoch": 0.14128887985362878, "grad_norm": 0.11985071748495102, "learning_rate": 0.00018598596562595344, "loss": 1.0765, "step": 695 }, { "epoch": 0.14149217320593616, "grad_norm": 0.13118812441825867, "learning_rate": 0.00018596562595342216, "loss": 1.0117, "step": 696 }, { "epoch": 0.14169546655824355, "grad_norm": 0.11992435902357101, "learning_rate": 0.0001859452862808909, "loss": 0.9618, "step": 697 }, { "epoch": 0.14189875991055093, "grad_norm": 0.11617527902126312, "learning_rate": 0.00018592494660835961, "loss": 1.0459, "step": 698 }, { "epoch": 0.1421020532628583, "grad_norm": 0.12465415149927139, "learning_rate": 0.00018590460693582834, "loss": 1.0635, "step": 699 }, { "epoch": 0.1423053466151657, "grad_norm": 0.12672793865203857, "learning_rate": 0.00018588426726329706, "loss": 1.26, "step": 700 }, { "epoch": 0.14250863996747307, "grad_norm": 0.12191738188266754, "learning_rate": 0.00018586392759076581, "loss": 0.9333, "step": 701 }, { "epoch": 0.14271193331978044, "grad_norm": 0.13285742700099945, "learning_rate": 0.00018584358791823454, "loss": 1.2199, "step": 702 }, { "epoch": 0.14291522667208784, "grad_norm": 0.11525557935237885, "learning_rate": 0.00018582324824570326, "loss": 1.1212, "step": 703 }, { "epoch": 0.1431185200243952, "grad_norm": 0.12379605323076248, "learning_rate": 0.000185802908573172, "loss": 0.9767, "step": 704 }, { "epoch": 0.14332181337670258, "grad_norm": 0.13637319207191467, "learning_rate": 0.0001857825689006407, "loss": 1.1399, "step": 705 }, { "epoch": 0.14352510672900995, "grad_norm": 0.12638236582279205, "learning_rate": 0.00018576222922810944, "loss": 1.2348, "step": 706 }, { "epoch": 0.14372840008131735, "grad_norm": 0.11840532720088959, "learning_rate": 0.00018574188955557816, "loss": 1.0475, "step": 707 }, { "epoch": 0.14393169343362472, "grad_norm": 0.11270745098590851, "learning_rate": 0.00018572154988304689, "loss": 1.0503, "step": 708 }, { "epoch": 0.1441349867859321, "grad_norm": 0.12445101141929626, "learning_rate": 0.00018570121021051564, "loss": 1.0658, "step": 709 }, { "epoch": 0.1443382801382395, "grad_norm": 0.11352977156639099, "learning_rate": 0.00018568087053798436, "loss": 0.9473, "step": 710 }, { "epoch": 0.14454157349054686, "grad_norm": 0.11230108141899109, "learning_rate": 0.00018566053086545309, "loss": 1.0519, "step": 711 }, { "epoch": 0.14474486684285423, "grad_norm": 0.14274398982524872, "learning_rate": 0.0001856401911929218, "loss": 1.1135, "step": 712 }, { "epoch": 0.14494816019516163, "grad_norm": 0.11553295701742172, "learning_rate": 0.0001856198515203905, "loss": 1.044, "step": 713 }, { "epoch": 0.145151453547469, "grad_norm": 0.11737996339797974, "learning_rate": 0.00018559951184785926, "loss": 1.0154, "step": 714 }, { "epoch": 0.14535474689977637, "grad_norm": 0.1481630802154541, "learning_rate": 0.00018557917217532798, "loss": 1.1544, "step": 715 }, { "epoch": 0.14555804025208377, "grad_norm": 0.12081188708543777, "learning_rate": 0.0001855588325027967, "loss": 1.0034, "step": 716 }, { "epoch": 0.14576133360439114, "grad_norm": 0.13458681106567383, "learning_rate": 0.00018553849283026543, "loss": 1.1627, "step": 717 }, { "epoch": 0.1459646269566985, "grad_norm": 0.13506878912448883, "learning_rate": 0.00018551815315773418, "loss": 1.1927, "step": 718 }, { "epoch": 0.14616792030900588, "grad_norm": 0.10834948718547821, "learning_rate": 0.0001854978134852029, "loss": 1.0943, "step": 719 }, { "epoch": 0.14637121366131328, "grad_norm": 0.13779957592487335, "learning_rate": 0.00018547747381267163, "loss": 1.2356, "step": 720 }, { "epoch": 0.14657450701362065, "grad_norm": 0.12655863165855408, "learning_rate": 0.00018545713414014033, "loss": 1.1085, "step": 721 }, { "epoch": 0.14677780036592802, "grad_norm": 0.1144525483250618, "learning_rate": 0.00018543679446760908, "loss": 1.0517, "step": 722 }, { "epoch": 0.14698109371823542, "grad_norm": 0.12001293152570724, "learning_rate": 0.0001854164547950778, "loss": 1.1439, "step": 723 }, { "epoch": 0.1471843870705428, "grad_norm": 0.12786982953548431, "learning_rate": 0.00018539611512254653, "loss": 1.1846, "step": 724 }, { "epoch": 0.14738768042285016, "grad_norm": 0.1154879704117775, "learning_rate": 0.00018537577545001526, "loss": 0.941, "step": 725 }, { "epoch": 0.14759097377515756, "grad_norm": 0.10635704547166824, "learning_rate": 0.000185355435777484, "loss": 0.915, "step": 726 }, { "epoch": 0.14779426712746493, "grad_norm": 0.11456220597028732, "learning_rate": 0.00018533509610495273, "loss": 1.0387, "step": 727 }, { "epoch": 0.1479975604797723, "grad_norm": 0.11217451840639114, "learning_rate": 0.00018531475643242146, "loss": 1.0938, "step": 728 }, { "epoch": 0.1482008538320797, "grad_norm": 0.1105191633105278, "learning_rate": 0.00018529441675989015, "loss": 1.0398, "step": 729 }, { "epoch": 0.14840414718438708, "grad_norm": 0.11848670989274979, "learning_rate": 0.0001852740770873589, "loss": 1.04, "step": 730 }, { "epoch": 0.14860744053669445, "grad_norm": 0.11965551227331161, "learning_rate": 0.00018525373741482763, "loss": 0.966, "step": 731 }, { "epoch": 0.14881073388900182, "grad_norm": 0.12252170592546463, "learning_rate": 0.00018523339774229635, "loss": 1.1997, "step": 732 }, { "epoch": 0.14901402724130922, "grad_norm": 0.11600001901388168, "learning_rate": 0.00018521305806976508, "loss": 1.2425, "step": 733 }, { "epoch": 0.1492173205936166, "grad_norm": 0.11161402612924576, "learning_rate": 0.00018519271839723383, "loss": 0.9978, "step": 734 }, { "epoch": 0.14942061394592396, "grad_norm": 0.12365563958883286, "learning_rate": 0.00018517237872470255, "loss": 0.9652, "step": 735 }, { "epoch": 0.14962390729823136, "grad_norm": 0.11252112686634064, "learning_rate": 0.00018515203905217128, "loss": 0.948, "step": 736 }, { "epoch": 0.14982720065053873, "grad_norm": 0.12211350351572037, "learning_rate": 0.00018513169937963998, "loss": 1.0636, "step": 737 }, { "epoch": 0.1500304940028461, "grad_norm": 0.13200169801712036, "learning_rate": 0.00018511135970710873, "loss": 1.158, "step": 738 }, { "epoch": 0.1502337873551535, "grad_norm": 0.11223406344652176, "learning_rate": 0.00018509102003457745, "loss": 1.1194, "step": 739 }, { "epoch": 0.15043708070746087, "grad_norm": 0.11996794492006302, "learning_rate": 0.00018507068036204618, "loss": 1.0485, "step": 740 }, { "epoch": 0.15064037405976824, "grad_norm": 0.13017338514328003, "learning_rate": 0.0001850503406895149, "loss": 1.1304, "step": 741 }, { "epoch": 0.15084366741207564, "grad_norm": 0.1273190826177597, "learning_rate": 0.00018503000101698365, "loss": 1.0937, "step": 742 }, { "epoch": 0.151046960764383, "grad_norm": 0.1322571486234665, "learning_rate": 0.00018500966134445238, "loss": 1.1364, "step": 743 }, { "epoch": 0.15125025411669038, "grad_norm": 0.12314455956220627, "learning_rate": 0.0001849893216719211, "loss": 1.0005, "step": 744 }, { "epoch": 0.15145354746899775, "grad_norm": 0.1126449927687645, "learning_rate": 0.0001849689819993898, "loss": 1.0231, "step": 745 }, { "epoch": 0.15165684082130515, "grad_norm": 0.12586358189582825, "learning_rate": 0.00018494864232685855, "loss": 1.0816, "step": 746 }, { "epoch": 0.15186013417361252, "grad_norm": 0.09933953732252121, "learning_rate": 0.00018492830265432727, "loss": 0.8666, "step": 747 }, { "epoch": 0.1520634275259199, "grad_norm": 0.12422667443752289, "learning_rate": 0.000184907962981796, "loss": 1.0502, "step": 748 }, { "epoch": 0.1522667208782273, "grad_norm": 0.12274408340454102, "learning_rate": 0.00018488762330926472, "loss": 1.1445, "step": 749 }, { "epoch": 0.15247001423053466, "grad_norm": 0.1317015141248703, "learning_rate": 0.00018486728363673348, "loss": 1.2226, "step": 750 }, { "epoch": 0.15267330758284203, "grad_norm": 0.1201949417591095, "learning_rate": 0.0001848469439642022, "loss": 0.9285, "step": 751 }, { "epoch": 0.15287660093514943, "grad_norm": 0.11115135997533798, "learning_rate": 0.00018482660429167092, "loss": 1.1262, "step": 752 }, { "epoch": 0.1530798942874568, "grad_norm": 0.11809299886226654, "learning_rate": 0.00018480626461913965, "loss": 1.0792, "step": 753 }, { "epoch": 0.15328318763976417, "grad_norm": 0.14711928367614746, "learning_rate": 0.00018478592494660835, "loss": 1.1647, "step": 754 }, { "epoch": 0.15348648099207157, "grad_norm": 0.12082501500844955, "learning_rate": 0.0001847655852740771, "loss": 1.1866, "step": 755 }, { "epoch": 0.15368977434437894, "grad_norm": 0.1093011349439621, "learning_rate": 0.00018474524560154582, "loss": 0.9978, "step": 756 }, { "epoch": 0.15389306769668631, "grad_norm": 0.11525548994541168, "learning_rate": 0.00018472490592901455, "loss": 0.9134, "step": 757 }, { "epoch": 0.15409636104899369, "grad_norm": 0.12464176118373871, "learning_rate": 0.00018470456625648327, "loss": 1.0974, "step": 758 }, { "epoch": 0.15429965440130108, "grad_norm": 0.11930055171251297, "learning_rate": 0.00018468422658395202, "loss": 0.8953, "step": 759 }, { "epoch": 0.15450294775360846, "grad_norm": 0.12347722053527832, "learning_rate": 0.00018466388691142075, "loss": 1.0212, "step": 760 }, { "epoch": 0.15470624110591583, "grad_norm": 0.1258956342935562, "learning_rate": 0.00018464354723888947, "loss": 1.2616, "step": 761 }, { "epoch": 0.15490953445822322, "grad_norm": 0.12692275643348694, "learning_rate": 0.00018462320756635817, "loss": 1.1994, "step": 762 }, { "epoch": 0.1551128278105306, "grad_norm": 0.13774073123931885, "learning_rate": 0.00018460286789382692, "loss": 1.2109, "step": 763 }, { "epoch": 0.15531612116283797, "grad_norm": 0.12587130069732666, "learning_rate": 0.00018458252822129564, "loss": 1.1059, "step": 764 }, { "epoch": 0.15551941451514537, "grad_norm": 0.13462059199810028, "learning_rate": 0.00018456218854876437, "loss": 1.0648, "step": 765 }, { "epoch": 0.15572270786745274, "grad_norm": 0.1329740285873413, "learning_rate": 0.0001845418488762331, "loss": 1.264, "step": 766 }, { "epoch": 0.1559260012197601, "grad_norm": 0.12275559455156326, "learning_rate": 0.00018452150920370185, "loss": 0.9893, "step": 767 }, { "epoch": 0.1561292945720675, "grad_norm": 0.12821702659130096, "learning_rate": 0.00018450116953117057, "loss": 1.0681, "step": 768 }, { "epoch": 0.15633258792437488, "grad_norm": 0.11758620291948318, "learning_rate": 0.0001844808298586393, "loss": 1.0476, "step": 769 }, { "epoch": 0.15653588127668225, "grad_norm": 0.11491292715072632, "learning_rate": 0.000184460490186108, "loss": 1.1428, "step": 770 }, { "epoch": 0.15673917462898962, "grad_norm": 0.12064868956804276, "learning_rate": 0.00018444015051357674, "loss": 0.9565, "step": 771 }, { "epoch": 0.15694246798129702, "grad_norm": 0.12319160997867584, "learning_rate": 0.00018441981084104547, "loss": 1.0593, "step": 772 }, { "epoch": 0.1571457613336044, "grad_norm": 0.13514620065689087, "learning_rate": 0.0001843994711685142, "loss": 1.1908, "step": 773 }, { "epoch": 0.15734905468591176, "grad_norm": 0.1343378722667694, "learning_rate": 0.00018437913149598292, "loss": 1.2193, "step": 774 }, { "epoch": 0.15755234803821916, "grad_norm": 0.13351817429065704, "learning_rate": 0.00018435879182345167, "loss": 1.1141, "step": 775 }, { "epoch": 0.15775564139052653, "grad_norm": 0.11843458563089371, "learning_rate": 0.0001843384521509204, "loss": 1.1933, "step": 776 }, { "epoch": 0.1579589347428339, "grad_norm": 0.12293927371501923, "learning_rate": 0.00018431811247838912, "loss": 1.0682, "step": 777 }, { "epoch": 0.1581622280951413, "grad_norm": 0.11566301435232162, "learning_rate": 0.00018429777280585781, "loss": 1.1093, "step": 778 }, { "epoch": 0.15836552144744867, "grad_norm": 0.11641670763492584, "learning_rate": 0.00018427743313332657, "loss": 1.2028, "step": 779 }, { "epoch": 0.15856881479975604, "grad_norm": 0.14020314812660217, "learning_rate": 0.0001842570934607953, "loss": 1.0472, "step": 780 }, { "epoch": 0.15877210815206344, "grad_norm": 0.11766766011714935, "learning_rate": 0.00018423675378826401, "loss": 0.9908, "step": 781 }, { "epoch": 0.1589754015043708, "grad_norm": 0.14530715346336365, "learning_rate": 0.00018421641411573274, "loss": 1.2046, "step": 782 }, { "epoch": 0.15917869485667818, "grad_norm": 0.12271513789892197, "learning_rate": 0.0001841960744432015, "loss": 1.1401, "step": 783 }, { "epoch": 0.15938198820898555, "grad_norm": 0.12754741311073303, "learning_rate": 0.00018417573477067022, "loss": 1.2811, "step": 784 }, { "epoch": 0.15958528156129295, "grad_norm": 0.10751698166131973, "learning_rate": 0.00018415539509813894, "loss": 0.9566, "step": 785 }, { "epoch": 0.15978857491360032, "grad_norm": 0.12434156984090805, "learning_rate": 0.00018413505542560764, "loss": 1.2307, "step": 786 }, { "epoch": 0.1599918682659077, "grad_norm": 0.1130242571234703, "learning_rate": 0.0001841147157530764, "loss": 1.0406, "step": 787 }, { "epoch": 0.1601951616182151, "grad_norm": 0.12631991505622864, "learning_rate": 0.0001840943760805451, "loss": 1.0835, "step": 788 }, { "epoch": 0.16039845497052246, "grad_norm": 0.11642556637525558, "learning_rate": 0.00018407403640801384, "loss": 0.9743, "step": 789 }, { "epoch": 0.16060174832282983, "grad_norm": 0.1119033470749855, "learning_rate": 0.00018405369673548256, "loss": 1.1377, "step": 790 }, { "epoch": 0.16080504167513723, "grad_norm": 0.14675219357013702, "learning_rate": 0.00018403335706295131, "loss": 1.2846, "step": 791 }, { "epoch": 0.1610083350274446, "grad_norm": 0.1238279864192009, "learning_rate": 0.00018401301739042004, "loss": 1.1033, "step": 792 }, { "epoch": 0.16121162837975198, "grad_norm": 0.12538330256938934, "learning_rate": 0.00018399267771788876, "loss": 1.2344, "step": 793 }, { "epoch": 0.16141492173205937, "grad_norm": 0.11384537816047668, "learning_rate": 0.00018397233804535746, "loss": 1.0143, "step": 794 }, { "epoch": 0.16161821508436675, "grad_norm": 0.1444682627916336, "learning_rate": 0.00018395199837282618, "loss": 1.2364, "step": 795 }, { "epoch": 0.16182150843667412, "grad_norm": 0.12999016046524048, "learning_rate": 0.00018393165870029494, "loss": 1.1853, "step": 796 }, { "epoch": 0.1620248017889815, "grad_norm": 0.12258971482515335, "learning_rate": 0.00018391131902776366, "loss": 1.2673, "step": 797 }, { "epoch": 0.16222809514128889, "grad_norm": 0.13033455610275269, "learning_rate": 0.00018389097935523238, "loss": 0.8922, "step": 798 }, { "epoch": 0.16243138849359626, "grad_norm": 0.14746494591236115, "learning_rate": 0.0001838706396827011, "loss": 1.2164, "step": 799 }, { "epoch": 0.16263468184590363, "grad_norm": 0.12869805097579956, "learning_rate": 0.00018385030001016986, "loss": 1.1788, "step": 800 }, { "epoch": 0.16283797519821103, "grad_norm": 0.11467185616493225, "learning_rate": 0.00018382996033763859, "loss": 0.9527, "step": 801 }, { "epoch": 0.1630412685505184, "grad_norm": 0.129184752702713, "learning_rate": 0.00018380962066510728, "loss": 1.1758, "step": 802 }, { "epoch": 0.16324456190282577, "grad_norm": 0.11696959286928177, "learning_rate": 0.000183789280992576, "loss": 1.03, "step": 803 }, { "epoch": 0.16344785525513317, "grad_norm": 0.13689257204532623, "learning_rate": 0.00018376894132004476, "loss": 1.2516, "step": 804 }, { "epoch": 0.16365114860744054, "grad_norm": 0.11370982229709625, "learning_rate": 0.00018374860164751348, "loss": 1.0484, "step": 805 }, { "epoch": 0.1638544419597479, "grad_norm": 0.13201859593391418, "learning_rate": 0.0001837282619749822, "loss": 1.0903, "step": 806 }, { "epoch": 0.1640577353120553, "grad_norm": 0.10468725860118866, "learning_rate": 0.00018370792230245093, "loss": 0.9548, "step": 807 }, { "epoch": 0.16426102866436268, "grad_norm": 0.14737223088741302, "learning_rate": 0.00018368758262991968, "loss": 1.1607, "step": 808 }, { "epoch": 0.16446432201667005, "grad_norm": 0.11500222235918045, "learning_rate": 0.0001836672429573884, "loss": 1.1032, "step": 809 }, { "epoch": 0.16466761536897742, "grad_norm": 0.12849587202072144, "learning_rate": 0.00018364690328485713, "loss": 1.255, "step": 810 }, { "epoch": 0.16487090872128482, "grad_norm": 0.10878688842058182, "learning_rate": 0.00018362656361232583, "loss": 1.1075, "step": 811 }, { "epoch": 0.1650742020735922, "grad_norm": 0.10878860950469971, "learning_rate": 0.00018360622393979458, "loss": 1.0629, "step": 812 }, { "epoch": 0.16527749542589956, "grad_norm": 0.1280430108308792, "learning_rate": 0.0001835858842672633, "loss": 1.1377, "step": 813 }, { "epoch": 0.16548078877820696, "grad_norm": 0.11831233650445938, "learning_rate": 0.00018356554459473203, "loss": 1.0786, "step": 814 }, { "epoch": 0.16568408213051433, "grad_norm": 0.11453156918287277, "learning_rate": 0.00018354520492220075, "loss": 1.0477, "step": 815 }, { "epoch": 0.1658873754828217, "grad_norm": 0.13597573339939117, "learning_rate": 0.0001835248652496695, "loss": 1.1807, "step": 816 }, { "epoch": 0.1660906688351291, "grad_norm": 0.12008185684680939, "learning_rate": 0.00018350452557713823, "loss": 1.0676, "step": 817 }, { "epoch": 0.16629396218743647, "grad_norm": 0.1363888829946518, "learning_rate": 0.00018348418590460696, "loss": 1.1582, "step": 818 }, { "epoch": 0.16649725553974384, "grad_norm": 0.11310733109712601, "learning_rate": 0.00018346384623207565, "loss": 1.0931, "step": 819 }, { "epoch": 0.16670054889205124, "grad_norm": 0.13503344357013702, "learning_rate": 0.0001834435065595444, "loss": 1.1465, "step": 820 }, { "epoch": 0.1669038422443586, "grad_norm": 0.12744784355163574, "learning_rate": 0.00018342316688701313, "loss": 1.1662, "step": 821 }, { "epoch": 0.16710713559666598, "grad_norm": 0.13695518672466278, "learning_rate": 0.00018340282721448185, "loss": 1.1846, "step": 822 }, { "epoch": 0.16731042894897336, "grad_norm": 0.12580302357673645, "learning_rate": 0.00018338248754195058, "loss": 0.93, "step": 823 }, { "epoch": 0.16751372230128075, "grad_norm": 0.12266777455806732, "learning_rate": 0.00018336214786941933, "loss": 1.1033, "step": 824 }, { "epoch": 0.16771701565358813, "grad_norm": 0.1129806861281395, "learning_rate": 0.00018334180819688805, "loss": 1.0517, "step": 825 }, { "epoch": 0.1679203090058955, "grad_norm": 0.12590476870536804, "learning_rate": 0.00018332146852435678, "loss": 1.0374, "step": 826 }, { "epoch": 0.1681236023582029, "grad_norm": 0.12631377577781677, "learning_rate": 0.00018330112885182548, "loss": 1.1898, "step": 827 }, { "epoch": 0.16832689571051027, "grad_norm": 0.13719779253005981, "learning_rate": 0.00018328078917929423, "loss": 1.1108, "step": 828 }, { "epoch": 0.16853018906281764, "grad_norm": 0.12414206564426422, "learning_rate": 0.00018326044950676295, "loss": 1.1654, "step": 829 }, { "epoch": 0.16873348241512504, "grad_norm": 0.12075278162956238, "learning_rate": 0.00018324010983423168, "loss": 1.0255, "step": 830 }, { "epoch": 0.1689367757674324, "grad_norm": 0.11906860023736954, "learning_rate": 0.0001832197701617004, "loss": 1.0433, "step": 831 }, { "epoch": 0.16914006911973978, "grad_norm": 0.11960665136575699, "learning_rate": 0.00018319943048916915, "loss": 0.9501, "step": 832 }, { "epoch": 0.16934336247204718, "grad_norm": 0.1228812113404274, "learning_rate": 0.00018317909081663788, "loss": 1.002, "step": 833 }, { "epoch": 0.16954665582435455, "grad_norm": 0.12420972436666489, "learning_rate": 0.0001831587511441066, "loss": 1.062, "step": 834 }, { "epoch": 0.16974994917666192, "grad_norm": 0.11490360647439957, "learning_rate": 0.0001831384114715753, "loss": 0.9708, "step": 835 }, { "epoch": 0.1699532425289693, "grad_norm": 0.11945214867591858, "learning_rate": 0.00018311807179904402, "loss": 1.1042, "step": 836 }, { "epoch": 0.1701565358812767, "grad_norm": 0.1234474778175354, "learning_rate": 0.00018309773212651277, "loss": 1.0258, "step": 837 }, { "epoch": 0.17035982923358406, "grad_norm": 0.12447863817214966, "learning_rate": 0.0001830773924539815, "loss": 1.1132, "step": 838 }, { "epoch": 0.17056312258589143, "grad_norm": 0.1321963667869568, "learning_rate": 0.00018305705278145022, "loss": 1.1835, "step": 839 }, { "epoch": 0.17076641593819883, "grad_norm": 0.12708254158496857, "learning_rate": 0.00018303671310891895, "loss": 1.1787, "step": 840 }, { "epoch": 0.1709697092905062, "grad_norm": 0.11481820046901703, "learning_rate": 0.0001830163734363877, "loss": 0.8837, "step": 841 }, { "epoch": 0.17117300264281357, "grad_norm": 0.11851567029953003, "learning_rate": 0.00018299603376385642, "loss": 0.9516, "step": 842 }, { "epoch": 0.17137629599512097, "grad_norm": 0.13182471692562103, "learning_rate": 0.00018297569409132512, "loss": 1.1809, "step": 843 }, { "epoch": 0.17157958934742834, "grad_norm": 0.12840509414672852, "learning_rate": 0.00018295535441879385, "loss": 1.0557, "step": 844 }, { "epoch": 0.1717828826997357, "grad_norm": 0.11280561983585358, "learning_rate": 0.0001829350147462626, "loss": 1.0737, "step": 845 }, { "epoch": 0.1719861760520431, "grad_norm": 0.13144554197788239, "learning_rate": 0.00018291467507373132, "loss": 1.0275, "step": 846 }, { "epoch": 0.17218946940435048, "grad_norm": 0.1224883422255516, "learning_rate": 0.00018289433540120005, "loss": 1.1558, "step": 847 }, { "epoch": 0.17239276275665785, "grad_norm": 0.1263243854045868, "learning_rate": 0.00018287399572866877, "loss": 0.9381, "step": 848 }, { "epoch": 0.17259605610896522, "grad_norm": 0.13391436636447906, "learning_rate": 0.00018285365605613752, "loss": 1.2548, "step": 849 }, { "epoch": 0.17279934946127262, "grad_norm": 0.12166419625282288, "learning_rate": 0.00018283331638360625, "loss": 1.0981, "step": 850 }, { "epoch": 0.17300264281358, "grad_norm": 0.13190463185310364, "learning_rate": 0.00018281297671107494, "loss": 1.1847, "step": 851 }, { "epoch": 0.17320593616588736, "grad_norm": 0.11678186804056168, "learning_rate": 0.00018279263703854367, "loss": 1.0303, "step": 852 }, { "epoch": 0.17340922951819476, "grad_norm": 0.11716858297586441, "learning_rate": 0.00018277229736601242, "loss": 0.9274, "step": 853 }, { "epoch": 0.17361252287050213, "grad_norm": 0.1340217888355255, "learning_rate": 0.00018275195769348114, "loss": 1.0179, "step": 854 }, { "epoch": 0.1738158162228095, "grad_norm": 0.12650153040885925, "learning_rate": 0.00018273161802094987, "loss": 1.0234, "step": 855 }, { "epoch": 0.1740191095751169, "grad_norm": 0.1294967234134674, "learning_rate": 0.0001827112783484186, "loss": 1.2539, "step": 856 }, { "epoch": 0.17422240292742427, "grad_norm": 0.13714881241321564, "learning_rate": 0.00018269093867588734, "loss": 1.0106, "step": 857 }, { "epoch": 0.17442569627973165, "grad_norm": 0.12365014851093292, "learning_rate": 0.00018267059900335607, "loss": 1.1184, "step": 858 }, { "epoch": 0.17462898963203904, "grad_norm": 0.11030489951372147, "learning_rate": 0.00018265025933082477, "loss": 0.9478, "step": 859 }, { "epoch": 0.17483228298434642, "grad_norm": 0.1181483343243599, "learning_rate": 0.0001826299196582935, "loss": 1.0861, "step": 860 }, { "epoch": 0.1750355763366538, "grad_norm": 0.12873612344264984, "learning_rate": 0.00018260957998576224, "loss": 0.9811, "step": 861 }, { "epoch": 0.17523886968896116, "grad_norm": 0.11688394844532013, "learning_rate": 0.00018258924031323097, "loss": 1.1643, "step": 862 }, { "epoch": 0.17544216304126856, "grad_norm": 0.12729796767234802, "learning_rate": 0.0001825689006406997, "loss": 1.0692, "step": 863 }, { "epoch": 0.17564545639357593, "grad_norm": 0.12474660575389862, "learning_rate": 0.00018254856096816842, "loss": 1.2838, "step": 864 }, { "epoch": 0.1758487497458833, "grad_norm": 0.12324024736881256, "learning_rate": 0.00018252822129563717, "loss": 1.0029, "step": 865 }, { "epoch": 0.1760520430981907, "grad_norm": 0.13511407375335693, "learning_rate": 0.0001825078816231059, "loss": 1.1398, "step": 866 }, { "epoch": 0.17625533645049807, "grad_norm": 0.13292032480239868, "learning_rate": 0.0001824875419505746, "loss": 1.3107, "step": 867 }, { "epoch": 0.17645862980280544, "grad_norm": 0.12073294073343277, "learning_rate": 0.00018246720227804331, "loss": 1.1293, "step": 868 }, { "epoch": 0.17666192315511284, "grad_norm": 0.11789250373840332, "learning_rate": 0.00018244686260551207, "loss": 1.0462, "step": 869 }, { "epoch": 0.1768652165074202, "grad_norm": 0.1194562166929245, "learning_rate": 0.0001824265229329808, "loss": 1.0017, "step": 870 }, { "epoch": 0.17706850985972758, "grad_norm": 0.10480080544948578, "learning_rate": 0.00018240618326044951, "loss": 0.8659, "step": 871 }, { "epoch": 0.17727180321203498, "grad_norm": 0.1207701787352562, "learning_rate": 0.00018238584358791824, "loss": 0.9937, "step": 872 }, { "epoch": 0.17747509656434235, "grad_norm": 0.1190091222524643, "learning_rate": 0.000182365503915387, "loss": 1.0437, "step": 873 }, { "epoch": 0.17767838991664972, "grad_norm": 0.1277458369731903, "learning_rate": 0.00018234516424285572, "loss": 1.2392, "step": 874 }, { "epoch": 0.1778816832689571, "grad_norm": 0.12237963080406189, "learning_rate": 0.00018232482457032444, "loss": 1.1032, "step": 875 }, { "epoch": 0.1780849766212645, "grad_norm": 0.1319531798362732, "learning_rate": 0.00018230448489779314, "loss": 1.2012, "step": 876 }, { "epoch": 0.17828826997357186, "grad_norm": 0.11914216727018356, "learning_rate": 0.0001822841452252619, "loss": 1.0272, "step": 877 }, { "epoch": 0.17849156332587923, "grad_norm": 0.14588242769241333, "learning_rate": 0.0001822638055527306, "loss": 1.357, "step": 878 }, { "epoch": 0.17869485667818663, "grad_norm": 0.11982700973749161, "learning_rate": 0.00018224346588019934, "loss": 1.049, "step": 879 }, { "epoch": 0.178898150030494, "grad_norm": 0.12529560923576355, "learning_rate": 0.00018222312620766806, "loss": 1.0713, "step": 880 }, { "epoch": 0.17910144338280137, "grad_norm": 0.1316487044095993, "learning_rate": 0.00018220278653513679, "loss": 1.1749, "step": 881 }, { "epoch": 0.17930473673510877, "grad_norm": 0.12096232175827026, "learning_rate": 0.00018218244686260554, "loss": 1.2104, "step": 882 }, { "epoch": 0.17950803008741614, "grad_norm": 0.1313014030456543, "learning_rate": 0.00018216210719007426, "loss": 1.0554, "step": 883 }, { "epoch": 0.1797113234397235, "grad_norm": 0.1309378743171692, "learning_rate": 0.00018214176751754296, "loss": 1.2152, "step": 884 }, { "epoch": 0.1799146167920309, "grad_norm": 0.1286410242319107, "learning_rate": 0.00018212142784501168, "loss": 1.0922, "step": 885 }, { "epoch": 0.18011791014433828, "grad_norm": 0.12893226742744446, "learning_rate": 0.00018210108817248044, "loss": 1.1969, "step": 886 }, { "epoch": 0.18032120349664565, "grad_norm": 0.11664584279060364, "learning_rate": 0.00018208074849994916, "loss": 1.0085, "step": 887 }, { "epoch": 0.18052449684895303, "grad_norm": 0.10973158478736877, "learning_rate": 0.00018206040882741788, "loss": 0.9548, "step": 888 }, { "epoch": 0.18072779020126042, "grad_norm": 0.11281079053878784, "learning_rate": 0.0001820400691548866, "loss": 0.8521, "step": 889 }, { "epoch": 0.1809310835535678, "grad_norm": 0.12198197096586227, "learning_rate": 0.00018201972948235536, "loss": 1.0537, "step": 890 }, { "epoch": 0.18113437690587517, "grad_norm": 0.09405733644962311, "learning_rate": 0.00018199938980982409, "loss": 0.7193, "step": 891 }, { "epoch": 0.18133767025818257, "grad_norm": 0.13503974676132202, "learning_rate": 0.00018197905013729278, "loss": 1.1564, "step": 892 }, { "epoch": 0.18154096361048994, "grad_norm": 0.1322106271982193, "learning_rate": 0.0001819587104647615, "loss": 1.0733, "step": 893 }, { "epoch": 0.1817442569627973, "grad_norm": 0.12791374325752258, "learning_rate": 0.00018193837079223026, "loss": 1.0701, "step": 894 }, { "epoch": 0.1819475503151047, "grad_norm": 0.12342046946287155, "learning_rate": 0.00018191803111969898, "loss": 1.1255, "step": 895 }, { "epoch": 0.18215084366741208, "grad_norm": 0.12089495360851288, "learning_rate": 0.0001818976914471677, "loss": 1.0177, "step": 896 }, { "epoch": 0.18235413701971945, "grad_norm": 0.12383720278739929, "learning_rate": 0.00018187735177463643, "loss": 1.0188, "step": 897 }, { "epoch": 0.18255743037202685, "grad_norm": 0.12089379876852036, "learning_rate": 0.00018185701210210518, "loss": 1.1106, "step": 898 }, { "epoch": 0.18276072372433422, "grad_norm": 0.12939763069152832, "learning_rate": 0.0001818366724295739, "loss": 1.1939, "step": 899 }, { "epoch": 0.1829640170766416, "grad_norm": 0.14534543454647064, "learning_rate": 0.0001818163327570426, "loss": 1.252, "step": 900 }, { "epoch": 0.18316731042894896, "grad_norm": 0.13002236187458038, "learning_rate": 0.00018179599308451133, "loss": 0.9607, "step": 901 }, { "epoch": 0.18337060378125636, "grad_norm": 0.11892438679933548, "learning_rate": 0.00018177565341198008, "loss": 0.9641, "step": 902 }, { "epoch": 0.18357389713356373, "grad_norm": 0.11869879812002182, "learning_rate": 0.0001817553137394488, "loss": 0.886, "step": 903 }, { "epoch": 0.1837771904858711, "grad_norm": 0.11826761066913605, "learning_rate": 0.00018173497406691753, "loss": 1.2055, "step": 904 }, { "epoch": 0.1839804838381785, "grad_norm": 0.1275918185710907, "learning_rate": 0.00018171463439438625, "loss": 1.0468, "step": 905 }, { "epoch": 0.18418377719048587, "grad_norm": 0.12289033085107803, "learning_rate": 0.000181694294721855, "loss": 1.1464, "step": 906 }, { "epoch": 0.18438707054279324, "grad_norm": 0.11647521704435349, "learning_rate": 0.00018167395504932373, "loss": 0.8912, "step": 907 }, { "epoch": 0.18459036389510064, "grad_norm": 0.12756259739398956, "learning_rate": 0.00018165361537679243, "loss": 1.0672, "step": 908 }, { "epoch": 0.184793657247408, "grad_norm": 0.12525498867034912, "learning_rate": 0.00018163327570426115, "loss": 1.1493, "step": 909 }, { "epoch": 0.18499695059971538, "grad_norm": 0.11629681289196014, "learning_rate": 0.0001816129360317299, "loss": 1.0021, "step": 910 }, { "epoch": 0.18520024395202278, "grad_norm": 0.1350405514240265, "learning_rate": 0.00018159259635919863, "loss": 1.1597, "step": 911 }, { "epoch": 0.18540353730433015, "grad_norm": 0.10785862803459167, "learning_rate": 0.00018157225668666735, "loss": 0.9035, "step": 912 }, { "epoch": 0.18560683065663752, "grad_norm": 0.13618353009223938, "learning_rate": 0.00018155191701413608, "loss": 1.4084, "step": 913 }, { "epoch": 0.18581012400894492, "grad_norm": 0.12942783534526825, "learning_rate": 0.00018153157734160483, "loss": 1.0529, "step": 914 }, { "epoch": 0.1860134173612523, "grad_norm": 0.12829767167568207, "learning_rate": 0.00018151123766907355, "loss": 1.1734, "step": 915 }, { "epoch": 0.18621671071355966, "grad_norm": 0.11795412003993988, "learning_rate": 0.00018149089799654225, "loss": 1.0891, "step": 916 }, { "epoch": 0.18642000406586703, "grad_norm": 0.13184364140033722, "learning_rate": 0.00018147055832401098, "loss": 1.0332, "step": 917 }, { "epoch": 0.18662329741817443, "grad_norm": 0.13445381820201874, "learning_rate": 0.00018145021865147973, "loss": 1.1417, "step": 918 }, { "epoch": 0.1868265907704818, "grad_norm": 0.1418420672416687, "learning_rate": 0.00018142987897894845, "loss": 1.1279, "step": 919 }, { "epoch": 0.18702988412278918, "grad_norm": 0.11725430935621262, "learning_rate": 0.00018140953930641718, "loss": 0.9878, "step": 920 }, { "epoch": 0.18723317747509657, "grad_norm": 0.13889212906360626, "learning_rate": 0.0001813891996338859, "loss": 1.1153, "step": 921 }, { "epoch": 0.18743647082740394, "grad_norm": 0.12875622510910034, "learning_rate": 0.00018136885996135462, "loss": 1.0345, "step": 922 }, { "epoch": 0.18763976417971132, "grad_norm": 0.12533831596374512, "learning_rate": 0.00018134852028882338, "loss": 1.0741, "step": 923 }, { "epoch": 0.18784305753201871, "grad_norm": 0.12448123842477798, "learning_rate": 0.00018132818061629207, "loss": 1.0161, "step": 924 }, { "epoch": 0.18804635088432609, "grad_norm": 0.13820883631706238, "learning_rate": 0.0001813078409437608, "loss": 1.2834, "step": 925 }, { "epoch": 0.18824964423663346, "grad_norm": 0.1304212063550949, "learning_rate": 0.00018128750127122952, "loss": 1.0927, "step": 926 }, { "epoch": 0.18845293758894086, "grad_norm": 0.12558777630329132, "learning_rate": 0.00018126716159869827, "loss": 1.1516, "step": 927 }, { "epoch": 0.18865623094124823, "grad_norm": 0.13149550557136536, "learning_rate": 0.000181246821926167, "loss": 1.0791, "step": 928 }, { "epoch": 0.1888595242935556, "grad_norm": 0.12774059176445007, "learning_rate": 0.00018122648225363572, "loss": 1.108, "step": 929 }, { "epoch": 0.18906281764586297, "grad_norm": 0.12127216160297394, "learning_rate": 0.00018120614258110445, "loss": 1.1254, "step": 930 }, { "epoch": 0.18926611099817037, "grad_norm": 0.1251489520072937, "learning_rate": 0.0001811858029085732, "loss": 1.1306, "step": 931 }, { "epoch": 0.18946940435047774, "grad_norm": 0.12320549786090851, "learning_rate": 0.00018116546323604192, "loss": 1.082, "step": 932 }, { "epoch": 0.1896726977027851, "grad_norm": 0.12626154720783234, "learning_rate": 0.00018114512356351062, "loss": 1.176, "step": 933 }, { "epoch": 0.1898759910550925, "grad_norm": 0.12401305884122849, "learning_rate": 0.00018112478389097935, "loss": 1.107, "step": 934 }, { "epoch": 0.19007928440739988, "grad_norm": 0.13284708559513092, "learning_rate": 0.0001811044442184481, "loss": 1.1977, "step": 935 }, { "epoch": 0.19028257775970725, "grad_norm": 0.11293178796768188, "learning_rate": 0.00018108410454591682, "loss": 0.8484, "step": 936 }, { "epoch": 0.19048587111201465, "grad_norm": 0.12113649398088455, "learning_rate": 0.00018106376487338555, "loss": 1.0833, "step": 937 }, { "epoch": 0.19068916446432202, "grad_norm": 0.12353657186031342, "learning_rate": 0.00018104342520085427, "loss": 1.1154, "step": 938 }, { "epoch": 0.1908924578166294, "grad_norm": 0.13213786482810974, "learning_rate": 0.00018102308552832302, "loss": 1.0866, "step": 939 }, { "epoch": 0.1910957511689368, "grad_norm": 0.12303278595209122, "learning_rate": 0.00018100274585579175, "loss": 0.9889, "step": 940 }, { "epoch": 0.19129904452124416, "grad_norm": 0.12523289024829865, "learning_rate": 0.00018098240618326044, "loss": 0.9564, "step": 941 }, { "epoch": 0.19150233787355153, "grad_norm": 0.12457413971424103, "learning_rate": 0.00018096206651072917, "loss": 1.168, "step": 942 }, { "epoch": 0.1917056312258589, "grad_norm": 0.13440296053886414, "learning_rate": 0.00018094172683819792, "loss": 1.1655, "step": 943 }, { "epoch": 0.1919089245781663, "grad_norm": 0.11574854701757431, "learning_rate": 0.00018092138716566664, "loss": 0.9982, "step": 944 }, { "epoch": 0.19211221793047367, "grad_norm": 0.1216878592967987, "learning_rate": 0.00018090104749313537, "loss": 0.97, "step": 945 }, { "epoch": 0.19231551128278104, "grad_norm": 0.11920405179262161, "learning_rate": 0.0001808807078206041, "loss": 0.9783, "step": 946 }, { "epoch": 0.19251880463508844, "grad_norm": 0.12107307463884354, "learning_rate": 0.00018086036814807284, "loss": 1.0843, "step": 947 }, { "epoch": 0.1927220979873958, "grad_norm": 0.12287328392267227, "learning_rate": 0.00018084002847554157, "loss": 1.1068, "step": 948 }, { "epoch": 0.19292539133970318, "grad_norm": 0.12466049194335938, "learning_rate": 0.00018081968880301027, "loss": 0.9383, "step": 949 }, { "epoch": 0.19312868469201058, "grad_norm": 0.11762560158967972, "learning_rate": 0.000180799349130479, "loss": 0.9855, "step": 950 }, { "epoch": 0.19333197804431795, "grad_norm": 0.12275755405426025, "learning_rate": 0.00018077900945794774, "loss": 1.0528, "step": 951 }, { "epoch": 0.19353527139662532, "grad_norm": 0.12033812701702118, "learning_rate": 0.00018075866978541647, "loss": 1.0828, "step": 952 }, { "epoch": 0.19373856474893272, "grad_norm": 0.13380326330661774, "learning_rate": 0.0001807383301128852, "loss": 1.0634, "step": 953 }, { "epoch": 0.1939418581012401, "grad_norm": 0.13521994650363922, "learning_rate": 0.00018071799044035392, "loss": 1.1512, "step": 954 }, { "epoch": 0.19414515145354747, "grad_norm": 0.1331789344549179, "learning_rate": 0.00018069765076782267, "loss": 1.2343, "step": 955 }, { "epoch": 0.19434844480585484, "grad_norm": 0.12130323797464371, "learning_rate": 0.0001806773110952914, "loss": 1.291, "step": 956 }, { "epoch": 0.19455173815816224, "grad_norm": 0.10274801403284073, "learning_rate": 0.0001806569714227601, "loss": 0.8534, "step": 957 }, { "epoch": 0.1947550315104696, "grad_norm": 0.1255219727754593, "learning_rate": 0.00018063663175022881, "loss": 1.1804, "step": 958 }, { "epoch": 0.19495832486277698, "grad_norm": 0.13403509557247162, "learning_rate": 0.00018061629207769757, "loss": 1.1882, "step": 959 }, { "epoch": 0.19516161821508438, "grad_norm": 0.1277134269475937, "learning_rate": 0.0001805959524051663, "loss": 1.1059, "step": 960 }, { "epoch": 0.19536491156739175, "grad_norm": 0.1148851290345192, "learning_rate": 0.00018057561273263501, "loss": 1.125, "step": 961 }, { "epoch": 0.19556820491969912, "grad_norm": 0.10984671115875244, "learning_rate": 0.00018055527306010374, "loss": 1.0396, "step": 962 }, { "epoch": 0.19577149827200652, "grad_norm": 0.13988138735294342, "learning_rate": 0.00018053493338757246, "loss": 1.1672, "step": 963 }, { "epoch": 0.1959747916243139, "grad_norm": 0.12106659263372421, "learning_rate": 0.00018051459371504121, "loss": 1.0142, "step": 964 }, { "epoch": 0.19617808497662126, "grad_norm": 0.10751524567604065, "learning_rate": 0.0001804942540425099, "loss": 1.0027, "step": 965 }, { "epoch": 0.19638137832892866, "grad_norm": 0.12096796184778214, "learning_rate": 0.00018047391436997864, "loss": 1.0965, "step": 966 }, { "epoch": 0.19658467168123603, "grad_norm": 0.12069959938526154, "learning_rate": 0.00018045357469744736, "loss": 0.9869, "step": 967 }, { "epoch": 0.1967879650335434, "grad_norm": 0.13281071186065674, "learning_rate": 0.0001804332350249161, "loss": 1.0361, "step": 968 }, { "epoch": 0.19699125838585077, "grad_norm": 0.12690961360931396, "learning_rate": 0.00018041289535238484, "loss": 1.005, "step": 969 }, { "epoch": 0.19719455173815817, "grad_norm": 0.1329599916934967, "learning_rate": 0.00018039255567985356, "loss": 1.1184, "step": 970 }, { "epoch": 0.19739784509046554, "grad_norm": 0.12807321548461914, "learning_rate": 0.00018037221600732229, "loss": 1.1918, "step": 971 }, { "epoch": 0.1976011384427729, "grad_norm": 0.12155921012163162, "learning_rate": 0.00018035187633479104, "loss": 1.1934, "step": 972 }, { "epoch": 0.1978044317950803, "grad_norm": 0.11720109730958939, "learning_rate": 0.00018033153666225973, "loss": 1.2674, "step": 973 }, { "epoch": 0.19800772514738768, "grad_norm": 0.12774553894996643, "learning_rate": 0.00018031119698972846, "loss": 1.1394, "step": 974 }, { "epoch": 0.19821101849969505, "grad_norm": 0.11617007106542587, "learning_rate": 0.00018029085731719718, "loss": 1.0772, "step": 975 }, { "epoch": 0.19841431185200245, "grad_norm": 0.1182067021727562, "learning_rate": 0.00018027051764466594, "loss": 1.0433, "step": 976 }, { "epoch": 0.19861760520430982, "grad_norm": 0.128327414393425, "learning_rate": 0.00018025017797213466, "loss": 1.0616, "step": 977 }, { "epoch": 0.1988208985566172, "grad_norm": 0.12075836956501007, "learning_rate": 0.00018022983829960338, "loss": 1.2187, "step": 978 }, { "epoch": 0.1990241919089246, "grad_norm": 0.132186159491539, "learning_rate": 0.0001802094986270721, "loss": 1.0614, "step": 979 }, { "epoch": 0.19922748526123196, "grad_norm": 0.135267972946167, "learning_rate": 0.00018018915895454086, "loss": 1.3447, "step": 980 }, { "epoch": 0.19943077861353933, "grad_norm": 0.13122640550136566, "learning_rate": 0.00018016881928200956, "loss": 1.3336, "step": 981 }, { "epoch": 0.1996340719658467, "grad_norm": 0.11631322652101517, "learning_rate": 0.00018014847960947828, "loss": 1.024, "step": 982 }, { "epoch": 0.1998373653181541, "grad_norm": 0.12409427016973495, "learning_rate": 0.000180128139936947, "loss": 0.9806, "step": 983 }, { "epoch": 0.20004065867046147, "grad_norm": 0.1337365210056305, "learning_rate": 0.00018010780026441576, "loss": 1.1875, "step": 984 }, { "epoch": 0.20024395202276885, "grad_norm": 0.12941214442253113, "learning_rate": 0.00018008746059188448, "loss": 1.1962, "step": 985 }, { "epoch": 0.20044724537507624, "grad_norm": 0.12374356389045715, "learning_rate": 0.0001800671209193532, "loss": 1.1213, "step": 986 }, { "epoch": 0.20065053872738361, "grad_norm": 0.13427360355854034, "learning_rate": 0.00018004678124682193, "loss": 1.209, "step": 987 }, { "epoch": 0.20085383207969099, "grad_norm": 0.11423162370920181, "learning_rate": 0.00018002644157429068, "loss": 0.8628, "step": 988 }, { "epoch": 0.20105712543199838, "grad_norm": 0.12818945944309235, "learning_rate": 0.0001800061019017594, "loss": 1.1162, "step": 989 }, { "epoch": 0.20126041878430576, "grad_norm": 0.11825679987668991, "learning_rate": 0.0001799857622292281, "loss": 0.9222, "step": 990 }, { "epoch": 0.20146371213661313, "grad_norm": 0.11358822882175446, "learning_rate": 0.00017996542255669683, "loss": 0.992, "step": 991 }, { "epoch": 0.20166700548892053, "grad_norm": 0.12839291989803314, "learning_rate": 0.00017994508288416558, "loss": 1.1777, "step": 992 }, { "epoch": 0.2018702988412279, "grad_norm": 0.12416979670524597, "learning_rate": 0.0001799247432116343, "loss": 1.2411, "step": 993 }, { "epoch": 0.20207359219353527, "grad_norm": 0.12002628296613693, "learning_rate": 0.00017990440353910303, "loss": 1.0961, "step": 994 }, { "epoch": 0.20227688554584264, "grad_norm": 0.1268136203289032, "learning_rate": 0.00017988406386657175, "loss": 1.1405, "step": 995 }, { "epoch": 0.20248017889815004, "grad_norm": 0.12864577770233154, "learning_rate": 0.0001798637241940405, "loss": 1.136, "step": 996 }, { "epoch": 0.2026834722504574, "grad_norm": 0.11293767392635345, "learning_rate": 0.00017984338452150923, "loss": 1.0633, "step": 997 }, { "epoch": 0.20288676560276478, "grad_norm": 0.11901193857192993, "learning_rate": 0.00017982304484897793, "loss": 1.1404, "step": 998 }, { "epoch": 0.20309005895507218, "grad_norm": 0.14368772506713867, "learning_rate": 0.00017980270517644665, "loss": 1.2092, "step": 999 }, { "epoch": 0.20329335230737955, "grad_norm": 0.1403762251138687, "learning_rate": 0.0001797823655039154, "loss": 1.1212, "step": 1000 }, { "epoch": 0.20349664565968692, "grad_norm": 0.10853412747383118, "learning_rate": 0.00017976202583138413, "loss": 0.9489, "step": 1001 }, { "epoch": 0.20369993901199432, "grad_norm": 0.11670242995023727, "learning_rate": 0.00017974168615885285, "loss": 1.0397, "step": 1002 }, { "epoch": 0.2039032323643017, "grad_norm": 0.12957903742790222, "learning_rate": 0.00017972134648632158, "loss": 0.9898, "step": 1003 }, { "epoch": 0.20410652571660906, "grad_norm": 0.1174166351556778, "learning_rate": 0.0001797010068137903, "loss": 1.1126, "step": 1004 }, { "epoch": 0.20430981906891646, "grad_norm": 0.12919628620147705, "learning_rate": 0.00017968066714125905, "loss": 1.0282, "step": 1005 }, { "epoch": 0.20451311242122383, "grad_norm": 0.12586313486099243, "learning_rate": 0.00017966032746872775, "loss": 1.1674, "step": 1006 }, { "epoch": 0.2047164057735312, "grad_norm": 0.12239197641611099, "learning_rate": 0.00017963998779619647, "loss": 1.0543, "step": 1007 }, { "epoch": 0.20491969912583857, "grad_norm": 0.11404930055141449, "learning_rate": 0.0001796196481236652, "loss": 1.0193, "step": 1008 }, { "epoch": 0.20512299247814597, "grad_norm": 0.14286890625953674, "learning_rate": 0.00017959930845113395, "loss": 0.994, "step": 1009 }, { "epoch": 0.20532628583045334, "grad_norm": 0.12723585963249207, "learning_rate": 0.00017957896877860268, "loss": 1.1854, "step": 1010 }, { "epoch": 0.2055295791827607, "grad_norm": 0.13282720744609833, "learning_rate": 0.0001795586291060714, "loss": 1.0967, "step": 1011 }, { "epoch": 0.2057328725350681, "grad_norm": 0.11795739084482193, "learning_rate": 0.00017953828943354012, "loss": 1.0024, "step": 1012 }, { "epoch": 0.20593616588737548, "grad_norm": 0.123084157705307, "learning_rate": 0.00017951794976100888, "loss": 1.0015, "step": 1013 }, { "epoch": 0.20613945923968285, "grad_norm": 0.13757507503032684, "learning_rate": 0.00017949761008847757, "loss": 1.1926, "step": 1014 }, { "epoch": 0.20634275259199025, "grad_norm": 0.13981647789478302, "learning_rate": 0.0001794772704159463, "loss": 1.1296, "step": 1015 }, { "epoch": 0.20654604594429762, "grad_norm": 0.12356757372617722, "learning_rate": 0.00017945693074341502, "loss": 1.1867, "step": 1016 }, { "epoch": 0.206749339296605, "grad_norm": 0.11218491941690445, "learning_rate": 0.00017943659107088377, "loss": 0.9909, "step": 1017 }, { "epoch": 0.2069526326489124, "grad_norm": 0.11628386378288269, "learning_rate": 0.0001794162513983525, "loss": 1.1048, "step": 1018 }, { "epoch": 0.20715592600121976, "grad_norm": 0.1266728788614273, "learning_rate": 0.00017939591172582122, "loss": 1.123, "step": 1019 }, { "epoch": 0.20735921935352714, "grad_norm": 0.1243995800614357, "learning_rate": 0.00017937557205328995, "loss": 1.1161, "step": 1020 }, { "epoch": 0.2075625127058345, "grad_norm": 0.10625866800546646, "learning_rate": 0.0001793552323807587, "loss": 0.9773, "step": 1021 }, { "epoch": 0.2077658060581419, "grad_norm": 0.11653080582618713, "learning_rate": 0.0001793348927082274, "loss": 1.0115, "step": 1022 }, { "epoch": 0.20796909941044928, "grad_norm": 0.12603938579559326, "learning_rate": 0.00017931455303569612, "loss": 0.9879, "step": 1023 }, { "epoch": 0.20817239276275665, "grad_norm": 0.11850478500127792, "learning_rate": 0.00017929421336316485, "loss": 1.0412, "step": 1024 }, { "epoch": 0.20837568611506405, "grad_norm": 0.13597136735916138, "learning_rate": 0.0001792738736906336, "loss": 1.1984, "step": 1025 }, { "epoch": 0.20857897946737142, "grad_norm": 0.12899504601955414, "learning_rate": 0.00017925353401810232, "loss": 1.2007, "step": 1026 }, { "epoch": 0.2087822728196788, "grad_norm": 0.12255753576755524, "learning_rate": 0.00017923319434557105, "loss": 1.0632, "step": 1027 }, { "epoch": 0.2089855661719862, "grad_norm": 0.11182371526956558, "learning_rate": 0.00017921285467303977, "loss": 1.0428, "step": 1028 }, { "epoch": 0.20918885952429356, "grad_norm": 0.10728685557842255, "learning_rate": 0.00017919251500050852, "loss": 0.9964, "step": 1029 }, { "epoch": 0.20939215287660093, "grad_norm": 0.1301811784505844, "learning_rate": 0.00017917217532797722, "loss": 1.0812, "step": 1030 }, { "epoch": 0.20959544622890833, "grad_norm": 0.12470284849405289, "learning_rate": 0.00017915183565544594, "loss": 0.9967, "step": 1031 }, { "epoch": 0.2097987395812157, "grad_norm": 0.12017293274402618, "learning_rate": 0.00017913149598291467, "loss": 1.0986, "step": 1032 }, { "epoch": 0.21000203293352307, "grad_norm": 0.14881430566310883, "learning_rate": 0.00017911115631038342, "loss": 1.2407, "step": 1033 }, { "epoch": 0.21020532628583044, "grad_norm": 0.11730567365884781, "learning_rate": 0.00017909081663785214, "loss": 0.9414, "step": 1034 }, { "epoch": 0.21040861963813784, "grad_norm": 0.12763184309005737, "learning_rate": 0.00017907047696532087, "loss": 1.0773, "step": 1035 }, { "epoch": 0.2106119129904452, "grad_norm": 0.11463324725627899, "learning_rate": 0.0001790501372927896, "loss": 0.965, "step": 1036 }, { "epoch": 0.21081520634275258, "grad_norm": 0.13079042732715607, "learning_rate": 0.00017902979762025834, "loss": 1.0491, "step": 1037 }, { "epoch": 0.21101849969505998, "grad_norm": 0.13902175426483154, "learning_rate": 0.00017900945794772704, "loss": 0.9453, "step": 1038 }, { "epoch": 0.21122179304736735, "grad_norm": 0.12852630019187927, "learning_rate": 0.00017898911827519577, "loss": 1.221, "step": 1039 }, { "epoch": 0.21142508639967472, "grad_norm": 0.10965081304311752, "learning_rate": 0.0001789687786026645, "loss": 0.9923, "step": 1040 }, { "epoch": 0.21162837975198212, "grad_norm": 0.1155104711651802, "learning_rate": 0.00017894843893013324, "loss": 0.8918, "step": 1041 }, { "epoch": 0.2118316731042895, "grad_norm": 0.13126857578754425, "learning_rate": 0.00017892809925760197, "loss": 1.1116, "step": 1042 }, { "epoch": 0.21203496645659686, "grad_norm": 0.11619725823402405, "learning_rate": 0.0001789077595850707, "loss": 0.9448, "step": 1043 }, { "epoch": 0.21223825980890426, "grad_norm": 0.12041871249675751, "learning_rate": 0.00017888741991253942, "loss": 1.0778, "step": 1044 }, { "epoch": 0.21244155316121163, "grad_norm": 0.1230979636311531, "learning_rate": 0.00017886708024000814, "loss": 1.0807, "step": 1045 }, { "epoch": 0.212644846513519, "grad_norm": 0.1263006180524826, "learning_rate": 0.0001788467405674769, "loss": 1.1668, "step": 1046 }, { "epoch": 0.21284813986582637, "grad_norm": 0.11430171877145767, "learning_rate": 0.0001788264008949456, "loss": 1.1096, "step": 1047 }, { "epoch": 0.21305143321813377, "grad_norm": 0.1243266835808754, "learning_rate": 0.0001788060612224143, "loss": 0.9583, "step": 1048 }, { "epoch": 0.21325472657044114, "grad_norm": 0.12808263301849365, "learning_rate": 0.00017878572154988304, "loss": 1.0896, "step": 1049 }, { "epoch": 0.21345801992274852, "grad_norm": 0.13576021790504456, "learning_rate": 0.0001787653818773518, "loss": 1.0807, "step": 1050 }, { "epoch": 0.21366131327505591, "grad_norm": 0.10852668434381485, "learning_rate": 0.00017874504220482051, "loss": 0.9132, "step": 1051 }, { "epoch": 0.21386460662736329, "grad_norm": 0.13336928188800812, "learning_rate": 0.00017872470253228924, "loss": 1.1131, "step": 1052 }, { "epoch": 0.21406789997967066, "grad_norm": 0.12640543282032013, "learning_rate": 0.00017870436285975796, "loss": 1.0568, "step": 1053 }, { "epoch": 0.21427119333197805, "grad_norm": 0.12157181650400162, "learning_rate": 0.00017868402318722671, "loss": 1.033, "step": 1054 }, { "epoch": 0.21447448668428543, "grad_norm": 0.12272074073553085, "learning_rate": 0.0001786636835146954, "loss": 1.0301, "step": 1055 }, { "epoch": 0.2146777800365928, "grad_norm": 0.1594497114419937, "learning_rate": 0.00017864334384216414, "loss": 1.1618, "step": 1056 }, { "epoch": 0.2148810733889002, "grad_norm": 0.14059504866600037, "learning_rate": 0.00017862300416963286, "loss": 1.1223, "step": 1057 }, { "epoch": 0.21508436674120757, "grad_norm": 0.12746313214302063, "learning_rate": 0.0001786026644971016, "loss": 1.1294, "step": 1058 }, { "epoch": 0.21528766009351494, "grad_norm": 0.13382786512374878, "learning_rate": 0.00017858232482457034, "loss": 1.0969, "step": 1059 }, { "epoch": 0.2154909534458223, "grad_norm": 0.1192721351981163, "learning_rate": 0.00017856198515203906, "loss": 0.9751, "step": 1060 }, { "epoch": 0.2156942467981297, "grad_norm": 0.1318022906780243, "learning_rate": 0.00017854164547950779, "loss": 1.267, "step": 1061 }, { "epoch": 0.21589754015043708, "grad_norm": 0.12069433927536011, "learning_rate": 0.00017852130580697654, "loss": 1.0525, "step": 1062 }, { "epoch": 0.21610083350274445, "grad_norm": 0.12405405938625336, "learning_rate": 0.00017850096613444523, "loss": 1.1731, "step": 1063 }, { "epoch": 0.21630412685505185, "grad_norm": 0.11893291026353836, "learning_rate": 0.00017848062646191396, "loss": 1.1609, "step": 1064 }, { "epoch": 0.21650742020735922, "grad_norm": 0.11019967496395111, "learning_rate": 0.00017846028678938268, "loss": 0.9735, "step": 1065 }, { "epoch": 0.2167107135596666, "grad_norm": 0.11663123220205307, "learning_rate": 0.00017843994711685144, "loss": 1.2882, "step": 1066 }, { "epoch": 0.216914006911974, "grad_norm": 0.12803837656974792, "learning_rate": 0.00017841960744432016, "loss": 1.0365, "step": 1067 }, { "epoch": 0.21711730026428136, "grad_norm": 0.13295085728168488, "learning_rate": 0.00017839926777178888, "loss": 1.1925, "step": 1068 }, { "epoch": 0.21732059361658873, "grad_norm": 0.12314966320991516, "learning_rate": 0.0001783789280992576, "loss": 1.1064, "step": 1069 }, { "epoch": 0.21752388696889613, "grad_norm": 0.12015377730131149, "learning_rate": 0.00017835858842672636, "loss": 1.1056, "step": 1070 }, { "epoch": 0.2177271803212035, "grad_norm": 0.11665552854537964, "learning_rate": 0.00017833824875419506, "loss": 1.0592, "step": 1071 }, { "epoch": 0.21793047367351087, "grad_norm": 0.11458134651184082, "learning_rate": 0.00017831790908166378, "loss": 1.1203, "step": 1072 }, { "epoch": 0.21813376702581824, "grad_norm": 0.10290549695491791, "learning_rate": 0.0001782975694091325, "loss": 0.9427, "step": 1073 }, { "epoch": 0.21833706037812564, "grad_norm": 0.12680476903915405, "learning_rate": 0.00017827722973660126, "loss": 1.1047, "step": 1074 }, { "epoch": 0.218540353730433, "grad_norm": 0.1253194808959961, "learning_rate": 0.00017825689006406998, "loss": 1.1482, "step": 1075 }, { "epoch": 0.21874364708274038, "grad_norm": 0.1381319910287857, "learning_rate": 0.0001782365503915387, "loss": 1.3134, "step": 1076 }, { "epoch": 0.21894694043504778, "grad_norm": 0.12798373401165009, "learning_rate": 0.00017821621071900743, "loss": 1.1119, "step": 1077 }, { "epoch": 0.21915023378735515, "grad_norm": 0.1302616447210312, "learning_rate": 0.00017819587104647618, "loss": 1.1038, "step": 1078 }, { "epoch": 0.21935352713966252, "grad_norm": 0.1357065588235855, "learning_rate": 0.00017817553137394488, "loss": 1.1385, "step": 1079 }, { "epoch": 0.21955682049196992, "grad_norm": 0.1307210475206375, "learning_rate": 0.0001781551917014136, "loss": 1.1692, "step": 1080 }, { "epoch": 0.2197601138442773, "grad_norm": 0.12304160743951797, "learning_rate": 0.00017813485202888233, "loss": 1.1044, "step": 1081 }, { "epoch": 0.21996340719658466, "grad_norm": 0.12165479362010956, "learning_rate": 0.00017811451235635108, "loss": 1.0762, "step": 1082 }, { "epoch": 0.22016670054889206, "grad_norm": 0.12440644204616547, "learning_rate": 0.0001780941726838198, "loss": 1.0296, "step": 1083 }, { "epoch": 0.22036999390119943, "grad_norm": 0.14743392169475555, "learning_rate": 0.00017807383301128853, "loss": 1.1824, "step": 1084 }, { "epoch": 0.2205732872535068, "grad_norm": 0.13372984528541565, "learning_rate": 0.00017805349333875725, "loss": 1.0795, "step": 1085 }, { "epoch": 0.22077658060581418, "grad_norm": 0.11515718698501587, "learning_rate": 0.00017803315366622598, "loss": 0.9869, "step": 1086 }, { "epoch": 0.22097987395812158, "grad_norm": 0.1197754368185997, "learning_rate": 0.0001780128139936947, "loss": 1.1101, "step": 1087 }, { "epoch": 0.22118316731042895, "grad_norm": 0.121689073741436, "learning_rate": 0.00017799247432116343, "loss": 1.0624, "step": 1088 }, { "epoch": 0.22138646066273632, "grad_norm": 0.12425584346055984, "learning_rate": 0.00017797213464863215, "loss": 0.9962, "step": 1089 }, { "epoch": 0.22158975401504372, "grad_norm": 0.11786684393882751, "learning_rate": 0.00017795179497610088, "loss": 0.8943, "step": 1090 }, { "epoch": 0.2217930473673511, "grad_norm": 0.13555578887462616, "learning_rate": 0.00017793145530356963, "loss": 1.2069, "step": 1091 }, { "epoch": 0.22199634071965846, "grad_norm": 0.12431347370147705, "learning_rate": 0.00017791111563103835, "loss": 1.1376, "step": 1092 }, { "epoch": 0.22219963407196586, "grad_norm": 0.12472493946552277, "learning_rate": 0.00017789077595850708, "loss": 1.1486, "step": 1093 }, { "epoch": 0.22240292742427323, "grad_norm": 0.12927775084972382, "learning_rate": 0.0001778704362859758, "loss": 1.1581, "step": 1094 }, { "epoch": 0.2226062207765806, "grad_norm": 0.12910224497318268, "learning_rate": 0.00017785009661344453, "loss": 0.988, "step": 1095 }, { "epoch": 0.222809514128888, "grad_norm": 0.11531752347946167, "learning_rate": 0.00017782975694091325, "loss": 0.9782, "step": 1096 }, { "epoch": 0.22301280748119537, "grad_norm": 0.1250569224357605, "learning_rate": 0.00017780941726838197, "loss": 1.0796, "step": 1097 }, { "epoch": 0.22321610083350274, "grad_norm": 0.1234661191701889, "learning_rate": 0.0001777890775958507, "loss": 1.1571, "step": 1098 }, { "epoch": 0.2234193941858101, "grad_norm": 0.11324235796928406, "learning_rate": 0.00017776873792331945, "loss": 1.1156, "step": 1099 }, { "epoch": 0.2236226875381175, "grad_norm": 0.12516295909881592, "learning_rate": 0.00017774839825078818, "loss": 1.0161, "step": 1100 }, { "epoch": 0.22382598089042488, "grad_norm": 0.13084611296653748, "learning_rate": 0.0001777280585782569, "loss": 1.1408, "step": 1101 }, { "epoch": 0.22402927424273225, "grad_norm": 0.1189943253993988, "learning_rate": 0.00017770771890572562, "loss": 0.9823, "step": 1102 }, { "epoch": 0.22423256759503965, "grad_norm": 0.11955268681049347, "learning_rate": 0.00017768737923319438, "loss": 0.8977, "step": 1103 }, { "epoch": 0.22443586094734702, "grad_norm": 0.12528367340564728, "learning_rate": 0.00017766703956066307, "loss": 1.1579, "step": 1104 }, { "epoch": 0.2246391542996544, "grad_norm": 0.12829215824604034, "learning_rate": 0.0001776466998881318, "loss": 1.1892, "step": 1105 }, { "epoch": 0.2248424476519618, "grad_norm": 0.12263132631778717, "learning_rate": 0.00017762636021560052, "loss": 1.1609, "step": 1106 }, { "epoch": 0.22504574100426916, "grad_norm": 0.12810589373111725, "learning_rate": 0.00017760602054306927, "loss": 1.0712, "step": 1107 }, { "epoch": 0.22524903435657653, "grad_norm": 0.1171211376786232, "learning_rate": 0.000177585680870538, "loss": 0.9694, "step": 1108 }, { "epoch": 0.22545232770888393, "grad_norm": 0.12270856648683548, "learning_rate": 0.00017756534119800672, "loss": 1.0896, "step": 1109 }, { "epoch": 0.2256556210611913, "grad_norm": 0.13578352332115173, "learning_rate": 0.00017754500152547545, "loss": 1.2125, "step": 1110 }, { "epoch": 0.22585891441349867, "grad_norm": 0.1315973401069641, "learning_rate": 0.0001775246618529442, "loss": 1.2441, "step": 1111 }, { "epoch": 0.22606220776580604, "grad_norm": 0.1222010925412178, "learning_rate": 0.0001775043221804129, "loss": 0.9894, "step": 1112 }, { "epoch": 0.22626550111811344, "grad_norm": 0.12425290793180466, "learning_rate": 0.00017748398250788162, "loss": 1.273, "step": 1113 }, { "epoch": 0.22646879447042081, "grad_norm": 0.10960794985294342, "learning_rate": 0.00017746364283535034, "loss": 0.8637, "step": 1114 }, { "epoch": 0.22667208782272819, "grad_norm": 0.13080738484859467, "learning_rate": 0.0001774433031628191, "loss": 1.1506, "step": 1115 }, { "epoch": 0.22687538117503558, "grad_norm": 0.11546586453914642, "learning_rate": 0.00017742296349028782, "loss": 1.1043, "step": 1116 }, { "epoch": 0.22707867452734296, "grad_norm": 0.12280496209859848, "learning_rate": 0.00017740262381775655, "loss": 1.1203, "step": 1117 }, { "epoch": 0.22728196787965033, "grad_norm": 0.11661294102668762, "learning_rate": 0.00017738228414522527, "loss": 1.0486, "step": 1118 }, { "epoch": 0.22748526123195772, "grad_norm": 0.12169715762138367, "learning_rate": 0.00017736194447269402, "loss": 1.2318, "step": 1119 }, { "epoch": 0.2276885545842651, "grad_norm": 0.12962935864925385, "learning_rate": 0.00017734160480016272, "loss": 1.0912, "step": 1120 }, { "epoch": 0.22789184793657247, "grad_norm": 0.14488789439201355, "learning_rate": 0.00017732126512763144, "loss": 0.9774, "step": 1121 }, { "epoch": 0.22809514128887987, "grad_norm": 0.11455550044775009, "learning_rate": 0.00017730092545510017, "loss": 0.9454, "step": 1122 }, { "epoch": 0.22829843464118724, "grad_norm": 0.11764731258153915, "learning_rate": 0.00017728058578256892, "loss": 1.0895, "step": 1123 }, { "epoch": 0.2285017279934946, "grad_norm": 0.12537989020347595, "learning_rate": 0.00017726024611003764, "loss": 1.141, "step": 1124 }, { "epoch": 0.22870502134580198, "grad_norm": 0.11639077961444855, "learning_rate": 0.00017723990643750637, "loss": 1.1259, "step": 1125 }, { "epoch": 0.22890831469810938, "grad_norm": 0.12202929705381393, "learning_rate": 0.0001772195667649751, "loss": 1.141, "step": 1126 }, { "epoch": 0.22911160805041675, "grad_norm": 0.11307729780673981, "learning_rate": 0.00017719922709244382, "loss": 0.9076, "step": 1127 }, { "epoch": 0.22931490140272412, "grad_norm": 0.11854063719511032, "learning_rate": 0.00017717888741991254, "loss": 1.0369, "step": 1128 }, { "epoch": 0.22951819475503152, "grad_norm": 0.11729457229375839, "learning_rate": 0.00017715854774738127, "loss": 1.0503, "step": 1129 }, { "epoch": 0.2297214881073389, "grad_norm": 0.13550931215286255, "learning_rate": 0.00017713820807485, "loss": 1.1211, "step": 1130 }, { "epoch": 0.22992478145964626, "grad_norm": 0.1215146854519844, "learning_rate": 0.00017711786840231871, "loss": 1.0483, "step": 1131 }, { "epoch": 0.23012807481195366, "grad_norm": 0.12911346554756165, "learning_rate": 0.00017709752872978747, "loss": 1.1133, "step": 1132 }, { "epoch": 0.23033136816426103, "grad_norm": 0.1176094263792038, "learning_rate": 0.0001770771890572562, "loss": 0.9717, "step": 1133 }, { "epoch": 0.2305346615165684, "grad_norm": 0.1320810616016388, "learning_rate": 0.00017705684938472492, "loss": 1.0978, "step": 1134 }, { "epoch": 0.2307379548688758, "grad_norm": 0.119644396007061, "learning_rate": 0.00017703650971219364, "loss": 1.03, "step": 1135 }, { "epoch": 0.23094124822118317, "grad_norm": 0.11813725531101227, "learning_rate": 0.00017701617003966236, "loss": 0.9804, "step": 1136 }, { "epoch": 0.23114454157349054, "grad_norm": 0.12088938802480698, "learning_rate": 0.0001769958303671311, "loss": 1.052, "step": 1137 }, { "epoch": 0.2313478349257979, "grad_norm": 0.11971927434206009, "learning_rate": 0.0001769754906945998, "loss": 0.8725, "step": 1138 }, { "epoch": 0.2315511282781053, "grad_norm": 0.1300465613603592, "learning_rate": 0.00017695515102206854, "loss": 1.1638, "step": 1139 }, { "epoch": 0.23175442163041268, "grad_norm": 0.14033594727516174, "learning_rate": 0.0001769348113495373, "loss": 1.2757, "step": 1140 }, { "epoch": 0.23195771498272005, "grad_norm": 0.13062700629234314, "learning_rate": 0.000176914471677006, "loss": 1.1734, "step": 1141 }, { "epoch": 0.23216100833502745, "grad_norm": 0.11161787062883377, "learning_rate": 0.00017689413200447474, "loss": 0.9733, "step": 1142 }, { "epoch": 0.23236430168733482, "grad_norm": 0.11497635394334793, "learning_rate": 0.00017687379233194346, "loss": 0.9751, "step": 1143 }, { "epoch": 0.2325675950396422, "grad_norm": 0.12658412754535675, "learning_rate": 0.0001768534526594122, "loss": 0.9667, "step": 1144 }, { "epoch": 0.2327708883919496, "grad_norm": 0.125930517911911, "learning_rate": 0.0001768331129868809, "loss": 1.2521, "step": 1145 }, { "epoch": 0.23297418174425696, "grad_norm": 0.1267358511686325, "learning_rate": 0.00017681277331434964, "loss": 1.2034, "step": 1146 }, { "epoch": 0.23317747509656434, "grad_norm": 0.11235269904136658, "learning_rate": 0.00017679243364181836, "loss": 1.0049, "step": 1147 }, { "epoch": 0.23338076844887173, "grad_norm": 0.13258063793182373, "learning_rate": 0.0001767720939692871, "loss": 1.0893, "step": 1148 }, { "epoch": 0.2335840618011791, "grad_norm": 0.108503058552742, "learning_rate": 0.00017675175429675584, "loss": 0.9366, "step": 1149 }, { "epoch": 0.23378735515348648, "grad_norm": 0.12689101696014404, "learning_rate": 0.00017673141462422456, "loss": 1.1286, "step": 1150 }, { "epoch": 0.23399064850579385, "grad_norm": 0.12492146342992783, "learning_rate": 0.00017671107495169329, "loss": 0.9183, "step": 1151 }, { "epoch": 0.23419394185810125, "grad_norm": 0.10324962437152863, "learning_rate": 0.000176690735279162, "loss": 1.0456, "step": 1152 }, { "epoch": 0.23439723521040862, "grad_norm": 0.11633274704217911, "learning_rate": 0.00017667039560663073, "loss": 0.917, "step": 1153 }, { "epoch": 0.234600528562716, "grad_norm": 0.11803746968507767, "learning_rate": 0.00017665005593409946, "loss": 1.0733, "step": 1154 }, { "epoch": 0.2348038219150234, "grad_norm": 0.128416046500206, "learning_rate": 0.00017662971626156818, "loss": 1.1525, "step": 1155 }, { "epoch": 0.23500711526733076, "grad_norm": 0.13254918158054352, "learning_rate": 0.00017660937658903693, "loss": 1.2412, "step": 1156 }, { "epoch": 0.23521040861963813, "grad_norm": 0.13515497744083405, "learning_rate": 0.00017658903691650566, "loss": 1.0627, "step": 1157 }, { "epoch": 0.23541370197194553, "grad_norm": 0.12952685356140137, "learning_rate": 0.00017656869724397438, "loss": 1.0841, "step": 1158 }, { "epoch": 0.2356169953242529, "grad_norm": 0.14173516631126404, "learning_rate": 0.0001765483575714431, "loss": 1.1436, "step": 1159 }, { "epoch": 0.23582028867656027, "grad_norm": 0.11358428746461868, "learning_rate": 0.00017652801789891183, "loss": 1.0707, "step": 1160 }, { "epoch": 0.23602358202886767, "grad_norm": 0.11959460377693176, "learning_rate": 0.00017650767822638056, "loss": 1.0038, "step": 1161 }, { "epoch": 0.23622687538117504, "grad_norm": 0.13181112706661224, "learning_rate": 0.00017648733855384928, "loss": 1.2132, "step": 1162 }, { "epoch": 0.2364301687334824, "grad_norm": 0.12374672293663025, "learning_rate": 0.000176466998881318, "loss": 1.0432, "step": 1163 }, { "epoch": 0.23663346208578978, "grad_norm": 0.1308983564376831, "learning_rate": 0.00017644665920878676, "loss": 1.1725, "step": 1164 }, { "epoch": 0.23683675543809718, "grad_norm": 0.11602329462766647, "learning_rate": 0.00017642631953625548, "loss": 1.0136, "step": 1165 }, { "epoch": 0.23704004879040455, "grad_norm": 0.1398748755455017, "learning_rate": 0.0001764059798637242, "loss": 1.0655, "step": 1166 }, { "epoch": 0.23724334214271192, "grad_norm": 0.1302013248205185, "learning_rate": 0.00017638564019119293, "loss": 1.1052, "step": 1167 }, { "epoch": 0.23744663549501932, "grad_norm": 0.11932185292243958, "learning_rate": 0.00017636530051866166, "loss": 1.1945, "step": 1168 }, { "epoch": 0.2376499288473267, "grad_norm": 0.11323782801628113, "learning_rate": 0.00017634496084613038, "loss": 1.0412, "step": 1169 }, { "epoch": 0.23785322219963406, "grad_norm": 0.1345479041337967, "learning_rate": 0.0001763246211735991, "loss": 1.082, "step": 1170 }, { "epoch": 0.23805651555194146, "grad_norm": 0.12548640370368958, "learning_rate": 0.00017630428150106783, "loss": 1.1213, "step": 1171 }, { "epoch": 0.23825980890424883, "grad_norm": 0.12849657237529755, "learning_rate": 0.00017628394182853655, "loss": 1.13, "step": 1172 }, { "epoch": 0.2384631022565562, "grad_norm": 0.11670655757188797, "learning_rate": 0.0001762636021560053, "loss": 0.8984, "step": 1173 }, { "epoch": 0.2386663956088636, "grad_norm": 0.11539500951766968, "learning_rate": 0.00017624326248347403, "loss": 1.1453, "step": 1174 }, { "epoch": 0.23886968896117097, "grad_norm": 0.13686025142669678, "learning_rate": 0.00017622292281094275, "loss": 1.2811, "step": 1175 }, { "epoch": 0.23907298231347834, "grad_norm": 0.13845805823802948, "learning_rate": 0.00017620258313841148, "loss": 1.2939, "step": 1176 }, { "epoch": 0.23927627566578574, "grad_norm": 0.12209935486316681, "learning_rate": 0.0001761822434658802, "loss": 0.8311, "step": 1177 }, { "epoch": 0.2394795690180931, "grad_norm": 0.11880161613225937, "learning_rate": 0.00017616190379334893, "loss": 1.2844, "step": 1178 }, { "epoch": 0.23968286237040048, "grad_norm": 0.1326730102300644, "learning_rate": 0.00017614156412081765, "loss": 1.1336, "step": 1179 }, { "epoch": 0.23988615572270786, "grad_norm": 0.11547461152076721, "learning_rate": 0.00017612122444828638, "loss": 0.9757, "step": 1180 }, { "epoch": 0.24008944907501525, "grad_norm": 0.1296636462211609, "learning_rate": 0.00017610088477575513, "loss": 1.084, "step": 1181 }, { "epoch": 0.24029274242732263, "grad_norm": 0.12076129764318466, "learning_rate": 0.00017608054510322385, "loss": 1.1151, "step": 1182 }, { "epoch": 0.24049603577963, "grad_norm": 0.12159736454486847, "learning_rate": 0.00017606020543069258, "loss": 1.0461, "step": 1183 }, { "epoch": 0.2406993291319374, "grad_norm": 0.13127025961875916, "learning_rate": 0.0001760398657581613, "loss": 1.0541, "step": 1184 }, { "epoch": 0.24090262248424477, "grad_norm": 0.14702552556991577, "learning_rate": 0.00017601952608563003, "loss": 1.2109, "step": 1185 }, { "epoch": 0.24110591583655214, "grad_norm": 0.11683522909879684, "learning_rate": 0.00017599918641309875, "loss": 1.1588, "step": 1186 }, { "epoch": 0.24130920918885954, "grad_norm": 0.1130138412117958, "learning_rate": 0.00017597884674056747, "loss": 1.0268, "step": 1187 }, { "epoch": 0.2415125025411669, "grad_norm": 0.10920488089323044, "learning_rate": 0.0001759585070680362, "loss": 0.9038, "step": 1188 }, { "epoch": 0.24171579589347428, "grad_norm": 0.12897898256778717, "learning_rate": 0.00017593816739550495, "loss": 1.3032, "step": 1189 }, { "epoch": 0.24191908924578168, "grad_norm": 0.1289346069097519, "learning_rate": 0.00017591782772297367, "loss": 1.1169, "step": 1190 }, { "epoch": 0.24212238259808905, "grad_norm": 0.12478041648864746, "learning_rate": 0.0001758974880504424, "loss": 1.1723, "step": 1191 }, { "epoch": 0.24232567595039642, "grad_norm": 0.13389204442501068, "learning_rate": 0.00017587714837791112, "loss": 1.1079, "step": 1192 }, { "epoch": 0.2425289693027038, "grad_norm": 0.12659893929958344, "learning_rate": 0.00017585680870537985, "loss": 1.078, "step": 1193 }, { "epoch": 0.2427322626550112, "grad_norm": 0.13224546611309052, "learning_rate": 0.00017583646903284857, "loss": 1.0784, "step": 1194 }, { "epoch": 0.24293555600731856, "grad_norm": 0.13924521207809448, "learning_rate": 0.0001758161293603173, "loss": 1.1491, "step": 1195 }, { "epoch": 0.24313884935962593, "grad_norm": 0.10379677265882492, "learning_rate": 0.00017579578968778602, "loss": 0.8733, "step": 1196 }, { "epoch": 0.24334214271193333, "grad_norm": 0.17008356750011444, "learning_rate": 0.00017577545001525477, "loss": 1.1597, "step": 1197 }, { "epoch": 0.2435454360642407, "grad_norm": 0.11082588881254196, "learning_rate": 0.0001757551103427235, "loss": 0.966, "step": 1198 }, { "epoch": 0.24374872941654807, "grad_norm": 0.12224634736776352, "learning_rate": 0.00017573477067019222, "loss": 0.9494, "step": 1199 }, { "epoch": 0.24395202276885547, "grad_norm": 0.12597164511680603, "learning_rate": 0.00017571443099766095, "loss": 0.9376, "step": 1200 }, { "epoch": 0.24415531612116284, "grad_norm": 0.12282256036996841, "learning_rate": 0.00017569409132512967, "loss": 1.1645, "step": 1201 }, { "epoch": 0.2443586094734702, "grad_norm": 0.10933969169855118, "learning_rate": 0.0001756737516525984, "loss": 1.0338, "step": 1202 }, { "epoch": 0.2445619028257776, "grad_norm": 0.12132111936807632, "learning_rate": 0.00017565341198006712, "loss": 1.1541, "step": 1203 }, { "epoch": 0.24476519617808498, "grad_norm": 0.12675434350967407, "learning_rate": 0.00017563307230753584, "loss": 1.0506, "step": 1204 }, { "epoch": 0.24496848953039235, "grad_norm": 0.14764836430549622, "learning_rate": 0.0001756127326350046, "loss": 1.0209, "step": 1205 }, { "epoch": 0.24517178288269972, "grad_norm": 0.11838477104902267, "learning_rate": 0.00017559239296247332, "loss": 0.9789, "step": 1206 }, { "epoch": 0.24537507623500712, "grad_norm": 0.11526069790124893, "learning_rate": 0.00017557205328994204, "loss": 1.0871, "step": 1207 }, { "epoch": 0.2455783695873145, "grad_norm": 0.12997418642044067, "learning_rate": 0.00017555171361741077, "loss": 1.215, "step": 1208 }, { "epoch": 0.24578166293962186, "grad_norm": 0.1175120398402214, "learning_rate": 0.0001755313739448795, "loss": 0.8355, "step": 1209 }, { "epoch": 0.24598495629192926, "grad_norm": 0.11287759989500046, "learning_rate": 0.00017551103427234822, "loss": 0.9071, "step": 1210 }, { "epoch": 0.24618824964423663, "grad_norm": 0.11898453533649445, "learning_rate": 0.00017549069459981694, "loss": 1.0175, "step": 1211 }, { "epoch": 0.246391542996544, "grad_norm": 0.13262607157230377, "learning_rate": 0.00017547035492728567, "loss": 1.1498, "step": 1212 }, { "epoch": 0.2465948363488514, "grad_norm": 0.12178485840559006, "learning_rate": 0.0001754500152547544, "loss": 1.0405, "step": 1213 }, { "epoch": 0.24679812970115877, "grad_norm": 0.13001886010169983, "learning_rate": 0.00017542967558222314, "loss": 1.0465, "step": 1214 }, { "epoch": 0.24700142305346615, "grad_norm": 0.12525972723960876, "learning_rate": 0.00017540933590969187, "loss": 1.1144, "step": 1215 }, { "epoch": 0.24720471640577354, "grad_norm": 0.11287079751491547, "learning_rate": 0.0001753889962371606, "loss": 0.9362, "step": 1216 }, { "epoch": 0.24740800975808092, "grad_norm": 0.13626334071159363, "learning_rate": 0.00017536865656462932, "loss": 1.2352, "step": 1217 }, { "epoch": 0.2476113031103883, "grad_norm": 0.12724994122982025, "learning_rate": 0.00017534831689209804, "loss": 1.0396, "step": 1218 }, { "epoch": 0.24781459646269566, "grad_norm": 0.11603401601314545, "learning_rate": 0.00017532797721956677, "loss": 0.9778, "step": 1219 }, { "epoch": 0.24801788981500306, "grad_norm": 0.12654529511928558, "learning_rate": 0.0001753076375470355, "loss": 1.0312, "step": 1220 }, { "epoch": 0.24822118316731043, "grad_norm": 0.13385628163814545, "learning_rate": 0.00017528729787450421, "loss": 1.1297, "step": 1221 }, { "epoch": 0.2484244765196178, "grad_norm": 0.12190620601177216, "learning_rate": 0.00017526695820197297, "loss": 1.1187, "step": 1222 }, { "epoch": 0.2486277698719252, "grad_norm": 0.11775553971529007, "learning_rate": 0.0001752466185294417, "loss": 1.0193, "step": 1223 }, { "epoch": 0.24883106322423257, "grad_norm": 0.10721298307180405, "learning_rate": 0.00017522627885691041, "loss": 0.9781, "step": 1224 }, { "epoch": 0.24903435657653994, "grad_norm": 0.11292947083711624, "learning_rate": 0.00017520593918437914, "loss": 1.122, "step": 1225 }, { "epoch": 0.24923764992884734, "grad_norm": 0.11116209626197815, "learning_rate": 0.00017518559951184786, "loss": 0.9238, "step": 1226 }, { "epoch": 0.2494409432811547, "grad_norm": 0.12392593175172806, "learning_rate": 0.0001751652598393166, "loss": 1.1305, "step": 1227 }, { "epoch": 0.24964423663346208, "grad_norm": 0.124233178794384, "learning_rate": 0.0001751449201667853, "loss": 1.0218, "step": 1228 }, { "epoch": 0.24984752998576948, "grad_norm": 0.1181500032544136, "learning_rate": 0.00017512458049425404, "loss": 0.9349, "step": 1229 }, { "epoch": 0.2500508233380768, "grad_norm": 0.13005246222019196, "learning_rate": 0.0001751042408217228, "loss": 1.1636, "step": 1230 }, { "epoch": 0.2502541166903842, "grad_norm": 0.12866559624671936, "learning_rate": 0.0001750839011491915, "loss": 1.1384, "step": 1231 }, { "epoch": 0.2504574100426916, "grad_norm": 0.11397498100996017, "learning_rate": 0.00017506356147666024, "loss": 1.0519, "step": 1232 }, { "epoch": 0.25066070339499896, "grad_norm": 0.11991407722234726, "learning_rate": 0.00017504322180412896, "loss": 1.025, "step": 1233 }, { "epoch": 0.25086399674730636, "grad_norm": 0.11384415626525879, "learning_rate": 0.0001750228821315977, "loss": 0.9845, "step": 1234 }, { "epoch": 0.25106729009961376, "grad_norm": 0.12114489823579788, "learning_rate": 0.0001750025424590664, "loss": 0.9427, "step": 1235 }, { "epoch": 0.2512705834519211, "grad_norm": 0.12967409193515778, "learning_rate": 0.00017498220278653514, "loss": 1.0714, "step": 1236 }, { "epoch": 0.2514738768042285, "grad_norm": 0.13375937938690186, "learning_rate": 0.00017496186311400386, "loss": 1.0678, "step": 1237 }, { "epoch": 0.2516771701565359, "grad_norm": 0.12456507235765457, "learning_rate": 0.0001749415234414726, "loss": 1.0189, "step": 1238 }, { "epoch": 0.25188046350884324, "grad_norm": 0.1372321993112564, "learning_rate": 0.00017492118376894134, "loss": 1.2524, "step": 1239 }, { "epoch": 0.25208375686115064, "grad_norm": 0.11218629777431488, "learning_rate": 0.00017490084409641006, "loss": 1.0237, "step": 1240 }, { "epoch": 0.25228705021345804, "grad_norm": 0.12430521845817566, "learning_rate": 0.00017488050442387878, "loss": 0.9717, "step": 1241 }, { "epoch": 0.2524903435657654, "grad_norm": 0.12222771346569061, "learning_rate": 0.0001748601647513475, "loss": 1.087, "step": 1242 }, { "epoch": 0.2526936369180728, "grad_norm": 0.12341856956481934, "learning_rate": 0.00017483982507881623, "loss": 1.0789, "step": 1243 }, { "epoch": 0.2528969302703802, "grad_norm": 0.13263435661792755, "learning_rate": 0.00017481948540628496, "loss": 1.1757, "step": 1244 }, { "epoch": 0.2531002236226875, "grad_norm": 0.12904416024684906, "learning_rate": 0.00017479914573375368, "loss": 1.0812, "step": 1245 }, { "epoch": 0.2533035169749949, "grad_norm": 0.12575136125087738, "learning_rate": 0.00017477880606122243, "loss": 1.0435, "step": 1246 }, { "epoch": 0.2535068103273023, "grad_norm": 0.11990928649902344, "learning_rate": 0.00017475846638869116, "loss": 0.9689, "step": 1247 }, { "epoch": 0.25371010367960967, "grad_norm": 0.12164648622274399, "learning_rate": 0.00017473812671615988, "loss": 0.9309, "step": 1248 }, { "epoch": 0.25391339703191707, "grad_norm": 0.12410687655210495, "learning_rate": 0.0001747177870436286, "loss": 1.0254, "step": 1249 }, { "epoch": 0.2541166903842244, "grad_norm": 0.13339757919311523, "learning_rate": 0.00017469744737109733, "loss": 1.0455, "step": 1250 }, { "epoch": 0.2543199837365318, "grad_norm": 0.14127810299396515, "learning_rate": 0.00017467710769856606, "loss": 1.1633, "step": 1251 }, { "epoch": 0.2545232770888392, "grad_norm": 0.10454534739255905, "learning_rate": 0.00017465676802603478, "loss": 0.7574, "step": 1252 }, { "epoch": 0.25472657044114655, "grad_norm": 0.14605766534805298, "learning_rate": 0.0001746364283535035, "loss": 1.1348, "step": 1253 }, { "epoch": 0.25492986379345395, "grad_norm": 0.11716707050800323, "learning_rate": 0.00017461608868097223, "loss": 1.0645, "step": 1254 }, { "epoch": 0.25513315714576135, "grad_norm": 0.13623961806297302, "learning_rate": 0.00017459574900844098, "loss": 1.1175, "step": 1255 }, { "epoch": 0.2553364504980687, "grad_norm": 0.11011240631341934, "learning_rate": 0.0001745754093359097, "loss": 1.0454, "step": 1256 }, { "epoch": 0.2555397438503761, "grad_norm": 0.13665513694286346, "learning_rate": 0.00017455506966337843, "loss": 1.1112, "step": 1257 }, { "epoch": 0.2557430372026835, "grad_norm": 0.11241257190704346, "learning_rate": 0.00017453472999084715, "loss": 1.0808, "step": 1258 }, { "epoch": 0.25594633055499083, "grad_norm": 0.1247948557138443, "learning_rate": 0.00017451439031831588, "loss": 0.9803, "step": 1259 }, { "epoch": 0.25614962390729823, "grad_norm": 0.14268344640731812, "learning_rate": 0.0001744940506457846, "loss": 1.1095, "step": 1260 }, { "epoch": 0.25635291725960563, "grad_norm": 0.11472602188587189, "learning_rate": 0.00017447371097325333, "loss": 0.9911, "step": 1261 }, { "epoch": 0.25655621061191297, "grad_norm": 0.14191444218158722, "learning_rate": 0.00017445337130072205, "loss": 1.0679, "step": 1262 }, { "epoch": 0.25675950396422037, "grad_norm": 0.12657268345355988, "learning_rate": 0.0001744330316281908, "loss": 0.9211, "step": 1263 }, { "epoch": 0.25696279731652777, "grad_norm": 0.1397320032119751, "learning_rate": 0.00017441269195565953, "loss": 1.1604, "step": 1264 }, { "epoch": 0.2571660906688351, "grad_norm": 0.12176384776830673, "learning_rate": 0.00017439235228312825, "loss": 1.0919, "step": 1265 }, { "epoch": 0.2573693840211425, "grad_norm": 0.13282664120197296, "learning_rate": 0.00017437201261059698, "loss": 1.259, "step": 1266 }, { "epoch": 0.2575726773734499, "grad_norm": 0.14279745519161224, "learning_rate": 0.0001743516729380657, "loss": 1.3582, "step": 1267 }, { "epoch": 0.25777597072575725, "grad_norm": 0.11482515186071396, "learning_rate": 0.00017433133326553443, "loss": 0.9338, "step": 1268 }, { "epoch": 0.25797926407806465, "grad_norm": 0.12177598476409912, "learning_rate": 0.00017431099359300315, "loss": 1.0996, "step": 1269 }, { "epoch": 0.25818255743037205, "grad_norm": 0.12271133065223694, "learning_rate": 0.00017429065392047188, "loss": 1.2357, "step": 1270 }, { "epoch": 0.2583858507826794, "grad_norm": 0.11448093503713608, "learning_rate": 0.00017427031424794063, "loss": 1.0057, "step": 1271 }, { "epoch": 0.2585891441349868, "grad_norm": 0.11486377567052841, "learning_rate": 0.00017424997457540935, "loss": 1.0429, "step": 1272 }, { "epoch": 0.2587924374872942, "grad_norm": 0.12816710770130157, "learning_rate": 0.00017422963490287808, "loss": 1.0849, "step": 1273 }, { "epoch": 0.25899573083960153, "grad_norm": 0.13030269742012024, "learning_rate": 0.0001742092952303468, "loss": 0.9698, "step": 1274 }, { "epoch": 0.25919902419190893, "grad_norm": 0.12305210530757904, "learning_rate": 0.00017418895555781553, "loss": 1.0847, "step": 1275 }, { "epoch": 0.2594023175442163, "grad_norm": 0.11980848014354706, "learning_rate": 0.00017416861588528425, "loss": 1.1857, "step": 1276 }, { "epoch": 0.2596056108965237, "grad_norm": 0.1268121749162674, "learning_rate": 0.00017414827621275297, "loss": 1.057, "step": 1277 }, { "epoch": 0.2598089042488311, "grad_norm": 0.119362972676754, "learning_rate": 0.0001741279365402217, "loss": 0.9978, "step": 1278 }, { "epoch": 0.2600121976011384, "grad_norm": 0.11040918529033661, "learning_rate": 0.00017410759686769045, "loss": 0.8646, "step": 1279 }, { "epoch": 0.2602154909534458, "grad_norm": 0.1263931393623352, "learning_rate": 0.00017408725719515917, "loss": 0.9089, "step": 1280 }, { "epoch": 0.2604187843057532, "grad_norm": 0.1311492770910263, "learning_rate": 0.0001740669175226279, "loss": 1.2096, "step": 1281 }, { "epoch": 0.26062207765806056, "grad_norm": 0.15105241537094116, "learning_rate": 0.00017404657785009662, "loss": 1.3313, "step": 1282 }, { "epoch": 0.26082537101036796, "grad_norm": 0.14878205955028534, "learning_rate": 0.00017402623817756535, "loss": 1.1671, "step": 1283 }, { "epoch": 0.26102866436267536, "grad_norm": 0.12554128468036652, "learning_rate": 0.00017400589850503407, "loss": 0.9834, "step": 1284 }, { "epoch": 0.2612319577149827, "grad_norm": 0.12347958981990814, "learning_rate": 0.0001739855588325028, "loss": 1.1064, "step": 1285 }, { "epoch": 0.2614352510672901, "grad_norm": 0.13344120979309082, "learning_rate": 0.00017396521915997152, "loss": 1.1599, "step": 1286 }, { "epoch": 0.2616385444195975, "grad_norm": 0.11492400616407394, "learning_rate": 0.00017394487948744027, "loss": 1.0664, "step": 1287 }, { "epoch": 0.26184183777190484, "grad_norm": 0.1381841003894806, "learning_rate": 0.000173924539814909, "loss": 1.2612, "step": 1288 }, { "epoch": 0.26204513112421224, "grad_norm": 0.1126202642917633, "learning_rate": 0.00017390420014237772, "loss": 1.0606, "step": 1289 }, { "epoch": 0.26224842447651964, "grad_norm": 0.12391757220029831, "learning_rate": 0.00017388386046984645, "loss": 1.1284, "step": 1290 }, { "epoch": 0.262451717828827, "grad_norm": 0.14284935593605042, "learning_rate": 0.00017386352079731517, "loss": 1.2237, "step": 1291 }, { "epoch": 0.2626550111811344, "grad_norm": 0.11940843611955643, "learning_rate": 0.0001738431811247839, "loss": 1.0164, "step": 1292 }, { "epoch": 0.2628583045334418, "grad_norm": 0.11453817039728165, "learning_rate": 0.00017382284145225262, "loss": 0.919, "step": 1293 }, { "epoch": 0.2630615978857491, "grad_norm": 0.11902697384357452, "learning_rate": 0.00017380250177972134, "loss": 1.0669, "step": 1294 }, { "epoch": 0.2632648912380565, "grad_norm": 0.12861910462379456, "learning_rate": 0.00017378216210719007, "loss": 1.051, "step": 1295 }, { "epoch": 0.2634681845903639, "grad_norm": 0.13415683805942535, "learning_rate": 0.00017376182243465882, "loss": 1.2085, "step": 1296 }, { "epoch": 0.26367147794267126, "grad_norm": 0.11324958503246307, "learning_rate": 0.00017374148276212754, "loss": 1.0347, "step": 1297 }, { "epoch": 0.26387477129497866, "grad_norm": 0.11437279731035233, "learning_rate": 0.00017372114308959627, "loss": 1.0386, "step": 1298 }, { "epoch": 0.26407806464728606, "grad_norm": 0.1309337615966797, "learning_rate": 0.000173700803417065, "loss": 1.0251, "step": 1299 }, { "epoch": 0.2642813579995934, "grad_norm": 0.12801750004291534, "learning_rate": 0.00017368046374453372, "loss": 1.0661, "step": 1300 }, { "epoch": 0.2644846513519008, "grad_norm": 0.12607401609420776, "learning_rate": 0.00017366012407200244, "loss": 1.1156, "step": 1301 }, { "epoch": 0.2646879447042082, "grad_norm": 0.1417655348777771, "learning_rate": 0.00017363978439947117, "loss": 1.3765, "step": 1302 }, { "epoch": 0.26489123805651554, "grad_norm": 0.12621742486953735, "learning_rate": 0.0001736194447269399, "loss": 0.9138, "step": 1303 }, { "epoch": 0.26509453140882294, "grad_norm": 0.12521621584892273, "learning_rate": 0.00017359910505440864, "loss": 1.1882, "step": 1304 }, { "epoch": 0.2652978247611303, "grad_norm": 0.11669400334358215, "learning_rate": 0.00017357876538187737, "loss": 1.0113, "step": 1305 }, { "epoch": 0.2655011181134377, "grad_norm": 0.12276088446378708, "learning_rate": 0.0001735584257093461, "loss": 1.0964, "step": 1306 }, { "epoch": 0.2657044114657451, "grad_norm": 0.11636564135551453, "learning_rate": 0.00017353808603681482, "loss": 1.0141, "step": 1307 }, { "epoch": 0.2659077048180524, "grad_norm": 0.10083210468292236, "learning_rate": 0.00017351774636428354, "loss": 1.0403, "step": 1308 }, { "epoch": 0.2661109981703598, "grad_norm": 0.12461689859628677, "learning_rate": 0.00017349740669175227, "loss": 1.0841, "step": 1309 }, { "epoch": 0.2663142915226672, "grad_norm": 0.12346909195184708, "learning_rate": 0.000173477067019221, "loss": 1.0268, "step": 1310 }, { "epoch": 0.26651758487497457, "grad_norm": 0.11846248060464859, "learning_rate": 0.00017345672734668971, "loss": 1.028, "step": 1311 }, { "epoch": 0.26672087822728197, "grad_norm": 0.1329965591430664, "learning_rate": 0.00017343638767415847, "loss": 1.0805, "step": 1312 }, { "epoch": 0.26692417157958936, "grad_norm": 0.12369682639837265, "learning_rate": 0.0001734160480016272, "loss": 1.0047, "step": 1313 }, { "epoch": 0.2671274649318967, "grad_norm": 0.12594352662563324, "learning_rate": 0.00017339570832909591, "loss": 1.209, "step": 1314 }, { "epoch": 0.2673307582842041, "grad_norm": 0.1423029899597168, "learning_rate": 0.00017337536865656464, "loss": 1.0829, "step": 1315 }, { "epoch": 0.2675340516365115, "grad_norm": 0.11651685833930969, "learning_rate": 0.00017335502898403336, "loss": 1.0249, "step": 1316 }, { "epoch": 0.26773734498881885, "grad_norm": 0.10999172925949097, "learning_rate": 0.0001733346893115021, "loss": 0.8872, "step": 1317 }, { "epoch": 0.26794063834112625, "grad_norm": 0.125168576836586, "learning_rate": 0.0001733143496389708, "loss": 1.0853, "step": 1318 }, { "epoch": 0.26814393169343365, "grad_norm": 0.1307574361562729, "learning_rate": 0.00017329400996643954, "loss": 0.9643, "step": 1319 }, { "epoch": 0.268347225045741, "grad_norm": 0.136819988489151, "learning_rate": 0.0001732736702939083, "loss": 1.0952, "step": 1320 }, { "epoch": 0.2685505183980484, "grad_norm": 0.12915043532848358, "learning_rate": 0.000173253330621377, "loss": 0.9278, "step": 1321 }, { "epoch": 0.2687538117503558, "grad_norm": 0.12452216446399689, "learning_rate": 0.00017323299094884574, "loss": 1.0679, "step": 1322 }, { "epoch": 0.26895710510266313, "grad_norm": 0.1167951300740242, "learning_rate": 0.00017321265127631446, "loss": 1.009, "step": 1323 }, { "epoch": 0.26916039845497053, "grad_norm": 0.12355060130357742, "learning_rate": 0.00017319231160378319, "loss": 1.1398, "step": 1324 }, { "epoch": 0.2693636918072779, "grad_norm": 0.14160853624343872, "learning_rate": 0.0001731719719312519, "loss": 1.1444, "step": 1325 }, { "epoch": 0.26956698515958527, "grad_norm": 0.12388666719198227, "learning_rate": 0.00017315163225872064, "loss": 1.1242, "step": 1326 }, { "epoch": 0.26977027851189267, "grad_norm": 0.11084824055433273, "learning_rate": 0.00017313129258618936, "loss": 0.9006, "step": 1327 }, { "epoch": 0.26997357186420007, "grad_norm": 0.11720530688762665, "learning_rate": 0.0001731109529136581, "loss": 0.9474, "step": 1328 }, { "epoch": 0.2701768652165074, "grad_norm": 0.13025008141994476, "learning_rate": 0.00017309061324112684, "loss": 1.0815, "step": 1329 }, { "epoch": 0.2703801585688148, "grad_norm": 0.14168627560138702, "learning_rate": 0.00017307027356859556, "loss": 1.0938, "step": 1330 }, { "epoch": 0.27058345192112215, "grad_norm": 0.14329680800437927, "learning_rate": 0.00017304993389606428, "loss": 1.2552, "step": 1331 }, { "epoch": 0.27078674527342955, "grad_norm": 0.12423396855592728, "learning_rate": 0.000173029594223533, "loss": 0.8778, "step": 1332 }, { "epoch": 0.27099003862573695, "grad_norm": 0.13177728652954102, "learning_rate": 0.00017300925455100173, "loss": 1.2632, "step": 1333 }, { "epoch": 0.2711933319780443, "grad_norm": 0.12286023795604706, "learning_rate": 0.00017298891487847046, "loss": 1.078, "step": 1334 }, { "epoch": 0.2713966253303517, "grad_norm": 0.10991277545690536, "learning_rate": 0.00017296857520593918, "loss": 1.0038, "step": 1335 }, { "epoch": 0.2715999186826591, "grad_norm": 0.1368594616651535, "learning_rate": 0.0001729482355334079, "loss": 1.0416, "step": 1336 }, { "epoch": 0.27180321203496643, "grad_norm": 0.11537830531597137, "learning_rate": 0.00017292789586087666, "loss": 1.0933, "step": 1337 }, { "epoch": 0.27200650538727383, "grad_norm": 0.11709605902433395, "learning_rate": 0.00017290755618834538, "loss": 0.958, "step": 1338 }, { "epoch": 0.27220979873958123, "grad_norm": 0.1164301261305809, "learning_rate": 0.0001728872165158141, "loss": 0.8833, "step": 1339 }, { "epoch": 0.2724130920918886, "grad_norm": 0.13498760759830475, "learning_rate": 0.00017286687684328283, "loss": 1.1173, "step": 1340 }, { "epoch": 0.272616385444196, "grad_norm": 0.11391112208366394, "learning_rate": 0.00017284653717075156, "loss": 0.9329, "step": 1341 }, { "epoch": 0.2728196787965034, "grad_norm": 0.12780262529850006, "learning_rate": 0.00017282619749822028, "loss": 1.1273, "step": 1342 }, { "epoch": 0.2730229721488107, "grad_norm": 0.11829452961683273, "learning_rate": 0.000172805857825689, "loss": 0.8299, "step": 1343 }, { "epoch": 0.2732262655011181, "grad_norm": 0.12499269843101501, "learning_rate": 0.00017278551815315773, "loss": 1.1501, "step": 1344 }, { "epoch": 0.2734295588534255, "grad_norm": 0.13114666938781738, "learning_rate": 0.00017276517848062648, "loss": 1.0625, "step": 1345 }, { "epoch": 0.27363285220573286, "grad_norm": 0.1208108589053154, "learning_rate": 0.0001727448388080952, "loss": 0.965, "step": 1346 }, { "epoch": 0.27383614555804026, "grad_norm": 0.12325561046600342, "learning_rate": 0.00017272449913556393, "loss": 1.0976, "step": 1347 }, { "epoch": 0.27403943891034765, "grad_norm": 0.12004940211772919, "learning_rate": 0.00017270415946303265, "loss": 0.9958, "step": 1348 }, { "epoch": 0.274242732262655, "grad_norm": 0.1253954917192459, "learning_rate": 0.00017268381979050138, "loss": 1.1158, "step": 1349 }, { "epoch": 0.2744460256149624, "grad_norm": 0.12844887375831604, "learning_rate": 0.0001726634801179701, "loss": 1.0849, "step": 1350 }, { "epoch": 0.2746493189672698, "grad_norm": 0.1340886950492859, "learning_rate": 0.00017264314044543883, "loss": 1.2566, "step": 1351 }, { "epoch": 0.27485261231957714, "grad_norm": 0.12355068325996399, "learning_rate": 0.00017262280077290755, "loss": 0.9769, "step": 1352 }, { "epoch": 0.27505590567188454, "grad_norm": 0.10396768152713776, "learning_rate": 0.0001726024611003763, "loss": 0.9058, "step": 1353 }, { "epoch": 0.27525919902419194, "grad_norm": 0.1249571368098259, "learning_rate": 0.00017258212142784503, "loss": 1.0982, "step": 1354 }, { "epoch": 0.2754624923764993, "grad_norm": 0.13168682157993317, "learning_rate": 0.00017256178175531375, "loss": 1.077, "step": 1355 }, { "epoch": 0.2756657857288067, "grad_norm": 0.11570144444704056, "learning_rate": 0.00017254144208278248, "loss": 0.9515, "step": 1356 }, { "epoch": 0.275869079081114, "grad_norm": 0.13097792863845825, "learning_rate": 0.0001725211024102512, "loss": 1.1836, "step": 1357 }, { "epoch": 0.2760723724334214, "grad_norm": 0.13371975719928741, "learning_rate": 0.00017250076273771993, "loss": 1.1521, "step": 1358 }, { "epoch": 0.2762756657857288, "grad_norm": 0.11649662256240845, "learning_rate": 0.00017248042306518865, "loss": 0.9173, "step": 1359 }, { "epoch": 0.27647895913803616, "grad_norm": 0.1347874402999878, "learning_rate": 0.00017246008339265738, "loss": 1.2533, "step": 1360 }, { "epoch": 0.27668225249034356, "grad_norm": 0.13108506798744202, "learning_rate": 0.00017243974372012613, "loss": 1.28, "step": 1361 }, { "epoch": 0.27688554584265096, "grad_norm": 0.12440016865730286, "learning_rate": 0.00017241940404759485, "loss": 1.0599, "step": 1362 }, { "epoch": 0.2770888391949583, "grad_norm": 0.14487305283546448, "learning_rate": 0.00017239906437506358, "loss": 0.9908, "step": 1363 }, { "epoch": 0.2772921325472657, "grad_norm": 0.1289856880903244, "learning_rate": 0.0001723787247025323, "loss": 1.0855, "step": 1364 }, { "epoch": 0.2774954258995731, "grad_norm": 0.12901484966278076, "learning_rate": 0.00017235838503000102, "loss": 1.1945, "step": 1365 }, { "epoch": 0.27769871925188044, "grad_norm": 0.12738290429115295, "learning_rate": 0.00017233804535746975, "loss": 1.1233, "step": 1366 }, { "epoch": 0.27790201260418784, "grad_norm": 0.13745670020580292, "learning_rate": 0.00017231770568493847, "loss": 1.174, "step": 1367 }, { "epoch": 0.27810530595649524, "grad_norm": 0.1181466281414032, "learning_rate": 0.0001722973660124072, "loss": 1.0206, "step": 1368 }, { "epoch": 0.2783085993088026, "grad_norm": 0.11488956212997437, "learning_rate": 0.00017227702633987595, "loss": 1.0443, "step": 1369 }, { "epoch": 0.27851189266111, "grad_norm": 0.1327381134033203, "learning_rate": 0.00017225668666734467, "loss": 1.1791, "step": 1370 }, { "epoch": 0.2787151860134174, "grad_norm": 0.13029593229293823, "learning_rate": 0.0001722363469948134, "loss": 1.1022, "step": 1371 }, { "epoch": 0.2789184793657247, "grad_norm": 0.10697850584983826, "learning_rate": 0.00017221600732228212, "loss": 0.9923, "step": 1372 }, { "epoch": 0.2791217727180321, "grad_norm": 0.11224257200956345, "learning_rate": 0.00017219566764975085, "loss": 0.9701, "step": 1373 }, { "epoch": 0.2793250660703395, "grad_norm": 0.11932025849819183, "learning_rate": 0.00017217532797721957, "loss": 1.0136, "step": 1374 }, { "epoch": 0.27952835942264687, "grad_norm": 0.11104830354452133, "learning_rate": 0.0001721549883046883, "loss": 1.1131, "step": 1375 }, { "epoch": 0.27973165277495426, "grad_norm": 0.136908620595932, "learning_rate": 0.00017213464863215702, "loss": 1.2888, "step": 1376 }, { "epoch": 0.27993494612726166, "grad_norm": 0.13100826740264893, "learning_rate": 0.00017211430895962575, "loss": 1.2221, "step": 1377 }, { "epoch": 0.280138239479569, "grad_norm": 0.1406666785478592, "learning_rate": 0.0001720939692870945, "loss": 1.2672, "step": 1378 }, { "epoch": 0.2803415328318764, "grad_norm": 0.10946685820817947, "learning_rate": 0.00017207362961456322, "loss": 0.8652, "step": 1379 }, { "epoch": 0.2805448261841838, "grad_norm": 0.11411663144826889, "learning_rate": 0.00017205328994203195, "loss": 0.8172, "step": 1380 }, { "epoch": 0.28074811953649115, "grad_norm": 0.132404625415802, "learning_rate": 0.00017203295026950067, "loss": 1.12, "step": 1381 }, { "epoch": 0.28095141288879855, "grad_norm": 0.12594282627105713, "learning_rate": 0.0001720126105969694, "loss": 1.2019, "step": 1382 }, { "epoch": 0.2811547062411059, "grad_norm": 0.14421536028385162, "learning_rate": 0.00017199227092443812, "loss": 1.301, "step": 1383 }, { "epoch": 0.2813579995934133, "grad_norm": 0.118538998067379, "learning_rate": 0.00017197193125190684, "loss": 1.0952, "step": 1384 }, { "epoch": 0.2815612929457207, "grad_norm": 0.1211504191160202, "learning_rate": 0.00017195159157937557, "loss": 1.0272, "step": 1385 }, { "epoch": 0.28176458629802803, "grad_norm": 0.13460633158683777, "learning_rate": 0.00017193125190684432, "loss": 1.1372, "step": 1386 }, { "epoch": 0.28196787965033543, "grad_norm": 0.11669941991567612, "learning_rate": 0.00017191091223431304, "loss": 1.0313, "step": 1387 }, { "epoch": 0.2821711730026428, "grad_norm": 0.1414983719587326, "learning_rate": 0.00017189057256178177, "loss": 1.3215, "step": 1388 }, { "epoch": 0.28237446635495017, "grad_norm": 0.11535824090242386, "learning_rate": 0.0001718702328892505, "loss": 1.0569, "step": 1389 }, { "epoch": 0.28257775970725757, "grad_norm": 0.11279894411563873, "learning_rate": 0.00017184989321671922, "loss": 0.9706, "step": 1390 }, { "epoch": 0.28278105305956497, "grad_norm": 0.12699778378009796, "learning_rate": 0.00017182955354418794, "loss": 1.0541, "step": 1391 }, { "epoch": 0.2829843464118723, "grad_norm": 0.13677164912223816, "learning_rate": 0.00017180921387165667, "loss": 1.0118, "step": 1392 }, { "epoch": 0.2831876397641797, "grad_norm": 0.1261303573846817, "learning_rate": 0.0001717888741991254, "loss": 1.0019, "step": 1393 }, { "epoch": 0.2833909331164871, "grad_norm": 0.15269511938095093, "learning_rate": 0.00017176853452659414, "loss": 1.1414, "step": 1394 }, { "epoch": 0.28359422646879445, "grad_norm": 0.11726024746894836, "learning_rate": 0.00017174819485406287, "loss": 1.071, "step": 1395 }, { "epoch": 0.28379751982110185, "grad_norm": 0.10793468356132507, "learning_rate": 0.0001717278551815316, "loss": 1.0911, "step": 1396 }, { "epoch": 0.28400081317340925, "grad_norm": 0.13417348265647888, "learning_rate": 0.00017170751550900032, "loss": 1.182, "step": 1397 }, { "epoch": 0.2842041065257166, "grad_norm": 0.1220618337392807, "learning_rate": 0.00017168717583646904, "loss": 1.1165, "step": 1398 }, { "epoch": 0.284407399878024, "grad_norm": 0.1326867640018463, "learning_rate": 0.00017166683616393776, "loss": 1.0426, "step": 1399 }, { "epoch": 0.2846106932303314, "grad_norm": 0.12562425434589386, "learning_rate": 0.0001716464964914065, "loss": 1.1779, "step": 1400 }, { "epoch": 0.28481398658263873, "grad_norm": 0.13102425634860992, "learning_rate": 0.00017162615681887521, "loss": 1.1402, "step": 1401 }, { "epoch": 0.28501727993494613, "grad_norm": 0.12704792618751526, "learning_rate": 0.00017160581714634397, "loss": 1.18, "step": 1402 }, { "epoch": 0.28522057328725353, "grad_norm": 0.12526075541973114, "learning_rate": 0.0001715854774738127, "loss": 1.109, "step": 1403 }, { "epoch": 0.2854238666395609, "grad_norm": 0.12174190580844879, "learning_rate": 0.00017156513780128141, "loss": 1.0839, "step": 1404 }, { "epoch": 0.2856271599918683, "grad_norm": 0.13030166923999786, "learning_rate": 0.00017154479812875014, "loss": 1.0982, "step": 1405 }, { "epoch": 0.2858304533441757, "grad_norm": 0.12179411202669144, "learning_rate": 0.00017152445845621886, "loss": 1.0617, "step": 1406 }, { "epoch": 0.286033746696483, "grad_norm": 0.12964552640914917, "learning_rate": 0.0001715041187836876, "loss": 1.117, "step": 1407 }, { "epoch": 0.2862370400487904, "grad_norm": 0.12146733701229095, "learning_rate": 0.0001714837791111563, "loss": 1.1715, "step": 1408 }, { "epoch": 0.28644033340109776, "grad_norm": 0.12994210422039032, "learning_rate": 0.00017146343943862504, "loss": 1.1975, "step": 1409 }, { "epoch": 0.28664362675340516, "grad_norm": 0.12996168434619904, "learning_rate": 0.0001714430997660938, "loss": 1.1968, "step": 1410 }, { "epoch": 0.28684692010571256, "grad_norm": 0.13590598106384277, "learning_rate": 0.0001714227600935625, "loss": 1.1045, "step": 1411 }, { "epoch": 0.2870502134580199, "grad_norm": 0.12337225675582886, "learning_rate": 0.00017140242042103124, "loss": 1.1262, "step": 1412 }, { "epoch": 0.2872535068103273, "grad_norm": 0.11442485451698303, "learning_rate": 0.00017138208074849996, "loss": 1.0697, "step": 1413 }, { "epoch": 0.2874568001626347, "grad_norm": 0.1333555281162262, "learning_rate": 0.00017136174107596869, "loss": 0.9691, "step": 1414 }, { "epoch": 0.28766009351494204, "grad_norm": 0.13435356318950653, "learning_rate": 0.0001713414014034374, "loss": 1.071, "step": 1415 }, { "epoch": 0.28786338686724944, "grad_norm": 0.11869612336158752, "learning_rate": 0.00017132106173090613, "loss": 1.2081, "step": 1416 }, { "epoch": 0.28806668021955684, "grad_norm": 0.13402745127677917, "learning_rate": 0.00017130072205837486, "loss": 1.1887, "step": 1417 }, { "epoch": 0.2882699735718642, "grad_norm": 0.1282026469707489, "learning_rate": 0.00017128038238584358, "loss": 1.1802, "step": 1418 }, { "epoch": 0.2884732669241716, "grad_norm": 0.12006261944770813, "learning_rate": 0.00017126004271331234, "loss": 1.0366, "step": 1419 }, { "epoch": 0.288676560276479, "grad_norm": 0.10971211642026901, "learning_rate": 0.00017123970304078106, "loss": 1.0502, "step": 1420 }, { "epoch": 0.2888798536287863, "grad_norm": 0.12401802092790604, "learning_rate": 0.00017121936336824978, "loss": 0.9525, "step": 1421 }, { "epoch": 0.2890831469810937, "grad_norm": 0.12699580192565918, "learning_rate": 0.0001711990236957185, "loss": 1.0588, "step": 1422 }, { "epoch": 0.2892864403334011, "grad_norm": 0.10931636393070221, "learning_rate": 0.00017117868402318723, "loss": 0.9512, "step": 1423 }, { "epoch": 0.28948973368570846, "grad_norm": 0.13325555622577667, "learning_rate": 0.00017115834435065596, "loss": 1.2333, "step": 1424 }, { "epoch": 0.28969302703801586, "grad_norm": 0.1266210675239563, "learning_rate": 0.00017113800467812468, "loss": 1.018, "step": 1425 }, { "epoch": 0.28989632039032326, "grad_norm": 0.12187005579471588, "learning_rate": 0.0001711176650055934, "loss": 1.0865, "step": 1426 }, { "epoch": 0.2900996137426306, "grad_norm": 0.10819690674543381, "learning_rate": 0.00017109732533306216, "loss": 0.8355, "step": 1427 }, { "epoch": 0.290302907094938, "grad_norm": 0.11890331655740738, "learning_rate": 0.00017107698566053088, "loss": 1.0521, "step": 1428 }, { "epoch": 0.2905062004472454, "grad_norm": 0.12693597376346588, "learning_rate": 0.0001710566459879996, "loss": 1.1304, "step": 1429 }, { "epoch": 0.29070949379955274, "grad_norm": 0.12627696990966797, "learning_rate": 0.00017103630631546833, "loss": 1.0533, "step": 1430 }, { "epoch": 0.29091278715186014, "grad_norm": 0.13593046367168427, "learning_rate": 0.00017101596664293706, "loss": 1.2738, "step": 1431 }, { "epoch": 0.29111608050416754, "grad_norm": 0.10364729166030884, "learning_rate": 0.00017099562697040578, "loss": 0.9938, "step": 1432 }, { "epoch": 0.2913193738564749, "grad_norm": 0.11455982178449631, "learning_rate": 0.0001709752872978745, "loss": 0.9798, "step": 1433 }, { "epoch": 0.2915226672087823, "grad_norm": 0.12030831724405289, "learning_rate": 0.00017095494762534323, "loss": 1.1068, "step": 1434 }, { "epoch": 0.2917259605610896, "grad_norm": 0.12434829771518707, "learning_rate": 0.00017093460795281198, "loss": 0.9511, "step": 1435 }, { "epoch": 0.291929253913397, "grad_norm": 0.13269619643688202, "learning_rate": 0.0001709142682802807, "loss": 1.0469, "step": 1436 }, { "epoch": 0.2921325472657044, "grad_norm": 0.12037021666765213, "learning_rate": 0.00017089392860774943, "loss": 1.1413, "step": 1437 }, { "epoch": 0.29233584061801177, "grad_norm": 0.1290545016527176, "learning_rate": 0.00017087358893521815, "loss": 0.9745, "step": 1438 }, { "epoch": 0.29253913397031917, "grad_norm": 0.13319085538387299, "learning_rate": 0.00017085324926268688, "loss": 1.3102, "step": 1439 }, { "epoch": 0.29274242732262656, "grad_norm": 0.11888034641742706, "learning_rate": 0.0001708329095901556, "loss": 0.982, "step": 1440 }, { "epoch": 0.2929457206749339, "grad_norm": 0.10824552178382874, "learning_rate": 0.00017081256991762433, "loss": 0.922, "step": 1441 }, { "epoch": 0.2931490140272413, "grad_norm": 0.11319594085216522, "learning_rate": 0.00017079223024509305, "loss": 0.997, "step": 1442 }, { "epoch": 0.2933523073795487, "grad_norm": 0.12176964432001114, "learning_rate": 0.0001707718905725618, "loss": 0.9845, "step": 1443 }, { "epoch": 0.29355560073185605, "grad_norm": 0.13725343346595764, "learning_rate": 0.00017075155090003053, "loss": 1.3378, "step": 1444 }, { "epoch": 0.29375889408416345, "grad_norm": 0.1362079679965973, "learning_rate": 0.00017073121122749925, "loss": 1.2632, "step": 1445 }, { "epoch": 0.29396218743647085, "grad_norm": 0.12925031781196594, "learning_rate": 0.00017071087155496798, "loss": 1.0776, "step": 1446 }, { "epoch": 0.2941654807887782, "grad_norm": 0.11627811938524246, "learning_rate": 0.0001706905318824367, "loss": 0.9881, "step": 1447 }, { "epoch": 0.2943687741410856, "grad_norm": 0.13387028872966766, "learning_rate": 0.00017067019220990543, "loss": 1.0899, "step": 1448 }, { "epoch": 0.294572067493393, "grad_norm": 0.12257883697748184, "learning_rate": 0.00017064985253737415, "loss": 1.1031, "step": 1449 }, { "epoch": 0.29477536084570033, "grad_norm": 0.14938175678253174, "learning_rate": 0.00017062951286484287, "loss": 1.1557, "step": 1450 }, { "epoch": 0.29497865419800773, "grad_norm": 0.12559346854686737, "learning_rate": 0.00017060917319231163, "loss": 1.1062, "step": 1451 }, { "epoch": 0.2951819475503151, "grad_norm": 0.12475700676441193, "learning_rate": 0.00017058883351978035, "loss": 1.0834, "step": 1452 }, { "epoch": 0.29538524090262247, "grad_norm": 0.1364937126636505, "learning_rate": 0.00017056849384724908, "loss": 1.0028, "step": 1453 }, { "epoch": 0.29558853425492987, "grad_norm": 0.12429028004407883, "learning_rate": 0.0001705481541747178, "loss": 1.1087, "step": 1454 }, { "epoch": 0.29579182760723727, "grad_norm": 0.1251228153705597, "learning_rate": 0.00017052781450218652, "loss": 0.9675, "step": 1455 }, { "epoch": 0.2959951209595446, "grad_norm": 0.12485919892787933, "learning_rate": 0.00017050747482965525, "loss": 1.0045, "step": 1456 }, { "epoch": 0.296198414311852, "grad_norm": 0.12948845326900482, "learning_rate": 0.00017048713515712397, "loss": 1.1154, "step": 1457 }, { "epoch": 0.2964017076641594, "grad_norm": 0.1288408488035202, "learning_rate": 0.0001704667954845927, "loss": 1.1203, "step": 1458 }, { "epoch": 0.29660500101646675, "grad_norm": 0.13588744401931763, "learning_rate": 0.00017044645581206142, "loss": 1.1436, "step": 1459 }, { "epoch": 0.29680829436877415, "grad_norm": 0.1264243721961975, "learning_rate": 0.00017042611613953017, "loss": 1.1903, "step": 1460 }, { "epoch": 0.2970115877210815, "grad_norm": 0.12819139659404755, "learning_rate": 0.0001704057764669989, "loss": 1.1337, "step": 1461 }, { "epoch": 0.2972148810733889, "grad_norm": 0.1189684271812439, "learning_rate": 0.00017038543679446762, "loss": 1.1437, "step": 1462 }, { "epoch": 0.2974181744256963, "grad_norm": 0.1304028183221817, "learning_rate": 0.00017036509712193635, "loss": 1.1653, "step": 1463 }, { "epoch": 0.29762146777800363, "grad_norm": 0.12161426246166229, "learning_rate": 0.00017034475744940507, "loss": 1.0482, "step": 1464 }, { "epoch": 0.29782476113031103, "grad_norm": 0.1224290132522583, "learning_rate": 0.0001703244177768738, "loss": 1.0592, "step": 1465 }, { "epoch": 0.29802805448261843, "grad_norm": 0.1365649402141571, "learning_rate": 0.00017030407810434252, "loss": 1.0758, "step": 1466 }, { "epoch": 0.2982313478349258, "grad_norm": 0.12406224012374878, "learning_rate": 0.00017028373843181124, "loss": 1.0901, "step": 1467 }, { "epoch": 0.2984346411872332, "grad_norm": 0.13438360393047333, "learning_rate": 0.00017026339875928, "loss": 1.1552, "step": 1468 }, { "epoch": 0.2986379345395406, "grad_norm": 0.14297276735305786, "learning_rate": 0.00017024305908674872, "loss": 1.1271, "step": 1469 }, { "epoch": 0.2988412278918479, "grad_norm": 0.11946640908718109, "learning_rate": 0.00017022271941421745, "loss": 1.0295, "step": 1470 }, { "epoch": 0.2990445212441553, "grad_norm": 0.12182927876710892, "learning_rate": 0.00017020237974168617, "loss": 1.0381, "step": 1471 }, { "epoch": 0.2992478145964627, "grad_norm": 0.1238449215888977, "learning_rate": 0.0001701820400691549, "loss": 1.0399, "step": 1472 }, { "epoch": 0.29945110794877006, "grad_norm": 0.12575775384902954, "learning_rate": 0.00017016170039662362, "loss": 1.1552, "step": 1473 }, { "epoch": 0.29965440130107746, "grad_norm": 0.14087268710136414, "learning_rate": 0.00017014136072409234, "loss": 1.1225, "step": 1474 }, { "epoch": 0.29985769465338485, "grad_norm": 0.13070684671401978, "learning_rate": 0.00017012102105156107, "loss": 1.1097, "step": 1475 }, { "epoch": 0.3000609880056922, "grad_norm": 0.12527720630168915, "learning_rate": 0.00017010068137902982, "loss": 1.0171, "step": 1476 }, { "epoch": 0.3002642813579996, "grad_norm": 0.12080081552267075, "learning_rate": 0.00017008034170649854, "loss": 1.0934, "step": 1477 }, { "epoch": 0.300467574710307, "grad_norm": 0.13225379586219788, "learning_rate": 0.00017006000203396727, "loss": 1.1286, "step": 1478 }, { "epoch": 0.30067086806261434, "grad_norm": 0.14612498879432678, "learning_rate": 0.000170039662361436, "loss": 1.4, "step": 1479 }, { "epoch": 0.30087416141492174, "grad_norm": 0.12612837553024292, "learning_rate": 0.00017001932268890472, "loss": 0.9265, "step": 1480 }, { "epoch": 0.30107745476722914, "grad_norm": 0.11075981706380844, "learning_rate": 0.00016999898301637344, "loss": 0.992, "step": 1481 }, { "epoch": 0.3012807481195365, "grad_norm": 0.11420360207557678, "learning_rate": 0.00016997864334384217, "loss": 1.033, "step": 1482 }, { "epoch": 0.3014840414718439, "grad_norm": 0.1344219148159027, "learning_rate": 0.0001699583036713109, "loss": 1.0934, "step": 1483 }, { "epoch": 0.3016873348241513, "grad_norm": 0.13956451416015625, "learning_rate": 0.00016993796399877964, "loss": 1.2297, "step": 1484 }, { "epoch": 0.3018906281764586, "grad_norm": 0.1293005496263504, "learning_rate": 0.00016991762432624837, "loss": 1.0928, "step": 1485 }, { "epoch": 0.302093921528766, "grad_norm": 0.11039478331804276, "learning_rate": 0.0001698972846537171, "loss": 0.9906, "step": 1486 }, { "epoch": 0.30229721488107336, "grad_norm": 0.13603124022483826, "learning_rate": 0.00016987694498118582, "loss": 1.3131, "step": 1487 }, { "epoch": 0.30250050823338076, "grad_norm": 0.14525099098682404, "learning_rate": 0.00016985660530865454, "loss": 1.1205, "step": 1488 }, { "epoch": 0.30270380158568816, "grad_norm": 0.14237269759178162, "learning_rate": 0.00016983626563612326, "loss": 1.1144, "step": 1489 }, { "epoch": 0.3029070949379955, "grad_norm": 0.10434848070144653, "learning_rate": 0.000169815925963592, "loss": 0.9038, "step": 1490 }, { "epoch": 0.3031103882903029, "grad_norm": 0.11946713179349899, "learning_rate": 0.0001697955862910607, "loss": 1.0611, "step": 1491 }, { "epoch": 0.3033136816426103, "grad_norm": 0.12547194957733154, "learning_rate": 0.00016977524661852946, "loss": 0.999, "step": 1492 }, { "epoch": 0.30351697499491764, "grad_norm": 0.13156647980213165, "learning_rate": 0.0001697549069459982, "loss": 1.1174, "step": 1493 }, { "epoch": 0.30372026834722504, "grad_norm": 0.13008251786231995, "learning_rate": 0.00016973456727346691, "loss": 1.1239, "step": 1494 }, { "epoch": 0.30392356169953244, "grad_norm": 0.1194852888584137, "learning_rate": 0.00016971422760093564, "loss": 0.9974, "step": 1495 }, { "epoch": 0.3041268550518398, "grad_norm": 0.12988907098770142, "learning_rate": 0.00016969388792840436, "loss": 1.0105, "step": 1496 }, { "epoch": 0.3043301484041472, "grad_norm": 0.13736090064048767, "learning_rate": 0.0001696735482558731, "loss": 1.167, "step": 1497 }, { "epoch": 0.3045334417564546, "grad_norm": 0.12946954369544983, "learning_rate": 0.0001696532085833418, "loss": 1.1628, "step": 1498 }, { "epoch": 0.3047367351087619, "grad_norm": 0.12599951028823853, "learning_rate": 0.00016963286891081054, "loss": 1.2581, "step": 1499 }, { "epoch": 0.3049400284610693, "grad_norm": 0.12264920026063919, "learning_rate": 0.00016961252923827926, "loss": 0.966, "step": 1500 }, { "epoch": 0.3051433218133767, "grad_norm": 0.12567077577114105, "learning_rate": 0.000169592189565748, "loss": 1.0898, "step": 1501 }, { "epoch": 0.30534661516568407, "grad_norm": 0.12665922939777374, "learning_rate": 0.00016957184989321674, "loss": 1.1621, "step": 1502 }, { "epoch": 0.30554990851799146, "grad_norm": 0.10949800908565521, "learning_rate": 0.00016955151022068546, "loss": 0.9312, "step": 1503 }, { "epoch": 0.30575320187029886, "grad_norm": 0.13273455202579498, "learning_rate": 0.00016953117054815419, "loss": 1.1535, "step": 1504 }, { "epoch": 0.3059564952226062, "grad_norm": 0.13857555389404297, "learning_rate": 0.0001695108308756229, "loss": 1.1564, "step": 1505 }, { "epoch": 0.3061597885749136, "grad_norm": 0.10915102064609528, "learning_rate": 0.00016949049120309163, "loss": 0.8977, "step": 1506 }, { "epoch": 0.306363081927221, "grad_norm": 0.11122920364141464, "learning_rate": 0.00016947015153056036, "loss": 0.9389, "step": 1507 }, { "epoch": 0.30656637527952835, "grad_norm": 0.13575953245162964, "learning_rate": 0.00016944981185802908, "loss": 1.2634, "step": 1508 }, { "epoch": 0.30676966863183575, "grad_norm": 0.12309823930263519, "learning_rate": 0.00016942947218549783, "loss": 1.0562, "step": 1509 }, { "epoch": 0.30697296198414314, "grad_norm": 0.13939395546913147, "learning_rate": 0.00016940913251296656, "loss": 1.2801, "step": 1510 }, { "epoch": 0.3071762553364505, "grad_norm": 0.11922150105237961, "learning_rate": 0.00016938879284043528, "loss": 1.0694, "step": 1511 }, { "epoch": 0.3073795486887579, "grad_norm": 0.12427409738302231, "learning_rate": 0.000169368453167904, "loss": 1.0676, "step": 1512 }, { "epoch": 0.30758284204106523, "grad_norm": 0.11560991406440735, "learning_rate": 0.00016934811349537273, "loss": 0.9416, "step": 1513 }, { "epoch": 0.30778613539337263, "grad_norm": 0.12494566291570663, "learning_rate": 0.00016932777382284146, "loss": 1.0484, "step": 1514 }, { "epoch": 0.30798942874568, "grad_norm": 0.14169259369373322, "learning_rate": 0.00016930743415031018, "loss": 1.2045, "step": 1515 }, { "epoch": 0.30819272209798737, "grad_norm": 0.13265348970890045, "learning_rate": 0.0001692870944777789, "loss": 0.9964, "step": 1516 }, { "epoch": 0.30839601545029477, "grad_norm": 0.1246609166264534, "learning_rate": 0.00016926675480524766, "loss": 1.0218, "step": 1517 }, { "epoch": 0.30859930880260217, "grad_norm": 0.1305045336484909, "learning_rate": 0.00016924641513271638, "loss": 1.0899, "step": 1518 }, { "epoch": 0.3088026021549095, "grad_norm": 0.1269298940896988, "learning_rate": 0.0001692260754601851, "loss": 1.1613, "step": 1519 }, { "epoch": 0.3090058955072169, "grad_norm": 0.13356846570968628, "learning_rate": 0.00016920573578765383, "loss": 1.2171, "step": 1520 }, { "epoch": 0.3092091888595243, "grad_norm": 0.12417469173669815, "learning_rate": 0.00016918539611512256, "loss": 1.0345, "step": 1521 }, { "epoch": 0.30941248221183165, "grad_norm": 0.12965606153011322, "learning_rate": 0.00016916505644259128, "loss": 1.003, "step": 1522 }, { "epoch": 0.30961577556413905, "grad_norm": 0.13075895607471466, "learning_rate": 0.00016914471677006, "loss": 1.1521, "step": 1523 }, { "epoch": 0.30981906891644645, "grad_norm": 0.1491623818874359, "learning_rate": 0.00016912437709752873, "loss": 1.1669, "step": 1524 }, { "epoch": 0.3100223622687538, "grad_norm": 0.13368669152259827, "learning_rate": 0.00016910403742499748, "loss": 1.1996, "step": 1525 }, { "epoch": 0.3102256556210612, "grad_norm": 0.12484747171401978, "learning_rate": 0.0001690836977524662, "loss": 0.9979, "step": 1526 }, { "epoch": 0.3104289489733686, "grad_norm": 0.11716404557228088, "learning_rate": 0.00016906335807993493, "loss": 1.0686, "step": 1527 }, { "epoch": 0.31063224232567593, "grad_norm": 0.1104549840092659, "learning_rate": 0.00016904301840740365, "loss": 0.9746, "step": 1528 }, { "epoch": 0.31083553567798333, "grad_norm": 0.1288052350282669, "learning_rate": 0.00016902267873487238, "loss": 1.0593, "step": 1529 }, { "epoch": 0.31103882903029073, "grad_norm": 0.13284744322299957, "learning_rate": 0.0001690023390623411, "loss": 1.1837, "step": 1530 }, { "epoch": 0.3112421223825981, "grad_norm": 0.10993791371583939, "learning_rate": 0.00016898199938980983, "loss": 0.9076, "step": 1531 }, { "epoch": 0.3114454157349055, "grad_norm": 0.1289556920528412, "learning_rate": 0.00016896165971727855, "loss": 1.124, "step": 1532 }, { "epoch": 0.31164870908721287, "grad_norm": 0.12656551599502563, "learning_rate": 0.0001689413200447473, "loss": 1.0672, "step": 1533 }, { "epoch": 0.3118520024395202, "grad_norm": 0.12359779328107834, "learning_rate": 0.00016892098037221603, "loss": 1.1162, "step": 1534 }, { "epoch": 0.3120552957918276, "grad_norm": 0.13356052339076996, "learning_rate": 0.00016890064069968475, "loss": 1.1927, "step": 1535 }, { "epoch": 0.312258589144135, "grad_norm": 0.12397721409797668, "learning_rate": 0.00016888030102715348, "loss": 1.0744, "step": 1536 }, { "epoch": 0.31246188249644236, "grad_norm": 0.14322160184383392, "learning_rate": 0.0001688599613546222, "loss": 1.076, "step": 1537 }, { "epoch": 0.31266517584874975, "grad_norm": 0.1378001719713211, "learning_rate": 0.00016883962168209093, "loss": 1.4044, "step": 1538 }, { "epoch": 0.3128684692010571, "grad_norm": 0.12438174337148666, "learning_rate": 0.00016881928200955965, "loss": 1.1154, "step": 1539 }, { "epoch": 0.3130717625533645, "grad_norm": 0.12698177993297577, "learning_rate": 0.00016879894233702837, "loss": 1.1988, "step": 1540 }, { "epoch": 0.3132750559056719, "grad_norm": 0.12074883282184601, "learning_rate": 0.00016877860266449713, "loss": 1.0175, "step": 1541 }, { "epoch": 0.31347834925797924, "grad_norm": 0.13820214569568634, "learning_rate": 0.00016875826299196585, "loss": 1.0498, "step": 1542 }, { "epoch": 0.31368164261028664, "grad_norm": 0.14697261154651642, "learning_rate": 0.00016873792331943458, "loss": 1.2677, "step": 1543 }, { "epoch": 0.31388493596259404, "grad_norm": 0.13973405957221985, "learning_rate": 0.0001687175836469033, "loss": 1.2233, "step": 1544 }, { "epoch": 0.3140882293149014, "grad_norm": 0.1303880661725998, "learning_rate": 0.000168697243974372, "loss": 1.1023, "step": 1545 }, { "epoch": 0.3142915226672088, "grad_norm": 0.13434049487113953, "learning_rate": 0.00016867690430184075, "loss": 1.1206, "step": 1546 }, { "epoch": 0.3144948160195162, "grad_norm": 0.11447029560804367, "learning_rate": 0.00016865656462930947, "loss": 0.978, "step": 1547 }, { "epoch": 0.3146981093718235, "grad_norm": 0.12716947495937347, "learning_rate": 0.0001686362249567782, "loss": 1.1544, "step": 1548 }, { "epoch": 0.3149014027241309, "grad_norm": 0.12545545399188995, "learning_rate": 0.00016861588528424692, "loss": 0.9976, "step": 1549 }, { "epoch": 0.3151046960764383, "grad_norm": 0.13446862995624542, "learning_rate": 0.00016859554561171567, "loss": 1.1167, "step": 1550 }, { "epoch": 0.31530798942874566, "grad_norm": 0.12542487680912018, "learning_rate": 0.0001685752059391844, "loss": 1.1148, "step": 1551 }, { "epoch": 0.31551128278105306, "grad_norm": 0.12793605029582977, "learning_rate": 0.00016855486626665312, "loss": 1.1139, "step": 1552 }, { "epoch": 0.31571457613336046, "grad_norm": 0.13481125235557556, "learning_rate": 0.00016853452659412182, "loss": 1.144, "step": 1553 }, { "epoch": 0.3159178694856678, "grad_norm": 0.11555742472410202, "learning_rate": 0.00016851418692159057, "loss": 1.0276, "step": 1554 }, { "epoch": 0.3161211628379752, "grad_norm": 0.11695119738578796, "learning_rate": 0.0001684938472490593, "loss": 0.9493, "step": 1555 }, { "epoch": 0.3163244561902826, "grad_norm": 0.13503003120422363, "learning_rate": 0.00016847350757652802, "loss": 1.0556, "step": 1556 }, { "epoch": 0.31652774954258994, "grad_norm": 0.1347092092037201, "learning_rate": 0.00016845316790399674, "loss": 1.0362, "step": 1557 }, { "epoch": 0.31673104289489734, "grad_norm": 0.12576071918010712, "learning_rate": 0.0001684328282314655, "loss": 1.085, "step": 1558 }, { "epoch": 0.31693433624720474, "grad_norm": 0.1280100792646408, "learning_rate": 0.00016841248855893422, "loss": 1.2004, "step": 1559 }, { "epoch": 0.3171376295995121, "grad_norm": 0.11573471873998642, "learning_rate": 0.00016839214888640295, "loss": 0.9058, "step": 1560 }, { "epoch": 0.3173409229518195, "grad_norm": 0.12192318588495255, "learning_rate": 0.00016837180921387167, "loss": 0.9789, "step": 1561 }, { "epoch": 0.3175442163041269, "grad_norm": 0.1251290738582611, "learning_rate": 0.0001683514695413404, "loss": 0.9818, "step": 1562 }, { "epoch": 0.3177475096564342, "grad_norm": 0.12726342678070068, "learning_rate": 0.00016833112986880912, "loss": 0.9911, "step": 1563 }, { "epoch": 0.3179508030087416, "grad_norm": 0.12146829068660736, "learning_rate": 0.00016831079019627784, "loss": 1.0005, "step": 1564 }, { "epoch": 0.318154096361049, "grad_norm": 0.12948118150234222, "learning_rate": 0.00016829045052374657, "loss": 0.9286, "step": 1565 }, { "epoch": 0.31835738971335636, "grad_norm": 0.1411774903535843, "learning_rate": 0.00016827011085121532, "loss": 1.031, "step": 1566 }, { "epoch": 0.31856068306566376, "grad_norm": 0.12407765537500381, "learning_rate": 0.00016824977117868404, "loss": 1.0538, "step": 1567 }, { "epoch": 0.3187639764179711, "grad_norm": 0.1235983669757843, "learning_rate": 0.00016822943150615277, "loss": 0.9356, "step": 1568 }, { "epoch": 0.3189672697702785, "grad_norm": 0.13756640255451202, "learning_rate": 0.0001682090918336215, "loss": 1.3008, "step": 1569 }, { "epoch": 0.3191705631225859, "grad_norm": 0.14735132455825806, "learning_rate": 0.00016818875216109022, "loss": 1.1271, "step": 1570 }, { "epoch": 0.31937385647489325, "grad_norm": 0.14694719016551971, "learning_rate": 0.00016816841248855894, "loss": 1.1222, "step": 1571 }, { "epoch": 0.31957714982720065, "grad_norm": 0.10828382521867752, "learning_rate": 0.00016814807281602767, "loss": 1.0565, "step": 1572 }, { "epoch": 0.31978044317950804, "grad_norm": 0.1332756131887436, "learning_rate": 0.0001681277331434964, "loss": 1.0085, "step": 1573 }, { "epoch": 0.3199837365318154, "grad_norm": 0.12354031950235367, "learning_rate": 0.00016810739347096514, "loss": 1.1101, "step": 1574 }, { "epoch": 0.3201870298841228, "grad_norm": 0.1273805797100067, "learning_rate": 0.00016808705379843387, "loss": 1.1402, "step": 1575 }, { "epoch": 0.3203903232364302, "grad_norm": 0.1219901368021965, "learning_rate": 0.0001680667141259026, "loss": 1.1955, "step": 1576 }, { "epoch": 0.32059361658873753, "grad_norm": 0.13021346926689148, "learning_rate": 0.00016804637445337132, "loss": 1.2073, "step": 1577 }, { "epoch": 0.3207969099410449, "grad_norm": 0.11928975582122803, "learning_rate": 0.00016802603478084004, "loss": 1.0758, "step": 1578 }, { "epoch": 0.3210002032933523, "grad_norm": 0.10524530708789825, "learning_rate": 0.00016800569510830876, "loss": 0.9655, "step": 1579 }, { "epoch": 0.32120349664565967, "grad_norm": 0.13994352519512177, "learning_rate": 0.0001679853554357775, "loss": 1.1405, "step": 1580 }, { "epoch": 0.32140678999796707, "grad_norm": 0.13520392775535583, "learning_rate": 0.0001679650157632462, "loss": 1.3525, "step": 1581 }, { "epoch": 0.32161008335027447, "grad_norm": 0.13306692242622375, "learning_rate": 0.00016794467609071496, "loss": 1.3286, "step": 1582 }, { "epoch": 0.3218133767025818, "grad_norm": 0.1361495852470398, "learning_rate": 0.0001679243364181837, "loss": 1.0468, "step": 1583 }, { "epoch": 0.3220166700548892, "grad_norm": 0.1192341074347496, "learning_rate": 0.0001679039967456524, "loss": 0.7855, "step": 1584 }, { "epoch": 0.3222199634071966, "grad_norm": 0.12359831482172012, "learning_rate": 0.00016788365707312114, "loss": 1.1627, "step": 1585 }, { "epoch": 0.32242325675950395, "grad_norm": 0.1272861659526825, "learning_rate": 0.00016786331740058984, "loss": 1.0442, "step": 1586 }, { "epoch": 0.32262655011181135, "grad_norm": 0.1261843740940094, "learning_rate": 0.0001678429777280586, "loss": 1.0033, "step": 1587 }, { "epoch": 0.32282984346411875, "grad_norm": 0.11822490394115448, "learning_rate": 0.0001678226380555273, "loss": 1.0815, "step": 1588 }, { "epoch": 0.3230331368164261, "grad_norm": 0.13497643172740936, "learning_rate": 0.00016780229838299604, "loss": 1.0485, "step": 1589 }, { "epoch": 0.3232364301687335, "grad_norm": 0.12484399974346161, "learning_rate": 0.00016778195871046476, "loss": 1.244, "step": 1590 }, { "epoch": 0.3234397235210409, "grad_norm": 0.12844592332839966, "learning_rate": 0.0001677616190379335, "loss": 1.2803, "step": 1591 }, { "epoch": 0.32364301687334823, "grad_norm": 0.12499992549419403, "learning_rate": 0.00016774127936540224, "loss": 1.0049, "step": 1592 }, { "epoch": 0.32384631022565563, "grad_norm": 0.12357242405414581, "learning_rate": 0.00016772093969287096, "loss": 1.0463, "step": 1593 }, { "epoch": 0.324049603577963, "grad_norm": 0.11749047785997391, "learning_rate": 0.00016770060002033966, "loss": 0.9639, "step": 1594 }, { "epoch": 0.3242528969302704, "grad_norm": 0.1409110128879547, "learning_rate": 0.0001676802603478084, "loss": 1.0898, "step": 1595 }, { "epoch": 0.32445619028257777, "grad_norm": 0.1287623941898346, "learning_rate": 0.00016765992067527713, "loss": 1.1735, "step": 1596 }, { "epoch": 0.3246594836348851, "grad_norm": 0.1255931705236435, "learning_rate": 0.00016763958100274586, "loss": 1.0209, "step": 1597 }, { "epoch": 0.3248627769871925, "grad_norm": 0.1277484893798828, "learning_rate": 0.00016761924133021458, "loss": 1.1324, "step": 1598 }, { "epoch": 0.3250660703394999, "grad_norm": 0.14885109663009644, "learning_rate": 0.00016759890165768333, "loss": 1.1919, "step": 1599 }, { "epoch": 0.32526936369180726, "grad_norm": 0.12765826284885406, "learning_rate": 0.00016757856198515206, "loss": 1.1301, "step": 1600 }, { "epoch": 0.32547265704411465, "grad_norm": 0.12677320837974548, "learning_rate": 0.00016755822231262078, "loss": 1.1406, "step": 1601 }, { "epoch": 0.32567595039642205, "grad_norm": 0.12238804996013641, "learning_rate": 0.00016753788264008948, "loss": 0.9797, "step": 1602 }, { "epoch": 0.3258792437487294, "grad_norm": 0.13958637416362762, "learning_rate": 0.00016751754296755823, "loss": 1.1974, "step": 1603 }, { "epoch": 0.3260825371010368, "grad_norm": 0.12978553771972656, "learning_rate": 0.00016749720329502696, "loss": 0.9885, "step": 1604 }, { "epoch": 0.3262858304533442, "grad_norm": 0.12407691776752472, "learning_rate": 0.00016747686362249568, "loss": 1.1167, "step": 1605 }, { "epoch": 0.32648912380565154, "grad_norm": 0.13904057443141937, "learning_rate": 0.0001674565239499644, "loss": 1.1454, "step": 1606 }, { "epoch": 0.32669241715795894, "grad_norm": 0.1415109634399414, "learning_rate": 0.00016743618427743316, "loss": 1.2619, "step": 1607 }, { "epoch": 0.32689571051026634, "grad_norm": 0.11249466240406036, "learning_rate": 0.00016741584460490188, "loss": 0.9385, "step": 1608 }, { "epoch": 0.3270990038625737, "grad_norm": 0.11592496186494827, "learning_rate": 0.0001673955049323706, "loss": 0.9985, "step": 1609 }, { "epoch": 0.3273022972148811, "grad_norm": 0.11594976484775543, "learning_rate": 0.0001673751652598393, "loss": 0.9626, "step": 1610 }, { "epoch": 0.3275055905671885, "grad_norm": 0.12570694088935852, "learning_rate": 0.00016735482558730806, "loss": 0.9815, "step": 1611 }, { "epoch": 0.3277088839194958, "grad_norm": 0.12933030724525452, "learning_rate": 0.00016733448591477678, "loss": 1.0988, "step": 1612 }, { "epoch": 0.3279121772718032, "grad_norm": 0.14309881627559662, "learning_rate": 0.0001673141462422455, "loss": 1.1589, "step": 1613 }, { "epoch": 0.3281154706241106, "grad_norm": 0.14047057926654816, "learning_rate": 0.00016729380656971423, "loss": 1.2038, "step": 1614 }, { "epoch": 0.32831876397641796, "grad_norm": 0.1269095540046692, "learning_rate": 0.00016727346689718298, "loss": 0.9248, "step": 1615 }, { "epoch": 0.32852205732872536, "grad_norm": 0.14122694730758667, "learning_rate": 0.0001672531272246517, "loss": 1.1879, "step": 1616 }, { "epoch": 0.32872535068103276, "grad_norm": 0.133163183927536, "learning_rate": 0.00016723278755212043, "loss": 1.0718, "step": 1617 }, { "epoch": 0.3289286440333401, "grad_norm": 0.13817080855369568, "learning_rate": 0.00016721244787958915, "loss": 1.1519, "step": 1618 }, { "epoch": 0.3291319373856475, "grad_norm": 0.12117751687765121, "learning_rate": 0.00016719210820705788, "loss": 1.0104, "step": 1619 }, { "epoch": 0.32933523073795484, "grad_norm": 0.1269875317811966, "learning_rate": 0.0001671717685345266, "loss": 1.0644, "step": 1620 }, { "epoch": 0.32953852409026224, "grad_norm": 0.13901706039905548, "learning_rate": 0.00016715142886199533, "loss": 1.3002, "step": 1621 }, { "epoch": 0.32974181744256964, "grad_norm": 0.1284133940935135, "learning_rate": 0.00016713108918946405, "loss": 1.1447, "step": 1622 }, { "epoch": 0.329945110794877, "grad_norm": 0.13423141837120056, "learning_rate": 0.0001671107495169328, "loss": 1.2566, "step": 1623 }, { "epoch": 0.3301484041471844, "grad_norm": 0.12908455729484558, "learning_rate": 0.00016709040984440153, "loss": 1.1041, "step": 1624 }, { "epoch": 0.3303516974994918, "grad_norm": 0.1317860186100006, "learning_rate": 0.00016707007017187025, "loss": 1.1273, "step": 1625 }, { "epoch": 0.3305549908517991, "grad_norm": 0.1394864320755005, "learning_rate": 0.00016704973049933898, "loss": 1.0699, "step": 1626 }, { "epoch": 0.3307582842041065, "grad_norm": 0.1309152990579605, "learning_rate": 0.00016702939082680767, "loss": 0.9554, "step": 1627 }, { "epoch": 0.3309615775564139, "grad_norm": 0.11993929743766785, "learning_rate": 0.00016700905115427643, "loss": 0.9747, "step": 1628 }, { "epoch": 0.33116487090872126, "grad_norm": 0.11589863151311874, "learning_rate": 0.00016698871148174515, "loss": 0.9217, "step": 1629 }, { "epoch": 0.33136816426102866, "grad_norm": 0.12004578858613968, "learning_rate": 0.00016696837180921387, "loss": 0.9453, "step": 1630 }, { "epoch": 0.33157145761333606, "grad_norm": 0.1407518982887268, "learning_rate": 0.0001669480321366826, "loss": 1.1528, "step": 1631 }, { "epoch": 0.3317747509656434, "grad_norm": 0.1286914050579071, "learning_rate": 0.00016692769246415135, "loss": 1.038, "step": 1632 }, { "epoch": 0.3319780443179508, "grad_norm": 0.13304589688777924, "learning_rate": 0.00016690735279162007, "loss": 1.1159, "step": 1633 }, { "epoch": 0.3321813376702582, "grad_norm": 0.13245166838169098, "learning_rate": 0.0001668870131190888, "loss": 1.0368, "step": 1634 }, { "epoch": 0.33238463102256555, "grad_norm": 0.12715977430343628, "learning_rate": 0.0001668666734465575, "loss": 1.024, "step": 1635 }, { "epoch": 0.33258792437487295, "grad_norm": 0.13726472854614258, "learning_rate": 0.00016684633377402625, "loss": 1.0574, "step": 1636 }, { "epoch": 0.33279121772718034, "grad_norm": 0.10961025953292847, "learning_rate": 0.00016682599410149497, "loss": 0.9979, "step": 1637 }, { "epoch": 0.3329945110794877, "grad_norm": 0.13879232108592987, "learning_rate": 0.0001668056544289637, "loss": 1.2669, "step": 1638 }, { "epoch": 0.3331978044317951, "grad_norm": 0.12887312471866608, "learning_rate": 0.00016678531475643242, "loss": 1.1217, "step": 1639 }, { "epoch": 0.3334010977841025, "grad_norm": 0.1309410184621811, "learning_rate": 0.00016676497508390117, "loss": 1.1331, "step": 1640 }, { "epoch": 0.33360439113640983, "grad_norm": 0.12577351927757263, "learning_rate": 0.0001667446354113699, "loss": 0.9315, "step": 1641 }, { "epoch": 0.3338076844887172, "grad_norm": 0.1263495236635208, "learning_rate": 0.00016672429573883862, "loss": 0.9782, "step": 1642 }, { "epoch": 0.3340109778410246, "grad_norm": 0.12090608477592468, "learning_rate": 0.00016670395606630732, "loss": 0.9817, "step": 1643 }, { "epoch": 0.33421427119333197, "grad_norm": 0.1330811232328415, "learning_rate": 0.00016668361639377607, "loss": 1.0682, "step": 1644 }, { "epoch": 0.33441756454563937, "grad_norm": 0.13265149295330048, "learning_rate": 0.0001666632767212448, "loss": 0.8999, "step": 1645 }, { "epoch": 0.3346208578979467, "grad_norm": 0.12737800180912018, "learning_rate": 0.00016664293704871352, "loss": 1.1103, "step": 1646 }, { "epoch": 0.3348241512502541, "grad_norm": 0.13904881477355957, "learning_rate": 0.00016662259737618224, "loss": 1.1776, "step": 1647 }, { "epoch": 0.3350274446025615, "grad_norm": 0.13159041106700897, "learning_rate": 0.000166602257703651, "loss": 1.0343, "step": 1648 }, { "epoch": 0.33523073795486885, "grad_norm": 0.12564794719219208, "learning_rate": 0.00016658191803111972, "loss": 0.9786, "step": 1649 }, { "epoch": 0.33543403130717625, "grad_norm": 0.1561056673526764, "learning_rate": 0.00016656157835858844, "loss": 1.1937, "step": 1650 }, { "epoch": 0.33563732465948365, "grad_norm": 0.13286349177360535, "learning_rate": 0.00016654123868605714, "loss": 1.0153, "step": 1651 }, { "epoch": 0.335840618011791, "grad_norm": 0.12319796532392502, "learning_rate": 0.0001665208990135259, "loss": 1.0423, "step": 1652 }, { "epoch": 0.3360439113640984, "grad_norm": 0.13758210837841034, "learning_rate": 0.00016650055934099462, "loss": 1.0618, "step": 1653 }, { "epoch": 0.3362472047164058, "grad_norm": 0.11521997302770615, "learning_rate": 0.00016648021966846334, "loss": 0.9787, "step": 1654 }, { "epoch": 0.33645049806871313, "grad_norm": 0.1308450698852539, "learning_rate": 0.00016645987999593207, "loss": 1.1284, "step": 1655 }, { "epoch": 0.33665379142102053, "grad_norm": 0.13632404804229736, "learning_rate": 0.00016643954032340082, "loss": 1.1278, "step": 1656 }, { "epoch": 0.33685708477332793, "grad_norm": 0.12073387950658798, "learning_rate": 0.00016641920065086954, "loss": 1.0399, "step": 1657 }, { "epoch": 0.3370603781256353, "grad_norm": 0.12028390169143677, "learning_rate": 0.00016639886097833827, "loss": 1.0384, "step": 1658 }, { "epoch": 0.3372636714779427, "grad_norm": 0.12499553710222244, "learning_rate": 0.00016637852130580696, "loss": 1.0769, "step": 1659 }, { "epoch": 0.33746696483025007, "grad_norm": 0.16057424247264862, "learning_rate": 0.00016635818163327572, "loss": 1.3772, "step": 1660 }, { "epoch": 0.3376702581825574, "grad_norm": 0.12566526234149933, "learning_rate": 0.00016633784196074444, "loss": 1.1722, "step": 1661 }, { "epoch": 0.3378735515348648, "grad_norm": 0.11908633261919022, "learning_rate": 0.00016631750228821317, "loss": 1.0332, "step": 1662 }, { "epoch": 0.3380768448871722, "grad_norm": 0.14457720518112183, "learning_rate": 0.0001662971626156819, "loss": 1.187, "step": 1663 }, { "epoch": 0.33828013823947956, "grad_norm": 0.12620577216148376, "learning_rate": 0.00016627682294315064, "loss": 1.2064, "step": 1664 }, { "epoch": 0.33848343159178695, "grad_norm": 0.1155720204114914, "learning_rate": 0.00016625648327061937, "loss": 0.936, "step": 1665 }, { "epoch": 0.33868672494409435, "grad_norm": 0.12141234427690506, "learning_rate": 0.0001662361435980881, "loss": 1.0185, "step": 1666 }, { "epoch": 0.3388900182964017, "grad_norm": 0.11690623313188553, "learning_rate": 0.0001662158039255568, "loss": 0.9533, "step": 1667 }, { "epoch": 0.3390933116487091, "grad_norm": 0.127701997756958, "learning_rate": 0.0001661954642530255, "loss": 1.0834, "step": 1668 }, { "epoch": 0.3392966050010165, "grad_norm": 0.12167434394359589, "learning_rate": 0.00016617512458049426, "loss": 0.9166, "step": 1669 }, { "epoch": 0.33949989835332384, "grad_norm": 0.1415378600358963, "learning_rate": 0.000166154784907963, "loss": 1.0921, "step": 1670 }, { "epoch": 0.33970319170563124, "grad_norm": 0.13397271931171417, "learning_rate": 0.0001661344452354317, "loss": 1.1214, "step": 1671 }, { "epoch": 0.3399064850579386, "grad_norm": 0.1336379051208496, "learning_rate": 0.00016611410556290044, "loss": 1.1034, "step": 1672 }, { "epoch": 0.340109778410246, "grad_norm": 0.1404540240764618, "learning_rate": 0.0001660937658903692, "loss": 1.1435, "step": 1673 }, { "epoch": 0.3403130717625534, "grad_norm": 0.10813318192958832, "learning_rate": 0.0001660734262178379, "loss": 0.8935, "step": 1674 }, { "epoch": 0.3405163651148607, "grad_norm": 0.1491374522447586, "learning_rate": 0.00016605308654530664, "loss": 1.1378, "step": 1675 }, { "epoch": 0.3407196584671681, "grad_norm": 0.12213015556335449, "learning_rate": 0.00016603274687277534, "loss": 1.1236, "step": 1676 }, { "epoch": 0.3409229518194755, "grad_norm": 0.12762251496315002, "learning_rate": 0.0001660124072002441, "loss": 1.1892, "step": 1677 }, { "epoch": 0.34112624517178286, "grad_norm": 0.14703615009784698, "learning_rate": 0.0001659920675277128, "loss": 1.1918, "step": 1678 }, { "epoch": 0.34132953852409026, "grad_norm": 0.13121256232261658, "learning_rate": 0.00016597172785518154, "loss": 1.084, "step": 1679 }, { "epoch": 0.34153283187639766, "grad_norm": 0.15220001339912415, "learning_rate": 0.00016595138818265026, "loss": 1.3663, "step": 1680 }, { "epoch": 0.341736125228705, "grad_norm": 0.1325935572385788, "learning_rate": 0.000165931048510119, "loss": 1.0294, "step": 1681 }, { "epoch": 0.3419394185810124, "grad_norm": 0.10651461035013199, "learning_rate": 0.00016591070883758774, "loss": 0.8741, "step": 1682 }, { "epoch": 0.3421427119333198, "grad_norm": 0.1287640780210495, "learning_rate": 0.00016589036916505646, "loss": 1.087, "step": 1683 }, { "epoch": 0.34234600528562714, "grad_norm": 0.1286855936050415, "learning_rate": 0.00016587002949252516, "loss": 0.9785, "step": 1684 }, { "epoch": 0.34254929863793454, "grad_norm": 0.12485534697771072, "learning_rate": 0.0001658496898199939, "loss": 0.9792, "step": 1685 }, { "epoch": 0.34275259199024194, "grad_norm": 0.11311212927103043, "learning_rate": 0.00016582935014746263, "loss": 0.9772, "step": 1686 }, { "epoch": 0.3429558853425493, "grad_norm": 0.13208623230457306, "learning_rate": 0.00016580901047493136, "loss": 1.1733, "step": 1687 }, { "epoch": 0.3431591786948567, "grad_norm": 0.11595738679170609, "learning_rate": 0.00016578867080240008, "loss": 1.0305, "step": 1688 }, { "epoch": 0.3433624720471641, "grad_norm": 0.14235566556453705, "learning_rate": 0.00016576833112986883, "loss": 1.1916, "step": 1689 }, { "epoch": 0.3435657653994714, "grad_norm": 0.12602582573890686, "learning_rate": 0.00016574799145733756, "loss": 1.0003, "step": 1690 }, { "epoch": 0.3437690587517788, "grad_norm": 0.1448718011379242, "learning_rate": 0.00016572765178480628, "loss": 1.1201, "step": 1691 }, { "epoch": 0.3439723521040862, "grad_norm": 0.12688006460666656, "learning_rate": 0.00016570731211227498, "loss": 0.9846, "step": 1692 }, { "epoch": 0.34417564545639356, "grad_norm": 0.12715177237987518, "learning_rate": 0.00016568697243974373, "loss": 1.1093, "step": 1693 }, { "epoch": 0.34437893880870096, "grad_norm": 0.14105954766273499, "learning_rate": 0.00016566663276721246, "loss": 1.2714, "step": 1694 }, { "epoch": 0.34458223216100836, "grad_norm": 0.12558870017528534, "learning_rate": 0.00016564629309468118, "loss": 0.9724, "step": 1695 }, { "epoch": 0.3447855255133157, "grad_norm": 0.11886492371559143, "learning_rate": 0.0001656259534221499, "loss": 1.1076, "step": 1696 }, { "epoch": 0.3449888188656231, "grad_norm": 0.13078825175762177, "learning_rate": 0.00016560561374961866, "loss": 1.1457, "step": 1697 }, { "epoch": 0.34519211221793045, "grad_norm": 0.12331999093294144, "learning_rate": 0.00016558527407708738, "loss": 1.0065, "step": 1698 }, { "epoch": 0.34539540557023785, "grad_norm": 0.12109193205833435, "learning_rate": 0.0001655649344045561, "loss": 1.1099, "step": 1699 }, { "epoch": 0.34559869892254524, "grad_norm": 0.1176178902387619, "learning_rate": 0.0001655445947320248, "loss": 0.9692, "step": 1700 }, { "epoch": 0.3458019922748526, "grad_norm": 0.1067582294344902, "learning_rate": 0.00016552425505949355, "loss": 0.8452, "step": 1701 }, { "epoch": 0.34600528562716, "grad_norm": 0.11509659141302109, "learning_rate": 0.00016550391538696228, "loss": 0.9062, "step": 1702 }, { "epoch": 0.3462085789794674, "grad_norm": 0.12043119221925735, "learning_rate": 0.000165483575714431, "loss": 1.112, "step": 1703 }, { "epoch": 0.34641187233177473, "grad_norm": 0.12769265472888947, "learning_rate": 0.00016546323604189973, "loss": 1.1163, "step": 1704 }, { "epoch": 0.3466151656840821, "grad_norm": 0.13460403680801392, "learning_rate": 0.00016544289636936848, "loss": 1.2376, "step": 1705 }, { "epoch": 0.3468184590363895, "grad_norm": 0.11211954802274704, "learning_rate": 0.0001654225566968372, "loss": 0.8811, "step": 1706 }, { "epoch": 0.34702175238869687, "grad_norm": 0.1208495944738388, "learning_rate": 0.00016540221702430593, "loss": 0.9858, "step": 1707 }, { "epoch": 0.34722504574100427, "grad_norm": 0.13525189459323883, "learning_rate": 0.00016538187735177463, "loss": 1.0372, "step": 1708 }, { "epoch": 0.34742833909331167, "grad_norm": 0.11987826973199844, "learning_rate": 0.00016536153767924335, "loss": 1.0966, "step": 1709 }, { "epoch": 0.347631632445619, "grad_norm": 0.12538312375545502, "learning_rate": 0.0001653411980067121, "loss": 1.0718, "step": 1710 }, { "epoch": 0.3478349257979264, "grad_norm": 0.12674830853939056, "learning_rate": 0.00016532085833418083, "loss": 1.09, "step": 1711 }, { "epoch": 0.3480382191502338, "grad_norm": 0.11861549317836761, "learning_rate": 0.00016530051866164955, "loss": 0.9924, "step": 1712 }, { "epoch": 0.34824151250254115, "grad_norm": 0.12545670568943024, "learning_rate": 0.00016528017898911828, "loss": 1.1363, "step": 1713 }, { "epoch": 0.34844480585484855, "grad_norm": 0.12180805951356888, "learning_rate": 0.00016525983931658703, "loss": 0.9195, "step": 1714 }, { "epoch": 0.34864809920715595, "grad_norm": 0.14458616077899933, "learning_rate": 0.00016523949964405575, "loss": 1.0096, "step": 1715 }, { "epoch": 0.3488513925594633, "grad_norm": 0.13006000220775604, "learning_rate": 0.00016521915997152445, "loss": 1.1037, "step": 1716 }, { "epoch": 0.3490546859117707, "grad_norm": 0.11734442412853241, "learning_rate": 0.00016519882029899317, "loss": 0.9942, "step": 1717 }, { "epoch": 0.3492579792640781, "grad_norm": 0.10168986022472382, "learning_rate": 0.00016517848062646193, "loss": 0.8829, "step": 1718 }, { "epoch": 0.34946127261638543, "grad_norm": 0.13804613053798676, "learning_rate": 0.00016515814095393065, "loss": 1.021, "step": 1719 }, { "epoch": 0.34966456596869283, "grad_norm": 0.13653217256069183, "learning_rate": 0.00016513780128139937, "loss": 1.0905, "step": 1720 }, { "epoch": 0.34986785932100023, "grad_norm": 0.12326166778802872, "learning_rate": 0.0001651174616088681, "loss": 1.0843, "step": 1721 }, { "epoch": 0.3500711526733076, "grad_norm": 0.1265186071395874, "learning_rate": 0.00016509712193633685, "loss": 1.1012, "step": 1722 }, { "epoch": 0.35027444602561497, "grad_norm": 0.12159296125173569, "learning_rate": 0.00016507678226380557, "loss": 1.0491, "step": 1723 }, { "epoch": 0.3504777393779223, "grad_norm": 0.12199139595031738, "learning_rate": 0.00016505644259127427, "loss": 1.1179, "step": 1724 }, { "epoch": 0.3506810327302297, "grad_norm": 0.13243111968040466, "learning_rate": 0.000165036102918743, "loss": 1.0576, "step": 1725 }, { "epoch": 0.3508843260825371, "grad_norm": 0.1342582106590271, "learning_rate": 0.00016501576324621175, "loss": 1.1035, "step": 1726 }, { "epoch": 0.35108761943484446, "grad_norm": 0.15361081063747406, "learning_rate": 0.00016499542357368047, "loss": 1.1772, "step": 1727 }, { "epoch": 0.35129091278715185, "grad_norm": 0.1446637064218521, "learning_rate": 0.0001649750839011492, "loss": 1.0995, "step": 1728 }, { "epoch": 0.35149420613945925, "grad_norm": 0.12943841516971588, "learning_rate": 0.00016495474422861792, "loss": 1.0938, "step": 1729 }, { "epoch": 0.3516974994917666, "grad_norm": 0.11111871153116226, "learning_rate": 0.00016493440455608667, "loss": 0.8568, "step": 1730 }, { "epoch": 0.351900792844074, "grad_norm": 0.12905767560005188, "learning_rate": 0.0001649140648835554, "loss": 1.0485, "step": 1731 }, { "epoch": 0.3521040861963814, "grad_norm": 0.14140938222408295, "learning_rate": 0.00016489372521102412, "loss": 1.0996, "step": 1732 }, { "epoch": 0.35230737954868874, "grad_norm": 0.120769202709198, "learning_rate": 0.00016487338553849282, "loss": 1.0729, "step": 1733 }, { "epoch": 0.35251067290099614, "grad_norm": 0.1240081861615181, "learning_rate": 0.00016485304586596157, "loss": 1.1505, "step": 1734 }, { "epoch": 0.35271396625330353, "grad_norm": 0.128762885928154, "learning_rate": 0.0001648327061934303, "loss": 1.09, "step": 1735 }, { "epoch": 0.3529172596056109, "grad_norm": 0.13550743460655212, "learning_rate": 0.00016481236652089902, "loss": 1.1706, "step": 1736 }, { "epoch": 0.3531205529579183, "grad_norm": 0.13279037177562714, "learning_rate": 0.00016479202684836774, "loss": 1.2231, "step": 1737 }, { "epoch": 0.3533238463102257, "grad_norm": 0.11756809800863266, "learning_rate": 0.0001647716871758365, "loss": 1.0519, "step": 1738 }, { "epoch": 0.353527139662533, "grad_norm": 0.11612554639577866, "learning_rate": 0.00016475134750330522, "loss": 0.9618, "step": 1739 }, { "epoch": 0.3537304330148404, "grad_norm": 0.12984800338745117, "learning_rate": 0.00016473100783077394, "loss": 1.1865, "step": 1740 }, { "epoch": 0.3539337263671478, "grad_norm": 0.12334571778774261, "learning_rate": 0.00016471066815824264, "loss": 1.1693, "step": 1741 }, { "epoch": 0.35413701971945516, "grad_norm": 0.13324569165706635, "learning_rate": 0.0001646903284857114, "loss": 1.0449, "step": 1742 }, { "epoch": 0.35434031307176256, "grad_norm": 0.12119297683238983, "learning_rate": 0.00016466998881318012, "loss": 0.9664, "step": 1743 }, { "epoch": 0.35454360642406996, "grad_norm": 0.12139979749917984, "learning_rate": 0.00016464964914064884, "loss": 1.1399, "step": 1744 }, { "epoch": 0.3547468997763773, "grad_norm": 0.13679492473602295, "learning_rate": 0.00016462930946811757, "loss": 1.2815, "step": 1745 }, { "epoch": 0.3549501931286847, "grad_norm": 0.14377973973751068, "learning_rate": 0.00016460896979558632, "loss": 1.1322, "step": 1746 }, { "epoch": 0.3551534864809921, "grad_norm": 0.130259171128273, "learning_rate": 0.00016458863012305504, "loss": 1.0832, "step": 1747 }, { "epoch": 0.35535677983329944, "grad_norm": 0.14110639691352844, "learning_rate": 0.00016456829045052377, "loss": 1.0277, "step": 1748 }, { "epoch": 0.35556007318560684, "grad_norm": 0.1513645052909851, "learning_rate": 0.00016454795077799246, "loss": 1.1862, "step": 1749 }, { "epoch": 0.3557633665379142, "grad_norm": 0.1150139644742012, "learning_rate": 0.0001645276111054612, "loss": 1.1027, "step": 1750 }, { "epoch": 0.3559666598902216, "grad_norm": 0.11318166553974152, "learning_rate": 0.00016450727143292994, "loss": 1.0458, "step": 1751 }, { "epoch": 0.356169953242529, "grad_norm": 0.12470010668039322, "learning_rate": 0.00016448693176039867, "loss": 1.0969, "step": 1752 }, { "epoch": 0.3563732465948363, "grad_norm": 0.12987849116325378, "learning_rate": 0.0001644665920878674, "loss": 1.2132, "step": 1753 }, { "epoch": 0.3565765399471437, "grad_norm": 0.1229574978351593, "learning_rate": 0.00016444625241533611, "loss": 1.01, "step": 1754 }, { "epoch": 0.3567798332994511, "grad_norm": 0.12658950686454773, "learning_rate": 0.00016442591274280487, "loss": 1.1362, "step": 1755 }, { "epoch": 0.35698312665175846, "grad_norm": 0.13322791457176208, "learning_rate": 0.0001644055730702736, "loss": 1.1059, "step": 1756 }, { "epoch": 0.35718642000406586, "grad_norm": 0.12812237441539764, "learning_rate": 0.0001643852333977423, "loss": 0.9566, "step": 1757 }, { "epoch": 0.35738971335637326, "grad_norm": 0.12379775196313858, "learning_rate": 0.000164364893725211, "loss": 1.0166, "step": 1758 }, { "epoch": 0.3575930067086806, "grad_norm": 0.12064617872238159, "learning_rate": 0.00016434455405267976, "loss": 1.001, "step": 1759 }, { "epoch": 0.357796300060988, "grad_norm": 0.12102466076612473, "learning_rate": 0.0001643242143801485, "loss": 1.1048, "step": 1760 }, { "epoch": 0.3579995934132954, "grad_norm": 0.1230425089597702, "learning_rate": 0.0001643038747076172, "loss": 0.962, "step": 1761 }, { "epoch": 0.35820288676560275, "grad_norm": 0.12632609903812408, "learning_rate": 0.00016428353503508594, "loss": 1.1316, "step": 1762 }, { "epoch": 0.35840618011791014, "grad_norm": 0.1422523409128189, "learning_rate": 0.0001642631953625547, "loss": 1.2044, "step": 1763 }, { "epoch": 0.35860947347021754, "grad_norm": 0.1147986575961113, "learning_rate": 0.0001642428556900234, "loss": 0.9114, "step": 1764 }, { "epoch": 0.3588127668225249, "grad_norm": 0.1134926900267601, "learning_rate": 0.0001642225160174921, "loss": 1.066, "step": 1765 }, { "epoch": 0.3590160601748323, "grad_norm": 0.10886301100254059, "learning_rate": 0.00016420217634496083, "loss": 0.9999, "step": 1766 }, { "epoch": 0.3592193535271397, "grad_norm": 0.12393435835838318, "learning_rate": 0.00016418183667242959, "loss": 1.0631, "step": 1767 }, { "epoch": 0.359422646879447, "grad_norm": 0.1252308487892151, "learning_rate": 0.0001641614969998983, "loss": 1.088, "step": 1768 }, { "epoch": 0.3596259402317544, "grad_norm": 0.13078045845031738, "learning_rate": 0.00016414115732736704, "loss": 1.0634, "step": 1769 }, { "epoch": 0.3598292335840618, "grad_norm": 0.12720254063606262, "learning_rate": 0.00016412081765483576, "loss": 1.1295, "step": 1770 }, { "epoch": 0.36003252693636917, "grad_norm": 0.12251488864421844, "learning_rate": 0.0001641004779823045, "loss": 0.9843, "step": 1771 }, { "epoch": 0.36023582028867657, "grad_norm": 0.12935830652713776, "learning_rate": 0.00016408013830977324, "loss": 1.0496, "step": 1772 }, { "epoch": 0.36043911364098397, "grad_norm": 0.12900424003601074, "learning_rate": 0.00016405979863724193, "loss": 0.9895, "step": 1773 }, { "epoch": 0.3606424069932913, "grad_norm": 0.14351366460323334, "learning_rate": 0.00016403945896471066, "loss": 1.2852, "step": 1774 }, { "epoch": 0.3608457003455987, "grad_norm": 0.12761393189430237, "learning_rate": 0.0001640191192921794, "loss": 1.086, "step": 1775 }, { "epoch": 0.36104899369790605, "grad_norm": 0.1086045354604721, "learning_rate": 0.00016399877961964813, "loss": 0.8948, "step": 1776 }, { "epoch": 0.36125228705021345, "grad_norm": 0.11502155661582947, "learning_rate": 0.00016397843994711686, "loss": 0.8168, "step": 1777 }, { "epoch": 0.36145558040252085, "grad_norm": 0.12591351568698883, "learning_rate": 0.00016395810027458558, "loss": 0.9193, "step": 1778 }, { "epoch": 0.3616588737548282, "grad_norm": 0.1310427188873291, "learning_rate": 0.00016393776060205433, "loss": 0.8922, "step": 1779 }, { "epoch": 0.3618621671071356, "grad_norm": 0.12844674289226532, "learning_rate": 0.00016391742092952306, "loss": 1.213, "step": 1780 }, { "epoch": 0.362065460459443, "grad_norm": 0.12577317655086517, "learning_rate": 0.00016389708125699176, "loss": 0.8967, "step": 1781 }, { "epoch": 0.36226875381175033, "grad_norm": 0.12694710493087769, "learning_rate": 0.00016387674158446048, "loss": 1.0951, "step": 1782 }, { "epoch": 0.36247204716405773, "grad_norm": 0.14658670127391815, "learning_rate": 0.00016385640191192923, "loss": 1.2866, "step": 1783 }, { "epoch": 0.36267534051636513, "grad_norm": 0.11917047947645187, "learning_rate": 0.00016383606223939796, "loss": 1.1441, "step": 1784 }, { "epoch": 0.3628786338686725, "grad_norm": 0.11192582547664642, "learning_rate": 0.00016381572256686668, "loss": 1.0195, "step": 1785 }, { "epoch": 0.36308192722097987, "grad_norm": 0.12563778460025787, "learning_rate": 0.0001637953828943354, "loss": 1.0646, "step": 1786 }, { "epoch": 0.36328522057328727, "grad_norm": 0.11741344630718231, "learning_rate": 0.00016377504322180416, "loss": 1.0123, "step": 1787 }, { "epoch": 0.3634885139255946, "grad_norm": 0.12519432604312897, "learning_rate": 0.00016375470354927288, "loss": 0.9654, "step": 1788 }, { "epoch": 0.363691807277902, "grad_norm": 0.13249295949935913, "learning_rate": 0.0001637343638767416, "loss": 1.1636, "step": 1789 }, { "epoch": 0.3638951006302094, "grad_norm": 0.14184780418872833, "learning_rate": 0.0001637140242042103, "loss": 1.2066, "step": 1790 }, { "epoch": 0.36409839398251675, "grad_norm": 0.13372722268104553, "learning_rate": 0.00016369368453167903, "loss": 1.0466, "step": 1791 }, { "epoch": 0.36430168733482415, "grad_norm": 0.14696893095970154, "learning_rate": 0.00016367334485914778, "loss": 1.0983, "step": 1792 }, { "epoch": 0.36450498068713155, "grad_norm": 0.1386573314666748, "learning_rate": 0.0001636530051866165, "loss": 1.1763, "step": 1793 }, { "epoch": 0.3647082740394389, "grad_norm": 0.1271977722644806, "learning_rate": 0.00016363266551408523, "loss": 1.0824, "step": 1794 }, { "epoch": 0.3649115673917463, "grad_norm": 0.13254235684871674, "learning_rate": 0.00016361232584155395, "loss": 0.9628, "step": 1795 }, { "epoch": 0.3651148607440537, "grad_norm": 0.1489454209804535, "learning_rate": 0.0001635919861690227, "loss": 1.256, "step": 1796 }, { "epoch": 0.36531815409636104, "grad_norm": 0.11988960951566696, "learning_rate": 0.00016357164649649143, "loss": 1.0667, "step": 1797 }, { "epoch": 0.36552144744866844, "grad_norm": 0.11505492776632309, "learning_rate": 0.00016355130682396013, "loss": 1.037, "step": 1798 }, { "epoch": 0.36572474080097583, "grad_norm": 0.1133279800415039, "learning_rate": 0.00016353096715142885, "loss": 1.0006, "step": 1799 }, { "epoch": 0.3659280341532832, "grad_norm": 0.14962686598300934, "learning_rate": 0.0001635106274788976, "loss": 1.3546, "step": 1800 }, { "epoch": 0.3661313275055906, "grad_norm": 0.13253025710582733, "learning_rate": 0.00016349028780636633, "loss": 1.1064, "step": 1801 }, { "epoch": 0.3663346208578979, "grad_norm": 0.11647074669599533, "learning_rate": 0.00016346994813383505, "loss": 0.9201, "step": 1802 }, { "epoch": 0.3665379142102053, "grad_norm": 0.12080147862434387, "learning_rate": 0.00016344960846130378, "loss": 1.0346, "step": 1803 }, { "epoch": 0.3667412075625127, "grad_norm": 0.12051571905612946, "learning_rate": 0.00016342926878877253, "loss": 1.0125, "step": 1804 }, { "epoch": 0.36694450091482006, "grad_norm": 0.11931899935007095, "learning_rate": 0.00016340892911624125, "loss": 1.0123, "step": 1805 }, { "epoch": 0.36714779426712746, "grad_norm": 0.12983456254005432, "learning_rate": 0.00016338858944370995, "loss": 0.9814, "step": 1806 }, { "epoch": 0.36735108761943486, "grad_norm": 0.14519883692264557, "learning_rate": 0.00016336824977117867, "loss": 1.1524, "step": 1807 }, { "epoch": 0.3675543809717422, "grad_norm": 0.11531595140695572, "learning_rate": 0.00016334791009864742, "loss": 0.9979, "step": 1808 }, { "epoch": 0.3677576743240496, "grad_norm": 0.13013306260108948, "learning_rate": 0.00016332757042611615, "loss": 0.9104, "step": 1809 }, { "epoch": 0.367960967676357, "grad_norm": 0.12455404549837112, "learning_rate": 0.00016330723075358487, "loss": 1.1263, "step": 1810 }, { "epoch": 0.36816426102866434, "grad_norm": 0.1386694461107254, "learning_rate": 0.0001632868910810536, "loss": 1.1143, "step": 1811 }, { "epoch": 0.36836755438097174, "grad_norm": 0.12970969080924988, "learning_rate": 0.00016326655140852235, "loss": 0.9942, "step": 1812 }, { "epoch": 0.36857084773327914, "grad_norm": 0.13323652744293213, "learning_rate": 0.00016324621173599107, "loss": 1.1296, "step": 1813 }, { "epoch": 0.3687741410855865, "grad_norm": 0.12079238891601562, "learning_rate": 0.00016322587206345977, "loss": 0.9846, "step": 1814 }, { "epoch": 0.3689774344378939, "grad_norm": 0.11877255141735077, "learning_rate": 0.0001632055323909285, "loss": 1.0053, "step": 1815 }, { "epoch": 0.3691807277902013, "grad_norm": 0.12340681999921799, "learning_rate": 0.00016318519271839725, "loss": 0.9803, "step": 1816 }, { "epoch": 0.3693840211425086, "grad_norm": 0.11937633156776428, "learning_rate": 0.00016316485304586597, "loss": 1.0217, "step": 1817 }, { "epoch": 0.369587314494816, "grad_norm": 0.12306183576583862, "learning_rate": 0.0001631445133733347, "loss": 1.005, "step": 1818 }, { "epoch": 0.3697906078471234, "grad_norm": 0.1307355761528015, "learning_rate": 0.00016312417370080342, "loss": 1.0241, "step": 1819 }, { "epoch": 0.36999390119943076, "grad_norm": 0.12387688457965851, "learning_rate": 0.00016310383402827217, "loss": 1.0489, "step": 1820 }, { "epoch": 0.37019719455173816, "grad_norm": 0.13056257367134094, "learning_rate": 0.0001630834943557409, "loss": 1.0639, "step": 1821 }, { "epoch": 0.37040048790404556, "grad_norm": 0.1329268217086792, "learning_rate": 0.0001630631546832096, "loss": 1.0949, "step": 1822 }, { "epoch": 0.3706037812563529, "grad_norm": 0.1100173369050026, "learning_rate": 0.00016304281501067832, "loss": 0.9644, "step": 1823 }, { "epoch": 0.3708070746086603, "grad_norm": 0.13045302033424377, "learning_rate": 0.00016302247533814707, "loss": 1.0773, "step": 1824 }, { "epoch": 0.3710103679609677, "grad_norm": 0.12959614396095276, "learning_rate": 0.0001630021356656158, "loss": 1.0633, "step": 1825 }, { "epoch": 0.37121366131327505, "grad_norm": 0.1272924393415451, "learning_rate": 0.00016298179599308452, "loss": 0.9854, "step": 1826 }, { "epoch": 0.37141695466558244, "grad_norm": 0.13959196209907532, "learning_rate": 0.00016296145632055324, "loss": 1.1186, "step": 1827 }, { "epoch": 0.37162024801788984, "grad_norm": 0.120680071413517, "learning_rate": 0.000162941116648022, "loss": 0.9871, "step": 1828 }, { "epoch": 0.3718235413701972, "grad_norm": 0.11955247074365616, "learning_rate": 0.00016292077697549072, "loss": 1.0019, "step": 1829 }, { "epoch": 0.3720268347225046, "grad_norm": 0.13293783366680145, "learning_rate": 0.00016290043730295942, "loss": 1.1245, "step": 1830 }, { "epoch": 0.37223012807481193, "grad_norm": 0.13701294362545013, "learning_rate": 0.00016288009763042814, "loss": 0.9353, "step": 1831 }, { "epoch": 0.3724334214271193, "grad_norm": 0.12601931393146515, "learning_rate": 0.00016285975795789687, "loss": 1.0166, "step": 1832 }, { "epoch": 0.3726367147794267, "grad_norm": 0.12148377299308777, "learning_rate": 0.00016283941828536562, "loss": 1.0335, "step": 1833 }, { "epoch": 0.37284000813173407, "grad_norm": 0.1322852075099945, "learning_rate": 0.00016281907861283434, "loss": 0.9585, "step": 1834 }, { "epoch": 0.37304330148404147, "grad_norm": 0.13737133145332336, "learning_rate": 0.00016279873894030307, "loss": 1.0907, "step": 1835 }, { "epoch": 0.37324659483634887, "grad_norm": 0.12207762897014618, "learning_rate": 0.0001627783992677718, "loss": 1.1334, "step": 1836 }, { "epoch": 0.3734498881886562, "grad_norm": 0.13265001773834229, "learning_rate": 0.00016275805959524054, "loss": 1.1315, "step": 1837 }, { "epoch": 0.3736531815409636, "grad_norm": 0.1349770426750183, "learning_rate": 0.00016273771992270924, "loss": 1.0977, "step": 1838 }, { "epoch": 0.373856474893271, "grad_norm": 0.1335778385400772, "learning_rate": 0.00016271738025017796, "loss": 1.038, "step": 1839 }, { "epoch": 0.37405976824557835, "grad_norm": 0.13259084522724152, "learning_rate": 0.0001626970405776467, "loss": 1.0422, "step": 1840 }, { "epoch": 0.37426306159788575, "grad_norm": 0.13083282113075256, "learning_rate": 0.00016267670090511544, "loss": 1.0672, "step": 1841 }, { "epoch": 0.37446635495019315, "grad_norm": 0.12019068002700806, "learning_rate": 0.00016265636123258416, "loss": 0.8895, "step": 1842 }, { "epoch": 0.3746696483025005, "grad_norm": 0.12882567942142487, "learning_rate": 0.0001626360215600529, "loss": 1.071, "step": 1843 }, { "epoch": 0.3748729416548079, "grad_norm": 0.12891016900539398, "learning_rate": 0.00016261568188752161, "loss": 0.9553, "step": 1844 }, { "epoch": 0.3750762350071153, "grad_norm": 0.12769286334514618, "learning_rate": 0.00016259534221499037, "loss": 1.0616, "step": 1845 }, { "epoch": 0.37527952835942263, "grad_norm": 0.14067451655864716, "learning_rate": 0.00016257500254245906, "loss": 1.2434, "step": 1846 }, { "epoch": 0.37548282171173003, "grad_norm": 0.14272430539131165, "learning_rate": 0.0001625546628699278, "loss": 1.1185, "step": 1847 }, { "epoch": 0.37568611506403743, "grad_norm": 0.1343206763267517, "learning_rate": 0.0001625343231973965, "loss": 1.1766, "step": 1848 }, { "epoch": 0.3758894084163448, "grad_norm": 0.11800689250230789, "learning_rate": 0.00016251398352486526, "loss": 1.0509, "step": 1849 }, { "epoch": 0.37609270176865217, "grad_norm": 0.126071959733963, "learning_rate": 0.000162493643852334, "loss": 1.0304, "step": 1850 }, { "epoch": 0.37629599512095957, "grad_norm": 0.1479204297065735, "learning_rate": 0.0001624733041798027, "loss": 1.2349, "step": 1851 }, { "epoch": 0.3764992884732669, "grad_norm": 0.1391003429889679, "learning_rate": 0.00016245296450727144, "loss": 0.9928, "step": 1852 }, { "epoch": 0.3767025818255743, "grad_norm": 0.14163215458393097, "learning_rate": 0.0001624326248347402, "loss": 1.2806, "step": 1853 }, { "epoch": 0.3769058751778817, "grad_norm": 0.113652303814888, "learning_rate": 0.0001624122851622089, "loss": 0.9222, "step": 1854 }, { "epoch": 0.37710916853018905, "grad_norm": 0.13163653016090393, "learning_rate": 0.0001623919454896776, "loss": 0.9817, "step": 1855 }, { "epoch": 0.37731246188249645, "grad_norm": 0.12150076776742935, "learning_rate": 0.00016237160581714633, "loss": 1.0522, "step": 1856 }, { "epoch": 0.3775157552348038, "grad_norm": 0.12493383884429932, "learning_rate": 0.00016235126614461509, "loss": 1.1715, "step": 1857 }, { "epoch": 0.3777190485871112, "grad_norm": 0.12059423327445984, "learning_rate": 0.0001623309264720838, "loss": 1.0005, "step": 1858 }, { "epoch": 0.3779223419394186, "grad_norm": 0.13585112988948822, "learning_rate": 0.00016231058679955253, "loss": 1.0747, "step": 1859 }, { "epoch": 0.37812563529172594, "grad_norm": 0.13678506016731262, "learning_rate": 0.00016229024712702126, "loss": 1.2324, "step": 1860 }, { "epoch": 0.37832892864403334, "grad_norm": 0.13325399160385132, "learning_rate": 0.00016226990745449, "loss": 1.0719, "step": 1861 }, { "epoch": 0.37853222199634073, "grad_norm": 0.13250133395195007, "learning_rate": 0.00016224956778195874, "loss": 1.2482, "step": 1862 }, { "epoch": 0.3787355153486481, "grad_norm": 0.13788394629955292, "learning_rate": 0.00016222922810942743, "loss": 1.209, "step": 1863 }, { "epoch": 0.3789388087009555, "grad_norm": 0.13350476324558258, "learning_rate": 0.00016220888843689616, "loss": 1.133, "step": 1864 }, { "epoch": 0.3791421020532629, "grad_norm": 0.13107924163341522, "learning_rate": 0.0001621885487643649, "loss": 1.0644, "step": 1865 }, { "epoch": 0.3793453954055702, "grad_norm": 0.12230812013149261, "learning_rate": 0.00016216820909183363, "loss": 0.8282, "step": 1866 }, { "epoch": 0.3795486887578776, "grad_norm": 0.11637227237224579, "learning_rate": 0.00016214786941930236, "loss": 0.9427, "step": 1867 }, { "epoch": 0.379751982110185, "grad_norm": 0.12177541106939316, "learning_rate": 0.00016212752974677108, "loss": 1.1639, "step": 1868 }, { "epoch": 0.37995527546249236, "grad_norm": 0.149050772190094, "learning_rate": 0.00016210719007423983, "loss": 1.225, "step": 1869 }, { "epoch": 0.38015856881479976, "grad_norm": 0.12929648160934448, "learning_rate": 0.00016208685040170856, "loss": 0.9552, "step": 1870 }, { "epoch": 0.38036186216710716, "grad_norm": 0.13179321587085724, "learning_rate": 0.00016206651072917726, "loss": 1.1057, "step": 1871 }, { "epoch": 0.3805651555194145, "grad_norm": 0.15877602994441986, "learning_rate": 0.00016204617105664598, "loss": 1.2518, "step": 1872 }, { "epoch": 0.3807684488717219, "grad_norm": 0.12015218287706375, "learning_rate": 0.00016202583138411473, "loss": 0.9414, "step": 1873 }, { "epoch": 0.3809717422240293, "grad_norm": 0.11854024976491928, "learning_rate": 0.00016200549171158346, "loss": 0.8964, "step": 1874 }, { "epoch": 0.38117503557633664, "grad_norm": 0.1399824023246765, "learning_rate": 0.00016198515203905218, "loss": 1.1772, "step": 1875 }, { "epoch": 0.38137832892864404, "grad_norm": 0.13480430841445923, "learning_rate": 0.0001619648123665209, "loss": 1.1182, "step": 1876 }, { "epoch": 0.38158162228095144, "grad_norm": 0.13443569839000702, "learning_rate": 0.00016194447269398963, "loss": 1.1376, "step": 1877 }, { "epoch": 0.3817849156332588, "grad_norm": 0.12492494285106659, "learning_rate": 0.00016192413302145838, "loss": 1.0976, "step": 1878 }, { "epoch": 0.3819882089855662, "grad_norm": 0.1359935700893402, "learning_rate": 0.00016190379334892708, "loss": 1.2954, "step": 1879 }, { "epoch": 0.3821915023378736, "grad_norm": 0.11808416247367859, "learning_rate": 0.0001618834536763958, "loss": 0.9972, "step": 1880 }, { "epoch": 0.3823947956901809, "grad_norm": 0.10978496074676514, "learning_rate": 0.00016186311400386453, "loss": 1.0534, "step": 1881 }, { "epoch": 0.3825980890424883, "grad_norm": 0.13503976166248322, "learning_rate": 0.00016184277433133328, "loss": 1.1353, "step": 1882 }, { "epoch": 0.38280138239479566, "grad_norm": 0.1396964192390442, "learning_rate": 0.000161822434658802, "loss": 1.1936, "step": 1883 }, { "epoch": 0.38300467574710306, "grad_norm": 0.1180117055773735, "learning_rate": 0.00016180209498627073, "loss": 1.1091, "step": 1884 }, { "epoch": 0.38320796909941046, "grad_norm": 0.13506156206130981, "learning_rate": 0.00016178175531373945, "loss": 1.12, "step": 1885 }, { "epoch": 0.3834112624517178, "grad_norm": 0.12191524356603622, "learning_rate": 0.0001617614156412082, "loss": 1.1521, "step": 1886 }, { "epoch": 0.3836145558040252, "grad_norm": 0.13350510597229004, "learning_rate": 0.0001617410759686769, "loss": 1.1081, "step": 1887 }, { "epoch": 0.3838178491563326, "grad_norm": 0.1178809106349945, "learning_rate": 0.00016172073629614563, "loss": 1.0511, "step": 1888 }, { "epoch": 0.38402114250863995, "grad_norm": 0.13383956253528595, "learning_rate": 0.00016170039662361435, "loss": 1.0467, "step": 1889 }, { "epoch": 0.38422443586094734, "grad_norm": 0.12543490529060364, "learning_rate": 0.0001616800569510831, "loss": 1.038, "step": 1890 }, { "epoch": 0.38442772921325474, "grad_norm": 0.12253366410732269, "learning_rate": 0.00016165971727855183, "loss": 1.0494, "step": 1891 }, { "epoch": 0.3846310225655621, "grad_norm": 0.10339358448982239, "learning_rate": 0.00016163937760602055, "loss": 0.8586, "step": 1892 }, { "epoch": 0.3848343159178695, "grad_norm": 0.14473773539066315, "learning_rate": 0.00016161903793348927, "loss": 1.0651, "step": 1893 }, { "epoch": 0.3850376092701769, "grad_norm": 0.12131127715110779, "learning_rate": 0.00016159869826095803, "loss": 1.0674, "step": 1894 }, { "epoch": 0.3852409026224842, "grad_norm": 0.1297827512025833, "learning_rate": 0.00016157835858842672, "loss": 1.1171, "step": 1895 }, { "epoch": 0.3854441959747916, "grad_norm": 0.14175108075141907, "learning_rate": 0.00016155801891589545, "loss": 1.151, "step": 1896 }, { "epoch": 0.385647489327099, "grad_norm": 0.12038639187812805, "learning_rate": 0.00016153767924336417, "loss": 1.0481, "step": 1897 }, { "epoch": 0.38585078267940637, "grad_norm": 0.11626328527927399, "learning_rate": 0.00016151733957083292, "loss": 0.9606, "step": 1898 }, { "epoch": 0.38605407603171377, "grad_norm": 0.127833291888237, "learning_rate": 0.00016149699989830165, "loss": 1.0139, "step": 1899 }, { "epoch": 0.38625736938402117, "grad_norm": 0.13045917451381683, "learning_rate": 0.00016147666022577037, "loss": 1.035, "step": 1900 }, { "epoch": 0.3864606627363285, "grad_norm": 0.1294708251953125, "learning_rate": 0.0001614563205532391, "loss": 1.1797, "step": 1901 }, { "epoch": 0.3866639560886359, "grad_norm": 0.12301066517829895, "learning_rate": 0.00016143598088070785, "loss": 1.0155, "step": 1902 }, { "epoch": 0.3868672494409433, "grad_norm": 0.12555493414402008, "learning_rate": 0.00016141564120817655, "loss": 1.0664, "step": 1903 }, { "epoch": 0.38707054279325065, "grad_norm": 0.1144699901342392, "learning_rate": 0.00016139530153564527, "loss": 0.9436, "step": 1904 }, { "epoch": 0.38727383614555805, "grad_norm": 0.12643945217132568, "learning_rate": 0.000161374961863114, "loss": 1.0495, "step": 1905 }, { "epoch": 0.38747712949786545, "grad_norm": 0.13180217146873474, "learning_rate": 0.00016135462219058275, "loss": 1.0142, "step": 1906 }, { "epoch": 0.3876804228501728, "grad_norm": 0.12776418030261993, "learning_rate": 0.00016133428251805147, "loss": 1.0508, "step": 1907 }, { "epoch": 0.3878837162024802, "grad_norm": 0.12063184380531311, "learning_rate": 0.0001613139428455202, "loss": 1.0565, "step": 1908 }, { "epoch": 0.38808700955478753, "grad_norm": 0.12819765508174896, "learning_rate": 0.00016129360317298892, "loss": 1.0611, "step": 1909 }, { "epoch": 0.38829030290709493, "grad_norm": 0.12989814579486847, "learning_rate": 0.00016127326350045767, "loss": 0.9904, "step": 1910 }, { "epoch": 0.38849359625940233, "grad_norm": 0.11423414200544357, "learning_rate": 0.0001612529238279264, "loss": 0.9869, "step": 1911 }, { "epoch": 0.3886968896117097, "grad_norm": 0.14000189304351807, "learning_rate": 0.0001612325841553951, "loss": 1.0534, "step": 1912 }, { "epoch": 0.38890018296401707, "grad_norm": 0.13626928627490997, "learning_rate": 0.00016121224448286382, "loss": 1.2318, "step": 1913 }, { "epoch": 0.38910347631632447, "grad_norm": 0.15019413828849792, "learning_rate": 0.00016119190481033257, "loss": 1.2027, "step": 1914 }, { "epoch": 0.3893067696686318, "grad_norm": 0.1226695328950882, "learning_rate": 0.0001611715651378013, "loss": 0.9538, "step": 1915 }, { "epoch": 0.3895100630209392, "grad_norm": 0.12817354500293732, "learning_rate": 0.00016115122546527002, "loss": 1.0928, "step": 1916 }, { "epoch": 0.3897133563732466, "grad_norm": 0.12198452651500702, "learning_rate": 0.00016113088579273874, "loss": 0.9358, "step": 1917 }, { "epoch": 0.38991664972555395, "grad_norm": 0.11782688647508621, "learning_rate": 0.00016111054612020747, "loss": 0.8752, "step": 1918 }, { "epoch": 0.39011994307786135, "grad_norm": 0.10750327259302139, "learning_rate": 0.00016109020644767622, "loss": 0.897, "step": 1919 }, { "epoch": 0.39032323643016875, "grad_norm": 0.12854933738708496, "learning_rate": 0.00016106986677514492, "loss": 1.1868, "step": 1920 }, { "epoch": 0.3905265297824761, "grad_norm": 0.12874183058738708, "learning_rate": 0.00016104952710261364, "loss": 1.1017, "step": 1921 }, { "epoch": 0.3907298231347835, "grad_norm": 0.11504833400249481, "learning_rate": 0.00016102918743008237, "loss": 1.0192, "step": 1922 }, { "epoch": 0.3909331164870909, "grad_norm": 0.13493189215660095, "learning_rate": 0.00016100884775755112, "loss": 1.1141, "step": 1923 }, { "epoch": 0.39113640983939824, "grad_norm": 0.1267276406288147, "learning_rate": 0.00016098850808501984, "loss": 1.1147, "step": 1924 }, { "epoch": 0.39133970319170563, "grad_norm": 0.1132739931344986, "learning_rate": 0.00016096816841248857, "loss": 0.9775, "step": 1925 }, { "epoch": 0.39154299654401303, "grad_norm": 0.1274060159921646, "learning_rate": 0.0001609478287399573, "loss": 1.1937, "step": 1926 }, { "epoch": 0.3917462898963204, "grad_norm": 0.11706088483333588, "learning_rate": 0.00016092748906742604, "loss": 0.9774, "step": 1927 }, { "epoch": 0.3919495832486278, "grad_norm": 0.12287326157093048, "learning_rate": 0.00016090714939489474, "loss": 1.0348, "step": 1928 }, { "epoch": 0.3921528766009352, "grad_norm": 0.12456396222114563, "learning_rate": 0.00016088680972236346, "loss": 0.9097, "step": 1929 }, { "epoch": 0.3923561699532425, "grad_norm": 0.11585330218076706, "learning_rate": 0.0001608664700498322, "loss": 1.0469, "step": 1930 }, { "epoch": 0.3925594633055499, "grad_norm": 0.1536455601453781, "learning_rate": 0.00016084613037730094, "loss": 1.1005, "step": 1931 }, { "epoch": 0.3927627566578573, "grad_norm": 0.12221349030733109, "learning_rate": 0.00016082579070476966, "loss": 0.9721, "step": 1932 }, { "epoch": 0.39296605001016466, "grad_norm": 0.12621140480041504, "learning_rate": 0.0001608054510322384, "loss": 1.0791, "step": 1933 }, { "epoch": 0.39316934336247206, "grad_norm": 0.13487426936626434, "learning_rate": 0.0001607851113597071, "loss": 0.9798, "step": 1934 }, { "epoch": 0.3933726367147794, "grad_norm": 0.13655559718608856, "learning_rate": 0.00016076477168717586, "loss": 1.1909, "step": 1935 }, { "epoch": 0.3935759300670868, "grad_norm": 0.12217934429645538, "learning_rate": 0.00016074443201464456, "loss": 0.9678, "step": 1936 }, { "epoch": 0.3937792234193942, "grad_norm": 0.13106785714626312, "learning_rate": 0.0001607240923421133, "loss": 1.1034, "step": 1937 }, { "epoch": 0.39398251677170154, "grad_norm": 0.13911622762680054, "learning_rate": 0.000160703752669582, "loss": 1.136, "step": 1938 }, { "epoch": 0.39418581012400894, "grad_norm": 0.12952151894569397, "learning_rate": 0.00016068341299705076, "loss": 1.0292, "step": 1939 }, { "epoch": 0.39438910347631634, "grad_norm": 0.12866811454296112, "learning_rate": 0.0001606630733245195, "loss": 1.1424, "step": 1940 }, { "epoch": 0.3945923968286237, "grad_norm": 0.12720058858394623, "learning_rate": 0.0001606427336519882, "loss": 1.1688, "step": 1941 }, { "epoch": 0.3947956901809311, "grad_norm": 0.13742884993553162, "learning_rate": 0.00016062239397945694, "loss": 1.0547, "step": 1942 }, { "epoch": 0.3949989835332385, "grad_norm": 0.12812934815883636, "learning_rate": 0.0001606020543069257, "loss": 1.0, "step": 1943 }, { "epoch": 0.3952022768855458, "grad_norm": 0.1265181452035904, "learning_rate": 0.00016058171463439439, "loss": 1.2084, "step": 1944 }, { "epoch": 0.3954055702378532, "grad_norm": 0.11929038912057877, "learning_rate": 0.0001605613749618631, "loss": 1.0126, "step": 1945 }, { "epoch": 0.3956088635901606, "grad_norm": 0.128428652882576, "learning_rate": 0.00016054103528933183, "loss": 1.1729, "step": 1946 }, { "epoch": 0.39581215694246796, "grad_norm": 0.14802579581737518, "learning_rate": 0.00016052069561680059, "loss": 1.1445, "step": 1947 }, { "epoch": 0.39601545029477536, "grad_norm": 0.1259651482105255, "learning_rate": 0.0001605003559442693, "loss": 1.0906, "step": 1948 }, { "epoch": 0.39621874364708276, "grad_norm": 0.12911193072795868, "learning_rate": 0.00016048001627173803, "loss": 1.0483, "step": 1949 }, { "epoch": 0.3964220369993901, "grad_norm": 0.1306496411561966, "learning_rate": 0.00016045967659920676, "loss": 1.105, "step": 1950 }, { "epoch": 0.3966253303516975, "grad_norm": 0.12834158539772034, "learning_rate": 0.0001604393369266755, "loss": 1.0814, "step": 1951 }, { "epoch": 0.3968286237040049, "grad_norm": 0.11149043589830399, "learning_rate": 0.0001604189972541442, "loss": 0.9313, "step": 1952 }, { "epoch": 0.39703191705631224, "grad_norm": 0.13247650861740112, "learning_rate": 0.00016039865758161293, "loss": 1.0523, "step": 1953 }, { "epoch": 0.39723521040861964, "grad_norm": 0.13839392364025116, "learning_rate": 0.00016037831790908166, "loss": 1.1873, "step": 1954 }, { "epoch": 0.39743850376092704, "grad_norm": 0.13359107077121735, "learning_rate": 0.0001603579782365504, "loss": 1.1791, "step": 1955 }, { "epoch": 0.3976417971132344, "grad_norm": 0.13618066906929016, "learning_rate": 0.00016033763856401913, "loss": 1.1673, "step": 1956 }, { "epoch": 0.3978450904655418, "grad_norm": 0.13163338601589203, "learning_rate": 0.00016031729889148786, "loss": 1.1715, "step": 1957 }, { "epoch": 0.3980483838178492, "grad_norm": 0.12153584510087967, "learning_rate": 0.00016029695921895658, "loss": 1.0582, "step": 1958 }, { "epoch": 0.3982516771701565, "grad_norm": 0.13559706509113312, "learning_rate": 0.0001602766195464253, "loss": 1.1622, "step": 1959 }, { "epoch": 0.3984549705224639, "grad_norm": 0.12290627509355545, "learning_rate": 0.00016025627987389403, "loss": 0.9632, "step": 1960 }, { "epoch": 0.39865826387477127, "grad_norm": 0.1298772245645523, "learning_rate": 0.00016023594020136276, "loss": 1.1786, "step": 1961 }, { "epoch": 0.39886155722707867, "grad_norm": 0.13533517718315125, "learning_rate": 0.00016021560052883148, "loss": 0.9701, "step": 1962 }, { "epoch": 0.39906485057938607, "grad_norm": 0.12389865517616272, "learning_rate": 0.0001601952608563002, "loss": 0.9966, "step": 1963 }, { "epoch": 0.3992681439316934, "grad_norm": 0.11722499132156372, "learning_rate": 0.00016017492118376896, "loss": 0.7836, "step": 1964 }, { "epoch": 0.3994714372840008, "grad_norm": 0.142978236079216, "learning_rate": 0.00016015458151123768, "loss": 1.1237, "step": 1965 }, { "epoch": 0.3996747306363082, "grad_norm": 0.12385619431734085, "learning_rate": 0.0001601342418387064, "loss": 1.0396, "step": 1966 }, { "epoch": 0.39987802398861555, "grad_norm": 0.12524884939193726, "learning_rate": 0.00016011390216617513, "loss": 1.1242, "step": 1967 }, { "epoch": 0.40008131734092295, "grad_norm": 0.13346579670906067, "learning_rate": 0.00016009356249364388, "loss": 1.1125, "step": 1968 }, { "epoch": 0.40028461069323035, "grad_norm": 0.12904873490333557, "learning_rate": 0.00016007322282111258, "loss": 1.0506, "step": 1969 }, { "epoch": 0.4004879040455377, "grad_norm": 0.12711603939533234, "learning_rate": 0.0001600528831485813, "loss": 1.0445, "step": 1970 }, { "epoch": 0.4006911973978451, "grad_norm": 0.13451595604419708, "learning_rate": 0.00016003254347605003, "loss": 1.0552, "step": 1971 }, { "epoch": 0.4008944907501525, "grad_norm": 0.146467387676239, "learning_rate": 0.00016001220380351878, "loss": 1.0093, "step": 1972 }, { "epoch": 0.40109778410245983, "grad_norm": 0.12398801743984222, "learning_rate": 0.0001599918641309875, "loss": 0.9897, "step": 1973 }, { "epoch": 0.40130107745476723, "grad_norm": 0.10917028784751892, "learning_rate": 0.00015997152445845623, "loss": 0.8371, "step": 1974 }, { "epoch": 0.40150437080707463, "grad_norm": 0.14363138377666473, "learning_rate": 0.00015995118478592495, "loss": 1.0559, "step": 1975 }, { "epoch": 0.40170766415938197, "grad_norm": 0.12954387068748474, "learning_rate": 0.0001599308451133937, "loss": 1.1048, "step": 1976 }, { "epoch": 0.40191095751168937, "grad_norm": 0.12329546362161636, "learning_rate": 0.0001599105054408624, "loss": 1.0156, "step": 1977 }, { "epoch": 0.40211425086399677, "grad_norm": 0.11060404032468796, "learning_rate": 0.00015989016576833113, "loss": 0.8623, "step": 1978 }, { "epoch": 0.4023175442163041, "grad_norm": 0.1489768773317337, "learning_rate": 0.00015986982609579985, "loss": 1.0878, "step": 1979 }, { "epoch": 0.4025208375686115, "grad_norm": 0.12081994861364365, "learning_rate": 0.0001598494864232686, "loss": 0.9822, "step": 1980 }, { "epoch": 0.4027241309209189, "grad_norm": 0.14777058362960815, "learning_rate": 0.00015982914675073733, "loss": 1.2367, "step": 1981 }, { "epoch": 0.40292742427322625, "grad_norm": 0.12689609825611115, "learning_rate": 0.00015980880707820605, "loss": 1.1029, "step": 1982 }, { "epoch": 0.40313071762553365, "grad_norm": 0.12985149025917053, "learning_rate": 0.00015978846740567477, "loss": 1.1604, "step": 1983 }, { "epoch": 0.40333401097784105, "grad_norm": 0.11619044095277786, "learning_rate": 0.00015976812773314353, "loss": 0.9736, "step": 1984 }, { "epoch": 0.4035373043301484, "grad_norm": 0.13137032091617584, "learning_rate": 0.00015974778806061222, "loss": 1.0877, "step": 1985 }, { "epoch": 0.4037405976824558, "grad_norm": 0.13488256931304932, "learning_rate": 0.00015972744838808095, "loss": 1.2048, "step": 1986 }, { "epoch": 0.40394389103476314, "grad_norm": 0.2605299949645996, "learning_rate": 0.00015970710871554967, "loss": 0.9133, "step": 1987 }, { "epoch": 0.40414718438707053, "grad_norm": 0.13198648393154144, "learning_rate": 0.00015968676904301842, "loss": 0.9512, "step": 1988 }, { "epoch": 0.40435047773937793, "grad_norm": 0.1144537478685379, "learning_rate": 0.00015966642937048715, "loss": 1.0429, "step": 1989 }, { "epoch": 0.4045537710916853, "grad_norm": 0.1217435896396637, "learning_rate": 0.00015964608969795587, "loss": 1.0744, "step": 1990 }, { "epoch": 0.4047570644439927, "grad_norm": 0.12606003880500793, "learning_rate": 0.0001596257500254246, "loss": 1.0945, "step": 1991 }, { "epoch": 0.4049603577963001, "grad_norm": 0.13098153471946716, "learning_rate": 0.00015960541035289335, "loss": 1.2815, "step": 1992 }, { "epoch": 0.4051636511486074, "grad_norm": 0.11636700481176376, "learning_rate": 0.00015958507068036205, "loss": 0.9849, "step": 1993 }, { "epoch": 0.4053669445009148, "grad_norm": 0.12562847137451172, "learning_rate": 0.00015956473100783077, "loss": 1.0519, "step": 1994 }, { "epoch": 0.4055702378532222, "grad_norm": 0.13366295397281647, "learning_rate": 0.0001595443913352995, "loss": 1.2831, "step": 1995 }, { "epoch": 0.40577353120552956, "grad_norm": 0.12537652254104614, "learning_rate": 0.00015952405166276825, "loss": 0.9735, "step": 1996 }, { "epoch": 0.40597682455783696, "grad_norm": 0.12336364388465881, "learning_rate": 0.00015950371199023697, "loss": 1.0355, "step": 1997 }, { "epoch": 0.40618011791014436, "grad_norm": 0.13342751562595367, "learning_rate": 0.0001594833723177057, "loss": 1.1506, "step": 1998 }, { "epoch": 0.4063834112624517, "grad_norm": 0.13104167580604553, "learning_rate": 0.00015946303264517442, "loss": 1.2302, "step": 1999 }, { "epoch": 0.4065867046147591, "grad_norm": 0.1287468671798706, "learning_rate": 0.00015944269297264314, "loss": 1.1503, "step": 2000 }, { "epoch": 0.4067899979670665, "grad_norm": 0.1316487044095993, "learning_rate": 0.00015942235330011187, "loss": 1.0732, "step": 2001 }, { "epoch": 0.40699329131937384, "grad_norm": 0.12111165374517441, "learning_rate": 0.0001594020136275806, "loss": 0.9384, "step": 2002 }, { "epoch": 0.40719658467168124, "grad_norm": 0.11894603073596954, "learning_rate": 0.00015938167395504932, "loss": 1.0157, "step": 2003 }, { "epoch": 0.40739987802398864, "grad_norm": 0.13228829205036163, "learning_rate": 0.00015936133428251804, "loss": 1.11, "step": 2004 }, { "epoch": 0.407603171376296, "grad_norm": 0.12275683134794235, "learning_rate": 0.0001593409946099868, "loss": 1.0183, "step": 2005 }, { "epoch": 0.4078064647286034, "grad_norm": 0.1273687779903412, "learning_rate": 0.00015932065493745552, "loss": 1.024, "step": 2006 }, { "epoch": 0.4080097580809108, "grad_norm": 0.13763071596622467, "learning_rate": 0.00015930031526492424, "loss": 1.1465, "step": 2007 }, { "epoch": 0.4082130514332181, "grad_norm": 0.13428914546966553, "learning_rate": 0.00015927997559239297, "loss": 1.1119, "step": 2008 }, { "epoch": 0.4084163447855255, "grad_norm": 0.12992502748966217, "learning_rate": 0.0001592596359198617, "loss": 1.0341, "step": 2009 }, { "epoch": 0.4086196381378329, "grad_norm": 0.11928235739469528, "learning_rate": 0.00015923929624733042, "loss": 0.9454, "step": 2010 }, { "epoch": 0.40882293149014026, "grad_norm": 0.13093991577625275, "learning_rate": 0.00015921895657479914, "loss": 0.9528, "step": 2011 }, { "epoch": 0.40902622484244766, "grad_norm": 0.1252833604812622, "learning_rate": 0.00015919861690226787, "loss": 1.0849, "step": 2012 }, { "epoch": 0.409229518194755, "grad_norm": 0.13304093480110168, "learning_rate": 0.00015917827722973662, "loss": 1.1173, "step": 2013 }, { "epoch": 0.4094328115470624, "grad_norm": 0.11735294759273529, "learning_rate": 0.00015915793755720534, "loss": 0.9961, "step": 2014 }, { "epoch": 0.4096361048993698, "grad_norm": 0.133205845952034, "learning_rate": 0.00015913759788467407, "loss": 1.0818, "step": 2015 }, { "epoch": 0.40983939825167714, "grad_norm": 0.12949281930923462, "learning_rate": 0.0001591172582121428, "loss": 1.0151, "step": 2016 }, { "epoch": 0.41004269160398454, "grad_norm": 0.12819421291351318, "learning_rate": 0.00015909691853961151, "loss": 1.0449, "step": 2017 }, { "epoch": 0.41024598495629194, "grad_norm": 0.12694479525089264, "learning_rate": 0.00015907657886708024, "loss": 0.9756, "step": 2018 }, { "epoch": 0.4104492783085993, "grad_norm": 0.11785703897476196, "learning_rate": 0.00015905623919454896, "loss": 0.8295, "step": 2019 }, { "epoch": 0.4106525716609067, "grad_norm": 0.12432985007762909, "learning_rate": 0.0001590358995220177, "loss": 1.0375, "step": 2020 }, { "epoch": 0.4108558650132141, "grad_norm": 0.12006914615631104, "learning_rate": 0.00015901555984948644, "loss": 0.9818, "step": 2021 }, { "epoch": 0.4110591583655214, "grad_norm": 0.13180270791053772, "learning_rate": 0.00015899522017695516, "loss": 1.165, "step": 2022 }, { "epoch": 0.4112624517178288, "grad_norm": 0.13662603497505188, "learning_rate": 0.0001589748805044239, "loss": 1.0978, "step": 2023 }, { "epoch": 0.4114657450701362, "grad_norm": 0.137676402926445, "learning_rate": 0.0001589545408318926, "loss": 1.0695, "step": 2024 }, { "epoch": 0.41166903842244357, "grad_norm": 0.13281960785388947, "learning_rate": 0.00015893420115936136, "loss": 0.9717, "step": 2025 }, { "epoch": 0.41187233177475097, "grad_norm": 0.1159568652510643, "learning_rate": 0.00015891386148683006, "loss": 0.9863, "step": 2026 }, { "epoch": 0.41207562512705836, "grad_norm": 0.12235623598098755, "learning_rate": 0.00015889352181429879, "loss": 1.167, "step": 2027 }, { "epoch": 0.4122789184793657, "grad_norm": 0.11709940433502197, "learning_rate": 0.0001588731821417675, "loss": 1.0618, "step": 2028 }, { "epoch": 0.4124822118316731, "grad_norm": 0.13078409433364868, "learning_rate": 0.00015885284246923626, "loss": 1.1512, "step": 2029 }, { "epoch": 0.4126855051839805, "grad_norm": 0.12789343297481537, "learning_rate": 0.000158832502796705, "loss": 1.2586, "step": 2030 }, { "epoch": 0.41288879853628785, "grad_norm": 0.12031058967113495, "learning_rate": 0.0001588121631241737, "loss": 0.9543, "step": 2031 }, { "epoch": 0.41309209188859525, "grad_norm": 0.1303958296775818, "learning_rate": 0.00015879182345164244, "loss": 1.1823, "step": 2032 }, { "epoch": 0.41329538524090265, "grad_norm": 0.12915648519992828, "learning_rate": 0.0001587714837791112, "loss": 1.1199, "step": 2033 }, { "epoch": 0.41349867859321, "grad_norm": 0.13749873638153076, "learning_rate": 0.00015875114410657988, "loss": 1.1359, "step": 2034 }, { "epoch": 0.4137019719455174, "grad_norm": 0.120378777384758, "learning_rate": 0.0001587308044340486, "loss": 1.0311, "step": 2035 }, { "epoch": 0.4139052652978248, "grad_norm": 0.13083983957767487, "learning_rate": 0.00015871046476151733, "loss": 1.1491, "step": 2036 }, { "epoch": 0.41410855865013213, "grad_norm": 0.1264946162700653, "learning_rate": 0.00015869012508898609, "loss": 1.0575, "step": 2037 }, { "epoch": 0.41431185200243953, "grad_norm": 0.13813161849975586, "learning_rate": 0.0001586697854164548, "loss": 1.1596, "step": 2038 }, { "epoch": 0.4145151453547469, "grad_norm": 0.1298746019601822, "learning_rate": 0.00015864944574392353, "loss": 1.0107, "step": 2039 }, { "epoch": 0.41471843870705427, "grad_norm": 0.13159529864788055, "learning_rate": 0.00015862910607139226, "loss": 0.9777, "step": 2040 }, { "epoch": 0.41492173205936167, "grad_norm": 0.13329805433750153, "learning_rate": 0.00015860876639886098, "loss": 1.1749, "step": 2041 }, { "epoch": 0.415125025411669, "grad_norm": 0.12069873511791229, "learning_rate": 0.0001585884267263297, "loss": 1.1532, "step": 2042 }, { "epoch": 0.4153283187639764, "grad_norm": 0.11439201235771179, "learning_rate": 0.00015856808705379843, "loss": 0.8484, "step": 2043 }, { "epoch": 0.4155316121162838, "grad_norm": 0.14021088182926178, "learning_rate": 0.00015854774738126716, "loss": 0.9592, "step": 2044 }, { "epoch": 0.41573490546859115, "grad_norm": 0.12401128560304642, "learning_rate": 0.00015852740770873588, "loss": 0.9658, "step": 2045 }, { "epoch": 0.41593819882089855, "grad_norm": 0.1366535723209381, "learning_rate": 0.00015850706803620463, "loss": 1.1259, "step": 2046 }, { "epoch": 0.41614149217320595, "grad_norm": 0.11328650265932083, "learning_rate": 0.00015848672836367336, "loss": 0.9499, "step": 2047 }, { "epoch": 0.4163447855255133, "grad_norm": 0.13193942606449127, "learning_rate": 0.00015846638869114208, "loss": 1.0833, "step": 2048 }, { "epoch": 0.4165480788778207, "grad_norm": 0.1413910835981369, "learning_rate": 0.0001584460490186108, "loss": 1.0693, "step": 2049 }, { "epoch": 0.4167513722301281, "grad_norm": 0.11957409977912903, "learning_rate": 0.00015842570934607953, "loss": 1.0102, "step": 2050 }, { "epoch": 0.41695466558243544, "grad_norm": 0.12360769510269165, "learning_rate": 0.00015840536967354825, "loss": 1.1312, "step": 2051 }, { "epoch": 0.41715795893474283, "grad_norm": 0.12471318989992142, "learning_rate": 0.00015838503000101698, "loss": 1.1255, "step": 2052 }, { "epoch": 0.41736125228705023, "grad_norm": 0.129171222448349, "learning_rate": 0.0001583646903284857, "loss": 1.103, "step": 2053 }, { "epoch": 0.4175645456393576, "grad_norm": 0.14544697105884552, "learning_rate": 0.00015834435065595446, "loss": 1.0373, "step": 2054 }, { "epoch": 0.417767838991665, "grad_norm": 0.12571415305137634, "learning_rate": 0.00015832401098342318, "loss": 1.0675, "step": 2055 }, { "epoch": 0.4179711323439724, "grad_norm": 0.12805119156837463, "learning_rate": 0.0001583036713108919, "loss": 1.0713, "step": 2056 }, { "epoch": 0.4181744256962797, "grad_norm": 0.1386822909116745, "learning_rate": 0.00015828333163836063, "loss": 1.1084, "step": 2057 }, { "epoch": 0.4183777190485871, "grad_norm": 0.14066076278686523, "learning_rate": 0.00015826299196582935, "loss": 1.2577, "step": 2058 }, { "epoch": 0.4185810124008945, "grad_norm": 0.1231965720653534, "learning_rate": 0.00015824265229329808, "loss": 0.9684, "step": 2059 }, { "epoch": 0.41878430575320186, "grad_norm": 0.11889393627643585, "learning_rate": 0.0001582223126207668, "loss": 1.0393, "step": 2060 }, { "epoch": 0.41898759910550926, "grad_norm": 0.13274893164634705, "learning_rate": 0.00015820197294823553, "loss": 1.1742, "step": 2061 }, { "epoch": 0.41919089245781666, "grad_norm": 0.14034253358840942, "learning_rate": 0.00015818163327570428, "loss": 0.9673, "step": 2062 }, { "epoch": 0.419394185810124, "grad_norm": 0.14197202026844025, "learning_rate": 0.000158161293603173, "loss": 1.0931, "step": 2063 }, { "epoch": 0.4195974791624314, "grad_norm": 0.12458556890487671, "learning_rate": 0.00015814095393064173, "loss": 0.9713, "step": 2064 }, { "epoch": 0.41980077251473874, "grad_norm": 0.1311383694410324, "learning_rate": 0.00015812061425811045, "loss": 1.2641, "step": 2065 }, { "epoch": 0.42000406586704614, "grad_norm": 0.13218726217746735, "learning_rate": 0.00015810027458557918, "loss": 1.2132, "step": 2066 }, { "epoch": 0.42020735921935354, "grad_norm": 0.14619286358356476, "learning_rate": 0.0001580799349130479, "loss": 1.1031, "step": 2067 }, { "epoch": 0.4204106525716609, "grad_norm": 0.13700971007347107, "learning_rate": 0.00015805959524051662, "loss": 1.0364, "step": 2068 }, { "epoch": 0.4206139459239683, "grad_norm": 0.12797488272190094, "learning_rate": 0.00015803925556798535, "loss": 1.1025, "step": 2069 }, { "epoch": 0.4208172392762757, "grad_norm": 0.1373629868030548, "learning_rate": 0.0001580189158954541, "loss": 1.1924, "step": 2070 }, { "epoch": 0.421020532628583, "grad_norm": 0.11743365973234177, "learning_rate": 0.00015799857622292283, "loss": 0.9271, "step": 2071 }, { "epoch": 0.4212238259808904, "grad_norm": 0.12701068818569183, "learning_rate": 0.00015797823655039155, "loss": 1.0388, "step": 2072 }, { "epoch": 0.4214271193331978, "grad_norm": 0.14013634622097015, "learning_rate": 0.00015795789687786027, "loss": 1.1594, "step": 2073 }, { "epoch": 0.42163041268550516, "grad_norm": 0.12524859607219696, "learning_rate": 0.000157937557205329, "loss": 1.0191, "step": 2074 }, { "epoch": 0.42183370603781256, "grad_norm": 0.11082011461257935, "learning_rate": 0.00015791721753279772, "loss": 0.9211, "step": 2075 }, { "epoch": 0.42203699939011996, "grad_norm": 0.12579189240932465, "learning_rate": 0.00015789687786026645, "loss": 1.0269, "step": 2076 }, { "epoch": 0.4222402927424273, "grad_norm": 0.13665060698986053, "learning_rate": 0.00015787653818773517, "loss": 1.0662, "step": 2077 }, { "epoch": 0.4224435860947347, "grad_norm": 0.12775637209415436, "learning_rate": 0.00015785619851520392, "loss": 1.2206, "step": 2078 }, { "epoch": 0.4226468794470421, "grad_norm": 0.13883183896541595, "learning_rate": 0.00015783585884267265, "loss": 1.1484, "step": 2079 }, { "epoch": 0.42285017279934944, "grad_norm": 0.12899038195610046, "learning_rate": 0.00015781551917014137, "loss": 1.128, "step": 2080 }, { "epoch": 0.42305346615165684, "grad_norm": 0.12957079708576202, "learning_rate": 0.0001577951794976101, "loss": 1.1362, "step": 2081 }, { "epoch": 0.42325675950396424, "grad_norm": 0.12619063258171082, "learning_rate": 0.00015777483982507882, "loss": 1.1022, "step": 2082 }, { "epoch": 0.4234600528562716, "grad_norm": 0.12516093254089355, "learning_rate": 0.00015775450015254755, "loss": 1.1149, "step": 2083 }, { "epoch": 0.423663346208579, "grad_norm": 0.12650391459465027, "learning_rate": 0.00015773416048001627, "loss": 0.9535, "step": 2084 }, { "epoch": 0.4238666395608864, "grad_norm": 0.14746572077274323, "learning_rate": 0.000157713820807485, "loss": 1.222, "step": 2085 }, { "epoch": 0.4240699329131937, "grad_norm": 0.10933158546686172, "learning_rate": 0.00015769348113495372, "loss": 0.8832, "step": 2086 }, { "epoch": 0.4242732262655011, "grad_norm": 0.13668397068977356, "learning_rate": 0.00015767314146242247, "loss": 1.1808, "step": 2087 }, { "epoch": 0.4244765196178085, "grad_norm": 0.13031315803527832, "learning_rate": 0.0001576528017898912, "loss": 0.9752, "step": 2088 }, { "epoch": 0.42467981297011587, "grad_norm": 0.14403130114078522, "learning_rate": 0.00015763246211735992, "loss": 1.1372, "step": 2089 }, { "epoch": 0.42488310632242327, "grad_norm": 0.13902884721755981, "learning_rate": 0.00015761212244482864, "loss": 1.1492, "step": 2090 }, { "epoch": 0.42508639967473066, "grad_norm": 0.11546601355075836, "learning_rate": 0.00015759178277229737, "loss": 1.0091, "step": 2091 }, { "epoch": 0.425289693027038, "grad_norm": 0.12731419503688812, "learning_rate": 0.0001575714430997661, "loss": 1.0627, "step": 2092 }, { "epoch": 0.4254929863793454, "grad_norm": 0.11023043841123581, "learning_rate": 0.00015755110342723482, "loss": 0.8111, "step": 2093 }, { "epoch": 0.42569627973165275, "grad_norm": 0.14930586516857147, "learning_rate": 0.00015753076375470354, "loss": 1.206, "step": 2094 }, { "epoch": 0.42589957308396015, "grad_norm": 0.1300898790359497, "learning_rate": 0.0001575104240821723, "loss": 1.0915, "step": 2095 }, { "epoch": 0.42610286643626755, "grad_norm": 0.13914939761161804, "learning_rate": 0.00015749008440964102, "loss": 1.0512, "step": 2096 }, { "epoch": 0.4263061597885749, "grad_norm": 0.15199615061283112, "learning_rate": 0.00015746974473710974, "loss": 1.2611, "step": 2097 }, { "epoch": 0.4265094531408823, "grad_norm": 0.13422483205795288, "learning_rate": 0.00015744940506457847, "loss": 1.1213, "step": 2098 }, { "epoch": 0.4267127464931897, "grad_norm": 0.1285259872674942, "learning_rate": 0.0001574290653920472, "loss": 1.1144, "step": 2099 }, { "epoch": 0.42691603984549703, "grad_norm": 0.11812227219343185, "learning_rate": 0.00015740872571951592, "loss": 1.0033, "step": 2100 }, { "epoch": 0.42711933319780443, "grad_norm": 0.13837237656116486, "learning_rate": 0.00015738838604698464, "loss": 1.111, "step": 2101 }, { "epoch": 0.42732262655011183, "grad_norm": 0.1241428554058075, "learning_rate": 0.00015736804637445336, "loss": 1.0571, "step": 2102 }, { "epoch": 0.42752591990241917, "grad_norm": 0.1367318034172058, "learning_rate": 0.00015734770670192212, "loss": 1.1312, "step": 2103 }, { "epoch": 0.42772921325472657, "grad_norm": 0.12532354891300201, "learning_rate": 0.00015732736702939084, "loss": 1.0407, "step": 2104 }, { "epoch": 0.42793250660703397, "grad_norm": 0.12487448751926422, "learning_rate": 0.00015730702735685957, "loss": 1.072, "step": 2105 }, { "epoch": 0.4281357999593413, "grad_norm": 0.1195039302110672, "learning_rate": 0.0001572866876843283, "loss": 0.9995, "step": 2106 }, { "epoch": 0.4283390933116487, "grad_norm": 0.16366197168827057, "learning_rate": 0.00015726634801179701, "loss": 1.4101, "step": 2107 }, { "epoch": 0.4285423866639561, "grad_norm": 0.1324339210987091, "learning_rate": 0.00015724600833926574, "loss": 1.1455, "step": 2108 }, { "epoch": 0.42874568001626345, "grad_norm": 0.1581498682498932, "learning_rate": 0.00015722566866673446, "loss": 1.3092, "step": 2109 }, { "epoch": 0.42894897336857085, "grad_norm": 0.13514567911624908, "learning_rate": 0.0001572053289942032, "loss": 1.1998, "step": 2110 }, { "epoch": 0.42915226672087825, "grad_norm": 0.1304248571395874, "learning_rate": 0.00015718498932167194, "loss": 1.1488, "step": 2111 }, { "epoch": 0.4293555600731856, "grad_norm": 0.1261410117149353, "learning_rate": 0.00015716464964914066, "loss": 1.128, "step": 2112 }, { "epoch": 0.429558853425493, "grad_norm": 0.12761110067367554, "learning_rate": 0.0001571443099766094, "loss": 1.1816, "step": 2113 }, { "epoch": 0.4297621467778004, "grad_norm": 0.1307440996170044, "learning_rate": 0.0001571239703040781, "loss": 1.1362, "step": 2114 }, { "epoch": 0.42996544013010773, "grad_norm": 0.12130671739578247, "learning_rate": 0.00015710363063154684, "loss": 0.903, "step": 2115 }, { "epoch": 0.43016873348241513, "grad_norm": 0.12277641892433167, "learning_rate": 0.00015708329095901556, "loss": 0.9964, "step": 2116 }, { "epoch": 0.43037202683472253, "grad_norm": 0.13223405182361603, "learning_rate": 0.00015706295128648429, "loss": 0.9409, "step": 2117 }, { "epoch": 0.4305753201870299, "grad_norm": 0.12146423757076263, "learning_rate": 0.000157042611613953, "loss": 1.0116, "step": 2118 }, { "epoch": 0.4307786135393373, "grad_norm": 0.1319243311882019, "learning_rate": 0.00015702227194142176, "loss": 1.1354, "step": 2119 }, { "epoch": 0.4309819068916446, "grad_norm": 0.10945885628461838, "learning_rate": 0.0001570019322688905, "loss": 0.9314, "step": 2120 }, { "epoch": 0.431185200243952, "grad_norm": 0.12822148203849792, "learning_rate": 0.0001569815925963592, "loss": 0.9862, "step": 2121 }, { "epoch": 0.4313884935962594, "grad_norm": 0.13050609827041626, "learning_rate": 0.00015696125292382794, "loss": 1.1618, "step": 2122 }, { "epoch": 0.43159178694856676, "grad_norm": 0.1293252408504486, "learning_rate": 0.00015694091325129666, "loss": 1.0692, "step": 2123 }, { "epoch": 0.43179508030087416, "grad_norm": 0.15059755742549896, "learning_rate": 0.00015692057357876538, "loss": 0.9984, "step": 2124 }, { "epoch": 0.43199837365318156, "grad_norm": 0.13384853303432465, "learning_rate": 0.0001569002339062341, "loss": 1.1272, "step": 2125 }, { "epoch": 0.4322016670054889, "grad_norm": 0.1330154538154602, "learning_rate": 0.00015687989423370283, "loss": 0.958, "step": 2126 }, { "epoch": 0.4324049603577963, "grad_norm": 0.13418689370155334, "learning_rate": 0.00015685955456117156, "loss": 1.0939, "step": 2127 }, { "epoch": 0.4326082537101037, "grad_norm": 0.1272914856672287, "learning_rate": 0.0001568392148886403, "loss": 1.0839, "step": 2128 }, { "epoch": 0.43281154706241104, "grad_norm": 0.13827690482139587, "learning_rate": 0.00015681887521610903, "loss": 1.2304, "step": 2129 }, { "epoch": 0.43301484041471844, "grad_norm": 0.13577015697956085, "learning_rate": 0.00015679853554357776, "loss": 1.0749, "step": 2130 }, { "epoch": 0.43321813376702584, "grad_norm": 0.1302756816148758, "learning_rate": 0.00015677819587104648, "loss": 1.1381, "step": 2131 }, { "epoch": 0.4334214271193332, "grad_norm": 0.13045424222946167, "learning_rate": 0.0001567578561985152, "loss": 1.0538, "step": 2132 }, { "epoch": 0.4336247204716406, "grad_norm": 0.12351250648498535, "learning_rate": 0.00015673751652598393, "loss": 1.0061, "step": 2133 }, { "epoch": 0.433828013823948, "grad_norm": 0.1451653242111206, "learning_rate": 0.00015671717685345266, "loss": 1.1764, "step": 2134 }, { "epoch": 0.4340313071762553, "grad_norm": 0.12439122051000595, "learning_rate": 0.00015669683718092138, "loss": 0.9121, "step": 2135 }, { "epoch": 0.4342346005285627, "grad_norm": 0.1257990002632141, "learning_rate": 0.00015667649750839013, "loss": 1.0822, "step": 2136 }, { "epoch": 0.4344378938808701, "grad_norm": 0.13994207978248596, "learning_rate": 0.00015665615783585886, "loss": 1.2759, "step": 2137 }, { "epoch": 0.43464118723317746, "grad_norm": 0.14246414601802826, "learning_rate": 0.00015663581816332758, "loss": 1.2934, "step": 2138 }, { "epoch": 0.43484448058548486, "grad_norm": 0.1359516978263855, "learning_rate": 0.0001566154784907963, "loss": 1.1651, "step": 2139 }, { "epoch": 0.43504777393779226, "grad_norm": 0.12099796533584595, "learning_rate": 0.00015659513881826503, "loss": 0.9028, "step": 2140 }, { "epoch": 0.4352510672900996, "grad_norm": 0.12675108015537262, "learning_rate": 0.00015657479914573375, "loss": 1.026, "step": 2141 }, { "epoch": 0.435454360642407, "grad_norm": 0.13229331374168396, "learning_rate": 0.00015655445947320248, "loss": 1.0562, "step": 2142 }, { "epoch": 0.4356576539947144, "grad_norm": 0.12163117527961731, "learning_rate": 0.0001565341198006712, "loss": 1.1147, "step": 2143 }, { "epoch": 0.43586094734702174, "grad_norm": 0.13495270907878876, "learning_rate": 0.00015651378012813995, "loss": 1.0544, "step": 2144 }, { "epoch": 0.43606424069932914, "grad_norm": 0.11769222468137741, "learning_rate": 0.00015649344045560868, "loss": 1.0549, "step": 2145 }, { "epoch": 0.4362675340516365, "grad_norm": 0.14370734989643097, "learning_rate": 0.0001564731007830774, "loss": 1.1513, "step": 2146 }, { "epoch": 0.4364708274039439, "grad_norm": 0.1351398378610611, "learning_rate": 0.00015645276111054613, "loss": 1.0814, "step": 2147 }, { "epoch": 0.4366741207562513, "grad_norm": 0.13881100714206696, "learning_rate": 0.00015643242143801485, "loss": 1.1576, "step": 2148 }, { "epoch": 0.4368774141085586, "grad_norm": 0.11548882722854614, "learning_rate": 0.00015641208176548358, "loss": 1.0153, "step": 2149 }, { "epoch": 0.437080707460866, "grad_norm": 0.1512657254934311, "learning_rate": 0.0001563917420929523, "loss": 1.3081, "step": 2150 }, { "epoch": 0.4372840008131734, "grad_norm": 0.14576168358325958, "learning_rate": 0.00015637140242042103, "loss": 1.244, "step": 2151 }, { "epoch": 0.43748729416548077, "grad_norm": 0.14067471027374268, "learning_rate": 0.00015635106274788978, "loss": 1.023, "step": 2152 }, { "epoch": 0.43769058751778817, "grad_norm": 0.12753300368785858, "learning_rate": 0.0001563307230753585, "loss": 1.0292, "step": 2153 }, { "epoch": 0.43789388087009556, "grad_norm": 0.13246901333332062, "learning_rate": 0.00015631038340282723, "loss": 1.1607, "step": 2154 }, { "epoch": 0.4380971742224029, "grad_norm": 0.13709375262260437, "learning_rate": 0.00015629004373029595, "loss": 0.9848, "step": 2155 }, { "epoch": 0.4383004675747103, "grad_norm": 0.14666500687599182, "learning_rate": 0.00015626970405776468, "loss": 1.1349, "step": 2156 }, { "epoch": 0.4385037609270177, "grad_norm": 0.13992977142333984, "learning_rate": 0.0001562493643852334, "loss": 1.2692, "step": 2157 }, { "epoch": 0.43870705427932505, "grad_norm": 0.1444278359413147, "learning_rate": 0.00015622902471270212, "loss": 1.0354, "step": 2158 }, { "epoch": 0.43891034763163245, "grad_norm": 0.12587270140647888, "learning_rate": 0.00015620868504017085, "loss": 0.9783, "step": 2159 }, { "epoch": 0.43911364098393985, "grad_norm": 0.1311299055814743, "learning_rate": 0.0001561883453676396, "loss": 1.0541, "step": 2160 }, { "epoch": 0.4393169343362472, "grad_norm": 0.1246386170387268, "learning_rate": 0.00015616800569510832, "loss": 1.0384, "step": 2161 }, { "epoch": 0.4395202276885546, "grad_norm": 0.1286439597606659, "learning_rate": 0.00015614766602257705, "loss": 1.0909, "step": 2162 }, { "epoch": 0.439723521040862, "grad_norm": 0.12758703529834747, "learning_rate": 0.00015612732635004577, "loss": 1.0835, "step": 2163 }, { "epoch": 0.43992681439316933, "grad_norm": 0.1129162386059761, "learning_rate": 0.0001561069866775145, "loss": 0.9967, "step": 2164 }, { "epoch": 0.44013010774547673, "grad_norm": 0.12939536571502686, "learning_rate": 0.00015608664700498322, "loss": 1.133, "step": 2165 }, { "epoch": 0.4403334010977841, "grad_norm": 0.1393767148256302, "learning_rate": 0.00015606630733245195, "loss": 1.2379, "step": 2166 }, { "epoch": 0.44053669445009147, "grad_norm": 0.11744683235883713, "learning_rate": 0.00015604596765992067, "loss": 0.9686, "step": 2167 }, { "epoch": 0.44073998780239887, "grad_norm": 0.12241906672716141, "learning_rate": 0.0001560256279873894, "loss": 1.0188, "step": 2168 }, { "epoch": 0.44094328115470627, "grad_norm": 0.1316422075033188, "learning_rate": 0.00015600528831485815, "loss": 1.0292, "step": 2169 }, { "epoch": 0.4411465745070136, "grad_norm": 0.13489259779453278, "learning_rate": 0.00015598494864232687, "loss": 1.1332, "step": 2170 }, { "epoch": 0.441349867859321, "grad_norm": 0.13207697868347168, "learning_rate": 0.0001559646089697956, "loss": 1.0482, "step": 2171 }, { "epoch": 0.44155316121162835, "grad_norm": 0.13211561739444733, "learning_rate": 0.00015594426929726432, "loss": 1.064, "step": 2172 }, { "epoch": 0.44175645456393575, "grad_norm": 0.14821046590805054, "learning_rate": 0.00015592392962473305, "loss": 1.1058, "step": 2173 }, { "epoch": 0.44195974791624315, "grad_norm": 0.11314582824707031, "learning_rate": 0.00015590358995220177, "loss": 0.9351, "step": 2174 }, { "epoch": 0.4421630412685505, "grad_norm": 0.12001941353082657, "learning_rate": 0.0001558832502796705, "loss": 0.9875, "step": 2175 }, { "epoch": 0.4423663346208579, "grad_norm": 0.1433805674314499, "learning_rate": 0.00015586291060713922, "loss": 1.1552, "step": 2176 }, { "epoch": 0.4425696279731653, "grad_norm": 0.11265136301517487, "learning_rate": 0.00015584257093460797, "loss": 0.9431, "step": 2177 }, { "epoch": 0.44277292132547263, "grad_norm": 0.12973473966121674, "learning_rate": 0.0001558222312620767, "loss": 1.0591, "step": 2178 }, { "epoch": 0.44297621467778003, "grad_norm": 0.13749995827674866, "learning_rate": 0.00015580189158954542, "loss": 1.161, "step": 2179 }, { "epoch": 0.44317950803008743, "grad_norm": 0.12814348936080933, "learning_rate": 0.00015578155191701414, "loss": 1.1314, "step": 2180 }, { "epoch": 0.4433828013823948, "grad_norm": 0.1292744129896164, "learning_rate": 0.00015576121224448287, "loss": 0.9885, "step": 2181 }, { "epoch": 0.4435860947347022, "grad_norm": 0.11668647080659866, "learning_rate": 0.0001557408725719516, "loss": 0.9546, "step": 2182 }, { "epoch": 0.4437893880870096, "grad_norm": 0.13656672835350037, "learning_rate": 0.00015572053289942032, "loss": 1.1105, "step": 2183 }, { "epoch": 0.4439926814393169, "grad_norm": 0.13544489443302155, "learning_rate": 0.00015570019322688904, "loss": 1.0632, "step": 2184 }, { "epoch": 0.4441959747916243, "grad_norm": 0.14713092148303986, "learning_rate": 0.0001556798535543578, "loss": 1.0508, "step": 2185 }, { "epoch": 0.4443992681439317, "grad_norm": 0.1292864978313446, "learning_rate": 0.00015565951388182652, "loss": 0.8771, "step": 2186 }, { "epoch": 0.44460256149623906, "grad_norm": 0.12545311450958252, "learning_rate": 0.00015563917420929524, "loss": 1.1117, "step": 2187 }, { "epoch": 0.44480585484854646, "grad_norm": 0.1412542760372162, "learning_rate": 0.00015561883453676397, "loss": 1.3364, "step": 2188 }, { "epoch": 0.44500914820085385, "grad_norm": 0.11404701322317123, "learning_rate": 0.0001555984948642327, "loss": 0.9512, "step": 2189 }, { "epoch": 0.4452124415531612, "grad_norm": 0.11949559301137924, "learning_rate": 0.00015557815519170142, "loss": 1.0406, "step": 2190 }, { "epoch": 0.4454157349054686, "grad_norm": 0.10985735058784485, "learning_rate": 0.00015555781551917014, "loss": 0.9292, "step": 2191 }, { "epoch": 0.445619028257776, "grad_norm": 0.14047123491764069, "learning_rate": 0.00015553747584663886, "loss": 1.0895, "step": 2192 }, { "epoch": 0.44582232161008334, "grad_norm": 0.1520707756280899, "learning_rate": 0.00015551713617410762, "loss": 1.118, "step": 2193 }, { "epoch": 0.44602561496239074, "grad_norm": 0.13202513754367828, "learning_rate": 0.00015549679650157634, "loss": 1.0031, "step": 2194 }, { "epoch": 0.44622890831469814, "grad_norm": 0.13803257048130035, "learning_rate": 0.00015547645682904507, "loss": 1.106, "step": 2195 }, { "epoch": 0.4464322016670055, "grad_norm": 0.13089017570018768, "learning_rate": 0.0001554561171565138, "loss": 1.0706, "step": 2196 }, { "epoch": 0.4466354950193129, "grad_norm": 0.12646476924419403, "learning_rate": 0.00015543577748398251, "loss": 1.0548, "step": 2197 }, { "epoch": 0.4468387883716202, "grad_norm": 0.14208228886127472, "learning_rate": 0.00015541543781145124, "loss": 1.1952, "step": 2198 }, { "epoch": 0.4470420817239276, "grad_norm": 0.1471976786851883, "learning_rate": 0.00015539509813891996, "loss": 1.1987, "step": 2199 }, { "epoch": 0.447245375076235, "grad_norm": 0.11970525234937668, "learning_rate": 0.0001553747584663887, "loss": 0.9523, "step": 2200 }, { "epoch": 0.44744866842854236, "grad_norm": 0.1178225502371788, "learning_rate": 0.00015535441879385744, "loss": 0.9105, "step": 2201 }, { "epoch": 0.44765196178084976, "grad_norm": 0.11376915872097015, "learning_rate": 0.00015533407912132616, "loss": 0.9041, "step": 2202 }, { "epoch": 0.44785525513315716, "grad_norm": 0.12055668234825134, "learning_rate": 0.0001553137394487949, "loss": 0.9057, "step": 2203 }, { "epoch": 0.4480585484854645, "grad_norm": 0.1293669193983078, "learning_rate": 0.0001552933997762636, "loss": 1.047, "step": 2204 }, { "epoch": 0.4482618418377719, "grad_norm": 0.14985014498233795, "learning_rate": 0.00015527306010373234, "loss": 1.2782, "step": 2205 }, { "epoch": 0.4484651351900793, "grad_norm": 0.12716402113437653, "learning_rate": 0.00015525272043120106, "loss": 1.0629, "step": 2206 }, { "epoch": 0.44866842854238664, "grad_norm": 0.13868549466133118, "learning_rate": 0.00015523238075866979, "loss": 1.248, "step": 2207 }, { "epoch": 0.44887172189469404, "grad_norm": 0.12867020070552826, "learning_rate": 0.0001552120410861385, "loss": 1.0579, "step": 2208 }, { "epoch": 0.44907501524700144, "grad_norm": 0.14104703068733215, "learning_rate": 0.00015519170141360723, "loss": 1.2559, "step": 2209 }, { "epoch": 0.4492783085993088, "grad_norm": 0.13124023377895355, "learning_rate": 0.00015517136174107599, "loss": 0.9115, "step": 2210 }, { "epoch": 0.4494816019516162, "grad_norm": 0.1268378347158432, "learning_rate": 0.0001551510220685447, "loss": 1.0753, "step": 2211 }, { "epoch": 0.4496848953039236, "grad_norm": 0.12500889599323273, "learning_rate": 0.00015513068239601344, "loss": 1.1563, "step": 2212 }, { "epoch": 0.4498881886562309, "grad_norm": 0.15399597585201263, "learning_rate": 0.00015511034272348216, "loss": 1.3699, "step": 2213 }, { "epoch": 0.4500914820085383, "grad_norm": 0.13191649317741394, "learning_rate": 0.00015509000305095088, "loss": 1.0768, "step": 2214 }, { "epoch": 0.4502947753608457, "grad_norm": 0.13257142901420593, "learning_rate": 0.0001550696633784196, "loss": 1.2508, "step": 2215 }, { "epoch": 0.45049806871315307, "grad_norm": 0.1298341304063797, "learning_rate": 0.00015504932370588833, "loss": 1.0288, "step": 2216 }, { "epoch": 0.45070136206546046, "grad_norm": 0.12427882850170135, "learning_rate": 0.00015502898403335706, "loss": 1.0185, "step": 2217 }, { "epoch": 0.45090465541776786, "grad_norm": 0.13115955889225006, "learning_rate": 0.0001550086443608258, "loss": 1.0496, "step": 2218 }, { "epoch": 0.4511079487700752, "grad_norm": 0.12085919827222824, "learning_rate": 0.00015498830468829453, "loss": 1.0642, "step": 2219 }, { "epoch": 0.4513112421223826, "grad_norm": 0.14256790280342102, "learning_rate": 0.00015496796501576326, "loss": 1.199, "step": 2220 }, { "epoch": 0.45151453547469, "grad_norm": 0.14703063666820526, "learning_rate": 0.00015494762534323198, "loss": 1.232, "step": 2221 }, { "epoch": 0.45171782882699735, "grad_norm": 0.13513730466365814, "learning_rate": 0.0001549272856707007, "loss": 1.0598, "step": 2222 }, { "epoch": 0.45192112217930475, "grad_norm": 0.12173596769571304, "learning_rate": 0.00015490694599816943, "loss": 0.9541, "step": 2223 }, { "epoch": 0.4521244155316121, "grad_norm": 0.13201670348644257, "learning_rate": 0.00015488660632563816, "loss": 1.0898, "step": 2224 }, { "epoch": 0.4523277088839195, "grad_norm": 0.1306207776069641, "learning_rate": 0.00015486626665310688, "loss": 1.1851, "step": 2225 }, { "epoch": 0.4525310022362269, "grad_norm": 0.14152634143829346, "learning_rate": 0.00015484592698057563, "loss": 1.1348, "step": 2226 }, { "epoch": 0.45273429558853423, "grad_norm": 0.12412508577108383, "learning_rate": 0.00015482558730804436, "loss": 0.943, "step": 2227 }, { "epoch": 0.45293758894084163, "grad_norm": 0.1367032378911972, "learning_rate": 0.00015480524763551308, "loss": 1.1014, "step": 2228 }, { "epoch": 0.453140882293149, "grad_norm": 0.13346408307552338, "learning_rate": 0.0001547849079629818, "loss": 1.0675, "step": 2229 }, { "epoch": 0.45334417564545637, "grad_norm": 0.1253054141998291, "learning_rate": 0.00015476456829045053, "loss": 1.0117, "step": 2230 }, { "epoch": 0.45354746899776377, "grad_norm": 0.12263582646846771, "learning_rate": 0.00015474422861791925, "loss": 1.0491, "step": 2231 }, { "epoch": 0.45375076235007117, "grad_norm": 0.11762181669473648, "learning_rate": 0.00015472388894538798, "loss": 0.9007, "step": 2232 }, { "epoch": 0.4539540557023785, "grad_norm": 0.1264612227678299, "learning_rate": 0.0001547035492728567, "loss": 1.0378, "step": 2233 }, { "epoch": 0.4541573490546859, "grad_norm": 0.13340885937213898, "learning_rate": 0.00015468320960032545, "loss": 1.0248, "step": 2234 }, { "epoch": 0.4543606424069933, "grad_norm": 0.13584084808826447, "learning_rate": 0.00015466286992779418, "loss": 1.1549, "step": 2235 }, { "epoch": 0.45456393575930065, "grad_norm": 0.13265646994113922, "learning_rate": 0.0001546425302552629, "loss": 1.1589, "step": 2236 }, { "epoch": 0.45476722911160805, "grad_norm": 0.13352257013320923, "learning_rate": 0.00015462219058273163, "loss": 1.3275, "step": 2237 }, { "epoch": 0.45497052246391545, "grad_norm": 0.13025180995464325, "learning_rate": 0.00015460185091020035, "loss": 1.1053, "step": 2238 }, { "epoch": 0.4551738158162228, "grad_norm": 0.13518528640270233, "learning_rate": 0.00015458151123766908, "loss": 1.043, "step": 2239 }, { "epoch": 0.4553771091685302, "grad_norm": 0.13065437972545624, "learning_rate": 0.0001545611715651378, "loss": 1.1238, "step": 2240 }, { "epoch": 0.4555804025208376, "grad_norm": 0.12943416833877563, "learning_rate": 0.00015454083189260653, "loss": 1.2424, "step": 2241 }, { "epoch": 0.45578369587314493, "grad_norm": 0.1298946738243103, "learning_rate": 0.00015452049222007528, "loss": 1.0909, "step": 2242 }, { "epoch": 0.45598698922545233, "grad_norm": 0.1344904750585556, "learning_rate": 0.000154500152547544, "loss": 1.2078, "step": 2243 }, { "epoch": 0.45619028257775973, "grad_norm": 0.11595308780670166, "learning_rate": 0.00015447981287501273, "loss": 0.9557, "step": 2244 }, { "epoch": 0.4563935759300671, "grad_norm": 0.11699800193309784, "learning_rate": 0.00015445947320248145, "loss": 0.969, "step": 2245 }, { "epoch": 0.4565968692823745, "grad_norm": 0.13354718685150146, "learning_rate": 0.00015443913352995018, "loss": 1.2055, "step": 2246 }, { "epoch": 0.45680016263468187, "grad_norm": 0.12961523234844208, "learning_rate": 0.0001544187938574189, "loss": 1.1531, "step": 2247 }, { "epoch": 0.4570034559869892, "grad_norm": 0.12993821501731873, "learning_rate": 0.00015439845418488762, "loss": 1.0975, "step": 2248 }, { "epoch": 0.4572067493392966, "grad_norm": 0.12171147763729095, "learning_rate": 0.00015437811451235635, "loss": 1.1262, "step": 2249 }, { "epoch": 0.45741004269160396, "grad_norm": 0.1307455450296402, "learning_rate": 0.00015435777483982507, "loss": 1.0757, "step": 2250 }, { "epoch": 0.45761333604391136, "grad_norm": 0.12793178856372833, "learning_rate": 0.00015433743516729382, "loss": 1.1616, "step": 2251 }, { "epoch": 0.45781662939621875, "grad_norm": 0.13869251310825348, "learning_rate": 0.00015431709549476255, "loss": 1.0674, "step": 2252 }, { "epoch": 0.4580199227485261, "grad_norm": 0.13465169072151184, "learning_rate": 0.00015429675582223127, "loss": 1.0685, "step": 2253 }, { "epoch": 0.4582232161008335, "grad_norm": 0.12114840000867844, "learning_rate": 0.0001542764161497, "loss": 1.0562, "step": 2254 }, { "epoch": 0.4584265094531409, "grad_norm": 0.11819116771221161, "learning_rate": 0.00015425607647716872, "loss": 0.9489, "step": 2255 }, { "epoch": 0.45862980280544824, "grad_norm": 0.1262710839509964, "learning_rate": 0.00015423573680463745, "loss": 1.014, "step": 2256 }, { "epoch": 0.45883309615775564, "grad_norm": 0.1145327240228653, "learning_rate": 0.00015421539713210617, "loss": 1.0435, "step": 2257 }, { "epoch": 0.45903638951006304, "grad_norm": 0.13413353264331818, "learning_rate": 0.0001541950574595749, "loss": 1.2098, "step": 2258 }, { "epoch": 0.4592396828623704, "grad_norm": 0.14301779866218567, "learning_rate": 0.00015417471778704365, "loss": 1.2555, "step": 2259 }, { "epoch": 0.4594429762146778, "grad_norm": 0.12918636202812195, "learning_rate": 0.00015415437811451237, "loss": 1.2728, "step": 2260 }, { "epoch": 0.4596462695669852, "grad_norm": 0.13419827818870544, "learning_rate": 0.0001541340384419811, "loss": 1.1594, "step": 2261 }, { "epoch": 0.4598495629192925, "grad_norm": 0.132028728723526, "learning_rate": 0.00015411369876944982, "loss": 1.0049, "step": 2262 }, { "epoch": 0.4600528562715999, "grad_norm": 0.12330999970436096, "learning_rate": 0.00015409335909691855, "loss": 1.0211, "step": 2263 }, { "epoch": 0.4602561496239073, "grad_norm": 0.12041660398244858, "learning_rate": 0.00015407301942438727, "loss": 0.9111, "step": 2264 }, { "epoch": 0.46045944297621466, "grad_norm": 0.13959679007530212, "learning_rate": 0.000154052679751856, "loss": 1.2186, "step": 2265 }, { "epoch": 0.46066273632852206, "grad_norm": 0.12078391015529633, "learning_rate": 0.00015403234007932472, "loss": 0.9896, "step": 2266 }, { "epoch": 0.46086602968082946, "grad_norm": 0.13155217468738556, "learning_rate": 0.00015401200040679347, "loss": 1.1405, "step": 2267 }, { "epoch": 0.4610693230331368, "grad_norm": 0.13416320085525513, "learning_rate": 0.0001539916607342622, "loss": 1.1094, "step": 2268 }, { "epoch": 0.4612726163854442, "grad_norm": 0.13319726288318634, "learning_rate": 0.00015397132106173092, "loss": 1.0477, "step": 2269 }, { "epoch": 0.4614759097377516, "grad_norm": 0.1303132325410843, "learning_rate": 0.00015395098138919964, "loss": 1.1049, "step": 2270 }, { "epoch": 0.46167920309005894, "grad_norm": 0.1119418814778328, "learning_rate": 0.00015393064171666837, "loss": 0.8764, "step": 2271 }, { "epoch": 0.46188249644236634, "grad_norm": 0.13639549911022186, "learning_rate": 0.0001539103020441371, "loss": 1.2873, "step": 2272 }, { "epoch": 0.46208578979467374, "grad_norm": 0.1421010047197342, "learning_rate": 0.00015388996237160582, "loss": 1.2249, "step": 2273 }, { "epoch": 0.4622890831469811, "grad_norm": 0.12574367225170135, "learning_rate": 0.00015386962269907454, "loss": 1.063, "step": 2274 }, { "epoch": 0.4624923764992885, "grad_norm": 0.1510375589132309, "learning_rate": 0.0001538492830265433, "loss": 1.2542, "step": 2275 }, { "epoch": 0.4626956698515958, "grad_norm": 0.13016802072525024, "learning_rate": 0.00015382894335401202, "loss": 1.0646, "step": 2276 }, { "epoch": 0.4628989632039032, "grad_norm": 0.11884848028421402, "learning_rate": 0.00015380860368148074, "loss": 1.0169, "step": 2277 }, { "epoch": 0.4631022565562106, "grad_norm": 0.12734943628311157, "learning_rate": 0.00015378826400894947, "loss": 1.0399, "step": 2278 }, { "epoch": 0.46330554990851797, "grad_norm": 0.11856262385845184, "learning_rate": 0.0001537679243364182, "loss": 0.9773, "step": 2279 }, { "epoch": 0.46350884326082537, "grad_norm": 0.12701541185379028, "learning_rate": 0.00015374758466388692, "loss": 0.9503, "step": 2280 }, { "epoch": 0.46371213661313276, "grad_norm": 0.12200977653265, "learning_rate": 0.00015372724499135564, "loss": 0.9685, "step": 2281 }, { "epoch": 0.4639154299654401, "grad_norm": 0.1607646942138672, "learning_rate": 0.00015370690531882436, "loss": 1.2651, "step": 2282 }, { "epoch": 0.4641187233177475, "grad_norm": 0.1287887990474701, "learning_rate": 0.00015368656564629312, "loss": 1.0041, "step": 2283 }, { "epoch": 0.4643220166700549, "grad_norm": 0.11581754684448242, "learning_rate": 0.00015366622597376184, "loss": 0.8802, "step": 2284 }, { "epoch": 0.46452531002236225, "grad_norm": 0.12691698968410492, "learning_rate": 0.00015364588630123056, "loss": 0.9865, "step": 2285 }, { "epoch": 0.46472860337466965, "grad_norm": 0.15262743830680847, "learning_rate": 0.0001536255466286993, "loss": 1.2384, "step": 2286 }, { "epoch": 0.46493189672697705, "grad_norm": 0.12902504205703735, "learning_rate": 0.00015360520695616801, "loss": 1.0832, "step": 2287 }, { "epoch": 0.4651351900792844, "grad_norm": 0.13777056336402893, "learning_rate": 0.00015358486728363674, "loss": 1.2671, "step": 2288 }, { "epoch": 0.4653384834315918, "grad_norm": 0.12391048669815063, "learning_rate": 0.00015356452761110546, "loss": 1.1296, "step": 2289 }, { "epoch": 0.4655417767838992, "grad_norm": 0.13558468222618103, "learning_rate": 0.0001535441879385742, "loss": 1.3, "step": 2290 }, { "epoch": 0.46574507013620653, "grad_norm": 0.13611246645450592, "learning_rate": 0.0001535238482660429, "loss": 1.2885, "step": 2291 }, { "epoch": 0.46594836348851393, "grad_norm": 0.11027907580137253, "learning_rate": 0.00015350350859351166, "loss": 0.8807, "step": 2292 }, { "epoch": 0.4661516568408213, "grad_norm": 0.1303076446056366, "learning_rate": 0.0001534831689209804, "loss": 0.9479, "step": 2293 }, { "epoch": 0.46635495019312867, "grad_norm": 0.12296570837497711, "learning_rate": 0.0001534628292484491, "loss": 0.9483, "step": 2294 }, { "epoch": 0.46655824354543607, "grad_norm": 0.13646475970745087, "learning_rate": 0.00015344248957591784, "loss": 1.0576, "step": 2295 }, { "epoch": 0.46676153689774347, "grad_norm": 0.12281665205955505, "learning_rate": 0.00015342214990338656, "loss": 1.0582, "step": 2296 }, { "epoch": 0.4669648302500508, "grad_norm": 0.12840229272842407, "learning_rate": 0.00015340181023085529, "loss": 1.0531, "step": 2297 }, { "epoch": 0.4671681236023582, "grad_norm": 0.13027642667293549, "learning_rate": 0.000153381470558324, "loss": 1.1658, "step": 2298 }, { "epoch": 0.4673714169546656, "grad_norm": 0.13270190358161926, "learning_rate": 0.00015336113088579273, "loss": 1.2343, "step": 2299 }, { "epoch": 0.46757471030697295, "grad_norm": 0.12298402190208435, "learning_rate": 0.00015334079121326149, "loss": 1.0148, "step": 2300 }, { "epoch": 0.46777800365928035, "grad_norm": 0.11776307225227356, "learning_rate": 0.0001533204515407302, "loss": 1.0423, "step": 2301 }, { "epoch": 0.4679812970115877, "grad_norm": 0.1274150162935257, "learning_rate": 0.00015330011186819893, "loss": 1.0485, "step": 2302 }, { "epoch": 0.4681845903638951, "grad_norm": 0.12356690317392349, "learning_rate": 0.00015327977219566766, "loss": 1.0259, "step": 2303 }, { "epoch": 0.4683878837162025, "grad_norm": 0.11949564516544342, "learning_rate": 0.00015325943252313638, "loss": 1.0544, "step": 2304 }, { "epoch": 0.46859117706850983, "grad_norm": 0.11649688333272934, "learning_rate": 0.0001532390928506051, "loss": 1.0703, "step": 2305 }, { "epoch": 0.46879447042081723, "grad_norm": 0.12657220661640167, "learning_rate": 0.00015321875317807383, "loss": 0.9195, "step": 2306 }, { "epoch": 0.46899776377312463, "grad_norm": 0.11678668856620789, "learning_rate": 0.00015319841350554256, "loss": 1.0412, "step": 2307 }, { "epoch": 0.469201057125432, "grad_norm": 0.1137353926897049, "learning_rate": 0.0001531780738330113, "loss": 0.9843, "step": 2308 }, { "epoch": 0.4694043504777394, "grad_norm": 0.11690492928028107, "learning_rate": 0.00015315773416048003, "loss": 1.0313, "step": 2309 }, { "epoch": 0.4696076438300468, "grad_norm": 0.14086581766605377, "learning_rate": 0.00015313739448794876, "loss": 1.2184, "step": 2310 }, { "epoch": 0.4698109371823541, "grad_norm": 0.13605134189128876, "learning_rate": 0.00015311705481541748, "loss": 1.0273, "step": 2311 }, { "epoch": 0.4700142305346615, "grad_norm": 0.12567712366580963, "learning_rate": 0.0001530967151428862, "loss": 1.0822, "step": 2312 }, { "epoch": 0.4702175238869689, "grad_norm": 0.12103762477636337, "learning_rate": 0.00015307637547035493, "loss": 0.9511, "step": 2313 }, { "epoch": 0.47042081723927626, "grad_norm": 0.13223135471343994, "learning_rate": 0.00015305603579782366, "loss": 1.116, "step": 2314 }, { "epoch": 0.47062411059158366, "grad_norm": 0.12696783244609833, "learning_rate": 0.00015303569612529238, "loss": 1.046, "step": 2315 }, { "epoch": 0.47082740394389105, "grad_norm": 0.13583315908908844, "learning_rate": 0.00015301535645276113, "loss": 1.2365, "step": 2316 }, { "epoch": 0.4710306972961984, "grad_norm": 0.1245473176240921, "learning_rate": 0.00015299501678022986, "loss": 1.1478, "step": 2317 }, { "epoch": 0.4712339906485058, "grad_norm": 0.1365327388048172, "learning_rate": 0.00015297467710769858, "loss": 1.1697, "step": 2318 }, { "epoch": 0.4714372840008132, "grad_norm": 0.13741904497146606, "learning_rate": 0.0001529543374351673, "loss": 1.1585, "step": 2319 }, { "epoch": 0.47164057735312054, "grad_norm": 0.13385626673698425, "learning_rate": 0.00015293399776263603, "loss": 1.1195, "step": 2320 }, { "epoch": 0.47184387070542794, "grad_norm": 0.12970289587974548, "learning_rate": 0.00015291365809010475, "loss": 1.1, "step": 2321 }, { "epoch": 0.47204716405773534, "grad_norm": 0.13030849397182465, "learning_rate": 0.00015289331841757348, "loss": 1.1754, "step": 2322 }, { "epoch": 0.4722504574100427, "grad_norm": 0.1363505721092224, "learning_rate": 0.0001528729787450422, "loss": 1.2234, "step": 2323 }, { "epoch": 0.4724537507623501, "grad_norm": 0.1340765804052353, "learning_rate": 0.00015285263907251095, "loss": 0.9473, "step": 2324 }, { "epoch": 0.4726570441146575, "grad_norm": 0.12515921890735626, "learning_rate": 0.00015283229939997968, "loss": 1.0829, "step": 2325 }, { "epoch": 0.4728603374669648, "grad_norm": 0.1202256977558136, "learning_rate": 0.0001528119597274484, "loss": 0.9616, "step": 2326 }, { "epoch": 0.4730636308192722, "grad_norm": 0.10012631863355637, "learning_rate": 0.00015279162005491713, "loss": 0.7739, "step": 2327 }, { "epoch": 0.47326692417157956, "grad_norm": 0.12161195278167725, "learning_rate": 0.00015277128038238585, "loss": 1.0245, "step": 2328 }, { "epoch": 0.47347021752388696, "grad_norm": 0.12597283720970154, "learning_rate": 0.00015275094070985458, "loss": 1.1309, "step": 2329 }, { "epoch": 0.47367351087619436, "grad_norm": 0.12898840010166168, "learning_rate": 0.0001527306010373233, "loss": 1.0073, "step": 2330 }, { "epoch": 0.4738768042285017, "grad_norm": 0.11734145879745483, "learning_rate": 0.00015271026136479203, "loss": 0.8884, "step": 2331 }, { "epoch": 0.4740800975808091, "grad_norm": 0.11760027706623077, "learning_rate": 0.00015268992169226075, "loss": 1.0386, "step": 2332 }, { "epoch": 0.4742833909331165, "grad_norm": 0.13076893985271454, "learning_rate": 0.0001526695820197295, "loss": 1.1217, "step": 2333 }, { "epoch": 0.47448668428542384, "grad_norm": 0.12086467444896698, "learning_rate": 0.00015264924234719823, "loss": 1.1314, "step": 2334 }, { "epoch": 0.47468997763773124, "grad_norm": 0.1257351189851761, "learning_rate": 0.00015262890267466695, "loss": 1.0988, "step": 2335 }, { "epoch": 0.47489327099003864, "grad_norm": 0.13056614995002747, "learning_rate": 0.00015260856300213567, "loss": 1.0929, "step": 2336 }, { "epoch": 0.475096564342346, "grad_norm": 0.1115044355392456, "learning_rate": 0.0001525882233296044, "loss": 0.9365, "step": 2337 }, { "epoch": 0.4752998576946534, "grad_norm": 0.11613184213638306, "learning_rate": 0.00015256788365707312, "loss": 0.9492, "step": 2338 }, { "epoch": 0.4755031510469608, "grad_norm": 0.13431620597839355, "learning_rate": 0.00015254754398454185, "loss": 1.0483, "step": 2339 }, { "epoch": 0.4757064443992681, "grad_norm": 0.13704031705856323, "learning_rate": 0.00015252720431201057, "loss": 1.324, "step": 2340 }, { "epoch": 0.4759097377515755, "grad_norm": 0.14616814255714417, "learning_rate": 0.00015250686463947932, "loss": 1.2488, "step": 2341 }, { "epoch": 0.4761130311038829, "grad_norm": 0.14007219672203064, "learning_rate": 0.00015248652496694805, "loss": 1.1427, "step": 2342 }, { "epoch": 0.47631632445619027, "grad_norm": 0.14786280691623688, "learning_rate": 0.00015246618529441677, "loss": 1.3224, "step": 2343 }, { "epoch": 0.47651961780849766, "grad_norm": 0.13280178606510162, "learning_rate": 0.0001524458456218855, "loss": 1.2878, "step": 2344 }, { "epoch": 0.47672291116080506, "grad_norm": 0.13446266949176788, "learning_rate": 0.00015242550594935422, "loss": 0.9997, "step": 2345 }, { "epoch": 0.4769262045131124, "grad_norm": 0.1296195685863495, "learning_rate": 0.00015240516627682295, "loss": 1.196, "step": 2346 }, { "epoch": 0.4771294978654198, "grad_norm": 0.13888056576251984, "learning_rate": 0.00015238482660429167, "loss": 1.1782, "step": 2347 }, { "epoch": 0.4773327912177272, "grad_norm": 0.14144721627235413, "learning_rate": 0.0001523644869317604, "loss": 1.0023, "step": 2348 }, { "epoch": 0.47753608457003455, "grad_norm": 0.1382543295621872, "learning_rate": 0.00015234414725922915, "loss": 1.128, "step": 2349 }, { "epoch": 0.47773937792234195, "grad_norm": 0.14320622384548187, "learning_rate": 0.00015232380758669787, "loss": 1.1825, "step": 2350 }, { "epoch": 0.47794267127464934, "grad_norm": 0.13087749481201172, "learning_rate": 0.0001523034679141666, "loss": 1.1676, "step": 2351 }, { "epoch": 0.4781459646269567, "grad_norm": 0.12107618153095245, "learning_rate": 0.00015228312824163532, "loss": 1.0275, "step": 2352 }, { "epoch": 0.4783492579792641, "grad_norm": 0.12728255987167358, "learning_rate": 0.00015226278856910404, "loss": 0.9566, "step": 2353 }, { "epoch": 0.4785525513315715, "grad_norm": 0.13032306730747223, "learning_rate": 0.00015224244889657277, "loss": 0.9879, "step": 2354 }, { "epoch": 0.47875584468387883, "grad_norm": 0.13414493203163147, "learning_rate": 0.0001522221092240415, "loss": 1.1314, "step": 2355 }, { "epoch": 0.4789591380361862, "grad_norm": 0.13473325967788696, "learning_rate": 0.00015220176955151022, "loss": 1.0828, "step": 2356 }, { "epoch": 0.47916243138849357, "grad_norm": 0.13013584911823273, "learning_rate": 0.00015218142987897897, "loss": 0.9822, "step": 2357 }, { "epoch": 0.47936572474080097, "grad_norm": 0.13635900616645813, "learning_rate": 0.0001521610902064477, "loss": 1.1549, "step": 2358 }, { "epoch": 0.47956901809310837, "grad_norm": 0.14560578763484955, "learning_rate": 0.00015214075053391642, "loss": 1.1529, "step": 2359 }, { "epoch": 0.4797723114454157, "grad_norm": 0.13965454697608948, "learning_rate": 0.00015212041086138514, "loss": 1.1511, "step": 2360 }, { "epoch": 0.4799756047977231, "grad_norm": 0.14002491533756256, "learning_rate": 0.00015210007118885387, "loss": 1.1332, "step": 2361 }, { "epoch": 0.4801788981500305, "grad_norm": 0.14013326168060303, "learning_rate": 0.0001520797315163226, "loss": 1.1585, "step": 2362 }, { "epoch": 0.48038219150233785, "grad_norm": 0.114499032497406, "learning_rate": 0.00015205939184379132, "loss": 0.8636, "step": 2363 }, { "epoch": 0.48058548485464525, "grad_norm": 0.14330022037029266, "learning_rate": 0.00015203905217126004, "loss": 1.0499, "step": 2364 }, { "epoch": 0.48078877820695265, "grad_norm": 0.13167035579681396, "learning_rate": 0.0001520187124987288, "loss": 1.047, "step": 2365 }, { "epoch": 0.48099207155926, "grad_norm": 0.12093020975589752, "learning_rate": 0.00015199837282619752, "loss": 1.0635, "step": 2366 }, { "epoch": 0.4811953649115674, "grad_norm": 0.13088001310825348, "learning_rate": 0.00015197803315366624, "loss": 1.1499, "step": 2367 }, { "epoch": 0.4813986582638748, "grad_norm": 0.13969479501247406, "learning_rate": 0.00015195769348113497, "loss": 1.2346, "step": 2368 }, { "epoch": 0.48160195161618213, "grad_norm": 0.129147008061409, "learning_rate": 0.0001519373538086037, "loss": 0.9817, "step": 2369 }, { "epoch": 0.48180524496848953, "grad_norm": 0.13874943554401398, "learning_rate": 0.00015191701413607242, "loss": 1.0194, "step": 2370 }, { "epoch": 0.48200853832079693, "grad_norm": 0.13884292542934418, "learning_rate": 0.00015189667446354114, "loss": 1.1071, "step": 2371 }, { "epoch": 0.4822118316731043, "grad_norm": 0.13045528531074524, "learning_rate": 0.00015187633479100986, "loss": 1.1242, "step": 2372 }, { "epoch": 0.4824151250254117, "grad_norm": 0.15773905813694, "learning_rate": 0.0001518559951184786, "loss": 1.2639, "step": 2373 }, { "epoch": 0.48261841837771907, "grad_norm": 0.11095882952213287, "learning_rate": 0.00015183565544594734, "loss": 1.0023, "step": 2374 }, { "epoch": 0.4828217117300264, "grad_norm": 0.1181846410036087, "learning_rate": 0.00015181531577341606, "loss": 0.9518, "step": 2375 }, { "epoch": 0.4830250050823338, "grad_norm": 0.11797620356082916, "learning_rate": 0.0001517949761008848, "loss": 0.9792, "step": 2376 }, { "epoch": 0.4832282984346412, "grad_norm": 0.11560335010290146, "learning_rate": 0.0001517746364283535, "loss": 0.9539, "step": 2377 }, { "epoch": 0.48343159178694856, "grad_norm": 0.1399577260017395, "learning_rate": 0.00015175429675582224, "loss": 1.1419, "step": 2378 }, { "epoch": 0.48363488513925595, "grad_norm": 0.12643292546272278, "learning_rate": 0.00015173395708329096, "loss": 1.07, "step": 2379 }, { "epoch": 0.48383817849156335, "grad_norm": 0.11252279579639435, "learning_rate": 0.0001517136174107597, "loss": 0.9261, "step": 2380 }, { "epoch": 0.4840414718438707, "grad_norm": 0.12694686651229858, "learning_rate": 0.0001516932777382284, "loss": 1.0992, "step": 2381 }, { "epoch": 0.4842447651961781, "grad_norm": 0.11446068435907364, "learning_rate": 0.00015167293806569716, "loss": 0.9807, "step": 2382 }, { "epoch": 0.48444805854848544, "grad_norm": 0.12001042813062668, "learning_rate": 0.0001516525983931659, "loss": 0.9139, "step": 2383 }, { "epoch": 0.48465135190079284, "grad_norm": 0.12721174955368042, "learning_rate": 0.0001516322587206346, "loss": 1.0866, "step": 2384 }, { "epoch": 0.48485464525310024, "grad_norm": 0.12574180960655212, "learning_rate": 0.00015161191904810334, "loss": 1.1573, "step": 2385 }, { "epoch": 0.4850579386054076, "grad_norm": 0.12667550146579742, "learning_rate": 0.00015159157937557206, "loss": 1.0796, "step": 2386 }, { "epoch": 0.485261231957715, "grad_norm": 0.13312119245529175, "learning_rate": 0.00015157123970304079, "loss": 1.261, "step": 2387 }, { "epoch": 0.4854645253100224, "grad_norm": 0.13041463494300842, "learning_rate": 0.0001515509000305095, "loss": 1.0956, "step": 2388 }, { "epoch": 0.4856678186623297, "grad_norm": 0.12114804238080978, "learning_rate": 0.00015153056035797823, "loss": 0.887, "step": 2389 }, { "epoch": 0.4858711120146371, "grad_norm": 0.144356831908226, "learning_rate": 0.00015151022068544699, "loss": 1.1402, "step": 2390 }, { "epoch": 0.4860744053669445, "grad_norm": 0.12829992175102234, "learning_rate": 0.0001514898810129157, "loss": 0.9359, "step": 2391 }, { "epoch": 0.48627769871925186, "grad_norm": 0.12318047136068344, "learning_rate": 0.00015146954134038443, "loss": 1.0306, "step": 2392 }, { "epoch": 0.48648099207155926, "grad_norm": 0.12492537498474121, "learning_rate": 0.00015144920166785316, "loss": 1.0563, "step": 2393 }, { "epoch": 0.48668428542386666, "grad_norm": 0.130072683095932, "learning_rate": 0.00015142886199532188, "loss": 1.1365, "step": 2394 }, { "epoch": 0.486887578776174, "grad_norm": 0.11817184090614319, "learning_rate": 0.0001514085223227906, "loss": 1.0596, "step": 2395 }, { "epoch": 0.4870908721284814, "grad_norm": 0.1323062777519226, "learning_rate": 0.00015138818265025933, "loss": 1.0337, "step": 2396 }, { "epoch": 0.4872941654807888, "grad_norm": 0.13455109298229218, "learning_rate": 0.00015136784297772806, "loss": 1.1477, "step": 2397 }, { "epoch": 0.48749745883309614, "grad_norm": 0.11852074414491653, "learning_rate": 0.0001513475033051968, "loss": 1.1026, "step": 2398 }, { "epoch": 0.48770075218540354, "grad_norm": 0.11237514764070511, "learning_rate": 0.00015132716363266553, "loss": 0.9384, "step": 2399 }, { "epoch": 0.48790404553771094, "grad_norm": 0.10929456353187561, "learning_rate": 0.00015130682396013426, "loss": 0.9844, "step": 2400 }, { "epoch": 0.4881073388900183, "grad_norm": 0.13844764232635498, "learning_rate": 0.00015128648428760298, "loss": 1.1771, "step": 2401 }, { "epoch": 0.4883106322423257, "grad_norm": 0.13155733048915863, "learning_rate": 0.0001512661446150717, "loss": 1.085, "step": 2402 }, { "epoch": 0.4885139255946331, "grad_norm": 0.13567966222763062, "learning_rate": 0.00015124580494254043, "loss": 1.089, "step": 2403 }, { "epoch": 0.4887172189469404, "grad_norm": 0.1349712312221527, "learning_rate": 0.00015122546527000916, "loss": 1.1356, "step": 2404 }, { "epoch": 0.4889205122992478, "grad_norm": 0.11694735288619995, "learning_rate": 0.00015120512559747788, "loss": 0.8803, "step": 2405 }, { "epoch": 0.4891238056515552, "grad_norm": 0.11674166470766068, "learning_rate": 0.00015118478592494663, "loss": 0.9611, "step": 2406 }, { "epoch": 0.48932709900386256, "grad_norm": 0.1268279105424881, "learning_rate": 0.00015116444625241536, "loss": 1.0746, "step": 2407 }, { "epoch": 0.48953039235616996, "grad_norm": 0.1330219954252243, "learning_rate": 0.00015114410657988408, "loss": 1.1476, "step": 2408 }, { "epoch": 0.4897336857084773, "grad_norm": 0.13246414065361023, "learning_rate": 0.0001511237669073528, "loss": 1.0918, "step": 2409 }, { "epoch": 0.4899369790607847, "grad_norm": 0.12214238941669464, "learning_rate": 0.00015110342723482153, "loss": 1.0092, "step": 2410 }, { "epoch": 0.4901402724130921, "grad_norm": 0.1193271204829216, "learning_rate": 0.00015108308756229025, "loss": 0.8615, "step": 2411 }, { "epoch": 0.49034356576539945, "grad_norm": 0.12478460371494293, "learning_rate": 0.00015106274788975898, "loss": 1.0334, "step": 2412 }, { "epoch": 0.49054685911770685, "grad_norm": 0.14054545760154724, "learning_rate": 0.0001510424082172277, "loss": 1.1424, "step": 2413 }, { "epoch": 0.49075015247001424, "grad_norm": 0.15053215622901917, "learning_rate": 0.00015102206854469643, "loss": 1.0335, "step": 2414 }, { "epoch": 0.4909534458223216, "grad_norm": 0.13923850655555725, "learning_rate": 0.00015100172887216518, "loss": 1.1668, "step": 2415 }, { "epoch": 0.491156739174629, "grad_norm": 0.13445380330085754, "learning_rate": 0.0001509813891996339, "loss": 1.1683, "step": 2416 }, { "epoch": 0.4913600325269364, "grad_norm": 0.14007751643657684, "learning_rate": 0.00015096104952710263, "loss": 1.1405, "step": 2417 }, { "epoch": 0.49156332587924373, "grad_norm": 0.1334713101387024, "learning_rate": 0.00015094070985457135, "loss": 1.1468, "step": 2418 }, { "epoch": 0.4917666192315511, "grad_norm": 0.12781627476215363, "learning_rate": 0.00015092037018204008, "loss": 1.1051, "step": 2419 }, { "epoch": 0.4919699125838585, "grad_norm": 0.1371796876192093, "learning_rate": 0.0001509000305095088, "loss": 1.1274, "step": 2420 }, { "epoch": 0.49217320593616587, "grad_norm": 0.15052980184555054, "learning_rate": 0.00015087969083697753, "loss": 1.1741, "step": 2421 }, { "epoch": 0.49237649928847327, "grad_norm": 0.12332694232463837, "learning_rate": 0.00015085935116444625, "loss": 1.0382, "step": 2422 }, { "epoch": 0.49257979264078067, "grad_norm": 0.1108141764998436, "learning_rate": 0.000150839011491915, "loss": 1.0011, "step": 2423 }, { "epoch": 0.492783085993088, "grad_norm": 0.13298697769641876, "learning_rate": 0.00015081867181938373, "loss": 1.1736, "step": 2424 }, { "epoch": 0.4929863793453954, "grad_norm": 0.1383012980222702, "learning_rate": 0.00015079833214685245, "loss": 1.2248, "step": 2425 }, { "epoch": 0.4931896726977028, "grad_norm": 0.13049232959747314, "learning_rate": 0.00015077799247432117, "loss": 1.0214, "step": 2426 }, { "epoch": 0.49339296605001015, "grad_norm": 0.14081017673015594, "learning_rate": 0.0001507576528017899, "loss": 1.1836, "step": 2427 }, { "epoch": 0.49359625940231755, "grad_norm": 0.14135879278182983, "learning_rate": 0.00015073731312925862, "loss": 1.0908, "step": 2428 }, { "epoch": 0.49379955275462495, "grad_norm": 0.12276162207126617, "learning_rate": 0.00015071697345672735, "loss": 1.0615, "step": 2429 }, { "epoch": 0.4940028461069323, "grad_norm": 0.13314439356327057, "learning_rate": 0.00015069663378419607, "loss": 1.0126, "step": 2430 }, { "epoch": 0.4942061394592397, "grad_norm": 0.13110828399658203, "learning_rate": 0.00015067629411166482, "loss": 1.0914, "step": 2431 }, { "epoch": 0.4944094328115471, "grad_norm": 0.14637964963912964, "learning_rate": 0.00015065595443913355, "loss": 1.1895, "step": 2432 }, { "epoch": 0.49461272616385443, "grad_norm": 0.13631272315979004, "learning_rate": 0.00015063561476660227, "loss": 1.0877, "step": 2433 }, { "epoch": 0.49481601951616183, "grad_norm": 0.12627999484539032, "learning_rate": 0.000150615275094071, "loss": 1.0902, "step": 2434 }, { "epoch": 0.4950193128684692, "grad_norm": 0.1452523022890091, "learning_rate": 0.00015059493542153972, "loss": 1.2531, "step": 2435 }, { "epoch": 0.4952226062207766, "grad_norm": 0.12937428057193756, "learning_rate": 0.00015057459574900845, "loss": 1.149, "step": 2436 }, { "epoch": 0.49542589957308397, "grad_norm": 0.1313169300556183, "learning_rate": 0.00015055425607647717, "loss": 1.1124, "step": 2437 }, { "epoch": 0.4956291929253913, "grad_norm": 0.13300736248493195, "learning_rate": 0.0001505339164039459, "loss": 1.2259, "step": 2438 }, { "epoch": 0.4958324862776987, "grad_norm": 0.12567725777626038, "learning_rate": 0.00015051357673141465, "loss": 1.1431, "step": 2439 }, { "epoch": 0.4960357796300061, "grad_norm": 0.12322575598955154, "learning_rate": 0.00015049323705888337, "loss": 1.0824, "step": 2440 }, { "epoch": 0.49623907298231346, "grad_norm": 0.11976869404315948, "learning_rate": 0.0001504728973863521, "loss": 1.057, "step": 2441 }, { "epoch": 0.49644236633462085, "grad_norm": 0.13577309250831604, "learning_rate": 0.00015045255771382082, "loss": 1.1027, "step": 2442 }, { "epoch": 0.49664565968692825, "grad_norm": 0.13949300348758698, "learning_rate": 0.00015043221804128954, "loss": 1.1102, "step": 2443 }, { "epoch": 0.4968489530392356, "grad_norm": 0.1493709832429886, "learning_rate": 0.00015041187836875827, "loss": 1.2468, "step": 2444 }, { "epoch": 0.497052246391543, "grad_norm": 0.13680393993854523, "learning_rate": 0.000150391538696227, "loss": 1.0607, "step": 2445 }, { "epoch": 0.4972555397438504, "grad_norm": 0.12200003862380981, "learning_rate": 0.00015037119902369572, "loss": 0.931, "step": 2446 }, { "epoch": 0.49745883309615774, "grad_norm": 0.12486010044813156, "learning_rate": 0.00015035085935116447, "loss": 1.0529, "step": 2447 }, { "epoch": 0.49766212644846514, "grad_norm": 0.14312241971492767, "learning_rate": 0.0001503305196786332, "loss": 1.1921, "step": 2448 }, { "epoch": 0.49786541980077254, "grad_norm": 0.1357506513595581, "learning_rate": 0.00015031018000610192, "loss": 1.0561, "step": 2449 }, { "epoch": 0.4980687131530799, "grad_norm": 0.14294788241386414, "learning_rate": 0.00015028984033357064, "loss": 0.9931, "step": 2450 }, { "epoch": 0.4982720065053873, "grad_norm": 0.1087241843342781, "learning_rate": 0.00015026950066103937, "loss": 1.0038, "step": 2451 }, { "epoch": 0.4984752998576947, "grad_norm": 0.11965546011924744, "learning_rate": 0.0001502491609885081, "loss": 1.0055, "step": 2452 }, { "epoch": 0.498678593210002, "grad_norm": 0.13440768420696259, "learning_rate": 0.00015022882131597682, "loss": 0.9883, "step": 2453 }, { "epoch": 0.4988818865623094, "grad_norm": 0.12496986985206604, "learning_rate": 0.00015020848164344554, "loss": 1.1175, "step": 2454 }, { "epoch": 0.4990851799146168, "grad_norm": 0.1410161703824997, "learning_rate": 0.00015018814197091427, "loss": 1.0976, "step": 2455 }, { "epoch": 0.49928847326692416, "grad_norm": 0.12262056767940521, "learning_rate": 0.00015016780229838302, "loss": 0.9429, "step": 2456 }, { "epoch": 0.49949176661923156, "grad_norm": 0.12424588203430176, "learning_rate": 0.00015014746262585174, "loss": 1.01, "step": 2457 }, { "epoch": 0.49969505997153896, "grad_norm": 0.12014136463403702, "learning_rate": 0.00015012712295332047, "loss": 1.0769, "step": 2458 }, { "epoch": 0.4998983533238463, "grad_norm": 0.12330099940299988, "learning_rate": 0.00015010678328078916, "loss": 0.9982, "step": 2459 }, { "epoch": 0.5001016466761536, "grad_norm": 0.12830835580825806, "learning_rate": 0.00015008644360825791, "loss": 1.1893, "step": 2460 }, { "epoch": 0.500304940028461, "grad_norm": 0.12516823410987854, "learning_rate": 0.00015006610393572664, "loss": 0.9871, "step": 2461 }, { "epoch": 0.5005082333807684, "grad_norm": 0.13631972670555115, "learning_rate": 0.00015004576426319536, "loss": 1.1454, "step": 2462 }, { "epoch": 0.5007115267330758, "grad_norm": 0.1340373456478119, "learning_rate": 0.0001500254245906641, "loss": 1.0478, "step": 2463 }, { "epoch": 0.5009148200853832, "grad_norm": 0.13719302415847778, "learning_rate": 0.00015000508491813284, "loss": 0.9423, "step": 2464 }, { "epoch": 0.5011181134376906, "grad_norm": 0.12467597424983978, "learning_rate": 0.00014998474524560156, "loss": 1.0493, "step": 2465 }, { "epoch": 0.5013214067899979, "grad_norm": 0.11926814168691635, "learning_rate": 0.0001499644055730703, "loss": 0.9844, "step": 2466 }, { "epoch": 0.5015247001423053, "grad_norm": 0.12327981740236282, "learning_rate": 0.00014994406590053899, "loss": 1.0336, "step": 2467 }, { "epoch": 0.5017279934946127, "grad_norm": 0.126510351896286, "learning_rate": 0.00014992372622800774, "loss": 1.0061, "step": 2468 }, { "epoch": 0.5019312868469201, "grad_norm": 0.12146785855293274, "learning_rate": 0.00014990338655547646, "loss": 1.011, "step": 2469 }, { "epoch": 0.5021345801992275, "grad_norm": 0.12402217090129852, "learning_rate": 0.00014988304688294519, "loss": 1.0177, "step": 2470 }, { "epoch": 0.5023378735515349, "grad_norm": 0.13122454285621643, "learning_rate": 0.0001498627072104139, "loss": 0.9743, "step": 2471 }, { "epoch": 0.5025411669038422, "grad_norm": 0.13217094540596008, "learning_rate": 0.00014984236753788266, "loss": 1.0227, "step": 2472 }, { "epoch": 0.5027444602561496, "grad_norm": 0.14987598359584808, "learning_rate": 0.0001498220278653514, "loss": 1.1522, "step": 2473 }, { "epoch": 0.502947753608457, "grad_norm": 0.13689711689949036, "learning_rate": 0.0001498016881928201, "loss": 1.0014, "step": 2474 }, { "epoch": 0.5031510469607644, "grad_norm": 0.11815892159938812, "learning_rate": 0.00014978134852028884, "loss": 0.8749, "step": 2475 }, { "epoch": 0.5033543403130718, "grad_norm": 0.11772647500038147, "learning_rate": 0.00014976100884775756, "loss": 0.9894, "step": 2476 }, { "epoch": 0.5035576336653791, "grad_norm": 0.13443076610565186, "learning_rate": 0.00014974066917522628, "loss": 1.1191, "step": 2477 }, { "epoch": 0.5037609270176865, "grad_norm": 0.13787920773029327, "learning_rate": 0.000149720329502695, "loss": 1.0513, "step": 2478 }, { "epoch": 0.5039642203699939, "grad_norm": 0.13152827322483063, "learning_rate": 0.00014969998983016373, "loss": 1.1012, "step": 2479 }, { "epoch": 0.5041675137223013, "grad_norm": 0.12392322719097137, "learning_rate": 0.00014967965015763249, "loss": 1.0068, "step": 2480 }, { "epoch": 0.5043708070746087, "grad_norm": 0.13253094255924225, "learning_rate": 0.0001496593104851012, "loss": 1.0683, "step": 2481 }, { "epoch": 0.5045741004269161, "grad_norm": 0.12664328515529633, "learning_rate": 0.00014963897081256993, "loss": 1.0333, "step": 2482 }, { "epoch": 0.5047773937792234, "grad_norm": 0.13020643591880798, "learning_rate": 0.00014961863114003866, "loss": 1.0567, "step": 2483 }, { "epoch": 0.5049806871315308, "grad_norm": 0.1261332482099533, "learning_rate": 0.00014959829146750738, "loss": 0.9574, "step": 2484 }, { "epoch": 0.5051839804838382, "grad_norm": 0.13825035095214844, "learning_rate": 0.0001495779517949761, "loss": 1.2168, "step": 2485 }, { "epoch": 0.5053872738361456, "grad_norm": 0.1333974152803421, "learning_rate": 0.00014955761212244483, "loss": 1.0775, "step": 2486 }, { "epoch": 0.505590567188453, "grad_norm": 0.12436322122812271, "learning_rate": 0.00014953727244991356, "loss": 1.0043, "step": 2487 }, { "epoch": 0.5057938605407604, "grad_norm": 0.13626371324062347, "learning_rate": 0.0001495169327773823, "loss": 1.1736, "step": 2488 }, { "epoch": 0.5059971538930677, "grad_norm": 0.13061967492103577, "learning_rate": 0.00014949659310485103, "loss": 0.9288, "step": 2489 }, { "epoch": 0.506200447245375, "grad_norm": 0.12033544480800629, "learning_rate": 0.00014947625343231976, "loss": 1.0222, "step": 2490 }, { "epoch": 0.5064037405976825, "grad_norm": 0.14046040177345276, "learning_rate": 0.00014945591375978848, "loss": 1.1382, "step": 2491 }, { "epoch": 0.5066070339499898, "grad_norm": 0.1174360066652298, "learning_rate": 0.0001494355740872572, "loss": 0.8663, "step": 2492 }, { "epoch": 0.5068103273022972, "grad_norm": 0.13645724952220917, "learning_rate": 0.00014941523441472593, "loss": 1.1139, "step": 2493 }, { "epoch": 0.5070136206546046, "grad_norm": 0.1309158205986023, "learning_rate": 0.00014939489474219465, "loss": 1.1784, "step": 2494 }, { "epoch": 0.5072169140069119, "grad_norm": 0.12230408936738968, "learning_rate": 0.00014937455506966338, "loss": 1.1231, "step": 2495 }, { "epoch": 0.5074202073592193, "grad_norm": 0.1440531313419342, "learning_rate": 0.0001493542153971321, "loss": 1.2055, "step": 2496 }, { "epoch": 0.5076235007115267, "grad_norm": 0.13199447095394135, "learning_rate": 0.00014933387572460086, "loss": 1.1044, "step": 2497 }, { "epoch": 0.5078267940638341, "grad_norm": 0.12693634629249573, "learning_rate": 0.00014931353605206958, "loss": 1.0495, "step": 2498 }, { "epoch": 0.5080300874161415, "grad_norm": 0.1196681559085846, "learning_rate": 0.0001492931963795383, "loss": 0.9505, "step": 2499 }, { "epoch": 0.5082333807684488, "grad_norm": 0.1331620216369629, "learning_rate": 0.000149272856707007, "loss": 1.0419, "step": 2500 }, { "epoch": 0.5084366741207562, "grad_norm": 0.12307044863700867, "learning_rate": 0.00014925251703447575, "loss": 1.0216, "step": 2501 }, { "epoch": 0.5086399674730636, "grad_norm": 0.14936399459838867, "learning_rate": 0.00014923217736194448, "loss": 1.2894, "step": 2502 }, { "epoch": 0.508843260825371, "grad_norm": 0.1165819764137268, "learning_rate": 0.0001492118376894132, "loss": 1.0173, "step": 2503 }, { "epoch": 0.5090465541776784, "grad_norm": 0.13525764644145966, "learning_rate": 0.00014919149801688193, "loss": 1.0883, "step": 2504 }, { "epoch": 0.5092498475299858, "grad_norm": 0.13654504716396332, "learning_rate": 0.00014917115834435068, "loss": 0.9356, "step": 2505 }, { "epoch": 0.5094531408822931, "grad_norm": 0.12151267379522324, "learning_rate": 0.0001491508186718194, "loss": 0.9508, "step": 2506 }, { "epoch": 0.5096564342346005, "grad_norm": 0.13334833085536957, "learning_rate": 0.00014913047899928813, "loss": 1.2175, "step": 2507 }, { "epoch": 0.5098597275869079, "grad_norm": 0.13975641131401062, "learning_rate": 0.00014911013932675682, "loss": 1.0501, "step": 2508 }, { "epoch": 0.5100630209392153, "grad_norm": 0.13203707337379456, "learning_rate": 0.00014908979965422558, "loss": 0.9835, "step": 2509 }, { "epoch": 0.5102663142915227, "grad_norm": 0.154182568192482, "learning_rate": 0.0001490694599816943, "loss": 1.1632, "step": 2510 }, { "epoch": 0.5104696076438301, "grad_norm": 0.13297821581363678, "learning_rate": 0.00014904912030916302, "loss": 0.9965, "step": 2511 }, { "epoch": 0.5106729009961374, "grad_norm": 0.123105987906456, "learning_rate": 0.00014902878063663175, "loss": 0.9264, "step": 2512 }, { "epoch": 0.5108761943484448, "grad_norm": 0.1457197219133377, "learning_rate": 0.0001490084409641005, "loss": 1.1452, "step": 2513 }, { "epoch": 0.5110794877007522, "grad_norm": 0.12882955372333527, "learning_rate": 0.00014898810129156923, "loss": 1.0295, "step": 2514 }, { "epoch": 0.5112827810530596, "grad_norm": 0.1381346881389618, "learning_rate": 0.00014896776161903795, "loss": 1.1416, "step": 2515 }, { "epoch": 0.511486074405367, "grad_norm": 0.12074743956327438, "learning_rate": 0.00014894742194650665, "loss": 0.9549, "step": 2516 }, { "epoch": 0.5116893677576744, "grad_norm": 0.12559756636619568, "learning_rate": 0.0001489270822739754, "loss": 1.014, "step": 2517 }, { "epoch": 0.5118926611099817, "grad_norm": 0.13586939871311188, "learning_rate": 0.00014890674260144412, "loss": 1.1621, "step": 2518 }, { "epoch": 0.5120959544622891, "grad_norm": 0.1177433580160141, "learning_rate": 0.00014888640292891285, "loss": 0.9666, "step": 2519 }, { "epoch": 0.5122992478145965, "grad_norm": 0.12881316244602203, "learning_rate": 0.00014886606325638157, "loss": 1.1345, "step": 2520 }, { "epoch": 0.5125025411669039, "grad_norm": 0.1258634775876999, "learning_rate": 0.00014884572358385032, "loss": 1.05, "step": 2521 }, { "epoch": 0.5127058345192113, "grad_norm": 0.12486784160137177, "learning_rate": 0.00014882538391131905, "loss": 1.144, "step": 2522 }, { "epoch": 0.5129091278715187, "grad_norm": 0.13641564548015594, "learning_rate": 0.00014880504423878777, "loss": 1.2183, "step": 2523 }, { "epoch": 0.5131124212238259, "grad_norm": 0.13277971744537354, "learning_rate": 0.00014878470456625647, "loss": 1.1815, "step": 2524 }, { "epoch": 0.5133157145761333, "grad_norm": 0.14261163771152496, "learning_rate": 0.00014876436489372522, "loss": 1.217, "step": 2525 }, { "epoch": 0.5135190079284407, "grad_norm": 0.13848505914211273, "learning_rate": 0.00014874402522119395, "loss": 1.2031, "step": 2526 }, { "epoch": 0.5137223012807481, "grad_norm": 0.10906849801540375, "learning_rate": 0.00014872368554866267, "loss": 0.9407, "step": 2527 }, { "epoch": 0.5139255946330555, "grad_norm": 0.13533109426498413, "learning_rate": 0.0001487033458761314, "loss": 1.1576, "step": 2528 }, { "epoch": 0.5141288879853628, "grad_norm": 0.13062264025211334, "learning_rate": 0.00014868300620360015, "loss": 1.1019, "step": 2529 }, { "epoch": 0.5143321813376702, "grad_norm": 0.1373278796672821, "learning_rate": 0.00014866266653106887, "loss": 0.9672, "step": 2530 }, { "epoch": 0.5145354746899776, "grad_norm": 0.15875272452831268, "learning_rate": 0.0001486423268585376, "loss": 1.3291, "step": 2531 }, { "epoch": 0.514738768042285, "grad_norm": 0.1146063432097435, "learning_rate": 0.0001486219871860063, "loss": 1.0362, "step": 2532 }, { "epoch": 0.5149420613945924, "grad_norm": 0.13759560883045197, "learning_rate": 0.00014860164751347504, "loss": 1.0423, "step": 2533 }, { "epoch": 0.5151453547468998, "grad_norm": 0.1348053216934204, "learning_rate": 0.00014858130784094377, "loss": 1.0733, "step": 2534 }, { "epoch": 0.5153486480992071, "grad_norm": 0.12033452838659286, "learning_rate": 0.0001485609681684125, "loss": 0.9471, "step": 2535 }, { "epoch": 0.5155519414515145, "grad_norm": 0.12116893380880356, "learning_rate": 0.00014854062849588122, "loss": 0.8554, "step": 2536 }, { "epoch": 0.5157552348038219, "grad_norm": 0.13480456173419952, "learning_rate": 0.00014852028882334994, "loss": 1.0257, "step": 2537 }, { "epoch": 0.5159585281561293, "grad_norm": 0.1279120147228241, "learning_rate": 0.0001484999491508187, "loss": 1.1841, "step": 2538 }, { "epoch": 0.5161618215084367, "grad_norm": 0.12960465252399445, "learning_rate": 0.00014847960947828742, "loss": 1.022, "step": 2539 }, { "epoch": 0.5163651148607441, "grad_norm": 0.12386467307806015, "learning_rate": 0.00014845926980575614, "loss": 0.9364, "step": 2540 }, { "epoch": 0.5165684082130514, "grad_norm": 0.1340230405330658, "learning_rate": 0.00014843893013322484, "loss": 1.1693, "step": 2541 }, { "epoch": 0.5167717015653588, "grad_norm": 0.13475503027439117, "learning_rate": 0.0001484185904606936, "loss": 1.1208, "step": 2542 }, { "epoch": 0.5169749949176662, "grad_norm": 0.13605645298957825, "learning_rate": 0.00014839825078816232, "loss": 1.0327, "step": 2543 }, { "epoch": 0.5171782882699736, "grad_norm": 0.11159854382276535, "learning_rate": 0.00014837791111563104, "loss": 0.9095, "step": 2544 }, { "epoch": 0.517381581622281, "grad_norm": 0.12562917172908783, "learning_rate": 0.00014835757144309976, "loss": 1.0037, "step": 2545 }, { "epoch": 0.5175848749745884, "grad_norm": 0.12805363535881042, "learning_rate": 0.00014833723177056852, "loss": 1.053, "step": 2546 }, { "epoch": 0.5177881683268957, "grad_norm": 0.1303015947341919, "learning_rate": 0.00014831689209803724, "loss": 1.0277, "step": 2547 }, { "epoch": 0.5179914616792031, "grad_norm": 0.13903219997882843, "learning_rate": 0.00014829655242550597, "loss": 1.1639, "step": 2548 }, { "epoch": 0.5181947550315105, "grad_norm": 0.13119028508663177, "learning_rate": 0.00014827621275297466, "loss": 0.9134, "step": 2549 }, { "epoch": 0.5183980483838179, "grad_norm": 0.12713825702667236, "learning_rate": 0.00014825587308044341, "loss": 1.0313, "step": 2550 }, { "epoch": 0.5186013417361253, "grad_norm": 0.13641834259033203, "learning_rate": 0.00014823553340791214, "loss": 1.0787, "step": 2551 }, { "epoch": 0.5188046350884326, "grad_norm": 0.1124555915594101, "learning_rate": 0.00014821519373538086, "loss": 0.9135, "step": 2552 }, { "epoch": 0.51900792844074, "grad_norm": 0.10946158319711685, "learning_rate": 0.0001481948540628496, "loss": 0.8105, "step": 2553 }, { "epoch": 0.5192112217930474, "grad_norm": 0.12753844261169434, "learning_rate": 0.00014817451439031834, "loss": 1.0308, "step": 2554 }, { "epoch": 0.5194145151453547, "grad_norm": 0.14424805343151093, "learning_rate": 0.00014815417471778706, "loss": 1.0104, "step": 2555 }, { "epoch": 0.5196178084976621, "grad_norm": 0.13107620179653168, "learning_rate": 0.0001481338350452558, "loss": 1.1754, "step": 2556 }, { "epoch": 0.5198211018499695, "grad_norm": 0.11977977305650711, "learning_rate": 0.00014811349537272449, "loss": 1.0019, "step": 2557 }, { "epoch": 0.5200243952022768, "grad_norm": 0.11917620897293091, "learning_rate": 0.00014809315570019324, "loss": 0.9636, "step": 2558 }, { "epoch": 0.5202276885545842, "grad_norm": 0.12576279044151306, "learning_rate": 0.00014807281602766196, "loss": 1.1341, "step": 2559 }, { "epoch": 0.5204309819068916, "grad_norm": 0.1402411311864853, "learning_rate": 0.00014805247635513069, "loss": 1.1186, "step": 2560 }, { "epoch": 0.520634275259199, "grad_norm": 0.15055212378501892, "learning_rate": 0.0001480321366825994, "loss": 1.1878, "step": 2561 }, { "epoch": 0.5208375686115064, "grad_norm": 0.11402598023414612, "learning_rate": 0.00014801179701006816, "loss": 0.7465, "step": 2562 }, { "epoch": 0.5210408619638138, "grad_norm": 0.12650637328624725, "learning_rate": 0.0001479914573375369, "loss": 1.0605, "step": 2563 }, { "epoch": 0.5212441553161211, "grad_norm": 0.13538390398025513, "learning_rate": 0.0001479711176650056, "loss": 0.9924, "step": 2564 }, { "epoch": 0.5214474486684285, "grad_norm": 0.12981672585010529, "learning_rate": 0.0001479507779924743, "loss": 1.0908, "step": 2565 }, { "epoch": 0.5216507420207359, "grad_norm": 0.13389542698860168, "learning_rate": 0.00014793043831994306, "loss": 1.0369, "step": 2566 }, { "epoch": 0.5218540353730433, "grad_norm": 0.1256348341703415, "learning_rate": 0.00014791009864741178, "loss": 1.1209, "step": 2567 }, { "epoch": 0.5220573287253507, "grad_norm": 0.13984240591526031, "learning_rate": 0.0001478897589748805, "loss": 1.2199, "step": 2568 }, { "epoch": 0.5222606220776581, "grad_norm": 0.12872397899627686, "learning_rate": 0.00014786941930234923, "loss": 1.0793, "step": 2569 }, { "epoch": 0.5224639154299654, "grad_norm": 0.12694962322711945, "learning_rate": 0.00014784907962981798, "loss": 0.9623, "step": 2570 }, { "epoch": 0.5226672087822728, "grad_norm": 0.13034392893314362, "learning_rate": 0.0001478287399572867, "loss": 1.2404, "step": 2571 }, { "epoch": 0.5228705021345802, "grad_norm": 0.1416521519422531, "learning_rate": 0.00014780840028475543, "loss": 1.2426, "step": 2572 }, { "epoch": 0.5230737954868876, "grad_norm": 0.12421387434005737, "learning_rate": 0.00014778806061222413, "loss": 1.0685, "step": 2573 }, { "epoch": 0.523277088839195, "grad_norm": 0.1387767344713211, "learning_rate": 0.00014776772093969288, "loss": 1.202, "step": 2574 }, { "epoch": 0.5234803821915024, "grad_norm": 0.13308827579021454, "learning_rate": 0.0001477473812671616, "loss": 1.2395, "step": 2575 }, { "epoch": 0.5236836755438097, "grad_norm": 0.15293751657009125, "learning_rate": 0.00014772704159463033, "loss": 1.1062, "step": 2576 }, { "epoch": 0.5238869688961171, "grad_norm": 0.1332782655954361, "learning_rate": 0.00014770670192209906, "loss": 1.1205, "step": 2577 }, { "epoch": 0.5240902622484245, "grad_norm": 0.11857607960700989, "learning_rate": 0.0001476863622495678, "loss": 1.1111, "step": 2578 }, { "epoch": 0.5242935556007319, "grad_norm": 0.13509269058704376, "learning_rate": 0.00014766602257703653, "loss": 1.0806, "step": 2579 }, { "epoch": 0.5244968489530393, "grad_norm": 0.12904144823551178, "learning_rate": 0.00014764568290450526, "loss": 1.021, "step": 2580 }, { "epoch": 0.5247001423053466, "grad_norm": 0.1381101906299591, "learning_rate": 0.00014762534323197395, "loss": 1.2025, "step": 2581 }, { "epoch": 0.524903435657654, "grad_norm": 0.13160142302513123, "learning_rate": 0.00014760500355944268, "loss": 1.0126, "step": 2582 }, { "epoch": 0.5251067290099614, "grad_norm": 0.14287696778774261, "learning_rate": 0.00014758466388691143, "loss": 1.1466, "step": 2583 }, { "epoch": 0.5253100223622688, "grad_norm": 0.13337363302707672, "learning_rate": 0.00014756432421438015, "loss": 1.0036, "step": 2584 }, { "epoch": 0.5255133157145762, "grad_norm": 0.14575807750225067, "learning_rate": 0.00014754398454184888, "loss": 1.1033, "step": 2585 }, { "epoch": 0.5257166090668836, "grad_norm": 0.12519006431102753, "learning_rate": 0.0001475236448693176, "loss": 1.004, "step": 2586 }, { "epoch": 0.5259199024191908, "grad_norm": 0.12951436638832092, "learning_rate": 0.00014750330519678635, "loss": 0.9679, "step": 2587 }, { "epoch": 0.5261231957714982, "grad_norm": 0.1465519517660141, "learning_rate": 0.00014748296552425508, "loss": 1.19, "step": 2588 }, { "epoch": 0.5263264891238056, "grad_norm": 0.12192967534065247, "learning_rate": 0.00014746262585172378, "loss": 1.0086, "step": 2589 }, { "epoch": 0.526529782476113, "grad_norm": 0.13444490730762482, "learning_rate": 0.0001474422861791925, "loss": 0.9185, "step": 2590 }, { "epoch": 0.5267330758284204, "grad_norm": 0.13128428161144257, "learning_rate": 0.00014742194650666125, "loss": 0.8979, "step": 2591 }, { "epoch": 0.5269363691807278, "grad_norm": 0.14445891976356506, "learning_rate": 0.00014740160683412998, "loss": 1.1736, "step": 2592 }, { "epoch": 0.5271396625330351, "grad_norm": 0.13069060444831848, "learning_rate": 0.0001473812671615987, "loss": 1.0791, "step": 2593 }, { "epoch": 0.5273429558853425, "grad_norm": 0.11903716623783112, "learning_rate": 0.00014736092748906743, "loss": 1.0983, "step": 2594 }, { "epoch": 0.5275462492376499, "grad_norm": 0.14502301812171936, "learning_rate": 0.00014734058781653618, "loss": 1.1266, "step": 2595 }, { "epoch": 0.5277495425899573, "grad_norm": 0.12276476621627808, "learning_rate": 0.0001473202481440049, "loss": 0.9658, "step": 2596 }, { "epoch": 0.5279528359422647, "grad_norm": 0.1322438269853592, "learning_rate": 0.00014729990847147363, "loss": 1.068, "step": 2597 }, { "epoch": 0.5281561292945721, "grad_norm": 0.12933704257011414, "learning_rate": 0.00014727956879894232, "loss": 0.9955, "step": 2598 }, { "epoch": 0.5283594226468794, "grad_norm": 0.13503174483776093, "learning_rate": 0.00014725922912641108, "loss": 1.117, "step": 2599 }, { "epoch": 0.5285627159991868, "grad_norm": 0.13893373310565948, "learning_rate": 0.0001472388894538798, "loss": 1.1355, "step": 2600 }, { "epoch": 0.5287660093514942, "grad_norm": 0.13064657151699066, "learning_rate": 0.00014721854978134852, "loss": 0.9111, "step": 2601 }, { "epoch": 0.5289693027038016, "grad_norm": 0.13640174269676208, "learning_rate": 0.00014719821010881725, "loss": 1.1903, "step": 2602 }, { "epoch": 0.529172596056109, "grad_norm": 0.13113752007484436, "learning_rate": 0.000147177870436286, "loss": 0.926, "step": 2603 }, { "epoch": 0.5293758894084164, "grad_norm": 0.15011656284332275, "learning_rate": 0.00014715753076375472, "loss": 1.0861, "step": 2604 }, { "epoch": 0.5295791827607237, "grad_norm": 0.1330660730600357, "learning_rate": 0.00014713719109122345, "loss": 1.1027, "step": 2605 }, { "epoch": 0.5297824761130311, "grad_norm": 0.1252673864364624, "learning_rate": 0.00014711685141869215, "loss": 0.9304, "step": 2606 }, { "epoch": 0.5299857694653385, "grad_norm": 0.12724831700325012, "learning_rate": 0.0001470965117461609, "loss": 1.0286, "step": 2607 }, { "epoch": 0.5301890628176459, "grad_norm": 0.12352915853261948, "learning_rate": 0.00014707617207362962, "loss": 1.0159, "step": 2608 }, { "epoch": 0.5303923561699533, "grad_norm": 0.1302500218153, "learning_rate": 0.00014705583240109835, "loss": 1.0642, "step": 2609 }, { "epoch": 0.5305956495222606, "grad_norm": 0.12427016347646713, "learning_rate": 0.00014703549272856707, "loss": 0.9496, "step": 2610 }, { "epoch": 0.530798942874568, "grad_norm": 0.13810168206691742, "learning_rate": 0.00014701515305603582, "loss": 1.0421, "step": 2611 }, { "epoch": 0.5310022362268754, "grad_norm": 0.1359987109899521, "learning_rate": 0.00014699481338350455, "loss": 0.9605, "step": 2612 }, { "epoch": 0.5312055295791828, "grad_norm": 0.1282379925251007, "learning_rate": 0.00014697447371097327, "loss": 1.0654, "step": 2613 }, { "epoch": 0.5314088229314902, "grad_norm": 0.1283995509147644, "learning_rate": 0.00014695413403844197, "loss": 1.0312, "step": 2614 }, { "epoch": 0.5316121162837976, "grad_norm": 0.12052475661039352, "learning_rate": 0.00014693379436591072, "loss": 1.1057, "step": 2615 }, { "epoch": 0.5318154096361049, "grad_norm": 0.13645312190055847, "learning_rate": 0.00014691345469337945, "loss": 1.0701, "step": 2616 }, { "epoch": 0.5320187029884123, "grad_norm": 0.13875778019428253, "learning_rate": 0.00014689311502084817, "loss": 1.2868, "step": 2617 }, { "epoch": 0.5322219963407196, "grad_norm": 0.12762780487537384, "learning_rate": 0.0001468727753483169, "loss": 0.9696, "step": 2618 }, { "epoch": 0.532425289693027, "grad_norm": 0.14250846207141876, "learning_rate": 0.00014685243567578565, "loss": 1.1762, "step": 2619 }, { "epoch": 0.5326285830453344, "grad_norm": 0.10621387511491776, "learning_rate": 0.00014683209600325437, "loss": 0.8195, "step": 2620 }, { "epoch": 0.5328318763976418, "grad_norm": 0.14604990184307098, "learning_rate": 0.0001468117563307231, "loss": 1.0794, "step": 2621 }, { "epoch": 0.5330351697499491, "grad_norm": 0.13326723873615265, "learning_rate": 0.0001467914166581918, "loss": 1.0018, "step": 2622 }, { "epoch": 0.5332384631022565, "grad_norm": 0.12089519202709198, "learning_rate": 0.00014677107698566052, "loss": 1.0615, "step": 2623 }, { "epoch": 0.5334417564545639, "grad_norm": 0.1269814819097519, "learning_rate": 0.00014675073731312927, "loss": 0.9591, "step": 2624 }, { "epoch": 0.5336450498068713, "grad_norm": 0.13674674928188324, "learning_rate": 0.000146730397640598, "loss": 1.0315, "step": 2625 }, { "epoch": 0.5338483431591787, "grad_norm": 0.1372392177581787, "learning_rate": 0.00014671005796806672, "loss": 1.0483, "step": 2626 }, { "epoch": 0.5340516365114861, "grad_norm": 0.12088494002819061, "learning_rate": 0.00014668971829553544, "loss": 0.9172, "step": 2627 }, { "epoch": 0.5342549298637934, "grad_norm": 0.1240740641951561, "learning_rate": 0.0001466693786230042, "loss": 1.0149, "step": 2628 }, { "epoch": 0.5344582232161008, "grad_norm": 0.13450276851654053, "learning_rate": 0.00014664903895047292, "loss": 1.0719, "step": 2629 }, { "epoch": 0.5346615165684082, "grad_norm": 0.12809321284294128, "learning_rate": 0.00014662869927794162, "loss": 0.9452, "step": 2630 }, { "epoch": 0.5348648099207156, "grad_norm": 0.1411091536283493, "learning_rate": 0.00014660835960541034, "loss": 1.2103, "step": 2631 }, { "epoch": 0.535068103273023, "grad_norm": 0.12086781114339828, "learning_rate": 0.0001465880199328791, "loss": 0.8857, "step": 2632 }, { "epoch": 0.5352713966253303, "grad_norm": 0.13093651831150055, "learning_rate": 0.00014656768026034782, "loss": 1.1842, "step": 2633 }, { "epoch": 0.5354746899776377, "grad_norm": 0.11652904748916626, "learning_rate": 0.00014654734058781654, "loss": 1.0096, "step": 2634 }, { "epoch": 0.5356779833299451, "grad_norm": 0.13243702054023743, "learning_rate": 0.00014652700091528526, "loss": 1.1378, "step": 2635 }, { "epoch": 0.5358812766822525, "grad_norm": 0.14085280895233154, "learning_rate": 0.00014650666124275402, "loss": 1.0759, "step": 2636 }, { "epoch": 0.5360845700345599, "grad_norm": 0.126717671751976, "learning_rate": 0.00014648632157022274, "loss": 1.0012, "step": 2637 }, { "epoch": 0.5362878633868673, "grad_norm": 0.12660568952560425, "learning_rate": 0.00014646598189769144, "loss": 0.9195, "step": 2638 }, { "epoch": 0.5364911567391746, "grad_norm": 0.12521329522132874, "learning_rate": 0.00014644564222516016, "loss": 1.1181, "step": 2639 }, { "epoch": 0.536694450091482, "grad_norm": 0.1392340511083603, "learning_rate": 0.00014642530255262891, "loss": 1.1255, "step": 2640 }, { "epoch": 0.5368977434437894, "grad_norm": 0.1406872570514679, "learning_rate": 0.00014640496288009764, "loss": 1.0753, "step": 2641 }, { "epoch": 0.5371010367960968, "grad_norm": 0.12615209817886353, "learning_rate": 0.00014638462320756636, "loss": 0.9859, "step": 2642 }, { "epoch": 0.5373043301484042, "grad_norm": 0.12144862115383148, "learning_rate": 0.0001463642835350351, "loss": 0.9186, "step": 2643 }, { "epoch": 0.5375076235007116, "grad_norm": 0.12902086973190308, "learning_rate": 0.00014634394386250384, "loss": 1.1739, "step": 2644 }, { "epoch": 0.5377109168530189, "grad_norm": 0.12960048019886017, "learning_rate": 0.00014632360418997256, "loss": 1.0439, "step": 2645 }, { "epoch": 0.5379142102053263, "grad_norm": 0.12488772720098495, "learning_rate": 0.00014630326451744126, "loss": 1.0363, "step": 2646 }, { "epoch": 0.5381175035576337, "grad_norm": 0.14255747199058533, "learning_rate": 0.00014628292484490999, "loss": 1.1972, "step": 2647 }, { "epoch": 0.5383207969099411, "grad_norm": 0.11950040608644485, "learning_rate": 0.00014626258517237874, "loss": 0.931, "step": 2648 }, { "epoch": 0.5385240902622485, "grad_norm": 0.1382722705602646, "learning_rate": 0.00014624224549984746, "loss": 1.1622, "step": 2649 }, { "epoch": 0.5387273836145559, "grad_norm": 0.13348785042762756, "learning_rate": 0.00014622190582731619, "loss": 1.1186, "step": 2650 }, { "epoch": 0.5389306769668631, "grad_norm": 0.1255137175321579, "learning_rate": 0.0001462015661547849, "loss": 1.1545, "step": 2651 }, { "epoch": 0.5391339703191705, "grad_norm": 0.12063666433095932, "learning_rate": 0.00014618122648225366, "loss": 0.9628, "step": 2652 }, { "epoch": 0.5393372636714779, "grad_norm": 0.1361551582813263, "learning_rate": 0.00014616088680972239, "loss": 1.1738, "step": 2653 }, { "epoch": 0.5395405570237853, "grad_norm": 0.14640627801418304, "learning_rate": 0.0001461405471371911, "loss": 1.2436, "step": 2654 }, { "epoch": 0.5397438503760927, "grad_norm": 0.13391757011413574, "learning_rate": 0.0001461202074646598, "loss": 1.0063, "step": 2655 }, { "epoch": 0.5399471437284001, "grad_norm": 0.13022476434707642, "learning_rate": 0.00014609986779212856, "loss": 0.9757, "step": 2656 }, { "epoch": 0.5401504370807074, "grad_norm": 0.12605974078178406, "learning_rate": 0.00014607952811959728, "loss": 1.0168, "step": 2657 }, { "epoch": 0.5403537304330148, "grad_norm": 0.12972256541252136, "learning_rate": 0.000146059188447066, "loss": 1.1375, "step": 2658 }, { "epoch": 0.5405570237853222, "grad_norm": 0.12093812972307205, "learning_rate": 0.00014603884877453473, "loss": 1.0279, "step": 2659 }, { "epoch": 0.5407603171376296, "grad_norm": 0.13197238743305206, "learning_rate": 0.00014601850910200348, "loss": 1.1084, "step": 2660 }, { "epoch": 0.540963610489937, "grad_norm": 0.14289307594299316, "learning_rate": 0.0001459981694294722, "loss": 0.9985, "step": 2661 }, { "epoch": 0.5411669038422443, "grad_norm": 0.12929311394691467, "learning_rate": 0.00014597782975694093, "loss": 1.1129, "step": 2662 }, { "epoch": 0.5413701971945517, "grad_norm": 0.12893937528133392, "learning_rate": 0.00014595749008440963, "loss": 0.9588, "step": 2663 }, { "epoch": 0.5415734905468591, "grad_norm": 0.1215519979596138, "learning_rate": 0.00014593715041187836, "loss": 1.0356, "step": 2664 }, { "epoch": 0.5417767838991665, "grad_norm": 0.12775017321109772, "learning_rate": 0.0001459168107393471, "loss": 0.9293, "step": 2665 }, { "epoch": 0.5419800772514739, "grad_norm": 0.13559330999851227, "learning_rate": 0.00014589647106681583, "loss": 1.0579, "step": 2666 }, { "epoch": 0.5421833706037813, "grad_norm": 0.13883750140666962, "learning_rate": 0.00014587613139428456, "loss": 1.1288, "step": 2667 }, { "epoch": 0.5423866639560886, "grad_norm": 0.12956243753433228, "learning_rate": 0.00014585579172175328, "loss": 1.0156, "step": 2668 }, { "epoch": 0.542589957308396, "grad_norm": 0.12133780866861343, "learning_rate": 0.00014583545204922203, "loss": 1.0124, "step": 2669 }, { "epoch": 0.5427932506607034, "grad_norm": 0.13446684181690216, "learning_rate": 0.00014581511237669076, "loss": 1.1288, "step": 2670 }, { "epoch": 0.5429965440130108, "grad_norm": 0.1329856663942337, "learning_rate": 0.00014579477270415945, "loss": 1.009, "step": 2671 }, { "epoch": 0.5431998373653182, "grad_norm": 0.1257416158914566, "learning_rate": 0.00014577443303162818, "loss": 0.9678, "step": 2672 }, { "epoch": 0.5434031307176256, "grad_norm": 0.118684783577919, "learning_rate": 0.00014575409335909693, "loss": 0.9732, "step": 2673 }, { "epoch": 0.5436064240699329, "grad_norm": 0.12146252393722534, "learning_rate": 0.00014573375368656565, "loss": 1.0225, "step": 2674 }, { "epoch": 0.5438097174222403, "grad_norm": 0.13205134868621826, "learning_rate": 0.00014571341401403438, "loss": 1.1626, "step": 2675 }, { "epoch": 0.5440130107745477, "grad_norm": 0.1180446445941925, "learning_rate": 0.0001456930743415031, "loss": 0.9787, "step": 2676 }, { "epoch": 0.5442163041268551, "grad_norm": 0.12436480075120926, "learning_rate": 0.00014567273466897185, "loss": 0.9211, "step": 2677 }, { "epoch": 0.5444195974791625, "grad_norm": 0.13441622257232666, "learning_rate": 0.00014565239499644058, "loss": 1.082, "step": 2678 }, { "epoch": 0.5446228908314699, "grad_norm": 0.13546496629714966, "learning_rate": 0.00014563205532390928, "loss": 0.9564, "step": 2679 }, { "epoch": 0.5448261841837772, "grad_norm": 0.13210104405879974, "learning_rate": 0.000145611715651378, "loss": 1.1043, "step": 2680 }, { "epoch": 0.5450294775360846, "grad_norm": 0.12021714448928833, "learning_rate": 0.00014559137597884675, "loss": 1.0093, "step": 2681 }, { "epoch": 0.545232770888392, "grad_norm": 0.14060239493846893, "learning_rate": 0.00014557103630631548, "loss": 0.8708, "step": 2682 }, { "epoch": 0.5454360642406993, "grad_norm": 0.10503846406936646, "learning_rate": 0.0001455506966337842, "loss": 0.8049, "step": 2683 }, { "epoch": 0.5456393575930067, "grad_norm": 0.1391855627298355, "learning_rate": 0.00014553035696125293, "loss": 1.1862, "step": 2684 }, { "epoch": 0.545842650945314, "grad_norm": 0.13078033924102783, "learning_rate": 0.00014551001728872168, "loss": 1.0238, "step": 2685 }, { "epoch": 0.5460459442976214, "grad_norm": 0.12442688643932343, "learning_rate": 0.0001454896776161904, "loss": 0.9966, "step": 2686 }, { "epoch": 0.5462492376499288, "grad_norm": 0.11848010122776031, "learning_rate": 0.0001454693379436591, "loss": 0.9754, "step": 2687 }, { "epoch": 0.5464525310022362, "grad_norm": 0.13601583242416382, "learning_rate": 0.00014544899827112782, "loss": 1.0994, "step": 2688 }, { "epoch": 0.5466558243545436, "grad_norm": 0.13946221768856049, "learning_rate": 0.00014542865859859658, "loss": 1.1127, "step": 2689 }, { "epoch": 0.546859117706851, "grad_norm": 0.1452294886112213, "learning_rate": 0.0001454083189260653, "loss": 1.146, "step": 2690 }, { "epoch": 0.5470624110591583, "grad_norm": 0.14542357623577118, "learning_rate": 0.00014538797925353402, "loss": 1.1916, "step": 2691 }, { "epoch": 0.5472657044114657, "grad_norm": 0.11541703343391418, "learning_rate": 0.00014536763958100275, "loss": 0.9437, "step": 2692 }, { "epoch": 0.5474689977637731, "grad_norm": 0.12495800107717514, "learning_rate": 0.0001453472999084715, "loss": 1.0014, "step": 2693 }, { "epoch": 0.5476722911160805, "grad_norm": 0.13895189762115479, "learning_rate": 0.00014532696023594022, "loss": 1.1139, "step": 2694 }, { "epoch": 0.5478755844683879, "grad_norm": 0.12779201567173004, "learning_rate": 0.00014530662056340892, "loss": 1.0702, "step": 2695 }, { "epoch": 0.5480788778206953, "grad_norm": 0.14240634441375732, "learning_rate": 0.00014528628089087765, "loss": 1.1233, "step": 2696 }, { "epoch": 0.5482821711730026, "grad_norm": 0.12415528297424316, "learning_rate": 0.0001452659412183464, "loss": 1.0954, "step": 2697 }, { "epoch": 0.54848546452531, "grad_norm": 0.13816578686237335, "learning_rate": 0.00014524560154581512, "loss": 1.2772, "step": 2698 }, { "epoch": 0.5486887578776174, "grad_norm": 0.12729184329509735, "learning_rate": 0.00014522526187328385, "loss": 1.0519, "step": 2699 }, { "epoch": 0.5488920512299248, "grad_norm": 0.12732116878032684, "learning_rate": 0.00014520492220075257, "loss": 1.0562, "step": 2700 }, { "epoch": 0.5490953445822322, "grad_norm": 0.12312710285186768, "learning_rate": 0.00014518458252822132, "loss": 1.0082, "step": 2701 }, { "epoch": 0.5492986379345396, "grad_norm": 0.1302732676267624, "learning_rate": 0.00014516424285569005, "loss": 1.1847, "step": 2702 }, { "epoch": 0.5495019312868469, "grad_norm": 0.13683298230171204, "learning_rate": 0.00014514390318315874, "loss": 1.1784, "step": 2703 }, { "epoch": 0.5497052246391543, "grad_norm": 0.1429167538881302, "learning_rate": 0.00014512356351062747, "loss": 1.2792, "step": 2704 }, { "epoch": 0.5499085179914617, "grad_norm": 0.136098712682724, "learning_rate": 0.0001451032238380962, "loss": 1.2149, "step": 2705 }, { "epoch": 0.5501118113437691, "grad_norm": 0.1201593205332756, "learning_rate": 0.00014508288416556495, "loss": 0.9673, "step": 2706 }, { "epoch": 0.5503151046960765, "grad_norm": 0.10153687000274658, "learning_rate": 0.00014506254449303367, "loss": 0.8142, "step": 2707 }, { "epoch": 0.5505183980483839, "grad_norm": 0.11609897762537003, "learning_rate": 0.0001450422048205024, "loss": 1.0168, "step": 2708 }, { "epoch": 0.5507216914006912, "grad_norm": 0.12856177985668182, "learning_rate": 0.00014502186514797112, "loss": 1.1785, "step": 2709 }, { "epoch": 0.5509249847529986, "grad_norm": 0.11091580241918564, "learning_rate": 0.00014500152547543987, "loss": 0.9282, "step": 2710 }, { "epoch": 0.551128278105306, "grad_norm": 0.13458651304244995, "learning_rate": 0.0001449811858029086, "loss": 1.1647, "step": 2711 }, { "epoch": 0.5513315714576134, "grad_norm": 0.12265376001596451, "learning_rate": 0.0001449608461303773, "loss": 1.0149, "step": 2712 }, { "epoch": 0.5515348648099208, "grad_norm": 0.12033109366893768, "learning_rate": 0.00014494050645784602, "loss": 0.9313, "step": 2713 }, { "epoch": 0.551738158162228, "grad_norm": 0.13308046758174896, "learning_rate": 0.00014492016678531477, "loss": 1.044, "step": 2714 }, { "epoch": 0.5519414515145354, "grad_norm": 0.12852205336093903, "learning_rate": 0.0001448998271127835, "loss": 1.0578, "step": 2715 }, { "epoch": 0.5521447448668428, "grad_norm": 0.13972130417823792, "learning_rate": 0.00014487948744025222, "loss": 1.0903, "step": 2716 }, { "epoch": 0.5523480382191502, "grad_norm": 0.14152394235134125, "learning_rate": 0.00014485914776772094, "loss": 1.2561, "step": 2717 }, { "epoch": 0.5525513315714576, "grad_norm": 0.1381314992904663, "learning_rate": 0.0001448388080951897, "loss": 1.1794, "step": 2718 }, { "epoch": 0.552754624923765, "grad_norm": 0.11829142272472382, "learning_rate": 0.00014481846842265842, "loss": 0.9385, "step": 2719 }, { "epoch": 0.5529579182760723, "grad_norm": 0.13279980421066284, "learning_rate": 0.00014479812875012711, "loss": 1.0034, "step": 2720 }, { "epoch": 0.5531612116283797, "grad_norm": 0.1229550689458847, "learning_rate": 0.00014477778907759584, "loss": 0.9929, "step": 2721 }, { "epoch": 0.5533645049806871, "grad_norm": 0.12663327157497406, "learning_rate": 0.0001447574494050646, "loss": 1.0095, "step": 2722 }, { "epoch": 0.5535677983329945, "grad_norm": 0.14191538095474243, "learning_rate": 0.00014473710973253332, "loss": 1.0165, "step": 2723 }, { "epoch": 0.5537710916853019, "grad_norm": 0.12460799515247345, "learning_rate": 0.00014471677006000204, "loss": 0.8327, "step": 2724 }, { "epoch": 0.5539743850376093, "grad_norm": 0.11980767548084259, "learning_rate": 0.00014469643038747076, "loss": 0.9807, "step": 2725 }, { "epoch": 0.5541776783899166, "grad_norm": 0.12429416179656982, "learning_rate": 0.00014467609071493952, "loss": 1.0604, "step": 2726 }, { "epoch": 0.554380971742224, "grad_norm": 0.14179259538650513, "learning_rate": 0.00014465575104240824, "loss": 1.1217, "step": 2727 }, { "epoch": 0.5545842650945314, "grad_norm": 0.12223639339208603, "learning_rate": 0.00014463541136987694, "loss": 0.9083, "step": 2728 }, { "epoch": 0.5547875584468388, "grad_norm": 0.13745662569999695, "learning_rate": 0.00014461507169734566, "loss": 1.0864, "step": 2729 }, { "epoch": 0.5549908517991462, "grad_norm": 0.12111254036426544, "learning_rate": 0.00014459473202481441, "loss": 0.995, "step": 2730 }, { "epoch": 0.5551941451514536, "grad_norm": 0.14073847234249115, "learning_rate": 0.00014457439235228314, "loss": 1.0003, "step": 2731 }, { "epoch": 0.5553974385037609, "grad_norm": 0.13188788294792175, "learning_rate": 0.00014455405267975186, "loss": 1.1628, "step": 2732 }, { "epoch": 0.5556007318560683, "grad_norm": 0.10727431625127792, "learning_rate": 0.0001445337130072206, "loss": 0.9385, "step": 2733 }, { "epoch": 0.5558040252083757, "grad_norm": 0.12671469151973724, "learning_rate": 0.00014451337333468934, "loss": 0.984, "step": 2734 }, { "epoch": 0.5560073185606831, "grad_norm": 0.12647178769111633, "learning_rate": 0.00014449303366215806, "loss": 1.0865, "step": 2735 }, { "epoch": 0.5562106119129905, "grad_norm": 0.1198342889547348, "learning_rate": 0.00014447269398962676, "loss": 1.0589, "step": 2736 }, { "epoch": 0.5564139052652978, "grad_norm": 0.13245652616024017, "learning_rate": 0.00014445235431709548, "loss": 0.9953, "step": 2737 }, { "epoch": 0.5566171986176052, "grad_norm": 0.11206847429275513, "learning_rate": 0.00014443201464456424, "loss": 0.8762, "step": 2738 }, { "epoch": 0.5568204919699126, "grad_norm": 0.16584132611751556, "learning_rate": 0.00014441167497203296, "loss": 1.2808, "step": 2739 }, { "epoch": 0.55702378532222, "grad_norm": 0.1278923898935318, "learning_rate": 0.00014439133529950169, "loss": 1.1515, "step": 2740 }, { "epoch": 0.5572270786745274, "grad_norm": 0.1336185187101364, "learning_rate": 0.0001443709956269704, "loss": 1.0372, "step": 2741 }, { "epoch": 0.5574303720268348, "grad_norm": 0.13731592893600464, "learning_rate": 0.00014435065595443916, "loss": 1.0837, "step": 2742 }, { "epoch": 0.557633665379142, "grad_norm": 0.13053496181964874, "learning_rate": 0.00014433031628190789, "loss": 0.9402, "step": 2743 }, { "epoch": 0.5578369587314495, "grad_norm": 0.14074589312076569, "learning_rate": 0.00014430997660937658, "loss": 1.1168, "step": 2744 }, { "epoch": 0.5580402520837568, "grad_norm": 0.1500421017408371, "learning_rate": 0.0001442896369368453, "loss": 1.0726, "step": 2745 }, { "epoch": 0.5582435454360642, "grad_norm": 0.14489975571632385, "learning_rate": 0.00014426929726431403, "loss": 1.263, "step": 2746 }, { "epoch": 0.5584468387883716, "grad_norm": 0.14446121454238892, "learning_rate": 0.00014424895759178278, "loss": 1.2142, "step": 2747 }, { "epoch": 0.558650132140679, "grad_norm": 0.13410677015781403, "learning_rate": 0.0001442286179192515, "loss": 1.0715, "step": 2748 }, { "epoch": 0.5588534254929863, "grad_norm": 0.1425483077764511, "learning_rate": 0.00014420827824672023, "loss": 0.9858, "step": 2749 }, { "epoch": 0.5590567188452937, "grad_norm": 0.13073715567588806, "learning_rate": 0.00014418793857418896, "loss": 1.0657, "step": 2750 }, { "epoch": 0.5592600121976011, "grad_norm": 0.1257767379283905, "learning_rate": 0.0001441675989016577, "loss": 1.0749, "step": 2751 }, { "epoch": 0.5594633055499085, "grad_norm": 0.1408379077911377, "learning_rate": 0.0001441472592291264, "loss": 1.1656, "step": 2752 }, { "epoch": 0.5596665989022159, "grad_norm": 0.13191954791545868, "learning_rate": 0.00014412691955659513, "loss": 1.0291, "step": 2753 }, { "epoch": 0.5598698922545233, "grad_norm": 0.12902916967868805, "learning_rate": 0.00014410657988406385, "loss": 1.075, "step": 2754 }, { "epoch": 0.5600731856068306, "grad_norm": 0.13078373670578003, "learning_rate": 0.0001440862402115326, "loss": 1.1693, "step": 2755 }, { "epoch": 0.560276478959138, "grad_norm": 0.1379525512456894, "learning_rate": 0.00014406590053900133, "loss": 1.1614, "step": 2756 }, { "epoch": 0.5604797723114454, "grad_norm": 0.12570443749427795, "learning_rate": 0.00014404556086647006, "loss": 1.0245, "step": 2757 }, { "epoch": 0.5606830656637528, "grad_norm": 0.13668902218341827, "learning_rate": 0.00014402522119393878, "loss": 1.1636, "step": 2758 }, { "epoch": 0.5608863590160602, "grad_norm": 0.13914015889167786, "learning_rate": 0.00014400488152140753, "loss": 1.0136, "step": 2759 }, { "epoch": 0.5610896523683676, "grad_norm": 0.13811741769313812, "learning_rate": 0.00014398454184887623, "loss": 1.2955, "step": 2760 }, { "epoch": 0.5612929457206749, "grad_norm": 0.14095258712768555, "learning_rate": 0.00014396420217634495, "loss": 1.1373, "step": 2761 }, { "epoch": 0.5614962390729823, "grad_norm": 0.11365115642547607, "learning_rate": 0.00014394386250381368, "loss": 0.827, "step": 2762 }, { "epoch": 0.5616995324252897, "grad_norm": 0.1321718692779541, "learning_rate": 0.00014392352283128243, "loss": 1.0739, "step": 2763 }, { "epoch": 0.5619028257775971, "grad_norm": 0.13008981943130493, "learning_rate": 0.00014390318315875115, "loss": 1.0253, "step": 2764 }, { "epoch": 0.5621061191299045, "grad_norm": 0.11360891908407211, "learning_rate": 0.00014388284348621988, "loss": 0.8921, "step": 2765 }, { "epoch": 0.5623094124822118, "grad_norm": 0.1246936097741127, "learning_rate": 0.0001438625038136886, "loss": 0.8497, "step": 2766 }, { "epoch": 0.5625127058345192, "grad_norm": 0.14330574870109558, "learning_rate": 0.00014384216414115735, "loss": 1.1843, "step": 2767 }, { "epoch": 0.5627159991868266, "grad_norm": 0.1149834543466568, "learning_rate": 0.00014382182446862608, "loss": 0.8757, "step": 2768 }, { "epoch": 0.562919292539134, "grad_norm": 0.13841336965560913, "learning_rate": 0.00014380148479609478, "loss": 1.0849, "step": 2769 }, { "epoch": 0.5631225858914414, "grad_norm": 0.12189842760562897, "learning_rate": 0.0001437811451235635, "loss": 1.0182, "step": 2770 }, { "epoch": 0.5633258792437488, "grad_norm": 0.14273017644882202, "learning_rate": 0.00014376080545103225, "loss": 1.1301, "step": 2771 }, { "epoch": 0.5635291725960561, "grad_norm": 0.13799621164798737, "learning_rate": 0.00014374046577850098, "loss": 1.0078, "step": 2772 }, { "epoch": 0.5637324659483635, "grad_norm": 0.1299772560596466, "learning_rate": 0.0001437201261059697, "loss": 0.9765, "step": 2773 }, { "epoch": 0.5639357593006709, "grad_norm": 0.13939563930034637, "learning_rate": 0.00014369978643343843, "loss": 1.1519, "step": 2774 }, { "epoch": 0.5641390526529783, "grad_norm": 0.14570674300193787, "learning_rate": 0.00014367944676090718, "loss": 1.0858, "step": 2775 }, { "epoch": 0.5643423460052857, "grad_norm": 0.12805186212062836, "learning_rate": 0.0001436591070883759, "loss": 0.916, "step": 2776 }, { "epoch": 0.564545639357593, "grad_norm": 0.12251273542642593, "learning_rate": 0.0001436387674158446, "loss": 1.0465, "step": 2777 }, { "epoch": 0.5647489327099003, "grad_norm": 0.1256076544523239, "learning_rate": 0.00014361842774331332, "loss": 0.9972, "step": 2778 }, { "epoch": 0.5649522260622077, "grad_norm": 0.12593501806259155, "learning_rate": 0.00014359808807078207, "loss": 0.961, "step": 2779 }, { "epoch": 0.5651555194145151, "grad_norm": 0.1273297369480133, "learning_rate": 0.0001435777483982508, "loss": 0.9951, "step": 2780 }, { "epoch": 0.5653588127668225, "grad_norm": 0.1263994574546814, "learning_rate": 0.00014355740872571952, "loss": 1.0616, "step": 2781 }, { "epoch": 0.5655621061191299, "grad_norm": 0.11736489087343216, "learning_rate": 0.00014353706905318825, "loss": 0.9839, "step": 2782 }, { "epoch": 0.5657653994714373, "grad_norm": 0.12970155477523804, "learning_rate": 0.000143516729380657, "loss": 0.9299, "step": 2783 }, { "epoch": 0.5659686928237446, "grad_norm": 0.13361741602420807, "learning_rate": 0.00014349638970812572, "loss": 1.0209, "step": 2784 }, { "epoch": 0.566171986176052, "grad_norm": 0.13938020169734955, "learning_rate": 0.00014347605003559442, "loss": 1.0303, "step": 2785 }, { "epoch": 0.5663752795283594, "grad_norm": 0.13315965235233307, "learning_rate": 0.00014345571036306315, "loss": 1.1152, "step": 2786 }, { "epoch": 0.5665785728806668, "grad_norm": 0.14047378301620483, "learning_rate": 0.00014343537069053187, "loss": 1.2173, "step": 2787 }, { "epoch": 0.5667818662329742, "grad_norm": 0.1367003172636032, "learning_rate": 0.00014341503101800062, "loss": 1.0284, "step": 2788 }, { "epoch": 0.5669851595852815, "grad_norm": 0.1463545858860016, "learning_rate": 0.00014339469134546935, "loss": 1.0506, "step": 2789 }, { "epoch": 0.5671884529375889, "grad_norm": 0.12741826474666595, "learning_rate": 0.00014337435167293807, "loss": 1.0226, "step": 2790 }, { "epoch": 0.5673917462898963, "grad_norm": 0.1232975423336029, "learning_rate": 0.0001433540120004068, "loss": 0.9259, "step": 2791 }, { "epoch": 0.5675950396422037, "grad_norm": 0.13350965082645416, "learning_rate": 0.00014333367232787555, "loss": 1.0739, "step": 2792 }, { "epoch": 0.5677983329945111, "grad_norm": 0.1262935996055603, "learning_rate": 0.00014331333265534424, "loss": 1.1412, "step": 2793 }, { "epoch": 0.5680016263468185, "grad_norm": 0.1304781287908554, "learning_rate": 0.00014329299298281297, "loss": 1.0605, "step": 2794 }, { "epoch": 0.5682049196991258, "grad_norm": 0.13018850982189178, "learning_rate": 0.0001432726533102817, "loss": 1.048, "step": 2795 }, { "epoch": 0.5684082130514332, "grad_norm": 0.13948385417461395, "learning_rate": 0.00014325231363775044, "loss": 1.2018, "step": 2796 }, { "epoch": 0.5686115064037406, "grad_norm": 0.1164885088801384, "learning_rate": 0.00014323197396521917, "loss": 0.9532, "step": 2797 }, { "epoch": 0.568814799756048, "grad_norm": 0.1407950073480606, "learning_rate": 0.0001432116342926879, "loss": 1.0816, "step": 2798 }, { "epoch": 0.5690180931083554, "grad_norm": 0.12568843364715576, "learning_rate": 0.00014319129462015662, "loss": 0.9222, "step": 2799 }, { "epoch": 0.5692213864606628, "grad_norm": 0.14112015068531036, "learning_rate": 0.00014317095494762537, "loss": 1.057, "step": 2800 }, { "epoch": 0.5694246798129701, "grad_norm": 0.1322345733642578, "learning_rate": 0.00014315061527509407, "loss": 1.0804, "step": 2801 }, { "epoch": 0.5696279731652775, "grad_norm": 0.13166458904743195, "learning_rate": 0.0001431302756025628, "loss": 0.9637, "step": 2802 }, { "epoch": 0.5698312665175849, "grad_norm": 0.13725675642490387, "learning_rate": 0.00014310993593003152, "loss": 0.9894, "step": 2803 }, { "epoch": 0.5700345598698923, "grad_norm": 0.1358625739812851, "learning_rate": 0.00014308959625750027, "loss": 1.1097, "step": 2804 }, { "epoch": 0.5702378532221997, "grad_norm": 0.14208373427391052, "learning_rate": 0.000143069256584969, "loss": 1.1789, "step": 2805 }, { "epoch": 0.5704411465745071, "grad_norm": 0.12727318704128265, "learning_rate": 0.00014304891691243772, "loss": 0.9598, "step": 2806 }, { "epoch": 0.5706444399268144, "grad_norm": 0.12927868962287903, "learning_rate": 0.00014302857723990644, "loss": 1.0194, "step": 2807 }, { "epoch": 0.5708477332791217, "grad_norm": 0.14685644209384918, "learning_rate": 0.0001430082375673752, "loss": 1.1379, "step": 2808 }, { "epoch": 0.5710510266314291, "grad_norm": 0.14648008346557617, "learning_rate": 0.0001429878978948439, "loss": 1.2296, "step": 2809 }, { "epoch": 0.5712543199837365, "grad_norm": 0.12980784475803375, "learning_rate": 0.00014296755822231261, "loss": 1.0982, "step": 2810 }, { "epoch": 0.571457613336044, "grad_norm": 0.11192413419485092, "learning_rate": 0.00014294721854978134, "loss": 0.9545, "step": 2811 }, { "epoch": 0.5716609066883513, "grad_norm": 0.15568038821220398, "learning_rate": 0.0001429268788772501, "loss": 1.1671, "step": 2812 }, { "epoch": 0.5718642000406586, "grad_norm": 0.14970743656158447, "learning_rate": 0.00014290653920471881, "loss": 1.0711, "step": 2813 }, { "epoch": 0.572067493392966, "grad_norm": 0.13441245257854462, "learning_rate": 0.00014288619953218754, "loss": 1.0231, "step": 2814 }, { "epoch": 0.5722707867452734, "grad_norm": 0.12407507002353668, "learning_rate": 0.00014286585985965626, "loss": 1.0276, "step": 2815 }, { "epoch": 0.5724740800975808, "grad_norm": 0.13431482017040253, "learning_rate": 0.00014284552018712502, "loss": 1.1361, "step": 2816 }, { "epoch": 0.5726773734498882, "grad_norm": 0.132259339094162, "learning_rate": 0.0001428251805145937, "loss": 1.2343, "step": 2817 }, { "epoch": 0.5728806668021955, "grad_norm": 0.1342546045780182, "learning_rate": 0.00014280484084206244, "loss": 1.0906, "step": 2818 }, { "epoch": 0.5730839601545029, "grad_norm": 0.12521067261695862, "learning_rate": 0.00014278450116953116, "loss": 1.0881, "step": 2819 }, { "epoch": 0.5732872535068103, "grad_norm": 0.12174705415964127, "learning_rate": 0.0001427641614969999, "loss": 0.8563, "step": 2820 }, { "epoch": 0.5734905468591177, "grad_norm": 0.14310669898986816, "learning_rate": 0.00014274382182446864, "loss": 1.2119, "step": 2821 }, { "epoch": 0.5736938402114251, "grad_norm": 0.11739708483219147, "learning_rate": 0.00014272348215193736, "loss": 0.8849, "step": 2822 }, { "epoch": 0.5738971335637325, "grad_norm": 0.14041262865066528, "learning_rate": 0.0001427031424794061, "loss": 1.3593, "step": 2823 }, { "epoch": 0.5741004269160398, "grad_norm": 0.13473278284072876, "learning_rate": 0.00014268280280687484, "loss": 1.0379, "step": 2824 }, { "epoch": 0.5743037202683472, "grad_norm": 0.12364168465137482, "learning_rate": 0.00014266246313434354, "loss": 1.0167, "step": 2825 }, { "epoch": 0.5745070136206546, "grad_norm": 0.1333821415901184, "learning_rate": 0.00014264212346181226, "loss": 1.0472, "step": 2826 }, { "epoch": 0.574710306972962, "grad_norm": 0.11603229492902756, "learning_rate": 0.00014262178378928098, "loss": 0.8045, "step": 2827 }, { "epoch": 0.5749136003252694, "grad_norm": 0.13383187353610992, "learning_rate": 0.0001426014441167497, "loss": 1.1617, "step": 2828 }, { "epoch": 0.5751168936775768, "grad_norm": 0.1249544620513916, "learning_rate": 0.00014258110444421846, "loss": 1.0211, "step": 2829 }, { "epoch": 0.5753201870298841, "grad_norm": 0.12109317630529404, "learning_rate": 0.00014256076477168719, "loss": 0.9672, "step": 2830 }, { "epoch": 0.5755234803821915, "grad_norm": 0.1185065507888794, "learning_rate": 0.0001425404250991559, "loss": 0.857, "step": 2831 }, { "epoch": 0.5757267737344989, "grad_norm": 0.162327840924263, "learning_rate": 0.00014252008542662463, "loss": 1.2834, "step": 2832 }, { "epoch": 0.5759300670868063, "grad_norm": 0.12928487360477448, "learning_rate": 0.00014249974575409339, "loss": 1.1067, "step": 2833 }, { "epoch": 0.5761333604391137, "grad_norm": 0.12098827958106995, "learning_rate": 0.00014247940608156208, "loss": 0.8984, "step": 2834 }, { "epoch": 0.5763366537914211, "grad_norm": 0.12587502598762512, "learning_rate": 0.0001424590664090308, "loss": 1.0488, "step": 2835 }, { "epoch": 0.5765399471437284, "grad_norm": 0.12398620694875717, "learning_rate": 0.00014243872673649953, "loss": 0.9838, "step": 2836 }, { "epoch": 0.5767432404960358, "grad_norm": 0.12822575867176056, "learning_rate": 0.00014241838706396828, "loss": 1.0103, "step": 2837 }, { "epoch": 0.5769465338483432, "grad_norm": 0.13499167561531067, "learning_rate": 0.000142398047391437, "loss": 1.1553, "step": 2838 }, { "epoch": 0.5771498272006506, "grad_norm": 0.12537875771522522, "learning_rate": 0.00014237770771890573, "loss": 0.9383, "step": 2839 }, { "epoch": 0.577353120552958, "grad_norm": 0.13840174674987793, "learning_rate": 0.00014235736804637446, "loss": 1.0803, "step": 2840 }, { "epoch": 0.5775564139052652, "grad_norm": 0.11736918240785599, "learning_rate": 0.0001423370283738432, "loss": 0.908, "step": 2841 }, { "epoch": 0.5777597072575726, "grad_norm": 0.12442715466022491, "learning_rate": 0.0001423166887013119, "loss": 0.9844, "step": 2842 }, { "epoch": 0.57796300060988, "grad_norm": 0.13206282258033752, "learning_rate": 0.00014229634902878063, "loss": 1.063, "step": 2843 }, { "epoch": 0.5781662939621874, "grad_norm": 0.1393408477306366, "learning_rate": 0.00014227600935624935, "loss": 1.0633, "step": 2844 }, { "epoch": 0.5783695873144948, "grad_norm": 0.140583336353302, "learning_rate": 0.0001422556696837181, "loss": 1.1749, "step": 2845 }, { "epoch": 0.5785728806668022, "grad_norm": 0.1310548186302185, "learning_rate": 0.00014223533001118683, "loss": 1.1205, "step": 2846 }, { "epoch": 0.5787761740191095, "grad_norm": 0.1283491551876068, "learning_rate": 0.00014221499033865556, "loss": 1.0956, "step": 2847 }, { "epoch": 0.5789794673714169, "grad_norm": 0.12449255585670471, "learning_rate": 0.00014219465066612428, "loss": 1.0153, "step": 2848 }, { "epoch": 0.5791827607237243, "grad_norm": 0.13952034711837769, "learning_rate": 0.00014217431099359303, "loss": 1.0639, "step": 2849 }, { "epoch": 0.5793860540760317, "grad_norm": 0.1438504010438919, "learning_rate": 0.00014215397132106173, "loss": 1.1237, "step": 2850 }, { "epoch": 0.5795893474283391, "grad_norm": 0.13687646389007568, "learning_rate": 0.00014213363164853045, "loss": 1.0719, "step": 2851 }, { "epoch": 0.5797926407806465, "grad_norm": 0.14046727120876312, "learning_rate": 0.00014211329197599918, "loss": 1.2391, "step": 2852 }, { "epoch": 0.5799959341329538, "grad_norm": 0.1313040554523468, "learning_rate": 0.00014209295230346793, "loss": 1.0689, "step": 2853 }, { "epoch": 0.5801992274852612, "grad_norm": 0.1264270395040512, "learning_rate": 0.00014207261263093665, "loss": 0.9856, "step": 2854 }, { "epoch": 0.5804025208375686, "grad_norm": 0.12176066637039185, "learning_rate": 0.00014205227295840538, "loss": 1.0131, "step": 2855 }, { "epoch": 0.580605814189876, "grad_norm": 0.13929857313632965, "learning_rate": 0.0001420319332858741, "loss": 1.134, "step": 2856 }, { "epoch": 0.5808091075421834, "grad_norm": 0.12523682415485382, "learning_rate": 0.00014201159361334285, "loss": 1.0868, "step": 2857 }, { "epoch": 0.5810124008944908, "grad_norm": 0.13270434737205505, "learning_rate": 0.00014199125394081155, "loss": 1.183, "step": 2858 }, { "epoch": 0.5812156942467981, "grad_norm": 0.1330588310956955, "learning_rate": 0.00014197091426828028, "loss": 1.2487, "step": 2859 }, { "epoch": 0.5814189875991055, "grad_norm": 0.130279541015625, "learning_rate": 0.000141950574595749, "loss": 1.0885, "step": 2860 }, { "epoch": 0.5816222809514129, "grad_norm": 0.1529773771762848, "learning_rate": 0.00014193023492321775, "loss": 1.098, "step": 2861 }, { "epoch": 0.5818255743037203, "grad_norm": 0.14715005457401276, "learning_rate": 0.00014190989525068648, "loss": 1.086, "step": 2862 }, { "epoch": 0.5820288676560277, "grad_norm": 0.12468834221363068, "learning_rate": 0.0001418895555781552, "loss": 1.0349, "step": 2863 }, { "epoch": 0.5822321610083351, "grad_norm": 0.1332579404115677, "learning_rate": 0.00014186921590562393, "loss": 1.0514, "step": 2864 }, { "epoch": 0.5824354543606424, "grad_norm": 0.13424143195152283, "learning_rate": 0.00014184887623309268, "loss": 1.0859, "step": 2865 }, { "epoch": 0.5826387477129498, "grad_norm": 0.11994919180870056, "learning_rate": 0.00014182853656056137, "loss": 0.9416, "step": 2866 }, { "epoch": 0.5828420410652572, "grad_norm": 0.13324035704135895, "learning_rate": 0.0001418081968880301, "loss": 1.112, "step": 2867 }, { "epoch": 0.5830453344175646, "grad_norm": 0.14520680904388428, "learning_rate": 0.00014178785721549882, "loss": 1.0231, "step": 2868 }, { "epoch": 0.583248627769872, "grad_norm": 0.14066869020462036, "learning_rate": 0.00014176751754296755, "loss": 1.281, "step": 2869 }, { "epoch": 0.5834519211221793, "grad_norm": 0.1384185403585434, "learning_rate": 0.0001417471778704363, "loss": 1.0393, "step": 2870 }, { "epoch": 0.5836552144744866, "grad_norm": 0.1287851184606552, "learning_rate": 0.00014172683819790502, "loss": 1.0967, "step": 2871 }, { "epoch": 0.583858507826794, "grad_norm": 0.11896179616451263, "learning_rate": 0.00014170649852537375, "loss": 0.844, "step": 2872 }, { "epoch": 0.5840618011791014, "grad_norm": 0.1319238543510437, "learning_rate": 0.00014168615885284247, "loss": 1.0279, "step": 2873 }, { "epoch": 0.5842650945314088, "grad_norm": 0.1428615152835846, "learning_rate": 0.0001416658191803112, "loss": 1.0823, "step": 2874 }, { "epoch": 0.5844683878837162, "grad_norm": 0.11939448863267899, "learning_rate": 0.00014164547950777992, "loss": 1.0417, "step": 2875 }, { "epoch": 0.5846716812360235, "grad_norm": 0.13555167615413666, "learning_rate": 0.00014162513983524865, "loss": 1.0476, "step": 2876 }, { "epoch": 0.5848749745883309, "grad_norm": 0.12872137129306793, "learning_rate": 0.00014160480016271737, "loss": 0.9306, "step": 2877 }, { "epoch": 0.5850782679406383, "grad_norm": 0.12111514061689377, "learning_rate": 0.00014158446049018612, "loss": 0.9841, "step": 2878 }, { "epoch": 0.5852815612929457, "grad_norm": 0.12589818239212036, "learning_rate": 0.00014156412081765485, "loss": 1.0237, "step": 2879 }, { "epoch": 0.5854848546452531, "grad_norm": 0.12264888733625412, "learning_rate": 0.00014154378114512357, "loss": 0.9417, "step": 2880 }, { "epoch": 0.5856881479975605, "grad_norm": 0.14193598926067352, "learning_rate": 0.0001415234414725923, "loss": 1.2845, "step": 2881 }, { "epoch": 0.5858914413498678, "grad_norm": 0.14116251468658447, "learning_rate": 0.00014150310180006102, "loss": 1.001, "step": 2882 }, { "epoch": 0.5860947347021752, "grad_norm": 0.14120200276374817, "learning_rate": 0.00014148276212752974, "loss": 1.2458, "step": 2883 }, { "epoch": 0.5862980280544826, "grad_norm": 0.13560935854911804, "learning_rate": 0.00014146242245499847, "loss": 1.1217, "step": 2884 }, { "epoch": 0.58650132140679, "grad_norm": 0.14672443270683289, "learning_rate": 0.0001414420827824672, "loss": 1.188, "step": 2885 }, { "epoch": 0.5867046147590974, "grad_norm": 0.12481992691755295, "learning_rate": 0.00014142174310993594, "loss": 1.0063, "step": 2886 }, { "epoch": 0.5869079081114048, "grad_norm": 0.13482870161533356, "learning_rate": 0.00014140140343740467, "loss": 1.0498, "step": 2887 }, { "epoch": 0.5871112014637121, "grad_norm": 0.16956381499767303, "learning_rate": 0.0001413810637648734, "loss": 1.1202, "step": 2888 }, { "epoch": 0.5873144948160195, "grad_norm": 0.1285228431224823, "learning_rate": 0.00014136072409234212, "loss": 0.9175, "step": 2889 }, { "epoch": 0.5875177881683269, "grad_norm": 0.12045499682426453, "learning_rate": 0.00014134038441981087, "loss": 1.0062, "step": 2890 }, { "epoch": 0.5877210815206343, "grad_norm": 0.11959182471036911, "learning_rate": 0.00014132004474727957, "loss": 0.9041, "step": 2891 }, { "epoch": 0.5879243748729417, "grad_norm": 0.12867799401283264, "learning_rate": 0.0001412997050747483, "loss": 0.9985, "step": 2892 }, { "epoch": 0.5881276682252491, "grad_norm": 0.12349910289049149, "learning_rate": 0.00014127936540221702, "loss": 0.9357, "step": 2893 }, { "epoch": 0.5883309615775564, "grad_norm": 0.12842735648155212, "learning_rate": 0.00014125902572968577, "loss": 1.0862, "step": 2894 }, { "epoch": 0.5885342549298638, "grad_norm": 0.1375754177570343, "learning_rate": 0.0001412386860571545, "loss": 1.1911, "step": 2895 }, { "epoch": 0.5887375482821712, "grad_norm": 0.13770340383052826, "learning_rate": 0.00014121834638462322, "loss": 1.0785, "step": 2896 }, { "epoch": 0.5889408416344786, "grad_norm": 0.13841983675956726, "learning_rate": 0.00014119800671209194, "loss": 1.044, "step": 2897 }, { "epoch": 0.589144134986786, "grad_norm": 0.12044288218021393, "learning_rate": 0.0001411776670395607, "loss": 1.0482, "step": 2898 }, { "epoch": 0.5893474283390933, "grad_norm": 0.11521141231060028, "learning_rate": 0.0001411573273670294, "loss": 0.8782, "step": 2899 }, { "epoch": 0.5895507216914007, "grad_norm": 0.13133427500724792, "learning_rate": 0.00014113698769449811, "loss": 1.0602, "step": 2900 }, { "epoch": 0.5897540150437081, "grad_norm": 0.12748554348945618, "learning_rate": 0.00014111664802196684, "loss": 1.0442, "step": 2901 }, { "epoch": 0.5899573083960155, "grad_norm": 0.13325203955173492, "learning_rate": 0.0001410963083494356, "loss": 1.0469, "step": 2902 }, { "epoch": 0.5901606017483229, "grad_norm": 0.12706689536571503, "learning_rate": 0.00014107596867690431, "loss": 1.0692, "step": 2903 }, { "epoch": 0.5903638951006303, "grad_norm": 0.12228814512491226, "learning_rate": 0.00014105562900437304, "loss": 0.8914, "step": 2904 }, { "epoch": 0.5905671884529375, "grad_norm": 0.1334328055381775, "learning_rate": 0.00014103528933184176, "loss": 1.028, "step": 2905 }, { "epoch": 0.5907704818052449, "grad_norm": 0.11916909366846085, "learning_rate": 0.00014101494965931052, "loss": 0.9827, "step": 2906 }, { "epoch": 0.5909737751575523, "grad_norm": 0.12943509221076965, "learning_rate": 0.0001409946099867792, "loss": 1.1302, "step": 2907 }, { "epoch": 0.5911770685098597, "grad_norm": 0.1251513957977295, "learning_rate": 0.00014097427031424794, "loss": 0.9508, "step": 2908 }, { "epoch": 0.5913803618621671, "grad_norm": 0.13130627572536469, "learning_rate": 0.00014095393064171666, "loss": 1.0461, "step": 2909 }, { "epoch": 0.5915836552144745, "grad_norm": 0.13331666588783264, "learning_rate": 0.0001409335909691854, "loss": 1.1964, "step": 2910 }, { "epoch": 0.5917869485667818, "grad_norm": 0.12930695712566376, "learning_rate": 0.00014091325129665414, "loss": 1.0012, "step": 2911 }, { "epoch": 0.5919902419190892, "grad_norm": 0.1442381590604782, "learning_rate": 0.00014089291162412286, "loss": 1.2208, "step": 2912 }, { "epoch": 0.5921935352713966, "grad_norm": 0.12667718529701233, "learning_rate": 0.00014087257195159159, "loss": 0.9417, "step": 2913 }, { "epoch": 0.592396828623704, "grad_norm": 0.12730923295021057, "learning_rate": 0.0001408522322790603, "loss": 1.0481, "step": 2914 }, { "epoch": 0.5926001219760114, "grad_norm": 0.11554036289453506, "learning_rate": 0.00014083189260652904, "loss": 0.8343, "step": 2915 }, { "epoch": 0.5928034153283188, "grad_norm": 0.13052915036678314, "learning_rate": 0.00014081155293399776, "loss": 1.0713, "step": 2916 }, { "epoch": 0.5930067086806261, "grad_norm": 0.12292870879173279, "learning_rate": 0.00014079121326146648, "loss": 1.0347, "step": 2917 }, { "epoch": 0.5932100020329335, "grad_norm": 0.13543544709682465, "learning_rate": 0.0001407708735889352, "loss": 1.1291, "step": 2918 }, { "epoch": 0.5934132953852409, "grad_norm": 0.13335563242435455, "learning_rate": 0.00014075053391640396, "loss": 1.0373, "step": 2919 }, { "epoch": 0.5936165887375483, "grad_norm": 0.12110266089439392, "learning_rate": 0.00014073019424387268, "loss": 0.9317, "step": 2920 }, { "epoch": 0.5938198820898557, "grad_norm": 0.11466968059539795, "learning_rate": 0.0001407098545713414, "loss": 0.922, "step": 2921 }, { "epoch": 0.594023175442163, "grad_norm": 0.13369932770729065, "learning_rate": 0.00014068951489881013, "loss": 1.0871, "step": 2922 }, { "epoch": 0.5942264687944704, "grad_norm": 0.12968046963214874, "learning_rate": 0.00014066917522627886, "loss": 0.971, "step": 2923 }, { "epoch": 0.5944297621467778, "grad_norm": 0.12824739515781403, "learning_rate": 0.00014064883555374758, "loss": 1.1196, "step": 2924 }, { "epoch": 0.5946330554990852, "grad_norm": 0.12155873328447342, "learning_rate": 0.0001406284958812163, "loss": 0.886, "step": 2925 }, { "epoch": 0.5948363488513926, "grad_norm": 0.12435124814510345, "learning_rate": 0.00014060815620868503, "loss": 1.0395, "step": 2926 }, { "epoch": 0.5950396422037, "grad_norm": 0.1359453648328781, "learning_rate": 0.00014058781653615378, "loss": 1.1477, "step": 2927 }, { "epoch": 0.5952429355560073, "grad_norm": 0.10797560214996338, "learning_rate": 0.0001405674768636225, "loss": 0.8632, "step": 2928 }, { "epoch": 0.5954462289083147, "grad_norm": 0.12806884944438934, "learning_rate": 0.00014054713719109123, "loss": 0.9678, "step": 2929 }, { "epoch": 0.5956495222606221, "grad_norm": 0.13405455648899078, "learning_rate": 0.00014052679751855996, "loss": 1.1721, "step": 2930 }, { "epoch": 0.5958528156129295, "grad_norm": 0.11106649786233902, "learning_rate": 0.00014050645784602868, "loss": 0.8613, "step": 2931 }, { "epoch": 0.5960561089652369, "grad_norm": 0.1545085906982422, "learning_rate": 0.0001404861181734974, "loss": 1.2441, "step": 2932 }, { "epoch": 0.5962594023175443, "grad_norm": 0.1290442794561386, "learning_rate": 0.00014046577850096613, "loss": 1.0544, "step": 2933 }, { "epoch": 0.5964626956698516, "grad_norm": 0.11824672669172287, "learning_rate": 0.00014044543882843485, "loss": 0.953, "step": 2934 }, { "epoch": 0.596665989022159, "grad_norm": 0.13066919147968292, "learning_rate": 0.0001404250991559036, "loss": 1.1097, "step": 2935 }, { "epoch": 0.5968692823744663, "grad_norm": 0.13196654617786407, "learning_rate": 0.00014040475948337233, "loss": 0.9026, "step": 2936 }, { "epoch": 0.5970725757267737, "grad_norm": 0.1255139410495758, "learning_rate": 0.00014038441981084105, "loss": 0.9072, "step": 2937 }, { "epoch": 0.5972758690790811, "grad_norm": 0.12293802946805954, "learning_rate": 0.00014036408013830978, "loss": 0.9046, "step": 2938 }, { "epoch": 0.5974791624313885, "grad_norm": 0.15161147713661194, "learning_rate": 0.0001403437404657785, "loss": 1.1338, "step": 2939 }, { "epoch": 0.5976824557836958, "grad_norm": 0.1161181703209877, "learning_rate": 0.00014032340079324723, "loss": 0.8716, "step": 2940 }, { "epoch": 0.5978857491360032, "grad_norm": 0.14142772555351257, "learning_rate": 0.00014030306112071595, "loss": 1.0611, "step": 2941 }, { "epoch": 0.5980890424883106, "grad_norm": 0.13781876862049103, "learning_rate": 0.00014028272144818468, "loss": 1.149, "step": 2942 }, { "epoch": 0.598292335840618, "grad_norm": 0.12525886297225952, "learning_rate": 0.00014026238177565343, "loss": 0.9765, "step": 2943 }, { "epoch": 0.5984956291929254, "grad_norm": 0.11980410665273666, "learning_rate": 0.00014024204210312215, "loss": 0.979, "step": 2944 }, { "epoch": 0.5986989225452328, "grad_norm": 0.14083100855350494, "learning_rate": 0.00014022170243059088, "loss": 1.1407, "step": 2945 }, { "epoch": 0.5989022158975401, "grad_norm": 0.12020063400268555, "learning_rate": 0.0001402013627580596, "loss": 0.9946, "step": 2946 }, { "epoch": 0.5991055092498475, "grad_norm": 0.13902409374713898, "learning_rate": 0.00014018102308552835, "loss": 1.0152, "step": 2947 }, { "epoch": 0.5993088026021549, "grad_norm": 0.12778332829475403, "learning_rate": 0.00014016068341299705, "loss": 1.0196, "step": 2948 }, { "epoch": 0.5995120959544623, "grad_norm": 0.12210957705974579, "learning_rate": 0.00014014034374046578, "loss": 1.136, "step": 2949 }, { "epoch": 0.5997153893067697, "grad_norm": 0.1324332356452942, "learning_rate": 0.0001401200040679345, "loss": 1.055, "step": 2950 }, { "epoch": 0.599918682659077, "grad_norm": 0.14248095452785492, "learning_rate": 0.00014009966439540325, "loss": 1.2809, "step": 2951 }, { "epoch": 0.6001219760113844, "grad_norm": 0.12518227100372314, "learning_rate": 0.00014007932472287198, "loss": 1.0029, "step": 2952 }, { "epoch": 0.6003252693636918, "grad_norm": 0.14796386659145355, "learning_rate": 0.0001400589850503407, "loss": 1.2305, "step": 2953 }, { "epoch": 0.6005285627159992, "grad_norm": 0.1290920078754425, "learning_rate": 0.00014003864537780942, "loss": 0.9874, "step": 2954 }, { "epoch": 0.6007318560683066, "grad_norm": 0.12988100945949554, "learning_rate": 0.00014001830570527815, "loss": 0.9438, "step": 2955 }, { "epoch": 0.600935149420614, "grad_norm": 0.12497319281101227, "learning_rate": 0.00013999796603274687, "loss": 0.8797, "step": 2956 }, { "epoch": 0.6011384427729213, "grad_norm": 0.1346983015537262, "learning_rate": 0.0001399776263602156, "loss": 1.0419, "step": 2957 }, { "epoch": 0.6013417361252287, "grad_norm": 0.11253220587968826, "learning_rate": 0.00013995728668768432, "loss": 0.9803, "step": 2958 }, { "epoch": 0.6015450294775361, "grad_norm": 0.1398647278547287, "learning_rate": 0.00013993694701515305, "loss": 1.1748, "step": 2959 }, { "epoch": 0.6017483228298435, "grad_norm": 0.14113448560237885, "learning_rate": 0.0001399166073426218, "loss": 1.1243, "step": 2960 }, { "epoch": 0.6019516161821509, "grad_norm": 0.1343860626220703, "learning_rate": 0.00013989626767009052, "loss": 1.1181, "step": 2961 }, { "epoch": 0.6021549095344583, "grad_norm": 0.13300351798534393, "learning_rate": 0.00013987592799755925, "loss": 1.108, "step": 2962 }, { "epoch": 0.6023582028867656, "grad_norm": 0.1379079967737198, "learning_rate": 0.00013985558832502797, "loss": 1.1145, "step": 2963 }, { "epoch": 0.602561496239073, "grad_norm": 0.13258612155914307, "learning_rate": 0.0001398352486524967, "loss": 1.0888, "step": 2964 }, { "epoch": 0.6027647895913804, "grad_norm": 0.1152709499001503, "learning_rate": 0.00013981490897996542, "loss": 0.8821, "step": 2965 }, { "epoch": 0.6029680829436878, "grad_norm": 0.14803390204906464, "learning_rate": 0.00013979456930743415, "loss": 1.1625, "step": 2966 }, { "epoch": 0.6031713762959952, "grad_norm": 0.12902309000492096, "learning_rate": 0.00013977422963490287, "loss": 1.0304, "step": 2967 }, { "epoch": 0.6033746696483026, "grad_norm": 0.1235414445400238, "learning_rate": 0.00013975388996237162, "loss": 1.0143, "step": 2968 }, { "epoch": 0.6035779630006098, "grad_norm": 0.1427546590566635, "learning_rate": 0.00013973355028984035, "loss": 1.2047, "step": 2969 }, { "epoch": 0.6037812563529172, "grad_norm": 0.1456848382949829, "learning_rate": 0.00013971321061730907, "loss": 1.0673, "step": 2970 }, { "epoch": 0.6039845497052246, "grad_norm": 0.13823378086090088, "learning_rate": 0.0001396928709447778, "loss": 0.9845, "step": 2971 }, { "epoch": 0.604187843057532, "grad_norm": 0.15567836165428162, "learning_rate": 0.00013967253127224652, "loss": 1.1844, "step": 2972 }, { "epoch": 0.6043911364098394, "grad_norm": 0.12227654457092285, "learning_rate": 0.00013965219159971524, "loss": 1.0443, "step": 2973 }, { "epoch": 0.6045944297621467, "grad_norm": 0.14952129125595093, "learning_rate": 0.00013963185192718397, "loss": 1.1285, "step": 2974 }, { "epoch": 0.6047977231144541, "grad_norm": 0.1279451698064804, "learning_rate": 0.0001396115122546527, "loss": 0.9304, "step": 2975 }, { "epoch": 0.6050010164667615, "grad_norm": 0.13317649066448212, "learning_rate": 0.00013959117258212144, "loss": 1.0864, "step": 2976 }, { "epoch": 0.6052043098190689, "grad_norm": 0.13362491130828857, "learning_rate": 0.00013957083290959017, "loss": 1.0547, "step": 2977 }, { "epoch": 0.6054076031713763, "grad_norm": 0.13469024002552032, "learning_rate": 0.0001395504932370589, "loss": 1.0941, "step": 2978 }, { "epoch": 0.6056108965236837, "grad_norm": 0.1265508234500885, "learning_rate": 0.00013953015356452762, "loss": 0.9379, "step": 2979 }, { "epoch": 0.605814189875991, "grad_norm": 0.1344381868839264, "learning_rate": 0.00013950981389199634, "loss": 1.1047, "step": 2980 }, { "epoch": 0.6060174832282984, "grad_norm": 0.13309423625469208, "learning_rate": 0.00013948947421946507, "loss": 1.0916, "step": 2981 }, { "epoch": 0.6062207765806058, "grad_norm": 0.1394202560186386, "learning_rate": 0.0001394691345469338, "loss": 1.0521, "step": 2982 }, { "epoch": 0.6064240699329132, "grad_norm": 0.12950794398784637, "learning_rate": 0.00013944879487440252, "loss": 1.0414, "step": 2983 }, { "epoch": 0.6066273632852206, "grad_norm": 0.15191194415092468, "learning_rate": 0.00013942845520187127, "loss": 1.1255, "step": 2984 }, { "epoch": 0.606830656637528, "grad_norm": 0.142736554145813, "learning_rate": 0.00013940811552934, "loss": 1.1004, "step": 2985 }, { "epoch": 0.6070339499898353, "grad_norm": 0.13812166452407837, "learning_rate": 0.00013938777585680872, "loss": 1.0688, "step": 2986 }, { "epoch": 0.6072372433421427, "grad_norm": 0.1332339346408844, "learning_rate": 0.00013936743618427744, "loss": 1.0214, "step": 2987 }, { "epoch": 0.6074405366944501, "grad_norm": 0.11382775753736496, "learning_rate": 0.00013934709651174616, "loss": 0.9555, "step": 2988 }, { "epoch": 0.6076438300467575, "grad_norm": 0.1476142406463623, "learning_rate": 0.0001393267568392149, "loss": 1.1355, "step": 2989 }, { "epoch": 0.6078471233990649, "grad_norm": 0.15201976895332336, "learning_rate": 0.00013930641716668361, "loss": 1.2253, "step": 2990 }, { "epoch": 0.6080504167513723, "grad_norm": 0.13920465111732483, "learning_rate": 0.00013928607749415234, "loss": 1.1671, "step": 2991 }, { "epoch": 0.6082537101036796, "grad_norm": 0.11285021156072617, "learning_rate": 0.0001392657378216211, "loss": 0.8908, "step": 2992 }, { "epoch": 0.608457003455987, "grad_norm": 0.14005322754383087, "learning_rate": 0.00013924539814908981, "loss": 1.0991, "step": 2993 }, { "epoch": 0.6086602968082944, "grad_norm": 0.14553718268871307, "learning_rate": 0.00013922505847655854, "loss": 1.1135, "step": 2994 }, { "epoch": 0.6088635901606018, "grad_norm": 0.1322544664144516, "learning_rate": 0.00013920471880402726, "loss": 1.0209, "step": 2995 }, { "epoch": 0.6090668835129092, "grad_norm": 0.12357106804847717, "learning_rate": 0.000139184379131496, "loss": 0.9475, "step": 2996 }, { "epoch": 0.6092701768652166, "grad_norm": 0.1173151507973671, "learning_rate": 0.0001391640394589647, "loss": 0.9806, "step": 2997 }, { "epoch": 0.6094734702175238, "grad_norm": 0.12091773003339767, "learning_rate": 0.00013914369978643344, "loss": 0.9308, "step": 2998 }, { "epoch": 0.6096767635698312, "grad_norm": 0.12371361255645752, "learning_rate": 0.00013912336011390216, "loss": 1.001, "step": 2999 }, { "epoch": 0.6098800569221386, "grad_norm": 0.11926256865262985, "learning_rate": 0.00013910302044137089, "loss": 0.9655, "step": 3000 }, { "epoch": 0.610083350274446, "grad_norm": 0.12953068315982819, "learning_rate": 0.00013908268076883964, "loss": 1.0736, "step": 3001 }, { "epoch": 0.6102866436267534, "grad_norm": 0.12367159873247147, "learning_rate": 0.00013906234109630836, "loss": 1.0428, "step": 3002 }, { "epoch": 0.6104899369790607, "grad_norm": 0.12180911749601364, "learning_rate": 0.00013904200142377709, "loss": 0.9661, "step": 3003 }, { "epoch": 0.6106932303313681, "grad_norm": 0.13220947980880737, "learning_rate": 0.0001390216617512458, "loss": 0.8966, "step": 3004 }, { "epoch": 0.6108965236836755, "grad_norm": 0.15283820033073425, "learning_rate": 0.00013900132207871453, "loss": 1.2692, "step": 3005 }, { "epoch": 0.6110998170359829, "grad_norm": 0.1325535923242569, "learning_rate": 0.00013898098240618326, "loss": 0.9849, "step": 3006 }, { "epoch": 0.6113031103882903, "grad_norm": 0.14417356252670288, "learning_rate": 0.00013896064273365198, "loss": 1.2099, "step": 3007 }, { "epoch": 0.6115064037405977, "grad_norm": 0.1250670701265335, "learning_rate": 0.0001389403030611207, "loss": 0.9963, "step": 3008 }, { "epoch": 0.611709697092905, "grad_norm": 0.1299847513437271, "learning_rate": 0.00013891996338858946, "loss": 1.0131, "step": 3009 }, { "epoch": 0.6119129904452124, "grad_norm": 0.13631494343280792, "learning_rate": 0.00013889962371605818, "loss": 1.106, "step": 3010 }, { "epoch": 0.6121162837975198, "grad_norm": 0.12008505314588547, "learning_rate": 0.0001388792840435269, "loss": 0.9617, "step": 3011 }, { "epoch": 0.6123195771498272, "grad_norm": 0.13346195220947266, "learning_rate": 0.00013885894437099563, "loss": 1.1796, "step": 3012 }, { "epoch": 0.6125228705021346, "grad_norm": 0.12372852861881256, "learning_rate": 0.00013883860469846436, "loss": 0.8494, "step": 3013 }, { "epoch": 0.612726163854442, "grad_norm": 0.14432121813297272, "learning_rate": 0.00013881826502593308, "loss": 1.1595, "step": 3014 }, { "epoch": 0.6129294572067493, "grad_norm": 0.13419228792190552, "learning_rate": 0.0001387979253534018, "loss": 1.0886, "step": 3015 }, { "epoch": 0.6131327505590567, "grad_norm": 0.1301155984401703, "learning_rate": 0.00013877758568087053, "loss": 0.9456, "step": 3016 }, { "epoch": 0.6133360439113641, "grad_norm": 0.11167372018098831, "learning_rate": 0.00013875724600833928, "loss": 0.9077, "step": 3017 }, { "epoch": 0.6135393372636715, "grad_norm": 0.11222781985998154, "learning_rate": 0.000138736906335808, "loss": 0.8517, "step": 3018 }, { "epoch": 0.6137426306159789, "grad_norm": 0.1456783413887024, "learning_rate": 0.00013871656666327673, "loss": 1.1327, "step": 3019 }, { "epoch": 0.6139459239682863, "grad_norm": 0.12238568812608719, "learning_rate": 0.00013869622699074546, "loss": 1.0502, "step": 3020 }, { "epoch": 0.6141492173205936, "grad_norm": 0.1362997442483902, "learning_rate": 0.00013867588731821418, "loss": 0.9795, "step": 3021 }, { "epoch": 0.614352510672901, "grad_norm": 0.12421485036611557, "learning_rate": 0.0001386555476456829, "loss": 0.9604, "step": 3022 }, { "epoch": 0.6145558040252084, "grad_norm": 0.11413677036762238, "learning_rate": 0.00013863520797315163, "loss": 1.0068, "step": 3023 }, { "epoch": 0.6147590973775158, "grad_norm": 0.1455029845237732, "learning_rate": 0.00013861486830062035, "loss": 1.2662, "step": 3024 }, { "epoch": 0.6149623907298232, "grad_norm": 0.12818849086761475, "learning_rate": 0.0001385945286280891, "loss": 0.9195, "step": 3025 }, { "epoch": 0.6151656840821305, "grad_norm": 0.1426313817501068, "learning_rate": 0.00013857418895555783, "loss": 1.2388, "step": 3026 }, { "epoch": 0.6153689774344379, "grad_norm": 0.14491280913352966, "learning_rate": 0.00013855384928302655, "loss": 1.1537, "step": 3027 }, { "epoch": 0.6155722707867453, "grad_norm": 0.11689125746488571, "learning_rate": 0.00013853350961049528, "loss": 0.9963, "step": 3028 }, { "epoch": 0.6157755641390527, "grad_norm": 0.1245650127530098, "learning_rate": 0.000138513169937964, "loss": 0.9193, "step": 3029 }, { "epoch": 0.61597885749136, "grad_norm": 0.14654415845870972, "learning_rate": 0.00013849283026543273, "loss": 1.271, "step": 3030 }, { "epoch": 0.6161821508436675, "grad_norm": 0.13708455860614777, "learning_rate": 0.00013847249059290145, "loss": 1.1169, "step": 3031 }, { "epoch": 0.6163854441959747, "grad_norm": 0.13598188757896423, "learning_rate": 0.00013845215092037018, "loss": 1.1565, "step": 3032 }, { "epoch": 0.6165887375482821, "grad_norm": 0.13055184483528137, "learning_rate": 0.00013843181124783893, "loss": 0.9636, "step": 3033 }, { "epoch": 0.6167920309005895, "grad_norm": 0.12075616419315338, "learning_rate": 0.00013841147157530765, "loss": 1.0404, "step": 3034 }, { "epoch": 0.6169953242528969, "grad_norm": 0.12068097293376923, "learning_rate": 0.00013839113190277638, "loss": 0.9913, "step": 3035 }, { "epoch": 0.6171986176052043, "grad_norm": 0.13300339877605438, "learning_rate": 0.0001383707922302451, "loss": 1.1589, "step": 3036 }, { "epoch": 0.6174019109575117, "grad_norm": 0.13343989849090576, "learning_rate": 0.00013835045255771383, "loss": 1.1602, "step": 3037 }, { "epoch": 0.617605204309819, "grad_norm": 0.13028277456760406, "learning_rate": 0.00013833011288518255, "loss": 1.0985, "step": 3038 }, { "epoch": 0.6178084976621264, "grad_norm": 0.1125851422548294, "learning_rate": 0.00013830977321265128, "loss": 1.0002, "step": 3039 }, { "epoch": 0.6180117910144338, "grad_norm": 0.12342289090156555, "learning_rate": 0.00013828943354012, "loss": 1.0013, "step": 3040 }, { "epoch": 0.6182150843667412, "grad_norm": 0.12776073813438416, "learning_rate": 0.00013826909386758872, "loss": 0.9747, "step": 3041 }, { "epoch": 0.6184183777190486, "grad_norm": 0.12842942774295807, "learning_rate": 0.00013824875419505748, "loss": 0.9877, "step": 3042 }, { "epoch": 0.618621671071356, "grad_norm": 0.13102072477340698, "learning_rate": 0.0001382284145225262, "loss": 0.9636, "step": 3043 }, { "epoch": 0.6188249644236633, "grad_norm": 0.12905801832675934, "learning_rate": 0.00013820807484999492, "loss": 1.1595, "step": 3044 }, { "epoch": 0.6190282577759707, "grad_norm": 0.1274825781583786, "learning_rate": 0.00013818773517746365, "loss": 0.9346, "step": 3045 }, { "epoch": 0.6192315511282781, "grad_norm": 0.1235279068350792, "learning_rate": 0.00013816739550493237, "loss": 0.9774, "step": 3046 }, { "epoch": 0.6194348444805855, "grad_norm": 0.13355652987957, "learning_rate": 0.0001381470558324011, "loss": 1.1034, "step": 3047 }, { "epoch": 0.6196381378328929, "grad_norm": 0.12585759162902832, "learning_rate": 0.00013812671615986982, "loss": 1.0536, "step": 3048 }, { "epoch": 0.6198414311852003, "grad_norm": 0.12993231415748596, "learning_rate": 0.00013810637648733855, "loss": 1.2062, "step": 3049 }, { "epoch": 0.6200447245375076, "grad_norm": 0.1431044191122055, "learning_rate": 0.0001380860368148073, "loss": 1.1605, "step": 3050 }, { "epoch": 0.620248017889815, "grad_norm": 0.134634330868721, "learning_rate": 0.00013806569714227602, "loss": 1.101, "step": 3051 }, { "epoch": 0.6204513112421224, "grad_norm": 0.126140758395195, "learning_rate": 0.00013804535746974475, "loss": 1.0472, "step": 3052 }, { "epoch": 0.6206546045944298, "grad_norm": 0.1231079027056694, "learning_rate": 0.00013802501779721347, "loss": 0.9879, "step": 3053 }, { "epoch": 0.6208578979467372, "grad_norm": 0.12733492255210876, "learning_rate": 0.0001380046781246822, "loss": 1.0918, "step": 3054 }, { "epoch": 0.6210611912990445, "grad_norm": 0.14148791134357452, "learning_rate": 0.00013798433845215092, "loss": 1.145, "step": 3055 }, { "epoch": 0.6212644846513519, "grad_norm": 0.13087992370128632, "learning_rate": 0.00013796399877961965, "loss": 1.1101, "step": 3056 }, { "epoch": 0.6214677780036593, "grad_norm": 0.14443303644657135, "learning_rate": 0.00013794365910708837, "loss": 1.0992, "step": 3057 }, { "epoch": 0.6216710713559667, "grad_norm": 0.13422155380249023, "learning_rate": 0.00013792331943455712, "loss": 1.2044, "step": 3058 }, { "epoch": 0.6218743647082741, "grad_norm": 0.13146667182445526, "learning_rate": 0.00013790297976202585, "loss": 1.0769, "step": 3059 }, { "epoch": 0.6220776580605815, "grad_norm": 0.12982682883739471, "learning_rate": 0.00013788264008949457, "loss": 1.1232, "step": 3060 }, { "epoch": 0.6222809514128887, "grad_norm": 0.13256913423538208, "learning_rate": 0.0001378623004169633, "loss": 0.9969, "step": 3061 }, { "epoch": 0.6224842447651961, "grad_norm": 0.11935515701770782, "learning_rate": 0.00013784196074443202, "loss": 1.0282, "step": 3062 }, { "epoch": 0.6226875381175035, "grad_norm": 0.14199033379554749, "learning_rate": 0.00013782162107190074, "loss": 1.1328, "step": 3063 }, { "epoch": 0.622890831469811, "grad_norm": 0.12896639108657837, "learning_rate": 0.00013780128139936947, "loss": 1.1422, "step": 3064 }, { "epoch": 0.6230941248221183, "grad_norm": 0.12972599267959595, "learning_rate": 0.0001377809417268382, "loss": 1.0686, "step": 3065 }, { "epoch": 0.6232974181744257, "grad_norm": 0.14466549456119537, "learning_rate": 0.00013776060205430694, "loss": 1.3486, "step": 3066 }, { "epoch": 0.623500711526733, "grad_norm": 0.129892960190773, "learning_rate": 0.00013774026238177567, "loss": 0.9945, "step": 3067 }, { "epoch": 0.6237040048790404, "grad_norm": 0.1326766312122345, "learning_rate": 0.0001377199227092444, "loss": 1.0583, "step": 3068 }, { "epoch": 0.6239072982313478, "grad_norm": 0.14068090915679932, "learning_rate": 0.00013769958303671312, "loss": 1.1597, "step": 3069 }, { "epoch": 0.6241105915836552, "grad_norm": 0.12544094026088715, "learning_rate": 0.00013767924336418184, "loss": 0.9624, "step": 3070 }, { "epoch": 0.6243138849359626, "grad_norm": 0.13259856402873993, "learning_rate": 0.00013765890369165057, "loss": 1.0218, "step": 3071 }, { "epoch": 0.62451717828827, "grad_norm": 0.13529850542545319, "learning_rate": 0.0001376385640191193, "loss": 1.1063, "step": 3072 }, { "epoch": 0.6247204716405773, "grad_norm": 0.1389310508966446, "learning_rate": 0.00013761822434658802, "loss": 1.067, "step": 3073 }, { "epoch": 0.6249237649928847, "grad_norm": 0.1326620876789093, "learning_rate": 0.00013759788467405677, "loss": 1.1228, "step": 3074 }, { "epoch": 0.6251270583451921, "grad_norm": 0.1371268332004547, "learning_rate": 0.0001375775450015255, "loss": 1.056, "step": 3075 }, { "epoch": 0.6253303516974995, "grad_norm": 0.15050175786018372, "learning_rate": 0.00013755720532899422, "loss": 1.1679, "step": 3076 }, { "epoch": 0.6255336450498069, "grad_norm": 0.14462800323963165, "learning_rate": 0.00013753686565646294, "loss": 1.1155, "step": 3077 }, { "epoch": 0.6257369384021142, "grad_norm": 0.12994062900543213, "learning_rate": 0.00013751652598393166, "loss": 1.0954, "step": 3078 }, { "epoch": 0.6259402317544216, "grad_norm": 0.12979595363140106, "learning_rate": 0.0001374961863114004, "loss": 1.0912, "step": 3079 }, { "epoch": 0.626143525106729, "grad_norm": 0.12296707928180695, "learning_rate": 0.0001374758466388691, "loss": 0.9646, "step": 3080 }, { "epoch": 0.6263468184590364, "grad_norm": 0.14658544957637787, "learning_rate": 0.00013745550696633784, "loss": 1.1149, "step": 3081 }, { "epoch": 0.6265501118113438, "grad_norm": 0.12885436415672302, "learning_rate": 0.00013743516729380656, "loss": 1.0285, "step": 3082 }, { "epoch": 0.6267534051636512, "grad_norm": 0.13237449526786804, "learning_rate": 0.00013741482762127531, "loss": 1.1388, "step": 3083 }, { "epoch": 0.6269566985159585, "grad_norm": 0.11667048186063766, "learning_rate": 0.00013739448794874404, "loss": 0.9625, "step": 3084 }, { "epoch": 0.6271599918682659, "grad_norm": 0.09962797164916992, "learning_rate": 0.00013737414827621276, "loss": 0.8283, "step": 3085 }, { "epoch": 0.6273632852205733, "grad_norm": 0.11563806235790253, "learning_rate": 0.0001373538086036815, "loss": 0.9379, "step": 3086 }, { "epoch": 0.6275665785728807, "grad_norm": 0.14020705223083496, "learning_rate": 0.0001373334689311502, "loss": 1.0649, "step": 3087 }, { "epoch": 0.6277698719251881, "grad_norm": 0.1255711168050766, "learning_rate": 0.00013731312925861894, "loss": 1.0555, "step": 3088 }, { "epoch": 0.6279731652774955, "grad_norm": 0.1265256702899933, "learning_rate": 0.00013729278958608766, "loss": 0.9377, "step": 3089 }, { "epoch": 0.6281764586298028, "grad_norm": 0.13861151039600372, "learning_rate": 0.00013727244991355639, "loss": 1.2492, "step": 3090 }, { "epoch": 0.6283797519821102, "grad_norm": 0.1353643387556076, "learning_rate": 0.00013725211024102514, "loss": 1.0395, "step": 3091 }, { "epoch": 0.6285830453344176, "grad_norm": 0.14273463189601898, "learning_rate": 0.00013723177056849386, "loss": 1.2017, "step": 3092 }, { "epoch": 0.628786338686725, "grad_norm": 0.12992137670516968, "learning_rate": 0.00013721143089596259, "loss": 1.1135, "step": 3093 }, { "epoch": 0.6289896320390324, "grad_norm": 0.13525742292404175, "learning_rate": 0.0001371910912234313, "loss": 1.1695, "step": 3094 }, { "epoch": 0.6291929253913398, "grad_norm": 0.12449081242084503, "learning_rate": 0.00013717075155090003, "loss": 1.0187, "step": 3095 }, { "epoch": 0.629396218743647, "grad_norm": 0.12699362635612488, "learning_rate": 0.00013715041187836876, "loss": 1.0876, "step": 3096 }, { "epoch": 0.6295995120959544, "grad_norm": 0.12526580691337585, "learning_rate": 0.00013713007220583748, "loss": 1.0352, "step": 3097 }, { "epoch": 0.6298028054482618, "grad_norm": 0.1089174896478653, "learning_rate": 0.0001371097325333062, "loss": 0.9695, "step": 3098 }, { "epoch": 0.6300060988005692, "grad_norm": 0.1343061774969101, "learning_rate": 0.00013708939286077496, "loss": 1.0601, "step": 3099 }, { "epoch": 0.6302093921528766, "grad_norm": 0.14272217452526093, "learning_rate": 0.00013706905318824368, "loss": 1.1642, "step": 3100 }, { "epoch": 0.630412685505184, "grad_norm": 0.14062613248825073, "learning_rate": 0.0001370487135157124, "loss": 1.1965, "step": 3101 }, { "epoch": 0.6306159788574913, "grad_norm": 0.12888343632221222, "learning_rate": 0.00013702837384318113, "loss": 0.9552, "step": 3102 }, { "epoch": 0.6308192722097987, "grad_norm": 0.1350019872188568, "learning_rate": 0.00013700803417064986, "loss": 1.1513, "step": 3103 }, { "epoch": 0.6310225655621061, "grad_norm": 0.12076770514249802, "learning_rate": 0.00013698769449811858, "loss": 0.9807, "step": 3104 }, { "epoch": 0.6312258589144135, "grad_norm": 0.12005645036697388, "learning_rate": 0.0001369673548255873, "loss": 0.9309, "step": 3105 }, { "epoch": 0.6314291522667209, "grad_norm": 0.13432009518146515, "learning_rate": 0.00013694701515305603, "loss": 1.0728, "step": 3106 }, { "epoch": 0.6316324456190282, "grad_norm": 0.14083653688430786, "learning_rate": 0.00013692667548052478, "loss": 0.9339, "step": 3107 }, { "epoch": 0.6318357389713356, "grad_norm": 0.12383510172367096, "learning_rate": 0.0001369063358079935, "loss": 0.9525, "step": 3108 }, { "epoch": 0.632039032323643, "grad_norm": 0.12858064472675323, "learning_rate": 0.00013688599613546223, "loss": 1.1277, "step": 3109 }, { "epoch": 0.6322423256759504, "grad_norm": 0.1366434544324875, "learning_rate": 0.00013686565646293096, "loss": 1.0272, "step": 3110 }, { "epoch": 0.6324456190282578, "grad_norm": 0.12631452083587646, "learning_rate": 0.00013684531679039968, "loss": 1.0147, "step": 3111 }, { "epoch": 0.6326489123805652, "grad_norm": 0.1388847827911377, "learning_rate": 0.0001368249771178684, "loss": 1.1982, "step": 3112 }, { "epoch": 0.6328522057328725, "grad_norm": 0.1357526183128357, "learning_rate": 0.00013680463744533713, "loss": 1.1804, "step": 3113 }, { "epoch": 0.6330554990851799, "grad_norm": 0.1273118555545807, "learning_rate": 0.00013678429777280585, "loss": 1.0268, "step": 3114 }, { "epoch": 0.6332587924374873, "grad_norm": 0.13186684250831604, "learning_rate": 0.0001367639581002746, "loss": 1.135, "step": 3115 }, { "epoch": 0.6334620857897947, "grad_norm": 0.1217605397105217, "learning_rate": 0.00013674361842774333, "loss": 1.0364, "step": 3116 }, { "epoch": 0.6336653791421021, "grad_norm": 0.11785151809453964, "learning_rate": 0.00013672327875521205, "loss": 0.9248, "step": 3117 }, { "epoch": 0.6338686724944095, "grad_norm": 0.12986084818840027, "learning_rate": 0.00013670293908268078, "loss": 1.0988, "step": 3118 }, { "epoch": 0.6340719658467168, "grad_norm": 0.14195957779884338, "learning_rate": 0.0001366825994101495, "loss": 1.2182, "step": 3119 }, { "epoch": 0.6342752591990242, "grad_norm": 0.12939682602882385, "learning_rate": 0.00013666225973761823, "loss": 1.0573, "step": 3120 }, { "epoch": 0.6344785525513316, "grad_norm": 0.12343540787696838, "learning_rate": 0.00013664192006508695, "loss": 1.0057, "step": 3121 }, { "epoch": 0.634681845903639, "grad_norm": 0.12308801710605621, "learning_rate": 0.00013662158039255568, "loss": 0.9345, "step": 3122 }, { "epoch": 0.6348851392559464, "grad_norm": 0.13453471660614014, "learning_rate": 0.0001366012407200244, "loss": 1.1593, "step": 3123 }, { "epoch": 0.6350884326082538, "grad_norm": 0.14599518477916718, "learning_rate": 0.00013658090104749315, "loss": 1.19, "step": 3124 }, { "epoch": 0.635291725960561, "grad_norm": 0.13644537329673767, "learning_rate": 0.00013656056137496188, "loss": 1.1541, "step": 3125 }, { "epoch": 0.6354950193128684, "grad_norm": 0.1313880980014801, "learning_rate": 0.0001365402217024306, "loss": 1.0607, "step": 3126 }, { "epoch": 0.6356983126651758, "grad_norm": 0.12381511926651001, "learning_rate": 0.00013651988202989933, "loss": 1.0003, "step": 3127 }, { "epoch": 0.6359016060174832, "grad_norm": 0.1361168473958969, "learning_rate": 0.00013649954235736805, "loss": 0.9445, "step": 3128 }, { "epoch": 0.6361048993697906, "grad_norm": 0.13545829057693481, "learning_rate": 0.00013647920268483677, "loss": 1.046, "step": 3129 }, { "epoch": 0.636308192722098, "grad_norm": 0.1335272639989853, "learning_rate": 0.0001364588630123055, "loss": 1.0611, "step": 3130 }, { "epoch": 0.6365114860744053, "grad_norm": 0.13092759251594543, "learning_rate": 0.00013643852333977422, "loss": 0.991, "step": 3131 }, { "epoch": 0.6367147794267127, "grad_norm": 0.1328737437725067, "learning_rate": 0.00013641818366724298, "loss": 1.2151, "step": 3132 }, { "epoch": 0.6369180727790201, "grad_norm": 0.13247033953666687, "learning_rate": 0.0001363978439947117, "loss": 1.0918, "step": 3133 }, { "epoch": 0.6371213661313275, "grad_norm": 0.1463424563407898, "learning_rate": 0.00013637750432218042, "loss": 1.0753, "step": 3134 }, { "epoch": 0.6373246594836349, "grad_norm": 0.13314956426620483, "learning_rate": 0.00013635716464964915, "loss": 1.1224, "step": 3135 }, { "epoch": 0.6375279528359422, "grad_norm": 0.12841732800006866, "learning_rate": 0.00013633682497711787, "loss": 0.9049, "step": 3136 }, { "epoch": 0.6377312461882496, "grad_norm": 0.1303834468126297, "learning_rate": 0.0001363164853045866, "loss": 1.1208, "step": 3137 }, { "epoch": 0.637934539540557, "grad_norm": 0.1288985162973404, "learning_rate": 0.00013629614563205532, "loss": 1.0416, "step": 3138 }, { "epoch": 0.6381378328928644, "grad_norm": 0.13632969558238983, "learning_rate": 0.00013627580595952405, "loss": 1.1293, "step": 3139 }, { "epoch": 0.6383411262451718, "grad_norm": 0.12471256405115128, "learning_rate": 0.0001362554662869928, "loss": 1.1095, "step": 3140 }, { "epoch": 0.6385444195974792, "grad_norm": 0.13156485557556152, "learning_rate": 0.00013623512661446152, "loss": 1.0952, "step": 3141 }, { "epoch": 0.6387477129497865, "grad_norm": 0.13472090661525726, "learning_rate": 0.00013621478694193025, "loss": 1.0665, "step": 3142 }, { "epoch": 0.6389510063020939, "grad_norm": 0.1464674472808838, "learning_rate": 0.00013619444726939897, "loss": 1.3084, "step": 3143 }, { "epoch": 0.6391542996544013, "grad_norm": 0.13103194534778595, "learning_rate": 0.0001361741075968677, "loss": 1.0512, "step": 3144 }, { "epoch": 0.6393575930067087, "grad_norm": 0.13378995656967163, "learning_rate": 0.00013615376792433642, "loss": 1.0625, "step": 3145 }, { "epoch": 0.6395608863590161, "grad_norm": 0.13924111425876617, "learning_rate": 0.00013613342825180514, "loss": 1.1813, "step": 3146 }, { "epoch": 0.6397641797113235, "grad_norm": 0.13989883661270142, "learning_rate": 0.00013611308857927387, "loss": 1.0973, "step": 3147 }, { "epoch": 0.6399674730636308, "grad_norm": 0.12374843657016754, "learning_rate": 0.00013609274890674262, "loss": 0.9683, "step": 3148 }, { "epoch": 0.6401707664159382, "grad_norm": 0.14824433624744415, "learning_rate": 0.00013607240923421135, "loss": 1.1632, "step": 3149 }, { "epoch": 0.6403740597682456, "grad_norm": 0.13298064470291138, "learning_rate": 0.00013605206956168007, "loss": 1.0621, "step": 3150 }, { "epoch": 0.640577353120553, "grad_norm": 0.13271810114383698, "learning_rate": 0.0001360317298891488, "loss": 1.1239, "step": 3151 }, { "epoch": 0.6407806464728604, "grad_norm": 0.12920920550823212, "learning_rate": 0.00013601139021661752, "loss": 1.0332, "step": 3152 }, { "epoch": 0.6409839398251678, "grad_norm": 0.12078989297151566, "learning_rate": 0.00013599105054408624, "loss": 0.9747, "step": 3153 }, { "epoch": 0.6411872331774751, "grad_norm": 0.1309296190738678, "learning_rate": 0.00013597071087155497, "loss": 1.0871, "step": 3154 }, { "epoch": 0.6413905265297825, "grad_norm": 0.13290594518184662, "learning_rate": 0.0001359503711990237, "loss": 0.9874, "step": 3155 }, { "epoch": 0.6415938198820899, "grad_norm": 0.12248789519071579, "learning_rate": 0.00013593003152649244, "loss": 1.0674, "step": 3156 }, { "epoch": 0.6417971132343973, "grad_norm": 0.13262233138084412, "learning_rate": 0.00013590969185396117, "loss": 1.0918, "step": 3157 }, { "epoch": 0.6420004065867047, "grad_norm": 0.11638560891151428, "learning_rate": 0.0001358893521814299, "loss": 0.94, "step": 3158 }, { "epoch": 0.6422036999390119, "grad_norm": 0.13623739778995514, "learning_rate": 0.00013586901250889862, "loss": 0.9925, "step": 3159 }, { "epoch": 0.6424069932913193, "grad_norm": 0.13399013876914978, "learning_rate": 0.00013584867283636734, "loss": 0.9553, "step": 3160 }, { "epoch": 0.6426102866436267, "grad_norm": 0.12274351716041565, "learning_rate": 0.00013582833316383607, "loss": 1.0406, "step": 3161 }, { "epoch": 0.6428135799959341, "grad_norm": 0.13038837909698486, "learning_rate": 0.0001358079934913048, "loss": 1.1596, "step": 3162 }, { "epoch": 0.6430168733482415, "grad_norm": 0.13271398842334747, "learning_rate": 0.00013578765381877351, "loss": 1.0792, "step": 3163 }, { "epoch": 0.6432201667005489, "grad_norm": 0.1319563090801239, "learning_rate": 0.00013576731414624224, "loss": 1.1321, "step": 3164 }, { "epoch": 0.6434234600528562, "grad_norm": 0.13448521494865417, "learning_rate": 0.000135746974473711, "loss": 1.0278, "step": 3165 }, { "epoch": 0.6436267534051636, "grad_norm": 0.1246679350733757, "learning_rate": 0.00013572663480117972, "loss": 0.9818, "step": 3166 }, { "epoch": 0.643830046757471, "grad_norm": 0.11772032827138901, "learning_rate": 0.00013570629512864844, "loss": 0.9398, "step": 3167 }, { "epoch": 0.6440333401097784, "grad_norm": 0.12182223796844482, "learning_rate": 0.00013568595545611716, "loss": 0.9794, "step": 3168 }, { "epoch": 0.6442366334620858, "grad_norm": 0.11464784294366837, "learning_rate": 0.0001356656157835859, "loss": 0.9508, "step": 3169 }, { "epoch": 0.6444399268143932, "grad_norm": 0.12462913244962692, "learning_rate": 0.0001356452761110546, "loss": 1.0784, "step": 3170 }, { "epoch": 0.6446432201667005, "grad_norm": 0.14886057376861572, "learning_rate": 0.00013562493643852334, "loss": 1.1937, "step": 3171 }, { "epoch": 0.6448465135190079, "grad_norm": 0.12092513591051102, "learning_rate": 0.00013560459676599206, "loss": 0.9333, "step": 3172 }, { "epoch": 0.6450498068713153, "grad_norm": 0.13768193125724792, "learning_rate": 0.00013558425709346081, "loss": 1.0512, "step": 3173 }, { "epoch": 0.6452531002236227, "grad_norm": 0.13496732711791992, "learning_rate": 0.00013556391742092954, "loss": 1.0672, "step": 3174 }, { "epoch": 0.6454563935759301, "grad_norm": 0.1316104531288147, "learning_rate": 0.00013554357774839826, "loss": 1.0944, "step": 3175 }, { "epoch": 0.6456596869282375, "grad_norm": 0.12093289196491241, "learning_rate": 0.000135523238075867, "loss": 0.8777, "step": 3176 }, { "epoch": 0.6458629802805448, "grad_norm": 0.12371384352445602, "learning_rate": 0.0001355028984033357, "loss": 0.8997, "step": 3177 }, { "epoch": 0.6460662736328522, "grad_norm": 0.13598783314228058, "learning_rate": 0.00013548255873080444, "loss": 1.1167, "step": 3178 }, { "epoch": 0.6462695669851596, "grad_norm": 0.1385606974363327, "learning_rate": 0.00013546221905827316, "loss": 1.032, "step": 3179 }, { "epoch": 0.646472860337467, "grad_norm": 0.12756818532943726, "learning_rate": 0.00013544187938574188, "loss": 1.0912, "step": 3180 }, { "epoch": 0.6466761536897744, "grad_norm": 0.12240833789110184, "learning_rate": 0.00013542153971321064, "loss": 1.0065, "step": 3181 }, { "epoch": 0.6468794470420818, "grad_norm": 0.14103402197360992, "learning_rate": 0.00013540120004067936, "loss": 1.1357, "step": 3182 }, { "epoch": 0.6470827403943891, "grad_norm": 0.12278808653354645, "learning_rate": 0.00013538086036814809, "loss": 1.0462, "step": 3183 }, { "epoch": 0.6472860337466965, "grad_norm": 0.13968375325202942, "learning_rate": 0.0001353605206956168, "loss": 1.1164, "step": 3184 }, { "epoch": 0.6474893270990039, "grad_norm": 0.12311102449893951, "learning_rate": 0.00013534018102308553, "loss": 1.0793, "step": 3185 }, { "epoch": 0.6476926204513113, "grad_norm": 0.1307074874639511, "learning_rate": 0.00013531984135055426, "loss": 1.0874, "step": 3186 }, { "epoch": 0.6478959138036187, "grad_norm": 0.1303715705871582, "learning_rate": 0.00013529950167802298, "loss": 1.1111, "step": 3187 }, { "epoch": 0.648099207155926, "grad_norm": 0.13313518464565277, "learning_rate": 0.0001352791620054917, "loss": 0.9861, "step": 3188 }, { "epoch": 0.6483025005082333, "grad_norm": 0.13007265329360962, "learning_rate": 0.00013525882233296046, "loss": 0.9644, "step": 3189 }, { "epoch": 0.6485057938605407, "grad_norm": 0.14151926338672638, "learning_rate": 0.00013523848266042918, "loss": 1.1387, "step": 3190 }, { "epoch": 0.6487090872128481, "grad_norm": 0.13587616384029388, "learning_rate": 0.0001352181429878979, "loss": 1.0802, "step": 3191 }, { "epoch": 0.6489123805651555, "grad_norm": 0.14267796277999878, "learning_rate": 0.00013519780331536663, "loss": 1.1885, "step": 3192 }, { "epoch": 0.6491156739174629, "grad_norm": 0.11519461125135422, "learning_rate": 0.00013517746364283536, "loss": 0.9333, "step": 3193 }, { "epoch": 0.6493189672697702, "grad_norm": 0.14246360957622528, "learning_rate": 0.00013515712397030408, "loss": 1.1757, "step": 3194 }, { "epoch": 0.6495222606220776, "grad_norm": 0.14482155442237854, "learning_rate": 0.0001351367842977728, "loss": 1.097, "step": 3195 }, { "epoch": 0.649725553974385, "grad_norm": 0.1291578710079193, "learning_rate": 0.00013511644462524153, "loss": 0.9938, "step": 3196 }, { "epoch": 0.6499288473266924, "grad_norm": 0.13155002892017365, "learning_rate": 0.00013509610495271028, "loss": 1.0634, "step": 3197 }, { "epoch": 0.6501321406789998, "grad_norm": 0.1477162092924118, "learning_rate": 0.000135075765280179, "loss": 1.0995, "step": 3198 }, { "epoch": 0.6503354340313072, "grad_norm": 0.12841352820396423, "learning_rate": 0.00013505542560764773, "loss": 1.1185, "step": 3199 }, { "epoch": 0.6505387273836145, "grad_norm": 0.13000524044036865, "learning_rate": 0.00013503508593511646, "loss": 1.2428, "step": 3200 }, { "epoch": 0.6507420207359219, "grad_norm": 0.1218332052230835, "learning_rate": 0.00013501474626258518, "loss": 1.026, "step": 3201 }, { "epoch": 0.6509453140882293, "grad_norm": 0.12599121034145355, "learning_rate": 0.0001349944065900539, "loss": 1.0013, "step": 3202 }, { "epoch": 0.6511486074405367, "grad_norm": 0.15027253329753876, "learning_rate": 0.00013497406691752263, "loss": 1.2587, "step": 3203 }, { "epoch": 0.6513519007928441, "grad_norm": 0.12841476500034332, "learning_rate": 0.00013495372724499135, "loss": 0.9977, "step": 3204 }, { "epoch": 0.6515551941451515, "grad_norm": 0.13236485421657562, "learning_rate": 0.00013493338757246008, "loss": 1.0202, "step": 3205 }, { "epoch": 0.6517584874974588, "grad_norm": 0.1356945037841797, "learning_rate": 0.00013491304789992883, "loss": 1.1599, "step": 3206 }, { "epoch": 0.6519617808497662, "grad_norm": 0.13879364728927612, "learning_rate": 0.00013489270822739755, "loss": 1.0116, "step": 3207 }, { "epoch": 0.6521650742020736, "grad_norm": 0.15575814247131348, "learning_rate": 0.00013487236855486628, "loss": 1.0991, "step": 3208 }, { "epoch": 0.652368367554381, "grad_norm": 0.11463279277086258, "learning_rate": 0.000134852028882335, "loss": 0.8655, "step": 3209 }, { "epoch": 0.6525716609066884, "grad_norm": 0.1258864849805832, "learning_rate": 0.00013483168920980373, "loss": 0.9616, "step": 3210 }, { "epoch": 0.6527749542589957, "grad_norm": 0.13992567360401154, "learning_rate": 0.00013481134953727245, "loss": 1.078, "step": 3211 }, { "epoch": 0.6529782476113031, "grad_norm": 0.14613211154937744, "learning_rate": 0.00013479100986474118, "loss": 1.0828, "step": 3212 }, { "epoch": 0.6531815409636105, "grad_norm": 0.11749006807804108, "learning_rate": 0.0001347706701922099, "loss": 0.8586, "step": 3213 }, { "epoch": 0.6533848343159179, "grad_norm": 0.13639944791793823, "learning_rate": 0.00013475033051967865, "loss": 1.0437, "step": 3214 }, { "epoch": 0.6535881276682253, "grad_norm": 0.12744362652301788, "learning_rate": 0.00013472999084714738, "loss": 1.1059, "step": 3215 }, { "epoch": 0.6537914210205327, "grad_norm": 0.12434601038694382, "learning_rate": 0.0001347096511746161, "loss": 1.0041, "step": 3216 }, { "epoch": 0.65399471437284, "grad_norm": 0.12143322080373764, "learning_rate": 0.00013468931150208483, "loss": 0.9148, "step": 3217 }, { "epoch": 0.6541980077251474, "grad_norm": 0.142898291349411, "learning_rate": 0.00013466897182955355, "loss": 1.1715, "step": 3218 }, { "epoch": 0.6544013010774548, "grad_norm": 0.12720847129821777, "learning_rate": 0.00013464863215702227, "loss": 1.013, "step": 3219 }, { "epoch": 0.6546045944297622, "grad_norm": 0.1172272264957428, "learning_rate": 0.000134628292484491, "loss": 0.9205, "step": 3220 }, { "epoch": 0.6548078877820696, "grad_norm": 0.15361227095127106, "learning_rate": 0.00013460795281195972, "loss": 1.2636, "step": 3221 }, { "epoch": 0.655011181134377, "grad_norm": 0.1317681223154068, "learning_rate": 0.00013458761313942847, "loss": 1.1478, "step": 3222 }, { "epoch": 0.6552144744866842, "grad_norm": 0.1296282410621643, "learning_rate": 0.0001345672734668972, "loss": 1.0402, "step": 3223 }, { "epoch": 0.6554177678389916, "grad_norm": 0.1406709998846054, "learning_rate": 0.00013454693379436592, "loss": 1.1656, "step": 3224 }, { "epoch": 0.655621061191299, "grad_norm": 0.13919825851917267, "learning_rate": 0.00013452659412183465, "loss": 1.0382, "step": 3225 }, { "epoch": 0.6558243545436064, "grad_norm": 0.14981389045715332, "learning_rate": 0.00013450625444930337, "loss": 1.1494, "step": 3226 }, { "epoch": 0.6560276478959138, "grad_norm": 0.13149550557136536, "learning_rate": 0.0001344859147767721, "loss": 1.2005, "step": 3227 }, { "epoch": 0.6562309412482212, "grad_norm": 0.11929726600646973, "learning_rate": 0.00013446557510424082, "loss": 0.8001, "step": 3228 }, { "epoch": 0.6564342346005285, "grad_norm": 0.1304064244031906, "learning_rate": 0.00013444523543170955, "loss": 0.9621, "step": 3229 }, { "epoch": 0.6566375279528359, "grad_norm": 0.1286899745464325, "learning_rate": 0.0001344248957591783, "loss": 0.9954, "step": 3230 }, { "epoch": 0.6568408213051433, "grad_norm": 0.13308082520961761, "learning_rate": 0.00013440455608664702, "loss": 0.9092, "step": 3231 }, { "epoch": 0.6570441146574507, "grad_norm": 0.14997734129428864, "learning_rate": 0.00013438421641411575, "loss": 1.1089, "step": 3232 }, { "epoch": 0.6572474080097581, "grad_norm": 0.14065352082252502, "learning_rate": 0.00013436387674158447, "loss": 1.0664, "step": 3233 }, { "epoch": 0.6574507013620655, "grad_norm": 0.12980201840400696, "learning_rate": 0.0001343435370690532, "loss": 1.1631, "step": 3234 }, { "epoch": 0.6576539947143728, "grad_norm": 0.11543235182762146, "learning_rate": 0.00013432319739652192, "loss": 0.9523, "step": 3235 }, { "epoch": 0.6578572880666802, "grad_norm": 0.14717644453048706, "learning_rate": 0.00013430285772399064, "loss": 1.0466, "step": 3236 }, { "epoch": 0.6580605814189876, "grad_norm": 0.12715165317058563, "learning_rate": 0.00013428251805145937, "loss": 0.988, "step": 3237 }, { "epoch": 0.658263874771295, "grad_norm": 0.14531929790973663, "learning_rate": 0.00013426217837892812, "loss": 1.1871, "step": 3238 }, { "epoch": 0.6584671681236024, "grad_norm": 0.139459490776062, "learning_rate": 0.00013424183870639684, "loss": 1.1572, "step": 3239 }, { "epoch": 0.6586704614759097, "grad_norm": 0.11804230511188507, "learning_rate": 0.00013422149903386557, "loss": 0.9844, "step": 3240 }, { "epoch": 0.6588737548282171, "grad_norm": 0.14333584904670715, "learning_rate": 0.0001342011593613343, "loss": 1.1719, "step": 3241 }, { "epoch": 0.6590770481805245, "grad_norm": 0.14224494993686676, "learning_rate": 0.00013418081968880302, "loss": 1.1448, "step": 3242 }, { "epoch": 0.6592803415328319, "grad_norm": 0.11388222128152847, "learning_rate": 0.00013416048001627174, "loss": 0.9178, "step": 3243 }, { "epoch": 0.6594836348851393, "grad_norm": 0.12758168578147888, "learning_rate": 0.00013414014034374047, "loss": 0.9446, "step": 3244 }, { "epoch": 0.6596869282374467, "grad_norm": 0.159623384475708, "learning_rate": 0.0001341198006712092, "loss": 1.3143, "step": 3245 }, { "epoch": 0.659890221589754, "grad_norm": 0.13925635814666748, "learning_rate": 0.00013409946099867792, "loss": 1.131, "step": 3246 }, { "epoch": 0.6600935149420614, "grad_norm": 0.12121693789958954, "learning_rate": 0.00013407912132614667, "loss": 1.0076, "step": 3247 }, { "epoch": 0.6602968082943688, "grad_norm": 0.12954868376255035, "learning_rate": 0.0001340587816536154, "loss": 0.9662, "step": 3248 }, { "epoch": 0.6605001016466762, "grad_norm": 0.13503266870975494, "learning_rate": 0.00013403844198108412, "loss": 1.0233, "step": 3249 }, { "epoch": 0.6607033949989836, "grad_norm": 0.13549566268920898, "learning_rate": 0.00013401810230855284, "loss": 1.0545, "step": 3250 }, { "epoch": 0.660906688351291, "grad_norm": 0.13881300389766693, "learning_rate": 0.00013399776263602157, "loss": 1.0844, "step": 3251 }, { "epoch": 0.6611099817035982, "grad_norm": 0.13221535086631775, "learning_rate": 0.0001339774229634903, "loss": 1.0159, "step": 3252 }, { "epoch": 0.6613132750559056, "grad_norm": 0.1378117799758911, "learning_rate": 0.00013395708329095901, "loss": 1.2071, "step": 3253 }, { "epoch": 0.661516568408213, "grad_norm": 0.1307571530342102, "learning_rate": 0.00013393674361842774, "loss": 1.0874, "step": 3254 }, { "epoch": 0.6617198617605204, "grad_norm": 0.1532752364873886, "learning_rate": 0.0001339164039458965, "loss": 1.263, "step": 3255 }, { "epoch": 0.6619231551128278, "grad_norm": 0.14829877018928528, "learning_rate": 0.00013389606427336521, "loss": 1.1919, "step": 3256 }, { "epoch": 0.6621264484651352, "grad_norm": 0.12832298874855042, "learning_rate": 0.00013387572460083394, "loss": 1.0, "step": 3257 }, { "epoch": 0.6623297418174425, "grad_norm": 0.12127513438463211, "learning_rate": 0.00013385538492830266, "loss": 1.03, "step": 3258 }, { "epoch": 0.6625330351697499, "grad_norm": 0.1351458579301834, "learning_rate": 0.0001338350452557714, "loss": 1.0167, "step": 3259 }, { "epoch": 0.6627363285220573, "grad_norm": 0.11357429623603821, "learning_rate": 0.0001338147055832401, "loss": 0.9135, "step": 3260 }, { "epoch": 0.6629396218743647, "grad_norm": 0.14391832053661346, "learning_rate": 0.00013379436591070884, "loss": 1.1626, "step": 3261 }, { "epoch": 0.6631429152266721, "grad_norm": 0.1359371840953827, "learning_rate": 0.00013377402623817756, "loss": 1.1155, "step": 3262 }, { "epoch": 0.6633462085789794, "grad_norm": 0.14570018649101257, "learning_rate": 0.0001337536865656463, "loss": 1.0961, "step": 3263 }, { "epoch": 0.6635495019312868, "grad_norm": 0.12299071252346039, "learning_rate": 0.00013373334689311504, "loss": 0.9879, "step": 3264 }, { "epoch": 0.6637527952835942, "grad_norm": 0.1427142471075058, "learning_rate": 0.00013371300722058376, "loss": 1.2364, "step": 3265 }, { "epoch": 0.6639560886359016, "grad_norm": 0.1400018036365509, "learning_rate": 0.0001336926675480525, "loss": 1.1366, "step": 3266 }, { "epoch": 0.664159381988209, "grad_norm": 0.14757339656352997, "learning_rate": 0.0001336723278755212, "loss": 1.2415, "step": 3267 }, { "epoch": 0.6643626753405164, "grad_norm": 0.1404561698436737, "learning_rate": 0.00013365198820298994, "loss": 1.0536, "step": 3268 }, { "epoch": 0.6645659686928237, "grad_norm": 0.13608767092227936, "learning_rate": 0.00013363164853045866, "loss": 1.0977, "step": 3269 }, { "epoch": 0.6647692620451311, "grad_norm": 0.14513832330703735, "learning_rate": 0.00013361130885792738, "loss": 1.1665, "step": 3270 }, { "epoch": 0.6649725553974385, "grad_norm": 0.12752074003219604, "learning_rate": 0.00013359096918539614, "loss": 1.0611, "step": 3271 }, { "epoch": 0.6651758487497459, "grad_norm": 0.1297471970319748, "learning_rate": 0.00013357062951286486, "loss": 1.0336, "step": 3272 }, { "epoch": 0.6653791421020533, "grad_norm": 0.13528691232204437, "learning_rate": 0.00013355028984033359, "loss": 1.1205, "step": 3273 }, { "epoch": 0.6655824354543607, "grad_norm": 0.13278824090957642, "learning_rate": 0.0001335299501678023, "loss": 1.1608, "step": 3274 }, { "epoch": 0.665785728806668, "grad_norm": 0.1527799665927887, "learning_rate": 0.00013350961049527103, "loss": 1.1947, "step": 3275 }, { "epoch": 0.6659890221589754, "grad_norm": 0.11764834076166153, "learning_rate": 0.00013348927082273976, "loss": 0.8985, "step": 3276 }, { "epoch": 0.6661923155112828, "grad_norm": 0.12094051390886307, "learning_rate": 0.00013346893115020848, "loss": 0.8954, "step": 3277 }, { "epoch": 0.6663956088635902, "grad_norm": 0.1273156702518463, "learning_rate": 0.0001334485914776772, "loss": 1.0629, "step": 3278 }, { "epoch": 0.6665989022158976, "grad_norm": 0.12444844841957092, "learning_rate": 0.00013342825180514596, "loss": 1.0771, "step": 3279 }, { "epoch": 0.666802195568205, "grad_norm": 0.13100309669971466, "learning_rate": 0.00013340791213261468, "loss": 1.0665, "step": 3280 }, { "epoch": 0.6670054889205123, "grad_norm": 0.14003531634807587, "learning_rate": 0.0001333875724600834, "loss": 1.1218, "step": 3281 }, { "epoch": 0.6672087822728197, "grad_norm": 0.13837094604969025, "learning_rate": 0.00013336723278755213, "loss": 1.1864, "step": 3282 }, { "epoch": 0.667412075625127, "grad_norm": 0.1185075119137764, "learning_rate": 0.00013334689311502086, "loss": 0.9776, "step": 3283 }, { "epoch": 0.6676153689774345, "grad_norm": 0.1384880095720291, "learning_rate": 0.00013332655344248958, "loss": 1.117, "step": 3284 }, { "epoch": 0.6678186623297419, "grad_norm": 0.1331661343574524, "learning_rate": 0.0001333062137699583, "loss": 1.1523, "step": 3285 }, { "epoch": 0.6680219556820493, "grad_norm": 0.12203952670097351, "learning_rate": 0.00013328587409742703, "loss": 1.0236, "step": 3286 }, { "epoch": 0.6682252490343565, "grad_norm": 0.1446705311536789, "learning_rate": 0.00013326553442489575, "loss": 1.2011, "step": 3287 }, { "epoch": 0.6684285423866639, "grad_norm": 0.15075799822807312, "learning_rate": 0.0001332451947523645, "loss": 1.1135, "step": 3288 }, { "epoch": 0.6686318357389713, "grad_norm": 0.13888481259346008, "learning_rate": 0.00013322485507983323, "loss": 1.1357, "step": 3289 }, { "epoch": 0.6688351290912787, "grad_norm": 0.13847656548023224, "learning_rate": 0.00013320451540730196, "loss": 1.1679, "step": 3290 }, { "epoch": 0.6690384224435861, "grad_norm": 0.14227357506752014, "learning_rate": 0.00013318417573477068, "loss": 1.184, "step": 3291 }, { "epoch": 0.6692417157958934, "grad_norm": 0.13490445911884308, "learning_rate": 0.0001331638360622394, "loss": 1.026, "step": 3292 }, { "epoch": 0.6694450091482008, "grad_norm": 0.14282800257205963, "learning_rate": 0.00013314349638970813, "loss": 1.1685, "step": 3293 }, { "epoch": 0.6696483025005082, "grad_norm": 0.1398768126964569, "learning_rate": 0.00013312315671717685, "loss": 1.1653, "step": 3294 }, { "epoch": 0.6698515958528156, "grad_norm": 0.1359616070985794, "learning_rate": 0.00013310281704464558, "loss": 0.9866, "step": 3295 }, { "epoch": 0.670054889205123, "grad_norm": 0.14484332501888275, "learning_rate": 0.00013308247737211433, "loss": 1.119, "step": 3296 }, { "epoch": 0.6702581825574304, "grad_norm": 0.12202159315347672, "learning_rate": 0.00013306213769958305, "loss": 1.0366, "step": 3297 }, { "epoch": 0.6704614759097377, "grad_norm": 0.1427534818649292, "learning_rate": 0.00013304179802705178, "loss": 1.1238, "step": 3298 }, { "epoch": 0.6706647692620451, "grad_norm": 0.12576861679553986, "learning_rate": 0.0001330214583545205, "loss": 1.1628, "step": 3299 }, { "epoch": 0.6708680626143525, "grad_norm": 0.11372304707765579, "learning_rate": 0.00013300111868198923, "loss": 1.0396, "step": 3300 }, { "epoch": 0.6710713559666599, "grad_norm": 0.12820537388324738, "learning_rate": 0.00013298077900945795, "loss": 0.9094, "step": 3301 }, { "epoch": 0.6712746493189673, "grad_norm": 0.1097426563501358, "learning_rate": 0.00013296043933692668, "loss": 0.887, "step": 3302 }, { "epoch": 0.6714779426712747, "grad_norm": 0.13616250455379486, "learning_rate": 0.0001329400996643954, "loss": 1.0729, "step": 3303 }, { "epoch": 0.671681236023582, "grad_norm": 0.14476965367794037, "learning_rate": 0.00013291975999186415, "loss": 1.0224, "step": 3304 }, { "epoch": 0.6718845293758894, "grad_norm": 0.13365976512432098, "learning_rate": 0.00013289942031933288, "loss": 1.1, "step": 3305 }, { "epoch": 0.6720878227281968, "grad_norm": 0.12170373648405075, "learning_rate": 0.0001328790806468016, "loss": 0.9986, "step": 3306 }, { "epoch": 0.6722911160805042, "grad_norm": 0.1351754069328308, "learning_rate": 0.00013285874097427033, "loss": 1.0089, "step": 3307 }, { "epoch": 0.6724944094328116, "grad_norm": 0.13269051909446716, "learning_rate": 0.00013283840130173905, "loss": 0.9508, "step": 3308 }, { "epoch": 0.672697702785119, "grad_norm": 0.12628872692584991, "learning_rate": 0.00013281806162920777, "loss": 0.96, "step": 3309 }, { "epoch": 0.6729009961374263, "grad_norm": 0.13434316217899323, "learning_rate": 0.0001327977219566765, "loss": 1.0935, "step": 3310 }, { "epoch": 0.6731042894897337, "grad_norm": 0.137080579996109, "learning_rate": 0.00013277738228414522, "loss": 1.151, "step": 3311 }, { "epoch": 0.6733075828420411, "grad_norm": 0.1294548362493515, "learning_rate": 0.00013275704261161397, "loss": 1.0094, "step": 3312 }, { "epoch": 0.6735108761943485, "grad_norm": 0.14055456221103668, "learning_rate": 0.0001327367029390827, "loss": 1.056, "step": 3313 }, { "epoch": 0.6737141695466559, "grad_norm": 0.12785248458385468, "learning_rate": 0.00013271636326655142, "loss": 1.0964, "step": 3314 }, { "epoch": 0.6739174628989631, "grad_norm": 0.14090466499328613, "learning_rate": 0.00013269602359402015, "loss": 1.1419, "step": 3315 }, { "epoch": 0.6741207562512705, "grad_norm": 0.12105811387300491, "learning_rate": 0.00013267568392148887, "loss": 0.929, "step": 3316 }, { "epoch": 0.674324049603578, "grad_norm": 0.1410580724477768, "learning_rate": 0.0001326553442489576, "loss": 1.0849, "step": 3317 }, { "epoch": 0.6745273429558853, "grad_norm": 0.13689137995243073, "learning_rate": 0.00013263500457642632, "loss": 1.0011, "step": 3318 }, { "epoch": 0.6747306363081927, "grad_norm": 0.12887214124202728, "learning_rate": 0.00013261466490389505, "loss": 1.0231, "step": 3319 }, { "epoch": 0.6749339296605001, "grad_norm": 0.12463674694299698, "learning_rate": 0.0001325943252313638, "loss": 0.864, "step": 3320 }, { "epoch": 0.6751372230128074, "grad_norm": 0.13897714018821716, "learning_rate": 0.00013257398555883252, "loss": 1.1127, "step": 3321 }, { "epoch": 0.6753405163651148, "grad_norm": 0.1311863511800766, "learning_rate": 0.00013255364588630125, "loss": 1.0822, "step": 3322 }, { "epoch": 0.6755438097174222, "grad_norm": 0.1215839833021164, "learning_rate": 0.00013253330621376997, "loss": 0.9599, "step": 3323 }, { "epoch": 0.6757471030697296, "grad_norm": 0.12233379483222961, "learning_rate": 0.0001325129665412387, "loss": 0.8926, "step": 3324 }, { "epoch": 0.675950396422037, "grad_norm": 0.1159176304936409, "learning_rate": 0.00013249262686870742, "loss": 0.79, "step": 3325 }, { "epoch": 0.6761536897743444, "grad_norm": 0.1344752311706543, "learning_rate": 0.00013247228719617614, "loss": 1.0011, "step": 3326 }, { "epoch": 0.6763569831266517, "grad_norm": 0.14110898971557617, "learning_rate": 0.00013245194752364487, "loss": 1.1465, "step": 3327 }, { "epoch": 0.6765602764789591, "grad_norm": 0.12130746990442276, "learning_rate": 0.0001324316078511136, "loss": 0.9631, "step": 3328 }, { "epoch": 0.6767635698312665, "grad_norm": 0.12850743532180786, "learning_rate": 0.00013241126817858234, "loss": 1.0909, "step": 3329 }, { "epoch": 0.6769668631835739, "grad_norm": 0.14836134016513824, "learning_rate": 0.00013239092850605107, "loss": 1.3898, "step": 3330 }, { "epoch": 0.6771701565358813, "grad_norm": 0.1397714763879776, "learning_rate": 0.0001323705888335198, "loss": 1.1767, "step": 3331 }, { "epoch": 0.6773734498881887, "grad_norm": 0.13022536039352417, "learning_rate": 0.0001323502491609885, "loss": 1.0389, "step": 3332 }, { "epoch": 0.677576743240496, "grad_norm": 0.12649066746234894, "learning_rate": 0.00013232990948845724, "loss": 0.8931, "step": 3333 }, { "epoch": 0.6777800365928034, "grad_norm": 0.1422676295042038, "learning_rate": 0.00013230956981592597, "loss": 1.1758, "step": 3334 }, { "epoch": 0.6779833299451108, "grad_norm": 0.12162751704454422, "learning_rate": 0.0001322892301433947, "loss": 1.0629, "step": 3335 }, { "epoch": 0.6781866232974182, "grad_norm": 0.14175549149513245, "learning_rate": 0.00013226889047086342, "loss": 1.2327, "step": 3336 }, { "epoch": 0.6783899166497256, "grad_norm": 0.13854654133319855, "learning_rate": 0.00013224855079833217, "loss": 1.1758, "step": 3337 }, { "epoch": 0.678593210002033, "grad_norm": 0.11496133357286453, "learning_rate": 0.0001322282111258009, "loss": 0.8925, "step": 3338 }, { "epoch": 0.6787965033543403, "grad_norm": 0.1376158595085144, "learning_rate": 0.00013220787145326962, "loss": 1.2326, "step": 3339 }, { "epoch": 0.6789997967066477, "grad_norm": 0.12731988728046417, "learning_rate": 0.00013218753178073834, "loss": 1.0195, "step": 3340 }, { "epoch": 0.6792030900589551, "grad_norm": 0.1400342583656311, "learning_rate": 0.00013216719210820707, "loss": 1.1155, "step": 3341 }, { "epoch": 0.6794063834112625, "grad_norm": 0.11408770084381104, "learning_rate": 0.0001321468524356758, "loss": 0.8986, "step": 3342 }, { "epoch": 0.6796096767635699, "grad_norm": 0.13925215601921082, "learning_rate": 0.00013212651276314451, "loss": 1.0844, "step": 3343 }, { "epoch": 0.6798129701158772, "grad_norm": 0.13174065947532654, "learning_rate": 0.00013210617309061324, "loss": 0.9927, "step": 3344 }, { "epoch": 0.6800162634681846, "grad_norm": 0.12421359866857529, "learning_rate": 0.000132085833418082, "loss": 0.8822, "step": 3345 }, { "epoch": 0.680219556820492, "grad_norm": 0.14170731604099274, "learning_rate": 0.00013206549374555071, "loss": 1.0215, "step": 3346 }, { "epoch": 0.6804228501727994, "grad_norm": 0.13698481023311615, "learning_rate": 0.00013204515407301944, "loss": 1.1608, "step": 3347 }, { "epoch": 0.6806261435251068, "grad_norm": 0.12675851583480835, "learning_rate": 0.00013202481440048816, "loss": 1.0425, "step": 3348 }, { "epoch": 0.6808294368774142, "grad_norm": 0.13038714230060577, "learning_rate": 0.0001320044747279569, "loss": 1.0598, "step": 3349 }, { "epoch": 0.6810327302297214, "grad_norm": 0.1283421814441681, "learning_rate": 0.0001319841350554256, "loss": 0.9638, "step": 3350 }, { "epoch": 0.6812360235820288, "grad_norm": 0.1362680047750473, "learning_rate": 0.00013196379538289434, "loss": 1.1957, "step": 3351 }, { "epoch": 0.6814393169343362, "grad_norm": 0.12494239211082458, "learning_rate": 0.00013194345571036306, "loss": 1.0474, "step": 3352 }, { "epoch": 0.6816426102866436, "grad_norm": 0.11277607828378677, "learning_rate": 0.0001319231160378318, "loss": 0.9065, "step": 3353 }, { "epoch": 0.681845903638951, "grad_norm": 0.13010768592357635, "learning_rate": 0.00013190277636530054, "loss": 1.0045, "step": 3354 }, { "epoch": 0.6820491969912584, "grad_norm": 0.13375157117843628, "learning_rate": 0.00013188243669276926, "loss": 0.9506, "step": 3355 }, { "epoch": 0.6822524903435657, "grad_norm": 0.13150712847709656, "learning_rate": 0.00013186209702023799, "loss": 1.0485, "step": 3356 }, { "epoch": 0.6824557836958731, "grad_norm": 0.13057585060596466, "learning_rate": 0.0001318417573477067, "loss": 1.0687, "step": 3357 }, { "epoch": 0.6826590770481805, "grad_norm": 0.13433004915714264, "learning_rate": 0.00013182141767517544, "loss": 0.985, "step": 3358 }, { "epoch": 0.6828623704004879, "grad_norm": 0.1338491439819336, "learning_rate": 0.00013180107800264416, "loss": 1.1384, "step": 3359 }, { "epoch": 0.6830656637527953, "grad_norm": 0.13416750729084015, "learning_rate": 0.00013178073833011288, "loss": 1.0868, "step": 3360 }, { "epoch": 0.6832689571051027, "grad_norm": 0.13917329907417297, "learning_rate": 0.00013176039865758164, "loss": 1.1072, "step": 3361 }, { "epoch": 0.68347225045741, "grad_norm": 0.1197846531867981, "learning_rate": 0.00013174005898505036, "loss": 0.9208, "step": 3362 }, { "epoch": 0.6836755438097174, "grad_norm": 0.1425098180770874, "learning_rate": 0.00013171971931251908, "loss": 1.2693, "step": 3363 }, { "epoch": 0.6838788371620248, "grad_norm": 0.13614432513713837, "learning_rate": 0.0001316993796399878, "loss": 0.9667, "step": 3364 }, { "epoch": 0.6840821305143322, "grad_norm": 0.1563062071800232, "learning_rate": 0.00013167903996745653, "loss": 1.1234, "step": 3365 }, { "epoch": 0.6842854238666396, "grad_norm": 0.1402071714401245, "learning_rate": 0.00013165870029492526, "loss": 1.0228, "step": 3366 }, { "epoch": 0.684488717218947, "grad_norm": 0.14747624099254608, "learning_rate": 0.00013163836062239398, "loss": 1.2746, "step": 3367 }, { "epoch": 0.6846920105712543, "grad_norm": 0.11560353636741638, "learning_rate": 0.0001316180209498627, "loss": 0.9313, "step": 3368 }, { "epoch": 0.6848953039235617, "grad_norm": 0.12440039217472076, "learning_rate": 0.00013159768127733143, "loss": 0.986, "step": 3369 }, { "epoch": 0.6850985972758691, "grad_norm": 0.13954605162143707, "learning_rate": 0.00013157734160480018, "loss": 1.2206, "step": 3370 }, { "epoch": 0.6853018906281765, "grad_norm": 0.139942929148674, "learning_rate": 0.0001315570019322689, "loss": 0.9075, "step": 3371 }, { "epoch": 0.6855051839804839, "grad_norm": 0.13854482769966125, "learning_rate": 0.00013153666225973763, "loss": 1.1007, "step": 3372 }, { "epoch": 0.6857084773327912, "grad_norm": 0.12603192031383514, "learning_rate": 0.00013151632258720633, "loss": 1.1533, "step": 3373 }, { "epoch": 0.6859117706850986, "grad_norm": 0.12680287659168243, "learning_rate": 0.00013149598291467508, "loss": 1.0463, "step": 3374 }, { "epoch": 0.686115064037406, "grad_norm": 0.12043260782957077, "learning_rate": 0.0001314756432421438, "loss": 0.8653, "step": 3375 }, { "epoch": 0.6863183573897134, "grad_norm": 0.15314915776252747, "learning_rate": 0.00013145530356961253, "loss": 1.1384, "step": 3376 }, { "epoch": 0.6865216507420208, "grad_norm": 0.12305079400539398, "learning_rate": 0.00013143496389708125, "loss": 0.9134, "step": 3377 }, { "epoch": 0.6867249440943282, "grad_norm": 0.12972278892993927, "learning_rate": 0.00013141462422455, "loss": 1.1011, "step": 3378 }, { "epoch": 0.6869282374466354, "grad_norm": 0.12650032341480255, "learning_rate": 0.00013139428455201873, "loss": 1.0518, "step": 3379 }, { "epoch": 0.6871315307989428, "grad_norm": 0.13137362897396088, "learning_rate": 0.00013137394487948745, "loss": 1.0093, "step": 3380 }, { "epoch": 0.6873348241512502, "grad_norm": 0.1400621086359024, "learning_rate": 0.00013135360520695615, "loss": 1.2115, "step": 3381 }, { "epoch": 0.6875381175035576, "grad_norm": 0.1252133697271347, "learning_rate": 0.0001313332655344249, "loss": 1.0329, "step": 3382 }, { "epoch": 0.687741410855865, "grad_norm": 0.13961845636367798, "learning_rate": 0.00013131292586189363, "loss": 1.0093, "step": 3383 }, { "epoch": 0.6879447042081724, "grad_norm": 0.1432250738143921, "learning_rate": 0.00013129258618936235, "loss": 1.2186, "step": 3384 }, { "epoch": 0.6881479975604797, "grad_norm": 0.1433638036251068, "learning_rate": 0.00013127224651683108, "loss": 1.2933, "step": 3385 }, { "epoch": 0.6883512909127871, "grad_norm": 0.13323669135570526, "learning_rate": 0.00013125190684429983, "loss": 1.0775, "step": 3386 }, { "epoch": 0.6885545842650945, "grad_norm": 0.15013840794563293, "learning_rate": 0.00013123156717176855, "loss": 1.0446, "step": 3387 }, { "epoch": 0.6887578776174019, "grad_norm": 0.13675931096076965, "learning_rate": 0.00013121122749923728, "loss": 1.1743, "step": 3388 }, { "epoch": 0.6889611709697093, "grad_norm": 0.13321883976459503, "learning_rate": 0.00013119088782670597, "loss": 1.2053, "step": 3389 }, { "epoch": 0.6891644643220167, "grad_norm": 0.14458970725536346, "learning_rate": 0.00013117054815417473, "loss": 1.0808, "step": 3390 }, { "epoch": 0.689367757674324, "grad_norm": 0.12558375298976898, "learning_rate": 0.00013115020848164345, "loss": 0.9879, "step": 3391 }, { "epoch": 0.6895710510266314, "grad_norm": 0.13324345648288727, "learning_rate": 0.00013112986880911218, "loss": 1.0561, "step": 3392 }, { "epoch": 0.6897743443789388, "grad_norm": 0.1250324845314026, "learning_rate": 0.0001311095291365809, "loss": 1.0982, "step": 3393 }, { "epoch": 0.6899776377312462, "grad_norm": 0.13437926769256592, "learning_rate": 0.00013108918946404965, "loss": 1.0323, "step": 3394 }, { "epoch": 0.6901809310835536, "grad_norm": 0.1360880434513092, "learning_rate": 0.00013106884979151838, "loss": 1.079, "step": 3395 }, { "epoch": 0.6903842244358609, "grad_norm": 0.14753840863704681, "learning_rate": 0.0001310485101189871, "loss": 1.1558, "step": 3396 }, { "epoch": 0.6905875177881683, "grad_norm": 0.1305796205997467, "learning_rate": 0.00013102817044645582, "loss": 1.093, "step": 3397 }, { "epoch": 0.6907908111404757, "grad_norm": 0.14020781219005585, "learning_rate": 0.00013100783077392455, "loss": 1.1657, "step": 3398 }, { "epoch": 0.6909941044927831, "grad_norm": 0.1320771425962448, "learning_rate": 0.00013098749110139327, "loss": 0.9718, "step": 3399 }, { "epoch": 0.6911973978450905, "grad_norm": 0.13931889832019806, "learning_rate": 0.000130967151428862, "loss": 1.1547, "step": 3400 }, { "epoch": 0.6914006911973979, "grad_norm": 0.13345004618167877, "learning_rate": 0.00013094681175633072, "loss": 1.1103, "step": 3401 }, { "epoch": 0.6916039845497052, "grad_norm": 0.1303638517856598, "learning_rate": 0.00013092647208379947, "loss": 1.06, "step": 3402 }, { "epoch": 0.6918072779020126, "grad_norm": 0.12979425489902496, "learning_rate": 0.0001309061324112682, "loss": 1.025, "step": 3403 }, { "epoch": 0.69201057125432, "grad_norm": 0.1420203149318695, "learning_rate": 0.00013088579273873692, "loss": 1.3724, "step": 3404 }, { "epoch": 0.6922138646066274, "grad_norm": 0.13811589777469635, "learning_rate": 0.00013086545306620565, "loss": 1.1415, "step": 3405 }, { "epoch": 0.6924171579589348, "grad_norm": 0.1472085863351822, "learning_rate": 0.00013084511339367437, "loss": 1.2374, "step": 3406 }, { "epoch": 0.6926204513112422, "grad_norm": 0.12186230719089508, "learning_rate": 0.0001308247737211431, "loss": 0.8606, "step": 3407 }, { "epoch": 0.6928237446635495, "grad_norm": 0.14273689687252045, "learning_rate": 0.00013080443404861182, "loss": 0.9656, "step": 3408 }, { "epoch": 0.6930270380158569, "grad_norm": 0.1363956779241562, "learning_rate": 0.00013078409437608055, "loss": 1.1867, "step": 3409 }, { "epoch": 0.6932303313681643, "grad_norm": 0.1353340446949005, "learning_rate": 0.00013076375470354927, "loss": 1.0755, "step": 3410 }, { "epoch": 0.6934336247204717, "grad_norm": 0.12223875522613525, "learning_rate": 0.00013074341503101802, "loss": 0.9282, "step": 3411 }, { "epoch": 0.693636918072779, "grad_norm": 0.13030283153057098, "learning_rate": 0.00013072307535848675, "loss": 1.0846, "step": 3412 }, { "epoch": 0.6938402114250865, "grad_norm": 0.14898596704006195, "learning_rate": 0.00013070273568595547, "loss": 1.1225, "step": 3413 }, { "epoch": 0.6940435047773937, "grad_norm": 0.13688309490680695, "learning_rate": 0.00013068239601342417, "loss": 1.1666, "step": 3414 }, { "epoch": 0.6942467981297011, "grad_norm": 0.1352292150259018, "learning_rate": 0.00013066205634089292, "loss": 1.1107, "step": 3415 }, { "epoch": 0.6944500914820085, "grad_norm": 0.1321742832660675, "learning_rate": 0.00013064171666836164, "loss": 1.1059, "step": 3416 }, { "epoch": 0.6946533848343159, "grad_norm": 0.11616258323192596, "learning_rate": 0.00013062137699583037, "loss": 0.9273, "step": 3417 }, { "epoch": 0.6948566781866233, "grad_norm": 0.13355232775211334, "learning_rate": 0.0001306010373232991, "loss": 0.9926, "step": 3418 }, { "epoch": 0.6950599715389307, "grad_norm": 0.12835095822811127, "learning_rate": 0.00013058069765076784, "loss": 0.9222, "step": 3419 }, { "epoch": 0.695263264891238, "grad_norm": 0.13715249300003052, "learning_rate": 0.00013056035797823657, "loss": 1.0299, "step": 3420 }, { "epoch": 0.6954665582435454, "grad_norm": 0.12749621272087097, "learning_rate": 0.0001305400183057053, "loss": 0.8392, "step": 3421 }, { "epoch": 0.6956698515958528, "grad_norm": 0.12953422963619232, "learning_rate": 0.000130519678633174, "loss": 0.9364, "step": 3422 }, { "epoch": 0.6958731449481602, "grad_norm": 0.1335253268480301, "learning_rate": 0.00013049933896064274, "loss": 1.1024, "step": 3423 }, { "epoch": 0.6960764383004676, "grad_norm": 0.1350051760673523, "learning_rate": 0.00013047899928811147, "loss": 0.9491, "step": 3424 }, { "epoch": 0.6962797316527749, "grad_norm": 0.12581254541873932, "learning_rate": 0.0001304586596155802, "loss": 1.1341, "step": 3425 }, { "epoch": 0.6964830250050823, "grad_norm": 0.12518788874149323, "learning_rate": 0.00013043831994304892, "loss": 1.0329, "step": 3426 }, { "epoch": 0.6966863183573897, "grad_norm": 0.12527361512184143, "learning_rate": 0.00013041798027051767, "loss": 0.9014, "step": 3427 }, { "epoch": 0.6968896117096971, "grad_norm": 0.13964787125587463, "learning_rate": 0.0001303976405979864, "loss": 1.1231, "step": 3428 }, { "epoch": 0.6970929050620045, "grad_norm": 0.1401492953300476, "learning_rate": 0.00013037730092545512, "loss": 1.0639, "step": 3429 }, { "epoch": 0.6972961984143119, "grad_norm": 0.1398945450782776, "learning_rate": 0.0001303569612529238, "loss": 1.0882, "step": 3430 }, { "epoch": 0.6974994917666192, "grad_norm": 0.15390872955322266, "learning_rate": 0.00013033662158039256, "loss": 1.3927, "step": 3431 }, { "epoch": 0.6977027851189266, "grad_norm": 0.11634422838687897, "learning_rate": 0.0001303162819078613, "loss": 0.9213, "step": 3432 }, { "epoch": 0.697906078471234, "grad_norm": 0.14000141620635986, "learning_rate": 0.00013029594223533001, "loss": 1.1035, "step": 3433 }, { "epoch": 0.6981093718235414, "grad_norm": 0.13036206364631653, "learning_rate": 0.00013027560256279874, "loss": 1.0366, "step": 3434 }, { "epoch": 0.6983126651758488, "grad_norm": 0.1375044733285904, "learning_rate": 0.0001302552628902675, "loss": 1.1924, "step": 3435 }, { "epoch": 0.6985159585281562, "grad_norm": 0.13283680379390717, "learning_rate": 0.00013023492321773621, "loss": 1.1097, "step": 3436 }, { "epoch": 0.6987192518804635, "grad_norm": 0.14721041917800903, "learning_rate": 0.00013021458354520494, "loss": 1.1784, "step": 3437 }, { "epoch": 0.6989225452327709, "grad_norm": 0.1452692449092865, "learning_rate": 0.00013019424387267364, "loss": 1.1948, "step": 3438 }, { "epoch": 0.6991258385850783, "grad_norm": 0.12445453554391861, "learning_rate": 0.0001301739042001424, "loss": 1.0154, "step": 3439 }, { "epoch": 0.6993291319373857, "grad_norm": 0.13780944049358368, "learning_rate": 0.0001301535645276111, "loss": 1.1673, "step": 3440 }, { "epoch": 0.6995324252896931, "grad_norm": 0.14468298852443695, "learning_rate": 0.00013013322485507984, "loss": 1.2753, "step": 3441 }, { "epoch": 0.6997357186420005, "grad_norm": 0.13938096165657043, "learning_rate": 0.00013011288518254856, "loss": 1.1033, "step": 3442 }, { "epoch": 0.6999390119943077, "grad_norm": 0.12781304121017456, "learning_rate": 0.0001300925455100173, "loss": 0.8622, "step": 3443 }, { "epoch": 0.7001423053466151, "grad_norm": 0.12039446085691452, "learning_rate": 0.00013007220583748604, "loss": 0.923, "step": 3444 }, { "epoch": 0.7003455986989225, "grad_norm": 0.11263223737478256, "learning_rate": 0.00013005186616495476, "loss": 0.889, "step": 3445 }, { "epoch": 0.7005488920512299, "grad_norm": 0.10796971619129181, "learning_rate": 0.00013003152649242346, "loss": 0.852, "step": 3446 }, { "epoch": 0.7007521854035373, "grad_norm": 0.12779220938682556, "learning_rate": 0.0001300111868198922, "loss": 0.9844, "step": 3447 }, { "epoch": 0.7009554787558446, "grad_norm": 0.12436182051897049, "learning_rate": 0.00012999084714736093, "loss": 1.0729, "step": 3448 }, { "epoch": 0.701158772108152, "grad_norm": 0.12066857516765594, "learning_rate": 0.00012997050747482966, "loss": 0.9179, "step": 3449 }, { "epoch": 0.7013620654604594, "grad_norm": 0.12307177484035492, "learning_rate": 0.00012995016780229838, "loss": 0.9204, "step": 3450 }, { "epoch": 0.7015653588127668, "grad_norm": 0.1301327794790268, "learning_rate": 0.0001299298281297671, "loss": 1.1486, "step": 3451 }, { "epoch": 0.7017686521650742, "grad_norm": 0.13147859275341034, "learning_rate": 0.00012990948845723586, "loss": 1.0035, "step": 3452 }, { "epoch": 0.7019719455173816, "grad_norm": 0.13557538390159607, "learning_rate": 0.00012988914878470458, "loss": 1.1641, "step": 3453 }, { "epoch": 0.7021752388696889, "grad_norm": 0.11187610030174255, "learning_rate": 0.0001298688091121733, "loss": 0.9894, "step": 3454 }, { "epoch": 0.7023785322219963, "grad_norm": 0.12350699305534363, "learning_rate": 0.000129848469439642, "loss": 1.1538, "step": 3455 }, { "epoch": 0.7025818255743037, "grad_norm": 0.1363372653722763, "learning_rate": 0.00012982812976711076, "loss": 1.0104, "step": 3456 }, { "epoch": 0.7027851189266111, "grad_norm": 0.12748870253562927, "learning_rate": 0.00012980779009457948, "loss": 0.9978, "step": 3457 }, { "epoch": 0.7029884122789185, "grad_norm": 0.12273624539375305, "learning_rate": 0.0001297874504220482, "loss": 0.967, "step": 3458 }, { "epoch": 0.7031917056312259, "grad_norm": 0.13453403115272522, "learning_rate": 0.00012976711074951693, "loss": 1.1081, "step": 3459 }, { "epoch": 0.7033949989835332, "grad_norm": 0.13335007429122925, "learning_rate": 0.00012974677107698568, "loss": 0.9803, "step": 3460 }, { "epoch": 0.7035982923358406, "grad_norm": 0.13500504195690155, "learning_rate": 0.0001297264314044544, "loss": 0.967, "step": 3461 }, { "epoch": 0.703801585688148, "grad_norm": 0.145028218626976, "learning_rate": 0.00012970609173192313, "loss": 1.1643, "step": 3462 }, { "epoch": 0.7040048790404554, "grad_norm": 0.14210622012615204, "learning_rate": 0.00012968575205939183, "loss": 1.1898, "step": 3463 }, { "epoch": 0.7042081723927628, "grad_norm": 0.1239437535405159, "learning_rate": 0.00012966541238686058, "loss": 1.0432, "step": 3464 }, { "epoch": 0.7044114657450702, "grad_norm": 0.14510378241539001, "learning_rate": 0.0001296450727143293, "loss": 1.025, "step": 3465 }, { "epoch": 0.7046147590973775, "grad_norm": 0.13489870727062225, "learning_rate": 0.00012962473304179803, "loss": 1.1407, "step": 3466 }, { "epoch": 0.7048180524496849, "grad_norm": 0.12685105204582214, "learning_rate": 0.00012960439336926675, "loss": 1.0256, "step": 3467 }, { "epoch": 0.7050213458019923, "grad_norm": 0.14244306087493896, "learning_rate": 0.0001295840536967355, "loss": 1.1148, "step": 3468 }, { "epoch": 0.7052246391542997, "grad_norm": 0.13121604919433594, "learning_rate": 0.00012956371402420423, "loss": 0.9663, "step": 3469 }, { "epoch": 0.7054279325066071, "grad_norm": 0.14584699273109436, "learning_rate": 0.00012954337435167295, "loss": 1.1258, "step": 3470 }, { "epoch": 0.7056312258589145, "grad_norm": 0.130800262093544, "learning_rate": 0.00012952303467914165, "loss": 0.9679, "step": 3471 }, { "epoch": 0.7058345192112218, "grad_norm": 0.13025017082691193, "learning_rate": 0.0001295026950066104, "loss": 1.0312, "step": 3472 }, { "epoch": 0.7060378125635292, "grad_norm": 0.13392165303230286, "learning_rate": 0.00012948235533407913, "loss": 1.1851, "step": 3473 }, { "epoch": 0.7062411059158366, "grad_norm": 0.1399383693933487, "learning_rate": 0.00012946201566154785, "loss": 1.1286, "step": 3474 }, { "epoch": 0.706444399268144, "grad_norm": 0.12997640669345856, "learning_rate": 0.00012944167598901658, "loss": 1.0042, "step": 3475 }, { "epoch": 0.7066476926204514, "grad_norm": 0.1388452649116516, "learning_rate": 0.00012942133631648533, "loss": 1.1952, "step": 3476 }, { "epoch": 0.7068509859727586, "grad_norm": 0.13053801655769348, "learning_rate": 0.00012940099664395405, "loss": 0.9092, "step": 3477 }, { "epoch": 0.707054279325066, "grad_norm": 0.1334877759218216, "learning_rate": 0.00012938065697142278, "loss": 0.9635, "step": 3478 }, { "epoch": 0.7072575726773734, "grad_norm": 0.13902603089809418, "learning_rate": 0.00012936031729889147, "loss": 1.0507, "step": 3479 }, { "epoch": 0.7074608660296808, "grad_norm": 0.13863757252693176, "learning_rate": 0.00012933997762636023, "loss": 0.9798, "step": 3480 }, { "epoch": 0.7076641593819882, "grad_norm": 0.12283840775489807, "learning_rate": 0.00012931963795382895, "loss": 0.8997, "step": 3481 }, { "epoch": 0.7078674527342956, "grad_norm": 0.13135948777198792, "learning_rate": 0.00012929929828129768, "loss": 1.2408, "step": 3482 }, { "epoch": 0.7080707460866029, "grad_norm": 0.15379171073436737, "learning_rate": 0.0001292789586087664, "loss": 1.195, "step": 3483 }, { "epoch": 0.7082740394389103, "grad_norm": 0.13256476819515228, "learning_rate": 0.00012925861893623515, "loss": 1.0582, "step": 3484 }, { "epoch": 0.7084773327912177, "grad_norm": 0.12100596725940704, "learning_rate": 0.00012923827926370388, "loss": 0.8826, "step": 3485 }, { "epoch": 0.7086806261435251, "grad_norm": 0.13334119319915771, "learning_rate": 0.0001292179395911726, "loss": 1.2063, "step": 3486 }, { "epoch": 0.7088839194958325, "grad_norm": 0.14578770101070404, "learning_rate": 0.0001291975999186413, "loss": 1.2067, "step": 3487 }, { "epoch": 0.7090872128481399, "grad_norm": 0.13182413578033447, "learning_rate": 0.00012917726024611005, "loss": 1.0382, "step": 3488 }, { "epoch": 0.7092905062004472, "grad_norm": 0.13377144932746887, "learning_rate": 0.00012915692057357877, "loss": 1.0798, "step": 3489 }, { "epoch": 0.7094937995527546, "grad_norm": 0.15311647951602936, "learning_rate": 0.0001291365809010475, "loss": 1.1925, "step": 3490 }, { "epoch": 0.709697092905062, "grad_norm": 0.13385489583015442, "learning_rate": 0.00012911624122851622, "loss": 1.0437, "step": 3491 }, { "epoch": 0.7099003862573694, "grad_norm": 0.12438102066516876, "learning_rate": 0.00012909590155598495, "loss": 0.9325, "step": 3492 }, { "epoch": 0.7101036796096768, "grad_norm": 0.11423248052597046, "learning_rate": 0.0001290755618834537, "loss": 0.955, "step": 3493 }, { "epoch": 0.7103069729619842, "grad_norm": 0.1257968544960022, "learning_rate": 0.00012905522221092242, "loss": 0.9734, "step": 3494 }, { "epoch": 0.7105102663142915, "grad_norm": 0.12875302135944366, "learning_rate": 0.00012903488253839112, "loss": 0.9762, "step": 3495 }, { "epoch": 0.7107135596665989, "grad_norm": 0.15575377643108368, "learning_rate": 0.00012901454286585984, "loss": 1.199, "step": 3496 }, { "epoch": 0.7109168530189063, "grad_norm": 0.12835876643657684, "learning_rate": 0.0001289942031933286, "loss": 1.0026, "step": 3497 }, { "epoch": 0.7111201463712137, "grad_norm": 0.13237829506397247, "learning_rate": 0.00012897386352079732, "loss": 1.1309, "step": 3498 }, { "epoch": 0.7113234397235211, "grad_norm": 0.13372915983200073, "learning_rate": 0.00012895352384826605, "loss": 1.0218, "step": 3499 }, { "epoch": 0.7115267330758284, "grad_norm": 0.1394553929567337, "learning_rate": 0.00012893318417573477, "loss": 1.1678, "step": 3500 }, { "epoch": 0.7117300264281358, "grad_norm": 0.12859494984149933, "learning_rate": 0.00012891284450320352, "loss": 0.9039, "step": 3501 }, { "epoch": 0.7119333197804432, "grad_norm": 0.13142433762550354, "learning_rate": 0.00012889250483067225, "loss": 0.9692, "step": 3502 }, { "epoch": 0.7121366131327506, "grad_norm": 0.15247346460819244, "learning_rate": 0.00012887216515814094, "loss": 1.1068, "step": 3503 }, { "epoch": 0.712339906485058, "grad_norm": 0.1271810233592987, "learning_rate": 0.00012885182548560967, "loss": 1.05, "step": 3504 }, { "epoch": 0.7125431998373654, "grad_norm": 0.12222661077976227, "learning_rate": 0.00012883148581307842, "loss": 0.9563, "step": 3505 }, { "epoch": 0.7127464931896726, "grad_norm": 0.14147427678108215, "learning_rate": 0.00012881114614054714, "loss": 1.0697, "step": 3506 }, { "epoch": 0.71294978654198, "grad_norm": 0.12644895911216736, "learning_rate": 0.00012879080646801587, "loss": 0.9251, "step": 3507 }, { "epoch": 0.7131530798942874, "grad_norm": 0.126128152012825, "learning_rate": 0.0001287704667954846, "loss": 0.9617, "step": 3508 }, { "epoch": 0.7133563732465948, "grad_norm": 0.12538930773735046, "learning_rate": 0.00012875012712295334, "loss": 1.084, "step": 3509 }, { "epoch": 0.7135596665989022, "grad_norm": 0.1261541098356247, "learning_rate": 0.00012872978745042207, "loss": 1.0671, "step": 3510 }, { "epoch": 0.7137629599512096, "grad_norm": 0.15382623672485352, "learning_rate": 0.00012870944777789077, "loss": 1.1679, "step": 3511 }, { "epoch": 0.7139662533035169, "grad_norm": 0.14954978227615356, "learning_rate": 0.0001286891081053595, "loss": 1.0822, "step": 3512 }, { "epoch": 0.7141695466558243, "grad_norm": 0.12342054396867752, "learning_rate": 0.00012866876843282824, "loss": 1.0252, "step": 3513 }, { "epoch": 0.7143728400081317, "grad_norm": 0.13561514019966125, "learning_rate": 0.00012864842876029697, "loss": 1.0025, "step": 3514 }, { "epoch": 0.7145761333604391, "grad_norm": 0.13942426443099976, "learning_rate": 0.0001286280890877657, "loss": 1.0978, "step": 3515 }, { "epoch": 0.7147794267127465, "grad_norm": 0.13418523967266083, "learning_rate": 0.00012860774941523442, "loss": 0.9401, "step": 3516 }, { "epoch": 0.7149827200650539, "grad_norm": 0.13293065130710602, "learning_rate": 0.00012858740974270317, "loss": 0.9563, "step": 3517 }, { "epoch": 0.7151860134173612, "grad_norm": 0.13507983088493347, "learning_rate": 0.0001285670700701719, "loss": 1.1482, "step": 3518 }, { "epoch": 0.7153893067696686, "grad_norm": 0.13518783450126648, "learning_rate": 0.00012854673039764062, "loss": 1.0677, "step": 3519 }, { "epoch": 0.715592600121976, "grad_norm": 0.14185848832130432, "learning_rate": 0.0001285263907251093, "loss": 1.1828, "step": 3520 }, { "epoch": 0.7157958934742834, "grad_norm": 0.14050935208797455, "learning_rate": 0.00012850605105257806, "loss": 1.137, "step": 3521 }, { "epoch": 0.7159991868265908, "grad_norm": 0.12821073830127716, "learning_rate": 0.0001284857113800468, "loss": 0.9265, "step": 3522 }, { "epoch": 0.7162024801788982, "grad_norm": 0.14910835027694702, "learning_rate": 0.0001284653717075155, "loss": 1.1298, "step": 3523 }, { "epoch": 0.7164057735312055, "grad_norm": 0.12309451401233673, "learning_rate": 0.00012844503203498424, "loss": 0.9552, "step": 3524 }, { "epoch": 0.7166090668835129, "grad_norm": 0.13226357102394104, "learning_rate": 0.000128424692362453, "loss": 1.0618, "step": 3525 }, { "epoch": 0.7168123602358203, "grad_norm": 0.14502473175525665, "learning_rate": 0.00012840435268992171, "loss": 1.0766, "step": 3526 }, { "epoch": 0.7170156535881277, "grad_norm": 0.15114335715770721, "learning_rate": 0.00012838401301739044, "loss": 1.3117, "step": 3527 }, { "epoch": 0.7172189469404351, "grad_norm": 0.14016559720039368, "learning_rate": 0.00012836367334485914, "loss": 1.0747, "step": 3528 }, { "epoch": 0.7174222402927424, "grad_norm": 0.10465826839208603, "learning_rate": 0.0001283433336723279, "loss": 0.9244, "step": 3529 }, { "epoch": 0.7176255336450498, "grad_norm": 0.14475956559181213, "learning_rate": 0.0001283229939997966, "loss": 1.1458, "step": 3530 }, { "epoch": 0.7178288269973572, "grad_norm": 0.15967129170894623, "learning_rate": 0.00012830265432726534, "loss": 1.218, "step": 3531 }, { "epoch": 0.7180321203496646, "grad_norm": 0.16239500045776367, "learning_rate": 0.00012828231465473406, "loss": 1.1088, "step": 3532 }, { "epoch": 0.718235413701972, "grad_norm": 0.13778537511825562, "learning_rate": 0.00012826197498220279, "loss": 1.2284, "step": 3533 }, { "epoch": 0.7184387070542794, "grad_norm": 0.15743795037269592, "learning_rate": 0.00012824163530967154, "loss": 1.2057, "step": 3534 }, { "epoch": 0.7186420004065867, "grad_norm": 0.13260531425476074, "learning_rate": 0.00012822129563714026, "loss": 1.1239, "step": 3535 }, { "epoch": 0.718845293758894, "grad_norm": 0.12460935115814209, "learning_rate": 0.00012820095596460896, "loss": 1.0504, "step": 3536 }, { "epoch": 0.7190485871112015, "grad_norm": 0.13355574011802673, "learning_rate": 0.00012818061629207768, "loss": 1.0719, "step": 3537 }, { "epoch": 0.7192518804635089, "grad_norm": 0.1280195564031601, "learning_rate": 0.00012816027661954643, "loss": 1.012, "step": 3538 }, { "epoch": 0.7194551738158163, "grad_norm": 0.13986103236675262, "learning_rate": 0.00012813993694701516, "loss": 1.0658, "step": 3539 }, { "epoch": 0.7196584671681237, "grad_norm": 0.1170068234205246, "learning_rate": 0.00012811959727448388, "loss": 0.9056, "step": 3540 }, { "epoch": 0.7198617605204309, "grad_norm": 0.13569694757461548, "learning_rate": 0.0001280992576019526, "loss": 1.116, "step": 3541 }, { "epoch": 0.7200650538727383, "grad_norm": 0.1323375403881073, "learning_rate": 0.00012807891792942136, "loss": 1.0629, "step": 3542 }, { "epoch": 0.7202683472250457, "grad_norm": 0.12855368852615356, "learning_rate": 0.00012805857825689008, "loss": 0.89, "step": 3543 }, { "epoch": 0.7204716405773531, "grad_norm": 0.12019526958465576, "learning_rate": 0.00012803823858435878, "loss": 1.063, "step": 3544 }, { "epoch": 0.7206749339296605, "grad_norm": 0.13612791895866394, "learning_rate": 0.0001280178989118275, "loss": 0.9844, "step": 3545 }, { "epoch": 0.7208782272819679, "grad_norm": 0.1345546394586563, "learning_rate": 0.00012799755923929626, "loss": 0.9661, "step": 3546 }, { "epoch": 0.7210815206342752, "grad_norm": 0.12953819334506989, "learning_rate": 0.00012797721956676498, "loss": 0.968, "step": 3547 }, { "epoch": 0.7212848139865826, "grad_norm": 0.15265563130378723, "learning_rate": 0.0001279568798942337, "loss": 1.2455, "step": 3548 }, { "epoch": 0.72148810733889, "grad_norm": 0.16196173429489136, "learning_rate": 0.00012793654022170243, "loss": 1.1605, "step": 3549 }, { "epoch": 0.7216914006911974, "grad_norm": 0.13228391110897064, "learning_rate": 0.00012791620054917118, "loss": 1.1216, "step": 3550 }, { "epoch": 0.7218946940435048, "grad_norm": 0.1355789303779602, "learning_rate": 0.0001278958608766399, "loss": 1.1065, "step": 3551 }, { "epoch": 0.7220979873958121, "grad_norm": 0.13458067178726196, "learning_rate": 0.0001278755212041086, "loss": 1.173, "step": 3552 }, { "epoch": 0.7223012807481195, "grad_norm": 0.12841463088989258, "learning_rate": 0.00012785518153157733, "loss": 0.9425, "step": 3553 }, { "epoch": 0.7225045741004269, "grad_norm": 0.1273353099822998, "learning_rate": 0.00012783484185904608, "loss": 0.9527, "step": 3554 }, { "epoch": 0.7227078674527343, "grad_norm": 0.13753145933151245, "learning_rate": 0.0001278145021865148, "loss": 0.9917, "step": 3555 }, { "epoch": 0.7229111608050417, "grad_norm": 0.15175598859786987, "learning_rate": 0.00012779416251398353, "loss": 1.1534, "step": 3556 }, { "epoch": 0.7231144541573491, "grad_norm": 0.12491641193628311, "learning_rate": 0.00012777382284145225, "loss": 0.9861, "step": 3557 }, { "epoch": 0.7233177475096564, "grad_norm": 0.135353222489357, "learning_rate": 0.000127753483168921, "loss": 1.1638, "step": 3558 }, { "epoch": 0.7235210408619638, "grad_norm": 0.14735917747020721, "learning_rate": 0.00012773314349638973, "loss": 1.1341, "step": 3559 }, { "epoch": 0.7237243342142712, "grad_norm": 0.11300304532051086, "learning_rate": 0.00012771280382385843, "loss": 0.9734, "step": 3560 }, { "epoch": 0.7239276275665786, "grad_norm": 0.12589031457901, "learning_rate": 0.00012769246415132715, "loss": 0.9325, "step": 3561 }, { "epoch": 0.724130920918886, "grad_norm": 0.14478862285614014, "learning_rate": 0.0001276721244787959, "loss": 1.2025, "step": 3562 }, { "epoch": 0.7243342142711934, "grad_norm": 0.12382597476243973, "learning_rate": 0.00012765178480626463, "loss": 1.0629, "step": 3563 }, { "epoch": 0.7245375076235007, "grad_norm": 0.13786040246486664, "learning_rate": 0.00012763144513373335, "loss": 1.142, "step": 3564 }, { "epoch": 0.7247408009758081, "grad_norm": 0.13986682891845703, "learning_rate": 0.00012761110546120208, "loss": 1.1434, "step": 3565 }, { "epoch": 0.7249440943281155, "grad_norm": 0.13523870706558228, "learning_rate": 0.00012759076578867083, "loss": 0.889, "step": 3566 }, { "epoch": 0.7251473876804229, "grad_norm": 0.12312185764312744, "learning_rate": 0.00012757042611613955, "loss": 1.0227, "step": 3567 }, { "epoch": 0.7253506810327303, "grad_norm": 0.1324312686920166, "learning_rate": 0.00012755008644360825, "loss": 0.9739, "step": 3568 }, { "epoch": 0.7255539743850377, "grad_norm": 0.13704247772693634, "learning_rate": 0.00012752974677107697, "loss": 1.0356, "step": 3569 }, { "epoch": 0.725757267737345, "grad_norm": 0.12928558886051178, "learning_rate": 0.00012750940709854573, "loss": 0.9112, "step": 3570 }, { "epoch": 0.7259605610896523, "grad_norm": 0.12993620336055756, "learning_rate": 0.00012748906742601445, "loss": 1.0499, "step": 3571 }, { "epoch": 0.7261638544419597, "grad_norm": 0.13459739089012146, "learning_rate": 0.00012746872775348317, "loss": 1.1546, "step": 3572 }, { "epoch": 0.7263671477942671, "grad_norm": 0.14553983509540558, "learning_rate": 0.0001274483880809519, "loss": 1.1466, "step": 3573 }, { "epoch": 0.7265704411465745, "grad_norm": 0.1270923614501953, "learning_rate": 0.00012742804840842065, "loss": 0.9953, "step": 3574 }, { "epoch": 0.7267737344988819, "grad_norm": 0.11883358657360077, "learning_rate": 0.00012740770873588938, "loss": 0.9983, "step": 3575 }, { "epoch": 0.7269770278511892, "grad_norm": 0.13899964094161987, "learning_rate": 0.0001273873690633581, "loss": 1.0463, "step": 3576 }, { "epoch": 0.7271803212034966, "grad_norm": 0.12887227535247803, "learning_rate": 0.0001273670293908268, "loss": 0.9673, "step": 3577 }, { "epoch": 0.727383614555804, "grad_norm": 0.15297862887382507, "learning_rate": 0.00012734668971829552, "loss": 1.1517, "step": 3578 }, { "epoch": 0.7275869079081114, "grad_norm": 0.12056870758533478, "learning_rate": 0.00012732635004576427, "loss": 0.9494, "step": 3579 }, { "epoch": 0.7277902012604188, "grad_norm": 0.13357582688331604, "learning_rate": 0.000127306010373233, "loss": 0.9368, "step": 3580 }, { "epoch": 0.7279934946127261, "grad_norm": 0.1344243586063385, "learning_rate": 0.00012728567070070172, "loss": 0.9977, "step": 3581 }, { "epoch": 0.7281967879650335, "grad_norm": 0.12713217735290527, "learning_rate": 0.00012726533102817045, "loss": 0.9283, "step": 3582 }, { "epoch": 0.7284000813173409, "grad_norm": 0.1435747891664505, "learning_rate": 0.0001272449913556392, "loss": 1.0784, "step": 3583 }, { "epoch": 0.7286033746696483, "grad_norm": 0.1342409998178482, "learning_rate": 0.00012722465168310792, "loss": 0.99, "step": 3584 }, { "epoch": 0.7288066680219557, "grad_norm": 0.13497351109981537, "learning_rate": 0.00012720431201057662, "loss": 1.0655, "step": 3585 }, { "epoch": 0.7290099613742631, "grad_norm": 0.13522464036941528, "learning_rate": 0.00012718397233804534, "loss": 1.1785, "step": 3586 }, { "epoch": 0.7292132547265704, "grad_norm": 0.12016705423593521, "learning_rate": 0.0001271636326655141, "loss": 0.93, "step": 3587 }, { "epoch": 0.7294165480788778, "grad_norm": 0.1265437752008438, "learning_rate": 0.00012714329299298282, "loss": 1.0289, "step": 3588 }, { "epoch": 0.7296198414311852, "grad_norm": 0.12697303295135498, "learning_rate": 0.00012712295332045154, "loss": 1.0539, "step": 3589 }, { "epoch": 0.7298231347834926, "grad_norm": 0.14529366791248322, "learning_rate": 0.00012710261364792027, "loss": 1.134, "step": 3590 }, { "epoch": 0.7300264281358, "grad_norm": 0.1143953874707222, "learning_rate": 0.00012708227397538902, "loss": 0.7966, "step": 3591 }, { "epoch": 0.7302297214881074, "grad_norm": 0.14083142578601837, "learning_rate": 0.00012706193430285775, "loss": 1.1776, "step": 3592 }, { "epoch": 0.7304330148404147, "grad_norm": 0.12843115627765656, "learning_rate": 0.00012704159463032644, "loss": 1.075, "step": 3593 }, { "epoch": 0.7306363081927221, "grad_norm": 0.12742142379283905, "learning_rate": 0.00012702125495779517, "loss": 1.1087, "step": 3594 }, { "epoch": 0.7308396015450295, "grad_norm": 0.13479192554950714, "learning_rate": 0.00012700091528526392, "loss": 1.0954, "step": 3595 }, { "epoch": 0.7310428948973369, "grad_norm": 0.13784924149513245, "learning_rate": 0.00012698057561273264, "loss": 1.1773, "step": 3596 }, { "epoch": 0.7312461882496443, "grad_norm": 0.13474421203136444, "learning_rate": 0.00012696023594020137, "loss": 1.0023, "step": 3597 }, { "epoch": 0.7314494816019517, "grad_norm": 0.14545200765132904, "learning_rate": 0.0001269398962676701, "loss": 1.1166, "step": 3598 }, { "epoch": 0.731652774954259, "grad_norm": 0.11699052155017853, "learning_rate": 0.00012691955659513884, "loss": 0.982, "step": 3599 }, { "epoch": 0.7318560683065664, "grad_norm": 0.13108402490615845, "learning_rate": 0.00012689921692260757, "loss": 0.9805, "step": 3600 }, { "epoch": 0.7320593616588738, "grad_norm": 0.12493366003036499, "learning_rate": 0.00012687887725007627, "loss": 1.03, "step": 3601 }, { "epoch": 0.7322626550111812, "grad_norm": 0.12673288583755493, "learning_rate": 0.000126858537577545, "loss": 0.99, "step": 3602 }, { "epoch": 0.7324659483634886, "grad_norm": 0.14298030734062195, "learning_rate": 0.00012683819790501374, "loss": 0.9816, "step": 3603 }, { "epoch": 0.7326692417157958, "grad_norm": 0.1383986473083496, "learning_rate": 0.00012681785823248247, "loss": 1.0635, "step": 3604 }, { "epoch": 0.7328725350681032, "grad_norm": 0.13229741156101227, "learning_rate": 0.0001267975185599512, "loss": 1.0752, "step": 3605 }, { "epoch": 0.7330758284204106, "grad_norm": 0.12566420435905457, "learning_rate": 0.00012677717888741991, "loss": 1.0984, "step": 3606 }, { "epoch": 0.733279121772718, "grad_norm": 0.14375749230384827, "learning_rate": 0.00012675683921488867, "loss": 1.0459, "step": 3607 }, { "epoch": 0.7334824151250254, "grad_norm": 0.1541428565979004, "learning_rate": 0.0001267364995423574, "loss": 1.2405, "step": 3608 }, { "epoch": 0.7336857084773328, "grad_norm": 0.1277463287115097, "learning_rate": 0.0001267161598698261, "loss": 0.9784, "step": 3609 }, { "epoch": 0.7338890018296401, "grad_norm": 0.11879061907529831, "learning_rate": 0.0001266958201972948, "loss": 0.9301, "step": 3610 }, { "epoch": 0.7340922951819475, "grad_norm": 0.1343902200460434, "learning_rate": 0.00012667548052476356, "loss": 1.2015, "step": 3611 }, { "epoch": 0.7342955885342549, "grad_norm": 0.12574651837348938, "learning_rate": 0.0001266551408522323, "loss": 1.0215, "step": 3612 }, { "epoch": 0.7344988818865623, "grad_norm": 0.12160508334636688, "learning_rate": 0.000126634801179701, "loss": 1.0208, "step": 3613 }, { "epoch": 0.7347021752388697, "grad_norm": 0.13637933135032654, "learning_rate": 0.00012661446150716974, "loss": 1.1179, "step": 3614 }, { "epoch": 0.7349054685911771, "grad_norm": 0.14247237145900726, "learning_rate": 0.0001265941218346385, "loss": 1.1746, "step": 3615 }, { "epoch": 0.7351087619434844, "grad_norm": 0.14084017276763916, "learning_rate": 0.00012657378216210721, "loss": 0.9984, "step": 3616 }, { "epoch": 0.7353120552957918, "grad_norm": 0.1475144922733307, "learning_rate": 0.0001265534424895759, "loss": 1.0763, "step": 3617 }, { "epoch": 0.7355153486480992, "grad_norm": 0.12147875130176544, "learning_rate": 0.00012653310281704464, "loss": 1.0814, "step": 3618 }, { "epoch": 0.7357186420004066, "grad_norm": 0.12373865395784378, "learning_rate": 0.00012651276314451336, "loss": 0.9432, "step": 3619 }, { "epoch": 0.735921935352714, "grad_norm": 0.11395063996315002, "learning_rate": 0.0001264924234719821, "loss": 0.8629, "step": 3620 }, { "epoch": 0.7361252287050214, "grad_norm": 0.12742386758327484, "learning_rate": 0.00012647208379945084, "loss": 1.1148, "step": 3621 }, { "epoch": 0.7363285220573287, "grad_norm": 0.13474571704864502, "learning_rate": 0.00012645174412691956, "loss": 1.0862, "step": 3622 }, { "epoch": 0.7365318154096361, "grad_norm": 0.14104367792606354, "learning_rate": 0.00012643140445438828, "loss": 1.1224, "step": 3623 }, { "epoch": 0.7367351087619435, "grad_norm": 0.1266336888074875, "learning_rate": 0.00012641106478185704, "loss": 1.0388, "step": 3624 }, { "epoch": 0.7369384021142509, "grad_norm": 0.1418471783399582, "learning_rate": 0.00012639072510932573, "loss": 1.0211, "step": 3625 }, { "epoch": 0.7371416954665583, "grad_norm": 0.14462773501873016, "learning_rate": 0.00012637038543679446, "loss": 1.0212, "step": 3626 }, { "epoch": 0.7373449888188657, "grad_norm": 0.14509986340999603, "learning_rate": 0.00012635004576426318, "loss": 1.1922, "step": 3627 }, { "epoch": 0.737548282171173, "grad_norm": 0.13801227509975433, "learning_rate": 0.00012632970609173193, "loss": 1.1082, "step": 3628 }, { "epoch": 0.7377515755234804, "grad_norm": 0.13551753759384155, "learning_rate": 0.00012630936641920066, "loss": 1.0822, "step": 3629 }, { "epoch": 0.7379548688757878, "grad_norm": 0.12872062623500824, "learning_rate": 0.00012628902674666938, "loss": 0.9366, "step": 3630 }, { "epoch": 0.7381581622280952, "grad_norm": 0.13623321056365967, "learning_rate": 0.0001262686870741381, "loss": 1.1129, "step": 3631 }, { "epoch": 0.7383614555804026, "grad_norm": 0.14300891757011414, "learning_rate": 0.00012624834740160686, "loss": 1.1259, "step": 3632 }, { "epoch": 0.7385647489327098, "grad_norm": 0.13372913002967834, "learning_rate": 0.00012622800772907558, "loss": 1.0011, "step": 3633 }, { "epoch": 0.7387680422850172, "grad_norm": 0.11722072213888168, "learning_rate": 0.00012620766805654428, "loss": 1.0085, "step": 3634 }, { "epoch": 0.7389713356373246, "grad_norm": 0.13151319324970245, "learning_rate": 0.000126187328384013, "loss": 1.1878, "step": 3635 }, { "epoch": 0.739174628989632, "grad_norm": 0.13933278620243073, "learning_rate": 0.00012616698871148176, "loss": 1.1763, "step": 3636 }, { "epoch": 0.7393779223419394, "grad_norm": 0.13536275923252106, "learning_rate": 0.00012614664903895048, "loss": 1.2087, "step": 3637 }, { "epoch": 0.7395812156942468, "grad_norm": 0.13037016987800598, "learning_rate": 0.0001261263093664192, "loss": 1.0033, "step": 3638 }, { "epoch": 0.7397845090465541, "grad_norm": 0.1575489491224289, "learning_rate": 0.00012610596969388793, "loss": 1.1683, "step": 3639 }, { "epoch": 0.7399878023988615, "grad_norm": 0.12989576160907745, "learning_rate": 0.00012608563002135668, "loss": 1.127, "step": 3640 }, { "epoch": 0.7401910957511689, "grad_norm": 0.14293938875198364, "learning_rate": 0.0001260652903488254, "loss": 1.1641, "step": 3641 }, { "epoch": 0.7403943891034763, "grad_norm": 0.12480568885803223, "learning_rate": 0.0001260449506762941, "loss": 0.9612, "step": 3642 }, { "epoch": 0.7405976824557837, "grad_norm": 0.1387239396572113, "learning_rate": 0.00012602461100376283, "loss": 1.1289, "step": 3643 }, { "epoch": 0.7408009758080911, "grad_norm": 0.12974587082862854, "learning_rate": 0.00012600427133123158, "loss": 1.0032, "step": 3644 }, { "epoch": 0.7410042691603984, "grad_norm": 0.14558175206184387, "learning_rate": 0.0001259839316587003, "loss": 1.0957, "step": 3645 }, { "epoch": 0.7412075625127058, "grad_norm": 0.1346643716096878, "learning_rate": 0.00012596359198616903, "loss": 1.0644, "step": 3646 }, { "epoch": 0.7414108558650132, "grad_norm": 0.1275978684425354, "learning_rate": 0.00012594325231363775, "loss": 1.0638, "step": 3647 }, { "epoch": 0.7416141492173206, "grad_norm": 0.11669638752937317, "learning_rate": 0.0001259229126411065, "loss": 0.9147, "step": 3648 }, { "epoch": 0.741817442569628, "grad_norm": 0.12056609243154526, "learning_rate": 0.00012590257296857523, "loss": 0.8193, "step": 3649 }, { "epoch": 0.7420207359219354, "grad_norm": 0.1389569789171219, "learning_rate": 0.00012588223329604393, "loss": 1.1273, "step": 3650 }, { "epoch": 0.7422240292742427, "grad_norm": 0.1116948276758194, "learning_rate": 0.00012586189362351265, "loss": 0.8484, "step": 3651 }, { "epoch": 0.7424273226265501, "grad_norm": 0.13268932700157166, "learning_rate": 0.0001258415539509814, "loss": 1.0092, "step": 3652 }, { "epoch": 0.7426306159788575, "grad_norm": 0.13985766470432281, "learning_rate": 0.00012582121427845013, "loss": 1.0907, "step": 3653 }, { "epoch": 0.7428339093311649, "grad_norm": 0.13794921338558197, "learning_rate": 0.00012580087460591885, "loss": 1.1482, "step": 3654 }, { "epoch": 0.7430372026834723, "grad_norm": 0.13911883533000946, "learning_rate": 0.00012578053493338758, "loss": 1.1641, "step": 3655 }, { "epoch": 0.7432404960357797, "grad_norm": 0.11809851974248886, "learning_rate": 0.00012576019526085633, "loss": 0.9128, "step": 3656 }, { "epoch": 0.743443789388087, "grad_norm": 0.17306208610534668, "learning_rate": 0.00012573985558832505, "loss": 1.1575, "step": 3657 }, { "epoch": 0.7436470827403944, "grad_norm": 0.14551490545272827, "learning_rate": 0.00012571951591579375, "loss": 1.2794, "step": 3658 }, { "epoch": 0.7438503760927018, "grad_norm": 0.14065933227539062, "learning_rate": 0.00012569917624326247, "loss": 1.0652, "step": 3659 }, { "epoch": 0.7440536694450092, "grad_norm": 0.13010179996490479, "learning_rate": 0.0001256788365707312, "loss": 0.9541, "step": 3660 }, { "epoch": 0.7442569627973166, "grad_norm": 0.1264103651046753, "learning_rate": 0.00012565849689819995, "loss": 0.9767, "step": 3661 }, { "epoch": 0.7444602561496239, "grad_norm": 0.12325896322727203, "learning_rate": 0.00012563815722566867, "loss": 1.1016, "step": 3662 }, { "epoch": 0.7446635495019313, "grad_norm": 0.12374068796634674, "learning_rate": 0.0001256178175531374, "loss": 0.9282, "step": 3663 }, { "epoch": 0.7448668428542387, "grad_norm": 0.14597944915294647, "learning_rate": 0.00012559747788060612, "loss": 1.1961, "step": 3664 }, { "epoch": 0.745070136206546, "grad_norm": 0.128509983420372, "learning_rate": 0.00012557713820807487, "loss": 0.9838, "step": 3665 }, { "epoch": 0.7452734295588535, "grad_norm": 0.1421680897474289, "learning_rate": 0.00012555679853554357, "loss": 1.1563, "step": 3666 }, { "epoch": 0.7454767229111608, "grad_norm": 0.1468690037727356, "learning_rate": 0.0001255364588630123, "loss": 1.1203, "step": 3667 }, { "epoch": 0.7456800162634681, "grad_norm": 0.1539076417684555, "learning_rate": 0.00012551611919048102, "loss": 1.1775, "step": 3668 }, { "epoch": 0.7458833096157755, "grad_norm": 0.15628856420516968, "learning_rate": 0.00012549577951794977, "loss": 1.1265, "step": 3669 }, { "epoch": 0.7460866029680829, "grad_norm": 0.1251571774482727, "learning_rate": 0.0001254754398454185, "loss": 1.0525, "step": 3670 }, { "epoch": 0.7462898963203903, "grad_norm": 0.13868333399295807, "learning_rate": 0.00012545510017288722, "loss": 1.0449, "step": 3671 }, { "epoch": 0.7464931896726977, "grad_norm": 0.15435542166233063, "learning_rate": 0.00012543476050035595, "loss": 1.2156, "step": 3672 }, { "epoch": 0.7466964830250051, "grad_norm": 0.13579222559928894, "learning_rate": 0.0001254144208278247, "loss": 0.9683, "step": 3673 }, { "epoch": 0.7468997763773124, "grad_norm": 0.14346475899219513, "learning_rate": 0.0001253940811552934, "loss": 1.1188, "step": 3674 }, { "epoch": 0.7471030697296198, "grad_norm": 0.13663546741008759, "learning_rate": 0.00012537374148276212, "loss": 0.9814, "step": 3675 }, { "epoch": 0.7473063630819272, "grad_norm": 0.14386685192584991, "learning_rate": 0.00012535340181023084, "loss": 1.2206, "step": 3676 }, { "epoch": 0.7475096564342346, "grad_norm": 0.1263144165277481, "learning_rate": 0.0001253330621376996, "loss": 0.9859, "step": 3677 }, { "epoch": 0.747712949786542, "grad_norm": 0.12757907807826996, "learning_rate": 0.00012531272246516832, "loss": 0.9453, "step": 3678 }, { "epoch": 0.7479162431388494, "grad_norm": 0.13055284321308136, "learning_rate": 0.00012529238279263704, "loss": 0.9623, "step": 3679 }, { "epoch": 0.7481195364911567, "grad_norm": 0.15445955097675323, "learning_rate": 0.00012527204312010577, "loss": 1.3087, "step": 3680 }, { "epoch": 0.7483228298434641, "grad_norm": 0.1479884386062622, "learning_rate": 0.00012525170344757452, "loss": 1.2992, "step": 3681 }, { "epoch": 0.7485261231957715, "grad_norm": 0.14582955837249756, "learning_rate": 0.00012523136377504322, "loss": 1.0265, "step": 3682 }, { "epoch": 0.7487294165480789, "grad_norm": 0.1448071002960205, "learning_rate": 0.00012521102410251194, "loss": 1.0637, "step": 3683 }, { "epoch": 0.7489327099003863, "grad_norm": 0.13632971048355103, "learning_rate": 0.00012519068442998067, "loss": 1.204, "step": 3684 }, { "epoch": 0.7491360032526936, "grad_norm": 0.11724304407835007, "learning_rate": 0.00012517034475744942, "loss": 0.8373, "step": 3685 }, { "epoch": 0.749339296605001, "grad_norm": 0.12346580624580383, "learning_rate": 0.00012515000508491814, "loss": 0.9118, "step": 3686 }, { "epoch": 0.7495425899573084, "grad_norm": 0.12630046904087067, "learning_rate": 0.00012512966541238687, "loss": 1.0533, "step": 3687 }, { "epoch": 0.7497458833096158, "grad_norm": 0.14778174459934235, "learning_rate": 0.0001251093257398556, "loss": 1.2604, "step": 3688 }, { "epoch": 0.7499491766619232, "grad_norm": 0.13751018047332764, "learning_rate": 0.00012508898606732434, "loss": 1.0252, "step": 3689 }, { "epoch": 0.7501524700142306, "grad_norm": 0.14556734263896942, "learning_rate": 0.00012506864639479307, "loss": 1.0671, "step": 3690 }, { "epoch": 0.7503557633665379, "grad_norm": 0.12826183438301086, "learning_rate": 0.00012504830672226177, "loss": 1.0401, "step": 3691 }, { "epoch": 0.7505590567188453, "grad_norm": 0.12293746322393417, "learning_rate": 0.0001250279670497305, "loss": 1.0505, "step": 3692 }, { "epoch": 0.7507623500711527, "grad_norm": 0.14679206907749176, "learning_rate": 0.00012500762737719924, "loss": 1.1754, "step": 3693 }, { "epoch": 0.7509656434234601, "grad_norm": 0.13845571875572205, "learning_rate": 0.00012498728770466797, "loss": 1.1254, "step": 3694 }, { "epoch": 0.7511689367757675, "grad_norm": 0.126956969499588, "learning_rate": 0.0001249669480321367, "loss": 0.9822, "step": 3695 }, { "epoch": 0.7513722301280749, "grad_norm": 0.13764221966266632, "learning_rate": 0.00012494660835960541, "loss": 1.1326, "step": 3696 }, { "epoch": 0.7515755234803821, "grad_norm": 0.13586993515491486, "learning_rate": 0.00012492626868707417, "loss": 1.0949, "step": 3697 }, { "epoch": 0.7517788168326895, "grad_norm": 0.1523975282907486, "learning_rate": 0.0001249059290145429, "loss": 1.1545, "step": 3698 }, { "epoch": 0.7519821101849969, "grad_norm": 0.13115784525871277, "learning_rate": 0.0001248855893420116, "loss": 1.0304, "step": 3699 }, { "epoch": 0.7521854035373043, "grad_norm": 0.13575038313865662, "learning_rate": 0.0001248652496694803, "loss": 1.1775, "step": 3700 }, { "epoch": 0.7523886968896117, "grad_norm": 0.14293211698532104, "learning_rate": 0.00012484490999694904, "loss": 0.987, "step": 3701 }, { "epoch": 0.7525919902419191, "grad_norm": 0.13629594445228577, "learning_rate": 0.0001248245703244178, "loss": 0.9952, "step": 3702 }, { "epoch": 0.7527952835942264, "grad_norm": 0.1200501024723053, "learning_rate": 0.0001248042306518865, "loss": 0.9087, "step": 3703 }, { "epoch": 0.7529985769465338, "grad_norm": 0.12878622114658356, "learning_rate": 0.00012478389097935524, "loss": 1.0632, "step": 3704 }, { "epoch": 0.7532018702988412, "grad_norm": 0.13195644319057465, "learning_rate": 0.00012476355130682396, "loss": 1.1865, "step": 3705 }, { "epoch": 0.7534051636511486, "grad_norm": 0.13144764304161072, "learning_rate": 0.0001247432116342927, "loss": 0.9257, "step": 3706 }, { "epoch": 0.753608457003456, "grad_norm": 0.12141410261392593, "learning_rate": 0.0001247228719617614, "loss": 0.9196, "step": 3707 }, { "epoch": 0.7538117503557634, "grad_norm": 0.13238899409770966, "learning_rate": 0.00012470253228923014, "loss": 1.1404, "step": 3708 }, { "epoch": 0.7540150437080707, "grad_norm": 0.1436709612607956, "learning_rate": 0.00012468219261669886, "loss": 1.1169, "step": 3709 }, { "epoch": 0.7542183370603781, "grad_norm": 0.14147412776947021, "learning_rate": 0.0001246618529441676, "loss": 1.2736, "step": 3710 }, { "epoch": 0.7544216304126855, "grad_norm": 0.13145607709884644, "learning_rate": 0.00012464151327163634, "loss": 1.0299, "step": 3711 }, { "epoch": 0.7546249237649929, "grad_norm": 0.11213693022727966, "learning_rate": 0.00012462117359910506, "loss": 0.8738, "step": 3712 }, { "epoch": 0.7548282171173003, "grad_norm": 0.14354929327964783, "learning_rate": 0.00012460083392657378, "loss": 1.0871, "step": 3713 }, { "epoch": 0.7550315104696076, "grad_norm": 0.15012142062187195, "learning_rate": 0.00012458049425404254, "loss": 1.0701, "step": 3714 }, { "epoch": 0.755234803821915, "grad_norm": 0.15194512903690338, "learning_rate": 0.00012456015458151123, "loss": 1.0018, "step": 3715 }, { "epoch": 0.7554380971742224, "grad_norm": 0.13199283182621002, "learning_rate": 0.00012453981490897996, "loss": 0.9806, "step": 3716 }, { "epoch": 0.7556413905265298, "grad_norm": 0.1443110853433609, "learning_rate": 0.00012451947523644868, "loss": 1.1558, "step": 3717 }, { "epoch": 0.7558446838788372, "grad_norm": 0.1215786412358284, "learning_rate": 0.00012449913556391743, "loss": 1.0642, "step": 3718 }, { "epoch": 0.7560479772311446, "grad_norm": 0.12462542206048965, "learning_rate": 0.00012447879589138616, "loss": 0.9799, "step": 3719 }, { "epoch": 0.7562512705834519, "grad_norm": 0.1319034993648529, "learning_rate": 0.00012445845621885488, "loss": 1.0646, "step": 3720 }, { "epoch": 0.7564545639357593, "grad_norm": 0.14364975690841675, "learning_rate": 0.0001244381165463236, "loss": 1.0853, "step": 3721 }, { "epoch": 0.7566578572880667, "grad_norm": 0.13716979324817657, "learning_rate": 0.00012441777687379236, "loss": 1.2145, "step": 3722 }, { "epoch": 0.7568611506403741, "grad_norm": 0.1348930448293686, "learning_rate": 0.00012439743720126106, "loss": 1.1096, "step": 3723 }, { "epoch": 0.7570644439926815, "grad_norm": 0.15214388072490692, "learning_rate": 0.00012437709752872978, "loss": 1.1986, "step": 3724 }, { "epoch": 0.7572677373449889, "grad_norm": 0.14679096639156342, "learning_rate": 0.0001243567578561985, "loss": 1.2809, "step": 3725 }, { "epoch": 0.7574710306972962, "grad_norm": 0.1401345431804657, "learning_rate": 0.00012433641818366726, "loss": 1.013, "step": 3726 }, { "epoch": 0.7576743240496036, "grad_norm": 0.14611610770225525, "learning_rate": 0.00012431607851113598, "loss": 1.2145, "step": 3727 }, { "epoch": 0.757877617401911, "grad_norm": 0.13954514265060425, "learning_rate": 0.0001242957388386047, "loss": 1.1382, "step": 3728 }, { "epoch": 0.7580809107542184, "grad_norm": 0.1354246586561203, "learning_rate": 0.00012427539916607343, "loss": 1.0877, "step": 3729 }, { "epoch": 0.7582842041065257, "grad_norm": 0.12606988847255707, "learning_rate": 0.00012425505949354218, "loss": 1.0592, "step": 3730 }, { "epoch": 0.7584874974588331, "grad_norm": 0.13141503930091858, "learning_rate": 0.00012423471982101088, "loss": 0.9414, "step": 3731 }, { "epoch": 0.7586907908111404, "grad_norm": 0.1343068927526474, "learning_rate": 0.0001242143801484796, "loss": 0.9862, "step": 3732 }, { "epoch": 0.7588940841634478, "grad_norm": 0.13875959813594818, "learning_rate": 0.00012419404047594833, "loss": 1.3412, "step": 3733 }, { "epoch": 0.7590973775157552, "grad_norm": 0.14184454083442688, "learning_rate": 0.00012417370080341708, "loss": 1.0852, "step": 3734 }, { "epoch": 0.7593006708680626, "grad_norm": 0.13765336573123932, "learning_rate": 0.0001241533611308858, "loss": 1.1018, "step": 3735 }, { "epoch": 0.75950396422037, "grad_norm": 0.11607436835765839, "learning_rate": 0.00012413302145835453, "loss": 0.8447, "step": 3736 }, { "epoch": 0.7597072575726773, "grad_norm": 0.13869017362594604, "learning_rate": 0.00012411268178582325, "loss": 1.125, "step": 3737 }, { "epoch": 0.7599105509249847, "grad_norm": 0.14673906564712524, "learning_rate": 0.000124092342113292, "loss": 1.1876, "step": 3738 }, { "epoch": 0.7601138442772921, "grad_norm": 0.1397872269153595, "learning_rate": 0.0001240720024407607, "loss": 1.0398, "step": 3739 }, { "epoch": 0.7603171376295995, "grad_norm": 0.1220252513885498, "learning_rate": 0.00012405166276822943, "loss": 0.9359, "step": 3740 }, { "epoch": 0.7605204309819069, "grad_norm": 0.1372562199831009, "learning_rate": 0.00012403132309569815, "loss": 1.0704, "step": 3741 }, { "epoch": 0.7607237243342143, "grad_norm": 0.12001727521419525, "learning_rate": 0.00012401098342316688, "loss": 1.0201, "step": 3742 }, { "epoch": 0.7609270176865216, "grad_norm": 0.12373898923397064, "learning_rate": 0.00012399064375063563, "loss": 0.9927, "step": 3743 }, { "epoch": 0.761130311038829, "grad_norm": 0.1459614783525467, "learning_rate": 0.00012397030407810435, "loss": 1.23, "step": 3744 }, { "epoch": 0.7613336043911364, "grad_norm": 0.14972059428691864, "learning_rate": 0.00012394996440557308, "loss": 1.2213, "step": 3745 }, { "epoch": 0.7615368977434438, "grad_norm": 0.125379741191864, "learning_rate": 0.0001239296247330418, "loss": 0.9302, "step": 3746 }, { "epoch": 0.7617401910957512, "grad_norm": 0.13220852613449097, "learning_rate": 0.00012390928506051055, "loss": 0.9221, "step": 3747 }, { "epoch": 0.7619434844480586, "grad_norm": 0.1334318369626999, "learning_rate": 0.00012388894538797925, "loss": 1.1046, "step": 3748 }, { "epoch": 0.7621467778003659, "grad_norm": 0.11617275327444077, "learning_rate": 0.00012386860571544797, "loss": 0.8787, "step": 3749 }, { "epoch": 0.7623500711526733, "grad_norm": 0.12812359631061554, "learning_rate": 0.0001238482660429167, "loss": 1.0109, "step": 3750 }, { "epoch": 0.7625533645049807, "grad_norm": 0.1491006761789322, "learning_rate": 0.00012382792637038545, "loss": 1.1522, "step": 3751 }, { "epoch": 0.7627566578572881, "grad_norm": 0.1386028379201889, "learning_rate": 0.00012380758669785417, "loss": 1.0567, "step": 3752 }, { "epoch": 0.7629599512095955, "grad_norm": 0.12961892783641815, "learning_rate": 0.0001237872470253229, "loss": 0.9436, "step": 3753 }, { "epoch": 0.7631632445619029, "grad_norm": 0.13355448842048645, "learning_rate": 0.00012376690735279162, "loss": 1.063, "step": 3754 }, { "epoch": 0.7633665379142102, "grad_norm": 0.1302691102027893, "learning_rate": 0.00012374656768026037, "loss": 1.019, "step": 3755 }, { "epoch": 0.7635698312665176, "grad_norm": 0.12183891981840134, "learning_rate": 0.00012372622800772907, "loss": 0.964, "step": 3756 }, { "epoch": 0.763773124618825, "grad_norm": 0.12347770482301712, "learning_rate": 0.0001237058883351978, "loss": 0.9031, "step": 3757 }, { "epoch": 0.7639764179711324, "grad_norm": 0.12646906077861786, "learning_rate": 0.00012368554866266652, "loss": 1.009, "step": 3758 }, { "epoch": 0.7641797113234398, "grad_norm": 0.15650388598442078, "learning_rate": 0.00012366520899013527, "loss": 1.2043, "step": 3759 }, { "epoch": 0.7643830046757472, "grad_norm": 0.13092441856861115, "learning_rate": 0.000123644869317604, "loss": 1.038, "step": 3760 }, { "epoch": 0.7645862980280544, "grad_norm": 0.11747883260250092, "learning_rate": 0.00012362452964507272, "loss": 1.0147, "step": 3761 }, { "epoch": 0.7647895913803618, "grad_norm": 0.13621081411838531, "learning_rate": 0.00012360418997254145, "loss": 1.0798, "step": 3762 }, { "epoch": 0.7649928847326692, "grad_norm": 0.1359243243932724, "learning_rate": 0.0001235838503000102, "loss": 1.0795, "step": 3763 }, { "epoch": 0.7651961780849766, "grad_norm": 0.14412851631641388, "learning_rate": 0.0001235635106274789, "loss": 1.0907, "step": 3764 }, { "epoch": 0.765399471437284, "grad_norm": 0.14425703883171082, "learning_rate": 0.00012354317095494762, "loss": 1.1903, "step": 3765 }, { "epoch": 0.7656027647895913, "grad_norm": 0.15288187563419342, "learning_rate": 0.00012352283128241634, "loss": 1.1775, "step": 3766 }, { "epoch": 0.7658060581418987, "grad_norm": 0.149856299161911, "learning_rate": 0.0001235024916098851, "loss": 1.172, "step": 3767 }, { "epoch": 0.7660093514942061, "grad_norm": 0.1374143660068512, "learning_rate": 0.00012348215193735382, "loss": 1.2566, "step": 3768 }, { "epoch": 0.7662126448465135, "grad_norm": 0.13301679491996765, "learning_rate": 0.00012346181226482254, "loss": 1.0046, "step": 3769 }, { "epoch": 0.7664159381988209, "grad_norm": 0.1274683177471161, "learning_rate": 0.00012344147259229127, "loss": 0.8951, "step": 3770 }, { "epoch": 0.7666192315511283, "grad_norm": 0.12883058190345764, "learning_rate": 0.00012342113291976002, "loss": 0.955, "step": 3771 }, { "epoch": 0.7668225249034356, "grad_norm": 0.13394391536712646, "learning_rate": 0.00012340079324722872, "loss": 1.1987, "step": 3772 }, { "epoch": 0.767025818255743, "grad_norm": 0.1280052363872528, "learning_rate": 0.00012338045357469744, "loss": 0.9552, "step": 3773 }, { "epoch": 0.7672291116080504, "grad_norm": 0.13542263209819794, "learning_rate": 0.00012336011390216617, "loss": 0.9411, "step": 3774 }, { "epoch": 0.7674324049603578, "grad_norm": 0.13187946379184723, "learning_rate": 0.00012333977422963492, "loss": 1.0447, "step": 3775 }, { "epoch": 0.7676356983126652, "grad_norm": 0.13274554908275604, "learning_rate": 0.00012331943455710364, "loss": 1.0556, "step": 3776 }, { "epoch": 0.7678389916649726, "grad_norm": 0.1356000155210495, "learning_rate": 0.00012329909488457237, "loss": 1.0791, "step": 3777 }, { "epoch": 0.7680422850172799, "grad_norm": 0.1446497142314911, "learning_rate": 0.0001232787552120411, "loss": 1.1708, "step": 3778 }, { "epoch": 0.7682455783695873, "grad_norm": 0.14726495742797852, "learning_rate": 0.00012325841553950984, "loss": 1.0312, "step": 3779 }, { "epoch": 0.7684488717218947, "grad_norm": 0.1248805895447731, "learning_rate": 0.00012323807586697854, "loss": 1.001, "step": 3780 }, { "epoch": 0.7686521650742021, "grad_norm": 0.13720335066318512, "learning_rate": 0.00012321773619444726, "loss": 1.1051, "step": 3781 }, { "epoch": 0.7688554584265095, "grad_norm": 0.12258980423212051, "learning_rate": 0.000123197396521916, "loss": 1.014, "step": 3782 }, { "epoch": 0.7690587517788169, "grad_norm": 0.14602990448474884, "learning_rate": 0.00012317705684938471, "loss": 1.1083, "step": 3783 }, { "epoch": 0.7692620451311242, "grad_norm": 0.12902162969112396, "learning_rate": 0.00012315671717685347, "loss": 0.9431, "step": 3784 }, { "epoch": 0.7694653384834316, "grad_norm": 0.1396799236536026, "learning_rate": 0.0001231363775043222, "loss": 1.1026, "step": 3785 }, { "epoch": 0.769668631835739, "grad_norm": 0.13856211304664612, "learning_rate": 0.00012311603783179091, "loss": 1.2084, "step": 3786 }, { "epoch": 0.7698719251880464, "grad_norm": 0.12457921355962753, "learning_rate": 0.00012309569815925964, "loss": 1.0894, "step": 3787 }, { "epoch": 0.7700752185403538, "grad_norm": 0.12745535373687744, "learning_rate": 0.00012307535848672836, "loss": 0.9223, "step": 3788 }, { "epoch": 0.770278511892661, "grad_norm": 0.12804381549358368, "learning_rate": 0.0001230550188141971, "loss": 1.139, "step": 3789 }, { "epoch": 0.7704818052449685, "grad_norm": 0.12827259302139282, "learning_rate": 0.0001230346791416658, "loss": 1.0397, "step": 3790 }, { "epoch": 0.7706850985972759, "grad_norm": 0.14576175808906555, "learning_rate": 0.00012301433946913454, "loss": 1.1592, "step": 3791 }, { "epoch": 0.7708883919495833, "grad_norm": 0.13071264326572418, "learning_rate": 0.0001229939997966033, "loss": 1.1051, "step": 3792 }, { "epoch": 0.7710916853018907, "grad_norm": 0.1294952780008316, "learning_rate": 0.000122973660124072, "loss": 0.8815, "step": 3793 }, { "epoch": 0.771294978654198, "grad_norm": 0.13996455073356628, "learning_rate": 0.00012295332045154074, "loss": 1.1277, "step": 3794 }, { "epoch": 0.7714982720065053, "grad_norm": 0.14250068366527557, "learning_rate": 0.00012293298077900946, "loss": 1.2124, "step": 3795 }, { "epoch": 0.7717015653588127, "grad_norm": 0.12336855381727219, "learning_rate": 0.00012291264110647819, "loss": 1.0764, "step": 3796 }, { "epoch": 0.7719048587111201, "grad_norm": 0.14124532043933868, "learning_rate": 0.0001228923014339469, "loss": 1.156, "step": 3797 }, { "epoch": 0.7721081520634275, "grad_norm": 0.12716175615787506, "learning_rate": 0.00012287196176141563, "loss": 0.9228, "step": 3798 }, { "epoch": 0.7723114454157349, "grad_norm": 0.1456788033246994, "learning_rate": 0.00012285162208888436, "loss": 1.1462, "step": 3799 }, { "epoch": 0.7725147387680423, "grad_norm": 0.14236094057559967, "learning_rate": 0.0001228312824163531, "loss": 1.2849, "step": 3800 }, { "epoch": 0.7727180321203496, "grad_norm": 0.12564775347709656, "learning_rate": 0.00012281094274382184, "loss": 1.1172, "step": 3801 }, { "epoch": 0.772921325472657, "grad_norm": 0.11949034035205841, "learning_rate": 0.00012279060307129056, "loss": 0.8353, "step": 3802 }, { "epoch": 0.7731246188249644, "grad_norm": 0.13106048107147217, "learning_rate": 0.00012277026339875928, "loss": 1.0445, "step": 3803 }, { "epoch": 0.7733279121772718, "grad_norm": 0.11542949080467224, "learning_rate": 0.000122749923726228, "loss": 0.8345, "step": 3804 }, { "epoch": 0.7735312055295792, "grad_norm": 0.1267216056585312, "learning_rate": 0.00012272958405369673, "loss": 0.9304, "step": 3805 }, { "epoch": 0.7737344988818866, "grad_norm": 0.14101152122020721, "learning_rate": 0.00012270924438116546, "loss": 1.1243, "step": 3806 }, { "epoch": 0.7739377922341939, "grad_norm": 0.12236955761909485, "learning_rate": 0.00012268890470863418, "loss": 0.855, "step": 3807 }, { "epoch": 0.7741410855865013, "grad_norm": 0.11732099950313568, "learning_rate": 0.00012266856503610293, "loss": 0.8707, "step": 3808 }, { "epoch": 0.7743443789388087, "grad_norm": 0.12826688587665558, "learning_rate": 0.00012264822536357166, "loss": 1.0896, "step": 3809 }, { "epoch": 0.7745476722911161, "grad_norm": 0.13263994455337524, "learning_rate": 0.00012262788569104038, "loss": 1.0278, "step": 3810 }, { "epoch": 0.7747509656434235, "grad_norm": 0.12591175734996796, "learning_rate": 0.0001226075460185091, "loss": 0.8749, "step": 3811 }, { "epoch": 0.7749542589957309, "grad_norm": 0.13671188056468964, "learning_rate": 0.00012258720634597786, "loss": 1.0305, "step": 3812 }, { "epoch": 0.7751575523480382, "grad_norm": 0.13743267953395844, "learning_rate": 0.00012256686667344656, "loss": 1.0444, "step": 3813 }, { "epoch": 0.7753608457003456, "grad_norm": 0.12400873005390167, "learning_rate": 0.00012254652700091528, "loss": 0.8696, "step": 3814 }, { "epoch": 0.775564139052653, "grad_norm": 0.13085900247097015, "learning_rate": 0.000122526187328384, "loss": 1.0268, "step": 3815 }, { "epoch": 0.7757674324049604, "grad_norm": 0.13684894144535065, "learning_rate": 0.00012250584765585276, "loss": 1.1531, "step": 3816 }, { "epoch": 0.7759707257572678, "grad_norm": 0.12287990748882294, "learning_rate": 0.00012248550798332148, "loss": 0.9642, "step": 3817 }, { "epoch": 0.7761740191095751, "grad_norm": 0.1277002990245819, "learning_rate": 0.0001224651683107902, "loss": 0.9986, "step": 3818 }, { "epoch": 0.7763773124618825, "grad_norm": 0.13747401535511017, "learning_rate": 0.00012244482863825893, "loss": 1.084, "step": 3819 }, { "epoch": 0.7765806058141899, "grad_norm": 0.14822441339492798, "learning_rate": 0.00012242448896572768, "loss": 0.9895, "step": 3820 }, { "epoch": 0.7767838991664973, "grad_norm": 0.13352279365062714, "learning_rate": 0.00012240414929319638, "loss": 1.0556, "step": 3821 }, { "epoch": 0.7769871925188047, "grad_norm": 0.14219939708709717, "learning_rate": 0.0001223838096206651, "loss": 1.2207, "step": 3822 }, { "epoch": 0.7771904858711121, "grad_norm": 0.14143721759319305, "learning_rate": 0.00012236346994813383, "loss": 1.1582, "step": 3823 }, { "epoch": 0.7773937792234193, "grad_norm": 0.14316944777965546, "learning_rate": 0.00012234313027560255, "loss": 1.1662, "step": 3824 }, { "epoch": 0.7775970725757267, "grad_norm": 0.1315951645374298, "learning_rate": 0.0001223227906030713, "loss": 0.9502, "step": 3825 }, { "epoch": 0.7778003659280341, "grad_norm": 0.14173437654972076, "learning_rate": 0.00012230245093054003, "loss": 1.0675, "step": 3826 }, { "epoch": 0.7780036592803415, "grad_norm": 0.11854992806911469, "learning_rate": 0.00012228211125800875, "loss": 1.0158, "step": 3827 }, { "epoch": 0.7782069526326489, "grad_norm": 0.1441982090473175, "learning_rate": 0.00012226177158547748, "loss": 0.9432, "step": 3828 }, { "epoch": 0.7784102459849563, "grad_norm": 0.13182631134986877, "learning_rate": 0.0001222414319129462, "loss": 0.9372, "step": 3829 }, { "epoch": 0.7786135393372636, "grad_norm": 0.12839631736278534, "learning_rate": 0.00012222109224041493, "loss": 1.0888, "step": 3830 }, { "epoch": 0.778816832689571, "grad_norm": 0.14066271483898163, "learning_rate": 0.00012220075256788365, "loss": 1.0749, "step": 3831 }, { "epoch": 0.7790201260418784, "grad_norm": 0.13321231305599213, "learning_rate": 0.00012218041289535237, "loss": 0.9994, "step": 3832 }, { "epoch": 0.7792234193941858, "grad_norm": 0.12681256234645844, "learning_rate": 0.00012216007322282113, "loss": 0.9195, "step": 3833 }, { "epoch": 0.7794267127464932, "grad_norm": 0.14184892177581787, "learning_rate": 0.00012213973355028985, "loss": 1.2513, "step": 3834 }, { "epoch": 0.7796300060988006, "grad_norm": 0.10922446101903915, "learning_rate": 0.00012211939387775858, "loss": 0.8768, "step": 3835 }, { "epoch": 0.7798332994511079, "grad_norm": 0.135145902633667, "learning_rate": 0.0001220990542052273, "loss": 1.1626, "step": 3836 }, { "epoch": 0.7800365928034153, "grad_norm": 0.1317375898361206, "learning_rate": 0.00012207871453269602, "loss": 1.1827, "step": 3837 }, { "epoch": 0.7802398861557227, "grad_norm": 0.1291121393442154, "learning_rate": 0.00012205837486016476, "loss": 1.0689, "step": 3838 }, { "epoch": 0.7804431795080301, "grad_norm": 0.12500061094760895, "learning_rate": 0.00012203803518763349, "loss": 0.9881, "step": 3839 }, { "epoch": 0.7806464728603375, "grad_norm": 0.1250467747449875, "learning_rate": 0.0001220176955151022, "loss": 0.953, "step": 3840 }, { "epoch": 0.7808497662126448, "grad_norm": 0.1281813234090805, "learning_rate": 0.00012199735584257095, "loss": 1.0164, "step": 3841 }, { "epoch": 0.7810530595649522, "grad_norm": 0.11305128782987595, "learning_rate": 0.00012197701617003967, "loss": 0.8044, "step": 3842 }, { "epoch": 0.7812563529172596, "grad_norm": 0.14512300491333008, "learning_rate": 0.0001219566764975084, "loss": 1.121, "step": 3843 }, { "epoch": 0.781459646269567, "grad_norm": 0.1404501348733902, "learning_rate": 0.00012193633682497711, "loss": 1.1358, "step": 3844 }, { "epoch": 0.7816629396218744, "grad_norm": 0.13621416687965393, "learning_rate": 0.00012191599715244586, "loss": 1.0535, "step": 3845 }, { "epoch": 0.7818662329741818, "grad_norm": 0.11842742562294006, "learning_rate": 0.00012189565747991459, "loss": 0.9138, "step": 3846 }, { "epoch": 0.7820695263264891, "grad_norm": 0.14051960408687592, "learning_rate": 0.00012187531780738331, "loss": 1.0012, "step": 3847 }, { "epoch": 0.7822728196787965, "grad_norm": 0.12643176317214966, "learning_rate": 0.00012185497813485202, "loss": 0.9594, "step": 3848 }, { "epoch": 0.7824761130311039, "grad_norm": 0.13830742239952087, "learning_rate": 0.00012183463846232077, "loss": 1.0394, "step": 3849 }, { "epoch": 0.7826794063834113, "grad_norm": 0.12013565003871918, "learning_rate": 0.0001218142987897895, "loss": 0.8788, "step": 3850 }, { "epoch": 0.7828826997357187, "grad_norm": 0.14404936134815216, "learning_rate": 0.00012179395911725822, "loss": 1.0755, "step": 3851 }, { "epoch": 0.7830859930880261, "grad_norm": 0.1242976263165474, "learning_rate": 0.00012177361944472693, "loss": 0.9733, "step": 3852 }, { "epoch": 0.7832892864403334, "grad_norm": 0.13720235228538513, "learning_rate": 0.00012175327977219568, "loss": 1.1624, "step": 3853 }, { "epoch": 0.7834925797926408, "grad_norm": 0.11972963809967041, "learning_rate": 0.00012173294009966441, "loss": 0.8956, "step": 3854 }, { "epoch": 0.7836958731449482, "grad_norm": 0.1539568156003952, "learning_rate": 0.00012171260042713313, "loss": 1.1909, "step": 3855 }, { "epoch": 0.7838991664972556, "grad_norm": 0.11814553290605545, "learning_rate": 0.00012169226075460186, "loss": 0.8093, "step": 3856 }, { "epoch": 0.784102459849563, "grad_norm": 0.1346539407968521, "learning_rate": 0.0001216719210820706, "loss": 1.0815, "step": 3857 }, { "epoch": 0.7843057532018703, "grad_norm": 0.12297804653644562, "learning_rate": 0.00012165158140953932, "loss": 0.9216, "step": 3858 }, { "epoch": 0.7845090465541776, "grad_norm": 0.13503801822662354, "learning_rate": 0.00012163124173700804, "loss": 1.2251, "step": 3859 }, { "epoch": 0.784712339906485, "grad_norm": 0.1486554741859436, "learning_rate": 0.00012161090206447677, "loss": 1.0849, "step": 3860 }, { "epoch": 0.7849156332587924, "grad_norm": 0.13975729048252106, "learning_rate": 0.0001215905623919455, "loss": 1.0914, "step": 3861 }, { "epoch": 0.7851189266110998, "grad_norm": 0.14087165892124176, "learning_rate": 0.00012157022271941423, "loss": 1.2099, "step": 3862 }, { "epoch": 0.7853222199634072, "grad_norm": 0.14210177958011627, "learning_rate": 0.00012154988304688296, "loss": 1.1608, "step": 3863 }, { "epoch": 0.7855255133157146, "grad_norm": 0.14731276035308838, "learning_rate": 0.00012152954337435168, "loss": 1.1699, "step": 3864 }, { "epoch": 0.7857288066680219, "grad_norm": 0.12955504655838013, "learning_rate": 0.00012150920370182039, "loss": 1.0048, "step": 3865 }, { "epoch": 0.7859321000203293, "grad_norm": 0.13563545048236847, "learning_rate": 0.00012148886402928914, "loss": 1.0157, "step": 3866 }, { "epoch": 0.7861353933726367, "grad_norm": 0.12085787951946259, "learning_rate": 0.00012146852435675787, "loss": 0.9674, "step": 3867 }, { "epoch": 0.7863386867249441, "grad_norm": 0.12295902520418167, "learning_rate": 0.00012144818468422659, "loss": 0.9919, "step": 3868 }, { "epoch": 0.7865419800772515, "grad_norm": 0.12418414652347565, "learning_rate": 0.0001214278450116953, "loss": 1.0134, "step": 3869 }, { "epoch": 0.7867452734295588, "grad_norm": 0.13931810855865479, "learning_rate": 0.00012140750533916405, "loss": 0.9817, "step": 3870 }, { "epoch": 0.7869485667818662, "grad_norm": 0.14642778038978577, "learning_rate": 0.00012138716566663278, "loss": 1.0308, "step": 3871 }, { "epoch": 0.7871518601341736, "grad_norm": 0.1386035829782486, "learning_rate": 0.0001213668259941015, "loss": 1.0426, "step": 3872 }, { "epoch": 0.787355153486481, "grad_norm": 0.1395215541124344, "learning_rate": 0.00012134648632157021, "loss": 1.1342, "step": 3873 }, { "epoch": 0.7875584468387884, "grad_norm": 0.11670932918787003, "learning_rate": 0.00012132614664903896, "loss": 1.0713, "step": 3874 }, { "epoch": 0.7877617401910958, "grad_norm": 0.13401034474372864, "learning_rate": 0.00012130580697650769, "loss": 1.1358, "step": 3875 }, { "epoch": 0.7879650335434031, "grad_norm": 0.14385886490345, "learning_rate": 0.00012128546730397641, "loss": 1.1176, "step": 3876 }, { "epoch": 0.7881683268957105, "grad_norm": 0.12759087979793549, "learning_rate": 0.00012126512763144512, "loss": 1.0423, "step": 3877 }, { "epoch": 0.7883716202480179, "grad_norm": 0.155388742685318, "learning_rate": 0.00012124478795891388, "loss": 1.2041, "step": 3878 }, { "epoch": 0.7885749136003253, "grad_norm": 0.13593384623527527, "learning_rate": 0.0001212244482863826, "loss": 1.1223, "step": 3879 }, { "epoch": 0.7887782069526327, "grad_norm": 0.12947037816047668, "learning_rate": 0.00012120410861385133, "loss": 0.8951, "step": 3880 }, { "epoch": 0.7889815003049401, "grad_norm": 0.13151758909225464, "learning_rate": 0.00012118376894132004, "loss": 1.0879, "step": 3881 }, { "epoch": 0.7891847936572474, "grad_norm": 0.12490543723106384, "learning_rate": 0.00012116342926878879, "loss": 1.0054, "step": 3882 }, { "epoch": 0.7893880870095548, "grad_norm": 0.13789287209510803, "learning_rate": 0.00012114308959625751, "loss": 1.1069, "step": 3883 }, { "epoch": 0.7895913803618622, "grad_norm": 0.13065920770168304, "learning_rate": 0.00012112274992372624, "loss": 1.034, "step": 3884 }, { "epoch": 0.7897946737141696, "grad_norm": 0.1365562081336975, "learning_rate": 0.00012110241025119495, "loss": 1.1702, "step": 3885 }, { "epoch": 0.789997967066477, "grad_norm": 0.14647583663463593, "learning_rate": 0.0001210820705786637, "loss": 1.1919, "step": 3886 }, { "epoch": 0.7902012604187844, "grad_norm": 0.1417173445224762, "learning_rate": 0.00012106173090613242, "loss": 0.9846, "step": 3887 }, { "epoch": 0.7904045537710916, "grad_norm": 0.12423622608184814, "learning_rate": 0.00012104139123360115, "loss": 1.0007, "step": 3888 }, { "epoch": 0.790607847123399, "grad_norm": 0.1554161161184311, "learning_rate": 0.00012102105156106986, "loss": 1.3904, "step": 3889 }, { "epoch": 0.7908111404757064, "grad_norm": 0.14858123660087585, "learning_rate": 0.00012100071188853861, "loss": 1.1642, "step": 3890 }, { "epoch": 0.7910144338280138, "grad_norm": 0.1301809698343277, "learning_rate": 0.00012098037221600733, "loss": 1.0904, "step": 3891 }, { "epoch": 0.7912177271803212, "grad_norm": 0.13561727106571198, "learning_rate": 0.00012096003254347606, "loss": 1.1204, "step": 3892 }, { "epoch": 0.7914210205326286, "grad_norm": 0.13255447149276733, "learning_rate": 0.00012093969287094477, "loss": 1.0225, "step": 3893 }, { "epoch": 0.7916243138849359, "grad_norm": 0.14348706603050232, "learning_rate": 0.00012091935319841352, "loss": 1.1129, "step": 3894 }, { "epoch": 0.7918276072372433, "grad_norm": 0.1501035988330841, "learning_rate": 0.00012089901352588225, "loss": 1.1329, "step": 3895 }, { "epoch": 0.7920309005895507, "grad_norm": 0.11853793263435364, "learning_rate": 0.00012087867385335097, "loss": 1.0178, "step": 3896 }, { "epoch": 0.7922341939418581, "grad_norm": 0.12795880436897278, "learning_rate": 0.00012085833418081968, "loss": 0.9846, "step": 3897 }, { "epoch": 0.7924374872941655, "grad_norm": 0.13203004002571106, "learning_rate": 0.00012083799450828843, "loss": 1.0961, "step": 3898 }, { "epoch": 0.7926407806464728, "grad_norm": 0.1426658183336258, "learning_rate": 0.00012081765483575716, "loss": 1.101, "step": 3899 }, { "epoch": 0.7928440739987802, "grad_norm": 0.1364053338766098, "learning_rate": 0.00012079731516322588, "loss": 1.0818, "step": 3900 }, { "epoch": 0.7930473673510876, "grad_norm": 0.1505763679742813, "learning_rate": 0.00012077697549069459, "loss": 1.4502, "step": 3901 }, { "epoch": 0.793250660703395, "grad_norm": 0.133381187915802, "learning_rate": 0.00012075663581816334, "loss": 1.0036, "step": 3902 }, { "epoch": 0.7934539540557024, "grad_norm": 0.12915650010108948, "learning_rate": 0.00012073629614563207, "loss": 0.8927, "step": 3903 }, { "epoch": 0.7936572474080098, "grad_norm": 0.12571905553340912, "learning_rate": 0.0001207159564731008, "loss": 0.8493, "step": 3904 }, { "epoch": 0.7938605407603171, "grad_norm": 0.13056504726409912, "learning_rate": 0.0001206956168005695, "loss": 1.0281, "step": 3905 }, { "epoch": 0.7940638341126245, "grad_norm": 0.13789808750152588, "learning_rate": 0.00012067527712803826, "loss": 1.1499, "step": 3906 }, { "epoch": 0.7942671274649319, "grad_norm": 0.136823832988739, "learning_rate": 0.00012065493745550698, "loss": 1.122, "step": 3907 }, { "epoch": 0.7944704208172393, "grad_norm": 0.1468329280614853, "learning_rate": 0.0001206345977829757, "loss": 1.1591, "step": 3908 }, { "epoch": 0.7946737141695467, "grad_norm": 0.12451114505529404, "learning_rate": 0.00012061425811044442, "loss": 0.9909, "step": 3909 }, { "epoch": 0.7948770075218541, "grad_norm": 0.13311980664730072, "learning_rate": 0.00012059391843791314, "loss": 1.0117, "step": 3910 }, { "epoch": 0.7950803008741614, "grad_norm": 0.13651201128959656, "learning_rate": 0.00012057357876538189, "loss": 1.0057, "step": 3911 }, { "epoch": 0.7952835942264688, "grad_norm": 0.14184725284576416, "learning_rate": 0.00012055323909285062, "loss": 1.1766, "step": 3912 }, { "epoch": 0.7954868875787762, "grad_norm": 0.12303798645734787, "learning_rate": 0.00012053289942031933, "loss": 0.9439, "step": 3913 }, { "epoch": 0.7956901809310836, "grad_norm": 0.12423896044492722, "learning_rate": 0.00012051255974778805, "loss": 1.0675, "step": 3914 }, { "epoch": 0.795893474283391, "grad_norm": 0.16398456692695618, "learning_rate": 0.0001204922200752568, "loss": 1.2481, "step": 3915 }, { "epoch": 0.7960967676356984, "grad_norm": 0.13194435834884644, "learning_rate": 0.00012047188040272553, "loss": 1.0274, "step": 3916 }, { "epoch": 0.7963000609880057, "grad_norm": 0.13115055859088898, "learning_rate": 0.00012045154073019425, "loss": 0.9719, "step": 3917 }, { "epoch": 0.796503354340313, "grad_norm": 0.1376492977142334, "learning_rate": 0.00012043120105766296, "loss": 1.1586, "step": 3918 }, { "epoch": 0.7967066476926205, "grad_norm": 0.13110294938087463, "learning_rate": 0.00012041086138513171, "loss": 1.0523, "step": 3919 }, { "epoch": 0.7969099410449278, "grad_norm": 0.13019250333309174, "learning_rate": 0.00012039052171260044, "loss": 0.9233, "step": 3920 }, { "epoch": 0.7971132343972352, "grad_norm": 0.14026646316051483, "learning_rate": 0.00012037018204006916, "loss": 1.1442, "step": 3921 }, { "epoch": 0.7973165277495425, "grad_norm": 0.13189871609210968, "learning_rate": 0.00012034984236753787, "loss": 1.0618, "step": 3922 }, { "epoch": 0.7975198211018499, "grad_norm": 0.1411222368478775, "learning_rate": 0.00012032950269500663, "loss": 1.161, "step": 3923 }, { "epoch": 0.7977231144541573, "grad_norm": 0.13838204741477966, "learning_rate": 0.00012030916302247535, "loss": 0.9388, "step": 3924 }, { "epoch": 0.7979264078064647, "grad_norm": 0.13982777297496796, "learning_rate": 0.00012028882334994408, "loss": 1.2115, "step": 3925 }, { "epoch": 0.7981297011587721, "grad_norm": 0.16525013744831085, "learning_rate": 0.00012026848367741279, "loss": 1.3161, "step": 3926 }, { "epoch": 0.7983329945110795, "grad_norm": 0.12277159839868546, "learning_rate": 0.00012024814400488154, "loss": 0.9172, "step": 3927 }, { "epoch": 0.7985362878633868, "grad_norm": 0.13560567796230316, "learning_rate": 0.00012022780433235026, "loss": 1.1087, "step": 3928 }, { "epoch": 0.7987395812156942, "grad_norm": 0.13598614931106567, "learning_rate": 0.00012020746465981899, "loss": 1.0942, "step": 3929 }, { "epoch": 0.7989428745680016, "grad_norm": 0.12996844947338104, "learning_rate": 0.0001201871249872877, "loss": 1.1133, "step": 3930 }, { "epoch": 0.799146167920309, "grad_norm": 0.14834141731262207, "learning_rate": 0.00012016678531475645, "loss": 1.0568, "step": 3931 }, { "epoch": 0.7993494612726164, "grad_norm": 0.14238953590393066, "learning_rate": 0.00012014644564222517, "loss": 1.0885, "step": 3932 }, { "epoch": 0.7995527546249238, "grad_norm": 0.14690124988555908, "learning_rate": 0.0001201261059696939, "loss": 1.1593, "step": 3933 }, { "epoch": 0.7997560479772311, "grad_norm": 0.12939292192459106, "learning_rate": 0.00012010576629716261, "loss": 1.0157, "step": 3934 }, { "epoch": 0.7999593413295385, "grad_norm": 0.14343731105327606, "learning_rate": 0.00012008542662463136, "loss": 1.1929, "step": 3935 }, { "epoch": 0.8001626346818459, "grad_norm": 0.14443133771419525, "learning_rate": 0.00012006508695210008, "loss": 1.1939, "step": 3936 }, { "epoch": 0.8003659280341533, "grad_norm": 0.1503942608833313, "learning_rate": 0.00012004474727956881, "loss": 1.2368, "step": 3937 }, { "epoch": 0.8005692213864607, "grad_norm": 0.14112812280654907, "learning_rate": 0.00012002440760703752, "loss": 1.2037, "step": 3938 }, { "epoch": 0.8007725147387681, "grad_norm": 0.1394345462322235, "learning_rate": 0.00012000406793450627, "loss": 1.0076, "step": 3939 }, { "epoch": 0.8009758080910754, "grad_norm": 0.12623324990272522, "learning_rate": 0.000119983728261975, "loss": 1.0405, "step": 3940 }, { "epoch": 0.8011791014433828, "grad_norm": 0.14469188451766968, "learning_rate": 0.00011996338858944372, "loss": 1.2748, "step": 3941 }, { "epoch": 0.8013823947956902, "grad_norm": 0.14259637892246246, "learning_rate": 0.00011994304891691243, "loss": 1.1081, "step": 3942 }, { "epoch": 0.8015856881479976, "grad_norm": 0.13012677431106567, "learning_rate": 0.00011992270924438118, "loss": 0.9882, "step": 3943 }, { "epoch": 0.801788981500305, "grad_norm": 0.1390579491853714, "learning_rate": 0.00011990236957184991, "loss": 1.0806, "step": 3944 }, { "epoch": 0.8019922748526124, "grad_norm": 0.11860685795545578, "learning_rate": 0.00011988202989931863, "loss": 0.9036, "step": 3945 }, { "epoch": 0.8021955682049197, "grad_norm": 0.12979279458522797, "learning_rate": 0.00011986169022678734, "loss": 0.9918, "step": 3946 }, { "epoch": 0.8023988615572271, "grad_norm": 0.1302417367696762, "learning_rate": 0.0001198413505542561, "loss": 1.0498, "step": 3947 }, { "epoch": 0.8026021549095345, "grad_norm": 0.12292234599590302, "learning_rate": 0.00011982101088172482, "loss": 1.0181, "step": 3948 }, { "epoch": 0.8028054482618419, "grad_norm": 0.12002125382423401, "learning_rate": 0.00011980067120919354, "loss": 0.9503, "step": 3949 }, { "epoch": 0.8030087416141493, "grad_norm": 0.1403089463710785, "learning_rate": 0.00011978033153666225, "loss": 1.033, "step": 3950 }, { "epoch": 0.8032120349664565, "grad_norm": 0.13033491373062134, "learning_rate": 0.00011975999186413098, "loss": 0.9713, "step": 3951 }, { "epoch": 0.8034153283187639, "grad_norm": 0.14898493885993958, "learning_rate": 0.00011973965219159973, "loss": 1.1566, "step": 3952 }, { "epoch": 0.8036186216710713, "grad_norm": 0.1320907324552536, "learning_rate": 0.00011971931251906845, "loss": 0.9786, "step": 3953 }, { "epoch": 0.8038219150233787, "grad_norm": 0.14695419371128082, "learning_rate": 0.00011969897284653717, "loss": 1.1951, "step": 3954 }, { "epoch": 0.8040252083756861, "grad_norm": 0.13139761984348297, "learning_rate": 0.00011967863317400589, "loss": 1.0117, "step": 3955 }, { "epoch": 0.8042285017279935, "grad_norm": 0.13509678840637207, "learning_rate": 0.00011965829350147464, "loss": 1.0577, "step": 3956 }, { "epoch": 0.8044317950803008, "grad_norm": 0.12979759275913239, "learning_rate": 0.00011963795382894337, "loss": 1.0552, "step": 3957 }, { "epoch": 0.8046350884326082, "grad_norm": 0.12381764501333237, "learning_rate": 0.00011961761415641208, "loss": 1.045, "step": 3958 }, { "epoch": 0.8048383817849156, "grad_norm": 0.1273113489151001, "learning_rate": 0.0001195972744838808, "loss": 1.0352, "step": 3959 }, { "epoch": 0.805041675137223, "grad_norm": 0.13506780564785004, "learning_rate": 0.00011957693481134955, "loss": 1.0183, "step": 3960 }, { "epoch": 0.8052449684895304, "grad_norm": 0.13643690943717957, "learning_rate": 0.00011955659513881828, "loss": 0.9271, "step": 3961 }, { "epoch": 0.8054482618418378, "grad_norm": 0.13394352793693542, "learning_rate": 0.00011953625546628699, "loss": 1.01, "step": 3962 }, { "epoch": 0.8056515551941451, "grad_norm": 0.12335560470819473, "learning_rate": 0.00011951591579375571, "loss": 1.0166, "step": 3963 }, { "epoch": 0.8058548485464525, "grad_norm": 0.13454771041870117, "learning_rate": 0.00011949557612122446, "loss": 0.9962, "step": 3964 }, { "epoch": 0.8060581418987599, "grad_norm": 0.13157734274864197, "learning_rate": 0.00011947523644869319, "loss": 1.0938, "step": 3965 }, { "epoch": 0.8062614352510673, "grad_norm": 0.1234944686293602, "learning_rate": 0.0001194548967761619, "loss": 0.962, "step": 3966 }, { "epoch": 0.8064647286033747, "grad_norm": 0.1396668702363968, "learning_rate": 0.00011943455710363062, "loss": 1.1383, "step": 3967 }, { "epoch": 0.8066680219556821, "grad_norm": 0.14107537269592285, "learning_rate": 0.00011941421743109938, "loss": 0.9585, "step": 3968 }, { "epoch": 0.8068713153079894, "grad_norm": 0.14701543748378754, "learning_rate": 0.0001193938777585681, "loss": 1.1863, "step": 3969 }, { "epoch": 0.8070746086602968, "grad_norm": 0.13169316947460175, "learning_rate": 0.00011937353808603681, "loss": 1.1441, "step": 3970 }, { "epoch": 0.8072779020126042, "grad_norm": 0.1322471648454666, "learning_rate": 0.00011935319841350554, "loss": 1.0807, "step": 3971 }, { "epoch": 0.8074811953649116, "grad_norm": 0.1394157111644745, "learning_rate": 0.00011933285874097429, "loss": 1.1423, "step": 3972 }, { "epoch": 0.807684488717219, "grad_norm": 0.13044828176498413, "learning_rate": 0.00011931251906844301, "loss": 1.0136, "step": 3973 }, { "epoch": 0.8078877820695263, "grad_norm": 0.14347662031650543, "learning_rate": 0.00011929217939591174, "loss": 1.0989, "step": 3974 }, { "epoch": 0.8080910754218337, "grad_norm": 0.13873308897018433, "learning_rate": 0.00011927183972338045, "loss": 1.0194, "step": 3975 }, { "epoch": 0.8082943687741411, "grad_norm": 0.1266692876815796, "learning_rate": 0.0001192515000508492, "loss": 1.0114, "step": 3976 }, { "epoch": 0.8084976621264485, "grad_norm": 0.13810694217681885, "learning_rate": 0.00011923116037831792, "loss": 1.0433, "step": 3977 }, { "epoch": 0.8087009554787559, "grad_norm": 0.14580698311328888, "learning_rate": 0.00011921082070578665, "loss": 1.1527, "step": 3978 }, { "epoch": 0.8089042488310633, "grad_norm": 0.14972800016403198, "learning_rate": 0.00011919048103325536, "loss": 1.2265, "step": 3979 }, { "epoch": 0.8091075421833706, "grad_norm": 0.14041751623153687, "learning_rate": 0.00011917014136072411, "loss": 1.0299, "step": 3980 }, { "epoch": 0.809310835535678, "grad_norm": 0.14178511500358582, "learning_rate": 0.00011914980168819283, "loss": 1.1151, "step": 3981 }, { "epoch": 0.8095141288879854, "grad_norm": 0.14620938897132874, "learning_rate": 0.00011912946201566156, "loss": 1.1133, "step": 3982 }, { "epoch": 0.8097174222402928, "grad_norm": 0.14342685043811798, "learning_rate": 0.00011910912234313027, "loss": 0.9168, "step": 3983 }, { "epoch": 0.8099207155926001, "grad_norm": 0.1431896686553955, "learning_rate": 0.00011908878267059902, "loss": 1.1063, "step": 3984 }, { "epoch": 0.8101240089449075, "grad_norm": 0.13744968175888062, "learning_rate": 0.00011906844299806775, "loss": 1.0406, "step": 3985 }, { "epoch": 0.8103273022972148, "grad_norm": 0.1373889297246933, "learning_rate": 0.00011904810332553647, "loss": 1.0499, "step": 3986 }, { "epoch": 0.8105305956495222, "grad_norm": 0.13776156306266785, "learning_rate": 0.00011902776365300518, "loss": 1.0377, "step": 3987 }, { "epoch": 0.8107338890018296, "grad_norm": 0.13153620064258575, "learning_rate": 0.00011900742398047393, "loss": 0.995, "step": 3988 }, { "epoch": 0.810937182354137, "grad_norm": 0.153978168964386, "learning_rate": 0.00011898708430794266, "loss": 1.1654, "step": 3989 }, { "epoch": 0.8111404757064444, "grad_norm": 0.14656215906143188, "learning_rate": 0.00011896674463541138, "loss": 0.9426, "step": 3990 }, { "epoch": 0.8113437690587518, "grad_norm": 0.13378344476222992, "learning_rate": 0.00011894640496288009, "loss": 0.9992, "step": 3991 }, { "epoch": 0.8115470624110591, "grad_norm": 0.14194630086421967, "learning_rate": 0.00011892606529034882, "loss": 0.9897, "step": 3992 }, { "epoch": 0.8117503557633665, "grad_norm": 0.12744341790676117, "learning_rate": 0.00011890572561781757, "loss": 1.0271, "step": 3993 }, { "epoch": 0.8119536491156739, "grad_norm": 0.12593428790569305, "learning_rate": 0.0001188853859452863, "loss": 0.8918, "step": 3994 }, { "epoch": 0.8121569424679813, "grad_norm": 0.1353382021188736, "learning_rate": 0.000118865046272755, "loss": 0.9951, "step": 3995 }, { "epoch": 0.8123602358202887, "grad_norm": 0.13705074787139893, "learning_rate": 0.00011884470660022373, "loss": 0.9689, "step": 3996 }, { "epoch": 0.8125635291725961, "grad_norm": 0.1512332260608673, "learning_rate": 0.00011882436692769248, "loss": 1.1077, "step": 3997 }, { "epoch": 0.8127668225249034, "grad_norm": 0.1288524866104126, "learning_rate": 0.0001188040272551612, "loss": 1.0678, "step": 3998 }, { "epoch": 0.8129701158772108, "grad_norm": 0.13065043091773987, "learning_rate": 0.00011878368758262992, "loss": 1.0389, "step": 3999 }, { "epoch": 0.8131734092295182, "grad_norm": 0.14276419579982758, "learning_rate": 0.00011876334791009864, "loss": 1.0811, "step": 4000 }, { "epoch": 0.8133767025818256, "grad_norm": 0.12206871807575226, "learning_rate": 0.00011874300823756739, "loss": 0.8628, "step": 4001 }, { "epoch": 0.813579995934133, "grad_norm": 0.13889804482460022, "learning_rate": 0.00011872266856503612, "loss": 1.026, "step": 4002 }, { "epoch": 0.8137832892864403, "grad_norm": 0.13615025579929352, "learning_rate": 0.00011870232889250483, "loss": 1.0019, "step": 4003 }, { "epoch": 0.8139865826387477, "grad_norm": 0.12381166964769363, "learning_rate": 0.00011868198921997355, "loss": 0.9913, "step": 4004 }, { "epoch": 0.8141898759910551, "grad_norm": 0.13503628969192505, "learning_rate": 0.0001186616495474423, "loss": 1.0766, "step": 4005 }, { "epoch": 0.8143931693433625, "grad_norm": 0.143154576420784, "learning_rate": 0.00011864130987491103, "loss": 1.1219, "step": 4006 }, { "epoch": 0.8145964626956699, "grad_norm": 0.15656810998916626, "learning_rate": 0.00011862097020237974, "loss": 1.193, "step": 4007 }, { "epoch": 0.8147997560479773, "grad_norm": 0.14031293988227844, "learning_rate": 0.00011860063052984846, "loss": 0.9923, "step": 4008 }, { "epoch": 0.8150030494002846, "grad_norm": 0.13734276592731476, "learning_rate": 0.00011858029085731721, "loss": 1.0802, "step": 4009 }, { "epoch": 0.815206342752592, "grad_norm": 0.1422613561153412, "learning_rate": 0.00011855995118478594, "loss": 1.1129, "step": 4010 }, { "epoch": 0.8154096361048994, "grad_norm": 0.12899209558963776, "learning_rate": 0.00011853961151225465, "loss": 0.8745, "step": 4011 }, { "epoch": 0.8156129294572068, "grad_norm": 0.1424800455570221, "learning_rate": 0.00011851927183972337, "loss": 1.1192, "step": 4012 }, { "epoch": 0.8158162228095142, "grad_norm": 0.12653465569019318, "learning_rate": 0.00011849893216719213, "loss": 1.0847, "step": 4013 }, { "epoch": 0.8160195161618216, "grad_norm": 0.13867273926734924, "learning_rate": 0.00011847859249466085, "loss": 1.104, "step": 4014 }, { "epoch": 0.8162228095141288, "grad_norm": 0.12459316849708557, "learning_rate": 0.00011845825282212956, "loss": 0.9407, "step": 4015 }, { "epoch": 0.8164261028664362, "grad_norm": 0.11136915534734726, "learning_rate": 0.00011843791314959829, "loss": 0.8423, "step": 4016 }, { "epoch": 0.8166293962187436, "grad_norm": 0.13761533796787262, "learning_rate": 0.00011841757347706704, "loss": 1.0824, "step": 4017 }, { "epoch": 0.816832689571051, "grad_norm": 0.12137118726968765, "learning_rate": 0.00011839723380453576, "loss": 0.8622, "step": 4018 }, { "epoch": 0.8170359829233584, "grad_norm": 0.13748018443584442, "learning_rate": 0.00011837689413200447, "loss": 1.0754, "step": 4019 }, { "epoch": 0.8172392762756658, "grad_norm": 0.13673032820224762, "learning_rate": 0.0001183565544594732, "loss": 1.1766, "step": 4020 }, { "epoch": 0.8174425696279731, "grad_norm": 0.13649223744869232, "learning_rate": 0.00011833621478694195, "loss": 1.0473, "step": 4021 }, { "epoch": 0.8176458629802805, "grad_norm": 0.1405959576368332, "learning_rate": 0.00011831587511441067, "loss": 1.1525, "step": 4022 }, { "epoch": 0.8178491563325879, "grad_norm": 0.13079403340816498, "learning_rate": 0.00011829553544187938, "loss": 1.0007, "step": 4023 }, { "epoch": 0.8180524496848953, "grad_norm": 0.1415160447359085, "learning_rate": 0.00011827519576934811, "loss": 1.1784, "step": 4024 }, { "epoch": 0.8182557430372027, "grad_norm": 0.15281791985034943, "learning_rate": 0.00011825485609681686, "loss": 1.2122, "step": 4025 }, { "epoch": 0.81845903638951, "grad_norm": 0.138424813747406, "learning_rate": 0.00011823451642428558, "loss": 1.171, "step": 4026 }, { "epoch": 0.8186623297418174, "grad_norm": 0.13693661987781525, "learning_rate": 0.0001182141767517543, "loss": 1.1098, "step": 4027 }, { "epoch": 0.8188656230941248, "grad_norm": 0.13167519867420197, "learning_rate": 0.00011819383707922302, "loss": 0.9446, "step": 4028 }, { "epoch": 0.8190689164464322, "grad_norm": 0.13005776703357697, "learning_rate": 0.00011817349740669177, "loss": 0.9349, "step": 4029 }, { "epoch": 0.8192722097987396, "grad_norm": 0.1340433955192566, "learning_rate": 0.0001181531577341605, "loss": 1.1293, "step": 4030 }, { "epoch": 0.819475503151047, "grad_norm": 0.13463421165943146, "learning_rate": 0.0001181328180616292, "loss": 0.995, "step": 4031 }, { "epoch": 0.8196787965033543, "grad_norm": 0.13709862530231476, "learning_rate": 0.00011811247838909793, "loss": 1.0882, "step": 4032 }, { "epoch": 0.8198820898556617, "grad_norm": 0.1316375732421875, "learning_rate": 0.00011809213871656666, "loss": 1.1652, "step": 4033 }, { "epoch": 0.8200853832079691, "grad_norm": 0.12954191863536835, "learning_rate": 0.00011807179904403541, "loss": 1.0258, "step": 4034 }, { "epoch": 0.8202886765602765, "grad_norm": 0.15265285968780518, "learning_rate": 0.00011805145937150413, "loss": 1.162, "step": 4035 }, { "epoch": 0.8204919699125839, "grad_norm": 0.14315763115882874, "learning_rate": 0.00011803111969897284, "loss": 1.1478, "step": 4036 }, { "epoch": 0.8206952632648913, "grad_norm": 0.1538948267698288, "learning_rate": 0.00011801078002644157, "loss": 1.2476, "step": 4037 }, { "epoch": 0.8208985566171986, "grad_norm": 0.13834591209888458, "learning_rate": 0.00011799044035391032, "loss": 1.0098, "step": 4038 }, { "epoch": 0.821101849969506, "grad_norm": 0.12419674545526505, "learning_rate": 0.00011797010068137904, "loss": 0.9885, "step": 4039 }, { "epoch": 0.8213051433218134, "grad_norm": 0.12338082492351532, "learning_rate": 0.00011794976100884775, "loss": 0.8725, "step": 4040 }, { "epoch": 0.8215084366741208, "grad_norm": 0.12143974751234055, "learning_rate": 0.00011792942133631648, "loss": 0.8032, "step": 4041 }, { "epoch": 0.8217117300264282, "grad_norm": 0.11844722181558609, "learning_rate": 0.00011790908166378523, "loss": 0.9635, "step": 4042 }, { "epoch": 0.8219150233787356, "grad_norm": 0.12964794039726257, "learning_rate": 0.00011788874199125395, "loss": 0.9951, "step": 4043 }, { "epoch": 0.8221183167310429, "grad_norm": 0.11839304864406586, "learning_rate": 0.00011786840231872267, "loss": 0.9279, "step": 4044 }, { "epoch": 0.8223216100833503, "grad_norm": 0.1318419724702835, "learning_rate": 0.00011784806264619139, "loss": 0.9903, "step": 4045 }, { "epoch": 0.8225249034356577, "grad_norm": 0.12436816096305847, "learning_rate": 0.00011782772297366014, "loss": 0.9732, "step": 4046 }, { "epoch": 0.822728196787965, "grad_norm": 0.14113591611385345, "learning_rate": 0.00011780738330112887, "loss": 1.1141, "step": 4047 }, { "epoch": 0.8229314901402724, "grad_norm": 0.12076539546251297, "learning_rate": 0.00011778704362859758, "loss": 0.9827, "step": 4048 }, { "epoch": 0.8231347834925798, "grad_norm": 0.13398289680480957, "learning_rate": 0.0001177667039560663, "loss": 1.0446, "step": 4049 }, { "epoch": 0.8233380768448871, "grad_norm": 0.14761167764663696, "learning_rate": 0.00011774636428353505, "loss": 1.1347, "step": 4050 }, { "epoch": 0.8235413701971945, "grad_norm": 0.13124649226665497, "learning_rate": 0.00011772602461100378, "loss": 1.1484, "step": 4051 }, { "epoch": 0.8237446635495019, "grad_norm": 0.1205100268125534, "learning_rate": 0.00011770568493847249, "loss": 0.8889, "step": 4052 }, { "epoch": 0.8239479569018093, "grad_norm": 0.14440268278121948, "learning_rate": 0.00011768534526594121, "loss": 1.0888, "step": 4053 }, { "epoch": 0.8241512502541167, "grad_norm": 0.12991003692150116, "learning_rate": 0.00011766500559340996, "loss": 1.1111, "step": 4054 }, { "epoch": 0.824354543606424, "grad_norm": 0.13993045687675476, "learning_rate": 0.00011764466592087869, "loss": 1.1363, "step": 4055 }, { "epoch": 0.8245578369587314, "grad_norm": 0.14084355533123016, "learning_rate": 0.0001176243262483474, "loss": 1.2527, "step": 4056 }, { "epoch": 0.8247611303110388, "grad_norm": 0.13060720264911652, "learning_rate": 0.00011760398657581612, "loss": 0.9782, "step": 4057 }, { "epoch": 0.8249644236633462, "grad_norm": 0.13368849456310272, "learning_rate": 0.00011758364690328488, "loss": 1.0588, "step": 4058 }, { "epoch": 0.8251677170156536, "grad_norm": 0.1443461924791336, "learning_rate": 0.0001175633072307536, "loss": 1.0314, "step": 4059 }, { "epoch": 0.825371010367961, "grad_norm": 0.1611374020576477, "learning_rate": 0.00011754296755822231, "loss": 1.0414, "step": 4060 }, { "epoch": 0.8255743037202683, "grad_norm": 0.13659845292568207, "learning_rate": 0.00011752262788569104, "loss": 0.9511, "step": 4061 }, { "epoch": 0.8257775970725757, "grad_norm": 0.13321594893932343, "learning_rate": 0.00011750228821315979, "loss": 1.1095, "step": 4062 }, { "epoch": 0.8259808904248831, "grad_norm": 0.13120754063129425, "learning_rate": 0.00011748194854062851, "loss": 1.011, "step": 4063 }, { "epoch": 0.8261841837771905, "grad_norm": 0.12870921194553375, "learning_rate": 0.00011746160886809722, "loss": 1.0436, "step": 4064 }, { "epoch": 0.8263874771294979, "grad_norm": 0.14104719460010529, "learning_rate": 0.00011744126919556595, "loss": 1.1661, "step": 4065 }, { "epoch": 0.8265907704818053, "grad_norm": 0.12897245585918427, "learning_rate": 0.0001174209295230347, "loss": 0.9585, "step": 4066 }, { "epoch": 0.8267940638341126, "grad_norm": 0.13888487219810486, "learning_rate": 0.00011740058985050342, "loss": 1.1015, "step": 4067 }, { "epoch": 0.82699735718642, "grad_norm": 0.14110806584358215, "learning_rate": 0.00011738025017797213, "loss": 1.2207, "step": 4068 }, { "epoch": 0.8272006505387274, "grad_norm": 0.14423434436321259, "learning_rate": 0.00011735991050544086, "loss": 1.1922, "step": 4069 }, { "epoch": 0.8274039438910348, "grad_norm": 0.12947557866573334, "learning_rate": 0.00011733957083290961, "loss": 1.0836, "step": 4070 }, { "epoch": 0.8276072372433422, "grad_norm": 0.12978830933570862, "learning_rate": 0.00011731923116037833, "loss": 1.0118, "step": 4071 }, { "epoch": 0.8278105305956496, "grad_norm": 0.1388140469789505, "learning_rate": 0.00011729889148784705, "loss": 1.0495, "step": 4072 }, { "epoch": 0.8280138239479569, "grad_norm": 0.13801120221614838, "learning_rate": 0.00011727855181531577, "loss": 1.2402, "step": 4073 }, { "epoch": 0.8282171173002643, "grad_norm": 0.12440764904022217, "learning_rate": 0.0001172582121427845, "loss": 0.9277, "step": 4074 }, { "epoch": 0.8284204106525717, "grad_norm": 0.14315354824066162, "learning_rate": 0.00011723787247025325, "loss": 1.0844, "step": 4075 }, { "epoch": 0.8286237040048791, "grad_norm": 0.15074683725833893, "learning_rate": 0.00011721753279772196, "loss": 1.2032, "step": 4076 }, { "epoch": 0.8288269973571865, "grad_norm": 0.14017608761787415, "learning_rate": 0.00011719719312519068, "loss": 1.1352, "step": 4077 }, { "epoch": 0.8290302907094937, "grad_norm": 0.1432233303785324, "learning_rate": 0.0001171768534526594, "loss": 1.0653, "step": 4078 }, { "epoch": 0.8292335840618011, "grad_norm": 0.14064320921897888, "learning_rate": 0.00011715651378012816, "loss": 1.0208, "step": 4079 }, { "epoch": 0.8294368774141085, "grad_norm": 0.13602322340011597, "learning_rate": 0.00011713617410759687, "loss": 0.9782, "step": 4080 }, { "epoch": 0.8296401707664159, "grad_norm": 0.14761172235012054, "learning_rate": 0.00011711583443506559, "loss": 1.1826, "step": 4081 }, { "epoch": 0.8298434641187233, "grad_norm": 0.14076586067676544, "learning_rate": 0.00011709549476253432, "loss": 1.0883, "step": 4082 }, { "epoch": 0.8300467574710307, "grad_norm": 0.1385519951581955, "learning_rate": 0.00011707515509000307, "loss": 1.0841, "step": 4083 }, { "epoch": 0.830250050823338, "grad_norm": 0.1392289251089096, "learning_rate": 0.00011705481541747178, "loss": 1.1386, "step": 4084 }, { "epoch": 0.8304533441756454, "grad_norm": 0.14094628393650055, "learning_rate": 0.0001170344757449405, "loss": 0.983, "step": 4085 }, { "epoch": 0.8306566375279528, "grad_norm": 0.13715529441833496, "learning_rate": 0.00011701413607240923, "loss": 1.089, "step": 4086 }, { "epoch": 0.8308599308802602, "grad_norm": 0.13627447187900543, "learning_rate": 0.00011699379639987798, "loss": 1.2017, "step": 4087 }, { "epoch": 0.8310632242325676, "grad_norm": 0.12237659841775894, "learning_rate": 0.00011697345672734669, "loss": 0.9606, "step": 4088 }, { "epoch": 0.831266517584875, "grad_norm": 0.12152927368879318, "learning_rate": 0.00011695311705481542, "loss": 1.0262, "step": 4089 }, { "epoch": 0.8314698109371823, "grad_norm": 0.13828657567501068, "learning_rate": 0.00011693277738228414, "loss": 1.0506, "step": 4090 }, { "epoch": 0.8316731042894897, "grad_norm": 0.13322405517101288, "learning_rate": 0.00011691243770975289, "loss": 0.9554, "step": 4091 }, { "epoch": 0.8318763976417971, "grad_norm": 0.1526733636856079, "learning_rate": 0.00011689209803722162, "loss": 1.2395, "step": 4092 }, { "epoch": 0.8320796909941045, "grad_norm": 0.12804892659187317, "learning_rate": 0.00011687175836469033, "loss": 1.0469, "step": 4093 }, { "epoch": 0.8322829843464119, "grad_norm": 0.13679049909114838, "learning_rate": 0.00011685141869215905, "loss": 1.069, "step": 4094 }, { "epoch": 0.8324862776987193, "grad_norm": 0.15435020625591278, "learning_rate": 0.0001168310790196278, "loss": 1.2415, "step": 4095 }, { "epoch": 0.8326895710510266, "grad_norm": 0.13897407054901123, "learning_rate": 0.00011681073934709653, "loss": 1.1871, "step": 4096 }, { "epoch": 0.832892864403334, "grad_norm": 0.12856152653694153, "learning_rate": 0.00011679039967456524, "loss": 1.0499, "step": 4097 }, { "epoch": 0.8330961577556414, "grad_norm": 0.12705758213996887, "learning_rate": 0.00011677006000203396, "loss": 1.0097, "step": 4098 }, { "epoch": 0.8332994511079488, "grad_norm": 0.1295822411775589, "learning_rate": 0.00011674972032950271, "loss": 0.8506, "step": 4099 }, { "epoch": 0.8335027444602562, "grad_norm": 0.13831810653209686, "learning_rate": 0.00011672938065697144, "loss": 1.1784, "step": 4100 }, { "epoch": 0.8337060378125636, "grad_norm": 0.13451896607875824, "learning_rate": 0.00011670904098444015, "loss": 1.1079, "step": 4101 }, { "epoch": 0.8339093311648709, "grad_norm": 0.12430407106876373, "learning_rate": 0.00011668870131190887, "loss": 0.8755, "step": 4102 }, { "epoch": 0.8341126245171783, "grad_norm": 0.13857564330101013, "learning_rate": 0.00011666836163937763, "loss": 1.0993, "step": 4103 }, { "epoch": 0.8343159178694857, "grad_norm": 0.14329898357391357, "learning_rate": 0.00011664802196684635, "loss": 1.2359, "step": 4104 }, { "epoch": 0.8345192112217931, "grad_norm": 0.14642906188964844, "learning_rate": 0.00011662768229431506, "loss": 1.1454, "step": 4105 }, { "epoch": 0.8347225045741005, "grad_norm": 0.1252523809671402, "learning_rate": 0.00011660734262178379, "loss": 1.0017, "step": 4106 }, { "epoch": 0.8349257979264078, "grad_norm": 0.12852495908737183, "learning_rate": 0.00011658700294925254, "loss": 1.0273, "step": 4107 }, { "epoch": 0.8351290912787152, "grad_norm": 0.12575671076774597, "learning_rate": 0.00011656666327672126, "loss": 0.9608, "step": 4108 }, { "epoch": 0.8353323846310226, "grad_norm": 0.15008383989334106, "learning_rate": 0.00011654632360418997, "loss": 1.1065, "step": 4109 }, { "epoch": 0.83553567798333, "grad_norm": 0.14088520407676697, "learning_rate": 0.0001165259839316587, "loss": 1.1224, "step": 4110 }, { "epoch": 0.8357389713356373, "grad_norm": 0.12474369257688522, "learning_rate": 0.00011650564425912745, "loss": 0.9834, "step": 4111 }, { "epoch": 0.8359422646879447, "grad_norm": 0.1329812854528427, "learning_rate": 0.00011648530458659617, "loss": 1.0876, "step": 4112 }, { "epoch": 0.836145558040252, "grad_norm": 0.12517108023166656, "learning_rate": 0.00011646496491406488, "loss": 0.9772, "step": 4113 }, { "epoch": 0.8363488513925594, "grad_norm": 0.13506385684013367, "learning_rate": 0.00011644462524153361, "loss": 1.0004, "step": 4114 }, { "epoch": 0.8365521447448668, "grad_norm": 0.13172465562820435, "learning_rate": 0.00011642428556900233, "loss": 1.0687, "step": 4115 }, { "epoch": 0.8367554380971742, "grad_norm": 0.13177163898944855, "learning_rate": 0.00011640394589647108, "loss": 1.0225, "step": 4116 }, { "epoch": 0.8369587314494816, "grad_norm": 0.1380792260169983, "learning_rate": 0.0001163836062239398, "loss": 1.0243, "step": 4117 }, { "epoch": 0.837162024801789, "grad_norm": 0.13012027740478516, "learning_rate": 0.00011636326655140852, "loss": 0.9618, "step": 4118 }, { "epoch": 0.8373653181540963, "grad_norm": 0.13312657177448273, "learning_rate": 0.00011634292687887724, "loss": 0.9299, "step": 4119 }, { "epoch": 0.8375686115064037, "grad_norm": 0.1449914276599884, "learning_rate": 0.000116322587206346, "loss": 1.1712, "step": 4120 }, { "epoch": 0.8377719048587111, "grad_norm": 0.14391463994979858, "learning_rate": 0.0001163022475338147, "loss": 1.2165, "step": 4121 }, { "epoch": 0.8379751982110185, "grad_norm": 0.14427267014980316, "learning_rate": 0.00011628190786128343, "loss": 1.1486, "step": 4122 }, { "epoch": 0.8381784915633259, "grad_norm": 0.15920564532279968, "learning_rate": 0.00011626156818875216, "loss": 1.3503, "step": 4123 }, { "epoch": 0.8383817849156333, "grad_norm": 0.14215265214443207, "learning_rate": 0.00011624122851622091, "loss": 1.0607, "step": 4124 }, { "epoch": 0.8385850782679406, "grad_norm": 0.12936022877693176, "learning_rate": 0.00011622088884368962, "loss": 0.9739, "step": 4125 }, { "epoch": 0.838788371620248, "grad_norm": 0.13270482420921326, "learning_rate": 0.00011620054917115834, "loss": 1.006, "step": 4126 }, { "epoch": 0.8389916649725554, "grad_norm": 0.13230706751346588, "learning_rate": 0.00011618020949862707, "loss": 1.0016, "step": 4127 }, { "epoch": 0.8391949583248628, "grad_norm": 0.1272687166929245, "learning_rate": 0.00011615986982609582, "loss": 0.8975, "step": 4128 }, { "epoch": 0.8393982516771702, "grad_norm": 0.13361401855945587, "learning_rate": 0.00011613953015356453, "loss": 0.9294, "step": 4129 }, { "epoch": 0.8396015450294775, "grad_norm": 0.12465297430753708, "learning_rate": 0.00011611919048103325, "loss": 0.8893, "step": 4130 }, { "epoch": 0.8398048383817849, "grad_norm": 0.1458294540643692, "learning_rate": 0.00011609885080850198, "loss": 1.1928, "step": 4131 }, { "epoch": 0.8400081317340923, "grad_norm": 0.1325213760137558, "learning_rate": 0.00011607851113597073, "loss": 0.9254, "step": 4132 }, { "epoch": 0.8402114250863997, "grad_norm": 0.1297135353088379, "learning_rate": 0.00011605817146343944, "loss": 0.8737, "step": 4133 }, { "epoch": 0.8404147184387071, "grad_norm": 0.1350976824760437, "learning_rate": 0.00011603783179090817, "loss": 1.1101, "step": 4134 }, { "epoch": 0.8406180117910145, "grad_norm": 0.13058003783226013, "learning_rate": 0.00011601749211837689, "loss": 1.0, "step": 4135 }, { "epoch": 0.8408213051433218, "grad_norm": 0.13314960896968842, "learning_rate": 0.00011599715244584564, "loss": 0.9872, "step": 4136 }, { "epoch": 0.8410245984956292, "grad_norm": 0.12905332446098328, "learning_rate": 0.00011597681277331435, "loss": 1.0097, "step": 4137 }, { "epoch": 0.8412278918479366, "grad_norm": 0.12162060290575027, "learning_rate": 0.00011595647310078308, "loss": 0.8788, "step": 4138 }, { "epoch": 0.841431185200244, "grad_norm": 0.12525275349617004, "learning_rate": 0.0001159361334282518, "loss": 0.8742, "step": 4139 }, { "epoch": 0.8416344785525514, "grad_norm": 0.13911886513233185, "learning_rate": 0.00011591579375572055, "loss": 1.3075, "step": 4140 }, { "epoch": 0.8418377719048588, "grad_norm": 0.13579173386096954, "learning_rate": 0.00011589545408318926, "loss": 1.0646, "step": 4141 }, { "epoch": 0.842041065257166, "grad_norm": 0.14429797232151031, "learning_rate": 0.00011587511441065799, "loss": 1.1266, "step": 4142 }, { "epoch": 0.8422443586094734, "grad_norm": 0.11546068638563156, "learning_rate": 0.00011585477473812671, "loss": 0.8928, "step": 4143 }, { "epoch": 0.8424476519617808, "grad_norm": 0.14215877652168274, "learning_rate": 0.00011583443506559546, "loss": 1.1816, "step": 4144 }, { "epoch": 0.8426509453140882, "grad_norm": 0.12982290983200073, "learning_rate": 0.00011581409539306417, "loss": 0.9241, "step": 4145 }, { "epoch": 0.8428542386663956, "grad_norm": 0.13759194314479828, "learning_rate": 0.0001157937557205329, "loss": 1.0254, "step": 4146 }, { "epoch": 0.843057532018703, "grad_norm": 0.13671040534973145, "learning_rate": 0.00011577341604800162, "loss": 1.1823, "step": 4147 }, { "epoch": 0.8432608253710103, "grad_norm": 0.11964955925941467, "learning_rate": 0.00011575307637547038, "loss": 0.99, "step": 4148 }, { "epoch": 0.8434641187233177, "grad_norm": 0.1388668566942215, "learning_rate": 0.0001157327367029391, "loss": 1.0667, "step": 4149 }, { "epoch": 0.8436674120756251, "grad_norm": 0.12363268435001373, "learning_rate": 0.00011571239703040781, "loss": 1.0156, "step": 4150 }, { "epoch": 0.8438707054279325, "grad_norm": 0.14275164902210236, "learning_rate": 0.00011569205735787654, "loss": 1.2653, "step": 4151 }, { "epoch": 0.8440739987802399, "grad_norm": 0.13494303822517395, "learning_rate": 0.00011567171768534529, "loss": 1.148, "step": 4152 }, { "epoch": 0.8442772921325473, "grad_norm": 0.12230674922466278, "learning_rate": 0.00011565137801281401, "loss": 0.9444, "step": 4153 }, { "epoch": 0.8444805854848546, "grad_norm": 0.12223172187805176, "learning_rate": 0.00011563103834028272, "loss": 1.0189, "step": 4154 }, { "epoch": 0.844683878837162, "grad_norm": 0.12231465429067612, "learning_rate": 0.00011561069866775145, "loss": 0.9234, "step": 4155 }, { "epoch": 0.8448871721894694, "grad_norm": 0.13856825232505798, "learning_rate": 0.00011559035899522017, "loss": 1.0455, "step": 4156 }, { "epoch": 0.8450904655417768, "grad_norm": 0.12258224934339523, "learning_rate": 0.00011557001932268892, "loss": 0.8723, "step": 4157 }, { "epoch": 0.8452937588940842, "grad_norm": 0.13906900584697723, "learning_rate": 0.00011554967965015763, "loss": 1.0314, "step": 4158 }, { "epoch": 0.8454970522463915, "grad_norm": 0.13625988364219666, "learning_rate": 0.00011552933997762636, "loss": 1.0259, "step": 4159 }, { "epoch": 0.8457003455986989, "grad_norm": 0.13316601514816284, "learning_rate": 0.00011550900030509508, "loss": 0.9816, "step": 4160 }, { "epoch": 0.8459036389510063, "grad_norm": 0.1430322825908661, "learning_rate": 0.00011548866063256383, "loss": 1.0748, "step": 4161 }, { "epoch": 0.8461069323033137, "grad_norm": 0.14025886356830597, "learning_rate": 0.00011546832096003254, "loss": 1.0884, "step": 4162 }, { "epoch": 0.8463102256556211, "grad_norm": 0.12264370173215866, "learning_rate": 0.00011544798128750127, "loss": 0.844, "step": 4163 }, { "epoch": 0.8465135190079285, "grad_norm": 0.14598575234413147, "learning_rate": 0.00011542764161497, "loss": 1.1999, "step": 4164 }, { "epoch": 0.8467168123602358, "grad_norm": 0.139155313372612, "learning_rate": 0.00011540730194243875, "loss": 1.0555, "step": 4165 }, { "epoch": 0.8469201057125432, "grad_norm": 0.13013023138046265, "learning_rate": 0.00011538696226990746, "loss": 1.0511, "step": 4166 }, { "epoch": 0.8471233990648506, "grad_norm": 0.13424082100391388, "learning_rate": 0.00011536662259737618, "loss": 1.0551, "step": 4167 }, { "epoch": 0.847326692417158, "grad_norm": 0.14205624163150787, "learning_rate": 0.0001153462829248449, "loss": 1.1582, "step": 4168 }, { "epoch": 0.8475299857694654, "grad_norm": 0.13570645451545715, "learning_rate": 0.00011532594325231366, "loss": 1.0911, "step": 4169 }, { "epoch": 0.8477332791217728, "grad_norm": 0.1343654841184616, "learning_rate": 0.00011530560357978237, "loss": 1.0519, "step": 4170 }, { "epoch": 0.84793657247408, "grad_norm": 0.12332738190889359, "learning_rate": 0.00011528526390725109, "loss": 0.9967, "step": 4171 }, { "epoch": 0.8481398658263875, "grad_norm": 0.13061444461345673, "learning_rate": 0.00011526492423471982, "loss": 0.998, "step": 4172 }, { "epoch": 0.8483431591786949, "grad_norm": 0.11777007579803467, "learning_rate": 0.00011524458456218857, "loss": 0.8942, "step": 4173 }, { "epoch": 0.8485464525310022, "grad_norm": 0.13091976940631866, "learning_rate": 0.00011522424488965728, "loss": 1.0612, "step": 4174 }, { "epoch": 0.8487497458833096, "grad_norm": 0.13466595113277435, "learning_rate": 0.000115203905217126, "loss": 1.0211, "step": 4175 }, { "epoch": 0.848953039235617, "grad_norm": 0.12775756418704987, "learning_rate": 0.00011518356554459473, "loss": 0.9925, "step": 4176 }, { "epoch": 0.8491563325879243, "grad_norm": 0.144356831908226, "learning_rate": 0.00011516322587206348, "loss": 1.1657, "step": 4177 }, { "epoch": 0.8493596259402317, "grad_norm": 0.1248125433921814, "learning_rate": 0.00011514288619953219, "loss": 0.9492, "step": 4178 }, { "epoch": 0.8495629192925391, "grad_norm": 0.13141238689422607, "learning_rate": 0.00011512254652700091, "loss": 0.9265, "step": 4179 }, { "epoch": 0.8497662126448465, "grad_norm": 0.13851980865001678, "learning_rate": 0.00011510220685446964, "loss": 1.1475, "step": 4180 }, { "epoch": 0.8499695059971539, "grad_norm": 0.13344109058380127, "learning_rate": 0.00011508186718193839, "loss": 1.0393, "step": 4181 }, { "epoch": 0.8501727993494613, "grad_norm": 0.15251140296459198, "learning_rate": 0.0001150615275094071, "loss": 1.1769, "step": 4182 }, { "epoch": 0.8503760927017686, "grad_norm": 0.1376708298921585, "learning_rate": 0.00011504118783687583, "loss": 1.1571, "step": 4183 }, { "epoch": 0.850579386054076, "grad_norm": 0.1312796026468277, "learning_rate": 0.00011502084816434455, "loss": 1.0176, "step": 4184 }, { "epoch": 0.8507826794063834, "grad_norm": 0.13133344054222107, "learning_rate": 0.0001150005084918133, "loss": 1.1126, "step": 4185 }, { "epoch": 0.8509859727586908, "grad_norm": 0.1404520869255066, "learning_rate": 0.00011498016881928201, "loss": 1.0467, "step": 4186 }, { "epoch": 0.8511892661109982, "grad_norm": 0.13041868805885315, "learning_rate": 0.00011495982914675074, "loss": 0.9282, "step": 4187 }, { "epoch": 0.8513925594633055, "grad_norm": 0.1341453641653061, "learning_rate": 0.00011493948947421946, "loss": 0.9568, "step": 4188 }, { "epoch": 0.8515958528156129, "grad_norm": 0.13047213852405548, "learning_rate": 0.00011491914980168821, "loss": 0.9724, "step": 4189 }, { "epoch": 0.8517991461679203, "grad_norm": 0.12841585278511047, "learning_rate": 0.00011489881012915692, "loss": 0.9021, "step": 4190 }, { "epoch": 0.8520024395202277, "grad_norm": 0.1475822478532791, "learning_rate": 0.00011487847045662565, "loss": 1.1875, "step": 4191 }, { "epoch": 0.8522057328725351, "grad_norm": 0.12445596605539322, "learning_rate": 0.00011485813078409437, "loss": 1.0242, "step": 4192 }, { "epoch": 0.8524090262248425, "grad_norm": 0.12460153549909592, "learning_rate": 0.00011483779111156313, "loss": 1.048, "step": 4193 }, { "epoch": 0.8526123195771498, "grad_norm": 0.1400919258594513, "learning_rate": 0.00011481745143903184, "loss": 1.0866, "step": 4194 }, { "epoch": 0.8528156129294572, "grad_norm": 0.13624945282936096, "learning_rate": 0.00011479711176650056, "loss": 1.0692, "step": 4195 }, { "epoch": 0.8530189062817646, "grad_norm": 0.13561497628688812, "learning_rate": 0.00011477677209396928, "loss": 1.1576, "step": 4196 }, { "epoch": 0.853222199634072, "grad_norm": 0.13115760684013367, "learning_rate": 0.00011475643242143801, "loss": 1.0431, "step": 4197 }, { "epoch": 0.8534254929863794, "grad_norm": 0.14327464997768402, "learning_rate": 0.00011473609274890675, "loss": 1.1245, "step": 4198 }, { "epoch": 0.8536287863386868, "grad_norm": 0.14745375514030457, "learning_rate": 0.00011471575307637547, "loss": 1.1246, "step": 4199 }, { "epoch": 0.8538320796909941, "grad_norm": 0.13001082837581635, "learning_rate": 0.0001146954134038442, "loss": 0.979, "step": 4200 }, { "epoch": 0.8540353730433015, "grad_norm": 0.14172659814357758, "learning_rate": 0.00011467507373131292, "loss": 1.0395, "step": 4201 }, { "epoch": 0.8542386663956089, "grad_norm": 0.12964913249015808, "learning_rate": 0.00011465473405878166, "loss": 0.9479, "step": 4202 }, { "epoch": 0.8544419597479163, "grad_norm": 0.13716383278369904, "learning_rate": 0.00011463439438625038, "loss": 1.1041, "step": 4203 }, { "epoch": 0.8546452531002237, "grad_norm": 0.1309032440185547, "learning_rate": 0.00011461405471371911, "loss": 1.0235, "step": 4204 }, { "epoch": 0.854848546452531, "grad_norm": 0.13954511284828186, "learning_rate": 0.00011459371504118783, "loss": 1.077, "step": 4205 }, { "epoch": 0.8550518398048383, "grad_norm": 0.12911508977413177, "learning_rate": 0.00011457337536865657, "loss": 1.0156, "step": 4206 }, { "epoch": 0.8552551331571457, "grad_norm": 0.13105571269989014, "learning_rate": 0.0001145530356961253, "loss": 0.9734, "step": 4207 }, { "epoch": 0.8554584265094531, "grad_norm": 0.1499045491218567, "learning_rate": 0.00011453269602359402, "loss": 1.0985, "step": 4208 }, { "epoch": 0.8556617198617605, "grad_norm": 0.1255357265472412, "learning_rate": 0.00011451235635106274, "loss": 1.0147, "step": 4209 }, { "epoch": 0.8558650132140679, "grad_norm": 0.1335058957338333, "learning_rate": 0.0001144920166785315, "loss": 1.0306, "step": 4210 }, { "epoch": 0.8560683065663752, "grad_norm": 0.12359452992677689, "learning_rate": 0.0001144716770060002, "loss": 0.8835, "step": 4211 }, { "epoch": 0.8562715999186826, "grad_norm": 0.14083559811115265, "learning_rate": 0.00011445133733346893, "loss": 0.9393, "step": 4212 }, { "epoch": 0.85647489327099, "grad_norm": 0.13426551222801208, "learning_rate": 0.00011443099766093765, "loss": 0.8895, "step": 4213 }, { "epoch": 0.8566781866232974, "grad_norm": 0.13291719555854797, "learning_rate": 0.0001144106579884064, "loss": 1.0563, "step": 4214 }, { "epoch": 0.8568814799756048, "grad_norm": 0.14882031083106995, "learning_rate": 0.00011439031831587512, "loss": 1.1625, "step": 4215 }, { "epoch": 0.8570847733279122, "grad_norm": 0.13581587374210358, "learning_rate": 0.00011436997864334384, "loss": 1.065, "step": 4216 }, { "epoch": 0.8572880666802195, "grad_norm": 0.1269901543855667, "learning_rate": 0.00011434963897081257, "loss": 1.045, "step": 4217 }, { "epoch": 0.8574913600325269, "grad_norm": 0.13057155907154083, "learning_rate": 0.00011432929929828132, "loss": 0.8866, "step": 4218 }, { "epoch": 0.8576946533848343, "grad_norm": 0.1503707617521286, "learning_rate": 0.00011430895962575003, "loss": 1.1888, "step": 4219 }, { "epoch": 0.8578979467371417, "grad_norm": 0.1276797503232956, "learning_rate": 0.00011428861995321875, "loss": 1.0497, "step": 4220 }, { "epoch": 0.8581012400894491, "grad_norm": 0.15582577884197235, "learning_rate": 0.00011426828028068748, "loss": 1.2019, "step": 4221 }, { "epoch": 0.8583045334417565, "grad_norm": 0.1253650039434433, "learning_rate": 0.00011424794060815623, "loss": 0.9089, "step": 4222 }, { "epoch": 0.8585078267940638, "grad_norm": 0.13212646543979645, "learning_rate": 0.00011422760093562494, "loss": 0.8912, "step": 4223 }, { "epoch": 0.8587111201463712, "grad_norm": 0.13876405358314514, "learning_rate": 0.00011420726126309366, "loss": 1.0144, "step": 4224 }, { "epoch": 0.8589144134986786, "grad_norm": 0.15017178654670715, "learning_rate": 0.00011418692159056239, "loss": 1.2437, "step": 4225 }, { "epoch": 0.859117706850986, "grad_norm": 0.1450318694114685, "learning_rate": 0.00011416658191803114, "loss": 1.1114, "step": 4226 }, { "epoch": 0.8593210002032934, "grad_norm": 0.14307589828968048, "learning_rate": 0.00011414624224549985, "loss": 1.1785, "step": 4227 }, { "epoch": 0.8595242935556008, "grad_norm": 0.13084810972213745, "learning_rate": 0.00011412590257296858, "loss": 1.0871, "step": 4228 }, { "epoch": 0.8597275869079081, "grad_norm": 0.12914970517158508, "learning_rate": 0.0001141055629004373, "loss": 1.0432, "step": 4229 }, { "epoch": 0.8599308802602155, "grad_norm": 0.14787475764751434, "learning_rate": 0.00011408522322790605, "loss": 1.0795, "step": 4230 }, { "epoch": 0.8601341736125229, "grad_norm": 0.1235564798116684, "learning_rate": 0.00011406488355537476, "loss": 0.946, "step": 4231 }, { "epoch": 0.8603374669648303, "grad_norm": 0.11737848818302155, "learning_rate": 0.00011404454388284349, "loss": 0.9549, "step": 4232 }, { "epoch": 0.8605407603171377, "grad_norm": 0.12434041500091553, "learning_rate": 0.00011402420421031221, "loss": 1.0672, "step": 4233 }, { "epoch": 0.8607440536694451, "grad_norm": 0.14527469873428345, "learning_rate": 0.00011400386453778096, "loss": 1.1508, "step": 4234 }, { "epoch": 0.8609473470217524, "grad_norm": 0.14363646507263184, "learning_rate": 0.00011398352486524967, "loss": 1.1063, "step": 4235 }, { "epoch": 0.8611506403740598, "grad_norm": 0.14891605079174042, "learning_rate": 0.0001139631851927184, "loss": 1.1027, "step": 4236 }, { "epoch": 0.8613539337263671, "grad_norm": 0.13269458711147308, "learning_rate": 0.00011394284552018712, "loss": 0.9506, "step": 4237 }, { "epoch": 0.8615572270786745, "grad_norm": 0.13667765259742737, "learning_rate": 0.00011392250584765585, "loss": 1.0362, "step": 4238 }, { "epoch": 0.861760520430982, "grad_norm": 0.1343078464269638, "learning_rate": 0.00011390216617512459, "loss": 1.0644, "step": 4239 }, { "epoch": 0.8619638137832892, "grad_norm": 0.12829913198947906, "learning_rate": 0.00011388182650259331, "loss": 0.9775, "step": 4240 }, { "epoch": 0.8621671071355966, "grad_norm": 0.14578650891780853, "learning_rate": 0.00011386148683006203, "loss": 1.1502, "step": 4241 }, { "epoch": 0.862370400487904, "grad_norm": 0.12001452594995499, "learning_rate": 0.00011384114715753076, "loss": 0.9496, "step": 4242 }, { "epoch": 0.8625736938402114, "grad_norm": 0.14932505786418915, "learning_rate": 0.0001138208074849995, "loss": 1.0731, "step": 4243 }, { "epoch": 0.8627769871925188, "grad_norm": 0.12832188606262207, "learning_rate": 0.00011380046781246822, "loss": 0.9428, "step": 4244 }, { "epoch": 0.8629802805448262, "grad_norm": 0.15174297988414764, "learning_rate": 0.00011378012813993695, "loss": 1.259, "step": 4245 }, { "epoch": 0.8631835738971335, "grad_norm": 0.1397685408592224, "learning_rate": 0.00011375978846740567, "loss": 1.204, "step": 4246 }, { "epoch": 0.8633868672494409, "grad_norm": 0.1386864334344864, "learning_rate": 0.00011373944879487441, "loss": 1.2689, "step": 4247 }, { "epoch": 0.8635901606017483, "grad_norm": 0.13151347637176514, "learning_rate": 0.00011371910912234313, "loss": 0.9938, "step": 4248 }, { "epoch": 0.8637934539540557, "grad_norm": 0.15212032198905945, "learning_rate": 0.00011369876944981186, "loss": 1.0839, "step": 4249 }, { "epoch": 0.8639967473063631, "grad_norm": 0.1537848860025406, "learning_rate": 0.00011367842977728058, "loss": 1.1586, "step": 4250 }, { "epoch": 0.8642000406586705, "grad_norm": 0.12098225951194763, "learning_rate": 0.00011365809010474932, "loss": 0.9585, "step": 4251 }, { "epoch": 0.8644033340109778, "grad_norm": 0.1198868602514267, "learning_rate": 0.00011363775043221804, "loss": 0.9055, "step": 4252 }, { "epoch": 0.8646066273632852, "grad_norm": 0.13080951571464539, "learning_rate": 0.00011361741075968677, "loss": 1.0171, "step": 4253 }, { "epoch": 0.8648099207155926, "grad_norm": 0.13523563742637634, "learning_rate": 0.0001135970710871555, "loss": 0.9592, "step": 4254 }, { "epoch": 0.8650132140679, "grad_norm": 0.12738974392414093, "learning_rate": 0.00011357673141462423, "loss": 0.9856, "step": 4255 }, { "epoch": 0.8652165074202074, "grad_norm": 0.13558736443519592, "learning_rate": 0.00011355639174209296, "loss": 1.0518, "step": 4256 }, { "epoch": 0.8654198007725148, "grad_norm": 0.13021017611026764, "learning_rate": 0.00011353605206956168, "loss": 0.9969, "step": 4257 }, { "epoch": 0.8656230941248221, "grad_norm": 0.13365107774734497, "learning_rate": 0.0001135157123970304, "loss": 1.0569, "step": 4258 }, { "epoch": 0.8658263874771295, "grad_norm": 0.14550118148326874, "learning_rate": 0.00011349537272449914, "loss": 1.0975, "step": 4259 }, { "epoch": 0.8660296808294369, "grad_norm": 0.12725263833999634, "learning_rate": 0.00011347503305196787, "loss": 0.9558, "step": 4260 }, { "epoch": 0.8662329741817443, "grad_norm": 0.13404077291488647, "learning_rate": 0.00011345469337943659, "loss": 1.1282, "step": 4261 }, { "epoch": 0.8664362675340517, "grad_norm": 0.12755300104618073, "learning_rate": 0.00011343435370690532, "loss": 1.0461, "step": 4262 }, { "epoch": 0.866639560886359, "grad_norm": 0.13699626922607422, "learning_rate": 0.00011341401403437405, "loss": 1.0606, "step": 4263 }, { "epoch": 0.8668428542386664, "grad_norm": 0.13077600300312042, "learning_rate": 0.00011339367436184278, "loss": 1.0505, "step": 4264 }, { "epoch": 0.8670461475909738, "grad_norm": 0.13652461767196655, "learning_rate": 0.0001133733346893115, "loss": 0.9115, "step": 4265 }, { "epoch": 0.8672494409432812, "grad_norm": 0.1255892813205719, "learning_rate": 0.00011335299501678023, "loss": 0.9152, "step": 4266 }, { "epoch": 0.8674527342955886, "grad_norm": 0.13048523664474487, "learning_rate": 0.00011333265534424898, "loss": 1.0926, "step": 4267 }, { "epoch": 0.867656027647896, "grad_norm": 0.13757598400115967, "learning_rate": 0.00011331231567171769, "loss": 0.9713, "step": 4268 }, { "epoch": 0.8678593210002032, "grad_norm": 0.12873396277427673, "learning_rate": 0.00011329197599918641, "loss": 1.1876, "step": 4269 }, { "epoch": 0.8680626143525106, "grad_norm": 0.12738154828548431, "learning_rate": 0.00011327163632665514, "loss": 1.0222, "step": 4270 }, { "epoch": 0.868265907704818, "grad_norm": 0.11265822499990463, "learning_rate": 0.00011325129665412389, "loss": 0.8971, "step": 4271 }, { "epoch": 0.8684692010571254, "grad_norm": 0.1351097822189331, "learning_rate": 0.0001132309569815926, "loss": 1.0784, "step": 4272 }, { "epoch": 0.8686724944094328, "grad_norm": 0.1455054134130478, "learning_rate": 0.00011321061730906133, "loss": 1.1519, "step": 4273 }, { "epoch": 0.8688757877617402, "grad_norm": 0.13560084998607635, "learning_rate": 0.00011319027763653005, "loss": 1.1245, "step": 4274 }, { "epoch": 0.8690790811140475, "grad_norm": 0.13816951215267181, "learning_rate": 0.0001131699379639988, "loss": 1.1679, "step": 4275 }, { "epoch": 0.8692823744663549, "grad_norm": 0.13416263461112976, "learning_rate": 0.00011314959829146751, "loss": 1.0392, "step": 4276 }, { "epoch": 0.8694856678186623, "grad_norm": 0.1267019808292389, "learning_rate": 0.00011312925861893624, "loss": 0.9492, "step": 4277 }, { "epoch": 0.8696889611709697, "grad_norm": 0.14063285291194916, "learning_rate": 0.00011310891894640496, "loss": 1.1432, "step": 4278 }, { "epoch": 0.8698922545232771, "grad_norm": 0.149309441447258, "learning_rate": 0.00011308857927387371, "loss": 1.2309, "step": 4279 }, { "epoch": 0.8700955478755845, "grad_norm": 0.1392187476158142, "learning_rate": 0.00011306823960134242, "loss": 1.0393, "step": 4280 }, { "epoch": 0.8702988412278918, "grad_norm": 0.12659290432929993, "learning_rate": 0.00011304789992881115, "loss": 0.8555, "step": 4281 }, { "epoch": 0.8705021345801992, "grad_norm": 0.11759068816900253, "learning_rate": 0.00011302756025627987, "loss": 0.938, "step": 4282 }, { "epoch": 0.8707054279325066, "grad_norm": 0.13261142373085022, "learning_rate": 0.0001130072205837486, "loss": 1.0559, "step": 4283 }, { "epoch": 0.870908721284814, "grad_norm": 0.12003304809331894, "learning_rate": 0.00011298688091121734, "loss": 0.9442, "step": 4284 }, { "epoch": 0.8711120146371214, "grad_norm": 0.13861103355884552, "learning_rate": 0.00011296654123868606, "loss": 1.0972, "step": 4285 }, { "epoch": 0.8713153079894288, "grad_norm": 0.12716351449489594, "learning_rate": 0.00011294620156615478, "loss": 0.9511, "step": 4286 }, { "epoch": 0.8715186013417361, "grad_norm": 0.1347339004278183, "learning_rate": 0.00011292586189362351, "loss": 0.9783, "step": 4287 }, { "epoch": 0.8717218946940435, "grad_norm": 0.14212962985038757, "learning_rate": 0.00011290552222109225, "loss": 0.9568, "step": 4288 }, { "epoch": 0.8719251880463509, "grad_norm": 0.11800102889537811, "learning_rate": 0.00011288518254856097, "loss": 0.9524, "step": 4289 }, { "epoch": 0.8721284813986583, "grad_norm": 0.1591940075159073, "learning_rate": 0.0001128648428760297, "loss": 1.2547, "step": 4290 }, { "epoch": 0.8723317747509657, "grad_norm": 0.12767143547534943, "learning_rate": 0.00011284450320349842, "loss": 1.0574, "step": 4291 }, { "epoch": 0.872535068103273, "grad_norm": 0.1308542639017105, "learning_rate": 0.00011282416353096716, "loss": 0.9813, "step": 4292 }, { "epoch": 0.8727383614555804, "grad_norm": 0.15340617299079895, "learning_rate": 0.00011280382385843588, "loss": 1.2684, "step": 4293 }, { "epoch": 0.8729416548078878, "grad_norm": 0.14063572883605957, "learning_rate": 0.00011278348418590461, "loss": 1.0218, "step": 4294 }, { "epoch": 0.8731449481601952, "grad_norm": 0.12035755813121796, "learning_rate": 0.00011276314451337333, "loss": 1.0237, "step": 4295 }, { "epoch": 0.8733482415125026, "grad_norm": 0.1521058976650238, "learning_rate": 0.00011274280484084207, "loss": 1.1983, "step": 4296 }, { "epoch": 0.87355153486481, "grad_norm": 0.1308029145002365, "learning_rate": 0.0001127224651683108, "loss": 0.9343, "step": 4297 }, { "epoch": 0.8737548282171173, "grad_norm": 0.13655021786689758, "learning_rate": 0.00011270212549577952, "loss": 1.1253, "step": 4298 }, { "epoch": 0.8739581215694247, "grad_norm": 0.13754834234714508, "learning_rate": 0.00011268178582324824, "loss": 1.1018, "step": 4299 }, { "epoch": 0.874161414921732, "grad_norm": 0.14539092779159546, "learning_rate": 0.00011266144615071698, "loss": 1.1647, "step": 4300 }, { "epoch": 0.8743647082740394, "grad_norm": 0.1390954852104187, "learning_rate": 0.0001126411064781857, "loss": 1.0717, "step": 4301 }, { "epoch": 0.8745680016263468, "grad_norm": 0.13942857086658478, "learning_rate": 0.00011262076680565443, "loss": 1.0457, "step": 4302 }, { "epoch": 0.8747712949786542, "grad_norm": 0.13453049957752228, "learning_rate": 0.00011260042713312315, "loss": 1.1522, "step": 4303 }, { "epoch": 0.8749745883309615, "grad_norm": 0.13158947229385376, "learning_rate": 0.00011258008746059189, "loss": 0.969, "step": 4304 }, { "epoch": 0.8751778816832689, "grad_norm": 0.1394949108362198, "learning_rate": 0.00011255974778806062, "loss": 0.9924, "step": 4305 }, { "epoch": 0.8753811750355763, "grad_norm": 0.14436380565166473, "learning_rate": 0.00011253940811552934, "loss": 1.0416, "step": 4306 }, { "epoch": 0.8755844683878837, "grad_norm": 0.12444054335355759, "learning_rate": 0.00011251906844299807, "loss": 0.9734, "step": 4307 }, { "epoch": 0.8757877617401911, "grad_norm": 0.1411658078432083, "learning_rate": 0.0001124987287704668, "loss": 0.9828, "step": 4308 }, { "epoch": 0.8759910550924985, "grad_norm": 0.13278289139270782, "learning_rate": 0.00011247838909793553, "loss": 1.0184, "step": 4309 }, { "epoch": 0.8761943484448058, "grad_norm": 0.13630905747413635, "learning_rate": 0.00011245804942540425, "loss": 1.1146, "step": 4310 }, { "epoch": 0.8763976417971132, "grad_norm": 0.12063156068325043, "learning_rate": 0.00011243770975287298, "loss": 0.9559, "step": 4311 }, { "epoch": 0.8766009351494206, "grad_norm": 0.12756480276584625, "learning_rate": 0.00011241737008034172, "loss": 0.935, "step": 4312 }, { "epoch": 0.876804228501728, "grad_norm": 0.1388019174337387, "learning_rate": 0.00011239703040781044, "loss": 1.102, "step": 4313 }, { "epoch": 0.8770075218540354, "grad_norm": 0.13372951745986938, "learning_rate": 0.00011237669073527916, "loss": 0.9758, "step": 4314 }, { "epoch": 0.8772108152063427, "grad_norm": 0.12079128623008728, "learning_rate": 0.00011235635106274789, "loss": 0.9984, "step": 4315 }, { "epoch": 0.8774141085586501, "grad_norm": 0.1439303755760193, "learning_rate": 0.00011233601139021663, "loss": 1.131, "step": 4316 }, { "epoch": 0.8776174019109575, "grad_norm": 0.131261944770813, "learning_rate": 0.00011231567171768535, "loss": 0.9801, "step": 4317 }, { "epoch": 0.8778206952632649, "grad_norm": 0.1571865677833557, "learning_rate": 0.00011229533204515408, "loss": 1.0769, "step": 4318 }, { "epoch": 0.8780239886155723, "grad_norm": 0.1357412189245224, "learning_rate": 0.0001122749923726228, "loss": 0.992, "step": 4319 }, { "epoch": 0.8782272819678797, "grad_norm": 0.12698335945606232, "learning_rate": 0.00011225465270009154, "loss": 0.9678, "step": 4320 }, { "epoch": 0.878430575320187, "grad_norm": 0.15510526299476624, "learning_rate": 0.00011223431302756026, "loss": 1.2756, "step": 4321 }, { "epoch": 0.8786338686724944, "grad_norm": 0.13490548729896545, "learning_rate": 0.00011221397335502899, "loss": 0.9746, "step": 4322 }, { "epoch": 0.8788371620248018, "grad_norm": 0.1362731158733368, "learning_rate": 0.00011219363368249771, "loss": 1.1591, "step": 4323 }, { "epoch": 0.8790404553771092, "grad_norm": 0.12086111307144165, "learning_rate": 0.00011217329400996644, "loss": 0.9592, "step": 4324 }, { "epoch": 0.8792437487294166, "grad_norm": 0.13338525593280792, "learning_rate": 0.00011215295433743517, "loss": 1.0375, "step": 4325 }, { "epoch": 0.879447042081724, "grad_norm": 0.13681508600711823, "learning_rate": 0.0001121326146649039, "loss": 1.2068, "step": 4326 }, { "epoch": 0.8796503354340313, "grad_norm": 0.12971334159374237, "learning_rate": 0.00011211227499237262, "loss": 0.9959, "step": 4327 }, { "epoch": 0.8798536287863387, "grad_norm": 0.13908180594444275, "learning_rate": 0.00011209193531984135, "loss": 1.119, "step": 4328 }, { "epoch": 0.8800569221386461, "grad_norm": 0.13482098281383514, "learning_rate": 0.00011207159564731009, "loss": 1.0079, "step": 4329 }, { "epoch": 0.8802602154909535, "grad_norm": 0.14087046682834625, "learning_rate": 0.00011205125597477881, "loss": 1.0478, "step": 4330 }, { "epoch": 0.8804635088432609, "grad_norm": 0.12133046984672546, "learning_rate": 0.00011203091630224753, "loss": 0.8026, "step": 4331 }, { "epoch": 0.8806668021955683, "grad_norm": 0.12162627279758453, "learning_rate": 0.00011201057662971626, "loss": 1.0147, "step": 4332 }, { "epoch": 0.8808700955478755, "grad_norm": 0.1315440535545349, "learning_rate": 0.000111990236957185, "loss": 1.1736, "step": 4333 }, { "epoch": 0.8810733889001829, "grad_norm": 0.1336052566766739, "learning_rate": 0.00011196989728465372, "loss": 1.0814, "step": 4334 }, { "epoch": 0.8812766822524903, "grad_norm": 0.12887480854988098, "learning_rate": 0.00011194955761212245, "loss": 1.0392, "step": 4335 }, { "epoch": 0.8814799756047977, "grad_norm": 0.12557265162467957, "learning_rate": 0.00011192921793959117, "loss": 0.9376, "step": 4336 }, { "epoch": 0.8816832689571051, "grad_norm": 0.13946324586868286, "learning_rate": 0.00011190887826705991, "loss": 1.0847, "step": 4337 }, { "epoch": 0.8818865623094125, "grad_norm": 0.14429444074630737, "learning_rate": 0.00011188853859452863, "loss": 1.0925, "step": 4338 }, { "epoch": 0.8820898556617198, "grad_norm": 0.13866104185581207, "learning_rate": 0.00011186819892199736, "loss": 1.1063, "step": 4339 }, { "epoch": 0.8822931490140272, "grad_norm": 0.1266574114561081, "learning_rate": 0.00011184785924946608, "loss": 1.0786, "step": 4340 }, { "epoch": 0.8824964423663346, "grad_norm": 0.14879325032234192, "learning_rate": 0.00011182751957693482, "loss": 1.1059, "step": 4341 }, { "epoch": 0.882699735718642, "grad_norm": 0.11987625062465668, "learning_rate": 0.00011180717990440354, "loss": 0.8947, "step": 4342 }, { "epoch": 0.8829030290709494, "grad_norm": 0.13331225514411926, "learning_rate": 0.00011178684023187227, "loss": 1.1468, "step": 4343 }, { "epoch": 0.8831063224232567, "grad_norm": 0.13890080153942108, "learning_rate": 0.00011176650055934099, "loss": 1.0535, "step": 4344 }, { "epoch": 0.8833096157755641, "grad_norm": 0.14050957560539246, "learning_rate": 0.00011174616088680973, "loss": 1.1156, "step": 4345 }, { "epoch": 0.8835129091278715, "grad_norm": 0.14118660986423492, "learning_rate": 0.00011172582121427846, "loss": 0.9597, "step": 4346 }, { "epoch": 0.8837162024801789, "grad_norm": 0.13197362422943115, "learning_rate": 0.00011170548154174718, "loss": 0.9801, "step": 4347 }, { "epoch": 0.8839194958324863, "grad_norm": 0.1429329663515091, "learning_rate": 0.0001116851418692159, "loss": 1.1501, "step": 4348 }, { "epoch": 0.8841227891847937, "grad_norm": 0.14236941933631897, "learning_rate": 0.00011166480219668464, "loss": 1.0295, "step": 4349 }, { "epoch": 0.884326082537101, "grad_norm": 0.13247445225715637, "learning_rate": 0.00011164446252415337, "loss": 0.9995, "step": 4350 }, { "epoch": 0.8845293758894084, "grad_norm": 0.1475542187690735, "learning_rate": 0.00011162412285162209, "loss": 1.1062, "step": 4351 }, { "epoch": 0.8847326692417158, "grad_norm": 0.14314448833465576, "learning_rate": 0.00011160378317909082, "loss": 1.1257, "step": 4352 }, { "epoch": 0.8849359625940232, "grad_norm": 0.1297428011894226, "learning_rate": 0.00011158344350655955, "loss": 0.8739, "step": 4353 }, { "epoch": 0.8851392559463306, "grad_norm": 0.15738995373249054, "learning_rate": 0.00011156310383402828, "loss": 1.2495, "step": 4354 }, { "epoch": 0.885342549298638, "grad_norm": 0.13949069380760193, "learning_rate": 0.000111542764161497, "loss": 1.0207, "step": 4355 }, { "epoch": 0.8855458426509453, "grad_norm": 0.1462063193321228, "learning_rate": 0.00011152242448896573, "loss": 0.9444, "step": 4356 }, { "epoch": 0.8857491360032527, "grad_norm": 0.13881848752498627, "learning_rate": 0.00011150208481643447, "loss": 1.0071, "step": 4357 }, { "epoch": 0.8859524293555601, "grad_norm": 0.13828495144844055, "learning_rate": 0.00011148174514390319, "loss": 1.0035, "step": 4358 }, { "epoch": 0.8861557227078675, "grad_norm": 0.12428104132413864, "learning_rate": 0.00011146140547137191, "loss": 0.94, "step": 4359 }, { "epoch": 0.8863590160601749, "grad_norm": 0.14945100247859955, "learning_rate": 0.00011144106579884064, "loss": 1.262, "step": 4360 }, { "epoch": 0.8865623094124823, "grad_norm": 0.13491201400756836, "learning_rate": 0.00011142072612630938, "loss": 0.9794, "step": 4361 }, { "epoch": 0.8867656027647896, "grad_norm": 0.1441691815853119, "learning_rate": 0.0001114003864537781, "loss": 1.0569, "step": 4362 }, { "epoch": 0.886968896117097, "grad_norm": 0.14696361124515533, "learning_rate": 0.00011138004678124683, "loss": 1.263, "step": 4363 }, { "epoch": 0.8871721894694043, "grad_norm": 0.131379634141922, "learning_rate": 0.00011135970710871555, "loss": 0.8765, "step": 4364 }, { "epoch": 0.8873754828217117, "grad_norm": 0.13199898600578308, "learning_rate": 0.00011133936743618427, "loss": 0.9504, "step": 4365 }, { "epoch": 0.8875787761740191, "grad_norm": 0.12538810074329376, "learning_rate": 0.00011131902776365301, "loss": 0.9167, "step": 4366 }, { "epoch": 0.8877820695263264, "grad_norm": 0.14858978986740112, "learning_rate": 0.00011129868809112174, "loss": 1.1652, "step": 4367 }, { "epoch": 0.8879853628786338, "grad_norm": 0.12117012590169907, "learning_rate": 0.00011127834841859046, "loss": 0.9091, "step": 4368 }, { "epoch": 0.8881886562309412, "grad_norm": 0.13053376972675323, "learning_rate": 0.00011125800874605919, "loss": 0.9876, "step": 4369 }, { "epoch": 0.8883919495832486, "grad_norm": 0.15164178609848022, "learning_rate": 0.00011123766907352792, "loss": 1.1477, "step": 4370 }, { "epoch": 0.888595242935556, "grad_norm": 0.13139276206493378, "learning_rate": 0.00011121732940099665, "loss": 1.0332, "step": 4371 }, { "epoch": 0.8887985362878634, "grad_norm": 0.14275844395160675, "learning_rate": 0.00011119698972846537, "loss": 1.2397, "step": 4372 }, { "epoch": 0.8890018296401707, "grad_norm": 0.14269821345806122, "learning_rate": 0.0001111766500559341, "loss": 1.0065, "step": 4373 }, { "epoch": 0.8892051229924781, "grad_norm": 0.12749828398227692, "learning_rate": 0.00011115631038340284, "loss": 0.9364, "step": 4374 }, { "epoch": 0.8894084163447855, "grad_norm": 0.13233932852745056, "learning_rate": 0.00011113597071087156, "loss": 1.035, "step": 4375 }, { "epoch": 0.8896117096970929, "grad_norm": 0.14462941884994507, "learning_rate": 0.00011111563103834028, "loss": 1.1277, "step": 4376 }, { "epoch": 0.8898150030494003, "grad_norm": 0.14381466805934906, "learning_rate": 0.00011109529136580901, "loss": 1.166, "step": 4377 }, { "epoch": 0.8900182964017077, "grad_norm": 0.1264910101890564, "learning_rate": 0.00011107495169327775, "loss": 1.0343, "step": 4378 }, { "epoch": 0.890221589754015, "grad_norm": 0.12185248732566833, "learning_rate": 0.00011105461202074647, "loss": 1.0195, "step": 4379 }, { "epoch": 0.8904248831063224, "grad_norm": 0.13510321080684662, "learning_rate": 0.0001110342723482152, "loss": 0.9245, "step": 4380 }, { "epoch": 0.8906281764586298, "grad_norm": 0.13467377424240112, "learning_rate": 0.00011101393267568392, "loss": 0.9795, "step": 4381 }, { "epoch": 0.8908314698109372, "grad_norm": 0.1266263723373413, "learning_rate": 0.00011099359300315266, "loss": 0.9728, "step": 4382 }, { "epoch": 0.8910347631632446, "grad_norm": 0.12397301942110062, "learning_rate": 0.00011097325333062138, "loss": 0.9325, "step": 4383 }, { "epoch": 0.891238056515552, "grad_norm": 0.14966972172260284, "learning_rate": 0.00011095291365809011, "loss": 1.2973, "step": 4384 }, { "epoch": 0.8914413498678593, "grad_norm": 0.13662739098072052, "learning_rate": 0.00011093257398555883, "loss": 1.0907, "step": 4385 }, { "epoch": 0.8916446432201667, "grad_norm": 0.1289726197719574, "learning_rate": 0.00011091223431302757, "loss": 1.047, "step": 4386 }, { "epoch": 0.8918479365724741, "grad_norm": 0.13556358218193054, "learning_rate": 0.0001108918946404963, "loss": 0.9938, "step": 4387 }, { "epoch": 0.8920512299247815, "grad_norm": 0.13389402627944946, "learning_rate": 0.00011087155496796502, "loss": 1.1329, "step": 4388 }, { "epoch": 0.8922545232770889, "grad_norm": 0.13192865252494812, "learning_rate": 0.00011085121529543374, "loss": 0.945, "step": 4389 }, { "epoch": 0.8924578166293963, "grad_norm": 0.14545689523220062, "learning_rate": 0.00011083087562290248, "loss": 1.0199, "step": 4390 }, { "epoch": 0.8926611099817036, "grad_norm": 0.1357770413160324, "learning_rate": 0.0001108105359503712, "loss": 1.1277, "step": 4391 }, { "epoch": 0.892864403334011, "grad_norm": 0.1452401578426361, "learning_rate": 0.00011079019627783993, "loss": 1.2486, "step": 4392 }, { "epoch": 0.8930676966863184, "grad_norm": 0.12674301862716675, "learning_rate": 0.00011076985660530865, "loss": 0.9128, "step": 4393 }, { "epoch": 0.8932709900386258, "grad_norm": 0.14735066890716553, "learning_rate": 0.00011074951693277739, "loss": 1.0515, "step": 4394 }, { "epoch": 0.8934742833909332, "grad_norm": 0.14510585367679596, "learning_rate": 0.00011072917726024612, "loss": 1.106, "step": 4395 }, { "epoch": 0.8936775767432404, "grad_norm": 0.14333130419254303, "learning_rate": 0.00011070883758771484, "loss": 1.1362, "step": 4396 }, { "epoch": 0.8938808700955478, "grad_norm": 0.1307590752840042, "learning_rate": 0.00011068849791518357, "loss": 0.9749, "step": 4397 }, { "epoch": 0.8940841634478552, "grad_norm": 0.12639310956001282, "learning_rate": 0.0001106681582426523, "loss": 0.9695, "step": 4398 }, { "epoch": 0.8942874568001626, "grad_norm": 0.13830193877220154, "learning_rate": 0.00011064781857012103, "loss": 0.9268, "step": 4399 }, { "epoch": 0.89449075015247, "grad_norm": 0.1438985913991928, "learning_rate": 0.00011062747889758975, "loss": 1.0869, "step": 4400 }, { "epoch": 0.8946940435047774, "grad_norm": 0.1423654854297638, "learning_rate": 0.00011060713922505848, "loss": 1.1083, "step": 4401 }, { "epoch": 0.8948973368570847, "grad_norm": 0.1318962126970291, "learning_rate": 0.00011058679955252722, "loss": 0.8641, "step": 4402 }, { "epoch": 0.8951006302093921, "grad_norm": 0.13388904929161072, "learning_rate": 0.00011056645987999594, "loss": 0.9573, "step": 4403 }, { "epoch": 0.8953039235616995, "grad_norm": 0.13502460718154907, "learning_rate": 0.00011054612020746466, "loss": 1.0003, "step": 4404 }, { "epoch": 0.8955072169140069, "grad_norm": 0.13359855115413666, "learning_rate": 0.00011052578053493339, "loss": 1.0714, "step": 4405 }, { "epoch": 0.8957105102663143, "grad_norm": 0.12817350029945374, "learning_rate": 0.00011050544086240211, "loss": 1.0177, "step": 4406 }, { "epoch": 0.8959138036186217, "grad_norm": 0.13135068118572235, "learning_rate": 0.00011048510118987085, "loss": 0.9071, "step": 4407 }, { "epoch": 0.896117096970929, "grad_norm": 0.13310706615447998, "learning_rate": 0.00011046476151733958, "loss": 1.0575, "step": 4408 }, { "epoch": 0.8963203903232364, "grad_norm": 0.12109819054603577, "learning_rate": 0.0001104444218448083, "loss": 0.902, "step": 4409 }, { "epoch": 0.8965236836755438, "grad_norm": 0.1310720294713974, "learning_rate": 0.00011042408217227702, "loss": 0.9465, "step": 4410 }, { "epoch": 0.8967269770278512, "grad_norm": 0.1330663412809372, "learning_rate": 0.00011040374249974576, "loss": 1.0053, "step": 4411 }, { "epoch": 0.8969302703801586, "grad_norm": 0.14403831958770752, "learning_rate": 0.00011038340282721449, "loss": 1.1808, "step": 4412 }, { "epoch": 0.897133563732466, "grad_norm": 0.1323632299900055, "learning_rate": 0.00011036306315468321, "loss": 0.9314, "step": 4413 }, { "epoch": 0.8973368570847733, "grad_norm": 0.12776096165180206, "learning_rate": 0.00011034272348215194, "loss": 1.0343, "step": 4414 }, { "epoch": 0.8975401504370807, "grad_norm": 0.12130887806415558, "learning_rate": 0.00011032238380962067, "loss": 1.0066, "step": 4415 }, { "epoch": 0.8977434437893881, "grad_norm": 0.11282986402511597, "learning_rate": 0.0001103020441370894, "loss": 0.8551, "step": 4416 }, { "epoch": 0.8979467371416955, "grad_norm": 0.14610666036605835, "learning_rate": 0.00011028170446455812, "loss": 1.2288, "step": 4417 }, { "epoch": 0.8981500304940029, "grad_norm": 0.14186285436153412, "learning_rate": 0.00011026136479202685, "loss": 1.183, "step": 4418 }, { "epoch": 0.8983533238463103, "grad_norm": 0.1389775425195694, "learning_rate": 0.00011024102511949559, "loss": 1.1684, "step": 4419 }, { "epoch": 0.8985566171986176, "grad_norm": 0.12318051606416702, "learning_rate": 0.00011022068544696431, "loss": 0.8705, "step": 4420 }, { "epoch": 0.898759910550925, "grad_norm": 0.12933410704135895, "learning_rate": 0.00011020034577443303, "loss": 1.0982, "step": 4421 }, { "epoch": 0.8989632039032324, "grad_norm": 0.14935623109340668, "learning_rate": 0.00011018000610190176, "loss": 1.1293, "step": 4422 }, { "epoch": 0.8991664972555398, "grad_norm": 0.13630087673664093, "learning_rate": 0.0001101596664293705, "loss": 1.0667, "step": 4423 }, { "epoch": 0.8993697906078472, "grad_norm": 0.14735549688339233, "learning_rate": 0.00011013932675683922, "loss": 1.0931, "step": 4424 }, { "epoch": 0.8995730839601545, "grad_norm": 0.13349930942058563, "learning_rate": 0.00011011898708430795, "loss": 1.0843, "step": 4425 }, { "epoch": 0.8997763773124619, "grad_norm": 0.13748763501644135, "learning_rate": 0.00011009864741177667, "loss": 1.0533, "step": 4426 }, { "epoch": 0.8999796706647692, "grad_norm": 0.1320018768310547, "learning_rate": 0.00011007830773924541, "loss": 1.0631, "step": 4427 }, { "epoch": 0.9001829640170766, "grad_norm": 0.1377144604921341, "learning_rate": 0.00011005796806671413, "loss": 1.0616, "step": 4428 }, { "epoch": 0.900386257369384, "grad_norm": 0.13794207572937012, "learning_rate": 0.00011003762839418286, "loss": 1.043, "step": 4429 }, { "epoch": 0.9005895507216914, "grad_norm": 0.12091651558876038, "learning_rate": 0.00011001728872165158, "loss": 0.9347, "step": 4430 }, { "epoch": 0.9007928440739987, "grad_norm": 0.13244852423667908, "learning_rate": 0.00010999694904912032, "loss": 0.9094, "step": 4431 }, { "epoch": 0.9009961374263061, "grad_norm": 0.1419922262430191, "learning_rate": 0.00010997660937658904, "loss": 1.0749, "step": 4432 }, { "epoch": 0.9011994307786135, "grad_norm": 0.138065367937088, "learning_rate": 0.00010995626970405777, "loss": 1.0559, "step": 4433 }, { "epoch": 0.9014027241309209, "grad_norm": 0.13192395865917206, "learning_rate": 0.00010993593003152649, "loss": 0.9556, "step": 4434 }, { "epoch": 0.9016060174832283, "grad_norm": 0.13181698322296143, "learning_rate": 0.00010991559035899523, "loss": 1.0103, "step": 4435 }, { "epoch": 0.9018093108355357, "grad_norm": 0.1360086053609848, "learning_rate": 0.00010989525068646396, "loss": 0.9737, "step": 4436 }, { "epoch": 0.902012604187843, "grad_norm": 0.14762909710407257, "learning_rate": 0.00010987491101393268, "loss": 1.0592, "step": 4437 }, { "epoch": 0.9022158975401504, "grad_norm": 0.13677798211574554, "learning_rate": 0.0001098545713414014, "loss": 1.0893, "step": 4438 }, { "epoch": 0.9024191908924578, "grad_norm": 0.13737376034259796, "learning_rate": 0.00010983423166887014, "loss": 1.1157, "step": 4439 }, { "epoch": 0.9026224842447652, "grad_norm": 0.13454869389533997, "learning_rate": 0.00010981389199633887, "loss": 1.0837, "step": 4440 }, { "epoch": 0.9028257775970726, "grad_norm": 0.1382821798324585, "learning_rate": 0.00010979355232380759, "loss": 0.9586, "step": 4441 }, { "epoch": 0.90302907094938, "grad_norm": 0.12248346954584122, "learning_rate": 0.00010977321265127632, "loss": 0.9251, "step": 4442 }, { "epoch": 0.9032323643016873, "grad_norm": 0.13722175359725952, "learning_rate": 0.00010975287297874505, "loss": 1.192, "step": 4443 }, { "epoch": 0.9034356576539947, "grad_norm": 0.14339371025562286, "learning_rate": 0.00010973253330621378, "loss": 1.1055, "step": 4444 }, { "epoch": 0.9036389510063021, "grad_norm": 0.1536564826965332, "learning_rate": 0.0001097121936336825, "loss": 1.1969, "step": 4445 }, { "epoch": 0.9038422443586095, "grad_norm": 0.12401420623064041, "learning_rate": 0.00010969185396115123, "loss": 1.0346, "step": 4446 }, { "epoch": 0.9040455377109169, "grad_norm": 0.12466490268707275, "learning_rate": 0.00010967151428861995, "loss": 0.903, "step": 4447 }, { "epoch": 0.9042488310632242, "grad_norm": 0.1398215889930725, "learning_rate": 0.00010965117461608869, "loss": 1.0548, "step": 4448 }, { "epoch": 0.9044521244155316, "grad_norm": 0.1224413737654686, "learning_rate": 0.00010963083494355741, "loss": 0.9738, "step": 4449 }, { "epoch": 0.904655417767839, "grad_norm": 0.13140305876731873, "learning_rate": 0.00010961049527102614, "loss": 1.1668, "step": 4450 }, { "epoch": 0.9048587111201464, "grad_norm": 0.13816101849079132, "learning_rate": 0.00010959015559849486, "loss": 1.0785, "step": 4451 }, { "epoch": 0.9050620044724538, "grad_norm": 0.19513925909996033, "learning_rate": 0.0001095698159259636, "loss": 1.2728, "step": 4452 }, { "epoch": 0.9052652978247612, "grad_norm": 0.1294509470462799, "learning_rate": 0.00010954947625343233, "loss": 0.9757, "step": 4453 }, { "epoch": 0.9054685911770685, "grad_norm": 0.13822956383228302, "learning_rate": 0.00010952913658090105, "loss": 1.064, "step": 4454 }, { "epoch": 0.9056718845293759, "grad_norm": 0.13722215592861176, "learning_rate": 0.00010950879690836977, "loss": 1.0321, "step": 4455 }, { "epoch": 0.9058751778816833, "grad_norm": 0.1313597559928894, "learning_rate": 0.00010948845723583851, "loss": 1.117, "step": 4456 }, { "epoch": 0.9060784712339907, "grad_norm": 0.13262207806110382, "learning_rate": 0.00010946811756330724, "loss": 1.0395, "step": 4457 }, { "epoch": 0.906281764586298, "grad_norm": 0.15121689438819885, "learning_rate": 0.00010944777789077596, "loss": 1.1235, "step": 4458 }, { "epoch": 0.9064850579386055, "grad_norm": 0.14262863993644714, "learning_rate": 0.00010942743821824469, "loss": 1.2066, "step": 4459 }, { "epoch": 0.9066883512909127, "grad_norm": 0.13933706283569336, "learning_rate": 0.00010940709854571342, "loss": 1.15, "step": 4460 }, { "epoch": 0.9068916446432201, "grad_norm": 0.14884263277053833, "learning_rate": 0.00010938675887318215, "loss": 1.1161, "step": 4461 }, { "epoch": 0.9070949379955275, "grad_norm": 0.1426582783460617, "learning_rate": 0.00010936641920065087, "loss": 1.1462, "step": 4462 }, { "epoch": 0.9072982313478349, "grad_norm": 0.14341098070144653, "learning_rate": 0.0001093460795281196, "loss": 1.0028, "step": 4463 }, { "epoch": 0.9075015247001423, "grad_norm": 0.13192780315876007, "learning_rate": 0.00010932573985558834, "loss": 0.9766, "step": 4464 }, { "epoch": 0.9077048180524497, "grad_norm": 0.13691288232803345, "learning_rate": 0.00010930540018305706, "loss": 1.0718, "step": 4465 }, { "epoch": 0.907908111404757, "grad_norm": 0.1597934365272522, "learning_rate": 0.00010928506051052578, "loss": 1.2067, "step": 4466 }, { "epoch": 0.9081114047570644, "grad_norm": 0.128030464053154, "learning_rate": 0.00010926472083799451, "loss": 0.9896, "step": 4467 }, { "epoch": 0.9083146981093718, "grad_norm": 0.13701699674129486, "learning_rate": 0.00010924438116546325, "loss": 1.0203, "step": 4468 }, { "epoch": 0.9085179914616792, "grad_norm": 0.13079933822155, "learning_rate": 0.00010922404149293197, "loss": 1.0287, "step": 4469 }, { "epoch": 0.9087212848139866, "grad_norm": 0.15257249772548676, "learning_rate": 0.0001092037018204007, "loss": 1.0392, "step": 4470 }, { "epoch": 0.908924578166294, "grad_norm": 0.134558767080307, "learning_rate": 0.00010918336214786942, "loss": 1.1618, "step": 4471 }, { "epoch": 0.9091278715186013, "grad_norm": 0.13755445182323456, "learning_rate": 0.00010916302247533816, "loss": 1.0579, "step": 4472 }, { "epoch": 0.9093311648709087, "grad_norm": 0.14956828951835632, "learning_rate": 0.00010914268280280688, "loss": 1.0457, "step": 4473 }, { "epoch": 0.9095344582232161, "grad_norm": 0.138174906373024, "learning_rate": 0.0001091223431302756, "loss": 0.9902, "step": 4474 }, { "epoch": 0.9097377515755235, "grad_norm": 0.14548815786838531, "learning_rate": 0.00010910200345774433, "loss": 1.1674, "step": 4475 }, { "epoch": 0.9099410449278309, "grad_norm": 0.13372185826301575, "learning_rate": 0.00010908166378521307, "loss": 1.1141, "step": 4476 }, { "epoch": 0.9101443382801382, "grad_norm": 0.1349831521511078, "learning_rate": 0.0001090613241126818, "loss": 1.0383, "step": 4477 }, { "epoch": 0.9103476316324456, "grad_norm": 0.12056616693735123, "learning_rate": 0.00010904098444015052, "loss": 0.7852, "step": 4478 }, { "epoch": 0.910550924984753, "grad_norm": 0.14333753287792206, "learning_rate": 0.00010902064476761924, "loss": 1.0264, "step": 4479 }, { "epoch": 0.9107542183370604, "grad_norm": 0.1312333047389984, "learning_rate": 0.00010900030509508798, "loss": 0.8746, "step": 4480 }, { "epoch": 0.9109575116893678, "grad_norm": 0.14129756391048431, "learning_rate": 0.0001089799654225567, "loss": 1.1576, "step": 4481 }, { "epoch": 0.9111608050416752, "grad_norm": 0.135942742228508, "learning_rate": 0.00010895962575002543, "loss": 1.1842, "step": 4482 }, { "epoch": 0.9113640983939825, "grad_norm": 0.1423972100019455, "learning_rate": 0.00010893928607749415, "loss": 1.051, "step": 4483 }, { "epoch": 0.9115673917462899, "grad_norm": 0.13322605192661285, "learning_rate": 0.00010891894640496289, "loss": 0.9931, "step": 4484 }, { "epoch": 0.9117706850985973, "grad_norm": 0.14480316638946533, "learning_rate": 0.00010889860673243162, "loss": 1.0453, "step": 4485 }, { "epoch": 0.9119739784509047, "grad_norm": 0.1365094780921936, "learning_rate": 0.00010887826705990034, "loss": 1.0413, "step": 4486 }, { "epoch": 0.9121772718032121, "grad_norm": 0.128956139087677, "learning_rate": 0.00010885792738736907, "loss": 1.0774, "step": 4487 }, { "epoch": 0.9123805651555195, "grad_norm": 0.11314928531646729, "learning_rate": 0.00010883758771483779, "loss": 0.8729, "step": 4488 }, { "epoch": 0.9125838585078268, "grad_norm": 0.13904598355293274, "learning_rate": 0.00010881724804230653, "loss": 1.07, "step": 4489 }, { "epoch": 0.9127871518601341, "grad_norm": 0.1325247436761856, "learning_rate": 0.00010879690836977525, "loss": 1.1768, "step": 4490 }, { "epoch": 0.9129904452124415, "grad_norm": 0.13978269696235657, "learning_rate": 0.00010877656869724398, "loss": 1.0877, "step": 4491 }, { "epoch": 0.913193738564749, "grad_norm": 0.13564588129520416, "learning_rate": 0.0001087562290247127, "loss": 1.0527, "step": 4492 }, { "epoch": 0.9133970319170563, "grad_norm": 0.14008729159832, "learning_rate": 0.00010873588935218144, "loss": 1.0295, "step": 4493 }, { "epoch": 0.9136003252693637, "grad_norm": 0.14307157695293427, "learning_rate": 0.00010871554967965016, "loss": 1.0371, "step": 4494 }, { "epoch": 0.913803618621671, "grad_norm": 0.13670316338539124, "learning_rate": 0.00010869521000711889, "loss": 1.0557, "step": 4495 }, { "epoch": 0.9140069119739784, "grad_norm": 0.138756662607193, "learning_rate": 0.00010867487033458761, "loss": 0.9865, "step": 4496 }, { "epoch": 0.9142102053262858, "grad_norm": 0.132290780544281, "learning_rate": 0.00010865453066205635, "loss": 1.041, "step": 4497 }, { "epoch": 0.9144134986785932, "grad_norm": 0.13535267114639282, "learning_rate": 0.00010863419098952508, "loss": 1.0592, "step": 4498 }, { "epoch": 0.9146167920309006, "grad_norm": 0.12333885580301285, "learning_rate": 0.0001086138513169938, "loss": 1.0101, "step": 4499 }, { "epoch": 0.9148200853832079, "grad_norm": 0.14777310192584991, "learning_rate": 0.00010859351164446252, "loss": 1.0622, "step": 4500 }, { "epoch": 0.9150233787355153, "grad_norm": 0.11419006437063217, "learning_rate": 0.00010857317197193126, "loss": 0.8548, "step": 4501 }, { "epoch": 0.9152266720878227, "grad_norm": 0.12761832773685455, "learning_rate": 0.00010855283229939999, "loss": 0.9198, "step": 4502 }, { "epoch": 0.9154299654401301, "grad_norm": 0.1387338936328888, "learning_rate": 0.00010853249262686871, "loss": 1.0402, "step": 4503 }, { "epoch": 0.9156332587924375, "grad_norm": 0.13915283977985382, "learning_rate": 0.00010851215295433744, "loss": 1.1062, "step": 4504 }, { "epoch": 0.9158365521447449, "grad_norm": 0.13649246096611023, "learning_rate": 0.00010849181328180617, "loss": 1.1802, "step": 4505 }, { "epoch": 0.9160398454970522, "grad_norm": 0.15227414667606354, "learning_rate": 0.0001084714736092749, "loss": 1.3089, "step": 4506 }, { "epoch": 0.9162431388493596, "grad_norm": 0.1522645801305771, "learning_rate": 0.00010845113393674362, "loss": 1.1328, "step": 4507 }, { "epoch": 0.916446432201667, "grad_norm": 0.13502533733844757, "learning_rate": 0.00010843079426421235, "loss": 1.0838, "step": 4508 }, { "epoch": 0.9166497255539744, "grad_norm": 0.1440073549747467, "learning_rate": 0.00010841045459168108, "loss": 1.1632, "step": 4509 }, { "epoch": 0.9168530189062818, "grad_norm": 0.1380605548620224, "learning_rate": 0.00010839011491914981, "loss": 1.0128, "step": 4510 }, { "epoch": 0.9170563122585892, "grad_norm": 0.14944829046726227, "learning_rate": 0.00010836977524661853, "loss": 1.2041, "step": 4511 }, { "epoch": 0.9172596056108965, "grad_norm": 0.13469955325126648, "learning_rate": 0.00010834943557408726, "loss": 1.1208, "step": 4512 }, { "epoch": 0.9174628989632039, "grad_norm": 0.1321646124124527, "learning_rate": 0.000108329095901556, "loss": 0.9967, "step": 4513 }, { "epoch": 0.9176661923155113, "grad_norm": 0.1304931789636612, "learning_rate": 0.00010830875622902472, "loss": 1.0428, "step": 4514 }, { "epoch": 0.9178694856678187, "grad_norm": 0.12599384784698486, "learning_rate": 0.00010828841655649345, "loss": 1.0719, "step": 4515 }, { "epoch": 0.9180727790201261, "grad_norm": 0.12788186967372894, "learning_rate": 0.00010826807688396217, "loss": 1.0551, "step": 4516 }, { "epoch": 0.9182760723724335, "grad_norm": 0.16241435706615448, "learning_rate": 0.00010824773721143091, "loss": 1.3277, "step": 4517 }, { "epoch": 0.9184793657247408, "grad_norm": 0.12297213822603226, "learning_rate": 0.00010822739753889963, "loss": 0.9117, "step": 4518 }, { "epoch": 0.9186826590770482, "grad_norm": 0.13010992109775543, "learning_rate": 0.00010820705786636836, "loss": 0.9313, "step": 4519 }, { "epoch": 0.9188859524293556, "grad_norm": 0.13779647648334503, "learning_rate": 0.00010818671819383708, "loss": 1.1851, "step": 4520 }, { "epoch": 0.919089245781663, "grad_norm": 0.15298517048358917, "learning_rate": 0.00010816637852130582, "loss": 1.2739, "step": 4521 }, { "epoch": 0.9192925391339704, "grad_norm": 0.1386537402868271, "learning_rate": 0.00010814603884877454, "loss": 1.1061, "step": 4522 }, { "epoch": 0.9194958324862778, "grad_norm": 0.14241141080856323, "learning_rate": 0.00010812569917624327, "loss": 1.1449, "step": 4523 }, { "epoch": 0.919699125838585, "grad_norm": 0.14428827166557312, "learning_rate": 0.00010810535950371199, "loss": 0.9884, "step": 4524 }, { "epoch": 0.9199024191908924, "grad_norm": 0.15264667570590973, "learning_rate": 0.00010808501983118073, "loss": 1.2018, "step": 4525 }, { "epoch": 0.9201057125431998, "grad_norm": 0.14881928265094757, "learning_rate": 0.00010806468015864945, "loss": 1.0599, "step": 4526 }, { "epoch": 0.9203090058955072, "grad_norm": 0.12393801659345627, "learning_rate": 0.00010804434048611818, "loss": 1.0684, "step": 4527 }, { "epoch": 0.9205122992478146, "grad_norm": 0.1288781762123108, "learning_rate": 0.0001080240008135869, "loss": 0.9667, "step": 4528 }, { "epoch": 0.9207155926001219, "grad_norm": 0.12993919849395752, "learning_rate": 0.00010800366114105563, "loss": 0.985, "step": 4529 }, { "epoch": 0.9209188859524293, "grad_norm": 0.14005163311958313, "learning_rate": 0.00010798332146852437, "loss": 1.0121, "step": 4530 }, { "epoch": 0.9211221793047367, "grad_norm": 0.1298326551914215, "learning_rate": 0.00010796298179599309, "loss": 1.0357, "step": 4531 }, { "epoch": 0.9213254726570441, "grad_norm": 0.1444677710533142, "learning_rate": 0.00010794264212346182, "loss": 1.0837, "step": 4532 }, { "epoch": 0.9215287660093515, "grad_norm": 0.1372900754213333, "learning_rate": 0.00010792230245093054, "loss": 1.1198, "step": 4533 }, { "epoch": 0.9217320593616589, "grad_norm": 0.13712218403816223, "learning_rate": 0.00010790196277839928, "loss": 1.1203, "step": 4534 }, { "epoch": 0.9219353527139662, "grad_norm": 0.13176938891410828, "learning_rate": 0.000107881623105868, "loss": 0.9814, "step": 4535 }, { "epoch": 0.9221386460662736, "grad_norm": 0.14285510778427124, "learning_rate": 0.00010786128343333673, "loss": 1.0725, "step": 4536 }, { "epoch": 0.922341939418581, "grad_norm": 0.14509692788124084, "learning_rate": 0.00010784094376080545, "loss": 1.2281, "step": 4537 }, { "epoch": 0.9225452327708884, "grad_norm": 0.12854382395744324, "learning_rate": 0.00010782060408827419, "loss": 0.95, "step": 4538 }, { "epoch": 0.9227485261231958, "grad_norm": 0.13784833252429962, "learning_rate": 0.00010780026441574291, "loss": 1.0091, "step": 4539 }, { "epoch": 0.9229518194755032, "grad_norm": 0.12507863342761993, "learning_rate": 0.00010777992474321164, "loss": 0.9746, "step": 4540 }, { "epoch": 0.9231551128278105, "grad_norm": 0.14005503058433533, "learning_rate": 0.00010775958507068036, "loss": 0.9599, "step": 4541 }, { "epoch": 0.9233584061801179, "grad_norm": 0.15629933774471283, "learning_rate": 0.0001077392453981491, "loss": 1.1292, "step": 4542 }, { "epoch": 0.9235616995324253, "grad_norm": 0.12826746702194214, "learning_rate": 0.00010771890572561782, "loss": 1.0791, "step": 4543 }, { "epoch": 0.9237649928847327, "grad_norm": 0.1537964642047882, "learning_rate": 0.00010769856605308655, "loss": 1.2648, "step": 4544 }, { "epoch": 0.9239682862370401, "grad_norm": 0.13459934294223785, "learning_rate": 0.00010767822638055527, "loss": 1.1297, "step": 4545 }, { "epoch": 0.9241715795893475, "grad_norm": 0.1457410752773285, "learning_rate": 0.00010765788670802401, "loss": 1.0294, "step": 4546 }, { "epoch": 0.9243748729416548, "grad_norm": 0.12394455820322037, "learning_rate": 0.00010763754703549274, "loss": 0.9905, "step": 4547 }, { "epoch": 0.9245781662939622, "grad_norm": 0.14204509556293488, "learning_rate": 0.00010761720736296146, "loss": 1.1691, "step": 4548 }, { "epoch": 0.9247814596462696, "grad_norm": 0.1345042586326599, "learning_rate": 0.00010759686769043019, "loss": 1.0468, "step": 4549 }, { "epoch": 0.924984752998577, "grad_norm": 0.13902144134044647, "learning_rate": 0.00010757652801789892, "loss": 1.0194, "step": 4550 }, { "epoch": 0.9251880463508844, "grad_norm": 0.1317700892686844, "learning_rate": 0.00010755618834536765, "loss": 0.923, "step": 4551 }, { "epoch": 0.9253913397031917, "grad_norm": 0.15080450475215912, "learning_rate": 0.00010753584867283637, "loss": 1.2244, "step": 4552 }, { "epoch": 0.925594633055499, "grad_norm": 0.14415398240089417, "learning_rate": 0.0001075155090003051, "loss": 1.0809, "step": 4553 }, { "epoch": 0.9257979264078064, "grad_norm": 0.12147921323776245, "learning_rate": 0.00010749516932777383, "loss": 0.8445, "step": 4554 }, { "epoch": 0.9260012197601138, "grad_norm": 0.1352618932723999, "learning_rate": 0.00010747482965524256, "loss": 1.0397, "step": 4555 }, { "epoch": 0.9262045131124212, "grad_norm": 0.1354973316192627, "learning_rate": 0.00010745448998271128, "loss": 1.0103, "step": 4556 }, { "epoch": 0.9264078064647286, "grad_norm": 0.13657426834106445, "learning_rate": 0.00010743415031018001, "loss": 0.9979, "step": 4557 }, { "epoch": 0.9266110998170359, "grad_norm": 0.13294103741645813, "learning_rate": 0.00010741381063764875, "loss": 0.9317, "step": 4558 }, { "epoch": 0.9268143931693433, "grad_norm": 0.14303997159004211, "learning_rate": 0.00010739347096511747, "loss": 1.1488, "step": 4559 }, { "epoch": 0.9270176865216507, "grad_norm": 0.12142444401979446, "learning_rate": 0.0001073731312925862, "loss": 0.9833, "step": 4560 }, { "epoch": 0.9272209798739581, "grad_norm": 0.1350148767232895, "learning_rate": 0.00010735279162005492, "loss": 0.9737, "step": 4561 }, { "epoch": 0.9274242732262655, "grad_norm": 0.15613560378551483, "learning_rate": 0.00010733245194752366, "loss": 1.1749, "step": 4562 }, { "epoch": 0.9276275665785729, "grad_norm": 0.13186268508434296, "learning_rate": 0.00010731211227499238, "loss": 1.0103, "step": 4563 }, { "epoch": 0.9278308599308802, "grad_norm": 0.14699916541576385, "learning_rate": 0.0001072917726024611, "loss": 1.1501, "step": 4564 }, { "epoch": 0.9280341532831876, "grad_norm": 0.13133716583251953, "learning_rate": 0.00010727143292992983, "loss": 1.026, "step": 4565 }, { "epoch": 0.928237446635495, "grad_norm": 0.1365920603275299, "learning_rate": 0.00010725109325739857, "loss": 1.0866, "step": 4566 }, { "epoch": 0.9284407399878024, "grad_norm": 0.12985709309577942, "learning_rate": 0.0001072307535848673, "loss": 1.1098, "step": 4567 }, { "epoch": 0.9286440333401098, "grad_norm": 0.14012043178081512, "learning_rate": 0.00010721041391233602, "loss": 0.9724, "step": 4568 }, { "epoch": 0.9288473266924172, "grad_norm": 0.12195601314306259, "learning_rate": 0.00010719007423980474, "loss": 0.9906, "step": 4569 }, { "epoch": 0.9290506200447245, "grad_norm": 0.12102338671684265, "learning_rate": 0.00010716973456727347, "loss": 0.8993, "step": 4570 }, { "epoch": 0.9292539133970319, "grad_norm": 0.16343897581100464, "learning_rate": 0.0001071493948947422, "loss": 1.3669, "step": 4571 }, { "epoch": 0.9294572067493393, "grad_norm": 0.12324689328670502, "learning_rate": 0.00010712905522221093, "loss": 1.018, "step": 4572 }, { "epoch": 0.9296605001016467, "grad_norm": 0.14391222596168518, "learning_rate": 0.00010710871554967965, "loss": 1.0502, "step": 4573 }, { "epoch": 0.9298637934539541, "grad_norm": 0.13690593838691711, "learning_rate": 0.00010708837587714838, "loss": 1.0015, "step": 4574 }, { "epoch": 0.9300670868062615, "grad_norm": 0.11955592036247253, "learning_rate": 0.00010706803620461712, "loss": 0.8207, "step": 4575 }, { "epoch": 0.9302703801585688, "grad_norm": 0.12728698551654816, "learning_rate": 0.00010704769653208584, "loss": 0.8813, "step": 4576 }, { "epoch": 0.9304736735108762, "grad_norm": 0.14534975588321686, "learning_rate": 0.00010702735685955457, "loss": 1.0923, "step": 4577 }, { "epoch": 0.9306769668631836, "grad_norm": 0.12908664345741272, "learning_rate": 0.00010700701718702329, "loss": 1.1333, "step": 4578 }, { "epoch": 0.930880260215491, "grad_norm": 0.14262458682060242, "learning_rate": 0.00010698667751449203, "loss": 1.0474, "step": 4579 }, { "epoch": 0.9310835535677984, "grad_norm": 0.13423089683055878, "learning_rate": 0.00010696633784196075, "loss": 1.0581, "step": 4580 }, { "epoch": 0.9312868469201057, "grad_norm": 0.1267002373933792, "learning_rate": 0.00010694599816942948, "loss": 0.958, "step": 4581 }, { "epoch": 0.9314901402724131, "grad_norm": 0.13516265153884888, "learning_rate": 0.0001069256584968982, "loss": 1.0636, "step": 4582 }, { "epoch": 0.9316934336247205, "grad_norm": 0.14232146739959717, "learning_rate": 0.00010690531882436694, "loss": 1.1371, "step": 4583 }, { "epoch": 0.9318967269770279, "grad_norm": 0.13286015391349792, "learning_rate": 0.00010688497915183566, "loss": 0.9935, "step": 4584 }, { "epoch": 0.9321000203293353, "grad_norm": 0.1338234841823578, "learning_rate": 0.00010686463947930439, "loss": 1.0371, "step": 4585 }, { "epoch": 0.9323033136816427, "grad_norm": 0.13574783504009247, "learning_rate": 0.00010684429980677311, "loss": 1.0583, "step": 4586 }, { "epoch": 0.9325066070339499, "grad_norm": 0.1322636902332306, "learning_rate": 0.00010682396013424185, "loss": 0.9872, "step": 4587 }, { "epoch": 0.9327099003862573, "grad_norm": 0.13177639245986938, "learning_rate": 0.00010680362046171057, "loss": 0.9844, "step": 4588 }, { "epoch": 0.9329131937385647, "grad_norm": 0.13709305226802826, "learning_rate": 0.0001067832807891793, "loss": 1.0352, "step": 4589 }, { "epoch": 0.9331164870908721, "grad_norm": 0.13158872723579407, "learning_rate": 0.00010676294111664802, "loss": 0.9291, "step": 4590 }, { "epoch": 0.9333197804431795, "grad_norm": 0.1440209448337555, "learning_rate": 0.00010674260144411676, "loss": 1.1299, "step": 4591 }, { "epoch": 0.9335230737954869, "grad_norm": 0.14185591042041779, "learning_rate": 0.00010672226177158549, "loss": 1.0265, "step": 4592 }, { "epoch": 0.9337263671477942, "grad_norm": 0.13720087707042694, "learning_rate": 0.00010670192209905421, "loss": 1.065, "step": 4593 }, { "epoch": 0.9339296605001016, "grad_norm": 0.1312158852815628, "learning_rate": 0.00010668158242652294, "loss": 0.97, "step": 4594 }, { "epoch": 0.934132953852409, "grad_norm": 0.13442127406597137, "learning_rate": 0.00010666124275399167, "loss": 1.0517, "step": 4595 }, { "epoch": 0.9343362472047164, "grad_norm": 0.1302952766418457, "learning_rate": 0.0001066409030814604, "loss": 1.0052, "step": 4596 }, { "epoch": 0.9345395405570238, "grad_norm": 0.14878568053245544, "learning_rate": 0.00010662056340892912, "loss": 1.2183, "step": 4597 }, { "epoch": 0.9347428339093312, "grad_norm": 0.13958996534347534, "learning_rate": 0.00010660022373639785, "loss": 1.0758, "step": 4598 }, { "epoch": 0.9349461272616385, "grad_norm": 0.14994315803050995, "learning_rate": 0.00010657988406386658, "loss": 1.1696, "step": 4599 }, { "epoch": 0.9351494206139459, "grad_norm": 0.13476385176181793, "learning_rate": 0.00010655954439133531, "loss": 0.9507, "step": 4600 }, { "epoch": 0.9353527139662533, "grad_norm": 0.13115908205509186, "learning_rate": 0.00010653920471880403, "loss": 1.1128, "step": 4601 }, { "epoch": 0.9355560073185607, "grad_norm": 0.12260119616985321, "learning_rate": 0.00010651886504627276, "loss": 0.8933, "step": 4602 }, { "epoch": 0.9357593006708681, "grad_norm": 0.12978796660900116, "learning_rate": 0.0001064985253737415, "loss": 1.0094, "step": 4603 }, { "epoch": 0.9359625940231754, "grad_norm": 0.13168974220752716, "learning_rate": 0.00010647818570121022, "loss": 1.0007, "step": 4604 }, { "epoch": 0.9361658873754828, "grad_norm": 0.13790659606456757, "learning_rate": 0.00010645784602867894, "loss": 0.9609, "step": 4605 }, { "epoch": 0.9363691807277902, "grad_norm": 0.13622581958770752, "learning_rate": 0.00010643750635614767, "loss": 0.9742, "step": 4606 }, { "epoch": 0.9365724740800976, "grad_norm": 0.13826538622379303, "learning_rate": 0.00010641716668361641, "loss": 1.1061, "step": 4607 }, { "epoch": 0.936775767432405, "grad_norm": 0.13676097989082336, "learning_rate": 0.00010639682701108513, "loss": 1.1522, "step": 4608 }, { "epoch": 0.9369790607847124, "grad_norm": 0.13370144367218018, "learning_rate": 0.00010637648733855386, "loss": 0.9657, "step": 4609 }, { "epoch": 0.9371823541370197, "grad_norm": 0.12708503007888794, "learning_rate": 0.00010635614766602258, "loss": 1.0229, "step": 4610 }, { "epoch": 0.9373856474893271, "grad_norm": 0.14301814138889313, "learning_rate": 0.00010633580799349132, "loss": 0.9909, "step": 4611 }, { "epoch": 0.9375889408416345, "grad_norm": 0.14644454419612885, "learning_rate": 0.00010631546832096004, "loss": 1.1386, "step": 4612 }, { "epoch": 0.9377922341939419, "grad_norm": 0.13054661452770233, "learning_rate": 0.00010629512864842877, "loss": 0.9233, "step": 4613 }, { "epoch": 0.9379955275462493, "grad_norm": 0.13898830115795135, "learning_rate": 0.00010627478897589749, "loss": 1.2265, "step": 4614 }, { "epoch": 0.9381988208985567, "grad_norm": 0.13503706455230713, "learning_rate": 0.00010625444930336622, "loss": 1.0694, "step": 4615 }, { "epoch": 0.938402114250864, "grad_norm": 0.12382601946592331, "learning_rate": 0.00010623410963083495, "loss": 0.8756, "step": 4616 }, { "epoch": 0.9386054076031713, "grad_norm": 0.12934790551662445, "learning_rate": 0.00010621376995830368, "loss": 1.0435, "step": 4617 }, { "epoch": 0.9388087009554787, "grad_norm": 0.14618442952632904, "learning_rate": 0.0001061934302857724, "loss": 1.209, "step": 4618 }, { "epoch": 0.9390119943077861, "grad_norm": 0.1417202651500702, "learning_rate": 0.00010617309061324113, "loss": 1.1805, "step": 4619 }, { "epoch": 0.9392152876600935, "grad_norm": 0.14158600568771362, "learning_rate": 0.00010615275094070987, "loss": 1.0833, "step": 4620 }, { "epoch": 0.9394185810124009, "grad_norm": 0.13389776647090912, "learning_rate": 0.00010613241126817859, "loss": 1.1245, "step": 4621 }, { "epoch": 0.9396218743647082, "grad_norm": 0.1260322481393814, "learning_rate": 0.00010611207159564731, "loss": 0.8908, "step": 4622 }, { "epoch": 0.9398251677170156, "grad_norm": 0.1375802904367447, "learning_rate": 0.00010609173192311604, "loss": 1.0062, "step": 4623 }, { "epoch": 0.940028461069323, "grad_norm": 0.13388384878635406, "learning_rate": 0.00010607139225058478, "loss": 1.082, "step": 4624 }, { "epoch": 0.9402317544216304, "grad_norm": 0.13197797536849976, "learning_rate": 0.0001060510525780535, "loss": 0.9478, "step": 4625 }, { "epoch": 0.9404350477739378, "grad_norm": 0.1293218582868576, "learning_rate": 0.00010603071290552223, "loss": 0.9416, "step": 4626 }, { "epoch": 0.9406383411262452, "grad_norm": 0.1269448846578598, "learning_rate": 0.00010601037323299095, "loss": 0.9556, "step": 4627 }, { "epoch": 0.9408416344785525, "grad_norm": 0.15124647319316864, "learning_rate": 0.00010599003356045969, "loss": 1.1097, "step": 4628 }, { "epoch": 0.9410449278308599, "grad_norm": 0.12264547497034073, "learning_rate": 0.00010596969388792841, "loss": 1.0336, "step": 4629 }, { "epoch": 0.9412482211831673, "grad_norm": 0.13190335035324097, "learning_rate": 0.00010594935421539714, "loss": 1.0159, "step": 4630 }, { "epoch": 0.9414515145354747, "grad_norm": 0.13107061386108398, "learning_rate": 0.00010592901454286586, "loss": 1.0756, "step": 4631 }, { "epoch": 0.9416548078877821, "grad_norm": 0.13843277096748352, "learning_rate": 0.0001059086748703346, "loss": 0.9752, "step": 4632 }, { "epoch": 0.9418581012400894, "grad_norm": 0.12323298305273056, "learning_rate": 0.00010588833519780332, "loss": 0.8268, "step": 4633 }, { "epoch": 0.9420613945923968, "grad_norm": 0.136516734957695, "learning_rate": 0.00010586799552527205, "loss": 1.0694, "step": 4634 }, { "epoch": 0.9422646879447042, "grad_norm": 0.13739456236362457, "learning_rate": 0.00010584765585274077, "loss": 1.0251, "step": 4635 }, { "epoch": 0.9424679812970116, "grad_norm": 0.13358846306800842, "learning_rate": 0.00010582731618020951, "loss": 1.1104, "step": 4636 }, { "epoch": 0.942671274649319, "grad_norm": 0.13964349031448364, "learning_rate": 0.00010580697650767824, "loss": 0.987, "step": 4637 }, { "epoch": 0.9428745680016264, "grad_norm": 0.1372976303100586, "learning_rate": 0.00010578663683514696, "loss": 0.9547, "step": 4638 }, { "epoch": 0.9430778613539337, "grad_norm": 0.14359022676944733, "learning_rate": 0.00010576629716261568, "loss": 1.1619, "step": 4639 }, { "epoch": 0.9432811547062411, "grad_norm": 0.12636056542396545, "learning_rate": 0.00010574595749008442, "loss": 0.9485, "step": 4640 }, { "epoch": 0.9434844480585485, "grad_norm": 0.15746049582958221, "learning_rate": 0.00010572561781755315, "loss": 1.2212, "step": 4641 }, { "epoch": 0.9436877414108559, "grad_norm": 0.13888253271579742, "learning_rate": 0.00010570527814502187, "loss": 1.1069, "step": 4642 }, { "epoch": 0.9438910347631633, "grad_norm": 0.12905919551849365, "learning_rate": 0.0001056849384724906, "loss": 1.0055, "step": 4643 }, { "epoch": 0.9440943281154707, "grad_norm": 0.14576807618141174, "learning_rate": 0.00010566459879995933, "loss": 1.1185, "step": 4644 }, { "epoch": 0.944297621467778, "grad_norm": 0.15471163392066956, "learning_rate": 0.00010564425912742806, "loss": 1.2705, "step": 4645 }, { "epoch": 0.9445009148200854, "grad_norm": 0.1389993131160736, "learning_rate": 0.00010562391945489678, "loss": 1.0862, "step": 4646 }, { "epoch": 0.9447042081723928, "grad_norm": 0.1482502818107605, "learning_rate": 0.00010560357978236551, "loss": 1.1031, "step": 4647 }, { "epoch": 0.9449075015247002, "grad_norm": 0.12542898952960968, "learning_rate": 0.00010558324010983425, "loss": 0.8559, "step": 4648 }, { "epoch": 0.9451107948770076, "grad_norm": 0.12403812259435654, "learning_rate": 0.00010556290043730297, "loss": 0.9117, "step": 4649 }, { "epoch": 0.945314088229315, "grad_norm": 0.11655326187610626, "learning_rate": 0.0001055425607647717, "loss": 0.9568, "step": 4650 }, { "epoch": 0.9455173815816222, "grad_norm": 0.13780179619789124, "learning_rate": 0.00010552222109224042, "loss": 1.0537, "step": 4651 }, { "epoch": 0.9457206749339296, "grad_norm": 0.13035158812999725, "learning_rate": 0.00010550188141970916, "loss": 1.0461, "step": 4652 }, { "epoch": 0.945923968286237, "grad_norm": 0.13049277663230896, "learning_rate": 0.00010548154174717788, "loss": 0.8681, "step": 4653 }, { "epoch": 0.9461272616385444, "grad_norm": 0.149881973862648, "learning_rate": 0.0001054612020746466, "loss": 1.2271, "step": 4654 }, { "epoch": 0.9463305549908518, "grad_norm": 0.11799302697181702, "learning_rate": 0.00010544086240211533, "loss": 0.8407, "step": 4655 }, { "epoch": 0.9465338483431591, "grad_norm": 0.16021724045276642, "learning_rate": 0.00010542052272958404, "loss": 1.2769, "step": 4656 }, { "epoch": 0.9467371416954665, "grad_norm": 0.14058107137680054, "learning_rate": 0.00010540018305705279, "loss": 1.0519, "step": 4657 }, { "epoch": 0.9469404350477739, "grad_norm": 0.14473353326320648, "learning_rate": 0.00010537984338452152, "loss": 1.0002, "step": 4658 }, { "epoch": 0.9471437284000813, "grad_norm": 0.12458368390798569, "learning_rate": 0.00010535950371199024, "loss": 0.9717, "step": 4659 }, { "epoch": 0.9473470217523887, "grad_norm": 0.13984310626983643, "learning_rate": 0.00010533916403945897, "loss": 1.0642, "step": 4660 }, { "epoch": 0.9475503151046961, "grad_norm": 0.13739560544490814, "learning_rate": 0.0001053188243669277, "loss": 1.0281, "step": 4661 }, { "epoch": 0.9477536084570034, "grad_norm": 0.1382581740617752, "learning_rate": 0.00010529848469439643, "loss": 1.0816, "step": 4662 }, { "epoch": 0.9479569018093108, "grad_norm": 0.14696218073368073, "learning_rate": 0.00010527814502186515, "loss": 1.2176, "step": 4663 }, { "epoch": 0.9481601951616182, "grad_norm": 0.12849698960781097, "learning_rate": 0.00010525780534933388, "loss": 1.0097, "step": 4664 }, { "epoch": 0.9483634885139256, "grad_norm": 0.14687961339950562, "learning_rate": 0.00010523746567680262, "loss": 1.1417, "step": 4665 }, { "epoch": 0.948566781866233, "grad_norm": 0.14504985511302948, "learning_rate": 0.00010521712600427134, "loss": 1.1261, "step": 4666 }, { "epoch": 0.9487700752185404, "grad_norm": 0.12274103611707687, "learning_rate": 0.00010519678633174006, "loss": 0.9646, "step": 4667 }, { "epoch": 0.9489733685708477, "grad_norm": 0.11958125233650208, "learning_rate": 0.00010517644665920879, "loss": 0.9111, "step": 4668 }, { "epoch": 0.9491766619231551, "grad_norm": 0.14991825819015503, "learning_rate": 0.00010515610698667753, "loss": 1.1415, "step": 4669 }, { "epoch": 0.9493799552754625, "grad_norm": 0.14164093136787415, "learning_rate": 0.00010513576731414625, "loss": 1.2477, "step": 4670 }, { "epoch": 0.9495832486277699, "grad_norm": 0.13947711884975433, "learning_rate": 0.00010511542764161498, "loss": 1.0371, "step": 4671 }, { "epoch": 0.9497865419800773, "grad_norm": 0.12946373224258423, "learning_rate": 0.0001050950879690837, "loss": 1.0401, "step": 4672 }, { "epoch": 0.9499898353323847, "grad_norm": 0.1340869963169098, "learning_rate": 0.00010507474829655244, "loss": 1.0419, "step": 4673 }, { "epoch": 0.950193128684692, "grad_norm": 0.1419667899608612, "learning_rate": 0.00010505440862402116, "loss": 1.0027, "step": 4674 }, { "epoch": 0.9503964220369994, "grad_norm": 0.13332538306713104, "learning_rate": 0.00010503406895148989, "loss": 0.8884, "step": 4675 }, { "epoch": 0.9505997153893068, "grad_norm": 0.13271865248680115, "learning_rate": 0.00010501372927895861, "loss": 1.005, "step": 4676 }, { "epoch": 0.9508030087416142, "grad_norm": 0.15168990194797516, "learning_rate": 0.00010499338960642735, "loss": 1.168, "step": 4677 }, { "epoch": 0.9510063020939216, "grad_norm": 0.12381100654602051, "learning_rate": 0.00010497304993389607, "loss": 1.0467, "step": 4678 }, { "epoch": 0.951209595446229, "grad_norm": 0.14847104251384735, "learning_rate": 0.0001049527102613648, "loss": 1.1576, "step": 4679 }, { "epoch": 0.9514128887985362, "grad_norm": 0.1450118124485016, "learning_rate": 0.00010493237058883352, "loss": 1.0424, "step": 4680 }, { "epoch": 0.9516161821508436, "grad_norm": 0.13652002811431885, "learning_rate": 0.00010491203091630226, "loss": 1.0449, "step": 4681 }, { "epoch": 0.951819475503151, "grad_norm": 0.14836739003658295, "learning_rate": 0.00010489169124377099, "loss": 1.1076, "step": 4682 }, { "epoch": 0.9520227688554584, "grad_norm": 0.12465627491474152, "learning_rate": 0.00010487135157123971, "loss": 1.0594, "step": 4683 }, { "epoch": 0.9522260622077658, "grad_norm": 0.14319440722465515, "learning_rate": 0.00010485101189870843, "loss": 1.0954, "step": 4684 }, { "epoch": 0.9524293555600731, "grad_norm": 0.1305132359266281, "learning_rate": 0.00010483067222617717, "loss": 1.126, "step": 4685 }, { "epoch": 0.9526326489123805, "grad_norm": 0.14411622285842896, "learning_rate": 0.0001048103325536459, "loss": 1.069, "step": 4686 }, { "epoch": 0.9528359422646879, "grad_norm": 0.1547628790140152, "learning_rate": 0.00010478999288111462, "loss": 1.185, "step": 4687 }, { "epoch": 0.9530392356169953, "grad_norm": 0.1339641660451889, "learning_rate": 0.00010476965320858335, "loss": 1.1261, "step": 4688 }, { "epoch": 0.9532425289693027, "grad_norm": 0.15180015563964844, "learning_rate": 0.00010474931353605208, "loss": 1.2143, "step": 4689 }, { "epoch": 0.9534458223216101, "grad_norm": 0.13662739098072052, "learning_rate": 0.00010472897386352081, "loss": 1.1649, "step": 4690 }, { "epoch": 0.9536491156739174, "grad_norm": 0.14575301110744476, "learning_rate": 0.00010470863419098953, "loss": 1.0256, "step": 4691 }, { "epoch": 0.9538524090262248, "grad_norm": 0.13986723124980927, "learning_rate": 0.00010468829451845826, "loss": 1.1673, "step": 4692 }, { "epoch": 0.9540557023785322, "grad_norm": 0.14431442320346832, "learning_rate": 0.000104667954845927, "loss": 1.1316, "step": 4693 }, { "epoch": 0.9542589957308396, "grad_norm": 0.13795843720436096, "learning_rate": 0.00010464761517339572, "loss": 1.0331, "step": 4694 }, { "epoch": 0.954462289083147, "grad_norm": 0.1303069293498993, "learning_rate": 0.00010462727550086444, "loss": 0.9968, "step": 4695 }, { "epoch": 0.9546655824354544, "grad_norm": 0.15158216655254364, "learning_rate": 0.00010460693582833317, "loss": 1.1864, "step": 4696 }, { "epoch": 0.9548688757877617, "grad_norm": 0.1332157850265503, "learning_rate": 0.00010458659615580188, "loss": 1.0648, "step": 4697 }, { "epoch": 0.9550721691400691, "grad_norm": 0.14668650925159454, "learning_rate": 0.00010456625648327063, "loss": 1.0517, "step": 4698 }, { "epoch": 0.9552754624923765, "grad_norm": 0.15106475353240967, "learning_rate": 0.00010454591681073936, "loss": 1.2643, "step": 4699 }, { "epoch": 0.9554787558446839, "grad_norm": 0.1326945424079895, "learning_rate": 0.00010452557713820808, "loss": 1.0154, "step": 4700 }, { "epoch": 0.9556820491969913, "grad_norm": 0.14156071841716766, "learning_rate": 0.00010450523746567679, "loss": 0.988, "step": 4701 }, { "epoch": 0.9558853425492987, "grad_norm": 0.13619528710842133, "learning_rate": 0.00010448489779314554, "loss": 0.9761, "step": 4702 }, { "epoch": 0.956088635901606, "grad_norm": 0.1572863608598709, "learning_rate": 0.00010446455812061427, "loss": 1.2895, "step": 4703 }, { "epoch": 0.9562919292539134, "grad_norm": 0.13423630595207214, "learning_rate": 0.00010444421844808299, "loss": 0.9817, "step": 4704 }, { "epoch": 0.9564952226062208, "grad_norm": 0.13150696456432343, "learning_rate": 0.0001044238787755517, "loss": 0.9353, "step": 4705 }, { "epoch": 0.9566985159585282, "grad_norm": 0.13118426501750946, "learning_rate": 0.00010440353910302045, "loss": 0.956, "step": 4706 }, { "epoch": 0.9569018093108356, "grad_norm": 0.1445060670375824, "learning_rate": 0.00010438319943048918, "loss": 1.1343, "step": 4707 }, { "epoch": 0.957105102663143, "grad_norm": 0.12421584874391556, "learning_rate": 0.0001043628597579579, "loss": 0.9257, "step": 4708 }, { "epoch": 0.9573083960154503, "grad_norm": 0.1518603265285492, "learning_rate": 0.00010434252008542661, "loss": 1.0934, "step": 4709 }, { "epoch": 0.9575116893677577, "grad_norm": 0.13642071187496185, "learning_rate": 0.00010432218041289537, "loss": 1.1008, "step": 4710 }, { "epoch": 0.9577149827200651, "grad_norm": 0.13501964509487152, "learning_rate": 0.00010430184074036409, "loss": 1.0246, "step": 4711 }, { "epoch": 0.9579182760723725, "grad_norm": 0.14059419929981232, "learning_rate": 0.00010428150106783281, "loss": 1.0548, "step": 4712 }, { "epoch": 0.9581215694246799, "grad_norm": 0.13295401632785797, "learning_rate": 0.00010426116139530153, "loss": 1.0868, "step": 4713 }, { "epoch": 0.9583248627769871, "grad_norm": 0.1419042944908142, "learning_rate": 0.00010424082172277028, "loss": 1.0969, "step": 4714 }, { "epoch": 0.9585281561292945, "grad_norm": 0.13607093691825867, "learning_rate": 0.000104220482050239, "loss": 1.1988, "step": 4715 }, { "epoch": 0.9587314494816019, "grad_norm": 0.15295760333538055, "learning_rate": 0.00010420014237770773, "loss": 1.1384, "step": 4716 }, { "epoch": 0.9589347428339093, "grad_norm": 0.13830776512622833, "learning_rate": 0.00010417980270517644, "loss": 1.1107, "step": 4717 }, { "epoch": 0.9591380361862167, "grad_norm": 0.15392519533634186, "learning_rate": 0.00010415946303264519, "loss": 1.3216, "step": 4718 }, { "epoch": 0.9593413295385241, "grad_norm": 0.1344476342201233, "learning_rate": 0.00010413912336011391, "loss": 1.1368, "step": 4719 }, { "epoch": 0.9595446228908314, "grad_norm": 0.142112597823143, "learning_rate": 0.00010411878368758264, "loss": 0.997, "step": 4720 }, { "epoch": 0.9597479162431388, "grad_norm": 0.12999044358730316, "learning_rate": 0.00010409844401505136, "loss": 0.978, "step": 4721 }, { "epoch": 0.9599512095954462, "grad_norm": 0.13146638870239258, "learning_rate": 0.0001040781043425201, "loss": 1.0563, "step": 4722 }, { "epoch": 0.9601545029477536, "grad_norm": 0.15361693501472473, "learning_rate": 0.00010405776466998882, "loss": 1.1783, "step": 4723 }, { "epoch": 0.960357796300061, "grad_norm": 0.13315477967262268, "learning_rate": 0.00010403742499745755, "loss": 0.97, "step": 4724 }, { "epoch": 0.9605610896523684, "grad_norm": 0.13661111891269684, "learning_rate": 0.00010401708532492627, "loss": 1.06, "step": 4725 }, { "epoch": 0.9607643830046757, "grad_norm": 0.13284547626972198, "learning_rate": 0.00010399674565239501, "loss": 1.0313, "step": 4726 }, { "epoch": 0.9609676763569831, "grad_norm": 0.13400302827358246, "learning_rate": 0.00010397640597986374, "loss": 1.0858, "step": 4727 }, { "epoch": 0.9611709697092905, "grad_norm": 0.12329299002885818, "learning_rate": 0.00010395606630733246, "loss": 0.8973, "step": 4728 }, { "epoch": 0.9613742630615979, "grad_norm": 0.14118091762065887, "learning_rate": 0.00010393572663480118, "loss": 1.0407, "step": 4729 }, { "epoch": 0.9615775564139053, "grad_norm": 0.13104970753192902, "learning_rate": 0.00010391538696226992, "loss": 0.9438, "step": 4730 }, { "epoch": 0.9617808497662127, "grad_norm": 0.12976235151290894, "learning_rate": 0.00010389504728973865, "loss": 0.989, "step": 4731 }, { "epoch": 0.96198414311852, "grad_norm": 0.12546932697296143, "learning_rate": 0.00010387470761720737, "loss": 0.9445, "step": 4732 }, { "epoch": 0.9621874364708274, "grad_norm": 0.14191336929798126, "learning_rate": 0.0001038543679446761, "loss": 1.132, "step": 4733 }, { "epoch": 0.9623907298231348, "grad_norm": 0.14218741655349731, "learning_rate": 0.00010383402827214483, "loss": 1.046, "step": 4734 }, { "epoch": 0.9625940231754422, "grad_norm": 0.15692010521888733, "learning_rate": 0.00010381368859961356, "loss": 1.1352, "step": 4735 }, { "epoch": 0.9627973165277496, "grad_norm": 0.1295771300792694, "learning_rate": 0.00010379334892708228, "loss": 1.0675, "step": 4736 }, { "epoch": 0.9630006098800569, "grad_norm": 0.15568415820598602, "learning_rate": 0.00010377300925455101, "loss": 1.132, "step": 4737 }, { "epoch": 0.9632039032323643, "grad_norm": 0.12996648252010345, "learning_rate": 0.00010375266958201972, "loss": 1.0338, "step": 4738 }, { "epoch": 0.9634071965846717, "grad_norm": 0.14026613533496857, "learning_rate": 0.00010373232990948847, "loss": 1.1937, "step": 4739 }, { "epoch": 0.9636104899369791, "grad_norm": 0.13028547167778015, "learning_rate": 0.0001037119902369572, "loss": 0.8526, "step": 4740 }, { "epoch": 0.9638137832892865, "grad_norm": 0.12742145359516144, "learning_rate": 0.00010369165056442592, "loss": 0.85, "step": 4741 }, { "epoch": 0.9640170766415939, "grad_norm": 0.11644089221954346, "learning_rate": 0.00010367131089189463, "loss": 0.8878, "step": 4742 }, { "epoch": 0.9642203699939011, "grad_norm": 0.1318705528974533, "learning_rate": 0.00010365097121936338, "loss": 1.0346, "step": 4743 }, { "epoch": 0.9644236633462085, "grad_norm": 0.14477600157260895, "learning_rate": 0.0001036306315468321, "loss": 1.1933, "step": 4744 }, { "epoch": 0.964626956698516, "grad_norm": 0.13914555311203003, "learning_rate": 0.00010361029187430083, "loss": 1.1292, "step": 4745 }, { "epoch": 0.9648302500508233, "grad_norm": 0.1304524540901184, "learning_rate": 0.00010358995220176954, "loss": 0.9745, "step": 4746 }, { "epoch": 0.9650335434031307, "grad_norm": 0.1401352435350418, "learning_rate": 0.00010356961252923829, "loss": 1.072, "step": 4747 }, { "epoch": 0.9652368367554381, "grad_norm": 0.1341739445924759, "learning_rate": 0.00010354927285670702, "loss": 0.9751, "step": 4748 }, { "epoch": 0.9654401301077454, "grad_norm": 0.13538521528244019, "learning_rate": 0.00010352893318417574, "loss": 1.0814, "step": 4749 }, { "epoch": 0.9656434234600528, "grad_norm": 0.14047326147556305, "learning_rate": 0.00010350859351164445, "loss": 1.1741, "step": 4750 }, { "epoch": 0.9658467168123602, "grad_norm": 0.13722112774848938, "learning_rate": 0.0001034882538391132, "loss": 0.928, "step": 4751 }, { "epoch": 0.9660500101646676, "grad_norm": 0.12714186310768127, "learning_rate": 0.00010346791416658193, "loss": 0.7725, "step": 4752 }, { "epoch": 0.966253303516975, "grad_norm": 0.12423626333475113, "learning_rate": 0.00010344757449405065, "loss": 0.8907, "step": 4753 }, { "epoch": 0.9664565968692824, "grad_norm": 0.1605733186006546, "learning_rate": 0.00010342723482151936, "loss": 1.2174, "step": 4754 }, { "epoch": 0.9666598902215897, "grad_norm": 0.14010462164878845, "learning_rate": 0.00010340689514898812, "loss": 1.0363, "step": 4755 }, { "epoch": 0.9668631835738971, "grad_norm": 0.13562703132629395, "learning_rate": 0.00010338655547645684, "loss": 0.9911, "step": 4756 }, { "epoch": 0.9670664769262045, "grad_norm": 0.1469573825597763, "learning_rate": 0.00010336621580392556, "loss": 1.0719, "step": 4757 }, { "epoch": 0.9672697702785119, "grad_norm": 0.1374790370464325, "learning_rate": 0.00010334587613139428, "loss": 1.1695, "step": 4758 }, { "epoch": 0.9674730636308193, "grad_norm": 0.12282276153564453, "learning_rate": 0.00010332553645886303, "loss": 0.9288, "step": 4759 }, { "epoch": 0.9676763569831267, "grad_norm": 0.12597915530204773, "learning_rate": 0.00010330519678633175, "loss": 0.9853, "step": 4760 }, { "epoch": 0.967879650335434, "grad_norm": 0.13501757383346558, "learning_rate": 0.00010328485711380048, "loss": 1.0153, "step": 4761 }, { "epoch": 0.9680829436877414, "grad_norm": 0.1333313286304474, "learning_rate": 0.00010326451744126919, "loss": 1.0339, "step": 4762 }, { "epoch": 0.9682862370400488, "grad_norm": 0.13838358223438263, "learning_rate": 0.00010324417776873794, "loss": 0.9584, "step": 4763 }, { "epoch": 0.9684895303923562, "grad_norm": 0.14973820745944977, "learning_rate": 0.00010322383809620666, "loss": 0.9757, "step": 4764 }, { "epoch": 0.9686928237446636, "grad_norm": 0.12162914872169495, "learning_rate": 0.00010320349842367539, "loss": 0.9844, "step": 4765 }, { "epoch": 0.9688961170969709, "grad_norm": 0.1409245729446411, "learning_rate": 0.0001031831587511441, "loss": 1.0499, "step": 4766 }, { "epoch": 0.9690994104492783, "grad_norm": 0.1342407464981079, "learning_rate": 0.00010316281907861285, "loss": 1.0489, "step": 4767 }, { "epoch": 0.9693027038015857, "grad_norm": 0.12758475542068481, "learning_rate": 0.00010314247940608157, "loss": 0.8951, "step": 4768 }, { "epoch": 0.9695059971538931, "grad_norm": 0.13202863931655884, "learning_rate": 0.0001031221397335503, "loss": 1.0063, "step": 4769 }, { "epoch": 0.9697092905062005, "grad_norm": 0.14444518089294434, "learning_rate": 0.00010310180006101901, "loss": 1.1248, "step": 4770 }, { "epoch": 0.9699125838585079, "grad_norm": 0.12190812826156616, "learning_rate": 0.00010308146038848776, "loss": 0.9866, "step": 4771 }, { "epoch": 0.9701158772108152, "grad_norm": 0.1404780000448227, "learning_rate": 0.00010306112071595649, "loss": 1.1731, "step": 4772 }, { "epoch": 0.9703191705631226, "grad_norm": 0.13559852540493011, "learning_rate": 0.00010304078104342521, "loss": 0.8862, "step": 4773 }, { "epoch": 0.97052246391543, "grad_norm": 0.12813040614128113, "learning_rate": 0.00010302044137089392, "loss": 1.0137, "step": 4774 }, { "epoch": 0.9707257572677374, "grad_norm": 0.14801639318466187, "learning_rate": 0.00010300010169836267, "loss": 1.1613, "step": 4775 }, { "epoch": 0.9709290506200448, "grad_norm": 0.13504531979560852, "learning_rate": 0.0001029797620258314, "loss": 1.0006, "step": 4776 }, { "epoch": 0.9711323439723522, "grad_norm": 0.14465901255607605, "learning_rate": 0.00010295942235330012, "loss": 1.2234, "step": 4777 }, { "epoch": 0.9713356373246594, "grad_norm": 0.1441653072834015, "learning_rate": 0.00010293908268076885, "loss": 1.1343, "step": 4778 }, { "epoch": 0.9715389306769668, "grad_norm": 0.14294147491455078, "learning_rate": 0.00010291874300823756, "loss": 1.0931, "step": 4779 }, { "epoch": 0.9717422240292742, "grad_norm": 0.13316182792186737, "learning_rate": 0.00010289840333570631, "loss": 0.9781, "step": 4780 }, { "epoch": 0.9719455173815816, "grad_norm": 0.12570516765117645, "learning_rate": 0.00010287806366317503, "loss": 0.9451, "step": 4781 }, { "epoch": 0.972148810733889, "grad_norm": 0.14120420813560486, "learning_rate": 0.00010285772399064376, "loss": 1.0823, "step": 4782 }, { "epoch": 0.9723521040861964, "grad_norm": 0.12957200407981873, "learning_rate": 0.00010283738431811247, "loss": 0.9029, "step": 4783 }, { "epoch": 0.9725553974385037, "grad_norm": 0.1534145623445511, "learning_rate": 0.00010281704464558122, "loss": 1.0809, "step": 4784 }, { "epoch": 0.9727586907908111, "grad_norm": 0.1441192328929901, "learning_rate": 0.00010279670497304994, "loss": 1.0464, "step": 4785 }, { "epoch": 0.9729619841431185, "grad_norm": 0.14043961465358734, "learning_rate": 0.00010277636530051867, "loss": 1.2524, "step": 4786 }, { "epoch": 0.9731652774954259, "grad_norm": 0.1316906362771988, "learning_rate": 0.00010275602562798738, "loss": 0.9884, "step": 4787 }, { "epoch": 0.9733685708477333, "grad_norm": 0.14289647340774536, "learning_rate": 0.00010273568595545613, "loss": 1.0516, "step": 4788 }, { "epoch": 0.9735718642000406, "grad_norm": 0.14448580145835876, "learning_rate": 0.00010271534628292486, "loss": 1.0681, "step": 4789 }, { "epoch": 0.973775157552348, "grad_norm": 0.1232059895992279, "learning_rate": 0.00010269500661039358, "loss": 0.8642, "step": 4790 }, { "epoch": 0.9739784509046554, "grad_norm": 0.14698442816734314, "learning_rate": 0.00010267466693786229, "loss": 1.3623, "step": 4791 }, { "epoch": 0.9741817442569628, "grad_norm": 0.13110321760177612, "learning_rate": 0.00010265432726533104, "loss": 0.9757, "step": 4792 }, { "epoch": 0.9743850376092702, "grad_norm": 0.14491407573223114, "learning_rate": 0.00010263398759279977, "loss": 1.0556, "step": 4793 }, { "epoch": 0.9745883309615776, "grad_norm": 0.13034255802631378, "learning_rate": 0.00010261364792026849, "loss": 1.0508, "step": 4794 }, { "epoch": 0.9747916243138849, "grad_norm": 0.14367951452732086, "learning_rate": 0.0001025933082477372, "loss": 1.0724, "step": 4795 }, { "epoch": 0.9749949176661923, "grad_norm": 0.14647988975048065, "learning_rate": 0.00010257296857520595, "loss": 1.2706, "step": 4796 }, { "epoch": 0.9751982110184997, "grad_norm": 0.1294867992401123, "learning_rate": 0.00010255262890267468, "loss": 0.8556, "step": 4797 }, { "epoch": 0.9754015043708071, "grad_norm": 0.1383471041917801, "learning_rate": 0.0001025322892301434, "loss": 1.0519, "step": 4798 }, { "epoch": 0.9756047977231145, "grad_norm": 0.12977120280265808, "learning_rate": 0.00010251194955761211, "loss": 0.8823, "step": 4799 }, { "epoch": 0.9758080910754219, "grad_norm": 0.13635462522506714, "learning_rate": 0.00010249160988508087, "loss": 1.0164, "step": 4800 }, { "epoch": 0.9760113844277292, "grad_norm": 0.14203500747680664, "learning_rate": 0.00010247127021254959, "loss": 1.0384, "step": 4801 }, { "epoch": 0.9762146777800366, "grad_norm": 0.12203938513994217, "learning_rate": 0.00010245093054001831, "loss": 0.9222, "step": 4802 }, { "epoch": 0.976417971132344, "grad_norm": 0.16445662081241608, "learning_rate": 0.00010243059086748703, "loss": 1.2655, "step": 4803 }, { "epoch": 0.9766212644846514, "grad_norm": 0.13465256989002228, "learning_rate": 0.00010241025119495578, "loss": 1.0028, "step": 4804 }, { "epoch": 0.9768245578369588, "grad_norm": 0.13303688168525696, "learning_rate": 0.0001023899115224245, "loss": 0.9123, "step": 4805 }, { "epoch": 0.9770278511892662, "grad_norm": 0.15964846312999725, "learning_rate": 0.00010236957184989323, "loss": 1.203, "step": 4806 }, { "epoch": 0.9772311445415734, "grad_norm": 0.13582561910152435, "learning_rate": 0.00010234923217736194, "loss": 1.0838, "step": 4807 }, { "epoch": 0.9774344378938808, "grad_norm": 0.12856504321098328, "learning_rate": 0.00010232889250483069, "loss": 1.075, "step": 4808 }, { "epoch": 0.9776377312461882, "grad_norm": 0.15734675526618958, "learning_rate": 0.00010230855283229941, "loss": 1.0715, "step": 4809 }, { "epoch": 0.9778410245984956, "grad_norm": 0.12550866603851318, "learning_rate": 0.00010228821315976814, "loss": 0.9564, "step": 4810 }, { "epoch": 0.978044317950803, "grad_norm": 0.14596353471279144, "learning_rate": 0.00010226787348723685, "loss": 1.1543, "step": 4811 }, { "epoch": 0.9782476113031104, "grad_norm": 0.13755320012569427, "learning_rate": 0.0001022475338147056, "loss": 1.1465, "step": 4812 }, { "epoch": 0.9784509046554177, "grad_norm": 0.15162555873394012, "learning_rate": 0.00010222719414217432, "loss": 1.1004, "step": 4813 }, { "epoch": 0.9786541980077251, "grad_norm": 0.1351086050271988, "learning_rate": 0.00010220685446964305, "loss": 1.0812, "step": 4814 }, { "epoch": 0.9788574913600325, "grad_norm": 0.14810827374458313, "learning_rate": 0.00010218651479711176, "loss": 1.1604, "step": 4815 }, { "epoch": 0.9790607847123399, "grad_norm": 0.13627979159355164, "learning_rate": 0.00010216617512458051, "loss": 1.0392, "step": 4816 }, { "epoch": 0.9792640780646473, "grad_norm": 0.14900024235248566, "learning_rate": 0.00010214583545204924, "loss": 1.0946, "step": 4817 }, { "epoch": 0.9794673714169546, "grad_norm": 0.14076335728168488, "learning_rate": 0.00010212549577951796, "loss": 1.0511, "step": 4818 }, { "epoch": 0.979670664769262, "grad_norm": 0.12886211276054382, "learning_rate": 0.00010210515610698667, "loss": 1.0244, "step": 4819 }, { "epoch": 0.9798739581215694, "grad_norm": 0.1440308839082718, "learning_rate": 0.0001020848164344554, "loss": 1.1237, "step": 4820 }, { "epoch": 0.9800772514738768, "grad_norm": 0.1480611264705658, "learning_rate": 0.00010206447676192415, "loss": 1.1623, "step": 4821 }, { "epoch": 0.9802805448261842, "grad_norm": 0.1361786127090454, "learning_rate": 0.00010204413708939287, "loss": 1.0342, "step": 4822 }, { "epoch": 0.9804838381784916, "grad_norm": 0.13941383361816406, "learning_rate": 0.00010202379741686158, "loss": 1.2155, "step": 4823 }, { "epoch": 0.9806871315307989, "grad_norm": 0.13382181525230408, "learning_rate": 0.0001020034577443303, "loss": 1.0683, "step": 4824 }, { "epoch": 0.9808904248831063, "grad_norm": 0.14079181849956512, "learning_rate": 0.00010198311807179906, "loss": 1.1914, "step": 4825 }, { "epoch": 0.9810937182354137, "grad_norm": 0.1520659178495407, "learning_rate": 0.00010196277839926778, "loss": 1.0614, "step": 4826 }, { "epoch": 0.9812970115877211, "grad_norm": 0.12844201922416687, "learning_rate": 0.0001019424387267365, "loss": 0.9881, "step": 4827 }, { "epoch": 0.9815003049400285, "grad_norm": 0.12539590895175934, "learning_rate": 0.00010192209905420522, "loss": 1.0109, "step": 4828 }, { "epoch": 0.9817035982923359, "grad_norm": 0.11923157423734665, "learning_rate": 0.00010190175938167397, "loss": 0.8702, "step": 4829 }, { "epoch": 0.9819068916446432, "grad_norm": 0.1370554268360138, "learning_rate": 0.0001018814197091427, "loss": 0.9956, "step": 4830 }, { "epoch": 0.9821101849969506, "grad_norm": 0.14605580270290375, "learning_rate": 0.0001018610800366114, "loss": 1.1405, "step": 4831 }, { "epoch": 0.982313478349258, "grad_norm": 0.13594669103622437, "learning_rate": 0.00010184074036408013, "loss": 1.04, "step": 4832 }, { "epoch": 0.9825167717015654, "grad_norm": 0.13905544579029083, "learning_rate": 0.00010182040069154888, "loss": 1.0421, "step": 4833 }, { "epoch": 0.9827200650538728, "grad_norm": 0.13536664843559265, "learning_rate": 0.0001018000610190176, "loss": 0.952, "step": 4834 }, { "epoch": 0.9829233584061802, "grad_norm": 0.12906044721603394, "learning_rate": 0.00010177972134648633, "loss": 1.0468, "step": 4835 }, { "epoch": 0.9831266517584875, "grad_norm": 0.14076603949069977, "learning_rate": 0.00010175938167395504, "loss": 1.03, "step": 4836 }, { "epoch": 0.9833299451107949, "grad_norm": 0.12814532220363617, "learning_rate": 0.00010173904200142379, "loss": 0.9141, "step": 4837 }, { "epoch": 0.9835332384631023, "grad_norm": 0.13187144696712494, "learning_rate": 0.00010171870232889252, "loss": 0.9983, "step": 4838 }, { "epoch": 0.9837365318154097, "grad_norm": 0.12513421475887299, "learning_rate": 0.00010169836265636124, "loss": 0.9726, "step": 4839 }, { "epoch": 0.983939825167717, "grad_norm": 0.1411403864622116, "learning_rate": 0.00010167802298382995, "loss": 1.1544, "step": 4840 }, { "epoch": 0.9841431185200243, "grad_norm": 0.13814745843410492, "learning_rate": 0.0001016576833112987, "loss": 1.0902, "step": 4841 }, { "epoch": 0.9843464118723317, "grad_norm": 0.15102095901966095, "learning_rate": 0.00010163734363876743, "loss": 1.2006, "step": 4842 }, { "epoch": 0.9845497052246391, "grad_norm": 0.12632615864276886, "learning_rate": 0.00010161700396623615, "loss": 0.9465, "step": 4843 }, { "epoch": 0.9847529985769465, "grad_norm": 0.14967390894889832, "learning_rate": 0.00010159666429370486, "loss": 1.1275, "step": 4844 }, { "epoch": 0.9849562919292539, "grad_norm": 0.12356138229370117, "learning_rate": 0.00010157632462117362, "loss": 1.0253, "step": 4845 }, { "epoch": 0.9851595852815613, "grad_norm": 0.12783940136432648, "learning_rate": 0.00010155598494864234, "loss": 1.0647, "step": 4846 }, { "epoch": 0.9853628786338686, "grad_norm": 0.12966394424438477, "learning_rate": 0.00010153564527611106, "loss": 0.961, "step": 4847 }, { "epoch": 0.985566171986176, "grad_norm": 0.13176754117012024, "learning_rate": 0.00010151530560357977, "loss": 0.9036, "step": 4848 }, { "epoch": 0.9857694653384834, "grad_norm": 0.1355212777853012, "learning_rate": 0.00010149496593104853, "loss": 1.1102, "step": 4849 }, { "epoch": 0.9859727586907908, "grad_norm": 0.14852295815944672, "learning_rate": 0.00010147462625851725, "loss": 1.1925, "step": 4850 }, { "epoch": 0.9861760520430982, "grad_norm": 0.1206142008304596, "learning_rate": 0.00010145428658598598, "loss": 0.8656, "step": 4851 }, { "epoch": 0.9863793453954056, "grad_norm": 0.1338338702917099, "learning_rate": 0.00010143394691345469, "loss": 1.0639, "step": 4852 }, { "epoch": 0.9865826387477129, "grad_norm": 0.1332140564918518, "learning_rate": 0.00010141360724092344, "loss": 1.1448, "step": 4853 }, { "epoch": 0.9867859321000203, "grad_norm": 0.14103251695632935, "learning_rate": 0.00010139326756839216, "loss": 1.174, "step": 4854 }, { "epoch": 0.9869892254523277, "grad_norm": 0.13589176535606384, "learning_rate": 0.00010137292789586089, "loss": 0.9713, "step": 4855 }, { "epoch": 0.9871925188046351, "grad_norm": 0.13444828987121582, "learning_rate": 0.0001013525882233296, "loss": 1.0798, "step": 4856 }, { "epoch": 0.9873958121569425, "grad_norm": 0.15302203595638275, "learning_rate": 0.00010133224855079835, "loss": 1.1335, "step": 4857 }, { "epoch": 0.9875991055092499, "grad_norm": 0.1333010047674179, "learning_rate": 0.00010131190887826707, "loss": 1.0179, "step": 4858 }, { "epoch": 0.9878023988615572, "grad_norm": 0.1340804398059845, "learning_rate": 0.0001012915692057358, "loss": 0.9451, "step": 4859 }, { "epoch": 0.9880056922138646, "grad_norm": 0.14129623770713806, "learning_rate": 0.00010127122953320451, "loss": 1.1247, "step": 4860 }, { "epoch": 0.988208985566172, "grad_norm": 0.1130819022655487, "learning_rate": 0.00010125088986067323, "loss": 0.9078, "step": 4861 }, { "epoch": 0.9884122789184794, "grad_norm": 0.13647128641605377, "learning_rate": 0.00010123055018814199, "loss": 1.0182, "step": 4862 }, { "epoch": 0.9886155722707868, "grad_norm": 0.1271669566631317, "learning_rate": 0.00010121021051561071, "loss": 1.067, "step": 4863 }, { "epoch": 0.9888188656230942, "grad_norm": 0.12524248659610748, "learning_rate": 0.00010118987084307942, "loss": 0.9665, "step": 4864 }, { "epoch": 0.9890221589754015, "grad_norm": 0.13268783688545227, "learning_rate": 0.00010116953117054814, "loss": 1.1719, "step": 4865 }, { "epoch": 0.9892254523277089, "grad_norm": 0.13032928109169006, "learning_rate": 0.0001011491914980169, "loss": 1.1512, "step": 4866 }, { "epoch": 0.9894287456800163, "grad_norm": 0.1325322538614273, "learning_rate": 0.00010112885182548562, "loss": 0.954, "step": 4867 }, { "epoch": 0.9896320390323237, "grad_norm": 0.13537730276584625, "learning_rate": 0.00010110851215295433, "loss": 0.9071, "step": 4868 }, { "epoch": 0.9898353323846311, "grad_norm": 0.13004449009895325, "learning_rate": 0.00010108817248042306, "loss": 1.0135, "step": 4869 }, { "epoch": 0.9900386257369383, "grad_norm": 0.1332450807094574, "learning_rate": 0.00010106783280789181, "loss": 0.9139, "step": 4870 }, { "epoch": 0.9902419190892457, "grad_norm": 0.1302601844072342, "learning_rate": 0.00010104749313536053, "loss": 1.0315, "step": 4871 }, { "epoch": 0.9904452124415531, "grad_norm": 0.13632024824619293, "learning_rate": 0.00010102715346282924, "loss": 0.8711, "step": 4872 }, { "epoch": 0.9906485057938605, "grad_norm": 0.1373220980167389, "learning_rate": 0.00010100681379029797, "loss": 1.1581, "step": 4873 }, { "epoch": 0.9908517991461679, "grad_norm": 0.11876034736633301, "learning_rate": 0.00010098647411776672, "loss": 0.9445, "step": 4874 }, { "epoch": 0.9910550924984753, "grad_norm": 0.1467241793870926, "learning_rate": 0.00010096613444523544, "loss": 1.1618, "step": 4875 }, { "epoch": 0.9912583858507826, "grad_norm": 0.13242608308792114, "learning_rate": 0.00010094579477270415, "loss": 0.9251, "step": 4876 }, { "epoch": 0.99146167920309, "grad_norm": 0.1570916622877121, "learning_rate": 0.00010092545510017288, "loss": 1.1391, "step": 4877 }, { "epoch": 0.9916649725553974, "grad_norm": 0.1410514861345291, "learning_rate": 0.00010090511542764163, "loss": 1.067, "step": 4878 }, { "epoch": 0.9918682659077048, "grad_norm": 0.13417792320251465, "learning_rate": 0.00010088477575511036, "loss": 1.0545, "step": 4879 }, { "epoch": 0.9920715592600122, "grad_norm": 0.14980773627758026, "learning_rate": 0.00010086443608257907, "loss": 1.1653, "step": 4880 }, { "epoch": 0.9922748526123196, "grad_norm": 0.1144283264875412, "learning_rate": 0.00010084409641004779, "loss": 0.9679, "step": 4881 }, { "epoch": 0.9924781459646269, "grad_norm": 0.13899140059947968, "learning_rate": 0.00010082375673751654, "loss": 1.0324, "step": 4882 }, { "epoch": 0.9926814393169343, "grad_norm": 0.13620680570602417, "learning_rate": 0.00010080341706498527, "loss": 1.0691, "step": 4883 }, { "epoch": 0.9928847326692417, "grad_norm": 0.13614429533481598, "learning_rate": 0.00010078307739245398, "loss": 1.0911, "step": 4884 }, { "epoch": 0.9930880260215491, "grad_norm": 0.15049585700035095, "learning_rate": 0.0001007627377199227, "loss": 1.2096, "step": 4885 }, { "epoch": 0.9932913193738565, "grad_norm": 0.11920803040266037, "learning_rate": 0.00010074239804739145, "loss": 0.8579, "step": 4886 }, { "epoch": 0.9934946127261639, "grad_norm": 0.148103266954422, "learning_rate": 0.00010072205837486018, "loss": 0.9659, "step": 4887 }, { "epoch": 0.9936979060784712, "grad_norm": 0.14606502652168274, "learning_rate": 0.00010070171870232889, "loss": 1.1369, "step": 4888 }, { "epoch": 0.9939011994307786, "grad_norm": 0.15074527263641357, "learning_rate": 0.00010068137902979761, "loss": 1.1467, "step": 4889 }, { "epoch": 0.994104492783086, "grad_norm": 0.1285044550895691, "learning_rate": 0.00010066103935726636, "loss": 0.8631, "step": 4890 }, { "epoch": 0.9943077861353934, "grad_norm": 0.14599795639514923, "learning_rate": 0.00010064069968473509, "loss": 1.136, "step": 4891 }, { "epoch": 0.9945110794877008, "grad_norm": 0.15505965054035187, "learning_rate": 0.0001006203600122038, "loss": 1.2104, "step": 4892 }, { "epoch": 0.9947143728400081, "grad_norm": 0.13279956579208374, "learning_rate": 0.00010060002033967252, "loss": 1.0342, "step": 4893 }, { "epoch": 0.9949176661923155, "grad_norm": 0.1346520632505417, "learning_rate": 0.00010057968066714128, "loss": 1.0414, "step": 4894 }, { "epoch": 0.9951209595446229, "grad_norm": 0.1434224247932434, "learning_rate": 0.00010055934099461, "loss": 1.1259, "step": 4895 }, { "epoch": 0.9953242528969303, "grad_norm": 0.1336824744939804, "learning_rate": 0.00010053900132207873, "loss": 1.1013, "step": 4896 }, { "epoch": 0.9955275462492377, "grad_norm": 0.13659413158893585, "learning_rate": 0.00010051866164954744, "loss": 0.9543, "step": 4897 }, { "epoch": 0.9957308396015451, "grad_norm": 0.13040059804916382, "learning_rate": 0.00010049832197701619, "loss": 0.9669, "step": 4898 }, { "epoch": 0.9959341329538524, "grad_norm": 0.1415984034538269, "learning_rate": 0.00010047798230448491, "loss": 1.0195, "step": 4899 }, { "epoch": 0.9961374263061598, "grad_norm": 0.13746584951877594, "learning_rate": 0.00010045764263195364, "loss": 1.0605, "step": 4900 }, { "epoch": 0.9963407196584672, "grad_norm": 0.1430116593837738, "learning_rate": 0.00010043730295942235, "loss": 1.107, "step": 4901 }, { "epoch": 0.9965440130107746, "grad_norm": 0.11865589022636414, "learning_rate": 0.00010041696328689107, "loss": 0.8887, "step": 4902 }, { "epoch": 0.996747306363082, "grad_norm": 0.11495467275381088, "learning_rate": 0.00010039662361435982, "loss": 0.8365, "step": 4903 }, { "epoch": 0.9969505997153894, "grad_norm": 0.1354401409626007, "learning_rate": 0.00010037628394182855, "loss": 1.1705, "step": 4904 }, { "epoch": 0.9971538930676966, "grad_norm": 0.13998205959796906, "learning_rate": 0.00010035594426929726, "loss": 1.0365, "step": 4905 }, { "epoch": 0.997357186420004, "grad_norm": 0.15044035017490387, "learning_rate": 0.00010033560459676598, "loss": 1.1061, "step": 4906 }, { "epoch": 0.9975604797723114, "grad_norm": 0.1416459083557129, "learning_rate": 0.00010031526492423473, "loss": 1.1155, "step": 4907 }, { "epoch": 0.9977637731246188, "grad_norm": 0.13485343754291534, "learning_rate": 0.00010029492525170346, "loss": 0.9937, "step": 4908 }, { "epoch": 0.9979670664769262, "grad_norm": 0.14948885142803192, "learning_rate": 0.00010027458557917217, "loss": 1.1689, "step": 4909 }, { "epoch": 0.9981703598292336, "grad_norm": 0.1309768706560135, "learning_rate": 0.0001002542459066409, "loss": 0.9428, "step": 4910 }, { "epoch": 0.9983736531815409, "grad_norm": 0.11928943544626236, "learning_rate": 0.00010023390623410965, "loss": 0.8238, "step": 4911 }, { "epoch": 0.9985769465338483, "grad_norm": 0.1389857530593872, "learning_rate": 0.00010021356656157837, "loss": 1.0459, "step": 4912 }, { "epoch": 0.9987802398861557, "grad_norm": 0.14047744870185852, "learning_rate": 0.00010019322688904708, "loss": 0.9594, "step": 4913 }, { "epoch": 0.9989835332384631, "grad_norm": 0.1307019144296646, "learning_rate": 0.0001001728872165158, "loss": 1.1549, "step": 4914 }, { "epoch": 0.9991868265907705, "grad_norm": 0.13652239739894867, "learning_rate": 0.00010015254754398456, "loss": 1.142, "step": 4915 }, { "epoch": 0.9993901199430779, "grad_norm": 0.1404002457857132, "learning_rate": 0.00010013220787145328, "loss": 1.0275, "step": 4916 }, { "epoch": 0.9995934132953852, "grad_norm": 0.14137892425060272, "learning_rate": 0.00010011186819892199, "loss": 1.1169, "step": 4917 }, { "epoch": 0.9997967066476926, "grad_norm": 0.12362517416477203, "learning_rate": 0.00010009152852639072, "loss": 0.9733, "step": 4918 }, { "epoch": 1.0, "grad_norm": 0.16257604956626892, "learning_rate": 0.00010007118885385947, "loss": 1.214, "step": 4919 }, { "epoch": 1.0002032933523073, "grad_norm": 0.13455824553966522, "learning_rate": 0.0001000508491813282, "loss": 1.1717, "step": 4920 }, { "epoch": 1.0004065867046148, "grad_norm": 0.1244397908449173, "learning_rate": 0.0001000305095087969, "loss": 0.9873, "step": 4921 }, { "epoch": 1.000609880056922, "grad_norm": 0.13148358464241028, "learning_rate": 0.00010001016983626563, "loss": 1.0512, "step": 4922 }, { "epoch": 1.0008131734092296, "grad_norm": 0.14207464456558228, "learning_rate": 9.998983016373437e-05, "loss": 1.1071, "step": 4923 }, { "epoch": 1.0010164667615369, "grad_norm": 0.1350506693124771, "learning_rate": 9.99694904912031e-05, "loss": 1.1134, "step": 4924 }, { "epoch": 1.0012197601138442, "grad_norm": 0.14575833082199097, "learning_rate": 9.994915081867182e-05, "loss": 1.0793, "step": 4925 }, { "epoch": 1.0014230534661517, "grad_norm": 0.13254649937152863, "learning_rate": 9.992881114614055e-05, "loss": 0.9843, "step": 4926 }, { "epoch": 1.001626346818459, "grad_norm": 0.13385853171348572, "learning_rate": 9.990847147360928e-05, "loss": 1.0446, "step": 4927 }, { "epoch": 1.0018296401707665, "grad_norm": 0.13908478617668152, "learning_rate": 9.988813180107802e-05, "loss": 0.9968, "step": 4928 }, { "epoch": 1.0020329335230738, "grad_norm": 0.13923251628875732, "learning_rate": 9.986779212854673e-05, "loss": 1.0023, "step": 4929 }, { "epoch": 1.0022362268753813, "grad_norm": 0.1373911201953888, "learning_rate": 9.984745245601547e-05, "loss": 1.1753, "step": 4930 }, { "epoch": 1.0024395202276886, "grad_norm": 0.13491371273994446, "learning_rate": 9.982711278348419e-05, "loss": 0.893, "step": 4931 }, { "epoch": 1.0026428135799959, "grad_norm": 0.12279137223958969, "learning_rate": 9.980677311095293e-05, "loss": 0.8334, "step": 4932 }, { "epoch": 1.0028461069323034, "grad_norm": 0.1489049643278122, "learning_rate": 9.978643343842164e-05, "loss": 1.2196, "step": 4933 }, { "epoch": 1.0030494002846106, "grad_norm": 0.15800416469573975, "learning_rate": 9.976609376589038e-05, "loss": 1.1065, "step": 4934 }, { "epoch": 1.0032526936369182, "grad_norm": 0.12695717811584473, "learning_rate": 9.97457540933591e-05, "loss": 0.8969, "step": 4935 }, { "epoch": 1.0034559869892254, "grad_norm": 0.12970462441444397, "learning_rate": 9.972541442082784e-05, "loss": 0.9748, "step": 4936 }, { "epoch": 1.0036592803415327, "grad_norm": 0.13583384454250336, "learning_rate": 9.970507474829655e-05, "loss": 0.9943, "step": 4937 }, { "epoch": 1.0038625736938402, "grad_norm": 0.13171210885047913, "learning_rate": 9.968473507576529e-05, "loss": 1.0066, "step": 4938 }, { "epoch": 1.0040658670461475, "grad_norm": 0.140077605843544, "learning_rate": 9.966439540323401e-05, "loss": 1.0276, "step": 4939 }, { "epoch": 1.004269160398455, "grad_norm": 0.13248348236083984, "learning_rate": 9.964405573070275e-05, "loss": 0.9836, "step": 4940 }, { "epoch": 1.0044724537507623, "grad_norm": 0.1502828449010849, "learning_rate": 9.962371605817146e-05, "loss": 1.175, "step": 4941 }, { "epoch": 1.0046757471030698, "grad_norm": 0.14695493876934052, "learning_rate": 9.96033763856402e-05, "loss": 0.963, "step": 4942 }, { "epoch": 1.0048790404553771, "grad_norm": 0.14214938879013062, "learning_rate": 9.958303671310892e-05, "loss": 1.0651, "step": 4943 }, { "epoch": 1.0050823338076844, "grad_norm": 0.14761728048324585, "learning_rate": 9.956269704057765e-05, "loss": 0.9907, "step": 4944 }, { "epoch": 1.005285627159992, "grad_norm": 0.13151785731315613, "learning_rate": 9.954235736804637e-05, "loss": 0.8793, "step": 4945 }, { "epoch": 1.0054889205122992, "grad_norm": 0.1452670693397522, "learning_rate": 9.95220176955151e-05, "loss": 1.0906, "step": 4946 }, { "epoch": 1.0056922138646067, "grad_norm": 0.13930079340934753, "learning_rate": 9.950167802298384e-05, "loss": 0.9598, "step": 4947 }, { "epoch": 1.005895507216914, "grad_norm": 0.12317246198654175, "learning_rate": 9.948133835045256e-05, "loss": 0.9429, "step": 4948 }, { "epoch": 1.0060988005692213, "grad_norm": 0.13415516912937164, "learning_rate": 9.946099867792128e-05, "loss": 1.0848, "step": 4949 }, { "epoch": 1.0063020939215288, "grad_norm": 0.13976556062698364, "learning_rate": 9.944065900539001e-05, "loss": 0.934, "step": 4950 }, { "epoch": 1.006505387273836, "grad_norm": 0.13384398818016052, "learning_rate": 9.942031933285875e-05, "loss": 0.955, "step": 4951 }, { "epoch": 1.0067086806261436, "grad_norm": 0.14308519661426544, "learning_rate": 9.939997966032747e-05, "loss": 0.9543, "step": 4952 }, { "epoch": 1.006911973978451, "grad_norm": 0.14340607821941376, "learning_rate": 9.937963998779621e-05, "loss": 1.047, "step": 4953 }, { "epoch": 1.0071152673307582, "grad_norm": 0.14457905292510986, "learning_rate": 9.935930031526492e-05, "loss": 0.9937, "step": 4954 }, { "epoch": 1.0073185606830657, "grad_norm": 0.13555844128131866, "learning_rate": 9.933896064273366e-05, "loss": 1.0211, "step": 4955 }, { "epoch": 1.007521854035373, "grad_norm": 0.1536429524421692, "learning_rate": 9.931862097020238e-05, "loss": 1.188, "step": 4956 }, { "epoch": 1.0077251473876805, "grad_norm": 0.13193362951278687, "learning_rate": 9.929828129767112e-05, "loss": 0.9143, "step": 4957 }, { "epoch": 1.0079284407399878, "grad_norm": 0.14066417515277863, "learning_rate": 9.927794162513983e-05, "loss": 1.0662, "step": 4958 }, { "epoch": 1.0081317340922953, "grad_norm": 0.13579119741916656, "learning_rate": 9.925760195260857e-05, "loss": 0.8999, "step": 4959 }, { "epoch": 1.0083350274446026, "grad_norm": 0.14911122620105743, "learning_rate": 9.92372622800773e-05, "loss": 1.3171, "step": 4960 }, { "epoch": 1.0085383207969099, "grad_norm": 0.1447262316942215, "learning_rate": 9.921692260754603e-05, "loss": 1.0899, "step": 4961 }, { "epoch": 1.0087416141492174, "grad_norm": 0.1513487845659256, "learning_rate": 9.919658293501474e-05, "loss": 1.0844, "step": 4962 }, { "epoch": 1.0089449075015247, "grad_norm": 0.1470583975315094, "learning_rate": 9.917624326248348e-05, "loss": 1.1176, "step": 4963 }, { "epoch": 1.0091482008538322, "grad_norm": 0.13596630096435547, "learning_rate": 9.91559035899522e-05, "loss": 1.0829, "step": 4964 }, { "epoch": 1.0093514942061395, "grad_norm": 0.1411203145980835, "learning_rate": 9.913556391742094e-05, "loss": 1.0523, "step": 4965 }, { "epoch": 1.0095547875584467, "grad_norm": 0.14842981100082397, "learning_rate": 9.911522424488965e-05, "loss": 1.0513, "step": 4966 }, { "epoch": 1.0097580809107543, "grad_norm": 0.1505335569381714, "learning_rate": 9.909488457235839e-05, "loss": 0.9964, "step": 4967 }, { "epoch": 1.0099613742630615, "grad_norm": 0.12677620351314545, "learning_rate": 9.907454489982712e-05, "loss": 0.9546, "step": 4968 }, { "epoch": 1.010164667615369, "grad_norm": 0.13651777803897858, "learning_rate": 9.905420522729585e-05, "loss": 1.0823, "step": 4969 }, { "epoch": 1.0103679609676763, "grad_norm": 0.1392572969198227, "learning_rate": 9.903386555476457e-05, "loss": 0.9032, "step": 4970 }, { "epoch": 1.0105712543199838, "grad_norm": 0.16775289177894592, "learning_rate": 9.90135258822333e-05, "loss": 1.1434, "step": 4971 }, { "epoch": 1.0107745476722911, "grad_norm": 0.1534387320280075, "learning_rate": 9.899318620970203e-05, "loss": 1.166, "step": 4972 }, { "epoch": 1.0109778410245984, "grad_norm": 0.14180676639080048, "learning_rate": 9.897284653717077e-05, "loss": 1.0688, "step": 4973 }, { "epoch": 1.011181134376906, "grad_norm": 0.13633224368095398, "learning_rate": 9.895250686463948e-05, "loss": 1.0413, "step": 4974 }, { "epoch": 1.0113844277292132, "grad_norm": 0.15582099556922913, "learning_rate": 9.893216719210822e-05, "loss": 1.256, "step": 4975 }, { "epoch": 1.0115877210815207, "grad_norm": 0.16052106022834778, "learning_rate": 9.891182751957694e-05, "loss": 1.3048, "step": 4976 }, { "epoch": 1.011791014433828, "grad_norm": 0.15733475983142853, "learning_rate": 9.889148784704568e-05, "loss": 1.1024, "step": 4977 }, { "epoch": 1.0119943077861353, "grad_norm": 0.1398230642080307, "learning_rate": 9.887114817451439e-05, "loss": 1.0691, "step": 4978 }, { "epoch": 1.0121976011384428, "grad_norm": 0.15575705468654633, "learning_rate": 9.885080850198313e-05, "loss": 1.0019, "step": 4979 }, { "epoch": 1.01240089449075, "grad_norm": 0.13900624215602875, "learning_rate": 9.883046882945185e-05, "loss": 1.0318, "step": 4980 }, { "epoch": 1.0126041878430576, "grad_norm": 0.1266520619392395, "learning_rate": 9.881012915692059e-05, "loss": 0.9455, "step": 4981 }, { "epoch": 1.012807481195365, "grad_norm": 0.14327497780323029, "learning_rate": 9.87897894843893e-05, "loss": 1.1133, "step": 4982 }, { "epoch": 1.0130107745476722, "grad_norm": 0.14177127182483673, "learning_rate": 9.876944981185804e-05, "loss": 0.9969, "step": 4983 }, { "epoch": 1.0132140678999797, "grad_norm": 0.14066456258296967, "learning_rate": 9.874911013932676e-05, "loss": 0.9261, "step": 4984 }, { "epoch": 1.013417361252287, "grad_norm": 0.14441144466400146, "learning_rate": 9.872877046679549e-05, "loss": 1.0065, "step": 4985 }, { "epoch": 1.0136206546045945, "grad_norm": 0.12858086824417114, "learning_rate": 9.870843079426421e-05, "loss": 0.9306, "step": 4986 }, { "epoch": 1.0138239479569018, "grad_norm": 0.1305333971977234, "learning_rate": 9.868809112173294e-05, "loss": 1.0058, "step": 4987 }, { "epoch": 1.0140272413092093, "grad_norm": 0.1652311384677887, "learning_rate": 9.866775144920167e-05, "loss": 1.1992, "step": 4988 }, { "epoch": 1.0142305346615166, "grad_norm": 0.1123913899064064, "learning_rate": 9.86474117766704e-05, "loss": 0.8779, "step": 4989 }, { "epoch": 1.0144338280138239, "grad_norm": 0.15201310813426971, "learning_rate": 9.862707210413912e-05, "loss": 1.1553, "step": 4990 }, { "epoch": 1.0146371213661314, "grad_norm": 0.13241463899612427, "learning_rate": 9.860673243160785e-05, "loss": 0.9276, "step": 4991 }, { "epoch": 1.0148404147184387, "grad_norm": 0.15238632261753082, "learning_rate": 9.858639275907659e-05, "loss": 1.1528, "step": 4992 }, { "epoch": 1.0150437080707462, "grad_norm": 0.13771474361419678, "learning_rate": 9.856605308654531e-05, "loss": 1.1871, "step": 4993 }, { "epoch": 1.0152470014230535, "grad_norm": 0.135041743516922, "learning_rate": 9.854571341401403e-05, "loss": 0.9718, "step": 4994 }, { "epoch": 1.0154502947753608, "grad_norm": 0.14199897646903992, "learning_rate": 9.852537374148276e-05, "loss": 1.0454, "step": 4995 }, { "epoch": 1.0156535881276683, "grad_norm": 0.14556720852851868, "learning_rate": 9.85050340689515e-05, "loss": 1.102, "step": 4996 }, { "epoch": 1.0158568814799755, "grad_norm": 0.1287354975938797, "learning_rate": 9.848469439642022e-05, "loss": 0.929, "step": 4997 }, { "epoch": 1.016060174832283, "grad_norm": 0.15297791361808777, "learning_rate": 9.846435472388895e-05, "loss": 1.0234, "step": 4998 }, { "epoch": 1.0162634681845903, "grad_norm": 0.1549387276172638, "learning_rate": 9.844401505135767e-05, "loss": 1.1666, "step": 4999 }, { "epoch": 1.0164667615368979, "grad_norm": 0.15455321967601776, "learning_rate": 9.842367537882641e-05, "loss": 1.0845, "step": 5000 } ], "logging_steps": 1, "max_steps": 9838, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.801020652405637e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }