| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.24395202276885547, | |
| "eval_steps": 500, | |
| "global_step": 600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0004065867046147591, | |
| "grad_norm": 0.22144322097301483, | |
| "learning_rate": 0.0, | |
| "loss": 1.3598, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0008131734092295182, | |
| "grad_norm": 0.199473574757576, | |
| "learning_rate": 4e-05, | |
| "loss": 1.405, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0012197601138442774, | |
| "grad_norm": 0.20758001506328583, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2815, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0016263468184590363, | |
| "grad_norm": 0.21362783014774323, | |
| "learning_rate": 0.00012, | |
| "loss": 1.245, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0020329335230737954, | |
| "grad_norm": 0.24631692469120026, | |
| "learning_rate": 0.00016, | |
| "loss": 1.3086, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.002439520227688555, | |
| "grad_norm": 0.20009225606918335, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2443, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0028461069323033137, | |
| "grad_norm": 0.1735246330499649, | |
| "learning_rate": 0.00019995929167514756, | |
| "loss": 1.1878, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0032526936369180726, | |
| "grad_norm": 0.18904437124729156, | |
| "learning_rate": 0.00019991858335029514, | |
| "loss": 1.2478, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.003659280341532832, | |
| "grad_norm": 0.1645248979330063, | |
| "learning_rate": 0.0001998778750254427, | |
| "loss": 1.2098, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.004065867046147591, | |
| "grad_norm": 0.22034819424152374, | |
| "learning_rate": 0.00019983716670059028, | |
| "loss": 1.1183, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00447245375076235, | |
| "grad_norm": 0.3233634829521179, | |
| "learning_rate": 0.00019979645837573783, | |
| "loss": 1.0974, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.00487904045537711, | |
| "grad_norm": 0.2592090368270874, | |
| "learning_rate": 0.00019975575005088542, | |
| "loss": 1.1611, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.005285627159991868, | |
| "grad_norm": 0.14754348993301392, | |
| "learning_rate": 0.000199715041726033, | |
| "loss": 1.1932, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.005692213864606627, | |
| "grad_norm": 0.09341374039649963, | |
| "learning_rate": 0.00019967433340118055, | |
| "loss": 1.348, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.006098800569221387, | |
| "grad_norm": 0.10229193419218063, | |
| "learning_rate": 0.00019963362507632813, | |
| "loss": 1.0927, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.006505387273836145, | |
| "grad_norm": 0.14015386998653412, | |
| "learning_rate": 0.00019959291675147569, | |
| "loss": 1.2263, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.006911973978450905, | |
| "grad_norm": 0.17507047951221466, | |
| "learning_rate": 0.00019955220842662327, | |
| "loss": 1.1951, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.007318560683065664, | |
| "grad_norm": 0.17176274955272675, | |
| "learning_rate": 0.00019951150010177082, | |
| "loss": 1.1895, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.007725147387680423, | |
| "grad_norm": 0.13839803636074066, | |
| "learning_rate": 0.00019947079177691838, | |
| "loss": 0.9549, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.008131734092295182, | |
| "grad_norm": 0.0970696285367012, | |
| "learning_rate": 0.00019943008345206596, | |
| "loss": 1.0867, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008538320796909941, | |
| "grad_norm": 0.08836886286735535, | |
| "learning_rate": 0.0001993893751272135, | |
| "loss": 1.155, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.0089449075015247, | |
| "grad_norm": 0.11885025352239609, | |
| "learning_rate": 0.0001993486668023611, | |
| "loss": 1.1231, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.00935149420613946, | |
| "grad_norm": 0.15120816230773926, | |
| "learning_rate": 0.00019930795847750865, | |
| "loss": 1.1078, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.00975808091075422, | |
| "grad_norm": 0.16326424479484558, | |
| "learning_rate": 0.00019926725015265623, | |
| "loss": 1.079, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.010164667615368977, | |
| "grad_norm": 0.1179085448384285, | |
| "learning_rate": 0.0001992265418278038, | |
| "loss": 0.932, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.010571254319983736, | |
| "grad_norm": 0.10621985793113708, | |
| "learning_rate": 0.00019918583350295136, | |
| "loss": 1.1386, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.010977841024598495, | |
| "grad_norm": 0.08408638089895248, | |
| "learning_rate": 0.00019914512517809894, | |
| "loss": 1.0987, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.011384427729213255, | |
| "grad_norm": 0.08222135156393051, | |
| "learning_rate": 0.0001991044168532465, | |
| "loss": 1.0378, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.011791014433828014, | |
| "grad_norm": 0.08763129264116287, | |
| "learning_rate": 0.00019906370852839408, | |
| "loss": 0.983, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.012197601138442773, | |
| "grad_norm": 0.10638878494501114, | |
| "learning_rate": 0.00019902300020354163, | |
| "loss": 1.0258, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012604187843057533, | |
| "grad_norm": 0.10155023634433746, | |
| "learning_rate": 0.0001989822918786892, | |
| "loss": 0.9579, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.01301077454767229, | |
| "grad_norm": 0.08844579011201859, | |
| "learning_rate": 0.00019894158355383677, | |
| "loss": 1.1007, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.01341736125228705, | |
| "grad_norm": 0.10394158959388733, | |
| "learning_rate": 0.00019890087522898432, | |
| "loss": 1.0459, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.01382394795690181, | |
| "grad_norm": 0.08938682824373245, | |
| "learning_rate": 0.0001988601669041319, | |
| "loss": 1.0985, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.014230534661516568, | |
| "grad_norm": 0.08639086782932281, | |
| "learning_rate": 0.00019881945857927948, | |
| "loss": 1.0712, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.014637121366131328, | |
| "grad_norm": 0.08568435162305832, | |
| "learning_rate": 0.00019877875025442704, | |
| "loss": 1.0549, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.015043708070746087, | |
| "grad_norm": 0.0859316885471344, | |
| "learning_rate": 0.00019873804192957462, | |
| "loss": 1.1042, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.015450294775360847, | |
| "grad_norm": 0.09534381330013275, | |
| "learning_rate": 0.00019869733360472217, | |
| "loss": 1.0127, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.015856881479975604, | |
| "grad_norm": 0.09103580564260483, | |
| "learning_rate": 0.00019865662527986976, | |
| "loss": 0.9347, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.016263468184590364, | |
| "grad_norm": 0.0928095132112503, | |
| "learning_rate": 0.0001986159169550173, | |
| "loss": 1.0559, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.016670054889205123, | |
| "grad_norm": 0.09370871633291245, | |
| "learning_rate": 0.0001985752086301649, | |
| "loss": 1.1473, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.017076641593819882, | |
| "grad_norm": 0.07691123336553574, | |
| "learning_rate": 0.00019853450030531244, | |
| "loss": 1.0128, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.01748322829843464, | |
| "grad_norm": 0.09201047569513321, | |
| "learning_rate": 0.00019849379198046, | |
| "loss": 1.1296, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.0178898150030494, | |
| "grad_norm": 0.08490074425935745, | |
| "learning_rate": 0.00019845308365560758, | |
| "loss": 1.0444, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.01829640170766416, | |
| "grad_norm": 0.08623114228248596, | |
| "learning_rate": 0.00019841237533075513, | |
| "loss": 1.066, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01870298841227892, | |
| "grad_norm": 0.09486474096775055, | |
| "learning_rate": 0.00019837166700590271, | |
| "loss": 1.0788, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.01910957511689368, | |
| "grad_norm": 0.08024484664201736, | |
| "learning_rate": 0.0001983309586810503, | |
| "loss": 1.0262, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.01951616182150844, | |
| "grad_norm": 0.09256327897310257, | |
| "learning_rate": 0.00019829025035619785, | |
| "loss": 1.107, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.019922748526123194, | |
| "grad_norm": 0.09877921640872955, | |
| "learning_rate": 0.00019824954203134543, | |
| "loss": 1.1731, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.020329335230737954, | |
| "grad_norm": 0.08699575811624527, | |
| "learning_rate": 0.00019820883370649299, | |
| "loss": 1.0809, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.020735921935352713, | |
| "grad_norm": 0.089649498462677, | |
| "learning_rate": 0.00019816812538164057, | |
| "loss": 1.1564, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.021142508639967472, | |
| "grad_norm": 0.08757214993238449, | |
| "learning_rate": 0.00019812741705678812, | |
| "loss": 1.0272, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.02154909534458223, | |
| "grad_norm": 0.08320939540863037, | |
| "learning_rate": 0.0001980867087319357, | |
| "loss": 0.9931, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.02195568204919699, | |
| "grad_norm": 0.08898070454597473, | |
| "learning_rate": 0.00019804600040708326, | |
| "loss": 0.9421, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.02236226875381175, | |
| "grad_norm": 0.08072236180305481, | |
| "learning_rate": 0.0001980052920822308, | |
| "loss": 1.0304, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.02276885545842651, | |
| "grad_norm": 0.09354112297296524, | |
| "learning_rate": 0.0001979645837573784, | |
| "loss": 1.1041, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.02317544216304127, | |
| "grad_norm": 0.09214304387569427, | |
| "learning_rate": 0.00019792387543252595, | |
| "loss": 1.0666, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.02358202886765603, | |
| "grad_norm": 0.08546210825443268, | |
| "learning_rate": 0.00019788316710767353, | |
| "loss": 1.0795, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.023988615572270788, | |
| "grad_norm": 0.09029046446084976, | |
| "learning_rate": 0.0001978424587828211, | |
| "loss": 1.199, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.024395202276885547, | |
| "grad_norm": 0.08200937509536743, | |
| "learning_rate": 0.00019780175045796866, | |
| "loss": 0.9853, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.024801788981500306, | |
| "grad_norm": 0.08928566426038742, | |
| "learning_rate": 0.00019776104213311624, | |
| "loss": 0.9948, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.025208375686115066, | |
| "grad_norm": 0.08067034929990768, | |
| "learning_rate": 0.0001977203338082638, | |
| "loss": 0.9824, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.02561496239072982, | |
| "grad_norm": 0.07509499788284302, | |
| "learning_rate": 0.00019767962548341138, | |
| "loss": 0.9166, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.02602154909534458, | |
| "grad_norm": 0.10127029567956924, | |
| "learning_rate": 0.00019763891715855893, | |
| "loss": 0.978, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.02642813579995934, | |
| "grad_norm": 0.08480218052864075, | |
| "learning_rate": 0.0001975982088337065, | |
| "loss": 1.0019, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0268347225045741, | |
| "grad_norm": 0.0922696441411972, | |
| "learning_rate": 0.00019755750050885407, | |
| "loss": 1.0213, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.02724130920918886, | |
| "grad_norm": 0.0819278433918953, | |
| "learning_rate": 0.00019751679218400162, | |
| "loss": 0.9792, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.02764789591380362, | |
| "grad_norm": 0.09971120208501816, | |
| "learning_rate": 0.0001974760838591492, | |
| "loss": 0.9605, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.028054482618418378, | |
| "grad_norm": 0.09195531904697418, | |
| "learning_rate": 0.00019743537553429676, | |
| "loss": 1.1203, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.028461069323033137, | |
| "grad_norm": 0.09179981052875519, | |
| "learning_rate": 0.00019739466720944434, | |
| "loss": 1.0586, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.028867656027647896, | |
| "grad_norm": 0.0866156816482544, | |
| "learning_rate": 0.00019735395888459192, | |
| "loss": 1.0558, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.029274242732262656, | |
| "grad_norm": 0.09198956191539764, | |
| "learning_rate": 0.00019731325055973947, | |
| "loss": 1.117, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.029680829436877415, | |
| "grad_norm": 0.0912180244922638, | |
| "learning_rate": 0.00019727254223488705, | |
| "loss": 1.0235, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.030087416141492174, | |
| "grad_norm": 0.092186838388443, | |
| "learning_rate": 0.0001972318339100346, | |
| "loss": 1.0119, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.030494002846106934, | |
| "grad_norm": 0.091013602912426, | |
| "learning_rate": 0.0001971911255851822, | |
| "loss": 1.0523, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.030900589550721693, | |
| "grad_norm": 0.0932595282793045, | |
| "learning_rate": 0.00019715041726032974, | |
| "loss": 1.0471, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.03130717625533645, | |
| "grad_norm": 0.089345782995224, | |
| "learning_rate": 0.0001971097089354773, | |
| "loss": 1.0214, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.03171376295995121, | |
| "grad_norm": 0.09476006776094437, | |
| "learning_rate": 0.00019706900061062488, | |
| "loss": 0.9888, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.03212034966456597, | |
| "grad_norm": 0.09379832446575165, | |
| "learning_rate": 0.00019702829228577243, | |
| "loss": 1.1039, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.03252693636918073, | |
| "grad_norm": 0.10659569501876831, | |
| "learning_rate": 0.00019698758396092001, | |
| "loss": 1.1377, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03293352307379549, | |
| "grad_norm": 0.09652398526668549, | |
| "learning_rate": 0.0001969468756360676, | |
| "loss": 1.0194, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.033340109778410246, | |
| "grad_norm": 0.08641666918992996, | |
| "learning_rate": 0.00019690616731121515, | |
| "loss": 1.0239, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.03374669648302501, | |
| "grad_norm": 0.0956072062253952, | |
| "learning_rate": 0.00019686545898636273, | |
| "loss": 1.032, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.034153283187639764, | |
| "grad_norm": 0.08402691036462784, | |
| "learning_rate": 0.00019682475066151029, | |
| "loss": 0.9802, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.03455986989225452, | |
| "grad_norm": 0.08827648311853409, | |
| "learning_rate": 0.00019678404233665787, | |
| "loss": 1.1805, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.03496645659686928, | |
| "grad_norm": 0.08757660537958145, | |
| "learning_rate": 0.00019674333401180542, | |
| "loss": 0.952, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.03537304330148404, | |
| "grad_norm": 0.09728538244962692, | |
| "learning_rate": 0.000196702625686953, | |
| "loss": 1.0875, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.0357796300060988, | |
| "grad_norm": 0.08561044931411743, | |
| "learning_rate": 0.00019666191736210056, | |
| "loss": 0.9818, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.03618621671071356, | |
| "grad_norm": 0.08389468491077423, | |
| "learning_rate": 0.0001966212090372481, | |
| "loss": 0.9962, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.03659280341532832, | |
| "grad_norm": 0.08847957849502563, | |
| "learning_rate": 0.0001965805007123957, | |
| "loss": 1.0138, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.036999390119943076, | |
| "grad_norm": 0.08515489101409912, | |
| "learning_rate": 0.00019653979238754324, | |
| "loss": 1.0119, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.03740597682455784, | |
| "grad_norm": 0.09340325742959976, | |
| "learning_rate": 0.00019649908406269083, | |
| "loss": 1.0635, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.037812563529172595, | |
| "grad_norm": 0.09383916854858398, | |
| "learning_rate": 0.0001964583757378384, | |
| "loss": 1.0999, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.03821915023378736, | |
| "grad_norm": 0.09956547617912292, | |
| "learning_rate": 0.00019641766741298596, | |
| "loss": 1.0186, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.038625736938402114, | |
| "grad_norm": 0.09809234738349915, | |
| "learning_rate": 0.00019637695908813354, | |
| "loss": 1.0641, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.03903232364301688, | |
| "grad_norm": 0.08520065993070602, | |
| "learning_rate": 0.0001963362507632811, | |
| "loss": 0.9255, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.03943891034763163, | |
| "grad_norm": 0.09007880836725235, | |
| "learning_rate": 0.00019629554243842868, | |
| "loss": 1.0963, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.03984549705224639, | |
| "grad_norm": 0.08900373429059982, | |
| "learning_rate": 0.00019625483411357623, | |
| "loss": 0.9908, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.04025208375686115, | |
| "grad_norm": 0.09613076597452164, | |
| "learning_rate": 0.0001962141257887238, | |
| "loss": 0.9729, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.04065867046147591, | |
| "grad_norm": 0.09987878054380417, | |
| "learning_rate": 0.00019617341746387137, | |
| "loss": 1.0554, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04106525716609067, | |
| "grad_norm": 0.10209144651889801, | |
| "learning_rate": 0.00019613270913901892, | |
| "loss": 1.1162, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.041471843870705426, | |
| "grad_norm": 0.10085388273000717, | |
| "learning_rate": 0.0001960920008141665, | |
| "loss": 1.1355, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.04187843057532019, | |
| "grad_norm": 0.08966121822595596, | |
| "learning_rate": 0.00019605129248931406, | |
| "loss": 0.9275, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.042285017279934944, | |
| "grad_norm": 0.10507562756538391, | |
| "learning_rate": 0.00019601058416446166, | |
| "loss": 1.081, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.04269160398454971, | |
| "grad_norm": 0.09719648957252502, | |
| "learning_rate": 0.00019596987583960922, | |
| "loss": 1.0884, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.04309819068916446, | |
| "grad_norm": 0.09457529336214066, | |
| "learning_rate": 0.00019592916751475677, | |
| "loss": 1.0413, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.043504777393779226, | |
| "grad_norm": 0.11330179125070572, | |
| "learning_rate": 0.00019588845918990435, | |
| "loss": 1.0937, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.04391136409839398, | |
| "grad_norm": 0.09778840839862823, | |
| "learning_rate": 0.0001958477508650519, | |
| "loss": 1.1316, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.044317950803008745, | |
| "grad_norm": 0.09848835319280624, | |
| "learning_rate": 0.0001958070425401995, | |
| "loss": 1.1244, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.0447245375076235, | |
| "grad_norm": 0.0965428277850151, | |
| "learning_rate": 0.00019576633421534704, | |
| "loss": 0.9952, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.045131124212238256, | |
| "grad_norm": 0.0857444629073143, | |
| "learning_rate": 0.00019572562589049462, | |
| "loss": 0.9822, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.04553771091685302, | |
| "grad_norm": 0.10461942851543427, | |
| "learning_rate": 0.00019568491756564218, | |
| "loss": 1.1463, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.045944297621467775, | |
| "grad_norm": 0.08575154095888138, | |
| "learning_rate": 0.00019564420924078973, | |
| "loss": 0.8976, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.04635088432608254, | |
| "grad_norm": 0.0948256254196167, | |
| "learning_rate": 0.00019560350091593731, | |
| "loss": 1.1205, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.046757471030697294, | |
| "grad_norm": 0.09214090555906296, | |
| "learning_rate": 0.00019556279259108487, | |
| "loss": 1.1416, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.04716405773531206, | |
| "grad_norm": 0.09885852038860321, | |
| "learning_rate": 0.00019552208426623248, | |
| "loss": 1.079, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.04757064443992681, | |
| "grad_norm": 0.09071148931980133, | |
| "learning_rate": 0.00019548137594138003, | |
| "loss": 1.0128, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.047977231144541575, | |
| "grad_norm": 0.09190430492162704, | |
| "learning_rate": 0.00019544066761652758, | |
| "loss": 0.9631, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.04838381784915633, | |
| "grad_norm": 0.08024870604276657, | |
| "learning_rate": 0.00019539995929167517, | |
| "loss": 0.9086, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.048790404553771094, | |
| "grad_norm": 0.09223239868879318, | |
| "learning_rate": 0.00019535925096682272, | |
| "loss": 1.0255, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04919699125838585, | |
| "grad_norm": 0.09259685128927231, | |
| "learning_rate": 0.0001953185426419703, | |
| "loss": 1.0221, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.04960357796300061, | |
| "grad_norm": 0.08371948450803757, | |
| "learning_rate": 0.00019527783431711786, | |
| "loss": 0.966, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.05001016466761537, | |
| "grad_norm": 0.0957912728190422, | |
| "learning_rate": 0.00019523712599226544, | |
| "loss": 1.0919, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.05041675137223013, | |
| "grad_norm": 0.09397678077220917, | |
| "learning_rate": 0.000195196417667413, | |
| "loss": 0.9666, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.05082333807684489, | |
| "grad_norm": 0.1014254167675972, | |
| "learning_rate": 0.00019515570934256054, | |
| "loss": 0.9321, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.05122992478145964, | |
| "grad_norm": 0.09339801222085953, | |
| "learning_rate": 0.00019511500101770813, | |
| "loss": 1.0487, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.051636511486074406, | |
| "grad_norm": 0.08642175793647766, | |
| "learning_rate": 0.0001950742926928557, | |
| "loss": 1.0606, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.05204309819068916, | |
| "grad_norm": 0.09092641621828079, | |
| "learning_rate": 0.0001950335843680033, | |
| "loss": 0.904, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.052449684895303925, | |
| "grad_norm": 0.09896791726350784, | |
| "learning_rate": 0.00019499287604315084, | |
| "loss": 1.0325, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.05285627159991868, | |
| "grad_norm": 0.08731307834386826, | |
| "learning_rate": 0.0001949521677182984, | |
| "loss": 0.9258, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05326285830453344, | |
| "grad_norm": 0.09673330187797546, | |
| "learning_rate": 0.00019491145939344598, | |
| "loss": 1.1198, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.0536694450091482, | |
| "grad_norm": 0.09038975089788437, | |
| "learning_rate": 0.00019487075106859353, | |
| "loss": 1.0295, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.05407603171376296, | |
| "grad_norm": 0.0918399840593338, | |
| "learning_rate": 0.0001948300427437411, | |
| "loss": 1.0127, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.05448261841837772, | |
| "grad_norm": 0.08970967680215836, | |
| "learning_rate": 0.00019478933441888867, | |
| "loss": 1.0238, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.05488920512299248, | |
| "grad_norm": 0.09728217124938965, | |
| "learning_rate": 0.00019474862609403625, | |
| "loss": 1.069, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.05529579182760724, | |
| "grad_norm": 0.10240956395864487, | |
| "learning_rate": 0.0001947079177691838, | |
| "loss": 1.1467, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.055702378532222, | |
| "grad_norm": 0.10397852212190628, | |
| "learning_rate": 0.00019466720944433136, | |
| "loss": 1.0415, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.056108965236836755, | |
| "grad_norm": 0.10451675951480865, | |
| "learning_rate": 0.00019462650111947894, | |
| "loss": 1.0309, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.05651555194145151, | |
| "grad_norm": 0.09685720503330231, | |
| "learning_rate": 0.00019458579279462652, | |
| "loss": 1.11, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.056922138646066274, | |
| "grad_norm": 0.09885822236537933, | |
| "learning_rate": 0.00019454508446977407, | |
| "loss": 0.993, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05732872535068103, | |
| "grad_norm": 0.10943586379289627, | |
| "learning_rate": 0.00019450437614492165, | |
| "loss": 0.9749, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.05773531205529579, | |
| "grad_norm": 0.10964591801166534, | |
| "learning_rate": 0.0001944636678200692, | |
| "loss": 1.1108, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.05814189875991055, | |
| "grad_norm": 0.10109028965234756, | |
| "learning_rate": 0.0001944229594952168, | |
| "loss": 1.0897, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.05854848546452531, | |
| "grad_norm": 0.11243695765733719, | |
| "learning_rate": 0.00019438225117036434, | |
| "loss": 1.0338, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.05895507216914007, | |
| "grad_norm": 0.1047658622264862, | |
| "learning_rate": 0.00019434154284551192, | |
| "loss": 0.9566, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.05936165887375483, | |
| "grad_norm": 0.09534204006195068, | |
| "learning_rate": 0.00019430083452065948, | |
| "loss": 1.0313, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.059768245578369586, | |
| "grad_norm": 0.10418044775724411, | |
| "learning_rate": 0.00019426012619580706, | |
| "loss": 0.9759, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.06017483228298435, | |
| "grad_norm": 0.10020595043897629, | |
| "learning_rate": 0.00019421941787095461, | |
| "loss": 0.9368, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.060581418987599105, | |
| "grad_norm": 0.09832129627466202, | |
| "learning_rate": 0.00019417870954610217, | |
| "loss": 1.0494, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.06098800569221387, | |
| "grad_norm": 0.09458506107330322, | |
| "learning_rate": 0.00019413800122124978, | |
| "loss": 0.9631, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06139459239682862, | |
| "grad_norm": 0.10380101203918457, | |
| "learning_rate": 0.00019409729289639733, | |
| "loss": 1.1003, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.061801179101443386, | |
| "grad_norm": 0.107131227850914, | |
| "learning_rate": 0.00019405658457154488, | |
| "loss": 1.0819, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.06220776580605814, | |
| "grad_norm": 0.10330741852521896, | |
| "learning_rate": 0.00019401587624669247, | |
| "loss": 1.128, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.0626143525106729, | |
| "grad_norm": 0.08829359710216522, | |
| "learning_rate": 0.00019397516792184002, | |
| "loss": 0.8754, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.06302093921528766, | |
| "grad_norm": 0.10422427207231522, | |
| "learning_rate": 0.0001939344595969876, | |
| "loss": 0.9633, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.06342752591990242, | |
| "grad_norm": 0.11499015986919403, | |
| "learning_rate": 0.00019389375127213515, | |
| "loss": 0.9735, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.06383411262451717, | |
| "grad_norm": 0.0938427522778511, | |
| "learning_rate": 0.00019385304294728274, | |
| "loss": 0.9219, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.06424069932913194, | |
| "grad_norm": 0.1080261766910553, | |
| "learning_rate": 0.0001938123346224303, | |
| "loss": 0.9678, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.0646472860337467, | |
| "grad_norm": 0.10001271218061447, | |
| "learning_rate": 0.00019377162629757784, | |
| "loss": 1.0854, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.06505387273836145, | |
| "grad_norm": 0.10731212794780731, | |
| "learning_rate": 0.00019373091797272543, | |
| "loss": 1.0108, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06546045944297621, | |
| "grad_norm": 0.10019373893737793, | |
| "learning_rate": 0.00019369020964787298, | |
| "loss": 1.0315, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.06586704614759098, | |
| "grad_norm": 0.0947297066450119, | |
| "learning_rate": 0.0001936495013230206, | |
| "loss": 1.0634, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.06627363285220574, | |
| "grad_norm": 0.12204254418611526, | |
| "learning_rate": 0.00019360879299816814, | |
| "loss": 1.0635, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.06668021955682049, | |
| "grad_norm": 0.10462553054094315, | |
| "learning_rate": 0.0001935680846733157, | |
| "loss": 1.0248, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.06708680626143525, | |
| "grad_norm": 0.09576130658388138, | |
| "learning_rate": 0.00019352737634846328, | |
| "loss": 0.9671, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.06749339296605002, | |
| "grad_norm": 0.10027123987674713, | |
| "learning_rate": 0.00019348666802361083, | |
| "loss": 0.9317, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.06789997967066477, | |
| "grad_norm": 0.10674256086349487, | |
| "learning_rate": 0.0001934459596987584, | |
| "loss": 1.0058, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.06830656637527953, | |
| "grad_norm": 0.12352320551872253, | |
| "learning_rate": 0.00019340525137390597, | |
| "loss": 1.0926, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.06871315307989428, | |
| "grad_norm": 0.09426864236593246, | |
| "learning_rate": 0.00019336454304905355, | |
| "loss": 1.0876, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.06911973978450904, | |
| "grad_norm": 0.09280996024608612, | |
| "learning_rate": 0.0001933238347242011, | |
| "loss": 0.977, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06952632648912381, | |
| "grad_norm": 0.11547420918941498, | |
| "learning_rate": 0.00019328312639934866, | |
| "loss": 1.0598, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.06993291319373857, | |
| "grad_norm": 0.12538915872573853, | |
| "learning_rate": 0.00019324241807449624, | |
| "loss": 1.0996, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.07033949989835332, | |
| "grad_norm": 0.08110898733139038, | |
| "learning_rate": 0.00019320170974964382, | |
| "loss": 0.8776, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.07074608660296808, | |
| "grad_norm": 0.10475198924541473, | |
| "learning_rate": 0.0001931610014247914, | |
| "loss": 1.0876, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.07115267330758285, | |
| "grad_norm": 0.1095360517501831, | |
| "learning_rate": 0.00019312029309993895, | |
| "loss": 1.054, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0715592600121976, | |
| "grad_norm": 0.09516473114490509, | |
| "learning_rate": 0.0001930795847750865, | |
| "loss": 1.0558, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.07196584671681236, | |
| "grad_norm": 0.09316466003656387, | |
| "learning_rate": 0.0001930388764502341, | |
| "loss": 0.9467, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.07237243342142712, | |
| "grad_norm": 0.11777061969041824, | |
| "learning_rate": 0.00019299816812538164, | |
| "loss": 1.1441, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.07277902012604189, | |
| "grad_norm": 0.09438811987638474, | |
| "learning_rate": 0.00019295745980052922, | |
| "loss": 0.9521, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.07318560683065664, | |
| "grad_norm": 0.08892639726400375, | |
| "learning_rate": 0.00019291675147567678, | |
| "loss": 0.9804, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0735921935352714, | |
| "grad_norm": 0.08963356912136078, | |
| "learning_rate": 0.00019287604315082436, | |
| "loss": 1.0427, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.07399878023988615, | |
| "grad_norm": 0.09870661795139313, | |
| "learning_rate": 0.0001928353348259719, | |
| "loss": 1.051, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.07440536694450091, | |
| "grad_norm": 0.11843609809875488, | |
| "learning_rate": 0.00019279462650111947, | |
| "loss": 1.0109, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.07481195364911568, | |
| "grad_norm": 0.08860404789447784, | |
| "learning_rate": 0.00019275391817626705, | |
| "loss": 1.0035, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.07521854035373043, | |
| "grad_norm": 0.09085170924663544, | |
| "learning_rate": 0.00019271320985141463, | |
| "loss": 0.9461, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.07562512705834519, | |
| "grad_norm": 0.09071815758943558, | |
| "learning_rate": 0.0001926725015265622, | |
| "loss": 0.9542, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.07603171376295995, | |
| "grad_norm": 0.09566846489906311, | |
| "learning_rate": 0.00019263179320170976, | |
| "loss": 0.9958, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.07643830046757472, | |
| "grad_norm": 0.11846338212490082, | |
| "learning_rate": 0.00019259108487685732, | |
| "loss": 1.0737, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.07684488717218947, | |
| "grad_norm": 0.09295649081468582, | |
| "learning_rate": 0.0001925503765520049, | |
| "loss": 1.0162, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.07725147387680423, | |
| "grad_norm": 0.0917876660823822, | |
| "learning_rate": 0.00019250966822715245, | |
| "loss": 1.0432, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07765806058141898, | |
| "grad_norm": 0.10864109545946121, | |
| "learning_rate": 0.00019246895990230004, | |
| "loss": 1.1107, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.07806464728603375, | |
| "grad_norm": 0.09689877927303314, | |
| "learning_rate": 0.0001924282515774476, | |
| "loss": 1.0421, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.07847123399064851, | |
| "grad_norm": 0.09406042098999023, | |
| "learning_rate": 0.00019238754325259517, | |
| "loss": 1.1042, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.07887782069526326, | |
| "grad_norm": 0.08346063643693924, | |
| "learning_rate": 0.00019234683492774272, | |
| "loss": 0.9554, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.07928440739987802, | |
| "grad_norm": 0.10317754745483398, | |
| "learning_rate": 0.00019230612660289028, | |
| "loss": 1.0835, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.07969099410449278, | |
| "grad_norm": 0.08712919056415558, | |
| "learning_rate": 0.0001922654182780379, | |
| "loss": 0.9799, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.08009758080910755, | |
| "grad_norm": 0.0860556811094284, | |
| "learning_rate": 0.00019222470995318544, | |
| "loss": 0.8661, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.0805041675137223, | |
| "grad_norm": 0.07940655201673508, | |
| "learning_rate": 0.00019218400162833302, | |
| "loss": 0.8305, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.08091075421833706, | |
| "grad_norm": 0.09200199693441391, | |
| "learning_rate": 0.00019214329330348058, | |
| "loss": 0.9774, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.08131734092295181, | |
| "grad_norm": 0.09980164468288422, | |
| "learning_rate": 0.00019210258497862813, | |
| "loss": 0.9791, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08172392762756658, | |
| "grad_norm": 0.09660688042640686, | |
| "learning_rate": 0.0001920618766537757, | |
| "loss": 1.027, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.08213051433218134, | |
| "grad_norm": 0.09518909454345703, | |
| "learning_rate": 0.00019202116832892327, | |
| "loss": 0.9939, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.0825371010367961, | |
| "grad_norm": 0.0886114165186882, | |
| "learning_rate": 0.00019198046000407085, | |
| "loss": 0.985, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.08294368774141085, | |
| "grad_norm": 0.09820783883333206, | |
| "learning_rate": 0.0001919397516792184, | |
| "loss": 1.0064, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.08335027444602562, | |
| "grad_norm": 0.0957496389746666, | |
| "learning_rate": 0.00019189904335436598, | |
| "loss": 1.1126, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.08375686115064038, | |
| "grad_norm": 0.09990067780017853, | |
| "learning_rate": 0.00019185833502951354, | |
| "loss": 1.1517, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.08416344785525513, | |
| "grad_norm": 0.0953991562128067, | |
| "learning_rate": 0.0001918176267046611, | |
| "loss": 1.087, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.08457003455986989, | |
| "grad_norm": 0.10291532427072525, | |
| "learning_rate": 0.0001917769183798087, | |
| "loss": 1.0366, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.08497662126448464, | |
| "grad_norm": 0.09986121207475662, | |
| "learning_rate": 0.00019173621005495625, | |
| "loss": 0.9581, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.08538320796909941, | |
| "grad_norm": 0.09369988739490509, | |
| "learning_rate": 0.00019169550173010383, | |
| "loss": 1.0048, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08578979467371417, | |
| "grad_norm": 0.0968063622713089, | |
| "learning_rate": 0.0001916547934052514, | |
| "loss": 1.0005, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.08619638137832893, | |
| "grad_norm": 0.11241315305233002, | |
| "learning_rate": 0.00019161408508039894, | |
| "loss": 1.0316, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.08660296808294368, | |
| "grad_norm": 0.09230878949165344, | |
| "learning_rate": 0.00019157337675554652, | |
| "loss": 0.917, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.08700955478755845, | |
| "grad_norm": 0.08461520820856094, | |
| "learning_rate": 0.00019153266843069408, | |
| "loss": 0.9144, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.08741614149217321, | |
| "grad_norm": 0.09011861681938171, | |
| "learning_rate": 0.00019149196010584166, | |
| "loss": 1.0092, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.08782272819678796, | |
| "grad_norm": 0.09200841188430786, | |
| "learning_rate": 0.0001914512517809892, | |
| "loss": 1.0552, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.08822931490140272, | |
| "grad_norm": 0.09052886068820953, | |
| "learning_rate": 0.0001914105434561368, | |
| "loss": 0.9067, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.08863590160601749, | |
| "grad_norm": 0.08740741014480591, | |
| "learning_rate": 0.00019136983513128435, | |
| "loss": 0.9182, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.08904248831063225, | |
| "grad_norm": 0.08494284749031067, | |
| "learning_rate": 0.00019132912680643193, | |
| "loss": 0.8321, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.089449075015247, | |
| "grad_norm": 0.0890796035528183, | |
| "learning_rate": 0.0001912884184815795, | |
| "loss": 0.9801, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08985566171986176, | |
| "grad_norm": 0.094822458922863, | |
| "learning_rate": 0.00019124771015672706, | |
| "loss": 0.9779, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.09026224842447651, | |
| "grad_norm": 0.09756983071565628, | |
| "learning_rate": 0.00019120700183187465, | |
| "loss": 1.0385, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.09066883512909128, | |
| "grad_norm": 0.09434107691049576, | |
| "learning_rate": 0.0001911662935070222, | |
| "loss": 1.063, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.09107542183370604, | |
| "grad_norm": 0.0925639271736145, | |
| "learning_rate": 0.00019112558518216975, | |
| "loss": 0.9061, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.0914820085383208, | |
| "grad_norm": 0.10531201958656311, | |
| "learning_rate": 0.00019108487685731734, | |
| "loss": 1.1593, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.09188859524293555, | |
| "grad_norm": 0.08259832113981247, | |
| "learning_rate": 0.0001910441685324649, | |
| "loss": 0.8463, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.09229518194755032, | |
| "grad_norm": 431.5063171386719, | |
| "learning_rate": 0.00019100346020761247, | |
| "loss": 1.0632, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.09270176865216508, | |
| "grad_norm": 0.10764740407466888, | |
| "learning_rate": 0.00019096275188276002, | |
| "loss": 1.0083, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.09310835535677983, | |
| "grad_norm": 0.08872029185295105, | |
| "learning_rate": 0.0001909220435579076, | |
| "loss": 0.9301, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.09351494206139459, | |
| "grad_norm": 0.1006346270442009, | |
| "learning_rate": 0.00019088133523305516, | |
| "loss": 1.0103, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.09392152876600936, | |
| "grad_norm": 0.0970514565706253, | |
| "learning_rate": 0.00019084062690820274, | |
| "loss": 1.0522, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.09432811547062411, | |
| "grad_norm": 0.09807727485895157, | |
| "learning_rate": 0.00019079991858335032, | |
| "loss": 1.0498, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.09473470217523887, | |
| "grad_norm": 0.09828022867441177, | |
| "learning_rate": 0.00019075921025849788, | |
| "loss": 0.9871, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.09514128887985362, | |
| "grad_norm": 0.10089042782783508, | |
| "learning_rate": 0.00019071850193364543, | |
| "loss": 0.977, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.0955478755844684, | |
| "grad_norm": 0.09905245155096054, | |
| "learning_rate": 0.000190677793608793, | |
| "loss": 1.0135, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.09595446228908315, | |
| "grad_norm": 0.1002473533153534, | |
| "learning_rate": 0.00019063708528394057, | |
| "loss": 1.0219, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.0963610489936979, | |
| "grad_norm": 0.09028339385986328, | |
| "learning_rate": 0.00019059637695908815, | |
| "loss": 0.909, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.09676763569831266, | |
| "grad_norm": 0.0950377881526947, | |
| "learning_rate": 0.0001905556686342357, | |
| "loss": 0.9749, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.09717422240292742, | |
| "grad_norm": 0.09866049885749817, | |
| "learning_rate": 0.00019051496030938328, | |
| "loss": 1.0927, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.09758080910754219, | |
| "grad_norm": 0.09754758328199387, | |
| "learning_rate": 0.00019047425198453084, | |
| "loss": 1.059, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09798739581215694, | |
| "grad_norm": 0.09261766821146011, | |
| "learning_rate": 0.00019043354365967842, | |
| "loss": 1.0912, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.0983939825167717, | |
| "grad_norm": 0.08637125045061111, | |
| "learning_rate": 0.000190392835334826, | |
| "loss": 0.8925, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.09880056922138646, | |
| "grad_norm": 0.0962812602519989, | |
| "learning_rate": 0.00019035212700997355, | |
| "loss": 1.0435, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.09920715592600123, | |
| "grad_norm": 0.09047430753707886, | |
| "learning_rate": 0.00019031141868512113, | |
| "loss": 1.0787, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.09961374263061598, | |
| "grad_norm": 0.09183438867330551, | |
| "learning_rate": 0.0001902707103602687, | |
| "loss": 0.9338, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.10002032933523074, | |
| "grad_norm": 0.09977632761001587, | |
| "learning_rate": 0.00019023000203541624, | |
| "loss": 1.1605, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.10042691603984549, | |
| "grad_norm": 0.10386580228805542, | |
| "learning_rate": 0.00019018929371056382, | |
| "loss": 1.0493, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.10083350274446026, | |
| "grad_norm": 0.09106533974409103, | |
| "learning_rate": 0.00019014858538571138, | |
| "loss": 0.9891, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.10124008944907502, | |
| "grad_norm": 0.09407884627580643, | |
| "learning_rate": 0.00019010787706085896, | |
| "loss": 1.0367, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.10164667615368977, | |
| "grad_norm": 0.10133463889360428, | |
| "learning_rate": 0.0001900671687360065, | |
| "loss": 1.0743, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10205326285830453, | |
| "grad_norm": 0.11877205967903137, | |
| "learning_rate": 0.0001900264604111541, | |
| "loss": 1.1572, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.10245984956291929, | |
| "grad_norm": 0.10216309130191803, | |
| "learning_rate": 0.00018998575208630165, | |
| "loss": 1.0687, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.10286643626753406, | |
| "grad_norm": 0.09023922681808472, | |
| "learning_rate": 0.0001899450437614492, | |
| "loss": 0.9153, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.10327302297214881, | |
| "grad_norm": 0.09972742944955826, | |
| "learning_rate": 0.0001899043354365968, | |
| "loss": 0.9059, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.10367960967676357, | |
| "grad_norm": 0.1175752505660057, | |
| "learning_rate": 0.00018986362711174436, | |
| "loss": 1.0659, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.10408619638137832, | |
| "grad_norm": 0.09030337631702423, | |
| "learning_rate": 0.00018982291878689195, | |
| "loss": 0.9577, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.1044927830859931, | |
| "grad_norm": 0.08850797265768051, | |
| "learning_rate": 0.0001897822104620395, | |
| "loss": 0.9193, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.10489936979060785, | |
| "grad_norm": 1767.7669677734375, | |
| "learning_rate": 0.00018974150213718705, | |
| "loss": 0.9977, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.1053059564952226, | |
| "grad_norm": 0.11435185372829437, | |
| "learning_rate": 0.00018970079381233463, | |
| "loss": 1.0468, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.10571254319983736, | |
| "grad_norm": 0.10342080891132355, | |
| "learning_rate": 0.0001896600854874822, | |
| "loss": 1.0119, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10611912990445213, | |
| "grad_norm": 0.11568263173103333, | |
| "learning_rate": 0.00018961937716262977, | |
| "loss": 1.025, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.10652571660906689, | |
| "grad_norm": 0.12752321362495422, | |
| "learning_rate": 0.00018957866883777732, | |
| "loss": 1.1283, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.10693230331368164, | |
| "grad_norm": 0.10688795894384384, | |
| "learning_rate": 0.0001895379605129249, | |
| "loss": 0.9052, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.1073388900182964, | |
| "grad_norm": 0.10426552593708038, | |
| "learning_rate": 0.00018949725218807246, | |
| "loss": 0.9556, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.10774547672291115, | |
| "grad_norm": 0.09953362494707108, | |
| "learning_rate": 0.00018945654386322004, | |
| "loss": 1.0734, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.10815206342752592, | |
| "grad_norm": 0.09143470227718353, | |
| "learning_rate": 0.00018941583553836762, | |
| "loss": 1.0063, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.10855865013214068, | |
| "grad_norm": 0.10831563919782639, | |
| "learning_rate": 0.00018937512721351518, | |
| "loss": 1.011, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.10896523683675544, | |
| "grad_norm": 0.10352573543787003, | |
| "learning_rate": 0.00018933441888866276, | |
| "loss": 1.0625, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.10937182354137019, | |
| "grad_norm": 0.09499429166316986, | |
| "learning_rate": 0.0001892937105638103, | |
| "loss": 0.8775, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.10977841024598496, | |
| "grad_norm": 0.10296636819839478, | |
| "learning_rate": 0.00018925300223895787, | |
| "loss": 0.985, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11018499695059972, | |
| "grad_norm": 0.10464894771575928, | |
| "learning_rate": 0.00018921229391410545, | |
| "loss": 1.0051, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.11059158365521447, | |
| "grad_norm": 0.09429532289505005, | |
| "learning_rate": 0.000189171585589253, | |
| "loss": 0.9793, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.11099817035982923, | |
| "grad_norm": 0.09751992672681808, | |
| "learning_rate": 0.00018913087726440058, | |
| "loss": 1.0756, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.111404757064444, | |
| "grad_norm": 0.11418993026018143, | |
| "learning_rate": 0.00018909016893954814, | |
| "loss": 1.0742, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.11181134376905875, | |
| "grad_norm": 0.10320629924535751, | |
| "learning_rate": 0.00018904946061469572, | |
| "loss": 1.036, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.11221793047367351, | |
| "grad_norm": 0.09697311371564865, | |
| "learning_rate": 0.00018900875228984327, | |
| "loss": 1.0317, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.11262451717828827, | |
| "grad_norm": 0.09579788893461227, | |
| "learning_rate": 0.00018896804396499085, | |
| "loss": 0.9621, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.11303110388290302, | |
| "grad_norm": 0.09918879717588425, | |
| "learning_rate": 0.00018892733564013843, | |
| "loss": 1.0292, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.11343769058751779, | |
| "grad_norm": 0.0923212468624115, | |
| "learning_rate": 0.000188886627315286, | |
| "loss": 1.0611, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.11384427729213255, | |
| "grad_norm": 0.09480055421590805, | |
| "learning_rate": 0.00018884591899043357, | |
| "loss": 0.9809, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.1142508639967473, | |
| "grad_norm": 0.09431526064872742, | |
| "learning_rate": 0.00018880521066558112, | |
| "loss": 1.0326, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.11465745070136206, | |
| "grad_norm": 0.09080514311790466, | |
| "learning_rate": 0.00018876450234072868, | |
| "loss": 0.9115, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.11506403740597683, | |
| "grad_norm": 0.10855970531702042, | |
| "learning_rate": 0.00018872379401587626, | |
| "loss": 1.0422, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.11547062411059159, | |
| "grad_norm": 0.0941060334444046, | |
| "learning_rate": 0.0001886830856910238, | |
| "loss": 1.0352, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.11587721081520634, | |
| "grad_norm": 0.08903583139181137, | |
| "learning_rate": 0.0001886423773661714, | |
| "loss": 0.964, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.1162837975198211, | |
| "grad_norm": 0.08521820604801178, | |
| "learning_rate": 0.00018860166904131895, | |
| "loss": 0.917, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.11669038422443587, | |
| "grad_norm": 0.1058691143989563, | |
| "learning_rate": 0.00018856096071646653, | |
| "loss": 1.0375, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.11709697092905062, | |
| "grad_norm": 0.09435714781284332, | |
| "learning_rate": 0.0001885202523916141, | |
| "loss": 0.9766, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.11750355763366538, | |
| "grad_norm": 0.09868729114532471, | |
| "learning_rate": 0.00018847954406676166, | |
| "loss": 1.1059, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.11791014433828013, | |
| "grad_norm": 0.08855635672807693, | |
| "learning_rate": 0.00018843883574190924, | |
| "loss": 0.9424, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11831673104289489, | |
| "grad_norm": 0.09142837673425674, | |
| "learning_rate": 0.0001883981274170568, | |
| "loss": 1.0425, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.11872331774750966, | |
| "grad_norm": 0.0971277505159378, | |
| "learning_rate": 0.00018835741909220438, | |
| "loss": 1.108, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.11912990445212442, | |
| "grad_norm": 0.09940122812986374, | |
| "learning_rate": 0.00018831671076735193, | |
| "loss": 1.0172, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.11953649115673917, | |
| "grad_norm": 0.10263317078351974, | |
| "learning_rate": 0.0001882760024424995, | |
| "loss": 1.0956, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.11994307786135393, | |
| "grad_norm": 0.1092846542596817, | |
| "learning_rate": 0.00018823529411764707, | |
| "loss": 0.9454, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.1203496645659687, | |
| "grad_norm": 0.10364726930856705, | |
| "learning_rate": 0.00018819458579279462, | |
| "loss": 0.8884, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.12075625127058345, | |
| "grad_norm": 0.0889100730419159, | |
| "learning_rate": 0.0001881538774679422, | |
| "loss": 0.9922, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.12116283797519821, | |
| "grad_norm": 0.09209653735160828, | |
| "learning_rate": 0.00018811316914308976, | |
| "loss": 0.977, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.12156942467981297, | |
| "grad_norm": 0.11542046815156937, | |
| "learning_rate": 0.00018807246081823734, | |
| "loss": 1.0694, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.12197601138442773, | |
| "grad_norm": 0.10896503180265427, | |
| "learning_rate": 0.00018803175249338492, | |
| "loss": 1.0508, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12238259808904249, | |
| "grad_norm": 0.09302002936601639, | |
| "learning_rate": 0.00018799104416853248, | |
| "loss": 1.0512, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.12278918479365725, | |
| "grad_norm": 0.09081271290779114, | |
| "learning_rate": 0.00018795033584368006, | |
| "loss": 0.9688, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.123195771498272, | |
| "grad_norm": 0.1059931218624115, | |
| "learning_rate": 0.0001879096275188276, | |
| "loss": 1.0483, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.12360235820288677, | |
| "grad_norm": 0.1018669605255127, | |
| "learning_rate": 0.0001878689191939752, | |
| "loss": 1.019, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.12400894490750153, | |
| "grad_norm": 0.1040007546544075, | |
| "learning_rate": 0.00018782821086912275, | |
| "loss": 1.037, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.12441553161211628, | |
| "grad_norm": 0.10204601287841797, | |
| "learning_rate": 0.0001877875025442703, | |
| "loss": 0.9816, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.12482211831673104, | |
| "grad_norm": 0.10591764748096466, | |
| "learning_rate": 0.00018774679421941788, | |
| "loss": 1.0939, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.1252287050213458, | |
| "grad_norm": 0.09306305646896362, | |
| "learning_rate": 0.00018770608589456544, | |
| "loss": 1.0476, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.12563529172596055, | |
| "grad_norm": 11.22681713104248, | |
| "learning_rate": 0.00018766537756971302, | |
| "loss": 1.0573, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.12604187843057532, | |
| "grad_norm": 0.09422402083873749, | |
| "learning_rate": 0.00018762466924486057, | |
| "loss": 0.9993, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1264484651351901, | |
| "grad_norm": 0.0982229933142662, | |
| "learning_rate": 0.00018758396092000815, | |
| "loss": 0.9159, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.12685505183980483, | |
| "grad_norm": 0.12579265236854553, | |
| "learning_rate": 0.00018754325259515573, | |
| "loss": 1.0935, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.1272616385444196, | |
| "grad_norm": 0.10069390386343002, | |
| "learning_rate": 0.0001875025442703033, | |
| "loss": 1.0127, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.12766822524903434, | |
| "grad_norm": 0.10948827862739563, | |
| "learning_rate": 0.00018746183594545087, | |
| "loss": 1.0576, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.12807481195364911, | |
| "grad_norm": 0.09232445061206818, | |
| "learning_rate": 0.00018742112762059842, | |
| "loss": 0.9856, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.12848139865826388, | |
| "grad_norm": 0.08319563418626785, | |
| "learning_rate": 0.000187380419295746, | |
| "loss": 0.9172, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.12888798536287863, | |
| "grad_norm": 0.09697309136390686, | |
| "learning_rate": 0.00018733971097089356, | |
| "loss": 1.0567, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.1292945720674934, | |
| "grad_norm": 0.09254255145788193, | |
| "learning_rate": 0.0001872990026460411, | |
| "loss": 1.0177, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.12970115877210814, | |
| "grad_norm": 0.09254108369350433, | |
| "learning_rate": 0.0001872582943211887, | |
| "loss": 1.0079, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.1301077454767229, | |
| "grad_norm": 0.09095866233110428, | |
| "learning_rate": 0.00018721758599633625, | |
| "loss": 1.0633, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.13051433218133768, | |
| "grad_norm": 0.09073010087013245, | |
| "learning_rate": 0.00018717687767148383, | |
| "loss": 0.9059, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.13092091888595242, | |
| "grad_norm": 0.09842764586210251, | |
| "learning_rate": 0.00018713616934663138, | |
| "loss": 1.0766, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.1313275055905672, | |
| "grad_norm": 0.09325529634952545, | |
| "learning_rate": 0.00018709546102177896, | |
| "loss": 1.066, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.13173409229518196, | |
| "grad_norm": 0.09692969918251038, | |
| "learning_rate": 0.00018705475269692654, | |
| "loss": 0.9743, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.1321406789997967, | |
| "grad_norm": 0.09432708472013474, | |
| "learning_rate": 0.0001870140443720741, | |
| "loss": 1.0141, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.13254726570441147, | |
| "grad_norm": 0.09226994961500168, | |
| "learning_rate": 0.00018697333604722168, | |
| "loss": 0.9837, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.1329538524090262, | |
| "grad_norm": 0.10843974351882935, | |
| "learning_rate": 0.00018693262772236923, | |
| "loss": 1.0248, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.13336043911364098, | |
| "grad_norm": 0.09324774891138077, | |
| "learning_rate": 0.00018689191939751681, | |
| "loss": 1.0642, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.13376702581825575, | |
| "grad_norm": 0.08934729546308517, | |
| "learning_rate": 0.00018685121107266437, | |
| "loss": 0.9792, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.1341736125228705, | |
| "grad_norm": 0.09125274419784546, | |
| "learning_rate": 0.00018681050274781192, | |
| "loss": 1.0093, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.13458019922748526, | |
| "grad_norm": 0.09645108133554459, | |
| "learning_rate": 0.0001867697944229595, | |
| "loss": 0.9503, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.13498678593210003, | |
| "grad_norm": 0.09900861978530884, | |
| "learning_rate": 0.00018672908609810706, | |
| "loss": 0.9966, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.13539337263671478, | |
| "grad_norm": 0.09018311649560928, | |
| "learning_rate": 0.00018668837777325464, | |
| "loss": 0.965, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.13579995934132955, | |
| "grad_norm": 0.10296136885881424, | |
| "learning_rate": 0.00018664766944840222, | |
| "loss": 1.1011, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.1362065460459443, | |
| "grad_norm": 0.09104129672050476, | |
| "learning_rate": 0.00018660696112354977, | |
| "loss": 0.9814, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.13661313275055906, | |
| "grad_norm": 0.09881450235843658, | |
| "learning_rate": 0.00018656625279869736, | |
| "loss": 1.0989, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.13701971945517383, | |
| "grad_norm": 0.09691241383552551, | |
| "learning_rate": 0.0001865255444738449, | |
| "loss": 1.0967, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.13742630615978857, | |
| "grad_norm": 0.10152243077754974, | |
| "learning_rate": 0.0001864848361489925, | |
| "loss": 1.0951, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.13783289286440334, | |
| "grad_norm": 0.10802541673183441, | |
| "learning_rate": 0.00018644412782414005, | |
| "loss": 0.8742, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.13823947956901808, | |
| "grad_norm": 0.09942565858364105, | |
| "learning_rate": 0.0001864034194992876, | |
| "loss": 0.9961, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.13864606627363285, | |
| "grad_norm": 0.08618199825286865, | |
| "learning_rate": 0.00018636271117443518, | |
| "loss": 0.9645, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.13905265297824762, | |
| "grad_norm": 0.1056099608540535, | |
| "learning_rate": 0.00018632200284958273, | |
| "loss": 0.9885, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.13945923968286236, | |
| "grad_norm": 0.08862382173538208, | |
| "learning_rate": 0.00018628129452473032, | |
| "loss": 0.9316, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.13986582638747713, | |
| "grad_norm": 0.09923135489225388, | |
| "learning_rate": 0.00018624058619987787, | |
| "loss": 0.9959, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.1402724130920919, | |
| "grad_norm": 0.09120538830757141, | |
| "learning_rate": 0.00018619987787502545, | |
| "loss": 0.968, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.14067899979670664, | |
| "grad_norm": 0.09669141471385956, | |
| "learning_rate": 0.00018615916955017303, | |
| "loss": 1.085, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.1410855865013214, | |
| "grad_norm": 0.08598754554986954, | |
| "learning_rate": 0.00018611846122532059, | |
| "loss": 0.9504, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.14149217320593616, | |
| "grad_norm": 0.09238371253013611, | |
| "learning_rate": 0.00018607775290046817, | |
| "loss": 0.9742, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.14189875991055093, | |
| "grad_norm": 0.091258205473423, | |
| "learning_rate": 0.00018603704457561572, | |
| "loss": 0.9341, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.1423053466151657, | |
| "grad_norm": 0.10129548609256744, | |
| "learning_rate": 0.0001859963362507633, | |
| "loss": 1.0814, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14271193331978044, | |
| "grad_norm": 0.09523019194602966, | |
| "learning_rate": 0.00018595562792591086, | |
| "loss": 0.9848, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.1431185200243952, | |
| "grad_norm": 0.09485248476266861, | |
| "learning_rate": 0.0001859149196010584, | |
| "loss": 0.9828, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.14352510672900995, | |
| "grad_norm": 0.09963666647672653, | |
| "learning_rate": 0.000185874211276206, | |
| "loss": 1.1075, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.14393169343362472, | |
| "grad_norm": 0.09067155420780182, | |
| "learning_rate": 0.00018583350295135355, | |
| "loss": 0.971, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.1443382801382395, | |
| "grad_norm": 0.09153544157743454, | |
| "learning_rate": 0.00018579279462650113, | |
| "loss": 0.9405, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.14474486684285423, | |
| "grad_norm": 0.1024472787976265, | |
| "learning_rate": 0.00018575208630164868, | |
| "loss": 0.9967, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.145151453547469, | |
| "grad_norm": 0.09804495424032211, | |
| "learning_rate": 0.00018571137797679626, | |
| "loss": 0.9578, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.14555804025208377, | |
| "grad_norm": 0.099054716527462, | |
| "learning_rate": 0.00018567066965194384, | |
| "loss": 0.9999, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.1459646269566985, | |
| "grad_norm": 0.09781336784362793, | |
| "learning_rate": 0.0001856299613270914, | |
| "loss": 1.09, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.14637121366131328, | |
| "grad_norm": 0.08993211388587952, | |
| "learning_rate": 0.00018558925300223898, | |
| "loss": 1.0719, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14677780036592802, | |
| "grad_norm": 0.09146003425121307, | |
| "learning_rate": 0.00018554854467738653, | |
| "loss": 1.0008, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.1471843870705428, | |
| "grad_norm": 0.09643495827913284, | |
| "learning_rate": 0.00018550783635253411, | |
| "loss": 1.0791, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.14759097377515756, | |
| "grad_norm": 0.09078676998615265, | |
| "learning_rate": 0.00018546712802768167, | |
| "loss": 0.8641, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.1479975604797723, | |
| "grad_norm": 0.08719085901975632, | |
| "learning_rate": 0.00018542641970282922, | |
| "loss": 0.985, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.14840414718438708, | |
| "grad_norm": 0.09189736843109131, | |
| "learning_rate": 0.0001853857113779768, | |
| "loss": 0.9638, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.14881073388900182, | |
| "grad_norm": 0.09381456673145294, | |
| "learning_rate": 0.00018534500305312436, | |
| "loss": 1.0036, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.1492173205936166, | |
| "grad_norm": 0.0922684445977211, | |
| "learning_rate": 0.00018530429472827194, | |
| "loss": 1.0391, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.14962390729823136, | |
| "grad_norm": 0.09465248882770538, | |
| "learning_rate": 0.0001852635864034195, | |
| "loss": 0.8874, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.1500304940028461, | |
| "grad_norm": 0.0938408225774765, | |
| "learning_rate": 0.00018522287807856707, | |
| "loss": 1.0269, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.15043708070746087, | |
| "grad_norm": 0.09377933293581009, | |
| "learning_rate": 0.00018518216975371466, | |
| "loss": 1.0142, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.15084366741207564, | |
| "grad_norm": 0.1117277517914772, | |
| "learning_rate": 0.0001851414614288622, | |
| "loss": 1.0371, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.15125025411669038, | |
| "grad_norm": 0.10293183475732803, | |
| "learning_rate": 0.0001851007531040098, | |
| "loss": 1.0, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.15165684082130515, | |
| "grad_norm": 0.09216313809156418, | |
| "learning_rate": 0.00018506004477915734, | |
| "loss": 0.9703, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.1520634275259199, | |
| "grad_norm": 0.09088669717311859, | |
| "learning_rate": 0.00018501933645430493, | |
| "loss": 0.8766, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.15247001423053466, | |
| "grad_norm": 0.09916643798351288, | |
| "learning_rate": 0.00018497862812945248, | |
| "loss": 1.0958, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.15287660093514943, | |
| "grad_norm": 0.08404985070228577, | |
| "learning_rate": 0.00018493791980460003, | |
| "loss": 0.9602, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.15328318763976417, | |
| "grad_norm": 0.10011377185583115, | |
| "learning_rate": 0.00018489721147974762, | |
| "loss": 1.0377, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.15368977434437894, | |
| "grad_norm": 0.09958089143037796, | |
| "learning_rate": 0.00018485650315489517, | |
| "loss": 1.0213, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.15409636104899369, | |
| "grad_norm": 0.09488838911056519, | |
| "learning_rate": 0.00018481579483004275, | |
| "loss": 0.941, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.15450294775360846, | |
| "grad_norm": 0.09099314361810684, | |
| "learning_rate": 0.00018477508650519033, | |
| "loss": 0.8913, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.15490953445822322, | |
| "grad_norm": 0.0956854447722435, | |
| "learning_rate": 0.00018473437818033789, | |
| "loss": 1.1478, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.15531612116283797, | |
| "grad_norm": 0.11225584149360657, | |
| "learning_rate": 0.00018469366985548547, | |
| "loss": 1.0795, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.15572270786745274, | |
| "grad_norm": 0.11592987924814224, | |
| "learning_rate": 0.00018465296153063302, | |
| "loss": 1.0863, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.1561292945720675, | |
| "grad_norm": 0.09232570976018906, | |
| "learning_rate": 0.0001846122532057806, | |
| "loss": 0.9551, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.15653588127668225, | |
| "grad_norm": 0.08860056847333908, | |
| "learning_rate": 0.00018457154488092816, | |
| "loss": 1.0206, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.15694246798129702, | |
| "grad_norm": 0.10788331180810928, | |
| "learning_rate": 0.00018453083655607574, | |
| "loss": 0.9378, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.15734905468591176, | |
| "grad_norm": 0.10758615285158157, | |
| "learning_rate": 0.0001844901282312233, | |
| "loss": 1.1149, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.15775564139052653, | |
| "grad_norm": 0.10551386326551437, | |
| "learning_rate": 0.00018444941990637085, | |
| "loss": 1.0729, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.1581622280951413, | |
| "grad_norm": 0.08733198046684265, | |
| "learning_rate": 0.00018440871158151843, | |
| "loss": 1.0058, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.15856881479975604, | |
| "grad_norm": 0.1095399409532547, | |
| "learning_rate": 0.00018436800325666598, | |
| "loss": 1.0566, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.1589754015043708, | |
| "grad_norm": 0.12356330454349518, | |
| "learning_rate": 0.00018432729493181356, | |
| "loss": 1.0173, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.15938198820898555, | |
| "grad_norm": 0.09934639930725098, | |
| "learning_rate": 0.00018428658660696114, | |
| "loss": 1.1237, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.15978857491360032, | |
| "grad_norm": 0.09402013570070267, | |
| "learning_rate": 0.0001842458782821087, | |
| "loss": 1.0018, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.1601951616182151, | |
| "grad_norm": 0.10511749237775803, | |
| "learning_rate": 0.00018420516995725628, | |
| "loss": 0.9844, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.16060174832282983, | |
| "grad_norm": 0.11193688213825226, | |
| "learning_rate": 0.00018416446163240383, | |
| "loss": 0.9888, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.1610083350274446, | |
| "grad_norm": 0.09895443916320801, | |
| "learning_rate": 0.00018412375330755141, | |
| "loss": 1.1045, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.16141492173205937, | |
| "grad_norm": 0.09660319238901138, | |
| "learning_rate": 0.00018408304498269897, | |
| "loss": 1.0457, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.16182150843667412, | |
| "grad_norm": 0.1339186728000641, | |
| "learning_rate": 0.00018404233665784655, | |
| "loss": 1.1266, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.16222809514128889, | |
| "grad_norm": 0.1154564693570137, | |
| "learning_rate": 0.0001840016283329941, | |
| "loss": 1.0299, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.16263468184590363, | |
| "grad_norm": 0.09698904305696487, | |
| "learning_rate": 0.00018396092000814166, | |
| "loss": 1.1101, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1630412685505184, | |
| "grad_norm": 0.09455164521932602, | |
| "learning_rate": 0.00018392021168328924, | |
| "loss": 0.9928, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.16344785525513317, | |
| "grad_norm": 0.09728690981864929, | |
| "learning_rate": 0.0001838795033584368, | |
| "loss": 1.0603, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.1638544419597479, | |
| "grad_norm": 0.10577269643545151, | |
| "learning_rate": 0.0001838387950335844, | |
| "loss": 0.9922, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.16426102866436268, | |
| "grad_norm": 0.08850935101509094, | |
| "learning_rate": 0.00018379808670873196, | |
| "loss": 0.9758, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.16466761536897742, | |
| "grad_norm": 0.09496256709098816, | |
| "learning_rate": 0.0001837573783838795, | |
| "loss": 1.0949, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.1650742020735922, | |
| "grad_norm": 0.09768050909042358, | |
| "learning_rate": 0.0001837166700590271, | |
| "loss": 1.0054, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.16548078877820696, | |
| "grad_norm": 0.09913921356201172, | |
| "learning_rate": 0.00018367596173417464, | |
| "loss": 1.0272, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.1658873754828217, | |
| "grad_norm": 0.0901927724480629, | |
| "learning_rate": 0.00018363525340932223, | |
| "loss": 1.0264, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.16629396218743647, | |
| "grad_norm": 0.09796515852212906, | |
| "learning_rate": 0.00018359454508446978, | |
| "loss": 1.0338, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.16670054889205124, | |
| "grad_norm": 0.1018638014793396, | |
| "learning_rate": 0.00018355383675961736, | |
| "loss": 1.0409, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.16710713559666598, | |
| "grad_norm": 0.10666611790657043, | |
| "learning_rate": 0.00018351312843476492, | |
| "loss": 1.0924, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.16751372230128075, | |
| "grad_norm": 0.0986141785979271, | |
| "learning_rate": 0.00018347242010991247, | |
| "loss": 0.9468, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.1679203090058955, | |
| "grad_norm": 0.09429168701171875, | |
| "learning_rate": 0.00018343171178506005, | |
| "loss": 0.9706, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.16832689571051027, | |
| "grad_norm": 0.09704872965812683, | |
| "learning_rate": 0.0001833910034602076, | |
| "loss": 1.0692, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.16873348241512504, | |
| "grad_norm": 0.0980519950389862, | |
| "learning_rate": 0.00018335029513535519, | |
| "loss": 1.0218, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.16914006911973978, | |
| "grad_norm": 0.08980212360620499, | |
| "learning_rate": 0.00018330958681050277, | |
| "loss": 0.9243, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.16954665582435455, | |
| "grad_norm": 0.09630506485700607, | |
| "learning_rate": 0.00018326887848565032, | |
| "loss": 0.9599, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.1699532425289693, | |
| "grad_norm": 0.08608522266149521, | |
| "learning_rate": 0.0001832281701607979, | |
| "loss": 0.9577, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.17035982923358406, | |
| "grad_norm": 0.09151248633861542, | |
| "learning_rate": 0.00018318746183594546, | |
| "loss": 0.9956, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.17076641593819883, | |
| "grad_norm": 0.09689094871282578, | |
| "learning_rate": 0.00018314675351109304, | |
| "loss": 1.0999, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.17117300264281357, | |
| "grad_norm": 0.09316612035036087, | |
| "learning_rate": 0.0001831060451862406, | |
| "loss": 0.8572, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.17157958934742834, | |
| "grad_norm": 0.11449979990720749, | |
| "learning_rate": 0.00018306533686138817, | |
| "loss": 1.0328, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.1719861760520431, | |
| "grad_norm": 0.10802194476127625, | |
| "learning_rate": 0.00018302462853653573, | |
| "loss": 0.9785, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.17239276275665785, | |
| "grad_norm": 0.09997294098138809, | |
| "learning_rate": 0.00018298392021168328, | |
| "loss": 0.9778, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.17279934946127262, | |
| "grad_norm": 0.10244690626859665, | |
| "learning_rate": 0.00018294321188683086, | |
| "loss": 1.0874, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.17320593616588736, | |
| "grad_norm": 0.10659472644329071, | |
| "learning_rate": 0.00018290250356197844, | |
| "loss": 1.0196, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.17361252287050213, | |
| "grad_norm": 0.09812036156654358, | |
| "learning_rate": 0.000182861795237126, | |
| "loss": 0.9051, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.1740191095751169, | |
| "grad_norm": 0.845235288143158, | |
| "learning_rate": 0.00018282108691227358, | |
| "loss": 1.0531, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.17442569627973165, | |
| "grad_norm": 0.109995998442173, | |
| "learning_rate": 0.00018278037858742113, | |
| "loss": 1.001, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.17483228298434642, | |
| "grad_norm": 0.12578758597373962, | |
| "learning_rate": 0.00018273967026256871, | |
| "loss": 0.9513, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.17523886968896116, | |
| "grad_norm": 0.1585826873779297, | |
| "learning_rate": 0.00018269896193771627, | |
| "loss": 1.0091, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.17564545639357593, | |
| "grad_norm": 0.15150819718837738, | |
| "learning_rate": 0.00018265825361286385, | |
| "loss": 1.1045, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.1760520430981907, | |
| "grad_norm": 0.1110219806432724, | |
| "learning_rate": 0.0001826175452880114, | |
| "loss": 0.9877, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.17645862980280544, | |
| "grad_norm": 0.11296675354242325, | |
| "learning_rate": 0.00018257683696315896, | |
| "loss": 1.1317, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.1768652165074202, | |
| "grad_norm": 0.11464451253414154, | |
| "learning_rate": 0.00018253612863830654, | |
| "loss": 0.9485, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.17727180321203498, | |
| "grad_norm": 0.08836513012647629, | |
| "learning_rate": 0.0001824954203134541, | |
| "loss": 0.8667, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.17767838991664972, | |
| "grad_norm": 0.10697431862354279, | |
| "learning_rate": 0.00018245471198860167, | |
| "loss": 1.0692, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.1780849766212645, | |
| "grad_norm": 0.10565032064914703, | |
| "learning_rate": 0.00018241400366374925, | |
| "loss": 1.0723, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.17849156332587923, | |
| "grad_norm": 0.11343531310558319, | |
| "learning_rate": 0.0001823732953388968, | |
| "loss": 1.1038, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.178898150030494, | |
| "grad_norm": 0.10002034902572632, | |
| "learning_rate": 0.0001823325870140444, | |
| "loss": 0.9859, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.17930473673510877, | |
| "grad_norm": 0.10602378845214844, | |
| "learning_rate": 0.00018229187868919194, | |
| "loss": 1.1091, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.1797113234397235, | |
| "grad_norm": 0.09775001555681229, | |
| "learning_rate": 0.00018225117036433953, | |
| "loss": 1.0473, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.18011791014433828, | |
| "grad_norm": 0.09872320294380188, | |
| "learning_rate": 0.00018221046203948708, | |
| "loss": 1.0657, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.18052449684895303, | |
| "grad_norm": 0.0893816128373146, | |
| "learning_rate": 0.00018216975371463466, | |
| "loss": 0.915, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.1809310835535678, | |
| "grad_norm": 0.09870447218418121, | |
| "learning_rate": 0.00018212904538978221, | |
| "loss": 0.8847, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.18133767025818257, | |
| "grad_norm": 0.09775330871343613, | |
| "learning_rate": 0.00018208833706492977, | |
| "loss": 0.841, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.1817442569627973, | |
| "grad_norm": 0.10025996714830399, | |
| "learning_rate": 0.00018204762874007735, | |
| "loss": 0.9965, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.18215084366741208, | |
| "grad_norm": 0.09369905292987823, | |
| "learning_rate": 0.0001820069204152249, | |
| "loss": 0.9998, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.18255743037202685, | |
| "grad_norm": 0.09244808554649353, | |
| "learning_rate": 0.0001819662120903725, | |
| "loss": 0.9938, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.1829640170766416, | |
| "grad_norm": 0.12163155525922775, | |
| "learning_rate": 0.00018192550376552007, | |
| "loss": 1.1384, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.18337060378125636, | |
| "grad_norm": 0.08755457401275635, | |
| "learning_rate": 0.00018188479544066762, | |
| "loss": 0.9002, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.1837771904858711, | |
| "grad_norm": 0.0917607769370079, | |
| "learning_rate": 0.0001818440871158152, | |
| "loss": 0.9874, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.18418377719048587, | |
| "grad_norm": 0.09113719314336777, | |
| "learning_rate": 0.00018180337879096276, | |
| "loss": 1.0187, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.18459036389510064, | |
| "grad_norm": 0.08795943111181259, | |
| "learning_rate": 0.00018176267046611034, | |
| "loss": 0.902, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.18499695059971538, | |
| "grad_norm": 0.1016731783747673, | |
| "learning_rate": 0.0001817219621412579, | |
| "loss": 0.9933, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.18540353730433015, | |
| "grad_norm": 0.09413068741559982, | |
| "learning_rate": 0.00018168125381640547, | |
| "loss": 0.9448, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.18581012400894492, | |
| "grad_norm": 0.10015012323856354, | |
| "learning_rate": 0.00018164054549155303, | |
| "loss": 1.1458, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.18621671071355966, | |
| "grad_norm": 0.09086768329143524, | |
| "learning_rate": 0.00018159983716670058, | |
| "loss": 1.0543, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.18662329741817443, | |
| "grad_norm": 0.10910352319478989, | |
| "learning_rate": 0.00018155912884184816, | |
| "loss": 1.0078, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.18702988412278918, | |
| "grad_norm": 0.09674135595560074, | |
| "learning_rate": 0.00018151842051699572, | |
| "loss": 0.9758, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.18743647082740394, | |
| "grad_norm": 0.09108126163482666, | |
| "learning_rate": 0.00018147771219214332, | |
| "loss": 1.0038, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.18784305753201871, | |
| "grad_norm": 0.09710326045751572, | |
| "learning_rate": 0.00018143700386729088, | |
| "loss": 0.9693, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.18824964423663346, | |
| "grad_norm": 0.10069318860769272, | |
| "learning_rate": 0.00018139629554243843, | |
| "loss": 1.1005, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.18865623094124823, | |
| "grad_norm": 0.09434141218662262, | |
| "learning_rate": 0.000181355587217586, | |
| "loss": 1.0359, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.18906281764586297, | |
| "grad_norm": 0.09208261221647263, | |
| "learning_rate": 0.00018131487889273357, | |
| "loss": 1.0374, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.18946940435047774, | |
| "grad_norm": 0.09581121802330017, | |
| "learning_rate": 0.00018127417056788115, | |
| "loss": 1.0267, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.1898759910550925, | |
| "grad_norm": 0.09809669107198715, | |
| "learning_rate": 0.0001812334622430287, | |
| "loss": 1.0652, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.19028257775970725, | |
| "grad_norm": 0.08496394008398056, | |
| "learning_rate": 0.00018119275391817628, | |
| "loss": 0.9468, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.19068916446432202, | |
| "grad_norm": 0.09247399121522903, | |
| "learning_rate": 0.00018115204559332384, | |
| "loss": 1.0247, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.1910957511689368, | |
| "grad_norm": 0.10010971128940582, | |
| "learning_rate": 0.0001811113372684714, | |
| "loss": 0.9674, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.19150233787355153, | |
| "grad_norm": 0.09562191367149353, | |
| "learning_rate": 0.00018107062894361897, | |
| "loss": 0.9819, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.1919089245781663, | |
| "grad_norm": 0.09223975241184235, | |
| "learning_rate": 0.00018102992061876655, | |
| "loss": 1.0051, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.19231551128278104, | |
| "grad_norm": 0.09564565122127533, | |
| "learning_rate": 0.00018098921229391414, | |
| "loss": 0.908, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.1927220979873958, | |
| "grad_norm": 0.09371364116668701, | |
| "learning_rate": 0.0001809485039690617, | |
| "loss": 1.0195, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.19312868469201058, | |
| "grad_norm": 0.0895533412694931, | |
| "learning_rate": 0.00018090779564420924, | |
| "loss": 0.8912, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.19353527139662532, | |
| "grad_norm": 0.08874888718128204, | |
| "learning_rate": 0.00018086708731935682, | |
| "loss": 0.9941, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.1939418581012401, | |
| "grad_norm": 8989.1748046875, | |
| "learning_rate": 0.00018082637899450438, | |
| "loss": 1.0191, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.19434844480585484, | |
| "grad_norm": 0.09893982112407684, | |
| "learning_rate": 0.00018078567066965196, | |
| "loss": 1.1682, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.1947550315104696, | |
| "grad_norm": 0.09100797772407532, | |
| "learning_rate": 0.00018074496234479951, | |
| "loss": 0.9466, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.19516161821508438, | |
| "grad_norm": 0.10540256649255753, | |
| "learning_rate": 0.0001807042540199471, | |
| "loss": 1.0735, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.19556820491969912, | |
| "grad_norm": 0.09110235422849655, | |
| "learning_rate": 0.00018066354569509465, | |
| "loss": 1.0097, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.1959747916243139, | |
| "grad_norm": 0.10651825368404388, | |
| "learning_rate": 0.0001806228373702422, | |
| "loss": 1.014, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.19638137832892866, | |
| "grad_norm": 0.08685674518346786, | |
| "learning_rate": 0.00018058212904538978, | |
| "loss": 0.9755, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.1967879650335434, | |
| "grad_norm": 0.10092045366764069, | |
| "learning_rate": 0.00018054142072053737, | |
| "loss": 0.9397, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.19719455173815817, | |
| "grad_norm": 0.1056622639298439, | |
| "learning_rate": 0.00018050071239568495, | |
| "loss": 0.9864, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.1976011384427729, | |
| "grad_norm": 0.10525202006101608, | |
| "learning_rate": 0.0001804600040708325, | |
| "loss": 1.1085, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.19800772514738768, | |
| "grad_norm": 0.10073073953390121, | |
| "learning_rate": 0.00018041929574598006, | |
| "loss": 1.1264, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.19841431185200245, | |
| "grad_norm": 0.09659091383218765, | |
| "learning_rate": 0.00018037858742112764, | |
| "loss": 0.9848, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.1988208985566172, | |
| "grad_norm": 0.09986629337072372, | |
| "learning_rate": 0.0001803378790962752, | |
| "loss": 1.0732, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.19922748526123196, | |
| "grad_norm": 0.11215290427207947, | |
| "learning_rate": 0.00018029717077142277, | |
| "loss": 1.1259, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.1996340719658467, | |
| "grad_norm": 0.11136343330144882, | |
| "learning_rate": 0.00018025646244657033, | |
| "loss": 1.0857, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.20004065867046147, | |
| "grad_norm": 0.10452030599117279, | |
| "learning_rate": 0.0001802157541217179, | |
| "loss": 0.9997, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.20044724537507624, | |
| "grad_norm": 0.10394178330898285, | |
| "learning_rate": 0.00018017504579686546, | |
| "loss": 1.0852, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.20085383207969099, | |
| "grad_norm": 0.10206598043441772, | |
| "learning_rate": 0.00018013433747201302, | |
| "loss": 0.9629, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.20126041878430576, | |
| "grad_norm": 0.09365608543157578, | |
| "learning_rate": 0.00018009362914716062, | |
| "loss": 0.9504, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.20166700548892053, | |
| "grad_norm": 0.09425178170204163, | |
| "learning_rate": 0.00018005292082230818, | |
| "loss": 1.0038, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.20207359219353527, | |
| "grad_norm": 0.09562011808156967, | |
| "learning_rate": 0.00018001221249745576, | |
| "loss": 1.0877, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.20248017889815004, | |
| "grad_norm": 0.11452426016330719, | |
| "learning_rate": 0.0001799715041726033, | |
| "loss": 1.0688, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.20288676560276478, | |
| "grad_norm": 0.0930696651339531, | |
| "learning_rate": 0.00017993079584775087, | |
| "loss": 1.0255, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.20329335230737955, | |
| "grad_norm": 0.10522327572107315, | |
| "learning_rate": 0.00017989008752289845, | |
| "loss": 1.085, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20369993901199432, | |
| "grad_norm": 0.08499190211296082, | |
| "learning_rate": 0.000179849379198046, | |
| "loss": 0.9235, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.20410652571660906, | |
| "grad_norm": 0.09169955551624298, | |
| "learning_rate": 0.00017980867087319358, | |
| "loss": 0.9836, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.20451311242122383, | |
| "grad_norm": 0.10331466048955917, | |
| "learning_rate": 0.00017976796254834114, | |
| "loss": 1.0255, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.20491969912583857, | |
| "grad_norm": 0.0900363028049469, | |
| "learning_rate": 0.00017972725422348872, | |
| "loss": 0.9691, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.20532628583045334, | |
| "grad_norm": 0.10095544904470444, | |
| "learning_rate": 0.00017968654589863627, | |
| "loss": 1.0289, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2057328725350681, | |
| "grad_norm": 0.0992627814412117, | |
| "learning_rate": 0.00017964583757378383, | |
| "loss": 0.9785, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.20613945923968285, | |
| "grad_norm": 0.0954422652721405, | |
| "learning_rate": 0.00017960512924893144, | |
| "loss": 1.0105, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.20654604594429762, | |
| "grad_norm": 0.0994410440325737, | |
| "learning_rate": 0.000179564420924079, | |
| "loss": 1.0894, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.2069526326489124, | |
| "grad_norm": 0.08866444230079651, | |
| "learning_rate": 0.00017952371259922654, | |
| "loss": 0.9725, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.20735921935352714, | |
| "grad_norm": 0.09361348301172256, | |
| "learning_rate": 0.00017948300427437412, | |
| "loss": 1.0441, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2077658060581419, | |
| "grad_norm": 0.08215323090553284, | |
| "learning_rate": 0.00017944229594952168, | |
| "loss": 0.9214, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.20817239276275665, | |
| "grad_norm": 0.09752262383699417, | |
| "learning_rate": 0.00017940158762466926, | |
| "loss": 0.9456, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.20857897946737142, | |
| "grad_norm": 0.10021419823169708, | |
| "learning_rate": 0.00017936087929981681, | |
| "loss": 1.1158, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.2089855661719862, | |
| "grad_norm": 0.09550227969884872, | |
| "learning_rate": 0.0001793201709749644, | |
| "loss": 0.9789, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.20939215287660093, | |
| "grad_norm": 0.09059977531433105, | |
| "learning_rate": 0.00017927946265011195, | |
| "loss": 0.9649, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.2097987395812157, | |
| "grad_norm": 0.09227627515792847, | |
| "learning_rate": 0.00017923875432525953, | |
| "loss": 0.9779, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.21020532628583044, | |
| "grad_norm": 0.09919798374176025, | |
| "learning_rate": 0.00017919804600040708, | |
| "loss": 1.0155, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.2106119129904452, | |
| "grad_norm": 0.09044051915407181, | |
| "learning_rate": 0.00017915733767555464, | |
| "loss": 0.9428, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.21101849969505998, | |
| "grad_norm": 0.09017504006624222, | |
| "learning_rate": 0.00017911662935070225, | |
| "loss": 0.9244, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.21142508639967472, | |
| "grad_norm": 0.09257036447525024, | |
| "learning_rate": 0.0001790759210258498, | |
| "loss": 1.0168, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2118316731042895, | |
| "grad_norm": 0.0926235020160675, | |
| "learning_rate": 0.00017903521270099735, | |
| "loss": 0.9363, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.21223825980890426, | |
| "grad_norm": 0.08785069733858109, | |
| "learning_rate": 0.00017899450437614494, | |
| "loss": 0.9428, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.212644846513519, | |
| "grad_norm": 0.09824348986148834, | |
| "learning_rate": 0.0001789537960512925, | |
| "loss": 1.0378, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.21305143321813377, | |
| "grad_norm": 0.0915142148733139, | |
| "learning_rate": 0.00017891308772644007, | |
| "loss": 0.9603, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.21345801992274852, | |
| "grad_norm": 0.09466978907585144, | |
| "learning_rate": 0.00017887237940158763, | |
| "loss": 1.013, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.21386460662736329, | |
| "grad_norm": 0.09305880963802338, | |
| "learning_rate": 0.0001788316710767352, | |
| "loss": 0.9386, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.21427119333197805, | |
| "grad_norm": 0.09210691601037979, | |
| "learning_rate": 0.00017879096275188276, | |
| "loss": 0.9797, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.2146777800365928, | |
| "grad_norm": 0.10415366291999817, | |
| "learning_rate": 0.00017875025442703031, | |
| "loss": 1.0125, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.21508436674120757, | |
| "grad_norm": 0.10259640216827393, | |
| "learning_rate": 0.0001787095461021779, | |
| "loss": 1.0473, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.2154909534458223, | |
| "grad_norm": 0.09523239731788635, | |
| "learning_rate": 0.00017866883777732548, | |
| "loss": 0.9603, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.21589754015043708, | |
| "grad_norm": 0.10005185008049011, | |
| "learning_rate": 0.00017862812945247306, | |
| "loss": 1.0768, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.21630412685505185, | |
| "grad_norm": 0.09643250703811646, | |
| "learning_rate": 0.0001785874211276206, | |
| "loss": 1.0799, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.2167107135596666, | |
| "grad_norm": 0.09473159909248352, | |
| "learning_rate": 0.00017854671280276817, | |
| "loss": 1.0657, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.21711730026428136, | |
| "grad_norm": 0.09550385922193527, | |
| "learning_rate": 0.00017850600447791575, | |
| "loss": 1.0389, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.21752388696889613, | |
| "grad_norm": 0.09414463490247726, | |
| "learning_rate": 0.0001784652961530633, | |
| "loss": 1.0317, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.21793047367351087, | |
| "grad_norm": 0.090250164270401, | |
| "learning_rate": 0.00017842458782821088, | |
| "loss": 1.0212, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.21833706037812564, | |
| "grad_norm": 0.09635050594806671, | |
| "learning_rate": 0.00017838387950335844, | |
| "loss": 0.9473, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.21874364708274038, | |
| "grad_norm": 0.0985347330570221, | |
| "learning_rate": 0.00017834317117850602, | |
| "loss": 1.1372, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.21915023378735515, | |
| "grad_norm": 0.09789203107357025, | |
| "learning_rate": 0.00017830246285365357, | |
| "loss": 1.0369, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.21955682049196992, | |
| "grad_norm": 0.09777568280696869, | |
| "learning_rate": 0.00017826175452880113, | |
| "loss": 1.0746, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.21996340719658466, | |
| "grad_norm": 0.09013503789901733, | |
| "learning_rate": 0.0001782210462039487, | |
| "loss": 1.0124, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.22036999390119943, | |
| "grad_norm": 0.10604355484247208, | |
| "learning_rate": 0.0001781803378790963, | |
| "loss": 1.0158, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.22077658060581418, | |
| "grad_norm": 0.09194648265838623, | |
| "learning_rate": 0.00017813962955424387, | |
| "loss": 0.9544, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.22118316731042895, | |
| "grad_norm": 0.09223110228776932, | |
| "learning_rate": 0.00017809892122939142, | |
| "loss": 1.0094, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.22158975401504372, | |
| "grad_norm": 0.09049870073795319, | |
| "learning_rate": 0.00017805821290453898, | |
| "loss": 0.8829, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.22199634071965846, | |
| "grad_norm": 0.10157813131809235, | |
| "learning_rate": 0.00017801750457968656, | |
| "loss": 1.0904, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.22240292742427323, | |
| "grad_norm": 0.09934356063604355, | |
| "learning_rate": 0.0001779767962548341, | |
| "loss": 1.0708, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.222809514128888, | |
| "grad_norm": 0.09037156403064728, | |
| "learning_rate": 0.0001779360879299817, | |
| "loss": 0.916, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.22321610083350274, | |
| "grad_norm": 0.09347829967737198, | |
| "learning_rate": 0.00017789537960512925, | |
| "loss": 1.0328, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.2236226875381175, | |
| "grad_norm": 0.087796151638031, | |
| "learning_rate": 0.00017785467128027683, | |
| "loss": 0.9961, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22402927424273225, | |
| "grad_norm": 0.09518422931432724, | |
| "learning_rate": 0.00017781396295542438, | |
| "loss": 0.9855, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.22443586094734702, | |
| "grad_norm": 0.09606748074293137, | |
| "learning_rate": 0.00017777325463057194, | |
| "loss": 0.954, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.2248424476519618, | |
| "grad_norm": 0.09338165074586868, | |
| "learning_rate": 0.00017773254630571955, | |
| "loss": 1.0876, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.22524903435657653, | |
| "grad_norm": 0.09242440015077591, | |
| "learning_rate": 0.0001776918379808671, | |
| "loss": 0.9418, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.2256556210611913, | |
| "grad_norm": 0.0990302637219429, | |
| "learning_rate": 0.00017765112965601468, | |
| "loss": 1.0641, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.22606220776580604, | |
| "grad_norm": 0.09444238990545273, | |
| "learning_rate": 0.00017761042133116224, | |
| "loss": 1.0315, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.22646879447042081, | |
| "grad_norm": 0.08771083503961563, | |
| "learning_rate": 0.0001775697130063098, | |
| "loss": 0.9898, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.22687538117503558, | |
| "grad_norm": 0.10041147470474243, | |
| "learning_rate": 0.00017752900468145737, | |
| "loss": 1.0478, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.22728196787965033, | |
| "grad_norm": 0.0933571383357048, | |
| "learning_rate": 0.00017748829635660492, | |
| "loss": 1.0002, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.2276885545842651, | |
| "grad_norm": 0.0912991389632225, | |
| "learning_rate": 0.0001774475880317525, | |
| "loss": 1.0807, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.22809514128887987, | |
| "grad_norm": 0.09350984543561935, | |
| "learning_rate": 0.00017740687970690006, | |
| "loss": 0.8962, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.2285017279934946, | |
| "grad_norm": 0.0978541299700737, | |
| "learning_rate": 0.00017736617138204764, | |
| "loss": 1.0339, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.22890831469810938, | |
| "grad_norm": 0.08964958041906357, | |
| "learning_rate": 0.0001773254630571952, | |
| "loss": 1.051, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.22931490140272412, | |
| "grad_norm": 0.09241898357868195, | |
| "learning_rate": 0.00017728475473234275, | |
| "loss": 0.903, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.2297214881073389, | |
| "grad_norm": 0.09366483986377716, | |
| "learning_rate": 0.00017724404640749036, | |
| "loss": 1.0055, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.23012807481195366, | |
| "grad_norm": 0.10184673964977264, | |
| "learning_rate": 0.0001772033380826379, | |
| "loss": 1.004, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.2305346615165684, | |
| "grad_norm": 0.09287306666374207, | |
| "learning_rate": 0.0001771626297577855, | |
| "loss": 0.9667, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.23094124822118317, | |
| "grad_norm": 0.08905091136693954, | |
| "learning_rate": 0.00017712192143293305, | |
| "loss": 0.9295, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.2313478349257979, | |
| "grad_norm": 0.0908786877989769, | |
| "learning_rate": 0.0001770812131080806, | |
| "loss": 0.8957, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.23175442163041268, | |
| "grad_norm": 0.10284281522035599, | |
| "learning_rate": 0.00017704050478322818, | |
| "loss": 1.1311, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.23216100833502745, | |
| "grad_norm": 0.09007006883621216, | |
| "learning_rate": 0.00017699979645837574, | |
| "loss": 0.9919, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.2325675950396422, | |
| "grad_norm": 0.09025272727012634, | |
| "learning_rate": 0.00017695908813352332, | |
| "loss": 0.9057, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.23297418174425696, | |
| "grad_norm": 0.0994710698723793, | |
| "learning_rate": 0.00017691837980867087, | |
| "loss": 1.1472, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.23338076844887173, | |
| "grad_norm": 0.09117428958415985, | |
| "learning_rate": 0.00017687767148381845, | |
| "loss": 0.9665, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.23378735515348648, | |
| "grad_norm": 0.0893009826540947, | |
| "learning_rate": 0.000176836963158966, | |
| "loss": 0.951, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.23419394185810125, | |
| "grad_norm": 0.08649599552154541, | |
| "learning_rate": 0.0001767962548341136, | |
| "loss": 0.925, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.234600528562716, | |
| "grad_norm": 0.0928448736667633, | |
| "learning_rate": 0.00017675554650926117, | |
| "loss": 0.9253, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.23500711526733076, | |
| "grad_norm": 0.10335158556699753, | |
| "learning_rate": 0.00017671483818440872, | |
| "loss": 1.1171, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.23541370197194553, | |
| "grad_norm": 0.09889842569828033, | |
| "learning_rate": 0.0001766741298595563, | |
| "loss": 1.0005, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.23582028867656027, | |
| "grad_norm": 0.09655506163835526, | |
| "learning_rate": 0.00017663342153470386, | |
| "loss": 1.0273, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.23622687538117504, | |
| "grad_norm": 0.09516560286283493, | |
| "learning_rate": 0.0001765927132098514, | |
| "loss": 1.024, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.23663346208578978, | |
| "grad_norm": 0.10024843364953995, | |
| "learning_rate": 0.000176552004884999, | |
| "loss": 1.0299, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.23704004879040455, | |
| "grad_norm": 0.10152596235275269, | |
| "learning_rate": 0.00017651129656014655, | |
| "loss": 0.9658, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.23744663549501932, | |
| "grad_norm": 0.09654249995946884, | |
| "learning_rate": 0.00017647058823529413, | |
| "loss": 1.0722, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.23785322219963406, | |
| "grad_norm": 0.09112072736024857, | |
| "learning_rate": 0.00017642987991044168, | |
| "loss": 0.9846, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.23825980890424883, | |
| "grad_norm": 0.09640034288167953, | |
| "learning_rate": 0.00017638917158558926, | |
| "loss": 1.0501, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.2386663956088636, | |
| "grad_norm": 0.09564584493637085, | |
| "learning_rate": 0.00017634846326073682, | |
| "loss": 0.955, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.23907298231347834, | |
| "grad_norm": 0.10815359652042389, | |
| "learning_rate": 0.0001763077549358844, | |
| "loss": 1.203, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.2394795690180931, | |
| "grad_norm": 0.09078256040811539, | |
| "learning_rate": 0.00017626704661103198, | |
| "loss": 0.9881, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.23988615572270786, | |
| "grad_norm": 0.09075487405061722, | |
| "learning_rate": 0.00017622633828617954, | |
| "loss": 0.984, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.24029274242732263, | |
| "grad_norm": 0.09048381447792053, | |
| "learning_rate": 0.00017618562996132712, | |
| "loss": 1.0235, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.2406993291319374, | |
| "grad_norm": 0.09820905327796936, | |
| "learning_rate": 0.00017614492163647467, | |
| "loss": 0.9763, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.24110591583655214, | |
| "grad_norm": 0.0961097925901413, | |
| "learning_rate": 0.00017610421331162222, | |
| "loss": 1.1035, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.2415125025411669, | |
| "grad_norm": 0.0877358540892601, | |
| "learning_rate": 0.0001760635049867698, | |
| "loss": 0.8962, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.24191908924578168, | |
| "grad_norm": 0.09730017930269241, | |
| "learning_rate": 0.00017602279666191736, | |
| "loss": 1.1232, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.24232567595039642, | |
| "grad_norm": 0.09486240148544312, | |
| "learning_rate": 0.00017598208833706494, | |
| "loss": 1.0566, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.2427322626550112, | |
| "grad_norm": 0.09367606788873672, | |
| "learning_rate": 0.0001759413800122125, | |
| "loss": 0.9934, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.24313884935962593, | |
| "grad_norm": 0.09046703577041626, | |
| "learning_rate": 0.00017590067168736008, | |
| "loss": 0.9137, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.2435454360642407, | |
| "grad_norm": 0.09512536972761154, | |
| "learning_rate": 0.00017585996336250766, | |
| "loss": 0.9733, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.24395202276885547, | |
| "grad_norm": 0.08619649708271027, | |
| "learning_rate": 0.0001758192550376552, | |
| "loss": 0.8777, | |
| "step": 600 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 4918, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.906257354398122e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |